From 40e64432ce98f4ee5671d39fbf909ef3f3592366 Mon Sep 17 00:00:00 2001
From: Brian Masney <masneyb@onstation.org>
Date: Thu, 25 Apr 2019 19:03:14 -0400
Subject: dt-bindings: iio: tsl2583: convert bindings to YAML format

Convert the tsl2583 device tree bindings to the new YAML format.

Signed-off-by: Brian Masney <masneyb@onstation.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/light/tsl2583.txt      | 25 ------------
 .../devicetree/bindings/iio/light/tsl2583.yaml     | 46 ++++++++++++++++++++++
 2 files changed, 46 insertions(+), 25 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/iio/light/tsl2583.txt
 create mode 100644 Documentation/devicetree/bindings/iio/light/tsl2583.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/light/tsl2583.txt b/Documentation/devicetree/bindings/iio/light/tsl2583.txt
deleted file mode 100644
index 059dffa1829a..000000000000
--- a/Documentation/devicetree/bindings/iio/light/tsl2583.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-* TAOS TSL 2580/2581/2583 ALS sensor
-
-Required properties:
-
-  - compatible: Should be one of
-		"amstaos,tsl2580"
-		"amstaos,tsl2581"
-		"amstaos,tsl2583"
-  - reg: the I2C address of the device
-
-Optional properties:
-
-  - interrupts: the sole interrupt generated by the device
-
-  Refer to interrupt-controller/interrupts.txt for generic interrupt client
-  node bindings.
-
-  - vcc-supply: phandle to the regulator that provides power to the sensor.
-
-Example:
-
-tsl2581@29 {
-	compatible = "amstaos,tsl2581";
-	reg = <0x29>;
-};
diff --git a/Documentation/devicetree/bindings/iio/light/tsl2583.yaml b/Documentation/devicetree/bindings/iio/light/tsl2583.yaml
new file mode 100644
index 000000000000..e86ef64ecf03
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/light/tsl2583.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/light/tsl2583.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: AMS/TAOS Ambient Light Sensor (ALS)
+
+maintainers:
+  - Brian Masney <masneyb@onstation.org>
+
+description: |
+  Ambient light sensing with an i2c interface.
+
+properties:
+  compatible:
+    enum:
+      - amstaos,tsl2580
+      - amstaos,tsl2581
+      - amstaos,tsl2583
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  vcc-supply:
+    description: Regulator that provides power to the sensor
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        light-sensor@29 {
+                compatible = "amstaos,tsl2581";
+                reg = <0x29>;
+        };
+    };
+...
-- 
cgit 


From 17b62779cbe40773e10a83af000e51c29b764575 Mon Sep 17 00:00:00 2001
From: Brian Masney <masneyb@onstation.org>
Date: Thu, 25 Apr 2019 19:03:13 -0400
Subject: dt-bindings: iio: tsl2772: convert bindings to YAML format

Convert the tsl2772 device tree bindings to the new YAML format.

Signed-off-by: Brian Masney <masneyb@onstation.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/light/tsl2772.txt      | 42 -----------
 .../devicetree/bindings/iio/light/tsl2772.yaml     | 83 ++++++++++++++++++++++
 2 files changed, 83 insertions(+), 42 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/iio/light/tsl2772.txt
 create mode 100644 Documentation/devicetree/bindings/iio/light/tsl2772.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/light/tsl2772.txt b/Documentation/devicetree/bindings/iio/light/tsl2772.txt
deleted file mode 100644
index 1c5e6f17a1df..000000000000
--- a/Documentation/devicetree/bindings/iio/light/tsl2772.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-* AMS/TAOS ALS and proximity sensor
-
-Required properties:
-
-  - compatible: Should be one of
-		"amstaos,tsl2571"
-		"amstaos,tsl2671"
-		"amstaos,tmd2671"
-		"amstaos,tsl2771"
-		"amstaos,tmd2771"
-		"amstaos,tsl2572"
-		"amstaos,tsl2672"
-		"amstaos,tmd2672"
-		"amstaos,tsl2772"
-		"amstaos,tmd2772"
-		"avago,apds9930"
-  - reg: the I2C address of the device
-
-Optional properties:
-
-  - amstaos,proximity-diodes - proximity diodes to enable. <0>, <1>, or <0 1>
-                               are the only valid values.
-  - led-max-microamp - current for the proximity LED. Must be 100000, 50000,
-                       25000, or 13000.
-  - vdd-supply: phandle to the regulator that provides power to the sensor.
-  - vddio-supply: phandle to the regulator that provides power to the bus.
-  - interrupts: the sole interrupt generated by the device
-
-  Refer to interrupt-controller/interrupts.txt for generic interrupt client
-  node bindings.
-
-Example:
-
-tsl2772@39 {
-	compatible = "amstaos,tsl2772";
-	reg = <0x39>;
-	interrupts-extended = <&msmgpio 61 IRQ_TYPE_EDGE_FALLING>;
-	vdd-supply = <&pm8941_l17>;
-	vddio-supply = <&pm8941_lvs1>;
-	amstaos,proximity-diodes = <0>;
-	led-max-microamp = <100000>;
-};
diff --git a/Documentation/devicetree/bindings/iio/light/tsl2772.yaml b/Documentation/devicetree/bindings/iio/light/tsl2772.yaml
new file mode 100644
index 000000000000..ed2c3d5eadf5
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/light/tsl2772.yaml
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/light/tsl2772.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: AMS/TAOS Ambient Light Sensor (ALS) and Proximity Detector
+
+maintainers:
+  - Brian Masney <masneyb@onstation.org>
+
+description: |
+  Ambient light sensing and proximity detection with an i2c interface.
+  https://ams.com/documents/20143/36005/TSL2772_DS000181_2-00.pdf
+
+properties:
+  compatible:
+    enum:
+      - amstaos,tsl2571
+      - amstaos,tsl2671
+      - amstaos,tmd2671
+      - amstaos,tsl2771
+      - amstaos,tmd2771
+      - amstaos,tsl2572
+      - amstaos,tsl2672
+      - amstaos,tmd2672
+      - amstaos,tsl2772
+      - amstaos,tmd2772
+      - avago,apds9930
+
+  reg:
+    maxItems: 1
+
+  amstaos,proximity-diodes:
+    description: Proximity diodes to enable
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+      - minItems: 1
+        maxItems: 2
+        items:
+          minimum: 0
+          maximum: 1
+
+  interrupts:
+    maxItems: 1
+
+  led-max-microamp:
+    description: Current for the proximity LED
+    enum:
+      - 13000
+      - 25000
+      - 50000
+      - 100000
+
+  vdd-supply:
+    description: Regulator that provides power to the sensor
+
+  vddio-supply:
+    description: Regulator that provides power to the bus
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        sensor@39 {
+                compatible = "amstaos,tsl2772";
+                reg = <0x39>;
+                interrupts-extended = <&msmgpio 61 IRQ_TYPE_EDGE_FALLING>;
+                vdd-supply = <&pm8941_l17>;
+                vddio-supply = <&pm8941_lvs1>;
+                amstaos,proximity-diodes = <0>;
+                led-max-microamp = <100000>;
+        };
+    };
+...
-- 
cgit 


From 8b7a6a35746292e140304107e60f9a980416abf7 Mon Sep 17 00:00:00 2001
From: Gwendal Grignou <gwendal@chromium.org>
Date: Fri, 26 Apr 2019 13:39:16 +0200
Subject: iio: cros_ec: add 'id' sysfs entry

This new sysfs entry is used to interpret ring buffer information,
mainly by Android sensor HAL.
It expand to all sensors, the documentation about 'id' we can found
in Documentation/ABI/testing/sysfs-bus-iio-cros-ec.

Also fix typo in docs, I replace 'Septembre' by 'September'.

Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Signed-off-by: Fabien Lahoudere <fabien.lahoudere@collabora.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/ABI/testing/sysfs-bus-iio-cros-ec | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio-cros-ec b/Documentation/ABI/testing/sysfs-bus-iio-cros-ec
index 0e95c2ca105c..6158f831c761 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-cros-ec
+++ b/Documentation/ABI/testing/sysfs-bus-iio-cros-ec
@@ -18,11 +18,11 @@ Description:
 		values are 'base' and 'lid'.
 
 What:		/sys/bus/iio/devices/iio:deviceX/id
-Date:		Septembre 2017
+Date:		September 2017
 KernelVersion:	4.14
 Contact:	linux-iio@vger.kernel.org
 Description:
-		This attribute is exposed by the CrOS EC legacy accelerometer
-		driver and represents the sensor ID as exposed by the EC. This
-		ID is used by the Android sensor service hardware abstraction
-		layer (sensor HAL) through the Android container on ChromeOS.
+		This attribute is exposed by the CrOS EC sensors driver and
+		represents the sensor ID as exposed by the EC. This ID is used
+		by the Android sensor service hardware abstraction layer (sensor
+		HAL) through the Android container on ChromeOS.
-- 
cgit 


From 7e527e11d672e90f1a3dc8de84e0bfaccda15bba Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Mon, 3 Jun 2019 12:14:00 +0300
Subject: mei: docs: move documentation under driver-api

Move mei driver documentation under Documentation/driver-api/
Perform some minimal formating changes to produce correct sphinx rendering
and add index.rst

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/index.rst                |   1 +
 Documentation/driver-api/mei/index.rst            |  22 ++
 Documentation/driver-api/mei/mei-client-bus.rst   | 152 +++++++++++++
 Documentation/driver-api/mei/mei.rst              | 250 ++++++++++++++++++++
 Documentation/misc-devices/mei/mei-client-bus.txt | 141 ------------
 Documentation/misc-devices/mei/mei.txt            | 266 ----------------------
 6 files changed, 425 insertions(+), 407 deletions(-)
 create mode 100644 Documentation/driver-api/mei/index.rst
 create mode 100644 Documentation/driver-api/mei/mei-client-bus.rst
 create mode 100644 Documentation/driver-api/mei/mei.rst
 delete mode 100644 Documentation/misc-devices/mei/mei-client-bus.txt
 delete mode 100644 Documentation/misc-devices/mei/mei.txt

(limited to 'Documentation')

diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index d26308af6036..0dbaa987aa11 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -42,6 +42,7 @@ available subsections can be seen below.
    target
    mtdnand
    miscellaneous
+   mei/index
    w1
    rapidio
    s390-drivers
diff --git a/Documentation/driver-api/mei/index.rst b/Documentation/driver-api/mei/index.rst
new file mode 100644
index 000000000000..35c1117d8366
--- /dev/null
+++ b/Documentation/driver-api/mei/index.rst
@@ -0,0 +1,22 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+===================================================
+Intel(R) Management Engine Interface (Intel(R) MEI)
+===================================================
+
+**Copyright** |copy| 2019 Intel Corporation
+
+
+.. only:: html
+
+   .. class:: toc-title
+
+        Table of Contents
+
+.. toctree::
+   :maxdepth: 2
+
+   mei
+   mei-client-bus
diff --git a/Documentation/driver-api/mei/mei-client-bus.rst b/Documentation/driver-api/mei/mei-client-bus.rst
new file mode 100644
index 000000000000..a26a85453bdf
--- /dev/null
+++ b/Documentation/driver-api/mei/mei-client-bus.rst
@@ -0,0 +1,152 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================================
+Intel(R) Management Engine (ME) Client bus API
+==============================================
+
+
+Rationale
+=========
+
+MEI misc character device is useful for dedicated applications to send and receive
+data to the many FW appliance found in Intel's ME from the user space.
+However for some of the ME functionalities it make sense to leverage existing software
+stack and expose them through existing kernel subsystems.
+
+In order to plug seamlessly into the kernel device driver model we add kernel virtual
+bus abstraction on top of the MEI driver. This allows implementing linux kernel drivers
+for the various MEI features as a stand alone entities found in their respective subsystem.
+Existing device drivers can even potentially be re-used by adding an MEI CL bus layer to
+the existing code.
+
+
+MEI CL bus API
+==============
+
+A driver implementation for an MEI Client is very similar to existing bus
+based device drivers. The driver registers itself as an MEI CL bus driver through
+the ``struct mei_cl_driver`` structure:
+
+.. code-block:: C
+
+        struct mei_cl_driver {
+                struct device_driver driver;
+                const char *name;
+
+                const struct mei_cl_device_id *id_table;
+
+                int (*probe)(struct mei_cl_device *dev, const struct mei_cl_id *id);
+                int (*remove)(struct mei_cl_device *dev);
+        };
+
+        struct mei_cl_id {
+                char name[MEI_NAME_SIZE];
+                kernel_ulong_t driver_info;
+        };
+
+The mei_cl_id structure allows the driver to bind itself against a device name.
+
+To actually register a driver on the ME Client bus one must call the mei_cl_add_driver()
+API. This is typically called at module init time.
+
+Once registered on the ME Client bus, a driver will typically try to do some I/O on
+this bus and this should be done through the mei_cl_send() and mei_cl_recv()
+routines. The latter is synchronous (blocks and sleeps until data shows up).
+In order for drivers to be notified of pending events waiting for them (e.g.
+an Rx event) they can register an event handler through the
+mei_cl_register_event_cb() routine. Currently only the MEI_EVENT_RX event
+will trigger an event handler call and the driver implementation is supposed
+to call mei_recv() from the event handler in order to fetch the pending
+received buffers.
+
+
+Example
+=======
+
+As a theoretical example let's pretend the ME comes with a "contact" NFC IP.
+The driver init and exit routines for this device would look like:
+
+.. code-block:: C
+
+	#define CONTACT_DRIVER_NAME "contact"
+
+	static struct mei_cl_device_id contact_mei_cl_tbl[] = {
+		{ CONTACT_DRIVER_NAME, },
+
+		/* required last entry */
+		{ }
+	};
+	MODULE_DEVICE_TABLE(mei_cl, contact_mei_cl_tbl);
+
+	static struct mei_cl_driver contact_driver = {
+		.id_table = contact_mei_tbl,
+		.name = CONTACT_DRIVER_NAME,
+
+		.probe = contact_probe,
+		.remove = contact_remove,
+	};
+
+	static int contact_init(void)
+	{
+		int r;
+
+		r = mei_cl_driver_register(&contact_driver);
+		if (r) {
+			pr_err(CONTACT_DRIVER_NAME ": driver registration failed\n");
+			return r;
+		}
+
+		return 0;
+	}
+
+	static void __exit contact_exit(void)
+	{
+		mei_cl_driver_unregister(&contact_driver);
+	}
+
+	module_init(contact_init);
+	module_exit(contact_exit);
+
+And the driver's simplified probe routine would look like that:
+
+.. code-block:: C
+
+	int contact_probe(struct mei_cl_device *dev, struct mei_cl_device_id *id)
+	{
+		struct contact_driver *contact;
+
+		[...]
+		mei_cl_enable_device(dev);
+
+		mei_cl_register_event_cb(dev, contact_event_cb, contact);
+
+		return 0;
+	}
+
+In the probe routine the driver first enable the MEI device and then registers
+an ME bus event handler which is as close as it can get to registering a
+threaded IRQ handler.
+The handler implementation will typically call some I/O routine depending on
+the pending events:
+
+#define MAX_NFC_PAYLOAD 128
+
+.. code-block:: C
+
+	static void contact_event_cb(struct mei_cl_device *dev, u32 events,
+				     void *context)
+	{
+		struct contact_driver *contact = context;
+
+		if (events & BIT(MEI_EVENT_RX)) {
+			u8 payload[MAX_NFC_PAYLOAD];
+			int payload_size;
+
+			payload_size = mei_recv(dev, payload, MAX_NFC_PAYLOAD);
+			if (payload_size <= 0)
+				return;
+
+			/* Hook to the NFC subsystem */
+			nfc_hci_recv_frame(contact->hdev, payload, payload_size);
+		}
+	}
diff --git a/Documentation/driver-api/mei/mei.rst b/Documentation/driver-api/mei/mei.rst
new file mode 100644
index 000000000000..5aa3a5e6496a
--- /dev/null
+++ b/Documentation/driver-api/mei/mei.rst
@@ -0,0 +1,250 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Introduction
+============
+
+The Intel Management Engine (Intel ME) is an isolated and protected computing
+resource (Co-processor) residing inside certain Intel chipsets. The Intel ME
+provides support for computer/IT management features. The feature set
+depends on the Intel chipset SKU.
+
+The Intel Management Engine Interface (Intel MEI, previously known as HECI)
+is the interface between the Host and Intel ME. This interface is exposed
+to the host as a PCI device. The Intel MEI Driver is in charge of the
+communication channel between a host application and the Intel ME feature.
+
+Each Intel ME feature (Intel ME Client) is addressed by a GUID/UUID and
+each client has its own protocol. The protocol is message-based with a
+header and payload up to 512 bytes.
+
+Prominent usage of the Intel ME Interface is to communicate with Intel(R)
+Active Management Technology (Intel AMT) implemented in firmware running on
+the Intel ME.
+
+Intel AMT provides the ability to manage a host remotely out-of-band (OOB)
+even when the operating system running on the host processor has crashed or
+is in a sleep state.
+
+Some examples of Intel AMT usage are:
+   - Monitoring hardware state and platform components
+   - Remote power off/on (useful for green computing or overnight IT
+     maintenance)
+   - OS updates
+   - Storage of useful platform information such as software assets
+   - Built-in hardware KVM
+   - Selective network isolation of Ethernet and IP protocol flows based
+     on policies set by a remote management console
+   - IDE device redirection from remote management console
+
+Intel AMT (OOB) communication is based on SOAP (deprecated
+starting with Release 6.0) over HTTP/S or WS-Management protocol over
+HTTP/S that are received from a remote management console application.
+
+For more information about Intel AMT:
+http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
+
+
+Intel MEI Driver
+================
+
+The driver exposes a misc device called /dev/mei.
+
+An application maintains communication with an Intel ME feature while
+/dev/mei is open. The binding to a specific feature is performed by calling
+MEI_CONNECT_CLIENT_IOCTL, which passes the desired UUID.
+The number of instances of an Intel ME feature that can be opened
+at the same time depends on the Intel ME feature, but most of the
+features allow only a single instance.
+
+The Intel AMT Host Interface (Intel AMTHI) feature supports multiple
+simultaneous user connected applications. The Intel MEI driver
+handles this internally by maintaining request queues for the applications.
+
+The driver is transparent to data that are passed between firmware feature
+and host application.
+
+Because some of the Intel ME features can change the system
+configuration, the driver by default allows only a privileged
+user to access it.
+
+A code snippet for an application communicating with Intel AMTHI client:
+
+.. code-block:: C
+
+	struct mei_connect_client_data data;
+	fd = open(MEI_DEVICE);
+
+	data.d.in_client_uuid = AMTHI_UUID;
+
+	ioctl(fd, IOCTL_MEI_CONNECT_CLIENT, &data);
+
+	printf("Ver=%d, MaxLen=%ld\n",
+			data.d.in_client_uuid.protocol_version,
+			data.d.in_client_uuid.max_msg_length);
+
+	[...]
+
+	write(fd, amthi_req_data, amthi_req_data_len);
+
+	[...]
+
+	read(fd, &amthi_res_data, amthi_res_data_len);
+
+	[...]
+	close(fd);
+
+
+IOCTLs
+======
+
+The Intel MEI Driver supports the following IOCTL commands:
+	IOCTL_MEI_CONNECT_CLIENT	Connect to firmware Feature (client).
+
+	usage:
+		struct mei_connect_client_data clientData;
+		ioctl(fd, IOCTL_MEI_CONNECT_CLIENT, &clientData);
+
+	inputs:
+		mei_connect_client_data struct contain the following
+		input field:
+
+		in_client_uuid -	UUID of the FW Feature that needs
+					to connect to.
+	outputs:
+		out_client_properties - Client Properties: MTU and Protocol Version.
+
+	error returns:
+		EINVAL	Wrong IOCTL Number
+		ENODEV	Device or Connection is not initialized or ready. (e.g. Wrong UUID)
+		ENOMEM	Unable to allocate memory to client internal data.
+		EFAULT	Fatal Error (e.g. Unable to access user input data)
+		EBUSY	Connection Already Open
+
+	Notes:
+        max_msg_length (MTU) in client properties describes the maximum
+        data that can be sent or received. (e.g. if MTU=2K, can send
+        requests up to bytes 2k and received responses up to 2k bytes).
+
+	IOCTL_MEI_NOTIFY_SET: enable or disable event notifications
+
+	Usage:
+		uint32_t enable;
+		ioctl(fd, IOCTL_MEI_NOTIFY_SET, &enable);
+
+	Inputs:
+		uint32_t enable = 1;
+		or
+		uint32_t enable[disable] = 0;
+
+	Error returns:
+		EINVAL	Wrong IOCTL Number
+		ENODEV	Device  is not initialized or the client not connected
+		ENOMEM	Unable to allocate memory to client internal data.
+		EFAULT	Fatal Error (e.g. Unable to access user input data)
+		EOPNOTSUPP if the device doesn't support the feature
+
+	Notes:
+	The client must be connected in order to enable notification events
+
+
+	IOCTL_MEI_NOTIFY_GET : retrieve event
+
+	Usage:
+		uint32_t event;
+		ioctl(fd, IOCTL_MEI_NOTIFY_GET, &event);
+
+	Outputs:
+		1 - if an event is pending
+		0 - if there is no even pending
+
+	Error returns:
+		EINVAL	Wrong IOCTL Number
+		ENODEV	Device is not initialized or the client not connected
+		ENOMEM	Unable to allocate memory to client internal data.
+		EFAULT	Fatal Error (e.g. Unable to access user input data)
+		EOPNOTSUPP if the device doesn't support the feature
+
+	Notes:
+	The client must be connected and event notification has to be enabled
+	in order to receive an event
+
+
+Intel ME Applications
+=====================
+
+	1) Intel Local Management Service (Intel LMS)
+
+	   Applications running locally on the platform communicate with Intel AMT Release
+	   2.0 and later releases in the same way that network applications do via SOAP
+	   over HTTP (deprecated starting with Release 6.0) or with WS-Management over
+	   SOAP over HTTP. This means that some Intel AMT features can be accessed from a
+	   local application using the same network interface as a remote application
+	   communicating with Intel AMT over the network.
+
+	   When a local application sends a message addressed to the local Intel AMT host
+	   name, the Intel LMS, which listens for traffic directed to the host name,
+	   intercepts the message and routes it to the Intel MEI.
+	   For more information:
+	   http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
+	   Under "About Intel AMT" => "Local Access"
+
+	   For downloading Intel LMS:
+	   http://software.intel.com/en-us/articles/download-the-latest-intel-amt-open-source-drivers/
+
+	   The Intel LMS opens a connection using the Intel MEI driver to the Intel LMS
+	   firmware feature using a defined UUID and then communicates with the feature
+	   using a protocol called Intel AMT Port Forwarding Protocol (Intel APF protocol).
+	   The protocol is used to maintain multiple sessions with Intel AMT from a
+	   single application.
+
+	   See the protocol specification in the Intel AMT Software Development Kit (SDK)
+	   http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
+	   Under "SDK Resources" => "Intel(R) vPro(TM) Gateway (MPS)"
+	   => "Information for Intel(R) vPro(TM) Gateway Developers"
+	   => "Description of the Intel AMT Port Forwarding (APF) Protocol"
+
+	2) Intel AMT Remote configuration using a Local Agent
+
+	   A Local Agent enables IT personnel to configure Intel AMT out-of-the-box
+	   without requiring installing additional data to enable setup. The remote
+	   configuration process may involve an ISV-developed remote configuration
+	   agent that runs on the host.
+	   For more information:
+	   http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
+	   Under "Setup and Configuration of Intel AMT" =>
+	   "SDK Tools Supporting Setup and Configuration" =>
+	   "Using the Local Agent Sample"
+
+	   An open source Intel AMT configuration utility,	implementing a local agent
+	   that accesses the Intel MEI driver, can be found here:
+	   http://software.intel.com/en-us/articles/download-the-latest-intel-amt-open-source-drivers/
+
+
+Intel AMT OS Health Watchdog
+============================
+
+The Intel AMT Watchdog is an OS Health (Hang/Crash) watchdog.
+Whenever the OS hangs or crashes, Intel AMT will send an event
+to any subscriber to this event. This mechanism means that
+IT knows when a platform crashes even when there is a hard failure on the host.
+
+The Intel AMT Watchdog is composed of two parts:
+	1) Firmware feature - receives the heartbeats
+	   and sends an event when the heartbeats stop.
+	2) Intel MEI iAMT watchdog driver - connects to the watchdog feature,
+	   configures the watchdog and sends the heartbeats.
+
+The Intel iAMT watchdog MEI driver uses the kernel watchdog API to configure
+the Intel AMT Watchdog and to send heartbeats to it. The default timeout of the
+watchdog is 120 seconds.
+
+If the Intel AMT is not enabled in the firmware then the watchdog client won't enumerate
+on the me client bus and watchdog devices won't be exposed.
+
+Supported Chipsets
+==================
+82X38/X48 Express and newer
+
+
+---
+linux-mei@linux.intel.com
diff --git a/Documentation/misc-devices/mei/mei-client-bus.txt b/Documentation/misc-devices/mei/mei-client-bus.txt
deleted file mode 100644
index 743be4ec8989..000000000000
--- a/Documentation/misc-devices/mei/mei-client-bus.txt
+++ /dev/null
@@ -1,141 +0,0 @@
-Intel(R) Management Engine (ME) Client bus API
-==============================================
-
-
-Rationale
-=========
-
-MEI misc character device is useful for dedicated applications to send and receive
-data to the many FW appliance found in Intel's ME from the user space.
-However for some of the ME functionalities it make sense to leverage existing software
-stack and expose them through existing kernel subsystems.
-
-In order to plug seamlessly into the kernel device driver model we add kernel virtual
-bus abstraction on top of the MEI driver. This allows implementing linux kernel drivers
-for the various MEI features as a stand alone entities found in their respective subsystem.
-Existing device drivers can even potentially be re-used by adding an MEI CL bus layer to
-the existing code.
-
-
-MEI CL bus API
-==============
-
-A driver implementation for an MEI Client is very similar to existing bus
-based device drivers. The driver registers itself as an MEI CL bus driver through
-the mei_cl_driver structure:
-
-struct mei_cl_driver {
-	struct device_driver driver;
-	const char *name;
-
-	const struct mei_cl_device_id *id_table;
-
-	int (*probe)(struct mei_cl_device *dev, const struct mei_cl_id *id);
-	int (*remove)(struct mei_cl_device *dev);
-};
-
-struct mei_cl_id {
-	char name[MEI_NAME_SIZE];
-	kernel_ulong_t driver_info;
-};
-
-The mei_cl_id structure allows the driver to bind itself against a device name.
-
-To actually register a driver on the ME Client bus one must call the mei_cl_add_driver()
-API. This is typically called at module init time.
-
-Once registered on the ME Client bus, a driver will typically try to do some I/O on
-this bus and this should be done through the mei_cl_send() and mei_cl_recv()
-routines. The latter is synchronous (blocks and sleeps until data shows up).
-In order for drivers to be notified of pending events waiting for them (e.g.
-an Rx event) they can register an event handler through the
-mei_cl_register_event_cb() routine. Currently only the MEI_EVENT_RX event
-will trigger an event handler call and the driver implementation is supposed
-to call mei_recv() from the event handler in order to fetch the pending
-received buffers.
-
-
-Example
-=======
-
-As a theoretical example let's pretend the ME comes with a "contact" NFC IP.
-The driver init and exit routines for this device would look like:
-
-#define CONTACT_DRIVER_NAME "contact"
-
-static struct mei_cl_device_id contact_mei_cl_tbl[] = {
-	{ CONTACT_DRIVER_NAME, },
-
-	/* required last entry */
-	{ }
-};
-MODULE_DEVICE_TABLE(mei_cl, contact_mei_cl_tbl);
-
-static struct mei_cl_driver contact_driver = {
-	.id_table = contact_mei_tbl,
-	.name = CONTACT_DRIVER_NAME,
-
-	.probe = contact_probe,
-	.remove = contact_remove,
-};
-
-static int contact_init(void)
-{
-	int r;
-
-	r = mei_cl_driver_register(&contact_driver);
-	if (r) {
-		pr_err(CONTACT_DRIVER_NAME ": driver registration failed\n");
-		return r;
-	}
-
-	return 0;
-}
-
-static void __exit contact_exit(void)
-{
-	mei_cl_driver_unregister(&contact_driver);
-}
-
-module_init(contact_init);
-module_exit(contact_exit);
-
-And the driver's simplified probe routine would look like that:
-
-int contact_probe(struct mei_cl_device *dev, struct mei_cl_device_id *id)
-{
-	struct contact_driver *contact;
-
-	[...]
-	mei_cl_enable_device(dev);
-
-	mei_cl_register_event_cb(dev, contact_event_cb, contact);
-
-	return 0;
-}
-
-In the probe routine the driver first enable the MEI device and then registers
-an ME bus event handler which is as close as it can get to registering a
-threaded IRQ handler.
-The handler implementation will typically call some I/O routine depending on
-the pending events:
-
-#define MAX_NFC_PAYLOAD 128
-
-static void contact_event_cb(struct mei_cl_device *dev, u32 events,
-			     void *context)
-{
-	struct contact_driver *contact = context;
-
-	if (events & BIT(MEI_EVENT_RX)) {
-		u8 payload[MAX_NFC_PAYLOAD];
-		int payload_size;
-
-		payload_size = mei_recv(dev, payload, MAX_NFC_PAYLOAD);
-		if (payload_size <= 0)
-			return;
-
-		/* Hook to the NFC subsystem */
-		nfc_hci_recv_frame(contact->hdev, payload, payload_size);
-	}
-}
diff --git a/Documentation/misc-devices/mei/mei.txt b/Documentation/misc-devices/mei/mei.txt
deleted file mode 100644
index 2b80a0cd621f..000000000000
--- a/Documentation/misc-devices/mei/mei.txt
+++ /dev/null
@@ -1,266 +0,0 @@
-Intel(R) Management Engine Interface (Intel(R) MEI)
-===================================================
-
-Introduction
-============
-
-The Intel Management Engine (Intel ME) is an isolated and protected computing
-resource (Co-processor) residing inside certain Intel chipsets. The Intel ME
-provides support for computer/IT management features. The feature set
-depends on the Intel chipset SKU.
-
-The Intel Management Engine Interface (Intel MEI, previously known as HECI)
-is the interface between the Host and Intel ME. This interface is exposed
-to the host as a PCI device. The Intel MEI Driver is in charge of the
-communication channel between a host application and the Intel ME feature.
-
-Each Intel ME feature (Intel ME Client) is addressed by a GUID/UUID and
-each client has its own protocol. The protocol is message-based with a
-header and payload up to 512 bytes.
-
-Prominent usage of the Intel ME Interface is to communicate with Intel(R)
-Active Management Technology (Intel AMT) implemented in firmware running on
-the Intel ME.
-
-Intel AMT provides the ability to manage a host remotely out-of-band (OOB)
-even when the operating system running on the host processor has crashed or
-is in a sleep state.
-
-Some examples of Intel AMT usage are:
-   - Monitoring hardware state and platform components
-   - Remote power off/on (useful for green computing or overnight IT
-     maintenance)
-   - OS updates
-   - Storage of useful platform information such as software assets
-   - Built-in hardware KVM
-   - Selective network isolation of Ethernet and IP protocol flows based
-     on policies set by a remote management console
-   - IDE device redirection from remote management console
-
-Intel AMT (OOB) communication is based on SOAP (deprecated
-starting with Release 6.0) over HTTP/S or WS-Management protocol over
-HTTP/S that are received from a remote management console application.
-
-For more information about Intel AMT:
-http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
-
-
-Intel MEI Driver
-================
-
-The driver exposes a misc device called /dev/mei.
-
-An application maintains communication with an Intel ME feature while
-/dev/mei is open. The binding to a specific feature is performed by calling
-MEI_CONNECT_CLIENT_IOCTL, which passes the desired UUID.
-The number of instances of an Intel ME feature that can be opened
-at the same time depends on the Intel ME feature, but most of the
-features allow only a single instance.
-
-The Intel AMT Host Interface (Intel AMTHI) feature supports multiple
-simultaneous user connected applications. The Intel MEI driver
-handles this internally by maintaining request queues for the applications.
-
-The driver is transparent to data that are passed between firmware feature
-and host application.
-
-Because some of the Intel ME features can change the system
-configuration, the driver by default allows only a privileged
-user to access it.
-
-A code snippet for an application communicating with Intel AMTHI client:
-
-	struct mei_connect_client_data data;
-	fd = open(MEI_DEVICE);
-
-	data.d.in_client_uuid = AMTHI_UUID;
-
-	ioctl(fd, IOCTL_MEI_CONNECT_CLIENT, &data);
-
-	printf("Ver=%d, MaxLen=%ld\n",
-			data.d.in_client_uuid.protocol_version,
-			data.d.in_client_uuid.max_msg_length);
-
-	[...]
-
-	write(fd, amthi_req_data, amthi_req_data_len);
-
-	[...]
-
-	read(fd, &amthi_res_data, amthi_res_data_len);
-
-	[...]
-	close(fd);
-
-
-IOCTL
-=====
-
-The Intel MEI Driver supports the following IOCTL commands:
-	IOCTL_MEI_CONNECT_CLIENT	Connect to firmware Feature (client).
-
-	usage:
-		struct mei_connect_client_data clientData;
-		ioctl(fd, IOCTL_MEI_CONNECT_CLIENT, &clientData);
-
-	inputs:
-		mei_connect_client_data struct contain the following
-		input field:
-
-		in_client_uuid -	UUID of the FW Feature that needs
-					to connect to.
-	outputs:
-		out_client_properties - Client Properties: MTU and Protocol Version.
-
-	error returns:
-		EINVAL	Wrong IOCTL Number
-		ENODEV	Device or Connection is not initialized or ready.
-			(e.g. Wrong UUID)
-		ENOMEM	Unable to allocate memory to client internal data.
-		EFAULT	Fatal Error (e.g. Unable to access user input data)
-		EBUSY	Connection Already Open
-
-	Notes:
-        max_msg_length (MTU) in client properties describes the maximum
-        data that can be sent or received. (e.g. if MTU=2K, can send
-        requests up to bytes 2k and received responses up to 2k bytes).
-
-	IOCTL_MEI_NOTIFY_SET: enable or disable event notifications
-
-	Usage:
-		uint32_t enable;
-		ioctl(fd, IOCTL_MEI_NOTIFY_SET, &enable);
-
-	Inputs:
-		uint32_t enable = 1;
-		or
-		uint32_t enable[disable] = 0;
-
-	Error returns:
-		EINVAL	Wrong IOCTL Number
-		ENODEV	Device  is not initialized or the client not connected
-		ENOMEM	Unable to allocate memory to client internal data.
-		EFAULT	Fatal Error (e.g. Unable to access user input data)
-		EOPNOTSUPP if the device doesn't support the feature
-
-	Notes:
-	The client must be connected in order to enable notification events
-
-
-	IOCTL_MEI_NOTIFY_GET : retrieve event
-
-	Usage:
-		uint32_t event;
-		ioctl(fd, IOCTL_MEI_NOTIFY_GET, &event);
-
-	Outputs:
-		1 - if an event is pending
-		0 - if there is no even pending
-
-	Error returns:
-		EINVAL	Wrong IOCTL Number
-		ENODEV	Device is not initialized or the client not connected
-		ENOMEM	Unable to allocate memory to client internal data.
-		EFAULT	Fatal Error (e.g. Unable to access user input data)
-		EOPNOTSUPP if the device doesn't support the feature
-
-	Notes:
-	The client must be connected and event notification has to be enabled
-	in order to receive an event
-
-
-Intel ME Applications
-=====================
-
-	1) Intel Local Management Service (Intel LMS)
-
-	   Applications running locally on the platform communicate with Intel AMT Release
-	   2.0 and later releases in the same way that network applications do via SOAP
-	   over HTTP (deprecated starting with Release 6.0) or with WS-Management over
-	   SOAP over HTTP. This means that some Intel AMT features can be accessed from a
-	   local application using the same network interface as a remote application
-	   communicating with Intel AMT over the network.
-
-	   When a local application sends a message addressed to the local Intel AMT host
-	   name, the Intel LMS, which listens for traffic directed to the host name,
-	   intercepts the message and routes it to the Intel MEI.
-	   For more information:
-	   http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
-	   Under "About Intel AMT" => "Local Access"
-
-	   For downloading Intel LMS:
-	   http://software.intel.com/en-us/articles/download-the-latest-intel-amt-open-source-drivers/
-
-	   The Intel LMS opens a connection using the Intel MEI driver to the Intel LMS
-	   firmware feature using a defined UUID and then communicates with the feature
-	   using a protocol called Intel AMT Port Forwarding Protocol (Intel APF protocol).
-	   The protocol is used to maintain multiple sessions with Intel AMT from a
-	   single application.
-
-	   See the protocol specification in the Intel AMT Software Development Kit (SDK)
-	   http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
-	   Under "SDK Resources" => "Intel(R) vPro(TM) Gateway (MPS)"
-	   => "Information for Intel(R) vPro(TM) Gateway Developers"
-	   => "Description of the Intel AMT Port Forwarding (APF) Protocol"
-
-	2) Intel AMT Remote configuration using a Local Agent
-
-	   A Local Agent enables IT personnel to configure Intel AMT out-of-the-box
-	   without requiring installing additional data to enable setup. The remote
-	   configuration process may involve an ISV-developed remote configuration
-	   agent that runs on the host.
-	   For more information:
-	   http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
-	   Under "Setup and Configuration of Intel AMT" =>
-	   "SDK Tools Supporting Setup and Configuration" =>
-	   "Using the Local Agent Sample"
-
-	   An open source Intel AMT configuration utility,	implementing a local agent
-	   that accesses the Intel MEI driver, can be found here:
-	   http://software.intel.com/en-us/articles/download-the-latest-intel-amt-open-source-drivers/
-
-
-Intel AMT OS Health Watchdog
-============================
-
-The Intel AMT Watchdog is an OS Health (Hang/Crash) watchdog.
-Whenever the OS hangs or crashes, Intel AMT will send an event
-to any subscriber to this event. This mechanism means that
-IT knows when a platform crashes even when there is a hard failure on the host.
-
-The Intel AMT Watchdog is composed of two parts:
-	1) Firmware feature - receives the heartbeats
-	   and sends an event when the heartbeats stop.
-	2) Intel MEI iAMT watchdog driver - connects to the watchdog feature,
-	   configures the watchdog and sends the heartbeats.
-
-The Intel iAMT watchdog MEI driver uses the kernel watchdog API to configure
-the Intel AMT Watchdog and to send heartbeats to it. The default timeout of the
-watchdog is 120 seconds.
-
-If the Intel AMT is not enabled in the firmware then the watchdog client won't enumerate
-on the me client bus and watchdog devices won't be exposed.
-
-
-Supported Chipsets
-==================
-
-7 Series Chipset Family
-6 Series Chipset Family
-5 Series Chipset Family
-4 Series Chipset Family
-Mobile 4 Series Chipset Family
-ICH9
-82946GZ/GL
-82G35 Express
-82Q963/Q965
-82P965/G965
-Mobile PM965/GM965
-Mobile GME965/GLE960
-82Q35 Express
-82G33/G31/P35/P31 Express
-82Q33 Express
-82X38/X48 Express
-
----
-linux-mei@linux.intel.com
-- 
cgit 


From 815d0f26c104873eb829e24510383d4d098417dd Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Mon, 3 Jun 2019 12:14:01 +0300
Subject: mei: docs: move iamt docs to a iamt.rst file

Move intel amt documentation to a seprate file.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/mei/iamt.rst  | 106 +++++++++++++++++++++++++++++++++
 Documentation/driver-api/mei/index.rst |   1 +
 Documentation/driver-api/mei/mei.rst   | 100 -------------------------------
 3 files changed, 107 insertions(+), 100 deletions(-)
 create mode 100644 Documentation/driver-api/mei/iamt.rst

(limited to 'Documentation')

diff --git a/Documentation/driver-api/mei/iamt.rst b/Documentation/driver-api/mei/iamt.rst
new file mode 100644
index 000000000000..6dcf5b16e958
--- /dev/null
+++ b/Documentation/driver-api/mei/iamt.rst
@@ -0,0 +1,106 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Intel(R) Active Management Technology (Intel AMT)
+=================================================
+
+Prominent usage of the Intel ME Interface is to communicate with Intel(R)
+Active Management Technology (Intel AMT) implemented in firmware running on
+the Intel ME.
+
+Intel AMT provides the ability to manage a host remotely out-of-band (OOB)
+even when the operating system running on the host processor has crashed or
+is in a sleep state.
+
+Some examples of Intel AMT usage are:
+   - Monitoring hardware state and platform components
+   - Remote power off/on (useful for green computing or overnight IT
+     maintenance)
+   - OS updates
+   - Storage of useful platform information such as software assets
+   - Built-in hardware KVM
+   - Selective network isolation of Ethernet and IP protocol flows based
+     on policies set by a remote management console
+   - IDE device redirection from remote management console
+
+Intel AMT (OOB) communication is based on SOAP (deprecated
+starting with Release 6.0) over HTTP/S or WS-Management protocol over
+HTTP/S that are received from a remote management console application.
+
+For more information about Intel AMT:
+http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
+
+
+Intel AMT Applications
+======================
+
+	1) Intel Local Management Service (Intel LMS)
+
+	   Applications running locally on the platform communicate with Intel AMT Release
+	   2.0 and later releases in the same way that network applications do via SOAP
+	   over HTTP (deprecated starting with Release 6.0) or with WS-Management over
+	   SOAP over HTTP. This means that some Intel AMT features can be accessed from a
+	   local application using the same network interface as a remote application
+	   communicating with Intel AMT over the network.
+
+	   When a local application sends a message addressed to the local Intel AMT host
+	   name, the Intel LMS, which listens for traffic directed to the host name,
+	   intercepts the message and routes it to the Intel MEI.
+	   For more information:
+	   http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
+	   Under "About Intel AMT" => "Local Access"
+
+	   For downloading Intel LMS:
+	   http://software.intel.com/en-us/articles/download-the-latest-intel-amt-open-source-drivers/
+
+	   The Intel LMS opens a connection using the Intel MEI driver to the Intel LMS
+	   firmware feature using a defined UUID and then communicates with the feature
+	   using a protocol called Intel AMT Port Forwarding Protocol (Intel APF protocol).
+	   The protocol is used to maintain multiple sessions with Intel AMT from a
+	   single application.
+
+	   See the protocol specification in the Intel AMT Software Development Kit (SDK)
+	   http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
+	   Under "SDK Resources" => "Intel(R) vPro(TM) Gateway (MPS)"
+	   => "Information for Intel(R) vPro(TM) Gateway Developers"
+	   => "Description of the Intel AMT Port Forwarding (APF) Protocol"
+
+	2) Intel AMT Remote configuration using a Local Agent
+
+	   A Local Agent enables IT personnel to configure Intel AMT out-of-the-box
+	   without requiring installing additional data to enable setup. The remote
+	   configuration process may involve an ISV-developed remote configuration
+	   agent that runs on the host.
+	   For more information:
+	   http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
+	   Under "Setup and Configuration of Intel AMT" =>
+	   "SDK Tools Supporting Setup and Configuration" =>
+	   "Using the Local Agent Sample"
+
+	   An open source Intel AMT configuration utility,	implementing a local agent
+	   that accesses the Intel MEI driver, can be found here:
+	   http://software.intel.com/en-us/articles/download-the-latest-intel-amt-open-source-drivers/
+
+
+Intel AMT OS Health Watchdog
+============================
+
+The Intel AMT Watchdog is an OS Health (Hang/Crash) watchdog.
+Whenever the OS hangs or crashes, Intel AMT will send an event
+to any subscriber to this event. This mechanism means that
+IT knows when a platform crashes even when there is a hard failure on the host.
+
+The Intel AMT Watchdog is composed of two parts:
+	1) Firmware feature - receives the heartbeats
+	   and sends an event when the heartbeats stop.
+	2) Intel MEI iAMT watchdog driver - connects to the watchdog feature,
+	   configures the watchdog and sends the heartbeats.
+
+The Intel iAMT watchdog MEI driver uses the kernel watchdog API to configure
+the Intel AMT Watchdog and to send heartbeats to it. The default timeout of the
+watchdog is 120 seconds.
+
+If the Intel AMT is not enabled in the firmware then the watchdog client won't enumerate
+on the me client bus and watchdog devices won't be exposed.
+
+---
+linux-mei@linux.intel.com
diff --git a/Documentation/driver-api/mei/index.rst b/Documentation/driver-api/mei/index.rst
index 35c1117d8366..d261afac6852 100644
--- a/Documentation/driver-api/mei/index.rst
+++ b/Documentation/driver-api/mei/index.rst
@@ -20,3 +20,4 @@ Intel(R) Management Engine Interface (Intel(R) MEI)
 
    mei
    mei-client-bus
+   iamt
diff --git a/Documentation/driver-api/mei/mei.rst b/Documentation/driver-api/mei/mei.rst
index 5aa3a5e6496a..c7f10a4b46ff 100644
--- a/Documentation/driver-api/mei/mei.rst
+++ b/Documentation/driver-api/mei/mei.rst
@@ -17,33 +17,6 @@ Each Intel ME feature (Intel ME Client) is addressed by a GUID/UUID and
 each client has its own protocol. The protocol is message-based with a
 header and payload up to 512 bytes.
 
-Prominent usage of the Intel ME Interface is to communicate with Intel(R)
-Active Management Technology (Intel AMT) implemented in firmware running on
-the Intel ME.
-
-Intel AMT provides the ability to manage a host remotely out-of-band (OOB)
-even when the operating system running on the host processor has crashed or
-is in a sleep state.
-
-Some examples of Intel AMT usage are:
-   - Monitoring hardware state and platform components
-   - Remote power off/on (useful for green computing or overnight IT
-     maintenance)
-   - OS updates
-   - Storage of useful platform information such as software assets
-   - Built-in hardware KVM
-   - Selective network isolation of Ethernet and IP protocol flows based
-     on policies set by a remote management console
-   - IDE device redirection from remote management console
-
-Intel AMT (OOB) communication is based on SOAP (deprecated
-starting with Release 6.0) over HTTP/S or WS-Management protocol over
-HTTP/S that are received from a remote management console application.
-
-For more information about Intel AMT:
-http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
-
-
 Intel MEI Driver
 ================
 
@@ -169,82 +142,9 @@ The Intel MEI Driver supports the following IOCTL commands:
 	in order to receive an event
 
 
-Intel ME Applications
-=====================
-
-	1) Intel Local Management Service (Intel LMS)
-
-	   Applications running locally on the platform communicate with Intel AMT Release
-	   2.0 and later releases in the same way that network applications do via SOAP
-	   over HTTP (deprecated starting with Release 6.0) or with WS-Management over
-	   SOAP over HTTP. This means that some Intel AMT features can be accessed from a
-	   local application using the same network interface as a remote application
-	   communicating with Intel AMT over the network.
-
-	   When a local application sends a message addressed to the local Intel AMT host
-	   name, the Intel LMS, which listens for traffic directed to the host name,
-	   intercepts the message and routes it to the Intel MEI.
-	   For more information:
-	   http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
-	   Under "About Intel AMT" => "Local Access"
-
-	   For downloading Intel LMS:
-	   http://software.intel.com/en-us/articles/download-the-latest-intel-amt-open-source-drivers/
-
-	   The Intel LMS opens a connection using the Intel MEI driver to the Intel LMS
-	   firmware feature using a defined UUID and then communicates with the feature
-	   using a protocol called Intel AMT Port Forwarding Protocol (Intel APF protocol).
-	   The protocol is used to maintain multiple sessions with Intel AMT from a
-	   single application.
-
-	   See the protocol specification in the Intel AMT Software Development Kit (SDK)
-	   http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
-	   Under "SDK Resources" => "Intel(R) vPro(TM) Gateway (MPS)"
-	   => "Information for Intel(R) vPro(TM) Gateway Developers"
-	   => "Description of the Intel AMT Port Forwarding (APF) Protocol"
-
-	2) Intel AMT Remote configuration using a Local Agent
-
-	   A Local Agent enables IT personnel to configure Intel AMT out-of-the-box
-	   without requiring installing additional data to enable setup. The remote
-	   configuration process may involve an ISV-developed remote configuration
-	   agent that runs on the host.
-	   For more information:
-	   http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
-	   Under "Setup and Configuration of Intel AMT" =>
-	   "SDK Tools Supporting Setup and Configuration" =>
-	   "Using the Local Agent Sample"
-
-	   An open source Intel AMT configuration utility,	implementing a local agent
-	   that accesses the Intel MEI driver, can be found here:
-	   http://software.intel.com/en-us/articles/download-the-latest-intel-amt-open-source-drivers/
-
-
-Intel AMT OS Health Watchdog
-============================
-
-The Intel AMT Watchdog is an OS Health (Hang/Crash) watchdog.
-Whenever the OS hangs or crashes, Intel AMT will send an event
-to any subscriber to this event. This mechanism means that
-IT knows when a platform crashes even when there is a hard failure on the host.
-
-The Intel AMT Watchdog is composed of two parts:
-	1) Firmware feature - receives the heartbeats
-	   and sends an event when the heartbeats stop.
-	2) Intel MEI iAMT watchdog driver - connects to the watchdog feature,
-	   configures the watchdog and sends the heartbeats.
-
-The Intel iAMT watchdog MEI driver uses the kernel watchdog API to configure
-the Intel AMT Watchdog and to send heartbeats to it. The default timeout of the
-watchdog is 120 seconds.
-
-If the Intel AMT is not enabled in the firmware then the watchdog client won't enumerate
-on the me client bus and watchdog devices won't be exposed.
 
 Supported Chipsets
 ==================
 82X38/X48 Express and newer
 
-
----
 linux-mei@linux.intel.com
-- 
cgit 


From 6080e0cff2bf7108d3f2855a7177b1f7f1830035 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Mon, 3 Jun 2019 12:14:03 +0300
Subject: mei: docs: update mei client bus documentation.

The mei client bus API has changed significantly from
time it was documented, and had required update.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/mei/mei-client-bus.rst | 162 +++++++++++++-----------
 1 file changed, 85 insertions(+), 77 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/mei/mei-client-bus.rst b/Documentation/driver-api/mei/mei-client-bus.rst
index a26a85453bdf..7310dd45c484 100644
--- a/Documentation/driver-api/mei/mei-client-bus.rst
+++ b/Documentation/driver-api/mei/mei-client-bus.rst
@@ -8,13 +8,13 @@ Intel(R) Management Engine (ME) Client bus API
 Rationale
 =========
 
-MEI misc character device is useful for dedicated applications to send and receive
+The MEI character device is useful for dedicated applications to send and receive
 data to the many FW appliance found in Intel's ME from the user space.
-However for some of the ME functionalities it make sense to leverage existing software
+However, for some of the ME functionalities it makes sense to leverage existing software
 stack and expose them through existing kernel subsystems.
 
 In order to plug seamlessly into the kernel device driver model we add kernel virtual
-bus abstraction on top of the MEI driver. This allows implementing linux kernel drivers
+bus abstraction on top of the MEI driver. This allows implementing Linux kernel drivers
 for the various MEI features as a stand alone entities found in their respective subsystem.
 Existing device drivers can even potentially be re-used by adding an MEI CL bus layer to
 the existing code.
@@ -23,9 +23,9 @@ the existing code.
 MEI CL bus API
 ==============
 
-A driver implementation for an MEI Client is very similar to existing bus
+A driver implementation for an MEI Client is very similar to any other existing bus
 based device drivers. The driver registers itself as an MEI CL bus driver through
-the ``struct mei_cl_driver`` structure:
+the ``struct mei_cl_driver`` structure defined in :file:`include/linux/mei_cl_bus.c`
 
 .. code-block:: C
 
@@ -39,25 +39,38 @@ the ``struct mei_cl_driver`` structure:
                 int (*remove)(struct mei_cl_device *dev);
         };
 
-        struct mei_cl_id {
-                char name[MEI_NAME_SIZE];
+
+
+The mei_cl_device_id structure defined in :file:`include/linux/mod_devicetable.h` allows a
+driver to bind itself against a device name.
+
+.. code-block:: C
+
+        struct mei_cl_device_id {
+                char name[MEI_CL_NAME_SIZE];
+                uuid_le uuid;
+                __u8    version;
                 kernel_ulong_t driver_info;
         };
 
-The mei_cl_id structure allows the driver to bind itself against a device name.
+To actually register a driver on the ME Client bus one must call the :c:func:`mei_cl_add_driver`
+API. This is typically called at module initialization time.
+
+Once the driver is registered and bound to the device, a driver will typically
+try to do some I/O on this bus and this should be done through the :c:func:`mei_cl_send`
+and :c:func:`mei_cl_recv` functions. More detailed information is in :ref:`api` section.
+
+In order for a driver to be notified about pending traffic or event, the driver
+should register a callback via :c:func:`mei_cl_devev_register_rx_cb` and
+:c:func:`mei_cldev_register_notify_cb` function respectively.
 
-To actually register a driver on the ME Client bus one must call the mei_cl_add_driver()
-API. This is typically called at module init time.
+.. _api:
+
+API:
+----
+.. kernel-doc:: drivers/misc/mei/bus.c
+    :export: drivers/misc/mei/bus.c
 
-Once registered on the ME Client bus, a driver will typically try to do some I/O on
-this bus and this should be done through the mei_cl_send() and mei_cl_recv()
-routines. The latter is synchronous (blocks and sleeps until data shows up).
-In order for drivers to be notified of pending events waiting for them (e.g.
-an Rx event) they can register an event handler through the
-mei_cl_register_event_cb() routine. Currently only the MEI_EVENT_RX event
-will trigger an event handler call and the driver implementation is supposed
-to call mei_recv() from the event handler in order to fetch the pending
-received buffers.
 
 
 Example
@@ -68,85 +81,80 @@ The driver init and exit routines for this device would look like:
 
 .. code-block:: C
 
-	#define CONTACT_DRIVER_NAME "contact"
+        #define CONTACT_DRIVER_NAME "contact"
 
-	static struct mei_cl_device_id contact_mei_cl_tbl[] = {
-		{ CONTACT_DRIVER_NAME, },
+        static struct mei_cl_device_id contact_mei_cl_tbl[] = {
+                { CONTACT_DRIVER_NAME, },
 
-		/* required last entry */
-		{ }
-	};
-	MODULE_DEVICE_TABLE(mei_cl, contact_mei_cl_tbl);
+                /* required last entry */
+                { }
+        };
+        MODULE_DEVICE_TABLE(mei_cl, contact_mei_cl_tbl);
 
-	static struct mei_cl_driver contact_driver = {
-		.id_table = contact_mei_tbl,
-		.name = CONTACT_DRIVER_NAME,
+        static struct mei_cl_driver contact_driver = {
+                .id_table = contact_mei_tbl,
+                .name = CONTACT_DRIVER_NAME,
 
-		.probe = contact_probe,
-		.remove = contact_remove,
-	};
+                .probe = contact_probe,
+                .remove = contact_remove,
+        };
 
-	static int contact_init(void)
-	{
-		int r;
+        static int contact_init(void)
+        {
+                int r;
 
-		r = mei_cl_driver_register(&contact_driver);
-		if (r) {
-			pr_err(CONTACT_DRIVER_NAME ": driver registration failed\n");
-			return r;
-		}
+                r = mei_cl_driver_register(&contact_driver);
+                if (r) {
+                        pr_err(CONTACT_DRIVER_NAME ": driver registration failed\n");
+                        return r;
+                }
 
-		return 0;
-	}
+                return 0;
+        }
 
-	static void __exit contact_exit(void)
-	{
-		mei_cl_driver_unregister(&contact_driver);
-	}
+        static void __exit contact_exit(void)
+        {
+                mei_cl_driver_unregister(&contact_driver);
+        }
 
-	module_init(contact_init);
-	module_exit(contact_exit);
+        module_init(contact_init);
+        module_exit(contact_exit);
 
 And the driver's simplified probe routine would look like that:
 
 .. code-block:: C
 
-	int contact_probe(struct mei_cl_device *dev, struct mei_cl_device_id *id)
-	{
-		struct contact_driver *contact;
+        int contact_probe(struct mei_cl_device *dev, struct mei_cl_device_id *id)
+        {
+                [...]
+                mei_cldev_enable(dev);
 
-		[...]
-		mei_cl_enable_device(dev);
+                mei_cldev_register_rx_cb(dev, contact_rx_cb);
 
-		mei_cl_register_event_cb(dev, contact_event_cb, contact);
-
-		return 0;
-	}
+                return 0;
+        }
 
 In the probe routine the driver first enable the MEI device and then registers
-an ME bus event handler which is as close as it can get to registering a
-threaded IRQ handler.
-The handler implementation will typically call some I/O routine depending on
-the pending events:
-
-#define MAX_NFC_PAYLOAD 128
+an rx handler which is as close as it can get to registering a threaded IRQ handler.
+The handler implementation will typically call :c:func:`mei_cldev_recv` and then
+process received data.
 
 .. code-block:: C
 
-	static void contact_event_cb(struct mei_cl_device *dev, u32 events,
-				     void *context)
-	{
-		struct contact_driver *contact = context;
+        #define MAX_PAYLOAD 128
+        #define HDR_SIZE 4
+        static void conntact_rx_cb(struct mei_cl_device *cldev)
+        {
+                struct contact *c = mei_cldev_get_drvdata(cldev);
+                unsigned char payload[MAX_PAYLOAD];
+                ssize_t payload_sz;
+
+                payload_sz = mei_cldev_recv(cldev, payload,  MAX_PAYLOAD)
+                if (reply_size < HDR_SIZE) {
+                        return;
+                }
 
-		if (events & BIT(MEI_EVENT_RX)) {
-			u8 payload[MAX_NFC_PAYLOAD];
-			int payload_size;
+                c->process_rx(payload);
 
-			payload_size = mei_recv(dev, payload, MAX_NFC_PAYLOAD);
-			if (payload_size <= 0)
-				return;
+        }
 
-			/* Hook to the NFC subsystem */
-			nfc_hci_recv_frame(contact->hdev, payload, payload_size);
-		}
-	}
-- 
cgit 


From 4e3d3b784ae7cd86ace2776c01be99ddfd378801 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Mon, 3 Jun 2019 12:14:04 +0300
Subject: mei: docs: add a short description for nfc behind mei

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/mei/index.rst          |  2 +-
 Documentation/driver-api/mei/mei-client-bus.rst |  7 +++++++
 Documentation/driver-api/mei/nfc.rst            | 28 +++++++++++++++++++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/driver-api/mei/nfc.rst

(limited to 'Documentation')

diff --git a/Documentation/driver-api/mei/index.rst b/Documentation/driver-api/mei/index.rst
index d261afac6852..3a22b522ee78 100644
--- a/Documentation/driver-api/mei/index.rst
+++ b/Documentation/driver-api/mei/index.rst
@@ -16,7 +16,7 @@ Intel(R) Management Engine Interface (Intel(R) MEI)
         Table of Contents
 
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 3
 
    mei
    mei-client-bus
diff --git a/Documentation/driver-api/mei/mei-client-bus.rst b/Documentation/driver-api/mei/mei-client-bus.rst
index 7310dd45c484..bfe28ebc3ca8 100644
--- a/Documentation/driver-api/mei/mei-client-bus.rst
+++ b/Documentation/driver-api/mei/mei-client-bus.rst
@@ -158,3 +158,10 @@ process received data.
 
         }
 
+MEI Client Bus Drivers
+======================
+
+.. toctree::
+   :maxdepth: 2
+
+   nfc
diff --git a/Documentation/driver-api/mei/nfc.rst b/Documentation/driver-api/mei/nfc.rst
new file mode 100644
index 000000000000..b5b6fc96f85e
--- /dev/null
+++ b/Documentation/driver-api/mei/nfc.rst
@@ -0,0 +1,28 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+MEI NFC
+-------
+
+Some Intel 8 and 9 Serieses chipsets supports NFC devices connected behind
+the Intel Management Engine controller.
+MEI client bus exposes the NFC chips as NFC phy devices and enables
+binding with Microread and NXP PN544 NFC device driver from the Linux NFC
+subsystem.
+
+.. kernel-render:: DOT
+   :alt: MEI NFC digraph
+   :caption: **MEI NFC** Stack
+
+   digraph NFC {
+    cl_nfc -> me_cl_nfc;
+    "drivers/nfc/mei_phy" -> cl_nfc [lhead=bus];
+    "drivers/nfc/microread/mei" -> cl_nfc;
+    "drivers/nfc/microread/mei" -> "drivers/nfc/mei_phy";
+    "drivers/nfc/pn544/mei" -> cl_nfc;
+    "drivers/nfc/pn544/mei" -> "drivers/nfc/mei_phy";
+    "net/nfc" -> "drivers/nfc/microread/mei";
+    "net/nfc" -> "drivers/nfc/pn544/mei";
+    "neard" -> "net/nfc";
+    cl_nfc [label="mei/bus(nfc)"];
+    me_cl_nfc [label="me fw (nfc)"];
+   }
-- 
cgit 


From 0475afd2a5dee99defdb7b030c09ba202ea3c64a Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Mon, 3 Jun 2019 12:14:05 +0300
Subject: mei: docs: add hdcp documentation

1. Add a short ducumentation for MEI HDCP driver,
and fix DOC comments in drivers/misc/mei/hdcp/mei_hdcp.c

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/mei/hdcp.rst           | 32 +++++++++++++++++++++++++
 Documentation/driver-api/mei/mei-client-bus.rst |  1 +
 2 files changed, 33 insertions(+)
 create mode 100644 Documentation/driver-api/mei/hdcp.rst

(limited to 'Documentation')

diff --git a/Documentation/driver-api/mei/hdcp.rst b/Documentation/driver-api/mei/hdcp.rst
new file mode 100644
index 000000000000..e85a065b1cdc
--- /dev/null
+++ b/Documentation/driver-api/mei/hdcp.rst
@@ -0,0 +1,32 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+HDCP:
+=====
+
+ME FW as a security engine provides the capability for setting up
+HDCP2.2 protocol negotiation between the Intel graphics device and
+an HDC2.2 sink.
+
+ME FW prepares HDCP2.2 negotiation parameters, signs and encrypts them
+according the HDCP 2.2 spec. The Intel graphics sends the created blob
+to the HDCP2.2 sink.
+
+Similarly, the HDCP2.2 sink's response is transferred to ME FW
+for decryption and verification.
+
+Once all the steps of HDCP2.2 negotiation are completed,
+upon request ME FW will configure the port as authenticated and supply
+the HDCP encryption keys to Intel graphics hardware.
+
+
+mei_hdcp driver
+---------------
+.. kernel-doc:: drivers/misc/mei/hdcp/mei_hdcp.c
+    :doc: MEI_HDCP Client Driver
+
+mei_hdcp api
+------------
+
+.. kernel-doc:: drivers/misc/mei/hdcp/mei_hdcp.c
+    :functions:
+
diff --git a/Documentation/driver-api/mei/mei-client-bus.rst b/Documentation/driver-api/mei/mei-client-bus.rst
index bfe28ebc3ca8..f242b3f8d6aa 100644
--- a/Documentation/driver-api/mei/mei-client-bus.rst
+++ b/Documentation/driver-api/mei/mei-client-bus.rst
@@ -164,4 +164,5 @@ MEI Client Bus Drivers
 .. toctree::
    :maxdepth: 2
 
+   hdcp
    nfc
-- 
cgit 


From 7e706da35a458f4b0d4c565c7b71023d8bfe279b Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Mon, 3 Jun 2019 12:14:06 +0300
Subject: mei: docs: fix broken links in iamt documentation.

The iAMT documentation moved from http:// https://,
and LMS is moved to github.com

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/mei/iamt.rst | 105 ++++++++++++++++------------------
 1 file changed, 50 insertions(+), 55 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/mei/iamt.rst b/Documentation/driver-api/mei/iamt.rst
index 6dcf5b16e958..6ef3e613684b 100644
--- a/Documentation/driver-api/mei/iamt.rst
+++ b/Documentation/driver-api/mei/iamt.rst
@@ -27,62 +27,57 @@ starting with Release 6.0) over HTTP/S or WS-Management protocol over
 HTTP/S that are received from a remote management console application.
 
 For more information about Intel AMT:
-http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
+https://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide/default.htm
 
 
 Intel AMT Applications
-======================
-
-	1) Intel Local Management Service (Intel LMS)
-
-	   Applications running locally on the platform communicate with Intel AMT Release
-	   2.0 and later releases in the same way that network applications do via SOAP
-	   over HTTP (deprecated starting with Release 6.0) or with WS-Management over
-	   SOAP over HTTP. This means that some Intel AMT features can be accessed from a
-	   local application using the same network interface as a remote application
-	   communicating with Intel AMT over the network.
-
-	   When a local application sends a message addressed to the local Intel AMT host
-	   name, the Intel LMS, which listens for traffic directed to the host name,
-	   intercepts the message and routes it to the Intel MEI.
-	   For more information:
-	   http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
-	   Under "About Intel AMT" => "Local Access"
-
-	   For downloading Intel LMS:
-	   http://software.intel.com/en-us/articles/download-the-latest-intel-amt-open-source-drivers/
-
-	   The Intel LMS opens a connection using the Intel MEI driver to the Intel LMS
-	   firmware feature using a defined UUID and then communicates with the feature
-	   using a protocol called Intel AMT Port Forwarding Protocol (Intel APF protocol).
-	   The protocol is used to maintain multiple sessions with Intel AMT from a
-	   single application.
-
-	   See the protocol specification in the Intel AMT Software Development Kit (SDK)
-	   http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
-	   Under "SDK Resources" => "Intel(R) vPro(TM) Gateway (MPS)"
-	   => "Information for Intel(R) vPro(TM) Gateway Developers"
-	   => "Description of the Intel AMT Port Forwarding (APF) Protocol"
-
-	2) Intel AMT Remote configuration using a Local Agent
-
-	   A Local Agent enables IT personnel to configure Intel AMT out-of-the-box
-	   without requiring installing additional data to enable setup. The remote
-	   configuration process may involve an ISV-developed remote configuration
-	   agent that runs on the host.
-	   For more information:
-	   http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
-	   Under "Setup and Configuration of Intel AMT" =>
-	   "SDK Tools Supporting Setup and Configuration" =>
-	   "Using the Local Agent Sample"
-
-	   An open source Intel AMT configuration utility,	implementing a local agent
-	   that accesses the Intel MEI driver, can be found here:
-	   http://software.intel.com/en-us/articles/download-the-latest-intel-amt-open-source-drivers/
-
+----------------------
+
+    1) Intel Local Management Service (Intel LMS)
+
+       Applications running locally on the platform communicate with Intel AMT Release
+       2.0 and later releases in the same way that network applications do via SOAP
+       over HTTP (deprecated starting with Release 6.0) or with WS-Management over
+       SOAP over HTTP. This means that some Intel AMT features can be accessed from a
+       local application using the same network interface as a remote application
+       communicating with Intel AMT over the network.
+
+       When a local application sends a message addressed to the local Intel AMT host
+       name, the Intel LMS, which listens for traffic directed to the host name,
+       intercepts the message and routes it to the Intel MEI.
+       For more information:
+       https://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide/default.htm
+       Under "About Intel AMT" => "Local Access"
+
+       For downloading Intel LMS:
+       https://github.com/intel/lms
+
+       The Intel LMS opens a connection using the Intel MEI driver to the Intel LMS
+       firmware feature using a defined GUID and then communicates with the feature
+       using a protocol called Intel AMT Port Forwarding Protocol (Intel APF protocol).
+       The protocol is used to maintain multiple sessions with Intel AMT from a
+       single application.
+
+       See the protocol specification in the Intel AMT Software Development Kit (SDK)
+       https://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide/default.htm
+       Under "SDK Resources" => "Intel(R) vPro(TM) Gateway (MPS)"
+       => "Information for Intel(R) vPro(TM) Gateway Developers"
+       => "Description of the Intel AMT Port Forwarding (APF) Protocol"
+
+    2) Intel AMT Remote configuration using a Local Agent
+
+       A Local Agent enables IT personnel to configure Intel AMT out-of-the-box
+       without requiring installing additional data to enable setup. The remote
+       configuration process may involve an ISV-developed remote configuration
+       agent that runs on the host.
+       For more information:
+       https://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide/default.htm
+       Under "Setup and Configuration of Intel AMT" =>
+       "SDK Tools Supporting Setup and Configuration" =>
+       "Using the Local Agent Sample"
 
 Intel AMT OS Health Watchdog
-============================
+----------------------------
 
 The Intel AMT Watchdog is an OS Health (Hang/Crash) watchdog.
 Whenever the OS hangs or crashes, Intel AMT will send an event
@@ -90,10 +85,10 @@ to any subscriber to this event. This mechanism means that
 IT knows when a platform crashes even when there is a hard failure on the host.
 
 The Intel AMT Watchdog is composed of two parts:
-	1) Firmware feature - receives the heartbeats
-	   and sends an event when the heartbeats stop.
-	2) Intel MEI iAMT watchdog driver - connects to the watchdog feature,
-	   configures the watchdog and sends the heartbeats.
+    1) Firmware feature - receives the heartbeats
+       and sends an event when the heartbeats stop.
+    2) Intel MEI iAMT watchdog driver - connects to the watchdog feature,
+       configures the watchdog and sends the heartbeats.
 
 The Intel iAMT watchdog MEI driver uses the kernel watchdog API to configure
 the Intel AMT Watchdog and to send heartbeats to it. The default timeout of the
-- 
cgit 


From d0a178095c5fbbd25454c20e49bc3a7d70ecb769 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Thu, 6 Jun 2019 16:31:08 +0300
Subject: mei: docs: update mei documentation

The mei driver went via multiple changes, update
the documentation and fix formatting.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/mei/mei.rst | 96 +++++++++++++++++++++++-------------
 1 file changed, 61 insertions(+), 35 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/mei/mei.rst b/Documentation/driver-api/mei/mei.rst
index c7f10a4b46ff..c800d8e5f422 100644
--- a/Documentation/driver-api/mei/mei.rst
+++ b/Documentation/driver-api/mei/mei.rst
@@ -5,34 +5,32 @@ Introduction
 
 The Intel Management Engine (Intel ME) is an isolated and protected computing
 resource (Co-processor) residing inside certain Intel chipsets. The Intel ME
-provides support for computer/IT management features. The feature set
-depends on the Intel chipset SKU.
+provides support for computer/IT management and security features.
+The actual feature set depends on the Intel chipset SKU.
 
 The Intel Management Engine Interface (Intel MEI, previously known as HECI)
 is the interface between the Host and Intel ME. This interface is exposed
-to the host as a PCI device. The Intel MEI Driver is in charge of the
-communication channel between a host application and the Intel ME feature.
+to the host as a PCI device, actually multiple PCI devices might be exposed.
+The Intel MEI Driver is in charge of the communication channel between
+a host application and the Intel ME features.
 
-Each Intel ME feature (Intel ME Client) is addressed by a GUID/UUID and
+Each Intel ME feature, or Intel ME Client is addressed by a unique GUID and
 each client has its own protocol. The protocol is message-based with a
-header and payload up to 512 bytes.
+header and payload up to maximal number of bytes advertised by the client,
+upon connection.
 
 Intel MEI Driver
 ================
 
-The driver exposes a misc device called /dev/mei.
+The driver exposes a character device with device nodes /dev/meiX.
 
 An application maintains communication with an Intel ME feature while
-/dev/mei is open. The binding to a specific feature is performed by calling
-MEI_CONNECT_CLIENT_IOCTL, which passes the desired UUID.
+/dev/meiX is open. The binding to a specific feature is performed by calling
+:c:macro:`MEI_CONNECT_CLIENT_IOCTL`, which passes the desired GUID.
 The number of instances of an Intel ME feature that can be opened
 at the same time depends on the Intel ME feature, but most of the
 features allow only a single instance.
 
-The Intel AMT Host Interface (Intel AMTHI) feature supports multiple
-simultaneous user connected applications. The Intel MEI driver
-handles this internally by maintaining request queues for the applications.
-
 The driver is transparent to data that are passed between firmware feature
 and host application.
 
@@ -40,6 +38,8 @@ Because some of the Intel ME features can change the system
 configuration, the driver by default allows only a privileged
 user to access it.
 
+The session is terminated calling :c:func:`close(int fd)`.
+
 A code snippet for an application communicating with Intel AMTHI client:
 
 .. code-block:: C
@@ -47,13 +47,13 @@ A code snippet for an application communicating with Intel AMTHI client:
 	struct mei_connect_client_data data;
 	fd = open(MEI_DEVICE);
 
-	data.d.in_client_uuid = AMTHI_UUID;
+	data.d.in_client_uuid = AMTHI_GUID;
 
 	ioctl(fd, IOCTL_MEI_CONNECT_CLIENT, &data);
 
 	printf("Ver=%d, MaxLen=%ld\n",
-			data.d.in_client_uuid.protocol_version,
-			data.d.in_client_uuid.max_msg_length);
+	       data.d.in_client_uuid.protocol_version,
+	       data.d.in_client_uuid.max_msg_length);
 
 	[...]
 
@@ -67,60 +67,86 @@ A code snippet for an application communicating with Intel AMTHI client:
 	close(fd);
 
 
-IOCTLs
-======
+User space API
+
+IOCTLs:
+=======
 
 The Intel MEI Driver supports the following IOCTL commands:
-	IOCTL_MEI_CONNECT_CLIENT	Connect to firmware Feature (client).
 
-	usage:
-		struct mei_connect_client_data clientData;
-		ioctl(fd, IOCTL_MEI_CONNECT_CLIENT, &clientData);
+IOCTL_MEI_CONNECT_CLIENT
+-------------------------
+Connect to firmware Feature/Client.
+
+.. code-block:: none
+
+	Usage:
 
-	inputs:
-		mei_connect_client_data struct contain the following
-		input field:
+        struct mei_connect_client_data client_data;
 
-		in_client_uuid -	UUID of the FW Feature that needs
+        ioctl(fd, IOCTL_MEI_CONNECT_CLIENT, &client_data);
+
+	Inputs:
+
+        struct mei_connect_client_data - contain the following
+	Input field:
+
+		in_client_uuid -	GUID of the FW Feature that needs
 					to connect to.
-	outputs:
+         Outputs:
 		out_client_properties - Client Properties: MTU and Protocol Version.
 
-	error returns:
+         Error returns:
+
+                ENOTTY  No such client (i.e. wrong GUID) or connection is not allowed.
 		EINVAL	Wrong IOCTL Number
-		ENODEV	Device or Connection is not initialized or ready. (e.g. Wrong UUID)
+		ENODEV	Device or Connection is not initialized or ready.
 		ENOMEM	Unable to allocate memory to client internal data.
 		EFAULT	Fatal Error (e.g. Unable to access user input data)
 		EBUSY	Connection Already Open
 
-	Notes:
+:Note:
         max_msg_length (MTU) in client properties describes the maximum
         data that can be sent or received. (e.g. if MTU=2K, can send
         requests up to bytes 2k and received responses up to 2k bytes).
 
-	IOCTL_MEI_NOTIFY_SET: enable or disable event notifications
+
+IOCTL_MEI_NOTIFY_SET
+---------------------
+Enable or disable event notifications.
+
+
+.. code-block:: none
 
 	Usage:
+
 		uint32_t enable;
+
 		ioctl(fd, IOCTL_MEI_NOTIFY_SET, &enable);
 
-	Inputs:
+
 		uint32_t enable = 1;
 		or
 		uint32_t enable[disable] = 0;
 
 	Error returns:
+
+
 		EINVAL	Wrong IOCTL Number
 		ENODEV	Device  is not initialized or the client not connected
 		ENOMEM	Unable to allocate memory to client internal data.
 		EFAULT	Fatal Error (e.g. Unable to access user input data)
 		EOPNOTSUPP if the device doesn't support the feature
 
-	Notes:
+:Note:
 	The client must be connected in order to enable notification events
 
 
-	IOCTL_MEI_NOTIFY_GET : retrieve event
+IOCTL_MEI_NOTIFY_GET
+--------------------
+Retrieve event
+
+.. code-block:: none
 
 	Usage:
 		uint32_t event;
@@ -137,7 +163,7 @@ The Intel MEI Driver supports the following IOCTL commands:
 		EFAULT	Fatal Error (e.g. Unable to access user input data)
 		EOPNOTSUPP if the device doesn't support the feature
 
-	Notes:
+:Note:
 	The client must be connected and event notification has to be enabled
 	in order to receive an event
 
-- 
cgit 


From c2a6ea23a40152a03d6c605d6f0ea239b11605b8 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Fri, 14 Jun 2019 15:32:18 +0100
Subject: dt-bindings: fsl: scu: add ocotp binding

NXP i.MX8QXP is an ARMv8 SoC with a Cortex-M4 core inside as
system controller(SCU), the ocotp controller is being controlled
by the SCU, so Linux need use RPC to SCU for ocotp handling. This
patch adds binding doc for i.MX8 SCU OCOTP driver.

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Anson Huang <anson.huang@nxp.com>
Cc: devicetree@vger.kernel.org
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Peng Fan <peng.fan@nxp.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../devicetree/bindings/arm/freescale/fsl,scu.txt  | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
index 5d7dbabbb784..f378922906f6 100644
--- a/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
@@ -133,6 +133,18 @@ RTC bindings based on SCU Message Protocol
 Required properties:
 - compatible: should be "fsl,imx8qxp-sc-rtc";
 
+OCOTP bindings based on SCU Message Protocol
+------------------------------------------------------------
+Required properties:
+- compatible:		Should be "fsl,imx8qxp-scu-ocotp"
+- #address-cells:	Must be 1. Contains byte index
+- #size-cells:		Must be 1. Contains byte length
+
+Optional Child nodes:
+
+- Data cells of ocotp:
+  Detailed bindings are described in bindings/nvmem/nvmem.txt
+
 Example (imx8qxp):
 -------------
 aliases {
@@ -177,6 +189,16 @@ firmware {
 			...
 		};
 
+		ocotp: imx8qx-ocotp {
+			compatible = "fsl,imx8qxp-scu-ocotp";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			fec_mac0: mac@2c4 {
+				reg = <0x2c4 8>;
+			};
+		};
+
 		pd: imx8qx-pd {
 			compatible = "fsl,imx8qxp-scu-pd", "fsl,scu-pd";
 			#power-domain-cells = <1>;
-- 
cgit 


From 42c3dcedc6b76b1383e1b63be82b84c058b5a8ad Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Fri, 14 Jun 2019 15:32:21 +0100
Subject: dt-bindings: nvmem: Convert Allwinner SID to a schema

The Allwinner SoCs have an efuse supported in Linux, with a matching Device
Tree binding.

Now that we have the DT validation in place, let's convert the device tree
bindings for that controller over to a YAML schemas.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../bindings/nvmem/allwinner,sun4i-a10-sid.yaml    | 51 ++++++++++++++++++++++
 .../bindings/nvmem/allwinner,sunxi-sid.txt         | 29 ------------
 2 files changed, 51 insertions(+), 29 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml
 delete mode 100644 Documentation/devicetree/bindings/nvmem/allwinner,sunxi-sid.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml b/Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml
new file mode 100644
index 000000000000..c9efd6e2c134
--- /dev/null
+++ b/Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/nvmem/allwinner,sun4i-a10-sid.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Security ID Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+allOf:
+  - $ref: "nvmem.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-sid
+      - allwinner,sun7i-a20-sid
+      - allwinner,sun8i-a83t-sid
+      - allwinner,sun8i-h3-sid
+      - allwinner,sun50i-a64-sid
+      - allwinner,sun50i-h5-sid
+      - allwinner,sun50i-h6-sid
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+# FIXME: We should set it, but it would report all the generic
+# properties as additional properties.
+# additionalProperties: false
+
+examples:
+  - |
+    sid@1c23800 {
+        compatible = "allwinner,sun4i-a10-sid";
+        reg = <0x01c23800 0x10>;
+    };
+
+  - |
+    sid@1c23800 {
+        compatible = "allwinner,sun7i-a20-sid";
+        reg = <0x01c23800 0x200>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/nvmem/allwinner,sunxi-sid.txt b/Documentation/devicetree/bindings/nvmem/allwinner,sunxi-sid.txt
deleted file mode 100644
index cfb18b4ef8f7..000000000000
--- a/Documentation/devicetree/bindings/nvmem/allwinner,sunxi-sid.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-Allwinner sunxi-sid
-
-Required properties:
-- compatible: Should be one of the following:
-  "allwinner,sun4i-a10-sid"
-  "allwinner,sun7i-a20-sid"
-  "allwinner,sun8i-a83t-sid"
-  "allwinner,sun8i-h3-sid"
-  "allwinner,sun50i-a64-sid"
-  "allwinner,sun50i-h5-sid"
-  "allwinner,sun50i-h6-sid"
-
-- reg: Should contain registers location and length
-
-= Data cells =
-Are child nodes of sunxi-sid, bindings of which as described in
-bindings/nvmem/nvmem.txt
-
-Example for sun4i:
-	sid@1c23800 {
-		compatible = "allwinner,sun4i-a10-sid";
-		reg = <0x01c23800 0x10>
-	};
-
-Example for sun7i:
-	sid@1c23800 {
-		compatible = "allwinner,sun7i-a20-sid";
-		reg = <0x01c23800 0x200>
-	};
-- 
cgit 


From ae1c6b9aa34be021555e1234a271d56108caa82e Mon Sep 17 00:00:00 2001
From: Pankaj Bansal <pankaj.bansal@nxp.com>
Date: Wed, 12 Jun 2019 08:52:56 +0000
Subject: dt-bindings: add register based devices' mux controller DT bindings

This adds device tree binding documentation for generic register based
multiplexer controlled by a bitfields in a parent device's register range.

since MMIO mux is a special case of generic register based mux, the
MMIO mux bindings have been subsumed in these bindings.

Signed-off-by: Pankaj Bansal <pankaj.bansal@nxp.com>
Signed-off-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/mux/mmio-mux.txt |  60 ----------
 Documentation/devicetree/bindings/mux/reg-mux.txt  | 129 +++++++++++++++++++++
 2 files changed, 129 insertions(+), 60 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/mux/mmio-mux.txt
 create mode 100644 Documentation/devicetree/bindings/mux/reg-mux.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mux/mmio-mux.txt b/Documentation/devicetree/bindings/mux/mmio-mux.txt
deleted file mode 100644
index a9bfb4d8b6ac..000000000000
--- a/Documentation/devicetree/bindings/mux/mmio-mux.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-MMIO register bitfield-based multiplexer controller bindings
-
-Define register bitfields to be used to control multiplexers. The parent
-device tree node must be a syscon node to provide register access.
-
-Required properties:
-- compatible : "mmio-mux"
-- #mux-control-cells : <1>
-- mux-reg-masks : an array of register offset and pre-shifted bitfield mask
-                  pairs, each describing a single mux control.
-* Standard mux-controller bindings as decribed in mux-controller.txt
-
-Optional properties:
-- idle-states : if present, the state the muxes will have when idle. The
-		special state MUX_IDLE_AS_IS is the default.
-
-The multiplexer state of each multiplexer is defined as the value of the
-bitfield described by the corresponding register offset and bitfield mask pair
-in the mux-reg-masks array, accessed through the parent syscon.
-
-Example:
-
-	syscon {
-		compatible = "syscon";
-
-		mux: mux-controller {
-			compatible = "mmio-mux";
-			#mux-control-cells = <1>;
-
-			mux-reg-masks = <0x3 0x30>, /* 0: reg 0x3, bits 5:4 */
-					<0x3 0x40>, /* 1: reg 0x3, bit 6 */
-			idle-states = <MUX_IDLE_AS_IS>, <0>;
-		};
-	};
-
-	video-mux {
-		compatible = "video-mux";
-		mux-controls = <&mux 0>;
-
-		ports {
-			/* inputs 0..3 */
-			port@0 {
-				reg = <0>;
-			};
-			port@1 {
-				reg = <1>;
-			};
-			port@2 {
-				reg = <2>;
-			};
-			port@3 {
-				reg = <3>;
-			};
-
-			/* output */
-			port@4 {
-				reg = <4>;
-			};
-		};
-	};
diff --git a/Documentation/devicetree/bindings/mux/reg-mux.txt b/Documentation/devicetree/bindings/mux/reg-mux.txt
new file mode 100644
index 000000000000..4afd7ba73d60
--- /dev/null
+++ b/Documentation/devicetree/bindings/mux/reg-mux.txt
@@ -0,0 +1,129 @@
+Generic register bitfield-based multiplexer controller bindings
+
+Define register bitfields to be used to control multiplexers. The parent
+device tree node must be a device node to provide register r/w access.
+
+Required properties:
+- compatible : should be one of
+	"reg-mux" : if parent device of mux controller is not syscon device
+	"mmio-mux" : if parent device of mux controller is syscon device
+- #mux-control-cells : <1>
+- mux-reg-masks : an array of register offset and pre-shifted bitfield mask
+                  pairs, each describing a single mux control.
+* Standard mux-controller bindings as decribed in mux-controller.txt
+
+Optional properties:
+- idle-states : if present, the state the muxes will have when idle. The
+		special state MUX_IDLE_AS_IS is the default.
+
+The multiplexer state of each multiplexer is defined as the value of the
+bitfield described by the corresponding register offset and bitfield mask
+pair in the mux-reg-masks array.
+
+Example 1:
+The parent device of mux controller is not a syscon device.
+
+&i2c0 {
+	fpga@66 { // fpga connected to i2c
+		compatible = "fsl,lx2160aqds-fpga", "fsl,fpga-qixis-i2c",
+			     "simple-mfd";
+		reg = <0x66>;
+
+		mux: mux-controller {
+			compatible = "reg-mux";
+			#mux-control-cells = <1>;
+			mux-reg-masks = <0x54 0xf8>, /* 0: reg 0x54, bits 7:3 */
+					<0x54 0x07>; /* 1: reg 0x54, bits 2:0 */
+		};
+	};
+};
+
+mdio-mux-1 {
+	compatible = "mdio-mux-multiplexer";
+	mux-controls = <&mux 0>;
+	mdio-parent-bus = <&emdio1>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	mdio@0 {
+		reg = <0x0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+	};
+
+	mdio@8 {
+		reg = <0x8>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+	};
+
+	..
+	..
+};
+
+mdio-mux-2 {
+	compatible = "mdio-mux-multiplexer";
+	mux-controls = <&mux 1>;
+	mdio-parent-bus = <&emdio2>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	mdio@0 {
+		reg = <0x0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+	};
+
+	mdio@1 {
+		reg = <0x1>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+	};
+
+	..
+	..
+};
+
+Example 2:
+The parent device of mux controller is syscon device.
+
+syscon {
+	compatible = "syscon";
+
+	mux: mux-controller {
+		compatible = "mmio-mux";
+		#mux-control-cells = <1>;
+
+		mux-reg-masks = <0x3 0x30>, /* 0: reg 0x3, bits 5:4 */
+				<0x3 0x40>, /* 1: reg 0x3, bit 6 */
+		idle-states = <MUX_IDLE_AS_IS>, <0>;
+	};
+};
+
+video-mux {
+	compatible = "video-mux";
+	mux-controls = <&mux 0>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	ports {
+		/* inputs 0..3 */
+		port@0 {
+			reg = <0>;
+		};
+		port@1 {
+			reg = <1>;
+		};
+		port@2 {
+			reg = <2>;
+		};
+		port@3 {
+			reg = <3>;
+		};
+
+		/* output */
+		port@4 {
+			reg = <4>;
+		};
+	};
+};
-- 
cgit 


From 7a82a01e5c850986d4b53dab52ac71294285586f Mon Sep 17 00:00:00 2001
From: Dragan Cvetic <dragan.cvetic@xilinx.com>
Date: Tue, 11 Jun 2019 18:29:35 +0100
Subject: dt-bindings: xilinx-sdfec: Add SDFEC binding

Add the Soft Decision Forward Error Correction (SDFEC) Engine
bindings which is available for the Zynq UltraScale+ RFSoC
FPGA's.

Signed-off-by: Dragan Cvetic <dragan.cvetic@xilinx.com>
Signed-off-by: Derek Kiernan <derek.kiernan@xilinx.com>
Reviewed-by: Rob Herring <robh@kernel.org>

 .../devicetree/bindings/misc/xlnx,sd-fec.txt       | 58 ++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/misc/xlnx,sd-fec.txt

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../devicetree/bindings/misc/xlnx,sd-fec.txt       | 58 ++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/misc/xlnx,sd-fec.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/misc/xlnx,sd-fec.txt b/Documentation/devicetree/bindings/misc/xlnx,sd-fec.txt
new file mode 100644
index 000000000000..e3289634fa30
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/xlnx,sd-fec.txt
@@ -0,0 +1,58 @@
+* Xilinx SDFEC(16nm) IP *
+
+The Soft Decision Forward Error Correction (SDFEC) Engine is a Hard IP block
+which provides high-throughput LDPC and Turbo Code implementations.
+The LDPC decode & encode functionality is capable of covering a range of
+customer specified Quasi-cyclic (QC) codes. The Turbo decode functionality
+principally covers codes used by LTE. The FEC Engine offers significant
+power and area savings versus implementations done in the FPGA fabric.
+
+
+Required properties:
+- compatible: Must be "xlnx,sd-fec-1.1"
+- clock-names : List of input clock names from the following:
+    - "core_clk", Main processing clock for processing core (required)
+    - "s_axi_aclk", AXI4-Lite memory-mapped slave interface clock (required)
+    - "s_axis_din_aclk", DIN AXI4-Stream Slave interface clock (optional)
+    - "s_axis_din_words-aclk", DIN_WORDS AXI4-Stream Slave interface clock (optional)
+    - "s_axis_ctrl_aclk",  Control input AXI4-Stream Slave interface clock (optional)
+    - "m_axis_dout_aclk", DOUT AXI4-Stream Master interface clock (optional)
+    - "m_axis_dout_words_aclk", DOUT_WORDS AXI4-Stream Master interface clock (optional)
+    - "m_axis_status_aclk", Status output AXI4-Stream Master interface clock (optional)
+- clocks : Clock phandles (see clock_bindings.txt for details).
+- reg: Should contain Xilinx SDFEC 16nm Hardened IP block registers
+  location and length.
+- xlnx,sdfec-code : Should contain "ldpc" or "turbo" to describe the codes
+  being used.
+- xlnx,sdfec-din-words : A value 0 indicates that the DIN_WORDS interface is
+  driven with a fixed value and is not present on the device, a value of 1
+  configures the DIN_WORDS to be block based, while a value of 2 configures the
+  DIN_WORDS input to be supplied for each AXI transaction.
+- xlnx,sdfec-din-width : Configures the DIN AXI stream where a value of 1
+  configures a width of "1x128b", 2 a width of "2x128b" and 4 configures a width
+  of "4x128b".
+- xlnx,sdfec-dout-words : A value 0 indicates that the DOUT_WORDS interface is
+  driven with a fixed value and is not present on the device, a value of 1
+  configures the DOUT_WORDS to be block based, while a value of 2 configures the
+  DOUT_WORDS input to be supplied for each AXI transaction.
+- xlnx,sdfec-dout-width : Configures the DOUT AXI stream where a value of 1
+  configures a width of "1x128b", 2 a width of "2x128b" and 4 configures a width
+  of "4x128b".
+Optional properties:
+- interrupts: should contain SDFEC interrupt number
+
+Example
+---------------------------------------
+	sd_fec_0: sd-fec@a0040000 {
+		compatible = "xlnx,sd-fec-1.1";
+		clock-names = "core_clk","s_axi_aclk","s_axis_ctrl_aclk","s_axis_din_aclk","m_axis_status_aclk","m_axis_dout_aclk";
+		clocks = <&misc_clk_2>,<&misc_clk_0>,<&misc_clk_1>,<&misc_clk_1>,<&misc_clk_1>, <&misc_clk_1>;
+		reg = <0x0 0xa0040000 0x0 0x40000>;
+		interrupt-parent = <&axi_intc>;
+		interrupts = <1 0>;
+		xlnx,sdfec-code = "ldpc";
+		xlnx,sdfec-din-words = <0>;
+		xlnx,sdfec-din-width = <2>;
+		xlnx,sdfec-dout-words = <0>;
+		xlnx,sdfec-dout-width = <1>;
+	};
-- 
cgit 


From 22d137e283e63a62414464744302e01f28196ffc Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Tue, 4 Jun 2019 16:30:15 +0200
Subject: dt-bindings: memory: jz4780: Add compatible string for JZ4740 SoC

Add a compatible string to support the memory controller built into the
JZ4740 SoC from Ingenic.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Reviewed-by: Boris Brezillon <bbrezillon@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../devicetree/bindings/memory-controllers/ingenic,jz4780-nemc.txt       | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/memory-controllers/ingenic,jz4780-nemc.txt b/Documentation/devicetree/bindings/memory-controllers/ingenic,jz4780-nemc.txt
index f936b5589b19..59b8dcc118ee 100644
--- a/Documentation/devicetree/bindings/memory-controllers/ingenic,jz4780-nemc.txt
+++ b/Documentation/devicetree/bindings/memory-controllers/ingenic,jz4780-nemc.txt
@@ -5,6 +5,7 @@ controller in Ingenic JZ4780
 
 Required properties:
 - compatible: Should be set to one of:
+    "ingenic,jz4740-nemc" (JZ4740)
     "ingenic,jz4780-nemc" (JZ4780)
 - reg: Should specify the NEMC controller registers location and length.
 - clocks: Clock for the NEMC controller.
-- 
cgit 


From fd757dbac5f6c42f45b1a29a354255045a655a27 Mon Sep 17 00:00:00 2001
From: Tomasz Figa <tomasz.figa@gmail.com>
Date: Fri, 21 Jun 2019 13:13:51 +0200
Subject: dt-bindings: extcon: Add support for fsa9480 switch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds documentation for binding of extcont Fairchild
Semiconductor FSA9480 microusb switch.
This usb port accessory detector and switch, can be found for example in
some Samsung s5pv210 based phones.

Signed-off-by: Tomasz Figa <tomasz.figa@gmail.com>
Signed-off-by: Paweł Chmiel <pawel.mikolaj.chmiel@gmail.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 .../devicetree/bindings/extcon/extcon-fsa9480.txt     | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/extcon/extcon-fsa9480.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/extcon/extcon-fsa9480.txt b/Documentation/devicetree/bindings/extcon/extcon-fsa9480.txt
new file mode 100644
index 000000000000..d592c21245f2
--- /dev/null
+++ b/Documentation/devicetree/bindings/extcon/extcon-fsa9480.txt
@@ -0,0 +1,19 @@
+FAIRCHILD SEMICONDUCTOR FSA9480 MICROUSB SWITCH
+
+The FSA9480 is a USB port accessory detector and switch. The FSA9480 is fully
+controlled using I2C and enables USB data, stereo and mono audio, video,
+microphone, and UART data to use a common connector port.
+
+Required properties:
+ - compatible : Must be "fcs,fsa9480"
+ - reg : Specifies i2c slave address. Must be 0x25.
+ - interrupts : Should contain one entry specifying interrupt signal of
+   interrupt parent to which interrupt pin of the chip is connected.
+
+ Example:
+	musb@25 {
+		compatible = "fcs,fsa9480";
+		reg = <0x25>;
+		interrupt-parent = <&gph2>;
+		interrupts = <7 0>;
+	};
-- 
cgit 


From 25c7eabed5b219e975c8aee527ae202604d10911 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <pure.logic@nexus-software.ie>
Date: Wed, 26 Jun 2019 11:27:32 +0100
Subject: dt-bindings: imx-ocotp: Add i.MX8MM compatible

Add compatible for i.MX8MM as per arch/arm64/boot/dts/freescale/imx8mm.dtsi

Signed-off-by: Bryan O'Donoghue <pure.logic@nexus-software.ie>
Cc: Rob Herring <robh@kernel.org>
Reviewed-by: Leonard Crestez <leonard.crestez@nxp.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/nvmem/imx-ocotp.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/nvmem/imx-ocotp.txt b/Documentation/devicetree/bindings/nvmem/imx-ocotp.txt
index 68f7d6fdd140..96ffd06d2ca8 100644
--- a/Documentation/devicetree/bindings/nvmem/imx-ocotp.txt
+++ b/Documentation/devicetree/bindings/nvmem/imx-ocotp.txt
@@ -15,6 +15,7 @@ Required properties:
 	"fsl,imx6sll-ocotp" (i.MX6SLL),
 	"fsl,imx7ulp-ocotp" (i.MX7ULP),
 	"fsl,imx8mq-ocotp" (i.MX8MQ),
+	"fsl,imx8mm-ocotp" (i.MX8MM),
 	followed by "syscon".
 - #address-cells : Should be 1
 - #size-cells : Should be 1
-- 
cgit 


From 9b5db89ea4bfdbb23d4f85f3a7fbf2cd36d20146 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 28 Jun 2019 18:23:13 -0300
Subject: docs: misc-devices: convert files without extension to ReST

Those files are also text files. Convert them to ReST and add
to the misc-files index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Link: https://lore.kernel.org/r/b7dc829809673bd8cffe0e7bbe9c9308681c6fe2.1561756511.git.mchehab+samsung@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/misc-devices/eeprom         |  96 ---------------------
 Documentation/misc-devices/eeprom.rst     | 107 +++++++++++++++++++++++
 Documentation/misc-devices/ics932s401     |  31 -------
 Documentation/misc-devices/ics932s401.rst |  36 ++++++++
 Documentation/misc-devices/index.rst      |   5 ++
 Documentation/misc-devices/isl29003       |  62 --------------
 Documentation/misc-devices/isl29003.rst   |  75 ++++++++++++++++
 Documentation/misc-devices/lis3lv02d      |  93 --------------------
 Documentation/misc-devices/lis3lv02d.rst  |  99 ++++++++++++++++++++++
 Documentation/misc-devices/max6875        | 110 ------------------------
 Documentation/misc-devices/max6875.rst    | 136 ++++++++++++++++++++++++++++++
 11 files changed, 458 insertions(+), 392 deletions(-)
 delete mode 100644 Documentation/misc-devices/eeprom
 create mode 100644 Documentation/misc-devices/eeprom.rst
 delete mode 100644 Documentation/misc-devices/ics932s401
 create mode 100644 Documentation/misc-devices/ics932s401.rst
 delete mode 100644 Documentation/misc-devices/isl29003
 create mode 100644 Documentation/misc-devices/isl29003.rst
 delete mode 100644 Documentation/misc-devices/lis3lv02d
 create mode 100644 Documentation/misc-devices/lis3lv02d.rst
 delete mode 100644 Documentation/misc-devices/max6875
 create mode 100644 Documentation/misc-devices/max6875.rst

(limited to 'Documentation')

diff --git a/Documentation/misc-devices/eeprom b/Documentation/misc-devices/eeprom
deleted file mode 100644
index ba692011f221..000000000000
--- a/Documentation/misc-devices/eeprom
+++ /dev/null
@@ -1,96 +0,0 @@
-Kernel driver eeprom
-====================
-
-Supported chips:
-  * Any EEPROM chip in the designated address range
-    Prefix: 'eeprom'
-    Addresses scanned: I2C 0x50 - 0x57
-    Datasheets: Publicly available from:
-                Atmel (www.atmel.com),
-                Catalyst (www.catsemi.com),
-                Fairchild (www.fairchildsemi.com),
-                Microchip (www.microchip.com),
-                Philips (www.semiconductor.philips.com),
-                Rohm (www.rohm.com),
-                ST (www.st.com),
-                Xicor (www.xicor.com),
-                and others.
-
-        Chip     Size (bits)    Address
-        24C01     1K            0x50 (shadows at 0x51 - 0x57)
-        24C01A    1K            0x50 - 0x57 (Typical device on DIMMs)
-        24C02     2K            0x50 - 0x57
-        24C04     4K            0x50, 0x52, 0x54, 0x56
-                                (additional data at 0x51, 0x53, 0x55, 0x57)
-        24C08     8K            0x50, 0x54 (additional data at 0x51, 0x52,
-                                0x53, 0x55, 0x56, 0x57)
-        24C16    16K            0x50 (additional data at 0x51 - 0x57)
-        Sony      2K            0x57
-
-        Atmel     34C02B  2K    0x50 - 0x57, SW write protect at 0x30-37
-        Catalyst  34FC02  2K    0x50 - 0x57, SW write protect at 0x30-37
-        Catalyst  34RC02  2K    0x50 - 0x57, SW write protect at 0x30-37
-        Fairchild 34W02   2K    0x50 - 0x57, SW write protect at 0x30-37
-        Microchip 24AA52  2K    0x50 - 0x57, SW write protect at 0x30-37
-        ST        M34C02  2K    0x50 - 0x57, SW write protect at 0x30-37
-
-
-Authors:
-        Frodo Looijaard <frodol@dds.nl>,
-        Philip Edelbrock <phil@netroedge.com>,
-        Jean Delvare <jdelvare@suse.de>,
-        Greg Kroah-Hartman <greg@kroah.com>,
-        IBM Corp.
-
-Description
------------
-
-This is a simple EEPROM module meant to enable reading the first 256 bytes
-of an EEPROM (on a SDRAM DIMM for example). However, it will access serial
-EEPROMs on any I2C adapter. The supported devices are generically called
-24Cxx, and are listed above; however the numbering for these
-industry-standard devices may vary by manufacturer.
-
-This module was a programming exercise to get used to the new project
-organization laid out by Frodo, but it should be at least completely
-effective for decoding the contents of EEPROMs on DIMMs.
-
-DIMMS will typically contain a 24C01A or 24C02, or the 34C02 variants.
-The other devices will not be found on a DIMM because they respond to more
-than one address.
-
-DDC Monitors may contain any device. Often a 24C01, which responds to all 8
-addresses, is found.
-
-Recent Sony Vaio laptops have an EEPROM at 0x57. We couldn't get the
-specification, so it is guess work and far from being complete.
-
-The Microchip 24AA52/24LCS52, ST M34C02, and others support an additional
-software write protect register at 0x30 - 0x37 (0x20 less than the memory
-location). The chip responds to "write quick" detection at this address but
-does not respond to byte reads. If this register is present, the lower 128
-bytes of the memory array are not write protected. Any byte data write to
-this address will write protect the memory array permanently, and the
-device will no longer respond at the 0x30-37 address. The eeprom driver
-does not support this register.
-
-Lacking functionality:
-
-* Full support for larger devices (24C04, 24C08, 24C16). These are not
-typically found on a PC. These devices will appear as separate devices at
-multiple addresses.
-
-* Support for really large devices (24C32, 24C64, 24C128, 24C256, 24C512).
-These devices require two-byte address fields and are not supported.
-
-* Enable Writing. Again, no technical reason why not, but making it easy
-to change the contents of the EEPROMs (on DIMMs anyway) also makes it easy
-to disable the DIMMs (potentially preventing the computer from booting)
-until the values are restored somehow.
-
-Use:
-
-After inserting the module (and any other required SMBus/i2c modules), you
-should have some EEPROM directories in /sys/bus/i2c/devices/* of names such
-as "0-0050". Inside each of these is a series of files, the eeprom file
-contains the binary data from EEPROM.
diff --git a/Documentation/misc-devices/eeprom.rst b/Documentation/misc-devices/eeprom.rst
new file mode 100644
index 000000000000..008249675ccc
--- /dev/null
+++ b/Documentation/misc-devices/eeprom.rst
@@ -0,0 +1,107 @@
+====================
+Kernel driver eeprom
+====================
+
+Supported chips:
+
+  * Any EEPROM chip in the designated address range
+
+    Prefix: 'eeprom'
+
+    Addresses scanned: I2C 0x50 - 0x57
+
+    Datasheets: Publicly available from:
+
+                Atmel (www.atmel.com),
+                Catalyst (www.catsemi.com),
+                Fairchild (www.fairchildsemi.com),
+                Microchip (www.microchip.com),
+                Philips (www.semiconductor.philips.com),
+                Rohm (www.rohm.com),
+                ST (www.st.com),
+                Xicor (www.xicor.com),
+                and others.
+
+        ========= ============= ============================================
+        Chip      Size (bits)   Address
+        ========= ============= ============================================
+        24C01     1K            0x50 (shadows at 0x51 - 0x57)
+        24C01A    1K            0x50 - 0x57 (Typical device on DIMMs)
+        24C02     2K            0x50 - 0x57
+        24C04     4K            0x50, 0x52, 0x54, 0x56
+                                (additional data at 0x51, 0x53, 0x55, 0x57)
+        24C08     8K            0x50, 0x54 (additional data at 0x51, 0x52,
+                                0x53, 0x55, 0x56, 0x57)
+        24C16     16K           0x50 (additional data at 0x51 - 0x57)
+        Sony      2K            0x57
+
+        Atmel     34C02B  2K    0x50 - 0x57, SW write protect at 0x30-37
+        Catalyst  34FC02  2K    0x50 - 0x57, SW write protect at 0x30-37
+        Catalyst  34RC02  2K    0x50 - 0x57, SW write protect at 0x30-37
+        Fairchild 34W02   2K    0x50 - 0x57, SW write protect at 0x30-37
+        Microchip 24AA52  2K    0x50 - 0x57, SW write protect at 0x30-37
+        ST        M34C02  2K    0x50 - 0x57, SW write protect at 0x30-37
+        ========= ============= ============================================
+
+
+Authors:
+        - Frodo Looijaard <frodol@dds.nl>,
+        - Philip Edelbrock <phil@netroedge.com>,
+        - Jean Delvare <jdelvare@suse.de>,
+        - Greg Kroah-Hartman <greg@kroah.com>,
+        - IBM Corp.
+
+Description
+-----------
+
+This is a simple EEPROM module meant to enable reading the first 256 bytes
+of an EEPROM (on a SDRAM DIMM for example). However, it will access serial
+EEPROMs on any I2C adapter. The supported devices are generically called
+24Cxx, and are listed above; however the numbering for these
+industry-standard devices may vary by manufacturer.
+
+This module was a programming exercise to get used to the new project
+organization laid out by Frodo, but it should be at least completely
+effective for decoding the contents of EEPROMs on DIMMs.
+
+DIMMS will typically contain a 24C01A or 24C02, or the 34C02 variants.
+The other devices will not be found on a DIMM because they respond to more
+than one address.
+
+DDC Monitors may contain any device. Often a 24C01, which responds to all 8
+addresses, is found.
+
+Recent Sony Vaio laptops have an EEPROM at 0x57. We couldn't get the
+specification, so it is guess work and far from being complete.
+
+The Microchip 24AA52/24LCS52, ST M34C02, and others support an additional
+software write protect register at 0x30 - 0x37 (0x20 less than the memory
+location). The chip responds to "write quick" detection at this address but
+does not respond to byte reads. If this register is present, the lower 128
+bytes of the memory array are not write protected. Any byte data write to
+this address will write protect the memory array permanently, and the
+device will no longer respond at the 0x30-37 address. The eeprom driver
+does not support this register.
+
+Lacking functionality
+---------------------
+
+* Full support for larger devices (24C04, 24C08, 24C16). These are not
+  typically found on a PC. These devices will appear as separate devices at
+  multiple addresses.
+
+* Support for really large devices (24C32, 24C64, 24C128, 24C256, 24C512).
+  These devices require two-byte address fields and are not supported.
+
+* Enable Writing. Again, no technical reason why not, but making it easy
+  to change the contents of the EEPROMs (on DIMMs anyway) also makes it easy
+  to disable the DIMMs (potentially preventing the computer from booting)
+  until the values are restored somehow.
+
+Use
+---
+
+After inserting the module (and any other required SMBus/i2c modules), you
+should have some EEPROM directories in ``/sys/bus/i2c/devices/*`` of names such
+as "0-0050". Inside each of these is a series of files, the eeprom file
+contains the binary data from EEPROM.
diff --git a/Documentation/misc-devices/ics932s401 b/Documentation/misc-devices/ics932s401
deleted file mode 100644
index bdac67ff6e3f..000000000000
--- a/Documentation/misc-devices/ics932s401
+++ /dev/null
@@ -1,31 +0,0 @@
-Kernel driver ics932s401
-======================
-
-Supported chips:
-  * IDT ICS932S401
-    Prefix: 'ics932s401'
-    Addresses scanned: I2C 0x69
-    Datasheet: Publicly available at the IDT website
-
-Author: Darrick J. Wong
-
-Description
------------
-
-This driver implements support for the IDT ICS932S401 chip family.
-
-This chip has 4 clock outputs--a base clock for the CPU (which is likely
-multiplied to get the real CPU clock), a system clock, a PCI clock, a USB
-clock, and a reference clock.  The driver reports selected and actual
-frequency.  If spread spectrum mode is enabled, the driver also reports by what
-percent the clock signal is being spread, which should be between 0 and -0.5%.
-All frequencies are reported in KHz.
-
-The ICS932S401 monitors all inputs continuously. The driver will not read
-the registers more often than once every other second.
-
-Special Features
-----------------
-
-The clocks could be reprogrammed to increase system speed.  I will not help you
-do this, as you risk damaging your system!
diff --git a/Documentation/misc-devices/ics932s401.rst b/Documentation/misc-devices/ics932s401.rst
new file mode 100644
index 000000000000..613ee54a9c21
--- /dev/null
+++ b/Documentation/misc-devices/ics932s401.rst
@@ -0,0 +1,36 @@
+========================
+Kernel driver ics932s401
+========================
+
+Supported chips:
+
+  * IDT ICS932S401
+
+    Prefix: 'ics932s401'
+
+    Addresses scanned: I2C 0x69
+
+    Datasheet: Publicly available at the IDT website
+
+Author: Darrick J. Wong
+
+Description
+-----------
+
+This driver implements support for the IDT ICS932S401 chip family.
+
+This chip has 4 clock outputs--a base clock for the CPU (which is likely
+multiplied to get the real CPU clock), a system clock, a PCI clock, a USB
+clock, and a reference clock.  The driver reports selected and actual
+frequency.  If spread spectrum mode is enabled, the driver also reports by what
+percent the clock signal is being spread, which should be between 0 and -0.5%.
+All frequencies are reported in KHz.
+
+The ICS932S401 monitors all inputs continuously. The driver will not read
+the registers more often than once every other second.
+
+Special Features
+----------------
+
+The clocks could be reprogrammed to increase system speed.  I will not help you
+do this, as you risk damaging your system!
diff --git a/Documentation/misc-devices/index.rst b/Documentation/misc-devices/index.rst
index dfd1f45a3127..a57f92dfe49a 100644
--- a/Documentation/misc-devices/index.rst
+++ b/Documentation/misc-devices/index.rst
@@ -14,4 +14,9 @@ fit into other categories.
 .. toctree::
    :maxdepth: 2
 
+   eeprom
    ibmvmc
+   ics932s401
+   isl29003
+   lis3lv02d
+   max6875
diff --git a/Documentation/misc-devices/isl29003 b/Documentation/misc-devices/isl29003
deleted file mode 100644
index 80b952fd32ff..000000000000
--- a/Documentation/misc-devices/isl29003
+++ /dev/null
@@ -1,62 +0,0 @@
-Kernel driver isl29003
-=====================
-
-Supported chips:
-* Intersil ISL29003
-Prefix: 'isl29003'
-Addresses scanned: none
-Datasheet:
-http://www.intersil.com/data/fn/fn7464.pdf
-
-Author: Daniel Mack <daniel@caiaq.de>
-
-
-Description
------------
-The ISL29003 is an integrated light sensor with a 16-bit integrating type
-ADC, I2C user programmable lux range select for optimized counts/lux, and
-I2C multi-function control and monitoring capabilities. The internal ADC
-provides 16-bit resolution while rejecting 50Hz and 60Hz flicker caused by
-artificial light sources.
-
-The driver allows to set the lux range, the bit resolution, the operational
-mode (see below) and the power state of device and can read the current lux
-value, of course.
-
-
-Detection
----------
-
-The ISL29003 does not have an ID register which could be used to identify
-it, so the detection routine will just try to read from the configured I2C
-address and consider the device to be present as soon as it ACKs the
-transfer.
-
-
-Sysfs entries
--------------
-
-range:
-	0: 0 lux to 1000 lux (default)
-	1: 0 lux to 4000 lux
-	2: 0 lux to 16,000 lux
-	3: 0 lux to 64,000 lux
-
-resolution:
-	0: 2^16 cycles (default)
-	1: 2^12 cycles
-	2: 2^8 cycles
-	3: 2^4 cycles
-
-mode:
-	0: diode1's current (unsigned 16bit) (default)
-	1: diode1's current (unsigned 16bit)
-	2: difference between diodes (l1 - l2, signed 15bit)
-
-power_state:
-	0: device is disabled (default)
-	1: device is enabled
-
-lux (read only):
-	returns the value from the last sensor reading
-
diff --git a/Documentation/misc-devices/isl29003.rst b/Documentation/misc-devices/isl29003.rst
new file mode 100644
index 000000000000..0cc38aed6c00
--- /dev/null
+++ b/Documentation/misc-devices/isl29003.rst
@@ -0,0 +1,75 @@
+======================
+Kernel driver isl29003
+======================
+
+Supported chips:
+
+* Intersil ISL29003
+
+Prefix: 'isl29003'
+
+Addresses scanned: none
+
+Datasheet:
+http://www.intersil.com/data/fn/fn7464.pdf
+
+Author: Daniel Mack <daniel@caiaq.de>
+
+
+Description
+-----------
+The ISL29003 is an integrated light sensor with a 16-bit integrating type
+ADC, I2C user programmable lux range select for optimized counts/lux, and
+I2C multi-function control and monitoring capabilities. The internal ADC
+provides 16-bit resolution while rejecting 50Hz and 60Hz flicker caused by
+artificial light sources.
+
+The driver allows to set the lux range, the bit resolution, the operational
+mode (see below) and the power state of device and can read the current lux
+value, of course.
+
+
+Detection
+---------
+
+The ISL29003 does not have an ID register which could be used to identify
+it, so the detection routine will just try to read from the configured I2C
+address and consider the device to be present as soon as it ACKs the
+transfer.
+
+
+Sysfs entries
+-------------
+
+range:
+        == ===========================
+	0: 0 lux to 1000 lux (default)
+	1: 0 lux to 4000 lux
+	2: 0 lux to 16,000 lux
+	3: 0 lux to 64,000 lux
+        == ===========================
+
+resolution:
+        == =====================
+	0: 2^16 cycles (default)
+	1: 2^12 cycles
+	2: 2^8 cycles
+	3: 2^4 cycles
+        == =====================
+
+mode:
+        == =================================================
+	0: diode1's current (unsigned 16bit) (default)
+	1: diode1's current (unsigned 16bit)
+	2: difference between diodes (l1 - l2, signed 15bit)
+        == =================================================
+
+power_state:
+        == =================================================
+	0: device is disabled (default)
+	1: device is enabled
+        == =================================================
+
+lux (read only):
+	returns the value from the last sensor reading
+
diff --git a/Documentation/misc-devices/lis3lv02d b/Documentation/misc-devices/lis3lv02d
deleted file mode 100644
index f89960a0ff95..000000000000
--- a/Documentation/misc-devices/lis3lv02d
+++ /dev/null
@@ -1,93 +0,0 @@
-Kernel driver lis3lv02d
-=======================
-
-Supported chips:
-
-  * STMicroelectronics LIS3LV02DL, LIS3LV02DQ (12 bits precision)
-  * STMicroelectronics LIS302DL, LIS3L02DQ, LIS331DL (8 bits) and
-    LIS331DLH (16 bits)
-
-Authors:
-        Yan Burman <burman.yan@gmail.com>
-	Eric Piel <eric.piel@tremplin-utc.net>
-
-
-Description
------------
-
-This driver provides support for the accelerometer found in various HP laptops
-sporting the feature officially called "HP Mobile Data Protection System 3D" or
-"HP 3D DriveGuard". It detects automatically laptops with this sensor. Known
-models (full list can be found in drivers/platform/x86/hp_accel.c) will have
-their axis automatically oriented on standard way (eg: you can directly play
-neverball). The accelerometer data is readable via
-/sys/devices/platform/lis3lv02d. Reported values are scaled
-to mg values (1/1000th of earth gravity).
-
-Sysfs attributes under /sys/devices/platform/lis3lv02d/:
-position - 3D position that the accelerometer reports. Format: "(x,y,z)"
-rate - read reports the sampling rate of the accelerometer device in HZ.
-	write changes sampling rate of the accelerometer device.
-	Only values which are supported by HW are accepted.
-selftest - performs selftest for the chip as specified by chip manufacturer.
-
-This driver also provides an absolute input class device, allowing
-the laptop to act as a pinball machine-esque joystick. Joystick device can be
-calibrated. Joystick device can be in two different modes.
-By default output values are scaled between -32768 .. 32767. In joystick raw
-mode, joystick and sysfs position entry have the same scale. There can be
-small difference due to input system fuzziness feature.
-Events are also available as input event device.
-
-Selftest is meant only for hardware diagnostic purposes. It is not meant to be
-used during normal operations. Position data is not corrupted during selftest
-but interrupt behaviour is not guaranteed to work reliably. In test mode, the
-sensing element is internally moved little bit. Selftest measures difference
-between normal mode and test mode. Chip specifications tell the acceptance
-limit for each type of the chip. Limits are provided via platform data
-to allow adjustment of the limits without a change to the actual driver.
-Seltest returns either "OK x y z" or "FAIL x y z" where x, y and z are
-measured difference between modes. Axes are not remapped in selftest mode.
-Measurement values are provided to help HW diagnostic applications to make
-final decision.
-
-On HP laptops, if the led infrastructure is activated, support for a led
-indicating disk protection will be provided as /sys/class/leds/hp::hddprotect.
-
-Another feature of the driver is misc device called "freefall" that
-acts similar to /dev/rtc and reacts on free-fall interrupts received
-from the device. It supports blocking operations, poll/select and
-fasync operation modes. You must read 1 bytes from the device.  The
-result is number of free-fall interrupts since the last successful
-read (or 255 if number of interrupts would not fit). See the freefall.c
-file for an example on using the device.
-
-
-Axes orientation
-----------------
-
-For better compatibility between the various laptops. The values reported by
-the accelerometer are converted into a "standard" organisation of the axes
-(aka "can play neverball out of the box"):
- * When the laptop is horizontal the position reported is about 0 for X and Y
-	and a positive value for Z
- * If the left side is elevated, X increases (becomes positive)
- * If the front side (where the touchpad is) is elevated, Y decreases
-	(becomes negative)
- * If the laptop is put upside-down, Z becomes negative
-
-If your laptop model is not recognized (cf "dmesg"), you can send an
-email to the maintainer to add it to the database.  When reporting a new
-laptop, please include the output of "dmidecode" plus the value of
-/sys/devices/platform/lis3lv02d/position in these four cases.
-
-Q&A
----
-
-Q: How do I safely simulate freefall? I have an HP "portable
-workstation" which has about 3.5kg and a plastic case, so letting it
-fall to the ground is out of question...
-
-A: The sensor is pretty sensitive, so your hands can do it. Lift it
-into free space, follow the fall with your hands for like 10
-centimeters. That should be enough to trigger the detection.
diff --git a/Documentation/misc-devices/lis3lv02d.rst b/Documentation/misc-devices/lis3lv02d.rst
new file mode 100644
index 000000000000..959bd2b822cf
--- /dev/null
+++ b/Documentation/misc-devices/lis3lv02d.rst
@@ -0,0 +1,99 @@
+=======================
+Kernel driver lis3lv02d
+=======================
+
+Supported chips:
+
+  * STMicroelectronics LIS3LV02DL, LIS3LV02DQ (12 bits precision)
+  * STMicroelectronics LIS302DL, LIS3L02DQ, LIS331DL (8 bits) and
+    LIS331DLH (16 bits)
+
+Authors:
+        - Yan Burman <burman.yan@gmail.com>
+	- Eric Piel <eric.piel@tremplin-utc.net>
+
+
+Description
+-----------
+
+This driver provides support for the accelerometer found in various HP laptops
+sporting the feature officially called "HP Mobile Data Protection System 3D" or
+"HP 3D DriveGuard". It detects automatically laptops with this sensor. Known
+models (full list can be found in drivers/platform/x86/hp_accel.c) will have
+their axis automatically oriented on standard way (eg: you can directly play
+neverball). The accelerometer data is readable via
+/sys/devices/platform/lis3lv02d. Reported values are scaled
+to mg values (1/1000th of earth gravity).
+
+Sysfs attributes under /sys/devices/platform/lis3lv02d/:
+
+position
+      - 3D position that the accelerometer reports. Format: "(x,y,z)"
+rate
+      - read reports the sampling rate of the accelerometer device in HZ.
+	write changes sampling rate of the accelerometer device.
+	Only values which are supported by HW are accepted.
+selftest
+      - performs selftest for the chip as specified by chip manufacturer.
+
+This driver also provides an absolute input class device, allowing
+the laptop to act as a pinball machine-esque joystick. Joystick device can be
+calibrated. Joystick device can be in two different modes.
+By default output values are scaled between -32768 .. 32767. In joystick raw
+mode, joystick and sysfs position entry have the same scale. There can be
+small difference due to input system fuzziness feature.
+Events are also available as input event device.
+
+Selftest is meant only for hardware diagnostic purposes. It is not meant to be
+used during normal operations. Position data is not corrupted during selftest
+but interrupt behaviour is not guaranteed to work reliably. In test mode, the
+sensing element is internally moved little bit. Selftest measures difference
+between normal mode and test mode. Chip specifications tell the acceptance
+limit for each type of the chip. Limits are provided via platform data
+to allow adjustment of the limits without a change to the actual driver.
+Seltest returns either "OK x y z" or "FAIL x y z" where x, y and z are
+measured difference between modes. Axes are not remapped in selftest mode.
+Measurement values are provided to help HW diagnostic applications to make
+final decision.
+
+On HP laptops, if the led infrastructure is activated, support for a led
+indicating disk protection will be provided as /sys/class/leds/hp::hddprotect.
+
+Another feature of the driver is misc device called "freefall" that
+acts similar to /dev/rtc and reacts on free-fall interrupts received
+from the device. It supports blocking operations, poll/select and
+fasync operation modes. You must read 1 bytes from the device.  The
+result is number of free-fall interrupts since the last successful
+read (or 255 if number of interrupts would not fit). See the freefall.c
+file for an example on using the device.
+
+
+Axes orientation
+----------------
+
+For better compatibility between the various laptops. The values reported by
+the accelerometer are converted into a "standard" organisation of the axes
+(aka "can play neverball out of the box"):
+
+ * When the laptop is horizontal the position reported is about 0 for X and Y
+   and a positive value for Z
+ * If the left side is elevated, X increases (becomes positive)
+ * If the front side (where the touchpad is) is elevated, Y decreases
+   (becomes negative)
+ * If the laptop is put upside-down, Z becomes negative
+
+If your laptop model is not recognized (cf "dmesg"), you can send an
+email to the maintainer to add it to the database.  When reporting a new
+laptop, please include the output of "dmidecode" plus the value of
+/sys/devices/platform/lis3lv02d/position in these four cases.
+
+Q&A
+---
+
+Q: How do I safely simulate freefall? I have an HP "portable
+workstation" which has about 3.5kg and a plastic case, so letting it
+fall to the ground is out of question...
+
+A: The sensor is pretty sensitive, so your hands can do it. Lift it
+into free space, follow the fall with your hands for like 10
+centimeters. That should be enough to trigger the detection.
diff --git a/Documentation/misc-devices/max6875 b/Documentation/misc-devices/max6875
deleted file mode 100644
index 2f2bd0b17b5d..000000000000
--- a/Documentation/misc-devices/max6875
+++ /dev/null
@@ -1,110 +0,0 @@
-Kernel driver max6875
-=====================
-
-Supported chips:
-  * Maxim MAX6874, MAX6875
-    Prefix: 'max6875'
-    Addresses scanned: None (see below)
-    Datasheet:
-        http://pdfserv.maxim-ic.com/en/ds/MAX6874-MAX6875.pdf
-
-Author: Ben Gardner <bgardner@wabtec.com>
-
-
-Description
------------
-
-The Maxim MAX6875 is an EEPROM-programmable power-supply sequencer/supervisor.
-It provides timed outputs that can be used as a watchdog, if properly wired.
-It also provides 512 bytes of user EEPROM.
-
-At reset, the MAX6875 reads the configuration EEPROM into its configuration
-registers.  The chip then begins to operate according to the values in the
-registers.
-
-The Maxim MAX6874 is a similar, mostly compatible device, with more inputs
-and outputs:
-             vin     gpi    vout
-MAX6874        6       4       8
-MAX6875        4       3       5
-
-See the datasheet for more information.
-
-
-Sysfs entries
--------------
-
-eeprom        - 512 bytes of user-defined EEPROM space.
-
-
-General Remarks
----------------
-
-Valid addresses for the MAX6875 are 0x50 and 0x52.
-Valid addresses for the MAX6874 are 0x50, 0x52, 0x54 and 0x56.
-The driver does not probe any address, so you explicitly instantiate the
-devices.
-
-Example:
-$ modprobe max6875
-$ echo max6875 0x50 > /sys/bus/i2c/devices/i2c-0/new_device
-
-The MAX6874/MAX6875 ignores address bit 0, so this driver attaches to multiple
-addresses.  For example, for address 0x50, it also reserves 0x51.
-The even-address instance is called 'max6875', the odd one is 'dummy'.
-
-
-Programming the chip using i2c-dev
-----------------------------------
-
-Use the i2c-dev interface to access and program the chips.
-Reads and writes are performed differently depending on the address range.
-
-The configuration registers are at addresses 0x00 - 0x45.
-Use i2c_smbus_write_byte_data() to write a register and
-i2c_smbus_read_byte_data() to read a register.
-The command is the register number.
-
-Examples:
-To write a 1 to register 0x45:
-  i2c_smbus_write_byte_data(fd, 0x45, 1);
-
-To read register 0x45:
-  value = i2c_smbus_read_byte_data(fd, 0x45);
-
-
-The configuration EEPROM is at addresses 0x8000 - 0x8045.
-The user EEPROM is at addresses 0x8100 - 0x82ff.
-
-Use i2c_smbus_write_word_data() to write a byte to EEPROM.
-
-The command is the upper byte of the address: 0x80, 0x81, or 0x82.
-The data word is the lower part of the address or'd with data << 8.
-  cmd = address >> 8;
-  val = (address & 0xff) | (data << 8);
-
-Example:
-To write 0x5a to address 0x8003:
-  i2c_smbus_write_word_data(fd, 0x80, 0x5a03);
-
-
-Reading data from the EEPROM is a little more complicated.
-Use i2c_smbus_write_byte_data() to set the read address and then
-i2c_smbus_read_byte() or i2c_smbus_read_i2c_block_data() to read the data.
-
-Example:
-To read data starting at offset 0x8100, first set the address:
-  i2c_smbus_write_byte_data(fd, 0x81, 0x00);
-
-And then read the data
-  value = i2c_smbus_read_byte(fd);
-
-  or
-
-  count = i2c_smbus_read_i2c_block_data(fd, 0x84, 16, buffer);
-
-The block read should read 16 bytes.
-0x84 is the block read command.
-
-See the datasheet for more details.
-
diff --git a/Documentation/misc-devices/max6875.rst b/Documentation/misc-devices/max6875.rst
new file mode 100644
index 000000000000..ad419ac22a5b
--- /dev/null
+++ b/Documentation/misc-devices/max6875.rst
@@ -0,0 +1,136 @@
+=====================
+Kernel driver max6875
+=====================
+
+Supported chips:
+
+  * Maxim MAX6874, MAX6875
+
+    Prefix: 'max6875'
+
+    Addresses scanned: None (see below)
+
+    Datasheet: http://pdfserv.maxim-ic.com/en/ds/MAX6874-MAX6875.pdf
+
+Author: Ben Gardner <bgardner@wabtec.com>
+
+
+Description
+-----------
+
+The Maxim MAX6875 is an EEPROM-programmable power-supply sequencer/supervisor.
+It provides timed outputs that can be used as a watchdog, if properly wired.
+It also provides 512 bytes of user EEPROM.
+
+At reset, the MAX6875 reads the configuration EEPROM into its configuration
+registers.  The chip then begins to operate according to the values in the
+registers.
+
+The Maxim MAX6874 is a similar, mostly compatible device, with more inputs
+and outputs:
+
+===========  ===     ===    ====
+-            vin     gpi    vout
+===========  ===     ===    ====
+MAX6874        6       4       8
+MAX6875        4       3       5
+===========  ===     ===    ====
+
+See the datasheet for more information.
+
+
+Sysfs entries
+-------------
+
+eeprom        - 512 bytes of user-defined EEPROM space.
+
+
+General Remarks
+---------------
+
+Valid addresses for the MAX6875 are 0x50 and 0x52.
+
+Valid addresses for the MAX6874 are 0x50, 0x52, 0x54 and 0x56.
+
+The driver does not probe any address, so you explicitly instantiate the
+devices.
+
+Example::
+
+  $ modprobe max6875
+  $ echo max6875 0x50 > /sys/bus/i2c/devices/i2c-0/new_device
+
+The MAX6874/MAX6875 ignores address bit 0, so this driver attaches to multiple
+addresses.  For example, for address 0x50, it also reserves 0x51.
+The even-address instance is called 'max6875', the odd one is 'dummy'.
+
+
+Programming the chip using i2c-dev
+----------------------------------
+
+Use the i2c-dev interface to access and program the chips.
+
+Reads and writes are performed differently depending on the address range.
+
+The configuration registers are at addresses 0x00 - 0x45.
+
+Use i2c_smbus_write_byte_data() to write a register and
+i2c_smbus_read_byte_data() to read a register.
+
+The command is the register number.
+
+Examples:
+
+To write a 1 to register 0x45::
+
+  i2c_smbus_write_byte_data(fd, 0x45, 1);
+
+To read register 0x45::
+
+  value = i2c_smbus_read_byte_data(fd, 0x45);
+
+
+The configuration EEPROM is at addresses 0x8000 - 0x8045.
+
+The user EEPROM is at addresses 0x8100 - 0x82ff.
+
+Use i2c_smbus_write_word_data() to write a byte to EEPROM.
+
+The command is the upper byte of the address: 0x80, 0x81, or 0x82.
+The data word is the lower part of the address or'd with data << 8::
+
+  cmd = address >> 8;
+  val = (address & 0xff) | (data << 8);
+
+Example:
+
+To write 0x5a to address 0x8003::
+
+  i2c_smbus_write_word_data(fd, 0x80, 0x5a03);
+
+
+Reading data from the EEPROM is a little more complicated.
+
+Use i2c_smbus_write_byte_data() to set the read address and then
+i2c_smbus_read_byte() or i2c_smbus_read_i2c_block_data() to read the data.
+
+Example:
+
+To read data starting at offset 0x8100, first set the address::
+
+  i2c_smbus_write_byte_data(fd, 0x81, 0x00);
+
+And then read the data::
+
+  value = i2c_smbus_read_byte(fd);
+
+or::
+
+  count = i2c_smbus_read_i2c_block_data(fd, 0x84, 16, buffer);
+
+The block read should read 16 bytes.
+
+0x84 is the block read command.
+
+See the datasheet for more details.
+
-- 
cgit 


From 7fc1148cfde1c0d53930c7a5ac62b5bffe7b0e0b Mon Sep 17 00:00:00 2001
From: Zhiyong Tao <zhiyong.tao@mediatek.com>
Date: Wed, 24 Apr 2019 09:11:11 +0800
Subject: dt-bindings: adc: mt8183: add binding document

The commit adds mt8183 compatible node in binding document.

Signed-off-by: Zhiyong Tao <zhiyong.tao@mediatek.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/adc/mt6577_auxadc.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/adc/mt6577_auxadc.txt b/Documentation/devicetree/bindings/iio/adc/mt6577_auxadc.txt
index 0df9befdaecc..936a0b4666da 100644
--- a/Documentation/devicetree/bindings/iio/adc/mt6577_auxadc.txt
+++ b/Documentation/devicetree/bindings/iio/adc/mt6577_auxadc.txt
@@ -15,6 +15,7 @@ Required properties:
     - "mediatek,mt2712-auxadc": For MT2712 family of SoCs
     - "mediatek,mt7622-auxadc": For MT7622 family of SoCs
     - "mediatek,mt8173-auxadc": For MT8173 family of SoCs
+    - "mediatek,mt8183-auxadc", "mediatek,mt8173-auxadc": For MT8183 family of SoCs
   - reg: Address range of the AUXADC unit.
   - clocks: Should contain a clock specifier for each entry in clock-names
   - clock-names: Should contain "main".
-- 
cgit 


From cf54f4dd07a652d9a0630ec4e82c5ff082d2a14e Mon Sep 17 00:00:00 2001
From: Brian Masney <masneyb@onstation.org>
Date: Sat, 27 Apr 2019 14:23:59 -0400
Subject: dt-bindings: iio: isl29018: convert bindings to YAML format

Convert the isl29018 device tree bindings to the new YAML format.

Signed-off-by: Brian Masney <masneyb@onstation.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/light/isl29018.txt     | 27 -----------
 .../devicetree/bindings/iio/light/isl29018.yaml    | 56 ++++++++++++++++++++++
 2 files changed, 56 insertions(+), 27 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/iio/light/isl29018.txt
 create mode 100644 Documentation/devicetree/bindings/iio/light/isl29018.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/light/isl29018.txt b/Documentation/devicetree/bindings/iio/light/isl29018.txt
deleted file mode 100644
index b9bbde3e13ed..000000000000
--- a/Documentation/devicetree/bindings/iio/light/isl29018.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-* ISL 29018/29023/29035 I2C ALS, Proximity, and Infrared sensor
-
-Required properties:
-
-  - compatible: Should be one of
-		"isil,isl29018"
-		"isil,isl29023"
-		"isil,isl29035"
-  - reg: the I2C address of the device
-
-Optional properties:
-
-  - interrupts: the sole interrupt generated by the device
-
-  Refer to interrupt-controller/interrupts.txt for generic interrupt client
-  node bindings.
-
-  - vcc-supply: phandle to the regulator that provides power to the sensor.
-
-Example:
-
-isl29018@44 {
-	compatible = "isil,isl29018";
-	reg = <0x44>;
-	interrupt-parent = <&gpio>;
-	interrupts = <TEGRA_GPIO(Z, 2) IRQ_TYPE_LEVEL_HIGH>;
-};
diff --git a/Documentation/devicetree/bindings/iio/light/isl29018.yaml b/Documentation/devicetree/bindings/iio/light/isl29018.yaml
new file mode 100644
index 000000000000..cbb00be8f359
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/light/isl29018.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/light/isl29018.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: |
+  Intersil 29018/29023/29035 Ambient Light, Infrared Light, and Proximity Sensor
+
+maintainers:
+  - Brian Masney <masneyb@onstation.org>
+
+description: |
+  Ambient and infrared light sensing with proximity detection over an i2c
+  interface.
+
+  https://www.renesas.com/us/en/www/doc/datasheet/isl29018.pdf
+  https://www.renesas.com/us/en/www/doc/datasheet/isl29023.pdf
+  https://www.renesas.com/us/en/www/doc/datasheet/isl29035.pdf
+
+properties:
+  compatible:
+    enum:
+      - isil,isl29018
+      - isil,isl29023
+      - isil,isl29035
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  vcc-supply:
+    description: Regulator that provides power to the sensor
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    i2c {
+
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        sensor@44 {
+                compatible = "isil,isl29018";
+                reg = <0x44>;
+                interrupts-extended = <&msmgpio 61 IRQ_TYPE_LEVEL_HIGH>;
+        };
+    };
+...
-- 
cgit 


From f7b99e5948e6be8b850eecbb2de1986d26a76b36 Mon Sep 17 00:00:00 2001
From: Radu Pirea <radu_nicolae.pirea@upb.ro>
Date: Sun, 5 May 2019 21:06:45 +0300
Subject: dt-bindings: mfd: atmel-usart: add DMA bindings for USART in SPI mode

The bindings for DMA are now common for both drivers of the USART
IP.

The node given as an example for USART in SPI mode has been updated in
order to include DMA bindings.

Signed-off-by: Radu Pirea <radu_nicolae.pirea@upb.ro>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/mfd/atmel-usart.txt          | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mfd/atmel-usart.txt b/Documentation/devicetree/bindings/mfd/atmel-usart.txt
index 7f0cd72f47d2..699fd3c9ace8 100644
--- a/Documentation/devicetree/bindings/mfd/atmel-usart.txt
+++ b/Documentation/devicetree/bindings/mfd/atmel-usart.txt
@@ -17,17 +17,24 @@ Required properties for USART in SPI mode:
 - cs-gpios: chipselects (internal cs not supported)
 - atmel,usart-mode : Must be <AT91_USART_MODE_SPI> (found in dt-bindings/mfd/at91-usart.h)
 
+Optional properties in serial and SPI mode:
+- dma bindings for dma transfer:
+	- dmas: DMA specifier, consisting of a phandle to DMA controller node,
+		memory peripheral interface and USART DMA channel ID, FIFO configuration.
+		The order of DMA channels is fixed. The first DMA channel must be TX
+		associated channel and the second one must be RX associated channel.
+		Refer to dma.txt and atmel-dma.txt for details.
+	- dma-names: "tx" for TX channel.
+		     "rx" for RX channel.
+		     The order of dma-names is also fixed. The first name must be "tx"
+		     and the second one must be "rx" as in the examples below.
+
 Optional properties in serial mode:
 - atmel,use-dma-rx: use of PDC or DMA for receiving data
 - atmel,use-dma-tx: use of PDC or DMA for transmitting data
 - {rts,cts,dtr,dsr,rng,dcd}-gpios: specify a GPIO for RTS/CTS/DTR/DSR/RI/DCD line respectively.
   It will use specified PIO instead of the peripheral function pin for the USART feature.
   If unsure, don't specify this property.
-- add dma bindings for dma transfer:
-	- dmas: DMA specifier, consisting of a phandle to DMA controller node,
-		memory peripheral interface and USART DMA channel ID, FIFO configuration.
-		Refer to dma.txt and atmel-dma.txt for details.
-	- dma-names: "rx" for RX channel, "tx" for TX channel.
 - atmel,fifo-size: maximum number of data the RX and TX FIFOs can store for FIFO
   capable USARTs.
 - rs485-rts-delay, rs485-rx-during-tx, linux,rs485-enabled-at-boot-time: see rs485.txt
@@ -81,5 +88,8 @@ Example:
 		interrupts = <12 IRQ_TYPE_LEVEL_HIGH 5>;
 		clocks = <&usart0_clk>;
 		clock-names = "usart";
+		dmas = <&dma0 2 AT91_DMA_CFG_PER_ID(3)>,
+		       <&dma0 2 (AT91_DMA_CFG_PER_ID(4) | AT91_DMA_CFG_FIFOCFG_ASAP)>;
+		dma-names = "tx", "rx";
 		cs-gpios = <&pioB 3 0>;
 	};
-- 
cgit 


From 977bfde5d4cbb9d2eb1969eb2eb671cdb9b9fcbf Mon Sep 17 00:00:00 2001
From: Eric Jeong <eric.jeong.opensource@diasemi.com>
Date: Thu, 18 Apr 2019 15:09:44 +0900
Subject: dt-bindings: regulator: add document bindings for slg51000

Add device tree binding information for slg51000 regulator driver.
Example bindings for SLG51000 are added.

Signed-off-by: Eric Jeong <eric.jeong.opensource@diasemi.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/regulator/slg51000.txt     | 88 ++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/slg51000.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/regulator/slg51000.txt b/Documentation/devicetree/bindings/regulator/slg51000.txt
new file mode 100644
index 000000000000..aa0733e49b90
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/slg51000.txt
@@ -0,0 +1,88 @@
+* Dialog Semiconductor SLG51000 Voltage Regulator
+
+Required properties:
+- compatible : Should be "dlg,slg51000" for SLG51000
+- reg : Specifies the I2C slave address.
+- xxx-supply: Input voltage supply regulator for ldo3 to ldo7.
+  These entries are required if regulators are enabled for a device.
+  An absence of these properties can cause the regulator registration to fail.
+  If some of input supply is powered through battery or always-on supply then
+  also it is required to have these parameters with proper node handle of always
+  on power supply.
+    vin3-supply: Input supply for ldo3
+    vin4-supply: Input supply for ldo4
+    vin5-supply: Input supply for ldo5
+    vin6-supply: Input supply for ldo6
+    vin7-supply: Input supply for ldo7
+
+Optional properties:
+- interrupt-parent : Specifies the reference to the interrupt controller.
+- interrupts : IRQ line information.
+- dlg,cs-gpios : Specify a valid GPIO for chip select
+
+Sub-nodes:
+- regulators : This node defines the settings for the regulators.
+  The content of the sub-node is defined by the standard binding
+  for regulators; see regulator.txt.
+
+  The SLG51000 regulators are bound using their names listed below:
+    ldo1
+    ldo2
+    ldo3
+    ldo4
+    ldo5
+    ldo6
+    ldo7
+
+Optional properties for regulators:
+- enable-gpios : Specify a valid GPIO for platform control of the regulator.
+
+Example:
+	pmic: slg51000@75 {
+		compatible = "dlg,slg51000";
+		reg = <0x75>;
+
+		regulators {
+			ldo1 {
+			        regulator-name = "ldo1";
+			        regulator-min-microvolt = <2400000>;
+			        regulator-max-microvolt = <3300000>;
+			};
+
+			ldo2 {
+			        regulator-name = "ldo2";
+			        regulator-min-microvolt = <2400000>;
+			        regulator-max-microvolt = <3300000>;
+			};
+
+			ldo3 {
+			        regulator-name = "ldo3";
+			        regulator-min-microvolt = <1200000>;
+			        regulator-max-microvolt = <3750000>;
+			};
+
+			ldo4 {
+			        regulator-name = "ldo4";
+			        regulator-min-microvolt = <1200000>;
+			        regulator-max-microvolt = <3750000>;
+			};
+
+			ldo5 {
+			        regulator-name = "ldo5";
+			        regulator-min-microvolt = <500000>;
+			        regulator-max-microvolt = <1200000>;
+			};
+
+			ldo6 {
+			        regulator-name = "ldo6";
+			        regulator-min-microvolt = <500000>;
+			        regulator-max-microvolt = <1200000>;
+			};
+
+			ldo7 {
+			        regulator-name = "ldo7";
+			        regulator-min-microvolt = <1200000>;
+			        regulator-max-microvolt = <3750000>;
+			};
+		};
+	};
-- 
cgit 


From e35f5ad6a965de5d301ca5957a1c48c53fe366fb Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Wed, 15 May 2019 15:18:56 +0200
Subject: ASoC: meson: add tohdmitx DT bindings

Add the bindings and the related documentation for the audio hdmitx
control glue of the Amlogic g12a SoC family

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Tested-by: Neil Armstrong <narmstrong@baylibre.com>
Tested-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/sound/amlogic,g12a-tohdmitx.txt       | 55 ++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/sound/amlogic,g12a-tohdmitx.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/sound/amlogic,g12a-tohdmitx.txt b/Documentation/devicetree/bindings/sound/amlogic,g12a-tohdmitx.txt
new file mode 100644
index 000000000000..aa6c35570d31
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/amlogic,g12a-tohdmitx.txt
@@ -0,0 +1,55 @@
+* Amlogic HDMI Tx control glue
+
+Required properties:
+- compatible: "amlogic,g12a-tohdmitx"
+- reg: physical base address of the controller and length of memory
+       mapped region.
+- #sound-dai-cells: should be 1.
+
+Example on the S905X2 SoC:
+
+tohdmitx: audio-controller@744 {
+	compatible = "amlogic,g12a-tohdmitx";
+	reg = <0x0 0x744 0x0 0x4>;
+	#sound-dai-cells = <1>;
+};
+
+Example of an 'amlogic,axg-sound-card':
+
+sound {
+	compatible = "amlogic,axg-sound-card";
+
+[...]
+
+	dai-link-x {
+		sound-dai = <&tdmif_a>;
+		dai-format = "i2s";
+		dai-tdm-slot-tx-mask-0 = <1 1>;
+
+		codec-0 {
+			sound-dai = <&tohdmitx TOHDMITX_I2S_IN_A>;
+		};
+
+		codec-1 {
+			sound-dai = <&external_dac>;
+		};
+	};
+
+	dai-link-y {
+		sound-dai = <&tdmif_c>;
+		dai-format = "i2s";
+		dai-tdm-slot-tx-mask-0 = <1 1>;
+
+		codec {
+			sound-dai = <&tohdmitx TOHDMITX_I2S_IN_C>;
+		};
+	};
+
+	dai-link-z {
+		sound-dai = <&tohdmitx TOHDMITX_I2S_OUT>;
+
+		codec {
+			sound-dai = <&hdmi_tx>;
+		};
+	};
+};
-- 
cgit 


From 506c7f9b0612c46732aa74317f59a28d9477905b Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Thu, 16 May 2019 09:59:25 +0200
Subject: dt-bindings: sound: Convert Allwinner SPDIF binding to YAML

The Allwinner SoCs feature an SPDIF controller across multiple SoC
generations.

However, earlier generations were a bit simpler than the subsequent ones,
and for example would always have RX and TX capabilities, and no reset
lines.

In order to express this, let's create two YAML schemas instead of the free
form text we had before.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/sound/allwinner,sun4i-a10-spdif.yaml  | 101 +++++++++++++++++++++
 .../bindings/sound/sunxi,sun4i-spdif.txt           |  42 ---------
 2 files changed, 101 insertions(+), 42 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
 delete mode 100644 Documentation/devicetree/bindings/sound/sunxi,sun4i-spdif.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
new file mode 100644
index 000000000000..5d72d48e923e
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/allwinner,sun4i-a10-spdif.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 S/PDIF Controller Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Liam Girdwood <lgirdwood@gmail.com>
+  - Mark Brown <broonie@kernel.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  "#sound-dai-cells":
+    const: 0
+
+  compatible:
+    oneOf:
+      - const: allwinner,sun4i-a10-spdif
+      - const: allwinner,sun6i-a31-spdif
+      - const: allwinner,sun8i-h3-spdif
+      - items:
+          - const: allwinner,sun8i-a83t-spdif
+          - const: allwinner,sun8i-h3-spdif
+      - items:
+          - const: allwinner,sun50i-a64-spdif
+          - const: allwinner,sun8i-h3-spdif
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Bus Clock
+      - description: Module Clock
+
+  clock-names:
+    items:
+      - const: apb
+      - const: spdif
+
+  dmas:
+    items:
+      - description: RX DMA Channel
+      - description: TX DMA Channel
+
+  dma-names:
+    items:
+      - const: rx
+      - const: tx
+
+  # Even though it only applies to subschemas under the conditionals,
+  # not listing them here will trigger a warning because of the
+  # additionalsProperties set to false.
+  resets:
+    maxItems: 1
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun6i-a31-spdif
+              - allwinner,sun8i-h3-spdif
+
+    then:
+      required:
+        - resets
+
+required:
+  - "#sound-dai-cells"
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - dmas
+  - dma-names
+
+additionalProperties: false
+
+examples:
+  - |
+    spdif: spdif@1c21000 {
+        #sound-dai-cells = <0>;
+        compatible = "allwinner,sun4i-a10-spdif";
+        reg = <0x01c21000 0x40>;
+        interrupts = <13>;
+        clocks = <&apb0_gates 1>, <&spdif_clk>;
+        clock-names = "apb", "spdif";
+        dmas = <&dma 0 2>, <&dma 0 2>;
+        dma-names = "rx", "tx";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/sound/sunxi,sun4i-spdif.txt b/Documentation/devicetree/bindings/sound/sunxi,sun4i-spdif.txt
deleted file mode 100644
index 0c64a209c2e9..000000000000
--- a/Documentation/devicetree/bindings/sound/sunxi,sun4i-spdif.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-Allwinner Sony/Philips Digital Interface Format (S/PDIF) Controller
-
-The Allwinner S/PDIF audio block is a transceiver that allows the
-processor to receive and transmit digital audio via an coaxial cable or
-a fibre cable.
-For now only playback is supported.
-
-Required properties:
-
-  - compatible		: should be one of the following:
-    - "allwinner,sun4i-a10-spdif": for the Allwinner A10 SoC
-    - "allwinner,sun6i-a31-spdif": for the Allwinner A31 SoC
-    - "allwinner,sun8i-h3-spdif": for the Allwinner H3 SoC
-
-  - reg			: Offset and length of the register set for the device.
-
-  - interrupts		: Contains the spdif interrupt.
-
-  - dmas		: Generic dma devicetree binding as described in
-			  Documentation/devicetree/bindings/dma/dma.txt.
-
-  - dma-names		: Two dmas have to be defined, "tx" and "rx".
-
-  - clocks		: Contains an entry for each entry in clock-names.
-
-  - clock-names		: Includes the following entries:
-	"apb"		  clock for the spdif bus.
-	"spdif"		  clock for spdif controller.
-
-  - resets		: reset specifier for the ahb reset (A31 and newer only)
-
-Example:
-
-spdif: spdif@1c21000 {
-	compatible = "allwinner,sun4i-a10-spdif";
-	reg = <0x01c21000 0x40>;
-	interrupts = <13>;
-	clocks = <&apb0_gates 1>, <&spdif_clk>;
-	clock-names = "apb", "spdif";
-	dmas = <&dma 0 2>, <&dma 0 2>;
-	dma-names = "rx", "tx";
-};
-- 
cgit 


From b1f35dfd7c2f509b0736f1ff02c314130b6b773e Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Thu, 16 May 2019 09:59:26 +0200
Subject: dt-bindings: sound: sun4i-spdif: Document that the RX channel can be
 missing

The H3 and compatibles controllers don't have any reception capabilities,
even though it was never documented as such in the binding before.

Therefore, on those controllers, we don't have the option to set an RX DMA
channel.

This was already done in the DTSI, but the binding itself was never
updated. Let's add a special case in the schemas.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/sound/allwinner,sun4i-a10-spdif.yaml  | 38 ++++++++++++++++------
 1 file changed, 28 insertions(+), 10 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
index 5d72d48e923e..a49ef2294a74 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
@@ -44,19 +44,11 @@ properties:
       - const: apb
       - const: spdif
 
-  dmas:
-    items:
-      - description: RX DMA Channel
-      - description: TX DMA Channel
-
-  dma-names:
-    items:
-      - const: rx
-      - const: tx
-
   # Even though it only applies to subschemas under the conditionals,
   # not listing them here will trigger a warning because of the
   # additionalsProperties set to false.
+  dmas: true
+  dma-names: true
   resets:
     maxItems: 1
 
@@ -73,6 +65,32 @@ allOf:
       required:
         - resets
 
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,sun8i-h3-spdif
+
+    then:
+      properties:
+        dmas:
+          description: TX DMA Channel
+
+        dma-names:
+          const: tx
+
+    else:
+      properties:
+        dmas:
+          items:
+            - description: RX DMA Channel
+            - description: TX DMA Channel
+
+        dma-names:
+          items:
+            - const: rx
+            - const: tx
+
 required:
   - "#sound-dai-cells"
   - compatible
-- 
cgit 


From e359a29225dde53fade5fa4bc3d957599fb5f9a5 Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <alexandru.ardelean@analog.com>
Date: Fri, 17 May 2019 18:44:41 +0300
Subject: dt-bindings: iio: accel: adxl345: switch to YAML bindings

The ADX345 supports both I2C & SPI bindings.
This change switches from old text bindings, to YAML bindings, and also
tries to make use of the recent multiple-examples support.

Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/accel/adi,adxl345.yaml | 72 ++++++++++++++++++++++
 .../devicetree/bindings/iio/accel/adxl345.txt      | 39 ------------
 2 files changed, 72 insertions(+), 39 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/iio/accel/adi,adxl345.yaml
 delete mode 100644 Documentation/devicetree/bindings/iio/accel/adxl345.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/accel/adi,adxl345.yaml b/Documentation/devicetree/bindings/iio/accel/adi,adxl345.yaml
new file mode 100644
index 000000000000..7ba167e2e1ea
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/accel/adi,adxl345.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/accelerometers/adi,adxl345.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices ADXL345/ADXL375 3-Axis Digital Accelerometers
+
+maintainers:
+  - Michael Hennerich <michael.hennerich@analog.com>
+
+description: |
+  Analog Devices ADXL345/ADXL375 3-Axis Digital Accelerometers that supports
+  both I2C & SPI interfaces.
+    http://www.analog.com/en/products/mems/accelerometers/adxl345.html
+    http://www.analog.com/en/products/sensors-mems/accelerometers/adxl375.html
+
+properties:
+  compatible:
+    enum:
+      - adi,adxl345
+      - adi,adxl375
+
+  reg:
+    maxItems: 1
+
+  spi-cpha: true
+
+  spi-cpol: true
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    i2c0 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        /* Example for a I2C device node */
+        accelerometer@2a {
+            compatible = "adi,adxl345";
+            reg = <0x53>;
+            interrupt-parent = <&gpio0>;
+            interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+        };
+    };
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    spi0 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        /* Example for a SPI device node */
+        accelerometer@0 {
+            compatible = "adi,adxl345";
+            reg = <0>;
+            spi-max-frequency = <5000000>;
+            spi-cpol;
+            spi-cpha;
+            interrupt-parent = <&gpio0>;
+            interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/iio/accel/adxl345.txt b/Documentation/devicetree/bindings/iio/accel/adxl345.txt
deleted file mode 100644
index f9525f6e3d43..000000000000
--- a/Documentation/devicetree/bindings/iio/accel/adxl345.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-Analog Devices ADXL345/ADXL375 3-Axis Digital Accelerometers
-
-http://www.analog.com/en/products/mems/accelerometers/adxl345.html
-http://www.analog.com/en/products/sensors-mems/accelerometers/adxl375.html
-
-Required properties:
- - compatible : should be one of
-		"adi,adxl345"
-		"adi,adxl375"
- - reg : the I2C address or SPI chip select number of the sensor
-
-Required properties for SPI bus usage:
- - spi-max-frequency : set maximum clock frequency, must be 5000000
- - spi-cpol and spi-cpha : must be defined for adxl345 to enable SPI mode 3
-
-Optional properties:
- - interrupts: interrupt mapping for IRQ as documented in
-   Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
-
-Example for a I2C device node:
-
-	accelerometer@2a {
-		compatible = "adi,adxl345";
-		reg = <0x53>;
-		interrupt-parent = <&gpio1>;
-		interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
-	};
-
-Example for a SPI device node:
-
-	accelerometer@0 {
-		compatible = "adi,adxl345";
-		reg = <0>;
-		spi-max-frequency = <5000000>;
-		spi-cpol;
-		spi-cpha;
-		interrupt-parent = <&gpio1>;
-		interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
-	};
-- 
cgit 


From 73e1ccdab3c34b8f9be6e55600d40a1478049121 Mon Sep 17 00:00:00 2001
From: Chun-Hung Wu <chun-hung.wu@mediatek.com>
Date: Thu, 16 May 2019 16:10:44 +0800
Subject: dt-bindings: iio: adc: mediatek: Add document for mt6765

Add compatible node for mt6765 auxadc

Signed-off-by: Chun-Hung Wu <chun-hung.wu@mediatek.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/adc/mt6577_auxadc.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/adc/mt6577_auxadc.txt b/Documentation/devicetree/bindings/iio/adc/mt6577_auxadc.txt
index 936a0b4666da..78c06e05c8e5 100644
--- a/Documentation/devicetree/bindings/iio/adc/mt6577_auxadc.txt
+++ b/Documentation/devicetree/bindings/iio/adc/mt6577_auxadc.txt
@@ -13,6 +13,7 @@ Required properties:
   - compatible: Should be one of:
     - "mediatek,mt2701-auxadc": For MT2701 family of SoCs
     - "mediatek,mt2712-auxadc": For MT2712 family of SoCs
+    - "mediatek,mt6765-auxadc": For MT6765 family of SoCs
     - "mediatek,mt7622-auxadc": For MT7622 family of SoCs
     - "mediatek,mt8173-auxadc": For MT8173 family of SoCs
     - "mediatek,mt8183-auxadc", "mediatek,mt8173-auxadc": For MT8183 family of SoCs
-- 
cgit 


From a02177a39344b643dccfa3b18fa5c1fc4984e1e5 Mon Sep 17 00:00:00 2001
From: Leonard Crestez <leonard.crestez@nxp.com>
Date: Mon, 13 May 2019 11:01:39 +0000
Subject: dt-bindings: imx-cpufreq-dt: Document opp-supported-hw usage

The interpretation of opp-supported-hw bits for imx-cpufreq-dt driver is
not very obvious so attempt to explain it.

There is no OF compat string associated.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 .../devicetree/bindings/cpufreq/imx-cpufreq-dt.txt | 37 ++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/cpufreq/imx-cpufreq-dt.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/cpufreq/imx-cpufreq-dt.txt b/Documentation/devicetree/bindings/cpufreq/imx-cpufreq-dt.txt
new file mode 100644
index 000000000000..87bff5add3f9
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/imx-cpufreq-dt.txt
@@ -0,0 +1,37 @@
+i.MX CPUFreq-DT OPP bindings
+================================
+
+Certain i.MX SoCs support different OPPs depending on the "market segment" and
+"speed grading" value which are written in fuses. These bits are combined with
+the opp-supported-hw values for each OPP to check if the OPP is allowed.
+
+Required properties:
+--------------------
+
+For each opp entry in 'operating-points-v2' table:
+- opp-supported-hw: Two bitmaps indicating:
+  - Supported speed grade mask
+  - Supported market segment mask
+    0: Consumer
+    1: Extended Consumer
+    2: Industrial
+    3: Automotive
+
+Example:
+--------
+
+opp_table {
+	compatible = "operating-points-v2";
+	opp-1000000000 {
+		opp-hz = /bits/ 64 <1000000000>;
+		/* grade >= 0, consumer only */
+		opp-supported-hw = <0xf>, <0x3>;
+	};
+
+	opp-1300000000 {
+		opp-hz = /bits/ 64 <1300000000>;
+		opp-microvolt = <1000000>;
+		/* grade >= 1, all segments */
+		opp-supported-hw = <0xe>, <0x7>;
+	};
+}
-- 
cgit 


From a28d1b67cf4f330fb532c8e515025b360d10691a Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Tue, 7 May 2019 21:38:48 +0200
Subject: dt-bindings: bus: Convert Allwinner RSB to a schema

The newer Allwinner SoCs feature a bus designed for the PMIC, similar to
I2C called RSB. Let's convert the device tree bindings for that bus over to
a YAML schemas.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
---
 .../bindings/bus/allwinner,sun8i-a23-rsb.yaml      | 79 ++++++++++++++++++++++
 .../devicetree/bindings/bus/sunxi-rsb.txt          | 47 -------------
 2 files changed, 79 insertions(+), 47 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
 delete mode 100644 Documentation/devicetree/bindings/bus/sunxi-rsb.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
new file mode 100644
index 000000000000..fc2f63860cc8
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/allwinner,sun8i-a23-rsb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A23 RSB Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+  compatible:
+    oneOf:
+      - const: allwinner,sun8i-a23-rsb
+      - items:
+        - const: allwinner,sun8i-a83t-rsb
+        - const: allwinner,sun8i-a23-rsb
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  clock-frequency:
+    minimum: 1
+    maximum: 20000000
+
+patternProperties:
+  "^.*@[0-9a-fA-F]+$":
+    properties:
+      reg:
+        maxItems: 1
+
+    required:
+      - reg
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - resets
+
+examples:
+  - |
+    rsb@1f03400 {
+        compatible = "allwinner,sun8i-a23-rsb";
+        reg = <0x01f03400 0x400>;
+        interrupts = <0 39 4>;
+        clocks = <&apb0_gates 3>;
+        clock-frequency = <3000000>;
+        resets = <&apb0_rst 3>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        pmic@3e3 {
+            compatible = "...";
+            reg = <0x3e3>;
+
+            /* ... */
+        };
+    };
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/bus/sunxi-rsb.txt b/Documentation/devicetree/bindings/bus/sunxi-rsb.txt
deleted file mode 100644
index eb3ed628c6f1..000000000000
--- a/Documentation/devicetree/bindings/bus/sunxi-rsb.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-Allwinner Reduced Serial Bus (RSB) controller
-
-The RSB controller found on later Allwinner SoCs is an SMBus like 2 wire
-serial bus with 1 master and up to 15 slaves. It is represented by a node
-for the controller itself, and child nodes representing the slave devices.
-
-Required properties :
-
- - reg             : Offset and length of the register set for the controller.
- - compatible      : Shall be "allwinner,sun8i-a23-rsb".
- - interrupts      : The interrupt line associated to the RSB controller.
- - clocks          : The gate clk associated to the RSB controller.
- - resets          : The reset line associated to the RSB controller.
- - #address-cells  : shall be 1
- - #size-cells     : shall be 0
-
-Optional properties :
-
- - clock-frequency : Desired RSB bus clock frequency in Hz. Maximum is 20MHz.
-		     If not set this defaults to 3MHz.
-
-Child nodes:
-
-An RSB controller node can contain zero or more child nodes representing
-slave devices on the bus.  Child 'reg' properties should contain the slave
-device's hardware address. The hardware address is hardwired in the device,
-which can normally be found in the datasheet.
-
-Example:
-
-	rsb@1f03400 {
-		compatible = "allwinner,sun8i-a23-rsb";
-		reg = <0x01f03400 0x400>;
-		interrupts = <0 39 4>;
-		clocks = <&apb0_gates 3>;
-		clock-frequency = <3000000>;
-		resets = <&apb0_rst 3>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		pmic@3e3 {
-			compatible = "...";
-			reg = <0x3e3>;
-
-			/* ... */
-		};
-	};
-- 
cgit 


From 4c1ca625c622b7a9f04c2949fd1ffdc6effa86de Mon Sep 17 00:00:00 2001
From: Nick Crews <ncrews@chromium.org>
Date: Tue, 16 Apr 2019 19:20:47 -0600
Subject: platform/chrome: wilco_ec: Add Boot on AC support

Boot on AC is a policy which makes the device boot from S5 when AC
power is connected. This is useful for users who want to run their
device headless or with a dock.

Signed-off-by: Nick Crews <ncrews@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
---
 Documentation/ABI/testing/sysfs-platform-wilco-ec | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-platform-wilco-ec

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-platform-wilco-ec b/Documentation/ABI/testing/sysfs-platform-wilco-ec
new file mode 100644
index 000000000000..8e5d6eee44db
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-wilco-ec
@@ -0,0 +1,9 @@
+What:		/sys/bus/platform/devices/GOOG000C\:00/boot_on_ac
+Date:		April 2019
+KernelVersion:	5.3
+Description:
+		Boot on AC is a policy which makes the device boot from S5
+		when AC power is connected. This is useful for users who
+		want to run their device headless or with a dock.
+
+		Input should be parseable by kstrtou8() to 0 or 1.
-- 
cgit 


From 2ad1f7a91449de48d4bd5d1ec361ba7bb9026505 Mon Sep 17 00:00:00 2001
From: Nick Crews <ncrews@chromium.org>
Date: Wed, 8 May 2019 15:38:09 -0600
Subject: platform/chrome: wilco_ec: Remove 256 byte transfers

The 0xF6 command, intended to send and receive 256 byte payloads to
and from the EC, is not needed. The 0xF5 command for 32 byte
payloads is sufficient. This patch removes support for the 0xF6
command and 256 byte payloads.

Signed-off-by: Nick Crews <ncrews@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
---
 Documentation/ABI/testing/debugfs-wilco-ec | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/debugfs-wilco-ec b/Documentation/ABI/testing/debugfs-wilco-ec
index 73a5a66ddca6..9d8d9d2def5b 100644
--- a/Documentation/ABI/testing/debugfs-wilco-ec
+++ b/Documentation/ABI/testing/debugfs-wilco-ec
@@ -23,11 +23,9 @@ Description:
 
 		For writing, bytes 0-1 indicate the message type, one of enum
 		wilco_ec_msg_type. Byte 2+ consist of the data passed in the
-		request, starting at MBOX[0]
-
-		At least three bytes are required for writing, two for the type
-		and at least a single byte of data. Only the first
-		EC_MAILBOX_DATA_SIZE bytes of MBOX will be used.
+		request, starting at MBOX[0]. At least three bytes are required
+		for writing, two for the type and at least a single byte of
+		data.
 
 		Example:
 		// Request EC info type 3 (EC firmware build date)
@@ -40,7 +38,7 @@ Description:
 		$ cat /sys/kernel/debug/wilco_ec/raw
 		00 00 31 32 2f 32 31 2f 31 38 00 38 00 01 00 2f 00  ..12/21/18.8...
 
-		Note that the first 32 bytes of the received MBOX[] will be
-		printed, even if some of the data is junk. It is up to you to
-		know how many of the first bytes of data are the actual
-		response.
+		Note that the first 16 bytes of the received MBOX[] will be
+		printed, even if some of the data is junk, and skipping bytes
+		17 to 32. It is up to you to know how many of the first bytes of
+		data are the actual response.
-- 
cgit 


From e15d92bee8184048e7419193263e62a0830cf365 Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@zoho.com.cn>
Date: Mon, 20 May 2019 14:21:16 +0800
Subject: doc: ext2: update description of quota options for ext2

ext2 support user/group disk quota by specifying
usrquota/grpquota option on mount, so fix the
description in the doc properly.

Signed-off-by: Chengguang Xu <cgxu519@zoho.com.cn>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 Documentation/filesystems/ext2.txt | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/ext2.txt b/Documentation/filesystems/ext2.txt
index a19973a4dd1e..94c2cf0292f5 100644
--- a/Documentation/filesystems/ext2.txt
+++ b/Documentation/filesystems/ext2.txt
@@ -57,7 +57,13 @@ noacl				Don't support POSIX ACLs.
 
 nobh				Do not attach buffer_heads to file pagecache.
 
-grpquota,noquota,quota,usrquota	Quota options are silently ignored by ext2.
+quota, usrquota			Enable user disk quota support
+				(requires CONFIG_QUOTA).
+
+grpquota			Enable group disk quota support
+				(requires CONFIG_QUOTA).
+
+noquota option ls silently ignored by ext2.
 
 
 Specification
-- 
cgit 


From 70ac79f5d1ef95e61b174e95192b3555a4caef2f Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Date: Fri, 17 May 2019 10:26:29 +0100
Subject: dt-bindings: arm: renesas: Add HopeRun RZ/G2[M] boards

This patch adds board HiHope RZ/G2M (the main board, powered by
the R8A774A1) and board HiHope RZ/G2 EX (the expansion board
that sits on top of the HiHope RZ/G2M). Both boards are made
by Jiangsu HopeRun Software Co., Ltd. (a.k.a. HopeRun).

Useful links:
http://hihope.org/product/detail/rzg2
https://item.taobao.com/item.htm?spm=a2oq0.12575281.0.0.6bcf1debQpzkRS&ft=t&id=592177498472
http://www.hoperun.com/Cn/news/id/379

We already know that the HiHope RZ/G2 EX will also sit on the
HiHope RZ/G2N, even though the HiHope RZ/G2N doesn't exist just
yet.

Signed-off-by: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Reviewed-by: Chris Paterson <Chris.Paterson2@renesas.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 Documentation/devicetree/bindings/arm/renesas.yaml | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/renesas.yaml b/Documentation/devicetree/bindings/arm/renesas.yaml
index 19f379863d50..08c923f8c257 100644
--- a/Documentation/devicetree/bindings/arm/renesas.yaml
+++ b/Documentation/devicetree/bindings/arm/renesas.yaml
@@ -106,6 +106,14 @@ properties:
 
       - description: RZ/G2M (R8A774A1)
         items:
+          - enum:
+              - hoperun,hihope-rzg2m # HopeRun HiHope RZ/G2M platform
+          - const: renesas,r8a774a1
+
+      - items:
+          - enum:
+              - hoperun,hihope-rzg2-ex # HopeRun expansion board for HiHope RZ/G2 platforms
+          - const: hoperun,hihope-rzg2m
           - const: renesas,r8a774a1
 
       - description: RZ/G2E (R8A774C0)
-- 
cgit 


From 90fdbe8ab27047f7b9da57d1975c14a8d8f39370 Mon Sep 17 00:00:00 2001
From: "S.j. Wang" <shengjiu.wang@nxp.com>
Date: Thu, 16 May 2019 11:40:56 +0000
Subject: ASoC: cs42xx8: add reset-gpios in binding document

Add reset-gpios property, which is optional.

Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/sound/cs42xx8.txt | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/sound/cs42xx8.txt b/Documentation/devicetree/bindings/sound/cs42xx8.txt
index 8619a156d038..bbfe39347c20 100644
--- a/Documentation/devicetree/bindings/sound/cs42xx8.txt
+++ b/Documentation/devicetree/bindings/sound/cs42xx8.txt
@@ -14,6 +14,11 @@ Required properties:
   - VA-supply, VD-supply, VLS-supply, VLC-supply: power supplies for the device,
     as covered in Documentation/devicetree/bindings/regulator/regulator.txt
 
+Optional properties:
+
+  - reset-gpios : a GPIO spec to define which pin is connected to the chip's
+    !RESET pin
+
 Example:
 
 cs42888: codec@48 {
@@ -25,4 +30,5 @@ cs42888: codec@48 {
 	VD-supply = <&reg_audio>;
 	VLS-supply = <&reg_audio>;
 	VLC-supply = <&reg_audio>;
+	reset-gpios = <&pca9557_b 1 GPIO_ACTIVE_LOW>;
 };
-- 
cgit 


From f22558d701755f2f9cfdbd4a0862a58ef5fb1dbb Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Mon, 13 May 2019 09:56:32 +0200
Subject: dt-bindings: olpc,xo1.75-ec: Add OLPC XO-1.75 EC bindings

The OLPC XO-1.75 Embedded Controller is a SPI master that uses extra
signals for handshaking. It needs to know when is the slave (Linux)
side's TX FIFO ready for transfer (the ready-gpio signal on the SPI
controller node) and when does it wish to respond with a command (the
cmd-gpio property).

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Acked-by: Pavel Machek <pavel@ucw.cz>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 .../devicetree/bindings/misc/olpc,xo1.75-ec.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/misc/olpc,xo1.75-ec.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/misc/olpc,xo1.75-ec.txt b/Documentation/devicetree/bindings/misc/olpc,xo1.75-ec.txt
new file mode 100644
index 000000000000..8c4d649cdd8f
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/olpc,xo1.75-ec.txt
@@ -0,0 +1,23 @@
+OLPC XO-1.75 Embedded Controller
+
+Required properties:
+- compatible: Should be "olpc,xo1.75-ec".
+- cmd-gpios: gpio specifier of the CMD pin
+
+The embedded controller requires the SPI controller driver to signal readiness
+to receive a transfer (that is, when TX FIFO contains the response data) by
+strobing the ACK pin with the ready signal. See the "ready-gpios" property of the
+SSP binding as documented in:
+<Documentation/devicetree/bindings/spi/spi-pxa2xx.txt>.
+
+Example:
+	&ssp3 {
+		spi-slave;
+		ready-gpios = <&gpio 125 GPIO_ACTIVE_HIGH>;
+
+		slave {
+			compatible = "olpc,xo1.75-ec";
+			spi-cpha;
+			cmd-gpios = <&gpio 155 GPIO_ACTIVE_HIGH>;
+		};
+	};
-- 
cgit 


From 2f230f300497f695bec65439b20df3293eabf889 Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Mon, 6 May 2019 14:16:09 -0500
Subject: dt-bindings: mfd: LMU: Add the ramp up/down property

Document the ramp-up and ramp-down property in the binding.
Removing the "sec" from the property definition as seconds is
implied.

Signed-off-by: Dan Murphy <dmurphy@ti.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 Documentation/devicetree/bindings/mfd/ti-lmu.txt | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mfd/ti-lmu.txt b/Documentation/devicetree/bindings/mfd/ti-lmu.txt
index 86ca786d54fc..160eb0a5e21a 100644
--- a/Documentation/devicetree/bindings/mfd/ti-lmu.txt
+++ b/Documentation/devicetree/bindings/mfd/ti-lmu.txt
@@ -23,8 +23,14 @@ Required properties:
          0x36 for LM3633, LM3697
          0x63 for LM3695
 
-Optional property:
+Optional properties:
   - enable-gpios: A GPIO specifier for hardware enable pin.
+  - ramp-up-us: Current ramping from one brightness level to
+		the a higher brightness level.
+		Range from 2048 us - 117.44 s
+  - ramp-down-us: Current ramping from one brightness level to
+		  the a lower brightness level.
+		  Range from 2048 us - 117.44 s
 
 Required node:
   - backlight: All LMU devices have backlight child nodes.
@@ -90,7 +96,7 @@ lm3631@29 {
 
 		lcd_bl {
 			led-sources = <0 1>;
-			ramp-up-msec = <300>;
+			ramp-up-us = <300000>;
 		};
 	};
 };
@@ -152,15 +158,15 @@ lm3633@36 {
 		main {
 			label = "main_lcd";
 			led-sources = <1 2>;
-			ramp-up-msec = <500>;
-			ramp-down-msec = <500>;
+			ramp-up-us = <500000>;
+			ramp-down-us = <500000>;
 		};
 
 		front {
 			label = "front_lcd";
 			led-sources = <0>;
-			ramp-up-msec = <1000>;
-			ramp-down-msec = <0>;
+			ramp-up-us = <1000000>;
+			ramp-down-us = <0>;
 		};
 	};
 
@@ -212,8 +218,8 @@ lm3697@36 {
 
 		lcd {
 			led-sources = <0 1 2>;
-			ramp-up-msec = <200>;
-			ramp-down-msec = <200>;
+			ramp-up-us = <200000>;
+			ramp-down-us = <200000>;
 		};
 	};
 
-- 
cgit 


From d0147554d004e7cc427f6cfecf871916e34e8b67 Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Mon, 6 May 2019 14:16:10 -0500
Subject: dt-bindings: mfd: LMU: Add ti,brightness-resolution

Add ti,brightness-resolution to the TI LMU binding to define
whether the device uses 8-bit brightness or 11-bit brightness.

Signed-off-by: Dan Murphy <dmurphy@ti.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 Documentation/devicetree/bindings/mfd/ti-lmu.txt | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mfd/ti-lmu.txt b/Documentation/devicetree/bindings/mfd/ti-lmu.txt
index 160eb0a5e21a..e3efefb194c5 100644
--- a/Documentation/devicetree/bindings/mfd/ti-lmu.txt
+++ b/Documentation/devicetree/bindings/mfd/ti-lmu.txt
@@ -31,6 +31,16 @@ Optional properties:
   - ramp-down-us: Current ramping from one brightness level to
 		  the a lower brightness level.
 		  Range from 2048 us - 117.44 s
+  - ti,brightness-resolution - This determines whether to use 8 bit brightness
+			       mode or 11 bit brightness mode.  If this value is
+			       not set the device is defaulted to the preferred
+			       8bit brightness mode per 7.3.4.1 of the data
+			       sheet.  This setting can either be in the parent
+			       node or as part of the LED child nodes.  This
+			       is determined by the part itself if the strings
+			       have a common brightness register or individual
+			       brightness registers.
+			       The values are 255 (8bit) or 2047 (11bit).
 
 Required node:
   - backlight: All LMU devices have backlight child nodes.
@@ -217,6 +227,7 @@ lm3697@36 {
 		compatible = "ti,lm3697-backlight";
 
 		lcd {
+			ti,brightness-resolution = <255>;
 			led-sources = <0 1 2>;
 			ramp-up-us = <200000>;
 			ramp-down-us = <200000>;
-- 
cgit 


From bbc9be3ac8ab022061e7cfcc050ae02240144ba1 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 26 Mar 2019 19:23:31 +0000
Subject: Documentation: DT: Add entry for DPAA2 console

This patch adds a devicetree binding documentation for
FSL's DPAA2 console.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Li Yang <leoyang.li@nxp.com>
---
 Documentation/devicetree/bindings/misc/fsl,dpaa2-console.txt | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/misc/fsl,dpaa2-console.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/misc/fsl,dpaa2-console.txt b/Documentation/devicetree/bindings/misc/fsl,dpaa2-console.txt
new file mode 100644
index 000000000000..1442ba5d2d98
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/fsl,dpaa2-console.txt
@@ -0,0 +1,11 @@
+DPAA2 console support
+
+Required properties:
+
+    - compatible
+        Value type: <string>
+        Definition: Must be "fsl,dpaa2-console".
+    - reg
+        Value type: <prop-encoded-array>
+        Definition: A standard property.  Specifies the region where the MCFBA
+                    (MC firmware base address) register can be found.
-- 
cgit 


From 804898e8bc43080c9194ae7e1807bf0e995bad73 Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Wed, 15 May 2019 10:20:39 -0500
Subject: dt-bindings: rcar-gen3-phy-usb2: Document dr_mode

Document the optional dr_mode property

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt
index d46188f450bf..64afd086e606 100644
--- a/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt
+++ b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt
@@ -46,6 +46,9 @@ channel as USB OTG:
 	       regulator will be managed during the PHY power on/off sequence.
 - renesas,no-otg-pins: boolean, specify when a board does not provide proper
 		       otg pins.
+- dr_mode: string, indicates the working mode for the PHY. Can be "host",
+           "peripheral", or "otg". Should be set if otg controller is not used.
+
 
 Example (R-Car H3):
 
-- 
cgit 


From b051c93746543385f398b6b8002a51e9d43143a6 Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Wed, 15 May 2019 10:20:40 -0500
Subject: dt-bindings: rcar-gen3-phy-usb2: Add r7s9210 support

Document RZ/A2 (R7S9210) SoC bindings.

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt
index 64afd086e606..503a8cfb3184 100644
--- a/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt
+++ b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt
@@ -1,10 +1,12 @@
 * Renesas R-Car generation 3 USB 2.0 PHY
 
 This file provides information on what the device node for the R-Car generation
-3, RZ/G1C and RZ/G2 USB 2.0 PHY contain.
+3, RZ/G1C, RZ/G2 and RZ/A2 USB 2.0 PHY contain.
 
 Required properties:
-- compatible: "renesas,usb2-phy-r8a77470" if the device is a part of an R8A77470
+- compatible: "renesas,usb2-phy-r7s9210" if the device is a part of an R7S9210
+	      SoC.
+	      "renesas,usb2-phy-r8a77470" if the device is a part of an R8A77470
 	      SoC.
 	      "renesas,usb2-phy-r8a774a1" if the device is a part of an R8A774A1
 	      SoC.
@@ -20,8 +22,8 @@ Required properties:
 	      R8A77990 SoC.
 	      "renesas,usb2-phy-r8a77995" if the device is a part of an
 	      R8A77995 SoC.
-	      "renesas,rcar-gen3-usb2-phy" for a generic R-Car Gen3 or RZ/G2
-	      compatible device.
+	      "renesas,rcar-gen3-usb2-phy" for a generic R-Car Gen3, RZ/G2 or
+	      RZ/A2 compatible device.
 
 	      When compatible with the generic version, nodes must list the
 	      SoC-specific version corresponding to the platform first
-- 
cgit 


From 6e9aed4ed4ca129510fcb1af495391d4717246d6 Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Wed, 15 May 2019 10:20:45 -0500
Subject: dt-bindings: usb: renesas_usbhs: Add support for r7s9210

Add support for r7s9210 (RZ/A2M) SoC

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/usb/renesas_usbhs.txt | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
index b8acc2a994a8..e39255ea6e4f 100644
--- a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
+++ b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
@@ -20,9 +20,11 @@ Required properties:
 	- "renesas,usbhs-r8a77990" for r8a77990 (R-Car E3) compatible device
 	- "renesas,usbhs-r8a77995" for r8a77995 (R-Car D3) compatible device
 	- "renesas,usbhs-r7s72100" for r7s72100 (RZ/A1) compatible device
+	- "renesas,usbhs-r7s9210" for r7s9210 (RZ/A2) compatible device
 	- "renesas,rcar-gen2-usbhs" for R-Car Gen2 or RZ/G1 compatible devices
 	- "renesas,rcar-gen3-usbhs" for R-Car Gen3 or RZ/G2 compatible devices
 	- "renesas,rza1-usbhs" for RZ/A1 compatible device
+	- "renesas,rza2-usbhs" for RZ/A2 compatible device
 
 	When compatible with the generic version, nodes must list the
 	SoC-specific version corresponding to the platform first followed
-- 
cgit 


From 221fb7268d67c0867a93f23586bd53c3c3969eee Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 20 May 2019 14:22:25 -0700
Subject: Documentation/networking: fix af_xdp.rst Sphinx warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix Sphinx warnings in Documentation/networking/af_xdp.rst by
adding indentation:

Documentation/networking/af_xdp.rst:319: WARNING: Literal block expected; none found.
Documentation/networking/af_xdp.rst:326: WARNING: Literal block expected; none found.

Fixes: 0f4a9b7d4ecb ("xsk: add FAQ to facilitate for first time users")

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Magnus Karlsson <magnus.karlsson@intel.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 Documentation/networking/af_xdp.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
index e14d7d40fc75..50bccbf68308 100644
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -316,16 +316,16 @@ A: When a netdev of a physical NIC is initialized, Linux usually
    all the traffic, you can force the netdev to only have 1 queue, queue
    id 0, and then bind to queue 0. You can use ethtool to do this::
 
-   sudo ethtool -L <interface> combined 1
+     sudo ethtool -L <interface> combined 1
 
    If you want to only see part of the traffic, you can program the
    NIC through ethtool to filter out your traffic to a single queue id
    that you can bind your XDP socket to. Here is one example in which
    UDP traffic to and from port 4242 are sent to queue 2::
 
-   sudo ethtool -N <interface> rx-flow-hash udp4 fn
-   sudo ethtool -N <interface> flow-type udp4 src-port 4242 dst-port \
-   4242 action 2
+     sudo ethtool -N <interface> rx-flow-hash udp4 fn
+     sudo ethtool -N <interface> flow-type udp4 src-port 4242 dst-port \
+     4242 action 2
 
    A number of other ways are possible all up to the capabilitites of
    the NIC you have.
-- 
cgit 


From 0f202f69a16b9a687911202083c37636ad73c77e Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 17 May 2019 10:27:22 -0500
Subject: dt-bindings: arm: amlogic: Move 'amlogic, meson-gx-ao-secure' binding
 to its own file

It is best practice to have 1 binding per file, so board level bindings
should be separate for various misc SoC bindings.

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Carlo Caione <carlo@caione.org>
Cc: Kevin Hilman <khilman@baylibre.com>
Cc: devicetree@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-amlogic@lists.infradead.org
Signed-off-by: Rob Herring <robh@kernel.org>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Acked-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
---
 Documentation/devicetree/bindings/arm/amlogic.txt  | 29 ----------------------
 .../arm/amlogic/amlogic,meson-gx-ao-secure.txt     | 28 +++++++++++++++++++++
 2 files changed, 28 insertions(+), 29 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/amlogic.txt b/Documentation/devicetree/bindings/arm/amlogic.txt
index 061f7b98a07f..5f650248b18e 100644
--- a/Documentation/devicetree/bindings/arm/amlogic.txt
+++ b/Documentation/devicetree/bindings/arm/amlogic.txt
@@ -111,32 +111,3 @@ Board compatible values (alphabetically, grouped by SoC):
   - "amlogic,u200" (Meson g12a s905d2)
   - "amediatech,x96-max" (Meson g12a s905x2)
   - "seirobotics,sei510" (Meson g12a s905x2)
-
-Amlogic Meson Firmware registers Interface
-------------------------------------------
-
-The Meson SoCs have a register bank with status and data shared with the
-secure firmware.
-
-Required properties:
- - compatible: For Meson GX SoCs, must be "amlogic,meson-gx-ao-secure", "syscon"
-
-Properties should indentify components of this register interface :
-
-Meson GX SoC Information
-------------------------
-A firmware register encodes the SoC type, package and revision information on
-the Meson GX SoCs.
-If present, the following property should be added :
-
-Optional properties:
-  - amlogic,has-chip-id: If present, the interface gives the current SoC version.
-
-Example
--------
-
-ao-secure@140 {
-	compatible = "amlogic,meson-gx-ao-secure", "syscon";
-	reg = <0x0 0x140 0x0 0x140>;
-	amlogic,has-chip-id;
-};
diff --git a/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.txt b/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.txt
new file mode 100644
index 000000000000..c67d9f48fb91
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.txt
@@ -0,0 +1,28 @@
+Amlogic Meson Firmware registers Interface
+------------------------------------------
+
+The Meson SoCs have a register bank with status and data shared with the
+secure firmware.
+
+Required properties:
+ - compatible: For Meson GX SoCs, must be "amlogic,meson-gx-ao-secure", "syscon"
+
+Properties should indentify components of this register interface :
+
+Meson GX SoC Information
+------------------------
+A firmware register encodes the SoC type, package and revision information on
+the Meson GX SoCs.
+If present, the following property should be added :
+
+Optional properties:
+  - amlogic,has-chip-id: If present, the interface gives the current SoC version.
+
+Example
+-------
+
+ao-secure@140 {
+	compatible = "amlogic,meson-gx-ao-secure", "syscon";
+	reg = <0x0 0x140 0x0 0x140>;
+	amlogic,has-chip-id;
+};
-- 
cgit 


From c0c752d8c6b3dd78b3e7951453bc43be880dcd08 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 17 May 2019 10:27:23 -0500
Subject: dt-bindings: arm: Convert Amlogic board/soc bindings to json-schema

Convert Amlogic SoC bindings to DT schema format using json-schema.

Cc: Carlo Caione <carlo@caione.org>
Cc: Kevin Hilman <khilman@baylibre.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: devicetree@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Neil Armstrong <narmstrong@baylibre.com>
[khilman: updated maninainers]
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
---
 Documentation/devicetree/bindings/arm/amlogic.txt  | 113 -----------------
 Documentation/devicetree/bindings/arm/amlogic.yaml | 138 +++++++++++++++++++++
 2 files changed, 138 insertions(+), 113 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/amlogic.txt
 create mode 100644 Documentation/devicetree/bindings/arm/amlogic.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/amlogic.txt b/Documentation/devicetree/bindings/arm/amlogic.txt
deleted file mode 100644
index 5f650248b18e..000000000000
--- a/Documentation/devicetree/bindings/arm/amlogic.txt
+++ /dev/null
@@ -1,113 +0,0 @@
-Amlogic MesonX device tree bindings
--------------------------------------------
-
-Work in progress statement:
-
-Device tree files and bindings applying to Amlogic SoCs and boards are
-considered "unstable". Any Amlogic device tree binding may change at
-any time. Be sure to use a device tree binary and a kernel image
-generated from the same source tree.
-
-Please refer to Documentation/devicetree/bindings/ABI.txt for a definition of a
-stable binding/ABI.
-
----------------------------------------------------------------
-
-Boards with the Amlogic Meson6 SoC shall have the following properties:
-  Required root node property:
-    compatible: "amlogic,meson6"
-
-Boards with the Amlogic Meson8 SoC shall have the following properties:
-  Required root node property:
-    compatible: "amlogic,meson8";
-
-Boards with the Amlogic Meson8b SoC shall have the following properties:
-  Required root node property:
-    compatible: "amlogic,meson8b";
-
-Boards with the Amlogic Meson8m2 SoC shall have the following properties:
-  Required root node property:
-    compatible: "amlogic,meson8m2";
-
-Boards with the Amlogic Meson GXBaby SoC shall have the following properties:
-  Required root node property:
-    compatible: "amlogic,meson-gxbb";
-
-Boards with the Amlogic Meson GXL S905X SoC shall have the following properties:
-  Required root node property:
-    compatible: "amlogic,s905x", "amlogic,meson-gxl";
-
-Boards with the Amlogic Meson GXL S905D SoC shall have the following properties:
-  Required root node property:
-    compatible: "amlogic,s905d", "amlogic,meson-gxl";
-
-Boards with the Amlogic Meson GXL S805X SoC shall have the following properties:
-  Required root node property:
-    compatible: "amlogic,s805x", "amlogic,meson-gxl";
-
-Boards with the Amlogic Meson GXL S905W SoC shall have the following properties:
-  Required root node property:
-    compatible: "amlogic,s905w", "amlogic,meson-gxl";
-
-Boards with the Amlogic Meson GXM S912 SoC shall have the following properties:
-  Required root node property:
-    compatible: "amlogic,s912", "amlogic,meson-gxm";
-
-Boards with the Amlogic Meson AXG A113D SoC shall have the following properties:
-  Required root node property:
-    compatible: "amlogic,a113d", "amlogic,meson-axg";
-
-Boards with the Amlogic Meson G12A S905D2 SoC shall have the following properties:
-  Required root node property:
-    compatible: "amlogic,g12a";
-
-Board compatible values (alphabetically, grouped by SoC):
-
-  - "geniatech,atv1200" (Meson6)
-
-  - "minix,neo-x8" (Meson8)
-
-  - "endless,ec100" (Meson8b)
-  - "hardkernel,odroid-c1" (Meson8b)
-  - "tronfy,mxq" (Meson8b)
-
-  - "tronsmart,mxiii-plus" (Meson8m2)
-
-  - "amlogic,p200" (Meson gxbb)
-  - "amlogic,p201" (Meson gxbb)
-  - "friendlyarm,nanopi-k2" (Meson gxbb)
-  - "hardkernel,odroid-c2" (Meson gxbb)
-  - "nexbox,a95x" (Meson gxbb or Meson gxl s905x)
-  - "tronsmart,vega-s95-pro", "tronsmart,vega-s95" (Meson gxbb)
-  - "tronsmart,vega-s95-meta", "tronsmart,vega-s95" (Meson gxbb)
-  - "tronsmart,vega-s95-telos", "tronsmart,vega-s95" (Meson gxbb)
-  - "wetek,hub" (Meson gxbb)
-  - "wetek,play2" (Meson gxbb)
-
-  - "amlogic,p212" (Meson gxl s905x)
-  - "hwacom,amazetv" (Meson gxl s905x)
-  - "khadas,vim" (Meson gxl s905x)
-  - "libretech,cc" (Meson gxl s905x)
-
-  - "amlogic,p230" (Meson gxl s905d)
-  - "amlogic,p231" (Meson gxl s905d)
-  - "phicomm,n1" (Meson gxl s905d)
-
-  - "amlogic,p241" (Meson gxl s805x)
-  - "libretech,aml-s805x-ac" (Meson gxl s805x)
-
-  - "amlogic,p281" (Meson gxl s905w)
-  - "oranth,tx3-mini" (Meson gxl s905w)
-
-  - "amlogic,q200" (Meson gxm s912)
-  - "amlogic,q201" (Meson gxm s912)
-  - "khadas,vim2" (Meson gxm s912)
-  - "kingnovel,r-box-pro" (Meson gxm S912)
-  - "nexbox,a1" (Meson gxm s912)
-  - "tronsmart,vega-s96" (Meson gxm s912)
-
-  - "amlogic,s400" (Meson axg a113d)
-
-  - "amlogic,u200" (Meson g12a s905d2)
-  - "amediatech,x96-max" (Meson g12a s905x2)
-  - "seirobotics,sei510" (Meson g12a s905x2)
diff --git a/Documentation/devicetree/bindings/arm/amlogic.yaml b/Documentation/devicetree/bindings/arm/amlogic.yaml
new file mode 100644
index 000000000000..a0dd12be4de4
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/amlogic.yaml
@@ -0,0 +1,138 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/amlogic.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic MesonX device tree bindings
+
+maintainers:
+  - Kevin Hilman <khilman@baylibre.com>
+
+description: |+
+  Work in progress statement:
+
+  Device tree files and bindings applying to Amlogic SoCs and boards are
+  considered "unstable". Any Amlogic device tree binding may change at
+  any time. Be sure to use a device tree binary and a kernel image
+  generated from the same source tree.
+
+  Please refer to Documentation/devicetree/bindings/ABI.txt for a definition of a
+  stable binding/ABI.
+
+properties:
+  $nodename:
+    const: '/'
+  compatible:
+    oneOf:
+      - description: Boards with the Amlogic Meson6 SoC
+        items:
+          - enum:
+              - geniatech,atv1200
+          - const: amlogic,meson6
+
+      - description: Boards with the Amlogic Meson8 SoC
+        items:
+          - enum:
+              - minix,neo-x8
+          - const: amlogic,meson8
+
+      - description: Boards with the Amlogic Meson8m2 SoC
+        items:
+          - enum:
+              - tronsmart,mxiii-plus
+          - const: amlogic,meson8m2
+
+      - description: Boards with the Amlogic Meson8b SoC
+        items:
+          - enum:
+              - endless,ec100
+              - hardkernel,odroid-c1
+              - tronfy,mxq
+          - const: amlogic,meson8b
+
+      - description: Boards with the Amlogic Meson GXBaby SoC
+        items:
+          - enum:
+              - amlogic,p200
+              - amlogic,p201
+              - friendlyarm,nanopi-k2
+              - hardkernel,odroid-c2
+              - nexbox,a95x
+              - wetek,hub
+              - wetek,play2
+          - const: amlogic,meson-gxbb
+
+      - description: Tronsmart Vega S95 devices
+        items:
+          - enum:
+              - tronsmart,vega-s95-pro
+              - tronsmart,vega-s95-meta
+              - tronsmart,vega-s95-telos
+          - const: tronsmart,vega-s95
+          - const: amlogic,meson-gxbb
+
+      - description: Boards with the Amlogic Meson GXL S805X SoC
+        items:
+          - enum:
+              - amlogic,p241
+              - libretech,aml-s805x-ac
+          - const: amlogic,s805x
+          - const: amlogic,meson-gxl
+
+      - description: Boards with the Amlogic Meson GXL S905W SoC
+        items:
+          - enum:
+              - amlogic,p281
+              - oranth,tx3-mini
+          - const: amlogic,s905w
+          - const: amlogic,meson-gxl
+
+      - description: Boards with the Amlogic Meson GXL S905X SoC
+        items:
+          - enum:
+              - amediatech,x96-max
+              - amlogic,p212
+              - hwacom,amazetv
+              - khadas,vim
+              - libretech,cc
+              - nexbox,a95x
+              - seirobotics,sei510
+          - const: amlogic,s905x
+          - const: amlogic,meson-gxl
+
+      - description: Boards with the Amlogic Meson GXL S905D SoC
+        items:
+          - enum:
+              - amlogic,p230
+              - amlogic,p231
+              - phicomm,n1
+          - const: amlogic,s905d
+          - const: amlogic,meson-gxl
+
+      - description: Boards with the Amlogic Meson GXM S912 SoC
+        items:
+          - enum:
+              - amlogic,q200
+              - amlogic,q201
+              - khadas,vim2
+              - kingnovel,r-box-pro
+              - nexbox,a1
+              - tronsmart,vega-s96
+          - const: amlogic,s912
+          - const: amlogic,meson-gxm
+
+      - description: Boards with the Amlogic Meson AXG A113D SoC
+        items:
+          - enum:
+              - amlogic,s400
+          - const: amlogic,a113d
+          - const: amlogic,meson-axg
+
+      - description: Boards with the Amlogic Meson G12A S905D2 SoC
+        items:
+          - enum:
+              - amlogic,u200
+          - const: amlogic,g12a
+
+...
-- 
cgit 


From 46f4050a6587ea3e59e6f47f06513c00689b9d40 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Tue, 21 May 2019 11:04:37 +0100
Subject: regulator: arizona: Update device tree binding to support Madera
 CODECs

The Arizona regulator drivers are now also used by the Cirrus Logic
Madera audio CODECs, update the binding document to link to the primary
binding document for these as well as the existing Arizona document.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/regulator/arizona-regulator.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/regulator/arizona-regulator.txt b/Documentation/devicetree/bindings/regulator/arizona-regulator.txt
index 443564d7784f..69bf41949b01 100644
--- a/Documentation/devicetree/bindings/regulator/arizona-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/arizona-regulator.txt
@@ -5,7 +5,8 @@ of analogue I/O.
 
 This document lists regulator specific bindings, see the primary binding
 document:
-  ../mfd/arizona.txt
+  For Wolfson Microelectronic Arizona codecs: ../mfd/arizona.txt
+  For Cirrus Logic Madera codecs: ../mfd/madera.txt
 
 Optional properties:
   - wlf,ldoena : GPIO specifier for the GPIO controlling LDOENA
-- 
cgit 


From 0a1b929356830257568f9547e173f0e7498060ea Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Mon, 20 May 2019 16:50:33 +0200
Subject: spi: Add YAML schemas for the generic SPI options

The SPI controllers have a bunch of generic options that are needed in a
device tree. Add a YAML schemas for those.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-bus.txt  | 112 +-------------
 .../devicetree/bindings/spi/spi-controller.yaml    | 161 +++++++++++++++++++++
 2 files changed, 162 insertions(+), 111 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/spi/spi-controller.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/spi/spi-bus.txt b/Documentation/devicetree/bindings/spi/spi-bus.txt
index 1f6e86f787ef..e07783505498 100644
--- a/Documentation/devicetree/bindings/spi/spi-bus.txt
+++ b/Documentation/devicetree/bindings/spi/spi-bus.txt
@@ -1,111 +1 @@
-SPI (Serial Peripheral Interface) busses
-
-SPI busses can be described with a node for the SPI controller device
-and a set of child nodes for each SPI slave on the bus.  The system's SPI
-controller may be described for use in SPI master mode or in SPI slave mode,
-but not for both at the same time.
-
-The SPI controller node requires the following properties:
-- compatible      - Name of SPI bus controller following generic names
-		    recommended practice.
-
-In master mode, the SPI controller node requires the following additional
-properties:
-- #address-cells  - number of cells required to define a chip select
-		address on the SPI bus.
-- #size-cells     - should be zero.
-
-In slave mode, the SPI controller node requires one additional property:
-- spi-slave       - Empty property.
-
-No other properties are required in the SPI bus node.  It is assumed
-that a driver for an SPI bus device will understand that it is an SPI bus.
-However, the binding does not attempt to define the specific method for
-assigning chip select numbers.  Since SPI chip select configuration is
-flexible and non-standardized, it is left out of this binding with the
-assumption that board specific platform code will be used to manage
-chip selects.  Individual drivers can define additional properties to
-support describing the chip select layout.
-
-Optional properties (master mode only):
-- cs-gpios	  - gpios chip select.
-- num-cs	  - total number of chipselects.
-
-If cs-gpios is used the number of chip selects will be increased automatically
-with max(cs-gpios > hw cs).
-
-So if for example the controller has 2 CS lines, and the cs-gpios
-property looks like this:
-
-cs-gpios = <&gpio1 0 0>, <0>, <&gpio1 1 0>, <&gpio1 2 0>;
-
-Then it should be configured so that num_chipselect = 4 with the
-following mapping:
-
-cs0 : &gpio1 0 0
-cs1 : native
-cs2 : &gpio1 1 0
-cs3 : &gpio1 2 0
-
-
-SPI slave nodes must be children of the SPI controller node.
-
-In master mode, one or more slave nodes (up to the number of chip selects) can
-be present.  Required properties are:
-- compatible      - Name of SPI device following generic names recommended
-		    practice.
-- reg             - Chip select address of device.
-- spi-max-frequency - Maximum SPI clocking speed of device in Hz.
-
-In slave mode, the (single) slave node is optional.
-If present, it must be called "slave".  Required properties are:
-- compatible      - Name of SPI device following generic names recommended
-		    practice.
-
-All slave nodes can contain the following optional properties:
-- spi-cpol        - Empty property indicating device requires inverse clock
-		    polarity (CPOL) mode.
-- spi-cpha        - Empty property indicating device requires shifted clock
-		    phase (CPHA) mode.
-- spi-cs-high     - Empty property indicating device requires chip select
-		    active high.
-- spi-3wire       - Empty property indicating device requires 3-wire mode.
-- spi-lsb-first   - Empty property indicating device requires LSB first mode.
-- spi-tx-bus-width - The bus width (number of data wires) that is used for MOSI.
-		    Defaults to 1 if not present.
-- spi-rx-bus-width - The bus width (number of data wires) that is used for MISO.
-		    Defaults to 1 if not present.
-- spi-rx-delay-us - Microsecond delay after a read transfer.
-- spi-tx-delay-us - Microsecond delay after a write transfer.
-
-Some SPI controllers and devices support Dual and Quad SPI transfer mode.
-It allows data in the SPI system to be transferred using 2 wires (DUAL) or 4
-wires (QUAD).
-Now the value that spi-tx-bus-width and spi-rx-bus-width can receive is
-only 1 (SINGLE), 2 (DUAL) and 4 (QUAD).
-Dual/Quad mode is not allowed when 3-wire mode is used.
-
-If a gpio chipselect is used for the SPI slave the gpio number will be passed
-via the SPI master node cs-gpios property.
-
-SPI example for an MPC5200 SPI bus:
-	spi@f00 {
-		#address-cells = <1>;
-		#size-cells = <0>;
-		compatible = "fsl,mpc5200b-spi","fsl,mpc5200-spi";
-		reg = <0xf00 0x20>;
-		interrupts = <2 13 0 2 14 0>;
-		interrupt-parent = <&mpc5200_pic>;
-
-		ethernet-switch@0 {
-			compatible = "micrel,ks8995m";
-			spi-max-frequency = <1000000>;
-			reg = <0>;
-		};
-
-		codec@1 {
-			compatible = "ti,tlv320aic26";
-			spi-max-frequency = <100000>;
-			reg = <1>;
-		};
-	};
+This file has moved to spi-controller.yaml.
diff --git a/Documentation/devicetree/bindings/spi/spi-controller.yaml b/Documentation/devicetree/bindings/spi/spi-controller.yaml
new file mode 100644
index 000000000000..876c0623f322
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-controller.yaml
@@ -0,0 +1,161 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/spi-controller.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SPI Controller Generic Binding
+
+maintainers:
+  - Mark Brown <broonie@kernel.org>
+
+description: |
+  SPI busses can be described with a node for the SPI controller device
+  and a set of child nodes for each SPI slave on the bus. The system SPI
+  controller may be described for use in SPI master mode or in SPI slave mode,
+  but not for both at the same time.
+
+properties:
+  $nodename:
+    pattern: "^spi(@.*|-[0-9a-f])*$"
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+  cs-gpios:
+    description: |
+      GPIOs used as chip selects.
+      If that property is used, the number of chip selects will be
+      increased automatically with max(cs-gpios, hardware chip selects).
+
+      So if, for example, the controller has 2 CS lines, and the
+      cs-gpios looks like this
+        cs-gpios = <&gpio1 0 0>, <0>, <&gpio1 1 0>, <&gpio1 2 0>;
+
+      Then it should be configured so that num_chipselect = 4, with
+      the following mapping
+        cs0 : &gpio1 0 0
+        cs1 : native
+        cs2 : &gpio1 1 0
+        cs3 : &gpio1 2 0
+
+  num-cs:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Total number of chip selects.
+
+  spi-slave:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      The SPI controller acts as a slave, instead of a master.
+
+patternProperties:
+  "^slave$":
+    type: object
+
+    properties:
+      compatible:
+        description:
+          Compatible of the SPI device.
+
+    required:
+      - compatible
+
+  "^.*@[0-9a-f]+$":
+    type: object
+
+    properties:
+      compatible:
+        description:
+          Compatible of the SPI device.
+
+      reg:
+        maxItems: 1
+        minimum: 0
+        maximum: 256
+        description:
+          Chip select used by the device.
+
+      spi-3wire:
+        $ref: /schemas/types.yaml#/definitions/flag
+        description:
+          The device requires 3-wire mode.
+
+      spi-cpha:
+        $ref: /schemas/types.yaml#/definitions/flag
+        description:
+          The device requires shifted clock phase (CPHA) mode.
+
+      spi-cpol:
+        $ref: /schemas/types.yaml#/definitions/flag
+        description:
+          The device requires inverse clock polarity (CPOL) mode.
+
+      spi-cs-high:
+        $ref: /schemas/types.yaml#/definitions/flag
+        description:
+          The device requires the chip select active high.
+
+      spi-lsb-first:
+        $ref: /schemas/types.yaml#/definitions/flag
+        description:
+          The device requires the LSB first mode.
+
+      spi-max-frequency:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description:
+          Maximum SPI clocking speed of the device in Hz.
+
+      spi-rx-bus-width:
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32
+          - enum: [ 1, 2, 4 ]
+          - default: 1
+        description:
+          Bus width to the SPI bus used for MISO.
+
+      spi-rx-delay-us:
+        description:
+          Delay, in microseconds, after a read transfer.
+
+      spi-tx-bus-width:
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32
+          - enum: [ 1, 2, 4 ]
+          - default: 1
+        description:
+          Bus width to the SPI bus used for MOSI.
+
+      spi-tx-delay-us:
+        description:
+          Delay, in microseconds, after a write transfer.
+
+    required:
+      - compatible
+      - reg
+
+examples:
+  - |
+    spi@f00 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        compatible = "fsl,mpc5200b-spi","fsl,mpc5200-spi";
+        reg = <0xf00 0x20>;
+        interrupts = <2 13 0 2 14 0>;
+        interrupt-parent = <&mpc5200_pic>;
+
+        ethernet-switch@0 {
+            compatible = "micrel,ks8995m";
+            spi-max-frequency = <1000000>;
+            reg = <0>;
+        };
+
+        codec@1 {
+            compatible = "ti,tlv320aic26";
+            spi-max-frequency = <100000>;
+            reg = <1>;
+        };
+    };
-- 
cgit 


From 3133f5c24305c31ef2805a7eb05fc18a664836d3 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Mon, 20 May 2019 16:50:34 +0200
Subject: spi: sun4i: Add YAML schemas

Switch the DT binding to a YAML schema to enable the DT validation.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/spi/allwinner,sun4i-a10-spi.yaml      | 86 ++++++++++++++++++++++
 .../devicetree/bindings/spi/spi-sun4i.txt          | 23 ------
 2 files changed, 86 insertions(+), 23 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml
 delete mode 100644 Documentation/devicetree/bindings/spi/spi-sun4i.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml
new file mode 100644
index 000000000000..c374fd4923a6
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/allwinner,sun4i-a10-spi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 SPI Controller Device Tree Bindings
+
+allOf:
+  - $ref: "spi-controller.yaml"
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  "#address-cells": true
+  "#size-cells": true
+
+  compatible:
+    const: allwinner,sun4i-a10-spi
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Bus Clock
+      - description: Module Clock
+
+  clock-names:
+    items:
+      - const: ahb
+      - const: mod
+
+  dmas:
+    items:
+      - description: RX DMA Channel
+      - description: TX DMA Channel
+
+  dma-names:
+    items:
+      - const: rx
+      - const: tx
+
+  num-cs: true
+
+patternProperties:
+  "^.*@[0-9a-f]+":
+    properties:
+      reg:
+        items:
+          minimum: 0
+          maximum: 4
+
+      spi-rx-bus-width:
+        const: 1
+
+      spi-tx-bus-width:
+        const: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    spi1: spi@1c06000 {
+        compatible = "allwinner,sun4i-a10-spi";
+        reg = <0x01c06000 0x1000>;
+        interrupts = <11>;
+        clocks = <&ahb_gates 21>, <&spi1_clk>;
+        clock-names = "ahb", "mod";
+        #address-cells = <1>;
+        #size-cells = <0>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/spi/spi-sun4i.txt b/Documentation/devicetree/bindings/spi/spi-sun4i.txt
deleted file mode 100644
index c75d604a8290..000000000000
--- a/Documentation/devicetree/bindings/spi/spi-sun4i.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-Allwinner A10 SPI controller
-
-Required properties:
-- compatible: Should be "allwinner,sun4-a10-spi".
-- reg: Should contain register location and length.
-- interrupts: Should contain interrupt.
-- clocks: phandle to the clocks feeding the SPI controller. Two are
-          needed:
-  - "ahb": the gated AHB parent clock
-  - "mod": the parent module clock
-- clock-names: Must contain the clock names described just above
-
-Example:
-
-spi1: spi@1c06000 {
-	compatible = "allwinner,sun4i-a10-spi";
-	reg = <0x01c06000 0x1000>;
-	interrupts = <11>;
-	clocks = <&ahb_gates 21>, <&spi1_clk>;
-	clock-names = "ahb", "mod";
-	#address-cells = <1>;
-	#size-cells = <0>;
-};
-- 
cgit 


From 101e6fce89b4707429185527e15d97c7e8f62ec5 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Mon, 20 May 2019 16:50:35 +0200
Subject: spi: sun6i: Add YAML schemas

Switch the DT binding to a YAML schema to enable the DT validation.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/spi/allwinner,sun6i-a31-spi.yaml      | 106 +++++++++++++++++++++
 .../devicetree/bindings/spi/spi-sun6i.txt          |  44 ---------
 2 files changed, 106 insertions(+), 44 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
 delete mode 100644 Documentation/devicetree/bindings/spi/spi-sun6i.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
new file mode 100644
index 000000000000..bda7a5befd8b
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/allwinner,sun6i-a31-spi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A31 SPI Controller Device Tree Bindings
+
+allOf:
+  - $ref: "spi-controller.yaml"
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  "#address-cells": true
+  "#size-cells": true
+
+  compatible:
+    enum:
+      - allwinner,sun6i-a31-spi
+      - allwinner,sun8i-h3-spi
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Bus Clock
+      - description: Module Clock
+
+  clock-names:
+    items:
+      - const: ahb
+      - const: mod
+
+  resets:
+    maxItems: 1
+
+  dmas:
+    items:
+      - description: RX DMA Channel
+      - description: TX DMA Channel
+
+  dma-names:
+    items:
+      - const: rx
+      - const: tx
+
+  num-cs: true
+
+patternProperties:
+  "^.*@[0-9a-f]+":
+    properties:
+      reg:
+        items:
+          minimum: 0
+          maximum: 4
+
+      spi-rx-bus-width:
+        const: 1
+
+      spi-tx-bus-width:
+        const: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    spi1: spi@1c69000 {
+        compatible = "allwinner,sun6i-a31-spi";
+        reg = <0x01c69000 0x1000>;
+        interrupts = <0 66 4>;
+        clocks = <&ahb1_gates 21>, <&spi1_clk>;
+        clock-names = "ahb", "mod";
+        resets = <&ahb1_rst 21>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+    };
+
+  - |
+    spi0: spi@1c68000 {
+        compatible = "allwinner,sun8i-h3-spi";
+        reg = <0x01c68000 0x1000>;
+        interrupts = <0 65 4>;
+        clocks = <&ccu 30>, <&ccu 82>;
+        clock-names = "ahb", "mod";
+        dmas = <&dma 23>, <&dma 23>;
+        dma-names = "rx", "tx";
+        resets = <&ccu 15>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/spi/spi-sun6i.txt b/Documentation/devicetree/bindings/spi/spi-sun6i.txt
deleted file mode 100644
index 435a8e0731ac..000000000000
--- a/Documentation/devicetree/bindings/spi/spi-sun6i.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-Allwinner A31/H3 SPI controller
-
-Required properties:
-- compatible: Should be "allwinner,sun6i-a31-spi" or "allwinner,sun8i-h3-spi".
-- reg: Should contain register location and length.
-- interrupts: Should contain interrupt.
-- clocks: phandle to the clocks feeding the SPI controller. Two are
-          needed:
-  - "ahb": the gated AHB parent clock
-  - "mod": the parent module clock
-- clock-names: Must contain the clock names described just above
-- resets: phandle to the reset controller asserting this device in
-          reset
-
-Optional properties:
-- dmas: DMA specifiers for rx and tx dma. See the DMA client binding,
-	Documentation/devicetree/bindings/dma/dma.txt
-- dma-names: DMA request names should include "rx" and "tx" if present.
-
-Example:
-
-spi1: spi@1c69000 {
-	compatible = "allwinner,sun6i-a31-spi";
-	reg = <0x01c69000 0x1000>;
-	interrupts = <0 66 4>;
-	clocks = <&ahb1_gates 21>, <&spi1_clk>;
-	clock-names = "ahb", "mod";
-	resets = <&ahb1_rst 21>;
-};
-
-spi0: spi@1c68000 {
-	compatible = "allwinner,sun8i-h3-spi";
-	reg = <0x01c68000 0x1000>;
-	interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
-	clocks = <&ccu CLK_BUS_SPI0>, <&ccu CLK_SPI0>;
-	clock-names = "ahb", "mod";
-	dmas = <&dma 23>, <&dma 23>;
-	dma-names = "rx", "tx";
-	pinctrl-names = "default";
-	pinctrl-0 = <&spi0_pins>;
-	resets = <&ccu RST_BUS_SPI0>;
-	#address-cells = <1>;
-	#size-cells = <0>;
-};
-- 
cgit 


From 7ef5f7dd2a07f7460ce900818cc4cd8222daeebe Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Thu, 9 May 2019 21:34:14 -0700
Subject: dt-bindings: remoteproc: Rename and amend Hexagon v56 binding

The SDM845 Audio DSP peripheral image loader binding describes the
properties needed to load and boot firmware on a Hexagon v56. Rename the
file and add the Compute DSP (CDSP) found in QCS404 to the binding.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 .../bindings/remoteproc/qcom,adsp-pil.txt          | 125 ------------------
 .../bindings/remoteproc/qcom,hexagon-v56.txt       | 140 +++++++++++++++++++++
 2 files changed, 140 insertions(+), 125 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/remoteproc/qcom,adsp-pil.txt
 create mode 100644 Documentation/devicetree/bindings/remoteproc/qcom,hexagon-v56.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,adsp-pil.txt b/Documentation/devicetree/bindings/remoteproc/qcom,adsp-pil.txt
deleted file mode 100644
index 66af2c30944f..000000000000
--- a/Documentation/devicetree/bindings/remoteproc/qcom,adsp-pil.txt
+++ /dev/null
@@ -1,125 +0,0 @@
-Qualcomm Technology Inc. ADSP Peripheral Image Loader
-
-This document defines the binding for a component that loads and boots firmware
-on the Qualcomm Technology Inc. ADSP Hexagon core.
-
-- compatible:
-	Usage: required
-	Value type: <string>
-	Definition: must be one of:
-		    "qcom,sdm845-adsp-pil"
-
-- reg:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: must specify the base address and size of the qdsp6ss register
-
-- interrupts-extended:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: must list the watchdog, fatal IRQs ready, handover and
-		    stop-ack IRQs
-
-- interrupt-names:
-	Usage: required
-	Value type: <stringlist>
-	Definition: must be "wdog", "fatal", "ready", "handover", "stop-ack"
-
-- clocks:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition:  List of 8 phandle and clock specifier pairs for the adsp.
-
-- clock-names:
-	Usage: required
-	Value type: <stringlist>
-	Definition: List of clock input name strings sorted in the same
-		    order as the clocks property. Definition must have
-		    "xo", "sway_cbcr", "lpass_ahbs_aon_cbcr",
-		    "lpass_ahbm_aon_cbcr", "qdsp6ss_xo", "qdsp6ss_sleep"
-		    and "qdsp6ss_core".
-
-- power-domains:
-	Usage: required
-	Value type: <phandle>
-	Definition: reference to cx power domain node.
-
-- resets:
-	Usage: required
-	Value type: <phandle>
-	Definition: reference to the list of 2 reset-controller for the adsp.
-
-- reset-names:
-        Usage: required
-        Value type: <stringlist>
-        Definition: must be "pdc_sync" and "cc_lpass"
-
-- qcom,halt-regs:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: a phandle reference to a syscon representing TCSR followed
-			by the offset within syscon for lpass halt register.
-
-- memory-region:
-	Usage: required
-	Value type: <phandle>
-	Definition: reference to the reserved-memory for the ADSP
-
-- qcom,smem-states:
-	Usage: required
-	Value type: <phandle>
-	Definition: reference to the smem state for requesting the ADSP to
-		    shut down
-
-- qcom,smem-state-names:
-	Usage: required
-	Value type: <stringlist>
-	Definition: must be "stop"
-
-
-= SUBNODES
-The adsp node may have an subnode named "glink-edge" that describes the
-communication edge, channels and devices related to the ADSP.
-See ../soc/qcom/qcom,glink.txt for details on how to describe these.
-
-= EXAMPLE
-The following example describes the resources needed to boot control the
-ADSP, as it is found on SDM845 boards.
-
-	remoteproc@17300000 {
-		compatible = "qcom,sdm845-adsp-pil";
-		reg = <0x17300000 0x40c>;
-
-		interrupts-extended = <&intc GIC_SPI 162 IRQ_TYPE_EDGE_RISING>,
-			<&adsp_smp2p_in 0 IRQ_TYPE_EDGE_RISING>,
-			<&adsp_smp2p_in 1 IRQ_TYPE_EDGE_RISING>,
-			<&adsp_smp2p_in 2 IRQ_TYPE_EDGE_RISING>,
-			<&adsp_smp2p_in 3 IRQ_TYPE_EDGE_RISING>;
-		interrupt-names = "wdog", "fatal", "ready",
-			"handover", "stop-ack";
-
-		clocks = <&rpmhcc RPMH_CXO_CLK>,
-			<&gcc GCC_LPASS_SWAY_CLK>,
-			<&lpasscc LPASS_Q6SS_AHBS_AON_CLK>,
-			<&lpasscc LPASS_Q6SS_AHBM_AON_CLK>,
-			<&lpasscc LPASS_QDSP6SS_XO_CLK>,
-			<&lpasscc LPASS_QDSP6SS_SLEEP_CLK>,
-			<&lpasscc LPASS_QDSP6SS_CORE_CLK>;
-		clock-names = "xo", "sway_cbcr",
-			"lpass_ahbs_aon_cbcr",
-			"lpass_ahbm_aon_cbcr", "qdsp6ss_xo",
-			"qdsp6ss_sleep", "qdsp6ss_core";
-
-		power-domains = <&rpmhpd SDM845_CX>;
-
-		resets = <&pdc_reset PDC_AUDIO_SYNC_RESET>,
-			 <&aoss_reset AOSS_CC_LPASS_RESTART>;
-		reset-names = "pdc_sync", "cc_lpass";
-
-		qcom,halt-regs = <&tcsr_mutex_regs 0x22000>;
-
-		memory-region = <&pil_adsp_mem>;
-
-		qcom,smem-states = <&adsp_smp2p_out 0>;
-		qcom,smem-state-names = "stop";
-	};
diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,hexagon-v56.txt b/Documentation/devicetree/bindings/remoteproc/qcom,hexagon-v56.txt
new file mode 100644
index 000000000000..1337a3d93d35
--- /dev/null
+++ b/Documentation/devicetree/bindings/remoteproc/qcom,hexagon-v56.txt
@@ -0,0 +1,140 @@
+Qualcomm Technology Inc. Hexagon v56 Peripheral Image Loader
+
+This document defines the binding for a component that loads and boots firmware
+on the Qualcomm Technology Inc. Hexagon v56 core.
+
+- compatible:
+	Usage: required
+	Value type: <string>
+	Definition: must be one of:
+		    "qcom,qcs404-cdsp-pil",
+		    "qcom,sdm845-adsp-pil"
+
+- reg:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: must specify the base address and size of the qdsp6ss register
+
+- interrupts-extended:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: must list the watchdog, fatal IRQs ready, handover and
+		    stop-ack IRQs
+
+- interrupt-names:
+	Usage: required
+	Value type: <stringlist>
+	Definition: must be "wdog", "fatal", "ready", "handover", "stop-ack"
+
+- clocks:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition:  List of phandles and clock specifier pairs for the Hexagon,
+		     per clock-names below.
+
+- clock-names:
+	Usage: required for SDM845 ADSP
+	Value type: <stringlist>
+	Definition: List of clock input name strings sorted in the same
+		    order as the clocks property. Definition must have
+		    "xo", "sway_cbcr", "lpass_ahbs_aon_cbcr",
+		    "lpass_ahbm_aon_cbcr", "qdsp6ss_xo", "qdsp6ss_sleep"
+		    and "qdsp6ss_core".
+
+- clock-names:
+	Usage: required for QCS404 CDSP
+	Value type: <stringlist>
+	Definition: List of clock input name strings sorted in the same
+		    order as the clocks property. Definition must have
+		    "xo", "sway", "tbu", "bimc", "ahb_aon", "q6ss_slave",
+		    "q6ss_master", "q6_axim".
+
+- power-domains:
+	Usage: required
+	Value type: <phandle>
+	Definition: reference to cx power domain node.
+
+- resets:
+	Usage: required
+	Value type: <phandle>
+	Definition: reference to the list of resets for the Hexagon.
+
+- reset-names:
+        Usage: required for SDM845 ADSP
+        Value type: <stringlist>
+        Definition: must be "pdc_sync" and "cc_lpass"
+
+- reset-names:
+        Usage: required for QCS404 CDSP
+        Value type: <stringlist>
+        Definition: must be "restart"
+
+- qcom,halt-regs:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: a phandle reference to a syscon representing TCSR followed
+		    by the offset within syscon for Hexagon halt register.
+
+- memory-region:
+	Usage: required
+	Value type: <phandle>
+	Definition: reference to the reserved-memory for the firmware
+
+- qcom,smem-states:
+	Usage: required
+	Value type: <phandle>
+	Definition: reference to the smem state for requesting the Hexagon to
+		    shut down
+
+- qcom,smem-state-names:
+	Usage: required
+	Value type: <stringlist>
+	Definition: must be "stop"
+
+
+= SUBNODES
+The adsp node may have an subnode named "glink-edge" that describes the
+communication edge, channels and devices related to the Hexagon.
+See ../soc/qcom/qcom,glink.txt for details on how to describe these.
+
+= EXAMPLE
+The following example describes the resources needed to boot control the
+ADSP, as it is found on SDM845 boards.
+
+	remoteproc@17300000 {
+		compatible = "qcom,sdm845-adsp-pil";
+		reg = <0x17300000 0x40c>;
+
+		interrupts-extended = <&intc GIC_SPI 162 IRQ_TYPE_EDGE_RISING>,
+			<&adsp_smp2p_in 0 IRQ_TYPE_EDGE_RISING>,
+			<&adsp_smp2p_in 1 IRQ_TYPE_EDGE_RISING>,
+			<&adsp_smp2p_in 2 IRQ_TYPE_EDGE_RISING>,
+			<&adsp_smp2p_in 3 IRQ_TYPE_EDGE_RISING>;
+		interrupt-names = "wdog", "fatal", "ready",
+			"handover", "stop-ack";
+
+		clocks = <&rpmhcc RPMH_CXO_CLK>,
+			<&gcc GCC_LPASS_SWAY_CLK>,
+			<&lpasscc LPASS_Q6SS_AHBS_AON_CLK>,
+			<&lpasscc LPASS_Q6SS_AHBM_AON_CLK>,
+			<&lpasscc LPASS_QDSP6SS_XO_CLK>,
+			<&lpasscc LPASS_QDSP6SS_SLEEP_CLK>,
+			<&lpasscc LPASS_QDSP6SS_CORE_CLK>;
+		clock-names = "xo", "sway_cbcr",
+			"lpass_ahbs_aon_cbcr",
+			"lpass_ahbm_aon_cbcr", "qdsp6ss_xo",
+			"qdsp6ss_sleep", "qdsp6ss_core";
+
+		power-domains = <&rpmhpd SDM845_CX>;
+
+		resets = <&pdc_reset PDC_AUDIO_SYNC_RESET>,
+			 <&aoss_reset AOSS_CC_LPASS_RESTART>;
+		reset-names = "pdc_sync", "cc_lpass";
+
+		qcom,halt-regs = <&tcsr_mutex_regs 0x22000>;
+
+		memory-region = <&pil_adsp_mem>;
+
+		qcom,smem-states = <&adsp_smp2p_out 0>;
+		qcom,smem-state-names = "stop";
+	};
-- 
cgit 


From b270ea40b3c6e5d269ae6e1c74db401f7b1dcef0 Mon Sep 17 00:00:00 2001
From: Sibi Sankar <sibis@codeaurora.org>
Date: Sat, 29 Dec 2018 00:23:00 +0530
Subject: dt-bindings: soc: qcom: Add remote-pid binding for GLINK SMEM

Add missing qcom,remote-pid dt binding required for GLINK SMEM
which specifies the remote endpoint of the GLINK edge.

Fixes: 2b41d6c8e696 ("dt-bindings: soc: qcom: Extend GLINK to cover SMEM")
Reviewed-by: Doug Anderson <dianders@chromium.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sibi Sankar <sibis@codeaurora.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 Documentation/devicetree/bindings/soc/qcom/qcom,glink.txt | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,glink.txt b/Documentation/devicetree/bindings/soc/qcom/qcom,glink.txt
index cf759e5f9b10..1214192847ac 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom,glink.txt
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,glink.txt
@@ -21,6 +21,11 @@ edge.
 	Definition: should specify the IRQ used by the remote processor to
 		    signal this processor about communication related events
 
+- qcom,remote-pid:
+	Usage: required for glink-smem
+	Value type: <u32>
+	Definition: specifies the identifier of the remote endpoint of this edge
+
 - qcom,rpm-msg-ram:
 	Usage: required for glink-rpm
 	Value type: <prop-encoded-array>
-- 
cgit 


From 97266c4d05345f9b500d10c3caa1070249e895e7 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 21 May 2019 16:23:25 -0500
Subject: spi: dt-bindings: Convert spi-gpio binding to json-schema

Convert the spi-gpio binding to DT schema format.

Cc: Mark Brown <broonie@kernel.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: linux-spi@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-gpio.txt | 43 -------------
 .../devicetree/bindings/spi/spi-gpio.yaml          | 72 ++++++++++++++++++++++
 2 files changed, 72 insertions(+), 43 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/spi/spi-gpio.txt
 create mode 100644 Documentation/devicetree/bindings/spi/spi-gpio.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/spi/spi-gpio.txt b/Documentation/devicetree/bindings/spi/spi-gpio.txt
deleted file mode 100644
index 52db562f17a4..000000000000
--- a/Documentation/devicetree/bindings/spi/spi-gpio.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-SPI-GPIO devicetree bindings
-
-This represents a group of 3-n GPIO lines used for bit-banged SPI on dedicated
-GPIO lines.
-
-Required properties:
-
- - compatible: should be set to "spi-gpio"
- - #address-cells: should be set to <0x1>
- - ranges
- - sck-gpios: GPIO spec for the SCK line to use
- - miso-gpios: GPIO spec for the MISO line to use
- - mosi-gpios: GPIO spec for the MOSI line to use
- - cs-gpios: GPIOs to use for chipselect lines.
-             Not needed if num-chipselects = <0>.
- - num-chipselects: Number of chipselect lines. Should be <0> if a single device
-                    with no chip select is connected.
-
-Deprecated bindings:
-
-These legacy GPIO line bindings can alternatively be used to define the
-GPIO lines used, they should not be used in new device trees.
-
- - gpio-sck: GPIO spec for the SCK line to use
- - gpio-miso: GPIO spec for the MISO line to use
- - gpio-mosi: GPIO spec for the MOSI line to use
-
-Example:
-
-	spi {
-		compatible = "spi-gpio";
-		#address-cells = <0x1>;
-		ranges;
-
-		sck-gpios = <&gpio 95 0>;
-		miso-gpios = <&gpio 98 0>;
-		mosi-gpios = <&gpio 97 0>;
-		cs-gpios = <&gpio 125 0>;
-		num-chipselects = <1>;
-
-		/* clients */
-	};
-
diff --git a/Documentation/devicetree/bindings/spi/spi-gpio.yaml b/Documentation/devicetree/bindings/spi/spi-gpio.yaml
new file mode 100644
index 000000000000..55c4f1705f07
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-gpio.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/spi-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SPI-GPIO devicetree bindings
+
+maintainers:
+  - Rob Herring <robh@kernel.org>
+
+description:
+  This represents a group of 3-n GPIO lines used for bit-banged SPI on
+  dedicated GPIO lines.
+
+allOf:
+  - $ref: "/schemas/spi/spi-controller.yaml#"
+
+properties:
+  compatible:
+    const: spi-gpio
+
+  sck-gpios:
+    description: GPIO spec for the SCK line to use
+    maxItems: 1
+
+  miso-gpios:
+    description: GPIO spec for the MISO line to use
+    maxItems: 1
+
+  mosi-gpios:
+    description: GPIO spec for the MOSI line to use
+    maxItems: 1
+
+  cs-gpios:
+    description: GPIOs to use for chipselect lines.
+      Not needed if num-chipselects = <0>.
+    minItems: 1
+    maxItems: 1024
+
+  num-chipselects:
+    description: Number of chipselect lines. Should be <0> if a single device
+      with no chip select is connected.
+    $ref: "/schemas/types.yaml#/definitions/uint32"
+
+  # Deprecated properties
+  gpio-sck: false
+  gpio-miso: false
+  gpio-mosi: false
+
+required:
+  - compatible
+  - num-chipselects
+  - sck-gpios
+
+examples:
+  - |
+    spi {
+      compatible = "spi-gpio";
+      #address-cells = <0x1>;
+      #size-cells = <0x0>;
+
+      sck-gpios = <&gpio 95 0>;
+      miso-gpios = <&gpio 98 0>;
+      mosi-gpios = <&gpio 97 0>;
+      cs-gpios = <&gpio 125 0>;
+      num-chipselects = <1>;
+
+      /* clients */
+    };
+
+...
-- 
cgit 


From 1914a996436b09186489da73b807e1df71259f67 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 21 May 2019 16:20:29 -0500
Subject: regulator: Convert regulator binding to json-schema

Convert the common regulator binding to DT schema format. Note that all
the properties with standard unit suffixes have type checks already, so
only a description is necessary.

As fixed-regulator has already been converted, update the references in
it. Otherwise, keep regulator.txt with a reference to the schema to
avoid a bunch of treewide updates. regulator.txt can be removed when all
the regulator bindings are converted.

Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/fixed-regulator.yaml        |   5 +-
 .../devicetree/bindings/regulator/regulator.txt    | 140 +--------------
 .../devicetree/bindings/regulator/regulator.yaml   | 200 +++++++++++++++++++++
 3 files changed, 205 insertions(+), 140 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/regulator/regulator.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml
index d289c2f7455a..a650b457085d 100644
--- a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml
@@ -12,10 +12,13 @@ maintainers:
 
 description:
   Any property defined as part of the core regulator binding, defined in
-  regulator.txt, can also be used. However a fixed voltage regulator is
+  regulator.yaml, can also be used. However a fixed voltage regulator is
   expected to have the regulator-min-microvolt and regulator-max-microvolt
   to be the same.
 
+allOf:
+  - $ref: "regulator.yaml#"
+
 properties:
   compatible:
     const: regulator-fixed
diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt b/Documentation/devicetree/bindings/regulator/regulator.txt
index 0a3f087d5844..487ccd8370b3 100644
--- a/Documentation/devicetree/bindings/regulator/regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/regulator.txt
@@ -1,139 +1 @@
-Voltage/Current Regulators
-
-Optional properties:
-- regulator-name: A string used as a descriptive name for regulator outputs
-- regulator-min-microvolt: smallest voltage consumers may set
-- regulator-max-microvolt: largest voltage consumers may set
-- regulator-microvolt-offset: Offset applied to voltages to compensate for voltage drops
-- regulator-min-microamp: smallest current consumers may set
-- regulator-max-microamp: largest current consumers may set
-- regulator-input-current-limit-microamp: maximum input current regulator allows
-- regulator-always-on: boolean, regulator should never be disabled
-- regulator-boot-on: bootloader/firmware enabled regulator
-- regulator-allow-bypass: allow the regulator to go into bypass mode
-- regulator-allow-set-load: allow the regulator performance level to be configured
-- <name>-supply: phandle to the parent supply/regulator node
-- regulator-ramp-delay: ramp delay for regulator(in uV/us)
-  For hardware which supports disabling ramp rate, it should be explicitly
-  initialised to zero (regulator-ramp-delay = <0>) for disabling ramp delay.
-- regulator-enable-ramp-delay: The time taken, in microseconds, for the supply
-  rail to reach the target voltage, plus/minus whatever tolerance the board
-  design requires. This property describes the total system ramp time
-  required due to the combination of internal ramping of the regulator itself,
-  and board design issues such as trace capacitance and load on the supply.
-- regulator-settling-time-us: Settling time, in microseconds, for voltage
-  change if regulator have the constant time for any level voltage change.
-  This is useful when regulator have exponential voltage change.
-- regulator-settling-time-up-us: Settling time, in microseconds, for voltage
-  increase if the regulator needs a constant time to settle after voltage
-  increases of any level. This is useful for regulators with exponential
-  voltage changes.
-- regulator-settling-time-down-us: Settling time, in microseconds, for voltage
-  decrease if the regulator needs a constant time to settle after voltage
-  decreases of any level. This is useful for regulators with exponential
-  voltage changes.
-- regulator-soft-start: Enable soft start so that voltage ramps slowly
-- regulator-state-standby sub-root node for Standby mode
-  : equivalent with standby Linux sleep state, which provides energy savings
-  with a relatively quick transition back time.
-- regulator-state-mem sub-root node for Suspend-to-RAM mode
-  : suspend to memory, the device goes to sleep, but all data stored in memory,
-  only some external interrupt can wake the device.
-- regulator-state-disk sub-root node for Suspend-to-DISK mode
-  : suspend to disk, this state operates similarly to Suspend-to-RAM,
-  but includes a final step of writing memory contents to disk.
-- regulator-state-[mem/disk/standby] node has following common properties:
-	- regulator-on-in-suspend: regulator should be on in suspend state.
-	- regulator-off-in-suspend: regulator should be off in suspend state.
-	- regulator-suspend-min-microvolt: minimum voltage may be set in
-	  suspend state.
-	- regulator-suspend-max-microvolt: maximum voltage may be set in
-	  suspend state.
-	- regulator-suspend-microvolt: the default voltage which regulator
-	  would be set in suspend. This property is now deprecated, instead
-	  setting voltage for suspend mode via the API which regulator
-	  driver provides is recommended.
-	- regulator-changeable-in-suspend: whether the default voltage and
-	  the regulator on/off in suspend can be changed in runtime.
-	- regulator-mode: operating mode in the given suspend state.
-	  The set of possible operating modes depends on the capabilities of
-	  every hardware so the valid modes are documented on each regulator
-	  device tree binding document.
-- regulator-initial-mode: initial operating mode. The set of possible operating
-  modes depends on the capabilities of every hardware so each device binding
-  documentation explains which values the regulator supports.
-- regulator-allowed-modes: list of operating modes that software is allowed to
-  configure for the regulator at run-time.  Elements may be specified in any
-  order.  The set of possible operating modes depends on the capabilities of
-  every hardware so each device binding document explains which values the
-  regulator supports.
-- regulator-system-load: Load in uA present on regulator that is not captured by
-  any consumer request.
-- regulator-pull-down: Enable pull down resistor when the regulator is disabled.
-- regulator-over-current-protection: Enable over current protection.
-- regulator-active-discharge: tristate, enable/disable active discharge of
-  regulators. The values are:
-	0: Disable active discharge.
-	1: Enable active discharge.
-	Absence of this property will leave configuration to default.
-- regulator-coupled-with: Regulators with which the regulator
-  is coupled. The linkage is 2-way - all coupled regulators should be linked
-  with each other. A regulator should not be coupled with its supplier.
-- regulator-coupled-max-spread: Array of maximum spread between voltages of
-  coupled regulators in microvolts, each value in the array relates to the
-  corresponding couple specified by the regulator-coupled-with property.
-- regulator-max-step-microvolt: Maximum difference between current and target
-  voltages that can be changed safely in a single step.
-
-Deprecated properties:
-- regulator-compatible: If a regulator chip contains multiple
-  regulators, and if the chip's binding contains a child node that
-  describes each regulator, then this property indicates which regulator
-  this child node is intended to configure. If this property is missing,
-  the node's name will be used instead.
-
-Example:
-
-	xyzreg: regulator@0 {
-		regulator-min-microvolt = <1000000>;
-		regulator-max-microvolt = <2500000>;
-		regulator-always-on;
-		vin-supply = <&vin>;
-
-		regulator-state-mem {
-			regulator-on-in-suspend;
-		};
-	};
-
-Regulator Consumers:
-Consumer nodes can reference one or more of its supplies/
-regulators using the below bindings.
-
-- <name>-supply: phandle to the regulator node
-
-These are the same bindings that a regulator in the above
-example used to reference its own supply, in which case
-its just seen as a special case of a regulator being a
-consumer itself.
-
-Example of a consumer device node (mmc) referencing two
-regulators (twl_reg1 and twl_reg2),
-
-	twl_reg1: regulator@0 {
-		...
-		...
-		...
-	};
-
-	twl_reg2: regulator@1 {
-		...
-		...
-		...
-	};
-
-	mmc: mmc@0 {
-		...
-		...
-		vmmc-supply = <&twl_reg1>;
-		vmmcaux-supply = <&twl_reg2>;
-	};
+This file has moved to regulator.yaml.
diff --git a/Documentation/devicetree/bindings/regulator/regulator.yaml b/Documentation/devicetree/bindings/regulator/regulator.yaml
new file mode 100644
index 000000000000..02c3043ce419
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/regulator.yaml
@@ -0,0 +1,200 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Voltage/Current Regulators
+
+maintainers:
+  - Liam Girdwood <lgirdwood@gmail.com>
+  - Mark Brown <broonie@kernel.org>
+
+properties:
+  regulator-name:
+    description: A string used as a descriptive name for regulator outputs
+    $ref: "/schemas/types.yaml#/definitions/string"
+
+  regulator-min-microvolt:
+    description: smallest voltage consumers may set
+
+  regulator-max-microvolt:
+    description: largest voltage consumers may set
+
+  regulator-microvolt-offset:
+    description: Offset applied to voltages to compensate for voltage drops
+
+  regulator-min-microamp:
+    description: smallest current consumers may set
+
+  regulator-max-microamp:
+    description: largest current consumers may set
+
+  regulator-input-current-limit-microamp:
+    description: maximum input current regulator allows
+
+  regulator-always-on:
+    description: boolean, regulator should never be disabled
+    type: boolean
+
+  regulator-boot-on:
+    description: bootloader/firmware enabled regulator
+    type: boolean
+
+  regulator-allow-bypass:
+    description: allow the regulator to go into bypass mode
+    type: boolean
+
+  regulator-allow-set-load:
+    description: allow the regulator performance level to be configured
+    type: boolean
+
+  regulator-ramp-delay:
+    description: ramp delay for regulator(in uV/us) For hardware which supports
+      disabling ramp rate, it should be explicitly initialised to zero (regulator-ramp-delay
+      = <0>) for disabling ramp delay.
+    $ref: "/schemas/types.yaml#/definitions/uint32"
+
+  regulator-enable-ramp-delay:
+    description: The time taken, in microseconds, for the supply rail to
+      reach the target voltage, plus/minus whatever tolerance the board
+      design requires. This property describes the total system ramp time
+      required due to the combination of internal ramping of the regulator
+      itself, and board design issues such as trace capacitance and load
+      on the supply.
+    $ref: "/schemas/types.yaml#/definitions/uint32"
+
+  regulator-settling-time-us:
+    description: Settling time, in microseconds, for voltage change if regulator
+      have the constant time for any level voltage change. This is useful
+      when regulator have exponential voltage change.
+
+  regulator-settling-time-up-us:
+    description: Settling time, in microseconds, for voltage increase if
+      the regulator needs a constant time to settle after voltage increases
+      of any level. This is useful for regulators with exponential voltage
+      changes.
+
+  regulator-settling-time-down-us:
+    description: Settling time, in microseconds, for voltage decrease if
+      the regulator needs a constant time to settle after voltage decreases
+      of any level. This is useful for regulators with exponential voltage
+      changes.
+
+  regulator-soft-start:
+    description: Enable soft start so that voltage ramps slowly
+    type: boolean
+
+  regulator-initial-mode:
+    description: initial operating mode. The set of possible operating modes
+      depends on the capabilities of every hardware so each device binding
+      documentation explains which values the regulator supports.
+    $ref: "/schemas/types.yaml#/definitions/uint32"
+
+  regulator-allowed-modes:
+    description: list of operating modes that software is allowed to configure
+      for the regulator at run-time.  Elements may be specified in any order.
+      The set of possible operating modes depends on the capabilities of
+      every hardware so each device binding document explains which values
+      the regulator supports.
+    $ref: "/schemas/types.yaml#/definitions/uint32-array"
+
+  regulator-system-load:
+    description: Load in uA present on regulator that is not captured by
+      any consumer request.
+    $ref: "/schemas/types.yaml#/definitions/uint32"
+
+  regulator-pull-down:
+    description: Enable pull down resistor when the regulator is disabled.
+    type: boolean
+
+  regulator-over-current-protection:
+    description: Enable over current protection.
+    type: boolean
+
+  regulator-active-discharge:
+    description: |
+      tristate, enable/disable active discharge of regulators. The values are:
+      0: Disable active discharge.
+      1: Enable active discharge.
+      Absence of this property will leave configuration to default.
+    allOf:
+      - $ref: "/schemas/types.yaml#/definitions/uint32"
+      - enum: [ 0, 1 ]
+
+  regulator-coupled-with:
+    description: Regulators with which the regulator is coupled. The linkage
+      is 2-way - all coupled regulators should be linked with each other.
+      A regulator should not be coupled with its supplier.
+    $ref: "/schemas/types.yaml#/definitions/phandle-array"
+
+  regulator-coupled-max-spread:
+    description: Array of maximum spread between voltages of coupled regulators
+      in microvolts, each value in the array relates to the corresponding
+      couple specified by the regulator-coupled-with property.
+    $ref: "/schemas/types.yaml#/definitions/uint32"
+
+  regulator-max-step-microvolt:
+    description: Maximum difference between current and target voltages
+      that can be changed safely in a single step.
+
+patternProperties:
+  ".*-supply$":
+    description: Input supply phandle(s) for this node
+
+  regulator-state-(standby|mem|disk):
+    type: object
+    description:
+      sub-nodes for regulator state in Standby, Suspend-to-RAM, and
+      Suspend-to-DISK modes. Equivalent with standby, mem, and disk Linux
+      sleep states.
+
+    properties:
+      regulator-on-in-suspend:
+        description: regulator should be on in suspend state.
+        type: boolean
+
+      regulator-off-in-suspend:
+        description: regulator should be off in suspend state.
+        type: boolean
+
+      regulator-suspend-min-microvolt:
+        description: minimum voltage may be set in suspend state.
+
+      regulator-suspend-max-microvolt:
+        description: maximum voltage may be set in suspend state.
+
+      regulator-suspend-microvolt:
+        description: the default voltage which regulator would be set in
+          suspend. This property is now deprecated, instead setting voltage
+          for suspend mode via the API which regulator driver provides is
+          recommended.
+
+      regulator-changeable-in-suspend:
+        description: whether the default voltage and the regulator on/off
+          in suspend can be changed in runtime.
+        type: boolean
+
+      regulator-mode:
+        description: operating mode in the given suspend state. The set
+          of possible operating modes depends on the capabilities of every
+          hardware so the valid modes are documented on each regulator device
+          tree binding document.
+        $ref: "/schemas/types.yaml#/definitions/uint32"
+
+    additionalProperties: false
+
+examples:
+  - |
+    xyzreg: regulator@0 {
+      regulator-min-microvolt = <1000000>;
+      regulator-max-microvolt = <2500000>;
+      regulator-always-on;
+      vin-supply = <&vin>;
+
+      regulator-state-mem {
+        regulator-on-in-suspend;
+      };
+    };
+
+...
-- 
cgit 


From 673e401effe9dfd655f2d16588248f4c043361ce Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 21 May 2019 16:20:30 -0500
Subject: regulator: Convert gpio-regulator to json-schema

Convert the gpio-regulator binding to DT schema format using
json-schema.

Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/gpio-regulator.txt          |  57 ----------
 .../bindings/regulator/gpio-regulator.yaml         | 118 +++++++++++++++++++++
 2 files changed, 118 insertions(+), 57 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/regulator/gpio-regulator.txt
 create mode 100644 Documentation/devicetree/bindings/regulator/gpio-regulator.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/regulator/gpio-regulator.txt b/Documentation/devicetree/bindings/regulator/gpio-regulator.txt
deleted file mode 100644
index dd25e73b5d79..000000000000
--- a/Documentation/devicetree/bindings/regulator/gpio-regulator.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-GPIO controlled regulators
-
-Required properties:
-- compatible		: Must be "regulator-gpio".
-- regulator-name	: Defined in regulator.txt as optional, but required
-			  here.
-- gpios			: Array of one or more GPIO pins used to select the
-			  regulator voltage/current listed in "states".
-- states		: Selection of available voltages/currents provided by
-			  this regulator and matching GPIO configurations to
-			  achieve them. If there are no states in the "states"
-			  array, use a fixed regulator instead.
-
-Optional properties:
-- enable-gpios		: GPIO used to enable/disable the regulator.
-			  Warning, the GPIO phandle flags are ignored and the
-			  GPIO polarity is controlled solely by the presence
-			  of "enable-active-high" DT property. This is due to
-			  compatibility with old DTs.
-- enable-active-high	: Polarity of "enable-gpio" GPIO is active HIGH.
-			  Default is active LOW.
-- gpios-states		: On operating systems, that don't support reading back
-			  gpio values in output mode (most notably linux), this
-			  array provides the state of GPIO pins set when
-			  requesting them from the gpio controller. Systems,
-			  that are capable of preserving state when requesting
-			  the lines, are free to ignore this property.
-			  0: LOW, 1: HIGH. Default is LOW if nothing else
-			  is specified.
-- startup-delay-us	: Startup time in microseconds.
-- regulator-type	: Specifies what is being regulated, must be either
-			  "voltage" or "current", defaults to voltage.
-
-Any property defined as part of the core regulator binding defined in
-regulator.txt can also be used.
-
-Example:
-
-	mmciv: gpio-regulator {
-		compatible = "regulator-gpio";
-
-		regulator-name = "mmci-gpio-supply";
-		regulator-min-microvolt = <1800000>;
-		regulator-max-microvolt = <2600000>;
-		regulator-boot-on;
-
-		enable-gpios = <&gpio0 23 0x4>;
-		gpios = <&gpio0 24 0x4
-			 &gpio0 25 0x4>;
-		states = <1800000 0x3
-			  2200000 0x2
-			  2600000 0x1
-			  2900000 0x0>;
-
-		startup-delay-us = <100000>;
-		enable-active-high;
-	};
diff --git a/Documentation/devicetree/bindings/regulator/gpio-regulator.yaml b/Documentation/devicetree/bindings/regulator/gpio-regulator.yaml
new file mode 100644
index 000000000000..9d3b28417fb6
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/gpio-regulator.yaml
@@ -0,0 +1,118 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/gpio-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: GPIO controlled regulators
+
+maintainers:
+  - Liam Girdwood <lgirdwood@gmail.com>
+  - Mark Brown <broonie@kernel.org>
+
+description:
+  Any property defined as part of the core regulator binding, defined in
+  regulator.txt, can also be used.
+
+allOf:
+  - $ref: "regulator.yaml#"
+
+properties:
+  compatible:
+    const: regulator-gpio
+
+  regulator-name: true
+
+  enable-gpios:
+    description: GPIO to use to enable/disable the regulator.
+      Warning, the GPIO phandle flags are ignored and the GPIO polarity is
+      controlled solely by the presence of "enable-active-high" DT property.
+      This is due to compatibility with old DTs.
+    maxItems: 1
+
+  gpios:
+    description: Array of one or more GPIO pins used to select the regulator
+      voltage/current listed in "states".
+    minItems: 1
+    maxItems: 8  # Should be enough...
+
+  gpios-states:
+    description: |
+      On operating systems, that don't support reading back gpio values in
+      output mode (most notably linux), this array provides the state of GPIO
+      pins set when requesting them from the gpio controller. Systems, that are
+      capable of preserving state when requesting the lines, are free to ignore
+      this property.
+        0: LOW
+        1: HIGH
+      Default is LOW if nothing else is specified.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+      - maxItems: 8
+        items:
+          enum: [ 0, 1 ]
+          default: 0
+
+  states:
+    description: Selection of available voltages/currents provided by this
+      regulator and matching GPIO configurations to achieve them. If there are
+      no states in the "states" array, use a fixed regulator instead.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-matrix
+      - maxItems: 8
+        items:
+          items:
+            - description: Voltage in microvolts
+            - description: GPIO group state value
+
+  startup-delay-us:
+    description: startup time in microseconds
+
+  enable-active-high:
+    description: Polarity of "enable-gpio" GPIO is active HIGH. Default is
+      active LOW.
+    type: boolean
+
+  gpio-open-drain:
+    description:
+      GPIO is open drain type. If this property is missing then default
+      assumption is false.
+    type: boolean
+
+  regulator-type:
+    description: Specifies what is being regulated.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/string
+      - enum:
+          - voltage
+          - current
+        default: voltage
+
+required:
+  - compatible
+  - regulator-name
+  - gpios
+  - states
+
+examples:
+  - |
+    gpio-regulator {
+      compatible = "regulator-gpio";
+
+      regulator-name = "mmci-gpio-supply";
+      regulator-min-microvolt = <1800000>;
+      regulator-max-microvolt = <2600000>;
+      regulator-boot-on;
+
+      enable-gpios = <&gpio0 23 0x4>;
+      gpios = <&gpio0 24 0x4
+        &gpio0 25 0x4>;
+      states = <1800000 0x3>,
+        <2200000 0x2>,
+        <2600000 0x1>,
+        <2900000 0x0>;
+
+      startup-delay-us = <100000>;
+      enable-active-high;
+    };
+...
-- 
cgit 


From 27b1b58fcfe72fd6b53a83b0dccb678e6b6faab8 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 21 May 2019 16:20:31 -0500
Subject: regulator: Convert max8660 binding to json-schema

Convert the max8660 binding to DT schema format using json-schema.

Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/regulator/max8660.txt      | 47 -------------
 .../devicetree/bindings/regulator/max8660.yaml     | 77 ++++++++++++++++++++++
 2 files changed, 77 insertions(+), 47 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/regulator/max8660.txt
 create mode 100644 Documentation/devicetree/bindings/regulator/max8660.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/regulator/max8660.txt b/Documentation/devicetree/bindings/regulator/max8660.txt
deleted file mode 100644
index 8ba994d8a142..000000000000
--- a/Documentation/devicetree/bindings/regulator/max8660.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-Maxim MAX8660 voltage regulator
-
-Required properties:
-- compatible: must be one of "maxim,max8660", "maxim,max8661"
-- reg: I2C slave address, usually 0x34
-- any required generic properties defined in regulator.txt
-
-Example:
-
-	i2c_master {
-		max8660@34 {
-			compatible = "maxim,max8660";
-			reg = <0x34>;
-
-			regulators {
-				regulator@0 {
-					regulator-compatible= "V3(DCDC)";
-					regulator-min-microvolt = <725000>;
-					regulator-max-microvolt = <1800000>;
-				};
-
-				regulator@1 {
-					regulator-compatible= "V4(DCDC)";
-					regulator-min-microvolt = <725000>;
-					regulator-max-microvolt = <1800000>;
-				};
-
-				regulator@2 {
-					regulator-compatible= "V5(LDO)";
-					regulator-min-microvolt = <1700000>;
-					regulator-max-microvolt = <2000000>;
-				};
-
-				regulator@3 {
-					regulator-compatible= "V6(LDO)";
-					regulator-min-microvolt = <1800000>;
-					regulator-max-microvolt = <3300000>;
-				};
-
-				regulator@4 {
-					regulator-compatible= "V7(LDO)";
-					regulator-min-microvolt = <1800000>;
-					regulator-max-microvolt = <3300000>;
-				};
-			};
-		};
-	};
diff --git a/Documentation/devicetree/bindings/regulator/max8660.yaml b/Documentation/devicetree/bindings/regulator/max8660.yaml
new file mode 100644
index 000000000000..9c038698f880
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/max8660.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/max8660.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX8660 voltage regulator
+
+maintainers:
+  - Daniel Mack <zonque@gmail.com>
+
+properties:
+  $nodename:
+    pattern: "pmic@[0-9a-f]{1,2}"
+  compatible:
+    enum:
+      - maxim,max8660
+      - maxim,max8661
+
+  reg:
+    maxItems: 1
+
+  regulators:
+    type: object
+
+    patternProperties:
+      "regulator-.+":
+        $ref: "regulator.yaml#"
+
+    additionalProperties: false
+
+additionalProperties: false
+
+examples:
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      pmic@34 {
+        compatible = "maxim,max8660";
+        reg = <0x34>;
+
+        regulators {
+          regulator-V3 {
+            regulator-compatible= "V3(DCDC)";
+            regulator-min-microvolt = <725000>;
+            regulator-max-microvolt = <1800000>;
+          };
+
+          regulator-V4 {
+            regulator-compatible= "V4(DCDC)";
+            regulator-min-microvolt = <725000>;
+            regulator-max-microvolt = <1800000>;
+          };
+
+          regulator-V5 {
+            regulator-compatible= "V5(LDO)";
+            regulator-min-microvolt = <1700000>;
+            regulator-max-microvolt = <2000000>;
+          };
+
+          regulator-V6 {
+            regulator-compatible= "V6(LDO)";
+            regulator-min-microvolt = <1800000>;
+            regulator-max-microvolt = <3300000>;
+          };
+
+          regulator-V7 {
+            regulator-compatible= "V7(LDO)";
+            regulator-min-microvolt = <1800000>;
+            regulator-max-microvolt = <3300000>;
+          };
+        };
+      };
+    };
+...
-- 
cgit 


From 9c3f3410f5ace868c7c82fe4479fd36bed5eee64 Mon Sep 17 00:00:00 2001
From: Trent Piepho <tpiepho@impinj.com>
Date: Wed, 22 May 2019 18:43:19 +0000
Subject: dt-bindings: phy: dp83867: Describe how driver behaves w.r.t rgmii
 delay

Add a note to make it more clear how the driver behaves when "rgmii" vs
"rgmii-id", "rgmii-idrx", or "rgmii-idtx" interface modes are selected.

Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Trent Piepho <tpiepho@impinj.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/ti,dp83867.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt
index 9ef9338aaee1..99b8681bde49 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83867.txt
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt
@@ -11,6 +11,14 @@ Required properties:
 	- ti,fifo-depth - Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h
 		for applicable values
 
+Note: If the interface type is PHY_INTERFACE_MODE_RGMII the TX/RX clock delays
+      will be left at their default values, as set by the PHY's pin strapping.
+      The default strapping will use a delay of 2.00 ns.  Thus
+      PHY_INTERFACE_MODE_RGMII, by default, does not behave as RGMII with no
+      internal delay, but as PHY_INTERFACE_MODE_RGMII_ID.  The device tree
+      should use "rgmii-id" if internal delays are desired as this may be
+      changed in future to cause "rgmii" mode to disable delays.
+
 Optional property:
 	- ti,min-output-impedance - MAC Interface Impedance control to set
 				    the programmable output impedance to
-- 
cgit 


From 980066e6d9642fa5854bed8e592b1a30ea885b76 Mon Sep 17 00:00:00 2001
From: Trent Piepho <tpiepho@impinj.com>
Date: Wed, 22 May 2019 18:43:21 +0000
Subject: dt-bindings: phy: dp83867: Add documentation for disabling clock
 output

The clock output is generally only used for testing and development and
not used to daisy-chain PHYs.  It's just a source of RF noise afterward.

Add a mux value for "off".  I've added it as another enumeration to the
output property.  In the actual PHY, the mux and the output enable are
independently controllable.  However, it doesn't seem useful to be able
to describe the mux setting when the output is disabled.

Document that PHY's default setting will be left as is if the property
is omitted.

Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Trent Piepho <tpiepho@impinj.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/ti,dp83867.txt | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt
index 99b8681bde49..db6aa3f2215b 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83867.txt
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt
@@ -33,8 +33,10 @@ Optional property:
 				    software needs to take when this pin is
 				    strapped in these modes. See data manual
 				    for details.
-	- ti,clk-output-sel - Muxing option for CLK_OUT pin - see dt-bindings/net/ti-dp83867.h
-				    for applicable values.
+	- ti,clk-output-sel - Muxing option for CLK_OUT pin.  See dt-bindings/net/ti-dp83867.h
+			      for applicable values.  The CLK_OUT pin can also
+			      be disabled by this property.  When omitted, the
+			      PHY's default will be left as is.
 
 Note: ti,min-output-impedance and ti,max-output-impedance are mutually
       exclusive. When both properties are present ti,max-output-impedance
-- 
cgit 


From a0b2ff531582c510af11b6d548732eb2fd7772ff Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Mon, 20 May 2019 21:43:49 +0200
Subject: dt-bindings: soc: amlogic: canvas: document support for Meson8/8b/8m2

The canvas IP on Meson8, Meson8b and Meson8m2 is similar to the one
found on GXBB and newer. The only known difference is that the older
SoCs cannot configure the "endianness".

Add a compatible string for each of the older SoCs to make sure we won't
be using unsupported features on these SoCs.

Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Acked-by: Maxime Jourdan <mjourdan@baylibre.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
---
 .../devicetree/bindings/soc/amlogic/amlogic,canvas.txt         | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/soc/amlogic/amlogic,canvas.txt b/Documentation/devicetree/bindings/soc/amlogic/amlogic,canvas.txt
index 436d2106e80d..e876f3ce54f6 100644
--- a/Documentation/devicetree/bindings/soc/amlogic/amlogic,canvas.txt
+++ b/Documentation/devicetree/bindings/soc/amlogic/amlogic,canvas.txt
@@ -2,8 +2,8 @@ Amlogic Canvas
 ================================
 
 A canvas is a collection of metadata that describes a pixel buffer.
-Those metadata include: width, height, phyaddr, wrapping, block mode
-and endianness.
+Those metadata include: width, height, phyaddr, wrapping and block mode.
+Starting with GXBB the endianness can also be described.
 
 Many IPs within Amlogic SoCs rely on canvas indexes to read/write pixel data
 rather than use the phy addresses directly. For instance, this is the case for
@@ -18,7 +18,11 @@ Video Lookup Table
 --------------------------
 
 Required properties:
-- compatible: "amlogic,canvas"
+- compatible: has to be one of:
+		- "amlogic,meson8-canvas", "amlogic,canvas" on Meson8
+		- "amlogic,meson8b-canvas", "amlogic,canvas" on Meson8b
+		- "amlogic,meson8m2-canvas", "amlogic,canvas" on Meson8m2
+		- "amlogic,canvas" on GXBB and newer
 - reg: Base physical address and size of the canvas registers.
 
 Example:
-- 
cgit 


From 3e75b76f0f17194e0e65694ade6e69fc11593190 Mon Sep 17 00:00:00 2001
From: Guillaume La Roque <glaroque@baylibre.com>
Date: Tue, 14 May 2019 10:26:47 +0200
Subject: dt-bindings: pinctrl: add a 'drive-strength-microamp' property

This property allow drive-strength parameter in uA instead of mA.

Signed-off-by: Guillaume La Roque <glaroque@baylibre.com>
Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
index cef2b5855d60..fcd37e93ed4d 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
@@ -258,6 +258,7 @@ drive-push-pull		- drive actively high and low
 drive-open-drain	- drive with open drain
 drive-open-source	- drive with open source
 drive-strength		- sink or source at most X mA
+drive-strength-microamp	- sink or source at most X uA
 input-enable		- enable input on pin (no effect on output, such as
 			  enabling an input buffer)
 input-disable		- disable input on pin (no effect on output, such as
@@ -326,6 +327,8 @@ arguments are described below.
 
 - drive-strength takes as argument the target strength in mA.
 
+- drive-strength-microamp takes as argument the target strength in uA.
+
 - input-debounce takes the debounce time in usec as argument
   or 0 to disable debouncing
 
-- 
cgit 


From 013786c043298710887e983ac8d59aaff1f554f3 Mon Sep 17 00:00:00 2001
From: Guillaume La Roque <glaroque@baylibre.com>
Date: Tue, 14 May 2019 10:26:49 +0200
Subject: dt-bindings: pinctrl: meson: Add drive-strength-microamp property

Add optional drive-strength-microamp property

Signed-off-by: Guillaume La Roque <glaroque@baylibre.com>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
index a47dd990a8d3..a7618605bf1e 100644
--- a/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
@@ -51,6 +51,10 @@ Configuration nodes support the generic properties "bias-disable",
 "bias-pull-up" and "bias-pull-down", described in file
 pinctrl-bindings.txt
 
+Optional properties :
+ - drive-strength-microamp: Drive strength for the specified pins in uA.
+			    This property is only valid for G12A and newer.
+
 === Example ===
 
 	pinctrl: pinctrl@c1109880 {
-- 
cgit 


From f3fbedabb7be0d19b2da862fa6c01d82ac39c716 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 9 May 2019 13:59:53 -0700
Subject: dt-bindings: pinctrl: bcm2835-gpio: Document BCM7211 compatible

BCM7211 has a slightly different block layout and some additional GPIO
registers that were added, document the compatible string.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/devicetree/bindings/pinctrl/brcm,bcm2835-gpio.txt | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,bcm2835-gpio.txt b/Documentation/devicetree/bindings/pinctrl/brcm,bcm2835-gpio.txt
index 3fac0a061bcc..ac6d614d74e0 100644
--- a/Documentation/devicetree/bindings/pinctrl/brcm,bcm2835-gpio.txt
+++ b/Documentation/devicetree/bindings/pinctrl/brcm,bcm2835-gpio.txt
@@ -5,6 +5,9 @@ controller, and pinmux/control device.
 
 Required properties:
 - compatible: "brcm,bcm2835-gpio"
+- compatible: should be one of:
+  "brcm,bcm2835-gpio" - BCM2835 compatible pinctrl
+  "brcm,bcm7211-gpio" - BCM7211 compatible pinctrl
 - reg: Should contain the physical address of the GPIO module's registers.
 - gpio-controller: Marks the device node as a GPIO controller.
 - #gpio-cells : Should be two. The first cell is the pin number and the
-- 
cgit 


From 7745f03eb39587dd15a1fb26e6223678b8e906d2 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Mon, 13 May 2019 13:58:45 -0400
Subject: x86/topology: Add CPUID.1F multi-die/package support

Some new systems have multiple software-visible die within each package.

Update Linux parsing of the Intel CPUID "Extended Topology Leaf" to handle
either CPUID.B, or the new CPUID.1F.

Add cpuinfo_x86.die_id and cpuinfo_x86.max_dies to store the result.

die_id will be non-zero only for multi-die/package systems.

Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: linux-doc@vger.kernel.org
Link: https://lkml.kernel.org/r/7b23d2d26d717b8e14ba137c94b70943f1ae4b5c.1557769318.git.len.brown@intel.com
---
 Documentation/x86/topology.rst | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/x86/topology.rst b/Documentation/x86/topology.rst
index 6e28dbe818ab..8e9704f61017 100644
--- a/Documentation/x86/topology.rst
+++ b/Documentation/x86/topology.rst
@@ -49,6 +49,10 @@ Package-related topology information in the kernel:
 
     The number of cores in a package. This information is retrieved via CPUID.
 
+  - cpuinfo_x86.x86_max_dies:
+
+    The number of dies in a package. This information is retrieved via CPUID.
+
   - cpuinfo_x86.phys_proc_id:
 
     The physical ID of the package. This information is retrieved via CPUID
-- 
cgit 


From 0e344d8c709fe01d882fc0fb5452bedfe5eba67a Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Mon, 13 May 2019 13:58:47 -0400
Subject: cpu/topology: Export die_id

Export die_id in cpu topology, for the benefit of hardware that has
multiple-die/package.

Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: linux-doc@vger.kernel.org
Link: https://lkml.kernel.org/r/e7d1caaf4fbd24ee40db6d557ab28d7d83298900.1557769318.git.len.brown@intel.com
---
 Documentation/cputopology.txt | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index cb61277e2308..2ff8a1e9a2db 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -12,6 +12,12 @@ physical_package_id:
 	socket number, but the actual value is architecture and platform
 	dependent.
 
+die_id:
+
+	the CPU die ID of cpuX. Typically it is the hardware platform's
+	identifier (rather than the kernel's).  The actual value is
+	architecture and platform dependent.
+
 core_id:
 
 	the CPU core ID of cpuX. Typically it is the hardware platform's
@@ -81,6 +87,7 @@ For an architecture to support this feature, it must define some of
 these macros in include/asm-XXX/topology.h::
 
 	#define topology_physical_package_id(cpu)
+	#define topology_die_id(cpu)
 	#define topology_core_id(cpu)
 	#define topology_book_id(cpu)
 	#define topology_drawer_id(cpu)
@@ -99,9 +106,11 @@ provides default definitions for any of the above macros that are
 not defined by include/asm-XXX/topology.h:
 
 1) topology_physical_package_id: -1
-2) topology_core_id: 0
-3) topology_sibling_cpumask: just the given CPU
-4) topology_core_cpumask: just the given CPU
+2) topology_die_id: -1
+3) topology_core_id: 0
+4) topology_sibling_cpumask: just the given CPU
+5) topology_core_cpumask: just the given CPU
+6) topology_die_cpumask: just the given CPU
 
 For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
 default definitions for topology_book_id() and topology_book_cpumask().
-- 
cgit 


From b73ed8dc0597c11ec5064d06b9bbd4e541b6d4e7 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Mon, 13 May 2019 13:58:55 -0400
Subject: topology: Create package_cpus sysfs attribute

The existing sysfs cpu/topology/core_siblings (and core_siblings_list)
attributes are documented, implemented, and used by programs to represent
set of logical CPUs sharing the same package.

This makes sense if the next topology level above a core is always a
package.  But on systems where there is a die topology level between a core
and a package, the name and its definition become inconsistent.

So without changing its function, add a name for this map that describes
what it actually is -- package CPUs -- the set of CPUs that share the same
package.

This new name will be immune to changes in topology, since it describes
threads at the current level, not siblings at a contained level.

Suggested-by: Brice Goglin <Brice.Goglin@inria.fr>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/d9d3228b82fb5665e6f93a0ccd033fe022558521.1557769318.git.len.brown@intel.com
---
 Documentation/cputopology.txt | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index 2ff8a1e9a2db..48af5c290e20 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -46,15 +46,15 @@ thread_siblings_list:
 	human-readable list of cpuX's hardware threads within the same
 	core as cpuX.
 
-core_siblings:
+package_cpus:
 
-	internal kernel map of cpuX's hardware threads within the same
-	physical_package_id.
+	internal kernel map of the CPUs sharing the same physical_package_id.
+	(deprecated name: "core_siblings")
 
-core_siblings_list:
+package_cpus_list:
 
-	human-readable list of cpuX's hardware threads within the same
-	physical_package_id.
+	human-readable list of CPUs sharing the same physical_package_id.
+	(deprecated name: "core_siblings_list")
 
 book_siblings:
 
-- 
cgit 


From 2e4c54dac7b360c3820399bdf06cde9134a4495b Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Mon, 13 May 2019 13:58:56 -0400
Subject: topology: Create core_cpus and die_cpus sysfs attributes

Create CPU topology sysfs attributes: "core_cpus" and "core_cpus_list"

These attributes represent all of the logical CPUs that share the
same core.

These attriutes is synonymous with the existing "thread_siblings" and
"thread_siblings_list" attribute, which will be deprecated.

Create CPU topology sysfs attributes: "die_cpus" and "die_cpus_list".
These attributes represent all of the logical CPUs that share the
same die.

Suggested-by: Brice Goglin <Brice.Goglin@inria.fr>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/071c23a298cd27ede6ed0b6460cae190d193364f.1557769318.git.len.brown@intel.com
---
 Documentation/cputopology.txt | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index 48af5c290e20..b90dafcc8237 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -36,15 +36,15 @@ drawer_id:
 	identifier (rather than the kernel's).	The actual value is
 	architecture and platform dependent.
 
-thread_siblings:
+core_cpus:
 
-	internal kernel map of cpuX's hardware threads within the same
-	core as cpuX.
+	internal kernel map of CPUs within the same core.
+	(deprecated name: "thread_siblings")
 
-thread_siblings_list:
+core_cpus_list:
 
-	human-readable list of cpuX's hardware threads within the same
-	core as cpuX.
+	human-readable list of CPUs within the same core.
+	(deprecated name: "thread_siblings_list");
 
 package_cpus:
 
@@ -56,6 +56,14 @@ package_cpus_list:
 	human-readable list of CPUs sharing the same physical_package_id.
 	(deprecated name: "core_siblings_list")
 
+die_cpus:
+
+	internal kernel map of CPUs within the same die.
+
+die_cpus_list:
+
+	human-readable list of CPUs within the same die.
+
 book_siblings:
 
 	internal kernel map of cpuX's hardware threads within the same
@@ -93,6 +101,7 @@ these macros in include/asm-XXX/topology.h::
 	#define topology_drawer_id(cpu)
 	#define topology_sibling_cpumask(cpu)
 	#define topology_core_cpumask(cpu)
+	#define topology_die_cpumask(cpu)
 	#define topology_book_cpumask(cpu)
 	#define topology_drawer_cpumask(cpu)
 
-- 
cgit 


From eabe3bc2689ad0993c57469eb7efb1291443cc74 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Wed, 15 May 2019 03:50:41 -0400
Subject: media: cec-ioc-receive.rst: document CEC_MSG_FL_RAW

Document this new cec_msg flag.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/cec/cec-ioc-receive.rst | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/media/uapi/cec/cec-ioc-receive.rst b/Documentation/media/uapi/cec/cec-ioc-receive.rst
index c3a685ff05cb..4137903d672e 100644
--- a/Documentation/media/uapi/cec/cec-ioc-receive.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-receive.rst
@@ -223,6 +223,18 @@ View On' messages from initiator 0xf ('Unregistered') to destination 0 ('TV').
 	result of the :ref:`ioctl CEC_TRANSMIT <CEC_TRANSMIT>`, and once via
 	:ref:`ioctl CEC_RECEIVE <CEC_RECEIVE>`.
 
+    * .. _`CEC-MSG-FL-RAW`:
+
+      - ``CEC_MSG_FL_RAW``
+      - 2
+      - Normally CEC messages are validated before transmitting them. If this
+        flag is set when :ref:`ioctl CEC_TRANSMIT <CEC_TRANSMIT>` is called,
+	then no validation takes place and the message is transmitted as-is.
+	This is useful when debugging CEC issues.
+	This flag is only allowed if the process has the ``CAP_SYS_RAWIO``
+	capability. If that is not set, then the ``EPERM`` error code is
+	returned.
+
 
 .. tabularcolumns:: |p{5.6cm}|p{0.9cm}|p{11.0cm}|
 
@@ -358,7 +370,8 @@ ENOTTY
 
 EPERM
     The CEC adapter is not configured, i.e. :ref:`ioctl CEC_ADAP_S_LOG_ADDRS <CEC_ADAP_S_LOG_ADDRS>`
-    has never been called.
+    has never been called, or ``CEC_MSG_FL_RAW`` was used from a process that
+    did not have the ``CAP_SYS_RAWIO`` capability.
 
 ENONET
     The CEC adapter is not configured, i.e. :ref:`ioctl CEC_ADAP_S_LOG_ADDRS <CEC_ADAP_S_LOG_ADDRS>`
-- 
cgit 


From 428d3c867df6f2f87721fe7384ea09ad3130f839 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Wed, 22 May 2019 05:22:20 -0400
Subject: media: cec-ioc-g-mode.rst: be more specific when EPERM is returned

Document which capability is required, rather than just saying
that "root permissions" are required.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/cec/cec-ioc-g-mode.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/media/uapi/cec/cec-ioc-g-mode.rst b/Documentation/media/uapi/cec/cec-ioc-g-mode.rst
index c53bb5f73f0d..d0902f356d65 100644
--- a/Documentation/media/uapi/cec/cec-ioc-g-mode.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-g-mode.rst
@@ -294,7 +294,8 @@ EINVAL
     The requested mode is invalid.
 
 EPERM
-    Monitor mode is requested without having root permissions
+    Monitor mode is requested, but the process does have the ``CAP_NET_ADMIN``
+    capability.
 
 EBUSY
     Someone else is already an exclusive follower or initiator.
-- 
cgit 


From cc0f6e96c4fd01fe1f935014c8c87ac6e994324c Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 21 May 2019 16:23:24 -0500
Subject: spi: dt-bindings: Convert Arm pl022 to json-schema

Convert the Arm pl022 binding to DT schema format. The clock binding was
missing, so it is added to the schema. It really should be required as
well, but there are some platforms (spear) not yet using DT clock
binding.

Cc: Mark Brown <broonie@kernel.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: linux-spi@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/spi/spi-pl022.yaml         | 165 +++++++++++++++++++++
 .../devicetree/bindings/spi/spi_pl022.txt          |  70 ---------
 2 files changed, 165 insertions(+), 70 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/spi/spi-pl022.yaml
 delete mode 100644 Documentation/devicetree/bindings/spi/spi_pl022.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/spi/spi-pl022.yaml b/Documentation/devicetree/bindings/spi/spi-pl022.yaml
new file mode 100644
index 000000000000..dfb697c69341
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-pl022.yaml
@@ -0,0 +1,165 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/spi-pl022.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM PL022 SPI controller
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+
+allOf:
+  - $ref: "spi-controller.yaml#"
+
+# We need a select here so we don't match all nodes with 'arm,primecell'
+select:
+  properties:
+    compatible:
+      contains:
+        const: arm,pl022
+  required:
+    - compatible
+
+properties:
+  compatible:
+    items:
+      - const: arm,pl022
+      - const: arm,primecell
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 2
+
+  clock-names:
+    items:
+      - enum:
+          - SSPCLK
+          - sspclk
+      - const: apb_pclk
+
+  pl022,autosuspend-delay:
+    description: delay in ms following transfer completion before the
+      runtime power management system suspends the device. A setting of 0
+      indicates no delay and the device will be suspended immediately.
+    $ref: "/schemas/types.yaml#/definitions/uint32"
+
+  pl022,rt:
+    description: indicates the controller should run the message pump with realtime
+               priority to minimise the transfer latency on the bus (boolean)
+    type: boolean
+
+  dmas:
+    description:
+      Two or more DMA channel specifiers following the convention outlined
+      in bindings/dma/dma.txt
+    minItems: 2
+    maxItems: 32
+
+  dma-names:
+    description:
+      There must be at least one channel named "tx" for transmit and named "rx"
+      for receive.
+    minItems: 2
+    maxItems: 32
+    additionalItems: true
+    items:
+      - const: rx
+      - const: tx
+
+patternProperties:
+  "^[a-zA-Z][a-zA-Z0-9,+\\-._]{0,63}@[0-9a-f]+$":
+    type: object
+    # SPI slave nodes must be children of the SPI master node and can
+    # contain the following properties.
+    properties:
+      pl022,interface:
+        description: SPI interface type
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/uint32"
+          - enum:
+              - 0  # SPI
+              - 1  # Texas Instruments Synchronous Serial Frame Format
+              - 2  # Microwire (Half Duplex)
+
+      pl022,com-mode:
+        description: Specifies the transfer mode
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/uint32"
+          - enum:
+              - 0  # interrupt mode
+              - 1  # polling mode
+              - 2  # DMA mode
+            default: 1
+
+      pl022,rx-level-trig:
+        description: Rx FIFO watermark level
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/uint32"
+          - minimum: 0
+            maximum: 4
+
+      pl022,tx-level-trig:
+        description: Tx FIFO watermark level
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/uint32"
+          - minimum: 0
+            maximum: 4
+
+      pl022,ctrl-len:
+        description: Microwire interface - Control length
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/uint32"
+          - minimum: 0x03
+            maximum: 0x1f
+
+      pl022,wait-state:
+        description: Microwire interface - Wait state
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/uint32"
+          - enum: [ 0, 1 ]
+
+      pl022,duplex:
+        description: Microwire interface - Full/Half duplex
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/uint32"
+          - enum: [ 0, 1 ]
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+examples:
+  - |
+    spi@e0100000 {
+      compatible = "arm,pl022", "arm,primecell";
+      reg = <0xe0100000 0x1000>;
+      #address-cells = <1>;
+      #size-cells = <0>;
+      interrupts = <0 31 0x4>;
+      dmas = <&dma_controller 23 1>,
+        <&dma_controller 24 0>;
+      dma-names = "rx", "tx";
+
+      m25p80@1 {
+        compatible = "st,m25p80";
+        reg = <1>;
+        spi-max-frequency = <12000000>;
+        spi-cpol;
+        spi-cpha;
+        pl022,interface = <0>;
+        pl022,com-mode = <0x2>;
+        pl022,rx-level-trig = <0>;
+        pl022,tx-level-trig = <0>;
+        pl022,ctrl-len = <0x11>;
+        pl022,wait-state = <0>;
+        pl022,duplex = <0>;
+      };
+    };
+...
diff --git a/Documentation/devicetree/bindings/spi/spi_pl022.txt b/Documentation/devicetree/bindings/spi/spi_pl022.txt
deleted file mode 100644
index 7638b4968ddb..000000000000
--- a/Documentation/devicetree/bindings/spi/spi_pl022.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-ARM PL022 SPI controller
-
-Required properties:
-- compatible : "arm,pl022", "arm,primecell"
-- reg : Offset and length of the register set for the device
-- interrupts : Should contain SPI controller interrupt
-- num-cs : total number of chipselects
-
-Optional properties:
-- cs-gpios : should specify GPIOs used for chipselects.
-  The gpios will be referred to as reg = <index> in the SPI child nodes.
-  If unspecified, a single SPI device without a chip select can be used.
-- pl022,autosuspend-delay : delay in ms following transfer completion before
-			    the runtime power management system suspends the
-			    device. A setting of 0 indicates no delay and the
-                            device will be suspended immediately
-- pl022,rt : indicates the controller should run the message pump with realtime
-             priority to minimise the transfer latency on the bus (boolean)
-- dmas : Two or more DMA channel specifiers following the convention outlined
-         in bindings/dma/dma.txt
-- dma-names: Names for the dma channels, if present. There must be at
-	     least one channel named "tx" for transmit and named "rx" for
-             receive.
-
-
-SPI slave nodes must be children of the SPI master node and can
-contain the following properties.
-
-- pl022,interface : interface type:
-	0: SPI
-	1: Texas Instruments Synchronous Serial Frame Format
-	2: Microwire (Half Duplex)
-- pl022,com-mode : specifies the transfer mode:
-	0: interrupt mode
-	1: polling mode (default mode if property not present)
-	2: DMA mode
-- pl022,rx-level-trig : Rx FIFO watermark level
-- pl022,tx-level-trig : Tx FIFO watermark level
-- pl022,ctrl-len : Microwire interface: Control length
-- pl022,wait-state : Microwire interface: Wait state
-- pl022,duplex : Microwire interface: Full/Half duplex
-
-
-Example:
-
-	spi@e0100000 {
-		compatible = "arm,pl022", "arm,primecell";
-		reg = <0xe0100000 0x1000>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-		interrupts = <0 31 0x4>;
-		dmas = <&dma-controller 23 1>,
-			<&dma-controller 24 0>;
-		dma-names = "rx", "tx";
-
-		m25p80@1 {
-			compatible = "st,m25p80";
-			reg = <1>;
-			spi-max-frequency = <12000000>;
-			spi-cpol;
-			spi-cpha;
-			pl022,interface = <0>;
-			pl022,com-mode = <0x2>;
-			pl022,rx-level-trig = <0>;
-			pl022,tx-level-trig = <0>;
-			pl022,ctrl-len = <0x11>;
-			pl022,wait-state = <0>;
-			pl022,duplex = <0>;
-		};
-	};
-- 
cgit 


From 1305d97b7c7847b12872818326f0cb4da0439311 Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Sun, 5 May 2019 10:00:22 -0400
Subject: media: dt-bindings: marvell,mmp2-ccic: Add Marvell MMP2 camera

Add Marvell MMP2 camera host interface.

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 .../bindings/media/marvell,mmp2-ccic.txt           | 50 ++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/media/marvell,mmp2-ccic.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/media/marvell,mmp2-ccic.txt b/Documentation/devicetree/bindings/media/marvell,mmp2-ccic.txt
new file mode 100644
index 000000000000..7ec2c8c8a3b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/marvell,mmp2-ccic.txt
@@ -0,0 +1,50 @@
+Marvell MMP2 camera host interface
+
+Required properties:
+ - compatible: Should be "marvell,mmp2-ccic".
+ - reg: Register base and size.
+ - interrupts: The interrupt number.
+ - #clock-cells: Must be 0.
+
+Optional properties:
+ - clocks: Reference to the input clock as specified by
+           Documentation/devicetree/bindings/clock/clock-bindings.txt.
+ - clock-names: Names of the clocks used; "axi" for the AXI bus interface,
+                "func" for the peripheral clock and "phy" for the parallel
+                video bus interface.
+ - clock-output-names: Optional clock source for sensors. Shall be "mclk".
+
+Required subnodes:
+ - port: The parallel bus interface port with a single endpoint linked to
+         the sensor's endpoint as described in
+         Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+Required endpoint properties:
+ - bus-type: data bus type, <5> or <6> for Parallel or Bt.656 respectively
+ - pclk-sample: pixel clock polarity
+ - hsync-active: horizontal synchronization polarity (only required for
+   parallel bus)
+ - vsync-active: vertical synchronization polarity (only required for
+   parallel bus)
+
+Example:
+
+	camera0: camera@d420a000 {
+		compatible = "marvell,mmp2-ccic";
+		reg = <0xd420a000 0x800>;
+		interrupts = <42>;
+		clocks = <&soc_clocks MMP2_CLK_CCIC0>;
+		clock-names = "axi";
+		#clock-cells = <0>;
+		clock-output-names = "mclk";
+
+		port {
+			camera0_0: endpoint {
+				remote-endpoint = <&ov7670_0>;
+                                bus-type = <5>;      /* Parallel */
+                                hsync-active = <1>;  /* Active high */
+                                vsync-active = <1>;  /* Active high */
+                                pclk-sample = <0>;   /* Falling */
+			};
+		};
+	};
-- 
cgit 


From 9db9b76767f133d0e1ce19f01117c83221641899 Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Mon, 20 May 2019 13:52:53 -0700
Subject: Documentation/x86: Fix path to entry_32.S

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-doc@vger.kernel.org
Link: http://lkml.kernel.org/r/20190520205253.23762-1-ira.weiny@intel.com
[ Adjusted the patch to the RST conversion. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/x86/exception-tables.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/x86/exception-tables.rst b/Documentation/x86/exception-tables.rst
index 24596c8210b5..ed6d4b0cf62c 100644
--- a/Documentation/x86/exception-tables.rst
+++ b/Documentation/x86/exception-tables.rst
@@ -35,7 +35,7 @@ page fault handler::
   void do_page_fault(struct pt_regs *regs, unsigned long error_code)
 
 in arch/x86/mm/fault.c. The parameters on the stack are set up by
-the low level assembly glue in arch/x86/kernel/entry_32.S. The parameter
+the low level assembly glue in arch/x86/entry/entry_32.S. The parameter
 regs is a pointer to the saved registers on the stack, error_code
 contains a reason code for the exception.
 
-- 
cgit 


From 2c9239c125f0c657d2955780f364168d4c15aa8b Mon Sep 17 00:00:00 2001
From: Alexandre Torgue <alexandre.torgue@st.com>
Date: Fri, 10 May 2019 17:45:26 +0200
Subject: dt-bindings: pinctrl: Convert stm32 pinctrl bindings to json-schema

Convert the STM32 pinctrl binding to DT schema format using json-schema.

Signed-off-by: Alexandre Torgue <alexandre.torgue@st.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../bindings/pinctrl/st,stm32-pinctrl.txt          | 208 ----------------
 .../bindings/pinctrl/st,stm32-pinctrl.yaml         | 264 +++++++++++++++++++++
 2 files changed, 264 insertions(+), 208 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt
 create mode 100644 Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt
deleted file mode 100644
index 00169255e48c..000000000000
--- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt
+++ /dev/null
@@ -1,208 +0,0 @@
-* STM32 GPIO and Pin Mux/Config controller
-
-STMicroelectronics's STM32 MCUs intregrate a GPIO and Pin mux/config hardware
-controller. It controls the input/output settings on the available pins and
-also provides ability to multiplex and configure the output of various on-chip
-controllers onto these pads.
-
-Pin controller node:
-Required properies:
- - compatible: value should be one of the following:
-   "st,stm32f429-pinctrl"
-   "st,stm32f469-pinctrl"
-   "st,stm32f746-pinctrl"
-   "st,stm32f769-pinctrl"
-   "st,stm32h743-pinctrl"
-   "st,stm32mp157-pinctrl"
-   "st,stm32mp157-z-pinctrl"
- - #address-cells: The value of this property must be 1
- - #size-cells	: The value of this property must be 1
- - ranges	: defines mapping between pin controller node (parent) to
-   gpio-bank node (children).
- - pins-are-numbered: Specify the subnodes are using numbered pinmux to
-   specify pins.
-
-GPIO controller/bank node:
-Required properties:
- - gpio-controller : Indicates this device is a GPIO controller
- - #gpio-cells	  : Should be two.
-			The first cell is the pin number
-			The second one is the polarity:
-				- 0 for active high
-				- 1 for active low
- - reg		  : The gpio address range, relative to the pinctrl range
- - clocks	  : clock that drives this bank
- - st,bank-name	  : Should be a name string for this bank as specified in
-   the datasheet
-
-Optional properties:
- - reset:	  : Reference to the reset controller
- - st,syscfg: Should be phandle/offset/mask.
-	-The phandle to the syscon node which includes IRQ mux selection register.
-	-The offset of the IRQ mux selection register
-	-The field mask of IRQ mux, needed if different of 0xf.
- - gpio-ranges: Define a dedicated mapping between a pin-controller and
-   a gpio controller. Format is <&phandle a b c> with:
-	-(phandle): phandle of pin-controller.
-	-(a): gpio base offset in range.
-	-(b): pin base offset in range.
-	-(c): gpio count in range
-   This entry has to be used either if there are holes inside a bank:
-	GPIOB0/B1/B2/B14/B15 (see example 2)
-   or if banks are not contiguous:
-	GPIOA/B/C/E...
-   NOTE: If "gpio-ranges" is used for a gpio controller, all gpio-controller
-   have to use a "gpio-ranges" entry.
-   More details in Documentation/devicetree/bindings/gpio/gpio.txt.
- - st,bank-ioport: should correspond to the EXTI IOport selection (EXTI line
-   used to select GPIOs as interrupts).
- - hwlocks: reference to a phandle of a hardware spinlock provider node.
- - st,package: Indicates the SOC package used.
-   More details in include/dt-bindings/pinctrl/stm32-pinfunc.h
-
-Example 1:
-#include <dt-bindings/pinctrl/stm32f429-pinfunc.h>
-...
-
-	pin-controller {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		compatible = "st,stm32f429-pinctrl";
-		ranges = <0 0x40020000 0x3000>;
-		pins-are-numbered;
-
-		gpioa: gpio@40020000 {
-			gpio-controller;
-			#gpio-cells = <2>;
-			reg = <0x0 0x400>;
-			resets = <&reset_ahb1 0>;
-			st,bank-name = "GPIOA";
-		};
-		...
-		pin-functions nodes follow...
-	};
-
-Example 2:
-#include <dt-bindings/pinctrl/stm32f429-pinfunc.h>
-...
-
-	pinctrl: pin-controller {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		compatible = "st,stm32f429-pinctrl";
-		ranges = <0 0x40020000 0x3000>;
-		pins-are-numbered;
-
-		gpioa: gpio@40020000 {
-			gpio-controller;
-			#gpio-cells = <2>;
-			reg = <0x0 0x400>;
-			resets = <&reset_ahb1 0>;
-			st,bank-name = "GPIOA";
-			gpio-ranges = <&pinctrl 0 0 16>;
-		};
-
-		gpiob: gpio@40020400 {
-			gpio-controller;
-			#gpio-cells = <2>;
-			reg = <0x0 0x400>;
-			resets = <&reset_ahb1 0>;
-			st,bank-name = "GPIOB";
-			ngpios = 4;
-			gpio-ranges = <&pinctrl 0 16 3>,
-				      <&pinctrl 14 30 2>;
-		};
-
-
-		...
-		pin-functions nodes follow...
-	};
-
-
-Contents of function subnode node:
-----------------------------------
-Subnode format
-A pinctrl node should contain at least one subnode representing the
-pinctrl group available on the machine. Each subnode will list the
-pins it needs, and how they should be configured, with regard to muxer
-configuration, pullups, drive, output high/low and output speed.
-
-    node {
-	pinmux = <PIN_NUMBER_PINMUX>;
-	GENERIC_PINCONFIG;
-    };
-
-Required properties:
-- pinmux: integer array, represents gpio pin number and mux setting.
-  Supported pin number and mux varies for different SoCs, and are defined in
-  dt-bindings/pinctrl/<soc>-pinfunc.h directly.
-  These defines are calculated as:
-    ((port * 16 + line) << 8) | function
-  With:
-    - port: The gpio port index (PA = 0, PB = 1, ..., PK = 11)
-    - line: The line offset within the port (PA0 = 0, PA1 = 1, ..., PA15 = 15)
-    - function: The function number, can be:
-      * 0 : GPIO
-      * 1 : Alternate Function 0
-      * 2 : Alternate Function 1
-      * 3 : Alternate Function 2
-      * ...
-      * 16 : Alternate Function 15
-      * 17 : Analog
-
-  To simplify the usage, macro is available to generate "pinmux" field.
-  This macro is available here:
-    - include/dt-bindings/pinctrl/stm32-pinfunc.h
-
-  Some examples of using macro:
-    /* GPIO A9 set as alernate function 2 */
-    ... {
-		pinmux = <STM32_PINMUX('A', 9, AF2)>;
-    };
-    /* GPIO A9 set as GPIO  */
-    ... {
-		pinmux = <STM32_PINMUX('A', 9, GPIO)>;
-    };
-    /* GPIO A9 set as analog */
-    ... {
-		pinmux = <STM32_PINMUX('A', 9, ANALOG)>;
-    };
-
-Optional properties:
-- GENERIC_PINCONFIG: is the generic pinconfig options to use.
-  Available options are:
-   - bias-disable,
-   - bias-pull-down,
-   - bias-pull-up,
-   - drive-push-pull,
-   - drive-open-drain,
-   - output-low
-   - output-high
-   - slew-rate = <x>, with x being:
-       < 0 > : Low speed
-       < 1 > : Medium speed
-       < 2 > : Fast speed
-       < 3 > : High speed
-
-Example:
-
-pin-controller {
-...
-	usart1_pins_a: usart1@0 {
-		pins1 {
-			pinmux = <STM32_PINMUX('A', 9, AF7)>;
-			bias-disable;
-			drive-push-pull;
-			slew-rate = <0>;
-		};
-		pins2 {
-			pinmux = <STM32_PINMUX('A', 10, AF7)>;
-			bias-disable;
-		};
-	};
-};
-
-&usart1 {
-	pinctrl-0 = <&usart1_pins_a>;
-	pinctrl-names = "default";
-};
diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
new file mode 100644
index 000000000000..06c4b66c3ee6
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
@@ -0,0 +1,264 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright (C) STMicroelectronics 2019.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/st,stm32-pinctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STM32 GPIO and Pin Mux/Config controller
+
+maintainers:
+  - Alexandre TORGUE <alexandre.torgue@st.com>
+
+description: |
+  STMicroelectronics's STM32 MCUs intregrate a GPIO and Pin mux/config hardware
+  controller. It controls the input/output settings on the available pins and
+  also provides ability to multiplex and configure the output of various
+  on-chip controllers onto these pads.
+
+properties:
+  compatible:
+    enum:
+      - st,stm32f429-pinctrl
+      - st,stm32f469-pinctrl
+      - st,stm32f746-pinctrl
+      - st,stm32f769-pinctrl
+      - st,stm32h743-pinctrl
+      - st,stm32mp157-pinctrl
+      - st,stm32mp157-z-pinctrl
+
+  '#address-cells':
+    const: 1
+  '#size-cells':
+    const: 1
+
+  ranges: true
+  pins-are-numbered: true
+  hwlocks: true
+
+  st,syscfg:
+    $ref: "/schemas/types.yaml#/definitions/phandle-array"
+    description: Should be phandle/offset/mask
+    items:
+      - description: Phandle to the syscon node which includes IRQ mux selection.
+      - description: The offset of the IRQ mux selection register.
+      - description: The field mask of IRQ mux, needed if different of 0xf.
+
+  st,package:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - enum: [1, 2, 4, 8]
+    description:
+     Indicates the SOC package used.
+     More details in include/dt-bindings/pinctrl/stm32-pinfunc.h
+
+
+patternProperties:
+  '^gpio@[0-9a-f]*$':
+    properties:
+      gpio-controller: true
+      '#gpio-cells':
+        const: 2
+
+      reg:
+        maxItems: 1
+      clocks:
+        maxItems: 1
+      reset:
+        minItems: 1
+        maxItems: 1
+      gpio-ranges:
+        minItems: 1
+        maxItems: 16
+      ngpios:
+        description:
+          Number of available gpios in a bank.
+        minimum: 1
+        maximum: 16
+
+      st,bank-name:
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/string"
+          - enum:
+            - GPIOA
+            - GPIOB
+            - GPIOC
+            - GPIOD
+            - GPIOE
+            - GPIOF
+            - GPIOG
+            - GPIOH
+            - GPIOI
+            - GPIOJ
+            - GPIOK
+            - GPIOZ
+        description:
+          Should be a name string for this bank as specified in the datasheet.
+
+      st,bank-ioport:
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/uint32"
+          - minimum: 0
+          - maximum: 11
+
+        description:
+          Should correspond to the EXTI IOport selection (EXTI line used
+          to select GPIOs as interrupts).
+
+    required:
+      - gpio-controller
+      - '#gpio-cells'
+      - reg
+      - clocks
+      - st,bank-name
+
+  '-[0-9]*$':
+    patternProperties:
+      '^pins':
+        description: |
+          A pinctrl node should contain at least one subnode representing the
+          pinctrl group available on the machine. Each subnode will list the
+          pins it needs, and how they should be configured, with regard to muxer
+          configuration, pullups, drive, output high/low and output speed.
+        properties:
+          pinmux:
+            allOf:
+              - $ref: "/schemas/types.yaml#/definitions/uint32-array"
+            description: |
+              Integer array, represents gpio pin number and mux setting.
+              Supported pin number and mux varies for different SoCs, and are
+              defined in dt-bindings/pinctrl/<soc>-pinfunc.h directly.
+              These defines are calculated as: ((port * 16 + line) << 8) | function
+              With:
+              - port: The gpio port index (PA = 0, PB = 1, ..., PK = 11)
+              - line: The line offset within the port (PA0 = 0, PA1 = 1, ..., PA15 = 15)
+              - function: The function number, can be:
+              * 0 : GPIO
+              * 1 : Alternate Function 0
+              * 2 : Alternate Function 1
+              * 3 : Alternate Function 2
+              * ...
+              * 16 : Alternate Function 15
+              * 17 : Analog
+              To simplify the usage, macro is available to generate "pinmux" field.
+              This macro is available here:
+                - include/dt-bindings/pinctrl/stm32-pinfunc.h
+              Some examples of using macro:
+               /* GPIO A9 set as alernate function 2 */
+               ... {
+                          pinmux = <STM32_PINMUX('A', 9, AF2)>;
+               };
+               /* GPIO A9 set as GPIO  */
+               ... {
+                          pinmux = <STM32_PINMUX('A', 9, GPIO)>;
+               };
+               /* GPIO A9 set as analog */
+               ... {
+                          pinmux = <STM32_PINMUX('A', 9, ANALOG)>;
+               };
+
+          bias-disable:
+            type: boolean
+          bias-pull-down:
+            type: boolean
+          bias-pull-up:
+            type: boolean
+          drive-push-pull:
+            type: boolean
+          drive-open-drain:
+            type: boolean
+          output-low:
+            type: boolean
+          output-high:
+            type: boolean
+          slew-rate:
+            description: |
+              0: Low speed
+              1: Medium speed
+              2: Fast speed
+              3: High speed
+            allOf:
+              - $ref: /schemas/types.yaml#/definitions/uint32
+              - enum: [0, 1, 2, 3]
+
+        required:
+          - pinmux
+
+required:
+  - compatible
+  - '#address-cells'
+  - '#size-cells'
+  - ranges
+  - pins-are-numbered
+
+examples:
+  - |
+    #include <dt-bindings/pinctrl/stm32-pinfunc.h>
+    //Example 1
+      pinctrl@40020000 {
+              #address-cells = <1>;
+              #size-cells = <1>;
+              compatible = "st,stm32f429-pinctrl";
+              ranges = <0 0x40020000 0x3000>;
+              pins-are-numbered;
+
+              gpioa: gpio@0 {
+                      gpio-controller;
+                      #gpio-cells = <2>;
+                      reg = <0x0 0x400>;
+                      resets = <&reset_ahb1 0>;
+                      st,bank-name = "GPIOA";
+              };
+       };
+
+    //Example 2 (using gpio-ranges)
+      pinctrl@50020000 {
+              #address-cells = <1>;
+              #size-cells = <1>;
+              compatible = "st,stm32f429-pinctrl";
+              ranges = <0 0x50020000 0x3000>;
+              pins-are-numbered;
+
+              gpiob: gpio@1000 {
+                      gpio-controller;
+                      #gpio-cells = <2>;
+                      reg = <0x1000 0x400>;
+                      resets = <&reset_ahb1 0>;
+                      st,bank-name = "GPIOB";
+                      gpio-ranges = <&pinctrl 0 0 16>;
+              };
+
+              gpioc: gpio@2000 {
+                      gpio-controller;
+                      #gpio-cells = <2>;
+                      reg = <0x2000 0x400>;
+                      resets = <&reset_ahb1 0>;
+                      st,bank-name = "GPIOC";
+                      ngpios = <5>;
+                      gpio-ranges = <&pinctrl 0 16 3>,
+                                    <&pinctrl 14 30 2>;
+              };
+      };
+
+    //Example 3 pin groups
+      pinctrl@60020000 {
+        usart1_pins_a: usart1-0 {
+                pins1 {
+                        pinmux = <STM32_PINMUX('A', 9, AF7)>;
+                        bias-disable;
+                        drive-push-pull;
+                        slew-rate = <0>;
+                };
+                pins2 {
+                        pinmux = <STM32_PINMUX('A', 10, AF7)>;
+                        bias-disable;
+                };
+        };
+    };
+
+    usart1 {
+                pinctrl-0 = <&usart1_pins_a>;
+                pinctrl-names = "default";
+    };
+
+...
-- 
cgit 


From 1254db248fce4e2cf05a01c1c8df3b79745424c0 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 16 May 2019 17:13:38 +0200
Subject: dt-bindings: pinctrl: meson: add output support in pinconf

add support for the pinconf DT property output-enable, output-disable,
output-low and output-high in the meson pinctrl driver.

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
index a7618605bf1e..10dc4f7176ca 100644
--- a/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
@@ -47,9 +47,15 @@ Required properties for pinmux nodes are:
 Required properties for configuration nodes:
  - pins: a list of pin names
 
-Configuration nodes support the generic properties "bias-disable",
-"bias-pull-up" and "bias-pull-down", described in file
-pinctrl-bindings.txt
+Configuration nodes support the following generic properties, as
+described in file pinctrl-bindings.txt:
+ - "bias-disable"
+ - "bias-pull-up"
+ - "bias-pull-down"
+ - "output-enable"
+ - "output-disable"
+ - "output-low"
+ - "output-high"
 
 Optional properties :
  - drive-strength-microamp: Drive strength for the specified pins in uA.
-- 
cgit 


From 13531e5d359e30d9e3d1cabd246a24cf6fdf084a Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Mon, 20 May 2019 14:00:57 +0530
Subject: dt-bindings: pinctrl: Modify pinctrl memory map

Earlier, the PWM registers were included as part of the pinctrl memory
map, but this turned to be useless as the muxing is being handled by the
SoC pin controller itself. So, lets modify the pinctrl memory map to
reflect the same.

Fixes: 07b734fbdea2 ("dt-bindings: pinctrl: Add BM1880 pinctrl binding")
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt
index ed34bb1ee81c..cc9a89aa4170 100644
--- a/Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt
@@ -85,9 +85,9 @@ Required Properties:
                   spi0
 
 Example:
-        pinctrl: pinctrl@50 {
+        pinctrl: pinctrl@400 {
                 compatible = "bitmain,bm1880-pinctrl";
-                reg = <0x50 0x4B0>;
+                reg = <0x400 0x120>;
 
                 pinctrl_uart0_default: uart0-default {
                         pinmux {
-- 
cgit 


From 752a74038dbd999c108f1c7474908f7501e246bb Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Mon, 20 May 2019 14:01:00 +0530
Subject: dt-bindings: pinctrl: Document pinconf bindings for BM1880 SoC

Document pinconf bindings for Bitmain BM1880 SoC.

Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../bindings/pinctrl/bitmain,bm1880-pinctrl.txt       | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt
index cc9a89aa4170..4eb089bcb5f3 100644
--- a/Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt
@@ -14,7 +14,8 @@ phrase "pin configuration node".
 The pin configuration nodes act as a container for an arbitrary number of
 subnodes. Each of these subnodes represents some desired configuration for a
 pin, a group, or a list of pins or groups. This configuration for BM1880 SoC
-includes only pinmux as there is no pinconf support available in SoC.
+includes pinmux and various pin configuration parameters, such as pull-up,
+slew rate etc...
 
 Each configuration node can consist of multiple nodes describing the pinmux
 options. The name of each subnode is not important; all subnodes should be
@@ -84,6 +85,22 @@ Required Properties:
                   gpio66, gpio67, eth1, i2s0, i2s0_mclkin, i2s1, i2s1_mclkin,
                   spi0
 
+Optional Properties:
+
+- bias-disable:  No arguments. Disable pin bias.
+- bias-pull-down: No arguments. The specified pins should be configured as
+                  pull down.
+- bias-pull-up:   No arguments. The specified pins should be configured as
+                  pull up.
+- input-schmitt-enable: No arguments: Enable schmitt trigger for the specified
+                  pins
+- input-schmitt-disable: No arguments: Disable schmitt trigger for the specified
+                  pins
+- slew-rate:      Integer. Sets slew rate for the specified pins.
+                  Valid values are:
+                  <0>  - Slow
+                  <1>  - Fast
+
 Example:
         pinctrl: pinctrl@400 {
                 compatible = "bitmain,bm1880-pinctrl";
-- 
cgit 


From e0917169e5cc068a0c791726329746d1f4752b7a Mon Sep 17 00:00:00 2001
From: Clément Péron <peron.clem@gmail.com>
Date: Thu, 23 May 2019 17:10:47 +0200
Subject: dt-bindings: watchdog: add Allwinner H6 watchdog
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allwinner H6 has a similar watchdog as the A64 which is already
a compatible of the A31.

This commit add the H6 compatible.

Signed-off-by: Clément Péron <peron.clem@gmail.com>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
---
 Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt b/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt
index 46055254e8dd..e65198d82a2b 100644
--- a/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt
@@ -6,6 +6,7 @@ Required properties:
 	"allwinner,sun4i-a10-wdt"
 	"allwinner,sun6i-a31-wdt"
 	"allwinner,sun50i-a64-wdt","allwinner,sun6i-a31-wdt"
+	"allwinner,sun50i-h6-wdt","allwinner,sun6i-a31-wdt"
 	"allwinner,suniv-f1c100s-wdt", "allwinner,sun4i-a10-wdt"
 - reg : Specifies base physical address and size of the registers.
 
-- 
cgit 


From ef98682a4e1257ff45405bb1372939e8dfe774bb Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Sat, 11 May 2019 00:15:22 +0530
Subject: dt-bindings: reset: Add devicetree binding for BM1880 reset
 controller

Add devicetree binding for Bitmain BM1880 SoC reset controller.

Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 .../devicetree/bindings/reset/bitmain,bm1880-reset.txt | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/reset/bitmain,bm1880-reset.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/reset/bitmain,bm1880-reset.txt b/Documentation/devicetree/bindings/reset/bitmain,bm1880-reset.txt
new file mode 100644
index 000000000000..a6f8455ae6c4
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/bitmain,bm1880-reset.txt
@@ -0,0 +1,18 @@
+Bitmain BM1880 SoC Reset Controller
+===================================
+
+Please also refer to reset.txt in this directory for common reset
+controller binding usage.
+
+Required properties:
+- compatible:   Should be "bitmain,bm1880-reset"
+- reg:          Offset and length of reset controller space in SCTRL.
+- #reset-cells: Must be 1.
+
+Example:
+
+        rst: reset-controller@c00 {
+                compatible = "bitmain,bm1880-reset";
+                reg = <0xc00 0x8>;
+                #reset-cells = <1>;
+        };
-- 
cgit 


From ad8288b89d4f02d184391457cf655e5f68943341 Mon Sep 17 00:00:00 2001
From: "Y.b. Lu" <yangbo.lu@nxp.com>
Date: Thu, 23 May 2019 02:33:37 +0000
Subject: dt-binding: ptp_qoriq: support ENETC PTP compatible

Add a new compatible for ENETC PTP.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/ptp/ptp-qoriq.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
index 454c937076a2..6ec053449278 100644
--- a/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
+++ b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
@@ -4,6 +4,7 @@ General Properties:
 
   - compatible   Should be "fsl,etsec-ptp" for eTSEC
                  Should be "fsl,fman-ptp-timer" for DPAA FMan
+		 Should be "fsl,enetc-ptp" for ENETC
   - reg          Offset and length of the register set for the device
   - interrupts   There should be at least two interrupts. Some devices
                  have as many as four PTP related interrupts.
-- 
cgit 


From e7c787cb2697ee4c101109bf21b4fc397cea150b Mon Sep 17 00:00:00 2001
From: Christian Mauderer <oss@c-mauderer.de>
Date: Mon, 13 May 2019 21:33:06 +0200
Subject: dt-bindings: leds: Add binding for spi-byte LED.

This patch adds the binding documentation for a simple SPI based LED
controller which use only one byte for setting the brightness.

Signed-off-by: Christian Mauderer <oss@c-mauderer.de>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 .../devicetree/bindings/leds/leds-spi-byte.txt     | 44 ++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/leds/leds-spi-byte.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/leds/leds-spi-byte.txt b/Documentation/devicetree/bindings/leds/leds-spi-byte.txt
new file mode 100644
index 000000000000..28b6b2d9091e
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-spi-byte.txt
@@ -0,0 +1,44 @@
+* Single Byte SPI LED Device Driver.
+
+The driver can be used for controllers with a very simple SPI protocol:
+- one LED is controlled by a single byte on MOSI
+- the value of the byte gives the brightness between two values (lowest to
+  highest)
+- no return value is necessary (no MISO signal)
+
+The value for lowest and highest brightness is dependent on the device and
+therefore on the compatible string.
+
+Depending on the compatible string some special functions (like hardware
+accelerated blinking) might can be supported too.
+
+The driver currently only supports one LED. The properties of the LED are
+configured in a sub-node in the device node.
+
+Required properties:
+- compatible: should be one of
+   * "ubnt,acb-spi-led"		microcontroller (SONiX 8F26E611LA) based device
+				used for example in Ubiquiti airCube ISP
+
+Property rules described in Documentation/devicetree/bindings/spi/spi-bus.txt
+apply.
+
+LED sub-node properties:
+- label:
+	see Documentation/devicetree/bindings/leds/common.txt
+- default-state:
+	see Documentation/devicetree/bindings/leds/common.txt
+	Only "on" and "off" are supported.
+
+Example:
+
+led-controller@0 {
+	compatible = "ubnt,acb-spi-led";
+	reg = <0>;
+	spi-max-frequency = <100000>;
+
+	led {
+		label = "white:status";
+		default-state = "on";
+	};
+};
-- 
cgit 


From a5f6f88c3d1a453dd35cbaac2870f5fae866ad2e Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Fri, 24 May 2019 14:22:36 -0600
Subject: docs: Do not seek comments in kernel/rcu/tree_plugin.h

There are no kerneldoc comments in this file, so do not attempt to
include them in the docs build.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/core-api/kernel-api.rst | 2 --
 Documentation/driver-api/basics.rst   | 3 ---
 2 files changed, 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index a29c99d13331..a53ec2eb8176 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -358,8 +358,6 @@ Read-Copy Update (RCU)
 
 .. kernel-doc:: kernel/rcu/tree.c
 
-.. kernel-doc:: kernel/rcu/tree_plugin.h
-
 .. kernel-doc:: kernel/rcu/tree_exp.h
 
 .. kernel-doc:: kernel/rcu/update.c
diff --git a/Documentation/driver-api/basics.rst b/Documentation/driver-api/basics.rst
index e970fadf4d1a..1ba88c7b3984 100644
--- a/Documentation/driver-api/basics.rst
+++ b/Documentation/driver-api/basics.rst
@@ -115,9 +115,6 @@ Kernel utility functions
 .. kernel-doc:: kernel/rcu/tree.c
    :export:
 
-.. kernel-doc:: kernel/rcu/tree_plugin.h
-   :export:
-
 .. kernel-doc:: kernel/rcu/update.c
    :export:
 
-- 
cgit 


From e8d4f892bb245702ee23abfcd28eb98b5eca6c86 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Fri, 24 May 2019 14:31:50 -0600
Subject: docs: Fix a misdirected kerneldoc directive

The stratix10 service layer documentation tried to include a kerneldoc
comments for a nonexistent struct; leading to a "no structured comments
found" message.  Switch it to stratix10_svc_command_config_type, which
appears at that spot in the sequence and was not included.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/firmware/other_interfaces.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/firmware/other_interfaces.rst b/Documentation/driver-api/firmware/other_interfaces.rst
index a4ac54b5fd79..b81794e0cfbb 100644
--- a/Documentation/driver-api/firmware/other_interfaces.rst
+++ b/Documentation/driver-api/firmware/other_interfaces.rst
@@ -33,7 +33,7 @@ of the requests on to a secure monitor (EL3).
    :functions: stratix10_svc_client_msg
 
 .. kernel-doc:: include/linux/firmware/intel/stratix10-svc-client.h
-   :functions: stratix10_svc_command_reconfig_payload
+   :functions: stratix10_svc_command_config_type
 
 .. kernel-doc:: include/linux/firmware/intel/stratix10-svc-client.h
    :functions: stratix10_svc_cb_data
-- 
cgit 


From 79b647a0c0d52a03feb8b39a6311b01bb2bf87a8 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 23 May 2019 20:07:56 +0200
Subject: dt-bindings: net: document new usxgmii phy mode

Add new interface mode USXGMII to binding documentation.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/ethernet.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt
index e88c3641d613..5475682bf06e 100644
--- a/Documentation/devicetree/bindings/net/ethernet.txt
+++ b/Documentation/devicetree/bindings/net/ethernet.txt
@@ -43,6 +43,7 @@ Documentation/devicetree/bindings/phy/phy-bindings.txt.
   * "rxaui"
   * "xaui"
   * "10gbase-kr" (10GBASE-KR, XFI, SFI)
+  * "usxgmii"
 - phy-connection-type: the same as "phy-mode" property but described in the
   Devicetree Specification;
 - phy-handle: phandle, specifies a reference to a node representing a PHY
-- 
cgit 


From 1372bbe66a59528e65d10ae67416835d3ec2452e Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Mon, 6 May 2019 14:16:12 -0500
Subject: dt-bindings: ti-lmu: Modify dt bindings for the LM3697

The LM3697 is a single function LED driver. The single function LED
driver needs to reside in the LED directory as a dedicated LED driver
and not as a MFD device.  The device does have common brightness and ramp
features and those can be accomodated by a TI LMU framework.

The LM3697 dt binding needs to be moved from the ti-lmu.txt and a dedicated
LED dt binding needs to be added.  The new LM3697 LED dt binding will then
reside in the Documentation/devicetree/bindings/leds directory and follow the
current LED and general bindings guidelines.

Signed-off-by: Dan Murphy <dmurphy@ti.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 .../devicetree/bindings/leds/leds-lm3697.txt       | 73 ++++++++++++++++++++++
 Documentation/devicetree/bindings/mfd/ti-lmu.txt   | 27 +-------
 2 files changed, 74 insertions(+), 26 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/leds/leds-lm3697.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/leds/leds-lm3697.txt b/Documentation/devicetree/bindings/leds/leds-lm3697.txt
new file mode 100644
index 000000000000..63992d732959
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-lm3697.txt
@@ -0,0 +1,73 @@
+* Texas Instruments - LM3697 Highly Efficient White LED Driver
+
+The LM3697 11-bit LED driver provides high-
+performance backlight dimming for 1, 2, or 3 series
+LED strings while delivering up to 90% efficiency.
+
+This device is suitable for display and keypad lighting
+
+Required properties:
+	- compatible:
+		"ti,lm3697"
+	- reg :  I2C slave address
+	- #address-cells : 1
+	- #size-cells : 0
+
+Optional properties:
+	- enable-gpios : GPIO pin to enable/disable the device
+	- vled-supply : LED supply
+
+Required child properties:
+	- reg : 0 - LED is Controlled by bank A
+		1 - LED is Controlled by bank B
+	- led-sources : Indicates which HVLED string is associated to which
+			control bank.  This is a zero based property so
+			HVLED1 = 0, HVLED2 = 1, HVLED3 = 2.
+			Additional information is contained
+			in Documentation/devicetree/bindings/leds/common.txt
+
+Optional child properties:
+	- ti,brightness-resolution - see Documentation/devicetree/bindings/mfd/ti-lmu.txt
+	- ramp-up-us: see Documentation/devicetree/bindings/mfd/ti-lmu.txt
+	- ramp-down-us: see Documentation/devicetree/bindings/mfd/ti-lmu.txt
+	- label : see Documentation/devicetree/bindings/leds/common.txt
+	- linux,default-trigger :
+	   see Documentation/devicetree/bindings/leds/common.txt
+
+Example:
+
+HVLED string 1 and 3 are controlled by control bank A and HVLED 2 string is
+controlled by control bank B.
+
+led-controller@36 {
+	compatible = "ti,lm3697";
+	#address-cells = <1>;
+	#size-cells = <0>;
+	reg = <0x36>;
+
+	enable-gpios = <&gpio1 28 GPIO_ACTIVE_HIGH>;
+	vled-supply = <&vbatt>;
+
+	led@0 {
+		reg = <0>;
+		led-sources = <0 2>;
+		ti,brightness-resolution = <2047>;
+		ramp-up-us = <5000>;
+		ramp-down-us = <1000>;
+		label = "white:first_backlight_cluster";
+		linux,default-trigger = "backlight";
+	};
+
+	led@1 {
+		reg = <1>;
+		led-sources = <1>;
+		ti,brightness-resolution = <255>;
+		ramp-up-us = <500>;
+		ramp-down-us = <1000>;
+		label = "white:second_backlight_cluster";
+		linux,default-trigger = "backlight";
+	};
+}
+
+For more product information please see the link below:
+http://www.ti.com/lit/ds/symlink/lm3697.pdf
diff --git a/Documentation/devicetree/bindings/mfd/ti-lmu.txt b/Documentation/devicetree/bindings/mfd/ti-lmu.txt
index e3efefb194c5..782d3c9812ed 100644
--- a/Documentation/devicetree/bindings/mfd/ti-lmu.txt
+++ b/Documentation/devicetree/bindings/mfd/ti-lmu.txt
@@ -8,7 +8,6 @@ TI LMU driver supports lighting devices below.
   LM3632       Backlight and regulator
   LM3633       Backlight, LED and fault monitor
   LM3695       Backlight
-  LM3697       Backlight and fault monitor
 
 Required properties:
   - compatible: Should be one of:
@@ -16,11 +15,10 @@ Required properties:
                 "ti,lm3632"
                 "ti,lm3633"
                 "ti,lm3695"
-                "ti,lm3697"
   - reg: I2C slave address.
          0x11 for LM3632
          0x29 for LM3631
-         0x36 for LM3633, LM3697
+         0x36 for LM3633
          0x63 for LM3695
 
 Optional properties:
@@ -51,7 +49,6 @@ Optional nodes:
     Required properties:
       - compatible: Should be one of:
                     "ti,lm3633-fault-monitor"
-                    "ti,lm3697-fault-monitor"
   - leds: LED properties for LM3633. Please refer to [2].
   - regulators: Regulator properties for LM3631 and LM3632.
                 Please refer to [3].
@@ -216,25 +213,3 @@ lm3695@63 {
 		};
 	};
 };
-
-lm3697@36 {
-	compatible = "ti,lm3697";
-	reg = <0x36>;
-
-	enable-gpios = <&pioC 2 GPIO_ACTIVE_HIGH>;
-
-	backlight {
-		compatible = "ti,lm3697-backlight";
-
-		lcd {
-			ti,brightness-resolution = <255>;
-			led-sources = <0 1 2>;
-			ramp-up-us = <200000>;
-			ramp-down-us = <200000>;
-		};
-	};
-
-	fault-monitor {
-		compatible = "ti,lm3697-fault-monitor";
-	};
-};
-- 
cgit 


From 41ce14e39bbe0683a2d49385ee8a8cb0b1d010eb Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Fri, 24 May 2019 14:43:42 -0600
Subject: docs: Do not seek kerneldoc comments in hw-consumer.h

There are no kerneldoc comments here, so looking for them just yields a
warning in the docs build.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/iio/hw-consumer.rst | 1 -
 1 file changed, 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/iio/hw-consumer.rst b/Documentation/driver-api/iio/hw-consumer.rst
index e0fe0b98230e..819fb9edc005 100644
--- a/Documentation/driver-api/iio/hw-consumer.rst
+++ b/Documentation/driver-api/iio/hw-consumer.rst
@@ -45,7 +45,6 @@ A typical IIO HW consumer setup looks like this::
 
 More details
 ============
-.. kernel-doc:: include/linux/iio/hw-consumer.h
 .. kernel-doc:: drivers/iio/buffer/industrialio-hw-consumer.c
    :export:
 
-- 
cgit 


From 2496f17772f757ba7a58c2abc1cdfdaf7cca29fb Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Thu, 23 May 2019 10:14:15 +0200
Subject: dt-bindings: Add vendor prefix for Espressif

Add Espressif Systems DT vendor prefix. That prefix has been used for quite
some time for WiFi chips, but has never been documented.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 33a65a45e319..c4d4f6714814 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -277,6 +277,8 @@ patternProperties:
     description: Ecole Polytechnique Fédérale de Lausanne
   "^epson,.*":
     description: Seiko Epson Corp.
+  "^esp,.*":
+    description: Espressif Systems Co. Ltd.
   "^est,.*":
     description: ESTeem Wireless Modems
   "^ettus,.*":
-- 
cgit 


From 3aef4472665695be7cbdd2cc274814f56d36e4ef Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Fri, 24 May 2019 15:01:30 -0600
Subject: docs: No structured comments in target_core_device.c

Documentation/driver-api/target.rst is seeking kerneldoc comments in
drivers/target/target_core_device.c, but no such comments exist.  Take out
the kernel-doc directive and eliminate one warning from the build.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/target.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/target.rst b/Documentation/driver-api/target.rst
index 4363611dd86d..620ec6173a93 100644
--- a/Documentation/driver-api/target.rst
+++ b/Documentation/driver-api/target.rst
@@ -10,8 +10,8 @@ TBD
 Target core device interfaces
 =============================
 
-.. kernel-doc:: drivers/target/target_core_device.c
-    :export:
+This section is blank because no kerneldoc comments have been added to
+drivers/target/target_core_device.c.
 
 Target core transport interfaces
 ================================
-- 
cgit 


From dea20be5063c97bdac48e81ee2a85975f14885ed Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Fri, 24 May 2019 15:03:39 -0600
Subject: docs: no structured comments in fs/file_table.c

Remove the kernel-doc directive, since there are only warnings to be found
there.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/api-summary.rst | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/api-summary.rst b/Documentation/filesystems/api-summary.rst
index aa51ffcfa029..bbb0c1c0e5cf 100644
--- a/Documentation/filesystems/api-summary.rst
+++ b/Documentation/filesystems/api-summary.rst
@@ -89,9 +89,6 @@ Other Functions
 .. kernel-doc:: fs/direct-io.c
    :export:
 
-.. kernel-doc:: fs/file_table.c
-   :export:
-
 .. kernel-doc:: fs/libfs.c
    :export:
 
-- 
cgit 


From 3f715b147a6c5245ee25d7334f4053c339feef98 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Fri, 24 May 2019 15:05:41 -0600
Subject: docs: No structured comments in include/linux/interconnect.h

Remove the kernel-doc directive for this file, since there's nothing there
and it generates a warning.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/interconnect/interconnect.rst | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/interconnect/interconnect.rst b/Documentation/interconnect/interconnect.rst
index b8107dcc4cd3..c3e004893796 100644
--- a/Documentation/interconnect/interconnect.rst
+++ b/Documentation/interconnect/interconnect.rst
@@ -89,6 +89,5 @@ Interconnect consumers
 
 Interconnect consumers are the clients which use the interconnect APIs to
 get paths between endpoints and set their bandwidth/latency/QoS requirements
-for these interconnect paths.
-
-.. kernel-doc:: include/linux/interconnect.h
+for these interconnect paths.  These interfaces are not currently
+documented.
-- 
cgit 


From 253a41c6fbadd1305e25f84ffc455f2b4d57439c Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Tue, 14 May 2019 13:40:51 -0700
Subject: dt-bindings: Remove Linuxisms from common-properties binding

We shouldn't reference Linux kernel functions or Linux itself in proper
bindings. It's OK to reference functions in the kernel when explaining
examples, but otherwise we shouldn't reference functions to describe
what the binding means.

Cc: Hsin-Yi Wang <hsinyi@chromium.org>
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/common-properties.txt | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/common-properties.txt b/Documentation/devicetree/bindings/common-properties.txt
index a3448bfa1c82..98a28130e100 100644
--- a/Documentation/devicetree/bindings/common-properties.txt
+++ b/Documentation/devicetree/bindings/common-properties.txt
@@ -5,30 +5,29 @@ Endianness
 ----------
 
 The Devicetree Specification does not define any properties related to hardware
-byteswapping, but endianness issues show up frequently in porting Linux to
+byte swapping, but endianness issues show up frequently in porting drivers to
 different machine types.  This document attempts to provide a consistent
-way of handling byteswapping across drivers.
+way of handling byte swapping across drivers.
 
 Optional properties:
  - big-endian: Boolean; force big endian register accesses
    unconditionally (e.g. ioread32be/iowrite32be).  Use this if you
-   know the peripheral always needs to be accessed in BE mode.
+   know the peripheral always needs to be accessed in big endian (BE) mode.
  - little-endian: Boolean; force little endian register accesses
    unconditionally (e.g. readl/writel).  Use this if you know the
-   peripheral always needs to be accessed in LE mode.
+   peripheral always needs to be accessed in little endian (LE) mode.
  - native-endian: Boolean; always use register accesses matched to the
    endianness of the kernel binary (e.g. LE vmlinux -> readl/writel,
-   BE vmlinux -> ioread32be/iowrite32be).  In this case no byteswaps
+   BE vmlinux -> ioread32be/iowrite32be).  In this case no byte swaps
    will ever be performed.  Use this if the hardware "self-adjusts"
    register endianness based on the CPU's configured endianness.
 
 If a binding supports these properties, then the binding should also
 specify the default behavior if none of these properties are present.
 In such cases, little-endian is the preferred default, but it is not
-a requirement.  The of_device_is_big_endian() and of_fdt_is_big_endian()
-helper functions do assume that little-endian is the default, because
-most existing (PCI-based) drivers implicitly default to LE by using
-readl/writel for MMIO accesses.
+a requirement.  Some implementations assume that little-endian is
+the default, because most existing (PCI-based) drivers implicitly
+default to LE for their MMIO accesses.
 
 Examples:
 Scenario 1 : CPU in LE mode & device in LE mode.
-- 
cgit 


From 48d07c04b4cc1dc1221965312f58fd84926212fe Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 20 Mar 2019 22:13:33 +0100
Subject: rcu: Enable elimination of Tree-RCU softirq processing

Some workloads need to change kthread priority for RCU core processing
without affecting other softirq work.  This commit therefore introduces
the rcutree.use_softirq kernel boot parameter, which moves the RCU core
work from softirq to a per-CPU SCHED_OTHER kthread named rcuc.  Use of
SCHED_OTHER approach avoids the scalability problems that appeared
with the earlier attempt to move RCU core processing to from softirq
to kthreads.  That said, kernels built with RCU_BOOST=y will run the
rcuc kthreads at the RCU-boosting priority.

Note that rcutree.use_softirq=0 must be specified to move RCU core
processing to the rcuc kthreads: rcutree.use_softirq=1 is the default.

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
[ paulmck: Adjust for invoke_rcu_callbacks() only ever being invoked
  from RCU core processing, in contrast to softirq->rcuc transition
  in old mainline RCU priority boosting. ]
[ paulmck: Avoid wakeups when scheduler might have invoked rcu_read_unlock()
  while holding rq or pi locks, also possibly fixing a pre-existing latent
  bug involving raise_softirq()-induced wakeups. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 Documentation/admin-guide/kernel-parameters.txt | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..b96fd15c7316 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3752,6 +3752,12 @@
 			the propagation of recent CPU-hotplug changes up
 			the rcu_node combining tree.
 
+	rcutree.use_softirq=	[KNL]
+			If set to zero, move all RCU_SOFTIRQ processing to
+			per-CPU rcuc kthreads.  Defaults to a non-zero
+			value, meaning that RCU_SOFTIRQ is used by default.
+			Specify rcutree.use_softirq=0 to use rcuc kthreads.
+
 	rcutree.rcu_fanout_exact= [KNL]
 			Disable autobalancing of the rcu_node combining
 			tree.  This is used by rcutorture, and might
-- 
cgit 


From 88903c464321cdbc2d473c24cbf311f576cf05bc Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 15 May 2019 14:38:30 +0900
Subject: tracing/probe: Add ustring type for user-space string

Add "ustring" type for fetching user-space string from kprobe event.
User can specify ustring type at uprobe event, and it is same as
"string" for uprobe.

Note that probe-event provides this option but it doesn't choose the
correct type automatically since we have not way to decide the address
is in user-space or not on some arch (and on some other arch, you can
fetch the string by "string" type). So user must carefully check the
target code (e.g. if you see __user on the target variable) and
use this new type.

Link: http://lkml.kernel.org/r/155789871009.26965.14167558859557329331.stgit@devnote2

Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 Documentation/trace/kprobetrace.rst | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index 235ce2ab131a..a3ac7c9ac242 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -55,7 +55,8 @@ Synopsis of kprobe_events
   NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
   FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
 		  (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
-		  (x8/x16/x32/x64), "string" and bitfield are supported.
+		  (x8/x16/x32/x64), "string", "ustring" and bitfield
+		  are supported.
 
   (\*1) only for the probe on function entry (offs == 0).
   (\*2) only for return probe.
@@ -77,7 +78,11 @@ apply it to registers/stack-entries etc. (for example, '$stack1:x8[8]' is
 wrong, but '+8($stack):x8[8]' is OK.)
 String type is a special type, which fetches a "null-terminated" string from
 kernel space. This means it will fail and store NULL if the string container
-has been paged out.
+has been paged out. "ustring" type is an alternative of string for user-space.
+Note that kprobe-event provides string/ustring types, but doesn't change it
+automatically. So user has to decide if the targe string in kernel or in user
+space carefully. On some arch, if you choose wrong one, it always fails to
+record string data.
 The string array type is a bit different from other types. For other base
 types, <base-type>[1] is equal to <base-type> (e.g. +0(%di):x32[1] is same
 as +0(%di):x32.) But string[1] is not equal to string. The string type itself
-- 
cgit 


From e65f7ae7f4da56622ecf8f1eaed333b9a13f9435 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 15 May 2019 14:38:42 +0900
Subject: tracing/probe: Support user-space dereference

Support user-space dereference syntax for probe event arguments
to dereference the data-structure or array in user-space.

The syntax is just adding 'u' before an offset value.

 +|-u<OFFSET>(<FETCHARG>)

e.g. +u8(%ax), +u0(+0(%si))

For example, if you probe do_sched_setscheduler(pid, policy,
param) and record param->sched_priority, you can add new
probe as below;

 p do_sched_setscheduler priority=+u0($arg3)

Note that kprobe event provides this and it doesn't change the
dereference method automatically because we do not know whether
the given address is in userspace or kernel on some archs.

So as same as "ustring", this is an option for user, who has to
carefully choose the dereference method.

Link: http://lkml.kernel.org/r/155789872187.26965.4468456816590888687.stgit@devnote2

Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 Documentation/trace/kprobetrace.rst  | 27 ++++++++++++++++++++++-----
 Documentation/trace/uprobetracer.rst | 10 ++++++----
 2 files changed, 28 insertions(+), 9 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index a3ac7c9ac242..09ff474493e1 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -51,7 +51,7 @@ Synopsis of kprobe_events
   $argN		: Fetch the Nth function argument. (N >= 1) (\*1)
   $retval	: Fetch return value.(\*2)
   $comm		: Fetch current task comm.
-  +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(\*3)
+  +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4)
   NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
   FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
 		  (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
@@ -61,6 +61,7 @@ Synopsis of kprobe_events
   (\*1) only for the probe on function entry (offs == 0).
   (\*2) only for return probe.
   (\*3) this is useful for fetching a field of data structures.
+  (\*4) "u" means user-space dereference. See :ref:`user_mem_access`.
 
 Types
 -----
@@ -79,10 +80,7 @@ wrong, but '+8($stack):x8[8]' is OK.)
 String type is a special type, which fetches a "null-terminated" string from
 kernel space. This means it will fail and store NULL if the string container
 has been paged out. "ustring" type is an alternative of string for user-space.
-Note that kprobe-event provides string/ustring types, but doesn't change it
-automatically. So user has to decide if the targe string in kernel or in user
-space carefully. On some arch, if you choose wrong one, it always fails to
-record string data.
+See :ref:`user_mem_access` for more info..
 The string array type is a bit different from other types. For other base
 types, <base-type>[1] is equal to <base-type> (e.g. +0(%di):x32[1] is same
 as +0(%di):x32.) But string[1] is not equal to string. The string type itself
@@ -97,6 +95,25 @@ Symbol type('symbol') is an alias of u32 or u64 type (depends on BITS_PER_LONG)
 which shows given pointer in "symbol+offset" style.
 For $comm, the default type is "string"; any other type is invalid.
 
+.. _user_mem_access:
+User Memory Access
+------------------
+Kprobe events supports user-space memory access. For that purpose, you can use
+either user-space dereference syntax or 'ustring' type.
+
+The user-space dereference syntax allows you to access a field of a data
+structure in user-space. This is done by adding the "u" prefix to the
+dereference syntax. For example, +u4(%si) means it will read memory from the
+address in the register %si offset by 4, and the memory is expected to be in
+user-space. You can use this for strings too, e.g. +u0(%si):string will read
+a string from the address in the register %si that is expected to be in user-
+space. 'ustring' is a shortcut way of performing the same task. That is,
++0(%si):ustring is equivalent to +u0(%si):string.
+
+Note that kprobe-event provides the user-memory access syntax but it doesn't
+use it transparently. This means if you use normal dereference or string type
+for user memory, it might fail, and may always fail on some archs. The user
+has to carefully check if the target data is in kernel or user space.
 
 Per-Probe Event Filtering
 -------------------------
diff --git a/Documentation/trace/uprobetracer.rst b/Documentation/trace/uprobetracer.rst
index 4346e23e3ae7..ab13319c66ac 100644
--- a/Documentation/trace/uprobetracer.rst
+++ b/Documentation/trace/uprobetracer.rst
@@ -42,16 +42,18 @@ Synopsis of uprobe_tracer
    @+OFFSET	: Fetch memory at OFFSET (OFFSET from same file as PATH)
    $stackN	: Fetch Nth entry of stack (N >= 0)
    $stack	: Fetch stack address.
-   $retval	: Fetch return value.(*)
+   $retval	: Fetch return value.(\*1)
    $comm	: Fetch current task comm.
-   +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
+   +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*2)(\*3)
    NAME=FETCHARG     : Set NAME as the argument name of FETCHARG.
    FETCHARG:TYPE     : Set TYPE as the type of FETCHARG. Currently, basic types
 		       (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
 		       (x8/x16/x32/x64), "string" and bitfield are supported.
 
-  (*) only for return probe.
-  (**) this is useful for fetching a field of data structures.
+  (\*1) only for return probe.
+  (\*2) this is useful for fetching a field of data structures.
+  (\*3) Unlike kprobe event, "u" prefix will just be ignored, becuse uprobe
+        events can access only user-space memory.
 
 Types
 -----
-- 
cgit 


From 970988e19eb0a0dc24fe14bf91972019e48336e2 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 22 May 2019 17:32:35 +0900
Subject: tracing/kprobe: Add kprobe_event= boot parameter

Add kprobe_event= boot parameter to define kprobe events
at boot time.
The definition syntax is similar to tracefs/kprobe_events
interface, but use ',' and ';' instead of ' ' and '\n'
respectively. e.g.

  kprobe_event=p,vfs_read,$arg1,$arg2

This puts a probe on vfs_read with argument1 and 2, and
enable the new event.

Link: http://lkml.kernel.org/r/155851395498.15728.830529496248543583.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 13 +++++++++++++
 Documentation/trace/kprobetrace.rst             | 14 ++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..11b9ffb265eb 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2007,6 +2007,19 @@
 			Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y,
 			the default is off.
 
+	kprobe_event=[probe-list]
+			[FTRACE] Add kprobe events and enable at boot time.
+			The probe-list is a semicolon delimited list of probe
+			definitions. Each definition is same as kprobe_events
+			interface, but the parameters are comma delimited.
+			For example, to add a kprobe event on vfs_read with
+			arg1 and arg2, add to the command line;
+
+			      kprobe_event=p,vfs_read,$arg1,$arg2
+
+			See also Documentation/trace/kprobetrace.rst "Kernel
+			Boot Parameter" section.
+
 	kpti=		[ARM64] Control page table isolation of user
 			and kernel address spaces.
 			Default: enabled on cores which need mitigation.
diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index 09ff474493e1..af776989caca 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -146,6 +146,20 @@ You can check the total number of probe hits and probe miss-hits via
 The first column is event name, the second is the number of probe hits,
 the third is the number of probe miss-hits.
 
+Kernel Boot Parameter
+---------------------
+You can add and enable new kprobe events when booting up the kernel by
+"kprobe_event=" parameter. The parameter accepts a semicolon-delimited
+kprobe events, which format is similar to the kprobe_events.
+The difference is that the probe definition parameters are comma-delimited
+instead of space. For example, adding myprobe event on do_sys_open like below
+
+  p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)
+
+should be below for kernel boot parameter (just replace spaces with comma)
+
+  p:myprobe,do_sys_open,dfd=%ax,filename=%dx,flags=%cx,mode=+4($stack)
+
 
 Usage examples
 --------------
-- 
cgit 


From 8e2c67f9960d32edcd16c846c947b2a05dad32ad Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Sat, 25 May 2019 15:41:36 +0200
Subject: dt-bindings: vendor: Escape single quote

Single quotes need to be escaped in YAML, make sure it's the case.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index c4d4f6714814..3bb61e4b4597 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -668,7 +668,7 @@ patternProperties:
   "^plantower,.*":
     description: Plantower Co., Ltd
   "^plathome,.*":
-    description: Plat'Home Co., Ltd.
+    description: Plat\'Home Co., Ltd.
   "^plda,.*":
     description: PLDA
   "^plx,.*":
-- 
cgit 


From b361797f3d0b5accf23af848a089589fb8ce5f80 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Sat, 25 May 2019 15:41:37 +0200
Subject: dt-bindings: vendor: Fix simtek vendor compatible

In the text file, simtek didn't have any description and apparently this
confused the conversion script. Fix the simtek entry and add a proper
description.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 3bb61e4b4597..653dee9d7dd0 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -783,8 +783,8 @@ patternProperties:
     description: Silergy Corp.
   "^siliconmitus,.*":
     description: Silicon Mitus, Inc.
-  "^simte,.*":
-    description: k
+  "^simtek,.*":
+    description: Cypress Semiconductor Corporation (Simtek Corporation)
   "^sirf,.*":
     description: SiRF Technology, Inc.
   "^sis,.*":
-- 
cgit 


From 462409365b698a62a93ddf834b6221e28b76652a Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Fri, 24 May 2019 13:12:22 +0200
Subject: dt-bindings: net: add qca,ar71xx.txt documentation

Add binding documentation for Atheros/QCA networking IP core used
in many routers.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/qca,ar71xx.txt         | 45 ++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/qca,ar71xx.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/qca,ar71xx.txt b/Documentation/devicetree/bindings/net/qca,ar71xx.txt
new file mode 100644
index 000000000000..2a33e71ba72b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qca,ar71xx.txt
@@ -0,0 +1,45 @@
+Required properties:
+- compatible:	Should be "qca,<soc>-eth". Currently support compatibles are:
+		qca,ar7100-eth - Atheros AR7100
+		qca,ar7240-eth - Atheros AR7240
+		qca,ar7241-eth - Atheros AR7241
+		qca,ar7242-eth - Atheros AR7242
+		qca,ar9130-eth - Atheros AR9130
+		qca,ar9330-eth - Atheros AR9330
+		qca,ar9340-eth - Atheros AR9340
+		qca,qca9530-eth - Qualcomm Atheros QCA9530
+		qca,qca9550-eth - Qualcomm Atheros QCA9550
+		qca,qca9560-eth - Qualcomm Atheros QCA9560
+
+- reg : Address and length of the register set for the device
+- interrupts : Should contain eth interrupt
+- phy-mode : See ethernet.txt file in the same directory
+- clocks: the clock used by the core
+- clock-names: the names of the clock listed in the clocks property. These are
+	"eth" and "mdio".
+- resets: Should contain phandles to the reset signals
+- reset-names: Should contain the names of reset signal listed in the resets
+		property. These are "mac" and "mdio"
+
+Optional properties:
+- phy-handle : phandle to the PHY device connected to this device.
+- fixed-link : Assume a fixed link. See fixed-link.txt in the same directory.
+  Use instead of phy-handle.
+
+Optional subnodes:
+- mdio : specifies the mdio bus, used as a container for phy nodes
+  according to phy.txt in the same directory
+
+Example:
+
+ethernet@1a000000 {
+	compatible = "qca,ar9330-eth";
+	reg = <0x1a000000 0x200>;
+	interrupts = <5>;
+	resets = <&rst 13>, <&rst 23>;
+	reset-names = "mac", "mdio";
+	clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_MDIO>;
+	clock-names = "eth", "mdio";
+
+	phy-mode = "gmii";
+};
-- 
cgit 


From e618795367df99c463f5e4e0ddee9612fcce6b06 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 25 May 2019 21:42:28 +0100
Subject: dt-bindings: pinctrl: fix spelling mistakes in pinctl documentation

The spelling of configured is incorrect in the documentation. Fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt | 6 +++---
 Documentation/devicetree/bindings/pinctrl/qcom,ipq8074-pinctrl.txt | 6 +++---
 Documentation/devicetree/bindings/pinctrl/qcom,mdm9615-pinctrl.txt | 6 +++---
 Documentation/devicetree/bindings/pinctrl/qcom,msm8916-pinctrl.txt | 6 +++---
 Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt | 6 +++---
 Documentation/devicetree/bindings/pinctrl/qcom,msm8994-pinctrl.txt | 6 +++---
 Documentation/devicetree/bindings/pinctrl/qcom,msm8996-pinctrl.txt | 6 +++---
 Documentation/devicetree/bindings/pinctrl/qcom,msm8998-pinctrl.txt | 6 +++---
 Documentation/devicetree/bindings/pinctrl/qcom,qcs404-pinctrl.txt  | 6 +++---
 Documentation/devicetree/bindings/pinctrl/qcom,sdm660-pinctrl.txt  | 6 +++---
 Documentation/devicetree/bindings/pinctrl/qcom,sdm845-pinctrl.txt  | 6 +++---
 11 files changed, 33 insertions(+), 33 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt
index 68e93d5b7ede..c9782397ff14 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt
@@ -122,17 +122,17 @@ to specify in a pin configuration subnode:
 - bias-disable:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as no pull.
+	Definition: The specified pins should be configured as no pull.
 
 - bias-pull-down:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull down.
+	Definition: The specified pins should be configured as pull down.
 
 - bias-pull-up:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull up.
+	Definition: The specified pins should be configured as pull up.
 
 - output-high:
 	Usage: optional
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,ipq8074-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,ipq8074-pinctrl.txt
index 6dd72f8599e9..7b151894f5a0 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,ipq8074-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,ipq8074-pinctrl.txt
@@ -118,17 +118,17 @@ to specify in a pin configuration subnode:
 - bias-disable:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as no pull.
+	Definition: The specified pins should be configured as no pull.
 
 - bias-pull-down:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull down.
+	Definition: The specified pins should be configured as pull down.
 
 - bias-pull-up:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull up.
+	Definition: The specified pins should be configured as pull up.
 
 - output-high:
 	Usage: optional
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,mdm9615-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9615-pinctrl.txt
index 86ecdcfc4fb8..d46973968873 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,mdm9615-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9615-pinctrl.txt
@@ -97,17 +97,17 @@ to specify in a pin configuration subnode:
 - bias-disable:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as no pull.
+	Definition: The specified pins should be configured as no pull.
 
 - bias-pull-down:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull down.
+	Definition: The specified pins should be configured as pull down.
 
 - bias-pull-up:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull up.
+	Definition: The specified pins should be configured as pull up.
 
 - output-high:
 	Usage: optional
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8916-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8916-pinctrl.txt
index 195a7a0ef0cc..3354a63296d9 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,msm8916-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8916-pinctrl.txt
@@ -130,17 +130,17 @@ to specify in a pin configuration subnode:
 - bias-disable:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as no pull.
+	Definition: The specified pins should be configured as no pull.
 
 - bias-pull-down:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull down.
+	Definition: The specified pins should be configured as pull down.
 
 - bias-pull-up:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull up.
+	Definition: The specified pins should be configured as pull up.
 
 - output-high:
 	Usage: optional
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt
index 5034eb6653c7..a7dd213c77c6 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt
@@ -124,17 +124,17 @@ to specify in a pin configuration subnode:
 - bias-disable:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as no pull.
+	Definition: The specified pins should be configured as no pull.
 
 - bias-pull-down:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull down.
+	Definition: The specified pins should be configured as pull down.
 
 - bias-pull-up:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull up.
+	Definition: The specified pins should be configured as pull up.
 
 - output-high:
 	Usage: optional
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8994-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8994-pinctrl.txt
index f15443f6e78e..da52df6273bc 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,msm8994-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8994-pinctrl.txt
@@ -128,17 +128,17 @@ to specify in a pin configuration subnode:
 - bias-disable:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as no pull.
+	Definition: The specified pins should be configured as no pull.
 
 - bias-pull-down:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull down.
+	Definition: The specified pins should be configured as pull down.
 
 - bias-pull-up:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull up.
+	Definition: The specified pins should be configured as pull up.
 
 - output-high:
 	Usage: optional
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8996-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8996-pinctrl.txt
index fa97f609fe45..a56cb882830c 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,msm8996-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8996-pinctrl.txt
@@ -149,17 +149,17 @@ to specify in a pin configuration subnode:
 - bias-disable:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as no pull.
+	Definition: The specified pins should be configured as no pull.
 
 - bias-pull-down:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull down.
+	Definition: The specified pins should be configured as pull down.
 
 - bias-pull-up:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull up.
+	Definition: The specified pins should be configured as pull up.
 
 - output-high:
 	Usage: optional
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8998-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8998-pinctrl.txt
index e70c79bbbc5b..00174f08ba1d 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,msm8998-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8998-pinctrl.txt
@@ -135,17 +135,17 @@ to specify in a pin configuration subnode:
 - bias-disable:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as no pull.
+	Definition: The specified pins should be configured as no pull.
 
 - bias-pull-down:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull down.
+	Definition: The specified pins should be configured as pull down.
 
 - bias-pull-up:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull up.
+	Definition: The specified pins should be configured as pull up.
 
 - output-high:
 	Usage: optional
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,qcs404-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,qcs404-pinctrl.txt
index 2b8f77762edc..a50e74684195 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,qcs404-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,qcs404-pinctrl.txt
@@ -150,17 +150,17 @@ to specify in a pin configuration subnode:
 - bias-disable:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as no pull.
+	Definition: The specified pins should be configured as no pull.
 
 - bias-pull-down:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull down.
+	Definition: The specified pins should be configured as pull down.
 
 - bias-pull-up:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull up.
+	Definition: The specified pins should be configured as pull up.
 
 - output-high:
 	Usage: optional
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sdm660-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,sdm660-pinctrl.txt
index 769ca83bb40d..be034d329e10 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,sdm660-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,sdm660-pinctrl.txt
@@ -142,17 +142,17 @@ to specify in a pin configuration subnode:
 - bias-disable:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as no pull.
+	Definition: The specified pins should be configured as no pull.
 
 - bias-pull-down:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull down.
+	Definition: The specified pins should be configured as pull down.
 
 - bias-pull-up:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull up.
+	Definition: The specified pins should be configured as pull up.
 
 - output-high:
 	Usage: optional
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sdm845-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,sdm845-pinctrl.txt
index 665aadb5ea28..321bdb9be0d2 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,sdm845-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,sdm845-pinctrl.txt
@@ -118,17 +118,17 @@ to specify in a pin configuration subnode:
 - bias-disable:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as no pull.
+	Definition: The specified pins should be configured as no pull.
 
 - bias-pull-down:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull down.
+	Definition: The specified pins should be configured as pull down.
 
 - bias-pull-up:
 	Usage: optional
 	Value type: <none>
-	Definition: The specified pins should be configued as pull up.
+	Definition: The specified pins should be configured as pull up.
 
 - output-high:
 	Usage: optional
-- 
cgit 


From 303b3cf95ed322ab781cafe4c279e58755d3d8ee Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Mon, 27 May 2019 18:41:57 -0700
Subject: dt-bindings: add more optional properties for elan_i2c touchpads

Some new touchpads IC are connected through PS/2 and I2C. On some of these
new IC, the I2C part doesn't have all of the information available.
We need to be able to forward the touchpad parameters from PS/2 and
thus, we need those new optional properties.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 Documentation/devicetree/bindings/input/elan_i2c.txt | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/input/elan_i2c.txt b/Documentation/devicetree/bindings/input/elan_i2c.txt
index 797607460735..9963247706f2 100644
--- a/Documentation/devicetree/bindings/input/elan_i2c.txt
+++ b/Documentation/devicetree/bindings/input/elan_i2c.txt
@@ -13,9 +13,20 @@ Optional properties:
   pinctrl binding [1]).
 - vcc-supply: a phandle for the regulator supplying 3.3V power.
 - elan,trackpoint: touchpad can support a trackpoint (boolean)
+- elan,clickpad: touchpad is a clickpad (the entire surface is a button)
+- elan,middle-button: touchpad has a physical middle button
+- elan,x_traces: number of antennas on the x axis
+- elan,y_traces: number of antennas on the y axis
+- some generic touchscreen properties [2]:
+  * touchscreen-size-x
+  * touchscreen-size-y
+  * touchscreen-x-mm
+  * touchscreen-y-mm
+
 
 [0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
 [1]: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+[2]: Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt
 
 Example:
 	&i2c1 {
-- 
cgit 


From 5e343fbb7176a082dec8632f72999aba82f0e720 Mon Sep 17 00:00:00 2001
From: Przemyslaw Gaj <pgaj@cadence.com>
Date: Tue, 16 Apr 2019 09:36:15 +0100
Subject: dt-bindings: i3c: Document dropped support for I2C 10 bit devices

Because this patch series dropped support for 10 bit I2C devices, I'm
documenting this.

Signed-off-by: Przemyslaw Gaj <pgaj@cadence.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 Documentation/devicetree/bindings/i3c/i3c.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/i3c/i3c.txt b/Documentation/devicetree/bindings/i3c/i3c.txt
index ab729a0a86ae..4ffe059f0fec 100644
--- a/Documentation/devicetree/bindings/i3c/i3c.txt
+++ b/Documentation/devicetree/bindings/i3c/i3c.txt
@@ -39,7 +39,9 @@ valid here, but several new properties have been added.
 New constraint on existing properties:
 --------------------------------------
 - reg: contains 3 cells
-  + first cell : still encoding the I2C address
+  + first cell : still encoding the I2C address. 10 bit addressing is not
+    supported. Devices with 10 bit address can't be properly passed through
+    DEFSLVS command.
 
   + second cell: shall be 0
 
-- 
cgit 


From 4b3d50062ce06530adc27224a1e10f087d0c9caf Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 23 May 2019 10:17:36 +0200
Subject: gpio: Fix minor grammar errors in documentation

This fixes up some of my own mistakes when I stressed to refresh
the documentation.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/driver-api/gpio/driver.rst | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst
index 1ce7fcd0f989..58036c2d84d2 100644
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@@ -235,7 +235,7 @@ means that a pull up or pull-down resistor is available on the output of the
 GPIO line, and this resistor is software controlled.
 
 In discrete designs, a pull-up or pull-down resistor is simply soldered on
-the circuit board. This is not something we deal or model in software. The
+the circuit board. This is not something we deal with or model in software. The
 most you will think about these lines is that they will very likely be
 configured as open drain or open source (see the section above).
 
@@ -292,18 +292,18 @@ We can divide GPIO irqchips in two broad categories:
 
 - HIERARCHICAL INTERRUPT CHIPS: this means that each GPIO line has a dedicated
   irq line to a parent interrupt controller one level up. There is no need
-  to inquire the GPIO hardware to figure out which line has figured, but it
-  may still be necessary to acknowledge the interrupt and set up the
-  configuration such as edge sensitivity.
+  to inquire the GPIO hardware to figure out which line has fired, but it
+  may still be necessary to acknowledge the interrupt and set up configuration
+  such as edge sensitivity.
 
 Realtime considerations: a realtime compliant GPIO driver should not use
 spinlock_t or any sleepable APIs (like PM runtime) as part of its irqchip
 implementation.
 
-- spinlock_t should be replaced with raw_spinlock_t [1].
+- spinlock_t should be replaced with raw_spinlock_t.[1]
 - If sleepable APIs have to be used, these can be done from the .irq_bus_lock()
   and .irq_bus_unlock() callbacks, as these are the only slowpath callbacks
-  on an irqchip. Create the callbacks if needed [2].
+  on an irqchip. Create the callbacks if needed.[2]
 
 
 Cascaded GPIO irqchips
@@ -361,7 +361,7 @@ Cascaded GPIO irqchips usually fall in one of three categories:
 
   Realtime considerations: this kind of handlers will be forced threaded on -RT,
   and as result the IRQ core will complain that generic_handle_irq() is called
-  with IRQ enabled and the same work around as for "CHAINED GPIO irqchips" can
+  with IRQ enabled and the same work-around as for "CHAINED GPIO irqchips" can
   be applied.
 
 - NESTED THREADED GPIO IRQCHIPS: these are off-chip GPIO expanders and any
-- 
cgit 


From 919c46c89bff432f45994259d28d774212cf6e8d Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca@lucaceresoli.net>
Date: Fri, 10 May 2019 11:03:39 +0200
Subject: Documentation: gpio: remove duplicated lines

The 'default (active high)' lines are repeated twice. Avoid people stare at
their screens looking for differences.

Signed-off-by: Luca Ceresoli <luca@lucaceresoli.net>
Reviewed-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/driver-api/gpio/consumer.rst | 2 --
 1 file changed, 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst
index 5e4d8aa68913..23d68c321c5c 100644
--- a/Documentation/driver-api/gpio/consumer.rst
+++ b/Documentation/driver-api/gpio/consumer.rst
@@ -283,8 +283,6 @@ To summarize::
   gpiod_set_value(desc, 1);          default (active high)  high
   gpiod_set_value(desc, 0);          active low             high
   gpiod_set_value(desc, 1);          active low             low
-  gpiod_set_value(desc, 0);          default (active high)  low
-  gpiod_set_value(desc, 1);          default (active high)  high
   gpiod_set_value(desc, 0);          open drain             low
   gpiod_set_value(desc, 1);          open drain             high impedance
   gpiod_set_value(desc, 0);          open source            high impedance
-- 
cgit 


From 910f38bed9439e765f7e240edfe32b5f04ad92f5 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 13 May 2019 19:50:33 -0500
Subject: dt-bindings: gpio: Convert Arm PL061 to json-schema

Convert the Arm PL061 GPIO controller binding to json-schema format.

As I'm the author for all but the gpio-ranges line, make the schema dual
GPL/BSD license.

Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: linux-gpio@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../devicetree/bindings/gpio/pl061-gpio.txt        | 10 ----
 .../devicetree/bindings/gpio/pl061-gpio.yaml       | 69 ++++++++++++++++++++++
 2 files changed, 69 insertions(+), 10 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/gpio/pl061-gpio.txt
 create mode 100644 Documentation/devicetree/bindings/gpio/pl061-gpio.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/gpio/pl061-gpio.txt b/Documentation/devicetree/bindings/gpio/pl061-gpio.txt
deleted file mode 100644
index 89058d375b7c..000000000000
--- a/Documentation/devicetree/bindings/gpio/pl061-gpio.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-ARM PL061 GPIO controller
-
-Required properties:
-- compatible : "arm,pl061", "arm,primecell"
-- #gpio-cells : Should be two. The first cell is the pin number and the
-  second cell is used to specify optional parameters:
-  - bit 0 specifies polarity (0 for normal, 1 for inverted)
-- gpio-controller : Marks the device node as a GPIO controller.
-- interrupts : Interrupt mapping for GPIO IRQ.
-- gpio-ranges : Interaction with the PINCTRL subsystem.
diff --git a/Documentation/devicetree/bindings/gpio/pl061-gpio.yaml b/Documentation/devicetree/bindings/gpio/pl061-gpio.yaml
new file mode 100644
index 000000000000..313b17229247
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/pl061-gpio.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/pl061-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM PL061 GPIO controller
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+  - Rob Herring <robh@kernel.org>
+
+# We need a select here so we don't match all nodes with 'arm,primecell'
+select:
+  properties:
+    compatible:
+      contains:
+        const: arm,pl061
+  required:
+    - compatible
+
+properties:
+  $nodename:
+    pattern: "^gpio@[0-9a-f]+$"
+
+  compatible:
+    items:
+      - const: arm,pl061
+      - const: arm,primecell
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    oneOf:
+      - maxItems: 1
+      - maxItems: 8
+
+  interrupt-controller: true
+
+  "#interrupt-cells":
+    const: 2
+
+  clocks:
+    maxItems: 1
+
+  clock-names: true
+
+  "#gpio-cells":
+    const: 2
+
+  gpio-controller: true
+
+  gpio-ranges:
+    maxItems: 8
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-controller
+  - "#interrupt-cells"
+  - clocks
+  - "#gpio-cells"
+  - gpio-controller
+
+additionalProperties: false
+
+...
-- 
cgit 


From b2045303147254d01b1db90a83e5df3832c4264b Mon Sep 17 00:00:00 2001
From: Clément Péron <peron.clem@gmail.com>
Date: Mon, 27 May 2019 22:06:21 +0200
Subject: dt-bindings: sound: sun4i-spdif: Add Allwinner H6 compatible
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allwinner H6 has a SPDIF controller with an increase of the fifo
size and a sligher difference in memory mapping compare to H3/A64.

This make it not compatible with the previous generation.

Introduce a specific bindings for H6 SoC.

Signed-off-by: Clément Péron <peron.clem@gmail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
index a49ef2294a74..e0284d8c3b63 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
@@ -21,6 +21,7 @@ properties:
       - const: allwinner,sun4i-a10-spdif
       - const: allwinner,sun6i-a31-spdif
       - const: allwinner,sun8i-h3-spdif
+      - const: allwinner,sun50i-h6-spdif
       - items:
           - const: allwinner,sun8i-a83t-spdif
           - const: allwinner,sun8i-h3-spdif
-- 
cgit 


From 0a0ca8e94ca36d2153c2fbea69a31f792bfc5831 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Fri, 24 May 2019 14:57:58 +0200
Subject: dt-bindings: sound: Convert Allwinner I2S binding to YAML

The Allwinner SoCs feature an I2S controller across multiple SoC
generations.

However, earlier generations were a bit simpler than the subsequent ones,
and for example would always have RX and TX capabilities, and no reset
lines.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/sound/allwinner,sun4i-a10-i2s.yaml    | 100 +++++++++++++++++++++
 .../devicetree/bindings/sound/sun4i-i2s.txt        |  45 ----------
 2 files changed, 100 insertions(+), 45 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
 delete mode 100644 Documentation/devicetree/bindings/sound/sun4i-i2s.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
new file mode 100644
index 000000000000..85b2d6d84055
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
@@ -0,0 +1,100 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/allwinner,sun4i-a10-i2s.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 I2S Controller Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  "#sound-dai-cells":
+    const: 0
+
+  compatible:
+    oneOf:
+      - const: allwinner,sun4i-a10-i2s
+      - const: allwinner,sun6i-a31-i2s
+      - const: allwinner,sun8i-a83t-i2s
+      - const: allwinner,sun8i-h3-i2s
+      - const: allwinner,sun50i-a64-codec-i2s
+      - items:
+          - const: allwinner,sun50i-a64-i2s
+          - const: allwinner,sun8i-h3-i2s
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Bus Clock
+      - description: Module Clock
+
+  clock-names:
+    items:
+      - const: apb
+      - const: mod
+
+  dmas:
+    items:
+      - description: RX DMA Channel
+      - description: TX DMA Channel
+
+  dma-names:
+    items:
+      - const: rx
+      - const: tx
+
+  # Even though it only applies to subschemas under the conditionals,
+  # not listing them here will trigger a warning because of the
+  # additionalsProperties set to false.
+  resets:
+    maxItems: 1
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun6i-a31-i2s
+              - allwinner,sun8i-a83t-i2s
+              - allwinner,sun8i-h3-i2s
+              - allwinner,sun50i-a64-codec-i2s
+
+    then:
+      required:
+        - resets
+
+required:
+  - "#sound-dai-cells"
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - dmas
+  - dma-names
+
+additionalProperties: false
+
+examples:
+  - |
+    i2s0: i2s@1c22400 {
+        #sound-dai-cells = <0>;
+        compatible = "allwinner,sun4i-a10-i2s";
+        reg = <0x01c22400 0x400>;
+        interrupts = <0 16 4>;
+        clocks = <&apb0_gates 3>, <&i2s0_clk>;
+        clock-names = "apb", "mod";
+        dmas = <&dma 0 3>, <&dma 0 3>;
+        dma-names = "rx", "tx";
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/sound/sun4i-i2s.txt b/Documentation/devicetree/bindings/sound/sun4i-i2s.txt
deleted file mode 100644
index 61e71c1729e0..000000000000
--- a/Documentation/devicetree/bindings/sound/sun4i-i2s.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-* Allwinner A10 I2S controller
-
-The I2S bus (Inter-IC sound bus) is a serial link for digital
-audio data transfer between devices in the system.
-
-Required properties:
-
-- compatible: should be one of the following:
-   - "allwinner,sun4i-a10-i2s"
-   - "allwinner,sun6i-a31-i2s"
-   - "allwinner,sun8i-a83t-i2s"
-   - "allwinner,sun8i-h3-i2s"
-   - "allwinner,sun50i-a64-codec-i2s"
-- reg: physical base address of the controller and length of memory mapped
-  region.
-- interrupts: should contain the I2S interrupt.
-- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
-	Documentation/devicetree/bindings/dma/dma.txt
-- dma-names: should include "tx" and "rx".
-- clocks: a list of phandle + clock-specifer pairs, one for each entry in clock-names.
-- clock-names: should contain the following:
-   - "apb" : clock for the I2S bus interface
-   - "mod" : module clock for the I2S controller
-- #sound-dai-cells : Must be equal to 0
-
-Required properties for the following compatibles:
-	- "allwinner,sun6i-a31-i2s"
-	- "allwinner,sun8i-a83t-i2s"
-	- "allwinner,sun8i-h3-i2s"
-	- "allwinner,sun50i-a64-codec-i2s"
-- resets: phandle to the reset line for this codec
-
-Example:
-
-i2s0: i2s@1c22400 {
-	#sound-dai-cells = <0>;
-	compatible = "allwinner,sun4i-a10-i2s";
-	reg = <0x01c22400 0x400>;
-	interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
-	clocks = <&apb0_gates 3>, <&i2s0_clk>;
-	clock-names = "apb", "mod";
-	dmas = <&dma SUN4I_DMA_NORMAL 3>,
-	       <&dma SUN4I_DMA_NORMAL 3>;
-	dma-names = "rx", "tx";
-};
-- 
cgit 


From eb5b12843b067d685a8d7a191b928b07934b2d02 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Fri, 24 May 2019 14:57:59 +0200
Subject: dt-bindings: sound: sun4i-i2s: Document that the RX channel can be
 missing

The A83t and compatibles controllers don't have any reception capabilities
on some instances of the controllers, even though it was never documented
as such in the binding before.

Therefore, on those controllers, we don't have the option to set an RX DMA
channel.

This was already done in the DTSI, but the binding itself was never
updated. Let's add a special case in the schemas.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/sound/allwinner,sun4i-a10-i2s.yaml    | 52 +++++++++++++++++-----
 1 file changed, 42 insertions(+), 10 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
index 85b2d6d84055..eb3992138eec 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
@@ -41,19 +41,11 @@ properties:
       - const: apb
       - const: mod
 
-  dmas:
-    items:
-      - description: RX DMA Channel
-      - description: TX DMA Channel
-
-  dma-names:
-    items:
-      - const: rx
-      - const: tx
-
   # Even though it only applies to subschemas under the conditionals,
   # not listing them here will trigger a warning because of the
   # additionalsProperties set to false.
+  dmas: true
+  dma-names: true
   resets:
     maxItems: 1
 
@@ -72,6 +64,46 @@ allOf:
       required:
         - resets
 
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,sun8i-a83t-i2s
+
+    then:
+      properties:
+        dmas:
+          minItems: 1
+          maxItems: 2
+          items:
+            - description: RX DMA Channel
+            - description: TX DMA Channel
+          description:
+            Some controllers cannot receive but can only transmit
+            data. In such a case, the RX DMA channel is to be omitted.
+
+        dma-names:
+          oneOf:
+            - items:
+                - const: rx
+                - const: tx
+            - const: tx
+          description:
+            Some controllers cannot receive but can only transmit
+            data. In such a case, the RX name is to be omitted.
+
+    else:
+      properties:
+        dmas:
+          items:
+            - description: RX DMA Channel
+            - description: TX DMA Channel
+
+        dma-names:
+          items:
+            - const: rx
+            - const: tx
+
 required:
   - "#sound-dai-cells"
   - compatible
-- 
cgit 


From de1dbcee433ccff3e0a37698c65b40542c9d4cf1 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Fri, 29 Mar 2019 10:05:55 -0400
Subject: doc/rcuref: Document real world examples in kernel

Document similar real world examples in the kernel corresponding to the
second and third code snippets. Also correct an issue in
release_referenced() in the code snippet example.

Cc: oleg@redhat.com
Cc: jannh@google.com
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
[ paulmck: Do a bit of wordsmithing. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 Documentation/RCU/rcuref.txt | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt
index 613033ff2b9b..5e6429d66c24 100644
--- a/Documentation/RCU/rcuref.txt
+++ b/Documentation/RCU/rcuref.txt
@@ -12,6 +12,7 @@ please read on.
 Reference counting on elements of lists which are protected by traditional
 reader/writer spinlocks or semaphores are straightforward:
 
+CODE LISTING A:
 1.				2.
 add()				search_and_reference()
 {				{
@@ -28,7 +29,8 @@ add()				search_and_reference()
 release_referenced()			delete()
 {					{
     ...					    write_lock(&list_lock);
-    atomic_dec(&el->rc, relfunc)	    ...
+    if(atomic_dec_and_test(&el->rc))	    ...
+	kfree(el);
     ...					    remove_element
 }					    write_unlock(&list_lock);
  					    ...
@@ -44,6 +46,7 @@ search_and_reference() could potentially hold reference to an element which
 has already been deleted from the list/array.  Use atomic_inc_not_zero()
 in this scenario as follows:
 
+CODE LISTING B:
 1.					2.
 add()					search_and_reference()
 {					{
@@ -79,6 +82,7 @@ search_and_reference() code path.  In such cases, the
 atomic_dec_and_test() may be moved from delete() to el_free()
 as follows:
 
+CODE LISTING C:
 1.					2.
 add()					search_and_reference()
 {					{
@@ -114,6 +118,17 @@ element can therefore safely be freed.  This in turn guarantees that if
 any reader finds the element, that reader may safely acquire a reference
 without checking the value of the reference counter.
 
+A clear advantage of the RCU-based pattern in listing C over the one
+in listing B is that any call to search_and_reference() that locates
+a given object will succeed in obtaining a reference to that object,
+even given a concurrent invocation of delete() for that same object.
+Similarly, a clear advantage of both listings B and C over listing A is
+that a call to delete() is not delayed even if there are an arbitrarily
+large number of calls to search_and_reference() searching for the same
+object that delete() was invoked on.  Instead, all that is delayed is
+the eventual invocation of kfree(), which is usually not a problem on
+modern computer systems, even the small ones.
+
 In cases where delete() can sleep, synchronize_rcu() can be called from
 delete(), so that el_free() can be subsumed into delete as follows:
 
@@ -130,3 +145,7 @@ delete()
     	kfree(el);
     ...
 }
+
+As additional examples in the kernel, the pattern in listing C is used by
+reference counting of struct pid, while the pattern in listing B is used by
+struct posix_acl.
-- 
cgit 


From 588759a39145e18dd808341861d45d44383778b5 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Date: Sun, 14 Apr 2019 11:11:03 +0800
Subject: doc: Fixup definition of rcupdate.rcu_task_stall_timeout

A positive value of rcupdate.rcu_task_stall_timeout is an interval
in seconds rather than jiffies.

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 Documentation/RCU/stallwarn.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index 1ab70c37921f..13e88fc00f01 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -153,7 +153,7 @@ rcupdate.rcu_task_stall_timeout
 	This boot/sysfs parameter controls the RCU-tasks stall warning
 	interval.  A value of zero or less suppresses RCU-tasks stall
 	warnings.  A positive value sets the stall-warning interval
-	in jiffies.  An RCU-tasks stall warning starts with the line:
+	in seconds.  An RCU-tasks stall warning starts with the line:
 
 		INFO: rcu_tasks detected stalls on tasks:
 
-- 
cgit 


From 714b6904e23e1c37f262a4cd02b34d0f1863e227 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Thu, 11 Apr 2019 07:12:18 -0700
Subject: doc: Remove ".vnet" from paulmck email addresses

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 Documentation/core-api/circular-buffers.rst          | 2 +-
 Documentation/memory-barriers.txt                    | 2 +-
 Documentation/translations/ko_KR/memory-barriers.txt | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/core-api/circular-buffers.rst b/Documentation/core-api/circular-buffers.rst
index 53e51caa3347..50966f66e398 100644
--- a/Documentation/core-api/circular-buffers.rst
+++ b/Documentation/core-api/circular-buffers.rst
@@ -3,7 +3,7 @@ Circular Buffers
 ================
 
 :Author: David Howells <dhowells@redhat.com>
-:Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+:Author: Paul E. McKenney <paulmck@linux.ibm.com>
 
 
 Linux provides a number of features that can be used to implement circular
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index f70ebcdfe592..e4e07c8ab89e 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -3,7 +3,7 @@
 			 ============================
 
 By: David Howells <dhowells@redhat.com>
-    Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+    Paul E. McKenney <paulmck@linux.ibm.com>
     Will Deacon <will.deacon@arm.com>
     Peter Zijlstra <peterz@infradead.org>
 
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index db0b9d8619f1..5f3c74dcad43 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -24,7 +24,7 @@ Documentation/memory-barriers.txt
 			 =========================
 
 저자: David Howells <dhowells@redhat.com>
-      Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+      Paul E. McKenney <paulmck@linux.ibm.com>
       Will Deacon <will.deacon@arm.com>
       Peter Zijlstra <peterz@infradead.org>
 
-- 
cgit 


From a82c3149ad8b4a84f9737a633250815d5cf0cc5e Mon Sep 17 00:00:00 2001
From: Steve Longerbeam <slongerbeam@gmail.com>
Date: Fri, 3 May 2019 19:45:07 -0400
Subject: media: docs-rst: Clarify older field vs. first transmitted field

Add a paragraph to make it more clear that video equipment will transmit
fields in the same order the fields were captured, and replace some of
the "is transmitted first" language with "is the older field", since the
latter is the important info for motion compensation applications.

Signed-off-by: Steve Longerbeam <slongerbeam@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/field-order.rst | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/media/uapi/v4l/field-order.rst b/Documentation/media/uapi/v4l/field-order.rst
index d640e922a974..c422bebe4314 100644
--- a/Documentation/media/uapi/v4l/field-order.rst
+++ b/Documentation/media/uapi/v4l/field-order.rst
@@ -51,6 +51,11 @@ determined by the video standard. Hence the distinction between temporal
 and spatial order of fields. The diagrams below should make this
 clearer.
 
+In V4L it is assumed that all video cameras transmit fields on the media
+bus in the same order they were captured, so if the top field was
+captured first (is the older field), the top field is also transmitted
+first on the bus.
+
 All video capture and output devices must report the current field
 order. Some drivers may permit the selection of a different order, to
 this end applications initialize the ``field`` field of struct
@@ -101,10 +106,10 @@ enum v4l2_field
     * - ``V4L2_FIELD_INTERLACED``
       - 4
       - Images contain both fields, interleaved line by line. The temporal
-	order of the fields (whether the top or bottom field is first
-	transmitted) depends on the current video standard. M/NTSC
-	transmits the bottom field first, all other standards the top
-	field first.
+	order of the fields (whether the top or bottom field is older)
+	depends on the current video standard. In M/NTSC the bottom
+	field is the older field. In all other standards the top field
+	is the older field.
     * - ``V4L2_FIELD_SEQ_TB``
       - 5
       - Images contain both fields, the top field lines are stored first
@@ -135,11 +140,11 @@ enum v4l2_field
     * - ``V4L2_FIELD_INTERLACED_TB``
       - 8
       - Images contain both fields, interleaved line by line, top field
-	first. The top field is transmitted first.
+	first. The top field is the older field.
     * - ``V4L2_FIELD_INTERLACED_BT``
       - 9
       - Images contain both fields, interleaved line by line, top field
-	first. The bottom field is transmitted first.
+	first. The bottom field is the older field.
 
 
-- 
cgit 


From 5e27a314a11f7fa53795282eea59a024fd3020ba Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 27 May 2019 14:17:10 +0200
Subject: dt-bindings: interrupt-controller: Add Renesas RZ/A1 Interrupt
 Controller

Add DT bindings for the Renesas RZ/A1 Interrupt Controller.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 .../interrupt-controller/renesas,rza1-irqc.txt     | 43 ++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/interrupt-controller/renesas,rza1-irqc.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,rza1-irqc.txt b/Documentation/devicetree/bindings/interrupt-controller/renesas,rza1-irqc.txt
new file mode 100644
index 000000000000..727b7e4cd6e0
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,rza1-irqc.txt
@@ -0,0 +1,43 @@
+DT bindings for the Renesas RZ/A1 Interrupt Controller
+
+The RZ/A1 Interrupt Controller is a front-end for the GIC found on Renesas
+RZ/A1 and RZ/A2 SoCs:
+  - IRQ sense select for 8 external interrupts, 1:1-mapped to 8 GIC SPI
+    interrupts,
+  - NMI edge select.
+
+Required properties:
+  - compatible: Must be "renesas,<soctype>-irqc", and "renesas,rza1-irqc" as
+		fallback.
+		Examples with soctypes are:
+		  - "renesas,r7s72100-irqc" (RZ/A1H)
+		  - "renesas,r7s9210-irqc" (RZ/A2M)
+  - #interrupt-cells: Must be 2 (an interrupt index and flags, as defined
+				 in interrupts.txt in this directory)
+  - #address-cells: Must be zero
+  - interrupt-controller: Marks the device as an interrupt controller
+  - reg: Base address and length of the memory resource used by the interrupt
+         controller
+  - interrupt-map: Specifies the mapping from external interrupts to GIC
+		   interrupts
+  - interrupt-map-mask: Must be <7 0>
+
+Example:
+
+	irqc: interrupt-controller@fcfef800 {
+		compatible = "renesas,r7s72100-irqc", "renesas,rza1-irqc";
+		#interrupt-cells = <2>;
+		#address-cells = <0>;
+		interrupt-controller;
+		reg = <0xfcfef800 0x6>;
+		interrupt-map =
+			<0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+			<1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+			<2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+			<3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+			<4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+			<5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
+			<6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
+			<7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-map-mask = <7 0>;
+	};
-- 
cgit 


From 5902bca94ae05316ec7feab9b84cb07ffa5c1175 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Wed, 24 Apr 2019 06:43:47 -0400
Subject: media: v4l2-ctrl: add MPEG-2 profile and level controls

Add MPEG-2 CID definitions for profiles and levels defined in ITU-T Rec.
H.262.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/ext-ctrls-codec.rst | 56 ++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
index 4a8446203085..843c93e8e7bc 100644
--- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
+++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
@@ -759,6 +759,32 @@ enum v4l2_mpeg_video_h264_level -
 
 
+.. _v4l2-mpeg-video-mpeg2-level:
+
+``V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL``
+    (enum)
+
+enum v4l2_mpeg_video_mpeg2_level -
+    The level information for the MPEG2 elementary stream. Applicable to
+    MPEG2 codecs. Possible values are:
+
+
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    * - ``V4L2_MPEG_VIDEO_MPEG2_LEVEL_LOW``
+      - Low Level (LL)
+    * - ``V4L2_MPEG_VIDEO_MPEG2_LEVEL_MAIN``
+      - Main Level (ML)
+    * - ``V4L2_MPEG_VIDEO_MPEG2_LEVEL_HIGH_1440``
+      - High-1440 Level (H-14)
+    * - ``V4L2_MPEG_VIDEO_MPEG2_LEVEL_HIGH``
+      - High Level (HL)
+
+
+
 .. _v4l2-mpeg-video-mpeg4-level:
 
 ``V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL``
@@ -845,6 +871,36 @@ enum v4l2_mpeg_video_h264_profile -
 
 
+.. _v4l2-mpeg-video-mpeg2-profile:
+
+``V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE``
+    (enum)
+
+enum v4l2_mpeg_video_mpeg2_profile -
+    The profile information for MPEG2. Applicable to MPEG2 codecs.
+    Possible values are:
+
+
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    * - ``V4L2_MPEG_VIDEO_MPEG2_PROFILE_SIMPLE``
+      - Simple profile (SP)
+    * - ``V4L2_MPEG_VIDEO_MPEG2_PROFILE_MAIN``
+      - Main profile (MP)
+    * - ``V4L2_MPEG_VIDEO_MPEG2_PROFILE_SNR_SCALABLE``
+      - SNR Scalable profile (SNR)
+    * - ``V4L2_MPEG_VIDEO_MPEG2_PROFILE_SPATIALLY_SCALABLE``
+      - Spatially Scalable profile (Spt)
+    * - ``V4L2_MPEG_VIDEO_MPEG2_PROFILE_HIGH``
+      - High profile (HP)
+    * - ``V4L2_MPEG_VIDEO_MPEG2_PROFILE_MULTIVIEW``
+      - Multi-view profile (MVP)
+
+
+
 .. _v4l2-mpeg-video-mpeg4-profile:
 
 ``V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE``
-- 
cgit 


From 1f0545d3ed1df3a915546cee60b56f855962ed69 Mon Sep 17 00:00:00 2001
From: Pawel Osciak <posciak@chromium.org>
Date: Fri, 24 May 2019 05:20:28 -0400
Subject: media: uapi: Add H264 low-level decoder API compound controls.

Stateless video codecs will require both the H264 metadata and slices in
order to be able to decode frames.

This introduces the definitions for the structures used to pass the
metadata from the userspace to the kernel.

[hverkuil-cisco@xs4all.nl: add space after . in ".For"]
[hverkuil-cisco@xs4all.nl: sync v4l2_ctrl_h264_decode_params struct layout with header]

Co-developed-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Reviewed-by: Tomasz Figa <tfiga@chromium.org>
Signed-off-by: Pawel Osciak <posciak@chromium.org>
Signed-off-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/biblio.rst           |   9 +
 Documentation/media/uapi/v4l/ext-ctrls-codec.rst  | 569 ++++++++++++++++++++++
 Documentation/media/uapi/v4l/vidioc-queryctrl.rst |  30 ++
 Documentation/media/videodev2.h.rst.exceptions    |   5 +
 4 files changed, 613 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/media/uapi/v4l/biblio.rst b/Documentation/media/uapi/v4l/biblio.rst
index ec33768c055e..8f4eb8823d82 100644
--- a/Documentation/media/uapi/v4l/biblio.rst
+++ b/Documentation/media/uapi/v4l/biblio.rst
@@ -122,6 +122,15 @@ ITU BT.1119
 
 :author:    International Telecommunication Union (http://www.itu.ch)
 
+.. _h264:
+
+ITU-T Rec. H.264 Specification (04/2017 Edition)
+================================================
+
+:title:     ITU-T Recommendation H.264 "Advanced Video Coding for Generic Audiovisual Services"
+
+:author:    International Telecommunication Union (http://www.itu.ch)
+
 .. _jfif:
 
 JFIF
diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
index 843c93e8e7bc..d6ea2ffd65c5 100644
--- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
+++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
@@ -1451,6 +1451,575 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
       - Layer number
 
 
+.. _v4l2-mpeg-h264:
+
+``V4L2_CID_MPEG_VIDEO_H264_SPS (struct)``
+    Specifies the sequence parameter set (as extracted from the
+    bitstream) for the associated H264 slice data. This includes the
+    necessary parameters for configuring a stateless hardware decoding
+    pipeline for H264. The bitstream parameters are defined according
+    to :ref:`h264`, section 7.4.2.1.1 "Sequence Parameter Set Data
+    Semantics". For further documentation, refer to the above
+    specification, unless there is an explicit comment stating
+    otherwise.
+
+    .. note::
+
+       This compound control is not yet part of the public kernel API and
+       it is expected to change.
+
+.. c:type:: v4l2_ctrl_h264_sps
+
+.. cssclass:: longtable
+
+.. flat-table:: struct v4l2_ctrl_h264_sps
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u8
+      - ``profile_idc``
+      -
+    * - __u8
+      - ``constraint_set_flags``
+      - See :ref:`Sequence Parameter Set Constraints Set Flags <h264_sps_constraints_set_flags>`
+    * - __u8
+      - ``level_idc``
+      -
+    * - __u8
+      - ``seq_parameter_set_id``
+      -
+    * - __u8
+      - ``chroma_format_idc``
+      -
+    * - __u8
+      - ``bit_depth_luma_minus8``
+      -
+    * - __u8
+      - ``bit_depth_chroma_minus8``
+      -
+    * - __u8
+      - ``log2_max_frame_num_minus4``
+      -
+    * - __u8
+      - ``pic_order_cnt_type``
+      -
+    * - __u8
+      - ``log2_max_pic_order_cnt_lsb_minus4``
+      -
+    * - __u8
+      - ``max_num_ref_frames``
+      -
+    * - __u8
+      - ``num_ref_frames_in_pic_order_cnt_cycle``
+      -
+    * - __s32
+      - ``offset_for_ref_frame[255]``
+      -
+    * - __s32
+      - ``offset_for_non_ref_pic``
+      -
+    * - __s32
+      - ``offset_for_top_to_bottom_field``
+      -
+    * - __u16
+      - ``pic_width_in_mbs_minus1``
+      -
+    * - __u16
+      - ``pic_height_in_map_units_minus1``
+      -
+    * - __u32
+      - ``flags``
+      - See :ref:`Sequence Parameter Set Flags <h264_sps_flags>`
+
+.. _h264_sps_constraints_set_flags:
+
+``Sequence Parameter Set Constraints Set Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_H264_SPS_CONSTRAINT_SET0_FLAG``
+      - 0x00000001
+      -
+    * - ``V4L2_H264_SPS_CONSTRAINT_SET1_FLAG``
+      - 0x00000002
+      -
+    * - ``V4L2_H264_SPS_CONSTRAINT_SET2_FLAG``
+      - 0x00000004
+      -
+    * - ``V4L2_H264_SPS_CONSTRAINT_SET3_FLAG``
+      - 0x00000008
+      -
+    * - ``V4L2_H264_SPS_CONSTRAINT_SET4_FLAG``
+      - 0x00000010
+      -
+    * - ``V4L2_H264_SPS_CONSTRAINT_SET5_FLAG``
+      - 0x00000020
+      -
+
+.. _h264_sps_flags:
+
+``Sequence Parameter Set Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE``
+      - 0x00000001
+      -
+    * - ``V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS``
+      - 0x00000002
+      -
+    * - ``V4L2_H264_SPS_FLAG_DELTA_PIC_ORDER_ALWAYS_ZERO``
+      - 0x00000004
+      -
+    * - ``V4L2_H264_SPS_FLAG_GAPS_IN_FRAME_NUM_VALUE_ALLOWED``
+      - 0x00000008
+      -
+    * - ``V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY``
+      - 0x00000010
+      -
+    * - ``V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD``
+      - 0x00000020
+      -
+    * - ``V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE``
+      - 0x00000040
+      -
+
+``V4L2_CID_MPEG_VIDEO_H264_PPS (struct)``
+    Specifies the picture parameter set (as extracted from the
+    bitstream) for the associated H264 slice data. This includes the
+    necessary parameters for configuring a stateless hardware decoding
+    pipeline for H264.  The bitstream parameters are defined according
+    to :ref:`h264`, section 7.4.2.2 "Picture Parameter Set RBSP
+    Semantics". For further documentation, refer to the above
+    specification, unless there is an explicit comment stating
+    otherwise.
+
+    .. note::
+
+       This compound control is not yet part of the public kernel API and
+       it is expected to change.
+
+.. c:type:: v4l2_ctrl_h264_pps
+
+.. cssclass:: longtable
+
+.. flat-table:: struct v4l2_ctrl_h264_pps
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u8
+      - ``pic_parameter_set_id``
+      -
+    * - __u8
+      - ``seq_parameter_set_id``
+      -
+    * - __u8
+      - ``num_slice_groups_minus1``
+      -
+    * - __u8
+      - ``num_ref_idx_l0_default_active_minus1``
+      -
+    * - __u8
+      - ``num_ref_idx_l1_default_active_minus1``
+      -
+    * - __u8
+      - ``weighted_bipred_idc``
+      -
+    * - __s8
+      - ``pic_init_qp_minus26``
+      -
+    * - __s8
+      - ``pic_init_qs_minus26``
+      -
+    * - __s8
+      - ``chroma_qp_index_offset``
+      -
+    * - __s8
+      - ``second_chroma_qp_index_offset``
+      -
+    * - __u16
+      - ``flags``
+      - See :ref:`Picture Parameter Set Flags <h264_pps_flags>`
+
+.. _h264_pps_flags:
+
+``Picture Parameter Set Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE``
+      - 0x00000001
+      -
+    * - ``V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT``
+      - 0x00000002
+      -
+    * - ``V4L2_H264_PPS_FLAG_WEIGHTED_PRED``
+      - 0x00000004
+      -
+    * - ``V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT``
+      - 0x00000008
+      -
+    * - ``V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED``
+      - 0x00000010
+      -
+    * - ``V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT``
+      - 0x00000020
+      -
+    * - ``V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE``
+      - 0x00000040
+      -
+    * - ``V4L2_H264_PPS_FLAG_PIC_SCALING_MATRIX_PRESENT``
+      - 0x00000080
+      -
+
+``V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX (struct)``
+    Specifies the scaling matrix (as extracted from the bitstream) for
+    the associated H264 slice data. The bitstream parameters are
+    defined according to :ref:`h264`, section 7.4.2.1.1.1 "Scaling
+    List Semantics". For further documentation, refer to the above
+    specification, unless there is an explicit comment stating
+    otherwise.
+
+    .. note::
+
+       This compound control is not yet part of the public kernel API and
+       it is expected to change.
+
+.. c:type:: v4l2_ctrl_h264_scaling_matrix
+
+.. cssclass:: longtable
+
+.. flat-table:: struct v4l2_ctrl_h264_scaling_matrix
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u8
+      - ``scaling_list_4x4[6][16]``
+      -
+    * - __u8
+      - ``scaling_list_8x8[6][64]``
+      -
+
+``V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS (struct)``
+    Specifies the slice parameters (as extracted from the bitstream)
+    for the associated H264 slice data. This includes the necessary
+    parameters for configuring a stateless hardware decoding pipeline
+    for H264.  The bitstream parameters are defined according to
+    :ref:`h264`, section 7.4.3 "Slice Header Semantics". For further
+    documentation, refer to the above specification, unless there is
+    an explicit comment stating otherwise.
+
+    .. note::
+
+       This compound control is not yet part of the public kernel API
+       and it is expected to change.
+
+       This structure is expected to be passed as an array, with one
+       entry for each slice included in the bitstream buffer.
+
+.. c:type:: v4l2_ctrl_h264_slice_params
+
+.. cssclass:: longtable
+
+.. flat-table:: struct v4l2_ctrl_h264_slice_params
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u32
+      - ``size``
+      -
+    * - __u32
+      - ``header_bit_size``
+      -
+    * - __u16
+      - ``first_mb_in_slice``
+      -
+    * - __u8
+      - ``slice_type``
+      -
+    * - __u8
+      - ``pic_parameter_set_id``
+      -
+    * - __u8
+      - ``colour_plane_id``
+      -
+    * - __u8
+      - ``redundant_pic_cnt``
+      -
+    * - __u16
+      - ``frame_num``
+      -
+    * - __u16
+      - ``idr_pic_id``
+      -
+    * - __u16
+      - ``pic_order_cnt_lsb``
+      -
+    * - __s32
+      - ``delta_pic_order_cnt_bottom``
+      -
+    * - __s32
+      - ``delta_pic_order_cnt0``
+      -
+    * - __s32
+      - ``delta_pic_order_cnt1``
+      -
+    * - struct :c:type:`v4l2_h264_pred_weight_table`
+      - ``pred_weight_table``
+      -
+    * - __u32
+      - ``dec_ref_pic_marking_bit_size``
+      -
+    * - __u32
+      - ``pic_order_cnt_bit_size``
+      -
+    * - __u8
+      - ``cabac_init_idc``
+      -
+    * - __s8
+      - ``slice_qp_delta``
+      -
+    * - __s8
+      - ``slice_qs_delta``
+      -
+    * - __u8
+      - ``disable_deblocking_filter_idc``
+      -
+    * - __s8
+      - ``slice_alpha_c0_offset_div2``
+      -
+    * - __s8
+      - ``slice_beta_offset_div2``
+      -
+    * - __u8
+      - ``num_ref_idx_l0_active_minus1``
+      -
+    * - __u8
+      - ``num_ref_idx_l1_active_minus1``
+      -
+    * - __u32
+      - ``slice_group_change_cycle``
+      -
+    * - __u8
+      - ``ref_pic_list0[32]``
+      - Reference picture list after applying the per-slice modifications
+    * - __u8
+      - ``ref_pic_list1[32]``
+      - Reference picture list after applying the per-slice modifications
+    * - __u32
+      - ``flags``
+      - See :ref:`Slice Parameter Flags <h264_slice_flags>`
+
+.. _h264_slice_flags:
+
+``Slice Parameter Set Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_H264_SLICE_FLAG_FIELD_PIC``
+      - 0x00000001
+      -
+    * - ``V4L2_H264_SLICE_FLAG_BOTTOM_FIELD``
+      - 0x00000002
+      -
+    * - ``V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED``
+      - 0x00000004
+      -
+    * - ``V4L2_H264_SLICE_FLAG_SP_FOR_SWITCH``
+      - 0x00000008
+      -
+
+``Prediction Weight Table``
+
+    The bitstream parameters are defined according to :ref:`h264`,
+    section 7.4.3.2 "Prediction Weight Table Semantics". For further
+    documentation, refer to the above specification, unless there is
+    an explicit comment stating otherwise.
+
+.. c:type:: v4l2_h264_pred_weight_table
+
+.. cssclass:: longtable
+
+.. flat-table:: struct v4l2_h264_pred_weight_table
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u16
+      - ``luma_log2_weight_denom``
+      -
+    * - __u16
+      - ``chroma_log2_weight_denom``
+      -
+    * - struct :c:type:`v4l2_h264_weight_factors`
+      - ``weight_factors[2]``
+      - The weight factors at index 0 are the weight factors for the reference
+        list 0, the one at index 1 for the reference list 1.
+
+.. c:type:: v4l2_h264_weight_factors
+
+.. cssclass:: longtable
+
+.. flat-table:: struct v4l2_h264_weight_factors
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __s16
+      - ``luma_weight[32]``
+      -
+    * - __s16
+      - ``luma_offset[32]``
+      -
+    * - __s16
+      - ``chroma_weight[32][2]``
+      -
+    * - __s16
+      - ``chroma_offset[32][2]``
+      -
+
+``V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS (struct)``
+    Specifies the decode parameters (as extracted from the bitstream)
+    for the associated H264 slice data. This includes the necessary
+    parameters for configuring a stateless hardware decoding pipeline
+    for H264. The bitstream parameters are defined according to
+    :ref:`h264`. For further documentation, refer to the above
+    specification, unless there is an explicit comment stating
+    otherwise.
+
+    .. note::
+
+       This compound control is not yet part of the public kernel API and
+       it is expected to change.
+
+.. c:type:: v4l2_ctrl_h264_decode_params
+
+.. cssclass:: longtable
+
+.. flat-table:: struct v4l2_ctrl_h264_decode_params
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - struct :c:type:`v4l2_h264_dpb_entry`
+      - ``dpb[16]``
+      -
+    * - __u16
+      - ``num_slices``
+      - Number of slices needed to decode the current frame
+    * - __u16
+      - ``nal_ref_idc``
+      - NAL reference ID value coming from the NAL Unit header
+    * - __u8
+      - ``ref_pic_list_p0[32]``
+      - Backward reference list used by P-frames in the original bitstream order
+    * - __u8
+      - ``ref_pic_list_b0[32]``
+      - Backward reference list used by B-frames in the original bitstream order
+    * - __u8
+      - ``ref_pic_list_b1[32]``
+      - Forward reference list used by B-frames in the original bitstream order
+    * - __s32
+      - ``top_field_order_cnt``
+      - Picture Order Count for the coded top field
+    * - __s32
+      - ``bottom_field_order_cnt``
+      - Picture Order Count for the coded bottom field
+    * - __u32
+      - ``flags``
+      - See :ref:`Decode Parameters Flags <h264_decode_params_flags>`
+
+.. _h264_decode_params_flags:
+
+``Decode Parameters Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC``
+      - 0x00000001
+      - That picture is an IDR picture
+
+.. c:type:: v4l2_h264_dpb_entry
+
+.. cssclass:: longtable
+
+.. flat-table:: struct v4l2_h264_dpb_entry
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u64
+      - ``reference_ts``
+      - Timestamp of the V4L2 capture buffer to use as reference, used
+        with B-coded and P-coded frames. The timestamp refers to the
+	``timestamp`` field in struct :c:type:`v4l2_buffer`. Use the
+	:c:func:`v4l2_timeval_to_ns()` function to convert the struct
+	:c:type:`timeval` in struct :c:type:`v4l2_buffer` to a __u64.
+    * - __u16
+      - ``frame_num``
+      -
+    * - __u16
+      - ``pic_num``
+      -
+    * - __s32
+      - ``top_field_order_cnt``
+      -
+    * - __s32
+      - ``bottom_field_order_cnt``
+      -
+    * - __u32
+      - ``flags``
+      - See :ref:`DPB Entry Flags <h264_dpb_flags>`
+
+.. _h264_dpb_flags:
+
+``DPB Entries Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_H264_DPB_ENTRY_FLAG_VALID``
+      - 0x00000001
+      - The DPB entry is valid and should be considered
+    * - ``V4L2_H264_DPB_ENTRY_FLAG_ACTIVE``
+      - 0x00000002
+      - The DPB entry is currently being used as a reference frame
+    * - ``V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM``
+      - 0x00000004
+      - The DPB entry is a long term reference frame
 
 .. _v4l2-mpeg-mpeg2:
 
diff --git a/Documentation/media/uapi/v4l/vidioc-queryctrl.rst b/Documentation/media/uapi/v4l/vidioc-queryctrl.rst
index f824162d0ea9..dc500632095d 100644
--- a/Documentation/media/uapi/v4l/vidioc-queryctrl.rst
+++ b/Documentation/media/uapi/v4l/vidioc-queryctrl.rst
@@ -443,6 +443,36 @@ See also the examples in :ref:`control`.
       - n/a
       - A struct :c:type:`v4l2_ctrl_mpeg2_quantization`, containing MPEG-2
 	quantization matrices for stateless video decoders.
+    * - ``V4L2_CTRL_TYPE_H264_SPS``
+      - n/a
+      - n/a
+      - n/a
+      - A struct :c:type:`v4l2_ctrl_h264_sps`, containing H264
+	sequence parameters for stateless video decoders.
+    * - ``V4L2_CTRL_TYPE_H264_PPS``
+      - n/a
+      - n/a
+      - n/a
+      - A struct :c:type:`v4l2_ctrl_h264_pps`, containing H264
+	picture parameters for stateless video decoders.
+    * - ``V4L2_CTRL_TYPE_H264_SCALING_MATRIX``
+      - n/a
+      - n/a
+      - n/a
+      - A struct :c:type:`v4l2_ctrl_h264_scaling_matrix`, containing H264
+	scaling matrices for stateless video decoders.
+    * - ``V4L2_CTRL_TYPE_H264_SLICE_PARAMS``
+      - n/a
+      - n/a
+      - n/a
+      - A struct :c:type:`v4l2_ctrl_h264_slice_params`, containing H264
+	slice parameters for stateless video decoders.
+    * - ``V4L2_CTRL_TYPE_H264_DECODE_PARAMS``
+      - n/a
+      - n/a
+      - n/a
+      - A struct :c:type:`v4l2_ctrl_h264_decode_params`, containing H264
+	decode parameters for stateless video decoders.
 
 .. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
 
diff --git a/Documentation/media/videodev2.h.rst.exceptions b/Documentation/media/videodev2.h.rst.exceptions
index 64d348e67df9..55cbe324b9fc 100644
--- a/Documentation/media/videodev2.h.rst.exceptions
+++ b/Documentation/media/videodev2.h.rst.exceptions
@@ -136,6 +136,11 @@ replace symbol V4L2_CTRL_TYPE_U32 :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_U8 :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_MPEG2_QUANTIZATION :c:type:`v4l2_ctrl_type`
+replace symbol V4L2_CTRL_TYPE_H264_SPS :c:type:`v4l2_ctrl_type`
+replace symbol V4L2_CTRL_TYPE_H264_PPS :c:type:`v4l2_ctrl_type`
+replace symbol V4L2_CTRL_TYPE_H264_SCALING_MATRIX :c:type:`v4l2_ctrl_type`
+replace symbol V4L2_CTRL_TYPE_H264_SLICE_PARAMS :c:type:`v4l2_ctrl_type`
+replace symbol V4L2_CTRL_TYPE_H264_DECODE_PARAMS :c:type:`v4l2_ctrl_type`
 
 # V4L2 capability defines
 replace define V4L2_CAP_VIDEO_CAPTURE device-capabilities
-- 
cgit 


From 67e84a98af6505d78be9e1a383fadd8de2f9fed0 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Fri, 24 May 2019 05:20:30 -0400
Subject: media: pixfmt: Add H264_SLICE_RAW format documentation

The H264_SLICE_RAW format introduced before is meant for stateless
decoders that will need the H264 parsed slice data without the start code.

Let's document it.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/pixfmt-compressed.rst | 25 ++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/media/uapi/v4l/pixfmt-compressed.rst b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
index 6c961cfb74da..4b701fc7653e 100644
--- a/Documentation/media/uapi/v4l/pixfmt-compressed.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
@@ -52,6 +52,31 @@ Compressed Formats
       - ``V4L2_PIX_FMT_H264_MVC``
       - 'M264'
       - H264 MVC video elementary stream.
+    * .. _V4L2-PIX-FMT-H264-SLICE-RAW:
+
+      - ``V4L2_PIX_FMT_H264_SLICE_RAW``
+      - 'S264'
+      - H264 parsed slice data, without the start code and as
+	extracted from the H264 bitstream.  This format is adapted for
+	stateless video decoders that implement an H264 pipeline
+	(using the :ref:`mem2mem` and :ref:`media-request-api`).
+	Metadata associated with the frame to decode are required to
+	be passed through the ``V4L2_CID_MPEG_VIDEO_H264_SPS``,
+	``V4L2_CID_MPEG_VIDEO_H264_PPS``,
+	``V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX``,
+	``V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS`` and
+	``V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS`` controls.  See the
+	:ref:`associated Codec Control IDs <v4l2-mpeg-h264>`.  Exactly
+	one output and one capture buffer must be provided for use
+	with this pixel format. The output buffer must contain the
+	appropriate number of macroblocks to decode a full
+	corresponding frame to the matching capture buffer.
+
+	.. note::
+
+	   This format is not yet part of the public kernel API and it
+	   is expected to change.
+
     * .. _V4L2-PIX-FMT-H263:
 
       - ``V4L2_PIX_FMT_H263``
-- 
cgit 


From 156fa8845a5715199d1bb56f8bde4b403de33946 Mon Sep 17 00:00:00 2001
From: Michael Tretter <m.tretter@pengutronix.de>
Date: Tue, 28 May 2019 13:11:17 -0400
Subject: media: dt-bindings: media: document allegro-dvt bindings

Add device-tree bindings for the Allegro DVT video IP core found on the
Xilinx ZynqMP EV family.

Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 .../devicetree/bindings/media/allegro.txt          | 43 ++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/media/allegro.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/media/allegro.txt b/Documentation/devicetree/bindings/media/allegro.txt
new file mode 100644
index 000000000000..a92e2fbf26c9
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/allegro.txt
@@ -0,0 +1,43 @@
+Device-tree bindings for the Allegro DVT video IP codecs present in the Xilinx
+ZynqMP SoC. The IP core may either be a H.264/H.265 encoder or H.264/H.265
+decoder ip core.
+
+Each actual codec engines is controlled by a microcontroller (MCU). Host
+software uses a provided mailbox interface to communicate with the MCU. The
+MCU share an interrupt.
+
+Required properties:
+  - compatible: value should be one of the following
+    "allegro,al5e-1.1", "allegro,al5e": encoder IP core
+    "allegro,al5d-1.1", "allegro,al5d": decoder IP core
+  - reg: base and length of the memory mapped register region and base and
+    length of the memory mapped sram
+  - reg-names: must include "regs" and "sram"
+  - interrupts: shared interrupt from the MCUs to the processing system
+  - clocks: must contain an entry for each entry in clock-names
+  - clock-names: must include "core_clk", "mcu_clk", "m_axi_core_aclk",
+    "m_axi_mcu_aclk", "s_axi_lite_aclk"
+
+Example:
+	al5e: video-codec@a0009000 {
+		compatible = "allegro,al5e-1.1", "allegro,al5e";
+		reg = <0 0xa0009000 0 0x1000>,
+		      <0 0xa0000000 0 0x8000>;
+		reg-names = "regs", "sram";
+		interrupts = <0 96 4>;
+		clocks = <&xlnx_vcu 0>, <&xlnx_vcu 1>,
+			 <&clkc 71>, <&clkc 71>, <&clkc 71>;
+		clock-names = "core_clk", "mcu_clk", "m_axi_core_aclk",
+			      "m_axi_mcu_aclk", "s_axi_lite_aclk"
+	};
+	al5d: video-codec@a0029000 {
+		compatible = "allegro,al5d-1.1", "allegro,al5d";
+		reg = <0 0xa0029000 0 0x1000>,
+		      <0 0xa0020000 0 0x8000>;
+		reg-names = "regs", "sram";
+		interrupts = <0 96 4>;
+		clocks = <&xlnx_vcu 2>, <&xlnx_vcu 3>,
+			 <&clkc 71>, <&clkc 71>, <&clkc 71>;
+		clock-names = "core_clk", "mcu_clk", "m_axi_core_aclk",
+			      "m_axi_mcu_aclk", "s_axi_lite_aclk"
+	};
-- 
cgit 


From 8df39e16877ffa7a055a2c35c86df69c37ad73a9 Mon Sep 17 00:00:00 2001
From: Michael Tretter <m.tretter@pengutronix.de>
Date: Tue, 28 May 2019 13:11:18 -0400
Subject: media: dt-bindings: media: Add vendor prefix for allegro

Add vendor prefix for Allegro DVT, a provider of H.264/AVC, H.265/HEVC,
AVS2, VP9 and AV1 compliance test suites and H.264/AVC, H.265/HEVC, and
VP9 encoder, codec and decoder hardware (RTL) IPs.

Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 33a65a45e319..7da11464991a 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -49,6 +49,8 @@ patternProperties:
     description: Aeroflex Gaisler AB
   "^al,.*":
     description: Annapurna Labs
+  "^allegro,.*"
+    description: Allegro DVT
   "^allo,.*":
     description: Allo.com
   "^allwinner,.*":
-- 
cgit 


From 97a1aa00c178589a62b973848cfb40132793a1ec Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Thu, 6 Jun 2019 15:13:10 +0300
Subject: docs/habanalabs: update text for some entries in sysfs

This patch updates the description of some entries in sysfs for the
habanalabs driver.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 Documentation/ABI/testing/sysfs-driver-habanalabs | 42 +++++++++++++----------
 1 file changed, 24 insertions(+), 18 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs
index 78b2bcf316a3..f433fc6db3c6 100644
--- a/Documentation/ABI/testing/sysfs-driver-habanalabs
+++ b/Documentation/ABI/testing/sysfs-driver-habanalabs
@@ -62,18 +62,20 @@ What:           /sys/class/habanalabs/hl<n>/ic_clk
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        oded.gabbay@gmail.com
-Description:    Allows the user to set the maximum clock frequency of the
-                Interconnect fabric. Writes to this parameter affect the device
-                only when the power management profile is set to "manual" mode.
-                The device IC clock might be set to lower value then the
+Description:    Allows the user to set the maximum clock frequency, in Hz, of
+                the Interconnect fabric. Writes to this parameter affect the
+                device only when the power management profile is set to "manual"
+                mode. The device IC clock might be set to lower value than the
                 maximum. The user should read the ic_clk_curr to see the actual
-                frequency value of the IC
+                frequency value of the IC. This property is valid only for the
+                Goya ASIC family
 
 What:           /sys/class/habanalabs/hl<n>/ic_clk_curr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        oded.gabbay@gmail.com
-Description:    Displays the current clock frequency of the Interconnect fabric
+Description:    Displays the current clock frequency, in Hz, of the Interconnect
+                fabric. This property is valid only for the Goya ASIC family
 
 What:           /sys/class/habanalabs/hl<n>/infineon_ver
 Date:           Jan 2019
@@ -92,18 +94,20 @@ What:           /sys/class/habanalabs/hl<n>/mme_clk
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        oded.gabbay@gmail.com
-Description:    Allows the user to set the maximum clock frequency of the
-                MME compute engine. Writes to this parameter affect the device
-                only when the power management profile is set to "manual" mode.
-                The device MME clock might be set to lower value then the
+Description:    Allows the user to set the maximum clock frequency, in Hz, of
+                the MME compute engine. Writes to this parameter affect the
+                device only when the power management profile is set to "manual"
+                mode. The device MME clock might be set to lower value than the
                 maximum. The user should read the mme_clk_curr to see the actual
-                frequency value of the MME
+                frequency value of the MME. This property is valid only for the
+                Goya ASIC family
 
 What:           /sys/class/habanalabs/hl<n>/mme_clk_curr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        oded.gabbay@gmail.com
-Description:    Displays the current clock frequency of the MME compute engine
+Description:    Displays the current clock frequency, in Hz, of the MME compute
+                engine. This property is valid only for the Goya ASIC family
 
 What:           /sys/class/habanalabs/hl<n>/pci_addr
 Date:           Jan 2019
@@ -163,18 +167,20 @@ What:           /sys/class/habanalabs/hl<n>/tpc_clk
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        oded.gabbay@gmail.com
-Description:    Allows the user to set the maximum clock frequency of the
-                TPC compute engines. Writes to this parameter affect the device
-                only when the power management profile is set to "manual" mode.
-                The device TPC clock might be set to lower value then the
+Description:    Allows the user to set the maximum clock frequency, in Hz, of
+                the TPC compute engines. Writes to this parameter affect the
+                device only when the power management profile is set to "manual"
+                mode. The device TPC clock might be set to lower value than the
                 maximum. The user should read the tpc_clk_curr to see the actual
-                frequency value of the TPC
+                frequency value of the TPC. This property is valid only for
+                Goya ASIC family
 
 What:           /sys/class/habanalabs/hl<n>/tpc_clk_curr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        oded.gabbay@gmail.com
-Description:    Displays the current clock frequency of the TPC compute engines
+Description:    Displays the current clock frequency, in Hz, of the TPC compute
+                engines. This property is valid only for the Goya ASIC family
 
 What:           /sys/class/habanalabs/hl<n>/uboot_ver
 Date:           Jan 2019
-- 
cgit 


From 3e6a515ff4d40b690511a250ceb11a6e1eba4081 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Mon, 27 May 2019 05:11:32 -0400
Subject: media: media-ioc-enum-links.rst: fix incorrect reserved field
 documentation

The reserved field array for struct media_link_desc has length 2, not 4.

And the reserved field array of struct media_links_enum was never documented
at all.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/mediactl/media-ioc-enum-links.rst | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/media/uapi/mediactl/media-ioc-enum-links.rst b/Documentation/media/uapi/mediactl/media-ioc-enum-links.rst
index a982f16e55a4..b827ebc398f8 100644
--- a/Documentation/media/uapi/mediactl/media-ioc-enum-links.rst
+++ b/Documentation/media/uapi/mediactl/media-ioc-enum-links.rst
@@ -84,6 +84,11 @@ returned during the enumeration process.
        -  Pointer to a links array allocated by the application. Ignored if
 	  NULL.
 
+    *  -  __u32
+       -  ``reserved[4]``
+       -  Reserved for future extensions. Drivers and applications must set
+          the array to zero.
+
 
 .. c:type:: media_pad_desc
 
@@ -135,7 +140,7 @@ returned during the enumeration process.
        -  Link flags, see :ref:`media-link-flag` for more details.
 
     *  -  __u32
-       -  ``reserved[4]``
+       -  ``reserved[2]``
        -  Reserved for future extensions. Drivers and applications must set
           the array to zero.
 
-- 
cgit 


From 0a0c2a9262a145570751111603d1f0385ce1a443 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Mon, 20 May 2019 11:06:35 -0400
Subject: media: dt-bindings: media: sun6i-csi: Add compatible string for A83T
 variant

The A83T SoC has a camera sensor interface (known as CSI in Allwinner
lingo), which is similar to the one found on the A64 and H3. The only
difference seems to be that support of MIPI CSI through a connected
MIPI CSI-2 bridge.

Add a compatible string for this variant.

Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Acked-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/devicetree/bindings/media/sun6i-csi.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/media/sun6i-csi.txt b/Documentation/devicetree/bindings/media/sun6i-csi.txt
index 0dd540bb03db..a2e3e56f0257 100644
--- a/Documentation/devicetree/bindings/media/sun6i-csi.txt
+++ b/Documentation/devicetree/bindings/media/sun6i-csi.txt
@@ -6,6 +6,7 @@ Allwinner V3s SoC features a CSI module(CSI1) with parallel interface.
 Required properties:
   - compatible: value must be one of:
     * "allwinner,sun6i-a31-csi"
+    * "allwinner,sun8i-a83t-csi"
     * "allwinner,sun8i-h3-csi"
     * "allwinner,sun8i-v3s-csi"
     * "allwinner,sun50i-a64-csi"
-- 
cgit 


From 29a50257a9d6d16320bc5e2fc1b15e0f2b2eb7cf Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 28 May 2019 17:57:09 -0700
Subject: dt-bindings: PCI: qcom: Add QCS404 to the binding

The Qualcomm QCS404 platform contains a PCIe controller, add this to the
Qualcomm PCI binding document. The controller is the same version as the
one used in IPQ4019, but the PHY part is described separately, hence the
difference in clocks and resets.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Vinod Koul <vkoul@kernel.org>
---
 .../devicetree/bindings/pci/qcom,pcie.txt          | 25 ++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie.txt b/Documentation/devicetree/bindings/pci/qcom,pcie.txt
index 1fd703bd73e0..ada80b01bf0c 100644
--- a/Documentation/devicetree/bindings/pci/qcom,pcie.txt
+++ b/Documentation/devicetree/bindings/pci/qcom,pcie.txt
@@ -10,6 +10,7 @@
 			- "qcom,pcie-msm8996" for msm8996 or apq8096
 			- "qcom,pcie-ipq4019" for ipq4019
 			- "qcom,pcie-ipq8074" for ipq8074
+			- "qcom,pcie-qcs404" for qcs404
 
 - reg:
 	Usage: required
@@ -116,6 +117,15 @@
 			- "ahb"		AHB clock
 			- "aux"		Auxiliary clock
 
+- clock-names:
+	Usage: required for qcs404
+	Value type: <stringlist>
+	Definition: Should contain the following entries
+			- "iface"	AHB clock
+			- "aux"		Auxiliary clock
+			- "master_bus"	AXI Master clock
+			- "slave_bus"	AXI Slave clock
+
 - resets:
 	Usage: required
 	Value type: <prop-encoded-array>
@@ -167,6 +177,17 @@
 			- "ahb"			AHB Reset
 			- "axi_m_sticky"	AXI Master Sticky reset
 
+- reset-names:
+	Usage: required for qcs404
+	Value type: <stringlist>
+	Definition: Should contain the following entries
+			- "axi_m"		AXI Master reset
+			- "axi_s"		AXI Slave reset
+			- "axi_m_sticky"	AXI Master Sticky reset
+			- "pipe_sticky"		PIPE sticky reset
+			- "pwr"			PWR reset
+			- "ahb"			AHB reset
+
 - power-domains:
 	Usage: required for apq8084 and msm8996/apq8096
 	Value type: <prop-encoded-array>
@@ -195,12 +216,12 @@
 	Definition: A phandle to the PCIe endpoint power supply
 
 - phys:
-	Usage: required for apq8084
+	Usage: required for apq8084 and qcs404
 	Value type: <phandle>
 	Definition: List of phandle(s) as listed in phy-names property
 
 - phy-names:
-	Usage: required for apq8084
+	Usage: required for apq8084 and qcs404
 	Value type: <stringlist>
 	Definition: Should contain "pciephy"
 
-- 
cgit 


From 42f6ebd827832e62a37350ffad776ea785a2486b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 23 May 2019 07:43:43 -0300
Subject: docs: cdomain.py: get rid of a warning since version 1.8

There's a new warning about a deprecation function. Add a
logic at cdomain.py to avoid that.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/sphinx/cdomain.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/sphinx/cdomain.py b/Documentation/sphinx/cdomain.py
index cf13ff3a656c..cbac8e608dc4 100644
--- a/Documentation/sphinx/cdomain.py
+++ b/Documentation/sphinx/cdomain.py
@@ -48,7 +48,10 @@ major, minor, patch = sphinx.version_info[:3]
 
 def setup(app):
 
-    app.override_domain(CDomain)
+    if (major == 1 and minor < 8):
+        app.override_domain(CDomain)
+    else:
+        app.add_domain(CDomain, override=True)
 
     return dict(
         version = __version__,
-- 
cgit 


From fe4ec72cca500b2f97ffa0429b4cd57f67e0821d Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Tue, 21 May 2019 21:30:00 +0900
Subject: docs: tracing: Fix typos in histogram.rst

This patch fixes some spelling typos in histogram.rst

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/trace/histogram.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst
index fb621a1c2638..8408670d0328 100644
--- a/Documentation/trace/histogram.rst
+++ b/Documentation/trace/histogram.rst
@@ -1010,7 +1010,7 @@ Extended error information
 
   For example, suppose we wanted to take a look at the relative
   weights in terms of skb length for each callpath that leads to a
-  netif_receieve_skb event when downloading a decent-sized file using
+  netif_receive_skb event when downloading a decent-sized file using
   wget.
 
   First we set up an initially paused stacktrace trigger on the
@@ -1843,7 +1843,7 @@ practice, not every handler.action combination is currently supported;
 if a given handler.action combination isn't supported, the hist
 trigger will fail with -EINVAL;
 
-The default 'handler.action' if none is explicity specified is as it
+The default 'handler.action' if none is explicitly specified is as it
 always has been, to simply update the set of values associated with an
 entry.  Some applications, however, may want to perform additional
 actions at that point, such as generate another event, or compare and
@@ -2088,7 +2088,7 @@ The following commonly-used handler.action pairs are available:
     and the saved values corresponding to the max are displayed
     following the rest of the fields.
 
-    If a snaphot was taken, there is also a message indicating that,
+    If a snapshot was taken, there is also a message indicating that,
     along with the value and event that triggered the global maximum:
 
     # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist
@@ -2176,7 +2176,7 @@ The following commonly-used handler.action pairs are available:
     hist trigger entry.
 
     Note that in this case the changed value is a global variable
-    associated withe current trace instance.  The key of the specific
+    associated with current trace instance.  The key of the specific
     trace event that caused the value to change and the global value
     itself are displayed, along with a message stating that a snapshot
     has been taken and where to find it.  The user can use the key
@@ -2203,7 +2203,7 @@ The following commonly-used handler.action pairs are available:
     and the saved values corresponding to that value are displayed
     following the rest of the fields.
 
-    If a snaphot was taken, there is also a message indicating that,
+    If a snapshot was taken, there is also a message indicating that,
     along with the value and event that triggered the snapshot::
 
       # cat /sys/kernel/debug/tracing/events/tcp/tcp_probe/hist
-- 
cgit 


From 93285c01977729a2e046e065e4b99791b966130c Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Date: Tue, 21 May 2019 10:32:08 +0800
Subject: doc: kernel-parameters.txt: fix documentation of nmi_watchdog
 parameter

The default behavior of hardlockup depends on the config of
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC.

Fix the description of nmi_watchdog to make it clear.

Suggested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Kees Cook <keescook@chromium.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-doc@vger.kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/kernel-parameters.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..79d043b8850d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2836,8 +2836,9 @@
 			0 - turn hardlockup detector in nmi_watchdog off
 			1 - turn hardlockup detector in nmi_watchdog on
 			When panic is specified, panic when an NMI watchdog
-			timeout occurs (or 'nopanic' to override the opposite
-			default). To disable both hard and soft lockup detectors,
+			timeout occurs (or 'nopanic' to not panic on an NMI
+			watchdog, if CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is set)
+			To disable both hard and soft lockup detectors,
 			please see 'nowatchdog'.
 			This is useful when you use a panic=... timeout and
 			need the box quickly up again.
-- 
cgit 


From 50c1f43a37d006ac24755397614b00064a8f293a Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <tobin@kernel.org>
Date: Wed, 15 May 2019 10:29:05 +1000
Subject: docs: filesystems: vfs: Remove space before tab

Currently the file has a bunch of spaces before tabspaces.  This is a
nuisance when patching the file because they show up whenever we touch
these lines.  Let's just fix them all now in preparation for doing the
RST conversion.

Remove spaces before tabspaces.

Tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Tobin C. Harding <tobin@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/vfs.txt | 78 +++++++++++++++++++--------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 57fc576b1f3e..cab5a36f39c6 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -134,7 +134,7 @@ struct file_system_type {
 	should be shut down
 
   owner: for internal VFS use: you should initialize this to THIS_MODULE in
-  	most cases.
+	most cases.
 
   next: for internal VFS use: you should initialize this to NULL
 
@@ -143,7 +143,7 @@ struct file_system_type {
 The mount() method has the following arguments:
 
   struct file_system_type *fs_type: describes the filesystem, partly initialized
-  	by the specific filesystem code
+	by the specific filesystem code
 
   int flags: mount flags
 
@@ -180,12 +180,12 @@ and provides a fill_super() callback instead. The generic variants are:
   mount_nodev: mount a filesystem that is not backed by a device
 
   mount_single: mount a filesystem which shares the instance between
-  	all mounts
+	all mounts
 
 A fill_super() callback implementation has the following arguments:
 
   struct super_block *sb: the superblock structure. The callback
-  	must initialize this properly.
+	must initialize this properly.
 
   void *data: arbitrary mount options, usually comes as an ASCII
 	string (see "Mount Options" section)
@@ -236,14 +236,14 @@ only called from a process context (i.e. not from an interrupt handler
 or bottom half).
 
   alloc_inode: this method is called by alloc_inode() to allocate memory
- 	for struct inode and initialize it.  If this function is not
- 	defined, a simple 'struct inode' is allocated.  Normally
- 	alloc_inode will be used to allocate a larger structure which
- 	contains a 'struct inode' embedded within it.
+	for struct inode and initialize it.  If this function is not
+	defined, a simple 'struct inode' is allocated.  Normally
+	alloc_inode will be used to allocate a larger structure which
+	contains a 'struct inode' embedded within it.
 
   destroy_inode: this method is called by destroy_inode() to release
-  	resources allocated for struct inode.  It is only required if
-  	->alloc_inode was defined and simply undoes anything done by
+	resources allocated for struct inode.  It is only required if
+	->alloc_inode was defined and simply undoes anything done by
 	->alloc_inode.
 
   dirty_inode: this method is called by the VFS to mark an inode dirty.
@@ -271,15 +271,15 @@ or bottom half).
 	(i.e. unmount). This is called with the superblock lock held
 
   sync_fs: called when VFS is writing out all dirty data associated with
-  	a superblock. The second parameter indicates whether the method
+	a superblock. The second parameter indicates whether the method
 	should wait until the write out has been completed. Optional.
 
   freeze_fs: called when VFS is locking a filesystem and
-  	forcing it into a consistent state.  This method is currently
-  	used by the Logical Volume Manager (LVM).
+	forcing it into a consistent state.  This method is currently
+	used by the Logical Volume Manager (LVM).
 
   unfreeze_fs: called when VFS is unlocking a filesystem and making it writable
-  	again.
+	again.
 
   statfs: called when the VFS needs to get filesystem statistics.
 
@@ -476,30 +476,30 @@ otherwise noted.
 	that.
 
   permission: called by the VFS to check for access rights on a POSIX-like
-  	filesystem.
+	filesystem.
 
 	May be called in rcu-walk mode (mask & MAY_NOT_BLOCK). If in rcu-walk
-        mode, the filesystem must check the permission without blocking or
+	mode, the filesystem must check the permission without blocking or
 	storing to the inode.
 
 	If a situation is encountered that rcu-walk cannot handle, return
 	-ECHILD and it will be called again in ref-walk mode.
 
   setattr: called by the VFS to set attributes for a file. This method
-  	is called by chmod(2) and related system calls.
+	is called by chmod(2) and related system calls.
 
   getattr: called by the VFS to get attributes of a file. This method
-  	is called by stat(2) and related system calls.
+	is called by stat(2) and related system calls.
 
   listxattr: called by the VFS to list all extended attributes for a
 	given file. This method is called by the listxattr(2) system call.
 
   update_time: called by the VFS to update a specific time or the i_version of
-  	an inode.  If this is not defined the VFS will update the inode itself
-  	and call mark_inode_dirty_sync.
+	an inode.  If this is not defined the VFS will update the inode itself
+	and call mark_inode_dirty_sync.
 
   atomic_open: called on the last component of an open.  Using this optional
-  	method the filesystem can look up, possibly create and open the file in
+	method the filesystem can look up, possibly create and open the file in
 	one atomic operation.  If it wants to leave actual opening to the
 	caller (e.g. if the file turned out to be a symlink, device, or just
 	something filesystem won't do atomic open for), it may signal this by
@@ -687,13 +687,13 @@ struct address_space_operations {
        that all succeeds, ->readpage will be called again.
 
   writepages: called by the VM to write out pages associated with the
-  	address_space object.  If wbc->sync_mode is WBC_SYNC_ALL, then
-  	the writeback_control will specify a range of pages that must be
-  	written out.  If it is WBC_SYNC_NONE, then a nr_to_write is given
+	address_space object.  If wbc->sync_mode is WBC_SYNC_ALL, then
+	the writeback_control will specify a range of pages that must be
+	written out.  If it is WBC_SYNC_NONE, then a nr_to_write is given
 	and that many pages should be written if possible.
 	If no ->writepages is given, then mpage_writepages is used
-  	instead.  This will choose pages from the address space that are
-  	tagged as DIRTY and will pass them to ->writepage.
+	instead.  This will choose pages from the address space that are
+	tagged as DIRTY and will pass them to ->writepage.
 
   set_page_dirty: called by the VM to set a page dirty.
         This is particularly needed if an address space attaches
@@ -704,11 +704,11 @@ struct address_space_operations {
         PAGECACHE_TAG_DIRTY tag in the radix tree.
 
   readpages: called by the VM to read pages associated with the address_space
-  	object. This is essentially just a vector version of
-  	readpage.  Instead of just one page, several pages are
-  	requested.
+	object. This is essentially just a vector version of
+	readpage.  Instead of just one page, several pages are
+	requested.
 	readpages is only used for read-ahead, so read errors are
-  	ignored.  If anything goes wrong, feel free to give up.
+	ignored.  If anything goes wrong, feel free to give up.
 
   write_begin:
 	Called by the generic buffered write code to ask the filesystem to
@@ -745,12 +745,12 @@ struct address_space_operations {
         that were able to be copied into pagecache.
 
   bmap: called by the VFS to map a logical block offset within object to
-  	physical block number. This method is used by the FIBMAP
-  	ioctl and for working with swap-files.  To be able to swap to
-  	a file, the file must have a stable mapping to a block
-  	device.  The swap system does not go through the filesystem
-  	but instead uses bmap to find out where the blocks in the file
-  	are and uses those addresses directly.
+	physical block number. This method is used by the FIBMAP
+	ioctl and for working with swap-files.  To be able to swap to
+	a file, the file must have a stable mapping to a block
+	device.  The swap system does not go through the filesystem
+	but instead uses bmap to find out where the blocks in the file
+	are and uses those addresses directly.
 
   invalidatepage: If a page has PagePrivate set, then invalidatepage
         will be called when part or all of the page is to be removed
@@ -810,7 +810,7 @@ struct address_space_operations {
   putback_page: Called by the VM when isolated page's migration fails.
 
   launder_page: Called before freeing a page - it writes back the dirty page. To
-  	prevent redirtying the page, it is kept locked during the whole
+	prevent redirtying the page, it is kept locked during the whole
 	operation.
 
   is_partially_uptodate: Called by the VM when reading a file through the
@@ -921,7 +921,7 @@ otherwise noted.
   unlocked_ioctl: called by the ioctl(2) system call.
 
   compat_ioctl: called by the ioctl(2) system call when 32 bit system calls
- 	 are used on 64 bit kernels.
+	 are used on 64 bit kernels.
 
   mmap: called by the mmap(2) system call
 
@@ -946,7 +946,7 @@ otherwise noted.
 	(non-blocking) mode is enabled for a file
 
   lock: called by the fcntl(2) system call for F_GETLK, F_SETLK, and F_SETLKW
-  	commands
+	commands
 
   get_unmapped_area: called by the mmap(2) system call
 
-- 
cgit 


From 4ee33ea403ac7c1f2b04534132ebb9c3c5095b56 Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <tobin@kernel.org>
Date: Wed, 15 May 2019 10:29:06 +1000
Subject: docs: filesystems: vfs: Use uniform space after period.

Currently sometimes document has a single space after a period and
sometimes it has double.  Whichever we use it should be uniform.

Use double space after period, be uniform.

Tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Tobin C. Harding <tobin@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/vfs.txt | 246 +++++++++++++++++++-------------------
 1 file changed, 123 insertions(+), 123 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index cab5a36f39c6..6088b925aa7f 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -14,12 +14,12 @@ Introduction
 
 The Virtual File System (also known as the Virtual Filesystem Switch)
 is the software layer in the kernel that provides the filesystem
-interface to userspace programs. It also provides an abstraction
+interface to userspace programs.  It also provides an abstraction
 within the kernel which allows different filesystem implementations to
 coexist.
 
 VFS system calls open(2), stat(2), read(2), write(2), chmod(2) and so
-on are called from a process context. Filesystem locking is described
+on are called from a process context.  Filesystem locking is described
 in the document Documentation/filesystems/Locking.
 
 
@@ -27,37 +27,37 @@ Directory Entry Cache (dcache)
 ------------------------------
 
 The VFS implements the open(2), stat(2), chmod(2), and similar system
-calls. The pathname argument that is passed to them is used by the VFS
+calls.  The pathname argument that is passed to them is used by the VFS
 to search through the directory entry cache (also known as the dentry
-cache or dcache). This provides a very fast look-up mechanism to
-translate a pathname (filename) into a specific dentry. Dentries live
+cache or dcache).  This provides a very fast look-up mechanism to
+translate a pathname (filename) into a specific dentry.  Dentries live
 in RAM and are never saved to disc: they exist only for performance.
 
-The dentry cache is meant to be a view into your entire filespace. As
+The dentry cache is meant to be a view into your entire filespace.  As
 most computers cannot fit all dentries in the RAM at the same time,
-some bits of the cache are missing. In order to resolve your pathname
+some bits of the cache are missing.  In order to resolve your pathname
 into a dentry, the VFS may have to resort to creating dentries along
-the way, and then loading the inode. This is done by looking up the
+the way, and then loading the inode.  This is done by looking up the
 inode.
 
 
 The Inode Object
 ----------------
 
-An individual dentry usually has a pointer to an inode. Inodes are
+An individual dentry usually has a pointer to an inode.  Inodes are
 filesystem objects such as regular files, directories, FIFOs and other
 beasts.  They live either on the disc (for block device filesystems)
-or in the memory (for pseudo filesystems). Inodes that live on the
+or in the memory (for pseudo filesystems).  Inodes that live on the
 disc are copied into the memory when required and changes to the inode
-are written back to disc. A single inode can be pointed to by multiple
+are written back to disc.  A single inode can be pointed to by multiple
 dentries (hard links, for example, do this).
 
 To look up an inode requires that the VFS calls the lookup() method of
-the parent directory inode. This method is installed by the specific
-filesystem implementation that the inode lives in. Once the VFS has
+the parent directory inode.  This method is installed by the specific
+filesystem implementation that the inode lives in.  Once the VFS has
 the required dentry (and hence the inode), we can do all those boring
 things like open(2) the file, or stat(2) it to peek at the inode
-data. The stat(2) operation is fairly simple: once the VFS has the
+data.  The stat(2) operation is fairly simple: once the VFS has the
 dentry, it peeks at the inode data and passes some of it back to
 userspace.
 
@@ -67,17 +67,17 @@ The File Object
 
 Opening a file requires another operation: allocation of a file
 structure (this is the kernel-side implementation of file
-descriptors). The freshly allocated file structure is initialized with
+descriptors).  The freshly allocated file structure is initialized with
 a pointer to the dentry and a set of file operation member functions.
-These are taken from the inode data. The open() file method is then
-called so the specific filesystem implementation can do its work. You
-can see that this is another switch performed by the VFS. The file
+These are taken from the inode data.  The open() file method is then
+called so the specific filesystem implementation can do its work.  You
+can see that this is another switch performed by the VFS.  The file
 structure is placed into the file descriptor table for the process.
 
 Reading, writing and closing files (and other assorted VFS operations)
 is done by using the userspace file descriptor to grab the appropriate
 file structure, and then calling the required file structure method to
-do whatever is required. For as long as the file is open, it keeps the
+do whatever is required.  For as long as the file is open, it keeps the
 dentry in use, which in turn means that the VFS inode is still in use.
 
 
@@ -92,7 +92,7 @@ functions:
    extern int register_filesystem(struct file_system_type *);
    extern int unregister_filesystem(struct file_system_type *);
 
-The passed struct file_system_type describes your filesystem. When a
+The passed struct file_system_type describes your filesystem.  When a
 request is made to mount a filesystem onto a directory in your namespace,
 the VFS will call the appropriate mount() method for the specific
 filesystem.  New vfsmount referring to the tree returned by ->mount()
@@ -106,7 +106,7 @@ file /proc/filesystems.
 struct file_system_type
 -----------------------
 
-This describes the filesystem. As of kernel 2.6.39, the following
+This describes the filesystem.  As of kernel 2.6.39, the following
 members are defined:
 
 struct file_system_type {
@@ -168,12 +168,12 @@ point of view is a reference to dentry at the root of (sub)tree to
 be attached; creation of new superblock is a common side effect.
 
 The most interesting member of the superblock structure that the
-mount() method fills in is the "s_op" field. This is a pointer to
+mount() method fills in is the "s_op" field.  This is a pointer to
 a "struct super_operations" which describes the next level of the
 filesystem implementation.
 
 Usually, a filesystem uses one of the generic mount() implementations
-and provides a fill_super() callback instead. The generic variants are:
+and provides a fill_super() callback instead.  The generic variants are:
 
   mount_bdev: mount a filesystem residing on a block device
 
@@ -184,7 +184,7 @@ and provides a fill_super() callback instead. The generic variants are:
 
 A fill_super() callback implementation has the following arguments:
 
-  struct super_block *sb: the superblock structure. The callback
+  struct super_block *sb: the superblock structure.  The callback
 	must initialize this properly.
 
   void *data: arbitrary mount options, usually comes as an ASCII
@@ -203,7 +203,7 @@ struct super_operations
 -----------------------
 
 This describes how the VFS can manipulate the superblock of your
-filesystem. As of kernel 2.6.22, the following members are defined:
+filesystem.  As of kernel 2.6.22, the following members are defined:
 
 struct super_operations {
         struct inode *(*alloc_inode)(struct super_block *sb);
@@ -231,7 +231,7 @@ struct super_operations {
 };
 
 All methods are called without any locks being held, unless otherwise
-noted. This means that most methods can block safely. All methods are
+noted.  This means that most methods can block safely.  All methods are
 only called from a process context (i.e. not from an interrupt handler
 or bottom half).
 
@@ -268,11 +268,11 @@ or bottom half).
   delete_inode: called when the VFS wants to delete an inode
 
   put_super: called when the VFS wishes to free the superblock
-	(i.e. unmount). This is called with the superblock lock held
+	(i.e. unmount).  This is called with the superblock lock held
 
   sync_fs: called when VFS is writing out all dirty data associated with
-	a superblock. The second parameter indicates whether the method
-	should wait until the write out has been completed. Optional.
+	a superblock.  The second parameter indicates whether the method
+	should wait until the write out has been completed.  Optional.
 
   freeze_fs: called when VFS is locking a filesystem and
 	forcing it into a consistent state.  This method is currently
@@ -283,10 +283,10 @@ or bottom half).
 
   statfs: called when the VFS needs to get filesystem statistics.
 
-  remount_fs: called when the filesystem is remounted. This is called
+  remount_fs: called when the filesystem is remounted.  This is called
 	with the kernel lock held
 
-  clear_inode: called then the VFS clears the inode. Optional
+  clear_inode: called then the VFS clears the inode.  Optional
 
   umount_begin: called when the VFS is unmounting a filesystem.
 
@@ -307,17 +307,17 @@ or bottom half).
 	implement ->nr_cached_objects for it to be called correctly.
 
 	We can't do anything with any errors that the filesystem might
-	encountered, hence the void return type. This will never be called if
+	encountered, hence the void return type.  This will never be called if
 	the VM is trying to reclaim under GFP_NOFS conditions, hence this
 	method does not need to handle that situation itself.
 
 	Implementations must include conditional reschedule calls inside any
-	scanning loop that is done. This allows the VFS to determine
+	scanning loop that is done.  This allows the VFS to determine
 	appropriate scan batch sizes without having to worry about whether
 	implementations will cause holdoff problems due to large scan batch
 	sizes.
 
-Whoever sets up the inode is responsible for filling in the "i_op" field. This
+Whoever sets up the inode is responsible for filling in the "i_op" field.  This
 is a pointer to a "struct inode_operations" which describes the methods that
 can be performed on individual inodes.
 
@@ -361,7 +361,7 @@ struct inode_operations
 -----------------------
 
 This describes how the VFS can manipulate an inode in your
-filesystem. As of kernel 2.6.22, the following members are defined:
+filesystem.  As of kernel 2.6.22, the following members are defined:
 
 struct inode_operations {
 	int (*create) (struct inode *,struct dentry *, umode_t, bool);
@@ -391,19 +391,19 @@ struct inode_operations {
 Again, all methods are called without any locks being held, unless
 otherwise noted.
 
-  create: called by the open(2) and creat(2) system calls. Only
-	required if you want to support regular files. The dentry you
+  create: called by the open(2) and creat(2) system calls.  Only
+	required if you want to support regular files.  The dentry you
 	get should not have an inode (i.e. it should be a negative
-	dentry). Here you will probably call d_instantiate() with the
+	dentry).  Here you will probably call d_instantiate() with the
 	dentry and the newly created inode
 
   lookup: called when the VFS needs to look up an inode in a parent
-	directory. The name to look for is found in the dentry. This
+	directory.  The name to look for is found in the dentry.  This
 	method must call d_add() to insert the found inode into the
-	dentry. The "i_count" field in the inode structure should be
-	incremented. If the named inode does not exist a NULL inode
+	dentry.  The "i_count" field in the inode structure should be
+	incremented.  If the named inode does not exist a NULL inode
 	should be inserted into the dentry (this is called a negative
-	dentry). Returning an error code from this routine must only
+	dentry).  Returning an error code from this routine must only
 	be done on a real error, otherwise creating inodes with system
 	calls like create(2), mknod(2), mkdir(2) and so on will fail.
 	If you wish to overload the dentry methods then you should
@@ -411,27 +411,27 @@ otherwise noted.
 	to a struct "dentry_operations".
 	This method is called with the directory inode semaphore held
 
-  link: called by the link(2) system call. Only required if you want
-	to support hard links. You will probably need to call
+  link: called by the link(2) system call.  Only required if you want
+	to support hard links.  You will probably need to call
 	d_instantiate() just as you would in the create() method
 
-  unlink: called by the unlink(2) system call. Only required if you
+  unlink: called by the unlink(2) system call.  Only required if you
 	want to support deleting inodes
 
-  symlink: called by the symlink(2) system call. Only required if you
-	want to support symlinks. You will probably need to call
+  symlink: called by the symlink(2) system call.  Only required if you
+	want to support symlinks.  You will probably need to call
 	d_instantiate() just as you would in the create() method
 
-  mkdir: called by the mkdir(2) system call. Only required if you want
-	to support creating subdirectories. You will probably need to
+  mkdir: called by the mkdir(2) system call.  Only required if you want
+	to support creating subdirectories.  You will probably need to
 	call d_instantiate() just as you would in the create() method
 
-  rmdir: called by the rmdir(2) system call. Only required if you want
+  rmdir: called by the rmdir(2) system call.  Only required if you want
 	to support deleting subdirectories
 
   mknod: called by the mknod(2) system call to create a device (char,
-	block) inode or a named pipe (FIFO) or socket. Only required
-	if you want to support creating these types of inodes. You
+	block) inode or a named pipe (FIFO) or socket.  Only required
+	if you want to support creating these types of inodes.  You
 	will probably need to call d_instantiate() just as you would
 	in the create() method
 
@@ -478,21 +478,21 @@ otherwise noted.
   permission: called by the VFS to check for access rights on a POSIX-like
 	filesystem.
 
-	May be called in rcu-walk mode (mask & MAY_NOT_BLOCK). If in rcu-walk
-	mode, the filesystem must check the permission without blocking or
+	May be called in rcu-walk mode (mask & MAY_NOT_BLOCK).  If in rcu-walk
+        mode, the filesystem must check the permission without blocking or
 	storing to the inode.
 
 	If a situation is encountered that rcu-walk cannot handle, return
 	-ECHILD and it will be called again in ref-walk mode.
 
-  setattr: called by the VFS to set attributes for a file. This method
+  setattr: called by the VFS to set attributes for a file.  This method
 	is called by chmod(2) and related system calls.
 
-  getattr: called by the VFS to get attributes of a file. This method
+  getattr: called by the VFS to get attributes of a file.  This method
 	is called by stat(2) and related system calls.
 
   listxattr: called by the VFS to list all extended attributes for a
-	given file. This method is called by the listxattr(2) system call.
+	given file.  This method is called by the listxattr(2) system call.
 
   update_time: called by the VFS to update a specific time or the i_version of
 	an inode.  If this is not defined the VFS will update the inode itself
@@ -530,7 +530,7 @@ The first can be used independently to the others.  The VM can try to
 either write dirty pages in order to clean them, or release clean
 pages in order to reuse them.  To do this it can call the ->writepage
 method on dirty pages, and ->releasepage on clean pages with
-PagePrivate set. Clean pages without PagePrivate and with no external
+PagePrivate set.  Clean pages without PagePrivate and with no external
 references will be released without notice being given to the
 address_space.
 
@@ -538,7 +538,7 @@ To achieve this functionality, pages need to be placed on an LRU with
 lru_cache_add and mark_page_active needs to be called whenever the
 page is used.
 
-Pages are normally kept in a radix tree index by ->index. This tree
+Pages are normally kept in a radix tree index by ->index.  This tree
 maintains information about the PG_Dirty and PG_Writeback status of
 each page, so that pages with either of these flags can be found
 quickly.
@@ -624,7 +624,7 @@ struct address_space_operations
 -------------------------------
 
 This describes how the VFS can manipulate mapping of a file to page cache in
-your filesystem. The following members are defined:
+your filesystem.  The following members are defined:
 
 struct address_space_operations {
 	int (*writepage)(struct page *page, struct writeback_control *wbc);
@@ -704,7 +704,7 @@ struct address_space_operations {
         PAGECACHE_TAG_DIRTY tag in the radix tree.
 
   readpages: called by the VM to read pages associated with the address_space
-	object. This is essentially just a vector version of
+	object.  This is essentially just a vector version of
 	readpage.  Instead of just one page, several pages are
 	requested.
 	readpages is only used for read-ahead, so read errors are
@@ -712,7 +712,7 @@ struct address_space_operations {
 
   write_begin:
 	Called by the generic buffered write code to ask the filesystem to
-	prepare to write len bytes at the given offset in the file. The
+	prepare to write len bytes at the given offset in the file.  The
 	address_space should check that the write will be able to complete,
 	by allocating space if necessary and doing any other internal
 	housekeeping.  If the write will update parts of any basic-blocks on
@@ -735,7 +735,7 @@ struct address_space_operations {
 	which case write_end is not called.
 
   write_end: After a successful write_begin, and data copy, write_end must
-        be called. len is the original len passed to write_begin, and copied
+        be called.  len is the original len passed to write_begin, and copied
         is the amount that was able to be copied.
 
         The filesystem must take care of unlocking the page and releasing it
@@ -745,7 +745,7 @@ struct address_space_operations {
         that were able to be copied into pagecache.
 
   bmap: called by the VFS to map a logical block offset within object to
-	physical block number. This method is used by the FIBMAP
+	physical block number.  This method is used by the FIBMAP
 	ioctl and for working with swap-files.  To be able to swap to
 	a file, the file must have a stable mapping to a block
 	device.  The swap system does not go through the filesystem
@@ -757,7 +757,7 @@ struct address_space_operations {
 	from the address space.  This generally corresponds to either a
 	truncation, punch hole  or a complete invalidation of the address
 	space (in the latter case 'offset' will always be 0 and 'length'
-	will be PAGE_SIZE). Any private data associated with the page
+	will be PAGE_SIZE).  Any private data associated with the page
 	should be updated to reflect this truncation.  If offset is 0 and
 	length is PAGE_SIZE, then the private data should be released,
 	because the page must be able to be completely discarded.  This may
@@ -767,7 +767,7 @@ struct address_space_operations {
   releasepage: releasepage is called on PagePrivate pages to indicate
         that the page should be freed if possible.  ->releasepage
         should remove any private data from the page and clear the
-        PagePrivate flag. If releasepage() fails for some reason, it must
+        PagePrivate flag.  If releasepage() fails for some reason, it must
 	indicate failure with a 0 return value.
 	releasepage() is used in two distinct though related cases.  The
 	first is when the VM finds a clean page with no active users and
@@ -787,7 +787,7 @@ struct address_space_operations {
 
   freepage: freepage is called once the page is no longer visible in
         the page cache in order to allow the cleanup of any private
-	data. Since it may be called by the memory reclaimer, it
+	data.  Since it may be called by the memory reclaimer, it
 	should not assume that the original address_space mapping still
 	exists, and it should not block.
 
@@ -809,32 +809,32 @@ struct address_space_operations {
 
   putback_page: Called by the VM when isolated page's migration fails.
 
-  launder_page: Called before freeing a page - it writes back the dirty page. To
+  launder_page: Called before freeing a page - it writes back the dirty page.  To
 	prevent redirtying the page, it is kept locked during the whole
 	operation.
 
   is_partially_uptodate: Called by the VM when reading a file through the
-	pagecache when the underlying blocksize != pagesize. If the required
+	pagecache when the underlying blocksize != pagesize.  If the required
 	block is up to date then the read can complete without needing the IO
 	to bring the whole page up to date.
 
   is_dirty_writeback: Called by the VM when attempting to reclaim a page.
 	The VM uses dirty and writeback information to determine if it needs
-	to stall to allow flushers a chance to complete some IO. Ordinarily
+	to stall to allow flushers a chance to complete some IO.  Ordinarily
 	it can use PageDirty and PageWriteback but some filesystems have
 	more complex state (unstable pages in NFS prevent reclaim) or
-	do not set those flags due to locking problems. This callback
+	do not set those flags due to locking problems.  This callback
 	allows a filesystem to indicate to the VM if a page should be
 	treated as dirty or writeback for the purposes of stalling.
 
   error_remove_page: normally set to generic_error_remove_page if truncation
-	is ok for this address space. Used for memory failure handling.
+	is ok for this address space.  Used for memory failure handling.
 	Setting this implies you deal with pages going away under you,
 	unless you have them locked or reference counts increased.
 
   swap_activate: Called when swapon is used on a file to allocate
 	space if necessary and pin the block lookup information in
-	memory. A return value of zero indicates success,
+	memory.  A return value of zero indicates success,
 	in which case this file can be used to back swapspace.
 
   swap_deactivate: Called during swapoff on files where swap_activate
@@ -844,14 +844,14 @@ struct address_space_operations {
 The File Object
 ===============
 
-A file object represents a file opened by a process. This is also known
+A file object represents a file opened by a process.  This is also known
 as an "open file description" in POSIX parlance.
 
 
 struct file_operations
 ----------------------
 
-This describes how the VFS can manipulate an open file. As of kernel
+This describes how the VFS can manipulate an open file.  As of kernel
 4.18, the following members are defined:
 
 struct file_operations {
@@ -916,7 +916,7 @@ otherwise noted.
 
   poll: called by the VFS when a process wants to check if there is
 	activity on this file and (optionally) go to sleep until there
-	is activity. Called by the select(2) and poll(2) system calls
+	is activity.  Called by the select(2) and poll(2) system calls
 
   unlocked_ioctl: called by the ioctl(2) system call.
 
@@ -925,13 +925,13 @@ otherwise noted.
 
   mmap: called by the mmap(2) system call
 
-  open: called by the VFS when an inode should be opened. When the VFS
-	opens a file, it creates a new "struct file". It then calls the
-	open method for the newly allocated file structure. You might
+  open: called by the VFS when an inode should be opened.  When the VFS
+	opens a file, it creates a new "struct file".  It then calls the
+	open method for the newly allocated file structure.  You might
 	think that the open method really belongs in
-	"struct inode_operations", and you may be right. I think it's
+	"struct inode_operations", and you may be right.  I think it's
 	done the way it is because it makes filesystems simpler to
-	implement. The open() method is a good place to initialize the
+	implement.  The open() method is a good place to initialize the
 	"private_data" member in the file structure if you want to point
 	to a device structure
 
@@ -939,7 +939,7 @@ otherwise noted.
 
   release: called when the last reference to an open file is closed
 
-  fsync: called by the fsync(2) system call. Also see the section above
+  fsync: called by the fsync(2) system call.  Also see the section above
 	 entitled "Handling errors during writeback".
 
   fasync: called by the fcntl(2) system call when asynchronous
@@ -954,13 +954,13 @@ otherwise noted.
 
   flock: called by the flock(2) system call
 
-  splice_write: called by the VFS to splice data from a pipe to a file. This
+  splice_write: called by the VFS to splice data from a pipe to a file.  This
 		method is used by the splice(2) system call
 
-  splice_read: called by the VFS to splice data from file to a pipe. This
+  splice_read: called by the VFS to splice data from file to a pipe.  This
 	       method is used by the splice(2) system call
 
-  setlease: called by the VFS to set or release a file lock lease. setlease
+  setlease: called by the VFS to set or release a file lock lease.  setlease
 	    implementations should call generic_setlease to record or remove
 	    the lease in the inode after setting it.
 
@@ -984,12 +984,12 @@ otherwise noted.
   fadvise: possibly called by the fadvise64() system call.
 
 Note that the file operations are implemented by the specific
-filesystem in which the inode resides. When opening a device node
+filesystem in which the inode resides.  When opening a device node
 (character or block special) most filesystems will call special
 support routines in the VFS which will locate the required device
-driver information. These support routines replace the filesystem file
+driver information.  These support routines replace the filesystem file
 operations with those for the device driver, and then proceed to call
-the new open() method for the file. This is how opening a device file
+the new open() method for the file.  This is how opening a device file
 in the filesystem eventually ends up calling the device driver open()
 method.
 
@@ -1002,10 +1002,10 @@ struct dentry_operations
 ------------------------
 
 This describes how a filesystem can overload the standard dentry
-operations. Dentries and the dcache are the domain of the VFS and the
-individual filesystem implementations. Device drivers have no business
-here. These methods may be set to NULL, as they are either optional or
-the VFS uses a default. As of kernel 2.6.22, the following members are
+operations.  Dentries and the dcache are the domain of the VFS and the
+individual filesystem implementations.  Device drivers have no business
+here.  These methods may be set to NULL, as they are either optional or
+the VFS uses a default.  As of kernel 2.6.22, the following members are
 defined:
 
 struct dentry_operations {
@@ -1024,10 +1024,10 @@ struct dentry_operations {
 	struct dentry *(*d_real)(struct dentry *, const struct inode *);
 };
 
-  d_revalidate: called when the VFS needs to revalidate a dentry. This
+  d_revalidate: called when the VFS needs to revalidate a dentry.  This
 	is called whenever a name look-up finds a dentry in the
-	dcache. Most local filesystems leave this as NULL, because all their
-	dentries in the dcache are valid. Network filesystems are different
+	dcache.  Most local filesystems leave this as NULL, because all their
+	dentries in the dcache are valid.  Network filesystems are different
 	since things can change on the server without the client necessarily
 	being aware of it.
 
@@ -1045,11 +1045,11 @@ struct dentry_operations {
 
  d_weak_revalidate: called when the VFS needs to revalidate a "jumped" dentry.
 	This is called when a path-walk ends at dentry that was not acquired by
-	doing a lookup in the parent directory. This includes "/", "." and "..",
+	doing a lookup in the parent directory.  This includes "/", "." and "..",
 	as well as procfs-style symlinks and mountpoint traversal.
 
 	In this case, we are less concerned with whether the dentry is still
-	fully correct, but rather that the inode is still valid. As with
+	fully correct, but rather that the inode is still valid.  As with
 	d_revalidate, most local filesystems will set this to NULL since their
 	dcache entries are always valid.
 
@@ -1057,17 +1057,17 @@ struct dentry_operations {
 
 	d_weak_revalidate is only called after leaving rcu-walk mode.
 
-  d_hash: called when the VFS adds a dentry to the hash table. The first
+  d_hash: called when the VFS adds a dentry to the hash table.  The first
 	dentry passed to d_hash is the parent directory that the name is
 	to be hashed into.
 
 	Same locking and synchronisation rules as d_compare regarding
 	what is safe to dereference etc.
 
-  d_compare: called to compare a dentry name with a given name. The first
+  d_compare: called to compare a dentry name with a given name.  The first
 	dentry is the parent of the dentry to be compared, the second is
-	the child dentry. len and name string are properties of the dentry
-	to be compared. qstr is the name to compare it with.
+	the child dentry.  len and name string are properties of the dentry
+	to be compared.  qstr is the name to compare it with.
 
 	Must be constant and idempotent, and should not take locks if
 	possible, and should not or store into the dentry.
@@ -1082,9 +1082,9 @@ struct dentry_operations {
 	"rcu-walk", ie. without any locks or references on things.
 
   d_delete: called when the last reference to a dentry is dropped and the
-	dcache is deciding whether or not to cache it. Return 1 to delete
-	immediately, or 0 to cache the dentry. Default is NULL which means to
-	always cache a reachable dentry. d_delete must be constant and
+	dcache is deciding whether or not to cache it.  Return 1 to delete
+	immediately, or 0 to cache the dentry.  Default is NULL which means to
+	always cache a reachable dentry.  d_delete must be constant and
 	idempotent.
 
   d_init: called when a dentry is allocated
@@ -1092,19 +1092,19 @@ struct dentry_operations {
   d_release: called when a dentry is really deallocated
 
   d_iput: called when a dentry loses its inode (just prior to its
-	being deallocated). The default when this is NULL is that the
-	VFS calls iput(). If you define this method, you must call
+	being deallocated).  The default when this is NULL is that the
+	VFS calls iput().  If you define this method, you must call
 	iput() yourself
 
   d_dname: called when the pathname of a dentry should be generated.
 	Useful for some pseudo filesystems (sockfs, pipefs, ...) to delay
-	pathname generation. (Instead of doing it when dentry is created,
-	it's done only when the path is needed.). Real filesystems probably
+	pathname generation.  (Instead of doing it when dentry is created,
+	it's done only when the path is needed.).  Real filesystems probably
 	dont want to use it, because their dentries are present in global
-	dcache hash, so their hash should be an invariant. As no lock is
+	dcache hash, so their hash should be an invariant.  As no lock is
 	held, d_dname() should not try to modify the dentry itself, unless
-	appropriate SMP safety is used. CAUTION : d_path() logic is quite
-	tricky. The correct way to return for example "Hello" is to put it
+	appropriate SMP safety is used.  CAUTION : d_path() logic is quite
+	tricky.  The correct way to return for example "Hello" is to put it
 	at the end of the buffer, and returns a pointer to the first char.
 	dynamic_dname() helper function is provided to take care of this.
 
@@ -1166,7 +1166,7 @@ struct dentry_operations {
 	With NULL inode the topmost real underlying dentry is returned.
 
 Each dentry has a pointer to its parent dentry, as well as a hash list
-of child dentries. Child dentries are basically like files in a
+of child dentries.  Child dentries are basically like files in a
 directory.
 
 
@@ -1179,36 +1179,36 @@ manipulate dentries:
   dget: open a new handle for an existing dentry (this just increments
 	the usage count)
 
-  dput: close a handle for a dentry (decrements the usage count). If
+  dput: close a handle for a dentry (decrements the usage count).  If
 	the usage count drops to 0, and the dentry is still in its
 	parent's hash, the "d_delete" method is called to check whether
-	it should be cached. If it should not be cached, or if the dentry
-	is not hashed, it is deleted. Otherwise cached dentries are put
+	it should be cached.  If it should not be cached, or if the dentry
+	is not hashed, it is deleted.  Otherwise cached dentries are put
 	into an LRU list to be reclaimed on memory shortage.
 
-  d_drop: this unhashes a dentry from its parents hash list. A
+  d_drop: this unhashes a dentry from its parents hash list.  A
 	subsequent call to dput() will deallocate the dentry if its
 	usage count drops to 0
 
-  d_delete: delete a dentry. If there are no other open references to
+  d_delete: delete a dentry.  If there are no other open references to
 	the dentry then the dentry is turned into a negative dentry
-	(the d_iput() method is called). If there are other
+	(the d_iput() method is called).  If there are other
 	references, then d_drop() is called instead
 
   d_add: add a dentry to its parents hash list and then calls
 	d_instantiate()
 
   d_instantiate: add a dentry to the alias hash list for the inode and
-	updates the "d_inode" member. The "i_count" member in the
-	inode structure should be set/incremented. If the inode
+	updates the "d_inode" member.  The "i_count" member in the
+	inode structure should be set/incremented.  If the inode
 	pointer is NULL, the dentry is called a "negative
-	dentry". This function is commonly called when an inode is
+	dentry".  This function is commonly called when an inode is
 	created for an existing negative dentry
 
   d_lookup: look up a dentry given its parent and path name component
 	It looks up the child of that given name from the dcache
-	hash table. If it is found, the reference count is incremented
-	and the dentry is returned. The caller must use dput()
+	hash table.  If it is found, the reference count is incremented
+	and the dentry is returned.  The caller must use dput()
 	to free the dentry when it finishes using it.
 
 Mount Options
-- 
cgit 


From 90caa781f6402a08b4e602fab7017baa3cee3a28 Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <tobin@kernel.org>
Date: Wed, 15 May 2019 10:29:07 +1000
Subject: docs: filesystems: vfs: Use 72 character column width

In preparation for conversion to RST format use the kernels favoured
documentation column width.  If we are going to do this we might as well
do it thoroughly.  Just do the paragraphs (not the indented stuff), the
rest will be done during indentation fix up patch.

This patch is whitespace only, no textual changes.

Use 72 character column width for all paragraph sections.

Tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Tobin C. Harding <tobin@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/vfs.txt | 198 +++++++++++++++++++-------------------
 1 file changed, 97 insertions(+), 101 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 6088b925aa7f..1cd0e658137a 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -12,15 +12,14 @@
 Introduction
 ============
 
-The Virtual File System (also known as the Virtual Filesystem Switch)
-is the software layer in the kernel that provides the filesystem
-interface to userspace programs.  It also provides an abstraction
-within the kernel which allows different filesystem implementations to
-coexist.
+The Virtual File System (also known as the Virtual Filesystem Switch) is
+the software layer in the kernel that provides the filesystem interface
+to userspace programs.  It also provides an abstraction within the
+kernel which allows different filesystem implementations to coexist.
 
-VFS system calls open(2), stat(2), read(2), write(2), chmod(2) and so
-on are called from a process context.  Filesystem locking is described
-in the document Documentation/filesystems/Locking.
+VFS system calls open(2), stat(2), read(2), write(2), chmod(2) and so on
+are called from a process context.  Filesystem locking is described in
+the document Documentation/filesystems/Locking.
 
 
 Directory Entry Cache (dcache)
@@ -34,11 +33,10 @@ translate a pathname (filename) into a specific dentry.  Dentries live
 in RAM and are never saved to disc: they exist only for performance.
 
 The dentry cache is meant to be a view into your entire filespace.  As
-most computers cannot fit all dentries in the RAM at the same time,
-some bits of the cache are missing.  In order to resolve your pathname
-into a dentry, the VFS may have to resort to creating dentries along
-the way, and then loading the inode.  This is done by looking up the
-inode.
+most computers cannot fit all dentries in the RAM at the same time, some
+bits of the cache are missing.  In order to resolve your pathname into a
+dentry, the VFS may have to resort to creating dentries along the way,
+and then loading the inode.  This is done by looking up the inode.
 
 
 The Inode Object
@@ -46,33 +44,32 @@ The Inode Object
 
 An individual dentry usually has a pointer to an inode.  Inodes are
 filesystem objects such as regular files, directories, FIFOs and other
-beasts.  They live either on the disc (for block device filesystems)
-or in the memory (for pseudo filesystems).  Inodes that live on the
-disc are copied into the memory when required and changes to the inode
-are written back to disc.  A single inode can be pointed to by multiple
+beasts.  They live either on the disc (for block device filesystems) or
+in the memory (for pseudo filesystems).  Inodes that live on the disc
+are copied into the memory when required and changes to the inode are
+written back to disc.  A single inode can be pointed to by multiple
 dentries (hard links, for example, do this).
 
 To look up an inode requires that the VFS calls the lookup() method of
 the parent directory inode.  This method is installed by the specific
-filesystem implementation that the inode lives in.  Once the VFS has
-the required dentry (and hence the inode), we can do all those boring
-things like open(2) the file, or stat(2) it to peek at the inode
-data.  The stat(2) operation is fairly simple: once the VFS has the
-dentry, it peeks at the inode data and passes some of it back to
-userspace.
+filesystem implementation that the inode lives in.  Once the VFS has the
+required dentry (and hence the inode), we can do all those boring things
+like open(2) the file, or stat(2) it to peek at the inode data.  The
+stat(2) operation is fairly simple: once the VFS has the dentry, it
+peeks at the inode data and passes some of it back to userspace.
 
 
 The File Object
 ---------------
 
 Opening a file requires another operation: allocation of a file
-structure (this is the kernel-side implementation of file
-descriptors).  The freshly allocated file structure is initialized with
-a pointer to the dentry and a set of file operation member functions.
-These are taken from the inode data.  The open() file method is then
-called so the specific filesystem implementation can do its work.  You
-can see that this is another switch performed by the VFS.  The file
-structure is placed into the file descriptor table for the process.
+structure (this is the kernel-side implementation of file descriptors).
+The freshly allocated file structure is initialized with a pointer to
+the dentry and a set of file operation member functions.  These are
+taken from the inode data.  The open() file method is then called so the
+specific filesystem implementation can do its work.  You can see that
+this is another switch performed by the VFS.  The file structure is
+placed into the file descriptor table for the process.
 
 Reading, writing and closing files (and other assorted VFS operations)
 is done by using the userspace file descriptor to grab the appropriate
@@ -93,11 +90,12 @@ functions:
    extern int unregister_filesystem(struct file_system_type *);
 
 The passed struct file_system_type describes your filesystem.  When a
-request is made to mount a filesystem onto a directory in your namespace,
-the VFS will call the appropriate mount() method for the specific
-filesystem.  New vfsmount referring to the tree returned by ->mount()
-will be attached to the mountpoint, so that when pathname resolution
-reaches the mountpoint it will jump into the root of that vfsmount.
+request is made to mount a filesystem onto a directory in your
+namespace, the VFS will call the appropriate mount() method for the
+specific filesystem.  New vfsmount referring to the tree returned by
+->mount() will be attached to the mountpoint, so that when pathname
+resolution reaches the mountpoint it will jump into the root of that
+vfsmount.
 
 You can see all filesystems that are registered to the kernel in the
 file /proc/filesystems.
@@ -156,21 +154,21 @@ The mount() method must return the root dentry of the tree requested by
 caller.  An active reference to its superblock must be grabbed and the
 superblock must be locked.  On failure it should return ERR_PTR(error).
 
-The arguments match those of mount(2) and their interpretation
-depends on filesystem type.  E.g. for block filesystems, dev_name is
-interpreted as block device name, that device is opened and if it
-contains a suitable filesystem image the method creates and initializes
-struct super_block accordingly, returning its root dentry to caller.
+The arguments match those of mount(2) and their interpretation depends
+on filesystem type.  E.g. for block filesystems, dev_name is interpreted
+as block device name, that device is opened and if it contains a
+suitable filesystem image the method creates and initializes struct
+super_block accordingly, returning its root dentry to caller.
 
 ->mount() may choose to return a subtree of existing filesystem - it
 doesn't have to create a new one.  The main result from the caller's
-point of view is a reference to dentry at the root of (sub)tree to
-be attached; creation of new superblock is a common side effect.
+point of view is a reference to dentry at the root of (sub)tree to be
+attached; creation of new superblock is a common side effect.
 
-The most interesting member of the superblock structure that the
-mount() method fills in is the "s_op" field.  This is a pointer to
-a "struct super_operations" which describes the next level of the
-filesystem implementation.
+The most interesting member of the superblock structure that the mount()
+method fills in is the "s_op" field.  This is a pointer to a "struct
+super_operations" which describes the next level of the filesystem
+implementation.
 
 Usually, a filesystem uses one of the generic mount() implementations
 and provides a fill_super() callback instead.  The generic variants are:
@@ -317,16 +315,16 @@ or bottom half).
 	implementations will cause holdoff problems due to large scan batch
 	sizes.
 
-Whoever sets up the inode is responsible for filling in the "i_op" field.  This
-is a pointer to a "struct inode_operations" which describes the methods that
-can be performed on individual inodes.
+Whoever sets up the inode is responsible for filling in the "i_op"
+field.  This is a pointer to a "struct inode_operations" which describes
+the methods that can be performed on individual inodes.
 
 struct xattr_handlers
 ---------------------
 
 On filesystems that support extended attributes (xattrs), the s_xattr
-superblock field points to a NULL-terminated array of xattr handlers.  Extended
-attributes are name:value pairs.
+superblock field points to a NULL-terminated array of xattr handlers.
+Extended attributes are name:value pairs.
 
   name: Indicates that the handler matches attributes with the specified name
 	(such as "system.posix_acl_access"); the prefix field must be NULL.
@@ -346,9 +344,9 @@ attributes are name:value pairs.
 	attribute.  This method is called by the the setxattr(2) and
 	removexattr(2) system calls.
 
-When none of the xattr handlers of a filesystem match the specified attribute
-name or when a filesystem doesn't support extended attributes, the various
-*xattr(2) system calls return -EOPNOTSUPP.
+When none of the xattr handlers of a filesystem match the specified
+attribute name or when a filesystem doesn't support extended attributes,
+the various *xattr(2) system calls return -EOPNOTSUPP.
 
 
 The Inode Object
@@ -360,8 +358,8 @@ An inode object represents an object within the filesystem.
 struct inode_operations
 -----------------------
 
-This describes how the VFS can manipulate an inode in your
-filesystem.  As of kernel 2.6.22, the following members are defined:
+This describes how the VFS can manipulate an inode in your filesystem.
+As of kernel 2.6.22, the following members are defined:
 
 struct inode_operations {
 	int (*create) (struct inode *,struct dentry *, umode_t, bool);
@@ -517,42 +515,40 @@ The Address Space Object
 ========================
 
 The address space object is used to group and manage pages in the page
-cache.  It can be used to keep track of the pages in a file (or
-anything else) and also track the mapping of sections of the file into
-process address spaces.
+cache.  It can be used to keep track of the pages in a file (or anything
+else) and also track the mapping of sections of the file into process
+address spaces.
 
 There are a number of distinct yet related services that an
-address-space can provide.  These include communicating memory
-pressure, page lookup by address, and keeping track of pages tagged as
-Dirty or Writeback.
+address-space can provide.  These include communicating memory pressure,
+page lookup by address, and keeping track of pages tagged as Dirty or
+Writeback.
 
 The first can be used independently to the others.  The VM can try to
-either write dirty pages in order to clean them, or release clean
-pages in order to reuse them.  To do this it can call the ->writepage
-method on dirty pages, and ->releasepage on clean pages with
-PagePrivate set.  Clean pages without PagePrivate and with no external
-references will be released without notice being given to the
-address_space.
+either write dirty pages in order to clean them, or release clean pages
+in order to reuse them.  To do this it can call the ->writepage method
+on dirty pages, and ->releasepage on clean pages with PagePrivate set.
+Clean pages without PagePrivate and with no external references will be
+released without notice being given to the address_space.
 
 To achieve this functionality, pages need to be placed on an LRU with
-lru_cache_add and mark_page_active needs to be called whenever the
-page is used.
+lru_cache_add and mark_page_active needs to be called whenever the page
+is used.
 
 Pages are normally kept in a radix tree index by ->index.  This tree
-maintains information about the PG_Dirty and PG_Writeback status of
-each page, so that pages with either of these flags can be found
-quickly.
+maintains information about the PG_Dirty and PG_Writeback status of each
+page, so that pages with either of these flags can be found quickly.
 
 The Dirty tag is primarily used by mpage_writepages - the default
 ->writepages method.  It uses the tag to find dirty pages to call
 ->writepage on.  If mpage_writepages is not used (i.e. the address
-provides its own ->writepages) , the PAGECACHE_TAG_DIRTY tag is
-almost unused.  write_inode_now and sync_inode do use it (through
+provides its own ->writepages) , the PAGECACHE_TAG_DIRTY tag is almost
+unused.  write_inode_now and sync_inode do use it (through
 __sync_single_inode) to check if ->writepages has been successful in
 writing out the whole address_space.
 
-The Writeback tag is used by filemap*wait* and sync_page* functions,
-via filemap_fdatawait_range, to wait for all writeback to complete.
+The Writeback tag is used by filemap*wait* and sync_page* functions, via
+filemap_fdatawait_range, to wait for all writeback to complete.
 
 An address_space handler may attach extra information to a page,
 typically using the 'private' field in the 'struct page'.  If such
@@ -562,25 +558,24 @@ handler to deal with that data.
 
 An address space acts as an intermediate between storage and
 application.  Data is read into the address space a whole page at a
-time, and provided to the application either by copying of the page,
-or by memory-mapping the page.
-Data is written into the address space by the application, and then
-written-back to storage typically in whole pages, however the
-address_space has finer control of write sizes.
+time, and provided to the application either by copying of the page, or
+by memory-mapping the page.  Data is written into the address space by
+the application, and then written-back to storage typically in whole
+pages, however the address_space has finer control of write sizes.
 
 The read process essentially only requires 'readpage'.  The write
 process is more complicated and uses write_begin/write_end or
-set_page_dirty to write data into the address_space, and writepage
-and writepages to writeback data to storage.
+set_page_dirty to write data into the address_space, and writepage and
+writepages to writeback data to storage.
 
 Adding and removing pages to/from an address_space is protected by the
 inode's i_mutex.
 
 When data is written to a page, the PG_Dirty flag should be set.  It
 typically remains set until writepage asks for it to be written.  This
-should clear PG_Dirty and set PG_Writeback.  It can be actually
-written at any point after PG_Dirty is clear.  Once it is known to be
-safe, PG_Writeback is cleared.
+should clear PG_Dirty and set PG_Writeback.  It can be actually written
+at any point after PG_Dirty is clear.  Once it is known to be safe,
+PG_Writeback is cleared.
 
 Writeback makes use of a writeback_control structure to direct the
 operations.  This gives the the writepage and writepages operations some
@@ -609,9 +604,10 @@ file descriptors should get back an error is not possible.
 Instead, the generic writeback error tracking infrastructure in the
 kernel settles for reporting errors to fsync on all file descriptions
 that were open at the time that the error occurred.  In a situation with
-multiple writers, all of them will get back an error on a subsequent fsync,
-even if all of the writes done through that particular file descriptor
-succeeded (or even if there were no writes on that file descriptor at all).
+multiple writers, all of them will get back an error on a subsequent
+fsync, even if all of the writes done through that particular file
+descriptor succeeded (or even if there were no writes on that file
+descriptor at all).
 
 Filesystems that wish to use this infrastructure should call
 mapping_set_error to record the error in the address_space when it
@@ -623,8 +619,8 @@ point in the stream of errors emitted by the backing device(s).
 struct address_space_operations
 -------------------------------
 
-This describes how the VFS can manipulate mapping of a file to page cache in
-your filesystem.  The following members are defined:
+This describes how the VFS can manipulate mapping of a file to page
+cache in your filesystem.  The following members are defined:
 
 struct address_space_operations {
 	int (*writepage)(struct page *page, struct writeback_control *wbc);
@@ -1231,8 +1227,8 @@ filesystems.
 Showing options
 ---------------
 
-If a filesystem accepts mount options, it must define show_options()
-to show all the currently active options.  The rules are:
+If a filesystem accepts mount options, it must define show_options() to
+show all the currently active options.  The rules are:
 
   - options MUST be shown which are not default or their values differ
     from the default
@@ -1240,14 +1236,14 @@ to show all the currently active options.  The rules are:
   - options MAY be shown which are enabled by default or have their
     default value
 
-Options used only internally between a mount helper and the kernel
-(such as file descriptors), or which only have an effect during the
-mounting (such as ones controlling the creation of a journal) are exempt
-from the above rules.
+Options used only internally between a mount helper and the kernel (such
+as file descriptors), or which only have an effect during the mounting
+(such as ones controlling the creation of a journal) are exempt from the
+above rules.
 
-The underlying reason for the above rules is to make sure, that a
-mount can be accurately replicated (e.g. umounting and mounting again)
-based on the information found in /proc/mounts.
+The underlying reason for the above rules is to make sure, that a mount
+can be accurately replicated (e.g. umounting and mounting again) based
+on the information found in /proc/mounts.
 
 Resources
 =========
-- 
cgit 


From e04c83cd53b59e422157c4cea0cdc4e2f33fe305 Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <tobin@kernel.org>
Date: Wed, 15 May 2019 10:29:08 +1000
Subject: docs: filesystems: vfs: Use uniform spacing around headings

Currently spacing before and after headings is non-uniform.  Use two
blank lines before a heading and one after the heading.

Use uniform spacing around headings.

Tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Tobin C. Harding <tobin@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/vfs.txt | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 1cd0e658137a..242fd644c97b 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -319,6 +319,7 @@ Whoever sets up the inode is responsible for filling in the "i_op"
 field.  This is a pointer to a "struct inode_operations" which describes
 the methods that can be performed on individual inodes.
 
+
 struct xattr_handlers
 ---------------------
 
@@ -511,6 +512,7 @@ otherwise noted.
   tmpfile: called in the end of O_TMPFILE open().  Optional, equivalent to
 	atomically creating, opening and unlinking a file in given directory.
 
+
 The Address Space Object
 ========================
 
@@ -584,8 +586,10 @@ and the constraints under which it is being done.  It is also used to
 return information back to the caller about the result of a writepage or
 writepages request.
 
+
 Handling errors during writeback
 --------------------------------
+
 Most applications that do buffered I/O will periodically call a file
 synchronization call (fsync, fdatasync, msync or sync_file_range) to
 ensure that data written has made it to the backing store.  When there
@@ -616,6 +620,7 @@ file->fsync operation, they should call file_check_and_advance_wb_err to
 ensure that the struct file's error cursor has advanced to the correct
 point in the stream of errors emitted by the backing device(s).
 
+
 struct address_space_operations
 -------------------------------
 
@@ -1207,9 +1212,11 @@ manipulate dentries:
 	and the dentry is returned.  The caller must use dput()
 	to free the dentry when it finishes using it.
 
+
 Mount Options
 =============
 
+
 Parsing options
 ---------------
 
@@ -1224,6 +1231,7 @@ The <linux/parser.h> header defines an API that helps parse these
 options.  There are plenty of examples on how to use it in existing
 filesystems.
 
+
 Showing options
 ---------------
 
@@ -1245,6 +1253,7 @@ The underlying reason for the above rules is to make sure, that a mount
 can be accurately replicated (e.g. umounting and mounting again) based
 on the information found in /proc/mounts.
 
+
 Resources
 =========
 
-- 
cgit 


From 90ac11a844f8859d5f960fb530190a9690a9a19b Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <tobin@kernel.org>
Date: Wed, 15 May 2019 10:29:09 +1000
Subject: docs: filesystems: vfs: Use correct initial heading

Kernel RST has a preferred heading adornment scheme.  Currently all the
heading adornments follow this scheme except the document heading.

Use correct heading adornment for initial heading.

Tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Tobin C. Harding <tobin@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/vfs.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 242fd644c97b..1167dd94d84b 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -1,5 +1,6 @@
-
-	      Overview of the Linux Virtual File System
+=========================================
+Overview of the Linux Virtual File System
+=========================================
 
 	Original author: Richard Gooch <rgooch@atnf.csiro.au>
 
-- 
cgit 


From 099c5c7a3fba0c4686090075c6d214355aa67e47 Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <tobin@kernel.org>
Date: Wed, 15 May 2019 10:29:10 +1000
Subject: docs: filesystems: vfs: Use SPDX identifier

Currently the licence is indicated via a custom string.  We have SPDX
license identifiers now for this task.

Use SPDX license identifier matching current license string.

Tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Tobin C. Harding <tobin@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/vfs.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 1167dd94d84b..bd6dd782e8ca 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 =========================================
 Overview of the Linux Virtual File System
 =========================================
@@ -7,8 +9,6 @@ Overview of the Linux Virtual File System
   Copyright (C) 1999 Richard Gooch
   Copyright (C) 2005 Pekka Enberg
 
-  This file is released under the GPLv2.
-
 
 Introduction
 ============
-- 
cgit 


From e66b045715457ca6e18fce2b2fc61dd8af2e2440 Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <tobin@kernel.org>
Date: Wed, 15 May 2019 10:29:11 +1000
Subject: docs: filesystems: vfs: Fix pre-amble indentation

Currently file pre-amble contains custom indentation.  RST is not going
to like this, lets left-align the text.  Put the copyright notices in a
list in preparation for converting document to RST.

Tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Tobin C. Harding <tobin@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/vfs.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index bd6dd782e8ca..9ed5c8d6e656 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -4,10 +4,10 @@
 Overview of the Linux Virtual File System
 =========================================
 
-	Original author: Richard Gooch <rgooch@atnf.csiro.au>
+Original author: Richard Gooch <rgooch@atnf.csiro.au>
 
-  Copyright (C) 1999 Richard Gooch
-  Copyright (C) 2005 Pekka Enberg
+- Copyright (C) 1999 Richard Gooch
+- Copyright (C) 2005 Pekka Enberg
 
 
 Introduction
-- 
cgit 


From 1b44ae63deae020e172866871bd14a76376e0f8b Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <tobin@kernel.org>
Date: Wed, 15 May 2019 10:29:12 +1000
Subject: docs: filesystems: vfs: Convert spaces to tabs

There are bunch of places with 8 spaces, in preparation for correctly
indenting all code snippets (during conversion to RST) change these to
use tabspaces.

This patch is whitespace only.

Convert instances of 8 consecutive spaces to a single tabspace.

Signed-off-by: Tobin C. Harding <tobin@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/vfs.txt | 124 +++++++++++++++++++-------------------
 1 file changed, 62 insertions(+), 62 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 9ed5c8d6e656..4f4f4931bfa0 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -111,12 +111,12 @@ members are defined:
 struct file_system_type {
 	const char *name;
 	int fs_flags;
-        struct dentry *(*mount) (struct file_system_type *, int,
-                       const char *, void *);
-        void (*kill_sb) (struct super_block *);
-        struct module *owner;
-        struct file_system_type * next;
-        struct list_head fs_supers;
+	struct dentry *(*mount) (struct file_system_type *, int,
+		       const char *, void *);
+	void (*kill_sb) (struct super_block *);
+	struct module *owner;
+	struct file_system_type * next;
+	struct list_head fs_supers;
 	struct lock_class_key s_lock_key;
 	struct lock_class_key s_umount_key;
 };
@@ -205,26 +205,26 @@ This describes how the VFS can manipulate the superblock of your
 filesystem.  As of kernel 2.6.22, the following members are defined:
 
 struct super_operations {
-        struct inode *(*alloc_inode)(struct super_block *sb);
-        void (*destroy_inode)(struct inode *);
-
-        void (*dirty_inode) (struct inode *, int flags);
-        int (*write_inode) (struct inode *, int);
-        void (*drop_inode) (struct inode *);
-        void (*delete_inode) (struct inode *);
-        void (*put_super) (struct super_block *);
-        int (*sync_fs)(struct super_block *sb, int wait);
-        int (*freeze_fs) (struct super_block *);
-        int (*unfreeze_fs) (struct super_block *);
-        int (*statfs) (struct dentry *, struct kstatfs *);
-        int (*remount_fs) (struct super_block *, int *, char *);
-        void (*clear_inode) (struct inode *);
-        void (*umount_begin) (struct super_block *);
-
-        int (*show_options)(struct seq_file *, struct dentry *);
-
-        ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
-        ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+	struct inode *(*alloc_inode)(struct super_block *sb);
+	void (*destroy_inode)(struct inode *);
+
+	void (*dirty_inode) (struct inode *, int flags);
+	int (*write_inode) (struct inode *, int);
+	void (*drop_inode) (struct inode *);
+	void (*delete_inode) (struct inode *);
+	void (*put_super) (struct super_block *);
+	int (*sync_fs)(struct super_block *sb, int wait);
+	int (*freeze_fs) (struct super_block *);
+	int (*unfreeze_fs) (struct super_block *);
+	int (*statfs) (struct dentry *, struct kstatfs *);
+	int (*remount_fs) (struct super_block *, int *, char *);
+	void (*clear_inode) (struct inode *);
+	void (*umount_begin) (struct super_block *);
+
+	int (*show_options)(struct seq_file *, struct dentry *);
+
+	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
+	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
 	int (*nr_cached_objects)(struct super_block *);
 	void (*free_cached_objects)(struct super_block *, int);
 };
@@ -479,7 +479,7 @@ otherwise noted.
 	filesystem.
 
 	May be called in rcu-walk mode (mask & MAY_NOT_BLOCK).  If in rcu-walk
-        mode, the filesystem must check the permission without blocking or
+	mode, the filesystem must check the permission without blocking or
 	storing to the inode.
 
 	If a situation is encountered that rcu-walk cannot handle, return
@@ -698,12 +698,12 @@ struct address_space_operations {
 	tagged as DIRTY and will pass them to ->writepage.
 
   set_page_dirty: called by the VM to set a page dirty.
-        This is particularly needed if an address space attaches
-        private data to a page, and that data needs to be updated when
-        a page is dirtied.  This is called, for example, when a memory
+	This is particularly needed if an address space attaches
+	private data to a page, and that data needs to be updated when
+	a page is dirtied.  This is called, for example, when a memory
 	mapped page gets modified.
 	If defined, it should set the PageDirty flag, and the
-        PAGECACHE_TAG_DIRTY tag in the radix tree.
+	PAGECACHE_TAG_DIRTY tag in the radix tree.
 
   readpages: called by the VM to read pages associated with the address_space
 	object.  This is essentially just a vector version of
@@ -721,7 +721,7 @@ struct address_space_operations {
 	storage, then those blocks should be pre-read (if they haven't been
 	read already) so that the updated blocks can be written out properly.
 
-        The filesystem must return the locked pagecache page for the specified
+	The filesystem must return the locked pagecache page for the specified
 	offset, in *pagep, for the caller to write into.
 
 	It must be able to cope with short writes (where the length passed to
@@ -730,21 +730,21 @@ struct address_space_operations {
 	flags is a field for AOP_FLAG_xxx flags, described in
 	include/linux/fs.h.
 
-        A void * may be returned in fsdata, which then gets passed into
-        write_end.
+	A void * may be returned in fsdata, which then gets passed into
+	write_end.
 
-        Returns 0 on success; < 0 on failure (which is the error code), in
+	Returns 0 on success; < 0 on failure (which is the error code), in
 	which case write_end is not called.
 
   write_end: After a successful write_begin, and data copy, write_end must
-        be called.  len is the original len passed to write_begin, and copied
-        is the amount that was able to be copied.
+	be called.  len is the original len passed to write_begin, and copied
+	is the amount that was able to be copied.
 
-        The filesystem must take care of unlocking the page and releasing it
-        refcount, and updating i_size.
+	The filesystem must take care of unlocking the page and releasing it
+	refcount, and updating i_size.
 
-        Returns < 0 on failure, otherwise the number of bytes (<= 'copied')
-        that were able to be copied into pagecache.
+	Returns < 0 on failure, otherwise the number of bytes (<= 'copied')
+	that were able to be copied into pagecache.
 
   bmap: called by the VFS to map a logical block offset within object to
 	physical block number.  This method is used by the FIBMAP
@@ -755,7 +755,7 @@ struct address_space_operations {
 	are and uses those addresses directly.
 
   invalidatepage: If a page has PagePrivate set, then invalidatepage
-        will be called when part or all of the page is to be removed
+	will be called when part or all of the page is to be removed
 	from the address space.  This generally corresponds to either a
 	truncation, punch hole  or a complete invalidation of the address
 	space (in the latter case 'offset' will always be 0 and 'length'
@@ -767,47 +767,47 @@ struct address_space_operations {
 	release MUST succeed.
 
   releasepage: releasepage is called on PagePrivate pages to indicate
-        that the page should be freed if possible.  ->releasepage
-        should remove any private data from the page and clear the
-        PagePrivate flag.  If releasepage() fails for some reason, it must
+	that the page should be freed if possible.  ->releasepage
+	should remove any private data from the page and clear the
+	PagePrivate flag.  If releasepage() fails for some reason, it must
 	indicate failure with a 0 return value.
 	releasepage() is used in two distinct though related cases.  The
 	first is when the VM finds a clean page with no active users and
-        wants to make it a free page.  If ->releasepage succeeds, the
-        page will be removed from the address_space and become free.
+	wants to make it a free page.  If ->releasepage succeeds, the
+	page will be removed from the address_space and become free.
 
 	The second case is when a request has been made to invalidate
-        some or all pages in an address_space.  This can happen
-        through the fadvise(POSIX_FADV_DONTNEED) system call or by the
-        filesystem explicitly requesting it as nfs and 9fs do (when
-        they believe the cache may be out of date with storage) by
-        calling invalidate_inode_pages2().
+	some or all pages in an address_space.  This can happen
+	through the fadvise(POSIX_FADV_DONTNEED) system call or by the
+	filesystem explicitly requesting it as nfs and 9fs do (when
+	they believe the cache may be out of date with storage) by
+	calling invalidate_inode_pages2().
 	If the filesystem makes such a call, and needs to be certain
-        that all pages are invalidated, then its releasepage will
-        need to ensure this.  Possibly it can clear the PageUptodate
-        bit if it cannot free private data yet.
+	that all pages are invalidated, then its releasepage will
+	need to ensure this.  Possibly it can clear the PageUptodate
+	bit if it cannot free private data yet.
 
   freepage: freepage is called once the page is no longer visible in
-        the page cache in order to allow the cleanup of any private
+	the page cache in order to allow the cleanup of any private
 	data.  Since it may be called by the memory reclaimer, it
 	should not assume that the original address_space mapping still
 	exists, and it should not block.
 
   direct_IO: called by the generic read/write routines to perform
-        direct_IO - that is IO requests which bypass the page cache
-        and transfer data directly between the storage and the
-        application's address space.
+	direct_IO - that is IO requests which bypass the page cache
+	and transfer data directly between the storage and the
+	application's address space.
 
   isolate_page: Called by the VM when isolating a movable non-lru page.
 	If page is successfully isolated, VM marks the page as PG_isolated
 	via __SetPageIsolated.
 
   migrate_page:  This is used to compact the physical memory usage.
-        If the VM wants to relocate a page (maybe off a memory card
-        that is signalling imminent failure) it will pass a new page
+	If the VM wants to relocate a page (maybe off a memory card
+	that is signalling imminent failure) it will pass a new page
 	and an old page to this function.  migrate_page should
 	transfer any private data across and update any references
-        that it has to the page.
+	that it has to the page.
 
   putback_page: Called by the VM when isolated page's migration fails.
 
-- 
cgit 


From af96c1e304f7051bf2ee64c9957724bdace05c58 Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <tobin@kernel.org>
Date: Wed, 15 May 2019 10:29:13 +1000
Subject: docs: filesystems: vfs: Convert vfs.txt to RST

vfs.txt is currently stale.  If we convert it to RST this is a good
first step in the process of getting the VFS documentation up to date.

This patch does the following (all as a single patch so as not to
introduce any new SPHINX build warnings)

 - Use '.. code-block:: c' for C code blocks and indent the code blocks.
 - Use double backticks for struct member descriptions.
 - Fix a couple of build warnings by guarding pointers (*) with double
   backticks .e.g  ``*ptr``.
 - Add vfs to Documentation/filesystems/index.rst

The member descriptions paragraph indentation was not touched.  It is
not pretty but these do not cause build warnings.  These descriptions
all need updating anyways so leave it as it is for now.

Signed-off-by: Tobin C. Harding <tobin@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/index.rst |    1 +
 Documentation/filesystems/vfs.rst   | 1291 +++++++++++++++++++++++++++++++++++
 Documentation/filesystems/vfs.txt   | 1274 ----------------------------------
 3 files changed, 1292 insertions(+), 1274 deletions(-)
 create mode 100644 Documentation/filesystems/vfs.rst
 delete mode 100644 Documentation/filesystems/vfs.txt

(limited to 'Documentation')

diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 1131c34d77f6..35644840a690 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -16,6 +16,7 @@ algorithms work.
 .. toctree::
    :maxdepth: 2
 
+   vfs
    path-lookup.rst
    api-summary
    splice
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
new file mode 100644
index 000000000000..2ffbdf5f392c
--- /dev/null
+++ b/Documentation/filesystems/vfs.rst
@@ -0,0 +1,1291 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+Overview of the Linux Virtual File System
+=========================================
+
+Original author: Richard Gooch <rgooch@atnf.csiro.au>
+
+- Copyright (C) 1999 Richard Gooch
+- Copyright (C) 2005 Pekka Enberg
+
+
+Introduction
+============
+
+The Virtual File System (also known as the Virtual Filesystem Switch) is
+the software layer in the kernel that provides the filesystem interface
+to userspace programs.  It also provides an abstraction within the
+kernel which allows different filesystem implementations to coexist.
+
+VFS system calls open(2), stat(2), read(2), write(2), chmod(2) and so on
+are called from a process context.  Filesystem locking is described in
+the document Documentation/filesystems/Locking.
+
+
+Directory Entry Cache (dcache)
+------------------------------
+
+The VFS implements the open(2), stat(2), chmod(2), and similar system
+calls.  The pathname argument that is passed to them is used by the VFS
+to search through the directory entry cache (also known as the dentry
+cache or dcache).  This provides a very fast look-up mechanism to
+translate a pathname (filename) into a specific dentry.  Dentries live
+in RAM and are never saved to disc: they exist only for performance.
+
+The dentry cache is meant to be a view into your entire filespace.  As
+most computers cannot fit all dentries in the RAM at the same time, some
+bits of the cache are missing.  In order to resolve your pathname into a
+dentry, the VFS may have to resort to creating dentries along the way,
+and then loading the inode.  This is done by looking up the inode.
+
+
+The Inode Object
+----------------
+
+An individual dentry usually has a pointer to an inode.  Inodes are
+filesystem objects such as regular files, directories, FIFOs and other
+beasts.  They live either on the disc (for block device filesystems) or
+in the memory (for pseudo filesystems).  Inodes that live on the disc
+are copied into the memory when required and changes to the inode are
+written back to disc.  A single inode can be pointed to by multiple
+dentries (hard links, for example, do this).
+
+To look up an inode requires that the VFS calls the lookup() method of
+the parent directory inode.  This method is installed by the specific
+filesystem implementation that the inode lives in.  Once the VFS has the
+required dentry (and hence the inode), we can do all those boring things
+like open(2) the file, or stat(2) it to peek at the inode data.  The
+stat(2) operation is fairly simple: once the VFS has the dentry, it
+peeks at the inode data and passes some of it back to userspace.
+
+
+The File Object
+---------------
+
+Opening a file requires another operation: allocation of a file
+structure (this is the kernel-side implementation of file descriptors).
+The freshly allocated file structure is initialized with a pointer to
+the dentry and a set of file operation member functions.  These are
+taken from the inode data.  The open() file method is then called so the
+specific filesystem implementation can do its work.  You can see that
+this is another switch performed by the VFS.  The file structure is
+placed into the file descriptor table for the process.
+
+Reading, writing and closing files (and other assorted VFS operations)
+is done by using the userspace file descriptor to grab the appropriate
+file structure, and then calling the required file structure method to
+do whatever is required.  For as long as the file is open, it keeps the
+dentry in use, which in turn means that the VFS inode is still in use.
+
+
+Registering and Mounting a Filesystem
+=====================================
+
+To register and unregister a filesystem, use the following API
+functions:
+
+.. code-block:: c
+
+	#include <linux/fs.h>
+
+	extern int register_filesystem(struct file_system_type *);
+	extern int unregister_filesystem(struct file_system_type *);
+
+The passed struct file_system_type describes your filesystem.  When a
+request is made to mount a filesystem onto a directory in your
+namespace, the VFS will call the appropriate mount() method for the
+specific filesystem.  New vfsmount referring to the tree returned by
+->mount() will be attached to the mountpoint, so that when pathname
+resolution reaches the mountpoint it will jump into the root of that
+vfsmount.
+
+You can see all filesystems that are registered to the kernel in the
+file /proc/filesystems.
+
+
+struct file_system_type
+-----------------------
+
+This describes the filesystem.  As of kernel 2.6.39, the following
+members are defined:
+
+.. code-block:: c
+
+	struct file_system_operations {
+		const char *name;
+		int fs_flags;
+		struct dentry *(*mount) (struct file_system_type *, int,
+					 const char *, void *);
+		void (*kill_sb) (struct super_block *);
+		struct module *owner;
+		struct file_system_type * next;
+		struct list_head fs_supers;
+		struct lock_class_key s_lock_key;
+		struct lock_class_key s_umount_key;
+	};
+
+``name``: the name of the filesystem type, such as "ext2", "iso9660",
+	"msdos" and so on
+
+``fs_flags``: various flags (i.e. FS_REQUIRES_DEV, FS_NO_DCACHE, etc.)
+
+``mount``: the method to call when a new instance of this filesystem should
+be mounted
+
+``kill_sb``: the method to call when an instance of this filesystem
+	should be shut down
+
+``owner``: for internal VFS use: you should initialize this to THIS_MODULE in
+	most cases.
+
+``next``: for internal VFS use: you should initialize this to NULL
+
+  s_lock_key, s_umount_key: lockdep-specific
+
+The mount() method has the following arguments:
+
+``struct file_system_type *fs_type``: describes the filesystem, partly initialized
+	by the specific filesystem code
+
+``int flags``: mount flags
+
+``const char *dev_name``: the device name we are mounting.
+
+``void *data``: arbitrary mount options, usually comes as an ASCII
+	string (see "Mount Options" section)
+
+The mount() method must return the root dentry of the tree requested by
+caller.  An active reference to its superblock must be grabbed and the
+superblock must be locked.  On failure it should return ERR_PTR(error).
+
+The arguments match those of mount(2) and their interpretation depends
+on filesystem type.  E.g. for block filesystems, dev_name is interpreted
+as block device name, that device is opened and if it contains a
+suitable filesystem image the method creates and initializes struct
+super_block accordingly, returning its root dentry to caller.
+
+->mount() may choose to return a subtree of existing filesystem - it
+doesn't have to create a new one.  The main result from the caller's
+point of view is a reference to dentry at the root of (sub)tree to be
+attached; creation of new superblock is a common side effect.
+
+The most interesting member of the superblock structure that the mount()
+method fills in is the "s_op" field.  This is a pointer to a "struct
+super_operations" which describes the next level of the filesystem
+implementation.
+
+Usually, a filesystem uses one of the generic mount() implementations
+and provides a fill_super() callback instead.  The generic variants are:
+
+``mount_bdev``: mount a filesystem residing on a block device
+
+``mount_nodev``: mount a filesystem that is not backed by a device
+
+``mount_single``: mount a filesystem which shares the instance between
+	all mounts
+
+A fill_super() callback implementation has the following arguments:
+
+``struct super_block *sb``: the superblock structure.  The callback
+	must initialize this properly.
+
+``void *data``: arbitrary mount options, usually comes as an ASCII
+	string (see "Mount Options" section)
+
+``int silent``: whether or not to be silent on error
+
+
+The Superblock Object
+=====================
+
+A superblock object represents a mounted filesystem.
+
+
+struct super_operations
+-----------------------
+
+This describes how the VFS can manipulate the superblock of your
+filesystem.  As of kernel 2.6.22, the following members are defined:
+
+.. code-block:: c
+
+	struct super_operations {
+		struct inode *(*alloc_inode)(struct super_block *sb);
+		void (*destroy_inode)(struct inode *);
+
+		void (*dirty_inode) (struct inode *, int flags);
+		int (*write_inode) (struct inode *, int);
+		void (*drop_inode) (struct inode *);
+		void (*delete_inode) (struct inode *);
+		void (*put_super) (struct super_block *);
+		int (*sync_fs)(struct super_block *sb, int wait);
+		int (*freeze_fs) (struct super_block *);
+		int (*unfreeze_fs) (struct super_block *);
+		int (*statfs) (struct dentry *, struct kstatfs *);
+		int (*remount_fs) (struct super_block *, int *, char *);
+		void (*clear_inode) (struct inode *);
+		void (*umount_begin) (struct super_block *);
+
+		int (*show_options)(struct seq_file *, struct dentry *);
+
+		ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
+		ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+		int (*nr_cached_objects)(struct super_block *);
+		void (*free_cached_objects)(struct super_block *, int);
+	};
+
+All methods are called without any locks being held, unless otherwise
+noted.  This means that most methods can block safely.  All methods are
+only called from a process context (i.e. not from an interrupt handler
+or bottom half).
+
+``alloc_inode``: this method is called by alloc_inode() to allocate memory
+	for struct inode and initialize it.  If this function is not
+	defined, a simple 'struct inode' is allocated.  Normally
+	alloc_inode will be used to allocate a larger structure which
+	contains a 'struct inode' embedded within it.
+
+``destroy_inode``: this method is called by destroy_inode() to release
+	resources allocated for struct inode.  It is only required if
+	->alloc_inode was defined and simply undoes anything done by
+	->alloc_inode.
+
+``dirty_inode``: this method is called by the VFS to mark an inode dirty.
+
+``write_inode``: this method is called when the VFS needs to write an
+	inode to disc.  The second parameter indicates whether the write
+	should be synchronous or not, not all filesystems check this flag.
+
+``drop_inode``: called when the last access to the inode is dropped,
+	with the inode->i_lock spinlock held.
+
+	This method should be either NULL (normal UNIX filesystem
+	semantics) or "generic_delete_inode" (for filesystems that do not
+	want to cache inodes - causing "delete_inode" to always be
+	called regardless of the value of i_nlink)
+
+	The "generic_delete_inode()" behavior is equivalent to the
+	old practice of using "force_delete" in the put_inode() case,
+	but does not have the races that the "force_delete()" approach
+	had. 
+
+``delete_inode``: called when the VFS wants to delete an inode
+
+``put_super``: called when the VFS wishes to free the superblock
+	(i.e. unmount).  This is called with the superblock lock held
+
+``sync_fs``: called when VFS is writing out all dirty data associated with
+	a superblock.  The second parameter indicates whether the method
+	should wait until the write out has been completed.  Optional.
+
+``freeze_fs``: called when VFS is locking a filesystem and
+	forcing it into a consistent state.  This method is currently
+	used by the Logical Volume Manager (LVM).
+
+``unfreeze_fs``: called when VFS is unlocking a filesystem and making it writable
+	again.
+
+``statfs``: called when the VFS needs to get filesystem statistics.
+
+``remount_fs``: called when the filesystem is remounted.  This is called
+	with the kernel lock held
+
+``clear_inode``: called then the VFS clears the inode.  Optional
+
+``umount_begin``: called when the VFS is unmounting a filesystem.
+
+``show_options``: called by the VFS to show mount options for
+	/proc/<pid>/mounts.  (see "Mount Options" section)
+
+``quota_read``: called by the VFS to read from filesystem quota file.
+
+``quota_write``: called by the VFS to write to filesystem quota file.
+
+``nr_cached_objects``: called by the sb cache shrinking function for the
+	filesystem to return the number of freeable cached objects it contains.
+	Optional.
+
+``free_cache_objects``: called by the sb cache shrinking function for the
+	filesystem to scan the number of objects indicated to try to free them.
+	Optional, but any filesystem implementing this method needs to also
+	implement ->nr_cached_objects for it to be called correctly.
+
+	We can't do anything with any errors that the filesystem might
+	encountered, hence the void return type.  This will never be called if
+	the VM is trying to reclaim under GFP_NOFS conditions, hence this
+	method does not need to handle that situation itself.
+
+	Implementations must include conditional reschedule calls inside any
+	scanning loop that is done.  This allows the VFS to determine
+	appropriate scan batch sizes without having to worry about whether
+	implementations will cause holdoff problems due to large scan batch
+	sizes.
+
+Whoever sets up the inode is responsible for filling in the "i_op"
+field.  This is a pointer to a "struct inode_operations" which describes
+the methods that can be performed on individual inodes.
+
+
+struct xattr_handlers
+---------------------
+
+On filesystems that support extended attributes (xattrs), the s_xattr
+superblock field points to a NULL-terminated array of xattr handlers.
+Extended attributes are name:value pairs.
+
+``name``: Indicates that the handler matches attributes with the specified name
+	(such as "system.posix_acl_access"); the prefix field must be NULL.
+
+``prefix``: Indicates that the handler matches all attributes with the specified
+	name prefix (such as "user."); the name field must be NULL.
+
+``list``: Determine if attributes matching this xattr handler should be listed
+	for a particular dentry.  Used by some listxattr implementations like
+	generic_listxattr.
+
+``get``: Called by the VFS to get the value of a particular extended attribute.
+	This method is called by the getxattr(2) system call.
+
+``set``: Called by the VFS to set the value of a particular extended attribute.
+	When the new value is NULL, called to remove a particular extended
+	attribute.  This method is called by the the setxattr(2) and
+	removexattr(2) system calls.
+
+When none of the xattr handlers of a filesystem match the specified
+attribute name or when a filesystem doesn't support extended attributes,
+the various ``*xattr(2)`` system calls return -EOPNOTSUPP.
+
+
+The Inode Object
+================
+
+An inode object represents an object within the filesystem.
+
+
+struct inode_operations
+-----------------------
+
+This describes how the VFS can manipulate an inode in your filesystem.
+As of kernel 2.6.22, the following members are defined:
+
+.. code-block:: c
+
+	struct inode_operations {
+		int (*create) (struct inode *,struct dentry *, umode_t, bool);
+		struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
+		int (*link) (struct dentry *,struct inode *,struct dentry *);
+		int (*unlink) (struct inode *,struct dentry *);
+		int (*symlink) (struct inode *,struct dentry *,const char *);
+		int (*mkdir) (struct inode *,struct dentry *,umode_t);
+		int (*rmdir) (struct inode *,struct dentry *);
+		int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
+		int (*rename) (struct inode *, struct dentry *,
+			       struct inode *, struct dentry *, unsigned int);
+		int (*readlink) (struct dentry *, char __user *,int);
+		const char *(*get_link) (struct dentry *, struct inode *,
+					 struct delayed_call *);
+		int (*permission) (struct inode *, int);
+		int (*get_acl)(struct inode *, int);
+		int (*setattr) (struct dentry *, struct iattr *);
+		int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
+		ssize_t (*listxattr) (struct dentry *, char *, size_t);
+		void (*update_time)(struct inode *, struct timespec *, int);
+		int (*atomic_open)(struct inode *, struct dentry *, struct file *,
+				   unsigned open_flag, umode_t create_mode);
+		int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+	};
+
+Again, all methods are called without any locks being held, unless
+otherwise noted.
+
+``create``: called by the open(2) and creat(2) system calls.  Only
+	required if you want to support regular files.  The dentry you
+	get should not have an inode (i.e. it should be a negative
+	dentry).  Here you will probably call d_instantiate() with the
+	dentry and the newly created inode
+
+``lookup``: called when the VFS needs to look up an inode in a parent
+	directory.  The name to look for is found in the dentry.  This
+	method must call d_add() to insert the found inode into the
+	dentry.  The "i_count" field in the inode structure should be
+	incremented.  If the named inode does not exist a NULL inode
+	should be inserted into the dentry (this is called a negative
+	dentry).  Returning an error code from this routine must only
+	be done on a real error, otherwise creating inodes with system
+	calls like create(2), mknod(2), mkdir(2) and so on will fail.
+	If you wish to overload the dentry methods then you should
+	initialise the "d_dop" field in the dentry; this is a pointer
+	to a struct "dentry_operations".
+	This method is called with the directory inode semaphore held
+
+``link``: called by the link(2) system call.  Only required if you want
+	to support hard links.  You will probably need to call
+	d_instantiate() just as you would in the create() method
+
+``unlink``: called by the unlink(2) system call.  Only required if you
+	want to support deleting inodes
+
+``symlink``: called by the symlink(2) system call.  Only required if you
+	want to support symlinks.  You will probably need to call
+	d_instantiate() just as you would in the create() method
+
+``mkdir``: called by the mkdir(2) system call.  Only required if you want
+	to support creating subdirectories.  You will probably need to
+	call d_instantiate() just as you would in the create() method
+
+``rmdir``: called by the rmdir(2) system call.  Only required if you want
+	to support deleting subdirectories
+
+``mknod``: called by the mknod(2) system call to create a device (char,
+	block) inode or a named pipe (FIFO) or socket.  Only required
+	if you want to support creating these types of inodes.  You
+	will probably need to call d_instantiate() just as you would
+	in the create() method
+
+``rename``: called by the rename(2) system call to rename the object to
+	have the parent and name given by the second inode and dentry.
+
+	The filesystem must return -EINVAL for any unsupported or
+	unknown	flags.  Currently the following flags are implemented:
+	(1) RENAME_NOREPLACE: this flag indicates that if the target
+	of the rename exists the rename should fail with -EEXIST
+	instead of replacing the target.  The VFS already checks for
+	existence, so for local filesystems the RENAME_NOREPLACE
+	implementation is equivalent to plain rename.
+	(2) RENAME_EXCHANGE: exchange source and target.  Both must
+	exist; this is checked by the VFS.  Unlike plain rename,
+	source and target may be of different type.
+
+``get_link``: called by the VFS to follow a symbolic link to the
+	inode it points to.  Only required if you want to support
+	symbolic links.  This method returns the symlink body
+	to traverse (and possibly resets the current position with
+	nd_jump_link()).  If the body won't go away until the inode
+	is gone, nothing else is needed; if it needs to be otherwise
+	pinned, arrange for its release by having get_link(..., ..., done)
+	do set_delayed_call(done, destructor, argument).
+	In that case destructor(argument) will be called once VFS is
+	done with the body you've returned.
+	May be called in RCU mode; that is indicated by NULL dentry
+	argument.  If request can't be handled without leaving RCU mode,
+	have it return ERR_PTR(-ECHILD).
+
+
+	If the filesystem stores the symlink target in ->i_link, the
+	VFS may use it directly without calling ->get_link(); however,
+	->get_link() must still be provided.  ->i_link must not be
+	freed until after an RCU grace period.  Writing to ->i_link
+	post-iget() time requires a 'release' memory barrier.
+
+``readlink``: this is now just an override for use by readlink(2) for the
+	cases when ->get_link uses nd_jump_link() or object is not in
+	fact a symlink.  Normally filesystems should only implement
+	->get_link for symlinks and readlink(2) will automatically use
+	that.
+
+``permission``: called by the VFS to check for access rights on a POSIX-like
+	filesystem.
+
+	May be called in rcu-walk mode (mask & MAY_NOT_BLOCK).  If in rcu-walk
+	mode, the filesystem must check the permission without blocking or
+	storing to the inode.
+
+	If a situation is encountered that rcu-walk cannot handle, return
+	-ECHILD and it will be called again in ref-walk mode.
+
+``setattr``: called by the VFS to set attributes for a file.  This method
+	is called by chmod(2) and related system calls.
+
+``getattr``: called by the VFS to get attributes of a file.  This method
+	is called by stat(2) and related system calls.
+
+``listxattr``: called by the VFS to list all extended attributes for a
+	given file.  This method is called by the listxattr(2) system call.
+
+``update_time``: called by the VFS to update a specific time or the i_version of
+	an inode.  If this is not defined the VFS will update the inode itself
+	and call mark_inode_dirty_sync.
+
+``atomic_open``: called on the last component of an open.  Using this optional
+	method the filesystem can look up, possibly create and open the file in
+	one atomic operation.  If it wants to leave actual opening to the
+	caller (e.g. if the file turned out to be a symlink, device, or just
+	something filesystem won't do atomic open for), it may signal this by
+	returning finish_no_open(file, dentry).  This method is only called if
+	the last component is negative or needs lookup.  Cached positive dentries
+	are still handled by f_op->open().  If the file was created,
+	FMODE_CREATED flag should be set in file->f_mode.  In case of O_EXCL
+	the method must only succeed if the file didn't exist and hence FMODE_CREATED
+	shall always be set on success.
+
+``tmpfile``: called in the end of O_TMPFILE open().  Optional, equivalent to
+	atomically creating, opening and unlinking a file in given directory.
+
+
+The Address Space Object
+========================
+
+The address space object is used to group and manage pages in the page
+cache.  It can be used to keep track of the pages in a file (or anything
+else) and also track the mapping of sections of the file into process
+address spaces.
+
+There are a number of distinct yet related services that an
+address-space can provide.  These include communicating memory pressure,
+page lookup by address, and keeping track of pages tagged as Dirty or
+Writeback.
+
+The first can be used independently to the others.  The VM can try to
+either write dirty pages in order to clean them, or release clean pages
+in order to reuse them.  To do this it can call the ->writepage method
+on dirty pages, and ->releasepage on clean pages with PagePrivate set.
+Clean pages without PagePrivate and with no external references will be
+released without notice being given to the address_space.
+
+To achieve this functionality, pages need to be placed on an LRU with
+lru_cache_add and mark_page_active needs to be called whenever the page
+is used.
+
+Pages are normally kept in a radix tree index by ->index.  This tree
+maintains information about the PG_Dirty and PG_Writeback status of each
+page, so that pages with either of these flags can be found quickly.
+
+The Dirty tag is primarily used by mpage_writepages - the default
+->writepages method.  It uses the tag to find dirty pages to call
+->writepage on.  If mpage_writepages is not used (i.e. the address
+provides its own ->writepages) , the PAGECACHE_TAG_DIRTY tag is almost
+unused.  write_inode_now and sync_inode do use it (through
+__sync_single_inode) to check if ->writepages has been successful in
+writing out the whole address_space.
+
+The Writeback tag is used by filemap*wait* and sync_page* functions, via
+filemap_fdatawait_range, to wait for all writeback to complete.
+
+An address_space handler may attach extra information to a page,
+typically using the 'private' field in the 'struct page'.  If such
+information is attached, the PG_Private flag should be set.  This will
+cause various VM routines to make extra calls into the address_space
+handler to deal with that data.
+
+An address space acts as an intermediate between storage and
+application.  Data is read into the address space a whole page at a
+time, and provided to the application either by copying of the page, or
+by memory-mapping the page.  Data is written into the address space by
+the application, and then written-back to storage typically in whole
+pages, however the address_space has finer control of write sizes.
+
+The read process essentially only requires 'readpage'.  The write
+process is more complicated and uses write_begin/write_end or
+set_page_dirty to write data into the address_space, and writepage and
+writepages to writeback data to storage.
+
+Adding and removing pages to/from an address_space is protected by the
+inode's i_mutex.
+
+When data is written to a page, the PG_Dirty flag should be set.  It
+typically remains set until writepage asks for it to be written.  This
+should clear PG_Dirty and set PG_Writeback.  It can be actually written
+at any point after PG_Dirty is clear.  Once it is known to be safe,
+PG_Writeback is cleared.
+
+Writeback makes use of a writeback_control structure to direct the
+operations.  This gives the the writepage and writepages operations some
+information about the nature of and reason for the writeback request,
+and the constraints under which it is being done.  It is also used to
+return information back to the caller about the result of a writepage or
+writepages request.
+
+
+Handling errors during writeback
+--------------------------------
+
+Most applications that do buffered I/O will periodically call a file
+synchronization call (fsync, fdatasync, msync or sync_file_range) to
+ensure that data written has made it to the backing store.  When there
+is an error during writeback, they expect that error to be reported when
+a file sync request is made.  After an error has been reported on one
+request, subsequent requests on the same file descriptor should return
+0, unless further writeback errors have occurred since the previous file
+syncronization.
+
+Ideally, the kernel would report errors only on file descriptions on
+which writes were done that subsequently failed to be written back.  The
+generic pagecache infrastructure does not track the file descriptions
+that have dirtied each individual page however, so determining which
+file descriptors should get back an error is not possible.
+
+Instead, the generic writeback error tracking infrastructure in the
+kernel settles for reporting errors to fsync on all file descriptions
+that were open at the time that the error occurred.  In a situation with
+multiple writers, all of them will get back an error on a subsequent
+fsync, even if all of the writes done through that particular file
+descriptor succeeded (or even if there were no writes on that file
+descriptor at all).
+
+Filesystems that wish to use this infrastructure should call
+mapping_set_error to record the error in the address_space when it
+occurs.  Then, after writing back data from the pagecache in their
+file->fsync operation, they should call file_check_and_advance_wb_err to
+ensure that the struct file's error cursor has advanced to the correct
+point in the stream of errors emitted by the backing device(s).
+
+
+struct address_space_operations
+-------------------------------
+
+This describes how the VFS can manipulate mapping of a file to page
+cache in your filesystem.  The following members are defined:
+
+.. code-block:: c
+
+	struct address_space_operations {
+		int (*writepage)(struct page *page, struct writeback_control *wbc);
+		int (*readpage)(struct file *, struct page *);
+		int (*writepages)(struct address_space *, struct writeback_control *);
+		int (*set_page_dirty)(struct page *page);
+		int (*readpages)(struct file *filp, struct address_space *mapping,
+				 struct list_head *pages, unsigned nr_pages);
+		int (*write_begin)(struct file *, struct address_space *mapping,
+				   loff_t pos, unsigned len, unsigned flags,
+				struct page **pagep, void **fsdata);
+		int (*write_end)(struct file *, struct address_space *mapping,
+				 loff_t pos, unsigned len, unsigned copied,
+				 struct page *page, void *fsdata);
+		sector_t (*bmap)(struct address_space *, sector_t);
+		void (*invalidatepage) (struct page *, unsigned int, unsigned int);
+		int (*releasepage) (struct page *, int);
+		void (*freepage)(struct page *);
+		ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
+		/* isolate a page for migration */
+		bool (*isolate_page) (struct page *, isolate_mode_t);
+		/* migrate the contents of a page to the specified target */
+		int (*migratepage) (struct page *, struct page *);
+		/* put migration-failed page back to right list */
+		void (*putback_page) (struct page *);
+		int (*launder_page) (struct page *);
+
+		int (*is_partially_uptodate) (struct page *, unsigned long,
+					      unsigned long);
+		void (*is_dirty_writeback) (struct page *, bool *, bool *);
+		int (*error_remove_page) (struct mapping *mapping, struct page *page);
+		int (*swap_activate)(struct file *);
+		int (*swap_deactivate)(struct file *);
+	};
+
+``writepage``: called by the VM to write a dirty page to backing store.
+      This may happen for data integrity reasons (i.e. 'sync'), or
+      to free up memory (flush).  The difference can be seen in
+      wbc->sync_mode.
+      The PG_Dirty flag has been cleared and PageLocked is true.
+      writepage should start writeout, should set PG_Writeback,
+      and should make sure the page is unlocked, either synchronously
+      or asynchronously when the write operation completes.
+
+      If wbc->sync_mode is WB_SYNC_NONE, ->writepage doesn't have to
+      try too hard if there are problems, and may choose to write out
+      other pages from the mapping if that is easier (e.g. due to
+      internal dependencies).  If it chooses not to start writeout, it
+      should return AOP_WRITEPAGE_ACTIVATE so that the VM will not keep
+      calling ->writepage on that page.
+
+      See the file "Locking" for more details.
+
+``readpage``: called by the VM to read a page from backing store.
+       The page will be Locked when readpage is called, and should be
+       unlocked and marked uptodate once the read completes.
+       If ->readpage discovers that it needs to unlock the page for
+       some reason, it can do so, and then return AOP_TRUNCATED_PAGE.
+       In this case, the page will be relocated, relocked and if
+       that all succeeds, ->readpage will be called again.
+
+``writepages``: called by the VM to write out pages associated with the
+	address_space object.  If wbc->sync_mode is WBC_SYNC_ALL, then
+	the writeback_control will specify a range of pages that must be
+	written out.  If it is WBC_SYNC_NONE, then a nr_to_write is given
+	and that many pages should be written if possible.
+	If no ->writepages is given, then mpage_writepages is used
+	instead.  This will choose pages from the address space that are
+	tagged as DIRTY and will pass them to ->writepage.
+
+``set_page_dirty``: called by the VM to set a page dirty.
+	This is particularly needed if an address space attaches
+	private data to a page, and that data needs to be updated when
+	a page is dirtied.  This is called, for example, when a memory
+	mapped page gets modified.
+	If defined, it should set the PageDirty flag, and the
+	PAGECACHE_TAG_DIRTY tag in the radix tree.
+
+``readpages``: called by the VM to read pages associated with the address_space
+	object.  This is essentially just a vector version of
+	readpage.  Instead of just one page, several pages are
+	requested.
+	readpages is only used for read-ahead, so read errors are
+	ignored.  If anything goes wrong, feel free to give up.
+
+``write_begin``:
+	Called by the generic buffered write code to ask the filesystem to
+	prepare to write len bytes at the given offset in the file.  The
+	address_space should check that the write will be able to complete,
+	by allocating space if necessary and doing any other internal
+	housekeeping.  If the write will update parts of any basic-blocks on
+	storage, then those blocks should be pre-read (if they haven't been
+	read already) so that the updated blocks can be written out properly.
+
+	The filesystem must return the locked pagecache page for the specified
+	offset, in ``*pagep``, for the caller to write into.
+
+	It must be able to cope with short writes (where the length passed to
+	write_begin is greater than the number of bytes copied into the page).
+
+	flags is a field for AOP_FLAG_xxx flags, described in
+	include/linux/fs.h.
+
+	A void * may be returned in fsdata, which then gets passed into
+	write_end.
+
+	Returns 0 on success; < 0 on failure (which is the error code), in
+	which case write_end is not called.
+
+``write_end``: After a successful write_begin, and data copy, write_end must
+	be called.  len is the original len passed to write_begin, and copied
+	is the amount that was able to be copied.
+
+	The filesystem must take care of unlocking the page and releasing it
+	refcount, and updating i_size.
+
+	Returns < 0 on failure, otherwise the number of bytes (<= 'copied')
+	that were able to be copied into pagecache.
+
+``bmap``: called by the VFS to map a logical block offset within object to
+	physical block number.  This method is used by the FIBMAP
+	ioctl and for working with swap-files.  To be able to swap to
+	a file, the file must have a stable mapping to a block
+	device.  The swap system does not go through the filesystem
+	but instead uses bmap to find out where the blocks in the file
+	are and uses those addresses directly.
+
+``invalidatepage``: If a page has PagePrivate set, then invalidatepage
+	will be called when part or all of the page is to be removed
+	from the address space.  This generally corresponds to either a
+	truncation, punch hole  or a complete invalidation of the address
+	space (in the latter case 'offset' will always be 0 and 'length'
+	will be PAGE_SIZE).  Any private data associated with the page
+	should be updated to reflect this truncation.  If offset is 0 and
+	length is PAGE_SIZE, then the private data should be released,
+	because the page must be able to be completely discarded.  This may
+	be done by calling the ->releasepage function, but in this case the
+	release MUST succeed.
+
+``releasepage``: releasepage is called on PagePrivate pages to indicate
+	that the page should be freed if possible.  ->releasepage
+	should remove any private data from the page and clear the
+	PagePrivate flag.  If releasepage() fails for some reason, it must
+	indicate failure with a 0 return value.
+	releasepage() is used in two distinct though related cases.  The
+	first is when the VM finds a clean page with no active users and
+	wants to make it a free page.  If ->releasepage succeeds, the
+	page will be removed from the address_space and become free.
+
+	The second case is when a request has been made to invalidate
+	some or all pages in an address_space.  This can happen
+	through the fadvise(POSIX_FADV_DONTNEED) system call or by the
+	filesystem explicitly requesting it as nfs and 9fs do (when
+	they believe the cache may be out of date with storage) by
+	calling invalidate_inode_pages2().
+	If the filesystem makes such a call, and needs to be certain
+	that all pages are invalidated, then its releasepage will
+	need to ensure this.  Possibly it can clear the PageUptodate
+	bit if it cannot free private data yet.
+
+``freepage``: freepage is called once the page is no longer visible in
+	the page cache in order to allow the cleanup of any private
+	data.  Since it may be called by the memory reclaimer, it
+	should not assume that the original address_space mapping still
+	exists, and it should not block.
+
+``direct_IO``: called by the generic read/write routines to perform
+	direct_IO - that is IO requests which bypass the page cache
+	and transfer data directly between the storage and the
+	application's address space.
+
+``isolate_page``: Called by the VM when isolating a movable non-lru page.
+	If page is successfully isolated, VM marks the page as PG_isolated
+	via __SetPageIsolated.
+
+``migrate_page``:  This is used to compact the physical memory usage.
+	If the VM wants to relocate a page (maybe off a memory card
+	that is signalling imminent failure) it will pass a new page
+	and an old page to this function.  migrate_page should
+	transfer any private data across and update any references
+	that it has to the page.
+
+``putback_page``: Called by the VM when isolated page's migration fails.
+
+``launder_page``: Called before freeing a page - it writes back the dirty page.  To
+	prevent redirtying the page, it is kept locked during the whole
+	operation.
+
+``is_partially_uptodate``: Called by the VM when reading a file through the
+	pagecache when the underlying blocksize != pagesize.  If the required
+	block is up to date then the read can complete without needing the IO
+	to bring the whole page up to date.
+
+``is_dirty_writeback``: Called by the VM when attempting to reclaim a page.
+	The VM uses dirty and writeback information to determine if it needs
+	to stall to allow flushers a chance to complete some IO.  Ordinarily
+	it can use PageDirty and PageWriteback but some filesystems have
+	more complex state (unstable pages in NFS prevent reclaim) or
+	do not set those flags due to locking problems.  This callback
+	allows a filesystem to indicate to the VM if a page should be
+	treated as dirty or writeback for the purposes of stalling.
+
+``error_remove_page``: normally set to generic_error_remove_page if truncation
+	is ok for this address space.  Used for memory failure handling.
+	Setting this implies you deal with pages going away under you,
+	unless you have them locked or reference counts increased.
+
+``swap_activate``: Called when swapon is used on a file to allocate
+	space if necessary and pin the block lookup information in
+	memory.  A return value of zero indicates success,
+	in which case this file can be used to back swapspace.
+
+``swap_deactivate``: Called during swapoff on files where swap_activate
+	was successful.
+
+
+The File Object
+===============
+
+A file object represents a file opened by a process.  This is also known
+as an "open file description" in POSIX parlance.
+
+
+struct file_operations
+----------------------
+
+This describes how the VFS can manipulate an open file.  As of kernel
+4.18, the following members are defined:
+
+.. code-block:: c
+
+	struct file_operations {
+		struct module *owner;
+		loff_t (*llseek) (struct file *, loff_t, int);
+		ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
+		ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
+		ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+		ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
+		int (*iopoll)(struct kiocb *kiocb, bool spin);
+		int (*iterate) (struct file *, struct dir_context *);
+		int (*iterate_shared) (struct file *, struct dir_context *);
+		__poll_t (*poll) (struct file *, struct poll_table_struct *);
+		long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
+		long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
+		int (*mmap) (struct file *, struct vm_area_struct *);
+		int (*open) (struct inode *, struct file *);
+		int (*flush) (struct file *, fl_owner_t id);
+		int (*release) (struct inode *, struct file *);
+		int (*fsync) (struct file *, loff_t, loff_t, int datasync);
+		int (*fasync) (int, struct file *, int);
+		int (*lock) (struct file *, int, struct file_lock *);
+		ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
+		unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+		int (*check_flags)(int);
+		int (*flock) (struct file *, int, struct file_lock *);
+		ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
+		ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
+		int (*setlease)(struct file *, long, struct file_lock **, void **);
+		long (*fallocate)(struct file *file, int mode, loff_t offset,
+				  loff_t len);
+		void (*show_fdinfo)(struct seq_file *m, struct file *f);
+	#ifndef CONFIG_MMU
+		unsigned (*mmap_capabilities)(struct file *);
+	#endif
+		ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
+		loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
+					   struct file *file_out, loff_t pos_out,
+					   loff_t len, unsigned int remap_flags);
+		int (*fadvise)(struct file *, loff_t, loff_t, int);
+	};
+
+Again, all methods are called without any locks being held, unless
+otherwise noted.
+
+``llseek``: called when the VFS needs to move the file position index
+
+``read``: called by read(2) and related system calls
+
+``read_iter``: possibly asynchronous read with iov_iter as destination
+
+``write``: called by write(2) and related system calls
+
+``write_iter``: possibly asynchronous write with iov_iter as source
+
+``iopoll``: called when aio wants to poll for completions on HIPRI iocbs
+
+``iterate``: called when the VFS needs to read the directory contents
+
+``iterate_shared``: called when the VFS needs to read the directory contents
+	when filesystem supports concurrent dir iterators
+
+``poll``: called by the VFS when a process wants to check if there is
+	activity on this file and (optionally) go to sleep until there
+	is activity.  Called by the select(2) and poll(2) system calls
+
+``unlocked_ioctl``: called by the ioctl(2) system call.
+
+``compat_ioctl``: called by the ioctl(2) system call when 32 bit system calls
+	 are used on 64 bit kernels.
+
+``mmap``: called by the mmap(2) system call
+
+``open``: called by the VFS when an inode should be opened.  When the VFS
+	opens a file, it creates a new "struct file".  It then calls the
+	open method for the newly allocated file structure.  You might
+	think that the open method really belongs in
+	"struct inode_operations", and you may be right.  I think it's
+	done the way it is because it makes filesystems simpler to
+	implement.  The open() method is a good place to initialize the
+	"private_data" member in the file structure if you want to point
+	to a device structure
+
+``flush``: called by the close(2) system call to flush a file
+
+``release``: called when the last reference to an open file is closed
+
+``fsync``: called by the fsync(2) system call.  Also see the section above
+	 entitled "Handling errors during writeback".
+
+``fasync``: called by the fcntl(2) system call when asynchronous
+	(non-blocking) mode is enabled for a file
+
+``lock``: called by the fcntl(2) system call for F_GETLK, F_SETLK, and F_SETLKW
+	commands
+
+``get_unmapped_area``: called by the mmap(2) system call
+
+``check_flags``: called by the fcntl(2) system call for F_SETFL command
+
+``flock``: called by the flock(2) system call
+
+``splice_write``: called by the VFS to splice data from a pipe to a file.  This
+		method is used by the splice(2) system call
+
+``splice_read``: called by the VFS to splice data from file to a pipe.  This
+	       method is used by the splice(2) system call
+
+``setlease``: called by the VFS to set or release a file lock lease.  setlease
+	    implementations should call generic_setlease to record or remove
+	    the lease in the inode after setting it.
+
+``fallocate``: called by the VFS to preallocate blocks or punch a hole.
+
+``copy_file_range``: called by the copy_file_range(2) system call.
+
+``remap_file_range``: called by the ioctl(2) system call for FICLONERANGE and
+	FICLONE and FIDEDUPERANGE commands to remap file ranges.  An
+	implementation should remap len bytes at pos_in of the source file into
+	the dest file at pos_out.  Implementations must handle callers passing
+	in len == 0; this means "remap to the end of the source file".  The
+	return value should the number of bytes remapped, or the usual
+	negative error code if errors occurred before any bytes were remapped.
+	The remap_flags parameter accepts REMAP_FILE_* flags.  If
+	REMAP_FILE_DEDUP is set then the implementation must only remap if the
+	requested file ranges have identical contents.  If REMAP_CAN_SHORTEN is
+	set, the caller is ok with the implementation shortening the request
+	length to satisfy alignment or EOF requirements (or any other reason).
+
+``fadvise``: possibly called by the fadvise64() system call.
+
+Note that the file operations are implemented by the specific
+filesystem in which the inode resides.  When opening a device node
+(character or block special) most filesystems will call special
+support routines in the VFS which will locate the required device
+driver information.  These support routines replace the filesystem file
+operations with those for the device driver, and then proceed to call
+the new open() method for the file.  This is how opening a device file
+in the filesystem eventually ends up calling the device driver open()
+method.
+
+
+Directory Entry Cache (dcache)
+==============================
+
+
+struct dentry_operations
+------------------------
+
+This describes how a filesystem can overload the standard dentry
+operations.  Dentries and the dcache are the domain of the VFS and the
+individual filesystem implementations.  Device drivers have no business
+here.  These methods may be set to NULL, as they are either optional or
+the VFS uses a default.  As of kernel 2.6.22, the following members are
+defined:
+
+.. code-block:: c
+
+	struct dentry_operations {
+		int (*d_revalidate)(struct dentry *, unsigned int);
+		int (*d_weak_revalidate)(struct dentry *, unsigned int);
+		int (*d_hash)(const struct dentry *, struct qstr *);
+		int (*d_compare)(const struct dentry *,
+				 unsigned int, const char *, const struct qstr *);
+		int (*d_delete)(const struct dentry *);
+		int (*d_init)(struct dentry *);
+		void (*d_release)(struct dentry *);
+		void (*d_iput)(struct dentry *, struct inode *);
+		char *(*d_dname)(struct dentry *, char *, int);
+		struct vfsmount *(*d_automount)(struct path *);
+		int (*d_manage)(const struct path *, bool);
+		struct dentry *(*d_real)(struct dentry *, const struct inode *);
+	};
+
+``d_revalidate``: called when the VFS needs to revalidate a dentry.  This
+	is called whenever a name look-up finds a dentry in the
+	dcache.  Most local filesystems leave this as NULL, because all their
+	dentries in the dcache are valid.  Network filesystems are different
+	since things can change on the server without the client necessarily
+	being aware of it.
+
+	This function should return a positive value if the dentry is still
+	valid, and zero or a negative error code if it isn't.
+
+	d_revalidate may be called in rcu-walk mode (flags & LOOKUP_RCU).
+	If in rcu-walk mode, the filesystem must revalidate the dentry without
+	blocking or storing to the dentry, d_parent and d_inode should not be
+	used without care (because they can change and, in d_inode case, even
+	become NULL under us).
+
+	If a situation is encountered that rcu-walk cannot handle, return
+	-ECHILD and it will be called again in ref-walk mode.
+
+``_weak_revalidate``: called when the VFS needs to revalidate a "jumped" dentry.
+	This is called when a path-walk ends at dentry that was not acquired by
+	doing a lookup in the parent directory.  This includes "/", "." and "..",
+	as well as procfs-style symlinks and mountpoint traversal.
+
+	In this case, we are less concerned with whether the dentry is still
+	fully correct, but rather that the inode is still valid.  As with
+	d_revalidate, most local filesystems will set this to NULL since their
+	dcache entries are always valid.
+
+	This function has the same return code semantics as d_revalidate.
+
+	d_weak_revalidate is only called after leaving rcu-walk mode.
+
+``d_hash``: called when the VFS adds a dentry to the hash table.  The first
+	dentry passed to d_hash is the parent directory that the name is
+	to be hashed into.
+
+	Same locking and synchronisation rules as d_compare regarding
+	what is safe to dereference etc.
+
+``d_compare``: called to compare a dentry name with a given name.  The first
+	dentry is the parent of the dentry to be compared, the second is
+	the child dentry.  len and name string are properties of the dentry
+	to be compared.  qstr is the name to compare it with.
+
+	Must be constant and idempotent, and should not take locks if
+	possible, and should not or store into the dentry.
+	Should not dereference pointers outside the dentry without
+	lots of care (eg.  d_parent, d_inode, d_name should not be used).
+
+	However, our vfsmount is pinned, and RCU held, so the dentries and
+	inodes won't disappear, neither will our sb or filesystem module.
+	->d_sb may be used.
+
+	It is a tricky calling convention because it needs to be called under
+	"rcu-walk", ie. without any locks or references on things.
+
+``d_delete``: called when the last reference to a dentry is dropped and the
+	dcache is deciding whether or not to cache it.  Return 1 to delete
+	immediately, or 0 to cache the dentry.  Default is NULL which means to
+	always cache a reachable dentry.  d_delete must be constant and
+	idempotent.
+
+``d_init``: called when a dentry is allocated
+
+``d_release``: called when a dentry is really deallocated
+
+``d_iput``: called when a dentry loses its inode (just prior to its
+	being deallocated).  The default when this is NULL is that the
+	VFS calls iput().  If you define this method, you must call
+	iput() yourself
+
+``d_dname``: called when the pathname of a dentry should be generated.
+	Useful for some pseudo filesystems (sockfs, pipefs, ...) to delay
+	pathname generation.  (Instead of doing it when dentry is created,
+	it's done only when the path is needed.).  Real filesystems probably
+	dont want to use it, because their dentries are present in global
+	dcache hash, so their hash should be an invariant.  As no lock is
+	held, d_dname() should not try to modify the dentry itself, unless
+	appropriate SMP safety is used.  CAUTION : d_path() logic is quite
+	tricky.  The correct way to return for example "Hello" is to put it
+	at the end of the buffer, and returns a pointer to the first char.
+	dynamic_dname() helper function is provided to take care of this.
+
+	Example :
+
+.. code-block:: c
+
+	static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen)
+	{
+		return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
+				dentry->d_inode->i_ino);
+	}
+
+``d_automount``: called when an automount dentry is to be traversed (optional).
+	This should create a new VFS mount record and return the record to the
+	caller.  The caller is supplied with a path parameter giving the
+	automount directory to describe the automount target and the parent
+	VFS mount record to provide inheritable mount parameters.  NULL should
+	be returned if someone else managed to make the automount first.  If
+	the vfsmount creation failed, then an error code should be returned.
+	If -EISDIR is returned, then the directory will be treated as an
+	ordinary directory and returned to pathwalk to continue walking.
+
+	If a vfsmount is returned, the caller will attempt to mount it on the
+	mountpoint and will remove the vfsmount from its expiration list in
+	the case of failure.  The vfsmount should be returned with 2 refs on
+	it to prevent automatic expiration - the caller will clean up the
+	additional ref.
+
+	This function is only used if DCACHE_NEED_AUTOMOUNT is set on the
+	dentry.  This is set by __d_instantiate() if S_AUTOMOUNT is set on the
+	inode being added.
+
+``d_manage``: called to allow the filesystem to manage the transition from a
+	dentry (optional).  This allows autofs, for example, to hold up clients
+	waiting to explore behind a 'mountpoint' while letting the daemon go
+	past and construct the subtree there.  0 should be returned to let the
+	calling process continue.  -EISDIR can be returned to tell pathwalk to
+	use this directory as an ordinary directory and to ignore anything
+	mounted on it and not to check the automount flag.  Any other error
+	code will abort pathwalk completely.
+
+	If the 'rcu_walk' parameter is true, then the caller is doing a
+	pathwalk in RCU-walk mode.  Sleeping is not permitted in this mode,
+	and the caller can be asked to leave it and call again by returning
+	-ECHILD.  -EISDIR may also be returned to tell pathwalk to
+	ignore d_automount or any mounts.
+
+	This function is only used if DCACHE_MANAGE_TRANSIT is set on the
+	dentry being transited from.
+
+``d_real``: overlay/union type filesystems implement this method to return one of
+	the underlying dentries hidden by the overlay.  It is used in two
+	different modes:
+
+	Called from file_dentry() it returns the real dentry matching the inode
+	argument.  The real dentry may be from a lower layer already copied up,
+	but still referenced from the file.  This mode is selected with a
+	non-NULL inode argument.
+
+	With NULL inode the topmost real underlying dentry is returned.
+
+Each dentry has a pointer to its parent dentry, as well as a hash list
+of child dentries.  Child dentries are basically like files in a
+directory.
+
+
+Directory Entry Cache API
+--------------------------
+
+There are a number of functions defined which permit a filesystem to
+manipulate dentries:
+
+``dget``: open a new handle for an existing dentry (this just increments
+	the usage count)
+
+``dput``: close a handle for a dentry (decrements the usage count).  If
+	the usage count drops to 0, and the dentry is still in its
+	parent's hash, the "d_delete" method is called to check whether
+	it should be cached.  If it should not be cached, or if the dentry
+	is not hashed, it is deleted.  Otherwise cached dentries are put
+	into an LRU list to be reclaimed on memory shortage.
+
+``d_drop``: this unhashes a dentry from its parents hash list.  A
+	subsequent call to dput() will deallocate the dentry if its
+	usage count drops to 0
+
+``d_delete``: delete a dentry.  If there are no other open references to
+	the dentry then the dentry is turned into a negative dentry
+	(the d_iput() method is called).  If there are other
+	references, then d_drop() is called instead
+
+``d_add``: add a dentry to its parents hash list and then calls
+	d_instantiate()
+
+``d_instantiate``: add a dentry to the alias hash list for the inode and
+	updates the "d_inode" member.  The "i_count" member in the
+	inode structure should be set/incremented.  If the inode
+	pointer is NULL, the dentry is called a "negative
+	dentry".  This function is commonly called when an inode is
+	created for an existing negative dentry
+
+``d_lookup``: look up a dentry given its parent and path name component
+	It looks up the child of that given name from the dcache
+	hash table.  If it is found, the reference count is incremented
+	and the dentry is returned.  The caller must use dput()
+	to free the dentry when it finishes using it.
+
+
+Mount Options
+=============
+
+
+Parsing options
+---------------
+
+On mount and remount the filesystem is passed a string containing a
+comma separated list of mount options.  The options can have either of
+these forms:
+
+  option
+  option=value
+
+The <linux/parser.h> header defines an API that helps parse these
+options.  There are plenty of examples on how to use it in existing
+filesystems.
+
+
+Showing options
+---------------
+
+If a filesystem accepts mount options, it must define show_options() to
+show all the currently active options.  The rules are:
+
+  - options MUST be shown which are not default or their values differ
+    from the default
+
+  - options MAY be shown which are enabled by default or have their
+    default value
+
+Options used only internally between a mount helper and the kernel (such
+as file descriptors), or which only have an effect during the mounting
+(such as ones controlling the creation of a journal) are exempt from the
+above rules.
+
+The underlying reason for the above rules is to make sure, that a mount
+can be accurately replicated (e.g. umounting and mounting again) based
+on the information found in /proc/mounts.
+
+
+Resources
+=========
+
+(Note some of these resources are not up-to-date with the latest kernel
+ version.)
+
+Creating Linux virtual filesystems. 2002
+    <http://lwn.net/Articles/13325/>
+
+The Linux Virtual File-system Layer by Neil Brown. 1999
+    <http://www.cse.unsw.edu.au/~neilb/oss/linux-commentary/vfs.html>
+
+A tour of the Linux VFS by Michael K. Johnson. 1996
+    <http://www.tldp.org/LDP/khg/HyperNews/get/fs/vfstour.html>
+
+A small trail through the Linux kernel by Andries Brouwer. 2001
+    <http://www.win.tue.nl/~aeb/linux/vfs/trail.html>
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
deleted file mode 100644
index 4f4f4931bfa0..000000000000
--- a/Documentation/filesystems/vfs.txt
+++ /dev/null
@@ -1,1274 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-=========================================
-Overview of the Linux Virtual File System
-=========================================
-
-Original author: Richard Gooch <rgooch@atnf.csiro.au>
-
-- Copyright (C) 1999 Richard Gooch
-- Copyright (C) 2005 Pekka Enberg
-
-
-Introduction
-============
-
-The Virtual File System (also known as the Virtual Filesystem Switch) is
-the software layer in the kernel that provides the filesystem interface
-to userspace programs.  It also provides an abstraction within the
-kernel which allows different filesystem implementations to coexist.
-
-VFS system calls open(2), stat(2), read(2), write(2), chmod(2) and so on
-are called from a process context.  Filesystem locking is described in
-the document Documentation/filesystems/Locking.
-
-
-Directory Entry Cache (dcache)
-------------------------------
-
-The VFS implements the open(2), stat(2), chmod(2), and similar system
-calls.  The pathname argument that is passed to them is used by the VFS
-to search through the directory entry cache (also known as the dentry
-cache or dcache).  This provides a very fast look-up mechanism to
-translate a pathname (filename) into a specific dentry.  Dentries live
-in RAM and are never saved to disc: they exist only for performance.
-
-The dentry cache is meant to be a view into your entire filespace.  As
-most computers cannot fit all dentries in the RAM at the same time, some
-bits of the cache are missing.  In order to resolve your pathname into a
-dentry, the VFS may have to resort to creating dentries along the way,
-and then loading the inode.  This is done by looking up the inode.
-
-
-The Inode Object
-----------------
-
-An individual dentry usually has a pointer to an inode.  Inodes are
-filesystem objects such as regular files, directories, FIFOs and other
-beasts.  They live either on the disc (for block device filesystems) or
-in the memory (for pseudo filesystems).  Inodes that live on the disc
-are copied into the memory when required and changes to the inode are
-written back to disc.  A single inode can be pointed to by multiple
-dentries (hard links, for example, do this).
-
-To look up an inode requires that the VFS calls the lookup() method of
-the parent directory inode.  This method is installed by the specific
-filesystem implementation that the inode lives in.  Once the VFS has the
-required dentry (and hence the inode), we can do all those boring things
-like open(2) the file, or stat(2) it to peek at the inode data.  The
-stat(2) operation is fairly simple: once the VFS has the dentry, it
-peeks at the inode data and passes some of it back to userspace.
-
-
-The File Object
----------------
-
-Opening a file requires another operation: allocation of a file
-structure (this is the kernel-side implementation of file descriptors).
-The freshly allocated file structure is initialized with a pointer to
-the dentry and a set of file operation member functions.  These are
-taken from the inode data.  The open() file method is then called so the
-specific filesystem implementation can do its work.  You can see that
-this is another switch performed by the VFS.  The file structure is
-placed into the file descriptor table for the process.
-
-Reading, writing and closing files (and other assorted VFS operations)
-is done by using the userspace file descriptor to grab the appropriate
-file structure, and then calling the required file structure method to
-do whatever is required.  For as long as the file is open, it keeps the
-dentry in use, which in turn means that the VFS inode is still in use.
-
-
-Registering and Mounting a Filesystem
-=====================================
-
-To register and unregister a filesystem, use the following API
-functions:
-
-   #include <linux/fs.h>
-
-   extern int register_filesystem(struct file_system_type *);
-   extern int unregister_filesystem(struct file_system_type *);
-
-The passed struct file_system_type describes your filesystem.  When a
-request is made to mount a filesystem onto a directory in your
-namespace, the VFS will call the appropriate mount() method for the
-specific filesystem.  New vfsmount referring to the tree returned by
-->mount() will be attached to the mountpoint, so that when pathname
-resolution reaches the mountpoint it will jump into the root of that
-vfsmount.
-
-You can see all filesystems that are registered to the kernel in the
-file /proc/filesystems.
-
-
-struct file_system_type
------------------------
-
-This describes the filesystem.  As of kernel 2.6.39, the following
-members are defined:
-
-struct file_system_type {
-	const char *name;
-	int fs_flags;
-	struct dentry *(*mount) (struct file_system_type *, int,
-		       const char *, void *);
-	void (*kill_sb) (struct super_block *);
-	struct module *owner;
-	struct file_system_type * next;
-	struct list_head fs_supers;
-	struct lock_class_key s_lock_key;
-	struct lock_class_key s_umount_key;
-};
-
-  name: the name of the filesystem type, such as "ext2", "iso9660",
-	"msdos" and so on
-
-  fs_flags: various flags (i.e. FS_REQUIRES_DEV, FS_NO_DCACHE, etc.)
-
-  mount: the method to call when a new instance of this
-	filesystem should be mounted
-
-  kill_sb: the method to call when an instance of this filesystem
-	should be shut down
-
-  owner: for internal VFS use: you should initialize this to THIS_MODULE in
-	most cases.
-
-  next: for internal VFS use: you should initialize this to NULL
-
-  s_lock_key, s_umount_key: lockdep-specific
-
-The mount() method has the following arguments:
-
-  struct file_system_type *fs_type: describes the filesystem, partly initialized
-	by the specific filesystem code
-
-  int flags: mount flags
-
-  const char *dev_name: the device name we are mounting.
-
-  void *data: arbitrary mount options, usually comes as an ASCII
-	string (see "Mount Options" section)
-
-The mount() method must return the root dentry of the tree requested by
-caller.  An active reference to its superblock must be grabbed and the
-superblock must be locked.  On failure it should return ERR_PTR(error).
-
-The arguments match those of mount(2) and their interpretation depends
-on filesystem type.  E.g. for block filesystems, dev_name is interpreted
-as block device name, that device is opened and if it contains a
-suitable filesystem image the method creates and initializes struct
-super_block accordingly, returning its root dentry to caller.
-
-->mount() may choose to return a subtree of existing filesystem - it
-doesn't have to create a new one.  The main result from the caller's
-point of view is a reference to dentry at the root of (sub)tree to be
-attached; creation of new superblock is a common side effect.
-
-The most interesting member of the superblock structure that the mount()
-method fills in is the "s_op" field.  This is a pointer to a "struct
-super_operations" which describes the next level of the filesystem
-implementation.
-
-Usually, a filesystem uses one of the generic mount() implementations
-and provides a fill_super() callback instead.  The generic variants are:
-
-  mount_bdev: mount a filesystem residing on a block device
-
-  mount_nodev: mount a filesystem that is not backed by a device
-
-  mount_single: mount a filesystem which shares the instance between
-	all mounts
-
-A fill_super() callback implementation has the following arguments:
-
-  struct super_block *sb: the superblock structure.  The callback
-	must initialize this properly.
-
-  void *data: arbitrary mount options, usually comes as an ASCII
-	string (see "Mount Options" section)
-
-  int silent: whether or not to be silent on error
-
-
-The Superblock Object
-=====================
-
-A superblock object represents a mounted filesystem.
-
-
-struct super_operations
------------------------
-
-This describes how the VFS can manipulate the superblock of your
-filesystem.  As of kernel 2.6.22, the following members are defined:
-
-struct super_operations {
-	struct inode *(*alloc_inode)(struct super_block *sb);
-	void (*destroy_inode)(struct inode *);
-
-	void (*dirty_inode) (struct inode *, int flags);
-	int (*write_inode) (struct inode *, int);
-	void (*drop_inode) (struct inode *);
-	void (*delete_inode) (struct inode *);
-	void (*put_super) (struct super_block *);
-	int (*sync_fs)(struct super_block *sb, int wait);
-	int (*freeze_fs) (struct super_block *);
-	int (*unfreeze_fs) (struct super_block *);
-	int (*statfs) (struct dentry *, struct kstatfs *);
-	int (*remount_fs) (struct super_block *, int *, char *);
-	void (*clear_inode) (struct inode *);
-	void (*umount_begin) (struct super_block *);
-
-	int (*show_options)(struct seq_file *, struct dentry *);
-
-	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
-	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
-	int (*nr_cached_objects)(struct super_block *);
-	void (*free_cached_objects)(struct super_block *, int);
-};
-
-All methods are called without any locks being held, unless otherwise
-noted.  This means that most methods can block safely.  All methods are
-only called from a process context (i.e. not from an interrupt handler
-or bottom half).
-
-  alloc_inode: this method is called by alloc_inode() to allocate memory
-	for struct inode and initialize it.  If this function is not
-	defined, a simple 'struct inode' is allocated.  Normally
-	alloc_inode will be used to allocate a larger structure which
-	contains a 'struct inode' embedded within it.
-
-  destroy_inode: this method is called by destroy_inode() to release
-	resources allocated for struct inode.  It is only required if
-	->alloc_inode was defined and simply undoes anything done by
-	->alloc_inode.
-
-  dirty_inode: this method is called by the VFS to mark an inode dirty.
-
-  write_inode: this method is called when the VFS needs to write an
-	inode to disc.  The second parameter indicates whether the write
-	should be synchronous or not, not all filesystems check this flag.
-
-  drop_inode: called when the last access to the inode is dropped,
-	with the inode->i_lock spinlock held.
-
-	This method should be either NULL (normal UNIX filesystem
-	semantics) or "generic_delete_inode" (for filesystems that do not
-	want to cache inodes - causing "delete_inode" to always be
-	called regardless of the value of i_nlink)
-
-	The "generic_delete_inode()" behavior is equivalent to the
-	old practice of using "force_delete" in the put_inode() case,
-	but does not have the races that the "force_delete()" approach
-	had. 
-
-  delete_inode: called when the VFS wants to delete an inode
-
-  put_super: called when the VFS wishes to free the superblock
-	(i.e. unmount).  This is called with the superblock lock held
-
-  sync_fs: called when VFS is writing out all dirty data associated with
-	a superblock.  The second parameter indicates whether the method
-	should wait until the write out has been completed.  Optional.
-
-  freeze_fs: called when VFS is locking a filesystem and
-	forcing it into a consistent state.  This method is currently
-	used by the Logical Volume Manager (LVM).
-
-  unfreeze_fs: called when VFS is unlocking a filesystem and making it writable
-	again.
-
-  statfs: called when the VFS needs to get filesystem statistics.
-
-  remount_fs: called when the filesystem is remounted.  This is called
-	with the kernel lock held
-
-  clear_inode: called then the VFS clears the inode.  Optional
-
-  umount_begin: called when the VFS is unmounting a filesystem.
-
-  show_options: called by the VFS to show mount options for
-	/proc/<pid>/mounts.  (see "Mount Options" section)
-
-  quota_read: called by the VFS to read from filesystem quota file.
-
-  quota_write: called by the VFS to write to filesystem quota file.
-
-  nr_cached_objects: called by the sb cache shrinking function for the
-	filesystem to return the number of freeable cached objects it contains.
-	Optional.
-
-  free_cache_objects: called by the sb cache shrinking function for the
-	filesystem to scan the number of objects indicated to try to free them.
-	Optional, but any filesystem implementing this method needs to also
-	implement ->nr_cached_objects for it to be called correctly.
-
-	We can't do anything with any errors that the filesystem might
-	encountered, hence the void return type.  This will never be called if
-	the VM is trying to reclaim under GFP_NOFS conditions, hence this
-	method does not need to handle that situation itself.
-
-	Implementations must include conditional reschedule calls inside any
-	scanning loop that is done.  This allows the VFS to determine
-	appropriate scan batch sizes without having to worry about whether
-	implementations will cause holdoff problems due to large scan batch
-	sizes.
-
-Whoever sets up the inode is responsible for filling in the "i_op"
-field.  This is a pointer to a "struct inode_operations" which describes
-the methods that can be performed on individual inodes.
-
-
-struct xattr_handlers
----------------------
-
-On filesystems that support extended attributes (xattrs), the s_xattr
-superblock field points to a NULL-terminated array of xattr handlers.
-Extended attributes are name:value pairs.
-
-  name: Indicates that the handler matches attributes with the specified name
-	(such as "system.posix_acl_access"); the prefix field must be NULL.
-
-  prefix: Indicates that the handler matches all attributes with the specified
-	name prefix (such as "user."); the name field must be NULL.
-
-  list: Determine if attributes matching this xattr handler should be listed
-	for a particular dentry.  Used by some listxattr implementations like
-	generic_listxattr.
-
-  get: Called by the VFS to get the value of a particular extended attribute.
-	This method is called by the getxattr(2) system call.
-
-  set: Called by the VFS to set the value of a particular extended attribute.
-	When the new value is NULL, called to remove a particular extended
-	attribute.  This method is called by the the setxattr(2) and
-	removexattr(2) system calls.
-
-When none of the xattr handlers of a filesystem match the specified
-attribute name or when a filesystem doesn't support extended attributes,
-the various *xattr(2) system calls return -EOPNOTSUPP.
-
-
-The Inode Object
-================
-
-An inode object represents an object within the filesystem.
-
-
-struct inode_operations
------------------------
-
-This describes how the VFS can manipulate an inode in your filesystem.
-As of kernel 2.6.22, the following members are defined:
-
-struct inode_operations {
-	int (*create) (struct inode *,struct dentry *, umode_t, bool);
-	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
-	int (*link) (struct dentry *,struct inode *,struct dentry *);
-	int (*unlink) (struct inode *,struct dentry *);
-	int (*symlink) (struct inode *,struct dentry *,const char *);
-	int (*mkdir) (struct inode *,struct dentry *,umode_t);
-	int (*rmdir) (struct inode *,struct dentry *);
-	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
-	int (*rename) (struct inode *, struct dentry *,
-			struct inode *, struct dentry *, unsigned int);
-	int (*readlink) (struct dentry *, char __user *,int);
-	const char *(*get_link) (struct dentry *, struct inode *,
-				 struct delayed_call *);
-	int (*permission) (struct inode *, int);
-	int (*get_acl)(struct inode *, int);
-	int (*setattr) (struct dentry *, struct iattr *);
-	int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
-	ssize_t (*listxattr) (struct dentry *, char *, size_t);
-	void (*update_time)(struct inode *, struct timespec *, int);
-	int (*atomic_open)(struct inode *, struct dentry *, struct file *,
-			unsigned open_flag, umode_t create_mode);
-	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
-};
-
-Again, all methods are called without any locks being held, unless
-otherwise noted.
-
-  create: called by the open(2) and creat(2) system calls.  Only
-	required if you want to support regular files.  The dentry you
-	get should not have an inode (i.e. it should be a negative
-	dentry).  Here you will probably call d_instantiate() with the
-	dentry and the newly created inode
-
-  lookup: called when the VFS needs to look up an inode in a parent
-	directory.  The name to look for is found in the dentry.  This
-	method must call d_add() to insert the found inode into the
-	dentry.  The "i_count" field in the inode structure should be
-	incremented.  If the named inode does not exist a NULL inode
-	should be inserted into the dentry (this is called a negative
-	dentry).  Returning an error code from this routine must only
-	be done on a real error, otherwise creating inodes with system
-	calls like create(2), mknod(2), mkdir(2) and so on will fail.
-	If you wish to overload the dentry methods then you should
-	initialise the "d_dop" field in the dentry; this is a pointer
-	to a struct "dentry_operations".
-	This method is called with the directory inode semaphore held
-
-  link: called by the link(2) system call.  Only required if you want
-	to support hard links.  You will probably need to call
-	d_instantiate() just as you would in the create() method
-
-  unlink: called by the unlink(2) system call.  Only required if you
-	want to support deleting inodes
-
-  symlink: called by the symlink(2) system call.  Only required if you
-	want to support symlinks.  You will probably need to call
-	d_instantiate() just as you would in the create() method
-
-  mkdir: called by the mkdir(2) system call.  Only required if you want
-	to support creating subdirectories.  You will probably need to
-	call d_instantiate() just as you would in the create() method
-
-  rmdir: called by the rmdir(2) system call.  Only required if you want
-	to support deleting subdirectories
-
-  mknod: called by the mknod(2) system call to create a device (char,
-	block) inode or a named pipe (FIFO) or socket.  Only required
-	if you want to support creating these types of inodes.  You
-	will probably need to call d_instantiate() just as you would
-	in the create() method
-
-  rename: called by the rename(2) system call to rename the object to
-	have the parent and name given by the second inode and dentry.
-
-	The filesystem must return -EINVAL for any unsupported or
-	unknown	flags.  Currently the following flags are implemented:
-	(1) RENAME_NOREPLACE: this flag indicates that if the target
-	of the rename exists the rename should fail with -EEXIST
-	instead of replacing the target.  The VFS already checks for
-	existence, so for local filesystems the RENAME_NOREPLACE
-	implementation is equivalent to plain rename.
-	(2) RENAME_EXCHANGE: exchange source and target.  Both must
-	exist; this is checked by the VFS.  Unlike plain rename,
-	source and target may be of different type.
-
-  get_link: called by the VFS to follow a symbolic link to the
-	inode it points to.  Only required if you want to support
-	symbolic links.  This method returns the symlink body
-	to traverse (and possibly resets the current position with
-	nd_jump_link()).  If the body won't go away until the inode
-	is gone, nothing else is needed; if it needs to be otherwise
-	pinned, arrange for its release by having get_link(..., ..., done)
-	do set_delayed_call(done, destructor, argument).
-	In that case destructor(argument) will be called once VFS is
-	done with the body you've returned.
-	May be called in RCU mode; that is indicated by NULL dentry
-	argument.  If request can't be handled without leaving RCU mode,
-	have it return ERR_PTR(-ECHILD).
-
-	If the filesystem stores the symlink target in ->i_link, the
-	VFS may use it directly without calling ->get_link(); however,
-	->get_link() must still be provided.  ->i_link must not be
-	freed until after an RCU grace period.  Writing to ->i_link
-	post-iget() time requires a 'release' memory barrier.
-
-  readlink: this is now just an override for use by readlink(2) for the
-	cases when ->get_link uses nd_jump_link() or object is not in
-	fact a symlink.  Normally filesystems should only implement
-	->get_link for symlinks and readlink(2) will automatically use
-	that.
-
-  permission: called by the VFS to check for access rights on a POSIX-like
-	filesystem.
-
-	May be called in rcu-walk mode (mask & MAY_NOT_BLOCK).  If in rcu-walk
-	mode, the filesystem must check the permission without blocking or
-	storing to the inode.
-
-	If a situation is encountered that rcu-walk cannot handle, return
-	-ECHILD and it will be called again in ref-walk mode.
-
-  setattr: called by the VFS to set attributes for a file.  This method
-	is called by chmod(2) and related system calls.
-
-  getattr: called by the VFS to get attributes of a file.  This method
-	is called by stat(2) and related system calls.
-
-  listxattr: called by the VFS to list all extended attributes for a
-	given file.  This method is called by the listxattr(2) system call.
-
-  update_time: called by the VFS to update a specific time or the i_version of
-	an inode.  If this is not defined the VFS will update the inode itself
-	and call mark_inode_dirty_sync.
-
-  atomic_open: called on the last component of an open.  Using this optional
-	method the filesystem can look up, possibly create and open the file in
-	one atomic operation.  If it wants to leave actual opening to the
-	caller (e.g. if the file turned out to be a symlink, device, or just
-	something filesystem won't do atomic open for), it may signal this by
-	returning finish_no_open(file, dentry).  This method is only called if
-	the last component is negative or needs lookup.  Cached positive dentries
-	are still handled by f_op->open().  If the file was created,
-	FMODE_CREATED flag should be set in file->f_mode.  In case of O_EXCL
-	the method must only succeed if the file didn't exist and hence FMODE_CREATED
-	shall always be set on success.
-
-  tmpfile: called in the end of O_TMPFILE open().  Optional, equivalent to
-	atomically creating, opening and unlinking a file in given directory.
-
-
-The Address Space Object
-========================
-
-The address space object is used to group and manage pages in the page
-cache.  It can be used to keep track of the pages in a file (or anything
-else) and also track the mapping of sections of the file into process
-address spaces.
-
-There are a number of distinct yet related services that an
-address-space can provide.  These include communicating memory pressure,
-page lookup by address, and keeping track of pages tagged as Dirty or
-Writeback.
-
-The first can be used independently to the others.  The VM can try to
-either write dirty pages in order to clean them, or release clean pages
-in order to reuse them.  To do this it can call the ->writepage method
-on dirty pages, and ->releasepage on clean pages with PagePrivate set.
-Clean pages without PagePrivate and with no external references will be
-released without notice being given to the address_space.
-
-To achieve this functionality, pages need to be placed on an LRU with
-lru_cache_add and mark_page_active needs to be called whenever the page
-is used.
-
-Pages are normally kept in a radix tree index by ->index.  This tree
-maintains information about the PG_Dirty and PG_Writeback status of each
-page, so that pages with either of these flags can be found quickly.
-
-The Dirty tag is primarily used by mpage_writepages - the default
-->writepages method.  It uses the tag to find dirty pages to call
-->writepage on.  If mpage_writepages is not used (i.e. the address
-provides its own ->writepages) , the PAGECACHE_TAG_DIRTY tag is almost
-unused.  write_inode_now and sync_inode do use it (through
-__sync_single_inode) to check if ->writepages has been successful in
-writing out the whole address_space.
-
-The Writeback tag is used by filemap*wait* and sync_page* functions, via
-filemap_fdatawait_range, to wait for all writeback to complete.
-
-An address_space handler may attach extra information to a page,
-typically using the 'private' field in the 'struct page'.  If such
-information is attached, the PG_Private flag should be set.  This will
-cause various VM routines to make extra calls into the address_space
-handler to deal with that data.
-
-An address space acts as an intermediate between storage and
-application.  Data is read into the address space a whole page at a
-time, and provided to the application either by copying of the page, or
-by memory-mapping the page.  Data is written into the address space by
-the application, and then written-back to storage typically in whole
-pages, however the address_space has finer control of write sizes.
-
-The read process essentially only requires 'readpage'.  The write
-process is more complicated and uses write_begin/write_end or
-set_page_dirty to write data into the address_space, and writepage and
-writepages to writeback data to storage.
-
-Adding and removing pages to/from an address_space is protected by the
-inode's i_mutex.
-
-When data is written to a page, the PG_Dirty flag should be set.  It
-typically remains set until writepage asks for it to be written.  This
-should clear PG_Dirty and set PG_Writeback.  It can be actually written
-at any point after PG_Dirty is clear.  Once it is known to be safe,
-PG_Writeback is cleared.
-
-Writeback makes use of a writeback_control structure to direct the
-operations.  This gives the the writepage and writepages operations some
-information about the nature of and reason for the writeback request,
-and the constraints under which it is being done.  It is also used to
-return information back to the caller about the result of a writepage or
-writepages request.
-
-
-Handling errors during writeback
---------------------------------
-
-Most applications that do buffered I/O will periodically call a file
-synchronization call (fsync, fdatasync, msync or sync_file_range) to
-ensure that data written has made it to the backing store.  When there
-is an error during writeback, they expect that error to be reported when
-a file sync request is made.  After an error has been reported on one
-request, subsequent requests on the same file descriptor should return
-0, unless further writeback errors have occurred since the previous file
-syncronization.
-
-Ideally, the kernel would report errors only on file descriptions on
-which writes were done that subsequently failed to be written back.  The
-generic pagecache infrastructure does not track the file descriptions
-that have dirtied each individual page however, so determining which
-file descriptors should get back an error is not possible.
-
-Instead, the generic writeback error tracking infrastructure in the
-kernel settles for reporting errors to fsync on all file descriptions
-that were open at the time that the error occurred.  In a situation with
-multiple writers, all of them will get back an error on a subsequent
-fsync, even if all of the writes done through that particular file
-descriptor succeeded (or even if there were no writes on that file
-descriptor at all).
-
-Filesystems that wish to use this infrastructure should call
-mapping_set_error to record the error in the address_space when it
-occurs.  Then, after writing back data from the pagecache in their
-file->fsync operation, they should call file_check_and_advance_wb_err to
-ensure that the struct file's error cursor has advanced to the correct
-point in the stream of errors emitted by the backing device(s).
-
-
-struct address_space_operations
--------------------------------
-
-This describes how the VFS can manipulate mapping of a file to page
-cache in your filesystem.  The following members are defined:
-
-struct address_space_operations {
-	int (*writepage)(struct page *page, struct writeback_control *wbc);
-	int (*readpage)(struct file *, struct page *);
-	int (*writepages)(struct address_space *, struct writeback_control *);
-	int (*set_page_dirty)(struct page *page);
-	int (*readpages)(struct file *filp, struct address_space *mapping,
-			struct list_head *pages, unsigned nr_pages);
-	int (*write_begin)(struct file *, struct address_space *mapping,
-				loff_t pos, unsigned len, unsigned flags,
-				struct page **pagep, void **fsdata);
-	int (*write_end)(struct file *, struct address_space *mapping,
-				loff_t pos, unsigned len, unsigned copied,
-				struct page *page, void *fsdata);
-	sector_t (*bmap)(struct address_space *, sector_t);
-	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
-	int (*releasepage) (struct page *, int);
-	void (*freepage)(struct page *);
-	ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
-	/* isolate a page for migration */
-	bool (*isolate_page) (struct page *, isolate_mode_t);
-	/* migrate the contents of a page to the specified target */
-	int (*migratepage) (struct page *, struct page *);
-	/* put migration-failed page back to right list */
-	void (*putback_page) (struct page *);
-	int (*launder_page) (struct page *);
-
-	int (*is_partially_uptodate) (struct page *, unsigned long,
-					unsigned long);
-	void (*is_dirty_writeback) (struct page *, bool *, bool *);
-	int (*error_remove_page) (struct mapping *mapping, struct page *page);
-	int (*swap_activate)(struct file *);
-	int (*swap_deactivate)(struct file *);
-};
-
-  writepage: called by the VM to write a dirty page to backing store.
-      This may happen for data integrity reasons (i.e. 'sync'), or
-      to free up memory (flush).  The difference can be seen in
-      wbc->sync_mode.
-      The PG_Dirty flag has been cleared and PageLocked is true.
-      writepage should start writeout, should set PG_Writeback,
-      and should make sure the page is unlocked, either synchronously
-      or asynchronously when the write operation completes.
-
-      If wbc->sync_mode is WB_SYNC_NONE, ->writepage doesn't have to
-      try too hard if there are problems, and may choose to write out
-      other pages from the mapping if that is easier (e.g. due to
-      internal dependencies).  If it chooses not to start writeout, it
-      should return AOP_WRITEPAGE_ACTIVATE so that the VM will not keep
-      calling ->writepage on that page.
-
-      See the file "Locking" for more details.
-
-  readpage: called by the VM to read a page from backing store.
-       The page will be Locked when readpage is called, and should be
-       unlocked and marked uptodate once the read completes.
-       If ->readpage discovers that it needs to unlock the page for
-       some reason, it can do so, and then return AOP_TRUNCATED_PAGE.
-       In this case, the page will be relocated, relocked and if
-       that all succeeds, ->readpage will be called again.
-
-  writepages: called by the VM to write out pages associated with the
-	address_space object.  If wbc->sync_mode is WBC_SYNC_ALL, then
-	the writeback_control will specify a range of pages that must be
-	written out.  If it is WBC_SYNC_NONE, then a nr_to_write is given
-	and that many pages should be written if possible.
-	If no ->writepages is given, then mpage_writepages is used
-	instead.  This will choose pages from the address space that are
-	tagged as DIRTY and will pass them to ->writepage.
-
-  set_page_dirty: called by the VM to set a page dirty.
-	This is particularly needed if an address space attaches
-	private data to a page, and that data needs to be updated when
-	a page is dirtied.  This is called, for example, when a memory
-	mapped page gets modified.
-	If defined, it should set the PageDirty flag, and the
-	PAGECACHE_TAG_DIRTY tag in the radix tree.
-
-  readpages: called by the VM to read pages associated with the address_space
-	object.  This is essentially just a vector version of
-	readpage.  Instead of just one page, several pages are
-	requested.
-	readpages is only used for read-ahead, so read errors are
-	ignored.  If anything goes wrong, feel free to give up.
-
-  write_begin:
-	Called by the generic buffered write code to ask the filesystem to
-	prepare to write len bytes at the given offset in the file.  The
-	address_space should check that the write will be able to complete,
-	by allocating space if necessary and doing any other internal
-	housekeeping.  If the write will update parts of any basic-blocks on
-	storage, then those blocks should be pre-read (if they haven't been
-	read already) so that the updated blocks can be written out properly.
-
-	The filesystem must return the locked pagecache page for the specified
-	offset, in *pagep, for the caller to write into.
-
-	It must be able to cope with short writes (where the length passed to
-	write_begin is greater than the number of bytes copied into the page).
-
-	flags is a field for AOP_FLAG_xxx flags, described in
-	include/linux/fs.h.
-
-	A void * may be returned in fsdata, which then gets passed into
-	write_end.
-
-	Returns 0 on success; < 0 on failure (which is the error code), in
-	which case write_end is not called.
-
-  write_end: After a successful write_begin, and data copy, write_end must
-	be called.  len is the original len passed to write_begin, and copied
-	is the amount that was able to be copied.
-
-	The filesystem must take care of unlocking the page and releasing it
-	refcount, and updating i_size.
-
-	Returns < 0 on failure, otherwise the number of bytes (<= 'copied')
-	that were able to be copied into pagecache.
-
-  bmap: called by the VFS to map a logical block offset within object to
-	physical block number.  This method is used by the FIBMAP
-	ioctl and for working with swap-files.  To be able to swap to
-	a file, the file must have a stable mapping to a block
-	device.  The swap system does not go through the filesystem
-	but instead uses bmap to find out where the blocks in the file
-	are and uses those addresses directly.
-
-  invalidatepage: If a page has PagePrivate set, then invalidatepage
-	will be called when part or all of the page is to be removed
-	from the address space.  This generally corresponds to either a
-	truncation, punch hole  or a complete invalidation of the address
-	space (in the latter case 'offset' will always be 0 and 'length'
-	will be PAGE_SIZE).  Any private data associated with the page
-	should be updated to reflect this truncation.  If offset is 0 and
-	length is PAGE_SIZE, then the private data should be released,
-	because the page must be able to be completely discarded.  This may
-	be done by calling the ->releasepage function, but in this case the
-	release MUST succeed.
-
-  releasepage: releasepage is called on PagePrivate pages to indicate
-	that the page should be freed if possible.  ->releasepage
-	should remove any private data from the page and clear the
-	PagePrivate flag.  If releasepage() fails for some reason, it must
-	indicate failure with a 0 return value.
-	releasepage() is used in two distinct though related cases.  The
-	first is when the VM finds a clean page with no active users and
-	wants to make it a free page.  If ->releasepage succeeds, the
-	page will be removed from the address_space and become free.
-
-	The second case is when a request has been made to invalidate
-	some or all pages in an address_space.  This can happen
-	through the fadvise(POSIX_FADV_DONTNEED) system call or by the
-	filesystem explicitly requesting it as nfs and 9fs do (when
-	they believe the cache may be out of date with storage) by
-	calling invalidate_inode_pages2().
-	If the filesystem makes such a call, and needs to be certain
-	that all pages are invalidated, then its releasepage will
-	need to ensure this.  Possibly it can clear the PageUptodate
-	bit if it cannot free private data yet.
-
-  freepage: freepage is called once the page is no longer visible in
-	the page cache in order to allow the cleanup of any private
-	data.  Since it may be called by the memory reclaimer, it
-	should not assume that the original address_space mapping still
-	exists, and it should not block.
-
-  direct_IO: called by the generic read/write routines to perform
-	direct_IO - that is IO requests which bypass the page cache
-	and transfer data directly between the storage and the
-	application's address space.
-
-  isolate_page: Called by the VM when isolating a movable non-lru page.
-	If page is successfully isolated, VM marks the page as PG_isolated
-	via __SetPageIsolated.
-
-  migrate_page:  This is used to compact the physical memory usage.
-	If the VM wants to relocate a page (maybe off a memory card
-	that is signalling imminent failure) it will pass a new page
-	and an old page to this function.  migrate_page should
-	transfer any private data across and update any references
-	that it has to the page.
-
-  putback_page: Called by the VM when isolated page's migration fails.
-
-  launder_page: Called before freeing a page - it writes back the dirty page.  To
-	prevent redirtying the page, it is kept locked during the whole
-	operation.
-
-  is_partially_uptodate: Called by the VM when reading a file through the
-	pagecache when the underlying blocksize != pagesize.  If the required
-	block is up to date then the read can complete without needing the IO
-	to bring the whole page up to date.
-
-  is_dirty_writeback: Called by the VM when attempting to reclaim a page.
-	The VM uses dirty and writeback information to determine if it needs
-	to stall to allow flushers a chance to complete some IO.  Ordinarily
-	it can use PageDirty and PageWriteback but some filesystems have
-	more complex state (unstable pages in NFS prevent reclaim) or
-	do not set those flags due to locking problems.  This callback
-	allows a filesystem to indicate to the VM if a page should be
-	treated as dirty or writeback for the purposes of stalling.
-
-  error_remove_page: normally set to generic_error_remove_page if truncation
-	is ok for this address space.  Used for memory failure handling.
-	Setting this implies you deal with pages going away under you,
-	unless you have them locked or reference counts increased.
-
-  swap_activate: Called when swapon is used on a file to allocate
-	space if necessary and pin the block lookup information in
-	memory.  A return value of zero indicates success,
-	in which case this file can be used to back swapspace.
-
-  swap_deactivate: Called during swapoff on files where swap_activate
-	was successful.
-
-
-The File Object
-===============
-
-A file object represents a file opened by a process.  This is also known
-as an "open file description" in POSIX parlance.
-
-
-struct file_operations
-----------------------
-
-This describes how the VFS can manipulate an open file.  As of kernel
-4.18, the following members are defined:
-
-struct file_operations {
-	struct module *owner;
-	loff_t (*llseek) (struct file *, loff_t, int);
-	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
-	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
-	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
-	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
-	int (*iopoll)(struct kiocb *kiocb, bool spin);
-	int (*iterate) (struct file *, struct dir_context *);
-	int (*iterate_shared) (struct file *, struct dir_context *);
-	__poll_t (*poll) (struct file *, struct poll_table_struct *);
-	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
-	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
-	int (*mmap) (struct file *, struct vm_area_struct *);
-	int (*open) (struct inode *, struct file *);
-	int (*flush) (struct file *, fl_owner_t id);
-	int (*release) (struct inode *, struct file *);
-	int (*fsync) (struct file *, loff_t, loff_t, int datasync);
-	int (*fasync) (int, struct file *, int);
-	int (*lock) (struct file *, int, struct file_lock *);
-	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
-	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
-	int (*check_flags)(int);
-	int (*flock) (struct file *, int, struct file_lock *);
-	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
-	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
-	int (*setlease)(struct file *, long, struct file_lock **, void **);
-	long (*fallocate)(struct file *file, int mode, loff_t offset,
-			  loff_t len);
-	void (*show_fdinfo)(struct seq_file *m, struct file *f);
-#ifndef CONFIG_MMU
-	unsigned (*mmap_capabilities)(struct file *);
-#endif
-	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
-	loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
-				   struct file *file_out, loff_t pos_out,
-				   loff_t len, unsigned int remap_flags);
-	int (*fadvise)(struct file *, loff_t, loff_t, int);
-};
-
-Again, all methods are called without any locks being held, unless
-otherwise noted.
-
-  llseek: called when the VFS needs to move the file position index
-
-  read: called by read(2) and related system calls
-
-  read_iter: possibly asynchronous read with iov_iter as destination
-
-  write: called by write(2) and related system calls
-
-  write_iter: possibly asynchronous write with iov_iter as source
-
-  iopoll: called when aio wants to poll for completions on HIPRI iocbs
-
-  iterate: called when the VFS needs to read the directory contents
-
-  iterate_shared: called when the VFS needs to read the directory contents
-	when filesystem supports concurrent dir iterators
-
-  poll: called by the VFS when a process wants to check if there is
-	activity on this file and (optionally) go to sleep until there
-	is activity.  Called by the select(2) and poll(2) system calls
-
-  unlocked_ioctl: called by the ioctl(2) system call.
-
-  compat_ioctl: called by the ioctl(2) system call when 32 bit system calls
-	 are used on 64 bit kernels.
-
-  mmap: called by the mmap(2) system call
-
-  open: called by the VFS when an inode should be opened.  When the VFS
-	opens a file, it creates a new "struct file".  It then calls the
-	open method for the newly allocated file structure.  You might
-	think that the open method really belongs in
-	"struct inode_operations", and you may be right.  I think it's
-	done the way it is because it makes filesystems simpler to
-	implement.  The open() method is a good place to initialize the
-	"private_data" member in the file structure if you want to point
-	to a device structure
-
-  flush: called by the close(2) system call to flush a file
-
-  release: called when the last reference to an open file is closed
-
-  fsync: called by the fsync(2) system call.  Also see the section above
-	 entitled "Handling errors during writeback".
-
-  fasync: called by the fcntl(2) system call when asynchronous
-	(non-blocking) mode is enabled for a file
-
-  lock: called by the fcntl(2) system call for F_GETLK, F_SETLK, and F_SETLKW
-	commands
-
-  get_unmapped_area: called by the mmap(2) system call
-
-  check_flags: called by the fcntl(2) system call for F_SETFL command
-
-  flock: called by the flock(2) system call
-
-  splice_write: called by the VFS to splice data from a pipe to a file.  This
-		method is used by the splice(2) system call
-
-  splice_read: called by the VFS to splice data from file to a pipe.  This
-	       method is used by the splice(2) system call
-
-  setlease: called by the VFS to set or release a file lock lease.  setlease
-	    implementations should call generic_setlease to record or remove
-	    the lease in the inode after setting it.
-
-  fallocate: called by the VFS to preallocate blocks or punch a hole.
-
-  copy_file_range: called by the copy_file_range(2) system call.
-
-  remap_file_range: called by the ioctl(2) system call for FICLONERANGE and
-	FICLONE and FIDEDUPERANGE commands to remap file ranges.  An
-	implementation should remap len bytes at pos_in of the source file into
-	the dest file at pos_out.  Implementations must handle callers passing
-	in len == 0; this means "remap to the end of the source file".  The
-	return value should the number of bytes remapped, or the usual
-	negative error code if errors occurred before any bytes were remapped.
-	The remap_flags parameter accepts REMAP_FILE_* flags.  If
-	REMAP_FILE_DEDUP is set then the implementation must only remap if the
-	requested file ranges have identical contents.  If REMAP_CAN_SHORTEN is
-	set, the caller is ok with the implementation shortening the request
-	length to satisfy alignment or EOF requirements (or any other reason).
-
-  fadvise: possibly called by the fadvise64() system call.
-
-Note that the file operations are implemented by the specific
-filesystem in which the inode resides.  When opening a device node
-(character or block special) most filesystems will call special
-support routines in the VFS which will locate the required device
-driver information.  These support routines replace the filesystem file
-operations with those for the device driver, and then proceed to call
-the new open() method for the file.  This is how opening a device file
-in the filesystem eventually ends up calling the device driver open()
-method.
-
-
-Directory Entry Cache (dcache)
-==============================
-
-
-struct dentry_operations
-------------------------
-
-This describes how a filesystem can overload the standard dentry
-operations.  Dentries and the dcache are the domain of the VFS and the
-individual filesystem implementations.  Device drivers have no business
-here.  These methods may be set to NULL, as they are either optional or
-the VFS uses a default.  As of kernel 2.6.22, the following members are
-defined:
-
-struct dentry_operations {
-	int (*d_revalidate)(struct dentry *, unsigned int);
-	int (*d_weak_revalidate)(struct dentry *, unsigned int);
-	int (*d_hash)(const struct dentry *, struct qstr *);
-	int (*d_compare)(const struct dentry *,
-			unsigned int, const char *, const struct qstr *);
-	int (*d_delete)(const struct dentry *);
-	int (*d_init)(struct dentry *);
-	void (*d_release)(struct dentry *);
-	void (*d_iput)(struct dentry *, struct inode *);
-	char *(*d_dname)(struct dentry *, char *, int);
-	struct vfsmount *(*d_automount)(struct path *);
-	int (*d_manage)(const struct path *, bool);
-	struct dentry *(*d_real)(struct dentry *, const struct inode *);
-};
-
-  d_revalidate: called when the VFS needs to revalidate a dentry.  This
-	is called whenever a name look-up finds a dentry in the
-	dcache.  Most local filesystems leave this as NULL, because all their
-	dentries in the dcache are valid.  Network filesystems are different
-	since things can change on the server without the client necessarily
-	being aware of it.
-
-	This function should return a positive value if the dentry is still
-	valid, and zero or a negative error code if it isn't.
-
-	d_revalidate may be called in rcu-walk mode (flags & LOOKUP_RCU).
-	If in rcu-walk mode, the filesystem must revalidate the dentry without
-	blocking or storing to the dentry, d_parent and d_inode should not be
-	used without care (because they can change and, in d_inode case, even
-	become NULL under us).
-
-	If a situation is encountered that rcu-walk cannot handle, return
-	-ECHILD and it will be called again in ref-walk mode.
-
- d_weak_revalidate: called when the VFS needs to revalidate a "jumped" dentry.
-	This is called when a path-walk ends at dentry that was not acquired by
-	doing a lookup in the parent directory.  This includes "/", "." and "..",
-	as well as procfs-style symlinks and mountpoint traversal.
-
-	In this case, we are less concerned with whether the dentry is still
-	fully correct, but rather that the inode is still valid.  As with
-	d_revalidate, most local filesystems will set this to NULL since their
-	dcache entries are always valid.
-
-	This function has the same return code semantics as d_revalidate.
-
-	d_weak_revalidate is only called after leaving rcu-walk mode.
-
-  d_hash: called when the VFS adds a dentry to the hash table.  The first
-	dentry passed to d_hash is the parent directory that the name is
-	to be hashed into.
-
-	Same locking and synchronisation rules as d_compare regarding
-	what is safe to dereference etc.
-
-  d_compare: called to compare a dentry name with a given name.  The first
-	dentry is the parent of the dentry to be compared, the second is
-	the child dentry.  len and name string are properties of the dentry
-	to be compared.  qstr is the name to compare it with.
-
-	Must be constant and idempotent, and should not take locks if
-	possible, and should not or store into the dentry.
-	Should not dereference pointers outside the dentry without
-	lots of care (eg.  d_parent, d_inode, d_name should not be used).
-
-	However, our vfsmount is pinned, and RCU held, so the dentries and
-	inodes won't disappear, neither will our sb or filesystem module.
-	->d_sb may be used.
-
-	It is a tricky calling convention because it needs to be called under
-	"rcu-walk", ie. without any locks or references on things.
-
-  d_delete: called when the last reference to a dentry is dropped and the
-	dcache is deciding whether or not to cache it.  Return 1 to delete
-	immediately, or 0 to cache the dentry.  Default is NULL which means to
-	always cache a reachable dentry.  d_delete must be constant and
-	idempotent.
-
-  d_init: called when a dentry is allocated
-
-  d_release: called when a dentry is really deallocated
-
-  d_iput: called when a dentry loses its inode (just prior to its
-	being deallocated).  The default when this is NULL is that the
-	VFS calls iput().  If you define this method, you must call
-	iput() yourself
-
-  d_dname: called when the pathname of a dentry should be generated.
-	Useful for some pseudo filesystems (sockfs, pipefs, ...) to delay
-	pathname generation.  (Instead of doing it when dentry is created,
-	it's done only when the path is needed.).  Real filesystems probably
-	dont want to use it, because their dentries are present in global
-	dcache hash, so their hash should be an invariant.  As no lock is
-	held, d_dname() should not try to modify the dentry itself, unless
-	appropriate SMP safety is used.  CAUTION : d_path() logic is quite
-	tricky.  The correct way to return for example "Hello" is to put it
-	at the end of the buffer, and returns a pointer to the first char.
-	dynamic_dname() helper function is provided to take care of this.
-
-	Example :
-
-	static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen)
-	{
-		return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
-				dentry->d_inode->i_ino);
-	}
-
-  d_automount: called when an automount dentry is to be traversed (optional).
-	This should create a new VFS mount record and return the record to the
-	caller.  The caller is supplied with a path parameter giving the
-	automount directory to describe the automount target and the parent
-	VFS mount record to provide inheritable mount parameters.  NULL should
-	be returned if someone else managed to make the automount first.  If
-	the vfsmount creation failed, then an error code should be returned.
-	If -EISDIR is returned, then the directory will be treated as an
-	ordinary directory and returned to pathwalk to continue walking.
-
-	If a vfsmount is returned, the caller will attempt to mount it on the
-	mountpoint and will remove the vfsmount from its expiration list in
-	the case of failure.  The vfsmount should be returned with 2 refs on
-	it to prevent automatic expiration - the caller will clean up the
-	additional ref.
-
-	This function is only used if DCACHE_NEED_AUTOMOUNT is set on the
-	dentry.  This is set by __d_instantiate() if S_AUTOMOUNT is set on the
-	inode being added.
-
-  d_manage: called to allow the filesystem to manage the transition from a
-	dentry (optional).  This allows autofs, for example, to hold up clients
-	waiting to explore behind a 'mountpoint' while letting the daemon go
-	past and construct the subtree there.  0 should be returned to let the
-	calling process continue.  -EISDIR can be returned to tell pathwalk to
-	use this directory as an ordinary directory and to ignore anything
-	mounted on it and not to check the automount flag.  Any other error
-	code will abort pathwalk completely.
-
-	If the 'rcu_walk' parameter is true, then the caller is doing a
-	pathwalk in RCU-walk mode.  Sleeping is not permitted in this mode,
-	and the caller can be asked to leave it and call again by returning
-	-ECHILD.  -EISDIR may also be returned to tell pathwalk to
-	ignore d_automount or any mounts.
-
-	This function is only used if DCACHE_MANAGE_TRANSIT is set on the
-	dentry being transited from.
-
-  d_real: overlay/union type filesystems implement this method to return one of
-	the underlying dentries hidden by the overlay.  It is used in two
-	different modes:
-
-	Called from file_dentry() it returns the real dentry matching the inode
-	argument.  The real dentry may be from a lower layer already copied up,
-	but still referenced from the file.  This mode is selected with a
-	non-NULL inode argument.
-
-	With NULL inode the topmost real underlying dentry is returned.
-
-Each dentry has a pointer to its parent dentry, as well as a hash list
-of child dentries.  Child dentries are basically like files in a
-directory.
-
-
-Directory Entry Cache API
---------------------------
-
-There are a number of functions defined which permit a filesystem to
-manipulate dentries:
-
-  dget: open a new handle for an existing dentry (this just increments
-	the usage count)
-
-  dput: close a handle for a dentry (decrements the usage count).  If
-	the usage count drops to 0, and the dentry is still in its
-	parent's hash, the "d_delete" method is called to check whether
-	it should be cached.  If it should not be cached, or if the dentry
-	is not hashed, it is deleted.  Otherwise cached dentries are put
-	into an LRU list to be reclaimed on memory shortage.
-
-  d_drop: this unhashes a dentry from its parents hash list.  A
-	subsequent call to dput() will deallocate the dentry if its
-	usage count drops to 0
-
-  d_delete: delete a dentry.  If there are no other open references to
-	the dentry then the dentry is turned into a negative dentry
-	(the d_iput() method is called).  If there are other
-	references, then d_drop() is called instead
-
-  d_add: add a dentry to its parents hash list and then calls
-	d_instantiate()
-
-  d_instantiate: add a dentry to the alias hash list for the inode and
-	updates the "d_inode" member.  The "i_count" member in the
-	inode structure should be set/incremented.  If the inode
-	pointer is NULL, the dentry is called a "negative
-	dentry".  This function is commonly called when an inode is
-	created for an existing negative dentry
-
-  d_lookup: look up a dentry given its parent and path name component
-	It looks up the child of that given name from the dcache
-	hash table.  If it is found, the reference count is incremented
-	and the dentry is returned.  The caller must use dput()
-	to free the dentry when it finishes using it.
-
-
-Mount Options
-=============
-
-
-Parsing options
----------------
-
-On mount and remount the filesystem is passed a string containing a
-comma separated list of mount options.  The options can have either of
-these forms:
-
-  option
-  option=value
-
-The <linux/parser.h> header defines an API that helps parse these
-options.  There are plenty of examples on how to use it in existing
-filesystems.
-
-
-Showing options
----------------
-
-If a filesystem accepts mount options, it must define show_options() to
-show all the currently active options.  The rules are:
-
-  - options MUST be shown which are not default or their values differ
-    from the default
-
-  - options MAY be shown which are enabled by default or have their
-    default value
-
-Options used only internally between a mount helper and the kernel (such
-as file descriptors), or which only have an effect during the mounting
-(such as ones controlling the creation of a journal) are exempt from the
-above rules.
-
-The underlying reason for the above rules is to make sure, that a mount
-can be accurately replicated (e.g. umounting and mounting again) based
-on the information found in /proc/mounts.
-
-
-Resources
-=========
-
-(Note some of these resources are not up-to-date with the latest kernel
- version.)
-
-Creating Linux virtual filesystems. 2002
-    <http://lwn.net/Articles/13325/>
-
-The Linux Virtual File-system Layer by Neil Brown. 1999
-    <http://www.cse.unsw.edu.au/~neilb/oss/linux-commentary/vfs.html>
-
-A tour of the Linux VFS by Michael K. Johnson. 1996
-    <http://www.tldp.org/LDP/khg/HyperNews/get/fs/vfstour.html>
-
-A small trail through the Linux kernel by Andries Brouwer. 2001
-    <http://www.win.tue.nl/~aeb/linux/vfs/trail.html>
-- 
cgit 


From 0cb93b1503c19f19966402c3ed841626ac9db266 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Mon, 13 May 2019 15:50:10 +0530
Subject: dt-bindings: power: Add rpm power domain bindings for qcs404

Add RPM power domain bindings for the qcs404 family of SoC

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Jeffrey Hugo <jhugo@codeaurora.org>
Reviewed-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
[sibis: Add supported rpmpd states for qcs404]
Signed-off-by: Sibi Sankar <sibis@codeaurora.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>

Signed-off-by: Andy Gross <agross@kernel.org>
---
 Documentation/devicetree/bindings/power/qcom,rpmpd.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/power/qcom,rpmpd.txt b/Documentation/devicetree/bindings/power/qcom,rpmpd.txt
index 980e5413d18f..172ccf940c5c 100644
--- a/Documentation/devicetree/bindings/power/qcom,rpmpd.txt
+++ b/Documentation/devicetree/bindings/power/qcom,rpmpd.txt
@@ -6,6 +6,7 @@ which then translates it into a corresponding voltage on a rail
 Required Properties:
  - compatible: Should be one of the following
 	* qcom,msm8996-rpmpd: RPM Power domain for the msm8996 family of SoC
+	* qcom,qcs404-rpmpd: RPM Power domain for the qcs404 family of SoC
 	* qcom,sdm845-rpmhpd: RPMh Power domain for the sdm845 family of SoC
  - #power-domain-cells: number of cells in Power domain specifier
 	must be 1.
-- 
cgit 


From dec9a05a147e4988e1f743c0d0a1389a0552e322 Mon Sep 17 00:00:00 2001
From: Sibi Sankar <sibis@codeaurora.org>
Date: Mon, 13 May 2019 15:50:13 +0530
Subject: dt-bindings: power: Add rpm power domain bindings for msm8998

Add RPM power domain bindings for the msm8998 family of SoC

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Jeffrey Hugo <jhugo@codeaurora.org>
Reviewed-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Sibi Sankar <sibis@codeaurora.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Andy Gross <agross@kernel.org>
---
 Documentation/devicetree/bindings/power/qcom,rpmpd.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/power/qcom,rpmpd.txt b/Documentation/devicetree/bindings/power/qcom,rpmpd.txt
index 172ccf940c5c..eb35b22f9e23 100644
--- a/Documentation/devicetree/bindings/power/qcom,rpmpd.txt
+++ b/Documentation/devicetree/bindings/power/qcom,rpmpd.txt
@@ -6,6 +6,7 @@ which then translates it into a corresponding voltage on a rail
 Required Properties:
  - compatible: Should be one of the following
 	* qcom,msm8996-rpmpd: RPM Power domain for the msm8996 family of SoC
+	* qcom,msm8998-rpmpd: RPM Power domain for the msm8998 family of SoC
 	* qcom,qcs404-rpmpd: RPM Power domain for the qcs404 family of SoC
 	* qcom,sdm845-rpmhpd: RPMh Power domain for the sdm845 family of SoC
  - #power-domain-cells: number of cells in Power domain specifier
-- 
cgit 


From c920f745f45d4610e08943690301b8055fb5e834 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 28 May 2019 20:38:10 +0300
Subject: net: phy: Add phy_standalone sysfs entry

Export a phy_standalone device attribute that is meant to give the
indication that this PHY lacks an attached_dev and its corresponding
sysfs link. The attribute will be created only when the
phy_attach_direct() function will be called with a NULL net_device.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/ABI/testing/sysfs-class-net-phydev | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-class-net-phydev b/Documentation/ABI/testing/sysfs-class-net-phydev
index 6ebabfb27912..b7c46c2f3fd0 100644
--- a/Documentation/ABI/testing/sysfs-class-net-phydev
+++ b/Documentation/ABI/testing/sysfs-class-net-phydev
@@ -34,3 +34,11 @@ Description:
 		xgmii, moca, qsgmii, trgmii, 1000base-x, 2500base-x, rxaui,
 		xaui, 10gbase-kr, unknown
 
+What:		/sys/class/mdio_bus/<bus>/<device>/phy_standalone
+Date:		May 2019
+KernelVersion:	5.3
+Contact:	netdev@vger.kernel.org
+Description:
+		Boolean value indicating whether the PHY device is used in
+		standalone mode, without a net_device associated, by PHYLINK.
+		Attribute created only when this is the case.
-- 
cgit 


From 44cc27e43fa3b8977373915a8e7f515a9d263343 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 28 May 2019 20:38:12 +0300
Subject: net: phylink: Add struct phylink_config to PHYLINK API

The phylink_config structure will encapsulate a pointer to a struct
device and the operation type requested for this instance of PHYLINK.
This patch does not make any functional changes, it just transitions the
PHYLINK internals and all its users to the new API.

A pointer to a phylink_config structure will be passed to
phylink_create() instead of the net_device directly. Also, the same
phylink_config pointer will be passed back to all phylink_mac_ops
callbacks instead of the net_device. Using this mechanism, a PHYLINK
user can get the original net_device using a structure such as
'to_net_dev(config->dev)' or directly the structure containing the
phylink_config using a container_of call.

At the moment, only the PHYLINK_NETDEV is defined as a valid operation
type for PHYLINK. In this mode, a valid reference to a struct device
linked to the original net_device should be passed to PHYLINK through
the phylink_config structure.

This API changes is mainly driven by the necessity of adding a new
operation type in PHYLINK that disconnects the phy_device from the
net_device and also works when the net_device is lacking.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Tested-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/sfp-phylink.rst | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/sfp-phylink.rst b/Documentation/networking/sfp-phylink.rst
index 5bd26cb07244..91446b431b70 100644
--- a/Documentation/networking/sfp-phylink.rst
+++ b/Documentation/networking/sfp-phylink.rst
@@ -98,6 +98,7 @@ this documentation.
 4. Add::
 
 	struct phylink *phylink;
+	struct phylink_config phylink_config;
 
    to the driver's private data structure.  We shall refer to the
    driver's private data pointer as ``priv`` below, and the driver's
@@ -223,8 +224,10 @@ this documentation.
    .. code-block:: c
 
 	struct phylink *phylink;
+	priv->phylink_config.dev = &dev.dev;
+	priv->phylink_config.type = PHYLINK_NETDEV;
 
-	phylink = phylink_create(dev, node, phy_mode, &phylink_ops);
+	phylink = phylink_create(&priv->phylink_config, node, phy_mode, &phylink_ops);
 	if (IS_ERR(phylink)) {
 		err = PTR_ERR(phylink);
 		fail probe;
-- 
cgit 


From 84ede58dfcd1db6f04f71dd3ccd5328271b346da Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 20 May 2019 09:54:46 -0700
Subject: crypto: hash - remove CRYPTO_ALG_TYPE_DIGEST

Remove the unnecessary constant CRYPTO_ALG_TYPE_DIGEST, which has the
same value as CRYPTO_ALG_TYPE_HASH.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/crypto/architecture.rst | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/crypto/architecture.rst b/Documentation/crypto/architecture.rst
index ee8ff0762d7f..3eae1ae7f798 100644
--- a/Documentation/crypto/architecture.rst
+++ b/Documentation/crypto/architecture.rst
@@ -208,9 +208,7 @@ the aforementioned cipher types:
 -  CRYPTO_ALG_TYPE_KPP Key-agreement Protocol Primitive (KPP) such as
    an ECDH or DH implementation
 
--  CRYPTO_ALG_TYPE_DIGEST Raw message digest
-
--  CRYPTO_ALG_TYPE_HASH Alias for CRYPTO_ALG_TYPE_DIGEST
+-  CRYPTO_ALG_TYPE_HASH Raw message digest
 
 -  CRYPTO_ALG_TYPE_SHASH Synchronous multi-block hash
 
-- 
cgit 


From 2af8c8927ebb474eb2af4ab3b8d7e92c3844b43f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 24 May 2019 18:26:50 +0200
Subject: dt-bindings: add Atmel SHA204A I2C crypto processor

Add a compatible string for the Atmel SHA204A I2C crypto processor.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/devicetree/bindings/trivial-devices.yaml | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index 747fd3f689dc..a572c3468226 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -52,6 +52,8 @@ properties:
           - at,24c08
             # i2c trusted platform module (TPM)
           - atmel,at97sc3204t
+            # i2c h/w symmetric crypto module
+          - atmel,atsha204a
             # CM32181: Ambient Light Sensor
           - capella,cm32181
             # CM3232: Ambient Light Sensor
-- 
cgit 


From 0adb0c99594b73844cf9a5714faa6553ea04ba04 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 24 May 2019 18:26:51 +0200
Subject: dt-bindings: move Atmel ECC508A I2C crypto processor to
 trivial-devices

Move the binding for the discrete Atmel I2C Elliptic Curve h/w crypto
module to trivial-devices.yaml, as it doesn't belong in atmel-crypto
which describes unrelated on-SoC peripherals.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/devicetree/bindings/crypto/atmel-crypto.txt | 13 -------------
 Documentation/devicetree/bindings/trivial-devices.yaml    |  2 ++
 2 files changed, 2 insertions(+), 13 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/crypto/atmel-crypto.txt b/Documentation/devicetree/bindings/crypto/atmel-crypto.txt
index 6b458bb2440d..f2aab3dc2b52 100644
--- a/Documentation/devicetree/bindings/crypto/atmel-crypto.txt
+++ b/Documentation/devicetree/bindings/crypto/atmel-crypto.txt
@@ -66,16 +66,3 @@ sha@f8034000 {
 	dmas = <&dma1 2 17>;
 	dma-names = "tx";
 };
-
-* Eliptic Curve Cryptography (I2C)
-
-Required properties:
-- compatible : must be "atmel,atecc508a".
-- reg: I2C bus address of the device.
-- clock-frequency: must be present in the i2c controller node.
-
-Example:
-atecc508a@c0 {
-	compatible = "atmel,atecc508a";
-	reg = <0xC0>;
-};
diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index a572c3468226..2e742d399e87 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -54,6 +54,8 @@ properties:
           - atmel,at97sc3204t
             # i2c h/w symmetric crypto module
           - atmel,atsha204a
+            # i2c h/w elliptic curve crypto module
+          - atmel,atecc508a
             # CM32181: Ambient Light Sensor
           - capella,cm32181
             # CM3232: Ambient Light Sensor
-- 
cgit 


From ae683c816d9f4aadec3eaf1da69a04c07ca9fd3f Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Sat, 25 May 2019 15:41:39 +0200
Subject: ARM: dts: sun7i: icnova-swac: Fix the model vendor

Even though the SWAC is just a baseboard to the icnova SoM, the vendor of
the baseboard somehow ended up with the board name instead of the vendor
name. Fix that.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
---
 Documentation/devicetree/bindings/arm/sunxi.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml b/Documentation/devicetree/bindings/arm/sunxi.yaml
index 285f4fc8519d..000a00d12d6a 100644
--- a/Documentation/devicetree/bindings/arm/sunxi.yaml
+++ b/Documentation/devicetree/bindings/arm/sunxi.yaml
@@ -263,7 +263,7 @@ properties:
 
       - description: ICNova A20 SWAC
         items:
-          - const: swac,icnova-a20-swac
+          - const: incircuit,icnova-a20-swac
           - const: incircuit,icnova-a20
           - const: allwinner,sun7i-a20
 
-- 
cgit 


From c42eaffa16568a538f12dfebd99624659992913a Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Tue, 14 May 2019 22:47:23 +0800
Subject: Documentation: add Linux PCI to Sphinx TOC tree

Add index.rst for PCI subsystem.  More docs will be added later.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 Documentation/PCI/index.rst | 9 +++++++++
 Documentation/index.rst     | 1 +
 2 files changed, 10 insertions(+)
 create mode 100644 Documentation/PCI/index.rst

(limited to 'Documentation')

diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst
new file mode 100644
index 000000000000..c2f8728d11cf
--- /dev/null
+++ b/Documentation/PCI/index.rst
@@ -0,0 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+Linux PCI Bus Subsystem
+=======================
+
+.. toctree::
+   :maxdepth: 2
+   :numbered:
diff --git a/Documentation/index.rst b/Documentation/index.rst
index a7566ef62411..4afa431d9b1f 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -101,6 +101,7 @@ needed).
    filesystems/index
    vm/index
    bpf/index
+   PCI/index
    misc-devices/index
 
 Architecture-specific documentation
-- 
cgit 


From 4d11d13e27762413399ab6f8dc49d30911cac17a Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 2 May 2019 20:08:40 -0700
Subject: f2fs: add missing sysfs entries in documentation

This patch cleans up documentation to cover missing sysfs entries.

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/filesystems/f2fs.txt | 89 ++++++++++++++++++++++++++++++++++----
 1 file changed, 80 insertions(+), 9 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index f7b5e4ff0de3..66aca042988e 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -246,11 +246,14 @@ Files in /sys/fs/f2fs/<devname>
 ..............................................................................
  File                         Content
 
- gc_max_sleep_time            This tuning parameter controls the maximum sleep
+ gc_urgent_sleep_time         This parameter controls sleep time for gc_urgent.
+                              500 ms is set by default. See above gc_urgent.
+
+ gc_min_sleep_time            This tuning parameter controls the minimum sleep
                               time for the garbage collection thread. Time is
                               in milliseconds.
 
- gc_min_sleep_time            This tuning parameter controls the minimum sleep
+ gc_max_sleep_time            This tuning parameter controls the maximum sleep
                               time for the garbage collection thread. Time is
                               in milliseconds.
 
@@ -270,9 +273,6 @@ Files in /sys/fs/f2fs/<devname>
                               to 1, background thread starts to do GC by given
                               gc_urgent_sleep_time interval.
 
- gc_urgent_sleep_time         This parameter controls sleep time for gc_urgent.
-                              500 ms is set by default. See above gc_urgent.
-
  reclaim_segments             This parameter controls the number of prefree
                               segments to be reclaimed. If the number of prefree
 			      segments is larger than the number of segments
@@ -287,7 +287,16 @@ Files in /sys/fs/f2fs/<devname>
 			      checkpoint is triggered, and issued during the
 			      checkpoint. By default, it is disabled with 0.
 
- trim_sections                This parameter controls the number of sections
+ discard_granularity	      This parameter controls the granularity of discard
+			      command size. It will issue discard commands iif
+			      the size is larger than given granularity. Its
+			      unit size is 4KB, and 4 (=16KB) is set by default.
+			      The maximum value is 128 (=512KB).
+
+ reserved_blocks	      This parameter indicates the number of blocks that
+			      f2fs reserves internally for root.
+
+ batched_trim_sections	      This parameter controls the number of sections
                               to be trimmed out in batch mode when FITRIM
                               conducts. 32 sections is set by default.
 
@@ -309,11 +318,35 @@ Files in /sys/fs/f2fs/<devname>
 			      the number is less than this value, it triggers
 			      in-place-updates.
 
+ min_seq_blocks		      This parameter controls the threshold to serialize
+			      write IOs issued by multiple threads in parallel.
+
+ min_hot_blocks		      This parameter controls the threshold to allocate
+			      a hot data log for pending data blocks to write.
+
+ min_ssr_sections	      This parameter adds the threshold when deciding
+			      SSR block allocation. If this is large, SSR mode
+			      will be enabled early.
+
+ ram_thresh                   This parameter controls the memory footprint used
+			      by free nids and cached nat entries. By default,
+			      10 is set, which indicates 10 MB / 1 GB RAM.
+
+ ra_nid_pages		      When building free nids, F2FS reads NAT blocks
+			      ahead for speed up. Default is 0.
+
+ dirty_nats_ratio	      Given dirty ratio of cached nat entries, F2FS
+			      determines flushing them in background.
+
  max_victim_search	      This parameter controls the number of trials to
 			      find a victim segment when conducting SSR and
 			      cleaning operations. The default value is 4096
 			      which covers 8GB block address range.
 
+ migration_granularity	      For large-sized sections, F2FS can stop GC given
+			      this granularity instead of reclaiming entire
+			      section.
+
  dir_level                    This parameter controls the directory level to
 			      support large directory. If a directory has a
 			      number of files, it can reduce the file lookup
@@ -321,9 +354,47 @@ Files in /sys/fs/f2fs/<devname>
 			      Otherwise, it needs to decrease this value to
 			      reduce the space overhead. The default value is 0.
 
- ram_thresh                   This parameter controls the memory footprint used
-			      by free nids and cached nat entries. By default,
-			      10 is set, which indicates 10 MB / 1 GB RAM.
+ cp_interval		      F2FS tries to do checkpoint periodically, 60 secs
+			      by default.
+
+ idle_interval		      F2FS detects system is idle, if there's no F2FS
+			      operations during given interval, 5 secs by
+			      default.
+
+ discard_idle_interval	      F2FS detects the discard thread is idle, given
+			      time interval. Default is 5 secs.
+
+ gc_idle_interval	      F2FS detects the GC thread is idle, given time
+			      interval. Default is 5 secs.
+
+ umount_discard_timeout       When unmounting the disk, F2FS waits for finishing
+			      queued discard commands which can take huge time.
+			      This gives time out for it, 5 secs by default.
+
+ iostat_enable		      This controls to enable/disable iostat in F2FS.
+
+ readdir_ra		      This enables/disabled readahead of inode blocks
+			      in readdir, and default is enabled.
+
+ gc_pin_file_thresh	      This indicates how many GC can be failed for the
+			      pinned file. If it exceeds this, F2FS doesn't
+			      guarantee its pinning state. 2048 trials is set
+			      by default.
+
+ extension_list		      This enables to change extension_list for hot/cold
+			      files in runtime.
+
+ inject_rate		      This controls injection rate of arbitrary faults.
+
+ inject_type		      This controls injection type of arbitrary faults.
+
+ dirty_segments 	      This shows # of dirty segments.
+
+ lifetime_write_kbytes	      This shows # of data written to the disk.
+
+ features		      This shows current features enabled on F2FS.
+
+ current_reserved_blocks      This shows # of blocks currently reserved.
 
 ================================================================================
 USAGE
-- 
cgit 


From 9b88ad5464af1bf7228991f1c46a9a13484790a4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 29 May 2019 20:09:26 -0300
Subject: scripts/sphinx-pre-install: always check if version is compatible
 with build

Call the script every time a make docs target is selected, on
a simplified check mode.

With this change, the script will set two vars:

$min_version - obtained from `needs_sphinx` var inside
	       conf.py (currently, '1.3')

$rec_version - obtained from sphinx/requirements.txt.

With those changes, a target like "make htmldocs" will do:

1) If no sphinx-build/sphinx-build3 is found, it will run
   the script on normal mode as before, checking for all
   system dependencies and providing install hints for the
   needed programs and will abort the build;

2) If no sphinx-build/sphinx-build3 is found, but there is
   a sphinx_${VER}/bin/activate file, and if
   ${VER} >= $min_version (string comparation), it will
   run in full mode, and will recommend to activate the
   virtualenv. If there are multiple virtualenvs, it
   will string sort the versions, recommending the
   highest version and will abort the build;

3) If Sphinx is detected but has a version lower than
   $min_version, it will run in full mode - with will
   recommend creating a virtual env using sphinx/requirements.txt,
   and will abort the build.

4) If Sphinx is detected and version is lower than
   $rec_version, it will run in full mode and will
   recommend creating a virtual env using sphinx/requirements.txt.

   In this case, it **won't** abort the build.

5) If Sphinx is detected and version is equal or righer than
   $rec_version it will return just after detecting the
   version ("quick mode"), not checking if are there any
   missing dependencies.

Just like before, if one wants to install Sphinx from the
distro, it has to call the script manually and use `--no-virtualenv`
argument to get the hints for his OS:

    You should run:

	sudo dnf install -y python3-sphinx python3-sphinx_rtd_theme

While here, add a small help for the three optional arguments
for the script.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/Makefile | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/Makefile b/Documentation/Makefile
index e889e7cb8511..380e24053d6f 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -70,12 +70,14 @@ quiet_cmd_sphinx = SPHINX  $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
 	$(abspath $(BUILDDIR)/$3/$4)
 
 htmldocs:
+	@./scripts/sphinx-pre-install --version-check
 	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var)))
 
 linkcheckdocs:
 	@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,linkcheck,$(var),,$(var)))
 
 latexdocs:
+	@./scripts/sphinx-pre-install --version-check
 	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,latex,$(var),latex,$(var)))
 
 ifeq ($(HAVE_PDFLATEX),0)
@@ -87,14 +89,17 @@ pdfdocs:
 else # HAVE_PDFLATEX
 
 pdfdocs: latexdocs
+	@./scripts/sphinx-pre-install --version-check
 	$(foreach var,$(SPHINXDIRS), $(MAKE) PDFLATEX="$(PDFLATEX)" LATEXOPTS="$(LATEXOPTS)" -C $(BUILDDIR)/$(var)/latex || exit;)
 
 endif # HAVE_PDFLATEX
 
 epubdocs:
+	@./scripts/sphinx-pre-install --version-check
 	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,epub,$(var),epub,$(var)))
 
 xmldocs:
+	@./scripts/sphinx-pre-install --version-check
 	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,xml,$(var),xml,$(var)))
 
 endif # HAVE_SPHINX
-- 
cgit 


From cf08508d21ffae5aea6c7dcb771ebd28612c6120 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 29 May 2019 20:09:31 -0300
Subject: docs: by default, build docs a lot faster with Sphinx >= 1.7

Since Sphinx version 1.7, it is possible to use "-jauto" in
order to speedup documentation builds. On older versions,
while -j was already supported, one would need to set the
number of threads manually.

So, if SPHINXOPTS is not provided, add -jauto, in order to
speed up the build. That makes it *a lot* times faster than
without -j.

If one really wants to slow things down, it can just use:

	make SPHINXOPTS=-j1 htmldocs

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
[ jc: fixed perl magic to determine sphinx version ]
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/Makefile | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/Makefile b/Documentation/Makefile
index 380e24053d6f..85d3cfafd77c 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -28,6 +28,8 @@ ifeq ($(HAVE_SPHINX),0)
 
 else # HAVE_SPHINX
 
+export SPHINXOPTS = $(shell perl -e 'open IN,"sphinx-build --version 2>&1 |"; while (<IN>) { if (m/([\d\.]+)/) { print "-jauto" if ($$1 >= "1.7") } ;} close IN')
+
 # User-friendly check for pdflatex and latexmk
 HAVE_PDFLATEX := $(shell if which $(PDFLATEX) >/dev/null 2>&1; then echo 1; else echo 0; fi)
 HAVE_LATEXMK := $(shell if which latexmk >/dev/null 2>&1; then echo 1; else echo 0; fi)
-- 
cgit 


From a700767a7682d9bd237e927253274859aee075e7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 29 May 2019 20:09:32 -0300
Subject: docs: requirements.txt: recommend Sphinx 1.7.9

As discussed at the linux-doc ML, while we'll still support
version 1.3, it is time to recommend a more modern version.

So, let's switch the minimal requirements to Sphinx 1.7.9,
as it has the "-jauto" flag, with makes a lot faster when
building documentation.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/doc-guide/sphinx.rst    | 17 ++++++++---------
 Documentation/sphinx/requirements.txt |  4 ++--
 2 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst
index c039224b404e..4ba081f43e98 100644
--- a/Documentation/doc-guide/sphinx.rst
+++ b/Documentation/doc-guide/sphinx.rst
@@ -27,8 +27,7 @@ Sphinx Install
 ==============
 
 The ReST markups currently used by the Documentation/ files are meant to be
-built with ``Sphinx`` version 1.3 or higher. If you desire to build
-PDF output, it is recommended to use version 1.4.6 or higher.
+built with ``Sphinx`` version 1.3 or higher.
 
 There's a script that checks for the Sphinx requirements. Please see
 :ref:`sphinx-pre-install` for further details.
@@ -56,13 +55,13 @@ or ``virtualenv``, depending on how your distribution packaged Python 3.
       those expressions are written using LaTeX notation. It needs texlive
       installed with amdfonts and amsmath in order to evaluate them.
 
-In summary, if you want to install Sphinx version 1.4.9, you should do::
+In summary, if you want to install Sphinx version 1.7.9, you should do::
 
-       $ virtualenv sphinx_1.4
-       $ . sphinx_1.4/bin/activate
-       (sphinx_1.4) $ pip install -r Documentation/sphinx/requirements.txt
+       $ virtualenv sphinx_1.7.9
+       $ . sphinx_1.7.9/bin/activate
+       (sphinx_1.7.9) $ pip install -r Documentation/sphinx/requirements.txt
 
-After running ``. sphinx_1.4/bin/activate``, the prompt will change,
+After running ``. sphinx_1.7.9/bin/activate``, the prompt will change,
 in order to indicate that you're using the new environment. If you
 open a new shell, you need to rerun this command to enter again at
 the virtual environment before building the documentation.
@@ -105,8 +104,8 @@ command line options for your distro::
 	You should run:
 
 		sudo dnf install -y texlive-luatex85
-		/usr/bin/virtualenv sphinx_1.4
-		. sphinx_1.4/bin/activate
+		/usr/bin/virtualenv sphinx_1.7.9
+		. sphinx_1.7.9/bin/activate
 		pip install -r Documentation/sphinx/requirements.txt
 
 	Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468.
diff --git a/Documentation/sphinx/requirements.txt b/Documentation/sphinx/requirements.txt
index 742be3e12619..14e29a0ae480 100644
--- a/Documentation/sphinx/requirements.txt
+++ b/Documentation/sphinx/requirements.txt
@@ -1,3 +1,3 @@
-docutils==0.12
-Sphinx==1.4.9
+docutils
+Sphinx==1.7.9
 sphinx_rtd_theme
-- 
cgit 


From 2dc7e48dee68219dd8921824473306036ea45152 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Wed, 29 May 2019 12:34:00 -0400
Subject: Documentation: ip-sysctl.txt: Document tcp_fastopen_key

Add docs for /proc/sys/net/ipv4/tcp_fastopen_key

Signed-off-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Cc: Jeremy Sowden <jeremy@azazel.net>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 14fe93049d28..a73b3a02e49a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -648,6 +648,26 @@ tcp_fastopen_blackhole_timeout_sec - INTEGER
 	0 to disable the blackhole detection.
 	By default, it is set to 1hr.
 
+tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs
+	The list consists of a primary key and an optional backup key. The
+	primary key is used for both creating and validating cookies, while the
+	optional backup key is only used for validating cookies. The purpose of
+	the backup key is to maximize TFO validation when keys are rotated.
+
+	A randomly chosen primary key may be configured by the kernel if
+	the tcp_fastopen sysctl is set to 0x400 (see above), or if the
+	TCP_FASTOPEN setsockopt() optname is set and a key has not been
+	previously configured via sysctl. If keys are configured via
+	setsockopt() by using the TCP_FASTOPEN_KEY optname, then those
+	per-socket keys will be used instead of any keys that are specified via
+	sysctl.
+
+	A key is specified as 4 8-digit hexadecimal integers which are separated
+	by a '-' as: xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx. Leading zeros may be
+	omitted. A primary and a backup key may be specified by separating them
+	by a comma. If only one key is specified, it becomes the primary key and
+	any previously configured backup keys are removed.
+
 tcp_syn_retries - INTEGER
 	Number of times initial SYNs for an active TCP connection attempt
 	will be retransmitted. Should not be higher than 127. Default value
-- 
cgit 


From ed0ac5c7ec3763e3261c48e3c5d4b7528b60fd85 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 20 May 2019 21:51:50 +0100
Subject: keys: Add a keyctl to move a key between keyrings

Add a keyctl to atomically move a link to a key from one keyring to
another.  The key must exist in "from" keyring and a flag can be given to
cause the operation to fail if there's a matching key already in the "to"
keyring.

This can be done with:

	keyctl(KEYCTL_MOVE,
	       key_serial_t key,
	       key_serial_t from_keyring,
	       key_serial_t to_keyring,
	       unsigned int flags);

The key being moved must grant Link permission and both keyrings must grant
Write permission.

flags should be 0 or KEYCTL_MOVE_EXCL, with the latter preventing
displacement of a matching key from the "to" keyring.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/security/keys/core.rst | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index 9521c4207f01..823d29bf44f7 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -577,6 +577,27 @@ The keyctl syscall functions are:
      added.
 
 
+  *  Move a key from one keyring to another::
+
+	long keyctl(KEYCTL_MOVE,
+		    key_serial_t id,
+		    key_serial_t from_ring_id,
+		    key_serial_t to_ring_id,
+		    unsigned int flags);
+
+     Move the key specified by "id" from the keyring specified by
+     "from_ring_id" to the keyring specified by "to_ring_id".  If the two
+     keyrings are the same, nothing is done.
+
+     "flags" can have KEYCTL_MOVE_EXCL set in it to cause the operation to fail
+     with EEXIST if a matching key exists in the destination keyring, otherwise
+     such a key will be replaced.
+
+     A process must have link permission on the key for this function to be
+     successful and write permission on both keyrings.  Any errors that can
+     occur from KEYCTL_LINK also apply on the destination keyring here.
+
+
   *  Unlink a key or keyring from another keyring::
 
 	long keyctl(KEYCTL_UNLINK, key_serial_t keyring, key_serial_t key);
-- 
cgit 


From 229b4e0728e0a6ddca2645e73696d5b104fbbbfb Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Tue, 14 May 2019 22:47:24 +0800
Subject: Documentation: PCI: convert pci.txt to reST

Convert plain text documentation to reStructuredText format and add it to
Sphinx TOC tree.  No essential content change.

Move the description of struct pci_driver and struct pci_device_id into
in-source comments.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
[bhelgaas: fix kernel-doc warnings related to moving descriptions to
linux/pci.h, fix "space tab" whitespace errors in mod_devicetable.h]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/PCI/index.rst |   2 +
 Documentation/PCI/pci.rst   | 578 ++++++++++++++++++++++++++++++++++++++++
 Documentation/PCI/pci.txt   | 636 --------------------------------------------
 3 files changed, 580 insertions(+), 636 deletions(-)
 create mode 100644 Documentation/PCI/pci.rst
 delete mode 100644 Documentation/PCI/pci.txt

(limited to 'Documentation')

diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst
index c2f8728d11cf..7babf43709b0 100644
--- a/Documentation/PCI/index.rst
+++ b/Documentation/PCI/index.rst
@@ -7,3 +7,5 @@ Linux PCI Bus Subsystem
 .. toctree::
    :maxdepth: 2
    :numbered:
+
+   pci
diff --git a/Documentation/PCI/pci.rst b/Documentation/PCI/pci.rst
new file mode 100644
index 000000000000..6864f9a70f5f
--- /dev/null
+++ b/Documentation/PCI/pci.rst
@@ -0,0 +1,578 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================
+How To Write Linux PCI Drivers
+==============================
+
+:Authors: - Martin Mares <mj@ucw.cz>
+          - Grant Grundler <grundler@parisc-linux.org>
+
+The world of PCI is vast and full of (mostly unpleasant) surprises.
+Since each CPU architecture implements different chip-sets and PCI devices
+have different requirements (erm, "features"), the result is the PCI support
+in the Linux kernel is not as trivial as one would wish. This short paper
+tries to introduce all potential driver authors to Linux APIs for
+PCI device drivers.
+
+A more complete resource is the third edition of "Linux Device Drivers"
+by Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman.
+LDD3 is available for free (under Creative Commons License) from:
+http://lwn.net/Kernel/LDD3/.
+
+However, keep in mind that all documents are subject to "bit rot".
+Refer to the source code if things are not working as described here.
+
+Please send questions/comments/patches about Linux PCI API to the
+"Linux PCI" <linux-pci@atrey.karlin.mff.cuni.cz> mailing list.
+
+
+Structure of PCI drivers
+========================
+PCI drivers "discover" PCI devices in a system via pci_register_driver().
+Actually, it's the other way around. When the PCI generic code discovers
+a new device, the driver with a matching "description" will be notified.
+Details on this below.
+
+pci_register_driver() leaves most of the probing for devices to
+the PCI layer and supports online insertion/removal of devices [thus
+supporting hot-pluggable PCI, CardBus, and Express-Card in a single driver].
+pci_register_driver() call requires passing in a table of function
+pointers and thus dictates the high level structure of a driver.
+
+Once the driver knows about a PCI device and takes ownership, the
+driver generally needs to perform the following initialization:
+
+  - Enable the device
+  - Request MMIO/IOP resources
+  - Set the DMA mask size (for both coherent and streaming DMA)
+  - Allocate and initialize shared control data (pci_allocate_coherent())
+  - Access device configuration space (if needed)
+  - Register IRQ handler (request_irq())
+  - Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip)
+  - Enable DMA/processing engines
+
+When done using the device, and perhaps the module needs to be unloaded,
+the driver needs to take the follow steps:
+
+  - Disable the device from generating IRQs
+  - Release the IRQ (free_irq())
+  - Stop all DMA activity
+  - Release DMA buffers (both streaming and coherent)
+  - Unregister from other subsystems (e.g. scsi or netdev)
+  - Release MMIO/IOP resources
+  - Disable the device
+
+Most of these topics are covered in the following sections.
+For the rest look at LDD3 or <linux/pci.h> .
+
+If the PCI subsystem is not configured (CONFIG_PCI is not set), most of
+the PCI functions described below are defined as inline functions either
+completely empty or just returning an appropriate error codes to avoid
+lots of ifdefs in the drivers.
+
+
+pci_register_driver() call
+==========================
+
+PCI device drivers call ``pci_register_driver()`` during their
+initialization with a pointer to a structure describing the driver
+(``struct pci_driver``):
+
+.. kernel-doc:: include/linux/pci.h
+   :functions: pci_driver
+
+The ID table is an array of ``struct pci_device_id`` entries ending with an
+all-zero entry.  Definitions with static const are generally preferred.
+
+.. kernel-doc:: include/linux/mod_devicetable.h
+   :functions: pci_device_id
+
+Most drivers only need ``PCI_DEVICE()`` or ``PCI_DEVICE_CLASS()`` to set up
+a pci_device_id table.
+
+New PCI IDs may be added to a device driver pci_ids table at runtime
+as shown below::
+
+  echo "vendor device subvendor subdevice class class_mask driver_data" > \
+  /sys/bus/pci/drivers/{driver}/new_id
+
+All fields are passed in as hexadecimal values (no leading 0x).
+The vendor and device fields are mandatory, the others are optional. Users
+need pass only as many optional fields as necessary:
+
+  - subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF)
+  - class and classmask fields default to 0
+  - driver_data defaults to 0UL.
+
+Note that driver_data must match the value used by any of the pci_device_id
+entries defined in the driver. This makes the driver_data field mandatory
+if all the pci_device_id entries have a non-zero driver_data value.
+
+Once added, the driver probe routine will be invoked for any unclaimed
+PCI devices listed in its (newly updated) pci_ids list.
+
+When the driver exits, it just calls pci_unregister_driver() and the PCI layer
+automatically calls the remove hook for all devices handled by the driver.
+
+
+"Attributes" for driver functions/data
+--------------------------------------
+
+Please mark the initialization and cleanup functions where appropriate
+(the corresponding macros are defined in <linux/init.h>):
+
+	======		=================================================
+	__init		Initialization code. Thrown away after the driver
+			initializes.
+	__exit		Exit code. Ignored for non-modular drivers.
+	======		=================================================
+
+Tips on when/where to use the above attributes:
+	- The module_init()/module_exit() functions (and all
+	  initialization functions called _only_ from these)
+	  should be marked __init/__exit.
+
+	- Do not mark the struct pci_driver.
+
+	- Do NOT mark a function if you are not sure which mark to use.
+	  Better to not mark the function than mark the function wrong.
+
+
+How to find PCI devices manually
+================================
+
+PCI drivers should have a really good reason for not using the
+pci_register_driver() interface to search for PCI devices.
+The main reason PCI devices are controlled by multiple drivers
+is because one PCI device implements several different HW services.
+E.g. combined serial/parallel port/floppy controller.
+
+A manual search may be performed using the following constructs:
+
+Searching by vendor and device ID::
+
+	struct pci_dev *dev = NULL;
+	while (dev = pci_get_device(VENDOR_ID, DEVICE_ID, dev))
+		configure_device(dev);
+
+Searching by class ID (iterate in a similar way)::
+
+	pci_get_class(CLASS_ID, dev)
+
+Searching by both vendor/device and subsystem vendor/device ID::
+
+	pci_get_subsys(VENDOR_ID,DEVICE_ID, SUBSYS_VENDOR_ID, SUBSYS_DEVICE_ID, dev).
+
+You can use the constant PCI_ANY_ID as a wildcard replacement for
+VENDOR_ID or DEVICE_ID.  This allows searching for any device from a
+specific vendor, for example.
+
+These functions are hotplug-safe. They increment the reference count on
+the pci_dev that they return. You must eventually (possibly at module unload)
+decrement the reference count on these devices by calling pci_dev_put().
+
+
+Device Initialization Steps
+===========================
+
+As noted in the introduction, most PCI drivers need the following steps
+for device initialization:
+
+  - Enable the device
+  - Request MMIO/IOP resources
+  - Set the DMA mask size (for both coherent and streaming DMA)
+  - Allocate and initialize shared control data (pci_allocate_coherent())
+  - Access device configuration space (if needed)
+  - Register IRQ handler (request_irq())
+  - Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip)
+  - Enable DMA/processing engines.
+
+The driver can access PCI config space registers at any time.
+(Well, almost. When running BIST, config space can go away...but
+that will just result in a PCI Bus Master Abort and config reads
+will return garbage).
+
+
+Enable the PCI device
+---------------------
+Before touching any device registers, the driver needs to enable
+the PCI device by calling pci_enable_device(). This will:
+
+  - wake up the device if it was in suspended state,
+  - allocate I/O and memory regions of the device (if BIOS did not),
+  - allocate an IRQ (if BIOS did not).
+
+.. note::
+   pci_enable_device() can fail! Check the return value.
+
+.. warning::
+   OS BUG: we don't check resource allocations before enabling those
+   resources. The sequence would make more sense if we called
+   pci_request_resources() before calling pci_enable_device().
+   Currently, the device drivers can't detect the bug when when two
+   devices have been allocated the same range. This is not a common
+   problem and unlikely to get fixed soon.
+
+   This has been discussed before but not changed as of 2.6.19:
+   http://lkml.org/lkml/2006/3/2/194
+
+
+pci_set_master() will enable DMA by setting the bus master bit
+in the PCI_COMMAND register. It also fixes the latency timer value if
+it's set to something bogus by the BIOS.  pci_clear_master() will
+disable DMA by clearing the bus master bit.
+
+If the PCI device can use the PCI Memory-Write-Invalidate transaction,
+call pci_set_mwi().  This enables the PCI_COMMAND bit for Mem-Wr-Inval
+and also ensures that the cache line size register is set correctly.
+Check the return value of pci_set_mwi() as not all architectures
+or chip-sets may support Memory-Write-Invalidate.  Alternatively,
+if Mem-Wr-Inval would be nice to have but is not required, call
+pci_try_set_mwi() to have the system do its best effort at enabling
+Mem-Wr-Inval.
+
+
+Request MMIO/IOP resources
+--------------------------
+Memory (MMIO), and I/O port addresses should NOT be read directly
+from the PCI device config space. Use the values in the pci_dev structure
+as the PCI "bus address" might have been remapped to a "host physical"
+address by the arch/chip-set specific kernel support.
+
+See Documentation/io-mapping.txt for how to access device registers
+or device memory.
+
+The device driver needs to call pci_request_region() to verify
+no other device is already using the same address resource.
+Conversely, drivers should call pci_release_region() AFTER
+calling pci_disable_device().
+The idea is to prevent two devices colliding on the same address range.
+
+.. tip::
+   See OS BUG comment above. Currently (2.6.19), The driver can only
+   determine MMIO and IO Port resource availability _after_ calling
+   pci_enable_device().
+
+Generic flavors of pci_request_region() are request_mem_region()
+(for MMIO ranges) and request_region() (for IO Port ranges).
+Use these for address resources that are not described by "normal" PCI
+BARs.
+
+Also see pci_request_selected_regions() below.
+
+
+Set the DMA mask size
+---------------------
+.. note::
+   If anything below doesn't make sense, please refer to
+   Documentation/DMA-API.txt. This section is just a reminder that
+   drivers need to indicate DMA capabilities of the device and is not
+   an authoritative source for DMA interfaces.
+
+While all drivers should explicitly indicate the DMA capability
+(e.g. 32 or 64 bit) of the PCI bus master, devices with more than
+32-bit bus master capability for streaming data need the driver
+to "register" this capability by calling pci_set_dma_mask() with
+appropriate parameters.  In general this allows more efficient DMA
+on systems where System RAM exists above 4G _physical_ address.
+
+Drivers for all PCI-X and PCIe compliant devices must call
+pci_set_dma_mask() as they are 64-bit DMA devices.
+
+Similarly, drivers must also "register" this capability if the device
+can directly address "consistent memory" in System RAM above 4G physical
+address by calling pci_set_consistent_dma_mask().
+Again, this includes drivers for all PCI-X and PCIe compliant devices.
+Many 64-bit "PCI" devices (before PCI-X) and some PCI-X devices are
+64-bit DMA capable for payload ("streaming") data but not control
+("consistent") data.
+
+
+Setup shared control data
+-------------------------
+Once the DMA masks are set, the driver can allocate "consistent" (a.k.a. shared)
+memory.  See Documentation/DMA-API.txt for a full description of
+the DMA APIs. This section is just a reminder that it needs to be done
+before enabling DMA on the device.
+
+
+Initialize device registers
+---------------------------
+Some drivers will need specific "capability" fields programmed
+or other "vendor specific" register initialized or reset.
+E.g. clearing pending interrupts.
+
+
+Register IRQ handler
+--------------------
+While calling request_irq() is the last step described here,
+this is often just another intermediate step to initialize a device.
+This step can often be deferred until the device is opened for use.
+
+All interrupt handlers for IRQ lines should be registered with IRQF_SHARED
+and use the devid to map IRQs to devices (remember that all PCI IRQ lines
+can be shared).
+
+request_irq() will associate an interrupt handler and device handle
+with an interrupt number. Historically interrupt numbers represent
+IRQ lines which run from the PCI device to the Interrupt controller.
+With MSI and MSI-X (more below) the interrupt number is a CPU "vector".
+
+request_irq() also enables the interrupt. Make sure the device is
+quiesced and does not have any interrupts pending before registering
+the interrupt handler.
+
+MSI and MSI-X are PCI capabilities. Both are "Message Signaled Interrupts"
+which deliver interrupts to the CPU via a DMA write to a Local APIC.
+The fundamental difference between MSI and MSI-X is how multiple
+"vectors" get allocated. MSI requires contiguous blocks of vectors
+while MSI-X can allocate several individual ones.
+
+MSI capability can be enabled by calling pci_alloc_irq_vectors() with the
+PCI_IRQ_MSI and/or PCI_IRQ_MSIX flags before calling request_irq(). This
+causes the PCI support to program CPU vector data into the PCI device
+capability registers. Many architectures, chip-sets, or BIOSes do NOT
+support MSI or MSI-X and a call to pci_alloc_irq_vectors with just
+the PCI_IRQ_MSI and PCI_IRQ_MSIX flags will fail, so try to always
+specify PCI_IRQ_LEGACY as well.
+
+Drivers that have different interrupt handlers for MSI/MSI-X and
+legacy INTx should chose the right one based on the msi_enabled
+and msix_enabled flags in the pci_dev structure after calling
+pci_alloc_irq_vectors.
+
+There are (at least) two really good reasons for using MSI:
+
+1) MSI is an exclusive interrupt vector by definition.
+   This means the interrupt handler doesn't have to verify
+   its device caused the interrupt.
+
+2) MSI avoids DMA/IRQ race conditions. DMA to host memory is guaranteed
+   to be visible to the host CPU(s) when the MSI is delivered. This
+   is important for both data coherency and avoiding stale control data.
+   This guarantee allows the driver to omit MMIO reads to flush
+   the DMA stream.
+
+See drivers/infiniband/hw/mthca/ or drivers/net/tg3.c for examples
+of MSI/MSI-X usage.
+
+
+PCI device shutdown
+===================
+
+When a PCI device driver is being unloaded, most of the following
+steps need to be performed:
+
+  - Disable the device from generating IRQs
+  - Release the IRQ (free_irq())
+  - Stop all DMA activity
+  - Release DMA buffers (both streaming and consistent)
+  - Unregister from other subsystems (e.g. scsi or netdev)
+  - Disable device from responding to MMIO/IO Port addresses
+  - Release MMIO/IO Port resource(s)
+
+
+Stop IRQs on the device
+-----------------------
+How to do this is chip/device specific. If it's not done, it opens
+the possibility of a "screaming interrupt" if (and only if)
+the IRQ is shared with another device.
+
+When the shared IRQ handler is "unhooked", the remaining devices
+using the same IRQ line will still need the IRQ enabled. Thus if the
+"unhooked" device asserts IRQ line, the system will respond assuming
+it was one of the remaining devices asserted the IRQ line. Since none
+of the other devices will handle the IRQ, the system will "hang" until
+it decides the IRQ isn't going to get handled and masks the IRQ (100,000
+iterations later). Once the shared IRQ is masked, the remaining devices
+will stop functioning properly. Not a nice situation.
+
+This is another reason to use MSI or MSI-X if it's available.
+MSI and MSI-X are defined to be exclusive interrupts and thus
+are not susceptible to the "screaming interrupt" problem.
+
+
+Release the IRQ
+---------------
+Once the device is quiesced (no more IRQs), one can call free_irq().
+This function will return control once any pending IRQs are handled,
+"unhook" the drivers IRQ handler from that IRQ, and finally release
+the IRQ if no one else is using it.
+
+
+Stop all DMA activity
+---------------------
+It's extremely important to stop all DMA operations BEFORE attempting
+to deallocate DMA control data. Failure to do so can result in memory
+corruption, hangs, and on some chip-sets a hard crash.
+
+Stopping DMA after stopping the IRQs can avoid races where the
+IRQ handler might restart DMA engines.
+
+While this step sounds obvious and trivial, several "mature" drivers
+didn't get this step right in the past.
+
+
+Release DMA buffers
+-------------------
+Once DMA is stopped, clean up streaming DMA first.
+I.e. unmap data buffers and return buffers to "upstream"
+owners if there is one.
+
+Then clean up "consistent" buffers which contain the control data.
+
+See Documentation/DMA-API.txt for details on unmapping interfaces.
+
+
+Unregister from other subsystems
+--------------------------------
+Most low level PCI device drivers support some other subsystem
+like USB, ALSA, SCSI, NetDev, Infiniband, etc. Make sure your
+driver isn't losing resources from that other subsystem.
+If this happens, typically the symptom is an Oops (panic) when
+the subsystem attempts to call into a driver that has been unloaded.
+
+
+Disable Device from responding to MMIO/IO Port addresses
+--------------------------------------------------------
+io_unmap() MMIO or IO Port resources and then call pci_disable_device().
+This is the symmetric opposite of pci_enable_device().
+Do not access device registers after calling pci_disable_device().
+
+
+Release MMIO/IO Port Resource(s)
+--------------------------------
+Call pci_release_region() to mark the MMIO or IO Port range as available.
+Failure to do so usually results in the inability to reload the driver.
+
+
+How to access PCI config space
+==============================
+
+You can use `pci_(read|write)_config_(byte|word|dword)` to access the config
+space of a device represented by `struct pci_dev *`. All these functions return
+0 when successful or an error code (`PCIBIOS_...`) which can be translated to a
+text string by pcibios_strerror. Most drivers expect that accesses to valid PCI
+devices don't fail.
+
+If you don't have a struct pci_dev available, you can call
+`pci_bus_(read|write)_config_(byte|word|dword)` to access a given device
+and function on that bus.
+
+If you access fields in the standard portion of the config header, please
+use symbolic names of locations and bits declared in <linux/pci.h>.
+
+If you need to access Extended PCI Capability registers, just call
+pci_find_capability() for the particular capability and it will find the
+corresponding register block for you.
+
+
+Other interesting functions
+===========================
+
+=============================	================================================
+pci_get_domain_bus_and_slot()	Find pci_dev corresponding to given domain,
+				bus and slot and number. If the device is
+				found, its reference count is increased.
+pci_set_power_state()		Set PCI Power Management state (0=D0 ... 3=D3)
+pci_find_capability()		Find specified capability in device's capability
+				list.
+pci_resource_start()		Returns bus start address for a given PCI region
+pci_resource_end()		Returns bus end address for a given PCI region
+pci_resource_len()		Returns the byte length of a PCI region
+pci_set_drvdata()		Set private driver data pointer for a pci_dev
+pci_get_drvdata()		Return private driver data pointer for a pci_dev
+pci_set_mwi()			Enable Memory-Write-Invalidate transactions.
+pci_clear_mwi()			Disable Memory-Write-Invalidate transactions.
+=============================	================================================
+
+
+Miscellaneous hints
+===================
+
+When displaying PCI device names to the user (for example when a driver wants
+to tell the user what card has it found), please use pci_name(pci_dev).
+
+Always refer to the PCI devices by a pointer to the pci_dev structure.
+All PCI layer functions use this identification and it's the only
+reasonable one. Don't use bus/slot/function numbers except for very
+special purposes -- on systems with multiple primary buses their semantics
+can be pretty complex.
+
+Don't try to turn on Fast Back to Back writes in your driver.  All devices
+on the bus need to be capable of doing it, so this is something which needs
+to be handled by platform and generic code, not individual drivers.
+
+
+Vendor and device identifications
+=================================
+
+Do not add new device or vendor IDs to include/linux/pci_ids.h unless they
+are shared across multiple drivers.  You can add private definitions in
+your driver if they're helpful, or just use plain hex constants.
+
+The device IDs are arbitrary hex numbers (vendor controlled) and normally used
+only in a single location, the pci_device_id table.
+
+Please DO submit new vendor/device IDs to http://pci-ids.ucw.cz/.
+There are mirrors of the pci.ids file at http://pciids.sourceforge.net/
+and https://github.com/pciutils/pciids.
+
+
+Obsolete functions
+==================
+
+There are several functions which you might come across when trying to
+port an old driver to the new PCI interface.  They are no longer present
+in the kernel as they aren't compatible with hotplug or PCI domains or
+having sane locking.
+
+=================	===========================================
+pci_find_device()	Superseded by pci_get_device()
+pci_find_subsys()	Superseded by pci_get_subsys()
+pci_find_slot()		Superseded by pci_get_domain_bus_and_slot()
+pci_get_slot()		Superseded by pci_get_domain_bus_and_slot()
+=================	===========================================
+
+The alternative is the traditional PCI device driver that walks PCI
+device lists. This is still possible but discouraged.
+
+
+MMIO Space and "Write Posting"
+==============================
+
+Converting a driver from using I/O Port space to using MMIO space
+often requires some additional changes. Specifically, "write posting"
+needs to be handled. Many drivers (e.g. tg3, acenic, sym53c8xx_2)
+already do this. I/O Port space guarantees write transactions reach the PCI
+device before the CPU can continue. Writes to MMIO space allow the CPU
+to continue before the transaction reaches the PCI device. HW weenies
+call this "Write Posting" because the write completion is "posted" to
+the CPU before the transaction has reached its destination.
+
+Thus, timing sensitive code should add readl() where the CPU is
+expected to wait before doing other work.  The classic "bit banging"
+sequence works fine for I/O Port space::
+
+       for (i = 8; --i; val >>= 1) {
+               outb(val & 1, ioport_reg);      /* write bit */
+               udelay(10);
+       }
+
+The same sequence for MMIO space should be::
+
+       for (i = 8; --i; val >>= 1) {
+               writeb(val & 1, mmio_reg);      /* write bit */
+               readb(safe_mmio_reg);           /* flush posted write */
+               udelay(10);
+       }
+
+It is important that "safe_mmio_reg" not have any side effects that
+interferes with the correct operation of the device.
+
+Another case to watch out for is when resetting a PCI device. Use PCI
+Configuration space reads to flush the writel(). This will gracefully
+handle the PCI master abort on all platforms if the PCI device is
+expected to not respond to a readl().  Most x86 platforms will allow
+MMIO reads to master abort (a.k.a. "Soft Fail") and return garbage
+(e.g. ~0). But many RISC platforms will crash (a.k.a."Hard Fail").
diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt
deleted file mode 100644
index badb26ac33dc..000000000000
--- a/Documentation/PCI/pci.txt
+++ /dev/null
@@ -1,636 +0,0 @@
-
-			How To Write Linux PCI Drivers
-
-		by Martin Mares <mj@ucw.cz> on 07-Feb-2000
-	updated by Grant Grundler <grundler@parisc-linux.org> on 23-Dec-2006
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The world of PCI is vast and full of (mostly unpleasant) surprises.
-Since each CPU architecture implements different chip-sets and PCI devices
-have different requirements (erm, "features"), the result is the PCI support
-in the Linux kernel is not as trivial as one would wish. This short paper
-tries to introduce all potential driver authors to Linux APIs for
-PCI device drivers.
-
-A more complete resource is the third edition of "Linux Device Drivers"
-by Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman.
-LDD3 is available for free (under Creative Commons License) from:
-
-	http://lwn.net/Kernel/LDD3/
-
-However, keep in mind that all documents are subject to "bit rot".
-Refer to the source code if things are not working as described here.
-
-Please send questions/comments/patches about Linux PCI API to the
-"Linux PCI" <linux-pci@atrey.karlin.mff.cuni.cz> mailing list.
-
-
-
-0. Structure of PCI drivers
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-PCI drivers "discover" PCI devices in a system via pci_register_driver().
-Actually, it's the other way around. When the PCI generic code discovers
-a new device, the driver with a matching "description" will be notified.
-Details on this below.
-
-pci_register_driver() leaves most of the probing for devices to
-the PCI layer and supports online insertion/removal of devices [thus
-supporting hot-pluggable PCI, CardBus, and Express-Card in a single driver].
-pci_register_driver() call requires passing in a table of function
-pointers and thus dictates the high level structure of a driver.
-
-Once the driver knows about a PCI device and takes ownership, the
-driver generally needs to perform the following initialization:
-
-	Enable the device
-	Request MMIO/IOP resources
-	Set the DMA mask size (for both coherent and streaming DMA)
-	Allocate and initialize shared control data (pci_allocate_coherent())
-	Access device configuration space (if needed)
-	Register IRQ handler (request_irq())
-	Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip)
-	Enable DMA/processing engines
-
-When done using the device, and perhaps the module needs to be unloaded,
-the driver needs to take the follow steps:
-	Disable the device from generating IRQs
-	Release the IRQ (free_irq())
-	Stop all DMA activity
-	Release DMA buffers (both streaming and coherent)
-	Unregister from other subsystems (e.g. scsi or netdev)
-	Release MMIO/IOP resources
-	Disable the device
-
-Most of these topics are covered in the following sections.
-For the rest look at LDD3 or <linux/pci.h> .
-
-If the PCI subsystem is not configured (CONFIG_PCI is not set), most of
-the PCI functions described below are defined as inline functions either
-completely empty or just returning an appropriate error codes to avoid
-lots of ifdefs in the drivers.
-
-
-
-1. pci_register_driver() call
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-PCI device drivers call pci_register_driver() during their
-initialization with a pointer to a structure describing the driver
-(struct pci_driver):
-
-	field name	Description
-	----------	------------------------------------------------------
-	id_table	Pointer to table of device ID's the driver is
-			interested in.  Most drivers should export this
-			table using MODULE_DEVICE_TABLE(pci,...).
-
-	probe		This probing function gets called (during execution
-			of pci_register_driver() for already existing
-			devices or later if a new device gets inserted) for
-			all PCI devices which match the ID table and are not
-			"owned" by the other drivers yet. This function gets
-			passed a "struct pci_dev *" for each device whose
-			entry in the ID table matches the device. The probe
-			function returns zero when the driver chooses to
-			take "ownership" of the device or an error code
-			(negative number) otherwise.
-			The probe function always gets called from process
-			context, so it can sleep.
-
-	remove		The remove() function gets called whenever a device
-			being handled by this driver is removed (either during
-			deregistration of the driver or when it's manually
-			pulled out of a hot-pluggable slot).
-			The remove function always gets called from process
-			context, so it can sleep.
-
-	suspend		Put device into low power state.
-	suspend_late	Put device into low power state.
-
-	resume_early	Wake device from low power state.
-	resume		Wake device from low power state.
-
-		(Please see Documentation/power/pci.txt for descriptions
-		of PCI Power Management and the related functions.)
-
-	shutdown	Hook into reboot_notifier_list (kernel/sys.c).
-			Intended to stop any idling DMA operations.
-			Useful for enabling wake-on-lan (NIC) or changing
-			the power state of a device before reboot.
-			e.g. drivers/net/e100.c.
-
-	err_handler	See Documentation/PCI/pci-error-recovery.txt
-
-
-The ID table is an array of struct pci_device_id entries ending with an
-all-zero entry.  Definitions with static const are generally preferred.
-
-Each entry consists of:
-
-	vendor,device	Vendor and device ID to match (or PCI_ANY_ID)
-
-	subvendor,	Subsystem vendor and device ID to match (or PCI_ANY_ID)
-	subdevice,
-
-	class		Device class, subclass, and "interface" to match.
-			See Appendix D of the PCI Local Bus Spec or
-			include/linux/pci_ids.h for a full list of classes.
-			Most drivers do not need to specify class/class_mask
-			as vendor/device is normally sufficient.
-
-	class_mask	limit which sub-fields of the class field are compared.
-			See drivers/scsi/sym53c8xx_2/ for example of usage.
-
-	driver_data	Data private to the driver.
-			Most drivers don't need to use driver_data field.
-			Best practice is to use driver_data as an index
-			into a static list of equivalent device types,
-			instead of using it as a pointer.
-
-
-Most drivers only need PCI_DEVICE() or PCI_DEVICE_CLASS() to set up
-a pci_device_id table.
-
-New PCI IDs may be added to a device driver pci_ids table at runtime
-as shown below:
-
-echo "vendor device subvendor subdevice class class_mask driver_data" > \
-/sys/bus/pci/drivers/{driver}/new_id
-
-All fields are passed in as hexadecimal values (no leading 0x).
-The vendor and device fields are mandatory, the others are optional. Users
-need pass only as many optional fields as necessary:
-	o subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF)
-	o class and classmask fields default to 0
-	o driver_data defaults to 0UL.
-
-Note that driver_data must match the value used by any of the pci_device_id
-entries defined in the driver. This makes the driver_data field mandatory
-if all the pci_device_id entries have a non-zero driver_data value.
-
-Once added, the driver probe routine will be invoked for any unclaimed
-PCI devices listed in its (newly updated) pci_ids list.
-
-When the driver exits, it just calls pci_unregister_driver() and the PCI layer
-automatically calls the remove hook for all devices handled by the driver.
-
-
-1.1 "Attributes" for driver functions/data
-
-Please mark the initialization and cleanup functions where appropriate
-(the corresponding macros are defined in <linux/init.h>):
-
-	__init		Initialization code. Thrown away after the driver
-			initializes.
-	__exit		Exit code. Ignored for non-modular drivers.
-
-Tips on when/where to use the above attributes:
-	o The module_init()/module_exit() functions (and all
-	  initialization functions called _only_ from these)
-	  should be marked __init/__exit.
-
-	o Do not mark the struct pci_driver.
-
-	o Do NOT mark a function if you are not sure which mark to use.
-	  Better to not mark the function than mark the function wrong.
-
-
-
-2. How to find PCI devices manually
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-PCI drivers should have a really good reason for not using the
-pci_register_driver() interface to search for PCI devices.
-The main reason PCI devices are controlled by multiple drivers
-is because one PCI device implements several different HW services.
-E.g. combined serial/parallel port/floppy controller.
-
-A manual search may be performed using the following constructs:
-
-Searching by vendor and device ID:
-
-	struct pci_dev *dev = NULL;
-	while (dev = pci_get_device(VENDOR_ID, DEVICE_ID, dev))
-		configure_device(dev);
-
-Searching by class ID (iterate in a similar way):
-
-	pci_get_class(CLASS_ID, dev)
-
-Searching by both vendor/device and subsystem vendor/device ID:
-
-	pci_get_subsys(VENDOR_ID,DEVICE_ID, SUBSYS_VENDOR_ID, SUBSYS_DEVICE_ID, dev).
-
-You can use the constant PCI_ANY_ID as a wildcard replacement for
-VENDOR_ID or DEVICE_ID.  This allows searching for any device from a
-specific vendor, for example.
-
-These functions are hotplug-safe. They increment the reference count on
-the pci_dev that they return. You must eventually (possibly at module unload)
-decrement the reference count on these devices by calling pci_dev_put().
-
-
-
-3. Device Initialization Steps
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-As noted in the introduction, most PCI drivers need the following steps
-for device initialization:
-
-	Enable the device
-	Request MMIO/IOP resources
-	Set the DMA mask size (for both coherent and streaming DMA)
-	Allocate and initialize shared control data (pci_allocate_coherent())
-	Access device configuration space (if needed)
-	Register IRQ handler (request_irq())
-	Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip)
-	Enable DMA/processing engines.
-
-The driver can access PCI config space registers at any time.
-(Well, almost. When running BIST, config space can go away...but
-that will just result in a PCI Bus Master Abort and config reads
-will return garbage).
-
-
-3.1 Enable the PCI device
-~~~~~~~~~~~~~~~~~~~~~~~~~
-Before touching any device registers, the driver needs to enable
-the PCI device by calling pci_enable_device(). This will:
-	o wake up the device if it was in suspended state,
-	o allocate I/O and memory regions of the device (if BIOS did not),
-	o allocate an IRQ (if BIOS did not).
-
-NOTE: pci_enable_device() can fail! Check the return value.
-
-[ OS BUG: we don't check resource allocations before enabling those
-  resources. The sequence would make more sense if we called
-  pci_request_resources() before calling pci_enable_device().
-  Currently, the device drivers can't detect the bug when when two
-  devices have been allocated the same range. This is not a common
-  problem and unlikely to get fixed soon.
-
-  This has been discussed before but not changed as of 2.6.19:
-	http://lkml.org/lkml/2006/3/2/194
-]
-
-pci_set_master() will enable DMA by setting the bus master bit
-in the PCI_COMMAND register. It also fixes the latency timer value if
-it's set to something bogus by the BIOS.  pci_clear_master() will
-disable DMA by clearing the bus master bit.
-
-If the PCI device can use the PCI Memory-Write-Invalidate transaction,
-call pci_set_mwi().  This enables the PCI_COMMAND bit for Mem-Wr-Inval
-and also ensures that the cache line size register is set correctly.
-Check the return value of pci_set_mwi() as not all architectures
-or chip-sets may support Memory-Write-Invalidate.  Alternatively,
-if Mem-Wr-Inval would be nice to have but is not required, call
-pci_try_set_mwi() to have the system do its best effort at enabling
-Mem-Wr-Inval.
-
-
-3.2 Request MMIO/IOP resources
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Memory (MMIO), and I/O port addresses should NOT be read directly
-from the PCI device config space. Use the values in the pci_dev structure
-as the PCI "bus address" might have been remapped to a "host physical"
-address by the arch/chip-set specific kernel support.
-
-See Documentation/io-mapping.txt for how to access device registers
-or device memory.
-
-The device driver needs to call pci_request_region() to verify
-no other device is already using the same address resource.
-Conversely, drivers should call pci_release_region() AFTER
-calling pci_disable_device().
-The idea is to prevent two devices colliding on the same address range.
-
-[ See OS BUG comment above. Currently (2.6.19), The driver can only
-  determine MMIO and IO Port resource availability _after_ calling
-  pci_enable_device(). ]
-
-Generic flavors of pci_request_region() are request_mem_region()
-(for MMIO ranges) and request_region() (for IO Port ranges).
-Use these for address resources that are not described by "normal" PCI
-BARs.
-
-Also see pci_request_selected_regions() below.
-
-
-3.3 Set the DMA mask size
-~~~~~~~~~~~~~~~~~~~~~~~~~
-[ If anything below doesn't make sense, please refer to
-  Documentation/DMA-API.txt. This section is just a reminder that
-  drivers need to indicate DMA capabilities of the device and is not
-  an authoritative source for DMA interfaces. ]
-
-While all drivers should explicitly indicate the DMA capability
-(e.g. 32 or 64 bit) of the PCI bus master, devices with more than
-32-bit bus master capability for streaming data need the driver
-to "register" this capability by calling pci_set_dma_mask() with
-appropriate parameters.  In general this allows more efficient DMA
-on systems where System RAM exists above 4G _physical_ address.
-
-Drivers for all PCI-X and PCIe compliant devices must call
-pci_set_dma_mask() as they are 64-bit DMA devices.
-
-Similarly, drivers must also "register" this capability if the device
-can directly address "consistent memory" in System RAM above 4G physical
-address by calling pci_set_consistent_dma_mask().
-Again, this includes drivers for all PCI-X and PCIe compliant devices.
-Many 64-bit "PCI" devices (before PCI-X) and some PCI-X devices are
-64-bit DMA capable for payload ("streaming") data but not control
-("consistent") data.
-
-
-3.4 Setup shared control data
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Once the DMA masks are set, the driver can allocate "consistent" (a.k.a. shared)
-memory.  See Documentation/DMA-API.txt for a full description of
-the DMA APIs. This section is just a reminder that it needs to be done
-before enabling DMA on the device.
-
-
-3.5 Initialize device registers
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Some drivers will need specific "capability" fields programmed
-or other "vendor specific" register initialized or reset.
-E.g. clearing pending interrupts.
-
-
-3.6 Register IRQ handler
-~~~~~~~~~~~~~~~~~~~~~~~~
-While calling request_irq() is the last step described here,
-this is often just another intermediate step to initialize a device.
-This step can often be deferred until the device is opened for use.
-
-All interrupt handlers for IRQ lines should be registered with IRQF_SHARED
-and use the devid to map IRQs to devices (remember that all PCI IRQ lines
-can be shared).
-
-request_irq() will associate an interrupt handler and device handle
-with an interrupt number. Historically interrupt numbers represent
-IRQ lines which run from the PCI device to the Interrupt controller.
-With MSI and MSI-X (more below) the interrupt number is a CPU "vector".
-
-request_irq() also enables the interrupt. Make sure the device is
-quiesced and does not have any interrupts pending before registering
-the interrupt handler.
-
-MSI and MSI-X are PCI capabilities. Both are "Message Signaled Interrupts"
-which deliver interrupts to the CPU via a DMA write to a Local APIC.
-The fundamental difference between MSI and MSI-X is how multiple
-"vectors" get allocated. MSI requires contiguous blocks of vectors
-while MSI-X can allocate several individual ones.
-
-MSI capability can be enabled by calling pci_alloc_irq_vectors() with the
-PCI_IRQ_MSI and/or PCI_IRQ_MSIX flags before calling request_irq(). This
-causes the PCI support to program CPU vector data into the PCI device
-capability registers. Many architectures, chip-sets, or BIOSes do NOT
-support MSI or MSI-X and a call to pci_alloc_irq_vectors with just
-the PCI_IRQ_MSI and PCI_IRQ_MSIX flags will fail, so try to always
-specify PCI_IRQ_LEGACY as well.
-
-Drivers that have different interrupt handlers for MSI/MSI-X and
-legacy INTx should chose the right one based on the msi_enabled
-and msix_enabled flags in the pci_dev structure after calling
-pci_alloc_irq_vectors.
-
-There are (at least) two really good reasons for using MSI:
-1) MSI is an exclusive interrupt vector by definition.
-   This means the interrupt handler doesn't have to verify
-   its device caused the interrupt.
-
-2) MSI avoids DMA/IRQ race conditions. DMA to host memory is guaranteed
-   to be visible to the host CPU(s) when the MSI is delivered. This
-   is important for both data coherency and avoiding stale control data.
-   This guarantee allows the driver to omit MMIO reads to flush
-   the DMA stream.
-
-See drivers/infiniband/hw/mthca/ or drivers/net/tg3.c for examples
-of MSI/MSI-X usage.
-
-
-
-4. PCI device shutdown
-~~~~~~~~~~~~~~~~~~~~~~~
-
-When a PCI device driver is being unloaded, most of the following
-steps need to be performed:
-
-	Disable the device from generating IRQs
-	Release the IRQ (free_irq())
-	Stop all DMA activity
-	Release DMA buffers (both streaming and consistent)
-	Unregister from other subsystems (e.g. scsi or netdev)
-	Disable device from responding to MMIO/IO Port addresses
-	Release MMIO/IO Port resource(s)
-
-
-4.1 Stop IRQs on the device
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-How to do this is chip/device specific. If it's not done, it opens
-the possibility of a "screaming interrupt" if (and only if)
-the IRQ is shared with another device.
-
-When the shared IRQ handler is "unhooked", the remaining devices
-using the same IRQ line will still need the IRQ enabled. Thus if the
-"unhooked" device asserts IRQ line, the system will respond assuming
-it was one of the remaining devices asserted the IRQ line. Since none
-of the other devices will handle the IRQ, the system will "hang" until
-it decides the IRQ isn't going to get handled and masks the IRQ (100,000
-iterations later). Once the shared IRQ is masked, the remaining devices
-will stop functioning properly. Not a nice situation.
-
-This is another reason to use MSI or MSI-X if it's available.
-MSI and MSI-X are defined to be exclusive interrupts and thus
-are not susceptible to the "screaming interrupt" problem.
-
-
-4.2 Release the IRQ
-~~~~~~~~~~~~~~~~~~~
-Once the device is quiesced (no more IRQs), one can call free_irq().
-This function will return control once any pending IRQs are handled,
-"unhook" the drivers IRQ handler from that IRQ, and finally release
-the IRQ if no one else is using it.
-
-
-4.3 Stop all DMA activity
-~~~~~~~~~~~~~~~~~~~~~~~~~
-It's extremely important to stop all DMA operations BEFORE attempting
-to deallocate DMA control data. Failure to do so can result in memory
-corruption, hangs, and on some chip-sets a hard crash.
-
-Stopping DMA after stopping the IRQs can avoid races where the
-IRQ handler might restart DMA engines.
-
-While this step sounds obvious and trivial, several "mature" drivers
-didn't get this step right in the past.
-
-
-4.4 Release DMA buffers
-~~~~~~~~~~~~~~~~~~~~~~~
-Once DMA is stopped, clean up streaming DMA first.
-I.e. unmap data buffers and return buffers to "upstream"
-owners if there is one.
-
-Then clean up "consistent" buffers which contain the control data.
-
-See Documentation/DMA-API.txt for details on unmapping interfaces.
-
-
-4.5 Unregister from other subsystems
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Most low level PCI device drivers support some other subsystem
-like USB, ALSA, SCSI, NetDev, Infiniband, etc. Make sure your
-driver isn't losing resources from that other subsystem.
-If this happens, typically the symptom is an Oops (panic) when
-the subsystem attempts to call into a driver that has been unloaded.
-
-
-4.6 Disable Device from responding to MMIO/IO Port addresses
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-io_unmap() MMIO or IO Port resources and then call pci_disable_device().
-This is the symmetric opposite of pci_enable_device().
-Do not access device registers after calling pci_disable_device().
-
-
-4.7 Release MMIO/IO Port Resource(s)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Call pci_release_region() to mark the MMIO or IO Port range as available.
-Failure to do so usually results in the inability to reload the driver.
-
-
-
-5. How to access PCI config space
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-You can use pci_(read|write)_config_(byte|word|dword) to access the config
-space of a device represented by struct pci_dev *. All these functions return 0
-when successful or an error code (PCIBIOS_...) which can be translated to a text
-string by pcibios_strerror. Most drivers expect that accesses to valid PCI
-devices don't fail.
-
-If you don't have a struct pci_dev available, you can call
-pci_bus_(read|write)_config_(byte|word|dword) to access a given device
-and function on that bus.
-
-If you access fields in the standard portion of the config header, please
-use symbolic names of locations and bits declared in <linux/pci.h>.
-
-If you need to access Extended PCI Capability registers, just call
-pci_find_capability() for the particular capability and it will find the
-corresponding register block for you.
-
-
-
-6. Other interesting functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-pci_get_domain_bus_and_slot()	Find pci_dev corresponding to given domain,
-				bus and slot and number. If the device is
-				found, its reference count is increased.
-pci_set_power_state()		Set PCI Power Management state (0=D0 ... 3=D3)
-pci_find_capability()		Find specified capability in device's capability
-				list.
-pci_resource_start()		Returns bus start address for a given PCI region
-pci_resource_end()		Returns bus end address for a given PCI region
-pci_resource_len()		Returns the byte length of a PCI region
-pci_set_drvdata()		Set private driver data pointer for a pci_dev
-pci_get_drvdata()		Return private driver data pointer for a pci_dev
-pci_set_mwi()			Enable Memory-Write-Invalidate transactions.
-pci_clear_mwi()			Disable Memory-Write-Invalidate transactions.
-
-
-
-7. Miscellaneous hints
-~~~~~~~~~~~~~~~~~~~~~~
-
-When displaying PCI device names to the user (for example when a driver wants
-to tell the user what card has it found), please use pci_name(pci_dev).
-
-Always refer to the PCI devices by a pointer to the pci_dev structure.
-All PCI layer functions use this identification and it's the only
-reasonable one. Don't use bus/slot/function numbers except for very
-special purposes -- on systems with multiple primary buses their semantics
-can be pretty complex.
-
-Don't try to turn on Fast Back to Back writes in your driver.  All devices
-on the bus need to be capable of doing it, so this is something which needs
-to be handled by platform and generic code, not individual drivers.
-
-
-
-8. Vendor and device identifications
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Do not add new device or vendor IDs to include/linux/pci_ids.h unless they
-are shared across multiple drivers.  You can add private definitions in
-your driver if they're helpful, or just use plain hex constants.
-
-The device IDs are arbitrary hex numbers (vendor controlled) and normally used
-only in a single location, the pci_device_id table.
-
-Please DO submit new vendor/device IDs to http://pci-ids.ucw.cz/.
-There are mirrors of the pci.ids file at http://pciids.sourceforge.net/
-and https://github.com/pciutils/pciids.
-
-
-
-9. Obsolete functions
-~~~~~~~~~~~~~~~~~~~~~
-
-There are several functions which you might come across when trying to
-port an old driver to the new PCI interface.  They are no longer present
-in the kernel as they aren't compatible with hotplug or PCI domains or
-having sane locking.
-
-pci_find_device()	Superseded by pci_get_device()
-pci_find_subsys()	Superseded by pci_get_subsys()
-pci_find_slot()		Superseded by pci_get_domain_bus_and_slot()
-pci_get_slot()		Superseded by pci_get_domain_bus_and_slot()
-
-
-The alternative is the traditional PCI device driver that walks PCI
-device lists. This is still possible but discouraged.
-
-
-
-10. MMIO Space and "Write Posting"
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Converting a driver from using I/O Port space to using MMIO space
-often requires some additional changes. Specifically, "write posting"
-needs to be handled. Many drivers (e.g. tg3, acenic, sym53c8xx_2)
-already do this. I/O Port space guarantees write transactions reach the PCI
-device before the CPU can continue. Writes to MMIO space allow the CPU
-to continue before the transaction reaches the PCI device. HW weenies
-call this "Write Posting" because the write completion is "posted" to
-the CPU before the transaction has reached its destination.
-
-Thus, timing sensitive code should add readl() where the CPU is
-expected to wait before doing other work.  The classic "bit banging"
-sequence works fine for I/O Port space:
-
-       for (i = 8; --i; val >>= 1) {
-               outb(val & 1, ioport_reg);      /* write bit */
-               udelay(10);
-       }
-
-The same sequence for MMIO space should be:
-
-       for (i = 8; --i; val >>= 1) {
-               writeb(val & 1, mmio_reg);      /* write bit */
-               readb(safe_mmio_reg);           /* flush posted write */
-               udelay(10);
-       }
-
-It is important that "safe_mmio_reg" not have any side effects that
-interferes with the correct operation of the device.
-
-Another case to watch out for is when resetting a PCI device. Use PCI
-Configuration space reads to flush the writel(). This will gracefully
-handle the PCI master abort on all platforms if the PCI device is
-expected to not respond to a readl().  Most x86 platforms will allow
-MMIO reads to master abort (a.k.a. "Soft Fail") and return garbage
-(e.g. ~0). But many RISC platforms will crash (a.k.a."Hard Fail").
-
-- 
cgit 


From 2e6422444894685a8a3135f7b982aa026dc0f74c Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Tue, 14 May 2019 22:47:25 +0800
Subject: Documentation: PCI: convert PCIEBUS-HOWTO.txt to reST

Convert plain text documentation to reStructuredText format and add it to
Sphinx TOC tree.  No essential content change.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/PCI/PCIEBUS-HOWTO.txt | 198 --------------------------------
 Documentation/PCI/index.rst         |   1 +
 Documentation/PCI/picebus-howto.rst | 220 ++++++++++++++++++++++++++++++++++++
 3 files changed, 221 insertions(+), 198 deletions(-)
 delete mode 100644 Documentation/PCI/PCIEBUS-HOWTO.txt
 create mode 100644 Documentation/PCI/picebus-howto.rst

(limited to 'Documentation')

diff --git a/Documentation/PCI/PCIEBUS-HOWTO.txt b/Documentation/PCI/PCIEBUS-HOWTO.txt
deleted file mode 100644
index 15f0bb3b5045..000000000000
--- a/Documentation/PCI/PCIEBUS-HOWTO.txt
+++ /dev/null
@@ -1,198 +0,0 @@
-		The PCI Express Port Bus Driver Guide HOWTO
-	Tom L Nguyen tom.l.nguyen@intel.com
-			11/03/2004
-
-1. About this guide
-
-This guide describes the basics of the PCI Express Port Bus driver
-and provides information on how to enable the service drivers to
-register/unregister with the PCI Express Port Bus Driver.
-
-2. Copyright 2004 Intel Corporation
-
-3. What is the PCI Express Port Bus Driver
-
-A PCI Express Port is a logical PCI-PCI Bridge structure. There
-are two types of PCI Express Port: the Root Port and the Switch
-Port. The Root Port originates a PCI Express link from a PCI Express
-Root Complex and the Switch Port connects PCI Express links to
-internal logical PCI buses. The Switch Port, which has its secondary
-bus representing the switch's internal routing logic, is called the
-switch's Upstream Port. The switch's Downstream Port is bridging from
-switch's internal routing bus to a bus representing the downstream
-PCI Express link from the PCI Express Switch.
-
-A PCI Express Port can provide up to four distinct functions,
-referred to in this document as services, depending on its port type.
-PCI Express Port's services include native hotplug support (HP),
-power management event support (PME), advanced error reporting
-support (AER), and virtual channel support (VC). These services may
-be handled by a single complex driver or be individually distributed
-and handled by corresponding service drivers.
-
-4. Why use the PCI Express Port Bus Driver?
-
-In existing Linux kernels, the Linux Device Driver Model allows a
-physical device to be handled by only a single driver. The PCI
-Express Port is a PCI-PCI Bridge device with multiple distinct
-services. To maintain a clean and simple solution each service
-may have its own software service driver. In this case several
-service drivers will compete for a single PCI-PCI Bridge device.
-For example, if the PCI Express Root Port native hotplug service
-driver is loaded first, it claims a PCI-PCI Bridge Root Port. The
-kernel therefore does not load other service drivers for that Root
-Port. In other words, it is impossible to have multiple service
-drivers load and run on a PCI-PCI Bridge device simultaneously
-using the current driver model.
-
-To enable multiple service drivers running simultaneously requires
-having a PCI Express Port Bus driver, which manages all populated
-PCI Express Ports and distributes all provided service requests
-to the corresponding service drivers as required. Some key
-advantages of using the PCI Express Port Bus driver are listed below:
-
-	- Allow multiple service drivers to run simultaneously on
-	  a PCI-PCI Bridge Port device.
-
-	- Allow service drivers implemented in an independent
-	  staged approach.
-
-	- Allow one service driver to run on multiple PCI-PCI Bridge
-	  Port devices.
-
-	- Manage and distribute resources of a PCI-PCI Bridge Port
-	  device to requested service drivers.
-
-5. Configuring the PCI Express Port Bus Driver vs. Service Drivers
-
-5.1 Including the PCI Express Port Bus Driver Support into the Kernel
-
-Including the PCI Express Port Bus driver depends on whether the PCI
-Express support is included in the kernel config. The kernel will
-automatically include the PCI Express Port Bus driver as a kernel
-driver when the PCI Express support is enabled in the kernel.
-
-5.2 Enabling Service Driver Support
-
-PCI device drivers are implemented based on Linux Device Driver Model.
-All service drivers are PCI device drivers. As discussed above, it is
-impossible to load any service driver once the kernel has loaded the
-PCI Express Port Bus Driver. To meet the PCI Express Port Bus Driver
-Model requires some minimal changes on existing service drivers that
-imposes no impact on the functionality of existing service drivers.
-
-A service driver is required to use the two APIs shown below to
-register its service with the PCI Express Port Bus driver (see
-section 5.2.1 & 5.2.2). It is important that a service driver
-initializes the pcie_port_service_driver data structure, included in
-header file /include/linux/pcieport_if.h, before calling these APIs.
-Failure to do so will result an identity mismatch, which prevents
-the PCI Express Port Bus driver from loading a service driver.
-
-5.2.1 pcie_port_service_register
-
-int pcie_port_service_register(struct pcie_port_service_driver *new)
-
-This API replaces the Linux Driver Model's pci_register_driver API. A
-service driver should always calls pcie_port_service_register at
-module init. Note that after service driver being loaded, calls
-such as pci_enable_device(dev) and pci_set_master(dev) are no longer
-necessary since these calls are executed by the PCI Port Bus driver.
-
-5.2.2 pcie_port_service_unregister
-
-void pcie_port_service_unregister(struct pcie_port_service_driver *new)
-
-pcie_port_service_unregister replaces the Linux Driver Model's
-pci_unregister_driver. It's always called by service driver when a
-module exits.
-
-5.2.3 Sample Code
-
-Below is sample service driver code to initialize the port service
-driver data structure.
-
-static struct pcie_port_service_id service_id[] = { {
-	.vendor = PCI_ANY_ID,
-	.device = PCI_ANY_ID,
-	.port_type = PCIE_RC_PORT,
-	.service_type = PCIE_PORT_SERVICE_AER,
-	}, { /* end: all zeroes */ }
-};
-
-static struct pcie_port_service_driver root_aerdrv = {
-	.name		= (char *)device_name,
-	.id_table	= &service_id[0],
-
-	.probe		= aerdrv_load,
-	.remove		= aerdrv_unload,
-
-	.suspend	= aerdrv_suspend,
-	.resume		= aerdrv_resume,
-};
-
-Below is a sample code for registering/unregistering a service
-driver.
-
-static int __init aerdrv_service_init(void)
-{
-	int retval = 0;
-
-	retval = pcie_port_service_register(&root_aerdrv);
-	if (!retval) {
-		/*
-		 * FIX ME
-		 */
-	}
-	return retval;
-}
-
-static void __exit aerdrv_service_exit(void)
-{
-	pcie_port_service_unregister(&root_aerdrv);
-}
-
-module_init(aerdrv_service_init);
-module_exit(aerdrv_service_exit);
-
-6. Possible Resource Conflicts
-
-Since all service drivers of a PCI-PCI Bridge Port device are
-allowed to run simultaneously, below lists a few of possible resource
-conflicts with proposed solutions.
-
-6.1 MSI and MSI-X Vector Resource
-
-Once MSI or MSI-X interrupts are enabled on a device, it stays in this
-mode until they are disabled again.  Since service drivers of the same
-PCI-PCI Bridge port share the same physical device, if an individual
-service driver enables or disables MSI/MSI-X mode it may result
-unpredictable behavior.
-
-To avoid this situation all service drivers are not permitted to
-switch interrupt mode on its device. The PCI Express Port Bus driver
-is responsible for determining the interrupt mode and this should be
-transparent to service drivers. Service drivers need to know only
-the vector IRQ assigned to the field irq of struct pcie_device, which
-is passed in when the PCI Express Port Bus driver probes each service
-driver. Service drivers should use (struct pcie_device*)dev->irq to
-call request_irq/free_irq. In addition, the interrupt mode is stored
-in the field interrupt_mode of struct pcie_device.
-
-6.3 PCI Memory/IO Mapped Regions
-
-Service drivers for PCI Express Power Management (PME), Advanced
-Error Reporting (AER), Hot-Plug (HP) and Virtual Channel (VC) access
-PCI configuration space on the PCI Express port. In all cases the
-registers accessed are independent of each other. This patch assumes
-that all service drivers will be well behaved and not overwrite
-other service driver's configuration settings.
-
-6.4 PCI Config Registers
-
-Each service driver runs its PCI config operations on its own
-capability structure except the PCI Express capability structure, in
-which Root Control register and Device Control register are shared
-between PME and AER. This patch assumes that all service drivers
-will be well behaved and not overwrite other service driver's
-configuration settings.
diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst
index 7babf43709b0..79d6d75bbf28 100644
--- a/Documentation/PCI/index.rst
+++ b/Documentation/PCI/index.rst
@@ -9,3 +9,4 @@ Linux PCI Bus Subsystem
    :numbered:
 
    pci
+   picebus-howto
diff --git a/Documentation/PCI/picebus-howto.rst b/Documentation/PCI/picebus-howto.rst
new file mode 100644
index 000000000000..f882ff62c51f
--- /dev/null
+++ b/Documentation/PCI/picebus-howto.rst
@@ -0,0 +1,220 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+===========================================
+The PCI Express Port Bus Driver Guide HOWTO
+===========================================
+
+:Author: Tom L Nguyen tom.l.nguyen@intel.com 11/03/2004
+:Copyright: |copy| 2004 Intel Corporation
+
+About this guide
+================
+
+This guide describes the basics of the PCI Express Port Bus driver
+and provides information on how to enable the service drivers to
+register/unregister with the PCI Express Port Bus Driver.
+
+
+What is the PCI Express Port Bus Driver
+=======================================
+
+A PCI Express Port is a logical PCI-PCI Bridge structure. There
+are two types of PCI Express Port: the Root Port and the Switch
+Port. The Root Port originates a PCI Express link from a PCI Express
+Root Complex and the Switch Port connects PCI Express links to
+internal logical PCI buses. The Switch Port, which has its secondary
+bus representing the switch's internal routing logic, is called the
+switch's Upstream Port. The switch's Downstream Port is bridging from
+switch's internal routing bus to a bus representing the downstream
+PCI Express link from the PCI Express Switch.
+
+A PCI Express Port can provide up to four distinct functions,
+referred to in this document as services, depending on its port type.
+PCI Express Port's services include native hotplug support (HP),
+power management event support (PME), advanced error reporting
+support (AER), and virtual channel support (VC). These services may
+be handled by a single complex driver or be individually distributed
+and handled by corresponding service drivers.
+
+Why use the PCI Express Port Bus Driver?
+========================================
+
+In existing Linux kernels, the Linux Device Driver Model allows a
+physical device to be handled by only a single driver. The PCI
+Express Port is a PCI-PCI Bridge device with multiple distinct
+services. To maintain a clean and simple solution each service
+may have its own software service driver. In this case several
+service drivers will compete for a single PCI-PCI Bridge device.
+For example, if the PCI Express Root Port native hotplug service
+driver is loaded first, it claims a PCI-PCI Bridge Root Port. The
+kernel therefore does not load other service drivers for that Root
+Port. In other words, it is impossible to have multiple service
+drivers load and run on a PCI-PCI Bridge device simultaneously
+using the current driver model.
+
+To enable multiple service drivers running simultaneously requires
+having a PCI Express Port Bus driver, which manages all populated
+PCI Express Ports and distributes all provided service requests
+to the corresponding service drivers as required. Some key
+advantages of using the PCI Express Port Bus driver are listed below:
+
+  - Allow multiple service drivers to run simultaneously on
+    a PCI-PCI Bridge Port device.
+
+  - Allow service drivers implemented in an independent
+    staged approach.
+
+  - Allow one service driver to run on multiple PCI-PCI Bridge
+    Port devices.
+
+  - Manage and distribute resources of a PCI-PCI Bridge Port
+    device to requested service drivers.
+
+Configuring the PCI Express Port Bus Driver vs. Service Drivers
+===============================================================
+
+Including the PCI Express Port Bus Driver Support into the Kernel
+-----------------------------------------------------------------
+
+Including the PCI Express Port Bus driver depends on whether the PCI
+Express support is included in the kernel config. The kernel will
+automatically include the PCI Express Port Bus driver as a kernel
+driver when the PCI Express support is enabled in the kernel.
+
+Enabling Service Driver Support
+-------------------------------
+
+PCI device drivers are implemented based on Linux Device Driver Model.
+All service drivers are PCI device drivers. As discussed above, it is
+impossible to load any service driver once the kernel has loaded the
+PCI Express Port Bus Driver. To meet the PCI Express Port Bus Driver
+Model requires some minimal changes on existing service drivers that
+imposes no impact on the functionality of existing service drivers.
+
+A service driver is required to use the two APIs shown below to
+register its service with the PCI Express Port Bus driver (see
+section 5.2.1 & 5.2.2). It is important that a service driver
+initializes the pcie_port_service_driver data structure, included in
+header file /include/linux/pcieport_if.h, before calling these APIs.
+Failure to do so will result an identity mismatch, which prevents
+the PCI Express Port Bus driver from loading a service driver.
+
+pcie_port_service_register
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+::
+
+  int pcie_port_service_register(struct pcie_port_service_driver *new)
+
+This API replaces the Linux Driver Model's pci_register_driver API. A
+service driver should always calls pcie_port_service_register at
+module init. Note that after service driver being loaded, calls
+such as pci_enable_device(dev) and pci_set_master(dev) are no longer
+necessary since these calls are executed by the PCI Port Bus driver.
+
+pcie_port_service_unregister
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+::
+
+  void pcie_port_service_unregister(struct pcie_port_service_driver *new)
+
+pcie_port_service_unregister replaces the Linux Driver Model's
+pci_unregister_driver. It's always called by service driver when a
+module exits.
+
+Sample Code
+~~~~~~~~~~~
+
+Below is sample service driver code to initialize the port service
+driver data structure.
+::
+
+  static struct pcie_port_service_id service_id[] = { {
+    .vendor = PCI_ANY_ID,
+    .device = PCI_ANY_ID,
+    .port_type = PCIE_RC_PORT,
+    .service_type = PCIE_PORT_SERVICE_AER,
+    }, { /* end: all zeroes */ }
+  };
+
+  static struct pcie_port_service_driver root_aerdrv = {
+    .name		= (char *)device_name,
+    .id_table	= &service_id[0],
+
+    .probe		= aerdrv_load,
+    .remove		= aerdrv_unload,
+
+    .suspend	= aerdrv_suspend,
+    .resume		= aerdrv_resume,
+  };
+
+Below is a sample code for registering/unregistering a service
+driver.
+::
+
+  static int __init aerdrv_service_init(void)
+  {
+    int retval = 0;
+
+    retval = pcie_port_service_register(&root_aerdrv);
+    if (!retval) {
+      /*
+      * FIX ME
+      */
+    }
+    return retval;
+  }
+
+  static void __exit aerdrv_service_exit(void)
+  {
+    pcie_port_service_unregister(&root_aerdrv);
+  }
+
+  module_init(aerdrv_service_init);
+  module_exit(aerdrv_service_exit);
+
+Possible Resource Conflicts
+===========================
+
+Since all service drivers of a PCI-PCI Bridge Port device are
+allowed to run simultaneously, below lists a few of possible resource
+conflicts with proposed solutions.
+
+MSI and MSI-X Vector Resource
+-----------------------------
+
+Once MSI or MSI-X interrupts are enabled on a device, it stays in this
+mode until they are disabled again.  Since service drivers of the same
+PCI-PCI Bridge port share the same physical device, if an individual
+service driver enables or disables MSI/MSI-X mode it may result
+unpredictable behavior.
+
+To avoid this situation all service drivers are not permitted to
+switch interrupt mode on its device. The PCI Express Port Bus driver
+is responsible for determining the interrupt mode and this should be
+transparent to service drivers. Service drivers need to know only
+the vector IRQ assigned to the field irq of struct pcie_device, which
+is passed in when the PCI Express Port Bus driver probes each service
+driver. Service drivers should use (struct pcie_device*)dev->irq to
+call request_irq/free_irq. In addition, the interrupt mode is stored
+in the field interrupt_mode of struct pcie_device.
+
+PCI Memory/IO Mapped Regions
+----------------------------
+
+Service drivers for PCI Express Power Management (PME), Advanced
+Error Reporting (AER), Hot-Plug (HP) and Virtual Channel (VC) access
+PCI configuration space on the PCI Express port. In all cases the
+registers accessed are independent of each other. This patch assumes
+that all service drivers will be well behaved and not overwrite
+other service driver's configuration settings.
+
+PCI Config Registers
+--------------------
+
+Each service driver runs its PCI config operations on its own
+capability structure except the PCI Express capability structure, in
+which Root Control register and Device Control register are shared
+between PME and AER. This patch assumes that all service drivers
+will be well behaved and not overwrite other service driver's
+configuration settings.
-- 
cgit 


From 4d2c729c62328d6841111d98396374476367ae83 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Tue, 14 May 2019 22:47:26 +0800
Subject: Documentation: PCI: convert pci-iov-howto.txt to reST

Convert plain text documentation to reStructuredText format and add it to
Sphinx TOC tree.  No essential content change.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/PCI/index.rst         |   1 +
 Documentation/PCI/pci-iov-howto.rst | 172 ++++++++++++++++++++++++++++++++++++
 Documentation/PCI/pci-iov-howto.txt | 147 ------------------------------
 3 files changed, 173 insertions(+), 147 deletions(-)
 create mode 100644 Documentation/PCI/pci-iov-howto.rst
 delete mode 100644 Documentation/PCI/pci-iov-howto.txt

(limited to 'Documentation')

diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst
index 79d6d75bbf28..0d9390298c4a 100644
--- a/Documentation/PCI/index.rst
+++ b/Documentation/PCI/index.rst
@@ -10,3 +10,4 @@ Linux PCI Bus Subsystem
 
    pci
    picebus-howto
+   pci-iov-howto
diff --git a/Documentation/PCI/pci-iov-howto.rst b/Documentation/PCI/pci-iov-howto.rst
new file mode 100644
index 000000000000..b9fd003206f1
--- /dev/null
+++ b/Documentation/PCI/pci-iov-howto.rst
@@ -0,0 +1,172 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+====================================
+PCI Express I/O Virtualization Howto
+====================================
+
+:Copyright: |copy| 2009 Intel Corporation
+:Authors: - Yu Zhao <yu.zhao@intel.com>
+          - Donald Dutile <ddutile@redhat.com>
+
+Overview
+========
+
+What is SR-IOV
+--------------
+
+Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended
+capability which makes one physical device appear as multiple virtual
+devices. The physical device is referred to as Physical Function (PF)
+while the virtual devices are referred to as Virtual Functions (VF).
+Allocation of the VF can be dynamically controlled by the PF via
+registers encapsulated in the capability. By default, this feature is
+not enabled and the PF behaves as traditional PCIe device. Once it's
+turned on, each VF's PCI configuration space can be accessed by its own
+Bus, Device and Function Number (Routing ID). And each VF also has PCI
+Memory Space, which is used to map its register set. VF device driver
+operates on the register set so it can be functional and appear as a
+real existing PCI device.
+
+User Guide
+==========
+
+How can I enable SR-IOV capability
+----------------------------------
+
+Multiple methods are available for SR-IOV enablement.
+In the first method, the device driver (PF driver) will control the
+enabling and disabling of the capability via API provided by SR-IOV core.
+If the hardware has SR-IOV capability, loading its PF driver would
+enable it and all VFs associated with the PF.  Some PF drivers require
+a module parameter to be set to determine the number of VFs to enable.
+In the second method, a write to the sysfs file sriov_numvfs will
+enable and disable the VFs associated with a PCIe PF.  This method
+enables per-PF, VF enable/disable values versus the first method,
+which applies to all PFs of the same device.  Additionally, the
+PCI SRIOV core support ensures that enable/disable operations are
+valid to reduce duplication in multiple drivers for the same
+checks, e.g., check numvfs == 0 if enabling VFs, ensure
+numvfs <= totalvfs.
+The second method is the recommended method for new/future VF devices.
+
+How can I use the Virtual Functions
+-----------------------------------
+
+The VF is treated as hot-plugged PCI devices in the kernel, so they
+should be able to work in the same way as real PCI devices. The VF
+requires device driver that is same as a normal PCI device's.
+
+Developer Guide
+===============
+
+SR-IOV API
+----------
+
+To enable SR-IOV capability:
+
+(a) For the first method, in the driver::
+
+	int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
+
+'nr_virtfn' is number of VFs to be enabled.
+
+(b) For the second method, from sysfs::
+
+	echo 'nr_virtfn' > \
+        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
+
+To disable SR-IOV capability:
+
+(a) For the first method, in the driver::
+
+	void pci_disable_sriov(struct pci_dev *dev);
+
+(b) For the second method, from sysfs::
+
+	echo  0 > \
+        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
+
+To enable auto probing VFs by a compatible driver on the host, run
+command below before enabling SR-IOV capabilities. This is the
+default behavior.
+::
+
+	echo 1 > \
+        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_drivers_autoprobe
+
+To disable auto probing VFs by a compatible driver on the host, run
+command below before enabling SR-IOV capabilities. Updating this
+entry will not affect VFs which are already probed.
+::
+
+	echo  0 > \
+        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_drivers_autoprobe
+
+Usage example
+-------------
+
+Following piece of code illustrates the usage of the SR-IOV API.
+::
+
+	static int dev_probe(struct pci_dev *dev, const struct pci_device_id *id)
+	{
+		pci_enable_sriov(dev, NR_VIRTFN);
+
+		...
+
+		return 0;
+	}
+
+	static void dev_remove(struct pci_dev *dev)
+	{
+		pci_disable_sriov(dev);
+
+		...
+	}
+
+	static int dev_suspend(struct pci_dev *dev, pm_message_t state)
+	{
+		...
+
+		return 0;
+	}
+
+	static int dev_resume(struct pci_dev *dev)
+	{
+		...
+
+		return 0;
+	}
+
+	static void dev_shutdown(struct pci_dev *dev)
+	{
+		...
+	}
+
+	static int dev_sriov_configure(struct pci_dev *dev, int numvfs)
+	{
+		if (numvfs > 0) {
+			...
+			pci_enable_sriov(dev, numvfs);
+			...
+			return numvfs;
+		}
+		if (numvfs == 0) {
+			....
+			pci_disable_sriov(dev);
+			...
+			return 0;
+		}
+	}
+
+	static struct pci_driver dev_driver = {
+		.name =		"SR-IOV Physical Function driver",
+		.id_table =	dev_id_table,
+		.probe =	dev_probe,
+		.remove =	dev_remove,
+		.suspend =	dev_suspend,
+		.resume =	dev_resume,
+		.shutdown =	dev_shutdown,
+		.sriov_configure = dev_sriov_configure,
+	};
diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.txt
deleted file mode 100644
index d2a84151e99c..000000000000
--- a/Documentation/PCI/pci-iov-howto.txt
+++ /dev/null
@@ -1,147 +0,0 @@
-		PCI Express I/O Virtualization Howto
-		Copyright (C) 2009 Intel Corporation
-		    Yu Zhao <yu.zhao@intel.com>
-
-		Update: November 2012
-			-- sysfs-based SRIOV enable-/disable-ment
-		Donald Dutile <ddutile@redhat.com>
-
-1. Overview
-
-1.1 What is SR-IOV
-
-Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended
-capability which makes one physical device appear as multiple virtual
-devices. The physical device is referred to as Physical Function (PF)
-while the virtual devices are referred to as Virtual Functions (VF).
-Allocation of the VF can be dynamically controlled by the PF via
-registers encapsulated in the capability. By default, this feature is
-not enabled and the PF behaves as traditional PCIe device. Once it's
-turned on, each VF's PCI configuration space can be accessed by its own
-Bus, Device and Function Number (Routing ID). And each VF also has PCI
-Memory Space, which is used to map its register set. VF device driver
-operates on the register set so it can be functional and appear as a
-real existing PCI device.
-
-2. User Guide
-
-2.1 How can I enable SR-IOV capability
-
-Multiple methods are available for SR-IOV enablement.
-In the first method, the device driver (PF driver) will control the
-enabling and disabling of the capability via API provided by SR-IOV core.
-If the hardware has SR-IOV capability, loading its PF driver would
-enable it and all VFs associated with the PF.  Some PF drivers require
-a module parameter to be set to determine the number of VFs to enable.
-In the second method, a write to the sysfs file sriov_numvfs will
-enable and disable the VFs associated with a PCIe PF.  This method
-enables per-PF, VF enable/disable values versus the first method,
-which applies to all PFs of the same device.  Additionally, the
-PCI SRIOV core support ensures that enable/disable operations are
-valid to reduce duplication in multiple drivers for the same
-checks, e.g., check numvfs == 0 if enabling VFs, ensure
-numvfs <= totalvfs.
-The second method is the recommended method for new/future VF devices.
-
-2.2 How can I use the Virtual Functions
-
-The VF is treated as hot-plugged PCI devices in the kernel, so they
-should be able to work in the same way as real PCI devices. The VF
-requires device driver that is same as a normal PCI device's.
-
-3. Developer Guide
-
-3.1 SR-IOV API
-
-To enable SR-IOV capability:
-(a) For the first method, in the driver:
-	int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
-	'nr_virtfn' is number of VFs to be enabled.
-(b) For the second method, from sysfs:
-	echo 'nr_virtfn' > \
-        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
-
-To disable SR-IOV capability:
-(a) For the first method, in the driver:
-	void pci_disable_sriov(struct pci_dev *dev);
-(b) For the second method, from sysfs:
-	echo  0 > \
-        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
-
-To enable auto probing VFs by a compatible driver on the host, run
-command below before enabling SR-IOV capabilities. This is the
-default behavior.
-	echo 1 > \
-        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_drivers_autoprobe
-
-To disable auto probing VFs by a compatible driver on the host, run
-command below before enabling SR-IOV capabilities. Updating this
-entry will not affect VFs which are already probed.
-	echo  0 > \
-        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_drivers_autoprobe
-
-3.2 Usage example
-
-Following piece of code illustrates the usage of the SR-IOV API.
-
-static int dev_probe(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	pci_enable_sriov(dev, NR_VIRTFN);
-
-	...
-
-	return 0;
-}
-
-static void dev_remove(struct pci_dev *dev)
-{
-	pci_disable_sriov(dev);
-
-	...
-}
-
-static int dev_suspend(struct pci_dev *dev, pm_message_t state)
-{
-	...
-
-	return 0;
-}
-
-static int dev_resume(struct pci_dev *dev)
-{
-	...
-
-	return 0;
-}
-
-static void dev_shutdown(struct pci_dev *dev)
-{
-	...
-}
-
-static int dev_sriov_configure(struct pci_dev *dev, int numvfs)
-{
-	if (numvfs > 0) {
-		...
-		pci_enable_sriov(dev, numvfs);
-		...
-		return numvfs;
-	}
-	if (numvfs == 0) {
-		....
-		pci_disable_sriov(dev);
-		...
-		return 0;
-	}
-}
-
-static struct pci_driver dev_driver = {
-	.name =		"SR-IOV Physical Function driver",
-	.id_table =	dev_id_table,
-	.probe =	dev_probe,
-	.remove =	dev_remove,
-	.suspend =	dev_suspend,
-	.resume =	dev_resume,
-	.shutdown =	dev_shutdown,
-	.sriov_configure = dev_sriov_configure,
-};
-- 
cgit 


From 3b9bae029b60ee0fa6d6205e0debfad4482434a7 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Tue, 14 May 2019 22:47:27 +0800
Subject: Documentation: PCI: convert MSI-HOWTO.txt to reST

Convert plain text documentation to reStructuredText format and add it to
Sphinx TOC tree.  No essential content change.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/PCI/MSI-HOWTO.txt | 270 -------------------------------------
 Documentation/PCI/index.rst     |   1 +
 Documentation/PCI/msi-howto.rst | 287 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 288 insertions(+), 270 deletions(-)
 delete mode 100644 Documentation/PCI/MSI-HOWTO.txt
 create mode 100644 Documentation/PCI/msi-howto.rst

(limited to 'Documentation')

diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
deleted file mode 100644
index 618e13d5e276..000000000000
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ /dev/null
@@ -1,270 +0,0 @@
-		The MSI Driver Guide HOWTO
-	Tom L Nguyen tom.l.nguyen@intel.com
-			10/03/2003
-	Revised Feb 12, 2004 by Martine Silbermann
-		email: Martine.Silbermann@hp.com
-	Revised Jun 25, 2004 by Tom L Nguyen
-	Revised Jul  9, 2008 by Matthew Wilcox <willy@linux.intel.com>
-		Copyright 2003, 2008 Intel Corporation
-
-1. About this guide
-
-This guide describes the basics of Message Signaled Interrupts (MSIs),
-the advantages of using MSI over traditional interrupt mechanisms, how
-to change your driver to use MSI or MSI-X and some basic diagnostics to
-try if a device doesn't support MSIs.
-
-
-2. What are MSIs?
-
-A Message Signaled Interrupt is a write from the device to a special
-address which causes an interrupt to be received by the CPU.
-
-The MSI capability was first specified in PCI 2.2 and was later enhanced
-in PCI 3.0 to allow each interrupt to be masked individually.  The MSI-X
-capability was also introduced with PCI 3.0.  It supports more interrupts
-per device than MSI and allows interrupts to be independently configured.
-
-Devices may support both MSI and MSI-X, but only one can be enabled at
-a time.
-
-
-3. Why use MSIs?
-
-There are three reasons why using MSIs can give an advantage over
-traditional pin-based interrupts.
-
-Pin-based PCI interrupts are often shared amongst several devices.
-To support this, the kernel must call each interrupt handler associated
-with an interrupt, which leads to reduced performance for the system as
-a whole.  MSIs are never shared, so this problem cannot arise.
-
-When a device writes data to memory, then raises a pin-based interrupt,
-it is possible that the interrupt may arrive before all the data has
-arrived in memory (this becomes more likely with devices behind PCI-PCI
-bridges).  In order to ensure that all the data has arrived in memory,
-the interrupt handler must read a register on the device which raised
-the interrupt.  PCI transaction ordering rules require that all the data
-arrive in memory before the value may be returned from the register.
-Using MSIs avoids this problem as the interrupt-generating write cannot
-pass the data writes, so by the time the interrupt is raised, the driver
-knows that all the data has arrived in memory.
-
-PCI devices can only support a single pin-based interrupt per function.
-Often drivers have to query the device to find out what event has
-occurred, slowing down interrupt handling for the common case.  With
-MSIs, a device can support more interrupts, allowing each interrupt
-to be specialised to a different purpose.  One possible design gives
-infrequent conditions (such as errors) their own interrupt which allows
-the driver to handle the normal interrupt handling path more efficiently.
-Other possible designs include giving one interrupt to each packet queue
-in a network card or each port in a storage controller.
-
-
-4. How to use MSIs
-
-PCI devices are initialised to use pin-based interrupts.  The device
-driver has to set up the device to use MSI or MSI-X.  Not all machines
-support MSIs correctly, and for those machines, the APIs described below
-will simply fail and the device will continue to use pin-based interrupts.
-
-4.1 Include kernel support for MSIs
-
-To support MSI or MSI-X, the kernel must be built with the CONFIG_PCI_MSI
-option enabled.  This option is only available on some architectures,
-and it may depend on some other options also being set.  For example,
-on x86, you must also enable X86_UP_APIC or SMP in order to see the
-CONFIG_PCI_MSI option.
-
-4.2 Using MSI
-
-Most of the hard work is done for the driver in the PCI layer.  The driver
-simply has to request that the PCI layer set up the MSI capability for this
-device.
-
-To automatically use MSI or MSI-X interrupt vectors, use the following
-function:
-
-  int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
-		unsigned int max_vecs, unsigned int flags);
-
-which allocates up to max_vecs interrupt vectors for a PCI device.  It
-returns the number of vectors allocated or a negative error.  If the device
-has a requirements for a minimum number of vectors the driver can pass a
-min_vecs argument set to this limit, and the PCI core will return -ENOSPC
-if it can't meet the minimum number of vectors.
-
-The flags argument is used to specify which type of interrupt can be used
-by the device and the driver (PCI_IRQ_LEGACY, PCI_IRQ_MSI, PCI_IRQ_MSIX).
-A convenient short-hand (PCI_IRQ_ALL_TYPES) is also available to ask for
-any possible kind of interrupt.  If the PCI_IRQ_AFFINITY flag is set,
-pci_alloc_irq_vectors() will spread the interrupts around the available CPUs.
-
-To get the Linux IRQ numbers passed to request_irq() and free_irq() and the
-vectors, use the following function:
-
-  int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
-
-Any allocated resources should be freed before removing the device using
-the following function:
-
-  void pci_free_irq_vectors(struct pci_dev *dev);
-
-If a device supports both MSI-X and MSI capabilities, this API will use the
-MSI-X facilities in preference to the MSI facilities.  MSI-X supports any
-number of interrupts between 1 and 2048.  In contrast, MSI is restricted to
-a maximum of 32 interrupts (and must be a power of two).  In addition, the
-MSI interrupt vectors must be allocated consecutively, so the system might
-not be able to allocate as many vectors for MSI as it could for MSI-X.  On
-some platforms, MSI interrupts must all be targeted at the same set of CPUs
-whereas MSI-X interrupts can all be targeted at different CPUs.
-
-If a device supports neither MSI-X or MSI it will fall back to a single
-legacy IRQ vector.
-
-The typical usage of MSI or MSI-X interrupts is to allocate as many vectors
-as possible, likely up to the limit supported by the device.  If nvec is
-larger than the number supported by the device it will automatically be
-capped to the supported limit, so there is no need to query the number of
-vectors supported beforehand:
-
-	nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_ALL_TYPES)
-	if (nvec < 0)
-		goto out_err;
-
-If a driver is unable or unwilling to deal with a variable number of MSI
-interrupts it can request a particular number of interrupts by passing that
-number to pci_alloc_irq_vectors() function as both 'min_vecs' and
-'max_vecs' parameters:
-
-	ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_ALL_TYPES);
-	if (ret < 0)
-		goto out_err;
-
-The most notorious example of the request type described above is enabling
-the single MSI mode for a device.  It could be done by passing two 1s as
-'min_vecs' and 'max_vecs':
-
-	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
-	if (ret < 0)
-		goto out_err;
-
-Some devices might not support using legacy line interrupts, in which case
-the driver can specify that only MSI or MSI-X is acceptable:
-
-	nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_MSI | PCI_IRQ_MSIX);
-	if (nvec < 0)
-		goto out_err;
-
-4.3 Legacy APIs
-
-The following old APIs to enable and disable MSI or MSI-X interrupts should
-not be used in new code:
-
-  pci_enable_msi()		/* deprecated */
-  pci_disable_msi()		/* deprecated */
-  pci_enable_msix_range()	/* deprecated */
-  pci_enable_msix_exact()	/* deprecated */
-  pci_disable_msix()		/* deprecated */
-
-Additionally there are APIs to provide the number of supported MSI or MSI-X
-vectors: pci_msi_vec_count() and pci_msix_vec_count().  In general these
-should be avoided in favor of letting pci_alloc_irq_vectors() cap the
-number of vectors.  If you have a legitimate special use case for the count
-of vectors we might have to revisit that decision and add a
-pci_nr_irq_vectors() helper that handles MSI and MSI-X transparently.
-
-4.4 Considerations when using MSIs
-
-4.4.1 Spinlocks
-
-Most device drivers have a per-device spinlock which is taken in the
-interrupt handler.  With pin-based interrupts or a single MSI, it is not
-necessary to disable interrupts (Linux guarantees the same interrupt will
-not be re-entered).  If a device uses multiple interrupts, the driver
-must disable interrupts while the lock is held.  If the device sends
-a different interrupt, the driver will deadlock trying to recursively
-acquire the spinlock.  Such deadlocks can be avoided by using
-spin_lock_irqsave() or spin_lock_irq() which disable local interrupts
-and acquire the lock (see Documentation/kernel-hacking/locking.rst).
-
-4.5 How to tell whether MSI/MSI-X is enabled on a device
-
-Using 'lspci -v' (as root) may show some devices with "MSI", "Message
-Signalled Interrupts" or "MSI-X" capabilities.  Each of these capabilities
-has an 'Enable' flag which is followed with either "+" (enabled)
-or "-" (disabled).
-
-
-5. MSI quirks
-
-Several PCI chipsets or devices are known not to support MSIs.
-The PCI stack provides three ways to disable MSIs:
-
-1. globally
-2. on all devices behind a specific bridge
-3. on a single device
-
-5.1. Disabling MSIs globally
-
-Some host chipsets simply don't support MSIs properly.  If we're
-lucky, the manufacturer knows this and has indicated it in the ACPI
-FADT table.  In this case, Linux automatically disables MSIs.
-Some boards don't include this information in the table and so we have
-to detect them ourselves.  The complete list of these is found near the
-quirk_disable_all_msi() function in drivers/pci/quirks.c.
-
-If you have a board which has problems with MSIs, you can pass pci=nomsi
-on the kernel command line to disable MSIs on all devices.  It would be
-in your best interests to report the problem to linux-pci@vger.kernel.org
-including a full 'lspci -v' so we can add the quirks to the kernel.
-
-5.2. Disabling MSIs below a bridge
-
-Some PCI bridges are not able to route MSIs between busses properly.
-In this case, MSIs must be disabled on all devices behind the bridge.
-
-Some bridges allow you to enable MSIs by changing some bits in their
-PCI configuration space (especially the Hypertransport chipsets such
-as the nVidia nForce and Serverworks HT2000).  As with host chipsets,
-Linux mostly knows about them and automatically enables MSIs if it can.
-If you have a bridge unknown to Linux, you can enable
-MSIs in configuration space using whatever method you know works, then
-enable MSIs on that bridge by doing:
-
-       echo 1 > /sys/bus/pci/devices/$bridge/msi_bus
-
-where $bridge is the PCI address of the bridge you've enabled (eg
-0000:00:0e.0).
-
-To disable MSIs, echo 0 instead of 1.  Changing this value should be
-done with caution as it could break interrupt handling for all devices
-below this bridge.
-
-Again, please notify linux-pci@vger.kernel.org of any bridges that need
-special handling.
-
-5.3. Disabling MSIs on a single device
-
-Some devices are known to have faulty MSI implementations.  Usually this
-is handled in the individual device driver, but occasionally it's necessary
-to handle this with a quirk.  Some drivers have an option to disable use
-of MSI.  While this is a convenient workaround for the driver author,
-it is not good practice, and should not be emulated.
-
-5.4. Finding why MSIs are disabled on a device
-
-From the above three sections, you can see that there are many reasons
-why MSIs may not be enabled for a given device.  Your first step should
-be to examine your dmesg carefully to determine whether MSIs are enabled
-for your machine.  You should also check your .config to be sure you
-have enabled CONFIG_PCI_MSI.
-
-Then, 'lspci -t' gives the list of bridges above a device.  Reading
-/sys/bus/pci/devices/*/msi_bus will tell you whether MSIs are enabled (1)
-or disabled (0).  If 0 is found in any of the msi_bus files belonging
-to bridges between the PCI root and the device, MSIs are disabled.
-
-It is also worth checking the device driver to see whether it supports MSIs.
-For example, it may contain calls to pci_irq_alloc_vectors() with the
-PCI_IRQ_MSI or PCI_IRQ_MSIX flags.
diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst
index 0d9390298c4a..458354daac47 100644
--- a/Documentation/PCI/index.rst
+++ b/Documentation/PCI/index.rst
@@ -11,3 +11,4 @@ Linux PCI Bus Subsystem
    pci
    picebus-howto
    pci-iov-howto
+   msi-howto
diff --git a/Documentation/PCI/msi-howto.rst b/Documentation/PCI/msi-howto.rst
new file mode 100644
index 000000000000..994cbb660ade
--- /dev/null
+++ b/Documentation/PCI/msi-howto.rst
@@ -0,0 +1,287 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+==========================
+The MSI Driver Guide HOWTO
+==========================
+
+:Authors: Tom L Nguyen; Martine Silbermann; Matthew Wilcox
+
+:Copyright: 2003, 2008 Intel Corporation
+
+About this guide
+================
+
+This guide describes the basics of Message Signaled Interrupts (MSIs),
+the advantages of using MSI over traditional interrupt mechanisms, how
+to change your driver to use MSI or MSI-X and some basic diagnostics to
+try if a device doesn't support MSIs.
+
+
+What are MSIs?
+==============
+
+A Message Signaled Interrupt is a write from the device to a special
+address which causes an interrupt to be received by the CPU.
+
+The MSI capability was first specified in PCI 2.2 and was later enhanced
+in PCI 3.0 to allow each interrupt to be masked individually.  The MSI-X
+capability was also introduced with PCI 3.0.  It supports more interrupts
+per device than MSI and allows interrupts to be independently configured.
+
+Devices may support both MSI and MSI-X, but only one can be enabled at
+a time.
+
+
+Why use MSIs?
+=============
+
+There are three reasons why using MSIs can give an advantage over
+traditional pin-based interrupts.
+
+Pin-based PCI interrupts are often shared amongst several devices.
+To support this, the kernel must call each interrupt handler associated
+with an interrupt, which leads to reduced performance for the system as
+a whole.  MSIs are never shared, so this problem cannot arise.
+
+When a device writes data to memory, then raises a pin-based interrupt,
+it is possible that the interrupt may arrive before all the data has
+arrived in memory (this becomes more likely with devices behind PCI-PCI
+bridges).  In order to ensure that all the data has arrived in memory,
+the interrupt handler must read a register on the device which raised
+the interrupt.  PCI transaction ordering rules require that all the data
+arrive in memory before the value may be returned from the register.
+Using MSIs avoids this problem as the interrupt-generating write cannot
+pass the data writes, so by the time the interrupt is raised, the driver
+knows that all the data has arrived in memory.
+
+PCI devices can only support a single pin-based interrupt per function.
+Often drivers have to query the device to find out what event has
+occurred, slowing down interrupt handling for the common case.  With
+MSIs, a device can support more interrupts, allowing each interrupt
+to be specialised to a different purpose.  One possible design gives
+infrequent conditions (such as errors) their own interrupt which allows
+the driver to handle the normal interrupt handling path more efficiently.
+Other possible designs include giving one interrupt to each packet queue
+in a network card or each port in a storage controller.
+
+
+How to use MSIs
+===============
+
+PCI devices are initialised to use pin-based interrupts.  The device
+driver has to set up the device to use MSI or MSI-X.  Not all machines
+support MSIs correctly, and for those machines, the APIs described below
+will simply fail and the device will continue to use pin-based interrupts.
+
+Include kernel support for MSIs
+-------------------------------
+
+To support MSI or MSI-X, the kernel must be built with the CONFIG_PCI_MSI
+option enabled.  This option is only available on some architectures,
+and it may depend on some other options also being set.  For example,
+on x86, you must also enable X86_UP_APIC or SMP in order to see the
+CONFIG_PCI_MSI option.
+
+Using MSI
+---------
+
+Most of the hard work is done for the driver in the PCI layer.  The driver
+simply has to request that the PCI layer set up the MSI capability for this
+device.
+
+To automatically use MSI or MSI-X interrupt vectors, use the following
+function::
+
+  int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
+		unsigned int max_vecs, unsigned int flags);
+
+which allocates up to max_vecs interrupt vectors for a PCI device.  It
+returns the number of vectors allocated or a negative error.  If the device
+has a requirements for a minimum number of vectors the driver can pass a
+min_vecs argument set to this limit, and the PCI core will return -ENOSPC
+if it can't meet the minimum number of vectors.
+
+The flags argument is used to specify which type of interrupt can be used
+by the device and the driver (PCI_IRQ_LEGACY, PCI_IRQ_MSI, PCI_IRQ_MSIX).
+A convenient short-hand (PCI_IRQ_ALL_TYPES) is also available to ask for
+any possible kind of interrupt.  If the PCI_IRQ_AFFINITY flag is set,
+pci_alloc_irq_vectors() will spread the interrupts around the available CPUs.
+
+To get the Linux IRQ numbers passed to request_irq() and free_irq() and the
+vectors, use the following function::
+
+  int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
+
+Any allocated resources should be freed before removing the device using
+the following function::
+
+  void pci_free_irq_vectors(struct pci_dev *dev);
+
+If a device supports both MSI-X and MSI capabilities, this API will use the
+MSI-X facilities in preference to the MSI facilities.  MSI-X supports any
+number of interrupts between 1 and 2048.  In contrast, MSI is restricted to
+a maximum of 32 interrupts (and must be a power of two).  In addition, the
+MSI interrupt vectors must be allocated consecutively, so the system might
+not be able to allocate as many vectors for MSI as it could for MSI-X.  On
+some platforms, MSI interrupts must all be targeted at the same set of CPUs
+whereas MSI-X interrupts can all be targeted at different CPUs.
+
+If a device supports neither MSI-X or MSI it will fall back to a single
+legacy IRQ vector.
+
+The typical usage of MSI or MSI-X interrupts is to allocate as many vectors
+as possible, likely up to the limit supported by the device.  If nvec is
+larger than the number supported by the device it will automatically be
+capped to the supported limit, so there is no need to query the number of
+vectors supported beforehand::
+
+	nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_ALL_TYPES)
+	if (nvec < 0)
+		goto out_err;
+
+If a driver is unable or unwilling to deal with a variable number of MSI
+interrupts it can request a particular number of interrupts by passing that
+number to pci_alloc_irq_vectors() function as both 'min_vecs' and
+'max_vecs' parameters::
+
+	ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_ALL_TYPES);
+	if (ret < 0)
+		goto out_err;
+
+The most notorious example of the request type described above is enabling
+the single MSI mode for a device.  It could be done by passing two 1s as
+'min_vecs' and 'max_vecs'::
+
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+	if (ret < 0)
+		goto out_err;
+
+Some devices might not support using legacy line interrupts, in which case
+the driver can specify that only MSI or MSI-X is acceptable::
+
+	nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_MSI | PCI_IRQ_MSIX);
+	if (nvec < 0)
+		goto out_err;
+
+Legacy APIs
+-----------
+
+The following old APIs to enable and disable MSI or MSI-X interrupts should
+not be used in new code::
+
+  pci_enable_msi()		/* deprecated */
+  pci_disable_msi()		/* deprecated */
+  pci_enable_msix_range()	/* deprecated */
+  pci_enable_msix_exact()	/* deprecated */
+  pci_disable_msix()		/* deprecated */
+
+Additionally there are APIs to provide the number of supported MSI or MSI-X
+vectors: pci_msi_vec_count() and pci_msix_vec_count().  In general these
+should be avoided in favor of letting pci_alloc_irq_vectors() cap the
+number of vectors.  If you have a legitimate special use case for the count
+of vectors we might have to revisit that decision and add a
+pci_nr_irq_vectors() helper that handles MSI and MSI-X transparently.
+
+Considerations when using MSIs
+------------------------------
+
+Spinlocks
+~~~~~~~~~
+
+Most device drivers have a per-device spinlock which is taken in the
+interrupt handler.  With pin-based interrupts or a single MSI, it is not
+necessary to disable interrupts (Linux guarantees the same interrupt will
+not be re-entered).  If a device uses multiple interrupts, the driver
+must disable interrupts while the lock is held.  If the device sends
+a different interrupt, the driver will deadlock trying to recursively
+acquire the spinlock.  Such deadlocks can be avoided by using
+spin_lock_irqsave() or spin_lock_irq() which disable local interrupts
+and acquire the lock (see Documentation/kernel-hacking/locking.rst).
+
+How to tell whether MSI/MSI-X is enabled on a device
+----------------------------------------------------
+
+Using 'lspci -v' (as root) may show some devices with "MSI", "Message
+Signalled Interrupts" or "MSI-X" capabilities.  Each of these capabilities
+has an 'Enable' flag which is followed with either "+" (enabled)
+or "-" (disabled).
+
+
+MSI quirks
+==========
+
+Several PCI chipsets or devices are known not to support MSIs.
+The PCI stack provides three ways to disable MSIs:
+
+1. globally
+2. on all devices behind a specific bridge
+3. on a single device
+
+Disabling MSIs globally
+-----------------------
+
+Some host chipsets simply don't support MSIs properly.  If we're
+lucky, the manufacturer knows this and has indicated it in the ACPI
+FADT table.  In this case, Linux automatically disables MSIs.
+Some boards don't include this information in the table and so we have
+to detect them ourselves.  The complete list of these is found near the
+quirk_disable_all_msi() function in drivers/pci/quirks.c.
+
+If you have a board which has problems with MSIs, you can pass pci=nomsi
+on the kernel command line to disable MSIs on all devices.  It would be
+in your best interests to report the problem to linux-pci@vger.kernel.org
+including a full 'lspci -v' so we can add the quirks to the kernel.
+
+Disabling MSIs below a bridge
+-----------------------------
+
+Some PCI bridges are not able to route MSIs between busses properly.
+In this case, MSIs must be disabled on all devices behind the bridge.
+
+Some bridges allow you to enable MSIs by changing some bits in their
+PCI configuration space (especially the Hypertransport chipsets such
+as the nVidia nForce and Serverworks HT2000).  As with host chipsets,
+Linux mostly knows about them and automatically enables MSIs if it can.
+If you have a bridge unknown to Linux, you can enable
+MSIs in configuration space using whatever method you know works, then
+enable MSIs on that bridge by doing::
+
+       echo 1 > /sys/bus/pci/devices/$bridge/msi_bus
+
+where $bridge is the PCI address of the bridge you've enabled (eg
+0000:00:0e.0).
+
+To disable MSIs, echo 0 instead of 1.  Changing this value should be
+done with caution as it could break interrupt handling for all devices
+below this bridge.
+
+Again, please notify linux-pci@vger.kernel.org of any bridges that need
+special handling.
+
+Disabling MSIs on a single device
+---------------------------------
+
+Some devices are known to have faulty MSI implementations.  Usually this
+is handled in the individual device driver, but occasionally it's necessary
+to handle this with a quirk.  Some drivers have an option to disable use
+of MSI.  While this is a convenient workaround for the driver author,
+it is not good practice, and should not be emulated.
+
+Finding why MSIs are disabled on a device
+-----------------------------------------
+
+From the above three sections, you can see that there are many reasons
+why MSIs may not be enabled for a given device.  Your first step should
+be to examine your dmesg carefully to determine whether MSIs are enabled
+for your machine.  You should also check your .config to be sure you
+have enabled CONFIG_PCI_MSI.
+
+Then, 'lspci -t' gives the list of bridges above a device. Reading
+`/sys/bus/pci/devices/*/msi_bus` will tell you whether MSIs are enabled (1)
+or disabled (0).  If 0 is found in any of the msi_bus files belonging
+to bridges between the PCI root and the device, MSIs are disabled.
+
+It is also worth checking the device driver to see whether it supports MSIs.
+For example, it may contain calls to pci_irq_alloc_vectors() with the
+PCI_IRQ_MSI or PCI_IRQ_MSIX flags.
-- 
cgit 


From b66357f32fb9a68bb9f2126a894d3b9bbc4e821c Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Tue, 14 May 2019 22:47:28 +0800
Subject: Documentation: PCI: convert acpi-info.txt to reST

Convert plain text documentation to reStructuredText format and add it to
Sphinx TOC tree.  No essential content change.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/PCI/acpi-info.rst | 192 ++++++++++++++++++++++++++++++++++++++++
 Documentation/PCI/acpi-info.txt | 187 --------------------------------------
 Documentation/PCI/index.rst     |   1 +
 3 files changed, 193 insertions(+), 187 deletions(-)
 create mode 100644 Documentation/PCI/acpi-info.rst
 delete mode 100644 Documentation/PCI/acpi-info.txt

(limited to 'Documentation')

diff --git a/Documentation/PCI/acpi-info.rst b/Documentation/PCI/acpi-info.rst
new file mode 100644
index 000000000000..060217081c79
--- /dev/null
+++ b/Documentation/PCI/acpi-info.rst
@@ -0,0 +1,192 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================================
+ACPI considerations for PCI host bridges
+========================================
+
+The general rule is that the ACPI namespace should describe everything the
+OS might use unless there's another way for the OS to find it [1, 2].
+
+For example, there's no standard hardware mechanism for enumerating PCI
+host bridges, so the ACPI namespace must describe each host bridge, the
+method for accessing PCI config space below it, the address space windows
+the host bridge forwards to PCI (using _CRS), and the routing of legacy
+INTx interrupts (using _PRT).
+
+PCI devices, which are below the host bridge, generally do not need to be
+described via ACPI.  The OS can discover them via the standard PCI
+enumeration mechanism, using config accesses to discover and identify
+devices and read and size their BARs.  However, ACPI may describe PCI
+devices if it provides power management or hotplug functionality for them
+or if the device has INTx interrupts connected by platform interrupt
+controllers and a _PRT is needed to describe those connections.
+
+ACPI resource description is done via _CRS objects of devices in the ACPI
+namespace [2].   The _CRS is like a generalized PCI BAR: the OS can read
+_CRS and figure out what resource is being consumed even if it doesn't have
+a driver for the device [3].  That's important because it means an old OS
+can work correctly even on a system with new devices unknown to the OS.
+The new devices might not do anything, but the OS can at least make sure no
+resources conflict with them.
+
+Static tables like MCFG, HPET, ECDT, etc., are *not* mechanisms for
+reserving address space.  The static tables are for things the OS needs to
+know early in boot, before it can parse the ACPI namespace.  If a new table
+is defined, an old OS needs to operate correctly even though it ignores the
+table.  _CRS allows that because it is generic and understood by the old
+OS; a static table does not.
+
+If the OS is expected to manage a non-discoverable device described via
+ACPI, that device will have a specific _HID/_CID that tells the OS what
+driver to bind to it, and the _CRS tells the OS and the driver where the
+device's registers are.
+
+PCI host bridges are PNP0A03 or PNP0A08 devices.  Their _CRS should
+describe all the address space they consume.  This includes all the windows
+they forward down to the PCI bus, as well as registers of the host bridge
+itself that are not forwarded to PCI.  The host bridge registers include
+things like secondary/subordinate bus registers that determine the bus
+range below the bridge, window registers that describe the apertures, etc.
+These are all device-specific, non-architected things, so the only way a
+PNP0A03/PNP0A08 driver can manage them is via _PRS/_CRS/_SRS, which contain
+the device-specific details.  The host bridge registers also include ECAM
+space, since it is consumed by the host bridge.
+
+ACPI defines a Consumer/Producer bit to distinguish the bridge registers
+("Consumer") from the bridge apertures ("Producer") [4, 5], but early
+BIOSes didn't use that bit correctly.  The result is that the current ACPI
+spec defines Consumer/Producer only for the Extended Address Space
+descriptors; the bit should be ignored in the older QWord/DWord/Word
+Address Space descriptors.  Consequently, OSes have to assume all
+QWord/DWord/Word descriptors are windows.
+
+Prior to the addition of Extended Address Space descriptors, the failure of
+Consumer/Producer meant there was no way to describe bridge registers in
+the PNP0A03/PNP0A08 device itself.  The workaround was to describe the
+bridge registers (including ECAM space) in PNP0C02 catch-all devices [6].
+With the exception of ECAM, the bridge register space is device-specific
+anyway, so the generic PNP0A03/PNP0A08 driver (pci_root.c) has no need to
+know about it.  
+
+New architectures should be able to use "Consumer" Extended Address Space
+descriptors in the PNP0A03 device for bridge registers, including ECAM,
+although a strict interpretation of [6] might prohibit this.  Old x86 and
+ia64 kernels assume all address space descriptors, including "Consumer"
+Extended Address Space ones, are windows, so it would not be safe to
+describe bridge registers this way on those architectures.
+
+PNP0C02 "motherboard" devices are basically a catch-all.  There's no
+programming model for them other than "don't use these resources for
+anything else."  So a PNP0C02 _CRS should claim any address space that is
+(1) not claimed by _CRS under any other device object in the ACPI namespace
+and (2) should not be assigned by the OS to something else.
+
+The PCIe spec requires the Enhanced Configuration Access Method (ECAM)
+unless there's a standard firmware interface for config access, e.g., the
+ia64 SAL interface [7].  A host bridge consumes ECAM memory address space
+and converts memory accesses into PCI configuration accesses.  The spec
+defines the ECAM address space layout and functionality; only the base of
+the address space is device-specific.  An ACPI OS learns the base address
+from either the static MCFG table or a _CBA method in the PNP0A03 device.
+
+The MCFG table must describe the ECAM space of non-hot pluggable host
+bridges [8].  Since MCFG is a static table and can't be updated by hotplug,
+a _CBA method in the PNP0A03 device describes the ECAM space of a
+hot-pluggable host bridge [9].  Note that for both MCFG and _CBA, the base
+address always corresponds to bus 0, even if the bus range below the bridge
+(which is reported via _CRS) doesn't start at 0.
+
+
+[1] ACPI 6.2, sec 6.1:
+    For any device that is on a non-enumerable type of bus (for example, an
+    ISA bus), OSPM enumerates the devices' identifier(s) and the ACPI
+    system firmware must supply an _HID object ... for each device to
+    enable OSPM to do that.
+
+[2] ACPI 6.2, sec 3.7:
+    The OS enumerates motherboard devices simply by reading through the
+    ACPI Namespace looking for devices with hardware IDs.
+
+    Each device enumerated by ACPI includes ACPI-defined objects in the
+    ACPI Namespace that report the hardware resources the device could
+    occupy [_PRS], an object that reports the resources that are currently
+    used by the device [_CRS], and objects for configuring those resources
+    [_SRS].  The information is used by the Plug and Play OS (OSPM) to
+    configure the devices.
+
+[3] ACPI 6.2, sec 6.2:
+    OSPM uses device configuration objects to configure hardware resources
+    for devices enumerated via ACPI.  Device configuration objects provide
+    information about current and possible resource requirements, the
+    relationship between shared resources, and methods for configuring
+    hardware resources.
+
+    When OSPM enumerates a device, it calls _PRS to determine the resource
+    requirements of the device.  It may also call _CRS to find the current
+    resource settings for the device.  Using this information, the Plug and
+    Play system determines what resources the device should consume and
+    sets those resources by calling the device’s _SRS control method.
+
+    In ACPI, devices can consume resources (for example, legacy keyboards),
+    provide resources (for example, a proprietary PCI bridge), or do both.
+    Unless otherwise specified, resources for a device are assumed to be
+    taken from the nearest matching resource above the device in the device
+    hierarchy.
+
+[4] ACPI 6.2, sec 6.4.3.5.1, 2, 3, 4:
+    QWord/DWord/Word Address Space Descriptor (.1, .2, .3)
+      General Flags: Bit [0] Ignored
+
+    Extended Address Space Descriptor (.4)
+      General Flags: Bit [0] Consumer/Producer:
+
+        * 1 – This device consumes this resource
+        * 0 – This device produces and consumes this resource
+
+[5] ACPI 6.2, sec 19.6.43:
+    ResourceUsage specifies whether the Memory range is consumed by
+    this device (ResourceConsumer) or passed on to child devices
+    (ResourceProducer).  If nothing is specified, then
+    ResourceConsumer is assumed.
+
+[6] PCI Firmware 3.2, sec 4.1.2:
+    If the operating system does not natively comprehend reserving the
+    MMCFG region, the MMCFG region must be reserved by firmware.  The
+    address range reported in the MCFG table or by _CBA method (see Section
+    4.1.3) must be reserved by declaring a motherboard resource.  For most
+    systems, the motherboard resource would appear at the root of the ACPI
+    namespace (under \_SB) in a node with a _HID of EISAID (PNP0C02), and
+    the resources in this case should not be claimed in the root PCI bus’s
+    _CRS.  The resources can optionally be returned in Int15 E820 or
+    EFIGetMemoryMap as reserved memory but must always be reported through
+    ACPI as a motherboard resource.
+
+[7] PCI Express 4.0, sec 7.2.2:
+    For systems that are PC-compatible, or that do not implement a
+    processor-architecture-specific firmware interface standard that allows
+    access to the Configuration Space, the ECAM is required as defined in
+    this section.
+
+[8] PCI Firmware 3.2, sec 4.1.2:
+    The MCFG table is an ACPI table that is used to communicate the base
+    addresses corresponding to the non-hot removable PCI Segment Groups
+    range within a PCI Segment Group available to the operating system at
+    boot. This is required for the PC-compatible systems.
+
+    The MCFG table is only used to communicate the base addresses
+    corresponding to the PCI Segment Groups available to the system at
+    boot.
+
+[9] PCI Firmware 3.2, sec 4.1.3:
+    The _CBA (Memory mapped Configuration Base Address) control method is
+    an optional ACPI object that returns the 64-bit memory mapped
+    configuration base address for the hot plug capable host bridge. The
+    base address returned by _CBA is processor-relative address. The _CBA
+    control method evaluates to an Integer.
+
+    This control method appears under a host bridge object. When the _CBA
+    method appears under an active host bridge object, the operating system
+    evaluates this structure to identify the memory mapped configuration
+    base address corresponding to the PCI Segment Group for the bus number
+    range specified in _CRS method. An ACPI name space object that contains
+    the _CBA method must also contain a corresponding _SEG method.
diff --git a/Documentation/PCI/acpi-info.txt b/Documentation/PCI/acpi-info.txt
deleted file mode 100644
index 3ffa3b03970e..000000000000
--- a/Documentation/PCI/acpi-info.txt
+++ /dev/null
@@ -1,187 +0,0 @@
-		ACPI considerations for PCI host bridges
-
-The general rule is that the ACPI namespace should describe everything the
-OS might use unless there's another way for the OS to find it [1, 2].
-
-For example, there's no standard hardware mechanism for enumerating PCI
-host bridges, so the ACPI namespace must describe each host bridge, the
-method for accessing PCI config space below it, the address space windows
-the host bridge forwards to PCI (using _CRS), and the routing of legacy
-INTx interrupts (using _PRT).
-
-PCI devices, which are below the host bridge, generally do not need to be
-described via ACPI.  The OS can discover them via the standard PCI
-enumeration mechanism, using config accesses to discover and identify
-devices and read and size their BARs.  However, ACPI may describe PCI
-devices if it provides power management or hotplug functionality for them
-or if the device has INTx interrupts connected by platform interrupt
-controllers and a _PRT is needed to describe those connections.
-
-ACPI resource description is done via _CRS objects of devices in the ACPI
-namespace [2].   The _CRS is like a generalized PCI BAR: the OS can read
-_CRS and figure out what resource is being consumed even if it doesn't have
-a driver for the device [3].  That's important because it means an old OS
-can work correctly even on a system with new devices unknown to the OS.
-The new devices might not do anything, but the OS can at least make sure no
-resources conflict with them.
-
-Static tables like MCFG, HPET, ECDT, etc., are *not* mechanisms for
-reserving address space.  The static tables are for things the OS needs to
-know early in boot, before it can parse the ACPI namespace.  If a new table
-is defined, an old OS needs to operate correctly even though it ignores the
-table.  _CRS allows that because it is generic and understood by the old
-OS; a static table does not.
-
-If the OS is expected to manage a non-discoverable device described via
-ACPI, that device will have a specific _HID/_CID that tells the OS what
-driver to bind to it, and the _CRS tells the OS and the driver where the
-device's registers are.
-
-PCI host bridges are PNP0A03 or PNP0A08 devices.  Their _CRS should
-describe all the address space they consume.  This includes all the windows
-they forward down to the PCI bus, as well as registers of the host bridge
-itself that are not forwarded to PCI.  The host bridge registers include
-things like secondary/subordinate bus registers that determine the bus
-range below the bridge, window registers that describe the apertures, etc.
-These are all device-specific, non-architected things, so the only way a
-PNP0A03/PNP0A08 driver can manage them is via _PRS/_CRS/_SRS, which contain
-the device-specific details.  The host bridge registers also include ECAM
-space, since it is consumed by the host bridge.
-
-ACPI defines a Consumer/Producer bit to distinguish the bridge registers
-("Consumer") from the bridge apertures ("Producer") [4, 5], but early
-BIOSes didn't use that bit correctly.  The result is that the current ACPI
-spec defines Consumer/Producer only for the Extended Address Space
-descriptors; the bit should be ignored in the older QWord/DWord/Word
-Address Space descriptors.  Consequently, OSes have to assume all
-QWord/DWord/Word descriptors are windows.
-
-Prior to the addition of Extended Address Space descriptors, the failure of
-Consumer/Producer meant there was no way to describe bridge registers in
-the PNP0A03/PNP0A08 device itself.  The workaround was to describe the
-bridge registers (including ECAM space) in PNP0C02 catch-all devices [6].
-With the exception of ECAM, the bridge register space is device-specific
-anyway, so the generic PNP0A03/PNP0A08 driver (pci_root.c) has no need to
-know about it.  
-
-New architectures should be able to use "Consumer" Extended Address Space
-descriptors in the PNP0A03 device for bridge registers, including ECAM,
-although a strict interpretation of [6] might prohibit this.  Old x86 and
-ia64 kernels assume all address space descriptors, including "Consumer"
-Extended Address Space ones, are windows, so it would not be safe to
-describe bridge registers this way on those architectures.
-
-PNP0C02 "motherboard" devices are basically a catch-all.  There's no
-programming model for them other than "don't use these resources for
-anything else."  So a PNP0C02 _CRS should claim any address space that is
-(1) not claimed by _CRS under any other device object in the ACPI namespace
-and (2) should not be assigned by the OS to something else.
-
-The PCIe spec requires the Enhanced Configuration Access Method (ECAM)
-unless there's a standard firmware interface for config access, e.g., the
-ia64 SAL interface [7].  A host bridge consumes ECAM memory address space
-and converts memory accesses into PCI configuration accesses.  The spec
-defines the ECAM address space layout and functionality; only the base of
-the address space is device-specific.  An ACPI OS learns the base address
-from either the static MCFG table or a _CBA method in the PNP0A03 device.
-
-The MCFG table must describe the ECAM space of non-hot pluggable host
-bridges [8].  Since MCFG is a static table and can't be updated by hotplug,
-a _CBA method in the PNP0A03 device describes the ECAM space of a
-hot-pluggable host bridge [9].  Note that for both MCFG and _CBA, the base
-address always corresponds to bus 0, even if the bus range below the bridge
-(which is reported via _CRS) doesn't start at 0.
-
-
-[1] ACPI 6.2, sec 6.1:
-    For any device that is on a non-enumerable type of bus (for example, an
-    ISA bus), OSPM enumerates the devices' identifier(s) and the ACPI
-    system firmware must supply an _HID object ... for each device to
-    enable OSPM to do that.
-
-[2] ACPI 6.2, sec 3.7:
-    The OS enumerates motherboard devices simply by reading through the
-    ACPI Namespace looking for devices with hardware IDs.
-
-    Each device enumerated by ACPI includes ACPI-defined objects in the
-    ACPI Namespace that report the hardware resources the device could
-    occupy [_PRS], an object that reports the resources that are currently
-    used by the device [_CRS], and objects for configuring those resources
-    [_SRS].  The information is used by the Plug and Play OS (OSPM) to
-    configure the devices.
-
-[3] ACPI 6.2, sec 6.2:
-    OSPM uses device configuration objects to configure hardware resources
-    for devices enumerated via ACPI.  Device configuration objects provide
-    information about current and possible resource requirements, the
-    relationship between shared resources, and methods for configuring
-    hardware resources.
-
-    When OSPM enumerates a device, it calls _PRS to determine the resource
-    requirements of the device.  It may also call _CRS to find the current
-    resource settings for the device.  Using this information, the Plug and
-    Play system determines what resources the device should consume and
-    sets those resources by calling the device’s _SRS control method.
-
-    In ACPI, devices can consume resources (for example, legacy keyboards),
-    provide resources (for example, a proprietary PCI bridge), or do both.
-    Unless otherwise specified, resources for a device are assumed to be
-    taken from the nearest matching resource above the device in the device
-    hierarchy.
-
-[4] ACPI 6.2, sec 6.4.3.5.1, 2, 3, 4:
-    QWord/DWord/Word Address Space Descriptor (.1, .2, .3)
-    General Flags: Bit [0] Ignored
-
-    Extended Address Space Descriptor (.4)
-    General Flags: Bit [0] Consumer/Producer:
-	1–This device consumes this resource
-	0–This device produces and consumes this resource
-
-[5] ACPI 6.2, sec 19.6.43:
-    ResourceUsage specifies whether the Memory range is consumed by
-    this device (ResourceConsumer) or passed on to child devices
-    (ResourceProducer).  If nothing is specified, then
-    ResourceConsumer is assumed.
-
-[6] PCI Firmware 3.2, sec 4.1.2:
-    If the operating system does not natively comprehend reserving the
-    MMCFG region, the MMCFG region must be reserved by firmware.  The
-    address range reported in the MCFG table or by _CBA method (see Section
-    4.1.3) must be reserved by declaring a motherboard resource.  For most
-    systems, the motherboard resource would appear at the root of the ACPI
-    namespace (under \_SB) in a node with a _HID of EISAID (PNP0C02), and
-    the resources in this case should not be claimed in the root PCI bus’s
-    _CRS.  The resources can optionally be returned in Int15 E820 or
-    EFIGetMemoryMap as reserved memory but must always be reported through
-    ACPI as a motherboard resource.
-
-[7] PCI Express 4.0, sec 7.2.2:
-    For systems that are PC-compatible, or that do not implement a
-    processor-architecture-specific firmware interface standard that allows
-    access to the Configuration Space, the ECAM is required as defined in
-    this section.
-
-[8] PCI Firmware 3.2, sec 4.1.2:
-    The MCFG table is an ACPI table that is used to communicate the base
-    addresses corresponding to the non-hot removable PCI Segment Groups
-    range within a PCI Segment Group available to the operating system at
-    boot. This is required for the PC-compatible systems.
-
-    The MCFG table is only used to communicate the base addresses
-    corresponding to the PCI Segment Groups available to the system at
-    boot.
-
-[9] PCI Firmware 3.2, sec 4.1.3:
-    The _CBA (Memory mapped Configuration Base Address) control method is
-    an optional ACPI object that returns the 64-bit memory mapped
-    configuration base address for the hot plug capable host bridge. The
-    base address returned by _CBA is processor-relative address. The _CBA
-    control method evaluates to an Integer.
-
-    This control method appears under a host bridge object. When the _CBA
-    method appears under an active host bridge object, the operating system
-    evaluates this structure to identify the memory mapped configuration
-    base address corresponding to the PCI Segment Group for the bus number
-    range specified in _CRS method. An ACPI name space object that contains
-    the _CBA method must also contain a corresponding _SEG method.
diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst
index 458354daac47..6f573f3df993 100644
--- a/Documentation/PCI/index.rst
+++ b/Documentation/PCI/index.rst
@@ -12,3 +12,4 @@ Linux PCI Bus Subsystem
    picebus-howto
    pci-iov-howto
    msi-howto
+   acpi-info
-- 
cgit 


From 8a01fa64348aaaf54b3eef9728bfe2654e7bdd88 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Tue, 14 May 2019 22:47:29 +0800
Subject: Documentation: PCI: convert pci-error-recovery.txt to reST

Convert plain text documentation to reStructuredText format and add it to
Sphinx TOC tree.  No essential content change.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/PCI/index.rst              |   1 +
 Documentation/PCI/pci-error-recovery.rst | 424 +++++++++++++++++++++++++++++++
 Documentation/PCI/pci-error-recovery.txt | 413 ------------------------------
 3 files changed, 425 insertions(+), 413 deletions(-)
 create mode 100644 Documentation/PCI/pci-error-recovery.rst
 delete mode 100644 Documentation/PCI/pci-error-recovery.txt

(limited to 'Documentation')

diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst
index 6f573f3df993..92e62d0fc9e6 100644
--- a/Documentation/PCI/index.rst
+++ b/Documentation/PCI/index.rst
@@ -13,3 +13,4 @@ Linux PCI Bus Subsystem
    pci-iov-howto
    msi-howto
    acpi-info
+   pci-error-recovery
diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst
new file mode 100644
index 000000000000..83db42092935
--- /dev/null
+++ b/Documentation/PCI/pci-error-recovery.rst
@@ -0,0 +1,424 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+PCI Error Recovery
+==================
+
+
+:Authors: - Linas Vepstas <linasvepstas@gmail.com>
+          - Richard Lary <rlary@us.ibm.com>
+          - Mike Mason <mmlnx@us.ibm.com>
+
+
+Many PCI bus controllers are able to detect a variety of hardware
+PCI errors on the bus, such as parity errors on the data and address
+buses, as well as SERR and PERR errors.  Some of the more advanced
+chipsets are able to deal with these errors; these include PCI-E chipsets,
+and the PCI-host bridges found on IBM Power4, Power5 and Power6-based
+pSeries boxes. A typical action taken is to disconnect the affected device,
+halting all I/O to it.  The goal of a disconnection is to avoid system
+corruption; for example, to halt system memory corruption due to DMA's
+to "wild" addresses. Typically, a reconnection mechanism is also
+offered, so that the affected PCI device(s) are reset and put back
+into working condition. The reset phase requires coordination
+between the affected device drivers and the PCI controller chip.
+This document describes a generic API for notifying device drivers
+of a bus disconnection, and then performing error recovery.
+This API is currently implemented in the 2.6.16 and later kernels.
+
+Reporting and recovery is performed in several steps. First, when
+a PCI hardware error has resulted in a bus disconnect, that event
+is reported as soon as possible to all affected device drivers,
+including multiple instances of a device driver on multi-function
+cards. This allows device drivers to avoid deadlocking in spinloops,
+waiting for some i/o-space register to change, when it never will.
+It also gives the drivers a chance to defer incoming I/O as
+needed.
+
+Next, recovery is performed in several stages. Most of the complexity
+is forced by the need to handle multi-function devices, that is,
+devices that have multiple device drivers associated with them.
+In the first stage, each driver is allowed to indicate what type
+of reset it desires, the choices being a simple re-enabling of I/O
+or requesting a slot reset.
+
+If any driver requests a slot reset, that is what will be done.
+
+After a reset and/or a re-enabling of I/O, all drivers are
+again notified, so that they may then perform any device setup/config
+that may be required.  After these have all completed, a final
+"resume normal operations" event is sent out.
+
+The biggest reason for choosing a kernel-based implementation rather
+than a user-space implementation was the need to deal with bus
+disconnects of PCI devices attached to storage media, and, in particular,
+disconnects from devices holding the root file system.  If the root
+file system is disconnected, a user-space mechanism would have to go
+through a large number of contortions to complete recovery. Almost all
+of the current Linux file systems are not tolerant of disconnection
+from/reconnection to their underlying block device. By contrast,
+bus errors are easy to manage in the device driver. Indeed, most
+device drivers already handle very similar recovery procedures;
+for example, the SCSI-generic layer already provides significant
+mechanisms for dealing with SCSI bus errors and SCSI bus resets.
+
+
+Detailed Design
+===============
+
+Design and implementation details below, based on a chain of
+public email discussions with Ben Herrenschmidt, circa 5 April 2005.
+
+The error recovery API support is exposed to the driver in the form of
+a structure of function pointers pointed to by a new field in struct
+pci_driver. A driver that fails to provide the structure is "non-aware",
+and the actual recovery steps taken are platform dependent.  The
+arch/powerpc implementation will simulate a PCI hotplug remove/add.
+
+This structure has the form::
+
+	struct pci_error_handlers
+	{
+		int (*error_detected)(struct pci_dev *dev, enum pci_channel_state);
+		int (*mmio_enabled)(struct pci_dev *dev);
+		int (*slot_reset)(struct pci_dev *dev);
+		void (*resume)(struct pci_dev *dev);
+	};
+
+The possible channel states are::
+
+	enum pci_channel_state {
+		pci_channel_io_normal,  /* I/O channel is in normal state */
+		pci_channel_io_frozen,  /* I/O to channel is blocked */
+		pci_channel_io_perm_failure, /* PCI card is dead */
+	};
+
+Possible return values are::
+
+	enum pci_ers_result {
+		PCI_ERS_RESULT_NONE,        /* no result/none/not supported in device driver */
+		PCI_ERS_RESULT_CAN_RECOVER, /* Device driver can recover without slot reset */
+		PCI_ERS_RESULT_NEED_RESET,  /* Device driver wants slot to be reset. */
+		PCI_ERS_RESULT_DISCONNECT,  /* Device has completely failed, is unrecoverable */
+		PCI_ERS_RESULT_RECOVERED,   /* Device driver is fully recovered and operational */
+	};
+
+A driver does not have to implement all of these callbacks; however,
+if it implements any, it must implement error_detected(). If a callback
+is not implemented, the corresponding feature is considered unsupported.
+For example, if mmio_enabled() and resume() aren't there, then it
+is assumed that the driver is not doing any direct recovery and requires
+a slot reset.  Typically a driver will want to know about
+a slot_reset().
+
+The actual steps taken by a platform to recover from a PCI error
+event will be platform-dependent, but will follow the general
+sequence described below.
+
+STEP 0: Error Event
+-------------------
+A PCI bus error is detected by the PCI hardware.  On powerpc, the slot
+is isolated, in that all I/O is blocked: all reads return 0xffffffff,
+all writes are ignored.
+
+
+STEP 1: Notification
+--------------------
+Platform calls the error_detected() callback on every instance of
+every driver affected by the error.
+
+At this point, the device might not be accessible anymore, depending on
+the platform (the slot will be isolated on powerpc). The driver may
+already have "noticed" the error because of a failing I/O, but this
+is the proper "synchronization point", that is, it gives the driver
+a chance to cleanup, waiting for pending stuff (timers, whatever, etc...)
+to complete; it can take semaphores, schedule, etc... everything but
+touch the device. Within this function and after it returns, the driver
+shouldn't do any new IOs. Called in task context. This is sort of a
+"quiesce" point. See note about interrupts at the end of this doc.
+
+All drivers participating in this system must implement this call.
+The driver must return one of the following result codes:
+
+  - PCI_ERS_RESULT_CAN_RECOVER
+      Driver returns this if it thinks it might be able to recover
+      the HW by just banging IOs or if it wants to be given
+      a chance to extract some diagnostic information (see
+      mmio_enable, below).
+  - PCI_ERS_RESULT_NEED_RESET
+      Driver returns this if it can't recover without a
+      slot reset.
+  - PCI_ERS_RESULT_DISCONNECT
+      Driver returns this if it doesn't want to recover at all.
+
+The next step taken will depend on the result codes returned by the
+drivers.
+
+If all drivers on the segment/slot return PCI_ERS_RESULT_CAN_RECOVER,
+then the platform should re-enable IOs on the slot (or do nothing in
+particular, if the platform doesn't isolate slots), and recovery
+proceeds to STEP 2 (MMIO Enable).
+
+If any driver requested a slot reset (by returning PCI_ERS_RESULT_NEED_RESET),
+then recovery proceeds to STEP 4 (Slot Reset).
+
+If the platform is unable to recover the slot, the next step
+is STEP 6 (Permanent Failure).
+
+.. note::
+
+   The current powerpc implementation assumes that a device driver will
+   *not* schedule or semaphore in this routine; the current powerpc
+   implementation uses one kernel thread to notify all devices;
+   thus, if one device sleeps/schedules, all devices are affected.
+   Doing better requires complex multi-threaded logic in the error
+   recovery implementation (e.g. waiting for all notification threads
+   to "join" before proceeding with recovery.)  This seems excessively
+   complex and not worth implementing.
+
+   The current powerpc implementation doesn't much care if the device
+   attempts I/O at this point, or not.  I/O's will fail, returning
+   a value of 0xff on read, and writes will be dropped. If more than
+   EEH_MAX_FAILS I/O's are attempted to a frozen adapter, EEH
+   assumes that the device driver has gone into an infinite loop
+   and prints an error to syslog.  A reboot is then required to
+   get the device working again.
+
+STEP 2: MMIO Enabled
+--------------------
+The platform re-enables MMIO to the device (but typically not the
+DMA), and then calls the mmio_enabled() callback on all affected
+device drivers.
+
+This is the "early recovery" call. IOs are allowed again, but DMA is
+not, with some restrictions. This is NOT a callback for the driver to
+start operations again, only to peek/poke at the device, extract diagnostic
+information, if any, and eventually do things like trigger a device local
+reset or some such, but not restart operations. This callback is made if
+all drivers on a segment agree that they can try to recover and if no automatic
+link reset was performed by the HW. If the platform can't just re-enable IOs
+without a slot reset or a link reset, it will not call this callback, and
+instead will have gone directly to STEP 3 (Link Reset) or STEP 4 (Slot Reset)
+
+.. note::
+
+   The following is proposed; no platform implements this yet:
+   Proposal: All I/O's should be done _synchronously_ from within
+   this callback, errors triggered by them will be returned via
+   the normal pci_check_whatever() API, no new error_detected()
+   callback will be issued due to an error happening here. However,
+   such an error might cause IOs to be re-blocked for the whole
+   segment, and thus invalidate the recovery that other devices
+   on the same segment might have done, forcing the whole segment
+   into one of the next states, that is, link reset or slot reset.
+
+The driver should return one of the following result codes:
+  - PCI_ERS_RESULT_RECOVERED
+      Driver returns this if it thinks the device is fully
+      functional and thinks it is ready to start
+      normal driver operations again. There is no
+      guarantee that the driver will actually be
+      allowed to proceed, as another driver on the
+      same segment might have failed and thus triggered a
+      slot reset on platforms that support it.
+
+  - PCI_ERS_RESULT_NEED_RESET
+      Driver returns this if it thinks the device is not
+      recoverable in its current state and it needs a slot
+      reset to proceed.
+
+  - PCI_ERS_RESULT_DISCONNECT
+      Same as above. Total failure, no recovery even after
+      reset driver dead. (To be defined more precisely)
+
+The next step taken depends on the results returned by the drivers.
+If all drivers returned PCI_ERS_RESULT_RECOVERED, then the platform
+proceeds to either STEP3 (Link Reset) or to STEP 5 (Resume Operations).
+
+If any driver returned PCI_ERS_RESULT_NEED_RESET, then the platform
+proceeds to STEP 4 (Slot Reset)
+
+STEP 3: Link Reset
+------------------
+The platform resets the link.  This is a PCI-Express specific step
+and is done whenever a fatal error has been detected that can be
+"solved" by resetting the link.
+
+STEP 4: Slot Reset
+------------------
+
+In response to a return value of PCI_ERS_RESULT_NEED_RESET, the
+the platform will perform a slot reset on the requesting PCI device(s).
+The actual steps taken by a platform to perform a slot reset
+will be platform-dependent. Upon completion of slot reset, the
+platform will call the device slot_reset() callback.
+
+Powerpc platforms implement two levels of slot reset:
+soft reset(default) and fundamental(optional) reset.
+
+Powerpc soft reset consists of asserting the adapter #RST line and then
+restoring the PCI BAR's and PCI configuration header to a state
+that is equivalent to what it would be after a fresh system
+power-on followed by power-on BIOS/system firmware initialization.
+Soft reset is also known as hot-reset.
+
+Powerpc fundamental reset is supported by PCI Express cards only
+and results in device's state machines, hardware logic, port states and
+configuration registers to initialize to their default conditions.
+
+For most PCI devices, a soft reset will be sufficient for recovery.
+Optional fundamental reset is provided to support a limited number
+of PCI Express devices for which a soft reset is not sufficient
+for recovery.
+
+If the platform supports PCI hotplug, then the reset might be
+performed by toggling the slot electrical power off/on.
+
+It is important for the platform to restore the PCI config space
+to the "fresh poweron" state, rather than the "last state". After
+a slot reset, the device driver will almost always use its standard
+device initialization routines, and an unusual config space setup
+may result in hung devices, kernel panics, or silent data corruption.
+
+This call gives drivers the chance to re-initialize the hardware
+(re-download firmware, etc.).  At this point, the driver may assume
+that the card is in a fresh state and is fully functional. The slot
+is unfrozen and the driver has full access to PCI config space,
+memory mapped I/O space and DMA. Interrupts (Legacy, MSI, or MSI-X)
+will also be available.
+
+Drivers should not restart normal I/O processing operations
+at this point.  If all device drivers report success on this
+callback, the platform will call resume() to complete the sequence,
+and let the driver restart normal I/O processing.
+
+A driver can still return a critical failure for this function if
+it can't get the device operational after reset.  If the platform
+previously tried a soft reset, it might now try a hard reset (power
+cycle) and then call slot_reset() again.  It the device still can't
+be recovered, there is nothing more that can be done;  the platform
+will typically report a "permanent failure" in such a case.  The
+device will be considered "dead" in this case.
+
+Drivers for multi-function cards will need to coordinate among
+themselves as to which driver instance will perform any "one-shot"
+or global device initialization. For example, the Symbios sym53cxx2
+driver performs device init only from PCI function 0::
+
+	+       if (PCI_FUNC(pdev->devfn) == 0)
+	+               sym_reset_scsi_bus(np, 0);
+
+Result codes:
+	- PCI_ERS_RESULT_DISCONNECT
+	  Same as above.
+
+Drivers for PCI Express cards that require a fundamental reset must
+set the needs_freset bit in the pci_dev structure in their probe function.
+For example, the QLogic qla2xxx driver sets the needs_freset bit for certain
+PCI card types::
+
+	+	/* Set EEH reset type to fundamental if required by hba  */
+	+	if (IS_QLA24XX(ha) || IS_QLA25XX(ha) || IS_QLA81XX(ha))
+	+		pdev->needs_freset = 1;
+	+
+
+Platform proceeds either to STEP 5 (Resume Operations) or STEP 6 (Permanent
+Failure).
+
+.. note::
+
+   The current powerpc implementation does not try a power-cycle
+   reset if the driver returned PCI_ERS_RESULT_DISCONNECT.
+   However, it probably should.
+
+
+STEP 5: Resume Operations
+-------------------------
+The platform will call the resume() callback on all affected device
+drivers if all drivers on the segment have returned
+PCI_ERS_RESULT_RECOVERED from one of the 3 previous callbacks.
+The goal of this callback is to tell the driver to restart activity,
+that everything is back and running. This callback does not return
+a result code.
+
+At this point, if a new error happens, the platform will restart
+a new error recovery sequence.
+
+STEP 6: Permanent Failure
+-------------------------
+A "permanent failure" has occurred, and the platform cannot recover
+the device.  The platform will call error_detected() with a
+pci_channel_state value of pci_channel_io_perm_failure.
+
+The device driver should, at this point, assume the worst. It should
+cancel all pending I/O, refuse all new I/O, returning -EIO to
+higher layers. The device driver should then clean up all of its
+memory and remove itself from kernel operations, much as it would
+during system shutdown.
+
+The platform will typically notify the system operator of the
+permanent failure in some way.  If the device is hotplug-capable,
+the operator will probably want to remove and replace the device.
+Note, however, not all failures are truly "permanent". Some are
+caused by over-heating, some by a poorly seated card. Many
+PCI error events are caused by software bugs, e.g. DMA's to
+wild addresses or bogus split transactions due to programming
+errors. See the discussion in powerpc/eeh-pci-error-recovery.txt
+for additional detail on real-life experience of the causes of
+software errors.
+
+
+Conclusion; General Remarks
+---------------------------
+The way the callbacks are called is platform policy. A platform with
+no slot reset capability may want to just "ignore" drivers that can't
+recover (disconnect them) and try to let other cards on the same segment
+recover. Keep in mind that in most real life cases, though, there will
+be only one driver per segment.
+
+Now, a note about interrupts. If you get an interrupt and your
+device is dead or has been isolated, there is a problem :)
+The current policy is to turn this into a platform policy.
+That is, the recovery API only requires that:
+
+ - There is no guarantee that interrupt delivery can proceed from any
+   device on the segment starting from the error detection and until the
+   slot_reset callback is called, at which point interrupts are expected
+   to be fully operational.
+
+ - There is no guarantee that interrupt delivery is stopped, that is,
+   a driver that gets an interrupt after detecting an error, or that detects
+   an error within the interrupt handler such that it prevents proper
+   ack'ing of the interrupt (and thus removal of the source) should just
+   return IRQ_NOTHANDLED. It's up to the platform to deal with that
+   condition, typically by masking the IRQ source during the duration of
+   the error handling. It is expected that the platform "knows" which
+   interrupts are routed to error-management capable slots and can deal
+   with temporarily disabling that IRQ number during error processing (this
+   isn't terribly complex). That means some IRQ latency for other devices
+   sharing the interrupt, but there is simply no other way. High end
+   platforms aren't supposed to share interrupts between many devices
+   anyway :)
+
+.. note::
+
+   Implementation details for the powerpc platform are discussed in
+   the file Documentation/powerpc/eeh-pci-error-recovery.txt
+
+   As of this writing, there is a growing list of device drivers with
+   patches implementing error recovery. Not all of these patches are in
+   mainline yet. These may be used as "examples":
+
+   - drivers/scsi/ipr
+   - drivers/scsi/sym53c8xx_2
+   - drivers/scsi/qla2xxx
+   - drivers/scsi/lpfc
+   - drivers/next/bnx2.c
+   - drivers/next/e100.c
+   - drivers/net/e1000
+   - drivers/net/e1000e
+   - drivers/net/ixgb
+   - drivers/net/ixgbe
+   - drivers/net/cxgb3
+   - drivers/net/s2io.c
+   - drivers/net/qlge
diff --git a/Documentation/PCI/pci-error-recovery.txt b/Documentation/PCI/pci-error-recovery.txt
deleted file mode 100644
index 0b6bb3ef449e..000000000000
--- a/Documentation/PCI/pci-error-recovery.txt
+++ /dev/null
@@ -1,413 +0,0 @@
-
-                       PCI Error Recovery
-                       ------------------
-                        February 2, 2006
-
-                 Current document maintainer:
-             Linas Vepstas <linasvepstas@gmail.com>
-          updated by Richard Lary <rlary@us.ibm.com>
-       and Mike Mason <mmlnx@us.ibm.com> on 27-Jul-2009
-
-
-Many PCI bus controllers are able to detect a variety of hardware
-PCI errors on the bus, such as parity errors on the data and address
-buses, as well as SERR and PERR errors.  Some of the more advanced
-chipsets are able to deal with these errors; these include PCI-E chipsets,
-and the PCI-host bridges found on IBM Power4, Power5 and Power6-based
-pSeries boxes. A typical action taken is to disconnect the affected device,
-halting all I/O to it.  The goal of a disconnection is to avoid system
-corruption; for example, to halt system memory corruption due to DMA's
-to "wild" addresses. Typically, a reconnection mechanism is also
-offered, so that the affected PCI device(s) are reset and put back
-into working condition. The reset phase requires coordination
-between the affected device drivers and the PCI controller chip.
-This document describes a generic API for notifying device drivers
-of a bus disconnection, and then performing error recovery.
-This API is currently implemented in the 2.6.16 and later kernels.
-
-Reporting and recovery is performed in several steps. First, when
-a PCI hardware error has resulted in a bus disconnect, that event
-is reported as soon as possible to all affected device drivers,
-including multiple instances of a device driver on multi-function
-cards. This allows device drivers to avoid deadlocking in spinloops,
-waiting for some i/o-space register to change, when it never will.
-It also gives the drivers a chance to defer incoming I/O as
-needed.
-
-Next, recovery is performed in several stages. Most of the complexity
-is forced by the need to handle multi-function devices, that is,
-devices that have multiple device drivers associated with them.
-In the first stage, each driver is allowed to indicate what type
-of reset it desires, the choices being a simple re-enabling of I/O
-or requesting a slot reset.
-
-If any driver requests a slot reset, that is what will be done.
-
-After a reset and/or a re-enabling of I/O, all drivers are
-again notified, so that they may then perform any device setup/config
-that may be required.  After these have all completed, a final
-"resume normal operations" event is sent out.
-
-The biggest reason for choosing a kernel-based implementation rather
-than a user-space implementation was the need to deal with bus
-disconnects of PCI devices attached to storage media, and, in particular,
-disconnects from devices holding the root file system.  If the root
-file system is disconnected, a user-space mechanism would have to go
-through a large number of contortions to complete recovery. Almost all
-of the current Linux file systems are not tolerant of disconnection
-from/reconnection to their underlying block device. By contrast,
-bus errors are easy to manage in the device driver. Indeed, most
-device drivers already handle very similar recovery procedures;
-for example, the SCSI-generic layer already provides significant
-mechanisms for dealing with SCSI bus errors and SCSI bus resets.
-
-
-Detailed Design
----------------
-Design and implementation details below, based on a chain of
-public email discussions with Ben Herrenschmidt, circa 5 April 2005.
-
-The error recovery API support is exposed to the driver in the form of
-a structure of function pointers pointed to by a new field in struct
-pci_driver. A driver that fails to provide the structure is "non-aware",
-and the actual recovery steps taken are platform dependent.  The
-arch/powerpc implementation will simulate a PCI hotplug remove/add.
-
-This structure has the form:
-struct pci_error_handlers
-{
-	int (*error_detected)(struct pci_dev *dev, enum pci_channel_state);
-	int (*mmio_enabled)(struct pci_dev *dev);
-	int (*slot_reset)(struct pci_dev *dev);
-	void (*resume)(struct pci_dev *dev);
-};
-
-The possible channel states are:
-enum pci_channel_state {
-	pci_channel_io_normal,  /* I/O channel is in normal state */
-	pci_channel_io_frozen,  /* I/O to channel is blocked */
-	pci_channel_io_perm_failure, /* PCI card is dead */
-};
-
-Possible return values are:
-enum pci_ers_result {
-	PCI_ERS_RESULT_NONE,        /* no result/none/not supported in device driver */
-	PCI_ERS_RESULT_CAN_RECOVER, /* Device driver can recover without slot reset */
-	PCI_ERS_RESULT_NEED_RESET,  /* Device driver wants slot to be reset. */
-	PCI_ERS_RESULT_DISCONNECT,  /* Device has completely failed, is unrecoverable */
-	PCI_ERS_RESULT_RECOVERED,   /* Device driver is fully recovered and operational */
-};
-
-A driver does not have to implement all of these callbacks; however,
-if it implements any, it must implement error_detected(). If a callback
-is not implemented, the corresponding feature is considered unsupported.
-For example, if mmio_enabled() and resume() aren't there, then it
-is assumed that the driver is not doing any direct recovery and requires
-a slot reset.  Typically a driver will want to know about
-a slot_reset().
-
-The actual steps taken by a platform to recover from a PCI error
-event will be platform-dependent, but will follow the general
-sequence described below.
-
-STEP 0: Error Event
--------------------
-A PCI bus error is detected by the PCI hardware.  On powerpc, the slot
-is isolated, in that all I/O is blocked: all reads return 0xffffffff,
-all writes are ignored.
-
-
-STEP 1: Notification
---------------------
-Platform calls the error_detected() callback on every instance of
-every driver affected by the error.
-
-At this point, the device might not be accessible anymore, depending on
-the platform (the slot will be isolated on powerpc). The driver may
-already have "noticed" the error because of a failing I/O, but this
-is the proper "synchronization point", that is, it gives the driver
-a chance to cleanup, waiting for pending stuff (timers, whatever, etc...)
-to complete; it can take semaphores, schedule, etc... everything but
-touch the device. Within this function and after it returns, the driver
-shouldn't do any new IOs. Called in task context. This is sort of a
-"quiesce" point. See note about interrupts at the end of this doc.
-
-All drivers participating in this system must implement this call.
-The driver must return one of the following result codes:
-		- PCI_ERS_RESULT_CAN_RECOVER:
-		  Driver returns this if it thinks it might be able to recover
-		  the HW by just banging IOs or if it wants to be given
-		  a chance to extract some diagnostic information (see
-		  mmio_enable, below).
-		- PCI_ERS_RESULT_NEED_RESET:
-		  Driver returns this if it can't recover without a
-		  slot reset.
-		- PCI_ERS_RESULT_DISCONNECT:
-		  Driver returns this if it doesn't want to recover at all.
-
-The next step taken will depend on the result codes returned by the
-drivers.
-
-If all drivers on the segment/slot return PCI_ERS_RESULT_CAN_RECOVER,
-then the platform should re-enable IOs on the slot (or do nothing in
-particular, if the platform doesn't isolate slots), and recovery
-proceeds to STEP 2 (MMIO Enable).
-
-If any driver requested a slot reset (by returning PCI_ERS_RESULT_NEED_RESET),
-then recovery proceeds to STEP 4 (Slot Reset).
-
-If the platform is unable to recover the slot, the next step
-is STEP 6 (Permanent Failure).
-
->>> The current powerpc implementation assumes that a device driver will
->>> *not* schedule or semaphore in this routine; the current powerpc
->>> implementation uses one kernel thread to notify all devices;
->>> thus, if one device sleeps/schedules, all devices are affected.
->>> Doing better requires complex multi-threaded logic in the error
->>> recovery implementation (e.g. waiting for all notification threads
->>> to "join" before proceeding with recovery.)  This seems excessively
->>> complex and not worth implementing.
-
->>> The current powerpc implementation doesn't much care if the device
->>> attempts I/O at this point, or not.  I/O's will fail, returning
->>> a value of 0xff on read, and writes will be dropped. If more than
->>> EEH_MAX_FAILS I/O's are attempted to a frozen adapter, EEH
->>> assumes that the device driver has gone into an infinite loop
->>> and prints an error to syslog.  A reboot is then required to
->>> get the device working again.
-
-STEP 2: MMIO Enabled
--------------------
-The platform re-enables MMIO to the device (but typically not the
-DMA), and then calls the mmio_enabled() callback on all affected
-device drivers.
-
-This is the "early recovery" call. IOs are allowed again, but DMA is
-not, with some restrictions. This is NOT a callback for the driver to
-start operations again, only to peek/poke at the device, extract diagnostic
-information, if any, and eventually do things like trigger a device local
-reset or some such, but not restart operations. This callback is made if
-all drivers on a segment agree that they can try to recover and if no automatic
-link reset was performed by the HW. If the platform can't just re-enable IOs
-without a slot reset or a link reset, it will not call this callback, and
-instead will have gone directly to STEP 3 (Link Reset) or STEP 4 (Slot Reset)
-
->>> The following is proposed; no platform implements this yet:
->>> Proposal: All I/O's should be done _synchronously_ from within
->>> this callback, errors triggered by them will be returned via
->>> the normal pci_check_whatever() API, no new error_detected()
->>> callback will be issued due to an error happening here. However,
->>> such an error might cause IOs to be re-blocked for the whole
->>> segment, and thus invalidate the recovery that other devices
->>> on the same segment might have done, forcing the whole segment
->>> into one of the next states, that is, link reset or slot reset.
-
-The driver should return one of the following result codes:
-		- PCI_ERS_RESULT_RECOVERED
-		  Driver returns this if it thinks the device is fully
-		  functional and thinks it is ready to start
-		  normal driver operations again. There is no
-		  guarantee that the driver will actually be
-		  allowed to proceed, as another driver on the
-		  same segment might have failed and thus triggered a
-		  slot reset on platforms that support it.
-
-		- PCI_ERS_RESULT_NEED_RESET
-		  Driver returns this if it thinks the device is not
-		  recoverable in its current state and it needs a slot
-		  reset to proceed.
-
-		- PCI_ERS_RESULT_DISCONNECT
-		  Same as above. Total failure, no recovery even after
-		  reset driver dead. (To be defined more precisely)
-
-The next step taken depends on the results returned by the drivers.
-If all drivers returned PCI_ERS_RESULT_RECOVERED, then the platform
-proceeds to either STEP3 (Link Reset) or to STEP 5 (Resume Operations).
-
-If any driver returned PCI_ERS_RESULT_NEED_RESET, then the platform
-proceeds to STEP 4 (Slot Reset)
-
-STEP 3: Link Reset
-------------------
-The platform resets the link.  This is a PCI-Express specific step
-and is done whenever a fatal error has been detected that can be
-"solved" by resetting the link.
-
-STEP 4: Slot Reset
-------------------
-
-In response to a return value of PCI_ERS_RESULT_NEED_RESET, the
-the platform will perform a slot reset on the requesting PCI device(s).
-The actual steps taken by a platform to perform a slot reset
-will be platform-dependent. Upon completion of slot reset, the
-platform will call the device slot_reset() callback.
-
-Powerpc platforms implement two levels of slot reset:
-soft reset(default) and fundamental(optional) reset.
-
-Powerpc soft reset consists of asserting the adapter #RST line and then
-restoring the PCI BAR's and PCI configuration header to a state
-that is equivalent to what it would be after a fresh system
-power-on followed by power-on BIOS/system firmware initialization.
-Soft reset is also known as hot-reset.
-
-Powerpc fundamental reset is supported by PCI Express cards only
-and results in device's state machines, hardware logic, port states and
-configuration registers to initialize to their default conditions.
-
-For most PCI devices, a soft reset will be sufficient for recovery.
-Optional fundamental reset is provided to support a limited number
-of PCI Express devices for which a soft reset is not sufficient
-for recovery.
-
-If the platform supports PCI hotplug, then the reset might be
-performed by toggling the slot electrical power off/on.
-
-It is important for the platform to restore the PCI config space
-to the "fresh poweron" state, rather than the "last state". After
-a slot reset, the device driver will almost always use its standard
-device initialization routines, and an unusual config space setup
-may result in hung devices, kernel panics, or silent data corruption.
-
-This call gives drivers the chance to re-initialize the hardware
-(re-download firmware, etc.).  At this point, the driver may assume
-that the card is in a fresh state and is fully functional. The slot
-is unfrozen and the driver has full access to PCI config space,
-memory mapped I/O space and DMA. Interrupts (Legacy, MSI, or MSI-X)
-will also be available.
-
-Drivers should not restart normal I/O processing operations
-at this point.  If all device drivers report success on this
-callback, the platform will call resume() to complete the sequence,
-and let the driver restart normal I/O processing.
-
-A driver can still return a critical failure for this function if
-it can't get the device operational after reset.  If the platform
-previously tried a soft reset, it might now try a hard reset (power
-cycle) and then call slot_reset() again.  It the device still can't
-be recovered, there is nothing more that can be done;  the platform
-will typically report a "permanent failure" in such a case.  The
-device will be considered "dead" in this case.
-
-Drivers for multi-function cards will need to coordinate among
-themselves as to which driver instance will perform any "one-shot"
-or global device initialization. For example, the Symbios sym53cxx2
-driver performs device init only from PCI function 0:
-
-+       if (PCI_FUNC(pdev->devfn) == 0)
-+               sym_reset_scsi_bus(np, 0);
-
-	Result codes:
-		- PCI_ERS_RESULT_DISCONNECT
-		Same as above.
-
-Drivers for PCI Express cards that require a fundamental reset must
-set the needs_freset bit in the pci_dev structure in their probe function.
-For example, the QLogic qla2xxx driver sets the needs_freset bit for certain
-PCI card types:
-
-+	/* Set EEH reset type to fundamental if required by hba  */
-+	if (IS_QLA24XX(ha) || IS_QLA25XX(ha) || IS_QLA81XX(ha))
-+		pdev->needs_freset = 1;
-+
-
-Platform proceeds either to STEP 5 (Resume Operations) or STEP 6 (Permanent
-Failure).
-
->>> The current powerpc implementation does not try a power-cycle
->>> reset if the driver returned PCI_ERS_RESULT_DISCONNECT.
->>> However, it probably should.
-
-
-STEP 5: Resume Operations
--------------------------
-The platform will call the resume() callback on all affected device
-drivers if all drivers on the segment have returned
-PCI_ERS_RESULT_RECOVERED from one of the 3 previous callbacks.
-The goal of this callback is to tell the driver to restart activity,
-that everything is back and running. This callback does not return
-a result code.
-
-At this point, if a new error happens, the platform will restart
-a new error recovery sequence.
-
-STEP 6: Permanent Failure
--------------------------
-A "permanent failure" has occurred, and the platform cannot recover
-the device.  The platform will call error_detected() with a
-pci_channel_state value of pci_channel_io_perm_failure.
-
-The device driver should, at this point, assume the worst. It should
-cancel all pending I/O, refuse all new I/O, returning -EIO to
-higher layers. The device driver should then clean up all of its
-memory and remove itself from kernel operations, much as it would
-during system shutdown.
-
-The platform will typically notify the system operator of the
-permanent failure in some way.  If the device is hotplug-capable,
-the operator will probably want to remove and replace the device.
-Note, however, not all failures are truly "permanent". Some are
-caused by over-heating, some by a poorly seated card. Many
-PCI error events are caused by software bugs, e.g. DMA's to
-wild addresses or bogus split transactions due to programming
-errors. See the discussion in powerpc/eeh-pci-error-recovery.txt
-for additional detail on real-life experience of the causes of
-software errors.
-
-
-Conclusion; General Remarks
----------------------------
-The way the callbacks are called is platform policy. A platform with
-no slot reset capability may want to just "ignore" drivers that can't
-recover (disconnect them) and try to let other cards on the same segment
-recover. Keep in mind that in most real life cases, though, there will
-be only one driver per segment.
-
-Now, a note about interrupts. If you get an interrupt and your
-device is dead or has been isolated, there is a problem :)
-The current policy is to turn this into a platform policy.
-That is, the recovery API only requires that:
-
- - There is no guarantee that interrupt delivery can proceed from any
-device on the segment starting from the error detection and until the
-slot_reset callback is called, at which point interrupts are expected
-to be fully operational.
-
- - There is no guarantee that interrupt delivery is stopped, that is,
-a driver that gets an interrupt after detecting an error, or that detects
-an error within the interrupt handler such that it prevents proper
-ack'ing of the interrupt (and thus removal of the source) should just
-return IRQ_NOTHANDLED. It's up to the platform to deal with that
-condition, typically by masking the IRQ source during the duration of
-the error handling. It is expected that the platform "knows" which
-interrupts are routed to error-management capable slots and can deal
-with temporarily disabling that IRQ number during error processing (this
-isn't terribly complex). That means some IRQ latency for other devices
-sharing the interrupt, but there is simply no other way. High end
-platforms aren't supposed to share interrupts between many devices
-anyway :)
-
->>> Implementation details for the powerpc platform are discussed in
->>> the file Documentation/powerpc/eeh-pci-error-recovery.txt
-
->>> As of this writing, there is a growing list of device drivers with
->>> patches implementing error recovery. Not all of these patches are in
->>> mainline yet. These may be used as "examples":
->>>
->>> drivers/scsi/ipr
->>> drivers/scsi/sym53c8xx_2
->>> drivers/scsi/qla2xxx
->>> drivers/scsi/lpfc
->>> drivers/next/bnx2.c
->>> drivers/next/e100.c
->>> drivers/net/e1000
->>> drivers/net/e1000e
->>> drivers/net/ixgb
->>> drivers/net/ixgbe
->>> drivers/net/cxgb3
->>> drivers/net/s2io.c
->>> drivers/net/qlge
-
-The End
--------
-- 
cgit 


From 4e37f055a92e4a813b29fb196a05a6a826abb790 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Tue, 14 May 2019 22:47:30 +0800
Subject: Documentation: PCI: convert pcieaer-howto.txt to reST

Convert plain text documentation to reStructuredText format and add it to
Sphinx TOC tree.  No essential content change.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/PCI/index.rst         |   1 +
 Documentation/PCI/pcieaer-howto.rst | 311 ++++++++++++++++++++++++++++++++++++
 Documentation/PCI/pcieaer-howto.txt | 267 -------------------------------
 3 files changed, 312 insertions(+), 267 deletions(-)
 create mode 100644 Documentation/PCI/pcieaer-howto.rst
 delete mode 100644 Documentation/PCI/pcieaer-howto.txt

(limited to 'Documentation')

diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst
index 92e62d0fc9e6..f54b65b1ca5f 100644
--- a/Documentation/PCI/index.rst
+++ b/Documentation/PCI/index.rst
@@ -14,3 +14,4 @@ Linux PCI Bus Subsystem
    msi-howto
    acpi-info
    pci-error-recovery
+   pcieaer-howto
diff --git a/Documentation/PCI/pcieaer-howto.rst b/Documentation/PCI/pcieaer-howto.rst
new file mode 100644
index 000000000000..18bdefaafd1a
--- /dev/null
+++ b/Documentation/PCI/pcieaer-howto.rst
@@ -0,0 +1,311 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+===========================================================
+The PCI Express Advanced Error Reporting Driver Guide HOWTO
+===========================================================
+
+:Authors: - T. Long Nguyen <tom.l.nguyen@intel.com>
+          - Yanmin Zhang <yanmin.zhang@intel.com>
+
+:Copyright: |copy| 2006 Intel Corporation
+
+Overview
+===========
+
+About this guide
+----------------
+
+This guide describes the basics of the PCI Express Advanced Error
+Reporting (AER) driver and provides information on how to use it, as
+well as how to enable the drivers of endpoint devices to conform with
+PCI Express AER driver.
+
+
+What is the PCI Express AER Driver?
+-----------------------------------
+
+PCI Express error signaling can occur on the PCI Express link itself
+or on behalf of transactions initiated on the link. PCI Express
+defines two error reporting paradigms: the baseline capability and
+the Advanced Error Reporting capability. The baseline capability is
+required of all PCI Express components providing a minimum defined
+set of error reporting requirements. Advanced Error Reporting
+capability is implemented with a PCI Express advanced error reporting
+extended capability structure providing more robust error reporting.
+
+The PCI Express AER driver provides the infrastructure to support PCI
+Express Advanced Error Reporting capability. The PCI Express AER
+driver provides three basic functions:
+
+  - Gathers the comprehensive error information if errors occurred.
+  - Reports error to the users.
+  - Performs error recovery actions.
+
+AER driver only attaches root ports which support PCI-Express AER
+capability.
+
+
+User Guide
+==========
+
+Include the PCI Express AER Root Driver into the Linux Kernel
+-------------------------------------------------------------
+
+The PCI Express AER Root driver is a Root Port service driver attached
+to the PCI Express Port Bus driver. If a user wants to use it, the driver
+has to be compiled. Option CONFIG_PCIEAER supports this capability. It
+depends on CONFIG_PCIEPORTBUS, so pls. set CONFIG_PCIEPORTBUS=y and
+CONFIG_PCIEAER = y.
+
+Load PCI Express AER Root Driver
+--------------------------------
+
+Some systems have AER support in firmware. Enabling Linux AER support at
+the same time the firmware handles AER may result in unpredictable
+behavior. Therefore, Linux does not handle AER events unless the firmware
+grants AER control to the OS via the ACPI _OSC method. See the PCI FW 3.0
+Specification for details regarding _OSC usage.
+
+AER error output
+----------------
+
+When a PCIe AER error is captured, an error message will be output to
+console. If it's a correctable error, it is output as a warning.
+Otherwise, it is printed as an error. So users could choose different
+log level to filter out correctable error messages.
+
+Below shows an example::
+
+  0000:50:00.0: PCIe Bus Error: severity=Uncorrected (Fatal), type=Transaction Layer, id=0500(Requester ID)
+  0000:50:00.0:   device [8086:0329] error status/mask=00100000/00000000
+  0000:50:00.0:    [20] Unsupported Request    (First)
+  0000:50:00.0:   TLP Header: 04000001 00200a03 05010000 00050100
+
+In the example, 'Requester ID' means the ID of the device who sends
+the error message to root port. Pls. refer to pci express specs for
+other fields.
+
+AER Statistics / Counters
+-------------------------
+
+When PCIe AER errors are captured, the counters / statistics are also exposed
+in the form of sysfs attributes which are documented at
+Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
+
+Developer Guide
+===============
+
+To enable AER aware support requires a software driver to configure
+the AER capability structure within its device and to provide callbacks.
+
+To support AER better, developers need understand how AER does work
+firstly.
+
+PCI Express errors are classified into two types: correctable errors
+and uncorrectable errors. This classification is based on the impacts
+of those errors, which may result in degraded performance or function
+failure.
+
+Correctable errors pose no impacts on the functionality of the
+interface. The PCI Express protocol can recover without any software
+intervention or any loss of data. These errors are detected and
+corrected by hardware. Unlike correctable errors, uncorrectable
+errors impact functionality of the interface. Uncorrectable errors
+can cause a particular transaction or a particular PCI Express link
+to be unreliable. Depending on those error conditions, uncorrectable
+errors are further classified into non-fatal errors and fatal errors.
+Non-fatal errors cause the particular transaction to be unreliable,
+but the PCI Express link itself is fully functional. Fatal errors, on
+the other hand, cause the link to be unreliable.
+
+When AER is enabled, a PCI Express device will automatically send an
+error message to the PCIe root port above it when the device captures
+an error. The Root Port, upon receiving an error reporting message,
+internally processes and logs the error message in its PCI Express
+capability structure. Error information being logged includes storing
+the error reporting agent's requestor ID into the Error Source
+Identification Registers and setting the error bits of the Root Error
+Status Register accordingly. If AER error reporting is enabled in Root
+Error Command Register, the Root Port generates an interrupt if an
+error is detected.
+
+Note that the errors as described above are related to the PCI Express
+hierarchy and links. These errors do not include any device specific
+errors because device specific errors will still get sent directly to
+the device driver.
+
+Configure the AER capability structure
+--------------------------------------
+
+AER aware drivers of PCI Express component need change the device
+control registers to enable AER. They also could change AER registers,
+including mask and severity registers. Helper function
+pci_enable_pcie_error_reporting could be used to enable AER. See
+section 3.3.
+
+Provide callbacks
+-----------------
+
+callback reset_link to reset pci express link
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This callback is used to reset the pci express physical link when a
+fatal error happens. The root port aer service driver provides a
+default reset_link function, but different upstream ports might
+have different specifications to reset pci express link, so all
+upstream ports should provide their own reset_link functions.
+
+In struct pcie_port_service_driver, a new pointer, reset_link, is
+added.
+::
+
+	pci_ers_result_t (*reset_link) (struct pci_dev *dev);
+
+Section 3.2.2.2 provides more detailed info on when to call
+reset_link.
+
+PCI error-recovery callbacks
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The PCI Express AER Root driver uses error callbacks to coordinate
+with downstream device drivers associated with a hierarchy in question
+when performing error recovery actions.
+
+Data struct pci_driver has a pointer, err_handler, to point to
+pci_error_handlers who consists of a couple of callback function
+pointers. AER driver follows the rules defined in
+pci-error-recovery.txt except pci express specific parts (e.g.
+reset_link). Pls. refer to pci-error-recovery.txt for detailed
+definitions of the callbacks.
+
+Below sections specify when to call the error callback functions.
+
+Correctable errors
+~~~~~~~~~~~~~~~~~~
+
+Correctable errors pose no impacts on the functionality of
+the interface. The PCI Express protocol can recover without any
+software intervention or any loss of data. These errors do not
+require any recovery actions. The AER driver clears the device's
+correctable error status register accordingly and logs these errors.
+
+Non-correctable (non-fatal and fatal) errors
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If an error message indicates a non-fatal error, performing link reset
+at upstream is not required. The AER driver calls error_detected(dev,
+pci_channel_io_normal) to all drivers associated within a hierarchy in
+question. for example::
+
+  EndPoint<==>DownstreamPort B<==>UpstreamPort A<==>RootPort
+
+If Upstream port A captures an AER error, the hierarchy consists of
+Downstream port B and EndPoint.
+
+A driver may return PCI_ERS_RESULT_CAN_RECOVER,
+PCI_ERS_RESULT_DISCONNECT, or PCI_ERS_RESULT_NEED_RESET, depending on
+whether it can recover or the AER driver calls mmio_enabled as next.
+
+If an error message indicates a fatal error, kernel will broadcast
+error_detected(dev, pci_channel_io_frozen) to all drivers within
+a hierarchy in question. Then, performing link reset at upstream is
+necessary. As different kinds of devices might use different approaches
+to reset link, AER port service driver is required to provide the
+function to reset link. Firstly, kernel looks for if the upstream
+component has an aer driver. If it has, kernel uses the reset_link
+callback of the aer driver. If the upstream component has no aer driver
+and the port is downstream port, we will perform a hot reset as the
+default by setting the Secondary Bus Reset bit of the Bridge Control
+register associated with the downstream port. As for upstream ports,
+they should provide their own aer service drivers with reset_link
+function. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER and
+reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes
+to mmio_enabled.
+
+helper functions
+----------------
+::
+
+  int pci_enable_pcie_error_reporting(struct pci_dev *dev);
+
+pci_enable_pcie_error_reporting enables the device to send error
+messages to root port when an error is detected. Note that devices
+don't enable the error reporting by default, so device drivers need
+call this function to enable it.
+
+::
+
+  int pci_disable_pcie_error_reporting(struct pci_dev *dev);
+
+pci_disable_pcie_error_reporting disables the device to send error
+messages to root port when an error is detected.
+
+::
+
+  int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);`
+
+pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable
+error status register.
+
+Frequent Asked Questions
+------------------------
+
+Q:
+  What happens if a PCI Express device driver does not provide an
+  error recovery handler (pci_driver->err_handler is equal to NULL)?
+
+A:
+  The devices attached with the driver won't be recovered. If the
+  error is fatal, kernel will print out warning messages. Please refer
+  to section 3 for more information.
+
+Q:
+  What happens if an upstream port service driver does not provide
+  callback reset_link?
+
+A:
+  Fatal error recovery will fail if the errors are reported by the
+  upstream ports who are attached by the service driver.
+
+Q:
+  How does this infrastructure deal with driver that is not PCI
+  Express aware?
+
+A:
+  This infrastructure calls the error callback functions of the
+  driver when an error happens. But if the driver is not aware of
+  PCI Express, the device might not report its own errors to root
+  port.
+
+Q:
+  What modifications will that driver need to make it compatible
+  with the PCI Express AER Root driver?
+
+A:
+  It could call the helper functions to enable AER in devices and
+  cleanup uncorrectable status register. Pls. refer to section 3.3.
+
+
+Software error injection
+========================
+
+Debugging PCIe AER error recovery code is quite difficult because it
+is hard to trigger real hardware errors. Software based error
+injection can be used to fake various kinds of PCIe errors.
+
+First you should enable PCIe AER software error injection in kernel
+configuration, that is, following item should be in your .config.
+
+CONFIG_PCIEAER_INJECT=y or CONFIG_PCIEAER_INJECT=m
+
+After reboot with new kernel or insert the module, a device file named
+/dev/aer_inject should be created.
+
+Then, you need a user space tool named aer-inject, which can be gotten
+from:
+
+    https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/
+
+More information about aer-inject can be found in the document comes
+with its source code.
diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt
deleted file mode 100644
index 48ce7903e3c6..000000000000
--- a/Documentation/PCI/pcieaer-howto.txt
+++ /dev/null
@@ -1,267 +0,0 @@
-   The PCI Express Advanced Error Reporting Driver Guide HOWTO
-		T. Long Nguyen	<tom.l.nguyen@intel.com>
-		Yanmin Zhang	<yanmin.zhang@intel.com>
-				07/29/2006
-
-
-1. Overview
-
-1.1 About this guide
-
-This guide describes the basics of the PCI Express Advanced Error
-Reporting (AER) driver and provides information on how to use it, as
-well as how to enable the drivers of endpoint devices to conform with
-PCI Express AER driver.
-
-1.2 Copyright (C) Intel Corporation 2006.
-
-1.3 What is the PCI Express AER Driver?
-
-PCI Express error signaling can occur on the PCI Express link itself
-or on behalf of transactions initiated on the link. PCI Express
-defines two error reporting paradigms: the baseline capability and
-the Advanced Error Reporting capability. The baseline capability is
-required of all PCI Express components providing a minimum defined
-set of error reporting requirements. Advanced Error Reporting
-capability is implemented with a PCI Express advanced error reporting
-extended capability structure providing more robust error reporting.
-
-The PCI Express AER driver provides the infrastructure to support PCI
-Express Advanced Error Reporting capability. The PCI Express AER
-driver provides three basic functions:
-
--	Gathers the comprehensive error information if errors occurred.
--	Reports error to the users.
--	Performs error recovery actions.
-
-AER driver only attaches root ports which support PCI-Express AER
-capability.
-
-
-2. User Guide
-
-2.1 Include the PCI Express AER Root Driver into the Linux Kernel
-
-The PCI Express AER Root driver is a Root Port service driver attached
-to the PCI Express Port Bus driver. If a user wants to use it, the driver
-has to be compiled. Option CONFIG_PCIEAER supports this capability. It
-depends on CONFIG_PCIEPORTBUS, so pls. set CONFIG_PCIEPORTBUS=y and
-CONFIG_PCIEAER = y.
-
-2.2 Load PCI Express AER Root Driver
-
-Some systems have AER support in firmware. Enabling Linux AER support at
-the same time the firmware handles AER may result in unpredictable
-behavior. Therefore, Linux does not handle AER events unless the firmware
-grants AER control to the OS via the ACPI _OSC method. See the PCI FW 3.0
-Specification for details regarding _OSC usage.
-
-2.3 AER error output
-
-When a PCIe AER error is captured, an error message will be output to
-console. If it's a correctable error, it is output as a warning.
-Otherwise, it is printed as an error. So users could choose different
-log level to filter out correctable error messages.
-
-Below shows an example:
-0000:50:00.0: PCIe Bus Error: severity=Uncorrected (Fatal), type=Transaction Layer, id=0500(Requester ID)
-0000:50:00.0:   device [8086:0329] error status/mask=00100000/00000000
-0000:50:00.0:    [20] Unsupported Request    (First)
-0000:50:00.0:   TLP Header: 04000001 00200a03 05010000 00050100
-
-In the example, 'Requester ID' means the ID of the device who sends
-the error message to root port. Pls. refer to pci express specs for
-other fields.
-
-2.4 AER Statistics / Counters
-
-When PCIe AER errors are captured, the counters / statistics are also exposed
-in the form of sysfs attributes which are documented at
-Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
-
-3. Developer Guide
-
-To enable AER aware support requires a software driver to configure
-the AER capability structure within its device and to provide callbacks.
-
-To support AER better, developers need understand how AER does work
-firstly.
-
-PCI Express errors are classified into two types: correctable errors
-and uncorrectable errors. This classification is based on the impacts
-of those errors, which may result in degraded performance or function
-failure.
-
-Correctable errors pose no impacts on the functionality of the
-interface. The PCI Express protocol can recover without any software
-intervention or any loss of data. These errors are detected and
-corrected by hardware. Unlike correctable errors, uncorrectable
-errors impact functionality of the interface. Uncorrectable errors
-can cause a particular transaction or a particular PCI Express link
-to be unreliable. Depending on those error conditions, uncorrectable
-errors are further classified into non-fatal errors and fatal errors.
-Non-fatal errors cause the particular transaction to be unreliable,
-but the PCI Express link itself is fully functional. Fatal errors, on
-the other hand, cause the link to be unreliable.
-
-When AER is enabled, a PCI Express device will automatically send an
-error message to the PCIe root port above it when the device captures
-an error. The Root Port, upon receiving an error reporting message,
-internally processes and logs the error message in its PCI Express
-capability structure. Error information being logged includes storing
-the error reporting agent's requestor ID into the Error Source
-Identification Registers and setting the error bits of the Root Error
-Status Register accordingly. If AER error reporting is enabled in Root
-Error Command Register, the Root Port generates an interrupt if an
-error is detected.
-
-Note that the errors as described above are related to the PCI Express
-hierarchy and links. These errors do not include any device specific
-errors because device specific errors will still get sent directly to
-the device driver.
-
-3.1 Configure the AER capability structure
-
-AER aware drivers of PCI Express component need change the device
-control registers to enable AER. They also could change AER registers,
-including mask and severity registers. Helper function
-pci_enable_pcie_error_reporting could be used to enable AER. See
-section 3.3.
-
-3.2. Provide callbacks
-
-3.2.1 callback reset_link to reset pci express link
-
-This callback is used to reset the pci express physical link when a
-fatal error happens. The root port aer service driver provides a
-default reset_link function, but different upstream ports might
-have different specifications to reset pci express link, so all
-upstream ports should provide their own reset_link functions.
-
-In struct pcie_port_service_driver, a new pointer, reset_link, is
-added.
-
-pci_ers_result_t (*reset_link) (struct pci_dev *dev);
-
-Section 3.2.2.2 provides more detailed info on when to call
-reset_link.
-
-3.2.2 PCI error-recovery callbacks
-
-The PCI Express AER Root driver uses error callbacks to coordinate
-with downstream device drivers associated with a hierarchy in question
-when performing error recovery actions.
-
-Data struct pci_driver has a pointer, err_handler, to point to
-pci_error_handlers who consists of a couple of callback function
-pointers. AER driver follows the rules defined in
-pci-error-recovery.txt except pci express specific parts (e.g.
-reset_link). Pls. refer to pci-error-recovery.txt for detailed
-definitions of the callbacks.
-
-Below sections specify when to call the error callback functions.
-
-3.2.2.1 Correctable errors
-
-Correctable errors pose no impacts on the functionality of
-the interface. The PCI Express protocol can recover without any
-software intervention or any loss of data. These errors do not
-require any recovery actions. The AER driver clears the device's
-correctable error status register accordingly and logs these errors.
-
-3.2.2.2 Non-correctable (non-fatal and fatal) errors
-
-If an error message indicates a non-fatal error, performing link reset
-at upstream is not required. The AER driver calls error_detected(dev,
-pci_channel_io_normal) to all drivers associated within a hierarchy in
-question. for example,
-EndPoint<==>DownstreamPort B<==>UpstreamPort A<==>RootPort.
-If Upstream port A captures an AER error, the hierarchy consists of
-Downstream port B and EndPoint.
-
-A driver may return PCI_ERS_RESULT_CAN_RECOVER,
-PCI_ERS_RESULT_DISCONNECT, or PCI_ERS_RESULT_NEED_RESET, depending on
-whether it can recover or the AER driver calls mmio_enabled as next.
-
-If an error message indicates a fatal error, kernel will broadcast
-error_detected(dev, pci_channel_io_frozen) to all drivers within
-a hierarchy in question. Then, performing link reset at upstream is
-necessary. As different kinds of devices might use different approaches
-to reset link, AER port service driver is required to provide the
-function to reset link. Firstly, kernel looks for if the upstream
-component has an aer driver. If it has, kernel uses the reset_link
-callback of the aer driver. If the upstream component has no aer driver
-and the port is downstream port, we will perform a hot reset as the
-default by setting the Secondary Bus Reset bit of the Bridge Control
-register associated with the downstream port. As for upstream ports,
-they should provide their own aer service drivers with reset_link
-function. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER and
-reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes
-to mmio_enabled.
-
-3.3 helper functions
-
-3.3.1 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
-pci_enable_pcie_error_reporting enables the device to send error
-messages to root port when an error is detected. Note that devices
-don't enable the error reporting by default, so device drivers need
-call this function to enable it.
-
-3.3.2 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
-pci_disable_pcie_error_reporting disables the device to send error
-messages to root port when an error is detected.
-
-3.3.3 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
-pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable
-error status register.
-
-3.4 Frequent Asked Questions
-
-Q: What happens if a PCI Express device driver does not provide an
-error recovery handler (pci_driver->err_handler is equal to NULL)?
-
-A: The devices attached with the driver won't be recovered. If the
-error is fatal, kernel will print out warning messages. Please refer
-to section 3 for more information.
-
-Q: What happens if an upstream port service driver does not provide
-callback reset_link?
-
-A: Fatal error recovery will fail if the errors are reported by the
-upstream ports who are attached by the service driver.
-
-Q: How does this infrastructure deal with driver that is not PCI
-Express aware?
-
-A: This infrastructure calls the error callback functions of the
-driver when an error happens. But if the driver is not aware of
-PCI Express, the device might not report its own errors to root
-port.
-
-Q: What modifications will that driver need to make it compatible
-with the PCI Express AER Root driver?
-
-A: It could call the helper functions to enable AER in devices and
-cleanup uncorrectable status register. Pls. refer to section 3.3.
-
-
-4. Software error injection
-
-Debugging PCIe AER error recovery code is quite difficult because it
-is hard to trigger real hardware errors. Software based error
-injection can be used to fake various kinds of PCIe errors.
-
-First you should enable PCIe AER software error injection in kernel
-configuration, that is, following item should be in your .config.
-
-CONFIG_PCIEAER_INJECT=y or CONFIG_PCIEAER_INJECT=m
-
-After reboot with new kernel or insert the module, a device file named
-/dev/aer_inject should be created.
-
-Then, you need a user space tool named aer-inject, which can be gotten
-from:
-    https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/
-
-More information about aer-inject can be found in the document comes
-with its source code.
-- 
cgit 


From d8946fc38517755a2d9535a04c3f6a8d3a331eee Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Tue, 14 May 2019 22:47:31 +0800
Subject: Documentation: PCI: convert endpoint/pci-endpoint.txt to reST

Convert plain text documentation to reStructuredText format and add it to
Sphinx TOC tree.  No essential content change.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/PCI/endpoint/index.rst        |  10 ++
 Documentation/PCI/endpoint/pci-endpoint.rst | 231 ++++++++++++++++++++++++++++
 Documentation/PCI/endpoint/pci-endpoint.txt | 215 --------------------------
 Documentation/PCI/index.rst                 |   1 +
 4 files changed, 242 insertions(+), 215 deletions(-)
 create mode 100644 Documentation/PCI/endpoint/index.rst
 create mode 100644 Documentation/PCI/endpoint/pci-endpoint.rst
 delete mode 100644 Documentation/PCI/endpoint/pci-endpoint.txt

(limited to 'Documentation')

diff --git a/Documentation/PCI/endpoint/index.rst b/Documentation/PCI/endpoint/index.rst
new file mode 100644
index 000000000000..0db4f2fcd7f0
--- /dev/null
+++ b/Documentation/PCI/endpoint/index.rst
@@ -0,0 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+PCI Endpoint Framework
+======================
+
+.. toctree::
+   :maxdepth: 2
+
+   pci-endpoint
diff --git a/Documentation/PCI/endpoint/pci-endpoint.rst b/Documentation/PCI/endpoint/pci-endpoint.rst
new file mode 100644
index 000000000000..0e2311b5617b
--- /dev/null
+++ b/Documentation/PCI/endpoint/pci-endpoint.rst
@@ -0,0 +1,231 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Author: Kishon Vijay Abraham I <kishon@ti.com>
+
+This document is a guide to use the PCI Endpoint Framework in order to create
+endpoint controller driver, endpoint function driver, and using configfs
+interface to bind the function driver to the controller driver.
+
+Introduction
+============
+
+Linux has a comprehensive PCI subsystem to support PCI controllers that
+operates in Root Complex mode. The subsystem has capability to scan PCI bus,
+assign memory resources and IRQ resources, load PCI driver (based on
+vendor ID, device ID), support other services like hot-plug, power management,
+advanced error reporting and virtual channels.
+
+However the PCI controller IP integrated in some SoCs is capable of operating
+either in Root Complex mode or Endpoint mode. PCI Endpoint Framework will
+add endpoint mode support in Linux. This will help to run Linux in an
+EP system which can have a wide variety of use cases from testing or
+validation, co-processor accelerator, etc.
+
+PCI Endpoint Core
+=================
+
+The PCI Endpoint Core layer comprises 3 components: the Endpoint Controller
+library, the Endpoint Function library, and the configfs layer to bind the
+endpoint function with the endpoint controller.
+
+PCI Endpoint Controller(EPC) Library
+------------------------------------
+
+The EPC library provides APIs to be used by the controller that can operate
+in endpoint mode. It also provides APIs to be used by function driver/library
+in order to implement a particular endpoint function.
+
+APIs for the PCI controller Driver
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This section lists the APIs that the PCI Endpoint core provides to be used
+by the PCI controller driver.
+
+* devm_pci_epc_create()/pci_epc_create()
+
+   The PCI controller driver should implement the following ops:
+
+	 * write_header: ops to populate configuration space header
+	 * set_bar: ops to configure the BAR
+	 * clear_bar: ops to reset the BAR
+	 * alloc_addr_space: ops to allocate in PCI controller address space
+	 * free_addr_space: ops to free the allocated address space
+	 * raise_irq: ops to raise a legacy, MSI or MSI-X interrupt
+	 * start: ops to start the PCI link
+	 * stop: ops to stop the PCI link
+
+   The PCI controller driver can then create a new EPC device by invoking
+   devm_pci_epc_create()/pci_epc_create().
+
+* devm_pci_epc_destroy()/pci_epc_destroy()
+
+   The PCI controller driver can destroy the EPC device created by either
+   devm_pci_epc_create() or pci_epc_create() using devm_pci_epc_destroy() or
+   pci_epc_destroy().
+
+* pci_epc_linkup()
+
+   In order to notify all the function devices that the EPC device to which
+   they are linked has established a link with the host, the PCI controller
+   driver should invoke pci_epc_linkup().
+
+* pci_epc_mem_init()
+
+   Initialize the pci_epc_mem structure used for allocating EPC addr space.
+
+* pci_epc_mem_exit()
+
+   Cleanup the pci_epc_mem structure allocated during pci_epc_mem_init().
+
+
+APIs for the PCI Endpoint Function Driver
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This section lists the APIs that the PCI Endpoint core provides to be used
+by the PCI endpoint function driver.
+
+* pci_epc_write_header()
+
+   The PCI endpoint function driver should use pci_epc_write_header() to
+   write the standard configuration header to the endpoint controller.
+
+* pci_epc_set_bar()
+
+   The PCI endpoint function driver should use pci_epc_set_bar() to configure
+   the Base Address Register in order for the host to assign PCI addr space.
+   Register space of the function driver is usually configured
+   using this API.
+
+* pci_epc_clear_bar()
+
+   The PCI endpoint function driver should use pci_epc_clear_bar() to reset
+   the BAR.
+
+* pci_epc_raise_irq()
+
+   The PCI endpoint function driver should use pci_epc_raise_irq() to raise
+   Legacy Interrupt, MSI or MSI-X Interrupt.
+
+* pci_epc_mem_alloc_addr()
+
+   The PCI endpoint function driver should use pci_epc_mem_alloc_addr(), to
+   allocate memory address from EPC addr space which is required to access
+   RC's buffer
+
+* pci_epc_mem_free_addr()
+
+   The PCI endpoint function driver should use pci_epc_mem_free_addr() to
+   free the memory space allocated using pci_epc_mem_alloc_addr().
+
+Other APIs
+~~~~~~~~~~
+
+There are other APIs provided by the EPC library. These are used for binding
+the EPF device with EPC device. pci-ep-cfs.c can be used as reference for
+using these APIs.
+
+* pci_epc_get()
+
+   Get a reference to the PCI endpoint controller based on the device name of
+   the controller.
+
+* pci_epc_put()
+
+   Release the reference to the PCI endpoint controller obtained using
+   pci_epc_get()
+
+* pci_epc_add_epf()
+
+   Add a PCI endpoint function to a PCI endpoint controller. A PCIe device
+   can have up to 8 functions according to the specification.
+
+* pci_epc_remove_epf()
+
+   Remove the PCI endpoint function from PCI endpoint controller.
+
+* pci_epc_start()
+
+   The PCI endpoint function driver should invoke pci_epc_start() once it
+   has configured the endpoint function and wants to start the PCI link.
+
+* pci_epc_stop()
+
+   The PCI endpoint function driver should invoke pci_epc_stop() to stop
+   the PCI LINK.
+
+
+PCI Endpoint Function(EPF) Library
+----------------------------------
+
+The EPF library provides APIs to be used by the function driver and the EPC
+library to provide endpoint mode functionality.
+
+APIs for the PCI Endpoint Function Driver
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This section lists the APIs that the PCI Endpoint core provides to be used
+by the PCI endpoint function driver.
+
+* pci_epf_register_driver()
+
+   The PCI Endpoint Function driver should implement the following ops:
+	 * bind: ops to perform when a EPC device has been bound to EPF device
+	 * unbind: ops to perform when a binding has been lost between a EPC
+	   device and EPF device
+	 * linkup: ops to perform when the EPC device has established a
+	   connection with a host system
+
+  The PCI Function driver can then register the PCI EPF driver by using
+  pci_epf_register_driver().
+
+* pci_epf_unregister_driver()
+
+  The PCI Function driver can unregister the PCI EPF driver by using
+  pci_epf_unregister_driver().
+
+* pci_epf_alloc_space()
+
+  The PCI Function driver can allocate space for a particular BAR using
+  pci_epf_alloc_space().
+
+* pci_epf_free_space()
+
+  The PCI Function driver can free the allocated space
+  (using pci_epf_alloc_space) by invoking pci_epf_free_space().
+
+APIs for the PCI Endpoint Controller Library
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This section lists the APIs that the PCI Endpoint core provides to be used
+by the PCI endpoint controller library.
+
+* pci_epf_linkup()
+
+   The PCI endpoint controller library invokes pci_epf_linkup() when the
+   EPC device has established the connection to the host.
+
+Other APIs
+~~~~~~~~~~
+
+There are other APIs provided by the EPF library. These are used to notify
+the function driver when the EPF device is bound to the EPC device.
+pci-ep-cfs.c can be used as reference for using these APIs.
+
+* pci_epf_create()
+
+   Create a new PCI EPF device by passing the name of the PCI EPF device.
+   This name will be used to bind the the EPF device to a EPF driver.
+
+* pci_epf_destroy()
+
+   Destroy the created PCI EPF device.
+
+* pci_epf_bind()
+
+   pci_epf_bind() should be invoked when the EPF device has been bound to
+   a EPC device.
+
+* pci_epf_unbind()
+
+   pci_epf_unbind() should be invoked when the binding between EPC device
+   and EPF device is lost.
diff --git a/Documentation/PCI/endpoint/pci-endpoint.txt b/Documentation/PCI/endpoint/pci-endpoint.txt
deleted file mode 100644
index e86a96b66a6a..000000000000
--- a/Documentation/PCI/endpoint/pci-endpoint.txt
+++ /dev/null
@@ -1,215 +0,0 @@
-			    PCI ENDPOINT FRAMEWORK
-		    Kishon Vijay Abraham I <kishon@ti.com>
-
-This document is a guide to use the PCI Endpoint Framework in order to create
-endpoint controller driver, endpoint function driver, and using configfs
-interface to bind the function driver to the controller driver.
-
-1. Introduction
-
-Linux has a comprehensive PCI subsystem to support PCI controllers that
-operates in Root Complex mode. The subsystem has capability to scan PCI bus,
-assign memory resources and IRQ resources, load PCI driver (based on
-vendor ID, device ID), support other services like hot-plug, power management,
-advanced error reporting and virtual channels.
-
-However the PCI controller IP integrated in some SoCs is capable of operating
-either in Root Complex mode or Endpoint mode. PCI Endpoint Framework will
-add endpoint mode support in Linux. This will help to run Linux in an
-EP system which can have a wide variety of use cases from testing or
-validation, co-processor accelerator, etc.
-
-2. PCI Endpoint Core
-
-The PCI Endpoint Core layer comprises 3 components: the Endpoint Controller
-library, the Endpoint Function library, and the configfs layer to bind the
-endpoint function with the endpoint controller.
-
-2.1 PCI Endpoint Controller(EPC) Library
-
-The EPC library provides APIs to be used by the controller that can operate
-in endpoint mode. It also provides APIs to be used by function driver/library
-in order to implement a particular endpoint function.
-
-2.1.1 APIs for the PCI controller Driver
-
-This section lists the APIs that the PCI Endpoint core provides to be used
-by the PCI controller driver.
-
-*) devm_pci_epc_create()/pci_epc_create()
-
-   The PCI controller driver should implement the following ops:
-	 * write_header: ops to populate configuration space header
-	 * set_bar: ops to configure the BAR
-	 * clear_bar: ops to reset the BAR
-	 * alloc_addr_space: ops to allocate in PCI controller address space
-	 * free_addr_space: ops to free the allocated address space
-	 * raise_irq: ops to raise a legacy, MSI or MSI-X interrupt
-	 * start: ops to start the PCI link
-	 * stop: ops to stop the PCI link
-
-   The PCI controller driver can then create a new EPC device by invoking
-   devm_pci_epc_create()/pci_epc_create().
-
-*) devm_pci_epc_destroy()/pci_epc_destroy()
-
-   The PCI controller driver can destroy the EPC device created by either
-   devm_pci_epc_create() or pci_epc_create() using devm_pci_epc_destroy() or
-   pci_epc_destroy().
-
-*) pci_epc_linkup()
-
-   In order to notify all the function devices that the EPC device to which
-   they are linked has established a link with the host, the PCI controller
-   driver should invoke pci_epc_linkup().
-
-*) pci_epc_mem_init()
-
-   Initialize the pci_epc_mem structure used for allocating EPC addr space.
-
-*) pci_epc_mem_exit()
-
-   Cleanup the pci_epc_mem structure allocated during pci_epc_mem_init().
-
-2.1.2 APIs for the PCI Endpoint Function Driver
-
-This section lists the APIs that the PCI Endpoint core provides to be used
-by the PCI endpoint function driver.
-
-*) pci_epc_write_header()
-
-   The PCI endpoint function driver should use pci_epc_write_header() to
-   write the standard configuration header to the endpoint controller.
-
-*) pci_epc_set_bar()
-
-   The PCI endpoint function driver should use pci_epc_set_bar() to configure
-   the Base Address Register in order for the host to assign PCI addr space.
-   Register space of the function driver is usually configured
-   using this API.
-
-*) pci_epc_clear_bar()
-
-   The PCI endpoint function driver should use pci_epc_clear_bar() to reset
-   the BAR.
-
-*) pci_epc_raise_irq()
-
-   The PCI endpoint function driver should use pci_epc_raise_irq() to raise
-   Legacy Interrupt, MSI or MSI-X Interrupt.
-
-*) pci_epc_mem_alloc_addr()
-
-   The PCI endpoint function driver should use pci_epc_mem_alloc_addr(), to
-   allocate memory address from EPC addr space which is required to access
-   RC's buffer
-
-*) pci_epc_mem_free_addr()
-
-   The PCI endpoint function driver should use pci_epc_mem_free_addr() to
-   free the memory space allocated using pci_epc_mem_alloc_addr().
-
-2.1.3 Other APIs
-
-There are other APIs provided by the EPC library. These are used for binding
-the EPF device with EPC device. pci-ep-cfs.c can be used as reference for
-using these APIs.
-
-*) pci_epc_get()
-
-   Get a reference to the PCI endpoint controller based on the device name of
-   the controller.
-
-*) pci_epc_put()
-
-   Release the reference to the PCI endpoint controller obtained using
-   pci_epc_get()
-
-*) pci_epc_add_epf()
-
-   Add a PCI endpoint function to a PCI endpoint controller. A PCIe device
-   can have up to 8 functions according to the specification.
-
-*) pci_epc_remove_epf()
-
-   Remove the PCI endpoint function from PCI endpoint controller.
-
-*) pci_epc_start()
-
-   The PCI endpoint function driver should invoke pci_epc_start() once it
-   has configured the endpoint function and wants to start the PCI link.
-
-*) pci_epc_stop()
-
-   The PCI endpoint function driver should invoke pci_epc_stop() to stop
-   the PCI LINK.
-
-2.2 PCI Endpoint Function(EPF) Library
-
-The EPF library provides APIs to be used by the function driver and the EPC
-library to provide endpoint mode functionality.
-
-2.2.1 APIs for the PCI Endpoint Function Driver
-
-This section lists the APIs that the PCI Endpoint core provides to be used
-by the PCI endpoint function driver.
-
-*) pci_epf_register_driver()
-
-   The PCI Endpoint Function driver should implement the following ops:
-	 * bind: ops to perform when a EPC device has been bound to EPF device
-	 * unbind: ops to perform when a binding has been lost between a EPC
-	   device and EPF device
-	 * linkup: ops to perform when the EPC device has established a
-	   connection with a host system
-
-  The PCI Function driver can then register the PCI EPF driver by using
-  pci_epf_register_driver().
-
-*) pci_epf_unregister_driver()
-
-  The PCI Function driver can unregister the PCI EPF driver by using
-  pci_epf_unregister_driver().
-
-*) pci_epf_alloc_space()
-
-  The PCI Function driver can allocate space for a particular BAR using
-  pci_epf_alloc_space().
-
-*) pci_epf_free_space()
-
-  The PCI Function driver can free the allocated space
-  (using pci_epf_alloc_space) by invoking pci_epf_free_space().
-
-2.2.2 APIs for the PCI Endpoint Controller Library
-This section lists the APIs that the PCI Endpoint core provides to be used
-by the PCI endpoint controller library.
-
-*) pci_epf_linkup()
-
-   The PCI endpoint controller library invokes pci_epf_linkup() when the
-   EPC device has established the connection to the host.
-
-2.2.2 Other APIs
-There are other APIs provided by the EPF library. These are used to notify
-the function driver when the EPF device is bound to the EPC device.
-pci-ep-cfs.c can be used as reference for using these APIs.
-
-*) pci_epf_create()
-
-   Create a new PCI EPF device by passing the name of the PCI EPF device.
-   This name will be used to bind the the EPF device to a EPF driver.
-
-*) pci_epf_destroy()
-
-   Destroy the created PCI EPF device.
-
-*) pci_epf_bind()
-
-   pci_epf_bind() should be invoked when the EPF device has been bound to
-   a EPC device.
-
-*) pci_epf_unbind()
-
-   pci_epf_unbind() should be invoked when the binding between EPC device
-   and EPF device is lost.
diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst
index f54b65b1ca5f..f4c6121868c3 100644
--- a/Documentation/PCI/index.rst
+++ b/Documentation/PCI/index.rst
@@ -15,3 +15,4 @@ Linux PCI Bus Subsystem
    acpi-info
    pci-error-recovery
    pcieaer-howto
+   endpoint/index
-- 
cgit 


From d4518e4ac64cae18f953f8a433359ea1face4b52 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Tue, 14 May 2019 22:47:32 +0800
Subject: Documentation: PCI: convert endpoint/pci-endpoint-cfs.txt to reST

Convert plain text documentation to reStructuredText format and add it to
Sphinx TOC tree.  No essential content change.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/PCI/endpoint/index.rst            |   1 +
 Documentation/PCI/endpoint/pci-endpoint-cfs.rst | 118 ++++++++++++++++++++++++
 Documentation/PCI/endpoint/pci-endpoint-cfs.txt | 105 ---------------------
 3 files changed, 119 insertions(+), 105 deletions(-)
 create mode 100644 Documentation/PCI/endpoint/pci-endpoint-cfs.rst
 delete mode 100644 Documentation/PCI/endpoint/pci-endpoint-cfs.txt

(limited to 'Documentation')

diff --git a/Documentation/PCI/endpoint/index.rst b/Documentation/PCI/endpoint/index.rst
index 0db4f2fcd7f0..3951de9f923c 100644
--- a/Documentation/PCI/endpoint/index.rst
+++ b/Documentation/PCI/endpoint/index.rst
@@ -8,3 +8,4 @@ PCI Endpoint Framework
    :maxdepth: 2
 
    pci-endpoint
+   pci-endpoint-cfs
diff --git a/Documentation/PCI/endpoint/pci-endpoint-cfs.rst b/Documentation/PCI/endpoint/pci-endpoint-cfs.rst
new file mode 100644
index 000000000000..b6d39cdec56e
--- /dev/null
+++ b/Documentation/PCI/endpoint/pci-endpoint-cfs.rst
@@ -0,0 +1,118 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================================
+Configuring PCI Endpoint Using CONFIGFS
+=======================================
+
+:Author: Kishon Vijay Abraham I <kishon@ti.com>
+
+The PCI Endpoint Core exposes configfs entry (pci_ep) to configure the
+PCI endpoint function and to bind the endpoint function
+with the endpoint controller. (For introducing other mechanisms to
+configure the PCI Endpoint Function refer to [1]).
+
+Mounting configfs
+=================
+
+The PCI Endpoint Core layer creates pci_ep directory in the mounted configfs
+directory. configfs can be mounted using the following command::
+
+	mount -t configfs none /sys/kernel/config
+
+Directory Structure
+===================
+
+The pci_ep configfs has two directories at its root: controllers and
+functions. Every EPC device present in the system will have an entry in
+the *controllers* directory and and every EPF driver present in the system
+will have an entry in the *functions* directory.
+::
+
+	/sys/kernel/config/pci_ep/
+		.. controllers/
+		.. functions/
+
+Creating EPF Device
+===================
+
+Every registered EPF driver will be listed in controllers directory. The
+entries corresponding to EPF driver will be created by the EPF core.
+::
+
+	/sys/kernel/config/pci_ep/functions/
+		.. <EPF Driver1>/
+			... <EPF Device 11>/
+			... <EPF Device 21>/
+		.. <EPF Driver2>/
+			... <EPF Device 12>/
+			... <EPF Device 22>/
+
+In order to create a <EPF device> of the type probed by <EPF Driver>, the
+user has to create a directory inside <EPF DriverN>.
+
+Every <EPF device> directory consists of the following entries that can be
+used to configure the standard configuration header of the endpoint function.
+(These entries are created by the framework when any new <EPF Device> is
+created)
+::
+
+		.. <EPF Driver1>/
+			... <EPF Device 11>/
+				... vendorid
+				... deviceid
+				... revid
+				... progif_code
+				... subclass_code
+				... baseclass_code
+				... cache_line_size
+				... subsys_vendor_id
+				... subsys_id
+				... interrupt_pin
+
+EPC Device
+==========
+
+Every registered EPC device will be listed in controllers directory. The
+entries corresponding to EPC device will be created by the EPC core.
+::
+
+	/sys/kernel/config/pci_ep/controllers/
+		.. <EPC Device1>/
+			... <Symlink EPF Device11>/
+			... <Symlink EPF Device12>/
+			... start
+		.. <EPC Device2>/
+			... <Symlink EPF Device21>/
+			... <Symlink EPF Device22>/
+			... start
+
+The <EPC Device> directory will have a list of symbolic links to
+<EPF Device>. These symbolic links should be created by the user to
+represent the functions present in the endpoint device.
+
+The <EPC Device> directory will also have a *start* field. Once
+"1" is written to this field, the endpoint device will be ready to
+establish the link with the host. This is usually done after
+all the EPF devices are created and linked with the EPC device.
+::
+
+			 | controllers/
+				| <Directory: EPC name>/
+					| <Symbolic Link: Function>
+					| start
+			 | functions/
+				| <Directory: EPF driver>/
+					| <Directory: EPF device>/
+						| vendorid
+						| deviceid
+						| revid
+						| progif_code
+						| subclass_code
+						| baseclass_code
+						| cache_line_size
+						| subsys_vendor_id
+						| subsys_id
+						| interrupt_pin
+						| function
+
+[1] :doc:`pci-endpoint`
diff --git a/Documentation/PCI/endpoint/pci-endpoint-cfs.txt b/Documentation/PCI/endpoint/pci-endpoint-cfs.txt
deleted file mode 100644
index d740f29960a4..000000000000
--- a/Documentation/PCI/endpoint/pci-endpoint-cfs.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-                   CONFIGURING PCI ENDPOINT USING CONFIGFS
-                    Kishon Vijay Abraham I <kishon@ti.com>
-
-The PCI Endpoint Core exposes configfs entry (pci_ep) to configure the
-PCI endpoint function and to bind the endpoint function
-with the endpoint controller. (For introducing other mechanisms to
-configure the PCI Endpoint Function refer to [1]).
-
-*) Mounting configfs
-
-The PCI Endpoint Core layer creates pci_ep directory in the mounted configfs
-directory. configfs can be mounted using the following command.
-
-	mount -t configfs none /sys/kernel/config
-
-*) Directory Structure
-
-The pci_ep configfs has two directories at its root: controllers and
-functions. Every EPC device present in the system will have an entry in
-the *controllers* directory and and every EPF driver present in the system
-will have an entry in the *functions* directory.
-
-/sys/kernel/config/pci_ep/
-	.. controllers/
-	.. functions/
-
-*) Creating EPF Device
-
-Every registered EPF driver will be listed in controllers directory. The
-entries corresponding to EPF driver will be created by the EPF core.
-
-/sys/kernel/config/pci_ep/functions/
-	.. <EPF Driver1>/
-		... <EPF Device 11>/
-		... <EPF Device 21>/
-	.. <EPF Driver2>/
-		... <EPF Device 12>/
-		... <EPF Device 22>/
-
-In order to create a <EPF device> of the type probed by <EPF Driver>, the
-user has to create a directory inside <EPF DriverN>.
-
-Every <EPF device> directory consists of the following entries that can be
-used to configure the standard configuration header of the endpoint function.
-(These entries are created by the framework when any new <EPF Device> is
-created)
-
-	.. <EPF Driver1>/
-		... <EPF Device 11>/
-			... vendorid
-			... deviceid
-			... revid
-			... progif_code
-			... subclass_code
-			... baseclass_code
-			... cache_line_size
-			... subsys_vendor_id
-			... subsys_id
-			... interrupt_pin
-
-*) EPC Device
-
-Every registered EPC device will be listed in controllers directory. The
-entries corresponding to EPC device will be created by the EPC core.
-
-/sys/kernel/config/pci_ep/controllers/
-	.. <EPC Device1>/
-		... <Symlink EPF Device11>/
-		... <Symlink EPF Device12>/
-		... start
-	.. <EPC Device2>/
-		... <Symlink EPF Device21>/
-		... <Symlink EPF Device22>/
-		... start
-
-The <EPC Device> directory will have a list of symbolic links to
-<EPF Device>. These symbolic links should be created by the user to
-represent the functions present in the endpoint device.
-
-The <EPC Device> directory will also have a *start* field. Once
-"1" is written to this field, the endpoint device will be ready to
-establish the link with the host. This is usually done after
-all the EPF devices are created and linked with the EPC device.
-
-
-			 | controllers/
-				| <Directory: EPC name>/
-					| <Symbolic Link: Function>
-					| start
-			 | functions/
-				| <Directory: EPF driver>/
-					| <Directory: EPF device>/
-						| vendorid
-						| deviceid
-						| revid
-						| progif_code
-						| subclass_code
-						| baseclass_code
-						| cache_line_size
-						| subsys_vendor_id
-						| subsys_id
-						| interrupt_pin
-						| function
-
-[1] -> Documentation/PCI/endpoint/pci-endpoint.txt
-- 
cgit 


From bf2c2658d4b6baed13c274da7091428772b5cb03 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Tue, 14 May 2019 22:47:33 +0800
Subject: Documentation: PCI: convert endpoint/pci-test-function.txt to reST

Convert plain text documentation to reStructuredText format and add it to
Sphinx TOC tree.  No essential content change.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/PCI/endpoint/index.rst             |   1 +
 Documentation/PCI/endpoint/pci-test-function.rst | 103 +++++++++++++++++++++++
 Documentation/PCI/endpoint/pci-test-function.txt |  87 -------------------
 3 files changed, 104 insertions(+), 87 deletions(-)
 create mode 100644 Documentation/PCI/endpoint/pci-test-function.rst
 delete mode 100644 Documentation/PCI/endpoint/pci-test-function.txt

(limited to 'Documentation')

diff --git a/Documentation/PCI/endpoint/index.rst b/Documentation/PCI/endpoint/index.rst
index 3951de9f923c..b680a3fc4fec 100644
--- a/Documentation/PCI/endpoint/index.rst
+++ b/Documentation/PCI/endpoint/index.rst
@@ -9,3 +9,4 @@ PCI Endpoint Framework
 
    pci-endpoint
    pci-endpoint-cfs
+   pci-test-function
diff --git a/Documentation/PCI/endpoint/pci-test-function.rst b/Documentation/PCI/endpoint/pci-test-function.rst
new file mode 100644
index 000000000000..3c8521d7aa31
--- /dev/null
+++ b/Documentation/PCI/endpoint/pci-test-function.rst
@@ -0,0 +1,103 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+PCI Test Function
+=================
+
+:Author: Kishon Vijay Abraham I <kishon@ti.com>
+
+Traditionally PCI RC has always been validated by using standard
+PCI cards like ethernet PCI cards or USB PCI cards or SATA PCI cards.
+However with the addition of EP-core in linux kernel, it is possible
+to configure a PCI controller that can operate in EP mode to work as
+a test device.
+
+The PCI endpoint test device is a virtual device (defined in software)
+used to test the endpoint functionality and serve as a sample driver
+for other PCI endpoint devices (to use the EP framework).
+
+The PCI endpoint test device has the following registers:
+
+	1) PCI_ENDPOINT_TEST_MAGIC
+	2) PCI_ENDPOINT_TEST_COMMAND
+	3) PCI_ENDPOINT_TEST_STATUS
+	4) PCI_ENDPOINT_TEST_SRC_ADDR
+	5) PCI_ENDPOINT_TEST_DST_ADDR
+	6) PCI_ENDPOINT_TEST_SIZE
+	7) PCI_ENDPOINT_TEST_CHECKSUM
+	8) PCI_ENDPOINT_TEST_IRQ_TYPE
+	9) PCI_ENDPOINT_TEST_IRQ_NUMBER
+
+* PCI_ENDPOINT_TEST_MAGIC
+
+This register will be used to test BAR0. A known pattern will be written
+and read back from MAGIC register to verify BAR0.
+
+* PCI_ENDPOINT_TEST_COMMAND
+
+This register will be used by the host driver to indicate the function
+that the endpoint device must perform.
+
+========	================================================================
+Bitfield	Description
+========	================================================================
+Bit 0		raise legacy IRQ
+Bit 1		raise MSI IRQ
+Bit 2		raise MSI-X IRQ
+Bit 3		read command (read data from RC buffer)
+Bit 4		write command (write data to RC buffer)
+Bit 5		copy command (copy data from one RC buffer to another RC buffer)
+========	================================================================
+
+* PCI_ENDPOINT_TEST_STATUS
+
+This register reflects the status of the PCI endpoint device.
+
+========	==============================
+Bitfield	Description
+========	==============================
+Bit 0		read success
+Bit 1		read fail
+Bit 2		write success
+Bit 3		write fail
+Bit 4		copy success
+Bit 5		copy fail
+Bit 6		IRQ raised
+Bit 7		source address is invalid
+Bit 8		destination address is invalid
+========	==============================
+
+* PCI_ENDPOINT_TEST_SRC_ADDR
+
+This register contains the source address (RC buffer address) for the
+COPY/READ command.
+
+* PCI_ENDPOINT_TEST_DST_ADDR
+
+This register contains the destination address (RC buffer address) for
+the COPY/WRITE command.
+
+* PCI_ENDPOINT_TEST_IRQ_TYPE
+
+This register contains the interrupt type (Legacy/MSI) triggered
+for the READ/WRITE/COPY and raise IRQ (Legacy/MSI) commands.
+
+Possible types:
+
+======	==
+Legacy	0
+MSI	1
+MSI-X	2
+======	==
+
+* PCI_ENDPOINT_TEST_IRQ_NUMBER
+
+This register contains the triggered ID interrupt.
+
+Admissible values:
+
+======	===========
+Legacy	0
+MSI	[1 .. 32]
+MSI-X	[1 .. 2048]
+======	===========
diff --git a/Documentation/PCI/endpoint/pci-test-function.txt b/Documentation/PCI/endpoint/pci-test-function.txt
deleted file mode 100644
index 5916f1f592bb..000000000000
--- a/Documentation/PCI/endpoint/pci-test-function.txt
+++ /dev/null
@@ -1,87 +0,0 @@
-				PCI TEST
-		    Kishon Vijay Abraham I <kishon@ti.com>
-
-Traditionally PCI RC has always been validated by using standard
-PCI cards like ethernet PCI cards or USB PCI cards or SATA PCI cards.
-However with the addition of EP-core in linux kernel, it is possible
-to configure a PCI controller that can operate in EP mode to work as
-a test device.
-
-The PCI endpoint test device is a virtual device (defined in software)
-used to test the endpoint functionality and serve as a sample driver
-for other PCI endpoint devices (to use the EP framework).
-
-The PCI endpoint test device has the following registers:
-
-	1) PCI_ENDPOINT_TEST_MAGIC
-	2) PCI_ENDPOINT_TEST_COMMAND
-	3) PCI_ENDPOINT_TEST_STATUS
-	4) PCI_ENDPOINT_TEST_SRC_ADDR
-	5) PCI_ENDPOINT_TEST_DST_ADDR
-	6) PCI_ENDPOINT_TEST_SIZE
-	7) PCI_ENDPOINT_TEST_CHECKSUM
-	8) PCI_ENDPOINT_TEST_IRQ_TYPE
-	9) PCI_ENDPOINT_TEST_IRQ_NUMBER
-
-*) PCI_ENDPOINT_TEST_MAGIC
-
-This register will be used to test BAR0. A known pattern will be written
-and read back from MAGIC register to verify BAR0.
-
-*) PCI_ENDPOINT_TEST_COMMAND:
-
-This register will be used by the host driver to indicate the function
-that the endpoint device must perform.
-
-Bitfield Description:
-  Bit 0		: raise legacy IRQ
-  Bit 1		: raise MSI IRQ
-  Bit 2		: raise MSI-X IRQ
-  Bit 3		: read command (read data from RC buffer)
-  Bit 4		: write command (write data to RC buffer)
-  Bit 5		: copy command (copy data from one RC buffer to another
-		  RC buffer)
-
-*) PCI_ENDPOINT_TEST_STATUS
-
-This register reflects the status of the PCI endpoint device.
-
-Bitfield Description:
-  Bit 0		: read success
-  Bit 1		: read fail
-  Bit 2		: write success
-  Bit 3		: write fail
-  Bit 4		: copy success
-  Bit 5		: copy fail
-  Bit 6		: IRQ raised
-  Bit 7		: source address is invalid
-  Bit 8		: destination address is invalid
-
-*) PCI_ENDPOINT_TEST_SRC_ADDR
-
-This register contains the source address (RC buffer address) for the
-COPY/READ command.
-
-*) PCI_ENDPOINT_TEST_DST_ADDR
-
-This register contains the destination address (RC buffer address) for
-the COPY/WRITE command.
-
-*) PCI_ENDPOINT_TEST_IRQ_TYPE
-
-This register contains the interrupt type (Legacy/MSI) triggered
-for the READ/WRITE/COPY and raise IRQ (Legacy/MSI) commands.
-
-Possible types:
- - Legacy	: 0
- - MSI		: 1
- - MSI-X	: 2
-
-*) PCI_ENDPOINT_TEST_IRQ_NUMBER
-
-This register contains the triggered ID interrupt.
-
-Admissible values:
- - Legacy	: 0
- - MSI		: [1 .. 32]
- - MSI-X	: [1 .. 2048]
-- 
cgit 


From 9595aee2a389be5dfa9a0121a14e8fba70f17278 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Tue, 14 May 2019 22:47:34 +0800
Subject: Documentation: PCI: convert endpoint/pci-test-howto.txt to reST

Convert plain text documentation to reStructuredText format and add it to
Sphinx TOC tree.  No essential content change.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/PCI/endpoint/index.rst          |   1 +
 Documentation/PCI/endpoint/pci-test-howto.rst | 235 ++++++++++++++++++++++++++
 Documentation/PCI/endpoint/pci-test-howto.txt | 206 ----------------------
 3 files changed, 236 insertions(+), 206 deletions(-)
 create mode 100644 Documentation/PCI/endpoint/pci-test-howto.rst
 delete mode 100644 Documentation/PCI/endpoint/pci-test-howto.txt

(limited to 'Documentation')

diff --git a/Documentation/PCI/endpoint/index.rst b/Documentation/PCI/endpoint/index.rst
index b680a3fc4fec..d114ea74b444 100644
--- a/Documentation/PCI/endpoint/index.rst
+++ b/Documentation/PCI/endpoint/index.rst
@@ -10,3 +10,4 @@ PCI Endpoint Framework
    pci-endpoint
    pci-endpoint-cfs
    pci-test-function
+   pci-test-howto
diff --git a/Documentation/PCI/endpoint/pci-test-howto.rst b/Documentation/PCI/endpoint/pci-test-howto.rst
new file mode 100644
index 000000000000..909f770a07d6
--- /dev/null
+++ b/Documentation/PCI/endpoint/pci-test-howto.rst
@@ -0,0 +1,235 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
+PCI Test User Guide
+===================
+
+:Author: Kishon Vijay Abraham I <kishon@ti.com>
+
+This document is a guide to help users use pci-epf-test function driver
+and pci_endpoint_test host driver for testing PCI. The list of steps to
+be followed in the host side and EP side is given below.
+
+Endpoint Device
+===============
+
+Endpoint Controller Devices
+---------------------------
+
+To find the list of endpoint controller devices in the system::
+
+	# ls /sys/class/pci_epc/
+	  51000000.pcie_ep
+
+If PCI_ENDPOINT_CONFIGFS is enabled::
+
+	# ls /sys/kernel/config/pci_ep/controllers
+	  51000000.pcie_ep
+
+
+Endpoint Function Drivers
+-------------------------
+
+To find the list of endpoint function drivers in the system::
+
+	# ls /sys/bus/pci-epf/drivers
+	  pci_epf_test
+
+If PCI_ENDPOINT_CONFIGFS is enabled::
+
+	# ls /sys/kernel/config/pci_ep/functions
+	  pci_epf_test
+
+
+Creating pci-epf-test Device
+----------------------------
+
+PCI endpoint function device can be created using the configfs. To create
+pci-epf-test device, the following commands can be used::
+
+	# mount -t configfs none /sys/kernel/config
+	# cd /sys/kernel/config/pci_ep/
+	# mkdir functions/pci_epf_test/func1
+
+The "mkdir func1" above creates the pci-epf-test function device that will
+be probed by pci_epf_test driver.
+
+The PCI endpoint framework populates the directory with the following
+configurable fields::
+
+	# ls functions/pci_epf_test/func1
+	  baseclass_code	interrupt_pin	progif_code	subsys_id
+	  cache_line_size	msi_interrupts	revid		subsys_vendorid
+	  deviceid          	msix_interrupts	subclass_code	vendorid
+
+The PCI endpoint function driver populates these entries with default values
+when the device is bound to the driver. The pci-epf-test driver populates
+vendorid with 0xffff and interrupt_pin with 0x0001::
+
+	# cat functions/pci_epf_test/func1/vendorid
+	  0xffff
+	# cat functions/pci_epf_test/func1/interrupt_pin
+	  0x0001
+
+
+Configuring pci-epf-test Device
+-------------------------------
+
+The user can configure the pci-epf-test device using configfs entry. In order
+to change the vendorid and the number of MSI interrupts used by the function
+device, the following commands can be used::
+
+	# echo 0x104c > functions/pci_epf_test/func1/vendorid
+	# echo 0xb500 > functions/pci_epf_test/func1/deviceid
+	# echo 16 > functions/pci_epf_test/func1/msi_interrupts
+	# echo 8 > functions/pci_epf_test/func1/msix_interrupts
+
+
+Binding pci-epf-test Device to EP Controller
+--------------------------------------------
+
+In order for the endpoint function device to be useful, it has to be bound to
+a PCI endpoint controller driver. Use the configfs to bind the function
+device to one of the controller driver present in the system::
+
+	# ln -s functions/pci_epf_test/func1 controllers/51000000.pcie_ep/
+
+Once the above step is completed, the PCI endpoint is ready to establish a link
+with the host.
+
+
+Start the Link
+--------------
+
+In order for the endpoint device to establish a link with the host, the _start_
+field should be populated with '1'::
+
+	# echo 1 > controllers/51000000.pcie_ep/start
+
+
+RootComplex Device
+==================
+
+lspci Output
+------------
+
+Note that the devices listed here correspond to the value populated in 1.4
+above::
+
+	00:00.0 PCI bridge: Texas Instruments Device 8888 (rev 01)
+	01:00.0 Unassigned class [ff00]: Texas Instruments Device b500
+
+
+Using Endpoint Test function Device
+-----------------------------------
+
+pcitest.sh added in tools/pci/ can be used to run all the default PCI endpoint
+tests. To compile this tool the following commands should be used::
+
+	# cd <kernel-dir>
+	# make -C tools/pci
+
+or if you desire to compile and install in your system::
+
+	# cd <kernel-dir>
+	# make -C tools/pci install
+
+The tool and script will be located in <rootfs>/usr/bin/
+
+
+pcitest.sh Output
+~~~~~~~~~~~~~~~~~
+::
+
+	# pcitest.sh
+	BAR tests
+
+	BAR0:           OKAY
+	BAR1:           OKAY
+	BAR2:           OKAY
+	BAR3:           OKAY
+	BAR4:           NOT OKAY
+	BAR5:           NOT OKAY
+
+	Interrupt tests
+
+	SET IRQ TYPE TO LEGACY:         OKAY
+	LEGACY IRQ:     NOT OKAY
+	SET IRQ TYPE TO MSI:            OKAY
+	MSI1:           OKAY
+	MSI2:           OKAY
+	MSI3:           OKAY
+	MSI4:           OKAY
+	MSI5:           OKAY
+	MSI6:           OKAY
+	MSI7:           OKAY
+	MSI8:           OKAY
+	MSI9:           OKAY
+	MSI10:          OKAY
+	MSI11:          OKAY
+	MSI12:          OKAY
+	MSI13:          OKAY
+	MSI14:          OKAY
+	MSI15:          OKAY
+	MSI16:          OKAY
+	MSI17:          NOT OKAY
+	MSI18:          NOT OKAY
+	MSI19:          NOT OKAY
+	MSI20:          NOT OKAY
+	MSI21:          NOT OKAY
+	MSI22:          NOT OKAY
+	MSI23:          NOT OKAY
+	MSI24:          NOT OKAY
+	MSI25:          NOT OKAY
+	MSI26:          NOT OKAY
+	MSI27:          NOT OKAY
+	MSI28:          NOT OKAY
+	MSI29:          NOT OKAY
+	MSI30:          NOT OKAY
+	MSI31:          NOT OKAY
+	MSI32:          NOT OKAY
+	SET IRQ TYPE TO MSI-X:          OKAY
+	MSI-X1:         OKAY
+	MSI-X2:         OKAY
+	MSI-X3:         OKAY
+	MSI-X4:         OKAY
+	MSI-X5:         OKAY
+	MSI-X6:         OKAY
+	MSI-X7:         OKAY
+	MSI-X8:         OKAY
+	MSI-X9:         NOT OKAY
+	MSI-X10:        NOT OKAY
+	MSI-X11:        NOT OKAY
+	MSI-X12:        NOT OKAY
+	MSI-X13:        NOT OKAY
+	MSI-X14:        NOT OKAY
+	MSI-X15:        NOT OKAY
+	MSI-X16:        NOT OKAY
+	[...]
+	MSI-X2047:      NOT OKAY
+	MSI-X2048:      NOT OKAY
+
+	Read Tests
+
+	SET IRQ TYPE TO MSI:            OKAY
+	READ (      1 bytes):           OKAY
+	READ (   1024 bytes):           OKAY
+	READ (   1025 bytes):           OKAY
+	READ (1024000 bytes):           OKAY
+	READ (1024001 bytes):           OKAY
+
+	Write Tests
+
+	WRITE (      1 bytes):          OKAY
+	WRITE (   1024 bytes):          OKAY
+	WRITE (   1025 bytes):          OKAY
+	WRITE (1024000 bytes):          OKAY
+	WRITE (1024001 bytes):          OKAY
+
+	Copy Tests
+
+	COPY (      1 bytes):           OKAY
+	COPY (   1024 bytes):           OKAY
+	COPY (   1025 bytes):           OKAY
+	COPY (1024000 bytes):           OKAY
+	COPY (1024001 bytes):           OKAY
diff --git a/Documentation/PCI/endpoint/pci-test-howto.txt b/Documentation/PCI/endpoint/pci-test-howto.txt
deleted file mode 100644
index 040479f437a5..000000000000
--- a/Documentation/PCI/endpoint/pci-test-howto.txt
+++ /dev/null
@@ -1,206 +0,0 @@
-			    PCI TEST USERGUIDE
-		    Kishon Vijay Abraham I <kishon@ti.com>
-
-This document is a guide to help users use pci-epf-test function driver
-and pci_endpoint_test host driver for testing PCI. The list of steps to
-be followed in the host side and EP side is given below.
-
-1. Endpoint Device
-
-1.1 Endpoint Controller Devices
-
-To find the list of endpoint controller devices in the system:
-
-	# ls /sys/class/pci_epc/
-	  51000000.pcie_ep
-
-If PCI_ENDPOINT_CONFIGFS is enabled
-	# ls /sys/kernel/config/pci_ep/controllers
-	  51000000.pcie_ep
-
-1.2 Endpoint Function Drivers
-
-To find the list of endpoint function drivers in the system:
-
-	# ls /sys/bus/pci-epf/drivers
-	  pci_epf_test
-
-If PCI_ENDPOINT_CONFIGFS is enabled
-	# ls /sys/kernel/config/pci_ep/functions
-	  pci_epf_test
-
-1.3 Creating pci-epf-test Device
-
-PCI endpoint function device can be created using the configfs. To create
-pci-epf-test device, the following commands can be used
-
-	# mount -t configfs none /sys/kernel/config
-	# cd /sys/kernel/config/pci_ep/
-	# mkdir functions/pci_epf_test/func1
-
-The "mkdir func1" above creates the pci-epf-test function device that will
-be probed by pci_epf_test driver.
-
-The PCI endpoint framework populates the directory with the following
-configurable fields.
-
-	# ls functions/pci_epf_test/func1
-	  baseclass_code	interrupt_pin	progif_code	subsys_id
-	  cache_line_size	msi_interrupts	revid		subsys_vendorid
-	  deviceid          	msix_interrupts	subclass_code	vendorid
-
-The PCI endpoint function driver populates these entries with default values
-when the device is bound to the driver. The pci-epf-test driver populates
-vendorid with 0xffff and interrupt_pin with 0x0001
-
-	# cat functions/pci_epf_test/func1/vendorid
-	  0xffff
-	# cat functions/pci_epf_test/func1/interrupt_pin
-	  0x0001
-
-1.4 Configuring pci-epf-test Device
-
-The user can configure the pci-epf-test device using configfs entry. In order
-to change the vendorid and the number of MSI interrupts used by the function
-device, the following commands can be used.
-
-	# echo 0x104c > functions/pci_epf_test/func1/vendorid
-	# echo 0xb500 > functions/pci_epf_test/func1/deviceid
-	# echo 16 > functions/pci_epf_test/func1/msi_interrupts
-	# echo 8 > functions/pci_epf_test/func1/msix_interrupts
-
-1.5 Binding pci-epf-test Device to EP Controller
-
-In order for the endpoint function device to be useful, it has to be bound to
-a PCI endpoint controller driver. Use the configfs to bind the function
-device to one of the controller driver present in the system.
-
-	# ln -s functions/pci_epf_test/func1 controllers/51000000.pcie_ep/
-
-Once the above step is completed, the PCI endpoint is ready to establish a link
-with the host.
-
-1.6 Start the Link
-
-In order for the endpoint device to establish a link with the host, the _start_
-field should be populated with '1'.
-
-	# echo 1 > controllers/51000000.pcie_ep/start
-
-2. RootComplex Device
-
-2.1 lspci Output
-
-Note that the devices listed here correspond to the value populated in 1.4 above
-
-	00:00.0 PCI bridge: Texas Instruments Device 8888 (rev 01)
-	01:00.0 Unassigned class [ff00]: Texas Instruments Device b500
-
-2.2 Using Endpoint Test function Device
-
-pcitest.sh added in tools/pci/ can be used to run all the default PCI endpoint
-tests. To compile this tool the following commands should be used:
-
-	# cd <kernel-dir>
-	# make -C tools/pci
-
-or if you desire to compile and install in your system:
-
-	# cd <kernel-dir>
-	# make -C tools/pci install
-
-The tool and script will be located in <rootfs>/usr/bin/
-
-2.2.1 pcitest.sh Output
-	# pcitest.sh
-	BAR tests
-
-	BAR0:           OKAY
-	BAR1:           OKAY
-	BAR2:           OKAY
-	BAR3:           OKAY
-	BAR4:           NOT OKAY
-	BAR5:           NOT OKAY
-
-	Interrupt tests
-
-	SET IRQ TYPE TO LEGACY:         OKAY
-	LEGACY IRQ:     NOT OKAY
-	SET IRQ TYPE TO MSI:            OKAY
-	MSI1:           OKAY
-	MSI2:           OKAY
-	MSI3:           OKAY
-	MSI4:           OKAY
-	MSI5:           OKAY
-	MSI6:           OKAY
-	MSI7:           OKAY
-	MSI8:           OKAY
-	MSI9:           OKAY
-	MSI10:          OKAY
-	MSI11:          OKAY
-	MSI12:          OKAY
-	MSI13:          OKAY
-	MSI14:          OKAY
-	MSI15:          OKAY
-	MSI16:          OKAY
-	MSI17:          NOT OKAY
-	MSI18:          NOT OKAY
-	MSI19:          NOT OKAY
-	MSI20:          NOT OKAY
-	MSI21:          NOT OKAY
-	MSI22:          NOT OKAY
-	MSI23:          NOT OKAY
-	MSI24:          NOT OKAY
-	MSI25:          NOT OKAY
-	MSI26:          NOT OKAY
-	MSI27:          NOT OKAY
-	MSI28:          NOT OKAY
-	MSI29:          NOT OKAY
-	MSI30:          NOT OKAY
-	MSI31:          NOT OKAY
-	MSI32:          NOT OKAY
-	SET IRQ TYPE TO MSI-X:          OKAY
-	MSI-X1:         OKAY
-	MSI-X2:         OKAY
-	MSI-X3:         OKAY
-	MSI-X4:         OKAY
-	MSI-X5:         OKAY
-	MSI-X6:         OKAY
-	MSI-X7:         OKAY
-	MSI-X8:         OKAY
-	MSI-X9:         NOT OKAY
-	MSI-X10:        NOT OKAY
-	MSI-X11:        NOT OKAY
-	MSI-X12:        NOT OKAY
-	MSI-X13:        NOT OKAY
-	MSI-X14:        NOT OKAY
-	MSI-X15:        NOT OKAY
-	MSI-X16:        NOT OKAY
-	[...]
-	MSI-X2047:      NOT OKAY
-	MSI-X2048:      NOT OKAY
-
-	Read Tests
-
-	SET IRQ TYPE TO MSI:            OKAY
-	READ (      1 bytes):           OKAY
-	READ (   1024 bytes):           OKAY
-	READ (   1025 bytes):           OKAY
-	READ (1024000 bytes):           OKAY
-	READ (1024001 bytes):           OKAY
-
-	Write Tests
-
-	WRITE (      1 bytes):          OKAY
-	WRITE (   1024 bytes):          OKAY
-	WRITE (   1025 bytes):          OKAY
-	WRITE (1024000 bytes):          OKAY
-	WRITE (1024001 bytes):          OKAY
-
-	Copy Tests
-
-	COPY (      1 bytes):           OKAY
-	COPY (   1024 bytes):           OKAY
-	COPY (   1025 bytes):           OKAY
-	COPY (1024000 bytes):           OKAY
-	COPY (1024001 bytes):           OKAY
-- 
cgit 


From 8e6c8aa3b52e573a6db1f58b2ba9a9106a012963 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 18 Apr 2019 22:45:09 +0200
Subject: isdn: gigaset: remove i4l support

isdn4linux is getting removed, and the gigaset driver can still
use the CAPI support, so this can all go away.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 Documentation/isdn/README.gigaset | 36 ++++++++----------------------------
 1 file changed, 8 insertions(+), 28 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/isdn/README.gigaset b/Documentation/isdn/README.gigaset
index 9b1ce277ca3d..f6184b637182 100644
--- a/Documentation/isdn/README.gigaset
+++ b/Documentation/isdn/README.gigaset
@@ -48,9 +48,8 @@ GigaSet 307x Device Driver
 
 1.2. Software
      --------
-     The driver works with the Kernel CAPI subsystem as well as the old
-     ISDN4Linux subsystem, so it can be used with any software which is able
-     to use CAPI 2.0 or ISDN4Linux for ISDN connections (voice or data).
+     The driver works with the Kernel CAPI subsystem and can be used with any
+     software which is able to use CAPI 2.0 for ISDN connections (voice or data).
 
      There are some user space tools available at
      https://sourceforge.net/projects/gigaset307x/
@@ -92,7 +91,7 @@ GigaSet 307x Device Driver
 	gigaset	 	debug	   debug level (see section 3.2.)
 
 			startmode  initial operation mode (see section 2.5.):
-	bas_gigaset )		   1=ISDN4linux/CAPI (default), 0=Unimodem
+	bas_gigaset )		   1=CAPI (default), 0=Unimodem
 	ser_gigaset )
 	usb_gigaset )	cidmode    initial Call-ID mode setting (see section
 				   2.5.): 1=on (default), 0=off
@@ -154,18 +153,10 @@ GigaSet 307x Device Driver
 
 2.3. CAPI
      ----
-     If the driver is compiled with CAPI support (kernel configuration option
-     GIGASET_CAPI) the devices will show up as CAPI controllers as soon as the
-     corresponding driver module is loaded, and can then be used with CAPI 2.0
-     kernel and user space applications. For user space access, the module
-     capi.ko must be loaded.
-
-     Legacy ISDN4Linux applications are supported via the capidrv
-     compatibility driver. The kernel module capidrv.ko must be loaded
-     explicitly with the command
-        modprobe capidrv
-     if needed, and cannot be unloaded again without unloading the driver
-     first. (These are limitations of capidrv.)
+     The devices will show up as CAPI controllers as soon as the
+     corresponding driver module is loaded, and can then be used with
+     CAPI 2.0 kernel and user space applications. For user space access,
+     the module capi.ko must be loaded.
 
      Most distributions handle loading and unloading of the various CAPI
      modules automatically via the command capiinit(1) from the capi4k-utils
@@ -173,16 +164,6 @@ GigaSet 307x Device Driver
      Gigaset drivers because it doesn't support more than one module per
      driver.
 
-2.4. ISDN4Linux
-     ----------
-     If the driver is compiled without CAPI support (native ISDN4Linux
-     variant), it registers the device with the legacy ISDN4Linux subsystem
-     after loading the module. It can then be used with ISDN4Linux
-     applications only. Most distributions provide some configuration utility
-     for setting up that subsystem. Otherwise you can use some HOWTOs like
-         http://www.linuxhaven.de/dlhp/HOWTO/DE-ISDN-HOWTO-5.html
-
-
 2.5. Unimodem mode
      -------------
      In this mode the device works like a modem connected to a serial port
@@ -281,8 +262,7 @@ GigaSet 307x Device Driver
      number. Dialing "***" (three asterisks) calls all extensions
      simultaneously (global call).
 
-     This holds for both CAPI 2.0 and ISDN4Linux applications. Unimodem mode
-     does not support internal calls.
+     Unimodem mode does not support internal calls.
 
 2.8. Unregistered Wireless Devices (M101/M105)
      -----------------------------------------
-- 
cgit 


From 85993b8c9786fb24975dbcabebb1c75790d4fb6a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 18 Apr 2019 22:47:35 +0200
Subject: isdn: remove hisax driver

With the decline of ISDN, this seems to have become almost completely
obsolete, and even in the past years before that, almost all remaining
users appear to have used mISDN instead.

Birger Harzenetter noted that he is still using i4l/hisax to take
advantage of the 'divert' driver for call diversion, but otherwise uses
mISDN on the same hardware. This is a rare edge case as far as I
can tell, but we are still breaking an actively used work flow
(see https://xkcd.com/1172/).

We debated moving i4l/hisax to staging as an intermediate step, but as
he is not likely to change the setup, and that would just delay breaking
this use case.  The alternatives here are to stay on stable kernels
< 5.2, to create an external driver repository for isdn4linux, or to
add divert functionality to mISDN.

Cc: Birger Harzenetter <WIMPy@yeti.dk>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 Documentation/isdn/HiSax.cert   |  96 ------
 Documentation/isdn/README.HiSax | 659 ----------------------------------------
 2 files changed, 755 deletions(-)
 delete mode 100644 Documentation/isdn/HiSax.cert
 delete mode 100644 Documentation/isdn/README.HiSax

(limited to 'Documentation')

diff --git a/Documentation/isdn/HiSax.cert b/Documentation/isdn/HiSax.cert
deleted file mode 100644
index f2a6fcb8efee..000000000000
--- a/Documentation/isdn/HiSax.cert
+++ /dev/null
@@ -1,96 +0,0 @@
------BEGIN PGP SIGNED MESSAGE-----
-
-First:
-
-    HiSax is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-However, if you wish to modify the HiSax sources, please note the following:
-
-HiSax has passed the ITU approval test suite with ELSA Quickstep ISDN cards
-and Eicon Technology Diva 2.01 PCI card.
-The certification is only valid for the combination of the tested software
-version and the tested hardware. Any changes to the HiSax source code may
-therefore affect the certification.
-
-Additional ITU approval tests have been carried out for all generic cards
-using Colognechip single chip solutions HFC-S PCI A for PCI cards as well
-as HFC-S USB based USB ISDN ta adapters.
-These tests included all layers 1-3 and as well all functional tests for 
-the layer 1. Because all hardware based on these chips are complete ISDN
-solutions in one chip all cards and USB-TAs using these chips are to be
-regarded as approved for those tests. Some additional electrical tests
-of the layer 1 which are independent of the driver and related to a
-special hardware used will be regarded as approved if at least one 
-solution has been tested including those electrical tests. So if cards 
-or tas have been completely approved for any other os, the approval
-for those electrical tests is valid for linux, too.
-Please send any questions regarding this drivers or approval abouts to 
-werner@isdn-development.de 
-Additional information and the type approval documents will be found
-shortly on the Colognechip website www.colognechip.com 
-
-If you change the main files of the HiSax ISDN stack, the certification will
-become invalid. Because in most countries it is illegal to connect
-unapproved ISDN equipment to the public network, I have to guarantee that
-changes in HiSax do not affect the certification.
-
-In order to make a valid certification apparent to the user, I have built in
-some validation checks that are made during the make process. The HiSax main
-files are protected by md5 checksums and the md5sum file is pgp signed by
-myself:
-
-KeyID 1024/FF992F6D 1997/01/16 Karsten Keil <kkeil@suse.de>
-Key fingerprint = 92 6B F7 58 EE 86 28 C8  C4 1A E6 DC 39 89 F2 AA
-
-Only if the checksums are OK, and the signature of the file
-"drivers/isdn/hisax/md5sums.asc" match, is the certification valid; a
-message confirming this is then displayed during the hisax init process.
-
-The affected files are:
-
-drivers/isdn/hisax/isac.c
-drivers/isdn/hisax/isdnl1.c
-drivers/isdn/hisax/isdnl2.c
-drivers/isdn/hisax/isdnl3.c
-drivers/isdn/hisax/tei.c
-drivers/isdn/hisax/callc.c
-drivers/isdn/hisax/l3dss1.c
-drivers/isdn/hisax/l3_1tr6.c
-drivers/isdn/hisax/cert.c
-drivers/isdn/hisax/elsa.c
-drivers/isdn/hisax/diva.c
-drivers/isdn/hisax/hfc_pci.c
-
-Please send any changes, bugfixes and patches to me rather than implementing
-them directly into the HiSax sources.
-
-This does not reduce your rights granted by the GNU General Public License.
-If you wish to change the sources, go ahead; but note that then the
-certification is invalid even if you use one of the approved cards.
-
-Here are the certification registration numbers for ELSA Quickstep cards:
-German   D133361J CETECOM ICT Services GmbH 0682
-European D133362J CETECOM ICT Services GmbH 0682
-
-
-Karsten Keil
-keil@isdn4linux.de
-
------BEGIN PGP SIGNATURE-----
-Version: 2.6.3i
-Charset: noconv
-
-iQCVAwUBOFAwqTpxHvX/mS9tAQFI2QP9GLDK2iy/KBhwReE3F7LeO+tVhffTVZ3a
-20q5/z/WcIg/pnH0uTkl2UgDXBFXYl45zJyDGNpAposIFmT+Edd14o7Vj1w/BBdn
-Y+5rBmJf+gyBu61da5d6bv0lpymwRa/um+ri+ilYnZ/XPfg5JKhdjGSBCJuJAElM
-d2jFbTrsMYw=
-=LNf9
------END PGP SIGNATURE-----
diff --git a/Documentation/isdn/README.HiSax b/Documentation/isdn/README.HiSax
deleted file mode 100644
index b1a573cf4472..000000000000
--- a/Documentation/isdn/README.HiSax
+++ /dev/null
@@ -1,659 +0,0 @@
-HiSax is a Linux hardware-level driver for passive ISDN cards with Siemens
-chipset (ISAC_S 2085/2086/2186, HSCX SAB 82525). It is based on the Teles
-driver from Jan den Ouden.
-It is meant to be used with isdn4linux, an ISDN link-level module for Linux
-written by Fritz Elfert.
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-
-Supported cards
----------------
-
-Teles 8.0/16.0/16.3 and compatible ones
-Teles 16.3c
-Teles S0/PCMCIA
-Teles PCI
-Teles S0Box
-Creatix S0Box
-Creatix PnP S0
-Compaq ISDN S0 ISA card
-AVM A1 (Fritz, Teledat 150)
-AVM Fritz PCMCIA
-AVM Fritz PnP
-AVM Fritz PCI
-ELSA Microlink PCC-16, PCF, PCF-Pro, PCC-8
-ELSA Quickstep 1000
-ELSA Quickstep 1000PCI
-ELSA Quickstep 3000 (same settings as QS1000)
-ELSA Quickstep 3000PCI
-ELSA PCMCIA
-ITK ix1-micro Rev.2
-Eicon Diva 2.0 ISA and PCI (S0 and U interface, no PRO version)
-Eicon Diva 2.01 ISA and PCI
-Eicon Diva 2.02 PCI
-Eicon Diva Piccola
-ASUSCOM NETWORK INC. ISDNLink 128K PC adapter (order code I-IN100-ST-D)
-Dynalink IS64PH (OEM version of ASUSCOM NETWORK INC. ISDNLink 128K adapter)
-PCBIT-DP (OEM version of ASUSCOM NETWORK INC. ISDNLink)
-HFC-2BS0 based cards (TeleInt SA1)
-Sedlbauer Speed Card (Speed Win, Teledat 100, PCI, Fax+)
-Sedlbauer Speed Star/Speed Star2 (PCMCIA)
-Sedlbauer ISDN-Controller PC/104
-USR Sportster internal TA (compatible Stollmann tina-pp V3)
-USR internal TA PCI
-ith Kommunikationstechnik GmbH MIC 16 ISA card
-Traverse Technologie NETjet PCI S0 card and NETspider U card
-Ovislink ISDN sc100-p card (NETjet driver)
-Dr. Neuhaus Niccy PnP/PCI
-Siemens I-Surf 1.0
-Siemens I-Surf 2.0 (with IPAC, try type 12 asuscom) 
-ACER P10
-HST Saphir
-Berkom Telekom A4T
-Scitel Quadro
-Gazel ISDN cards
-HFC-PCI based cards
-Winbond W6692 based cards
-HFC-S+, HFC-SP/PCMCIA cards
-formula-n enternow
-Gerdes Power ISDN
-
-Note: PCF, PCF-Pro: up to now, only the ISDN part is supported
-      PCC-8: not tested yet
-      Eicon.Diehl Diva U interface not tested
-
-If you know other passive cards with the Siemens chipset, please let me know.
-You can combine any card, if there is no conflict between the resources
-(io, mem, irq).
-
-
-Configuring the driver
-----------------------
-
-The HiSax driver can either be built directly into the kernel or as a module.
-It can be configured using the command line feature while loading the kernel
-with LILO or LOADLIN or, if built as a module, using insmod/modprobe with
-parameters.
-There is also some config needed before you compile the kernel and/or
-modules. It is included in the normal "make [menu]config" target at the
-kernel. Don't forget it, especially to select the right D-channel protocol.
-
-Please note: In older versions of the HiSax driver, all PnP cards
-needed to be configured with isapnp and worked only with the HiSax
-driver used as a module.
-
-In the current version, HiSax will automatically use the in-kernel
-ISAPnP support, provided you selected it during kernel configuration
-(CONFIG_ISAPNP), if you don't give the io=, irq= command line parameters.
-
-The affected card types are: 4,7,12,14,19,27-30
-
-a) when built as a module
--------------------------
-
-insmod/modprobe  hisax.o \
-  io=iobase irq=IRQ mem=membase type=card_type \
-  protocol=D_channel_protocol id=idstring
-
-or, if several cards are installed:
-
-insmod/modprobe hisax.o \
-  io=iobase1,iobase2,... irq=IRQ1,IRQ2,... mem=membase1,membase2,... \
-  type=card_type1,card_type2,... \
-  protocol=D_channel_protocol1,D_channel_protocol2,... \
-  id=idstring1%idstring2 ...
-
-where "iobaseN" represents the I/O base address of the Nth card, "membaseN"
-the memory base address of the Nth card, etc.
-
-The reason for the delimiter "%" being used in the idstrings is that ","
-won't work with the current modules package.
-
-The parameters may be specified in any order. For example, the "io"
-parameter may precede the "irq" parameter, or vice versa. If several
-cards are installed, the ordering within the comma separated parameter
-lists must of course be consistent.
-
-Only parameters applicable to the card type need to be specified. For
-example, the Teles 16.3 card is not memory-mapped, so the "mem"
-parameter may be omitted for this card. Sometimes it may be necessary
-to specify a dummy parameter, however. This is the case when there is
-a card of a different type later in the list that needs a parameter
-which the preceding card does not. For instance, if a Teles 16.0 card
-is listed after a Teles 16.3 card, a dummy memory base parameter of 0
-must be specified for the 16.3. Instead of a dummy value, the parameter
-can also be skipped by simply omitting the value. For example:
-mem=,0xd0000. See example 6 below.
-
-The parameter for the D-Channel protocol may be omitted if you selected the
-correct one during kernel config. Valid values are "1" for German 1TR6,
-"2" for EDSS1 (Euro ISDN), "3" for leased lines (no D-Channel) and "4"
-for US NI1.
-With US NI1 you have to include your SPID into the MSN setting in the form
-<MSN>:<SPID> for example (your phonenumber is 1234 your SPID 5678):
-AT&E1234:5678                       on ttyI interfaces
-isdnctrl eaz ippp0 1234:5678        on network devices
-
-The Creatix/Teles PnP cards use io1= and io2= instead of io= for specifying
-the I/O addresses of the ISAC and HSCX chips, respectively.
-
-Card types:
-
-    Type                Required parameters (in addition to type and protocol)
-
-    1   Teles 16.0               irq, mem, io
-    2   Teles  8.0               irq, mem
-    3   Teles 16.3 (non PnP)     irq, io
-    4   Creatix/Teles PnP        irq, io0 (ISAC), io1 (HSCX)
-    5   AVM A1 (Fritz)           irq, io
-    6   ELSA PCC/PCF cards       io or nothing for autodetect (the iobase is
-                                 required only if you have more than one ELSA
-                                 card in your PC)
-    7   ELSA Quickstep 1000      irq, io  (from isapnp setup)
-    8   Teles 16.3 PCMCIA     	 irq, io
-    9   ITK ix1-micro Rev.2      irq, io
-   10   ELSA PCMCIA		 irq, io  (set with card manager)
-   11   Eicon.Diehl Diva ISA PnP irq, io
-   11   Eicon.Diehl Diva PCI     no parameter
-   12   ASUS COM ISDNLink        irq, io  (from isapnp setup)
-   13   HFC-2BS0 based cards     irq, io
-   14   Teles 16.3c PnP          irq, io
-   15   Sedlbauer Speed Card     irq, io
-   15   Sedlbauer PC/104         irq, io
-   15   Sedlbauer Speed PCI	 no parameter
-   16   USR Sportster internal   irq, io
-   17   MIC card                 irq, io
-   18   ELSA Quickstep 1000PCI   no parameter
-   19   Compaq ISDN S0 ISA card  irq, io0, io1, io (from isapnp setup io=IO2)
-   20   NETjet PCI card          no parameter
-   21   Teles PCI                no parameter
-   22   Sedlbauer Speed Star (PCMCIA) irq, io (set with card manager)
-   24   Dr. Neuhaus Niccy PnP    irq, io0, io1 (from isapnp setup)
-   24   Dr. Neuhaus Niccy PCI    no parameter
-   25   Teles S0Box              irq, io (of the used lpt port)
-   26   AVM A1 PCMCIA (Fritz!)   irq, io (set with card manager)
-   27   AVM PnP (Fritz!PnP)      irq, io  (from isapnp setup)
-   27   AVM PCI (Fritz!PCI)      no parameter
-   28   Sedlbauer Speed Fax+     irq, io (from isapnp setup)
-   29	Siemens I-Surf 1.0       irq, io, memory (from isapnp setup)   
-   30	ACER P10                 irq, io (from isapnp setup)   
-   31	HST Saphir               irq, io
-   32	Telekom A4T              none
-   33	Scitel Quadro		 subcontroller (4*S0, subctrl 1...4)
-   34	Gazel ISDN cards (ISA)   irq,io
-   34	Gazel ISDN cards (PCI)   none
-   35	HFC 2BDS0 PCI            none
-   36	W6692 based PCI cards    none
-   37	HFC 2BDS0 S+, SP         irq,io 
-   38	NETspider U PCI card     none
-   39	HFC 2BDS0 SP/PCMCIA      irq,io (set with cardmgr)
-   40   hotplug interface
-   41   Formula-n enter:now PCI  none
-
-At the moment IRQ sharing is only possible with PCI cards. Please make sure
-that your IRQ is free and enabled for ISA use.
-
-
-Examples for module loading
-
-1. Teles 16.3, Euro ISDN, I/O base 280 hex, IRQ 10
-   modprobe hisax type=3 protocol=2 io=0x280 irq=10
-
-2. Teles 16.0, 1TR6 ISDN, I/O base d80 hex, IRQ 5, Memory d0000 hex
-   modprobe hisax protocol=1 type=1 io=0xd80 mem=0xd0000 irq=5
-
-3. Fritzcard, Euro ISDN, I/O base 340 hex, IRQ 10 and ELSA PCF, Euro ISDN
-   modprobe hisax type=5,6 protocol=2,2 io=0x340 irq=10 id=Fritz%Elsa
-
-4. Any ELSA PCC/PCF card, Euro ISDN
-   modprobe hisax type=6 protocol=2
-
-5. Teles 16.3 PnP, Euro ISDN, with isapnp configured
-   isapnp config:  (INT 0 (IRQ 10 (MODE +E)))
- 		   (IO 0 (BASE 0x0580))
-                   (IO 1 (BASE 0x0180))
-   modprobe hisax type=4 protocol=2 irq=10 io0=0x580 io1=0x180
-
-   In the current version of HiSax, you can instead simply use
-
-   modprobe hisax type=4 protocol=2
-
-   if you configured your kernel for ISAPnP. Don't run isapnp in
-   this case!
-
-6. Teles 16.3, Euro ISDN, I/O base 280 hex, IRQ 12 and
-   Teles 16.0, 1TR6, IRQ 5, Memory d0000 hex
-   modprobe hisax type=3,1 protocol=2,1 io=0x280 mem=0,0xd0000
-
-   Please note the dummy 0 memory address for the Teles 16.3, used as a
-   placeholder as described above, in the last example.
-
-7. Teles PCMCIA, Euro ISDN, I/O base 180 hex, IRQ 15 (default values)
-   modprobe hisax type=8 protocol=2 io=0x180 irq=15
-
-
-b) using LILO/LOADLIN, with the driver compiled directly into the kernel
-------------------------------------------------------------------------
-
-hisax=typ1,dp1,pa_1,pb_1,pc_1[,typ2,dp2,pa_2 ... \
-      typn,dpn,pa_n,pb_n,pc_n][,idstring1[,idstring2,...,idstringn]]
-
-where
-     typ1 = type of 1st card (default depends on kernel settings)
-     dp1  = D-Channel protocol of 1st card. 1=1TR6, 2=EDSS1, 3=leased
-     pa_1 = 1st parameter (depending on the type of the card)
-     pb_1 = 2nd parameter (    "     "   "   "   "   "   "  )
-     pc_1 = 3rd parameter (    "     "   "   "   "   "   "  )
-
-     typ2,dp2,pa_2,pb_2,pc_2 = Parameters of the second card (defaults: none)
-     typn,dpn,pa_n,pb_n,pc_n = Parameters of the n'th card (up to 16 cards are
-                                                                     supported)
-
-     idstring = Driver ID for accessing the particular card with utility
-                programs and for identification when using a line monitor
-                (default: "HiSax")
-
-                Note: the ID string must start with an alphabetical character!
-
-Card types:
-
-type
-    1 	Teles 16.0     	        pa=irq  pb=membase  pc=iobase
-    2 	Teles  8.0              pa=irq  pb=membase
-    3 	Teles 16.3              pa=irq  pb=iobase
-    4 	Creatix/Teles PNP     	ONLY WORKS AS A MODULE !
-    5 	AVM A1 (Fritz)          pa=irq  pb=iobase
-    6 	ELSA PCC/PCF cards      pa=iobase or nothing for autodetect
-    7   ELSA Quickstep 1000     ONLY WORKS AS A MODULE !
-    8   Teles S0 PCMCIA         pa=irq  pb=iobase
-    9   ITK ix1-micro Rev.2     pa=irq  pb=iobase
-   10   ELSA PCMCIA             pa=irq, pb=io  (set with card manager)
-   11   Eicon.Diehl Diva ISAPnP ONLY WORKS AS A MODULE !
-   11   Eicon.Diehl Diva PCI    no parameter
-   12   ASUS COM ISDNLink       ONLY WORKS AS A MODULE !
-   13	HFC-2BS0 based cards    pa=irq  pb=io
-   14   Teles 16.3c PnP         ONLY WORKS AS A MODULE !
-   15	Sedlbauer Speed Card    pa=irq  pb=io (Speed Win only as module !)
-   15   Sedlbauer PC/104        pa=irq  pb=io
-   15   Sedlbauer Speed PCI	no parameter
-   16   USR Sportster internal  pa=irq  pb=io
-   17   MIC card                pa=irq  pb=io
-   18   ELSA Quickstep 1000PCI  no parameter
-   19   Compaq ISDN S0 ISA card ONLY WORKS AS A MODULE !
-   20   NETjet PCI card         no parameter
-   21   Teles PCI               no parameter
-   22   Sedlbauer Speed Star (PCMCIA)  pa=irq, pb=io  (set with card manager)
-   24   Dr. Neuhaus Niccy PnP   ONLY WORKS AS A MODULE !
-   24   Dr. Neuhaus Niccy PCI   no parameter
-   25   Teles S0Box             pa=irq, pb=io (of the used lpt port)
-   26   AVM A1 PCMCIA (Fritz!)  pa=irq, pb=io (set with card manager)
-   27   AVM PnP (Fritz!PnP)     ONLY WORKS AS A MODULE !
-   27   AVM PCI (Fritz!PCI)     no parameter
-   28   Sedlbauer Speed Fax+    ONLY WORKS AS A MODULE !
-   29	Siemens I-Surf 1.0      ONLY WORKS AS A MODULE !
-   30	ACER P10                ONLY WORKS AS A MODULE !
-   31	HST Saphir              pa=irq, pb=io
-   32	Telekom A4T             no parameter
-   33	Scitel Quadro		subcontroller (4*S0, subctrl 1...4)
-   34	Gazel ISDN cards (ISA)  pa=irq, pb=io
-   34	Gazel ISDN cards (PCI)  no parameter
-   35	HFC 2BDS0 PCI           no parameter
-   36	W6692 based PCI cards   none
-   37	HFC 2BDS0 S+,SP/PCMCIA  ONLY WORKS AS A MODULE !
-   38	NETspider U PCI card    none
-   39	HFC 2BDS0 SP/PCMCIA     ONLY WORKS AS A MODULE !
-   40   hotplug interface	ONLY WORKS AS A MODULE !
-   41   Formula-n enter:now PCI none
-
-Running the driver
-------------------
-
-When you insmod isdn.o and hisax.o (or with the in-kernel version, during
-boot time), a few lines should appear in your syslog. Look for something like:
-
-Apr 13 21:01:59 kke01 kernel: HiSax: Driver for Siemens chip set ISDN cards
-Apr 13 21:01:59 kke01 kernel: HiSax: Version 2.9
-Apr 13 21:01:59 kke01 kernel: HiSax: Revisions 1.14/1.9/1.10/1.25/1.8
-Apr 13 21:01:59 kke01 kernel: HiSax: Total 1 card defined
-Apr 13 21:01:59 kke01 kernel: HiSax: Card 1 Protocol EDSS1 Id=HiSax1 (0)
-Apr 13 21:01:59 kke01 kernel: HiSax: Elsa driver Rev. 1.13
-...
-Apr 13 21:01:59 kke01 kernel: Elsa: PCF-Pro found at 0x360 Rev.:C IRQ 10
-Apr 13 21:01:59 kke01 kernel: Elsa: timer OK; resetting card
-Apr 13 21:01:59 kke01 kernel: Elsa: HSCX version A: V2.1  B: V2.1
-Apr 13 21:01:59 kke01 kernel: Elsa: ISAC 2086/2186 V1.1
-...
-Apr 13 21:01:59 kke01 kernel: HiSax: DSS1 Rev. 1.14
-Apr 13 21:01:59 kke01 kernel: HiSax: 2 channels added
-
-This means that the card is ready for use.
-Cabling problems or line-downs are not detected, and only some ELSA cards can
-detect the S0 power.
-
-Remember that, according to the new strategy for accessing low-level drivers
-from within isdn4linux, you should also define a driver ID while doing
-insmod: Simply append hisax_id=<SomeString> to the insmod command line. This
-string MUST NOT start with a digit or a small 'x'!
-
-At this point you can run a 'cat /dev/isdnctrl0' and view debugging messages.
-
-At the moment, debugging messages are enabled with the hisaxctrl tool:
-
-    hisaxctrl <DriverId> DebugCmd <debugging_flags>
-
-<DriverId> default is HiSax, if you didn't specify one.
-
-DebugCmd is  1  for generic debugging
-            11  for layer 1 development debugging
-            13  for layer 3 development debugging
-
-where <debugging_flags> is the integer sum of the following debugging
-options you wish enabled:
-
-With DebugCmd set to 1:
-
-   0x0001  Link-level <--> hardware-level communication
-   0x0002  Top state machine
-   0x0004  D-Channel Frames for isdnlog
-   0x0008  D-Channel Q.921
-   0x0010  B-Channel X.75
-   0x0020  D-Channel l2
-   0x0040  B-Channel l2
-   0x0080  D-Channel link state debugging
-   0x0100  B-Channel link state debugging
-   0x0200  TEI debug
-   0x0400  LOCK debug in callc.c
-   0x0800  More paranoid debug in callc.c (not for normal use)
-   0x1000  D-Channel l1 state debugging
-   0x2000  B-Channel l1 state debugging
-
-With DebugCmd set to 11:
-
-   0x0001  Warnings (default: on)
-   0x0002  IRQ status
-   0x0004  ISAC
-   0x0008  ISAC FIFO
-   0x0010  HSCX
-   0x0020  HSCX FIFO (attention: full B-Channel output!)
-   0x0040  D-Channel LAPD frame types
-   0x0080  IPAC debug
-   0x0100  HFC receive debug
-   0x0200  ISAC monitor debug
-   0x0400  D-Channel frames for isdnlog (set with 1 0x4 too)
-   0x0800  D-Channel message verbose
-
-With DebugCmd set to 13:
-
-         1  Warnings (default: on)
-         2  l3 protocol descriptor errors
-         4  l3 state machine
-         8  charge info debugging (1TR6)
-
-For example, 'hisaxctrl HiSax 1 0x3ff' enables full generic debugging.
-
-Because of some obscure problems with some switch equipment, the delay
-between the CONNECT message and sending the first data on the B-channel is now
-configurable with
-
-hisaxctrl <DriverId> 2 <delay>
-<delay> in ms Value between 50 and 800 ms is recommended.
-
-Downloading Firmware
---------------------
-At the moment, the Sedlbauer speed fax+ is the only card, which
-needs to download firmware.
-The firmware is downloaded with the hisaxctrl tool:
-
-    hisaxctrl <DriverId> 9 <firmware_filename>
-
-<DriverId> default is HiSax, if you didn't specify one,
-
-where <firmware_filename> is the filename of the firmware file.
-
-For example, 'hisaxctrl HiSax 9 ISAR.BIN' downloads the firmware for
-ISAR based cards (like the Sedlbauer speed fax+).
-
-Warning
--------
-HiSax is a work in progress and may crash your machine.
-For certification look at HiSax.cert file.
-
-Limitations
------------
-At this time, HiSax only works on Euro ISDN lines and German 1TR6 lines.
-For leased lines see appendix.
-
-Bugs
-----
-If you find any, please let me know.
-
-
-Thanks
-------
-Special thanks to:
-
-        Emil Stephan for the name HiSax which is a mix of HSCX and ISAC.
-
-        Fritz Elfert, Jan den Ouden, Michael Hipp, Michael Wein,
-        Andreas Kool, Pekka Sarnila, Sim Yskes, Johan Myrre'en,
-	Klaus-Peter Nischke (ITK AG), Christof Petig, Werner Fehn (ELSA GmbH),
-	Volker Schmidt
-	Edgar Toernig and Marcus Niemann for the Sedlbauer driver
-	Stephan von Krawczynski
-	Juergen Quade for the Leased Line part
-	Klaus Lichtenwalder (Klaus.Lichtenwalder@WebForum.DE), for ELSA PCMCIA support
-	Enrik Berkhan (enrik@starfleet.inka.de) for S0BOX specific stuff
-	Ton van Rosmalen for Teles PCI
-	Petr Novak <petr.novak@i.cz> for Winbond W6692 support
-	Werner Cornelius <werner@isdn4linux.de> for HFC-PCI, HFC-S(+/P) and supplementary services support
-        and more people who are hunting bugs. (If I forgot somebody, please
-	send me a mail).
-
-        Firma ELSA GmbH
-        Firma Eicon.Diehl GmbH
-        Firma Dynalink NL
-	Firma ASUSCOM NETWORK INC. Taiwan
-	Firma S.u.S.E
-	Firma ith Kommunikationstechnik GmbH
-	Firma Traverse Technologie Australia
-	Firma Medusa GmbH  (www.medusa.de).
-	Firma Quant-X Austria for sponsoring a DEC Alpha board+CPU
-	Firma Cologne Chip Designs GmbH
-
-        My girl friend and partner in life Ute for her patience with me.
-
-
-Enjoy,
-
-Karsten Keil
-keil@isdn4linux.de
-
-
-Appendix: Teles PCMCIA driver
------------------------------
-
-See
-   http://www.linux.no/teles_cs.txt 
-for instructions.
-
-Appendix: Linux and ISDN-leased lines
--------------------------------------
-
-Original from Juergen Quade, new version KKe.
-
-Attention NEW VERSION, the old leased line syntax won't work !!!
-
-You can use HiSax to connect your Linux-Box via an ISDN leased line
-to e.g. the Internet:
-
-1. Build a kernel which includes the HiSax driver either as a module
-   or as part of the kernel.
-     cd /usr/src/linux
-     make menuconfig
-     <ISDN subsystem - ISDN support -- HiSax>
-     make clean; make zImage; make modules; make modules_install
-2. Install the new kernel
-     cp /usr/src/linux/arch/x86/boot/zImage /etc/kernel/linux.isdn
-     vi /etc/lilo.conf
-     <add new kernel in the bootable image section>
-     lilo
-3. in case the hisax driver is a "fixed" part of the kernel, configure
-   the driver with lilo:
-     vi /etc/lilo.conf
-     <add HiSax driver parameter in the global section (see below)>
-     lilo
-   Your lilo.conf _might_ look like the following:
-
-	# LILO configuration-file
-	# global section
-    # teles 16.0 on IRQ=5, MEM=0xd8000, PORT=0xd80
-	append="hisax=1,3,5,0xd8000,0xd80,HiSax"
-    # teles 16.3 (non pnp) on IRQ=15, PORT=0xd80
-	# append="hisax=3,3,5,0xd8000,0xd80,HiSax"
-	boot=/dev/sda
-	compact        # faster, but won't work on all systems.
-	linear
-	read-only
-	prompt
-	timeout=100
-	vga = normal    # force sane state
-	# Linux bootable partition config begins
-	image = /etc/kernel/linux.isdn
-	root = /dev/sda1
-	label = linux.isdn
-	#
-	image = /etc/kernel/linux-2.0.30
-	root = /dev/sda1
-	label = linux.secure
-
-   In the line starting with "append" you have to adapt the parameters
-   according to your card (see above in this file)
-
-3. boot the new linux.isdn kernel
-4. start the ISDN subsystem:
-   a) load - if necessary - the modules (depends, whether you compiled
-      the ISDN driver as module or not)
-      According to the type of card you have to specify the necessary
-      driver parameter (irq, io, mem, type, protocol).
-      For the leased line the protocol is "3". See the table above for
-      the parameters, which you have to specify depending on your card.
-   b) configure i4l
-      /sbin/isdnctrl addif isdn0
-      # EAZ  1 -- B1 channel   2 --B2 channel
-      /sbin/isdnctrl eaz isdn0 1
-      /sbin/isdnctrl secure isdn0 on
-      /sbin/isdnctrl huptimeout isdn0 0
-      /sbin/isdnctrl l2_prot isdn0 hdlc
-      # Attention you must not set an outgoing number !!! This won't work !!!
-      # The incoming number is LEASED0 for the first card, LEASED1 for the
-      # second and so on.
-      /sbin/isdnctrl addphone isdn0 in LEASED0
-      # Here is no need to bind the channel.
-   c) in case the remote partner is a CISCO:
-      /sbin/isdnctrl encap isdn0 cisco-h
-   d) configure the interface
-      /sbin/ifconfig isdn0 ${LOCAL_IP} pointopoint ${REMOTE_IP}
-   e) set the routes
-      /sbin/route add -host ${REMOTE_IP} isdn0
-      /sbin/route add default gw ${REMOTE_IP}
-   f) switch the card into leased mode for each used B-channel
-      /sbin/hisaxctrl HiSax 5 1
-
-Remarks:
-a) Use state of the art isdn4k-utils
-
-Here an example script:
-#!/bin/sh
-# Start/Stop ISDN leased line connection
-
-I4L_AS_MODULE=yes
-I4L_REMOTE_IS_CISCO=no
-I4L_MODULE_PARAMS="type=16 io=0x268 irq=7 "
-I4L_DEBUG=no
-I4L_LEASED_128K=yes
-LOCAL_IP=192.168.1.1
-REMOTE_IP=192.168.2.1
-
-case "$1" in
-    start)
-	echo "Starting ISDN ..."
-        if [ ${I4L_AS_MODULE} = "yes" ]; then
-		echo "loading modules..."
-		/sbin/modprobe hisax ${I4L_MODULE_PARAMS}
-	fi
-	# configure interface
-	/sbin/isdnctrl addif isdn0
-	/sbin/isdnctrl secure isdn0 on
-	if [ ${I4L_DEBUG} = "yes" ]; then
-		/sbin/isdnctrl verbose 7
-		/sbin/hisaxctrl HiSax 1 0xffff
-		/sbin/hisaxctrl HiSax 11 0xff
-		cat  /dev/isdnctrl >/tmp/lea.log &
-	fi
-	if [ ${I4L_REMOTE_IS_CISCO} = "yes" ]; then
-		/sbin/isdnctrl encap isdn0 cisco-h
-	fi
-	/sbin/isdnctrl huptimeout isdn0 0
-	# B-CHANNEL 1
-	/sbin/isdnctrl eaz isdn0 1
-	/sbin/isdnctrl l2_prot isdn0 hdlc
-	# 1. card
-	/sbin/isdnctrl addphone isdn0 in LEASED0
-        if [ ${I4L_LEASED_128K} = "yes" ]; then
-		/sbin/isdnctrl addslave isdn0 isdn0s
-		/sbin/isdnctrl secure isdn0s on
-		/sbin/isdnctrl huptimeout isdn0s 0
-		# B-CHANNEL 2
-		/sbin/isdnctrl eaz isdn0s 2
-		/sbin/isdnctrl l2_prot isdn0s hdlc
-		# 1. card
-		/sbin/isdnctrl addphone isdn0s in LEASED0
-		if [ ${I4L_REMOTE_IS_CISCO} = "yes" ]; then
-			/sbin/isdnctrl encap isdn0s cisco-h
-		fi
-	fi
-	/sbin/isdnctrl dialmode isdn0 manual
-	# configure tcp/ip
-	/sbin/ifconfig isdn0 ${LOCAL_IP} pointopoint ${REMOTE_IP}
-	/sbin/route add -host ${REMOTE_IP} isdn0
-	/sbin/route add default gw ${REMOTE_IP}
-	# switch to leased mode
-	# B-CHANNEL 1
-	/sbin/hisaxctrl HiSax 5 1
-        if [ ${I4L_LEASED_128K} = "yes" ]; then
-		# B-CHANNEL 2
-		sleep 10; /* Wait for master */
-		/sbin/hisaxctrl HiSax 5 2
-	fi
-	;;
-    stop)
-	/sbin/ifconfig isdn0 down
-	/sbin/isdnctrl delif isdn0
-	if [ ${I4L_DEBUG} = "yes" ]; then
-		killall cat
-	fi
-	if [ ${I4L_AS_MODULE} = "yes" ]; then
-		/sbin/rmmod hisax
-		/sbin/rmmod isdn
-		/sbin/rmmod ppp
-		/sbin/rmmod slhc
-	fi
-	;;
-    *)
-	echo "Usage: $0 {start|stop}"
-	exit 1
-esac
-exit 0
-- 
cgit 


From 9c3c0c2048149d946d7f3ebdcbe70e2946750bfb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 18 Apr 2019 22:43:36 +0200
Subject: isdn: remove isdn4linux

With all isdn4linux hardware drivers gone, this is only a wrapper around
CAPI to support old user space. However, from looking at the mailing
list, it seems that the last time anyone asked about it was in 2014,
when the upgrade from a linux-2.4 installation failed, and mISDN was
suggested as a replacement.

The largest public ISDN network (Deutsche Telekom) was supposed to be
shut down 2018, which must have drastically reduced the number of legacy
installations.

When we last discussed removing i4l in 2016, Karsten Keil suggested
revisiting this in 2018. I guess this is overdue.

Link: http://listserv.isdn4linux.de/pipermail/isdn4linux/2014-October/006165.html
Link: https://patchwork.kernel.org/patch/8484861/#17900371
Link: https://listserv.isdn4linux.de/pipermail/isdn4linux/2019-April/thread.html
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 Documentation/isdn/INTERFACE        | 759 ------------------------------------
 Documentation/isdn/INTERFACE.fax    | 163 --------
 Documentation/isdn/README           | 599 ----------------------------
 Documentation/isdn/README.FAQ       |  26 --
 Documentation/isdn/README.audio     | 138 -------
 Documentation/isdn/README.concap    | 259 ------------
 Documentation/isdn/README.diversion | 127 ------
 Documentation/isdn/README.fax       |  45 ---
 Documentation/isdn/README.hfc-pci   |  41 --
 Documentation/isdn/README.syncppp   |  58 ---
 Documentation/isdn/README.x25       | 184 ---------
 Documentation/isdn/syncPPP.FAQ      | 224 -----------
 Documentation/process/changes.rst   |  16 +-
 13 files changed, 2 insertions(+), 2637 deletions(-)
 delete mode 100644 Documentation/isdn/INTERFACE
 delete mode 100644 Documentation/isdn/INTERFACE.fax
 delete mode 100644 Documentation/isdn/README
 delete mode 100644 Documentation/isdn/README.FAQ
 delete mode 100644 Documentation/isdn/README.audio
 delete mode 100644 Documentation/isdn/README.concap
 delete mode 100644 Documentation/isdn/README.diversion
 delete mode 100644 Documentation/isdn/README.fax
 delete mode 100644 Documentation/isdn/README.hfc-pci
 delete mode 100644 Documentation/isdn/README.syncppp
 delete mode 100644 Documentation/isdn/README.x25
 delete mode 100644 Documentation/isdn/syncPPP.FAQ

(limited to 'Documentation')

diff --git a/Documentation/isdn/INTERFACE b/Documentation/isdn/INTERFACE
deleted file mode 100644
index 5df17e5b25c8..000000000000
--- a/Documentation/isdn/INTERFACE
+++ /dev/null
@@ -1,759 +0,0 @@
-$Id: INTERFACE,v 1.15.8.2 2001/03/13 16:17:07 kai Exp $
-
-Description of the Interface between Linklevel and Hardwarelevel
-  of isdn4linux:
-
-
-  The Communication between Linklevel (LL) and Hardwarelevel (HL)
-  is based on the struct isdn_if (defined in isdnif.h).
-
-  An HL-driver can register itself at LL by calling the function
-  register_isdn() with a pointer to that struct. Prior to that, it has
-  to preset some of the fields of isdn_if. The LL sets the rest of
-  the fields. All further communication is done via callbacks using
-  the function-pointers defined in isdn_if.
-
-  Changes/Version numbering:
-
-  During development of the ISDN subsystem, several changes have been
-  made to the interface. Before it went into kernel, the package
-  had a unique version number. The last version, distributed separately
-  was 0.7.4. When the subsystem went into kernel, every functional unit
-  got a separate version number. These numbers are shown at initialization,
-  separated by slashes:
-
-     c.c/t.t/n.n/p.p/a.a/v.v
-
-  where
-
-   c.c is the revision of the common code.
-   t.t is the revision of the tty related code.
-   n.n is the revision of the network related code.
-   p.p is the revision of the ppp related code.
-   a.a is the revision of the audio related code.
-   v.v is the revision of the V.110 related code.
-
-  Changes in this document are marked with '***CHANGEx' where x representing
-  the version number. If that number starts with 0, it refers to the old,
-  separately distributed package. If it starts with one of the letters
-  above, it refers to the revision of the corresponding module. 
-  ***CHANGEIx refers to the revision number of the isdnif.h  
-
-1. Description of the fields of isdn_if:
-
-  int channels;
-
-    This field has to be set by the HL-driver to the number of channels
-    supported prior to calling register_isdn(). Upon return of the call,
-    the LL puts an id there, which has to be used by the HL-driver when
-    invoking the other callbacks.
-
-  int maxbufsize;
-
-    ***CHANGE0.6: New since this version.
-
-    Also to be preset by the HL-driver. With this value the HL-driver
-    tells the LL the maximum size of a data-packet it will accept. 
-
-  unsigned long features;
-
-    To be preset by the HL-driver. Using this field, the HL-driver
-    announces the features supported. At the moment this is limited to
-    report the supported layer2 and layer3-protocols. For setting this
-    field the constants ISDN_FEATURE..., declared in isdnif.h have to be
-    used.
-
-    ***CHANGE0.7.1: The line type (1TR6, EDSS1) has to be set.
-
-  unsigned short hl_hdrlen;
-
-    ***CHANGE0.7.4: New field.
-
-    To be preset by the HL-driver, if it supports sk_buff's. The driver
-    should put here the amount of additional space needed in sk_buff's for
-    its internal purposes. Drivers not supporting sk_buff's should 
-    initialize this field to 0.
-
-  void (*rcvcallb_skb)(int, int, struct sk_buff *)
-
-    ***CHANGE0.7.4: New field.
-
-    This field will be set by LL. The HL-driver delivers received data-
-    packets by calling this function. Upon calling, the HL-driver must
-    already have its private data pulled off the head of the sk_buff.
-
-    Parameter:
-      int              driver-Id
-      int              Channel-number locally to the driver. (starting with 0)
-      struct sk_buff * Pointer to sk_buff, containing received data.
-
-  int (*statcallb)(isdn_ctrl*);
-
-    This field will be set by LL. This function has to be called by the
-    HL-driver for signaling status-changes or other events to the LL.
-
-    Parameter:
-      isdn_ctrl*
-
-      The struct isdn_ctrl also defined in isdn_if. The exact meanings of its
-      fields are described together with the descriptions of the possible
-      events. Here is only a short description of the fields:
-
-        driver  = driver Id.
-        command = event-type. (one of the constants ISDN_STAT_...)
-        arg     = depends on event-type.
-        num     = depends on event-type.
-
-    Returnvalue:
-      0 on success, else -1
-
-  int (*command)(isdn_ctrl*);
-
-    This field has to be preset by the HL-driver. It points to a function,
-    to be called by LL to perform functions like dialing, B-channel
-    setup, etc. The exact meaning of the parameters is described with the
-    descriptions of the possible commands.
-
-    Parameter:
-      isdn_ctrl*
-        driver  = driver-Id
-        command = command to perform. (one of the constants ISDN_CMD_...)
-        arg     = depends on command.
-        num     = depends on command.
-    
-    Returnvalue:
-      >=0 on success, else error-code (-ENODEV etc.)
-
-  int (*writebuf_skb)(int, int, int, struct sk_buff *)
-
-    ***CHANGE0.7.4: New field.
-    ***CHANGEI.1.21: New field.
-
-    This field has to be preset by the HL-driver. The given function will
-    be called by the LL for delivering data to be send via B-Channel.
-
- 
-    Parameter:
-      int              driver-Id ***CHANGE0.7.4: New parameter.
-      int              channel-number locally to the HL-driver. (starts with 0)
-      int	       ack ***ChangeI1.21: New parameter
-		       If this is !0, the driver has to signal the delivery
-		       by sending an ISDN_STAT_BSENT. If this is 0, the driver
-		       MUST NOT send an ISDN_STAT_BSENT.
-      struct sk_buff * Pointer to sk_buff containing data to be send via
-                       B-channel.
-
-    Returnvalue:
-      Length of data accepted on success, else error-code (-EINVAL on
-      oversized packets etc.)
-
-  int (*writecmd)(u_char*, int, int, int, int);
-
-    This field has to be preset by the HL-driver. The given function will be
-    called to perform write-requests on /dev/isdnctrl (i.e. sending commands
-    to the card) The data-format is hardware-specific. This function is
-    intended for debugging only. It is not necessary for normal operation
-    and never will be called by the tty-emulation- or network-code. If
-    this function is not supported, the driver has to set NULL here.
-
-    Parameter:
-      u_char* pointer to data.
-      int     length of data.
-      int     flag: 0 = call from within kernel-space. (HL-driver must use
-                        memcpy, may NOT use schedule())
-                    1 = call from user-space. (HL-driver must use
-                        memcpy_fromfs, use of schedule() allowed)
-      int     driver-Id.
-      int     channel-number locally to the HL-driver. (starts with 0)
-
-***CHANGEI1.14: The driver-Id and channel-number are new since this revision.
-
-    Returnvalue:
-      Length of data accepted on success, else error-code (-EINVAL etc.)
-
-  int (*readstat)(u_char*, int, int, int, int);
-
-    This field has to be preset by the HL-driver. The given function will be
-    called to perform read-requests on /dev/isdnctrl (i.e. reading replies
-    from the card) The data-format is hardware-specific. This function is
-    intended for debugging only. It is not necessary for normal operation
-    and never will be called by the tty-emulation- or network-code. If
-    this function is not supported, the driver has to set NULL here.
-
-    Parameter:
-      u_char* pointer to data.
-      int     length of data.
-      int     flag: 0 = call from within kernel-space. (HL-driver must use
-                        memcpy, may NOT use schedule())
-                    1 = call from user-space. (HL-driver must use
-                        memcpy_fromfs, use of schedule() allowed)
-      int     driver-Id.
-      int     channel-number locally to the HL-driver. (starts with 0)
-
-***CHANGEI1.14: The driver-Id and channel-number are new since this revision.
-
-    Returnvalue:
-      Length of data on success, else error-code (-EINVAL etc.)
-
-  char id[20];
-       ***CHANGE0.7: New since this version.
-
-   This string has to be preset by the HL-driver. Its purpose is for
-   identification of the driver by the user. Eg.: it is shown in the
-   status-info of /dev/isdninfo. Furthermore it is used as Id for binding
-   net-interfaces to a specific channel. If a string of length zero is
-   given, upon return, isdn4linux will replace it by a generic name. (line0,
-   line1 etc.) It is recommended to make this string configurable during
-   module-load-time. (copy a global variable to this string.) For doing that,
-   modules 1.2.8 or newer are necessary.
-
-2. Description of the commands, a HL-driver has to support:
-
-   All commands will be performed by calling the function command() described
-   above from within the LL. The field command of the struct-parameter will
-   contain the desired command, the field driver is always set to the
-   appropriate driver-Id.
-
-   Until now, the following commands are defined:
-
-***CHANGEI1.34: The parameter "num" has been replaced by a union "parm" containing
-                the old "num" and a new setup_type struct used for ISDN_CMD_DIAL
-                and ISDN_STAT_ICALL callback.
-
-   ISDN_CMD_IOCTL:
-
-     This command is intended for performing ioctl-calls for configuring
-     hardware or similar purposes (setting port-addresses, loading firmware
-     etc.) For this purpose, in the LL all ioctl-calls with an argument
-     >= IIOCDRVCTL (0x100) will be handed transparently to this
-     function after subtracting 0x100 and placing the result in arg.
-     Example:
-       If a userlevel-program calls ioctl(0x101,...) the function gets
-       called with the field command set to 1.
-
-     Parameter:
-       driver   = driver-Id.
-       command  = ISDN_CMD_IOCTL
-       arg      = Original ioctl-cmd - IIOCDRVCTL
-       parm.num = first bytes filled with (unsigned long)arg
-   
-     Returnvalue:
-       Depending on driver.
-
-  
-  ISDN_CMD_DIAL:
-
-    This command is used to tell the HL-driver it should dial a given
-    number.
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_DIAL
-      arg         = channel-number locally to the driver. (starting with 0)
-      
-      parm.setup.phone  = An ASCII-String containing the number to dial.
-      parm.setup.eazmsn = An ASCII-Sting containing the own EAZ or MSN.
-      parm.setup.si1    = The Service-Indicator.
-      parm.setup.si2    = Additional Service-Indicator.
-
-                    If the Line has been designed as SPV (a special german
-                    feature, meaning semi-leased-line) the phone has to
-                    start with an "S".
-      ***CHANGE0.6: In previous versions the EAZ has been given in the
-                    highbyte of arg.
-    ***CHANGE0.7.1: New since this version: ServiceIndicator and AddInfo.
-
-  ISDN_CMD_ACCEPTD:
-
-    With this command, the HL-driver is told to accept a D-Channel-setup.
-    (Response to an incoming call)
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_ACCEPTD
-      arg         = channel-number locally to the driver. (starting with 0)
-      parm        = unused.
-
-  ISDN_CMD_ACCEPTB:
-
-    With this command, the HL-driver is told to perform a B-Channel-setup.
-    (after establishing D-Channel-Connection)
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_ACCEPTB
-      arg         = channel-number locally to the driver. (starting with 0)
-      parm        = unused.
-
-  ISDN_CMD_HANGUP:
-
-    With this command, the HL-driver is told to hangup (B-Channel if
-    established first, then D-Channel). This command is also used for
-    actively rejecting an incoming call.
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_HANGUP
-      arg         = channel-number locally to the driver. (starting with 0)
-      parm        = unused.
-
-  ISDN_CMD_CLREAZ:
-
-    With this command, the HL-driver is told not to signal incoming
-    calls to the LL.
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_CLREAZ
-      arg         = channel-number locally to the driver. (starting with 0)
-      parm        = unused.
-
-  ISDN_CMD_SETEAZ:
-
-    With this command, the HL-driver is told to signal incoming calls for
-    the given EAZs/MSNs to the LL.
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_SETEAZ
-      arg         = channel-number locally to the driver. (starting with 0)
-      parm.num    = ASCII-String, containing the desired EAZ's/MSN's
-                    (comma-separated). If an empty String is given, the
-                    HL-driver should respond to ALL incoming calls,
-                    regardless of the destination-address.
-      ***CHANGE0.6: New since this version the "empty-string"-feature.
-
-  ISDN_CMD_GETEAZ: (currently unused)
-
-    With this command, the HL-driver is told to report the current setting
-    given with ISDN_CMD_SETEAZ.
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_GETEAZ
-      arg         = channel-number locally to the driver. (starting with 0)
-      parm.num    = ASCII-String, containing the current EAZ's/MSN's
-
-  ISDN_CMD_SETSIL: (currently unused)
-
-    With this command, the HL-driver is told to signal only incoming
-    calls with the given Service-Indicators.
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_SETSIL
-      arg         = channel-number locally to the driver. (starting with 0)
-      parm.num    = ASCII-String, containing the desired Service-Indicators.
-
-  ISDN_CMD_GETSIL: (currently unused)
-
-    With this command, the HL-driver is told to return the current
-    Service-Indicators it will respond to.
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_SETSIL
-      arg         = channel-number locally to the driver. (starting with 0)
-      parm.num    = ASCII-String, containing the current Service-Indicators.
-
-  ISDN_CMD_SETL2:
-
-    With this command, the HL-driver is told to select the given Layer-2-
-    protocol. This command is issued by the LL prior to ISDN_CMD_DIAL or
-    ISDN_CMD_ACCEPTD.
-
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_SETL2
-      arg         = channel-number locally to the driver. (starting with 0)
-                    logical or'ed with (protocol-Id << 8)
-                    protocol-Id is one of the constants ISDN_PROTO_L2...
-      parm        = unused.
-
-  ISDN_CMD_GETL2: (currently unused)
-
-    With this command, the HL-driver is told to return the current
-    setting of the Layer-2-protocol.
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_GETL2
-      arg         = channel-number locally to the driver. (starting with 0)
-      parm        = unused.
-    Returnvalue:
-      current protocol-Id (one of the constants ISDN_L2_PROTO)
-
-  ISDN_CMD_SETL3:
-
-    With this command, the HL-driver is told to select the given Layer-3-
-    protocol. This command is issued by the LL prior to ISDN_CMD_DIAL or
-    ISDN_CMD_ACCEPTD.
-
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_SETL3
-      arg         = channel-number locally to the driver. (starting with 0)
-                    logical or'ed with (protocol-Id << 8)
-                    protocol-Id is one of the constants ISDN_PROTO_L3...
-      parm.fax    = Pointer to T30_s fax struct. (fax usage only)
-
-  ISDN_CMD_GETL2: (currently unused)
-
-    With this command, the HL-driver is told to return the current
-    setting of the Layer-3-protocol.
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_GETL3
-      arg         = channel-number locally to the driver. (starting with 0)
-      parm        = unused.
-    Returnvalue:
-      current protocol-Id (one of the constants ISDN_L3_PROTO)
-
-  ISDN_CMD_PROCEED: 
-
-    With this command, the HL-driver is told to proceed with a incoming call.
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_PROCEED
-      arg         = channel-number locally to the driver. (starting with 0)
-      setup.eazmsn= empty string or string send as uus1 in DSS1 with 
-                    PROCEED message
-
-  ISDN_CMD_ALERT: 
-
-    With this command, the HL-driver is told to alert a proceeding call.
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_ALERT
-      arg         = channel-number locally to the driver. (starting with 0)
-      setup.eazmsn= empty string or string send as uus1 in DSS1 with 
-                    ALERT message
-
-  ISDN_CMD_REDIR: 
-
-    With this command, the HL-driver is told to redirect a call in proceeding
-    or alerting state.  
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_REDIR
-      arg         = channel-number locally to the driver. (starting with 0)
-      setup.eazmsn= empty string or string send as uus1 in DSS1 protocol
-      setup.screen= screening indicator
-      setup.phone = redirected to party number
-
-  ISDN_CMD_PROT_IO:
-
-    With this call, the LL-driver invokes protocol specific features through
-    the LL.
-    The call is not implicitely bound to a connection.
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_CMD_PROT_IO
-      arg         = The lower 8 Bits define the addressed protocol as defined
-                    in ISDN_PTYPE..., the upper bits are used to differentiate
-                    the protocol specific CMD.  
-      
-      para        = protocol and function specific. See isdnif.h for detail.
-
-
-  ISDN_CMD_FAXCMD:
-
-    With this command the HL-driver receives a fax sub-command.
-    For details refer to INTERFACE.fax
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_CMD_FAXCMD
-      arg         = channel-number locally to the driver. (starting with 0)
-      parm        = unused.
-
-
-3. Description of the events to be signaled by the HL-driver to the LL.
-
-  All status-changes are signaled via calling the previously described
-  function statcallb(). The field command of the struct isdn_cmd has
-  to be set by the HL-driver with the appropriate Status-Id (event-number).
-  The field arg has to be set to the channel-number (locally to the driver,
-  starting with 0) to which this event applies. (Exception: STAVAIL-event)
-
-  Until now, the following Status-Ids are defined:
-
-  ISDN_STAT_AVAIL:
-
-    With this call, the HL-driver signals the availability of new data
-    for readstat(). Used only for debugging-purposes, see description
-    of readstat().
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_STAVAIL
-      arg         = length of available data.
-      parm        = unused.
-
-  ISDN_STAT_ICALL:
-  ISDN_STAT_ICALLW:
-
-    With this call, the HL-driver signals an incoming call to the LL.
-    If ICALLW is signalled the incoming call is a waiting call without
-    a available B-chan.
-
-    Parameter:
-      driver            = driver-Id
-      command           = ISDN_STAT_ICALL
-      arg               = channel-number, locally to the driver. (starting with 0)
-      para.setup.phone  = Callernumber.
-      para.setup.eazmsn = CalledNumber.
-      para.setup.si1    = Service Indicator.
-      para.setup.si2    = Additional Service Indicator.
-      para.setup.plan   = octet 3 from Calling party number Information Element.
-      para.setup.screen = octet 3a from Calling party number Information Element.
-
-    Return:
-      0           = No device matching this call.
-      1           = At least one device matching this call (RING on ttyI).
-                    HL-driver may send ALERTING on the D-channel in this case.
-      2           = Call will be rejected.
-      3           = Incoming called party number is currently incomplete.
-                    Additional digits are required. 
-                    Used for signalling with PtP connections.
-      4	          = Call will be held in a proceeding state 
-                    (HL driver sends PROCEEDING)
-                    Used when a user space prog needs time to interpret a call
-		    para.setup.eazmsn may be filled with an uus1 message of
-		    30 octets maximum. Empty string if no uus. 
-      5           = Call will be actively deflected to another party
-                    Only available in DSS1/EURO protocol
-		    para.setup.phone must be set to destination party number
-		    para.setup.eazmsn may be filled with an uus1 message of
-		    30 octets maximum. Empty string if no uus. 
-      -1          = An error happened. (Invalid parameters for example.)
-  The keypad support now is included in the dial command.	        
-
-
-  ISDN_STAT_RUN:
-
-    With this call, the HL-driver signals availability of the ISDN-card.
-    (after initializing, loading firmware)
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_RUN
-      arg         = unused.
-      parm        = unused.
-
-  ISDN_STAT_STOP:
-
-    With this call, the HL-driver signals unavailability of the ISDN-card.
-    (before unloading, while resetting/reconfiguring the card)
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_STOP
-      arg         = unused.
-      parm        = unused.
-
-  ISDN_STAT_DCONN:
-
-   With this call, the HL-driver signals the successful establishment of
-   a D-Channel-connection. (Response to ISDN_CMD_ACCEPTD or ISDN_CMD_DIAL)
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_DCONN
-      arg         = channel-number, locally to the driver. (starting with 0)
-      parm        = unused.
-
-  ISDN_STAT_BCONN:
-
-   With this call, the HL-driver signals the successful establishment of
-   a B-Channel-connection. (Response to ISDN_CMD_ACCEPTB or because the
-   remote-station has initiated establishment)
-
-   The HL driver should call this when the logical l2/l3 protocol 
-   connection on top of the physical B-channel is established.
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_BCONN
-      arg         = channel-number, locally to the driver. (starting with 0)
-      parm.num    = ASCII-String, containing type of connection (for analog
-		    modem only). This will be appended to the CONNECT message
-		    e.g. 14400/V.32bis
-
-  ISDN_STAT_DHUP:
-
-   With this call, the HL-driver signals the shutdown of a
-   D-Channel-connection. This could be a response to a prior ISDN_CMD_HANGUP,
-   or caused by a remote-hangup or if the remote-station has actively
-   rejected a call.
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_DHUP
-      arg         = channel-number, locally to the driver. (starting with 0)
-      parm        = unused.
-
-  ISDN_STAT_BHUP:
-
-   With this call, the HL-driver signals the shutdown of a
-   B-Channel-connection. This could be a response to a prior ISDN_CMD_HANGUP,
-   or caused by a remote-hangup.
-
-   The HL driver should call this as soon as the logical l2/l3 protocol 
-   connection on top of the physical B-channel is released.
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_BHUP
-      arg         = channel-number, locally to the driver. (starting with 0)
-      parm        = unused.
-
-  ISDN_STAT_CINF:
-
-   With this call, the HL-driver delivers charge-unit information to the
-   LL.
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_CINF
-      arg         = channel-number, locally to the driver. (starting with 0)
-      parm.num    = ASCII string containing charge-units (digits only).
-
-  ISDN_STAT_LOAD: (currently unused)
-
-  ISDN_STAT_UNLOAD:
-
-   With this call, the HL-driver signals that it will be unloaded now. This
-   tells the LL to release all corresponding data-structures.
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_UNLOAD
-      arg         = unused.
-      parm        = unused.
-
-  ISDN_STAT_BSENT:
-
-    With this call the HL-driver signals the delivery of a data-packet.
-    This callback is used by the network-interfaces only, tty-Emulation
-    does not need this call.
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_BSENT
-      arg         = channel-number, locally to the driver. (starting with 0)
-      parm.length = ***CHANGEI.1.21: New field.
-		    the driver has to set this to the original length
-		    of the skb at the time of receiving it from the linklevel.
-
-  ISDN_STAT_NODCH:
-
-    With this call, the driver has to respond to a prior ISDN_CMD_DIAL, if
-    no D-Channel is available.
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_NODCH
-      arg         = channel-number, locally to the driver. (starting with 0)
-      parm        = unused.
-
-  ISDN_STAT_ADDCH: 
-
-    This call is for HL-drivers, which are unable to check card-type
-    or numbers of supported channels before they have loaded any firmware
-    using ioctl. Those HL-driver simply set the channel-parameter to a
-    minimum channel-number when registering, and later if they know
-    the real amount, perform this call, allocating additional channels.
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_ADDCH
-      arg         = number of channels to be added.
-      parm        = unused.
-
-  ISDN_STAT_CAUSE:
-
-    With this call, the HL-driver delivers CAUSE-messages to the LL.
-    Currently the LL does not use this messages. Their contents is simply
-    logged via kernel-messages. Therefore, currently the format of the
-    messages is completely free. However they should be printable.
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_NODCH
-      arg         = channel-number, locally to the driver. (starting with 0)
-      parm.num    = ASCII string containing CAUSE-message.
-
-  ISDN_STAT_DISPLAY:
-
-    With this call, the HL-driver delivers DISPLAY-messages to the LL.
-    Currently the LL does not use this messages. 
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_DISPLAY
-      arg         = channel-number, locally to the driver. (starting with 0)
-      para.display= string containing DISPLAY-message.
-
-  ISDN_STAT_PROT:
-
-    With this call, the HL-driver delivers protocol specific infos to the LL.
-    The call is not implicitely bound to a connection.
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_PROT
-      arg         = The lower 8 Bits define the addressed protocol as defined
-                    in ISDN_PTYPE..., the upper bits are used to differentiate
-                    the protocol specific STAT.  
-      
-      para        = protocol and function specific. See isdnif.h for detail.
-
-  ISDN_STAT_DISCH:
-
-    With this call, the HL-driver signals the LL to disable or enable the
-    use of supplied channel and driver.
-    The call may be used to reduce the available number of B-channels after
-    loading the driver. The LL has to ignore a disabled channel when searching
-    for free channels. The HL driver itself never delivers STAT callbacks for
-    disabled channels. 	    
-    The LL returns a nonzero code if the operation was not successful or the
-    selected channel is actually regarded as busy.
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_DISCH
-      arg         = channel-number, locally to the driver. (starting with 0)
-      parm.num[0] = 0 if channel shall be disabled, else enabled.
-
-  ISDN_STAT_L1ERR:
-
-    ***CHANGEI1.21 new status message.
-    A signal can be sent to the linklevel if an Layer1-error results in
-    packet-loss on receive or send. The field errcode of the cmd.parm
-    union describes the error more precisely.
-
-    Parameter:
-      driver      = driver-Id
-      command     = ISDN_STAT_L1ERR
-      arg         = channel-number, locally to the driver. (starting with 0)
-      parm.errcode= ISDN_STAT_L1ERR_SEND:     Packet lost while sending.
-		    ISDN_STAT_L1ERR_RECV:     Packet lost while receiving.
-  ISDN_STAT_FAXIND:
-
-    With this call the HL-driver signals a fax sub-command to the LL.
-    For details refer to INTERFACE.fax
-
-    Parameter:
-      driver      = driver-Id.
-      command     = ISDN_STAT_FAXIND
-      arg         = channel-number, locally to the driver. (starting with 0)
-      parm        = unused.
-
diff --git a/Documentation/isdn/INTERFACE.fax b/Documentation/isdn/INTERFACE.fax
deleted file mode 100644
index 9c8c6d914ec7..000000000000
--- a/Documentation/isdn/INTERFACE.fax
+++ /dev/null
@@ -1,163 +0,0 @@
-$Id: INTERFACE.fax,v 1.2 2000/08/06 09:22:50 armin Exp $
-
-
-Description of the fax-subinterface between linklevel and hardwarelevel of 
-  isdn4linux. 
-
-  The communication between linklevel (LL) and hardwarelevel (HL) for fax
-  is based on the struct T30_s (defined in isdnif.h).
-  This struct is allocated in the LL.  
-  In order to use fax, the LL provides the pointer to this struct with the 
-  command ISDN_CMD_SETL3 (parm.fax). This pointer expires in case of hangup 
-  and when a new channel to a new connection is assigned. 
-
-
-Data handling:
-  In send-mode the HL-driver has to handle the <DLE> codes and the bit-order 
-  conversion by itself. 
-  In receive-mode the LL-driver takes care of the bit-order conversion
-  (specified by +FBOR)
-
-Structure T30_s description:
-
-  This structure stores the values (set by AT-commands), the remote-
-  capability-values and the command-codes between LL and HL.
-
-  If the HL-driver receives ISDN_CMD_FAXCMD, all needed information
-  is in this struct set by the LL.
-  To signal information to the LL, the HL-driver has to set the 
-  parameters and use ISDN_STAT_FAXIND.
-  (Please refer to INTERFACE)
-
-Structure T30_s:
-
-  All members are 8-bit unsigned (__u8)
-
-  -  resolution     
-  -  rate
-  -  width
-  -  length
-  -  compression
-  -  ecm
-  -  binary
-  -  scantime
-  -  id[]
-  Local faxmachine's parameters, set by +FDIS, +FDCS, +FLID, ...
-
-  -  r_resolution
-  -  r_rate
-  -  r_width
-  -  r_length
-  -  r_compression
-  -  r_ecm
-  -  r_binary
-  -  r_scantime
-  -  r_id[]
-  Remote faxmachine's parameters. To be set by HL-driver.
-
-  -  phase      
-  Defines the actual state of fax connection. Set by HL or LL
-  depending on progress and type of connection.
-  If the phase changes because of an AT command, the LL driver
-  changes this value. Otherwise the HL-driver takes care of it, but
-  only necessary on call establishment (from IDLE to PHASE_A).
-  (one of the constants ISDN_FAX_PHASE_[IDLE,A,B,C,D,E])
-
-  -  direction
-  Defines outgoing/send or incoming/receive connection.
-  (ISDN_TTY_FAX_CONN_[IN,OUT])
-
-  -  code
-  Commands from LL to HL; possible constants : 
-      ISDN_TTY_FAX_DR        signals +FDR command to HL
-
-      ISDN_TTY_FAX_DT        signals +FDT command to HL 
-
-      ISDN_TTY_FAX_ET        signals +FET command to HL
-
-
-  Other than that the "code" is set with the hangup-code value at
-  the end of connection for the +FHNG message.
-        
-  -  r_code 
-  Commands from HL to LL; possible constants :
-      ISDN_TTY_FAX_CFR       output of +FCFR message. 
-
-      ISDN_TTY_FAX_RID       output of remote ID set in r_id[]
-                             (+FCSI/+FTSI on send/receive)
-
-      ISDN_TTY_FAX_DCS       output of +FDCS and CONNECT message,
-                             switching to phase C.
-
-      ISDN_TTY_FAX_ET        signals end of data,
-                             switching to phase D.
-
-      ISDN_TTY_FAX_FCON      signals the established, outgoing connection,
-                             switching to phase B.
-
-      ISDN_TTY_FAX_FCON_I    signals the established, incoming connection,
-                             switching to phase B.
-
-      ISDN_TTY_FAX_DIS       output of +FDIS message and values.
-
-      ISDN_TTY_FAX_SENT      signals that all data has been sent 
-                             and <DLE><ETX> is acknowledged,
-                             OK message will be sent.
-
-      ISDN_TTY_FAX_PTS       signals a msg-confirmation (page sent successful),
-                             depending on fet value:
-                             0: output OK message (more pages follow)
-                             1: switching to phase B (next document)
-
-      ISDN_TTY_FAX_TRAIN_OK  output of +FDCS and OK message (for receive mode).
-
-      ISDN_TTY_FAX_EOP       signals end of data in receive mode,
-                             switching to phase D.
-
-      ISDN_TTY_FAX_HNG       output of the +FHNG and value set by code and
-                             OK message, switching to phase E.
-
-
-  -  badlin
-  Value of +FBADLIN  
-
-  -  badmul
-  Value of +FBADMUL
-
-  -  bor
-  Value of +FBOR
-
-  -  fet
-  Value of +FET command in send-mode.
-  Set by HL in receive-mode for +FET message.
-
-  -  pollid[]  
-  ID-string, set by +FCIG
-
-  -  cq
-  Value of +FCQ
-
-  -  cr
-  Value of +FCR
-
-  -  ctcrty
-  Value of +FCTCRTY
-
-  -  minsp
-  Value of +FMINSP
-
-  -  phcto
-  Value of +FPHCTO
-
-  -  rel
-  Value of +FREL
-
-  -  nbc
-  Value of +FNBC (0,1)
-  (+FNBC is not a known class 2 fax command, I added this to change the
-   automatic "best capabilities" connection in the eicon HL-driver)
-
-  
-Armin
-mac@melware.de
-
diff --git a/Documentation/isdn/README b/Documentation/isdn/README
deleted file mode 100644
index 74bd2bdb455b..000000000000
--- a/Documentation/isdn/README
+++ /dev/null
@@ -1,599 +0,0 @@
-README for the ISDN-subsystem
-
-1. Preface
-
-  1.1 Introduction
-
-  This README describes how to set up and how to use the different parts
-  of the ISDN-subsystem.
-
-  For using the ISDN-subsystem, some additional userlevel programs are
-  necessary. Those programs and some contributed utilities are available
-  at
-
-   ftp.isdn4linux.de
-
-   /pub/isdn4linux/isdn4k-utils-<VersionNumber>.tar.gz
-
-
-  We also have set up a mailing-list:
-
-   The isdn4linux-project originates in Germany, and therefore by historical
-   reasons, the mailing-list's primary language is german. However mails
-   written in english have been welcome all the time.
-
-   to subscribe: write a email to majordomo@listserv.isdn4linux.de,
-   Subject irrelevant, in the message body:
-   subscribe isdn4linux <your_email_address>
-
-   To write to the mailing-list, write to isdn4linux@listserv.isdn4linux.de
-
-   This mailinglist is bidirectionally gated to the newsgroup
-
-     de.alt.comm.isdn4linux
-
-  There is also a well maintained FAQ in English available at
-     https://www.mhessler.de/i4lfaq/
-  It can be viewed online, or downloaded in sgml/text/html format.
-  The FAQ can also be viewed online at
-     https://www.isdn4linux.de/faq/i4lfaq.html
-  or downloaded from
-     ftp://ftp.isdn4linux.de/pub/isdn4linux/FAQ/
-
-  1.1 Technical details
-
-  In the following Text, the terms MSN and EAZ are used.
-
-  MSN is the abbreviation for (M)ultiple(S)ubscriber(N)umber, and applies
-  to Euro(EDSS1)-type lines. Usually it is simply the phone number.
-
-  EAZ is the abbreviation of (E)ndgeraete(A)uswahl(Z)iffer and
-  applies to German 1TR6-type lines. This is a one-digit string,
-  simply appended to the base phone number
-
-  The internal handling is nearly identical, so replace the appropriate
-  term to that one, which applies to your local ISDN-environment.
-
-  When the link-level-module isdn.o is loaded, it supports up to 16
-  low-level-modules with up to 64 channels. (The number 64 is arbitrarily
-  chosen and can be configured at compile-time --ISDN_MAX in isdn.h).
-  A low-level-driver can register itself through an interface (which is
-  defined in isdnif.h) and gets assigned a slot.
-  The following char-devices are made available for each channel:
-
-  A raw-control-device with the following functions:
-     write: raw D-channel-messages (format: depends on driver).
-     read:  raw D-channel-messages (format: depends on driver).
-     ioctl: depends on driver, i.e. for the ICN-driver, the base-address of
-            the ports and the shared memory on the card can be set and read
-            also the boot-code and the protocol software can be loaded into
-            the card.
-
-   O N L Y !!!  for debugging (no locking against other devices):
-   One raw-data-device with the following functions:
-     write: data to B-channel.
-     read:  data from B-channel.
-
-   In addition the following devices are made available:
-
-   128 tty-devices (64 cuix and 64 ttyIx) with integrated modem-emulator:
-   The functionality is almost the same as that of a serial device
-   (the line-discs are handled by the kernel), which lets you run
-   SLIP, CSLIP and asynchronous PPP through the devices. We have tested
-   Seyon, minicom, CSLIP (uri-dip) PPP, mgetty, XCept and Hylafax. 
-
-   The modem-emulation supports the following:
-           1.3.1 Commands:
-
-               ATA      Answer incoming call.
-               ATD<No.> Dial, the number may contain:
-                        [0-9] and [,#.*WPT-S]
-                        the latter are ignored until 'S'.
-                        The 'S' must precede the number, if
-                        the line is a SPV (German 1TR6).
-               ATE0     Echo off.
-               ATE1     Echo on (default).
-               ATH      Hang-up.
-               ATH1     Off hook (ignored).
-               ATH0     Hang-up.
-               ATI      Return "ISDN for Linux...".
-               ATI0        "
-               ATI1        "
-               ATI2     Report of last connection.
-               ATO      On line (data mode).
-               ATQ0     Enable result codes (default).
-               ATQ1     Disable result codes (default).
-               ATSx=y   Set register x to y.
-               ATSx?    Show contents of register x.
-               ATV0     Numeric responses.
-               ATV1     English responses (default).
-               ATZ      Load registers and EAZ/MSN from Profile.
-               AT&Bx    Set Send-Packet-size to x (max. 4000)
-                        The real packet-size may be limited by the
-                        low-level-driver used. e.g. the HiSax-Module-
-                        limit is 2000. You will get NO Error-Message,
-                        if you set it to higher values, because at the
-                        time of giving this command the corresponding
-                        driver may not be selected (see "Automatic
-                        Assignment") however the size of outgoing packets
-                        will be limited correctly.
-               AT&D0    Ignore DTR
-               AT&D2    DTR-low-edge: Hang up and return to
-                        command mode (default).
-               AT&D3    Same as AT&D2 but also resets all registers.
-               AT&Ex    Set the EAZ/MSN for this channel to x.
-               AT&F     Reset all registers and profile to "factory-defaults"
-               AT&Lx    Set list of phone numbers to listen on.  x is a
-                        list of wildcard patterns separated by semicolon.
-                        If this is set, it has precedence over the MSN set
-                        by AT&E.
-               AT&Rx    Select V.110 bitrate adaption.
-                        This command enables V.110 protocol with 9600 baud
-                        (x=9600), 19200 baud (x=19200) or 38400 baud
-                        (x=38400). A value of x=0 disables V.110 switching
-                        back to default X.75. This command sets the following
-                        Registers:
-                          Reg 14 (Layer-2 protocol):
-                            x = 0:     0
-                            x = 9600:  7
-                            x = 19200: 8
-                            x = 38400: 9
-                          Reg 18.2 = 1
-                          Reg 19 (Additional Service Indicator):
-                            x = 0:       0
-                            x = 9600:  197
-                            x = 19200: 199
-                            x = 38400: 198
-                          Note on value in Reg 19:
-                            There is _NO_ common convention for 38400 baud.
-                            The value 198 is chosen arbitrarily. Users
-                            _MUST_ negotiate this value before establishing
-                            a connection.
-               AT&Sx    Set window-size (x = 1..8) (not yet implemented)
-               AT&V     Show all settings.
-               AT&W0    Write registers and EAZ/MSN to profile. See also
-                        iprofd (5.c in this README).
-               AT&X0    BTX-mode and T.70-mode off (default)
-               AT&X1    BTX-mode on. (S13.1=1, S13.5=0 S14=0, S16=7, S18=7, S19=0)
-               AT&X2    T.70-mode on. (S13.1=1, S13.5=1, S14=0, S16=7, S18=7, S19=0)
-               AT+Rx    Resume a suspended call with CallID x (x = 1,2,3...)
-               AT+Sx    Suspend a call with CallID x (x = 1,2,3...)
-
-           For voice-mode commands refer to README.audio
-
-           1.3.2 Escape sequence:
-               During a connection, the emulation reacts just like
-               a normal modem to the escape sequence <DELAY>+++<DELAY>.
-               (The escape character - default '+' - can be set in the
-               register 2).
-               The DELAY must at least be 1.5 seconds long and delay
-               between the escape characters must not exceed 0.5 seconds.
-
-           1.3.3 Registers:
-
-              Nr.  Default  Description
-              0    0        Answer on ring number.
-                            (no auto-answer if S0=0).
-              1    0        Count of rings.
-              2    43       Escape character.
-                            (a value >= 128 disables the escape sequence).
-              3    13       Carriage return character (ASCII).
-              4    10       Line feed character (ASCII).
-              5    8        Backspace character (ASCII).
-              6    3        Delay in seconds before dialing.
-              7    60       Wait for carrier.
-              8    2        Pause time for comma (ignored)
-              9    6        Carrier detect time (ignored)
-             10    7        Carrier loss to disconnect time (ignored).
-             11    70       Touch tone timing (ignored).
-             12    69       Bit coded register:
-                            Bit 0:    0 = Suppress response messages.
-                                      1 = Show response messages.
-                            Bit 1:    0 = English response messages.
-                                      1 = Numeric response messages.
-                            Bit 2:    0 = Echo off.
-                                      1 = Echo on.
-                            Bit 3     0 = DCD always on.
-                                      1 = DCD follows carrier.
-                            Bit 4     0 = CTS follows RTS
-                                      1 = Ignore RTS, CTS always on.
-                            Bit 5     0 = return to command mode on DTR low.
-                                      1 = Same as 0 but also resets all
-                                          registers.
-                                      See also register 13, bit 2
-                            Bit 6     0 = DSR always on.
-                                      1 = DSR only on if channel is available.
-                            Bit 7     0 = Cisco-PPP-flag-hack off (default).
-                                      1 = Cisco-PPP-flag-hack on.
-             13   0         Bit coded register:
-                            Bit 0:    0 = Use delayed tty-send-algorithm
-                                      1 = Direct tty-send.
-                            Bit 1:    0 = T.70 protocol (Only for BTX!) off
-                                      1 = T.70 protocol (Only for BTX!) on
-                            Bit 2:    0 = Don't hangup on DTR low.
-                                      1 = Hangup on DTR low.
-                            Bit 3:    0 = Standard response messages
-                                      1 = Extended response messages
-                            Bit 4:    0 = CALLER NUMBER before every RING.
-                                      1 = CALLER NUMBER after first RING.
-                            Bit 5:    0 = T.70 extended protocol off
-                                      1 = T.70 extended protocol on
-                            Bit 6:    0 = Special RUNG Message off
-                                      1 = Special RUNG Message on
-                                          "RUNG" is delivered on a ttyI, if
-                                          an incoming call happened (RING) and
-                                          the remote party hung up before any
-                                          local ATA was given.
-			    Bit 7:    0 = Don't show display messages from net
-                                      1 = Show display messages from net
-				          (S12 Bit 1 must be 0 too)      
-             14   0         Layer-2 protocol:
-                                      0 = X75/LAPB with I-frames
-                                      1 = X75/LAPB with UI-frames
-                                      2 = X75/LAPB with BUI-frames
-                                      3 = HDLC
-                                      4 = Transparent (audio)
-                                      7 = V.110, 9600 baud
-                                      8 = V.110, 19200 baud
-                                      9 = V.110, 38400 baud
-                                     10 = Analog Modem (only if hardware supports this)
-                                     11 = Fax G3 (only if hardware supports this)
-             15   0         Layer-3 protocol:
-                                      0 = transparent
-                                      1 = transparent with audio features (e.g. DSP)
-                                      2 = Fax G3 Class 2 commands (S14 has to be set to 11)
-                                      3 = Fax G3 Class 1 commands (S14 has to be set to 11)
-             16   250       Send-Packet-size/16
-             17   8         Window-size (not yet implemented)
-             18   4         Bit coded register, Service-Octet-1 to accept,
-                            or to be used on dialout:
-                            Bit 0:    Service 1 (audio) when set.
-                            Bit 1:    Service 5 (BTX) when set.
-                            Bit 2:    Service 7 (data) when set.
-                            Note: It is possible to set more than one
-                                  bit. In this case, on incoming calls
-                                  the selected services are accepted,
-                                  and if the service is "audio", the
-                                  Layer-2-protocol is automatically
-                                  changed to 4 regardless of the setting
-                                  of register 14. On outgoing calls,
-                                  the most significant 1-bit is chosen to
-                                  select the outgoing service octet.
-             19   0         Service-Octet-2
-             20   0         Bit coded register (readonly)
-                            Service-Octet-1 of last call.
-                            Bit mapping is the same as register 18
-             21   0         Bit coded register (readonly)
-                            Set on incoming call (during RING) to
-                            octet 3 of calling party number IE (Numbering plan)
-                            See section 4.5.10 of ITU Q.931
-             22   0         Bit coded register (readonly)
-                            Set on incoming call (during RING) to
-                            octet 3a of calling party number IE (Screening info)
-                            See section 4.5.10 of ITU Q.931
-             23   0         Bit coded register:
-                            Bit 0:    0 = Add CPN to RING message off
-                                      1 = Add CPN to RING message on
-                            Bit 1:    0 = Add CPN to FCON message off
-                                      1 = Add CPN to FCON message on
-                            Bit 2:    0 = Add CDN to RING/FCON message off
-                                      1 = Add CDN to RING/FCON message on
-
-  Last but not least a (at the moment fairly primitive) device to request
-  the line-status (/dev/isdninfo) is made available.
-
-  Automatic assignment of devices to lines:
-
-  All inactive physical lines are listening to all EAZs for incoming
-  calls and are NOT assigned to a specific tty or network interface.
-  When an incoming call is detected, the driver looks first for a network
-  interface and then for an opened tty which:
-
-  1. is configured for the same EAZ.
-  2. has the same protocol settings for the B-channel.
-  3. (only for network interfaces if the security flag is set)
-     contains the caller number in its access list.
-  4. Either the channel is not bound exclusively to another Net-interface, or
-     it is bound AND the other checks apply to exactly this interface.
-     (For usage of the bind-features, refer to the isdnctrl-man-page)
-
-  Only when a matching interface or tty is found is the call accepted
-  and the "connection" between the low-level-layer and the link-level-layer
-  is established and kept until the end of the connection.
-  In all other cases no connection is established. Isdn4linux can be
-  configured to either do NOTHING in this case (which is useful, if
-  other, external devices with the same EAZ/MSN are connected to the bus)
-  or to reject the call actively. (isdnctrl busreject ...)
-
-  For an outgoing call, the inactive physical lines are searched.
-  The call is placed on the first physical line, which supports the
-  requested protocols for the B-channel. If a net-interface, however
-  is pre-bound to a channel, this channel is used directly.
-
-  This makes it possible to configure several network interfaces and ttys
-  for one EAZ, if the network interfaces are set to secure operation.
-  If an incoming call matches one network interface, it gets connected to it.
-  If another incoming call for the same EAZ arrives, which does not match
-  a network interface, the first tty gets a "RING" and so on.
-
-2 System prerequisites:
-
-  ATTENTION!
-
-  Always use the latest module utilities. The current version is
-  named in Documentation/Changes. Some old versions of insmod
-  are not capable of setting the driver-Ids correctly.
-
-3. Lowlevel-driver configuration.
-
-   Configuration depends on how the drivers are built. See the
-   README.<yourDriver> for information on driver-specific setup.
-
-4. Device-inodes
-
-   The major and minor numbers and their names are described in
-   Documentation/admin-guide/devices.rst. The major numbers are:
-
-     43 for the ISDN-tty's.
-     44 for the ISDN-callout-tty's.
-     45 for control/info/debug devices.
-
-5. Application
-
-   a) For some card-types, firmware has to be loaded into the cards, before
-      proceeding with device-independent setup. See README.<yourDriver>
-      for how to do that.
-
-   b) If you only intend to use ttys, you are nearly ready now.
-
-   c) If you want to have really permanent "Modem"-settings on disk, you
-      can start the daemon iprofd. Give it a path to a file at the command-
-      line. It will store the profile-settings in this file every time
-      an AT&W0 is performed on any ISDN-tty. If the file already exists,
-      all profiles are initialized from this file. If you want to unload
-      any of the modules, kill iprofd first.
-
-   d) For networking, continue: Create an interface:
-       isdnctrl addif isdn0
-
-   e) Set the EAZ (or MSN for Euro-ISDN):
-       isdnctrl eaz isdn0 2
-
-     (For 1TR6 a single digit is allowed, for Euro-ISDN the number is your
-      real MSN e.g.: Phone-Number)
-
-   f) Set the number for outgoing calls on the interface:
-       isdnctrl addphone isdn0 out 1234567
-       ... (this can be executed more than once, all assigned numbers are
-            tried in order)
-      and the number(s) for incoming calls:
-       isdnctrl addphone isdn0 in 1234567
-
-   g) Set the timeout for hang-up:
-       isdnctrl huptimeout isdn0 <timeout_in_seconds>
-
-   h) additionally you may activate charge-hang-up (= Hang up before
-      next charge-info, this only works, if your isdn-provider transmits
-      the charge-info during and after the connection):
-       isdnctrl chargehup isdn0 on
-
-   i) Set the dial mode of the interface:
-       isdnctrl dialmode isdn0 auto
-      "off" means that you (or the system) cannot make any connection
-        (neither incoming or outgoing connections are possible). Use
-        this if you want to be sure that no connections will be made.
-      "auto" means that the interface is in auto-dial mode, and will
-        attempt to make a connection whenever a network data packet needs
-        the interface's link. Note that this can cause unexpected dialouts,
-        and lead to a high phone bill! Some daemons or other pc's that use
-        this interface can cause this.
-        Incoming connections are also possible.
-      "manual" is a dial mode created to prevent the unexpected dialouts.
-        In this mode, the interface will never make any connections on its
-        own. You must explicitly initiate a connection with "isdnctrl dial
-        isdn0". However, after an idle time of no traffic as configured for
-	the huptimeout value with isdnctrl, the connection _will_ be ended.
-	If you don't want any automatic hangup, set the huptimeout value to 0.
-        "manual" is the default.
-
-   j) Setup the interface with ifconfig as usual, and set a route to it.
-
-   k) (optional) If you run X11 and have Tcl/Tk-wish version 4.0, you can use
-     the script tools/tcltk/isdnmon. You can add actions for line-status
-     changes. See the comments at the beginning of the script for how to
-     do that. There are other tty-based tools in the tools-subdirectory
-     contributed by Michael Knigge (imon), Volker Götz (imontty) and
-     Andreas Kool (isdnmon).
-
-   l) For initial testing, you can set the verbose-level to 2 (default: 0).
-      Then all incoming calls are logged, even if they are not addressed
-      to one of the configured net-interfaces:
-      isdnctrl verbose 2
-
-  Now you are ready! A ping to the set address should now result in an
-  automatic dial-out (look at syslog kernel-messages).
-  The phone numbers and EAZs can be assigned at any time with isdnctrl.
-  You can add as many interfaces as you like with addif following the
-  directions above. Of course, there may be some limitations. But we have
-  tested as many as 20 interfaces without any problem. However, if you
-  don't give an interface name to addif, the  kernel will assign a name
-  which starts with "eth". The number of "eth"-interfaces is limited by
-  the kernel.
-
-5. Additional options for isdnctrl:
-
-   "isdnctrl secure <InterfaceName> on"
-   Only incoming calls, for which the caller-id is listed in the access
-   list of the interface are accepted. You can add caller-id's With the
-   command "isdnctrl addphone <InterfaceName> in <caller-id>"
-   Euro-ISDN does not transmit the leading '0' of the caller-id for an
-   incoming call, therefore you should configure it accordingly.
-   If the real number for the dialout e.g. is "09311234567" the number
-   to configure here is "9311234567". The pattern-match function
-   works similar to the shell mechanism.
-
-     ?     one arbitrary digit
-     *     zero or arbitrary many digits
-     [123] one of the digits in the list
-     [1-5] one digit between '1' and '5'
-           a '^' as the first character in a list inverts the list
-
-
-   "isdnctrl secure <InterfaceName> off"
-   Switch off secure operation (default).
-
-   "isdnctrl ihup <InterfaceName> [on|off]"
-   Switch the hang-up-timer for incoming calls on or off.
-
-   "isdnctrl eaz <InterfaceName>"
-   Returns the EAZ of an interface.
-
-   "isdnctrl delphone <InterfaceName> in|out <number>"
-   Deletes a number from one of the access-lists of the interface.
-
-   "isdnctrl delif <InterfaceName>"
-   Removes the interface (and possible slaves) from the kernel.
-   (You have to unregister it with "ifconfig <InterfaceName> down" before).
-
-   "isdnctrl callback <InterfaceName> [on|off]"
-   Switches an interface to callback-mode. In this mode, an incoming call
-   will be rejected and after this the remote-station will be called. If
-   you test this feature by using ping, some routers will re-dial very
-   quickly, so that the callback from isdn4linux may not be recognized.
-   In this case use ping with the option -i <sec> to increase the interval
-   between echo-packets.
-
-   "isdnctrl cbdelay <InterfaceName> [seconds]"
-   Sets the delay (default 5 sec) between an incoming call and start of
-   dialing when callback is enabled.
-
-   "isdnctrl cbhup <InterfaceName> [on|off]"
-   This enables (default) or disables an active hangup (reject) when getting an
-   incoming call for an interface which is configured for callback.
-
-   "isdnctrl encap <InterfaceName> <EncapType>"
-   Selects the type of packet-encapsulation. The encapsulation can be changed
-   only while an interface is down.
-
-   At the moment the following values are supported:
-
-   rawip    (Default) Selects raw-IP-encapsulation. This means, MAC-headers
-            are stripped off.
-   ip       IP with type-field. Same as IP but the type-field of the MAC-header
-            is preserved.
-   x25iface X.25 interface encapsulation (first byte semantics as defined in
-            ../networking/x25-iface.txt). Use this for running the linux
-            X.25 network protocol stack (AF_X25 sockets) on top of isdn.
-   cisco-h  A special-mode for communicating with a Cisco, which is configured
-            to do "hdlc"
-   ethernet No stripping. Packets are sent with full MAC-header.
-            The Ethernet-address of the interface is faked, from its
-            IP-address: fc:fc:i1:i2:i3:i4, where i1-4 are the IP-addr.-values.
-   syncppp  Synchronous PPP
-
-   uihdlc   HDLC with UI-frame-header (for use with DOS ISPA, option -h1)
-
-
-   NOTE:    x25iface encapsulation is currently experimental. Please
-            read README.x25 for further details
-
-
-   Watching packets, using standard-tcpdump will fail for all encapsulations
-   except ethernet because tcpdump does not know how to handle packets
-   without MAC-header. A patch for tcpdump is included in the utility-package
-   mentioned above.
-
-   "isdnctrl l2_prot <InterfaceName> <L2-ProtocolName>"
-   Selects a layer-2-protocol.
-   (With the ICN-driver and the HiSax-driver, "x75i" and "hdlc" is available.
-   With other drivers, "x75ui", "x75bui", "x25dte", "x25dce" may be
-   possible too. See README.x25 for x25 related l2 protocols.)
-
-   isdnctrl l3_prot <InterfaceName> <L3-ProtocolName>
-   The same for layer-3. (At the moment only "trans" is allowed)
-
-   "isdnctrl list <InterfaceName>"
-   Shows all parameters of an interface and the charge-info.
-   Try "all" as the interface name.
-
-   "isdnctrl hangup <InterfaceName>"
-   Forces hangup of an interface.
-
-   "isdnctrl bind <InterfaceName> <DriverId>,<ChannelNumber> [exclusive]"
-   If you are using more than one ISDN card, it is sometimes necessary to
-   dial out using a specific card or even preserve a specific channel for
-   dialout of a specific net-interface. This can be done with the above
-   command. Replace <DriverId> by whatever you assigned while loading the
-   module. The <ChannelNumber> is counted from zero. The upper limit
-   depends on the card used. At the moment no card supports more than
-   2 channels, so the upper limit is one.
-
-   "isdnctrl unbind <InterfaceName>"
-   unbinds a previously bound interface.
-
-   "isdnctrl busreject <DriverId> on|off"
-   If switched on, isdn4linux replies a REJECT to incoming calls, it
-   cannot match to any configured interface.
-   If switched off, nothing happens in this case.
-   You normally should NOT enable this feature, if the ISDN adapter is not
-   the only device connected to the S0-bus. Otherwise it could happen that
-   isdn4linux rejects an incoming call, which belongs to another device on
-   the bus.
-
-   "isdnctrl addslave <InterfaceName> <SlaveName>
-   Creates a slave interface for channel-bundling. Slave interfaces are
-   not seen by the kernel, but their ISDN-part can be configured with
-   isdnctrl as usual. (Phone numbers, EAZ/MSN, timeouts etc.) If more
-   than two channels are to be bundled, feel free to create as many as you
-   want. InterfaceName must be a real interface, NOT a slave. Slave interfaces
-   start dialing, if the master interface resp. the previous slave interface
-   has a load of more than 7000 cps. They hangup if the load goes under 7000
-   cps, according to their "huptimeout"-parameter.
-
-   "isdnctrl sdelay <InterfaceName> secs."
-   This sets the minimum time an Interface has to be fully loaded, until
-   it sends a dial-request to its slave.
-
-   "isdnctrl dial <InterfaceName>"
-   Forces an interface to start dialing even if no packets are to be
-   transferred.
-
-   "isdnctrl mapping <DriverId> MSN0,MSN1,MSN2,...MSN9"
-   This installs a mapping table for EAZ<->MSN-mapping for a single line.
-   Missing MSN's have to be given as "-" or can be omitted, if at the end
-   of the commandline.
-   With this command, it's now possible to have an interface listening to
-   mixed 1TR6- and Euro-Type lines. In this case, the interface has to be
-   configured to a 1TR6-type EAZ (one digit). The mapping is also valid
-   for tty-emulation. Seen from the interface/tty-level the mapping
-   CAN be used, however it's possible to use single tty's/interfaces with
-   real MSN's (more digits) also, in which case the mapping will be ignored.
-   Here is an example:
-
-   You have a 1TR6-type line with base-nr. 1234567 and a Euro-line with
-   MSN's 987654, 987655 and 987656. The DriverId for the Euro-line is "EURO".
-
-   isdnctrl mapping EURO -,987654,987655,987656,-,987655
-   ...
-   isdnctrl eaz isdn0 1      # listen on 12345671(1tr6) and 987654(euro)
-   ...
-   isdnctrl eaz isdn1 4      # listen on 12345674(1tr6) only.
-   ...
-   isdnctrl eaz isdn2 987654 # listen on 987654(euro) only.
-
-   Same scheme is used with AT&E...  at the tty's.
-
-6. If you want to write a new low-level-driver, you are welcome.
-   The interface to the link-level-module is described in the file INTERFACE.
-   If the interface should be expanded for any reason, don't do it
-   on your own, send me a mail containing the proposed changes and
-   some reasoning about them.
-   If other drivers will not be affected, I will include the changes
-   in the next release.
-   For developers only, there is a second mailing-list. Write to me
-   (fritz@isdn4linux.de), if you want to join that list.
-
-Have fun!
-
- -Fritz
-
diff --git a/Documentation/isdn/README.FAQ b/Documentation/isdn/README.FAQ
deleted file mode 100644
index e5dd1addacdd..000000000000
--- a/Documentation/isdn/README.FAQ
+++ /dev/null
@@ -1,26 +0,0 @@
-
-The FAQ for isdn4linux
-======================
-
-Please note that there is a big FAQ available in the isdn4k-utils.
-You find it in:
- isdn4k-utils/FAQ/i4lfaq.sgml
-
-In case you just want to see the FAQ online, or download the newest version,
-you can have a look at my website:
-https://www.mhessler.de/i4lfaq/ (view + download)
-or:
-https://www.isdn4linux.de/faq/4lfaq.html (view)
-
-As the extension tells, the FAQ is in SGML format, and you can convert it
-into text/html/... format by using the sgml2txt/sgml2html/... tools.
-Alternatively, you can also do a 'configure; make all' in the FAQ directory.
-
-
-Please have a look at the FAQ before posting anything in the Mailinglist,
-or the newsgroup!
-
-
-Matthias Hessler
-hessler@isdn4linux.de
-
diff --git a/Documentation/isdn/README.audio b/Documentation/isdn/README.audio
deleted file mode 100644
index 8ebca19290d9..000000000000
--- a/Documentation/isdn/README.audio
+++ /dev/null
@@ -1,138 +0,0 @@
-$Id: README.audio,v 1.8 1999/07/11 17:17:29 armin Exp $
-
-ISDN subsystem for Linux.
-  Description of audio mode.
-
-When enabled during kernel configuration, the tty emulator of the ISDN
-subsystem is capable of a reduced set of commands to support audio.
-This document describes the commands supported and the format of
-audio data.
-
-Commands for enabling/disabling audio mode:
-
-        AT+FCLASS=8      Enable audio mode.
-                         This affects the following registers:
-                           S18: Bits 0 and 2 are set.
-                           S16: Set to 48 and any further change to
-                                larger values is blocked.
-        AT+FCLASS=0      Disable audio mode.
-                         Register 18 is set to 4.
-        AT+FCLASS=?      Show possible modes.
-        AT+FCLASS?       Report current mode (0 or 8).
-
-Commands supported in audio mode:
-
-All audio mode commands have one of the following forms:
-
-        AT+Vxx?          Show current setting.
-        AT+Vxx=?         Show possible settings.
-        AT+Vxx=v         Set simple parameter.
-        AT+Vxx=v,v ...   Set complex parameter.
-
-where xx is a two-character code and v are alphanumerical parameters.
-The following commands are supported:
-
-        AT+VNH=x         Auto hangup setting. NO EFFECT, supported
-                         for compatibility only.
-        AT+VNH?          Always reporting "1"
-        AT+VNH=?         Always reporting "1"
-
-        AT+VIP           Reset all audio parameters.
-
-        AT+VLS=x         Line select. x is one of the following:
-                           0 = No device.
-                           2 = Phone line.
-        AT+VLS=?         Always reporting "0,2"
-        AT+VLS?          Show current line.
-
-        AT+VRX           Start recording. Emulator responds with
-                         CONNECT and starts sending audio data to
-                         the application. See below for data format
-
-        AT+VSD=x,y       Set silence-detection parameters.
-                         Possible parameters:
-                           x = 0 ... 31  sensitivity threshold level.
-                                         (default 0 , deactivated)
-                           y = 0 ... 255 range of interval in units
-                                         of 0.1 second. (default 70)
-        AT+VSD=?         Report possible parameters.
-        AT+VSD?          Show current parameters.
-
-        AT+VDD=x,y       Set DTMF-detection parameters.
-                         Only possible if online and during this connection.
-                         Possible parameters:
-                           x = 0 ... 15  sensitivity threshold level.
-                                         (default 0 , I4L soft-decode)
-                                         (1-15 soft-decode off, hardware on)
-                           y = 0 ... 255 tone duration in units of 5ms.
-                                         Not for I4L soft decode (default 8, 40ms)
-        AT+VDD=?         Report possible parameters.
-        AT+VDD?          Show current parameters.
-
-        AT+VSM=x         Select audio data format.
-                         Possible parameters:
-                           2 = ADPCM-2
-                           3 = ADPCM-3
-                           4 = ADPCM-4
-                           5 = aLAW
-                           6 = uLAW
-        AT+VSM=?         Show possible audio formats.
-
-        AT+VTX           Start audio playback. Emulator responds
-                         with CONNECT and starts sending audio data
-                         received from the application via phone line.
-General behavior and description of data formats/protocol.
-    when a connection is made:
-
-      On incoming calls, if the application responds to a RING
-      with ATA, depending on the calling service, the emulator
-      responds with either CONNECT (data call) or VCON (voice call).
-      
-      On outgoing voice calls, the emulator responds with VCON
-      upon connection setup.
-
-  Audio recording.
-
-    When receiving audio data, a kind of bisync protocol is used.
-    Upon AT+VRX command, the emulator responds with CONNECT, and
-    starts sending audio data to the application. There are several
-    escape sequences defined, all using DLE (0x10) as Escape char:
-
-    <DLE><ETX>              End of audio data. (i.e. caused by a
-                            hangup of the remote side) Emulator stops
-                            recording, responding with VCON.
-    <DLE><DC4>		    Abort recording, (send by appl.) Emulator
-			    stops recording, sends DLE,ETX.
-    <DLE><DLE>              Escape sequence for DLE in data stream.
-    <DLE>0                  Touchtone "0" received.
-         ...
-    <DLE>9                  Touchtone "9" received.
-    <DLE>#                  Touchtone "#" received.
-    <DLE>*                  Touchtone "*" received.
-    <DLE>A                  Touchtone "A" received.
-    <DLE>B                  Touchtone "B" received.
-    <DLE>C                  Touchtone "C" received.
-    <DLE>D                  Touchtone "D" received.
-
-    <DLE>q                  quiet. Silence detected after non-silence.
-    <DLE>s                  silence. Silence detected from the
-                            start of recording.
-
-    Currently unsupported DLE sequences:
-
-    <DLE>c                  FAX calling tone received.
-    <DLE>b                  busy tone received.
-
-  Audio playback.
-
-    When sending audio data, upon AT+VTX command, emulator responds with
-    CONNECT, and starts transferring data from application to the phone line.
-    The same DLE sequences apply to this mode.
-
-  Full-Duplex-Audio:
-
-    When _both_ commands for recording and playback are given in _one_
-    AT-command-line (i.e.: "AT+VTX+VRX"), full-duplex-mode is selected.
-	In this mode, the only way to stop recording is sending <DLE><DC4>
-    and the only way to stop playback is to send <DLE><ETX>.
-
diff --git a/Documentation/isdn/README.concap b/Documentation/isdn/README.concap
deleted file mode 100644
index a76d74845a4c..000000000000
--- a/Documentation/isdn/README.concap
+++ /dev/null
@@ -1,259 +0,0 @@
-Description of the "concap" encapsulation protocol interface
-============================================================
-
-The "concap" interface is intended to be used by network device
-drivers that need to process an encapsulation protocol. 
-It is assumed that the protocol interacts with a linux network device by
-- data transmission
-- connection control (establish, release)
-Thus, the mnemonic: "CONnection CONtrolling eNCAPsulation Protocol".
-
-This is currently only used inside the isdn subsystem. But it might
-also be useful to other kinds of network devices. Thus, if you want
-to suggest changes that improve usability or performance of the
-interface, please let me know. I'm willing to include them in future
-releases (even if I needed to adapt the current isdn code to the
-changed interface).
-
-
-Why is this useful?
-===================
-
-The encapsulation protocol used on top of WAN connections or permanent
-point-to-point links are frequently chosen upon bilateral agreement.
-Thus, a device driver for a certain type of hardware must support
-several different encapsulation protocols at once.
-
-The isdn device driver did already support several different
-encapsulation protocols. The encapsulation protocol is configured by a
-user space utility (isdnctrl). The isdn network interface code then
-uses several case statements which select appropriate actions
-depending on the currently configured encapsulation protocol.
-
-In contrast, LAN network interfaces always used a single encapsulation
-protocol which is unique to the hardware type of the interface. The LAN
-encapsulation is usually done by just sticking a header on the data. Thus,
-traditional linux network device drivers used to process the
-encapsulation protocol directly (usually by just providing a hard_header()
-method in the device structure) using some hardware type specific support
-functions. This is simple, direct and efficient. But it doesn't fit all
-the requirements for complex WAN encapsulations. 
-
-
-   The configurability of the encapsulation protocol to be used
-   makes isdn network interfaces more flexible, but also much more
-   complex than traditional lan network interfaces.
-
-
-Many Encapsulation protocols used on top of WAN connections will not just
-stick a header on the data. They also might need to set up or release
-the WAN connection. They also might want to send other data for their
-private purpose over the wire, e.g. ppp does a lot of link level
-negotiation before the first piece of user data can be transmitted.
-Such encapsulation protocols for WAN devices are typically more complex
-than encapsulation protocols for lan devices. Thus, network interface
-code for typical WAN devices also tends to be more complex.
-
-
-In order to support Linux' x25 PLP implementation on top of
-isdn network interfaces I could have introduced yet another branch to
-the various case statements inside drivers/isdn/isdn_net.c.
-This eventually made isdn_net.c even more complex. In addition, it made
-isdn_net.c harder to maintain. Thus, by identifying an abstract
-interface between the network interface code and the encapsulation
-protocol, complexity could be reduced and maintainability could be
-increased.
-
-
-Likewise, a similar encapsulation protocol will frequently be needed by
-several different interfaces of even different hardware type, e.g. the
-synchronous ppp implementation used by the isdn driver and the
-asynchronous ppp implementation used by the ppp driver have a lot of
-similar code in them. By cleanly separating the encapsulation protocol
-from the hardware specific interface stuff such code could be shared
-better in future.
-
-
-When operating over dial-up-connections (e.g. telephone lines via modem,
-non-permanent virtual circuits of wide area networks, ISDN) many
-encapsulation protocols will need to control the connection. Therefore,
-some basic connection control primitives are supported. The type and
-semantics of the connection (i.e the ISO layer where connection service
-is provided) is outside our scope and might be different depending on
-the encapsulation protocol used, e.g. for a ppp module using our service
-on top of a modem connection a connect_request will result in dialing
-a (somewhere else configured) remote phone number. For an X25-interface
-module (LAPB semantics, as defined in Documentation/networking/x25-iface.txt)
-a connect_request will ask for establishing a reliable lapb
-datalink connection.
-
-
-The encapsulation protocol currently provides the following
-service primitives to the network device.
-
-- create a new encapsulation protocol instance
-- delete encapsulation protocol instance and free all its resources
-- initialize (open) the encapsulation protocol instance for use.
-- deactivate (close) an encapsulation protocol instance.
-- process (xmit) data handed down by upper protocol layer
-- receive data from lower (hardware) layer
-- process connect indication from lower (hardware) layer
-- process disconnect indication from lower (hardware) layer
-
-
-The network interface driver accesses those primitives via callbacks
-provided by the encapsulation protocol instance within a
-struct concap_proto_ops.
-
-struct concap_proto_ops{
-
-	/* create a new encapsulation protocol instance of same type */
-	struct concap_proto *  (*proto_new) (void);
-
-	/* delete encapsulation protocol instance and free all its resources.
-	   cprot may no longer be referenced after calling this */
-	void (*proto_del)(struct concap_proto *cprot);
-
-	/* initialize the protocol's data. To be called at interface startup
-	   or when the device driver resets the interface. All services of the
-	   encapsulation protocol may be used after this*/
-	int (*restart)(struct concap_proto *cprot, 
-		       struct net_device *ndev,
-		       struct concap_device_ops *dops);
-
-	/* deactivate an encapsulation protocol instance. The encapsulation
-	   protocol may not call any *dops methods after this. */
-	int (*close)(struct concap_proto *cprot);
-
-	/* process a frame handed down to us by upper layer */
-	int (*encap_and_xmit)(struct concap_proto *cprot, struct sk_buff *skb);
-
-	/* to be called for each data entity received from lower layer*/ 
-	int (*data_ind)(struct concap_proto *cprot, struct sk_buff *skb);
-
-	/* to be called when a connection was set up/down.
-	   Protocols that don't process these primitives might fill in
-	   dummy methods here */
-	int (*connect_ind)(struct concap_proto *cprot);
-	int (*disconn_ind)(struct concap_proto *cprot);
-};
-
-
-The data structures are defined in the header file include/linux/concap.h.
-
-
-A Network interface using encapsulation protocols must also provide
-some service primitives to the encapsulation protocol:
-
-- request data being submitted by lower layer (device hardware) 
-- request a connection being set up by lower layer 
-- request a connection being released by lower layer
-
-The encapsulation protocol accesses those primitives via callbacks
-provided by the network interface within a struct concap_device_ops.
-
-struct concap_device_ops{
-
-	/* to request data be submitted by device */ 
-	int (*data_req)(struct concap_proto *, struct sk_buff *);
-
-	/* Control methods must be set to NULL by devices which do not
-	   support connection control. */
-	/* to request a connection be set up */ 
-	int (*connect_req)(struct concap_proto *);
-
-	/* to request a connection be released */
-	int (*disconn_req)(struct concap_proto *);	
-};
-
-The network interface does not explicitly provide a receive service
-because the encapsulation protocol directly calls netif_rx(). 
-
-
-
-
-An encapsulation protocol itself is actually the
-struct concap_proto{
-	struct net_device *net_dev;		/* net device using our service  */
-	struct concap_device_ops *dops; /* callbacks provided by device */
- 	struct concap_proto_ops  *pops; /* callbacks provided by us */
-	int flags;
-	void *proto_data;               /* protocol specific private data, to
-					   be accessed via *pops methods only*/
-	/*
-	  :
-	  whatever 
-	  :
-	  */
-};
-
-Most of this is filled in when the device requests the protocol to 
-be reset (opend). The network interface must provide the net_dev and
-dops pointers. Other concap_proto members should be considered private
-data that are only accessed by the pops callback functions. Likewise,
-a concap proto should access the network device's private data
-only by means of the callbacks referred to by the dops pointer.
-
-
-A possible extended device structure which uses the connection controlling
-encapsulation services could look like this:
-
-struct concap_device{
-	struct net_device net_dev;
-	struct my_priv  /* device->local stuff */
-			/* the my_priv struct might contain a 
-			   struct concap_device_ops *dops;
-	                   to provide the device specific callbacks
-			*/
-	struct concap_proto *cprot;        /* callbacks provided by protocol */
-};
-
-
-
-Misc Thoughts
-=============
-
-The concept of the concap proto might help to reuse protocol code and
-reduce the complexity of certain network interface implementations.
-The trade off is that it introduces yet another procedure call layer
-when processing the protocol. This has of course some impact on
-performance. However, typically the concap interface will be used by
-devices attached to slow lines (like telephone, isdn, leased synchronous
-lines). For such slow lines, the overhead is probably negligible.
-This might no longer hold for certain high speed WAN links (like
-ATM).
-
-
-If general linux network interfaces explicitly supported concap
-protocols (e.g. by a member struct concap_proto* in struct net_device)
-then the interface of the service function could be changed
-by passing a pointer of type (struct net_device*) instead of
-type (struct concap_proto*). Doing so would make many of the service
-functions compatible to network device support functions.
-
-e.g. instead of the concap protocol's service function
-
-  int (*encap_and_xmit)(struct concap_proto *cprot, struct sk_buff *skb);
-
-we could have
-
-  int (*encap_and_xmit)(struct net_device *ndev, struct sk_buff *skb);
-
-As this is compatible to the dev->hard_start_xmit() method, the device
-driver could directly register the concap protocol's encap_and_xmit()
-function as its hard_start_xmit() method. This would eliminate one
-procedure call layer.
-
-
-The device's data request function could also be defined as
- 
-  int (*data_req)(struct net_device *ndev, struct sk_buff *skb);
-
-This might even allow for some protocol stacking. And the network
-interface might even register the same data_req() function directly
-as its hard_start_xmit() method when a zero layer encapsulation
-protocol is configured. Thus, eliminating the performance penalty
-of the concap interface when a trivial concap protocol is used.
-Nevertheless, the device remains able to support encapsulation
-protocol configuration.
-
diff --git a/Documentation/isdn/README.diversion b/Documentation/isdn/README.diversion
deleted file mode 100644
index bddcd5fb86ff..000000000000
--- a/Documentation/isdn/README.diversion
+++ /dev/null
@@ -1,127 +0,0 @@
-The isdn diversion services are a supporting module working together with
-the isdn4linux and the HiSax module for passive cards. 
-Active cards, TAs and cards using a own or other driver than the HiSax 
-module need to be adapted to the HL<->LL interface described in a separate 
-document. The diversion services may be used with all cards supported by 
-the HiSax driver.
-The diversion kernel interface and controlling tool divertctrl were written
-by Werner Cornelius (werner@isdn4linux.de or werner@titro.de) under the
-GNU General Public License.
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-Table of contents
-=================
-
-1. Features of the i4l diversion services 
-   (Or what can the i4l diversion services do for me)
-
-2. Required hard- and software
-
-3. Compiling, installing and loading/unloading the module  
-   Tracing calling and diversion information 
-
-4. Tracing calling and diversion information
- 
-5. Format of the divert device ASCII output
- 
-
-1. Features of the i4l diversion services 
-   (Or what can the i4l diversion services do for me)
-
-   The i4l diversion services offers call forwarding and logging normally 
-   only supported by isdn phones. Incoming calls may be diverted 
-   unconditionally (CFU), when not reachable (CFNR) or on busy condition 
-   (CFB). 
-   The diversions may be invoked statically in the providers exchange
-   as normally done by isdn phones. In this case all incoming calls
-   with a special (or all) service identifiers are forwarded if the 
-   forwarding reason is met. Activated static services may also be 
-   interrogated (queried).
-   The i4l diversion services additionally offers a dynamic version of
-   call forwarding which is not preprogrammed inside the providers exchange
-   but dynamically activated by i4l.
-   In this case all incoming calls are checked by rules that may be
-   compared to the mechanism of ipfwadm or ipchains. If a given rule matches
-   the checking process is finished and the rule matching will be applied
-   to the call.
-   The rules include primary and secondary service identifiers, called 
-   number and subaddress, callers number and subaddress and whether the rule
-   matches to all filtered calls or only those when all B-channel resources
-   are exhausted.
-   Actions that may be invoked by a rule are ignore, proceed, reject, 
-   direct divert or delayed divert of a call.
-   All incoming calls matching a rule except the ignore rule a reported and
-   logged as ASCII via the proc filesystem (/proc/net/isdn/divert). If proceed
-   is selected the call will be held in a proceeding state (without ringing)
-   for a certain amount of time to let an external program or client decide
-   how to handle the call. 
-            
-
-2. Required hard- and software
-   
-   For using the i4l diversion services the isdn line must be of a EURO/DSS1
-   type. Additionally the i4l services only work together with the HiSax 
-   driver for passive isdn cards. All HiSax supported cards may be used for
-   the diversion purposes.
-   The static diversion services require the provider having static services
-   CFU, CFNR, CFB activated on an MSN-line. The static services may not be 
-   used on a point-to-point connection. Further the static services are only
-   available in some countries (for example germany). Countries requiring the 
-   keypad protocol for activating static diversions (like the netherlands) are
-   not supported but may use the tty devices for this purpose.
-   The dynamic diversion services may be used in all countries if the provider
-   enables the feature CF (call forwarding). This should work on both MSN- and
-   point-to-point lines.
-   To add and delete rules the additional divertctrl program is needed. This
-   program is part of the isdn4kutils package.   
-
-3. Compiling, installing and loading/unloading the module  
-   Tracing calling and diversion information 
-
-
-   To compile the i4l code with diversion support you need to say yes to the 
-   DSS1 diversion services when selecting the i4l options in the kernel 
-   config (menuconfig or config).
-   After having properly activated a make modules and make modules_install all
-   required modules will be correctly installed in the needed modules dirs.
-   As the diversion services are currently not included in the scripts of most
-   standard distributions you will have to add a "insmod dss1_divert" after
-   having loaded the global isdn module.
-   The module can be loaded without any command line parameters.
-   If the module is actually loaded and active may be checked with a 
-   "cat /proc/modules" or "ls /proc/net/isdn/divert". The divert file is 
-   dynamically created by the diversion module and removed when the module is
-   unloaded.
-
-
-4. Tracing calling and diversion information
- 
-   You also may put a "cat /proc/net/isdn/divert" in the background with the
-   output redirected to a file. Then all actions of the module are logged.
-   The divert file in the proc system may be opened more than once, so in 
-   conjunction with inetd and a small remote client on other machines inside
-   your network incoming calls and reactions by the module may be shown on 
-   every listening machine. 
-   If a call is reported as proceeding an external program or client may 
-   specify during a certain amount of time (normally 4 to 10 seconds) what
-   to do with that call.      
-   To unload the module all open files to the device in the proc system must
-   be closed. Otherwise the module (and isdn.o) may not be unloaded. 
-
-5. Format of the divert device ASCII output
- 
-   To be done later
-
diff --git a/Documentation/isdn/README.fax b/Documentation/isdn/README.fax
deleted file mode 100644
index 5314958a8a6e..000000000000
--- a/Documentation/isdn/README.fax
+++ /dev/null
@@ -1,45 +0,0 @@
-
-Fax with isdn4linux
-===================
-
-When enabled during kernel configuration, the tty emulator
-of the ISDN subsystem is capable of the Fax Class 2 commands.
-
-This only makes sense under the following conditions :
-
-- You need the commands as dummy, because you are using
-  hylafax (with patch) for AVM capi.
-- You want to use the fax capabilities of your isdn-card.
-  (supported cards are listed below)
-
-
-NOTE: This implementation does *not* support fax with passive
-      ISDN-cards (known as softfax). The low-level driver of
-      the ISDN-card and/or the card itself must support this.
-
-
-Supported ISDN-Cards
---------------------
-
-Eicon DIVA Server BRI/PCI
-	- full support with both B-channels.
-
-Eicon DIVA Server 4BRI/PCI
-	- full support with all B-channels.
-
-Eicon DIVA Server PRI/PCI
-	- full support on amount of B-channels
-		depending on DSPs on board.
-
-
-
-The command set is known as Class 2 (not Class 2.0) and
-can be activated by AT+FCLASS=2
-
-
-The interface between the link-level-module and the hardware-level driver
-is described in the files INTERFACE.fax and INTERFACE.
-
-Armin
-mac@melware.de
-
diff --git a/Documentation/isdn/README.hfc-pci b/Documentation/isdn/README.hfc-pci
deleted file mode 100644
index e8a4ef0226e8..000000000000
--- a/Documentation/isdn/README.hfc-pci
+++ /dev/null
@@ -1,41 +0,0 @@
-The driver for the HFC-PCI and HFC-PCI-A chips from CCD may be used
-for many OEM cards using this chips.
-Additionally the driver has a special feature which makes it possible
-to read the echo-channel of the isdn bus. So all frames in both directions
-may be logged.
-When the echo logging feature is used the number of available B-channels
-for a HFC-PCI card is reduced to 1. Of course this is only relevant to
-the card, not to the isdn line.
-To activate the echo mode the following ioctls must be entered:
-
-hisaxctrl <driver/cardname> 10 1
-
-This reduces the available channels to 1. There must not be open connections
-through this card when entering the command.
-And then:
-
-hisaxctrl <driver/cardname> 12 1
-
-This enables the echo mode. If Hex logging is activated the isdnctrlx 
-devices show a output with a line beginning of HEX: for the providers
-exchange and ECHO: for isdn devices sending to the provider.
-
-If more than one HFC-PCI cards are installed, a specific card may be selected
-at the hisax module load command line. Supply the load command with the desired
-IO-address of the desired card. 
-Example:
-There tree cards installed in your machine at IO-base addresses 0xd000, 0xd400 
-and 0xdc00
-If you want to use the card at 0xd400 standalone you should supply the insmod
-or depmod with type=35 io=0xd400.
-If you want to use all three cards, but the order needs to be at 0xdc00,0xd400,
-0xd000 you may give the parameters type=35,35,35 io=0xdc00,0xd400,0xd00 
-Then the desired card will be the initialised in the desired order.
-If the io parameter is used the io addresses of all used cards should be 
-supplied else the parameter is assumed 0 and a auto search for a free card is
-invoked which may not give the wanted result. 
-
-Comments and reports to werner@isdn4linux.de or werner@isdn-development.de
-
-
-
diff --git a/Documentation/isdn/README.syncppp b/Documentation/isdn/README.syncppp
deleted file mode 100644
index 27d260095cce..000000000000
--- a/Documentation/isdn/README.syncppp
+++ /dev/null
@@ -1,58 +0,0 @@
-Some additional information for setting up a syncPPP
-connection using network interfaces.
----------------------------------------------------------------
-
-You need one thing beside the isdn4linux package:
-
-  a patched pppd .. (I called it ipppd to show the difference)
-
-Compiling isdn4linux with sync PPP:
------------------------------------
-To compile isdn4linux with the sync PPP part, you have
-to answer the appropriate question when doing a "make config"
-Don't forget to load the slhc.o
-module before the isdn.o module, if VJ-compression support
-is not compiled into your kernel. (e.g if you have no PPP or
-CSLIP in the kernel)
-
-Using isdn4linux with sync PPP:
--------------------------------
-Sync PPP is just another encapsulation for isdn4linux. The
-name to enable sync PPP encapsulation is 'syncppp' .. e.g:
-
-  /sbin/isdnctrl encap ippp0 syncppp
-
-The name of the interface is here 'ippp0'. You need 
-one interface with the name 'ippp0' to saturate the
-ipppd, which checks the ppp version via this interface.
-Currently, all devices must have the name ipppX where
-'X' is a decimal value.
-
-To set up a PPP connection you need the ipppd .. You must start 
-the ipppd once after installing the modules. The ipppd 
-communicates with the isdn4linux link-level driver using the
-/dev/ippp0 to /dev/ippp15 devices. One ipppd can handle
-all devices at once. If you want to use two PPP connections
-at the same time, you have to connect the ipppd to two
-devices .. and so on. 
-I've implemented one additional option for the ipppd:
- 'useifip' will get (if set to not 0.0.0.0) the IP address 
- for the negotiation from the attached network-interface. 
-(also: ipppd will try to negotiate pointopoint IP as remote IP)
-You must disable BSD-compression, this implementation can't
-handle compressed packets.
-
-Check the etc/rc.isdn.syncppp in the isdn4kernel-util package
-for an example setup script.
-
-To use the MPPP stuff, you must configure a slave device
-with isdn4linux. Now call the ipppd with the '+mp' option.
-To increase the number of links, you must use the
-'addlink' option of the isdnctrl tool. (rc.isdn.syncppp.MPPP is
-an example script)
-
-enjoy it,
-    michael
-     
-
-
diff --git a/Documentation/isdn/README.x25 b/Documentation/isdn/README.x25
deleted file mode 100644
index e561a77c4e22..000000000000
--- a/Documentation/isdn/README.x25
+++ /dev/null
@@ -1,184 +0,0 @@
-  
-X.25 support within isdn4linux
-==============================
-
-This is alpha/beta test code. Use it completely at your own risk.
-As new versions appear, the stuff described here might suddenly change
-or become invalid without notice.
-
-Keep in mind:
-
-You are using several new parts of the 2.2.x kernel series which
-have not been tested in a large scale. Therefore, you might encounter
-more bugs as usual.
-
-- If you connect to an X.25 neighbour not operated by yourself, ASK the
-  other side first. Be prepared that bugs in the protocol implementation
-  might result in problems.
-
-- This implementation has never wiped out my whole hard disk yet. But as
-  this is experimental code, don't blame me if that happened to you.
-  Backing up important data will never harm.
-
-- Monitor your isdn connections while using this software. This should
-  prevent you from undesired phone bills in case of driver problems.
-  
- 
-
-
-How to configure the kernel
-===========================
- 
-The ITU-T (former CCITT) X.25 network protocol layer has been implemented
-in the Linux source tree since version 2.1.16. The isdn subsystem might be 
-useful to run X.25 on top of ISDN. If you want to try it, select
-
-   "CCITT X.25 Packet Layer"
-
-from the networking options as well as
-
-   "ISDN Support" and "X.25 PLP on Top of ISDN"
-
-from the ISDN subsystem options when you configure your kernel for
-compilation. You currently also need to enable
-"Prompt for development and/or incomplete code/drivers" from the
-"Code maturity level options" menu. For the x25trace utility to work
-you also need to enable "Packet socket".
-
-For local testing it is also recommended to enable the isdnloop driver
-from the isdn subsystem's configuration menu.
-
-For testing, it is recommended that all isdn drivers and the X.25 PLP
-protocol are compiled as loadable modules. Like this, you can recover
-from certain errors by simply unloading and reloading the modules.
-
-
-
-What's it for? How to use it?
-=============================
-
-X.25 on top of isdn might be useful with two different scenarios:
-
-- You might want to access a public X.25 data network from your Linux box.
-  You can use i4l if you were physically connected to the X.25 switch
-  by an ISDN B-channel (leased line as well as dial up connection should
-  work).
-
-  This corresponds to ITU-T recommendation X.31 Case A (circuit-mode
-  access to PSPDN [packet switched public data network]).
-
-  NOTE: X.31 also covers a Case B (access to PSPDN via virtual
-  circuit / packet mode service). The latter mode (which in theory
-  also allows using the D-channel) is not supported by isdn4linux.
-  It should however be possible to establish such packet mode connections
-  with certain active isdn cards provided that the firmware supports X.31
-  and the driver exports this functionality to the user. Currently, 
-  the AVM B1 driver is the only driver which does so. (It should be
-  possible to access D-channel X.31 with active AVM cards using the
-  CAPI interface of the AVM-B1 driver).
-
-- Or you might want to operate certain ISDN teleservices on your linux
-  box. A lot of those teleservices run on top of the ISO-8208
-  (DTE-DTE mode) network layer protocol. ISO-8208 is essentially the
-  same as ITU-T X.25.
-
-  Popular candidates of such teleservices are EUROfile transfer or any
-  teleservice applying ITU-T recommendation T.90.
-
-To use the X.25 protocol on top of isdn, just create an isdn network
-interface as usual, configure your own and/or peer's ISDN numbers,
-and choose x25iface encapsulation by
-
-   isdnctrl encap <iface-name> x25iface.
-
-Once encap is set like this, the device can be used by the X.25 packet layer.
-
-All the stuff needed for X.25 is implemented inside the isdn link
-level (mainly isdn_net.c and some new source files). Thus, it should
-work with every existing HL driver. I was able to successfully open X.25
-connections on top of the isdnloop driver and the hisax driver.
-"x25iface"-encapsulation bypasses demand dialing. Dialing will be
-initiated when the upper (X.25 packet) layer requests the lapb datalink to
-be established. But hangup timeout is still active. Whenever a hangup
-occurs, all existing X.25 connections on that link will be cleared
-It is recommended to use sufficiently large hangup-timeouts for the
-isdn interfaces.
-
-
-In order to set up a conforming protocol stack you also need to
-specify the proper l2_prot parameter:
-
-To operate in ISO-8208  X.25 DTE-DTE mode, use
-
-   isdnctrl l2_prot <iface-name> x75i
-
-To access an X.25 network switch via isdn (your linux box is the DTE), use
-
-   isdnctrl l2_prot <iface-name> x25dte
-
-To mimic an X.25 network switch (DCE side of the connection), use
-
-   isdnctrl l2_prot <iface-name> x25dce
-
-However, x25dte or x25dce is currently not supported by any real HL
-level driver. The main difference between x75i and x25dte/dce is that
-x25d[tc]e uses fixed lap_b addresses. With x75i, the side which
-initiates the isdn connection uses the DTE's lap_b address while the
-called side used the DCE's lap_b address. Thus, l2_prot x75i might
-probably work if you access a public X.25 network as long as the
-corresponding isdn connection is set up by you. At least one test
-was successful to connect via isdn4linux to an X.25 switch using this
-trick. At the switch side, a terminal adapter X.21 was used to connect
-it to the isdn.
-
-
-How to set up a test installation?
-==================================
-
-To test X.25 on top of isdn, you need to get
-
-- a recent version of the "isdnctrl" program that supports setting the new
-  X.25 specific parameters.
-
-- the x25-utils-2.X package from 
-  ftp://ftp.hes.iki.fi/pub/ham/linux/ax25/x25utils-*
-  (don't confuse the x25-utils with the ax25-utils)
-
-- an application program that uses linux PF_X25 sockets (some are
-  contained in the x25-util package).
-
-Before compiling the user level utilities make sure that the compiler/
-preprocessor will fetch the proper kernel header files of this kernel
-source tree. Either make /usr/include/linux a symbolic link pointing to 
-this kernel's include/linux directory or set the appropriate compiler flags.
-
-When all drivers and interfaces are loaded and configured you need to
-ifconfig the network interfaces up and add X.25-routes to them. Use
-the usual ifconfig tool.
-
-ifconfig <iface-name> up
-
-But a special x25route tool (distributed with the x25-util package)
-is needed to set up X.25 routes. I.e. 
-
-x25route add 01 <iface-name>
-
-will cause all x.25 connections to the destination X.25-address
-"01" to be routed to your created isdn network interface.
-
-There are currently no real X.25 applications available. However, for
-tests, the x25-utils package contains a modified version of telnet
-and telnetd that uses X.25 sockets instead of tcp/ip sockets. You can
-use those for your first tests. Furthermore, you might check
-ftp://ftp.hamburg.pop.de/pub/LOCAL/linux/i4l-eft/ which contains some
-alpha-test implementation ("eftp4linux") of the EUROfile transfer
-protocol.
-
-The scripts distributed with the eftp4linux test releases might also
-provide useful examples for setting up X.25 on top of isdn.
-
-The x25-utility package also contains an x25trace tool that can be
-used to monitor X.25 packets received by the network interfaces.
-The /proc/net/x25* files also contain useful information. 
-
-- Henner
diff --git a/Documentation/isdn/syncPPP.FAQ b/Documentation/isdn/syncPPP.FAQ
deleted file mode 100644
index 3257a4bc0786..000000000000
--- a/Documentation/isdn/syncPPP.FAQ
+++ /dev/null
@@ -1,224 +0,0 @@
-simple isdn4linux PPP FAQ .. to be continued .. not 'debugged' 
--------------------------------------------------------------------
-
-Q01: what's pppd, ipppd, syncPPP, asyncPPP ??
-Q02: error message "this system lacks PPP support"
-Q03: strange information using 'ifconfig'
-Q04: MPPP?? What's that and how can I use it ...
-Q05: I tried MPPP but it doesn't work 
-Q06: can I use asynchronous PPP encapsulation with network devices
-Q07: A SunISDN machine can't connect to my i4l system
-Q08: I wanna talk to several machines, which need different configs
-Q09: Starting the ipppd, I get only error messages from i4l
-Q10: I wanna use dynamic IP address assignment 
-Q11: I can't connect. How can I check where the problem is.
-Q12: How can I reduce login delay? 
-
--------------------------------------------------------------------
-
-Q01: pppd, ipppd, syncPPP, asyncPPP .. what is that ?
-   what should I use?
-A: The pppd is for asynchronous PPP .. asynchronous means
-   here, the framing is character based. (e.g when
-   using ttyI* or tty* devices)
-
-   The ipppd handles PPP packets coming in HDLC
-   frames (bit based protocol) ... The PPP driver
-   in isdn4linux pushes all IP packets direct
-   to the network layer and all PPP protocol
-   frames to the /dev/ippp* device. 
-   So, the ipppd is a simple external network
-   protocol handler.
-
-   If you login into a remote machine using the
-   /dev/ttyI* devices and then enable PPP on the
-   remote terminal server -> use the 'old' pppd
-
-   If your remote side immediately starts to send
-   frames ... you probably connect to a 
-   syncPPP machine .. use the network device part
-   of isdn4linux with the 'syncppp' encapsulation
-   and make sure, that the ipppd is running and 
-   connected to at least one /dev/ippp*. Check the 
-   isdn4linux manual on how to configure a network device.
-
---
-
-Q02: when I start the ipppd .. I only get the
-   error message "this system lacks PPP support"
-A: check that at least the device 'ippp0' exists.
-   (you can check this e.g with the program 'ifconfig')
-   The ipppd NEEDS this device under THIS name .. 
-   If this device doesn't exists, use:
-	isdnctrl addif ippp0
-	isdnctrl encap ippp0 syncppp
-	... (see isdn4linux doc for more) ...
-A: Maybe you have compiled the ipppd with another
-   kernel source tree than the kernel you currently
-   run ... 
-
---
-
-Q03: when I list the netdevices with ifconfig I see, that
-   my ISDN interface has a HWaddr and IRQ=0 and Base 
-   address = 0 
-A: The device is a fake ethernet device .. ignore IRQ and baseaddr
-   You need the HWaddr only for ethernet encapsulation.
-   
---
-
-Q04: MPPP?? What's that and how can I use it ...
-
-A: MPPP or MP or MPP (Warning: MP is also an 
-   acronym for 'Multi Processor') stands for
-   Multi Point to Point and means bundling
-   of several channels to one logical stream.
-   To enable MPPP negotiation you must call the
-   ipppd with the '+mp' option. 
-   You must also configure a slave device for
-   every additional channel. (see the i4l manual
-   for more)
-   To use channel bundling you must first activate
-   the 'master' or initial call. Now you can add 
-   the slave channels with the command:
-       isdnctrl addlink <device>
-   e.g:
-       isdnctrl addlink ippp0
-   This is different from other encapsulations of
-   isdn4linux! With syncPPP, there is no automatic
-   activation of slave devices.
-
---
-
-Q05: I tried MPPP but it doesn't work .. the ipppd
-   writes in the debug log something like:
-   .. rcvd [0][proto=0x3d] c0 00 00 00 80 fd 01 01 00 0a ...
-   .. sent [0][LCP ProtRej id=0x2 00 3d c0 00 00 00 80 fd 01 ...
-
-A: you forgot to compile MPPP/RFC1717 support into the
-   ISDN Subsystem. Recompile with this option enabled.
-
---
-
-Q06: can I use asynchronous PPP encapsulation
-   over the network interface of isdn4linux ..
-
-A: No .. that's not possible .. Use the standard
-   PPP package over the /dev/ttyI* devices. You
-   must not use the ipppd for this.
-   
---
-
-Q07: A SunISDN machine tries to connect my i4l system,
-   which doesn't work.
-   Checking the debug log I just saw garbage like:
-!![ ... fill in the line ... ]!!
-
-A: The Sun tries to talk asynchronous PPP ... i4l
-   can't understand this ... try to use the ttyI*
-   devices with the standard PPP/pppd package
-
-A: (from Alexanter Strauss: )
-!![ ... fill in mail ]!!
-
---
-
-Q08: I wanna talk to remote machines, which need
-   a different configuration. The only way
-   I found to do this is to kill the ipppd and
-   start a new one with another config to connect
-   to the second machine. 
-
-A: you must bind a network interface explicitly to
-   an ippp device, where you can connect a (for this
-   interface) individually configured ipppd.
-
---
-
-Q09: When I start the ipppd I only get error messages
-   from the i4l driver .. 
-
-A: When starting, the ipppd calls functions which may 
-   trigger a network packet. (e.g gethostbyname()).
-   Without the ipppd (at this moment, it is not
-   fully started) we can't handle this network request.
-   Try to configure hostnames necessary for the ipppd
-   in your local /etc/hosts file or in a way, that
-   your system can resolve it without using an
-   isdn/ippp network-interface.
-
---
-
-Q10: I wanna use dynamic IP address assignment ... How 
-   must I configure the network device.
-
-A: At least you must have a route which forwards
-   a packet to the ippp network-interface to trigger
-   the dial-on-demand.
-   A default route to the ippp-interface will work.
-   Now you must choose a dummy IP address for your
-   interface.
-   If for some reason you can't set the default
-   route to the ippp interface, you may take any 
-   address of the subnet from which you expect your
-   dynamic IP number and set a 'network route' for
-   this subnet to the ippp interface.
-   To allow overriding of the dummy address you
-   must call the ipppd with the 'ipcp-accept-local' option.
-
-A: You must know, how the ipppd gets the addresses it wanna
-   configure. If you don't give any option, the ipppd
-   tries to negotiate the local host address!
-   With the option 'noipdefault' it requests an address
-   from the remote machine. With 'useifip' it gets the
-   addresses from the net interface. Or you set the address
-   on the option line with the <a.b.c.d:e.f.g.h> option.
-   Note: the IP address of the remote machine must be configured
-   locally or the remote machine must send it in an IPCP request.
-   If your side doesn't know the IP address after negotiation, it
-   closes the connection!
-   You must allow overriding of address with the 'ipcp-accept-*'
-   options, if you have set your own or the remote address 
-   explicitly.
-
-A: Maybe you try these options .. e.g:   
-
-    /sbin/ipppd :$REMOTE noipdefault /dev/ippp0
-
-   where REMOTE must be the address of the remote machine (the
-   machine, which gives you your address)
-
---
-
-Q11: I can't connect. How can I check where the problem is.
-
-A: A good help log is the debug output from the ipppd...
-   Check whether you can find there:
-   - only a few LCP-conf-req SENT messages (less then 10)
-     and then a Term-REQ:
-     -> check whether your ISDN card is well configured
-        it seems, that your machine doesn't dial
-        (IRQ,IO,Proto, etc problems)
-        Configure your ISDN card to print debug messages and
-        check the /dev/isdnctrl output next time. There
-        you can see, whether there is activity on the card/line.
-   - there are at least a few RECV messages in the log:
-     -> fine: your card is dialing and your remote machine
-        tries to talk with you. Maybe only a missing 
-        authentication. Check your ipppd configuration again.
-   - the ipppd exits for some reason:
-     -> not good ... check /var/adm/syslog and /var/adm/daemon.
-        Could be a bug in the ipppd.
-
---
-
-Q12: How can I reduce login delay?
-
-A: Log a login session ('debug' log) and check which options 
-  your remote side rejects. Next time configure your ipppd
-  to not negotiate these options. Another 'side effect' is, that
-  this increases redundancy. (e.g your remote side is buggy and
-  rejects options in a wrong way).
-
-
-
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 18735dc460a0..111636ad1bad 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -23,8 +23,8 @@ running, the suggested command should tell you.
 
 Again, keep in mind that this list assumes you are already functionally
 running a Linux kernel.  Also, not all tools are necessary on all
-systems; obviously, if you don't have any ISDN hardware, for example,
-you probably needn't concern yourself with isdn4k-utils.
+systems; obviously, if you don't have any PC Card hardware, for example,
+you probably needn't concern yourself with pcmciautils.
 
 ====================== ===============  ========================================
         Program        Minimal version       Command to check the version
@@ -45,7 +45,6 @@ btrfs-progs            0.18             btrfsck
 pcmciautils            004              pccardctl -V
 quota-tools            3.09             quota -V
 PPP                    2.4.0            pppd --version
-isdn4k-utils           3.1pre1          isdnctrl 2>&1|grep version
 nfs-utils              1.0.5            showmount --version
 procps                 3.2.0            ps --version
 oprofile               0.9              oprofiled --version
@@ -279,12 +278,6 @@ which can be made by::
 
 as root.
 
-Isdn4k-utils
-------------
-
-Due to changes in the length of the phone number field, isdn4k-utils
-needs to be recompiled or (preferably) upgraded.
-
 NFS-utils
 ---------
 
@@ -448,11 +441,6 @@ PPP
 
 - <ftp://ftp.samba.org/pub/ppp/>
 
-Isdn4k-utils
-------------
-
-- <ftp://ftp.isdn4linux.de/pub/isdn4linux/utils/>
-
 NFS-utils
 ---------
 
-- 
cgit 


From 96ef07f79ae8644ab9c277e3f2f4fb04a54be43d Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 1 May 2019 17:14:05 -0700
Subject: dt-bindings: phy: Add binding for Qualcomm PCIe2 PHY

The Qualcomm PCIe2 PHY is a Synopsys based PCIe PHY found in a number of
Qualcomm platforms, add a binding to describe this.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 .../devicetree/bindings/phy/qcom-pcie2-phy.txt     | 42 ++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/phy/qcom-pcie2-phy.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/phy/qcom-pcie2-phy.txt b/Documentation/devicetree/bindings/phy/qcom-pcie2-phy.txt
new file mode 100644
index 000000000000..30064253f290
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/qcom-pcie2-phy.txt
@@ -0,0 +1,42 @@
+Qualcomm PCIe2 PHY controller
+=============================
+
+The Qualcomm PCIe2 PHY is a Synopsys based phy found in a number of Qualcomm
+platforms.
+
+Required properties:
+ - compatible: compatible list, should be:
+	       "qcom,qcs404-pcie2-phy", "qcom,pcie2-phy"
+
+ - reg: offset and length of the PHY register set.
+ - #phy-cells: must be 0.
+
+ - clocks: a clock-specifier pair for the "pipe" clock
+
+ - vdda-vp-supply: phandle to low voltage regulator
+ - vdda-vph-supply: phandle to high voltage regulator
+
+ - resets: reset-specifier pairs for the "phy" and "pipe" resets
+ - reset-names: list of resets, should contain:
+		"phy" and "pipe"
+
+ - clock-output-names: name of the outgoing clock signal from the PHY PLL
+ - #clock-cells: must be 0
+
+Example:
+ phy@7786000 {
+	compatible = "qcom,qcs404-pcie2-phy", "qcom,pcie2-phy";
+	reg = <0x07786000 0xb8>;
+
+	clocks = <&gcc GCC_PCIE_0_PIPE_CLK>;
+	resets = <&gcc GCC_PCIEPHY_0_PHY_BCR>,
+	         <&gcc GCC_PCIE_0_PIPE_ARES>;
+	reset-names = "phy", "pipe";
+
+	vdda-vp-supply = <&vreg_l3_1p05>;
+	vdda-vph-supply = <&vreg_l5_1p8>;
+
+	clock-output-names = "pcie_0_pipe_clk";
+	#clock-cells = <0>;
+	#phy-cells = <0>;
+ };
-- 
cgit 


From 6c01edd395a7cc7bb82333e953992eb0e76b1c35 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Fri, 31 May 2019 10:02:11 -0600
Subject: docs: look for sphinx-pre-install in the source tree

Recent makefile changes included an invocation of
./scripts/sphinx-pre-install.  Unfortunately, that fails when a separate
build directory is in use with:

  /bin/bash: ./scripts/sphinx-pre-install: No such file or directory

Use $(srctree) to fully specify the location of this script.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/Makefile b/Documentation/Makefile
index 85d3cfafd77c..2edd03b1dad6 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -23,7 +23,7 @@ ifeq ($(HAVE_SPHINX),0)
 .DEFAULT:
 	$(warning The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed and in PATH, or set the SPHINXBUILD make variable to point to the full path of the '$(SPHINXBUILD)' executable.)
 	@echo
-	@./scripts/sphinx-pre-install
+	@$(srctree)/scripts/sphinx-pre-install
 	@echo "  SKIP    Sphinx $@ target."
 
 else # HAVE_SPHINX
-- 
cgit 


From a5e112e6424adb77d953eac20e6936b952fd6b32 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 13 May 2019 12:37:17 -0700
Subject: cgroup: add cgroup_parse_float()

cgroup already uses floating point for percent[ile] numbers and there
are several controllers which want to take them as input.  Add a
generic parse helper to handle inputs.

Update the interface convention documentation about the use of
percentage numbers.  While at it, also clarify the default time unit.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 Documentation/admin-guide/cgroup-v2.rst | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 88e746074252..73b0c0d8df31 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -696,6 +696,12 @@ Conventions
   informational files on the root cgroup which end up showing global
   information available elsewhere shouldn't exist.
 
+- The default time unit is microseconds.  If a different unit is ever
+  used, an explicit unit suffix must be present.
+
+- A parts-per quantity should use a percentage decimal with at least
+  two digit fractional part - e.g. 13.40.
+
 - If a controller implements weight based resource distribution, its
   interface file should be named "weight" and have the range [1,
   10000] with 100 as the default.  The values are chosen to allow
-- 
cgit 


From 0ff9848067b7b950a4ed70de7f5028600a2157e3 Mon Sep 17 00:00:00 2001
From: Ke Wu <mikewu@google.com>
Date: Thu, 30 May 2019 12:22:08 -0700
Subject: security/loadpin: Allow to exclude specific file types

Linux kernel already provide MODULE_SIG and KEXEC_VERIFY_SIG to
make sure loaded kernel module and kernel image are trusted. This
patch adds a kernel command line option "loadpin.exclude" which
allows to exclude specific file types from LoadPin. This is useful
when people want to use different mechanisms to verify module and
kernel image while still use LoadPin to protect the integrity of
other files kernel loads.

Signed-off-by: Ke Wu <mikewu@google.com>
Reviewed-by: James Morris <jamorris@linux.microsoft.com>
[kees: fix array size issue reported by Coverity via Colin Ian King]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/admin-guide/LSM/LoadPin.rst | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/LSM/LoadPin.rst b/Documentation/admin-guide/LSM/LoadPin.rst
index 32070762d24c..716ad9b23c9a 100644
--- a/Documentation/admin-guide/LSM/LoadPin.rst
+++ b/Documentation/admin-guide/LSM/LoadPin.rst
@@ -19,3 +19,13 @@ block device backing the filesystem is not read-only, a sysctl is
 created to toggle pinning: ``/proc/sys/kernel/loadpin/enabled``. (Having
 a mutable filesystem means pinning is mutable too, but having the
 sysctl allows for easy testing on systems with a mutable filesystem.)
+
+It's also possible to exclude specific file types from LoadPin using kernel
+command line option "``loadpin.exclude``". By default, all files are
+included, but they can be excluded using kernel command line option such
+as "``loadpin.exclude=kernel-module,kexec-image``". This allows to use
+different mechanisms such as ``CONFIG_MODULE_SIG`` and
+``CONFIG_KEXEC_VERIFY_SIG`` to verify kernel module and kernel image while
+still use LoadPin to protect the integrity of other files kernel loads. The
+full list of valid file types can be found in ``kernel_read_file_str``
+defined in ``include/linux/fs.h``.
-- 
cgit 


From c231c22a989af95fae3e75cac9d4511e0fe79377 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Thu, 30 May 2019 21:23:18 +0100
Subject: bpf: doc: update answer for 32-bit subregister question

There has been quite a few progress around the two steps mentioned in the
answer to the following question:

  Q: BPF 32-bit subregister requirements

This patch updates the answer to reflect what has been done.

v2:
 - Add missing full stop. (Song Liu)
 - Minor tweak on one sentence. (Song Liu)

v1:
 - Integrated rephrase from Quentin and Jakub

Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/bpf_design_QA.rst | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst
index cb402c59eca5..12a246fcf6cb 100644
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@@ -172,11 +172,31 @@ registers which makes BPF inefficient virtual machine for 32-bit
 CPU architectures and 32-bit HW accelerators. Can true 32-bit registers
 be added to BPF in the future?
 
-A: NO. The first thing to improve performance on 32-bit archs is to teach
-LLVM to generate code that uses 32-bit subregisters. Then second step
-is to teach verifier to mark operations where zero-ing upper bits
-is unnecessary. Then JITs can take advantage of those markings and
-drastically reduce size of generated code and improve performance.
+A: NO.
+
+But some optimizations on zero-ing the upper 32 bits for BPF registers are
+available, and can be leveraged to improve the performance of JITed BPF
+programs for 32-bit architectures.
+
+Starting with version 7, LLVM is able to generate instructions that operate
+on 32-bit subregisters, provided the option -mattr=+alu32 is passed for
+compiling a program. Furthermore, the verifier can now mark the
+instructions for which zero-ing the upper bits of the destination register
+is required, and insert an explicit zero-extension (zext) instruction
+(a mov32 variant). This means that for architectures without zext hardware
+support, the JIT back-ends do not need to clear the upper bits for
+subregisters written by alu32 instructions or narrow loads. Instead, the
+back-ends simply need to support code generation for that mov32 variant,
+and to overwrite bpf_jit_needs_zext() to make it return "true" (in order to
+enable zext insertion in the verifier).
+
+Note that it is possible for a JIT back-end to have partial hardware
+support for zext. In that case, if verifier zext insertion is enabled,
+it could lead to the insertion of unnecessary zext instructions. Such
+instructions could be removed by creating a simple peephole inside the JIT
+back-end: if one instruction has hardware support for zext and if the next
+instruction is an explicit zext, then the latter can be skipped when doing
+the code generation.
 
 Q: Does BPF have a stable ABI?
 ------------------------------
-- 
cgit 


From 2585a584f8444e10a5a9e234c0c38b81dfe4370a Mon Sep 17 00:00:00 2001
From: Krishna Yarlagadda <kyarlagadda@nvidia.com>
Date: Thu, 16 May 2019 17:23:11 +0530
Subject: pinctrl: Add Tegra194 pinctrl DT bindings

Add binding doc for Tegra 194 pinctrl driver.

Signed-off-by: Krishna Yarlagadda <kyarlagadda@nvidia.com>
Tested-by: Vidya Sagar <vidyas@nvidia.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../bindings/pinctrl/nvidia,tegra194-pinmux.txt    | 107 +++++++++++++++++++++
 1 file changed, 107 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/pinctrl/nvidia,tegra194-pinmux.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra194-pinmux.txt b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra194-pinmux.txt
new file mode 100644
index 000000000000..8763f448c376
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra194-pinmux.txt
@@ -0,0 +1,107 @@
+NVIDIA Tegra194 pinmux controller
+
+Required properties:
+- compatible: "nvidia,tegra194-pinmux"
+- reg: Should contain a list of base address and size pairs for:
+  - first entry: The APB_MISC_GP_*_PADCTRL registers (pad control)
+  - second entry: The PINMUX_AUX_* registers (pinmux)
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Tegra's pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as pull-up, tristate, drive strength, etc.
+
+See the TRM to determine which properties and values apply to each pin/group.
+Macro values for property values are defined in
+include/dt-binding/pinctrl/pinctrl-tegra.h.
+
+Required subnode-properties:
+- nvidia,pins : An array of strings. Each string contains the name of a pin or
+    group. Valid values for these names are listed below.
+
+Optional subnode-properties:
+- nvidia,function: A string containing the name of the function to mux to the
+    pin or group.
+- nvidia,pull: Integer, representing the pull-down/up to apply to the pin.
+    0: none, 1: down, 2: up.
+- nvidia,tristate: Integer.
+    0: drive, 1: tristate.
+- nvidia,enable-input: Integer. Enable the pin's input path.
+    enable :TEGRA_PIN_ENABLE and
+    disable or output only: TEGRA_PIN_DISABLE.
+- nvidia,open-drain: Integer.
+    enable: TEGRA_PIN_ENABLE.
+    disable: TEGRA_PIN_DISABLE.
+- nvidia,lock: Integer. Lock the pin configuration against further changes
+    until reset.
+    enable: TEGRA_PIN_ENABLE.
+    disable: TEGRA_PIN_DISABLE.
+- nvidia,io-hv: Integer. Select high-voltage receivers.
+    normal: TEGRA_PIN_DISABLE
+    high: TEGRA_PIN_ENABLE
+- nvidia,schmitt: Integer. Enables Schmitt Trigger on the input.
+    normal: TEGRA_PIN_DISABLE
+    high: TEGRA_PIN_ENABLE
+- nvidia,drive-type: Integer. Valid range 0...3.
+- nvidia,pull-down-strength: Integer. Controls drive strength. 0 is weakest.
+    The range of valid values depends on the pingroup. See "CAL_DRVDN" in the
+    Tegra TRM.
+- nvidia,pull-up-strength: Integer. Controls drive strength. 0 is weakest.
+    The range of valid values depends on the pingroup. See "CAL_DRVUP" in the
+    Tegra TRM.
+
+Valid values for pin and group names (nvidia,pin) are:
+
+    These correspond to Tegra PADCTL_* (pinmux) registers.
+
+  Mux groups:
+
+    These correspond to Tegra PADCTL_* (pinmux) registers. Any property
+    that exists in those registers may be set for the following pin names.
+
+    pex_l5_clkreq_n_pgg0, pex_l5_rst_n_pgg1
+
+  Drive groups:
+
+    These registers controls a single pin for which a mux group exists.
+    See the list above for the pin name to use when configuring the pinmux.
+
+    pex_l5_clkreq_n_pgg0, pex_l5_rst_n_pgg1
+
+Valid values for nvidia,functions are:
+
+    pe5
+
+Power Domain:
+    pex_l5_clkreq_n_pgg0 and pex_l5_rst_n_pgg1 are part of PCIE C5 power
+    partition. Client devices must enable this partition before accessing
+    these pins here.
+
+
+Example:
+
+		tegra_pinctrl: pinmux: pinmux@2430000 {
+			compatible = "nvidia,tegra194-pinmux";
+			reg = <0x2430000 0x17000
+			       0xc300000 0x4000>;
+
+			pinctrl-names = "pex_rst";
+			pinctrl-0 = <&pex_rst_c5_out_state>;
+
+			pex_rst_c5_out_state: pex_rst_c5_out {
+				pex_rst {
+					nvidia,pins = "pex_l5_rst_n_pgg1";
+					nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+					nvidia,lpdr = <TEGRA_PIN_ENABLE>;
+					nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+					nvidia,io-high-voltage = <TEGRA_PIN_ENABLE>;
+					nvidia,tristate = <TEGRA_PIN_DISABLE>;
+					nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+				};
+			};
+		};
-- 
cgit 


From c01fbbc83f42748b3ed094497933601e6c9e0a03 Mon Sep 17 00:00:00 2001
From: Yuyang Du <duyuyang@gmail.com>
Date: Mon, 6 May 2019 16:19:18 +0800
Subject: locking/lockdep: Add description and explanation in lockdep design
 doc

More words are added to lockdep design document regarding key concepts,
which should help people without lockdep experience read and understand
lockdep reports.

Signed-off-by: Yuyang Du <duyuyang@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bvanassche@acm.org
Cc: frederic@kernel.org
Cc: ming.lei@redhat.com
Cc: will.deacon@arm.com
Link: https://lkml.kernel.org/r/20190506081939.74287-3-duyuyang@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/locking/lockdep-design.txt | 79 ++++++++++++++++++++++++--------
 1 file changed, 61 insertions(+), 18 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.txt
index 39fae143c9cb..ae65758383ea 100644
--- a/Documentation/locking/lockdep-design.txt
+++ b/Documentation/locking/lockdep-design.txt
@@ -15,34 +15,48 @@ tens of thousands of) instantiations. For example a lock in the inode
 struct is one class, while each inode has its own instantiation of that
 lock class.
 
-The validator tracks the 'state' of lock-classes, and it tracks
-dependencies between different lock-classes. The validator maintains a
-rolling proof that the state and the dependencies are correct.
-
-Unlike an lock instantiation, the lock-class itself never goes away: when
-a lock-class is used for the first time after bootup it gets registered,
-and all subsequent uses of that lock-class will be attached to this
-lock-class.
+The validator tracks the 'usage state' of lock-classes, and it tracks
+the dependencies between different lock-classes. Lock usage indicates
+how a lock is used with regard to its IRQ contexts, while lock
+dependency can be understood as lock order, where L1 -> L2 suggests that
+a task is attempting to acquire L2 while holding L1. From lockdep's
+perspective, the two locks (L1 and L2) are not necessarily related; that
+dependency just means the order ever happened. The validator maintains a
+continuing effort to prove lock usages and dependencies are correct or
+the validator will shoot a splat if incorrect.
+
+A lock-class's behavior is constructed by its instances collectively:
+when the first instance of a lock-class is used after bootup the class
+gets registered, then all (subsequent) instances will be mapped to the
+class and hence their usages and dependecies will contribute to those of
+the class. A lock-class does not go away when a lock instance does, but
+it can be removed if the memory space of the lock class (static or
+dynamic) is reclaimed, this happens for example when a module is
+unloaded or a workqueue is destroyed.
 
 State
 -----
 
-The validator tracks lock-class usage history into 4 * nSTATEs + 1 separate
-state bits:
+The validator tracks lock-class usage history and divides the usage into
+(4 usages * n STATEs + 1) categories:
 
+where the 4 usages can be:
 - 'ever held in STATE context'
 - 'ever held as readlock in STATE context'
 - 'ever held with STATE enabled'
 - 'ever held as readlock with STATE enabled'
 
-Where STATE can be either one of (kernel/locking/lockdep_states.h)
- - hardirq
- - softirq
+where the n STATEs are coded in kernel/locking/lockdep_states.h and as of
+now they include:
+- hardirq
+- softirq
 
+where the last 1 category is:
 - 'ever used'                                       [ == !unused        ]
 
-When locking rules are violated, these state bits are presented in the
-locking error messages, inside curlies. A contrived example:
+When locking rules are violated, these usage bits are presented in the
+locking error messages, inside curlies, with a total of 2 * n STATEs bits.
+A contrived example:
 
    modprobe/2287 is trying to acquire lock:
     (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
@@ -51,15 +65,44 @@ locking error messages, inside curlies. A contrived example:
     (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
 
 
-The bit position indicates STATE, STATE-read, for each of the states listed
-above, and the character displayed in each indicates:
+For a given lock, the bit positions from left to right indicate the usage
+of the lock and readlock (if exists), for each of the n STATEs listed
+above respectively, and the character displayed at each bit position
+indicates:
 
    '.'  acquired while irqs disabled and not in irq context
    '-'  acquired in irq context
    '+'  acquired with irqs enabled
    '?'  acquired in irq context with irqs enabled.
 
-Unused mutexes cannot be part of the cause of an error.
+The bits are illustrated with an example:
+
+    (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
+                         ||||
+                         ||| \-> softirq disabled and not in softirq context
+                         || \--> acquired in softirq context
+                         | \---> hardirq disabled and not in hardirq context
+                          \----> acquired in hardirq context
+
+
+For a given STATE, whether the lock is ever acquired in that STATE
+context and whether that STATE is enabled yields four possible cases as
+shown in the table below. The bit character is able to indicate which
+exact case is for the lock as of the reporting time.
+
+   -------------------------------------------
+  |              | irq enabled | irq disabled |
+  |-------------------------------------------|
+  | ever in irq  |      ?      |       -      |
+  |-------------------------------------------|
+  | never in irq |      +      |       .      |
+   -------------------------------------------
+
+The character '-' suggests irq is disabled because if otherwise the
+charactor '?' would have been shown instead. Similar deduction can be
+applied for '+' too.
+
+Unused locks (e.g., mutexes) cannot be part of the cause of an error.
 
 
 Single-lock state rules:
-- 
cgit 


From 1ac4ba5ed0114bcc146d5743d97df414af25c524 Mon Sep 17 00:00:00 2001
From: Yuyang Du <duyuyang@gmail.com>
Date: Mon, 6 May 2019 16:19:32 +0800
Subject: locking/lockdep: Add explanation to lock usage rules in lockdep
 design doc

The irq usage and lock dependency rules that if violated a deacklock may
happen are explained in more detail.

Signed-off-by: Yuyang Du <duyuyang@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bvanassche@acm.org
Cc: frederic@kernel.org
Cc: ming.lei@redhat.com
Cc: will.deacon@arm.com
Link: https://lkml.kernel.org/r/20190506081939.74287-17-duyuyang@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/locking/lockdep-design.txt | 33 ++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.txt
index ae65758383ea..f189d130e543 100644
--- a/Documentation/locking/lockdep-design.txt
+++ b/Documentation/locking/lockdep-design.txt
@@ -108,14 +108,24 @@ Unused locks (e.g., mutexes) cannot be part of the cause of an error.
 Single-lock state rules:
 ------------------------
 
+A lock is irq-safe means it was ever used in an irq context, while a lock
+is irq-unsafe means it was ever acquired with irq enabled.
+
 A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The
-following states are exclusive, and only one of them is allowed to be
-set for any lock-class:
+following states must be exclusive: only one of them is allowed to be set
+for any lock-class based on its usage:
+
+ <hardirq-safe> or <hardirq-unsafe>
+ <softirq-safe> or <softirq-unsafe>
 
- <hardirq-safe> and <hardirq-unsafe>
- <softirq-safe> and <softirq-unsafe>
+This is because if a lock can be used in irq context (irq-safe) then it
+cannot be ever acquired with irq enabled (irq-unsafe). Otherwise, a
+deadlock may happen. For example, in the scenario that after this lock
+was acquired but before released, if the context is interrupted this
+lock will be attempted to acquire twice, which creates a deadlock,
+referred to as lock recursion deadlock.
 
-The validator detects and reports lock usage that violate these
+The validator detects and reports lock usage that violates these
 single-lock state rules.
 
 Multi-lock dependency rules:
@@ -124,15 +134,18 @@ Multi-lock dependency rules:
 The same lock-class must not be acquired twice, because this could lead
 to lock recursion deadlocks.
 
-Furthermore, two locks may not be taken in different order:
+Furthermore, two locks can not be taken in inverse order:
 
  <L1> -> <L2>
  <L2> -> <L1>
 
-because this could lead to lock inversion deadlocks. (The validator
-finds such dependencies in arbitrary complexity, i.e. there can be any
-other locking sequence between the acquire-lock operations, the
-validator will still track all dependencies between locks.)
+because this could lead to a deadlock - referred to as lock inversion
+deadlock - as attempts to acquire the two locks form a circle which
+could lead to the two contexts waiting for each other permanently. The
+validator will find such dependency circle in arbitrary complexity,
+i.e., there can be any other locking sequence between the acquire-lock
+operations; the validator will still find whether these locks can be
+acquired in a circular fashion.
 
 Furthermore, the following usage based lock dependencies are not allowed
 between any two lock-classes:
-- 
cgit 


From fff9b6c7d26943a8eb32b58364b7ec6b9369746a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 May 2019 13:52:31 +0200
Subject: Documentation/atomic_t.txt: Clarify pure non-rmw usage

Clarify that pure non-RMW usage of atomic_t is pointless, there is
nothing 'magical' about atomic_set() / atomic_read().

This is something that seems to confuse people, because I happen upon it
semi-regularly.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20190524115231.GN2623@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/atomic_t.txt | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt
index dca3fb0554db..89eae7f6b360 100644
--- a/Documentation/atomic_t.txt
+++ b/Documentation/atomic_t.txt
@@ -81,9 +81,11 @@ Non-RMW ops:
 
 The non-RMW ops are (typically) regular LOADs and STOREs and are canonically
 implemented using READ_ONCE(), WRITE_ONCE(), smp_load_acquire() and
-smp_store_release() respectively.
+smp_store_release() respectively. Therefore, if you find yourself only using
+the Non-RMW operations of atomic_t, you do not in fact need atomic_t at all
+and are doing it wrong.
 
-The one detail to this is that atomic_set{}() should be observable to the RMW
+A subtle detail of atomic_set{}() is that it should be observable to the RMW
 ops. That is:
 
   C atomic-set
-- 
cgit 


From c9c2c27d7ceca8c2856c5008f2002bddb384f518 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 16 Apr 2019 15:46:55 +0200
Subject: debugfs: make debugfs_create_u32_array() return void

The single user of debugfs_create_u32_array() does not care about the
return value of it, so make it return void as there is no need to do
anything with the return value.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/filesystems/debugfs.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt
index 4a0a9c3f4af6..9e27c843d00e 100644
--- a/Documentation/filesystems/debugfs.txt
+++ b/Documentation/filesystems/debugfs.txt
@@ -169,7 +169,7 @@ byte offsets over a base for the register block.
 
 If you want to dump an u32 array in debugfs, you can create file with:
 
-    struct dentry *debugfs_create_u32_array(const char *name, umode_t mode,
+    void debugfs_create_u32_array(const char *name, umode_t mode,
 			struct dentry *parent,
 			u32 *array, u32 elements);
 
-- 
cgit 


From 0864c9ce8fe83eadfd21b08e98997111d091660c Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Mon, 3 Jun 2019 07:32:50 -0400
Subject: media: dt-bindings: Fix vendor-prefixes YAML

Commit 8df39e16877f ("media: dt-bindings: media: Add vendor prefix for
allegro") introduced a new devicetree binding vendors, however with an
improper syntax making the resulting YAML impossible to parse. Let's fix
this.

Fixes: 8df39e16877f ("media: dt-bindings: media: Add vendor prefix for allegro")

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 7da11464991a..1acf806b62bf 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -49,7 +49,7 @@ patternProperties:
     description: Aeroflex Gaisler AB
   "^al,.*":
     description: Annapurna Labs
-  "^allegro,.*"
+  "^allegro,.*":
     description: Allegro DVT
   "^allo,.*":
     description: Allo.com
-- 
cgit 


From 4d3aed70902f299ff2ed7048ef44f0d4d573d786 Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Wed, 29 May 2019 17:49:06 -0700
Subject: f2fs: Add option to limit required GC for checkpoint=disable

This extends the checkpoint option to allow checkpoint=disable:%u[%]
This allows you to specify what how much of the disk you are willing
to lose access to while mounting with checkpoint=disable. If the amount
lost would be higher, the mount will return -EAGAIN. This can be given
as a percent of total space, or in blocks.

Currently, we need to run garbage collection until the amount of holes
is smaller than the OVP space. With the new option, f2fs can mark
space as unusable up front instead of requiring garbage collection until
the number of holes is small enough.

Signed-off-by: Daniel Rosenberg <drosen@google.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
 Documentation/filesystems/f2fs.txt      | 19 ++++++++++++++++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 91822ce25831..dca326e0ee3e 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -243,3 +243,11 @@ Description:
 		 - Del: echo '[h/c]!extension' > /sys/fs/f2fs/<disk>/extension_list
 		 - [h] means add/del hot file extension
 		 - [c] means add/del cold file extension
+
+What:		/sys/fs/f2fs/<disk>/unusable
+Date		April 2019
+Contact:	"Daniel Rosenberg" <drosen@google.com>
+Description:
+		If checkpoint=disable, it displays the number of blocks that are unusable.
+                If checkpoint=enable it displays the enumber of blocks that would be unusable
+                if checkpoint=disable were to be set.
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 66aca042988e..bebd1be3ba49 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -214,11 +214,22 @@ fsync_mode=%s          Control the policy of fsync. Currently supports "posix",
                        non-atomic files likewise "nobarrier" mount option.
 test_dummy_encryption  Enable dummy encryption, which provides a fake fscrypt
                        context. The fake fscrypt context is used by xfstests.
-checkpoint=%s          Set to "disable" to turn off checkpointing. Set to "enable"
+checkpoint=%s[:%u[%]]     Set to "disable" to turn off checkpointing. Set to "enable"
                        to reenable checkpointing. Is enabled by default. While
                        disabled, any unmounting or unexpected shutdowns will cause
                        the filesystem contents to appear as they did when the
                        filesystem was mounted with that option.
+                       While mounting with checkpoint=disabled, the filesystem must
+                       run garbage collection to ensure that all available space can
+                       be used. If this takes too much time, the mount may return
+                       EAGAIN. You may optionally add a value to indicate how much
+                       of the disk you would be willing to temporarily give up to
+                       avoid additional garbage collection. This can be given as a
+                       number of blocks, or as a percent. For instance, mounting
+                       with checkpoint=disable:100% would always succeed, but it may
+                       hide up to all remaining free space. The actual space that
+                       would be unusable can be viewed at /sys/fs/f2fs/<disk>/unusable
+                       This space is reclaimed once checkpoint=enable.
 
 ================================================================================
 DEBUGFS ENTRIES
@@ -396,6 +407,12 @@ Files in /sys/fs/f2fs/<devname>
 
  current_reserved_blocks      This shows # of blocks currently reserved.
 
+ unusable                     If checkpoint=disable, this shows the number of
+                              blocks that are unusable.
+                              If checkpoint=enable it shows the number of blocks
+                              that would be unusable if checkpoint=disable were
+                              to be set.
+
 ================================================================================
 USAGE
 ================================================================================
-- 
cgit 


From adf671ccd2aa067cd9786d6337dfe4a3d2cc2e8f Mon Sep 17 00:00:00 2001
From: Sameeh Jubran <sameehj@amazon.com>
Date: Mon, 3 Jun 2019 17:43:24 +0300
Subject: net: ena: documentation: update ena.txt

Small cosmetic changes to ena.txt

Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/amazon/ena.txt | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/amazon/ena.txt b/Documentation/networking/device_drivers/amazon/ena.txt
index 2b4b6f57e549..1bb55c7b604c 100644
--- a/Documentation/networking/device_drivers/amazon/ena.txt
+++ b/Documentation/networking/device_drivers/amazon/ena.txt
@@ -73,7 +73,7 @@ operation.
 AQ is used for submitting management commands, and the
 results/responses are reported asynchronously through ACQ.
 
-ENA introduces a very small set of management commands with room for
+ENA introduces a small set of management commands with room for
 vendor-specific extensions. Most of the management operations are
 framed in a generic Get/Set feature command.
 
@@ -202,11 +202,14 @@ delay value to each level.
 The user can enable/disable adaptive moderation, modify the interrupt
 delay table and restore its default values through sysfs.
 
+RX copybreak:
+=============
 The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK
 and can be configured by the ETHTOOL_STUNABLE command of the
 SIOCETHTOOL ioctl.
 
 SKB:
+====
 The driver-allocated SKB for frames received from Rx handling using
 NAPI context. The allocation method depends on the size of the packet.
 If the frame length is larger than rx_copybreak, napi_get_frags()
-- 
cgit 


From fa0e5158c26ea63e1ac45befeda4d03e02f00d44 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Sat, 1 Jun 2019 08:03:10 +0800
Subject: dt-bindings: clock: mediatek: Add an extra required property to
 sgmiisys

add an extra required property "mediatek,physpeed" to sgmiisys to determine
link speed to match up the capability of the target PHY.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt
index 30cb645c0e54..f5518f26a914 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt
@@ -9,6 +9,8 @@ Required Properties:
 	- "mediatek,mt7622-sgmiisys", "syscon"
 	- "mediatek,mt7629-sgmiisys", "syscon"
 - #clock-cells: Must be 1
+- mediatek,physpeed: Should be one of "auto", "1000" or "2500" to match up
+		     the capability of the target PHY.
 
 The SGMIISYS controller uses the common clk binding from
 Documentation/devicetree/bindings/clock/clock-bindings.txt
-- 
cgit 


From 3277fc683ae57f15b646b500c216e380821231c8 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Sat, 1 Jun 2019 08:03:11 +0800
Subject: dt-bindings: net: mediatek: Add support for MediaTek MT7629 SoC

Add binding document for the ethernet on MT7629 SoC.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/mediatek-net.txt | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt
index 503f2b9194e2..770ff98d4524 100644
--- a/Documentation/devicetree/bindings/net/mediatek-net.txt
+++ b/Documentation/devicetree/bindings/net/mediatek-net.txt
@@ -11,6 +11,7 @@ Required properties:
 		"mediatek,mt2701-eth": for MT2701 SoC
 		"mediatek,mt7623-eth", "mediatek,mt2701-eth": for MT7623 SoC
 		"mediatek,mt7622-eth": for MT7622 SoC
+		"mediatek,mt7629-eth": for MT7629 SoC
 - reg: Address and length of the register set for the device
 - interrupts: Should contain the three frame engines interrupts in numeric
 	order. These are fe_int0, fe_int1 and fe_int2.
@@ -19,14 +20,23 @@ Required properties:
 	"ethif", "esw", "gp2", "gp1" : For MT2701 and MT7623 SoC
         "ethif", "esw", "gp0", "gp1", "gp2", "sgmii_tx250m", "sgmii_rx250m",
 	"sgmii_cdr_ref", "sgmii_cdr_fb", "sgmii_ck", "eth2pll" : For MT7622 SoC
+	"ethif", "sgmiitop", "esw", "gp0", "gp1", "gp2", "fe", "sgmii_tx250m",
+	"sgmii_rx250m", "sgmii_cdr_ref", "sgmii_cdr_fb", "sgmii2_tx250m",
+	"sgmii2_rx250m", "sgmii2_cdr_ref", "sgmii2_cdr_fb", "sgmii_ck",
+	"eth2pll" : For MT7629 SoC.
 - power-domains: phandle to the power domain that the ethernet is part of
 - resets: Should contain phandles to the ethsys reset signals
 - reset-names: Should contain the names of reset signal listed in the resets
 		property
 		These are "fe", "gmac" and "ppe"
 - mediatek,ethsys: phandle to the syscon node that handles the port setup
-- mediatek,sgmiisys: phandle to the syscon node that handles the SGMII setup
-	which is required for those SoCs equipped with SGMII such as MT7622 SoC.
+- mediatek,infracfg: phandle to the syscon node that handles the path from
+	GMAC to PHY variants, which is required for MT7629 SoC.
+- mediatek,sgmiisys: a list of phandles to the syscon node that handles the
+	SGMII setup which is required for those SoCs equipped with SGMII such
+	as MT7622 and MT7629 SoC. And MT7622 have only one set of SGMII shared
+	by GMAC1 and GMAC2; MT7629 have two independent sets of SGMII directed
+	to GMAC1 and GMAC2, respectively.
 - mediatek,pctl: phandle to the syscon node that handles the ports slew rate
 	and driver current: only for MT2701 and MT7623 SoC
 
-- 
cgit 


From af9422a85721f7afa8d5ad3442b5de5549a23e84 Mon Sep 17 00:00:00 2001
From: Gareth Williams <gareth.williams.jx@renesas.com>
Date: Tue, 28 May 2019 12:54:26 +0100
Subject: dt-bindings: clock: renesas: r9a06g032-sysctrl: Document power
 Domains

The driver is gaining power domain support, so add the new property
to the DT binding and update the examples.

Signed-off-by: Gareth Williams <gareth.williams.jx@renesas.com>
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 .../devicetree/bindings/clock/renesas,r9a06g032-sysctrl.txt        | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/clock/renesas,r9a06g032-sysctrl.txt b/Documentation/devicetree/bindings/clock/renesas,r9a06g032-sysctrl.txt
index d60b99756bb9..aed713cf0831 100644
--- a/Documentation/devicetree/bindings/clock/renesas,r9a06g032-sysctrl.txt
+++ b/Documentation/devicetree/bindings/clock/renesas,r9a06g032-sysctrl.txt
@@ -13,6 +13,7 @@ Required Properties:
 	- external (optional) RGMII_REFCLK
   - clock-names: Must be:
         clock-names = "mclk", "rtc", "jtag", "rgmii_ref_ext";
+  - #power-domain-cells: Must be 0
 
 Examples
 --------
@@ -27,6 +28,7 @@ Examples
 		clocks = <&ext_mclk>, <&ext_rtc_clk>,
 				<&ext_jtag_clk>, <&ext_rgmii_ref>;
 		clock-names = "mclk", "rtc", "jtag", "rgmii_ref_ext";
+		#power-domain-cells = <0>;
 	};
 
   - Other nodes can use the clocks provided by SYSCTRL as in:
@@ -38,6 +40,7 @@ Examples
 		interrupts = <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>;
 		reg-shift = <2>;
 		reg-io-width = <4>;
-		clocks = <&sysctrl R9A06G032_CLK_UART0>;
-		clock-names = "baudclk";
+		clocks = <&sysctrl R9A06G032_CLK_UART0>, <&sysctrl R9A06G032_HCLK_UART0>;
+		clock-names = "baudclk", "apb_pclk";
+		power-domains = <&sysctrl>;
 	};
-- 
cgit 


From f48d14c218ccaaf7c22f84582d4bdb5fa2234c76 Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@siol.net>
Date: Mon, 27 May 2019 22:14:53 +0200
Subject: dt-bindings: arm64: allwinner: h6: Add binding for DMA controller
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DMA in H6 is similar to other DMA controller, except it is first which
supports more than 32 request sources and has 16 channels. It also needs
additional clock to be enabled.

Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Clément Péron <peron.clem@gmail.com>
Acked-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/devicetree/bindings/dma/sun6i-dma.txt | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/dma/sun6i-dma.txt b/Documentation/devicetree/bindings/dma/sun6i-dma.txt
index 7fccc20d8331..cae31f4e77ba 100644
--- a/Documentation/devicetree/bindings/dma/sun6i-dma.txt
+++ b/Documentation/devicetree/bindings/dma/sun6i-dma.txt
@@ -28,12 +28,17 @@ Example:
 	};
 
 ------------------------------------------------------------------------------
-For A64 DMA controller:
+For A64 and H6 DMA controller:
 
 Required properties:
-- compatible:	"allwinner,sun50i-a64-dma"
+- compatible:	Must be one of
+		  "allwinner,sun50i-a64-dma"
+		  "allwinner,sun50i-h6-dma"
 - dma-channels: Number of DMA channels supported by the controller.
 		Refer to Documentation/devicetree/bindings/dma/dma.txt
+- clocks:	In addition to parent AHB clock, it should also contain mbus
+		clock (H6 only)
+- clock-names:	Should contain "bus" and "mbus" (H6 only)
 - all properties above, i.e. reg, interrupts, clocks, resets and #dma-cells
 
 Optional properties:
-- 
cgit 


From 18e1572419d69f8d45248cccabc40352a3e281d6 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Tue, 4 Jun 2019 07:55:49 -0600
Subject: docs: Completely fix the remote build tree case

My previous fix miserably failed to catch all of the invocations of
"./scripts/sphinx-pre-install", so we got build errors.  Try again with
more caffeine.

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/Makefile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/Makefile b/Documentation/Makefile
index 2edd03b1dad6..2df0789f90b7 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -72,14 +72,14 @@ quiet_cmd_sphinx = SPHINX  $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
 	$(abspath $(BUILDDIR)/$3/$4)
 
 htmldocs:
-	@./scripts/sphinx-pre-install --version-check
+	@$(srctree)/scripts/sphinx-pre-install --version-check
 	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var)))
 
 linkcheckdocs:
 	@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,linkcheck,$(var),,$(var)))
 
 latexdocs:
-	@./scripts/sphinx-pre-install --version-check
+	@$(srctree)/scripts/sphinx-pre-install --version-check
 	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,latex,$(var),latex,$(var)))
 
 ifeq ($(HAVE_PDFLATEX),0)
@@ -91,17 +91,17 @@ pdfdocs:
 else # HAVE_PDFLATEX
 
 pdfdocs: latexdocs
-	@./scripts/sphinx-pre-install --version-check
+	@$(srctree)/scripts/sphinx-pre-install --version-check
 	$(foreach var,$(SPHINXDIRS), $(MAKE) PDFLATEX="$(PDFLATEX)" LATEXOPTS="$(LATEXOPTS)" -C $(BUILDDIR)/$(var)/latex || exit;)
 
 endif # HAVE_PDFLATEX
 
 epubdocs:
-	@./scripts/sphinx-pre-install --version-check
+	@$(srctree)/scripts/sphinx-pre-install --version-check
 	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,epub,$(var),epub,$(var)))
 
 xmldocs:
-	@./scripts/sphinx-pre-install --version-check
+	@$(srctree)/scripts/sphinx-pre-install --version-check
 	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,xml,$(var),xml,$(var)))
 
 endif # HAVE_SPHINX
-- 
cgit 


From d6e65bb7ff0d360c4c5462c3d0b237f2a07e5312 Mon Sep 17 00:00:00 2001
From: Shuming Fan <shumingf@realtek.com>
Date: Tue, 28 May 2019 12:30:17 +0800
Subject: ASoC: rt1011: Add RT1011 amplifier driver

This is the initial amplifier driver for rt1011.

Signed-off-by: Shuming Fan <shumingf@realtek.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/sound/rt1011.txt | 32 ++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/sound/rt1011.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/sound/rt1011.txt b/Documentation/devicetree/bindings/sound/rt1011.txt
new file mode 100644
index 000000000000..35a23e60d679
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/rt1011.txt
@@ -0,0 +1,32 @@
+RT1011 Mono Class D Audio Amplifier
+
+This device supports I2C only.
+
+Required properties:
+
+- compatible : "realtek,rt1011".
+
+- reg : The I2C address of the device. This I2C address decide by
+        two input pins (ASEL1 and ASEL2).
+        -------------------------------------
+        |   ASEL2   |  ASEL1   |  Address   |
+        -------------------------------------
+        |     0     |    0     |   0x38     |
+        -------------------------------------
+        |     0     |    1     |   0x39     |
+        -------------------------------------
+        |     1     |    0     |   0x3a     |
+        -------------------------------------
+        |     1     |    1     |   0x3b     |
+        -------------------------------------
+
+Pins on the device (for linking into audio routes) for RT1011:
+
+  * SPO
+
+Example:
+
+rt1011: codec@38 {
+	compatible = "realtek,rt1011";
+	reg = <0x38>;
+};
-- 
cgit 


From 6d72a49ff3f6bb0932718c132b75351aedc9458e Mon Sep 17 00:00:00 2001
From: Masahisa Kojima <masahisa.kojima@linaro.org>
Date: Tue, 4 Jun 2019 14:12:56 +0900
Subject: spi: Add DT bindings for Synquacer

This patch adds documentation for Device-Tree bindings for the
Socionext Synquacer spi driver.

Signed-off-by: Masahisa Kojima <masahisa.kojima@linaro.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/spi/spi-synquacer.txt      | 27 ++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/spi/spi-synquacer.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/spi/spi-synquacer.txt b/Documentation/devicetree/bindings/spi/spi-synquacer.txt
new file mode 100644
index 000000000000..291dfa692d0a
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-synquacer.txt
@@ -0,0 +1,27 @@
+* Socionext Synquacer HS-SPI bindings
+
+Required Properties:
+- compatible: should be "socionext,synquacer-spi"
+- reg: physical base address of the controller and length of memory mapped
+       region.
+- interrupts: should contain the "spi_rx", "spi_tx" and "spi_fault" interrupts.
+- clocks: core clock iHCLK. Optional rate clock iPCLK (default is iHCLK)
+- clock-names: Shall be "iHCLK" and "iPCLK" respectively
+
+Optional Properties:
+- socionext,use-rtm: boolean, if required to use "retimed clock" for RX
+- socionext,set-aces: boolean, if same active clock edges field to be set.
+
+Example:
+
+	spi0: spi@ff110000 {
+		compatible = "socionext,synquacer-spi";
+		reg = <0xff110000 0x1000>;
+		interrupts = <GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 161 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&clk_hsspi>;
+		clock-names = "iHCLK";
+		socionext,use-rtm;
+		socionext,set-aces;
+	};
-- 
cgit 


From be1038846b8063c448baf5ddcdc2387241c4133e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 4 Jun 2019 11:17:47 -0300
Subject: docs: soundwire: locking: fix tags for a code-block

There's an ascii artwork at Example 1 whose code-block is not properly
idented, causing those warnings.

    Documentation/driver-api/soundwire/locking.rst:50: WARNING: Inconsistent literal block quoting.
    Documentation/driver-api/soundwire/locking.rst:51: WARNING: Line block ends without a blank line.
    Documentation/driver-api/soundwire/locking.rst:55: WARNING: Inline substitution_reference start-string without end-string.
    Documentation/driver-api/soundwire/locking.rst:56: WARNING: Line block ends without a blank line.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/driver-api/soundwire/locking.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/soundwire/locking.rst b/Documentation/driver-api/soundwire/locking.rst
index 253f73555255..3a7ffb3d87f3 100644
--- a/Documentation/driver-api/soundwire/locking.rst
+++ b/Documentation/driver-api/soundwire/locking.rst
@@ -44,7 +44,9 @@ Message transfer.
      b. Transfer message (Read/Write) to Slave1 or broadcast message on
         Bus in case of bank switch.
 
-     c. Release Message lock ::
+     c. Release Message lock
+
+     ::
 
 	+----------+                    +---------+
 	|          |                    |         |
-- 
cgit 


From 8ffdaa7f491961efb4d02d3c8d806e9a60105adb Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Tue, 21 May 2019 14:06:52 +0800
Subject: KVM: Documentation: Add disable pause exits to
 KVM_CAP_X86_DISABLE_EXITS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit b31c114b (KVM: X86: Provide a capability to disable PAUSE intercepts)
forgot to add the KVM_X86_DISABLE_EXITS_PAUSE into api doc. This patch adds
it.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index ba6c42c576dd..33cd92dd6aa5 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4893,6 +4893,7 @@ Valid bits in args[0] are
 
 #define KVM_X86_DISABLE_EXITS_MWAIT            (1 << 0)
 #define KVM_X86_DISABLE_EXITS_HLT              (1 << 1)
+#define KVM_X86_DISABLE_EXITS_PAUSE            (1 << 2)
 
 Enabling this capability on a VM provides userspace with a way to no
 longer intercept some instructions for improved latency in some
-- 
cgit 


From b51700632e0e53254733ff706e5bdca22d19dbe5 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Tue, 21 May 2019 14:06:53 +0800
Subject: KVM: X86: Provide a capability to disable cstate msr read intercepts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allow guest reads CORE cstate when exposing host CPU power management capabilities
to the guest. PKG cstate is restricted to avoid a guest to get the whole package
information in multi-tenant scenario.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 33cd92dd6aa5..91fd86fcc49f 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4894,6 +4894,7 @@ Valid bits in args[0] are
 #define KVM_X86_DISABLE_EXITS_MWAIT            (1 << 0)
 #define KVM_X86_DISABLE_EXITS_HLT              (1 << 1)
 #define KVM_X86_DISABLE_EXITS_PAUSE            (1 << 2)
+#define KVM_X86_DISABLE_EXITS_CSTATE           (1 << 3)
 
 Enabling this capability on a VM provides userspace with a way to no
 longer intercept some instructions for improved latency in some
-- 
cgit 


From 87b11e0638c3dbf029a7c9020f8a779062db58fc Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 3 Jun 2019 15:17:00 -0700
Subject: net/tls: remove false positive warning

It's possible that TCP stack will decide to retransmit a packet
right when that packet's data gets acked, especially in presence
of packet reordering.  This means that packets may be in flight,
even though tls_device code has already freed their record state.
Make fill_sg_in() and in turn tls_sw_fallback() not generate a
warning in that case, and quietly proceed to drop such frames.

Make the exit path from tls_sw_fallback() drop monitor friendly,
for users to be able to troubleshoot dropped retransmissions.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/tls-offload.rst | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst
index cb85af559dff..eb7c9b81ccf5 100644
--- a/Documentation/networking/tls-offload.rst
+++ b/Documentation/networking/tls-offload.rst
@@ -379,7 +379,6 @@ by the driver:
    but did not arrive in the expected order
  * ``tx_tls_drop_no_sync_data`` - number of TX packets dropped because
    they arrived out of order and associated record could not be found
-   (see also :ref:`pre_tls_data`)
 
 Notable corner cases, exceptions and additional requirements
 ============================================================
@@ -462,21 +461,3 @@ Redirects leak clear text
 
 In the RX direction, if segment has already been decrypted by the device
 and it gets redirected or mirrored - clear text will be transmitted out.
-
-.. _pre_tls_data:
-
-Transmission of pre-TLS data
-----------------------------
-
-User can enqueue some already encrypted and framed records before enabling
-``ktls`` on the socket. Those records have to get sent as they are. This is
-perfectly easy to handle in the software case - such data will be waiting
-in the TCP layer, TLS ULP won't see it. In the offloaded case when pre-queued
-segment reaches transmission point it appears to be out of order (before the
-expected TCP sequence number) and the stack does not have a record information
-associated.
-
-All segments without record information cannot, however, be assumed to be
-pre-queued data, because a race condition exists between TCP stack queuing
-a retransmission, the driver seeing the retransmission and TCP ACK arriving
-for the retransmitted data.
-- 
cgit 


From dabde0dac12481c055d2d0ef6686c5789a1a3499 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Date: Tue, 4 Jun 2019 07:34:33 +0000
Subject: dt-bindings: net: dsa: marvell: add "marvell,mv88e6250" compatible
 string

The mv88e6250 has port_base_addr 0x8 or 0x18 (depending on
configuration pins), so it constitutes a new family and hence needs
its own compatible string.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/dsa/marvell.txt | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt
index feb007af13cb..6f9538974bb9 100644
--- a/Documentation/devicetree/bindings/net/dsa/marvell.txt
+++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt
@@ -21,10 +21,13 @@ which is at a different MDIO base address in different switch families.
 			  6341, 6350, 6351, 6352
 - "marvell,mv88e6190"	: Switch has base address 0x00. Use with models:
 			  6190, 6190X, 6191, 6290, 6390, 6390X
+- "marvell,mv88e6250"	: Switch has base address 0x08 or 0x18. Use with model:
+			  6250
 
 Required properties:
-- compatible		: Should be one of "marvell,mv88e6085" or
-			  "marvell,mv88e6190" as indicated above
+- compatible		: Should be one of "marvell,mv88e6085",
+			  "marvell,mv88e6190" or "marvell,mv88e6250" as
+			  indicated above
 - reg			: Address on the MII bus for the switch.
 
 Optional properties:
-- 
cgit 


From bcc8737ddcaad43acbb9ccf768f1be9c61bd493f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 4 Jun 2019 11:17:42 -0300
Subject: Documentation/i915: Fix references to renamed files

WARNING: kernel-doc './scripts/kernel-doc -rst -enable-lineno -function Hardware workarounds ./drivers/gpu/drm/i915/intel_workarounds.c' failed with return code 1
WARNING: kernel-doc './scripts/kernel-doc -rst -enable-lineno -function Logical Rings, Logical Ring Contexts and Execlists ./drivers/gpu/drm/i915/intel_lrc.c' failed with return code 1
WARNING: kernel-doc './scripts/kernel-doc -rst -enable-lineno -internal ./drivers/gpu/drm/i915/intel_lrc.c' failed with return code 2

Fixes: 112ed2d31a46 ("drm/i915: Move GraphicsTechnology files under gt/")
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/bd7dd29b9fb2101c954c8cfb2c3b4efc7d277045.1559656538.git.mchehab+samsung@kernel.org
---
 Documentation/gpu/i915.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 055df45596c1..6c75380b2928 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -61,7 +61,7 @@ Intel GVT-g Host Support(vGPU device model)
 Workarounds
 -----------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_workarounds.c
+.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_workarounds.c
    :doc: Hardware workarounds
 
 Display Hardware Handling
@@ -379,10 +379,10 @@ User Batchbuffer Execution
 Logical Rings, Logical Ring Contexts and Execlists
 --------------------------------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_lrc.c
+.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_lrc.c
    :doc: Logical Rings, Logical Ring Contexts and Execlists
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_lrc.c
+.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_lrc.c
    :internal:
 
 Global GTT views
-- 
cgit 


From 355fb0e54e85316fedd2f688fc62917cc4a5bb81 Mon Sep 17 00:00:00 2001
From: Luca Weiss <luca@z3ntu.xyz>
Date: Tue, 4 Jun 2019 19:21:53 +0200
Subject: dt-bindings: input: sun4i-lradc-keys: Add A64 compatible

Add the A64 compatible with a fallback to the A83T compatible.

Signed-off-by: Luca Weiss <luca@z3ntu.xyz>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
---
 Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt b/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
index 496125c6bfb7..507b737612ea 100644
--- a/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
+++ b/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
@@ -5,6 +5,7 @@ Required properties:
  - compatible: should be one of the following string:
 		"allwinner,sun4i-a10-lradc-keys"
 		"allwinner,sun8i-a83t-r-lradc"
+		"allwinner,sun50i-a64-lradc", "allwinner,sun8i-a83t-r-lradc"
  - reg: mmio address range of the chip
  - interrupts: interrupt to which the chip is connected
  - vref-supply: powersupply for the lradc reference voltage
-- 
cgit 


From 0d9ce162cf46c99628cc5da9510b959c7976735b Mon Sep 17 00:00:00 2001
From: Junaid Shahid <junaids@google.com>
Date: Thu, 3 Jan 2019 17:14:28 -0800
Subject: kvm: Convert kvm_lock to a mutex

It doesn't seem as if there is any particular need for kvm_lock to be a
spinlock, so convert the lock to a mutex so that sleepable functions (in
particular cond_resched()) can be called while holding it.

Signed-off-by: Junaid Shahid <junaids@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/locking.txt | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index 1bb8bcaf8497..635cd6eaf714 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -15,8 +15,6 @@ The acquisition orders for mutexes are as follows:
 
 On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
 
-For spinlocks, kvm_lock is taken outside kvm->mmu_lock.
-
 Everything else is a leaf: no other lock is taken inside the critical
 sections.
 
@@ -169,7 +167,7 @@ which time it will be set using the Dirty tracking mechanism described above.
 ------------
 
 Name:		kvm_lock
-Type:		spinlock_t
+Type:		mutex
 Arch:		any
 Protects:	- vm_list
 
-- 
cgit 


From b467ec063ec56900e1ebba4d5aeb50b0a7cb0ef8 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Mon, 27 May 2019 12:02:19 +0200
Subject: dt-bindings: clk: Convert Allwinner CCU to a schema

The Allwinner SoCs have a clocks controller supported in Linux, with a
matching Device Tree binding.

Now that we have the DT validation in place, let's convert the device tree
bindings for that controller over to a YAML schemas.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
---
 .../bindings/clock/allwinner,sun4i-a10-ccu.yaml    | 141 +++++++++++++++++++++
 .../devicetree/bindings/clock/sunxi-ccu.txt        |  62 ---------
 2 files changed, 141 insertions(+), 62 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml
 delete mode 100644 Documentation/devicetree/bindings/clock/sunxi-ccu.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml
new file mode 100644
index 000000000000..c935405458fe
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml
@@ -0,0 +1,141 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/allwinner,sun4i-a10-ccu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner Clock Control Unit Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  "#clock-cells":
+    const: 1
+
+  "#reset-cells":
+    const: 1
+
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-ccu
+      - allwinner,sun5i-a10s-ccu
+      - allwinner,sun5i-a13-ccu
+      - allwinner,sun6i-a31-ccu
+      - allwinner,sun7i-a20-ccu
+      - allwinner,sun8i-a23-ccu
+      - allwinner,sun8i-a33-ccu
+      - allwinner,sun8i-a83t-ccu
+      - allwinner,sun8i-a83t-r-ccu
+      - allwinner,sun8i-h3-ccu
+      - allwinner,sun8i-h3-r-ccu
+      - allwinner,sun8i-r40-ccu
+      - allwinner,sun8i-v3s-ccu
+      - allwinner,sun9i-a80-ccu
+      - allwinner,sun50i-a64-ccu
+      - allwinner,sun50i-a64-r-ccu
+      - allwinner,sun50i-h5-ccu
+      - allwinner,sun50i-h6-ccu
+      - allwinner,sun50i-h6-r-ccu
+      - allwinner,suniv-f1c100s-ccu
+      - nextthing,gr8-ccu
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    minItems: 2
+    maxItems: 4
+    items:
+      - description: High Frequency Oscillator (usually at 24MHz)
+      - description: Low Frequency Oscillator (usually at 32kHz)
+      - description: Internal Oscillator
+      - description: Peripherals PLL
+
+  clock-names:
+    minItems: 2
+    maxItems: 4
+    items:
+      - const: hosc
+      - const: losc
+      - const: iosc
+      - const: pll-periph
+
+required:
+  - "#clock-cells"
+  - "#reset-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+
+if:
+  properties:
+    compatible:
+      enum:
+        - allwinner,sun8i-a83t-r-ccu
+        - allwinner,sun8i-h3-r-ccu
+        - allwinner,sun50i-a64-r-ccu
+        - allwinner,sun50i-h6-r-ccu
+
+then:
+  properties:
+    clocks:
+      minItems: 4
+      maxItems: 4
+
+    clock-names:
+      minItems: 4
+      maxItems: 4
+
+else:
+  if:
+    properties:
+      compatible:
+        const: allwinner,sun50i-h6-ccu
+
+  then:
+    properties:
+      clocks:
+        minItems: 3
+        maxItems: 3
+
+      clock-names:
+        minItems: 3
+        maxItems: 3
+
+  else:
+    properties:
+      clocks:
+        minItems: 2
+        maxItems: 2
+
+      clock-names:
+        minItems: 2
+        maxItems: 2
+
+additionalProperties: false
+
+examples:
+  - |
+    ccu: clock@1c20000 {
+        compatible = "allwinner,sun8i-h3-ccu";
+        reg = <0x01c20000 0x400>;
+        clocks = <&osc24M>, <&osc32k>;
+        clock-names = "hosc", "losc";
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+    };
+
+  - |
+    r_ccu: clock@1f01400 {
+        compatible = "allwinner,sun50i-a64-r-ccu";
+        reg = <0x01f01400 0x100>;
+        clocks = <&osc24M>, <&osc32k>, <&iosc>, <&ccu 11>;
+        clock-names = "hosc", "losc", "iosc", "pll-periph";
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
deleted file mode 100644
index e3bd88ae456b..000000000000
--- a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
+++ /dev/null
@@ -1,62 +0,0 @@
-Allwinner Clock Control Unit Binding
-------------------------------------
-
-Required properties :
-- compatible: must contain one of the following compatibles:
-		- "allwinner,sun4i-a10-ccu"
-		- "allwinner,sun5i-a10s-ccu"
-		- "allwinner,sun5i-a13-ccu"
-		- "allwinner,sun6i-a31-ccu"
-		- "allwinner,sun7i-a20-ccu"
-		- "allwinner,sun8i-a23-ccu"
-		- "allwinner,sun8i-a33-ccu"
-		- "allwinner,sun8i-a83t-ccu"
-		- "allwinner,sun8i-a83t-r-ccu"
-		- "allwinner,sun8i-h3-ccu"
-		- "allwinner,sun8i-h3-r-ccu"
-+		- "allwinner,sun8i-r40-ccu"
-		- "allwinner,sun8i-v3s-ccu"
-		- "allwinner,sun9i-a80-ccu"
-		- "allwinner,sun50i-a64-ccu"
-		- "allwinner,sun50i-a64-r-ccu"
-		- "allwinner,sun50i-h5-ccu"
-		- "allwinner,sun50i-h6-ccu"
-		- "allwinner,sun50i-h6-r-ccu"
-		- "allwinner,suniv-f1c100s-ccu"
-		- "nextthing,gr8-ccu"
-
-- reg: Must contain the registers base address and length
-- clocks: phandle to the oscillators feeding the CCU. Two are needed:
-  - "hosc": the high frequency oscillator (usually at 24MHz)
-  - "losc": the low frequency oscillator (usually at 32kHz)
-	    On the A83T, this is the internal 16MHz oscillator divided by 512
-- clock-names: Must contain the clock names described just above
-- #clock-cells : must contain 1
-- #reset-cells : must contain 1
-
-For the main CCU on H6, one more clock is needed:
-- "iosc": the SoC's internal frequency oscillator
-
-For the PRCM CCUs on A83T/H3/A64/H6, two more clocks are needed:
-- "pll-periph": the SoC's peripheral PLL from the main CCU
-- "iosc": the SoC's internal frequency oscillator
-
-Example for generic CCU:
-ccu: clock@1c20000 {
-	compatible = "allwinner,sun8i-h3-ccu";
-	reg = <0x01c20000 0x400>;
-	clocks = <&osc24M>, <&osc32k>;
-	clock-names = "hosc", "losc";
-	#clock-cells = <1>;
-	#reset-cells = <1>;
-};
-
-Example for PRCM CCU:
-r_ccu: clock@1f01400 {
-	compatible = "allwinner,sun50i-a64-r-ccu";
-	reg = <0x01f01400 0x100>;
-	clocks = <&osc24M>, <&osc32k>, <&iosc>, <&ccu CLK_PLL_PERIPH0>;
-	clock-names = "hosc", "losc", "iosc", "pll-periph";
-	#clock-cells = <1>;
-	#reset-cells = <1>;
-};
-- 
cgit 


From 79e3f1d3db3d99ac7ae4f29b00da545df231a5a7 Mon Sep 17 00:00:00 2001
From: Raul E Rangel <rrangel@chromium.org>
Date: Mon, 3 Jun 2019 12:16:49 -0600
Subject: platform/chrome: wilco_ec: Add version sysfs entries

Add the ability to extract version information from the EC.

Example Output:
$ cd /sys/bus/platform/devices/GOOG000C:00
$ tail build_date build_revision version model_number
==> build_date <==
04/25/19

==> build_revision <==
d2592cae0

==> version <==
00.00.14

==> model_number <==
08B6

Signed-off-by: Raul E Rangel <rrangel@chromium.org>
Reviewed-by: Nick Crews <ncrews@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
---
 Documentation/ABI/testing/sysfs-platform-wilco-ec | 31 +++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-platform-wilco-ec b/Documentation/ABI/testing/sysfs-platform-wilco-ec
index 8e5d6eee44db..8827a734f933 100644
--- a/Documentation/ABI/testing/sysfs-platform-wilco-ec
+++ b/Documentation/ABI/testing/sysfs-platform-wilco-ec
@@ -7,3 +7,34 @@ Description:
 		want to run their device headless or with a dock.
 
 		Input should be parseable by kstrtou8() to 0 or 1.
+
+What:          /sys/bus/platform/devices/GOOG000C\:00/build_date
+Date:          May 2019
+KernelVersion: 5.3
+Description:
+               Display Wilco Embedded Controller firmware build date.
+               Output will a MM/DD/YY string.
+
+What:          /sys/bus/platform/devices/GOOG000C\:00/build_revision
+Date:          May 2019
+KernelVersion: 5.3
+Description:
+               Display Wilco Embedded Controller build revision.
+               Output will a version string be similar to the example below:
+               d2592cae0
+
+What:          /sys/bus/platform/devices/GOOG000C\:00/model_number
+Date:          May 2019
+KernelVersion: 5.3
+Description:
+               Display Wilco Embedded Controller model number.
+               Output will a version string be similar to the example below:
+               08B6
+
+What:          /sys/bus/platform/devices/GOOG000C\:00/version
+Date:          May 2019
+KernelVersion: 5.3
+Description:
+               Display Wilco Embedded Controller firmware version.
+               The format of the string is x.y.z. Where x is major, y is minor
+               and z is the build number. For example: 95.00.06
-- 
cgit 


From e96a8819a6c4fc578809ba79d64abca57145acb7 Mon Sep 17 00:00:00 2001
From: Marc Gonzalez <marc.w.gonzalez@free.fr>
Date: Wed, 5 Jun 2019 07:59:10 -0400
Subject: media: docs: fix minor typos

Fix minor typos in the DVB demux documentation.

Signed-off-by: Marc Gonzalez <marc.w.gonzalez@free.fr>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/kapi/dtv-core.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/media/kapi/dtv-core.rst b/Documentation/media/kapi/dtv-core.rst
index ac005b46f23e..82c5b85ed9b1 100644
--- a/Documentation/media/kapi/dtv-core.rst
+++ b/Documentation/media/kapi/dtv-core.rst
@@ -11,12 +11,12 @@ Digital TV devices are implemented by several different drivers:
 
 - Frontend drivers that are usually implemented as two separate drivers:
 
-  - A tuner driver that implements the logic with commands the part of the
-    hardware with is responsible to tune into a digital TV transponder or
+  - A tuner driver that implements the logic which commands the part of
+    the hardware responsible for tuning into a digital TV transponder or
     physical channel. The output of a tuner is usually a baseband or
     Intermediate Frequency (IF) signal;
 
-  - A demodulator driver (a.k.a "demod") that implements the logic with
+  - A demodulator driver (a.k.a "demod") that implements the logic which
     commands the digital TV decoding hardware. The output of a demod is
     a digital stream, with multiple audio, video and data channels typically
     multiplexed using MPEG Transport Stream [#f1]_.
-- 
cgit 


From 84060c65a8fa14ea9974a8f15b8280517b869cdd Mon Sep 17 00:00:00 2001
From: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Date: Thu, 30 May 2019 05:33:12 -0400
Subject: media: media/doc: Allow sizeimage to be set by v4l clients

This changes v4l2_pix_format and v4l2_plane_pix_format sizeimage
field description to allow v4l clients to set bigger image size
in case of variable length compressed data.

Presently s5p-mfc and mtk-vcodec codec drivers use that. Lets
make it obvious in the documentation.

Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst | 15 ++++++++++++++-
 Documentation/media/uapi/v4l/pixfmt-v4l2.rst        | 13 ++++++++++++-
 2 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst b/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst
index 5688c816e334..db43dda5aafb 100644
--- a/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst
@@ -31,7 +31,20 @@ describing all planes of that format.
 
     * - __u32
       - ``sizeimage``
-      - Maximum size in bytes required for image data in this plane.
+      - Maximum size in bytes required for image data in this plane,
+	set by the driver. When the image consists of variable length
+	compressed data this is the number of bytes required by the
+	codec to support the worst-case compression scenario.
+
+	The driver will set the value for uncompressed images.
+
+	Clients are allowed to set the sizeimage field for variable length
+	compressed data flagged with ``V4L2_FMT_FLAG_COMPRESSED`` at
+	:ref:`VIDIOC_ENUM_FMT`, but the driver may ignore it and set the
+	value itself, or it may modify the provided value based on
+	alignment requirements or minimum/maximum size requirements.
+	If the client wants to leave this to the driver, then it should
+	set sizeimage to 0.
     * - __u32
       - ``bytesperline``
       - Distance in bytes between the leftmost pixels in two adjacent
diff --git a/Documentation/media/uapi/v4l/pixfmt-v4l2.rst b/Documentation/media/uapi/v4l/pixfmt-v4l2.rst
index 71eebfc6d853..da6da2ef139a 100644
--- a/Documentation/media/uapi/v4l/pixfmt-v4l2.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-v4l2.rst
@@ -89,7 +89,18 @@ Single-planar format structure
       - Size in bytes of the buffer to hold a complete image, set by the
 	driver. Usually this is ``bytesperline`` times ``height``. When
 	the image consists of variable length compressed data this is the
-	maximum number of bytes required to hold an image.
+	number of bytes required by the codec to support the worst-case
+	compression scenario.
+
+	The driver will set the value for uncompressed images.
+
+	Clients are allowed to set the sizeimage field for variable length
+	compressed data flagged with ``V4L2_FMT_FLAG_COMPRESSED`` at
+	:ref:`VIDIOC_ENUM_FMT`, but the driver may ignore it and set the
+	value itself, or it may modify the provided value based on
+	alignment requirements or minimum/maximum size requirements.
+	If the client wants to leave this to the driver, then it should
+	set sizeimage to 0.
     * - __u32
       - ``colorspace``
       - Image colorspace, from enum :c:type:`v4l2_colorspace`.
-- 
cgit 


From 6aace2f89f785482efecbec3d61dfa41e0a64f9c Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Wed, 5 Jun 2019 10:31:06 -0400
Subject: media: Documentation: update email address

Use hverkuil-cisco@xs4all.nl instead of hans.verkuil@cisco.com.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/ABI/testing/debugfs-cec-error-inj | 2 +-
 Documentation/media/uapi/cec/cec-api.rst        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/debugfs-cec-error-inj b/Documentation/ABI/testing/debugfs-cec-error-inj
index 122b65c5fe62..4c3596c6d25b 100644
--- a/Documentation/ABI/testing/debugfs-cec-error-inj
+++ b/Documentation/ABI/testing/debugfs-cec-error-inj
@@ -1,6 +1,6 @@
 What:		/sys/kernel/debug/cec/*/error-inj
 Date:		March 2018
-Contact:	Hans Verkuil <hans.verkuil@cisco.com>
+Contact:	Hans Verkuil <hverkuil-cisco@xs4all.nl>
 Description:
 
 The CEC Framework allows for CEC error injection commands through
diff --git a/Documentation/media/uapi/cec/cec-api.rst b/Documentation/media/uapi/cec/cec-api.rst
index b614bf81aa20..0780ba07995a 100644
--- a/Documentation/media/uapi/cec/cec-api.rst
+++ b/Documentation/media/uapi/cec/cec-api.rst
@@ -39,7 +39,7 @@ Revision and Copyright
 **********************
 Authors:
 
-- Verkuil, Hans <hans.verkuil@cisco.com>
+- Verkuil, Hans <hverkuil-cisco@xs4all.nl>
 
  - Initial version.
 
-- 
cgit 


From 323a53c41292a0d7efc8748856c623324c8d7c21 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 5 Jun 2019 07:55:09 -0700
Subject: ipv6: tcp: enable flowlabel reflection in some RST packets

When RST packets are sent because no socket could be found,
it makes sense to use flowlabel_reflect sysctl to decide
if a reflection of the flowlabel is requested.

This extends commit 22b6722bfa59 ("ipv6: Add sysctl for per
namespace flow label reflection"), for some TCP RST packets.

In order to provide full control of this new feature,
flowlabel_reflect becomes a bitmask.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index a73b3a02e49a..f4b1043e92ed 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1429,14 +1429,24 @@ flowlabel_state_ranges - BOOLEAN
 	FALSE: disabled
 	Default: true
 
-flowlabel_reflect - BOOLEAN
-	Automatically reflect the flow label. Needed for Path MTU
+flowlabel_reflect - INTEGER
+	Control flow label reflection. Needed for Path MTU
 	Discovery to work with Equal Cost Multipath Routing in anycast
 	environments. See RFC 7690 and:
 	https://tools.ietf.org/html/draft-wang-6man-flow-label-reflection-01
-	TRUE: enabled
-	FALSE: disabled
-	Default: FALSE
+
+	This is a mask of two bits.
+	1: enabled for established flows
+
+	Note that this prevents automatic flowlabel changes, as done
+	in "tcp: change IPv6 flow-label upon receiving spurious retransmission"
+	and "tcp: Change txhash on every SYN and RTO retransmit"
+
+	2: enabled for TCP RESET packets (no active listener)
+	If set, a RST packet sent in response to a SYN packet on a closed
+	port will reflect the incoming flow label.
+
+	Default: 0
 
 fib_multipath_hash_policy - INTEGER
 	Controls which hash policy to use for multipath routes.
-- 
cgit 


From 8a6f43d4d92d730dbe20fb84b72563be87d61446 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 5 Jun 2019 12:56:56 +0300
Subject: Documentation/i915: Fix kernel-doc references to moved gem files

The error messages could be more descriptive, but fix these caused by
file moves:

WARNING: kernel-doc './scripts/kernel-doc -rst -enable-lineno -internal
    ./drivers/gpu/drm/i915/i915_gem_shrinker.c' failed with return code 2
WARNING: kernel-doc './scripts/kernel-doc -rst -enable-lineno -function
    User command execution ./drivers/gpu/drm/i915/i915_gem_execbuffer.c'
    failed with return code 1
WARNING: kernel-doc './scripts/kernel-doc -rst -enable-lineno -internal
    ./drivers/gpu/drm/i915/i915_gem_tiling.c' failed with return code 2
WARNING: kernel-doc './scripts/kernel-doc -rst -enable-lineno -function
    buffer object tiling ./drivers/gpu/drm/i915/i915_gem_tiling.c'
    failed with return code 1

Fixes: 10be98a77c55 ("drm/i915: Move more GEM objects under gem/")
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190605095657.23601-1-jani.nikula@intel.com
---
 Documentation/gpu/i915.rst | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 6c75380b2928..f98ee95da90f 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -349,7 +349,7 @@ of buffer object caches. Shrinking is used to make main memory
 available. Note that this is mostly orthogonal to evicting buffer
 objects, which has the goal to make space in gpu virtual address spaces.
 
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_shrinker.c
+.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
    :internal:
 
 Batchbuffer Parsing
@@ -373,7 +373,7 @@ Batchbuffer Pools
 User Batchbuffer Execution
 --------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_execbuffer.c
+.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
    :doc: User command execution
 
 Logical Rings, Logical Ring Contexts and Execlists
@@ -382,9 +382,6 @@ Logical Rings, Logical Ring Contexts and Execlists
 .. kernel-doc:: drivers/gpu/drm/i915/gt/intel_lrc.c
    :doc: Logical Rings, Logical Ring Contexts and Execlists
 
-.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_lrc.c
-   :internal:
-
 Global GTT views
 ----------------
 
@@ -415,10 +412,10 @@ Hardware Tiling and Swizzling Details
 Object Tiling IOCTLs
 --------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_tiling.c
+.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_tiling.c
    :internal:
 
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_tiling.c
+.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_tiling.c
    :doc: buffer object tiling
 
 WOPCM
-- 
cgit 


From affa22b5f0f7e9caf61887671abe38819737bf16 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 5 Jun 2019 12:56:57 +0300
Subject: drm/i915: fix documentation build warnings

Just a straightforward bag of fixes for a clean htmldocs build.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190605095657.23601-2-jani.nikula@intel.com
---
 Documentation/gpu/i915.rst | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index f98ee95da90f..300220c4b498 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -475,12 +475,6 @@ i915_context_create and i915_context_free
 .. kernel-doc:: drivers/gpu/drm/i915/i915_trace.h
    :doc: i915_context_create and i915_context_free tracepoints
 
-switch_mm
----------
-
-.. kernel-doc:: drivers/gpu/drm/i915/i915_trace.h
-   :doc: switch_mm tracepoint
-
 Perf
 ====
 
-- 
cgit 


From 9d79b2f1aed4909988ecd8b370c7919856639699 Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Date: Fri, 17 May 2019 10:26:28 +0100
Subject: dt-bindings: Add vendor prefix for HopeRun

Add "Jiangsu HopeRun Software Co., Ltd." to the list of devicetree
vendor prefixes as "hoperun".

Website: http://www.hoperun.com/en

Signed-off-by: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Reviewed-by: Chris Paterson <Chris.Paterson2@renesas.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 33a65a45e319..83ca4816a78b 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -371,6 +371,8 @@ patternProperties:
     description: Holt Integrated Circuits, Inc.
   "^honeywell,.*":
     description: Honeywell
+  "^hoperun,.*":
+    description: Jiangsu HopeRun Software Co., Ltd.
   "^hp,.*":
     description: Hewlett Packard
   "^holtek,.*":
-- 
cgit 


From 7bdcb8e0454af9f1b3cd16239ae87c9d00f7a134 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Tue, 28 May 2019 22:30:31 +0200
Subject: dt-bindings: rtc: Add YAML schemas for the generic RTC bindings

The real time clocks have a bunch of generic properties that are needed in
a device tree. Add a YAML schemas for those.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 Documentation/devicetree/bindings/rtc/rtc.txt  | 34 +-----------------
 Documentation/devicetree/bindings/rtc/rtc.yaml | 50 ++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 33 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/rtc/rtc.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/rtc/rtc.txt b/Documentation/devicetree/bindings/rtc/rtc.txt
index a97fc6a9a75e..4d6f81dccc68 100644
--- a/Documentation/devicetree/bindings/rtc/rtc.txt
+++ b/Documentation/devicetree/bindings/rtc/rtc.txt
@@ -1,36 +1,4 @@
-Generic device tree bindings for Real Time Clock devices
-========================================================
-
-This document describes generic bindings which can be used to describe Real Time
-Clock devices in a device tree.
-
-Required properties
--------------------
-
-- compatible : name of RTC device following generic names recommended practice.
-
-For other required properties e.g. to describe register sets,
-clocks, etc. check the binding documentation of the specific driver.
-
-Optional properties
--------------------
-
-- start-year : if provided, the default hardware range supported by the RTC is
-               shifted so the first usable year is the specified one.
-
-The following properties may not be supported by all drivers. However, if a
-driver wants to support one of the below features, it should adapt the bindings
-below.
-- trickle-resistor-ohms :   Selected resistor for trickle charger. Should be given
-                            if trickle charger should be enabled
-- trickle-diode-disable :   Do not use internal trickle charger diode Should be
-                            given if internal trickle charger diode should be
-                            disabled
-- wakeup-source :           Enables wake up of host system on alarm
-- quartz-load-femtofarads : The capacitive load of the quartz(x-tal),
-                            expressed in femto Farad (fF).
-                            The default value shall be listed (if optional),
-                            and likewise all valid values.
+This file has been moved to rtc.yaml.
 
 Trivial RTCs
 ------------
diff --git a/Documentation/devicetree/bindings/rtc/rtc.yaml b/Documentation/devicetree/bindings/rtc/rtc.yaml
new file mode 100644
index 000000000000..ee237b2ed66a
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/rtc.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/rtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: RTC Generic Binding
+
+maintainers:
+  - Alexandre Belloni <alexandre.belloni@bootlin.com>
+
+description: |
+  This document describes generic bindings which can be used to
+  describe Real Time Clock devices in a device tree.
+
+properties:
+  $nodename:
+    pattern: "^rtc(@.*|-[0-9a-f])*$"
+
+  quartz-load-femtofarads:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      The capacitive load of the quartz(x-tal), expressed in femto
+      Farad (fF). The default value shall be listed (if optional),
+      and likewise all valid values.
+
+  start-year:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      If provided, the default hardware range supported by the RTC is
+      shifted so the first usable year is the specified one.
+
+  trickle-diode-disable:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      Do not use internal trickle charger diode. Should be given if
+      internal trickle charger diode should be disabled.
+
+  trickle-resistor-ohms:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Selected resistor for trickle charger. Should be given
+      if trickle charger should be enabled.
+
+  wakeup-source:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      Enables wake up of host system on alarm.
+
+...
-- 
cgit 


From 43390e0710fdfe4bd7793570c45d9092ed93da1b Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Tue, 28 May 2019 22:30:32 +0200
Subject: dt-bindings: rtc: Move trivial RTC over to a schemas of their own

The RTC generic bindings has a bunch of devices that have a pretty simple
binding, with just compatible, reg and optional interrupts properties.

This is exactly what the trivial devices YAML schema has been created for,
except that they can also have the start-year property, but not any other
generic RTC property.

Let's create a schema with those constraints.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 Documentation/devicetree/bindings/rtc/rtc.txt      | 39 ---------
 .../devicetree/bindings/rtc/trivial-rtc.yaml       | 92 ++++++++++++++++++++++
 2 files changed, 92 insertions(+), 39 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/rtc/trivial-rtc.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/rtc/rtc.txt b/Documentation/devicetree/bindings/rtc/rtc.txt
index 4d6f81dccc68..b8d36fce5e2d 100644
--- a/Documentation/devicetree/bindings/rtc/rtc.txt
+++ b/Documentation/devicetree/bindings/rtc/rtc.txt
@@ -1,40 +1 @@
 This file has been moved to rtc.yaml.
-
-Trivial RTCs
-------------
-
-This is a list of trivial RTC devices that have simple device tree
-bindings, consisting only of a compatible field, an address and
-possibly an interrupt line.
-
-
-Compatible		Vendor / Chip
-==========		=============
-abracon,abb5zes3	AB-RTCMC-32.768kHz-B5ZE-S3: Real Time Clock/Calendar Module with I2C Interface
-abracon,abeoz9		AB-RTCMC-32.768kHz-EOZ9: Real Time Clock/Calendar Module with I2C Interface
-dallas,ds1374		I2C, 32-Bit Binary Counter Watchdog RTC with Trickle Charger and Reset Input/Output
-dallas,ds1672		Dallas DS1672 Real-time Clock
-dallas,ds3232		Extremely Accurate I²C RTC with Integrated Crystal and SRAM
-epson,rx8010		I2C-BUS INTERFACE REAL TIME CLOCK MODULE
-epson,rx8571		I2C-BUS INTERFACE REAL TIME CLOCK MODULE with Battery Backed RAM
-epson,rx8581		I2C-BUS INTERFACE REAL TIME CLOCK MODULE
-emmicro,em3027		EM Microelectronic EM3027 Real-time Clock
-isil,isl1208		Intersil ISL1208 Low Power RTC with Battery Backed SRAM
-isil,isl1218		Intersil ISL1218 Low Power RTC with Battery Backed SRAM
-isil,isl12022		Intersil ISL12022 Real-time Clock
-microcrystal,rv3028	Real Time Clock Module with I2C-Bus
-microcrystal,rv3029	Real Time Clock Module with I2C-Bus
-microcrystal,rv8523	Real Time Clock
-nxp,pcf2127		Real-time clock
-nxp,pcf2129		Real-time clock
-nxp,pcf8563		Real-time clock/calendar
-pericom,pt7c4338	Real-time Clock Module
-ricoh,r2025sd		I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
-ricoh,r2221tl		I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
-ricoh,rs5c372a		I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
-ricoh,rs5c372b		I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
-ricoh,rv5c386		I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
-ricoh,rv5c387a		I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
-sii,s35390a		2-wire CMOS real-time clock
-whwave,sd3078		I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
-xircom,x1205		Xircom X1205 I2C RTC
diff --git a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml
new file mode 100644
index 000000000000..0c12ce9a9b45
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/trivial-rtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Trivial RTCs
+
+maintainers:
+  - Alexandre Belloni <alexandre.belloni@bootlin.com>
+
+description: |
+  This is a list of trivial RTC devices that have simple device tree
+  bindings, consisting only of a compatible field, an address and
+  possibly an interrupt line.
+
+allOf:
+  - $ref: "rtc.yaml#"
+
+properties:
+  compatible:
+    enum:
+      # AB-RTCMC-32.768kHz-B5ZE-S3: Real Time Clock/Calendar Module with I2C Interface
+      - abracon,abb5zes3
+      # AB-RTCMC-32.768kHz-EOZ9: Real Time Clock/Calendar Module with I2C Interface
+      - abracon,abeoz9
+      # I2C, 32-Bit Binary Counter Watchdog RTC with Trickle Charger and Reset Input/Output
+      - dallas,ds1374
+      # Dallas DS1672 Real-time Clock
+      - dallas,ds1672
+      # Extremely Accurate I²C RTC with Integrated Crystal and SRAM
+      - dallas,ds3232
+      # I2C-BUS INTERFACE REAL TIME CLOCK MODULE
+      - epson,rx8010
+      # I2C-BUS INTERFACE REAL TIME CLOCK MODULE with Battery Backed RAM
+      - epson,rx8571
+      # I2C-BUS INTERFACE REAL TIME CLOCK MODULE
+      - epson,rx8581
+      # Intersil ISL1208 Low Power RTC with Battery Backed SRAM
+      - isil,isl1208
+      # Intersil ISL1218 Low Power RTC with Battery Backed SRAM
+      - isil,isl1218
+      # Intersil ISL12022 Real-time Clock
+      - isil,isl12022
+      # Real Time Clock Module with I2C-Bus
+      - microcrystal,rv3028
+      # Real Time Clock Module with I2C-Bus
+      - microcrystal,rv3029
+      # Real Time Clock
+      - microcrystal,rv8523
+      # Real-time clock
+      - nxp,pcf2127
+      # Real-time clock
+      - nxp,pcf2129
+      # Real-time clock/calendar
+      - nxp,pcf8563
+      # Real-time Clock Module
+      - pericom,pt7c4338
+      # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
+      - ricoh,r2025sd
+      # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
+      - ricoh,r2221tl
+      # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
+      - ricoh,rs5c372a
+      # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
+      - ricoh,rs5c372b
+      # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
+      - ricoh,rv5c386
+      # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
+      - ricoh,rv5c387a
+      # 2-wire CMOS real-time clock
+      - sii,s35390a
+      # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
+      - whwave,sd3078
+      # Xircom X1205 I2C RTC
+      - xircom,x1205
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  start-year: true
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+...
-- 
cgit 


From 36e63ef3582b7cf68ad21fea056a278f9ccef6eb Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Tue, 28 May 2019 22:30:33 +0200
Subject: dt-bindings: rtc: Convert Allwinner A10 RTC to a schema

The older Allwinner SoCs have an embedded RTC supported in Linux, with a
matching Device Tree binding.

Now that we have the DT validation in place, let's convert the device tree
bindings for that controller over to a YAML schemas.

Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 .../bindings/rtc/allwinner,sun4i-a10-rtc.yaml      | 43 ++++++++++++++++++++++
 .../devicetree/bindings/rtc/sunxi-rtc.txt          | 17 ---------
 2 files changed, 43 insertions(+), 17 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/rtc/allwinner,sun4i-a10-rtc.yaml
 delete mode 100644 Documentation/devicetree/bindings/rtc/sunxi-rtc.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/rtc/allwinner,sun4i-a10-rtc.yaml b/Documentation/devicetree/bindings/rtc/allwinner,sun4i-a10-rtc.yaml
new file mode 100644
index 000000000000..46d69c32b89b
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/allwinner,sun4i-a10-rtc.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/allwinner,sun4i-a10-rtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 RTC Device Tree Bindings
+
+allOf:
+  - $ref: "rtc.yaml#"
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  compatible:
+    enum:
+      - allwinner,sun4i-a10-rtc
+      - allwinner,sun7i-a20-rtc
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    rtc: rtc@1c20d00 {
+        compatible = "allwinner,sun4i-a10-rtc";
+        reg = <0x01c20d00 0x20>;
+        interrupts = <24>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/rtc/sunxi-rtc.txt b/Documentation/devicetree/bindings/rtc/sunxi-rtc.txt
deleted file mode 100644
index 4a8d79c1cf08..000000000000
--- a/Documentation/devicetree/bindings/rtc/sunxi-rtc.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-* sun4i/sun7i Real Time Clock
-
-RTC controller for the Allwinner A10/A20
-
-Required properties:
-- compatible : Should be "allwinner,sun4i-a10-rtc" or "allwinner,sun7i-a20-rtc"
-- reg: physical base address of the controller and length of memory mapped
-  region.
-- interrupts: IRQ line for the RTC.
-
-Example:
-
-rtc: rtc@1c20d00 {
-	compatible = "allwinner,sun4i-a10-rtc";
-	reg = <0x01c20d00 0x20>;
-	interrupts = <24>;
-};
-- 
cgit 


From 5a0797599b472d6fa35bed483e67c02c380ef041 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Tue, 28 May 2019 22:30:34 +0200
Subject: dt-bindings: rtc: Convert Allwinner A31 RTC to a schema

The newer Allwinner SoCs have an embedded RTC supported in Linux, with a
matching Device Tree binding.

Now that we have the DT validation in place, let's convert the device tree
bindings for that controller over to a YAML schemas.

Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 .../bindings/rtc/allwinner,sun6i-a31-rtc.yaml      | 115 +++++++++++++++++++++
 .../devicetree/bindings/rtc/sun6i-rtc.txt          |  46 ---------
 2 files changed, 115 insertions(+), 46 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
 delete mode 100644 Documentation/devicetree/bindings/rtc/sun6i-rtc.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml b/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
new file mode 100644
index 000000000000..f154bbba6a69
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/allwinner,sun6i-a31-rtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A31 RTC Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  "#clock-cells":
+    const: 1
+
+  compatible:
+    oneOf:
+      - const: allwinner,sun6i-a31-rtc
+      - const: allwinner,sun8i-a23-rtc
+      - const: allwinner,sun8i-h3-rtc
+      - items:
+          - const: allwinner,sun8i-r40-rtc
+          - const: allwinner,sun8i-h3-rtc
+      - const: allwinner,sun8i-v3-rtc
+      - const: allwinner,sun50i-h5-rtc
+      - items:
+          - const: allwinner,sun50i-a64-rtc
+          - const: allwinner,sun8i-h3-rtc
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    items:
+      - description: RTC Alarm 0
+      - description: RTC Alarm 1
+
+  clocks:
+    maxItems: 1
+
+  clock-output-names:
+    minItems: 1
+    maxItems: 3
+    description:
+      The RTC provides up to three clocks
+        - the Low Frequency Oscillator or LOSC, at index 0,
+        - the Low Frequency Oscillator External output (X32KFOUT in
+          the datasheet), at index 1,
+        - the Internal Oscillator, at index 2.
+
+allOf:
+  - $ref: "rtc.yaml#"
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,sun6i-a31-rtc
+
+    then:
+      properties:
+        clock-output-names:
+          minItems: 1
+          maxItems: 1
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun8i-a23-rtc
+              - allwinner,sun8i-v3-rtc
+
+    then:
+      properties:
+        clock-output-names:
+          minItems: 2
+          maxItems: 2
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun8i-h3-rtc
+              - allwinner,sun50i-h5-rtc
+
+    then:
+      properties:
+        clock-output-names:
+          minItems: 3
+          maxItems: 3
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-output-names
+
+additionalProperties: false
+
+examples:
+  - |
+    rtc: rtc@1f00000 {
+        compatible = "allwinner,sun6i-a31-rtc";
+        reg = <0x01f00000 0x400>;
+        interrupts = <0 40 4>, <0 41 4>;
+        clock-output-names = "osc32k";
+        clocks = <&ext_osc32k>;
+        #clock-cells = <1>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/rtc/sun6i-rtc.txt b/Documentation/devicetree/bindings/rtc/sun6i-rtc.txt
deleted file mode 100644
index 6b732c41392b..000000000000
--- a/Documentation/devicetree/bindings/rtc/sun6i-rtc.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-* sun6i Real Time Clock
-
-RTC controller for the Allwinner A31
-
-Required properties:
-- compatible	: Should be one of the following combinations:
-		    - "allwinner,sun6i-a31-rtc"
-		    - "allwinner,sun8i-a23-rtc"
-		    - "allwinner,sun8i-h3-rtc"
-		    - "allwinner,sun8i-r40-rtc", "allwinner,sun8i-h3-rtc"
-		    - "allwinner,sun8i-v3-rtc"
-		    - "allwinner,sun50i-a64-rtc", "allwinner,sun8i-h3-rtc"
-		    - "allwinner,sun50i-h5-rtc"
-
-		  Where there are two or more compatible strings, this
-		  denotes the hardware covered by the most specific one
-		  is backward-compatible with the latter ones, and the
-		  implementation for the latter ones can be used, albeit
-		  with reduced functionality.
-
-- reg		: physical base address of the controller and length of
-		  memory mapped region.
-- interrupts	: IRQ lines for the RTC alarm 0 and alarm 1, in that order.
-
-Required properties for new device trees
-- clocks	: phandle to the 32kHz external oscillator
-- clock-output-names : names of up to three clock outputs. See below.
-- #clock-cells  : must be equal to 1.
-
-The RTC provides the following clocks at the given indices:
-- 0: LOSC
-- 1: LOSC external output, known as X32KFOUT in the datasheet.
-     This clock is not available on the A31 and is deprecated for old
-     device trees still using the "allwinner,sun6i-a31-rtc" compatible.
-- 2: InternalOSC, or internal RC oscillator (A64/H3/H5 only)
-
-Example:
-
-rtc: rtc@1f00000 {
-	compatible = "allwinner,sun6i-a31-rtc";
-	reg = <0x01f00000 0x400>;
-	interrupts = <0 40 4>, <0 41 4>;
-	clock-output-names = "osc32k";
-	clocks = <&ext_osc32k>;
-	#clock-cells = <1>;
-};
-- 
cgit 


From f5a336496e0f9aacb451b3b72eb44f78b3fe604c Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Tue, 28 May 2019 22:30:35 +0200
Subject: dt-bindings: rtc: sun6i: Add the R40 RTC compatible

The R40 has a pretty different RTC compared to the other SoCs we've
encountered so far, the most important difference being that it now has
only a single interrupt, compared to the previous SoCs having two.

Let's add a compatible for that.

Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 .../bindings/rtc/allwinner,sun6i-a31-rtc.yaml      | 25 +++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml b/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
index f154bbba6a69..924622f39c44 100644
--- a/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
@@ -19,9 +19,7 @@ properties:
       - const: allwinner,sun6i-a31-rtc
       - const: allwinner,sun8i-a23-rtc
       - const: allwinner,sun8i-h3-rtc
-      - items:
-          - const: allwinner,sun8i-r40-rtc
-          - const: allwinner,sun8i-h3-rtc
+      - const: allwinner,sun8i-r40-rtc
       - const: allwinner,sun8i-v3-rtc
       - const: allwinner,sun50i-h5-rtc
       - items:
@@ -32,6 +30,8 @@ properties:
     maxItems: 1
 
   interrupts:
+    minItems: 1
+    maxItems: 2
     items:
       - description: RTC Alarm 0
       - description: RTC Alarm 1
@@ -69,6 +69,7 @@ allOf:
           contains:
             enum:
               - allwinner,sun8i-a23-rtc
+              - allwinner,sun8i-r40-rtc
               - allwinner,sun8i-v3-rtc
 
     then:
@@ -91,6 +92,24 @@ allOf:
           minItems: 3
           maxItems: 3
 
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,sun8i-r40-rtc
+
+    then:
+      properties:
+        interrupts:
+          minItems: 1
+          maxItems: 1
+
+    else:
+      properties:
+        interrupts:
+          minItems: 2
+          maxItems: 2
+
 required:
   - "#clock-cells"
   - compatible
-- 
cgit 


From ee5dc0491c38ae4e4e583d7532d470754bb173f6 Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <tobin@kernel.org>
Date: Tue, 4 Jun 2019 10:26:56 +1000
Subject: docs: filesystems: vfs: Render method descriptions

Currently vfs.rst does not render well into HTML the method descriptions
for VFS data structures.  We can improve the HTML output by putting the
description string on a new line following the method name.

Suggested-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Tobin C. Harding <tobin@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/vfs.rst | 1147 +++++++++++++++++++++----------------
 1 file changed, 642 insertions(+), 505 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index 2ffbdf5f392c..0f85ab21c2ca 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -125,35 +125,46 @@ members are defined:
 		struct lock_class_key s_umount_key;
 	};
 
-``name``: the name of the filesystem type, such as "ext2", "iso9660",
+``name``
+	the name of the filesystem type, such as "ext2", "iso9660",
 	"msdos" and so on
 
-``fs_flags``: various flags (i.e. FS_REQUIRES_DEV, FS_NO_DCACHE, etc.)
+``fs_flags``
+	various flags (i.e. FS_REQUIRES_DEV, FS_NO_DCACHE, etc.)
 
-``mount``: the method to call when a new instance of this filesystem should
-be mounted
+``mount``
+	the method to call when a new instance of this filesystem should
+	be mounted
 
-``kill_sb``: the method to call when an instance of this filesystem
-	should be shut down
+``kill_sb``
+	the method to call when an instance of this filesystem should be
+	shut down
 
-``owner``: for internal VFS use: you should initialize this to THIS_MODULE in
-	most cases.
 
-``next``: for internal VFS use: you should initialize this to NULL
+``owner``
+	for internal VFS use: you should initialize this to THIS_MODULE
+	in most cases.
+
+``next``
+	for internal VFS use: you should initialize this to NULL
 
   s_lock_key, s_umount_key: lockdep-specific
 
 The mount() method has the following arguments:
 
-``struct file_system_type *fs_type``: describes the filesystem, partly initialized
-	by the specific filesystem code
+``struct file_system_type *fs_type``
+	describes the filesystem, partly initialized by the specific
+	filesystem code
 
-``int flags``: mount flags
+``int flags``
+	mount flags
 
-``const char *dev_name``: the device name we are mounting.
+``const char *dev_name``
+	the device name we are mounting.
 
-``void *data``: arbitrary mount options, usually comes as an ASCII
-	string (see "Mount Options" section)
+``void *data``
+	arbitrary mount options, usually comes as an ASCII string (see
+	"Mount Options" section)
 
 The mount() method must return the root dentry of the tree requested by
 caller.  An active reference to its superblock must be grabbed and the
@@ -178,22 +189,27 @@ implementation.
 Usually, a filesystem uses one of the generic mount() implementations
 and provides a fill_super() callback instead.  The generic variants are:
 
-``mount_bdev``: mount a filesystem residing on a block device
+``mount_bdev``
+	mount a filesystem residing on a block device
 
-``mount_nodev``: mount a filesystem that is not backed by a device
+``mount_nodev``
+	mount a filesystem that is not backed by a device
 
-``mount_single``: mount a filesystem which shares the instance between
-	all mounts
+``mount_single``
+	mount a filesystem which shares the instance between all mounts
 
 A fill_super() callback implementation has the following arguments:
 
-``struct super_block *sb``: the superblock structure.  The callback
-	must initialize this properly.
+``struct super_block *sb``
+	the superblock structure.  The callback must initialize this
+	properly.
 
-``void *data``: arbitrary mount options, usually comes as an ASCII
-	string (see "Mount Options" section)
+``void *data``
+	arbitrary mount options, usually comes as an ASCII string (see
+	"Mount Options" section)
 
-``int silent``: whether or not to be silent on error
+``int silent``
+	whether or not to be silent on error
 
 
 The Superblock Object
@@ -240,87 +256,106 @@ noted.  This means that most methods can block safely.  All methods are
 only called from a process context (i.e. not from an interrupt handler
 or bottom half).
 
-``alloc_inode``: this method is called by alloc_inode() to allocate memory
-	for struct inode and initialize it.  If this function is not
+``alloc_inode``
+	this method is called by alloc_inode() to allocate memory for
+	struct inode and initialize it.  If this function is not
 	defined, a simple 'struct inode' is allocated.  Normally
 	alloc_inode will be used to allocate a larger structure which
 	contains a 'struct inode' embedded within it.
 
-``destroy_inode``: this method is called by destroy_inode() to release
-	resources allocated for struct inode.  It is only required if
+``destroy_inode``
+	this method is called by destroy_inode() to release resources
+	allocated for struct inode.  It is only required if
 	->alloc_inode was defined and simply undoes anything done by
 	->alloc_inode.
 
-``dirty_inode``: this method is called by the VFS to mark an inode dirty.
+``dirty_inode``
+	this method is called by the VFS to mark an inode dirty.
 
-``write_inode``: this method is called when the VFS needs to write an
-	inode to disc.  The second parameter indicates whether the write
-	should be synchronous or not, not all filesystems check this flag.
+``write_inode``
+	this method is called when the VFS needs to write an inode to
+	disc.  The second parameter indicates whether the write should
+	be synchronous or not, not all filesystems check this flag.
 
-``drop_inode``: called when the last access to the inode is dropped,
-	with the inode->i_lock spinlock held.
+``drop_inode``
+	called when the last access to the inode is dropped, with the
+	inode->i_lock spinlock held.
 
 	This method should be either NULL (normal UNIX filesystem
-	semantics) or "generic_delete_inode" (for filesystems that do not
-	want to cache inodes - causing "delete_inode" to always be
+	semantics) or "generic_delete_inode" (for filesystems that do
+	not want to cache inodes - causing "delete_inode" to always be
 	called regardless of the value of i_nlink)
 
-	The "generic_delete_inode()" behavior is equivalent to the
-	old practice of using "force_delete" in the put_inode() case,
-	but does not have the races that the "force_delete()" approach
-	had. 
+	The "generic_delete_inode()" behavior is equivalent to the old
+	practice of using "force_delete" in the put_inode() case, but
+	does not have the races that the "force_delete()" approach had.
 
-``delete_inode``: called when the VFS wants to delete an inode
+``delete_inode``
+	called when the VFS wants to delete an inode
 
-``put_super``: called when the VFS wishes to free the superblock
+``put_super``
+	called when the VFS wishes to free the superblock
 	(i.e. unmount).  This is called with the superblock lock held
 
-``sync_fs``: called when VFS is writing out all dirty data associated with
-	a superblock.  The second parameter indicates whether the method
+``sync_fs``
+	called when VFS is writing out all dirty data associated with a
+	superblock.  The second parameter indicates whether the method
 	should wait until the write out has been completed.  Optional.
 
-``freeze_fs``: called when VFS is locking a filesystem and
-	forcing it into a consistent state.  This method is currently
-	used by the Logical Volume Manager (LVM).
+``freeze_fs``
+	called when VFS is locking a filesystem and forcing it into a
+	consistent state.  This method is currently used by the Logical
+	Volume Manager (LVM).
 
-``unfreeze_fs``: called when VFS is unlocking a filesystem and making it writable
+``unfreeze_fs``
+	called when VFS is unlocking a filesystem and making it writable
 	again.
 
-``statfs``: called when the VFS needs to get filesystem statistics.
+``statfs``
+	called when the VFS needs to get filesystem statistics.
 
-``remount_fs``: called when the filesystem is remounted.  This is called
-	with the kernel lock held
+``remount_fs``
+	called when the filesystem is remounted.  This is called with
+	the kernel lock held
 
-``clear_inode``: called then the VFS clears the inode.  Optional
+``clear_inode``
+	called then the VFS clears the inode.  Optional
 
-``umount_begin``: called when the VFS is unmounting a filesystem.
+``umount_begin``
+	called when the VFS is unmounting a filesystem.
 
-``show_options``: called by the VFS to show mount options for
-	/proc/<pid>/mounts.  (see "Mount Options" section)
+``show_options``
+	called by the VFS to show mount options for /proc/<pid>/mounts.
+	(see "Mount Options" section)
 
-``quota_read``: called by the VFS to read from filesystem quota file.
+``quota_read``
+	called by the VFS to read from filesystem quota file.
 
-``quota_write``: called by the VFS to write to filesystem quota file.
+``quota_write``
+	called by the VFS to write to filesystem quota file.
 
-``nr_cached_objects``: called by the sb cache shrinking function for the
-	filesystem to return the number of freeable cached objects it contains.
+``nr_cached_objects``
+	called by the sb cache shrinking function for the filesystem to
+	return the number of freeable cached objects it contains.
 	Optional.
 
-``free_cache_objects``: called by the sb cache shrinking function for the
-	filesystem to scan the number of objects indicated to try to free them.
-	Optional, but any filesystem implementing this method needs to also
-	implement ->nr_cached_objects for it to be called correctly.
+``free_cache_objects``
+	called by the sb cache shrinking function for the filesystem to
+	scan the number of objects indicated to try to free them.
+	Optional, but any filesystem implementing this method needs to
+	also implement ->nr_cached_objects for it to be called
+	correctly.
 
 	We can't do anything with any errors that the filesystem might
-	encountered, hence the void return type.  This will never be called if
-	the VM is trying to reclaim under GFP_NOFS conditions, hence this
-	method does not need to handle that situation itself.
+	encountered, hence the void return type.  This will never be
+	called if the VM is trying to reclaim under GFP_NOFS conditions,
+	hence this method does not need to handle that situation itself.
 
-	Implementations must include conditional reschedule calls inside any
-	scanning loop that is done.  This allows the VFS to determine
-	appropriate scan batch sizes without having to worry about whether
-	implementations will cause holdoff problems due to large scan batch
-	sizes.
+	Implementations must include conditional reschedule calls inside
+	any scanning loop that is done.  This allows the VFS to
+	determine appropriate scan batch sizes without having to worry
+	about whether implementations will cause holdoff problems due to
+	large scan batch sizes.
 
 Whoever sets up the inode is responsible for filling in the "i_op"
 field.  This is a pointer to a "struct inode_operations" which describes
@@ -334,23 +369,31 @@ On filesystems that support extended attributes (xattrs), the s_xattr
 superblock field points to a NULL-terminated array of xattr handlers.
 Extended attributes are name:value pairs.
 
-``name``: Indicates that the handler matches attributes with the specified name
-	(such as "system.posix_acl_access"); the prefix field must be NULL.
+``name``
+	Indicates that the handler matches attributes with the specified
+	name (such as "system.posix_acl_access"); the prefix field must
+	be NULL.
 
-``prefix``: Indicates that the handler matches all attributes with the specified
-	name prefix (such as "user."); the name field must be NULL.
+``prefix``
+	Indicates that the handler matches all attributes with the
+	specified name prefix (such as "user."); the name field must be
+	NULL.
 
-``list``: Determine if attributes matching this xattr handler should be listed
-	for a particular dentry.  Used by some listxattr implementations like
-	generic_listxattr.
+``list``
+	Determine if attributes matching this xattr handler should be
+	listed for a particular dentry.  Used by some listxattr
+	implementations like generic_listxattr.
 
-``get``: Called by the VFS to get the value of a particular extended attribute.
-	This method is called by the getxattr(2) system call.
+``get``
+	Called by the VFS to get the value of a particular extended
+	attribute.  This method is called by the getxattr(2) system
+	call.
 
-``set``: Called by the VFS to set the value of a particular extended attribute.
-	When the new value is NULL, called to remove a particular extended
-	attribute.  This method is called by the the setxattr(2) and
-	removexattr(2) system calls.
+``set``
+	Called by the VFS to set the value of a particular extended
+	attribute.  When the new value is NULL, called to remove a
+	particular extended attribute.  This method is called by the the
+	setxattr(2) and removexattr(2) system calls.
 
 When none of the xattr handlers of a filesystem match the specified
 attribute name or when a filesystem doesn't support extended attributes,
@@ -399,128 +442,147 @@ As of kernel 2.6.22, the following members are defined:
 Again, all methods are called without any locks being held, unless
 otherwise noted.
 
-``create``: called by the open(2) and creat(2) system calls.  Only
-	required if you want to support regular files.  The dentry you
-	get should not have an inode (i.e. it should be a negative
-	dentry).  Here you will probably call d_instantiate() with the
-	dentry and the newly created inode
+``create``
+	called by the open(2) and creat(2) system calls.  Only required
+	if you want to support regular files.  The dentry you get should
+	not have an inode (i.e. it should be a negative dentry).  Here
+	you will probably call d_instantiate() with the dentry and the
+	newly created inode
 
-``lookup``: called when the VFS needs to look up an inode in a parent
+``lookup``
+	called when the VFS needs to look up an inode in a parent
 	directory.  The name to look for is found in the dentry.  This
 	method must call d_add() to insert the found inode into the
 	dentry.  The "i_count" field in the inode structure should be
 	incremented.  If the named inode does not exist a NULL inode
 	should be inserted into the dentry (this is called a negative
-	dentry).  Returning an error code from this routine must only
-	be done on a real error, otherwise creating inodes with system
+	dentry).  Returning an error code from this routine must only be
+	done on a real error, otherwise creating inodes with system
 	calls like create(2), mknod(2), mkdir(2) and so on will fail.
 	If you wish to overload the dentry methods then you should
-	initialise the "d_dop" field in the dentry; this is a pointer
-	to a struct "dentry_operations".
-	This method is called with the directory inode semaphore held
+	initialise the "d_dop" field in the dentry; this is a pointer to
+	a struct "dentry_operations".  This method is called with the
+	directory inode semaphore held
 
-``link``: called by the link(2) system call.  Only required if you want
-	to support hard links.  You will probably need to call
+``link``
+	called by the link(2) system call.  Only required if you want to
+	support hard links.  You will probably need to call
 	d_instantiate() just as you would in the create() method
 
-``unlink``: called by the unlink(2) system call.  Only required if you
-	want to support deleting inodes
+``unlink``
+	called by the unlink(2) system call.  Only required if you want
+	to support deleting inodes
 
-``symlink``: called by the symlink(2) system call.  Only required if you
-	want to support symlinks.  You will probably need to call
+``symlink``
+	called by the symlink(2) system call.  Only required if you want
+	to support symlinks.  You will probably need to call
 	d_instantiate() just as you would in the create() method
 
-``mkdir``: called by the mkdir(2) system call.  Only required if you want
+``mkdir``
+	called by the mkdir(2) system call.  Only required if you want
 	to support creating subdirectories.  You will probably need to
 	call d_instantiate() just as you would in the create() method
 
-``rmdir``: called by the rmdir(2) system call.  Only required if you want
+``rmdir``
+	called by the rmdir(2) system call.  Only required if you want
 	to support deleting subdirectories
 
-``mknod``: called by the mknod(2) system call to create a device (char,
-	block) inode or a named pipe (FIFO) or socket.  Only required
-	if you want to support creating these types of inodes.  You
-	will probably need to call d_instantiate() just as you would
-	in the create() method
+``mknod``
+	called by the mknod(2) system call to create a device (char,
+	block) inode or a named pipe (FIFO) or socket.  Only required if
+	you want to support creating these types of inodes.  You will
+	probably need to call d_instantiate() just as you would in the
+	create() method
 
-``rename``: called by the rename(2) system call to rename the object to
-	have the parent and name given by the second inode and dentry.
+``rename``
+	called by the rename(2) system call to rename the object to have
+	the parent and name given by the second inode and dentry.
 
 	The filesystem must return -EINVAL for any unsupported or
-	unknown	flags.  Currently the following flags are implemented:
-	(1) RENAME_NOREPLACE: this flag indicates that if the target
-	of the rename exists the rename should fail with -EEXIST
-	instead of replacing the target.  The VFS already checks for
-	existence, so for local filesystems the RENAME_NOREPLACE
-	implementation is equivalent to plain rename.
+	unknown flags.  Currently the following flags are implemented:
+	(1) RENAME_NOREPLACE: this flag indicates that if the target of
+	the rename exists the rename should fail with -EEXIST instead of
+	replacing the target.  The VFS already checks for existence, so
+	for local filesystems the RENAME_NOREPLACE implementation is
+	equivalent to plain rename.
 	(2) RENAME_EXCHANGE: exchange source and target.  Both must
-	exist; this is checked by the VFS.  Unlike plain rename,
-	source and target may be of different type.
-
-``get_link``: called by the VFS to follow a symbolic link to the
-	inode it points to.  Only required if you want to support
-	symbolic links.  This method returns the symlink body
-	to traverse (and possibly resets the current position with
-	nd_jump_link()).  If the body won't go away until the inode
-	is gone, nothing else is needed; if it needs to be otherwise
-	pinned, arrange for its release by having get_link(..., ..., done)
-	do set_delayed_call(done, destructor, argument).
-	In that case destructor(argument) will be called once VFS is
-	done with the body you've returned.
-	May be called in RCU mode; that is indicated by NULL dentry
+	exist; this is checked by the VFS.  Unlike plain rename, source
+	and target may be of different type.
+
+``get_link``
+	called by the VFS to follow a symbolic link to the inode it
+	points to.  Only required if you want to support symbolic links.
+	This method returns the symlink body to traverse (and possibly
+	resets the current position with nd_jump_link()).  If the body
+	won't go away until the inode is gone, nothing else is needed;
+	if it needs to be otherwise pinned, arrange for its release by
+	having get_link(..., ..., done) do set_delayed_call(done,
+	destructor, argument).  In that case destructor(argument) will
+	be called once VFS is done with the body you've returned.  May
+	be called in RCU mode; that is indicated by NULL dentry
 	argument.  If request can't be handled without leaving RCU mode,
 	have it return ERR_PTR(-ECHILD).
 
-
 	If the filesystem stores the symlink target in ->i_link, the
 	VFS may use it directly without calling ->get_link(); however,
 	->get_link() must still be provided.  ->i_link must not be
 	freed until after an RCU grace period.  Writing to ->i_link
 	post-iget() time requires a 'release' memory barrier.
 
-``readlink``: this is now just an override for use by readlink(2) for the
+``readlink``
+	this is now just an override for use by readlink(2) for the
 	cases when ->get_link uses nd_jump_link() or object is not in
 	fact a symlink.  Normally filesystems should only implement
 	->get_link for symlinks and readlink(2) will automatically use
 	that.
 
-``permission``: called by the VFS to check for access rights on a POSIX-like
+``permission``
+	called by the VFS to check for access rights on a POSIX-like
 	filesystem.
 
-	May be called in rcu-walk mode (mask & MAY_NOT_BLOCK).  If in rcu-walk
-	mode, the filesystem must check the permission without blocking or
-	storing to the inode.
+	May be called in rcu-walk mode (mask & MAY_NOT_BLOCK).  If in
+	rcu-walk mode, the filesystem must check the permission without
+	blocking or storing to the inode.
 
-	If a situation is encountered that rcu-walk cannot handle, return
+	If a situation is encountered that rcu-walk cannot handle,
+	return
 	-ECHILD and it will be called again in ref-walk mode.
 
-``setattr``: called by the VFS to set attributes for a file.  This method
-	is called by chmod(2) and related system calls.
-
-``getattr``: called by the VFS to get attributes of a file.  This method
-	is called by stat(2) and related system calls.
-
-``listxattr``: called by the VFS to list all extended attributes for a
-	given file.  This method is called by the listxattr(2) system call.
-
-``update_time``: called by the VFS to update a specific time or the i_version of
-	an inode.  If this is not defined the VFS will update the inode itself
-	and call mark_inode_dirty_sync.
-
-``atomic_open``: called on the last component of an open.  Using this optional
-	method the filesystem can look up, possibly create and open the file in
-	one atomic operation.  If it wants to leave actual opening to the
-	caller (e.g. if the file turned out to be a symlink, device, or just
-	something filesystem won't do atomic open for), it may signal this by
-	returning finish_no_open(file, dentry).  This method is only called if
-	the last component is negative or needs lookup.  Cached positive dentries
-	are still handled by f_op->open().  If the file was created,
-	FMODE_CREATED flag should be set in file->f_mode.  In case of O_EXCL
-	the method must only succeed if the file didn't exist and hence FMODE_CREATED
-	shall always be set on success.
-
-``tmpfile``: called in the end of O_TMPFILE open().  Optional, equivalent to
-	atomically creating, opening and unlinking a file in given directory.
+``setattr``
+	called by the VFS to set attributes for a file.  This method is
+	called by chmod(2) and related system calls.
+
+``getattr``
+	called by the VFS to get attributes of a file.  This method is
+	called by stat(2) and related system calls.
+
+``listxattr``
+	called by the VFS to list all extended attributes for a given
+	file.  This method is called by the listxattr(2) system call.
+
+``update_time``
+	called by the VFS to update a specific time or the i_version of
+	an inode.  If this is not defined the VFS will update the inode
+	itself and call mark_inode_dirty_sync.
+
+``atomic_open``
+	called on the last component of an open.  Using this optional
+	method the filesystem can look up, possibly create and open the
+	file in one atomic operation.  If it wants to leave actual
+	opening to the caller (e.g. if the file turned out to be a
+	symlink, device, or just something filesystem won't do atomic
+	open for), it may signal this by returning finish_no_open(file,
+	dentry).  This method is only called if the last component is
+	negative or needs lookup.  Cached positive dentries are still
+	handled by f_op->open().  If the file was created, FMODE_CREATED
+	flag should be set in file->f_mode.  In case of O_EXCL the
+	method must only succeed if the file didn't exist and hence
+	FMODE_CREATED shall always be set on success.
+
+``tmpfile``
+	called in the end of O_TMPFILE open().  Optional, equivalent to
+	atomically creating, opening and unlinking a file in given
+	directory.
 
 
 The Address Space Object
@@ -673,70 +735,75 @@ cache in your filesystem.  The following members are defined:
 		int (*swap_deactivate)(struct file *);
 	};
 
-``writepage``: called by the VM to write a dirty page to backing store.
-      This may happen for data integrity reasons (i.e. 'sync'), or
-      to free up memory (flush).  The difference can be seen in
-      wbc->sync_mode.
-      The PG_Dirty flag has been cleared and PageLocked is true.
-      writepage should start writeout, should set PG_Writeback,
-      and should make sure the page is unlocked, either synchronously
-      or asynchronously when the write operation completes.
-
-      If wbc->sync_mode is WB_SYNC_NONE, ->writepage doesn't have to
-      try too hard if there are problems, and may choose to write out
-      other pages from the mapping if that is easier (e.g. due to
-      internal dependencies).  If it chooses not to start writeout, it
-      should return AOP_WRITEPAGE_ACTIVATE so that the VM will not keep
-      calling ->writepage on that page.
-
-      See the file "Locking" for more details.
-
-``readpage``: called by the VM to read a page from backing store.
-       The page will be Locked when readpage is called, and should be
-       unlocked and marked uptodate once the read completes.
-       If ->readpage discovers that it needs to unlock the page for
-       some reason, it can do so, and then return AOP_TRUNCATED_PAGE.
-       In this case, the page will be relocated, relocked and if
-       that all succeeds, ->readpage will be called again.
-
-``writepages``: called by the VM to write out pages associated with the
+``writepage``
+	called by the VM to write a dirty page to backing store.  This
+	may happen for data integrity reasons (i.e. 'sync'), or to free
+	up memory (flush).  The difference can be seen in
+	wbc->sync_mode.  The PG_Dirty flag has been cleared and
+	PageLocked is true.  writepage should start writeout, should set
+	PG_Writeback, and should make sure the page is unlocked, either
+	synchronously or asynchronously when the write operation
+	completes.
+
+	If wbc->sync_mode is WB_SYNC_NONE, ->writepage doesn't have to
+	try too hard if there are problems, and may choose to write out
+	other pages from the mapping if that is easier (e.g. due to
+	internal dependencies).  If it chooses not to start writeout, it
+	should return AOP_WRITEPAGE_ACTIVATE so that the VM will not
+	keep calling ->writepage on that page.
+
+	See the file "Locking" for more details.
+
+``readpage``
+	called by the VM to read a page from backing store.  The page
+	will be Locked when readpage is called, and should be unlocked
+	and marked uptodate once the read completes.  If ->readpage
+	discovers that it needs to unlock the page for some reason, it
+	can do so, and then return AOP_TRUNCATED_PAGE.  In this case,
+	the page will be relocated, relocked and if that all succeeds,
+	->readpage will be called again.
+
+``writepages``
+	called by the VM to write out pages associated with the
 	address_space object.  If wbc->sync_mode is WBC_SYNC_ALL, then
 	the writeback_control will specify a range of pages that must be
-	written out.  If it is WBC_SYNC_NONE, then a nr_to_write is given
-	and that many pages should be written if possible.
-	If no ->writepages is given, then mpage_writepages is used
-	instead.  This will choose pages from the address space that are
-	tagged as DIRTY and will pass them to ->writepage.
-
-``set_page_dirty``: called by the VM to set a page dirty.
-	This is particularly needed if an address space attaches
-	private data to a page, and that data needs to be updated when
-	a page is dirtied.  This is called, for example, when a memory
-	mapped page gets modified.
+	written out.  If it is WBC_SYNC_NONE, then a nr_to_write is
+	given and that many pages should be written if possible.  If no
+	->writepages is given, then mpage_writepages is used instead.
+	This will choose pages from the address space that are tagged as
+	DIRTY and will pass them to ->writepage.
+
+``set_page_dirty``
+	called by the VM to set a page dirty.  This is particularly
+	needed if an address space attaches private data to a page, and
+	that data needs to be updated when a page is dirtied.  This is
+	called, for example, when a memory mapped page gets modified.
 	If defined, it should set the PageDirty flag, and the
 	PAGECACHE_TAG_DIRTY tag in the radix tree.
 
-``readpages``: called by the VM to read pages associated with the address_space
-	object.  This is essentially just a vector version of
-	readpage.  Instead of just one page, several pages are
-	requested.
+``readpages``
+	called by the VM to read pages associated with the address_space
+	object.  This is essentially just a vector version of readpage.
+	Instead of just one page, several pages are requested.
 	readpages is only used for read-ahead, so read errors are
 	ignored.  If anything goes wrong, feel free to give up.
 
-``write_begin``:
-	Called by the generic buffered write code to ask the filesystem to
-	prepare to write len bytes at the given offset in the file.  The
-	address_space should check that the write will be able to complete,
-	by allocating space if necessary and doing any other internal
-	housekeeping.  If the write will update parts of any basic-blocks on
-	storage, then those blocks should be pre-read (if they haven't been
-	read already) so that the updated blocks can be written out properly.
+``write_begin``
+	Called by the generic buffered write code to ask the filesystem
+	to prepare to write len bytes at the given offset in the file.
+	The address_space should check that the write will be able to
+	complete, by allocating space if necessary and doing any other
+	internal housekeeping.  If the write will update parts of any
+	basic-blocks on storage, then those blocks should be pre-read
+	(if they haven't been read already) so that the updated blocks
+	can be written out properly.
 
-	The filesystem must return the locked pagecache page for the specified
-	offset, in ``*pagep``, for the caller to write into.
+	The filesystem must return the locked pagecache page for the
+	specified offset, in ``*pagep``, for the caller to write into.
 
-	It must be able to cope with short writes (where the length passed to
-	write_begin is greater than the number of bytes copied into the page).
+	It must be able to cope with short writes (where the length
+	passed to write_begin is greater than the number of bytes copied
+	into the page).
 
 	flags is a field for AOP_FLAG_xxx flags, described in
 	include/linux/fs.h.
@@ -744,114 +811,128 @@ cache in your filesystem.  The following members are defined:
 	A void * may be returned in fsdata, which then gets passed into
 	write_end.
 
-	Returns 0 on success; < 0 on failure (which is the error code), in
-	which case write_end is not called.
-
-``write_end``: After a successful write_begin, and data copy, write_end must
-	be called.  len is the original len passed to write_begin, and copied
-	is the amount that was able to be copied.
-
-	The filesystem must take care of unlocking the page and releasing it
-	refcount, and updating i_size.
-
-	Returns < 0 on failure, otherwise the number of bytes (<= 'copied')
-	that were able to be copied into pagecache.
-
-``bmap``: called by the VFS to map a logical block offset within object to
-	physical block number.  This method is used by the FIBMAP
-	ioctl and for working with swap-files.  To be able to swap to
-	a file, the file must have a stable mapping to a block
-	device.  The swap system does not go through the filesystem
-	but instead uses bmap to find out where the blocks in the file
-	are and uses those addresses directly.
-
-``invalidatepage``: If a page has PagePrivate set, then invalidatepage
-	will be called when part or all of the page is to be removed
-	from the address space.  This generally corresponds to either a
-	truncation, punch hole  or a complete invalidation of the address
+	Returns 0 on success; < 0 on failure (which is the error code),
+	in which case write_end is not called.
+
+``write_end``
+	After a successful write_begin, and data copy, write_end must be
+	called.  len is the original len passed to write_begin, and
+	copied is the amount that was able to be copied.
+
+	The filesystem must take care of unlocking the page and
+	releasing it refcount, and updating i_size.
+
+	Returns < 0 on failure, otherwise the number of bytes (<=
+	'copied') that were able to be copied into pagecache.
+
+``bmap``
+	called by the VFS to map a logical block offset within object to
+	physical block number.  This method is used by the FIBMAP ioctl
+	and for working with swap-files.  To be able to swap to a file,
+	the file must have a stable mapping to a block device.  The swap
+	system does not go through the filesystem but instead uses bmap
+	to find out where the blocks in the file are and uses those
+	addresses directly.
+
+``invalidatepage``
+	If a page has PagePrivate set, then invalidatepage will be
+	called when part or all of the page is to be removed from the
+	address space.  This generally corresponds to either a
+	truncation, punch hole or a complete invalidation of the address
 	space (in the latter case 'offset' will always be 0 and 'length'
 	will be PAGE_SIZE).  Any private data associated with the page
-	should be updated to reflect this truncation.  If offset is 0 and
-	length is PAGE_SIZE, then the private data should be released,
-	because the page must be able to be completely discarded.  This may
-	be done by calling the ->releasepage function, but in this case the
-	release MUST succeed.
-
-``releasepage``: releasepage is called on PagePrivate pages to indicate
-	that the page should be freed if possible.  ->releasepage
-	should remove any private data from the page and clear the
-	PagePrivate flag.  If releasepage() fails for some reason, it must
-	indicate failure with a 0 return value.
-	releasepage() is used in two distinct though related cases.  The
-	first is when the VM finds a clean page with no active users and
-	wants to make it a free page.  If ->releasepage succeeds, the
-	page will be removed from the address_space and become free.
+	should be updated to reflect this truncation.  If offset is 0
+	and length is PAGE_SIZE, then the private data should be
+	released, because the page must be able to be completely
+	discarded.  This may be done by calling the ->releasepage
+	function, but in this case the release MUST succeed.
+
+``releasepage``
+	releasepage is called on PagePrivate pages to indicate that the
+	page should be freed if possible.  ->releasepage should remove
+	any private data from the page and clear the PagePrivate flag.
+	If releasepage() fails for some reason, it must indicate failure
+	with a 0 return value.  releasepage() is used in two distinct
+	though related cases.  The first is when the VM finds a clean
+	page with no active users and wants to make it a free page.  If
+	->releasepage succeeds, the page will be removed from the
+	address_space and become free.
 
 	The second case is when a request has been made to invalidate
-	some or all pages in an address_space.  This can happen
-	through the fadvise(POSIX_FADV_DONTNEED) system call or by the
-	filesystem explicitly requesting it as nfs and 9fs do (when
-	they believe the cache may be out of date with storage) by
-	calling invalidate_inode_pages2().
-	If the filesystem makes such a call, and needs to be certain
-	that all pages are invalidated, then its releasepage will
-	need to ensure this.  Possibly it can clear the PageUptodate
-	bit if it cannot free private data yet.
-
-``freepage``: freepage is called once the page is no longer visible in
-	the page cache in order to allow the cleanup of any private
-	data.  Since it may be called by the memory reclaimer, it
-	should not assume that the original address_space mapping still
-	exists, and it should not block.
-
-``direct_IO``: called by the generic read/write routines to perform
-	direct_IO - that is IO requests which bypass the page cache
-	and transfer data directly between the storage and the
-	application's address space.
-
-``isolate_page``: Called by the VM when isolating a movable non-lru page.
-	If page is successfully isolated, VM marks the page as PG_isolated
-	via __SetPageIsolated.
-
-``migrate_page``:  This is used to compact the physical memory usage.
-	If the VM wants to relocate a page (maybe off a memory card
-	that is signalling imminent failure) it will pass a new page
-	and an old page to this function.  migrate_page should
-	transfer any private data across and update any references
-	that it has to the page.
-
-``putback_page``: Called by the VM when isolated page's migration fails.
-
-``launder_page``: Called before freeing a page - it writes back the dirty page.  To
-	prevent redirtying the page, it is kept locked during the whole
-	operation.
-
-``is_partially_uptodate``: Called by the VM when reading a file through the
-	pagecache when the underlying blocksize != pagesize.  If the required
-	block is up to date then the read can complete without needing the IO
-	to bring the whole page up to date.
-
-``is_dirty_writeback``: Called by the VM when attempting to reclaim a page.
-	The VM uses dirty and writeback information to determine if it needs
-	to stall to allow flushers a chance to complete some IO.  Ordinarily
-	it can use PageDirty and PageWriteback but some filesystems have
-	more complex state (unstable pages in NFS prevent reclaim) or
-	do not set those flags due to locking problems.  This callback
-	allows a filesystem to indicate to the VM if a page should be
-	treated as dirty or writeback for the purposes of stalling.
-
-``error_remove_page``: normally set to generic_error_remove_page if truncation
-	is ok for this address space.  Used for memory failure handling.
+	some or all pages in an address_space.  This can happen through
+	the fadvise(POSIX_FADV_DONTNEED) system call or by the
+	filesystem explicitly requesting it as nfs and 9fs do (when they
+	believe the cache may be out of date with storage) by calling
+	invalidate_inode_pages2().  If the filesystem makes such a call,
+	and needs to be certain that all pages are invalidated, then its
+	releasepage will need to ensure this.  Possibly it can clear the
+	PageUptodate bit if it cannot free private data yet.
+
+``freepage``
+	freepage is called once the page is no longer visible in the
+	page cache in order to allow the cleanup of any private data.
+	Since it may be called by the memory reclaimer, it should not
+	assume that the original address_space mapping still exists, and
+	it should not block.
+
+``direct_IO``
+	called by the generic read/write routines to perform direct_IO -
+	that is IO requests which bypass the page cache and transfer
+	data directly between the storage and the application's address
+	space.
+
+``isolate_page``
+	Called by the VM when isolating a movable non-lru page.  If page
+	is successfully isolated, VM marks the page as PG_isolated via
+	__SetPageIsolated.
+
+``migrate_page``
+	This is used to compact the physical memory usage.  If the VM
+	wants to relocate a page (maybe off a memory card that is
+	signalling imminent failure) it will pass a new page and an old
+	page to this function.  migrate_page should transfer any private
+	data across and update any references that it has to the page.
+
+``putback_page``
+	Called by the VM when isolated page's migration fails.
+
+``launder_page``
+	Called before freeing a page - it writes back the dirty page.
+	To prevent redirtying the page, it is kept locked during the
+	whole operation.
+
+``is_partially_uptodate``
+	Called by the VM when reading a file through the pagecache when
+	the underlying blocksize != pagesize.  If the required block is
+	up to date then the read can complete without needing the IO to
+	bring the whole page up to date.
+
+``is_dirty_writeback``
+	Called by the VM when attempting to reclaim a page.  The VM uses
+	dirty and writeback information to determine if it needs to
+	stall to allow flushers a chance to complete some IO.
+	Ordinarily it can use PageDirty and PageWriteback but some
+	filesystems have more complex state (unstable pages in NFS
+	prevent reclaim) or do not set those flags due to locking
+	problems.  This callback allows a filesystem to indicate to the
+	VM if a page should be treated as dirty or writeback for the
+	purposes of stalling.
+
+``error_remove_page``
+	normally set to generic_error_remove_page if truncation is ok
+	for this address space.  Used for memory failure handling.
 	Setting this implies you deal with pages going away under you,
 	unless you have them locked or reference counts increased.
 
-``swap_activate``: Called when swapon is used on a file to allocate
-	space if necessary and pin the block lookup information in
-	memory.  A return value of zero indicates success,
-	in which case this file can be used to back swapspace.
+``swap_activate``
+	Called when swapon is used on a file to allocate space if
+	necessary and pin the block lookup information in memory.  A
+	return value of zero indicates success, in which case this file
+	can be used to back swapspace.
 
-``swap_deactivate``: Called during swapoff on files where swap_activate
-	was successful.
+``swap_deactivate``
+	Called during swapoff on files where swap_activate was
+	successful.
 
 
 The File Object
@@ -912,91 +993,120 @@ This describes how the VFS can manipulate an open file.  As of kernel
 Again, all methods are called without any locks being held, unless
 otherwise noted.
 
-``llseek``: called when the VFS needs to move the file position index
+``llseek``
+	called when the VFS needs to move the file position index
 
-``read``: called by read(2) and related system calls
+``read``
+	called by read(2) and related system calls
 
-``read_iter``: possibly asynchronous read with iov_iter as destination
+``read_iter``
+	possibly asynchronous read with iov_iter as destination
 
-``write``: called by write(2) and related system calls
+``write``
+	called by write(2) and related system calls
 
-``write_iter``: possibly asynchronous write with iov_iter as source
+``write_iter``
+	possibly asynchronous write with iov_iter as source
 
-``iopoll``: called when aio wants to poll for completions on HIPRI iocbs
+``iopoll``
+	called when aio wants to poll for completions on HIPRI iocbs
 
-``iterate``: called when the VFS needs to read the directory contents
+``iterate``
+	called when the VFS needs to read the directory contents
 
-``iterate_shared``: called when the VFS needs to read the directory contents
-	when filesystem supports concurrent dir iterators
+``iterate_shared``
+	called when the VFS needs to read the directory contents when
+	filesystem supports concurrent dir iterators
 
-``poll``: called by the VFS when a process wants to check if there is
+``poll``
+	called by the VFS when a process wants to check if there is
 	activity on this file and (optionally) go to sleep until there
 	is activity.  Called by the select(2) and poll(2) system calls
 
-``unlocked_ioctl``: called by the ioctl(2) system call.
+``unlocked_ioctl``
+	called by the ioctl(2) system call.
 
-``compat_ioctl``: called by the ioctl(2) system call when 32 bit system calls
-	 are used on 64 bit kernels.
+``compat_ioctl``
+	called by the ioctl(2) system call when 32 bit system calls are
+	 used on 64 bit kernels.
 
-``mmap``: called by the mmap(2) system call
+``mmap``
+	called by the mmap(2) system call
 
-``open``: called by the VFS when an inode should be opened.  When the VFS
+``open``
+	called by the VFS when an inode should be opened.  When the VFS
 	opens a file, it creates a new "struct file".  It then calls the
 	open method for the newly allocated file structure.  You might
-	think that the open method really belongs in
-	"struct inode_operations", and you may be right.  I think it's
-	done the way it is because it makes filesystems simpler to
-	implement.  The open() method is a good place to initialize the
+	think that the open method really belongs in "struct
+	inode_operations", and you may be right.  I think it's done the
+	way it is because it makes filesystems simpler to implement.
+	The open() method is a good place to initialize the
 	"private_data" member in the file structure if you want to point
 	to a device structure
 
-``flush``: called by the close(2) system call to flush a file
+``flush``
+	called by the close(2) system call to flush a file
 
-``release``: called when the last reference to an open file is closed
+``release``
+	called when the last reference to an open file is closed
 
-``fsync``: called by the fsync(2) system call.  Also see the section above
-	 entitled "Handling errors during writeback".
+``fsync``
+	called by the fsync(2) system call.  Also see the section above
+	entitled "Handling errors during writeback".
 
-``fasync``: called by the fcntl(2) system call when asynchronous
+``fasync``
+	called by the fcntl(2) system call when asynchronous
 	(non-blocking) mode is enabled for a file
 
-``lock``: called by the fcntl(2) system call for F_GETLK, F_SETLK, and F_SETLKW
-	commands
+``lock``
+	called by the fcntl(2) system call for F_GETLK, F_SETLK, and
+	F_SETLKW commands
 
-``get_unmapped_area``: called by the mmap(2) system call
+``get_unmapped_area``
+	called by the mmap(2) system call
 
-``check_flags``: called by the fcntl(2) system call for F_SETFL command
+``check_flags``
+	called by the fcntl(2) system call for F_SETFL command
 
-``flock``: called by the flock(2) system call
+``flock``
+	called by the flock(2) system call
 
-``splice_write``: called by the VFS to splice data from a pipe to a file.  This
-		method is used by the splice(2) system call
+``splice_write``
+	called by the VFS to splice data from a pipe to a file.  This
+	method is used by the splice(2) system call
 
-``splice_read``: called by the VFS to splice data from file to a pipe.  This
-	       method is used by the splice(2) system call
+``splice_read``
+	called by the VFS to splice data from file to a pipe.  This
+	method is used by the splice(2) system call
 
-``setlease``: called by the VFS to set or release a file lock lease.  setlease
-	    implementations should call generic_setlease to record or remove
-	    the lease in the inode after setting it.
+``setlease``
+	called by the VFS to set or release a file lock lease.  setlease
+	implementations should call generic_setlease to record or remove
+	the lease in the inode after setting it.
 
-``fallocate``: called by the VFS to preallocate blocks or punch a hole.
+``fallocate``
+	called by the VFS to preallocate blocks or punch a hole.
 
-``copy_file_range``: called by the copy_file_range(2) system call.
+``copy_file_range``
+	called by the copy_file_range(2) system call.
 
-``remap_file_range``: called by the ioctl(2) system call for FICLONERANGE and
-	FICLONE and FIDEDUPERANGE commands to remap file ranges.  An
-	implementation should remap len bytes at pos_in of the source file into
-	the dest file at pos_out.  Implementations must handle callers passing
-	in len == 0; this means "remap to the end of the source file".  The
-	return value should the number of bytes remapped, or the usual
-	negative error code if errors occurred before any bytes were remapped.
-	The remap_flags parameter accepts REMAP_FILE_* flags.  If
-	REMAP_FILE_DEDUP is set then the implementation must only remap if the
-	requested file ranges have identical contents.  If REMAP_CAN_SHORTEN is
-	set, the caller is ok with the implementation shortening the request
-	length to satisfy alignment or EOF requirements (or any other reason).
+``remap_file_range``
+	called by the ioctl(2) system call for FICLONERANGE and FICLONE
+	and FIDEDUPERANGE commands to remap file ranges.  An
+	implementation should remap len bytes at pos_in of the source
+	file into the dest file at pos_out.  Implementations must handle
+	callers passing in len == 0; this means "remap to the end of the
+	source file".  The return value should the number of bytes
+	remapped, or the usual negative error code if errors occurred
+	before any bytes were remapped.  The remap_flags parameter
+	accepts REMAP_FILE_* flags.  If REMAP_FILE_DEDUP is set then the
+	implementation must only remap if the requested file ranges have
+	identical contents.  If REMAP_CAN_SHORTEN is set, the caller is
+	ok with the implementation shortening the request length to
+	satisfy alignment or EOF requirements (or any other reason).
 
-``fadvise``: possibly called by the fadvise64() system call.
+``fadvise``
+	possibly called by the fadvise64() system call.
 
 Note that the file operations are implemented by the specific
 filesystem in which the inode resides.  When opening a device node
@@ -1041,89 +1151,104 @@ defined:
 		struct dentry *(*d_real)(struct dentry *, const struct inode *);
 	};
 
-``d_revalidate``: called when the VFS needs to revalidate a dentry.  This
-	is called whenever a name look-up finds a dentry in the
-	dcache.  Most local filesystems leave this as NULL, because all their
-	dentries in the dcache are valid.  Network filesystems are different
-	since things can change on the server without the client necessarily
-	being aware of it.
-
-	This function should return a positive value if the dentry is still
-	valid, and zero or a negative error code if it isn't.
-
-	d_revalidate may be called in rcu-walk mode (flags & LOOKUP_RCU).
-	If in rcu-walk mode, the filesystem must revalidate the dentry without
-	blocking or storing to the dentry, d_parent and d_inode should not be
-	used without care (because they can change and, in d_inode case, even
-	become NULL under us).
-
-	If a situation is encountered that rcu-walk cannot handle, return
+``d_revalidate``
+	called when the VFS needs to revalidate a dentry.  This is
+	called whenever a name look-up finds a dentry in the dcache.
+	Most local filesystems leave this as NULL, because all their
+	dentries in the dcache are valid.  Network filesystems are
+	different since things can change on the server without the
+	client necessarily being aware of it.
+
+	This function should return a positive value if the dentry is
+	still valid, and zero or a negative error code if it isn't.
+
+	d_revalidate may be called in rcu-walk mode (flags &
+	LOOKUP_RCU).  If in rcu-walk mode, the filesystem must
+	revalidate the dentry without blocking or storing to the dentry,
+	d_parent and d_inode should not be used without care (because
+	they can change and, in d_inode case, even become NULL under
+	us).
+
+	If a situation is encountered that rcu-walk cannot handle,
+	return
 	-ECHILD and it will be called again in ref-walk mode.
 
-``_weak_revalidate``: called when the VFS needs to revalidate a "jumped" dentry.
-	This is called when a path-walk ends at dentry that was not acquired by
-	doing a lookup in the parent directory.  This includes "/", "." and "..",
-	as well as procfs-style symlinks and mountpoint traversal.
+``_weak_revalidate``
+	called when the VFS needs to revalidate a "jumped" dentry.  This
+	is called when a path-walk ends at dentry that was not acquired
+	by doing a lookup in the parent directory.  This includes "/",
+	"." and "..", as well as procfs-style symlinks and mountpoint
+	traversal.
 
-	In this case, we are less concerned with whether the dentry is still
-	fully correct, but rather that the inode is still valid.  As with
-	d_revalidate, most local filesystems will set this to NULL since their
-	dcache entries are always valid.
+	In this case, we are less concerned with whether the dentry is
+	still fully correct, but rather that the inode is still valid.
+	As with d_revalidate, most local filesystems will set this to
+	NULL since their dcache entries are always valid.
 
-	This function has the same return code semantics as d_revalidate.
+	This function has the same return code semantics as
+	d_revalidate.
 
 	d_weak_revalidate is only called after leaving rcu-walk mode.
 
-``d_hash``: called when the VFS adds a dentry to the hash table.  The first
+``d_hash``
+	called when the VFS adds a dentry to the hash table.  The first
 	dentry passed to d_hash is the parent directory that the name is
 	to be hashed into.
 
 	Same locking and synchronisation rules as d_compare regarding
 	what is safe to dereference etc.
 
-``d_compare``: called to compare a dentry name with a given name.  The first
+``d_compare``
+	called to compare a dentry name with a given name.  The first
 	dentry is the parent of the dentry to be compared, the second is
-	the child dentry.  len and name string are properties of the dentry
-	to be compared.  qstr is the name to compare it with.
+	the child dentry.  len and name string are properties of the
+	dentry to be compared.  qstr is the name to compare it with.
 
 	Must be constant and idempotent, and should not take locks if
-	possible, and should not or store into the dentry.
-	Should not dereference pointers outside the dentry without
-	lots of care (eg.  d_parent, d_inode, d_name should not be used).
-
-	However, our vfsmount is pinned, and RCU held, so the dentries and
-	inodes won't disappear, neither will our sb or filesystem module.
-	->d_sb may be used.
-
-	It is a tricky calling convention because it needs to be called under
-	"rcu-walk", ie. without any locks or references on things.
-
-``d_delete``: called when the last reference to a dentry is dropped and the
-	dcache is deciding whether or not to cache it.  Return 1 to delete
-	immediately, or 0 to cache the dentry.  Default is NULL which means to
-	always cache a reachable dentry.  d_delete must be constant and
-	idempotent.
-
-``d_init``: called when a dentry is allocated
-
-``d_release``: called when a dentry is really deallocated
-
-``d_iput``: called when a dentry loses its inode (just prior to its
-	being deallocated).  The default when this is NULL is that the
-	VFS calls iput().  If you define this method, you must call
-	iput() yourself
-
-``d_dname``: called when the pathname of a dentry should be generated.
-	Useful for some pseudo filesystems (sockfs, pipefs, ...) to delay
-	pathname generation.  (Instead of doing it when dentry is created,
-	it's done only when the path is needed.).  Real filesystems probably
-	dont want to use it, because their dentries are present in global
-	dcache hash, so their hash should be an invariant.  As no lock is
-	held, d_dname() should not try to modify the dentry itself, unless
-	appropriate SMP safety is used.  CAUTION : d_path() logic is quite
-	tricky.  The correct way to return for example "Hello" is to put it
-	at the end of the buffer, and returns a pointer to the first char.
-	dynamic_dname() helper function is provided to take care of this.
+	possible, and should not or store into the dentry.  Should not
+	dereference pointers outside the dentry without lots of care
+	(eg.  d_parent, d_inode, d_name should not be used).
+
+	However, our vfsmount is pinned, and RCU held, so the dentries
+	and inodes won't disappear, neither will our sb or filesystem
+	module.  ->d_sb may be used.
+
+	It is a tricky calling convention because it needs to be called
+	under "rcu-walk", ie. without any locks or references on things.
+
+``d_delete``
+	called when the last reference to a dentry is dropped and the
+	dcache is deciding whether or not to cache it.  Return 1 to
+	delete immediately, or 0 to cache the dentry.  Default is NULL
+	which means to always cache a reachable dentry.  d_delete must
+	be constant and idempotent.
+
+``d_init``
+	called when a dentry is allocated
+
+``d_release``
+	called when a dentry is really deallocated
+
+``d_iput``
+	called when a dentry loses its inode (just prior to its being
+	deallocated).  The default when this is NULL is that the VFS
+	calls iput().  If you define this method, you must call iput()
+	yourself
+
+``d_dname``
+	called when the pathname of a dentry should be generated.
+	Useful for some pseudo filesystems (sockfs, pipefs, ...) to
+	delay pathname generation.  (Instead of doing it when dentry is
+	created, it's done only when the path is needed.).  Real
+	filesystems probably dont want to use it, because their dentries
+	are present in global dcache hash, so their hash should be an
+	invariant.  As no lock is held, d_dname() should not try to
+	modify the dentry itself, unless appropriate SMP safety is used.
+	CAUTION : d_path() logic is quite tricky.  The correct way to
+	return for example "Hello" is to put it at the end of the
+	buffer, and returns a pointer to the first char.
+	dynamic_dname() helper function is provided to take care of
+	this.
 
 	Example :
 
@@ -1135,52 +1260,57 @@ defined:
 				dentry->d_inode->i_ino);
 	}
 
-``d_automount``: called when an automount dentry is to be traversed (optional).
-	This should create a new VFS mount record and return the record to the
-	caller.  The caller is supplied with a path parameter giving the
-	automount directory to describe the automount target and the parent
-	VFS mount record to provide inheritable mount parameters.  NULL should
-	be returned if someone else managed to make the automount first.  If
-	the vfsmount creation failed, then an error code should be returned.
-	If -EISDIR is returned, then the directory will be treated as an
-	ordinary directory and returned to pathwalk to continue walking.
-
-	If a vfsmount is returned, the caller will attempt to mount it on the
-	mountpoint and will remove the vfsmount from its expiration list in
-	the case of failure.  The vfsmount should be returned with 2 refs on
-	it to prevent automatic expiration - the caller will clean up the
-	additional ref.
-
-	This function is only used if DCACHE_NEED_AUTOMOUNT is set on the
-	dentry.  This is set by __d_instantiate() if S_AUTOMOUNT is set on the
-	inode being added.
-
-``d_manage``: called to allow the filesystem to manage the transition from a
-	dentry (optional).  This allows autofs, for example, to hold up clients
-	waiting to explore behind a 'mountpoint' while letting the daemon go
-	past and construct the subtree there.  0 should be returned to let the
-	calling process continue.  -EISDIR can be returned to tell pathwalk to
-	use this directory as an ordinary directory and to ignore anything
-	mounted on it and not to check the automount flag.  Any other error
-	code will abort pathwalk completely.
+``d_automount``
+	called when an automount dentry is to be traversed (optional).
+	This should create a new VFS mount record and return the record
+	to the caller.  The caller is supplied with a path parameter
+	giving the automount directory to describe the automount target
+	and the parent VFS mount record to provide inheritable mount
+	parameters.  NULL should be returned if someone else managed to
+	make the automount first.  If the vfsmount creation failed, then
+	an error code should be returned.  If -EISDIR is returned, then
+	the directory will be treated as an ordinary directory and
+	returned to pathwalk to continue walking.
+
+	If a vfsmount is returned, the caller will attempt to mount it
+	on the mountpoint and will remove the vfsmount from its
+	expiration list in the case of failure.  The vfsmount should be
+	returned with 2 refs on it to prevent automatic expiration - the
+	caller will clean up the additional ref.
+
+	This function is only used if DCACHE_NEED_AUTOMOUNT is set on
+	the dentry.  This is set by __d_instantiate() if S_AUTOMOUNT is
+	set on the inode being added.
+
+``d_manage``
+	called to allow the filesystem to manage the transition from a
+	dentry (optional).  This allows autofs, for example, to hold up
+	clients waiting to explore behind a 'mountpoint' while letting
+	the daemon go past and construct the subtree there.  0 should be
+	returned to let the calling process continue.  -EISDIR can be
+	returned to tell pathwalk to use this directory as an ordinary
+	directory and to ignore anything mounted on it and not to check
+	the automount flag.  Any other error code will abort pathwalk
+	completely.
 
 	If the 'rcu_walk' parameter is true, then the caller is doing a
-	pathwalk in RCU-walk mode.  Sleeping is not permitted in this mode,
-	and the caller can be asked to leave it and call again by returning
-	-ECHILD.  -EISDIR may also be returned to tell pathwalk to
-	ignore d_automount or any mounts.
+	pathwalk in RCU-walk mode.  Sleeping is not permitted in this
+	mode, and the caller can be asked to leave it and call again by
+	returning -ECHILD.  -EISDIR may also be returned to tell
+	pathwalk to ignore d_automount or any mounts.
 
-	This function is only used if DCACHE_MANAGE_TRANSIT is set on the
-	dentry being transited from.
+	This function is only used if DCACHE_MANAGE_TRANSIT is set on
+	the dentry being transited from.
 
-``d_real``: overlay/union type filesystems implement this method to return one of
-	the underlying dentries hidden by the overlay.  It is used in two
-	different modes:
+``d_real``
+	overlay/union type filesystems implement this method to return
+	one of the underlying dentries hidden by the overlay.  It is
+	used in two different modes:
 
-	Called from file_dentry() it returns the real dentry matching the inode
-	argument.  The real dentry may be from a lower layer already copied up,
-	but still referenced from the file.  This mode is selected with a
-	non-NULL inode argument.
+	Called from file_dentry() it returns the real dentry matching
+	the inode argument.  The real dentry may be from a lower layer
+	already copied up, but still referenced from the file.  This
+	mode is selected with a non-NULL inode argument.
 
 	With NULL inode the topmost real underlying dentry is returned.
 
@@ -1195,40 +1325,47 @@ Directory Entry Cache API
 There are a number of functions defined which permit a filesystem to
 manipulate dentries:
 
-``dget``: open a new handle for an existing dentry (this just increments
+``dget``
+	open a new handle for an existing dentry (this just increments
 	the usage count)
 
-``dput``: close a handle for a dentry (decrements the usage count).  If
+``dput``
+	close a handle for a dentry (decrements the usage count).  If
 	the usage count drops to 0, and the dentry is still in its
 	parent's hash, the "d_delete" method is called to check whether
-	it should be cached.  If it should not be cached, or if the dentry
-	is not hashed, it is deleted.  Otherwise cached dentries are put
-	into an LRU list to be reclaimed on memory shortage.
-
-``d_drop``: this unhashes a dentry from its parents hash list.  A
-	subsequent call to dput() will deallocate the dentry if its
-	usage count drops to 0
-
-``d_delete``: delete a dentry.  If there are no other open references to
-	the dentry then the dentry is turned into a negative dentry
-	(the d_iput() method is called).  If there are other
-	references, then d_drop() is called instead
-
-``d_add``: add a dentry to its parents hash list and then calls
+	it should be cached.  If it should not be cached, or if the
+	dentry is not hashed, it is deleted.  Otherwise cached dentries
+	are put into an LRU list to be reclaimed on memory shortage.
+
+``d_drop``
+	this unhashes a dentry from its parents hash list.  A subsequent
+	call to dput() will deallocate the dentry if its usage count
+	drops to 0
+
+``d_delete``
+	delete a dentry.  If there are no other open references to the
+	dentry then the dentry is turned into a negative dentry (the
+	d_iput() method is called).  If there are other references, then
+	d_drop() is called instead
+
+``d_add``
+	add a dentry to its parents hash list and then calls
 	d_instantiate()
 
-``d_instantiate``: add a dentry to the alias hash list for the inode and
-	updates the "d_inode" member.  The "i_count" member in the
-	inode structure should be set/incremented.  If the inode
-	pointer is NULL, the dentry is called a "negative
-	dentry".  This function is commonly called when an inode is
-	created for an existing negative dentry
-
-``d_lookup``: look up a dentry given its parent and path name component
-	It looks up the child of that given name from the dcache
-	hash table.  If it is found, the reference count is incremented
-	and the dentry is returned.  The caller must use dput()
-	to free the dentry when it finishes using it.
+``d_instantiate``
+	add a dentry to the alias hash list for the inode and updates
+	the "d_inode" member.  The "i_count" member in the inode
+	structure should be set/incremented.  If the inode pointer is
+	NULL, the dentry is called a "negative dentry".  This function
+	is commonly called when an inode is created for an existing
+	negative dentry
+
+``d_lookup``
+	look up a dentry given its parent and path name component It
+	looks up the child of that given name from the dcache hash
+	table.  If it is found, the reference count is incremented and
+	the dentry is returned.  The caller must use dput() to free the
+	dentry when it finishes using it.
 
 
 Mount Options
-- 
cgit 


From b422124758c19db06c4c30c4abb8f57bf18995b9 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 5 Jun 2019 19:39:44 +0300
Subject: docs/core-api: Add string helpers API to the list

Some times string helpers are needed, but there is nothing about them
in the generated documentation.

Fill the gap by adding a reference to string_helpers.c exported functions.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/core-api/kernel-api.rst | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index a53ec2eb8176..65ae2bf1f86d 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -33,6 +33,9 @@ String Conversions
 .. kernel-doc:: lib/kstrtox.c
    :export:
 
+.. kernel-doc:: lib/string_helpers.c
+   :export:
+
 String Manipulation
 -------------------
 
-- 
cgit 


From 58d494669f36d0b61b7ec42c232877167ed3f5ce Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 5 Jun 2019 19:51:13 +0300
Subject: docs/core-api: Add integer power functions to the list

Some times integer power functions, such as int_sqrt(), are needed, but
there is nothing about them in the generated documentation.

Fill the gap by adding a reference to the corresponding exported functions.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/core-api/kernel-api.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 65ae2bf1f86d..824f24ccf401 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -141,6 +141,15 @@ Base 2 log and power Functions
 .. kernel-doc:: include/linux/log2.h
    :internal:
 
+Integer power Functions
+-----------------------
+
+.. kernel-doc:: lib/math/int_pow.c
+   :export:
+
+.. kernel-doc:: lib/math/int_sqrt.c
+   :export:
+
 Division Functions
 ------------------
 
-- 
cgit 


From 4665743276c3e8efaa4da54eabe8f54b2f99e335 Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Wed, 5 Jun 2019 07:56:30 -0500
Subject: dt-bindings: mfd: Add lm36274 bindings to ti-lmu

Add the LM36274 backlight driver with regulator support.
This is a multi-function device for backlight applications.

Backlight properties will be documented in it's a supplemental
bindings document.

Regulator support is documented in the regulator/lm363x-regulator.txt

http://www.ti.com/lit/ds/symlink/lm36274.pdf

Signed-off-by: Dan Murphy <dmurphy@ti.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 Documentation/devicetree/bindings/mfd/ti-lmu.txt | 54 ++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mfd/ti-lmu.txt b/Documentation/devicetree/bindings/mfd/ti-lmu.txt
index 782d3c9812ed..2296b8f24de4 100644
--- a/Documentation/devicetree/bindings/mfd/ti-lmu.txt
+++ b/Documentation/devicetree/bindings/mfd/ti-lmu.txt
@@ -8,6 +8,7 @@ TI LMU driver supports lighting devices below.
   LM3632       Backlight and regulator
   LM3633       Backlight, LED and fault monitor
   LM3695       Backlight
+  LM36274      Backlight and regulator
 
 Required properties:
   - compatible: Should be one of:
@@ -15,11 +16,13 @@ Required properties:
                 "ti,lm3632"
                 "ti,lm3633"
                 "ti,lm3695"
+		"ti,lm36274"
   - reg: I2C slave address.
          0x11 for LM3632
          0x29 for LM3631
          0x36 for LM3633
          0x63 for LM3695
+         0x11 for LM36274
 
 Optional properties:
   - enable-gpios: A GPIO specifier for hardware enable pin.
@@ -50,12 +53,14 @@ Optional nodes:
       - compatible: Should be one of:
                     "ti,lm3633-fault-monitor"
   - leds: LED properties for LM3633. Please refer to [2].
+	  LED properties for LM36274. Please refer to [4].
   - regulators: Regulator properties for LM3631 and LM3632.
                 Please refer to [3].
 
 [1] ../leds/backlight/ti-lmu-backlight.txt
 [2] ../leds/leds-lm3633.txt
 [3] ../regulator/lm363x-regulator.txt
+[4] ../leds/leds-lm36274.txt
 
 lm3631@29 {
 	compatible = "ti,lm3631";
@@ -213,3 +218,52 @@ lm3695@63 {
 		};
 	};
 };
+
+lm36274@11 {
+	compatible = "ti,lm36274";
+	#address-cells = <1>;
+	#size-cells = <0>;
+	reg = <0x11>;
+
+	enable-gpios = <&pioC 2 GPIO_ACTIVE_HIGH>;
+	regulators {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "ti,lm363x-regulator";
+
+		enable-gpios = <&pioC 0 GPIO_ACTIVE_HIGH>,
+			       <&pioC 1 GPIO_ACTIVE_HIGH>;
+
+		vboost {
+			regulator-name = "lcd_boost";
+			regulator-min-microvolt = <4000000>;
+			regulator-max-microvolt = <7150000>;
+			regulator-always-on;
+		};
+
+		vpos {
+			regulator-name = "lcd_vpos";
+			regulator-min-microvolt = <4000000>;
+			regulator-max-microvolt = <6500000>;
+		};
+
+		vneg {
+			regulator-name = "lcd_vneg";
+			regulator-min-microvolt = <4000000>;
+			regulator-max-microvolt = <6500000>;
+		};
+	};
+
+	backlight {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "ti,lm36274-backlight";
+
+		led@0 {
+			reg = <0>;
+			led-sources = <0 2>;
+			label = "white:backlight_cluster";
+			linux,default-trigger = "backlight";
+		};
+	};
+};
-- 
cgit 


From 2076e5c0451ca943ff8ecc6def7239c84c77e070 Mon Sep 17 00:00:00 2001
From: Ralph Campbell <rcampbell@nvidia.com>
Date: Mon, 6 May 2019 16:29:38 -0700
Subject: mm/hmm: update HMM documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update the HMM documentation to reflect the latest API and make a few
minor wording changes.

Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 Documentation/vm/hmm.rst | 141 +++++++++++++++++++++++++----------------------
 1 file changed, 74 insertions(+), 67 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index 7cdf7282e022..7b6eeda5a7c0 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -10,7 +10,7 @@ of this being specialized struct page for such memory (see sections 5 to 7 of
 this document).
 
 HMM also provides optional helpers for SVM (Share Virtual Memory), i.e.,
-allowing a device to transparently access program address coherently with
+allowing a device to transparently access program addresses coherently with
 the CPU meaning that any valid pointer on the CPU is also a valid pointer
 for the device. This is becoming mandatory to simplify the use of advanced
 heterogeneous computing where GPU, DSP, or FPGA are used to perform various
@@ -22,8 +22,8 @@ expose the hardware limitations that are inherent to many platforms. The third
 section gives an overview of the HMM design. The fourth section explains how
 CPU page-table mirroring works and the purpose of HMM in this context. The
 fifth section deals with how device memory is represented inside the kernel.
-Finally, the last section presents a new migration helper that allows lever-
-aging the device DMA engine.
+Finally, the last section presents a new migration helper that allows
+leveraging the device DMA engine.
 
 .. contents:: :local:
 
@@ -39,20 +39,20 @@ address space. I use shared address space to refer to the opposite situation:
 i.e., one in which any application memory region can be used by a device
 transparently.
 
-Split address space happens because device can only access memory allocated
-through device specific API. This implies that all memory objects in a program
+Split address space happens because devices can only access memory allocated
+through a device specific API. This implies that all memory objects in a program
 are not equal from the device point of view which complicates large programs
 that rely on a wide set of libraries.
 
-Concretely this means that code that wants to leverage devices like GPUs needs
-to copy object between generically allocated memory (malloc, mmap private, mmap
+Concretely, this means that code that wants to leverage devices like GPUs needs
+to copy objects between generically allocated memory (malloc, mmap private, mmap
 share) and memory allocated through the device driver API (this still ends up
 with an mmap but of the device file).
 
 For flat data sets (array, grid, image, ...) this isn't too hard to achieve but
-complex data sets (list, tree, ...) are hard to get right. Duplicating a
+for complex data sets (list, tree, ...) it's hard to get right. Duplicating a
 complex data set needs to re-map all the pointer relations between each of its
-elements. This is error prone and program gets harder to debug because of the
+elements. This is error prone and programs get harder to debug because of the
 duplicate data set and addresses.
 
 Split address space also means that libraries cannot transparently use data
@@ -77,12 +77,12 @@ I/O bus, device memory characteristics
 
 I/O buses cripple shared address spaces due to a few limitations. Most I/O
 buses only allow basic memory access from device to main memory; even cache
-coherency is often optional. Access to device memory from CPU is even more
+coherency is often optional. Access to device memory from a CPU is even more
 limited. More often than not, it is not cache coherent.
 
 If we only consider the PCIE bus, then a device can access main memory (often
 through an IOMMU) and be cache coherent with the CPUs. However, it only allows
-a limited set of atomic operations from device on main memory. This is worse
+a limited set of atomic operations from the device on main memory. This is worse
 in the other direction: the CPU can only access a limited range of the device
 memory and cannot perform atomic operations on it. Thus device memory cannot
 be considered the same as regular memory from the kernel point of view.
@@ -93,20 +93,20 @@ The final limitation is latency. Access to main memory from the device has an
 order of magnitude higher latency than when the device accesses its own memory.
 
 Some platforms are developing new I/O buses or additions/modifications to PCIE
-to address some of these limitations (OpenCAPI, CCIX). They mainly allow two-
-way cache coherency between CPU and device and allow all atomic operations the
+to address some of these limitations (OpenCAPI, CCIX). They mainly allow
+two-way cache coherency between CPU and device and allow all atomic operations the
 architecture supports. Sadly, not all platforms are following this trend and
 some major architectures are left without hardware solutions to these problems.
 
 So for shared address space to make sense, not only must we allow devices to
 access any memory but we must also permit any memory to be migrated to device
-memory while device is using it (blocking CPU access while it happens).
+memory while the device is using it (blocking CPU access while it happens).
 
 
 Shared address space and migration
 ==================================
 
-HMM intends to provide two main features. First one is to share the address
+HMM intends to provide two main features. The first one is to share the address
 space by duplicating the CPU page table in the device page table so the same
 address points to the same physical memory for any valid main memory address in
 the process address space.
@@ -121,14 +121,14 @@ why HMM provides helpers to factor out everything that can be while leaving the
 hardware specific details to the device driver.
 
 The second mechanism HMM provides is a new kind of ZONE_DEVICE memory that
-allows allocating a struct page for each page of the device memory. Those pages
+allows allocating a struct page for each page of device memory. Those pages
 are special because the CPU cannot map them. However, they allow migrating
 main memory to device memory using existing migration mechanisms and everything
-looks like a page is swapped out to disk from the CPU point of view. Using a
-struct page gives the easiest and cleanest integration with existing mm mech-
-anisms. Here again, HMM only provides helpers, first to hotplug new ZONE_DEVICE
+looks like a page that is swapped out to disk from the CPU point of view. Using a
+struct page gives the easiest and cleanest integration with existing mm
+mechanisms. Here again, HMM only provides helpers, first to hotplug new ZONE_DEVICE
 memory for the device memory and second to perform migration. Policy decisions
-of what and when to migrate things is left to the device driver.
+of what and when to migrate is left to the device driver.
 
 Note that any CPU access to a device page triggers a page fault and a migration
 back to main memory. For example, when a page backing a given CPU address A is
@@ -136,8 +136,8 @@ migrated from a main memory page to a device page, then any CPU access to
 address A triggers a page fault and initiates a migration back to main memory.
 
 With these two features, HMM not only allows a device to mirror process address
-space and keeping both CPU and device page table synchronized, but also lever-
-ages device memory by migrating the part of the data set that is actively being
+space and keeps both CPU and device page tables synchronized, but also
+leverages device memory by migrating the part of the data set that is actively being
 used by the device.
 
 
@@ -151,21 +151,28 @@ registration of an hmm_mirror struct::
 
  int hmm_mirror_register(struct hmm_mirror *mirror,
                          struct mm_struct *mm);
- int hmm_mirror_register_locked(struct hmm_mirror *mirror,
-                                struct mm_struct *mm);
 
-
-The locked variant is to be used when the driver is already holding mmap_sem
-of the mm in write mode. The mirror struct has a set of callbacks that are used
+The mirror struct has a set of callbacks that are used
 to propagate CPU page tables::
 
  struct hmm_mirror_ops {
+     /* release() - release hmm_mirror
+      *
+      * @mirror: pointer to struct hmm_mirror
+      *
+      * This is called when the mm_struct is being released.  The callback
+      * must ensure that all access to any pages obtained from this mirror
+      * is halted before the callback returns. All future access should
+      * fault.
+      */
+     void (*release)(struct hmm_mirror *mirror);
+
      /* sync_cpu_device_pagetables() - synchronize page tables
       *
       * @mirror: pointer to struct hmm_mirror
-      * @update_type: type of update that occurred to the CPU page table
-      * @start: virtual start address of the range to update
-      * @end: virtual end address of the range to update
+      * @update: update information (see struct mmu_notifier_range)
+      * Return: -EAGAIN if update.blockable false and callback need to
+      *         block, 0 otherwise.
       *
       * This callback ultimately originates from mmu_notifiers when the CPU
       * page table is updated. The device driver must update its page table
@@ -176,14 +183,12 @@ to propagate CPU page tables::
       * page tables are completely updated (TLBs flushed, etc); this is a
       * synchronous call.
       */
-      void (*update)(struct hmm_mirror *mirror,
-                     enum hmm_update action,
-                     unsigned long start,
-                     unsigned long end);
+     int (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
+                                       const struct hmm_update *update);
  };
 
 The device driver must perform the update action to the range (mark range
-read only, or fully unmap, ...). The device must be done with the update before
+read only, or fully unmap, etc.). The device must complete the update before
 the driver callback returns.
 
 When the device driver wants to populate a range of virtual addresses, it can
@@ -194,17 +199,18 @@ use either::
 
 The first one (hmm_range_snapshot()) will only fetch present CPU page table
 entries and will not trigger a page fault on missing or non-present entries.
-The second one does trigger a page fault on missing or read-only entry if the
-write parameter is true. Page faults use the generic mm page fault code path
-just like a CPU page fault.
+The second one does trigger a page fault on missing or read-only entries if
+write access is requested (see below). Page faults use the generic mm page
+fault code path just like a CPU page fault.
 
 Both functions copy CPU page table entries into their pfns array argument. Each
 entry in that array corresponds to an address in the virtual range. HMM
 provides a set of flags to help the driver identify special CPU page table
 entries.
 
-Locking with the update() callback is the most important aspect the driver must
-respect in order to keep things properly synchronized. The usage pattern is::
+Locking within the sync_cpu_device_pagetables() callback is the most important
+aspect the driver must respect in order to keep things properly synchronized.
+The usage pattern is::
 
  int driver_populate_range(...)
  {
@@ -239,11 +245,11 @@ respect in order to keep things properly synchronized. The usage pattern is::
             hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
             goto again;
           }
-          hmm_mirror_unregister(&range);
+          hmm_range_unregister(&range);
           return ret;
       }
       take_lock(driver->update);
-      if (!range.valid) {
+      if (!hmm_range_valid(&range)) {
           release_lock(driver->update);
           up_read(&mm->mmap_sem);
           goto again;
@@ -251,15 +257,15 @@ respect in order to keep things properly synchronized. The usage pattern is::
 
       // Use pfns array content to update device page table
 
-      hmm_mirror_unregister(&range);
+      hmm_range_unregister(&range);
       release_lock(driver->update);
       up_read(&mm->mmap_sem);
       return 0;
  }
 
 The driver->update lock is the same lock that the driver takes inside its
-update() callback. That lock must be held before checking the range.valid
-field to avoid any race with a concurrent CPU page table update.
+sync_cpu_device_pagetables() callback. That lock must be held before calling
+hmm_range_valid() to avoid any race with a concurrent CPU page table update.
 
 HMM implements all this on top of the mmu_notifier API because we wanted a
 simpler API and also to be able to perform optimizations latter on like doing
@@ -279,46 +285,47 @@ concurrently).
 Leverage default_flags and pfn_flags_mask
 =========================================
 
-The hmm_range struct has 2 fields default_flags and pfn_flags_mask that allows
-to set fault or snapshot policy for a whole range instead of having to set them
-for each entries in the range.
+The hmm_range struct has 2 fields, default_flags and pfn_flags_mask, that specify
+fault or snapshot policy for the whole range instead of having to set them
+for each entry in the pfns array.
+
+For instance, if the device flags for range.flags are::
 
-For instance if the device flags for device entries are:
-    VALID (1 << 63)
-    WRITE (1 << 62)
+    range.flags[HMM_PFN_VALID] = (1 << 63);
+    range.flags[HMM_PFN_WRITE] = (1 << 62);
 
-Now let say that device driver wants to fault with at least read a range then
-it does set::
+and the device driver wants pages for a range with at least read permission,
+it sets::
 
     range->default_flags = (1 << 63);
     range->pfn_flags_mask = 0;
 
-and calls hmm_range_fault() as described above. This will fill fault all page
+and calls hmm_range_fault() as described above. This will fill fault all pages
 in the range with at least read permission.
 
-Now let say driver wants to do the same except for one page in the range for
-which its want to have write. Now driver set::
+Now let's say the driver wants to do the same except for one page in the range for
+which it wants to have write permission. Now driver set::
 
     range->default_flags = (1 << 63);
     range->pfn_flags_mask = (1 << 62);
     range->pfns[index_of_write] = (1 << 62);
 
-With this HMM will fault in all page with at least read (ie valid) and for the
+With this, HMM will fault in all pages with at least read (i.e., valid) and for the
 address == range->start + (index_of_write << PAGE_SHIFT) it will fault with
-write permission ie if the CPU pte does not have write permission set then HMM
+write permission i.e., if the CPU pte does not have write permission set then HMM
 will call handle_mm_fault().
 
-Note that HMM will populate the pfns array with write permission for any entry
-that have write permission within the CPU pte no matter what are the values set
+Note that HMM will populate the pfns array with write permission for any page
+that is mapped with CPU write permission no matter what values are set
 in default_flags or pfn_flags_mask.
 
 
 Represent and manage device memory from core kernel point of view
 =================================================================
 
-Several different designs were tried to support device memory. First one used
-a device specific data structure to keep information about migrated memory and
-HMM hooked itself in various places of mm code to handle any access to
+Several different designs were tried to support device memory. The first one
+used a device specific data structure to keep information about migrated memory
+and HMM hooked itself in various places of mm code to handle any access to
 addresses that were backed by device memory. It turns out that this ended up
 replicating most of the fields of struct page and also needed many kernel code
 paths to be updated to understand this new kind of memory.
@@ -341,7 +348,7 @@ The hmm_devmem_ops is where most of the important things are::
 
  struct hmm_devmem_ops {
      void (*free)(struct hmm_devmem *devmem, struct page *page);
-     int (*fault)(struct hmm_devmem *devmem,
+     vm_fault_t (*fault)(struct hmm_devmem *devmem,
                   struct vm_area_struct *vma,
                   unsigned long addr,
                   struct page *page,
@@ -417,9 +424,9 @@ willing to pay to keep all the code simpler.
 Memory cgroup (memcg) and rss accounting
 ========================================
 
-For now device memory is accounted as any regular page in rss counters (either
+For now, device memory is accounted as any regular page in rss counters (either
 anonymous if device page is used for anonymous, file if device page is used for
-file backed page or shmem if device page is used for shared memory). This is a
+file backed page, or shmem if device page is used for shared memory). This is a
 deliberate choice to keep existing applications, that might start using device
 memory without knowing about it, running unimpacted.
 
@@ -439,6 +446,6 @@ get more experience in how device memory is used and its impact on memory
 resource control.
 
 
-Note that device memory can never be pinned by device driver nor through GUP
+Note that device memory can never be pinned by a device driver nor through GUP
 and thus such memory is always free upon process exit. Or when last reference
 is dropped in case of shared memory or file backed memory.
-- 
cgit 


From b637e0856a6248230e53b5465ab0751f27fdf320 Mon Sep 17 00:00:00 2001
From: Dinh Nguyen <dinguyen@kernel.org>
Date: Wed, 5 Jun 2019 10:05:50 -0500
Subject: dt-bindings: socfpga-dwmac: add "altr, socfpga-stmmac-a10-s10"
 binding

Add the "altr,socfpga-stmmac-a10-s10" binding for Arria10/Agilex/Stratix10
implementation of the stmmac ethernet controller.

On the Arria10, Agilex, and Stratix10 SoCs, there are a few differences from
the Cyclone5 and Arria5:
     - The emac PHY setup bits are in separate registers.
     - The PTP reference clock select mask is different.
     - The register to enable the emac signal from FPGA is different.

Because of these differences, the dwmac-socfpga glue logic driver will
use this new binding to set the appropriate bits for PHY, PTP reference
clock, and signal from FPGA.

Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/socfpga-dwmac.txt | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt
index 17d6819669c8..612a8e8abc88 100644
--- a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt
@@ -6,11 +6,17 @@ present in Documentation/devicetree/bindings/net/stmmac.txt.
 The device node has additional properties:
 
 Required properties:
- - compatible	: Should contain "altr,socfpga-stmmac" along with
-		  "snps,dwmac" and any applicable more detailed
+ - compatible	: For Cyclone5/Arria5 SoCs it should contain
+		  "altr,socfpga-stmmac". For Arria10/Agilex/Stratix10 SoCs
+		  "altr,socfpga-stmmac-a10-s10".
+		  Along with "snps,dwmac" and any applicable more detailed
 		  designware version numbers documented in stmmac.txt
  - altr,sysmgr-syscon : Should be the phandle to the system manager node that
    encompasses the glue register, the register offset, and the register shift.
+   On Cyclone5/Arria5, the register shift represents the PHY mode bits, while
+   on the Arria10/Stratix10/Agilex platforms, the register shift represents
+   bit for each emac to enable/disable signals from the FPGA fabric to the
+   EMAC modules.
  - altr,f2h_ptp_ref_clk use f2h_ptp_ref_clk instead of default eosc1 clock
    for ptp ref clk. This affects all emacs as the clock is common.
 
-- 
cgit 


From 8447d84e35f2c3b2e76f80319d0b547a9521bafa Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Date: Tue, 15 Jan 2019 12:19:53 +0000
Subject: dt-bindings: virtio-mmio: Add IOMMU description

The nature of a virtio-mmio node is discovered by the virtio driver at
probe time. However the DMA relation between devices must be described
statically. When a virtio-mmio node is a virtio-iommu device, it needs an
"#iommu-cells" property as specified by bindings/iommu/iommu.txt.

Otherwise, the virtio-mmio device may perform DMA through an IOMMU, which
requires an "iommus" property. Describe these requirements in the
device-tree bindings documentation.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 Documentation/devicetree/bindings/virtio/mmio.txt | 30 +++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/virtio/mmio.txt b/Documentation/devicetree/bindings/virtio/mmio.txt
index 5069c1b8e193..21af30fbb81f 100644
--- a/Documentation/devicetree/bindings/virtio/mmio.txt
+++ b/Documentation/devicetree/bindings/virtio/mmio.txt
@@ -8,10 +8,40 @@ Required properties:
 - reg:		control registers base address and size including configuration space
 - interrupts:	interrupt generated by the device
 
+Required properties for virtio-iommu:
+
+- #iommu-cells:	When the node corresponds to a virtio-iommu device, it is
+		linked to DMA masters using the "iommus" or "iommu-map"
+		properties [1][2]. #iommu-cells specifies the size of the
+		"iommus" property. For virtio-iommu #iommu-cells must be
+		1, each cell describing a single endpoint ID.
+
+Optional properties:
+
+- iommus:	If the device accesses memory through an IOMMU, it should
+		have an "iommus" property [1]. Since virtio-iommu itself
+		does not access memory through an IOMMU, the "virtio,mmio"
+		node cannot have both an "#iommu-cells" and an "iommus"
+		property.
+
 Example:
 
 	virtio_block@3000 {
 		compatible = "virtio,mmio";
 		reg = <0x3000 0x100>;
 		interrupts = <41>;
+
+		/* Device has endpoint ID 23 */
+		iommus = <&viommu 23>
 	}
+
+	viommu: iommu@3100 {
+		compatible = "virtio,mmio";
+		reg = <0x3100 0x100>;
+		interrupts = <42>;
+
+		#iommu-cells = <1>
+	}
+
+[1] Documentation/devicetree/bindings/iommu/iommu.txt
+[2] Documentation/devicetree/bindings/pci/pci-iommu.txt
-- 
cgit 


From 6c9e92ef8bdde13398caae1989b0292524d6475e Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Date: Tue, 15 Jan 2019 12:19:54 +0000
Subject: dt-bindings: virtio: Add virtio-pci-iommu node

Some systems implement virtio-iommu as a PCI endpoint. The operating
system needs to discover the relationship between IOMMU and masters long
before the PCI endpoint gets probed. Add a PCI child node to describe the
virtio-iommu device.

The virtio-pci-iommu is conceptually split between a PCI programming
interface and a translation component on the parent bus. The latter
doesn't have a node in the device tree. The virtio-pci-iommu node
describes both, by linking the PCI endpoint to "iommus" property of DMA
master nodes and to "iommu-map" properties of bus nodes.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 Documentation/devicetree/bindings/virtio/iommu.txt | 66 ++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/virtio/iommu.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/virtio/iommu.txt b/Documentation/devicetree/bindings/virtio/iommu.txt
new file mode 100644
index 000000000000..2407fea0651c
--- /dev/null
+++ b/Documentation/devicetree/bindings/virtio/iommu.txt
@@ -0,0 +1,66 @@
+* virtio IOMMU PCI device
+
+When virtio-iommu uses the PCI transport, its programming interface is
+discovered dynamically by the PCI probing infrastructure. However the
+device tree statically describes the relation between IOMMU and DMA
+masters. Therefore, the PCI root complex that hosts the virtio-iommu
+contains a child node representing the IOMMU device explicitly.
+
+Required properties:
+
+- compatible:	Should be "virtio,pci-iommu"
+- reg:		PCI address of the IOMMU. As defined in the PCI Bus
+		Binding reference [1], the reg property is a five-cell
+		address encoded as (phys.hi phys.mid phys.lo size.hi
+		size.lo). phys.hi should contain the device's BDF as
+		0b00000000 bbbbbbbb dddddfff 00000000. The other cells
+		should be zero.
+- #iommu-cells:	Each platform DMA master managed by the IOMMU is assigned
+		an endpoint ID, described by the "iommus" property [2].
+		For virtio-iommu, #iommu-cells must be 1.
+
+Notes:
+
+- DMA from the IOMMU device isn't managed by another IOMMU. Therefore the
+  virtio-iommu node doesn't have an "iommus" property, and is omitted from
+  the iommu-map property of the root complex.
+
+Example:
+
+pcie@10000000 {
+	compatible = "pci-host-ecam-generic";
+	...
+
+	/* The IOMMU programming interface uses slot 00:01.0 */
+	iommu0: iommu@0008 {
+		compatible = "virtio,pci-iommu";
+		reg = <0x00000800 0 0 0 0>;
+		#iommu-cells = <1>;
+	};
+
+	/*
+	 * The IOMMU manages all functions in this PCI domain except
+	 * itself. Omit BDF 00:01.0.
+	 */
+	iommu-map = <0x0 &iommu0 0x0 0x8>
+		    <0x9 &iommu0 0x9 0xfff7>;
+};
+
+pcie@20000000 {
+	compatible = "pci-host-ecam-generic";
+	...
+	/*
+	 * The IOMMU also manages all functions from this domain,
+	 * with endpoint IDs 0x10000 - 0x1ffff
+	 */
+	iommu-map = <0x0 &iommu0 0x10000 0x10000>;
+};
+
+ethernet@fe001000 {
+	...
+	/* The IOMMU manages this platform device with endpoint ID 0x20000 */
+	iommus = <&iommu0 0x20000>;
+};
+
+[1] Documentation/devicetree/bindings/pci/pci.txt
+[2] Documentation/devicetree/bindings/iommu/iommu.txt
-- 
cgit 


From 3d8b6e9c774ff57ad2860f79abd9cdb8f29ecaf8 Mon Sep 17 00:00:00 2001
From: Fabien Parent <fparent@baylibre.com>
Date: Thu, 2 May 2019 14:18:42 +0200
Subject: dt-bindings: mediatek: audsys: add support for MT8516

Add AUDSYS device tree bindings documentation for MediaTek MT8516 SoC.

Signed-off-by: Fabien Parent <fparent@baylibre.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt
index f3cef1a6d95c..07c9d813465c 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt
@@ -10,6 +10,7 @@ Required Properties:
 	- "mediatek,mt7622-audsys", "syscon"
 	- "mediatek,mt7623-audsys", "mediatek,mt2701-audsys", "syscon"
 	- "mediatek,mt8183-audiosys", "syscon"
+	- "mediatek,mt8516-audsys", "syscon"
 - #clock-cells: Must be 1
 
 The AUDSYS controller uses the common clk binding from
-- 
cgit 


From 09a0354cadec267be7f5249c89eb998b3474263a Mon Sep 17 00:00:00 2001
From: Robert Hancock <hancock@sedsystems.ca>
Date: Thu, 6 Jun 2019 16:28:09 -0600
Subject: net: axienet: Use clock framework to get device clock rate

This driver was previously always calculating the MDIO clock divisor
(from AXI bus clock to MDIO bus clock) based on the CPU clock frequency,
assuming that it is the same as the AXI bus frequency, but that
simplistic method only works on the MicroBlaze platform.

Add support for specifying the clock used for the device in the device
tree using the clock framework. If the clock is specified then it will
be used when calculating the clock divisor. The previous CPU clock
detection method is left for backward compatibility if no clock is
specified.

Signed-off-by: Robert Hancock <hancock@sedsystems.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/xilinx_axienet.txt | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/xilinx_axienet.txt b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
index 38f9ec076743..2dea90332ebb 100644
--- a/Documentation/devicetree/bindings/net/xilinx_axienet.txt
+++ b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
@@ -31,6 +31,11 @@ Optional properties:
 		  1 to enable partial TX checksum offload,
 		  2 to enable full TX checksum offload
 - xlnx,rxcsum	: Same values as xlnx,txcsum but for RX checksum offload
+- clocks	: AXI bus clock for the device. Refer to common clock bindings.
+		  Used to calculate MDIO clock divisor. If not specified, it is
+		  auto-detected from the CPU clock (but only on platforms where
+		  this is possible). New device trees should specify this - the
+		  auto detection is only for backward compatibility.
 
 Example:
 	axi_ethernet_eth: ethernet@40c00000 {
@@ -38,6 +43,7 @@ Example:
 		device_type = "network";
 		interrupt-parent = <&microblaze_0_axi_intc>;
 		interrupts = <2 0>;
+		clocks = <&axi_clk>;
 		phy-mode = "mii";
 		reg = <0x40c00000 0x40000>;
 		xlnx,rxcsum = <0x2>;
-- 
cgit 


From 522856cefaf09d1a06ddc02535c7e1e81730c278 Mon Sep 17 00:00:00 2001
From: Robert Hancock <hancock@sedsystems.ca>
Date: Thu, 6 Jun 2019 16:28:16 -0600
Subject: net: axienet: Add optional support for Ethernet core interrupt

Previously this driver only handled interrupts from the DMA RX and TX
blocks, not from the Ethernet core itself. Add optional support for
the Ethernet core interrupt, which is used to detect rx_missed and
framing errors signalled by the hardware. In order to use this
interrupt, a third interrupt needs to be specified in the device tree.

Signed-off-by: Robert Hancock <hancock@sedsystems.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/xilinx_axienet.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/xilinx_axienet.txt b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
index 2dea90332ebb..0be335c7abed 100644
--- a/Documentation/devicetree/bindings/net/xilinx_axienet.txt
+++ b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
@@ -18,7 +18,8 @@ Required properties:
 - compatible	: Must be one of "xlnx,axi-ethernet-1.00.a",
 		  "xlnx,axi-ethernet-1.01.a", "xlnx,axi-ethernet-2.01.a"
 - reg		: Address and length of the IO space.
-- interrupts	: Should be a list of two interrupt, TX and RX.
+- interrupts	: Should be a list of 2 or 3 interrupts: TX DMA, RX DMA,
+		  and optionally Ethernet core.
 - phy-handle	: Should point to the external phy device.
 		  See ethernet.txt file in the same directory.
 - xlnx,rxmem	: Set to allocated memory buffer for Rx/Tx in the hardware
-- 
cgit 


From a4ebb2997c10f3e8b7b2de27c0f2cf3e4123ed09 Mon Sep 17 00:00:00 2001
From: Robert Hancock <hancock@sedsystems.ca>
Date: Thu, 6 Jun 2019 16:28:20 -0600
Subject: net: axienet: document device tree mdio child node

The mdio child node for the MDIO bus is generally required when using
this driver but was not documented other than being shown in the
example. Document it as an optional (but usually required) parameter.

Signed-off-by: Robert Hancock <hancock@sedsystems.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/xilinx_axienet.txt | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/xilinx_axienet.txt b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
index 0be335c7abed..a8be67b425fd 100644
--- a/Documentation/devicetree/bindings/net/xilinx_axienet.txt
+++ b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
@@ -37,6 +37,9 @@ Optional properties:
 		  auto-detected from the CPU clock (but only on platforms where
 		  this is possible). New device trees should specify this - the
 		  auto detection is only for backward compatibility.
+ - mdio		: Child node for MDIO bus. Must be defined if PHY access is
+		  required through the core's MDIO interface (i.e. always,
+		  unless the PHY is accessed through a different bus).
 
 Example:
 	axi_ethernet_eth: ethernet@40c00000 {
-- 
cgit 


From a1765c1850be90f621fb69751ccaa3370cf60571 Mon Sep 17 00:00:00 2001
From: Robert Hancock <hancock@sedsystems.ca>
Date: Thu, 6 Jun 2019 16:28:22 -0600
Subject: net: axienet: document axistream-connected attribute

The axienet driver requires the use of an axistream-connected attribute,
but this isn't documented in the devicetree bindings. Document how this
attribute is supposed to be used, including the upcoming change to make
the usage of this attribute optional.

Signed-off-by: Robert Hancock <hancock@sedsystems.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/xilinx_axienet.txt        | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/xilinx_axienet.txt b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
index a8be67b425fd..7360617cdedb 100644
--- a/Documentation/devicetree/bindings/net/xilinx_axienet.txt
+++ b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
@@ -17,9 +17,15 @@ For more details about mdio please refer phy.txt file in the same directory.
 Required properties:
 - compatible	: Must be one of "xlnx,axi-ethernet-1.00.a",
 		  "xlnx,axi-ethernet-1.01.a", "xlnx,axi-ethernet-2.01.a"
-- reg		: Address and length of the IO space.
+- reg		: Address and length of the IO space, as well as the address
+                  and length of the AXI DMA controller IO space, unless
+                  axistream-connected is specified, in which case the reg
+                  attribute of the node referenced by it is used.
 - interrupts	: Should be a list of 2 or 3 interrupts: TX DMA, RX DMA,
-		  and optionally Ethernet core.
+		  and optionally Ethernet core. If axistream-connected is
+		  specified, the TX/RX DMA interrupts should be on that node
+		  instead, and only the Ethernet core interrupt is optionally
+		  specified here.
 - phy-handle	: Should point to the external phy device.
 		  See ethernet.txt file in the same directory.
 - xlnx,rxmem	: Set to allocated memory buffer for Rx/Tx in the hardware
@@ -37,6 +43,11 @@ Optional properties:
 		  auto-detected from the CPU clock (but only on platforms where
 		  this is possible). New device trees should specify this - the
 		  auto detection is only for backward compatibility.
+- axistream-connected: Reference to another node which contains the resources
+		       for the AXI DMA controller used by this device.
+		       If this is specified, the DMA-related resources from that
+		       device (DMA registers and DMA TX/RX interrupts) rather
+		       than this one will be used.
  - mdio		: Child node for MDIO bus. Must be defined if PHY access is
 		  required through the core's MDIO interface (i.e. always,
 		  unless the PHY is accessed through a different bus).
@@ -46,10 +57,10 @@ Example:
 		compatible = "xlnx,axi-ethernet-1.00.a";
 		device_type = "network";
 		interrupt-parent = <&microblaze_0_axi_intc>;
-		interrupts = <2 0>;
+		interrupts = <2 0 1>;
 		clocks = <&axi_clk>;
 		phy-mode = "mii";
-		reg = <0x40c00000 0x40000>;
+		reg = <0x40c00000 0x40000 0x50c00000 0x40000>;
 		xlnx,rxcsum = <0x2>;
 		xlnx,rxmem = <0x800>;
 		xlnx,txcsum = <0x2>;
-- 
cgit 


From 072a551fd5cff2329ad151c7c25b7958afc0f149 Mon Sep 17 00:00:00 2001
From: Jeffrey Hugo <jeffrey.l.hugo@gmail.com>
Date: Tue, 28 May 2019 09:47:40 -0700
Subject: dt-bindings: clock: Document gpucc for msm8998

The GPU for msm8998 has its own clock controller.  Document it.

Signed-off-by: Jeffrey Hugo <jeffrey.l.hugo@gmail.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 Documentation/devicetree/bindings/clock/qcom,gpucc.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/clock/qcom,gpucc.txt b/Documentation/devicetree/bindings/clock/qcom,gpucc.txt
index 4e5215ef1acd..269afe8a757e 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gpucc.txt
+++ b/Documentation/devicetree/bindings/clock/qcom,gpucc.txt
@@ -2,13 +2,15 @@ Qualcomm Graphics Clock & Reset Controller Binding
 --------------------------------------------------
 
 Required properties :
-- compatible : shall contain "qcom,sdm845-gpucc"
+- compatible : shall contain "qcom,sdm845-gpucc" or "qcom,msm8998-gpucc"
 - reg : shall contain base register location and length
 - #clock-cells : from common clock binding, shall contain 1
 - #reset-cells : from common reset binding, shall contain 1
 - #power-domain-cells : from generic power domain binding, shall contain 1
 - clocks : shall contain the XO clock
+	   shall contain the gpll0 out main clock (msm8998)
 - clock-names : shall be "xo"
+		shall be "gpll0" (msm8998)
 
 Example:
 	gpucc: clock-controller@5090000 {
-- 
cgit 


From df6bd6c002a4dee73d274a87ef1c0e36f21d2583 Mon Sep 17 00:00:00 2001
From: Ludovic Barre <ludovic.barre@st.com>
Date: Tue, 7 May 2019 11:16:38 +0200
Subject: mtd: spi-nor: stm32: remove the driver as it was replaced by
 spi-stm32-qspi.c

There's a new driver using the SPI memory interface of the
SPI framework at spi/spi-stm32-qspi.c, which can be used
together with m25p80.c to replace the functionality of
this SPI NOR driver.

The "new" driver uses the same dt properties and not affects
the legacy compatibility.

Signed-off-by: Ludovic Barre <ludovic.barre@st.com>
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
---
 .../devicetree/bindings/mtd/stm32-quadspi.txt      | 43 ----------------------
 1 file changed, 43 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/mtd/stm32-quadspi.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mtd/stm32-quadspi.txt b/Documentation/devicetree/bindings/mtd/stm32-quadspi.txt
deleted file mode 100644
index ddd18c135148..000000000000
--- a/Documentation/devicetree/bindings/mtd/stm32-quadspi.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-* STMicroelectronics Quad Serial Peripheral Interface(QuadSPI)
-
-Required properties:
-- compatible: should be "st,stm32f469-qspi"
-- reg: the first contains the register location and length.
-       the second contains the memory mapping address and length
-- reg-names: should contain the reg names "qspi" "qspi_mm"
-- interrupts: should contain the interrupt for the device
-- clocks: the phandle of the clock needed by the QSPI controller
-- A pinctrl must be defined to set pins in mode of operation for QSPI transfer
-
-Optional properties:
-- resets: must contain the phandle to the reset controller.
-
-A spi flash must be a child of the nor_flash node and could have some
-properties. Also see jedec,spi-nor.txt.
-
-Required properties:
-- reg: chip-Select number (QSPI controller may connect 2 nor flashes)
-- spi-max-frequency: max frequency of spi bus
-
-Optional property:
-- spi-rx-bus-width: see ../spi/spi-bus.txt for the description
-
-Example:
-
-qspi: spi@a0001000 {
-	compatible = "st,stm32f469-qspi";
-	reg = <0xa0001000 0x1000>, <0x90000000 0x10000000>;
-	reg-names = "qspi", "qspi_mm";
-	interrupts = <91>;
-	resets = <&rcc STM32F4_AHB3_RESET(QSPI)>;
-	clocks = <&rcc 0 STM32F4_AHB3_CLOCK(QSPI)>;
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_qspi0>;
-
-	flash@0 {
-		reg = <0>;
-		spi-rx-bus-width = <4>;
-		spi-max-frequency = <108000000>;
-		...
-	};
-};
-- 
cgit 


From fb6dda8349ea0a6a68a743b6cb636261f4489983 Mon Sep 17 00:00:00 2001
From: Long Cheng <long.cheng@mediatek.com>
Date: Sat, 27 Apr 2019 11:36:32 +0800
Subject: dt-bindings: dma: uart: rename binding

The filename matches mtk-uart-apdma.c.
So using "mtk-uart-apdma.txt" should be better.
And add some property.

Signed-off-by: Long Cheng <long.cheng@mediatek.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../devicetree/bindings/dma/8250_mtk_dma.txt       | 33 -------------
 .../devicetree/bindings/dma/mtk-uart-apdma.txt     | 54 ++++++++++++++++++++++
 2 files changed, 54 insertions(+), 33 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/dma/8250_mtk_dma.txt
 create mode 100644 Documentation/devicetree/bindings/dma/mtk-uart-apdma.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/dma/8250_mtk_dma.txt b/Documentation/devicetree/bindings/dma/8250_mtk_dma.txt
deleted file mode 100644
index 3fe0961bcf64..000000000000
--- a/Documentation/devicetree/bindings/dma/8250_mtk_dma.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-* Mediatek UART APDMA Controller
-
-Required properties:
-- compatible should contain:
-  * "mediatek,mt2712-uart-dma" for MT2712 compatible APDMA
-  * "mediatek,mt6577-uart-dma" for MT6577 and all of the above
-
-- reg: The base address of the APDMA register bank.
-
-- interrupts: A single interrupt specifier.
-
-- clocks : Must contain an entry for each entry in clock-names.
-  See ../clocks/clock-bindings.txt for details.
-- clock-names: The APDMA clock for register accesses
-
-Examples:
-
-	apdma: dma-controller@11000380 {
-		compatible = "mediatek,mt2712-uart-dma";
-		reg = <0 0x11000380 0 0x400>;
-		interrupts = <GIC_SPI 63 IRQ_TYPE_LEVEL_LOW>,
-			     <GIC_SPI 64 IRQ_TYPE_LEVEL_LOW>,
-			     <GIC_SPI 65 IRQ_TYPE_LEVEL_LOW>,
-			     <GIC_SPI 66 IRQ_TYPE_LEVEL_LOW>,
-			     <GIC_SPI 67 IRQ_TYPE_LEVEL_LOW>,
-			     <GIC_SPI 68 IRQ_TYPE_LEVEL_LOW>,
-			     <GIC_SPI 69 IRQ_TYPE_LEVEL_LOW>,
-			     <GIC_SPI 70 IRQ_TYPE_LEVEL_LOW>;
-		clocks = <&pericfg CLK_PERI_AP_DMA>;
-		clock-names = "apdma";
-		#dma-cells = <1>;
-	};
-
diff --git a/Documentation/devicetree/bindings/dma/mtk-uart-apdma.txt b/Documentation/devicetree/bindings/dma/mtk-uart-apdma.txt
new file mode 100644
index 000000000000..5d6f98c43e3d
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/mtk-uart-apdma.txt
@@ -0,0 +1,54 @@
+* Mediatek UART APDMA Controller
+
+Required properties:
+- compatible should contain:
+  * "mediatek,mt2712-uart-dma" for MT2712 compatible APDMA
+  * "mediatek,mt6577-uart-dma" for MT6577 and all of the above
+
+- reg: The base address of the APDMA register bank.
+
+- interrupts: A single interrupt specifier.
+ One interrupt per dma-requests, or 8 if no dma-requests property is present
+
+- dma-requests: The number of DMA channels
+
+- clocks : Must contain an entry for each entry in clock-names.
+  See ../clocks/clock-bindings.txt for details.
+- clock-names: The APDMA clock for register accesses
+
+- mediatek,dma-33bits: Present if the DMA requires support
+
+Examples:
+
+	apdma: dma-controller@11000400 {
+		compatible = "mediatek,mt2712-uart-dma";
+		reg = <0 0x11000400 0 0x80>,
+		      <0 0x11000480 0 0x80>,
+		      <0 0x11000500 0 0x80>,
+		      <0 0x11000580 0 0x80>,
+		      <0 0x11000600 0 0x80>,
+		      <0 0x11000680 0 0x80>,
+		      <0 0x11000700 0 0x80>,
+		      <0 0x11000780 0 0x80>,
+		      <0 0x11000800 0 0x80>,
+		      <0 0x11000880 0 0x80>,
+		      <0 0x11000900 0 0x80>,
+		      <0 0x11000980 0 0x80>;
+		interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 104 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 105 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 106 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 107 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 108 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 109 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 110 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 111 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 112 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 113 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 114 IRQ_TYPE_LEVEL_LOW>;
+		dma-requests = <12>;
+		clocks = <&pericfg CLK_PERI_AP_DMA>;
+		clock-names = "apdma";
+		mediatek,dma-33bits;
+		#dma-cells = <1>;
+	};
-- 
cgit 


From 99d2b938672944831035bef50c68a6e948e93abf Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Fri, 7 Jun 2019 16:47:13 +0900
Subject: Documentation: DMA-API: fix a function name of max_mapping_size

The exported function name is dma_max_mapping_size(), not
dma_direct_max_mapping_size() so that this patch fixes
the function name in the documentation.

Fixes: 133d624b1cee ("dma: Introduce dma_max_mapping_size()")
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/DMA-API.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 0076150fdccb..e47c63bd4887 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -198,7 +198,7 @@ call to set the mask to the value returned.
 ::
 
 	size_t
-	dma_direct_max_mapping_size(struct device *dev);
+	dma_max_mapping_size(struct device *dev);
 
 Returns the maximum size of a mapping for the device. The size parameter
 of the mapping functions like dma_map_single(), dma_map_page() and
-- 
cgit 


From 4241d516b0041ae55092fb12739e12184427de5d Mon Sep 17 00:00:00 2001
From: Helen Koike <helen.koike@collabora.com>
Date: Tue, 4 Jun 2019 15:27:19 -0300
Subject: Documentation/dm-init: fix multi device example

The example in the docs regarding multiple device-mappers is invalid (it
has a wrong number of arguments), it's a left over from previous
versions of the patch.
Replace the example with an valid and tested one.

Signed-off-by: Helen Koike <helen.koike@collabora.com>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/device-mapper/dm-init.txt | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/device-mapper/dm-init.txt b/Documentation/device-mapper/dm-init.txt
index 8464ee7c01b8..130b3c3679c5 100644
--- a/Documentation/device-mapper/dm-init.txt
+++ b/Documentation/device-mapper/dm-init.txt
@@ -74,13 +74,13 @@ this target to /dev/mapper/lroot (depending on the rules). No uuid was assigned.
 An example of multiple device-mappers, with the dm-mod.create="..." contents is shown here
 split on multiple lines for readability:
 
-  vroot,,,ro,
-    0 1740800 verity 254:0 254:0 1740800 sha1
-      76e9be054b15884a9fa85973e9cb274c93afadb6
-      5b3549d54d6c7a3837b9b81ed72e49463a64c03680c47835bef94d768e5646fe;
-  vram,,,rw,
-    0 32768 linear 1:0 0,
-    32768 32768 linear 1:1 0
+  dm-linear,,1,rw,
+    0 32768 linear 8:1 0,
+    32768 1024000 linear 8:2 0;
+  dm-verity,,3,ro,
+    0 1638400 verity 1 /dev/sdc1 /dev/sdc2 4096 4096 204800 1 sha256
+    ac87db56303c9c1da433d7209b5a6ef3e4779df141200cbd7c157dcb8dd89c42
+    5ebfe87f7df3235b80a117ebc4078e44f55045487ad4a96581d1adb564615b51
 
 Other examples (per target):
 
-- 
cgit 


From e0cef9ff6315d48a4dfd39da09ca770e242f9cb5 Mon Sep 17 00:00:00 2001
From: Aurelien Thierry <aurelien.thierry@quoscient.io>
Date: Fri, 7 Jun 2019 10:07:02 +0200
Subject: Documentation: fix typo CLOCK_MONONOTNIC_COARSE

Fix typo in documentation file timekeeping.rst: CLOCK_MONONOTNIC_COARSE
should be CLOCK_MONOTONIC_COARSE.

Signed-off-by: Aurelien Thierry <aurelien.thierry@quoscient.io>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/core-api/timekeeping.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst
index 93cbeb9daec0..5f87d9c8b04d 100644
--- a/Documentation/core-api/timekeeping.rst
+++ b/Documentation/core-api/timekeeping.rst
@@ -111,7 +111,7 @@ Some additional variants exist for more specialized cases:
 		void ktime_get_coarse_raw_ts64( struct timespec64 * )
 
 	These are quicker than the non-coarse versions, but less accurate,
-	corresponding to CLOCK_MONONOTNIC_COARSE and CLOCK_REALTIME_COARSE
+	corresponding to CLOCK_MONOTONIC_COARSE and CLOCK_REALTIME_COARSE
 	in user space, along with the equivalent boottime/tai/raw
 	timebase not available in user space.
 
-- 
cgit 


From e47cf0c958775700c74223a1f21a8b3457c57069 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 7 Jun 2019 13:07:29 +0200
Subject: Documentation: tee: Grammar s/the its/its/

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/tee.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/tee.txt b/Documentation/tee.txt
index 56ea85ffebf2..afacdf2fd1de 100644
--- a/Documentation/tee.txt
+++ b/Documentation/tee.txt
@@ -32,7 +32,7 @@ User space (the client) connects to the driver by opening /dev/tee[0-9]* or
   memory.
 
 - TEE_IOC_VERSION lets user space know which TEE this driver handles and
-  the its capabilities.
+  its capabilities.
 
 - TEE_IOC_OPEN_SESSION opens a new session to a Trusted Application.
 
-- 
cgit 


From 6fb44c439eda692f94cf60aad55f130a34204ece Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 7 Jun 2019 13:08:42 +0200
Subject: Documentation: net: dsa: Grammar s/the its/its/

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/networking/dsa/dsa.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index ca87068b9ab9..563d56c6a25c 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -531,7 +531,7 @@ Bridge VLAN filtering
   a software implementation.
 
 .. note:: VLAN ID 0 corresponds to the port private database, which, in the context
-        of DSA, would be the its port-based VLAN, used by the associated bridge device.
+        of DSA, would be its port-based VLAN, used by the associated bridge device.
 
 - ``port_fdb_del``: bridge layer function invoked when the bridge wants to remove a
   Forwarding Database entry, the switch hardware should be programmed to delete
@@ -554,7 +554,7 @@ Bridge VLAN filtering
   associated with this VLAN ID.
 
 .. note:: VLAN ID 0 corresponds to the port private database, which, in the context
-        of DSA, would be the its port-based VLAN, used by the associated bridge device.
+        of DSA, would be its port-based VLAN, used by the associated bridge device.
 
 - ``port_mdb_del``: bridge layer function invoked when the bridge wants to remove a
   multicast database entry, the switch hardware should be programmed to delete
-- 
cgit 


From 3f9564e680efb2092dfb826e2f768920c9eb203b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 7 Jun 2019 13:29:51 +0200
Subject: KVM: arm/arm64: Always capitalize ITS

All but one reference is capitalized.  Fix the remaining one.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/virtual/kvm/devices/arm-vgic-its.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
index 4f0c9fc40365..eeaa95b893a8 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic-its.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
@@ -103,7 +103,7 @@ Groups:
 The following ordering must be followed when restoring the GIC and the ITS:
 a) restore all guest memory and create vcpus
 b) restore all redistributors
-c) provide the its base address
+c) provide the ITS base address
    (KVM_DEV_ARM_VGIC_GRP_ADDR)
 d) restore the ITS in the following order:
    1. Restore GITS_CBASER
-- 
cgit 


From b1663d7e3a7961fc45262fd68a89253f2803036c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 4 Jun 2019 09:26:27 -0300
Subject: docs: Kbuild/Makefile: allow check for missing docs at build time

While this doesn't make sense for production Kernels, in order to
avoid regressions when documents are touched, let's add a
check target at the make file.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/Kconfig  | 13 +++++++++++++
 Documentation/Makefile |  5 +++++
 2 files changed, 18 insertions(+)
 create mode 100644 Documentation/Kconfig

(limited to 'Documentation')

diff --git a/Documentation/Kconfig b/Documentation/Kconfig
new file mode 100644
index 000000000000..66046fa1c341
--- /dev/null
+++ b/Documentation/Kconfig
@@ -0,0 +1,13 @@
+config WARN_MISSING_DOCUMENTS
+
+	bool "Warn if there's a missing documentation file"
+	depends on COMPILE_TEST
+	help
+	   It is not uncommon that a document gets renamed.
+	   This option makes the Kernel to check for missing dependencies,
+	   warning when something is missing. Works only if the Kernel
+	   is built from a git tree.
+
+	   If unsure, select 'N'.
+
+
diff --git a/Documentation/Makefile b/Documentation/Makefile
index 2df0789f90b7..e145e4db508b 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -4,6 +4,11 @@
 
 subdir-y := devicetree/bindings/
 
+# Check for broken documentation file references
+ifeq ($(CONFIG_WARN_MISSING_DOCUMENTS),y)
+$(shell $(srctree)/scripts/documentation-file-ref-check --warn)
+endif
+
 # You can set these variables from the command line.
 SPHINXBUILD   = sphinx-build
 SPHINXOPTS    =
-- 
cgit 


From 889aa9ca930602a0e860cfb89e467c2a7a729b1b Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca@lucaceresoli.net>
Date: Fri, 31 May 2019 16:30:16 +0200
Subject: docs: clk: fix struct syntax

The clk_foo_ops struct example has syntax errors. Fix it so it can be
copy-pasted and used more easily.

Signed-off-by: Luca Ceresoli <luca@lucaceresoli.net>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/clk.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/clk.rst b/Documentation/driver-api/clk.rst
index 593cca5058b1..3cad45d14187 100644
--- a/Documentation/driver-api/clk.rst
+++ b/Documentation/driver-api/clk.rst
@@ -175,9 +175,9 @@ the following::
 To take advantage of your data you'll need to support valid operations
 for your clk::
 
-	struct clk_ops clk_foo_ops {
-		.enable		= &clk_foo_enable;
-		.disable	= &clk_foo_disable;
+	struct clk_ops clk_foo_ops = {
+		.enable		= &clk_foo_enable,
+		.disable	= &clk_foo_disable,
 	};
 
 Implement the above functions using container_of::
-- 
cgit 


From 165915c17d681c61962251728d72ecdabe95518e Mon Sep 17 00:00:00 2001
From: Federico Vaga <federico.vaga@vaga.pv.it>
Date: Thu, 30 May 2019 22:14:54 +0200
Subject: doc:it_IT: fix file references

Fix italian translation file references based on
`scripts/documentation-file-ref-check` output.

Signed-off-by: Federico Vaga <federico.vaga@vaga.pv.it>
Reviewed-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/it_IT/admin-guide/kernel-parameters.rst     | 12 ++++++++++++
 Documentation/translations/it_IT/process/adding-syscalls.rst |  2 +-
 Documentation/translations/it_IT/process/coding-style.rst    |  2 +-
 Documentation/translations/it_IT/process/howto.rst           |  2 +-
 Documentation/translations/it_IT/process/magic-number.rst    |  2 +-
 .../translations/it_IT/process/stable-kernel-rules.rst       |  4 ++--
 6 files changed, 18 insertions(+), 6 deletions(-)
 create mode 100644 Documentation/translations/it_IT/admin-guide/kernel-parameters.rst

(limited to 'Documentation')

diff --git a/Documentation/translations/it_IT/admin-guide/kernel-parameters.rst b/Documentation/translations/it_IT/admin-guide/kernel-parameters.rst
new file mode 100644
index 000000000000..0e36d82a92be
--- /dev/null
+++ b/Documentation/translations/it_IT/admin-guide/kernel-parameters.rst
@@ -0,0 +1,12 @@
+.. include:: ../disclaimer-ita.rst
+
+:Original: :ref:`Documentation/admin-guide/kernel-parameters.rst <kernelparameters>`
+
+.. _it_kernelparameters:
+
+I parametri da linea di comando del kernel
+==========================================
+
+.. warning::
+
+    TODO ancora da tradurre
diff --git a/Documentation/translations/it_IT/process/adding-syscalls.rst b/Documentation/translations/it_IT/process/adding-syscalls.rst
index e0a64b0688a7..c3a3439595a6 100644
--- a/Documentation/translations/it_IT/process/adding-syscalls.rst
+++ b/Documentation/translations/it_IT/process/adding-syscalls.rst
@@ -39,7 +39,7 @@ vostra interfaccia.
        un qualche modo opaca.
 
  - Se dovete esporre solo delle informazioni sul sistema, un nuovo nodo in
-   sysfs (vedere ``Documentation/translations/it_IT/filesystems/sysfs.txt``) o
+   sysfs (vedere ``Documentation/filesystems/sysfs.txt``) o
    in procfs potrebbe essere sufficiente.  Tuttavia, l'accesso a questi
    meccanismi richiede che il filesystem sia montato, il che potrebbe non
    essere sempre vero (per esempio, in ambienti come namespace/sandbox/chroot).
diff --git a/Documentation/translations/it_IT/process/coding-style.rst b/Documentation/translations/it_IT/process/coding-style.rst
index 5ef534c95e69..a6559d25a23d 100644
--- a/Documentation/translations/it_IT/process/coding-style.rst
+++ b/Documentation/translations/it_IT/process/coding-style.rst
@@ -696,7 +696,7 @@ nella stringa di titolo::
 	...
 
 Per la documentazione completa sui file di configurazione, consultate
-il documento Documentation/translations/it_IT/kbuild/kconfig-language.txt
+il documento Documentation/kbuild/kconfig-language.txt
 
 
 11) Strutture dati
diff --git a/Documentation/translations/it_IT/process/howto.rst b/Documentation/translations/it_IT/process/howto.rst
index 9903ac7c566b..44e6077730e8 100644
--- a/Documentation/translations/it_IT/process/howto.rst
+++ b/Documentation/translations/it_IT/process/howto.rst
@@ -131,7 +131,7 @@ Di seguito una lista di file che sono presenti nei sorgente del kernel e che
 	"Linux kernel patch submission format"
 		http://linux.yyz.us/patch-format.html
 
-  :ref:`Documentation/process/translations/it_IT/stable-api-nonsense.rst <it_stable_api_nonsense>`
+  :ref:`Documentation/translations/it_IT/process/stable-api-nonsense.rst <it_stable_api_nonsense>`
 
     Questo file descrive la motivazioni sottostanti la conscia decisione di
     non avere un API stabile all'interno del kernel, incluso cose come:
diff --git a/Documentation/translations/it_IT/process/magic-number.rst b/Documentation/translations/it_IT/process/magic-number.rst
index 5281d53e57ee..ed1121d0ba84 100644
--- a/Documentation/translations/it_IT/process/magic-number.rst
+++ b/Documentation/translations/it_IT/process/magic-number.rst
@@ -1,6 +1,6 @@
 .. include:: ../disclaimer-ita.rst
 
-:Original: :ref:`Documentation/process/magic-numbers.rst <magicnumbers>`
+:Original: :ref:`Documentation/process/magic-number.rst <magicnumbers>`
 :Translator: Federico Vaga <federico.vaga@vaga.pv.it>
 
 .. _it_magicnumbers:
diff --git a/Documentation/translations/it_IT/process/stable-kernel-rules.rst b/Documentation/translations/it_IT/process/stable-kernel-rules.rst
index 48e88e5ad2c5..4f206cee31a7 100644
--- a/Documentation/translations/it_IT/process/stable-kernel-rules.rst
+++ b/Documentation/translations/it_IT/process/stable-kernel-rules.rst
@@ -33,7 +33,7 @@ Regole sul tipo di patch che vengono o non vengono accettate nei sorgenti
  - Non deve includere alcuna correzione "banale" (correzioni grammaticali,
    pulizia dagli spazi bianchi, eccetera).
  - Deve rispettare le regole scritte in
-   :ref:`Documentation/translation/it_IT/process/submitting-patches.rst <it_submittingpatches>`
+   :ref:`Documentation/translations/it_IT/process/submitting-patches.rst <it_submittingpatches>`
  - Questa patch o una equivalente deve esistere già nei sorgenti principali di
    Linux
 
@@ -43,7 +43,7 @@ Procedura per sottomettere patch per i sorgenti -stable
 
  - Se la patch contiene modifiche a dei file nelle cartelle net/ o drivers/net,
    allora seguite le linee guida descritte in
-   :ref:`Documentation/translation/it_IT/networking/netdev-FAQ.rst <it_netdev-FAQ>`;
+   :ref:`Documentation/translations/it_IT/networking/netdev-FAQ.rst <it_netdev-FAQ>`;
    ma solo dopo aver verificato al seguente indirizzo che la patch non sia
    già in coda:
    https://patchwork.ozlabs.org/bundle/davem/stable/?series=&submitter=&state=*&q=&archive=
-- 
cgit 


From bed0918d64ca28169d55bd138ed20f09e288303e Mon Sep 17 00:00:00 2001
From: Federico Vaga <federico.vaga@vaga.pv.it>
Date: Thu, 30 May 2019 22:14:55 +0200
Subject: doc:it_IT: documentation alignment

Documentation alignment for the following changes:
a700767a7682 (doc/docs-next) docs: requirements.txt: recommend Sphinx 1.7.9

Signed-off-by: Federico Vaga <federico.vaga@vaga.pv.it>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/translations/it_IT/doc-guide/sphinx.rst | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/translations/it_IT/doc-guide/sphinx.rst b/Documentation/translations/it_IT/doc-guide/sphinx.rst
index 793b5cc33403..1739cba8863e 100644
--- a/Documentation/translations/it_IT/doc-guide/sphinx.rst
+++ b/Documentation/translations/it_IT/doc-guide/sphinx.rst
@@ -35,8 +35,7 @@ Installazione Sphinx
 ====================
 
 I marcatori ReST utilizzati nei file in Documentation/ sono pensati per essere
-processati da ``Sphinx`` nella versione 1.3 o superiore. Se desiderate produrre
-un documento PDF è raccomandato l'utilizzo di una versione superiore alle 1.4.6.
+processati da ``Sphinx`` nella versione 1.3 o superiore.
 
 Esiste uno script che verifica i requisiti Sphinx. Per ulteriori dettagli
 consultate :ref:`it_sphinx-pre-install`.
@@ -68,13 +67,13 @@ pacchettizzato dalla vostra distribuzione.
       utilizzando LaTeX. Per una corretta interpretazione, è necessario aver
       installato texlive con i pacchetti amdfonts e amsmath.
 
-Riassumendo, se volete installare la versione 1.4.9 di Sphinx dovete eseguire::
+Riassumendo, se volete installare la versione 1.7.9 di Sphinx dovete eseguire::
 
-       $ virtualenv sphinx_1.4
-       $ . sphinx_1.4/bin/activate
-       (sphinx_1.4) $ pip install -r Documentation/sphinx/requirements.txt
+       $ virtualenv sphinx_1.7.9
+       $ . sphinx_1.7.9/bin/activate
+       (sphinx_1.7.9) $ pip install -r Documentation/sphinx/requirements.txt
 
-Dopo aver eseguito ``. sphinx_1.4/bin/activate``, il prompt cambierà per
+Dopo aver eseguito ``. sphinx_1.7.9/bin/activate``, il prompt cambierà per
 indicare che state usando il nuovo ambiente. Se aprite un nuova sessione,
 prima di generare la documentazione, dovrete rieseguire questo comando per
 rientrare nell'ambiente virtuale.
@@ -120,8 +119,8 @@ l'installazione::
 	You should run:
 
 		sudo dnf install -y texlive-luatex85
-		/usr/bin/virtualenv sphinx_1.4
-		. sphinx_1.4/bin/activate
+		/usr/bin/virtualenv sphinx_1.7.9
+		. sphinx_1.7.9/bin/activate
 		pip install -r Documentation/sphinx/requirements.txt
 
 	Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468.
-- 
cgit 


From 3d9cf48b2ca257f1a249b347236098c3cf9d54f1 Mon Sep 17 00:00:00 2001
From: Shiyang Ruan <ruansy.fnst@cn.fujitsu.com>
Date: Thu, 9 May 2019 15:40:49 +0800
Subject: Documentation: nvdimm: Fix typo

Remove the extra 'we '.

Signed-off-by: Shiyang Ruan <ruansy.fnst@cn.fujitsu.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/nvdimm/nvdimm.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/nvdimm/nvdimm.txt b/Documentation/nvdimm/nvdimm.txt
index e894de69915a..1669f626b037 100644
--- a/Documentation/nvdimm/nvdimm.txt
+++ b/Documentation/nvdimm/nvdimm.txt
@@ -284,8 +284,8 @@ A bus has a 1:1 relationship with an NFIT.  The current expectation for
 ACPI based systems is that there is only ever one platform-global NFIT.
 That said, it is trivial to register multiple NFITs, the specification
 does not preclude it.  The infrastructure supports multiple busses and
-we we use this capability to test multiple NFIT configurations in the
-unit test.
+we use this capability to test multiple NFIT configurations in the unit
+test.
 
 LIBNVDIMM: control class device in /sys/class
 
-- 
cgit 


From 9d61944356590c40b13f6b1f99df84260e4db0c1 Mon Sep 17 00:00:00 2001
From: Shiyang Ruan <ruansy.fnst@cn.fujitsu.com>
Date: Thu, 9 May 2019 11:05:49 +0800
Subject: Documentation: xfs: Fix typo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In "Y+P" of this line, there are two non-ASCII characters(0xd9 0x8d)
following behind the 'Y'.  Shown as a small '=' under the '+' in VIM
and a '賺' in webpage[1].

I think it's a mistake and remove these strange characters.

[1]: https://www.kernel.org/doc/Documentation/filesystems/xfs-delayed-logging-design.txt

Signed-off-by: Shiyang Ruan <ruansy.fnst@cn.fujitsu.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/xfs-delayed-logging-design.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/xfs-delayed-logging-design.txt b/Documentation/filesystems/xfs-delayed-logging-design.txt
index 2ce36439c09f..9a6dd289b17b 100644
--- a/Documentation/filesystems/xfs-delayed-logging-design.txt
+++ b/Documentation/filesystems/xfs-delayed-logging-design.txt
@@ -34,7 +34,7 @@ transaction:
 	   D			A+B+C+D		X+n+m+o
 	    <object written to disk>
 	   E			   E		   Y (> X+n+m+o)
-	   F			  E+F		  Yٍ+p
+	   F			  E+F		  Y+p
 
 In other words, each time an object is relogged, the new transaction contains
 the aggregation of all the previous changes currently held only in the log.
-- 
cgit 


From 462e5a521ab73f7762583add73cbab1662612beb Mon Sep 17 00:00:00 2001
From: "George G. Davis" <george_davis@mentor.com>
Date: Wed, 5 Jun 2019 16:30:10 -0400
Subject: treewide: trivial: fix s/poped/popped/ typo

Fix a couple of s/poped/popped/ typos.

Signed-off-by: George G. Davis <george_davis@mentor.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/arm/mem_alignment | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/arm/mem_alignment b/Documentation/arm/mem_alignment
index 6335fcacbba9..e110e2781039 100644
--- a/Documentation/arm/mem_alignment
+++ b/Documentation/arm/mem_alignment
@@ -1,4 +1,4 @@
-Too many problems poped up because of unnoticed misaligned memory access in
+Too many problems popped up because of unnoticed misaligned memory access in
 kernel code lately.  Therefore the alignment fixup is now unconditionally
 configured in for SA11x0 based targets.  According to Alan Cox, this is a
 bad idea to configure it out, but Russell King has some good reasons for
-- 
cgit 


From 78a89463a31ce463a4b968553f57ff9932a0697f Mon Sep 17 00:00:00 2001
From: Lecopzer Chen <lecopzer.chen@mediatek.com>
Date: Thu, 9 May 2019 18:31:16 +0800
Subject: Documentation: {u,k}probes: add tracing_on before tracing

After following the document step by step, the `cat trace` can't be
worked without enabling tracing_on and might mislead newbies about
the functionality.

Signed-off-by: Lecopzer Chen <lecopzer.chen@mediatek.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/trace/kprobetrace.rst  | 6 ++++++
 Documentation/trace/uprobetracer.rst | 7 ++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index 235ce2ab131a..baa3c42ba2f4 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -189,6 +189,12 @@ events, you need to enable it.
   echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable
   echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable
 
+Use the following command to start tracing in an interval.
+::
+    # echo 1 > tracing_on
+    Open something...
+    # echo 0 > tracing_on
+
 And you can see the traced information via /sys/kernel/debug/tracing/trace.
 ::
 
diff --git a/Documentation/trace/uprobetracer.rst b/Documentation/trace/uprobetracer.rst
index 4346e23e3ae7..0b21305fabdc 100644
--- a/Documentation/trace/uprobetracer.rst
+++ b/Documentation/trace/uprobetracer.rst
@@ -152,10 +152,15 @@ events, you need to enable it by::
 
     # echo 1 > events/uprobes/enable
 
-Lets disable the event after sleeping for some time.
+Lets start tracing, sleep for some time and stop tracing.
 ::
 
+    # echo 1 > tracing_on
     # sleep 20
+    # echo 0 > tracing_on
+
+Also, you can disable the event by::
+
     # echo 0 > events/uprobes/enable
 
 And you can see the traced information via /sys/kernel/debug/tracing/trace.
-- 
cgit 


From 04f4dc1bc5f0f64640026497f97e2f3c063f7831 Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Wed, 5 Jun 2019 07:56:33 -0500
Subject: dt-bindings: leds: Add LED bindings for the LM36274

Add the LM36274 LED specific bindings.

Signed-off-by: Dan Murphy <dmurphy@ti.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 .../devicetree/bindings/leds/leds-lm36274.txt      | 85 ++++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/leds/leds-lm36274.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/leds/leds-lm36274.txt b/Documentation/devicetree/bindings/leds/leds-lm36274.txt
new file mode 100644
index 000000000000..456a589c65f0
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-lm36274.txt
@@ -0,0 +1,85 @@
+* Texas Instruments LM36274 4-Channel LCD Backlight Driver w/Integrated Bias
+
+The LM36274 is an integrated four-channel WLED driver and LCD bias supply.
+The backlight boost provides the power to bias four parallel LED strings with
+up to 29V total output voltage. The 11-bit LED current is programmable via
+the I2C bus and/or controlled via a logic level PWM input from 60 uA to 30 mA.
+
+Parent device properties are documented in
+Documentation/devicetree/bindings/mfd/ti_lmu.txt
+
+Regulator properties are documented in
+Documentation/devicetree/bindings/regulator/lm363x-regulator.txt
+
+Required backlight properties:
+	- compatible:
+		"ti,lm36274-backlight"
+	- reg : 0
+	- #address-cells : 1
+	- #size-cells : 0
+	- led-sources : Indicates which LED strings will be enabled.
+			Values from 0-3, sources is 0 based so strings will be
+			source value + 1.
+
+Optional backlight properties:
+	- label : see Documentation/devicetree/bindings/leds/common.txt
+	- linux,default-trigger :
+	   see Documentation/devicetree/bindings/leds/common.txt
+
+Example:
+
+HVLED string 1 and 3 are controlled by control bank A and HVLED 2 string is
+controlled by control bank B.
+
+lm36274@11 {
+	compatible = "ti,lm36274";
+	#address-cells = <1>;
+	#size-cells = <0>;
+	reg = <0x11>;
+
+	enable-gpios = <&gpio1 28 GPIO_ACTIVE_HIGH>;
+
+	regulators {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "ti,lm363x-regulator";
+
+		enable-gpios = <&pioC 0 GPIO_ACTIVE_HIGH>,
+			       <&pioC 1 GPIO_ACTIVE_HIGH>;
+
+		vboost {
+			regulator-name = "lcd_boost";
+			regulator-min-microvolt = <4000000>;
+			regulator-max-microvolt = <7150000>;
+			regulator-always-on;
+		};
+
+		vpos {
+			regulator-name = "lcd_vpos";
+			regulator-min-microvolt = <4000000>;
+			regulator-max-microvolt = <6500000>;
+		};
+
+		vneg {
+			regulator-name = "lcd_vneg";
+			regulator-min-microvolt = <4000000>;
+			regulator-max-microvolt = <6500000>;
+		};
+	};
+
+	backlight {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "ti,lm36274-backlight";
+
+		led@0 {
+			reg = <0>;
+			led-sources = <0 2>;
+			label = "white:backlight_cluster";
+			linux,default-trigger = "backlight";
+		};
+	};
+};
+
+For more product information please see the link below:
+http://www.ti.com/lit/ds/symlink/lm36274.pdf
-- 
cgit 


From 6c0215f5d9f2a1fa5cab2ca320a41d9f19cfa80c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:33 -0300
Subject: ASoC: dt-bindings: fix some broken links from txt->yaml conversion

Some new files got converted to yaml, but references weren't
updated accordingly.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/media/st,stm32-dcmi.txt | 2 +-
 Documentation/devicetree/bindings/sound/st,stm32-i2s.txt  | 2 +-
 Documentation/devicetree/bindings/sound/st,stm32-sai.txt  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/media/st,stm32-dcmi.txt b/Documentation/devicetree/bindings/media/st,stm32-dcmi.txt
index 249790a93017..3122ded82eb4 100644
--- a/Documentation/devicetree/bindings/media/st,stm32-dcmi.txt
+++ b/Documentation/devicetree/bindings/media/st,stm32-dcmi.txt
@@ -11,7 +11,7 @@ Required properties:
 - clock-names: must contain "mclk", which is the DCMI peripherial clock
 - pinctrl: the pincontrol settings to configure muxing properly
            for pins that connect to DCMI device.
-           See Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt.
+           See Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml.
 - dmas: phandle to DMA controller node,
         see Documentation/devicetree/bindings/dma/stm32-dma.txt
 - dma-names: must contain "tx", which is the transmit channel from DCMI to DMA
diff --git a/Documentation/devicetree/bindings/sound/st,stm32-i2s.txt b/Documentation/devicetree/bindings/sound/st,stm32-i2s.txt
index 58c341300552..cbf24bcd1b8d 100644
--- a/Documentation/devicetree/bindings/sound/st,stm32-i2s.txt
+++ b/Documentation/devicetree/bindings/sound/st,stm32-i2s.txt
@@ -18,7 +18,7 @@ Required properties:
     See Documentation/devicetree/bindings/dma/stm32-dma.txt.
   - dma-names: Identifier for each DMA request line. Must be "tx" and "rx".
   - pinctrl-names: should contain only value "default"
-  - pinctrl-0: see Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt
+  - pinctrl-0: see Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
 
 Optional properties:
   - resets: Reference to a reset controller asserting the reset controller
diff --git a/Documentation/devicetree/bindings/sound/st,stm32-sai.txt b/Documentation/devicetree/bindings/sound/st,stm32-sai.txt
index 3f4467ff0aa2..944743dd9212 100644
--- a/Documentation/devicetree/bindings/sound/st,stm32-sai.txt
+++ b/Documentation/devicetree/bindings/sound/st,stm32-sai.txt
@@ -41,7 +41,7 @@ SAI subnodes required properties:
 	"tx": if sai sub-block is configured as playback DAI
 	"rx": if sai sub-block is configured as capture DAI
   - pinctrl-names: should contain only value "default"
-  - pinctrl-0: see Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt
+  - pinctrl-0: see Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
 
 SAI subnodes Optional properties:
   - st,sync: specify synchronization mode.
-- 
cgit 


From 0aa3ebffc43cb8974f2fca92d07b9ebeba0f67c1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 29 May 2019 20:23:47 -0300
Subject: docs: gpio: driver.rst: fix a bad tag

With ReST, [foo]_ means a reference to foo, causing this warning:

    Documentation/driver-api/gpio/driver.rst:419: WARNING: Unknown target name: "devm".

Fix it by using a literal for the name.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/driver-api/gpio/driver.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst
index 58036c2d84d2..4af9aae724f0 100644
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@@ -418,7 +418,7 @@ symbol:
 
 If there is a need to exclude certain GPIO lines from the IRQ domain handled by
 these helpers, we can set .irq.need_valid_mask of the gpiochip before
-[devm_]gpiochip_add_data() is called. This allocates an .irq.valid_mask with as
+``[devm_]gpiochip_add_data()`` is called. This allocates an .irq.valid_mask with as
 many bits set as there are GPIO lines in the chip, each bit representing line
 0..n-1. Drivers can exclude GPIO lines by clearing bits from this mask. The mask
 must be filled in before gpiochip_irqchip_add() or gpiochip_irqchip_add_nested()
-- 
cgit 


From 0df82dcd55832a99363ab7f9fab954fcacdac3ae Mon Sep 17 00:00:00 2001
From: Sean Nyekjaer <sean@geanix.com>
Date: Tue, 7 May 2019 11:34:37 +0200
Subject: dt-bindings: can: mcp251x: add mcp25625 support

Fully compatible with mcp2515, the mcp25625 have integrated transceiver.

This patch add the mcp25625 to the device tree bindings documentation.

Signed-off-by: Sean Nyekjaer <sean@geanix.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt b/Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt
index 188c8bd4eb67..5a0111d4de58 100644
--- a/Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt
+++ b/Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt
@@ -4,6 +4,7 @@ Required properties:
  - compatible: Should be one of the following:
    - "microchip,mcp2510" for MCP2510.
    - "microchip,mcp2515" for MCP2515.
+   - "microchip,mcp25625" for MCP25625.
  - reg: SPI chip select.
  - clocks: The clock feeding the CAN controller.
  - interrupts: Should contain IRQ line for the CAN controller.
-- 
cgit 


From fc8938d445d5ca861c5a76098512ab6dd67f6d80 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Thu, 6 Jun 2019 15:26:17 +0530
Subject: dt-bindings: gpio: davinci: Add k3 am654 compatible

The patch adds k3 am654 compatible, specific properties and
an example.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../devicetree/bindings/gpio/gpio-davinci.txt          | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/gpio/gpio-davinci.txt b/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
index 553b92a7e87b..bc6b4b62df83 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
@@ -5,6 +5,7 @@ Required Properties:
 			"ti,keystone-gpio": for Keystone 2 66AK2H/K, 66AK2L,
 						66AK2E SoCs
 			"ti,k2g-gpio", "ti,keystone-gpio": for 66AK2G
+			"ti,am654-gpio", "ti,keystone-gpio": for TI K3 AM654
 
 - reg: Physical base address of the controller and the size of memory mapped
        registers.
@@ -145,3 +146,20 @@ gpio0: gpio@260bf00 {
 	ti,ngpio = <32>;
 	ti,davinci-gpio-unbanked = <32>;
 };
+
+Example for K3 AM654:
+
+wkup_gpio0: wkup_gpio0@42110000 {
+	compatible = "ti,am654-gpio", "ti,keystone-gpio";
+	reg = <0x42110000 0x100>;
+	gpio-controller;
+	#gpio-cells = <2>;
+	interrupt-parent = <&intr_wkup_gpio>;
+	interrupts = <59 128>, <59 129>, <59 130>, <59 131>;
+	interrupt-controller;
+	#interrupt-cells = <2>;
+	ti,ngpio = <56>;
+	ti,davinci-gpio-unbanked = <0>;
+	clocks = <&k3_clks 59 0>;
+	clock-names = "gpio";
+};
-- 
cgit 


From 3a11cf2217520e3c827a24c6d5f648fa45845787 Mon Sep 17 00:00:00 2001
From: Anson Huang <Anson.Huang@nxp.com>
Date: Thu, 30 May 2019 11:13:57 +0800
Subject: dt-bindings: imx: Correct pinfunc head file path for i.MX8MM

The i.MX8MM pinfunc head file is located in DT folder, correct it.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/devicetree/bindings/pinctrl/fsl,imx8mm-pinctrl.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx8mm-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mm-pinctrl.txt
index 524a16fca666..e4e01c05cf83 100644
--- a/Documentation/devicetree/bindings/pinctrl/fsl,imx8mm-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mm-pinctrl.txt
@@ -12,7 +12,7 @@ Required properties in sub-nodes:
 - fsl,pins: each entry consists of 6 integers and represents the mux and config
   setting for one pin.  The first 5 integers <mux_reg conf_reg input_reg mux_val
   input_val> are specified using a PIN_FUNC_ID macro, which can be found in
-  <dt-bindings/pinctrl/imx8mm-pinfunc.h>. The last integer CONFIG is
+  <arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h>. The last integer CONFIG is
   the pad setting value like pull-up on this pin.  Please refer to i.MX8M Mini
   Reference Manual for detailed CONFIG settings.
 
-- 
cgit 


From 7ea6a2edbfd0a98f729f6f4705535bc53ec1a89f Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Mon, 3 Jun 2019 13:04:20 +0530
Subject: dt-bindings: pinctrl: Document drive strength settings for BM1880 SoC

Document drive strength settings for Bitmain BM1880 SoC.

Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt    | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt
index 4eb089bcb5f3..4980776122cc 100644
--- a/Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt
@@ -100,6 +100,17 @@ Optional Properties:
                   Valid values are:
                   <0>  - Slow
                   <1>  - Fast
+- drive-strength: Integer. Selects the drive strength for the specified
+                  pins in mA.
+                  Valid values are:
+                  <4>
+                  <8>
+                  <12>
+                  <16>
+                  <20>
+                  <24>
+                  <28>
+                  <32>
 
 Example:
         pinctrl: pinctrl@400 {
-- 
cgit 


From 53a5372ce326116f3e3d3f1d701113b2542509f4 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 4 Jun 2019 00:19:59 -0700
Subject: pinctrl: qcom: sdm845: Expose ufs_reset as gpio

The ufs_reset pin is expected to be wired to the reset pin of the
primary UFS memory but is pretty much just a general purpose output pinr

Reorder the pins and expose it as gpio 150, so that the UFS driver can
toggle it.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/devicetree/bindings/pinctrl/qcom,sdm845-pinctrl.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sdm845-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,sdm845-pinctrl.txt
index 321bdb9be0d2..7462e3743c68 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,sdm845-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,sdm845-pinctrl.txt
@@ -79,7 +79,7 @@ to specify in a pin configuration subnode:
 		      gpio0-gpio149
 		        Supports mux, bias and drive-strength
 
-		      sdc2_clk, sdc2_cmd, sdc2_data
+		      sdc2_clk, sdc2_cmd, sdc2_data, ufs_reset
 		        Supports bias and drive-strength
 
 - function:
-- 
cgit 


From 76c4c597b2ef59900933fca8893428f0555afaab Mon Sep 17 00:00:00 2001
From: Hongwei Zhang <hongweiz@ami.com>
Date: Tue, 4 Jun 2019 17:53:32 -0400
Subject: pinctrl: aspeed: Add SGPM pinmux

Add SGPM pinmux to ast2500-pinctrl function and group, to prepare for
supporting SGPIO in AST2500 SoC.

Signed-off-by: Hongwei Zhang <hongweiz@ami.com>
Reviewed-by: Andrew Jeffery <andrew@aj.id.au>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt
index 3b7266c7c438..8f1c5c4d62f9 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt
@@ -84,7 +84,7 @@ NDCD2 NDCD3 NDCD4 NDSR1 NDSR2 NDSR3 NDSR4 NDTR1 NDTR2 NDTR3 NDTR4 NRI1 NRI2
 NRI3 NRI4 NRTS1 NRTS2 NRTS3 NRTS4 OSCCLK PEWAKE PNOR PWM0 PWM1 PWM2 PWM3 PWM4
 PWM5 PWM6 PWM7 RGMII1 RGMII2 RMII1 RMII2 RXD1 RXD2 RXD3 RXD4 SALT1 SALT10
 SALT11 SALT12 SALT13 SALT14 SALT2 SALT3 SALT4 SALT5 SALT6 SALT7 SALT8 SALT9
-SCL1 SCL2 SD1 SD2 SDA1 SDA2 SGPS1 SGPS2 SIOONCTRL SIOPBI SIOPBO SIOPWREQ
+SCL1 SCL2 SD1 SD2 SDA1 SDA2 SGPM SGPS1 SGPS2 SIOONCTRL SIOPBI SIOPBO SIOPWREQ
 SIOPWRGD SIOS3 SIOS5 SIOSCI SPI1 SPI1CS1 SPI1DEBUG SPI1PASSTHRU SPI2CK SPI2CS0
 SPI2CS1 SPI2MISO SPI2MOSI TIMER3 TIMER4 TIMER5 TIMER6 TIMER7 TIMER8 TXD1 TXD2
 TXD3 TXD4 UART6 USB11BHID USB2AD USB2AH USB2BD USB2BH USBCKI VGABIOSROM VGAHS
-- 
cgit 


From 26207c7e787248bba8b8cee8bb6db36ecc7c0a13 Mon Sep 17 00:00:00 2001
From: Fabien Lahoudere <fabien.lahoudere@collabora.com>
Date: Thu, 23 May 2019 11:07:37 +0200
Subject: docs: iio: add precision about sampling_frequency_available

The documentation give some exemple on what format can be expected
from sampling_frequency_available sysfs attribute

Signed-off-by: Fabien Lahoudere <fabien.lahoudere@collabora.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/ABI/testing/sysfs-bus-iio | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 6aef7dbbde44..680451695422 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -61,8 +61,11 @@ What:		/sys/bus/iio/devices/triggerX/sampling_frequency_available
 KernelVersion:	2.6.35
 Contact:	linux-iio@vger.kernel.org
 Description:
-		When the internal sampling clock can only take a small
-		discrete set of values, this file lists those available.
+		When the internal sampling clock can only take a specific set of
+		frequencies, we can specify the available values with:
+		- a small discrete set of values like "0 2 4 6 8"
+		- a range with minimum, step and maximum frequencies like
+		  "[min step max]"
 
 What:		/sys/bus/iio/devices/iio:deviceX/oversampling_ratio
 KernelVersion:	2.6.38
-- 
cgit 


From d6e561df50b5a7e33273f66f97bf2b4ff25f13b4 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 7 Jun 2019 13:06:12 +0200
Subject: dt-bindings: pinctrl: pic32: Spelling s/configuraion/configuration/

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/devicetree/bindings/pinctrl/microchip,pic32-pinctrl.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/microchip,pic32-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/microchip,pic32-pinctrl.txt
index 29b72e303ebf..51efd2085113 100644
--- a/Documentation/devicetree/bindings/pinctrl/microchip,pic32-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/microchip,pic32-pinctrl.txt
@@ -5,7 +5,7 @@ Please refer to pinctrl-bindings.txt, ../gpio/gpio.txt, and
 pin controller, GPIO, and interrupt bindings.
 
 PIC32 'pin configuration node' is a node of a group of pins which can be
-used for a specific device or function. This node represents configuraions of
+used for a specific device or function. This node represents configurations of
 pins, optional function, and optional mux related configuration.
 
 Required properties for pin controller node:
-- 
cgit 


From 671c30957e78a822917cf0b04c4592e9813f7f9b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:17 -0300
Subject: ABI: sysfs-devices-system-cpu: point to the right docs

The cpuidle doc was split on two, one at the admin guide
and another one at the driver API guide. Instead of pointing
to a non-existent file, point to both (admin guide being
the first one).

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/ABI/testing/sysfs-devices-system-cpu | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 1528239f69b2..87478ac6c2af 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -137,7 +137,8 @@ Description:	Discover cpuidle policy and mechanism
 		current_governor: (RW) displays current idle policy. Users can
 		switch the governor at runtime by writing to this file.
 
-		See files in Documentation/cpuidle/ for more information.
+		See Documentation/admin-guide/pm/cpuidle.rst and
+		Documentation/driver-api/pm/cpuidle.rst for more information.
 
 
 What:		/sys/devices/system/cpu/cpuX/cpuidle/stateN/name
-- 
cgit 


From 065efe27872ca942b53b9f11d5b3f534a9c33857 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:19 -0300
Subject: docs: zh_CN: get rid of basic_profiling.txt

Changeset 5700d1974818 ("docs: Get rid of the "basic profiling" guide")
removed an old basic-profiling.txt file that was not updated over
the last 11 years and won't reflect the post-perf era.

It makes no sense to keep its translation, so get rid of it too.

Fixes: 5700d1974818 ("docs: Get rid of the "basic profiling" guide")
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Alex Shi <alex.shi@linux.alibaba.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/basic_profiling.txt         | 71 ----------------------
 1 file changed, 71 deletions(-)
 delete mode 100644 Documentation/translations/zh_CN/basic_profiling.txt

(limited to 'Documentation')

diff --git a/Documentation/translations/zh_CN/basic_profiling.txt b/Documentation/translations/zh_CN/basic_profiling.txt
deleted file mode 100644
index 1e6bf0bdf8f5..000000000000
--- a/Documentation/translations/zh_CN/basic_profiling.txt
+++ /dev/null
@@ -1,71 +0,0 @@
-Chinese translated version of Documentation/basic_profiling
-
-If you have any comment or update to the content, please post to LKML directly.
-However, if you have problem communicating in English you can also ask the
-Chinese maintainer for help.  Contact the Chinese maintainer, if this
-translation is outdated or there is problem with translation.
-
-Chinese maintainer: Liang Xie <xieliang@xiaomi.com>
----------------------------------------------------------------------
-Documentation/basic_profiling的中文翻译
-
-如果想评论或更新本文的内容，请直接发信到LKML。如果你使用英文交流有困难的话，也可
-以向中文版维护者求助。如果本翻译更新不及时或者翻译存在问题，请联系中文版维护者。
-
-中文版维护者： 谢良 Liang Xie <xieliang007@gmail.com>
-中文版翻译者： 谢良 Liang Xie <xieliang007@gmail.com>
-中文版校译者：
-以下为正文
----------------------------------------------------------------------
-
-下面这些说明指令都是非常基础的，如果你想进一步了解请阅读相关专业文档：）
-请不要再在本文档增加新的内容，但可以修复文档中的错误：）(mbligh@aracnet.com)
-感谢John Levon，Dave Hansen等在撰写时的帮助
-
-<test> 用于表示要测量的目标
-请先确保您已经有正确的System.map / vmlinux配置！
-
-对于linux系统来说，配置vmlinuz最容易的方法可能就是使用“make install”，然后修改
-/sbin/installkernel将vmlinux拷贝到/boot目录，而System.map通常是默认安装好的
-
-Readprofile
------------
-2.6系列内核需要版本相对较新的readprofile，比如util-linux 2.12a中包含的，可以从:
-
-http://www.kernel.org/pub/linux/utils/util-linux/ 下载
-
-大部分linux发行版已经包含了.
-
-启用readprofile需要在kernel启动命令行增加”profile=2“
-
-clear		readprofile -r
-		<test>
-dump output	readprofile -m /boot/System.map > captured_profile
-
-Oprofile
---------
-
-从http://oprofile.sourceforge.net/获取源代码（请参考Changes以获取匹配的版本）
-在kernel启动命令行增加“idle=poll”
-
-配置CONFIG_PROFILING=y和CONFIG_OPROFILE=y然后重启进入新kernel
-
-./configure --with-kernel-support
-make install
-
-想得到好的测量结果，请确保启用了本地APIC特性。如果opreport显示有0Hz CPU，
-说明APIC特性没有开启。另外注意idle=poll选项可能有损性能。
-
-One time setup:
-		opcontrol --setup --vmlinux=/boot/vmlinux
-
-clear		opcontrol --reset
-start		opcontrol --start
-		<test>
-stop		opcontrol --stop
-dump output	opreport >  output_file
-
-如果只看kernel相关的报告结果，请运行命令 opreport -l /boot/vmlinux > output_file
-
-通过reset选项可以清理过期统计数据，相当于重启的效果。
-
-- 
cgit 


From 2e03e3a42c961b709926ba5f7c42c09ea6bfb8c1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:20 -0300
Subject: docs: mm: numaperf.rst: get rid of a build warning

When building it, it gets this warning:

	Documentation/admin-guide/mm/numaperf.rst:168: WARNING: Footnote [1] is not referenced.

The problem is that this is not really a reference, as it is not
mentioned within the documentation.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/mm/numaperf.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/mm/numaperf.rst b/Documentation/admin-guide/mm/numaperf.rst
index c067ed145158..a80c3c37226e 100644
--- a/Documentation/admin-guide/mm/numaperf.rst
+++ b/Documentation/admin-guide/mm/numaperf.rst
@@ -165,5 +165,6 @@ write-through caching.
 ========
 See Also
 ========
-.. [1] https://www.uefi.org/sites/default/files/resources/ACPI_6_2.pdf
-       Section 5.2.27
+
+[1] https://www.uefi.org/sites/default/files/resources/ACPI_6_2.pdf
+- Section 5.2.27
-- 
cgit 


From d857a3ffd3d609d1c822b255d4fe4db8b3464e34 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:21 -0300
Subject: docs: bpf: get rid of two warnings

Documentation/bpf/btf.rst:154: WARNING: Unexpected indentation.
Documentation/bpf/btf.rst:163: WARNING: Unexpected indentation.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/bpf/btf.rst | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 8820360d00da..4ae022d274ab 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -151,6 +151,7 @@ for the type. The maximum value of ``BTF_INT_BITS()`` is 128.
 
 The ``BTF_INT_OFFSET()`` specifies the starting bit offset to calculate values
 for this int. For example, a bitfield struct member has:
+
  * btf member bit offset 100 from the start of the structure,
  * btf member pointing to an int type,
  * the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4``
@@ -160,6 +161,7 @@ from bits ``100 + 2 = 102``.
 
 Alternatively, the bitfield struct member can be the following to access the
 same bits as the above:
+
  * btf member bit offset 102,
  * btf member pointing to an int type,
  * the int type has ``BTF_INT_OFFSET() = 0`` and ``BTF_INT_BITS() = 4``
-- 
cgit 


From 27c054d2939f1a46a4da62732e71c140e664afb9 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:22 -0300
Subject: docs: mark orphan documents as such

Sphinx doesn't like orphan documents:

    Documentation/accelerators/ocxl.rst: WARNING: document isn't included in any toctree
    Documentation/arm/stm32/overview.rst: WARNING: document isn't included in any toctree
    Documentation/arm/stm32/stm32f429-overview.rst: WARNING: document isn't included in any toctree
    Documentation/arm/stm32/stm32f746-overview.rst: WARNING: document isn't included in any toctree
    Documentation/arm/stm32/stm32f769-overview.rst: WARNING: document isn't included in any toctree
    Documentation/arm/stm32/stm32h743-overview.rst: WARNING: document isn't included in any toctree
    Documentation/arm/stm32/stm32mp157-overview.rst: WARNING: document isn't included in any toctree
    Documentation/gpu/msm-crash-dump.rst: WARNING: document isn't included in any toctree
    Documentation/interconnect/interconnect.rst: WARNING: document isn't included in any toctree
    Documentation/laptops/lg-laptop.rst: WARNING: document isn't included in any toctree
    Documentation/powerpc/isa-versions.rst: WARNING: document isn't included in any toctree
    Documentation/virtual/kvm/amd-memory-encryption.rst: WARNING: document isn't included in any toctree
    Documentation/virtual/kvm/vcpu-requests.rst: WARNING: document isn't included in any toctree

So, while they aren't on any toctree, add :orphan: to them, in order
to silent this warning.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/accelerators/ocxl.rst             | 2 ++
 Documentation/arm/stm32/overview.rst            | 2 ++
 Documentation/arm/stm32/stm32f429-overview.rst  | 2 ++
 Documentation/arm/stm32/stm32f746-overview.rst  | 2 ++
 Documentation/arm/stm32/stm32f769-overview.rst  | 2 ++
 Documentation/arm/stm32/stm32h743-overview.rst  | 2 ++
 Documentation/arm/stm32/stm32mp157-overview.rst | 2 ++
 Documentation/gpu/msm-crash-dump.rst            | 2 ++
 Documentation/interconnect/interconnect.rst     | 2 ++
 Documentation/laptops/lg-laptop.rst             | 2 ++
 Documentation/powerpc/isa-versions.rst          | 2 ++
 11 files changed, 22 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/accelerators/ocxl.rst b/Documentation/accelerators/ocxl.rst
index 14cefc020e2d..b1cea19a90f5 100644
--- a/Documentation/accelerators/ocxl.rst
+++ b/Documentation/accelerators/ocxl.rst
@@ -1,3 +1,5 @@
+:orphan:
+
 ========================================================
 OpenCAPI (Open Coherent Accelerator Processor Interface)
 ========================================================
diff --git a/Documentation/arm/stm32/overview.rst b/Documentation/arm/stm32/overview.rst
index 85cfc8410798..f7e734153860 100644
--- a/Documentation/arm/stm32/overview.rst
+++ b/Documentation/arm/stm32/overview.rst
@@ -1,3 +1,5 @@
+:orphan:
+
 ========================
 STM32 ARM Linux Overview
 ========================
diff --git a/Documentation/arm/stm32/stm32f429-overview.rst b/Documentation/arm/stm32/stm32f429-overview.rst
index 18feda97f483..65bbb1c3b423 100644
--- a/Documentation/arm/stm32/stm32f429-overview.rst
+++ b/Documentation/arm/stm32/stm32f429-overview.rst
@@ -1,3 +1,5 @@
+:orphan:
+
 STM32F429 Overview
 ==================
 
diff --git a/Documentation/arm/stm32/stm32f746-overview.rst b/Documentation/arm/stm32/stm32f746-overview.rst
index b5f4b6ce7656..42d593085015 100644
--- a/Documentation/arm/stm32/stm32f746-overview.rst
+++ b/Documentation/arm/stm32/stm32f746-overview.rst
@@ -1,3 +1,5 @@
+:orphan:
+
 STM32F746 Overview
 ==================
 
diff --git a/Documentation/arm/stm32/stm32f769-overview.rst b/Documentation/arm/stm32/stm32f769-overview.rst
index 228656ced2fe..f6adac862b17 100644
--- a/Documentation/arm/stm32/stm32f769-overview.rst
+++ b/Documentation/arm/stm32/stm32f769-overview.rst
@@ -1,3 +1,5 @@
+:orphan:
+
 STM32F769 Overview
 ==================
 
diff --git a/Documentation/arm/stm32/stm32h743-overview.rst b/Documentation/arm/stm32/stm32h743-overview.rst
index 3458dc00095d..c525835e7473 100644
--- a/Documentation/arm/stm32/stm32h743-overview.rst
+++ b/Documentation/arm/stm32/stm32h743-overview.rst
@@ -1,3 +1,5 @@
+:orphan:
+
 STM32H743 Overview
 ==================
 
diff --git a/Documentation/arm/stm32/stm32mp157-overview.rst b/Documentation/arm/stm32/stm32mp157-overview.rst
index 62e176d47ca7..2c52cd020601 100644
--- a/Documentation/arm/stm32/stm32mp157-overview.rst
+++ b/Documentation/arm/stm32/stm32mp157-overview.rst
@@ -1,3 +1,5 @@
+:orphan:
+
 STM32MP157 Overview
 ===================
 
diff --git a/Documentation/gpu/msm-crash-dump.rst b/Documentation/gpu/msm-crash-dump.rst
index 757cd257e0d8..240ef200f76c 100644
--- a/Documentation/gpu/msm-crash-dump.rst
+++ b/Documentation/gpu/msm-crash-dump.rst
@@ -1,3 +1,5 @@
+:orphan:
+
 =====================
 MSM Crash Dump Format
 =====================
diff --git a/Documentation/interconnect/interconnect.rst b/Documentation/interconnect/interconnect.rst
index c3e004893796..56e331dab70e 100644
--- a/Documentation/interconnect/interconnect.rst
+++ b/Documentation/interconnect/interconnect.rst
@@ -1,5 +1,7 @@
 .. SPDX-License-Identifier: GPL-2.0
 
+:orphan:
+
 =====================================
 GENERIC SYSTEM INTERCONNECT SUBSYSTEM
 =====================================
diff --git a/Documentation/laptops/lg-laptop.rst b/Documentation/laptops/lg-laptop.rst
index aa503ee9b3bc..f2c2ffe31101 100644
--- a/Documentation/laptops/lg-laptop.rst
+++ b/Documentation/laptops/lg-laptop.rst
@@ -1,5 +1,7 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
+:orphan:
+
 LG Gram laptop extra features
 =============================
 
diff --git a/Documentation/powerpc/isa-versions.rst b/Documentation/powerpc/isa-versions.rst
index 812e20cc898c..66c24140ebf1 100644
--- a/Documentation/powerpc/isa-versions.rst
+++ b/Documentation/powerpc/isa-versions.rst
@@ -1,3 +1,5 @@
+:orphan:
+
 CPU to ISA Version Mapping
 ==========================
 
-- 
cgit 


From f672febc3d132ea0487c63367455124dfa39e30f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:23 -0300
Subject: docs: amd-memory-encryption.rst get rid of warnings

Get rid of those warnings:

    Documentation/virtual/kvm/amd-memory-encryption.rst:244: WARNING: Citation [white-paper] is not referenced.
    Documentation/virtual/kvm/amd-memory-encryption.rst:246: WARNING: Citation [amd-apm] is not referenced.
    Documentation/virtual/kvm/amd-memory-encryption.rst:247: WARNING: Citation [kvm-forum] is not referenced.

For references that aren't mentioned at the text by adding an
explicit reference to them.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/virtual/kvm/amd-memory-encryption.rst | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/virtual/kvm/amd-memory-encryption.rst b/Documentation/virtual/kvm/amd-memory-encryption.rst
index 659bbc093b52..d18c97b4e140 100644
--- a/Documentation/virtual/kvm/amd-memory-encryption.rst
+++ b/Documentation/virtual/kvm/amd-memory-encryption.rst
@@ -241,6 +241,9 @@ Returns: 0 on success, -negative on error
 References
 ==========
 
+
+See [white-paper]_, [api-spec]_, [amd-apm]_ and [kvm-forum]_ for more info.
+
 .. [white-paper] http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf
 .. [api-spec] http://support.amd.com/TechDocs/55766_SEV-KM_API_Specification.pdf
 .. [amd-apm] http://support.amd.com/TechDocs/24593.pdf (section 15.34)
-- 
cgit 


From d0727cc650f38243c0ac63fd8c91bfd63e3e2578 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:24 -0300
Subject: docs: zh_CN: avoid duplicate citation references

    Documentation/process/management-style.rst:35: WARNING: duplicate label decisions, other instance in     Documentation/translations/zh_CN/process/management-style.rst
    Documentation/process/programming-language.rst:37: WARNING: duplicate citation c-language, other instance in     Documentation/translations/zh_CN/process/programming-language.rst
    Documentation/process/programming-language.rst:38: WARNING: duplicate citation gcc, other instance in     Documentation/translations/zh_CN/process/programming-language.rst
    Documentation/process/programming-language.rst:39: WARNING: duplicate citation clang, other instance in     Documentation/translations/zh_CN/process/programming-language.rst
    Documentation/process/programming-language.rst:40: WARNING: duplicate citation icc, other instance in     Documentation/translations/zh_CN/process/programming-language.rst
    Documentation/process/programming-language.rst:41: WARNING: duplicate citation gcc-c-dialect-options, other instance in     Documentation/translations/zh_CN/process/programming-language.rst
    Documentation/process/programming-language.rst:42: WARNING: duplicate citation gnu-extensions, other instance in     Documentation/translations/zh_CN/process/programming-language.rst
    Documentation/process/programming-language.rst:43: WARNING: duplicate citation gcc-attribute-syntax, other instance in     Documentation/translations/zh_CN/process/programming-language.rst
    Documentation/process/programming-language.rst:44: WARNING: duplicate citation n2049, other instance in     Documentation/translations/zh_CN/process/programming-language.rst

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../zh_CN/process/management-style.rst             |  4 +-
 .../zh_CN/process/programming-language.rst         | 59 +++++++++++++++++-----
 2 files changed, 47 insertions(+), 16 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/translations/zh_CN/process/management-style.rst b/Documentation/translations/zh_CN/process/management-style.rst
index a181fa56d19e..c6a5bb285797 100644
--- a/Documentation/translations/zh_CN/process/management-style.rst
+++ b/Documentation/translations/zh_CN/process/management-style.rst
@@ -28,7 +28,7 @@ Linux内核管理风格
 
 不管怎样，这里是：
 
-.. _decisions:
+.. _cn_decisions:
 
 1）决策
 -------
@@ -108,7 +108,7 @@ Linux内核管理风格
 但是，为了做好作为内核管理者的准备，最好记住不要烧掉任何桥梁，不要轰炸任何
 无辜的村民，也不要疏远太多的内核开发人员。事实证明，疏远人是相当容易的，而
 亲近一个疏远的人是很难的。因此，“疏远”立即属于“不可逆”的范畴，并根据
-:ref:`decisions` 成为绝不可以做的事情。
+:ref:`cn_decisions` 成为绝不可以做的事情。
 
 这里只有几个简单的规则：
 
diff --git a/Documentation/translations/zh_CN/process/programming-language.rst b/Documentation/translations/zh_CN/process/programming-language.rst
index 51fd4ef48ea1..2a47a1d2ec20 100644
--- a/Documentation/translations/zh_CN/process/programming-language.rst
+++ b/Documentation/translations/zh_CN/process/programming-language.rst
@@ -8,21 +8,21 @@
 程序设计语言
 ============
 
-内核是用C语言 [c-language]_ 编写的。更准确地说，内核通常是用 ``gcc`` [gcc]_
-在 ``-std=gnu89`` [gcc-c-dialect-options]_ 下编译的：ISO C90的 GNU 方言（
+内核是用C语言 :ref:`c-language <cn_c-language>` 编写的。更准确地说，内核通常是用 :ref:`gcc <cn_gcc>`
+在 ``-std=gnu89`` :ref:`gcc-c-dialect-options <cn_gcc-c-dialect-options>` 下编译的：ISO C90的 GNU 方言（
 包括一些C99特性）
 
-这种方言包含对语言 [gnu-extensions]_ 的许多扩展，当然，它们许多都在内核中使用。
+这种方言包含对语言 :ref:`gnu-extensions <cn_gnu-extensions>` 的许多扩展，当然，它们许多都在内核中使用。
 
-对于一些体系结构，有一些使用 ``clang`` [clang]_ 和 ``icc`` [icc]_ 编译内核
+对于一些体系结构，有一些使用 :ref:`clang <cn_clang>` 和 :ref:`icc <cn_icc>` 编译内核
 的支持，尽管在编写此文档时还没有完成，仍需要第三方补丁。
 
 属性
 ----
 
-在整个内核中使用的一个常见扩展是属性（attributes） [gcc-attribute-syntax]_
+在整个内核中使用的一个常见扩展是属性（attributes） :ref:`gcc-attribute-syntax <cn_gcc-attribute-syntax>`
 属性允许将实现定义的语义引入语言实体（如变量、函数或类型），而无需对语言进行
-重大的语法更改（例如添加新关键字） [n2049]_
+重大的语法更改（例如添加新关键字） :ref:`n2049 <cn_n2049>`
 
 在某些情况下，属性是可选的（即不支持这些属性的编译器仍然应该生成正确的代码，
 即使其速度较慢或执行的编译时检查/诊断次数不够）
@@ -31,11 +31,42 @@
 ``__attribute__((__pure__))`` ），以检测可以使用哪些关键字和/或缩短代码, 具体
 请参阅 ``include/linux/compiler_attributes.h``
 
-.. [c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards
-.. [gcc] https://gcc.gnu.org
-.. [clang] https://clang.llvm.org
-.. [icc] https://software.intel.com/en-us/c-compilers
-.. [gcc-c-dialect-options] https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
-.. [gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
-.. [gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
-.. [n2049] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
+.. _cn_c-language:
+
+c-language
+   http://www.open-std.org/jtc1/sc22/wg14/www/standards
+
+.. _cn_gcc:
+
+gcc
+   https://gcc.gnu.org
+
+.. _cn_clang:
+
+clang
+   https://clang.llvm.org
+
+.. _cn_icc:
+
+icc
+   https://software.intel.com/en-us/c-compilers
+
+.. _cn_gcc-c-dialect-options:
+
+c-dialect-options
+   https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
+
+.. _cn_gnu-extensions:
+
+gnu-extensions
+   https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
+
+.. _cn_gcc-attribute-syntax:
+
+gcc-attribute-syntax
+   https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
+
+.. _cn_n2049:
+
+n2049
+   http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
-- 
cgit 


From ea0ad8763b17395fc611f6d91d1de389ec0cc584 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:25 -0300
Subject: docs: it: license-rules.rst: get rid of warnings

There's a wrong identation on a code block, and it tries to use
a reference that was not defined at the Italian translation.

    Documentation/translations/it_IT/process/license-rules.rst:329: WARNING: Literal block expected; none found.
    Documentation/translations/it_IT/process/license-rules.rst:332: WARNING: Unexpected indentation.
    Documentation/translations/it_IT/process/license-rules.rst:339: WARNING: Block quote ends without a blank line; unexpected unindent.
    Documentation/translations/it_IT/process/license-rules.rst:341: WARNING: Unexpected indentation.
    Documentation/translations/it_IT/process/license-rules.rst:305: WARNING: Unknown target name: "metatags".

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Reviewed-by: Federico Vaga <federico.vaga@vaga.pv.it>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/it_IT/process/license-rules.rst   | 28 +++++++++++-----------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/translations/it_IT/process/license-rules.rst b/Documentation/translations/it_IT/process/license-rules.rst
index f058e06996dc..4cd87a3a7bf9 100644
--- a/Documentation/translations/it_IT/process/license-rules.rst
+++ b/Documentation/translations/it_IT/process/license-rules.rst
@@ -303,7 +303,7 @@ essere categorizzate in:
      LICENSES/dual
 
    I file in questa cartella contengono il testo completo della rispettiva
-   licenza e i suoi `Metatags`_.  I nomi dei file sono identici agli
+   licenza e i suoi `Metatag`_.  I nomi dei file sono identici agli
    identificatori di licenza SPDX che dovrebbero essere usati nei file
    sorgenti.
 
@@ -326,19 +326,19 @@ essere categorizzate in:
 
    Esempio del formato del file::
 
-   Valid-License-Identifier: MPL-1.1
-   SPDX-URL: https://spdx.org/licenses/MPL-1.1.html
-   Usage-Guide:
-     Do NOT use. The MPL-1.1 is not GPL2 compatible. It may only be used for
-     dual-licensed files where the other license is GPL2 compatible.
-     If you end up using this it MUST be used together with a GPL2 compatible
-     license using "OR".
-     To use the Mozilla Public License version 1.1 put the following SPDX
-     tag/value pair into a comment according to the placement guidelines in
-     the licensing rules documentation:
-   SPDX-License-Identifier: MPL-1.1
-   License-Text:
-     Full license text
+    Valid-License-Identifier: MPL-1.1
+    SPDX-URL: https://spdx.org/licenses/MPL-1.1.html
+    Usage-Guide:
+      Do NOT use. The MPL-1.1 is not GPL2 compatible. It may only be used for
+      dual-licensed files where the other license is GPL2 compatible.
+      If you end up using this it MUST be used together with a GPL2 compatible
+      license using "OR".
+      To use the Mozilla Public License version 1.1 put the following SPDX
+      tag/value pair into a comment according to the placement guidelines in
+      the licensing rules documentation:
+    SPDX-License-Identifier: MPL-1.1
+    License-Text:
+      Full license text
 
 |
 
-- 
cgit 


From 6ad8b21652ec26a5ad51ffc91470e15c19156548 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:27 -0300
Subject: docs: security: trusted-encrypted.rst: fix code-block tag

The code-block tag is at the wrong place, causing those
warnings:

    Documentation/security/keys/trusted-encrypted.rst:112: WARNING: Literal block expected; none found.
    Documentation/security/keys/trusted-encrypted.rst:121: WARNING: Unexpected indentation.
    Documentation/security/keys/trusted-encrypted.rst:122: WARNING: Block quote ends without a blank line; unexpected unindent.
    Documentation/security/keys/trusted-encrypted.rst:123: WARNING: Block quote ends without a blank line; unexpected unindent.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: James Morris <jamorris@linux.microsoft.com>
Acked-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/security/keys/trusted-encrypted.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/security/keys/trusted-encrypted.rst b/Documentation/security/keys/trusted-encrypted.rst
index 7b35fcb58933..50ac8bcd6970 100644
--- a/Documentation/security/keys/trusted-encrypted.rst
+++ b/Documentation/security/keys/trusted-encrypted.rst
@@ -107,12 +107,14 @@ Where::
 
 Examples of trusted and encrypted key usage:
 
-Create and save a trusted key named "kmk" of length 32 bytes::
+Create and save a trusted key named "kmk" of length 32 bytes.
 
 Note: When using a TPM 2.0 with a persistent key with handle 0x81000001,
 append 'keyhandle=0x81000001' to statements between quotes, such as
 "new 32 keyhandle=0x81000001".
 
+::
+
     $ keyctl add trusted kmk "new 32" @u
     440502848
 
-- 
cgit 


From 43415f13276f09623b1b61376c6f2e43f71bedbb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:28 -0300
Subject: docs: security: core.rst: Fix several warnings

Multi-line literal markups only work when they're idented at the
same level, with is not the case here:

   Documentation/security/keys/core.rst:1597: WARNING: Inline literal start-string without end-string.
   Documentation/security/keys/core.rst:1597: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1597: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1598: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1598: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1600: WARNING: Inline literal start-string without end-string.
   Documentation/security/keys/core.rst:1600: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1600: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1600: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1600: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1666: WARNING: Inline literal start-string without end-string.
   Documentation/security/keys/core.rst:1666: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1666: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1666: WARNING: Inline emphasis start-string without end-string.

Fix it by using a code-block instead.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/security/keys/core.rst | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index 9521c4207f01..3fd60dcb2dc6 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -1594,10 +1594,12 @@ The structure has a number of fields, some of which are mandatory:
      attempted key link operation. If there is no match, -EINVAL is returned.
 
 
-  *  ``int (*asym_eds_op)(struct kernel_pkey_params *params,
-			  const void *in, void *out);``
-     ``int (*asym_verify_signature)(struct kernel_pkey_params *params,
-				    const void *in, const void *in2);``
+  *  ``asym_eds_op`` and ``asym_verify_signature``::
+
+       int (*asym_eds_op)(struct kernel_pkey_params *params,
+			  const void *in, void *out);
+       int (*asym_verify_signature)(struct kernel_pkey_params *params,
+				    const void *in, const void *in2);
 
      These methods are optional.  If provided the first allows a key to be
      used to encrypt, decrypt or sign a blob of data, and the second allows a
@@ -1662,8 +1664,10 @@ The structure has a number of fields, some of which are mandatory:
      required crypto isn't available.
 
 
-  *  ``int (*asym_query)(const struct kernel_pkey_params *params,
-			 struct kernel_pkey_query *info);``
+  *  ``asym_query``::
+
+       int (*asym_query)(const struct kernel_pkey_params *params,
+			 struct kernel_pkey_query *info);
 
      This method is optional.  If provided it allows information about the
      public or asymmetric key held in the key to be determined.
-- 
cgit 


From c6fff4d3b2f467dd62ee8c69e49c8a8795fe7400 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:30 -0300
Subject: docs: net: sja1105.rst: fix table format

There's a table there with produces two warnings when built
with Sphinx:

    Documentation/networking/dsa/sja1105.rst:91: WARNING: Block quote ends without a blank line; unexpected unindent.
    Documentation/networking/dsa/sja1105.rst:91: WARNING: Block quote ends without a blank line; unexpected unindent.

It will still produce a table, but the html output is wrong, as
it won't interpret the second line as the continuation for the
first ones, because identation doesn't match.

After the change, the output looks a way better and we got rid
of two warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/networking/dsa/sja1105.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst
index ea7bac438cfd..cb2858dece93 100644
--- a/Documentation/networking/dsa/sja1105.rst
+++ b/Documentation/networking/dsa/sja1105.rst
@@ -86,13 +86,13 @@ functionality.
 The following traffic modes are supported over the switch netdevices:
 
 +--------------------+------------+------------------+------------------+
-|                    | Standalone |   Bridged with   |   Bridged with   |
-|                    |    ports   | vlan_filtering 0 | vlan_filtering 1 |
+|                    | Standalone | Bridged with     | Bridged with     |
+|                    | ports      | vlan_filtering 0 | vlan_filtering 1 |
 +====================+============+==================+==================+
 | Regular traffic    |     Yes    |       Yes        |  No (use master) |
 +--------------------+------------+------------------+------------------+
 | Management traffic |     Yes    |       Yes        |       Yes        |
-|    (BPDU, PTP)     |            |                  |                  |
+| (BPDU, PTP)        |            |                  |                  |
 +--------------------+------------+------------------+------------------+
 
 Switching features
-- 
cgit 


From 14b767430a58046bfef8ff9b9f12854e20343092 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:29 -0300
Subject: docs: net: dpio-driver.rst: fix two codeblock warnings

    Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst:43: WARNING: Definition list ends without a blank line; unexpected unindent.
    Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst:63: WARNING: Unexpected indentation. looking for now-outdated files... none found

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../networking/device_drivers/freescale/dpaa2/dpio-driver.rst         | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst b/Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst
index 5045df990a4c..17dbee1ac53e 100644
--- a/Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst
+++ b/Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst
@@ -39,8 +39,7 @@ The Linux DPIO driver consists of 3 primary components--
 
    DPIO service-- provides APIs to other Linux drivers for services
 
-   QBman portal interface-- sends portal commands, gets responses
-::
+   QBman portal interface-- sends portal commands, gets responses::
 
           fsl-mc          other
            bus           drivers
@@ -60,6 +59,7 @@ The Linux DPIO driver consists of 3 primary components--
 
 The diagram below shows how the DPIO driver components fit with the other
 DPAA2 Linux driver components::
+
                                                    +------------+
                                                    | OS Network |
                                                    |   Stack    |
-- 
cgit 


From 1eecbcdca2bd8d96881cace19ad105dc0f0263f5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:31 -0300
Subject: docs: move protection-keys.rst to the core-api book

This document is used by multiple architectures:

	$ echo $(git grep -l  pkey_mprotect arch|cut -d'/' -f 2|sort|uniq)
	alpha arm arm64 ia64 m68k microblaze mips parisc powerpc s390 sh sparc x86 xtensa

So, let's move it to the core book and adjust the links to it
accordingly.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/core-api/index.rst           |  1 +
 Documentation/core-api/protection-keys.rst | 99 ++++++++++++++++++++++++++++++
 Documentation/x86/index.rst                |  1 -
 Documentation/x86/protection-keys.rst      | 99 ------------------------------
 4 files changed, 100 insertions(+), 100 deletions(-)
 create mode 100644 Documentation/core-api/protection-keys.rst
 delete mode 100644 Documentation/x86/protection-keys.rst

(limited to 'Documentation')

diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index ee1bb8983a88..2466a4c51031 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -34,6 +34,7 @@ Core utilities
    timekeeping
    boot-time-mm
    memory-hotplug
+   protection-keys
 
 
 Interfaces for kernel debugging
diff --git a/Documentation/core-api/protection-keys.rst b/Documentation/core-api/protection-keys.rst
new file mode 100644
index 000000000000..49d9833af871
--- /dev/null
+++ b/Documentation/core-api/protection-keys.rst
@@ -0,0 +1,99 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+Memory Protection Keys
+======================
+
+Memory Protection Keys for Userspace (PKU aka PKEYs) is a feature
+which is found on Intel's Skylake "Scalable Processor" Server CPUs.
+It will be avalable in future non-server parts.
+
+For anyone wishing to test or use this feature, it is available in
+Amazon's EC2 C5 instances and is known to work there using an Ubuntu
+17.04 image.
+
+Memory Protection Keys provides a mechanism for enforcing page-based
+protections, but without requiring modification of the page tables
+when an application changes protection domains.  It works by
+dedicating 4 previously ignored bits in each page table entry to a
+"protection key", giving 16 possible keys.
+
+There is also a new user-accessible register (PKRU) with two separate
+bits (Access Disable and Write Disable) for each key.  Being a CPU
+register, PKRU is inherently thread-local, potentially giving each
+thread a different set of protections from every other thread.
+
+There are two new instructions (RDPKRU/WRPKRU) for reading and writing
+to the new register.  The feature is only available in 64-bit mode,
+even though there is theoretically space in the PAE PTEs.  These
+permissions are enforced on data access only and have no effect on
+instruction fetches.
+
+Syscalls
+========
+
+There are 3 system calls which directly interact with pkeys::
+
+	int pkey_alloc(unsigned long flags, unsigned long init_access_rights)
+	int pkey_free(int pkey);
+	int pkey_mprotect(unsigned long start, size_t len,
+			  unsigned long prot, int pkey);
+
+Before a pkey can be used, it must first be allocated with
+pkey_alloc().  An application calls the WRPKRU instruction
+directly in order to change access permissions to memory covered
+with a key.  In this example WRPKRU is wrapped by a C function
+called pkey_set().
+::
+
+	int real_prot = PROT_READ|PROT_WRITE;
+	pkey = pkey_alloc(0, PKEY_DISABLE_WRITE);
+	ptr = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+	ret = pkey_mprotect(ptr, PAGE_SIZE, real_prot, pkey);
+	... application runs here
+
+Now, if the application needs to update the data at 'ptr', it can
+gain access, do the update, then remove its write access::
+
+	pkey_set(pkey, 0); // clear PKEY_DISABLE_WRITE
+	*ptr = foo; // assign something
+	pkey_set(pkey, PKEY_DISABLE_WRITE); // set PKEY_DISABLE_WRITE again
+
+Now when it frees the memory, it will also free the pkey since it
+is no longer in use::
+
+	munmap(ptr, PAGE_SIZE);
+	pkey_free(pkey);
+
+.. note:: pkey_set() is a wrapper for the RDPKRU and WRPKRU instructions.
+          An example implementation can be found in
+          tools/testing/selftests/x86/protection_keys.c.
+
+Behavior
+========
+
+The kernel attempts to make protection keys consistent with the
+behavior of a plain mprotect().  For instance if you do this::
+
+	mprotect(ptr, size, PROT_NONE);
+	something(ptr);
+
+you can expect the same effects with protection keys when doing this::
+
+	pkey = pkey_alloc(0, PKEY_DISABLE_WRITE | PKEY_DISABLE_READ);
+	pkey_mprotect(ptr, size, PROT_READ|PROT_WRITE, pkey);
+	something(ptr);
+
+That should be true whether something() is a direct access to 'ptr'
+like::
+
+	*ptr = foo;
+
+or when the kernel does the access on the application's behalf like
+with a read()::
+
+	read(fd, ptr, 1);
+
+The kernel will send a SIGSEGV in both cases, but si_code will be set
+to SEGV_PKERR when violating protection keys versus SEGV_ACCERR when
+the plain mprotect() permissions are violated.
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index ae36fc5fc649..f2de1b2d3ac7 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -19,7 +19,6 @@ x86-specific Documentation
    tlb
    mtrr
    pat
-   protection-keys
    intel_mpx
    amd-memory-encryption
    pti
diff --git a/Documentation/x86/protection-keys.rst b/Documentation/x86/protection-keys.rst
deleted file mode 100644
index 49d9833af871..000000000000
--- a/Documentation/x86/protection-keys.rst
+++ /dev/null
@@ -1,99 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-======================
-Memory Protection Keys
-======================
-
-Memory Protection Keys for Userspace (PKU aka PKEYs) is a feature
-which is found on Intel's Skylake "Scalable Processor" Server CPUs.
-It will be avalable in future non-server parts.
-
-For anyone wishing to test or use this feature, it is available in
-Amazon's EC2 C5 instances and is known to work there using an Ubuntu
-17.04 image.
-
-Memory Protection Keys provides a mechanism for enforcing page-based
-protections, but without requiring modification of the page tables
-when an application changes protection domains.  It works by
-dedicating 4 previously ignored bits in each page table entry to a
-"protection key", giving 16 possible keys.
-
-There is also a new user-accessible register (PKRU) with two separate
-bits (Access Disable and Write Disable) for each key.  Being a CPU
-register, PKRU is inherently thread-local, potentially giving each
-thread a different set of protections from every other thread.
-
-There are two new instructions (RDPKRU/WRPKRU) for reading and writing
-to the new register.  The feature is only available in 64-bit mode,
-even though there is theoretically space in the PAE PTEs.  These
-permissions are enforced on data access only and have no effect on
-instruction fetches.
-
-Syscalls
-========
-
-There are 3 system calls which directly interact with pkeys::
-
-	int pkey_alloc(unsigned long flags, unsigned long init_access_rights)
-	int pkey_free(int pkey);
-	int pkey_mprotect(unsigned long start, size_t len,
-			  unsigned long prot, int pkey);
-
-Before a pkey can be used, it must first be allocated with
-pkey_alloc().  An application calls the WRPKRU instruction
-directly in order to change access permissions to memory covered
-with a key.  In this example WRPKRU is wrapped by a C function
-called pkey_set().
-::
-
-	int real_prot = PROT_READ|PROT_WRITE;
-	pkey = pkey_alloc(0, PKEY_DISABLE_WRITE);
-	ptr = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
-	ret = pkey_mprotect(ptr, PAGE_SIZE, real_prot, pkey);
-	... application runs here
-
-Now, if the application needs to update the data at 'ptr', it can
-gain access, do the update, then remove its write access::
-
-	pkey_set(pkey, 0); // clear PKEY_DISABLE_WRITE
-	*ptr = foo; // assign something
-	pkey_set(pkey, PKEY_DISABLE_WRITE); // set PKEY_DISABLE_WRITE again
-
-Now when it frees the memory, it will also free the pkey since it
-is no longer in use::
-
-	munmap(ptr, PAGE_SIZE);
-	pkey_free(pkey);
-
-.. note:: pkey_set() is a wrapper for the RDPKRU and WRPKRU instructions.
-          An example implementation can be found in
-          tools/testing/selftests/x86/protection_keys.c.
-
-Behavior
-========
-
-The kernel attempts to make protection keys consistent with the
-behavior of a plain mprotect().  For instance if you do this::
-
-	mprotect(ptr, size, PROT_NONE);
-	something(ptr);
-
-you can expect the same effects with protection keys when doing this::
-
-	pkey = pkey_alloc(0, PKEY_DISABLE_WRITE | PKEY_DISABLE_READ);
-	pkey_mprotect(ptr, size, PROT_READ|PROT_WRITE, pkey);
-	something(ptr);
-
-That should be true whether something() is a direct access to 'ptr'
-like::
-
-	*ptr = foo;
-
-or when the kernel does the access on the application's behalf like
-with a read()::
-
-	read(fd, ptr, 1);
-
-The kernel will send a SIGSEGV in both cases, but si_code will be set
-to SEGV_PKERR when violating protection keys versus SEGV_ACCERR when
-the plain mprotect() permissions are violated.
-- 
cgit 


From cb1aaebea8d79860181559d7b5d482aea63db113 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:32 -0300
Subject: docs: fix broken documentation links

Mostly due to x86 and acpi conversion, several documentation
links are still pointing to the old file. Fix them.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Reviewed-by: Wolfram Sang <wsa@the-dreams.de>
Reviewed-by: Sven Van Asbroeck <TheSven73@gmail.com>
Reviewed-by: Bhupesh Sharma <bhsharma@redhat.com>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/acpi/dsd/leds.txt                          |  2 +-
 Documentation/admin-guide/kernel-parameters.rst          |  6 +++---
 Documentation/admin-guide/kernel-parameters.txt          | 16 ++++++++--------
 Documentation/admin-guide/ras.rst                        |  2 +-
 Documentation/devicetree/bindings/net/fsl-enetc.txt      |  7 +++----
 .../devicetree/bindings/pci/amlogic,meson-pcie.txt       |  2 +-
 .../bindings/regulator/qcom,rpmh-regulator.txt           |  2 +-
 Documentation/devicetree/booting-without-of.txt          |  2 +-
 Documentation/driver-api/gpio/board.rst                  |  2 +-
 Documentation/driver-api/gpio/consumer.rst               |  2 +-
 Documentation/firmware-guide/acpi/enumeration.rst        |  2 +-
 Documentation/firmware-guide/acpi/method-tracing.rst     |  2 +-
 Documentation/i2c/instantiating-devices                  |  2 +-
 Documentation/sysctl/kernel.txt                          |  4 ++--
 Documentation/translations/zh_CN/process/4.Coding.rst    |  2 +-
 Documentation/x86/x86_64/5level-paging.rst               |  2 +-
 Documentation/x86/x86_64/boot-options.rst                |  4 ++--
 Documentation/x86/x86_64/fake-numa-for-cpusets.rst       |  2 +-
 18 files changed, 31 insertions(+), 32 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/acpi/dsd/leds.txt b/Documentation/acpi/dsd/leds.txt
index 81a63af42ed2..cc58b1a574c5 100644
--- a/Documentation/acpi/dsd/leds.txt
+++ b/Documentation/acpi/dsd/leds.txt
@@ -96,4 +96,4 @@ where
     <URL:http://www.uefi.org/sites/default/files/resources/_DSD-hierarchical-data-extension-UUID-v1.1.pdf>,
     referenced 2019-02-21.
 
-[7] Documentation/acpi/dsd/data-node-reference.txt
+[7] Documentation/firmware-guide/acpi/dsd/data-node-references.rst
diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index 0124980dca2d..8d3273e32eb1 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -167,7 +167,7 @@ parameter is applicable::
 	X86-32	X86-32, aka i386 architecture is enabled.
 	X86-64	X86-64 architecture is enabled.
 			More X86-64 boot options can be found in
-			Documentation/x86/x86_64/boot-options.txt .
+			Documentation/x86/x86_64/boot-options.rst.
 	X86	Either 32-bit or 64-bit x86 (same as X86-32+X86-64)
 	X86_UV	SGI UV support is enabled.
 	XEN	Xen support is enabled
@@ -181,10 +181,10 @@ In addition, the following text indicates that the option::
 Parameters denoted with BOOT are actually interpreted by the boot
 loader, and have no meaning to the kernel directly.
 Do not modify the syntax of boot loader parameters without extreme
-need or coordination with <Documentation/x86/boot.txt>.
+need or coordination with <Documentation/x86/boot.rst>.
 
 There are also arch-specific kernel-parameters not documented here.
-See for example <Documentation/x86/x86_64/boot-options.txt>.
+See for example <Documentation/x86/x86_64/boot-options.rst>.
 
 Note that ALL kernel parameters listed below are CASE SENSITIVE, and that
 a trailing = on the name of any parameter states that that parameter will
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 79d043b8850d..1abd7e145357 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -53,7 +53,7 @@
 			ACPI_DEBUG_PRINT statements, e.g.,
 			    ACPI_DEBUG_PRINT((ACPI_DB_INFO, ...
 			The debug_level mask defaults to "info".  See
-			Documentation/acpi/debug.txt for more information about
+			Documentation/firmware-guide/acpi/debug.rst for more information about
 			debug layers and levels.
 
 			Enable processor driver info messages:
@@ -963,7 +963,7 @@
 			for details.
 
 	nompx		[X86] Disables Intel Memory Protection Extensions.
-			See Documentation/x86/intel_mpx.txt for more
+			See Documentation/x86/intel_mpx.rst for more
 			information about the feature.
 
 	nopku		[X86] Disable Memory Protection Keys CPU feature found
@@ -1189,7 +1189,7 @@
 			that is to be dynamically loaded by Linux. If there are
 			multiple variables with the same name but with different
 			vendor GUIDs, all of them will be loaded. See
-			Documentation/acpi/ssdt-overlays.txt for details.
+			Documentation/admin-guide/acpi/ssdt-overlays.rst for details.
 
 
 	eisa_irq_edge=	[PARISC,HW]
@@ -2383,7 +2383,7 @@
 
 	mce		[X86-32] Machine Check Exception
 
-	mce=option	[X86-64] See Documentation/x86/x86_64/boot-options.txt
+	mce=option	[X86-64] See Documentation/x86/x86_64/boot-options.rst
 
 	md=		[HW] RAID subsystems devices and level
 			See Documentation/admin-guide/md.rst.
@@ -2439,7 +2439,7 @@
 			set according to the
 			CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel config
 			option.
-			See Documentation/memory-hotplug.txt.
+			See Documentation/admin-guide/mm/memory-hotplug.rst.
 
 	memmap=exactmap	[KNL,X86] Enable setting of an exact
 			E820 memory map, as specified by the user.
@@ -2528,7 +2528,7 @@
 			mem_encrypt=on:		Activate SME
 			mem_encrypt=off:	Do not activate SME
 
-			Refer to Documentation/x86/amd-memory-encryption.txt
+			Refer to Documentation/virtual/kvm/amd-memory-encryption.rst
 			for details on when memory encryption can be activated.
 
 	mem_sleep_default=	[SUSPEND] Default system suspend mode:
@@ -3529,7 +3529,7 @@
 			See Documentation/blockdev/paride.txt.
 
 	pirq=		[SMP,APIC] Manual mp-table setup
-			See Documentation/x86/i386/IO-APIC.txt.
+			See Documentation/x86/i386/IO-APIC.rst.
 
 	plip=		[PPT,NET] Parallel port network link
 			Format: { parport<nr> | timid | 0 }
@@ -5055,7 +5055,7 @@
 			Can be used multiple times for multiple devices.
 
 	vga=		[BOOT,X86-32] Select a particular video mode
-			See Documentation/x86/boot.txt and
+			See Documentation/x86/boot.rst and
 			Documentation/svga.txt.
 			Use vga=ask for menu.
 			This is actually a boot loader parameter; the value is
diff --git a/Documentation/admin-guide/ras.rst b/Documentation/admin-guide/ras.rst
index c7495e42e6f4..2b20f5f7380d 100644
--- a/Documentation/admin-guide/ras.rst
+++ b/Documentation/admin-guide/ras.rst
@@ -199,7 +199,7 @@ Architecture (MCA)\ [#f3]_.
   mode).
 
 .. [#f3] For more details about the Machine Check Architecture (MCA),
-  please read Documentation/x86/x86_64/machinecheck at the Kernel tree.
+  please read Documentation/x86/x86_64/machinecheck.rst at the Kernel tree.
 
 EDAC - Error Detection And Correction
 *************************************
diff --git a/Documentation/devicetree/bindings/net/fsl-enetc.txt b/Documentation/devicetree/bindings/net/fsl-enetc.txt
index c812e25ae90f..25fc687419db 100644
--- a/Documentation/devicetree/bindings/net/fsl-enetc.txt
+++ b/Documentation/devicetree/bindings/net/fsl-enetc.txt
@@ -16,8 +16,8 @@ Required properties:
 In this case, the ENETC node should include a "mdio" sub-node
 that in turn should contain the "ethernet-phy" node describing the
 external phy.  Below properties are required, their bindings
-already defined in ethernet.txt or phy.txt, under
-Documentation/devicetree/bindings/net/*.
+already defined in Documentation/devicetree/bindings/net/ethernet.txt or
+Documentation/devicetree/bindings/net/phy.txt.
 
 Required:
 
@@ -51,8 +51,7 @@ Example:
 connection:
 
 In this case, the ENETC port node defines a fixed link connection,
-as specified by "fixed-link.txt", under
-Documentation/devicetree/bindings/net/*.
+as specified by Documentation/devicetree/bindings/net/fixed-link.txt.
 
 Required:
 
diff --git a/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt b/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt
index 12b18f82d441..efa2c8b9b85a 100644
--- a/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt
@@ -3,7 +3,7 @@ Amlogic Meson AXG DWC PCIE SoC controller
 Amlogic Meson PCIe host controller is based on the Synopsys DesignWare PCI core.
 It shares common functions with the PCIe DesignWare core driver and
 inherits common properties defined in
-Documentation/devicetree/bindings/pci/designware-pci.txt.
+Documentation/devicetree/bindings/pci/designware-pcie.txt.
 
 Additional properties are described here:
 
diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
index 7ef2dbe48e8a..14d2eee96b3d 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
@@ -97,7 +97,7 @@ Second Level Nodes - Regulators
 		    sent for this regulator including those which are for a
 		    strictly lower power state.
 
-Other properties defined in Documentation/devicetree/bindings/regulator.txt
+Other properties defined in Documentation/devicetree/bindings/regulator/regulator.txt
 may also be used.  regulator-initial-mode and regulator-allowed-modes may be
 specified for VRM regulators using mode values from
 include/dt-bindings/regulator/qcom,rpmh-regulator.h.  regulator-allow-bypass
diff --git a/Documentation/devicetree/booting-without-of.txt b/Documentation/devicetree/booting-without-of.txt
index e86bd2f64117..60f8640f2b2f 100644
--- a/Documentation/devicetree/booting-without-of.txt
+++ b/Documentation/devicetree/booting-without-of.txt
@@ -277,7 +277,7 @@ it with special cases.
   the decompressor (the real mode entry point goes to the same  32bit
   entry point once it switched into protected mode). That entry point
   supports one calling convention which is documented in
-  Documentation/x86/boot.txt
+  Documentation/x86/boot.rst
   The physical pointer to the device-tree block (defined in chapter II)
   is passed via setup_data which requires at least boot protocol 2.09.
   The type filed is defined as
diff --git a/Documentation/driver-api/gpio/board.rst b/Documentation/driver-api/gpio/board.rst
index b37f3f7b8926..ce91518bf9f4 100644
--- a/Documentation/driver-api/gpio/board.rst
+++ b/Documentation/driver-api/gpio/board.rst
@@ -101,7 +101,7 @@ with the help of _DSD (Device Specific Data), introduced in ACPI 5.1::
 	}
 
 For more information about the ACPI GPIO bindings see
-Documentation/acpi/gpio-properties.txt.
+Documentation/firmware-guide/acpi/gpio-properties.rst.
 
 Platform Data
 -------------
diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst
index 5e4d8aa68913..fdecb6d711db 100644
--- a/Documentation/driver-api/gpio/consumer.rst
+++ b/Documentation/driver-api/gpio/consumer.rst
@@ -437,7 +437,7 @@ case, it will be handled by the GPIO subsystem automatically.  However, if the
 _DSD is not present, the mappings between GpioIo()/GpioInt() resources and GPIO
 connection IDs need to be provided by device drivers.
 
-For details refer to Documentation/acpi/gpio-properties.txt
+For details refer to Documentation/firmware-guide/acpi/gpio-properties.rst
 
 
 Interacting With the Legacy GPIO Subsystem
diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index 850be9696931..1252617b520f 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -339,7 +339,7 @@ a code like this::
 There are also devm_* versions of these functions which release the
 descriptors once the device is released.
 
-See Documentation/acpi/gpio-properties.txt for more information about the
+See Documentation/firmware-guide/acpi/gpio-properties.rst for more information about the
 _DSD binding related to GPIOs.
 
 MFD devices
diff --git a/Documentation/firmware-guide/acpi/method-tracing.rst b/Documentation/firmware-guide/acpi/method-tracing.rst
index d0b077b73f5f..0aa7e2c5d32a 100644
--- a/Documentation/firmware-guide/acpi/method-tracing.rst
+++ b/Documentation/firmware-guide/acpi/method-tracing.rst
@@ -68,7 +68,7 @@ c. Filter out the debug layer/level matched logs when the specified
 
 Where:
    0xXXXXXXXX/0xYYYYYYYY
-     Refer to Documentation/acpi/debug.txt for possible debug layer/level
+     Refer to Documentation/firmware-guide/acpi/debug.rst for possible debug layer/level
      masking values.
    \PPPP.AAAA.TTTT.HHHH
      Full path of a control method that can be found in the ACPI namespace.
diff --git a/Documentation/i2c/instantiating-devices b/Documentation/i2c/instantiating-devices
index 0d85ac1935b7..5a3e2f331e8c 100644
--- a/Documentation/i2c/instantiating-devices
+++ b/Documentation/i2c/instantiating-devices
@@ -85,7 +85,7 @@ Method 1c: Declare the I2C devices via ACPI
 -------------------------------------------
 
 ACPI can also describe I2C devices. There is special documentation for this
-which is currently located at Documentation/acpi/enumeration.txt.
+which is currently located at Documentation/firmware-guide/acpi/enumeration.rst.
 
 
 Method 2: Instantiate the devices explicitly
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index f0c86fbb3b48..92f7f34b021a 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -155,7 +155,7 @@ is 0x15 and the full version number is 0x234, this file will contain
 the value 340 = 0x154.
 
 See the type_of_loader and ext_loader_type fields in
-Documentation/x86/boot.txt for additional information.
+Documentation/x86/boot.rst for additional information.
 
 ==============================================================
 
@@ -167,7 +167,7 @@ The complete bootloader version number.  In the example above, this
 file will contain the value 564 = 0x234.
 
 See the type_of_loader and ext_loader_ver fields in
-Documentation/x86/boot.txt for additional information.
+Documentation/x86/boot.rst for additional information.
 
 ==============================================================
 
diff --git a/Documentation/translations/zh_CN/process/4.Coding.rst b/Documentation/translations/zh_CN/process/4.Coding.rst
index 5301e9d55255..8bb777941394 100644
--- a/Documentation/translations/zh_CN/process/4.Coding.rst
+++ b/Documentation/translations/zh_CN/process/4.Coding.rst
@@ -241,7 +241,7 @@ scripts/coccinelle目录下已经打包了相当多的内核“语义补丁”
 
 任何添加新用户空间界面的代码（包括新的sysfs或/proc文件）都应该包含该界面的
 文档，该文档使用户空间开发人员能够知道他们在使用什么。请参阅
-Documentation/abi/readme，了解如何格式化此文档以及需要提供哪些信息。
+Documentation/ABI/README，了解如何格式化此文档以及需要提供哪些信息。
 
 文件 :ref:`Documentation/admin-guide/kernel-parameters.rst <kernelparameters>`
 描述了内核的所有引导时间参数。任何添加新参数的补丁都应该向该文件添加适当的
diff --git a/Documentation/x86/x86_64/5level-paging.rst b/Documentation/x86/x86_64/5level-paging.rst
index ab88a4514163..44856417e6a5 100644
--- a/Documentation/x86/x86_64/5level-paging.rst
+++ b/Documentation/x86/x86_64/5level-paging.rst
@@ -20,7 +20,7 @@ physical address space. This "ought to be enough for anybody" ©.
 QEMU 2.9 and later support 5-level paging.
 
 Virtual memory layout for 5-level paging is described in
-Documentation/x86/x86_64/mm.txt
+Documentation/x86/x86_64/mm.rst
 
 
 Enabling 5-level paging
diff --git a/Documentation/x86/x86_64/boot-options.rst b/Documentation/x86/x86_64/boot-options.rst
index 2f69836b8445..6a4285a3c7a4 100644
--- a/Documentation/x86/x86_64/boot-options.rst
+++ b/Documentation/x86/x86_64/boot-options.rst
@@ -9,7 +9,7 @@ only the AMD64 specific ones are listed here.
 
 Machine check
 =============
-Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables.
+Please see Documentation/x86/x86_64/machinecheck.rst for sysfs runtime tunables.
 
    mce=off
 		Disable machine check
@@ -89,7 +89,7 @@ APICs
      Don't use the local APIC (alias for i386 compatibility)
 
    pirq=...
-	See Documentation/x86/i386/IO-APIC.txt
+	See Documentation/x86/i386/IO-APIC.rst
 
    noapictimer
 	Don't set up the APIC timer
diff --git a/Documentation/x86/x86_64/fake-numa-for-cpusets.rst b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
index 74fbb78b3c67..04df57b9aa3f 100644
--- a/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
+++ b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
@@ -18,7 +18,7 @@ For more information on the features of cpusets, see
 Documentation/cgroup-v1/cpusets.txt.
 There are a number of different configurations you can use for your needs.  For
 more information on the numa=fake command line option and its various ways of
-configuring fake nodes, see Documentation/x86/x86_64/boot-options.txt.
+configuring fake nodes, see Documentation/x86/x86_64/boot-options.rst.
 
 For the purposes of this introduction, we'll assume a very primitive NUMA
 emulation setup of "numa=fake=4*512,".  This will split our system memory into
-- 
cgit 


From 9915ec28ec7fc79f0f30ebbba5d19bfa17eb7f03 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:34 -0300
Subject: docs: isdn: remove hisax references from kernel-parameters.txt

The hisax driver got removed on 85993b8c9786 ("isdn: remove hisax driver"),
but a left-over was kept at kernel-parameters.txt.

Fixes: 85993b8c9786 ("isdn: remove hisax driver")

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/kernel-parameters.txt | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 1abd7e145357..9b16b640ce48 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1388,9 +1388,6 @@
 			Valid parameters: "on", "off"
 			Default: "on"
 
-	hisax=		[HW,ISDN]
-			See Documentation/isdn/README.HiSax.
-
 	hlt		[BUGS=ARM,SH]
 
 	hpet=		[X86-32,HPET] option to control HPET usage
-- 
cgit 


From 5c437fa29561f5809ef114ba3a5e80556cc43fb3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:35 -0300
Subject: docs: fs: fix broken links to vfs.txt with was renamed to vfs.rst

A recent documentation conversion renamed this file but forgot
to update the links.

Fixes: af96c1e304f7 ("docs: filesystems: vfs: Convert vfs.txt to RST")
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/porting | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 3bd1148d8bb6..2813a19389fe 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -330,14 +330,14 @@ unreferenced dentries, and is now only called when the dentry refcount goes to
 [mandatory]
 
 	.d_compare() calling convention and locking rules are significantly
-changed. Read updated documentation in Documentation/filesystems/vfs.txt (and
+changed. Read updated documentation in Documentation/filesystems/vfs.rst (and
 look at examples of other filesystems) for guidance.
 
 ---
 [mandatory]
 
 	.d_hash() calling convention and locking rules are significantly
-changed. Read updated documentation in Documentation/filesystems/vfs.txt (and
+changed. Read updated documentation in Documentation/filesystems/vfs.rst (and
 look at examples of other filesystems) for guidance.
 
 ---
@@ -377,12 +377,12 @@ where possible.
 the filesystem provides it), which requires dropping out of rcu-walk mode. This
 may now be called in rcu-walk mode (nd->flags & LOOKUP_RCU). -ECHILD should be
 returned if the filesystem cannot handle rcu-walk. See
-Documentation/filesystems/vfs.txt for more details.
+Documentation/filesystems/vfs.rst for more details.
 
 	permission is an inode permission check that is called on many or all
 directory inodes on the way down a path walk (to check for exec permission). It
 must now be rcu-walk aware (mask & MAY_NOT_BLOCK).  See
-Documentation/filesystems/vfs.txt for more details.
+Documentation/filesystems/vfs.rst for more details.
  
 --
 [mandatory]
@@ -625,7 +625,7 @@ in your dentry operations instead.
 --
 [mandatory]
 	->clone_file_range() and ->dedupe_file_range have been replaced with
-	->remap_file_range().  See Documentation/filesystems/vfs.txt for more
+	->remap_file_range().  See Documentation/filesystems/vfs.rst for more
 	information.
 --
 [recommended]
-- 
cgit 


From b640fbad2d8fe120c761f61eb6c96f05047100cd Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 7 Jun 2019 15:54:36 -0300
Subject: docs: pci: fix broken links due to conversion from pci.txt to pci.rst

Some documentation files were still pointing to the old place.

Fixes: 229b4e0728e0 ("Documentation: PCI: convert pci.txt to reST")
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Paul E. McKenney <paulmck@linux.ibm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/memory-barriers.txt                    | 2 +-
 Documentation/translations/ko_KR/memory-barriers.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index f70ebcdfe592..f4170aae1d75 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -548,7 +548,7 @@ There are certain things that the Linux kernel memory barriers do not guarantee:
 
 	[*] For information on bus mastering DMA and coherency please read:
 
-	    Documentation/PCI/pci.txt
+	    Documentation/PCI/pci.rst
 	    Documentation/DMA-API-HOWTO.txt
 	    Documentation/DMA-API.txt
 
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index db0b9d8619f1..07725b1df002 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -569,7 +569,7 @@ ACQUIRE 는 해당 오퍼레이션의 로드 부분에만 적용되고 RELEASE 
 
 	[*] 버스 마스터링 DMA 와 일관성에 대해서는 다음을 참고하시기 바랍니다:
 
-	    Documentation/PCI/pci.txt
+	    Documentation/PCI/pci.rst
 	    Documentation/DMA-API-HOWTO.txt
 	    Documentation/DMA-API.txt
 
-- 
cgit 


From 12775af50549121ade8efbdcf3612b867188ea0b Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 6 Jun 2019 19:30:38 +0300
Subject: dt-bindings: doc: net: keystone-netcp: document cpts

The Keystone 2 66AK2HK/E/L 1G Ethernet Switch Subsystems contains The
Common Platform Time Sync (CPTS) module which is in general compatible with
CPTS module found on "legacy" TI AM3/4/5 SoCs. So, the basic support for
Keystone 2 CPTS is available by default, but not documented.
The Keystone 2 CPTS module supports also some additional features like time
sync reference (RFTCLK) clock selection through CPTS_RFTCLK_SEL register
(offset: x08) in CPTS module, which is modelled as multiplexer clock.

This patch adds missed binding documentation for Keystone 2 66AK2HK/E/L
CPTS module.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/keystone-netcp.txt     | 44 ++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/keystone-netcp.txt b/Documentation/devicetree/bindings/net/keystone-netcp.txt
index 6262c2f293b0..24f11e042f8d 100644
--- a/Documentation/devicetree/bindings/net/keystone-netcp.txt
+++ b/Documentation/devicetree/bindings/net/keystone-netcp.txt
@@ -104,6 +104,23 @@ Required properties:
 			- 10Gb mac<->mac forced mode : 11
 ----phy-handle:	phandle to PHY device
 
+- cpts:		sub-node time synchronization (CPTS) submodule configuration
+-- clocks:	CPTS reference clock. Should point on cpts-refclk-mux clock.
+-- clock-names: should be "cpts"
+-- cpts-refclk-mux: multiplexer clock definition sub-node for CPTS reference (RFTCLK) clock
+--- #clock-cells: should be 0
+--- clocks:	list of CPTS reference (RFTCLK) clock's parents as defined in Data manual
+--- ti,mux-tbl: array of multiplexer indexes as defined in Data manual
+--- assigned-clocks: should point on cpts-refclk-mux clock
+--- assigned-clock-parents: should point on required RFTCLK clock parent to be selected
+-- cpts_clock_mult: (optional) Numerator to convert input clock ticks
+		into nanoseconds
+-- cpts_clock_shift: (optional) Denominator to convert input clock ticks into
+		nanoseconds.
+		Mult and shift will be calculated basing on CPTS
+		rftclk frequency if both cpts_clock_shift and
+		cpts_clock_mult properties are not provided.
+
 Optional properties:
 - enable-ale:	NetCP driver keeps the address learning feature in the ethernet
 		switch module disabled. This attribute is to enable the address
@@ -168,6 +185,23 @@ netcp: netcp@2000000 {
 			tx-queue = <648>;
 			tx-channel = <8>;
 
+			cpts {
+				clocks = <&cpts_refclk_mux>;
+				clock-names = "cpts";
+
+				cpts_refclk_mux: cpts-refclk-mux {
+					#clock-cells = <0>;
+					clocks = <&chipclk12>, <&chipclk13>,
+						 <&timi0>, <&timi1>,
+						 <&tsipclka>, <&tsrefclk>,
+						 <&tsipclkb>;
+					ti,mux-tbl = <0x0>, <0x1>, <0x2>,
+						<0x3>, <0x4>, <0x8>, <0xC>;
+					assigned-clocks = <&cpts_refclk_mux>;
+					assigned-clock-parents = <&chipclk12>;
+				};
+			};
+
 			interfaces {
 				gbe0: interface-0 {
 					slave-port = <0>;
@@ -219,3 +253,13 @@ netcp: netcp@2000000 {
 		};
 	};
 };
+
+CPTS board configuration - select external CPTS RFTCLK:
+
+&tsrefclk{
+	clock-frequency = <500000000>;
+};
+
+&cpts_refclk_mux {
+	assigned-clock-parents = <&tsrefclk>;
+};
-- 
cgit 


From e3630fd9aa2a11d0dd12b92e7d51299f9b547d64 Mon Sep 17 00:00:00 2001
From: Teresa Remmet <t.remmet@phytec.de>
Date: Fri, 24 May 2019 15:20:03 +0200
Subject: ARM: dts: Add support for phyBOARD-REGOR-AM335x

Adding support for phyBOARD-REGOR-AM335x:
- CAN
- UART0 / UART2
- RS485
- USB device
- i2c RTC
- SD
- ethernet0 RMII
- ethernet1 MII
- Digital I/Os

Signed-off-by: Teresa Remmet <t.remmet@phytec.de>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 Documentation/devicetree/bindings/arm/omap/omap.txt | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/omap/omap.txt b/Documentation/devicetree/bindings/arm/omap/omap.txt
index 1c1e48fd94b5..b301f753ed2c 100644
--- a/Documentation/devicetree/bindings/arm/omap/omap.txt
+++ b/Documentation/devicetree/bindings/arm/omap/omap.txt
@@ -160,6 +160,9 @@ Boards:
 - AM335X phyCORE-AM335x: Development kit
   compatible = "phytec,am335x-pcm-953", "phytec,am335x-phycore-som", "ti,am33xx"
 
+- AM335x phyBOARD-REGOR: Single Board Computer
+  compatible = "phytec,am335x-regor", "phytec,am335x-phycore-som", "ti,am33xx"
+
 - AM335X UC-8100-ME-T: Communication-centric industrial computing platform
   compatible = "moxa,uc-8100-me-t", "ti,am33xx";
 
-- 
cgit 


From 5a46b6fa0f33c2ca7a8432a0a48e1bf4a71fcc9f Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Mon, 27 May 2019 14:43:05 +0200
Subject: dt-bindings: mmc: meson-gx: add dram-access-quirk property

On the Amlogic G12A SoC family, (only) the SDIO controller has a bug which
makes any DRAM access from the MMC controller fail.

Add the amlogic,dram-access-quirk property so signal this particular
controller has this bug and needs a quirk to work properly.

Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/amlogic,meson-gx.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mmc/amlogic,meson-gx.txt b/Documentation/devicetree/bindings/mmc/amlogic,meson-gx.txt
index 13e70409e8ac..ccc5358db131 100644
--- a/Documentation/devicetree/bindings/mmc/amlogic,meson-gx.txt
+++ b/Documentation/devicetree/bindings/mmc/amlogic,meson-gx.txt
@@ -22,6 +22,10 @@ Required properties:
   clock rate requested by the MMC core.
 - resets     : phandle of the internal reset line
 
+Optional properties:
+- amlogic,dram-access-quirk: set when controller's internal DMA engine cannot access the
+  DRAM memory, like on the G12A dedicated SDIO controller.
+
 Example:
 
 	sd_emmc_a: mmc@70000 {
-- 
cgit 


From f9b7989859dd005ffcd8aea5a9c862f61cda9cd8 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Wed, 29 May 2019 09:23:43 +0200
Subject: dt-bindings: mmc: Add YAML schemas for the generic MMC options

The MMC controllers have a bunch of generic options that are needed in a
device tree. Add a YAML schemas for those.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 .../devicetree/bindings/mmc/mmc-controller.yaml    | 374 +++++++++++++++++++++
 Documentation/devicetree/bindings/mmc/mmc.txt      | 178 +---------
 2 files changed, 375 insertions(+), 177 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/mmc/mmc-controller.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
new file mode 100644
index 000000000000..080754e0ef35
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
@@ -0,0 +1,374 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/mmc-controller.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MMC Controller Generic Binding
+
+maintainers:
+  - Ulf Hansson <ulf.hansson@linaro.org>
+
+description: |
+  These properties are common to multiple MMC host controllers. Any host
+  that requires the respective functionality should implement them using
+  these definitions.
+
+properties:
+  $nodename:
+    pattern: "^mmc(@.*)?$"
+
+  "#address-cells":
+    const: 1
+    description: |
+      The cell is the slot ID if a function subnode is used.
+
+  "#size-cells":
+    const: 0
+
+  # Card Detection.
+  # If none of these properties are supplied, the host native card
+  # detect will be used. Only one of them should be provided.
+
+  broken-cd:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      There is no card detection available; polling must be used.
+
+  cd-gpios:
+    description:
+      The card detection will be done using the GPIO provided.
+
+  non-removable:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      Non-removable slot (like eMMC); assume always present.
+
+  # *NOTE* on CD and WP polarity. To use common for all SD/MMC host
+  # controllers line polarity properties, we have to fix the meaning
+  # of the "normal" and "inverted" line levels. We choose to follow
+  # the SDHCI standard, which specifies both those lines as "active
+  # low." Therefore, using the "cd-inverted" property means, that the
+  # CD line is active high, i.e. it is high, when a card is
+  # inserted. Similar logic applies to the "wp-inverted" property.
+  #
+  # CD and WP lines can be implemented on the hardware in one of two
+  # ways: as GPIOs, specified in cd-gpios and wp-gpios properties, or
+  # as dedicated pins. Polarity of dedicated pins can be specified,
+  # using *-inverted properties. GPIO polarity can also be specified
+  # using the GPIO_ACTIVE_LOW flag. This creates an ambiguity in the
+  # latter case. We choose to use the XOR logic for GPIO CD and WP
+  # lines.  This means, the two properties are "superimposed," for
+  # example leaving the GPIO_ACTIVE_LOW flag clear and specifying the
+  # respective *-inverted property property results in a
+  # double-inversion and actually means the "normal" line polarity is
+  # in effect.
+  wp-inverted:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      The Write Protect line polarity is inverted.
+
+  cd-inverted:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      The CD line polarity is inverted.
+
+  # Other properties
+
+  bus-width:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - enum: [1, 4, 8]
+        default: 1
+    description:
+      Number of data lines.
+
+  max-frequency:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - minimum: 400000
+      - maximum: 200000000
+    description:
+      Maximum operating frequency of the bus.
+
+  disable-wp:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      When set, no physical write-protect line is present. This
+      property should only be specified when the controller has a
+      dedicated write-protect detection logic. If a GPIO is always
+      used for the write-protect detection. If a GPIO is always used
+      for the write-protect detection logic, it is sufficient to not
+      specify the wp-gpios property in the absence of a write-protect
+      line.
+
+  wp-gpios:
+    description:
+      GPIO to use for the write-protect detection.
+
+  cd-debounce-delay-ms:
+    description:
+      Set delay time before detecting card after card insert
+      interrupt.
+
+  no-1-8-v:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      When specified, denotes that 1.8V card voltage is not supported
+      on this system, even if the controller claims it.
+
+  cap-sd-highspeed:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      SD high-speed timing is supported.
+
+  cap-mmc-highspeed:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      MMC high-speed timing is supported.
+
+  sd-uhs-sdr12:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      SD UHS SDR12 speed is supported.
+
+  sd-uhs-sdr25:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      SD UHS SDR25 speed is supported.
+
+  sd-uhs-sdr50:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      SD UHS SDR50 speed is supported.
+
+  sd-uhs-sdr104:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      SD UHS SDR104 speed is supported.
+
+  sd-uhs-ddr50:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      SD UHS DDR50 speed is supported.
+
+  cap-power-off-card:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      Powering off the card is safe.
+
+  cap-mmc-hw-reset:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      eMMC hardware reset is supported
+
+  cap-sdio-irq:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      enable SDIO IRQ signalling on this interface
+
+  full-pwr-cycle:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      Full power cycle of the card is supported.
+
+  mmc-ddr-1_2v:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      eMMC high-speed DDR mode (1.2V I/O) is supported.
+
+  mmc-ddr-1_8v:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      eMMC high-speed DDR mode (1.8V I/O) is supported.
+
+  mmc-ddr-3_3v:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      eMMC high-speed DDR mode (3.3V I/O) is supported.
+
+  mmc-hs200-1_2v:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      eMMC HS200 mode (1.2V I/O) is supported.
+
+  mmc-hs200-1_8v:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      eMMC HS200 mode (1.8V I/O) is supported.
+
+  mmc-hs400-1_2v:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      eMMC HS400 mode (1.2V I/O) is supported.
+
+  mmc-hs400-1_8v:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      eMMC HS400 mode (1.8V I/O) is supported.
+
+  mmc-hs400-enhanced-strobe:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      eMMC HS400 enhanced strobe mode is supported
+
+  dsr:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - minimum: 0
+      - maximum: 0xffff
+    description:
+      Value the card Driver Stage Register (DSR) should be programmed
+      with.
+
+  no-sdio:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      Controller is limited to send SDIO commands during
+      initialization.
+
+  no-sd:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      Controller is limited to send SD commands during initialization.
+
+  no-mmc:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      Controller is limited to send MMC commands during
+      initialization.
+
+  fixed-emmc-driver-type:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - minimum: 0
+      - maximum: 4
+    description:
+      For non-removable eMMC, enforce this driver type. The value is
+      the driver type as specified in the eMMC specification (table
+      206 in spec version 5.1)
+
+  post-power-on-delay-ms:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+      - default: 10
+    description:
+      It was invented for MMC pwrseq-simple which could be referred to
+      mmc-pwrseq-simple.txt. But now it\'s reused as a tunable delay
+      waiting for I/O signalling and card power supply to be stable,
+      regardless of whether pwrseq-simple is used. Default to 10ms if
+      no available.
+
+  supports-cqe:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      The presence of this property indicates that the corresponding
+      MMC host controller supports HW command queue feature.
+
+  disable-cqe-dcmd:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      The presence of this property indicates that the MMC
+      controller\'s command queue engine (CQE) does not support direct
+      commands (DCMDs).
+
+  keep-power-in-suspend:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      SDIO only. Preserves card power during a suspend/resume cycle.
+
+  # Deprecated: enable-sdio-wakeup
+  wakeup-source:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      SDIO only. Enables wake up of host system on SDIO IRQ assertion.
+
+  vmmc-supply:
+    description:
+      Supply for the card power
+
+  vqmmc-supply:
+    description:
+      Supply for the bus IO line power
+
+  mmc-pwrseq:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      System-on-Chip designs may specify a specific MMC power
+      sequence. To successfully detect an (e)MMC/SD/SDIO card, that
+      power sequence must be maintained while initializing the card.
+
+patternProperties:
+  "^.*@[0-9]+$":
+    type: object
+    description: |
+      On embedded systems the cards connected to a host may need
+      additional properties. These can be specified in subnodes to the
+      host controller node. The subnodes are identified by the
+      standard \'reg\' property. Which information exactly can be
+      specified depends on the bindings for the SDIO function driver
+      for the subnode, as specified by the compatible string.
+
+    properties:
+      compatible:
+        description: |
+          Name of SDIO function following generic names recommended
+          practice
+
+      reg:
+        items:
+          - minimum: 0
+            maximum: 7
+            description:
+              Must contain the SDIO function number of the function this
+              subnode describes. A value of 0 denotes the memory SD
+              function, values from 1 to 7 denote the SDIO functions.
+
+      broken-hpi:
+        $ref: /schemas/types.yaml#/definitions/flag
+        description:
+          Use this to indicate that the mmc-card has a broken hpi
+          implementation, and that hpi should not be used.
+
+    required:
+      - reg
+
+dependencies:
+  cd-debounce-delay-ms: [ cd-gpios ]
+  fixed-emmc-driver-type: [ non-removable ]
+
+examples:
+  - |
+    sdhci@ab000000 {
+        compatible = "sdhci";
+        reg = <0xab000000 0x200>;
+        interrupts = <23>;
+        bus-width = <4>;
+        cd-gpios = <&gpio 69 0>;
+        cd-inverted;
+        wp-gpios = <&gpio 70 0>;
+        max-frequency = <50000000>;
+        keep-power-in-suspend;
+        wakeup-source;
+        mmc-pwrseq = <&sdhci0_pwrseq>;
+    };
+
+  - |
+    mmc3: mmc@1c12000 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        pinctrl-names = "default";
+        pinctrl-0 = <&mmc3_pins_a>;
+        vmmc-supply = <&reg_vmmc3>;
+        bus-width = <4>;
+        non-removable;
+        mmc-pwrseq = <&sdhci0_pwrseq>;
+
+        brcmf: bcrmf@1 {
+            reg = <1>;
+            compatible = "brcm,bcm43xx-fmac";
+            interrupt-parent = <&pio>;
+            interrupts = <10 8>;
+            interrupt-names = "host-wake";
+        };
+    };
diff --git a/Documentation/devicetree/bindings/mmc/mmc.txt b/Documentation/devicetree/bindings/mmc/mmc.txt
index c269dbe384fe..bf9d7d3febf1 100644
--- a/Documentation/devicetree/bindings/mmc/mmc.txt
+++ b/Documentation/devicetree/bindings/mmc/mmc.txt
@@ -1,177 +1 @@
-These properties are common to multiple MMC host controllers. Any host
-that requires the respective functionality should implement them using
-these definitions.
-
-Interpreted by the OF core:
-- reg: Registers location and length.
-- interrupts: Interrupts used by the MMC controller.
-
-Card detection:
-If no property below is supplied, host native card detect is used.
-Only one of the properties in this section should be supplied:
-  - broken-cd: There is no card detection available; polling must be used.
-  - cd-gpios: Specify GPIOs for card detection, see gpio binding
-  - non-removable: non-removable slot (like eMMC); assume always present.
-
-Optional properties:
-- bus-width: Number of data lines, can be <1>, <4>, or <8>.  The default
-  will be <1> if the property is absent.
-- wp-gpios: Specify GPIOs for write protection, see gpio binding
-- cd-inverted: when present, polarity on the CD line is inverted. See the note
-  below for the case, when a GPIO is used for the CD line
-- cd-debounce-delay-ms: Set delay time before detecting card after card insert interrupt.
-  It's only valid when cd-gpios is present.
-- wp-inverted: when present, polarity on the WP line is inverted. See the note
-  below for the case, when a GPIO is used for the WP line
-- disable-wp: When set no physical WP line is present. This property should
-  only be specified when the controller has a dedicated write-protect
-  detection logic. If a GPIO is always used for the write-protect detection
-  logic it is sufficient to not specify wp-gpios property in the absence of a WP
-  line.
-- max-frequency: maximum operating clock frequency
-- no-1-8-v: when present, denotes that 1.8v card voltage is not supported on
-  this system, even if the controller claims it is.
-- cap-sd-highspeed: SD high-speed timing is supported
-- cap-mmc-highspeed: MMC high-speed timing is supported
-- sd-uhs-sdr12: SD UHS SDR12 speed is supported
-- sd-uhs-sdr25: SD UHS SDR25 speed is supported
-- sd-uhs-sdr50: SD UHS SDR50 speed is supported
-- sd-uhs-sdr104: SD UHS SDR104 speed is supported
-- sd-uhs-ddr50: SD UHS DDR50 speed is supported
-- cap-power-off-card: powering off the card is safe
-- cap-mmc-hw-reset: eMMC hardware reset is supported
-- cap-sdio-irq: enable SDIO IRQ signalling on this interface
-- full-pwr-cycle: full power cycle of the card is supported
-- mmc-ddr-3_3v: eMMC high-speed DDR mode(3.3V I/O) is supported
-- mmc-ddr-1_8v: eMMC high-speed DDR mode(1.8V I/O) is supported
-- mmc-ddr-1_2v: eMMC high-speed DDR mode(1.2V I/O) is supported
-- mmc-hs200-1_8v: eMMC HS200 mode(1.8V I/O) is supported
-- mmc-hs200-1_2v: eMMC HS200 mode(1.2V I/O) is supported
-- mmc-hs400-1_8v: eMMC HS400 mode(1.8V I/O) is supported
-- mmc-hs400-1_2v: eMMC HS400 mode(1.2V I/O) is supported
-- mmc-hs400-enhanced-strobe: eMMC HS400 enhanced strobe mode is supported
-- dsr: Value the card's (optional) Driver Stage Register (DSR) should be
-  programmed with. Valid range: [0 .. 0xffff].
-- no-sdio: controller is limited to send sdio cmd during initialization
-- no-sd: controller is limited to send sd cmd during initialization
-- no-mmc: controller is limited to send mmc cmd during initialization
-- fixed-emmc-driver-type: for non-removable eMMC, enforce this driver type.
-  The value <n> is the driver type as specified in the eMMC specification
-  (table 206 in spec version 5.1).
-- post-power-on-delay-ms : It was invented for MMC pwrseq-simple which could
-  be referred to mmc-pwrseq-simple.txt. But now it's reused as a tunable delay
-  waiting for I/O signalling and card power supply to be stable, regardless of
-  whether pwrseq-simple is used. Default to 10ms if no available.
-- supports-cqe : The presence of this property indicates that the corresponding
-  MMC host controller supports HW command queue feature.
-- disable-cqe-dcmd: This property indicates that the MMC controller's command
-  queue engine (CQE) does not support direct commands (DCMDs).
-
-*NOTE* on CD and WP polarity. To use common for all SD/MMC host controllers line
-polarity properties, we have to fix the meaning of the "normal" and "inverted"
-line levels. We choose to follow the SDHCI standard, which specifies both those
-lines as "active low." Therefore, using the "cd-inverted" property means, that
-the CD line is active high, i.e. it is high, when a card is inserted. Similar
-logic applies to the "wp-inverted" property.
-
-CD and WP lines can be implemented on the hardware in one of two ways: as GPIOs,
-specified in cd-gpios and wp-gpios properties, or as dedicated pins. Polarity of
-dedicated pins can be specified, using *-inverted properties. GPIO polarity can
-also be specified using the GPIO_ACTIVE_LOW flag. This creates an ambiguity
-in the latter case. We choose to use the XOR logic for GPIO CD and WP lines.
-This means, the two properties are "superimposed," for example leaving the
-GPIO_ACTIVE_LOW flag clear and specifying the respective *-inverted property
-property results in a double-inversion and actually means the "normal" line
-polarity is in effect.
-
-Optional SDIO properties:
-- keep-power-in-suspend: Preserves card power during a suspend/resume cycle
-- wakeup-source: Enables wake up of host system on SDIO IRQ assertion
-		 (Legacy property supported: "enable-sdio-wakeup")
-
-MMC power
----------
-
-Controllers may implement power control from both the connected cards and
-the IO signaling (for example to change to high-speed 1.8V signalling). If
-the system supports this, then the following two properties should point
-to valid regulator nodes:
-
-- vqmmc-supply: supply node for IO line power
-- vmmc-supply: supply node for card's power
-
-
-MMC power sequences:
---------------------
-
-System on chip designs may specify a specific MMC power sequence. To
-successfully detect an (e)MMC/SD/SDIO card, that power sequence must be
-maintained while initializing the card.
-
-Optional property:
-- mmc-pwrseq: phandle to the MMC power sequence node. See "mmc-pwrseq-*"
-	for documentation of MMC power sequence bindings.
-
-
-Use of Function subnodes
-------------------------
-
-On embedded systems the cards connected to a host may need additional
-properties. These can be specified in subnodes to the host controller node.
-The subnodes are identified by the standard 'reg' property.
-Which information exactly can be specified depends on the bindings for the
-SDIO function driver for the subnode, as specified by the compatible string.
-
-Required host node properties when using function subnodes:
-- #address-cells: should be one. The cell is the slot id.
-- #size-cells: should be zero.
-
-Required function subnode properties:
-- reg: Must contain the SDIO function number of the function this subnode
-       describes. A value of 0 denotes the memory SD function, values from
-       1 to 7 denote the SDIO functions.
-
-Optional function subnode properties:
-- compatible: name of SDIO function following generic names recommended practice
-
-
-Examples
---------
-
-Basic example:
-
-sdhci@ab000000 {
-	compatible = "sdhci";
-	reg = <0xab000000 0x200>;
-	interrupts = <23>;
-	bus-width = <4>;
-	cd-gpios = <&gpio 69 0>;
-	cd-inverted;
-	wp-gpios = <&gpio 70 0>;
-	max-frequency = <50000000>;
-	keep-power-in-suspend;
-	wakeup-source;
-	mmc-pwrseq = <&sdhci0_pwrseq>
-}
-
-Example with sdio function subnode:
-
-mmc3: mmc@1c12000 {
-	#address-cells = <1>;
-	#size-cells = <0>;
-
-	pinctrl-names = "default";
-	pinctrl-0 = <&mmc3_pins_a>;
-	vmmc-supply = <&reg_vmmc3>;
-	bus-width = <4>;
-	non-removable;
-	mmc-pwrseq = <&sdhci0_pwrseq>
-
-	brcmf: bcrmf@1 {
-		reg = <1>;
-		compatible = "brcm,bcm43xx-fmac";
-		interrupt-parent = <&pio>;
-		interrupts = <10 8>; /* PH10 / EINT10 */
-		interrupt-names = "host-wake";
-	};
-};
+This file has moved to mmc-controller.yaml.
-- 
cgit 


From ca4570a4c2d3be309ff4b8976efa7c8291ca6b6d Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Wed, 29 May 2019 09:23:44 +0200
Subject: dt-bindings: mmc: sun4i: Add YAML schemas

Switch the DT binding to a YAML schema to enable the DT validation.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 .../bindings/mmc/allwinner,sun4i-a10-mmc.yaml      | 98 ++++++++++++++++++++++
 .../devicetree/bindings/mmc/sunxi-mmc.txt          | 52 ------------
 2 files changed, 98 insertions(+), 52 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/mmc/allwinner,sun4i-a10-mmc.yaml
 delete mode 100644 Documentation/devicetree/bindings/mmc/sunxi-mmc.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mmc/allwinner,sun4i-a10-mmc.yaml b/Documentation/devicetree/bindings/mmc/allwinner,sun4i-a10-mmc.yaml
new file mode 100644
index 000000000000..df0280edef97
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/allwinner,sun4i-a10-mmc.yaml
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/allwinner,sun4i-a10-mmc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 MMC Controller Device Tree Bindings
+
+allOf:
+  - $ref: "mmc-controller.yaml"
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  "#address-cells": true
+  "#size-cells": true
+
+  compatible:
+    oneOf:
+      - const: allwinner,sun4i-a10-mmc
+      - const: allwinner,sun5i-a13-mmc
+      - const: allwinner,sun7i-a20-mmc
+      - const: allwinner,sun8i-a83t-emmc
+      - const: allwinner,sun9i-a80-mmc
+      - const: allwinner,sun50i-a64-emmc
+      - const: allwinner,sun50i-a64-mmc
+      - items:
+          - const: allwinner,sun8i-a83t-mmc
+          - const: allwinner,sun7i-a20-mmc
+      - items:
+          - const: allwinner,sun50i-h6-emmc
+          - const: allwinner,sun50i-a64-emmc
+      - items:
+          - const: allwinner,sun50i-h6-mmc
+          - const: allwinner,sun50i-a64-mmc
+      - items:
+          - const: allwinner,sun8i-r40-emmc
+          - const: allwinner,sun50i-a64-emmc
+      - items:
+          - const: allwinner,sun8i-r40-mmc
+          - const: allwinner,sun50i-a64-mmc
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 2
+    maxItems: 4
+    items:
+      - description: Bus Clock
+      - description: Module Clock
+      - description: Output Clock
+      - description: Sample Clock
+
+  clock-names:
+    minItems: 2
+    maxItems: 4
+    items:
+      - const: ahb
+      - const: mmc
+      - const: output
+      - const: sample
+
+  resets:
+    maxItems: 1
+
+  reset-names:
+    const: ahb
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+examples:
+  - |
+    mmc0: mmc@1c0f000 {
+        compatible = "allwinner,sun5i-a13-mmc";
+        reg = <0x01c0f000 0x1000>;
+        clocks = <&ahb_gates 8>, <&mmc0_clk>;
+        clock-names = "ahb", "mmc";
+        interrupts = <32>;
+        bus-width = <4>;
+        cd-gpios = <&pio 7 1 0>;
+    };
+
+# FIXME: We should set it, but it would report all the generic
+# properties as additional properties.
+# additionalProperties: false
+
+...
diff --git a/Documentation/devicetree/bindings/mmc/sunxi-mmc.txt b/Documentation/devicetree/bindings/mmc/sunxi-mmc.txt
deleted file mode 100644
index e9cb3ec5e502..000000000000
--- a/Documentation/devicetree/bindings/mmc/sunxi-mmc.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-* Allwinner sunxi MMC controller
-
-The highspeed MMC host controller on Allwinner SoCs provides an interface
-for MMC, SD and SDIO types of memory cards.
-
-Supported maximum speeds are the ones of the eMMC standard 4.5 as well
-as the speed of SD standard 3.0.
-Absolute maximum transfer rate is 200MB/s
-
-Required properties:
- - compatible : should be one of:
-   * "allwinner,sun4i-a10-mmc"
-   * "allwinner,sun5i-a13-mmc"
-   * "allwinner,sun7i-a20-mmc"
-   * "allwinner,sun8i-a83t-emmc"
-   * "allwinner,sun9i-a80-mmc"
-   * "allwinner,sun50i-a64-emmc"
-   * "allwinner,sun50i-a64-mmc"
-   * "allwinner,sun50i-h6-emmc", "allwinner.sun50i-a64-emmc"
-   * "allwinner,sun50i-h6-mmc", "allwinner.sun50i-a64-mmc"
- - reg : mmc controller base registers
- - clocks : a list with 4 phandle + clock specifier pairs
- - clock-names : must contain "ahb", "mmc", "output" and "sample"
- - interrupts : mmc controller interrupt
-
-Optional properties:
- - resets : phandle + reset specifier pair
- - reset-names : must contain "ahb"
- - for cd, bus-width and additional generic mmc parameters
-   please refer to mmc.txt within this directory
-
-Examples:
-	- Within .dtsi:
-	mmc0: mmc@1c0f000 {
-		compatible = "allwinner,sun5i-a13-mmc";
-		reg = <0x01c0f000 0x1000>;
-		clocks = <&ahb_gates 8>, <&mmc0_clk>, <&mmc0_output_clk>, <&mmc0_sample_clk>;
-		clock-names = "ahb", "mod", "output", "sample";
-		interrupts = <0 32 4>;
-		status = "disabled";
-	};
-
-	- Within dts:
-	mmc0: mmc@1c0f000 {
-		pinctrl-names = "default", "default";
-		pinctrl-0 = <&mmc0_pins_a>;
-		pinctrl-1 = <&mmc0_cd_pin_reference_design>;
-		bus-width = <4>;
-		cd-gpios = <&pio 7 1 0>; /* PH1 */
-		cd-inverted;
-		status = "okay";
-	};
-- 
cgit 


From 334eb9bcb94f6e02d96268b76dd270a399643176 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Tue, 4 Jun 2019 16:14:22 +0800
Subject: dt-bindings: mmc: sprd: Add another optional clock documentation

For some Spreadtrum platforms like SC9860 platform, we should enable another
gate clock '2x_enable' to make the SD host controller work well. Thus add
documentation for this optional clock.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/sdhci-sprd.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt b/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt
index 45c9978aad7b..a285c773acd0 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt
@@ -14,6 +14,7 @@ Required properties:
 - clock-names: Should contain the following:
 	"sdio" - SDIO source clock (required)
 	"enable" - gate clock which used for enabling/disabling the device (required)
+	"2x_enable" - gate clock controlling the device for some special platforms (optional)
 
 Optional properties:
 - assigned-clocks: the same with "sdio" clock
-- 
cgit 


From c8ff5351b598cd722c9cf8a84259887bd1af2e0e Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Tue, 4 Jun 2019 16:14:27 +0800
Subject: dt-bindings: mmc: sprd: Add PHY DLL delay documentation

Introduce some PHY DLL delays properties to help to sample the PHY clock.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/sdhci-sprd.txt | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt b/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt
index a285c773acd0..e675397fa428 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt
@@ -20,6 +20,23 @@ Optional properties:
 - assigned-clocks: the same with "sdio" clock
 - assigned-clock-parents: the default parent of "sdio" clock
 
+PHY DLL delays are used to delay the data valid window, and align the window
+to sampling clock. PHY DLL delays can be configured by following properties,
+and each property contains 4 cells which are used to configure the clock data
+write line delay value, clock read command line delay value, clock read data
+positive edge delay value and clock read data negative edge delay value.
+Each cell's delay value unit is cycle of the PHY clock.
+
+- sprd,phy-delay-legacy: Delay value for legacy timing.
+- sprd,phy-delay-sd-highspeed: Delay value for SD high-speed timing.
+- sprd,phy-delay-sd-uhs-sdr50: Delay value for SD UHS SDR50 timing.
+- sprd,phy-delay-sd-uhs-sdr104: Delay value for SD UHS SDR50 timing.
+- sprd,phy-delay-mmc-highspeed: Delay value for MMC high-speed timing.
+- sprd,phy-delay-mmc-ddr52: Delay value for MMC DDR52 timing.
+- sprd,phy-delay-mmc-hs200: Delay value for MMC HS200 timing.
+- sprd,phy-delay-mmc-hs400: Delay value for MMC HS400 timing.
+- sprd,phy-delay-mmc-hs400es: Delay value for MMC HS400 enhanced strobe timing.
+
 Examples:
 
 sdio0: sdio@20600000 {
@@ -33,6 +50,7 @@ sdio0: sdio@20600000 {
 	assigned-clocks = <&ap_clk CLK_EMMC_2X>;
 	assigned-clock-parents = <&rpll CLK_RPLL_390M>;
 
+	sprd,phy-delay-sd-uhs-sdr104 = <0x3f 0x7f 0x2e 0x2e>;
 	bus-width = <8>;
 	non-removable;
 	no-sdio;
-- 
cgit 


From 51bd6f291583684f495ea498984dfc22049d7fd2 Mon Sep 17 00:00:00 2001
From: Asmaa Mnebhi <Asmaa@mellanox.com>
Date: Mon, 10 Jun 2019 14:57:02 -0400
Subject: Add support for IPMB driver

Support receiving IPMB requests on a Satellite MC from the BMC.
Once a response is ready, this driver will send back a response
to the BMC via the IPMB channel.

Signed-off-by: Asmaa Mnebhi <Asmaa@mellanox.com>
Acked-by: vadimp@mellanox.com
Message-Id: <319690553a0da2a1e80b400941341081b383e5f1.1560192707.git.Asmaa@mellanox.com>
[Move the config option to outside the ipmi msghandler, as it's not
 dependent on that.  Fixed one small whitespace issue.]
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 Documentation/IPMB.txt | 103 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 Documentation/IPMB.txt

(limited to 'Documentation')

diff --git a/Documentation/IPMB.txt b/Documentation/IPMB.txt
new file mode 100644
index 000000000000..a6ed8b68bd0f
--- /dev/null
+++ b/Documentation/IPMB.txt
@@ -0,0 +1,103 @@
+==============================
+IPMB Driver for a Satellite MC
+==============================
+
+The Intelligent Platform Management Bus or IPMB, is an
+I2C bus that provides a standardized interconnection between
+different boards within a chassis. This interconnection is
+between the baseboard management (BMC) and chassis electronics.
+IPMB is also associated with the messaging protocol through the
+IPMB bus.
+
+The devices using the IPMB are usually management
+controllers that perform management functions such as servicing
+the front panel interface, monitoring the baseboard,
+hot-swapping disk drivers in the system chassis, etc...
+
+When an IPMB is implemented in the system, the BMC serves as
+a controller to give system software access to the IPMB. The BMC
+sends IPMI requests to a device (usually a Satellite Management
+Controller or Satellite MC) via IPMB and the device
+sends a response back to the BMC.
+
+For more information on IPMB and the format of an IPMB message,
+refer to the IPMB and IPMI specifications.
+
+IPMB driver for Satellite MC
+----------------------------
+
+ipmb-dev-int - This is the driver needed on a Satellite MC to
+receive IPMB messages from a BMC and send a response back.
+This driver works with the I2C driver and a userspace
+program such as OpenIPMI:
+
+1) It is an I2C slave backend driver. So, it defines a callback
+function to set the Satellite MC as an I2C slave.
+This callback function handles the received IPMI requests.
+
+2) It defines the read and write functions to enable a user
+space program (such as OpenIPMI) to communicate with the kernel.
+
+
+Load the IPMB driver
+--------------------
+
+The driver needs to be loaded at boot time or manually first.
+First, make sure you have the following in your config file:
+CONFIG_IPMB_DEVICE_INTERFACE=y
+
+1) If you want the driver to be loaded at boot time:
+
+a) Add this entry to your ACPI table, under the appropriate SMBus:
+
+Device (SMB0) // Example SMBus host controller
+{
+  Name (_HID, "<Vendor-Specific HID>") // Vendor-Specific HID
+  Name (_UID, 0) // Unique ID of particular host controller
+  :
+  :
+    Device (IPMB)
+    {
+      Name (_HID, "IPMB0001") // IPMB device interface
+      Name (_UID, 0) // Unique device identifier
+    }
+}
+
+b) Example for device tree:
+
+&i2c2 {
+         status = "okay";
+
+         ipmb@10 {
+                 compatible = "ipmb-dev";
+                 reg = <0x10>;
+         };
+};
+
+2) Manually from Linux:
+modprobe ipmb-dev-int
+
+
+Instantiate the device
+----------------------
+
+After loading the driver, you can instantiate the device as
+described in 'Documentation/i2c/instantiating-devices'.
+If you have multiple BMCs, each connected to your Satellite MC via
+a different I2C bus, you can instantiate a device for each of
+those BMCs.
+The name of the instantiated device contains the I2C bus number
+associated with it as follows:
+
+BMC1 ------ IPMB/I2C bus 1 ---------|   /dev/ipmb-1
+				Satellite MC
+BMC1 ------ IPMB/I2C bus 2 ---------|   /dev/ipmb-2
+
+For instance, you can instantiate the ipmb-dev-int device from
+user space at the 7 bit address 0x10 on bus 2:
+
+  # echo ipmb-dev 0x1010 > /sys/bus/i2c/devices/i2c-2/new_device
+
+This will create the device file /dev/ipmb-2, which can be accessed
+by the user space program. The device needs to be instantiated
+before running the user space program.
-- 
cgit 


From dc3988f40fdf7a51bd5480c3383372f463e4dfa9 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sat, 8 Jun 2019 23:27:15 -0300
Subject: docs: Debugging390.txt: convert table to ascii artwork

The first bit/value table inside the document is very
hard to read and won't fit ReST format. Also, some columns aren't
properly aligned.

Convert it to a nice ascii artwork table with makes it easier to
read as plain text and is compatible with ReST format parser
on Sphinx.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 Documentation/s390/Debugging390.txt | 210 ++++++++++++++++++++----------------
 1 file changed, 120 insertions(+), 90 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt
index 5ae7f868a007..c35804c238ad 100644
--- a/Documentation/s390/Debugging390.txt
+++ b/Documentation/s390/Debugging390.txt
@@ -78,96 +78,126 @@ e.g. switching address translation off requires that you
 have a logical=physical mapping for the address you are
 currently running at.
 
-      Bit           Value
-s/390 z/Architecture
-0       0     Reserved ( must be 0 ) otherwise specification exception occurs.
-
-1       1     Program Event Recording 1 PER enabled, 
-	      PER is used to facilitate debugging e.g. single stepping.
-
-2-4    2-4    Reserved ( must be 0 ). 
-
-5       5     Dynamic address translation 1=DAT on.
-
-6       6     Input/Output interrupt Mask
-
-7	7     External interrupt Mask used primarily for interprocessor
-	      signalling and clock interrupts.
-
-8-11  8-11    PSW Key used for complex memory protection mechanism
-	      (not used under linux)
-
-12      12    1 on s/390 0 on z/Architecture
-
-13      13    Machine Check Mask 1=enable machine check interrupts
-
-14	14    Wait State. Set this to 1 to stop the processor except for
-	      interrupts and give  time to other LPARS. Used in CPU idle in
-	      the kernel to increase overall usage of processor resources.
-
-15      15    Problem state ( if set to 1 certain instructions are disabled )
-	      all linux user programs run with this bit 1 
-	      ( useful info for debugging under VM ).
-
-16-17 16-17   Address Space Control
-
-	      00 Primary Space Mode:
-	      The register CR1 contains the primary address-space control ele-
-	      ment (PASCE), which points to the primary space region/segment
-	      table origin.
-
-	      01 Access register mode
-
-	      10 Secondary Space Mode:
-	      The register CR7 contains the secondary address-space control
-	      element (SASCE), which points to the secondary space region or
-	      segment table origin.
-
-	      11 Home Space Mode:
-	      The register CR13 contains the home space address-space control
-	      element (HASCE), which points to the home space region/segment
-	      table origin.
-
-	      See "Address Spaces on Linux for s/390 & z/Architecture" below
-	      for more information about address space usage in Linux.
-
-18-19 18-19   Condition codes (CC)
-
-20    20      Fixed point overflow mask if 1=FPU exceptions for this event 
-              occur ( normally 0 ) 
-
-21    21      Decimal overflow mask if 1=FPU exceptions for this event occur 
-              ( normally 0 )
-
-22    22      Exponent underflow mask if 1=FPU exceptions for this event occur 
-              ( normally 0 )
-
-23    23      Significance Mask if 1=FPU exceptions for this event occur 
-              ( normally 0 )
-
-24-31 24-30   Reserved Must be 0.
-
-      31      Extended Addressing Mode
-      32      Basic Addressing Mode
-              Used to set addressing mode
-	      PSW 31   PSW 32
-                0         0        24 bit
-                0         1        31 bit
-                1         1        64 bit
-
-32             1=31 bit addressing mode 0=24 bit addressing mode (for backward 
-               compatibility), linux always runs with this bit set to 1
-
-33-64          Instruction address.
-      33-63    Reserved must be 0
-      64-127   Address
-               In 24 bits mode bits 64-103=0 bits 104-127 Address 
-               In 31 bits mode bits 64-96=0 bits 97-127 Address
-               Note: unlike 31 bit mode on s/390 bit 96 must be zero
-	       when loading the address with LPSWE otherwise a 
-               specification exception occurs, LPSW is fully backward
-               compatible.
-
++-------------------------+-------------------------------------------------+
+|          Bit            |                                                 |
++--------+----------------+                     Value                       |
+| s/390  | z/Architecture |                                                 |
++========+================+=================================================+
+| 0      |     0          | Reserved (must be 0) otherwise specification    |
+|        |                | exception occurs.                               |
++--------+----------------+-------------------------------------------------+
+| 1      |     1          | Program Event Recording 1 PER enabled,          |
+|        |                | PER is used to facilitate debugging e.g.        |
+|        |                | single stepping.                                |
++--------+----------------+-------------------------------------------------+
+| 2-4    |    2-4         | Reserved (must be 0).                           |
++--------+----------------+-------------------------------------------------+
+| 5      |     5          | Dynamic address translation 1=DAT on.           |
++--------+----------------+-------------------------------------------------+
+| 6      |     6          | Input/Output interrupt Mask                     |
++--------+----------------+-------------------------------------------------+
+| 7      |     7          | External interrupt Mask used primarily for      |
+|        |                | interprocessor signalling and clock interrupts. |
++--------+----------------+-------------------------------------------------+
+| 8-11   |   8-11         | PSW Key used for complex memory protection      |
+|        |                | mechanism (not used under linux)                |
++--------+----------------+-------------------------------------------------+
+| 12     |     12         | 1 on s/390 0 on z/Architecture                  |
++--------+----------------+-------------------------------------------------+
+| 13     |     13         | Machine Check Mask 1=enable machine check       |
+|        |                | interrupts                                      |
++--------+----------------+-------------------------------------------------+
+| 14     |     14         | Wait State. Set this to 1 to stop the processor |
+|        |                | except for interrupts and give  time to other   |
+|        |                | LPARS. Used in CPU idle in the kernel to        |
+|        |                | increase overall usage of processor resources.  |
++--------+----------------+-------------------------------------------------+
+| 15     |     15         | Problem state (if set to 1 certain instructions |
+|        |                | are disabled). All linux user programs run with |
+|        |                | this bit 1 (useful info for debugging under VM).|
++--------+----------------+-------------------------------------------------+
+| 16-17  |    16-17       | Address Space Control                           |
+|        |                |                                                 |
+|        |                | 00 Primary Space Mode:                          |
+|        |                |                                                 |
+|        |                | The register CR1 contains the primary           |
+|        |                | address-space control element (PASCE), which    |
+|        |                | points to the primary space region/segment      |
+|        |                | table origin.                                   |
+|        |                |                                                 |
+|        |                | 01 Access register mode                         |
+|        |                |                                                 |
+|        |                | 10 Secondary Space Mode:                        |
+|        |                |                                                 |
+|        |                | The register CR7 contains the secondary         |
+|        |                | address-space control element (SASCE), which    |
+|        |                | points to the secondary space region or         |
+|        |                | segment table origin.                           |
+|        |                |                                                 |
+|        |                | 11 Home Space Mode:                             |
+|        |                |                                                 |
+|        |                | The register CR13 contains the home space       |
+|        |                | address-space control element (HASCE), which    |
+|        |                | points to the home space region/segment         |
+|        |                | table origin.                                   |
+|        |                |                                                 |
+|        |                | See "Address Spaces on Linux for s/390 &        |
+|        |                | z/Architecture" below for more information      |
+|        |                | about address space usage in Linux.             |
++--------+----------------+-------------------------------------------------+
+| 18-19  |    18-19       | Condition codes (CC)                            |
++--------+----------------+-------------------------------------------------+
+| 20     |    20          | Fixed point overflow mask if 1=FPU exceptions   |
+|        |                | for this event occur (normally 0)               |
++--------+----------------+-------------------------------------------------+
+| 21     |    21          | Decimal overflow mask if 1=FPU exceptions for   |
+|        |                | this event occur (normally 0)                   |
++--------+----------------+-------------------------------------------------+
+| 22     |    22          | Exponent underflow mask if 1=FPU exceptions     |
+|        |                | for this event occur (normally 0)               |
++--------+----------------+-------------------------------------------------+
+| 23     |    23          | Significance Mask if 1=FPU exceptions for this  |
+|        |                | event occur (normally 0)                        |
++--------+----------------+-------------------------------------------------+
+| 24-31  |    24-30       | Reserved Must be 0.                             |
+|        +----------------+-------------------------------------------------+
+|        |    31          | Extended Addressing Mode                        |
+|        +----------------+-------------------------------------------------+
+|        |    32          | Basic Addressing Mode                           |
+|        |                |                                                 |
+|        |                | Used to set addressing mode                     |
+|        |                |                                                 |
+|        |                |    +---------+----------+----------+            |
+|        |                |    | PSW 31  | PSW 32   |          |            |
+|        |                |    +---------+----------+----------+            |
+|        |                |    |   0     |    0     |  24 bit  |            |
+|        |                |    +---------+----------+----------+            |
+|        |                |    |   0     |    1     |  31 bit  |            |
+|        |                |    +---------+----------+----------+            |
+|        |                |    |   1     |    1     |  64 bit  |            |
+|        |                |    +---------+----------+----------+            |
++--------+----------------+-------------------------------------------------+
+| 32     |                | 1=31 bit addressing mode 0=24 bit addressing    |
+|        |                | mode (for backward compatibility), linux        |
+|        |                | always runs with this bit set to 1              |
++--------+----------------+-------------------------------------------------+
+| 33-64  |                | Instruction address.                            |
+|        +----------------+-------------------------------------------------+
+|        |    33-63       | Reserved must be 0                              |
+|        +----------------+-------------------------------------------------+
+|        |    64-127      | Address                                         |
+|        |                |                                                 |
+|        |                |   - In 24 bits mode bits 64-103=0 bits 104-127  |
+|        |                |     Address                                     |
+|        |                |   - In 31 bits mode bits 64-96=0 bits 97-127    |
+|        |                |     Address                                     |
+|        |                |                                                 |
+|        |                | Note:                                           |
+|        |                |     unlike 31 bit mode on s/390 bit 96 must be  |
+|        |                |     zero when loading the address with LPSWE    |
+|        |                |     otherwise a specification exception occurs, |
+|        |                |     LPSW is fully backward compatible.          |
++--------+----------------+-------------------------------------------------+
 
 Prefix Page(s)
 --------------
-- 
cgit 


From 8b4a503d659b32cae8266aeb306f7fd6717e6a53 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sat, 8 Jun 2019 23:27:16 -0300
Subject: docs: s390: convert docs to ReST and rename to *.rst

Convert all text files with s390 documentation to ReST format.

Tried to preserve as much as possible the original document
format. Still, some of the files required some work in order
for it to be visible on both plain text and after converted
to html.

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 Documentation/admin-guide/kernel-parameters.txt |    4 +-
 Documentation/driver-api/s390-drivers.rst       |    4 +-
 Documentation/s390/3270.rst                     |  298 +++
 Documentation/s390/3270.txt                     |  271 ---
 Documentation/s390/CommonIO                     |  125 --
 Documentation/s390/DASD                         |   73 -
 Documentation/s390/Debugging390.txt             | 2172 -------------------
 Documentation/s390/cds.rst                      |  530 +++++
 Documentation/s390/cds.txt                      |  472 ----
 Documentation/s390/common_io.rst                |  140 ++
 Documentation/s390/dasd.rst                     |   84 +
 Documentation/s390/debugging390.rst             | 2613 +++++++++++++++++++++++
 Documentation/s390/driver-model.rst             |  328 +++
 Documentation/s390/driver-model.txt             |  287 ---
 Documentation/s390/index.rst                    |   30 +
 Documentation/s390/monreader.rst                |  212 ++
 Documentation/s390/monreader.txt                |  197 --
 Documentation/s390/qeth.rst                     |   64 +
 Documentation/s390/qeth.txt                     |   50 -
 Documentation/s390/s390dbf.rst                  |  803 +++++++
 Documentation/s390/s390dbf.txt                  |  667 ------
 Documentation/s390/text_files.rst               |   11 +
 Documentation/s390/vfio-ap.rst                  |  866 ++++++++
 Documentation/s390/vfio-ap.txt                  |  837 --------
 Documentation/s390/vfio-ccw.rst                 |  326 +++
 Documentation/s390/vfio-ccw.txt                 |  300 ---
 Documentation/s390/zfcpdump.rst                 |   50 +
 Documentation/s390/zfcpdump.txt                 |   48 -
 28 files changed, 6359 insertions(+), 5503 deletions(-)
 create mode 100644 Documentation/s390/3270.rst
 delete mode 100644 Documentation/s390/3270.txt
 delete mode 100644 Documentation/s390/CommonIO
 delete mode 100644 Documentation/s390/DASD
 delete mode 100644 Documentation/s390/Debugging390.txt
 create mode 100644 Documentation/s390/cds.rst
 delete mode 100644 Documentation/s390/cds.txt
 create mode 100644 Documentation/s390/common_io.rst
 create mode 100644 Documentation/s390/dasd.rst
 create mode 100644 Documentation/s390/debugging390.rst
 create mode 100644 Documentation/s390/driver-model.rst
 delete mode 100644 Documentation/s390/driver-model.txt
 create mode 100644 Documentation/s390/index.rst
 create mode 100644 Documentation/s390/monreader.rst
 delete mode 100644 Documentation/s390/monreader.txt
 create mode 100644 Documentation/s390/qeth.rst
 delete mode 100644 Documentation/s390/qeth.txt
 create mode 100644 Documentation/s390/s390dbf.rst
 delete mode 100644 Documentation/s390/s390dbf.txt
 create mode 100644 Documentation/s390/text_files.rst
 create mode 100644 Documentation/s390/vfio-ap.rst
 delete mode 100644 Documentation/s390/vfio-ap.txt
 create mode 100644 Documentation/s390/vfio-ccw.rst
 delete mode 100644 Documentation/s390/vfio-ccw.txt
 create mode 100644 Documentation/s390/zfcpdump.rst
 delete mode 100644 Documentation/s390/zfcpdump.txt

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..b9b0623be925 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -478,7 +478,7 @@
 			others).
 
 	ccw_timeout_log	[S390]
-			See Documentation/s390/CommonIO for details.
+			See Documentation/s390/common_io.rst for details.
 
 	cgroup_disable=	[KNL] Disable a particular controller
 			Format: {name of the controller(s) to disable}
@@ -516,7 +516,7 @@
 				/selinux/checkreqprot.
 
 	cio_ignore=	[S390]
-			See Documentation/s390/CommonIO for details.
+			See Documentation/s390/common_io.rst for details.
 	clk_ignore_unused
 			[CLK]
 			Prevents the clock framework from automatically gating
diff --git a/Documentation/driver-api/s390-drivers.rst b/Documentation/driver-api/s390-drivers.rst
index 30e6aa7e160b..5158577bc29b 100644
--- a/Documentation/driver-api/s390-drivers.rst
+++ b/Documentation/driver-api/s390-drivers.rst
@@ -27,7 +27,7 @@ not strictly considered I/O devices. They are considered here as well,
 although they are not the focus of this document.
 
 Some additional information can also be found in the kernel source under
-Documentation/s390/driver-model.txt.
+Documentation/s390/driver-model.rst.
 
 The css bus
 ===========
@@ -38,7 +38,7 @@ into several categories:
 * Standard I/O subchannels, for use by the system. They have a child
   device on the ccw bus and are described below.
 * I/O subchannels bound to the vfio-ccw driver. See
-  Documentation/s390/vfio-ccw.txt.
+  Documentation/s390/vfio-ccw.rst.
 * Message subchannels. No Linux driver currently exists.
 * CHSC subchannels (at most one). The chsc subchannel driver can be used
   to send asynchronous chsc commands.
diff --git a/Documentation/s390/3270.rst b/Documentation/s390/3270.rst
new file mode 100644
index 000000000000..e09e77954238
--- /dev/null
+++ b/Documentation/s390/3270.rst
@@ -0,0 +1,298 @@
+===============================
+IBM 3270 Display System support
+===============================
+
+This file describes the driver that supports local channel attachment
+of IBM 3270 devices.  It consists of three sections:
+
+	* Introduction
+	* Installation
+	* Operation
+
+
+Introduction
+============
+
+This paper describes installing and operating 3270 devices under
+Linux/390.  A 3270 device is a block-mode rows-and-columns terminal of
+which I'm sure hundreds of millions were sold by IBM and clonemakers
+twenty and thirty years ago.
+
+You may have 3270s in-house and not know it.  If you're using the
+VM-ESA operating system, define a 3270 to your virtual machine by using
+the command "DEF GRAF <hex-address>"  This paper presumes you will be
+defining four 3270s with the CP/CMS commands:
+
+	- DEF GRAF 620
+	- DEF GRAF 621
+	- DEF GRAF 622
+	- DEF GRAF 623
+
+Your network connection from VM-ESA allows you to use x3270, tn3270, or
+another 3270 emulator, started from an xterm window on your PC or
+workstation.  With the DEF GRAF command, an application such as xterm,
+and this Linux-390 3270 driver, you have another way of talking to your
+Linux box.
+
+This paper covers installation of the driver and operation of a
+dialed-in x3270.
+
+
+Installation
+============
+
+You install the driver by installing a patch, doing a kernel build, and
+running the configuration script (config3270.sh, in this directory).
+
+WARNING:  If you are using 3270 console support, you must rerun the
+configuration script every time you change the console's address (perhaps
+by using the condev= parameter in silo's /boot/parmfile).  More precisely,
+you should rerun the configuration script every time your set of 3270s,
+including the console 3270, changes subchannel identifier relative to
+one another.  ReIPL as soon as possible after running the configuration
+script and the resulting /tmp/mkdev3270.
+
+If you have chosen to make tub3270 a module, you add a line to a
+configuration file under /etc/modprobe.d/.  If you are working on a VM
+virtual machine, you can use DEF GRAF to define virtual 3270 devices.
+
+You may generate both 3270 and 3215 console support, or one or the
+other, or neither.  If you generate both, the console type under VM is
+not changed.  Use #CP Q TERM to see what the current console type is.
+Use #CP TERM CONMODE 3270 to change it to 3270.  If you generate only
+3270 console support, then the driver automatically converts your console
+at boot time to a 3270 if it is a 3215.
+
+In brief, these are the steps:
+
+	1. Install the tub3270 patch
+	2. (If a module) add a line to a file in `/etc/modprobe.d/*.conf`
+	3. (If VM) define devices with DEF GRAF
+	4. Reboot
+	5. Configure
+
+To test that everything works, assuming VM and x3270,
+
+	1. Bring up an x3270 window.
+	2. Use the DIAL command in that window.
+	3. You should immediately see a Linux login screen.
+
+Here are the installation steps in detail:
+
+	1.  The 3270 driver is a part of the official Linux kernel
+	source.  Build a tree with the kernel source and any necessary
+	patches.  Then do::
+
+		make oldconfig
+		(If you wish to disable 3215 console support, edit
+		.config; change CONFIG_TN3215's value to "n";
+		and rerun "make oldconfig".)
+		make image
+		make modules
+		make modules_install
+
+	2. (Perform this step only if you have configured tub3270 as a
+	module.)  Add a line to a file `/etc/modprobe.d/*.conf` to automatically
+	load the driver when it's needed.  With this line added, you will see
+	login prompts appear on your 3270s as soon as boot is complete (or
+	with emulated 3270s, as soon as you dial into your vm guest using the
+	command "DIAL <vmguestname>").  Since the line-mode major number is
+	227, the line to add should be::
+
+		alias char-major-227 tub3270
+
+	3. Define graphic devices to your vm guest machine, if you
+	haven't already.  Define them before you reboot (reipl):
+
+		- DEFINE GRAF 620
+		- DEFINE GRAF 621
+		- DEFINE GRAF 622
+		- DEFINE GRAF 623
+
+	4. Reboot.  The reboot process scans hardware devices, including
+	3270s, and this enables the tub3270 driver once loaded to respond
+	correctly to the configuration requests of the next step.  If
+	you have chosen 3270 console support, your console now behaves
+	as a 3270, not a 3215.
+
+	5. Run the 3270 configuration script config3270.  It is
+	distributed in this same directory, Documentation/s390, as
+	config3270.sh.  Inspect the output script it produces,
+	/tmp/mkdev3270, and then run that script.  This will create the
+	necessary character special device files and make the necessary
+	changes to /etc/inittab.
+
+	Then notify /sbin/init that /etc/inittab has changed, by issuing
+	the telinit command with the q operand::
+
+		cd Documentation/s390
+		sh config3270.sh
+		sh /tmp/mkdev3270
+		telinit q
+
+	This should be sufficient for your first time.  If your 3270
+	configuration has changed and you're reusing config3270, you
+	should follow these steps::
+
+		Change 3270 configuration
+		Reboot
+		Run config3270 and /tmp/mkdev3270
+		Reboot
+
+Here are the testing steps in detail:
+
+	1. Bring up an x3270 window, or use an actual hardware 3278 or
+	3279, or use the 3270 emulator of your choice.  You would be
+	running the emulator on your PC or workstation.  You would use
+	the command, for example::
+
+		x3270 vm-esa-domain-name &
+
+	if you wanted a 3278 Model 4 with 43 rows of 80 columns, the
+	default model number.  The driver does not take advantage of
+	extended attributes.
+
+	The screen you should now see contains a VM logo with input
+	lines near the bottom.  Use TAB to move to the bottom line,
+	probably labeled "COMMAND  ===>".
+
+	2. Use the DIAL command instead of the LOGIN command to connect
+	to one of the virtual 3270s you defined with the DEF GRAF
+	commands::
+
+		dial my-vm-guest-name
+
+	3. You should immediately see a login prompt from your
+	Linux-390 operating system.  If that does not happen, you would
+	see instead the line "DIALED TO my-vm-guest-name   0620".
+
+	To troubleshoot:  do these things.
+
+	A. Is the driver loaded?  Use the lsmod command (no operands)
+	to find out.  Probably it isn't.  Try loading it manually, with
+	the command "insmod tub3270".  Does that command give error
+	messages?  Ha!  There's your problem.
+
+	B. Is the /etc/inittab file modified as in installation step 3
+	above?  Use the grep command to find out; for instance, issue
+	"grep 3270 /etc/inittab".  Nothing found?  There's your
+	problem!
+
+	C. Are the device special files created, as in installation
+	step 2 above?  Use the ls -l command to find out; for instance,
+	issue "ls -l /dev/3270/tty620".  The output should start with the
+	letter "c" meaning character device and should contain "227, 1"
+	just to the left of the device name.  No such file?  no "c"?
+	Wrong major number?  Wrong minor number?  There's your
+	problem!
+
+	D. Do you get the message::
+
+		 "HCPDIA047E my-vm-guest-name 0620 does not exist"?
+
+	If so, you must issue the command "DEF GRAF 620" from your VM
+	3215 console and then reboot the system.
+
+
+
+OPERATION.
+==========
+
+The driver defines three areas on the 3270 screen:  the log area, the
+input area, and the status area.
+
+The log area takes up all but the bottom two lines of the screen.  The
+driver writes terminal output to it, starting at the top line and going
+down.  When it fills, the status area changes from "Linux Running" to
+"Linux More...".  After a scrolling timeout of (default) 5 sec, the
+screen clears and more output is written, from the top down.
+
+The input area extends from the beginning of the second-to-last screen
+line to the start of the status area.  You type commands in this area
+and hit ENTER to execute them.
+
+The status area initializes to "Linux Running" to give you a warm
+fuzzy feeling.  When the log area fills up and output awaits, it
+changes to "Linux More...".  At this time you can do several things or
+nothing.  If you do nothing, the screen will clear in (default) 5 sec
+and more output will appear.  You may hit ENTER with nothing typed in
+the input area to toggle between "Linux More..." and "Linux Holding",
+which indicates no scrolling will occur.  (If you hit ENTER with "Linux
+Running" and nothing typed, the application receives a newline.)
+
+You may change the scrolling timeout value.  For example, the following
+command line::
+
+	echo scrolltime=60 > /proc/tty/driver/tty3270
+
+changes the scrolling timeout value to 60 sec.  Set scrolltime to 0 if
+you wish to prevent scrolling entirely.
+
+Other things you may do when the log area fills up are:  hit PA2 to
+clear the log area and write more output to it, or hit CLEAR to clear
+the log area and the input area and write more output to the log area.
+
+Some of the Program Function (PF) and Program Attention (PA) keys are
+preassigned special functions.  The ones that are not yield an alarm
+when pressed.
+
+PA1 causes a SIGINT to the currently running application.  You may do
+the same thing from the input area, by typing "^C" and hitting ENTER.
+
+PA2 causes the log area to be cleared.  If output awaits, it is then
+written to the log area.
+
+PF3 causes an EOF to be received as input by the application.  You may
+cause an EOF also by typing "^D" and hitting ENTER.
+
+No PF key is preassigned to cause a job suspension, but you may cause a
+job suspension by typing "^Z" and hitting ENTER.  You may wish to
+assign this function to a PF key.  To make PF7 cause job suspension,
+execute the command::
+
+	echo pf7=^z > /proc/tty/driver/tty3270
+
+If the input you type does not end with the two characters "^n", the
+driver appends a newline character and sends it to the tty driver;
+otherwise the driver strips the "^n" and does not append a newline.
+The IBM 3215 driver behaves similarly.
+
+Pf10 causes the most recent command to be retrieved from the tube's
+command stack (default depth 20) and displayed in the input area.  You
+may hit PF10 again for the next-most-recent command, and so on.  A
+command is entered into the stack only when the input area is not made
+invisible (such as for password entry) and it is not identical to the
+current top entry.  PF10 rotates backward through the command stack;
+PF11 rotates forward.  You may assign the backward function to any PF
+key (or PA key, for that matter), say, PA3, with the command::
+
+	echo -e pa3=\\033k > /proc/tty/driver/tty3270
+
+This assigns the string ESC-k to PA3.  Similarly, the string ESC-j
+performs the forward function.  (Rationale:  In bash with vi-mode line
+editing, ESC-k and ESC-j retrieve backward and forward history.
+Suggestions welcome.)
+
+Is a stack size of twenty commands not to your liking?  Change it on
+the fly.  To change to saving the last 100 commands, execute the
+command::
+
+	echo recallsize=100 > /proc/tty/driver/tty3270
+
+Have a command you issue frequently?  Assign it to a PF or PA key!  Use
+the command::
+
+	echo pf24="mkdir foobar; cd foobar" > /proc/tty/driver/tty3270
+
+to execute the commands mkdir foobar and cd foobar immediately when you
+hit PF24.  Want to see the command line first, before you execute it?
+Use the -n option of the echo command::
+
+	echo -n pf24="mkdir foo; cd foo" > /proc/tty/driver/tty3270
+
+
+
+Happy testing!  I welcome any and all comments about this document, the
+driver, etc etc.
+
+Dick Hitt <rbh00@utsglobal.com>
diff --git a/Documentation/s390/3270.txt b/Documentation/s390/3270.txt
deleted file mode 100644
index 7c715de99774..000000000000
--- a/Documentation/s390/3270.txt
+++ /dev/null
@@ -1,271 +0,0 @@
-IBM 3270 Display System support
-
-This file describes the driver that supports local channel attachment
-of IBM 3270 devices.  It consists of three sections:
-	* Introduction
-	* Installation
-	* Operation
-
-
-INTRODUCTION.
-
-This paper describes installing and operating 3270 devices under
-Linux/390.  A 3270 device is a block-mode rows-and-columns terminal of
-which I'm sure hundreds of millions were sold by IBM and clonemakers
-twenty and thirty years ago.
-
-You may have 3270s in-house and not know it.  If you're using the
-VM-ESA operating system, define a 3270 to your virtual machine by using
-the command "DEF GRAF <hex-address>"  This paper presumes you will be
-defining four 3270s with the CP/CMS commands
-
-	DEF GRAF 620
-	DEF GRAF 621
-	DEF GRAF 622
-	DEF GRAF 623
-
-Your network connection from VM-ESA allows you to use x3270, tn3270, or
-another 3270 emulator, started from an xterm window on your PC or
-workstation.  With the DEF GRAF command, an application such as xterm,
-and this Linux-390 3270 driver, you have another way of talking to your
-Linux box.
-
-This paper covers installation of the driver and operation of a
-dialed-in x3270.
-
-
-INSTALLATION.
-
-You install the driver by installing a patch, doing a kernel build, and
-running the configuration script (config3270.sh, in this directory).
-
-WARNING:  If you are using 3270 console support, you must rerun the
-configuration script every time you change the console's address (perhaps
-by using the condev= parameter in silo's /boot/parmfile).  More precisely,
-you should rerun the configuration script every time your set of 3270s,
-including the console 3270, changes subchannel identifier relative to
-one another.  ReIPL as soon as possible after running the configuration
-script and the resulting /tmp/mkdev3270.
-
-If you have chosen to make tub3270 a module, you add a line to a
-configuration file under /etc/modprobe.d/.  If you are working on a VM
-virtual machine, you can use DEF GRAF to define virtual 3270 devices.
-
-You may generate both 3270 and 3215 console support, or one or the
-other, or neither.  If you generate both, the console type under VM is
-not changed.  Use #CP Q TERM to see what the current console type is.
-Use #CP TERM CONMODE 3270 to change it to 3270.  If you generate only
-3270 console support, then the driver automatically converts your console
-at boot time to a 3270 if it is a 3215.
-
-In brief, these are the steps:
-	1. Install the tub3270 patch
-	2. (If a module) add a line to a file in /etc/modprobe.d/*.conf
-	3. (If VM) define devices with DEF GRAF
-	4. Reboot
-	5. Configure
-
-To test that everything works, assuming VM and x3270,
-	1. Bring up an x3270 window.
-	2. Use the DIAL command in that window.
-	3. You should immediately see a Linux login screen.
-
-Here are the installation steps in detail:
-
-	1.  The 3270 driver is a part of the official Linux kernel
-	source.  Build a tree with the kernel source and any necessary
-	patches.  Then do
-		make oldconfig
-		(If you wish to disable 3215 console support, edit
-		.config; change CONFIG_TN3215's value to "n";
-		and rerun "make oldconfig".)
-		make image
-		make modules
-		make modules_install
-
-	2. (Perform this step only if you have configured tub3270 as a
-	module.)  Add a line to a file /etc/modprobe.d/*.conf to automatically
-	load the driver when it's needed.  With this line added, you will see
-	login prompts appear on your 3270s as soon as boot is complete (or
-	with emulated 3270s, as soon as you dial into your vm guest using the
-	command "DIAL <vmguestname>").  Since the line-mode major number is
-	227, the line to add should be:
-		alias char-major-227 tub3270
-
-	3. Define graphic devices to your vm guest machine, if you
-	haven't already.  Define them before you reboot (reipl):
-		DEFINE GRAF 620
-		DEFINE GRAF 621
-		DEFINE GRAF 622
-		DEFINE GRAF 623
-
-	4. Reboot.  The reboot process scans hardware devices, including
-	3270s, and this enables the tub3270 driver once loaded to respond
-	correctly to the configuration requests of the next step.  If
-	you have chosen 3270 console support, your console now behaves
-	as a 3270, not a 3215.
-
-	5. Run the 3270 configuration script config3270.  It is
-	distributed in this same directory, Documentation/s390, as
-	config3270.sh.	Inspect the output script it produces,
-	/tmp/mkdev3270, and then run that script.  This will create the
-	necessary character special device files and make the necessary
-	changes to /etc/inittab.
-
-	Then notify /sbin/init that /etc/inittab has changed, by issuing
-	the telinit command with the q operand:
-		cd Documentation/s390
-		sh config3270.sh
-		sh /tmp/mkdev3270
-		telinit q
-
-	This should be sufficient for your first time.	If your 3270
-	configuration has changed and you're reusing config3270, you
-	should follow these steps:
-		Change 3270 configuration
-		Reboot
-		Run config3270 and /tmp/mkdev3270
-		Reboot
-
-Here are the testing steps in detail:
-
-	1. Bring up an x3270 window, or use an actual hardware 3278 or
-	3279, or use the 3270 emulator of your choice.  You would be
-	running the emulator on your PC or workstation.  You would use
-	the command, for example,
-		x3270 vm-esa-domain-name &
-	if you wanted a 3278 Model 4 with 43 rows of 80 columns, the
-	default model number.  The driver does not take advantage of
-	extended attributes.
-
-	The screen you should now see contains a VM logo with input
-	lines near the bottom.  Use TAB to move to the bottom line,
-	probably labeled "COMMAND  ===>".
-
-	2. Use the DIAL command instead of the LOGIN command to connect
-	to one of the virtual 3270s you defined with the DEF GRAF
-	commands:
-		dial my-vm-guest-name
-
-	3. You should immediately see a login prompt from your
-	Linux-390 operating system.  If that does not happen, you would
-	see instead the line "DIALED TO my-vm-guest-name   0620".
-
-	To troubleshoot:  do these things.
-
-	A. Is the driver loaded?  Use the lsmod command (no operands)
-	to find out.  Probably it isn't.  Try loading it manually, with
-	the command "insmod tub3270".  Does that command give error
-	messages?  Ha!  There's your problem.
-
-	B. Is the /etc/inittab file modified as in installation step 3
-	above?  Use the grep command to find out; for instance, issue
-	"grep 3270 /etc/inittab".  Nothing found?  There's your
-	problem!
-
-	C. Are the device special files created, as in installation
-	step 2 above?  Use the ls -l command to find out; for instance,
-	issue "ls -l /dev/3270/tty620".  The output should start with the
-	letter "c" meaning character device and should contain "227, 1"
-	just to the left of the device name.  No such file?  no "c"?
-	Wrong major number?  Wrong minor number?  There's your
-	problem!
-
-	D. Do you get the message
-		 "HCPDIA047E my-vm-guest-name 0620 does not exist"?
-	If so, you must issue the command "DEF GRAF 620" from your VM
-	3215 console and then reboot the system.
-
-
-
-OPERATION.
-
-The driver defines three areas on the 3270 screen:  the log area, the
-input area, and the status area.
-
-The log area takes up all but the bottom two lines of the screen.  The
-driver writes terminal output to it, starting at the top line and going
-down.  When it fills, the status area changes from "Linux Running" to
-"Linux More...".  After a scrolling timeout of (default) 5 sec, the
-screen clears and more output is written, from the top down.
-
-The input area extends from the beginning of the second-to-last screen
-line to the start of the status area.  You type commands in this area
-and hit ENTER to execute them.
-
-The status area initializes to "Linux Running" to give you a warm
-fuzzy feeling.  When the log area fills up and output awaits, it
-changes to "Linux More...".  At this time you can do several things or
-nothing.  If you do nothing, the screen will clear in (default) 5 sec
-and more output will appear.  You may hit ENTER with nothing typed in
-the input area to toggle between "Linux More..." and "Linux Holding",
-which indicates no scrolling will occur.  (If you hit ENTER with "Linux
-Running" and nothing typed, the application receives a newline.)
-
-You may change the scrolling timeout value.  For example, the following
-command line:
-	echo scrolltime=60 > /proc/tty/driver/tty3270
-changes the scrolling timeout value to 60 sec.  Set scrolltime to 0 if
-you wish to prevent scrolling entirely.
-
-Other things you may do when the log area fills up are:  hit PA2 to
-clear the log area and write more output to it, or hit CLEAR to clear
-the log area and the input area and write more output to the log area.
-
-Some of the Program Function (PF) and Program Attention (PA) keys are
-preassigned special functions.  The ones that are not yield an alarm
-when pressed.
-
-PA1 causes a SIGINT to the currently running application.  You may do
-the same thing from the input area, by typing "^C" and hitting ENTER.
-
-PA2 causes the log area to be cleared.  If output awaits, it is then
-written to the log area.
-
-PF3 causes an EOF to be received as input by the application.  You may
-cause an EOF also by typing "^D" and hitting ENTER.
-
-No PF key is preassigned to cause a job suspension, but you may cause a
-job suspension by typing "^Z" and hitting ENTER.  You may wish to
-assign this function to a PF key.  To make PF7 cause job suspension,
-execute the command:
-	echo pf7=^z > /proc/tty/driver/tty3270
-
-If the input you type does not end with the two characters "^n", the
-driver appends a newline character and sends it to the tty driver;
-otherwise the driver strips the "^n" and does not append a newline.
-The IBM 3215 driver behaves similarly.
-
-Pf10 causes the most recent command to be retrieved from the tube's
-command stack (default depth 20) and displayed in the input area.  You
-may hit PF10 again for the next-most-recent command, and so on.  A
-command is entered into the stack only when the input area is not made
-invisible (such as for password entry) and it is not identical to the
-current top entry.  PF10 rotates backward through the command stack;
-PF11 rotates forward.  You may assign the backward function to any PF
-key (or PA key, for that matter), say, PA3, with the command:
-	echo -e pa3=\\033k > /proc/tty/driver/tty3270
-This assigns the string ESC-k to PA3.  Similarly, the string ESC-j
-performs the forward function.  (Rationale:  In bash with vi-mode line
-editing, ESC-k and ESC-j retrieve backward and forward history.
-Suggestions welcome.)
-
-Is a stack size of twenty commands not to your liking?  Change it on
-the fly.  To change to saving the last 100 commands, execute the
-command:
-	echo recallsize=100 > /proc/tty/driver/tty3270
-
-Have a command you issue frequently?  Assign it to a PF or PA key!  Use
-the command
-	echo pf24="mkdir foobar; cd foobar" > /proc/tty/driver/tty3270 
-to execute the commands mkdir foobar and cd foobar immediately when you
-hit PF24.  Want to see the command line first, before you execute it?
-Use the -n option of the echo command:
-	echo -n pf24="mkdir foo; cd foo" > /proc/tty/driver/tty3270
-
-
-
-Happy testing!  I welcome any and all comments about this document, the
-driver, etc etc.
-
-Dick Hitt <rbh00@utsglobal.com>
diff --git a/Documentation/s390/CommonIO b/Documentation/s390/CommonIO
deleted file mode 100644
index 6e0f63f343b4..000000000000
--- a/Documentation/s390/CommonIO
+++ /dev/null
@@ -1,125 +0,0 @@
-S/390 common I/O-Layer - command line parameters, procfs and debugfs entries
-============================================================================
-
-Command line parameters
------------------------
-
-* ccw_timeout_log
-
-  Enable logging of debug information in case of ccw device timeouts.
-
-* cio_ignore = device[,device[,..]]
-
-	device := {all | [!]ipldev | [!]condev | [!]<devno> | [!]<devno>-<devno>}
-
-  The given devices will be ignored by the common I/O-layer; no detection
-  and device sensing will be done on any of those devices. The subchannel to 
-  which the device in question is attached will be treated as if no device was
-  attached.
-
-  An ignored device can be un-ignored later; see the "/proc entries"-section for
-  details.
-
-  The devices must be given either as bus ids (0.x.abcd) or as hexadecimal
-  device numbers (0xabcd or abcd, for 2.4 backward compatibility). If you
-  give a device number 0xabcd, it will be interpreted as 0.0.abcd.
-
-  You can use the 'all' keyword to ignore all devices. The 'ipldev' and 'condev'
-  keywords can be used to refer to the CCW based boot device and CCW console
-  device respectively (these are probably useful only when combined with the '!'
-  operator). The '!' operator will cause the I/O-layer to _not_ ignore a device.
-  The command line is parsed from left to right.
-
-  For example, 
-	cio_ignore=0.0.0023-0.0.0042,0.0.4711
-  will ignore all devices ranging from 0.0.0023 to 0.0.0042 and the device
-  0.0.4711, if detected.
-  As another example,
-	cio_ignore=all,!0.0.4711,!0.0.fd00-0.0.fd02
-  will ignore all devices but 0.0.4711, 0.0.fd00, 0.0.fd01, 0.0.fd02.
-
-  By default, no devices are ignored.
-
-
-/proc entries
--------------
-
-* /proc/cio_ignore
-
-  Lists the ranges of devices (by bus id) which are ignored by common I/O.
-
-  You can un-ignore certain or all devices by piping to /proc/cio_ignore. 
-  "free all" will un-ignore all ignored devices, 
-  "free <device range>, <device range>, ..." will un-ignore the specified
-  devices.
-
-  For example, if devices 0.0.0023 to 0.0.0042 and 0.0.4711 are ignored,
-  - echo free 0.0.0030-0.0.0032 > /proc/cio_ignore
-    will un-ignore devices 0.0.0030 to 0.0.0032 and will leave devices 0.0.0023
-    to 0.0.002f, 0.0.0033 to 0.0.0042 and 0.0.4711 ignored;
-  - echo free 0.0.0041 > /proc/cio_ignore will furthermore un-ignore device
-    0.0.0041;
-  - echo free all > /proc/cio_ignore will un-ignore all remaining ignored 
-    devices.
-
-  When a device is un-ignored, device recognition and sensing is performed and 
-  the device driver will be notified if possible, so the device will become
-  available to the system. Note that un-ignoring is performed asynchronously.
-
-  You can also add ranges of devices to be ignored by piping to 
-  /proc/cio_ignore; "add <device range>, <device range>, ..." will ignore the
-  specified devices.
-
-  Note: While already known devices can be added to the list of devices to be
-        ignored, there will be no effect on then. However, if such a device
-	disappears and then reappears, it will then be ignored. To make
-	known devices go away, you need the "purge" command (see below).
-
-  For example,
-	"echo add 0.0.a000-0.0.accc, 0.0.af00-0.0.afff > /proc/cio_ignore"
-  will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored
-  devices.
-
-  You can remove already known but now ignored devices via
-	"echo purge > /proc/cio_ignore"
-  All devices ignored but still registered and not online (= not in use)
-  will be deregistered and thus removed from the system.
-
-  The devices can be specified either by bus id (0.x.abcd) or, for 2.4 backward
-  compatibility, by the device number in hexadecimal (0xabcd or abcd). Device
-  numbers given as 0xabcd will be interpreted as 0.0.abcd.
-
-* /proc/cio_settle
-
-  A write request to this file is blocked until all queued cio actions are
-  handled. This will allow userspace to wait for pending work affecting
-  device availability after changing cio_ignore or the hardware configuration.
-
-* For some of the information present in the /proc filesystem in 2.4 (namely,
-  /proc/subchannels and /proc/chpids), see driver-model.txt.
-  Information formerly in /proc/irq_count is now in /proc/interrupts.
-
-
-debugfs entries
----------------
-
-* /sys/kernel/debug/s390dbf/cio_*/ (S/390 debug feature)
-
-  Some views generated by the debug feature to hold various debug outputs.
-
-  - /sys/kernel/debug/s390dbf/cio_crw/sprintf
-    Messages from the processing of pending channel report words (machine check
-    handling).
-
-  - /sys/kernel/debug/s390dbf/cio_msg/sprintf
-    Various debug messages from the common I/O-layer.
-
-  - /sys/kernel/debug/s390dbf/cio_trace/hex_ascii
-    Logs the calling of functions in the common I/O-layer and, if applicable, 
-    which subchannel they were called for, as well as dumps of some data
-    structures (like irb in an error case).
-
-  The level of logging can be changed to be more or less verbose by piping to 
-  /sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the
-  documentation on the S/390 debug feature (Documentation/s390/s390dbf.txt)
-  for details.
diff --git a/Documentation/s390/DASD b/Documentation/s390/DASD
deleted file mode 100644
index 9963f1e9c98a..000000000000
--- a/Documentation/s390/DASD
+++ /dev/null
@@ -1,73 +0,0 @@
-DASD device driver
-
-S/390's disk devices (DASDs) are managed by Linux via the DASD device
-driver. It is valid for all types of DASDs and represents them to
-Linux as block devices, namely "dd". Currently the DASD driver uses a
-single major number (254) and 4 minor numbers per volume (1 for the
-physical volume and 3 for partitions). With respect to partitions see
-below. Thus you may have up to 64 DASD devices in your system.
-
-The kernel parameter 'dasd=from-to,...' may be issued arbitrary times
-in the kernel's parameter line or not at all. The 'from' and 'to'
-parameters are to be given in hexadecimal notation without a leading
-0x.
-If you supply kernel parameters the different instances are processed
-in order of appearance and a minor number is reserved for any device
-covered by the supplied range up to 64 volumes. Additional DASDs are
-ignored. If you do not supply the 'dasd=' kernel parameter at all, the 
-DASD driver registers all supported DASDs of your system to a minor
-number in ascending order of the subchannel number.
-
-The driver currently supports ECKD-devices and there are stubs for
-support of the FBA and CKD architectures. For the FBA architecture
-only some smart data structures are missing to make the support
-complete. 
-We performed our testing on 3380 and 3390 type disks of different
-sizes, under VM and on the bare hardware (LPAR), using internal disks
-of the multiprise as well as a RAMAC virtual array. Disks exported by
-an Enterprise Storage Server (Seascape) should work fine as well.
-
-We currently implement one partition per volume, which is the whole
-volume, skipping the first blocks up to the volume label. These are
-reserved for IPL records and IBM's volume label to assure
-accessibility of the DASD from other OSs. In a later stage we will
-provide support of partitions, maybe VTOC oriented or using a kind of
-partition table in the label record.
-
-USAGE
-
--Low-level format (?CKD only)
-For using an ECKD-DASD as a Linux harddisk you have to low-level
-format the tracks by issuing the BLKDASDFORMAT-ioctl on that
-device. This will erase any data on that volume including IBM volume
-labels, VTOCs etc. The ioctl may take a 'struct format_data *' or
-'NULL' as an argument.  
-typedef struct {
-	int start_unit;
-	int stop_unit;
-	int blksize;
-} format_data_t;
-When a NULL argument is passed to the BLKDASDFORMAT ioctl the whole
-disk is formatted to a blocksize of 1024 bytes. Otherwise start_unit
-and stop_unit are the first and last track to be formatted. If
-stop_unit is -1 it implies that the DASD is formatted from start_unit
-up to the last track. blksize can be any power of two between 512 and
-4096. We recommend no blksize lower than 1024 because the ext2fs uses
-1kB blocks anyway and you gain approx. 50% of capacity increasing your
-blksize from 512 byte to 1kB.
-
--Make a filesystem
-Then you can mk??fs the filesystem of your choice on that volume or
-partition. For reasons of sanity you should build your filesystem on
-the partition /dev/dd?1 instead of the whole volume. You only lose 3kB	
-but may be sure that you can reuse your data after introduction of a
-real partition table.
-
-BUGS:
-- Performance sometimes is rather low because we don't fully exploit clustering
-
-TODO-List:
-- Add IBM'S Disk layout to genhd
-- Enhance driver to use more than one major number
-- Enable usage as a module
-- Support Cache fast write and DASD fast write (ECKD)
diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt
deleted file mode 100644
index c35804c238ad..000000000000
--- a/Documentation/s390/Debugging390.txt
+++ /dev/null
@@ -1,2172 +0,0 @@
-
-		  Debugging on Linux for s/390 & z/Architecture
-				       by
-	  Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
-    Copyright (C) 2000-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation
-			Best viewed with fixed width fonts
-
-Overview of Document:
-=====================
-This document is intended to give a good overview of how to debug Linux for
-s/390 and z/Architecture. It is not intended as a complete reference and not a
-tutorial on the fundamentals of C & assembly. It doesn't go into
-390 IO in any detail. It is intended to complement the documents in the
-reference section below & any other worthwhile references you get.
-
-It is intended like the Enterprise Systems Architecture/390 Reference Summary
-to be printed out & used as a quick cheat sheet self help style reference when
-problems occur.
-
-Contents
-========
-Register Set
-Address Spaces on Intel Linux
-Address Spaces on Linux for s/390 & z/Architecture
-The Linux for s/390 & z/Architecture Kernel Task Structure
-Register Usage & Stackframes on Linux for s/390 & z/Architecture
-A sample program with comments
-Compiling programs for debugging on Linux for s/390 & z/Architecture
-Debugging under VM
-s/390 & z/Architecture IO Overview
-Debugging IO on s/390 & z/Architecture under VM
-GDB on s/390 & z/Architecture
-Stack chaining in gdb by hand
-Examining core dumps
-ldd
-Debugging modules
-The proc file system
-SysRq
-References
-Special Thanks
-
-Register Set
-============
-The current architectures have the following registers.
- 
-16 General propose registers, 32 bit on s/390 and 64 bit on z/Architecture,
-r0-r15 (or gpr0-gpr15), used for arithmetic and addressing.
-
-16 Control registers, 32 bit on s/390 and 64 bit on z/Architecture, cr0-cr15,
-kernel usage only, used for memory management, interrupt control, debugging
-control etc.
-
-16 Access registers (ar0-ar15), 32 bit on both s/390 and z/Architecture,
-normally not used by normal programs but potentially could be used as
-temporary storage. These registers have a 1:1 association with general
-purpose registers and are designed to be used in the so-called access
-register mode to select different address spaces.
-Access register 0 (and access register 1 on z/Architecture, which needs a
-64 bit pointer) is currently used by the pthread library as a pointer to
-the current running threads private area.
-
-16 64 bit floating point registers (fp0-fp15 ) IEEE & HFP floating 
-point format compliant on G5 upwards & a Floating point control reg (FPC) 
-4  64 bit registers (fp0,fp2,fp4 & fp6) HFP only on older machines.
-Note:
-Linux (currently) always uses IEEE & emulates G5 IEEE format on older machines,
-( provided the kernel is configured for this ).
-
-
-The PSW is the most important register on the machine it
-is 64 bit on s/390 & 128 bit on z/Architecture & serves the roles of 
-a program counter (pc), condition code register,memory space designator.
-In IBM standard notation I am counting bit 0 as the MSB.
-It has several advantages over a normal program counter
-in that you can change address translation & program counter 
-in a single instruction. To change address translation,
-e.g. switching address translation off requires that you
-have a logical=physical mapping for the address you are
-currently running at.
-
-+-------------------------+-------------------------------------------------+
-|          Bit            |                                                 |
-+--------+----------------+                     Value                       |
-| s/390  | z/Architecture |                                                 |
-+========+================+=================================================+
-| 0      |     0          | Reserved (must be 0) otherwise specification    |
-|        |                | exception occurs.                               |
-+--------+----------------+-------------------------------------------------+
-| 1      |     1          | Program Event Recording 1 PER enabled,          |
-|        |                | PER is used to facilitate debugging e.g.        |
-|        |                | single stepping.                                |
-+--------+----------------+-------------------------------------------------+
-| 2-4    |    2-4         | Reserved (must be 0).                           |
-+--------+----------------+-------------------------------------------------+
-| 5      |     5          | Dynamic address translation 1=DAT on.           |
-+--------+----------------+-------------------------------------------------+
-| 6      |     6          | Input/Output interrupt Mask                     |
-+--------+----------------+-------------------------------------------------+
-| 7      |     7          | External interrupt Mask used primarily for      |
-|        |                | interprocessor signalling and clock interrupts. |
-+--------+----------------+-------------------------------------------------+
-| 8-11   |   8-11         | PSW Key used for complex memory protection      |
-|        |                | mechanism (not used under linux)                |
-+--------+----------------+-------------------------------------------------+
-| 12     |     12         | 1 on s/390 0 on z/Architecture                  |
-+--------+----------------+-------------------------------------------------+
-| 13     |     13         | Machine Check Mask 1=enable machine check       |
-|        |                | interrupts                                      |
-+--------+----------------+-------------------------------------------------+
-| 14     |     14         | Wait State. Set this to 1 to stop the processor |
-|        |                | except for interrupts and give  time to other   |
-|        |                | LPARS. Used in CPU idle in the kernel to        |
-|        |                | increase overall usage of processor resources.  |
-+--------+----------------+-------------------------------------------------+
-| 15     |     15         | Problem state (if set to 1 certain instructions |
-|        |                | are disabled). All linux user programs run with |
-|        |                | this bit 1 (useful info for debugging under VM).|
-+--------+----------------+-------------------------------------------------+
-| 16-17  |    16-17       | Address Space Control                           |
-|        |                |                                                 |
-|        |                | 00 Primary Space Mode:                          |
-|        |                |                                                 |
-|        |                | The register CR1 contains the primary           |
-|        |                | address-space control element (PASCE), which    |
-|        |                | points to the primary space region/segment      |
-|        |                | table origin.                                   |
-|        |                |                                                 |
-|        |                | 01 Access register mode                         |
-|        |                |                                                 |
-|        |                | 10 Secondary Space Mode:                        |
-|        |                |                                                 |
-|        |                | The register CR7 contains the secondary         |
-|        |                | address-space control element (SASCE), which    |
-|        |                | points to the secondary space region or         |
-|        |                | segment table origin.                           |
-|        |                |                                                 |
-|        |                | 11 Home Space Mode:                             |
-|        |                |                                                 |
-|        |                | The register CR13 contains the home space       |
-|        |                | address-space control element (HASCE), which    |
-|        |                | points to the home space region/segment         |
-|        |                | table origin.                                   |
-|        |                |                                                 |
-|        |                | See "Address Spaces on Linux for s/390 &        |
-|        |                | z/Architecture" below for more information      |
-|        |                | about address space usage in Linux.             |
-+--------+----------------+-------------------------------------------------+
-| 18-19  |    18-19       | Condition codes (CC)                            |
-+--------+----------------+-------------------------------------------------+
-| 20     |    20          | Fixed point overflow mask if 1=FPU exceptions   |
-|        |                | for this event occur (normally 0)               |
-+--------+----------------+-------------------------------------------------+
-| 21     |    21          | Decimal overflow mask if 1=FPU exceptions for   |
-|        |                | this event occur (normally 0)                   |
-+--------+----------------+-------------------------------------------------+
-| 22     |    22          | Exponent underflow mask if 1=FPU exceptions     |
-|        |                | for this event occur (normally 0)               |
-+--------+----------------+-------------------------------------------------+
-| 23     |    23          | Significance Mask if 1=FPU exceptions for this  |
-|        |                | event occur (normally 0)                        |
-+--------+----------------+-------------------------------------------------+
-| 24-31  |    24-30       | Reserved Must be 0.                             |
-|        +----------------+-------------------------------------------------+
-|        |    31          | Extended Addressing Mode                        |
-|        +----------------+-------------------------------------------------+
-|        |    32          | Basic Addressing Mode                           |
-|        |                |                                                 |
-|        |                | Used to set addressing mode                     |
-|        |                |                                                 |
-|        |                |    +---------+----------+----------+            |
-|        |                |    | PSW 31  | PSW 32   |          |            |
-|        |                |    +---------+----------+----------+            |
-|        |                |    |   0     |    0     |  24 bit  |            |
-|        |                |    +---------+----------+----------+            |
-|        |                |    |   0     |    1     |  31 bit  |            |
-|        |                |    +---------+----------+----------+            |
-|        |                |    |   1     |    1     |  64 bit  |            |
-|        |                |    +---------+----------+----------+            |
-+--------+----------------+-------------------------------------------------+
-| 32     |                | 1=31 bit addressing mode 0=24 bit addressing    |
-|        |                | mode (for backward compatibility), linux        |
-|        |                | always runs with this bit set to 1              |
-+--------+----------------+-------------------------------------------------+
-| 33-64  |                | Instruction address.                            |
-|        +----------------+-------------------------------------------------+
-|        |    33-63       | Reserved must be 0                              |
-|        +----------------+-------------------------------------------------+
-|        |    64-127      | Address                                         |
-|        |                |                                                 |
-|        |                |   - In 24 bits mode bits 64-103=0 bits 104-127  |
-|        |                |     Address                                     |
-|        |                |   - In 31 bits mode bits 64-96=0 bits 97-127    |
-|        |                |     Address                                     |
-|        |                |                                                 |
-|        |                | Note:                                           |
-|        |                |     unlike 31 bit mode on s/390 bit 96 must be  |
-|        |                |     zero when loading the address with LPSWE    |
-|        |                |     otherwise a specification exception occurs, |
-|        |                |     LPSW is fully backward compatible.          |
-+--------+----------------+-------------------------------------------------+
-
-Prefix Page(s)
---------------
-This per cpu memory area is too intimately tied to the processor not to mention.
-It exists between the real addresses 0-4096 on s/390 and between 0-8192 on
-z/Architecture and is exchanged with one page on s/390 or two pages on
-z/Architecture in absolute storage by the set prefix instruction during Linux
-startup.
-This page is mapped to a different prefix for each processor in an SMP
-configuration (assuming the OS designer is sane of course).
-Bytes 0-512 (200 hex) on s/390 and 0-512, 4096-4544, 4604-5119 currently on
-z/Architecture are used by the processor itself for holding such information
-as exception indications and entry points for exceptions.
-Bytes after 0xc00 hex are used by linux for per processor globals on s/390 and
-z/Architecture (there is a gap on z/Architecture currently between 0xc00 and
-0x1000, too, which is used by Linux).
-The closest thing to this on traditional architectures is the interrupt
-vector table. This is a good thing & does simplify some of the kernel coding
-however it means that we now cannot catch stray NULL pointers in the
-kernel without hard coded checks.
-
-
-
-Address Spaces on Intel Linux
-=============================
-
-The traditional Intel Linux is approximately mapped as follows forgive
-the ascii art.
-0xFFFFFFFF 4GB Himem		*****************
-				*		*
-				* Kernel Space	*
-				*		*
-				*****************	  ****************
-User Space Himem		*  User Stack	*	  *		 *
-(typically 0xC0000000 3GB )	*****************	  *		 *
-				*  Shared Libs	*	  * Next Process *
-				*****************	  *	to	 *
-				*		*   <==   *	Run	 *  <==
-				*  User Program *	  *		 *
-				*   Data BSS	*	  *		 *
-				*    Text	*	  *		 *
-				*   Sections	*	  *		 *
-0x00000000			*****************	  ****************
-
-Now it is easy to see that on Intel it is quite easy to recognise a kernel
-address as being one greater than user space himem (in this case 0xC0000000),
-and addresses of less than this are the ones in the current running program on
-this processor (if an smp box).
-If using the virtual machine ( VM ) as a debugger it is quite difficult to
-know which user process is running as the address space you are looking at
-could be from any process in the run queue.
-
-The limitation of Intels addressing technique is that the linux
-kernel uses a very simple real address to virtual addressing technique
-of Real Address=Virtual Address-User Space Himem.
-This means that on Intel the kernel linux can typically only address
-Himem=0xFFFFFFFF-0xC0000000=1GB & this is all the RAM these machines
-can typically use.
-They can lower User Himem to 2GB or lower & thus be
-able to use 2GB of RAM however this shrinks the maximum size
-of User Space from 3GB to 2GB they have a no win limit of 4GB unless
-they go to 64 Bit.
-
-
-On 390 our limitations & strengths make us slightly different.
-For backward compatibility we are only allowed use 31 bits (2GB)
-of our 32 bit addresses, however, we use entirely separate address 
-spaces for the user & kernel.
-
-This means we can support 2GB of non Extended RAM on s/390, & more
-with the Extended memory management swap device & 
-currently 4TB of physical memory currently on z/Architecture.
-
-
-Address Spaces on Linux for s/390 & z/Architecture
-==================================================
-
-Our addressing scheme is basically as follows:
-
-				   Primary Space	       Home Space
-Himem 0x7fffffff 2GB on s/390    *****************          ****************
-currently 0x3ffffffffff (2^42)-1 *  User Stack   *          *              *
-on z/Architecture.		 *****************          *              *
-				 *  Shared Libs  *	    *		   *
-				 *****************	    *		   *
-			         *               *          *    Kernel    *  
-		                 *  User Program *          *              *
-		                 *   Data BSS    *          *              *
-                                 *    Text       *          *              *
-            			 *   Sections    *          *              *
-0x00000000                       *****************          ****************
-
-This also means that we need to look at the PSW problem state bit and the
-addressing mode to decide whether we are looking at user or kernel space.
-
-User space runs in primary address mode (or access register mode within
-the vdso code).
-
-The kernel usually also runs in home space mode, however when accessing
-user space the kernel switches to primary or secondary address mode if
-the mvcos instruction is not available or if a compare-and-swap (futex)
-instruction on a user space address is performed.
-
-When also looking at the ASCE control registers, this means:
-
-User space:
-- runs in primary or access register mode
-- cr1 contains the user asce
-- cr7 contains the user asce
-- cr13 contains the kernel asce
-
-Kernel space:
-- runs in home space mode
-- cr1 contains the user or kernel asce
-  -> the kernel asce is loaded when a uaccess requires primary or
-     secondary address mode
-- cr7 contains the user or kernel asce, (changed with set_fs())
-- cr13 contains the kernel asce
-
-In case of uaccess the kernel changes to:
-- primary space mode in case of a uaccess (copy_to_user) and uses
-  e.g. the mvcp instruction to access user space. However the kernel
-  will stay in home space mode if the mvcos instruction is available
-- secondary space mode in case of futex atomic operations, so that the
-  instructions come from primary address space and data from secondary
-  space
-
-In case of KVM, the kernel runs in home space mode, but cr1 gets switched
-to contain the gmap asce before the SIE instruction gets executed. When
-the SIE instruction is finished, cr1 will be switched back to contain the
-user asce.
-
-
-Virtual Addresses on s/390 & z/Architecture
-===========================================
-
-A virtual address on s/390 is made up of 3 parts
-The SX (segment index, roughly corresponding to the PGD & PMD in Linux
-terminology) being bits 1-11.
-The PX (page index, corresponding to the page table entry (pte) in Linux
-terminology) being bits 12-19.
-The remaining bits BX (the byte index are the offset in the page )
-i.e. bits 20 to 31.
-
-On z/Architecture in linux we currently make up an address from 4 parts.
-The region index bits (RX) 0-32 we currently use bits 22-32
-The segment index (SX) being bits 33-43
-The page index (PX) being bits  44-51
-The byte index (BX) being bits  52-63
-
-Notes:
-1) s/390 has no PMD so the PMD is really the PGD also.
-A lot of this stuff is defined in pgtable.h.
-
-2) Also seeing as s/390's page indexes are only 1k  in size 
-(bits 12-19 x 4 bytes per pte ) we use 1 ( page 4k )
-to make the best use of memory by updating 4 segment indices 
-entries each time we mess with a PMD & use offsets 
-0,1024,2048 & 3072 in this page as for our segment indexes.
-On z/Architecture our page indexes are now 2k in size
-( bits 12-19 x 8 bytes per pte ) we do a similar trick
-but only mess with 2 segment indices each time we mess with
-a PMD.
-
-3) As z/Architecture supports up to a massive 5-level page table lookup we
-can only use 3 currently on Linux ( as this is all the generic kernel
-currently supports ) however this may change in future
-this allows us to access ( according to my sums )
-4TB of virtual storage per process i.e.
-4096*512(PTES)*1024(PMDS)*2048(PGD) = 4398046511104 bytes,
-enough for another 2 or 3 of years I think :-).
-to do this we use a region-third-table designation type in
-our address space control registers.
- 
-
-The Linux for s/390 & z/Architecture Kernel Task Structure
-==========================================================
-Each process/thread under Linux for S390 has its own kernel task_struct
-defined in linux/include/linux/sched.h
-The S390 on initialisation & resuming of a process on a cpu sets
-the __LC_KERNEL_STACK variable in the spare prefix area for this cpu
-(which we use for per-processor globals).
-
-The kernel stack pointer is intimately tied with the task structure for
-each processor as follows.
-
-                      s/390
-            ************************
-            *  1 page kernel stack *
-	    *        ( 4K )        *
-            ************************
-            *   1 page task_struct *        
-            *        ( 4K )        *
-8K aligned  ************************ 
-
-                 z/Architecture
-            ************************
-            *  2 page kernel stack *
-	    *        ( 8K )        *
-            ************************
-            *  2 page task_struct  *        
-            *        ( 8K )        *
-16K aligned ************************ 
-
-What this means is that we don't need to dedicate any register or global
-variable to point to the current running process & can retrieve it with the
-following very simple construct for s/390 & one very similar for z/Architecture.
-
-static inline struct task_struct * get_current(void)
-{
-        struct task_struct *current;
-        __asm__("lhi   %0,-8192\n\t"
-                "nr    %0,15"
-                : "=r" (current) );
-        return current;
-}
-
-i.e. just anding the current kernel stack pointer with the mask -8192.
-Thankfully because Linux doesn't have support for nested IO interrupts
-& our devices have large buffers can survive interrupts being shut for 
-short amounts of time we don't need a separate stack for interrupts.
-
-
-
-
-Register Usage & Stackframes on Linux for s/390 & z/Architecture
-=================================================================
-Overview:
----------
-This is the code that gcc produces at the top & the bottom of
-each function. It usually is fairly consistent & similar from 
-function to function & if you know its layout you can probably
-make some headway in finding the ultimate cause of a problem
-after a crash without a source level debugger.
-
-Note: To follow stackframes requires a knowledge of C or Pascal &
-limited knowledge of one assembly language.
-
-It should be noted that there are some differences between the
-s/390 and z/Architecture stack layouts as the z/Architecture stack layout
-didn't have to maintain compatibility with older linkage formats.
-
-Glossary:
----------
-alloca:
-This is a built in compiler function for runtime allocation
-of extra space on the callers stack which is obviously freed
-up on function exit ( e.g. the caller may choose to allocate nothing
-of a buffer of 4k if required for temporary purposes ), it generates 
-very efficient code ( a few cycles  ) when compared to alternatives 
-like malloc.
-
-automatics: These are local variables on the stack,
-i.e they aren't in registers & they aren't static.
-
-back-chain:
-This is a pointer to the stack pointer before entering a
-framed functions ( see frameless function ) prologue got by 
-dereferencing the address of the current stack pointer,
- i.e. got by accessing the 32 bit value at the stack pointers
-current location.
-
-base-pointer:
-This is a pointer to the back of the literal pool which
-is an area just behind each procedure used to store constants
-in each function.
-
-call-clobbered: The caller probably needs to save these registers if there 
-is something of value in them, on the stack or elsewhere before making a 
-call to another procedure so that it can restore it later.
-
-epilogue:
-The code generated by the compiler to return to the caller.
-
-frameless-function
-A frameless function in Linux for s390 & z/Architecture is one which doesn't 
-need more than the register save area (96 bytes on s/390, 160 on z/Architecture)
-given to it by the caller.
-A frameless function never:
-1) Sets up a back chain.
-2) Calls alloca.
-3) Calls other normal functions
-4) Has automatics.
-
-GOT-pointer:
-This is a pointer to the global-offset-table in ELF
-( Executable Linkable Format, Linux'es most common executable format ),
-all globals & shared library objects are found using this pointer.
-
-lazy-binding
-ELF shared libraries are typically only loaded when routines in the shared
-library are actually first called at runtime. This is lazy binding.
-
-procedure-linkage-table
-This is a table found from the GOT which contains pointers to routines
-in other shared libraries which can't be called to by easier means.
-
-prologue:
-The code generated by the compiler to set up the stack frame.
-
-outgoing-args:
-This is extra area allocated on the stack of the calling function if the
-parameters for the callee's cannot all be put in registers, the same
-area can be reused by each function the caller calls.
-
-routine-descriptor:
-A COFF  executable format based concept of a procedure reference 
-actually being 8 bytes or more as opposed to a simple pointer to the routine.
-This is typically defined as follows
-Routine Descriptor offset 0=Pointer to Function
-Routine Descriptor offset 4=Pointer to Table of Contents
-The table of contents/TOC is roughly equivalent to a GOT pointer.
-& it means that shared libraries etc. can be shared between several
-environments each with their own TOC.
-
- 
-static-chain: This is used in nested functions a concept adopted from pascal 
-by gcc not used in ansi C or C++ ( although quite useful ), basically it
-is a pointer used to reference local variables of enclosing functions.
-You might come across this stuff once or twice in your lifetime.
-
-e.g.
-The function below should return 11 though gcc may get upset & toss warnings 
-about unused variables.
-int FunctionA(int a)
-{
-	int b;
-	FunctionC(int c)
-	{
-		b=c+1;
-	}
-	FunctionC(10);
-	return(b);
-}
-
-
-s/390 & z/Architecture Register usage
-=====================================
-r0       used by syscalls/assembly                  call-clobbered
-r1	 used by syscalls/assembly                  call-clobbered
-r2       argument 0 / return value 0                call-clobbered
-r3       argument 1 / return value 1 (if long long) call-clobbered
-r4       argument 2                                 call-clobbered
-r5       argument 3                                 call-clobbered
-r6	 argument 4				    saved
-r7       pointer-to arguments 5 to ...              saved      
-r8       this & that                                saved
-r9       this & that                                saved
-r10      static-chain ( if nested function )        saved
-r11      frame-pointer ( if function used alloca )  saved
-r12      got-pointer                                saved
-r13      base-pointer                               saved
-r14      return-address                             saved
-r15      stack-pointer                              saved
-
-f0       argument 0 / return value ( float/double ) call-clobbered
-f2       argument 1                                 call-clobbered
-f4       z/Architecture argument 2                  saved
-f6       z/Architecture argument 3                  saved
-The remaining floating points
-f1,f3,f5 f7-f15 are call-clobbered.
-
-Notes:
-------
-1) The only requirement is that registers which are used
-by the callee are saved, e.g. the compiler is perfectly
-capable of using r11 for purposes other than a frame a
-frame pointer if a frame pointer is not needed.
-2) In functions with variable arguments e.g. printf the calling procedure 
-is identical to one without variable arguments & the same number of 
-parameters. However, the prologue of this function is somewhat more
-hairy owing to it having to move these parameters to the stack to
-get va_start, va_arg & va_end to work.
-3) Access registers are currently unused by gcc but are used in
-the kernel. Possibilities exist to use them at the moment for
-temporary storage but it isn't recommended.
-4) Only 4 of the floating point registers are used for
-parameter passing as older machines such as G3 only have only 4
-& it keeps the stack frame compatible with other compilers.
-However with IEEE floating point emulation under linux on the
-older machines you are free to use the other 12.
-5) A long long or double parameter cannot be have the 
-first 4 bytes in a register & the second four bytes in the 
-outgoing args area. It must be purely in the outgoing args
-area if crossing this boundary.
-6) Floating point parameters are mixed with outgoing args
-on the outgoing args area in the order the are passed in as parameters.
-7) Floating point arguments 2 & 3 are saved in the outgoing args area for 
-z/Architecture
-
-
-Stack Frame Layout
-------------------
-s/390     z/Architecture
-0         0             back chain ( a 0 here signifies end of back chain )
-4         8             eos ( end of stack, not used on Linux for S390 used in other linkage formats )
-8         16            glue used in other s/390 linkage formats for saved routine descriptors etc.
-12        24            glue used in other s/390 linkage formats for saved routine descriptors etc.
-16        32            scratch area
-20        40            scratch area
-24        48            saved r6 of caller function
-28        56            saved r7 of caller function
-32        64            saved r8 of caller function
-36        72            saved r9 of caller function
-40        80            saved r10 of caller function
-44        88            saved r11 of caller function
-48        96            saved r12 of caller function
-52        104           saved r13 of caller function
-56        112           saved r14 of caller function
-60        120           saved r15 of caller function
-64        128           saved f4 of caller function
-72        132           saved f6 of caller function
-80                      undefined
-96        160           outgoing args passed from caller to callee
-96+x      160+x         possible stack alignment ( 8 bytes desirable )
-96+x+y    160+x+y       alloca space of caller ( if used )
-96+x+y+z  160+x+y+z     automatics of caller ( if used )
-0                       back-chain
-
-A sample program with comments.
-===============================
-
-Comments on the function test
------------------------------
-1) It didn't need to set up a pointer to the constant pool gpr13 as it is not
-used ( :-( ).
-2) This is a frameless function & no stack is bought.
-3) The compiler was clever enough to recognise that it could return the
-value in r2 as well as use it for the passed in parameter ( :-) ).
-4) The basr ( branch relative & save ) trick works as follows the instruction 
-has a special case with r0,r0 with some instruction operands is understood as 
-the literal value 0, some risc architectures also do this ). So now
-we are branching to the next address & the address new program counter is
-in r13,so now we subtract the size of the function prologue we have executed
-+ the size of the literal pool to get to the top of the literal pool
-0040037c int test(int b)
-{                                                          # Function prologue below
-  40037c:	90 de f0 34 	stm	%r13,%r14,52(%r15) # Save registers r13 & r14
-  400380:	0d d0       	basr	%r13,%r0           # Set up pointer to constant pool using
-  400382:	a7 da ff fa 	ahi	%r13,-6            # basr trick
-	return(5+b);
-	                                                   # Huge main program
-  400386:	a7 2a 00 05 	ahi	%r2,5              # add 5 to r2
-
-                                                           # Function epilogue below 
-  40038a:	98 de f0 34 	lm	%r13,%r14,52(%r15) # restore registers r13 & 14
-  40038e:	07 fe       	br	%r14               # return
-}
-
-Comments on the function main
------------------------------
-1) The compiler did this function optimally ( 8-) )
-
-Literal pool for main.
-400390:	ff ff ff ec 	.long 0xffffffec
-main(int argc,char *argv[])
-{                                                          # Function prologue below
-  400394:	90 bf f0 2c 	stm	%r11,%r15,44(%r15) # Save necessary registers
-  400398:	18 0f       	lr	%r0,%r15           # copy stack pointer to r0
-  40039a:	a7 fa ff a0 	ahi	%r15,-96           # Make area for callee saving 
-  40039e:	0d d0       	basr	%r13,%r0           # Set up r13 to point to
-  4003a0:	a7 da ff f0 	ahi	%r13,-16           # literal pool
-  4003a4:	50 00 f0 00 	st	%r0,0(%r15)        # Save backchain
-
-	return(test(5));                                   # Main Program Below
-  4003a8:	58 e0 d0 00 	l	%r14,0(%r13)       # load relative address of test from
-						           # literal pool
-  4003ac:	a7 28 00 05 	lhi	%r2,5              # Set first parameter to 5
-  4003b0:	4d ee d0 00 	bas	%r14,0(%r14,%r13)  # jump to test setting r14 as return
-							   # address using branch & save instruction.
-
-							   # Function Epilogue below
-  4003b4:	98 bf f0 8c 	lm	%r11,%r15,140(%r15)# Restore necessary registers.
-  4003b8:	07 fe       	br	%r14               # return to do program exit 
-}
-
-
-Compiler updates
-----------------
-
-main(int argc,char *argv[])
-{
-  4004fc:	90 7f f0 1c       	stm	%r7,%r15,28(%r15)
-  400500:	a7 d5 00 04       	bras	%r13,400508 <main+0xc>
-  400504:	00 40 04 f4       	.long	0x004004f4 
-  # compiler now puts constant pool in code to so it saves an instruction 
-  400508:	18 0f             	lr	%r0,%r15
-  40050a:	a7 fa ff a0       	ahi	%r15,-96
-  40050e:	50 00 f0 00       	st	%r0,0(%r15)
-	return(test(5));
-  400512:	58 10 d0 00       	l	%r1,0(%r13)
-  400516:	a7 28 00 05       	lhi	%r2,5
-  40051a:	0d e1             	basr	%r14,%r1
-  # compiler adds 1 extra instruction to epilogue this is done to
-  # avoid processor pipeline stalls owing to data dependencies on g5 &
-  # above as register 14 in the old code was needed directly after being loaded 
-  # by the lm	%r11,%r15,140(%r15) for the br %14.
-  40051c:	58 40 f0 98       	l	%r4,152(%r15)
-  400520:	98 7f f0 7c       	lm	%r7,%r15,124(%r15)
-  400524:	07 f4             	br	%r4
-}
-
-
-Hartmut ( our compiler developer ) also has been threatening to take out the
-stack backchain in optimised code as this also causes pipeline stalls, you
-have been warned.
-
-64 bit z/Architecture code disassembly
---------------------------------------
-
-If you understand the stuff above you'll understand the stuff
-below too so I'll avoid repeating myself & just say that 
-some of the instructions have g's on the end of them to indicate
-they are 64 bit & the stack offsets are a bigger, 
-the only other difference you'll find between 32 & 64 bit is that
-we now use f4 & f6 for floating point arguments on 64 bit.
-00000000800005b0 <test>:
-int test(int b)
-{
-	return(5+b);
-    800005b0:	a7 2a 00 05       	ahi	%r2,5
-    800005b4:	b9 14 00 22       	lgfr	%r2,%r2 # downcast to integer
-    800005b8:	07 fe             	br	%r14
-    800005ba:	07 07             	bcr	0,%r7
-
-
-}
-
-00000000800005bc <main>:
-main(int argc,char *argv[])
-{ 
-    800005bc:	eb bf f0 58 00 24 	stmg	%r11,%r15,88(%r15)
-    800005c2:	b9 04 00 1f       	lgr	%r1,%r15
-    800005c6:	a7 fb ff 60       	aghi	%r15,-160
-    800005ca:	e3 10 f0 00 00 24 	stg	%r1,0(%r15)
-	return(test(5));
-    800005d0:	a7 29 00 05       	lghi	%r2,5
-    # brasl allows jumps > 64k & is overkill here bras would do fune
-    800005d4:	c0 e5 ff ff ff ee 	brasl	%r14,800005b0 <test> 
-    800005da:	e3 40 f1 10 00 04 	lg	%r4,272(%r15)
-    800005e0:	eb bf f0 f8 00 04 	lmg	%r11,%r15,248(%r15)
-    800005e6:	07 f4             	br	%r4
-}
-
-
-
-Compiling programs for debugging on Linux for s/390 & z/Architecture
-====================================================================
--gdwarf-2 now works it should be considered the default debugging
-format for s/390 & z/Architecture as it is more reliable for debugging
-shared libraries,  normal -g debugging works much better now
-Thanks to the IBM java compiler developers bug reports. 
-
-This is typically done adding/appending the flags -g or -gdwarf-2 to the 
-CFLAGS & LDFLAGS variables Makefile of the program concerned.
-
-If using gdb & you would like accurate displays of registers &
- stack traces compile without optimisation i.e make sure
-that there is no -O2 or similar on the CFLAGS line of the Makefile &
-the emitted gcc commands, obviously this will produce worse code 
-( not advisable for shipment ) but it is an  aid to the debugging process.
-
-This aids debugging because the compiler will copy parameters passed in
-in registers onto the stack so backtracing & looking at passed in
-parameters will work, however some larger programs which use inline functions
-will not compile without optimisation.
-
-Debugging with optimisation has since much improved after fixing
-some bugs, please make sure you are using gdb-5.0 or later developed 
-after Nov'2000.
-
-
-
-Debugging under VM
-==================
-
-Notes
------
-Addresses & values in the VM debugger are always hex never decimal
-Address ranges are of the format <HexValue1>-<HexValue2> or
-<HexValue1>.<HexValue2>
-For example, the address range	0x2000 to 0x3000 can be described as 2000-3000
-or 2000.1000
-
-The VM Debugger is case insensitive.
-
-VM's strengths are usually other debuggers weaknesses you can get at any
-resource no matter how sensitive e.g. memory management resources, change
-address translation in the PSW. For kernel hacking you will reap dividends if
-you get good at it.
-
-The VM Debugger displays operators but not operands, and also the debugger
-displays useful information on the same line as the author of the code probably
-felt that it was a good idea not to go over the 80 columns on the screen.
-This isn't as unintuitive as it may seem as the s/390 instructions are easy to
-decode mentally and you can make a good guess at a lot of them as all the
-operands are nibble (half byte aligned).
-So if you have an objdump listing by hand, it is quite easy to follow, and if
-you don't have an objdump listing keep a copy of the s/390 Reference Summary
-or alternatively the s/390 principles of operation next to you.
-e.g. even I can guess that 
-0001AFF8' LR    180F        CC 0
-is a ( load register ) lr r0,r15 
-
-Also it is very easy to tell the length of a 390 instruction from the 2 most
-significant bits in the instruction (not that this info is really useful except
-if you are trying to make sense of a hexdump of code).
-Here is a table
-Bits                    Instruction Length
-------------------------------------------
-00                          2 Bytes
-01                          4 Bytes
-10                          4 Bytes
-11                          6 Bytes
-
-The debugger also displays other useful info on the same line such as the
-addresses being operated on destination addresses of branches & condition codes.
-e.g.  
-00019736' AHI   A7DAFF0E    CC 1
-000198BA' BRC   A7840004 -> 000198C2'   CC 0
-000198CE' STM   900EF068 >> 0FA95E78    CC 2
-
-
-
-Useful VM debugger commands
----------------------------
-
-I suppose I'd better mention this before I start
-to list the current active traces do 
-Q TR
-there can be a maximum of 255 of these per set
-( more about trace sets later ).
-To stop traces issue a
-TR END.
-To delete a particular breakpoint issue
-TR DEL <breakpoint number>
-
-The PA1 key drops to CP mode so you can issue debugger commands,
-Doing alt c (on my 3270 console at least ) clears the screen. 
-hitting b <enter> comes back to the running operating system
-from cp mode ( in our case linux ).
-It is typically useful to add shortcuts to your profile.exec file
-if you have one ( this is roughly equivalent to autoexec.bat in DOS ).
-file here are a few from mine.
-/* this gives me command history on issuing f12 */
-set pf12 retrieve 
-/* this continues */
-set pf8 imm b
-/* goes to trace set a */
-set pf1 imm tr goto a
-/* goes to trace set b */
-set pf2 imm tr goto b
-/* goes to trace set c */
-set pf3 imm tr goto c
-
-
-
-Instruction Tracing
--------------------
-Setting a simple breakpoint
-TR I PSWA <address>
-To debug a particular function try
-TR I R <function address range>
-TR I on its own will single step.
-TR I DATA <MNEMONIC> <OPTIONAL RANGE> will trace for particular mnemonics
-e.g.
-TR I DATA 4D R 0197BC.4000
-will trace for BAS'es ( opcode 4D ) in the range 0197BC.4000
-if you were inclined you could add traces for all branch instructions &
-suffix them with the run prefix so you would have a backtrace on screen 
-when a program crashes.
-TR BR <INTO OR FROM> will trace branches into or out of an address.
-e.g.
-TR BR INTO 0 is often quite useful if a program is getting awkward & deciding
-to branch to 0 & crashing as this will stop at the address before in jumps to 0.
-TR I R <address range> RUN cmd d g
-single steps a range of addresses but stays running &
-displays the gprs on each step.
-
-
-
-Displaying & modifying Registers
---------------------------------
-D G will display all the gprs
-Adding a extra G to all the commands is necessary to access the full 64 bit 
-content in VM on z/Architecture. Obviously this isn't required for access
-registers as these are still 32 bit.
-e.g. DGG instead of DG 
-D X will display all the control registers
-D AR will display all the access registers
-D AR4-7 will display access registers 4 to 7
-CPU ALL D G will display the GRPS of all CPUS in the configuration
-D PSW will display the current PSW
-st PSW 2000 will put the value 2000 into the PSW &
-cause crash your machine.
-D PREFIX displays the prefix offset
-
-
-Displaying Memory
------------------
-To display memory mapped using the current PSW's mapping try
-D <range>
-To make VM display a message each time it hits a particular address and
-continue try
-D I<range> will disassemble/display a range of instructions.
-ST addr 32 bit word will store a 32 bit aligned address
-D T<range> will display the EBCDIC in an address (if you are that way inclined)
-D R<range> will display real addresses ( without DAT ) but with prefixing.
-There are other complex options to display if you need to get at say home space
-but are in primary space the easiest thing to do is to temporarily
-modify the PSW to the other addressing mode, display the stuff & then
-restore it.
-
-
- 
-Hints
------
-If you want to issue a debugger command without halting your virtual machine
-with the PA1 key try prefixing the command with #CP e.g.
-#cp tr i pswa 2000
-also suffixing most debugger commands with RUN will cause them not
-to stop just display the mnemonic at the current instruction on the console.
-If you have several breakpoints you want to put into your program &
-you get fed up of cross referencing with System.map
-you can do the following trick for several symbols.
-grep do_signal System.map 
-which emits the following among other things
-0001f4e0 T do_signal 
-now you can do
-
-TR I PSWA 0001f4e0 cmd msg * do_signal
-This sends a message to your own console each time do_signal is entered.
-( As an aside I wrote a perl script once which automatically generated a REXX
-script with breakpoints on every kernel procedure, this isn't a good idea
-because there are thousands of these routines & VM can only set 255 breakpoints
-at a time so you nearly had to spend as long pruning the file down as you would 
-entering the msgs by hand), however, the trick might be useful for a single
-object file. In the 3270 terminal emulator x3270 there is a very useful option
-in the file menu called "Save Screen In File" - this is very good for keeping a
-copy of traces.
-
-From CMS help <command name> will give you online help on a particular command. 
-e.g. 
-HELP DISPLAY
-
-Also CP has a file called profile.exec which automatically gets called
-on startup of CMS ( like autoexec.bat ), keeping on a DOS analogy session
-CP has a feature similar to doskey, it may be useful for you to
-use profile.exec to define some keystrokes. 
-e.g.
-SET PF9 IMM B
-This does a single step in VM on pressing F8. 
-SET PF10  ^
-This sets up the ^ key.
-which can be used for ^c (ctrl-c),^z (ctrl-z) which can't be typed directly
-into some 3270 consoles.
-SET PF11 ^-
-This types the starting keystrokes for a sysrq see SysRq below.
-SET PF12 RETRIEVE
-This retrieves command history on pressing F12.
-
-
-Sometimes in VM the display is set up to scroll automatically this
-can be very annoying if there are messages you wish to look at
-to stop this do
-TERM MORE 255 255
-This will nearly stop automatic screen updates, however it will
-cause a denial of service if lots of messages go to the 3270 console,
-so it would be foolish to use this as the default on a production machine.
- 
-
-Tracing particular processes
-----------------------------
-The kernel's text segment is intentionally at an address in memory that it will
-very seldom collide with text segments of user programs ( thanks Martin ),
-this simplifies debugging the kernel.
-However it is quite common for user processes to have addresses which collide
-this can make debugging a particular process under VM painful under normal
-circumstances as the process may change when doing a 
-TR I R <address range>.
-Thankfully after reading VM's online help I figured out how to debug
-I particular process.
-
-Your first problem is to find the STD ( segment table designation )
-of the program you wish to debug.
-There are several ways you can do this here are a few
-1) objdump --syms <program to be debugged> | grep main
-To get the address of main in the program.
-tr i pswa <address of main>
-Start the program, if VM drops to CP on what looks like the entry
-point of the main function this is most likely the process you wish to debug.
-Now do a D X13 or D XG13 on z/Architecture.
-On 31 bit the STD is bits 1-19 ( the STO segment table origin ) 
-& 25-31 ( the STL segment table length ) of CR13.
-now type
-TR I R STD <CR13's value> 0.7fffffff
-e.g.
-TR I R STD 8F32E1FF 0.7fffffff
-Another very useful variation is
-TR STORE INTO STD <CR13's value> <address range>
-for finding out when a particular variable changes.
-
-An alternative way of finding the STD of a currently running process 
-is to do the following, ( this method is more complex but
-could be quite convenient if you aren't updating the kernel much &
-so your kernel structures will stay constant for a reasonable period of
-time ).
-
-grep task /proc/<pid>/status
-from this you should see something like
-task: 0f160000 ksp: 0f161de8 pt_regs: 0f161f68
-This now gives you a pointer to the task structure.
-Now make CC:="s390-gcc -g" kernel/sched.s
-To get the task_struct stabinfo.
-( task_struct is defined in include/linux/sched.h ).
-Now we want to look at
-task->active_mm->pgd
-on my machine the active_mm in the task structure stab is
-active_mm:(4,12),672,32
-its offset is 672/8=84=0x54
-the pgd member in the mm_struct stab is
-pgd:(4,6)=*(29,5),96,32
-so its offset is 96/8=12=0xc
-
-so we'll
-hexdump -s 0xf160054 /dev/mem | more
-i.e. task_struct+active_mm offset
-to look at the active_mm member
-f160054 0fee cc60 0019 e334 0000 0000 0000 0011
-hexdump -s 0x0feecc6c /dev/mem | more
-i.e. active_mm+pgd offset
-feecc6c 0f2c 0000 0000 0001 0000 0001 0000 0010
-we get something like
-now do 
-TR I R STD <pgd|0x7f> 0.7fffffff
-i.e. the 0x7f is added because the pgd only
-gives the page table origin & we need to set the low bits
-to the maximum possible segment table length.
-TR I R STD 0f2c007f 0.7fffffff
-on z/Architecture you'll probably need to do
-TR I R STD <pgd|0x7> 0.ffffffffffffffff
-to set the TableType to 0x1 & the Table length to 3.
-
-
-
-Tracing Program Exceptions
---------------------------
-If you get a crash which says something like
-illegal operation or specification exception followed by a register dump
-You can restart linux & trace these using the tr prog <range or value> trace
-option.
-
-
-The most common ones you will normally be tracing for is
-1=operation exception
-2=privileged operation exception
-4=protection exception
-5=addressing exception
-6=specification exception
-10=segment translation exception
-11=page translation exception
-
-The full list of these is on page 22 of the current s/390 Reference Summary.
-e.g.
-tr prog 10 will trace segment translation exceptions.
-tr prog on its own will trace all program interruption codes.
-
-Trace Sets
-----------
-On starting VM you are initially in the INITIAL trace set.
-You can do a Q TR to verify this.
-If you have a complex tracing situation where you wish to wait for instance 
-till a driver is open before you start tracing IO, but know in your
-heart that you are going to have to make several runs through the code till you
-have a clue whats going on. 
-
-What you can do is
-TR I PSWA <Driver open address>
-hit b to continue till breakpoint
-reach the breakpoint
-now do your
-TR GOTO B 
-TR IO 7c08-7c09 inst int run 
-or whatever the IO channels you wish to trace are & hit b
-
-To got back to the initial trace set do
-TR GOTO INITIAL
-& the TR I PSWA <Driver open address> will be the only active breakpoint again.
-
-
-Tracing linux syscalls under VM
--------------------------------
-Syscalls are implemented on Linux for S390 by the Supervisor call instruction
-(SVC). There 256 possibilities of these as the instruction is made up of a 0xA
-opcode and the second byte being the syscall number. They are traced using the
-simple command:
-TR SVC  <Optional value or range>
-the syscalls are defined in linux/arch/s390/include/asm/unistd.h
-e.g. to trace all file opens just do
-TR SVC 5 ( as this is the syscall number of open )
-
-
-SMP Specific commands
----------------------
-To find out how many cpus you have
-Q CPUS displays all the CPU's available to your virtual machine
-To find the cpu that the current cpu VM debugger commands are being directed at
-do Q CPU to change the current cpu VM debugger commands are being directed at do
-CPU <desired cpu no>
-
-On a SMP guest issue a command to all CPUs try prefixing the command with cpu
-all. To issue a command to a particular cpu try cpu <cpu number> e.g.
-CPU 01 TR I R 2000.3000
-If you are running on a guest with several cpus & you have a IO related problem
-& cannot follow the flow of code but you know it isn't smp related.
-from the bash prompt issue
-shutdown -h now or halt.
-do a Q CPUS to find out how many cpus you have
-detach each one of them from cp except cpu 0 
-by issuing a 
-DETACH CPU 01-(number of cpus in configuration)
-& boot linux again.
-TR SIGP will trace inter processor signal processor instructions.
-DEFINE CPU 01-(number in configuration) 
-will get your guests cpus back.
-
-
-Help for displaying ascii textstrings
--------------------------------------
-On the very latest VM Nucleus'es VM can now display ascii
-( thanks Neale for the hint ) by doing
-D TX<lowaddr>.<len>
-e.g.
-D TX0.100
-
-Alternatively
-=============
-Under older VM debuggers (I love EBDIC too) you can use following little
-program which converts a command line of hex digits to ascii text. It can be
-compiled under linux and you can copy the hex digits from your x3270 terminal
-to your xterm if you are debugging from a linuxbox.
-
-This is quite useful when looking at a parameter passed in as a text string
-under VM ( unless you are good at decoding ASCII in your head ).
-
-e.g. consider tracing an open syscall
-TR SVC 5
-We have stopped at a breakpoint
-000151B0' SVC   0A05     -> 0001909A'   CC 0
-
-D 20.8 to check the SVC old psw in the prefix area and see was it from userspace
-(for the layout of the prefix area consult the "Fixed Storage Locations"
-chapter of the s/390 Reference Summary if you have it available).
-V00000020  070C2000 800151B2
-The problem state bit wasn't set &  it's also too early in the boot sequence
-for it to be a userspace SVC if it was we would have to temporarily switch the 
-psw to user space addressing so we could get at the first parameter of the open
-in gpr2.
-Next do a 
-D G2
-GPR  2 =  00014CB4
-Now display what gpr2 is pointing to
-D 00014CB4.20
-V00014CB4  2F646576 2F636F6E 736F6C65 00001BF5
-V00014CC4  FC00014C B4001001 E0001000 B8070707
-Now copy the text till the first 00 hex ( which is the end of the string
-to an xterm & do hex2ascii on it.
-hex2ascii 2F646576 2F636F6E 736F6C65 00 
-outputs
-Decoded Hex:=/ d e v / c o n s o l e 0x00 
-We were opening the console device,
-
-You can compile the code below yourself for practice :-),
-/*
- *    hex2ascii.c
- *    a useful little tool for converting a hexadecimal command line to ascii
- *
- *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
- *    (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation.
- */   
-#include <stdio.h>
-
-int main(int argc,char *argv[])
-{
-  int cnt1,cnt2,len,toggle=0;
-  int startcnt=1;
-  unsigned char c,hex;
-  
-  if(argc>1&&(strcmp(argv[1],"-a")==0))
-     startcnt=2;
-  printf("Decoded Hex:=");
-  for(cnt1=startcnt;cnt1<argc;cnt1++)
-  {
-    len=strlen(argv[cnt1]);
-    for(cnt2=0;cnt2<len;cnt2++)
-    {
-       c=argv[cnt1][cnt2];
-       if(c>='0'&&c<='9')
-	  c=c-'0';
-       if(c>='A'&&c<='F')
-	  c=c-'A'+10;
-       if(c>='a'&&c<='f')
-	  c=c-'a'+10;
-       switch(toggle)
-       {
-	  case 0:
-	     hex=c<<4;
-	     toggle=1;
-	  break;
-	  case 1:
-	     hex+=c;
-	     if(hex<32||hex>127)
-	     {
-		if(startcnt==1)
-		   printf("0x%02X ",(int)hex);
-		else
-		   printf(".");
-	     }
-	     else
-	     {
-	       printf("%c",hex);
-	       if(startcnt==1)
-		  printf(" ");
-	     }
-	     toggle=0;
-	  break;
-       }
-    }
-  }
-  printf("\n");
-}
-
-
-
-
-Stack tracing under VM
-----------------------
-A basic backtrace
------------------
-
-Here are the tricks I use 9 out of 10 times it works pretty well,
-
-When your backchain reaches a dead end
---------------------------------------
-This can happen when an exception happens in the kernel and the kernel is
-entered twice. If you reach the NULL pointer at the end of the back chain you
-should be able to sniff further back if you follow the following tricks.
-1) A kernel address should be easy to recognise since it is in
-primary space & the problem state bit isn't set & also
-The Hi bit of the address is set.
-2) Another backchain should also be easy to recognise since it is an 
-address pointing to another address approximately 100 bytes or 0x70 hex
-behind the current stackpointer.
-
-
-Here is some practice.
-boot the kernel & hit PA1 at some random time
-d g to display the gprs, this should display something like
-GPR  0 =  00000001  00156018  0014359C  00000000
-GPR  4 =  00000001  001B8888  000003E0  00000000
-GPR  8 =  00100080  00100084  00000000  000FE000
-GPR 12 =  00010400  8001B2DC  8001B36A  000FFED8
-Note that GPR14 is a return address but as we are real men we are going to
-trace the stack.
-display 0x40 bytes after the stack pointer.
-
-V000FFED8  000FFF38 8001B838 80014C8E 000FFF38
-V000FFEE8  00000000 00000000 000003E0 00000000
-V000FFEF8  00100080 00100084 00000000 000FE000
-V000FFF08  00010400 8001B2DC 8001B36A 000FFED8
-
-
-Ah now look at whats in sp+56 (sp+0x38) this is 8001B36A our saved r14 if
-you look above at our stackframe & also agrees with GPR14.
-
-now backchain 
-d 000FFF38.40
-we now are taking the contents of SP to get our first backchain.
-
-V000FFF38  000FFFA0 00000000 00014995 00147094
-V000FFF48  00147090 001470A0 000003E0 00000000
-V000FFF58  00100080 00100084 00000000 001BF1D0
-V000FFF68  00010400 800149BA 80014CA6 000FFF38
-
-This displays a 2nd return address of 80014CA6
-
-now do d 000FFFA0.40 for our 3rd backchain
-
-V000FFFA0  04B52002 0001107F 00000000 00000000
-V000FFFB0  00000000 00000000 FF000000 0001107F
-V000FFFC0  00000000 00000000 00000000 00000000
-V000FFFD0  00010400 80010802 8001085A 000FFFA0
-
-
-our 3rd return address is 8001085A
-
-as the 04B52002 looks suspiciously like rubbish it is fair to assume that the
-kernel entry routines for the sake of optimisation don't set up a backchain.
-
-now look at System.map to see if the addresses make any sense.
-
-grep -i 0001b3 System.map
-outputs among other things
-0001b304 T cpu_idle 
-so 8001B36A
-is cpu_idle+0x66 ( quiet the cpu is asleep, don't wake it )
-
-
-grep -i 00014 System.map 
-produces among other things
-00014a78 T start_kernel  
-so 0014CA6 is start_kernel+some hex number I can't add in my head.
-
-grep -i 00108 System.map 
-this produces
-00010800 T _stext
-so   8001085A is _stext+0x5a
-
-Congrats you've done your first backchain.
-
-
-
-s/390 & z/Architecture IO Overview
-==================================
-
-I am not going to give a course in 390 IO architecture as this would take me
-quite a while and I'm no expert. Instead I'll give a 390 IO architecture
-summary for Dummies. If you have the s/390 principles of operation available
-read this instead. If nothing else you may find a few useful keywords in here
-and be able to use them on a web search engine to find more useful information.
-
-Unlike other bus architectures modern 390 systems do their IO using mostly
-fibre optics and devices such as tapes and disks can be shared between several
-mainframes. Also S390 can support up to 65536 devices while a high end PC based
-system might be choking with around 64.
-
-Here is some of the common IO terminology:
-
-Subchannel:
-This is the logical number most IO commands use to talk to an IO device. There
-can be up to 0x10000 (65536) of these in a configuration, typically there are a
-few hundred. Under VM for simplicity they are allocated contiguously, however
-on the native hardware they are not. They typically stay consistent between
-boots provided no new hardware is inserted or removed.
-Under Linux for s390 we use these as IRQ's and also when issuing an IO command
-(CLEAR SUBCHANNEL, HALT SUBCHANNEL, MODIFY SUBCHANNEL, RESUME SUBCHANNEL,
-START SUBCHANNEL, STORE SUBCHANNEL and TEST SUBCHANNEL). We use this as the ID
-of the device we wish to talk to. The most important of these instructions are
-START SUBCHANNEL (to start IO), TEST SUBCHANNEL (to check whether the IO
-completed successfully) and HALT SUBCHANNEL (to kill IO). A subchannel can have
-up to 8 channel paths to a device, this offers redundancy if one is not
-available.
-
-Device Number:
-This number remains static and is closely tied to the hardware. There are 65536
-of these, made up of a CHPID (Channel Path ID, the most significant 8 bits) and
-another lsb 8 bits. These remain static even if more devices are inserted or
-removed from the hardware. There is a 1 to 1 mapping between subchannels and
-device numbers, provided devices aren't inserted or removed.
-
-Channel Control Words:
-CCWs are linked lists of instructions initially pointed to by an operation
-request block (ORB), which is initially given to Start Subchannel (SSCH)
-command along with the subchannel number for the IO subsystem to process
-while the CPU continues executing normal code.
-CCWs come in two flavours, Format 0 (24 bit for backward compatibility) and
-Format 1 (31 bit). These are typically used to issue read and write (and many
-other) instructions. They consist of a length field and an absolute address
-field.
-Each IO typically gets 1 or 2 interrupts, one for channel end (primary status)
-when the channel is idle, and the second for device end (secondary status).
-Sometimes you get both concurrently. You check how the IO went on by issuing a
-TEST SUBCHANNEL at each interrupt, from which you receive an Interruption
-response block (IRB). If you get channel and device end status in the IRB
-without channel checks etc. your IO probably went okay. If you didn't you
-probably need to examine the IRB, extended status word etc.
-If an error occurs, more sophisticated control units have a facility known as
-concurrent sense. This means that if an error occurs Extended sense information
-will be presented in the Extended status word in the IRB. If not you have to
-issue a subsequent SENSE CCW command after the test subchannel.
-
-
-TPI (Test pending interrupt) can also be used for polled IO, but in
-multitasking multiprocessor systems it isn't recommended except for
-checking special cases (i.e. non looping checks for pending IO etc.).
-
-Store Subchannel and Modify Subchannel can be used to examine and modify
-operating characteristics of a subchannel (e.g. channel paths).
-
-Other IO related Terms:
-Sysplex: S390's Clustering Technology
-QDIO: S390's new high speed IO architecture to support devices such as gigabit
-ethernet, this architecture is also designed to be forward compatible with
-upcoming 64 bit machines.
-
-
-General Concepts 
-
-Input Output Processors (IOP's) are responsible for communicating between
-the mainframe CPU's & the channel & relieve the mainframe CPU's from the
-burden of communicating with IO devices directly, this allows the CPU's to 
-concentrate on data processing. 
-
-IOP's can use one or more links ( known as channel paths ) to talk to each 
-IO device. It first checks for path availability & chooses an available one,
-then starts ( & sometimes terminates IO ).
-There are two types of channel path: ESCON & the Parallel IO interface.
-
-IO devices are attached to control units, control units provide the
-logic to interface the channel paths & channel path IO protocols to 
-the IO devices, they can be integrated with the devices or housed separately
-& often talk to several similar devices ( typical examples would be raid 
-controllers or a control unit which connects to 1000 3270 terminals ).
-
-
-    +---------------------------------------------------------------+
-    | +-----+ +-----+ +-----+ +-----+  +----------+  +----------+   |
-    | | CPU | | CPU | | CPU | | CPU |  |  Main    |  | Expanded |   |
-    | |     | |     | |     | |     |  |  Memory  |  |  Storage |   |
-    | +-----+ +-----+ +-----+ +-----+  +----------+  +----------+   | 
-    |---------------------------------------------------------------+
-    |   IOP        |      IOP      |       IOP                      |
-    |---------------------------------------------------------------
-    | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C | 
-    ----------------------------------------------------------------
-         ||                                              ||
-         ||  Bus & Tag Channel Path                      || ESCON
-         ||  ======================                      || Channel
-         ||  ||                  ||                      || Path
-    +----------+               +----------+         +----------+
-    |          |               |          |         |          |
-    |    CU    |               |    CU    |         |    CU    |
-    |          |               |          |         |          |
-    +----------+               +----------+         +----------+
-       |      |                     |                |       |
-+----------+ +----------+      +----------+   +----------+ +----------+
-|I/O Device| |I/O Device|      |I/O Device|   |I/O Device| |I/O Device|
-+----------+ +----------+      +----------+   +----------+ +----------+
-  CPU = Central Processing Unit    
-  C = Channel                      
-  IOP = IP Processor               
-  CU = Control Unit
-
-The 390 IO systems come in 2 flavours the current 390 machines support both
-
-The Older 360 & 370 Interface,sometimes called the Parallel I/O interface,
-sometimes called Bus-and Tag & sometimes Original Equipment Manufacturers
-Interface (OEMI).
-
-This byte wide Parallel channel path/bus has parity & data on the "Bus" cable 
-and control lines on the "Tag" cable. These can operate in byte multiplex mode
-for sharing between several slow devices or burst mode and monopolize the
-channel for the whole burst. Up to 256 devices can be addressed on one of these
-cables. These cables are about one inch in diameter. The maximum unextended
-length supported by these cables is 125 Meters but this can be extended up to
-2km with a fibre optic channel extended such as a 3044. The maximum burst speed
-supported is 4.5 megabytes per second. However, some really old processors
-support only transfer rates of 3.0, 2.0 & 1.0 MB/sec.
-One of these paths can be daisy chained to up to 8 control units.
-
-
-ESCON if fibre optic it is also called FICON 
-Was introduced by IBM in 1990. Has 2 fibre optic cables and uses either leds or
-lasers for communication at a signaling rate of up to 200 megabits/sec. As
-10bits are transferred for every 8 bits info this drops to 160 megabits/sec
-and to 18.6 Megabytes/sec once control info and CRC are added. ESCON only
-operates in burst mode.
- 
-ESCONs typical max cable length is 3km for the led version and 20km for the
-laser version known as XDF (extended distance facility). This can be further
-extended by using an ESCON director which triples the above mentioned ranges.
-Unlike Bus & Tag as ESCON is serial it uses a packet switching architecture,
-the standard Bus & Tag control protocol is however present within the packets.
-Up to 256 devices can be attached to each control unit that uses one of these
-interfaces.
-
-Common 390 Devices include:
-Network adapters typically OSA2,3172's,2116's & OSA-E gigabit ethernet adapters,
-Consoles 3270 & 3215 (a teletype emulated under linux for a line mode console).
-DASD's direct access storage devices ( otherwise known as hard disks ).
-Tape Drives.
-CTC ( Channel to Channel Adapters ),
-ESCON or Parallel Cables used as a very high speed serial link
-between 2 machines.
-
-
-Debugging IO on s/390 & z/Architecture under VM
-===============================================
-
-Now we are ready to go on with IO tracing commands under VM
-
-A few self explanatory queries:
-Q OSA
-Q CTC
-Q DISK ( This command is CMS specific )
-Q DASD
-
-
-
-
-
-
-Q OSA on my machine returns
-OSA  7C08 ON OSA   7C08 SUBCHANNEL = 0000
-OSA  7C09 ON OSA   7C09 SUBCHANNEL = 0001
-OSA  7C14 ON OSA   7C14 SUBCHANNEL = 0002
-OSA  7C15 ON OSA   7C15 SUBCHANNEL = 0003
-
-If you have a guest with certain privileges you may be able to see devices
-which don't belong to you. To avoid this, add the option V.
-e.g.
-Q V OSA
-
-Now using the device numbers returned by this command we will
-Trace the io starting up on the first device 7c08 & 7c09
-In our simplest case we can trace the 
-start subchannels
-like TR SSCH 7C08-7C09
-or the halt subchannels
-or TR HSCH 7C08-7C09
-MSCH's ,STSCH's I think you can guess the rest
-
-A good trick is tracing all the IO's and CCWS and spooling them into the reader
-of another VM guest so he can ftp the logfile back to his own machine. I'll do
-a small bit of this and give you a look at the output.
-
-1) Spool stdout to VM reader
-SP PRT TO (another vm guest ) or * for the local vm guest
-2) Fill the reader with the trace
-TR IO 7c08-7c09 INST INT CCW PRT RUN
-3) Start up linux 
-i 00c  
-4) Finish the trace
-TR END
-5) close the reader
-C PRT
-6) list reader contents
-RDRLIST
-7) copy it to linux4's minidisk 
-RECEIVE / LOG TXT A1 ( replace
-8)
-filel & press F11 to look at it
-You should see something like:
-
-00020942' SSCH  B2334000    0048813C    CC 0    SCH 0000    DEV 7C08
-          CPA 000FFDF0   PARM 00E2C9C4    KEY 0  FPI C0  LPM 80
-          CCW    000FFDF0  E4200100 00487FE8   0000  E4240100 ........
-          IDAL                                      43D8AFE8
-          IDAL                                      0FB76000
-00020B0A'   I/O DEV 7C08 -> 000197BC'   SCH 0000   PARM 00E2C9C4
-00021628' TSCH  B2354000 >> 00488164    CC 0    SCH 0000    DEV 7C08
-          CCWA 000FFDF8   DEV STS 0C  SCH STS 00  CNT 00EC
-           KEY 0   FPI C0  CC 0   CTLS 4007
-00022238' STSCH B2344000 >> 00488108    CC 0    SCH 0000    DEV 7C08
-
-If you don't like messing up your readed ( because you possibly booted from it )
-you can alternatively spool it to another readers guest.
-
-
-Other common VM device related commands
----------------------------------------------
-These commands are listed only because they have
-been of use to me in the past & may be of use to
-you too. For more complete info on each of the commands
-use type HELP <command> from CMS.
-detaching devices
-DET <devno range>
-ATT <devno range> <guest> 
-attach a device to guest * for your own guest
-READY <devno> cause VM to issue a fake interrupt.
-
-The VARY command is normally only available to VM administrators.
-VARY ON PATH <path> TO <devno range>
-VARY OFF PATH <PATH> FROM <devno range>
-This is used to switch on or off channel paths to devices.
-
-Q CHPID <channel path ID>
-This displays state of devices using this channel path
-D SCHIB <subchannel>
-This displays the subchannel information SCHIB block for the device.
-this I believe is also only available to administrators.
-DEFINE CTC <devno>
-defines a virtual CTC channel to channel connection
-2 need to be defined on each guest for the CTC driver to use.
-COUPLE  devno userid remote devno
-Joins a local virtual device to a remote virtual device
-( commonly used for the CTC driver ).
-
-Building a VM ramdisk under CMS which linux can use
-def vfb-<blocksize> <subchannel> <number blocks>
-blocksize is commonly 4096 for linux.
-Formatting it
-format <subchannel> <driver letter e.g. x> (blksize <blocksize>
-
-Sharing a disk between multiple guests
-LINK userid devno1 devno2 mode password
-
-
-
-GDB on S390
-===========
-N.B. if compiling for debugging gdb works better without optimisation 
-( see Compiling programs for debugging )
-
-invocation
-----------
-gdb <victim program> <optional corefile>
-
-Online help
------------
-help: gives help on commands
-e.g.
-help
-help display
-Note gdb's online help is very good use it.
-
-
-Assembly
---------
-info registers: displays registers other than floating point.
-info all-registers: displays floating points as well.
-disassemble: disassembles
-e.g.
-disassemble without parameters will disassemble the current function
-disassemble $pc $pc+10 
-
-Viewing & modifying variables
------------------------------
-print or p: displays variable or register
-e.g. p/x $sp will display the stack pointer
-
-display: prints variable or register each time program stops
-e.g.
-display/x $pc will display the program counter
-display argc
-
-undisplay : undo's display's
-
-info breakpoints: shows all current breakpoints
-
-info stack: shows stack back trace (if this doesn't work too well, I'll show
-you the stacktrace by hand below).
-
-info locals: displays local variables.
-
-info args: display current procedure arguments.
-
-set args: will set argc & argv each time the victim program is invoked.
-
-set <variable>=value
-set argc=100
-set $pc=0
-
-
-
-Modifying execution
--------------------
-step: steps n lines of sourcecode
-step steps 1 line.
-step 100 steps 100 lines of code.
-
-next: like step except this will not step into subroutines
-
-stepi: steps a single machine code instruction.
-e.g. stepi 100
-
-nexti: steps a single machine code instruction but will not step into
-subroutines.
-
-finish: will run until exit of the current routine
-
-run: (re)starts a program
-
-cont: continues a program
-
-quit: exits gdb.
-
-
-breakpoints
-------------
-
-break
-sets a breakpoint
-e.g.
-
-break main
-
-break *$pc
-
-break *0x400618
-
-Here's a really useful one for large programs
-rbr
-Set a breakpoint for all functions matching REGEXP
-e.g.
-rbr 390
-will set a breakpoint with all functions with 390 in their name.
-
-info breakpoints
-lists all breakpoints
-
-delete: delete breakpoint by number or delete them all
-e.g.
-delete 1 will delete the first breakpoint
-delete will delete them all
-
-watch: This will set a watchpoint ( usually hardware assisted ),
-This will watch a variable till it changes
-e.g.
-watch cnt, will watch the variable cnt till it changes.
-As an aside unfortunately gdb's, architecture independent watchpoint code
-is inconsistent & not very good, watchpoints usually work but not always.
-
-info watchpoints: Display currently active watchpoints
-
-condition: ( another useful one )
-Specify breakpoint number N to break only if COND is true.
-Usage is `condition N COND', where N is an integer and COND is an
-expression to be evaluated whenever breakpoint N is reached.
-
-
-
-User defined functions/macros
------------------------------
-define: ( Note this is very very useful,simple & powerful )
-usage define <name> <list of commands> end
-
-examples which you should consider putting into .gdbinit in your home directory
-define d
-stepi
-disassemble $pc $pc+10
-end
-
-define e
-nexti
-disassemble $pc $pc+10
-end
-
-
-Other hard to classify stuff
-----------------------------
-signal n:
-sends the victim program a signal.
-e.g. signal 3 will send a SIGQUIT.
-
-info signals:
-what gdb does when the victim receives certain signals.
-
-list:
-e.g.
-list lists current function source
-list 1,10 list first 10 lines of current file.
-list test.c:1,10
-
-
-directory:
-Adds directories to be searched for source if gdb cannot find the source.
-(note it is a bit sensitive about slashes)
-e.g. To add the root of the filesystem to the searchpath do
-directory //
-
-
-call <function>
-This calls a function in the victim program, this is pretty powerful
-e.g.
-(gdb) call printf("hello world")
-outputs:
-$1 = 11 
-
-You might now be thinking that the line above didn't work, something extra had
-to be done.
-(gdb) call fflush(stdout)
-hello world$2 = 0
-As an aside the debugger also calls malloc & free under the hood 
-to make space for the "hello world" string.
-
-
-
-hints
------
-1) command completion works just like bash 
-( if you are a bad typist like me this really helps )
-e.g. hit br <TAB> & cursor up & down :-).
-
-2) if you have a debugging problem that takes a few steps to recreate
-put the steps into a file called .gdbinit in your current working directory
-if you have defined a few extra useful user defined commands put these in 
-your home directory & they will be read each time gdb is launched.
-
-A typical .gdbinit file might be.
-break main
-run
-break runtime_exception
-cont 
-
-
-stack chaining in gdb by hand
------------------------------
-This is done using a the same trick described for VM 
-p/x (*($sp+56))&0x7fffffff get the first backchain.
-
-For z/Architecture
-Replace 56 with 112 & ignore the &0x7fffffff
-in the macros below & do nasty casts to longs like the following
-as gdb unfortunately deals with printed arguments as ints which
-messes up everything.
-i.e. here is a 3rd backchain dereference
-p/x *(long *)(***(long ***)$sp+112)
-
-
-this outputs 
-$5 = 0x528f18 
-on my machine.
-Now you can use 
-info symbol (*($sp+56))&0x7fffffff 
-you might see something like.
-rl_getc + 36 in section .text  telling you what is located at address 0x528f18
-Now do.
-p/x (*(*$sp+56))&0x7fffffff 
-This outputs
-$6 = 0x528ed0
-Now do.
-info symbol (*(*$sp+56))&0x7fffffff
-rl_read_key + 180 in section .text
-now do
-p/x (*(**$sp+56))&0x7fffffff
-& so on.
-
-Disassembling instructions without debug info
----------------------------------------------
-gdb typically complains if there is a lack of debugging
-symbols in the disassemble command with 
-"No function contains specified address." To get around
-this do 
-x/<number lines to disassemble>xi <address>
-e.g.
-x/20xi 0x400730
-
-
-
-Note: Remember gdb has history just like bash you don't need to retype the
-whole line just use the up & down arrows.
-
-
-
-For more info
--------------
-From your linuxbox do 
-man gdb or info gdb.
-
-core dumps
-----------
-What a core dump ?,
-A core dump is a file generated by the kernel (if allowed) which contains the
-registers and all active pages of the program which has crashed.
-From this file gdb will allow you to look at the registers, stack trace and
-memory of the program as if it just crashed on your system. It is usually
-called core and created in the current working directory.
-This is very useful in that a customer can mail a core dump to a technical
-support department and the technical support department can reconstruct what
-happened. Provided they have an identical copy of this program with debugging
-symbols compiled in and the source base of this build is available.
-In short it is far more useful than something like a crash log could ever hope
-to be.
-
-Why have I never seen one ?.
-Probably because you haven't used the command 
-ulimit -c unlimited in bash
-to allow core dumps, now do 
-ulimit -a 
-to verify that the limit was accepted.
-
-A sample core dump
-To create this I'm going to do
-ulimit -c unlimited
-gdb 
-to launch gdb (my victim app. ) now be bad & do the following from another 
-telnet/xterm session to the same machine
-ps -aux | grep gdb
-kill -SIGSEGV <gdb's pid>
-or alternatively use killall -SIGSEGV gdb if you have the killall command.
-Now look at the core dump.
-./gdb core
-Displays the following
-GNU gdb 4.18
-Copyright 1998 Free Software Foundation, Inc.
-GDB is free software, covered by the GNU General Public License, and you are
-welcome to change it and/or distribute copies of it under certain conditions.
-Type "show copying" to see the conditions.
-There is absolutely no warranty for GDB.  Type "show warranty" for details.
-This GDB was configured as "s390-ibm-linux"...
-Core was generated by `./gdb'.
-Program terminated with signal 11, Segmentation fault.
-Reading symbols from /usr/lib/libncurses.so.4...done.
-Reading symbols from /lib/libm.so.6...done.
-Reading symbols from /lib/libc.so.6...done.
-Reading symbols from /lib/ld-linux.so.2...done.
-#0  0x40126d1a in read () from /lib/libc.so.6
-Setting up the environment for debugging gdb.
-Breakpoint 1 at 0x4dc6f8: file utils.c, line 471.
-Breakpoint 2 at 0x4d87a4: file top.c, line 2609.
-(top-gdb) info stack
-#0  0x40126d1a in read () from /lib/libc.so.6
-#1  0x528f26 in rl_getc (stream=0x7ffffde8) at input.c:402
-#2  0x528ed0 in rl_read_key () at input.c:381
-#3  0x5167e6 in readline_internal_char () at readline.c:454
-#4  0x5168ee in readline_internal_charloop () at readline.c:507
-#5  0x51692c in readline_internal () at readline.c:521
-#6  0x5164fe in readline (prompt=0x7ffff810)
-    at readline.c:349
-#7  0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1,
-    annotation_suffix=0x4d6b44 "prompt") at top.c:2091
-#8  0x4d6cf0 in command_loop () at top.c:1345
-#9  0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635
-
-
-LDD
-===
-This is a program which lists the shared libraries which a library needs,
-Note you also get the relocations of the shared library text segments which
-help when using objdump --source.
-e.g.
- ldd ./gdb
-outputs
-libncurses.so.4 => /usr/lib/libncurses.so.4 (0x40018000)
-libm.so.6 => /lib/libm.so.6 (0x4005e000)
-libc.so.6 => /lib/libc.so.6 (0x40084000)
-/lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x40000000)
-
-
-Debugging shared libraries
-==========================
-Most programs use shared libraries, however it can be very painful
-when you single step instruction into a function like printf for the 
-first time & you end up in functions like _dl_runtime_resolve this is
-the ld.so doing lazy binding, lazy binding is a concept in ELF where 
-shared library functions are not loaded into memory unless they are 
-actually used, great for saving memory but a pain to debug.
-To get around this either relink the program -static or exit gdb type 
-export LD_BIND_NOW=true this will stop lazy binding & restart the gdb'ing 
-the program in question.
- 
-
-
-Debugging modules
-=================
-As modules are dynamically loaded into the kernel their address can be
-anywhere to get around this use the -m option with insmod to emit a load
-map which can be piped into a file if required.
-
-The proc file system
-====================
-What is it ?.
-It is a filesystem created by the kernel with files which are created on demand
-by the kernel if read, or can be used to modify kernel parameters,
-it is a powerful concept.
-
-e.g.
-
-cat /proc/sys/net/ipv4/ip_forward 
-On my machine outputs 
-0 
-telling me ip_forwarding is not on to switch it on I can do
-echo 1 >  /proc/sys/net/ipv4/ip_forward
-cat it again
-cat /proc/sys/net/ipv4/ip_forward 
-On my machine now outputs
-1
-IP forwarding is on.
-There is a lot of useful info in here best found by going in and having a look
-around, so I'll take you through some entries I consider important.
-
-All the processes running on the machine have their own entry defined by
-/proc/<pid>
-So lets have a look at the init process
-cd /proc/1
-
-cat cmdline
-emits
-init [2]
-
-cd /proc/1/fd
-This contains numerical entries of all the open files,
-some of these you can cat e.g. stdout (2)
-
-cat /proc/29/maps
-on my machine emits
-
-00400000-00478000 r-xp 00000000 5f:00 4103       /bin/bash
-00478000-0047e000 rw-p 00077000 5f:00 4103       /bin/bash
-0047e000-00492000 rwxp 00000000 00:00 0
-40000000-40015000 r-xp 00000000 5f:00 14382      /lib/ld-2.1.2.so
-40015000-40016000 rw-p 00014000 5f:00 14382      /lib/ld-2.1.2.so
-40016000-40017000 rwxp 00000000 00:00 0
-40017000-40018000 rw-p 00000000 00:00 0
-40018000-4001b000 r-xp 00000000 5f:00 14435      /lib/libtermcap.so.2.0.8
-4001b000-4001c000 rw-p 00002000 5f:00 14435      /lib/libtermcap.so.2.0.8
-4001c000-4010d000 r-xp 00000000 5f:00 14387      /lib/libc-2.1.2.so
-4010d000-40111000 rw-p 000f0000 5f:00 14387      /lib/libc-2.1.2.so
-40111000-40114000 rw-p 00000000 00:00 0
-40114000-4011e000 r-xp 00000000 5f:00 14408      /lib/libnss_files-2.1.2.so
-4011e000-4011f000 rw-p 00009000 5f:00 14408      /lib/libnss_files-2.1.2.so
-7fffd000-80000000 rwxp ffffe000 00:00 0
-
-
-Showing us the shared libraries init uses where they are in memory
-& memory access permissions for each virtual memory area.
-
-/proc/1/cwd is a softlink to the current working directory.
-/proc/1/root is the root of the filesystem for this process. 
-
-/proc/1/mem is the current running processes memory which you
-can read & write to like a file.
-strace uses this sometimes as it is a bit faster than the
-rather inefficient ptrace interface for peeking at DATA.
-
-
-cat status 
-
-Name:   init
-State:  S (sleeping)
-Pid:    1
-PPid:   0
-Uid:    0       0       0       0
-Gid:    0       0       0       0
-Groups:
-VmSize:      408 kB
-VmLck:         0 kB
-VmRSS:       208 kB
-VmData:       24 kB
-VmStk:         8 kB
-VmExe:       368 kB
-VmLib:         0 kB
-SigPnd: 0000000000000000
-SigBlk: 0000000000000000
-SigIgn: 7fffffffd7f0d8fc
-SigCgt: 00000000280b2603
-CapInh: 00000000fffffeff
-CapPrm: 00000000ffffffff
-CapEff: 00000000fffffeff
-
-User PSW:    070de000 80414146
-task: 004b6000 tss: 004b62d8 ksp: 004b7ca8 pt_regs: 004b7f68
-User GPRS:
-00000400  00000000  0000000b  7ffffa90
-00000000  00000000  00000000  0045d9f4
-0045cafc  7ffffa90  7fffff18  0045cb08
-00010400  804039e8  80403af8  7ffff8b0
-User ACRS:
-00000000  00000000  00000000  00000000
-00000001  00000000  00000000  00000000
-00000000  00000000  00000000  00000000
-00000000  00000000  00000000  00000000
-Kernel BackChain  CallChain    BackChain  CallChain
-       004b7ca8   8002bd0c     004b7d18   8002b92c
-       004b7db8   8005cd50     004b7e38   8005d12a
-       004b7f08   80019114                     
-Showing among other things memory usage & status of some signals &
-the processes'es registers from the kernel task_structure
-as well as a backchain which may be useful if a process crashes
-in the kernel for some unknown reason.
-
-Some driver debugging techniques
-================================
-debug feature
--------------
-Some of our drivers now support a "debug feature" in
-/proc/s390dbf see s390dbf.txt in the linux/Documentation directory
-for more info.
-e.g. 
-to switch on the lcs "debug feature"
-echo 5 > /proc/s390dbf/lcs/level
-& then after the error occurred.
-cat /proc/s390dbf/lcs/sprintf >/logfile
-the logfile now contains some information which may help
-tech support resolve a problem in the field.
-
-
-
-high level debugging network drivers
-------------------------------------
-ifconfig is a quite useful command
-it gives the current state of network drivers.
-
-If you suspect your network device driver is dead
-one way to check is type 
-ifconfig <network device> 
-e.g. tr0
-You should see something like
-tr0       Link encap:16/4 Mbps Token Ring (New)  HWaddr 00:04:AC:20:8E:48
-          inet addr:9.164.185.132  Bcast:9.164.191.255  Mask:255.255.224.0
-          UP BROADCAST RUNNING MULTICAST  MTU:2000  Metric:1
-          RX packets:246134 errors:0 dropped:0 overruns:0 frame:0
-          TX packets:5 errors:0 dropped:0 overruns:0 carrier:0
-          collisions:0 txqueuelen:100
-
-if the device doesn't say up
-try
-/etc/rc.d/init.d/network start 
-( this starts the network stack & hopefully calls ifconfig tr0 up ).
-ifconfig looks at the output of /proc/net/dev and presents it in a more
-presentable form.
-Now ping the device from a machine in the same subnet.
-if the RX packets count & TX packets counts don't increment you probably
-have problems.
-next 
-cat /proc/net/arp
-Do you see any hardware addresses in the cache if not you may have problems.
-Next try
-ping -c 5 <broadcast_addr> i.e. the Bcast field above in the output of
-ifconfig. Do you see any replies from machines other than the local machine
-if not you may have problems. also if the TX packets count in ifconfig
-hasn't incremented either you have serious problems in your driver 
-(e.g. the txbusy field of the network device being stuck on ) 
-or you may have multiple network devices connected.
-
-
-chandev
--------
-There is a new device layer for channel devices, some
-drivers e.g. lcs are registered with this layer.
-If the device uses the channel device layer you'll be
-able to find what interrupts it uses & the current state 
-of the device.
-See the manpage chandev.8 &type cat /proc/chandev for more info.
-
-
-SysRq
-=====
-This is now supported by linux for s/390 & z/Architecture.
-To enable it do compile the kernel with 
-Kernel Hacking -> Magic SysRq Key Enabled
-echo "1" > /proc/sys/kernel/sysrq
-also type
-echo "8" >/proc/sys/kernel/printk
-To make printk output go to console.
-On 390 all commands are prefixed with
-^-
-e.g.
-^-t will show tasks.
-^-? or some unknown command will display help.
-The sysrq key reading is very picky ( I have to type the keys in an
- xterm session & paste them  into the x3270 console )
-& it may be wise to predefine the keys as described in the VM hints above
-
-This is particularly useful for syncing disks unmounting & rebooting
-if the machine gets partially hung.
-
-Read Documentation/admin-guide/sysrq.rst for more info
-
-References:
-===========
-Enterprise Systems Architecture Reference Summary
-Enterprise Systems Architecture Principles of Operation
-Hartmut Penners s390 stack frame sheet.
-IBM Mainframe Channel Attachment a technology brief from a CISCO webpage
-Various bits of man & info pages of Linux.
-Linux & GDB source.
-Various info & man pages.
-CMS Help on tracing commands.
-Linux for s/390 Elf Application Binary Interface
-Linux for z/Series Elf Application Binary Interface ( Both Highly Recommended )
-z/Architecture Principles of Operation SA22-7832-00
-Enterprise Systems Architecture/390 Reference Summary SA22-7209-01 & the
-Enterprise Systems Architecture/390 Principles of Operation SA22-7201-05
-
-Special Thanks
-==============
-Special thanks to Neale Ferguson who maintains a much
-prettier HTML version of this page at
-http://linuxvm.org/penguinvm/
-Bob Grainger Stefan Bader & others for reporting bugs
diff --git a/Documentation/s390/cds.rst b/Documentation/s390/cds.rst
new file mode 100644
index 000000000000..7006d8209d2e
--- /dev/null
+++ b/Documentation/s390/cds.rst
@@ -0,0 +1,530 @@
+===========================
+Linux for S/390 and zSeries
+===========================
+
+Common Device Support (CDS)
+Device Driver I/O Support Routines
+
+Authors:
+	- Ingo Adlung
+	- Cornelia Huck
+
+Copyright, IBM Corp. 1999-2002
+
+Introduction
+============
+
+This document describes the common device support routines for Linux/390.
+Different than other hardware architectures, ESA/390 has defined a unified
+I/O access method. This gives relief to the device drivers as they don't
+have to deal with different bus types, polling versus interrupt
+processing, shared versus non-shared interrupt processing, DMA versus port
+I/O (PIO), and other hardware features more. However, this implies that
+either every single device driver needs to implement the hardware I/O
+attachment functionality itself, or the operating system provides for a
+unified method to access the hardware, providing all the functionality that
+every single device driver would have to provide itself.
+
+The document does not intend to explain the ESA/390 hardware architecture in
+every detail.This information can be obtained from the ESA/390 Principles of
+Operation manual (IBM Form. No. SA22-7201).
+
+In order to build common device support for ESA/390 I/O interfaces, a
+functional layer was introduced that provides generic I/O access methods to
+the hardware.
+
+The common device support layer comprises the I/O support routines defined
+below. Some of them implement common Linux device driver interfaces, while
+some of them are ESA/390 platform specific.
+
+Note:
+  In order to write a driver for S/390, you also need to look into the interface
+  described in Documentation/s390/driver-model.rst.
+
+Note for porting drivers from 2.4:
+
+The major changes are:
+
+* The functions use a ccw_device instead of an irq (subchannel).
+* All drivers must define a ccw_driver (see driver-model.txt) and the associated
+  functions.
+* request_irq() and free_irq() are no longer done by the driver.
+* The oper_handler is (kindof) replaced by the probe() and set_online() functions
+  of the ccw_driver.
+* The not_oper_handler is (kindof) replaced by the remove() and set_offline()
+  functions of the ccw_driver.
+* The channel device layer is gone.
+* The interrupt handlers must be adapted to use a ccw_device as argument.
+  Moreover, they don't return a devstat, but an irb.
+* Before initiating an io, the options must be set via ccw_device_set_options().
+* Instead of calling read_dev_chars()/read_conf_data(), the driver issues
+  the channel program and handles the interrupt itself.
+
+ccw_device_get_ciw()
+   get commands from extended sense data.
+
+ccw_device_start(), ccw_device_start_timeout(), ccw_device_start_key(), ccw_device_start_key_timeout()
+   initiate an I/O request.
+
+ccw_device_resume()
+   resume channel program execution.
+
+ccw_device_halt()
+   terminate the current I/O request processed on the device.
+
+do_IRQ()
+   generic interrupt routine. This function is called by the interrupt entry
+   routine whenever an I/O interrupt is presented to the system. The do_IRQ()
+   routine determines the interrupt status and calls the device specific
+   interrupt handler according to the rules (flags) defined during I/O request
+   initiation with do_IO().
+
+The next chapters describe the functions other than do_IRQ() in more details.
+The do_IRQ() interface is not described, as it is called from the Linux/390
+first level interrupt handler only and does not comprise a device driver
+callable interface. Instead, the functional description of do_IO() also
+describes the input to the device specific interrupt handler.
+
+Note:
+	All explanations apply also to the 64 bit architecture s390x.
+
+
+Common Device Support (CDS) for Linux/390 Device Drivers
+========================================================
+
+General Information
+-------------------
+
+The following chapters describe the I/O related interface routines the
+Linux/390 common device support (CDS) provides to allow for device specific
+driver implementations on the IBM ESA/390 hardware platform. Those interfaces
+intend to provide the functionality required by every device driver
+implementation to allow to drive a specific hardware device on the ESA/390
+platform. Some of the interface routines are specific to Linux/390 and some
+of them can be found on other Linux platforms implementations too.
+Miscellaneous function prototypes, data declarations, and macro definitions
+can be found in the architecture specific C header file
+linux/arch/s390/include/asm/irq.h.
+
+Overview of CDS interface concepts
+----------------------------------
+
+Different to other hardware platforms, the ESA/390 architecture doesn't define
+interrupt lines managed by a specific interrupt controller and bus systems
+that may or may not allow for shared interrupts, DMA processing, etc.. Instead,
+the ESA/390 architecture has implemented a so called channel subsystem, that
+provides a unified view of the devices physically attached to the systems.
+Though the ESA/390 hardware platform knows about a huge variety of different
+peripheral attachments like disk devices (aka. DASDs), tapes, communication
+controllers, etc. they can all be accessed by a well defined access method and
+they are presenting I/O completion a unified way : I/O interruptions. Every
+single device is uniquely identified to the system by a so called subchannel,
+where the ESA/390 architecture allows for 64k devices be attached.
+
+Linux, however, was first built on the Intel PC architecture, with its two
+cascaded 8259 programmable interrupt controllers (PICs), that allow for a
+maximum of 15 different interrupt lines. All devices attached to such a system
+share those 15 interrupt levels. Devices attached to the ISA bus system must
+not share interrupt levels (aka. IRQs), as the ISA bus bases on edge triggered
+interrupts. MCA, EISA, PCI and other bus systems base on level triggered
+interrupts, and therewith allow for shared IRQs. However, if multiple devices
+present their hardware status by the same (shared) IRQ, the operating system
+has to call every single device driver registered on this IRQ in order to
+determine the device driver owning the device that raised the interrupt.
+
+Up to kernel 2.4, Linux/390 used to provide interfaces via the IRQ (subchannel).
+For internal use of the common I/O layer, these are still there. However,
+device drivers should use the new calling interface via the ccw_device only.
+
+During its startup the Linux/390 system checks for peripheral devices. Each
+of those devices is uniquely defined by a so called subchannel by the ESA/390
+channel subsystem. While the subchannel numbers are system generated, each
+subchannel also takes a user defined attribute, the so called device number.
+Both subchannel number and device number cannot exceed 65535. During sysfs
+initialisation, the information about control unit type and device types that
+imply specific I/O commands (channel command words - CCWs) in order to operate
+the device are gathered. Device drivers can retrieve this set of hardware
+information during their initialization step to recognize the devices they
+support using the information saved in the struct ccw_device given to them.
+This methods implies that Linux/390 doesn't require to probe for free (not
+armed) interrupt request lines (IRQs) to drive its devices with. Where
+applicable, the device drivers can use issue the READ DEVICE CHARACTERISTICS
+ccw to retrieve device characteristics in its online routine.
+
+In order to allow for easy I/O initiation the CDS layer provides a
+ccw_device_start() interface that takes a device specific channel program (one
+or more CCWs) as input sets up the required architecture specific control blocks
+and initiates an I/O request on behalf of the device driver. The
+ccw_device_start() routine allows to specify whether it expects the CDS layer
+to notify the device driver for every interrupt it observes, or with final status
+only. See ccw_device_start() for more details. A device driver must never issue
+ESA/390 I/O commands itself, but must use the Linux/390 CDS interfaces instead.
+
+For long running I/O request to be canceled, the CDS layer provides the
+ccw_device_halt() function. Some devices require to initially issue a HALT
+SUBCHANNEL (HSCH) command without having pending I/O requests. This function is
+also covered by ccw_device_halt().
+
+
+get_ciw() - get command information word
+
+This call enables a device driver to get information about supported commands
+from the extended SenseID data.
+
+::
+
+  struct ciw *
+  ccw_device_get_ciw(struct ccw_device *cdev, __u32 cmd);
+
+====  ========================================================
+cdev  The ccw_device for which the command is to be retrieved.
+cmd   The command type to be retrieved.
+====  ========================================================
+
+ccw_device_get_ciw() returns:
+
+=====  ================================================================
+ NULL  No extended data available, invalid device or command not found.
+!NULL  The command requested.
+=====  ================================================================
+
+::
+
+  ccw_device_start() - Initiate I/O Request
+
+The ccw_device_start() routines is the I/O request front-end processor. All
+device driver I/O requests must be issued using this routine. A device driver
+must not issue ESA/390 I/O commands itself. Instead the ccw_device_start()
+routine provides all interfaces required to drive arbitrary devices.
+
+This description also covers the status information passed to the device
+driver's interrupt handler as this is related to the rules (flags) defined
+with the associated I/O request when calling ccw_device_start().
+
+::
+
+  int ccw_device_start(struct ccw_device *cdev,
+		       struct ccw1 *cpa,
+		       unsigned long intparm,
+		       __u8 lpm,
+		       unsigned long flags);
+  int ccw_device_start_timeout(struct ccw_device *cdev,
+			       struct ccw1 *cpa,
+			       unsigned long intparm,
+			       __u8 lpm,
+			       unsigned long flags,
+			       int expires);
+  int ccw_device_start_key(struct ccw_device *cdev,
+			   struct ccw1 *cpa,
+			   unsigned long intparm,
+			   __u8 lpm,
+			   __u8 key,
+			   unsigned long flags);
+  int ccw_device_start_key_timeout(struct ccw_device *cdev,
+				   struct ccw1 *cpa,
+				   unsigned long intparm,
+				   __u8 lpm,
+				   __u8 key,
+				   unsigned long flags,
+				   int expires);
+
+============= =============================================================
+cdev          ccw_device the I/O is destined for
+cpa           logical start address of channel program
+user_intparm  user specific interrupt information; will be presented
+	      back to the device driver's interrupt handler. Allows a
+	      device driver to associate the interrupt with a
+	      particular I/O request.
+lpm           defines the channel path to be used for a specific I/O
+	      request. A value of 0 will make cio use the opm.
+key           the storage key to use for the I/O (useful for operating on a
+	      storage with a storage key != default key)
+flag          defines the action to be performed for I/O processing
+expires       timeout value in jiffies. The common I/O layer will terminate
+	      the running program after this and call the interrupt handler
+	      with ERR_PTR(-ETIMEDOUT) as irb.
+============= =============================================================
+
+Possible flag values are:
+
+========================= =============================================
+DOIO_ALLOW_SUSPEND        channel program may become suspended
+DOIO_DENY_PREFETCH        don't allow for CCW prefetch; usually
+			  this implies the channel program might
+			  become modified
+DOIO_SUPPRESS_INTER       don't call the handler on intermediate status
+========================= =============================================
+
+The cpa parameter points to the first format 1 CCW of a channel program::
+
+  struct ccw1 {
+	__u8  cmd_code;/* command code */
+	__u8  flags;   /* flags, like IDA addressing, etc. */
+	__u16 count;   /* byte count */
+	__u32 cda;     /* data address */
+  } __attribute__ ((packed,aligned(8)));
+
+with the following CCW flags values defined:
+
+=================== =========================
+CCW_FLAG_DC         data chaining
+CCW_FLAG_CC         command chaining
+CCW_FLAG_SLI        suppress incorrect length
+CCW_FLAG_SKIP       skip
+CCW_FLAG_PCI        PCI
+CCW_FLAG_IDA        indirect addressing
+CCW_FLAG_SUSPEND    suspend
+=================== =========================
+
+
+Via ccw_device_set_options(), the device driver may specify the following
+options for the device:
+
+========================= ======================================
+DOIO_EARLY_NOTIFICATION   allow for early interrupt notification
+DOIO_REPORT_ALL           report all interrupt conditions
+========================= ======================================
+
+
+The ccw_device_start() function returns:
+
+======== ======================================================================
+      0  successful completion or request successfully initiated
+ -EBUSY  The device is currently processing a previous I/O request, or there is
+	 a status pending at the device.
+-ENODEV  cdev is invalid, the device is not operational or the ccw_device is
+	 not online.
+======== ======================================================================
+
+When the I/O request completes, the CDS first level interrupt handler will
+accumulate the status in a struct irb and then call the device interrupt handler.
+The intparm field will contain the value the device driver has associated with a
+particular I/O request. If a pending device status was recognized,
+intparm will be set to 0 (zero). This may happen during I/O initiation or delayed
+by an alert status notification. In any case this status is not related to the
+current (last) I/O request. In case of a delayed status notification no special
+interrupt will be presented to indicate I/O completion as the I/O request was
+never started, even though ccw_device_start() returned with successful completion.
+
+The irb may contain an error value, and the device driver should check for this
+first:
+
+========== =================================================================
+-ETIMEDOUT the common I/O layer terminated the request after the specified
+	   timeout value
+-EIO       the common I/O layer terminated the request due to an error state
+========== =================================================================
+
+If the concurrent sense flag in the extended status word (esw) in the irb is
+set, the field erw.scnt in the esw describes the number of device specific
+sense bytes available in the extended control word irb->scsw.ecw[]. No device
+sensing by the device driver itself is required.
+
+The device interrupt handler can use the following definitions to investigate
+the primary unit check source coded in sense byte 0 :
+
+======================= ====
+SNS0_CMD_REJECT         0x80
+SNS0_INTERVENTION_REQ   0x40
+SNS0_BUS_OUT_CHECK      0x20
+SNS0_EQUIPMENT_CHECK    0x10
+SNS0_DATA_CHECK         0x08
+SNS0_OVERRUN            0x04
+SNS0_INCOMPL_DOMAIN     0x01
+======================= ====
+
+Depending on the device status, multiple of those values may be set together.
+Please refer to the device specific documentation for details.
+
+The irb->scsw.cstat field provides the (accumulated) subchannel status :
+
+========================= ============================
+SCHN_STAT_PCI             program controlled interrupt
+SCHN_STAT_INCORR_LEN      incorrect length
+SCHN_STAT_PROG_CHECK      program check
+SCHN_STAT_PROT_CHECK      protection check
+SCHN_STAT_CHN_DATA_CHK    channel data check
+SCHN_STAT_CHN_CTRL_CHK    channel control check
+SCHN_STAT_INTF_CTRL_CHK   interface control check
+SCHN_STAT_CHAIN_CHECK     chaining check
+========================= ============================
+
+The irb->scsw.dstat field provides the (accumulated) device status :
+
+===================== =================
+DEV_STAT_ATTENTION    attention
+DEV_STAT_STAT_MOD     status modifier
+DEV_STAT_CU_END       control unit end
+DEV_STAT_BUSY         busy
+DEV_STAT_CHN_END      channel end
+DEV_STAT_DEV_END      device end
+DEV_STAT_UNIT_CHECK   unit check
+DEV_STAT_UNIT_EXCEP   unit exception
+===================== =================
+
+Please see the ESA/390 Principles of Operation manual for details on the
+individual flag meanings.
+
+Usage Notes:
+
+ccw_device_start() must be called disabled and with the ccw device lock held.
+
+The device driver is allowed to issue the next ccw_device_start() call from
+within its interrupt handler already. It is not required to schedule a
+bottom-half, unless a non deterministically long running error recovery procedure
+or similar needs to be scheduled. During I/O processing the Linux/390 generic
+I/O device driver support has already obtained the IRQ lock, i.e. the handler
+must not try to obtain it again when calling ccw_device_start() or we end in a
+deadlock situation!
+
+If a device driver relies on an I/O request to be completed prior to start the
+next it can reduce I/O processing overhead by chaining a NoOp I/O command
+CCW_CMD_NOOP to the end of the submitted CCW chain. This will force Channel-End
+and Device-End status to be presented together, with a single interrupt.
+However, this should be used with care as it implies the channel will remain
+busy, not being able to process I/O requests for other devices on the same
+channel. Therefore e.g. read commands should never use this technique, as the
+result will be presented by a single interrupt anyway.
+
+In order to minimize I/O overhead, a device driver should use the
+DOIO_REPORT_ALL  only if the device can report intermediate interrupt
+information prior to device-end the device driver urgently relies on. In this
+case all I/O interruptions are presented to the device driver until final
+status is recognized.
+
+If a device is able to recover from asynchronously presented I/O errors, it can
+perform overlapping I/O using the DOIO_EARLY_NOTIFICATION flag. While some
+devices always report channel-end and device-end together, with a single
+interrupt, others present primary status (channel-end) when the channel is
+ready for the next I/O request and secondary status (device-end) when the data
+transmission has been completed at the device.
+
+Above flag allows to exploit this feature, e.g. for communication devices that
+can handle lost data on the network to allow for enhanced I/O processing.
+
+Unless the channel subsystem at any time presents a secondary status interrupt,
+exploiting this feature will cause only primary status interrupts to be
+presented to the device driver while overlapping I/O is performed. When a
+secondary status without error (alert status) is presented, this indicates
+successful completion for all overlapping ccw_device_start() requests that have
+been issued since the last secondary (final) status.
+
+Channel programs that intend to set the suspend flag on a channel command word
+(CCW)  must start the I/O operation with the DOIO_ALLOW_SUSPEND option or the
+suspend flag will cause a channel program check. At the time the channel program
+becomes suspended an intermediate interrupt will be generated by the channel
+subsystem.
+
+ccw_device_resume() - Resume Channel Program Execution
+
+If a device driver chooses to suspend the current channel program execution by
+setting the CCW suspend flag on a particular CCW, the channel program execution
+is suspended. In order to resume channel program execution the CIO layer
+provides the ccw_device_resume() routine.
+
+::
+
+  int ccw_device_resume(struct ccw_device *cdev);
+
+====  ================================================
+cdev  ccw_device the resume operation is requested for
+====  ================================================
+
+The ccw_device_resume() function returns:
+
+=========   ==============================================
+	0   suspended channel program is resumed
+   -EBUSY   status pending
+  -ENODEV   cdev invalid or not-operational subchannel
+  -EINVAL   resume function not applicable
+-ENOTCONN   there is no I/O request pending for completion
+=========   ==============================================
+
+Usage Notes:
+
+Please have a look at the ccw_device_start() usage notes for more details on
+suspended channel programs.
+
+ccw_device_halt() - Halt I/O Request Processing
+
+Sometimes a device driver might need a possibility to stop the processing of
+a long-running channel program or the device might require to initially issue
+a halt subchannel (HSCH) I/O command. For those purposes the ccw_device_halt()
+command is provided.
+
+ccw_device_halt() must be called disabled and with the ccw device lock held.
+
+::
+
+  int ccw_device_halt(struct ccw_device *cdev,
+		      unsigned long intparm);
+
+=======  =====================================================
+cdev     ccw_device the halt operation is requested for
+intparm  interruption parameter; value is only used if no I/O
+	 is outstanding, otherwise the intparm associated with
+	 the I/O request is returned
+=======  =====================================================
+
+The ccw_device_halt() function returns:
+
+=======  ==============================================================
+      0  request successfully initiated
+-EBUSY   the device is currently busy, or status pending.
+-ENODEV  cdev invalid.
+-EINVAL  The device is not operational or the ccw device is not online.
+=======  ==============================================================
+
+Usage Notes:
+
+A device driver may write a never-ending channel program by writing a channel
+program that at its end loops back to its beginning by means of a transfer in
+channel (TIC)   command (CCW_CMD_TIC). Usually this is performed by network
+device drivers by setting the PCI CCW flag (CCW_FLAG_PCI). Once this CCW is
+executed a program controlled interrupt (PCI) is generated. The device driver
+can then perform an appropriate action. Prior to interrupt of an outstanding
+read to a network device (with or without PCI flag) a ccw_device_halt()
+is required to end the pending operation.
+
+::
+
+  ccw_device_clear() - Terminage I/O Request Processing
+
+In order to terminate all I/O processing at the subchannel, the clear subchannel
+(CSCH) command is used. It can be issued via ccw_device_clear().
+
+ccw_device_clear() must be called disabled and with the ccw device lock held.
+
+::
+
+  int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm);
+
+======= ===============================================
+cdev    ccw_device the clear operation is requested for
+intparm interruption parameter (see ccw_device_halt())
+======= ===============================================
+
+The ccw_device_clear() function returns:
+
+=======  ==============================================================
+      0  request successfully initiated
+-ENODEV  cdev invalid
+-EINVAL  The device is not operational or the ccw device is not online.
+=======  ==============================================================
+
+Miscellaneous Support Routines
+------------------------------
+
+This chapter describes various routines to be used in a Linux/390 device
+driver programming environment.
+
+get_ccwdev_lock()
+
+Get the address of the device specific lock. This is then used in
+spin_lock() / spin_unlock() calls.
+
+::
+
+  __u8 ccw_device_get_path_mask(struct ccw_device *cdev);
+
+Get the mask of the path currently available for cdev.
diff --git a/Documentation/s390/cds.txt b/Documentation/s390/cds.txt
deleted file mode 100644
index 480a78ef5a1e..000000000000
--- a/Documentation/s390/cds.txt
+++ /dev/null
@@ -1,472 +0,0 @@
-Linux for S/390 and zSeries
-
-Common Device Support (CDS)
-Device Driver I/O Support Routines
-
-Authors : Ingo Adlung
-	  Cornelia Huck
-
-Copyright, IBM Corp. 1999-2002
-
-Introduction
-
-This document describes the common device support routines for Linux/390.
-Different than other hardware architectures, ESA/390 has defined a unified
-I/O access method. This gives relief to the device drivers as they don't
-have to deal with different bus types, polling versus interrupt
-processing, shared versus non-shared interrupt processing, DMA versus port
-I/O (PIO), and other hardware features more. However, this implies that
-either every single device driver needs to implement the hardware I/O
-attachment functionality itself, or the operating system provides for a
-unified method to access the hardware, providing all the functionality that
-every single device driver would have to provide itself.
-
-The document does not intend to explain the ESA/390 hardware architecture in
-every detail.This information can be obtained from the ESA/390 Principles of
-Operation manual (IBM Form. No. SA22-7201).
-
-In order to build common device support for ESA/390 I/O interfaces, a
-functional layer was introduced that provides generic I/O access methods to
-the hardware. 
-
-The common device support layer comprises the I/O support routines defined 
-below. Some of them implement common Linux device driver interfaces, while 
-some of them are ESA/390 platform specific.
-
-Note:
-In order to write a driver for S/390, you also need to look into the interface
-described in Documentation/s390/driver-model.txt.
-
-Note for porting drivers from 2.4:
-The major changes are:
-* The functions use a ccw_device instead of an irq (subchannel).
-* All drivers must define a ccw_driver (see driver-model.txt) and the associated
-  functions.
-* request_irq() and free_irq() are no longer done by the driver.
-* The oper_handler is (kindof) replaced by the probe() and set_online() functions
-  of the ccw_driver.
-* The not_oper_handler is (kindof) replaced by the remove() and set_offline()
-  functions of the ccw_driver.
-* The channel device layer is gone.
-* The interrupt handlers must be adapted to use a ccw_device as argument.
-  Moreover, they don't return a devstat, but an irb.
-* Before initiating an io, the options must be set via ccw_device_set_options().
-* Instead of calling read_dev_chars()/read_conf_data(), the driver issues
-  the channel program and handles the interrupt itself.
-
-ccw_device_get_ciw()
-   get commands from extended sense data.
-
-ccw_device_start()	
-ccw_device_start_timeout()
-ccw_device_start_key()
-ccw_device_start_key_timeout()
-   initiate an I/O request.
-
-ccw_device_resume()
-   resume channel program execution.
-
-ccw_device_halt()	
-   terminate the current I/O request processed on the device.
-
-do_IRQ()	
-   generic interrupt routine. This function is called by the interrupt entry
-   routine whenever an I/O interrupt is presented to the system. The do_IRQ()
-   routine determines the interrupt status and calls the device specific
-   interrupt handler according to the rules (flags) defined during I/O request
-   initiation with do_IO().
-
-The next chapters describe the functions other than do_IRQ() in more details.
-The do_IRQ() interface is not described, as it is called from the Linux/390
-first level interrupt handler only and does not comprise a device driver
-callable interface. Instead, the functional description of do_IO() also
-describes the input to the device specific interrupt handler.
-
-Note: All explanations apply also to the 64 bit architecture s390x.
-
-
-Common Device Support (CDS) for Linux/390 Device Drivers
-
-General Information
-
-The following chapters describe the I/O related interface routines the
-Linux/390 common device support (CDS) provides to allow for device specific
-driver implementations on the IBM ESA/390 hardware platform. Those interfaces
-intend to provide the functionality required by every device driver
-implementation to allow to drive a specific hardware device on the ESA/390
-platform. Some of the interface routines are specific to Linux/390 and some
-of them can be found on other Linux platforms implementations too.
-Miscellaneous function prototypes, data declarations, and macro definitions
-can be found in the architecture specific C header file
-linux/arch/s390/include/asm/irq.h.
-
-Overview of CDS interface concepts
-
-Different to other hardware platforms, the ESA/390 architecture doesn't define
-interrupt lines managed by a specific interrupt controller and bus systems
-that may or may not allow for shared interrupts, DMA processing, etc.. Instead,
-the ESA/390 architecture has implemented a so called channel subsystem, that
-provides a unified view of the devices physically attached to the systems.
-Though the ESA/390 hardware platform knows about a huge variety of different
-peripheral attachments like disk devices (aka. DASDs), tapes, communication
-controllers, etc. they can all be accessed by a well defined access method and
-they are presenting I/O completion a unified way : I/O interruptions. Every
-single device is uniquely identified to the system by a so called subchannel,
-where the ESA/390 architecture allows for 64k devices be attached.
-
-Linux, however, was first built on the Intel PC architecture, with its two
-cascaded 8259 programmable interrupt controllers (PICs), that allow for a
-maximum of 15 different interrupt lines. All devices attached to such a system
-share those 15 interrupt levels. Devices attached to the ISA bus system must
-not share interrupt levels (aka. IRQs), as the ISA bus bases on edge triggered
-interrupts. MCA, EISA, PCI and other bus systems base on level triggered
-interrupts, and therewith allow for shared IRQs. However, if multiple devices
-present their hardware status by the same (shared) IRQ, the operating system
-has to call every single device driver registered on this IRQ in order to
-determine the device driver owning the device that raised the interrupt.
-
-Up to kernel 2.4, Linux/390 used to provide interfaces via the IRQ (subchannel).
-For internal use of the common I/O layer, these are still there. However, 
-device drivers should use the new calling interface via the ccw_device only.
-
-During its startup the Linux/390 system checks for peripheral devices. Each
-of those devices is uniquely defined by a so called subchannel by the ESA/390
-channel subsystem. While the subchannel numbers are system generated, each
-subchannel also takes a user defined attribute, the so called device number.
-Both subchannel number and device number cannot exceed 65535. During sysfs
-initialisation, the information about control unit type and device types that 
-imply specific I/O commands (channel command words - CCWs) in order to operate
-the device are gathered. Device drivers can retrieve this set of hardware
-information during their initialization step to recognize the devices they
-support using the information saved in the struct ccw_device given to them.
-This methods implies that Linux/390 doesn't require to probe for free (not
-armed) interrupt request lines (IRQs) to drive its devices with. Where
-applicable, the device drivers can use issue the READ DEVICE CHARACTERISTICS
-ccw to retrieve device characteristics in its online routine.
-
-In order to allow for easy I/O initiation the CDS layer provides a
-ccw_device_start() interface that takes a device specific channel program (one
-or more CCWs) as input sets up the required architecture specific control blocks
-and initiates an I/O request on behalf of the device driver. The
-ccw_device_start() routine allows to specify whether it expects the CDS layer
-to notify the device driver for every interrupt it observes, or with final status
-only. See ccw_device_start() for more details. A device driver must never issue
-ESA/390 I/O commands itself, but must use the Linux/390 CDS interfaces instead.
-
-For long running I/O request to be canceled, the CDS layer provides the
-ccw_device_halt() function. Some devices require to initially issue a HALT
-SUBCHANNEL (HSCH) command without having pending I/O requests. This function is
-also covered by ccw_device_halt().
-
-
-get_ciw() - get command information word
-
-This call enables a device driver to get information about supported commands
-from the extended SenseID data.
-
-struct ciw *
-ccw_device_get_ciw(struct ccw_device *cdev, __u32 cmd);
-
-cdev - The ccw_device for which the command is to be retrieved.
-cmd  - The command type to be retrieved.
-
-ccw_device_get_ciw() returns:
-NULL    - No extended data available, invalid device or command not found.
-!NULL   - The command requested.
-
-
-ccw_device_start() - Initiate I/O Request
-
-The ccw_device_start() routines is the I/O request front-end processor. All
-device driver I/O requests must be issued using this routine. A device driver
-must not issue ESA/390 I/O commands itself. Instead the ccw_device_start()
-routine provides all interfaces required to drive arbitrary devices.
-
-This description also covers the status information passed to the device
-driver's interrupt handler as this is related to the rules (flags) defined
-with the associated I/O request when calling ccw_device_start().
-
-int ccw_device_start(struct ccw_device *cdev,
-		     struct ccw1 *cpa,
-		     unsigned long intparm,
-		     __u8 lpm,
-		     unsigned long flags);
-int ccw_device_start_timeout(struct ccw_device *cdev,
-			     struct ccw1 *cpa,
-			     unsigned long intparm,
-			     __u8 lpm,
-			     unsigned long flags,
-			     int expires);
-int ccw_device_start_key(struct ccw_device *cdev,
-			 struct ccw1 *cpa,
-			 unsigned long intparm,
-			 __u8 lpm,
-			 __u8 key,
-			 unsigned long flags);
-int ccw_device_start_key_timeout(struct ccw_device *cdev,
-				 struct ccw1 *cpa,
-				 unsigned long intparm,
-				 __u8 lpm,
-				 __u8 key,
-				 unsigned long flags,
-				 int expires);
-
-cdev         : ccw_device the I/O is destined for
-cpa          : logical start address of channel program
-user_intparm : user specific interrupt information; will be presented
-	       back to the device driver's interrupt handler. Allows a
-               device driver to associate the interrupt with a
-               particular I/O request.
-lpm          : defines the channel path to be used for a specific I/O
-               request. A value of 0 will make cio use the opm.
-key	     : the storage key to use for the I/O (useful for operating on a
-	       storage with a storage key != default key)
-flag         : defines the action to be performed for I/O processing
-expires      : timeout value in jiffies. The common I/O layer will terminate
-	       the running program after this and call the interrupt handler
-	       with ERR_PTR(-ETIMEDOUT) as irb.
-
-Possible flag values are :
-
-DOIO_ALLOW_SUSPEND       - channel program may become suspended
-DOIO_DENY_PREFETCH       - don't allow for CCW prefetch; usually
-                           this implies the channel program might
-                           become modified
-DOIO_SUPPRESS_INTER     - don't call the handler on intermediate status
-
-The cpa parameter points to the first format 1 CCW of a channel program :
-
-struct ccw1 {
-      __u8  cmd_code;/* command code */
-      __u8  flags;   /* flags, like IDA addressing, etc. */
-      __u16 count;   /* byte count */
-      __u32 cda;     /* data address */
-} __attribute__ ((packed,aligned(8)));
-
-with the following CCW flags values defined :
-
-CCW_FLAG_DC        - data chaining
-CCW_FLAG_CC        - command chaining
-CCW_FLAG_SLI       - suppress incorrect length
-CCW_FLAG_SKIP      - skip
-CCW_FLAG_PCI       - PCI
-CCW_FLAG_IDA       - indirect addressing
-CCW_FLAG_SUSPEND   - suspend
-
-
-Via ccw_device_set_options(), the device driver may specify the following
-options for the device:
-
-DOIO_EARLY_NOTIFICATION  - allow for early interrupt notification
-DOIO_REPORT_ALL          - report all interrupt conditions
-
-
-The ccw_device_start() function returns :
-
-      0 - successful completion or request successfully initiated
--EBUSY	- The device is currently processing a previous I/O request, or there is
-          a status pending at the device.
--ENODEV - cdev is invalid, the device is not operational or the ccw_device is
-          not online.
-
-When the I/O request completes, the CDS first level interrupt handler will
-accumulate the status in a struct irb and then call the device interrupt handler.
-The intparm field will contain the value the device driver has associated with a 
-particular I/O request. If a pending device status was recognized, 
-intparm will be set to 0 (zero). This may happen during I/O initiation or delayed
-by an alert status notification. In any case this status is not related to the
-current (last) I/O request. In case of a delayed status notification no special
-interrupt will be presented to indicate I/O completion as the I/O request was
-never started, even though ccw_device_start() returned with successful completion.
-
-The irb may contain an error value, and the device driver should check for this
-first:
-
--ETIMEDOUT: the common I/O layer terminated the request after the specified
-            timeout value
--EIO:       the common I/O layer terminated the request due to an error state
-
-If the concurrent sense flag in the extended status word (esw) in the irb is
-set, the field erw.scnt in the esw describes the number of device specific
-sense bytes available in the extended control word irb->scsw.ecw[]. No device
-sensing by the device driver itself is required.
-
-The device interrupt handler can use the following definitions to investigate
-the primary unit check source coded in sense byte 0 :
-
-SNS0_CMD_REJECT         0x80
-SNS0_INTERVENTION_REQ   0x40
-SNS0_BUS_OUT_CHECK      0x20
-SNS0_EQUIPMENT_CHECK    0x10
-SNS0_DATA_CHECK         0x08
-SNS0_OVERRUN            0x04
-SNS0_INCOMPL_DOMAIN     0x01
-
-Depending on the device status, multiple of those values may be set together.
-Please refer to the device specific documentation for details.
-
-The irb->scsw.cstat field provides the (accumulated) subchannel status :
-
-SCHN_STAT_PCI            - program controlled interrupt
-SCHN_STAT_INCORR_LEN     - incorrect length
-SCHN_STAT_PROG_CHECK     - program check
-SCHN_STAT_PROT_CHECK     - protection check
-SCHN_STAT_CHN_DATA_CHK   - channel data check
-SCHN_STAT_CHN_CTRL_CHK   - channel control check
-SCHN_STAT_INTF_CTRL_CHK  - interface control check
-SCHN_STAT_CHAIN_CHECK    - chaining check
-
-The irb->scsw.dstat field provides the (accumulated) device status :
-
-DEV_STAT_ATTENTION   - attention
-DEV_STAT_STAT_MOD    - status modifier
-DEV_STAT_CU_END      - control unit end
-DEV_STAT_BUSY        - busy
-DEV_STAT_CHN_END     - channel end
-DEV_STAT_DEV_END     - device end
-DEV_STAT_UNIT_CHECK  - unit check
-DEV_STAT_UNIT_EXCEP  - unit exception
-
-Please see the ESA/390 Principles of Operation manual for details on the
-individual flag meanings.
-
-Usage Notes :
-
-ccw_device_start() must be called disabled and with the ccw device lock held.
-
-The device driver is allowed to issue the next ccw_device_start() call from
-within its interrupt handler already. It is not required to schedule a
-bottom-half, unless a non deterministically long running error recovery procedure
-or similar needs to be scheduled. During I/O processing the Linux/390 generic
-I/O device driver support has already obtained the IRQ lock, i.e. the handler
-must not try to obtain it again when calling ccw_device_start() or we end in a
-deadlock situation!
-
-If a device driver relies on an I/O request to be completed prior to start the
-next it can reduce I/O processing overhead by chaining a NoOp I/O command
-CCW_CMD_NOOP to the end of the submitted CCW chain. This will force Channel-End
-and Device-End status to be presented together, with a single interrupt.
-However, this should be used with care as it implies the channel will remain
-busy, not being able to process I/O requests for other devices on the same
-channel. Therefore e.g. read commands should never use this technique, as the
-result will be presented by a single interrupt anyway.
-
-In order to minimize I/O overhead, a device driver should use the
-DOIO_REPORT_ALL  only if the device can report intermediate interrupt
-information prior to device-end the device driver urgently relies on. In this
-case all I/O interruptions are presented to the device driver until final
-status is recognized.
-
-If a device is able to recover from asynchronously presented I/O errors, it can
-perform overlapping I/O using the DOIO_EARLY_NOTIFICATION flag. While some
-devices always report channel-end and device-end together, with a single
-interrupt, others present primary status (channel-end) when the channel is
-ready for the next I/O request and secondary status (device-end) when the data
-transmission has been completed at the device.
-
-Above flag allows to exploit this feature, e.g. for communication devices that
-can handle lost data on the network to allow for enhanced I/O processing.
-
-Unless the channel subsystem at any time presents a secondary status interrupt,
-exploiting this feature will cause only primary status interrupts to be
-presented to the device driver while overlapping I/O is performed. When a
-secondary status without error (alert status) is presented, this indicates
-successful completion for all overlapping ccw_device_start() requests that have
-been issued since the last secondary (final) status.
-
-Channel programs that intend to set the suspend flag on a channel command word 
-(CCW)  must start the I/O operation with the DOIO_ALLOW_SUSPEND option or the 
-suspend flag will cause a channel program check. At the time the channel program 
-becomes suspended an intermediate interrupt will be generated by the channel 
-subsystem.
-
-ccw_device_resume() - Resume Channel Program Execution 
-
-If a device driver chooses to suspend the current channel program execution by 
-setting the CCW suspend flag on a particular CCW, the channel program execution 
-is suspended. In order to resume channel program execution the CIO layer 
-provides the ccw_device_resume() routine. 
-
-int ccw_device_resume(struct ccw_device *cdev);
-
-cdev - ccw_device the resume operation is requested for
-
-The ccw_device_resume() function returns:
-
-        0  - suspended channel program is resumed
--EBUSY     - status pending
--ENODEV    - cdev invalid or not-operational subchannel 
--EINVAL    - resume function not applicable  
--ENOTCONN  - there is no I/O request pending for completion 
-
-Usage Notes:
-Please have a look at the ccw_device_start() usage notes for more details on
-suspended channel programs.
-
-ccw_device_halt() - Halt I/O Request Processing
-
-Sometimes a device driver might need a possibility to stop the processing of
-a long-running channel program or the device might require to initially issue
-a halt subchannel (HSCH) I/O command. For those purposes the ccw_device_halt()
-command is provided.
-
-ccw_device_halt() must be called disabled and with the ccw device lock held.
-
-int ccw_device_halt(struct ccw_device *cdev,
-                    unsigned long intparm);
-
-cdev    : ccw_device the halt operation is requested for
-intparm : interruption parameter; value is only used if no I/O
-          is outstanding, otherwise the intparm associated with
-          the I/O request is returned
-
-The ccw_device_halt() function returns :
-
-      0 - request successfully initiated
--EBUSY  - the device is currently busy, or status pending.
--ENODEV - cdev invalid.
--EINVAL - The device is not operational or the ccw device is not online.
-
-Usage Notes :
-
-A device driver may write a never-ending channel program by writing a channel
-program that at its end loops back to its beginning by means of a transfer in
-channel (TIC)   command (CCW_CMD_TIC). Usually this is performed by network
-device drivers by setting the PCI CCW flag (CCW_FLAG_PCI). Once this CCW is
-executed a program controlled interrupt (PCI) is generated. The device driver
-can then perform an appropriate action. Prior to interrupt of an outstanding
-read to a network device (with or without PCI flag) a ccw_device_halt()
-is required to end the pending operation.
-
-ccw_device_clear() - Terminage I/O Request Processing
-
-In order to terminate all I/O processing at the subchannel, the clear subchannel
-(CSCH) command is used. It can be issued via ccw_device_clear().
-
-ccw_device_clear() must be called disabled and with the ccw device lock held.
-
-int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm);
-
-cdev:	 ccw_device the clear operation is requested for
-intparm: interruption parameter (see ccw_device_halt())
-
-The ccw_device_clear() function returns:
-
-      0 - request successfully initiated
--ENODEV - cdev invalid
--EINVAL - The device is not operational or the ccw device is not online.
-
-Miscellaneous Support Routines
-
-This chapter describes various routines to be used in a Linux/390 device
-driver programming environment.
-
-get_ccwdev_lock()
-
-Get the address of the device specific lock. This is then used in
-spin_lock() / spin_unlock() calls.
-
-
-__u8 ccw_device_get_path_mask(struct ccw_device *cdev);
-
-Get the mask of the path currently available for cdev.
diff --git a/Documentation/s390/common_io.rst b/Documentation/s390/common_io.rst
new file mode 100644
index 000000000000..846485681ce7
--- /dev/null
+++ b/Documentation/s390/common_io.rst
@@ -0,0 +1,140 @@
+======================
+S/390 common I/O-Layer
+======================
+
+command line parameters, procfs and debugfs entries
+===================================================
+
+Command line parameters
+-----------------------
+
+* ccw_timeout_log
+
+  Enable logging of debug information in case of ccw device timeouts.
+
+* cio_ignore = device[,device[,..]]
+
+	device := {all | [!]ipldev | [!]condev | [!]<devno> | [!]<devno>-<devno>}
+
+  The given devices will be ignored by the common I/O-layer; no detection
+  and device sensing will be done on any of those devices. The subchannel to
+  which the device in question is attached will be treated as if no device was
+  attached.
+
+  An ignored device can be un-ignored later; see the "/proc entries"-section for
+  details.
+
+  The devices must be given either as bus ids (0.x.abcd) or as hexadecimal
+  device numbers (0xabcd or abcd, for 2.4 backward compatibility). If you
+  give a device number 0xabcd, it will be interpreted as 0.0.abcd.
+
+  You can use the 'all' keyword to ignore all devices. The 'ipldev' and 'condev'
+  keywords can be used to refer to the CCW based boot device and CCW console
+  device respectively (these are probably useful only when combined with the '!'
+  operator). The '!' operator will cause the I/O-layer to _not_ ignore a device.
+  The command line
+  is parsed from left to right.
+
+  For example::
+
+	cio_ignore=0.0.0023-0.0.0042,0.0.4711
+
+  will ignore all devices ranging from 0.0.0023 to 0.0.0042 and the device
+  0.0.4711, if detected.
+
+  As another example::
+
+	cio_ignore=all,!0.0.4711,!0.0.fd00-0.0.fd02
+
+  will ignore all devices but 0.0.4711, 0.0.fd00, 0.0.fd01, 0.0.fd02.
+
+  By default, no devices are ignored.
+
+
+/proc entries
+-------------
+
+* /proc/cio_ignore
+
+  Lists the ranges of devices (by bus id) which are ignored by common I/O.
+
+  You can un-ignore certain or all devices by piping to /proc/cio_ignore.
+  "free all" will un-ignore all ignored devices,
+  "free <device range>, <device range>, ..." will un-ignore the specified
+  devices.
+
+  For example, if devices 0.0.0023 to 0.0.0042 and 0.0.4711 are ignored,
+
+  - echo free 0.0.0030-0.0.0032 > /proc/cio_ignore
+    will un-ignore devices 0.0.0030 to 0.0.0032 and will leave devices 0.0.0023
+    to 0.0.002f, 0.0.0033 to 0.0.0042 and 0.0.4711 ignored;
+  - echo free 0.0.0041 > /proc/cio_ignore will furthermore un-ignore device
+    0.0.0041;
+  - echo free all > /proc/cio_ignore will un-ignore all remaining ignored
+    devices.
+
+  When a device is un-ignored, device recognition and sensing is performed and
+  the device driver will be notified if possible, so the device will become
+  available to the system. Note that un-ignoring is performed asynchronously.
+
+  You can also add ranges of devices to be ignored by piping to
+  /proc/cio_ignore; "add <device range>, <device range>, ..." will ignore the
+  specified devices.
+
+  Note: While already known devices can be added to the list of devices to be
+	ignored, there will be no effect on then. However, if such a device
+	disappears and then reappears, it will then be ignored. To make
+	known devices go away, you need the "purge" command (see below).
+
+  For example::
+
+	"echo add 0.0.a000-0.0.accc, 0.0.af00-0.0.afff > /proc/cio_ignore"
+
+  will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored
+  devices.
+
+  You can remove already known but now ignored devices via::
+
+	"echo purge > /proc/cio_ignore"
+
+  All devices ignored but still registered and not online (= not in use)
+  will be deregistered and thus removed from the system.
+
+  The devices can be specified either by bus id (0.x.abcd) or, for 2.4 backward
+  compatibility, by the device number in hexadecimal (0xabcd or abcd). Device
+  numbers given as 0xabcd will be interpreted as 0.0.abcd.
+
+* /proc/cio_settle
+
+  A write request to this file is blocked until all queued cio actions are
+  handled. This will allow userspace to wait for pending work affecting
+  device availability after changing cio_ignore or the hardware configuration.
+
+* For some of the information present in the /proc filesystem in 2.4 (namely,
+  /proc/subchannels and /proc/chpids), see driver-model.txt.
+  Information formerly in /proc/irq_count is now in /proc/interrupts.
+
+
+debugfs entries
+---------------
+
+* /sys/kernel/debug/s390dbf/cio_*/ (S/390 debug feature)
+
+  Some views generated by the debug feature to hold various debug outputs.
+
+  - /sys/kernel/debug/s390dbf/cio_crw/sprintf
+    Messages from the processing of pending channel report words (machine check
+    handling).
+
+  - /sys/kernel/debug/s390dbf/cio_msg/sprintf
+    Various debug messages from the common I/O-layer.
+
+  - /sys/kernel/debug/s390dbf/cio_trace/hex_ascii
+    Logs the calling of functions in the common I/O-layer and, if applicable,
+    which subchannel they were called for, as well as dumps of some data
+    structures (like irb in an error case).
+
+  The level of logging can be changed to be more or less verbose by piping to
+  /sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the
+  documentation on the S/390 debug feature (Documentation/s390/s390dbf.rst)
+  for details.
diff --git a/Documentation/s390/dasd.rst b/Documentation/s390/dasd.rst
new file mode 100644
index 000000000000..9e22247285c8
--- /dev/null
+++ b/Documentation/s390/dasd.rst
@@ -0,0 +1,84 @@
+==================
+DASD device driver
+==================
+
+S/390's disk devices (DASDs) are managed by Linux via the DASD device
+driver. It is valid for all types of DASDs and represents them to
+Linux as block devices, namely "dd". Currently the DASD driver uses a
+single major number (254) and 4 minor numbers per volume (1 for the
+physical volume and 3 for partitions). With respect to partitions see
+below. Thus you may have up to 64 DASD devices in your system.
+
+The kernel parameter 'dasd=from-to,...' may be issued arbitrary times
+in the kernel's parameter line or not at all. The 'from' and 'to'
+parameters are to be given in hexadecimal notation without a leading
+0x.
+If you supply kernel parameters the different instances are processed
+in order of appearance and a minor number is reserved for any device
+covered by the supplied range up to 64 volumes. Additional DASDs are
+ignored. If you do not supply the 'dasd=' kernel parameter at all, the
+DASD driver registers all supported DASDs of your system to a minor
+number in ascending order of the subchannel number.
+
+The driver currently supports ECKD-devices and there are stubs for
+support of the FBA and CKD architectures. For the FBA architecture
+only some smart data structures are missing to make the support
+complete.
+We performed our testing on 3380 and 3390 type disks of different
+sizes, under VM and on the bare hardware (LPAR), using internal disks
+of the multiprise as well as a RAMAC virtual array. Disks exported by
+an Enterprise Storage Server (Seascape) should work fine as well.
+
+We currently implement one partition per volume, which is the whole
+volume, skipping the first blocks up to the volume label. These are
+reserved for IPL records and IBM's volume label to assure
+accessibility of the DASD from other OSs. In a later stage we will
+provide support of partitions, maybe VTOC oriented or using a kind of
+partition table in the label record.
+
+Usage
+=====
+
+-Low-level format (?CKD only)
+For using an ECKD-DASD as a Linux harddisk you have to low-level
+format the tracks by issuing the BLKDASDFORMAT-ioctl on that
+device. This will erase any data on that volume including IBM volume
+labels, VTOCs etc. The ioctl may take a `struct format_data *` or
+'NULL' as an argument::
+
+  typedef struct {
+	int start_unit;
+	int stop_unit;
+	int blksize;
+  } format_data_t;
+
+When a NULL argument is passed to the BLKDASDFORMAT ioctl the whole
+disk is formatted to a blocksize of 1024 bytes. Otherwise start_unit
+and stop_unit are the first and last track to be formatted. If
+stop_unit is -1 it implies that the DASD is formatted from start_unit
+up to the last track. blksize can be any power of two between 512 and
+4096. We recommend no blksize lower than 1024 because the ext2fs uses
+1kB blocks anyway and you gain approx. 50% of capacity increasing your
+blksize from 512 byte to 1kB.
+
+Make a filesystem
+=================
+
+Then you can mk??fs the filesystem of your choice on that volume or
+partition. For reasons of sanity you should build your filesystem on
+the partition /dev/dd?1 instead of the whole volume. You only lose 3kB
+but may be sure that you can reuse your data after introduction of a
+real partition table.
+
+Bugs
+====
+
+- Performance sometimes is rather low because we don't fully exploit clustering
+
+TODO-List
+=========
+
+- Add IBM'S Disk layout to genhd
+- Enhance driver to use more than one major number
+- Enable usage as a module
+- Support Cache fast write and DASD fast write (ECKD)
diff --git a/Documentation/s390/debugging390.rst b/Documentation/s390/debugging390.rst
new file mode 100644
index 000000000000..d49305fd5e1a
--- /dev/null
+++ b/Documentation/s390/debugging390.rst
@@ -0,0 +1,2613 @@
+=============================================
+Debugging on Linux for s/390 & z/Architecture
+=============================================
+
+Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+
+Copyright (C) 2000-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation
+
+.. Best viewed with fixed width fonts
+
+Overview of Document:
+=====================
+This document is intended to give a good overview of how to debug Linux for
+s/390 and z/Architecture. It is not intended as a complete reference and not a
+tutorial on the fundamentals of C & assembly. It doesn't go into
+390 IO in any detail. It is intended to complement the documents in the
+reference section below & any other worthwhile references you get.
+
+It is intended like the Enterprise Systems Architecture/390 Reference Summary
+to be printed out & used as a quick cheat sheet self help style reference when
+problems occur.
+
+.. Contents
+   ========
+   Register Set
+   Address Spaces on Intel Linux
+   Address Spaces on Linux for s/390 & z/Architecture
+   The Linux for s/390 & z/Architecture Kernel Task Structure
+   Register Usage & Stackframes on Linux for s/390 & z/Architecture
+   A sample program with comments
+   Compiling programs for debugging on Linux for s/390 & z/Architecture
+   Debugging under VM
+   s/390 & z/Architecture IO Overview
+   Debugging IO on s/390 & z/Architecture under VM
+   GDB on s/390 & z/Architecture
+   Stack chaining in gdb by hand
+   Examining core dumps
+   ldd
+   Debugging modules
+   The proc file system
+   SysRq
+   References
+   Special Thanks
+
+Register Set
+============
+The current architectures have the following registers.
+
+16 General propose registers, 32 bit on s/390 and 64 bit on z/Architecture,
+r0-r15 (or gpr0-gpr15), used for arithmetic and addressing.
+
+16 Control registers, 32 bit on s/390 and 64 bit on z/Architecture, cr0-cr15,
+kernel usage only, used for memory management, interrupt control, debugging
+control etc.
+
+16 Access registers (ar0-ar15), 32 bit on both s/390 and z/Architecture,
+normally not used by normal programs but potentially could be used as
+temporary storage. These registers have a 1:1 association with general
+purpose registers and are designed to be used in the so-called access
+register mode to select different address spaces.
+Access register 0 (and access register 1 on z/Architecture, which needs a
+64 bit pointer) is currently used by the pthread library as a pointer to
+the current running threads private area.
+
+16 64-bit floating point registers (fp0-fp15 ) IEEE & HFP floating
+point format compliant on G5 upwards & a Floating point control reg (FPC)
+
+4  64-bit registers (fp0,fp2,fp4 & fp6) HFP only on older machines.
+
+Note:
+   Linux (currently) always uses IEEE & emulates G5 IEEE format on older
+   machines, ( provided the kernel is configured for this ).
+
+
+The PSW is the most important register on the machine it
+is 64 bit on s/390 & 128 bit on z/Architecture & serves the roles of
+a program counter (pc), condition code register,memory space designator.
+In IBM standard notation I am counting bit 0 as the MSB.
+It has several advantages over a normal program counter
+in that you can change address translation & program counter
+in a single instruction. To change address translation,
+e.g. switching address translation off requires that you
+have a logical=physical mapping for the address you are
+currently running at.
+
++-------------------------+-------------------------------------------------+
+|          Bit            |                                                 |
++--------+----------------+                     Value                       |
+| s/390  | z/Architecture |                                                 |
++========+================+=================================================+
+| 0      |     0          | Reserved (must be 0) otherwise specification    |
+|        |                | exception occurs.                               |
++--------+----------------+-------------------------------------------------+
+| 1      |     1          | Program Event Recording 1 PER enabled,          |
+|        |                | PER is used to facilitate debugging e.g.        |
+|        |                | single stepping.                                |
++--------+----------------+-------------------------------------------------+
+| 2-4    |    2-4         | Reserved (must be 0).                           |
++--------+----------------+-------------------------------------------------+
+| 5      |     5          | Dynamic address translation 1=DAT on.           |
++--------+----------------+-------------------------------------------------+
+| 6      |     6          | Input/Output interrupt Mask                     |
++--------+----------------+-------------------------------------------------+
+| 7      |     7          | External interrupt Mask used primarily for      |
+|        |                | interprocessor signalling and clock interrupts. |
++--------+----------------+-------------------------------------------------+
+| 8-11   |   8-11         | PSW Key used for complex memory protection      |
+|        |                | mechanism (not used under linux)                |
++--------+----------------+-------------------------------------------------+
+| 12     |     12         | 1 on s/390 0 on z/Architecture                  |
++--------+----------------+-------------------------------------------------+
+| 13     |     13         | Machine Check Mask 1=enable machine check       |
+|        |                | interrupts                                      |
++--------+----------------+-------------------------------------------------+
+| 14     |     14         | Wait State. Set this to 1 to stop the processor |
+|        |                | except for interrupts and give  time to other   |
+|        |                | LPARS. Used in CPU idle in the kernel to        |
+|        |                | increase overall usage of processor resources.  |
++--------+----------------+-------------------------------------------------+
+| 15     |     15         | Problem state (if set to 1 certain instructions |
+|        |                | are disabled). All linux user programs run with |
+|        |                | this bit 1 (useful info for debugging under VM).|
++--------+----------------+-------------------------------------------------+
+| 16-17  |    16-17       | Address Space Control                           |
+|        |                |                                                 |
+|        |                | 00 Primary Space Mode:                          |
+|        |                |                                                 |
+|        |                | The register CR1 contains the primary           |
+|        |                | address-space control element (PASCE), which    |
+|        |                | points to the primary space region/segment      |
+|        |                | table origin.                                   |
+|        |                |                                                 |
+|        |                | 01 Access register mode                         |
+|        |                |                                                 |
+|        |                | 10 Secondary Space Mode:                        |
+|        |                |                                                 |
+|        |                | The register CR7 contains the secondary         |
+|        |                | address-space control element (SASCE), which    |
+|        |                | points to the secondary space region or         |
+|        |                | segment table origin.                           |
+|        |                |                                                 |
+|        |                | 11 Home Space Mode:                             |
+|        |                |                                                 |
+|        |                | The register CR13 contains the home space       |
+|        |                | address-space control element (HASCE), which    |
+|        |                | points to the home space region/segment         |
+|        |                | table origin.                                   |
+|        |                |                                                 |
+|        |                | See "Address Spaces on Linux for s/390 &        |
+|        |                | z/Architecture" below for more information      |
+|        |                | about address space usage in Linux.             |
++--------+----------------+-------------------------------------------------+
+| 18-19  |    18-19       | Condition codes (CC)                            |
++--------+----------------+-------------------------------------------------+
+| 20     |    20          | Fixed point overflow mask if 1=FPU exceptions   |
+|        |                | for this event occur (normally 0)               |
++--------+----------------+-------------------------------------------------+
+| 21     |    21          | Decimal overflow mask if 1=FPU exceptions for   |
+|        |                | this event occur (normally 0)                   |
++--------+----------------+-------------------------------------------------+
+| 22     |    22          | Exponent underflow mask if 1=FPU exceptions     |
+|        |                | for this event occur (normally 0)               |
++--------+----------------+-------------------------------------------------+
+| 23     |    23          | Significance Mask if 1=FPU exceptions for this  |
+|        |                | event occur (normally 0)                        |
++--------+----------------+-------------------------------------------------+
+| 24-31  |    24-30       | Reserved Must be 0.                             |
+|        +----------------+-------------------------------------------------+
+|        |    31          | Extended Addressing Mode                        |
+|        +----------------+-------------------------------------------------+
+|        |    32          | Basic Addressing Mode                           |
+|        |                |                                                 |
+|        |                | Used to set addressing mode                     |
+|        |                |                                                 |
+|        |                |    +---------+----------+----------+            |
+|        |                |    | PSW 31  | PSW 32   |          |            |
+|        |                |    +---------+----------+----------+            |
+|        |                |    |   0     |    0     |  24 bit  |            |
+|        |                |    +---------+----------+----------+            |
+|        |                |    |   0     |    1     |  31 bit  |            |
+|        |                |    +---------+----------+----------+            |
+|        |                |    |   1     |    1     |  64 bit  |            |
+|        |                |    +---------+----------+----------+            |
++--------+----------------+-------------------------------------------------+
+| 32     |                | 1=31 bit addressing mode 0=24 bit addressing    |
+|        |                | mode (for backward compatibility), linux        |
+|        |                | always runs with this bit set to 1              |
++--------+----------------+-------------------------------------------------+
+| 33-64  |                | Instruction address.                            |
+|        +----------------+-------------------------------------------------+
+|        |    33-63       | Reserved must be 0                              |
+|        +----------------+-------------------------------------------------+
+|        |    64-127      | Address                                         |
+|        |                |                                                 |
+|        |                |   - In 24 bits mode bits 64-103=0 bits 104-127  |
+|        |                |     Address                                     |
+|        |                |   - In 31 bits mode bits 64-96=0 bits 97-127    |
+|        |                |     Address                                     |
+|        |                |                                                 |
+|        |                | Note:                                           |
+|        |                |     unlike 31 bit mode on s/390 bit 96 must be  |
+|        |                |     zero when loading the address with LPSWE    |
+|        |                |     otherwise a specification exception occurs, |
+|        |                |     LPSW is fully backward compatible.          |
++--------+----------------+-------------------------------------------------+
+
+Prefix Page(s)
+--------------
+This per cpu memory area is too intimately tied to the processor not to mention.
+It exists between the real addresses 0-4096 on s/390 and between 0-8192 on
+z/Architecture and is exchanged with one page on s/390 or two pages on
+z/Architecture in absolute storage by the set prefix instruction during Linux
+startup.
+
+This page is mapped to a different prefix for each processor in an SMP
+configuration (assuming the OS designer is sane of course).
+
+Bytes 0-512 (200 hex) on s/390 and 0-512, 4096-4544, 4604-5119 currently on
+z/Architecture are used by the processor itself for holding such information
+as exception indications and entry points for exceptions.
+
+Bytes after 0xc00 hex are used by linux for per processor globals on s/390 and
+z/Architecture (there is a gap on z/Architecture currently between 0xc00 and
+0x1000, too, which is used by Linux).
+
+The closest thing to this on traditional architectures is the interrupt
+vector table. This is a good thing & does simplify some of the kernel coding
+however it means that we now cannot catch stray NULL pointers in the
+kernel without hard coded checks.
+
+
+
+Address Spaces on Intel Linux
+=============================
+
+The traditional Intel Linux is approximately mapped as follows forgive
+the ascii art::
+
+  0xFFFFFFFF 4GB Himem          *****************
+				*               *
+				* Kernel Space  *
+				*               *
+				*****************         ****************
+  User Space Himem              *  User Stack   *         *              *
+  (typically 0xC0000000 3GB )   *****************         *              *
+				*  Shared Libs  *         * Next Process *
+				*****************         *     to       *
+				*               *   <==   *     Run      *  <==
+				*  User Program *         *              *
+				*   Data BSS    *         *              *
+				*    Text       *         *              *
+				*   Sections    *         *              *
+  0x00000000                    *****************         ****************
+
+Now it is easy to see that on Intel it is quite easy to recognise a kernel
+address as being one greater than user space himem (in this case 0xC0000000),
+and addresses of less than this are the ones in the current running program on
+this processor (if an smp box).
+
+If using the virtual machine ( VM ) as a debugger it is quite difficult to
+know which user process is running as the address space you are looking at
+could be from any process in the run queue.
+
+The limitation of Intels addressing technique is that the linux
+kernel uses a very simple real address to virtual addressing technique
+of Real Address=Virtual Address-User Space Himem.
+This means that on Intel the kernel linux can typically only address
+Himem=0xFFFFFFFF-0xC0000000=1GB & this is all the RAM these machines
+can typically use.
+
+They can lower User Himem to 2GB or lower & thus be
+able to use 2GB of RAM however this shrinks the maximum size
+of User Space from 3GB to 2GB they have a no win limit of 4GB unless
+they go to 64 Bit.
+
+
+On 390 our limitations & strengths make us slightly different.
+For backward compatibility we are only allowed use 31 bits (2GB)
+of our 32 bit addresses, however, we use entirely separate address
+spaces for the user & kernel.
+
+This means we can support 2GB of non Extended RAM on s/390, & more
+with the Extended memory management swap device &
+currently 4TB of physical memory currently on z/Architecture.
+
+
+Address Spaces on Linux for s/390 & z/Architecture
+==================================================
+
+Our addressing scheme is basically as follows::
+
+				   Primary Space               Home Space
+  Himem 0x7fffffff 2GB on s/390    *****************          ****************
+  currently 0x3ffffffffff (2^42)-1 *  User Stack   *          *              *
+  on z/Architecture.               *****************          *              *
+				   *  Shared Libs  *          *              *
+				   *****************          *              *
+				   *               *          *    Kernel    *
+				   *  User Program *          *              *
+				   *   Data BSS    *          *              *
+				   *    Text       *          *              *
+				   *   Sections    *          *              *
+  0x00000000                       *****************          ****************
+
+This also means that we need to look at the PSW problem state bit and the
+addressing mode to decide whether we are looking at user or kernel space.
+
+User space runs in primary address mode (or access register mode within
+the vdso code).
+
+The kernel usually also runs in home space mode, however when accessing
+user space the kernel switches to primary or secondary address mode if
+the mvcos instruction is not available or if a compare-and-swap (futex)
+instruction on a user space address is performed.
+
+When also looking at the ASCE control registers, this means:
+
+User space:
+
+- runs in primary or access register mode
+- cr1 contains the user asce
+- cr7 contains the user asce
+- cr13 contains the kernel asce
+
+Kernel space:
+
+- runs in home space mode
+- cr1 contains the user or kernel asce
+
+  - the kernel asce is loaded when a uaccess requires primary or
+    secondary address mode
+
+- cr7 contains the user or kernel asce, (changed with set_fs())
+- cr13 contains the kernel asce
+
+In case of uaccess the kernel changes to:
+
+- primary space mode in case of a uaccess (copy_to_user) and uses
+  e.g. the mvcp instruction to access user space. However the kernel
+  will stay in home space mode if the mvcos instruction is available
+- secondary space mode in case of futex atomic operations, so that the
+  instructions come from primary address space and data from secondary
+  space
+
+In case of KVM, the kernel runs in home space mode, but cr1 gets switched
+to contain the gmap asce before the SIE instruction gets executed. When
+the SIE instruction is finished, cr1 will be switched back to contain the
+user asce.
+
+
+Virtual Addresses on s/390 & z/Architecture
+===========================================
+
+A virtual address on s/390 is made up of 3 parts
+The SX (segment index, roughly corresponding to the PGD & PMD in Linux
+terminology) being bits 1-11.
+
+The PX (page index, corresponding to the page table entry (pte) in Linux
+terminology) being bits 12-19.
+
+The remaining bits BX (the byte index are the offset in the page )
+i.e. bits 20 to 31.
+
+On z/Architecture in linux we currently make up an address from 4 parts.
+
+- The region index bits (RX) 0-32 we currently use bits 22-32
+- The segment index (SX) being bits 33-43
+- The page index (PX) being bits  44-51
+- The byte index (BX) being bits  52-63
+
+Notes:
+  1) s/390 has no PMD so the PMD is really the PGD also.
+     A lot of this stuff is defined in pgtable.h.
+
+  2) Also seeing as s/390's page indexes are only 1k  in size
+     (bits 12-19 x 4 bytes per pte ) we use 1 ( page 4k )
+     to make the best use of memory by updating 4 segment indices
+     entries each time we mess with a PMD & use offsets
+     0,1024,2048 & 3072 in this page as for our segment indexes.
+     On z/Architecture our page indexes are now 2k in size
+     ( bits 12-19 x 8 bytes per pte ) we do a similar trick
+     but only mess with 2 segment indices each time we mess with
+     a PMD.
+
+  3) As z/Architecture supports up to a massive 5-level page table lookup we
+     can only use 3 currently on Linux ( as this is all the generic kernel
+     currently supports ) however this may change in future
+     this allows us to access ( according to my sums )
+     4TB of virtual storage per process i.e.
+     4096*512(PTES)*1024(PMDS)*2048(PGD) = 4398046511104 bytes,
+     enough for another 2 or 3 of years I think :-).
+     to do this we use a region-third-table designation type in
+     our address space control registers.
+
+
+The Linux for s/390 & z/Architecture Kernel Task Structure
+==========================================================
+Each process/thread under Linux for S390 has its own kernel task_struct
+defined in linux/include/linux/sched.h
+The S390 on initialisation & resuming of a process on a cpu sets
+the __LC_KERNEL_STACK variable in the spare prefix area for this cpu
+(which we use for per-processor globals).
+
+The kernel stack pointer is intimately tied with the task structure for
+each processor as follows::
+
+			s/390
+	      ************************
+	      *  1 page kernel stack *
+	      *        ( 4K )        *
+	      ************************
+	      *   1 page task_struct *
+	      *        ( 4K )        *
+  8K aligned  ************************
+
+		   z/Architecture
+	      ************************
+	      *  2 page kernel stack *
+	      *        ( 8K )        *
+	      ************************
+	      *  2 page task_struct  *
+	      *        ( 8K )        *
+  16K aligned ************************
+
+What this means is that we don't need to dedicate any register or global
+variable to point to the current running process & can retrieve it with the
+following very simple construct for s/390 & one very similar for
+z/Architecture::
+
+  static inline struct task_struct * get_current(void)
+  {
+	struct task_struct *current;
+	__asm__("lhi   %0,-8192\n\t"
+		"nr    %0,15"
+		: "=r" (current) );
+	return current;
+  }
+
+i.e. just anding the current kernel stack pointer with the mask -8192.
+Thankfully because Linux doesn't have support for nested IO interrupts
+& our devices have large buffers can survive interrupts being shut for
+short amounts of time we don't need a separate stack for interrupts.
+
+
+
+
+Register Usage & Stackframes on Linux for s/390 & z/Architecture
+=================================================================
+Overview:
+---------
+This is the code that gcc produces at the top & the bottom of
+each function. It usually is fairly consistent & similar from
+function to function & if you know its layout you can probably
+make some headway in finding the ultimate cause of a problem
+after a crash without a source level debugger.
+
+Note: To follow stackframes requires a knowledge of C or Pascal &
+limited knowledge of one assembly language.
+
+It should be noted that there are some differences between the
+s/390 and z/Architecture stack layouts as the z/Architecture stack layout
+didn't have to maintain compatibility with older linkage formats.
+
+Glossary:
+---------
+alloca:
+  This is a built in compiler function for runtime allocation
+  of extra space on the callers stack which is obviously freed
+  up on function exit ( e.g. the caller may choose to allocate nothing
+  of a buffer of 4k if required for temporary purposes ), it generates
+  very efficient code ( a few cycles  ) when compared to alternatives
+  like malloc.
+
+automatics:
+  These are local variables on the stack, i.e they aren't in registers &
+  they aren't static.
+
+back-chain:
+  This is a pointer to the stack pointer before entering a
+  framed functions ( see frameless function ) prologue got by
+  dereferencing the address of the current stack pointer,
+  i.e. got by accessing the 32 bit value at the stack pointers
+  current location.
+
+base-pointer:
+  This is a pointer to the back of the literal pool which
+  is an area just behind each procedure used to store constants
+  in each function.
+
+call-clobbered:
+  The caller probably needs to save these registers if there
+  is something of value in them, on the stack or elsewhere before making a
+  call to another procedure so that it can restore it later.
+
+epilogue:
+  The code generated by the compiler to return to the caller.
+
+frameless-function:
+  A frameless function in Linux for s390 & z/Architecture is one which doesn't
+  need more than the register save area (96 bytes on s/390, 160 on z/Architecture)
+  given to it by the caller.
+
+  A frameless function never:
+
+  1) Sets up a back chain.
+  2) Calls alloca.
+  3) Calls other normal functions
+  4) Has automatics.
+
+GOT-pointer:
+  This is a pointer to the global-offset-table in ELF
+  ( Executable Linkable Format, Linux'es most common executable format ),
+  all globals & shared library objects are found using this pointer.
+
+lazy-binding
+  ELF shared libraries are typically only loaded when routines in the shared
+  library are actually first called at runtime. This is lazy binding.
+
+procedure-linkage-table
+  This is a table found from the GOT which contains pointers to routines
+  in other shared libraries which can't be called to by easier means.
+
+prologue:
+  The code generated by the compiler to set up the stack frame.
+
+outgoing-args:
+  This is extra area allocated on the stack of the calling function if the
+  parameters for the callee's cannot all be put in registers, the same
+  area can be reused by each function the caller calls.
+
+routine-descriptor:
+  A COFF  executable format based concept of a procedure reference
+  actually being 8 bytes or more as opposed to a simple pointer to the routine.
+  This is typically defined as follows:
+
+  - Routine Descriptor offset 0=Pointer to Function
+  - Routine Descriptor offset 4=Pointer to Table of Contents
+
+  The table of contents/TOC is roughly equivalent to a GOT pointer.
+  & it means that shared libraries etc. can be shared between several
+  environments each with their own TOC.
+
+static-chain:
+  This is used in nested functions a concept adopted from pascal
+  by gcc not used in ansi C or C++ ( although quite useful ), basically it
+  is a pointer used to reference local variables of enclosing functions.
+  You might come across this stuff once or twice in your lifetime.
+
+  e.g.
+
+  The function below should return 11 though gcc may get upset & toss warnings
+  about unused variables::
+
+    int FunctionA(int a)
+    {
+	int b;
+	FunctionC(int c)
+	{
+		b=c+1;
+	}
+	FunctionC(10);
+	return(b);
+    }
+
+
+s/390 & z/Architecture Register usage
+=====================================
+
+======== ========================================== ===============
+r0       used by syscalls/assembly                  call-clobbered
+r1       used by syscalls/assembly                  call-clobbered
+r2       argument 0 / return value 0                call-clobbered
+r3       argument 1 / return value 1 (if long long) call-clobbered
+r4       argument 2                                 call-clobbered
+r5       argument 3                                 call-clobbered
+r6       argument 4                                 saved
+r7       pointer-to arguments 5 to ...              saved
+r8       this & that                                saved
+r9       this & that                                saved
+r10      static-chain ( if nested function )        saved
+r11      frame-pointer ( if function used alloca )  saved
+r12      got-pointer                                saved
+r13      base-pointer                               saved
+r14      return-address                             saved
+r15      stack-pointer                              saved
+
+f0       argument 0 / return value ( float/double ) call-clobbered
+f2       argument 1                                 call-clobbered
+f4       z/Architecture argument 2                  saved
+f6       z/Architecture argument 3                  saved
+======== ========================================== ===============
+
+The remaining floating points
+f1,f3,f5 f7-f15 are call-clobbered.
+
+Notes:
+------
+1) The only requirement is that registers which are used
+   by the callee are saved, e.g. the compiler is perfectly
+   capable of using r11 for purposes other than a frame a
+   frame pointer if a frame pointer is not needed.
+2) In functions with variable arguments e.g. printf the calling procedure
+   is identical to one without variable arguments & the same number of
+   parameters. However, the prologue of this function is somewhat more
+   hairy owing to it having to move these parameters to the stack to
+   get va_start, va_arg & va_end to work.
+3) Access registers are currently unused by gcc but are used in
+   the kernel. Possibilities exist to use them at the moment for
+   temporary storage but it isn't recommended.
+4) Only 4 of the floating point registers are used for
+   parameter passing as older machines such as G3 only have only 4
+   & it keeps the stack frame compatible with other compilers.
+   However with IEEE floating point emulation under linux on the
+   older machines you are free to use the other 12.
+5) A long long or double parameter cannot be have the
+   first 4 bytes in a register & the second four bytes in the
+   outgoing args area. It must be purely in the outgoing args
+   area if crossing this boundary.
+6) Floating point parameters are mixed with outgoing args
+   on the outgoing args area in the order the are passed in as parameters.
+7) Floating point arguments 2 & 3 are saved in the outgoing args area for
+   z/Architecture
+
+
+Stack Frame Layout
+------------------
+
+========= ============== ======================================================
+s/390     z/Architecture
+========= ============== ======================================================
+0         0              back chain ( a 0 here signifies end of back chain )
+4         8              eos ( end of stack, not used on Linux for S390 used
+			 in other linkage formats )
+8         16             glue used in other s/390 linkage formats for saved
+			 routine descriptors etc.
+12        24             glue used in other s/390 linkage formats for saved
+			 routine descriptors etc.
+16        32             scratch area
+20        40             scratch area
+24        48             saved r6 of caller function
+28        56             saved r7 of caller function
+32        64             saved r8 of caller function
+36        72             saved r9 of caller function
+40        80             saved r10 of caller function
+44        88             saved r11 of caller function
+48        96             saved r12 of caller function
+52        104            saved r13 of caller function
+56        112            saved r14 of caller function
+60        120            saved r15 of caller function
+64        128            saved f4 of caller function
+72        132            saved f6 of caller function
+80                       undefined
+96        160            outgoing args passed from caller to callee
+96+x      160+x          possible stack alignment ( 8 bytes desirable )
+96+x+y    160+x+y        alloca space of caller ( if used )
+96+x+y+z  160+x+y+z      automatics of caller ( if used )
+0                        back-chain
+========= ============== ======================================================
+
+A sample program with comments.
+===============================
+
+Comments on the function test
+-----------------------------
+1) It didn't need to set up a pointer to the constant pool gpr13 as it is not
+   used ( :-( ).
+2) This is a frameless function & no stack is bought.
+3) The compiler was clever enough to recognise that it could return the
+   value in r2 as well as use it for the passed in parameter ( :-) ).
+4) The basr ( branch relative & save ) trick works as follows the instruction
+   has a special case with r0,r0 with some instruction operands is understood as
+   the literal value 0, some risc architectures also do this ). So now
+   we are branching to the next address & the address new program counter is
+   in r13,so now we subtract the size of the function prologue we have executed
+   the size of the literal pool to get to the top of the literal pool::
+
+
+     0040037c int test(int b)
+     {                                                     # Function prologue below
+       40037c:  90 de f0 34     stm     %r13,%r14,52(%r15) # Save registers r13 & r14
+       400380:  0d d0           basr    %r13,%r0           # Set up pointer to constant pool using
+       400382:  a7 da ff fa     ahi     %r13,-6            # basr trick
+	return(5+b);
+							   # Huge main program
+       400386:  a7 2a 00 05     ahi     %r2,5              # add 5 to r2
+
+							   # Function epilogue below
+       40038a:  98 de f0 34     lm      %r13,%r14,52(%r15) # restore registers r13 & 14
+       40038e:  07 fe           br      %r14               # return
+     }
+
+Comments on the function main
+-----------------------------
+1) The compiler did this function optimally ( 8-) )::
+
+     Literal pool for main.
+     400390:    ff ff ff ec     .long 0xffffffec
+     main(int argc,char *argv[])
+     {                                                     # Function prologue below
+       400394:  90 bf f0 2c     stm     %r11,%r15,44(%r15) # Save necessary registers
+       400398:  18 0f           lr      %r0,%r15           # copy stack pointer to r0
+       40039a:  a7 fa ff a0     ahi     %r15,-96           # Make area for callee saving
+       40039e:  0d d0           basr    %r13,%r0           # Set up r13 to point to
+       4003a0:  a7 da ff f0     ahi     %r13,-16           # literal pool
+       4003a4:  50 00 f0 00     st      %r0,0(%r15)        # Save backchain
+
+	return(test(5));                                   # Main Program Below
+       4003a8:  58 e0 d0 00     l       %r14,0(%r13)       # load relative address of test from
+							   # literal pool
+       4003ac:  a7 28 00 05     lhi     %r2,5              # Set first parameter to 5
+       4003b0:  4d ee d0 00     bas     %r14,0(%r14,%r13)  # jump to test setting r14 as return
+							   # address using branch & save instruction.
+
+							   # Function Epilogue below
+       4003b4:  98 bf f0 8c     lm      %r11,%r15,140(%r15)# Restore necessary registers.
+       4003b8:  07 fe           br      %r14               # return to do program exit
+     }
+
+
+Compiler updates
+----------------
+
+::
+
+  main(int argc,char *argv[])
+  {
+    4004fc:     90 7f f0 1c             stm     %r7,%r15,28(%r15)
+    400500:     a7 d5 00 04             bras    %r13,400508 <main+0xc>
+    400504:     00 40 04 f4             .long   0x004004f4
+    # compiler now puts constant pool in code to so it saves an instruction
+    400508:     18 0f                   lr      %r0,%r15
+    40050a:     a7 fa ff a0             ahi     %r15,-96
+    40050e:     50 00 f0 00             st      %r0,0(%r15)
+	return(test(5));
+    400512:     58 10 d0 00             l       %r1,0(%r13)
+    400516:     a7 28 00 05             lhi     %r2,5
+    40051a:     0d e1                   basr    %r14,%r1
+    # compiler adds 1 extra instruction to epilogue this is done to
+    # avoid processor pipeline stalls owing to data dependencies on g5 &
+    # above as register 14 in the old code was needed directly after being loaded
+    # by the lm %r11,%r15,140(%r15) for the br %14.
+    40051c:     58 40 f0 98             l       %r4,152(%r15)
+    400520:     98 7f f0 7c             lm      %r7,%r15,124(%r15)
+    400524:     07 f4                   br      %r4
+  }
+
+
+Hartmut ( our compiler developer ) also has been threatening to take out the
+stack backchain in optimised code as this also causes pipeline stalls, you
+have been warned.
+
+64 bit z/Architecture code disassembly
+--------------------------------------
+
+If you understand the stuff above you'll understand the stuff
+below too so I'll avoid repeating myself & just say that
+some of the instructions have g's on the end of them to indicate
+they are 64 bit & the stack offsets are a bigger,
+the only other difference you'll find between 32 & 64 bit is that
+we now use f4 & f6 for floating point arguments on 64 bit::
+
+  00000000800005b0 <test>:
+  int test(int b)
+  {
+	return(5+b);
+      800005b0: a7 2a 00 05             ahi     %r2,5
+      800005b4: b9 14 00 22             lgfr    %r2,%r2 # downcast to integer
+      800005b8: 07 fe                   br      %r14
+      800005ba: 07 07                   bcr     0,%r7
+
+
+  }
+
+  00000000800005bc <main>:
+  main(int argc,char *argv[])
+  {
+      800005bc: eb bf f0 58 00 24       stmg    %r11,%r15,88(%r15)
+      800005c2: b9 04 00 1f             lgr     %r1,%r15
+      800005c6: a7 fb ff 60             aghi    %r15,-160
+      800005ca: e3 10 f0 00 00 24       stg     %r1,0(%r15)
+	return(test(5));
+      800005d0: a7 29 00 05             lghi    %r2,5
+      # brasl allows jumps > 64k & is overkill here bras would do fune
+      800005d4: c0 e5 ff ff ff ee       brasl   %r14,800005b0 <test>
+      800005da: e3 40 f1 10 00 04       lg      %r4,272(%r15)
+      800005e0: eb bf f0 f8 00 04       lmg     %r11,%r15,248(%r15)
+      800005e6: 07 f4                   br      %r4
+  }
+
+
+
+Compiling programs for debugging on Linux for s/390 & z/Architecture
+====================================================================
+-gdwarf-2 now works it should be considered the default debugging
+format for s/390 & z/Architecture as it is more reliable for debugging
+shared libraries,  normal -g debugging works much better now
+Thanks to the IBM java compiler developers bug reports.
+
+This is typically done adding/appending the flags -g or -gdwarf-2 to the
+CFLAGS & LDFLAGS variables Makefile of the program concerned.
+
+If using gdb & you would like accurate displays of registers &
+stack traces compile without optimisation i.e make sure
+that there is no -O2 or similar on the CFLAGS line of the Makefile &
+the emitted gcc commands, obviously this will produce worse code
+( not advisable for shipment ) but it is an  aid to the debugging process.
+
+This aids debugging because the compiler will copy parameters passed in
+in registers onto the stack so backtracing & looking at passed in
+parameters will work, however some larger programs which use inline functions
+will not compile without optimisation.
+
+Debugging with optimisation has since much improved after fixing
+some bugs, please make sure you are using gdb-5.0 or later developed
+after Nov'2000.
+
+
+
+Debugging under VM
+==================
+
+Notes
+-----
+Addresses & values in the VM debugger are always hex never decimal
+Address ranges are of the format <HexValue1>-<HexValue2> or
+<HexValue1>.<HexValue2>
+For example, the address range  0x2000 to 0x3000 can be described as 2000-3000
+or 2000.1000
+
+The VM Debugger is case insensitive.
+
+VM's strengths are usually other debuggers weaknesses you can get at any
+resource no matter how sensitive e.g. memory management resources, change
+address translation in the PSW. For kernel hacking you will reap dividends if
+you get good at it.
+
+The VM Debugger displays operators but not operands, and also the debugger
+displays useful information on the same line as the author of the code probably
+felt that it was a good idea not to go over the 80 columns on the screen.
+This isn't as unintuitive as it may seem as the s/390 instructions are easy to
+decode mentally and you can make a good guess at a lot of them as all the
+operands are nibble (half byte aligned).
+So if you have an objdump listing by hand, it is quite easy to follow, and if
+you don't have an objdump listing keep a copy of the s/390 Reference Summary
+or alternatively the s/390 principles of operation next to you.
+e.g. even I can guess that
+0001AFF8' LR    180F        CC 0
+is a ( load register ) lr r0,r15
+
+Also it is very easy to tell the length of a 390 instruction from the 2 most
+significant bits in the instruction (not that this info is really useful except
+if you are trying to make sense of a hexdump of code).
+Here is a table
+
+======================= ==================
+Bits                    Instruction Length
+======================= ==================
+00                          2 Bytes
+01                          4 Bytes
+10                          4 Bytes
+11                          6 Bytes
+======================= ==================
+
+The debugger also displays other useful info on the same line such as the
+addresses being operated on destination addresses of branches & condition codes.
+e.g.::
+
+  00019736' AHI   A7DAFF0E    CC 1
+  000198BA' BRC   A7840004 -> 000198C2'   CC 0
+  000198CE' STM   900EF068 >> 0FA95E78    CC 2
+
+
+
+Useful VM debugger commands
+---------------------------
+
+I suppose I'd better mention this before I start
+to list the current active traces do::
+
+	Q TR
+
+there can be a maximum of 255 of these per set
+( more about trace sets later ).
+
+To stop traces issue a::
+
+	TR END.
+
+To delete a particular breakpoint issue::
+
+	TR DEL <breakpoint number>
+
+The PA1 key drops to CP mode so you can issue debugger commands,
+Doing alt c (on my 3270 console at least ) clears the screen.
+
+hitting b <enter> comes back to the running operating system
+from cp mode ( in our case linux ).
+
+It is typically useful to add shortcuts to your profile.exec file
+if you have one ( this is roughly equivalent to autoexec.bat in DOS ).
+file here are a few from mine::
+
+  /* this gives me command history on issuing f12 */
+  set pf12 retrieve
+  /* this continues */
+  set pf8 imm b
+  /* goes to trace set a */
+  set pf1 imm tr goto a
+  /* goes to trace set b */
+  set pf2 imm tr goto b
+  /* goes to trace set c */
+  set pf3 imm tr goto c
+
+
+
+Instruction Tracing
+-------------------
+Setting a simple breakpoint::
+
+	TR I PSWA <address>
+
+To debug a particular function try::
+
+  TR I R <function address range>
+  TR I on its own will single step.
+  TR I DATA <MNEMONIC> <OPTIONAL RANGE> will trace for particular mnemonics
+
+e.g.::
+
+  TR I DATA 4D R 0197BC.4000
+
+will trace for BAS'es ( opcode 4D ) in the range 0197BC.4000
+
+if you were inclined you could add traces for all branch instructions &
+suffix them with the run prefix so you would have a backtrace on screen
+when a program crashes::
+
+	TR BR <INTO OR FROM> will trace branches into or out of an address.
+
+e.g.::
+
+	TR BR INTO 0
+
+is often quite useful if a program is getting awkward & deciding
+to branch to 0 & crashing as this will stop at the address before in jumps to 0.
+
+::
+
+	TR I R <address range> RUN cmd d g
+
+single steps a range of addresses but stays running &
+displays the gprs on each step.
+
+
+
+Displaying & modifying Registers
+--------------------------------
+D G
+	will display all the gprs
+
+Adding a extra G to all the commands is necessary to access the full 64 bit
+content in VM on z/Architecture. Obviously this isn't required for access
+registers as these are still 32 bit.
+
+e.g.
+
+DGG
+	instead of DG
+
+D X
+	will display all the control registers
+D AR
+	will display all the access registers
+D AR4-7
+	will display access registers 4 to 7
+CPU ALL D G
+	will display the GRPS of all CPUS in the configuration
+D PSW
+	will display the current PSW
+st PSW 2000
+	will put the value 2000 into the PSW & cause crash your machine.
+D PREFIX
+	displays the prefix offset
+
+
+Displaying Memory
+-----------------
+To display memory mapped using the current PSW's mapping try::
+
+   D <range>
+
+To make VM display a message each time it hits a particular address and
+continue try:
+
+D I<range>
+	will disassemble/display a range of instructions.
+
+ST addr 32 bit word
+	will store a 32 bit aligned address
+D T<range>
+	will display the EBCDIC in an address (if you are that way inclined)
+D R<range>
+	will display real addresses ( without DAT ) but with prefixing.
+
+There are other complex options to display if you need to get at say home space
+but are in primary space the easiest thing to do is to temporarily
+modify the PSW to the other addressing mode, display the stuff & then
+restore it.
+
+
+
+Hints
+-----
+If you want to issue a debugger command without halting your virtual machine
+with the PA1 key try prefixing the command with #CP e.g.::
+
+	#cp tr i pswa 2000
+
+also suffixing most debugger commands with RUN will cause them not
+to stop just display the mnemonic at the current instruction on the console.
+
+If you have several breakpoints you want to put into your program &
+you get fed up of cross referencing with System.map
+you can do the following trick for several symbols.
+
+::
+
+	grep do_signal System.map
+
+which emits the following among other things::
+
+	0001f4e0 T do_signal
+
+now you can do::
+
+	TR I PSWA 0001f4e0 cmd msg * do_signal
+
+This sends a message to your own console each time do_signal is entered.
+( As an aside I wrote a perl script once which automatically generated a REXX
+script with breakpoints on every kernel procedure, this isn't a good idea
+because there are thousands of these routines & VM can only set 255 breakpoints
+at a time so you nearly had to spend as long pruning the file down as you would
+entering the msgs by hand), however, the trick might be useful for a single
+object file. In the 3270 terminal emulator x3270 there is a very useful option
+in the file menu called "Save Screen In File" - this is very good for keeping a
+copy of traces.
+
+From CMS help <command name> will give you online help on a particular command.
+e.g.::
+
+	HELP DISPLAY
+
+Also CP has a file called profile.exec which automatically gets called
+on startup of CMS ( like autoexec.bat ), keeping on a DOS analogy session
+CP has a feature similar to doskey, it may be useful for you to
+use profile.exec to define some keystrokes.
+
+SET PF9 IMM B
+	This does a single step in VM on pressing F8.
+
+SET PF10  ^
+	This sets up the ^ key.
+	which can be used for ^c (ctrl-c),^z (ctrl-z) which can't be typed
+	directly into some 3270 consoles.
+
+SET PF11 ^-
+	This types the starting keystrokes for a sysrq see SysRq below.
+SET PF12 RETRIEVE
+	This retrieves command history on pressing F12.
+
+
+Sometimes in VM the display is set up to scroll automatically this
+can be very annoying if there are messages you wish to look at
+to stop this do
+
+TERM MORE 255 255
+  This will nearly stop automatic screen updates, however it will
+  cause a denial of service if lots of messages go to the 3270 console,
+  so it would be foolish to use this as the default on a production machine.
+
+
+Tracing particular processes
+----------------------------
+The kernel's text segment is intentionally at an address in memory that it will
+very seldom collide with text segments of user programs ( thanks Martin ),
+this simplifies debugging the kernel.
+However it is quite common for user processes to have addresses which collide
+this can make debugging a particular process under VM painful under normal
+circumstances as the process may change when doing a::
+
+	TR I R <address range>.
+
+Thankfully after reading VM's online help I figured out how to debug
+I particular process.
+
+Your first problem is to find the STD ( segment table designation )
+of the program you wish to debug.
+There are several ways you can do this here are a few
+
+Run::
+
+	objdump --syms <program to be debugged> | grep main
+
+To get the address of main in the program. Then::
+
+	tr i pswa <address of main>
+
+Start the program, if VM drops to CP on what looks like the entry
+point of the main function this is most likely the process you wish to debug.
+Now do a D X13 or D XG13 on z/Architecture.
+
+On 31 bit the STD is bits 1-19 ( the STO segment table origin )
+& 25-31 ( the STL segment table length ) of CR13.
+
+now type::
+
+	TR I R STD <CR13's value> 0.7fffffff
+
+e.g.::
+
+	TR I R STD 8F32E1FF 0.7fffffff
+
+Another very useful variation is::
+
+	TR STORE INTO STD <CR13's value> <address range>
+
+for finding out when a particular variable changes.
+
+An alternative way of finding the STD of a currently running process
+is to do the following, ( this method is more complex but
+could be quite convenient if you aren't updating the kernel much &
+so your kernel structures will stay constant for a reasonable period of
+time ).
+
+::
+
+	grep task /proc/<pid>/status
+
+from this you should see something like::
+
+	task: 0f160000 ksp: 0f161de8 pt_regs: 0f161f68
+
+This now gives you a pointer to the task structure.
+
+Now make::
+
+	CC:="s390-gcc -g" kernel/sched.s
+
+To get the task_struct stabinfo.
+
+( task_struct is defined in include/linux/sched.h ).
+
+Now we want to look at
+task->active_mm->pgd
+
+on my machine the active_mm in the task structure stab is
+active_mm:(4,12),672,32
+
+its offset is 672/8=84=0x54
+
+the pgd member in the mm_struct stab is
+pgd:(4,6)=*(29,5),96,32
+so its offset is 96/8=12=0xc
+
+so we'll::
+
+	hexdump -s 0xf160054 /dev/mem | more
+
+i.e. task_struct+active_mm offset
+to look at the active_mm member::
+
+	f160054 0fee cc60 0019 e334 0000 0000 0000 0011
+
+::
+
+	hexdump -s 0x0feecc6c /dev/mem | more
+
+i.e. active_mm+pgd offset::
+
+	feecc6c 0f2c 0000 0000 0001 0000 0001 0000 0010
+
+we get something like
+now do::
+
+	TR I R STD <pgd|0x7f> 0.7fffffff
+
+i.e. the 0x7f is added because the pgd only
+gives the page table origin & we need to set the low bits
+to the maximum possible segment table length.
+
+::
+
+	TR I R STD 0f2c007f 0.7fffffff
+
+on z/Architecture you'll probably need to do::
+
+	TR I R STD <pgd|0x7> 0.ffffffffffffffff
+
+to set the TableType to 0x1 & the Table length to 3.
+
+
+
+Tracing Program Exceptions
+--------------------------
+If you get a crash which says something like
+illegal operation or specification exception followed by a register dump
+You can restart linux & trace these using the tr prog <range or value> trace
+option.
+
+
+The most common ones you will normally be tracing for is:
+
+- 1=operation exception
+- 2=privileged operation exception
+- 4=protection exception
+- 5=addressing exception
+- 6=specification exception
+- 10=segment translation exception
+- 11=page translation exception
+
+The full list of these is on page 22 of the current s/390 Reference Summary.
+e.g.
+
+tr prog 10 will trace segment translation exceptions.
+
+tr prog on its own will trace all program interruption codes.
+
+Trace Sets
+----------
+On starting VM you are initially in the INITIAL trace set.
+You can do a Q TR to verify this.
+If you have a complex tracing situation where you wish to wait for instance
+till a driver is open before you start tracing IO, but know in your
+heart that you are going to have to make several runs through the code till you
+have a clue whats going on.
+
+What you can do is::
+
+	TR I PSWA <Driver open address>
+
+hit b to continue till breakpoint
+
+reach the breakpoint
+
+now do your::
+
+	TR GOTO B
+	TR IO 7c08-7c09 inst int run
+
+or whatever the IO channels you wish to trace are & hit b
+
+To got back to the initial trace set do::
+
+	TR GOTO INITIAL
+
+& the TR I PSWA <Driver open address> will be the only active breakpoint again.
+
+
+Tracing linux syscalls under VM
+-------------------------------
+Syscalls are implemented on Linux for S390 by the Supervisor call instruction
+(SVC). There 256 possibilities of these as the instruction is made up of a 0xA
+opcode and the second byte being the syscall number. They are traced using the
+simple command::
+
+	TR SVC  <Optional value or range>
+
+the syscalls are defined in linux/arch/s390/include/asm/unistd.h
+e.g. to trace all file opens just do::
+
+	TR SVC 5 ( as this is the syscall number of open )
+
+
+SMP Specific commands
+---------------------
+To find out how many cpus you have
+Q CPUS displays all the CPU's available to your virtual machine
+To find the cpu that the current cpu VM debugger commands are being directed at
+do Q CPU to change the current cpu VM debugger commands are being directed at
+do::
+
+	CPU <desired cpu no>
+
+On a SMP guest issue a command to all CPUs try prefixing the command with cpu
+all. To issue a command to a particular cpu try cpu <cpu number> e.g.::
+
+	CPU 01 TR I R 2000.3000
+
+If you are running on a guest with several cpus & you have a IO related problem
+& cannot follow the flow of code but you know it isn't smp related.
+
+from the bash prompt issue::
+
+	shutdown -h now or halt.
+
+do a::
+
+	Q CPUS
+
+to find out how many cpus you have detach each one of them from cp except
+cpu 0 by issuing a::
+
+	DETACH CPU 01-(number of cpus in configuration)
+
+& boot linux again.
+
+TR SIGP
+	will trace inter processor signal processor instructions.
+
+DEFINE CPU 01-(number in configuration)
+	will get your guests cpus back.
+
+
+Help for displaying ascii textstrings
+-------------------------------------
+On the very latest VM Nucleus'es VM can now display ascii
+( thanks Neale for the hint ) by doing::
+
+	D TX<lowaddr>.<len>
+
+e.g.::
+
+	D TX0.100
+
+Alternatively
+=============
+Under older VM debuggers (I love EBDIC too) you can use following little
+program which converts a command line of hex digits to ascii text. It can be
+compiled under linux and you can copy the hex digits from your x3270 terminal
+to your xterm if you are debugging from a linuxbox.
+
+This is quite useful when looking at a parameter passed in as a text string
+under VM ( unless you are good at decoding ASCII in your head ).
+
+e.g. consider tracing an open syscall::
+
+	TR SVC 5
+
+We have stopped at a breakpoint::
+
+	000151B0' SVC   0A05     -> 0001909A'   CC 0
+
+D 20.8 to check the SVC old psw in the prefix area and see was it from userspace
+(for the layout of the prefix area consult the "Fixed Storage Locations"
+chapter of the s/390 Reference Summary if you have it available).
+
+::
+
+  V00000020  070C2000 800151B2
+
+The problem state bit wasn't set &  it's also too early in the boot sequence
+for it to be a userspace SVC if it was we would have to temporarily switch the
+psw to user space addressing so we could get at the first parameter of the open
+in gpr2.
+
+Next do a::
+
+	D G2
+	GPR  2 =  00014CB4
+
+Now display what gpr2 is pointing to::
+
+	D 00014CB4.20
+	V00014CB4  2F646576 2F636F6E 736F6C65 00001BF5
+	V00014CC4  FC00014C B4001001 E0001000 B8070707
+
+Now copy the text till the first 00 hex ( which is the end of the string
+to an xterm & do hex2ascii on it::
+
+	hex2ascii 2F646576 2F636F6E 736F6C65 00
+
+outputs::
+
+	Decoded Hex:=/ d e v / c o n s o l e 0x00
+
+We were opening the console device,
+
+You can compile the code below yourself for practice :-),
+
+::
+
+  /*
+   *    hex2ascii.c
+   *    a useful little tool for converting a hexadecimal command line to ascii
+   *
+   *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+   *    (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation.
+   */
+  #include <stdio.h>
+
+  int main(int argc,char *argv[])
+  {
+    int cnt1,cnt2,len,toggle=0;
+    int startcnt=1;
+    unsigned char c,hex;
+
+    if(argc>1&&(strcmp(argv[1],"-a")==0))
+       startcnt=2;
+    printf("Decoded Hex:=");
+    for(cnt1=startcnt;cnt1<argc;cnt1++)
+    {
+      len=strlen(argv[cnt1]);
+      for(cnt2=0;cnt2<len;cnt2++)
+      {
+	 c=argv[cnt1][cnt2];
+	 if(c>='0'&&c<='9')
+	  c=c-'0';
+	 if(c>='A'&&c<='F')
+	  c=c-'A'+10;
+	 if(c>='a'&&c<='f')
+	  c=c-'a'+10;
+	 switch(toggle)
+	 {
+	  case 0:
+	     hex=c<<4;
+	     toggle=1;
+	  break;
+	  case 1:
+	     hex+=c;
+	     if(hex<32||hex>127)
+	     {
+		if(startcnt==1)
+		   printf("0x%02X ",(int)hex);
+		else
+		   printf(".");
+	     }
+	     else
+	     {
+	       printf("%c",hex);
+	       if(startcnt==1)
+		  printf(" ");
+	     }
+	     toggle=0;
+	  break;
+	 }
+      }
+    }
+    printf("\n");
+  }
+
+
+
+
+Stack tracing under VM
+----------------------
+A basic backtrace
+-----------------
+
+Here are the tricks I use 9 out of 10 times it works pretty well,
+
+When your backchain reaches a dead end
+--------------------------------------
+This can happen when an exception happens in the kernel and the kernel is
+entered twice. If you reach the NULL pointer at the end of the back chain you
+should be able to sniff further back if you follow the following tricks.
+1) A kernel address should be easy to recognise since it is in
+primary space & the problem state bit isn't set & also
+The Hi bit of the address is set.
+2) Another backchain should also be easy to recognise since it is an
+address pointing to another address approximately 100 bytes or 0x70 hex
+behind the current stackpointer.
+
+
+Here is some practice.
+
+boot the kernel & hit PA1 at some random time
+
+d g to display the gprs, this should display something like::
+
+  GPR  0 =  00000001  00156018  0014359C  00000000
+  GPR  4 =  00000001  001B8888  000003E0  00000000
+  GPR  8 =  00100080  00100084  00000000  000FE000
+  GPR 12 =  00010400  8001B2DC  8001B36A  000FFED8
+
+Note that GPR14 is a return address but as we are real men we are going to
+trace the stack.
+display 0x40 bytes after the stack pointer::
+
+  V000FFED8  000FFF38 8001B838 80014C8E 000FFF38
+  V000FFEE8  00000000 00000000 000003E0 00000000
+  V000FFEF8  00100080 00100084 00000000 000FE000
+  V000FFF08  00010400 8001B2DC 8001B36A 000FFED8
+
+
+Ah now look at whats in sp+56 (sp+0x38) this is 8001B36A our saved r14 if
+you look above at our stackframe & also agrees with GPR14.
+
+now backchain::
+
+	d 000FFF38.40
+
+we now are taking the contents of SP to get our first backchain::
+
+  V000FFF38  000FFFA0 00000000 00014995 00147094
+  V000FFF48  00147090 001470A0 000003E0 00000000
+  V000FFF58  00100080 00100084 00000000 001BF1D0
+  V000FFF68  00010400 800149BA 80014CA6 000FFF38
+
+This displays a 2nd return address of 80014CA6
+
+now do::
+
+	d 000FFFA0.40
+
+for our 3rd backchain::
+
+  V000FFFA0  04B52002 0001107F 00000000 00000000
+  V000FFFB0  00000000 00000000 FF000000 0001107F
+  V000FFFC0  00000000 00000000 00000000 00000000
+  V000FFFD0  00010400 80010802 8001085A 000FFFA0
+
+
+our 3rd return address is 8001085A
+
+as the 04B52002 looks suspiciously like rubbish it is fair to assume that the
+kernel entry routines for the sake of optimisation don't set up a backchain.
+
+now look at System.map to see if the addresses make any sense::
+
+	grep -i 0001b3 System.map
+
+outputs among other things::
+
+	0001b304 T cpu_idle
+
+so 8001B36A
+is cpu_idle+0x66 ( quiet the cpu is asleep, don't wake it )
+
+::
+
+	grep -i 00014 System.map
+
+produces among other things::
+
+	00014a78 T start_kernel
+
+so 0014CA6 is start_kernel+some hex number I can't add in my head.
+
+::
+
+	grep -i 00108 System.map
+
+this produces::
+
+	00010800 T _stext
+
+so   8001085A is _stext+0x5a
+
+Congrats you've done your first backchain.
+
+
+
+s/390 & z/Architecture IO Overview
+==================================
+
+I am not going to give a course in 390 IO architecture as this would take me
+quite a while and I'm no expert. Instead I'll give a 390 IO architecture
+summary for Dummies. If you have the s/390 principles of operation available
+read this instead. If nothing else you may find a few useful keywords in here
+and be able to use them on a web search engine to find more useful information.
+
+Unlike other bus architectures modern 390 systems do their IO using mostly
+fibre optics and devices such as tapes and disks can be shared between several
+mainframes. Also S390 can support up to 65536 devices while a high end PC based
+system might be choking with around 64.
+
+Here is some of the common IO terminology:
+
+Subchannel:
+  This is the logical number most IO commands use to talk to an IO device. There
+  can be up to 0x10000 (65536) of these in a configuration, typically there are a
+  few hundred. Under VM for simplicity they are allocated contiguously, however
+  on the native hardware they are not. They typically stay consistent between
+  boots provided no new hardware is inserted or removed.
+
+  Under Linux for s390 we use these as IRQ's and also when issuing an IO command
+  (CLEAR SUBCHANNEL, HALT SUBCHANNEL, MODIFY SUBCHANNEL, RESUME SUBCHANNEL,
+  START SUBCHANNEL, STORE SUBCHANNEL and TEST SUBCHANNEL). We use this as the ID
+  of the device we wish to talk to. The most important of these instructions are
+  START SUBCHANNEL (to start IO), TEST SUBCHANNEL (to check whether the IO
+  completed successfully) and HALT SUBCHANNEL (to kill IO). A subchannel can have
+  up to 8 channel paths to a device, this offers redundancy if one is not
+  available.
+
+Device Number:
+  This number remains static and is closely tied to the hardware. There are 65536
+  of these, made up of a CHPID (Channel Path ID, the most significant 8 bits) and
+  another lsb 8 bits. These remain static even if more devices are inserted or
+  removed from the hardware. There is a 1 to 1 mapping between subchannels and
+  device numbers, provided devices aren't inserted or removed.
+
+Channel Control Words:
+  CCWs are linked lists of instructions initially pointed to by an operation
+  request block (ORB), which is initially given to Start Subchannel (SSCH)
+  command along with the subchannel number for the IO subsystem to process
+  while the CPU continues executing normal code.
+  CCWs come in two flavours, Format 0 (24 bit for backward compatibility) and
+  Format 1 (31 bit). These are typically used to issue read and write (and many
+  other) instructions. They consist of a length field and an absolute address
+  field.
+
+  Each IO typically gets 1 or 2 interrupts, one for channel end (primary status)
+  when the channel is idle, and the second for device end (secondary status).
+  Sometimes you get both concurrently. You check how the IO went on by issuing a
+  TEST SUBCHANNEL at each interrupt, from which you receive an Interruption
+  response block (IRB). If you get channel and device end status in the IRB
+  without channel checks etc. your IO probably went okay. If you didn't you
+  probably need to examine the IRB, extended status word etc.
+  If an error occurs, more sophisticated control units have a facility known as
+  concurrent sense. This means that if an error occurs Extended sense information
+  will be presented in the Extended status word in the IRB. If not you have to
+  issue a subsequent SENSE CCW command after the test subchannel.
+
+
+TPI (Test pending interrupt) can also be used for polled IO, but in
+multitasking multiprocessor systems it isn't recommended except for
+checking special cases (i.e. non looping checks for pending IO etc.).
+
+Store Subchannel and Modify Subchannel can be used to examine and modify
+operating characteristics of a subchannel (e.g. channel paths).
+
+Other IO related Terms:
+
+Sysplex:
+  S390's Clustering Technology
+QDIO:
+  S390's new high speed IO architecture to support devices such as gigabit
+  ethernet, this architecture is also designed to be forward compatible with
+  upcoming 64 bit machines.
+
+
+General Concepts
+----------------
+
+Input Output Processors (IOP's) are responsible for communicating between
+the mainframe CPU's & the channel & relieve the mainframe CPU's from the
+burden of communicating with IO devices directly, this allows the CPU's to
+concentrate on data processing.
+
+IOP's can use one or more links ( known as channel paths ) to talk to each
+IO device. It first checks for path availability & chooses an available one,
+then starts ( & sometimes terminates IO ).
+There are two types of channel path: ESCON & the Parallel IO interface.
+
+IO devices are attached to control units, control units provide the
+logic to interface the channel paths & channel path IO protocols to
+the IO devices, they can be integrated with the devices or housed separately
+& often talk to several similar devices ( typical examples would be raid
+controllers or a control unit which connects to 1000 3270 terminals )::
+
+
+      +---------------------------------------------------------------+
+      | +-----+ +-----+ +-----+ +-----+  +----------+  +----------+   |
+      | | CPU | | CPU | | CPU | | CPU |  |  Main    |  | Expanded |   |
+      | |     | |     | |     | |     |  |  Memory  |  |  Storage |   |
+      | +-----+ +-----+ +-----+ +-----+  +----------+  +----------+   |
+      |---------------------------------------------------------------+
+      |   IOP        |      IOP      |       IOP                      |
+      |---------------------------------------------------------------
+      | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C |
+      ----------------------------------------------------------------
+	   ||                                              ||
+	   ||  Bus & Tag Channel Path                      || ESCON
+	   ||  ======================                      || Channel
+	   ||  ||                  ||                      || Path
+      +----------+               +----------+         +----------+
+      |          |               |          |         |          |
+      |    CU    |               |    CU    |         |    CU    |
+      |          |               |          |         |          |
+      +----------+               +----------+         +----------+
+	 |      |                     |                |       |
+  +----------+ +----------+      +----------+   +----------+ +----------+
+  |I/O Device| |I/O Device|      |I/O Device|   |I/O Device| |I/O Device|
+  +----------+ +----------+      +----------+   +----------+ +----------+
+    CPU = Central Processing Unit
+    C = Channel
+    IOP = IP Processor
+    CU = Control Unit
+
+The 390 IO systems come in 2 flavours the current 390 machines support both
+
+The Older 360 & 370 Interface,sometimes called the Parallel I/O interface,
+sometimes called Bus-and Tag & sometimes Original Equipment Manufacturers
+Interface (OEMI).
+
+This byte wide Parallel channel path/bus has parity & data on the "Bus" cable
+and control lines on the "Tag" cable. These can operate in byte multiplex mode
+for sharing between several slow devices or burst mode and monopolize the
+channel for the whole burst. Up to 256 devices can be addressed on one of these
+cables. These cables are about one inch in diameter. The maximum unextended
+length supported by these cables is 125 Meters but this can be extended up to
+2km with a fibre optic channel extended such as a 3044. The maximum burst speed
+supported is 4.5 megabytes per second. However, some really old processors
+support only transfer rates of 3.0, 2.0 & 1.0 MB/sec.
+One of these paths can be daisy chained to up to 8 control units.
+
+
+ESCON if fibre optic it is also called FICON
+Was introduced by IBM in 1990. Has 2 fibre optic cables and uses either leds or
+lasers for communication at a signaling rate of up to 200 megabits/sec. As
+10bits are transferred for every 8 bits info this drops to 160 megabits/sec
+and to 18.6 Megabytes/sec once control info and CRC are added. ESCON only
+operates in burst mode.
+
+ESCONs typical max cable length is 3km for the led version and 20km for the
+laser version known as XDF (extended distance facility). This can be further
+extended by using an ESCON director which triples the above mentioned ranges.
+Unlike Bus & Tag as ESCON is serial it uses a packet switching architecture,
+the standard Bus & Tag control protocol is however present within the packets.
+Up to 256 devices can be attached to each control unit that uses one of these
+interfaces.
+
+Common 390 Devices include:
+Network adapters typically OSA2,3172's,2116's & OSA-E gigabit ethernet adapters,
+Consoles 3270 & 3215 (a teletype emulated under linux for a line mode console).
+DASD's direct access storage devices ( otherwise known as hard disks ).
+Tape Drives.
+CTC ( Channel to Channel Adapters ),
+ESCON or Parallel Cables used as a very high speed serial link
+between 2 machines.
+
+
+Debugging IO on s/390 & z/Architecture under VM
+===============================================
+
+Now we are ready to go on with IO tracing commands under VM
+
+A few self explanatory queries::
+
+	Q OSA
+	Q CTC
+	Q DISK ( This command is CMS specific )
+	Q DASD
+
+Q OSA on my machine returns::
+
+	OSA  7C08 ON OSA   7C08 SUBCHANNEL = 0000
+	OSA  7C09 ON OSA   7C09 SUBCHANNEL = 0001
+	OSA  7C14 ON OSA   7C14 SUBCHANNEL = 0002
+	OSA  7C15 ON OSA   7C15 SUBCHANNEL = 0003
+
+If you have a guest with certain privileges you may be able to see devices
+which don't belong to you. To avoid this, add the option V.
+e.g.::
+
+	Q V OSA
+
+Now using the device numbers returned by this command we will
+Trace the io starting up on the first device 7c08 & 7c09
+In our simplest case we can trace the
+start subchannels
+like TR SSCH 7C08-7C09
+or the halt subchannels
+or TR HSCH 7C08-7C09
+MSCH's ,STSCH's I think you can guess the rest
+
+A good trick is tracing all the IO's and CCWS and spooling them into the reader
+of another VM guest so he can ftp the logfile back to his own machine. I'll do
+a small bit of this and give you a look at the output.
+
+1) Spool stdout to VM reader::
+
+	SP PRT TO (another vm guest ) or * for the local vm guest
+
+2) Fill the reader with the trace::
+
+	TR IO 7c08-7c09 INST INT CCW PRT RUN
+
+3) Start up linux::
+
+	i 00c
+4) Finish the trace::
+
+	TR END
+
+5) close the reader::
+
+	C PRT
+
+6) list reader contents::
+
+	RDRLIST
+
+7) copy it to linux4's minidisk::
+
+	RECEIVE / LOG TXT A1 ( replace
+
+8)
+filel & press F11 to look at it
+You should see something like::
+
+  00020942' SSCH  B2334000    0048813C    CC 0    SCH 0000    DEV 7C08
+	    CPA 000FFDF0   PARM 00E2C9C4    KEY 0  FPI C0  LPM 80
+	    CCW    000FFDF0  E4200100 00487FE8   0000  E4240100 ........
+	    IDAL                                      43D8AFE8
+	    IDAL                                      0FB76000
+  00020B0A'   I/O DEV 7C08 -> 000197BC'   SCH 0000   PARM 00E2C9C4
+  00021628' TSCH  B2354000 >> 00488164    CC 0    SCH 0000    DEV 7C08
+	    CCWA 000FFDF8   DEV STS 0C  SCH STS 00  CNT 00EC
+	     KEY 0   FPI C0  CC 0   CTLS 4007
+  00022238' STSCH B2344000 >> 00488108    CC 0    SCH 0000    DEV 7C08
+
+If you don't like messing up your readed ( because you possibly booted from it )
+you can alternatively spool it to another readers guest.
+
+
+Other common VM device related commands
+---------------------------------------------
+These commands are listed only because they have
+been of use to me in the past & may be of use to
+you too. For more complete info on each of the commands
+use type HELP <command> from CMS.
+
+detaching devices::
+
+	DET <devno range>
+	ATT <devno range> <guest>
+
+attach a device to guest * for your own guest
+
+READY <devno>
+	cause VM to issue a fake interrupt.
+
+The VARY command is normally only available to VM administrators::
+
+	VARY ON PATH <path> TO <devno range>
+	VARY OFF PATH <PATH> FROM <devno range>
+
+This is used to switch on or off channel paths to devices.
+
+Q CHPID <channel path ID>
+   This displays state of devices using this channel path
+
+D SCHIB <subchannel>
+   This displays the subchannel information SCHIB block for the device.
+   this I believe is also only available to administrators.
+
+DEFINE CTC <devno>
+  defines a virtual CTC channel to channel connection
+  2 need to be defined on each guest for the CTC driver to use.
+
+COUPLE  devno userid remote devno
+  Joins a local virtual device to a remote virtual device
+  ( commonly used for the CTC driver ).
+
+Building a VM ramdisk under CMS which linux can use::
+
+	def vfb-<blocksize> <subchannel> <number blocks>
+
+blocksize is commonly 4096 for linux.
+
+Formatting it::
+
+	format <subchannel> <driver letter e.g. x> (blksize <blocksize>
+
+Sharing a disk between multiple guests::
+
+	LINK userid devno1 devno2 mode password
+
+
+
+GDB on S390
+===========
+N.B. if compiling for debugging gdb works better without optimisation
+( see Compiling programs for debugging )
+
+invocation
+----------
+gdb <victim program> <optional corefile>
+
+Online help
+-----------
+help: gives help on commands
+
+e.g.::
+
+	help
+	help display
+
+Note gdb's online help is very good use it.
+
+
+Assembly
+--------
+info registers:
+  displays registers other than floating point.
+
+info all-registers:
+  displays floating points as well.
+
+disassemble:
+  disassembles
+
+e.g.::
+
+	disassemble without parameters will disassemble the current function
+	disassemble $pc $pc+10
+
+Viewing & modifying variables
+-----------------------------
+print or p:
+  displays variable or register
+
+e.g. p/x $sp will display the stack pointer
+
+display:
+  prints variable or register each time program stops
+
+e.g.::
+
+	display/x $pc will display the program counter
+	display argc
+
+undisplay:
+  undo's display's
+
+info breakpoints:
+  shows all current breakpoints
+
+info stack:
+  shows stack back trace (if this doesn't work too well, I'll show
+  you the stacktrace by hand below).
+
+info locals:
+  displays local variables.
+
+info args:
+  display current procedure arguments.
+
+set args:
+  will set argc & argv each time the victim program is invoked
+
+e.g.::
+
+	set <variable>=value
+	set argc=100
+	set $pc=0
+
+
+
+Modifying execution
+-------------------
+step:
+  steps n lines of sourcecode
+
+step
+  steps 1 line.
+
+step 100
+  steps 100 lines of code.
+
+next:
+	like step except this will not step into subroutines
+
+stepi:
+	steps a single machine code instruction.
+
+e.g.::
+
+	stepi 100
+
+nexti:
+	steps a single machine code instruction but will not step into
+	subroutines.
+
+finish:
+	will run until exit of the current routine
+
+run:
+	(re)starts a program
+
+cont:
+	continues a program
+
+quit:
+	exits gdb.
+
+
+breakpoints
+------------
+
+break
+  sets a breakpoint
+
+e.g.::
+
+	break main
+	break *$pc
+	break *0x400618
+
+Here's a really useful one for large programs
+
+rbr
+	Set a breakpoint for all functions matching REGEXP
+
+e.g.::
+
+	rbr 390
+
+will set a breakpoint with all functions with 390 in their name.
+
+info breakpoints
+	lists all breakpoints
+
+delete:
+	delete breakpoint by number or delete them all
+
+e.g.
+
+delete 1
+	will delete the first breakpoint
+
+
+delete
+	will delete them all
+
+watch:
+	This will set a watchpoint ( usually hardware assisted ),
+
+This will watch a variable till it changes
+
+e.g.
+
+watch cnt
+	will watch the variable cnt till it changes.
+
+As an aside unfortunately gdb's, architecture independent watchpoint code
+is inconsistent & not very good, watchpoints usually work but not always.
+
+info watchpoints:
+	Display currently active watchpoints
+
+condition: ( another useful one )
+	Specify breakpoint number N to break only if COND is true.
+
+Usage is `condition N COND`, where N is an integer and COND is an
+expression to be evaluated whenever breakpoint N is reached.
+
+
+
+User defined functions/macros
+-----------------------------
+define: ( Note this is very very useful,simple & powerful )
+
+usage define <name> <list of commands> end
+
+examples which you should consider putting into .gdbinit in your home
+directory::
+
+	define d
+	stepi
+	disassemble $pc $pc+10
+	end
+	define e
+	nexti
+	disassemble $pc $pc+10
+	end
+
+
+Other hard to classify stuff
+----------------------------
+signal n:
+   sends the victim program a signal.
+
+e.g. `signal 3` will send a SIGQUIT.
+
+info signals:
+	what gdb does when the victim receives certain signals.
+
+list:
+
+e.g.:
+
+list
+	lists current function source
+list 1,10
+	list first 10 lines of current file.
+
+list test.c:1,10
+
+
+directory:
+  Adds directories to be searched for source if gdb cannot find the source.
+  (note it is a bit sensitive about slashes)
+
+e.g. To add the root of the filesystem to the searchpath do::
+
+	directory //
+
+
+call <function>
+This calls a function in the victim program, this is pretty powerful
+e.g.
+(gdb) call printf("hello world")
+outputs:
+$1 = 11
+
+You might now be thinking that the line above didn't work, something extra had
+to be done.
+(gdb) call fflush(stdout)
+hello world$2 = 0
+As an aside the debugger also calls malloc & free under the hood
+to make space for the "hello world" string.
+
+
+
+hints
+-----
+1) command completion works just like bash
+   ( if you are a bad typist like me this really helps )
+
+e.g. hit br <TAB> & cursor up & down :-).
+
+2) if you have a debugging problem that takes a few steps to recreate
+put the steps into a file called .gdbinit in your current working directory
+if you have defined a few extra useful user defined commands put these in
+your home directory & they will be read each time gdb is launched.
+
+A typical .gdbinit file might be.::
+
+	break main
+	run
+	break runtime_exception
+	cont
+
+
+stack chaining in gdb by hand
+-----------------------------
+This is done using a the same trick described for VM::
+
+	p/x (*($sp+56))&0x7fffffff
+
+get the first backchain.
+
+For z/Architecture
+Replace 56 with 112 & ignore the &0x7fffffff
+in the macros below & do nasty casts to longs like the following
+as gdb unfortunately deals with printed arguments as ints which
+messes up everything.
+
+i.e. here is a 3rd backchain dereference::
+
+	p/x *(long *)(***(long ***)$sp+112)
+
+
+this outputs::
+
+	$5 = 0x528f18
+
+on my machine.
+
+Now you can use::
+
+	info symbol (*($sp+56))&0x7fffffff
+
+you might see something like::
+
+	rl_getc + 36 in section .text
+
+telling you what is located at address 0x528f18
+Now do::
+
+	p/x (*(*$sp+56))&0x7fffffff
+
+This outputs::
+
+	$6 = 0x528ed0
+
+Now do::
+
+	info symbol (*(*$sp+56))&0x7fffffff
+	rl_read_key + 180 in section .text
+
+now do::
+
+	p/x (*(**$sp+56))&0x7fffffff
+
+& so on.
+
+Disassembling instructions without debug info
+---------------------------------------------
+gdb typically complains if there is a lack of debugging
+symbols in the disassemble command with
+"No function contains specified address." To get around
+this do::
+
+	x/<number lines to disassemble>xi <address>
+
+e.g.::
+
+	x/20xi 0x400730
+
+
+
+Note:
+  Remember gdb has history just like bash you don't need to retype the
+  whole line just use the up & down arrows.
+
+
+
+For more info
+-------------
+From your linuxbox do::
+
+	man gdb
+
+or::
+
+	info gdb.
+
+core dumps
+----------
+
+What a core dump ?
+^^^^^^^^^^^^^^^^^^
+
+A core dump is a file generated by the kernel (if allowed) which contains the
+registers and all active pages of the program which has crashed.
+
+From this file gdb will allow you to look at the registers, stack trace and
+memory of the program as if it just crashed on your system. It is usually
+called core and created in the current working directory.
+
+This is very useful in that a customer can mail a core dump to a technical
+support department and the technical support department can reconstruct what
+happened. Provided they have an identical copy of this program with debugging
+symbols compiled in and the source base of this build is available.
+
+In short it is far more useful than something like a crash log could ever hope
+to be.
+
+Why have I never seen one ?
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Probably because you haven't used the command::
+
+	ulimit -c unlimited in bash
+
+to allow core dumps, now do::
+
+	ulimit -a
+
+to verify that the limit was accepted.
+
+A sample core dump
+   To create this I'm going to do::
+
+	ulimit -c unlimited
+	gdb
+
+to launch gdb (my victim app. ) now be bad & do the following from another
+telnet/xterm session to the same machine::
+
+	ps -aux | grep gdb
+	kill -SIGSEGV <gdb's pid>
+
+or alternatively use `killall -SIGSEGV gdb` if you have the killall command.
+
+Now look at the core dump::
+
+	./gdb core
+
+Displays the following::
+
+  GNU gdb 4.18
+  Copyright 1998 Free Software Foundation, Inc.
+  GDB is free software, covered by the GNU General Public License, and you are
+  welcome to change it and/or distribute copies of it under certain conditions.
+  Type "show copying" to see the conditions.
+  There is absolutely no warranty for GDB.  Type "show warranty" for details.
+  This GDB was configured as "s390-ibm-linux"...
+  Core was generated by `./gdb'.
+  Program terminated with signal 11, Segmentation fault.
+  Reading symbols from /usr/lib/libncurses.so.4...done.
+  Reading symbols from /lib/libm.so.6...done.
+  Reading symbols from /lib/libc.so.6...done.
+  Reading symbols from /lib/ld-linux.so.2...done.
+  #0  0x40126d1a in read () from /lib/libc.so.6
+  Setting up the environment for debugging gdb.
+  Breakpoint 1 at 0x4dc6f8: file utils.c, line 471.
+  Breakpoint 2 at 0x4d87a4: file top.c, line 2609.
+  (top-gdb) info stack
+  #0  0x40126d1a in read () from /lib/libc.so.6
+  #1  0x528f26 in rl_getc (stream=0x7ffffde8) at input.c:402
+  #2  0x528ed0 in rl_read_key () at input.c:381
+  #3  0x5167e6 in readline_internal_char () at readline.c:454
+  #4  0x5168ee in readline_internal_charloop () at readline.c:507
+  #5  0x51692c in readline_internal () at readline.c:521
+  #6  0x5164fe in readline (prompt=0x7ffff810)
+      at readline.c:349
+  #7  0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1,
+      annotation_suffix=0x4d6b44 "prompt") at top.c:2091
+  #8  0x4d6cf0 in command_loop () at top.c:1345
+  #9  0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635
+
+
+LDD
+===
+This is a program which lists the shared libraries which a library needs,
+Note you also get the relocations of the shared library text segments which
+help when using objdump --source.
+
+e.g.::
+
+	ldd ./gdb
+
+outputs::
+
+  libncurses.so.4 => /usr/lib/libncurses.so.4 (0x40018000)
+  libm.so.6 => /lib/libm.so.6 (0x4005e000)
+  libc.so.6 => /lib/libc.so.6 (0x40084000)
+  /lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x40000000)
+
+
+Debugging shared libraries
+==========================
+Most programs use shared libraries, however it can be very painful
+when you single step instruction into a function like printf for the
+first time & you end up in functions like _dl_runtime_resolve this is
+the ld.so doing lazy binding, lazy binding is a concept in ELF where
+shared library functions are not loaded into memory unless they are
+actually used, great for saving memory but a pain to debug.
+
+To get around this either relink the program -static or exit gdb type
+export LD_BIND_NOW=true this will stop lazy binding & restart the gdb'ing
+the program in question.
+
+
+
+Debugging modules
+=================
+As modules are dynamically loaded into the kernel their address can be
+anywhere to get around this use the -m option with insmod to emit a load
+map which can be piped into a file if required.
+
+The proc file system
+====================
+What is it ?.
+It is a filesystem created by the kernel with files which are created on demand
+by the kernel if read, or can be used to modify kernel parameters,
+it is a powerful concept.
+
+e.g.::
+
+	cat /proc/sys/net/ipv4/ip_forward
+
+On my machine outputs::
+
+	0
+
+telling me ip_forwarding is not on to switch it on I can do::
+
+	echo 1 >  /proc/sys/net/ipv4/ip_forward
+
+cat it again::
+
+	cat /proc/sys/net/ipv4/ip_forward
+
+On my machine now outputs::
+
+	1
+
+IP forwarding is on.
+
+There is a lot of useful info in here best found by going in and having a look
+around, so I'll take you through some entries I consider important.
+
+All the processes running on the machine have their own entry defined by
+/proc/<pid>
+
+So lets have a look at the init process::
+
+	cd /proc/1
+	cat cmdline
+
+emits::
+
+	init [2]
+
+::
+
+	cd /proc/1/fd
+
+This contains numerical entries of all the open files,
+some of these you can cat e.g. stdout (2)::
+
+	cat /proc/29/maps
+
+on my machine emits::
+
+  00400000-00478000 r-xp 00000000 5f:00 4103       /bin/bash
+  00478000-0047e000 rw-p 00077000 5f:00 4103       /bin/bash
+  0047e000-00492000 rwxp 00000000 00:00 0
+  40000000-40015000 r-xp 00000000 5f:00 14382      /lib/ld-2.1.2.so
+  40015000-40016000 rw-p 00014000 5f:00 14382      /lib/ld-2.1.2.so
+  40016000-40017000 rwxp 00000000 00:00 0
+  40017000-40018000 rw-p 00000000 00:00 0
+  40018000-4001b000 r-xp 00000000 5f:00 14435      /lib/libtermcap.so.2.0.8
+  4001b000-4001c000 rw-p 00002000 5f:00 14435      /lib/libtermcap.so.2.0.8
+  4001c000-4010d000 r-xp 00000000 5f:00 14387      /lib/libc-2.1.2.so
+  4010d000-40111000 rw-p 000f0000 5f:00 14387      /lib/libc-2.1.2.so
+  40111000-40114000 rw-p 00000000 00:00 0
+  40114000-4011e000 r-xp 00000000 5f:00 14408      /lib/libnss_files-2.1.2.so
+  4011e000-4011f000 rw-p 00009000 5f:00 14408      /lib/libnss_files-2.1.2.so
+  7fffd000-80000000 rwxp ffffe000 00:00 0
+
+
+Showing us the shared libraries init uses where they are in memory
+& memory access permissions for each virtual memory area.
+
+/proc/1/cwd is a softlink to the current working directory.
+
+/proc/1/root is the root of the filesystem for this process.
+
+/proc/1/mem is the current running processes memory which you
+can read & write to like a file.
+
+strace uses this sometimes as it is a bit faster than the
+rather inefficient ptrace interface for peeking at DATA.
+
+::
+
+  cat status
+
+  Name:   init
+  State:  S (sleeping)
+  Pid:    1
+  PPid:   0
+  Uid:    0       0       0       0
+  Gid:    0       0       0       0
+  Groups:
+  VmSize:      408 kB
+  VmLck:         0 kB
+  VmRSS:       208 kB
+  VmData:       24 kB
+  VmStk:         8 kB
+  VmExe:       368 kB
+  VmLib:         0 kB
+  SigPnd: 0000000000000000
+  SigBlk: 0000000000000000
+  SigIgn: 7fffffffd7f0d8fc
+  SigCgt: 00000000280b2603
+  CapInh: 00000000fffffeff
+  CapPrm: 00000000ffffffff
+  CapEff: 00000000fffffeff
+
+  User PSW:    070de000 80414146
+  task: 004b6000 tss: 004b62d8 ksp: 004b7ca8 pt_regs: 004b7f68
+  User GPRS:
+  00000400  00000000  0000000b  7ffffa90
+  00000000  00000000  00000000  0045d9f4
+  0045cafc  7ffffa90  7fffff18  0045cb08
+  00010400  804039e8  80403af8  7ffff8b0
+  User ACRS:
+  00000000  00000000  00000000  00000000
+  00000001  00000000  00000000  00000000
+  00000000  00000000  00000000  00000000
+  00000000  00000000  00000000  00000000
+  Kernel BackChain  CallChain    BackChain  CallChain
+	 004b7ca8   8002bd0c     004b7d18   8002b92c
+	 004b7db8   8005cd50     004b7e38   8005d12a
+	 004b7f08   80019114
+
+Showing among other things memory usage & status of some signals &
+the processes'es registers from the kernel task_structure
+as well as a backchain which may be useful if a process crashes
+in the kernel for some unknown reason.
+
+Some driver debugging techniques
+================================
+debug feature
+-------------
+Some of our drivers now support a "debug feature" in
+/proc/s390dbf see s390dbf.txt in the linux/Documentation directory
+for more info.
+
+e.g.
+to switch on the lcs "debug feature"::
+
+	echo 5 > /proc/s390dbf/lcs/level
+
+& then after the error occurred::
+
+	cat /proc/s390dbf/lcs/sprintf >/logfile
+
+the logfile now contains some information which may help
+tech support resolve a problem in the field.
+
+
+
+high level debugging network drivers
+------------------------------------
+ifconfig is a quite useful command
+it gives the current state of network drivers.
+
+If you suspect your network device driver is dead
+one way to check is type::
+
+	ifconfig <network device>
+
+e.g. tr0
+
+You should see something like::
+
+	ifconfig tr0
+	tr0      Link encap:16/4 Mbps Token Ring (New)  HWaddr 00:04:AC:20:8E:48
+		inet addr:9.164.185.132  Bcast:9.164.191.255  Mask:255.255.224.0
+		UP BROADCAST RUNNING MULTICAST  MTU:2000  Metric:1
+		RX packets:246134 errors:0 dropped:0 overruns:0 frame:0
+		TX packets:5 errors:0 dropped:0 overruns:0 carrier:0
+		collisions:0 txqueuelen:100
+
+if the device doesn't say up
+try::
+
+	/etc/rc.d/init.d/network start
+
+( this starts the network stack & hopefully calls ifconfig tr0 up ).
+ifconfig looks at the output of /proc/net/dev and presents it in a more
+presentable form.
+
+Now ping the device from a machine in the same subnet.
+
+if the RX packets count & TX packets counts don't increment you probably
+have problems.
+
+next::
+
+	cat /proc/net/arp
+
+Do you see any hardware addresses in the cache if not you may have problems.
+Next try::
+
+	ping -c 5 <broadcast_addr>
+
+i.e. the Bcast field above in the output of
+ifconfig. Do you see any replies from machines other than the local machine
+if not you may have problems. also if the TX packets count in ifconfig
+hasn't incremented either you have serious problems in your driver
+(e.g. the txbusy field of the network device being stuck on )
+or you may have multiple network devices connected.
+
+
+chandev
+-------
+There is a new device layer for channel devices, some
+drivers e.g. lcs are registered with this layer.
+
+If the device uses the channel device layer you'll be
+able to find what interrupts it uses & the current state
+of the device.
+
+See the manpage chandev.8 &type cat /proc/chandev for more info.
+
+
+SysRq
+=====
+This is now supported by linux for s/390 & z/Architecture.
+
+To enable it do compile the kernel with::
+
+	Kernel Hacking -> Magic SysRq Key Enabled
+
+Then::
+
+	echo "1" > /proc/sys/kernel/sysrq
+
+also type::
+
+	echo "8" >/proc/sys/kernel/printk
+
+To make printk output go to console.
+
+On 390 all commands are prefixed with::
+
+	^-
+
+e.g.::
+
+	^-t will show tasks.
+	^-? or some unknown command will display help.
+
+The sysrq key reading is very picky ( I have to type the keys in an
+xterm session & paste them  into the x3270 console )
+& it may be wise to predefine the keys as described in the VM hints above
+
+This is particularly useful for syncing disks unmounting & rebooting
+if the machine gets partially hung.
+
+Read Documentation/admin-guide/sysrq.rst for more info
+
+References:
+===========
+- Enterprise Systems Architecture Reference Summary
+- Enterprise Systems Architecture Principles of Operation
+- Hartmut Penners s390 stack frame sheet.
+- IBM Mainframe Channel Attachment a technology brief from a CISCO webpage
+- Various bits of man & info pages of Linux.
+- Linux & GDB source.
+- Various info & man pages.
+- CMS Help on tracing commands.
+- Linux for s/390 Elf Application Binary Interface
+- Linux for z/Series Elf Application Binary Interface ( Both Highly Recommended )
+- z/Architecture Principles of Operation SA22-7832-00
+- Enterprise Systems Architecture/390 Reference Summary SA22-7209-01 & the
+- Enterprise Systems Architecture/390 Principles of Operation SA22-7201-05
+
+Special Thanks
+==============
+Special thanks to Neale Ferguson who maintains a much
+prettier HTML version of this page at
+http://linuxvm.org/penguinvm/
+Bob Grainger Stefan Bader & others for reporting bugs
diff --git a/Documentation/s390/driver-model.rst b/Documentation/s390/driver-model.rst
new file mode 100644
index 000000000000..ad4bc2dbea43
--- /dev/null
+++ b/Documentation/s390/driver-model.rst
@@ -0,0 +1,328 @@
+=============================
+S/390 driver model interfaces
+=============================
+
+1. CCW devices
+--------------
+
+All devices which can be addressed by means of ccws are called 'CCW devices' -
+even if they aren't actually driven by ccws.
+
+All ccw devices are accessed via a subchannel, this is reflected in the
+structures under devices/::
+
+  devices/
+     - system/
+     - css0/
+	   - 0.0.0000/0.0.0815/
+	   - 0.0.0001/0.0.4711/
+	   - 0.0.0002/
+	   - 0.1.0000/0.1.1234/
+	   ...
+	   - defunct/
+
+In this example, device 0815 is accessed via subchannel 0 in subchannel set 0,
+device 4711 via subchannel 1 in subchannel set 0, and subchannel 2 is a non-I/O
+subchannel. Device 1234 is accessed via subchannel 0 in subchannel set 1.
+
+The subchannel named 'defunct' does not represent any real subchannel on the
+system; it is a pseudo subchannel where disconnected ccw devices are moved to
+if they are displaced by another ccw device becoming operational on their
+former subchannel. The ccw devices will be moved again to a proper subchannel
+if they become operational again on that subchannel.
+
+You should address a ccw device via its bus id (e.g. 0.0.4711); the device can
+be found under bus/ccw/devices/.
+
+All ccw devices export some data via sysfs.
+
+cutype:
+	The control unit type / model.
+
+devtype:
+	The device type / model, if applicable.
+
+availability:
+	      Can be 'good' or 'boxed'; 'no path' or 'no device' for
+	      disconnected devices.
+
+online:
+	    An interface to set the device online and offline.
+	    In the special case of the device being disconnected (see the
+	    notify function under 1.2), piping 0 to online will forcibly delete
+	    the device.
+
+The device drivers can add entries to export per-device data and interfaces.
+
+There is also some data exported on a per-subchannel basis (see under
+bus/css/devices/):
+
+chpids:
+	Via which chpids the device is connected.
+
+pimpampom:
+	The path installed, path available and path operational masks.
+
+There also might be additional data, for example for block devices.
+
+
+1.1 Bringing up a ccw device
+----------------------------
+
+This is done in several steps.
+
+a. Each driver can provide one or more parameter interfaces where parameters can
+   be specified. These interfaces are also in the driver's responsibility.
+b. After a. has been performed, if necessary, the device is finally brought up
+   via the 'online' interface.
+
+
+1.2 Writing a driver for ccw devices
+------------------------------------
+
+The basic struct ccw_device and struct ccw_driver data structures can be found
+under include/asm/ccwdev.h::
+
+  struct ccw_device {
+	spinlock_t *ccwlock;
+	struct ccw_device_private *private;
+	struct ccw_device_id id;
+
+	struct ccw_driver *drv;
+	struct device dev;
+	int online;
+
+	void (*handler) (struct ccw_device *dev, unsigned long intparm,
+			 struct irb *irb);
+  };
+
+  struct ccw_driver {
+	struct module *owner;
+	struct ccw_device_id *ids;
+	int (*probe) (struct ccw_device *);
+	int (*remove) (struct ccw_device *);
+	int (*set_online) (struct ccw_device *);
+	int (*set_offline) (struct ccw_device *);
+	int (*notify) (struct ccw_device *, int);
+	struct device_driver driver;
+	char *name;
+  };
+
+The 'private' field contains data needed for internal i/o operation only, and
+is not available to the device driver.
+
+Each driver should declare in a MODULE_DEVICE_TABLE into which CU types/models
+and/or device types/models it is interested. This information can later be found
+in the struct ccw_device_id fields::
+
+  struct ccw_device_id {
+	__u16   match_flags;
+
+	__u16   cu_type;
+	__u16   dev_type;
+	__u8    cu_model;
+	__u8    dev_model;
+
+	unsigned long driver_info;
+  };
+
+The functions in ccw_driver should be used in the following way:
+
+probe:
+	 This function is called by the device layer for each device the driver
+	 is interested in. The driver should only allocate private structures
+	 to put in dev->driver_data and create attributes (if needed). Also,
+	 the interrupt handler (see below) should be set here.
+
+::
+
+  int (*probe) (struct ccw_device *cdev);
+
+Parameters:
+		cdev
+			- the device to be probed.
+
+
+remove:
+	 This function is called by the device layer upon removal of the driver,
+	 the device or the module. The driver should perform cleanups here.
+
+::
+
+  int (*remove) (struct ccw_device *cdev);
+
+Parameters:
+		cdev
+			- the device to be removed.
+
+
+set_online:
+	    This function is called by the common I/O layer when the device is
+	    activated via the 'online' attribute. The driver should finally
+	    setup and activate the device here.
+
+::
+
+  int (*set_online) (struct ccw_device *);
+
+Parameters:
+		cdev
+			- the device to be activated. The common layer has
+			  verified that the device is not already online.
+
+
+set_offline: This function is called by the common I/O layer when the device is
+	     de-activated via the 'online' attribute. The driver should shut
+	     down the device, but not de-allocate its private data.
+
+::
+
+  int (*set_offline) (struct ccw_device *);
+
+Parameters:
+		cdev
+			- the device to be deactivated. The common layer has
+			   verified that the device is online.
+
+
+notify:
+	This function is called by the common I/O layer for some state changes
+	of the device.
+
+	Signalled to the driver are:
+
+	* In online state, device detached (CIO_GONE) or last path gone
+	  (CIO_NO_PATH). The driver must return !0 to keep the device; for
+	  return code 0, the device will be deleted as usual (also when no
+	  notify function is registered). If the driver wants to keep the
+	  device, it is moved into disconnected state.
+	* In disconnected state, device operational again (CIO_OPER). The
+	  common I/O layer performs some sanity checks on device number and
+	  Device / CU to be reasonably sure if it is still the same device.
+	  If not, the old device is removed and a new one registered. By the
+	  return code of the notify function the device driver signals if it
+	  wants the device back: !0 for keeping, 0 to make the device being
+	  removed and re-registered.
+
+::
+
+  int (*notify) (struct ccw_device *, int);
+
+Parameters:
+		cdev
+			- the device whose state changed.
+
+		event
+			- the event that happened. This can be one of CIO_GONE,
+			  CIO_NO_PATH or CIO_OPER.
+
+The handler field of the struct ccw_device is meant to be set to the interrupt
+handler for the device. In order to accommodate drivers which use several
+distinct handlers (e.g. multi subchannel devices), this is a member of ccw_device
+instead of ccw_driver.
+The handler is registered with the common layer during set_online() processing
+before the driver is called, and is deregistered during set_offline() after the
+driver has been called. Also, after registering / before deregistering, path
+grouping resp. disbanding of the path group (if applicable) are performed.
+
+::
+
+  void (*handler) (struct ccw_device *dev, unsigned long intparm, struct irb *irb);
+
+Parameters:     dev     - the device the handler is called for
+		intparm - the intparm which allows the device driver to identify
+			  the i/o the interrupt is associated with, or to recognize
+			  the interrupt as unsolicited.
+		irb     - interruption response block which contains the accumulated
+			  status.
+
+The device driver is called from the common ccw_device layer and can retrieve
+information about the interrupt from the irb parameter.
+
+
+1.3 ccwgroup devices
+--------------------
+
+The ccwgroup mechanism is designed to handle devices consisting of multiple ccw
+devices, like lcs or ctc.
+
+The ccw driver provides a 'group' attribute. Piping bus ids of ccw devices to
+this attributes creates a ccwgroup device consisting of these ccw devices (if
+possible). This ccwgroup device can be set online or offline just like a normal
+ccw device.
+
+Each ccwgroup device also provides an 'ungroup' attribute to destroy the device
+again (only when offline). This is a generic ccwgroup mechanism (the driver does
+not need to implement anything beyond normal removal routines).
+
+A ccw device which is a member of a ccwgroup device carries a pointer to the
+ccwgroup device in the driver_data of its device struct. This field must not be
+touched by the driver - it should use the ccwgroup device's driver_data for its
+private data.
+
+To implement a ccwgroup driver, please refer to include/asm/ccwgroup.h. Keep in
+mind that most drivers will need to implement both a ccwgroup and a ccw
+driver.
+
+
+2. Channel paths
+-----------------
+
+Channel paths show up, like subchannels, under the channel subsystem root (css0)
+and are called 'chp0.<chpid>'. They have no driver and do not belong to any bus.
+Please note, that unlike /proc/chpids in 2.4, the channel path objects reflect
+only the logical state and not the physical state, since we cannot track the
+latter consistently due to lacking machine support (we don't need to be aware
+of it anyway).
+
+status
+       - Can be 'online' or 'offline'.
+	 Piping 'on' or 'off' sets the chpid logically online/offline.
+	 Piping 'on' to an online chpid triggers path reprobing for all devices
+	 the chpid connects to. This can be used to force the kernel to re-use
+	 a channel path the user knows to be online, but the machine hasn't
+	 created a machine check for.
+
+type
+       - The physical type of the channel path.
+
+shared
+       - Whether the channel path is shared.
+
+cmg
+       - The channel measurement group.
+
+3. System devices
+-----------------
+
+3.1 xpram
+---------
+
+xpram shows up under devices/system/ as 'xpram'.
+
+3.2 cpus
+--------
+
+For each cpu, a directory is created under devices/system/cpu/. Each cpu has an
+attribute 'online' which can be 0 or 1.
+
+
+4. Other devices
+----------------
+
+4.1 Netiucv
+-----------
+
+The netiucv driver creates an attribute 'connection' under
+bus/iucv/drivers/netiucv. Piping to this attribute creates a new netiucv
+connection to the specified host.
+
+Netiucv connections show up under devices/iucv/ as "netiucv<ifnum>". The interface
+number is assigned sequentially to the connections defined via the 'connection'
+attribute.
+
+user
+    - shows the connection partner.
+
+buffer
+    - maximum buffer size. Pipe to it to change buffer size.
diff --git a/Documentation/s390/driver-model.txt b/Documentation/s390/driver-model.txt
deleted file mode 100644
index ed265cf54cde..000000000000
--- a/Documentation/s390/driver-model.txt
+++ /dev/null
@@ -1,287 +0,0 @@
-S/390 driver model interfaces
------------------------------
-
-1. CCW devices
---------------
-
-All devices which can be addressed by means of ccws are called 'CCW devices' -
-even if they aren't actually driven by ccws.
-
-All ccw devices are accessed via a subchannel, this is reflected in the 
-structures under devices/:
-
-devices/
-     - system/
-     - css0/
-           - 0.0.0000/0.0.0815/
-	   - 0.0.0001/0.0.4711/
-	   - 0.0.0002/
-	   - 0.1.0000/0.1.1234/
-	   ...
-	   - defunct/
-
-In this example, device 0815 is accessed via subchannel 0 in subchannel set 0,
-device 4711 via subchannel 1 in subchannel set 0, and subchannel 2 is a non-I/O
-subchannel. Device 1234 is accessed via subchannel 0 in subchannel set 1.
-
-The subchannel named 'defunct' does not represent any real subchannel on the
-system; it is a pseudo subchannel where disconnected ccw devices are moved to
-if they are displaced by another ccw device becoming operational on their
-former subchannel. The ccw devices will be moved again to a proper subchannel
-if they become operational again on that subchannel.
-
-You should address a ccw device via its bus id (e.g. 0.0.4711); the device can
-be found under bus/ccw/devices/.
-
-All ccw devices export some data via sysfs.
-
-cutype:	    The control unit type / model.
-
-devtype:    The device type / model, if applicable.
-
-availability: Can be 'good' or 'boxed'; 'no path' or 'no device' for
-	      disconnected devices.
-
-online:     An interface to set the device online and offline.
-	    In the special case of the device being disconnected (see the
-	    notify function under 1.2), piping 0 to online will forcibly delete
-	    the device.
-
-The device drivers can add entries to export per-device data and interfaces.
-
-There is also some data exported on a per-subchannel basis (see under
-bus/css/devices/):
-
-chpids:	    Via which chpids the device is connected.
-
-pimpampom:  The path installed, path available and path operational masks.
-
-There also might be additional data, for example for block devices.
-
-
-1.1 Bringing up a ccw device
-----------------------------
-
-This is done in several steps.
-
-a. Each driver can provide one or more parameter interfaces where parameters can
-   be specified. These interfaces are also in the driver's responsibility.
-b. After a. has been performed, if necessary, the device is finally brought up
-   via the 'online' interface.
-
-
-1.2 Writing a driver for ccw devices
-------------------------------------
-
-The basic struct ccw_device and struct ccw_driver data structures can be found
-under include/asm/ccwdev.h.
-
-struct ccw_device {
-        spinlock_t *ccwlock;
-        struct ccw_device_private *private;
-	struct ccw_device_id id;	
-
-	struct ccw_driver *drv;		
-	struct device dev;		
-	int online;
-
-	void (*handler) (struct ccw_device *dev, unsigned long intparm,
-                         struct irb *irb);
-};
-
-struct ccw_driver {
-	struct module *owner;		
-	struct ccw_device_id *ids;	
-	int (*probe) (struct ccw_device *); 
-	int (*remove) (struct ccw_device *);
-	int (*set_online) (struct ccw_device *);
-	int (*set_offline) (struct ccw_device *);
-	int (*notify) (struct ccw_device *, int);
-	struct device_driver driver;
-	char *name;
-};
-
-The 'private' field contains data needed for internal i/o operation only, and
-is not available to the device driver.
-
-Each driver should declare in a MODULE_DEVICE_TABLE into which CU types/models
-and/or device types/models it is interested. This information can later be found
-in the struct ccw_device_id fields:
-
-struct ccw_device_id {
-	__u16	match_flags;	
-
-	__u16	cu_type;	
-	__u16	dev_type;	
-	__u8	cu_model;	
-	__u8	dev_model;	
-
-	unsigned long driver_info;
-};
-
-The functions in ccw_driver should be used in the following way:
-probe:   This function is called by the device layer for each device the driver
-	 is interested in. The driver should only allocate private structures
-	 to put in dev->driver_data and create attributes (if needed). Also,
-	 the interrupt handler (see below) should be set here.
-
-int (*probe) (struct ccw_device *cdev); 
-
-Parameters:  cdev     - the device to be probed.
-
-
-remove:  This function is called by the device layer upon removal of the driver,
-	 the device or the module. The driver should perform cleanups here.
-
-int (*remove) (struct ccw_device *cdev);
-
-Parameters:   cdev    - the device to be removed.
-
-
-set_online: This function is called by the common I/O layer when the device is
-	    activated via the 'online' attribute. The driver should finally
-	    setup and activate the device here.
-
-int (*set_online) (struct ccw_device *);
-
-Parameters:   cdev	- the device to be activated. The common layer has
-			  verified that the device is not already online.
-
-
-set_offline: This function is called by the common I/O layer when the device is
-	     de-activated via the 'online' attribute. The driver should shut
-	     down the device, but not de-allocate its private data.
-
-int (*set_offline) (struct ccw_device *);
-
-Parameters:   cdev       - the device to be deactivated. The common layer has
-			   verified that the device is online.
-
-
-notify: This function is called by the common I/O layer for some state changes
-	of the device.
-	Signalled to the driver are:
-	* In online state, device detached (CIO_GONE) or last path gone
-	  (CIO_NO_PATH). The driver must return !0 to keep the device; for
-	  return code 0, the device will be deleted as usual (also when no
-	  notify function is registered). If the driver wants to keep the
-	  device, it is moved into disconnected state.
-	* In disconnected state, device operational again (CIO_OPER). The
-	  common I/O layer performs some sanity checks on device number and
-	  Device / CU to be reasonably sure if it is still the same device.
-	  If not, the old device is removed and a new one registered. By the
-	  return code of the notify function the device driver signals if it
-	  wants the device back: !0 for keeping, 0 to make the device being
-	  removed and re-registered.
-	
-int (*notify) (struct ccw_device *, int);
-
-Parameters:   cdev    - the device whose state changed.
-	      event   - the event that happened. This can be one of CIO_GONE,
-		        CIO_NO_PATH or CIO_OPER.
-
-The handler field of the struct ccw_device is meant to be set to the interrupt
-handler for the device. In order to accommodate drivers which use several 
-distinct handlers (e.g. multi subchannel devices), this is a member of ccw_device
-instead of ccw_driver.
-The handler is registered with the common layer during set_online() processing
-before the driver is called, and is deregistered during set_offline() after the
-driver has been called. Also, after registering / before deregistering, path 
-grouping resp. disbanding of the path group (if applicable) are performed.
-
-void (*handler) (struct ccw_device *dev, unsigned long intparm, struct irb *irb);
-
-Parameters:	dev	- the device the handler is called for
-		intparm - the intparm which allows the device driver to identify
-                          the i/o the interrupt is associated with, or to recognize
-                          the interrupt as unsolicited.
-                irb     - interruption response block which contains the accumulated
-                          status.
-
-The device driver is called from the common ccw_device layer and can retrieve 
-information about the interrupt from the irb parameter.
-
-
-1.3 ccwgroup devices
---------------------
-
-The ccwgroup mechanism is designed to handle devices consisting of multiple ccw
-devices, like lcs or ctc.
-
-The ccw driver provides a 'group' attribute. Piping bus ids of ccw devices to
-this attributes creates a ccwgroup device consisting of these ccw devices (if
-possible). This ccwgroup device can be set online or offline just like a normal
-ccw device.
-
-Each ccwgroup device also provides an 'ungroup' attribute to destroy the device
-again (only when offline). This is a generic ccwgroup mechanism (the driver does
-not need to implement anything beyond normal removal routines).
-
-A ccw device which is a member of a ccwgroup device carries a pointer to the
-ccwgroup device in the driver_data of its device struct. This field must not be
-touched by the driver - it should use the ccwgroup device's driver_data for its
-private data.
-
-To implement a ccwgroup driver, please refer to include/asm/ccwgroup.h. Keep in
-mind that most drivers will need to implement both a ccwgroup and a ccw
-driver.
-
-
-2. Channel paths
------------------
-
-Channel paths show up, like subchannels, under the channel subsystem root (css0)
-and are called 'chp0.<chpid>'. They have no driver and do not belong to any bus.
-Please note, that unlike /proc/chpids in 2.4, the channel path objects reflect
-only the logical state and not the physical state, since we cannot track the
-latter consistently due to lacking machine support (we don't need to be aware
-of it anyway).
-
-status - Can be 'online' or 'offline'.
-	 Piping 'on' or 'off' sets the chpid logically online/offline.
-	 Piping 'on' to an online chpid triggers path reprobing for all devices
-	 the chpid connects to. This can be used to force the kernel to re-use
-	 a channel path the user knows to be online, but the machine hasn't
-	 created a machine check for.
-
-type - The physical type of the channel path.
-
-shared - Whether the channel path is shared.
-
-cmg - The channel measurement group.
-
-3. System devices
------------------
-
-3.1 xpram 
----------
-
-xpram shows up under devices/system/ as 'xpram'.
-
-3.2 cpus
---------
-
-For each cpu, a directory is created under devices/system/cpu/. Each cpu has an
-attribute 'online' which can be 0 or 1.
-
-
-4. Other devices
-----------------
-
-4.1 Netiucv
------------
-
-The netiucv driver creates an attribute 'connection' under
-bus/iucv/drivers/netiucv. Piping to this attribute creates a new netiucv
-connection to the specified host.
-
-Netiucv connections show up under devices/iucv/ as "netiucv<ifnum>". The interface
-number is assigned sequentially to the connections defined via the 'connection'
-attribute.
-
-user			  - shows the connection partner.
-
-buffer			  - maximum buffer size.
-			    Pipe to it to change buffer size.
-
-
diff --git a/Documentation/s390/index.rst b/Documentation/s390/index.rst
new file mode 100644
index 000000000000..1a914da2a07b
--- /dev/null
+++ b/Documentation/s390/index.rst
@@ -0,0 +1,30 @@
+:orphan:
+
+=================
+s390 Architecture
+=================
+
+.. toctree::
+    :maxdepth: 1
+
+    cds
+    3270
+    debugging390
+    driver-model
+    monreader
+    qeth
+    s390dbf
+    vfio-ap
+    vfio-ccw
+    zfcpdump
+    dasd
+    common_io
+
+    text_files
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/s390/monreader.rst b/Documentation/s390/monreader.rst
new file mode 100644
index 000000000000..1e857575c113
--- /dev/null
+++ b/Documentation/s390/monreader.rst
@@ -0,0 +1,212 @@
+=================================================
+Linux API for read access to z/VM Monitor Records
+=================================================
+
+Date  : 2004-Nov-26
+
+Author: Gerald Schaefer (geraldsc@de.ibm.com)
+
+
+
+
+Description
+===========
+This item delivers a new Linux API in the form of a misc char device that is
+usable from user space and allows read access to the z/VM Monitor Records
+collected by the `*MONITOR` System Service of z/VM.
+
+
+User Requirements
+=================
+The z/VM guest on which you want to access this API needs to be configured in
+order to allow IUCV connections to the `*MONITOR` service, i.e. it needs the
+IUCV `*MONITOR` statement in its user entry. If the monitor DCSS to be used is
+restricted (likely), you also need the NAMESAVE <DCSS NAME> statement.
+This item will use the IUCV device driver to access the z/VM services, so you
+need a kernel with IUCV support. You also need z/VM version 4.4 or 5.1.
+
+There are two options for being able to load the monitor DCSS (examples assume
+that the monitor DCSS begins at 144 MB and ends at 152 MB). You can query the
+location of the monitor DCSS with the Class E privileged CP command Q NSS MAP
+(the values BEGPAG and ENDPAG are given in units of 4K pages).
+
+See also "CP Command and Utility Reference" (SC24-6081-00) for more information
+on the DEF STOR and Q NSS MAP commands, as well as "Saved Segments Planning
+and Administration" (SC24-6116-00) for more information on DCSSes.
+
+1st option:
+-----------
+You can use the CP command DEF STOR CONFIG to define a "memory hole" in your
+guest virtual storage around the address range of the DCSS.
+
+Example: DEF STOR CONFIG 0.140M 200M.200M
+
+This defines two blocks of storage, the first is 140MB in size an begins at
+address 0MB, the second is 200MB in size and begins at address 200MB,
+resulting in a total storage of 340MB. Note that the first block should
+always start at 0 and be at least 64MB in size.
+
+2nd option:
+-----------
+Your guest virtual storage has to end below the starting address of the DCSS
+and you have to specify the "mem=" kernel parameter in your parmfile with a
+value greater than the ending address of the DCSS.
+
+Example::
+
+	DEF STOR 140M
+
+This defines 140MB storage size for your guest, the parameter "mem=160M" is
+added to the parmfile.
+
+
+User Interface
+==============
+The char device is implemented as a kernel module named "monreader",
+which can be loaded via the modprobe command, or it can be compiled into the
+kernel instead. There is one optional module (or kernel) parameter, "mondcss",
+to specify the name of the monitor DCSS. If the module is compiled into the
+kernel, the kernel parameter "monreader.mondcss=<DCSS NAME>" can be specified
+in the parmfile.
+
+The default name for the DCSS is "MONDCSS" if none is specified. In case that
+there are other users already connected to the `*MONITOR` service (e.g.
+Performance Toolkit), the monitor DCSS is already defined and you have to use
+the same DCSS. The CP command Q MONITOR (Class E privileged) shows the name
+of the monitor DCSS, if already defined, and the users connected to the
+`*MONITOR` service.
+Refer to the "z/VM Performance" book (SC24-6109-00) on how to create a monitor
+DCSS if your z/VM doesn't have one already, you need Class E privileges to
+define and save a DCSS.
+
+Example:
+--------
+
+::
+
+	modprobe monreader mondcss=MYDCSS
+
+This loads the module and sets the DCSS name to "MYDCSS".
+
+NOTE:
+-----
+This API provides no interface to control the `*MONITOR` service, e.g. specify
+which data should be collected. This can be done by the CP command MONITOR
+(Class E privileged), see "CP Command and Utility Reference".
+
+Device nodes with udev:
+-----------------------
+After loading the module, a char device will be created along with the device
+node /<udev directory>/monreader.
+
+Device nodes without udev:
+--------------------------
+If your distribution does not support udev, a device node will not be created
+automatically and you have to create it manually after loading the module.
+Therefore you need to know the major and minor numbers of the device. These
+numbers can be found in /sys/class/misc/monreader/dev.
+
+Typing cat /sys/class/misc/monreader/dev will give an output of the form
+<major>:<minor>. The device node can be created via the mknod command, enter
+mknod <name> c <major> <minor>, where <name> is the name of the device node
+to be created.
+
+Example:
+--------
+
+::
+
+	# modprobe monreader
+	# cat /sys/class/misc/monreader/dev
+	10:63
+	# mknod /dev/monreader c 10 63
+
+This loads the module with the default monitor DCSS (MONDCSS) and creates a
+device node.
+
+File operations:
+----------------
+The following file operations are supported: open, release, read, poll.
+There are two alternative methods for reading: either non-blocking read in
+conjunction with polling, or blocking read without polling. IOCTLs are not
+supported.
+
+Read:
+-----
+Reading from the device provides a 12 Byte monitor control element (MCE),
+followed by a set of one or more contiguous monitor records (similar to the
+output of the CMS utility MONWRITE without the 4K control blocks). The MCE
+contains information on the type of the following record set (sample/event
+data), the monitor domains contained within it and the start and end address
+of the record set in the monitor DCSS. The start and end address can be used
+to determine the size of the record set, the end address is the address of the
+last byte of data. The start address is needed to handle "end-of-frame" records
+correctly (domain 1, record 13), i.e. it can be used to determine the record
+start offset relative to a 4K page (frame) boundary.
+
+See "Appendix A: `*MONITOR`" in the "z/VM Performance" document for a description
+of the monitor control element layout. The layout of the monitor records can
+be found here (z/VM 5.1): http://www.vm.ibm.com/pubs/mon510/index.html
+
+The layout of the data stream provided by the monreader device is as follows::
+
+	...
+	<0 byte read>
+	<first MCE>              \
+	<first set of records>    |
+	...                       |- data set
+	<last MCE>                |
+	<last set of records>    /
+	<0 byte read>
+	...
+
+There may be more than one combination of MCE and corresponding record set
+within one data set and the end of each data set is indicated by a successful
+read with a return value of 0 (0 byte read).
+Any received data must be considered invalid until a complete set was
+read successfully, including the closing 0 byte read. Therefore you should
+always read the complete set into a buffer before processing the data.
+
+The maximum size of a data set can be as large as the size of the
+monitor DCSS, so design the buffer adequately or use dynamic memory allocation.
+The size of the monitor DCSS will be printed into syslog after loading the
+module. You can also use the (Class E privileged) CP command Q NSS MAP to
+list all available segments and information about them.
+
+As with most char devices, error conditions are indicated by returning a
+negative value for the number of bytes read. In this case, the errno variable
+indicates the error condition:
+
+EIO:
+     reply failed, read data is invalid and the application
+     should discard the data read since the last successful read with 0 size.
+EFAULT:
+	copy_to_user failed, read data is invalid and the application should
+	discard the data read since the last successful read with 0 size.
+EAGAIN:
+	occurs on a non-blocking read if there is no data available at the
+	moment. There is no data missing or corrupted, just try again or rather
+	use polling for non-blocking reads.
+EOVERFLOW:
+	   message limit reached, the data read since the last successful
+	   read with 0 size is valid but subsequent records may be missing.
+
+In the last case (EOVERFLOW) there may be missing data, in the first two cases
+(EIO, EFAULT) there will be missing data. It's up to the application if it will
+continue reading subsequent data or rather exit.
+
+Open:
+-----
+Only one user is allowed to open the char device. If it is already in use, the
+open function will fail (return a negative value) and set errno to EBUSY.
+The open function may also fail if an IUCV connection to the `*MONITOR` service
+cannot be established. In this case errno will be set to EIO and an error
+message with an IPUSER SEVER code will be printed into syslog. The IPUSER SEVER
+codes are described in the "z/VM Performance" book, Appendix A.
+
+NOTE:
+-----
+As soon as the device is opened, incoming messages will be accepted and they
+will account for the message limit, i.e. opening the device without reading
+from it will provoke the "message limit reached" error (EOVERFLOW error code)
+eventually.
diff --git a/Documentation/s390/monreader.txt b/Documentation/s390/monreader.txt
deleted file mode 100644
index d3729585fdb0..000000000000
--- a/Documentation/s390/monreader.txt
+++ /dev/null
@@ -1,197 +0,0 @@
-
-Date  : 2004-Nov-26
-Author: Gerald Schaefer (geraldsc@de.ibm.com)
-
-
-             Linux API for read access to z/VM Monitor Records
-             =================================================
-
-
-Description
-===========
-This item delivers a new Linux API in the form of a misc char device that is
-usable from user space and allows read access to the z/VM Monitor Records
-collected by the *MONITOR System Service of z/VM.
-
-
-User Requirements
-=================
-The z/VM guest on which you want to access this API needs to be configured in
-order to allow IUCV connections to the *MONITOR service, i.e. it needs the
-IUCV *MONITOR statement in its user entry. If the monitor DCSS to be used is
-restricted (likely), you also need the NAMESAVE <DCSS NAME> statement.
-This item will use the IUCV device driver to access the z/VM services, so you
-need a kernel with IUCV support. You also need z/VM version 4.4 or 5.1.
-
-There are two options for being able to load the monitor DCSS (examples assume
-that the monitor DCSS begins at 144 MB and ends at 152 MB). You can query the
-location of the monitor DCSS with the Class E privileged CP command Q NSS MAP
-(the values BEGPAG and ENDPAG are given in units of 4K pages).
-
-See also "CP Command and Utility Reference" (SC24-6081-00) for more information
-on the DEF STOR and Q NSS MAP commands, as well as "Saved Segments Planning
-and Administration" (SC24-6116-00) for more information on DCSSes.
-
-1st option:
------------
-You can use the CP command DEF STOR CONFIG to define a "memory hole" in your
-guest virtual storage around the address range of the DCSS.
-
-Example: DEF STOR CONFIG 0.140M 200M.200M
-
-This defines two blocks of storage, the first is 140MB in size an begins at
-address 0MB, the second is 200MB in size and begins at address 200MB,
-resulting in a total storage of 340MB. Note that the first block should
-always start at 0 and be at least 64MB in size.
-
-2nd option:
------------
-Your guest virtual storage has to end below the starting address of the DCSS
-and you have to specify the "mem=" kernel parameter in your parmfile with a
-value greater than the ending address of the DCSS.
-
-Example: DEF STOR 140M
-
-This defines 140MB storage size for your guest, the parameter "mem=160M" is
-added to the parmfile.
-
-
-User Interface
-==============
-The char device is implemented as a kernel module named "monreader",
-which can be loaded via the modprobe command, or it can be compiled into the
-kernel instead. There is one optional module (or kernel) parameter, "mondcss",
-to specify the name of the monitor DCSS. If the module is compiled into the
-kernel, the kernel parameter "monreader.mondcss=<DCSS NAME>" can be specified
-in the parmfile.
-
-The default name for the DCSS is "MONDCSS" if none is specified. In case that
-there are other users already connected to the *MONITOR service (e.g.
-Performance Toolkit), the monitor DCSS is already defined and you have to use
-the same DCSS. The CP command Q MONITOR (Class E privileged) shows the name
-of the monitor DCSS, if already defined, and the users connected to the
-*MONITOR service.
-Refer to the "z/VM Performance" book (SC24-6109-00) on how to create a monitor
-DCSS if your z/VM doesn't have one already, you need Class E privileges to
-define and save a DCSS.
-
-Example:
---------
-modprobe monreader mondcss=MYDCSS
-
-This loads the module and sets the DCSS name to "MYDCSS".
-
-NOTE:
------
-This API provides no interface to control the *MONITOR service, e.g. specify
-which data should be collected. This can be done by the CP command MONITOR
-(Class E privileged), see "CP Command and Utility Reference".
-
-Device nodes with udev:
------------------------
-After loading the module, a char device will be created along with the device
-node /<udev directory>/monreader.
-
-Device nodes without udev:
---------------------------
-If your distribution does not support udev, a device node will not be created
-automatically and you have to create it manually after loading the module.
-Therefore you need to know the major and minor numbers of the device. These
-numbers can be found in /sys/class/misc/monreader/dev.
-Typing cat /sys/class/misc/monreader/dev will give an output of the form
-<major>:<minor>. The device node can be created via the mknod command, enter
-mknod <name> c <major> <minor>, where <name> is the name of the device node
-to be created.
-
-Example:
---------
-# modprobe monreader
-# cat /sys/class/misc/monreader/dev
-10:63
-# mknod /dev/monreader c 10 63
-
-This loads the module with the default monitor DCSS (MONDCSS) and creates a
-device node.
-
-File operations:
-----------------
-The following file operations are supported: open, release, read, poll.
-There are two alternative methods for reading: either non-blocking read in
-conjunction with polling, or blocking read without polling. IOCTLs are not
-supported.
-
-Read:
------
-Reading from the device provides a 12 Byte monitor control element (MCE),
-followed by a set of one or more contiguous monitor records (similar to the
-output of the CMS utility MONWRITE without the 4K control blocks). The MCE
-contains information on the type of the following record set (sample/event
-data), the monitor domains contained within it and the start and end address
-of the record set in the monitor DCSS. The start and end address can be used
-to determine the size of the record set, the end address is the address of the
-last byte of data. The start address is needed to handle "end-of-frame" records
-correctly (domain 1, record 13), i.e. it can be used to determine the record
-start offset relative to a 4K page (frame) boundary.
-
-See "Appendix A: *MONITOR" in the "z/VM Performance" document for a description
-of the monitor control element layout. The layout of the monitor records can
-be found here (z/VM 5.1): http://www.vm.ibm.com/pubs/mon510/index.html
-
-The layout of the data stream provided by the monreader device is as follows:
-...
-<0 byte read>
-<first MCE>              \
-<first set of records>    |
-...                       |- data set
-<last MCE>                |
-<last set of records>    /
-<0 byte read>
-...
-
-There may be more than one combination of MCE and corresponding record set
-within one data set and the end of each data set is indicated by a successful
-read with a return value of 0 (0 byte read).
-Any received data must be considered invalid until a complete set was
-read successfully, including the closing 0 byte read. Therefore you should
-always read the complete set into a buffer before processing the data.
-
-The maximum size of a data set can be as large as the size of the
-monitor DCSS, so design the buffer adequately or use dynamic memory allocation.
-The size of the monitor DCSS will be printed into syslog after loading the
-module. You can also use the (Class E privileged) CP command Q NSS MAP to
-list all available segments and information about them.
-
-As with most char devices, error conditions are indicated by returning a
-negative value for the number of bytes read. In this case, the errno variable
-indicates the error condition:
-
-EIO: reply failed, read data is invalid and the application
-     should discard the data read since the last successful read with 0 size.
-EFAULT: copy_to_user failed, read data is invalid and the application should
-        discard the data read since the last successful read with 0 size.
-EAGAIN: occurs on a non-blocking read if there is no data available at the
-        moment. There is no data missing or corrupted, just try again or rather
-        use polling for non-blocking reads.
-EOVERFLOW: message limit reached, the data read since the last successful
-           read with 0 size is valid but subsequent records may be missing.
-
-In the last case (EOVERFLOW) there may be missing data, in the first two cases
-(EIO, EFAULT) there will be missing data. It's up to the application if it will
-continue reading subsequent data or rather exit.
-
-Open:
------
-Only one user is allowed to open the char device. If it is already in use, the
-open function will fail (return a negative value) and set errno to EBUSY.
-The open function may also fail if an IUCV connection to the *MONITOR service
-cannot be established. In this case errno will be set to EIO and an error
-message with an IPUSER SEVER code will be printed into syslog. The IPUSER SEVER
-codes are described in the "z/VM Performance" book, Appendix A.
-
-NOTE:
------
-As soon as the device is opened, incoming messages will be accepted and they
-will account for the message limit, i.e. opening the device without reading
-from it will provoke the "message limit reached" error (EOVERFLOW error code)
-eventually.
-
diff --git a/Documentation/s390/qeth.rst b/Documentation/s390/qeth.rst
new file mode 100644
index 000000000000..f02fdaa68de0
--- /dev/null
+++ b/Documentation/s390/qeth.rst
@@ -0,0 +1,64 @@
+=============================
+IBM s390 QDIO Ethernet Driver
+=============================
+
+OSA and HiperSockets Bridge Port Support
+========================================
+
+Uevents
+-------
+
+To generate the events the device must be assigned a role of either
+a primary or a secondary Bridge Port. For more information, see
+"z/VM Connectivity, SC24-6174".
+
+When run on an OSA or HiperSockets Bridge Capable Port hardware, and the state
+of some configured Bridge Port device on the channel changes, a udev
+event with ACTION=CHANGE is emitted on behalf of the corresponding
+ccwgroup device. The event has the following attributes:
+
+BRIDGEPORT=statechange
+  indicates that the Bridge Port device changed
+  its state.
+
+ROLE={primary|secondary|none}
+  the role assigned to the port.
+
+STATE={active|standby|inactive}
+  the newly assumed state of the port.
+
+When run on HiperSockets Bridge Capable Port hardware with host address
+notifications enabled, a udev event with ACTION=CHANGE is emitted.
+It is emitted on behalf of the corresponding ccwgroup device when a host
+or a VLAN is registered or unregistered on the network served by the device.
+The event has the following attributes:
+
+BRIDGEDHOST={reset|register|deregister|abort}
+  host address
+  notifications are started afresh, a new host or VLAN is registered or
+  deregistered on the Bridge Port HiperSockets channel, or address
+  notifications are aborted.
+
+VLAN=numeric-vlan-id
+  VLAN ID on which the event occurred. Not included
+  if no VLAN is involved in the event.
+
+MAC=xx:xx:xx:xx:xx:xx
+  MAC address of the host that is being registered
+  or deregistered from the HiperSockets channel. Not reported if the
+  event reports the creation or destruction of a VLAN.
+
+NTOK_BUSID=x.y.zzzz
+  device bus ID (CSSID, SSID and device number).
+
+NTOK_IID=xx
+  device IID.
+
+NTOK_CHPID=xx
+  device CHPID.
+
+NTOK_CHID=xxxx
+  device channel ID.
+
+Note that the `NTOK_*` attributes refer to devices other than  the one
+connected to the system on which the OS is running.
diff --git a/Documentation/s390/qeth.txt b/Documentation/s390/qeth.txt
deleted file mode 100644
index aa06fcf5f8c2..000000000000
--- a/Documentation/s390/qeth.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-IBM s390 QDIO Ethernet Driver
-
-OSA and HiperSockets Bridge Port Support
-
-Uevents
-
-To generate the events the device must be assigned a role of either
-a primary or a secondary Bridge Port. For more information, see
-"z/VM Connectivity, SC24-6174".
-
-When run on an OSA or HiperSockets Bridge Capable Port hardware, and the state
-of some configured Bridge Port device on the channel changes, a udev
-event with ACTION=CHANGE is emitted on behalf of the corresponding
-ccwgroup device. The event has the following attributes:
-
-BRIDGEPORT=statechange -  indicates that the Bridge Port device changed
-  its state.
-
-ROLE={primary|secondary|none} - the role assigned to the port.
-
-STATE={active|standby|inactive} - the newly assumed state of the port.
-
-When run on HiperSockets Bridge Capable Port hardware with host address
-notifications enabled, a udev event with ACTION=CHANGE is emitted.
-It is emitted on behalf of the corresponding ccwgroup device when a host
-or a VLAN is registered or unregistered on the network served by the device.
-The event has the following attributes:
-
-BRIDGEDHOST={reset|register|deregister|abort} - host address
-  notifications are started afresh, a new host or VLAN is registered or
-  deregistered on the Bridge Port HiperSockets channel, or address
-  notifications are aborted.
-
-VLAN=numeric-vlan-id - VLAN ID on which the event occurred. Not included
-  if no VLAN is involved in the event.
-
-MAC=xx:xx:xx:xx:xx:xx - MAC address of the host that is being registered
-  or deregistered from the HiperSockets channel. Not reported if the
-  event reports the creation or destruction of a VLAN.
-
-NTOK_BUSID=x.y.zzzz - device bus ID (CSSID, SSID and device number).
-
-NTOK_IID=xx - device IID.
-
-NTOK_CHPID=xx - device CHPID.
-
-NTOK_CHID=xxxx - device channel ID.
-
-Note that the NTOK_* attributes refer to devices other than  the one
-connected to the system on which the OS is running.
diff --git a/Documentation/s390/s390dbf.rst b/Documentation/s390/s390dbf.rst
new file mode 100644
index 000000000000..ec2a1faa414b
--- /dev/null
+++ b/Documentation/s390/s390dbf.rst
@@ -0,0 +1,803 @@
+==================
+S390 Debug Feature
+==================
+
+files:
+      - arch/s390/kernel/debug.c
+      - arch/s390/include/asm/debug.h
+
+Description:
+------------
+The goal of this feature is to provide a kernel debug logging API
+where log records can be stored efficiently in memory, where each component
+(e.g. device drivers) can have one separate debug log.
+One purpose of this is to inspect the debug logs after a production system crash
+in order to analyze the reason for the crash.
+
+If the system still runs but only a subcomponent which uses dbf fails,
+it is possible to look at the debug logs on a live system via the Linux
+debugfs filesystem.
+
+The debug feature may also very useful for kernel and driver development.
+
+Design:
+-------
+Kernel components (e.g. device drivers) can register themselves at the debug
+feature with the function call debug_register(). This function initializes a
+debug log for the caller. For each debug log exists a number of debug areas
+where exactly one is active at one time.  Each debug area consists of contiguous
+pages in memory. In the debug areas there are stored debug entries (log records)
+which are written by event- and exception-calls.
+
+An event-call writes the specified debug entry to the active debug
+area and updates the log pointer for the active area. If the end
+of the active debug area is reached, a wrap around is done (ring buffer)
+and the next debug entry will be written at the beginning of the active
+debug area.
+
+An exception-call writes the specified debug entry to the log and
+switches to the next debug area. This is done in order to be sure
+that the records which describe the origin of the exception are not
+overwritten when a wrap around for the current area occurs.
+
+The debug areas themselves are also ordered in form of a ring buffer.
+When an exception is thrown in the last debug area, the following debug
+entries are then written again in the very first area.
+
+There are three versions for the event- and exception-calls: One for
+logging raw data, one for text and one for numbers.
+
+Each debug entry contains the following data:
+
+- Timestamp
+- Cpu-Number of calling task
+- Level of debug entry (0...6)
+- Return Address to caller
+- Flag, if entry is an exception or not
+
+The debug logs can be inspected in a live system through entries in
+the debugfs-filesystem. Under the toplevel directory "s390dbf" there is
+a directory for each registered component, which is named like the
+corresponding component. The debugfs normally should be mounted to
+/sys/kernel/debug therefore the debug feature can be accessed under
+/sys/kernel/debug/s390dbf.
+
+The content of the directories are files which represent different views
+to the debug log. Each component can decide which views should be
+used through registering them with the function debug_register_view().
+Predefined views for hex/ascii, sprintf and raw binary data are provided.
+It is also possible to define other views. The content of
+a view can be inspected simply by reading the corresponding debugfs file.
+
+All debug logs have an actual debug level (range from 0 to 6).
+The default level is 3. Event and Exception functions have a 'level'
+parameter. Only debug entries with a level that is lower or equal
+than the actual level are written to the log. This means, when
+writing events, high priority log entries should have a low level
+value whereas low priority entries should have a high one.
+The actual debug level can be changed with the help of the debugfs-filesystem
+through writing a number string "x" to the 'level' debugfs file which is
+provided for every debug log. Debugging can be switched off completely
+by using "-" on the 'level' debugfs file.
+
+Example::
+
+	> echo "-" > /sys/kernel/debug/s390dbf/dasd/level
+
+It is also possible to deactivate the debug feature globally for every
+debug log. You can change the behavior using  2 sysctl parameters in
+/proc/sys/s390dbf:
+
+There are currently 2 possible triggers, which stop the debug feature
+globally. The first possibility is to use the "debug_active" sysctl. If
+set to 1 the debug feature is running. If "debug_active" is set to 0 the
+debug feature is turned off.
+
+The second trigger which stops the debug feature is a kernel oops.
+That prevents the debug feature from overwriting debug information that
+happened before the oops. After an oops you can reactivate the debug feature
+by piping 1 to /proc/sys/s390dbf/debug_active. Nevertheless, its not
+suggested to use an oopsed kernel in a production environment.
+
+If you want to disallow the deactivation of the debug feature, you can use
+the "debug_stoppable" sysctl. If you set "debug_stoppable" to 0 the debug
+feature cannot be stopped. If the debug feature is already stopped, it
+will stay deactivated.
+
+----------------------------------------------------------------------------
+
+Kernel Interfaces:
+------------------
+
+::
+
+  debug_info_t *debug_register(char *name, int pages, int nr_areas,
+			       int buf_size);
+
+Parameter:
+	      name:
+			   Name of debug log (e.g. used for debugfs entry)
+	      pages:
+			   Number of pages, which will be allocated per area
+	      nr_areas:
+			   Number of debug areas
+	      buf_size:
+			   Size of data area in each debug entry
+
+Return Value:
+	Handle for generated debug area
+
+	  NULL if register failed
+
+Description:  Allocates memory for a debug log
+	      Must not be called within an interrupt handler
+
+----------------------------------------------------------------------------
+
+::
+
+  debug_info_t *debug_register_mode(char *name, int pages, int nr_areas,
+				    int buf_size, mode_t mode, uid_t uid,
+				    gid_t gid);
+
+Parameter:
+	      name:
+			   Name of debug log (e.g. used for debugfs entry)
+	      pages:
+			   Number of pages, which will be allocated per area
+	      nr_areas:
+			   Number of debug areas
+	      buf_size:
+			   Size of data area in each debug entry
+	      mode:
+			   File mode for debugfs files. E.g. S_IRWXUGO
+	      uid:
+			   User ID for debugfs files. Currently only 0 is
+			   supported.
+	      gid:
+			   Group ID for debugfs files. Currently only 0 is
+			   supported.
+
+Return Value:
+	      Handle for generated debug area
+
+		NULL if register failed
+
+Description:
+	      Allocates memory for a debug log
+	      Must not be called within an interrupt handler
+
+---------------------------------------------------------------------------
+
+::
+
+  void debug_unregister (debug_info_t * id);
+
+Parameter:
+	       id:
+		    handle for debug log
+
+Return Value:
+	       none
+
+Description:
+	       frees memory for a debug log and removes all registered debug
+	       views.
+
+	       Must not be called within an interrupt handler
+
+---------------------------------------------------------------------------
+
+::
+
+  void debug_set_level (debug_info_t * id, int new_level);
+
+Parameter:     id:        handle for debug log
+	       new_level: new debug level
+
+Return Value:
+	       none
+
+Description:
+	       Sets new actual debug level if new_level is valid.
+
+---------------------------------------------------------------------------
+
+::
+
+  bool debug_level_enabled (debug_info_t * id, int level);
+
+Parameter:
+	      id:
+			  handle for debug log
+	      level:
+			  debug level
+
+Return Value:
+	      True if level is less or equal to the current debug level.
+
+Description:
+	      Returns true if debug events for the specified level would be
+	      logged. Otherwise returns false.
+
+---------------------------------------------------------------------------
+
+::
+
+  void debug_stop_all(void);
+
+Parameter:
+	       none
+
+Return Value:
+	       none
+
+Description:
+	       stops the debug feature if stopping is allowed. Currently
+	       used in case of a kernel oops.
+
+---------------------------------------------------------------------------
+
+::
+
+  debug_entry_t* debug_event (debug_info_t* id, int level, void* data,
+			      int length);
+
+Parameter:
+	       id:
+		       handle for debug log
+	       level:
+		       debug level
+	       data:
+		       pointer to data for debug entry
+	       length:
+		       length of data in bytes
+
+Return Value:
+	       Address of written debug entry
+
+Description:
+	       writes debug entry to active debug area (if level <= actual
+	       debug level)
+
+---------------------------------------------------------------------------
+
+::
+
+  debug_entry_t* debug_int_event (debug_info_t * id, int level,
+				  unsigned int data);
+  debug_entry_t* debug_long_event(debug_info_t * id, int level,
+				  unsigned long data);
+
+Parameter:
+	       id:
+		       handle for debug log
+	       level:
+		       debug level
+	       data:
+		       integer value for debug entry
+
+Return Value:
+	       Address of written debug entry
+
+Description:
+	       writes debug entry to active debug area (if level <= actual
+	       debug level)
+
+---------------------------------------------------------------------------
+
+::
+
+  debug_entry_t* debug_text_event (debug_info_t * id, int level,
+				   const char* data);
+
+Parameter:
+	       id:
+		       handle for debug log
+	       level:
+		       debug level
+	       data:
+		       string for debug entry
+
+Return Value:
+	       Address of written debug entry
+
+Description:
+	       writes debug entry in ascii format to active debug area
+	       (if level <= actual debug level)
+
+---------------------------------------------------------------------------
+
+::
+
+  debug_entry_t* debug_sprintf_event (debug_info_t * id, int level,
+				      char* string,...);
+
+Parameter:
+	       id:
+		      handle for debug log
+	       level:
+		      debug level
+	       string:
+		      format string for debug entry
+	       ...:
+		      varargs used as in sprintf()
+
+Return Value:  Address of written debug entry
+
+Description:
+	       writes debug entry with format string and varargs (longs) to
+	       active debug area (if level $<=$ actual debug level).
+	       floats and long long datatypes cannot be used as varargs.
+
+---------------------------------------------------------------------------
+
+::
+
+  debug_entry_t* debug_exception (debug_info_t* id, int level, void* data,
+				  int length);
+
+Parameter:
+	       id:
+		       handle for debug log
+	       level:
+		       debug level
+	       data:
+		       pointer to data for debug entry
+	       length:
+		       length of data in bytes
+
+Return Value:
+	       Address of written debug entry
+
+Description:
+	       writes debug entry to active debug area (if level <= actual
+	       debug level) and switches to next debug area
+
+---------------------------------------------------------------------------
+
+::
+
+  debug_entry_t* debug_int_exception (debug_info_t * id, int level,
+				      unsigned int data);
+  debug_entry_t* debug_long_exception(debug_info_t * id, int level,
+				      unsigned long data);
+
+Parameter:     id:     handle for debug log
+	       level:  debug level
+	       data:   integer value for debug entry
+
+Return Value:  Address of written debug entry
+
+Description:   writes debug entry to active debug area (if level <= actual
+	       debug level) and switches to next debug area
+
+---------------------------------------------------------------------------
+
+::
+
+  debug_entry_t* debug_text_exception (debug_info_t * id, int level,
+				       const char* data);
+
+Parameter:     id:     handle for debug log
+	       level:  debug level
+	       data:   string for debug entry
+
+Return Value:  Address of written debug entry
+
+Description:   writes debug entry in ascii format to active debug area
+	       (if level <= actual debug level) and switches to next debug
+	       area
+
+---------------------------------------------------------------------------
+
+::
+
+  debug_entry_t* debug_sprintf_exception (debug_info_t * id, int level,
+					  char* string,...);
+
+Parameter:     id:    handle for debug log
+	       level: debug level
+	       string: format string for debug entry
+	       ...: varargs used as in sprintf()
+
+Return Value:  Address of written debug entry
+
+Description:   writes debug entry with format string and varargs (longs) to
+	       active debug area (if level $<=$ actual debug level) and
+	       switches to next debug area.
+	       floats and long long datatypes cannot be used as varargs.
+
+---------------------------------------------------------------------------
+
+::
+
+  int debug_register_view (debug_info_t * id, struct debug_view *view);
+
+Parameter:     id:    handle for debug log
+	       view:  pointer to debug view struct
+
+Return Value:  0  : ok
+	       < 0: Error
+
+Description:   registers new debug view and creates debugfs dir entry
+
+---------------------------------------------------------------------------
+
+::
+
+  int debug_unregister_view (debug_info_t * id, struct debug_view *view);
+
+Parameter:     id:    handle for debug log
+	       view:  pointer to debug view struct
+
+Return Value:  0  : ok
+	       < 0: Error
+
+Description:   unregisters debug view and removes debugfs dir entry
+
+
+
+Predefined views:
+-----------------
+
+extern struct debug_view debug_hex_ascii_view;
+
+extern struct debug_view debug_raw_view;
+
+extern struct debug_view debug_sprintf_view;
+
+Examples
+--------
+
+::
+
+  /*
+   * hex_ascii- + raw-view Example
+   */
+
+  #include <linux/init.h>
+  #include <asm/debug.h>
+
+  static debug_info_t* debug_info;
+
+  static int init(void)
+  {
+      /* register 4 debug areas with one page each and 4 byte data field */
+
+      debug_info = debug_register ("test", 1, 4, 4 );
+      debug_register_view(debug_info,&debug_hex_ascii_view);
+      debug_register_view(debug_info,&debug_raw_view);
+
+      debug_text_event(debug_info, 4 , "one ");
+      debug_int_exception(debug_info, 4, 4711);
+      debug_event(debug_info, 3, &debug_info, 4);
+
+      return 0;
+  }
+
+  static void cleanup(void)
+  {
+      debug_unregister (debug_info);
+  }
+
+  module_init(init);
+  module_exit(cleanup);
+
+---------------------------------------------------------------------------
+
+::
+
+  /*
+   * sprintf-view Example
+   */
+
+  #include <linux/init.h>
+  #include <asm/debug.h>
+
+  static debug_info_t* debug_info;
+
+  static int init(void)
+  {
+      /* register 4 debug areas with one page each and data field for */
+      /* format string pointer + 2 varargs (= 3 * sizeof(long))       */
+
+      debug_info = debug_register ("test", 1, 4, sizeof(long) * 3);
+      debug_register_view(debug_info,&debug_sprintf_view);
+
+      debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__);
+      debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info);
+
+      return 0;
+  }
+
+  static void cleanup(void)
+  {
+      debug_unregister (debug_info);
+  }
+
+  module_init(init);
+  module_exit(cleanup);
+
+Debugfs Interface
+-----------------
+Views to the debug logs can be investigated through reading the corresponding
+debugfs-files:
+
+Example::
+
+  > ls /sys/kernel/debug/s390dbf/dasd
+  flush  hex_ascii  level pages raw
+  > cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
+  00 00974733272:680099 2 - 02 0006ad7e  07 ea 4a 90 | ....
+  00 00974733272:682210 2 - 02 0006ade6  46 52 45 45 | FREE
+  00 00974733272:682213 2 - 02 0006adf6  07 ea 4a 90 | ....
+  00 00974733272:682281 1 * 02 0006ab08  41 4c 4c 43 | EXCP
+  01 00974733272:682284 2 - 02 0006ab16  45 43 4b 44 | ECKD
+  01 00974733272:682287 2 - 02 0006ab28  00 00 00 04 | ....
+  01 00974733272:682289 2 - 02 0006ab3e  00 00 00 20 | ...
+  01 00974733272:682297 2 - 02 0006ad7e  07 ea 4a 90 | ....
+  01 00974733272:684384 2 - 00 0006ade6  46 52 45 45 | FREE
+  01 00974733272:684388 2 - 00 0006adf6  07 ea 4a 90 | ....
+
+See section about predefined views for explanation of the above output!
+
+Changing the debug level
+------------------------
+
+Example::
+
+
+  > cat /sys/kernel/debug/s390dbf/dasd/level
+  3
+  > echo "5" > /sys/kernel/debug/s390dbf/dasd/level
+  > cat /sys/kernel/debug/s390dbf/dasd/level
+  5
+
+Flushing debug areas
+--------------------
+Debug areas can be flushed with piping the number of the desired
+area (0...n) to the debugfs file "flush". When using "-" all debug areas
+are flushed.
+
+Examples:
+
+1. Flush debug area 0::
+
+     > echo "0" > /sys/kernel/debug/s390dbf/dasd/flush
+
+2. Flush all debug areas::
+
+     > echo "-" > /sys/kernel/debug/s390dbf/dasd/flush
+
+Changing the size of debug areas
+------------------------------------
+It is possible the change the size of debug areas through piping
+the number of pages to the debugfs file "pages". The resize request will
+also flush the debug areas.
+
+Example:
+
+Define 4 pages for the debug areas of debug feature "dasd"::
+
+  > echo "4" > /sys/kernel/debug/s390dbf/dasd/pages
+
+Stooping the debug feature
+--------------------------
+Example:
+
+1. Check if stopping is allowed::
+
+     > cat /proc/sys/s390dbf/debug_stoppable
+
+2. Stop debug feature::
+
+     > echo 0 > /proc/sys/s390dbf/debug_active
+
+lcrash Interface
+----------------
+It is planned that the dump analysis tool lcrash gets an additional command
+'s390dbf' to display all the debug logs. With this tool it will be possible
+to investigate the debug logs on a live system and with a memory dump after
+a system crash.
+
+Investigating raw memory
+------------------------
+One last possibility to investigate the debug logs at a live
+system and after a system crash is to look at the raw memory
+under VM or at the Service Element.
+It is possible to find the anker of the debug-logs through
+the 'debug_area_first' symbol in the System map. Then one has
+to follow the correct pointers of the data-structures defined
+in debug.h and find the debug-areas in memory.
+Normally modules which use the debug feature will also have
+a global variable with the pointer to the debug-logs. Following
+this pointer it will also be possible to find the debug logs in
+memory.
+
+For this method it is recommended to use '16 * x + 4' byte (x = 0..n)
+for the length of the data field in debug_register() in
+order to see the debug entries well formatted.
+
+
+Predefined Views
+----------------
+
+There are three predefined views: hex_ascii, raw and sprintf.
+The hex_ascii view shows the data field in hex and ascii representation
+(e.g. '45 43 4b 44 | ECKD').
+The raw view returns a bytestream as the debug areas are stored in memory.
+
+The sprintf view formats the debug entries in the same way as the sprintf
+function would do. The sprintf event/exception functions write to the
+debug entry a pointer to the format string (size = sizeof(long))
+and for each vararg a long value. So e.g. for a debug entry with a format
+string plus two varargs one would need to allocate a (3 * sizeof(long))
+byte data area in the debug_register() function.
+
+IMPORTANT:
+  Using "%s" in sprintf event functions is dangerous. You can only
+  use "%s" in the sprintf event functions, if the memory for the passed string
+  is available as long as the debug feature exists. The reason behind this is
+  that due to performance considerations only a pointer to the string is stored
+  in  the debug feature. If you log a string that is freed afterwards, you will
+  get an OOPS when inspecting the debug feature, because then the debug feature
+  will access the already freed memory.
+
+NOTE:
+  If using the sprintf view do NOT use other event/exception functions
+  than the sprintf-event and -exception functions.
+
+The format of the hex_ascii and sprintf view is as follows:
+
+- Number of area
+- Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated
+  Universal Time (UTC), January 1, 1970)
+- level of debug entry
+- Exception flag (* = Exception)
+- Cpu-Number of calling task
+- Return Address to caller
+- data field
+
+The format of the raw view is:
+
+- Header as described in debug.h
+- datafield
+
+A typical line of the hex_ascii view will look like the following (first line
+is only for explanation and will not be displayed when 'cating' the view):
+
+area  time           level exception cpu caller    data (hex + ascii)
+--------------------------------------------------------------------------
+00    00964419409:440690 1 -         00  88023fe
+
+
+Defining views
+--------------
+
+Views are specified with the 'debug_view' structure. There are defined
+callback functions which are used for reading and writing the debugfs files::
+
+  struct debug_view {
+	char name[DEBUG_MAX_PROCF_LEN];
+	debug_prolog_proc_t* prolog_proc;
+	debug_header_proc_t* header_proc;
+	debug_format_proc_t* format_proc;
+	debug_input_proc_t*  input_proc;
+	void*                private_data;
+  };
+
+where::
+
+  typedef int (debug_header_proc_t) (debug_info_t* id,
+				     struct debug_view* view,
+				     int area,
+				     debug_entry_t* entry,
+				     char* out_buf);
+
+  typedef int (debug_format_proc_t) (debug_info_t* id,
+				     struct debug_view* view, char* out_buf,
+				     const char* in_buf);
+  typedef int (debug_prolog_proc_t) (debug_info_t* id,
+				     struct debug_view* view,
+				     char* out_buf);
+  typedef int (debug_input_proc_t) (debug_info_t* id,
+				    struct debug_view* view,
+				    struct file* file, const char* user_buf,
+				    size_t in_buf_size, loff_t* offset);
+
+
+The "private_data" member can be used as pointer to view specific data.
+It is not used by the debug feature itself.
+
+The output when reading a debugfs file is structured like this::
+
+  "prolog_proc output"
+
+  "header_proc output 1"  "format_proc output 1"
+  "header_proc output 2"  "format_proc output 2"
+  "header_proc output 3"  "format_proc output 3"
+  ...
+
+When a view is read from the debugfs, the Debug Feature calls the
+'prolog_proc' once for writing the prolog.
+Then 'header_proc' and 'format_proc' are called for each
+existing debug entry.
+
+The input_proc can be used to implement functionality when it is written to
+the view (e.g. like with 'echo "0" > /sys/kernel/debug/s390dbf/dasd/level).
+
+For header_proc there can be used the default function
+debug_dflt_header_fn() which is defined in debug.h.
+and which produces the same header output as the predefined views.
+E.g::
+
+  00 00964419409:440761 2 - 00 88023ec
+
+In order to see how to use the callback functions check the implementation
+of the default views!
+
+Example::
+
+  #include <asm/debug.h>
+
+  #define UNKNOWNSTR "data: %08x"
+
+  const char* messages[] =
+  {"This error...........\n",
+   "That error...........\n",
+   "Problem..............\n",
+   "Something went wrong.\n",
+   "Everything ok........\n",
+   NULL
+  };
+
+  static int debug_test_format_fn(
+     debug_info_t * id, struct debug_view *view,
+     char *out_buf, const char *in_buf
+  )
+  {
+    int i, rc = 0;
+
+    if(id->buf_size >= 4) {
+       int msg_nr = *((int*)in_buf);
+       if(msg_nr < sizeof(messages)/sizeof(char*) - 1)
+	  rc += sprintf(out_buf, "%s", messages[msg_nr]);
+       else
+	  rc += sprintf(out_buf, UNKNOWNSTR, msg_nr);
+    }
+   out:
+     return rc;
+  }
+
+  struct debug_view debug_test_view = {
+    "myview",                 /* name of view */
+    NULL,                     /* no prolog */
+    &debug_dflt_header_fn,    /* default header for each entry */
+    &debug_test_format_fn,    /* our own format function */
+    NULL,                     /* no input function */
+    NULL                      /* no private data */
+  };
+
+test:
+=====
+
+::
+
+  debug_info_t *debug_info;
+  ...
+  debug_info = debug_register ("test", 0, 4, 4 ));
+  debug_register_view(debug_info, &debug_test_view);
+  for(i = 0; i < 10; i ++) debug_int_event(debug_info, 1, i);
+
+  > cat /sys/kernel/debug/s390dbf/test/myview
+  00 00964419734:611402 1 - 00 88042ca   This error...........
+  00 00964419734:611405 1 - 00 88042ca   That error...........
+  00 00964419734:611408 1 - 00 88042ca   Problem..............
+  00 00964419734:611411 1 - 00 88042ca   Something went wrong.
+  00 00964419734:611414 1 - 00 88042ca   Everything ok........
+  00 00964419734:611417 1 - 00 88042ca   data: 00000005
+  00 00964419734:611419 1 - 00 88042ca   data: 00000006
+  00 00964419734:611422 1 - 00 88042ca   data: 00000007
+  00 00964419734:611425 1 - 00 88042ca   data: 00000008
+  00 00964419734:611428 1 - 00 88042ca   data: 00000009
diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt
deleted file mode 100644
index 61329fd62e89..000000000000
--- a/Documentation/s390/s390dbf.txt
+++ /dev/null
@@ -1,667 +0,0 @@
-S390 Debug Feature
-==================
-
-files: arch/s390/kernel/debug.c
-       arch/s390/include/asm/debug.h
-
-Description:
-------------
-The goal of this feature is to provide a kernel debug logging API 
-where log records can be stored efficiently in memory, where each component 
-(e.g. device drivers) can have one separate debug log.
-One purpose of this is to inspect the debug logs after a production system crash
-in order to analyze the reason for the crash.
-If the system still runs but only a subcomponent which uses dbf fails,
-it is possible to look at the debug logs on a live system via the Linux
-debugfs filesystem.
-The debug feature may also very useful for kernel and driver development.
-
-Design:
--------
-Kernel components (e.g. device drivers) can register themselves at the debug 
-feature with the function call debug_register(). This function initializes a 
-debug log for the caller. For each debug log exists a number of debug areas 
-where exactly one is active at one time.  Each debug area consists of contiguous
-pages in memory. In the debug areas there are stored debug entries (log records)
-which are written by event- and exception-calls. 
-
-An event-call writes the specified debug entry to the active debug
-area and updates the log pointer for the active area. If the end 
-of the active debug area is reached, a wrap around is done (ring buffer) 
-and the next debug entry will be written at the beginning of the active 
-debug area.
-
-An exception-call writes the specified debug entry to the log and
-switches to the next debug area. This is done in order to be sure
-that the records which describe the origin of the exception are not
-overwritten when a wrap around for the current area occurs.
-
-The debug areas themselves are also ordered in form of a ring buffer.
-When an exception is thrown in the last debug area, the following debug 
-entries are then written again in the very first area.
-
-There are three versions for the event- and exception-calls: One for
-logging raw data, one for text and one for numbers.
-
-Each debug entry contains the following data:
-
-- Timestamp
-- Cpu-Number of calling task
-- Level of debug entry (0...6)
-- Return Address to caller
-- Flag, if entry is an exception or not
-
-The debug logs can be inspected in a live system through entries in
-the debugfs-filesystem. Under the toplevel directory "s390dbf" there is
-a directory for each registered component, which is named like the
-corresponding component. The debugfs normally should be mounted to
-/sys/kernel/debug therefore the debug feature can be accessed under
-/sys/kernel/debug/s390dbf.
-
-The content of the directories are files which represent different views
-to the debug log. Each component can decide which views should be
-used through registering them with the function debug_register_view().
-Predefined views for hex/ascii, sprintf and raw binary data are provided.
-It is also possible to define other views. The content of
-a view can be inspected simply by reading the corresponding debugfs file.
-
-All debug logs have an actual debug level (range from 0 to 6).
-The default level is 3. Event and Exception functions have a 'level'
-parameter. Only debug entries with a level that is lower or equal
-than the actual level are written to the log. This means, when
-writing events, high priority log entries should have a low level
-value whereas low priority entries should have a high one.
-The actual debug level can be changed with the help of the debugfs-filesystem
-through writing a number string "x" to the 'level' debugfs file which is
-provided for every debug log. Debugging can be switched off completely
-by using "-" on the 'level' debugfs file.
-
-Example:
-
-> echo "-" > /sys/kernel/debug/s390dbf/dasd/level
-
-It is also possible to deactivate the debug feature globally for every
-debug log. You can change the behavior using  2 sysctl parameters in
-/proc/sys/s390dbf:
-There are currently 2 possible triggers, which stop the debug feature
-globally. The first possibility is to use the "debug_active" sysctl. If
-set to 1 the debug feature is running. If "debug_active" is set to 0 the
-debug feature is turned off.
-The second trigger which stops the debug feature is a kernel oops.
-That prevents the debug feature from overwriting debug information that
-happened before the oops. After an oops you can reactivate the debug feature
-by piping 1 to /proc/sys/s390dbf/debug_active. Nevertheless, its not
-suggested to use an oopsed kernel in a production environment.
-If you want to disallow the deactivation of the debug feature, you can use
-the "debug_stoppable" sysctl. If you set "debug_stoppable" to 0 the debug
-feature cannot be stopped. If the debug feature is already stopped, it
-will stay deactivated.
-
-Kernel Interfaces:
-------------------
-
-----------------------------------------------------------------------------
-debug_info_t *debug_register(char *name, int pages, int nr_areas,
-                             int buf_size);
-
-Parameter:    name:        Name of debug log (e.g. used for debugfs entry)
-              pages:       number of pages, which will be allocated per area
-              nr_areas:    number of debug areas
-              buf_size:    size of data area in each debug entry
-
-Return Value: Handle for generated debug area   
-              NULL if register failed 
-
-Description:  Allocates memory for a debug log     
-              Must not be called within an interrupt handler 
-
-----------------------------------------------------------------------------
-debug_info_t *debug_register_mode(char *name, int pages, int nr_areas,
-				  int buf_size, mode_t mode, uid_t uid,
-				  gid_t gid);
-
-Parameter:    name:	   Name of debug log (e.g. used for debugfs entry)
-	      pages:	   Number of pages, which will be allocated per area
-	      nr_areas:    Number of debug areas
-	      buf_size:    Size of data area in each debug entry
-	      mode:	   File mode for debugfs files. E.g. S_IRWXUGO
-	      uid:	   User ID for debugfs files. Currently only 0 is
-			   supported.
-	      gid:	   Group ID for debugfs files. Currently only 0 is
-			   supported.
-
-Return Value: Handle for generated debug area
-	      NULL if register failed
-
-Description:  Allocates memory for a debug log
-	      Must not be called within an interrupt handler
-
----------------------------------------------------------------------------
-void debug_unregister (debug_info_t * id);
-
-Parameter:     id:   handle for debug log  
-
-Return Value:  none 
-
-Description:   frees memory for a debug log and removes all registered debug
-	       views.
-               Must not be called within an interrupt handler 
-
----------------------------------------------------------------------------
-void debug_set_level (debug_info_t * id, int new_level);
-
-Parameter:     id:        handle for debug log  
-               new_level: new debug level 
-
-Return Value:  none 
-
-Description:   Sets new actual debug level if new_level is valid. 
-
----------------------------------------------------------------------------
-bool debug_level_enabled (debug_info_t * id, int level);
-
-Parameter:    id:	  handle for debug log
-	      level:	  debug level
-
-Return Value: True if level is less or equal to the current debug level.
-
-Description:  Returns true if debug events for the specified level would be
-	      logged. Otherwise returns false.
----------------------------------------------------------------------------
-void debug_stop_all(void);
-
-Parameter:     none
-
-Return Value:  none
-
-Description:   stops the debug feature if stopping is allowed. Currently
-               used in case of a kernel oops.
-
----------------------------------------------------------------------------
-debug_entry_t* debug_event (debug_info_t* id, int level, void* data, 
-                            int length);
-
-Parameter:     id:     handle for debug log  
-               level:  debug level           
-               data:   pointer to data for debug entry  
-               length: length of data in bytes       
-
-Return Value:  Address of written debug entry 
-
-Description:   writes debug entry to active debug area (if level <= actual 
-               debug level)    
-
----------------------------------------------------------------------------
-debug_entry_t* debug_int_event (debug_info_t * id, int level, 
-                                unsigned int data);
-debug_entry_t* debug_long_event(debug_info_t * id, int level,
-                                unsigned long data);
-
-Parameter:     id:     handle for debug log  
-               level:  debug level           
-               data:   integer value for debug entry           
-
-Return Value:  Address of written debug entry 
-
-Description:   writes debug entry to active debug area (if level <= actual 
-               debug level)    
-
----------------------------------------------------------------------------
-debug_entry_t* debug_text_event (debug_info_t * id, int level, 
-                                 const char* data);
-
-Parameter:     id:     handle for debug log  
-               level:  debug level           
-               data:   string for debug entry  
-
-Return Value:  Address of written debug entry 
-
-Description:   writes debug entry in ascii format to active debug area 
-               (if level <= actual debug level)     
-
----------------------------------------------------------------------------
-debug_entry_t* debug_sprintf_event (debug_info_t * id, int level, 
-                                    char* string,...);
-
-Parameter:     id:    handle for debug log 
-               level: debug level
-               string: format string for debug entry 
-               ...: varargs used as in sprintf()
-
-Return Value:  Address of written debug entry
-
-Description:   writes debug entry with format string and varargs (longs) to 
-               active debug area (if level $<=$ actual debug level). 
-               floats and long long datatypes cannot be used as varargs.
-
----------------------------------------------------------------------------
-
-debug_entry_t* debug_exception (debug_info_t* id, int level, void* data, 
-                                int length);
-
-Parameter:     id:     handle for debug log  
-               level:  debug level           
-               data:   pointer to data for debug entry  
-               length: length of data in bytes       
-
-Return Value:  Address of written debug entry 
-
-Description:   writes debug entry to active debug area (if level <= actual 
-               debug level) and switches to next debug area  
-
----------------------------------------------------------------------------
-debug_entry_t* debug_int_exception (debug_info_t * id, int level, 
-                                    unsigned int data);
-debug_entry_t* debug_long_exception(debug_info_t * id, int level,
-                                    unsigned long data);
-
-Parameter:     id:     handle for debug log  
-               level:  debug level           
-               data:   integer value for debug entry           
-
-Return Value:  Address of written debug entry 
-
-Description:   writes debug entry to active debug area (if level <= actual 
-               debug level) and switches to next debug area  
-
----------------------------------------------------------------------------
-debug_entry_t* debug_text_exception (debug_info_t * id, int level, 
-                                     const char* data);
-
-Parameter:     id:     handle for debug log  
-               level:  debug level           
-               data:   string for debug entry  
-
-Return Value:  Address of written debug entry 
-
-Description:   writes debug entry in ascii format to active debug area 
-               (if level <= actual debug level) and switches to next debug 
-               area  
-
----------------------------------------------------------------------------
-debug_entry_t* debug_sprintf_exception (debug_info_t * id, int level,
-                                        char* string,...);
-
-Parameter:     id:    handle for debug log  
-               level: debug level  
-               string: format string for debug entry  
-               ...: varargs used as in sprintf()
-
-Return Value:  Address of written debug entry 
-
-Description:   writes debug entry with format string and varargs (longs) to 
-               active debug area (if level $<=$ actual debug level) and
-               switches to next debug area. 
-               floats and long long datatypes cannot be used as varargs.
-
----------------------------------------------------------------------------
-
-int debug_register_view (debug_info_t * id, struct debug_view *view);
-
-Parameter:     id:    handle for debug log  
-               view:  pointer to debug view struct 
-
-Return Value:  0  : ok 
-               < 0: Error 
-
-Description:   registers new debug view and creates debugfs dir entry
-
----------------------------------------------------------------------------
-int debug_unregister_view (debug_info_t * id, struct debug_view *view); 
-
-Parameter:     id:    handle for debug log  
-               view:  pointer to debug view struct 
-
-Return Value:  0  : ok 
-               < 0: Error 
-
-Description:   unregisters debug view and removes debugfs dir entry
-
-
-
-Predefined views:
------------------
-
-extern struct debug_view debug_hex_ascii_view;
-extern struct debug_view debug_raw_view;
-extern struct debug_view debug_sprintf_view;
-
-Examples
---------
-
-/*
- * hex_ascii- + raw-view Example
- */
-
-#include <linux/init.h>
-#include <asm/debug.h>
-
-static debug_info_t* debug_info;
-
-static int init(void)
-{
-    /* register 4 debug areas with one page each and 4 byte data field */
-
-    debug_info = debug_register ("test", 1, 4, 4 );
-    debug_register_view(debug_info,&debug_hex_ascii_view);
-    debug_register_view(debug_info,&debug_raw_view);
-
-    debug_text_event(debug_info, 4 , "one ");
-    debug_int_exception(debug_info, 4, 4711);
-    debug_event(debug_info, 3, &debug_info, 4);
-
-    return 0;
-}
-
-static void cleanup(void)
-{
-    debug_unregister (debug_info);
-}
-
-module_init(init);
-module_exit(cleanup);
-
----------------------------------------------------------------------------
-
-/*
- * sprintf-view Example
- */
-
-#include <linux/init.h>
-#include <asm/debug.h>
-
-static debug_info_t* debug_info;
-
-static int init(void)
-{
-    /* register 4 debug areas with one page each and data field for */
-    /* format string pointer + 2 varargs (= 3 * sizeof(long))       */
-
-    debug_info = debug_register ("test", 1, 4, sizeof(long) * 3);
-    debug_register_view(debug_info,&debug_sprintf_view);
-
-    debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__);
-    debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info);
-
-    return 0;
-}
-
-static void cleanup(void)
-{
-    debug_unregister (debug_info);
-}
-
-module_init(init);
-module_exit(cleanup);
-
-
-
-Debugfs Interface
-----------------
-Views to the debug logs can be investigated through reading the corresponding 
-debugfs-files:
-
-Example:
-
-> ls /sys/kernel/debug/s390dbf/dasd
-flush  hex_ascii  level pages raw
-> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
-00 00974733272:680099 2 - 02 0006ad7e  07 ea 4a 90 | ....
-00 00974733272:682210 2 - 02 0006ade6  46 52 45 45 | FREE
-00 00974733272:682213 2 - 02 0006adf6  07 ea 4a 90 | ....
-00 00974733272:682281 1 * 02 0006ab08  41 4c 4c 43 | EXCP 
-01 00974733272:682284 2 - 02 0006ab16  45 43 4b 44 | ECKD
-01 00974733272:682287 2 - 02 0006ab28  00 00 00 04 | ....
-01 00974733272:682289 2 - 02 0006ab3e  00 00 00 20 | ... 
-01 00974733272:682297 2 - 02 0006ad7e  07 ea 4a 90 | ....
-01 00974733272:684384 2 - 00 0006ade6  46 52 45 45 | FREE
-01 00974733272:684388 2 - 00 0006adf6  07 ea 4a 90 | ....
-
-See section about predefined views for explanation of the above output!
-
-Changing the debug level
-------------------------
-
-Example:
-
-
-> cat /sys/kernel/debug/s390dbf/dasd/level
-3
-> echo "5" > /sys/kernel/debug/s390dbf/dasd/level
-> cat /sys/kernel/debug/s390dbf/dasd/level
-5
-
-Flushing debug areas
---------------------
-Debug areas can be flushed with piping the number of the desired
-area (0...n) to the debugfs file "flush". When using "-" all debug areas
-are flushed.
-
-Examples:
-
-1. Flush debug area 0:
-> echo "0" > /sys/kernel/debug/s390dbf/dasd/flush
-
-2. Flush all debug areas:
-> echo "-" > /sys/kernel/debug/s390dbf/dasd/flush
-
-Changing the size of debug areas
-------------------------------------
-It is possible the change the size of debug areas through piping
-the number of pages to the debugfs file "pages". The resize request will
-also flush the debug areas.
-
-Example:
-
-Define 4 pages for the debug areas of debug feature "dasd":
-> echo "4" > /sys/kernel/debug/s390dbf/dasd/pages
-
-Stooping the debug feature
---------------------------
-Example:
-
-1. Check if stopping is allowed
-> cat /proc/sys/s390dbf/debug_stoppable
-2. Stop debug feature
-> echo 0 > /proc/sys/s390dbf/debug_active
-
-lcrash Interface
-----------------
-It is planned that the dump analysis tool lcrash gets an additional command
-'s390dbf' to display all the debug logs. With this tool it will be possible 
-to investigate the debug logs on a live system and with a memory dump after 
-a system crash.
-
-Investigating raw memory
-------------------------
-One last possibility to investigate the debug logs at a live
-system and after a system crash is to look at the raw memory
-under VM or at the Service Element.
-It is possible to find the anker of the debug-logs through
-the 'debug_area_first' symbol in the System map. Then one has
-to follow the correct pointers of the data-structures defined
-in debug.h and find the debug-areas in memory.
-Normally modules which use the debug feature will also have
-a global variable with the pointer to the debug-logs. Following
-this pointer it will also be possible to find the debug logs in
-memory.
-
-For this method it is recommended to use '16 * x + 4' byte (x = 0..n)
-for the length of the data field in debug_register() in
-order to see the debug entries well formatted.
-
-
-Predefined Views
-----------------
-
-There are three predefined views: hex_ascii, raw and sprintf. 
-The hex_ascii view shows the data field in hex and ascii representation 
-(e.g. '45 43 4b 44 | ECKD'). 
-The raw view returns a bytestream as the debug areas are stored in memory.
-
-The sprintf view formats the debug entries in the same way as the sprintf
-function would do. The sprintf event/exception functions write to the
-debug entry a pointer to the format string (size = sizeof(long)) 
-and for each vararg a long value. So e.g. for a debug entry with a format 
-string plus two varargs one would need to allocate a (3 * sizeof(long)) 
-byte data area in the debug_register() function.
-
-IMPORTANT: Using "%s" in sprintf event functions is dangerous. You can only
-use "%s" in the sprintf event functions, if the memory for the passed string is
-available as long as the debug feature exists. The reason behind this is that
-due to performance considerations only a pointer to the string is stored in
-the debug feature. If you log a string that is freed afterwards, you will get
-an OOPS when inspecting the debug feature, because then the debug feature will
-access the already freed memory.
-
-NOTE: If using the sprintf view do NOT use other event/exception functions
-than the sprintf-event and -exception functions.
-
-The format of the hex_ascii and sprintf view is as follows:
-- Number of area
-- Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated 
-  Universal Time (UTC), January 1, 1970)
-- level of debug entry
-- Exception flag (* = Exception)
-- Cpu-Number of calling task
-- Return Address to caller
-- data field
-
-The format of the raw view is:
-- Header as described in debug.h
-- datafield 
-
-A typical line of the hex_ascii view will look like the following (first line 
-is only for explanation and will not be displayed when 'cating' the view):
-
-area  time           level exception cpu caller    data (hex + ascii)
---------------------------------------------------------------------------
-00    00964419409:440690 1 -         00  88023fe   
-
-
-Defining views
---------------
-
-Views are specified with the 'debug_view' structure. There are defined
-callback functions which are used for reading and writing the debugfs files:
-
-struct debug_view {
-        char name[DEBUG_MAX_PROCF_LEN];  
-        debug_prolog_proc_t* prolog_proc; 
-        debug_header_proc_t* header_proc;
-        debug_format_proc_t* format_proc;
-        debug_input_proc_t*  input_proc;
-	void*                private_data;
-};
-
-where
-
-typedef int (debug_header_proc_t) (debug_info_t* id,
-                                   struct debug_view* view,
-                                   int area,
-                                   debug_entry_t* entry,
-                                   char* out_buf);
-
-typedef int (debug_format_proc_t) (debug_info_t* id,
-                                   struct debug_view* view, char* out_buf,
-                                   const char* in_buf);
-typedef int (debug_prolog_proc_t) (debug_info_t* id,
-                                   struct debug_view* view,
-                                   char* out_buf);
-typedef int (debug_input_proc_t) (debug_info_t* id,
-                                  struct debug_view* view,
-                                  struct file* file, const char* user_buf,
-                                  size_t in_buf_size, loff_t* offset);
-
-
-The "private_data" member can be used as pointer to view specific data.
-It is not used by the debug feature itself.
-
-The output when reading a debugfs file is structured like this:
-
-"prolog_proc output"
-
-"header_proc output 1"  "format_proc output 1"
-"header_proc output 2"  "format_proc output 2"
-"header_proc output 3"  "format_proc output 3"
-...
-
-When a view is read from the debugfs, the Debug Feature calls the
-'prolog_proc' once for writing the prolog.
-Then 'header_proc' and 'format_proc' are called for each 
-existing debug entry.
-
-The input_proc can be used to implement functionality when it is written to 
-the view (e.g. like with 'echo "0" > /sys/kernel/debug/s390dbf/dasd/level).
-
-For header_proc there can be used the default function
-debug_dflt_header_fn() which is defined in debug.h.
-and which produces the same header output as the predefined views.
-E.g:
-00 00964419409:440761 2 - 00 88023ec
-
-In order to see how to use the callback functions check the implementation
-of the default views!
-
-Example
-
-#include <asm/debug.h>
-
-#define UNKNOWNSTR "data: %08x"
-
-const char* messages[] =
-{"This error...........\n",
- "That error...........\n",
- "Problem..............\n",
- "Something went wrong.\n",
- "Everything ok........\n",
- NULL
-};
-
-static int debug_test_format_fn(
-   debug_info_t * id, struct debug_view *view, 
-   char *out_buf, const char *in_buf
-)
-{
-  int i, rc = 0;
-
-  if(id->buf_size >= 4) {
-     int msg_nr = *((int*)in_buf);
-     if(msg_nr < sizeof(messages)/sizeof(char*) - 1)
-        rc += sprintf(out_buf, "%s", messages[msg_nr]);	
-     else
-        rc += sprintf(out_buf, UNKNOWNSTR, msg_nr);
-  }
- out:
-   return rc;
-}
-
-struct debug_view debug_test_view = {
-  "myview",                 /* name of view */
-  NULL,                     /* no prolog */
-  &debug_dflt_header_fn,    /* default header for each entry */
-  &debug_test_format_fn,    /* our own format function */
-  NULL,                     /* no input function */
-  NULL                      /* no private data */
-};
-
-=====
-test:
-=====
-debug_info_t *debug_info;
-...
-debug_info = debug_register ("test", 0, 4, 4 ));
-debug_register_view(debug_info, &debug_test_view);
-for(i = 0; i < 10; i ++) debug_int_event(debug_info, 1, i);
-
-> cat /sys/kernel/debug/s390dbf/test/myview
-00 00964419734:611402 1 - 00 88042ca   This error...........
-00 00964419734:611405 1 - 00 88042ca   That error...........
-00 00964419734:611408 1 - 00 88042ca   Problem..............
-00 00964419734:611411 1 - 00 88042ca   Something went wrong.
-00 00964419734:611414 1 - 00 88042ca   Everything ok........
-00 00964419734:611417 1 - 00 88042ca   data: 00000005
-00 00964419734:611419 1 - 00 88042ca   data: 00000006
-00 00964419734:611422 1 - 00 88042ca   data: 00000007
-00 00964419734:611425 1 - 00 88042ca   data: 00000008
-00 00964419734:611428 1 - 00 88042ca   data: 00000009
diff --git a/Documentation/s390/text_files.rst b/Documentation/s390/text_files.rst
new file mode 100644
index 000000000000..c94d05d4fa17
--- /dev/null
+++ b/Documentation/s390/text_files.rst
@@ -0,0 +1,11 @@
+ibm 3270 changelog
+------------------
+
+.. include:: 3270.ChangeLog
+    :literal:
+
+ibm 3270 config3270.sh
+----------------------
+
+.. literalinclude:: config3270.sh
+    :language: shell
diff --git a/Documentation/s390/vfio-ap.rst b/Documentation/s390/vfio-ap.rst
new file mode 100644
index 000000000000..b5c51f7c748d
--- /dev/null
+++ b/Documentation/s390/vfio-ap.rst
@@ -0,0 +1,866 @@
+===============================
+Adjunct Processor (AP) facility
+===============================
+
+
+Introduction
+============
+The Adjunct Processor (AP) facility is an IBM Z cryptographic facility comprised
+of three AP instructions and from 1 up to 256 PCIe cryptographic adapter cards.
+The AP devices provide cryptographic functions to all CPUs assigned to a
+linux system running in an IBM Z system LPAR.
+
+The AP adapter cards are exposed via the AP bus. The motivation for vfio-ap
+is to make AP cards available to KVM guests using the VFIO mediated device
+framework. This implementation relies considerably on the s390 virtualization
+facilities which do most of the hard work of providing direct access to AP
+devices.
+
+AP Architectural Overview
+=========================
+To facilitate the comprehension of the design, let's start with some
+definitions:
+
+* AP adapter
+
+  An AP adapter is an IBM Z adapter card that can perform cryptographic
+  functions. There can be from 0 to 256 adapters assigned to an LPAR. Adapters
+  assigned to the LPAR in which a linux host is running will be available to
+  the linux host. Each adapter is identified by a number from 0 to 255; however,
+  the maximum adapter number is determined by machine model and/or adapter type.
+  When installed, an AP adapter is accessed by AP instructions executed by any
+  CPU.
+
+  The AP adapter cards are assigned to a given LPAR via the system's Activation
+  Profile which can be edited via the HMC. When the linux host system is IPL'd
+  in the LPAR, the AP bus detects the AP adapter cards assigned to the LPAR and
+  creates a sysfs device for each assigned adapter. For example, if AP adapters
+  4 and 10 (0x0a) are assigned to the LPAR, the AP bus will create the following
+  sysfs device entries::
+
+    /sys/devices/ap/card04
+    /sys/devices/ap/card0a
+
+  Symbolic links to these devices will also be created in the AP bus devices
+  sub-directory::
+
+    /sys/bus/ap/devices/[card04]
+    /sys/bus/ap/devices/[card04]
+
+* AP domain
+
+  An adapter is partitioned into domains. An adapter can hold up to 256 domains
+  depending upon the adapter type and hardware configuration. A domain is
+  identified by a number from 0 to 255; however, the maximum domain number is
+  determined by machine model and/or adapter type.. A domain can be thought of
+  as a set of hardware registers and memory used for processing AP commands. A
+  domain can be configured with a secure private key used for clear key
+  encryption. A domain is classified in one of two ways depending upon how it
+  may be accessed:
+
+    * Usage domains are domains that are targeted by an AP instruction to
+      process an AP command.
+
+    * Control domains are domains that are changed by an AP command sent to a
+      usage domain; for example, to set the secure private key for the control
+      domain.
+
+  The AP usage and control domains are assigned to a given LPAR via the system's
+  Activation Profile which can be edited via the HMC. When a linux host system
+  is IPL'd in the LPAR, the AP bus module detects the AP usage and control
+  domains assigned to the LPAR. The domain number of each usage domain and
+  adapter number of each AP adapter are combined to create AP queue devices
+  (see AP Queue section below). The domain number of each control domain will be
+  represented in a bitmask and stored in a sysfs file
+  /sys/bus/ap/ap_control_domain_mask. The bits in the mask, from most to least
+  significant bit, correspond to domains 0-255.
+
+* AP Queue
+
+  An AP queue is the means by which an AP command is sent to a usage domain
+  inside a specific adapter. An AP queue is identified by a tuple
+  comprised of an AP adapter ID (APID) and an AP queue index (APQI). The
+  APQI corresponds to a given usage domain number within the adapter. This tuple
+  forms an AP Queue Number (APQN) uniquely identifying an AP queue. AP
+  instructions include a field containing the APQN to identify the AP queue to
+  which the AP command is to be sent for processing.
+
+  The AP bus will create a sysfs device for each APQN that can be derived from
+  the cross product of the AP adapter and usage domain numbers detected when the
+  AP bus module is loaded. For example, if adapters 4 and 10 (0x0a) and usage
+  domains 6 and 71 (0x47) are assigned to the LPAR, the AP bus will create the
+  following sysfs entries::
+
+    /sys/devices/ap/card04/04.0006
+    /sys/devices/ap/card04/04.0047
+    /sys/devices/ap/card0a/0a.0006
+    /sys/devices/ap/card0a/0a.0047
+
+  The following symbolic links to these devices will be created in the AP bus
+  devices subdirectory::
+
+    /sys/bus/ap/devices/[04.0006]
+    /sys/bus/ap/devices/[04.0047]
+    /sys/bus/ap/devices/[0a.0006]
+    /sys/bus/ap/devices/[0a.0047]
+
+* AP Instructions:
+
+  There are three AP instructions:
+
+  * NQAP: to enqueue an AP command-request message to a queue
+  * DQAP: to dequeue an AP command-reply message from a queue
+  * PQAP: to administer the queues
+
+  AP instructions identify the domain that is targeted to process the AP
+  command; this must be one of the usage domains. An AP command may modify a
+  domain that is not one of the usage domains, but the modified domain
+  must be one of the control domains.
+
+AP and SIE
+==========
+Let's now take a look at how AP instructions executed on a guest are interpreted
+by the hardware.
+
+A satellite control block called the Crypto Control Block (CRYCB) is attached to
+our main hardware virtualization control block. The CRYCB contains three fields
+to identify the adapters, usage domains and control domains assigned to the KVM
+guest:
+
+* The AP Mask (APM) field is a bit mask that identifies the AP adapters assigned
+  to the KVM guest. Each bit in the mask, from left to right (i.e. from most
+  significant to least significant bit in big endian order), corresponds to
+  an APID from 0-255. If a bit is set, the corresponding adapter is valid for
+  use by the KVM guest.
+
+* The AP Queue Mask (AQM) field is a bit mask identifying the AP usage domains
+  assigned to the KVM guest. Each bit in the mask, from left to right (i.e. from
+  most significant to least significant bit in big endian order), corresponds to
+  an AP queue index (APQI) from 0-255. If a bit is set, the corresponding queue
+  is valid for use by the KVM guest.
+
+* The AP Domain Mask field is a bit mask that identifies the AP control domains
+  assigned to the KVM guest. The ADM bit mask controls which domains can be
+  changed by an AP command-request message sent to a usage domain from the
+  guest. Each bit in the mask, from left to right (i.e. from most significant to
+  least significant bit in big endian order), corresponds to a domain from
+  0-255. If a bit is set, the corresponding domain can be modified by an AP
+  command-request message sent to a usage domain.
+
+If you recall from the description of an AP Queue, AP instructions include
+an APQN to identify the AP queue to which an AP command-request message is to be
+sent (NQAP and PQAP instructions), or from which a command-reply message is to
+be received (DQAP instruction). The validity of an APQN is defined by the matrix
+calculated from the APM and AQM; it is the cross product of all assigned adapter
+numbers (APM) with all assigned queue indexes (AQM). For example, if adapters 1
+and 2 and usage domains 5 and 6 are assigned to a guest, the APQNs (1,5), (1,6),
+(2,5) and (2,6) will be valid for the guest.
+
+The APQNs can provide secure key functionality - i.e., a private key is stored
+on the adapter card for each of its domains - so each APQN must be assigned to
+at most one guest or to the linux host::
+
+   Example 1: Valid configuration:
+   ------------------------------
+   Guest1: adapters 1,2  domains 5,6
+   Guest2: adapter  1,2  domain 7
+
+   This is valid because both guests have a unique set of APQNs:
+      Guest1 has APQNs (1,5), (1,6), (2,5), (2,6);
+      Guest2 has APQNs (1,7), (2,7)
+
+   Example 2: Valid configuration:
+   ------------------------------
+   Guest1: adapters 1,2 domains 5,6
+   Guest2: adapters 3,4 domains 5,6
+
+   This is also valid because both guests have a unique set of APQNs:
+      Guest1 has APQNs (1,5), (1,6), (2,5), (2,6);
+      Guest2 has APQNs (3,5), (3,6), (4,5), (4,6)
+
+   Example 3: Invalid configuration:
+   --------------------------------
+   Guest1: adapters 1,2  domains 5,6
+   Guest2: adapter  1    domains 6,7
+
+   This is an invalid configuration because both guests have access to
+   APQN (1,6).
+
+The Design
+==========
+The design introduces three new objects:
+
+1. AP matrix device
+2. VFIO AP device driver (vfio_ap.ko)
+3. VFIO AP mediated matrix pass-through device
+
+The VFIO AP device driver
+-------------------------
+The VFIO AP (vfio_ap) device driver serves the following purposes:
+
+1. Provides the interfaces to secure APQNs for exclusive use of KVM guests.
+
+2. Sets up the VFIO mediated device interfaces to manage a mediated matrix
+   device and creates the sysfs interfaces for assigning adapters, usage
+   domains, and control domains comprising the matrix for a KVM guest.
+
+3. Configures the APM, AQM and ADM in the CRYCB referenced by a KVM guest's
+   SIE state description to grant the guest access to a matrix of AP devices
+
+Reserve APQNs for exclusive use of KVM guests
+---------------------------------------------
+The following block diagram illustrates the mechanism by which APQNs are
+reserved::
+
+				+------------------+
+		 7 remove       |                  |
+	   +--------------------> cex4queue driver |
+	   |                    |                  |
+	   |                    +------------------+
+	   |
+	   |
+	   |                    +------------------+          +----------------+
+	   |  5 register driver |                  | 3 create |                |
+	   |   +---------------->   Device core    +---------->  matrix device |
+	   |   |                |                  |          |                |
+	   |   |                +--------^---------+          +----------------+
+	   |   |                         |
+	   |   |                         +-------------------+
+	   |   | +-----------------------------------+       |
+	   |   | |      4 register AP driver         |       | 2 register device
+	   |   | |                                   |       |
+  +--------+---+-v---+                      +--------+-------+-+
+  |                  |                      |                  |
+  |      ap_bus      +--------------------- >  vfio_ap driver  |
+  |                  |       8 probe        |                  |
+  +--------^---------+                      +--^--^------------+
+  6 edit   |                                   |  |
+    apmask |     +-----------------------------+  | 9 mdev create
+    aqmask |     |           1 modprobe           |
+  +--------+-----+---+           +----------------+-+         +----------------+
+  |                  |           |                  |8 create |     mediated   |
+  |      admin       |           | VFIO device core |--------->     matrix     |
+  |                  +           |                  |         |     device     |
+  +------+-+---------+           +--------^---------+         +--------^-------+
+	 | |                              |                            |
+	 | | 9 create vfio_ap-passthrough |                            |
+	 | +------------------------------+                            |
+	 +-------------------------------------------------------------+
+		     10  assign adapter/domain/control domain
+
+The process for reserving an AP queue for use by a KVM guest is:
+
+1. The administrator loads the vfio_ap device driver
+2. The vfio-ap driver during its initialization will register a single 'matrix'
+   device with the device core. This will serve as the parent device for
+   all mediated matrix devices used to configure an AP matrix for a guest.
+3. The /sys/devices/vfio_ap/matrix device is created by the device core
+4. The vfio_ap device driver will register with the AP bus for AP queue devices
+   of type 10 and higher (CEX4 and newer). The driver will provide the vfio_ap
+   driver's probe and remove callback interfaces. Devices older than CEX4 queues
+   are not supported to simplify the implementation by not needlessly
+   complicating the design by supporting older devices that will go out of
+   service in the relatively near future, and for which there are few older
+   systems around on which to test.
+5. The AP bus registers the vfio_ap device driver with the device core
+6. The administrator edits the AP adapter and queue masks to reserve AP queues
+   for use by the vfio_ap device driver.
+7. The AP bus removes the AP queues reserved for the vfio_ap driver from the
+   default zcrypt cex4queue driver.
+8. The AP bus probes the vfio_ap device driver to bind the queues reserved for
+   it.
+9. The administrator creates a passthrough type mediated matrix device to be
+   used by a guest
+10. The administrator assigns the adapters, usage domains and control domains
+    to be exclusively used by a guest.
+
+Set up the VFIO mediated device interfaces
+------------------------------------------
+The VFIO AP device driver utilizes the common interface of the VFIO mediated
+device core driver to:
+
+* Register an AP mediated bus driver to add a mediated matrix device to and
+  remove it from a VFIO group.
+* Create and destroy a mediated matrix device
+* Add a mediated matrix device to and remove it from the AP mediated bus driver
+* Add a mediated matrix device to and remove it from an IOMMU group
+
+The following high-level block diagram shows the main components and interfaces
+of the VFIO AP mediated matrix device driver::
+
+   +-------------+
+   |             |
+   | +---------+ | mdev_register_driver() +--------------+
+   | |  Mdev   | +<-----------------------+              |
+   | |  bus    | |                        | vfio_mdev.ko |
+   | | driver  | +----------------------->+              |<-> VFIO user
+   | +---------+ |    probe()/remove()    +--------------+    APIs
+   |             |
+   |  MDEV CORE  |
+   |   MODULE    |
+   |   mdev.ko   |
+   | +---------+ | mdev_register_device() +--------------+
+   | |Physical | +<-----------------------+              |
+   | | device  | |                        |  vfio_ap.ko  |<-> matrix
+   | |interface| +----------------------->+              |    device
+   | +---------+ |       callback         +--------------+
+   +-------------+
+
+During initialization of the vfio_ap module, the matrix device is registered
+with an 'mdev_parent_ops' structure that provides the sysfs attribute
+structures, mdev functions and callback interfaces for managing the mediated
+matrix device.
+
+* sysfs attribute structures:
+
+  supported_type_groups
+    The VFIO mediated device framework supports creation of user-defined
+    mediated device types. These mediated device types are specified
+    via the 'supported_type_groups' structure when a device is registered
+    with the mediated device framework. The registration process creates the
+    sysfs structures for each mediated device type specified in the
+    'mdev_supported_types' sub-directory of the device being registered. Along
+    with the device type, the sysfs attributes of the mediated device type are
+    provided.
+
+    The VFIO AP device driver will register one mediated device type for
+    passthrough devices:
+
+      /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough
+
+    Only the read-only attributes required by the VFIO mdev framework will
+    be provided::
+
+	... name
+	... device_api
+	... available_instances
+	... device_api
+
+    Where:
+
+	* name:
+	    specifies the name of the mediated device type
+	* device_api:
+	    the mediated device type's API
+	* available_instances:
+	    the number of mediated matrix passthrough devices
+	    that can be created
+	* device_api:
+	    specifies the VFIO API
+  mdev_attr_groups
+    This attribute group identifies the user-defined sysfs attributes of the
+    mediated device. When a device is registered with the VFIO mediated device
+    framework, the sysfs attribute files identified in the 'mdev_attr_groups'
+    structure will be created in the mediated matrix device's directory. The
+    sysfs attributes for a mediated matrix device are:
+
+    assign_adapter / unassign_adapter:
+      Write-only attributes for assigning/unassigning an AP adapter to/from the
+      mediated matrix device. To assign/unassign an adapter, the APID of the
+      adapter is echoed to the respective attribute file.
+    assign_domain / unassign_domain:
+      Write-only attributes for assigning/unassigning an AP usage domain to/from
+      the mediated matrix device. To assign/unassign a domain, the domain
+      number of the the usage domain is echoed to the respective attribute
+      file.
+    matrix:
+      A read-only file for displaying the APQNs derived from the cross product
+      of the adapter and domain numbers assigned to the mediated matrix device.
+    assign_control_domain / unassign_control_domain:
+      Write-only attributes for assigning/unassigning an AP control domain
+      to/from the mediated matrix device. To assign/unassign a control domain,
+      the ID of the domain to be assigned/unassigned is echoed to the respective
+      attribute file.
+    control_domains:
+      A read-only file for displaying the control domain numbers assigned to the
+      mediated matrix device.
+
+* functions:
+
+  create:
+    allocates the ap_matrix_mdev structure used by the vfio_ap driver to:
+
+    * Store the reference to the KVM structure for the guest using the mdev
+    * Store the AP matrix configuration for the adapters, domains, and control
+      domains assigned via the corresponding sysfs attributes files
+
+  remove:
+    deallocates the mediated matrix device's ap_matrix_mdev structure. This will
+    be allowed only if a running guest is not using the mdev.
+
+* callback interfaces
+
+  open:
+    The vfio_ap driver uses this callback to register a
+    VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the mdev matrix
+    device. The open is invoked when QEMU connects the VFIO iommu group
+    for the mdev matrix device to the MDEV bus. Access to the KVM structure used
+    to configure the KVM guest is provided via this callback. The KVM structure,
+    is used to configure the guest's access to the AP matrix defined via the
+    mediated matrix device's sysfs attribute files.
+  release:
+    unregisters the VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the
+    mdev matrix device and deconfigures the guest's AP matrix.
+
+Configure the APM, AQM and ADM in the CRYCB
+-------------------------------------------
+Configuring the AP matrix for a KVM guest will be performed when the
+VFIO_GROUP_NOTIFY_SET_KVM notifier callback is invoked. The notifier
+function is called when QEMU connects to KVM. The guest's AP matrix is
+configured via it's CRYCB by:
+
+* Setting the bits in the APM corresponding to the APIDs assigned to the
+  mediated matrix device via its 'assign_adapter' interface.
+* Setting the bits in the AQM corresponding to the domains assigned to the
+  mediated matrix device via its 'assign_domain' interface.
+* Setting the bits in the ADM corresponding to the domain dIDs assigned to the
+  mediated matrix device via its 'assign_control_domains' interface.
+
+The CPU model features for AP
+-----------------------------
+The AP stack relies on the presence of the AP instructions as well as two
+facilities: The AP Facilities Test (APFT) facility; and the AP Query
+Configuration Information (QCI) facility. These features/facilities are made
+available to a KVM guest via the following CPU model features:
+
+1. ap: Indicates whether the AP instructions are installed on the guest. This
+   feature will be enabled by KVM only if the AP instructions are installed
+   on the host.
+
+2. apft: Indicates the APFT facility is available on the guest. This facility
+   can be made available to the guest only if it is available on the host (i.e.,
+   facility bit 15 is set).
+
+3. apqci: Indicates the AP QCI facility is available on the guest. This facility
+   can be made available to the guest only if it is available on the host (i.e.,
+   facility bit 12 is set).
+
+Note: If the user chooses to specify a CPU model different than the 'host'
+model to QEMU, the CPU model features and facilities need to be turned on
+explicitly; for example::
+
+     /usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on
+
+A guest can be precluded from using AP features/facilities by turning them off
+explicitly; for example::
+
+     /usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off
+
+Note: If the APFT facility is turned off (apft=off) for the guest, the guest
+will not see any AP devices. The zcrypt device drivers that register for type 10
+and newer AP devices - i.e., the cex4card and cex4queue device drivers - need
+the APFT facility to ascertain the facilities installed on a given AP device. If
+the APFT facility is not installed on the guest, then the probe of device
+drivers will fail since only type 10 and newer devices can be configured for
+guest use.
+
+Example
+=======
+Let's now provide an example to illustrate how KVM guests may be given
+access to AP facilities. For this example, we will show how to configure
+three guests such that executing the lszcrypt command on the guests would
+look like this:
+
+Guest1
+------
+=========== ===== ============
+CARD.DOMAIN TYPE  MODE
+=========== ===== ============
+05          CEX5C CCA-Coproc
+05.0004     CEX5C CCA-Coproc
+05.00ab     CEX5C CCA-Coproc
+06          CEX5A Accelerator
+06.0004     CEX5A Accelerator
+06.00ab     CEX5C CCA-Coproc
+=========== ===== ============
+
+Guest2
+------
+=========== ===== ============
+CARD.DOMAIN TYPE  MODE
+=========== ===== ============
+05          CEX5A Accelerator
+05.0047     CEX5A Accelerator
+05.00ff     CEX5A Accelerator
+=========== ===== ============
+
+Guest2
+------
+=========== ===== ============
+CARD.DOMAIN TYPE  MODE
+=========== ===== ============
+06          CEX5A Accelerator
+06.0047     CEX5A Accelerator
+06.00ff     CEX5A Accelerator
+=========== ===== ============
+
+These are the steps:
+
+1. Install the vfio_ap module on the linux host. The dependency chain for the
+   vfio_ap module is:
+   * iommu
+   * s390
+   * zcrypt
+   * vfio
+   * vfio_mdev
+   * vfio_mdev_device
+   * KVM
+
+   To build the vfio_ap module, the kernel build must be configured with the
+   following Kconfig elements selected:
+   * IOMMU_SUPPORT
+   * S390
+   * ZCRYPT
+   * S390_AP_IOMMU
+   * VFIO
+   * VFIO_MDEV
+   * VFIO_MDEV_DEVICE
+   * KVM
+
+   If using make menuconfig select the following to build the vfio_ap module::
+
+     -> Device Drivers
+	-> IOMMU Hardware Support
+	   select S390 AP IOMMU Support
+	-> VFIO Non-Privileged userspace driver framework
+	   -> Mediated device driver frramework
+	      -> VFIO driver for Mediated devices
+     -> I/O subsystem
+	-> VFIO support for AP devices
+
+2. Secure the AP queues to be used by the three guests so that the host can not
+   access them. To secure them, there are two sysfs files that specify
+   bitmasks marking a subset of the APQN range as 'usable by the default AP
+   queue device drivers' or 'not usable by the default device drivers' and thus
+   available for use by the vfio_ap device driver'. The location of the sysfs
+   files containing the masks are::
+
+     /sys/bus/ap/apmask
+     /sys/bus/ap/aqmask
+
+   The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs
+   (APID). Each bit in the mask, from left to right (i.e., from most significant
+   to least significant bit in big endian order), corresponds to an APID from
+   0-255. If a bit is set, the APID is marked as usable only by the default AP
+   queue device drivers; otherwise, the APID is usable by the vfio_ap
+   device driver.
+
+   The 'aqmask' is a 256-bit mask that identifies a set of AP queue indexes
+   (APQI). Each bit in the mask, from left to right (i.e., from most significant
+   to least significant bit in big endian order), corresponds to an APQI from
+   0-255. If a bit is set, the APQI is marked as usable only by the default AP
+   queue device drivers; otherwise, the APQI is usable by the vfio_ap device
+   driver.
+
+   Take, for example, the following mask::
+
+      0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+
+    It indicates:
+
+      1, 2, 3, 4, 5, and 7-255 belong to the default drivers' pool, and 0 and 6
+      belong to the vfio_ap device driver's pool.
+
+   The APQN of each AP queue device assigned to the linux host is checked by the
+   AP bus against the set of APQNs derived from the cross product of APIDs
+   and APQIs marked as usable only by the default AP queue device drivers. If a
+   match is detected,  only the default AP queue device drivers will be probed;
+   otherwise, the vfio_ap device driver will be probed.
+
+   By default, the two masks are set to reserve all APQNs for use by the default
+   AP queue device drivers. There are two ways the default masks can be changed:
+
+   1. The sysfs mask files can be edited by echoing a string into the
+      respective sysfs mask file in one of two formats:
+
+      * An absolute hex string starting with 0x - like "0x12345678" - sets
+	the mask. If the given string is shorter than the mask, it is padded
+	with 0s on the right; for example, specifying a mask value of 0x41 is
+	the same as specifying::
+
+	   0x4100000000000000000000000000000000000000000000000000000000000000
+
+	Keep in mind that the mask reads from left to right (i.e., most
+	significant to least significant bit in big endian order), so the mask
+	above identifies device numbers 1 and 7 (01000001).
+
+	If the string is longer than the mask, the operation is terminated with
+	an error (EINVAL).
+
+      * Individual bits in the mask can be switched on and off by specifying
+	each bit number to be switched in a comma separated list. Each bit
+	number string must be prepended with a ('+') or minus ('-') to indicate
+	the corresponding bit is to be switched on ('+') or off ('-'). Some
+	valid values are:
+
+	   - "+0"    switches bit 0 on
+	   - "-13"   switches bit 13 off
+	   - "+0x41" switches bit 65 on
+	   - "-0xff" switches bit 255 off
+
+	The following example:
+
+	      +0,-6,+0x47,-0xf0
+
+	Switches bits 0 and 71 (0x47) on
+
+	Switches bits 6 and 240 (0xf0) off
+
+	Note that the bits not specified in the list remain as they were before
+	the operation.
+
+   2. The masks can also be changed at boot time via parameters on the kernel
+      command line like this:
+
+	 ap.apmask=0xffff ap.aqmask=0x40
+
+	 This would create the following masks::
+
+	    apmask:
+	    0xffff000000000000000000000000000000000000000000000000000000000000
+
+	    aqmask:
+	    0x4000000000000000000000000000000000000000000000000000000000000000
+
+	 Resulting in these two pools::
+
+	    default drivers pool:    adapter 0-15, domain 1
+	    alternate drivers pool:  adapter 16-255, domains 0, 2-255
+
+Securing the APQNs for our example
+----------------------------------
+   To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 06.0047,
+   06.00ab, and 06.00ff for use by the vfio_ap device driver, the corresponding
+   APQNs can either be removed from the default masks::
+
+      echo -5,-6 > /sys/bus/ap/apmask
+
+      echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask
+
+   Or the masks can be set as follows::
+
+      echo 0xf9ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff \
+      > apmask
+
+      echo 0xf7fffffffffffffffeffffffffffffffffffffffffeffffffffffffffffffffe \
+      > aqmask
+
+   This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004,
+   06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The
+   sysfs directory for the vfio_ap device driver will now contain symbolic links
+   to the AP queue devices bound to it::
+
+     /sys/bus/ap
+     ... [drivers]
+     ...... [vfio_ap]
+     ......... [05.0004]
+     ......... [05.0047]
+     ......... [05.00ab]
+     ......... [05.00ff]
+     ......... [06.0004]
+     ......... [06.0047]
+     ......... [06.00ab]
+     ......... [06.00ff]
+
+   Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later)
+   can be bound to the vfio_ap device driver. The reason for this is to
+   simplify the implementation by not needlessly complicating the design by
+   supporting older devices that will go out of service in the relatively near
+   future and for which there are few older systems on which to test.
+
+   The administrator, therefore, must take care to secure only AP queues that
+   can be bound to the vfio_ap device driver. The device type for a given AP
+   queue device can be read from the parent card's sysfs directory. For example,
+   to see the hardware type of the queue 05.0004:
+
+     cat /sys/bus/ap/devices/card05/hwtype
+
+   The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the
+   vfio_ap device driver.
+
+3. Create the mediated devices needed to configure the AP matrixes for the
+   three guests and to provide an interface to the vfio_ap driver for
+   use by the guests::
+
+     /sys/devices/vfio_ap/matrix/
+     --- [mdev_supported_types]
+     ------ [vfio_ap-passthrough] (passthrough mediated matrix device type)
+     --------- create
+     --------- [devices]
+
+   To create the mediated devices for the three guests::
+
+	uuidgen > create
+	uuidgen > create
+	uuidgen > create
+
+	or
+
+	echo $uuid1 > create
+	echo $uuid2 > create
+	echo $uuid3 > create
+
+   This will create three mediated devices in the [devices] subdirectory named
+   after the UUID written to the create attribute file. We call them $uuid1,
+   $uuid2 and $uuid3 and this is the sysfs directory structure after creation::
+
+     /sys/devices/vfio_ap/matrix/
+     --- [mdev_supported_types]
+     ------ [vfio_ap-passthrough]
+     --------- [devices]
+     ------------ [$uuid1]
+     --------------- assign_adapter
+     --------------- assign_control_domain
+     --------------- assign_domain
+     --------------- matrix
+     --------------- unassign_adapter
+     --------------- unassign_control_domain
+     --------------- unassign_domain
+
+     ------------ [$uuid2]
+     --------------- assign_adapter
+     --------------- assign_control_domain
+     --------------- assign_domain
+     --------------- matrix
+     --------------- unassign_adapter
+     ----------------unassign_control_domain
+     ----------------unassign_domain
+
+     ------------ [$uuid3]
+     --------------- assign_adapter
+     --------------- assign_control_domain
+     --------------- assign_domain
+     --------------- matrix
+     --------------- unassign_adapter
+     ----------------unassign_control_domain
+     ----------------unassign_domain
+
+4. The administrator now needs to configure the matrixes for the mediated
+   devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3).
+
+   This is how the matrix is configured for Guest1::
+
+      echo 5 > assign_adapter
+      echo 6 > assign_adapter
+      echo 4 > assign_domain
+      echo 0xab > assign_domain
+
+   Control domains can similarly be assigned using the assign_control_domain
+   sysfs file.
+
+   If a mistake is made configuring an adapter, domain or control domain,
+   you can use the unassign_xxx files to unassign the adapter, domain or
+   control domain.
+
+   To display the matrix configuration for Guest1::
+
+	 cat matrix
+
+   This is how the matrix is configured for Guest2::
+
+      echo 5 > assign_adapter
+      echo 0x47 > assign_domain
+      echo 0xff > assign_domain
+
+   This is how the matrix is configured for Guest3::
+
+      echo 6 > assign_adapter
+      echo 0x47 > assign_domain
+      echo 0xff > assign_domain
+
+   In order to successfully assign an adapter:
+
+   * The adapter number specified must represent a value from 0 up to the
+     maximum adapter number configured for the system. If an adapter number
+     higher than the maximum is specified, the operation will terminate with
+     an error (ENODEV).
+
+   * All APQNs that can be derived from the adapter ID and the IDs of
+     the previously assigned domains must be bound to the vfio_ap device
+     driver. If no domains have yet been assigned, then there must be at least
+     one APQN with the specified APID bound to the vfio_ap driver. If no such
+     APQNs are bound to the driver, the operation will terminate with an
+     error (EADDRNOTAVAIL).
+
+     No APQN that can be derived from the adapter ID and the IDs of the
+     previously assigned domains can be assigned to another mediated matrix
+     device. If an APQN is assigned to another mediated matrix device, the
+     operation will terminate with an error (EADDRINUSE).
+
+   In order to successfully assign a domain:
+
+   * The domain number specified must represent a value from 0 up to the
+     maximum domain number configured for the system. If a domain number
+     higher than the maximum is specified, the operation will terminate with
+     an error (ENODEV).
+
+   * All APQNs that can be derived from the domain ID and the IDs of
+     the previously assigned adapters must be bound to the vfio_ap device
+     driver. If no domains have yet been assigned, then there must be at least
+     one APQN with the specified APQI bound to the vfio_ap driver. If no such
+     APQNs are bound to the driver, the operation will terminate with an
+     error (EADDRNOTAVAIL).
+
+     No APQN that can be derived from the domain ID and the IDs of the
+     previously assigned adapters can be assigned to another mediated matrix
+     device. If an APQN is assigned to another mediated matrix device, the
+     operation will terminate with an error (EADDRINUSE).
+
+   In order to successfully assign a control domain, the domain number
+   specified must represent a value from 0 up to the maximum domain number
+   configured for the system. If a control domain number higher than the maximum
+   is specified, the operation will terminate with an error (ENODEV).
+
+5. Start Guest1::
+
+     /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+	-device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
+
+7. Start Guest2::
+
+     /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+	-device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
+
+7. Start Guest3::
+
+     /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+	-device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
+
+When the guest is shut down, the mediated matrix devices may be removed.
+
+Using our example again, to remove the mediated matrix device $uuid1::
+
+   /sys/devices/vfio_ap/matrix/
+      --- [mdev_supported_types]
+      ------ [vfio_ap-passthrough]
+      --------- [devices]
+      ------------ [$uuid1]
+      --------------- remove
+
+::
+
+   echo 1 > remove
+
+This will remove all of the mdev matrix device's sysfs structures including
+the mdev device itself. To recreate and reconfigure the mdev matrix device,
+all of the steps starting with step 3 will have to be performed again. Note
+that the remove will fail if a guest using the mdev is still running.
+
+It is not necessary to remove an mdev matrix device, but one may want to
+remove it if no guest will use it during the remaining lifetime of the linux
+host. If the mdev matrix device is removed, one may want to also reconfigure
+the pool of adapters and queues reserved for use by the default drivers.
+
+Limitations
+===========
+* The KVM/kernel interfaces do not provide a way to prevent restoring an APQN
+  to the default drivers pool of a queue that is still assigned to a mediated
+  device in use by a guest. It is incumbent upon the administrator to
+  ensure there is no mediated device in use by a guest to which the APQN is
+  assigned lest the host be given access to the private data of the AP queue
+  device such as a private key configured specifically for the guest.
+
+* Dynamically modifying the AP matrix for a running guest (which would amount to
+  hot(un)plug of AP devices for the guest) is currently not supported
+
+* Live guest migration is not supported for guests using AP devices.
diff --git a/Documentation/s390/vfio-ap.txt b/Documentation/s390/vfio-ap.txt
deleted file mode 100644
index 65167cfe4485..000000000000
--- a/Documentation/s390/vfio-ap.txt
+++ /dev/null
@@ -1,837 +0,0 @@
-Introduction:
-============
-The Adjunct Processor (AP) facility is an IBM Z cryptographic facility comprised
-of three AP instructions and from 1 up to 256 PCIe cryptographic adapter cards.
-The AP devices provide cryptographic functions to all CPUs assigned to a
-linux system running in an IBM Z system LPAR.
-
-The AP adapter cards are exposed via the AP bus. The motivation for vfio-ap
-is to make AP cards available to KVM guests using the VFIO mediated device
-framework. This implementation relies considerably on the s390 virtualization
-facilities which do most of the hard work of providing direct access to AP
-devices.
-
-AP Architectural Overview:
-=========================
-To facilitate the comprehension of the design, let's start with some
-definitions:
-
-* AP adapter
-
-  An AP adapter is an IBM Z adapter card that can perform cryptographic
-  functions. There can be from 0 to 256 adapters assigned to an LPAR. Adapters
-  assigned to the LPAR in which a linux host is running will be available to
-  the linux host. Each adapter is identified by a number from 0 to 255; however,
-  the maximum adapter number is determined by machine model and/or adapter type.
-  When installed, an AP adapter is accessed by AP instructions executed by any
-  CPU.
-
-  The AP adapter cards are assigned to a given LPAR via the system's Activation
-  Profile which can be edited via the HMC. When the linux host system is IPL'd
-  in the LPAR, the AP bus detects the AP adapter cards assigned to the LPAR and
-  creates a sysfs device for each assigned adapter. For example, if AP adapters
-  4 and 10 (0x0a) are assigned to the LPAR, the AP bus will create the following
-  sysfs device entries:
-
-    /sys/devices/ap/card04
-    /sys/devices/ap/card0a
-
-  Symbolic links to these devices will also be created in the AP bus devices
-  sub-directory:
-
-    /sys/bus/ap/devices/[card04]
-    /sys/bus/ap/devices/[card04]
-
-* AP domain
-
-  An adapter is partitioned into domains. An adapter can hold up to 256 domains
-  depending upon the adapter type and hardware configuration. A domain is
-  identified by a number from 0 to 255; however, the maximum domain number is
-  determined by machine model and/or adapter type.. A domain can be thought of
-  as a set of hardware registers and memory used for processing AP commands. A
-  domain can be configured with a secure private key used for clear key
-  encryption. A domain is classified in one of two ways depending upon how it
-  may be accessed:
-
-    * Usage domains are domains that are targeted by an AP instruction to
-      process an AP command.
-
-    * Control domains are domains that are changed by an AP command sent to a
-      usage domain; for example, to set the secure private key for the control
-      domain.
-
-  The AP usage and control domains are assigned to a given LPAR via the system's
-  Activation Profile which can be edited via the HMC. When a linux host system
-  is IPL'd in the LPAR, the AP bus module detects the AP usage and control
-  domains assigned to the LPAR. The domain number of each usage domain and
-  adapter number of each AP adapter are combined to create AP queue devices
-  (see AP Queue section below). The domain number of each control domain will be
-  represented in a bitmask and stored in a sysfs file
-  /sys/bus/ap/ap_control_domain_mask. The bits in the mask, from most to least
-  significant bit, correspond to domains 0-255.
-
-* AP Queue
-
-  An AP queue is the means by which an AP command is sent to a usage domain
-  inside a specific adapter. An AP queue is identified by a tuple
-  comprised of an AP adapter ID (APID) and an AP queue index (APQI). The
-  APQI corresponds to a given usage domain number within the adapter. This tuple
-  forms an AP Queue Number (APQN) uniquely identifying an AP queue. AP
-  instructions include a field containing the APQN to identify the AP queue to
-  which the AP command is to be sent for processing.
-
-  The AP bus will create a sysfs device for each APQN that can be derived from
-  the cross product of the AP adapter and usage domain numbers detected when the
-  AP bus module is loaded. For example, if adapters 4 and 10 (0x0a) and usage
-  domains 6 and 71 (0x47) are assigned to the LPAR, the AP bus will create the
-  following sysfs entries:
-
-    /sys/devices/ap/card04/04.0006
-    /sys/devices/ap/card04/04.0047
-    /sys/devices/ap/card0a/0a.0006
-    /sys/devices/ap/card0a/0a.0047
-
-  The following symbolic links to these devices will be created in the AP bus
-  devices subdirectory:
-
-    /sys/bus/ap/devices/[04.0006]
-    /sys/bus/ap/devices/[04.0047]
-    /sys/bus/ap/devices/[0a.0006]
-    /sys/bus/ap/devices/[0a.0047]
-
-* AP Instructions:
-
-  There are three AP instructions:
-
-  * NQAP: to enqueue an AP command-request message to a queue
-  * DQAP: to dequeue an AP command-reply message from a queue
-  * PQAP: to administer the queues
-
-  AP instructions identify the domain that is targeted to process the AP
-  command; this must be one of the usage domains. An AP command may modify a
-  domain that is not one of the usage domains, but the modified domain
-  must be one of the control domains.
-
-AP and SIE:
-==========
-Let's now take a look at how AP instructions executed on a guest are interpreted
-by the hardware.
-
-A satellite control block called the Crypto Control Block (CRYCB) is attached to
-our main hardware virtualization control block. The CRYCB contains three fields
-to identify the adapters, usage domains and control domains assigned to the KVM
-guest:
-
-* The AP Mask (APM) field is a bit mask that identifies the AP adapters assigned
-  to the KVM guest. Each bit in the mask, from left to right (i.e. from most
-  significant to least significant bit in big endian order), corresponds to
-  an APID from 0-255. If a bit is set, the corresponding adapter is valid for
-  use by the KVM guest.
-
-* The AP Queue Mask (AQM) field is a bit mask identifying the AP usage domains
-  assigned to the KVM guest. Each bit in the mask, from left to right (i.e. from
-  most significant to least significant bit in big endian order), corresponds to
-  an AP queue index (APQI) from 0-255. If a bit is set, the corresponding queue
-  is valid for use by the KVM guest.
-
-* The AP Domain Mask field is a bit mask that identifies the AP control domains
-  assigned to the KVM guest. The ADM bit mask controls which domains can be
-  changed by an AP command-request message sent to a usage domain from the
-  guest. Each bit in the mask, from left to right (i.e. from most significant to
-  least significant bit in big endian order), corresponds to a domain from
-  0-255. If a bit is set, the corresponding domain can be modified by an AP
-  command-request message sent to a usage domain.
-
-If you recall from the description of an AP Queue, AP instructions include
-an APQN to identify the AP queue to which an AP command-request message is to be
-sent (NQAP and PQAP instructions), or from which a command-reply message is to
-be received (DQAP instruction). The validity of an APQN is defined by the matrix
-calculated from the APM and AQM; it is the cross product of all assigned adapter
-numbers (APM) with all assigned queue indexes (AQM). For example, if adapters 1
-and 2 and usage domains 5 and 6 are assigned to a guest, the APQNs (1,5), (1,6),
-(2,5) and (2,6) will be valid for the guest.
-
-The APQNs can provide secure key functionality - i.e., a private key is stored
-on the adapter card for each of its domains - so each APQN must be assigned to
-at most one guest or to the linux host.
-
-   Example 1: Valid configuration:
-   ------------------------------
-   Guest1: adapters 1,2  domains 5,6
-   Guest2: adapter  1,2  domain 7
-
-   This is valid because both guests have a unique set of APQNs:
-      Guest1 has APQNs (1,5), (1,6), (2,5), (2,6);
-      Guest2 has APQNs (1,7), (2,7)
-
-   Example 2: Valid configuration:
-   ------------------------------
-   Guest1: adapters 1,2 domains 5,6
-   Guest2: adapters 3,4 domains 5,6
-
-   This is also valid because both guests have a unique set of APQNs:
-      Guest1 has APQNs (1,5), (1,6), (2,5), (2,6);
-      Guest2 has APQNs (3,5), (3,6), (4,5), (4,6)
-
-   Example 3: Invalid configuration:
-   --------------------------------
-   Guest1: adapters 1,2  domains 5,6
-   Guest2: adapter  1    domains 6,7
-
-   This is an invalid configuration because both guests have access to
-   APQN (1,6).
-
-The Design:
-===========
-The design introduces three new objects:
-
-1. AP matrix device
-2. VFIO AP device driver (vfio_ap.ko)
-3. VFIO AP mediated matrix pass-through device
-
-The VFIO AP device driver
--------------------------
-The VFIO AP (vfio_ap) device driver serves the following purposes:
-
-1. Provides the interfaces to secure APQNs for exclusive use of KVM guests.
-
-2. Sets up the VFIO mediated device interfaces to manage a mediated matrix
-   device and creates the sysfs interfaces for assigning adapters, usage
-   domains, and control domains comprising the matrix for a KVM guest.
-
-3. Configures the APM, AQM and ADM in the CRYCB referenced by a KVM guest's
-   SIE state description to grant the guest access to a matrix of AP devices
-
-Reserve APQNs for exclusive use of KVM guests
----------------------------------------------
-The following block diagram illustrates the mechanism by which APQNs are
-reserved:
-
-                              +------------------+
-               7 remove       |                  |
-         +--------------------> cex4queue driver |
-         |                    |                  |
-         |                    +------------------+
-         |
-         |
-         |                    +------------------+          +-----------------+
-         |  5 register driver |                  | 3 create |                 |
-         |   +---------------->   Device core    +---------->  matrix device  |
-         |   |                |                  |          |                 |
-         |   |                +--------^---------+          +-----------------+
-         |   |                         |
-         |   |                         +-------------------+
-         |   | +-----------------------------------+       |
-         |   | |      4 register AP driver         |       | 2 register device
-         |   | |                                   |       |
-+--------+---+-v---+                      +--------+-------+-+
-|                  |                      |                  |
-|      ap_bus      +--------------------- >  vfio_ap driver  |
-|                  |       8 probe        |                  |
-+--------^---------+                      +--^--^------------+
-6 edit   |                                   |  |
-  apmask |     +-----------------------------+  | 9 mdev create
-  aqmask |     |           1 modprobe           |
-+--------+-----+---+           +----------------+-+         +------------------+
-|                  |           |                  |8 create |     mediated     |
-|      admin       |           | VFIO device core |--------->     matrix       |
-|                  +           |                  |         |     device       |
-+------+-+---------+           +--------^---------+         +--------^---------+
-       | |                              |                            |
-       | | 9 create vfio_ap-passthrough |                            |
-       | +------------------------------+                            |
-       +-------------------------------------------------------------+
-                   10  assign adapter/domain/control domain
-
-The process for reserving an AP queue for use by a KVM guest is:
-
-1. The administrator loads the vfio_ap device driver
-2. The vfio-ap driver during its initialization will register a single 'matrix'
-   device with the device core. This will serve as the parent device for
-   all mediated matrix devices used to configure an AP matrix for a guest.
-3. The /sys/devices/vfio_ap/matrix device is created by the device core
-4  The vfio_ap device driver will register with the AP bus for AP queue devices
-   of type 10 and higher (CEX4 and newer). The driver will provide the vfio_ap
-   driver's probe and remove callback interfaces. Devices older than CEX4 queues
-   are not supported to simplify the implementation by not needlessly
-   complicating the design by supporting older devices that will go out of
-   service in the relatively near future, and for which there are few older
-   systems around on which to test.
-5. The AP bus registers the vfio_ap device driver with the device core
-6. The administrator edits the AP adapter and queue masks to reserve AP queues
-   for use by the vfio_ap device driver.
-7. The AP bus removes the AP queues reserved for the vfio_ap driver from the
-   default zcrypt cex4queue driver.
-8. The AP bus probes the vfio_ap device driver to bind the queues reserved for
-   it.
-9. The administrator creates a passthrough type mediated matrix device to be
-   used by a guest
-10 The administrator assigns the adapters, usage domains and control domains
-   to be exclusively used by a guest.
-
-Set up the VFIO mediated device interfaces
-------------------------------------------
-The VFIO AP device driver utilizes the common interface of the VFIO mediated
-device core driver to:
-* Register an AP mediated bus driver to add a mediated matrix device to and
-  remove it from a VFIO group.
-* Create and destroy a mediated matrix device
-* Add a mediated matrix device to and remove it from the AP mediated bus driver
-* Add a mediated matrix device to and remove it from an IOMMU group
-
-The following high-level block diagram shows the main components and interfaces
-of the VFIO AP mediated matrix device driver:
-
- +-------------+
- |             |
- | +---------+ | mdev_register_driver() +--------------+
- | |  Mdev   | +<-----------------------+              |
- | |  bus    | |                        | vfio_mdev.ko |
- | | driver  | +----------------------->+              |<-> VFIO user
- | +---------+ |    probe()/remove()    +--------------+    APIs
- |             |
- |  MDEV CORE  |
- |   MODULE    |
- |   mdev.ko   |
- | +---------+ | mdev_register_device() +--------------+
- | |Physical | +<-----------------------+              |
- | | device  | |                        |  vfio_ap.ko  |<-> matrix
- | |interface| +----------------------->+              |    device
- | +---------+ |       callback         +--------------+
- +-------------+
-
-During initialization of the vfio_ap module, the matrix device is registered
-with an 'mdev_parent_ops' structure that provides the sysfs attribute
-structures, mdev functions and callback interfaces for managing the mediated
-matrix device.
-
-* sysfs attribute structures:
-  * supported_type_groups
-    The VFIO mediated device framework supports creation of user-defined
-    mediated device types. These mediated device types are specified
-    via the 'supported_type_groups' structure when a device is registered
-    with the mediated device framework. The registration process creates the
-    sysfs structures for each mediated device type specified in the
-    'mdev_supported_types' sub-directory of the device being registered. Along
-    with the device type, the sysfs attributes of the mediated device type are
-    provided.
-
-    The VFIO AP device driver will register one mediated device type for
-    passthrough devices:
-      /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough
-    Only the read-only attributes required by the VFIO mdev framework will
-    be provided:
-        ... name
-        ... device_api
-        ... available_instances
-        ... device_api
-        Where:
-        * name: specifies the name of the mediated device type
-        * device_api: the mediated device type's API
-        * available_instances: the number of mediated matrix passthrough devices
-                               that can be created
-        * device_api: specifies the VFIO API
-  * mdev_attr_groups
-    This attribute group identifies the user-defined sysfs attributes of the
-    mediated device. When a device is registered with the VFIO mediated device
-    framework, the sysfs attribute files identified in the 'mdev_attr_groups'
-    structure will be created in the mediated matrix device's directory. The
-    sysfs attributes for a mediated matrix device are:
-    * assign_adapter:
-    * unassign_adapter:
-      Write-only attributes for assigning/unassigning an AP adapter to/from the
-      mediated matrix device. To assign/unassign an adapter, the APID of the
-      adapter is echoed to the respective attribute file.
-    * assign_domain:
-    * unassign_domain:
-      Write-only attributes for assigning/unassigning an AP usage domain to/from
-      the mediated matrix device. To assign/unassign a domain, the domain
-      number of the the usage domain is echoed to the respective attribute
-      file.
-    * matrix:
-      A read-only file for displaying the APQNs derived from the cross product
-      of the adapter and domain numbers assigned to the mediated matrix device.
-    * assign_control_domain:
-    * unassign_control_domain:
-      Write-only attributes for assigning/unassigning an AP control domain
-      to/from the mediated matrix device. To assign/unassign a control domain,
-      the ID of the domain to be assigned/unassigned is echoed to the respective
-      attribute file.
-    * control_domains:
-      A read-only file for displaying the control domain numbers assigned to the
-      mediated matrix device.
-
-* functions:
-  * create:
-    allocates the ap_matrix_mdev structure used by the vfio_ap driver to:
-    * Store the reference to the KVM structure for the guest using the mdev
-    * Store the AP matrix configuration for the adapters, domains, and control
-      domains assigned via the corresponding sysfs attributes files
-  * remove:
-    deallocates the mediated matrix device's ap_matrix_mdev structure. This will
-    be allowed only if a running guest is not using the mdev.
-
-* callback interfaces
-  * open:
-    The vfio_ap driver uses this callback to register a
-    VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the mdev matrix
-    device. The open is invoked when QEMU connects the VFIO iommu group
-    for the mdev matrix device to the MDEV bus. Access to the KVM structure used
-    to configure the KVM guest is provided via this callback. The KVM structure,
-    is used to configure the guest's access to the AP matrix defined via the
-    mediated matrix device's sysfs attribute files.
-  * release:
-    unregisters the VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the
-    mdev matrix device and deconfigures the guest's AP matrix.
-
-Configure the APM, AQM and ADM in the CRYCB:
--------------------------------------------
-Configuring the AP matrix for a KVM guest will be performed when the
-VFIO_GROUP_NOTIFY_SET_KVM notifier callback is invoked. The notifier
-function is called when QEMU connects to KVM. The guest's AP matrix is
-configured via it's CRYCB by:
-* Setting the bits in the APM corresponding to the APIDs assigned to the
-  mediated matrix device via its 'assign_adapter' interface.
-* Setting the bits in the AQM corresponding to the domains assigned to the
-  mediated matrix device via its 'assign_domain' interface.
-* Setting the bits in the ADM corresponding to the domain dIDs assigned to the
-  mediated matrix device via its 'assign_control_domains' interface.
-
-The CPU model features for AP
------------------------------
-The AP stack relies on the presence of the AP instructions as well as two
-facilities: The AP Facilities Test (APFT) facility; and the AP Query
-Configuration Information (QCI) facility. These features/facilities are made
-available to a KVM guest via the following CPU model features:
-
-1. ap: Indicates whether the AP instructions are installed on the guest. This
-   feature will be enabled by KVM only if the AP instructions are installed
-   on the host.
-
-2. apft: Indicates the APFT facility is available on the guest. This facility
-   can be made available to the guest only if it is available on the host (i.e.,
-   facility bit 15 is set).
-
-3. apqci: Indicates the AP QCI facility is available on the guest. This facility
-   can be made available to the guest only if it is available on the host (i.e.,
-   facility bit 12 is set).
-
-Note: If the user chooses to specify a CPU model different than the 'host'
-model to QEMU, the CPU model features and facilities need to be turned on
-explicitly; for example:
-
-     /usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on
-
-A guest can be precluded from using AP features/facilities by turning them off
-explicitly; for example:
-
-     /usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off
-
-Note: If the APFT facility is turned off (apft=off) for the guest, the guest
-will not see any AP devices. The zcrypt device drivers that register for type 10
-and newer AP devices - i.e., the cex4card and cex4queue device drivers - need
-the APFT facility to ascertain the facilities installed on a given AP device. If
-the APFT facility is not installed on the guest, then the probe of device
-drivers will fail since only type 10 and newer devices can be configured for
-guest use.
-
-Example:
-=======
-Let's now provide an example to illustrate how KVM guests may be given
-access to AP facilities. For this example, we will show how to configure
-three guests such that executing the lszcrypt command on the guests would
-look like this:
-
-Guest1
-------
-CARD.DOMAIN TYPE  MODE
-------------------------------
-05          CEX5C CCA-Coproc
-05.0004     CEX5C CCA-Coproc
-05.00ab     CEX5C CCA-Coproc
-06          CEX5A Accelerator
-06.0004     CEX5A Accelerator
-06.00ab     CEX5C CCA-Coproc
-
-Guest2
-------
-CARD.DOMAIN TYPE  MODE
-------------------------------
-05          CEX5A Accelerator
-05.0047     CEX5A Accelerator
-05.00ff     CEX5A Accelerator
-
-Guest2
-------
-CARD.DOMAIN TYPE  MODE
-------------------------------
-06          CEX5A Accelerator
-06.0047     CEX5A Accelerator
-06.00ff     CEX5A Accelerator
-
-These are the steps:
-
-1. Install the vfio_ap module on the linux host. The dependency chain for the
-   vfio_ap module is:
-   * iommu
-   * s390
-   * zcrypt
-   * vfio
-   * vfio_mdev
-   * vfio_mdev_device
-   * KVM
-
-   To build the vfio_ap module, the kernel build must be configured with the
-   following Kconfig elements selected:
-   * IOMMU_SUPPORT
-   * S390
-   * ZCRYPT
-   * S390_AP_IOMMU
-   * VFIO
-   * VFIO_MDEV
-   * VFIO_MDEV_DEVICE
-   * KVM
-
-   If using make menuconfig select the following to build the vfio_ap module:
-   -> Device Drivers
-      -> IOMMU Hardware Support
-         select S390 AP IOMMU Support
-      -> VFIO Non-Privileged userspace driver framework
-         -> Mediated device driver frramework
-            -> VFIO driver for Mediated devices
-   -> I/O subsystem
-      -> VFIO support for AP devices
-
-2. Secure the AP queues to be used by the three guests so that the host can not
-   access them. To secure them, there are two sysfs files that specify
-   bitmasks marking a subset of the APQN range as 'usable by the default AP
-   queue device drivers' or 'not usable by the default device drivers' and thus
-   available for use by the vfio_ap device driver'. The location of the sysfs
-   files containing the masks are:
-
-   /sys/bus/ap/apmask
-   /sys/bus/ap/aqmask
-
-   The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs
-   (APID). Each bit in the mask, from left to right (i.e., from most significant
-   to least significant bit in big endian order), corresponds to an APID from
-   0-255. If a bit is set, the APID is marked as usable only by the default AP
-   queue device drivers; otherwise, the APID is usable by the vfio_ap
-   device driver.
-
-   The 'aqmask' is a 256-bit mask that identifies a set of AP queue indexes
-   (APQI). Each bit in the mask, from left to right (i.e., from most significant
-   to least significant bit in big endian order), corresponds to an APQI from
-   0-255. If a bit is set, the APQI is marked as usable only by the default AP
-   queue device drivers; otherwise, the APQI is usable by the vfio_ap device
-   driver.
-
-   Take, for example, the following mask:
-
-      0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-
-    It indicates:
-
-      1, 2, 3, 4, 5, and 7-255 belong to the default drivers' pool, and 0 and 6
-      belong to the vfio_ap device driver's pool.
-
-   The APQN of each AP queue device assigned to the linux host is checked by the
-   AP bus against the set of APQNs derived from the cross product of APIDs
-   and APQIs marked as usable only by the default AP queue device drivers. If a
-   match is detected,  only the default AP queue device drivers will be probed;
-   otherwise, the vfio_ap device driver will be probed.
-
-   By default, the two masks are set to reserve all APQNs for use by the default
-   AP queue device drivers. There are two ways the default masks can be changed:
-
-   1. The sysfs mask files can be edited by echoing a string into the
-      respective sysfs mask file in one of two formats:
-
-      * An absolute hex string starting with 0x - like "0x12345678" - sets
-        the mask. If the given string is shorter than the mask, it is padded
-        with 0s on the right; for example, specifying a mask value of 0x41 is
-        the same as specifying:
-
-           0x4100000000000000000000000000000000000000000000000000000000000000
-
-        Keep in mind that the mask reads from left to right (i.e., most
-        significant to least significant bit in big endian order), so the mask
-        above identifies device numbers 1 and 7 (01000001).
-
-        If the string is longer than the mask, the operation is terminated with
-        an error (EINVAL).
-
-      * Individual bits in the mask can be switched on and off by specifying
-        each bit number to be switched in a comma separated list. Each bit
-        number string must be prepended with a ('+') or minus ('-') to indicate
-        the corresponding bit is to be switched on ('+') or off ('-'). Some
-        valid values are:
-
-           "+0"    switches bit 0 on
-           "-13"   switches bit 13 off
-           "+0x41" switches bit 65 on
-           "-0xff" switches bit 255 off
-
-           The following example:
-              +0,-6,+0x47,-0xf0
-
-              Switches bits 0 and 71 (0x47) on
-              Switches bits 6 and 240 (0xf0) off
-
-        Note that the bits not specified in the list remain as they were before
-        the operation.
-
-   2. The masks can also be changed at boot time via parameters on the kernel
-      command line like this:
-
-         ap.apmask=0xffff ap.aqmask=0x40
-
-         This would create the following masks:
-
-            apmask:
-            0xffff000000000000000000000000000000000000000000000000000000000000
-
-            aqmask:
-            0x4000000000000000000000000000000000000000000000000000000000000000
-
-         Resulting in these two pools:
-
-            default drivers pool:    adapter 0-15, domain 1
-            alternate drivers pool:  adapter 16-255, domains 0, 2-255
-
-   Securing the APQNs for our example:
-   ----------------------------------
-   To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 06.0047,
-   06.00ab, and 06.00ff for use by the vfio_ap device driver, the corresponding
-   APQNs can either be removed from the default masks:
-
-      echo -5,-6 > /sys/bus/ap/apmask
-
-      echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask
-
-   Or the masks can be set as follows:
-
-      echo 0xf9ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff \
-      > apmask
-
-      echo 0xf7fffffffffffffffeffffffffffffffffffffffffeffffffffffffffffffffe \
-      > aqmask
-
-   This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004,
-   06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The
-   sysfs directory for the vfio_ap device driver will now contain symbolic links
-   to the AP queue devices bound to it:
-
-   /sys/bus/ap
-   ... [drivers]
-   ...... [vfio_ap]
-   ......... [05.0004]
-   ......... [05.0047]
-   ......... [05.00ab]
-   ......... [05.00ff]
-   ......... [06.0004]
-   ......... [06.0047]
-   ......... [06.00ab]
-   ......... [06.00ff]
-
-   Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later)
-   can be bound to the vfio_ap device driver. The reason for this is to
-   simplify the implementation by not needlessly complicating the design by
-   supporting older devices that will go out of service in the relatively near
-   future and for which there are few older systems on which to test.
-
-   The administrator, therefore, must take care to secure only AP queues that
-   can be bound to the vfio_ap device driver. The device type for a given AP
-   queue device can be read from the parent card's sysfs directory. For example,
-   to see the hardware type of the queue 05.0004:
-
-   cat /sys/bus/ap/devices/card05/hwtype
-
-   The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the
-   vfio_ap device driver.
-
-3. Create the mediated devices needed to configure the AP matrixes for the
-   three guests and to provide an interface to the vfio_ap driver for
-   use by the guests:
-
-   /sys/devices/vfio_ap/matrix/
-   --- [mdev_supported_types]
-   ------ [vfio_ap-passthrough] (passthrough mediated matrix device type)
-   --------- create
-   --------- [devices]
-
-   To create the mediated devices for the three guests:
-
-	uuidgen > create
-	uuidgen > create
-	uuidgen > create
-
-        or
-
-        echo $uuid1 > create
-        echo $uuid2 > create
-        echo $uuid3 > create
-
-   This will create three mediated devices in the [devices] subdirectory named
-   after the UUID written to the create attribute file. We call them $uuid1,
-   $uuid2 and $uuid3 and this is the sysfs directory structure after creation:
-
-   /sys/devices/vfio_ap/matrix/
-   --- [mdev_supported_types]
-   ------ [vfio_ap-passthrough]
-   --------- [devices]
-   ------------ [$uuid1]
-   --------------- assign_adapter
-   --------------- assign_control_domain
-   --------------- assign_domain
-   --------------- matrix
-   --------------- unassign_adapter
-   --------------- unassign_control_domain
-   --------------- unassign_domain
-
-   ------------ [$uuid2]
-   --------------- assign_adapter
-   --------------- assign_control_domain
-   --------------- assign_domain
-   --------------- matrix
-   --------------- unassign_adapter
-   ----------------unassign_control_domain
-   ----------------unassign_domain
-
-   ------------ [$uuid3]
-   --------------- assign_adapter
-   --------------- assign_control_domain
-   --------------- assign_domain
-   --------------- matrix
-   --------------- unassign_adapter
-   ----------------unassign_control_domain
-   ----------------unassign_domain
-
-4. The administrator now needs to configure the matrixes for the mediated
-   devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3).
-
-   This is how the matrix is configured for Guest1:
-
-      echo 5 > assign_adapter
-      echo 6 > assign_adapter
-      echo 4 > assign_domain
-      echo 0xab > assign_domain
-
-      Control domains can similarly be assigned using the assign_control_domain
-      sysfs file.
-
-      If a mistake is made configuring an adapter, domain or control domain,
-      you can use the unassign_xxx files to unassign the adapter, domain or
-      control domain.
-
-      To display the matrix configuration for Guest1:
-
-         cat matrix
-
-   This is how the matrix is configured for Guest2:
-
-      echo 5 > assign_adapter
-      echo 0x47 > assign_domain
-      echo 0xff > assign_domain
-
-   This is how the matrix is configured for Guest3:
-
-      echo 6 > assign_adapter
-      echo 0x47 > assign_domain
-      echo 0xff > assign_domain
-
-   In order to successfully assign an adapter:
-
-   * The adapter number specified must represent a value from 0 up to the
-     maximum adapter number configured for the system. If an adapter number
-     higher than the maximum is specified, the operation will terminate with
-     an error (ENODEV).
-
-   * All APQNs that can be derived from the adapter ID and the IDs of
-     the previously assigned domains must be bound to the vfio_ap device
-     driver. If no domains have yet been assigned, then there must be at least
-     one APQN with the specified APID bound to the vfio_ap driver. If no such
-     APQNs are bound to the driver, the operation will terminate with an
-     error (EADDRNOTAVAIL).
-
-     No APQN that can be derived from the adapter ID and the IDs of the
-     previously assigned domains can be assigned to another mediated matrix
-     device. If an APQN is assigned to another mediated matrix device, the
-     operation will terminate with an error (EADDRINUSE).
-
-   In order to successfully assign a domain:
-
-   * The domain number specified must represent a value from 0 up to the
-     maximum domain number configured for the system. If a domain number
-     higher than the maximum is specified, the operation will terminate with
-     an error (ENODEV).
-
-   * All APQNs that can be derived from the domain ID and the IDs of
-     the previously assigned adapters must be bound to the vfio_ap device
-     driver. If no domains have yet been assigned, then there must be at least
-     one APQN with the specified APQI bound to the vfio_ap driver. If no such
-     APQNs are bound to the driver, the operation will terminate with an
-     error (EADDRNOTAVAIL).
-
-     No APQN that can be derived from the domain ID and the IDs of the
-     previously assigned adapters can be assigned to another mediated matrix
-     device. If an APQN is assigned to another mediated matrix device, the
-     operation will terminate with an error (EADDRINUSE).
-
-   In order to successfully assign a control domain, the domain number
-   specified must represent a value from 0 up to the maximum domain number
-   configured for the system. If a control domain number higher than the maximum
-   is specified, the operation will terminate with an error (ENODEV).
-
-5. Start Guest1:
-
-   /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
-      -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
-
-7. Start Guest2:
-
-   /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
-      -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
-
-7. Start Guest3:
-
-   /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
-      -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
-
-When the guest is shut down, the mediated matrix devices may be removed.
-
-Using our example again, to remove the mediated matrix device $uuid1:
-
-   /sys/devices/vfio_ap/matrix/
-      --- [mdev_supported_types]
-      ------ [vfio_ap-passthrough]
-      --------- [devices]
-      ------------ [$uuid1]
-      --------------- remove
-
-
-   echo 1 > remove
-
-   This will remove all of the mdev matrix device's sysfs structures including
-   the mdev device itself. To recreate and reconfigure the mdev matrix device,
-   all of the steps starting with step 3 will have to be performed again. Note
-   that the remove will fail if a guest using the mdev is still running.
-
-   It is not necessary to remove an mdev matrix device, but one may want to
-   remove it if no guest will use it during the remaining lifetime of the linux
-   host. If the mdev matrix device is removed, one may want to also reconfigure
-   the pool of adapters and queues reserved for use by the default drivers.
-
-Limitations
-===========
-* The KVM/kernel interfaces do not provide a way to prevent restoring an APQN
-  to the default drivers pool of a queue that is still assigned to a mediated
-  device in use by a guest. It is incumbent upon the administrator to
-  ensure there is no mediated device in use by a guest to which the APQN is
-  assigned lest the host be given access to the private data of the AP queue
-  device such as a private key configured specifically for the guest.
-
-* Dynamically modifying the AP matrix for a running guest (which would amount to
-  hot(un)plug of AP devices for the guest) is currently not supported
-
-* Live guest migration is not supported for guests using AP devices.
diff --git a/Documentation/s390/vfio-ccw.rst b/Documentation/s390/vfio-ccw.rst
new file mode 100644
index 000000000000..1f6d0b56d53e
--- /dev/null
+++ b/Documentation/s390/vfio-ccw.rst
@@ -0,0 +1,326 @@
+==================================
+vfio-ccw: the basic infrastructure
+==================================
+
+Introduction
+------------
+
+Here we describe the vfio support for I/O subchannel devices for
+Linux/s390. Motivation for vfio-ccw is to passthrough subchannels to a
+virtual machine, while vfio is the means.
+
+Different than other hardware architectures, s390 has defined a unified
+I/O access method, which is so called Channel I/O. It has its own access
+patterns:
+
+- Channel programs run asynchronously on a separate (co)processor.
+- The channel subsystem will access any memory designated by the caller
+  in the channel program directly, i.e. there is no iommu involved.
+
+Thus when we introduce vfio support for these devices, we realize it
+with a mediated device (mdev) implementation. The vfio mdev will be
+added to an iommu group, so as to make itself able to be managed by the
+vfio framework. And we add read/write callbacks for special vfio I/O
+regions to pass the channel programs from the mdev to its parent device
+(the real I/O subchannel device) to do further address translation and
+to perform I/O instructions.
+
+This document does not intend to explain the s390 I/O architecture in
+every detail. More information/reference could be found here:
+
+- A good start to know Channel I/O in general:
+  https://en.wikipedia.org/wiki/Channel_I/O
+- s390 architecture:
+  s390 Principles of Operation manual (IBM Form. No. SA22-7832)
+- The existing QEMU code which implements a simple emulated channel
+  subsystem could also be a good reference. It makes it easier to follow
+  the flow.
+  qemu/hw/s390x/css.c
+
+For vfio mediated device framework:
+- Documentation/vfio-mediated-device.txt
+
+Motivation of vfio-ccw
+----------------------
+
+Typically, a guest virtualized via QEMU/KVM on s390 only sees
+paravirtualized virtio devices via the "Virtio Over Channel I/O
+(virtio-ccw)" transport. This makes virtio devices discoverable via
+standard operating system algorithms for handling channel devices.
+
+However this is not enough. On s390 for the majority of devices, which
+use the standard Channel I/O based mechanism, we also need to provide
+the functionality of passing through them to a QEMU virtual machine.
+This includes devices that don't have a virtio counterpart (e.g. tape
+drives) or that have specific characteristics which guests want to
+exploit.
+
+For passing a device to a guest, we want to use the same interface as
+everybody else, namely vfio. We implement this vfio support for channel
+devices via the vfio mediated device framework and the subchannel device
+driver "vfio_ccw".
+
+Access patterns of CCW devices
+------------------------------
+
+s390 architecture has implemented a so called channel subsystem, that
+provides a unified view of the devices physically attached to the
+systems. Though the s390 hardware platform knows about a huge variety of
+different peripheral attachments like disk devices (aka. DASDs), tapes,
+communication controllers, etc. They can all be accessed by a well
+defined access method and they are presenting I/O completion a unified
+way: I/O interruptions.
+
+All I/O requires the use of channel command words (CCWs). A CCW is an
+instruction to a specialized I/O channel processor. A channel program is
+a sequence of CCWs which are executed by the I/O channel subsystem.  To
+issue a channel program to the channel subsystem, it is required to
+build an operation request block (ORB), which can be used to point out
+the format of the CCW and other control information to the system. The
+operating system signals the I/O channel subsystem to begin executing
+the channel program with a SSCH (start sub-channel) instruction. The
+central processor is then free to proceed with non-I/O instructions
+until interrupted. The I/O completion result is received by the
+interrupt handler in the form of interrupt response block (IRB).
+
+Back to vfio-ccw, in short:
+
+- ORBs and channel programs are built in guest kernel (with guest
+  physical addresses).
+- ORBs and channel programs are passed to the host kernel.
+- Host kernel translates the guest physical addresses to real addresses
+  and starts the I/O with issuing a privileged Channel I/O instruction
+  (e.g SSCH).
+- channel programs run asynchronously on a separate processor.
+- I/O completion will be signaled to the host with I/O interruptions.
+  And it will be copied as IRB to user space to pass it back to the
+  guest.
+
+Physical vfio ccw device and its child mdev
+-------------------------------------------
+
+As mentioned above, we realize vfio-ccw with a mdev implementation.
+
+Channel I/O does not have IOMMU hardware support, so the physical
+vfio-ccw device does not have an IOMMU level translation or isolation.
+
+Subchannel I/O instructions are all privileged instructions. When
+handling the I/O instruction interception, vfio-ccw has the software
+policing and translation how the channel program is programmed before
+it gets sent to hardware.
+
+Within this implementation, we have two drivers for two types of
+devices:
+
+- The vfio_ccw driver for the physical subchannel device.
+  This is an I/O subchannel driver for the real subchannel device.  It
+  realizes a group of callbacks and registers to the mdev framework as a
+  parent (physical) device. As a consequence, mdev provides vfio_ccw a
+  generic interface (sysfs) to create mdev devices. A vfio mdev could be
+  created by vfio_ccw then and added to the mediated bus. It is the vfio
+  device that added to an IOMMU group and a vfio group.
+  vfio_ccw also provides an I/O region to accept channel program
+  request from user space and store I/O interrupt result for user
+  space to retrieve. To notify user space an I/O completion, it offers
+  an interface to setup an eventfd fd for asynchronous signaling.
+
+- The vfio_mdev driver for the mediated vfio ccw device.
+  This is provided by the mdev framework. It is a vfio device driver for
+  the mdev that created by vfio_ccw.
+  It realizes a group of vfio device driver callbacks, adds itself to a
+  vfio group, and registers itself to the mdev framework as a mdev
+  driver.
+  It uses a vfio iommu backend that uses the existing map and unmap
+  ioctls, but rather than programming them into an IOMMU for a device,
+  it simply stores the translations for use by later requests. This
+  means that a device programmed in a VM with guest physical addresses
+  can have the vfio kernel convert that address to process virtual
+  address, pin the page and program the hardware with the host physical
+  address in one step.
+  For a mdev, the vfio iommu backend will not pin the pages during the
+  VFIO_IOMMU_MAP_DMA ioctl. Mdev framework will only maintain a database
+  of the iova<->vaddr mappings in this operation. And they export a
+  vfio_pin_pages and a vfio_unpin_pages interfaces from the vfio iommu
+  backend for the physical devices to pin and unpin pages by demand.
+
+Below is a high Level block diagram::
+
+ +-------------+
+ |             |
+ | +---------+ | mdev_register_driver() +--------------+
+ | |  Mdev   | +<-----------------------+              |
+ | |  bus    | |                        | vfio_mdev.ko |
+ | | driver  | +----------------------->+              |<-> VFIO user
+ | +---------+ |    probe()/remove()    +--------------+    APIs
+ |             |
+ |  MDEV CORE  |
+ |   MODULE    |
+ |   mdev.ko   |
+ | +---------+ | mdev_register_device() +--------------+
+ | |Physical | +<-----------------------+              |
+ | | device  | |                        |  vfio_ccw.ko |<-> subchannel
+ | |interface| +----------------------->+              |     device
+ | +---------+ |       callback         +--------------+
+ +-------------+
+
+The process of how these work together.
+
+1. vfio_ccw.ko drives the physical I/O subchannel, and registers the
+   physical device (with callbacks) to mdev framework.
+   When vfio_ccw probing the subchannel device, it registers device
+   pointer and callbacks to the mdev framework. Mdev related file nodes
+   under the device node in sysfs would be created for the subchannel
+   device, namely 'mdev_create', 'mdev_destroy' and
+   'mdev_supported_types'.
+2. Create a mediated vfio ccw device.
+   Use the 'mdev_create' sysfs file, we need to manually create one (and
+   only one for our case) mediated device.
+3. vfio_mdev.ko drives the mediated ccw device.
+   vfio_mdev is also the vfio device drvier. It will probe the mdev and
+   add it to an iommu_group and a vfio_group. Then we could pass through
+   the mdev to a guest.
+
+vfio-ccw I/O region
+-------------------
+
+An I/O region is used to accept channel program request from user
+space and store I/O interrupt result for user space to retrieve. The
+definition of the region is::
+
+  struct ccw_io_region {
+  #define ORB_AREA_SIZE 12
+	  __u8    orb_area[ORB_AREA_SIZE];
+  #define SCSW_AREA_SIZE 12
+	  __u8    scsw_area[SCSW_AREA_SIZE];
+  #define IRB_AREA_SIZE 96
+	  __u8    irb_area[IRB_AREA_SIZE];
+	  __u32   ret_code;
+  } __packed;
+
+While starting an I/O request, orb_area should be filled with the
+guest ORB, and scsw_area should be filled with the SCSW of the Virtual
+Subchannel.
+
+irb_area stores the I/O result.
+
+ret_code stores a return code for each access of the region.
+
+vfio-ccw operation details
+--------------------------
+
+vfio-ccw follows what vfio-pci did on the s390 platform and uses
+vfio-iommu-type1 as the vfio iommu backend.
+
+* CCW translation APIs
+  A group of APIs (start with `cp_`) to do CCW translation. The CCWs
+  passed in by a user space program are organized with their guest
+  physical memory addresses. These APIs will copy the CCWs into kernel
+  space, and assemble a runnable kernel channel program by updating the
+  guest physical addresses with their corresponding host physical addresses.
+  Note that we have to use IDALs even for direct-access CCWs, as the
+  referenced memory can be located anywhere, including above 2G.
+
+* vfio_ccw device driver
+  This driver utilizes the CCW translation APIs and introduces
+  vfio_ccw, which is the driver for the I/O subchannel devices you want
+  to pass through.
+  vfio_ccw implements the following vfio ioctls::
+
+    VFIO_DEVICE_GET_INFO
+    VFIO_DEVICE_GET_IRQ_INFO
+    VFIO_DEVICE_GET_REGION_INFO
+    VFIO_DEVICE_RESET
+    VFIO_DEVICE_SET_IRQS
+
+  This provides an I/O region, so that the user space program can pass a
+  channel program to the kernel, to do further CCW translation before
+  issuing them to a real device.
+  This also provides the SET_IRQ ioctl to setup an event notifier to
+  notify the user space program the I/O completion in an asynchronous
+  way.
+
+The use of vfio-ccw is not limited to QEMU, while QEMU is definitely a
+good example to get understand how these patches work. Here is a little
+bit more detail how an I/O request triggered by the QEMU guest will be
+handled (without error handling).
+
+Explanation:
+
+- Q1-Q7: QEMU side process.
+- K1-K5: Kernel side process.
+
+Q1.
+    Get I/O region info during initialization.
+
+Q2.
+    Setup event notifier and handler to handle I/O completion.
+
+... ...
+
+Q3.
+    Intercept a ssch instruction.
+Q4.
+    Write the guest channel program and ORB to the I/O region.
+
+    K1.
+	Copy from guest to kernel.
+    K2.
+	Translate the guest channel program to a host kernel space
+	channel program, which becomes runnable for a real device.
+    K3.
+	With the necessary information contained in the orb passed in
+	by QEMU, issue the ccwchain to the device.
+    K4.
+	Return the ssch CC code.
+Q5.
+    Return the CC code to the guest.
+
+... ...
+
+    K5.
+	Interrupt handler gets the I/O result and write the result to
+	the I/O region.
+    K6.
+	Signal QEMU to retrieve the result.
+
+Q6.
+    Get the signal and event handler reads out the result from the I/O
+    region.
+Q7.
+    Update the irb for the guest.
+
+Limitations
+-----------
+
+The current vfio-ccw implementation focuses on supporting basic commands
+needed to implement block device functionality (read/write) of DASD/ECKD
+device only. Some commands may need special handling in the future, for
+example, anything related to path grouping.
+
+DASD is a kind of storage device. While ECKD is a data recording format.
+More information for DASD and ECKD could be found here:
+https://en.wikipedia.org/wiki/Direct-access_storage_device
+https://en.wikipedia.org/wiki/Count_key_data
+
+Together with the corresponding work in QEMU, we can bring the passed
+through DASD/ECKD device online in a guest now and use it as a block
+device.
+
+While the current code allows the guest to start channel programs via
+START SUBCHANNEL, support for HALT SUBCHANNEL or CLEAR SUBCHANNEL is
+not yet implemented.
+
+vfio-ccw supports classic (command mode) channel I/O only. Transport
+mode (HPF) is not supported.
+
+QDIO subchannels are currently not supported. Classic devices other than
+DASD/ECKD might work, but have not been tested.
+
+Reference
+---------
+1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832)
+2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204)
+3. https://en.wikipedia.org/wiki/Channel_I/O
+4. Documentation/s390/cds.rst
+5. Documentation/vfio.txt
+6. Documentation/vfio-mediated-device.txt
diff --git a/Documentation/s390/vfio-ccw.txt b/Documentation/s390/vfio-ccw.txt
deleted file mode 100644
index 2be11ad864ff..000000000000
--- a/Documentation/s390/vfio-ccw.txt
+++ /dev/null
@@ -1,300 +0,0 @@
-vfio-ccw: the basic infrastructure
-==================================
-
-Introduction
-------------
-
-Here we describe the vfio support for I/O subchannel devices for
-Linux/s390. Motivation for vfio-ccw is to passthrough subchannels to a
-virtual machine, while vfio is the means.
-
-Different than other hardware architectures, s390 has defined a unified
-I/O access method, which is so called Channel I/O. It has its own access
-patterns:
-- Channel programs run asynchronously on a separate (co)processor.
-- The channel subsystem will access any memory designated by the caller
-  in the channel program directly, i.e. there is no iommu involved.
-Thus when we introduce vfio support for these devices, we realize it
-with a mediated device (mdev) implementation. The vfio mdev will be
-added to an iommu group, so as to make itself able to be managed by the
-vfio framework. And we add read/write callbacks for special vfio I/O
-regions to pass the channel programs from the mdev to its parent device
-(the real I/O subchannel device) to do further address translation and
-to perform I/O instructions.
-
-This document does not intend to explain the s390 I/O architecture in
-every detail. More information/reference could be found here:
-- A good start to know Channel I/O in general:
-  https://en.wikipedia.org/wiki/Channel_I/O
-- s390 architecture:
-  s390 Principles of Operation manual (IBM Form. No. SA22-7832)
-- The existing QEMU code which implements a simple emulated channel
-  subsystem could also be a good reference. It makes it easier to follow
-  the flow.
-  qemu/hw/s390x/css.c
-
-For vfio mediated device framework:
-- Documentation/vfio-mediated-device.txt
-
-Motivation of vfio-ccw
-----------------------
-
-Typically, a guest virtualized via QEMU/KVM on s390 only sees
-paravirtualized virtio devices via the "Virtio Over Channel I/O
-(virtio-ccw)" transport. This makes virtio devices discoverable via
-standard operating system algorithms for handling channel devices.
-
-However this is not enough. On s390 for the majority of devices, which
-use the standard Channel I/O based mechanism, we also need to provide
-the functionality of passing through them to a QEMU virtual machine.
-This includes devices that don't have a virtio counterpart (e.g. tape
-drives) or that have specific characteristics which guests want to
-exploit.
-
-For passing a device to a guest, we want to use the same interface as
-everybody else, namely vfio. We implement this vfio support for channel
-devices via the vfio mediated device framework and the subchannel device
-driver "vfio_ccw".
-
-Access patterns of CCW devices
-------------------------------
-
-s390 architecture has implemented a so called channel subsystem, that
-provides a unified view of the devices physically attached to the
-systems. Though the s390 hardware platform knows about a huge variety of
-different peripheral attachments like disk devices (aka. DASDs), tapes,
-communication controllers, etc. They can all be accessed by a well
-defined access method and they are presenting I/O completion a unified
-way: I/O interruptions.
-
-All I/O requires the use of channel command words (CCWs). A CCW is an
-instruction to a specialized I/O channel processor. A channel program is
-a sequence of CCWs which are executed by the I/O channel subsystem.  To
-issue a channel program to the channel subsystem, it is required to
-build an operation request block (ORB), which can be used to point out
-the format of the CCW and other control information to the system. The
-operating system signals the I/O channel subsystem to begin executing
-the channel program with a SSCH (start sub-channel) instruction. The
-central processor is then free to proceed with non-I/O instructions
-until interrupted. The I/O completion result is received by the
-interrupt handler in the form of interrupt response block (IRB).
-
-Back to vfio-ccw, in short:
-- ORBs and channel programs are built in guest kernel (with guest
-  physical addresses).
-- ORBs and channel programs are passed to the host kernel.
-- Host kernel translates the guest physical addresses to real addresses
-  and starts the I/O with issuing a privileged Channel I/O instruction
-  (e.g SSCH).
-- channel programs run asynchronously on a separate processor.
-- I/O completion will be signaled to the host with I/O interruptions.
-  And it will be copied as IRB to user space to pass it back to the
-  guest.
-
-Physical vfio ccw device and its child mdev
--------------------------------------------
-
-As mentioned above, we realize vfio-ccw with a mdev implementation.
-
-Channel I/O does not have IOMMU hardware support, so the physical
-vfio-ccw device does not have an IOMMU level translation or isolation.
-
-Subchannel I/O instructions are all privileged instructions. When
-handling the I/O instruction interception, vfio-ccw has the software
-policing and translation how the channel program is programmed before
-it gets sent to hardware.
-
-Within this implementation, we have two drivers for two types of
-devices:
-- The vfio_ccw driver for the physical subchannel device.
-  This is an I/O subchannel driver for the real subchannel device.  It
-  realizes a group of callbacks and registers to the mdev framework as a
-  parent (physical) device. As a consequence, mdev provides vfio_ccw a
-  generic interface (sysfs) to create mdev devices. A vfio mdev could be
-  created by vfio_ccw then and added to the mediated bus. It is the vfio
-  device that added to an IOMMU group and a vfio group.
-  vfio_ccw also provides an I/O region to accept channel program
-  request from user space and store I/O interrupt result for user
-  space to retrieve. To notify user space an I/O completion, it offers
-  an interface to setup an eventfd fd for asynchronous signaling.
-
-- The vfio_mdev driver for the mediated vfio ccw device.
-  This is provided by the mdev framework. It is a vfio device driver for
-  the mdev that created by vfio_ccw.
-  It realizes a group of vfio device driver callbacks, adds itself to a
-  vfio group, and registers itself to the mdev framework as a mdev
-  driver.
-  It uses a vfio iommu backend that uses the existing map and unmap
-  ioctls, but rather than programming them into an IOMMU for a device,
-  it simply stores the translations for use by later requests. This
-  means that a device programmed in a VM with guest physical addresses
-  can have the vfio kernel convert that address to process virtual
-  address, pin the page and program the hardware with the host physical
-  address in one step.
-  For a mdev, the vfio iommu backend will not pin the pages during the
-  VFIO_IOMMU_MAP_DMA ioctl. Mdev framework will only maintain a database
-  of the iova<->vaddr mappings in this operation. And they export a
-  vfio_pin_pages and a vfio_unpin_pages interfaces from the vfio iommu
-  backend for the physical devices to pin and unpin pages by demand.
-
-Below is a high Level block diagram.
-
- +-------------+
- |             |
- | +---------+ | mdev_register_driver() +--------------+
- | |  Mdev   | +<-----------------------+              |
- | |  bus    | |                        | vfio_mdev.ko |
- | | driver  | +----------------------->+              |<-> VFIO user
- | +---------+ |    probe()/remove()    +--------------+    APIs
- |             |
- |  MDEV CORE  |
- |   MODULE    |
- |   mdev.ko   |
- | +---------+ | mdev_register_device() +--------------+
- | |Physical | +<-----------------------+              |
- | | device  | |                        |  vfio_ccw.ko |<-> subchannel
- | |interface| +----------------------->+              |     device
- | +---------+ |       callback         +--------------+
- +-------------+
-
-The process of how these work together.
-1. vfio_ccw.ko drives the physical I/O subchannel, and registers the
-   physical device (with callbacks) to mdev framework.
-   When vfio_ccw probing the subchannel device, it registers device
-   pointer and callbacks to the mdev framework. Mdev related file nodes
-   under the device node in sysfs would be created for the subchannel
-   device, namely 'mdev_create', 'mdev_destroy' and
-   'mdev_supported_types'.
-2. Create a mediated vfio ccw device.
-   Use the 'mdev_create' sysfs file, we need to manually create one (and
-   only one for our case) mediated device.
-3. vfio_mdev.ko drives the mediated ccw device.
-   vfio_mdev is also the vfio device drvier. It will probe the mdev and
-   add it to an iommu_group and a vfio_group. Then we could pass through
-   the mdev to a guest.
-
-vfio-ccw I/O region
--------------------
-
-An I/O region is used to accept channel program request from user
-space and store I/O interrupt result for user space to retrieve. The
-definition of the region is:
-
-struct ccw_io_region {
-#define ORB_AREA_SIZE 12
-	__u8	orb_area[ORB_AREA_SIZE];
-#define SCSW_AREA_SIZE 12
-	__u8	scsw_area[SCSW_AREA_SIZE];
-#define IRB_AREA_SIZE 96
-	__u8	irb_area[IRB_AREA_SIZE];
-	__u32	ret_code;
-} __packed;
-
-While starting an I/O request, orb_area should be filled with the
-guest ORB, and scsw_area should be filled with the SCSW of the Virtual
-Subchannel.
-
-irb_area stores the I/O result.
-
-ret_code stores a return code for each access of the region.
-
-vfio-ccw operation details
---------------------------
-
-vfio-ccw follows what vfio-pci did on the s390 platform and uses
-vfio-iommu-type1 as the vfio iommu backend.
-
-* CCW translation APIs
-  A group of APIs (start with 'cp_') to do CCW translation. The CCWs
-  passed in by a user space program are organized with their guest
-  physical memory addresses. These APIs will copy the CCWs into kernel
-  space, and assemble a runnable kernel channel program by updating the
-  guest physical addresses with their corresponding host physical addresses.
-  Note that we have to use IDALs even for direct-access CCWs, as the
-  referenced memory can be located anywhere, including above 2G.
-
-* vfio_ccw device driver
-  This driver utilizes the CCW translation APIs and introduces
-  vfio_ccw, which is the driver for the I/O subchannel devices you want
-  to pass through.
-  vfio_ccw implements the following vfio ioctls:
-    VFIO_DEVICE_GET_INFO
-    VFIO_DEVICE_GET_IRQ_INFO
-    VFIO_DEVICE_GET_REGION_INFO
-    VFIO_DEVICE_RESET
-    VFIO_DEVICE_SET_IRQS
-  This provides an I/O region, so that the user space program can pass a
-  channel program to the kernel, to do further CCW translation before
-  issuing them to a real device.
-  This also provides the SET_IRQ ioctl to setup an event notifier to
-  notify the user space program the I/O completion in an asynchronous
-  way.
-
-The use of vfio-ccw is not limited to QEMU, while QEMU is definitely a
-good example to get understand how these patches work. Here is a little
-bit more detail how an I/O request triggered by the QEMU guest will be
-handled (without error handling).
-
-Explanation:
-Q1-Q7: QEMU side process.
-K1-K5: Kernel side process.
-
-Q1. Get I/O region info during initialization.
-Q2. Setup event notifier and handler to handle I/O completion.
-
-... ...
-
-Q3. Intercept a ssch instruction.
-Q4. Write the guest channel program and ORB to the I/O region.
-    K1. Copy from guest to kernel.
-    K2. Translate the guest channel program to a host kernel space
-        channel program, which becomes runnable for a real device.
-    K3. With the necessary information contained in the orb passed in
-        by QEMU, issue the ccwchain to the device.
-    K4. Return the ssch CC code.
-Q5. Return the CC code to the guest.
-
-... ...
-
-    K5. Interrupt handler gets the I/O result and write the result to
-        the I/O region.
-    K6. Signal QEMU to retrieve the result.
-Q6. Get the signal and event handler reads out the result from the I/O
-    region.
-Q7. Update the irb for the guest.
-
-Limitations
------------
-
-The current vfio-ccw implementation focuses on supporting basic commands
-needed to implement block device functionality (read/write) of DASD/ECKD
-device only. Some commands may need special handling in the future, for
-example, anything related to path grouping.
-
-DASD is a kind of storage device. While ECKD is a data recording format.
-More information for DASD and ECKD could be found here:
-https://en.wikipedia.org/wiki/Direct-access_storage_device
-https://en.wikipedia.org/wiki/Count_key_data
-
-Together with the corresponding work in QEMU, we can bring the passed
-through DASD/ECKD device online in a guest now and use it as a block
-device.
-
-While the current code allows the guest to start channel programs via
-START SUBCHANNEL, support for HALT SUBCHANNEL or CLEAR SUBCHANNEL is
-not yet implemented.
-
-vfio-ccw supports classic (command mode) channel I/O only. Transport
-mode (HPF) is not supported.
-
-QDIO subchannels are currently not supported. Classic devices other than
-DASD/ECKD might work, but have not been tested.
-
-Reference
----------
-1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832)
-2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204)
-3. https://en.wikipedia.org/wiki/Channel_I/O
-4. Documentation/s390/cds.txt
-5. Documentation/vfio.txt
-6. Documentation/vfio-mediated-device.txt
diff --git a/Documentation/s390/zfcpdump.rst b/Documentation/s390/zfcpdump.rst
new file mode 100644
index 000000000000..54e8e7caf7e7
--- /dev/null
+++ b/Documentation/s390/zfcpdump.rst
@@ -0,0 +1,50 @@
+==================================
+The s390 SCSI dump tool (zfcpdump)
+==================================
+
+System z machines (z900 or higher) provide hardware support for creating system
+dumps on SCSI disks. The dump process is initiated by booting a dump tool, which
+has to create a dump of the current (probably crashed) Linux image. In order to
+not overwrite memory of the crashed Linux with data of the dump tool, the
+hardware saves some memory plus the register sets of the boot CPU before the
+dump tool is loaded. There exists an SCLP hardware interface to obtain the saved
+memory afterwards. Currently 32 MB are saved.
+
+This zfcpdump implementation consists of a Linux dump kernel together with
+a user space dump tool, which are loaded together into the saved memory region
+below 32 MB. zfcpdump is installed on a SCSI disk using zipl (as contained in
+the s390-tools package) to make the device bootable. The operator of a Linux
+system can then trigger a SCSI dump by booting the SCSI disk, where zfcpdump
+resides on.
+
+The user space dump tool accesses the memory of the crashed system by means
+of the /proc/vmcore interface. This interface exports the crashed system's
+memory and registers in ELF core dump format. To access the memory which has
+been saved by the hardware SCLP requests will be created at the time the data
+is needed by /proc/vmcore. The tail part of the crashed systems memory which
+has not been stashed by hardware can just be copied from real memory.
+
+To build a dump enabled kernel the kernel config option CONFIG_CRASH_DUMP
+has to be set.
+
+To get a valid zfcpdump kernel configuration use "make zfcpdump_defconfig".
+
+The s390 zipl tool looks for the zfcpdump kernel and optional initrd/initramfs
+under the following locations:
+
+* kernel:  <zfcpdump directory>/zfcpdump.image
+* ramdisk: <zfcpdump directory>/zfcpdump.rd
+
+The zfcpdump directory is defined in the s390-tools package.
+
+The user space application of zfcpdump can reside in an intitramfs or an
+initrd. It can also be included in a built-in kernel initramfs. The application
+reads from /proc/vmcore or zcore/mem and writes the system dump to a SCSI disk.
+
+The s390-tools package version 1.24.0 and above builds an external zfcpdump
+initramfs with a user space application that writes the dump to a SCSI
+partition.
+
+For more information on how to use zfcpdump refer to the s390 'Using the Dump
+Tools book', which is available from
+http://www.ibm.com/developerworks/linux/linux390.
diff --git a/Documentation/s390/zfcpdump.txt b/Documentation/s390/zfcpdump.txt
deleted file mode 100644
index b064aa59714d..000000000000
--- a/Documentation/s390/zfcpdump.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-The s390 SCSI dump tool (zfcpdump)
-
-System z machines (z900 or higher) provide hardware support for creating system
-dumps on SCSI disks. The dump process is initiated by booting a dump tool, which
-has to create a dump of the current (probably crashed) Linux image. In order to
-not overwrite memory of the crashed Linux with data of the dump tool, the
-hardware saves some memory plus the register sets of the boot CPU before the
-dump tool is loaded. There exists an SCLP hardware interface to obtain the saved
-memory afterwards. Currently 32 MB are saved.
-
-This zfcpdump implementation consists of a Linux dump kernel together with
-a user space dump tool, which are loaded together into the saved memory region
-below 32 MB. zfcpdump is installed on a SCSI disk using zipl (as contained in
-the s390-tools package) to make the device bootable. The operator of a Linux
-system can then trigger a SCSI dump by booting the SCSI disk, where zfcpdump
-resides on.
-
-The user space dump tool accesses the memory of the crashed system by means
-of the /proc/vmcore interface. This interface exports the crashed system's
-memory and registers in ELF core dump format. To access the memory which has
-been saved by the hardware SCLP requests will be created at the time the data
-is needed by /proc/vmcore. The tail part of the crashed systems memory which
-has not been stashed by hardware can just be copied from real memory.
-
-To build a dump enabled kernel the kernel config option CONFIG_CRASH_DUMP
-has to be set.
-
-To get a valid zfcpdump kernel configuration use "make zfcpdump_defconfig".
-
-The s390 zipl tool looks for the zfcpdump kernel and optional initrd/initramfs
-under the following locations:
-
-* kernel:  <zfcpdump directory>/zfcpdump.image
-* ramdisk: <zfcpdump directory>/zfcpdump.rd
-
-The zfcpdump directory is defined in the s390-tools package.
-
-The user space application of zfcpdump can reside in an intitramfs or an
-initrd. It can also be included in a built-in kernel initramfs. The application
-reads from /proc/vmcore or zcore/mem and writes the system dump to a SCSI disk.
-
-The s390-tools package version 1.24.0 and above builds an external zfcpdump
-initramfs with a user space application that writes the dump to a SCSI
-partition.
-
-For more information on how to use zfcpdump refer to the s390 'Using the Dump
-Tools book', which is available from
-http://www.ibm.com/developerworks/linux/linux390.
-- 
cgit 


From a20aa857e0c207c27d4b2c98af7d97539faf2cc5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sat, 8 Jun 2019 23:27:17 -0300
Subject: s390: include/asm/debug.h add kerneldoc markups

Instead of keeping the documentation inside s390dbf.rst,
move them to arch/s390/include/asm/debug.h, using standard
kernel-doc markups.

Keeping the documentation close to the code helps to keep it
updated. It also makes easier to document other stuff inside
debug.h, as all it needs is to add kernel-doc markups inside
it, as the file will be already be included at the produced
documentation.

-

Those were converted to kerneldoc using this script specially
designed to parse ths file, and manually editted:

<script>
use strict;

my $mode = "";
my $parameter = "";
my $ret = "";
my $descr = "";

sub add_var($)
{
	my $ln = shift;

	$ln =~ s/^\s+//;
	$ln =~ s/\s+$//;

	return if ($ln eq "");

	$ln =~ s/^(\S+)\s+/$1\t/;

	print " * \@$ln\n";
}

sub add_return($)
{
	my $ln = shift;

	print " *\n * Return:\n" if ($mode ne "Return Value:");

	$ln =~ s/^\s+//;
	$ln =~ s/\s+$//;

	return if ($ln eq "");

	print " * -   $ln\n";
}

sub add_description($)
{
	my $ln = shift;

	print " *\n * \n" if ($mode ne "Description:");

	$ln =~ s/^\s+//;
	$ln =~ s/\s+$//;

	return if ($ln eq "");

	print " * $ln\n";
}

sub flush_results()
{
	print " */\n\n";
}

while (<>) {
	if (m/^[\-]+$/) {
		flush_results();
		$mode = "";
		$parameter = "";
		$ret = "";
		$descr = "";
		next;
	}
	if (m/(Parameter:)(.*)/) {
		print " *\n" if ($mode eq "func");
		add_var($2);
		$mode = $1;
		next;
	}
	if (m/(Return Value:)(.*)/) {
		add_return($2);
		$mode = $1;
		next;
	}
	if (m/(Description:)(.*)/) {
		add_description($2);
		$mode = $1;
		next;
	}
	if ($mode eq "Parameter:") {
		add_var($_);
		next;
	}
	if ($mode eq "Return Value:") {
		add_return($_);
		next;
	}
	if ($mode eq "Description:") {
		add_description($_);
		next;
	}
	next if (m/^\s*$/);

	if (m/^\S+.*\s\*?(\S+)\s*\(/) {
		if ($mode eq "") {
			print "/**\n * $1()\n";
		} else {
			print " * $1()\n";
		}
		$mode="func";
	}
}
flush_results();
</script>

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 Documentation/s390/s390dbf.rst | 672 +----------------------------------------
 1 file changed, 1 insertion(+), 671 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/s390/s390dbf.rst b/Documentation/s390/s390dbf.rst
index ec2a1faa414b..d2595b548879 100644
--- a/Documentation/s390/s390dbf.rst
+++ b/Documentation/s390/s390dbf.rst
@@ -104,684 +104,14 @@ the "debug_stoppable" sysctl. If you set "debug_stoppable" to 0 the debug
 feature cannot be stopped. If the debug feature is already stopped, it
 will stay deactivated.
 
-----------------------------------------------------------------------------
-
 Kernel Interfaces:
 ------------------
 
-::
-
-  debug_info_t *debug_register(char *name, int pages, int nr_areas,
-			       int buf_size);
-
-Parameter:
-	      name:
-			   Name of debug log (e.g. used for debugfs entry)
-	      pages:
-			   Number of pages, which will be allocated per area
-	      nr_areas:
-			   Number of debug areas
-	      buf_size:
-			   Size of data area in each debug entry
-
-Return Value:
-	Handle for generated debug area
-
-	  NULL if register failed
-
-Description:  Allocates memory for a debug log
-	      Must not be called within an interrupt handler
-
-----------------------------------------------------------------------------
-
-::
-
-  debug_info_t *debug_register_mode(char *name, int pages, int nr_areas,
-				    int buf_size, mode_t mode, uid_t uid,
-				    gid_t gid);
-
-Parameter:
-	      name:
-			   Name of debug log (e.g. used for debugfs entry)
-	      pages:
-			   Number of pages, which will be allocated per area
-	      nr_areas:
-			   Number of debug areas
-	      buf_size:
-			   Size of data area in each debug entry
-	      mode:
-			   File mode for debugfs files. E.g. S_IRWXUGO
-	      uid:
-			   User ID for debugfs files. Currently only 0 is
-			   supported.
-	      gid:
-			   Group ID for debugfs files. Currently only 0 is
-			   supported.
-
-Return Value:
-	      Handle for generated debug area
-
-		NULL if register failed
-
-Description:
-	      Allocates memory for a debug log
-	      Must not be called within an interrupt handler
-
----------------------------------------------------------------------------
-
-::
-
-  void debug_unregister (debug_info_t * id);
-
-Parameter:
-	       id:
-		    handle for debug log
-
-Return Value:
-	       none
-
-Description:
-	       frees memory for a debug log and removes all registered debug
-	       views.
-
-	       Must not be called within an interrupt handler
-
----------------------------------------------------------------------------
-
-::
-
-  void debug_set_level (debug_info_t * id, int new_level);
-
-Parameter:     id:        handle for debug log
-	       new_level: new debug level
-
-Return Value:
-	       none
-
-Description:
-	       Sets new actual debug level if new_level is valid.
-
----------------------------------------------------------------------------
-
-::
-
-  bool debug_level_enabled (debug_info_t * id, int level);
-
-Parameter:
-	      id:
-			  handle for debug log
-	      level:
-			  debug level
-
-Return Value:
-	      True if level is less or equal to the current debug level.
-
-Description:
-	      Returns true if debug events for the specified level would be
-	      logged. Otherwise returns false.
-
----------------------------------------------------------------------------
-
-::
-
-  void debug_stop_all(void);
-
-Parameter:
-	       none
-
-Return Value:
-	       none
-
-Description:
-	       stops the debug feature if stopping is allowed. Currently
-	       used in case of a kernel oops.
-
----------------------------------------------------------------------------
-
-::
-
-  debug_entry_t* debug_event (debug_info_t* id, int level, void* data,
-			      int length);
-
-Parameter:
-	       id:
-		       handle for debug log
-	       level:
-		       debug level
-	       data:
-		       pointer to data for debug entry
-	       length:
-		       length of data in bytes
-
-Return Value:
-	       Address of written debug entry
-
-Description:
-	       writes debug entry to active debug area (if level <= actual
-	       debug level)
-
----------------------------------------------------------------------------
-
-::
-
-  debug_entry_t* debug_int_event (debug_info_t * id, int level,
-				  unsigned int data);
-  debug_entry_t* debug_long_event(debug_info_t * id, int level,
-				  unsigned long data);
-
-Parameter:
-	       id:
-		       handle for debug log
-	       level:
-		       debug level
-	       data:
-		       integer value for debug entry
-
-Return Value:
-	       Address of written debug entry
-
-Description:
-	       writes debug entry to active debug area (if level <= actual
-	       debug level)
-
----------------------------------------------------------------------------
-
-::
-
-  debug_entry_t* debug_text_event (debug_info_t * id, int level,
-				   const char* data);
-
-Parameter:
-	       id:
-		       handle for debug log
-	       level:
-		       debug level
-	       data:
-		       string for debug entry
-
-Return Value:
-	       Address of written debug entry
-
-Description:
-	       writes debug entry in ascii format to active debug area
-	       (if level <= actual debug level)
-
----------------------------------------------------------------------------
-
-::
-
-  debug_entry_t* debug_sprintf_event (debug_info_t * id, int level,
-				      char* string,...);
-
-Parameter:
-	       id:
-		      handle for debug log
-	       level:
-		      debug level
-	       string:
-		      format string for debug entry
-	       ...:
-		      varargs used as in sprintf()
-
-Return Value:  Address of written debug entry
-
-Description:
-	       writes debug entry with format string and varargs (longs) to
-	       active debug area (if level $<=$ actual debug level).
-	       floats and long long datatypes cannot be used as varargs.
-
----------------------------------------------------------------------------
-
-::
-
-  debug_entry_t* debug_exception (debug_info_t* id, int level, void* data,
-				  int length);
-
-Parameter:
-	       id:
-		       handle for debug log
-	       level:
-		       debug level
-	       data:
-		       pointer to data for debug entry
-	       length:
-		       length of data in bytes
-
-Return Value:
-	       Address of written debug entry
-
-Description:
-	       writes debug entry to active debug area (if level <= actual
-	       debug level) and switches to next debug area
-
----------------------------------------------------------------------------
-
-::
-
-  debug_entry_t* debug_int_exception (debug_info_t * id, int level,
-				      unsigned int data);
-  debug_entry_t* debug_long_exception(debug_info_t * id, int level,
-				      unsigned long data);
-
-Parameter:     id:     handle for debug log
-	       level:  debug level
-	       data:   integer value for debug entry
-
-Return Value:  Address of written debug entry
-
-Description:   writes debug entry to active debug area (if level <= actual
-	       debug level) and switches to next debug area
-
----------------------------------------------------------------------------
-
-::
-
-  debug_entry_t* debug_text_exception (debug_info_t * id, int level,
-				       const char* data);
-
-Parameter:     id:     handle for debug log
-	       level:  debug level
-	       data:   string for debug entry
-
-Return Value:  Address of written debug entry
-
-Description:   writes debug entry in ascii format to active debug area
-	       (if level <= actual debug level) and switches to next debug
-	       area
-
----------------------------------------------------------------------------
-
-::
-
-  debug_entry_t* debug_sprintf_exception (debug_info_t * id, int level,
-					  char* string,...);
-
-Parameter:     id:    handle for debug log
-	       level: debug level
-	       string: format string for debug entry
-	       ...: varargs used as in sprintf()
-
-Return Value:  Address of written debug entry
-
-Description:   writes debug entry with format string and varargs (longs) to
-	       active debug area (if level $<=$ actual debug level) and
-	       switches to next debug area.
-	       floats and long long datatypes cannot be used as varargs.
-
----------------------------------------------------------------------------
-
-::
-
-  int debug_register_view (debug_info_t * id, struct debug_view *view);
-
-Parameter:     id:    handle for debug log
-	       view:  pointer to debug view struct
-
-Return Value:  0  : ok
-	       < 0: Error
-
-Description:   registers new debug view and creates debugfs dir entry
-
----------------------------------------------------------------------------
-
-::
-
-  int debug_unregister_view (debug_info_t * id, struct debug_view *view);
-
-Parameter:     id:    handle for debug log
-	       view:  pointer to debug view struct
-
-Return Value:  0  : ok
-	       < 0: Error
-
-Description:   unregisters debug view and removes debugfs dir entry
-
-
+.. kernel-doc:: arch/s390/include/asm/debug.h
 
 Predefined views:
 -----------------
 
-extern struct debug_view debug_hex_ascii_view;
-
-extern struct debug_view debug_raw_view;
-
-extern struct debug_view debug_sprintf_view;
-
-Examples
---------
-
-::
-
-  /*
-   * hex_ascii- + raw-view Example
-   */
-
-  #include <linux/init.h>
-  #include <asm/debug.h>
-
-  static debug_info_t* debug_info;
-
-  static int init(void)
-  {
-      /* register 4 debug areas with one page each and 4 byte data field */
-
-      debug_info = debug_register ("test", 1, 4, 4 );
-      debug_register_view(debug_info,&debug_hex_ascii_view);
-      debug_register_view(debug_info,&debug_raw_view);
-
-      debug_text_event(debug_info, 4 , "one ");
-      debug_int_exception(debug_info, 4, 4711);
-      debug_event(debug_info, 3, &debug_info, 4);
-
-      return 0;
-  }
-
-  static void cleanup(void)
-  {
-      debug_unregister (debug_info);
-  }
-
-  module_init(init);
-  module_exit(cleanup);
-
----------------------------------------------------------------------------
-
-::
-
-  /*
-   * sprintf-view Example
-   */
-
-  #include <linux/init.h>
-  #include <asm/debug.h>
-
-  static debug_info_t* debug_info;
-
-  static int init(void)
-  {
-      /* register 4 debug areas with one page each and data field for */
-      /* format string pointer + 2 varargs (= 3 * sizeof(long))       */
-
-      debug_info = debug_register ("test", 1, 4, sizeof(long) * 3);
-      debug_register_view(debug_info,&debug_sprintf_view);
-
-      debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__);
-      debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info);
-
-      return 0;
-  }
-
-  static void cleanup(void)
-  {
-      debug_unregister (debug_info);
-  }
-
-  module_init(init);
-  module_exit(cleanup);
-
-Debugfs Interface
------------------
-Views to the debug logs can be investigated through reading the corresponding
-debugfs-files:
-
-Example::
-
-  > ls /sys/kernel/debug/s390dbf/dasd
-  flush  hex_ascii  level pages raw
-  > cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
-  00 00974733272:680099 2 - 02 0006ad7e  07 ea 4a 90 | ....
-  00 00974733272:682210 2 - 02 0006ade6  46 52 45 45 | FREE
-  00 00974733272:682213 2 - 02 0006adf6  07 ea 4a 90 | ....
-  00 00974733272:682281 1 * 02 0006ab08  41 4c 4c 43 | EXCP
-  01 00974733272:682284 2 - 02 0006ab16  45 43 4b 44 | ECKD
-  01 00974733272:682287 2 - 02 0006ab28  00 00 00 04 | ....
-  01 00974733272:682289 2 - 02 0006ab3e  00 00 00 20 | ...
-  01 00974733272:682297 2 - 02 0006ad7e  07 ea 4a 90 | ....
-  01 00974733272:684384 2 - 00 0006ade6  46 52 45 45 | FREE
-  01 00974733272:684388 2 - 00 0006adf6  07 ea 4a 90 | ....
-
-See section about predefined views for explanation of the above output!
-
-Changing the debug level
-------------------------
-
-Example::
-
-
-  > cat /sys/kernel/debug/s390dbf/dasd/level
-  3
-  > echo "5" > /sys/kernel/debug/s390dbf/dasd/level
-  > cat /sys/kernel/debug/s390dbf/dasd/level
-  5
-
-Flushing debug areas
---------------------
-Debug areas can be flushed with piping the number of the desired
-area (0...n) to the debugfs file "flush". When using "-" all debug areas
-are flushed.
-
-Examples:
-
-1. Flush debug area 0::
-
-     > echo "0" > /sys/kernel/debug/s390dbf/dasd/flush
-
-2. Flush all debug areas::
-
-     > echo "-" > /sys/kernel/debug/s390dbf/dasd/flush
-
-Changing the size of debug areas
-------------------------------------
-It is possible the change the size of debug areas through piping
-the number of pages to the debugfs file "pages". The resize request will
-also flush the debug areas.
-
-Example:
-
-Define 4 pages for the debug areas of debug feature "dasd"::
-
-  > echo "4" > /sys/kernel/debug/s390dbf/dasd/pages
-
-Stooping the debug feature
---------------------------
-Example:
-
-1. Check if stopping is allowed::
-
-     > cat /proc/sys/s390dbf/debug_stoppable
-
-2. Stop debug feature::
-
-     > echo 0 > /proc/sys/s390dbf/debug_active
-
-lcrash Interface
-----------------
-It is planned that the dump analysis tool lcrash gets an additional command
-'s390dbf' to display all the debug logs. With this tool it will be possible
-to investigate the debug logs on a live system and with a memory dump after
-a system crash.
-
-Investigating raw memory
-------------------------
-One last possibility to investigate the debug logs at a live
-system and after a system crash is to look at the raw memory
-under VM or at the Service Element.
-It is possible to find the anker of the debug-logs through
-the 'debug_area_first' symbol in the System map. Then one has
-to follow the correct pointers of the data-structures defined
-in debug.h and find the debug-areas in memory.
-Normally modules which use the debug feature will also have
-a global variable with the pointer to the debug-logs. Following
-this pointer it will also be possible to find the debug logs in
-memory.
-
-For this method it is recommended to use '16 * x + 4' byte (x = 0..n)
-for the length of the data field in debug_register() in
-order to see the debug entries well formatted.
-
-
-Predefined Views
-----------------
-
-There are three predefined views: hex_ascii, raw and sprintf.
-The hex_ascii view shows the data field in hex and ascii representation
-(e.g. '45 43 4b 44 | ECKD').
-The raw view returns a bytestream as the debug areas are stored in memory.
-
-The sprintf view formats the debug entries in the same way as the sprintf
-function would do. The sprintf event/exception functions write to the
-debug entry a pointer to the format string (size = sizeof(long))
-and for each vararg a long value. So e.g. for a debug entry with a format
-string plus two varargs one would need to allocate a (3 * sizeof(long))
-byte data area in the debug_register() function.
-
-IMPORTANT:
-  Using "%s" in sprintf event functions is dangerous. You can only
-  use "%s" in the sprintf event functions, if the memory for the passed string
-  is available as long as the debug feature exists. The reason behind this is
-  that due to performance considerations only a pointer to the string is stored
-  in  the debug feature. If you log a string that is freed afterwards, you will
-  get an OOPS when inspecting the debug feature, because then the debug feature
-  will access the already freed memory.
-
-NOTE:
-  If using the sprintf view do NOT use other event/exception functions
-  than the sprintf-event and -exception functions.
-
-The format of the hex_ascii and sprintf view is as follows:
-
-- Number of area
-- Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated
-  Universal Time (UTC), January 1, 1970)
-- level of debug entry
-- Exception flag (* = Exception)
-- Cpu-Number of calling task
-- Return Address to caller
-- data field
-
-The format of the raw view is:
-
-- Header as described in debug.h
-- datafield
-
-A typical line of the hex_ascii view will look like the following (first line
-is only for explanation and will not be displayed when 'cating' the view):
-
-area  time           level exception cpu caller    data (hex + ascii)
---------------------------------------------------------------------------
-00    00964419409:440690 1 -         00  88023fe
-
-
-Defining views
---------------
-
-Views are specified with the 'debug_view' structure. There are defined
-callback functions which are used for reading and writing the debugfs files::
-
-  struct debug_view {
-	char name[DEBUG_MAX_PROCF_LEN];
-	debug_prolog_proc_t* prolog_proc;
-	debug_header_proc_t* header_proc;
-	debug_format_proc_t* format_proc;
-	debug_input_proc_t*  input_proc;
-	void*                private_data;
-  };
-
-where::
-
-  typedef int (debug_header_proc_t) (debug_info_t* id,
-				     struct debug_view* view,
-				     int area,
-				     debug_entry_t* entry,
-				     char* out_buf);
-
-  typedef int (debug_format_proc_t) (debug_info_t* id,
-				     struct debug_view* view, char* out_buf,
-				     const char* in_buf);
-  typedef int (debug_prolog_proc_t) (debug_info_t* id,
-				     struct debug_view* view,
-				     char* out_buf);
-  typedef int (debug_input_proc_t) (debug_info_t* id,
-				    struct debug_view* view,
-				    struct file* file, const char* user_buf,
-				    size_t in_buf_size, loff_t* offset);
-
-
-The "private_data" member can be used as pointer to view specific data.
-It is not used by the debug feature itself.
-
-The output when reading a debugfs file is structured like this::
-
-  "prolog_proc output"
-
-  "header_proc output 1"  "format_proc output 1"
-  "header_proc output 2"  "format_proc output 2"
-  "header_proc output 3"  "format_proc output 3"
-  ...
-
-When a view is read from the debugfs, the Debug Feature calls the
-'prolog_proc' once for writing the prolog.
-Then 'header_proc' and 'format_proc' are called for each
-existing debug entry.
-
-The input_proc can be used to implement functionality when it is written to
-the view (e.g. like with 'echo "0" > /sys/kernel/debug/s390dbf/dasd/level).
-
-For header_proc there can be used the default function
-debug_dflt_header_fn() which is defined in debug.h.
-and which produces the same header output as the predefined views.
-E.g::
-
-  00 00964419409:440761 2 - 00 88023ec
-
-In order to see how to use the callback functions check the implementation
-of the default views!
-
-Example::
-
-  #include <asm/debug.h>
-
-  #define UNKNOWNSTR "data: %08x"
-
-  const char* messages[] =
-  {"This error...........\n",
-   "That error...........\n",
-   "Problem..............\n",
-   "Something went wrong.\n",
-   "Everything ok........\n",
-   NULL
-  };
-
-  static int debug_test_format_fn(
-     debug_info_t * id, struct debug_view *view,
-     char *out_buf, const char *in_buf
-  )
-  {
-    int i, rc = 0;
-
-    if(id->buf_size >= 4) {
-       int msg_nr = *((int*)in_buf);
-       if(msg_nr < sizeof(messages)/sizeof(char*) - 1)
-	  rc += sprintf(out_buf, "%s", messages[msg_nr]);
-       else
-	  rc += sprintf(out_buf, UNKNOWNSTR, msg_nr);
-    }
-   out:
-     return rc;
-  }
-
-  struct debug_view debug_test_view = {
-    "myview",                 /* name of view */
-    NULL,                     /* no prolog */
-    &debug_dflt_header_fn,    /* default header for each entry */
-    &debug_test_format_fn,    /* our own format function */
-    NULL,                     /* no input function */
-    NULL                      /* no private data */
-  };
-
-test:
-=====
-
 ::
 
   debug_info_t *debug_info;
-- 
cgit 


From 7391d7f4b0690a98c5bf6a113c33400e2d29d9f8 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Tue, 28 May 2019 10:07:56 +0200
Subject: dt-bindings: clk: meson: add g12b periph clock controller bindings

Update the documentation to support clock driver for the Amlogic G12B SoC.

G12B clock driver is very close, the main differences are :
- the clock tree is duplicated for the both clusters, and the
  SYS_PLL are swapped between the clusters
- G12B has additional clocks like for CSI an other components

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt b/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt
index 5c8b105be4d6..6eaa52092313 100644
--- a/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt
+++ b/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt
@@ -10,6 +10,7 @@ Required Properties:
 		"amlogic,gxl-clkc" for GXL and GXM SoC,
 		"amlogic,axg-clkc" for AXG SoC.
 		"amlogic,g12a-clkc" for G12A SoC.
+		"amlogic,g12b-clkc" for G12B SoC.
 - clocks : list of clock phandle, one for each entry clock-names.
 - clock-names : should contain the following:
   * "xtal": the platform xtal
-- 
cgit 


From 5028bd68155985d3d9bc1af570349d7c56608897 Mon Sep 17 00:00:00 2001
From: Xingyu Chen <xingyu.chen@amlogic.com>
Date: Sat, 8 Jun 2019 21:04:09 +0200
Subject: dt-bindings: interrupt-controller: New binding for Meson-G12A SoC

Update the dt-binding document to support new compatible string for the
GPIO interrupt controller which found in Amlogic's Meson-G12A SoC.

Signed-off-by: Xingyu Chen <xingyu.chen@amlogic.com>
Signed-off-by: Jianxin Pan <jianxin.pan@amlogic.com>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 .../devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
index 1502a51548bb..7d531d5fff29 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
@@ -15,6 +15,7 @@ Required properties:
     "amlogic,meson-gxbb-gpio-intc" for GXBB SoCs (S905) or
     "amlogic,meson-gxl-gpio-intc" for GXL SoCs (S905X, S912)
     "amlogic,meson-axg-gpio-intc" for AXG SoCs (A113D, A113X)
+    "amlogic,meson-g12a-gpio-intc" for G12A SoCs (S905D2, S905X2, S905Y2)
 - reg : Specifies base physical address and size of the registers.
 - interrupt-controller : Identifies the node as an interrupt controller.
 - #interrupt-cells : Specifies the number of cells needed to encode an
-- 
cgit 


From dc96f45074a55f307d9618ebd444991fc643836c Mon Sep 17 00:00:00 2001
From: Guo Ren <ren_guo@c-sky.com>
Date: Thu, 6 Jun 2019 15:37:32 +0800
Subject: dt-bindings: interrupt-controller: Update csky mpintc

Add trigger type setting for csky,mpintc. The driver also could
support #interrupt-cells <1> and it wouldn't invalidate existing
DTs. Here we only show the complete format.

Signed-off-by: Guo Ren <ren_guo@c-sky.com>
Reviewed-by: Rob Herring <robh+dt@kernel.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 .../bindings/interrupt-controller/csky,mpintc.txt    | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt b/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt
index ab921f1698fb..e13405355166 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt
@@ -6,11 +6,16 @@ C-SKY Multi-processors Interrupt Controller is designed for ck807/ck810/ck860
 SMP soc, and it also could be used in non-SMP system.
 
 Interrupt number definition:
-
   0-15  : software irq, and we use 15 as our IPI_IRQ.
  16-31  : private  irq, and we use 16 as the co-processor timer.
  31-1024: common irq for soc ip.
 
+Interrupt triger mode: (Defined in dt-bindings/interrupt-controller/irq.h)
+ IRQ_TYPE_LEVEL_HIGH (default)
+ IRQ_TYPE_LEVEL_LOW
+ IRQ_TYPE_EDGE_RISING
+ IRQ_TYPE_EDGE_FALLING
+
 =============================
 intc node bindings definition
 =============================
@@ -26,15 +31,22 @@ intc node bindings definition
 	- #interrupt-cells
 		Usage: required
 		Value type: <u32>
-		Definition: must be <1>
+		Definition: <2>
 	- interrupt-controller:
 		Usage: required
 
-Examples:
+Examples: ("interrupts = <irq_num IRQ_TYPE_XXX>")
 ---------
+#include <dt-bindings/interrupt-controller/irq.h>
 
 	intc: interrupt-controller {
 		compatible = "csky,mpintc";
-		#interrupt-cells = <1>;
+		#interrupt-cells = <2>;
 		interrupt-controller;
 	};
+
+	device: device-example {
+		...
+		interrupts = <34 IRQ_TYPE_EDGE_RISING>;
+		interrupt-parent = <&intc>;
+	};
-- 
cgit 


From 73c699ffe53868d9fc1bd50445ad2c5041f379e3 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Sat, 25 May 2019 15:41:38 +0200
Subject: dt-bindings: vendor: Add a bunch of vendors

Add all the missing vendors used in Allwinner DTS.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/vendor-prefixes.yaml       | 58 ++++++++++++++++++++++
 1 file changed, 58 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 653dee9d7dd0..e68c839e9f51 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -175,6 +175,8 @@ patternProperties:
     description: Common Hardware Reference Platform
   "^chunghwa,.*":
     description: Chunghwa Picture Tubes Ltd.
+  "^chuwi,.*":
+    description: Chuwi Innovation Ltd.
   "^ciaa,.*":
     description: Computadora Industrial Abierta Argentina
   "^cirrus,.*":
@@ -185,8 +187,12 @@ patternProperties:
     description: Chips&Media, Inc.
   "^cnxt,.*":
     description: Conexant Systems, Inc.
+  "^colorfly,.*":
+    description: Colorful GRP, Shenzhen Xueyushi Technology Ltd.
   "^compulab,.*":
     description: CompuLab Ltd.
+  "^corpro,.*":
+    description: Chengdu Corpro Technology Co., Ltd.
   "^cortina,.*":
     description: Cortina Systems, Inc.
   "^cosmic,.*":
@@ -199,6 +205,8 @@ patternProperties:
     description: Crystalfontz America, Inc.
   "^csky,.*":
     description: Hangzhou C-SKY Microsystems Co., Ltd
+  "^csq,.*":
+    description: Shenzen Chuangsiqi Technology Co.,Ltd.
   "^cubietech,.*":
     description: Cubietech, Ltd.
   "^cypress,.*":
@@ -219,6 +227,8 @@ patternProperties:
     description: Devantech, Ltd.
   "^dh,.*":
     description: DH electronics GmbH
+  "^difrnce,.*":
+    description: Shenzhen Yagu Electronic Technology Co., Ltd.
   "^digi,.*":
     description: Digi International Inc.
   "^digilent,.*":
@@ -241,6 +251,8 @@ patternProperties:
     description: DPTechnics
   "^dragino,.*":
     description: Dragino Technology Co., Limited
+  "^dserve,.*":
+    description: dServe Technology B.V.
   "^ea,.*":
     description: Embedded Artists AB
   "^ebs-systart,.*":
@@ -263,6 +275,8 @@ patternProperties:
     description: Emlid, Ltd.
   "^emmicro,.*":
     description: EM Microelectronic
+  "^empire-electronix,.*":
+    description: Empire Electronix
   "^emtrion,.*":
     description: emtrion GmbH
   "^endless,.*":
@@ -329,6 +343,8 @@ patternProperties:
     description: GE Fanuc Intelligent Platforms Embedded Systems, Inc.
   "^GEFanuc,.*":
     description: GE Fanuc Intelligent Platforms Embedded Systems, Inc.
+  "^gemei,.*":
+    description: Gemei Digital Technology Co., Ltd.
   "^geniatech,.*":
     description: Geniatech, Inc.
   "^giantec,.*":
@@ -375,10 +391,14 @@ patternProperties:
     description: Honeywell
   "^hp,.*":
     description: Hewlett Packard
+  "^hsg,.*":
+    description: HannStar Display Co.
   "^holtek,.*":
     description: Holtek Semiconductor, Inc.
   "^hwacom,.*":
     description: HwaCom Systems Inc.
+  "^hyundai,.*":
+    description: Hyundai Technology
   "^i2se,.*":
     description: I2SE GmbH
   "^ibm,.*":
@@ -393,6 +413,10 @@ patternProperties:
     description: ILI Technology Corporation (ILITEK)
   "^img,.*":
     description: Imagination Technologies Ltd.
+  "^incircuit,.*":
+    description: In-Circuit GmbH
+  "^inet-tek,.*":
+    description: Shenzhen iNet Mobile Internet Technology Co., Ltd
   "^infineon,.*":
     description: Infineon Technologies
   "^inforce,.*":
@@ -427,6 +451,8 @@ patternProperties:
     description: Japan Display Inc.
   "^jedec,.*":
     description: JEDEC Solid State Technology Association
+  "^jesurun,.*":
+    description: Shenzhen Jesurun Electronics Business Dept.
   "^jianda,.*":
     description: Jiandangjing Technology Co., Ltd.
   "^karo,.*":
@@ -459,6 +485,8 @@ patternProperties:
     description: LaCie
   "^laird,.*":
     description: Laird PLC
+  "^lamobo,.*":
+    description: Ketai Huajie Technology Co., Ltd.
   "^lantiq,.*":
     description: Lantiq Semiconductor
   "^lattice,.*":
@@ -477,6 +505,8 @@ patternProperties:
     description: Lichee Pi
   "^linaro,.*":
     description: Linaro Limited
+  "^linksprite,.*":
+    description: LinkSprite Technologies, Inc.
   "^linksys,.*":
     description: Belkin International, Inc. (Linksys)
   "^linux,.*":
@@ -493,6 +523,8 @@ patternProperties:
     description: Liebherr-Werk Nenzing GmbH
   "^macnica,.*":
     description: Macnica Americas
+  "^mapleboard,.*":
+    description: Mapleboard.org
   "^marvell,.*":
     description: Marvell Technology Group Ltd.
   "^maxbotix,.*":
@@ -533,6 +565,8 @@ patternProperties:
     description: Micron Technology Inc.
   "^mikroe,.*":
     description: MikroElektronika d.o.o.
+  "^miniand,.*":
+    description: Miniand Tech
   "^minix,.*":
     description: MINIX Technology Ltd.
   "^miramems,.*":
@@ -663,6 +697,8 @@ patternProperties:
     description: Picochip Ltd
   "^pine64,.*":
     description: Pine64
+  "^pineriver,.*":
+    description: Shenzhen PineRiver Designs Co., Ltd.
   "^pixcir,.*":
     description: PIXCIR MICROELECTRONICS Co., Ltd
   "^plantower,.*":
@@ -675,12 +711,18 @@ patternProperties:
     description: Broadcom Corporation (formerly PLX Technology)
   "^pni,.*":
     description: PNI Sensor Corporation
+  "^polaroid,.*":
+    description: Polaroid Corporation
   "^portwell,.*":
     description: Portwell Inc.
   "^poslab,.*":
     description: Poslab Technology Co., Ltd.
+  "^pov,.*":
+    description: Point of View International B.V.
   "^powervr,.*":
     description: PowerVR (deprecated, use img)
+  "^primux,.*":
+    description: Primux Trading, S.L.
   "^probox2,.*":
     description: PROBOX2 (by W2COMP Co., Ltd.)
   "^pulsedlight,.*":
@@ -693,6 +735,8 @@ patternProperties:
     description: QEMU, a generic and open source machine emulator and virtualizer
   "^qi,.*":
     description: Qi Hardware
+  "^qihua,.*":
+    description: Chengdu Kaixuan Information Technology Co., Ltd.
   "^qiaodian,.*":
     description: QiaoDian XianShi Corporation
   "^qnap,.*":
@@ -715,6 +759,8 @@ patternProperties:
     description: Realtek Semiconductor Corp.
   "^renesas,.*":
     description: Renesas Electronics Corporation
+  "^rervision,.*":
+    description: Shenzhen Rervision Technology Co., Ltd.
   "^richtek,.*":
     description: Richtek Technology Corporation
   "^ricoh,.*":
@@ -785,6 +831,10 @@ patternProperties:
     description: Silicon Mitus, Inc.
   "^simtek,.*":
     description: Cypress Semiconductor Corporation (Simtek Corporation)
+  "^sinlinx,.*":
+    description: Sinlinx Electronics Technology Co., LTD
+  "^sinovoip,.*":
+    description: SinoVoip Co., Ltd
   "^sirf,.*":
     description: SiRF Technology, Inc.
   "^sis,.*":
@@ -903,6 +953,8 @@ patternProperties:
     description: United Radiant Technology Corporation
   "^usi,.*":
     description: Universal Scientific Industrial Co., Ltd.
+  "^utoo,.*":
+    description: Aigo Digital Technology Co., Ltd.
   "^v3,.*":
     description: V3 Semiconductor
   "^vamrs,.*":
@@ -939,10 +991,14 @@ patternProperties:
     description: Winbond Electronics corp.
   "^winstar,.*":
     description: Winstar Display Corp.
+  "^wits,.*":
+    description: Shenzhen Merrii Technology Co., Ltd. (WITS)
   "^wlf,.*":
     description: Wolfson Microelectronics
   "^wm,.*":
     description: Wondermedia Technologies, Inc.
+  "^wobo,.*":
+    description: Wobo
   "^x-powers,.*":
     description: X-Powers
   "^xes,.*":
@@ -953,6 +1009,8 @@ patternProperties:
     description: Xilinx
   "^xunlong,.*":
     description: Shenzhen Xunlong Software CO.,Limited
+  "^yones-toptech,.*":
+    description: Yones Toptech Co., Ltd.
   "^ysoft,.*":
     description: Y Soft Corporation a.s.
   "^zarlink,.*":
-- 
cgit 


From a1ccca0e84243c8aa39f4700cecc200b36c6b50f Mon Sep 17 00:00:00 2001
From: Maxime Jourdan <mjourdan@baylibre.com>
Date: Thu, 6 Jun 2019 12:05:10 -0400
Subject: media: dt-bindings: media: add Amlogic Video Decoder Bindings

Add documentation for the meson vdec dts node.

Signed-off-by: Maxime Jourdan <mjourdan@baylibre.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 .../devicetree/bindings/media/amlogic,vdec.txt     | 71 ++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/media/amlogic,vdec.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/media/amlogic,vdec.txt b/Documentation/devicetree/bindings/media/amlogic,vdec.txt
new file mode 100644
index 000000000000..aabdd01bcf32
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/amlogic,vdec.txt
@@ -0,0 +1,71 @@
+Amlogic Video Decoder
+================================
+
+The video decoding IP lies within the DOS memory region,
+except for the hardware bitstream parser that makes use of an undocumented
+region.
+
+It makes use of the following blocks:
+
+- ESPARSER is a bitstream parser that outputs to a VIFIFO. Further VDEC blocks
+then feed from this VIFIFO.
+- VDEC_1 can decode MPEG-1, MPEG-2, MPEG-4 part 2, MJPEG, H.263, H.264, VC-1.
+- VDEC_HEVC can decode HEVC and VP9.
+
+Both VDEC_1 and VDEC_HEVC share the "vdec" IRQ and as such cannot run
+concurrently.
+
+Device Tree Bindings:
+---------------------
+
+VDEC: Video Decoder
+--------------------------
+
+Required properties:
+- compatible: value should be different for each SoC family as :
+	- GXBB (S905) : "amlogic,gxbb-vdec"
+	- GXL (S905X, S905D) : "amlogic,gxl-vdec"
+	- GXM (S912) : "amlogic,gxm-vdec"
+- reg: base address and size of he following memory-mapped regions :
+	- dos
+	- esparser
+- reg-names: should contain the names of the previous memory regions
+- interrupts: should contain the following IRQs:
+	- vdec
+	- esparser
+- interrupt-names: should contain the names of the previous interrupts
+- amlogic,ao-sysctrl: should point to the AOBUS sysctrl node
+- amlogic,canvas: should point to a canvas provider node
+- clocks: should contain the following clocks :
+	- dos_parser
+	- dos
+	- vdec_1
+	- vdec_hevc
+- clock-names: should contain the names of the previous clocks
+- resets: should contain the parser reset
+- reset-names: should be "esparser"
+
+Example:
+
+vdec: video-decoder@c8820000 {
+	compatible = "amlogic,gxbb-vdec";
+	reg = <0x0 0xc8820000 0x0 0x10000>,
+	      <0x0 0xc110a580 0x0 0xe4>;
+	reg-names = "dos", "esparser";
+
+	interrupts = <GIC_SPI 44 IRQ_TYPE_EDGE_RISING>,
+		     <GIC_SPI 32 IRQ_TYPE_EDGE_RISING>;
+	interrupt-names = "vdec", "esparser";
+
+	amlogic,ao-sysctrl = <&sysctrl_AO>;
+	amlogic,canvas = <&canvas>;
+
+	clocks = <&clkc CLKID_DOS_PARSER>,
+		 <&clkc CLKID_DOS>,
+		 <&clkc CLKID_VDEC_1>,
+		 <&clkc CLKID_VDEC_HEVC>;
+	clock-names = "dos_parser", "dos", "vdec_1", "vdec_hevc";
+
+	resets = <&reset RESET_PARSER>;
+	reset-names = "esparser";
+};
-- 
cgit 


From f953d33ba1225d68cf8790b4706d8c4410b15926 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 10 Jun 2019 21:40:02 -0700
Subject: net/tls: add kernel-driven TLS RX resync

TLS offload device may lose sync with the TCP stream if packets
arrive out of order.  Drivers can currently request a resync at
a specific TCP sequence number.  When a record is found starting
at that sequence number kernel will inform the device of the
corresponding record number.

This requires the device to constantly scan the stream for a
known pattern (constant bytes of the header) after sync is lost.

This patch adds an alternative approach which is entirely under
the control of the kernel.  Kernel tracks records it had to fully
decrypt, even though TLS socket is in TLS_HW mode.  If multiple
records did not have any decrypted parts - it's a pretty strong
indication that the device is out of sync.

We choose the min number of fully encrypted records to be 2,
which should hopefully be more than will get retransmitted at
a time.

After kernel decides the device is out of sync it schedules a
resync request.  If the TCP socket is empty the resync gets
performed immediately.  If socket is not empty we leave the
record parser to resync when next record comes.

Before resync in message parser we peek at the TCP socket and
don't attempt the sync if the socket already has some of the
next record queued.

On resync failure (encrypted data continues to flow in) we
retry with exponential backoff, up to once every 128 records
(with a 16k record thats at most once every 2M of data).

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/tls-offload.rst | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst
index eb7c9b81ccf5..d134d63307e7 100644
--- a/Documentation/networking/tls-offload.rst
+++ b/Documentation/networking/tls-offload.rst
@@ -268,6 +268,9 @@ Device can only detect that segment 4 also contains a TLS header
 if it knows the length of the previous record from segment 2. In this case
 the device will lose synchronization with the stream.
 
+Stream scan resynchronization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 When the device gets out of sync and the stream reaches TCP sequence
 numbers more than a max size record past the expected TCP sequence number,
 the device starts scanning for a known header pattern. For example
@@ -298,6 +301,22 @@ Special care has to be taken if the confirmation request is passed
 asynchronously to the packet stream and record may get processed
 by the kernel before the confirmation request.
 
+Stack-driven resynchronization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The driver may also request the stack to perform resynchronization
+whenever it sees the records are no longer getting decrypted.
+If the connection is configured in this mode the stack automatically
+schedules resynchronization after it has received two completely encrypted
+records.
+
+The stack waits for the socket to drain and informs the device about
+the next expected record number and its TCP sequence number. If the
+records continue to be received fully encrypted stack retries the
+synchronization with an exponential back off (first after 2 encrypted
+records, then after 4 records, after 8, after 16... up until every
+128 records).
+
 Error handling
 ==============
 
-- 
cgit 


From 50180074099fcda752d9d56282d23242b126ebc9 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 10 Jun 2019 21:40:09 -0700
Subject: net/tls: add kernel-driven resync mechanism for TX

TLS offload drivers keep track of TCP seq numbers to make sure
the packets are fed into the HW in order.

When packets get dropped on the way through the stack, the driver
will get out of sync and have to use fallback encryption, but unless
TCP seq number is resynced it will never match the packets correctly
(or even worse - use incorrect record sequence number after TCP seq
wraps).

Existing drivers (mlx5) feed the entire record on every out-of-order
event, allowing FW/HW to always be in sync.

This patch adds an alternative, more akin to the RX resync.  When
driver sees a frame which is past its expected sequence number the
stream must have gotten out of order (if the sequence number is
smaller than expected its likely a retransmission which doesn't
require resync).  Driver will ask the stack to perform TX sync
before it submits the next full record, and fall back to software
crypto until stack has performed the sync.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/tls-offload.rst | 35 +++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst
index d134d63307e7..048e5ca44824 100644
--- a/Documentation/networking/tls-offload.rst
+++ b/Documentation/networking/tls-offload.rst
@@ -206,7 +206,11 @@ TX
 
 Segments transmitted from an offloaded socket can get out of sync
 in similar ways to the receive side-retransmissions - local drops
-are possible, though network reorders are not.
+are possible, though network reorders are not. There are currently
+two mechanisms for dealing with out of order segments.
+
+Crypto state rebuilding
+~~~~~~~~~~~~~~~~~~~~~~~
 
 Whenever an out of order segment is transmitted the driver provides
 the device with enough information to perform cryptographic operations.
@@ -225,6 +229,35 @@ was just a retransmission. The former is simpler, and does not require
 retransmission detection therefore it is the recommended method until
 such time it is proven inefficient.
 
+Next record sync
+~~~~~~~~~~~~~~~~
+
+Whenever an out of order segment is detected the driver requests
+that the ``ktls`` software fallback code encrypt it. If the segment's
+sequence number is lower than expected the driver assumes retransmission
+and doesn't change device state. If the segment is in the future, it
+may imply a local drop, the driver asks the stack to sync the device
+to the next record state and falls back to software.
+
+Resync request is indicated with:
+
+.. code-block:: c
+
+  void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq)
+
+Until resync is complete driver should not access its expected TCP
+sequence number (as it will be updated from a different context).
+Following helper should be used to test if resync is complete:
+
+.. code-block:: c
+
+  bool tls_offload_tx_resync_pending(struct sock *sk)
+
+Next time ``ktls`` pushes a record it will first send its TCP sequence number
+and TLS record number to the driver. Stack will also make sure that
+the new record will start on a segment boundary (like it does when
+the connection is initially added).
+
 RX
 --
 
-- 
cgit 


From b365c124f1ef921125e6c355b5d16a7d165138c4 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Mon, 3 Jun 2019 11:10:06 +0200
Subject: dt-bindings: arm: amlogic: add G12B bindings

Add compatible for the Amlogic G12B SoC, sharing most of the
features and architecture with the G12A SoC.

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
---
 Documentation/devicetree/bindings/arm/amlogic.yaml | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/amlogic.yaml b/Documentation/devicetree/bindings/arm/amlogic.yaml
index a0dd12be4de4..8434be88d599 100644
--- a/Documentation/devicetree/bindings/arm/amlogic.yaml
+++ b/Documentation/devicetree/bindings/arm/amlogic.yaml
@@ -135,4 +135,8 @@ properties:
               - amlogic,u200
           - const: amlogic,g12a
 
+      - description: Boards with the Amlogic Meson G12B S922X SoC
+        items:
+          - const: amlogic,g12b
+
 ...
-- 
cgit 


From 3113784003dd7ca287fe082ca87969d4de721907 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Mon, 3 Jun 2019 11:10:07 +0200
Subject: dt-bindings: arm: amlogic: add Odroid-N2 binding

Add compatible for the Amlogic G12B (S922X) SoC based Odroid-N2 SBC
from HardKernel.

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
---
 Documentation/devicetree/bindings/arm/amlogic.yaml | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/amlogic.yaml b/Documentation/devicetree/bindings/arm/amlogic.yaml
index 8434be88d599..325c6fd3566d 100644
--- a/Documentation/devicetree/bindings/arm/amlogic.yaml
+++ b/Documentation/devicetree/bindings/arm/amlogic.yaml
@@ -137,6 +137,8 @@ properties:
 
       - description: Boards with the Amlogic Meson G12B S922X SoC
         items:
+          - enum:
+              - hardkernel,odroid-n2
           - const: amlogic,g12b
 
 ...
-- 
cgit 


From 8ad2b4b371bcfe7069ee28ad18d722e1240a0a97 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 11 Jun 2019 21:45:33 -0700
Subject: dt-bindings: soc: qcom: Add AOSS QMP binding

Add binding for the QMP based side-channel communication mechanism to
the AOSS, which is used to control resources not exposed through the
RPMh interface.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 .../devicetree/bindings/soc/qcom/qcom,aoss-qmp.txt | 81 ++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.txt b/Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.txt
new file mode 100644
index 000000000000..954ffee0a9c4
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.txt
@@ -0,0 +1,81 @@
+Qualcomm Always-On Subsystem side channel binding
+
+This binding describes the hardware component responsible for side channel
+requests to the always-on subsystem (AOSS), used for certain power management
+requests that is not handled by the standard RPMh interface. Each client in the
+SoC has it's own block of message RAM and IRQ for communication with the AOSS.
+The protocol used to communicate in the message RAM is known as Qualcomm
+Messaging Protocol (QMP)
+
+The AOSS side channel exposes control over a set of resources, used to control
+a set of debug related clocks and to affect the low power state of resources
+related to the secondary subsystems. These resources are exposed as a set of
+power-domains.
+
+- compatible:
+	Usage: required
+	Value type: <string>
+	Definition: must be "qcom,sdm845-aoss-qmp"
+
+- reg:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: the base address and size of the message RAM for this
+		    client's communication with the AOSS
+
+- interrupts:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: should specify the AOSS message IRQ for this client
+
+- mboxes:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: reference to the mailbox representing the outgoing doorbell
+		    in APCS for this client, as described in mailbox/mailbox.txt
+
+- #clock-cells:
+	Usage: optional
+	Value type: <u32>
+	Definition: must be 0
+		    The single clock represents the QDSS clock.
+
+- #power-domain-cells:
+	Usage: optional
+	Value type: <u32>
+	Definition: must be 1
+		    The provided power-domains are:
+		    CDSP state (0), LPASS state (1), modem state (2), SLPI
+		    state (3), SPSS state (4) and Venus state (5).
+
+= SUBNODES
+The AOSS side channel also provides the controls for three cooling devices,
+these are expressed as subnodes of the QMP node. The name of the node is used
+to identify the resource and must therefor be "cx", "mx" or "ebi".
+
+- #cooling-cells:
+	Usage: optional
+	Value type: <u32>
+	Definition: must be 2
+
+= EXAMPLE
+
+The following example represents the AOSS side-channel message RAM and the
+mechanism exposing the power-domains, as found in SDM845.
+
+  aoss_qmp: qmp@c300000 {
+	  compatible = "qcom,sdm845-aoss-qmp";
+	  reg = <0x0c300000 0x100000>;
+	  interrupts = <GIC_SPI 389 IRQ_TYPE_EDGE_RISING>;
+	  mboxes = <&apss_shared 0>;
+
+	  #power-domain-cells = <1>;
+
+	  cx_cdev: cx {
+		#cooling-cells = <2>;
+	  };
+
+	  mx_cdev: mx {
+		#cooling-cells = <2>;
+	  };
+  };
-- 
cgit 


From adfd373820906d376c8b643f1a279ac809605b6b Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Mon, 3 Jun 2019 08:53:35 +0200
Subject: iommu: Introduce IOMMU_RESV_DIRECT_RELAXABLE reserved memory regions

Introduce a new type for reserved region. This corresponds
to directly mapped regions which are known to be relaxable
in some specific conditions, such as device assignment use
case. Well known examples are those used by USB controllers
providing PS/2 keyboard emulation for pre-boot BIOS and
early BOOT or RMRRs associated to IGD working in legacy mode.

Since commit c875d2c1b808 ("iommu/vt-d: Exclude devices using RMRRs
from IOMMU API domains") and commit 18436afdc11a ("iommu/vt-d: Allow
RMRR on graphics devices too"), those regions are currently
considered "safe" with respect to device assignment use case
which requires a non direct mapping at IOMMU physical level
(RAM GPA -> HPA mapping).

Those RMRRs currently exist and sometimes the device is
attempting to access it but this has not been considered
an issue until now.

However at the moment, iommu_get_group_resv_regions() is
not able to make any difference between directly mapped
regions: those which must be absolutely enforced and those
like above ones which are known as relaxable.

This is a blocker for reporting severe conflicts between
non relaxable RMRRs (like MSI doorbells) and guest GPA space.

With this new reserved region type we will be able to use
iommu_get_group_resv_regions() to enumerate the IOVA space
that is usable through the IOMMU API without introducing
regressions with respect to existing device assignment
use cases (USB and IGD).

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 Documentation/ABI/testing/sysfs-kernel-iommu_groups | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
index 35c64e00b35c..017f5bc3920c 100644
--- a/Documentation/ABI/testing/sysfs-kernel-iommu_groups
+++ b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
@@ -24,3 +24,12 @@ Description:    /sys/kernel/iommu_groups/reserved_regions list IOVA
 		region is described on a single line: the 1st field is
 		the base IOVA, the second is the end IOVA and the third
 		field describes the type of the region.
+
+What:		/sys/kernel/iommu_groups/reserved_regions
+Date: 		June 2019
+KernelVersion:  v5.3
+Contact: 	Eric Auger <eric.auger@redhat.com>
+Description:    In case an RMRR is used only by graphics or USB devices
+		it is now exposed as "direct-relaxable" instead of "direct".
+		In device assignment use case, for instance, those RMRR
+		are considered to be relaxable and safe.
-- 
cgit 


From 711486fd18596315d42cebaac3dba8c408f60a3d Mon Sep 17 00:00:00 2001
From: Aubrey Li <aubrey.li@linux.intel.com>
Date: Thu, 6 Jun 2019 09:22:36 +0800
Subject: Documentation/filesystems/proc.txt: Add arch_status file

Add documentation for /proc/<pid>/arch_status file and the x86 specific
AVX512_elapsed_ms entry in it.

[ tglx: Massage changelog ]

Signed-off-by: Aubrey Li <aubrey.li@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: peterz@infradead.org
Cc: hpa@zytor.com
Cc: ak@linux.intel.com
Cc: tim.c.chen@linux.intel.com
Cc: dave.hansen@intel.com
Cc: arjan@linux.intel.com
Cc: adobriyan@gmail.com
Cc: aubrey.li@intel.com
Cc: linux-api@vger.kernel.org
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linux API <linux-api@vger.kernel.org>
Link: https://lkml.kernel.org/r/20190606012236.9391-3-aubrey.li@linux.intel.com
---
 Documentation/filesystems/proc.txt | 40 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 66cad5c86171..a226061fa109 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -45,6 +45,7 @@ Table of Contents
   3.9   /proc/<pid>/map_files - Information about memory mapped files
   3.10  /proc/<pid>/timerslack_ns - Task timerslack value
   3.11	/proc/<pid>/patch_state - Livepatch patch operation state
+  3.12	/proc/<pid>/arch_status - Task architecture specific information
 
   4	Configuring procfs
   4.1	Mount options
@@ -1948,6 +1949,45 @@ patched.  If the patch is being enabled, then the task has already been
 patched.  If the patch is being disabled, then the task hasn't been
 unpatched yet.
 
+3.12 /proc/<pid>/arch_status - task architecture specific status
+-------------------------------------------------------------------
+When CONFIG_PROC_PID_ARCH_STATUS is enabled, this file displays the
+architecture specific status of the task.
+
+Example
+-------
+ $ cat /proc/6753/arch_status
+ AVX512_elapsed_ms:      8
+
+Description
+-----------
+
+x86 specific entries:
+---------------------
+ AVX512_elapsed_ms:
+ ------------------
+  If AVX512 is supported on the machine, this entry shows the milliseconds
+  elapsed since the last time AVX512 usage was recorded. The recording
+  happens on a best effort basis when a task is scheduled out. This means
+  that the value depends on two factors:
+
+    1) The time which the task spent on the CPU without being scheduled
+       out. With CPU isolation and a single runnable task this can take
+       several seconds.
+
+    2) The time since the task was scheduled out last. Depending on the
+       reason for being scheduled out (time slice exhausted, syscall ...)
+       this can be arbitrary long time.
+
+  As a consequence the value cannot be considered precise and authoritative
+  information. The application which uses this information has to be aware
+  of the overall scenario on the system in order to determine whether a
+  task is a real AVX512 user or not. Precise information can be obtained
+  with performance counters.
+
+  A special value of '-1' indicates that no AVX512 usage was recorded, thus
+  the task is unlikely an AVX512 user, but depends on the workload and the
+  scheduling scenario, it also could be a false negative mentioned above.
 
 ------------------------------------------------------------------------------
 Configuring procfs
-- 
cgit 


From f40043b368ae787837ef659728f0b08bd34f5317 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 25 Apr 2019 17:34:42 +0200
Subject: dt-bindings: phy: tegra-xusb: List PLL power supplies

These power supplies provide power for various PLLs that are set up and
driven by the XUSB pad controller. These power supplies were previously
improperly added to the PCIe and XUSB controllers, but depending on the
driver probe order, power to the PLLs will not be supplied soon enough
and cause initialization to fail.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 .../devicetree/bindings/phy/nvidia,tegra124-xusb-padctl.txt  | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/phy/nvidia,tegra124-xusb-padctl.txt b/Documentation/devicetree/bindings/phy/nvidia,tegra124-xusb-padctl.txt
index daedb15f322e..9fb682e47c29 100644
--- a/Documentation/devicetree/bindings/phy/nvidia,tegra124-xusb-padctl.txt
+++ b/Documentation/devicetree/bindings/phy/nvidia,tegra124-xusb-padctl.txt
@@ -42,6 +42,18 @@ Required properties:
 - reset-names: Must include the following entries:
   - "padctl"
 
+For Tegra124:
+- avdd-pll-utmip-supply: UTMI PLL power supply. Must supply 1.8 V.
+- avdd-pll-erefe-supply: PLLE reference PLL power supply. Must supply 1.05 V.
+- avdd-pex-pll-supply: PCIe/USB3 PLL power supply. Must supply 1.05 V.
+- hvdd-pex-pll-e-supply: High-voltage PLLE power supply. Must supply 3.3 V.
+
+For Tegra210:
+- avdd-pll-utmip-supply: UTMI PLL power supply. Must supply 1.8 V.
+- avdd-pll-uerefe-supply: PLLE reference PLL power supply. Must supply 1.05 V.
+- dvdd-pex-pll-supply: PCIe/USB3 PLL power supply. Must supply 1.05 V.
+- hvdd-pex-pll-e-supply: High-voltage PLLE power supply. Must supply 1.8 V.
+
 For Tegra186:
 - avdd-pll-erefeut-supply: UPHY brick and reference clock as well as UTMI PHY
   power supply. Must supply 1.8 V.
-- 
cgit 


From 0ab2c44def8f6cc637a5fb3ce9766d69d2c289d0 Mon Sep 17 00:00:00 2001
From: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
Date: Thu, 23 May 2019 11:23:35 +0000
Subject: dt-bindings: arm: fix the document ID for SCMI protocol documentation

arm,scmi.txt used the wrong document identifier. "ARM DUI 0922B" is
the "ARM Compute Subsystem SCP, Message Interface Protocols". What we
need is the ARM DEN 0056A - "ARM System Control and Management
Interface Platform Design Document".

Fixes: fe7be8b297b2 ("dt-bindings: arm: add support for ARM System Control and Management Interface(SCMI) protocol")
Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 Documentation/devicetree/bindings/arm/arm,scmi.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/arm,scmi.txt b/Documentation/devicetree/bindings/arm/arm,scmi.txt
index 5f3719ab7075..317a2fc3667a 100644
--- a/Documentation/devicetree/bindings/arm/arm,scmi.txt
+++ b/Documentation/devicetree/bindings/arm/arm,scmi.txt
@@ -6,7 +6,7 @@ that are provided by the hardware platform it is running on, including power
 and performance functions.
 
 This binding is intended to define the interface the firmware implementing
-the SCMI as described in ARM document number ARM DUI 0922B ("ARM System Control
+the SCMI as described in ARM document number ARM DEN 0056A ("ARM System Control
 and Management Interface Platform Design Document")[0] provide for OSPM in
 the device tree.
 
-- 
cgit 


From 1d90dff62e16fee5bdc761c3230cc8a4c79814fc Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.io>
Date: Tue, 11 Jun 2019 22:09:30 +0800
Subject: dt-bindings: pinctrl: add missing compatible string for V3s

The pinctrl driver of V3s is already available and used in the kernel,
but the compatible string of it is forgotten to be added.

Add the missing compatible string.

Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
Acked-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
index cf96b7c20e4d..baba55db864c 100644
--- a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
@@ -24,6 +24,7 @@ Required properties:
   "allwinner,sun8i-h3-pinctrl"
   "allwinner,sun8i-h3-r-pinctrl"
   "allwinner,sun8i-r40-pinctrl"
+  "allwinner,sun8i-v3s-pinctrl"
   "allwinner,sun50i-a64-pinctrl"
   "allwinner,sun50i-a64-r-pinctrl"
   "allwinner,sun50i-h5-pinctrl"
-- 
cgit 


From 2e80e10f2d71f42c6c8f82fc173b21be856acc0f Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.io>
Date: Tue, 11 Jun 2019 22:09:31 +0800
Subject: dt-bindings: pinctrl: add compatible string for Allwinner V3 pinctrl

The Allwinner V3 SoC, despite come with the same die with V3s, has more
GPIO pins than V3s, and a different compatible string for pinctrl is
needed.

Add the compatible string for V3 pinctrl.

Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
index baba55db864c..328585c6da58 100644
--- a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
@@ -24,6 +24,7 @@ Required properties:
   "allwinner,sun8i-h3-pinctrl"
   "allwinner,sun8i-h3-r-pinctrl"
   "allwinner,sun8i-r40-pinctrl"
+  "allwinner,sun8i-v3-pinctrl"
   "allwinner,sun8i-v3s-pinctrl"
   "allwinner,sun50i-a64-pinctrl"
   "allwinner,sun50i-a64-r-pinctrl"
-- 
cgit 


From 6a80b30086b861b2591ba2a953042abd08c498e3 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 10 Jun 2019 16:04:39 +0200
Subject: fmc: Delete the FMC subsystem

The FMC subsystem was created in 2012 with the ambition to
drive development of drivers for this hardware upstream.

The current implementation has architectural flaws and would
need to be revamped using real hardware to something that can
reuse existing kernel abstractions in the subsystems for e.g.
I2C, FPGA and GPIO.

We have concluded that for the mainline kernel it will be
better to delete the subsystem and start over with a clean
slate when/if an active maintainer steps up.

For details see:
https://lkml.org/lkml/2018/10/29/534

Suggested-by: Federico Vaga <federico.vaga@cern.ch>
Cc: Pat Riehecky <riehecky@fnal.gov>
Acked-by: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Federico Vaga <federico.vaga@cern.ch>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/fmc/API.txt              |  47 -----
 Documentation/fmc/FMC-and-SDB.txt      |  88 ----------
 Documentation/fmc/carrier.txt          | 311 ---------------------------------
 Documentation/fmc/fmc-chardev.txt      |  64 -------
 Documentation/fmc/fmc-fakedev.txt      |  36 ----
 Documentation/fmc/fmc-trivial.txt      |  17 --
 Documentation/fmc/fmc-write-eeprom.txt |  98 -----------
 Documentation/fmc/identifiers.txt      | 168 ------------------
 Documentation/fmc/mezzanine.txt        | 123 -------------
 Documentation/fmc/parameters.txt       |  56 ------
 10 files changed, 1008 deletions(-)
 delete mode 100644 Documentation/fmc/API.txt
 delete mode 100644 Documentation/fmc/FMC-and-SDB.txt
 delete mode 100644 Documentation/fmc/carrier.txt
 delete mode 100644 Documentation/fmc/fmc-chardev.txt
 delete mode 100644 Documentation/fmc/fmc-fakedev.txt
 delete mode 100644 Documentation/fmc/fmc-trivial.txt
 delete mode 100644 Documentation/fmc/fmc-write-eeprom.txt
 delete mode 100644 Documentation/fmc/identifiers.txt
 delete mode 100644 Documentation/fmc/mezzanine.txt
 delete mode 100644 Documentation/fmc/parameters.txt

(limited to 'Documentation')

diff --git a/Documentation/fmc/API.txt b/Documentation/fmc/API.txt
deleted file mode 100644
index 06b06b92c794..000000000000
--- a/Documentation/fmc/API.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-Functions Exported by fmc.ko
-****************************
-
-The FMC core exports the usual 4 functions that are needed for a bus to
-work, and a few more:
-
-        int fmc_driver_register(struct fmc_driver *drv);
-        void fmc_driver_unregister(struct fmc_driver *drv);
-        int fmc_device_register(struct fmc_device *fmc);
-        void fmc_device_unregister(struct fmc_device *fmc);
-
-        int fmc_device_register_n(struct fmc_device **fmc, int n);
-        void fmc_device_unregister_n(struct fmc_device **fmc, int n);
-
-        uint32_t fmc_readl(struct fmc_device *fmc, int offset);
-        void fmc_writel(struct fmc_device *fmc, uint32_t val, int off);
-        void *fmc_get_drvdata(struct fmc_device *fmc);
-        void fmc_set_drvdata(struct fmc_device *fmc, void *data);
-
-        int fmc_reprogram(struct fmc_device *f, struct fmc_driver *d, char *gw,
-                          int sdb_entry);
-
-The data structure that describe a device is detailed in *note FMC
-Device::, the one that describes a driver is detailed in *note FMC
-Driver::.  Please note that structures of type fmc_device must be
-allocated by the caller, but must not be released after unregistering.
-The fmc-bus itself takes care of releasing the structure when their use
-count reaches zero - actually, the device model does that in lieu of us.
-
-The functions to register and unregister n devices are meant to be used
-by carriers that host more than one mezzanine. The devices must all be
-registered at the same time because if the FPGA is reprogrammed, all
-devices in the array are affected. Usually, the driver matching the
-first device will reprogram the FPGA, so other devices must know they
-are already driven by a reprogrammed FPGA.
-
-If a carrier hosts slots that are driven by different FPGA devices, it
-should register as a group only mezzanines that are driven by the same
-FPGA, for the reason outlined above.
-
-Finally, the fmc_reprogram function calls the reprogram method (see
-*note The API Offered by Carriers:: and also scans the memory area for
-an SDB tree. You can pass -1 as sdb_entry to disable such scan.
-Otherwise, the function fails if no tree is found at the specified
-entry point.  The function is meant to factorize common code, and by
-the time you read this it is already used by the spec-sw and fine-delay
-modules.
diff --git a/Documentation/fmc/FMC-and-SDB.txt b/Documentation/fmc/FMC-and-SDB.txt
deleted file mode 100644
index fa14e0b24521..000000000000
--- a/Documentation/fmc/FMC-and-SDB.txt
+++ /dev/null
@@ -1,88 +0,0 @@
-
-FMC (FPGA Mezzanine Card) is the standard we use for our I/O devices,
-in the context of White Rabbit and related hardware.
-
-In our I/O environments we need to write drivers for each mezzanine
-card, and such drivers must work regardless of the carrier being used.
-To achieve this, we abstract the FMC interface.
-
-We have a carrier for PCI-E called SPEC and one for VME called SVEC,
-but more are planned.  Also, we support stand-alone devices (usually
-plugged on a SPEC card), controlled through Etherbone, developed by GSI.
-
-Code and documentation for the FMC bus was born as part of the spec-sw
-project, but now it lives in its own project. Other projects, i.e.
-software support for the various carriers, should include this as a
-submodule.
-
-The most up to date version of code and documentation is always
-available from the repository you can clone from:
-
-        git://ohwr.org/fmc-projects/fmc-bus.git (read-only)
-        git@ohwr.org:fmc-projects/fmc-bus.git (read-write for developers)
-
-Selected versions of the documentation, as well as complete tar
-archives for selected revisions are placed to the Files section of the
-project: `http://www.ohwr.org/projects/fmc-bus/files'
-
-
-What is FMC
-***********
-
-FMC, as said, stands for "FPGA Mezzanine Card". It is a standard
-developed by the VME consortium called VITA (VMEbus International Trade
-Association and ratified by ANSI, the American National Standard
-Institute.  The official documentation is called "ANSI-VITA 57.1".
-
-The FMC card is an almost square PCB, around 70x75 millimeters, that is
-called mezzanine in this document.  It usually lives plugged into
-another PCB for power supply and control; such bigger circuit board is
-called carrier from now on, and a single carrier may host more than one
-mezzanine.
-
-In the typical application the mezzanine is mostly analog while the
-carrier is mostly digital, and hosts an FPGA that must be configured to
-match the specific mezzanine and the desired application. Thus, you may
-need to load different FPGA images to drive different instances of the
-same mezzanine.
-
-FMC, as such, is not a bus in the usual meaning of the term, because
-most carriers have only one connector, and carriers with several
-connectors have completely separate electrical connections to them.
-This package, however, implements a bus as a software abstraction.
-
-
-What is SDB
-***********
-
-SDB (Self Describing Bus) is a set of data structures that we use for
-enumerating the internal structure of an FPGA image. We also use it as
-a filesystem inside the FMC EEPROM.
-
-SDB is not mandatory for use of this FMC kernel bus, but if you have SDB
-this package can make good use of it.  SDB itself is developed in the
-fpga-config-space OHWR project. The link to the repository is
-`git://ohwr.org/hdl-core-lib/fpga-config-space.git' and what is used in
-this project lives in the sdbfs subdirectory in there.
-
-SDB support for FMC is described in *note FMC Identification:: and
-*note SDB Support::
-
-
-SDB Support
-***********
-
-The fmc.ko bus driver exports a few functions to help drivers taking
-advantage of the SDB information that may be present in your own FPGA
-memory image.
-
-The module exports the following functions, in the special header
-<linux/fmc-sdb.h>. The linux/ prefix in the name is there because we
-plan to submit it upstream in the future, and don't want to force
-changes on our drivers if that happens.
-
-         int fmc_scan_sdb_tree(struct fmc_device *fmc, unsigned long address);
-         void fmc_show_sdb_tree(struct fmc_device *fmc);
-         signed long fmc_find_sdb_device(struct sdb_array *tree, uint64_t vendor,
-                                         uint32_t device, unsigned long *sz);
-         int fmc_free_sdb_tree(struct fmc_device *fmc);
diff --git a/Documentation/fmc/carrier.txt b/Documentation/fmc/carrier.txt
deleted file mode 100644
index 5e4f1dd3e98b..000000000000
--- a/Documentation/fmc/carrier.txt
+++ /dev/null
@@ -1,311 +0,0 @@
-FMC Device
-**********
-
-Within the Linux bus framework, the FMC device is created and
-registered by the carrier driver. For example, the PCI driver for the
-SPEC card fills a data structure for each SPEC that it drives, and
-registers an associated FMC device for each card.  The SVEC driver can
-do exactly the same for the VME carrier (actually, it should do it
-twice, because the SVEC carries two FMC mezzanines).  Similarly, an
-Etherbone driver will be able to register its own FMC devices, offering
-communication primitives through frame exchange.
-
-The contents of the EEPROM within the FMC are used for identification
-purposes, i.e. for matching the device with its own driver. For this
-reason the device structure includes a complete copy of the EEPROM
-(actually, the carrier driver may choose whether or not to return it -
-for example we most likely won't have the whole EEPROM available for
-Etherbone devices.
-
-The following listing shows the current structure defining a device.
-Please note that all the machinery is in place but some details may
-still change in the future.  For this reason, there is a version field
-at the beginning of the structure.  As usual, the minor number will
-change for compatible changes (like a new flag) and the major number
-will increase when an incompatible change happens (for example, a
-change in layout of some fmc data structures).  Device writers should
-just set it to the value FMC_VERSION, and be ready to get back -EINVAL
-at registration time.
-
-     struct fmc_device {
-             unsigned long version;
-             unsigned long flags;
-             struct module *owner;           /* char device must pin it */
-             struct fmc_fru_id id;           /* for EEPROM-based match */
-             struct fmc_operations *op;      /* carrier-provided */
-             int irq;                        /* according to host bus. 0 == none */
-             int eeprom_len;                 /* Usually 8kB, may be less */
-             int eeprom_addr;                /* 0x50, 0x52 etc */
-             uint8_t *eeprom;                /* Full contents or leading part */
-             char *carrier_name;             /* "SPEC" or similar, for special use */
-             void *carrier_data;             /* "struct spec *" or equivalent */
-             __iomem void *fpga_base;        /* May be NULL (Etherbone) */
-             __iomem void *slot_base;        /* Set by the driver */
-             struct fmc_device **devarray;   /* Allocated by the bus */
-             int slot_id;                    /* Index in the slot array */
-             int nr_slots;                   /* Number of slots in this carrier */
-             unsigned long memlen;           /* Used for the char device */
-             struct device dev;              /* For Linux use */
-             struct device *hwdev;           /* The underlying hardware device */
-             unsigned long sdbfs_entry;
-             struct sdb_array *sdb;
-             uint32_t device_id;             /* Filled by the device */
-             char *mezzanine_name;           /* Defaults to ``fmc'' */
-             void *mezzanine_data;
-     };
-
-The meaning of most fields is summarized in the code comment above.
-
-The following fields must be filled by the carrier driver before
-registration:
-
-   * version: must be set to FMC_VERSION.
-
-   * owner: set to MODULE_OWNER.
-
-   * op: the operations to act on the device.
-
-   * irq: number for the mezzanine; may be zero.
-
-   * eeprom_len: length of the following array.
-
-   * eeprom_addr: 0x50 for first mezzanine and so on.
-
-   * eeprom: the full content of the I2C EEPROM.
-
-   * carrier_name.
-
-   * carrier_data: a unique pointer for the carrier.
-
-   * fpga_base: the I/O memory address (may be NULL).
-
-   * slot_id: the index of this slot (starting from zero).
-
-   * memlen: if fpga_base is valid, the length of I/O memory.
-
-   * hwdev: to be used in some dev_err() calls.
-
-   * device_id: a slot-specific unique integer number.
-
-
-Please note that the carrier should read its own EEPROM memory before
-registering the device, as well as fill all other fields listed above.
-
-The following fields should not be assigned, because they are filled
-later by either the bus or the device driver:
-
-   * flags.
-
-   * fru_id: filled by the bus, parsing the eeprom.
-
-   * slot_base: filled and used by the driver, if useful to it.
-
-   * devarray: an array og all mezzanines driven by a singe FPGA.
-
-   * nr_slots: set by the core at registration time.
-
-   * dev: used by Linux.
-
-   * sdb: FPGA contents, scanned according to driver's directions.
-
-   * sdbfs_entry: SDB entry point in EEPROM: autodetected.
-
-   * mezzanine_data: available for the driver.
-
-   * mezzanine_name: filled by fmc-bus during identification.
-
-
-Note: mezzanine_data may be redundant, because Linux offers the drvdata
-approach, so the field may be removed in later versions of this bus
-implementation.
-
-As I write this, she SPEC carrier is already completely functional in
-the fmc-bus environment, and is a good reference to look at.
-
-
-The API Offered by Carriers
-===========================
-
-The carrier provides a number of methods by means of the
-`fmc_operations' structure, which currently is defined like this
-(again, it is a moving target, please refer to the header rather than
-this document):
-
-     struct fmc_operations {
-             uint32_t (*readl)(struct fmc_device *fmc, int offset);
-             void (*writel)(struct fmc_device *fmc, uint32_t value, int offset);
-             int (*reprogram)(struct fmc_device *f, struct fmc_driver *d, char *gw);
-             int (*validate)(struct fmc_device *fmc, struct fmc_driver *drv);
-             int (*irq_request)(struct fmc_device *fmc, irq_handler_t h,
-                                char *name, int flags);
-             void (*irq_ack)(struct fmc_device *fmc);
-             int (*irq_free)(struct fmc_device *fmc);
-             int (*gpio_config)(struct fmc_device *fmc, struct fmc_gpio *gpio,
-                                int ngpio);
-             int (*read_ee)(struct fmc_device *fmc, int pos, void *d, int l);
-             int (*write_ee)(struct fmc_device *fmc, int pos, const void *d, int l);
-     };
-
-The individual methods perform the following tasks:
-
-`readl'
-`writel'
-     These functions access FPGA registers by whatever means the
-     carrier offers. They are not expected to fail, and most of the time
-     they will just make a memory access to the host bus. If the
-     carrier provides a fpga_base pointer, the driver may use direct
-     access through that pointer. For this reason the header offers the
-     inline functions fmc_readl and fmc_writel that access fpga_base if
-     the respective method is NULL. A driver that wants to be portable
-     and efficient should use fmc_readl and fmc_writel.  For Etherbone,
-     or other non-local carriers, error-management is still to be
-     defined.
-
-`validate'
-     Module parameters are used to manage different applications for
-     two or more boards of the same kind. Validation is based on the
-     busid module parameter, if provided, and returns the matching
-     index in the associated array. See *note Module Parameters:: in in
-     doubt. If no match is found, `-ENOENT' is returned; if the user
-     didn't pass `busid=', all devices will pass validation.  The value
-     returned by the validate method can be used as index into other
-     parameters (for example, some drivers use the `lm32=' parameter in
-     this way). Such "generic parameters" are documented in *note
-     Module Parameters::, below. The validate method is used by
-     `fmc-trivial.ko', described in *note fmc-trivial::.
-
-`reprogram'
-     The carrier enumerates FMC devices by loading a standard (or
-     golden) FPGA binary that allows EEPROM access. Each driver, then,
-     will need to reprogram the FPGA by calling this function.  If the
-     name argument is NULL, the carrier should reprogram the golden
-     binary. If the gateware name has been overridden through module
-     parameters (in a carrier-specific way) the file loaded will match
-     the parameters. Per-device gateware names can be specified using
-     the `gateware=' parameter, see *note Module Parameters::.  Note:
-     Clients should call rhe new helper, fmc_reprogram, which both
-     calls this method and parse the SDB tree of the FPGA.
-
-`irq_request'
-`irq_ack'
-`irq_free'
-     Interrupt management is carrier-specific, so it is abstracted as
-     operations. The interrupt number is listed in the device
-     structure, and for the mezzanine driver the number is only
-     informative.  The handler will receive the fmc pointer as dev_id;
-     the flags argument is passed to the Linux request_irq function,
-     but fmc-specific flags may be added in the future. You'll most
-     likely want to pass the `IRQF_SHARED' flag.
-
-`gpio_config'
-     The method allows to configure a GPIO pin in the carrier, and read
-     its current value if it is configured as input. See *note The GPIO
-     Abstraction:: for details.
-
-`read_ee'
-`write_ee'
-     Read or write the EEPROM. The functions are expected to be only
-     called before reprogramming and the carrier should refuse them
-     with `ENODEV' after reprogramming.  The offset is expected to be
-     within 8kB (the current size), but addresses up to 1MB are
-     reserved to fit bigger I2C devices in the future. Carriers may
-     offer access to other internal flash memories using these same
-     methods: for example the SPEC driver may define that its carrier
-     I2C memory is seen at offset 1M and the internal SPI flash is seen
-     at offset 16M.  This multiplexing of several flash memories in the
-     same address space is carrier-specific and should only be used
-     by a driver that has verified the `carrier_name' field.
-
-
-
-The GPIO Abstraction
-====================
-
-Support for GPIO pins in the fmc-bus environment is not very
-straightforward and deserves special discussion.
-
-While the general idea of a carrier-independent driver seems to fly,
-configuration of specific signals within the carrier needs at least
-some knowledge of the carrier itself.  For this reason, the specific
-driver can request to configure carrier-specific GPIO pins, numbered
-from 0 to at most 4095.  Configuration is performed by passing a
-pointer to an array of struct fmc_gpio items, as well as the length of
-the array. This is the data structure:
-
-        struct fmc_gpio {
-                char *carrier_name;
-                int gpio;
-                int _gpio;      /* internal use by the carrier */
-                int mode;       /* GPIOF_DIR_OUT etc, from <linux/gpio.h> */
-                int irqmode;    /* IRQF_TRIGGER_LOW and so on */
-        };
-
-By specifying a carrier_name for each pin, the driver may access
-different pins in different carriers.  The gpio_config method is
-expected to return the number of pins successfully configured, ignoring
-requests for other carriers. However, if no pin is configured (because
-no structure at all refers to the current carrier_name), the operation
-returns an error so the caller will know that it is running under a
-yet-unsupported carrier.
-
-So, for example, a driver that has been developed and tested on both
-the SPEC and the SVEC may request configuration of two different GPIO
-pins, and expect one such configuration to succeed - if none succeeds
-it most likely means that the current carrier is a still-unknown one.
-
-If, however, your GPIO pin has a specific known role, you can pass a
-special number in the gpio field, using one of the following macros:
-
-        #define FMC_GPIO_RAW(x)         (x)             /* 4096 of them */
-        #define FMC_GPIO_IRQ(x)         ((x) + 0x1000)  /*  256 of them */
-        #define FMC_GPIO_LED(x)         ((x) + 0x1100)  /*  256 of them */
-        #define FMC_GPIO_KEY(x)         ((x) + 0x1200)  /*  256 of them */
-        #define FMC_GPIO_TP(x)          ((x) + 0x1300)  /*  256 of them */
-        #define FMC_GPIO_USER(x)        ((x) + 0x1400)  /*  256 of them */
-
-Use of virtual GPIO numbers (anything but FMC_GPIO_RAW) is allowed
-provided the carrier_name field in the data structure is left
-unspecified (NULL). Each carrier is responsible for providing a mapping
-between virtual and physical GPIO numbers. The carrier may then use the
-_gpio field to cache the result of this mapping.
-
-All carriers must map their I/O lines to the sets above starting from
-zero.  The SPEC, for example, maps interrupt pins 0 and 1, and test
-points 0 through 3 (even if the test points on the PCB are called
-5,6,7,8).
-
-If, for example, a driver requires a free LED and a test point (for a
-scope probe to be plugged at some point during development) it may ask
-for FMC_GPIO_LED(0) and FMC_GPIO_TP(0). Each carrier will provide
-suitable GPIO pins.  Clearly, the person running the drivers will know
-the order used by the specific carrier driver in assigning leds and
-testpoints, so to make a carrier-dependent use of the diagnostic tools.
-
-In theory, some form of autodetection should be possible: a driver like
-the wr-nic (which uses IRQ(1) on the SPEC card) should configure
-IRQ(0), make a test with software-generated interrupts and configure
-IRQ(1) if the test fails. This probing step should be used because even
-if the wr-nic gateware is known to use IRQ1 on the SPEC, the driver
-should be carrier-independent and thus use IRQ(0) as a first bet -
-actually, the knowledge that IRQ0 may fail is carrier-dependent
-information, but using it doesn't make the driver unsuitable for other
-carriers.
-
-The return value of gpio_config is defined as follows:
-
-   * If no pin in the array can be used by the carrier, `-ENODEV'.
-
-   * If at least one virtual GPIO number cannot be mapped, `-ENOENT'.
-
-   * On success, 0 or positive. The value returned is the number of
-     high input bits (if no input is configured, the value for success
-     is 0).
-
-While I admit the procedure is not completely straightforward, it
-allows configuration, input and output with a single carrier operation.
-Given the typical use case of FMC devices, GPIO operations are not
-expected to ever by in hot paths, and GPIO access so fare has only been
-used to configure the interrupt pin, mode and polarity. Especially
-reading inputs is not expected to be common. If your device has GPIO
-capabilities in the hot path, you should consider using the kernel's
-GPIO mechanisms.
diff --git a/Documentation/fmc/fmc-chardev.txt b/Documentation/fmc/fmc-chardev.txt
deleted file mode 100644
index d9ccb278e597..000000000000
--- a/Documentation/fmc/fmc-chardev.txt
+++ /dev/null
@@ -1,64 +0,0 @@
-fmc-chardev
-===========
-
-This is a simple generic driver, that allows user access by means of a
-character device (actually, one for each mezzanine it takes hold of).
-
-The char device is created as a misc device. Its name in /dev (as
-created by udev) is the same name as the underlying FMC device. Thus,
-the name can be a silly fmc-0000 look-alike if the device has no
-identifiers nor bus_id, a more specific fmc-0400 if the device has a
-bus-specific address but no associated name, or something like
-fdelay-0400 if the FMC core can rely on both a mezzanine name and a bus
-address.
-
-Currently the driver only supports read and write: you can lseek to the
-desired address and read or write a register.
-
-The driver assumes all registers are 32-bit in size, and only accepts a
-single read or write per system call. However, as a result of Unix read
-and write semantics, users can simply fread or fwrite bigger areas in
-order to dump or store bigger memory areas.
-
-There is currently no support for mmap, user-space interrupt management
-and DMA buffers. They may be added in later versions, if the need
-arises.
-
-The example below shows raw access to a SPEC card programmed with its
-golden FPGA file, that features an SDB structure at offset 256 - i.e.
-64 words.  The mezzanine's EEPROM in this case is not programmed, so the
-default name is fmc-<bus><devfn>, and there are two cards in the system:
-
-  spusa.root# insmod fmc-chardev.ko
-  [ 1073.339332] spec 0000:02:00.0: Driver has no ID: matches all
-  [ 1073.345051] spec 0000:02:00.0: Created misc device "fmc-0200"
-  [ 1073.350821] spec 0000:04:00.0: Driver has no ID: matches all
-  [ 1073.356525] spec 0000:04:00.0: Created misc device "fmc-0400"
-  spusa.root# ls -l /dev/fmc*
-  crw------- 1 root root 10, 58 Nov 20 19:23 /dev/fmc-0200
-  crw------- 1 root root 10, 57 Nov 20 19:23 /dev/fmc-0400
-  spusa.root# dd bs=4 skip=64 count=1 if=/dev/fmc-0200 2> /dev/null | od -t x1z
-  0000000 2d 42 44 53                                      >-BDS<
-  0000004
-
-The simple program tools/fmc-mem in this package can access an FMC char
-device and read or write a word or a whole area.  Actually, the program
-is not specific to FMC at all, it just uses lseek, read and write.
-
-Its first argument is the device name, the second the offset, the third
-(if any) the value to write and the optional last argument that must
-begin with "+" is the number of bytes to read or write.  In case of
-repeated reading data is written to stdout; repeated writes read from
-stdin and the value argument is ignored.
-
-The following examples show reading the SDB magic number and the first
-SDB record from a SPEC device programmed with its golden image:
-
-     spusa.root# ./fmc-mem /dev/fmc-0200 100
-     5344422d
-     spusa.root# ./fmc-mem /dev/fmc-0200 100 +40 | od -Ax -t x1z
-     000000 2d 42 44 53 00 01 02 00 00 00 00 00 00 00 00 00  >-BDS............<
-     000010 00 00 00 00 ff 01 00 00 00 00 00 00 51 06 00 00  >............Q...<
-     000020 c9 42 a5 e6 02 00 00 00 11 05 12 20 2d 34 42 57  >.B......... -4BW<
-     000030 73 6f 72 43 72 61 62 73 49 53 47 2d 00 20 20 20  >sorCrabsISG-.   <
-     000040
diff --git a/Documentation/fmc/fmc-fakedev.txt b/Documentation/fmc/fmc-fakedev.txt
deleted file mode 100644
index e85b74a4ae30..000000000000
--- a/Documentation/fmc/fmc-fakedev.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-fmc-fakedev
-===========
-
-This package includes a software-only device, called fmc-fakedev, which
-is able to register up to 4 mezzanines (by default it registers one).
-Unlike the SPEC driver, which creates an FMC device for each PCI cards
-it manages, this module creates a single instance of its set of
-mezzanines.
-
-It is meant as the simplest possible example of how a driver should be
-written, and it includes a fake EEPROM image (built using the tools
-described in *note FMC Identification::),, which by default is
-replicated for each fake mezzanine.
-
-You can also use this device to verify the match algorithms, by asking
-it to test your own EEPROM image. You can provide the image by means of
-the eeprom= module parameter: the new EEPROM image is loaded, as usual,
-by means of the firmware loader.  This example shows the defaults and a
-custom EEPROM image:
-
-     spusa.root# insmod fmc-fakedev.ko
-     [   99.971247]  fake-fmc-carrier: mezzanine 0
-     [   99.975393]       Manufacturer: fake-vendor
-     [   99.979624]       Product name: fake-design-for-testing
-     spusa.root# rmmod fmc-fakedev
-     spusa.root# insmod fmc-fakedev.ko eeprom=fdelay-eeprom.bin
-     [  121.447464]  fake-fmc-carrier: Mezzanine 0: eeprom "fdelay-eeprom.bin"
-     [  121.462725]  fake-fmc-carrier: mezzanine 0
-     [  121.466858]       Manufacturer: CERN
-     [  121.470477]       Product name: FmcDelay1ns4cha
-     spusa.root# rmmod fmc-fakedev
-
-After loading the device, you can use the write_ee method do modify its
-own internal fake EEPROM: whenever the image is overwritten starting at
-offset 0, the module will unregister and register again the FMC device.
-This is shown in fmc-write-eeprom.txt
diff --git a/Documentation/fmc/fmc-trivial.txt b/Documentation/fmc/fmc-trivial.txt
deleted file mode 100644
index d1910bc67159..000000000000
--- a/Documentation/fmc/fmc-trivial.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-fmc-trivial
-===========
-
-The simple module fmc-trivial is just a simple client that registers an
-interrupt handler. I used it to verify the basic mechanism of the FMC
-bus and how interrupts worked.
-
-The module implements the generic FMC parameters, so it can program a
-different gateware file in each card. The whole list of parameters it
-accepts are:
-
-`busid='
-`gateware='
-     Generic parameters. See mezzanine.txt
-
-
-This driver is worth reading, in my opinion.
diff --git a/Documentation/fmc/fmc-write-eeprom.txt b/Documentation/fmc/fmc-write-eeprom.txt
deleted file mode 100644
index e0a9712156aa..000000000000
--- a/Documentation/fmc/fmc-write-eeprom.txt
+++ /dev/null
@@ -1,98 +0,0 @@
-fmc-write-eeprom
-================
-
-This module is designed to load a binary file from /lib/firmware and to
-write it to the internal EEPROM of the mezzanine card. This driver uses
-the `busid' generic parameter.
-
-Overwriting the EEPROM is not something you should do daily, and it is
-expected to only happen during manufacturing. For this reason, the
-module makes it unlikely for the random user to change a working EEPROM.
-
-However, since the EEPROM may include application-specific information
-other than the identification, later versions of this packages added
-write-support through sysfs. See *note Accessing the EEPROM::.
-
-To avoid damaging the EEPROM content, the module takes the following
-measures:
-
-   * It accepts a `file=' argument (within /lib/firmware) and if no
-     such argument is received, it doesn't write anything to EEPROM
-     (i.e. there is no default file name).
-
-   * If the file name ends with `.bin' it is written verbatim starting
-     at offset 0.
-
-   * If the file name ends with `.tlv' it is interpreted as
-     type-length-value (i.e., it allows writev(2)-like operation).
-
-   * If the file name doesn't match any of the patterns above, it is
-     ignored and no write is performed.
-
-   * Only cards listed with `busid=' are written to. If no busid is
-     specified, no programming is done (and the probe function of the
-     driver will fail).
-
-
-Each TLV tuple is formatted in this way: the header is 5 bytes,
-followed by data. The first byte is `w' for write, the next two bytes
-represent the address, in little-endian byte order, and the next two
-represent the data length, in little-endian order. The length does not
-include the header (it is the actual number of bytes to be written).
-
-This is a real example: that writes 5 bytes at position 0x110:
-
-        spusa.root# od -t x1 -Ax /lib/firmware/try.tlv
-        000000 77 10 01 05 00 30 31 32 33 34
-        00000a
-        spusa.root# insmod /tmp/fmc-write-eeprom.ko busid=0x0200 file=try.tlv
-        [19983.391498] spec 0000:03:00.0: write 5 bytes at 0x0110
-        [19983.414615] spec 0000:03:00.0: write_eeprom: success
-
-Please note that you'll most likely want to use SDBFS to build your
-EEPROM image, at least if your mezzanines are being used in the White
-Rabbit environment. For this reason the TLV format is not expected to
-be used much and is not expected to be developed further.
-
-If you want to try reflashing fake EEPROM devices, you can use the
-fmc-fakedev.ko module (see *note fmc-fakedev::).  Whenever you change
-the image starting at offset 0, it will deregister and register again
-after two seconds.  Please note, however, that if fmc-write-eeprom is
-still loaded, the system will associate it to the new device, which
-will be reprogrammed and thus will be unloaded after two seconds.  The
-following example removes the module after it reflashed fakedev the
-first time.
-
-     spusa.root# insmod fmc-fakedev.ko
-        [   72.984733]  fake-fmc: Manufacturer: fake-vendor
-        [   72.989434]  fake-fmc: Product name: fake-design-for-testing
-        spusa.root# insmod fmc-write-eeprom.ko busid=0 file=fdelay-eeprom.bin; \
-            rmmod fmc-write-eeprom
-        [  130.874098]  fake-fmc: Matching a generic driver (no ID)
-        [  130.887845]  fake-fmc: programming 6155 bytes
-        [  130.894567]  fake-fmc: write_eeprom: success
-        [  132.895794]  fake-fmc: Manufacturer: CERN
-        [  132.899872]  fake-fmc: Product name: FmcDelay1ns4cha
-
-
-Accessing the EEPROM
-=====================
-
-The bus creates a sysfs binary file called eeprom for each mezzanine it
-knows about:
-
-        spusa.root# cd /sys/bus/fmc/devices; ls -l */eeprom
-        -r--r--r-- 1 root root 8192 Feb 21 12:30 FmcAdc100m14b4cha-0800/eeprom
-        -r--r--r-- 1 root root 8192 Feb 21 12:30 FmcDelay1ns4cha-0200/eeprom
-        -r--r--r-- 1 root root 8192 Feb 21 12:30 FmcDio5cha-0400/eeprom
-
-Everybody can read the files and the superuser can also modify it, but
-the operation may on the carrier driver, if the carrier is unable to
-access the I2C bus.  For example, the spec driver can access the bus
-only with its golden gateware: after a mezzanine driver reprogrammed
-the FPGA with a custom circuit, the carrier is unable to access the
-EEPROM and returns ENOTSUPP.
-
-An alternative way to write the EEPROM is the mezzanine driver
-fmc-write-eeprom (See *note fmc-write-eeprom::), but the procedure is
-more complex.
diff --git a/Documentation/fmc/identifiers.txt b/Documentation/fmc/identifiers.txt
deleted file mode 100644
index 3bb577ff0d52..000000000000
--- a/Documentation/fmc/identifiers.txt
+++ /dev/null
@@ -1,168 +0,0 @@
-FMC Identification
-******************
-
-The FMC standard requires every compliant mezzanine to carry
-identification information in an I2C EEPROM.  The information must be
-laid out according to the "IPMI Platform Management FRU Information",
-where IPMI is a lie I'd better not expand, and FRU means "Field
-Replaceable Unit".
-
-The FRU information is an intricate unreadable binary blob that must
-live at offset 0 of the EEPROM, and typically extends for a few hundred
-bytes. The standard allows the application to use all the remaining
-storage area of the EEPROM as it wants.
-
-This chapter explains how to create your own EEPROM image and how to
-write it in your mezzanine, as well as how devices and drivers are
-paired at run time.  EEPROM programming uses tools that are part of this
-package and SDB (part of the fpga-config-space package).
-
-The first sections are only interesting for manufacturers who need to
-write the EEPROM. If you are just a software developer writing an FMC
-device or driver, you may jump straight to *note SDB Support::.
-
-
-Building the FRU Structure
-==========================
-
-If you want to know the internals of the FRU structure and despair, you
-can retrieve the document from
-`http://download.intel.com/design/servers/ipmi/FRU1011.pdf' .  The
-standard is awful and difficult without reason, so we only support the
-minimum mandatory subset - we create a simple structure and parse it
-back at run time, but we are not able to either generate or parse more
-arcane features like non-english languages and 6-bit text.  If you need
-more items of the FRU standard for your boards, please submit patches.
-
-This package includes the Python script that Matthieu Cattin wrote to
-generate the FRU binary blob, based on an helper libipmi by Manohar
-Vanga and Matthieu himself.  I changed the test script to receive
-parameters from the command line or from the environment (the command
-line takes precedence)
-
-To make a long story short, in order to build a standard-compliant
-binary file to be burned in your EEPROM, you need the following items:
-
-        Environment    Opt     Official Name          Default
----------------------------------------------------------------------
-        FRU_VENDOR     -v      "Board Manufacturer"   fmc-example
-        FRU_NAME       -n      "Board Product Name"   mezzanine
-        FRU_SERIAL     -s      `Board Serial Number"  0001
-        FRU_PART       -p      "Board Part Number"    sample-part
-        FRU_OUTPUT     -o      not applicable         /dev/stdout
-
-The "Official Name" above is what you find in the FRU official
-documentation, chapter 11, page 7 ("Board Info Area Format").  The
-output option is used to save the generated binary to a specific file
-name instead of stdout.
-
-You can pass the items to the FRU generator either in the environment
-or on the command line.  This package has currently no support for
-specifying power consumption or such stuff, but I plan to add it as
-soon as I find some time for that.
-
-FIXME: consumption etc for FRU are here or in PTS?
-
-The following example creates a binary image for a specific board:
-
-        ./tools/fru-generator -v CERN -n FmcAdc100m14b4cha \
-               -s HCCFFIA___-CR000003 -p EDA-02063-V5-0 > eeprom.bin
-
-The following example shows a script that builds several binary EEPROM
-images for a series of boards, changing the serial number for each of
-them. The script uses a mix of environment variables and command line
-options, and uses the same string patterns shown above.
-
-        #!/bin/sh
-
-        export FRU_VENDOR="CERN"
-        export FRU_NAME="FmcAdc100m14b4cha"
-        export FRU_PART="EDA-02063-V5-0"
-
-        serial="HCCFFIA___-CR"
-
-        for number in $(seq 1 50); do
-           # build number-string "ns"
-           ns="$(printf %06d $number)"
-           ./fru-generator -s "${serial}${ns}" > eeprom-${ns}.bin
-        done
-
-
-Using SDB-FS in the EEPROM
-==========================
-
-If you want to use SDB as a filesystem in the EEPROM device within the
-mezzanine, you should create one such filesystem using gensdbfs, from
-the fpga-config-space package on OHWR.
-
-By using an SBD filesystem you can cluster several files in a single
-EEPROM, so both the host system and a soft-core running in the FPGA (if
-any) can access extra production-time information.
-
-We chose to use SDB as a storage filesystem because the format is very
-simple, and both the host system and the soft-core will likely already
-include support code for such format. The SDB library offered by the
-fpga-config-space is less than 1kB under LM32, so it proves quite up to
-the task.
-
-The SDB entry point (which acts as a directory listing) cannot live at
-offset zero in the flash device, because the FRU information must live
-there.  To avoid wasting precious storage space while still allowing
-for more-than-minimal FRU structures, the fmc.ko will look for the SDB
-record at address 256, 512 and 1024.
-
-In order to generate the complete EEPROM image you'll need a
-configuration file for gensdbfs: you tell the program where to place
-the sdb entry point, and you must force the FRU data file to be placed
-at the beginning of the storage device. If needed, you can also place
-other files at a special offset (we sometimes do it for backward
-compatibility with drivers we wrote before implementing SDB for flash
-memory).
-
-The directory tools/sdbfs of this package includes a well-commented
-example that you may want to use as a starting point (the comments are
-in the file called -SDB-CONFIG-).  Reading documentation for gensdbfs
-is a suggested first step anyways.
-
-This package (generic FMC bus support) only accesses two files in the
-EEPROM: the FRU information, at offset zero, with a suggested filename
-of IPMI-FRU and the short name for the mezzanine, in a file called
-name. The IPMI-FRU name is not mandatory, but a strongly suggested
-choice; the name filename is mandatory, because this is the preferred
-short name used by the FMC core.  For example, a name of "fdelay" may
-supplement a Product Name like "FmcDelay1ns4cha" - exactly as
-demonstrated in `tools/sdbfs'.
-
-Note: SDB access to flash memory is not yet supported, so the short
-name currently in use is just the "Product Name" FRU string.
-
-The example in tools/sdbfs includes an extra file, that is needed by
-the fine-delay driver, and must live at a known address of 0x1800.  By
-running gensdbfs on that directory you can output your binary EEPROM
-image (here below spusa$ is the shell prompt):
-
-        spusa$ ../fru-generator -v CERN -n FmcDelay1ns4cha -s proto-0 \
-                      -p EDA-02267-V3 > IPMI-FRU
-        spusa$ ls -l
-        total 16
-        -rw-rw-r-- 1 rubini staff 975 Nov 19 18:08 --SDB-CONFIG--
-        -rw-rw-r-- 1 rubini staff 216 Nov 19 18:13 IPMI-FRU
-        -rw-rw-r-- 1 rubini staff  11 Nov 19 18:04 fd-calib
-        -rw-rw-r-- 1 rubini staff   7 Nov 19 18:04 name
-        spusa$ sudo gensdbfs . /lib/firmware/fdelay-eeprom.bin
-        spusa$ sdb-read -l -e 0x100 /lib/firmware/fdelay-eeprom.bin
-        /home/rubini/wip/sdbfs/userspace/sdb-read: listing format is to be defined
-        46696c6544617461:2e202020  00000100-000018ff .
-        46696c6544617461:6e616d65  00000200-00000206 name
-        46696c6544617461:66642d63  00001800-000018ff fd-calib
-        46696c6544617461:49504d49  00000000-000000d7 IPMI-FRU
-        spusa$ ../fru-dump /lib/firmware/fdelay-eeprom.bin
-        /lib/firmware/fdelay-eeprom.bin: manufacturer: CERN
-        /lib/firmware/fdelay-eeprom.bin: product-name: FmcDelay1ns4cha
-        /lib/firmware/fdelay-eeprom.bin: serial-number: proto-0
-        /lib/firmware/fdelay-eeprom.bin: part-number: EDA-02267-V3
-
-As expected, the output file is both a proper sdbfs object and an IPMI
-FRU information blob. The fd-calib file lives at offset 0x1800 and is
-over-allocated to 256 bytes, according to the configuration file for
-gensdbfs.
diff --git a/Documentation/fmc/mezzanine.txt b/Documentation/fmc/mezzanine.txt
deleted file mode 100644
index 87910dbfc91e..000000000000
--- a/Documentation/fmc/mezzanine.txt
+++ /dev/null
@@ -1,123 +0,0 @@
-FMC Driver
-**********
-
-An FMC driver is concerned with the specific mezzanine and associated
-gateware. As such, it is expected to be independent of the carrier
-being used: it will perform I/O accesses only by means of
-carrier-provided functions.
-
-The matching between device and driver is based on the content of the
-EEPROM (as mandated by the FMC standard) or by the actual cores
-configured in the FPGA; the latter technique is used when the FPGA is
-already programmed when the device is registered to the bus core.
-
-In some special cases it is possible for a driver to directly access
-FPGA registers, by means of the `fpga_base' field of the device
-structure. This may be needed for high-bandwidth peripherals like fast
-ADC cards. If the device module registered a remote device (for example
-by means of Etherbone), the `fpga_base' pointer will be NULL.
-Therefore, drivers must be ready to deal with NULL base pointers, and
-fail gracefully.  Most driver, however, are not expected to access the
-pointer directly but run fmc_readl and fmc_writel instead, which will
-work in any case.
-
-In even more special cases, the driver may access carrier-specific
-functionality: the `carrier_name' string allows the driver to check
-which is the current carrier and make use of the `carrier_data'
-pointer.  We chose to use carrier names rather than numeric identifiers
-for greater flexibility, but also to avoid a central registry within
-the `fmc.h' file - we hope other users will exploit our framework with
-their own carriers.  An example use of carrier names is in GPIO setup
-(see *note The GPIO Abstraction::), although the name match is not
-expected to be performed by the driver.  If you depend on specific
-carriers, please check the carrier name and fail gracefully if your
-driver finds it is running in a yet-unknown-to-it environment.
-
-
-ID Table
-========
-
-Like most other Linux drivers, and FMC driver must list all the devices
-which it is able to drive.  This is usually done by means of a device
-table, but in FMC we can match hardware based either on the contents of
-their EEPROM or on the actual FPGA cores that can be enumerated.
-Therefore, we have two tables of identifiers.
-
-Matching of FRU information depends on two names, the manufacturer (or
-vendor) and the device (see *note FMC Identification::); for
-flexibility during production (i.e. before writing to the EEPROM) the
-bus supports a catch-all driver that specifies NULL strings. For this
-reason, the table is specified as pointer-and-length, not a a
-null-terminated array - the entry with NULL names can be a valid entry.
-
-Matching on FPGA cores depends on two numeric fields: the 64-bit vendor
-number and the 32-bit device number. Support for matching based on
-class is not yet implemented.  Each device is expected to be uniquely
-identified by an array of cores (it matches if all of the cores are
-instantiated), and for consistency the list is passed as
-pointer-and-length.  Several similar devices can be driven by the same
-driver, and thus the driver specifies and array of such arrays.
-
-The complete set of involved data structures is thus the following:
-
-        struct fmc_fru_id { char *manufacturer; char *product_name; };
-        struct fmc_sdb_one_id { uint64_t vendor; uint32_t device; };
-        struct fmc_sdb_id { struct fmc_sdb_one_id *cores; int cores_nr; };
-
-        struct fmc_device_id {
-                struct fmc_fru_id *fru_id; int fru_id_nr;
-                struct fmc_sdb_id *sdb_id; int sdb_id_nr;
-        };
-
-A better reference, with full explanation, is the <linux/fmc.h> header.
-
-
-Module Parameters
-=================
-
-Most of the FMC drivers need the same set of kernel parameters. This
-package includes support to implement common parameters by means of
-fields in the `fmc_driver' structure and simple macro definitions.
-
-The parameters are carrier-specific, in that they rely on the busid
-concept, that varies among carriers. For the SPEC, the identifier is a
-PCI bus and devfn number, 16 bits wide in total; drivers for other
-carriers will most likely offer something similar but not identical,
-and some code duplication is unavoidable.
-
-This is the list of parameters that are common to several modules to
-see how they are actually used, please look at spec-trivial.c.
-
-`busid='
-     This is an array of integers, listing carrier-specific
-     identification numbers. For PIC, for example, `0x0400' represents
-     bus 4, slot 0.  If any such ID is specified, the driver will only
-     accept to drive cards that appear in the list (even if the FMC ID
-     matches). This is accomplished by the validate carrier method.
-
-`gateware='
-     The argument is an array of strings. If no busid= is specified,
-     the first string of gateware= is used for all cards; otherwise the
-     identifiers and gateware names are paired one by one, in the order
-     specified.
-
-`show_sdb='
-     For modules supporting it, this parameter asks to show the SDB
-     internal structure by means of kernel messages. It is disabled by
-     default because those lines tend to hide more important messages,
-     if you look at the system console while loading the drivers.
-     Note: the parameter is being obsoleted, because fmc.ko itself now
-     supports dump_sdb= that applies to every client driver.
-
-
-For example, if you are using the trivial driver to load two different
-gateware files to two different cards, you can use the following
-parameters to load different binaries to the cards, after looking up
-the PCI identifiers. This has been tested with a SPEC carrier.
-
-        insmod fmc-trivial.ko \
-                              busid=0x0200,0x0400 \
-                              gateware=fmc/fine-delay.bin,fmc/simple-dio.bin
-
-Please note that not all sub-modules support all of those parameters.
-You can use modinfo to check what is supported by each module.
diff --git a/Documentation/fmc/parameters.txt b/Documentation/fmc/parameters.txt
deleted file mode 100644
index 59edf088e3a4..000000000000
--- a/Documentation/fmc/parameters.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-Module Parameters in fmc.ko
-***************************
-
-The core driver receives two module parameters, meant to help debugging
-client modules. Both parameters can be modified by writing to
-/sys/module/fmc/parameters/, because they are used when client drivers
-are devices are registered, not when fmc.ko is loaded.
-
-`dump_eeprom='
-     If not zero, the parameter asks the bus controller to dump the
-     EEPROM of any device that is registered, using printk.
-
-`dump_sdb='
-     If not zero, the parameter prints the SDB tree of every FPGA it is
-     loaded by fmc_reprogram(). If greater than one, it asks to dump
-     the binary content of SDB records.  This currently only dumps the
-     top-level SDB array, though.
-
-
-EEPROM dumping avoids repeating lines, since most of the contents is
-usually empty and all bits are one or zero. This is an example of the
-output:
-
-        [ 6625.850480] spec 0000:02:00.0: FPGA programming successful
-        [ 6626.139949] spec 0000:02:00.0: Manufacturer: CERN
-        [ 6626.144666] spec 0000:02:00.0: Product name: FmcDelay1ns4cha
-        [ 6626.150370] FMC: mezzanine 0: 0000:02:00.0 on SPEC
-        [ 6626.155179] FMC: dumping eeprom 0x2000 (8192) bytes
-        [ 6626.160087] 0000: 01 00 00 01  00 0b 00 f3  01 0a 00 a5  85 87 c4 43
-        [ 6626.167069] 0010: 45 52 4e cf  46 6d 63 44  65 6c 61 79  31 6e 73 34
-        [ 6626.174019] 0020: 63 68 61 c7  70 72 6f 74  6f 2d 30 cc  45 44 41 2d
-        [ 6626.180975] 0030: 30 32 32 36  37 2d 56 33  da 32 30 31  32 2d 31 31
-        [...]
-        [ 6626.371366] 0200: 66 64 65 6c  61 79 0a 00  00 00 00 00  00 00 00 00
-        [ 6626.378359] 0210: 00 00 00 00  00 00 00 00  00 00 00 00  00 00 00 00
-        [ 6626.385361] [...]
-        [ 6626.387308] 1800: 70 6c 61 63  65 68 6f 6c  64 65 72 ff  ff ff ff ff
-        [ 6626.394259] 1810: ff ff ff ff  ff ff ff ff  ff ff ff ff  ff ff ff ff
-        [ 6626.401250] [...]
-
-The dump of SDB looks like the following; the example shows the simple
-golden gateware for the SPEC card, removing the leading timestamps to
-fit the page:
-
-        spec 0000:02:00.0: SDB: 00000651:e6a542c9 WB4-Crossbar-GSI
-        spec 0000:02:00.0: SDB: 0000ce42:ff07fc47 WR-Periph-Syscon (00000000-000000ff)
-        FMC: mezzanine 0: 0000:02:00.0 on SPEC
-        FMC: poor dump of sdb first level:
-        0000: 53 44 42 2d  00 02 01 00  00 00 00 00  00 00 00 00
-        0010: 00 00 00 00  00 00 01 ff  00 00 00 00  00 00 06 51
-        0020: e6 a5 42 c9  00 00 00 02  20 12 05 11  57 42 34 2d
-        0030: 43 72 6f 73  73 62 61 72  2d 47 53 49  20 20 20 00
-        0040: 00 00 01 01  00 00 00 07  00 00 00 00  00 00 00 00
-        0050: 00 00 00 00  00 00 00 ff  00 00 00 00  00 00 ce 42
-        0060: ff 07 fc 47  00 00 00 01  20 12 03 05  57 52 2d 50
-        0070: 65 72 69 70  68 2d 53 79  73 63 6f 6e  20 20 20 01
-- 
cgit 


From b7e47f48f1197c24f5347327afc2a4f7f6da9ca5 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Tue, 11 Jun 2019 15:29:40 +0300
Subject: bindings: sound: davinci-mcasp: Add support for optional
 auxclk-fs-ratio

When McASP is bus master it's reference clock (AUXCLK) might not be a
static clock, but running at a specific FS ratio.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/sound/davinci-mcasp-audio.txt | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.txt b/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.txt
index a58f79f5345c..c483dcec01f8 100644
--- a/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.txt
+++ b/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.txt
@@ -44,6 +44,9 @@ Optional properties:
   		 please refer to pinctrl-bindings.txt
 - fck_parent : Should contain a valid clock name which will be used as parent
 	       for the McASP fck
+- auxclk-fs-ratio: When McASP is bus master indicates the ratio between AUCLK
+		   and FS rate if applicable:
+		   AUCLK rate = auxclk-fs-ratio * FS rate
 
 Optional GPIO support:
 If any McASP pin need to be used as GPIO then the McASP node must have:
-- 
cgit 


From 0114214eca21e85d66a5e206f803db42d1d07960 Mon Sep 17 00:00:00 2001
From: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
Date: Wed, 12 Jun 2019 14:25:27 +0200
Subject: dt-bindings: net: wiznet: add w5x00 support

Add bindings for Wiznet's w5x00 series of SPI interfaced Ethernet chips.

Based on the bindings for microchip,enc28j60.

Signed-off-by: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/wiznet,w5x00.txt       | 50 ++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/wiznet,w5x00.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/wiznet,w5x00.txt b/Documentation/devicetree/bindings/net/wiznet,w5x00.txt
new file mode 100644
index 000000000000..e9665798c4be
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/wiznet,w5x00.txt
@@ -0,0 +1,50 @@
+* Wiznet w5x00
+
+This is a standalone 10/100 MBit Ethernet controller with SPI interface.
+
+For each device connected to a SPI bus, define a child node within
+the SPI master node.
+
+Required properties:
+- compatible: Should be one of the following strings:
+	      "wiznet,w5100"
+	      "wiznet,w5200"
+	      "wiznet,w5500"
+- reg: Specify the SPI chip select the chip is wired to.
+- interrupts: Specify the interrupt index within the interrupt controller (referred
+              to above in interrupt-parent) and interrupt type. w5x00 natively
+              generates falling edge interrupts, however, additional board logic
+              might invert the signal.
+- pinctrl-names: List of assigned state names, see pinctrl binding documentation.
+- pinctrl-0: List of phandles to configure the GPIO pin used as interrupt line,
+             see also generic and your platform specific pinctrl binding
+             documentation.
+
+Optional properties:
+- spi-max-frequency: Maximum frequency of the SPI bus when accessing the w5500.
+  According to the w5500 datasheet, the chip allows a maximum of 80 MHz, however,
+  board designs may need to limit this value.
+- local-mac-address: See ethernet.txt in the same directory.
+
+
+Example (for Raspberry Pi with pin control stuff for GPIO irq):
+
+&spi {
+	ethernet@0: w5500@0 {
+		compatible = "wiznet,w5500";
+		reg = <0>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&eth1_pins>;
+		interrupt-parent = <&gpio>;
+		interrupts = <25 IRQ_TYPE_EDGE_FALLING>;
+		spi-max-frequency = <30000000>;
+	};
+};
+
+&gpio {
+	eth1_pins: eth1_pins {
+		brcm,pins = <25>;
+		brcm,function = <0>; /* in */
+		brcm,pull = <0>; /* none */
+	};
+};
-- 
cgit 


From bb2e05e0c8dcc2c93a97410ce362da998b83155f Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Wed, 12 Jun 2019 21:29:34 +0900
Subject: linux-next: DOC: RDS: Fix a typo in rds.txt

This patch fixes a spelling typo in rds.txt

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/rds.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/rds.txt b/Documentation/networking/rds.txt
index 0235ae69af2a..f2a0147c933d 100644
--- a/Documentation/networking/rds.txt
+++ b/Documentation/networking/rds.txt
@@ -389,7 +389,7 @@ Multipath RDS (mprds)
   a common (to all paths) part, and a per-path struct rds_conn_path. All
   I/O workqs and reconnect threads are driven from the rds_conn_path.
   Transports such as TCP that are multipath capable may then set up a
-  TPC socket per rds_conn_path, and this is managed by the transport via
+  TCP socket per rds_conn_path, and this is managed by the transport via
   the transport privatee cp_transport_data pointer.
 
   Transports announce themselves as multipath capable by setting the
-- 
cgit 


From 03d66cfa2ad62cd0bb4c9aabc5a28d56af41f0cb Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 2 Jun 2019 22:44:08 -0700
Subject: crypto: doc - improve the skcipher API example code

Rewrite the skcipher API example, changing it to encrypt a buffer with
AES-256-XTS.  This addresses various problems with the previous example:

- It requests a specific driver "cbc-aes-aesni", which is unusual.
  Normally users ask for "cbc(aes)", not a specific driver.

- It encrypts only a single AES block.  For the reader, that doesn't
  clearly distinguish the "skcipher" API from the "cipher" API.

- Showing how to encrypt something with bare CBC is arguably a poor
  choice of example, as it doesn't follow modern crypto trends.  Now,
  usually authenticated encryption is recommended, in which case the
  user would use the AEAD API, not skcipher.  Disk encryption is still a
  legitimate use for skcipher, but for that usually XTS is recommended.

- Many other bugs and poor coding practices, such as not setting
  CRYPTO_TFM_REQ_MAY_SLEEP, unnecessarily allocating a heap buffer for
  the IV, unnecessary NULL checks, using a pointless wrapper struct, and
  forgetting to set an error code in one case.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/crypto/api-samples.rst | 176 +++++++++++++++--------------------
 1 file changed, 77 insertions(+), 99 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/crypto/api-samples.rst b/Documentation/crypto/api-samples.rst
index f14afaaf2f32..e923f17bc2bd 100644
--- a/Documentation/crypto/api-samples.rst
+++ b/Documentation/crypto/api-samples.rst
@@ -4,111 +4,89 @@ Code Examples
 Code Example For Symmetric Key Cipher Operation
 -----------------------------------------------
 
-::
-
-
-    /* tie all data structures together */
-    struct skcipher_def {
-        struct scatterlist sg;
-        struct crypto_skcipher *tfm;
-        struct skcipher_request *req;
-        struct crypto_wait wait;
-    };
-
-    /* Perform cipher operation */
-    static unsigned int test_skcipher_encdec(struct skcipher_def *sk,
-                         int enc)
-    {
-        int rc;
-
-        if (enc)
-            rc = crypto_wait_req(crypto_skcipher_encrypt(sk->req), &sk->wait);
-        else
-            rc = crypto_wait_req(crypto_skcipher_decrypt(sk->req), &sk->wait);
-
-	if (rc)
-		pr_info("skcipher encrypt returned with result %d\n", rc);
+This code encrypts some data with AES-256-XTS.  For sake of example,
+all inputs are random bytes, the encryption is done in-place, and it's
+assumed the code is running in a context where it can sleep.
 
-        return rc;
-    }
+::
 
-    /* Initialize and trigger cipher operation */
     static int test_skcipher(void)
     {
-        struct skcipher_def sk;
-        struct crypto_skcipher *skcipher = NULL;
-        struct skcipher_request *req = NULL;
-        char *scratchpad = NULL;
-        char *ivdata = NULL;
-        unsigned char key[32];
-        int ret = -EFAULT;
-
-        skcipher = crypto_alloc_skcipher("cbc-aes-aesni", 0, 0);
-        if (IS_ERR(skcipher)) {
-            pr_info("could not allocate skcipher handle\n");
-            return PTR_ERR(skcipher);
-        }
-
-        req = skcipher_request_alloc(skcipher, GFP_KERNEL);
-        if (!req) {
-            pr_info("could not allocate skcipher request\n");
-            ret = -ENOMEM;
-            goto out;
-        }
-
-        skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-                          crypto_req_done,
-                          &sk.wait);
-
-        /* AES 256 with random key */
-        get_random_bytes(&key, 32);
-        if (crypto_skcipher_setkey(skcipher, key, 32)) {
-            pr_info("key could not be set\n");
-            ret = -EAGAIN;
-            goto out;
-        }
-
-        /* IV will be random */
-        ivdata = kmalloc(16, GFP_KERNEL);
-        if (!ivdata) {
-            pr_info("could not allocate ivdata\n");
-            goto out;
-        }
-        get_random_bytes(ivdata, 16);
-
-        /* Input data will be random */
-        scratchpad = kmalloc(16, GFP_KERNEL);
-        if (!scratchpad) {
-            pr_info("could not allocate scratchpad\n");
-            goto out;
-        }
-        get_random_bytes(scratchpad, 16);
-
-        sk.tfm = skcipher;
-        sk.req = req;
-
-        /* We encrypt one block */
-        sg_init_one(&sk.sg, scratchpad, 16);
-        skcipher_request_set_crypt(req, &sk.sg, &sk.sg, 16, ivdata);
-        crypto_init_wait(&sk.wait);
-
-        /* encrypt data */
-        ret = test_skcipher_encdec(&sk, 1);
-        if (ret)
-            goto out;
-
-        pr_info("Encryption triggered successfully\n");
-
+            struct crypto_skcipher *tfm = NULL;
+            struct skcipher_request *req = NULL;
+            u8 *data = NULL;
+            const size_t datasize = 512; /* data size in bytes */
+            struct scatterlist sg;
+            DECLARE_CRYPTO_WAIT(wait);
+            u8 iv[16];  /* AES-256-XTS takes a 16-byte IV */
+            u8 key[64]; /* AES-256-XTS takes a 64-byte key */
+            int err;
+
+            /*
+             * Allocate a tfm (a transformation object) and set the key.
+             *
+             * In real-world use, a tfm and key are typically used for many
+             * encryption/decryption operations.  But in this example, we'll just do a
+             * single encryption operation with it (which is not very efficient).
+             */
+
+            tfm = crypto_alloc_skcipher("xts(aes)", 0, 0);
+            if (IS_ERR(tfm)) {
+                    pr_err("Error allocating xts(aes) handle: %ld\n", PTR_ERR(tfm));
+                    return PTR_ERR(tfm);
+            }
+
+            get_random_bytes(key, sizeof(key));
+            err = crypto_skcipher_setkey(tfm, key, sizeof(key));
+            if (err) {
+                    pr_err("Error setting key: %d\n", err);
+                    goto out;
+            }
+
+            /* Allocate a request object */
+            req = skcipher_request_alloc(tfm, GFP_KERNEL);
+            if (!req) {
+                    err = -ENOMEM;
+                    goto out;
+            }
+
+            /* Prepare the input data */
+            data = kmalloc(datasize, GFP_KERNEL);
+            if (!data) {
+                    err = -ENOMEM;
+                    goto out;
+            }
+            get_random_bytes(data, datasize);
+
+            /* Initialize the IV */
+            get_random_bytes(iv, sizeof(iv));
+
+            /*
+             * Encrypt the data in-place.
+             *
+             * For simplicity, in this example we wait for the request to complete
+             * before proceeding, even if the underlying implementation is asynchronous.
+             *
+             * To decrypt instead of encrypt, just change crypto_skcipher_encrypt() to
+             * crypto_skcipher_decrypt().
+             */
+            sg_init_one(&sg, data, datasize);
+            skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+                                               CRYPTO_TFM_REQ_MAY_SLEEP,
+                                          crypto_req_done, &wait);
+            skcipher_request_set_crypt(req, &sg, &sg, datasize, iv);
+            err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
+            if (err) {
+                    pr_err("Error encrypting data: %d\n", err);
+                    goto out;
+            }
+
+            pr_debug("Encryption was successful\n");
     out:
-        if (skcipher)
-            crypto_free_skcipher(skcipher);
-        if (req)
+            crypto_free_skcipher(tfm);
             skcipher_request_free(req);
-        if (ivdata)
-            kfree(ivdata);
-        if (scratchpad)
-            kfree(scratchpad);
-        return ret;
+            kfree(data);
+            return err;
     }
 
 
-- 
cgit 


From e6aacf9a52e0b013835c7e33538bcdf0703bbe2e Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Date: Thu, 9 May 2019 20:20:18 +0100
Subject: dt-bindings: can: rcar_can: Fix RZ/G2 CAN clocks

According to the latest information, the clock options for CAN on RZ/G2
are the same as the ones available on R-Car Gen3

Fixes: 868b7c0f43e6 ("dt-bindings: can: rcar_can: Add r8a774a1 support")
Signed-off-by: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Reviewed-by: Chris Paterson <Chris.Paterson2@renesas.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 Documentation/devicetree/bindings/net/can/rcar_can.txt | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt
index 9936b9ee67c3..e0dfc7c35fc5 100644
--- a/Documentation/devicetree/bindings/net/can/rcar_can.txt
+++ b/Documentation/devicetree/bindings/net/can/rcar_can.txt
@@ -27,13 +27,8 @@ Required properties:
 
 - reg: physical base address and size of the R-Car CAN register map.
 - interrupts: interrupt specifier for the sole interrupt.
-- clocks: phandles and clock specifiers for 2 CAN clock inputs for RZ/G2
-	  devices.
-	  phandles and clock specifiers for 3 CAN clock inputs for every other
-	  SoC.
-- clock-names: 2 clock input name strings for RZ/G2: "clkp1", "can_clk".
-	       3 clock input name strings for every other SoC: "clkp1", "clkp2",
-	       "can_clk".
+- clocks: phandles and clock specifiers for 3 CAN clock inputs.
+- clock-names: 3 clock input name strings: "clkp1", "clkp2", and "can_clk".
 - pinctrl-0: pin control group to be used for this controller.
 - pinctrl-names: must be "default".
 
@@ -49,8 +44,7 @@ using the below properties:
 Optional properties:
 - renesas,can-clock-select: R-Car CAN Clock Source Select. Valid values are:
 			    <0x0> (default) : Peripheral clock (clkp1)
-			    <0x1> : Peripheral clock (clkp2) (not supported by
-				    RZ/G2 devices)
+			    <0x1> : Peripheral clock (clkp2)
 			    <0x3> : External input clock
 
 Example
-- 
cgit 


From d703a52eb1ebeeba42f2385bbe84f1dc86f4353c Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Date: Thu, 9 May 2019 20:20:19 +0100
Subject: dt-bindings: can: rcar_can: Add r8a774c0 support

Document RZ/G2E (r8a774c0) SoC specific bindings.

Signed-off-by: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 Documentation/devicetree/bindings/net/can/rcar_can.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt
index e0dfc7c35fc5..b463e1268ac4 100644
--- a/Documentation/devicetree/bindings/net/can/rcar_can.txt
+++ b/Documentation/devicetree/bindings/net/can/rcar_can.txt
@@ -6,6 +6,7 @@ Required properties:
 	      "renesas,can-r8a7744" if CAN controller is a part of R8A7744 SoC.
 	      "renesas,can-r8a7745" if CAN controller is a part of R8A7745 SoC.
 	      "renesas,can-r8a774a1" if CAN controller is a part of R8A774A1 SoC.
+	      "renesas,can-r8a774c0" if CAN controller is a part of R8A774C0 SoC.
 	      "renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC.
 	      "renesas,can-r8a7779" if CAN controller is a part of R8A7779 SoC.
 	      "renesas,can-r8a7790" if CAN controller is a part of R8A7790 SoC.
-- 
cgit 


From 2b37c1c3e7bbfaaa3910ee66f44f5a4138229884 Mon Sep 17 00:00:00 2001
From: Frank Li <frank.li@nxp.com>
Date: Wed, 1 May 2019 18:43:26 +0000
Subject: dt-bindings: perf: imx8-ddr: add imx8qxp ddr performance monitor

Add binding documentation for the imx8qxp DDR performance monitor unit.

Signed-off-by: Frank Li <Frank.Li@nxp.com>
Reviewed-by: Rob Herring <robh@kernel.org>
[will: Removed trailing newline]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 .../devicetree/bindings/perf/fsl-imx-ddr.txt        | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
new file mode 100644
index 000000000000..d77e3f26f9e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
@@ -0,0 +1,21 @@
+* Freescale(NXP) IMX8 DDR performance monitor
+
+Required properties:
+
+- compatible: should be one of:
+	"fsl,imx8-ddr-pmu"
+	"fsl,imx8m-ddr-pmu"
+
+- reg: physical address and size
+
+- interrupts: single interrupt
+	generated by the control block
+
+Example:
+
+	ddr-pmu@5c020000 {
+		compatible = "fsl,imx8-ddr-pmu";
+		reg = <0x5c020000 0x10000>;
+		interrupt-parent = <&gic>;
+		interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
+	};
-- 
cgit 


From d9c53aa440b332059f7f0ce3f7868ff1dc58c62c Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Thu, 23 May 2019 16:31:00 -0600
Subject: NTB: Describe the ntb_msi_test client in the documentation.

Add a blurb in Documentation/ntb.txt to describe the ntb_msi_test tool's
debugfs interface. Similar to the (out of date) ntb_tool description.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 Documentation/ntb.txt | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/ntb.txt b/Documentation/ntb.txt
index 074a423c853c..87d1372da879 100644
--- a/Documentation/ntb.txt
+++ b/Documentation/ntb.txt
@@ -200,6 +200,33 @@ Debugfs Files:
 	This file is used to read and write peer scratchpads.  See
 	*spad* for details.
 
+NTB MSI Test Client (ntb\_msi\_test)
+------------------------------------
+
+The MSI test client serves to test and debug the MSI library which
+allows for passing MSI interrupts across NTB memory windows. The
+test client is interacted with through the debugfs filesystem:
+
+* *debugfs*/ntb\_tool/*hw*/
+	A directory in debugfs will be created for each
+	NTB device probed by the tool.  This directory is shortened to *hw*
+	below.
+* *hw*/port
+	This file describes the local port number
+* *hw*/irq*_occurrences
+	One occurrences file exists for each interrupt and, when read,
+	returns the number of times the interrupt has been triggered.
+* *hw*/peer*/port
+	This file describes the port number for each peer
+* *hw*/peer*/count
+	This file describes the number of interrupts that can be
+	triggered on each peer
+* *hw*/peer*/trigger
+	Writing an interrupt number (any number less than the value
+	specified in count) will trigger the interrupt on the
+	specified peer. That peer's interrupt's occurrence file
+	should be incremented.
+
 NTB Hardware Drivers
 ====================
 
-- 
cgit 


From 2d3c72ed504196edb2f22a08cb03a0f9fb7e564f Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Mon, 10 Jun 2019 16:49:11 -0300
Subject: rdma: Remove nes

This driver was first merged over 10 years ago and has not seen major
activity by the authors in the last 7 years. However, in that time it has
been patched 150 times to adapt it to changing kernel APIs.

Further, the hardware has several issues, like not supporting 64 bit DMA,
that make it rather uninteresting for use with modern systems and RDMA.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 Documentation/ABI/stable/sysfs-class-infiniband | 17 -----------------
 1 file changed, 17 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/stable/sysfs-class-infiniband b/Documentation/ABI/stable/sysfs-class-infiniband
index 17211ceb9bf4..aed21b8916a2 100644
--- a/Documentation/ABI/stable/sysfs-class-infiniband
+++ b/Documentation/ABI/stable/sysfs-class-infiniband
@@ -423,23 +423,6 @@ Description:
 		(e.g. driver restart on the VM which owns the VF).
 
 
-sysfs interface for NetEffect RNIC Low-Level iWARP driver (nes)
----------------------------------------------------------------
-
-What:		/sys/class/infiniband/nesX/hw_rev
-What:		/sys/class/infiniband/nesX/hca_type
-What:		/sys/class/infiniband/nesX/board_id
-Date:		Feb, 2008
-KernelVersion:	v2.6.25
-Contact:	linux-rdma@vger.kernel.org
-Description:
-		hw_rev:		(RO) Hardware revision number
-
-		hca_type:	(RO) Host Channel Adapter type (NEX020)
-
-		board_id:	(RO) Manufacturing board id
-
-
 sysfs interface for Chelsio T4/T5 RDMA driver (cxgb4)
 -----------------------------------------------------
 
-- 
cgit 


From f7a6463e389e9222e5d6fb60211673e06a307438 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 17 May 2019 16:18:13 -0500
Subject: dt-bindings: vendor-prefixes: Also allow node names starting with '_'

Generated nodes for overlays begin with '_'. The binding examples are
built as overlays in order to allow unresolved phandles, so we need to
allow the generated node names.

Reviewed-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index e68c839e9f51..a777168432d0 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -1028,7 +1028,7 @@ patternProperties:
 
   # Normal property name match without a comma
   # These should catch all node/property names without a prefix
-  "^[a-zA-Z0-9#][a-zA-Z0-9+\\-._@]{0,63}$": true
+  "^[a-zA-Z0-9#_][a-zA-Z0-9+\\-._@]{0,63}$": true
   "^[a-zA-Z0-9+\\-._]*@[0-9a-zA-Z,]*$": true
   "^#.*": true
 
-- 
cgit 


From 837158b847a48d886f23937b539965f83917cc1f Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 17 May 2019 16:17:08 -0500
Subject: dt-bindings: Check the examples against the schemas

Currently, the binding examples are just built with dtc. dtc recently
gained the support necessary to output the examples in YAML format
(commit 87963ee20693 ("livetree: add missing type markers in generated
overlay properties"). Now just switch the output format and the examples
will be checked against the schema.

Reviewed-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile
index 8a2774b5834b..6b0dfd5c17ba 100644
--- a/Documentation/devicetree/bindings/Makefile
+++ b/Documentation/devicetree/bindings/Makefile
@@ -25,7 +25,7 @@ DT_DOCS = $(shell \
 DT_SCHEMA_FILES ?= $(addprefix $(src)/,$(DT_DOCS))
 
 extra-y += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES))
-extra-y += $(patsubst $(src)/%.yaml,%.example.dtb, $(DT_SCHEMA_FILES))
+extra-y += $(patsubst $(src)/%.yaml,%.example.dt.yaml, $(DT_SCHEMA_FILES))
 
 $(obj)/$(DT_TMP_SCHEMA): $(DT_SCHEMA_FILES) FORCE
 	$(call if_changed,mk_schema)
-- 
cgit 


From 630eccfd0a0355dc18b7e4456d3479901fb108e9 Mon Sep 17 00:00:00 2001
From: Hannes Schmelzer <hannes.schmelzer@br-automation.com>
Date: Fri, 3 May 2019 11:23:32 +0200
Subject: Documentation: devicetree: Add vendor prefix for B&R Industrial
 Automation GmbH

Signed-off-by: Hannes Schmelzer <hannes.schmelzer@br-automation.com>
[robh: rework for schema]
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index a777168432d0..e0a3e5ff3825 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -147,6 +147,8 @@ patternProperties:
     description: Broadcom Corporation
   "^buffalo,.*":
     description: Buffalo, Inc.
+  "^bur,.*":
+    description: B&R Industrial Automation GmbH
   "^bticino,.*":
     description: Bticino International
   "^calxeda,.*":
-- 
cgit 


From 00091c0da136f36feaa974f761fd0fc8905a29c9 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Tue, 11 Jun 2019 17:15:13 -0700
Subject: Documentation: net: mlx5: Add mlx5 initial documentation

Add initial documentation for mlx5 driver.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 Documentation/networking/device_drivers/index.rst  |   1 +
 .../networking/device_drivers/mellanox/mlx5.rst    | 101 +++++++++++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 Documentation/networking/device_drivers/mellanox/mlx5.rst

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 75fa537763a4..24598d5f8ffa 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -21,6 +21,7 @@ Contents:
    intel/i40e
    intel/iavf
    intel/ice
+   mellanox/mlx5
 
 .. only::  subproject
 
diff --git a/Documentation/networking/device_drivers/mellanox/mlx5.rst b/Documentation/networking/device_drivers/mellanox/mlx5.rst
new file mode 100644
index 000000000000..0be802186d12
--- /dev/null
+++ b/Documentation/networking/device_drivers/mellanox/mlx5.rst
@@ -0,0 +1,101 @@
+.. SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+
+=================================================
+Mellanox ConnectX(R) mlx5 core VPI Network Driver
+=================================================
+
+Copyright (c) 2019, Mellanox Technologies LTD.
+
+Contents
+========
+
+- `Enabling the driver and kconfig options`_
+
+Enabling the driver and kconfig options
+================================================
+
+| mlx5 core is modular and most of the major mlx5 core driver features can be selected (compiled in/out)
+| at build time via kernel Kconfig flags.
+| Basic features, ethernet net device rx/tx offloads and XDP, are available with the most basic flags
+| CONFIG_MLX5_CORE=y/m and CONFIG_MLX5_CORE_EN=y.
+| For the list of advanced features please see below.
+
+**CONFIG_MLX5_CORE=(y/m/n)** (module mlx5_core.ko)
+
+|    The driver can be enabled by choosing CONFIG_MLX5_CORE=y/m in kernel config.
+|    This will provide mlx5 core driver for mlx5 ulps to interface with (mlx5e, mlx5_ib).
+
+
+**CONFIG_MLX5_CORE_EN=(y/n)**
+
+|    Choosing this option will allow basic ethernet netdevice support with all of the standard rx/tx offloads.
+|    mlx5e is the mlx5 ulp driver which provides netdevice kernel interface, when chosen, mlx5e will be
+|    built-in into mlx5_core.ko.
+
+
+**CONFIG_MLX5_EN_ARFS=(y/n)**
+
+|     Enables Hardware-accelerated receive flow steering (arfs) support, and ntuple filtering.
+|     https://community.mellanox.com/s/article/howto-configure-arfs-on-connectx-4
+
+
+**CONFIG_MLX5_EN_RXNFC=(y/n)**
+
+|    Enables ethtool receive network flow classification, which allows user defined
+|    flow rules to direct traffic into arbitrary rx queue via ethtool set/get_rxnfc API.
+
+
+**CONFIG_MLX5_CORE_EN_DCB=(y/n)**:
+
+|    Enables `Data Center Bridging (DCB) Support <https://community.mellanox.com/s/article/howto-auto-config-pfc-and-ets-on-connectx-4-via-lldp-dcbx>`_.
+
+
+**CONFIG_MLX5_MPFS=(y/n)**
+
+|    Ethernet Multi-Physical Function Switch (MPFS) support in ConnectX NIC.
+|    MPFs is required for when `Multi-Host <http://www.mellanox.com/page/multihost>`_ configuration is enabled to allow passing
+|    user configured unicast MAC addresses to the requesting PF.
+
+
+**CONFIG_MLX5_ESWITCH=(y/n)**
+
+|    Ethernet SRIOV E-Switch support in ConnectX NIC. E-Switch provides internal SRIOV packet steering
+|    and switching for the enabled VFs and PF in two available modes:
+|           1) `Legacy SRIOV mode (L2 mac vlan steering based) <https://community.mellanox.com/s/article/howto-configure-sr-iov-for-connectx-4-connectx-5-with-kvm--ethernet-x>`_.
+|           2) `Switchdev mode (eswitch offloads) <https://www.mellanox.com/related-docs/prod_software/ASAP2_Hardware_Offloading_for_vSwitches_User_Manual_v4.4.pdf>`_.
+
+
+**CONFIG_MLX5_CORE_IPOIB=(y/n)**
+
+|    IPoIB offloads & acceleration support.
+|    Requires CONFIG_MLX5_CORE_EN to provide an accelerated interface for the rdma
+|    IPoIB ulp netdevice.
+
+
+**CONFIG_MLX5_FPGA=(y/n)**
+
+|    Build support for the Innova family of network cards by Mellanox Technologies.
+|    Innova network cards are comprised of a ConnectX chip and an FPGA chip on one board.
+|    If you select this option, the mlx5_core driver will include the Innova FPGA core and allow
+|    building sandbox-specific client drivers.
+
+
+**CONFIG_MLX5_EN_IPSEC=(y/n)**
+
+|    Enables `IPSec XFRM cryptography-offload accelaration <http://www.mellanox.com/related-docs/prod_software/Mellanox_Innova_IPsec_Ethernet_Adapter_Card_User_Manual.pdf>`_.
+
+**CONFIG_MLX5_EN_TLS=(y/n)**
+
+|   TLS cryptography-offload accelaration.
+
+
+**CONFIG_MLX5_INFINIBAND=(y/n/m)** (module mlx5_ib.ko)
+
+|   Provides low-level InfiniBand/RDMA and `RoCE <https://community.mellanox.com/s/article/recommended-network-configuration-examples-for-roce-deployment>`_ support.
+
+
+**External options** ( Choose if the corresponding mlx5 feature is required )
+
+- CONFIG_PTP_1588_CLOCK: When chosen, mlx5 ptp support will be enabled
+- CONFIG_VXLAN: When chosen, mlx5 vxaln support will be enabled.
+- CONFIG_MLXFW: When chosen, mlx5 firmware flashing support will be enabled (via devlink and ethtool).
-- 
cgit 


From 06efeb555524a8c65ef429f2603885c31a5212b1 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Sat, 1 Jun 2019 16:40:16 +0300
Subject: Documentation: net: mlx5: Devlink health documentation

Documentation for devlink health reporters supported by mlx5.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../networking/device_drivers/mellanox/mlx5.rst    | 72 ++++++++++++++++++++++
 1 file changed, 72 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/mellanox/mlx5.rst b/Documentation/networking/device_drivers/mellanox/mlx5.rst
index 0be802186d12..4eeef2df912f 100644
--- a/Documentation/networking/device_drivers/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/mellanox/mlx5.rst
@@ -10,6 +10,7 @@ Contents
 ========
 
 - `Enabling the driver and kconfig options`_
+- `Devlink health reporters`_
 
 Enabling the driver and kconfig options
 ================================================
@@ -99,3 +100,74 @@ Enabling the driver and kconfig options
 - CONFIG_PTP_1588_CLOCK: When chosen, mlx5 ptp support will be enabled
 - CONFIG_VXLAN: When chosen, mlx5 vxaln support will be enabled.
 - CONFIG_MLXFW: When chosen, mlx5 firmware flashing support will be enabled (via devlink and ethtool).
+
+
+Devlink health reporters
+========================
+
+tx reporter
+-----------
+The tx reporter is responsible of two error scenarios:
+
+- TX timeout
+    Report on kernel tx timeout detection.
+    Recover by searching lost interrupts.
+- TX error completion
+    Report on error tx completion.
+    Recover by flushing the TX queue and reset it.
+
+TX reporter also support Diagnose callback, on which it provides
+real time information of its send queues status.
+
+User commands examples:
+
+- Diagnose send queues status::
+
+    $ devlink health diagnose pci/0000:82:00.0 reporter tx
+
+- Show number of tx errors indicated, number of recover flows ended successfully,
+  is autorecover enabled and graceful period from last recover::
+
+    $ devlink health show pci/0000:82:00.0 reporter tx
+
+fw reporter
+-----------
+The fw reporter implements diagnose and dump callbacks.
+It follows symptoms of fw error such as fw syndrome by triggering
+fw core dump and storing it into the dump buffer.
+The fw reporter diagnose command can be triggered any time by the user to check
+current fw status.
+
+User commands examples:
+
+- Check fw heath status::
+
+    $ devlink health diagnose pci/0000:82:00.0 reporter fw
+
+- Read FW core dump if already stored or trigger new one::
+
+    $ devlink health dump show pci/0000:82:00.0 reporter fw
+
+NOTE: This command can run only on the PF which has fw tracer ownership,
+running it on other PF or any VF will return "Operation not permitted".
+
+fw fatal reporter
+-----------------
+The fw fatal reporter implements dump and recover callbacks.
+It follows fatal errors indications by CR-space dump and recover flow.
+The CR-space dump uses vsc interface which is valid even if the FW command
+interface is not functional, which is the case in most FW fatal errors.
+The recover function runs recover flow which reloads the driver and triggers fw
+reset if needed.
+
+User commands examples:
+
+- Run fw recover flow manually::
+
+    $ devlink health recover pci/0000:82:00.0 reporter fw_fatal
+
+- Read FW CR-space dump if already strored or trigger new one::
+
+    $ devlink health dump show pci/0000:82:00.1 reporter fw_fatal
+
+NOTE: This command can run only on PF.
-- 
cgit 


From 573748081a66b2baacce5e0927c700e480a60c7c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 14 May 2019 10:50:24 +0200
Subject: dt-bindings: property-units: Sanitize unit naming

Make the naming of units consistent with common practices:
  - Do not capitalize the first character of units ("Celsius" is
    special, as it is not the unit name, but a reference to its
    proposer),
  - Do not use plural for units,
  - Do not abbreviate "ampere",
  - Concatenate prefixes and units (no spaces or hyphens),
  - Separate units by spaces not hyphens,
  - "milli" applies to "degree", not to "Celsius".

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/property-units.txt         | 34 +++++++++++-----------
 1 file changed, 17 insertions(+), 17 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/property-units.txt b/Documentation/devicetree/bindings/property-units.txt
index bfd33734faca..e9b8360b3288 100644
--- a/Documentation/devicetree/bindings/property-units.txt
+++ b/Documentation/devicetree/bindings/property-units.txt
@@ -12,32 +12,32 @@ unit prefixes.
 Time/Frequency
 ----------------------------------------
 -mhz		: megahertz
--hz		: Hertz (preferred)
--sec		: seconds
--ms		: milliseconds
--us		: microseconds
--ns		: nanoseconds
+-hz		: hertz (preferred)
+-sec		: second
+-ms		: millisecond
+-us		: microsecond
+-ns		: nanosecond
 
 Distance
 ----------------------------------------
--mm		: millimeters
+-mm		: millimeter
 
 Electricity
 ----------------------------------------
--microamp	: micro amps
--microamp-hours : micro amp-hours
--ohms		: Ohms
--micro-ohms	: micro Ohms
--microwatt-hours: micro Watt-hours
--microvolt	: micro volts
--picofarads	: picofarads
--femtofarads	: femtofarads
+-microamp	: microampere
+-microamp-hours : microampere hour
+-ohms		: ohm
+-micro-ohms	: microohm
+-microwatt-hours: microwatt hour
+-microvolt	: microvolt
+-picofarads	: picofarad
+-femtofarads	: femtofarad
 
 Temperature
 ----------------------------------------
--celsius	: Degrees Celsius
--millicelsius	: Degreee milli-Celsius
+-celsius	: degree Celsius
+-millicelsius	: millidegree Celsius
 
 Pressure
 ----------------------------------------
--kpascal	: kiloPascal
+-kpascal	: kilopascal
-- 
cgit 


From 9129b017b54dab09eb69b7269026243156e5188e Mon Sep 17 00:00:00 2001
From: Andrea Parri <andrea.parri@amarulasolutions.com>
Date: Mon, 27 May 2019 10:49:57 +0200
Subject: rcu: Don't return a value from rcu_assign_pointer()

Quoting Paul [1]:

  "Given that a quick (and perhaps error-prone) search of the uses
   of rcu_assign_pointer() in v5.1 didn't find a single use of the
   return value, let's please instead change the documentation and
   implementation to eliminate the return value."

[1] https://lkml.kernel.org/r/20190523135013.GL28207@linux.ibm.com

Signed-off-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Cc: "Paul E. McKenney" <paulmck@linux.ibm.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: rcu@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Sasha Levin <sashal@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 Documentation/RCU/whatisRCU.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 981651a8b65d..7e1a8721637a 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -212,7 +212,7 @@ synchronize_rcu()
 
 rcu_assign_pointer()
 
-	typeof(p) rcu_assign_pointer(p, typeof(p) v);
+	void rcu_assign_pointer(p, typeof(p) v);
 
 	Yes, rcu_assign_pointer() -is- implemented as a macro, though it
 	would be cool to be able to declare a function in this manner.
@@ -220,9 +220,9 @@ rcu_assign_pointer()
 
 	The updater uses this function to assign a new value to an
 	RCU-protected pointer, in order to safely communicate the change
-	in value from the updater to the reader.  This function returns
-	the new value, and also executes any memory-barrier instructions
-	required for a given CPU architecture.
+	in value from the updater to the reader.  This macro does not
+	evaluate to an rvalue, but it does execute any memory-barrier
+	instructions required for a given CPU architecture.
 
 	Perhaps just as important, it serves to document (1) which
 	pointers are protected by RCU and (2) the point at which a
-- 
cgit 


From 4c0a59e1123f90b773d46942a6d505b3cb5bd406 Mon Sep 17 00:00:00 2001
From: Amit Kucheria <amit.kucheria@linaro.org>
Date: Tue, 21 May 2019 15:05:12 +0530
Subject: Documentation: arm: Link idle-states binding to "enable-method"
 property

The "enable-method" property for cpu nodes needs to be "psci" for CPU
idle management to be setup correctly.

Add a note to the binding documentation to this effect to make it
obvious.

Signed-off-by: Amit Kucheria <amit.kucheria@linaro.org>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/idle-states.txt | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/idle-states.txt b/Documentation/devicetree/bindings/arm/idle-states.txt
index 45730ba60af5..3bdbe675b9e6 100644
--- a/Documentation/devicetree/bindings/arm/idle-states.txt
+++ b/Documentation/devicetree/bindings/arm/idle-states.txt
@@ -241,9 +241,13 @@ processor idle states, defined as device tree nodes, are listed.
 			   - "psci"
 			# On ARM 32-bit systems this property is optional
 
-The nodes describing the idle states (state) can only be defined within the
-idle-states node, any other configuration is considered invalid and therefore
-must be ignored.
+This assumes that the "enable-method" property is set to "psci" in the cpu
+node[6] that is responsible for setting up CPU idle management in the OS
+implementation.
+
+The nodes describing the idle states (state) can only be defined
+within the idle-states node, any other configuration is considered invalid
+and therefore must be ignored.
 
 ===========================================
 4 - state node
@@ -697,3 +701,6 @@ cpus {
 
 [5] Devicetree Specification
     https://www.devicetree.org/specifications/
+
+[6] ARM Linux Kernel documentation - Booting AArch64 Linux
+    Documentation/arm64/booting.txt
-- 
cgit 


From 0ed91bded307cc980f9542eff861266f1744c303 Mon Sep 17 00:00:00 2001
From: Dinh Nguyen <dinguyen@kernel.org>
Date: Tue, 11 Jun 2019 10:34:32 -0500
Subject: dt-bindings: pl330: document the optional resets property

Add the optional resets property the pl330 dma node.

Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/devicetree/bindings/dma/arm-pl330.txt | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/dma/arm-pl330.txt b/Documentation/devicetree/bindings/dma/arm-pl330.txt
index db7e2260f9c5..2c7fd1941abb 100644
--- a/Documentation/devicetree/bindings/dma/arm-pl330.txt
+++ b/Documentation/devicetree/bindings/dma/arm-pl330.txt
@@ -16,6 +16,9 @@ Optional properties:
   - dma-channels: contains the total number of DMA channels supported by the DMAC
   - dma-requests: contains the total number of DMA requests supported by the DMAC
   - arm,pl330-broken-no-flushp: quirk for avoiding to execute DMAFLUSHP
+  - resets: contains an entry for each entry in reset-names.
+	    See ../reset/reset.txt for details.
+  - reset-names: must contain at least "dma", and optional is "dma-ocp".
 
 Example:
 
-- 
cgit 


From 1e87fec9fa52a6f7c223998d6bfbd3464eb37e31 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 16 May 2019 11:44:52 +0200
Subject: mac80211: call rate_control_send_low() internally

There's no rate control algorithm that *doesn't* want to call
it internally, and calling it internally will let us modify
its behaviour in the future.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 Documentation/driver-api/80211/mac80211-advanced.rst | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/80211/mac80211-advanced.rst b/Documentation/driver-api/80211/mac80211-advanced.rst
index 70a89b2163c2..9f1c5bb7ac35 100644
--- a/Documentation/driver-api/80211/mac80211-advanced.rst
+++ b/Documentation/driver-api/80211/mac80211-advanced.rst
@@ -226,9 +226,6 @@ TBD
 .. kernel-doc:: include/net/mac80211.h
    :functions: ieee80211_tx_rate_control
 
-.. kernel-doc:: include/net/mac80211.h
-   :functions: rate_control_send_low
-
 TBD
 
 This part of the book describes mac80211 internals.
-- 
cgit 


From c2aacceedc86af87428d998e23a1aca24fd8aa2e Mon Sep 17 00:00:00 2001
From: Nick Xie <nick@khadas.com>
Date: Mon, 10 Jun 2019 15:57:53 +0800
Subject: arm64: dts: rockchip: Add support for Khadas Edge/Edge-V/Captain
 boards

Add devicetree support for Khadas Edge/Edge-V/Captain boards.
Khadas Edge is an expandable Rockchip RK3399 board with goldfinger.
Khadas Captain is the carrier board for Khadas Edge.
Khadas Edge-V is a Khadas VIM form factor Rockchip RK3399 board.

Signed-off-by: Nick Xie <nick@khadas.com>
[edge-captain and edge-v contain different components that are supposed
 to get added in future patches, so should stay separate while looking
 somewhat similar right now]
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 Documentation/devicetree/bindings/arm/rockchip.yaml | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/rockchip.yaml b/Documentation/devicetree/bindings/arm/rockchip.yaml
index 5c6bbf10abc9..eef822ce2ad4 100644
--- a/Documentation/devicetree/bindings/arm/rockchip.yaml
+++ b/Documentation/devicetree/bindings/arm/rockchip.yaml
@@ -316,6 +316,14 @@ properties:
           - const: haoyu,marsboard-rk3066
           - const: rockchip,rk3066a
 
+      - description: Khadas Edge series boards
+        items:
+          - enum:
+              - khadas,edge
+              - khadas,edge-captain
+              - khadas,edge-v
+          - const: rockchip,rk3399
+
       - description: mqmaker MiQi
         items:
           - const: mqmaker,miqi
-- 
cgit 


From ed66bcd0674a97cbacb01e7d140eea0f882d31a6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Thu, 13 Jun 2019 23:04:07 -0300
Subject: ABI: fix some syntax issues at the ABI database

On those three files, the ABI representation described at
README are violated.

- at sysfs-bus-iio-proximity-as3935:
	a ':' character is missing after "What"

- at sysfs-class-devfreq:
	there's a typo at Description

- at sysfs-class-cxl, it is using the ":" character at a
	file preamble, causing it to be misinterpreted as a
	tag.

- On the other files, instead of "What", they use "Where".

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com>  # cxl
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/pstore                   |  2 +-
 .../testing/sysfs-bus-event_source-devices-format  |  2 +-
 .../ABI/testing/sysfs-bus-i2c-devices-hm6352       |  6 +++---
 .../ABI/testing/sysfs-bus-iio-distance-srf08       |  4 ++--
 .../ABI/testing/sysfs-bus-iio-proximity-as3935     |  4 ++--
 .../ABI/testing/sysfs-bus-pci-devices-cciss        | 22 +++++++++++-----------
 .../ABI/testing/sysfs-bus-usb-devices-usbsevseg    | 12 ++++++------
 Documentation/ABI/testing/sysfs-class-cxl          |  6 +++---
 Documentation/ABI/testing/sysfs-class-devfreq      |  2 +-
 Documentation/ABI/testing/sysfs-class-powercap     |  2 +-
 Documentation/ABI/testing/sysfs-kernel-fscaps      |  2 +-
 Documentation/ABI/testing/sysfs-kernel-vmcoreinfo  |  2 +-
 12 files changed, 33 insertions(+), 33 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/pstore b/Documentation/ABI/testing/pstore
index 5fca9f5e10a3..8d6e48f4e8ef 100644
--- a/Documentation/ABI/testing/pstore
+++ b/Documentation/ABI/testing/pstore
@@ -1,4 +1,4 @@
-Where:		/sys/fs/pstore/... (or /dev/pstore/...)
+What:		/sys/fs/pstore/... (or /dev/pstore/...)
 Date:		March 2011
 Kernel Version: 2.6.39
 Contact:	tony.luck@intel.com
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-format b/Documentation/ABI/testing/sysfs-bus-event_source-devices-format
index 77f47ff5ee02..b6f8748e0200 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-format
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-format
@@ -1,4 +1,4 @@
-Where:		/sys/bus/event_source/devices/<dev>/format
+What:		/sys/bus/event_source/devices/<dev>/format
 Date:		January 2012
 Kernel Version: 3.3
 Contact:	Jiri Olsa <jolsa@redhat.com>
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-hm6352 b/Documentation/ABI/testing/sysfs-bus-i2c-devices-hm6352
index feb2e4a87075..29bd447e50a0 100644
--- a/Documentation/ABI/testing/sysfs-bus-i2c-devices-hm6352
+++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-hm6352
@@ -1,18 +1,18 @@
-Where:		/sys/bus/i2c/devices/.../heading0_input
+What:		/sys/bus/i2c/devices/.../heading0_input
 Date:		April 2010
 Kernel Version: 2.6.36?
 Contact:	alan.cox@intel.com
 Description:	Reports the current heading from the compass as a floating
 		point value in degrees.
 
-Where:		/sys/bus/i2c/devices/.../power_state
+What:		/sys/bus/i2c/devices/.../power_state
 Date:		April 2010
 Kernel Version: 2.6.36?
 Contact:	alan.cox@intel.com
 Description:	Sets the power state of the device. 0 sets the device into
 		sleep mode, 1 wakes it up.
 
-Where:		/sys/bus/i2c/devices/.../calibration
+What:		/sys/bus/i2c/devices/.../calibration
 Date:		April 2010
 Kernel Version: 2.6.36?
 Contact:	alan.cox@intel.com
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-distance-srf08 b/Documentation/ABI/testing/sysfs-bus-iio-distance-srf08
index 0a1ca1487fa9..a133fd8d081a 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-distance-srf08
+++ b/Documentation/ABI/testing/sysfs-bus-iio-distance-srf08
@@ -1,4 +1,4 @@
-What		/sys/bus/iio/devices/iio:deviceX/sensor_sensitivity
+What:		/sys/bus/iio/devices/iio:deviceX/sensor_sensitivity
 Date:		January 2017
 KernelVersion:	4.11
 Contact:	linux-iio@vger.kernel.org
@@ -6,7 +6,7 @@ Description:
 		Show or set the gain boost of the amp, from 0-31 range.
 		default 31
 
-What		/sys/bus/iio/devices/iio:deviceX/sensor_max_range
+What:		/sys/bus/iio/devices/iio:deviceX/sensor_max_range
 Date:		January 2017
 KernelVersion:	4.11
 Contact:	linux-iio@vger.kernel.org
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935
index 9a17ab5036a4..c59d95346341 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935
+++ b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935
@@ -1,4 +1,4 @@
-What		/sys/bus/iio/devices/iio:deviceX/in_proximity_input
+What:		/sys/bus/iio/devices/iio:deviceX/in_proximity_input
 Date:		March 2014
 KernelVersion:	3.15
 Contact:	Matt Ranostay <matt.ranostay@konsulko.com>
@@ -6,7 +6,7 @@ Description:
 		Get the current distance in meters of storm (1km steps)
 		1000-40000 = distance in meters
 
-What		/sys/bus/iio/devices/iio:deviceX/sensor_sensitivity
+What:		/sys/bus/iio/devices/iio:deviceX/sensor_sensitivity
 Date:		March 2014
 KernelVersion:	3.15
 Contact:	Matt Ranostay <matt.ranostay@konsulko.com>
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
index 53d99edd1d75..eb449169c30b 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
@@ -1,66 +1,66 @@
-Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/model
+What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/model
 Date:		March 2009
 Kernel Version: 2.6.30
 Contact:	iss_storagedev@hp.com
 Description:	Displays the SCSI INQUIRY page 0 model for logical drive
 		Y of controller X.
 
-Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/rev
+What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/rev
 Date:		March 2009
 Kernel Version: 2.6.30
 Contact:	iss_storagedev@hp.com
 Description:	Displays the SCSI INQUIRY page 0 revision for logical
 		drive Y of controller X.
 
-Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/unique_id
+What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/unique_id
 Date:		March 2009
 Kernel Version: 2.6.30
 Contact:	iss_storagedev@hp.com
 Description:	Displays the SCSI INQUIRY page 83 serial number for logical
 		drive Y of controller X.
 
-Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/vendor
+What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/vendor
 Date:		March 2009
 Kernel Version: 2.6.30
 Contact:	iss_storagedev@hp.com
 Description:	Displays the SCSI INQUIRY page 0 vendor for logical drive
 		Y of controller X.
 
-Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/block:cciss!cXdY
+What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/block:cciss!cXdY
 Date:		March 2009
 Kernel Version: 2.6.30
 Contact:	iss_storagedev@hp.com
 Description:	A symbolic link to /sys/block/cciss!cXdY
 
-Where:		/sys/bus/pci/devices/<dev>/ccissX/rescan
+What:		/sys/bus/pci/devices/<dev>/ccissX/rescan
 Date:		August 2009
 Kernel Version:	2.6.31
 Contact:	iss_storagedev@hp.com
 Description:	Kicks of a rescan of the controller to discover logical
 		drive topology changes.
 
-Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/lunid
+What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/lunid
 Date:		August 2009
 Kernel Version: 2.6.31
 Contact:	iss_storagedev@hp.com
 Description:	Displays the 8-byte LUN ID used to address logical
 		drive Y of controller X.
 
-Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/raid_level
+What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/raid_level
 Date:		August 2009
 Kernel Version: 2.6.31
 Contact:	iss_storagedev@hp.com
 Description:	Displays the RAID level of logical drive Y of
 		controller X.
 
-Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/usage_count
+What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/usage_count
 Date:		August 2009
 Kernel Version: 2.6.31
 Contact:	iss_storagedev@hp.com
 Description:	Displays the usage count (number of opens) of logical drive Y
 		of controller X.
 
-Where:		/sys/bus/pci/devices/<dev>/ccissX/resettable
+What:		/sys/bus/pci/devices/<dev>/ccissX/resettable
 Date:		February 2011
 Kernel Version:	2.6.38
 Contact:	iss_storagedev@hp.com
@@ -71,7 +71,7 @@ Description:	Value of 1 indicates the controller can honor the reset_devices
 		a dump device, as kdump requires resetting the device in order
 		to work reliably.
 
-Where:		/sys/bus/pci/devices/<dev>/ccissX/transport_mode
+What:		/sys/bus/pci/devices/<dev>/ccissX/transport_mode
 Date:		July 2011
 Kernel Version:	3.0
 Contact:	iss_storagedev@hp.com
diff --git a/Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg b/Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg
index 70d00dfa443d..f6199b314196 100644
--- a/Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg
+++ b/Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg
@@ -1,12 +1,12 @@
-Where:		/sys/bus/usb/.../powered
+What:		/sys/bus/usb/.../powered
 Date:		August 2008
 Kernel Version:	2.6.26
 Contact:	Harrison Metzger <harrisonmetz@gmail.com>
 Description:	Controls whether the device's display will powered.
 		A value of 0 is off and a non-zero value is on.
 
-Where:		/sys/bus/usb/.../mode_msb
-Where:		/sys/bus/usb/.../mode_lsb
+What:		/sys/bus/usb/.../mode_msb
+What:		/sys/bus/usb/.../mode_lsb
 Date:		August 2008
 Kernel Version:	2.6.26
 Contact:	Harrison Metzger <harrisonmetz@gmail.com>
@@ -16,7 +16,7 @@ Description:	Controls the devices display mode.
 		for an 8 character display the values are
 			MSB 0x08; LSB 0xFF.
 
-Where:		/sys/bus/usb/.../textmode
+What:		/sys/bus/usb/.../textmode
 Date:		August 2008
 Kernel Version:	2.6.26
 Contact:	Harrison Metzger <harrisonmetz@gmail.com>
@@ -25,13 +25,13 @@ Description:	Controls the way the device interprets its text buffer.
 		hex:	each character is between 0-15
 		ascii:	each character is between '0'-'9' and 'A'-'F'.
 
-Where:		/sys/bus/usb/.../text
+What:		/sys/bus/usb/.../text
 Date:		August 2008
 Kernel Version:	2.6.26
 Contact:	Harrison Metzger <harrisonmetz@gmail.com>
 Description:	The text (or data) for the device to display
 
-Where:		/sys/bus/usb/.../decimals
+What:		/sys/bus/usb/.../decimals
 Date:		August 2008
 Kernel Version:	2.6.26
 Contact:	Harrison Metzger <harrisonmetz@gmail.com>
diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl
index bbbabffc682a..7970e3713e70 100644
--- a/Documentation/ABI/testing/sysfs-class-cxl
+++ b/Documentation/ABI/testing/sysfs-class-cxl
@@ -1,6 +1,6 @@
-Note: Attributes that are shared between devices are stored in the directory
-pointed to by the symlink device/.
-Example: The real path of the attribute /sys/class/cxl/afu0.0s/irqs_max is
+Please note that attributes that are shared between devices are stored in
+the directory pointed to by the symlink device/.
+For example, the real path of the attribute /sys/class/cxl/afu0.0s/irqs_max is
 /sys/class/cxl/afu0.0s/device/irqs_max, i.e. /sys/class/cxl/afu0.0/irqs_max.
 
 
diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
index ee39acacf6f8..01196e19afca 100644
--- a/Documentation/ABI/testing/sysfs-class-devfreq
+++ b/Documentation/ABI/testing/sysfs-class-devfreq
@@ -47,7 +47,7 @@ Description:
 What:		/sys/class/devfreq/.../trans_stat
 Date:		October 2012
 Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
-Descrtiption:
+Description:
 		This ABI shows the statistics of devfreq behavior on a
 		specific device. It shows the time spent in each state and
 		the number of transitions between states.
diff --git a/Documentation/ABI/testing/sysfs-class-powercap b/Documentation/ABI/testing/sysfs-class-powercap
index db3b3ff70d84..f333a0ccc29b 100644
--- a/Documentation/ABI/testing/sysfs-class-powercap
+++ b/Documentation/ABI/testing/sysfs-class-powercap
@@ -147,6 +147,6 @@ What:		/sys/class/powercap/.../<power zone>/enabled
 Date:		September 2013
 KernelVersion:	3.13
 Contact:	linux-pm@vger.kernel.org
-Description
+Description:
 		This allows to enable/disable power capping at power zone level.
 		This applies to current power zone and its children.
diff --git a/Documentation/ABI/testing/sysfs-kernel-fscaps b/Documentation/ABI/testing/sysfs-kernel-fscaps
index 50a3033b5e15..bcff34665192 100644
--- a/Documentation/ABI/testing/sysfs-kernel-fscaps
+++ b/Documentation/ABI/testing/sysfs-kernel-fscaps
@@ -2,7 +2,7 @@ What:		/sys/kernel/fscaps
 Date:		February 2011
 KernelVersion:	2.6.38
 Contact:	Ludwig Nussel <ludwig.nussel@suse.de>
-Description
+Description:
 		Shows whether file system capabilities are honored
 		when executing a binary
 
diff --git a/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo b/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo
index 7bd81168e063..1f1087a5f075 100644
--- a/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo
+++ b/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo
@@ -4,7 +4,7 @@ KernelVersion:	2.6.24
 Contact:	Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp>
 		Kexec Mailing List <kexec@lists.infradead.org>
 		Vivek Goyal <vgoyal@redhat.com>
-Description
+Description:
 		Shows physical address and size of vmcoreinfo ELF note.
 		First value contains physical address of note in hex and
 		second value contains the size of note in hex. This ELF
-- 
cgit 


From 745b2888a2afdb33055f62ba3b245b79e2462e65 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Thu, 13 Jun 2019 23:04:08 -0300
Subject: ABI: sysfs-driver-hid: the "What" field doesn't parse fine

The What: field on this ABI description use a different
syntax than expected, causing it to not be properly parsed
by script.

Fix it.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-driver-hid | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-driver-hid b/Documentation/ABI/testing/sysfs-driver-hid
index 48942cacb0bf..a59533410871 100644
--- a/Documentation/ABI/testing/sysfs-driver-hid
+++ b/Documentation/ABI/testing/sysfs-driver-hid
@@ -1,6 +1,6 @@
-What:		For USB devices	: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/report_descriptor
-		For BT devices	: /sys/class/bluetooth/hci<addr>/<hid-bus>:<vendor-id>:<product-id>.<num>/report_descriptor
-		Symlink		: /sys/class/hidraw/hidraw<num>/device/report_descriptor
+What:		/sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/report_descriptor
+What:		/sys/class/bluetooth/hci<addr>/<hid-bus>:<vendor-id>:<product-id>.<num>/report_descriptor
+What:		/sys/class/hidraw/hidraw<num>/device/report_descriptor
 Date:		Jan 2011
 KernelVersion:	2.0.39
 Contact:	Alan Ott <alan@signal11.us>
@@ -9,9 +9,9 @@ Description:	When read, this file returns the device's raw binary HID
 		This file cannot be written.
 Users:		HIDAPI library (http://www.signal11.us/oss/hidapi)
 
-What:		For USB devices	: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/country
-		For BT devices	: /sys/class/bluetooth/hci<addr>/<hid-bus>:<vendor-id>:<product-id>.<num>/country
-		Symlink		: /sys/class/hidraw/hidraw<num>/device/country
+What:		/sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/country
+What:		/sys/class/bluetooth/hci<addr>/<hid-bus>:<vendor-id>:<product-id>.<num>/country
+What:		/sys/class/hidraw/hidraw<num>/device/country
 Date:		February 2015
 KernelVersion:	3.19
 Contact:	Olivier Gay <ogay@logitech.com>
-- 
cgit 


From d59f0ec7151e8f70ceaf58e2ddd057e3296cc2d7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Thu, 13 Jun 2019 23:04:09 -0300
Subject: ABI: sysfs-class-uwb_rc: remove a duplicated incomplete entry

There are two entries for /sys/class/uwb_rc/uwbN/<EUI-48>/BPST.
The second one has a missing description.

Get rid of it.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-class-uwb_rc | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-class-uwb_rc b/Documentation/ABI/testing/sysfs-class-uwb_rc
index 85f4875d16ac..a0578751c1e3 100644
--- a/Documentation/ABI/testing/sysfs-class-uwb_rc
+++ b/Documentation/ABI/testing/sysfs-class-uwb_rc
@@ -125,12 +125,6 @@ Description:
                 The EUI-48 of this device in colon separated hex
                 octets.
 
-What:           /sys/class/uwb_rc/uwbN/<EUI-48>/BPST
-Date:           July 2008
-KernelVersion:  2.6.27
-Contact:        linux-usb@vger.kernel.org
-Description:
-
 What:           /sys/class/uwb_rc/uwbN/<EUI-48>/IEs
 Date:           July 2008
 KernelVersion:  2.6.27
-- 
cgit 


From 1107049034ac094dd66998ec91d59d8f8807c88a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Thu, 13 Jun 2019 23:04:10 -0300
Subject: ABI: better identificate tables

When parsing via script, it is important to know if the script
should consider a description as a literal block that should
be displayed as-is, or if the description can be considered
as a normal text.

Change descriptions to ensure that the preceding line of a table
ends with a colon. That makes easy to identify the need of a
literal block.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/obsolete/sysfs-driver-hid-roccat-pyra       | 2 +-
 Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533 | 6 +++---
 Documentation/ABI/testing/sysfs-class-led-driver-lm3533       | 8 ++++----
 Documentation/ABI/testing/sysfs-class-leds-gt683r             | 4 ++--
 Documentation/ABI/testing/sysfs-driver-hid-roccat-kone        | 2 +-
 5 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-pyra b/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-pyra
index 16020b31ae64..5d41ebadf15e 100644
--- a/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-pyra
+++ b/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-pyra
@@ -5,7 +5,7 @@ Description:	It is possible to switch the cpi setting of the mouse with the
 		press of a button.
 		When read, this file returns the raw number of the actual cpi
 		setting reported by the mouse. This number has to be further
-		processed to receive the real dpi value.
+		processed to receive the real dpi value:
 
 		VALUE DPI
 		1     400
diff --git a/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533 b/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533
index 77cf7ac949af..c0e0a9ae7b3d 100644
--- a/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533
+++ b/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533
@@ -4,7 +4,7 @@ KernelVersion:	3.5
 Contact:	Johan Hovold <jhovold@gmail.com>
 Description:
 		Get the ALS output channel used as input in
-		ALS-current-control mode (0, 1), where
+		ALS-current-control mode (0, 1), where:
 
 		0 - out_current0 (backlight 0)
 		1 - out_current1 (backlight 1)
@@ -28,7 +28,7 @@ Date:		April 2012
 KernelVersion:	3.5
 Contact:	Johan Hovold <jhovold@gmail.com>
 Description:
-		Set the brightness-mapping mode (0, 1), where
+		Set the brightness-mapping mode (0, 1), where:
 
 		0 - exponential mode
 		1 - linear mode
@@ -38,7 +38,7 @@ Date:		April 2012
 KernelVersion:	3.5
 Contact:	Johan Hovold <jhovold@gmail.com>
 Description:
-		Set the PWM-input control mask (5 bits), where
+		Set the PWM-input control mask (5 bits), where:
 
 		bit 5 - PWM-input enabled in Zone 4
 		bit 4 - PWM-input enabled in Zone 3
diff --git a/Documentation/ABI/testing/sysfs-class-led-driver-lm3533 b/Documentation/ABI/testing/sysfs-class-led-driver-lm3533
index 620ebb3b9baa..e4c89b261546 100644
--- a/Documentation/ABI/testing/sysfs-class-led-driver-lm3533
+++ b/Documentation/ABI/testing/sysfs-class-led-driver-lm3533
@@ -4,7 +4,7 @@ KernelVersion:	3.5
 Contact:	Johan Hovold <jhovold@gmail.com>
 Description:
 		Set the ALS output channel to use as input in
-		ALS-current-control mode (1, 2), where
+		ALS-current-control mode (1, 2), where:
 
 		1 - out_current1
 		2 - out_current2
@@ -22,7 +22,7 @@ Date:		April 2012
 KernelVersion:	3.5
 Contact:	Johan Hovold <jhovold@gmail.com>
 Description:
-		Set the pattern generator fall and rise times (0..7), where
+		Set the pattern generator fall and rise times (0..7), where:
 
 		0 - 2048 us
 		1 - 262 ms
@@ -45,7 +45,7 @@ Date:		April 2012
 KernelVersion:	3.5
 Contact:	Johan Hovold <jhovold@gmail.com>
 Description:
-		Set the brightness-mapping mode (0, 1), where
+		Set the brightness-mapping mode (0, 1), where:
 
 		0 - exponential mode
 		1 - linear mode
@@ -55,7 +55,7 @@ Date:		April 2012
 KernelVersion:	3.5
 Contact:	Johan Hovold <jhovold@gmail.com>
 Description:
-		Set the PWM-input control mask (5 bits), where
+		Set the PWM-input control mask (5 bits), where:
 
 		bit 5 - PWM-input enabled in Zone 4
 		bit 4 - PWM-input enabled in Zone 3
diff --git a/Documentation/ABI/testing/sysfs-class-leds-gt683r b/Documentation/ABI/testing/sysfs-class-leds-gt683r
index e4fae6026e79..6adab27f646e 100644
--- a/Documentation/ABI/testing/sysfs-class-leds-gt683r
+++ b/Documentation/ABI/testing/sysfs-class-leds-gt683r
@@ -5,7 +5,7 @@ Contact:	Janne Kanniainen <janne.kanniainen@gmail.com>
 Description:
 		Set the mode of LEDs. You should notice that changing the mode
 		of one LED will update the mode of its two sibling devices as
-		well.
+		well. Possible values are:
 
 		0 - normal
 		1 - audio
@@ -13,4 +13,4 @@ Description:
 
 		Normal: LEDs are fully on when enabled
 		Audio:  LEDs brightness depends on sound level
-		Breathing: LEDs brightness varies at human breathing rate
\ No newline at end of file
+		Breathing: LEDs brightness varies at human breathing rate
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-kone b/Documentation/ABI/testing/sysfs-driver-hid-roccat-kone
index 3ca3971109bf..8f7982c70d72 100644
--- a/Documentation/ABI/testing/sysfs-driver-hid-roccat-kone
+++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-kone
@@ -5,7 +5,7 @@ Description:	It is possible to switch the dpi setting of the mouse with the
 		press of a button.
 		When read, this file returns the raw number of the actual dpi
 		setting reported by the mouse. This number has to be further
-		processed to receive the real dpi value.
+		processed to receive the real dpi value:
 
 		VALUE DPI
 		1     800
-- 
cgit 


From ce1a5ea18ef9bf4c62c75abe7c540a29264ec988 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 14 Jun 2019 09:02:49 +0200
Subject: Documentation: Remove duplicate x86 index entry

x86 got added twice to the index via the RST conversion and the MDS
documentation changes. Remove one instance.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/index.rst | 1 -
 1 file changed, 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/index.rst b/Documentation/index.rst
index a7566ef62411..781042b4579d 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -112,7 +112,6 @@ implementation.
 .. toctree::
    :maxdepth: 2
 
-   x86/index
    sh/index
    x86/index
 
-- 
cgit 


From 1419f64ef526b5657ccfe604a69533736db44490 Mon Sep 17 00:00:00 2001
From: Erwan Le Ray <erwan.leray@st.com>
Date: Fri, 24 May 2019 17:30:38 +0200
Subject: dt-bindings: stm32: serial: Add optional reset

STM32 serial can be reset via reset controller.
Add an optional reset property to stm32 usart bindings.

Signed-off-by: Erwan Le Ray <erwan.leray@st.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/serial/st,stm32-usart.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/serial/st,stm32-usart.txt b/Documentation/devicetree/bindings/serial/st,stm32-usart.txt
index 9d3efed55deb..a6b19485c9dc 100644
--- a/Documentation/devicetree/bindings/serial/st,stm32-usart.txt
+++ b/Documentation/devicetree/bindings/serial/st,stm32-usart.txt
@@ -13,6 +13,7 @@ Required properties:
 - clocks: The input clock of the USART instance
 
 Optional properties:
+- resets: Must contain the phandle to the reset controller.
 - pinctrl: The reference on the pins configuration
 - st,hw-flow-ctrl: bool flag to enable hardware flow control.
 - rs485-rts-delay, rs485-rx-during-tx, rs485-rts-active-low,
-- 
cgit 


From abdcfc25641c5ba6d63047bec1dc8d3aaa7d4111 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Wed, 15 May 2019 15:18:56 +0200
Subject: ASoC: meson: add tohdmitx DT bindings

Add the bindings and the related documentation for the audio hdmitx
control glue of the Amlogic g12a SoC family

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Tested-by: Neil Armstrong <narmstrong@baylibre.com>
Tested-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/sound/amlogic,g12a-tohdmitx.txt       | 55 ++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/sound/amlogic,g12a-tohdmitx.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/sound/amlogic,g12a-tohdmitx.txt b/Documentation/devicetree/bindings/sound/amlogic,g12a-tohdmitx.txt
new file mode 100644
index 000000000000..aa6c35570d31
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/amlogic,g12a-tohdmitx.txt
@@ -0,0 +1,55 @@
+* Amlogic HDMI Tx control glue
+
+Required properties:
+- compatible: "amlogic,g12a-tohdmitx"
+- reg: physical base address of the controller and length of memory
+       mapped region.
+- #sound-dai-cells: should be 1.
+
+Example on the S905X2 SoC:
+
+tohdmitx: audio-controller@744 {
+	compatible = "amlogic,g12a-tohdmitx";
+	reg = <0x0 0x744 0x0 0x4>;
+	#sound-dai-cells = <1>;
+};
+
+Example of an 'amlogic,axg-sound-card':
+
+sound {
+	compatible = "amlogic,axg-sound-card";
+
+[...]
+
+	dai-link-x {
+		sound-dai = <&tdmif_a>;
+		dai-format = "i2s";
+		dai-tdm-slot-tx-mask-0 = <1 1>;
+
+		codec-0 {
+			sound-dai = <&tohdmitx TOHDMITX_I2S_IN_A>;
+		};
+
+		codec-1 {
+			sound-dai = <&external_dac>;
+		};
+	};
+
+	dai-link-y {
+		sound-dai = <&tdmif_c>;
+		dai-format = "i2s";
+		dai-tdm-slot-tx-mask-0 = <1 1>;
+
+		codec {
+			sound-dai = <&tohdmitx TOHDMITX_I2S_IN_C>;
+		};
+	};
+
+	dai-link-z {
+		sound-dai = <&tohdmitx TOHDMITX_I2S_OUT>;
+
+		codec {
+			sound-dai = <&hdmi_tx>;
+		};
+	};
+};
-- 
cgit 


From 305a99eb98af22996e9771078b7a19978732ed41 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:37 -0300
Subject: docs: aoe: convert docs to ReST and rename to *.rst

There are only two files within Documentation/aoe dir that are
documentation. The remaining ones are examples and shell
scripts.

Convert the two AoE files to ReST format, and add the others
as literal, as they're part of the documentation.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/aoe/aoe.rst      | 150 +++++++++++++++++++++++++++++++++++++++++
 Documentation/aoe/aoe.txt      | 143 ---------------------------------------
 Documentation/aoe/examples.rst |  23 +++++++
 Documentation/aoe/index.rst    |  19 ++++++
 Documentation/aoe/todo.rst     |  17 +++++
 Documentation/aoe/todo.txt     |  14 ----
 Documentation/aoe/udev.txt     |   2 +-
 7 files changed, 210 insertions(+), 158 deletions(-)
 create mode 100644 Documentation/aoe/aoe.rst
 delete mode 100644 Documentation/aoe/aoe.txt
 create mode 100644 Documentation/aoe/examples.rst
 create mode 100644 Documentation/aoe/index.rst
 create mode 100644 Documentation/aoe/todo.rst
 delete mode 100644 Documentation/aoe/todo.txt

(limited to 'Documentation')

diff --git a/Documentation/aoe/aoe.rst b/Documentation/aoe/aoe.rst
new file mode 100644
index 000000000000..58747ecec71d
--- /dev/null
+++ b/Documentation/aoe/aoe.rst
@@ -0,0 +1,150 @@
+Introduction
+============
+
+ATA over Ethernet is a network protocol that provides simple access to
+block storage on the LAN.
+
+  http://support.coraid.com/documents/AoEr11.txt
+
+The EtherDrive (R) HOWTO for 2.6 and 3.x kernels is found at ...
+
+  http://support.coraid.com/support/linux/EtherDrive-2.6-HOWTO.html
+
+It has many tips and hints!  Please see, especially, recommended
+tunings for virtual memory:
+
+  http://support.coraid.com/support/linux/EtherDrive-2.6-HOWTO-5.html#ss5.19
+
+The aoetools are userland programs that are designed to work with this
+driver.  The aoetools are on sourceforge.
+
+  http://aoetools.sourceforge.net/
+
+The scripts in this Documentation/aoe directory are intended to
+document the use of the driver and are not necessary if you install
+the aoetools.
+
+
+Creating Device Nodes
+=====================
+
+  Users of udev should find the block device nodes created
+  automatically, but to create all the necessary device nodes, use the
+  udev configuration rules provided in udev.txt (in this directory).
+
+  There is a udev-install.sh script that shows how to install these
+  rules on your system.
+
+  There is also an autoload script that shows how to edit
+  /etc/modprobe.d/aoe.conf to ensure that the aoe module is loaded when
+  necessary.  Preloading the aoe module is preferable to autoloading,
+  however, because AoE discovery takes a few seconds.  It can be
+  confusing when an AoE device is not present the first time the a
+  command is run but appears a second later.
+
+Using Device Nodes
+==================
+
+  "cat /dev/etherd/err" blocks, waiting for error diagnostic output,
+  like any retransmitted packets.
+
+  "echo eth2 eth4 > /dev/etherd/interfaces" tells the aoe driver to
+  limit ATA over Ethernet traffic to eth2 and eth4.  AoE traffic from
+  untrusted networks should be ignored as a matter of security.  See
+  also the aoe_iflist driver option described below.
+
+  "echo > /dev/etherd/discover" tells the driver to find out what AoE
+  devices are available.
+
+  In the future these character devices may disappear and be replaced
+  by sysfs counterparts.  Using the commands in aoetools insulates
+  users from these implementation details.
+
+  The block devices are named like this::
+
+	e{shelf}.{slot}
+	e{shelf}.{slot}p{part}
+
+  ... so that "e0.2" is the third blade from the left (slot 2) in the
+  first shelf (shelf address zero).  That's the whole disk.  The first
+  partition on that disk would be "e0.2p1".
+
+Using sysfs
+===========
+
+  Each aoe block device in /sys/block has the extra attributes of
+  state, mac, and netif.  The state attribute is "up" when the device
+  is ready for I/O and "down" if detected but unusable.  The
+  "down,closewait" state shows that the device is still open and
+  cannot come up again until it has been closed.
+
+  The mac attribute is the ethernet address of the remote AoE device.
+  The netif attribute is the network interface on the localhost
+  through which we are communicating with the remote AoE device.
+
+  There is a script in this directory that formats this information in
+  a convenient way.  Users with aoetools should use the aoe-stat
+  command::
+
+    root@makki root# sh Documentation/aoe/status.sh
+       e10.0            eth3              up
+       e10.1            eth3              up
+       e10.2            eth3              up
+       e10.3            eth3              up
+       e10.4            eth3              up
+       e10.5            eth3              up
+       e10.6            eth3              up
+       e10.7            eth3              up
+       e10.8            eth3              up
+       e10.9            eth3              up
+        e4.0            eth1              up
+        e4.1            eth1              up
+        e4.2            eth1              up
+        e4.3            eth1              up
+        e4.4            eth1              up
+        e4.5            eth1              up
+        e4.6            eth1              up
+        e4.7            eth1              up
+        e4.8            eth1              up
+        e4.9            eth1              up
+
+  Use /sys/module/aoe/parameters/aoe_iflist (or better, the driver
+  option discussed below) instead of /dev/etherd/interfaces to limit
+  AoE traffic to the network interfaces in the given
+  whitespace-separated list.  Unlike the old character device, the
+  sysfs entry can be read from as well as written to.
+
+  It's helpful to trigger discovery after setting the list of allowed
+  interfaces.  The aoetools package provides an aoe-discover script
+  for this purpose.  You can also directly use the
+  /dev/etherd/discover special file described above.
+
+Driver Options
+==============
+
+  There is a boot option for the built-in aoe driver and a
+  corresponding module parameter, aoe_iflist.  Without this option,
+  all network interfaces may be used for ATA over Ethernet.  Here is a
+  usage example for the module parameter::
+
+    modprobe aoe_iflist="eth1 eth3"
+
+  The aoe_deadsecs module parameter determines the maximum number of
+  seconds that the driver will wait for an AoE device to provide a
+  response to an AoE command.  After aoe_deadsecs seconds have
+  elapsed, the AoE device will be marked as "down".  A value of zero
+  is supported for testing purposes and makes the aoe driver keep
+  trying AoE commands forever.
+
+  The aoe_maxout module parameter has a default of 128.  This is the
+  maximum number of unresponded packets that will be sent to an AoE
+  target at one time.
+
+  The aoe_dyndevs module parameter defaults to 1, meaning that the
+  driver will assign a block device minor number to a discovered AoE
+  target based on the order of its discovery.  With dynamic minor
+  device numbers in use, a greater range of AoE shelf and slot
+  addresses can be supported.  Users with udev will never have to
+  think about minor numbers.  Using aoe_dyndevs=0 allows device nodes
+  to be pre-created using a static minor-number scheme with the
+  aoe-mkshelf script in the aoetools.
diff --git a/Documentation/aoe/aoe.txt b/Documentation/aoe/aoe.txt
deleted file mode 100644
index c71487d399d1..000000000000
--- a/Documentation/aoe/aoe.txt
+++ /dev/null
@@ -1,143 +0,0 @@
-ATA over Ethernet is a network protocol that provides simple access to
-block storage on the LAN.
-
-  http://support.coraid.com/documents/AoEr11.txt
-
-The EtherDrive (R) HOWTO for 2.6 and 3.x kernels is found at ...
-
-  http://support.coraid.com/support/linux/EtherDrive-2.6-HOWTO.html
-
-It has many tips and hints!  Please see, especially, recommended
-tunings for virtual memory:
-
-  http://support.coraid.com/support/linux/EtherDrive-2.6-HOWTO-5.html#ss5.19
-
-The aoetools are userland programs that are designed to work with this
-driver.  The aoetools are on sourceforge.
-
-  http://aoetools.sourceforge.net/
-
-The scripts in this Documentation/aoe directory are intended to
-document the use of the driver and are not necessary if you install
-the aoetools.
-
-
-CREATING DEVICE NODES
-
-  Users of udev should find the block device nodes created
-  automatically, but to create all the necessary device nodes, use the
-  udev configuration rules provided in udev.txt (in this directory).
-
-  There is a udev-install.sh script that shows how to install these
-  rules on your system.
-
-  There is also an autoload script that shows how to edit
-  /etc/modprobe.d/aoe.conf to ensure that the aoe module is loaded when
-  necessary.  Preloading the aoe module is preferable to autoloading,
-  however, because AoE discovery takes a few seconds.  It can be
-  confusing when an AoE device is not present the first time the a
-  command is run but appears a second later.
-
-USING DEVICE NODES
-
-  "cat /dev/etherd/err" blocks, waiting for error diagnostic output,
-  like any retransmitted packets.
-
-  "echo eth2 eth4 > /dev/etherd/interfaces" tells the aoe driver to
-  limit ATA over Ethernet traffic to eth2 and eth4.  AoE traffic from
-  untrusted networks should be ignored as a matter of security.  See
-  also the aoe_iflist driver option described below.
-
-  "echo > /dev/etherd/discover" tells the driver to find out what AoE
-  devices are available.
-
-  In the future these character devices may disappear and be replaced
-  by sysfs counterparts.  Using the commands in aoetools insulates
-  users from these implementation details.
-
-  The block devices are named like this:
-
-	e{shelf}.{slot}
-	e{shelf}.{slot}p{part}
-
-  ... so that "e0.2" is the third blade from the left (slot 2) in the
-  first shelf (shelf address zero).  That's the whole disk.  The first
-  partition on that disk would be "e0.2p1".
-
-USING SYSFS
-
-  Each aoe block device in /sys/block has the extra attributes of
-  state, mac, and netif.  The state attribute is "up" when the device
-  is ready for I/O and "down" if detected but unusable.  The
-  "down,closewait" state shows that the device is still open and
-  cannot come up again until it has been closed.
-
-  The mac attribute is the ethernet address of the remote AoE device.
-  The netif attribute is the network interface on the localhost
-  through which we are communicating with the remote AoE device.
-
-  There is a script in this directory that formats this information in
-  a convenient way.  Users with aoetools should use the aoe-stat
-  command.
-
-  root@makki root# sh Documentation/aoe/status.sh 
-     e10.0            eth3              up
-     e10.1            eth3              up
-     e10.2            eth3              up
-     e10.3            eth3              up
-     e10.4            eth3              up
-     e10.5            eth3              up
-     e10.6            eth3              up
-     e10.7            eth3              up
-     e10.8            eth3              up
-     e10.9            eth3              up
-      e4.0            eth1              up
-      e4.1            eth1              up
-      e4.2            eth1              up
-      e4.3            eth1              up
-      e4.4            eth1              up
-      e4.5            eth1              up
-      e4.6            eth1              up
-      e4.7            eth1              up
-      e4.8            eth1              up
-      e4.9            eth1              up
-
-  Use /sys/module/aoe/parameters/aoe_iflist (or better, the driver
-  option discussed below) instead of /dev/etherd/interfaces to limit
-  AoE traffic to the network interfaces in the given
-  whitespace-separated list.  Unlike the old character device, the
-  sysfs entry can be read from as well as written to.
-
-  It's helpful to trigger discovery after setting the list of allowed
-  interfaces.  The aoetools package provides an aoe-discover script
-  for this purpose.  You can also directly use the
-  /dev/etherd/discover special file described above.
-
-DRIVER OPTIONS
-
-  There is a boot option for the built-in aoe driver and a
-  corresponding module parameter, aoe_iflist.  Without this option,
-  all network interfaces may be used for ATA over Ethernet.  Here is a
-  usage example for the module parameter.
-
-    modprobe aoe_iflist="eth1 eth3"
-
-  The aoe_deadsecs module parameter determines the maximum number of
-  seconds that the driver will wait for an AoE device to provide a
-  response to an AoE command.  After aoe_deadsecs seconds have
-  elapsed, the AoE device will be marked as "down".  A value of zero
-  is supported for testing purposes and makes the aoe driver keep
-  trying AoE commands forever.
-
-  The aoe_maxout module parameter has a default of 128.  This is the
-  maximum number of unresponded packets that will be sent to an AoE
-  target at one time.
-
-  The aoe_dyndevs module parameter defaults to 1, meaning that the
-  driver will assign a block device minor number to a discovered AoE
-  target based on the order of its discovery.  With dynamic minor
-  device numbers in use, a greater range of AoE shelf and slot
-  addresses can be supported.  Users with udev will never have to
-  think about minor numbers.  Using aoe_dyndevs=0 allows device nodes
-  to be pre-created using a static minor-number scheme with the
-  aoe-mkshelf script in the aoetools.
diff --git a/Documentation/aoe/examples.rst b/Documentation/aoe/examples.rst
new file mode 100644
index 000000000000..91f3198e52c1
--- /dev/null
+++ b/Documentation/aoe/examples.rst
@@ -0,0 +1,23 @@
+Example of udev rules
+---------------------
+
+ .. include:: udev.txt
+    :literal:
+
+Example of udev install rules script
+------------------------------------
+
+ .. literalinclude:: udev-install.sh
+    :language: shell
+
+Example script to get status
+----------------------------
+
+ .. literalinclude:: status.sh
+    :language: shell
+
+Example of AoE autoload script
+------------------------------
+
+ .. literalinclude:: autoload.sh
+    :language: shell
diff --git a/Documentation/aoe/index.rst b/Documentation/aoe/index.rst
new file mode 100644
index 000000000000..4394b9b7913c
--- /dev/null
+++ b/Documentation/aoe/index.rst
@@ -0,0 +1,19 @@
+:orphan:
+
+=======================
+ATA over Ethernet (AoE)
+=======================
+
+.. toctree::
+    :maxdepth: 1
+
+    aoe
+    todo
+    examples
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/aoe/todo.rst b/Documentation/aoe/todo.rst
new file mode 100644
index 000000000000..dea8db5a33e1
--- /dev/null
+++ b/Documentation/aoe/todo.rst
@@ -0,0 +1,17 @@
+TODO
+====
+
+There is a potential for deadlock when allocating a struct sk_buff for
+data that needs to be written out to aoe storage.  If the data is
+being written from a dirty page in order to free that page, and if
+there are no other pages available, then deadlock may occur when a
+free page is needed for the sk_buff allocation.  This situation has
+not been observed, but it would be nice to eliminate any potential for
+deadlock under memory pressure.
+
+Because ATA over Ethernet is not fragmented by the kernel's IP code,
+the destructor member of the struct sk_buff is available to the aoe
+driver.  By using a mempool for allocating all but the first few
+sk_buffs, and by registering a destructor, we should be able to
+efficiently allocate sk_buffs without introducing any potential for
+deadlock.
diff --git a/Documentation/aoe/todo.txt b/Documentation/aoe/todo.txt
deleted file mode 100644
index c09dfad4aed8..000000000000
--- a/Documentation/aoe/todo.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-There is a potential for deadlock when allocating a struct sk_buff for
-data that needs to be written out to aoe storage.  If the data is
-being written from a dirty page in order to free that page, and if
-there are no other pages available, then deadlock may occur when a
-free page is needed for the sk_buff allocation.  This situation has
-not been observed, but it would be nice to eliminate any potential for
-deadlock under memory pressure.
-
-Because ATA over Ethernet is not fragmented by the kernel's IP code,
-the destructor member of the struct sk_buff is available to the aoe
-driver.  By using a mempool for allocating all but the first few
-sk_buffs, and by registering a destructor, we should be able to
-efficiently allocate sk_buffs without introducing any potential for
-deadlock.
diff --git a/Documentation/aoe/udev.txt b/Documentation/aoe/udev.txt
index 1f06daf03f5b..54feda5a0772 100644
--- a/Documentation/aoe/udev.txt
+++ b/Documentation/aoe/udev.txt
@@ -11,7 +11,7 @@
 #   udev_rules="/etc/udev/rules.d/"
 #   bash# ls /etc/udev/rules.d/
 #   10-wacom.rules  50-udev.rules
-#   bash# cp /path/to/linux-2.6.xx/Documentation/aoe/udev.txt \
+#   bash# cp /path/to/linux/Documentation/aoe/udev.txt \
 #           /etc/udev/rules.d/60-aoe.rules
 #  
 
-- 
cgit 


From b693d0b372afb39432e1c49ad7b3454855bc6bed Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:38 -0300
Subject: docs: arm64: convert docs to ReST and rename to .rst

The documentation is in a format that is very close to ReST format.

The conversion is actually:
  - add blank lines in order to identify paragraphs;
  - fixing tables markups;
  - adding some lists markups;
  - marking literal blocks;
  - adjust some title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/arm64/acpi_object_usage.rst          | 738 +++++++++++++++++++++
 Documentation/arm64/acpi_object_usage.txt          | 622 -----------------
 Documentation/arm64/arm-acpi.rst                   | 528 +++++++++++++++
 Documentation/arm64/arm-acpi.txt                   | 519 ---------------
 Documentation/arm64/booting.rst                    | 293 ++++++++
 Documentation/arm64/booting.txt                    | 266 --------
 Documentation/arm64/cpu-feature-registers.rst      | 304 +++++++++
 Documentation/arm64/cpu-feature-registers.txt      | 296 ---------
 Documentation/arm64/elf_hwcaps.rst                 | 201 ++++++
 Documentation/arm64/elf_hwcaps.txt                 | 231 -------
 Documentation/arm64/hugetlbpage.rst                |  41 ++
 Documentation/arm64/hugetlbpage.txt                |  38 --
 Documentation/arm64/index.rst                      |  28 +
 Documentation/arm64/legacy_instructions.rst        |  68 ++
 Documentation/arm64/legacy_instructions.txt        |  57 --
 Documentation/arm64/memory.rst                     |  98 +++
 Documentation/arm64/memory.txt                     |  97 ---
 Documentation/arm64/pointer-authentication.rst     | 109 +++
 Documentation/arm64/pointer-authentication.txt     | 107 ---
 Documentation/arm64/silicon-errata.rst             | 131 ++++
 Documentation/arm64/silicon-errata.txt             |  88 ---
 Documentation/arm64/sve.rst                        | 529 +++++++++++++++
 Documentation/arm64/sve.txt                        | 525 ---------------
 Documentation/arm64/tagged-pointers.rst            |  68 ++
 Documentation/arm64/tagged-pointers.txt            |  66 --
 Documentation/translations/zh_CN/arm64/booting.txt |   4 +-
 .../zh_CN/arm64/legacy_instructions.txt            |   4 +-
 Documentation/translations/zh_CN/arm64/memory.txt  |   4 +-
 .../translations/zh_CN/arm64/silicon-errata.txt    |   4 +-
 .../translations/zh_CN/arm64/tagged-pointers.txt   |   4 +-
 Documentation/virtual/kvm/api.txt                  |   2 +-
 31 files changed, 3147 insertions(+), 2923 deletions(-)
 create mode 100644 Documentation/arm64/acpi_object_usage.rst
 delete mode 100644 Documentation/arm64/acpi_object_usage.txt
 create mode 100644 Documentation/arm64/arm-acpi.rst
 delete mode 100644 Documentation/arm64/arm-acpi.txt
 create mode 100644 Documentation/arm64/booting.rst
 delete mode 100644 Documentation/arm64/booting.txt
 create mode 100644 Documentation/arm64/cpu-feature-registers.rst
 delete mode 100644 Documentation/arm64/cpu-feature-registers.txt
 create mode 100644 Documentation/arm64/elf_hwcaps.rst
 delete mode 100644 Documentation/arm64/elf_hwcaps.txt
 create mode 100644 Documentation/arm64/hugetlbpage.rst
 delete mode 100644 Documentation/arm64/hugetlbpage.txt
 create mode 100644 Documentation/arm64/index.rst
 create mode 100644 Documentation/arm64/legacy_instructions.rst
 delete mode 100644 Documentation/arm64/legacy_instructions.txt
 create mode 100644 Documentation/arm64/memory.rst
 delete mode 100644 Documentation/arm64/memory.txt
 create mode 100644 Documentation/arm64/pointer-authentication.rst
 delete mode 100644 Documentation/arm64/pointer-authentication.txt
 create mode 100644 Documentation/arm64/silicon-errata.rst
 delete mode 100644 Documentation/arm64/silicon-errata.txt
 create mode 100644 Documentation/arm64/sve.rst
 delete mode 100644 Documentation/arm64/sve.txt
 create mode 100644 Documentation/arm64/tagged-pointers.rst
 delete mode 100644 Documentation/arm64/tagged-pointers.txt

(limited to 'Documentation')

diff --git a/Documentation/arm64/acpi_object_usage.rst b/Documentation/arm64/acpi_object_usage.rst
new file mode 100644
index 000000000000..d51b69dc624d
--- /dev/null
+++ b/Documentation/arm64/acpi_object_usage.rst
@@ -0,0 +1,738 @@
+===========
+ACPI Tables
+===========
+
+The expectations of individual ACPI tables are discussed in the list that
+follows.
+
+If a section number is used, it refers to a section number in the ACPI
+specification where the object is defined.  If "Signature Reserved" is used,
+the table signature (the first four bytes of the table) is the only portion
+of the table recognized by the specification, and the actual table is defined
+outside of the UEFI Forum (see Section 5.2.6 of the specification).
+
+For ACPI on arm64, tables also fall into the following categories:
+
+       -  Required: DSDT, FADT, GTDT, MADT, MCFG, RSDP, SPCR, XSDT
+
+       -  Recommended: BERT, EINJ, ERST, HEST, PCCT, SSDT
+
+       -  Optional: BGRT, CPEP, CSRT, DBG2, DRTM, ECDT, FACS, FPDT, IORT,
+          MCHI, MPST, MSCT, NFIT, PMTT, RASF, SBST, SLIT, SPMI, SRAT, STAO,
+	  TCPA, TPM2, UEFI, XENV
+
+       -  Not supported: BOOT, DBGP, DMAR, ETDT, HPET, IBFT, IVRS, LPIT,
+          MSDM, OEMx, PSDT, RSDT, SLIC, WAET, WDAT, WDRT, WPBT
+
+====== ========================================================================
+Table  Usage for ARMv8 Linux
+====== ========================================================================
+BERT   Section 18.3 (signature == "BERT")
+
+       **Boot Error Record Table**
+
+       Must be supplied if RAS support is provided by the platform.  It
+       is recommended this table be supplied.
+
+BOOT   Signature Reserved (signature == "BOOT")
+
+       **simple BOOT flag table**
+
+       Microsoft only table, will not be supported.
+
+BGRT   Section 5.2.22 (signature == "BGRT")
+
+       **Boot Graphics Resource Table**
+
+       Optional, not currently supported, with no real use-case for an
+       ARM server.
+
+CPEP   Section 5.2.18 (signature == "CPEP")
+
+       **Corrected Platform Error Polling table**
+
+       Optional, not currently supported, and not recommended until such
+       time as ARM-compatible hardware is available, and the specification
+       suitably modified.
+
+CSRT   Signature Reserved (signature == "CSRT")
+
+       **Core System Resources Table**
+
+       Optional, not currently supported.
+
+DBG2   Signature Reserved (signature == "DBG2")
+
+       **DeBuG port table 2**
+
+       License has changed and should be usable.  Optional if used instead
+       of earlycon=<device> on the command line.
+
+DBGP   Signature Reserved (signature == "DBGP")
+
+       **DeBuG Port table**
+
+       Microsoft only table, will not be supported.
+
+DSDT   Section 5.2.11.1 (signature == "DSDT")
+
+       **Differentiated System Description Table**
+
+       A DSDT is required; see also SSDT.
+
+       ACPI tables contain only one DSDT but can contain one or more SSDTs,
+       which are optional.  Each SSDT can only add to the ACPI namespace,
+       but cannot modify or replace anything in the DSDT.
+
+DMAR   Signature Reserved (signature == "DMAR")
+
+       **DMA Remapping table**
+
+       x86 only table, will not be supported.
+
+DRTM   Signature Reserved (signature == "DRTM")
+
+       **Dynamic Root of Trust for Measurement table**
+
+       Optional, not currently supported.
+
+ECDT   Section 5.2.16 (signature == "ECDT")
+
+       **Embedded Controller Description Table**
+
+       Optional, not currently supported, but could be used on ARM if and
+       only if one uses the GPE_BIT field to represent an IRQ number, since
+       there are no GPE blocks defined in hardware reduced mode.  This would
+       need to be modified in the ACPI specification.
+
+EINJ   Section 18.6 (signature == "EINJ")
+
+       **Error Injection table**
+
+       This table is very useful for testing platform response to error
+       conditions; it allows one to inject an error into the system as
+       if it had actually occurred.  However, this table should not be
+       shipped with a production system; it should be dynamically loaded
+       and executed with the ACPICA tools only during testing.
+
+ERST   Section 18.5 (signature == "ERST")
+
+       **Error Record Serialization Table**
+
+       On a platform supports RAS, this table must be supplied if it is not
+       UEFI-based; if it is UEFI-based, this table may be supplied. When this
+       table is not present, UEFI run time service will be utilized to save
+       and retrieve hardware error information to and from a persistent store.
+
+ETDT   Signature Reserved (signature == "ETDT")
+
+       **Event Timer Description Table**
+
+       Obsolete table, will not be supported.
+
+FACS   Section 5.2.10 (signature == "FACS")
+
+       **Firmware ACPI Control Structure**
+
+       It is unlikely that this table will be terribly useful.  If it is
+       provided, the Global Lock will NOT be used since it is not part of
+       the hardware reduced profile, and only 64-bit address fields will
+       be considered valid.
+
+FADT   Section 5.2.9 (signature == "FACP")
+
+       **Fixed ACPI Description Table**
+       Required for arm64.
+
+
+       The HW_REDUCED_ACPI flag must be set.  All of the fields that are
+       to be ignored when HW_REDUCED_ACPI is set are expected to be set to
+       zero.
+
+       If an FACS table is provided, the X_FIRMWARE_CTRL field is to be
+       used, not FIRMWARE_CTRL.
+
+       If PSCI is used (as is recommended), make sure that ARM_BOOT_ARCH is
+       filled in properly - that the PSCI_COMPLIANT flag is set and that
+       PSCI_USE_HVC is set or unset as needed (see table 5-37).
+
+       For the DSDT that is also required, the X_DSDT field is to be used,
+       not the DSDT field.
+
+FPDT   Section 5.2.23 (signature == "FPDT")
+
+       **Firmware Performance Data Table**
+
+       Optional, not currently supported.
+
+GTDT   Section 5.2.24 (signature == "GTDT")
+
+       **Generic Timer Description Table**
+
+       Required for arm64.
+
+HEST   Section 18.3.2 (signature == "HEST")
+
+       **Hardware Error Source Table**
+
+       ARM-specific error sources have been defined; please use those or the
+       PCI types such as type 6 (AER Root Port), 7 (AER Endpoint), or 8 (AER
+       Bridge), or use type 9 (Generic Hardware Error Source).  Firmware first
+       error handling is possible if and only if Trusted Firmware is being
+       used on arm64.
+
+       Must be supplied if RAS support is provided by the platform.  It
+       is recommended this table be supplied.
+
+HPET   Signature Reserved (signature == "HPET")
+
+       **High Precision Event timer Table**
+
+       x86 only table, will not be supported.
+
+IBFT   Signature Reserved (signature == "IBFT")
+
+       **iSCSI Boot Firmware Table**
+
+       Microsoft defined table, support TBD.
+
+IORT   Signature Reserved (signature == "IORT")
+
+       **Input Output Remapping Table**
+
+       arm64 only table, required in order to describe IO topology, SMMUs,
+       and GIC ITSs, and how those various components are connected together,
+       such as identifying which components are behind which SMMUs/ITSs.
+       This table will only be required on certain SBSA platforms (e.g.,
+       when using GICv3-ITS and an SMMU); on SBSA Level 0 platforms, it
+       remains optional.
+
+IVRS   Signature Reserved (signature == "IVRS")
+
+       **I/O Virtualization Reporting Structure**
+
+       x86_64 (AMD) only table, will not be supported.
+
+LPIT   Signature Reserved (signature == "LPIT")
+
+       **Low Power Idle Table**
+
+       x86 only table as of ACPI 5.1; starting with ACPI 6.0, processor
+       descriptions and power states on ARM platforms should use the DSDT
+       and define processor container devices (_HID ACPI0010, Section 8.4,
+       and more specifically 8.4.3 and and 8.4.4).
+
+MADT   Section 5.2.12 (signature == "APIC")
+
+       **Multiple APIC Description Table**
+
+       Required for arm64.  Only the GIC interrupt controller structures
+       should be used (types 0xA - 0xF).
+
+MCFG   Signature Reserved (signature == "MCFG")
+
+       **Memory-mapped ConFiGuration space**
+
+       If the platform supports PCI/PCIe, an MCFG table is required.
+
+MCHI   Signature Reserved (signature == "MCHI")
+
+       **Management Controller Host Interface table**
+
+       Optional, not currently supported.
+
+MPST   Section 5.2.21 (signature == "MPST")
+
+       **Memory Power State Table**
+
+       Optional, not currently supported.
+
+MSCT   Section 5.2.19 (signature == "MSCT")
+
+       **Maximum System Characteristic Table**
+
+       Optional, not currently supported.
+
+MSDM   Signature Reserved (signature == "MSDM")
+
+       **Microsoft Data Management table**
+
+       Microsoft only table, will not be supported.
+
+NFIT   Section 5.2.25 (signature == "NFIT")
+
+       **NVDIMM Firmware Interface Table**
+
+       Optional, not currently supported.
+
+OEMx   Signature of "OEMx" only
+
+       **OEM Specific Tables**
+
+       All tables starting with a signature of "OEM" are reserved for OEM
+       use.  Since these are not meant to be of general use but are limited
+       to very specific end users, they are not recommended for use and are
+       not supported by the kernel for arm64.
+
+PCCT   Section 14.1 (signature == "PCCT)
+
+       **Platform Communications Channel Table**
+
+       Recommend for use on arm64; use of PCC is recommended when using CPPC
+       to control performance and power for platform processors.
+
+PMTT   Section 5.2.21.12 (signature == "PMTT")
+
+       **Platform Memory Topology Table**
+
+       Optional, not currently supported.
+
+PSDT   Section 5.2.11.3 (signature == "PSDT")
+
+       **Persistent System Description Table**
+
+       Obsolete table, will not be supported.
+
+RASF   Section 5.2.20 (signature == "RASF")
+
+       **RAS Feature table**
+
+       Optional, not currently supported.
+
+RSDP   Section 5.2.5 (signature == "RSD PTR")
+
+       **Root System Description PoinTeR**
+
+       Required for arm64.
+
+RSDT   Section 5.2.7 (signature == "RSDT")
+
+       **Root System Description Table**
+
+       Since this table can only provide 32-bit addresses, it is deprecated
+       on arm64, and will not be used.  If provided, it will be ignored.
+
+SBST   Section 5.2.14 (signature == "SBST")
+
+       **Smart Battery Subsystem Table**
+
+       Optional, not currently supported.
+
+SLIC   Signature Reserved (signature == "SLIC")
+
+       **Software LIcensing table**
+
+       Microsoft only table, will not be supported.
+
+SLIT   Section 5.2.17 (signature == "SLIT")
+
+       **System Locality distance Information Table**
+
+       Optional in general, but required for NUMA systems.
+
+SPCR   Signature Reserved (signature == "SPCR")
+
+       **Serial Port Console Redirection table**
+
+       Required for arm64.
+
+SPMI   Signature Reserved (signature == "SPMI")
+
+       **Server Platform Management Interface table**
+
+       Optional, not currently supported.
+
+SRAT   Section 5.2.16 (signature == "SRAT")
+
+       **System Resource Affinity Table**
+
+       Optional, but if used, only the GICC Affinity structures are read.
+       To support arm64 NUMA, this table is required.
+
+SSDT   Section 5.2.11.2 (signature == "SSDT")
+
+       **Secondary System Description Table**
+
+       These tables are a continuation of the DSDT; these are recommended
+       for use with devices that can be added to a running system, but can
+       also serve the purpose of dividing up device descriptions into more
+       manageable pieces.
+
+       An SSDT can only ADD to the ACPI namespace.  It cannot modify or
+       replace existing device descriptions already in the namespace.
+
+       These tables are optional, however.  ACPI tables should contain only
+       one DSDT but can contain many SSDTs.
+
+STAO   Signature Reserved (signature == "STAO")
+
+       **_STA Override table**
+
+       Optional, but only necessary in virtualized environments in order to
+       hide devices from guest OSs.
+
+TCPA   Signature Reserved (signature == "TCPA")
+
+       **Trusted Computing Platform Alliance table**
+
+       Optional, not currently supported, and may need changes to fully
+       interoperate with arm64.
+
+TPM2   Signature Reserved (signature == "TPM2")
+
+       **Trusted Platform Module 2 table**
+
+       Optional, not currently supported, and may need changes to fully
+       interoperate with arm64.
+
+UEFI   Signature Reserved (signature == "UEFI")
+
+       **UEFI ACPI data table**
+
+       Optional, not currently supported.  No known use case for arm64,
+       at present.
+
+WAET   Signature Reserved (signature == "WAET")
+
+       **Windows ACPI Emulated devices Table**
+
+       Microsoft only table, will not be supported.
+
+WDAT   Signature Reserved (signature == "WDAT")
+
+       **Watch Dog Action Table**
+
+       Microsoft only table, will not be supported.
+
+WDRT   Signature Reserved (signature == "WDRT")
+
+       **Watch Dog Resource Table**
+
+       Microsoft only table, will not be supported.
+
+WPBT   Signature Reserved (signature == "WPBT")
+
+       **Windows Platform Binary Table**
+
+       Microsoft only table, will not be supported.
+
+XENV   Signature Reserved (signature == "XENV")
+
+       **Xen project table**
+
+       Optional, used only by Xen at present.
+
+XSDT   Section 5.2.8 (signature == "XSDT")
+
+       **eXtended System Description Table**
+
+       Required for arm64.
+====== ========================================================================
+
+ACPI Objects
+------------
+The expectations on individual ACPI objects that are likely to be used are
+shown in the list that follows; any object not explicitly mentioned below
+should be used as needed for a particular platform or particular subsystem,
+such as power management or PCI.
+
+===== ================ ========================================================
+Name   Section         Usage for ARMv8 Linux
+===== ================ ========================================================
+_CCA   6.2.17          This method must be defined for all bus masters
+                       on arm64 - there are no assumptions made about
+                       whether such devices are cache coherent or not.
+                       The _CCA value is inherited by all descendants of
+                       these devices so it does not need to be repeated.
+                       Without _CCA on arm64, the kernel does not know what
+                       to do about setting up DMA for the device.
+
+                       NB: this method provides default cache coherency
+                       attributes; the presence of an SMMU can be used to
+                       modify that, however.  For example, a master could
+                       default to non-coherent, but be made coherent with
+                       the appropriate SMMU configuration (see Table 17 of
+                       the IORT specification, ARM Document DEN 0049B).
+
+_CID   6.1.2           Use as needed, see also _HID.
+
+_CLS   6.1.3           Use as needed, see also _HID.
+
+_CPC   8.4.7.1         Use as needed, power management specific.  CPPC is
+                       recommended on arm64.
+
+_CRS   6.2.2           Required on arm64.
+
+_CSD   8.4.2.2         Use as needed, used only in conjunction with _CST.
+
+_CST   8.4.2.1         Low power idle states (8.4.4) are recommended instead
+                       of C-states.
+
+_DDN   6.1.4           This field can be used for a device name.  However,
+                       it is meant for DOS device names (e.g., COM1), so be
+                       careful of its use across OSes.
+
+_DSD   6.2.5           To be used with caution.  If this object is used, try
+                       to use it within the constraints already defined by the
+                       Device Properties UUID.  Only in rare circumstances
+                       should it be necessary to create a new _DSD UUID.
+
+                       In either case, submit the _DSD definition along with
+                       any driver patches for discussion, especially when
+                       device properties are used.  A driver will not be
+                       considered complete without a corresponding _DSD
+                       description.  Once approved by kernel maintainers,
+                       the UUID or device properties must then be registered
+                       with the UEFI Forum; this may cause some iteration as
+                       more than one OS will be registering entries.
+
+_DSM   9.1.1           Do not use this method.  It is not standardized, the
+                       return values are not well documented, and it is
+                       currently a frequent source of error.
+
+\_GL   5.7.1           This object is not to be used in hardware reduced
+                       mode, and therefore should not be used on arm64.
+
+_GLK   6.5.7           This object requires a global lock be defined; there
+                       is no global lock on arm64 since it runs in hardware
+                       reduced mode.  Hence, do not use this object on arm64.
+
+\_GPE  5.3.1           This namespace is for x86 use only.  Do not use it
+                       on arm64.
+
+_HID   6.1.5           This is the primary object to use in device probing,
+		       though _CID and _CLS may also be used.
+
+_INI   6.5.1           Not required, but can be useful in setting up devices
+                       when UEFI leaves them in a state that may not be what
+                       the driver expects before it starts probing.
+
+_LPI   8.4.4.3         Recommended for use with processor definitions (_HID
+		       ACPI0010) on arm64.  See also _RDI.
+
+_MLS   6.1.7           Highly recommended for use in internationalization.
+
+_OFF   7.2.2           It is recommended to define this method for any device
+                       that can be turned on or off.
+
+_ON    7.2.3           It is recommended to define this method for any device
+                       that can be turned on or off.
+
+\_OS   5.7.3           This method will return "Linux" by default (this is
+                       the value of the macro ACPI_OS_NAME on Linux).  The
+                       command line parameter acpi_os=<string> can be used
+                       to set it to some other value.
+
+_OSC   6.2.11          This method can be a global method in ACPI (i.e.,
+                       \_SB._OSC), or it may be associated with a specific
+                       device (e.g., \_SB.DEV0._OSC), or both.  When used
+                       as a global method, only capabilities published in
+                       the ACPI specification are allowed.  When used as
+                       a device-specific method, the process described for
+                       using _DSD MUST be used to create an _OSC definition;
+                       out-of-process use of _OSC is not allowed.  That is,
+                       submit the device-specific _OSC usage description as
+                       part of the kernel driver submission, get it approved
+                       by the kernel community, then register it with the
+                       UEFI Forum.
+
+\_OSI  5.7.2           Deprecated on ARM64.  As far as ACPI firmware is
+		       concerned, _OSI is not to be used to determine what
+		       sort of system is being used or what functionality
+		       is provided.  The _OSC method is to be used instead.
+
+_PDC   8.4.1           Deprecated, do not use on arm64.
+
+\_PIC  5.8.1           The method should not be used.  On arm64, the only
+                       interrupt model available is GIC.
+
+\_PR   5.3.1           This namespace is for x86 use only on legacy systems.
+                       Do not use it on arm64.
+
+_PRT   6.2.13          Required as part of the definition of all PCI root
+                       devices.
+
+_PRx   7.3.8-11        Use as needed; power management specific.  If _PR0 is
+                       defined, _PR3 must also be defined.
+
+_PSx   7.3.2-5         Use as needed; power management specific.  If _PS0 is
+                       defined, _PS3 must also be defined.  If clocks or
+                       regulators need adjusting to be consistent with power
+                       usage, change them in these methods.
+
+_RDI   8.4.4.4         Recommended for use with processor definitions (_HID
+		       ACPI0010) on arm64.  This should only be used in
+		       conjunction with _LPI.
+
+\_REV  5.7.4           Always returns the latest version of ACPI supported.
+
+\_SB   5.3.1           Required on arm64; all devices must be defined in this
+                       namespace.
+
+_SLI   6.2.15          Use is recommended when SLIT table is in use.
+
+_STA   6.3.7,          It is recommended to define this method for any device
+       7.2.4           that can be turned on or off.  See also the STAO table
+                       that provides overrides to hide devices in virtualized
+                       environments.
+
+_SRS   6.2.16          Use as needed; see also _PRS.
+
+_STR   6.1.10          Recommended for conveying device names to end users;
+                       this is preferred over using _DDN.
+
+_SUB   6.1.9           Use as needed; _HID or _CID are preferred.
+
+_SUN   6.1.11          Use as needed, but recommended.
+
+_SWS   7.4.3           Use as needed; power management specific; this may
+                       require specification changes for use on arm64.
+
+_UID   6.1.12          Recommended for distinguishing devices of the same
+                       class; define it if at all possible.
+===== ================ ========================================================
+
+
+
+
+ACPI Event Model
+----------------
+Do not use GPE block devices; these are not supported in the hardware reduced
+profile used by arm64.  Since there are no GPE blocks defined for use on ARM
+platforms, ACPI events must be signaled differently.
+
+There are two options: GPIO-signaled interrupts (Section 5.6.5), and
+interrupt-signaled events (Section 5.6.9).  Interrupt-signaled events are a
+new feature in the ACPI 6.1 specification.  Either - or both - can be used
+on a given platform, and which to use may be dependent of limitations in any
+given SoC.  If possible, interrupt-signaled events are recommended.
+
+
+ACPI Processor Control
+----------------------
+Section 8 of the ACPI specification changed significantly in version 6.0.
+Processors should now be defined as Device objects with _HID ACPI0007; do
+not use the deprecated Processor statement in ASL.  All multiprocessor systems
+should also define a hierarchy of processors, done with Processor Container
+Devices (see Section 8.4.3.1, _HID ACPI0010); do not use processor aggregator
+devices (Section 8.5) to describe processor topology.  Section 8.4 of the
+specification describes the semantics of these object definitions and how
+they interrelate.
+
+Most importantly, the processor hierarchy defined also defines the low power
+idle states that are available to the platform, along with the rules for
+determining which processors can be turned on or off and the circumstances
+that control that.  Without this information, the processors will run in
+whatever power state they were left in by UEFI.
+
+Note too, that the processor Device objects defined and the entries in the
+MADT for GICs are expected to be in synchronization.  The _UID of the Device
+object must correspond to processor IDs used in the MADT.
+
+It is recommended that CPPC (8.4.5) be used as the primary model for processor
+performance control on arm64.  C-states and P-states may become available at
+some point in the future, but most current design work appears to favor CPPC.
+
+Further, it is essential that the ARMv8 SoC provide a fully functional
+implementation of PSCI; this will be the only mechanism supported by ACPI
+to control CPU power state.  Booting of secondary CPUs using the ACPI
+parking protocol is possible, but discouraged, since only PSCI is supported
+for ARM servers.
+
+
+ACPI System Address Map Interfaces
+----------------------------------
+In Section 15 of the ACPI specification, several methods are mentioned as
+possible mechanisms for conveying memory resource information to the kernel.
+For arm64, we will only support UEFI for booting with ACPI, hence the UEFI
+GetMemoryMap() boot service is the only mechanism that will be used.
+
+
+ACPI Platform Error Interfaces (APEI)
+-------------------------------------
+The APEI tables supported are described above.
+
+APEI requires the equivalent of an SCI and an NMI on ARMv8.  The SCI is used
+to notify the OSPM of errors that have occurred but can be corrected and the
+system can continue correct operation, even if possibly degraded.  The NMI is
+used to indicate fatal errors that cannot be corrected, and require immediate
+attention.
+
+Since there is no direct equivalent of the x86 SCI or NMI, arm64 handles
+these slightly differently.  The SCI is handled as a high priority interrupt;
+given that these are corrected (or correctable) errors being reported, this
+is sufficient.  The NMI is emulated as the highest priority interrupt
+possible.  This implies some caution must be used since there could be
+interrupts at higher privilege levels or even interrupts at the same priority
+as the emulated NMI.  In Linux, this should not be the case but one should
+be aware it could happen.
+
+
+ACPI Objects Not Supported on ARM64
+-----------------------------------
+While this may change in the future, there are several classes of objects
+that can be defined, but are not currently of general interest to ARM servers.
+Some of these objects have x86 equivalents, and may actually make sense in ARM
+servers.  However, there is either no hardware available at present, or there
+may not even be a non-ARM implementation yet.  Hence, they are not currently
+supported.
+
+The following classes of objects are not supported:
+
+       -  Section 9.2: ambient light sensor devices
+
+       -  Section 9.3: battery devices
+
+       -  Section 9.4: lids (e.g., laptop lids)
+
+       -  Section 9.8.2: IDE controllers
+
+       -  Section 9.9: floppy controllers
+
+       -  Section 9.10: GPE block devices
+
+       -  Section 9.15: PC/AT RTC/CMOS devices
+
+       -  Section 9.16: user presence detection devices
+
+       -  Section 9.17: I/O APIC devices; all GICs must be enumerable via MADT
+
+       -  Section 9.18: time and alarm devices (see 9.15)
+
+       -  Section 10: power source and power meter devices
+
+       -  Section 11: thermal management
+
+       -  Section 12: embedded controllers interface
+
+       -  Section 13: SMBus interfaces
+
+
+This also means that there is no support for the following objects:
+
+====   =========================== ====   ==========
+Name   Section                     Name   Section
+====   =========================== ====   ==========
+_ALC   9.3.4                       _FDM   9.10.3
+_ALI   9.3.2                       _FIX   6.2.7
+_ALP   9.3.6                       _GAI   10.4.5
+_ALR   9.3.5                       _GHL   10.4.7
+_ALT   9.3.3                       _GTM   9.9.2.1.1
+_BCT   10.2.2.10                   _LID   9.5.1
+_BDN   6.5.3                       _PAI   10.4.4
+_BIF   10.2.2.1                    _PCL   10.3.2
+_BIX   10.2.2.1                    _PIF   10.3.3
+_BLT   9.2.3                       _PMC   10.4.1
+_BMA   10.2.2.4                    _PMD   10.4.8
+_BMC   10.2.2.12                   _PMM   10.4.3
+_BMD   10.2.2.11                   _PRL   10.3.4
+_BMS   10.2.2.5                    _PSR   10.3.1
+_BST   10.2.2.6                    _PTP   10.4.2
+_BTH   10.2.2.7                    _SBS   10.1.3
+_BTM   10.2.2.9                    _SHL   10.4.6
+_BTP   10.2.2.8                    _STM   9.9.2.1.1
+_DCK   6.5.2                       _UPD   9.16.1
+_EC    12.12                       _UPP   9.16.2
+_FDE   9.10.1                      _WPC   10.5.2
+_FDI   9.10.2                      _WPP   10.5.3
+====   =========================== ====   ==========
diff --git a/Documentation/arm64/acpi_object_usage.txt b/Documentation/arm64/acpi_object_usage.txt
deleted file mode 100644
index c77010c5c1f0..000000000000
--- a/Documentation/arm64/acpi_object_usage.txt
+++ /dev/null
@@ -1,622 +0,0 @@
-ACPI Tables
------------
-The expectations of individual ACPI tables are discussed in the list that
-follows.
-
-If a section number is used, it refers to a section number in the ACPI
-specification where the object is defined.  If "Signature Reserved" is used,
-the table signature (the first four bytes of the table) is the only portion
-of the table recognized by the specification, and the actual table is defined
-outside of the UEFI Forum (see Section 5.2.6 of the specification).
-
-For ACPI on arm64, tables also fall into the following categories:
-
-       -- Required: DSDT, FADT, GTDT, MADT, MCFG, RSDP, SPCR, XSDT
-
-       -- Recommended: BERT, EINJ, ERST, HEST, PCCT, SSDT
-
-       -- Optional: BGRT, CPEP, CSRT, DBG2, DRTM, ECDT, FACS, FPDT, IORT,
-          MCHI, MPST, MSCT, NFIT, PMTT, RASF, SBST, SLIT, SPMI, SRAT, STAO,
-	  TCPA, TPM2, UEFI, XENV
-
-       -- Not supported: BOOT, DBGP, DMAR, ETDT, HPET, IBFT, IVRS, LPIT,
-          MSDM, OEMx, PSDT, RSDT, SLIC, WAET, WDAT, WDRT, WPBT
-
-Table  Usage for ARMv8 Linux
------  ----------------------------------------------------------------
-BERT   Section 18.3 (signature == "BERT")
-       == Boot Error Record Table ==
-       Must be supplied if RAS support is provided by the platform.  It
-       is recommended this table be supplied.
-
-BOOT   Signature Reserved (signature == "BOOT")
-       == simple BOOT flag table ==
-       Microsoft only table, will not be supported.
-
-BGRT   Section 5.2.22 (signature == "BGRT")
-       == Boot Graphics Resource Table ==
-       Optional, not currently supported, with no real use-case for an
-       ARM server.
-
-CPEP   Section 5.2.18 (signature == "CPEP")
-       == Corrected Platform Error Polling table ==
-       Optional, not currently supported, and not recommended until such
-       time as ARM-compatible hardware is available, and the specification
-       suitably modified.
-
-CSRT   Signature Reserved (signature == "CSRT")
-       == Core System Resources Table ==
-       Optional, not currently supported.
-
-DBG2   Signature Reserved (signature == "DBG2")
-       == DeBuG port table 2 ==
-       License has changed and should be usable.  Optional if used instead
-       of earlycon=<device> on the command line.
-
-DBGP   Signature Reserved (signature == "DBGP")
-       == DeBuG Port table ==
-       Microsoft only table, will not be supported.
-
-DSDT   Section 5.2.11.1 (signature == "DSDT")
-       == Differentiated System Description Table ==
-       A DSDT is required; see also SSDT.
-
-       ACPI tables contain only one DSDT but can contain one or more SSDTs,
-       which are optional.  Each SSDT can only add to the ACPI namespace,
-       but cannot modify or replace anything in the DSDT.
-
-DMAR   Signature Reserved (signature == "DMAR")
-       == DMA Remapping table ==
-       x86 only table, will not be supported.
-
-DRTM   Signature Reserved (signature == "DRTM")
-       == Dynamic Root of Trust for Measurement table ==
-       Optional, not currently supported.
-
-ECDT   Section 5.2.16 (signature == "ECDT")
-       == Embedded Controller Description Table ==
-       Optional, not currently supported, but could be used on ARM if and
-       only if one uses the GPE_BIT field to represent an IRQ number, since
-       there are no GPE blocks defined in hardware reduced mode.  This would
-       need to be modified in the ACPI specification.
-
-EINJ   Section 18.6 (signature == "EINJ")
-       == Error Injection table ==
-       This table is very useful for testing platform response to error
-       conditions; it allows one to inject an error into the system as
-       if it had actually occurred.  However, this table should not be
-       shipped with a production system; it should be dynamically loaded
-       and executed with the ACPICA tools only during testing.
-
-ERST   Section 18.5 (signature == "ERST")
-       == Error Record Serialization Table ==
-       On a platform supports RAS, this table must be supplied if it is not
-       UEFI-based; if it is UEFI-based, this table may be supplied. When this
-       table is not present, UEFI run time service will be utilized to save
-       and retrieve hardware error information to and from a persistent store.
-
-ETDT   Signature Reserved (signature == "ETDT")
-       == Event Timer Description Table ==
-       Obsolete table, will not be supported.
-
-FACS   Section 5.2.10 (signature == "FACS")
-       == Firmware ACPI Control Structure ==
-       It is unlikely that this table will be terribly useful.  If it is
-       provided, the Global Lock will NOT be used since it is not part of
-       the hardware reduced profile, and only 64-bit address fields will
-       be considered valid.
-
-FADT   Section 5.2.9 (signature == "FACP")
-       == Fixed ACPI Description Table ==
-       Required for arm64.
-
-       The HW_REDUCED_ACPI flag must be set.  All of the fields that are
-       to be ignored when HW_REDUCED_ACPI is set are expected to be set to
-       zero.
-
-       If an FACS table is provided, the X_FIRMWARE_CTRL field is to be
-       used, not FIRMWARE_CTRL.
-
-       If PSCI is used (as is recommended), make sure that ARM_BOOT_ARCH is
-       filled in properly -- that the PSCI_COMPLIANT flag is set and that
-       PSCI_USE_HVC is set or unset as needed (see table 5-37).
-
-       For the DSDT that is also required, the X_DSDT field is to be used,
-       not the DSDT field.
-
-FPDT   Section 5.2.23 (signature == "FPDT")
-       == Firmware Performance Data Table ==
-       Optional, not currently supported.
-
-GTDT   Section 5.2.24 (signature == "GTDT")
-       == Generic Timer Description Table ==
-       Required for arm64.
-
-HEST   Section 18.3.2 (signature == "HEST")
-       == Hardware Error Source Table ==
-       ARM-specific error sources have been defined; please use those or the
-       PCI types such as type 6 (AER Root Port), 7 (AER Endpoint), or 8 (AER
-       Bridge), or use type 9 (Generic Hardware Error Source).  Firmware first
-       error handling is possible if and only if Trusted Firmware is being
-       used on arm64.
-
-       Must be supplied if RAS support is provided by the platform.  It
-       is recommended this table be supplied.
-
-HPET   Signature Reserved (signature == "HPET")
-       == High Precision Event timer Table ==
-       x86 only table, will not be supported.
-
-IBFT   Signature Reserved (signature == "IBFT")
-       == iSCSI Boot Firmware Table ==
-       Microsoft defined table, support TBD.
-
-IORT   Signature Reserved (signature == "IORT")
-       == Input Output Remapping Table ==
-       arm64 only table, required in order to describe IO topology, SMMUs,
-       and GIC ITSs, and how those various components are connected together,
-       such as identifying which components are behind which SMMUs/ITSs.
-       This table will only be required on certain SBSA platforms (e.g.,
-       when using GICv3-ITS and an SMMU); on SBSA Level 0 platforms, it 
-       remains optional.
-
-IVRS   Signature Reserved (signature == "IVRS")
-       == I/O Virtualization Reporting Structure ==
-       x86_64 (AMD) only table, will not be supported.
-
-LPIT   Signature Reserved (signature == "LPIT")
-       == Low Power Idle Table ==
-       x86 only table as of ACPI 5.1; starting with ACPI 6.0, processor
-       descriptions and power states on ARM platforms should use the DSDT
-       and define processor container devices (_HID ACPI0010, Section 8.4,
-       and more specifically 8.4.3 and and 8.4.4).
-
-MADT   Section 5.2.12 (signature == "APIC")
-       == Multiple APIC Description Table ==
-       Required for arm64.  Only the GIC interrupt controller structures
-       should be used (types 0xA - 0xF).
-
-MCFG   Signature Reserved (signature == "MCFG")
-       == Memory-mapped ConFiGuration space ==
-       If the platform supports PCI/PCIe, an MCFG table is required.
-
-MCHI   Signature Reserved (signature == "MCHI")
-       == Management Controller Host Interface table ==
-       Optional, not currently supported.
-
-MPST   Section 5.2.21 (signature == "MPST")
-       == Memory Power State Table ==
-       Optional, not currently supported.
-
-MSCT   Section 5.2.19 (signature == "MSCT")
-       == Maximum System Characteristic Table ==
-       Optional, not currently supported.
-
-MSDM   Signature Reserved (signature == "MSDM")
-       == Microsoft Data Management table ==
-       Microsoft only table, will not be supported.
-
-NFIT   Section 5.2.25 (signature == "NFIT")
-       == NVDIMM Firmware Interface Table ==
-       Optional, not currently supported.
-
-OEMx   Signature of "OEMx" only
-       == OEM Specific Tables ==
-       All tables starting with a signature of "OEM" are reserved for OEM
-       use.  Since these are not meant to be of general use but are limited
-       to very specific end users, they are not recommended for use and are
-       not supported by the kernel for arm64.
-
-PCCT   Section 14.1 (signature == "PCCT)
-       == Platform Communications Channel Table ==
-       Recommend for use on arm64; use of PCC is recommended when using CPPC
-       to control performance and power for platform processors.
-
-PMTT   Section 5.2.21.12 (signature == "PMTT")
-       == Platform Memory Topology Table ==
-       Optional, not currently supported.
-
-PSDT   Section 5.2.11.3 (signature == "PSDT")
-       == Persistent System Description Table ==
-       Obsolete table, will not be supported.
-
-RASF   Section 5.2.20 (signature == "RASF")
-       == RAS Feature table ==
-       Optional, not currently supported.
-
-RSDP   Section 5.2.5 (signature == "RSD PTR")
-       == Root System Description PoinTeR ==
-       Required for arm64.
-
-RSDT   Section 5.2.7 (signature == "RSDT")
-       == Root System Description Table ==
-       Since this table can only provide 32-bit addresses, it is deprecated
-       on arm64, and will not be used.  If provided, it will be ignored.
-
-SBST   Section 5.2.14 (signature == "SBST")
-       == Smart Battery Subsystem Table ==
-       Optional, not currently supported.
-
-SLIC   Signature Reserved (signature == "SLIC")
-       == Software LIcensing table ==
-       Microsoft only table, will not be supported.
-
-SLIT   Section 5.2.17 (signature == "SLIT")
-       == System Locality distance Information Table ==
-       Optional in general, but required for NUMA systems.
-
-SPCR   Signature Reserved (signature == "SPCR")
-       == Serial Port Console Redirection table ==
-       Required for arm64.
-
-SPMI   Signature Reserved (signature == "SPMI")
-       == Server Platform Management Interface table ==
-       Optional, not currently supported.
-
-SRAT   Section 5.2.16 (signature == "SRAT")
-       == System Resource Affinity Table ==
-       Optional, but if used, only the GICC Affinity structures are read.
-       To support arm64 NUMA, this table is required.
-
-SSDT   Section 5.2.11.2 (signature == "SSDT")
-       == Secondary System Description Table ==
-       These tables are a continuation of the DSDT; these are recommended
-       for use with devices that can be added to a running system, but can
-       also serve the purpose of dividing up device descriptions into more
-       manageable pieces.
-
-       An SSDT can only ADD to the ACPI namespace.  It cannot modify or
-       replace existing device descriptions already in the namespace.
-
-       These tables are optional, however.  ACPI tables should contain only
-       one DSDT but can contain many SSDTs.
-
-STAO   Signature Reserved (signature == "STAO")
-       == _STA Override table ==
-       Optional, but only necessary in virtualized environments in order to
-       hide devices from guest OSs.
-
-TCPA   Signature Reserved (signature == "TCPA")
-       == Trusted Computing Platform Alliance table ==
-       Optional, not currently supported, and may need changes to fully
-       interoperate with arm64.
-
-TPM2   Signature Reserved (signature == "TPM2")
-       == Trusted Platform Module 2 table ==
-       Optional, not currently supported, and may need changes to fully
-       interoperate with arm64.
-
-UEFI   Signature Reserved (signature == "UEFI")
-       == UEFI ACPI data table ==
-       Optional, not currently supported.  No known use case for arm64,
-       at present.
-
-WAET   Signature Reserved (signature == "WAET")
-       == Windows ACPI Emulated devices Table ==
-       Microsoft only table, will not be supported.
-
-WDAT   Signature Reserved (signature == "WDAT")
-       == Watch Dog Action Table ==
-       Microsoft only table, will not be supported.
-
-WDRT   Signature Reserved (signature == "WDRT")
-       == Watch Dog Resource Table ==
-       Microsoft only table, will not be supported.
-
-WPBT   Signature Reserved (signature == "WPBT")
-       == Windows Platform Binary Table ==
-       Microsoft only table, will not be supported.
-
-XENV   Signature Reserved (signature == "XENV")
-       == Xen project table ==
-       Optional, used only by Xen at present.
-
-XSDT   Section 5.2.8 (signature == "XSDT")
-       == eXtended System Description Table ==
-       Required for arm64.
-
-
-ACPI Objects
-------------
-The expectations on individual ACPI objects that are likely to be used are
-shown in the list that follows; any object not explicitly mentioned below
-should be used as needed for a particular platform or particular subsystem,
-such as power management or PCI.
-
-Name   Section         Usage for ARMv8 Linux
-----   ------------    -------------------------------------------------
-_CCA   6.2.17          This method must be defined for all bus masters
-                       on arm64 -- there are no assumptions made about
-                       whether such devices are cache coherent or not.
-                       The _CCA value is inherited by all descendants of
-                       these devices so it does not need to be repeated.
-                       Without _CCA on arm64, the kernel does not know what
-                       to do about setting up DMA for the device.
-
-                       NB: this method provides default cache coherency
-                       attributes; the presence of an SMMU can be used to
-                       modify that, however.  For example, a master could
-                       default to non-coherent, but be made coherent with
-                       the appropriate SMMU configuration (see Table 17 of
-                       the IORT specification, ARM Document DEN 0049B).
-
-_CID   6.1.2           Use as needed, see also _HID.
-
-_CLS   6.1.3           Use as needed, see also _HID.
-
-_CPC   8.4.7.1         Use as needed, power management specific.  CPPC is
-                       recommended on arm64.
-
-_CRS   6.2.2           Required on arm64.
-
-_CSD   8.4.2.2         Use as needed, used only in conjunction with _CST.
-
-_CST   8.4.2.1         Low power idle states (8.4.4) are recommended instead
-                       of C-states.
-
-_DDN   6.1.4           This field can be used for a device name.  However,
-                       it is meant for DOS device names (e.g., COM1), so be
-                       careful of its use across OSes.
-
-_DSD   6.2.5           To be used with caution.  If this object is used, try
-                       to use it within the constraints already defined by the
-                       Device Properties UUID.  Only in rare circumstances
-                       should it be necessary to create a new _DSD UUID.
-
-                       In either case, submit the _DSD definition along with
-                       any driver patches for discussion, especially when
-                       device properties are used.  A driver will not be
-                       considered complete without a corresponding _DSD
-                       description.  Once approved by kernel maintainers,
-                       the UUID or device properties must then be registered
-                       with the UEFI Forum; this may cause some iteration as
-                       more than one OS will be registering entries.
-
-_DSM   9.1.1           Do not use this method.  It is not standardized, the
-                       return values are not well documented, and it is
-                       currently a frequent source of error.
-
-\_GL   5.7.1           This object is not to be used in hardware reduced
-                       mode, and therefore should not be used on arm64.
-
-_GLK   6.5.7           This object requires a global lock be defined; there
-                       is no global lock on arm64 since it runs in hardware
-                       reduced mode.  Hence, do not use this object on arm64.
-
-\_GPE  5.3.1           This namespace is for x86 use only.  Do not use it
-                       on arm64.
-
-_HID   6.1.5           This is the primary object to use in device probing,
-		       though _CID and _CLS may also be used.
-
-_INI   6.5.1           Not required, but can be useful in setting up devices
-                       when UEFI leaves them in a state that may not be what
-                       the driver expects before it starts probing.
-
-_LPI   8.4.4.3         Recommended for use with processor definitions (_HID
-		       ACPI0010) on arm64.  See also _RDI.
-
-_MLS   6.1.7           Highly recommended for use in internationalization.
-
-_OFF   7.2.2           It is recommended to define this method for any device
-                       that can be turned on or off.
-
-_ON    7.2.3           It is recommended to define this method for any device
-                       that can be turned on or off.
-
-\_OS   5.7.3           This method will return "Linux" by default (this is
-                       the value of the macro ACPI_OS_NAME on Linux).  The
-                       command line parameter acpi_os=<string> can be used
-                       to set it to some other value.
-
-_OSC   6.2.11          This method can be a global method in ACPI (i.e.,
-                       \_SB._OSC), or it may be associated with a specific
-                       device (e.g., \_SB.DEV0._OSC), or both.  When used
-                       as a global method, only capabilities published in
-                       the ACPI specification are allowed.  When used as
-                       a device-specific method, the process described for
-                       using _DSD MUST be used to create an _OSC definition;
-                       out-of-process use of _OSC is not allowed.  That is,
-                       submit the device-specific _OSC usage description as
-                       part of the kernel driver submission, get it approved
-                       by the kernel community, then register it with the
-                       UEFI Forum.
-
-\_OSI  5.7.2           Deprecated on ARM64.  As far as ACPI firmware is 
-		       concerned, _OSI is not to be used to determine what 
-		       sort of system is being used or what functionality
-		       is provided.  The _OSC method is to be used instead.
-
-_PDC   8.4.1           Deprecated, do not use on arm64.
-
-\_PIC  5.8.1           The method should not be used.  On arm64, the only
-                       interrupt model available is GIC.
-
-\_PR   5.3.1           This namespace is for x86 use only on legacy systems.
-                       Do not use it on arm64.
-
-_PRT   6.2.13          Required as part of the definition of all PCI root
-                       devices.
-
-_PRx   7.3.8-11        Use as needed; power management specific.  If _PR0 is
-                       defined, _PR3 must also be defined.
-
-_PSx   7.3.2-5         Use as needed; power management specific.  If _PS0 is
-                       defined, _PS3 must also be defined.  If clocks or
-                       regulators need adjusting to be consistent with power
-                       usage, change them in these methods.
-
-_RDI   8.4.4.4         Recommended for use with processor definitions (_HID
-		       ACPI0010) on arm64.  This should only be used in 
-		       conjunction with _LPI.
-
-\_REV  5.7.4           Always returns the latest version of ACPI supported.
-
-\_SB   5.3.1           Required on arm64; all devices must be defined in this
-                       namespace.
-
-_SLI   6.2.15          Use is recommended when SLIT table is in use.
-
-_STA   6.3.7,          It is recommended to define this method for any device
-       7.2.4           that can be turned on or off.  See also the STAO table
-                       that provides overrides to hide devices in virtualized
-                       environments.
-
-_SRS   6.2.16          Use as needed; see also _PRS.
-
-_STR   6.1.10          Recommended for conveying device names to end users;
-                       this is preferred over using _DDN.
-
-_SUB   6.1.9           Use as needed; _HID or _CID are preferred.
-
-_SUN   6.1.11          Use as needed, but recommended.
-
-_SWS   7.4.3           Use as needed; power management specific; this may
-                       require specification changes for use on arm64.
-
-_UID   6.1.12          Recommended for distinguishing devices of the same
-                       class; define it if at all possible.
-
-
-
-
-ACPI Event Model
-----------------
-Do not use GPE block devices; these are not supported in the hardware reduced
-profile used by arm64.  Since there are no GPE blocks defined for use on ARM
-platforms, ACPI events must be signaled differently.
-
-There are two options: GPIO-signaled interrupts (Section 5.6.5), and
-interrupt-signaled events (Section 5.6.9).  Interrupt-signaled events are a
-new feature in the ACPI 6.1 specification.  Either -- or both -- can be used
-on a given platform, and which to use may be dependent of limitations in any
-given SoC.  If possible, interrupt-signaled events are recommended.
-
-
-ACPI Processor Control
-----------------------
-Section 8 of the ACPI specification changed significantly in version 6.0.
-Processors should now be defined as Device objects with _HID ACPI0007; do
-not use the deprecated Processor statement in ASL.  All multiprocessor systems
-should also define a hierarchy of processors, done with Processor Container
-Devices (see Section 8.4.3.1, _HID ACPI0010); do not use processor aggregator
-devices (Section 8.5) to describe processor topology.  Section 8.4 of the
-specification describes the semantics of these object definitions and how
-they interrelate.
-
-Most importantly, the processor hierarchy defined also defines the low power
-idle states that are available to the platform, along with the rules for
-determining which processors can be turned on or off and the circumstances
-that control that.  Without this information, the processors will run in
-whatever power state they were left in by UEFI.
-
-Note too, that the processor Device objects defined and the entries in the
-MADT for GICs are expected to be in synchronization.  The _UID of the Device
-object must correspond to processor IDs used in the MADT.
-
-It is recommended that CPPC (8.4.5) be used as the primary model for processor
-performance control on arm64.  C-states and P-states may become available at
-some point in the future, but most current design work appears to favor CPPC.
-
-Further, it is essential that the ARMv8 SoC provide a fully functional
-implementation of PSCI; this will be the only mechanism supported by ACPI
-to control CPU power state.  Booting of secondary CPUs using the ACPI
-parking protocol is possible, but discouraged, since only PSCI is supported
-for ARM servers.
-
-
-ACPI System Address Map Interfaces
-----------------------------------
-In Section 15 of the ACPI specification, several methods are mentioned as
-possible mechanisms for conveying memory resource information to the kernel.
-For arm64, we will only support UEFI for booting with ACPI, hence the UEFI
-GetMemoryMap() boot service is the only mechanism that will be used.
-
-
-ACPI Platform Error Interfaces (APEI)
--------------------------------------
-The APEI tables supported are described above.
-
-APEI requires the equivalent of an SCI and an NMI on ARMv8.  The SCI is used
-to notify the OSPM of errors that have occurred but can be corrected and the
-system can continue correct operation, even if possibly degraded.  The NMI is
-used to indicate fatal errors that cannot be corrected, and require immediate
-attention.
-
-Since there is no direct equivalent of the x86 SCI or NMI, arm64 handles
-these slightly differently.  The SCI is handled as a high priority interrupt;
-given that these are corrected (or correctable) errors being reported, this
-is sufficient.  The NMI is emulated as the highest priority interrupt
-possible.  This implies some caution must be used since there could be
-interrupts at higher privilege levels or even interrupts at the same priority
-as the emulated NMI.  In Linux, this should not be the case but one should
-be aware it could happen.
-
-
-ACPI Objects Not Supported on ARM64
------------------------------------
-While this may change in the future, there are several classes of objects
-that can be defined, but are not currently of general interest to ARM servers.
-Some of these objects have x86 equivalents, and may actually make sense in ARM
-servers.  However, there is either no hardware available at present, or there
-may not even be a non-ARM implementation yet.  Hence, they are not currently
-supported.
-
-The following classes of objects are not supported:
-
-       -- Section 9.2: ambient light sensor devices
-
-       -- Section 9.3: battery devices
-
-       -- Section 9.4: lids (e.g., laptop lids)
-
-       -- Section 9.8.2: IDE controllers
-
-       -- Section 9.9: floppy controllers
-
-       -- Section 9.10: GPE block devices
-
-       -- Section 9.15: PC/AT RTC/CMOS devices
-
-       -- Section 9.16: user presence detection devices
-
-       -- Section 9.17: I/O APIC devices; all GICs must be enumerable via MADT
-
-       -- Section 9.18: time and alarm devices (see 9.15)
-
-       -- Section 10: power source and power meter devices
-
-       -- Section 11: thermal management
-
-       -- Section 12: embedded controllers interface
-
-       -- Section 13: SMBus interfaces
-
-
-This also means that there is no support for the following objects:
-
-Name   Section                     Name   Section
-----   ------------                ----   ------------
-_ALC   9.3.4                       _FDM   9.10.3
-_ALI   9.3.2                       _FIX   6.2.7
-_ALP   9.3.6                       _GAI   10.4.5
-_ALR   9.3.5                       _GHL   10.4.7
-_ALT   9.3.3                       _GTM   9.9.2.1.1
-_BCT   10.2.2.10                   _LID   9.5.1
-_BDN   6.5.3                       _PAI   10.4.4
-_BIF   10.2.2.1                    _PCL   10.3.2
-_BIX   10.2.2.1                    _PIF   10.3.3
-_BLT   9.2.3                       _PMC   10.4.1
-_BMA   10.2.2.4                    _PMD   10.4.8
-_BMC   10.2.2.12                   _PMM   10.4.3
-_BMD   10.2.2.11                   _PRL   10.3.4
-_BMS   10.2.2.5                    _PSR   10.3.1
-_BST   10.2.2.6                    _PTP   10.4.2
-_BTH   10.2.2.7                    _SBS   10.1.3
-_BTM   10.2.2.9                    _SHL   10.4.6
-_BTP   10.2.2.8                    _STM   9.9.2.1.1
-_DCK   6.5.2                       _UPD   9.16.1
-_EC    12.12                       _UPP   9.16.2
-_FDE   9.10.1                      _WPC   10.5.2
-_FDI   9.10.2                      _WPP   10.5.3
-
diff --git a/Documentation/arm64/arm-acpi.rst b/Documentation/arm64/arm-acpi.rst
new file mode 100644
index 000000000000..872dbbc73d4a
--- /dev/null
+++ b/Documentation/arm64/arm-acpi.rst
@@ -0,0 +1,528 @@
+=====================
+ACPI on ARMv8 Servers
+=====================
+
+ACPI can be used for ARMv8 general purpose servers designed to follow
+the ARM SBSA (Server Base System Architecture) [0] and SBBR (Server
+Base Boot Requirements) [1] specifications.  Please note that the SBBR
+can be retrieved simply by visiting [1], but the SBSA is currently only
+available to those with an ARM login due to ARM IP licensing concerns.
+
+The ARMv8 kernel implements the reduced hardware model of ACPI version
+5.1 or later.  Links to the specification and all external documents
+it refers to are managed by the UEFI Forum.  The specification is
+available at http://www.uefi.org/specifications and documents referenced
+by the specification can be found via http://www.uefi.org/acpi.
+
+If an ARMv8 system does not meet the requirements of the SBSA and SBBR,
+or cannot be described using the mechanisms defined in the required ACPI
+specifications, then ACPI may not be a good fit for the hardware.
+
+While the documents mentioned above set out the requirements for building
+industry-standard ARMv8 servers, they also apply to more than one operating
+system.  The purpose of this document is to describe the interaction between
+ACPI and Linux only, on an ARMv8 system -- that is, what Linux expects of
+ACPI and what ACPI can expect of Linux.
+
+
+Why ACPI on ARM?
+----------------
+Before examining the details of the interface between ACPI and Linux, it is
+useful to understand why ACPI is being used.  Several technologies already
+exist in Linux for describing non-enumerable hardware, after all.  In this
+section we summarize a blog post [2] from Grant Likely that outlines the
+reasoning behind ACPI on ARMv8 servers.  Actually, we snitch a good portion
+of the summary text almost directly, to be honest.
+
+The short form of the rationale for ACPI on ARM is:
+
+-  ACPI’s byte code (AML) allows the platform to encode hardware behavior,
+   while DT explicitly does not support this.  For hardware vendors, being
+   able to encode behavior is a key tool used in supporting operating
+   system releases on new hardware.
+
+-  ACPI’s OSPM defines a power management model that constrains what the
+   platform is allowed to do into a specific model, while still providing
+   flexibility in hardware design.
+
+-  In the enterprise server environment, ACPI has established bindings (such
+   as for RAS) which are currently used in production systems.  DT does not.
+   Such bindings could be defined in DT at some point, but doing so means ARM
+   and x86 would end up using completely different code paths in both firmware
+   and the kernel.
+
+-  Choosing a single interface to describe the abstraction between a platform
+   and an OS is important.  Hardware vendors would not be required to implement
+   both DT and ACPI if they want to support multiple operating systems.  And,
+   agreeing on a single interface instead of being fragmented into per OS
+   interfaces makes for better interoperability overall.
+
+-  The new ACPI governance process works well and Linux is now at the same
+   table as hardware vendors and other OS vendors.  In fact, there is no
+   longer any reason to feel that ACPI only belongs to Windows or that
+   Linux is in any way secondary to Microsoft in this arena.  The move of
+   ACPI governance into the UEFI forum has significantly opened up the
+   specification development process, and currently, a large portion of the
+   changes being made to ACPI are being driven by Linux.
+
+Key to the use of ACPI is the support model.  For servers in general, the
+responsibility for hardware behaviour cannot solely be the domain of the
+kernel, but rather must be split between the platform and the kernel, in
+order to allow for orderly change over time.  ACPI frees the OS from needing
+to understand all the minute details of the hardware so that the OS doesn’t
+need to be ported to each and every device individually.  It allows the
+hardware vendors to take responsibility for power management behaviour without
+depending on an OS release cycle which is not under their control.
+
+ACPI is also important because hardware and OS vendors have already worked
+out the mechanisms for supporting a general purpose computing ecosystem.  The
+infrastructure is in place, the bindings are in place, and the processes are
+in place.  DT does exactly what Linux needs it to when working with vertically
+integrated devices, but there are no good processes for supporting what the
+server vendors need.  Linux could potentially get there with DT, but doing so
+really just duplicates something that already works.  ACPI already does what
+the hardware vendors need, Microsoft won’t collaborate on DT, and hardware
+vendors would still end up providing two completely separate firmware
+interfaces -- one for Linux and one for Windows.
+
+
+Kernel Compatibility
+--------------------
+One of the primary motivations for ACPI is standardization, and using that
+to provide backward compatibility for Linux kernels.  In the server market,
+software and hardware are often used for long periods.  ACPI allows the
+kernel and firmware to agree on a consistent abstraction that can be
+maintained over time, even as hardware or software change.  As long as the
+abstraction is supported, systems can be updated without necessarily having
+to replace the kernel.
+
+When a Linux driver or subsystem is first implemented using ACPI, it by
+definition ends up requiring a specific version of the ACPI specification
+-- it's baseline.  ACPI firmware must continue to work, even though it may
+not be optimal, with the earliest kernel version that first provides support
+for that baseline version of ACPI.  There may be a need for additional drivers,
+but adding new functionality (e.g., CPU power management) should not break
+older kernel versions.  Further, ACPI firmware must also work with the most
+recent version of the kernel.
+
+
+Relationship with Device Tree
+-----------------------------
+ACPI support in drivers and subsystems for ARMv8 should never be mutually
+exclusive with DT support at compile time.
+
+At boot time the kernel will only use one description method depending on
+parameters passed from the boot loader (including kernel bootargs).
+
+Regardless of whether DT or ACPI is used, the kernel must always be capable
+of booting with either scheme (in kernels with both schemes enabled at compile
+time).
+
+
+Booting using ACPI tables
+-------------------------
+The only defined method for passing ACPI tables to the kernel on ARMv8
+is via the UEFI system configuration table.  Just so it is explicit, this
+means that ACPI is only supported on platforms that boot via UEFI.
+
+When an ARMv8 system boots, it can either have DT information, ACPI tables,
+or in some very unusual cases, both.  If no command line parameters are used,
+the kernel will try to use DT for device enumeration; if there is no DT
+present, the kernel will try to use ACPI tables, but only if they are present.
+In neither is available, the kernel will not boot.  If acpi=force is used
+on the command line, the kernel will attempt to use ACPI tables first, but
+fall back to DT if there are no ACPI tables present.  The basic idea is that
+the kernel will not fail to boot unless it absolutely has no other choice.
+
+Processing of ACPI tables may be disabled by passing acpi=off on the kernel
+command line; this is the default behavior.
+
+In order for the kernel to load and use ACPI tables, the UEFI implementation
+MUST set the ACPI_20_TABLE_GUID to point to the RSDP table (the table with
+the ACPI signature "RSD PTR ").  If this pointer is incorrect and acpi=force
+is used, the kernel will disable ACPI and try to use DT to boot instead; the
+kernel has, in effect, determined that ACPI tables are not present at that
+point.
+
+If the pointer to the RSDP table is correct, the table will be mapped into
+the kernel by the ACPI core, using the address provided by UEFI.
+
+The ACPI core will then locate and map in all other ACPI tables provided by
+using the addresses in the RSDP table to find the XSDT (eXtended System
+Description Table).  The XSDT in turn provides the addresses to all other
+ACPI tables provided by the system firmware; the ACPI core will then traverse
+this table and map in the tables listed.
+
+The ACPI core will ignore any provided RSDT (Root System Description Table).
+RSDTs have been deprecated and are ignored on arm64 since they only allow
+for 32-bit addresses.
+
+Further, the ACPI core will only use the 64-bit address fields in the FADT
+(Fixed ACPI Description Table).  Any 32-bit address fields in the FADT will
+be ignored on arm64.
+
+Hardware reduced mode (see Section 4.1 of the ACPI 6.1 specification) will
+be enforced by the ACPI core on arm64.  Doing so allows the ACPI core to
+run less complex code since it no longer has to provide support for legacy
+hardware from other architectures.  Any fields that are not to be used for
+hardware reduced mode must be set to zero.
+
+For the ACPI core to operate properly, and in turn provide the information
+the kernel needs to configure devices, it expects to find the following
+tables (all section numbers refer to the ACPI 6.1 specification):
+
+    -  RSDP (Root System Description Pointer), section 5.2.5
+
+    -  XSDT (eXtended System Description Table), section 5.2.8
+
+    -  FADT (Fixed ACPI Description Table), section 5.2.9
+
+    -  DSDT (Differentiated System Description Table), section
+       5.2.11.1
+
+    -  MADT (Multiple APIC Description Table), section 5.2.12
+
+    -  GTDT (Generic Timer Description Table), section 5.2.24
+
+    -  If PCI is supported, the MCFG (Memory mapped ConFiGuration
+       Table), section 5.2.6, specifically Table 5-31.
+
+    -  If booting without a console=<device> kernel parameter is
+       supported, the SPCR (Serial Port Console Redirection table),
+       section 5.2.6, specifically Table 5-31.
+
+    -  If necessary to describe the I/O topology, SMMUs and GIC ITSs,
+       the IORT (Input Output Remapping Table, section 5.2.6, specifically
+       Table 5-31).
+
+    -  If NUMA is supported, the SRAT (System Resource Affinity Table)
+       and SLIT (System Locality distance Information Table), sections
+       5.2.16 and 5.2.17, respectively.
+
+If the above tables are not all present, the kernel may or may not be
+able to boot properly since it may not be able to configure all of the
+devices available.  This list of tables is not meant to be all inclusive;
+in some environments other tables may be needed (e.g., any of the APEI
+tables from section 18) to support specific functionality.
+
+
+ACPI Detection
+--------------
+Drivers should determine their probe() type by checking for a null
+value for ACPI_HANDLE, or checking .of_node, or other information in
+the device structure.  This is detailed further in the "Driver
+Recommendations" section.
+
+In non-driver code, if the presence of ACPI needs to be detected at
+run time, then check the value of acpi_disabled. If CONFIG_ACPI is not
+set, acpi_disabled will always be 1.
+
+
+Device Enumeration
+------------------
+Device descriptions in ACPI should use standard recognized ACPI interfaces.
+These may contain less information than is typically provided via a Device
+Tree description for the same device.  This is also one of the reasons that
+ACPI can be useful -- the driver takes into account that it may have less
+detailed information about the device and uses sensible defaults instead.
+If done properly in the driver, the hardware can change and improve over
+time without the driver having to change at all.
+
+Clocks provide an excellent example.  In DT, clocks need to be specified
+and the drivers need to take them into account.  In ACPI, the assumption
+is that UEFI will leave the device in a reasonable default state, including
+any clock settings.  If for some reason the driver needs to change a clock
+value, this can be done in an ACPI method; all the driver needs to do is
+invoke the method and not concern itself with what the method needs to do
+to change the clock.  Changing the hardware can then take place over time
+by changing what the ACPI method does, and not the driver.
+
+In DT, the parameters needed by the driver to set up clocks as in the example
+above are known as "bindings"; in ACPI, these are known as "Device Properties"
+and provided to a driver via the _DSD object.
+
+ACPI tables are described with a formal language called ASL, the ACPI
+Source Language (section 19 of the specification).  This means that there
+are always multiple ways to describe the same thing -- including device
+properties.  For example, device properties could use an ASL construct
+that looks like this: Name(KEY0, "value0").  An ACPI device driver would
+then retrieve the value of the property by evaluating the KEY0 object.
+However, using Name() this way has multiple problems: (1) ACPI limits
+names ("KEY0") to four characters unlike DT; (2) there is no industry
+wide registry that maintains a list of names, minimizing re-use; (3)
+there is also no registry for the definition of property values ("value0"),
+again making re-use difficult; and (4) how does one maintain backward
+compatibility as new hardware comes out?  The _DSD method was created
+to solve precisely these sorts of problems; Linux drivers should ALWAYS
+use the _DSD method for device properties and nothing else.
+
+The _DSM object (ACPI Section 9.14.1) could also be used for conveying
+device properties to a driver.  Linux drivers should only expect it to
+be used if _DSD cannot represent the data required, and there is no way
+to create a new UUID for the _DSD object.  Note that there is even less
+regulation of the use of _DSM than there is of _DSD.  Drivers that depend
+on the contents of _DSM objects will be more difficult to maintain over
+time because of this; as of this writing, the use of _DSM is the cause
+of quite a few firmware problems and is not recommended.
+
+Drivers should look for device properties in the _DSD object ONLY; the _DSD
+object is described in the ACPI specification section 6.2.5, but this only
+describes how to define the structure of an object returned via _DSD, and
+how specific data structures are defined by specific UUIDs.  Linux should
+only use the _DSD Device Properties UUID [5]:
+
+   - UUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301
+
+   - http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf
+
+The UEFI Forum provides a mechanism for registering device properties [4]
+so that they may be used across all operating systems supporting ACPI.
+Device properties that have not been registered with the UEFI Forum should
+not be used.
+
+Before creating new device properties, check to be sure that they have not
+been defined before and either registered in the Linux kernel documentation
+as DT bindings, or the UEFI Forum as device properties.  While we do not want
+to simply move all DT bindings into ACPI device properties, we can learn from
+what has been previously defined.
+
+If it is necessary to define a new device property, or if it makes sense to
+synthesize the definition of a binding so it can be used in any firmware,
+both DT bindings and ACPI device properties for device drivers have review
+processes.  Use them both.  When the driver itself is submitted for review
+to the Linux mailing lists, the device property definitions needed must be
+submitted at the same time.  A driver that supports ACPI and uses device
+properties will not be considered complete without their definitions.  Once
+the device property has been accepted by the Linux community, it must be
+registered with the UEFI Forum [4], which will review it again for consistency
+within the registry.  This may require iteration.  The UEFI Forum, though,
+will always be the canonical site for device property definitions.
+
+It may make sense to provide notice to the UEFI Forum that there is the
+intent to register a previously unused device property name as a means of
+reserving the name for later use.  Other operating system vendors will
+also be submitting registration requests and this may help smooth the
+process.
+
+Once registration and review have been completed, the kernel provides an
+interface for looking up device properties in a manner independent of
+whether DT or ACPI is being used.  This API should be used [6]; it can
+eliminate some duplication of code paths in driver probing functions and
+discourage divergence between DT bindings and ACPI device properties.
+
+
+Programmable Power Control Resources
+------------------------------------
+Programmable power control resources include such resources as voltage/current
+providers (regulators) and clock sources.
+
+With ACPI, the kernel clock and regulator framework is not expected to be used
+at all.
+
+The kernel assumes that power control of these resources is represented with
+Power Resource Objects (ACPI section 7.1).  The ACPI core will then handle
+correctly enabling and disabling resources as they are needed.  In order to
+get that to work, ACPI assumes each device has defined D-states and that these
+can be controlled through the optional ACPI methods _PS0, _PS1, _PS2, and _PS3;
+in ACPI, _PS0 is the method to invoke to turn a device full on, and _PS3 is for
+turning a device full off.
+
+There are two options for using those Power Resources.  They can:
+
+   -  be managed in a _PSx method which gets called on entry to power
+      state Dx.
+
+   -  be declared separately as power resources with their own _ON and _OFF
+      methods.  They are then tied back to D-states for a particular device
+      via _PRx which specifies which power resources a device needs to be on
+      while in Dx.  Kernel then tracks number of devices using a power resource
+      and calls _ON/_OFF as needed.
+
+The kernel ACPI code will also assume that the _PSx methods follow the normal
+ACPI rules for such methods:
+
+   -  If either _PS0 or _PS3 is implemented, then the other method must also
+      be implemented.
+
+   -  If a device requires usage or setup of a power resource when on, the ASL
+      should organize that it is allocated/enabled using the _PS0 method.
+
+   -  Resources allocated or enabled in the _PS0 method should be disabled
+      or de-allocated in the _PS3 method.
+
+   -  Firmware will leave the resources in a reasonable state before handing
+      over control to the kernel.
+
+Such code in _PSx methods will of course be very platform specific.  But,
+this allows the driver to abstract out the interface for operating the device
+and avoid having to read special non-standard values from ACPI tables. Further,
+abstracting the use of these resources allows the hardware to change over time
+without requiring updates to the driver.
+
+
+Clocks
+------
+ACPI makes the assumption that clocks are initialized by the firmware --
+UEFI, in this case -- to some working value before control is handed over
+to the kernel.  This has implications for devices such as UARTs, or SoC-driven
+LCD displays, for example.
+
+When the kernel boots, the clocks are assumed to be set to reasonable
+working values.  If for some reason the frequency needs to change -- e.g.,
+throttling for power management -- the device driver should expect that
+process to be abstracted out into some ACPI method that can be invoked
+(please see the ACPI specification for further recommendations on standard
+methods to be expected).  The only exceptions to this are CPU clocks where
+CPPC provides a much richer interface than ACPI methods.  If the clocks
+are not set, there is no direct way for Linux to control them.
+
+If an SoC vendor wants to provide fine-grained control of the system clocks,
+they could do so by providing ACPI methods that could be invoked by Linux
+drivers.  However, this is NOT recommended and Linux drivers should NOT use
+such methods, even if they are provided.  Such methods are not currently
+standardized in the ACPI specification, and using them could tie a kernel
+to a very specific SoC, or tie an SoC to a very specific version of the
+kernel, both of which we are trying to avoid.
+
+
+Driver Recommendations
+----------------------
+DO NOT remove any DT handling when adding ACPI support for a driver.  The
+same device may be used on many different systems.
+
+DO try to structure the driver so that it is data-driven.  That is, set up
+a struct containing internal per-device state based on defaults and whatever
+else must be discovered by the driver probe function.  Then, have the rest
+of the driver operate off of the contents of that struct.  Doing so should
+allow most divergence between ACPI and DT functionality to be kept local to
+the probe function instead of being scattered throughout the driver.  For
+example::
+
+  static int device_probe_dt(struct platform_device *pdev)
+  {
+         /* DT specific functionality */
+         ...
+  }
+
+  static int device_probe_acpi(struct platform_device *pdev)
+  {
+         /* ACPI specific functionality */
+         ...
+  }
+
+  static int device_probe(struct platform_device *pdev)
+  {
+         ...
+         struct device_node node = pdev->dev.of_node;
+         ...
+
+         if (node)
+                 ret = device_probe_dt(pdev);
+         else if (ACPI_HANDLE(&pdev->dev))
+                 ret = device_probe_acpi(pdev);
+         else
+                 /* other initialization */
+                 ...
+         /* Continue with any generic probe operations */
+         ...
+  }
+
+DO keep the MODULE_DEVICE_TABLE entries together in the driver to make it
+clear the different names the driver is probed for, both from DT and from
+ACPI::
+
+  static struct of_device_id virtio_mmio_match[] = {
+          { .compatible = "virtio,mmio", },
+          { }
+  };
+  MODULE_DEVICE_TABLE(of, virtio_mmio_match);
+
+  static const struct acpi_device_id virtio_mmio_acpi_match[] = {
+          { "LNRO0005", },
+          { }
+  };
+  MODULE_DEVICE_TABLE(acpi, virtio_mmio_acpi_match);
+
+
+ASWG
+----
+The ACPI specification changes regularly.  During the year 2014, for instance,
+version 5.1 was released and version 6.0 substantially completed, with most of
+the changes being driven by ARM-specific requirements.  Proposed changes are
+presented and discussed in the ASWG (ACPI Specification Working Group) which
+is a part of the UEFI Forum.  The current version of the ACPI specification
+is 6.1 release in January 2016.
+
+Participation in this group is open to all UEFI members.  Please see
+http://www.uefi.org/workinggroup for details on group membership.
+
+It is the intent of the ARMv8 ACPI kernel code to follow the ACPI specification
+as closely as possible, and to only implement functionality that complies with
+the released standards from UEFI ASWG.  As a practical matter, there will be
+vendors that provide bad ACPI tables or violate the standards in some way.
+If this is because of errors, quirks and fix-ups may be necessary, but will
+be avoided if possible.  If there are features missing from ACPI that preclude
+it from being used on a platform, ECRs (Engineering Change Requests) should be
+submitted to ASWG and go through the normal approval process; for those that
+are not UEFI members, many other members of the Linux community are and would
+likely be willing to assist in submitting ECRs.
+
+
+Linux Code
+----------
+Individual items specific to Linux on ARM, contained in the the Linux
+source code, are in the list that follows:
+
+ACPI_OS_NAME
+                       This macro defines the string to be returned when
+                       an ACPI method invokes the _OS method.  On ARM64
+                       systems, this macro will be "Linux" by default.
+                       The command line parameter acpi_os=<string>
+                       can be used to set it to some other value.  The
+                       default value for other architectures is "Microsoft
+                       Windows NT", for example.
+
+ACPI Objects
+------------
+Detailed expectations for ACPI tables and object are listed in the file
+Documentation/arm64/acpi_object_usage.rst.
+
+
+References
+----------
+[0] http://silver.arm.com
+    document ARM-DEN-0029, or newer:
+    "Server Base System Architecture", version 2.3, dated 27 Mar 2014
+
+[1] http://infocenter.arm.com/help/topic/com.arm.doc.den0044a/Server_Base_Boot_Requirements.pdf
+    Document ARM-DEN-0044A, or newer: "Server Base Boot Requirements, System
+    Software on ARM Platforms", dated 16 Aug 2014
+
+[2] http://www.secretlab.ca/archives/151,
+    10 Jan 2015, Copyright (c) 2015,
+    Linaro Ltd., written by Grant Likely.
+
+[3] AMD ACPI for Seattle platform documentation
+    http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Seattle_ACPI_Guide.pdf
+
+
+[4] http://www.uefi.org/acpi
+    please see the link for the "ACPI _DSD Device
+    Property Registry Instructions"
+
+[5] http://www.uefi.org/acpi
+    please see the link for the "_DSD (Device
+    Specific Data) Implementation Guide"
+
+[6] Kernel code for the unified device
+    property interface can be found in
+    include/linux/property.h and drivers/base/property.c.
+
+
+Authors
+-------
+- Al Stone <al.stone@linaro.org>
+- Graeme Gregory <graeme.gregory@linaro.org>
+- Hanjun Guo <hanjun.guo@linaro.org>
+
+- Grant Likely <grant.likely@linaro.org>, for the "Why ACPI on ARM?" section
diff --git a/Documentation/arm64/arm-acpi.txt b/Documentation/arm64/arm-acpi.txt
deleted file mode 100644
index 1a74a041a443..000000000000
--- a/Documentation/arm64/arm-acpi.txt
+++ /dev/null
@@ -1,519 +0,0 @@
-ACPI on ARMv8 Servers
----------------------
-ACPI can be used for ARMv8 general purpose servers designed to follow
-the ARM SBSA (Server Base System Architecture) [0] and SBBR (Server
-Base Boot Requirements) [1] specifications.  Please note that the SBBR
-can be retrieved simply by visiting [1], but the SBSA is currently only
-available to those with an ARM login due to ARM IP licensing concerns.
-
-The ARMv8 kernel implements the reduced hardware model of ACPI version
-5.1 or later.  Links to the specification and all external documents
-it refers to are managed by the UEFI Forum.  The specification is
-available at http://www.uefi.org/specifications and documents referenced
-by the specification can be found via http://www.uefi.org/acpi.
-
-If an ARMv8 system does not meet the requirements of the SBSA and SBBR,
-or cannot be described using the mechanisms defined in the required ACPI
-specifications, then ACPI may not be a good fit for the hardware.
-
-While the documents mentioned above set out the requirements for building
-industry-standard ARMv8 servers, they also apply to more than one operating
-system.  The purpose of this document is to describe the interaction between
-ACPI and Linux only, on an ARMv8 system -- that is, what Linux expects of
-ACPI and what ACPI can expect of Linux.
-
-
-Why ACPI on ARM?
-----------------
-Before examining the details of the interface between ACPI and Linux, it is
-useful to understand why ACPI is being used.  Several technologies already
-exist in Linux for describing non-enumerable hardware, after all.  In this
-section we summarize a blog post [2] from Grant Likely that outlines the
-reasoning behind ACPI on ARMv8 servers.  Actually, we snitch a good portion
-of the summary text almost directly, to be honest.
-
-The short form of the rationale for ACPI on ARM is:
-
--- ACPI’s byte code (AML) allows the platform to encode hardware behavior,
-   while DT explicitly does not support this.  For hardware vendors, being
-   able to encode behavior is a key tool used in supporting operating
-   system releases on new hardware.
-
--- ACPI’s OSPM defines a power management model that constrains what the
-   platform is allowed to do into a specific model, while still providing
-   flexibility in hardware design.
-
--- In the enterprise server environment, ACPI has established bindings (such
-   as for RAS) which are currently used in production systems.  DT does not.
-   Such bindings could be defined in DT at some point, but doing so means ARM
-   and x86 would end up using completely different code paths in both firmware
-   and the kernel.
-
--- Choosing a single interface to describe the abstraction between a platform
-   and an OS is important.  Hardware vendors would not be required to implement
-   both DT and ACPI if they want to support multiple operating systems.  And,
-   agreeing on a single interface instead of being fragmented into per OS
-   interfaces makes for better interoperability overall.
-
--- The new ACPI governance process works well and Linux is now at the same
-   table as hardware vendors and other OS vendors.  In fact, there is no
-   longer any reason to feel that ACPI only belongs to Windows or that
-   Linux is in any way secondary to Microsoft in this arena.  The move of
-   ACPI governance into the UEFI forum has significantly opened up the
-   specification development process, and currently, a large portion of the
-   changes being made to ACPI are being driven by Linux.
-
-Key to the use of ACPI is the support model.  For servers in general, the
-responsibility for hardware behaviour cannot solely be the domain of the
-kernel, but rather must be split between the platform and the kernel, in
-order to allow for orderly change over time.  ACPI frees the OS from needing
-to understand all the minute details of the hardware so that the OS doesn’t
-need to be ported to each and every device individually.  It allows the
-hardware vendors to take responsibility for power management behaviour without
-depending on an OS release cycle which is not under their control.
-
-ACPI is also important because hardware and OS vendors have already worked
-out the mechanisms for supporting a general purpose computing ecosystem.  The
-infrastructure is in place, the bindings are in place, and the processes are
-in place.  DT does exactly what Linux needs it to when working with vertically
-integrated devices, but there are no good processes for supporting what the
-server vendors need.  Linux could potentially get there with DT, but doing so
-really just duplicates something that already works.  ACPI already does what
-the hardware vendors need, Microsoft won’t collaborate on DT, and hardware
-vendors would still end up providing two completely separate firmware
-interfaces -- one for Linux and one for Windows.
-
-
-Kernel Compatibility
---------------------
-One of the primary motivations for ACPI is standardization, and using that
-to provide backward compatibility for Linux kernels.  In the server market,
-software and hardware are often used for long periods.  ACPI allows the
-kernel and firmware to agree on a consistent abstraction that can be
-maintained over time, even as hardware or software change.  As long as the
-abstraction is supported, systems can be updated without necessarily having
-to replace the kernel.
-
-When a Linux driver or subsystem is first implemented using ACPI, it by
-definition ends up requiring a specific version of the ACPI specification
--- it's baseline.  ACPI firmware must continue to work, even though it may
-not be optimal, with the earliest kernel version that first provides support
-for that baseline version of ACPI.  There may be a need for additional drivers,
-but adding new functionality (e.g., CPU power management) should not break
-older kernel versions.  Further, ACPI firmware must also work with the most
-recent version of the kernel.
-
-
-Relationship with Device Tree
------------------------------
-ACPI support in drivers and subsystems for ARMv8 should never be mutually
-exclusive with DT support at compile time.
-
-At boot time the kernel will only use one description method depending on
-parameters passed from the boot loader (including kernel bootargs).
-
-Regardless of whether DT or ACPI is used, the kernel must always be capable
-of booting with either scheme (in kernels with both schemes enabled at compile
-time).
-
-
-Booting using ACPI tables
--------------------------
-The only defined method for passing ACPI tables to the kernel on ARMv8
-is via the UEFI system configuration table.  Just so it is explicit, this
-means that ACPI is only supported on platforms that boot via UEFI.
-
-When an ARMv8 system boots, it can either have DT information, ACPI tables,
-or in some very unusual cases, both.  If no command line parameters are used,
-the kernel will try to use DT for device enumeration; if there is no DT
-present, the kernel will try to use ACPI tables, but only if they are present.
-In neither is available, the kernel will not boot.  If acpi=force is used
-on the command line, the kernel will attempt to use ACPI tables first, but
-fall back to DT if there are no ACPI tables present.  The basic idea is that
-the kernel will not fail to boot unless it absolutely has no other choice.
-
-Processing of ACPI tables may be disabled by passing acpi=off on the kernel
-command line; this is the default behavior.
-
-In order for the kernel to load and use ACPI tables, the UEFI implementation
-MUST set the ACPI_20_TABLE_GUID to point to the RSDP table (the table with
-the ACPI signature "RSD PTR ").  If this pointer is incorrect and acpi=force
-is used, the kernel will disable ACPI and try to use DT to boot instead; the
-kernel has, in effect, determined that ACPI tables are not present at that
-point.
-
-If the pointer to the RSDP table is correct, the table will be mapped into
-the kernel by the ACPI core, using the address provided by UEFI.
-
-The ACPI core will then locate and map in all other ACPI tables provided by
-using the addresses in the RSDP table to find the XSDT (eXtended System
-Description Table).  The XSDT in turn provides the addresses to all other
-ACPI tables provided by the system firmware; the ACPI core will then traverse
-this table and map in the tables listed.
-
-The ACPI core will ignore any provided RSDT (Root System Description Table).
-RSDTs have been deprecated and are ignored on arm64 since they only allow
-for 32-bit addresses.
-
-Further, the ACPI core will only use the 64-bit address fields in the FADT
-(Fixed ACPI Description Table).  Any 32-bit address fields in the FADT will
-be ignored on arm64.
-
-Hardware reduced mode (see Section 4.1 of the ACPI 6.1 specification) will
-be enforced by the ACPI core on arm64.  Doing so allows the ACPI core to
-run less complex code since it no longer has to provide support for legacy
-hardware from other architectures.  Any fields that are not to be used for
-hardware reduced mode must be set to zero.
-
-For the ACPI core to operate properly, and in turn provide the information
-the kernel needs to configure devices, it expects to find the following
-tables (all section numbers refer to the ACPI 6.1 specification):
-
-    -- RSDP (Root System Description Pointer), section 5.2.5
-
-    -- XSDT (eXtended System Description Table), section 5.2.8
-
-    -- FADT (Fixed ACPI Description Table), section 5.2.9
-
-    -- DSDT (Differentiated System Description Table), section
-       5.2.11.1
-
-    -- MADT (Multiple APIC Description Table), section 5.2.12
-
-    -- GTDT (Generic Timer Description Table), section 5.2.24
-
-    -- If PCI is supported, the MCFG (Memory mapped ConFiGuration
-       Table), section 5.2.6, specifically Table 5-31.
-
-    -- If booting without a console=<device> kernel parameter is
-       supported, the SPCR (Serial Port Console Redirection table),
-       section 5.2.6, specifically Table 5-31.
-
-    -- If necessary to describe the I/O topology, SMMUs and GIC ITSs,
-       the IORT (Input Output Remapping Table, section 5.2.6, specifically
-       Table 5-31).
-
-    -- If NUMA is supported, the SRAT (System Resource Affinity Table)
-       and SLIT (System Locality distance Information Table), sections
-       5.2.16 and 5.2.17, respectively.
-
-If the above tables are not all present, the kernel may or may not be
-able to boot properly since it may not be able to configure all of the
-devices available.  This list of tables is not meant to be all inclusive;
-in some environments other tables may be needed (e.g., any of the APEI
-tables from section 18) to support specific functionality.
-
-
-ACPI Detection
---------------
-Drivers should determine their probe() type by checking for a null
-value for ACPI_HANDLE, or checking .of_node, or other information in
-the device structure.  This is detailed further in the "Driver
-Recommendations" section.
-
-In non-driver code, if the presence of ACPI needs to be detected at
-run time, then check the value of acpi_disabled. If CONFIG_ACPI is not
-set, acpi_disabled will always be 1.
-
-
-Device Enumeration
-------------------
-Device descriptions in ACPI should use standard recognized ACPI interfaces.
-These may contain less information than is typically provided via a Device
-Tree description for the same device.  This is also one of the reasons that
-ACPI can be useful -- the driver takes into account that it may have less
-detailed information about the device and uses sensible defaults instead.
-If done properly in the driver, the hardware can change and improve over
-time without the driver having to change at all.
-
-Clocks provide an excellent example.  In DT, clocks need to be specified
-and the drivers need to take them into account.  In ACPI, the assumption
-is that UEFI will leave the device in a reasonable default state, including
-any clock settings.  If for some reason the driver needs to change a clock
-value, this can be done in an ACPI method; all the driver needs to do is
-invoke the method and not concern itself with what the method needs to do
-to change the clock.  Changing the hardware can then take place over time
-by changing what the ACPI method does, and not the driver.
-
-In DT, the parameters needed by the driver to set up clocks as in the example
-above are known as "bindings"; in ACPI, these are known as "Device Properties"
-and provided to a driver via the _DSD object.
-
-ACPI tables are described with a formal language called ASL, the ACPI
-Source Language (section 19 of the specification).  This means that there
-are always multiple ways to describe the same thing -- including device
-properties.  For example, device properties could use an ASL construct
-that looks like this: Name(KEY0, "value0").  An ACPI device driver would
-then retrieve the value of the property by evaluating the KEY0 object.
-However, using Name() this way has multiple problems: (1) ACPI limits
-names ("KEY0") to four characters unlike DT; (2) there is no industry
-wide registry that maintains a list of names, minimizing re-use; (3)
-there is also no registry for the definition of property values ("value0"),
-again making re-use difficult; and (4) how does one maintain backward
-compatibility as new hardware comes out?  The _DSD method was created
-to solve precisely these sorts of problems; Linux drivers should ALWAYS
-use the _DSD method for device properties and nothing else.
-
-The _DSM object (ACPI Section 9.14.1) could also be used for conveying
-device properties to a driver.  Linux drivers should only expect it to
-be used if _DSD cannot represent the data required, and there is no way
-to create a new UUID for the _DSD object.  Note that there is even less
-regulation of the use of _DSM than there is of _DSD.  Drivers that depend
-on the contents of _DSM objects will be more difficult to maintain over
-time because of this; as of this writing, the use of _DSM is the cause
-of quite a few firmware problems and is not recommended.
-
-Drivers should look for device properties in the _DSD object ONLY; the _DSD
-object is described in the ACPI specification section 6.2.5, but this only
-describes how to define the structure of an object returned via _DSD, and
-how specific data structures are defined by specific UUIDs.  Linux should
-only use the _DSD Device Properties UUID [5]:
-
-   -- UUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301
-
-   -- http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf
-
-The UEFI Forum provides a mechanism for registering device properties [4]
-so that they may be used across all operating systems supporting ACPI.
-Device properties that have not been registered with the UEFI Forum should
-not be used.
-
-Before creating new device properties, check to be sure that they have not
-been defined before and either registered in the Linux kernel documentation
-as DT bindings, or the UEFI Forum as device properties.  While we do not want
-to simply move all DT bindings into ACPI device properties, we can learn from
-what has been previously defined.
-
-If it is necessary to define a new device property, or if it makes sense to
-synthesize the definition of a binding so it can be used in any firmware,
-both DT bindings and ACPI device properties for device drivers have review
-processes.  Use them both.  When the driver itself is submitted for review
-to the Linux mailing lists, the device property definitions needed must be
-submitted at the same time.  A driver that supports ACPI and uses device
-properties will not be considered complete without their definitions.  Once
-the device property has been accepted by the Linux community, it must be
-registered with the UEFI Forum [4], which will review it again for consistency
-within the registry.  This may require iteration.  The UEFI Forum, though,
-will always be the canonical site for device property definitions.
-
-It may make sense to provide notice to the UEFI Forum that there is the
-intent to register a previously unused device property name as a means of
-reserving the name for later use.  Other operating system vendors will
-also be submitting registration requests and this may help smooth the
-process.
-
-Once registration and review have been completed, the kernel provides an
-interface for looking up device properties in a manner independent of
-whether DT or ACPI is being used.  This API should be used [6]; it can
-eliminate some duplication of code paths in driver probing functions and
-discourage divergence between DT bindings and ACPI device properties.
-
-
-Programmable Power Control Resources
-------------------------------------
-Programmable power control resources include such resources as voltage/current
-providers (regulators) and clock sources.
-
-With ACPI, the kernel clock and regulator framework is not expected to be used
-at all.
-
-The kernel assumes that power control of these resources is represented with
-Power Resource Objects (ACPI section 7.1).  The ACPI core will then handle
-correctly enabling and disabling resources as they are needed.  In order to
-get that to work, ACPI assumes each device has defined D-states and that these
-can be controlled through the optional ACPI methods _PS0, _PS1, _PS2, and _PS3;
-in ACPI, _PS0 is the method to invoke to turn a device full on, and _PS3 is for
-turning a device full off.
-
-There are two options for using those Power Resources.  They can:
-
-   -- be managed in a _PSx method which gets called on entry to power
-      state Dx.
-
-   -- be declared separately as power resources with their own _ON and _OFF
-      methods.  They are then tied back to D-states for a particular device
-      via _PRx which specifies which power resources a device needs to be on
-      while in Dx.  Kernel then tracks number of devices using a power resource
-      and calls _ON/_OFF as needed.
-
-The kernel ACPI code will also assume that the _PSx methods follow the normal
-ACPI rules for such methods:
-
-   -- If either _PS0 or _PS3 is implemented, then the other method must also
-      be implemented.
-
-   -- If a device requires usage or setup of a power resource when on, the ASL
-      should organize that it is allocated/enabled using the _PS0 method.
-
-   -- Resources allocated or enabled in the _PS0 method should be disabled
-      or de-allocated in the _PS3 method.
-
-   -- Firmware will leave the resources in a reasonable state before handing
-      over control to the kernel.
-
-Such code in _PSx methods will of course be very platform specific.  But,
-this allows the driver to abstract out the interface for operating the device
-and avoid having to read special non-standard values from ACPI tables. Further,
-abstracting the use of these resources allows the hardware to change over time
-without requiring updates to the driver.
-
-
-Clocks
-------
-ACPI makes the assumption that clocks are initialized by the firmware --
-UEFI, in this case -- to some working value before control is handed over
-to the kernel.  This has implications for devices such as UARTs, or SoC-driven
-LCD displays, for example.
-
-When the kernel boots, the clocks are assumed to be set to reasonable
-working values.  If for some reason the frequency needs to change -- e.g.,
-throttling for power management -- the device driver should expect that
-process to be abstracted out into some ACPI method that can be invoked
-(please see the ACPI specification for further recommendations on standard
-methods to be expected).  The only exceptions to this are CPU clocks where
-CPPC provides a much richer interface than ACPI methods.  If the clocks
-are not set, there is no direct way for Linux to control them.
-
-If an SoC vendor wants to provide fine-grained control of the system clocks,
-they could do so by providing ACPI methods that could be invoked by Linux
-drivers.  However, this is NOT recommended and Linux drivers should NOT use
-such methods, even if they are provided.  Such methods are not currently
-standardized in the ACPI specification, and using them could tie a kernel
-to a very specific SoC, or tie an SoC to a very specific version of the
-kernel, both of which we are trying to avoid.
-
-
-Driver Recommendations
-----------------------
-DO NOT remove any DT handling when adding ACPI support for a driver.  The
-same device may be used on many different systems.
-
-DO try to structure the driver so that it is data-driven.  That is, set up
-a struct containing internal per-device state based on defaults and whatever
-else must be discovered by the driver probe function.  Then, have the rest
-of the driver operate off of the contents of that struct.  Doing so should
-allow most divergence between ACPI and DT functionality to be kept local to
-the probe function instead of being scattered throughout the driver.  For
-example:
-
-static int device_probe_dt(struct platform_device *pdev)
-{
-       /* DT specific functionality */
-       ...
-}
-
-static int device_probe_acpi(struct platform_device *pdev)
-{
-       /* ACPI specific functionality */
-       ...
-}
-
-static int device_probe(struct platform_device *pdev)
-{
-       ...
-       struct device_node node = pdev->dev.of_node;
-       ...
-
-       if (node)
-               ret = device_probe_dt(pdev);
-       else if (ACPI_HANDLE(&pdev->dev))
-               ret = device_probe_acpi(pdev);
-       else
-               /* other initialization */
-               ...
-       /* Continue with any generic probe operations */
-       ...
-}
-
-DO keep the MODULE_DEVICE_TABLE entries together in the driver to make it
-clear the different names the driver is probed for, both from DT and from
-ACPI:
-
-static struct of_device_id virtio_mmio_match[] = {
-        { .compatible = "virtio,mmio", },
-        { }
-};
-MODULE_DEVICE_TABLE(of, virtio_mmio_match);
-
-static const struct acpi_device_id virtio_mmio_acpi_match[] = {
-        { "LNRO0005", },
-        { }
-};
-MODULE_DEVICE_TABLE(acpi, virtio_mmio_acpi_match);
-
-
-ASWG
-----
-The ACPI specification changes regularly.  During the year 2014, for instance,
-version 5.1 was released and version 6.0 substantially completed, with most of
-the changes being driven by ARM-specific requirements.  Proposed changes are
-presented and discussed in the ASWG (ACPI Specification Working Group) which
-is a part of the UEFI Forum.  The current version of the ACPI specification
-is 6.1 release in January 2016.
-
-Participation in this group is open to all UEFI members.  Please see
-http://www.uefi.org/workinggroup for details on group membership.
-
-It is the intent of the ARMv8 ACPI kernel code to follow the ACPI specification
-as closely as possible, and to only implement functionality that complies with
-the released standards from UEFI ASWG.  As a practical matter, there will be
-vendors that provide bad ACPI tables or violate the standards in some way.
-If this is because of errors, quirks and fix-ups may be necessary, but will
-be avoided if possible.  If there are features missing from ACPI that preclude
-it from being used on a platform, ECRs (Engineering Change Requests) should be
-submitted to ASWG and go through the normal approval process; for those that
-are not UEFI members, many other members of the Linux community are and would
-likely be willing to assist in submitting ECRs.
-
-
-Linux Code
-----------
-Individual items specific to Linux on ARM, contained in the the Linux
-source code, are in the list that follows:
-
-ACPI_OS_NAME           This macro defines the string to be returned when
-                       an ACPI method invokes the _OS method.  On ARM64
-                       systems, this macro will be "Linux" by default.
-                       The command line parameter acpi_os=<string>
-                       can be used to set it to some other value.  The
-                       default value for other architectures is "Microsoft
-                       Windows NT", for example.
-
-ACPI Objects
-------------
-Detailed expectations for ACPI tables and object are listed in the file
-Documentation/arm64/acpi_object_usage.txt.
-
-
-References
-----------
-[0] http://silver.arm.com -- document ARM-DEN-0029, or newer
-    "Server Base System Architecture", version 2.3, dated 27 Mar 2014
-
-[1] http://infocenter.arm.com/help/topic/com.arm.doc.den0044a/Server_Base_Boot_Requirements.pdf
-    Document ARM-DEN-0044A, or newer: "Server Base Boot Requirements, System
-    Software on ARM Platforms", dated 16 Aug 2014
-
-[2] http://www.secretlab.ca/archives/151, 10 Jan 2015, Copyright (c) 2015,
-    Linaro Ltd., written by Grant Likely.
-
-[3] AMD ACPI for Seattle platform documentation:
-    http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Seattle_ACPI_Guide.pdf
-
-[4] http://www.uefi.org/acpi -- please see the link for the "ACPI _DSD Device
-    Property Registry Instructions"
-
-[5] http://www.uefi.org/acpi -- please see the link for the "_DSD (Device
-    Specific Data) Implementation Guide"
-
-[6] Kernel code for the unified device property interface can be found in
-    include/linux/property.h and drivers/base/property.c.
-
-
-Authors
--------
-Al Stone <al.stone@linaro.org>
-Graeme Gregory <graeme.gregory@linaro.org>
-Hanjun Guo <hanjun.guo@linaro.org>
-
-Grant Likely <grant.likely@linaro.org>, for the "Why ACPI on ARM?" section
diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
new file mode 100644
index 000000000000..3d041d0d16e8
--- /dev/null
+++ b/Documentation/arm64/booting.rst
@@ -0,0 +1,293 @@
+=====================
+Booting AArch64 Linux
+=====================
+
+Author: Will Deacon <will.deacon@arm.com>
+
+Date  : 07 September 2012
+
+This document is based on the ARM booting document by Russell King and
+is relevant to all public releases of the AArch64 Linux kernel.
+
+The AArch64 exception model is made up of a number of exception levels
+(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure
+counterpart.  EL2 is the hypervisor level and exists only in non-secure
+mode. EL3 is the highest priority level and exists only in secure mode.
+
+For the purposes of this document, we will use the term `boot loader`
+simply to define all software that executes on the CPU(s) before control
+is passed to the Linux kernel.  This may include secure monitor and
+hypervisor code, or it may just be a handful of instructions for
+preparing a minimal boot environment.
+
+Essentially, the boot loader should provide (as a minimum) the
+following:
+
+1. Setup and initialise the RAM
+2. Setup the device tree
+3. Decompress the kernel image
+4. Call the kernel image
+
+
+1. Setup and initialise RAM
+---------------------------
+
+Requirement: MANDATORY
+
+The boot loader is expected to find and initialise all RAM that the
+kernel will use for volatile data storage in the system.  It performs
+this in a machine dependent manner.  (It may use internal algorithms
+to automatically locate and size all RAM, or it may use knowledge of
+the RAM in the machine, or any other method the boot loader designer
+sees fit.)
+
+
+2. Setup the device tree
+-------------------------
+
+Requirement: MANDATORY
+
+The device tree blob (dtb) must be placed on an 8-byte boundary and must
+not exceed 2 megabytes in size. Since the dtb will be mapped cacheable
+using blocks of up to 2 megabytes in size, it must not be placed within
+any 2M region which must be mapped with any specific attributes.
+
+NOTE: versions prior to v4.2 also require that the DTB be placed within
+the 512 MB region starting at text_offset bytes below the kernel Image.
+
+3. Decompress the kernel image
+------------------------------
+
+Requirement: OPTIONAL
+
+The AArch64 kernel does not currently provide a decompressor and
+therefore requires decompression (gzip etc.) to be performed by the boot
+loader if a compressed Image target (e.g. Image.gz) is used.  For
+bootloaders that do not implement this requirement, the uncompressed
+Image target is available instead.
+
+
+4. Call the kernel image
+------------------------
+
+Requirement: MANDATORY
+
+The decompressed kernel image contains a 64-byte header as follows::
+
+  u32 code0;			/* Executable code */
+  u32 code1;			/* Executable code */
+  u64 text_offset;		/* Image load offset, little endian */
+  u64 image_size;		/* Effective Image size, little endian */
+  u64 flags;			/* kernel flags, little endian */
+  u64 res2	= 0;		/* reserved */
+  u64 res3	= 0;		/* reserved */
+  u64 res4	= 0;		/* reserved */
+  u32 magic	= 0x644d5241;	/* Magic number, little endian, "ARM\x64" */
+  u32 res5;			/* reserved (used for PE COFF offset) */
+
+
+Header notes:
+
+- As of v3.17, all fields are little endian unless stated otherwise.
+
+- code0/code1 are responsible for branching to stext.
+
+- when booting through EFI, code0/code1 are initially skipped.
+  res5 is an offset to the PE header and the PE header has the EFI
+  entry point (efi_stub_entry).  When the stub has done its work, it
+  jumps to code0 to resume the normal boot process.
+
+- Prior to v3.17, the endianness of text_offset was not specified.  In
+  these cases image_size is zero and text_offset is 0x80000 in the
+  endianness of the kernel.  Where image_size is non-zero image_size is
+  little-endian and must be respected.  Where image_size is zero,
+  text_offset can be assumed to be 0x80000.
+
+- The flags field (introduced in v3.17) is a little-endian 64-bit field
+  composed as follows:
+
+  ============= ===============================================================
+  Bit 0		Kernel endianness.  1 if BE, 0 if LE.
+  Bit 1-2	Kernel Page size.
+
+			* 0 - Unspecified.
+			* 1 - 4K
+			* 2 - 16K
+			* 3 - 64K
+  Bit 3		Kernel physical placement
+
+			0
+			  2MB aligned base should be as close as possible
+			  to the base of DRAM, since memory below it is not
+			  accessible via the linear mapping
+			1
+			  2MB aligned base may be anywhere in physical
+			  memory
+  Bits 4-63	Reserved.
+  ============= ===============================================================
+
+- When image_size is zero, a bootloader should attempt to keep as much
+  memory as possible free for use by the kernel immediately after the
+  end of the kernel image. The amount of space required will vary
+  depending on selected features, and is effectively unbound.
+
+The Image must be placed text_offset bytes from a 2MB aligned base
+address anywhere in usable system RAM and called there. The region
+between the 2 MB aligned base address and the start of the image has no
+special significance to the kernel, and may be used for other purposes.
+At least image_size bytes from the start of the image must be free for
+use by the kernel.
+NOTE: versions prior to v4.6 cannot make use of memory below the
+physical offset of the Image so it is recommended that the Image be
+placed as close as possible to the start of system RAM.
+
+If an initrd/initramfs is passed to the kernel at boot, it must reside
+entirely within a 1 GB aligned physical memory window of up to 32 GB in
+size that fully covers the kernel Image as well.
+
+Any memory described to the kernel (even that below the start of the
+image) which is not marked as reserved from the kernel (e.g., with a
+memreserve region in the device tree) will be considered as available to
+the kernel.
+
+Before jumping into the kernel, the following conditions must be met:
+
+- Quiesce all DMA capable devices so that memory does not get
+  corrupted by bogus network packets or disk data.  This will save
+  you many hours of debug.
+
+- Primary CPU general-purpose register settings:
+
+    - x0 = physical address of device tree blob (dtb) in system RAM.
+    - x1 = 0 (reserved for future use)
+    - x2 = 0 (reserved for future use)
+    - x3 = 0 (reserved for future use)
+
+- CPU mode
+
+  All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError,
+  IRQ and FIQ).
+  The CPU must be in either EL2 (RECOMMENDED in order to have access to
+  the virtualisation extensions) or non-secure EL1.
+
+- Caches, MMUs
+
+  The MMU must be off.
+  Instruction cache may be on or off.
+  The address range corresponding to the loaded kernel image must be
+  cleaned to the PoC. In the presence of a system cache or other
+  coherent masters with caches enabled, this will typically require
+  cache maintenance by VA rather than set/way operations.
+  System caches which respect the architected cache maintenance by VA
+  operations must be configured and may be enabled.
+  System caches which do not respect architected cache maintenance by VA
+  operations (not recommended) must be configured and disabled.
+
+- Architected timers
+
+  CNTFRQ must be programmed with the timer frequency and CNTVOFF must
+  be programmed with a consistent value on all CPUs.  If entering the
+  kernel at EL1, CNTHCTL_EL2 must have EL1PCTEN (bit 0) set where
+  available.
+
+- Coherency
+
+  All CPUs to be booted by the kernel must be part of the same coherency
+  domain on entry to the kernel.  This may require IMPLEMENTATION DEFINED
+  initialisation to enable the receiving of maintenance operations on
+  each CPU.
+
+- System registers
+
+  All writable architected system registers at the exception level where
+  the kernel image will be entered must be initialised by software at a
+  higher exception level to prevent execution in an UNKNOWN state.
+
+  - SCR_EL3.FIQ must have the same value across all CPUs the kernel is
+    executing on.
+  - The value of SCR_EL3.FIQ must be the same as the one present at boot
+    time whenever the kernel is executing.
+
+  For systems with a GICv3 interrupt controller to be used in v3 mode:
+  - If EL3 is present:
+
+      - ICC_SRE_EL3.Enable (bit 3) must be initialiased to 0b1.
+      - ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b1.
+
+  - If the kernel is entered at EL1:
+
+      - ICC.SRE_EL2.Enable (bit 3) must be initialised to 0b1
+      - ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1.
+
+  - The DT or ACPI tables must describe a GICv3 interrupt controller.
+
+  For systems with a GICv3 interrupt controller to be used in
+  compatibility (v2) mode:
+
+  - If EL3 is present:
+
+      ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b0.
+
+  - If the kernel is entered at EL1:
+
+      ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b0.
+
+  - The DT or ACPI tables must describe a GICv2 interrupt controller.
+
+  For CPUs with pointer authentication functionality:
+  - If EL3 is present:
+
+    - SCR_EL3.APK (bit 16) must be initialised to 0b1
+    - SCR_EL3.API (bit 17) must be initialised to 0b1
+
+  - If the kernel is entered at EL1:
+
+    - HCR_EL2.APK (bit 40) must be initialised to 0b1
+    - HCR_EL2.API (bit 41) must be initialised to 0b1
+
+The requirements described above for CPU mode, caches, MMUs, architected
+timers, coherency and system registers apply to all CPUs.  All CPUs must
+enter the kernel in the same exception level.
+
+The boot loader is expected to enter the kernel on each CPU in the
+following manner:
+
+- The primary CPU must jump directly to the first instruction of the
+  kernel image.  The device tree blob passed by this CPU must contain
+  an 'enable-method' property for each cpu node.  The supported
+  enable-methods are described below.
+
+  It is expected that the bootloader will generate these device tree
+  properties and insert them into the blob prior to kernel entry.
+
+- CPUs with a "spin-table" enable-method must have a 'cpu-release-addr'
+  property in their cpu node.  This property identifies a
+  naturally-aligned 64-bit zero-initalised memory location.
+
+  These CPUs should spin outside of the kernel in a reserved area of
+  memory (communicated to the kernel by a /memreserve/ region in the
+  device tree) polling their cpu-release-addr location, which must be
+  contained in the reserved region.  A wfe instruction may be inserted
+  to reduce the overhead of the busy-loop and a sev will be issued by
+  the primary CPU.  When a read of the location pointed to by the
+  cpu-release-addr returns a non-zero value, the CPU must jump to this
+  value.  The value will be written as a single 64-bit little-endian
+  value, so CPUs must convert the read value to their native endianness
+  before jumping to it.
+
+- CPUs with a "psci" enable method should remain outside of
+  the kernel (i.e. outside of the regions of memory described to the
+  kernel in the memory node, or in a reserved area of memory described
+  to the kernel by a /memreserve/ region in the device tree).  The
+  kernel will issue CPU_ON calls as described in ARM document number ARM
+  DEN 0022A ("Power State Coordination Interface System Software on ARM
+  processors") to bring CPUs into the kernel.
+
+  The device tree should contain a 'psci' node, as described in
+  Documentation/devicetree/bindings/arm/psci.txt.
+
+- Secondary CPU general-purpose register settings
+  x0 = 0 (reserved for future use)
+  x1 = 0 (reserved for future use)
+  x2 = 0 (reserved for future use)
+  x3 = 0 (reserved for future use)
diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
deleted file mode 100644
index fbab7e21d116..000000000000
--- a/Documentation/arm64/booting.txt
+++ /dev/null
@@ -1,266 +0,0 @@
-			Booting AArch64 Linux
-			=====================
-
-Author: Will Deacon <will.deacon@arm.com>
-Date  : 07 September 2012
-
-This document is based on the ARM booting document by Russell King and
-is relevant to all public releases of the AArch64 Linux kernel.
-
-The AArch64 exception model is made up of a number of exception levels
-(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure
-counterpart.  EL2 is the hypervisor level and exists only in non-secure
-mode. EL3 is the highest priority level and exists only in secure mode.
-
-For the purposes of this document, we will use the term `boot loader'
-simply to define all software that executes on the CPU(s) before control
-is passed to the Linux kernel.  This may include secure monitor and
-hypervisor code, or it may just be a handful of instructions for
-preparing a minimal boot environment.
-
-Essentially, the boot loader should provide (as a minimum) the
-following:
-
-1. Setup and initialise the RAM
-2. Setup the device tree
-3. Decompress the kernel image
-4. Call the kernel image
-
-
-1. Setup and initialise RAM
----------------------------
-
-Requirement: MANDATORY
-
-The boot loader is expected to find and initialise all RAM that the
-kernel will use for volatile data storage in the system.  It performs
-this in a machine dependent manner.  (It may use internal algorithms
-to automatically locate and size all RAM, or it may use knowledge of
-the RAM in the machine, or any other method the boot loader designer
-sees fit.)
-
-
-2. Setup the device tree
--------------------------
-
-Requirement: MANDATORY
-
-The device tree blob (dtb) must be placed on an 8-byte boundary and must
-not exceed 2 megabytes in size. Since the dtb will be mapped cacheable
-using blocks of up to 2 megabytes in size, it must not be placed within
-any 2M region which must be mapped with any specific attributes.
-
-NOTE: versions prior to v4.2 also require that the DTB be placed within
-the 512 MB region starting at text_offset bytes below the kernel Image.
-
-3. Decompress the kernel image
-------------------------------
-
-Requirement: OPTIONAL
-
-The AArch64 kernel does not currently provide a decompressor and
-therefore requires decompression (gzip etc.) to be performed by the boot
-loader if a compressed Image target (e.g. Image.gz) is used.  For
-bootloaders that do not implement this requirement, the uncompressed
-Image target is available instead.
-
-
-4. Call the kernel image
-------------------------
-
-Requirement: MANDATORY
-
-The decompressed kernel image contains a 64-byte header as follows:
-
-  u32 code0;			/* Executable code */
-  u32 code1;			/* Executable code */
-  u64 text_offset;		/* Image load offset, little endian */
-  u64 image_size;		/* Effective Image size, little endian */
-  u64 flags;			/* kernel flags, little endian */
-  u64 res2	= 0;		/* reserved */
-  u64 res3	= 0;		/* reserved */
-  u64 res4	= 0;		/* reserved */
-  u32 magic	= 0x644d5241;	/* Magic number, little endian, "ARM\x64" */
-  u32 res5;			/* reserved (used for PE COFF offset) */
-
-
-Header notes:
-
-- As of v3.17, all fields are little endian unless stated otherwise.
-
-- code0/code1 are responsible for branching to stext.
-
-- when booting through EFI, code0/code1 are initially skipped.
-  res5 is an offset to the PE header and the PE header has the EFI
-  entry point (efi_stub_entry).  When the stub has done its work, it
-  jumps to code0 to resume the normal boot process.
-
-- Prior to v3.17, the endianness of text_offset was not specified.  In
-  these cases image_size is zero and text_offset is 0x80000 in the
-  endianness of the kernel.  Where image_size is non-zero image_size is
-  little-endian and must be respected.  Where image_size is zero,
-  text_offset can be assumed to be 0x80000.
-
-- The flags field (introduced in v3.17) is a little-endian 64-bit field
-  composed as follows:
-  Bit 0:	Kernel endianness.  1 if BE, 0 if LE.
-  Bit 1-2:	Kernel Page size.
-			0 - Unspecified.
-			1 - 4K
-			2 - 16K
-			3 - 64K
-  Bit 3:	Kernel physical placement
-			0 - 2MB aligned base should be as close as possible
-			    to the base of DRAM, since memory below it is not
-			    accessible via the linear mapping
-			1 - 2MB aligned base may be anywhere in physical
-			    memory
-  Bits 4-63:	Reserved.
-
-- When image_size is zero, a bootloader should attempt to keep as much
-  memory as possible free for use by the kernel immediately after the
-  end of the kernel image. The amount of space required will vary
-  depending on selected features, and is effectively unbound.
-
-The Image must be placed text_offset bytes from a 2MB aligned base
-address anywhere in usable system RAM and called there. The region
-between the 2 MB aligned base address and the start of the image has no
-special significance to the kernel, and may be used for other purposes.
-At least image_size bytes from the start of the image must be free for
-use by the kernel.
-NOTE: versions prior to v4.6 cannot make use of memory below the
-physical offset of the Image so it is recommended that the Image be
-placed as close as possible to the start of system RAM.
-
-If an initrd/initramfs is passed to the kernel at boot, it must reside
-entirely within a 1 GB aligned physical memory window of up to 32 GB in
-size that fully covers the kernel Image as well.
-
-Any memory described to the kernel (even that below the start of the
-image) which is not marked as reserved from the kernel (e.g., with a
-memreserve region in the device tree) will be considered as available to
-the kernel.
-
-Before jumping into the kernel, the following conditions must be met:
-
-- Quiesce all DMA capable devices so that memory does not get
-  corrupted by bogus network packets or disk data.  This will save
-  you many hours of debug.
-
-- Primary CPU general-purpose register settings
-  x0 = physical address of device tree blob (dtb) in system RAM.
-  x1 = 0 (reserved for future use)
-  x2 = 0 (reserved for future use)
-  x3 = 0 (reserved for future use)
-
-- CPU mode
-  All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError,
-  IRQ and FIQ).
-  The CPU must be in either EL2 (RECOMMENDED in order to have access to
-  the virtualisation extensions) or non-secure EL1.
-
-- Caches, MMUs
-  The MMU must be off.
-  Instruction cache may be on or off.
-  The address range corresponding to the loaded kernel image must be
-  cleaned to the PoC. In the presence of a system cache or other
-  coherent masters with caches enabled, this will typically require
-  cache maintenance by VA rather than set/way operations.
-  System caches which respect the architected cache maintenance by VA
-  operations must be configured and may be enabled.
-  System caches which do not respect architected cache maintenance by VA
-  operations (not recommended) must be configured and disabled.
-
-- Architected timers
-  CNTFRQ must be programmed with the timer frequency and CNTVOFF must
-  be programmed with a consistent value on all CPUs.  If entering the
-  kernel at EL1, CNTHCTL_EL2 must have EL1PCTEN (bit 0) set where
-  available.
-
-- Coherency
-  All CPUs to be booted by the kernel must be part of the same coherency
-  domain on entry to the kernel.  This may require IMPLEMENTATION DEFINED
-  initialisation to enable the receiving of maintenance operations on
-  each CPU.
-
-- System registers
-  All writable architected system registers at the exception level where
-  the kernel image will be entered must be initialised by software at a
-  higher exception level to prevent execution in an UNKNOWN state.
-
-  - SCR_EL3.FIQ must have the same value across all CPUs the kernel is
-    executing on.
-  - The value of SCR_EL3.FIQ must be the same as the one present at boot
-    time whenever the kernel is executing.
-
-  For systems with a GICv3 interrupt controller to be used in v3 mode:
-  - If EL3 is present:
-    ICC_SRE_EL3.Enable (bit 3) must be initialiased to 0b1.
-    ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b1.
-  - If the kernel is entered at EL1:
-    ICC.SRE_EL2.Enable (bit 3) must be initialised to 0b1
-    ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1.
-  - The DT or ACPI tables must describe a GICv3 interrupt controller.
-
-  For systems with a GICv3 interrupt controller to be used in
-  compatibility (v2) mode:
-  - If EL3 is present:
-    ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b0.
-  - If the kernel is entered at EL1:
-    ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b0.
-  - The DT or ACPI tables must describe a GICv2 interrupt controller.
-
-  For CPUs with pointer authentication functionality:
-  - If EL3 is present:
-    SCR_EL3.APK (bit 16) must be initialised to 0b1
-    SCR_EL3.API (bit 17) must be initialised to 0b1
-  - If the kernel is entered at EL1:
-    HCR_EL2.APK (bit 40) must be initialised to 0b1
-    HCR_EL2.API (bit 41) must be initialised to 0b1
-
-The requirements described above for CPU mode, caches, MMUs, architected
-timers, coherency and system registers apply to all CPUs.  All CPUs must
-enter the kernel in the same exception level.
-
-The boot loader is expected to enter the kernel on each CPU in the
-following manner:
-
-- The primary CPU must jump directly to the first instruction of the
-  kernel image.  The device tree blob passed by this CPU must contain
-  an 'enable-method' property for each cpu node.  The supported
-  enable-methods are described below.
-
-  It is expected that the bootloader will generate these device tree
-  properties and insert them into the blob prior to kernel entry.
-
-- CPUs with a "spin-table" enable-method must have a 'cpu-release-addr'
-  property in their cpu node.  This property identifies a
-  naturally-aligned 64-bit zero-initalised memory location.
-
-  These CPUs should spin outside of the kernel in a reserved area of
-  memory (communicated to the kernel by a /memreserve/ region in the
-  device tree) polling their cpu-release-addr location, which must be
-  contained in the reserved region.  A wfe instruction may be inserted
-  to reduce the overhead of the busy-loop and a sev will be issued by
-  the primary CPU.  When a read of the location pointed to by the
-  cpu-release-addr returns a non-zero value, the CPU must jump to this
-  value.  The value will be written as a single 64-bit little-endian
-  value, so CPUs must convert the read value to their native endianness
-  before jumping to it.
-
-- CPUs with a "psci" enable method should remain outside of
-  the kernel (i.e. outside of the regions of memory described to the
-  kernel in the memory node, or in a reserved area of memory described
-  to the kernel by a /memreserve/ region in the device tree).  The
-  kernel will issue CPU_ON calls as described in ARM document number ARM
-  DEN 0022A ("Power State Coordination Interface System Software on ARM
-  processors") to bring CPUs into the kernel.
-
-  The device tree should contain a 'psci' node, as described in
-  Documentation/devicetree/bindings/arm/psci.txt.
-
-- Secondary CPU general-purpose register settings
-  x0 = 0 (reserved for future use)
-  x1 = 0 (reserved for future use)
-  x2 = 0 (reserved for future use)
-  x3 = 0 (reserved for future use)
diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst
new file mode 100644
index 000000000000..2955287e9acc
--- /dev/null
+++ b/Documentation/arm64/cpu-feature-registers.rst
@@ -0,0 +1,304 @@
+===========================
+ARM64 CPU Feature Registers
+===========================
+
+Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+
+
+This file describes the ABI for exporting the AArch64 CPU ID/feature
+registers to userspace. The availability of this ABI is advertised
+via the HWCAP_CPUID in HWCAPs.
+
+1. Motivation
+-------------
+
+The ARM architecture defines a set of feature registers, which describe
+the capabilities of the CPU/system. Access to these system registers is
+restricted from EL0 and there is no reliable way for an application to
+extract this information to make better decisions at runtime. There is
+limited information available to the application via HWCAPs, however
+there are some issues with their usage.
+
+ a) Any change to the HWCAPs requires an update to userspace (e.g libc)
+    to detect the new changes, which can take a long time to appear in
+    distributions. Exposing the registers allows applications to get the
+    information without requiring updates to the toolchains.
+
+ b) Access to HWCAPs is sometimes limited (e.g prior to libc, or
+    when ld is initialised at startup time).
+
+ c) HWCAPs cannot represent non-boolean information effectively. The
+    architecture defines a canonical format for representing features
+    in the ID registers; this is well defined and is capable of
+    representing all valid architecture variations.
+
+
+2. Requirements
+---------------
+
+ a) Safety:
+
+    Applications should be able to use the information provided by the
+    infrastructure to run safely across the system. This has greater
+    implications on a system with heterogeneous CPUs.
+    The infrastructure exports a value that is safe across all the
+    available CPU on the system.
+
+    e.g, If at least one CPU doesn't implement CRC32 instructions, while
+    others do, we should report that the CRC32 is not implemented.
+    Otherwise an application could crash when scheduled on the CPU
+    which doesn't support CRC32.
+
+ b) Security:
+
+    Applications should only be able to receive information that is
+    relevant to the normal operation in userspace. Hence, some of the
+    fields are masked out(i.e, made invisible) and their values are set to
+    indicate the feature is 'not supported'. See Section 4 for the list
+    of visible features. Also, the kernel may manipulate the fields
+    based on what it supports. e.g, If FP is not supported by the
+    kernel, the values could indicate that the FP is not available
+    (even when the CPU provides it).
+
+ c) Implementation Defined Features
+
+    The infrastructure doesn't expose any register which is
+    IMPLEMENTATION DEFINED as per ARMv8-A Architecture.
+
+ d) CPU Identification:
+
+    MIDR_EL1 is exposed to help identify the processor. On a
+    heterogeneous system, this could be racy (just like getcpu()). The
+    process could be migrated to another CPU by the time it uses the
+    register value, unless the CPU affinity is set. Hence, there is no
+    guarantee that the value reflects the processor that it is
+    currently executing on. The REVIDR is not exposed due to this
+    constraint, as REVIDR makes sense only in conjunction with the
+    MIDR. Alternately, MIDR_EL1 and REVIDR_EL1 are exposed via sysfs
+    at::
+
+	/sys/devices/system/cpu/cpu$ID/regs/identification/
+	                                              \- midr
+	                                              \- revidr
+
+3. Implementation
+--------------------
+
+The infrastructure is built on the emulation of the 'MRS' instruction.
+Accessing a restricted system register from an application generates an
+exception and ends up in SIGILL being delivered to the process.
+The infrastructure hooks into the exception handler and emulates the
+operation if the source belongs to the supported system register space.
+
+The infrastructure emulates only the following system register space::
+
+	Op0=3, Op1=0, CRn=0, CRm=0,4,5,6,7
+
+(See Table C5-6 'System instruction encodings for non-Debug System
+register accesses' in ARMv8 ARM DDI 0487A.h, for the list of
+registers).
+
+The following rules are applied to the value returned by the
+infrastructure:
+
+ a) The value of an 'IMPLEMENTATION DEFINED' field is set to 0.
+ b) The value of a reserved field is populated with the reserved
+    value as defined by the architecture.
+ c) The value of a 'visible' field holds the system wide safe value
+    for the particular feature (except for MIDR_EL1, see section 4).
+ d) All other fields (i.e, invisible fields) are set to indicate
+    the feature is missing (as defined by the architecture).
+
+4. List of registers with visible features
+-------------------------------------------
+
+  1) ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0
+
+     +------------------------------+---------+---------+
+     | Name                         |  bits   | visible |
+     +------------------------------+---------+---------+
+     | TS                           | [55-52] |    y    |
+     +------------------------------+---------+---------+
+     | FHM                          | [51-48] |    y    |
+     +------------------------------+---------+---------+
+     | DP                           | [47-44] |    y    |
+     +------------------------------+---------+---------+
+     | SM4                          | [43-40] |    y    |
+     +------------------------------+---------+---------+
+     | SM3                          | [39-36] |    y    |
+     +------------------------------+---------+---------+
+     | SHA3                         | [35-32] |    y    |
+     +------------------------------+---------+---------+
+     | RDM                          | [31-28] |    y    |
+     +------------------------------+---------+---------+
+     | ATOMICS                      | [23-20] |    y    |
+     +------------------------------+---------+---------+
+     | CRC32                        | [19-16] |    y    |
+     +------------------------------+---------+---------+
+     | SHA2                         | [15-12] |    y    |
+     +------------------------------+---------+---------+
+     | SHA1                         | [11-8]  |    y    |
+     +------------------------------+---------+---------+
+     | AES                          | [7-4]   |    y    |
+     +------------------------------+---------+---------+
+
+
+  2) ID_AA64PFR0_EL1 - Processor Feature Register 0
+
+     +------------------------------+---------+---------+
+     | Name                         |  bits   | visible |
+     +------------------------------+---------+---------+
+     | DIT                          | [51-48] |    y    |
+     +------------------------------+---------+---------+
+     | SVE                          | [35-32] |    y    |
+     +------------------------------+---------+---------+
+     | GIC                          | [27-24] |    n    |
+     +------------------------------+---------+---------+
+     | AdvSIMD                      | [23-20] |    y    |
+     +------------------------------+---------+---------+
+     | FP                           | [19-16] |    y    |
+     +------------------------------+---------+---------+
+     | EL3                          | [15-12] |    n    |
+     +------------------------------+---------+---------+
+     | EL2                          | [11-8]  |    n    |
+     +------------------------------+---------+---------+
+     | EL1                          | [7-4]   |    n    |
+     +------------------------------+---------+---------+
+     | EL0                          | [3-0]   |    n    |
+     +------------------------------+---------+---------+
+
+
+  3) MIDR_EL1 - Main ID Register
+
+     +------------------------------+---------+---------+
+     | Name                         |  bits   | visible |
+     +------------------------------+---------+---------+
+     | Implementer                  | [31-24] |    y    |
+     +------------------------------+---------+---------+
+     | Variant                      | [23-20] |    y    |
+     +------------------------------+---------+---------+
+     | Architecture                 | [19-16] |    y    |
+     +------------------------------+---------+---------+
+     | PartNum                      | [15-4]  |    y    |
+     +------------------------------+---------+---------+
+     | Revision                     | [3-0]   |    y    |
+     +------------------------------+---------+---------+
+
+   NOTE: The 'visible' fields of MIDR_EL1 will contain the value
+   as available on the CPU where it is fetched and is not a system
+   wide safe value.
+
+  4) ID_AA64ISAR1_EL1 - Instruction set attribute register 1
+
+     +------------------------------+---------+---------+
+     | Name                         |  bits   | visible |
+     +------------------------------+---------+---------+
+     | GPI                          | [31-28] |    y    |
+     +------------------------------+---------+---------+
+     | GPA                          | [27-24] |    y    |
+     +------------------------------+---------+---------+
+     | LRCPC                        | [23-20] |    y    |
+     +------------------------------+---------+---------+
+     | FCMA                         | [19-16] |    y    |
+     +------------------------------+---------+---------+
+     | JSCVT                        | [15-12] |    y    |
+     +------------------------------+---------+---------+
+     | API                          | [11-8]  |    y    |
+     +------------------------------+---------+---------+
+     | APA                          | [7-4]   |    y    |
+     +------------------------------+---------+---------+
+     | DPB                          | [3-0]   |    y    |
+     +------------------------------+---------+---------+
+
+  5) ID_AA64MMFR2_EL1 - Memory model feature register 2
+
+     +------------------------------+---------+---------+
+     | Name                         |  bits   | visible |
+     +------------------------------+---------+---------+
+     | AT                           | [35-32] |    y    |
+     +------------------------------+---------+---------+
+
+  6) ID_AA64ZFR0_EL1 - SVE feature ID register 0
+
+     +------------------------------+---------+---------+
+     | Name                         |  bits   | visible |
+     +------------------------------+---------+---------+
+     | SM4                          | [43-40] |    y    |
+     +------------------------------+---------+---------+
+     | SHA3                         | [35-32] |    y    |
+     +------------------------------+---------+---------+
+     | BitPerm                      | [19-16] |    y    |
+     +------------------------------+---------+---------+
+     | AES                          | [7-4]   |    y    |
+     +------------------------------+---------+---------+
+     | SVEVer                       | [3-0]   |    y    |
+     +------------------------------+---------+---------+
+
+Appendix I: Example
+-------------------
+
+::
+
+  /*
+   * Sample program to demonstrate the MRS emulation ABI.
+   *
+   * Copyright (C) 2015-2016, ARM Ltd
+   *
+   * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+   *
+   * This program is free software; you can redistribute it and/or modify
+   * it under the terms of the GNU General Public License version 2 as
+   * published by the Free Software Foundation.
+   *
+   * This program is distributed in the hope that it will be useful,
+   * but WITHOUT ANY WARRANTY; without even the implied warranty of
+   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   * GNU General Public License for more details.
+   * This program is free software; you can redistribute it and/or modify
+   * it under the terms of the GNU General Public License version 2 as
+   * published by the Free Software Foundation.
+   *
+   * This program is distributed in the hope that it will be useful,
+   * but WITHOUT ANY WARRANTY; without even the implied warranty of
+   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   * GNU General Public License for more details.
+   */
+
+  #include <asm/hwcap.h>
+  #include <stdio.h>
+  #include <sys/auxv.h>
+
+  #define get_cpu_ftr(id) ({					\
+		unsigned long __val;				\
+		asm("mrs %0, "#id : "=r" (__val));		\
+		printf("%-20s: 0x%016lx\n", #id, __val);	\
+	})
+
+  int main(void)
+  {
+
+	if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
+		fputs("CPUID registers unavailable\n", stderr);
+		return 1;
+	}
+
+	get_cpu_ftr(ID_AA64ISAR0_EL1);
+	get_cpu_ftr(ID_AA64ISAR1_EL1);
+	get_cpu_ftr(ID_AA64MMFR0_EL1);
+	get_cpu_ftr(ID_AA64MMFR1_EL1);
+	get_cpu_ftr(ID_AA64PFR0_EL1);
+	get_cpu_ftr(ID_AA64PFR1_EL1);
+	get_cpu_ftr(ID_AA64DFR0_EL1);
+	get_cpu_ftr(ID_AA64DFR1_EL1);
+
+	get_cpu_ftr(MIDR_EL1);
+	get_cpu_ftr(MPIDR_EL1);
+	get_cpu_ftr(REVIDR_EL1);
+
+  #if 0
+	/* Unexposed register access causes SIGILL */
+	get_cpu_ftr(ID_MMFR0_EL1);
+  #endif
+
+	return 0;
+  }
diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt
deleted file mode 100644
index 684a0da39378..000000000000
--- a/Documentation/arm64/cpu-feature-registers.txt
+++ /dev/null
@@ -1,296 +0,0 @@
-		ARM64 CPU Feature Registers
-		===========================
-
-Author: Suzuki K Poulose <suzuki.poulose@arm.com>
-
-
-This file describes the ABI for exporting the AArch64 CPU ID/feature
-registers to userspace. The availability of this ABI is advertised
-via the HWCAP_CPUID in HWCAPs.
-
-1. Motivation
----------------
-
-The ARM architecture defines a set of feature registers, which describe
-the capabilities of the CPU/system. Access to these system registers is
-restricted from EL0 and there is no reliable way for an application to
-extract this information to make better decisions at runtime. There is
-limited information available to the application via HWCAPs, however
-there are some issues with their usage.
-
- a) Any change to the HWCAPs requires an update to userspace (e.g libc)
-    to detect the new changes, which can take a long time to appear in
-    distributions. Exposing the registers allows applications to get the
-    information without requiring updates to the toolchains.
-
- b) Access to HWCAPs is sometimes limited (e.g prior to libc, or
-    when ld is initialised at startup time).
-
- c) HWCAPs cannot represent non-boolean information effectively. The
-    architecture defines a canonical format for representing features
-    in the ID registers; this is well defined and is capable of
-    representing all valid architecture variations.
-
-
-2. Requirements
------------------
-
- a) Safety :
-    Applications should be able to use the information provided by the
-    infrastructure to run safely across the system. This has greater
-    implications on a system with heterogeneous CPUs.
-    The infrastructure exports a value that is safe across all the
-    available CPU on the system.
-
-    e.g, If at least one CPU doesn't implement CRC32 instructions, while
-    others do, we should report that the CRC32 is not implemented.
-    Otherwise an application could crash when scheduled on the CPU
-    which doesn't support CRC32.
-
- b) Security :
-    Applications should only be able to receive information that is
-    relevant to the normal operation in userspace. Hence, some of the
-    fields are masked out(i.e, made invisible) and their values are set to
-    indicate the feature is 'not supported'. See Section 4 for the list
-    of visible features. Also, the kernel may manipulate the fields
-    based on what it supports. e.g, If FP is not supported by the
-    kernel, the values could indicate that the FP is not available
-    (even when the CPU provides it).
-
- c) Implementation Defined Features
-    The infrastructure doesn't expose any register which is
-    IMPLEMENTATION DEFINED as per ARMv8-A Architecture.
-
- d) CPU Identification :
-    MIDR_EL1 is exposed to help identify the processor. On a
-    heterogeneous system, this could be racy (just like getcpu()). The
-    process could be migrated to another CPU by the time it uses the
-    register value, unless the CPU affinity is set. Hence, there is no
-    guarantee that the value reflects the processor that it is
-    currently executing on. The REVIDR is not exposed due to this
-    constraint, as REVIDR makes sense only in conjunction with the
-    MIDR. Alternately, MIDR_EL1 and REVIDR_EL1 are exposed via sysfs
-    at:
-
-	/sys/devices/system/cpu/cpu$ID/regs/identification/
-	                                              \- midr
-	                                              \- revidr
-
-3. Implementation
---------------------
-
-The infrastructure is built on the emulation of the 'MRS' instruction.
-Accessing a restricted system register from an application generates an
-exception and ends up in SIGILL being delivered to the process.
-The infrastructure hooks into the exception handler and emulates the
-operation if the source belongs to the supported system register space.
-
-The infrastructure emulates only the following system register space:
-	Op0=3, Op1=0, CRn=0, CRm=0,4,5,6,7
-
-(See Table C5-6 'System instruction encodings for non-Debug System
-register accesses' in ARMv8 ARM DDI 0487A.h, for the list of
-registers).
-
-The following rules are applied to the value returned by the
-infrastructure:
-
- a) The value of an 'IMPLEMENTATION DEFINED' field is set to 0.
- b) The value of a reserved field is populated with the reserved
-    value as defined by the architecture.
- c) The value of a 'visible' field holds the system wide safe value
-    for the particular feature (except for MIDR_EL1, see section 4).
- d) All other fields (i.e, invisible fields) are set to indicate
-    the feature is missing (as defined by the architecture).
-
-4. List of registers with visible features
--------------------------------------------
-
-  1) ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0
-     x--------------------------------------------------x
-     | Name                         |  bits   | visible |
-     |--------------------------------------------------|
-     | TS                           | [55-52] |    y    |
-     |--------------------------------------------------|
-     | FHM                          | [51-48] |    y    |
-     |--------------------------------------------------|
-     | DP                           | [47-44] |    y    |
-     |--------------------------------------------------|
-     | SM4                          | [43-40] |    y    |
-     |--------------------------------------------------|
-     | SM3                          | [39-36] |    y    |
-     |--------------------------------------------------|
-     | SHA3                         | [35-32] |    y    |
-     |--------------------------------------------------|
-     | RDM                          | [31-28] |    y    |
-     |--------------------------------------------------|
-     | ATOMICS                      | [23-20] |    y    |
-     |--------------------------------------------------|
-     | CRC32                        | [19-16] |    y    |
-     |--------------------------------------------------|
-     | SHA2                         | [15-12] |    y    |
-     |--------------------------------------------------|
-     | SHA1                         | [11-8]  |    y    |
-     |--------------------------------------------------|
-     | AES                          | [7-4]   |    y    |
-     x--------------------------------------------------x
-
-
-  2) ID_AA64PFR0_EL1 - Processor Feature Register 0
-     x--------------------------------------------------x
-     | Name                         |  bits   | visible |
-     |--------------------------------------------------|
-     | DIT                          | [51-48] |    y    |
-     |--------------------------------------------------|
-     | SVE                          | [35-32] |    y    |
-     |--------------------------------------------------|
-     | GIC                          | [27-24] |    n    |
-     |--------------------------------------------------|
-     | AdvSIMD                      | [23-20] |    y    |
-     |--------------------------------------------------|
-     | FP                           | [19-16] |    y    |
-     |--------------------------------------------------|
-     | EL3                          | [15-12] |    n    |
-     |--------------------------------------------------|
-     | EL2                          | [11-8]  |    n    |
-     |--------------------------------------------------|
-     | EL1                          | [7-4]   |    n    |
-     |--------------------------------------------------|
-     | EL0                          | [3-0]   |    n    |
-     x--------------------------------------------------x
-
-
-  3) MIDR_EL1 - Main ID Register
-     x--------------------------------------------------x
-     | Name                         |  bits   | visible |
-     |--------------------------------------------------|
-     | Implementer                  | [31-24] |    y    |
-     |--------------------------------------------------|
-     | Variant                      | [23-20] |    y    |
-     |--------------------------------------------------|
-     | Architecture                 | [19-16] |    y    |
-     |--------------------------------------------------|
-     | PartNum                      | [15-4]  |    y    |
-     |--------------------------------------------------|
-     | Revision                     | [3-0]   |    y    |
-     x--------------------------------------------------x
-
-   NOTE: The 'visible' fields of MIDR_EL1 will contain the value
-   as available on the CPU where it is fetched and is not a system
-   wide safe value.
-
-  4) ID_AA64ISAR1_EL1 - Instruction set attribute register 1
-
-     x--------------------------------------------------x
-     | Name                         |  bits   | visible |
-     |--------------------------------------------------|
-     | GPI                          | [31-28] |    y    |
-     |--------------------------------------------------|
-     | GPA                          | [27-24] |    y    |
-     |--------------------------------------------------|
-     | LRCPC                        | [23-20] |    y    |
-     |--------------------------------------------------|
-     | FCMA                         | [19-16] |    y    |
-     |--------------------------------------------------|
-     | JSCVT                        | [15-12] |    y    |
-     |--------------------------------------------------|
-     | API                          | [11-8]  |    y    |
-     |--------------------------------------------------|
-     | APA                          | [7-4]   |    y    |
-     |--------------------------------------------------|
-     | DPB                          | [3-0]   |    y    |
-     x--------------------------------------------------x
-
-  5) ID_AA64MMFR2_EL1 - Memory model feature register 2
-
-     x--------------------------------------------------x
-     | Name                         |  bits   | visible |
-     |--------------------------------------------------|
-     | AT                           | [35-32] |    y    |
-     x--------------------------------------------------x
-
-  6) ID_AA64ZFR0_EL1 - SVE feature ID register 0
-
-     x--------------------------------------------------x
-     | Name                         |  bits   | visible |
-     |--------------------------------------------------|
-     | SM4                          | [43-40] |    y    |
-     |--------------------------------------------------|
-     | SHA3                         | [35-32] |    y    |
-     |--------------------------------------------------|
-     | BitPerm                      | [19-16] |    y    |
-     |--------------------------------------------------|
-     | AES                          | [7-4]   |    y    |
-     |--------------------------------------------------|
-     | SVEVer                       | [3-0]   |    y    |
-     x--------------------------------------------------x
-
-Appendix I: Example
----------------------------
-
-/*
- * Sample program to demonstrate the MRS emulation ABI.
- *
- * Copyright (C) 2015-2016, ARM Ltd
- *
- * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <asm/hwcap.h>
-#include <stdio.h>
-#include <sys/auxv.h>
-
-#define get_cpu_ftr(id) ({					\
-		unsigned long __val;				\
-		asm("mrs %0, "#id : "=r" (__val));		\
-		printf("%-20s: 0x%016lx\n", #id, __val);	\
-	})
-
-int main(void)
-{
-
-	if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
-		fputs("CPUID registers unavailable\n", stderr);
-		return 1;
-	}
-
-	get_cpu_ftr(ID_AA64ISAR0_EL1);
-	get_cpu_ftr(ID_AA64ISAR1_EL1);
-	get_cpu_ftr(ID_AA64MMFR0_EL1);
-	get_cpu_ftr(ID_AA64MMFR1_EL1);
-	get_cpu_ftr(ID_AA64PFR0_EL1);
-	get_cpu_ftr(ID_AA64PFR1_EL1);
-	get_cpu_ftr(ID_AA64DFR0_EL1);
-	get_cpu_ftr(ID_AA64DFR1_EL1);
-
-	get_cpu_ftr(MIDR_EL1);
-	get_cpu_ftr(MPIDR_EL1);
-	get_cpu_ftr(REVIDR_EL1);
-
-#if 0
-	/* Unexposed register access causes SIGILL */
-	get_cpu_ftr(ID_MMFR0_EL1);
-#endif
-
-	return 0;
-}
-
-
-
diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
new file mode 100644
index 000000000000..c7cbf4b571c0
--- /dev/null
+++ b/Documentation/arm64/elf_hwcaps.rst
@@ -0,0 +1,201 @@
+================
+ARM64 ELF hwcaps
+================
+
+This document describes the usage and semantics of the arm64 ELF hwcaps.
+
+
+1. Introduction
+---------------
+
+Some hardware or software features are only available on some CPU
+implementations, and/or with certain kernel configurations, but have no
+architected discovery mechanism available to userspace code at EL0. The
+kernel exposes the presence of these features to userspace through a set
+of flags called hwcaps, exposed in the auxilliary vector.
+
+Userspace software can test for features by acquiring the AT_HWCAP or
+AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant
+flags are set, e.g.::
+
+	bool floating_point_is_present(void)
+	{
+		unsigned long hwcaps = getauxval(AT_HWCAP);
+		if (hwcaps & HWCAP_FP)
+			return true;
+
+		return false;
+	}
+
+Where software relies on a feature described by a hwcap, it should check
+the relevant hwcap flag to verify that the feature is present before
+attempting to make use of the feature.
+
+Features cannot be probed reliably through other means. When a feature
+is not available, attempting to use it may result in unpredictable
+behaviour, and is not guaranteed to result in any reliable indication
+that the feature is unavailable, such as a SIGILL.
+
+
+2. Interpretation of hwcaps
+---------------------------
+
+The majority of hwcaps are intended to indicate the presence of features
+which are described by architected ID registers inaccessible to
+userspace code at EL0. These hwcaps are defined in terms of ID register
+fields, and should be interpreted with reference to the definition of
+these fields in the ARM Architecture Reference Manual (ARM ARM).
+
+Such hwcaps are described below in the form::
+
+    Functionality implied by idreg.field == val.
+
+Such hwcaps indicate the availability of functionality that the ARM ARM
+defines as being present when idreg.field has value val, but do not
+indicate that idreg.field is precisely equal to val, nor do they
+indicate the absence of functionality implied by other values of
+idreg.field.
+
+Other hwcaps may indicate the presence of features which cannot be
+described by ID registers alone. These may be described without
+reference to ID registers, and may refer to other documentation.
+
+
+3. The hwcaps exposed in AT_HWCAP
+---------------------------------
+
+HWCAP_FP
+    Functionality implied by ID_AA64PFR0_EL1.FP == 0b0000.
+
+HWCAP_ASIMD
+    Functionality implied by ID_AA64PFR0_EL1.AdvSIMD == 0b0000.
+
+HWCAP_EVTSTRM
+    The generic timer is configured to generate events at a frequency of
+    approximately 100KHz.
+
+HWCAP_AES
+    Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0001.
+
+HWCAP_PMULL
+    Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0010.
+
+HWCAP_SHA1
+    Functionality implied by ID_AA64ISAR0_EL1.SHA1 == 0b0001.
+
+HWCAP_SHA2
+    Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0001.
+
+HWCAP_CRC32
+    Functionality implied by ID_AA64ISAR0_EL1.CRC32 == 0b0001.
+
+HWCAP_ATOMICS
+    Functionality implied by ID_AA64ISAR0_EL1.Atomic == 0b0010.
+
+HWCAP_FPHP
+    Functionality implied by ID_AA64PFR0_EL1.FP == 0b0001.
+
+HWCAP_ASIMDHP
+    Functionality implied by ID_AA64PFR0_EL1.AdvSIMD == 0b0001.
+
+HWCAP_CPUID
+    EL0 access to certain ID registers is available, to the extent
+    described by Documentation/arm64/cpu-feature-registers.rst.
+
+    These ID registers may imply the availability of features.
+
+HWCAP_ASIMDRDM
+    Functionality implied by ID_AA64ISAR0_EL1.RDM == 0b0001.
+
+HWCAP_JSCVT
+    Functionality implied by ID_AA64ISAR1_EL1.JSCVT == 0b0001.
+
+HWCAP_FCMA
+    Functionality implied by ID_AA64ISAR1_EL1.FCMA == 0b0001.
+
+HWCAP_LRCPC
+    Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0001.
+
+HWCAP_DCPOP
+    Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0001.
+
+HWCAP2_DCPODP
+
+    Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
+
+HWCAP_SHA3
+    Functionality implied by ID_AA64ISAR0_EL1.SHA3 == 0b0001.
+
+HWCAP_SM3
+    Functionality implied by ID_AA64ISAR0_EL1.SM3 == 0b0001.
+
+HWCAP_SM4
+    Functionality implied by ID_AA64ISAR0_EL1.SM4 == 0b0001.
+
+HWCAP_ASIMDDP
+    Functionality implied by ID_AA64ISAR0_EL1.DP == 0b0001.
+
+HWCAP_SHA512
+    Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0010.
+
+HWCAP_SVE
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001.
+
+HWCAP2_SVE2
+
+    Functionality implied by ID_AA64ZFR0_EL1.SVEVer == 0b0001.
+
+HWCAP2_SVEAES
+
+    Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001.
+
+HWCAP2_SVEPMULL
+
+    Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0010.
+
+HWCAP2_SVEBITPERM
+
+    Functionality implied by ID_AA64ZFR0_EL1.BitPerm == 0b0001.
+
+HWCAP2_SVESHA3
+
+    Functionality implied by ID_AA64ZFR0_EL1.SHA3 == 0b0001.
+
+HWCAP2_SVESM4
+
+    Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001.
+
+HWCAP_ASIMDFHM
+   Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001.
+
+HWCAP_DIT
+    Functionality implied by ID_AA64PFR0_EL1.DIT == 0b0001.
+
+HWCAP_USCAT
+    Functionality implied by ID_AA64MMFR2_EL1.AT == 0b0001.
+
+HWCAP_ILRCPC
+    Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0010.
+
+HWCAP_FLAGM
+    Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
+
+HWCAP_SSBS
+    Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010.
+
+HWCAP_PACA
+    Functionality implied by ID_AA64ISAR1_EL1.APA == 0b0001 or
+    ID_AA64ISAR1_EL1.API == 0b0001, as described by
+    Documentation/arm64/pointer-authentication.rst.
+
+HWCAP_PACG
+    Functionality implied by ID_AA64ISAR1_EL1.GPA == 0b0001 or
+    ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
+    Documentation/arm64/pointer-authentication.rst.
+
+
+4. Unused AT_HWCAP bits
+-----------------------
+
+For interoperation with userspace, the kernel guarantees that bits 62
+and 63 of AT_HWCAP will always be returned as 0.
diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt
deleted file mode 100644
index b73a2519ecf2..000000000000
--- a/Documentation/arm64/elf_hwcaps.txt
+++ /dev/null
@@ -1,231 +0,0 @@
-ARM64 ELF hwcaps
-================
-
-This document describes the usage and semantics of the arm64 ELF hwcaps.
-
-
-1. Introduction
----------------
-
-Some hardware or software features are only available on some CPU
-implementations, and/or with certain kernel configurations, but have no
-architected discovery mechanism available to userspace code at EL0. The
-kernel exposes the presence of these features to userspace through a set
-of flags called hwcaps, exposed in the auxilliary vector.
-
-Userspace software can test for features by acquiring the AT_HWCAP or
-AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant
-flags are set, e.g.
-
-bool floating_point_is_present(void)
-{
-	unsigned long hwcaps = getauxval(AT_HWCAP);
-	if (hwcaps & HWCAP_FP)
-		return true;
-
-	return false;
-}
-
-Where software relies on a feature described by a hwcap, it should check
-the relevant hwcap flag to verify that the feature is present before
-attempting to make use of the feature.
-
-Features cannot be probed reliably through other means. When a feature
-is not available, attempting to use it may result in unpredictable
-behaviour, and is not guaranteed to result in any reliable indication
-that the feature is unavailable, such as a SIGILL.
-
-
-2. Interpretation of hwcaps
----------------------------
-
-The majority of hwcaps are intended to indicate the presence of features
-which are described by architected ID registers inaccessible to
-userspace code at EL0. These hwcaps are defined in terms of ID register
-fields, and should be interpreted with reference to the definition of
-these fields in the ARM Architecture Reference Manual (ARM ARM).
-
-Such hwcaps are described below in the form:
-
-    Functionality implied by idreg.field == val.
-
-Such hwcaps indicate the availability of functionality that the ARM ARM
-defines as being present when idreg.field has value val, but do not
-indicate that idreg.field is precisely equal to val, nor do they
-indicate the absence of functionality implied by other values of
-idreg.field.
-
-Other hwcaps may indicate the presence of features which cannot be
-described by ID registers alone. These may be described without
-reference to ID registers, and may refer to other documentation.
-
-
-3. The hwcaps exposed in AT_HWCAP
----------------------------------
-
-HWCAP_FP
-
-    Functionality implied by ID_AA64PFR0_EL1.FP == 0b0000.
-
-HWCAP_ASIMD
-
-    Functionality implied by ID_AA64PFR0_EL1.AdvSIMD == 0b0000.
-
-HWCAP_EVTSTRM
-
-    The generic timer is configured to generate events at a frequency of
-    approximately 100KHz.
-
-HWCAP_AES
-
-    Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0001.
-
-HWCAP_PMULL
-
-    Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0010.
-
-HWCAP_SHA1
-
-    Functionality implied by ID_AA64ISAR0_EL1.SHA1 == 0b0001.
-
-HWCAP_SHA2
-
-    Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0001.
-
-HWCAP_CRC32
-
-    Functionality implied by ID_AA64ISAR0_EL1.CRC32 == 0b0001.
-
-HWCAP_ATOMICS
-
-    Functionality implied by ID_AA64ISAR0_EL1.Atomic == 0b0010.
-
-HWCAP_FPHP
-
-    Functionality implied by ID_AA64PFR0_EL1.FP == 0b0001.
-
-HWCAP_ASIMDHP
-
-    Functionality implied by ID_AA64PFR0_EL1.AdvSIMD == 0b0001.
-
-HWCAP_CPUID
-
-    EL0 access to certain ID registers is available, to the extent
-    described by Documentation/arm64/cpu-feature-registers.txt.
-
-    These ID registers may imply the availability of features.
-
-HWCAP_ASIMDRDM
-
-    Functionality implied by ID_AA64ISAR0_EL1.RDM == 0b0001.
-
-HWCAP_JSCVT
-
-    Functionality implied by ID_AA64ISAR1_EL1.JSCVT == 0b0001.
-
-HWCAP_FCMA
-
-    Functionality implied by ID_AA64ISAR1_EL1.FCMA == 0b0001.
-
-HWCAP_LRCPC
-
-    Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0001.
-
-HWCAP_DCPOP
-
-    Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0001.
-
-HWCAP2_DCPODP
-
-    Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
-
-HWCAP_SHA3
-
-    Functionality implied by ID_AA64ISAR0_EL1.SHA3 == 0b0001.
-
-HWCAP_SM3
-
-    Functionality implied by ID_AA64ISAR0_EL1.SM3 == 0b0001.
-
-HWCAP_SM4
-
-    Functionality implied by ID_AA64ISAR0_EL1.SM4 == 0b0001.
-
-HWCAP_ASIMDDP
-
-    Functionality implied by ID_AA64ISAR0_EL1.DP == 0b0001.
-
-HWCAP_SHA512
-
-    Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0010.
-
-HWCAP_SVE
-
-    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001.
-
-HWCAP2_SVE2
-
-    Functionality implied by ID_AA64ZFR0_EL1.SVEVer == 0b0001.
-
-HWCAP2_SVEAES
-
-    Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001.
-
-HWCAP2_SVEPMULL
-
-    Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0010.
-
-HWCAP2_SVEBITPERM
-
-    Functionality implied by ID_AA64ZFR0_EL1.BitPerm == 0b0001.
-
-HWCAP2_SVESHA3
-
-    Functionality implied by ID_AA64ZFR0_EL1.SHA3 == 0b0001.
-
-HWCAP2_SVESM4
-
-    Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001.
-
-HWCAP_ASIMDFHM
-
-   Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001.
-
-HWCAP_DIT
-
-    Functionality implied by ID_AA64PFR0_EL1.DIT == 0b0001.
-
-HWCAP_USCAT
-
-    Functionality implied by ID_AA64MMFR2_EL1.AT == 0b0001.
-
-HWCAP_ILRCPC
-
-    Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0010.
-
-HWCAP_FLAGM
-
-    Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
-
-HWCAP_SSBS
-
-    Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010.
-
-HWCAP_PACA
-
-    Functionality implied by ID_AA64ISAR1_EL1.APA == 0b0001 or
-    ID_AA64ISAR1_EL1.API == 0b0001, as described by
-    Documentation/arm64/pointer-authentication.txt.
-
-HWCAP_PACG
-
-    Functionality implied by ID_AA64ISAR1_EL1.GPA == 0b0001 or
-    ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
-    Documentation/arm64/pointer-authentication.txt.
-
-
-4. Unused AT_HWCAP bits
------------------------
-
-For interoperation with userspace, the kernel guarantees that bits 62
-and 63 of AT_HWCAP will always be returned as 0.
diff --git a/Documentation/arm64/hugetlbpage.rst b/Documentation/arm64/hugetlbpage.rst
new file mode 100644
index 000000000000..b44f939e5210
--- /dev/null
+++ b/Documentation/arm64/hugetlbpage.rst
@@ -0,0 +1,41 @@
+====================
+HugeTLBpage on ARM64
+====================
+
+Hugepage relies on making efficient use of TLBs to improve performance of
+address translations. The benefit depends on both -
+
+  - the size of hugepages
+  - size of entries supported by the TLBs
+
+The ARM64 port supports two flavours of hugepages.
+
+1) Block mappings at the pud/pmd level
+--------------------------------------
+
+These are regular hugepages where a pmd or a pud page table entry points to a
+block of memory. Regardless of the supported size of entries in TLB, block
+mappings reduce the depth of page table walk needed to translate hugepage
+addresses.
+
+2) Using the Contiguous bit
+---------------------------
+
+The architecture provides a contiguous bit in the translation table entries
+(D4.5.3, ARM DDI 0487C.a) that hints to the MMU to indicate that it is one of a
+contiguous set of entries that can be cached in a single TLB entry.
+
+The contiguous bit is used in Linux to increase the mapping size at the pmd and
+pte (last) level. The number of supported contiguous entries varies by page size
+and level of the page table.
+
+
+The following hugepage sizes are supported -
+
+  ====== ========   ====    ========    ===
+  -      CONT PTE    PMD    CONT PMD    PUD
+  ====== ========   ====    ========    ===
+  4K:         64K     2M         32M     1G
+  16K:         2M    32M          1G
+  64K:         2M   512M         16G
+  ====== ========   ====    ========    ===
diff --git a/Documentation/arm64/hugetlbpage.txt b/Documentation/arm64/hugetlbpage.txt
deleted file mode 100644
index cfae87dc653b..000000000000
--- a/Documentation/arm64/hugetlbpage.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-HugeTLBpage on ARM64
-====================
-
-Hugepage relies on making efficient use of TLBs to improve performance of
-address translations. The benefit depends on both -
-
-  - the size of hugepages
-  - size of entries supported by the TLBs
-
-The ARM64 port supports two flavours of hugepages.
-
-1) Block mappings at the pud/pmd level
---------------------------------------
-
-These are regular hugepages where a pmd or a pud page table entry points to a
-block of memory. Regardless of the supported size of entries in TLB, block
-mappings reduce the depth of page table walk needed to translate hugepage
-addresses.
-
-2) Using the Contiguous bit
----------------------------
-
-The architecture provides a contiguous bit in the translation table entries
-(D4.5.3, ARM DDI 0487C.a) that hints to the MMU to indicate that it is one of a
-contiguous set of entries that can be cached in a single TLB entry.
-
-The contiguous bit is used in Linux to increase the mapping size at the pmd and
-pte (last) level. The number of supported contiguous entries varies by page size
-and level of the page table.
-
-
-The following hugepage sizes are supported -
-
-         CONT PTE    PMD    CONT PMD    PUD
-         --------    ---    --------    ---
-  4K:         64K     2M         32M     1G
-  16K:         2M    32M          1G
-  64K:         2M   512M         16G
diff --git a/Documentation/arm64/index.rst b/Documentation/arm64/index.rst
new file mode 100644
index 000000000000..018b7836ecb7
--- /dev/null
+++ b/Documentation/arm64/index.rst
@@ -0,0 +1,28 @@
+:orphan:
+
+==================
+ARM64 Architecture
+==================
+
+.. toctree::
+    :maxdepth: 1
+
+    acpi_object_usage
+    arm-acpi
+    booting
+    cpu-feature-registers
+    elf_hwcaps
+    hugetlbpage
+    legacy_instructions
+    memory
+    pointer-authentication
+    silicon-errata
+    sve
+    tagged-pointers
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/arm64/legacy_instructions.rst b/Documentation/arm64/legacy_instructions.rst
new file mode 100644
index 000000000000..54401b22cb8f
--- /dev/null
+++ b/Documentation/arm64/legacy_instructions.rst
@@ -0,0 +1,68 @@
+===================
+Legacy instructions
+===================
+
+The arm64 port of the Linux kernel provides infrastructure to support
+emulation of instructions which have been deprecated, or obsoleted in
+the architecture. The infrastructure code uses undefined instruction
+hooks to support emulation. Where available it also allows turning on
+the instruction execution in hardware.
+
+The emulation mode can be controlled by writing to sysctl nodes
+(/proc/sys/abi). The following explains the different execution
+behaviours and the corresponding values of the sysctl nodes -
+
+* Undef
+    Value: 0
+
+  Generates undefined instruction abort. Default for instructions that
+  have been obsoleted in the architecture, e.g., SWP
+
+* Emulate
+    Value: 1
+
+  Uses software emulation. To aid migration of software, in this mode
+  usage of emulated instruction is traced as well as rate limited
+  warnings are issued. This is the default for deprecated
+  instructions, .e.g., CP15 barriers
+
+* Hardware Execution
+    Value: 2
+
+  Although marked as deprecated, some implementations may support the
+  enabling/disabling of hardware support for the execution of these
+  instructions. Using hardware execution generally provides better
+  performance, but at the loss of ability to gather runtime statistics
+  about the use of the deprecated instructions.
+
+The default mode depends on the status of the instruction in the
+architecture. Deprecated instructions should default to emulation
+while obsolete instructions must be undefined by default.
+
+Note: Instruction emulation may not be possible in all cases. See
+individual instruction notes for further information.
+
+Supported legacy instructions
+-----------------------------
+* SWP{B}
+
+:Node: /proc/sys/abi/swp
+:Status: Obsolete
+:Default: Undef (0)
+
+* CP15 Barriers
+
+:Node: /proc/sys/abi/cp15_barrier
+:Status: Deprecated
+:Default: Emulate (1)
+
+* SETEND
+
+:Node: /proc/sys/abi/setend
+:Status: Deprecated
+:Default: Emulate (1)*
+
+  Note: All the cpus on the system must have mixed endian support at EL0
+  for this feature to be enabled. If a new CPU - which doesn't support mixed
+  endian - is hotplugged in after this feature has been enabled, there could
+  be unexpected results in the application.
diff --git a/Documentation/arm64/legacy_instructions.txt b/Documentation/arm64/legacy_instructions.txt
deleted file mode 100644
index 01bf3d9fac85..000000000000
--- a/Documentation/arm64/legacy_instructions.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-The arm64 port of the Linux kernel provides infrastructure to support
-emulation of instructions which have been deprecated, or obsoleted in
-the architecture. The infrastructure code uses undefined instruction
-hooks to support emulation. Where available it also allows turning on
-the instruction execution in hardware.
-
-The emulation mode can be controlled by writing to sysctl nodes
-(/proc/sys/abi). The following explains the different execution
-behaviours and the corresponding values of the sysctl nodes -
-
-* Undef
-  Value: 0
-  Generates undefined instruction abort. Default for instructions that
-  have been obsoleted in the architecture, e.g., SWP
-
-* Emulate
-  Value: 1
-  Uses software emulation. To aid migration of software, in this mode
-  usage of emulated instruction is traced as well as rate limited
-  warnings are issued. This is the default for deprecated
-  instructions, .e.g., CP15 barriers
-
-* Hardware Execution
-  Value: 2
-  Although marked as deprecated, some implementations may support the
-  enabling/disabling of hardware support for the execution of these
-  instructions. Using hardware execution generally provides better
-  performance, but at the loss of ability to gather runtime statistics
-  about the use of the deprecated instructions.
-
-The default mode depends on the status of the instruction in the
-architecture. Deprecated instructions should default to emulation
-while obsolete instructions must be undefined by default.
-
-Note: Instruction emulation may not be possible in all cases. See
-individual instruction notes for further information.
-
-Supported legacy instructions
------------------------------
-* SWP{B}
-Node: /proc/sys/abi/swp
-Status: Obsolete
-Default: Undef (0)
-
-* CP15 Barriers
-Node: /proc/sys/abi/cp15_barrier
-Status: Deprecated
-Default: Emulate (1)
-
-* SETEND
-Node: /proc/sys/abi/setend
-Status: Deprecated
-Default: Emulate (1)*
-Note: All the cpus on the system must have mixed endian support at EL0
-for this feature to be enabled. If a new CPU - which doesn't support mixed
-endian - is hotplugged in after this feature has been enabled, there could
-be unexpected results in the application.
diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
new file mode 100644
index 000000000000..464b880fc4b7
--- /dev/null
+++ b/Documentation/arm64/memory.rst
@@ -0,0 +1,98 @@
+==============================
+Memory Layout on AArch64 Linux
+==============================
+
+Author: Catalin Marinas <catalin.marinas@arm.com>
+
+This document describes the virtual memory layout used by the AArch64
+Linux kernel. The architecture allows up to 4 levels of translation
+tables with a 4KB page size and up to 3 levels with a 64KB page size.
+
+AArch64 Linux uses either 3 levels or 4 levels of translation tables
+with the 4KB page configuration, allowing 39-bit (512GB) or 48-bit
+(256TB) virtual addresses, respectively, for both user and kernel. With
+64KB pages, only 2 levels of translation tables, allowing 42-bit (4TB)
+virtual address, are used but the memory layout is the same.
+
+User addresses have bits 63:48 set to 0 while the kernel addresses have
+the same bits set to 1. TTBRx selection is given by bit 63 of the
+virtual address. The swapper_pg_dir contains only kernel (global)
+mappings while the user pgd contains only user (non-global) mappings.
+The swapper_pg_dir address is written to TTBR1 and never written to
+TTBR0.
+
+
+AArch64 Linux memory layout with 4KB pages + 3 levels::
+
+  Start			End			Size		Use
+  -----------------------------------------------------------------------
+  0000000000000000	0000007fffffffff	 512GB		user
+  ffffff8000000000	ffffffffffffffff	 512GB		kernel
+
+
+AArch64 Linux memory layout with 4KB pages + 4 levels::
+
+  Start			End			Size		Use
+  -----------------------------------------------------------------------
+  0000000000000000	0000ffffffffffff	 256TB		user
+  ffff000000000000	ffffffffffffffff	 256TB		kernel
+
+
+AArch64 Linux memory layout with 64KB pages + 2 levels::
+
+  Start			End			Size		Use
+  -----------------------------------------------------------------------
+  0000000000000000	000003ffffffffff	   4TB		user
+  fffffc0000000000	ffffffffffffffff	   4TB		kernel
+
+
+AArch64 Linux memory layout with 64KB pages + 3 levels::
+
+  Start			End			Size		Use
+  -----------------------------------------------------------------------
+  0000000000000000	0000ffffffffffff	 256TB		user
+  ffff000000000000	ffffffffffffffff	 256TB		kernel
+
+
+For details of the virtual kernel memory layout please see the kernel
+booting log.
+
+
+Translation table lookup with 4KB pages::
+
+  +--------+--------+--------+--------+--------+--------+--------+--------+
+  |63    56|55    48|47    40|39    32|31    24|23    16|15     8|7      0|
+  +--------+--------+--------+--------+--------+--------+--------+--------+
+   |                 |         |         |         |         |
+   |                 |         |         |         |         v
+   |                 |         |         |         |   [11:0]  in-page offset
+   |                 |         |         |         +-> [20:12] L3 index
+   |                 |         |         +-----------> [29:21] L2 index
+   |                 |         +---------------------> [38:30] L1 index
+   |                 +-------------------------------> [47:39] L0 index
+   +-------------------------------------------------> [63] TTBR0/1
+
+
+Translation table lookup with 64KB pages::
+
+  +--------+--------+--------+--------+--------+--------+--------+--------+
+  |63    56|55    48|47    40|39    32|31    24|23    16|15     8|7      0|
+  +--------+--------+--------+--------+--------+--------+--------+--------+
+   |                 |    |               |              |
+   |                 |    |               |              v
+   |                 |    |               |            [15:0]  in-page offset
+   |                 |    |               +----------> [28:16] L3 index
+   |                 |    +--------------------------> [41:29] L2 index
+   |                 +-------------------------------> [47:42] L1 index
+   +-------------------------------------------------> [63] TTBR0/1
+
+
+When using KVM without the Virtualization Host Extensions, the
+hypervisor maps kernel pages in EL2 at a fixed (and potentially
+random) offset from the linear mapping. See the kern_hyp_va macro and
+kvm_update_va_mask function for more details. MMIO devices such as
+GICv2 gets mapped next to the HYP idmap page, as do vectors when
+ARM64_HARDEN_EL2_VECTORS is selected for particular CPUs.
+
+When using KVM with the Virtualization Host Extensions, no additional
+mappings are created, since the host kernel runs directly in EL2.
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
deleted file mode 100644
index c5dab30d3389..000000000000
--- a/Documentation/arm64/memory.txt
+++ /dev/null
@@ -1,97 +0,0 @@
-		     Memory Layout on AArch64 Linux
-		     ==============================
-
-Author: Catalin Marinas <catalin.marinas@arm.com>
-
-This document describes the virtual memory layout used by the AArch64
-Linux kernel. The architecture allows up to 4 levels of translation
-tables with a 4KB page size and up to 3 levels with a 64KB page size.
-
-AArch64 Linux uses either 3 levels or 4 levels of translation tables
-with the 4KB page configuration, allowing 39-bit (512GB) or 48-bit
-(256TB) virtual addresses, respectively, for both user and kernel. With
-64KB pages, only 2 levels of translation tables, allowing 42-bit (4TB)
-virtual address, are used but the memory layout is the same.
-
-User addresses have bits 63:48 set to 0 while the kernel addresses have
-the same bits set to 1. TTBRx selection is given by bit 63 of the
-virtual address. The swapper_pg_dir contains only kernel (global)
-mappings while the user pgd contains only user (non-global) mappings.
-The swapper_pg_dir address is written to TTBR1 and never written to
-TTBR0.
-
-
-AArch64 Linux memory layout with 4KB pages + 3 levels:
-
-Start			End			Size		Use
------------------------------------------------------------------------
-0000000000000000	0000007fffffffff	 512GB		user
-ffffff8000000000	ffffffffffffffff	 512GB		kernel
-
-
-AArch64 Linux memory layout with 4KB pages + 4 levels:
-
-Start			End			Size		Use
------------------------------------------------------------------------
-0000000000000000	0000ffffffffffff	 256TB		user
-ffff000000000000	ffffffffffffffff	 256TB		kernel
-
-
-AArch64 Linux memory layout with 64KB pages + 2 levels:
-
-Start			End			Size		Use
------------------------------------------------------------------------
-0000000000000000	000003ffffffffff	   4TB		user
-fffffc0000000000	ffffffffffffffff	   4TB		kernel
-
-
-AArch64 Linux memory layout with 64KB pages + 3 levels:
-
-Start			End			Size		Use
------------------------------------------------------------------------
-0000000000000000	0000ffffffffffff	 256TB		user
-ffff000000000000	ffffffffffffffff	 256TB		kernel
-
-
-For details of the virtual kernel memory layout please see the kernel
-booting log.
-
-
-Translation table lookup with 4KB pages:
-
-+--------+--------+--------+--------+--------+--------+--------+--------+
-|63    56|55    48|47    40|39    32|31    24|23    16|15     8|7      0|
-+--------+--------+--------+--------+--------+--------+--------+--------+
- |                 |         |         |         |         |
- |                 |         |         |         |         v
- |                 |         |         |         |   [11:0]  in-page offset
- |                 |         |         |         +-> [20:12] L3 index
- |                 |         |         +-----------> [29:21] L2 index
- |                 |         +---------------------> [38:30] L1 index
- |                 +-------------------------------> [47:39] L0 index
- +-------------------------------------------------> [63] TTBR0/1
-
-
-Translation table lookup with 64KB pages:
-
-+--------+--------+--------+--------+--------+--------+--------+--------+
-|63    56|55    48|47    40|39    32|31    24|23    16|15     8|7      0|
-+--------+--------+--------+--------+--------+--------+--------+--------+
- |                 |    |               |              |
- |                 |    |               |              v
- |                 |    |               |            [15:0]  in-page offset
- |                 |    |               +----------> [28:16] L3 index
- |                 |    +--------------------------> [41:29] L2 index
- |                 +-------------------------------> [47:42] L1 index
- +-------------------------------------------------> [63] TTBR0/1
-
-
-When using KVM without the Virtualization Host Extensions, the
-hypervisor maps kernel pages in EL2 at a fixed (and potentially
-random) offset from the linear mapping. See the kern_hyp_va macro and
-kvm_update_va_mask function for more details. MMIO devices such as
-GICv2 gets mapped next to the HYP idmap page, as do vectors when
-ARM64_HARDEN_EL2_VECTORS is selected for particular CPUs.
-
-When using KVM with the Virtualization Host Extensions, no additional
-mappings are created, since the host kernel runs directly in EL2.
diff --git a/Documentation/arm64/pointer-authentication.rst b/Documentation/arm64/pointer-authentication.rst
new file mode 100644
index 000000000000..30b2ab06526b
--- /dev/null
+++ b/Documentation/arm64/pointer-authentication.rst
@@ -0,0 +1,109 @@
+=======================================
+Pointer authentication in AArch64 Linux
+=======================================
+
+Author: Mark Rutland <mark.rutland@arm.com>
+
+Date: 2017-07-19
+
+This document briefly describes the provision of pointer authentication
+functionality in AArch64 Linux.
+
+
+Architecture overview
+---------------------
+
+The ARMv8.3 Pointer Authentication extension adds primitives that can be
+used to mitigate certain classes of attack where an attacker can corrupt
+the contents of some memory (e.g. the stack).
+
+The extension uses a Pointer Authentication Code (PAC) to determine
+whether pointers have been modified unexpectedly. A PAC is derived from
+a pointer, another value (such as the stack pointer), and a secret key
+held in system registers.
+
+The extension adds instructions to insert a valid PAC into a pointer,
+and to verify/remove the PAC from a pointer. The PAC occupies a number
+of high-order bits of the pointer, which varies dependent on the
+configured virtual address size and whether pointer tagging is in use.
+
+A subset of these instructions have been allocated from the HINT
+encoding space. In the absence of the extension (or when disabled),
+these instructions behave as NOPs. Applications and libraries using
+these instructions operate correctly regardless of the presence of the
+extension.
+
+The extension provides five separate keys to generate PACs - two for
+instruction addresses (APIAKey, APIBKey), two for data addresses
+(APDAKey, APDBKey), and one for generic authentication (APGAKey).
+
+
+Basic support
+-------------
+
+When CONFIG_ARM64_PTR_AUTH is selected, and relevant HW support is
+present, the kernel will assign random key values to each process at
+exec*() time. The keys are shared by all threads within the process, and
+are preserved across fork().
+
+Presence of address authentication functionality is advertised via
+HWCAP_PACA, and generic authentication functionality via HWCAP_PACG.
+
+The number of bits that the PAC occupies in a pointer is 55 minus the
+virtual address size configured by the kernel. For example, with a
+virtual address size of 48, the PAC is 7 bits wide.
+
+Recent versions of GCC can compile code with APIAKey-based return
+address protection when passed the -msign-return-address option. This
+uses instructions in the HINT space (unless -march=armv8.3-a or higher
+is also passed), and such code can run on systems without the pointer
+authentication extension.
+
+In addition to exec(), keys can also be reinitialized to random values
+using the PR_PAC_RESET_KEYS prctl. A bitmask of PR_PAC_APIAKEY,
+PR_PAC_APIBKEY, PR_PAC_APDAKEY, PR_PAC_APDBKEY and PR_PAC_APGAKEY
+specifies which keys are to be reinitialized; specifying 0 means "all
+keys".
+
+
+Debugging
+---------
+
+When CONFIG_ARM64_PTR_AUTH is selected, and HW support for address
+authentication is present, the kernel will expose the position of TTBR0
+PAC bits in the NT_ARM_PAC_MASK regset (struct user_pac_mask), which
+userspace can acquire via PTRACE_GETREGSET.
+
+The regset is exposed only when HWCAP_PACA is set. Separate masks are
+exposed for data pointers and instruction pointers, as the set of PAC
+bits can vary between the two. Note that the masks apply to TTBR0
+addresses, and are not valid to apply to TTBR1 addresses (e.g. kernel
+pointers).
+
+Additionally, when CONFIG_CHECKPOINT_RESTORE is also set, the kernel
+will expose the NT_ARM_PACA_KEYS and NT_ARM_PACG_KEYS regsets (struct
+user_pac_address_keys and struct user_pac_generic_keys). These can be
+used to get and set the keys for a thread.
+
+
+Virtualization
+--------------
+
+Pointer authentication is enabled in KVM guest when each virtual cpu is
+initialised by passing flags KVM_ARM_VCPU_PTRAUTH_[ADDRESS/GENERIC] and
+requesting these two separate cpu features to be enabled. The current KVM
+guest implementation works by enabling both features together, so both
+these userspace flags are checked before enabling pointer authentication.
+The separate userspace flag will allow to have no userspace ABI changes
+if support is added in the future to allow these two features to be
+enabled independently of one another.
+
+As Arm Architecture specifies that Pointer Authentication feature is
+implemented along with the VHE feature so KVM arm64 ptrauth code relies
+on VHE mode to be present.
+
+Additionally, when these vcpu feature flags are not set then KVM will
+filter out the Pointer Authentication system key registers from
+KVM_GET/SET_REG_* ioctls and mask those features from cpufeature ID
+register. Any attempt to use the Pointer Authentication instructions will
+result in an UNDEFINED exception being injected into the guest.
diff --git a/Documentation/arm64/pointer-authentication.txt b/Documentation/arm64/pointer-authentication.txt
deleted file mode 100644
index fc71b33de87e..000000000000
--- a/Documentation/arm64/pointer-authentication.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-Pointer authentication in AArch64 Linux
-=======================================
-
-Author: Mark Rutland <mark.rutland@arm.com>
-Date: 2017-07-19
-
-This document briefly describes the provision of pointer authentication
-functionality in AArch64 Linux.
-
-
-Architecture overview
----------------------
-
-The ARMv8.3 Pointer Authentication extension adds primitives that can be
-used to mitigate certain classes of attack where an attacker can corrupt
-the contents of some memory (e.g. the stack).
-
-The extension uses a Pointer Authentication Code (PAC) to determine
-whether pointers have been modified unexpectedly. A PAC is derived from
-a pointer, another value (such as the stack pointer), and a secret key
-held in system registers.
-
-The extension adds instructions to insert a valid PAC into a pointer,
-and to verify/remove the PAC from a pointer. The PAC occupies a number
-of high-order bits of the pointer, which varies dependent on the
-configured virtual address size and whether pointer tagging is in use.
-
-A subset of these instructions have been allocated from the HINT
-encoding space. In the absence of the extension (or when disabled),
-these instructions behave as NOPs. Applications and libraries using
-these instructions operate correctly regardless of the presence of the
-extension.
-
-The extension provides five separate keys to generate PACs - two for
-instruction addresses (APIAKey, APIBKey), two for data addresses
-(APDAKey, APDBKey), and one for generic authentication (APGAKey).
-
-
-Basic support
--------------
-
-When CONFIG_ARM64_PTR_AUTH is selected, and relevant HW support is
-present, the kernel will assign random key values to each process at
-exec*() time. The keys are shared by all threads within the process, and
-are preserved across fork().
-
-Presence of address authentication functionality is advertised via
-HWCAP_PACA, and generic authentication functionality via HWCAP_PACG.
-
-The number of bits that the PAC occupies in a pointer is 55 minus the
-virtual address size configured by the kernel. For example, with a
-virtual address size of 48, the PAC is 7 bits wide.
-
-Recent versions of GCC can compile code with APIAKey-based return
-address protection when passed the -msign-return-address option. This
-uses instructions in the HINT space (unless -march=armv8.3-a or higher
-is also passed), and such code can run on systems without the pointer
-authentication extension.
-
-In addition to exec(), keys can also be reinitialized to random values
-using the PR_PAC_RESET_KEYS prctl. A bitmask of PR_PAC_APIAKEY,
-PR_PAC_APIBKEY, PR_PAC_APDAKEY, PR_PAC_APDBKEY and PR_PAC_APGAKEY
-specifies which keys are to be reinitialized; specifying 0 means "all
-keys".
-
-
-Debugging
----------
-
-When CONFIG_ARM64_PTR_AUTH is selected, and HW support for address
-authentication is present, the kernel will expose the position of TTBR0
-PAC bits in the NT_ARM_PAC_MASK regset (struct user_pac_mask), which
-userspace can acquire via PTRACE_GETREGSET.
-
-The regset is exposed only when HWCAP_PACA is set. Separate masks are
-exposed for data pointers and instruction pointers, as the set of PAC
-bits can vary between the two. Note that the masks apply to TTBR0
-addresses, and are not valid to apply to TTBR1 addresses (e.g. kernel
-pointers).
-
-Additionally, when CONFIG_CHECKPOINT_RESTORE is also set, the kernel
-will expose the NT_ARM_PACA_KEYS and NT_ARM_PACG_KEYS regsets (struct
-user_pac_address_keys and struct user_pac_generic_keys). These can be
-used to get and set the keys for a thread.
-
-
-Virtualization
---------------
-
-Pointer authentication is enabled in KVM guest when each virtual cpu is
-initialised by passing flags KVM_ARM_VCPU_PTRAUTH_[ADDRESS/GENERIC] and
-requesting these two separate cpu features to be enabled. The current KVM
-guest implementation works by enabling both features together, so both
-these userspace flags are checked before enabling pointer authentication.
-The separate userspace flag will allow to have no userspace ABI changes
-if support is added in the future to allow these two features to be
-enabled independently of one another.
-
-As Arm Architecture specifies that Pointer Authentication feature is
-implemented along with the VHE feature so KVM arm64 ptrauth code relies
-on VHE mode to be present.
-
-Additionally, when these vcpu feature flags are not set then KVM will
-filter out the Pointer Authentication system key registers from
-KVM_GET/SET_REG_* ioctls and mask those features from cpufeature ID
-register. Any attempt to use the Pointer Authentication instructions will
-result in an UNDEFINED exception being injected into the guest.
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
new file mode 100644
index 000000000000..c792774be59e
--- /dev/null
+++ b/Documentation/arm64/silicon-errata.rst
@@ -0,0 +1,131 @@
+=======================================
+Silicon Errata and Software Workarounds
+=======================================
+
+Author: Will Deacon <will.deacon@arm.com>
+
+Date  : 27 November 2015
+
+It is an unfortunate fact of life that hardware is often produced with
+so-called "errata", which can cause it to deviate from the architecture
+under specific circumstances.  For hardware produced by ARM, these
+errata are broadly classified into the following categories:
+
+  ==========  ========================================================
+  Category A  A critical error without a viable workaround.
+  Category B  A significant or critical error with an acceptable
+              workaround.
+  Category C  A minor error that is not expected to occur under normal
+              operation.
+  ==========  ========================================================
+
+For more information, consult one of the "Software Developers Errata
+Notice" documents available on infocenter.arm.com (registration
+required).
+
+As far as Linux is concerned, Category B errata may require some special
+treatment in the operating system. For example, avoiding a particular
+sequence of code, or configuring the processor in a particular way. A
+less common situation may require similar actions in order to declassify
+a Category A erratum into a Category C erratum. These are collectively
+known as "software workarounds" and are only required in the minority of
+cases (e.g. those cases that both require a non-secure workaround *and*
+can be triggered by Linux).
+
+For software workarounds that may adversely impact systems unaffected by
+the erratum in question, a Kconfig entry is added under "Kernel
+Features" -> "ARM errata workarounds via the alternatives framework".
+These are enabled by default and patched in at runtime when an affected
+CPU is detected. For less-intrusive workarounds, a Kconfig option is not
+available and the code is structured (preferably with a comment) in such
+a way that the erratum will not be hit.
+
+This approach can make it slightly onerous to determine exactly which
+errata are worked around in an arbitrary kernel source tree, so this
+file acts as a registry of software workarounds in the Linux Kernel and
+will be updated when new workarounds are committed and backported to
+stable kernels.
+
++----------------+-----------------+-----------------+-----------------------------+
+| Implementor    | Component       | Erratum ID      | Kconfig                     |
++================+=================+=================+=============================+
+| Allwinner      | A64/R18         | UNKNOWN1        | SUN50I_ERRATUM_UNKNOWN1     |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A53      | #826319         | ARM64_ERRATUM_826319        |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A53      | #827319         | ARM64_ERRATUM_827319        |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A53      | #824069         | ARM64_ERRATUM_824069        |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A53      | #819472         | ARM64_ERRATUM_819472        |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A53      | #845719         | ARM64_ERRATUM_845719        |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A53      | #843419         | ARM64_ERRATUM_843419        |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075        |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A57      | #852523         | N/A                         |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220        |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A72      | #853709         | N/A                         |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A73      | #858921         | ARM64_ERRATUM_858921        |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A55      | #1024718        | ARM64_ERRATUM_1024718       |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A76      | #1188873,1418040| ARM64_ERRATUM_1418040       |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A76      | #1165522        | ARM64_ERRATUM_1165522       |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A76      | #1286807        | ARM64_ERRATUM_1286807       |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A76      | #1463225        | ARM64_ERRATUM_1463225       |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Neoverse-N1     | #1188873,1418040| ARM64_ERRATUM_1418040       |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | MMU-500         | #841119,826419  | N/A                         |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium         | ThunderX ITS    | #22375,24313    | CAVIUM_ERRATUM_22375        |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144        |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154        |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium         | ThunderX Core   | #27456          | CAVIUM_ERRATUM_27456        |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium         | ThunderX Core   | #30115          | CAVIUM_ERRATUM_30115        |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium         | ThunderX SMMUv2 | #27704          | N/A                         |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium         | ThunderX2 SMMUv3| #74             | N/A                         |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium         | ThunderX2 SMMUv3| #126            | N/A                         |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Freescale/NXP  | LS2080A/LS1043A | A-008585        | FSL_ERRATUM_A008585         |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon      | Hip0{5,6,7}     | #161010101      | HISILICON_ERRATUM_161010101 |
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon      | Hip0{6,7}       | #161010701      | N/A                         |
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon      | Hip07           | #161600802      | HISILICON_ERRATUM_161600802 |
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon      | Hip08 SMMU PMCG | #162001800      | N/A                         |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo/Falkor v1  | E1003           | QCOM_FALKOR_ERRATUM_1003    |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | QDF2400 ITS     | E0065           | QCOM_QDF2400_ERRATUM_0065   |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Falkor v{1,2}   | E1041           | QCOM_FALKOR_ERRATUM_1041    |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Fujitsu        | A64FX           | E#010001        | FUJITSU_ERRATUM_010001      |
++----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
deleted file mode 100644
index 2735462d5958..000000000000
--- a/Documentation/arm64/silicon-errata.txt
+++ /dev/null
@@ -1,88 +0,0 @@
-                Silicon Errata and Software Workarounds
-                =======================================
-
-Author: Will Deacon <will.deacon@arm.com>
-Date  : 27 November 2015
-
-It is an unfortunate fact of life that hardware is often produced with
-so-called "errata", which can cause it to deviate from the architecture
-under specific circumstances.  For hardware produced by ARM, these
-errata are broadly classified into the following categories:
-
-  Category A: A critical error without a viable workaround.
-  Category B: A significant or critical error with an acceptable
-              workaround.
-  Category C: A minor error that is not expected to occur under normal
-              operation.
-
-For more information, consult one of the "Software Developers Errata
-Notice" documents available on infocenter.arm.com (registration
-required).
-
-As far as Linux is concerned, Category B errata may require some special
-treatment in the operating system. For example, avoiding a particular
-sequence of code, or configuring the processor in a particular way. A
-less common situation may require similar actions in order to declassify
-a Category A erratum into a Category C erratum. These are collectively
-known as "software workarounds" and are only required in the minority of
-cases (e.g. those cases that both require a non-secure workaround *and*
-can be triggered by Linux).
-
-For software workarounds that may adversely impact systems unaffected by
-the erratum in question, a Kconfig entry is added under "Kernel
-Features" -> "ARM errata workarounds via the alternatives framework".
-These are enabled by default and patched in at runtime when an affected
-CPU is detected. For less-intrusive workarounds, a Kconfig option is not
-available and the code is structured (preferably with a comment) in such
-a way that the erratum will not be hit.
-
-This approach can make it slightly onerous to determine exactly which
-errata are worked around in an arbitrary kernel source tree, so this
-file acts as a registry of software workarounds in the Linux Kernel and
-will be updated when new workarounds are committed and backported to
-stable kernels.
-
-| Implementor    | Component       | Erratum ID      | Kconfig                     |
-+----------------+-----------------+-----------------+-----------------------------+
-| Allwinner      | A64/R18         | UNKNOWN1        | SUN50I_ERRATUM_UNKNOWN1     |
-|                |                 |                 |                             |
-| ARM            | Cortex-A53      | #826319         | ARM64_ERRATUM_826319        |
-| ARM            | Cortex-A53      | #827319         | ARM64_ERRATUM_827319        |
-| ARM            | Cortex-A53      | #824069         | ARM64_ERRATUM_824069        |
-| ARM            | Cortex-A53      | #819472         | ARM64_ERRATUM_819472        |
-| ARM            | Cortex-A53      | #845719         | ARM64_ERRATUM_845719        |
-| ARM            | Cortex-A53      | #843419         | ARM64_ERRATUM_843419        |
-| ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075        |
-| ARM            | Cortex-A57      | #852523         | N/A                         |
-| ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220        |
-| ARM            | Cortex-A72      | #853709         | N/A                         |
-| ARM            | Cortex-A73      | #858921         | ARM64_ERRATUM_858921        |
-| ARM            | Cortex-A55      | #1024718        | ARM64_ERRATUM_1024718       |
-| ARM            | Cortex-A76      | #1188873,1418040| ARM64_ERRATUM_1418040       |
-| ARM            | Cortex-A76      | #1165522        | ARM64_ERRATUM_1165522       |
-| ARM            | Cortex-A76      | #1286807        | ARM64_ERRATUM_1286807       |
-| ARM            | Cortex-A76      | #1463225        | ARM64_ERRATUM_1463225       |
-| ARM            | Neoverse-N1     | #1188873,1418040| ARM64_ERRATUM_1418040       |
-| ARM            | MMU-500         | #841119,826419  | N/A                         |
-|                |                 |                 |                             |
-| Cavium         | ThunderX ITS    | #22375,24313    | CAVIUM_ERRATUM_22375        |
-| Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144        |
-| Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154        |
-| Cavium         | ThunderX Core   | #27456          | CAVIUM_ERRATUM_27456        |
-| Cavium         | ThunderX Core   | #30115          | CAVIUM_ERRATUM_30115        |
-| Cavium         | ThunderX SMMUv2 | #27704          | N/A                         |
-| Cavium         | ThunderX2 SMMUv3| #74             | N/A                         |
-| Cavium         | ThunderX2 SMMUv3| #126            | N/A                         |
-|                |                 |                 |                             |
-| Freescale/NXP  | LS2080A/LS1043A | A-008585        | FSL_ERRATUM_A008585         |
-|                |                 |                 |                             |
-| Hisilicon      | Hip0{5,6,7}     | #161010101      | HISILICON_ERRATUM_161010101 |
-| Hisilicon      | Hip0{6,7}       | #161010701      | N/A                         |
-| Hisilicon      | Hip07           | #161600802      | HISILICON_ERRATUM_161600802 |
-| Hisilicon      | Hip08 SMMU PMCG | #162001800      | N/A                         |
-|                |                 |                 |                             |
-| Qualcomm Tech. | Kryo/Falkor v1  | E1003           | QCOM_FALKOR_ERRATUM_1003    |
-| Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
-| Qualcomm Tech. | QDF2400 ITS     | E0065           | QCOM_QDF2400_ERRATUM_0065   |
-| Qualcomm Tech. | Falkor v{1,2}   | E1041           | QCOM_FALKOR_ERRATUM_1041    |
-| Fujitsu        | A64FX           | E#010001        | FUJITSU_ERRATUM_010001      |
diff --git a/Documentation/arm64/sve.rst b/Documentation/arm64/sve.rst
new file mode 100644
index 000000000000..38422ab249dd
--- /dev/null
+++ b/Documentation/arm64/sve.rst
@@ -0,0 +1,529 @@
+===================================================
+Scalable Vector Extension support for AArch64 Linux
+===================================================
+
+Author: Dave Martin <Dave.Martin@arm.com>
+
+Date:   4 August 2017
+
+This document outlines briefly the interface provided to userspace by Linux in
+order to support use of the ARM Scalable Vector Extension (SVE).
+
+This is an outline of the most important features and issues only and not
+intended to be exhaustive.
+
+This document does not aim to describe the SVE architecture or programmer's
+model.  To aid understanding, a minimal description of relevant programmer's
+model features for SVE is included in Appendix A.
+
+
+1.  General
+-----------
+
+* SVE registers Z0..Z31, P0..P15 and FFR and the current vector length VL, are
+  tracked per-thread.
+
+* The presence of SVE is reported to userspace via HWCAP_SVE in the aux vector
+  AT_HWCAP entry.  Presence of this flag implies the presence of the SVE
+  instructions and registers, and the Linux-specific system interfaces
+  described in this document.  SVE is reported in /proc/cpuinfo as "sve".
+
+* Support for the execution of SVE instructions in userspace can also be
+  detected by reading the CPU ID register ID_AA64PFR0_EL1 using an MRS
+  instruction, and checking that the value of the SVE field is nonzero. [3]
+
+  It does not guarantee the presence of the system interfaces described in the
+  following sections: software that needs to verify that those interfaces are
+  present must check for HWCAP_SVE instead.
+
+* On hardware that supports the SVE2 extensions, HWCAP2_SVE2 will also
+  be reported in the AT_HWCAP2 aux vector entry.  In addition to this,
+  optional extensions to SVE2 may be reported by the presence of:
+
+	HWCAP2_SVE2
+	HWCAP2_SVEAES
+	HWCAP2_SVEPMULL
+	HWCAP2_SVEBITPERM
+	HWCAP2_SVESHA3
+	HWCAP2_SVESM4
+
+  This list may be extended over time as the SVE architecture evolves.
+
+  These extensions are also reported via the CPU ID register ID_AA64ZFR0_EL1,
+  which userspace can read using an MRS instruction.  See elf_hwcaps.txt and
+  cpu-feature-registers.txt for details.
+
+* Debuggers should restrict themselves to interacting with the target via the
+  NT_ARM_SVE regset.  The recommended way of detecting support for this regset
+  is to connect to a target process first and then attempt a
+  ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).
+
+
+2.  Vector length terminology
+-----------------------------
+
+The size of an SVE vector (Z) register is referred to as the "vector length".
+
+To avoid confusion about the units used to express vector length, the kernel
+adopts the following conventions:
+
+* Vector length (VL) = size of a Z-register in bytes
+
+* Vector quadwords (VQ) = size of a Z-register in units of 128 bits
+
+(So, VL = 16 * VQ.)
+
+The VQ convention is used where the underlying granularity is important, such
+as in data structure definitions.  In most other situations, the VL convention
+is used.  This is consistent with the meaning of the "VL" pseudo-register in
+the SVE instruction set architecture.
+
+
+3.  System call behaviour
+-------------------------
+
+* On syscall, V0..V31 are preserved (as without SVE).  Thus, bits [127:0] of
+  Z0..Z31 are preserved.  All other bits of Z0..Z31, and all of P0..P15 and FFR
+  become unspecified on return from a syscall.
+
+* The SVE registers are not used to pass arguments to or receive results from
+  any syscall.
+
+* In practice the affected registers/bits will be preserved or will be replaced
+  with zeros on return from a syscall, but userspace should not make
+  assumptions about this.  The kernel behaviour may vary on a case-by-case
+  basis.
+
+* All other SVE state of a thread, including the currently configured vector
+  length, the state of the PR_SVE_VL_INHERIT flag, and the deferred vector
+  length (if any), is preserved across all syscalls, subject to the specific
+  exceptions for execve() described in section 6.
+
+  In particular, on return from a fork() or clone(), the parent and new child
+  process or thread share identical SVE configuration, matching that of the
+  parent before the call.
+
+
+4.  Signal handling
+-------------------
+
+* A new signal frame record sve_context encodes the SVE registers on signal
+  delivery. [1]
+
+* This record is supplementary to fpsimd_context.  The FPSR and FPCR registers
+  are only present in fpsimd_context.  For convenience, the content of V0..V31
+  is duplicated between sve_context and fpsimd_context.
+
+* The signal frame record for SVE always contains basic metadata, in particular
+  the thread's vector length (in sve_context.vl).
+
+* The SVE registers may or may not be included in the record, depending on
+  whether the registers are live for the thread.  The registers are present if
+  and only if:
+  sve_context.head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve_context.vl)).
+
+* If the registers are present, the remainder of the record has a vl-dependent
+  size and layout.  Macros SVE_SIG_* are defined [1] to facilitate access to
+  the members.
+
+* If the SVE context is too big to fit in sigcontext.__reserved[], then extra
+  space is allocated on the stack, an extra_context record is written in
+  __reserved[] referencing this space.  sve_context is then written in the
+  extra space.  Refer to [1] for further details about this mechanism.
+
+
+5.  Signal return
+-----------------
+
+When returning from a signal handler:
+
+* If there is no sve_context record in the signal frame, or if the record is
+  present but contains no register data as desribed in the previous section,
+  then the SVE registers/bits become non-live and take unspecified values.
+
+* If sve_context is present in the signal frame and contains full register
+  data, the SVE registers become live and are populated with the specified
+  data.  However, for backward compatibility reasons, bits [127:0] of Z0..Z31
+  are always restored from the corresponding members of fpsimd_context.vregs[]
+  and not from sve_context.  The remaining bits are restored from sve_context.
+
+* Inclusion of fpsimd_context in the signal frame remains mandatory,
+  irrespective of whether sve_context is present or not.
+
+* The vector length cannot be changed via signal return.  If sve_context.vl in
+  the signal frame does not match the current vector length, the signal return
+  attempt is treated as illegal, resulting in a forced SIGSEGV.
+
+
+6.  prctl extensions
+--------------------
+
+Some new prctl() calls are added to allow programs to manage the SVE vector
+length:
+
+prctl(PR_SVE_SET_VL, unsigned long arg)
+
+    Sets the vector length of the calling thread and related flags, where
+    arg == vl | flags.  Other threads of the calling process are unaffected.
+
+    vl is the desired vector length, where sve_vl_valid(vl) must be true.
+
+    flags:
+
+	PR_SVE_SET_VL_INHERIT
+
+	    Inherit the current vector length across execve().  Otherwise, the
+	    vector length is reset to the system default at execve().  (See
+	    Section 9.)
+
+	PR_SVE_SET_VL_ONEXEC
+
+	    Defer the requested vector length change until the next execve()
+	    performed by this thread.
+
+	    The effect is equivalent to implicit exceution of the following
+	    call immediately after the next execve() (if any) by the thread:
+
+		prctl(PR_SVE_SET_VL, arg & ~PR_SVE_SET_VL_ONEXEC)
+
+	    This allows launching of a new program with a different vector
+	    length, while avoiding runtime side effects in the caller.
+
+
+	    Without PR_SVE_SET_VL_ONEXEC, the requested change takes effect
+	    immediately.
+
+
+    Return value: a nonnegative on success, or a negative value on error:
+	EINVAL: SVE not supported, invalid vector length requested, or
+	    invalid flags.
+
+
+    On success:
+
+    * Either the calling thread's vector length or the deferred vector length
+      to be applied at the next execve() by the thread (dependent on whether
+      PR_SVE_SET_VL_ONEXEC is present in arg), is set to the largest value
+      supported by the system that is less than or equal to vl.  If vl ==
+      SVE_VL_MAX, the value set will be the largest value supported by the
+      system.
+
+    * Any previously outstanding deferred vector length change in the calling
+      thread is cancelled.
+
+    * The returned value describes the resulting configuration, encoded as for
+      PR_SVE_GET_VL.  The vector length reported in this value is the new
+      current vector length for this thread if PR_SVE_SET_VL_ONEXEC was not
+      present in arg; otherwise, the reported vector length is the deferred
+      vector length that will be applied at the next execve() by the calling
+      thread.
+
+    * Changing the vector length causes all of P0..P15, FFR and all bits of
+      Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
+      unspecified.  Calling PR_SVE_SET_VL with vl equal to the thread's current
+      vector length, or calling PR_SVE_SET_VL with the PR_SVE_SET_VL_ONEXEC
+      flag, does not constitute a change to the vector length for this purpose.
+
+
+prctl(PR_SVE_GET_VL)
+
+    Gets the vector length of the calling thread.
+
+    The following flag may be OR-ed into the result:
+
+	PR_SVE_SET_VL_INHERIT
+
+	    Vector length will be inherited across execve().
+
+    There is no way to determine whether there is an outstanding deferred
+    vector length change (which would only normally be the case between a
+    fork() or vfork() and the corresponding execve() in typical use).
+
+    To extract the vector length from the result, and it with
+    PR_SVE_VL_LEN_MASK.
+
+    Return value: a nonnegative value on success, or a negative value on error:
+	EINVAL: SVE not supported.
+
+
+7.  ptrace extensions
+---------------------
+
+* A new regset NT_ARM_SVE is defined for use with PTRACE_GETREGSET and
+  PTRACE_SETREGSET.
+
+  Refer to [2] for definitions.
+
+The regset data starts with struct user_sve_header, containing:
+
+    size
+
+	Size of the complete regset, in bytes.
+	This depends on vl and possibly on other things in the future.
+
+	If a call to PTRACE_GETREGSET requests less data than the value of
+	size, the caller can allocate a larger buffer and retry in order to
+	read the complete regset.
+
+    max_size
+
+	Maximum size in bytes that the regset can grow to for the target
+	thread.  The regset won't grow bigger than this even if the target
+	thread changes its vector length etc.
+
+    vl
+
+	Target thread's current vector length, in bytes.
+
+    max_vl
+
+	Maximum possible vector length for the target thread.
+
+    flags
+
+	either
+
+	    SVE_PT_REGS_FPSIMD
+
+		SVE registers are not live (GETREGSET) or are to be made
+		non-live (SETREGSET).
+
+		The payload is of type struct user_fpsimd_state, with the same
+		meaning as for NT_PRFPREG, starting at offset
+		SVE_PT_FPSIMD_OFFSET from the start of user_sve_header.
+
+		Extra data might be appended in the future: the size of the
+		payload should be obtained using SVE_PT_FPSIMD_SIZE(vq, flags).
+
+		vq should be obtained using sve_vq_from_vl(vl).
+
+		or
+
+	    SVE_PT_REGS_SVE
+
+		SVE registers are live (GETREGSET) or are to be made live
+		(SETREGSET).
+
+		The payload contains the SVE register data, starting at offset
+		SVE_PT_SVE_OFFSET from the start of user_sve_header, and with
+		size SVE_PT_SVE_SIZE(vq, flags);
+
+	... OR-ed with zero or more of the following flags, which have the same
+	meaning and behaviour as the corresponding PR_SET_VL_* flags:
+
+	    SVE_PT_VL_INHERIT
+
+	    SVE_PT_VL_ONEXEC (SETREGSET only).
+
+* The effects of changing the vector length and/or flags are equivalent to
+  those documented for PR_SVE_SET_VL.
+
+  The caller must make a further GETREGSET call if it needs to know what VL is
+  actually set by SETREGSET, unless is it known in advance that the requested
+  VL is supported.
+
+* In the SVE_PT_REGS_SVE case, the size and layout of the payload depends on
+  the header fields.  The SVE_PT_SVE_*() macros are provided to facilitate
+  access to the members.
+
+* In either case, for SETREGSET it is permissible to omit the payload, in which
+  case only the vector length and flags are changed (along with any
+  consequences of those changes).
+
+* For SETREGSET, if an SVE_PT_REGS_SVE payload is present and the
+  requested VL is not supported, the effect will be the same as if the
+  payload were omitted, except that an EIO error is reported.  No
+  attempt is made to translate the payload data to the correct layout
+  for the vector length actually set.  The thread's FPSIMD state is
+  preserved, but the remaining bits of the SVE registers become
+  unspecified.  It is up to the caller to translate the payload layout
+  for the actual VL and retry.
+
+* The effect of writing a partial, incomplete payload is unspecified.
+
+
+8.  ELF coredump extensions
+---------------------------
+
+* A NT_ARM_SVE note will be added to each coredump for each thread of the
+  dumped process.  The contents will be equivalent to the data that would have
+  been read if a PTRACE_GETREGSET of NT_ARM_SVE were executed for each thread
+  when the coredump was generated.
+
+
+9.  System runtime configuration
+--------------------------------
+
+* To mitigate the ABI impact of expansion of the signal frame, a policy
+  mechanism is provided for administrators, distro maintainers and developers
+  to set the default vector length for userspace processes:
+
+/proc/sys/abi/sve_default_vector_length
+
+    Writing the text representation of an integer to this file sets the system
+    default vector length to the specified value, unless the value is greater
+    than the maximum vector length supported by the system in which case the
+    default vector length is set to that maximum.
+
+    The result can be determined by reopening the file and reading its
+    contents.
+
+    At boot, the default vector length is initially set to 64 or the maximum
+    supported vector length, whichever is smaller.  This determines the initial
+    vector length of the init process (PID 1).
+
+    Reading this file returns the current system default vector length.
+
+* At every execve() call, the new vector length of the new process is set to
+  the system default vector length, unless
+
+    * PR_SVE_SET_VL_INHERIT (or equivalently SVE_PT_VL_INHERIT) is set for the
+      calling thread, or
+
+    * a deferred vector length change is pending, established via the
+      PR_SVE_SET_VL_ONEXEC flag (or SVE_PT_VL_ONEXEC).
+
+* Modifying the system default vector length does not affect the vector length
+  of any existing process or thread that does not make an execve() call.
+
+
+Appendix A.  SVE programmer's model (informative)
+=================================================
+
+This section provides a minimal description of the additions made by SVE to the
+ARMv8-A programmer's model that are relevant to this document.
+
+Note: This section is for information only and not intended to be complete or
+to replace any architectural specification.
+
+A.1.  Registers
+---------------
+
+In A64 state, SVE adds the following:
+
+* 32 8VL-bit vector registers Z0..Z31
+  For each Zn, Zn bits [127:0] alias the ARMv8-A vector register Vn.
+
+  A register write using a Vn register name zeros all bits of the corresponding
+  Zn except for bits [127:0].
+
+* 16 VL-bit predicate registers P0..P15
+
+* 1 VL-bit special-purpose predicate register FFR (the "first-fault register")
+
+* a VL "pseudo-register" that determines the size of each vector register
+
+  The SVE instruction set architecture provides no way to write VL directly.
+  Instead, it can be modified only by EL1 and above, by writing appropriate
+  system registers.
+
+* The value of VL can be configured at runtime by EL1 and above:
+  16 <= VL <= VLmax, where VL must be a multiple of 16.
+
+* The maximum vector length is determined by the hardware:
+  16 <= VLmax <= 256.
+
+  (The SVE architecture specifies 256, but permits future architecture
+  revisions to raise this limit.)
+
+* FPSR and FPCR are retained from ARMv8-A, and interact with SVE floating-point
+  operations in a similar way to the way in which they interact with ARMv8
+  floating-point operations::
+
+         8VL-1                       128               0  bit index
+        +----          ////            -----------------+
+     Z0 |                               :       V0      |
+      :                                          :
+     Z7 |                               :       V7      |
+     Z8 |                               :     * V8      |
+      :                                       :  :
+    Z15 |                               :     *V15      |
+    Z16 |                               :      V16      |
+      :                                          :
+    Z31 |                               :      V31      |
+        +----          ////            -----------------+
+                                                 31    0
+         VL-1                  0                +-------+
+        +----       ////      --+          FPSR |       |
+     P0 |                       |               +-------+
+      : |                       |         *FPCR |       |
+    P15 |                       |               +-------+
+        +----       ////      --+
+    FFR |                       |               +-----+
+        +----       ////      --+            VL |     |
+                                                +-----+
+
+(*) callee-save:
+    This only applies to bits [63:0] of Z-/V-registers.
+    FPCR contains callee-save and caller-save bits.  See [4] for details.
+
+
+A.2.  Procedure call standard
+-----------------------------
+
+The ARMv8-A base procedure call standard is extended as follows with respect to
+the additional SVE register state:
+
+* All SVE register bits that are not shared with FP/SIMD are caller-save.
+
+* Z8 bits [63:0] .. Z15 bits [63:0] are callee-save.
+
+  This follows from the way these bits are mapped to V8..V15, which are caller-
+  save in the base procedure call standard.
+
+
+Appendix B.  ARMv8-A FP/SIMD programmer's model
+===============================================
+
+Note: This section is for information only and not intended to be complete or
+to replace any architectural specification.
+
+Refer to [4] for for more information.
+
+ARMv8-A defines the following floating-point / SIMD register state:
+
+* 32 128-bit vector registers V0..V31
+* 2 32-bit status/control registers FPSR, FPCR
+
+::
+
+         127           0  bit index
+        +---------------+
+     V0 |               |
+      : :               :
+     V7 |               |
+   * V8 |               |
+   :  : :               :
+   *V15 |               |
+    V16 |               |
+      : :               :
+    V31 |               |
+        +---------------+
+
+                 31    0
+                +-------+
+           FPSR |       |
+                +-------+
+          *FPCR |       |
+                +-------+
+
+(*) callee-save:
+    This only applies to bits [63:0] of V-registers.
+    FPCR contains a mixture of callee-save and caller-save bits.
+
+
+References
+==========
+
+[1] arch/arm64/include/uapi/asm/sigcontext.h
+    AArch64 Linux signal ABI definitions
+
+[2] arch/arm64/include/uapi/asm/ptrace.h
+    AArch64 Linux ptrace ABI definitions
+
+[3] Documentation/arm64/cpu-feature-registers.rst
+
+[4] ARM IHI0055C
+    http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055c/IHI0055C_beta_aapcs64.pdf
+    http://infocenter.arm.com/help/topic/com.arm.doc.subset.swdev.abi/index.html
+    Procedure Call Standard for the ARM 64-bit Architecture (AArch64)
diff --git a/Documentation/arm64/sve.txt b/Documentation/arm64/sve.txt
deleted file mode 100644
index 9940e924a47e..000000000000
--- a/Documentation/arm64/sve.txt
+++ /dev/null
@@ -1,525 +0,0 @@
-            Scalable Vector Extension support for AArch64 Linux
-            ===================================================
-
-Author: Dave Martin <Dave.Martin@arm.com>
-Date:   4 August 2017
-
-This document outlines briefly the interface provided to userspace by Linux in
-order to support use of the ARM Scalable Vector Extension (SVE).
-
-This is an outline of the most important features and issues only and not
-intended to be exhaustive.
-
-This document does not aim to describe the SVE architecture or programmer's
-model.  To aid understanding, a minimal description of relevant programmer's
-model features for SVE is included in Appendix A.
-
-
-1.  General
------------
-
-* SVE registers Z0..Z31, P0..P15 and FFR and the current vector length VL, are
-  tracked per-thread.
-
-* The presence of SVE is reported to userspace via HWCAP_SVE in the aux vector
-  AT_HWCAP entry.  Presence of this flag implies the presence of the SVE
-  instructions and registers, and the Linux-specific system interfaces
-  described in this document.  SVE is reported in /proc/cpuinfo as "sve".
-
-* Support for the execution of SVE instructions in userspace can also be
-  detected by reading the CPU ID register ID_AA64PFR0_EL1 using an MRS
-  instruction, and checking that the value of the SVE field is nonzero. [3]
-
-  It does not guarantee the presence of the system interfaces described in the
-  following sections: software that needs to verify that those interfaces are
-  present must check for HWCAP_SVE instead.
-
-* On hardware that supports the SVE2 extensions, HWCAP2_SVE2 will also
-  be reported in the AT_HWCAP2 aux vector entry.  In addition to this,
-  optional extensions to SVE2 may be reported by the presence of:
-
-	HWCAP2_SVE2
-	HWCAP2_SVEAES
-	HWCAP2_SVEPMULL
-	HWCAP2_SVEBITPERM
-	HWCAP2_SVESHA3
-	HWCAP2_SVESM4
-
-  This list may be extended over time as the SVE architecture evolves.
-
-  These extensions are also reported via the CPU ID register ID_AA64ZFR0_EL1,
-  which userspace can read using an MRS instruction.  See elf_hwcaps.txt and
-  cpu-feature-registers.txt for details.
-
-* Debuggers should restrict themselves to interacting with the target via the
-  NT_ARM_SVE regset.  The recommended way of detecting support for this regset
-  is to connect to a target process first and then attempt a
-  ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).
-
-
-2.  Vector length terminology
------------------------------
-
-The size of an SVE vector (Z) register is referred to as the "vector length".
-
-To avoid confusion about the units used to express vector length, the kernel
-adopts the following conventions:
-
-* Vector length (VL) = size of a Z-register in bytes
-
-* Vector quadwords (VQ) = size of a Z-register in units of 128 bits
-
-(So, VL = 16 * VQ.)
-
-The VQ convention is used where the underlying granularity is important, such
-as in data structure definitions.  In most other situations, the VL convention
-is used.  This is consistent with the meaning of the "VL" pseudo-register in
-the SVE instruction set architecture.
-
-
-3.  System call behaviour
--------------------------
-
-* On syscall, V0..V31 are preserved (as without SVE).  Thus, bits [127:0] of
-  Z0..Z31 are preserved.  All other bits of Z0..Z31, and all of P0..P15 and FFR
-  become unspecified on return from a syscall.
-
-* The SVE registers are not used to pass arguments to or receive results from
-  any syscall.
-
-* In practice the affected registers/bits will be preserved or will be replaced
-  with zeros on return from a syscall, but userspace should not make
-  assumptions about this.  The kernel behaviour may vary on a case-by-case
-  basis.
-
-* All other SVE state of a thread, including the currently configured vector
-  length, the state of the PR_SVE_VL_INHERIT flag, and the deferred vector
-  length (if any), is preserved across all syscalls, subject to the specific
-  exceptions for execve() described in section 6.
-
-  In particular, on return from a fork() or clone(), the parent and new child
-  process or thread share identical SVE configuration, matching that of the
-  parent before the call.
-
-
-4.  Signal handling
--------------------
-
-* A new signal frame record sve_context encodes the SVE registers on signal
-  delivery. [1]
-
-* This record is supplementary to fpsimd_context.  The FPSR and FPCR registers
-  are only present in fpsimd_context.  For convenience, the content of V0..V31
-  is duplicated between sve_context and fpsimd_context.
-
-* The signal frame record for SVE always contains basic metadata, in particular
-  the thread's vector length (in sve_context.vl).
-
-* The SVE registers may or may not be included in the record, depending on
-  whether the registers are live for the thread.  The registers are present if
-  and only if:
-  sve_context.head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve_context.vl)).
-
-* If the registers are present, the remainder of the record has a vl-dependent
-  size and layout.  Macros SVE_SIG_* are defined [1] to facilitate access to
-  the members.
-
-* If the SVE context is too big to fit in sigcontext.__reserved[], then extra
-  space is allocated on the stack, an extra_context record is written in
-  __reserved[] referencing this space.  sve_context is then written in the
-  extra space.  Refer to [1] for further details about this mechanism.
-
-
-5.  Signal return
------------------
-
-When returning from a signal handler:
-
-* If there is no sve_context record in the signal frame, or if the record is
-  present but contains no register data as desribed in the previous section,
-  then the SVE registers/bits become non-live and take unspecified values.
-
-* If sve_context is present in the signal frame and contains full register
-  data, the SVE registers become live and are populated with the specified
-  data.  However, for backward compatibility reasons, bits [127:0] of Z0..Z31
-  are always restored from the corresponding members of fpsimd_context.vregs[]
-  and not from sve_context.  The remaining bits are restored from sve_context.
-
-* Inclusion of fpsimd_context in the signal frame remains mandatory,
-  irrespective of whether sve_context is present or not.
-
-* The vector length cannot be changed via signal return.  If sve_context.vl in
-  the signal frame does not match the current vector length, the signal return
-  attempt is treated as illegal, resulting in a forced SIGSEGV.
-
-
-6.  prctl extensions
---------------------
-
-Some new prctl() calls are added to allow programs to manage the SVE vector
-length:
-
-prctl(PR_SVE_SET_VL, unsigned long arg)
-
-    Sets the vector length of the calling thread and related flags, where
-    arg == vl | flags.  Other threads of the calling process are unaffected.
-
-    vl is the desired vector length, where sve_vl_valid(vl) must be true.
-
-    flags:
-
-	PR_SVE_SET_VL_INHERIT
-
-	    Inherit the current vector length across execve().  Otherwise, the
-	    vector length is reset to the system default at execve().  (See
-	    Section 9.)
-
-	PR_SVE_SET_VL_ONEXEC
-
-	    Defer the requested vector length change until the next execve()
-	    performed by this thread.
-
-	    The effect is equivalent to implicit exceution of the following
-	    call immediately after the next execve() (if any) by the thread:
-
-		prctl(PR_SVE_SET_VL, arg & ~PR_SVE_SET_VL_ONEXEC)
-
-	    This allows launching of a new program with a different vector
-	    length, while avoiding runtime side effects in the caller.
-
-
-	    Without PR_SVE_SET_VL_ONEXEC, the requested change takes effect
-	    immediately.
-
-
-    Return value: a nonnegative on success, or a negative value on error:
-	EINVAL: SVE not supported, invalid vector length requested, or
-	    invalid flags.
-
-
-    On success:
-
-    * Either the calling thread's vector length or the deferred vector length
-      to be applied at the next execve() by the thread (dependent on whether
-      PR_SVE_SET_VL_ONEXEC is present in arg), is set to the largest value
-      supported by the system that is less than or equal to vl.  If vl ==
-      SVE_VL_MAX, the value set will be the largest value supported by the
-      system.
-
-    * Any previously outstanding deferred vector length change in the calling
-      thread is cancelled.
-
-    * The returned value describes the resulting configuration, encoded as for
-      PR_SVE_GET_VL.  The vector length reported in this value is the new
-      current vector length for this thread if PR_SVE_SET_VL_ONEXEC was not
-      present in arg; otherwise, the reported vector length is the deferred
-      vector length that will be applied at the next execve() by the calling
-      thread.
-
-    * Changing the vector length causes all of P0..P15, FFR and all bits of
-      Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
-      unspecified.  Calling PR_SVE_SET_VL with vl equal to the thread's current
-      vector length, or calling PR_SVE_SET_VL with the PR_SVE_SET_VL_ONEXEC
-      flag, does not constitute a change to the vector length for this purpose.
-
-
-prctl(PR_SVE_GET_VL)
-
-    Gets the vector length of the calling thread.
-
-    The following flag may be OR-ed into the result:
-
-	PR_SVE_SET_VL_INHERIT
-
-	    Vector length will be inherited across execve().
-
-    There is no way to determine whether there is an outstanding deferred
-    vector length change (which would only normally be the case between a
-    fork() or vfork() and the corresponding execve() in typical use).
-
-    To extract the vector length from the result, and it with
-    PR_SVE_VL_LEN_MASK.
-
-    Return value: a nonnegative value on success, or a negative value on error:
-	EINVAL: SVE not supported.
-
-
-7.  ptrace extensions
----------------------
-
-* A new regset NT_ARM_SVE is defined for use with PTRACE_GETREGSET and
-  PTRACE_SETREGSET.
-
-  Refer to [2] for definitions.
-
-The regset data starts with struct user_sve_header, containing:
-
-    size
-
-	Size of the complete regset, in bytes.
-	This depends on vl and possibly on other things in the future.
-
-	If a call to PTRACE_GETREGSET requests less data than the value of
-	size, the caller can allocate a larger buffer and retry in order to
-	read the complete regset.
-
-    max_size
-
-	Maximum size in bytes that the regset can grow to for the target
-	thread.  The regset won't grow bigger than this even if the target
-	thread changes its vector length etc.
-
-    vl
-
-	Target thread's current vector length, in bytes.
-
-    max_vl
-
-	Maximum possible vector length for the target thread.
-
-    flags
-
-	either
-
-	    SVE_PT_REGS_FPSIMD
-
-		SVE registers are not live (GETREGSET) or are to be made
-		non-live (SETREGSET).
-
-		The payload is of type struct user_fpsimd_state, with the same
-		meaning as for NT_PRFPREG, starting at offset
-		SVE_PT_FPSIMD_OFFSET from the start of user_sve_header.
-
-		Extra data might be appended in the future: the size of the
-		payload should be obtained using SVE_PT_FPSIMD_SIZE(vq, flags).
-
-		vq should be obtained using sve_vq_from_vl(vl).
-
-		or
-
-	    SVE_PT_REGS_SVE
-
-		SVE registers are live (GETREGSET) or are to be made live
-		(SETREGSET).
-
-		The payload contains the SVE register data, starting at offset
-		SVE_PT_SVE_OFFSET from the start of user_sve_header, and with
-		size SVE_PT_SVE_SIZE(vq, flags);
-
-	... OR-ed with zero or more of the following flags, which have the same
-	meaning and behaviour as the corresponding PR_SET_VL_* flags:
-
-	    SVE_PT_VL_INHERIT
-
-	    SVE_PT_VL_ONEXEC (SETREGSET only).
-
-* The effects of changing the vector length and/or flags are equivalent to
-  those documented for PR_SVE_SET_VL.
-
-  The caller must make a further GETREGSET call if it needs to know what VL is
-  actually set by SETREGSET, unless is it known in advance that the requested
-  VL is supported.
-
-* In the SVE_PT_REGS_SVE case, the size and layout of the payload depends on
-  the header fields.  The SVE_PT_SVE_*() macros are provided to facilitate
-  access to the members.
-
-* In either case, for SETREGSET it is permissible to omit the payload, in which
-  case only the vector length and flags are changed (along with any
-  consequences of those changes).
-
-* For SETREGSET, if an SVE_PT_REGS_SVE payload is present and the
-  requested VL is not supported, the effect will be the same as if the
-  payload were omitted, except that an EIO error is reported.  No
-  attempt is made to translate the payload data to the correct layout
-  for the vector length actually set.  The thread's FPSIMD state is
-  preserved, but the remaining bits of the SVE registers become
-  unspecified.  It is up to the caller to translate the payload layout
-  for the actual VL and retry.
-
-* The effect of writing a partial, incomplete payload is unspecified.
-
-
-8.  ELF coredump extensions
----------------------------
-
-* A NT_ARM_SVE note will be added to each coredump for each thread of the
-  dumped process.  The contents will be equivalent to the data that would have
-  been read if a PTRACE_GETREGSET of NT_ARM_SVE were executed for each thread
-  when the coredump was generated.
-
-
-9.  System runtime configuration
---------------------------------
-
-* To mitigate the ABI impact of expansion of the signal frame, a policy
-  mechanism is provided for administrators, distro maintainers and developers
-  to set the default vector length for userspace processes:
-
-/proc/sys/abi/sve_default_vector_length
-
-    Writing the text representation of an integer to this file sets the system
-    default vector length to the specified value, unless the value is greater
-    than the maximum vector length supported by the system in which case the
-    default vector length is set to that maximum.
-
-    The result can be determined by reopening the file and reading its
-    contents.
-
-    At boot, the default vector length is initially set to 64 or the maximum
-    supported vector length, whichever is smaller.  This determines the initial
-    vector length of the init process (PID 1).
-
-    Reading this file returns the current system default vector length.
-
-* At every execve() call, the new vector length of the new process is set to
-  the system default vector length, unless
-
-    * PR_SVE_SET_VL_INHERIT (or equivalently SVE_PT_VL_INHERIT) is set for the
-      calling thread, or
-
-    * a deferred vector length change is pending, established via the
-      PR_SVE_SET_VL_ONEXEC flag (or SVE_PT_VL_ONEXEC).
-
-* Modifying the system default vector length does not affect the vector length
-  of any existing process or thread that does not make an execve() call.
-
-
-Appendix A.  SVE programmer's model (informative)
-=================================================
-
-This section provides a minimal description of the additions made by SVE to the
-ARMv8-A programmer's model that are relevant to this document.
-
-Note: This section is for information only and not intended to be complete or
-to replace any architectural specification.
-
-A.1.  Registers
----------------
-
-In A64 state, SVE adds the following:
-
-* 32 8VL-bit vector registers Z0..Z31
-  For each Zn, Zn bits [127:0] alias the ARMv8-A vector register Vn.
-
-  A register write using a Vn register name zeros all bits of the corresponding
-  Zn except for bits [127:0].
-
-* 16 VL-bit predicate registers P0..P15
-
-* 1 VL-bit special-purpose predicate register FFR (the "first-fault register")
-
-* a VL "pseudo-register" that determines the size of each vector register
-
-  The SVE instruction set architecture provides no way to write VL directly.
-  Instead, it can be modified only by EL1 and above, by writing appropriate
-  system registers.
-
-* The value of VL can be configured at runtime by EL1 and above:
-  16 <= VL <= VLmax, where VL must be a multiple of 16.
-
-* The maximum vector length is determined by the hardware:
-  16 <= VLmax <= 256.
-
-  (The SVE architecture specifies 256, but permits future architecture
-  revisions to raise this limit.)
-
-* FPSR and FPCR are retained from ARMv8-A, and interact with SVE floating-point
-  operations in a similar way to the way in which they interact with ARMv8
-  floating-point operations.
-
-         8VL-1                       128               0  bit index
-        +----          ////            -----------------+
-     Z0 |                               :       V0      |
-      :                                          :
-     Z7 |                               :       V7      |
-     Z8 |                               :     * V8      |
-      :                                       :  :
-    Z15 |                               :     *V15      |
-    Z16 |                               :      V16      |
-      :                                          :
-    Z31 |                               :      V31      |
-        +----          ////            -----------------+
-                                                 31    0
-         VL-1                  0                +-------+
-        +----       ////      --+          FPSR |       |
-     P0 |                       |               +-------+
-      : |                       |         *FPCR |       |
-    P15 |                       |               +-------+
-        +----       ////      --+
-    FFR |                       |               +-----+
-        +----       ////      --+            VL |     |
-                                                +-----+
-
-(*) callee-save:
-    This only applies to bits [63:0] of Z-/V-registers.
-    FPCR contains callee-save and caller-save bits.  See [4] for details.
-
-
-A.2.  Procedure call standard
------------------------------
-
-The ARMv8-A base procedure call standard is extended as follows with respect to
-the additional SVE register state:
-
-* All SVE register bits that are not shared with FP/SIMD are caller-save.
-
-* Z8 bits [63:0] .. Z15 bits [63:0] are callee-save.
-
-  This follows from the way these bits are mapped to V8..V15, which are caller-
-  save in the base procedure call standard.
-
-
-Appendix B.  ARMv8-A FP/SIMD programmer's model
-===============================================
-
-Note: This section is for information only and not intended to be complete or
-to replace any architectural specification.
-
-Refer to [4] for for more information.
-
-ARMv8-A defines the following floating-point / SIMD register state:
-
-* 32 128-bit vector registers V0..V31
-* 2 32-bit status/control registers FPSR, FPCR
-
-         127           0  bit index
-        +---------------+
-     V0 |               |
-      : :               :
-     V7 |               |
-   * V8 |               |
-   :  : :               :
-   *V15 |               |
-    V16 |               |
-      : :               :
-    V31 |               |
-        +---------------+
-
-                 31    0
-                +-------+
-           FPSR |       |
-                +-------+
-          *FPCR |       |
-                +-------+
-
-(*) callee-save:
-    This only applies to bits [63:0] of V-registers.
-    FPCR contains a mixture of callee-save and caller-save bits.
-
-
-References
-==========
-
-[1] arch/arm64/include/uapi/asm/sigcontext.h
-    AArch64 Linux signal ABI definitions
-
-[2] arch/arm64/include/uapi/asm/ptrace.h
-    AArch64 Linux ptrace ABI definitions
-
-[3] Documentation/arm64/cpu-feature-registers.txt
-
-[4] ARM IHI0055C
-    http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055c/IHI0055C_beta_aapcs64.pdf
-    http://infocenter.arm.com/help/topic/com.arm.doc.subset.swdev.abi/index.html
-    Procedure Call Standard for the ARM 64-bit Architecture (AArch64)
diff --git a/Documentation/arm64/tagged-pointers.rst b/Documentation/arm64/tagged-pointers.rst
new file mode 100644
index 000000000000..2acdec3ebbeb
--- /dev/null
+++ b/Documentation/arm64/tagged-pointers.rst
@@ -0,0 +1,68 @@
+=========================================
+Tagged virtual addresses in AArch64 Linux
+=========================================
+
+Author: Will Deacon <will.deacon@arm.com>
+
+Date  : 12 June 2013
+
+This document briefly describes the provision of tagged virtual
+addresses in the AArch64 translation system and their potential uses
+in AArch64 Linux.
+
+The kernel configures the translation tables so that translations made
+via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of
+the virtual address ignored by the translation hardware. This frees up
+this byte for application use.
+
+
+Passing tagged addresses to the kernel
+--------------------------------------
+
+All interpretation of userspace memory addresses by the kernel assumes
+an address tag of 0x00.
+
+This includes, but is not limited to, addresses found in:
+
+ - pointer arguments to system calls, including pointers in structures
+   passed to system calls,
+
+ - the stack pointer (sp), e.g. when interpreting it to deliver a
+   signal,
+
+ - the frame pointer (x29) and frame records, e.g. when interpreting
+   them to generate a backtrace or call graph.
+
+Using non-zero address tags in any of these locations may result in an
+error code being returned, a (fatal) signal being raised, or other modes
+of failure.
+
+For these reasons, passing non-zero address tags to the kernel via
+system calls is forbidden, and using a non-zero address tag for sp is
+strongly discouraged.
+
+Programs maintaining a frame pointer and frame records that use non-zero
+address tags may suffer impaired or inaccurate debug and profiling
+visibility.
+
+
+Preserving tags
+---------------
+
+Non-zero tags are not preserved when delivering signals. This means that
+signal handlers in applications making use of tags cannot rely on the
+tag information for user virtual addresses being maintained for fields
+inside siginfo_t. One exception to this rule is for signals raised in
+response to watchpoint debug exceptions, where the tag information will
+be preserved.
+
+The architecture prevents the use of a tagged PC, so the upper byte will
+be set to a sign-extension of bit 55 on exception return.
+
+
+Other considerations
+--------------------
+
+Special care should be taken when using tagged pointers, since it is
+likely that C compilers will not hazard two virtual addresses differing
+only in the upper byte.
diff --git a/Documentation/arm64/tagged-pointers.txt b/Documentation/arm64/tagged-pointers.txt
deleted file mode 100644
index a25a99e82bb1..000000000000
--- a/Documentation/arm64/tagged-pointers.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-		Tagged virtual addresses in AArch64 Linux
-		=========================================
-
-Author: Will Deacon <will.deacon@arm.com>
-Date  : 12 June 2013
-
-This document briefly describes the provision of tagged virtual
-addresses in the AArch64 translation system and their potential uses
-in AArch64 Linux.
-
-The kernel configures the translation tables so that translations made
-via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of
-the virtual address ignored by the translation hardware. This frees up
-this byte for application use.
-
-
-Passing tagged addresses to the kernel
---------------------------------------
-
-All interpretation of userspace memory addresses by the kernel assumes
-an address tag of 0x00.
-
-This includes, but is not limited to, addresses found in:
-
- - pointer arguments to system calls, including pointers in structures
-   passed to system calls,
-
- - the stack pointer (sp), e.g. when interpreting it to deliver a
-   signal,
-
- - the frame pointer (x29) and frame records, e.g. when interpreting
-   them to generate a backtrace or call graph.
-
-Using non-zero address tags in any of these locations may result in an
-error code being returned, a (fatal) signal being raised, or other modes
-of failure.
-
-For these reasons, passing non-zero address tags to the kernel via
-system calls is forbidden, and using a non-zero address tag for sp is
-strongly discouraged.
-
-Programs maintaining a frame pointer and frame records that use non-zero
-address tags may suffer impaired or inaccurate debug and profiling
-visibility.
-
-
-Preserving tags
----------------
-
-Non-zero tags are not preserved when delivering signals. This means that
-signal handlers in applications making use of tags cannot rely on the
-tag information for user virtual addresses being maintained for fields
-inside siginfo_t. One exception to this rule is for signals raised in
-response to watchpoint debug exceptions, where the tag information will
-be preserved.
-
-The architecture prevents the use of a tagged PC, so the upper byte will
-be set to a sign-extension of bit 55 on exception return.
-
-
-Other considerations
---------------------
-
-Special care should be taken when using tagged pointers, since it is
-likely that C compilers will not hazard two virtual addresses differing
-only in the upper byte.
diff --git a/Documentation/translations/zh_CN/arm64/booting.txt b/Documentation/translations/zh_CN/arm64/booting.txt
index c1dd968c5ee9..3bfbf66e5a5e 100644
--- a/Documentation/translations/zh_CN/arm64/booting.txt
+++ b/Documentation/translations/zh_CN/arm64/booting.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm64/booting.txt
+Chinese translated version of Documentation/arm64/booting.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -10,7 +10,7 @@ M:	Will Deacon <will.deacon@arm.com>
 zh_CN:	Fu Wei <wefu@redhat.com>
 C:	55f058e7574c3615dea4615573a19bdb258696c6
 ---------------------------------------------------------------------
-Documentation/arm64/booting.txt 的中文翻译
+Documentation/arm64/booting.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/arm64/legacy_instructions.txt b/Documentation/translations/zh_CN/arm64/legacy_instructions.txt
index 68362a1ab717..e295cf75f606 100644
--- a/Documentation/translations/zh_CN/arm64/legacy_instructions.txt
+++ b/Documentation/translations/zh_CN/arm64/legacy_instructions.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm64/legacy_instructions.txt
+Chinese translated version of Documentation/arm64/legacy_instructions.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -10,7 +10,7 @@ Maintainer: Punit Agrawal <punit.agrawal@arm.com>
             Suzuki K. Poulose <suzuki.poulose@arm.com>
 Chinese maintainer: Fu Wei <wefu@redhat.com>
 ---------------------------------------------------------------------
-Documentation/arm64/legacy_instructions.txt 的中文翻译
+Documentation/arm64/legacy_instructions.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/arm64/memory.txt b/Documentation/translations/zh_CN/arm64/memory.txt
index 19b3a52d5d94..be20f8228b91 100644
--- a/Documentation/translations/zh_CN/arm64/memory.txt
+++ b/Documentation/translations/zh_CN/arm64/memory.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm64/memory.txt
+Chinese translated version of Documentation/arm64/memory.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -9,7 +9,7 @@ or if there is a problem with the translation.
 Maintainer: Catalin Marinas <catalin.marinas@arm.com>
 Chinese maintainer: Fu Wei <wefu@redhat.com>
 ---------------------------------------------------------------------
-Documentation/arm64/memory.txt 的中文翻译
+Documentation/arm64/memory.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/arm64/silicon-errata.txt b/Documentation/translations/zh_CN/arm64/silicon-errata.txt
index 39477c75c4a4..440c59ac7dce 100644
--- a/Documentation/translations/zh_CN/arm64/silicon-errata.txt
+++ b/Documentation/translations/zh_CN/arm64/silicon-errata.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm64/silicon-errata.txt
+Chinese translated version of Documentation/arm64/silicon-errata.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -10,7 +10,7 @@ M:	Will Deacon <will.deacon@arm.com>
 zh_CN:	Fu Wei <wefu@redhat.com>
 C:	1926e54f115725a9248d0c4c65c22acaf94de4c4
 ---------------------------------------------------------------------
-Documentation/arm64/silicon-errata.txt 的中文翻译
+Documentation/arm64/silicon-errata.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/arm64/tagged-pointers.txt b/Documentation/translations/zh_CN/arm64/tagged-pointers.txt
index 2664d1bd5a1c..77ac3548a16d 100644
--- a/Documentation/translations/zh_CN/arm64/tagged-pointers.txt
+++ b/Documentation/translations/zh_CN/arm64/tagged-pointers.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm64/tagged-pointers.txt
+Chinese translated version of Documentation/arm64/tagged-pointers.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -9,7 +9,7 @@ or if there is a problem with the translation.
 Maintainer: Will Deacon <will.deacon@arm.com>
 Chinese maintainer: Fu Wei <wefu@redhat.com>
 ---------------------------------------------------------------------
-Documentation/arm64/tagged-pointers.txt 的中文翻译
+Documentation/arm64/tagged-pointers.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index ba6c42c576dd..68984c284c40 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2205,7 +2205,7 @@ max_vq.  This is the maximum vector length available to the guest on
 this vcpu, and determines which register slices are visible through
 this ioctl interface.
 
-(See Documentation/arm64/sve.txt for an explanation of the "vq"
+(See Documentation/arm64/sve.rst for an explanation of the "vq"
 nomenclature.)
 
 KVM_REG_ARM64_SVE_VLS is only accessible after KVM_ARM_VCPU_INIT.
-- 
cgit 


From e327cfcb25422c91f4bb8e8a3488386ac95955f1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:39 -0300
Subject: docs: cdrom-standard.tex: convert from LaTeX to ReST

This is the only LaTeX documentation file inside the documentation.

Instead of having a Latex document directly there, convert
it to ReST format, as this is the format we're using for docs.

For now, let's keep the extension as .txt in order to avoid
warnings when building the documentation with Sphinx.

The next patch patch will rename it to .rst and add it to the
building system.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/cdrom/Makefile           |   21 -
 Documentation/cdrom/cdrom-standard.tex | 1026 ------------------------------
 Documentation/cdrom/cdrom-standard.txt | 1063 ++++++++++++++++++++++++++++++++
 3 files changed, 1063 insertions(+), 1047 deletions(-)
 delete mode 100644 Documentation/cdrom/Makefile
 delete mode 100644 Documentation/cdrom/cdrom-standard.tex
 create mode 100644 Documentation/cdrom/cdrom-standard.txt

(limited to 'Documentation')

diff --git a/Documentation/cdrom/Makefile b/Documentation/cdrom/Makefile
deleted file mode 100644
index a19e321928e1..000000000000
--- a/Documentation/cdrom/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-LATEXFILE = cdrom-standard
-
-all:
-	make clean
-	latex $(LATEXFILE)
-	latex $(LATEXFILE)
-	@if [ -x `which gv` ]; then \
-		`dvips -q -t letter -o $(LATEXFILE).ps $(LATEXFILE).dvi` ;\
-		`gv -antialias -media letter -nocenter $(LATEXFILE).ps` ;\
-	else \
-		`xdvi $(LATEXFILE).dvi &` ;\
-	fi
-	make sortofclean
-
-clean:
-	rm -f $(LATEXFILE).ps $(LATEXFILE).dvi $(LATEXFILE).aux $(LATEXFILE).log 
-
-sortofclean:
-	rm -f $(LATEXFILE).aux $(LATEXFILE).log 
-
-
diff --git a/Documentation/cdrom/cdrom-standard.tex b/Documentation/cdrom/cdrom-standard.tex
deleted file mode 100644
index f7cd455973f7..000000000000
--- a/Documentation/cdrom/cdrom-standard.tex
+++ /dev/null
@@ -1,1026 +0,0 @@
-\documentclass{article}
-\def\version{$Id: cdrom-standard.tex,v 1.9 1997/12/28 15:42:49 david Exp $}
-\newcommand{\newsection}[1]{\newpage\section{#1}}
-
-\evensidemargin=0pt
-\oddsidemargin=0pt
-\topmargin=-\headheight \advance\topmargin by -\headsep
-\textwidth=15.99cm \textheight=24.62cm % normal A4, 1'' margin
-
-\def\linux{{\sc Linux}}
-\def\cdrom{{\sc cd-rom}}
-\def\UCD{{\sc Uniform cd-rom Driver}}
-\def\cdromc{{\tt {cdrom.c}}}
-\def\cdromh{{\tt {cdrom.h}}}
-\def\fo{\sl}                    % foreign words
-\def\ie{{\fo i.e.}}
-\def\eg{{\fo e.g.}}
-
-\everymath{\it} \everydisplay{\it}
-\catcode `\_=\active \def_{\_\penalty100 }
-\catcode`\<=\active \def<#1>{{\langle\hbox{\rm#1}\rangle}}
-
-\begin{document}
-\title{A \linux\ \cdrom\ standard}
-\author{David van Leeuwen\\{\normalsize\tt david@ElseWare.cistron.nl}
-\\{\footnotesize updated by Erik Andersen {\tt(andersee@debian.org)}}
-\\{\footnotesize updated by Jens Axboe {\tt(axboe@image.dk)}}}
-\date{12 March 1999}
-
-\maketitle
-
-\newsection{Introduction}
-
-\linux\ is probably the Unix-like operating system that supports
-the widest variety of hardware devices. The reasons for this are
-presumably 
-\begin{itemize} 
-\item 
-  The large list of hardware devices available for the many platforms
-  that \linux\ now supports (\ie, i386-PCs, Sparc Suns, etc.)
-\item 
-  The open design of the operating system, such that anybody can write a
-  driver for \linux.
-\item 
-  There is plenty of source code around as examples of how to write a driver.
-\end{itemize}
-The openness of \linux, and the many different types of available
-hardware has allowed \linux\ to support many different hardware devices.
-Unfortunately, the very openness that has allowed \linux\ to support
-all these different devices has also allowed the behavior of each
-device driver to differ significantly from one device to another.
-This divergence of behavior has been very significant for \cdrom\
-devices; the way a particular drive reacts to a `standard' $ioctl()$
-call varies greatly from one device driver to another. To avoid making
-their drivers totally inconsistent, the writers of \linux\ \cdrom\
-drivers generally created new device drivers by understanding, copying,
-and then changing an existing one. Unfortunately, this practice did not
-maintain uniform behavior across all the \linux\ \cdrom\ drivers. 
-
-This document describes an effort to establish Uniform behavior across
-all the different \cdrom\ device drivers for \linux. This document also
-defines the various $ioctl$s, and how the low-level \cdrom\ device
-drivers should implement them. Currently (as of the \linux\ 2.1.$x$
-development kernels) several low-level \cdrom\ device drivers, including
-both IDE/ATAPI and SCSI, now use this Uniform interface.
-
-When the \cdrom\ was developed, the interface between the \cdrom\ drive
-and the computer was not specified in the standards. As a result, many
-different \cdrom\ interfaces were developed. Some of them had their
-own proprietary design (Sony, Mitsumi, Panasonic, Philips), other
-manufacturers adopted an existing electrical interface and changed
-the functionality (CreativeLabs/SoundBlaster, Teac, Funai) or simply
-adapted their drives to one or more of the already existing electrical
-interfaces (Aztech, Sanyo, Funai, Vertos, Longshine, Optics Storage and
-most of the `NoName' manufacturers). In cases where a new drive really
-brought its own interface or used its own command set and flow control
-scheme, either a separate driver had to be written, or an existing
-driver had to be enhanced. History has delivered us \cdrom\ support for
-many of these different interfaces. Nowadays, almost all new \cdrom\
-drives are either IDE/ATAPI or SCSI, and it is very unlikely that any
-manufacturer will create a new interface. Even finding drives for the
-old proprietary interfaces is getting difficult.
-
-When (in the 1.3.70's) I looked at the existing software interface,
-which was expressed through \cdromh, it appeared to be a rather wild
-set of commands and data formats.\footnote{I cannot recollect what
-kernel version I looked at, then, presumably 1.2.13 and 1.3.34---the
-latest kernel that I was indirectly involved in.} It seemed that many
-features of the software interface had been added to accommodate the
-capabilities of a particular drive, in an {\fo ad hoc\/} manner. More
-importantly, it appeared that the behavior of the `standard' commands
-was different for most of the different drivers: \eg, some drivers
-close the tray if an $open()$ call occurs when the tray is open, while
-others do not. Some drivers lock the door upon opening the device, to
-prevent an incoherent file system, but others don't, to allow software
-ejection. Undoubtedly, the capabilities of the different drives vary,
-but even when two drives have the same capability their drivers'
-behavior was usually different.
-
-I decided to start a discussion on how to make all the \linux\ \cdrom\
-drivers behave more uniformly. I began by contacting the developers of
-the many \cdrom\ drivers found in the \linux\ kernel. Their reactions
-encouraged me to write the \UCD\ which this document is intended to
-describe. The implementation of the \UCD\ is in the file \cdromc. This
-driver is intended to be an additional software layer that sits on top
-of the low-level device drivers for each \cdrom\ drive. By adding this
-additional layer, it is possible to have all the different \cdrom\
-devices behave {\em exactly\/} the same (insofar as the underlying
-hardware will allow).
-
-The goal of the \UCD\ is {\em not\/} to alienate driver developers who
-have not yet taken steps to support this effort. The goal of \UCD\ is
-simply to give people writing application programs for \cdrom\ drives
-{\em one\/} \linux\ \cdrom\ interface with consistent behavior for all
-\cdrom\ devices. In addition, this also provides a consistent interface
-between the low-level device driver code and the \linux\ kernel. Care
-is taken that 100\,\% compatibility exists with the data structures and
-programmer's interface defined in \cdromh. This guide was written to
-help \cdrom\ driver developers adapt their code to use the \UCD\ code
-defined in \cdromc.
-
-Personally, I think that the most important hardware interfaces are
-the IDE/ATAPI drives and, of course, the SCSI drives, but as prices
-of hardware drop continuously, it is also likely that people may have
-more than one \cdrom\ drive, possibly of mixed types. It is important
-that these drives behave in the same way. In December 1994, one of the
-cheapest \cdrom\ drives was a Philips cm206, a double-speed proprietary
-drive. In the months that I was busy writing a \linux\ driver for it,
-proprietary drives became obsolete and IDE/ATAPI drives became the
-standard. At the time of the last update to this document (November
-1997) it is becoming difficult to even {\em find} anything less than a
-16 speed \cdrom\ drive, and 24 speed drives are common.
-
-\newsection{Standardizing through another software level}
-\label{cdrom.c}
-
-At the time this document was conceived, all drivers directly
-implemented the \cdrom\ $ioctl()$ calls through their own routines. This
-led to the danger of different drivers forgetting to do important things
-like checking that the user was giving the driver valid data. More
-importantly, this led to the divergence of behavior, which has already
-been discussed.
-
-For this reason, the \UCD\ was created to enforce consistent \cdrom\
-drive behavior, and to provide a common set of services to the various
-low-level \cdrom\ device drivers. The \UCD\ now provides another
-software-level, that separates the $ioctl()$ and $open()$ implementation
-from the actual hardware implementation. Note that this effort has
-made few changes which will affect a user's application programs. The
-greatest change involved moving the contents of the various low-level
-\cdrom\ drivers' header files to the kernel's cdrom directory. This was
-done to help ensure that the user is only presented with only one cdrom
-interface, the interface defined in \cdromh.
-
-\cdrom\ drives are specific enough (\ie, different from other
-block-devices such as floppy or hard disc drives), to define a set
-of common {\em \cdrom\ device operations}, $<cdrom-device>_dops$.
-These operations are different from the classical block-device file
-operations, $<block-device>_fops$.
-
-The routines for the \UCD\ interface level are implemented in the file
-\cdromc. In this file, the \UCD\ interfaces with the kernel as a block
-device by registering the following general $struct\ file_operations$:
-$$
-\halign{$#$\ \hfil&$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
-struct& file_operations\ cdrom_fops = \{\hidewidth\cr
-        &NULL,                  & lseek \cr
-        &block_read,            & read---general block-dev read \cr
-        &block_write,           & write---general block-dev write \cr
-        &NULL,                  & readdir \cr
-        &NULL,                  & select \cr
-        &cdrom_ioctl,           & ioctl \cr
-        &NULL,                  & mmap \cr
-        &cdrom_open,            & open \cr
-        &cdrom_release,         & release \cr
-        &NULL,                  & fsync \cr
-        &NULL,                  & fasync \cr
-        &cdrom_media_changed,   & media change \cr
-        &NULL                   & revalidate \cr
-\};\cr
-}
-$$ 
-
-Every active \cdrom\ device shares this $struct$. The routines
-declared above are all implemented in \cdromc, since this file is the
-place where the behavior of all \cdrom-devices is defined and
-standardized. The actual interface to the various types of \cdrom\ 
-hardware is still performed by various low-level \cdrom-device
-drivers. These routines simply implement certain {\em capabilities\/}
-that are common to all \cdrom\ (and really, all removable-media
-devices).
-
-Registration of a low-level \cdrom\ device driver is now done through
-the general routines in \cdromc, not through the Virtual File System
-(VFS) any more. The interface implemented in \cdromc\ is carried out
-through two general structures that contain information about the
-capabilities of the driver, and the specific drives on which the
-driver operates. The structures are:
-\begin{description}
-\item[$cdrom_device_ops$] 
-  This structure contains information about the low-level driver for a
-  \cdrom\ device. This structure is conceptually connected to the major
-  number of the device (although some drivers may have different
-  major numbers, as is the case for the IDE driver).
-\item[$cdrom_device_info$] 
-  This structure contains information about a particular \cdrom\ drive,
-  such as its device name, speed, etc. This structure is conceptually
-  connected to the minor number of the device.
-\end{description}
-
-Registering a particular \cdrom\ drive with the \UCD\ is done by the
-low-level device driver though a call to:
-$$register_cdrom(struct\ cdrom_device_info * <device>_info)  
-$$
-The device information structure, $<device>_info$, contains all the
-information needed for the kernel to interface with the low-level
-\cdrom\ device driver. One of the most important entries in this
-structure is a pointer to the $cdrom_device_ops$ structure of the
-low-level driver.
-
-The device operations structure, $cdrom_device_ops$, contains a list
-of pointers to the functions which are implemented in the low-level
-device driver. When \cdromc\ accesses a \cdrom\ device, it does it
-through the functions in this structure. It is impossible to know all
-the capabilities of future \cdrom\ drives, so it is expected that this
-list may need to be expanded from time to time as new technologies are
-developed. For example, CD-R and CD-R/W drives are beginning to become
-popular, and support will soon need to be added for them. For now, the
-current $struct$ is:
-$$
-\halign{$#$\ \hfil&$#$\ \hfil&\hbox to 10em{$#$\hss}&
-  $/*$ \rm# $*/$\hfil\cr
-struct& cdrom_device_ops\ \{ \hidewidth\cr
-  &int& (* open)(struct\ cdrom_device_info *, int)\cr
-  &void& (* release)(struct\ cdrom_device_info *);\cr 
-  &int& (* drive_status)(struct\ cdrom_device_info *, int);\cr     
-  &unsigned\ int& (* check_events)(struct\ cdrom_device_info *, unsigned\ int, int);\cr
-  &int& (* media_changed)(struct\ cdrom_device_info *, int);\cr 
-  &int& (* tray_move)(struct\ cdrom_device_info *, int);\cr
-  &int& (* lock_door)(struct\ cdrom_device_info *, int);\cr
-  &int& (* select_speed)(struct\ cdrom_device_info *, int);\cr
-  &int& (* select_disc)(struct\ cdrom_device_info *, int);\cr
-  &int& (* get_last_session) (struct\ cdrom_device_info *, 
-        struct\ cdrom_multisession *{});\cr
-  &int& (* get_mcn)(struct\ cdrom_device_info *, struct\ cdrom_mcn *{});\cr
-  &int& (* reset)(struct\ cdrom_device_info *);\cr
-  &int& (* audio_ioctl)(struct\ cdrom_device_info *, unsigned\ int, 
-        void *{});\cr 
-\noalign{\medskip}
-  &const\ int& capability;& capability flags \cr
-  &int& (* generic_packet)(struct\ cdrom_device_info *, struct\ packet_command *{});\cr
-\};\cr
-}
-$$
-When a low-level device driver implements one of these capabilities,
-it should add a function pointer to this $struct$. When a particular
-function is not implemented, however, this $struct$ should contain a
-NULL instead. The $capability$ flags specify the capabilities of the
-\cdrom\ hardware and/or low-level \cdrom\ driver when a \cdrom\ drive
-is registered with the \UCD.
-
-Note that most functions have fewer parameters than their
-$blkdev_fops$ counterparts. This is because very little of the
-information in the structures $inode$ and $file$ is used. For most
-drivers, the main parameter is the $struct$ $cdrom_device_info$, from
-which the major and minor number can be extracted. (Most low-level
-\cdrom\ drivers don't even look at the major and minor number though,
-since many of them only support one device.) This will be available
-through $dev$ in $cdrom_device_info$ described below.
-
-The drive-specific, minor-like information that is registered with
-\cdromc, currently contains the following fields:
-$$
-\halign{$#$\ \hfil&$#$\ \hfil&\hbox to 10em{$#$\hss}&
-  $/*$ \rm# $*/$\hfil\cr
-struct& cdrom_device_info\ \{ \hidewidth\cr
-  & const\ struct\ cdrom_device_ops *& ops;& device operations for this major\cr
-  & struct\ list_head& list;& linked list of all device_info\cr
-  & struct\ gendisk *& disk;& matching block layer disk\cr
-  & void *&  handle;& driver-dependent data\cr
-\noalign{\medskip}
-  & int& mask;& mask of capability: disables them \cr
-  & int& speed;& maximum speed for reading data \cr
-  & int& capacity;& number of discs in a jukebox \cr
-\noalign{\medskip}
-  &unsigned\ int& options : 30;& options flags \cr
-  &unsigned& mc_flags : 2;& media-change buffer flags \cr
-  &unsigned\ int& vfs_events;& cached events for vfs path\cr
-  &unsigned\ int& ioctl_events;& cached events for ioctl path\cr
-  & int& use_count;& number of times device is opened\cr
-  & char& name[20];& name of the device type\cr
-\noalign{\medskip}
-  &__u8& sanyo_slot : 2;& Sanyo 3-CD changer support\cr
-  &__u8& keeplocked : 1;& CDROM_LOCKDOOR status\cr
-  &__u8& reserved : 5;& not used yet\cr
-  & int& cdda_method;& see CDDA_* flags\cr
-  &__u8& last_sense;& saves last sense key\cr
-  &__u8& media_written;& dirty flag, DVD+RW bookkeeping\cr
-  &unsigned\ short& mmc3_profile;& current MMC3 profile\cr
-  & int& for_data;& unknown:TBD\cr
-  & int\ (* exit)\ (struct\ cdrom_device_info *);&& unknown:TBD\cr
-  & int& mrw_mode_page;& which MRW mode page is in use\cr
-\}\cr
-}$$
-Using this $struct$, a linked list of the registered minor devices is
-built, using the $next$ field. The device number, the device operations
-struct and specifications of properties of the drive are stored in this
-structure.
-
-The $mask$ flags can be used to mask out some of the capabilities listed
-in $ops\to capability$, if a specific drive doesn't support a feature
-of the driver. The value $speed$ specifies the maximum head-rate of the
-drive, measured in units of normal audio speed (176\,kB/sec raw data or
-150\,kB/sec file system data).  The parameters are declared $const$
-because they describe properties of the drive, which don't change after
-registration.
-
-A few registers contain variables local to the \cdrom\ drive. The
-flags $options$ are used to specify how the general \cdrom\ routines
-should behave. These various flags registers should provide enough
-flexibility to adapt to the different users' wishes (and {\em not\/} the
-`arbitrary' wishes of the author of the low-level device driver, as is
-the case in the old scheme). The register $mc_flags$ is used to buffer
-the information from $media_changed()$ to two separate queues. Other
-data that is specific to a minor drive, can be accessed through $handle$,
-which can point to a data structure specific to the low-level driver.
-The fields $use_count$, $next$, $options$ and $mc_flags$ need not be
-initialized.
-
-The intermediate software layer that \cdromc\ forms will perform some
-additional bookkeeping. The use count of the device (the number of
-processes that have the device opened) is registered in $use_count$. The
-function $cdrom_ioctl()$ will verify the appropriate user-memory regions
-for read and write, and in case a location on the CD is transferred,
-it will `sanitize' the format by making requests to the low-level
-drivers in a standard format, and translating all formats between the
-user-software and low level drivers. This relieves much of the drivers'
-memory checking and format checking and translation. Also, the necessary
-structures will be declared on the program stack.
-
-The implementation of the functions should be as defined in the
-following sections. Two functions {\em must\/} be implemented, namely
-$open()$ and $release()$. Other functions may be omitted, their
-corresponding capability flags will be cleared upon registration.
-Generally, a function returns zero on success and negative on error. A
-function call should return only after the command has completed, but of
-course waiting for the device should not use processor time.
-
-\subsection{$Int\ open(struct\ cdrom_device_info * cdi, int\ purpose)$}
-
-$Open()$ should try to open the device for a specific $purpose$, which
-can be either:
-\begin{itemize}
-\item[0] Open for reading data, as done by {\tt {mount()}} (2), or the
-user commands {\tt {dd}} or {\tt {cat}}.  
-\item[1] Open for $ioctl$ commands, as done by audio-CD playing
-programs.
-\end{itemize}
-Notice that any strategic code (closing tray upon $open()$, etc.)\ is
-done by the calling routine in \cdromc, so the low-level routine
-should only be concerned with proper initialization, such as spinning
-up the disc, etc. % and device-use count
-
-
-\subsection{$Void\ release(struct\ cdrom_device_info * cdi)$}
-
-
-Device-specific actions should be taken such as spinning down the device.
-However, strategic actions such as ejection of the tray, or unlocking
-the door, should be left over to the general routine $cdrom_release()$.
-This is the only function returning type $void$.
-
-\subsection{$Int\ drive_status(struct\ cdrom_device_info * cdi, int\ slot_nr)$}
-\label{drive status}
-
-The function $drive_status$, if implemented, should provide
-information on the status of the drive (not the status of the disc,
-which may or may not be in the drive). If the drive is not a changer,
-$slot_nr$ should be ignored. In \cdromh\ the possibilities are listed: 
-$$
-\halign{$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
-CDS_NO_INFO& no information available\cr
-CDS_NO_DISC& no disc is inserted, tray is closed\cr
-CDS_TRAY_OPEN& tray is opened\cr
-CDS_DRIVE_NOT_READY& something is wrong, tray is moving?\cr
-CDS_DISC_OK& a disc is loaded and everything is fine\cr
-}
-$$
-
-\subsection{$Int\ media_changed(struct\ cdrom_device_info * cdi, int\ disc_nr)$}
-
-This function is very similar to the original function in $struct\ 
-file_operations$. It returns 1 if the medium of the device $cdi\to
-dev$ has changed since the last call, and 0 otherwise. The parameter
-$disc_nr$ identifies a specific slot in a juke-box, it should be
-ignored for single-disc drives.  Note that by `re-routing' this
-function through $cdrom_media_changed()$, we can implement separate
-queues for the VFS and a new $ioctl()$ function that can report device
-changes to software (\eg, an auto-mounting daemon).
-
-\subsection{$Int\ tray_move(struct\ cdrom_device_info * cdi, int\ position)$}
-
-This function, if implemented, should control the tray movement. (No
-other function should control this.) The parameter $position$ controls
-the desired direction of movement:
-\begin{itemize}
-\item[0] Close tray
-\item[1] Open tray
-\end{itemize}
-This function returns 0 upon success, and a non-zero value upon
-error. Note that if the tray is already in the desired position, no
-action need be taken, and the return value should be 0. 
-
-\subsection{$Int\ lock_door(struct\ cdrom_device_info * cdi, int\ lock)$}
-
-This function (and no other code) controls locking of the door, if the
-drive allows this. The value of $lock$ controls the desired locking
-state:
-\begin{itemize}
-\item[0] Unlock door, manual opening is allowed
-\item[1] Lock door, tray cannot be ejected manually
-\end{itemize}
-This function returns 0 upon success, and a non-zero value upon
-error. Note that if the door is already in the requested state, no
-action need be taken, and the return value should be 0. 
-
-\subsection{$Int\ select_speed(struct\ cdrom_device_info * cdi, int\ speed)$}
-
-Some \cdrom\ drives are capable of changing their head-speed. There
-are several reasons for changing the speed of a \cdrom\ drive. Badly
-pressed \cdrom s may benefit from less-than-maximum head rate. Modern
-\cdrom\ drives can obtain very high head rates (up to $24\times$ is
-common).  It has been reported that these drives can make reading
-errors at these high speeds, reducing the speed can prevent data loss
-in these circumstances.  Finally, some of these drives can
-make an annoyingly loud noise, which a lower speed may reduce. %Finally,
-%although the audio-low-pass filters probably aren't designed for it,
-%more than real-time playback of audio might be used for high-speed
-%copying of audio tracks.
-
-This function specifies the speed at which data is read or audio is
-played back. The value of $speed$ specifies the head-speed of the
-drive, measured in units of standard cdrom speed (176\,kB/sec raw data
-or 150\,kB/sec file system data). So to request that a \cdrom\ drive
-operate at 300\,kB/sec you would call the CDROM_SELECT_SPEED $ioctl$
-with $speed=2$. The special value `0' means `auto-selection', \ie,
-maximum data-rate or real-time audio rate. If the drive doesn't have
-this `auto-selection' capability, the decision should be made on the
-current disc loaded and the return value should be positive. A negative
-return value indicates an error.
-
-\subsection{$Int\ select_disc(struct\ cdrom_device_info * cdi, int\ number)$}
-
-If the drive can store multiple discs (a juke-box) this function
-will perform disc selection. It should return the number of the
-selected disc on success, a negative value on error. Currently, only
-the ide-cd driver supports this functionality.
-
-\subsection{$Int\ get_last_session(struct\ cdrom_device_info * cdi, struct\
-  cdrom_multisession * ms_info)$}
-
-This function should implement the old corresponding $ioctl()$. For
-device $cdi\to dev$, the start of the last session of the current disc
-should be returned in the pointer argument $ms_info$. Note that
-routines in \cdromc\ have sanitized this argument: its requested
-format will {\em always\/} be of the type $CDROM_LBA$ (linear block
-addressing mode), whatever the calling software requested. But
-sanitization goes even further: the low-level implementation may
-return the requested information in $CDROM_MSF$ format if it wishes so
-(setting the $ms_info\rightarrow addr_format$ field appropriately, of
-course) and the routines in \cdromc\ will make the transformation if
-necessary. The return value is 0 upon success.
-
-\subsection{$Int\ get_mcn(struct\ cdrom_device_info * cdi, struct\
-  cdrom_mcn * mcn)$}
-
-Some discs carry a `Media Catalog Number' (MCN), also called
-`Universal Product Code' (UPC). This number should reflect the number
-that is generally found in the bar-code on the product. Unfortunately,
-the few discs that carry such a number on the disc don't even use the
-same format. The return argument to this function is a pointer to a
-pre-declared memory region of type $struct\ cdrom_mcn$. The MCN is
-expected as a 13-character string, terminated by a null-character.
-
-\subsection{$Int\ reset(struct\ cdrom_device_info * cdi)$}
-
-This call should perform a hard-reset on the drive (although in
-circumstances that a hard-reset is necessary, a drive may very well not
-listen to commands anymore). Preferably, control is returned to the
-caller only after the drive has finished resetting. If the drive is no
-longer listening, it may be wise for the underlying low-level cdrom
-driver to time out.
-
-\subsection{$Int\ audio_ioctl(struct\ cdrom_device_info * cdi, unsigned\
-  int\ cmd, void * arg)$}
-
-Some of the \cdrom-$ioctl$s defined in \cdromh\ can be
-implemented by the routines described above, and hence the function
-$cdrom_ioctl$ will use those. However, most $ioctl$s deal with
-audio-control. We have decided to leave these to be accessed through a
-single function, repeating the arguments $cmd$ and $arg$. Note that
-the latter is of type $void*{}$, rather than $unsigned\ long\
-int$. The routine $cdrom_ioctl()$ does do some useful things,
-though. It sanitizes the address format type to $CDROM_MSF$ (Minutes,
-Seconds, Frames) for all audio calls. It also verifies the memory
-location of $arg$, and reserves stack-memory for the argument. This
-makes implementation of the $audio_ioctl()$ much simpler than in the
-old driver scheme. For example, you may look up the function
-$cm206_audio_ioctl()$ in {\tt {cm206.c}} that should be updated with
-this documentation. 
-
-An unimplemented ioctl should return $-ENOSYS$, but a harmless request
-(\eg, $CDROMSTART$) may be ignored by returning 0 (success). Other
-errors should be according to the standards, whatever they are. When
-an error is returned by the low-level driver, the \UCD\ tries whenever
-possible to return the error code to the calling program. (We may decide
-to sanitize the return value in $cdrom_ioctl()$ though, in order to
-guarantee a uniform interface to the audio-player software.)
-
-\subsection{$Int\ dev_ioctl(struct\ cdrom_device_info * cdi, unsigned\ int\
-  cmd, unsigned\ long\ arg)$}
-
-Some $ioctl$s seem to be specific to certain \cdrom\ drives. That is,
-they are introduced to service some capabilities of certain drives. In
-fact, there are 6 different $ioctl$s for reading data, either in some
-particular kind of format, or audio data. Not many drives support
-reading audio tracks as data, I believe this is because of protection
-of copyrights of artists. Moreover, I think that if audio-tracks are
-supported, it should be done through the VFS and not via $ioctl$s. A
-problem here could be the fact that audio-frames are 2352 bytes long,
-so either the audio-file-system should ask for 75264 bytes at once
-(the least common multiple of 512 and 2352), or the drivers should
-bend their backs to cope with this incoherence (to which I would be
-opposed).  Furthermore, it is very difficult for the hardware to find
-the exact frame boundaries, since there are no synchronization headers
-in audio frames.  Once these issues are resolved, this code should be
-standardized in \cdromc.
-
-Because there are so many $ioctl$s that seem to be introduced to
-satisfy certain drivers,\footnote{Is there software around that
-  actually uses these? I'd be interested!} any `non-standard' $ioctl$s
-are routed through the call $dev_ioctl()$. In principle, `private'
-$ioctl$s should be numbered after the device's major number, and not
-the general \cdrom\ $ioctl$ number, {\tt {0x53}}. Currently the
-non-supported $ioctl$s are: {\it CDROMREADMODE1, CDROMREADMODE2,
-  CDROMREADAUDIO, CDROMREADRAW, CDROMREADCOOKED, CDROMSEEK,
-  CDROMPLAY\-BLK and CDROM\-READALL}.
-
-
-\subsection{\cdrom\ capabilities}
-\label{capability}
-
-Instead of just implementing some $ioctl$ calls, the interface in
-\cdromc\ supplies the possibility to indicate the {\em capabilities\/}
-of a \cdrom\ drive. This can be done by ORing any number of
-capability-constants that are defined in \cdromh\ at the registration
-phase. Currently, the capabilities are any of:
-$$
-\halign{$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
-CDC_CLOSE_TRAY& can close tray by software control\cr
-CDC_OPEN_TRAY& can open tray\cr
-CDC_LOCK& can lock and unlock the door\cr
-CDC_SELECT_SPEED& can select speed, in units of $\sim$150\,kB/s\cr
-CDC_SELECT_DISC& drive is juke-box\cr
-CDC_MULTI_SESSION& can read sessions $>\rm1$\cr
-CDC_MCN& can read Media Catalog Number\cr
-CDC_MEDIA_CHANGED& can report if disc has changed\cr
-CDC_PLAY_AUDIO& can perform audio-functions (play, pause, etc)\cr
-CDC_RESET& hard reset device\cr
-CDC_IOCTLS& driver has non-standard ioctls\cr
-CDC_DRIVE_STATUS& driver implements drive status\cr
-}
-$$
-The capability flag is declared $const$, to prevent drivers from
-accidentally tampering with the contents. The capability fags actually
-inform \cdromc\ of what the driver can do. If the drive found
-by the driver does not have the capability, is can be masked out by
-the $cdrom_device_info$ variable $mask$. For instance, the SCSI \cdrom\
-driver has implemented the code for loading and ejecting \cdrom's, and
-hence its corresponding flags in $capability$ will be set. But a SCSI
-\cdrom\ drive might be a caddy system, which can't load the tray, and
-hence for this drive the $cdrom_device_info$ struct will have set
-the $CDC_CLOSE_TRAY$ bit in $mask$.
-
-In the file \cdromc\ you will encounter many constructions of the type
-$$\it
-if\ (cdo\rightarrow capability \mathrel\& \mathord{\sim} cdi\rightarrow mask 
-   \mathrel{\&} CDC_<capability>) \ldots
-$$
-There is no $ioctl$ to set the mask\dots The reason is that
-I think it is better to control the {\em behavior\/} rather than the
-{\em capabilities}.
-
-\subsection{Options}
-
-A final flag register controls the {\em behavior\/} of the \cdrom\
-drives, in order to satisfy different users' wishes, hopefully
-independently of the ideas of the respective author who happened to
-have made the drive's support available to the \linux\ community. The
-current behavior options are:
-$$
-\halign{$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
-CDO_AUTO_CLOSE& try to close tray upon device $open()$\cr
-CDO_AUTO_EJECT& try to open tray on last device $close()$\cr
-CDO_USE_FFLAGS& use $file_pointer\rightarrow f_flags$ to indicate
- purpose for $open()$\cr
-CDO_LOCK& try to lock door if device is opened\cr
-CDO_CHECK_TYPE& ensure disc type is data if opened for data\cr
-}
-$$
-
-The initial value of this register is $CDO_AUTO_CLOSE \mathrel|
-CDO_USE_FFLAGS \mathrel| CDO_LOCK$, reflecting my own view on user
-interface and software standards. Before you protest, there are two
-new $ioctl$s implemented in \cdromc, that allow you to control the
-behavior by software. These are:
-$$
-\halign{$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
-CDROM_SET_OPTIONS& set options specified in $(int)\ arg$\cr
-CDROM_CLEAR_OPTIONS& clear options specified in $(int)\ arg$\cr
-}
-$$
-One option needs some more explanation: $CDO_USE_FFLAGS$. In the next
-newsection we explain what the need for this option is.
-
-A software package {\tt setcd}, available from the Debian distribution
-and {\tt sunsite.unc.edu}, allows user level control of these flags. 
-
-\newsection{The need to know the purpose of opening the \cdrom\ device}
-
-Traditionally, Unix devices can be used in two different `modes',
-either by reading/writing to the device file, or by issuing
-controlling commands to the device, by the device's $ioctl()$
-call. The problem with \cdrom\ drives, is that they can be used for
-two entirely different purposes. One is to mount removable
-file systems, \cdrom s, the other is to play audio CD's. Audio commands
-are implemented entirely through $ioctl$s, presumably because the
-first implementation (SUN?) has been such. In principle there is
-nothing wrong with this, but a good control of the `CD player' demands
-that the device can {\em always\/} be opened in order to give the
-$ioctl$ commands, regardless of the state the drive is in. 
-
-On the other hand, when used as a removable-media disc drive (what the
-original purpose of \cdrom s is) we would like to make sure that the
-disc drive is ready for operation upon opening the device. In the old
-scheme, some \cdrom\ drivers don't do any integrity checking, resulting
-in a number of i/o errors reported by the VFS to the kernel when an
-attempt for mounting a \cdrom\ on an empty drive occurs. This is not a
-particularly elegant way to find out that there is no \cdrom\ inserted;
-it more-or-less looks like the old IBM-PC trying to read an empty floppy
-drive for a couple of seconds, after which the system complains it
-can't read from it. Nowadays we can {\em sense\/} the existence of a
-removable medium in a drive, and we believe we should exploit that
-fact. An integrity check on opening of the device, that verifies the
-availability of a \cdrom\ and its correct type (data), would be
-desirable.
-
-These two ways of using a \cdrom\ drive, principally for data and
-secondarily for playing audio discs, have different demands for the
-behavior of the $open()$ call. Audio use simply wants to open the
-device in order to get a file handle which is needed for issuing
-$ioctl$ commands, while data use wants to open for correct and
-reliable data transfer. The only way user programs can indicate what
-their {\em purpose\/} of opening the device is, is through the $flags$
-parameter (see {\tt {open(2)}}). For \cdrom\ devices, these flags aren't
-implemented (some drivers implement checking for write-related flags,
-but this is not strictly necessary if the device file has correct
-permission flags). Most option flags simply don't make sense to
-\cdrom\ devices: $O_CREAT$, $O_NOCTTY$, $O_TRUNC$, $O_APPEND$, and
-$O_SYNC$ have no meaning to a \cdrom. 
-
-We therefore propose to use the flag $O_NONBLOCK$ to indicate
-that the device is opened just for issuing $ioctl$
-commands. Strictly, the meaning of $O_NONBLOCK$ is that opening and
-subsequent calls to the device don't cause the calling process to
-wait. We could interpret this as ``don't wait until someone has
-inserted some valid data-\cdrom.'' Thus, our proposal of the
-implementation for the $open()$ call for \cdrom s is:
-\begin{itemize}
-\item If no other flags are set than $O_RDONLY$, the device is opened
-for data transfer, and the return value will be 0 only upon successful
-initialization of the transfer. The call may even induce some actions
-on the \cdrom, such as closing the tray.  
-\item If the option flag $O_NONBLOCK$ is set, opening will always be
-successful, unless the whole device doesn't exist. The drive will take
-no actions whatsoever. 
-\end{itemize}
-
-\subsection{And what about standards?}
-
-You might hesitate to accept this proposal as it comes from the
-\linux\ community, and not from some standardizing institute. What
-about SUN, SGI, HP and all those other Unix and hardware vendors?
-Well, these companies are in the lucky position that they generally
-control both the hardware and software of their supported products,
-and are large enough to set their own standard. They do not have to
-deal with a dozen or more different, competing hardware
-configurations.\footnote{Incidentally, I think that SUN's approach to
-mounting \cdrom s is very good in origin: under Solaris a
-volume-daemon automatically mounts a newly inserted \cdrom\ under {\tt
-{/cdrom/$<volume-name>$/}}. In my opinion they should have pushed this
-further and have {\em every\/} \cdrom\ on the local area network be
-mounted at the similar location, \ie, no matter in which particular
-machine you insert a \cdrom, it will always appear at the same
-position in the directory tree, on every system. When I wanted to
-implement such a user-program for \linux, I came across the
-differences in behavior of the various drivers, and the need for an
-$ioctl$ informing about media changes.}
-
-We believe that using $O_NONBLOCK$ to indicate that a device is being opened
-for $ioctl$ commands only can be easily introduced in the \linux\
-community. All the CD-player authors will have to be informed, we can
-even send in our own patches to the programs. The use of $O_NONBLOCK$
-has most likely no influence on the behavior of the CD-players on
-other operating systems than \linux. Finally, a user can always revert
-to old behavior by a call to $ioctl(file_descriptor, CDROM_CLEAR_OPTIONS,
-CDO_USE_FFLAGS)$. 
-
-\subsection{The preferred strategy of $open()$}
-
-The routines in \cdromc\ are designed in such a way that run-time
-configuration of the behavior of \cdrom\ devices (of {\em any\/} type)
-can be carried out, by the $CDROM_SET/CLEAR_OPTIONS$ $ioctls$. Thus, various
-modes of operation can be set:
-\begin{description}
-\item[$CDO_AUTO_CLOSE \mathrel| CDO_USE_FFLAGS \mathrel| CDO_LOCK$] This
-is the default setting. (With $CDO_CHECK_TYPE$ it will be better, in the
-future.) If the device is not yet opened by any other process, and if
-the device is being opened for data ($O_NONBLOCK$ is not set) and the
-tray is found to be open, an attempt to close the tray is made. Then,
-it is verified that a disc is in the drive and, if $CDO_CHECK_TYPE$ is
-set, that it contains tracks of type `data mode 1.' Only if all tests
-are passed is the return value zero. The door is locked to prevent file
-system corruption. If the drive is opened for audio ($O_NONBLOCK$ is
-set), no actions are taken and a value of 0 will be returned. 
-\item[$CDO_AUTO_CLOSE \mathrel| CDO_AUTO_EJECT \mathrel| CDO_LOCK$] This
-mimics the behavior of the current sbpcd-driver. The option flags are
-ignored, the tray is closed on the first open, if necessary. Similarly,
-the tray is opened on the last release, \ie, if a \cdrom\ is unmounted,
-it is automatically ejected, such that the user can replace it.
-\end{description} 
-We hope that these option can convince everybody (both driver
-maintainers and user program developers) to adopt the new \cdrom\
-driver scheme and option flag interpretation.
-
-\newsection{Description of routines in \cdromc}
-
-Only a few routines in \cdromc\ are exported to the drivers. In this
-new section we will discuss these, as well as the functions that `take
-over' the \cdrom\ interface to the kernel. The header file belonging
-to \cdromc\ is called \cdromh. Formerly, some of the contents of this
-file were placed in the file {\tt {ucdrom.h}}, but this file has now been
-merged back into \cdromh.
-
-\subsection{$Struct\ file_operations\ cdrom_fops$}
-
-The contents of this structure were described in section~\ref{cdrom.c}.
-A pointer to this structure is assigned to the $fops$ field
-of the $struct gendisk$.
-
-\subsection{$Int\ register_cdrom( struct\ cdrom_device_info\ * cdi)$}
-
-This function is used in about the same way one registers $cdrom_fops$
-with the kernel, the device operations and information structures,
-as described in section~\ref{cdrom.c}, should be registered with the
-\UCD:
-$$
-register_cdrom(\&<device>_info));
-$$
-This function returns zero upon success, and non-zero upon
-failure. The structure $<device>_info$ should have a pointer to the
-driver's $<device>_dops$, as in 
-$$
-\vbox{\halign{&$#$\hfil\cr
-struct\ &cdrom_device_info\ <device>_info = \{\cr
-& <device>_dops;\cr
-&\ldots\cr
-\}\cr
-}}$$
-Note that a driver must have one static structure, $<device>_dops$, while
-it may have as many structures $<device>_info$ as there are minor devices
-active. $Register_cdrom()$ builds a linked list from these. 
-
-\subsection{$Void\ unregister_cdrom(struct\ cdrom_device_info * cdi)$}
-
-Unregistering device $cdi$ with minor number $MINOR(cdi\to dev)$ removes
-the minor device from the list. If it was the last registered minor for
-the low-level driver, this disconnects the registered device-operation
-routines from the \cdrom\ interface. This function returns zero upon
-success, and non-zero upon failure.
-
-\subsection{$Int\ cdrom_open(struct\ inode * ip, struct\ file * fp)$}
-
-This function is not called directly by the low-level drivers, it is
-listed in the standard $cdrom_fops$. If the VFS opens a file, this
-function becomes active. A strategy is implemented in this routine,
-taking care of all capabilities and options that are set in the
-$cdrom_device_ops$ connected to the device. Then, the program flow is
-transferred to the device_dependent $open()$ call.
-
-\subsection{$Void\ cdrom_release(struct\ inode *ip, struct\ file
-*fp)$}
-
-This function implements the reverse-logic of $cdrom_open()$, and then
-calls the device-dependent $release()$ routine. When the use-count has
-reached 0, the allocated buffers are flushed by calls to $sync_dev(dev)$
-and $invalidate_buffers(dev)$.
-
-
-\subsection{$Int\ cdrom_ioctl(struct\ inode *ip, struct\ file *fp,
-unsigned\ int\ cmd, unsigned\ long\ arg)$}
-\label{cdrom-ioctl}
-
-This function handles all the standard $ioctl$ requests for \cdrom\
-devices in a uniform way. The different calls fall into three
-categories: $ioctl$s that can be directly implemented by device
-operations, ones that are routed through the call $audio_ioctl()$, and
-the remaining ones, that are presumable device-dependent. Generally, a
-negative return value indicates an error.
-
-\subsubsection{Directly implemented $ioctl$s}
-\label{ioctl-direct}
-
-The following `old' \cdrom-$ioctl$s are implemented by directly
-calling device-operations in $cdrom_device_ops$, if implemented and
-not masked:
-\begin{description}
-\item[CDROMMULTISESSION] Requests the last session on a \cdrom.
-\item[CDROMEJECT] Open tray. 
-\item[CDROMCLOSETRAY] Close tray.
-\item[CDROMEJECT_SW] If $arg\not=0$, set behavior to auto-close (close
-tray on first open) and auto-eject (eject on last release), otherwise
-set behavior to non-moving on $open()$ and $release()$ calls.
-\item[CDROM_GET_MCN] Get the Media Catalog Number from a CD.
-\end{description}
-
-\subsubsection{$Ioctl$s routed through $audio_ioctl()$}
-\label{ioctl-audio}
-
-The following set of $ioctl$s are all implemented through a call to
-the $cdrom_fops$ function $audio_ioctl()$. Memory checks and
-allocation are performed in $cdrom_ioctl()$, and also sanitization of
-address format ($CDROM_LBA$/$CDROM_MSF$) is done.
-\begin{description}
-\item[CDROMSUBCHNL] Get sub-channel data in argument $arg$ of type $struct\
-cdrom_subchnl *{}$.
-\item[CDROMREADTOCHDR] Read Table of Contents header, in $arg$ of type
-$struct\ cdrom_tochdr *{}$. 
-\item[CDROMREADTOCENTRY] Read a Table of Contents entry in $arg$ and
-specified by $arg$ of type $struct\ cdrom_tocentry *{}$.
-\item[CDROMPLAYMSF] Play audio fragment specified in Minute, Second,
-Frame format, delimited by $arg$ of type $struct\ cdrom_msf *{}$.
-\item[CDROMPLAYTRKIND] Play audio fragment in track-index format
-delimited by $arg$ of type $struct\ \penalty-1000 cdrom_ti *{}$.
-\item[CDROMVOLCTRL] Set volume specified by $arg$ of type $struct\
-cdrom_volctrl *{}$.
-\item[CDROMVOLREAD] Read volume into by $arg$ of type $struct\
-cdrom_volctrl *{}$.
-\item[CDROMSTART] Spin up disc.
-\item[CDROMSTOP] Stop playback of audio fragment.
-\item[CDROMPAUSE] Pause playback of audio fragment.
-\item[CDROMRESUME] Resume playing.
-\end{description}
-
-\subsubsection{New $ioctl$s in \cdromc}
-
-The following $ioctl$s have been introduced to allow user programs to
-control the behavior of individual \cdrom\ devices. New $ioctl$
-commands can be identified by the underscores in their names.
-\begin{description}
-\item[CDROM_SET_OPTIONS] Set options specified by $arg$. Returns the
-option flag register after modification. Use  $arg = \rm0$ for reading
-the current flags.
-\item[CDROM_CLEAR_OPTIONS] Clear options specified by $arg$. Returns
-  the option flag register after modification.
-\item[CDROM_SELECT_SPEED] Select head-rate speed of disc specified as
-  by $arg$ in units of standard cdrom speed (176\,kB/sec raw data or
-  150\,kB/sec file system data). The value 0 means `auto-select', \ie,
-  play audio discs at real time and data discs at maximum speed. The value
-  $arg$ is checked against the maximum head rate of the drive found in the
-  $cdrom_dops$.
-\item[CDROM_SELECT_DISC] Select disc numbered $arg$ from a juke-box.
-  First disc is numbered 0. The number $arg$ is checked against the
-  maximum number of discs in the juke-box found in the $cdrom_dops$.
-\item[CDROM_MEDIA_CHANGED] Returns 1 if a disc has been changed since
-  the last call. Note that calls to $cdrom_media_changed$ by the VFS
-  are treated by an independent queue, so both mechanisms will detect
-  a media change once. For juke-boxes, an extra argument $arg$
-  specifies the slot for which the information is given. The special
-  value $CDSL_CURRENT$ requests that information about the currently
-  selected slot be returned.
-\item[CDROM_DRIVE_STATUS] Returns the status of the drive by a call to
-  $drive_status()$. Return values are defined in section~\ref{drive
-   status}. Note that this call doesn't return information on the
-  current playing activity of the drive; this can be polled through an
-  $ioctl$ call to $CDROMSUBCHNL$. For juke-boxes, an extra argument
-  $arg$ specifies the slot for which (possibly limited) information is
-  given. The special value $CDSL_CURRENT$ requests that information
-  about the currently selected slot be returned.
-\item[CDROM_DISC_STATUS] Returns the type of the disc currently in the
-  drive.  It should be viewed as a complement to $CDROM_DRIVE_STATUS$.
-  This $ioctl$ can provide \emph {some} information about the current
-  disc that is inserted in the drive.  This functionality used to be
-  implemented in the low level drivers, but is now carried out
-  entirely in \UCD.
-  
-  The history of development of the CD's use as a carrier medium for
-  various digital information has lead to many different disc types.
-  This $ioctl$ is useful only in the case that CDs have \emph {only
-    one} type of data on them.  While this is often the case, it is
-  also very common for CDs to have some tracks with data, and some
-  tracks with audio.  Because this is an existing interface, rather
-  than fixing this interface by changing the assumptions it was made
-  under, thereby breaking all user applications that use this
-  function, the \UCD\ implements this $ioctl$ as follows: If the CD in
-  question has audio tracks on it, and it has absolutely no CD-I, XA,
-  or data tracks on it, it will be reported as $CDS_AUDIO$.  If it has
-  both audio and data tracks, it will return $CDS_MIXED$.  If there
-  are no audio tracks on the disc, and if the CD in question has any
-  CD-I tracks on it, it will be reported as $CDS_XA_2_2$.  Failing
-  that, if the CD in question has any XA tracks on it, it will be
-  reported as $CDS_XA_2_1$.  Finally, if the CD in question has any
-  data tracks on it, it will be reported as a data CD ($CDS_DATA_1$).
-
-  This $ioctl$ can return:
-  $$
-  \halign{$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
-    CDS_NO_INFO& no information available\cr
-    CDS_NO_DISC& no disc is inserted, or tray is opened\cr
-    CDS_AUDIO& Audio disc (2352 audio bytes/frame)\cr
-    CDS_DATA_1& data disc, mode 1 (2048 user bytes/frame)\cr
-    CDS_XA_2_1& mixed data (XA), mode 2, form 1 (2048 user bytes)\cr
-    CDS_XA_2_2& mixed data (XA), mode 2, form 1 (2324  user bytes)\cr
-    CDS_MIXED& mixed audio/data disc\cr
-    }
-  $$
-  For some information concerning frame layout of the various disc
-  types, see a recent version of \cdromh.
-
-\item[CDROM_CHANGER_NSLOTS] Returns the number of slots in a
-  juke-box. 
-\item[CDROMRESET] Reset the drive. 
-\item[CDROM_GET_CAPABILITY] Returns the $capability$ flags for the
-  drive. Refer to section \ref{capability} for more information on
-  these flags.
-\item[CDROM_LOCKDOOR] Locks the door of the drive. $arg == \rm0$
-  unlocks the door, any other value locks it.
-\item[CDROM_DEBUG] Turns on debugging info. Only root is allowed
-  to do this. Same semantics as CDROM_LOCKDOOR.
-\end{description}
-
-\subsubsection{Device dependent $ioctl$s}
-
-Finally, all other $ioctl$s are passed to the function $dev_ioctl()$,
-if implemented. No memory allocation or verification is carried out. 
-
-\newsection{How to update your driver}
-
-\begin{enumerate}
-\item Make a backup of your current driver. 
-\item Get hold of the files \cdromc\ and \cdromh, they should be in
-  the directory tree that came with this documentation.
-\item Make sure you include \cdromh.
-\item Change the 3rd argument of $register_blkdev$ from
-$\&<your-drive>_fops$ to $\&cdrom_fops$. 
-\item Just after that line, add the following to register with the \UCD:
-  $$register_cdrom(\&<your-drive>_info);$$
-  Similarly, add a call to $unregister_cdrom()$ at the appropriate place.
-\item Copy an example of the device-operations $struct$ to your
-  source, \eg, from {\tt {cm206.c}} $cm206_dops$, and change all
-  entries to names corresponding to your driver, or names you just
-  happen to like. If your driver doesn't support a certain function,
-  make the entry $NULL$. At the entry $capability$ you should list all
-  capabilities your driver currently supports. If your driver
-  has a capability that is not listed, please send me a message.
-\item Copy the $cdrom_device_info$ declaration from the same example
-  driver, and modify the entries according to your needs. If your
-  driver dynamically determines the capabilities of the hardware, this
-  structure should also be declared dynamically. 
-\item Implement all functions in your $<device>_dops$ structure,
-  according to prototypes listed in \cdromh, and specifications given
-  in section~\ref{cdrom.c}. Most likely you have already implemented
-  the code in a large part, and you will almost certainly need to adapt the
-  prototype and return values.
-\item Rename your $<device>_ioctl()$ function to $audio_ioctl$ and
-  change the prototype a little. Remove entries listed in the first
-  part in section~\ref{cdrom-ioctl}, if your code was OK, these are
-  just calls to the routines you adapted in the previous step.
-\item You may remove all remaining memory checking code in the
-  $audio_ioctl()$ function that deals with audio commands (these are
-  listed in the second part of section~\ref{cdrom-ioctl}). There is no
-  need for memory allocation either, so most $case$s in the $switch$
-  statement look similar to:
-  $$
-  case\ CDROMREADTOCENTRY\colon get_toc_entry\bigl((struct\ 
-  cdrom_tocentry *{})\ arg\bigr);
-  $$
-\item All remaining $ioctl$ cases must be moved to a separate
-  function, $<device>_ioctl$, the device-dependent $ioctl$s. Note that
-  memory checking and allocation must be kept in this code!
-\item Change the prototypes of $<device>_open()$ and
-  $<device>_release()$, and remove any strategic code (\ie, tray
-  movement, door locking, etc.).
-\item Try to recompile the drivers. We advise you to use modules, both
-  for {\tt {cdrom.o}} and your driver, as debugging is much easier this
-  way.
-\end{enumerate} 
-
-\newsection{Thanks}
-
-Thanks to all the people involved.  First, Erik Andersen, who has
-taken over the torch in maintaining \cdromc\ and integrating much
-\cdrom-related code in the 2.1-kernel.  Thanks to Scott Snyder and
-Gerd Knorr, who were the first to implement this interface for SCSI
-and IDE-CD drivers and added many ideas for extension of the data
-structures relative to kernel~2.0.  Further thanks to Heiko Ei{\ss}feldt,
-Thomas Quinot, Jon Tombs, Ken Pizzini, Eberhard M\"onkeberg and Andrew
-Kroll, the \linux\ \cdrom\ device driver developers who were kind
-enough to give suggestions and criticisms during the writing. Finally
-of course, I want to thank Linus Torvalds for making this possible in
-the first place.
-
-\vfill
-$ \version\ $
-\eject
-\end{document}
diff --git a/Documentation/cdrom/cdrom-standard.txt b/Documentation/cdrom/cdrom-standard.txt
new file mode 100644
index 000000000000..dde4f7f7fdbf
--- /dev/null
+++ b/Documentation/cdrom/cdrom-standard.txt
@@ -0,0 +1,1063 @@
+=======================
+A Linux CD-ROM standard
+=======================
+
+:Author: David van Leeuwen <david@ElseWare.cistron.nl>
+:Date: 12 March 1999
+:Updated by: Erik Andersen (andersee@debian.org)
+:Updated by: Jens Axboe (axboe@image.dk)
+
+
+Introduction
+============
+
+Linux is probably the Unix-like operating system that supports
+the widest variety of hardware devices. The reasons for this are
+presumably
+
+- The large list of hardware devices available for the many platforms
+  that Linux now supports (i.e., i386-PCs, Sparc Suns, etc.)
+- The open design of the operating system, such that anybody can write a
+  driver for Linux.
+- There is plenty of source code around as examples of how to write a driver.
+
+The openness of Linux, and the many different types of available
+hardware has allowed Linux to support many different hardware devices.
+Unfortunately, the very openness that has allowed Linux to support
+all these different devices has also allowed the behavior of each
+device driver to differ significantly from one device to another.
+This divergence of behavior has been very significant for CD-ROM
+devices; the way a particular drive reacts to a `standard` *ioctl()*
+call varies greatly from one device driver to another. To avoid making
+their drivers totally inconsistent, the writers of Linux CD-ROM
+drivers generally created new device drivers by understanding, copying,
+and then changing an existing one. Unfortunately, this practice did not
+maintain uniform behavior across all the Linux CD-ROM drivers.
+
+This document describes an effort to establish Uniform behavior across
+all the different CD-ROM device drivers for Linux. This document also
+defines the various *ioctl()'s*, and how the low-level CD-ROM device
+drivers should implement them. Currently (as of the Linux 2.1.\ *x*
+development kernels) several low-level CD-ROM device drivers, including
+both IDE/ATAPI and SCSI, now use this Uniform interface.
+
+When the CD-ROM was developed, the interface between the CD-ROM drive
+and the computer was not specified in the standards. As a result, many
+different CD-ROM interfaces were developed. Some of them had their
+own proprietary design (Sony, Mitsumi, Panasonic, Philips), other
+manufacturers adopted an existing electrical interface and changed
+the functionality (CreativeLabs/SoundBlaster, Teac, Funai) or simply
+adapted their drives to one or more of the already existing electrical
+interfaces (Aztech, Sanyo, Funai, Vertos, Longshine, Optics Storage and
+most of the `NoName` manufacturers). In cases where a new drive really
+brought its own interface or used its own command set and flow control
+scheme, either a separate driver had to be written, or an existing
+driver had to be enhanced. History has delivered us CD-ROM support for
+many of these different interfaces. Nowadays, almost all new CD-ROM
+drives are either IDE/ATAPI or SCSI, and it is very unlikely that any
+manufacturer will create a new interface. Even finding drives for the
+old proprietary interfaces is getting difficult.
+
+When (in the 1.3.70's) I looked at the existing software interface,
+which was expressed through `cdrom.h`, it appeared to be a rather wild
+set of commands and data formats [#f1]_. It seemed that many
+features of the software interface had been added to accommodate the
+capabilities of a particular drive, in an *ad hoc* manner. More
+importantly, it appeared that the behavior of the `standard` commands
+was different for most of the different drivers: e. g., some drivers
+close the tray if an *open()* call occurs when the tray is open, while
+others do not. Some drivers lock the door upon opening the device, to
+prevent an incoherent file system, but others don't, to allow software
+ejection. Undoubtedly, the capabilities of the different drives vary,
+but even when two drives have the same capability their drivers'
+behavior was usually different.
+
+.. [#f1]
+   I cannot recollect what kernel version I looked at, then,
+   presumably 1.2.13 and 1.3.34 --- the latest kernel that I was
+   indirectly involved in.
+
+I decided to start a discussion on how to make all the Linux CD-ROM
+drivers behave more uniformly. I began by contacting the developers of
+the many CD-ROM drivers found in the Linux kernel. Their reactions
+encouraged me to write the Uniform CD-ROM Driver which this document is
+intended to describe. The implementation of the Uniform CD-ROM Driver is
+in the file `cdrom.c`. This driver is intended to be an additional software
+layer that sits on top of the low-level device drivers for each CD-ROM drive.
+By adding this additional layer, it is possible to have all the different
+CD-ROM devices behave **exactly** the same (insofar as the underlying
+hardware will allow).
+
+The goal of the Uniform CD-ROM Driver is **not** to alienate driver developers
+whohave not yet taken steps to support this effort. The goal of Uniform CD-ROM
+Driver is simply to give people writing application programs for CD-ROM drives
+**one** Linux CD-ROM interface with consistent behavior for all
+CD-ROM devices. In addition, this also provides a consistent interface
+between the low-level device driver code and the Linux kernel. Care
+is taken that 100% compatibility exists with the data structures and
+programmer's interface defined in `cdrom.h`. This guide was written to
+help CD-ROM driver developers adapt their code to use the Uniform CD-ROM
+Driver code defined in `cdrom.c`.
+
+Personally, I think that the most important hardware interfaces are
+the IDE/ATAPI drives and, of course, the SCSI drives, but as prices
+of hardware drop continuously, it is also likely that people may have
+more than one CD-ROM drive, possibly of mixed types. It is important
+that these drives behave in the same way. In December 1994, one of the
+cheapest CD-ROM drives was a Philips cm206, a double-speed proprietary
+drive. In the months that I was busy writing a Linux driver for it,
+proprietary drives became obsolete and IDE/ATAPI drives became the
+standard. At the time of the last update to this document (November
+1997) it is becoming difficult to even **find** anything less than a
+16 speed CD-ROM drive, and 24 speed drives are common.
+
+.. _cdrom_api:
+
+Standardizing through another software level
+============================================
+
+At the time this document was conceived, all drivers directly
+implemented the CD-ROM *ioctl()* calls through their own routines. This
+led to the danger of different drivers forgetting to do important things
+like checking that the user was giving the driver valid data. More
+importantly, this led to the divergence of behavior, which has already
+been discussed.
+
+For this reason, the Uniform CD-ROM Driver was created to enforce consistent
+CD-ROM drive behavior, and to provide a common set of services to the various
+low-level CD-ROM device drivers. The Uniform CD-ROM Driver now provides another
+software-level, that separates the *ioctl()* and *open()* implementation
+from the actual hardware implementation. Note that this effort has
+made few changes which will affect a user's application programs. The
+greatest change involved moving the contents of the various low-level
+CD-ROM drivers\' header files to the kernel's cdrom directory. This was
+done to help ensure that the user is only presented with only one cdrom
+interface, the interface defined in `cdrom.h`.
+
+CD-ROM drives are specific enough (i. e., different from other
+block-devices such as floppy or hard disc drives), to define a set
+of common **CD-ROM device operations**, *<cdrom-device>_dops*.
+These operations are different from the classical block-device file
+operations, *<block-device>_fops*.
+
+The routines for the Uniform CD-ROM Driver interface level are implemented
+in the file `cdrom.c`. In this file, the Uniform CD-ROM Driver interfaces
+with the kernel as a block device by registering the following general
+*struct file_operations*::
+
+	struct file_operations cdrom_fops = {
+		NULL,			/∗ lseek ∗/
+		block _read ,		/∗ read—general block-dev read ∗/
+		block _write,		/∗ write—general block-dev write ∗/
+		NULL,			/∗ readdir ∗/
+		NULL,			/∗ select ∗/
+		cdrom_ioctl,		/∗ ioctl ∗/
+		NULL,			/∗ mmap ∗/
+		cdrom_open,		/∗ open ∗/
+		cdrom_release,		/∗ release ∗/
+		NULL,			/∗ fsync ∗/
+		NULL,			/∗ fasync ∗/
+		cdrom_media_changed,	/∗ media change ∗/
+		NULL			/∗ revalidate ∗/
+	};
+
+Every active CD-ROM device shares this *struct*. The routines
+declared above are all implemented in `cdrom.c`, since this file is the
+place where the behavior of all CD-ROM-devices is defined and
+standardized. The actual interface to the various types of CD-ROM
+hardware is still performed by various low-level CD-ROM-device
+drivers. These routines simply implement certain **capabilities**
+that are common to all CD-ROM (and really, all removable-media
+devices).
+
+Registration of a low-level CD-ROM device driver is now done through
+the general routines in `cdrom.c`, not through the Virtual File System
+(VFS) any more. The interface implemented in `cdrom.c` is carried out
+through two general structures that contain information about the
+capabilities of the driver, and the specific drives on which the
+driver operates. The structures are:
+
+cdrom_device_ops
+  This structure contains information about the low-level driver for a
+  CD-ROM device. This structure is conceptually connected to the major
+  number of the device (although some drivers may have different
+  major numbers, as is the case for the IDE driver).
+
+cdrom_device_info
+  This structure contains information about a particular CD-ROM drive,
+  such as its device name, speed, etc. This structure is conceptually
+  connected to the minor number of the device.
+
+Registering a particular CD-ROM drive with the Uniform CD-ROM Driver
+is done by the low-level device driver though a call to::
+
+	register_cdrom(struct cdrom_device_info * <device>_info)
+
+The device information structure, *<device>_info*, contains all the
+information needed for the kernel to interface with the low-level
+CD-ROM device driver. One of the most important entries in this
+structure is a pointer to the *cdrom_device_ops* structure of the
+low-level driver.
+
+The device operations structure, *cdrom_device_ops*, contains a list
+of pointers to the functions which are implemented in the low-level
+device driver. When `cdrom.c` accesses a CD-ROM device, it does it
+through the functions in this structure. It is impossible to know all
+the capabilities of future CD-ROM drives, so it is expected that this
+list may need to be expanded from time to time as new technologies are
+developed. For example, CD-R and CD-R/W drives are beginning to become
+popular, and support will soon need to be added for them. For now, the
+current *struct* is::
+
+	struct cdrom_device_ops {
+		int (*open)(struct cdrom_device_info *, int)
+		void (*release)(struct cdrom_device_info *);
+		int (*drive_status)(struct cdrom_device_info *, int);
+		unsigned int (*check_events)(struct cdrom_device_info *,
+					     unsigned int, int);
+		int (*media_changed)(struct cdrom_device_info *, int);
+		int (*tray_move)(struct cdrom_device_info *, int);
+		int (*lock_door)(struct cdrom_device_info *, int);
+		int (*select_speed)(struct cdrom_device_info *, int);
+		int (*select_disc)(struct cdrom_device_info *, int);
+		int (*get_last_session) (struct cdrom_device_info *,
+					 struct cdrom_multisession *);
+		int (*get_mcn)(struct cdrom_device_info *, struct cdrom_mcn *);
+		int (*reset)(struct cdrom_device_info *);
+		int (*audio_ioctl)(struct cdrom_device_info *,
+				   unsigned int, void *);
+		const int capability;		/* capability flags */
+		int (*generic_packet)(struct cdrom_device_info *,
+				      struct packet_command *);
+	};
+
+When a low-level device driver implements one of these capabilities,
+it should add a function pointer to this *struct*. When a particular
+function is not implemented, however, this *struct* should contain a
+NULL instead. The *capability* flags specify the capabilities of the
+CD-ROM hardware and/or low-level CD-ROM driver when a CD-ROM drive
+is registered with the Uniform CD-ROM Driver.
+
+Note that most functions have fewer parameters than their
+*blkdev_fops* counterparts. This is because very little of the
+information in the structures *inode* and *file* is used. For most
+drivers, the main parameter is the *struct* *cdrom_device_info*, from
+which the major and minor number can be extracted. (Most low-level
+CD-ROM drivers don't even look at the major and minor number though,
+since many of them only support one device.) This will be available
+through *dev* in *cdrom_device_info* described below.
+
+The drive-specific, minor-like information that is registered with
+`cdrom.c`, currently contains the following fields::
+
+  struct cdrom_device_info {
+	const struct cdrom_device_ops * ops; 	/* device operations for this major */
+	struct list_head list;			/* linked list of all device_info */
+	struct gendisk * disk;			/* matching block layer disk */
+	void *  handle;				/* driver-dependent data */
+
+	int mask; 				/* mask of capability: disables them */
+	int speed;				/* maximum speed for reading data */
+	int capacity;				/* number of discs in a jukebox */
+
+	unsigned int options:30;		/* options flags */
+	unsigned mc_flags:2;			/*  media-change buffer flags */
+	unsigned int vfs_events;		/*  cached events for vfs path */
+	unsigned int ioctl_events;		/*  cached events for ioctl path */
+	int use_count;				/*  number of times device is opened */
+	char name[20];				/*  name of the device type */
+
+	__u8 sanyo_slot : 2;			/*  Sanyo 3-CD changer support */
+	__u8 keeplocked : 1;			/*  CDROM_LOCKDOOR status */
+	__u8 reserved : 5;			/*  not used yet */
+	int cdda_method;			/*  see CDDA_* flags */
+	__u8 last_sense;			/*  saves last sense key */
+	__u8 media_written;			/*  dirty flag, DVD+RW bookkeeping */
+	unsigned short mmc3_profile;		/*  current MMC3 profile */
+	int for_data;				/*  unknown:TBD */
+	int (*exit)(struct cdrom_device_info *);/*  unknown:TBD */
+	int mrw_mode_page;			/*  which MRW mode page is in use */
+  };
+
+Using this *struct*, a linked list of the registered minor devices is
+built, using the *next* field. The device number, the device operations
+struct and specifications of properties of the drive are stored in this
+structure.
+
+The *mask* flags can be used to mask out some of the capabilities listed
+in *ops->capability*, if a specific drive doesn't support a feature
+of the driver. The value *speed* specifies the maximum head-rate of the
+drive, measured in units of normal audio speed (176kB/sec raw data or
+150kB/sec file system data). The parameters are declared *const*
+because they describe properties of the drive, which don't change after
+registration.
+
+A few registers contain variables local to the CD-ROM drive. The
+flags *options* are used to specify how the general CD-ROM routines
+should behave. These various flags registers should provide enough
+flexibility to adapt to the different users' wishes (and **not** the
+`arbitrary` wishes of the author of the low-level device driver, as is
+the case in the old scheme). The register *mc_flags* is used to buffer
+the information from *media_changed()* to two separate queues. Other
+data that is specific to a minor drive, can be accessed through *handle*,
+which can point to a data structure specific to the low-level driver.
+The fields *use_count*, *next*, *options* and *mc_flags* need not be
+initialized.
+
+The intermediate software layer that `cdrom.c` forms will perform some
+additional bookkeeping. The use count of the device (the number of
+processes that have the device opened) is registered in *use_count*. The
+function *cdrom_ioctl()* will verify the appropriate user-memory regions
+for read and write, and in case a location on the CD is transferred,
+it will `sanitize` the format by making requests to the low-level
+drivers in a standard format, and translating all formats between the
+user-software and low level drivers. This relieves much of the drivers'
+memory checking and format checking and translation. Also, the necessary
+structures will be declared on the program stack.
+
+The implementation of the functions should be as defined in the
+following sections. Two functions **must** be implemented, namely
+*open()* and *release()*. Other functions may be omitted, their
+corresponding capability flags will be cleared upon registration.
+Generally, a function returns zero on success and negative on error. A
+function call should return only after the command has completed, but of
+course waiting for the device should not use processor time.
+
+::
+
+	int open(struct cdrom_device_info *cdi, int purpose)
+
+*Open()* should try to open the device for a specific *purpose*, which
+can be either:
+
+- Open for reading data, as done by `mount()` (2), or the
+  user commands `dd` or `cat`.
+- Open for *ioctl* commands, as done by audio-CD playing programs.
+
+Notice that any strategic code (closing tray upon *open()*, etc.) is
+done by the calling routine in `cdrom.c`, so the low-level routine
+should only be concerned with proper initialization, such as spinning
+up the disc, etc.
+
+::
+
+	void release(struct cdrom_device_info *cdi)
+
+Device-specific actions should be taken such as spinning down the device.
+However, strategic actions such as ejection of the tray, or unlocking
+the door, should be left over to the general routine *cdrom_release()*.
+This is the only function returning type *void*.
+
+.. _cdrom_drive_status:
+
+::
+
+	int drive_status(struct cdrom_device_info *cdi, int slot_nr)
+
+The function *drive_status*, if implemented, should provide
+information on the status of the drive (not the status of the disc,
+which may or may not be in the drive). If the drive is not a changer,
+*slot_nr* should be ignored. In `cdrom.h` the possibilities are listed::
+
+
+	CDS_NO_INFO		/* no information available */
+	CDS_NO_DISC		/* no disc is inserted, tray is closed */
+	CDS_TRAY_OPEN		/* tray is opened */
+	CDS_DRIVE_NOT_READY	/* something is wrong, tray is moving? */
+	CDS_DISC_OK		/* a disc is loaded and everything is fine */
+
+::
+
+	int media_changed(struct cdrom_device_info *cdi, int disc_nr)
+
+This function is very similar to the original function in $struct
+file_operations*. It returns 1 if the medium of the device *cdi->dev*
+has changed since the last call, and 0 otherwise. The parameter
+*disc_nr* identifies a specific slot in a juke-box, it should be
+ignored for single-disc drives. Note that by `re-routing` this
+function through *cdrom_media_changed()*, we can implement separate
+queues for the VFS and a new *ioctl()* function that can report device
+changes to software (e. g., an auto-mounting daemon).
+
+::
+
+	int tray_move(struct cdrom_device_info *cdi, int position)
+
+This function, if implemented, should control the tray movement. (No
+other function should control this.) The parameter *position* controls
+the desired direction of movement:
+
+- 0 Close tray
+- 1 Open tray
+
+This function returns 0 upon success, and a non-zero value upon
+error. Note that if the tray is already in the desired position, no
+action need be taken, and the return value should be 0.
+
+::
+
+	int lock_door(struct cdrom_device_info *cdi, int lock)
+
+This function (and no other code) controls locking of the door, if the
+drive allows this. The value of *lock* controls the desired locking
+state:
+
+- 0 Unlock door, manual opening is allowed
+- 1 Lock door, tray cannot be ejected manually
+
+This function returns 0 upon success, and a non-zero value upon
+error. Note that if the door is already in the requested state, no
+action need be taken, and the return value should be 0.
+
+::
+
+	int select_speed(struct cdrom_device_info *cdi, int speed)
+
+Some CD-ROM drives are capable of changing their head-speed. There
+are several reasons for changing the speed of a CD-ROM drive. Badly
+pressed CD-ROM s may benefit from less-than-maximum head rate. Modern
+CD-ROM drives can obtain very high head rates (up to *24x* is
+common). It has been reported that these drives can make reading
+errors at these high speeds, reducing the speed can prevent data loss
+in these circumstances. Finally, some of these drives can
+make an annoyingly loud noise, which a lower speed may reduce.
+
+This function specifies the speed at which data is read or audio is
+played back. The value of *speed* specifies the head-speed of the
+drive, measured in units of standard cdrom speed (176kB/sec raw data
+or 150kB/sec file system data). So to request that a CD-ROM drive
+operate at 300kB/sec you would call the CDROM_SELECT_SPEED *ioctl*
+with *speed=2*. The special value `0` means `auto-selection`, i. e.,
+maximum data-rate or real-time audio rate. If the drive doesn't have
+this `auto-selection` capability, the decision should be made on the
+current disc loaded and the return value should be positive. A negative
+return value indicates an error.
+
+::
+
+	int select_disc(struct cdrom_device_info *cdi, int number)
+
+If the drive can store multiple discs (a juke-box) this function
+will perform disc selection. It should return the number of the
+selected disc on success, a negative value on error. Currently, only
+the ide-cd driver supports this functionality.
+
+::
+
+	int get_last_session(struct cdrom_device_info *cdi,
+			     struct cdrom_multisession *ms_info)
+
+This function should implement the old corresponding *ioctl()*. For
+device *cdi->dev*, the start of the last session of the current disc
+should be returned in the pointer argument *ms_info*. Note that
+routines in `cdrom.c` have sanitized this argument: its requested
+format will **always** be of the type *CDROM_LBA* (linear block
+addressing mode), whatever the calling software requested. But
+sanitization goes even further: the low-level implementation may
+return the requested information in *CDROM_MSF* format if it wishes so
+(setting the *ms_info->addr_format* field appropriately, of
+course) and the routines in `cdrom.c` will make the transformation if
+necessary. The return value is 0 upon success.
+
+::
+
+	int get_mcn(struct cdrom_device_info *cdi,
+		    struct cdrom_mcn *mcn)
+
+Some discs carry a `Media Catalog Number` (MCN), also called
+`Universal Product Code` (UPC). This number should reflect the number
+that is generally found in the bar-code on the product. Unfortunately,
+the few discs that carry such a number on the disc don't even use the
+same format. The return argument to this function is a pointer to a
+pre-declared memory region of type *struct cdrom_mcn*. The MCN is
+expected as a 13-character string, terminated by a null-character.
+
+::
+
+	int reset(struct cdrom_device_info *cdi)
+
+This call should perform a hard-reset on the drive (although in
+circumstances that a hard-reset is necessary, a drive may very well not
+listen to commands anymore). Preferably, control is returned to the
+caller only after the drive has finished resetting. If the drive is no
+longer listening, it may be wise for the underlying low-level cdrom
+driver to time out.
+
+::
+
+	int audio_ioctl(struct cdrom_device_info *cdi,
+			unsigned int cmd, void *arg)
+
+Some of the CD-ROM-\ *ioctl()*\ 's defined in `cdrom.h` can be
+implemented by the routines described above, and hence the function
+*cdrom_ioctl* will use those. However, most *ioctl()*\ 's deal with
+audio-control. We have decided to leave these to be accessed through a
+single function, repeating the arguments *cmd* and *arg*. Note that
+the latter is of type *void*, rather than *unsigned long int*.
+The routine *cdrom_ioctl()* does do some useful things,
+though. It sanitizes the address format type to *CDROM_MSF* (Minutes,
+Seconds, Frames) for all audio calls. It also verifies the memory
+location of *arg*, and reserves stack-memory for the argument. This
+makes implementation of the *audio_ioctl()* much simpler than in the
+old driver scheme. For example, you may look up the function
+*cm206_audio_ioctl()* `cm206.c` that should be updated with
+this documentation.
+
+An unimplemented ioctl should return *-ENOSYS*, but a harmless request
+(e. g., *CDROMSTART*) may be ignored by returning 0 (success). Other
+errors should be according to the standards, whatever they are. When
+an error is returned by the low-level driver, the Uniform CD-ROM Driver
+tries whenever possible to return the error code to the calling program.
+(We may decide to sanitize the return value in *cdrom_ioctl()* though, in
+order to guarantee a uniform interface to the audio-player software.)
+
+::
+
+	int dev_ioctl(struct cdrom_device_info *cdi,
+		      unsigned int cmd, unsigned long arg)
+
+Some *ioctl()'s* seem to be specific to certain CD-ROM drives. That is,
+they are introduced to service some capabilities of certain drives. In
+fact, there are 6 different *ioctl()'s* for reading data, either in some
+particular kind of format, or audio data. Not many drives support
+reading audio tracks as data, I believe this is because of protection
+of copyrights of artists. Moreover, I think that if audio-tracks are
+supported, it should be done through the VFS and not via *ioctl()'s*. A
+problem here could be the fact that audio-frames are 2352 bytes long,
+so either the audio-file-system should ask for 75264 bytes at once
+(the least common multiple of 512 and 2352), or the drivers should
+bend their backs to cope with this incoherence (to which I would be
+opposed). Furthermore, it is very difficult for the hardware to find
+the exact frame boundaries, since there are no synchronization headers
+in audio frames. Once these issues are resolved, this code should be
+standardized in `cdrom.c`.
+
+Because there are so many *ioctl()'s* that seem to be introduced to
+satisfy certain drivers [#f2]_, any non-standard *ioctl()*\ s
+are routed through the call *dev_ioctl()*. In principle, `private`
+*ioctl()*\ 's should be numbered after the device's major number, and not
+the general CD-ROM *ioctl* number, `0x53`. Currently the
+non-supported *ioctl()'s* are:
+
+	CDROMREADMODE1, CDROMREADMODE2, CDROMREADAUDIO, CDROMREADRAW,
+	CDROMREADCOOKED, CDROMSEEK, CDROMPLAY-BLK and CDROM-READALL
+
+.. [#f2]
+
+   Is there software around that actually uses these? I'd be interested!
+
+.. _cdrom_capabilities:
+
+CD-ROM capabilities
+-------------------
+
+Instead of just implementing some *ioctl* calls, the interface in
+`cdrom.c` supplies the possibility to indicate the **capabilities**
+of a CD-ROM drive. This can be done by ORing any number of
+capability-constants that are defined in `cdrom.h` at the registration
+phase. Currently, the capabilities are any of::
+
+	CDC_CLOSE_TRAY		/* can close tray by software control */
+	CDC_OPEN_TRAY		/* can open tray */
+	CDC_LOCK		/* can lock and unlock the door */
+	CDC_SELECT_SPEED	/* can select speed, in units of * sim*150 ,kB/s */
+	CDC_SELECT_DISC		/* drive is juke-box */
+	CDC_MULTI_SESSION	/* can read sessions *> rm1* */
+	CDC_MCN			/* can read Media Catalog Number */
+	CDC_MEDIA_CHANGED	/* can report if disc has changed */
+	CDC_PLAY_AUDIO		/* can perform audio-functions (play, pause, etc) */
+	CDC_RESET		/* hard reset device */
+	CDC_IOCTLS		/* driver has non-standard ioctls */
+	CDC_DRIVE_STATUS	/* driver implements drive status */
+
+The capability flag is declared *const*, to prevent drivers from
+accidentally tampering with the contents. The capability fags actually
+inform `cdrom.c` of what the driver can do. If the drive found
+by the driver does not have the capability, is can be masked out by
+the *cdrom_device_info* variable *mask*. For instance, the SCSI CD-ROM
+driver has implemented the code for loading and ejecting CD-ROM's, and
+hence its corresponding flags in *capability* will be set. But a SCSI
+CD-ROM drive might be a caddy system, which can't load the tray, and
+hence for this drive the *cdrom_device_info* struct will have set
+the *CDC_CLOSE_TRAY* bit in *mask*.
+
+In the file `cdrom.c` you will encounter many constructions of the type::
+
+	if (cdo->capability & ∼cdi->mask & CDC _⟨capability⟩) ...
+
+There is no *ioctl* to set the mask... The reason is that
+I think it is better to control the **behavior** rather than the
+**capabilities**.
+
+Options
+-------
+
+A final flag register controls the **behavior** of the CD-ROM
+drives, in order to satisfy different users' wishes, hopefully
+independently of the ideas of the respective author who happened to
+have made the drive's support available to the Linux community. The
+current behavior options are::
+
+	CDO_AUTO_CLOSE	/* try to close tray upon device open() */
+	CDO_AUTO_EJECT	/* try to open tray on last device close() */
+	CDO_USE_FFLAGS	/* use file_pointer->f_flags to indicate purpose for open() */
+	CDO_LOCK	/* try to lock door if device is opened */
+	CDO_CHECK_TYPE	/* ensure disc type is data if opened for data */
+
+The initial value of this register is
+`CDO_AUTO_CLOSE | CDO_USE_FFLAGS | CDO_LOCK`, reflecting my own view on user
+interface and software standards. Before you protest, there are two
+new *ioctl()'s* implemented in `cdrom.c`, that allow you to control the
+behavior by software. These are::
+
+	CDROM_SET_OPTIONS	/* set options specified in (int)arg */
+	CDROM_CLEAR_OPTIONS	/* clear options specified in (int)arg */
+
+One option needs some more explanation: *CDO_USE_FFLAGS*. In the next
+newsection we explain what the need for this option is.
+
+A software package `setcd`, available from the Debian distribution
+and `sunsite.unc.edu`, allows user level control of these flags.
+
+
+The need to know the purpose of opening the CD-ROM device
+=========================================================
+
+Traditionally, Unix devices can be used in two different `modes`,
+either by reading/writing to the device file, or by issuing
+controlling commands to the device, by the device's *ioctl()*
+call. The problem with CD-ROM drives, is that they can be used for
+two entirely different purposes. One is to mount removable
+file systems, CD-ROM's, the other is to play audio CD's. Audio commands
+are implemented entirely through *ioctl()\'s*, presumably because the
+first implementation (SUN?) has been such. In principle there is
+nothing wrong with this, but a good control of the `CD player` demands
+that the device can **always** be opened in order to give the
+*ioctl* commands, regardless of the state the drive is in.
+
+On the other hand, when used as a removable-media disc drive (what the
+original purpose of CD-ROM s is) we would like to make sure that the
+disc drive is ready for operation upon opening the device. In the old
+scheme, some CD-ROM drivers don't do any integrity checking, resulting
+in a number of i/o errors reported by the VFS to the kernel when an
+attempt for mounting a CD-ROM on an empty drive occurs. This is not a
+particularly elegant way to find out that there is no CD-ROM inserted;
+it more-or-less looks like the old IBM-PC trying to read an empty floppy
+drive for a couple of seconds, after which the system complains it
+can't read from it. Nowadays we can **sense** the existence of a
+removable medium in a drive, and we believe we should exploit that
+fact. An integrity check on opening of the device, that verifies the
+availability of a CD-ROM and its correct type (data), would be
+desirable.
+
+These two ways of using a CD-ROM drive, principally for data and
+secondarily for playing audio discs, have different demands for the
+behavior of the *open()* call. Audio use simply wants to open the
+device in order to get a file handle which is needed for issuing
+*ioctl* commands, while data use wants to open for correct and
+reliable data transfer. The only way user programs can indicate what
+their *purpose* of opening the device is, is through the *flags*
+parameter (see `open(2)`). For CD-ROM devices, these flags aren't
+implemented (some drivers implement checking for write-related flags,
+but this is not strictly necessary if the device file has correct
+permission flags). Most option flags simply don't make sense to
+CD-ROM devices: *O_CREAT*, *O_NOCTTY*, *O_TRUNC*, *O_APPEND*, and
+*O_SYNC* have no meaning to a CD-ROM.
+
+We therefore propose to use the flag *O_NONBLOCK* to indicate
+that the device is opened just for issuing *ioctl*
+commands. Strictly, the meaning of *O_NONBLOCK* is that opening and
+subsequent calls to the device don't cause the calling process to
+wait. We could interpret this as don't wait until someone has
+inserted some valid data-CD-ROM. Thus, our proposal of the
+implementation for the *open()* call for CD-ROM s is:
+
+- If no other flags are set than *O_RDONLY*, the device is opened
+  for data transfer, and the return value will be 0 only upon successful
+  initialization of the transfer. The call may even induce some actions
+  on the CD-ROM, such as closing the tray.
+- If the option flag *O_NONBLOCK* is set, opening will always be
+  successful, unless the whole device doesn't exist. The drive will take
+  no actions whatsoever.
+
+And what about standards?
+-------------------------
+
+You might hesitate to accept this proposal as it comes from the
+Linux community, and not from some standardizing institute. What
+about SUN, SGI, HP and all those other Unix and hardware vendors?
+Well, these companies are in the lucky position that they generally
+control both the hardware and software of their supported products,
+and are large enough to set their own standard. They do not have to
+deal with a dozen or more different, competing hardware
+configurations\ [#f3]_.
+
+.. [#f3]
+
+   Incidentally, I think that SUN's approach to mounting CD-ROM s is very
+   good in origin: under Solaris a volume-daemon automatically mounts a
+   newly inserted CD-ROM under `/cdrom/*<volume-name>*`.
+
+   In my opinion they should have pushed this
+   further and have **every** CD-ROM on the local area network be
+   mounted at the similar location, i. e., no matter in which particular
+   machine you insert a CD-ROM, it will always appear at the same
+   position in the directory tree, on every system. When I wanted to
+   implement such a user-program for Linux, I came across the
+   differences in behavior of the various drivers, and the need for an
+   *ioctl* informing about media changes.
+
+We believe that using *O_NONBLOCK* to indicate that a device is being opened
+for *ioctl* commands only can be easily introduced in the Linux
+community. All the CD-player authors will have to be informed, we can
+even send in our own patches to the programs. The use of *O_NONBLOCK*
+has most likely no influence on the behavior of the CD-players on
+other operating systems than Linux. Finally, a user can always revert
+to old behavior by a call to
+*ioctl(file_descriptor, CDROM_CLEAR_OPTIONS, CDO_USE_FFLAGS)*.
+
+The preferred strategy of *open()*
+----------------------------------
+
+The routines in `cdrom.c` are designed in such a way that run-time
+configuration of the behavior of CD-ROM devices (of **any** type)
+can be carried out, by the *CDROM_SET/CLEAR_OPTIONS* *ioctls*. Thus, various
+modes of operation can be set:
+
+`CDO_AUTO_CLOSE | CDO_USE_FFLAGS | CDO_LOCK`
+   This is the default setting. (With *CDO_CHECK_TYPE* it will be better, in
+   the future.) If the device is not yet opened by any other process, and if
+   the device is being opened for data (*O_NONBLOCK* is not set) and the
+   tray is found to be open, an attempt to close the tray is made. Then,
+   it is verified that a disc is in the drive and, if *CDO_CHECK_TYPE* is
+   set, that it contains tracks of type `data mode 1`. Only if all tests
+   are passed is the return value zero. The door is locked to prevent file
+   system corruption. If the drive is opened for audio (*O_NONBLOCK* is
+   set), no actions are taken and a value of 0 will be returned.
+
+`CDO_AUTO_CLOSE | CDO_AUTO_EJECT | CDO_LOCK`
+   This mimics the behavior of the current sbpcd-driver. The option flags are
+   ignored, the tray is closed on the first open, if necessary. Similarly,
+   the tray is opened on the last release, i. e., if a CD-ROM is unmounted,
+   it is automatically ejected, such that the user can replace it.
+
+We hope that these option can convince everybody (both driver
+maintainers and user program developers) to adopt the new CD-ROM
+driver scheme and option flag interpretation.
+
+Description of routines in `cdrom.c`
+====================================
+
+Only a few routines in `cdrom.c` are exported to the drivers. In this
+new section we will discuss these, as well as the functions that `take
+over' the CD-ROM interface to the kernel. The header file belonging
+to `cdrom.c` is called `cdrom.h`. Formerly, some of the contents of this
+file were placed in the file `ucdrom.h`, but this file has now been
+merged back into `cdrom.h`.
+
+::
+
+	struct file_operations cdrom_fops
+
+The contents of this structure were described in cdrom_api_.
+A pointer to this structure is assigned to the *fops* field
+of the *struct gendisk*.
+
+::
+
+	int register_cdrom(struct cdrom_device_info *cdi)
+
+This function is used in about the same way one registers *cdrom_fops*
+with the kernel, the device operations and information structures,
+as described in cdrom_api_, should be registered with the
+Uniform CD-ROM Driver::
+
+	register_cdrom(&<device>_info);
+
+
+This function returns zero upon success, and non-zero upon
+failure. The structure *<device>_info* should have a pointer to the
+driver's *<device>_dops*, as in::
+
+	struct cdrom_device_info <device>_info = {
+		<device>_dops;
+		...
+	}
+
+Note that a driver must have one static structure, *<device>_dops*, while
+it may have as many structures *<device>_info* as there are minor devices
+active. *Register_cdrom()* builds a linked list from these.
+
+
+::
+
+	void unregister_cdrom(struct cdrom_device_info *cdi)
+
+Unregistering device *cdi* with minor number *MINOR(cdi->dev)* removes
+the minor device from the list. If it was the last registered minor for
+the low-level driver, this disconnects the registered device-operation
+routines from the CD-ROM interface. This function returns zero upon
+success, and non-zero upon failure.
+
+::
+
+	int cdrom_open(struct inode * ip, struct file * fp)
+
+This function is not called directly by the low-level drivers, it is
+listed in the standard *cdrom_fops*. If the VFS opens a file, this
+function becomes active. A strategy is implemented in this routine,
+taking care of all capabilities and options that are set in the
+*cdrom_device_ops* connected to the device. Then, the program flow is
+transferred to the device_dependent *open()* call.
+
+::
+
+	void cdrom_release(struct inode *ip, struct file *fp)
+
+This function implements the reverse-logic of *cdrom_open()*, and then
+calls the device-dependent *release()* routine. When the use-count has
+reached 0, the allocated buffers are flushed by calls to *sync_dev(dev)*
+and *invalidate_buffers(dev)*.
+
+
+.. _cdrom_ioctl:
+
+::
+
+	int cdrom_ioctl(struct inode *ip, struct file *fp,
+			unsigned int cmd, unsigned long arg)
+
+This function handles all the standard *ioctl* requests for CD-ROM
+devices in a uniform way. The different calls fall into three
+categories: *ioctl()'s* that can be directly implemented by device
+operations, ones that are routed through the call *audio_ioctl()*, and
+the remaining ones, that are presumable device-dependent. Generally, a
+negative return value indicates an error.
+
+Directly implemented *ioctl()'s*
+--------------------------------
+
+The following `old` CD-ROM *ioctl()*\ 's are implemented by directly
+calling device-operations in *cdrom_device_ops*, if implemented and
+not masked:
+
+`CDROMMULTISESSION`
+	Requests the last session on a CD-ROM.
+`CDROMEJECT`
+	Open tray.
+`CDROMCLOSETRAY`
+	Close tray.
+`CDROMEJECT_SW`
+	If *arg\not=0*, set behavior to auto-close (close
+	tray on first open) and auto-eject (eject on last release), otherwise
+	set behavior to non-moving on *open()* and *release()* calls.
+`CDROM_GET_MCN`
+	Get the Media Catalog Number from a CD.
+
+*Ioctl*s routed through *audio_ioctl()*
+---------------------------------------
+
+The following set of *ioctl()'s* are all implemented through a call to
+the *cdrom_fops* function *audio_ioctl()*. Memory checks and
+allocation are performed in *cdrom_ioctl()*, and also sanitization of
+address format (*CDROM_LBA*/*CDROM_MSF*) is done.
+
+`CDROMSUBCHNL`
+	Get sub-channel data in argument *arg* of type
+	`struct cdrom_subchnl *`.
+`CDROMREADTOCHDR`
+	Read Table of Contents header, in *arg* of type
+	`struct cdrom_tochdr *`.
+`CDROMREADTOCENTRY`
+	Read a Table of Contents entry in *arg* and specified by *arg*
+	of type `struct cdrom_tocentry *`.
+`CDROMPLAYMSF`
+	Play audio fragment specified in Minute, Second, Frame format,
+	delimited by *arg* of type `struct cdrom_msf *`.
+`CDROMPLAYTRKIND`
+	Play audio fragment in track-index format delimited by *arg*
+	of type `struct cdrom_ti *`.
+`CDROMVOLCTRL`
+	Set volume specified by *arg* of type `struct cdrom_volctrl *`.
+`CDROMVOLREAD`
+	Read volume into by *arg* of type `struct cdrom_volctrl *`.
+`CDROMSTART`
+	Spin up disc.
+`CDROMSTOP`
+	Stop playback of audio fragment.
+`CDROMPAUSE`
+	Pause playback of audio fragment.
+`CDROMRESUME`
+	Resume playing.
+
+New *ioctl()'s* in `cdrom.c`
+----------------------------
+
+The following *ioctl()'s* have been introduced to allow user programs to
+control the behavior of individual CD-ROM devices. New *ioctl*
+commands can be identified by the underscores in their names.
+
+`CDROM_SET_OPTIONS`
+	Set options specified by *arg*. Returns the option flag register
+	after modification. Use *arg = \rm0* for reading the current flags.
+`CDROM_CLEAR_OPTIONS`
+	Clear options specified by *arg*. Returns the option flag register
+	after modification.
+`CDROM_SELECT_SPEED`
+	Select head-rate speed of disc specified as by *arg* in units
+	of standard cdrom speed (176\,kB/sec raw data or
+	150kB/sec file system data). The value 0 means `auto-select`,
+	i. e., play audio discs at real time and data discs at maximum speed.
+	The value *arg* is checked against the maximum head rate of the
+	drive found in the *cdrom_dops*.
+`CDROM_SELECT_DISC`
+	Select disc numbered *arg* from a juke-box.
+
+	First disc is numbered 0. The number *arg* is checked against the
+	maximum number of discs in the juke-box found in the *cdrom_dops*.
+`CDROM_MEDIA_CHANGED`
+	Returns 1 if a disc has been changed since the last call.
+	Note that calls to *cdrom_media_changed* by the VFS are treated
+	by an independent queue, so both mechanisms will detect a
+	media change once. For juke-boxes, an extra argument *arg*
+	specifies the slot for which the information is given. The special
+	value *CDSL_CURRENT* requests that information about the currently
+	selected slot be returned.
+`CDROM_DRIVE_STATUS`
+	Returns the status of the drive by a call to
+	*drive_status()*. Return values are defined in cdrom_drive_status_.
+	Note that this call doesn't return information on the
+	current playing activity of the drive; this can be polled through
+	an *ioctl* call to *CDROMSUBCHNL*. For juke-boxes, an extra argument
+	*arg* specifies the slot for which (possibly limited) information is
+	given. The special value *CDSL_CURRENT* requests that information
+	about the currently selected slot be returned.
+`CDROM_DISC_STATUS`
+	Returns the type of the disc currently in the drive.
+	It should be viewed as a complement to *CDROM_DRIVE_STATUS*.
+	This *ioctl* can provide *some* information about the current
+	disc that is inserted in the drive. This functionality used to be
+	implemented in the low level drivers, but is now carried out
+	entirely in Uniform CD-ROM Driver.
+
+	The history of development of the CD's use as a carrier medium for
+	various digital information has lead to many different disc types.
+	This *ioctl* is useful only in the case that CDs have \emph {only
+	one} type of data on them. While this is often the case, it is
+	also very common for CDs to have some tracks with data, and some
+	tracks with audio. Because this is an existing interface, rather
+	than fixing this interface by changing the assumptions it was made
+	under, thereby breaking all user applications that use this
+	function, the Uniform CD-ROM Driver implements this *ioctl* as
+	follows: If the CD in question has audio tracks on it, and it has
+	absolutely no CD-I, XA, or data tracks on it, it will be reported
+	as *CDS_AUDIO*. If it has both audio and data tracks, it will
+	return *CDS_MIXED*. If there are no audio tracks on the disc, and
+	if the CD in question has any CD-I tracks on it, it will be
+	reported as *CDS_XA_2_2*. Failing that, if the CD in question
+	has any XA tracks on it, it will be reported as *CDS_XA_2_1*.
+	Finally, if the CD in question has any data tracks on it,
+	it will be reported as a data CD (*CDS_DATA_1*).
+
+	This *ioctl* can return::
+
+		CDS_NO_INFO	/* no information available */
+		CDS_NO_DISC	/* no disc is inserted, or tray is opened */
+		CDS_AUDIO	/* Audio disc (2352 audio bytes/frame) */
+		CDS_DATA_1	/* data disc, mode 1 (2048 user bytes/frame) */
+		CDS_XA_2_1	/* mixed data (XA), mode 2, form 1 (2048 user bytes) */
+		CDS_XA_2_2	/* mixed data (XA), mode 2, form 1 (2324 user bytes) */
+		CDS_MIXED	/* mixed audio/data disc */
+
+	For some information concerning frame layout of the various disc
+	types, see a recent version of `cdrom.h`.
+
+`CDROM_CHANGER_NSLOTS`
+	Returns the number of slots in a juke-box.
+`CDROMRESET`
+	Reset the drive.
+`CDROM_GET_CAPABILITY`
+	Returns the *capability* flags for the drive. Refer to section
+	cdrom_capabilities_ for more information on these flags.
+`CDROM_LOCKDOOR`
+	 Locks the door of the drive. `arg == 0` unlocks the door,
+	 any other value locks it.
+`CDROM_DEBUG`
+	 Turns on debugging info. Only root is allowed to do this.
+	 Same semantics as CDROM_LOCKDOOR.
+
+
+Device dependent *ioctl()'s*
+----------------------------
+
+Finally, all other *ioctl()'s* are passed to the function *dev_ioctl()*,
+if implemented. No memory allocation or verification is carried out.
+
+How to update your driver
+=========================
+
+- Make a backup of your current driver.
+- Get hold of the files `cdrom.c` and `cdrom.h`, they should be in
+  the directory tree that came with this documentation.
+- Make sure you include `cdrom.h`.
+- Change the 3rd argument of *register_blkdev* from `&<your-drive>_fops`
+  to `&cdrom_fops`.
+- Just after that line, add the following to register with the Uniform
+  CD-ROM Driver::
+
+	register_cdrom(&<your-drive>_info);*
+
+  Similarly, add a call to *unregister_cdrom()* at the appropriate place.
+- Copy an example of the device-operations *struct* to your
+  source, e. g., from `cm206.c` *cm206_dops*, and change all
+  entries to names corresponding to your driver, or names you just
+  happen to like. If your driver doesn't support a certain function,
+  make the entry *NULL*. At the entry *capability* you should list all
+  capabilities your driver currently supports. If your driver
+  has a capability that is not listed, please send me a message.
+- Copy the *cdrom_device_info* declaration from the same example
+  driver, and modify the entries according to your needs. If your
+  driver dynamically determines the capabilities of the hardware, this
+  structure should also be declared dynamically.
+- Implement all functions in your `<device>_dops` structure,
+  according to prototypes listed in  `cdrom.h`, and specifications given
+  in cdrom_api_. Most likely you have already implemented
+  the code in a large part, and you will almost certainly need to adapt the
+  prototype and return values.
+- Rename your `<device>_ioctl()` function to *audio_ioctl* and
+  change the prototype a little. Remove entries listed in the first
+  part in cdrom_ioctl_, if your code was OK, these are
+  just calls to the routines you adapted in the previous step.
+- You may remove all remaining memory checking code in the
+  *audio_ioctl()* function that deals with audio commands (these are
+  listed in the second part of cdrom_ioctl_. There is no
+  need for memory allocation either, so most *case*s in the *switch*
+  statement look similar to::
+
+	case CDROMREADTOCENTRY:
+		get_toc_entry\bigl((struct cdrom_tocentry *) arg);
+
+- All remaining *ioctl* cases must be moved to a separate
+  function, *<device>_ioctl*, the device-dependent *ioctl()'s*. Note that
+  memory checking and allocation must be kept in this code!
+- Change the prototypes of *<device>_open()* and
+  *<device>_release()*, and remove any strategic code (i. e., tray
+  movement, door locking, etc.).
+- Try to recompile the drivers. We advise you to use modules, both
+  for `cdrom.o` and your driver, as debugging is much easier this
+  way.
+
+Thanks
+======
+
+Thanks to all the people involved. First, Erik Andersen, who has
+taken over the torch in maintaining `cdrom.c` and integrating much
+CD-ROM-related code in the 2.1-kernel. Thanks to Scott Snyder and
+Gerd Knorr, who were the first to implement this interface for SCSI
+and IDE-CD drivers and added many ideas for extension of the data
+structures relative to kernel~2.0. Further thanks to Heiko Eißfeldt,
+Thomas Quinot, Jon Tombs, Ken Pizzini, Eberhard Mönkeberg and Andrew Kroll,
+the Linux CD-ROM device driver developers who were kind
+enough to give suggestions and criticisms during the writing. Finally
+of course, I want to thank Linus Torvalds for making this possible in
+the first place.
-- 
cgit 


From 8ea618899b6b4fbe97c8462e7d769867307de011 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:40 -0300
Subject: docs: cdrom: convert docs to ReST and rename to *.rst

The stuff there is almost already at ReST format. A
conversion for them is trivial: just add a missing titles
and fix some scape codes for them to match ReST syntax.

While here, rename the cdrom-standard.txt, with was converted
from LaTeX to ReST on the previous patch, and add it to the
index file.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/cdrom/cdrom-standard.rst | 1063 ++++++++++++++++++++++++++++++++
 Documentation/cdrom/cdrom-standard.txt | 1063 --------------------------------
 Documentation/cdrom/ide-cd             |  534 ----------------
 Documentation/cdrom/ide-cd.rst         |  538 ++++++++++++++++
 Documentation/cdrom/index.rst          |   19 +
 Documentation/cdrom/packet-writing.rst |  139 +++++
 Documentation/cdrom/packet-writing.txt |  132 ----
 7 files changed, 1759 insertions(+), 1729 deletions(-)
 create mode 100644 Documentation/cdrom/cdrom-standard.rst
 delete mode 100644 Documentation/cdrom/cdrom-standard.txt
 delete mode 100644 Documentation/cdrom/ide-cd
 create mode 100644 Documentation/cdrom/ide-cd.rst
 create mode 100644 Documentation/cdrom/index.rst
 create mode 100644 Documentation/cdrom/packet-writing.rst
 delete mode 100644 Documentation/cdrom/packet-writing.txt

(limited to 'Documentation')

diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst
new file mode 100644
index 000000000000..dde4f7f7fdbf
--- /dev/null
+++ b/Documentation/cdrom/cdrom-standard.rst
@@ -0,0 +1,1063 @@
+=======================
+A Linux CD-ROM standard
+=======================
+
+:Author: David van Leeuwen <david@ElseWare.cistron.nl>
+:Date: 12 March 1999
+:Updated by: Erik Andersen (andersee@debian.org)
+:Updated by: Jens Axboe (axboe@image.dk)
+
+
+Introduction
+============
+
+Linux is probably the Unix-like operating system that supports
+the widest variety of hardware devices. The reasons for this are
+presumably
+
+- The large list of hardware devices available for the many platforms
+  that Linux now supports (i.e., i386-PCs, Sparc Suns, etc.)
+- The open design of the operating system, such that anybody can write a
+  driver for Linux.
+- There is plenty of source code around as examples of how to write a driver.
+
+The openness of Linux, and the many different types of available
+hardware has allowed Linux to support many different hardware devices.
+Unfortunately, the very openness that has allowed Linux to support
+all these different devices has also allowed the behavior of each
+device driver to differ significantly from one device to another.
+This divergence of behavior has been very significant for CD-ROM
+devices; the way a particular drive reacts to a `standard` *ioctl()*
+call varies greatly from one device driver to another. To avoid making
+their drivers totally inconsistent, the writers of Linux CD-ROM
+drivers generally created new device drivers by understanding, copying,
+and then changing an existing one. Unfortunately, this practice did not
+maintain uniform behavior across all the Linux CD-ROM drivers.
+
+This document describes an effort to establish Uniform behavior across
+all the different CD-ROM device drivers for Linux. This document also
+defines the various *ioctl()'s*, and how the low-level CD-ROM device
+drivers should implement them. Currently (as of the Linux 2.1.\ *x*
+development kernels) several low-level CD-ROM device drivers, including
+both IDE/ATAPI and SCSI, now use this Uniform interface.
+
+When the CD-ROM was developed, the interface between the CD-ROM drive
+and the computer was not specified in the standards. As a result, many
+different CD-ROM interfaces were developed. Some of them had their
+own proprietary design (Sony, Mitsumi, Panasonic, Philips), other
+manufacturers adopted an existing electrical interface and changed
+the functionality (CreativeLabs/SoundBlaster, Teac, Funai) or simply
+adapted their drives to one or more of the already existing electrical
+interfaces (Aztech, Sanyo, Funai, Vertos, Longshine, Optics Storage and
+most of the `NoName` manufacturers). In cases where a new drive really
+brought its own interface or used its own command set and flow control
+scheme, either a separate driver had to be written, or an existing
+driver had to be enhanced. History has delivered us CD-ROM support for
+many of these different interfaces. Nowadays, almost all new CD-ROM
+drives are either IDE/ATAPI or SCSI, and it is very unlikely that any
+manufacturer will create a new interface. Even finding drives for the
+old proprietary interfaces is getting difficult.
+
+When (in the 1.3.70's) I looked at the existing software interface,
+which was expressed through `cdrom.h`, it appeared to be a rather wild
+set of commands and data formats [#f1]_. It seemed that many
+features of the software interface had been added to accommodate the
+capabilities of a particular drive, in an *ad hoc* manner. More
+importantly, it appeared that the behavior of the `standard` commands
+was different for most of the different drivers: e. g., some drivers
+close the tray if an *open()* call occurs when the tray is open, while
+others do not. Some drivers lock the door upon opening the device, to
+prevent an incoherent file system, but others don't, to allow software
+ejection. Undoubtedly, the capabilities of the different drives vary,
+but even when two drives have the same capability their drivers'
+behavior was usually different.
+
+.. [#f1]
+   I cannot recollect what kernel version I looked at, then,
+   presumably 1.2.13 and 1.3.34 --- the latest kernel that I was
+   indirectly involved in.
+
+I decided to start a discussion on how to make all the Linux CD-ROM
+drivers behave more uniformly. I began by contacting the developers of
+the many CD-ROM drivers found in the Linux kernel. Their reactions
+encouraged me to write the Uniform CD-ROM Driver which this document is
+intended to describe. The implementation of the Uniform CD-ROM Driver is
+in the file `cdrom.c`. This driver is intended to be an additional software
+layer that sits on top of the low-level device drivers for each CD-ROM drive.
+By adding this additional layer, it is possible to have all the different
+CD-ROM devices behave **exactly** the same (insofar as the underlying
+hardware will allow).
+
+The goal of the Uniform CD-ROM Driver is **not** to alienate driver developers
+whohave not yet taken steps to support this effort. The goal of Uniform CD-ROM
+Driver is simply to give people writing application programs for CD-ROM drives
+**one** Linux CD-ROM interface with consistent behavior for all
+CD-ROM devices. In addition, this also provides a consistent interface
+between the low-level device driver code and the Linux kernel. Care
+is taken that 100% compatibility exists with the data structures and
+programmer's interface defined in `cdrom.h`. This guide was written to
+help CD-ROM driver developers adapt their code to use the Uniform CD-ROM
+Driver code defined in `cdrom.c`.
+
+Personally, I think that the most important hardware interfaces are
+the IDE/ATAPI drives and, of course, the SCSI drives, but as prices
+of hardware drop continuously, it is also likely that people may have
+more than one CD-ROM drive, possibly of mixed types. It is important
+that these drives behave in the same way. In December 1994, one of the
+cheapest CD-ROM drives was a Philips cm206, a double-speed proprietary
+drive. In the months that I was busy writing a Linux driver for it,
+proprietary drives became obsolete and IDE/ATAPI drives became the
+standard. At the time of the last update to this document (November
+1997) it is becoming difficult to even **find** anything less than a
+16 speed CD-ROM drive, and 24 speed drives are common.
+
+.. _cdrom_api:
+
+Standardizing through another software level
+============================================
+
+At the time this document was conceived, all drivers directly
+implemented the CD-ROM *ioctl()* calls through their own routines. This
+led to the danger of different drivers forgetting to do important things
+like checking that the user was giving the driver valid data. More
+importantly, this led to the divergence of behavior, which has already
+been discussed.
+
+For this reason, the Uniform CD-ROM Driver was created to enforce consistent
+CD-ROM drive behavior, and to provide a common set of services to the various
+low-level CD-ROM device drivers. The Uniform CD-ROM Driver now provides another
+software-level, that separates the *ioctl()* and *open()* implementation
+from the actual hardware implementation. Note that this effort has
+made few changes which will affect a user's application programs. The
+greatest change involved moving the contents of the various low-level
+CD-ROM drivers\' header files to the kernel's cdrom directory. This was
+done to help ensure that the user is only presented with only one cdrom
+interface, the interface defined in `cdrom.h`.
+
+CD-ROM drives are specific enough (i. e., different from other
+block-devices such as floppy or hard disc drives), to define a set
+of common **CD-ROM device operations**, *<cdrom-device>_dops*.
+These operations are different from the classical block-device file
+operations, *<block-device>_fops*.
+
+The routines for the Uniform CD-ROM Driver interface level are implemented
+in the file `cdrom.c`. In this file, the Uniform CD-ROM Driver interfaces
+with the kernel as a block device by registering the following general
+*struct file_operations*::
+
+	struct file_operations cdrom_fops = {
+		NULL,			/∗ lseek ∗/
+		block _read ,		/∗ read—general block-dev read ∗/
+		block _write,		/∗ write—general block-dev write ∗/
+		NULL,			/∗ readdir ∗/
+		NULL,			/∗ select ∗/
+		cdrom_ioctl,		/∗ ioctl ∗/
+		NULL,			/∗ mmap ∗/
+		cdrom_open,		/∗ open ∗/
+		cdrom_release,		/∗ release ∗/
+		NULL,			/∗ fsync ∗/
+		NULL,			/∗ fasync ∗/
+		cdrom_media_changed,	/∗ media change ∗/
+		NULL			/∗ revalidate ∗/
+	};
+
+Every active CD-ROM device shares this *struct*. The routines
+declared above are all implemented in `cdrom.c`, since this file is the
+place where the behavior of all CD-ROM-devices is defined and
+standardized. The actual interface to the various types of CD-ROM
+hardware is still performed by various low-level CD-ROM-device
+drivers. These routines simply implement certain **capabilities**
+that are common to all CD-ROM (and really, all removable-media
+devices).
+
+Registration of a low-level CD-ROM device driver is now done through
+the general routines in `cdrom.c`, not through the Virtual File System
+(VFS) any more. The interface implemented in `cdrom.c` is carried out
+through two general structures that contain information about the
+capabilities of the driver, and the specific drives on which the
+driver operates. The structures are:
+
+cdrom_device_ops
+  This structure contains information about the low-level driver for a
+  CD-ROM device. This structure is conceptually connected to the major
+  number of the device (although some drivers may have different
+  major numbers, as is the case for the IDE driver).
+
+cdrom_device_info
+  This structure contains information about a particular CD-ROM drive,
+  such as its device name, speed, etc. This structure is conceptually
+  connected to the minor number of the device.
+
+Registering a particular CD-ROM drive with the Uniform CD-ROM Driver
+is done by the low-level device driver though a call to::
+
+	register_cdrom(struct cdrom_device_info * <device>_info)
+
+The device information structure, *<device>_info*, contains all the
+information needed for the kernel to interface with the low-level
+CD-ROM device driver. One of the most important entries in this
+structure is a pointer to the *cdrom_device_ops* structure of the
+low-level driver.
+
+The device operations structure, *cdrom_device_ops*, contains a list
+of pointers to the functions which are implemented in the low-level
+device driver. When `cdrom.c` accesses a CD-ROM device, it does it
+through the functions in this structure. It is impossible to know all
+the capabilities of future CD-ROM drives, so it is expected that this
+list may need to be expanded from time to time as new technologies are
+developed. For example, CD-R and CD-R/W drives are beginning to become
+popular, and support will soon need to be added for them. For now, the
+current *struct* is::
+
+	struct cdrom_device_ops {
+		int (*open)(struct cdrom_device_info *, int)
+		void (*release)(struct cdrom_device_info *);
+		int (*drive_status)(struct cdrom_device_info *, int);
+		unsigned int (*check_events)(struct cdrom_device_info *,
+					     unsigned int, int);
+		int (*media_changed)(struct cdrom_device_info *, int);
+		int (*tray_move)(struct cdrom_device_info *, int);
+		int (*lock_door)(struct cdrom_device_info *, int);
+		int (*select_speed)(struct cdrom_device_info *, int);
+		int (*select_disc)(struct cdrom_device_info *, int);
+		int (*get_last_session) (struct cdrom_device_info *,
+					 struct cdrom_multisession *);
+		int (*get_mcn)(struct cdrom_device_info *, struct cdrom_mcn *);
+		int (*reset)(struct cdrom_device_info *);
+		int (*audio_ioctl)(struct cdrom_device_info *,
+				   unsigned int, void *);
+		const int capability;		/* capability flags */
+		int (*generic_packet)(struct cdrom_device_info *,
+				      struct packet_command *);
+	};
+
+When a low-level device driver implements one of these capabilities,
+it should add a function pointer to this *struct*. When a particular
+function is not implemented, however, this *struct* should contain a
+NULL instead. The *capability* flags specify the capabilities of the
+CD-ROM hardware and/or low-level CD-ROM driver when a CD-ROM drive
+is registered with the Uniform CD-ROM Driver.
+
+Note that most functions have fewer parameters than their
+*blkdev_fops* counterparts. This is because very little of the
+information in the structures *inode* and *file* is used. For most
+drivers, the main parameter is the *struct* *cdrom_device_info*, from
+which the major and minor number can be extracted. (Most low-level
+CD-ROM drivers don't even look at the major and minor number though,
+since many of them only support one device.) This will be available
+through *dev* in *cdrom_device_info* described below.
+
+The drive-specific, minor-like information that is registered with
+`cdrom.c`, currently contains the following fields::
+
+  struct cdrom_device_info {
+	const struct cdrom_device_ops * ops; 	/* device operations for this major */
+	struct list_head list;			/* linked list of all device_info */
+	struct gendisk * disk;			/* matching block layer disk */
+	void *  handle;				/* driver-dependent data */
+
+	int mask; 				/* mask of capability: disables them */
+	int speed;				/* maximum speed for reading data */
+	int capacity;				/* number of discs in a jukebox */
+
+	unsigned int options:30;		/* options flags */
+	unsigned mc_flags:2;			/*  media-change buffer flags */
+	unsigned int vfs_events;		/*  cached events for vfs path */
+	unsigned int ioctl_events;		/*  cached events for ioctl path */
+	int use_count;				/*  number of times device is opened */
+	char name[20];				/*  name of the device type */
+
+	__u8 sanyo_slot : 2;			/*  Sanyo 3-CD changer support */
+	__u8 keeplocked : 1;			/*  CDROM_LOCKDOOR status */
+	__u8 reserved : 5;			/*  not used yet */
+	int cdda_method;			/*  see CDDA_* flags */
+	__u8 last_sense;			/*  saves last sense key */
+	__u8 media_written;			/*  dirty flag, DVD+RW bookkeeping */
+	unsigned short mmc3_profile;		/*  current MMC3 profile */
+	int for_data;				/*  unknown:TBD */
+	int (*exit)(struct cdrom_device_info *);/*  unknown:TBD */
+	int mrw_mode_page;			/*  which MRW mode page is in use */
+  };
+
+Using this *struct*, a linked list of the registered minor devices is
+built, using the *next* field. The device number, the device operations
+struct and specifications of properties of the drive are stored in this
+structure.
+
+The *mask* flags can be used to mask out some of the capabilities listed
+in *ops->capability*, if a specific drive doesn't support a feature
+of the driver. The value *speed* specifies the maximum head-rate of the
+drive, measured in units of normal audio speed (176kB/sec raw data or
+150kB/sec file system data). The parameters are declared *const*
+because they describe properties of the drive, which don't change after
+registration.
+
+A few registers contain variables local to the CD-ROM drive. The
+flags *options* are used to specify how the general CD-ROM routines
+should behave. These various flags registers should provide enough
+flexibility to adapt to the different users' wishes (and **not** the
+`arbitrary` wishes of the author of the low-level device driver, as is
+the case in the old scheme). The register *mc_flags* is used to buffer
+the information from *media_changed()* to two separate queues. Other
+data that is specific to a minor drive, can be accessed through *handle*,
+which can point to a data structure specific to the low-level driver.
+The fields *use_count*, *next*, *options* and *mc_flags* need not be
+initialized.
+
+The intermediate software layer that `cdrom.c` forms will perform some
+additional bookkeeping. The use count of the device (the number of
+processes that have the device opened) is registered in *use_count*. The
+function *cdrom_ioctl()* will verify the appropriate user-memory regions
+for read and write, and in case a location on the CD is transferred,
+it will `sanitize` the format by making requests to the low-level
+drivers in a standard format, and translating all formats between the
+user-software and low level drivers. This relieves much of the drivers'
+memory checking and format checking and translation. Also, the necessary
+structures will be declared on the program stack.
+
+The implementation of the functions should be as defined in the
+following sections. Two functions **must** be implemented, namely
+*open()* and *release()*. Other functions may be omitted, their
+corresponding capability flags will be cleared upon registration.
+Generally, a function returns zero on success and negative on error. A
+function call should return only after the command has completed, but of
+course waiting for the device should not use processor time.
+
+::
+
+	int open(struct cdrom_device_info *cdi, int purpose)
+
+*Open()* should try to open the device for a specific *purpose*, which
+can be either:
+
+- Open for reading data, as done by `mount()` (2), or the
+  user commands `dd` or `cat`.
+- Open for *ioctl* commands, as done by audio-CD playing programs.
+
+Notice that any strategic code (closing tray upon *open()*, etc.) is
+done by the calling routine in `cdrom.c`, so the low-level routine
+should only be concerned with proper initialization, such as spinning
+up the disc, etc.
+
+::
+
+	void release(struct cdrom_device_info *cdi)
+
+Device-specific actions should be taken such as spinning down the device.
+However, strategic actions such as ejection of the tray, or unlocking
+the door, should be left over to the general routine *cdrom_release()*.
+This is the only function returning type *void*.
+
+.. _cdrom_drive_status:
+
+::
+
+	int drive_status(struct cdrom_device_info *cdi, int slot_nr)
+
+The function *drive_status*, if implemented, should provide
+information on the status of the drive (not the status of the disc,
+which may or may not be in the drive). If the drive is not a changer,
+*slot_nr* should be ignored. In `cdrom.h` the possibilities are listed::
+
+
+	CDS_NO_INFO		/* no information available */
+	CDS_NO_DISC		/* no disc is inserted, tray is closed */
+	CDS_TRAY_OPEN		/* tray is opened */
+	CDS_DRIVE_NOT_READY	/* something is wrong, tray is moving? */
+	CDS_DISC_OK		/* a disc is loaded and everything is fine */
+
+::
+
+	int media_changed(struct cdrom_device_info *cdi, int disc_nr)
+
+This function is very similar to the original function in $struct
+file_operations*. It returns 1 if the medium of the device *cdi->dev*
+has changed since the last call, and 0 otherwise. The parameter
+*disc_nr* identifies a specific slot in a juke-box, it should be
+ignored for single-disc drives. Note that by `re-routing` this
+function through *cdrom_media_changed()*, we can implement separate
+queues for the VFS and a new *ioctl()* function that can report device
+changes to software (e. g., an auto-mounting daemon).
+
+::
+
+	int tray_move(struct cdrom_device_info *cdi, int position)
+
+This function, if implemented, should control the tray movement. (No
+other function should control this.) The parameter *position* controls
+the desired direction of movement:
+
+- 0 Close tray
+- 1 Open tray
+
+This function returns 0 upon success, and a non-zero value upon
+error. Note that if the tray is already in the desired position, no
+action need be taken, and the return value should be 0.
+
+::
+
+	int lock_door(struct cdrom_device_info *cdi, int lock)
+
+This function (and no other code) controls locking of the door, if the
+drive allows this. The value of *lock* controls the desired locking
+state:
+
+- 0 Unlock door, manual opening is allowed
+- 1 Lock door, tray cannot be ejected manually
+
+This function returns 0 upon success, and a non-zero value upon
+error. Note that if the door is already in the requested state, no
+action need be taken, and the return value should be 0.
+
+::
+
+	int select_speed(struct cdrom_device_info *cdi, int speed)
+
+Some CD-ROM drives are capable of changing their head-speed. There
+are several reasons for changing the speed of a CD-ROM drive. Badly
+pressed CD-ROM s may benefit from less-than-maximum head rate. Modern
+CD-ROM drives can obtain very high head rates (up to *24x* is
+common). It has been reported that these drives can make reading
+errors at these high speeds, reducing the speed can prevent data loss
+in these circumstances. Finally, some of these drives can
+make an annoyingly loud noise, which a lower speed may reduce.
+
+This function specifies the speed at which data is read or audio is
+played back. The value of *speed* specifies the head-speed of the
+drive, measured in units of standard cdrom speed (176kB/sec raw data
+or 150kB/sec file system data). So to request that a CD-ROM drive
+operate at 300kB/sec you would call the CDROM_SELECT_SPEED *ioctl*
+with *speed=2*. The special value `0` means `auto-selection`, i. e.,
+maximum data-rate or real-time audio rate. If the drive doesn't have
+this `auto-selection` capability, the decision should be made on the
+current disc loaded and the return value should be positive. A negative
+return value indicates an error.
+
+::
+
+	int select_disc(struct cdrom_device_info *cdi, int number)
+
+If the drive can store multiple discs (a juke-box) this function
+will perform disc selection. It should return the number of the
+selected disc on success, a negative value on error. Currently, only
+the ide-cd driver supports this functionality.
+
+::
+
+	int get_last_session(struct cdrom_device_info *cdi,
+			     struct cdrom_multisession *ms_info)
+
+This function should implement the old corresponding *ioctl()*. For
+device *cdi->dev*, the start of the last session of the current disc
+should be returned in the pointer argument *ms_info*. Note that
+routines in `cdrom.c` have sanitized this argument: its requested
+format will **always** be of the type *CDROM_LBA* (linear block
+addressing mode), whatever the calling software requested. But
+sanitization goes even further: the low-level implementation may
+return the requested information in *CDROM_MSF* format if it wishes so
+(setting the *ms_info->addr_format* field appropriately, of
+course) and the routines in `cdrom.c` will make the transformation if
+necessary. The return value is 0 upon success.
+
+::
+
+	int get_mcn(struct cdrom_device_info *cdi,
+		    struct cdrom_mcn *mcn)
+
+Some discs carry a `Media Catalog Number` (MCN), also called
+`Universal Product Code` (UPC). This number should reflect the number
+that is generally found in the bar-code on the product. Unfortunately,
+the few discs that carry such a number on the disc don't even use the
+same format. The return argument to this function is a pointer to a
+pre-declared memory region of type *struct cdrom_mcn*. The MCN is
+expected as a 13-character string, terminated by a null-character.
+
+::
+
+	int reset(struct cdrom_device_info *cdi)
+
+This call should perform a hard-reset on the drive (although in
+circumstances that a hard-reset is necessary, a drive may very well not
+listen to commands anymore). Preferably, control is returned to the
+caller only after the drive has finished resetting. If the drive is no
+longer listening, it may be wise for the underlying low-level cdrom
+driver to time out.
+
+::
+
+	int audio_ioctl(struct cdrom_device_info *cdi,
+			unsigned int cmd, void *arg)
+
+Some of the CD-ROM-\ *ioctl()*\ 's defined in `cdrom.h` can be
+implemented by the routines described above, and hence the function
+*cdrom_ioctl* will use those. However, most *ioctl()*\ 's deal with
+audio-control. We have decided to leave these to be accessed through a
+single function, repeating the arguments *cmd* and *arg*. Note that
+the latter is of type *void*, rather than *unsigned long int*.
+The routine *cdrom_ioctl()* does do some useful things,
+though. It sanitizes the address format type to *CDROM_MSF* (Minutes,
+Seconds, Frames) for all audio calls. It also verifies the memory
+location of *arg*, and reserves stack-memory for the argument. This
+makes implementation of the *audio_ioctl()* much simpler than in the
+old driver scheme. For example, you may look up the function
+*cm206_audio_ioctl()* `cm206.c` that should be updated with
+this documentation.
+
+An unimplemented ioctl should return *-ENOSYS*, but a harmless request
+(e. g., *CDROMSTART*) may be ignored by returning 0 (success). Other
+errors should be according to the standards, whatever they are. When
+an error is returned by the low-level driver, the Uniform CD-ROM Driver
+tries whenever possible to return the error code to the calling program.
+(We may decide to sanitize the return value in *cdrom_ioctl()* though, in
+order to guarantee a uniform interface to the audio-player software.)
+
+::
+
+	int dev_ioctl(struct cdrom_device_info *cdi,
+		      unsigned int cmd, unsigned long arg)
+
+Some *ioctl()'s* seem to be specific to certain CD-ROM drives. That is,
+they are introduced to service some capabilities of certain drives. In
+fact, there are 6 different *ioctl()'s* for reading data, either in some
+particular kind of format, or audio data. Not many drives support
+reading audio tracks as data, I believe this is because of protection
+of copyrights of artists. Moreover, I think that if audio-tracks are
+supported, it should be done through the VFS and not via *ioctl()'s*. A
+problem here could be the fact that audio-frames are 2352 bytes long,
+so either the audio-file-system should ask for 75264 bytes at once
+(the least common multiple of 512 and 2352), or the drivers should
+bend their backs to cope with this incoherence (to which I would be
+opposed). Furthermore, it is very difficult for the hardware to find
+the exact frame boundaries, since there are no synchronization headers
+in audio frames. Once these issues are resolved, this code should be
+standardized in `cdrom.c`.
+
+Because there are so many *ioctl()'s* that seem to be introduced to
+satisfy certain drivers [#f2]_, any non-standard *ioctl()*\ s
+are routed through the call *dev_ioctl()*. In principle, `private`
+*ioctl()*\ 's should be numbered after the device's major number, and not
+the general CD-ROM *ioctl* number, `0x53`. Currently the
+non-supported *ioctl()'s* are:
+
+	CDROMREADMODE1, CDROMREADMODE2, CDROMREADAUDIO, CDROMREADRAW,
+	CDROMREADCOOKED, CDROMSEEK, CDROMPLAY-BLK and CDROM-READALL
+
+.. [#f2]
+
+   Is there software around that actually uses these? I'd be interested!
+
+.. _cdrom_capabilities:
+
+CD-ROM capabilities
+-------------------
+
+Instead of just implementing some *ioctl* calls, the interface in
+`cdrom.c` supplies the possibility to indicate the **capabilities**
+of a CD-ROM drive. This can be done by ORing any number of
+capability-constants that are defined in `cdrom.h` at the registration
+phase. Currently, the capabilities are any of::
+
+	CDC_CLOSE_TRAY		/* can close tray by software control */
+	CDC_OPEN_TRAY		/* can open tray */
+	CDC_LOCK		/* can lock and unlock the door */
+	CDC_SELECT_SPEED	/* can select speed, in units of * sim*150 ,kB/s */
+	CDC_SELECT_DISC		/* drive is juke-box */
+	CDC_MULTI_SESSION	/* can read sessions *> rm1* */
+	CDC_MCN			/* can read Media Catalog Number */
+	CDC_MEDIA_CHANGED	/* can report if disc has changed */
+	CDC_PLAY_AUDIO		/* can perform audio-functions (play, pause, etc) */
+	CDC_RESET		/* hard reset device */
+	CDC_IOCTLS		/* driver has non-standard ioctls */
+	CDC_DRIVE_STATUS	/* driver implements drive status */
+
+The capability flag is declared *const*, to prevent drivers from
+accidentally tampering with the contents. The capability fags actually
+inform `cdrom.c` of what the driver can do. If the drive found
+by the driver does not have the capability, is can be masked out by
+the *cdrom_device_info* variable *mask*. For instance, the SCSI CD-ROM
+driver has implemented the code for loading and ejecting CD-ROM's, and
+hence its corresponding flags in *capability* will be set. But a SCSI
+CD-ROM drive might be a caddy system, which can't load the tray, and
+hence for this drive the *cdrom_device_info* struct will have set
+the *CDC_CLOSE_TRAY* bit in *mask*.
+
+In the file `cdrom.c` you will encounter many constructions of the type::
+
+	if (cdo->capability & ∼cdi->mask & CDC _⟨capability⟩) ...
+
+There is no *ioctl* to set the mask... The reason is that
+I think it is better to control the **behavior** rather than the
+**capabilities**.
+
+Options
+-------
+
+A final flag register controls the **behavior** of the CD-ROM
+drives, in order to satisfy different users' wishes, hopefully
+independently of the ideas of the respective author who happened to
+have made the drive's support available to the Linux community. The
+current behavior options are::
+
+	CDO_AUTO_CLOSE	/* try to close tray upon device open() */
+	CDO_AUTO_EJECT	/* try to open tray on last device close() */
+	CDO_USE_FFLAGS	/* use file_pointer->f_flags to indicate purpose for open() */
+	CDO_LOCK	/* try to lock door if device is opened */
+	CDO_CHECK_TYPE	/* ensure disc type is data if opened for data */
+
+The initial value of this register is
+`CDO_AUTO_CLOSE | CDO_USE_FFLAGS | CDO_LOCK`, reflecting my own view on user
+interface and software standards. Before you protest, there are two
+new *ioctl()'s* implemented in `cdrom.c`, that allow you to control the
+behavior by software. These are::
+
+	CDROM_SET_OPTIONS	/* set options specified in (int)arg */
+	CDROM_CLEAR_OPTIONS	/* clear options specified in (int)arg */
+
+One option needs some more explanation: *CDO_USE_FFLAGS*. In the next
+newsection we explain what the need for this option is.
+
+A software package `setcd`, available from the Debian distribution
+and `sunsite.unc.edu`, allows user level control of these flags.
+
+
+The need to know the purpose of opening the CD-ROM device
+=========================================================
+
+Traditionally, Unix devices can be used in two different `modes`,
+either by reading/writing to the device file, or by issuing
+controlling commands to the device, by the device's *ioctl()*
+call. The problem with CD-ROM drives, is that they can be used for
+two entirely different purposes. One is to mount removable
+file systems, CD-ROM's, the other is to play audio CD's. Audio commands
+are implemented entirely through *ioctl()\'s*, presumably because the
+first implementation (SUN?) has been such. In principle there is
+nothing wrong with this, but a good control of the `CD player` demands
+that the device can **always** be opened in order to give the
+*ioctl* commands, regardless of the state the drive is in.
+
+On the other hand, when used as a removable-media disc drive (what the
+original purpose of CD-ROM s is) we would like to make sure that the
+disc drive is ready for operation upon opening the device. In the old
+scheme, some CD-ROM drivers don't do any integrity checking, resulting
+in a number of i/o errors reported by the VFS to the kernel when an
+attempt for mounting a CD-ROM on an empty drive occurs. This is not a
+particularly elegant way to find out that there is no CD-ROM inserted;
+it more-or-less looks like the old IBM-PC trying to read an empty floppy
+drive for a couple of seconds, after which the system complains it
+can't read from it. Nowadays we can **sense** the existence of a
+removable medium in a drive, and we believe we should exploit that
+fact. An integrity check on opening of the device, that verifies the
+availability of a CD-ROM and its correct type (data), would be
+desirable.
+
+These two ways of using a CD-ROM drive, principally for data and
+secondarily for playing audio discs, have different demands for the
+behavior of the *open()* call. Audio use simply wants to open the
+device in order to get a file handle which is needed for issuing
+*ioctl* commands, while data use wants to open for correct and
+reliable data transfer. The only way user programs can indicate what
+their *purpose* of opening the device is, is through the *flags*
+parameter (see `open(2)`). For CD-ROM devices, these flags aren't
+implemented (some drivers implement checking for write-related flags,
+but this is not strictly necessary if the device file has correct
+permission flags). Most option flags simply don't make sense to
+CD-ROM devices: *O_CREAT*, *O_NOCTTY*, *O_TRUNC*, *O_APPEND*, and
+*O_SYNC* have no meaning to a CD-ROM.
+
+We therefore propose to use the flag *O_NONBLOCK* to indicate
+that the device is opened just for issuing *ioctl*
+commands. Strictly, the meaning of *O_NONBLOCK* is that opening and
+subsequent calls to the device don't cause the calling process to
+wait. We could interpret this as don't wait until someone has
+inserted some valid data-CD-ROM. Thus, our proposal of the
+implementation for the *open()* call for CD-ROM s is:
+
+- If no other flags are set than *O_RDONLY*, the device is opened
+  for data transfer, and the return value will be 0 only upon successful
+  initialization of the transfer. The call may even induce some actions
+  on the CD-ROM, such as closing the tray.
+- If the option flag *O_NONBLOCK* is set, opening will always be
+  successful, unless the whole device doesn't exist. The drive will take
+  no actions whatsoever.
+
+And what about standards?
+-------------------------
+
+You might hesitate to accept this proposal as it comes from the
+Linux community, and not from some standardizing institute. What
+about SUN, SGI, HP and all those other Unix and hardware vendors?
+Well, these companies are in the lucky position that they generally
+control both the hardware and software of their supported products,
+and are large enough to set their own standard. They do not have to
+deal with a dozen or more different, competing hardware
+configurations\ [#f3]_.
+
+.. [#f3]
+
+   Incidentally, I think that SUN's approach to mounting CD-ROM s is very
+   good in origin: under Solaris a volume-daemon automatically mounts a
+   newly inserted CD-ROM under `/cdrom/*<volume-name>*`.
+
+   In my opinion they should have pushed this
+   further and have **every** CD-ROM on the local area network be
+   mounted at the similar location, i. e., no matter in which particular
+   machine you insert a CD-ROM, it will always appear at the same
+   position in the directory tree, on every system. When I wanted to
+   implement such a user-program for Linux, I came across the
+   differences in behavior of the various drivers, and the need for an
+   *ioctl* informing about media changes.
+
+We believe that using *O_NONBLOCK* to indicate that a device is being opened
+for *ioctl* commands only can be easily introduced in the Linux
+community. All the CD-player authors will have to be informed, we can
+even send in our own patches to the programs. The use of *O_NONBLOCK*
+has most likely no influence on the behavior of the CD-players on
+other operating systems than Linux. Finally, a user can always revert
+to old behavior by a call to
+*ioctl(file_descriptor, CDROM_CLEAR_OPTIONS, CDO_USE_FFLAGS)*.
+
+The preferred strategy of *open()*
+----------------------------------
+
+The routines in `cdrom.c` are designed in such a way that run-time
+configuration of the behavior of CD-ROM devices (of **any** type)
+can be carried out, by the *CDROM_SET/CLEAR_OPTIONS* *ioctls*. Thus, various
+modes of operation can be set:
+
+`CDO_AUTO_CLOSE | CDO_USE_FFLAGS | CDO_LOCK`
+   This is the default setting. (With *CDO_CHECK_TYPE* it will be better, in
+   the future.) If the device is not yet opened by any other process, and if
+   the device is being opened for data (*O_NONBLOCK* is not set) and the
+   tray is found to be open, an attempt to close the tray is made. Then,
+   it is verified that a disc is in the drive and, if *CDO_CHECK_TYPE* is
+   set, that it contains tracks of type `data mode 1`. Only if all tests
+   are passed is the return value zero. The door is locked to prevent file
+   system corruption. If the drive is opened for audio (*O_NONBLOCK* is
+   set), no actions are taken and a value of 0 will be returned.
+
+`CDO_AUTO_CLOSE | CDO_AUTO_EJECT | CDO_LOCK`
+   This mimics the behavior of the current sbpcd-driver. The option flags are
+   ignored, the tray is closed on the first open, if necessary. Similarly,
+   the tray is opened on the last release, i. e., if a CD-ROM is unmounted,
+   it is automatically ejected, such that the user can replace it.
+
+We hope that these option can convince everybody (both driver
+maintainers and user program developers) to adopt the new CD-ROM
+driver scheme and option flag interpretation.
+
+Description of routines in `cdrom.c`
+====================================
+
+Only a few routines in `cdrom.c` are exported to the drivers. In this
+new section we will discuss these, as well as the functions that `take
+over' the CD-ROM interface to the kernel. The header file belonging
+to `cdrom.c` is called `cdrom.h`. Formerly, some of the contents of this
+file were placed in the file `ucdrom.h`, but this file has now been
+merged back into `cdrom.h`.
+
+::
+
+	struct file_operations cdrom_fops
+
+The contents of this structure were described in cdrom_api_.
+A pointer to this structure is assigned to the *fops* field
+of the *struct gendisk*.
+
+::
+
+	int register_cdrom(struct cdrom_device_info *cdi)
+
+This function is used in about the same way one registers *cdrom_fops*
+with the kernel, the device operations and information structures,
+as described in cdrom_api_, should be registered with the
+Uniform CD-ROM Driver::
+
+	register_cdrom(&<device>_info);
+
+
+This function returns zero upon success, and non-zero upon
+failure. The structure *<device>_info* should have a pointer to the
+driver's *<device>_dops*, as in::
+
+	struct cdrom_device_info <device>_info = {
+		<device>_dops;
+		...
+	}
+
+Note that a driver must have one static structure, *<device>_dops*, while
+it may have as many structures *<device>_info* as there are minor devices
+active. *Register_cdrom()* builds a linked list from these.
+
+
+::
+
+	void unregister_cdrom(struct cdrom_device_info *cdi)
+
+Unregistering device *cdi* with minor number *MINOR(cdi->dev)* removes
+the minor device from the list. If it was the last registered minor for
+the low-level driver, this disconnects the registered device-operation
+routines from the CD-ROM interface. This function returns zero upon
+success, and non-zero upon failure.
+
+::
+
+	int cdrom_open(struct inode * ip, struct file * fp)
+
+This function is not called directly by the low-level drivers, it is
+listed in the standard *cdrom_fops*. If the VFS opens a file, this
+function becomes active. A strategy is implemented in this routine,
+taking care of all capabilities and options that are set in the
+*cdrom_device_ops* connected to the device. Then, the program flow is
+transferred to the device_dependent *open()* call.
+
+::
+
+	void cdrom_release(struct inode *ip, struct file *fp)
+
+This function implements the reverse-logic of *cdrom_open()*, and then
+calls the device-dependent *release()* routine. When the use-count has
+reached 0, the allocated buffers are flushed by calls to *sync_dev(dev)*
+and *invalidate_buffers(dev)*.
+
+
+.. _cdrom_ioctl:
+
+::
+
+	int cdrom_ioctl(struct inode *ip, struct file *fp,
+			unsigned int cmd, unsigned long arg)
+
+This function handles all the standard *ioctl* requests for CD-ROM
+devices in a uniform way. The different calls fall into three
+categories: *ioctl()'s* that can be directly implemented by device
+operations, ones that are routed through the call *audio_ioctl()*, and
+the remaining ones, that are presumable device-dependent. Generally, a
+negative return value indicates an error.
+
+Directly implemented *ioctl()'s*
+--------------------------------
+
+The following `old` CD-ROM *ioctl()*\ 's are implemented by directly
+calling device-operations in *cdrom_device_ops*, if implemented and
+not masked:
+
+`CDROMMULTISESSION`
+	Requests the last session on a CD-ROM.
+`CDROMEJECT`
+	Open tray.
+`CDROMCLOSETRAY`
+	Close tray.
+`CDROMEJECT_SW`
+	If *arg\not=0*, set behavior to auto-close (close
+	tray on first open) and auto-eject (eject on last release), otherwise
+	set behavior to non-moving on *open()* and *release()* calls.
+`CDROM_GET_MCN`
+	Get the Media Catalog Number from a CD.
+
+*Ioctl*s routed through *audio_ioctl()*
+---------------------------------------
+
+The following set of *ioctl()'s* are all implemented through a call to
+the *cdrom_fops* function *audio_ioctl()*. Memory checks and
+allocation are performed in *cdrom_ioctl()*, and also sanitization of
+address format (*CDROM_LBA*/*CDROM_MSF*) is done.
+
+`CDROMSUBCHNL`
+	Get sub-channel data in argument *arg* of type
+	`struct cdrom_subchnl *`.
+`CDROMREADTOCHDR`
+	Read Table of Contents header, in *arg* of type
+	`struct cdrom_tochdr *`.
+`CDROMREADTOCENTRY`
+	Read a Table of Contents entry in *arg* and specified by *arg*
+	of type `struct cdrom_tocentry *`.
+`CDROMPLAYMSF`
+	Play audio fragment specified in Minute, Second, Frame format,
+	delimited by *arg* of type `struct cdrom_msf *`.
+`CDROMPLAYTRKIND`
+	Play audio fragment in track-index format delimited by *arg*
+	of type `struct cdrom_ti *`.
+`CDROMVOLCTRL`
+	Set volume specified by *arg* of type `struct cdrom_volctrl *`.
+`CDROMVOLREAD`
+	Read volume into by *arg* of type `struct cdrom_volctrl *`.
+`CDROMSTART`
+	Spin up disc.
+`CDROMSTOP`
+	Stop playback of audio fragment.
+`CDROMPAUSE`
+	Pause playback of audio fragment.
+`CDROMRESUME`
+	Resume playing.
+
+New *ioctl()'s* in `cdrom.c`
+----------------------------
+
+The following *ioctl()'s* have been introduced to allow user programs to
+control the behavior of individual CD-ROM devices. New *ioctl*
+commands can be identified by the underscores in their names.
+
+`CDROM_SET_OPTIONS`
+	Set options specified by *arg*. Returns the option flag register
+	after modification. Use *arg = \rm0* for reading the current flags.
+`CDROM_CLEAR_OPTIONS`
+	Clear options specified by *arg*. Returns the option flag register
+	after modification.
+`CDROM_SELECT_SPEED`
+	Select head-rate speed of disc specified as by *arg* in units
+	of standard cdrom speed (176\,kB/sec raw data or
+	150kB/sec file system data). The value 0 means `auto-select`,
+	i. e., play audio discs at real time and data discs at maximum speed.
+	The value *arg* is checked against the maximum head rate of the
+	drive found in the *cdrom_dops*.
+`CDROM_SELECT_DISC`
+	Select disc numbered *arg* from a juke-box.
+
+	First disc is numbered 0. The number *arg* is checked against the
+	maximum number of discs in the juke-box found in the *cdrom_dops*.
+`CDROM_MEDIA_CHANGED`
+	Returns 1 if a disc has been changed since the last call.
+	Note that calls to *cdrom_media_changed* by the VFS are treated
+	by an independent queue, so both mechanisms will detect a
+	media change once. For juke-boxes, an extra argument *arg*
+	specifies the slot for which the information is given. The special
+	value *CDSL_CURRENT* requests that information about the currently
+	selected slot be returned.
+`CDROM_DRIVE_STATUS`
+	Returns the status of the drive by a call to
+	*drive_status()*. Return values are defined in cdrom_drive_status_.
+	Note that this call doesn't return information on the
+	current playing activity of the drive; this can be polled through
+	an *ioctl* call to *CDROMSUBCHNL*. For juke-boxes, an extra argument
+	*arg* specifies the slot for which (possibly limited) information is
+	given. The special value *CDSL_CURRENT* requests that information
+	about the currently selected slot be returned.
+`CDROM_DISC_STATUS`
+	Returns the type of the disc currently in the drive.
+	It should be viewed as a complement to *CDROM_DRIVE_STATUS*.
+	This *ioctl* can provide *some* information about the current
+	disc that is inserted in the drive. This functionality used to be
+	implemented in the low level drivers, but is now carried out
+	entirely in Uniform CD-ROM Driver.
+
+	The history of development of the CD's use as a carrier medium for
+	various digital information has lead to many different disc types.
+	This *ioctl* is useful only in the case that CDs have \emph {only
+	one} type of data on them. While this is often the case, it is
+	also very common for CDs to have some tracks with data, and some
+	tracks with audio. Because this is an existing interface, rather
+	than fixing this interface by changing the assumptions it was made
+	under, thereby breaking all user applications that use this
+	function, the Uniform CD-ROM Driver implements this *ioctl* as
+	follows: If the CD in question has audio tracks on it, and it has
+	absolutely no CD-I, XA, or data tracks on it, it will be reported
+	as *CDS_AUDIO*. If it has both audio and data tracks, it will
+	return *CDS_MIXED*. If there are no audio tracks on the disc, and
+	if the CD in question has any CD-I tracks on it, it will be
+	reported as *CDS_XA_2_2*. Failing that, if the CD in question
+	has any XA tracks on it, it will be reported as *CDS_XA_2_1*.
+	Finally, if the CD in question has any data tracks on it,
+	it will be reported as a data CD (*CDS_DATA_1*).
+
+	This *ioctl* can return::
+
+		CDS_NO_INFO	/* no information available */
+		CDS_NO_DISC	/* no disc is inserted, or tray is opened */
+		CDS_AUDIO	/* Audio disc (2352 audio bytes/frame) */
+		CDS_DATA_1	/* data disc, mode 1 (2048 user bytes/frame) */
+		CDS_XA_2_1	/* mixed data (XA), mode 2, form 1 (2048 user bytes) */
+		CDS_XA_2_2	/* mixed data (XA), mode 2, form 1 (2324 user bytes) */
+		CDS_MIXED	/* mixed audio/data disc */
+
+	For some information concerning frame layout of the various disc
+	types, see a recent version of `cdrom.h`.
+
+`CDROM_CHANGER_NSLOTS`
+	Returns the number of slots in a juke-box.
+`CDROMRESET`
+	Reset the drive.
+`CDROM_GET_CAPABILITY`
+	Returns the *capability* flags for the drive. Refer to section
+	cdrom_capabilities_ for more information on these flags.
+`CDROM_LOCKDOOR`
+	 Locks the door of the drive. `arg == 0` unlocks the door,
+	 any other value locks it.
+`CDROM_DEBUG`
+	 Turns on debugging info. Only root is allowed to do this.
+	 Same semantics as CDROM_LOCKDOOR.
+
+
+Device dependent *ioctl()'s*
+----------------------------
+
+Finally, all other *ioctl()'s* are passed to the function *dev_ioctl()*,
+if implemented. No memory allocation or verification is carried out.
+
+How to update your driver
+=========================
+
+- Make a backup of your current driver.
+- Get hold of the files `cdrom.c` and `cdrom.h`, they should be in
+  the directory tree that came with this documentation.
+- Make sure you include `cdrom.h`.
+- Change the 3rd argument of *register_blkdev* from `&<your-drive>_fops`
+  to `&cdrom_fops`.
+- Just after that line, add the following to register with the Uniform
+  CD-ROM Driver::
+
+	register_cdrom(&<your-drive>_info);*
+
+  Similarly, add a call to *unregister_cdrom()* at the appropriate place.
+- Copy an example of the device-operations *struct* to your
+  source, e. g., from `cm206.c` *cm206_dops*, and change all
+  entries to names corresponding to your driver, or names you just
+  happen to like. If your driver doesn't support a certain function,
+  make the entry *NULL*. At the entry *capability* you should list all
+  capabilities your driver currently supports. If your driver
+  has a capability that is not listed, please send me a message.
+- Copy the *cdrom_device_info* declaration from the same example
+  driver, and modify the entries according to your needs. If your
+  driver dynamically determines the capabilities of the hardware, this
+  structure should also be declared dynamically.
+- Implement all functions in your `<device>_dops` structure,
+  according to prototypes listed in  `cdrom.h`, and specifications given
+  in cdrom_api_. Most likely you have already implemented
+  the code in a large part, and you will almost certainly need to adapt the
+  prototype and return values.
+- Rename your `<device>_ioctl()` function to *audio_ioctl* and
+  change the prototype a little. Remove entries listed in the first
+  part in cdrom_ioctl_, if your code was OK, these are
+  just calls to the routines you adapted in the previous step.
+- You may remove all remaining memory checking code in the
+  *audio_ioctl()* function that deals with audio commands (these are
+  listed in the second part of cdrom_ioctl_. There is no
+  need for memory allocation either, so most *case*s in the *switch*
+  statement look similar to::
+
+	case CDROMREADTOCENTRY:
+		get_toc_entry\bigl((struct cdrom_tocentry *) arg);
+
+- All remaining *ioctl* cases must be moved to a separate
+  function, *<device>_ioctl*, the device-dependent *ioctl()'s*. Note that
+  memory checking and allocation must be kept in this code!
+- Change the prototypes of *<device>_open()* and
+  *<device>_release()*, and remove any strategic code (i. e., tray
+  movement, door locking, etc.).
+- Try to recompile the drivers. We advise you to use modules, both
+  for `cdrom.o` and your driver, as debugging is much easier this
+  way.
+
+Thanks
+======
+
+Thanks to all the people involved. First, Erik Andersen, who has
+taken over the torch in maintaining `cdrom.c` and integrating much
+CD-ROM-related code in the 2.1-kernel. Thanks to Scott Snyder and
+Gerd Knorr, who were the first to implement this interface for SCSI
+and IDE-CD drivers and added many ideas for extension of the data
+structures relative to kernel~2.0. Further thanks to Heiko Eißfeldt,
+Thomas Quinot, Jon Tombs, Ken Pizzini, Eberhard Mönkeberg and Andrew Kroll,
+the Linux CD-ROM device driver developers who were kind
+enough to give suggestions and criticisms during the writing. Finally
+of course, I want to thank Linus Torvalds for making this possible in
+the first place.
diff --git a/Documentation/cdrom/cdrom-standard.txt b/Documentation/cdrom/cdrom-standard.txt
deleted file mode 100644
index dde4f7f7fdbf..000000000000
--- a/Documentation/cdrom/cdrom-standard.txt
+++ /dev/null
@@ -1,1063 +0,0 @@
-=======================
-A Linux CD-ROM standard
-=======================
-
-:Author: David van Leeuwen <david@ElseWare.cistron.nl>
-:Date: 12 March 1999
-:Updated by: Erik Andersen (andersee@debian.org)
-:Updated by: Jens Axboe (axboe@image.dk)
-
-
-Introduction
-============
-
-Linux is probably the Unix-like operating system that supports
-the widest variety of hardware devices. The reasons for this are
-presumably
-
-- The large list of hardware devices available for the many platforms
-  that Linux now supports (i.e., i386-PCs, Sparc Suns, etc.)
-- The open design of the operating system, such that anybody can write a
-  driver for Linux.
-- There is plenty of source code around as examples of how to write a driver.
-
-The openness of Linux, and the many different types of available
-hardware has allowed Linux to support many different hardware devices.
-Unfortunately, the very openness that has allowed Linux to support
-all these different devices has also allowed the behavior of each
-device driver to differ significantly from one device to another.
-This divergence of behavior has been very significant for CD-ROM
-devices; the way a particular drive reacts to a `standard` *ioctl()*
-call varies greatly from one device driver to another. To avoid making
-their drivers totally inconsistent, the writers of Linux CD-ROM
-drivers generally created new device drivers by understanding, copying,
-and then changing an existing one. Unfortunately, this practice did not
-maintain uniform behavior across all the Linux CD-ROM drivers.
-
-This document describes an effort to establish Uniform behavior across
-all the different CD-ROM device drivers for Linux. This document also
-defines the various *ioctl()'s*, and how the low-level CD-ROM device
-drivers should implement them. Currently (as of the Linux 2.1.\ *x*
-development kernels) several low-level CD-ROM device drivers, including
-both IDE/ATAPI and SCSI, now use this Uniform interface.
-
-When the CD-ROM was developed, the interface between the CD-ROM drive
-and the computer was not specified in the standards. As a result, many
-different CD-ROM interfaces were developed. Some of them had their
-own proprietary design (Sony, Mitsumi, Panasonic, Philips), other
-manufacturers adopted an existing electrical interface and changed
-the functionality (CreativeLabs/SoundBlaster, Teac, Funai) or simply
-adapted their drives to one or more of the already existing electrical
-interfaces (Aztech, Sanyo, Funai, Vertos, Longshine, Optics Storage and
-most of the `NoName` manufacturers). In cases where a new drive really
-brought its own interface or used its own command set and flow control
-scheme, either a separate driver had to be written, or an existing
-driver had to be enhanced. History has delivered us CD-ROM support for
-many of these different interfaces. Nowadays, almost all new CD-ROM
-drives are either IDE/ATAPI or SCSI, and it is very unlikely that any
-manufacturer will create a new interface. Even finding drives for the
-old proprietary interfaces is getting difficult.
-
-When (in the 1.3.70's) I looked at the existing software interface,
-which was expressed through `cdrom.h`, it appeared to be a rather wild
-set of commands and data formats [#f1]_. It seemed that many
-features of the software interface had been added to accommodate the
-capabilities of a particular drive, in an *ad hoc* manner. More
-importantly, it appeared that the behavior of the `standard` commands
-was different for most of the different drivers: e. g., some drivers
-close the tray if an *open()* call occurs when the tray is open, while
-others do not. Some drivers lock the door upon opening the device, to
-prevent an incoherent file system, but others don't, to allow software
-ejection. Undoubtedly, the capabilities of the different drives vary,
-but even when two drives have the same capability their drivers'
-behavior was usually different.
-
-.. [#f1]
-   I cannot recollect what kernel version I looked at, then,
-   presumably 1.2.13 and 1.3.34 --- the latest kernel that I was
-   indirectly involved in.
-
-I decided to start a discussion on how to make all the Linux CD-ROM
-drivers behave more uniformly. I began by contacting the developers of
-the many CD-ROM drivers found in the Linux kernel. Their reactions
-encouraged me to write the Uniform CD-ROM Driver which this document is
-intended to describe. The implementation of the Uniform CD-ROM Driver is
-in the file `cdrom.c`. This driver is intended to be an additional software
-layer that sits on top of the low-level device drivers for each CD-ROM drive.
-By adding this additional layer, it is possible to have all the different
-CD-ROM devices behave **exactly** the same (insofar as the underlying
-hardware will allow).
-
-The goal of the Uniform CD-ROM Driver is **not** to alienate driver developers
-whohave not yet taken steps to support this effort. The goal of Uniform CD-ROM
-Driver is simply to give people writing application programs for CD-ROM drives
-**one** Linux CD-ROM interface with consistent behavior for all
-CD-ROM devices. In addition, this also provides a consistent interface
-between the low-level device driver code and the Linux kernel. Care
-is taken that 100% compatibility exists with the data structures and
-programmer's interface defined in `cdrom.h`. This guide was written to
-help CD-ROM driver developers adapt their code to use the Uniform CD-ROM
-Driver code defined in `cdrom.c`.
-
-Personally, I think that the most important hardware interfaces are
-the IDE/ATAPI drives and, of course, the SCSI drives, but as prices
-of hardware drop continuously, it is also likely that people may have
-more than one CD-ROM drive, possibly of mixed types. It is important
-that these drives behave in the same way. In December 1994, one of the
-cheapest CD-ROM drives was a Philips cm206, a double-speed proprietary
-drive. In the months that I was busy writing a Linux driver for it,
-proprietary drives became obsolete and IDE/ATAPI drives became the
-standard. At the time of the last update to this document (November
-1997) it is becoming difficult to even **find** anything less than a
-16 speed CD-ROM drive, and 24 speed drives are common.
-
-.. _cdrom_api:
-
-Standardizing through another software level
-============================================
-
-At the time this document was conceived, all drivers directly
-implemented the CD-ROM *ioctl()* calls through their own routines. This
-led to the danger of different drivers forgetting to do important things
-like checking that the user was giving the driver valid data. More
-importantly, this led to the divergence of behavior, which has already
-been discussed.
-
-For this reason, the Uniform CD-ROM Driver was created to enforce consistent
-CD-ROM drive behavior, and to provide a common set of services to the various
-low-level CD-ROM device drivers. The Uniform CD-ROM Driver now provides another
-software-level, that separates the *ioctl()* and *open()* implementation
-from the actual hardware implementation. Note that this effort has
-made few changes which will affect a user's application programs. The
-greatest change involved moving the contents of the various low-level
-CD-ROM drivers\' header files to the kernel's cdrom directory. This was
-done to help ensure that the user is only presented with only one cdrom
-interface, the interface defined in `cdrom.h`.
-
-CD-ROM drives are specific enough (i. e., different from other
-block-devices such as floppy or hard disc drives), to define a set
-of common **CD-ROM device operations**, *<cdrom-device>_dops*.
-These operations are different from the classical block-device file
-operations, *<block-device>_fops*.
-
-The routines for the Uniform CD-ROM Driver interface level are implemented
-in the file `cdrom.c`. In this file, the Uniform CD-ROM Driver interfaces
-with the kernel as a block device by registering the following general
-*struct file_operations*::
-
-	struct file_operations cdrom_fops = {
-		NULL,			/∗ lseek ∗/
-		block _read ,		/∗ read—general block-dev read ∗/
-		block _write,		/∗ write—general block-dev write ∗/
-		NULL,			/∗ readdir ∗/
-		NULL,			/∗ select ∗/
-		cdrom_ioctl,		/∗ ioctl ∗/
-		NULL,			/∗ mmap ∗/
-		cdrom_open,		/∗ open ∗/
-		cdrom_release,		/∗ release ∗/
-		NULL,			/∗ fsync ∗/
-		NULL,			/∗ fasync ∗/
-		cdrom_media_changed,	/∗ media change ∗/
-		NULL			/∗ revalidate ∗/
-	};
-
-Every active CD-ROM device shares this *struct*. The routines
-declared above are all implemented in `cdrom.c`, since this file is the
-place where the behavior of all CD-ROM-devices is defined and
-standardized. The actual interface to the various types of CD-ROM
-hardware is still performed by various low-level CD-ROM-device
-drivers. These routines simply implement certain **capabilities**
-that are common to all CD-ROM (and really, all removable-media
-devices).
-
-Registration of a low-level CD-ROM device driver is now done through
-the general routines in `cdrom.c`, not through the Virtual File System
-(VFS) any more. The interface implemented in `cdrom.c` is carried out
-through two general structures that contain information about the
-capabilities of the driver, and the specific drives on which the
-driver operates. The structures are:
-
-cdrom_device_ops
-  This structure contains information about the low-level driver for a
-  CD-ROM device. This structure is conceptually connected to the major
-  number of the device (although some drivers may have different
-  major numbers, as is the case for the IDE driver).
-
-cdrom_device_info
-  This structure contains information about a particular CD-ROM drive,
-  such as its device name, speed, etc. This structure is conceptually
-  connected to the minor number of the device.
-
-Registering a particular CD-ROM drive with the Uniform CD-ROM Driver
-is done by the low-level device driver though a call to::
-
-	register_cdrom(struct cdrom_device_info * <device>_info)
-
-The device information structure, *<device>_info*, contains all the
-information needed for the kernel to interface with the low-level
-CD-ROM device driver. One of the most important entries in this
-structure is a pointer to the *cdrom_device_ops* structure of the
-low-level driver.
-
-The device operations structure, *cdrom_device_ops*, contains a list
-of pointers to the functions which are implemented in the low-level
-device driver. When `cdrom.c` accesses a CD-ROM device, it does it
-through the functions in this structure. It is impossible to know all
-the capabilities of future CD-ROM drives, so it is expected that this
-list may need to be expanded from time to time as new technologies are
-developed. For example, CD-R and CD-R/W drives are beginning to become
-popular, and support will soon need to be added for them. For now, the
-current *struct* is::
-
-	struct cdrom_device_ops {
-		int (*open)(struct cdrom_device_info *, int)
-		void (*release)(struct cdrom_device_info *);
-		int (*drive_status)(struct cdrom_device_info *, int);
-		unsigned int (*check_events)(struct cdrom_device_info *,
-					     unsigned int, int);
-		int (*media_changed)(struct cdrom_device_info *, int);
-		int (*tray_move)(struct cdrom_device_info *, int);
-		int (*lock_door)(struct cdrom_device_info *, int);
-		int (*select_speed)(struct cdrom_device_info *, int);
-		int (*select_disc)(struct cdrom_device_info *, int);
-		int (*get_last_session) (struct cdrom_device_info *,
-					 struct cdrom_multisession *);
-		int (*get_mcn)(struct cdrom_device_info *, struct cdrom_mcn *);
-		int (*reset)(struct cdrom_device_info *);
-		int (*audio_ioctl)(struct cdrom_device_info *,
-				   unsigned int, void *);
-		const int capability;		/* capability flags */
-		int (*generic_packet)(struct cdrom_device_info *,
-				      struct packet_command *);
-	};
-
-When a low-level device driver implements one of these capabilities,
-it should add a function pointer to this *struct*. When a particular
-function is not implemented, however, this *struct* should contain a
-NULL instead. The *capability* flags specify the capabilities of the
-CD-ROM hardware and/or low-level CD-ROM driver when a CD-ROM drive
-is registered with the Uniform CD-ROM Driver.
-
-Note that most functions have fewer parameters than their
-*blkdev_fops* counterparts. This is because very little of the
-information in the structures *inode* and *file* is used. For most
-drivers, the main parameter is the *struct* *cdrom_device_info*, from
-which the major and minor number can be extracted. (Most low-level
-CD-ROM drivers don't even look at the major and minor number though,
-since many of them only support one device.) This will be available
-through *dev* in *cdrom_device_info* described below.
-
-The drive-specific, minor-like information that is registered with
-`cdrom.c`, currently contains the following fields::
-
-  struct cdrom_device_info {
-	const struct cdrom_device_ops * ops; 	/* device operations for this major */
-	struct list_head list;			/* linked list of all device_info */
-	struct gendisk * disk;			/* matching block layer disk */
-	void *  handle;				/* driver-dependent data */
-
-	int mask; 				/* mask of capability: disables them */
-	int speed;				/* maximum speed for reading data */
-	int capacity;				/* number of discs in a jukebox */
-
-	unsigned int options:30;		/* options flags */
-	unsigned mc_flags:2;			/*  media-change buffer flags */
-	unsigned int vfs_events;		/*  cached events for vfs path */
-	unsigned int ioctl_events;		/*  cached events for ioctl path */
-	int use_count;				/*  number of times device is opened */
-	char name[20];				/*  name of the device type */
-
-	__u8 sanyo_slot : 2;			/*  Sanyo 3-CD changer support */
-	__u8 keeplocked : 1;			/*  CDROM_LOCKDOOR status */
-	__u8 reserved : 5;			/*  not used yet */
-	int cdda_method;			/*  see CDDA_* flags */
-	__u8 last_sense;			/*  saves last sense key */
-	__u8 media_written;			/*  dirty flag, DVD+RW bookkeeping */
-	unsigned short mmc3_profile;		/*  current MMC3 profile */
-	int for_data;				/*  unknown:TBD */
-	int (*exit)(struct cdrom_device_info *);/*  unknown:TBD */
-	int mrw_mode_page;			/*  which MRW mode page is in use */
-  };
-
-Using this *struct*, a linked list of the registered minor devices is
-built, using the *next* field. The device number, the device operations
-struct and specifications of properties of the drive are stored in this
-structure.
-
-The *mask* flags can be used to mask out some of the capabilities listed
-in *ops->capability*, if a specific drive doesn't support a feature
-of the driver. The value *speed* specifies the maximum head-rate of the
-drive, measured in units of normal audio speed (176kB/sec raw data or
-150kB/sec file system data). The parameters are declared *const*
-because they describe properties of the drive, which don't change after
-registration.
-
-A few registers contain variables local to the CD-ROM drive. The
-flags *options* are used to specify how the general CD-ROM routines
-should behave. These various flags registers should provide enough
-flexibility to adapt to the different users' wishes (and **not** the
-`arbitrary` wishes of the author of the low-level device driver, as is
-the case in the old scheme). The register *mc_flags* is used to buffer
-the information from *media_changed()* to two separate queues. Other
-data that is specific to a minor drive, can be accessed through *handle*,
-which can point to a data structure specific to the low-level driver.
-The fields *use_count*, *next*, *options* and *mc_flags* need not be
-initialized.
-
-The intermediate software layer that `cdrom.c` forms will perform some
-additional bookkeeping. The use count of the device (the number of
-processes that have the device opened) is registered in *use_count*. The
-function *cdrom_ioctl()* will verify the appropriate user-memory regions
-for read and write, and in case a location on the CD is transferred,
-it will `sanitize` the format by making requests to the low-level
-drivers in a standard format, and translating all formats between the
-user-software and low level drivers. This relieves much of the drivers'
-memory checking and format checking and translation. Also, the necessary
-structures will be declared on the program stack.
-
-The implementation of the functions should be as defined in the
-following sections. Two functions **must** be implemented, namely
-*open()* and *release()*. Other functions may be omitted, their
-corresponding capability flags will be cleared upon registration.
-Generally, a function returns zero on success and negative on error. A
-function call should return only after the command has completed, but of
-course waiting for the device should not use processor time.
-
-::
-
-	int open(struct cdrom_device_info *cdi, int purpose)
-
-*Open()* should try to open the device for a specific *purpose*, which
-can be either:
-
-- Open for reading data, as done by `mount()` (2), or the
-  user commands `dd` or `cat`.
-- Open for *ioctl* commands, as done by audio-CD playing programs.
-
-Notice that any strategic code (closing tray upon *open()*, etc.) is
-done by the calling routine in `cdrom.c`, so the low-level routine
-should only be concerned with proper initialization, such as spinning
-up the disc, etc.
-
-::
-
-	void release(struct cdrom_device_info *cdi)
-
-Device-specific actions should be taken such as spinning down the device.
-However, strategic actions such as ejection of the tray, or unlocking
-the door, should be left over to the general routine *cdrom_release()*.
-This is the only function returning type *void*.
-
-.. _cdrom_drive_status:
-
-::
-
-	int drive_status(struct cdrom_device_info *cdi, int slot_nr)
-
-The function *drive_status*, if implemented, should provide
-information on the status of the drive (not the status of the disc,
-which may or may not be in the drive). If the drive is not a changer,
-*slot_nr* should be ignored. In `cdrom.h` the possibilities are listed::
-
-
-	CDS_NO_INFO		/* no information available */
-	CDS_NO_DISC		/* no disc is inserted, tray is closed */
-	CDS_TRAY_OPEN		/* tray is opened */
-	CDS_DRIVE_NOT_READY	/* something is wrong, tray is moving? */
-	CDS_DISC_OK		/* a disc is loaded and everything is fine */
-
-::
-
-	int media_changed(struct cdrom_device_info *cdi, int disc_nr)
-
-This function is very similar to the original function in $struct
-file_operations*. It returns 1 if the medium of the device *cdi->dev*
-has changed since the last call, and 0 otherwise. The parameter
-*disc_nr* identifies a specific slot in a juke-box, it should be
-ignored for single-disc drives. Note that by `re-routing` this
-function through *cdrom_media_changed()*, we can implement separate
-queues for the VFS and a new *ioctl()* function that can report device
-changes to software (e. g., an auto-mounting daemon).
-
-::
-
-	int tray_move(struct cdrom_device_info *cdi, int position)
-
-This function, if implemented, should control the tray movement. (No
-other function should control this.) The parameter *position* controls
-the desired direction of movement:
-
-- 0 Close tray
-- 1 Open tray
-
-This function returns 0 upon success, and a non-zero value upon
-error. Note that if the tray is already in the desired position, no
-action need be taken, and the return value should be 0.
-
-::
-
-	int lock_door(struct cdrom_device_info *cdi, int lock)
-
-This function (and no other code) controls locking of the door, if the
-drive allows this. The value of *lock* controls the desired locking
-state:
-
-- 0 Unlock door, manual opening is allowed
-- 1 Lock door, tray cannot be ejected manually
-
-This function returns 0 upon success, and a non-zero value upon
-error. Note that if the door is already in the requested state, no
-action need be taken, and the return value should be 0.
-
-::
-
-	int select_speed(struct cdrom_device_info *cdi, int speed)
-
-Some CD-ROM drives are capable of changing their head-speed. There
-are several reasons for changing the speed of a CD-ROM drive. Badly
-pressed CD-ROM s may benefit from less-than-maximum head rate. Modern
-CD-ROM drives can obtain very high head rates (up to *24x* is
-common). It has been reported that these drives can make reading
-errors at these high speeds, reducing the speed can prevent data loss
-in these circumstances. Finally, some of these drives can
-make an annoyingly loud noise, which a lower speed may reduce.
-
-This function specifies the speed at which data is read or audio is
-played back. The value of *speed* specifies the head-speed of the
-drive, measured in units of standard cdrom speed (176kB/sec raw data
-or 150kB/sec file system data). So to request that a CD-ROM drive
-operate at 300kB/sec you would call the CDROM_SELECT_SPEED *ioctl*
-with *speed=2*. The special value `0` means `auto-selection`, i. e.,
-maximum data-rate or real-time audio rate. If the drive doesn't have
-this `auto-selection` capability, the decision should be made on the
-current disc loaded and the return value should be positive. A negative
-return value indicates an error.
-
-::
-
-	int select_disc(struct cdrom_device_info *cdi, int number)
-
-If the drive can store multiple discs (a juke-box) this function
-will perform disc selection. It should return the number of the
-selected disc on success, a negative value on error. Currently, only
-the ide-cd driver supports this functionality.
-
-::
-
-	int get_last_session(struct cdrom_device_info *cdi,
-			     struct cdrom_multisession *ms_info)
-
-This function should implement the old corresponding *ioctl()*. For
-device *cdi->dev*, the start of the last session of the current disc
-should be returned in the pointer argument *ms_info*. Note that
-routines in `cdrom.c` have sanitized this argument: its requested
-format will **always** be of the type *CDROM_LBA* (linear block
-addressing mode), whatever the calling software requested. But
-sanitization goes even further: the low-level implementation may
-return the requested information in *CDROM_MSF* format if it wishes so
-(setting the *ms_info->addr_format* field appropriately, of
-course) and the routines in `cdrom.c` will make the transformation if
-necessary. The return value is 0 upon success.
-
-::
-
-	int get_mcn(struct cdrom_device_info *cdi,
-		    struct cdrom_mcn *mcn)
-
-Some discs carry a `Media Catalog Number` (MCN), also called
-`Universal Product Code` (UPC). This number should reflect the number
-that is generally found in the bar-code on the product. Unfortunately,
-the few discs that carry such a number on the disc don't even use the
-same format. The return argument to this function is a pointer to a
-pre-declared memory region of type *struct cdrom_mcn*. The MCN is
-expected as a 13-character string, terminated by a null-character.
-
-::
-
-	int reset(struct cdrom_device_info *cdi)
-
-This call should perform a hard-reset on the drive (although in
-circumstances that a hard-reset is necessary, a drive may very well not
-listen to commands anymore). Preferably, control is returned to the
-caller only after the drive has finished resetting. If the drive is no
-longer listening, it may be wise for the underlying low-level cdrom
-driver to time out.
-
-::
-
-	int audio_ioctl(struct cdrom_device_info *cdi,
-			unsigned int cmd, void *arg)
-
-Some of the CD-ROM-\ *ioctl()*\ 's defined in `cdrom.h` can be
-implemented by the routines described above, and hence the function
-*cdrom_ioctl* will use those. However, most *ioctl()*\ 's deal with
-audio-control. We have decided to leave these to be accessed through a
-single function, repeating the arguments *cmd* and *arg*. Note that
-the latter is of type *void*, rather than *unsigned long int*.
-The routine *cdrom_ioctl()* does do some useful things,
-though. It sanitizes the address format type to *CDROM_MSF* (Minutes,
-Seconds, Frames) for all audio calls. It also verifies the memory
-location of *arg*, and reserves stack-memory for the argument. This
-makes implementation of the *audio_ioctl()* much simpler than in the
-old driver scheme. For example, you may look up the function
-*cm206_audio_ioctl()* `cm206.c` that should be updated with
-this documentation.
-
-An unimplemented ioctl should return *-ENOSYS*, but a harmless request
-(e. g., *CDROMSTART*) may be ignored by returning 0 (success). Other
-errors should be according to the standards, whatever they are. When
-an error is returned by the low-level driver, the Uniform CD-ROM Driver
-tries whenever possible to return the error code to the calling program.
-(We may decide to sanitize the return value in *cdrom_ioctl()* though, in
-order to guarantee a uniform interface to the audio-player software.)
-
-::
-
-	int dev_ioctl(struct cdrom_device_info *cdi,
-		      unsigned int cmd, unsigned long arg)
-
-Some *ioctl()'s* seem to be specific to certain CD-ROM drives. That is,
-they are introduced to service some capabilities of certain drives. In
-fact, there are 6 different *ioctl()'s* for reading data, either in some
-particular kind of format, or audio data. Not many drives support
-reading audio tracks as data, I believe this is because of protection
-of copyrights of artists. Moreover, I think that if audio-tracks are
-supported, it should be done through the VFS and not via *ioctl()'s*. A
-problem here could be the fact that audio-frames are 2352 bytes long,
-so either the audio-file-system should ask for 75264 bytes at once
-(the least common multiple of 512 and 2352), or the drivers should
-bend their backs to cope with this incoherence (to which I would be
-opposed). Furthermore, it is very difficult for the hardware to find
-the exact frame boundaries, since there are no synchronization headers
-in audio frames. Once these issues are resolved, this code should be
-standardized in `cdrom.c`.
-
-Because there are so many *ioctl()'s* that seem to be introduced to
-satisfy certain drivers [#f2]_, any non-standard *ioctl()*\ s
-are routed through the call *dev_ioctl()*. In principle, `private`
-*ioctl()*\ 's should be numbered after the device's major number, and not
-the general CD-ROM *ioctl* number, `0x53`. Currently the
-non-supported *ioctl()'s* are:
-
-	CDROMREADMODE1, CDROMREADMODE2, CDROMREADAUDIO, CDROMREADRAW,
-	CDROMREADCOOKED, CDROMSEEK, CDROMPLAY-BLK and CDROM-READALL
-
-.. [#f2]
-
-   Is there software around that actually uses these? I'd be interested!
-
-.. _cdrom_capabilities:
-
-CD-ROM capabilities
--------------------
-
-Instead of just implementing some *ioctl* calls, the interface in
-`cdrom.c` supplies the possibility to indicate the **capabilities**
-of a CD-ROM drive. This can be done by ORing any number of
-capability-constants that are defined in `cdrom.h` at the registration
-phase. Currently, the capabilities are any of::
-
-	CDC_CLOSE_TRAY		/* can close tray by software control */
-	CDC_OPEN_TRAY		/* can open tray */
-	CDC_LOCK		/* can lock and unlock the door */
-	CDC_SELECT_SPEED	/* can select speed, in units of * sim*150 ,kB/s */
-	CDC_SELECT_DISC		/* drive is juke-box */
-	CDC_MULTI_SESSION	/* can read sessions *> rm1* */
-	CDC_MCN			/* can read Media Catalog Number */
-	CDC_MEDIA_CHANGED	/* can report if disc has changed */
-	CDC_PLAY_AUDIO		/* can perform audio-functions (play, pause, etc) */
-	CDC_RESET		/* hard reset device */
-	CDC_IOCTLS		/* driver has non-standard ioctls */
-	CDC_DRIVE_STATUS	/* driver implements drive status */
-
-The capability flag is declared *const*, to prevent drivers from
-accidentally tampering with the contents. The capability fags actually
-inform `cdrom.c` of what the driver can do. If the drive found
-by the driver does not have the capability, is can be masked out by
-the *cdrom_device_info* variable *mask*. For instance, the SCSI CD-ROM
-driver has implemented the code for loading and ejecting CD-ROM's, and
-hence its corresponding flags in *capability* will be set. But a SCSI
-CD-ROM drive might be a caddy system, which can't load the tray, and
-hence for this drive the *cdrom_device_info* struct will have set
-the *CDC_CLOSE_TRAY* bit in *mask*.
-
-In the file `cdrom.c` you will encounter many constructions of the type::
-
-	if (cdo->capability & ∼cdi->mask & CDC _⟨capability⟩) ...
-
-There is no *ioctl* to set the mask... The reason is that
-I think it is better to control the **behavior** rather than the
-**capabilities**.
-
-Options
--------
-
-A final flag register controls the **behavior** of the CD-ROM
-drives, in order to satisfy different users' wishes, hopefully
-independently of the ideas of the respective author who happened to
-have made the drive's support available to the Linux community. The
-current behavior options are::
-
-	CDO_AUTO_CLOSE	/* try to close tray upon device open() */
-	CDO_AUTO_EJECT	/* try to open tray on last device close() */
-	CDO_USE_FFLAGS	/* use file_pointer->f_flags to indicate purpose for open() */
-	CDO_LOCK	/* try to lock door if device is opened */
-	CDO_CHECK_TYPE	/* ensure disc type is data if opened for data */
-
-The initial value of this register is
-`CDO_AUTO_CLOSE | CDO_USE_FFLAGS | CDO_LOCK`, reflecting my own view on user
-interface and software standards. Before you protest, there are two
-new *ioctl()'s* implemented in `cdrom.c`, that allow you to control the
-behavior by software. These are::
-
-	CDROM_SET_OPTIONS	/* set options specified in (int)arg */
-	CDROM_CLEAR_OPTIONS	/* clear options specified in (int)arg */
-
-One option needs some more explanation: *CDO_USE_FFLAGS*. In the next
-newsection we explain what the need for this option is.
-
-A software package `setcd`, available from the Debian distribution
-and `sunsite.unc.edu`, allows user level control of these flags.
-
-
-The need to know the purpose of opening the CD-ROM device
-=========================================================
-
-Traditionally, Unix devices can be used in two different `modes`,
-either by reading/writing to the device file, or by issuing
-controlling commands to the device, by the device's *ioctl()*
-call. The problem with CD-ROM drives, is that they can be used for
-two entirely different purposes. One is to mount removable
-file systems, CD-ROM's, the other is to play audio CD's. Audio commands
-are implemented entirely through *ioctl()\'s*, presumably because the
-first implementation (SUN?) has been such. In principle there is
-nothing wrong with this, but a good control of the `CD player` demands
-that the device can **always** be opened in order to give the
-*ioctl* commands, regardless of the state the drive is in.
-
-On the other hand, when used as a removable-media disc drive (what the
-original purpose of CD-ROM s is) we would like to make sure that the
-disc drive is ready for operation upon opening the device. In the old
-scheme, some CD-ROM drivers don't do any integrity checking, resulting
-in a number of i/o errors reported by the VFS to the kernel when an
-attempt for mounting a CD-ROM on an empty drive occurs. This is not a
-particularly elegant way to find out that there is no CD-ROM inserted;
-it more-or-less looks like the old IBM-PC trying to read an empty floppy
-drive for a couple of seconds, after which the system complains it
-can't read from it. Nowadays we can **sense** the existence of a
-removable medium in a drive, and we believe we should exploit that
-fact. An integrity check on opening of the device, that verifies the
-availability of a CD-ROM and its correct type (data), would be
-desirable.
-
-These two ways of using a CD-ROM drive, principally for data and
-secondarily for playing audio discs, have different demands for the
-behavior of the *open()* call. Audio use simply wants to open the
-device in order to get a file handle which is needed for issuing
-*ioctl* commands, while data use wants to open for correct and
-reliable data transfer. The only way user programs can indicate what
-their *purpose* of opening the device is, is through the *flags*
-parameter (see `open(2)`). For CD-ROM devices, these flags aren't
-implemented (some drivers implement checking for write-related flags,
-but this is not strictly necessary if the device file has correct
-permission flags). Most option flags simply don't make sense to
-CD-ROM devices: *O_CREAT*, *O_NOCTTY*, *O_TRUNC*, *O_APPEND*, and
-*O_SYNC* have no meaning to a CD-ROM.
-
-We therefore propose to use the flag *O_NONBLOCK* to indicate
-that the device is opened just for issuing *ioctl*
-commands. Strictly, the meaning of *O_NONBLOCK* is that opening and
-subsequent calls to the device don't cause the calling process to
-wait. We could interpret this as don't wait until someone has
-inserted some valid data-CD-ROM. Thus, our proposal of the
-implementation for the *open()* call for CD-ROM s is:
-
-- If no other flags are set than *O_RDONLY*, the device is opened
-  for data transfer, and the return value will be 0 only upon successful
-  initialization of the transfer. The call may even induce some actions
-  on the CD-ROM, such as closing the tray.
-- If the option flag *O_NONBLOCK* is set, opening will always be
-  successful, unless the whole device doesn't exist. The drive will take
-  no actions whatsoever.
-
-And what about standards?
--------------------------
-
-You might hesitate to accept this proposal as it comes from the
-Linux community, and not from some standardizing institute. What
-about SUN, SGI, HP and all those other Unix and hardware vendors?
-Well, these companies are in the lucky position that they generally
-control both the hardware and software of their supported products,
-and are large enough to set their own standard. They do not have to
-deal with a dozen or more different, competing hardware
-configurations\ [#f3]_.
-
-.. [#f3]
-
-   Incidentally, I think that SUN's approach to mounting CD-ROM s is very
-   good in origin: under Solaris a volume-daemon automatically mounts a
-   newly inserted CD-ROM under `/cdrom/*<volume-name>*`.
-
-   In my opinion they should have pushed this
-   further and have **every** CD-ROM on the local area network be
-   mounted at the similar location, i. e., no matter in which particular
-   machine you insert a CD-ROM, it will always appear at the same
-   position in the directory tree, on every system. When I wanted to
-   implement such a user-program for Linux, I came across the
-   differences in behavior of the various drivers, and the need for an
-   *ioctl* informing about media changes.
-
-We believe that using *O_NONBLOCK* to indicate that a device is being opened
-for *ioctl* commands only can be easily introduced in the Linux
-community. All the CD-player authors will have to be informed, we can
-even send in our own patches to the programs. The use of *O_NONBLOCK*
-has most likely no influence on the behavior of the CD-players on
-other operating systems than Linux. Finally, a user can always revert
-to old behavior by a call to
-*ioctl(file_descriptor, CDROM_CLEAR_OPTIONS, CDO_USE_FFLAGS)*.
-
-The preferred strategy of *open()*
-----------------------------------
-
-The routines in `cdrom.c` are designed in such a way that run-time
-configuration of the behavior of CD-ROM devices (of **any** type)
-can be carried out, by the *CDROM_SET/CLEAR_OPTIONS* *ioctls*. Thus, various
-modes of operation can be set:
-
-`CDO_AUTO_CLOSE | CDO_USE_FFLAGS | CDO_LOCK`
-   This is the default setting. (With *CDO_CHECK_TYPE* it will be better, in
-   the future.) If the device is not yet opened by any other process, and if
-   the device is being opened for data (*O_NONBLOCK* is not set) and the
-   tray is found to be open, an attempt to close the tray is made. Then,
-   it is verified that a disc is in the drive and, if *CDO_CHECK_TYPE* is
-   set, that it contains tracks of type `data mode 1`. Only if all tests
-   are passed is the return value zero. The door is locked to prevent file
-   system corruption. If the drive is opened for audio (*O_NONBLOCK* is
-   set), no actions are taken and a value of 0 will be returned.
-
-`CDO_AUTO_CLOSE | CDO_AUTO_EJECT | CDO_LOCK`
-   This mimics the behavior of the current sbpcd-driver. The option flags are
-   ignored, the tray is closed on the first open, if necessary. Similarly,
-   the tray is opened on the last release, i. e., if a CD-ROM is unmounted,
-   it is automatically ejected, such that the user can replace it.
-
-We hope that these option can convince everybody (both driver
-maintainers and user program developers) to adopt the new CD-ROM
-driver scheme and option flag interpretation.
-
-Description of routines in `cdrom.c`
-====================================
-
-Only a few routines in `cdrom.c` are exported to the drivers. In this
-new section we will discuss these, as well as the functions that `take
-over' the CD-ROM interface to the kernel. The header file belonging
-to `cdrom.c` is called `cdrom.h`. Formerly, some of the contents of this
-file were placed in the file `ucdrom.h`, but this file has now been
-merged back into `cdrom.h`.
-
-::
-
-	struct file_operations cdrom_fops
-
-The contents of this structure were described in cdrom_api_.
-A pointer to this structure is assigned to the *fops* field
-of the *struct gendisk*.
-
-::
-
-	int register_cdrom(struct cdrom_device_info *cdi)
-
-This function is used in about the same way one registers *cdrom_fops*
-with the kernel, the device operations and information structures,
-as described in cdrom_api_, should be registered with the
-Uniform CD-ROM Driver::
-
-	register_cdrom(&<device>_info);
-
-
-This function returns zero upon success, and non-zero upon
-failure. The structure *<device>_info* should have a pointer to the
-driver's *<device>_dops*, as in::
-
-	struct cdrom_device_info <device>_info = {
-		<device>_dops;
-		...
-	}
-
-Note that a driver must have one static structure, *<device>_dops*, while
-it may have as many structures *<device>_info* as there are minor devices
-active. *Register_cdrom()* builds a linked list from these.
-
-
-::
-
-	void unregister_cdrom(struct cdrom_device_info *cdi)
-
-Unregistering device *cdi* with minor number *MINOR(cdi->dev)* removes
-the minor device from the list. If it was the last registered minor for
-the low-level driver, this disconnects the registered device-operation
-routines from the CD-ROM interface. This function returns zero upon
-success, and non-zero upon failure.
-
-::
-
-	int cdrom_open(struct inode * ip, struct file * fp)
-
-This function is not called directly by the low-level drivers, it is
-listed in the standard *cdrom_fops*. If the VFS opens a file, this
-function becomes active. A strategy is implemented in this routine,
-taking care of all capabilities and options that are set in the
-*cdrom_device_ops* connected to the device. Then, the program flow is
-transferred to the device_dependent *open()* call.
-
-::
-
-	void cdrom_release(struct inode *ip, struct file *fp)
-
-This function implements the reverse-logic of *cdrom_open()*, and then
-calls the device-dependent *release()* routine. When the use-count has
-reached 0, the allocated buffers are flushed by calls to *sync_dev(dev)*
-and *invalidate_buffers(dev)*.
-
-
-.. _cdrom_ioctl:
-
-::
-
-	int cdrom_ioctl(struct inode *ip, struct file *fp,
-			unsigned int cmd, unsigned long arg)
-
-This function handles all the standard *ioctl* requests for CD-ROM
-devices in a uniform way. The different calls fall into three
-categories: *ioctl()'s* that can be directly implemented by device
-operations, ones that are routed through the call *audio_ioctl()*, and
-the remaining ones, that are presumable device-dependent. Generally, a
-negative return value indicates an error.
-
-Directly implemented *ioctl()'s*
---------------------------------
-
-The following `old` CD-ROM *ioctl()*\ 's are implemented by directly
-calling device-operations in *cdrom_device_ops*, if implemented and
-not masked:
-
-`CDROMMULTISESSION`
-	Requests the last session on a CD-ROM.
-`CDROMEJECT`
-	Open tray.
-`CDROMCLOSETRAY`
-	Close tray.
-`CDROMEJECT_SW`
-	If *arg\not=0*, set behavior to auto-close (close
-	tray on first open) and auto-eject (eject on last release), otherwise
-	set behavior to non-moving on *open()* and *release()* calls.
-`CDROM_GET_MCN`
-	Get the Media Catalog Number from a CD.
-
-*Ioctl*s routed through *audio_ioctl()*
----------------------------------------
-
-The following set of *ioctl()'s* are all implemented through a call to
-the *cdrom_fops* function *audio_ioctl()*. Memory checks and
-allocation are performed in *cdrom_ioctl()*, and also sanitization of
-address format (*CDROM_LBA*/*CDROM_MSF*) is done.
-
-`CDROMSUBCHNL`
-	Get sub-channel data in argument *arg* of type
-	`struct cdrom_subchnl *`.
-`CDROMREADTOCHDR`
-	Read Table of Contents header, in *arg* of type
-	`struct cdrom_tochdr *`.
-`CDROMREADTOCENTRY`
-	Read a Table of Contents entry in *arg* and specified by *arg*
-	of type `struct cdrom_tocentry *`.
-`CDROMPLAYMSF`
-	Play audio fragment specified in Minute, Second, Frame format,
-	delimited by *arg* of type `struct cdrom_msf *`.
-`CDROMPLAYTRKIND`
-	Play audio fragment in track-index format delimited by *arg*
-	of type `struct cdrom_ti *`.
-`CDROMVOLCTRL`
-	Set volume specified by *arg* of type `struct cdrom_volctrl *`.
-`CDROMVOLREAD`
-	Read volume into by *arg* of type `struct cdrom_volctrl *`.
-`CDROMSTART`
-	Spin up disc.
-`CDROMSTOP`
-	Stop playback of audio fragment.
-`CDROMPAUSE`
-	Pause playback of audio fragment.
-`CDROMRESUME`
-	Resume playing.
-
-New *ioctl()'s* in `cdrom.c`
-----------------------------
-
-The following *ioctl()'s* have been introduced to allow user programs to
-control the behavior of individual CD-ROM devices. New *ioctl*
-commands can be identified by the underscores in their names.
-
-`CDROM_SET_OPTIONS`
-	Set options specified by *arg*. Returns the option flag register
-	after modification. Use *arg = \rm0* for reading the current flags.
-`CDROM_CLEAR_OPTIONS`
-	Clear options specified by *arg*. Returns the option flag register
-	after modification.
-`CDROM_SELECT_SPEED`
-	Select head-rate speed of disc specified as by *arg* in units
-	of standard cdrom speed (176\,kB/sec raw data or
-	150kB/sec file system data). The value 0 means `auto-select`,
-	i. e., play audio discs at real time and data discs at maximum speed.
-	The value *arg* is checked against the maximum head rate of the
-	drive found in the *cdrom_dops*.
-`CDROM_SELECT_DISC`
-	Select disc numbered *arg* from a juke-box.
-
-	First disc is numbered 0. The number *arg* is checked against the
-	maximum number of discs in the juke-box found in the *cdrom_dops*.
-`CDROM_MEDIA_CHANGED`
-	Returns 1 if a disc has been changed since the last call.
-	Note that calls to *cdrom_media_changed* by the VFS are treated
-	by an independent queue, so both mechanisms will detect a
-	media change once. For juke-boxes, an extra argument *arg*
-	specifies the slot for which the information is given. The special
-	value *CDSL_CURRENT* requests that information about the currently
-	selected slot be returned.
-`CDROM_DRIVE_STATUS`
-	Returns the status of the drive by a call to
-	*drive_status()*. Return values are defined in cdrom_drive_status_.
-	Note that this call doesn't return information on the
-	current playing activity of the drive; this can be polled through
-	an *ioctl* call to *CDROMSUBCHNL*. For juke-boxes, an extra argument
-	*arg* specifies the slot for which (possibly limited) information is
-	given. The special value *CDSL_CURRENT* requests that information
-	about the currently selected slot be returned.
-`CDROM_DISC_STATUS`
-	Returns the type of the disc currently in the drive.
-	It should be viewed as a complement to *CDROM_DRIVE_STATUS*.
-	This *ioctl* can provide *some* information about the current
-	disc that is inserted in the drive. This functionality used to be
-	implemented in the low level drivers, but is now carried out
-	entirely in Uniform CD-ROM Driver.
-
-	The history of development of the CD's use as a carrier medium for
-	various digital information has lead to many different disc types.
-	This *ioctl* is useful only in the case that CDs have \emph {only
-	one} type of data on them. While this is often the case, it is
-	also very common for CDs to have some tracks with data, and some
-	tracks with audio. Because this is an existing interface, rather
-	than fixing this interface by changing the assumptions it was made
-	under, thereby breaking all user applications that use this
-	function, the Uniform CD-ROM Driver implements this *ioctl* as
-	follows: If the CD in question has audio tracks on it, and it has
-	absolutely no CD-I, XA, or data tracks on it, it will be reported
-	as *CDS_AUDIO*. If it has both audio and data tracks, it will
-	return *CDS_MIXED*. If there are no audio tracks on the disc, and
-	if the CD in question has any CD-I tracks on it, it will be
-	reported as *CDS_XA_2_2*. Failing that, if the CD in question
-	has any XA tracks on it, it will be reported as *CDS_XA_2_1*.
-	Finally, if the CD in question has any data tracks on it,
-	it will be reported as a data CD (*CDS_DATA_1*).
-
-	This *ioctl* can return::
-
-		CDS_NO_INFO	/* no information available */
-		CDS_NO_DISC	/* no disc is inserted, or tray is opened */
-		CDS_AUDIO	/* Audio disc (2352 audio bytes/frame) */
-		CDS_DATA_1	/* data disc, mode 1 (2048 user bytes/frame) */
-		CDS_XA_2_1	/* mixed data (XA), mode 2, form 1 (2048 user bytes) */
-		CDS_XA_2_2	/* mixed data (XA), mode 2, form 1 (2324 user bytes) */
-		CDS_MIXED	/* mixed audio/data disc */
-
-	For some information concerning frame layout of the various disc
-	types, see a recent version of `cdrom.h`.
-
-`CDROM_CHANGER_NSLOTS`
-	Returns the number of slots in a juke-box.
-`CDROMRESET`
-	Reset the drive.
-`CDROM_GET_CAPABILITY`
-	Returns the *capability* flags for the drive. Refer to section
-	cdrom_capabilities_ for more information on these flags.
-`CDROM_LOCKDOOR`
-	 Locks the door of the drive. `arg == 0` unlocks the door,
-	 any other value locks it.
-`CDROM_DEBUG`
-	 Turns on debugging info. Only root is allowed to do this.
-	 Same semantics as CDROM_LOCKDOOR.
-
-
-Device dependent *ioctl()'s*
-----------------------------
-
-Finally, all other *ioctl()'s* are passed to the function *dev_ioctl()*,
-if implemented. No memory allocation or verification is carried out.
-
-How to update your driver
-=========================
-
-- Make a backup of your current driver.
-- Get hold of the files `cdrom.c` and `cdrom.h`, they should be in
-  the directory tree that came with this documentation.
-- Make sure you include `cdrom.h`.
-- Change the 3rd argument of *register_blkdev* from `&<your-drive>_fops`
-  to `&cdrom_fops`.
-- Just after that line, add the following to register with the Uniform
-  CD-ROM Driver::
-
-	register_cdrom(&<your-drive>_info);*
-
-  Similarly, add a call to *unregister_cdrom()* at the appropriate place.
-- Copy an example of the device-operations *struct* to your
-  source, e. g., from `cm206.c` *cm206_dops*, and change all
-  entries to names corresponding to your driver, or names you just
-  happen to like. If your driver doesn't support a certain function,
-  make the entry *NULL*. At the entry *capability* you should list all
-  capabilities your driver currently supports. If your driver
-  has a capability that is not listed, please send me a message.
-- Copy the *cdrom_device_info* declaration from the same example
-  driver, and modify the entries according to your needs. If your
-  driver dynamically determines the capabilities of the hardware, this
-  structure should also be declared dynamically.
-- Implement all functions in your `<device>_dops` structure,
-  according to prototypes listed in  `cdrom.h`, and specifications given
-  in cdrom_api_. Most likely you have already implemented
-  the code in a large part, and you will almost certainly need to adapt the
-  prototype and return values.
-- Rename your `<device>_ioctl()` function to *audio_ioctl* and
-  change the prototype a little. Remove entries listed in the first
-  part in cdrom_ioctl_, if your code was OK, these are
-  just calls to the routines you adapted in the previous step.
-- You may remove all remaining memory checking code in the
-  *audio_ioctl()* function that deals with audio commands (these are
-  listed in the second part of cdrom_ioctl_. There is no
-  need for memory allocation either, so most *case*s in the *switch*
-  statement look similar to::
-
-	case CDROMREADTOCENTRY:
-		get_toc_entry\bigl((struct cdrom_tocentry *) arg);
-
-- All remaining *ioctl* cases must be moved to a separate
-  function, *<device>_ioctl*, the device-dependent *ioctl()'s*. Note that
-  memory checking and allocation must be kept in this code!
-- Change the prototypes of *<device>_open()* and
-  *<device>_release()*, and remove any strategic code (i. e., tray
-  movement, door locking, etc.).
-- Try to recompile the drivers. We advise you to use modules, both
-  for `cdrom.o` and your driver, as debugging is much easier this
-  way.
-
-Thanks
-======
-
-Thanks to all the people involved. First, Erik Andersen, who has
-taken over the torch in maintaining `cdrom.c` and integrating much
-CD-ROM-related code in the 2.1-kernel. Thanks to Scott Snyder and
-Gerd Knorr, who were the first to implement this interface for SCSI
-and IDE-CD drivers and added many ideas for extension of the data
-structures relative to kernel~2.0. Further thanks to Heiko Eißfeldt,
-Thomas Quinot, Jon Tombs, Ken Pizzini, Eberhard Mönkeberg and Andrew Kroll,
-the Linux CD-ROM device driver developers who were kind
-enough to give suggestions and criticisms during the writing. Finally
-of course, I want to thank Linus Torvalds for making this possible in
-the first place.
diff --git a/Documentation/cdrom/ide-cd b/Documentation/cdrom/ide-cd
deleted file mode 100644
index a5f2a7f1ff46..000000000000
--- a/Documentation/cdrom/ide-cd
+++ /dev/null
@@ -1,534 +0,0 @@
-IDE-CD driver documentation
-Originally by scott snyder  <snyder@fnald0.fnal.gov> (19 May 1996)
-Carrying on the torch is: Erik Andersen <andersee@debian.org>
-New maintainers (19 Oct 1998): Jens Axboe <axboe@image.dk>
-
-1. Introduction
----------------
-
-The ide-cd driver should work with all ATAPI ver 1.2 to ATAPI 2.6 compliant 
-CDROM drives which attach to an IDE interface.  Note that some CDROM vendors
-(including Mitsumi, Sony, Creative, Aztech, and Goldstar) have made
-both ATAPI-compliant drives and drives which use a proprietary
-interface.  If your drive uses one of those proprietary interfaces,
-this driver will not work with it (but one of the other CDROM drivers
-probably will).  This driver will not work with `ATAPI' drives which
-attach to the parallel port.  In addition, there is at least one drive
-(CyCDROM CR520ie) which attaches to the IDE port but is not ATAPI;
-this driver will not work with drives like that either (but see the
-aztcd driver).
-
-This driver provides the following features:
-
- - Reading from data tracks, and mounting ISO 9660 filesystems.
-
- - Playing audio tracks.  Most of the CDROM player programs floating
-   around should work; I usually use Workman.
-
- - Multisession support.
-
- - On drives which support it, reading digital audio data directly
-   from audio tracks.  The program cdda2wav can be used for this.
-   Note, however, that only some drives actually support this.
-
- - There is now support for CDROM changers which comply with the 
-   ATAPI 2.6 draft standard (such as the NEC CDR-251).  This additional
-   functionality includes a function call to query which slot is the
-   currently selected slot, a function call to query which slots contain
-   CDs, etc. A sample program which demonstrates this functionality is
-   appended to the end of this file.  The Sanyo 3-disc changer
-   (which does not conform to the standard) is also now supported.
-   Please note the driver refers to the first CD as slot # 0.
-
-
-2. Installation
----------------
-
-0. The ide-cd relies on the ide disk driver.  See
-   Documentation/ide/ide.txt for up-to-date information on the ide
-   driver.
-
-1. Make sure that the ide and ide-cd drivers are compiled into the
-   kernel you're using.  When configuring the kernel, in the section 
-   entitled "Floppy, IDE, and other block devices", say either `Y' 
-   (which will compile the support directly into the kernel) or `M'
-   (to compile support as a module which can be loaded and unloaded)
-   to the options: 
-
-      ATA/ATAPI/MFM/RLL support
-      Include IDE/ATAPI CDROM support
-
-   Depending on what type of IDE interface you have, you may need to
-   specify additional configuration options.  See
-   Documentation/ide/ide.txt.
-
-2. You should also ensure that the iso9660 filesystem is either
-   compiled into the kernel or available as a loadable module.  You
-   can see if a filesystem is known to the kernel by catting
-   /proc/filesystems.
-
-3. The CDROM drive should be connected to the host on an IDE
-   interface.  Each interface on a system is defined by an I/O port
-   address and an IRQ number, the standard assignments being
-   0x1f0 and 14 for the primary interface and 0x170 and 15 for the
-   secondary interface.  Each interface can control up to two devices,
-   where each device can be a hard drive, a CDROM drive, a floppy drive, 
-   or a tape drive.  The two devices on an interface are called `master'
-   and `slave'; this is usually selectable via a jumper on the drive.
-
-   Linux names these devices as follows.  The master and slave devices
-   on the primary IDE interface are called `hda' and `hdb',
-   respectively.  The drives on the secondary interface are called
-   `hdc' and `hdd'.  (Interfaces at other locations get other letters
-   in the third position; see Documentation/ide/ide.txt.)
-
-   If you want your CDROM drive to be found automatically by the
-   driver, you should make sure your IDE interface uses either the
-   primary or secondary addresses mentioned above.  In addition, if
-   the CDROM drive is the only device on the IDE interface, it should
-   be jumpered as `master'.  (If for some reason you cannot configure
-   your system in this manner, you can probably still use the driver.
-   You may have to pass extra configuration information to the kernel
-   when you boot, however.  See Documentation/ide/ide.txt for more
-   information.)
-
-4. Boot the system.  If the drive is recognized, you should see a
-   message which looks like
-
-     hdb: NEC CD-ROM DRIVE:260, ATAPI CDROM drive
-
-   If you do not see this, see section 5 below.
-
-5. You may want to create a symbolic link /dev/cdrom pointing to the
-   actual device.  You can do this with the command
-
-     ln -s  /dev/hdX  /dev/cdrom
-
-   where X should be replaced by the letter indicating where your
-   drive is installed.
-
-6. You should be able to see any error messages from the driver with
-   the `dmesg' command.
-
-
-3. Basic usage
---------------
-
-An ISO 9660 CDROM can be mounted by putting the disc in the drive and 
-typing (as root)
-
-  mount -t iso9660 /dev/cdrom /mnt/cdrom
-
-where it is assumed that /dev/cdrom is a link pointing to the actual
-device (as described in step 5 of the last section) and /mnt/cdrom is
-an empty directory.  You should now be able to see the contents of the
-CDROM under the /mnt/cdrom directory.  If you want to eject the CDROM,
-you must first dismount it with a command like
-
-  umount /mnt/cdrom
-
-Note that audio CDs cannot be mounted.
-
-Some distributions set up /etc/fstab to always try to mount a CDROM
-filesystem on bootup.  It is not required to mount the CDROM in this
-manner, though, and it may be a nuisance if you change CDROMs often.
-You should feel free to remove the cdrom line from /etc/fstab and
-mount CDROMs manually if that suits you better.
-
-Multisession and photocd discs should work with no special handling.
-The hpcdtoppm package (ftp.gwdg.de:/pub/linux/hpcdtoppm/) may be
-useful for reading photocds.
-
-To play an audio CD, you should first unmount and remove any data
-CDROM.  Any of the CDROM player programs should then work (workman,
-workbone, cdplayer, etc.).
-
-On a few drives, you can read digital audio directly using a program
-such as cdda2wav.  The only types of drive which I've heard support
-this are Sony and Toshiba drives.  You will get errors if you try to
-use this function on a drive which does not support it.
-
-For supported changers, you can use the `cdchange' program (appended to
-the end of this file) to switch between changer slots.  Note that the
-drive should be unmounted before attempting this.  The program takes
-two arguments:  the CDROM device, and the slot number to which you wish
-to change.  If the slot number is -1, the drive is unloaded.
-
-
-4. Common problems
-------------------
-
-This section discusses some common problems encountered when trying to
-use the driver, and some possible solutions.  Note that if you are
-experiencing problems, you should probably also review
-Documentation/ide/ide.txt for current information about the underlying
-IDE support code.  Some of these items apply only to earlier versions
-of the driver, but are mentioned here for completeness.
-
-In most cases, you should probably check with `dmesg' for any errors
-from the driver.
-
-a. Drive is not detected during booting.
-
-   - Review the configuration instructions above and in
-     Documentation/ide/ide.txt, and check how your hardware is
-     configured.
-
-   - If your drive is the only device on an IDE interface, it should
-     be jumpered as master, if at all possible.
-
-   - If your IDE interface is not at the standard addresses of 0x170
-     or 0x1f0, you'll need to explicitly inform the driver using a
-     lilo option.  See Documentation/ide/ide.txt.  (This feature was
-     added around kernel version 1.3.30.)
-
-   - If the autoprobing is not finding your drive, you can tell the
-     driver to assume that one exists by using a lilo option of the
-     form `hdX=cdrom', where X is the drive letter corresponding to
-     where your drive is installed.  Note that if you do this and you 
-     see a boot message like
-
-       hdX: ATAPI cdrom (?)
-
-     this does _not_ mean that the driver has successfully detected
-     the drive; rather, it means that the driver has not detected a
-     drive, but is assuming there's one there anyway because you told
-     it so.  If you actually try to do I/O to a drive defined at a
-     nonexistent or nonresponding I/O address, you'll probably get
-     errors with a status value of 0xff.
-
-   - Some IDE adapters require a nonstandard initialization sequence
-     before they'll function properly.  (If this is the case, there
-     will often be a separate MS-DOS driver just for the controller.)
-     IDE interfaces on sound cards often fall into this category.
-
-     Support for some interfaces needing extra initialization is
-     provided in later 1.3.x kernels.  You may need to turn on
-     additional kernel configuration options to get them to work;
-     see Documentation/ide/ide.txt.
-
-     Even if support is not available for your interface, you may be
-     able to get it to work with the following procedure.  First boot
-     MS-DOS and load the appropriate drivers.  Then warm-boot linux
-     (i.e., without powering off).  If this works, it can be automated
-     by running loadlin from the MS-DOS autoexec.
-
-
-b. Timeout/IRQ errors.
-
-  - If you always get timeout errors, interrupts from the drive are
-    probably not making it to the host.
-
-  - IRQ problems may also be indicated by the message
-    `IRQ probe failed (<n>)' while booting.  If <n> is zero, that
-    means that the system did not see an interrupt from the drive when
-    it was expecting one (on any feasible IRQ).  If <n> is negative,
-    that means the system saw interrupts on multiple IRQ lines, when
-    it was expecting to receive just one from the CDROM drive.
-
-  - Double-check your hardware configuration to make sure that the IRQ
-    number of your IDE interface matches what the driver expects.
-    (The usual assignments are 14 for the primary (0x1f0) interface
-    and 15 for the secondary (0x170) interface.)  Also be sure that
-    you don't have some other hardware which might be conflicting with
-    the IRQ you're using.  Also check the BIOS setup for your system;
-    some have the ability to disable individual IRQ levels, and I've
-    had one report of a system which was shipped with IRQ 15 disabled
-    by default.
-
-  - Note that many MS-DOS CDROM drivers will still function even if
-    there are hardware problems with the interrupt setup; they
-    apparently don't use interrupts.
-
-  - If you own a Pioneer DR-A24X, you _will_ get nasty error messages 
-    on boot such as "irq timeout: status=0x50 { DriveReady SeekComplete }"
-    The Pioneer DR-A24X CDROM drives are fairly popular these days.
-    Unfortunately, these drives seem to become very confused when we perform
-    the standard Linux ATA disk drive probe. If you own one of these drives,
-    you can bypass the ATA probing which confuses these CDROM drives, by 
-    adding `append="hdX=noprobe hdX=cdrom"' to your lilo.conf file and running 
-    lilo (again where X is the drive letter corresponding to where your drive 
-    is installed.)
-    
-c. System hangups.
-
-  - If the system locks up when you try to access the CDROM, the most
-    likely cause is that you have a buggy IDE adapter which doesn't
-    properly handle simultaneous transactions on multiple interfaces.
-    The most notorious of these is the CMD640B chip.  This problem can
-    be worked around by specifying the `serialize' option when
-    booting.  Recent kernels should be able to detect the need for
-    this automatically in most cases, but the detection is not
-    foolproof.  See Documentation/ide/ide.txt for more information
-    about the `serialize' option and the CMD640B.
-
-  - Note that many MS-DOS CDROM drivers will work with such buggy
-    hardware, apparently because they never attempt to overlap CDROM
-    operations with other disk activity.
-
-
-d. Can't mount a CDROM.
-
-  - If you get errors from mount, it may help to check `dmesg' to see
-    if there are any more specific errors from the driver or from the
-    filesystem.
-
-  - Make sure there's a CDROM loaded in the drive, and that's it's an
-    ISO 9660 disc.  You can't mount an audio CD.
-
-  - With the CDROM in the drive and unmounted, try something like
-
-      cat /dev/cdrom | od | more
-
-    If you see a dump, then the drive and driver are probably working
-    OK, and the problem is at the filesystem level (i.e., the CDROM is
-    not ISO 9660 or has errors in the filesystem structure).
-
-  - If you see `not a block device' errors, check that the definitions
-    of the device special files are correct.  They should be as
-    follows:
-
-      brw-rw----   1 root     disk       3,   0 Nov 11 18:48 /dev/hda
-      brw-rw----   1 root     disk       3,  64 Nov 11 18:48 /dev/hdb
-      brw-rw----   1 root     disk      22,   0 Nov 11 18:48 /dev/hdc
-      brw-rw----   1 root     disk      22,  64 Nov 11 18:48 /dev/hdd
-
-    Some early Slackware releases had these defined incorrectly.  If
-    these are wrong, you can remake them by running the script
-    scripts/MAKEDEV.ide.  (You may have to make it executable
-    with chmod first.)
-
-    If you have a /dev/cdrom symbolic link, check that it is pointing
-    to the correct device file.
-
-    If you hear people talking of the devices `hd1a' and `hd1b', these
-    were old names for what are now called hdc and hdd.  Those names
-    should be considered obsolete.
-
-  - If mount is complaining that the iso9660 filesystem is not
-    available, but you know it is (check /proc/filesystems), you
-    probably need a newer version of mount.  Early versions would not
-    always give meaningful error messages.
-
-
-e. Directory listings are unpredictably truncated, and `dmesg' shows
-   `buffer botch' error messages from the driver.
-
-  - There was a bug in the version of the driver in 1.2.x kernels
-    which could cause this.  It was fixed in 1.3.0.  If you can't
-    upgrade, you can probably work around the problem by specifying a
-    blocksize of 2048 when mounting.  (Note that you won't be able to
-    directly execute binaries off the CDROM in that case.)
-
-    If you see this in kernels later than 1.3.0, please report it as a
-    bug.
-
-
-f. Data corruption.
-
-  - Random data corruption was occasionally observed with the Hitachi
-    CDR-7730 CDROM. If you experience data corruption, using "hdx=slow"
-    as a command line parameter may work around the problem, at the
-    expense of low system performance.
-
-
-5. cdchange.c
--------------
-
-/*
- * cdchange.c  [-v]  <device>  [<slot>]
- *
- * This loads a CDROM from a specified slot in a changer, and displays 
- * information about the changer status.  The drive should be unmounted before 
- * using this program.
- *
- * Changer information is displayed if either the -v flag is specified
- * or no slot was specified.
- *
- * Based on code originally from Gerhard Zuber <zuber@berlin.snafu.de>.
- * Changer status information, and rewrite for the new Uniform CDROM driver
- * interface by Erik Andersen <andersee@debian.org>.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <linux/cdrom.h>
-
-
-int
-main (int argc, char **argv)
-{
-	char *program;
-	char *device;
-	int fd;           /* file descriptor for CD-ROM device */
-	int status;       /* return status for system calls */
-	int verbose = 0;
-	int slot=-1, x_slot;
-	int total_slots_available;
-
-	program = argv[0];
-
-	++argv;
-	--argc;
-
-	if (argc < 1 || argc > 3) {
-		fprintf (stderr, "usage: %s [-v] <device> [<slot>]\n",
-			 program);
-		fprintf (stderr, "       Slots are numbered 1 -- n.\n");
-		exit (1);
-	}
- 
-       if (strcmp (argv[0], "-v") == 0) {
-                verbose = 1;
-                ++argv;
-                --argc;
-        }
- 
-	device = argv[0];
- 
-	if (argc == 2)
-		slot = atoi (argv[1]) - 1;
-
-	/* open device */ 
-	fd = open(device, O_RDONLY | O_NONBLOCK);
-	if (fd < 0) {
-		fprintf (stderr, "%s: open failed for `%s': %s\n",
-			 program, device, strerror (errno));
-		exit (1);
-	}
-
-	/* Check CD player status */ 
-	total_slots_available = ioctl (fd, CDROM_CHANGER_NSLOTS);
-	if (total_slots_available <= 1 ) {
-		fprintf (stderr, "%s: Device `%s' is not an ATAPI "
-			"compliant CD changer.\n", program, device);
-		exit (1);
-	}
-
-	if (slot >= 0) {
-		if (slot >= total_slots_available) {
-			fprintf (stderr, "Bad slot number.  "
-				 "Should be 1 -- %d.\n",
-				 total_slots_available);
-			exit (1);
-		}
-
-		/* load */ 
-		slot=ioctl (fd, CDROM_SELECT_DISC, slot);
-		if (slot<0) {
-			fflush(stdout);
-				perror ("CDROM_SELECT_DISC ");
-			exit(1);
-		}
-	}
-
-	if (slot < 0 || verbose) {
-
-		status=ioctl (fd, CDROM_SELECT_DISC, CDSL_CURRENT);
-		if (status<0) {
-			fflush(stdout);
-			perror (" CDROM_SELECT_DISC");
-			exit(1);
-		}
-		slot=status;
-
-		printf ("Current slot: %d\n", slot+1);
-		printf ("Total slots available: %d\n",
-			total_slots_available);
-
-		printf ("Drive status: ");
-                status = ioctl (fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
-                if (status<0) {
-                  perror(" CDROM_DRIVE_STATUS");
-                } else switch(status) {
-		case CDS_DISC_OK:
-			printf ("Ready.\n");
-			break;
-		case CDS_TRAY_OPEN:
-			printf ("Tray Open.\n");
-			break;
-		case CDS_DRIVE_NOT_READY:
-			printf ("Drive Not Ready.\n");
-			break;
-		default:
-			printf ("This Should not happen!\n");
-			break;
-		}
-
-		for (x_slot=0; x_slot<total_slots_available; x_slot++) {
-			printf ("Slot %2d: ", x_slot+1);
-             		status = ioctl (fd, CDROM_DRIVE_STATUS, x_slot);
-             		if (status<0) {
-             		     perror(" CDROM_DRIVE_STATUS");
-             		} else switch(status) {
-			case CDS_DISC_OK:
-				printf ("Disc present.");
-				break;
-			case CDS_NO_DISC: 
-				printf ("Empty slot.");
-				break;
-			case CDS_TRAY_OPEN:
-				printf ("CD-ROM tray open.\n");
-				break;
-			case CDS_DRIVE_NOT_READY:
-				printf ("CD-ROM drive not ready.\n");
-				break;
-			case CDS_NO_INFO:
-				printf ("No Information available.");
-				break;
-			default:
-				printf ("This Should not happen!\n");
-				break;
-			}
-		  if (slot == x_slot) {
-                  status = ioctl (fd, CDROM_DISC_STATUS);
-                  if (status<0) {
-			perror(" CDROM_DISC_STATUS");
-                  }
-		  switch (status) {
-			case CDS_AUDIO:
-				printf ("\tAudio disc.\t");
-				break;
-			case CDS_DATA_1:
-			case CDS_DATA_2:
-				printf ("\tData disc type %d.\t", status-CDS_DATA_1+1);
-				break;
-			case CDS_XA_2_1:
-			case CDS_XA_2_2:
-				printf ("\tXA data disc type %d.\t", status-CDS_XA_2_1+1);
-				break;
-			default:
-				printf ("\tUnknown disc type 0x%x!\t", status);
-				break;
-			}
-			}
-                  	status = ioctl (fd, CDROM_MEDIA_CHANGED, x_slot);
-                  	if (status<0) {
-				perror(" CDROM_MEDIA_CHANGED");
-                  	}
-		  	switch (status) {
-			case 1:
-				printf ("Changed.\n");
-				break;
-			default:
-				printf ("\n");
-				break;
-			}
-		}
-	}
-
-	/* close device */
-	status = close (fd);
-	if (status != 0) {
-		fprintf (stderr, "%s: close failed for `%s': %s\n",
-			 program, device, strerror (errno));
-		exit (1);
-	}
- 
-	exit (0);
-}
diff --git a/Documentation/cdrom/ide-cd.rst b/Documentation/cdrom/ide-cd.rst
new file mode 100644
index 000000000000..dadc94ef6b6c
--- /dev/null
+++ b/Documentation/cdrom/ide-cd.rst
@@ -0,0 +1,538 @@
+IDE-CD driver documentation
+===========================
+
+:Originally by: scott snyder  <snyder@fnald0.fnal.gov> (19 May 1996)
+:Carrying on the torch is: Erik Andersen <andersee@debian.org>
+:New maintainers (19 Oct 1998): Jens Axboe <axboe@image.dk>
+
+1. Introduction
+---------------
+
+The ide-cd driver should work with all ATAPI ver 1.2 to ATAPI 2.6 compliant
+CDROM drives which attach to an IDE interface.  Note that some CDROM vendors
+(including Mitsumi, Sony, Creative, Aztech, and Goldstar) have made
+both ATAPI-compliant drives and drives which use a proprietary
+interface.  If your drive uses one of those proprietary interfaces,
+this driver will not work with it (but one of the other CDROM drivers
+probably will).  This driver will not work with `ATAPI` drives which
+attach to the parallel port.  In addition, there is at least one drive
+(CyCDROM CR520ie) which attaches to the IDE port but is not ATAPI;
+this driver will not work with drives like that either (but see the
+aztcd driver).
+
+This driver provides the following features:
+
+ - Reading from data tracks, and mounting ISO 9660 filesystems.
+
+ - Playing audio tracks.  Most of the CDROM player programs floating
+   around should work; I usually use Workman.
+
+ - Multisession support.
+
+ - On drives which support it, reading digital audio data directly
+   from audio tracks.  The program cdda2wav can be used for this.
+   Note, however, that only some drives actually support this.
+
+ - There is now support for CDROM changers which comply with the
+   ATAPI 2.6 draft standard (such as the NEC CDR-251).  This additional
+   functionality includes a function call to query which slot is the
+   currently selected slot, a function call to query which slots contain
+   CDs, etc. A sample program which demonstrates this functionality is
+   appended to the end of this file.  The Sanyo 3-disc changer
+   (which does not conform to the standard) is also now supported.
+   Please note the driver refers to the first CD as slot # 0.
+
+
+2. Installation
+---------------
+
+0. The ide-cd relies on the ide disk driver.  See
+   Documentation/ide/ide.txt for up-to-date information on the ide
+   driver.
+
+1. Make sure that the ide and ide-cd drivers are compiled into the
+   kernel you're using.  When configuring the kernel, in the section
+   entitled "Floppy, IDE, and other block devices", say either `Y`
+   (which will compile the support directly into the kernel) or `M`
+   (to compile support as a module which can be loaded and unloaded)
+   to the options::
+
+      ATA/ATAPI/MFM/RLL support
+      Include IDE/ATAPI CDROM support
+
+   Depending on what type of IDE interface you have, you may need to
+   specify additional configuration options.  See
+   Documentation/ide/ide.txt.
+
+2. You should also ensure that the iso9660 filesystem is either
+   compiled into the kernel or available as a loadable module.  You
+   can see if a filesystem is known to the kernel by catting
+   /proc/filesystems.
+
+3. The CDROM drive should be connected to the host on an IDE
+   interface.  Each interface on a system is defined by an I/O port
+   address and an IRQ number, the standard assignments being
+   0x1f0 and 14 for the primary interface and 0x170 and 15 for the
+   secondary interface.  Each interface can control up to two devices,
+   where each device can be a hard drive, a CDROM drive, a floppy drive,
+   or a tape drive.  The two devices on an interface are called `master`
+   and `slave`; this is usually selectable via a jumper on the drive.
+
+   Linux names these devices as follows.  The master and slave devices
+   on the primary IDE interface are called `hda` and `hdb`,
+   respectively.  The drives on the secondary interface are called
+   `hdc` and `hdd`.  (Interfaces at other locations get other letters
+   in the third position; see Documentation/ide/ide.txt.)
+
+   If you want your CDROM drive to be found automatically by the
+   driver, you should make sure your IDE interface uses either the
+   primary or secondary addresses mentioned above.  In addition, if
+   the CDROM drive is the only device on the IDE interface, it should
+   be jumpered as `master`.  (If for some reason you cannot configure
+   your system in this manner, you can probably still use the driver.
+   You may have to pass extra configuration information to the kernel
+   when you boot, however.  See Documentation/ide/ide.txt for more
+   information.)
+
+4. Boot the system.  If the drive is recognized, you should see a
+   message which looks like::
+
+     hdb: NEC CD-ROM DRIVE:260, ATAPI CDROM drive
+
+   If you do not see this, see section 5 below.
+
+5. You may want to create a symbolic link /dev/cdrom pointing to the
+   actual device.  You can do this with the command::
+
+     ln -s  /dev/hdX  /dev/cdrom
+
+   where X should be replaced by the letter indicating where your
+   drive is installed.
+
+6. You should be able to see any error messages from the driver with
+   the `dmesg` command.
+
+
+3. Basic usage
+--------------
+
+An ISO 9660 CDROM can be mounted by putting the disc in the drive and
+typing (as root)::
+
+  mount -t iso9660 /dev/cdrom /mnt/cdrom
+
+where it is assumed that /dev/cdrom is a link pointing to the actual
+device (as described in step 5 of the last section) and /mnt/cdrom is
+an empty directory.  You should now be able to see the contents of the
+CDROM under the /mnt/cdrom directory.  If you want to eject the CDROM,
+you must first dismount it with a command like::
+
+  umount /mnt/cdrom
+
+Note that audio CDs cannot be mounted.
+
+Some distributions set up /etc/fstab to always try to mount a CDROM
+filesystem on bootup.  It is not required to mount the CDROM in this
+manner, though, and it may be a nuisance if you change CDROMs often.
+You should feel free to remove the cdrom line from /etc/fstab and
+mount CDROMs manually if that suits you better.
+
+Multisession and photocd discs should work with no special handling.
+The hpcdtoppm package (ftp.gwdg.de:/pub/linux/hpcdtoppm/) may be
+useful for reading photocds.
+
+To play an audio CD, you should first unmount and remove any data
+CDROM.  Any of the CDROM player programs should then work (workman,
+workbone, cdplayer, etc.).
+
+On a few drives, you can read digital audio directly using a program
+such as cdda2wav.  The only types of drive which I've heard support
+this are Sony and Toshiba drives.  You will get errors if you try to
+use this function on a drive which does not support it.
+
+For supported changers, you can use the `cdchange` program (appended to
+the end of this file) to switch between changer slots.  Note that the
+drive should be unmounted before attempting this.  The program takes
+two arguments:  the CDROM device, and the slot number to which you wish
+to change.  If the slot number is -1, the drive is unloaded.
+
+
+4. Common problems
+------------------
+
+This section discusses some common problems encountered when trying to
+use the driver, and some possible solutions.  Note that if you are
+experiencing problems, you should probably also review
+Documentation/ide/ide.txt for current information about the underlying
+IDE support code.  Some of these items apply only to earlier versions
+of the driver, but are mentioned here for completeness.
+
+In most cases, you should probably check with `dmesg` for any errors
+from the driver.
+
+a. Drive is not detected during booting.
+
+   - Review the configuration instructions above and in
+     Documentation/ide/ide.txt, and check how your hardware is
+     configured.
+
+   - If your drive is the only device on an IDE interface, it should
+     be jumpered as master, if at all possible.
+
+   - If your IDE interface is not at the standard addresses of 0x170
+     or 0x1f0, you'll need to explicitly inform the driver using a
+     lilo option.  See Documentation/ide/ide.txt.  (This feature was
+     added around kernel version 1.3.30.)
+
+   - If the autoprobing is not finding your drive, you can tell the
+     driver to assume that one exists by using a lilo option of the
+     form `hdX=cdrom`, where X is the drive letter corresponding to
+     where your drive is installed.  Note that if you do this and you
+     see a boot message like::
+
+       hdX: ATAPI cdrom (?)
+
+     this does _not_ mean that the driver has successfully detected
+     the drive; rather, it means that the driver has not detected a
+     drive, but is assuming there's one there anyway because you told
+     it so.  If you actually try to do I/O to a drive defined at a
+     nonexistent or nonresponding I/O address, you'll probably get
+     errors with a status value of 0xff.
+
+   - Some IDE adapters require a nonstandard initialization sequence
+     before they'll function properly.  (If this is the case, there
+     will often be a separate MS-DOS driver just for the controller.)
+     IDE interfaces on sound cards often fall into this category.
+
+     Support for some interfaces needing extra initialization is
+     provided in later 1.3.x kernels.  You may need to turn on
+     additional kernel configuration options to get them to work;
+     see Documentation/ide/ide.txt.
+
+     Even if support is not available for your interface, you may be
+     able to get it to work with the following procedure.  First boot
+     MS-DOS and load the appropriate drivers.  Then warm-boot linux
+     (i.e., without powering off).  If this works, it can be automated
+     by running loadlin from the MS-DOS autoexec.
+
+
+b. Timeout/IRQ errors.
+
+  - If you always get timeout errors, interrupts from the drive are
+    probably not making it to the host.
+
+  - IRQ problems may also be indicated by the message
+    `IRQ probe failed (<n>)` while booting.  If <n> is zero, that
+    means that the system did not see an interrupt from the drive when
+    it was expecting one (on any feasible IRQ).  If <n> is negative,
+    that means the system saw interrupts on multiple IRQ lines, when
+    it was expecting to receive just one from the CDROM drive.
+
+  - Double-check your hardware configuration to make sure that the IRQ
+    number of your IDE interface matches what the driver expects.
+    (The usual assignments are 14 for the primary (0x1f0) interface
+    and 15 for the secondary (0x170) interface.)  Also be sure that
+    you don't have some other hardware which might be conflicting with
+    the IRQ you're using.  Also check the BIOS setup for your system;
+    some have the ability to disable individual IRQ levels, and I've
+    had one report of a system which was shipped with IRQ 15 disabled
+    by default.
+
+  - Note that many MS-DOS CDROM drivers will still function even if
+    there are hardware problems with the interrupt setup; they
+    apparently don't use interrupts.
+
+  - If you own a Pioneer DR-A24X, you _will_ get nasty error messages
+    on boot such as "irq timeout: status=0x50 { DriveReady SeekComplete }"
+    The Pioneer DR-A24X CDROM drives are fairly popular these days.
+    Unfortunately, these drives seem to become very confused when we perform
+    the standard Linux ATA disk drive probe. If you own one of these drives,
+    you can bypass the ATA probing which confuses these CDROM drives, by
+    adding `append="hdX=noprobe hdX=cdrom"` to your lilo.conf file and running
+    lilo (again where X is the drive letter corresponding to where your drive
+    is installed.)
+
+c. System hangups.
+
+  - If the system locks up when you try to access the CDROM, the most
+    likely cause is that you have a buggy IDE adapter which doesn't
+    properly handle simultaneous transactions on multiple interfaces.
+    The most notorious of these is the CMD640B chip.  This problem can
+    be worked around by specifying the `serialize` option when
+    booting.  Recent kernels should be able to detect the need for
+    this automatically in most cases, but the detection is not
+    foolproof.  See Documentation/ide/ide.txt for more information
+    about the `serialize` option and the CMD640B.
+
+  - Note that many MS-DOS CDROM drivers will work with such buggy
+    hardware, apparently because they never attempt to overlap CDROM
+    operations with other disk activity.
+
+
+d. Can't mount a CDROM.
+
+  - If you get errors from mount, it may help to check `dmesg` to see
+    if there are any more specific errors from the driver or from the
+    filesystem.
+
+  - Make sure there's a CDROM loaded in the drive, and that's it's an
+    ISO 9660 disc.  You can't mount an audio CD.
+
+  - With the CDROM in the drive and unmounted, try something like::
+
+      cat /dev/cdrom | od | more
+
+    If you see a dump, then the drive and driver are probably working
+    OK, and the problem is at the filesystem level (i.e., the CDROM is
+    not ISO 9660 or has errors in the filesystem structure).
+
+  - If you see `not a block device` errors, check that the definitions
+    of the device special files are correct.  They should be as
+    follows::
+
+      brw-rw----   1 root     disk       3,   0 Nov 11 18:48 /dev/hda
+      brw-rw----   1 root     disk       3,  64 Nov 11 18:48 /dev/hdb
+      brw-rw----   1 root     disk      22,   0 Nov 11 18:48 /dev/hdc
+      brw-rw----   1 root     disk      22,  64 Nov 11 18:48 /dev/hdd
+
+    Some early Slackware releases had these defined incorrectly.  If
+    these are wrong, you can remake them by running the script
+    scripts/MAKEDEV.ide.  (You may have to make it executable
+    with chmod first.)
+
+    If you have a /dev/cdrom symbolic link, check that it is pointing
+    to the correct device file.
+
+    If you hear people talking of the devices `hd1a` and `hd1b`, these
+    were old names for what are now called hdc and hdd.  Those names
+    should be considered obsolete.
+
+  - If mount is complaining that the iso9660 filesystem is not
+    available, but you know it is (check /proc/filesystems), you
+    probably need a newer version of mount.  Early versions would not
+    always give meaningful error messages.
+
+
+e. Directory listings are unpredictably truncated, and `dmesg` shows
+   `buffer botch` error messages from the driver.
+
+  - There was a bug in the version of the driver in 1.2.x kernels
+    which could cause this.  It was fixed in 1.3.0.  If you can't
+    upgrade, you can probably work around the problem by specifying a
+    blocksize of 2048 when mounting.  (Note that you won't be able to
+    directly execute binaries off the CDROM in that case.)
+
+    If you see this in kernels later than 1.3.0, please report it as a
+    bug.
+
+
+f. Data corruption.
+
+  - Random data corruption was occasionally observed with the Hitachi
+    CDR-7730 CDROM. If you experience data corruption, using "hdx=slow"
+    as a command line parameter may work around the problem, at the
+    expense of low system performance.
+
+
+5. cdchange.c
+-------------
+
+::
+
+  /*
+   * cdchange.c  [-v]  <device>  [<slot>]
+   *
+   * This loads a CDROM from a specified slot in a changer, and displays
+   * information about the changer status.  The drive should be unmounted before
+   * using this program.
+   *
+   * Changer information is displayed if either the -v flag is specified
+   * or no slot was specified.
+   *
+   * Based on code originally from Gerhard Zuber <zuber@berlin.snafu.de>.
+   * Changer status information, and rewrite for the new Uniform CDROM driver
+   * interface by Erik Andersen <andersee@debian.org>.
+   */
+
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <errno.h>
+  #include <string.h>
+  #include <unistd.h>
+  #include <fcntl.h>
+  #include <sys/ioctl.h>
+  #include <linux/cdrom.h>
+
+
+  int
+  main (int argc, char **argv)
+  {
+	char *program;
+	char *device;
+	int fd;           /* file descriptor for CD-ROM device */
+	int status;       /* return status for system calls */
+	int verbose = 0;
+	int slot=-1, x_slot;
+	int total_slots_available;
+
+	program = argv[0];
+
+	++argv;
+	--argc;
+
+	if (argc < 1 || argc > 3) {
+		fprintf (stderr, "usage: %s [-v] <device> [<slot>]\n",
+			 program);
+		fprintf (stderr, "       Slots are numbered 1 -- n.\n");
+		exit (1);
+	}
+
+       if (strcmp (argv[0], "-v") == 0) {
+                verbose = 1;
+                ++argv;
+                --argc;
+        }
+
+	device = argv[0];
+
+	if (argc == 2)
+		slot = atoi (argv[1]) - 1;
+
+	/* open device */
+	fd = open(device, O_RDONLY | O_NONBLOCK);
+	if (fd < 0) {
+		fprintf (stderr, "%s: open failed for `%s`: %s\n",
+			 program, device, strerror (errno));
+		exit (1);
+	}
+
+	/* Check CD player status */
+	total_slots_available = ioctl (fd, CDROM_CHANGER_NSLOTS);
+	if (total_slots_available <= 1 ) {
+		fprintf (stderr, "%s: Device `%s` is not an ATAPI "
+			"compliant CD changer.\n", program, device);
+		exit (1);
+	}
+
+	if (slot >= 0) {
+		if (slot >= total_slots_available) {
+			fprintf (stderr, "Bad slot number.  "
+				 "Should be 1 -- %d.\n",
+				 total_slots_available);
+			exit (1);
+		}
+
+		/* load */
+		slot=ioctl (fd, CDROM_SELECT_DISC, slot);
+		if (slot<0) {
+			fflush(stdout);
+				perror ("CDROM_SELECT_DISC ");
+			exit(1);
+		}
+	}
+
+	if (slot < 0 || verbose) {
+
+		status=ioctl (fd, CDROM_SELECT_DISC, CDSL_CURRENT);
+		if (status<0) {
+			fflush(stdout);
+			perror (" CDROM_SELECT_DISC");
+			exit(1);
+		}
+		slot=status;
+
+		printf ("Current slot: %d\n", slot+1);
+		printf ("Total slots available: %d\n",
+			total_slots_available);
+
+		printf ("Drive status: ");
+                status = ioctl (fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
+                if (status<0) {
+                  perror(" CDROM_DRIVE_STATUS");
+                } else switch(status) {
+		case CDS_DISC_OK:
+			printf ("Ready.\n");
+			break;
+		case CDS_TRAY_OPEN:
+			printf ("Tray Open.\n");
+			break;
+		case CDS_DRIVE_NOT_READY:
+			printf ("Drive Not Ready.\n");
+			break;
+		default:
+			printf ("This Should not happen!\n");
+			break;
+		}
+
+		for (x_slot=0; x_slot<total_slots_available; x_slot++) {
+			printf ("Slot %2d: ", x_slot+1);
+			status = ioctl (fd, CDROM_DRIVE_STATUS, x_slot);
+			if (status<0) {
+			     perror(" CDROM_DRIVE_STATUS");
+			} else switch(status) {
+			case CDS_DISC_OK:
+				printf ("Disc present.");
+				break;
+			case CDS_NO_DISC:
+				printf ("Empty slot.");
+				break;
+			case CDS_TRAY_OPEN:
+				printf ("CD-ROM tray open.\n");
+				break;
+			case CDS_DRIVE_NOT_READY:
+				printf ("CD-ROM drive not ready.\n");
+				break;
+			case CDS_NO_INFO:
+				printf ("No Information available.");
+				break;
+			default:
+				printf ("This Should not happen!\n");
+				break;
+			}
+		  if (slot == x_slot) {
+                  status = ioctl (fd, CDROM_DISC_STATUS);
+                  if (status<0) {
+			perror(" CDROM_DISC_STATUS");
+                  }
+		  switch (status) {
+			case CDS_AUDIO:
+				printf ("\tAudio disc.\t");
+				break;
+			case CDS_DATA_1:
+			case CDS_DATA_2:
+				printf ("\tData disc type %d.\t", status-CDS_DATA_1+1);
+				break;
+			case CDS_XA_2_1:
+			case CDS_XA_2_2:
+				printf ("\tXA data disc type %d.\t", status-CDS_XA_2_1+1);
+				break;
+			default:
+				printf ("\tUnknown disc type 0x%x!\t", status);
+				break;
+			}
+			}
+			status = ioctl (fd, CDROM_MEDIA_CHANGED, x_slot);
+			if (status<0) {
+				perror(" CDROM_MEDIA_CHANGED");
+			}
+			switch (status) {
+			case 1:
+				printf ("Changed.\n");
+				break;
+			default:
+				printf ("\n");
+				break;
+			}
+		}
+	}
+
+	/* close device */
+	status = close (fd);
+	if (status != 0) {
+		fprintf (stderr, "%s: close failed for `%s`: %s\n",
+			 program, device, strerror (errno));
+		exit (1);
+	}
+
+	exit (0);
+  }
diff --git a/Documentation/cdrom/index.rst b/Documentation/cdrom/index.rst
new file mode 100644
index 000000000000..efbd5d111825
--- /dev/null
+++ b/Documentation/cdrom/index.rst
@@ -0,0 +1,19 @@
+:orphan:
+
+=====
+cdrom
+=====
+
+.. toctree::
+    :maxdepth: 1
+
+    cdrom-standard
+    ide-cd
+    packet-writing
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/cdrom/packet-writing.rst b/Documentation/cdrom/packet-writing.rst
new file mode 100644
index 000000000000..c5c957195a5a
--- /dev/null
+++ b/Documentation/cdrom/packet-writing.rst
@@ -0,0 +1,139 @@
+==============
+Packet writing
+==============
+
+Getting started quick
+---------------------
+
+- Select packet support in the block device section and UDF support in
+  the file system section.
+
+- Compile and install kernel and modules, reboot.
+
+- You need the udftools package (pktsetup, mkudffs, cdrwtool).
+  Download from http://sourceforge.net/projects/linux-udf/
+
+- Grab a new CD-RW disc and format it (assuming CD-RW is hdc, substitute
+  as appropriate)::
+
+	# cdrwtool -d /dev/hdc -q
+
+- Setup your writer::
+
+	# pktsetup dev_name /dev/hdc
+
+- Now you can mount /dev/pktcdvd/dev_name and copy files to it. Enjoy::
+
+	# mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
+
+
+Packet writing for DVD-RW media
+-------------------------------
+
+DVD-RW discs can be written to much like CD-RW discs if they are in
+the so called "restricted overwrite" mode. To put a disc in restricted
+overwrite mode, run::
+
+	# dvd+rw-format /dev/hdc
+
+You can then use the disc the same way you would use a CD-RW disc::
+
+	# pktsetup dev_name /dev/hdc
+	# mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
+
+
+Packet writing for DVD+RW media
+-------------------------------
+
+According to the DVD+RW specification, a drive supporting DVD+RW discs
+shall implement "true random writes with 2KB granularity", which means
+that it should be possible to put any filesystem with a block size >=
+2KB on such a disc. For example, it should be possible to do::
+
+	# dvd+rw-format /dev/hdc   (only needed if the disc has never
+	                            been formatted)
+	# mkudffs /dev/hdc
+	# mount /dev/hdc /cdrom -t udf -o rw,noatime
+
+However, some drives don't follow the specification and expect the
+host to perform aligned writes at 32KB boundaries. Other drives do
+follow the specification, but suffer bad performance problems if the
+writes are not 32KB aligned.
+
+Both problems can be solved by using the pktcdvd driver, which always
+generates aligned writes::
+
+	# dvd+rw-format /dev/hdc
+	# pktsetup dev_name /dev/hdc
+	# mkudffs /dev/pktcdvd/dev_name
+	# mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
+
+
+Packet writing for DVD-RAM media
+--------------------------------
+
+DVD-RAM discs are random writable, so using the pktcdvd driver is not
+necessary. However, using the pktcdvd driver can improve performance
+in the same way it does for DVD+RW media.
+
+
+Notes
+-----
+
+- CD-RW media can usually not be overwritten more than about 1000
+  times, so to avoid unnecessary wear on the media, you should always
+  use the noatime mount option.
+
+- Defect management (ie automatic remapping of bad sectors) has not
+  been implemented yet, so you are likely to get at least some
+  filesystem corruption if the disc wears out.
+
+- Since the pktcdvd driver makes the disc appear as a regular block
+  device with a 2KB block size, you can put any filesystem you like on
+  the disc. For example, run::
+
+	# /sbin/mke2fs /dev/pktcdvd/dev_name
+
+  to create an ext2 filesystem on the disc.
+
+
+Using the pktcdvd sysfs interface
+---------------------------------
+
+Since Linux 2.6.20, the pktcdvd module has a sysfs interface
+and can be controlled by it. For example the "pktcdvd" tool uses
+this interface. (see http://tom.ist-im-web.de/download/pktcdvd )
+
+"pktcdvd" works similar to "pktsetup", e.g.::
+
+	# pktcdvd -a dev_name /dev/hdc
+	# mkudffs /dev/pktcdvd/dev_name
+	# mount -t udf -o rw,noatime /dev/pktcdvd/dev_name /dvdram
+	# cp files /dvdram
+	# umount /dvdram
+	# pktcdvd -r dev_name
+
+
+For a description of the sysfs interface look into the file:
+
+  Documentation/ABI/testing/sysfs-class-pktcdvd
+
+
+Using the pktcdvd debugfs interface
+-----------------------------------
+
+To read pktcdvd device infos in human readable form, do::
+
+	# cat /sys/kernel/debug/pktcdvd/pktcdvd[0-7]/info
+
+For a description of the debugfs interface look into the file:
+
+  Documentation/ABI/testing/debugfs-pktcdvd
+
+
+
+Links
+-----
+
+See http://fy.chalmers.se/~appro/linux/DVD+RW/ for more information
+about DVD writing.
diff --git a/Documentation/cdrom/packet-writing.txt b/Documentation/cdrom/packet-writing.txt
deleted file mode 100644
index 2834170d821e..000000000000
--- a/Documentation/cdrom/packet-writing.txt
+++ /dev/null
@@ -1,132 +0,0 @@
-Getting started quick
----------------------
-
-- Select packet support in the block device section and UDF support in
-  the file system section.
-
-- Compile and install kernel and modules, reboot.
-
-- You need the udftools package (pktsetup, mkudffs, cdrwtool).
-  Download from http://sourceforge.net/projects/linux-udf/
-
-- Grab a new CD-RW disc and format it (assuming CD-RW is hdc, substitute
-  as appropriate):
-	# cdrwtool -d /dev/hdc -q
-
-- Setup your writer
-	# pktsetup dev_name /dev/hdc
-
-- Now you can mount /dev/pktcdvd/dev_name and copy files to it. Enjoy!
-	# mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
-
-
-Packet writing for DVD-RW media
--------------------------------
-
-DVD-RW discs can be written to much like CD-RW discs if they are in
-the so called "restricted overwrite" mode. To put a disc in restricted
-overwrite mode, run:
-
-	# dvd+rw-format /dev/hdc
-
-You can then use the disc the same way you would use a CD-RW disc:
-
-	# pktsetup dev_name /dev/hdc
-	# mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
-
-
-Packet writing for DVD+RW media
--------------------------------
-
-According to the DVD+RW specification, a drive supporting DVD+RW discs
-shall implement "true random writes with 2KB granularity", which means
-that it should be possible to put any filesystem with a block size >=
-2KB on such a disc. For example, it should be possible to do:
-
-	# dvd+rw-format /dev/hdc   (only needed if the disc has never
-	                            been formatted)
-	# mkudffs /dev/hdc
-	# mount /dev/hdc /cdrom -t udf -o rw,noatime
-
-However, some drives don't follow the specification and expect the
-host to perform aligned writes at 32KB boundaries. Other drives do
-follow the specification, but suffer bad performance problems if the
-writes are not 32KB aligned.
-
-Both problems can be solved by using the pktcdvd driver, which always
-generates aligned writes.
-
-	# dvd+rw-format /dev/hdc
-	# pktsetup dev_name /dev/hdc
-	# mkudffs /dev/pktcdvd/dev_name
-	# mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
-
-
-Packet writing for DVD-RAM media
---------------------------------
-
-DVD-RAM discs are random writable, so using the pktcdvd driver is not
-necessary. However, using the pktcdvd driver can improve performance
-in the same way it does for DVD+RW media.
-
-
-Notes
------
-
-- CD-RW media can usually not be overwritten more than about 1000
-  times, so to avoid unnecessary wear on the media, you should always
-  use the noatime mount option.
-
-- Defect management (ie automatic remapping of bad sectors) has not
-  been implemented yet, so you are likely to get at least some
-  filesystem corruption if the disc wears out.
-
-- Since the pktcdvd driver makes the disc appear as a regular block
-  device with a 2KB block size, you can put any filesystem you like on
-  the disc. For example, run:
-
-	# /sbin/mke2fs /dev/pktcdvd/dev_name
-
-  to create an ext2 filesystem on the disc.
-
-
-Using the pktcdvd sysfs interface
----------------------------------
-
-Since Linux 2.6.20, the pktcdvd module has a sysfs interface
-and can be controlled by it. For example the "pktcdvd" tool uses
-this interface. (see http://tom.ist-im-web.de/download/pktcdvd )
-
-"pktcdvd" works similar to "pktsetup", e.g.:
-
-	# pktcdvd -a dev_name /dev/hdc
-	# mkudffs /dev/pktcdvd/dev_name
-	# mount -t udf -o rw,noatime /dev/pktcdvd/dev_name /dvdram
-	# cp files /dvdram
-	# umount /dvdram
-	# pktcdvd -r dev_name
-
-
-For a description of the sysfs interface look into the file:
-
-  Documentation/ABI/testing/sysfs-class-pktcdvd
-
-
-Using the pktcdvd debugfs interface
------------------------------------
-
-To read pktcdvd device infos in human readable form, do:
-
-	# cat /sys/kernel/debug/pktcdvd/pktcdvd[0-7]/info
-
-For a description of the debugfs interface look into the file:
-
-  Documentation/ABI/testing/debugfs-pktcdvd
-
-
-
-Links
------
-
-See http://fy.chalmers.se/~appro/linux/DVD+RW/ for more information
-about DVD writing.
-- 
cgit 


From f0ba43774cea3fc14732bb9243ce7238ae8a3202 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:43 -0300
Subject: docs: convert docs to ReST and rename to *.rst

The conversion is actually:
  - add blank lines and indentation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/device-mapper/cache-policies.rst    | 131 +++++++
 Documentation/device-mapper/cache-policies.txt    | 121 ------
 Documentation/device-mapper/cache.rst             | 337 +++++++++++++++++
 Documentation/device-mapper/cache.txt             | 311 ----------------
 Documentation/device-mapper/delay.rst             |  31 ++
 Documentation/device-mapper/delay.txt             |  28 --
 Documentation/device-mapper/dm-crypt.rst          | 173 +++++++++
 Documentation/device-mapper/dm-crypt.txt          | 162 --------
 Documentation/device-mapper/dm-flakey.rst         |  74 ++++
 Documentation/device-mapper/dm-flakey.txt         |  57 ---
 Documentation/device-mapper/dm-init.rst           | 125 +++++++
 Documentation/device-mapper/dm-init.txt           | 114 ------
 Documentation/device-mapper/dm-integrity.rst      | 259 +++++++++++++
 Documentation/device-mapper/dm-integrity.txt      | 233 ------------
 Documentation/device-mapper/dm-io.rst             |  75 ++++
 Documentation/device-mapper/dm-io.txt             |  75 ----
 Documentation/device-mapper/dm-log.rst            |  57 +++
 Documentation/device-mapper/dm-log.txt            |  54 ---
 Documentation/device-mapper/dm-queue-length.rst   |  48 +++
 Documentation/device-mapper/dm-queue-length.txt   |  39 --
 Documentation/device-mapper/dm-raid.rst           | 419 +++++++++++++++++++++
 Documentation/device-mapper/dm-raid.txt           | 354 ------------------
 Documentation/device-mapper/dm-service-time.rst   | 101 +++++
 Documentation/device-mapper/dm-service-time.txt   |  91 -----
 Documentation/device-mapper/dm-uevent.rst         | 110 ++++++
 Documentation/device-mapper/dm-uevent.txt         |  97 -----
 Documentation/device-mapper/dm-zoned.rst          | 146 ++++++++
 Documentation/device-mapper/dm-zoned.txt          | 144 --------
 Documentation/device-mapper/era.rst               | 116 ++++++
 Documentation/device-mapper/era.txt               | 108 ------
 Documentation/device-mapper/index.rst             |  44 +++
 Documentation/device-mapper/kcopyd.rst            |  47 +++
 Documentation/device-mapper/kcopyd.txt            |  47 ---
 Documentation/device-mapper/linear.rst            |  63 ++++
 Documentation/device-mapper/linear.txt            |  61 ----
 Documentation/device-mapper/log-writes.rst        | 145 ++++++++
 Documentation/device-mapper/log-writes.txt        | 140 -------
 Documentation/device-mapper/persistent-data.rst   |  88 +++++
 Documentation/device-mapper/persistent-data.txt   |  84 -----
 Documentation/device-mapper/snapshot.rst          | 180 +++++++++
 Documentation/device-mapper/snapshot.txt          | 176 ---------
 Documentation/device-mapper/statistics.rst        | 225 ++++++++++++
 Documentation/device-mapper/statistics.txt        | 223 -----------
 Documentation/device-mapper/striped.rst           |  61 ++++
 Documentation/device-mapper/striped.txt           |  57 ---
 Documentation/device-mapper/switch.rst            | 141 +++++++
 Documentation/device-mapper/switch.txt            | 138 -------
 Documentation/device-mapper/thin-provisioning.rst | 427 ++++++++++++++++++++++
 Documentation/device-mapper/thin-provisioning.txt | 411 ---------------------
 Documentation/device-mapper/unstriped.rst         | 135 +++++++
 Documentation/device-mapper/unstriped.txt         | 124 -------
 Documentation/device-mapper/verity.rst            | 229 ++++++++++++
 Documentation/device-mapper/verity.txt            | 219 -----------
 Documentation/device-mapper/writecache.rst        |  79 ++++
 Documentation/device-mapper/writecache.txt        |  70 ----
 Documentation/device-mapper/zero.rst              |  37 ++
 Documentation/device-mapper/zero.txt              |  37 --
 Documentation/filesystems/ubifs-authentication.md |   4 +-
 58 files changed, 4105 insertions(+), 3777 deletions(-)
 create mode 100644 Documentation/device-mapper/cache-policies.rst
 delete mode 100644 Documentation/device-mapper/cache-policies.txt
 create mode 100644 Documentation/device-mapper/cache.rst
 delete mode 100644 Documentation/device-mapper/cache.txt
 create mode 100644 Documentation/device-mapper/delay.rst
 delete mode 100644 Documentation/device-mapper/delay.txt
 create mode 100644 Documentation/device-mapper/dm-crypt.rst
 delete mode 100644 Documentation/device-mapper/dm-crypt.txt
 create mode 100644 Documentation/device-mapper/dm-flakey.rst
 delete mode 100644 Documentation/device-mapper/dm-flakey.txt
 create mode 100644 Documentation/device-mapper/dm-init.rst
 delete mode 100644 Documentation/device-mapper/dm-init.txt
 create mode 100644 Documentation/device-mapper/dm-integrity.rst
 delete mode 100644 Documentation/device-mapper/dm-integrity.txt
 create mode 100644 Documentation/device-mapper/dm-io.rst
 delete mode 100644 Documentation/device-mapper/dm-io.txt
 create mode 100644 Documentation/device-mapper/dm-log.rst
 delete mode 100644 Documentation/device-mapper/dm-log.txt
 create mode 100644 Documentation/device-mapper/dm-queue-length.rst
 delete mode 100644 Documentation/device-mapper/dm-queue-length.txt
 create mode 100644 Documentation/device-mapper/dm-raid.rst
 delete mode 100644 Documentation/device-mapper/dm-raid.txt
 create mode 100644 Documentation/device-mapper/dm-service-time.rst
 delete mode 100644 Documentation/device-mapper/dm-service-time.txt
 create mode 100644 Documentation/device-mapper/dm-uevent.rst
 delete mode 100644 Documentation/device-mapper/dm-uevent.txt
 create mode 100644 Documentation/device-mapper/dm-zoned.rst
 delete mode 100644 Documentation/device-mapper/dm-zoned.txt
 create mode 100644 Documentation/device-mapper/era.rst
 delete mode 100644 Documentation/device-mapper/era.txt
 create mode 100644 Documentation/device-mapper/index.rst
 create mode 100644 Documentation/device-mapper/kcopyd.rst
 delete mode 100644 Documentation/device-mapper/kcopyd.txt
 create mode 100644 Documentation/device-mapper/linear.rst
 delete mode 100644 Documentation/device-mapper/linear.txt
 create mode 100644 Documentation/device-mapper/log-writes.rst
 delete mode 100644 Documentation/device-mapper/log-writes.txt
 create mode 100644 Documentation/device-mapper/persistent-data.rst
 delete mode 100644 Documentation/device-mapper/persistent-data.txt
 create mode 100644 Documentation/device-mapper/snapshot.rst
 delete mode 100644 Documentation/device-mapper/snapshot.txt
 create mode 100644 Documentation/device-mapper/statistics.rst
 delete mode 100644 Documentation/device-mapper/statistics.txt
 create mode 100644 Documentation/device-mapper/striped.rst
 delete mode 100644 Documentation/device-mapper/striped.txt
 create mode 100644 Documentation/device-mapper/switch.rst
 delete mode 100644 Documentation/device-mapper/switch.txt
 create mode 100644 Documentation/device-mapper/thin-provisioning.rst
 delete mode 100644 Documentation/device-mapper/thin-provisioning.txt
 create mode 100644 Documentation/device-mapper/unstriped.rst
 delete mode 100644 Documentation/device-mapper/unstriped.txt
 create mode 100644 Documentation/device-mapper/verity.rst
 delete mode 100644 Documentation/device-mapper/verity.txt
 create mode 100644 Documentation/device-mapper/writecache.rst
 delete mode 100644 Documentation/device-mapper/writecache.txt
 create mode 100644 Documentation/device-mapper/zero.rst
 delete mode 100644 Documentation/device-mapper/zero.txt

(limited to 'Documentation')

diff --git a/Documentation/device-mapper/cache-policies.rst b/Documentation/device-mapper/cache-policies.rst
new file mode 100644
index 000000000000..b17fe352fc41
--- /dev/null
+++ b/Documentation/device-mapper/cache-policies.rst
@@ -0,0 +1,131 @@
+=============================
+Guidance for writing policies
+=============================
+
+Try to keep transactionality out of it.  The core is careful to
+avoid asking about anything that is migrating.  This is a pain, but
+makes it easier to write the policies.
+
+Mappings are loaded into the policy at construction time.
+
+Every bio that is mapped by the target is referred to the policy.
+The policy can return a simple HIT or MISS or issue a migration.
+
+Currently there's no way for the policy to issue background work,
+e.g. to start writing back dirty blocks that are going to be evicted
+soon.
+
+Because we map bios, rather than requests it's easy for the policy
+to get fooled by many small bios.  For this reason the core target
+issues periodic ticks to the policy.  It's suggested that the policy
+doesn't update states (eg, hit counts) for a block more than once
+for each tick.  The core ticks by watching bios complete, and so
+trying to see when the io scheduler has let the ios run.
+
+
+Overview of supplied cache replacement policies
+===============================================
+
+multiqueue (mq)
+---------------
+
+This policy is now an alias for smq (see below).
+
+The following tunables are accepted, but have no effect::
+
+	'sequential_threshold <#nr_sequential_ios>'
+	'random_threshold <#nr_random_ios>'
+	'read_promote_adjustment <value>'
+	'write_promote_adjustment <value>'
+	'discard_promote_adjustment <value>'
+
+Stochastic multiqueue (smq)
+---------------------------
+
+This policy is the default.
+
+The stochastic multi-queue (smq) policy addresses some of the problems
+with the multiqueue (mq) policy.
+
+The smq policy (vs mq) offers the promise of less memory utilization,
+improved performance and increased adaptability in the face of changing
+workloads.  smq also does not have any cumbersome tuning knobs.
+
+Users may switch from "mq" to "smq" simply by appropriately reloading a
+DM table that is using the cache target.  Doing so will cause all of the
+mq policy's hints to be dropped.  Also, performance of the cache may
+degrade slightly until smq recalculates the origin device's hotspots
+that should be cached.
+
+Memory usage
+^^^^^^^^^^^^
+
+The mq policy used a lot of memory; 88 bytes per cache block on a 64
+bit machine.
+
+smq uses 28bit indexes to implement its data structures rather than
+pointers.  It avoids storing an explicit hit count for each block.  It
+has a 'hotspot' queue, rather than a pre-cache, which uses a quarter of
+the entries (each hotspot block covers a larger area than a single
+cache block).
+
+All this means smq uses ~25bytes per cache block.  Still a lot of
+memory, but a substantial improvement nontheless.
+
+Level balancing
+^^^^^^^^^^^^^^^
+
+mq placed entries in different levels of the multiqueue structures
+based on their hit count (~ln(hit count)).  This meant the bottom
+levels generally had the most entries, and the top ones had very
+few.  Having unbalanced levels like this reduced the efficacy of the
+multiqueue.
+
+smq does not maintain a hit count, instead it swaps hit entries with
+the least recently used entry from the level above.  The overall
+ordering being a side effect of this stochastic process.  With this
+scheme we can decide how many entries occupy each multiqueue level,
+resulting in better promotion/demotion decisions.
+
+Adaptability:
+The mq policy maintained a hit count for each cache block.  For a
+different block to get promoted to the cache its hit count has to
+exceed the lowest currently in the cache.  This meant it could take a
+long time for the cache to adapt between varying IO patterns.
+
+smq doesn't maintain hit counts, so a lot of this problem just goes
+away.  In addition it tracks performance of the hotspot queue, which
+is used to decide which blocks to promote.  If the hotspot queue is
+performing badly then it starts moving entries more quickly between
+levels.  This lets it adapt to new IO patterns very quickly.
+
+Performance
+^^^^^^^^^^^
+
+Testing smq shows substantially better performance than mq.
+
+cleaner
+-------
+
+The cleaner writes back all dirty blocks in a cache to decommission it.
+
+Examples
+========
+
+The syntax for a table is::
+
+	cache <metadata dev> <cache dev> <origin dev> <block size>
+	<#feature_args> [<feature arg>]*
+	<policy> <#policy_args> [<policy arg>]*
+
+The syntax to send a message using the dmsetup command is::
+
+	dmsetup message <mapped device> 0 sequential_threshold 1024
+	dmsetup message <mapped device> 0 random_threshold 8
+
+Using dmsetup::
+
+	dmsetup create blah --table "0 268435456 cache /dev/sdb /dev/sdc \
+	    /dev/sdd 512 0 mq 4 sequential_threshold 1024 random_threshold 8"
+	creates a 128GB large mapped device named 'blah' with the
+	sequential threshold set to 1024 and the random_threshold set to 8.
diff --git a/Documentation/device-mapper/cache-policies.txt b/Documentation/device-mapper/cache-policies.txt
deleted file mode 100644
index 86786d87d9a8..000000000000
--- a/Documentation/device-mapper/cache-policies.txt
+++ /dev/null
@@ -1,121 +0,0 @@
-Guidance for writing policies
-=============================
-
-Try to keep transactionality out of it.  The core is careful to
-avoid asking about anything that is migrating.  This is a pain, but
-makes it easier to write the policies.
-
-Mappings are loaded into the policy at construction time.
-
-Every bio that is mapped by the target is referred to the policy.
-The policy can return a simple HIT or MISS or issue a migration.
-
-Currently there's no way for the policy to issue background work,
-e.g. to start writing back dirty blocks that are going to be evicted
-soon.
-
-Because we map bios, rather than requests it's easy for the policy
-to get fooled by many small bios.  For this reason the core target
-issues periodic ticks to the policy.  It's suggested that the policy
-doesn't update states (eg, hit counts) for a block more than once
-for each tick.  The core ticks by watching bios complete, and so
-trying to see when the io scheduler has let the ios run.
-
-
-Overview of supplied cache replacement policies
-===============================================
-
-multiqueue (mq)
----------------
-
-This policy is now an alias for smq (see below).
-
-The following tunables are accepted, but have no effect:
-
-	'sequential_threshold <#nr_sequential_ios>'
-	'random_threshold <#nr_random_ios>'
-	'read_promote_adjustment <value>'
-	'write_promote_adjustment <value>'
-	'discard_promote_adjustment <value>'
-
-Stochastic multiqueue (smq)
----------------------------
-
-This policy is the default.
-
-The stochastic multi-queue (smq) policy addresses some of the problems
-with the multiqueue (mq) policy.
-
-The smq policy (vs mq) offers the promise of less memory utilization,
-improved performance and increased adaptability in the face of changing
-workloads.  smq also does not have any cumbersome tuning knobs.
-
-Users may switch from "mq" to "smq" simply by appropriately reloading a
-DM table that is using the cache target.  Doing so will cause all of the
-mq policy's hints to be dropped.  Also, performance of the cache may
-degrade slightly until smq recalculates the origin device's hotspots
-that should be cached.
-
-Memory usage:
-The mq policy used a lot of memory; 88 bytes per cache block on a 64
-bit machine.
-
-smq uses 28bit indexes to implement its data structures rather than
-pointers.  It avoids storing an explicit hit count for each block.  It
-has a 'hotspot' queue, rather than a pre-cache, which uses a quarter of
-the entries (each hotspot block covers a larger area than a single
-cache block).
-
-All this means smq uses ~25bytes per cache block.  Still a lot of
-memory, but a substantial improvement nontheless.
-
-Level balancing:
-mq placed entries in different levels of the multiqueue structures
-based on their hit count (~ln(hit count)).  This meant the bottom
-levels generally had the most entries, and the top ones had very
-few.  Having unbalanced levels like this reduced the efficacy of the
-multiqueue.
-
-smq does not maintain a hit count, instead it swaps hit entries with
-the least recently used entry from the level above.  The overall
-ordering being a side effect of this stochastic process.  With this
-scheme we can decide how many entries occupy each multiqueue level,
-resulting in better promotion/demotion decisions.
-
-Adaptability:
-The mq policy maintained a hit count for each cache block.  For a
-different block to get promoted to the cache its hit count has to
-exceed the lowest currently in the cache.  This meant it could take a
-long time for the cache to adapt between varying IO patterns.
-
-smq doesn't maintain hit counts, so a lot of this problem just goes
-away.  In addition it tracks performance of the hotspot queue, which
-is used to decide which blocks to promote.  If the hotspot queue is
-performing badly then it starts moving entries more quickly between
-levels.  This lets it adapt to new IO patterns very quickly.
-
-Performance:
-Testing smq shows substantially better performance than mq.
-
-cleaner
--------
-
-The cleaner writes back all dirty blocks in a cache to decommission it.
-
-Examples
-========
-
-The syntax for a table is:
-	cache <metadata dev> <cache dev> <origin dev> <block size>
-	<#feature_args> [<feature arg>]*
-	<policy> <#policy_args> [<policy arg>]*
-
-The syntax to send a message using the dmsetup command is:
-	dmsetup message <mapped device> 0 sequential_threshold 1024
-	dmsetup message <mapped device> 0 random_threshold 8
-
-Using dmsetup:
-	dmsetup create blah --table "0 268435456 cache /dev/sdb /dev/sdc \
-	    /dev/sdd 512 0 mq 4 sequential_threshold 1024 random_threshold 8"
-	creates a 128GB large mapped device named 'blah' with the
-	sequential threshold set to 1024 and the random_threshold set to 8.
diff --git a/Documentation/device-mapper/cache.rst b/Documentation/device-mapper/cache.rst
new file mode 100644
index 000000000000..f15e5254d05b
--- /dev/null
+++ b/Documentation/device-mapper/cache.rst
@@ -0,0 +1,337 @@
+=====
+Cache
+=====
+
+Introduction
+============
+
+dm-cache is a device mapper target written by Joe Thornber, Heinz
+Mauelshagen, and Mike Snitzer.
+
+It aims to improve performance of a block device (eg, a spindle) by
+dynamically migrating some of its data to a faster, smaller device
+(eg, an SSD).
+
+This device-mapper solution allows us to insert this caching at
+different levels of the dm stack, for instance above the data device for
+a thin-provisioning pool.  Caching solutions that are integrated more
+closely with the virtual memory system should give better performance.
+
+The target reuses the metadata library used in the thin-provisioning
+library.
+
+The decision as to what data to migrate and when is left to a plug-in
+policy module.  Several of these have been written as we experiment,
+and we hope other people will contribute others for specific io
+scenarios (eg. a vm image server).
+
+Glossary
+========
+
+  Migration
+	       Movement of the primary copy of a logical block from one
+	       device to the other.
+  Promotion
+	       Migration from slow device to fast device.
+  Demotion
+	       Migration from fast device to slow device.
+
+The origin device always contains a copy of the logical block, which
+may be out of date or kept in sync with the copy on the cache device
+(depending on policy).
+
+Design
+======
+
+Sub-devices
+-----------
+
+The target is constructed by passing three devices to it (along with
+other parameters detailed later):
+
+1. An origin device - the big, slow one.
+
+2. A cache device - the small, fast one.
+
+3. A small metadata device - records which blocks are in the cache,
+   which are dirty, and extra hints for use by the policy object.
+   This information could be put on the cache device, but having it
+   separate allows the volume manager to configure it differently,
+   e.g. as a mirror for extra robustness.  This metadata device may only
+   be used by a single cache device.
+
+Fixed block size
+----------------
+
+The origin is divided up into blocks of a fixed size.  This block size
+is configurable when you first create the cache.  Typically we've been
+using block sizes of 256KB - 1024KB.  The block size must be between 64
+sectors (32KB) and 2097152 sectors (1GB) and a multiple of 64 sectors (32KB).
+
+Having a fixed block size simplifies the target a lot.  But it is
+something of a compromise.  For instance, a small part of a block may be
+getting hit a lot, yet the whole block will be promoted to the cache.
+So large block sizes are bad because they waste cache space.  And small
+block sizes are bad because they increase the amount of metadata (both
+in core and on disk).
+
+Cache operating modes
+---------------------
+
+The cache has three operating modes: writeback, writethrough and
+passthrough.
+
+If writeback, the default, is selected then a write to a block that is
+cached will go only to the cache and the block will be marked dirty in
+the metadata.
+
+If writethrough is selected then a write to a cached block will not
+complete until it has hit both the origin and cache devices.  Clean
+blocks should remain clean.
+
+If passthrough is selected, useful when the cache contents are not known
+to be coherent with the origin device, then all reads are served from
+the origin device (all reads miss the cache) and all writes are
+forwarded to the origin device; additionally, write hits cause cache
+block invalidates.  To enable passthrough mode the cache must be clean.
+Passthrough mode allows a cache device to be activated without having to
+worry about coherency.  Coherency that exists is maintained, although
+the cache will gradually cool as writes take place.  If the coherency of
+the cache can later be verified, or established through use of the
+"invalidate_cblocks" message, the cache device can be transitioned to
+writethrough or writeback mode while still warm.  Otherwise, the cache
+contents can be discarded prior to transitioning to the desired
+operating mode.
+
+A simple cleaner policy is provided, which will clean (write back) all
+dirty blocks in a cache.  Useful for decommissioning a cache or when
+shrinking a cache.  Shrinking the cache's fast device requires all cache
+blocks, in the area of the cache being removed, to be clean.  If the
+area being removed from the cache still contains dirty blocks the resize
+will fail.  Care must be taken to never reduce the volume used for the
+cache's fast device until the cache is clean.  This is of particular
+importance if writeback mode is used.  Writethrough and passthrough
+modes already maintain a clean cache.  Future support to partially clean
+the cache, above a specified threshold, will allow for keeping the cache
+warm and in writeback mode during resize.
+
+Migration throttling
+--------------------
+
+Migrating data between the origin and cache device uses bandwidth.
+The user can set a throttle to prevent more than a certain amount of
+migration occurring at any one time.  Currently we're not taking any
+account of normal io traffic going to the devices.  More work needs
+doing here to avoid migrating during those peak io moments.
+
+For the time being, a message "migration_threshold <#sectors>"
+can be used to set the maximum number of sectors being migrated,
+the default being 2048 sectors (1MB).
+
+Updating on-disk metadata
+-------------------------
+
+On-disk metadata is committed every time a FLUSH or FUA bio is written.
+If no such requests are made then commits will occur every second.  This
+means the cache behaves like a physical disk that has a volatile write
+cache.  If power is lost you may lose some recent writes.  The metadata
+should always be consistent in spite of any crash.
+
+The 'dirty' state for a cache block changes far too frequently for us
+to keep updating it on the fly.  So we treat it as a hint.  In normal
+operation it will be written when the dm device is suspended.  If the
+system crashes all cache blocks will be assumed dirty when restarted.
+
+Per-block policy hints
+----------------------
+
+Policy plug-ins can store a chunk of data per cache block.  It's up to
+the policy how big this chunk is, but it should be kept small.  Like the
+dirty flags this data is lost if there's a crash so a safe fallback
+value should always be possible.
+
+Policy hints affect performance, not correctness.
+
+Policy messaging
+----------------
+
+Policies will have different tunables, specific to each one, so we
+need a generic way of getting and setting these.  Device-mapper
+messages are used.  Refer to cache-policies.txt.
+
+Discard bitset resolution
+-------------------------
+
+We can avoid copying data during migration if we know the block has
+been discarded.  A prime example of this is when mkfs discards the
+whole block device.  We store a bitset tracking the discard state of
+blocks.  However, we allow this bitset to have a different block size
+from the cache blocks.  This is because we need to track the discard
+state for all of the origin device (compare with the dirty bitset
+which is just for the smaller cache device).
+
+Target interface
+================
+
+Constructor
+-----------
+
+  ::
+
+   cache <metadata dev> <cache dev> <origin dev> <block size>
+         <#feature args> [<feature arg>]*
+         <policy> <#policy args> [policy args]*
+
+ ================ =======================================================
+ metadata dev     fast device holding the persistent metadata
+ cache dev	  fast device holding cached data blocks
+ origin dev	  slow device holding original data blocks
+ block size       cache unit size in sectors
+
+ #feature args    number of feature arguments passed
+ feature args     writethrough or passthrough (The default is writeback.)
+
+ policy           the replacement policy to use
+ #policy args     an even number of arguments corresponding to
+                  key/value pairs passed to the policy
+ policy args      key/value pairs passed to the policy
+		  E.g. 'sequential_threshold 1024'
+		  See cache-policies.txt for details.
+ ================ =======================================================
+
+Optional feature arguments are:
+
+
+   ==================== ========================================================
+   writethrough		write through caching that prohibits cache block
+			content from being different from origin block content.
+			Without this argument, the default behaviour is to write
+			back cache block contents later for performance reasons,
+			so they may differ from the corresponding origin blocks.
+
+   passthrough		a degraded mode useful for various cache coherency
+			situations (e.g., rolling back snapshots of
+			underlying storage).	 Reads and writes always go to
+			the origin.	If a write goes to a cached origin
+			block, then the cache block is invalidated.
+			To enable passthrough mode the cache must be clean.
+
+   metadata2		use version 2 of the metadata.  This stores the dirty
+			bits in a separate btree, which improves speed of
+			shutting down the cache.
+
+   no_discard_passdown	disable passing down discards from the cache
+			to the origin's data device.
+   ==================== ========================================================
+
+A policy called 'default' is always registered.  This is an alias for
+the policy we currently think is giving best all round performance.
+
+As the default policy could vary between kernels, if you are relying on
+the characteristics of a specific policy, always request it by name.
+
+Status
+------
+
+::
+
+  <metadata block size> <#used metadata blocks>/<#total metadata blocks>
+  <cache block size> <#used cache blocks>/<#total cache blocks>
+  <#read hits> <#read misses> <#write hits> <#write misses>
+  <#demotions> <#promotions> <#dirty> <#features> <features>*
+  <#core args> <core args>* <policy name> <#policy args> <policy args>*
+  <cache metadata mode>
+
+
+========================= =====================================================
+metadata block size	  Fixed block size for each metadata block in
+			  sectors
+#used metadata blocks	  Number of metadata blocks used
+#total metadata blocks	  Total number of metadata blocks
+cache block size	  Configurable block size for the cache device
+			  in sectors
+#used cache blocks	  Number of blocks resident in the cache
+#total cache blocks	  Total number of cache blocks
+#read hits		  Number of times a READ bio has been mapped
+			  to the cache
+#read misses		  Number of times a READ bio has been mapped
+			  to the origin
+#write hits		  Number of times a WRITE bio has been mapped
+			  to the cache
+#write misses		  Number of times a WRITE bio has been
+			  mapped to the origin
+#demotions		  Number of times a block has been removed
+			  from the cache
+#promotions		  Number of times a block has been moved to
+			  the cache
+#dirty			  Number of blocks in the cache that differ
+			  from the origin
+#feature args		  Number of feature args to follow
+feature args		  'writethrough' (optional)
+#core args		  Number of core arguments (must be even)
+core args		  Key/value pairs for tuning the core
+			  e.g. migration_threshold
+policy name		  Name of the policy
+#policy args		  Number of policy arguments to follow (must be even)
+policy args		  Key/value pairs e.g. sequential_threshold
+cache metadata mode       ro if read-only, rw if read-write
+
+			  In serious cases where even a read-only mode is
+			  deemed unsafe no further I/O will be permitted and
+			  the status will just contain the string 'Fail'.
+			  The userspace recovery tools should then be used.
+needs_check		  'needs_check' if set, '-' if not set
+			  A metadata operation has failed, resulting in the
+			  needs_check flag being set in the metadata's
+			  superblock.  The metadata device must be
+			  deactivated and checked/repaired before the
+			  cache can be made fully operational again.
+			  '-' indicates	needs_check is not set.
+========================= =====================================================
+
+Messages
+--------
+
+Policies will have different tunables, specific to each one, so we
+need a generic way of getting and setting these.  Device-mapper
+messages are used.  (A sysfs interface would also be possible.)
+
+The message format is::
+
+   <key> <value>
+
+E.g.::
+
+   dmsetup message my_cache 0 sequential_threshold 1024
+
+
+Invalidation is removing an entry from the cache without writing it
+back.  Cache blocks can be invalidated via the invalidate_cblocks
+message, which takes an arbitrary number of cblock ranges.  Each cblock
+range's end value is "one past the end", meaning 5-10 expresses a range
+of values from 5 to 9.  Each cblock must be expressed as a decimal
+value, in the future a variant message that takes cblock ranges
+expressed in hexadecimal may be needed to better support efficient
+invalidation of larger caches.  The cache must be in passthrough mode
+when invalidate_cblocks is used::
+
+   invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
+
+E.g.::
+
+   dmsetup message my_cache 0 invalidate_cblocks 2345 3456-4567 5678-6789
+
+Examples
+========
+
+The test suite can be found here:
+
+https://github.com/jthornber/device-mapper-test-suite
+
+::
+
+  dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
+	  /dev/mapper/ssd /dev/mapper/origin 512 1 writeback default 0'
+  dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
+	  /dev/mapper/ssd /dev/mapper/origin 1024 1 writeback \
+	  mq 4 sequential_threshold 1024 random_threshold 8'
diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt
deleted file mode 100644
index 8ae1cf8e94da..000000000000
--- a/Documentation/device-mapper/cache.txt
+++ /dev/null
@@ -1,311 +0,0 @@
-Introduction
-============
-
-dm-cache is a device mapper target written by Joe Thornber, Heinz
-Mauelshagen, and Mike Snitzer.
-
-It aims to improve performance of a block device (eg, a spindle) by
-dynamically migrating some of its data to a faster, smaller device
-(eg, an SSD).
-
-This device-mapper solution allows us to insert this caching at
-different levels of the dm stack, for instance above the data device for
-a thin-provisioning pool.  Caching solutions that are integrated more
-closely with the virtual memory system should give better performance.
-
-The target reuses the metadata library used in the thin-provisioning
-library.
-
-The decision as to what data to migrate and when is left to a plug-in
-policy module.  Several of these have been written as we experiment,
-and we hope other people will contribute others for specific io
-scenarios (eg. a vm image server).
-
-Glossary
-========
-
-  Migration -  Movement of the primary copy of a logical block from one
-	       device to the other.
-  Promotion -  Migration from slow device to fast device.
-  Demotion  -  Migration from fast device to slow device.
-
-The origin device always contains a copy of the logical block, which
-may be out of date or kept in sync with the copy on the cache device
-(depending on policy).
-
-Design
-======
-
-Sub-devices
------------
-
-The target is constructed by passing three devices to it (along with
-other parameters detailed later):
-
-1. An origin device - the big, slow one.
-
-2. A cache device - the small, fast one.
-
-3. A small metadata device - records which blocks are in the cache,
-   which are dirty, and extra hints for use by the policy object.
-   This information could be put on the cache device, but having it
-   separate allows the volume manager to configure it differently,
-   e.g. as a mirror for extra robustness.  This metadata device may only
-   be used by a single cache device.
-
-Fixed block size
-----------------
-
-The origin is divided up into blocks of a fixed size.  This block size
-is configurable when you first create the cache.  Typically we've been
-using block sizes of 256KB - 1024KB.  The block size must be between 64
-sectors (32KB) and 2097152 sectors (1GB) and a multiple of 64 sectors (32KB).
-
-Having a fixed block size simplifies the target a lot.  But it is
-something of a compromise.  For instance, a small part of a block may be
-getting hit a lot, yet the whole block will be promoted to the cache.
-So large block sizes are bad because they waste cache space.  And small
-block sizes are bad because they increase the amount of metadata (both
-in core and on disk).
-
-Cache operating modes
----------------------
-
-The cache has three operating modes: writeback, writethrough and
-passthrough.
-
-If writeback, the default, is selected then a write to a block that is
-cached will go only to the cache and the block will be marked dirty in
-the metadata.
-
-If writethrough is selected then a write to a cached block will not
-complete until it has hit both the origin and cache devices.  Clean
-blocks should remain clean.
-
-If passthrough is selected, useful when the cache contents are not known
-to be coherent with the origin device, then all reads are served from
-the origin device (all reads miss the cache) and all writes are
-forwarded to the origin device; additionally, write hits cause cache
-block invalidates.  To enable passthrough mode the cache must be clean.
-Passthrough mode allows a cache device to be activated without having to
-worry about coherency.  Coherency that exists is maintained, although
-the cache will gradually cool as writes take place.  If the coherency of
-the cache can later be verified, or established through use of the
-"invalidate_cblocks" message, the cache device can be transitioned to
-writethrough or writeback mode while still warm.  Otherwise, the cache
-contents can be discarded prior to transitioning to the desired
-operating mode.
-
-A simple cleaner policy is provided, which will clean (write back) all
-dirty blocks in a cache.  Useful for decommissioning a cache or when
-shrinking a cache.  Shrinking the cache's fast device requires all cache
-blocks, in the area of the cache being removed, to be clean.  If the
-area being removed from the cache still contains dirty blocks the resize
-will fail.  Care must be taken to never reduce the volume used for the
-cache's fast device until the cache is clean.  This is of particular
-importance if writeback mode is used.  Writethrough and passthrough
-modes already maintain a clean cache.  Future support to partially clean
-the cache, above a specified threshold, will allow for keeping the cache
-warm and in writeback mode during resize.
-
-Migration throttling
---------------------
-
-Migrating data between the origin and cache device uses bandwidth.
-The user can set a throttle to prevent more than a certain amount of
-migration occurring at any one time.  Currently we're not taking any
-account of normal io traffic going to the devices.  More work needs
-doing here to avoid migrating during those peak io moments.
-
-For the time being, a message "migration_threshold <#sectors>"
-can be used to set the maximum number of sectors being migrated,
-the default being 2048 sectors (1MB).
-
-Updating on-disk metadata
--------------------------
-
-On-disk metadata is committed every time a FLUSH or FUA bio is written.
-If no such requests are made then commits will occur every second.  This
-means the cache behaves like a physical disk that has a volatile write
-cache.  If power is lost you may lose some recent writes.  The metadata
-should always be consistent in spite of any crash.
-
-The 'dirty' state for a cache block changes far too frequently for us
-to keep updating it on the fly.  So we treat it as a hint.  In normal
-operation it will be written when the dm device is suspended.  If the
-system crashes all cache blocks will be assumed dirty when restarted.
-
-Per-block policy hints
-----------------------
-
-Policy plug-ins can store a chunk of data per cache block.  It's up to
-the policy how big this chunk is, but it should be kept small.  Like the
-dirty flags this data is lost if there's a crash so a safe fallback
-value should always be possible.
-
-Policy hints affect performance, not correctness.
-
-Policy messaging
-----------------
-
-Policies will have different tunables, specific to each one, so we
-need a generic way of getting and setting these.  Device-mapper
-messages are used.  Refer to cache-policies.txt.
-
-Discard bitset resolution
--------------------------
-
-We can avoid copying data during migration if we know the block has
-been discarded.  A prime example of this is when mkfs discards the
-whole block device.  We store a bitset tracking the discard state of
-blocks.  However, we allow this bitset to have a different block size
-from the cache blocks.  This is because we need to track the discard
-state for all of the origin device (compare with the dirty bitset
-which is just for the smaller cache device).
-
-Target interface
-================
-
-Constructor
------------
-
- cache <metadata dev> <cache dev> <origin dev> <block size>
-       <#feature args> [<feature arg>]*
-       <policy> <#policy args> [policy args]*
-
- metadata dev    : fast device holding the persistent metadata
- cache dev	 : fast device holding cached data blocks
- origin dev	 : slow device holding original data blocks
- block size      : cache unit size in sectors
-
- #feature args   : number of feature arguments passed
- feature args    : writethrough or passthrough (The default is writeback.)
-
- policy          : the replacement policy to use
- #policy args    : an even number of arguments corresponding to
-                   key/value pairs passed to the policy
- policy args     : key/value pairs passed to the policy
-		   E.g. 'sequential_threshold 1024'
-		   See cache-policies.txt for details.
-
-Optional feature arguments are:
-   writethrough  : write through caching that prohibits cache block
-		   content from being different from origin block content.
-		   Without this argument, the default behaviour is to write
-		   back cache block contents later for performance reasons,
-		   so they may differ from the corresponding origin blocks.
-
-   passthrough	 : a degraded mode useful for various cache coherency
-		   situations (e.g., rolling back snapshots of
-		   underlying storage).	 Reads and writes always go to
-		   the origin.	If a write goes to a cached origin
-		   block, then the cache block is invalidated.
-		   To enable passthrough mode the cache must be clean.
-
-   metadata2	: use version 2 of the metadata.  This stores the dirty bits
-                  in a separate btree, which improves speed of shutting
-		  down the cache.
-
-   no_discard_passdown	: disable passing down discards from the cache
-			  to the origin's data device.
-
-A policy called 'default' is always registered.  This is an alias for
-the policy we currently think is giving best all round performance.
-
-As the default policy could vary between kernels, if you are relying on
-the characteristics of a specific policy, always request it by name.
-
-Status
-------
-
-<metadata block size> <#used metadata blocks>/<#total metadata blocks>
-<cache block size> <#used cache blocks>/<#total cache blocks>
-<#read hits> <#read misses> <#write hits> <#write misses>
-<#demotions> <#promotions> <#dirty> <#features> <features>*
-<#core args> <core args>* <policy name> <#policy args> <policy args>*
-<cache metadata mode>
-
-metadata block size	 : Fixed block size for each metadata block in
-			     sectors
-#used metadata blocks	 : Number of metadata blocks used
-#total metadata blocks	 : Total number of metadata blocks
-cache block size	 : Configurable block size for the cache device
-			     in sectors
-#used cache blocks	 : Number of blocks resident in the cache
-#total cache blocks	 : Total number of cache blocks
-#read hits		 : Number of times a READ bio has been mapped
-			     to the cache
-#read misses		 : Number of times a READ bio has been mapped
-			     to the origin
-#write hits		 : Number of times a WRITE bio has been mapped
-			     to the cache
-#write misses		 : Number of times a WRITE bio has been
-			     mapped to the origin
-#demotions		 : Number of times a block has been removed
-			     from the cache
-#promotions		 : Number of times a block has been moved to
-			     the cache
-#dirty			 : Number of blocks in the cache that differ
-			     from the origin
-#feature args		 : Number of feature args to follow
-feature args		 : 'writethrough' (optional)
-#core args		 : Number of core arguments (must be even)
-core args		 : Key/value pairs for tuning the core
-			     e.g. migration_threshold
-policy name		 : Name of the policy
-#policy args		 : Number of policy arguments to follow (must be even)
-policy args		 : Key/value pairs e.g. sequential_threshold
-cache metadata mode      : ro if read-only, rw if read-write
-	In serious cases where even a read-only mode is deemed unsafe
-	no further I/O will be permitted and the status will just
-	contain the string 'Fail'.  The userspace recovery tools
-	should then be used.
-needs_check		 : 'needs_check' if set, '-' if not set
-	A metadata operation has failed, resulting in the needs_check
-	flag being set in the metadata's superblock.  The metadata
-	device must be deactivated and checked/repaired before the
-	cache can be made fully operational again.  '-' indicates
-	needs_check is not set.
-
-Messages
---------
-
-Policies will have different tunables, specific to each one, so we
-need a generic way of getting and setting these.  Device-mapper
-messages are used.  (A sysfs interface would also be possible.)
-
-The message format is:
-
-   <key> <value>
-
-E.g.
-   dmsetup message my_cache 0 sequential_threshold 1024
-
-
-Invalidation is removing an entry from the cache without writing it
-back.  Cache blocks can be invalidated via the invalidate_cblocks
-message, which takes an arbitrary number of cblock ranges.  Each cblock
-range's end value is "one past the end", meaning 5-10 expresses a range
-of values from 5 to 9.  Each cblock must be expressed as a decimal
-value, in the future a variant message that takes cblock ranges
-expressed in hexadecimal may be needed to better support efficient
-invalidation of larger caches.  The cache must be in passthrough mode
-when invalidate_cblocks is used.
-
-   invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
-
-E.g.
-   dmsetup message my_cache 0 invalidate_cblocks 2345 3456-4567 5678-6789
-
-Examples
-========
-
-The test suite can be found here:
-
-https://github.com/jthornber/device-mapper-test-suite
-
-dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
-	/dev/mapper/ssd /dev/mapper/origin 512 1 writeback default 0'
-dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
-	/dev/mapper/ssd /dev/mapper/origin 1024 1 writeback \
-	mq 4 sequential_threshold 1024 random_threshold 8'
diff --git a/Documentation/device-mapper/delay.rst b/Documentation/device-mapper/delay.rst
new file mode 100644
index 000000000000..917ba8c33359
--- /dev/null
+++ b/Documentation/device-mapper/delay.rst
@@ -0,0 +1,31 @@
+========
+dm-delay
+========
+
+Device-Mapper's "delay" target delays reads and/or writes
+and maps them to different devices.
+
+Parameters::
+
+    <device> <offset> <delay> [<write_device> <write_offset> <write_delay>
+			       [<flush_device> <flush_offset> <flush_delay>]]
+
+With separate write parameters, the first set is only used for reads.
+Offsets are specified in sectors.
+Delays are specified in milliseconds.
+
+Example scripts
+===============
+
+::
+
+	#!/bin/sh
+	# Create device delaying rw operation for 500ms
+	echo "0 `blockdev --getsz $1` delay $1 0 500" | dmsetup create delayed
+
+::
+
+	#!/bin/sh
+	# Create device delaying only write operation for 500ms and
+	# splitting reads and writes to different devices $1 $2
+	echo "0 `blockdev --getsz $1` delay $1 0 0 $2 0 500" | dmsetup create delayed
diff --git a/Documentation/device-mapper/delay.txt b/Documentation/device-mapper/delay.txt
deleted file mode 100644
index 6426c45273cb..000000000000
--- a/Documentation/device-mapper/delay.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-dm-delay
-========
-
-Device-Mapper's "delay" target delays reads and/or writes
-and maps them to different devices.
-
-Parameters:
-    <device> <offset> <delay> [<write_device> <write_offset> <write_delay>
-			       [<flush_device> <flush_offset> <flush_delay>]]
-
-With separate write parameters, the first set is only used for reads.
-Offsets are specified in sectors.
-Delays are specified in milliseconds.
-
-Example scripts
-===============
-[[
-#!/bin/sh
-# Create device delaying rw operation for 500ms
-echo "0 `blockdev --getsz $1` delay $1 0 500" | dmsetup create delayed
-]]
-
-[[
-#!/bin/sh
-# Create device delaying only write operation for 500ms and
-# splitting reads and writes to different devices $1 $2
-echo "0 `blockdev --getsz $1` delay $1 0 0 $2 0 500" | dmsetup create delayed
-]]
diff --git a/Documentation/device-mapper/dm-crypt.rst b/Documentation/device-mapper/dm-crypt.rst
new file mode 100644
index 000000000000..8f4a3f889d43
--- /dev/null
+++ b/Documentation/device-mapper/dm-crypt.rst
@@ -0,0 +1,173 @@
+========
+dm-crypt
+========
+
+Device-Mapper's "crypt" target provides transparent encryption of block devices
+using the kernel crypto API.
+
+For a more detailed description of supported parameters see:
+https://gitlab.com/cryptsetup/cryptsetup/wikis/DMCrypt
+
+Parameters::
+
+	      <cipher> <key> <iv_offset> <device path> \
+	      <offset> [<#opt_params> <opt_params>]
+
+<cipher>
+    Encryption cipher, encryption mode and Initial Vector (IV) generator.
+
+    The cipher specifications format is::
+
+       cipher[:keycount]-chainmode-ivmode[:ivopts]
+
+    Examples::
+
+       aes-cbc-essiv:sha256
+       aes-xts-plain64
+       serpent-xts-plain64
+
+    Cipher format also supports direct specification with kernel crypt API
+    format (selected by capi: prefix). The IV specification is the same
+    as for the first format type.
+    This format is mainly used for specification of authenticated modes.
+
+    The crypto API cipher specifications format is::
+
+        capi:cipher_api_spec-ivmode[:ivopts]
+
+    Examples::
+
+        capi:cbc(aes)-essiv:sha256
+        capi:xts(aes)-plain64
+
+    Examples of authenticated modes::
+
+        capi:gcm(aes)-random
+        capi:authenc(hmac(sha256),xts(aes))-random
+        capi:rfc7539(chacha20,poly1305)-random
+
+    The /proc/crypto contains a list of curently loaded crypto modes.
+
+<key>
+    Key used for encryption. It is encoded either as a hexadecimal number
+    or it can be passed as <key_string> prefixed with single colon
+    character (':') for keys residing in kernel keyring service.
+    You can only use key sizes that are valid for the selected cipher
+    in combination with the selected iv mode.
+    Note that for some iv modes the key string can contain additional
+    keys (for example IV seed) so the key contains more parts concatenated
+    into a single string.
+
+<key_string>
+    The kernel keyring key is identified by string in following format:
+    <key_size>:<key_type>:<key_description>.
+
+<key_size>
+    The encryption key size in bytes. The kernel key payload size must match
+    the value passed in <key_size>.
+
+<key_type>
+    Either 'logon' or 'user' kernel key type.
+
+<key_description>
+    The kernel keyring key description crypt target should look for
+    when loading key of <key_type>.
+
+<keycount>
+    Multi-key compatibility mode. You can define <keycount> keys and
+    then sectors are encrypted according to their offsets (sector 0 uses key0;
+    sector 1 uses key1 etc.).  <keycount> must be a power of two.
+
+<iv_offset>
+    The IV offset is a sector count that is added to the sector number
+    before creating the IV.
+
+<device path>
+    This is the device that is going to be used as backend and contains the
+    encrypted data.  You can specify it as a path like /dev/xxx or a device
+    number <major>:<minor>.
+
+<offset>
+    Starting sector within the device where the encrypted data begins.
+
+<#opt_params>
+    Number of optional parameters. If there are no optional parameters,
+    the optional paramaters section can be skipped or #opt_params can be zero.
+    Otherwise #opt_params is the number of following arguments.
+
+    Example of optional parameters section:
+        3 allow_discards same_cpu_crypt submit_from_crypt_cpus
+
+allow_discards
+    Block discard requests (a.k.a. TRIM) are passed through the crypt device.
+    The default is to ignore discard requests.
+
+    WARNING: Assess the specific security risks carefully before enabling this
+    option.  For example, allowing discards on encrypted devices may lead to
+    the leak of information about the ciphertext device (filesystem type,
+    used space etc.) if the discarded blocks can be located easily on the
+    device later.
+
+same_cpu_crypt
+    Perform encryption using the same cpu that IO was submitted on.
+    The default is to use an unbound workqueue so that encryption work
+    is automatically balanced between available CPUs.
+
+submit_from_crypt_cpus
+    Disable offloading writes to a separate thread after encryption.
+    There are some situations where offloading write bios from the
+    encryption threads to a single thread degrades performance
+    significantly.  The default is to offload write bios to the same
+    thread because it benefits CFQ to have writes submitted using the
+    same context.
+
+integrity:<bytes>:<type>
+    The device requires additional <bytes> metadata per-sector stored
+    in per-bio integrity structure. This metadata must by provided
+    by underlying dm-integrity target.
+
+    The <type> can be "none" if metadata is used only for persistent IV.
+
+    For Authenticated Encryption with Additional Data (AEAD)
+    the <type> is "aead". An AEAD mode additionally calculates and verifies
+    integrity for the encrypted device. The additional space is then
+    used for storing authentication tag (and persistent IV if needed).
+
+sector_size:<bytes>
+    Use <bytes> as the encryption unit instead of 512 bytes sectors.
+    This option can be in range 512 - 4096 bytes and must be power of two.
+    Virtual device will announce this size as a minimal IO and logical sector.
+
+iv_large_sectors
+   IV generators will use sector number counted in <sector_size> units
+   instead of default 512 bytes sectors.
+
+   For example, if <sector_size> is 4096 bytes, plain64 IV for the second
+   sector will be 8 (without flag) and 1 if iv_large_sectors is present.
+   The <iv_offset> must be multiple of <sector_size> (in 512 bytes units)
+   if this flag is specified.
+
+Example scripts
+===============
+LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
+encryption with dm-crypt using the 'cryptsetup' utility, see
+https://gitlab.com/cryptsetup/cryptsetup
+
+::
+
+	#!/bin/sh
+	# Create a crypt device using dmsetup
+	dmsetup create crypt1 --table "0 `blockdev --getsz $1` crypt aes-cbc-essiv:sha256 babebabebabebabebabebabebabebabe 0 $1 0"
+
+::
+
+	#!/bin/sh
+	# Create a crypt device using dmsetup when encryption key is stored in keyring service
+	dmsetup create crypt2 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 :32:logon:my_prefix:my_key 0 $1 0"
+
+::
+
+	#!/bin/sh
+	# Create a crypt device using cryptsetup and LUKS header with default cipher
+	cryptsetup luksFormat $1
+	cryptsetup luksOpen $1 crypt1
diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt
deleted file mode 100644
index 3b3e1de21c9c..000000000000
--- a/Documentation/device-mapper/dm-crypt.txt
+++ /dev/null
@@ -1,162 +0,0 @@
-dm-crypt
-=========
-
-Device-Mapper's "crypt" target provides transparent encryption of block devices
-using the kernel crypto API.
-
-For a more detailed description of supported parameters see:
-https://gitlab.com/cryptsetup/cryptsetup/wikis/DMCrypt
-
-Parameters: <cipher> <key> <iv_offset> <device path> \
-	      <offset> [<#opt_params> <opt_params>]
-
-<cipher>
-    Encryption cipher, encryption mode and Initial Vector (IV) generator.
-
-    The cipher specifications format is:
-       cipher[:keycount]-chainmode-ivmode[:ivopts]
-    Examples:
-       aes-cbc-essiv:sha256
-       aes-xts-plain64
-       serpent-xts-plain64
-
-    Cipher format also supports direct specification with kernel crypt API
-    format (selected by capi: prefix). The IV specification is the same
-    as for the first format type.
-    This format is mainly used for specification of authenticated modes.
-
-    The crypto API cipher specifications format is:
-        capi:cipher_api_spec-ivmode[:ivopts]
-    Examples:
-        capi:cbc(aes)-essiv:sha256
-        capi:xts(aes)-plain64
-    Examples of authenticated modes:
-        capi:gcm(aes)-random
-        capi:authenc(hmac(sha256),xts(aes))-random
-        capi:rfc7539(chacha20,poly1305)-random
-
-    The /proc/crypto contains a list of curently loaded crypto modes.
-
-<key>
-    Key used for encryption. It is encoded either as a hexadecimal number
-    or it can be passed as <key_string> prefixed with single colon
-    character (':') for keys residing in kernel keyring service.
-    You can only use key sizes that are valid for the selected cipher
-    in combination with the selected iv mode.
-    Note that for some iv modes the key string can contain additional
-    keys (for example IV seed) so the key contains more parts concatenated
-    into a single string.
-
-<key_string>
-    The kernel keyring key is identified by string in following format:
-    <key_size>:<key_type>:<key_description>.
-
-<key_size>
-    The encryption key size in bytes. The kernel key payload size must match
-    the value passed in <key_size>.
-
-<key_type>
-    Either 'logon' or 'user' kernel key type.
-
-<key_description>
-    The kernel keyring key description crypt target should look for
-    when loading key of <key_type>.
-
-<keycount>
-    Multi-key compatibility mode. You can define <keycount> keys and
-    then sectors are encrypted according to their offsets (sector 0 uses key0;
-    sector 1 uses key1 etc.).  <keycount> must be a power of two.
-
-<iv_offset>
-    The IV offset is a sector count that is added to the sector number
-    before creating the IV.
-
-<device path>
-    This is the device that is going to be used as backend and contains the
-    encrypted data.  You can specify it as a path like /dev/xxx or a device
-    number <major>:<minor>.
-
-<offset>
-    Starting sector within the device where the encrypted data begins.
-
-<#opt_params>
-    Number of optional parameters. If there are no optional parameters,
-    the optional paramaters section can be skipped or #opt_params can be zero.
-    Otherwise #opt_params is the number of following arguments.
-
-    Example of optional parameters section:
-        3 allow_discards same_cpu_crypt submit_from_crypt_cpus
-
-allow_discards
-    Block discard requests (a.k.a. TRIM) are passed through the crypt device.
-    The default is to ignore discard requests.
-
-    WARNING: Assess the specific security risks carefully before enabling this
-    option.  For example, allowing discards on encrypted devices may lead to
-    the leak of information about the ciphertext device (filesystem type,
-    used space etc.) if the discarded blocks can be located easily on the
-    device later.
-
-same_cpu_crypt
-    Perform encryption using the same cpu that IO was submitted on.
-    The default is to use an unbound workqueue so that encryption work
-    is automatically balanced between available CPUs.
-
-submit_from_crypt_cpus
-    Disable offloading writes to a separate thread after encryption.
-    There are some situations where offloading write bios from the
-    encryption threads to a single thread degrades performance
-    significantly.  The default is to offload write bios to the same
-    thread because it benefits CFQ to have writes submitted using the
-    same context.
-
-integrity:<bytes>:<type>
-    The device requires additional <bytes> metadata per-sector stored
-    in per-bio integrity structure. This metadata must by provided
-    by underlying dm-integrity target.
-
-    The <type> can be "none" if metadata is used only for persistent IV.
-
-    For Authenticated Encryption with Additional Data (AEAD)
-    the <type> is "aead". An AEAD mode additionally calculates and verifies
-    integrity for the encrypted device. The additional space is then
-    used for storing authentication tag (and persistent IV if needed).
-
-sector_size:<bytes>
-    Use <bytes> as the encryption unit instead of 512 bytes sectors.
-    This option can be in range 512 - 4096 bytes and must be power of two.
-    Virtual device will announce this size as a minimal IO and logical sector.
-
-iv_large_sectors
-   IV generators will use sector number counted in <sector_size> units
-   instead of default 512 bytes sectors.
-
-   For example, if <sector_size> is 4096 bytes, plain64 IV for the second
-   sector will be 8 (without flag) and 1 if iv_large_sectors is present.
-   The <iv_offset> must be multiple of <sector_size> (in 512 bytes units)
-   if this flag is specified.
-
-Example scripts
-===============
-LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
-encryption with dm-crypt using the 'cryptsetup' utility, see
-https://gitlab.com/cryptsetup/cryptsetup
-
-[[
-#!/bin/sh
-# Create a crypt device using dmsetup
-dmsetup create crypt1 --table "0 `blockdev --getsz $1` crypt aes-cbc-essiv:sha256 babebabebabebabebabebabebabebabe 0 $1 0"
-]]
-
-[[
-#!/bin/sh
-# Create a crypt device using dmsetup when encryption key is stored in keyring service
-dmsetup create crypt2 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 :32:logon:my_prefix:my_key 0 $1 0"
-]]
-
-[[
-#!/bin/sh
-# Create a crypt device using cryptsetup and LUKS header with default cipher
-cryptsetup luksFormat $1
-cryptsetup luksOpen $1 crypt1
-]]
diff --git a/Documentation/device-mapper/dm-flakey.rst b/Documentation/device-mapper/dm-flakey.rst
new file mode 100644
index 000000000000..86138735879d
--- /dev/null
+++ b/Documentation/device-mapper/dm-flakey.rst
@@ -0,0 +1,74 @@
+=========
+dm-flakey
+=========
+
+This target is the same as the linear target except that it exhibits
+unreliable behaviour periodically.  It's been found useful in simulating
+failing devices for testing purposes.
+
+Starting from the time the table is loaded, the device is available for
+<up interval> seconds, then exhibits unreliable behaviour for <down
+interval> seconds, and then this cycle repeats.
+
+Also, consider using this in combination with the dm-delay target too,
+which can delay reads and writes and/or send them to different
+underlying devices.
+
+Table parameters
+----------------
+
+::
+
+  <dev path> <offset> <up interval> <down interval> \
+    [<num_features> [<feature arguments>]]
+
+Mandatory parameters:
+
+    <dev path>:
+        Full pathname to the underlying block-device, or a
+        "major:minor" device-number.
+    <offset>:
+        Starting sector within the device.
+    <up interval>:
+        Number of seconds device is available.
+    <down interval>:
+        Number of seconds device returns errors.
+
+Optional feature parameters:
+
+  If no feature parameters are present, during the periods of
+  unreliability, all I/O returns errors.
+
+  drop_writes:
+	All write I/O is silently ignored.
+	Read I/O is handled correctly.
+
+  error_writes:
+	All write I/O is failed with an error signalled.
+	Read I/O is handled correctly.
+
+  corrupt_bio_byte <Nth_byte> <direction> <value> <flags>:
+	During <down interval>, replace <Nth_byte> of the data of
+	each matching bio with <value>.
+
+    <Nth_byte>:
+	The offset of the byte to replace.
+	Counting starts at 1, to replace the first byte.
+    <direction>:
+	Either 'r' to corrupt reads or 'w' to corrupt writes.
+	'w' is incompatible with drop_writes.
+    <value>:
+	The value (from 0-255) to write.
+    <flags>:
+	Perform the replacement only if bio->bi_opf has all the
+	selected flags set.
+
+Examples:
+
+Replaces the 32nd byte of READ bios with the value 1::
+
+  corrupt_bio_byte 32 r 1 0
+
+Replaces the 224th byte of REQ_META (=32) bios with the value 0::
+
+  corrupt_bio_byte 224 w 0 32
diff --git a/Documentation/device-mapper/dm-flakey.txt b/Documentation/device-mapper/dm-flakey.txt
deleted file mode 100644
index 9f0e247d0877..000000000000
--- a/Documentation/device-mapper/dm-flakey.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-dm-flakey
-=========
-
-This target is the same as the linear target except that it exhibits
-unreliable behaviour periodically.  It's been found useful in simulating
-failing devices for testing purposes.
-
-Starting from the time the table is loaded, the device is available for
-<up interval> seconds, then exhibits unreliable behaviour for <down
-interval> seconds, and then this cycle repeats.
-
-Also, consider using this in combination with the dm-delay target too,
-which can delay reads and writes and/or send them to different
-underlying devices.
-
-Table parameters
-----------------
-  <dev path> <offset> <up interval> <down interval> \
-    [<num_features> [<feature arguments>]]
-
-Mandatory parameters:
-    <dev path>: Full pathname to the underlying block-device, or a
-                "major:minor" device-number.
-    <offset>: Starting sector within the device.
-    <up interval>: Number of seconds device is available.
-    <down interval>: Number of seconds device returns errors.
-
-Optional feature parameters:
-  If no feature parameters are present, during the periods of
-  unreliability, all I/O returns errors.
-
-  drop_writes:
-	All write I/O is silently ignored.
-	Read I/O is handled correctly.
-
-  error_writes:
-	All write I/O is failed with an error signalled.
-	Read I/O is handled correctly.
-
-  corrupt_bio_byte <Nth_byte> <direction> <value> <flags>:
-	During <down interval>, replace <Nth_byte> of the data of
-	each matching bio with <value>.
-
-    <Nth_byte>: The offset of the byte to replace.
-		Counting starts at 1, to replace the first byte.
-    <direction>: Either 'r' to corrupt reads or 'w' to corrupt writes.
-		 'w' is incompatible with drop_writes.
-    <value>: The value (from 0-255) to write.
-    <flags>: Perform the replacement only if bio->bi_opf has all the
-	     selected flags set.
-
-Examples:
-  corrupt_bio_byte 32 r 1 0
-	- replaces the 32nd byte of READ bios with the value 1
-
-  corrupt_bio_byte 224 w 0 32
-	- replaces the 224th byte of REQ_META (=32) bios with the value 0
diff --git a/Documentation/device-mapper/dm-init.rst b/Documentation/device-mapper/dm-init.rst
new file mode 100644
index 000000000000..e5242ff17e9b
--- /dev/null
+++ b/Documentation/device-mapper/dm-init.rst
@@ -0,0 +1,125 @@
+================================
+Early creation of mapped devices
+================================
+
+It is possible to configure a device-mapper device to act as the root device for
+your system in two ways.
+
+The first is to build an initial ramdisk which boots to a minimal userspace
+which configures the device, then pivot_root(8) in to it.
+
+The second is to create one or more device-mappers using the module parameter
+"dm-mod.create=" through the kernel boot command line argument.
+
+The format is specified as a string of data separated by commas and optionally
+semi-colons, where:
+
+ - a comma is used to separate fields like name, uuid, flags and table
+   (specifies one device)
+ - a semi-colon is used to separate devices.
+
+So the format will look like this::
+
+ dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
+
+Where::
+
+	<name>		::= The device name.
+	<uuid>		::= xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx | ""
+	<minor>		::= The device minor number | ""
+	<flags>		::= "ro" | "rw"
+	<table>		::= <start_sector> <num_sectors> <target_type> <target_args>
+	<target_type>	::= "verity" | "linear" | ... (see list below)
+
+The dm line should be equivalent to the one used by the dmsetup tool with the
+`--concise` argument.
+
+Target types
+============
+
+Not all target types are available as there are serious risks in allowing
+activation of certain DM targets without first using userspace tools to check
+the validity of associated metadata.
+
+======================= =======================================================
+`cache`			constrained, userspace should verify cache device
+`crypt`			allowed
+`delay`			allowed
+`era`			constrained, userspace should verify metadata device
+`flakey`		constrained, meant for test
+`linear`		allowed
+`log-writes`		constrained, userspace should verify metadata device
+`mirror`		constrained, userspace should verify main/mirror device
+`raid`			constrained, userspace should verify metadata device
+`snapshot`		constrained, userspace should verify src/dst device
+`snapshot-origin`	allowed
+`snapshot-merge`	constrained, userspace should verify src/dst device
+`striped`		allowed
+`switch`		constrained, userspace should verify dev path
+`thin`			constrained, requires dm target message from userspace
+`thin-pool`		constrained, requires dm target message from userspace
+`verity`		allowed
+`writecache`		constrained, userspace should verify cache device
+`zero`			constrained, not meant for rootfs
+======================= =======================================================
+
+If the target is not listed above, it is constrained by default (not tested).
+
+Examples
+========
+An example of booting to a linear array made up of user-mode linux block
+devices::
+
+  dm-mod.create="lroot,,,rw, 0 4096 linear 98:16 0, 4096 4096 linear 98:32 0" root=/dev/dm-0
+
+This will boot to a rw dm-linear target of 8192 sectors split across two block
+devices identified by their major:minor numbers.  After boot, udev will rename
+this target to /dev/mapper/lroot (depending on the rules). No uuid was assigned.
+
+An example of multiple device-mappers, with the dm-mod.create="..." contents
+is shown here split on multiple lines for readability::
+
+  dm-linear,,1,rw,
+    0 32768 linear 8:1 0,
+    32768 1024000 linear 8:2 0;
+  dm-verity,,3,ro,
+    0 1638400 verity 1 /dev/sdc1 /dev/sdc2 4096 4096 204800 1 sha256
+    ac87db56303c9c1da433d7209b5a6ef3e4779df141200cbd7c157dcb8dd89c42
+    5ebfe87f7df3235b80a117ebc4078e44f55045487ad4a96581d1adb564615b51
+
+Other examples (per target):
+
+"crypt"::
+
+  dm-crypt,,8,ro,
+    0 1048576 crypt aes-xts-plain64
+    babebabebabebabebabebabebabebabebabebabebabebabebabebabebabebabe 0
+    /dev/sda 0 1 allow_discards
+
+"delay"::
+
+  dm-delay,,4,ro,0 409600 delay /dev/sda1 0 500
+
+"linear"::
+
+  dm-linear,,,rw,
+    0 32768 linear /dev/sda1 0,
+    32768 1024000 linear /dev/sda2 0,
+    1056768 204800 linear /dev/sda3 0,
+    1261568 512000 linear /dev/sda4 0
+
+"snapshot-origin"::
+
+  dm-snap-orig,,4,ro,0 409600 snapshot-origin 8:2
+
+"striped"::
+
+  dm-striped,,4,ro,0 1638400 striped 4 4096
+  /dev/sda1 0 /dev/sda2 0 /dev/sda3 0 /dev/sda4 0
+
+"verity"::
+
+  dm-verity,,4,ro,
+    0 1638400 verity 1 8:1 8:2 4096 4096 204800 1 sha256
+    fb1a5a0f00deb908d8b53cb270858975e76cf64105d412ce764225d53b8f3cfd
+    51934789604d1b92399c52e7cb149d1b3a1b74bbbcb103b2a0aaacbed5c08584
diff --git a/Documentation/device-mapper/dm-init.txt b/Documentation/device-mapper/dm-init.txt
deleted file mode 100644
index 130b3c3679c5..000000000000
--- a/Documentation/device-mapper/dm-init.txt
+++ /dev/null
@@ -1,114 +0,0 @@
-Early creation of mapped devices
-====================================
-
-It is possible to configure a device-mapper device to act as the root device for
-your system in two ways.
-
-The first is to build an initial ramdisk which boots to a minimal userspace
-which configures the device, then pivot_root(8) in to it.
-
-The second is to create one or more device-mappers using the module parameter
-"dm-mod.create=" through the kernel boot command line argument.
-
-The format is specified as a string of data separated by commas and optionally
-semi-colons, where:
- - a comma is used to separate fields like name, uuid, flags and table
-   (specifies one device)
- - a semi-colon is used to separate devices.
-
-So the format will look like this:
-
- dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
-
-Where,
-	<name>		::= The device name.
-	<uuid>		::= xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx | ""
-	<minor>		::= The device minor number | ""
-	<flags>		::= "ro" | "rw"
-	<table>		::= <start_sector> <num_sectors> <target_type> <target_args>
-	<target_type>	::= "verity" | "linear" | ... (see list below)
-
-The dm line should be equivalent to the one used by the dmsetup tool with the
---concise argument.
-
-Target types
-============
-
-Not all target types are available as there are serious risks in allowing
-activation of certain DM targets without first using userspace tools to check
-the validity of associated metadata.
-
-	"cache":		constrained, userspace should verify cache device
-	"crypt":		allowed
-	"delay":		allowed
-	"era":			constrained, userspace should verify metadata device
-	"flakey":		constrained, meant for test
-	"linear":		allowed
-	"log-writes":		constrained, userspace should verify metadata device
-	"mirror":		constrained, userspace should verify main/mirror device
-	"raid":			constrained, userspace should verify metadata device
-	"snapshot":		constrained, userspace should verify src/dst device
-	"snapshot-origin":	allowed
-	"snapshot-merge":	constrained, userspace should verify src/dst device
-	"striped":		allowed
-	"switch":		constrained, userspace should verify dev path
-	"thin":			constrained, requires dm target message from userspace
-	"thin-pool":		constrained, requires dm target message from userspace
-	"verity":		allowed
-	"writecache":		constrained, userspace should verify cache device
-	"zero":			constrained, not meant for rootfs
-
-If the target is not listed above, it is constrained by default (not tested).
-
-Examples
-========
-An example of booting to a linear array made up of user-mode linux block
-devices:
-
-  dm-mod.create="lroot,,,rw, 0 4096 linear 98:16 0, 4096 4096 linear 98:32 0" root=/dev/dm-0
-
-This will boot to a rw dm-linear target of 8192 sectors split across two block
-devices identified by their major:minor numbers.  After boot, udev will rename
-this target to /dev/mapper/lroot (depending on the rules). No uuid was assigned.
-
-An example of multiple device-mappers, with the dm-mod.create="..." contents is shown here
-split on multiple lines for readability:
-
-  dm-linear,,1,rw,
-    0 32768 linear 8:1 0,
-    32768 1024000 linear 8:2 0;
-  dm-verity,,3,ro,
-    0 1638400 verity 1 /dev/sdc1 /dev/sdc2 4096 4096 204800 1 sha256
-    ac87db56303c9c1da433d7209b5a6ef3e4779df141200cbd7c157dcb8dd89c42
-    5ebfe87f7df3235b80a117ebc4078e44f55045487ad4a96581d1adb564615b51
-
-Other examples (per target):
-
-"crypt":
-  dm-crypt,,8,ro,
-    0 1048576 crypt aes-xts-plain64
-    babebabebabebabebabebabebabebabebabebabebabebabebabebabebabebabe 0
-    /dev/sda 0 1 allow_discards
-
-"delay":
-  dm-delay,,4,ro,0 409600 delay /dev/sda1 0 500
-
-"linear":
-  dm-linear,,,rw,
-    0 32768 linear /dev/sda1 0,
-    32768 1024000 linear /dev/sda2 0,
-    1056768 204800 linear /dev/sda3 0,
-    1261568 512000 linear /dev/sda4 0
-
-"snapshot-origin":
-  dm-snap-orig,,4,ro,0 409600 snapshot-origin 8:2
-
-"striped":
-  dm-striped,,4,ro,0 1638400 striped 4 4096
-  /dev/sda1 0 /dev/sda2 0 /dev/sda3 0 /dev/sda4 0
-
-"verity":
-  dm-verity,,4,ro,
-    0 1638400 verity 1 8:1 8:2 4096 4096 204800 1 sha256
-    fb1a5a0f00deb908d8b53cb270858975e76cf64105d412ce764225d53b8f3cfd
-    51934789604d1b92399c52e7cb149d1b3a1b74bbbcb103b2a0aaacbed5c08584
diff --git a/Documentation/device-mapper/dm-integrity.rst b/Documentation/device-mapper/dm-integrity.rst
new file mode 100644
index 000000000000..a30aa91b5fbe
--- /dev/null
+++ b/Documentation/device-mapper/dm-integrity.rst
@@ -0,0 +1,259 @@
+============
+dm-integrity
+============
+
+The dm-integrity target emulates a block device that has additional
+per-sector tags that can be used for storing integrity information.
+
+A general problem with storing integrity tags with every sector is that
+writing the sector and the integrity tag must be atomic - i.e. in case of
+crash, either both sector and integrity tag or none of them is written.
+
+To guarantee write atomicity, the dm-integrity target uses journal, it
+writes sector data and integrity tags into a journal, commits the journal
+and then copies the data and integrity tags to their respective location.
+
+The dm-integrity target can be used with the dm-crypt target - in this
+situation the dm-crypt target creates the integrity data and passes them
+to the dm-integrity target via bio_integrity_payload attached to the bio.
+In this mode, the dm-crypt and dm-integrity targets provide authenticated
+disk encryption - if the attacker modifies the encrypted device, an I/O
+error is returned instead of random data.
+
+The dm-integrity target can also be used as a standalone target, in this
+mode it calculates and verifies the integrity tag internally. In this
+mode, the dm-integrity target can be used to detect silent data
+corruption on the disk or in the I/O path.
+
+There's an alternate mode of operation where dm-integrity uses bitmap
+instead of a journal. If a bit in the bitmap is 1, the corresponding
+region's data and integrity tags are not synchronized - if the machine
+crashes, the unsynchronized regions will be recalculated. The bitmap mode
+is faster than the journal mode, because we don't have to write the data
+twice, but it is also less reliable, because if data corruption happens
+when the machine crashes, it may not be detected.
+
+When loading the target for the first time, the kernel driver will format
+the device. But it will only format the device if the superblock contains
+zeroes. If the superblock is neither valid nor zeroed, the dm-integrity
+target can't be loaded.
+
+To use the target for the first time:
+
+1. overwrite the superblock with zeroes
+2. load the dm-integrity target with one-sector size, the kernel driver
+   will format the device
+3. unload the dm-integrity target
+4. read the "provided_data_sectors" value from the superblock
+5. load the dm-integrity target with the the target size
+   "provided_data_sectors"
+6. if you want to use dm-integrity with dm-crypt, load the dm-crypt target
+   with the size "provided_data_sectors"
+
+
+Target arguments:
+
+1. the underlying block device
+
+2. the number of reserved sector at the beginning of the device - the
+   dm-integrity won't read of write these sectors
+
+3. the size of the integrity tag (if "-" is used, the size is taken from
+   the internal-hash algorithm)
+
+4. mode:
+
+	D - direct writes (without journal)
+		in this mode, journaling is
+		not used and data sectors and integrity tags are written
+		separately. In case of crash, it is possible that the data
+		and integrity tag doesn't match.
+	J - journaled writes
+		data and integrity tags are written to the
+		journal and atomicity is guaranteed. In case of crash,
+		either both data and tag or none of them are written. The
+		journaled mode degrades write throughput twice because the
+		data have to be written twice.
+	B - bitmap mode - data and metadata are written without any
+		synchronization, the driver maintains a bitmap of dirty
+		regions where data and metadata don't match. This mode can
+		only be used with internal hash.
+	R - recovery mode - in this mode, journal is not replayed,
+		checksums are not checked and writes to the device are not
+		allowed. This mode is useful for data recovery if the
+		device cannot be activated in any of the other standard
+		modes.
+
+5. the number of additional arguments
+
+Additional arguments:
+
+journal_sectors:number
+	The size of journal, this argument is used only if formatting the
+	device. If the device is already formatted, the value from the
+	superblock is used.
+
+interleave_sectors:number
+	The number of interleaved sectors. This values is rounded down to
+	a power of two. If the device is already formatted, the value from
+	the superblock is used.
+
+meta_device:device
+	Don't interleave the data and metadata on on device. Use a
+	separate device for metadata.
+
+buffer_sectors:number
+	The number of sectors in one buffer. The value is rounded down to
+	a power of two.
+
+	The tag area is accessed using buffers, the buffer size is
+	configurable. The large buffer size means that the I/O size will
+	be larger, but there could be less I/Os issued.
+
+journal_watermark:number
+	The journal watermark in percents. When the size of the journal
+	exceeds this watermark, the thread that flushes the journal will
+	be started.
+
+commit_time:number
+	Commit time in milliseconds. When this time passes, the journal is
+	written. The journal is also written immediatelly if the FLUSH
+	request is received.
+
+internal_hash:algorithm(:key)	(the key is optional)
+	Use internal hash or crc.
+	When this argument is used, the dm-integrity target won't accept
+	integrity tags from the upper target, but it will automatically
+	generate and verify the integrity tags.
+
+	You can use a crc algorithm (such as crc32), then integrity target
+	will protect the data against accidental corruption.
+	You can also use a hmac algorithm (for example
+	"hmac(sha256):0123456789abcdef"), in this mode it will provide
+	cryptographic authentication of the data without encryption.
+
+	When this argument is not used, the integrity tags are accepted
+	from an upper layer target, such as dm-crypt. The upper layer
+	target should check the validity of the integrity tags.
+
+recalculate
+	Recalculate the integrity tags automatically. It is only valid
+	when using internal hash.
+
+journal_crypt:algorithm(:key)	(the key is optional)
+	Encrypt the journal using given algorithm to make sure that the
+	attacker can't read the journal. You can use a block cipher here
+	(such as "cbc(aes)") or a stream cipher (for example "chacha20",
+	"salsa20", "ctr(aes)" or "ecb(arc4)").
+
+	The journal contains history of last writes to the block device,
+	an attacker reading the journal could see the last sector nubmers
+	that were written. From the sector numbers, the attacker can infer
+	the size of files that were written. To protect against this
+	situation, you can encrypt the journal.
+
+journal_mac:algorithm(:key)	(the key is optional)
+	Protect sector numbers in the journal from accidental or malicious
+	modification. To protect against accidental modification, use a
+	crc algorithm, to protect against malicious modification, use a
+	hmac algorithm with a key.
+
+	This option is not needed when using internal-hash because in this
+	mode, the integrity of journal entries is checked when replaying
+	the journal. Thus, modified sector number would be detected at
+	this stage.
+
+block_size:number
+	The size of a data block in bytes.  The larger the block size the
+	less overhead there is for per-block integrity metadata.
+	Supported values are 512, 1024, 2048 and 4096 bytes.  If not
+	specified the default block size is 512 bytes.
+
+sectors_per_bit:number
+	In the bitmap mode, this parameter specifies the number of
+	512-byte sectors that corresponds to one bitmap bit.
+
+bitmap_flush_interval:number
+	The bitmap flush interval in milliseconds. The metadata buffers
+	are synchronized when this interval expires.
+
+
+The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
+be changed when reloading the target (load an inactive table and swap the
+tables with suspend and resume). The other arguments should not be changed
+when reloading the target because the layout of disk data depend on them
+and the reloaded target would be non-functional.
+
+
+The layout of the formatted block device:
+
+* reserved sectors
+    (they are not used by this target, they can be used for
+    storing LUKS metadata or for other purpose), the size of the reserved
+    area is specified in the target arguments
+
+* superblock (4kiB)
+	* magic string - identifies that the device was formatted
+	* version
+	* log2(interleave sectors)
+	* integrity tag size
+	* the number of journal sections
+	* provided data sectors - the number of sectors that this target
+	  provides (i.e. the size of the device minus the size of all
+	  metadata and padding). The user of this target should not send
+	  bios that access data beyond the "provided data sectors" limit.
+	* flags
+	    SB_FLAG_HAVE_JOURNAL_MAC
+		- a flag is set if journal_mac is used
+	    SB_FLAG_RECALCULATING
+		- recalculating is in progress
+	    SB_FLAG_DIRTY_BITMAP
+		- journal area contains the bitmap of dirty
+		  blocks
+	* log2(sectors per block)
+	* a position where recalculating finished
+* journal
+	The journal is divided into sections, each section contains:
+
+	* metadata area (4kiB), it contains journal entries
+
+	  - every journal entry contains:
+
+		* logical sector (specifies where the data and tag should
+		  be written)
+		* last 8 bytes of data
+		* integrity tag (the size is specified in the superblock)
+
+	  - every metadata sector ends with
+
+		* mac (8-bytes), all the macs in 8 metadata sectors form a
+		  64-byte value. It is used to store hmac of sector
+		  numbers in the journal section, to protect against a
+		  possibility that the attacker tampers with sector
+		  numbers in the journal.
+		* commit id
+
+	* data area (the size is variable; it depends on how many journal
+	  entries fit into the metadata area)
+
+	    - every sector in the data area contains:
+
+		* data (504 bytes of data, the last 8 bytes are stored in
+		  the journal entry)
+		* commit id
+
+	To test if the whole journal section was written correctly, every
+	512-byte sector of the journal ends with 8-byte commit id. If the
+	commit id matches on all sectors in a journal section, then it is
+	assumed that the section was written correctly. If the commit id
+	doesn't match, the section was written partially and it should not
+	be replayed.
+
+* one or more runs of interleaved tags and data.
+    Each run contains:
+
+	* tag area - it contains integrity tags. There is one tag for each
+	  sector in the data area
+	* data area - it contains data sectors. The number of data sectors
+	  in one run must be a power of two. log2 of this value is stored
+	  in the superblock.
diff --git a/Documentation/device-mapper/dm-integrity.txt b/Documentation/device-mapper/dm-integrity.txt
deleted file mode 100644
index d63d78ffeb73..000000000000
--- a/Documentation/device-mapper/dm-integrity.txt
+++ /dev/null
@@ -1,233 +0,0 @@
-The dm-integrity target emulates a block device that has additional
-per-sector tags that can be used for storing integrity information.
-
-A general problem with storing integrity tags with every sector is that
-writing the sector and the integrity tag must be atomic - i.e. in case of
-crash, either both sector and integrity tag or none of them is written.
-
-To guarantee write atomicity, the dm-integrity target uses journal, it
-writes sector data and integrity tags into a journal, commits the journal
-and then copies the data and integrity tags to their respective location.
-
-The dm-integrity target can be used with the dm-crypt target - in this
-situation the dm-crypt target creates the integrity data and passes them
-to the dm-integrity target via bio_integrity_payload attached to the bio.
-In this mode, the dm-crypt and dm-integrity targets provide authenticated
-disk encryption - if the attacker modifies the encrypted device, an I/O
-error is returned instead of random data.
-
-The dm-integrity target can also be used as a standalone target, in this
-mode it calculates and verifies the integrity tag internally. In this
-mode, the dm-integrity target can be used to detect silent data
-corruption on the disk or in the I/O path.
-
-There's an alternate mode of operation where dm-integrity uses bitmap
-instead of a journal. If a bit in the bitmap is 1, the corresponding
-region's data and integrity tags are not synchronized - if the machine
-crashes, the unsynchronized regions will be recalculated. The bitmap mode
-is faster than the journal mode, because we don't have to write the data
-twice, but it is also less reliable, because if data corruption happens
-when the machine crashes, it may not be detected.
-
-When loading the target for the first time, the kernel driver will format
-the device. But it will only format the device if the superblock contains
-zeroes. If the superblock is neither valid nor zeroed, the dm-integrity
-target can't be loaded.
-
-To use the target for the first time:
-1. overwrite the superblock with zeroes
-2. load the dm-integrity target with one-sector size, the kernel driver
-	will format the device
-3. unload the dm-integrity target
-4. read the "provided_data_sectors" value from the superblock
-5. load the dm-integrity target with the the target size
-	"provided_data_sectors"
-6. if you want to use dm-integrity with dm-crypt, load the dm-crypt target
-	with the size "provided_data_sectors"
-
-
-Target arguments:
-
-1. the underlying block device
-
-2. the number of reserved sector at the beginning of the device - the
-	dm-integrity won't read of write these sectors
-
-3. the size of the integrity tag (if "-" is used, the size is taken from
-	the internal-hash algorithm)
-
-4. mode:
-	D - direct writes (without journal) - in this mode, journaling is
-		not used and data sectors and integrity tags are written
-		separately. In case of crash, it is possible that the data
-		and integrity tag doesn't match.
-	J - journaled writes - data and integrity tags are written to the
-		journal and atomicity is guaranteed. In case of crash,
-		either both data and tag or none of them are written. The
-		journaled mode degrades write throughput twice because the
-		data have to be written twice.
-	B - bitmap mode - data and metadata are written without any
-		synchronization, the driver maintains a bitmap of dirty
-		regions where data and metadata don't match. This mode can
-		only be used with internal hash.
-	R - recovery mode - in this mode, journal is not replayed,
-		checksums are not checked and writes to the device are not
-		allowed. This mode is useful for data recovery if the
-		device cannot be activated in any of the other standard
-		modes.
-
-5. the number of additional arguments
-
-Additional arguments:
-
-journal_sectors:number
-	The size of journal, this argument is used only if formatting the
-	device. If the device is already formatted, the value from the
-	superblock is used.
-
-interleave_sectors:number
-	The number of interleaved sectors. This values is rounded down to
-	a power of two. If the device is already formatted, the value from
-	the superblock is used.
-
-meta_device:device
-	Don't interleave the data and metadata on on device. Use a
-	separate device for metadata.
-
-buffer_sectors:number
-	The number of sectors in one buffer. The value is rounded down to
-	a power of two.
-
-	The tag area is accessed using buffers, the buffer size is
-	configurable. The large buffer size means that the I/O size will
-	be larger, but there could be less I/Os issued.
-
-journal_watermark:number
-	The journal watermark in percents. When the size of the journal
-	exceeds this watermark, the thread that flushes the journal will
-	be started.
-
-commit_time:number
-	Commit time in milliseconds. When this time passes, the journal is
-	written. The journal is also written immediatelly if the FLUSH
-	request is received.
-
-internal_hash:algorithm(:key)	(the key is optional)
-	Use internal hash or crc.
-	When this argument is used, the dm-integrity target won't accept
-	integrity tags from the upper target, but it will automatically
-	generate and verify the integrity tags.
-
-	You can use a crc algorithm (such as crc32), then integrity target
-	will protect the data against accidental corruption.
-	You can also use a hmac algorithm (for example
-	"hmac(sha256):0123456789abcdef"), in this mode it will provide
-	cryptographic authentication of the data without encryption.
-
-	When this argument is not used, the integrity tags are accepted
-	from an upper layer target, such as dm-crypt. The upper layer
-	target should check the validity of the integrity tags.
-
-recalculate
-	Recalculate the integrity tags automatically. It is only valid
-	when using internal hash.
-
-journal_crypt:algorithm(:key)	(the key is optional)
-	Encrypt the journal using given algorithm to make sure that the
-	attacker can't read the journal. You can use a block cipher here
-	(such as "cbc(aes)") or a stream cipher (for example "chacha20",
-	"salsa20", "ctr(aes)" or "ecb(arc4)").
-
-	The journal contains history of last writes to the block device,
-	an attacker reading the journal could see the last sector nubmers
-	that were written. From the sector numbers, the attacker can infer
-	the size of files that were written. To protect against this
-	situation, you can encrypt the journal.
-
-journal_mac:algorithm(:key)	(the key is optional)
-	Protect sector numbers in the journal from accidental or malicious
-	modification. To protect against accidental modification, use a
-	crc algorithm, to protect against malicious modification, use a
-	hmac algorithm with a key.
-
-	This option is not needed when using internal-hash because in this
-	mode, the integrity of journal entries is checked when replaying
-	the journal. Thus, modified sector number would be detected at
-	this stage.
-
-block_size:number
-	The size of a data block in bytes.  The larger the block size the
-	less overhead there is for per-block integrity metadata.
-	Supported values are 512, 1024, 2048 and 4096 bytes.  If not
-	specified the default block size is 512 bytes.
-
-sectors_per_bit:number
-	In the bitmap mode, this parameter specifies the number of
-	512-byte sectors that corresponds to one bitmap bit.
-
-bitmap_flush_interval:number
-	The bitmap flush interval in milliseconds. The metadata buffers
-	are synchronized when this interval expires.
-
-
-The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
-be changed when reloading the target (load an inactive table and swap the
-tables with suspend and resume). The other arguments should not be changed
-when reloading the target because the layout of disk data depend on them
-and the reloaded target would be non-functional.
-
-
-The layout of the formatted block device:
-* reserved sectors (they are not used by this target, they can be used for
-  storing LUKS metadata or for other purpose), the size of the reserved
-  area is specified in the target arguments
-* superblock (4kiB)
-	* magic string - identifies that the device was formatted
-	* version
-	* log2(interleave sectors)
-	* integrity tag size
-	* the number of journal sections
-	* provided data sectors - the number of sectors that this target
-	  provides (i.e. the size of the device minus the size of all
-	  metadata and padding). The user of this target should not send
-	  bios that access data beyond the "provided data sectors" limit.
-	* flags
-	  SB_FLAG_HAVE_JOURNAL_MAC - a flag is set if journal_mac is used
-	  SB_FLAG_RECALCULATING - recalculating is in progress
-	  SB_FLAG_DIRTY_BITMAP - journal area contains the bitmap of dirty
-		blocks
-	* log2(sectors per block)
-	* a position where recalculating finished
-* journal
-	The journal is divided into sections, each section contains:
-	* metadata area (4kiB), it contains journal entries
-	  every journal entry contains:
-		* logical sector (specifies where the data and tag should
-		  be written)
-		* last 8 bytes of data
-		* integrity tag (the size is specified in the superblock)
-	    every metadata sector ends with
-		* mac (8-bytes), all the macs in 8 metadata sectors form a
-		  64-byte value. It is used to store hmac of sector
-		  numbers in the journal section, to protect against a
-		  possibility that the attacker tampers with sector
-		  numbers in the journal.
-		* commit id
-	* data area (the size is variable; it depends on how many journal
-	  entries fit into the metadata area)
-	    every sector in the data area contains:
-		* data (504 bytes of data, the last 8 bytes are stored in
-		  the journal entry)
-		* commit id
-	To test if the whole journal section was written correctly, every
-	512-byte sector of the journal ends with 8-byte commit id. If the
-	commit id matches on all sectors in a journal section, then it is
-	assumed that the section was written correctly. If the commit id
-	doesn't match, the section was written partially and it should not
-	be replayed.
-* one or more runs of interleaved tags and data. Each run contains:
-	* tag area - it contains integrity tags. There is one tag for each
-	  sector in the data area
-	* data area - it contains data sectors. The number of data sectors
-	  in one run must be a power of two. log2 of this value is stored
-	  in the superblock.
diff --git a/Documentation/device-mapper/dm-io.rst b/Documentation/device-mapper/dm-io.rst
new file mode 100644
index 000000000000..d2492917a1f5
--- /dev/null
+++ b/Documentation/device-mapper/dm-io.rst
@@ -0,0 +1,75 @@
+=====
+dm-io
+=====
+
+Dm-io provides synchronous and asynchronous I/O services. There are three
+types of I/O services available, and each type has a sync and an async
+version.
+
+The user must set up an io_region structure to describe the desired location
+of the I/O. Each io_region indicates a block-device along with the starting
+sector and size of the region::
+
+   struct io_region {
+      struct block_device *bdev;
+      sector_t sector;
+      sector_t count;
+   };
+
+Dm-io can read from one io_region or write to one or more io_regions. Writes
+to multiple regions are specified by an array of io_region structures.
+
+The first I/O service type takes a list of memory pages as the data buffer for
+the I/O, along with an offset into the first page::
+
+   struct page_list {
+      struct page_list *next;
+      struct page *page;
+   };
+
+   int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
+                  struct page_list *pl, unsigned int offset,
+                  unsigned long *error_bits);
+   int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
+                   struct page_list *pl, unsigned int offset,
+                   io_notify_fn fn, void *context);
+
+The second I/O service type takes an array of bio vectors as the data buffer
+for the I/O. This service can be handy if the caller has a pre-assembled bio,
+but wants to direct different portions of the bio to different devices::
+
+   int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where,
+                       int rw, struct bio_vec *bvec,
+                       unsigned long *error_bits);
+   int dm_io_async_bvec(unsigned int num_regions, struct io_region *where,
+                        int rw, struct bio_vec *bvec,
+                        io_notify_fn fn, void *context);
+
+The third I/O service type takes a pointer to a vmalloc'd memory buffer as the
+data buffer for the I/O. This service can be handy if the caller needs to do
+I/O to a large region but doesn't want to allocate a large number of individual
+memory pages::
+
+   int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
+                     void *data, unsigned long *error_bits);
+   int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
+                      void *data, io_notify_fn fn, void *context);
+
+Callers of the asynchronous I/O services must include the name of a completion
+callback routine and a pointer to some context data for the I/O::
+
+   typedef void (*io_notify_fn)(unsigned long error, void *context);
+
+The "error" parameter in this callback, as well as the `*error` parameter in
+all of the synchronous versions, is a bitset (instead of a simple error value).
+In the case of an write-I/O to multiple regions, this bitset allows dm-io to
+indicate success or failure on each individual region.
+
+Before using any of the dm-io services, the user should call dm_io_get()
+and specify the number of pages they expect to perform I/O on concurrently.
+Dm-io will attempt to resize its mempool to make sure enough pages are
+always available in order to avoid unnecessary waiting while performing I/O.
+
+When the user is finished using the dm-io services, they should call
+dm_io_put() and specify the same number of pages that were given on the
+dm_io_get() call.
diff --git a/Documentation/device-mapper/dm-io.txt b/Documentation/device-mapper/dm-io.txt
deleted file mode 100644
index 3b5d9a52cdcf..000000000000
--- a/Documentation/device-mapper/dm-io.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-dm-io
-=====
-
-Dm-io provides synchronous and asynchronous I/O services. There are three
-types of I/O services available, and each type has a sync and an async
-version.
-
-The user must set up an io_region structure to describe the desired location
-of the I/O. Each io_region indicates a block-device along with the starting
-sector and size of the region.
-
-   struct io_region {
-      struct block_device *bdev;
-      sector_t sector;
-      sector_t count;
-   };
-
-Dm-io can read from one io_region or write to one or more io_regions. Writes
-to multiple regions are specified by an array of io_region structures.
-
-The first I/O service type takes a list of memory pages as the data buffer for
-the I/O, along with an offset into the first page.
-
-   struct page_list {
-      struct page_list *next;
-      struct page *page;
-   };
-
-   int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
-                  struct page_list *pl, unsigned int offset,
-                  unsigned long *error_bits);
-   int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
-                   struct page_list *pl, unsigned int offset,
-                   io_notify_fn fn, void *context);
-
-The second I/O service type takes an array of bio vectors as the data buffer
-for the I/O. This service can be handy if the caller has a pre-assembled bio,
-but wants to direct different portions of the bio to different devices.
-
-   int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where,
-                       int rw, struct bio_vec *bvec,
-                       unsigned long *error_bits);
-   int dm_io_async_bvec(unsigned int num_regions, struct io_region *where,
-                        int rw, struct bio_vec *bvec,
-                        io_notify_fn fn, void *context);
-
-The third I/O service type takes a pointer to a vmalloc'd memory buffer as the
-data buffer for the I/O. This service can be handy if the caller needs to do
-I/O to a large region but doesn't want to allocate a large number of individual
-memory pages.
-
-   int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
-                     void *data, unsigned long *error_bits);
-   int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
-                      void *data, io_notify_fn fn, void *context);
-
-Callers of the asynchronous I/O services must include the name of a completion
-callback routine and a pointer to some context data for the I/O.
-
-   typedef void (*io_notify_fn)(unsigned long error, void *context);
-
-The "error" parameter in this callback, as well as the "*error" parameter in
-all of the synchronous versions, is a bitset (instead of a simple error value).
-In the case of an write-I/O to multiple regions, this bitset allows dm-io to
-indicate success or failure on each individual region.
-
-Before using any of the dm-io services, the user should call dm_io_get()
-and specify the number of pages they expect to perform I/O on concurrently.
-Dm-io will attempt to resize its mempool to make sure enough pages are
-always available in order to avoid unnecessary waiting while performing I/O.
-
-When the user is finished using the dm-io services, they should call
-dm_io_put() and specify the same number of pages that were given on the
-dm_io_get() call.
-
diff --git a/Documentation/device-mapper/dm-log.rst b/Documentation/device-mapper/dm-log.rst
new file mode 100644
index 000000000000..ba4fce39bc27
--- /dev/null
+++ b/Documentation/device-mapper/dm-log.rst
@@ -0,0 +1,57 @@
+=====================
+Device-Mapper Logging
+=====================
+The device-mapper logging code is used by some of the device-mapper
+RAID targets to track regions of the disk that are not consistent.
+A region (or portion of the address space) of the disk may be
+inconsistent because a RAID stripe is currently being operated on or
+a machine died while the region was being altered.  In the case of
+mirrors, a region would be considered dirty/inconsistent while you
+are writing to it because the writes need to be replicated for all
+the legs of the mirror and may not reach the legs at the same time.
+Once all writes are complete, the region is considered clean again.
+
+There is a generic logging interface that the device-mapper RAID
+implementations use to perform logging operations (see
+dm_dirty_log_type in include/linux/dm-dirty-log.h).  Various different
+logging implementations are available and provide different
+capabilities.  The list includes:
+
+==============	==============================================================
+Type		Files
+==============	==============================================================
+disk		drivers/md/dm-log.c
+core		drivers/md/dm-log.c
+userspace	drivers/md/dm-log-userspace* include/linux/dm-log-userspace.h
+==============	==============================================================
+
+The "disk" log type
+-------------------
+This log implementation commits the log state to disk.  This way, the
+logging state survives reboots/crashes.
+
+The "core" log type
+-------------------
+This log implementation keeps the log state in memory.  The log state
+will not survive a reboot or crash, but there may be a small boost in
+performance.  This method can also be used if no storage device is
+available for storing log state.
+
+The "userspace" log type
+------------------------
+This log type simply provides a way to export the log API to userspace,
+so log implementations can be done there.  This is done by forwarding most
+logging requests to userspace, where a daemon receives and processes the
+request.
+
+The structure used for communication between kernel and userspace are
+located in include/linux/dm-log-userspace.h.  Due to the frequency,
+diversity, and 2-way communication nature of the exchanges between
+kernel and userspace, 'connector' is used as the interface for
+communication.
+
+There are currently two userspace log implementations that leverage this
+framework - "clustered-disk" and "clustered-core".  These implementations
+provide a cluster-coherent log for shared-storage.  Device-mapper mirroring
+can be used in a shared-storage environment when the cluster log implementations
+are employed.
diff --git a/Documentation/device-mapper/dm-log.txt b/Documentation/device-mapper/dm-log.txt
deleted file mode 100644
index c155ac569c44..000000000000
--- a/Documentation/device-mapper/dm-log.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-Device-Mapper Logging
-=====================
-The device-mapper logging code is used by some of the device-mapper
-RAID targets to track regions of the disk that are not consistent.
-A region (or portion of the address space) of the disk may be
-inconsistent because a RAID stripe is currently being operated on or
-a machine died while the region was being altered.  In the case of
-mirrors, a region would be considered dirty/inconsistent while you
-are writing to it because the writes need to be replicated for all
-the legs of the mirror and may not reach the legs at the same time.
-Once all writes are complete, the region is considered clean again.
-
-There is a generic logging interface that the device-mapper RAID
-implementations use to perform logging operations (see
-dm_dirty_log_type in include/linux/dm-dirty-log.h).  Various different
-logging implementations are available and provide different
-capabilities.  The list includes:
-
-Type		Files
-====		=====
-disk		drivers/md/dm-log.c
-core		drivers/md/dm-log.c
-userspace	drivers/md/dm-log-userspace* include/linux/dm-log-userspace.h
-
-The "disk" log type
--------------------
-This log implementation commits the log state to disk.  This way, the
-logging state survives reboots/crashes.
-
-The "core" log type
--------------------
-This log implementation keeps the log state in memory.  The log state
-will not survive a reboot or crash, but there may be a small boost in
-performance.  This method can also be used if no storage device is
-available for storing log state.
-
-The "userspace" log type
-------------------------
-This log type simply provides a way to export the log API to userspace,
-so log implementations can be done there.  This is done by forwarding most
-logging requests to userspace, where a daemon receives and processes the
-request.
-
-The structure used for communication between kernel and userspace are
-located in include/linux/dm-log-userspace.h.  Due to the frequency,
-diversity, and 2-way communication nature of the exchanges between
-kernel and userspace, 'connector' is used as the interface for
-communication.
-
-There are currently two userspace log implementations that leverage this
-framework - "clustered-disk" and "clustered-core".  These implementations
-provide a cluster-coherent log for shared-storage.  Device-mapper mirroring
-can be used in a shared-storage environment when the cluster log implementations
-are employed.
diff --git a/Documentation/device-mapper/dm-queue-length.rst b/Documentation/device-mapper/dm-queue-length.rst
new file mode 100644
index 000000000000..d8e381c1cb02
--- /dev/null
+++ b/Documentation/device-mapper/dm-queue-length.rst
@@ -0,0 +1,48 @@
+===============
+dm-queue-length
+===============
+
+dm-queue-length is a path selector module for device-mapper targets,
+which selects a path with the least number of in-flight I/Os.
+The path selector name is 'queue-length'.
+
+Table parameters for each path: [<repeat_count>]
+
+::
+
+	<repeat_count>: The number of I/Os to dispatch using the selected
+			path before switching to the next path.
+			If not given, internal default is used. To check
+			the default value, see the activated table.
+
+Status for each path: <status> <fail-count> <in-flight>
+
+::
+
+	<status>: 'A' if the path is active, 'F' if the path is failed.
+	<fail-count>: The number of path failures.
+	<in-flight>: The number of in-flight I/Os on the path.
+
+
+Algorithm
+=========
+
+dm-queue-length increments/decrements 'in-flight' when an I/O is
+dispatched/completed respectively.
+dm-queue-length selects a path with the minimum 'in-flight'.
+
+
+Examples
+========
+In case that 2 paths (sda and sdb) are used with repeat_count == 128.
+
+::
+
+  # echo "0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128" \
+    dmsetup create test
+  #
+  # dmsetup table
+  test: 0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128
+  #
+  # dmsetup status
+  test: 0 10 multipath 2 0 0 0 1 1 E 0 2 1 8:0 A 0 0 8:16 A 0 0
diff --git a/Documentation/device-mapper/dm-queue-length.txt b/Documentation/device-mapper/dm-queue-length.txt
deleted file mode 100644
index f4db2562175c..000000000000
--- a/Documentation/device-mapper/dm-queue-length.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-dm-queue-length
-===============
-
-dm-queue-length is a path selector module for device-mapper targets,
-which selects a path with the least number of in-flight I/Os.
-The path selector name is 'queue-length'.
-
-Table parameters for each path: [<repeat_count>]
-	<repeat_count>: The number of I/Os to dispatch using the selected
-			path before switching to the next path.
-			If not given, internal default is used. To check
-			the default value, see the activated table.
-
-Status for each path: <status> <fail-count> <in-flight>
-	<status>: 'A' if the path is active, 'F' if the path is failed.
-	<fail-count>: The number of path failures.
-	<in-flight>: The number of in-flight I/Os on the path.
-
-
-Algorithm
-=========
-
-dm-queue-length increments/decrements 'in-flight' when an I/O is
-dispatched/completed respectively.
-dm-queue-length selects a path with the minimum 'in-flight'.
-
-
-Examples
-========
-In case that 2 paths (sda and sdb) are used with repeat_count == 128.
-
-# echo "0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128" \
-  dmsetup create test
-#
-# dmsetup table
-test: 0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128
-#
-# dmsetup status
-test: 0 10 multipath 2 0 0 0 1 1 E 0 2 1 8:0 A 0 0 8:16 A 0 0
diff --git a/Documentation/device-mapper/dm-raid.rst b/Documentation/device-mapper/dm-raid.rst
new file mode 100644
index 000000000000..2fe255b130fb
--- /dev/null
+++ b/Documentation/device-mapper/dm-raid.rst
@@ -0,0 +1,419 @@
+=======
+dm-raid
+=======
+
+The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
+It allows the MD RAID drivers to be accessed using a device-mapper
+interface.
+
+
+Mapping Table Interface
+-----------------------
+The target is named "raid" and it accepts the following parameters::
+
+  <raid_type> <#raid_params> <raid_params> \
+    <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>]
+
+<raid_type>:
+
+  ============= ===============================================================
+  raid0		RAID0 striping (no resilience)
+  raid1		RAID1 mirroring
+  raid4		RAID4 with dedicated last parity disk
+  raid5_n 	RAID5 with dedicated last parity disk supporting takeover
+		Same as raid4
+
+		- Transitory layout
+  raid5_la	RAID5 left asymmetric
+
+		- rotating parity 0 with data continuation
+  raid5_ra	RAID5 right asymmetric
+
+		- rotating parity N with data continuation
+  raid5_ls	RAID5 left symmetric
+
+		- rotating parity 0 with data restart
+  raid5_rs 	RAID5 right symmetric
+
+		- rotating parity N with data restart
+  raid6_zr	RAID6 zero restart
+
+		- rotating parity zero (left-to-right) with data restart
+  raid6_nr	RAID6 N restart
+
+		- rotating parity N (right-to-left) with data restart
+  raid6_nc	RAID6 N continue
+
+		- rotating parity N (right-to-left) with data continuation
+  raid6_n_6	RAID6 with dedicate parity disks
+
+		- parity and Q-syndrome on the last 2 disks;
+		  layout for takeover from/to raid4/raid5_n
+  raid6_la_6	Same as "raid_la" plus dedicated last Q-syndrome disk
+
+		- layout for takeover from raid5_la from/to raid6
+  raid6_ra_6	Same as "raid5_ra" dedicated last Q-syndrome disk
+
+		- layout for takeover from raid5_ra from/to raid6
+  raid6_ls_6	Same as "raid5_ls" dedicated last Q-syndrome disk
+
+		- layout for takeover from raid5_ls from/to raid6
+  raid6_rs_6	Same as "raid5_rs" dedicated last Q-syndrome disk
+
+		- layout for takeover from raid5_rs from/to raid6
+  raid10        Various RAID10 inspired algorithms chosen by additional params
+		(see raid10_format and raid10_copies below)
+
+		- RAID10: Striped Mirrors (aka 'Striping on top of mirrors')
+		- RAID1E: Integrated Adjacent Stripe Mirroring
+		- RAID1E: Integrated Offset Stripe Mirroring
+		- and other similar RAID10 variants
+  ============= ===============================================================
+
+  Reference: Chapter 4 of
+  http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
+
+<#raid_params>: The number of parameters that follow.
+
+<raid_params> consists of
+
+    Mandatory parameters:
+        <chunk_size>:
+		      Chunk size in sectors.  This parameter is often known as
+		      "stripe size".  It is the only mandatory parameter and
+		      is placed first.
+
+    followed by optional parameters (in any order):
+	[sync|nosync]
+		Force or prevent RAID initialization.
+
+	[rebuild <idx>]
+		Rebuild drive number 'idx' (first drive is 0).
+
+	[daemon_sleep <ms>]
+		Interval between runs of the bitmap daemon that
+		clear bits.  A longer interval means less bitmap I/O but
+		resyncing after a failure is likely to take longer.
+
+	[min_recovery_rate <kB/sec/disk>]
+		Throttle RAID initialization
+	[max_recovery_rate <kB/sec/disk>]
+		Throttle RAID initialization
+	[write_mostly <idx>]
+		Mark drive index 'idx' write-mostly.
+	[max_write_behind <sectors>]
+		See '--write-behind=' (man mdadm)
+	[stripe_cache <sectors>]
+		Stripe cache size (RAID 4/5/6 only)
+	[region_size <sectors>]
+		The region_size multiplied by the number of regions is the
+		logical size of the array.  The bitmap records the device
+		synchronisation state for each region.
+
+        [raid10_copies   <# copies>], [raid10_format   <near|far|offset>]
+		These two options are used to alter the default layout of
+		a RAID10 configuration.  The number of copies is can be
+		specified, but the default is 2.  There are also three
+		variations to how the copies are laid down - the default
+		is "near".  Near copies are what most people think of with
+		respect to mirroring.  If these options are left unspecified,
+		or 'raid10_copies 2' and/or 'raid10_format near' are given,
+		then the layouts for 2, 3 and 4 devices	are:
+
+		========	 ==========	   ==============
+		2 drives         3 drives          4 drives
+		========	 ==========	   ==============
+		A1  A1           A1  A1  A2        A1  A1  A2  A2
+		A2  A2           A2  A3  A3        A3  A3  A4  A4
+		A3  A3           A4  A4  A5        A5  A5  A6  A6
+		A4  A4           A5  A6  A6        A7  A7  A8  A8
+		..  ..           ..  ..  ..        ..  ..  ..  ..
+		========	 ==========	   ==============
+
+		The 2-device layout is equivalent 2-way RAID1.  The 4-device
+		layout is what a traditional RAID10 would look like.  The
+		3-device layout is what might be called a 'RAID1E - Integrated
+		Adjacent Stripe Mirroring'.
+
+		If 'raid10_copies 2' and 'raid10_format far', then the layouts
+		for 2, 3 and 4 devices are:
+
+		========	     ============	  ===================
+		2 drives             3 drives             4 drives
+		========	     ============	  ===================
+		A1  A2               A1   A2   A3         A1   A2   A3   A4
+		A3  A4               A4   A5   A6         A5   A6   A7   A8
+		A5  A6               A7   A8   A9         A9   A10  A11  A12
+		..  ..               ..   ..   ..         ..   ..   ..   ..
+		A2  A1               A3   A1   A2         A2   A1   A4   A3
+		A4  A3               A6   A4   A5         A6   A5   A8   A7
+		A6  A5               A9   A7   A8         A10  A9   A12  A11
+		..  ..               ..   ..   ..         ..   ..   ..   ..
+		========	     ============	  ===================
+
+		If 'raid10_copies 2' and 'raid10_format offset', then the
+		layouts for 2, 3 and 4 devices are:
+
+		========       ==========         ================
+		2 drives       3 drives           4 drives
+		========       ==========         ================
+		A1  A2         A1  A2  A3         A1  A2  A3  A4
+		A2  A1         A3  A1  A2         A2  A1  A4  A3
+		A3  A4         A4  A5  A6         A5  A6  A7  A8
+		A4  A3         A6  A4  A5         A6  A5  A8  A7
+		A5  A6         A7  A8  A9         A9  A10 A11 A12
+		A6  A5         A9  A7  A8         A10 A9  A12 A11
+		..  ..         ..  ..  ..         ..  ..  ..  ..
+		========       ==========         ================
+
+		Here we see layouts closely akin to 'RAID1E - Integrated
+		Offset Stripe Mirroring'.
+
+        [delta_disks <N>]
+		The delta_disks option value (-251 < N < +251) triggers
+		device removal (negative value) or device addition (positive
+		value) to any reshape supporting raid levels 4/5/6 and 10.
+		RAID levels 4/5/6 allow for addition of devices (metadata
+		and data device tuple), raid10_near and raid10_offset only
+		allow for device addition. raid10_far does not support any
+		reshaping at all.
+		A minimum of devices have to be kept to enforce resilience,
+		which is 3 devices for raid4/5 and 4 devices for raid6.
+
+        [data_offset <sectors>]
+		This option value defines the offset into each data device
+		where the data starts. This is used to provide out-of-place
+		reshaping space to avoid writing over data while
+		changing the layout of stripes, hence an interruption/crash
+		may happen at any time without the risk of losing data.
+		E.g. when adding devices to an existing raid set during
+		forward reshaping, the out-of-place space will be allocated
+		at the beginning of each raid device. The kernel raid4/5/6/10
+		MD personalities supporting such device addition will read the data from
+		the existing first stripes (those with smaller number of stripes)
+		starting at data_offset to fill up a new stripe with the larger
+		number of stripes, calculate the redundancy blocks (CRC/Q-syndrome)
+		and write that new stripe to offset 0. Same will be applied to all
+		N-1 other new stripes. This out-of-place scheme is used to change
+		the RAID type (i.e. the allocation algorithm) as well, e.g.
+		changing from raid5_ls to raid5_n.
+
+	[journal_dev <dev>]
+		This option adds a journal device to raid4/5/6 raid sets and
+		uses it to close the 'write hole' caused by the non-atomic updates
+		to the component devices which can cause data loss during recovery.
+		The journal device is used as writethrough thus causing writes to
+		be throttled versus non-journaled raid4/5/6 sets.
+		Takeover/reshape is not possible with a raid4/5/6 journal device;
+		it has to be deconfigured before requesting these.
+
+	[journal_mode <mode>]
+		This option sets the caching mode on journaled raid4/5/6 raid sets
+		(see 'journal_dev <dev>' above) to 'writethrough' or 'writeback'.
+		If 'writeback' is selected the journal device has to be resilient
+		and must not suffer from the 'write hole' problem itself (e.g. use
+		raid1 or raid10) to avoid a single point of failure.
+
+<#raid_devs>: The number of devices composing the array.
+	Each device consists of two entries.  The first is the device
+	containing the metadata (if any); the second is the one containing the
+	data. A Maximum of 64 metadata/data device entries are supported
+	up to target version 1.8.0.
+	1.9.0 supports up to 253 which is enforced by the used MD kernel runtime.
+
+	If a drive has failed or is missing at creation time, a '-' can be
+	given for both the metadata and data drives for a given position.
+
+
+Example Tables
+--------------
+
+::
+
+  # RAID4 - 4 data drives, 1 parity (no metadata devices)
+  # No metadata devices specified to hold superblock/bitmap info
+  # Chunk size of 1MiB
+  # (Lines separated for easy reading)
+
+  0 1960893648 raid \
+          raid4 1 2048 \
+          5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
+
+  # RAID4 - 4 data drives, 1 parity (with metadata devices)
+  # Chunk size of 1MiB, force RAID initialization,
+  #       min recovery rate at 20 kiB/sec/disk
+
+  0 1960893648 raid \
+          raid4 4 2048 sync min_recovery_rate 20 \
+          5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
+
+
+Status Output
+-------------
+'dmsetup table' displays the table used to construct the mapping.
+The optional parameters are always printed in the order listed
+above with "sync" or "nosync" always output ahead of the other
+arguments, regardless of the order used when originally loading the table.
+Arguments that can be repeated are ordered by value.
+
+
+'dmsetup status' yields information on the state and health of the array.
+The output is as follows (normally a single line, but expanded here for
+clarity)::
+
+  1: <s> <l> raid \
+  2:      <raid_type> <#devices> <health_chars> \
+  3:      <sync_ratio> <sync_action> <mismatch_cnt>
+
+Line 1 is the standard output produced by device-mapper.
+
+Line 2 & 3 are produced by the raid target and are best explained by example::
+
+        0 1960893648 raid raid4 5 AAAAA 2/490221568 init 0
+
+Here we can see the RAID type is raid4, there are 5 devices - all of
+which are 'A'live, and the array is 2/490221568 complete with its initial
+recovery.  Here is a fuller description of the individual fields:
+
+	=============== =========================================================
+	<raid_type>     Same as the <raid_type> used to create the array.
+	<health_chars>  One char for each device, indicating:
+
+			- 'A' = alive and in-sync
+			- 'a' = alive but not in-sync
+			- 'D' = dead/failed.
+	<sync_ratio>    The ratio indicating how much of the array has undergone
+			the process described by 'sync_action'.  If the
+			'sync_action' is "check" or "repair", then the process
+			of "resync" or "recover" can be considered complete.
+	<sync_action>   One of the following possible states:
+
+			idle
+				- No synchronization action is being performed.
+			frozen
+				- The current action has been halted.
+			resync
+				- Array is undergoing its initial synchronization
+				  or is resynchronizing after an unclean shutdown
+				  (possibly aided by a bitmap).
+			recover
+				- A device in the array is being rebuilt or
+				  replaced.
+			check
+				- A user-initiated full check of the array is
+				  being performed.  All blocks are read and
+				  checked for consistency.  The number of
+				  discrepancies found are recorded in
+				  <mismatch_cnt>.  No changes are made to the
+				  array by this action.
+			repair
+				- The same as "check", but discrepancies are
+				  corrected.
+			reshape
+				- The array is undergoing a reshape.
+	<mismatch_cnt>  The number of discrepancies found between mirror copies
+			in RAID1/10 or wrong parity values found in RAID4/5/6.
+			This value is valid only after a "check" of the array
+			is performed.  A healthy array has a 'mismatch_cnt' of 0.
+	<data_offset>   The current data offset to the start of the user data on
+			each component device of a raid set (see the respective
+			raid parameter to support out-of-place reshaping).
+	<journal_char>	- 'A' - active write-through journal device.
+			- 'a' - active write-back journal device.
+			- 'D' - dead journal device.
+			- '-' - no journal device.
+	=============== =========================================================
+
+
+Message Interface
+-----------------
+The dm-raid target will accept certain actions through the 'message' interface.
+('man dmsetup' for more information on the message interface.)  These actions
+include:
+
+	========= ================================================
+	"idle"    Halt the current sync action.
+	"frozen"  Freeze the current sync action.
+	"resync"  Initiate/continue a resync.
+	"recover" Initiate/continue a recover process.
+	"check"   Initiate a check (i.e. a "scrub") of the array.
+	"repair"  Initiate a repair of the array.
+	========= ================================================
+
+
+Discard Support
+---------------
+The implementation of discard support among hardware vendors varies.
+When a block is discarded, some storage devices will return zeroes when
+the block is read.  These devices set the 'discard_zeroes_data'
+attribute.  Other devices will return random data.  Confusingly, some
+devices that advertise 'discard_zeroes_data' will not reliably return
+zeroes when discarded blocks are read!  Since RAID 4/5/6 uses blocks
+from a number of devices to calculate parity blocks and (for performance
+reasons) relies on 'discard_zeroes_data' being reliable, it is important
+that the devices be consistent.  Blocks may be discarded in the middle
+of a RAID 4/5/6 stripe and if subsequent read results are not
+consistent, the parity blocks may be calculated differently at any time;
+making the parity blocks useless for redundancy.  It is important to
+understand how your hardware behaves with discards if you are going to
+enable discards with RAID 4/5/6.
+
+Since the behavior of storage devices is unreliable in this respect,
+even when reporting 'discard_zeroes_data', by default RAID 4/5/6
+discard support is disabled -- this ensures data integrity at the
+expense of losing some performance.
+
+Storage devices that properly support 'discard_zeroes_data' are
+increasingly whitelisted in the kernel and can thus be trusted.
+
+For trusted devices, the following dm-raid module parameter can be set
+to safely enable discard support for RAID 4/5/6:
+
+    'devices_handle_discards_safely'
+
+
+Version History
+---------------
+
+::
+
+ 1.0.0	Initial version.  Support for RAID 4/5/6
+ 1.1.0	Added support for RAID 1
+ 1.2.0	Handle creation of arrays that contain failed devices.
+ 1.3.0	Added support for RAID 10
+ 1.3.1	Allow device replacement/rebuild for RAID 10
+ 1.3.2	Fix/improve redundancy checking for RAID10
+ 1.4.0	Non-functional change.  Removes arg from mapping function.
+ 1.4.1	RAID10 fix redundancy validation checks (commit 55ebbb5).
+ 1.4.2	Add RAID10 "far" and "offset" algorithm support.
+ 1.5.0	Add message interface to allow manipulation of the sync_action.
+	New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
+ 1.5.1	Add ability to restore transiently failed devices on resume.
+ 1.5.2	'mismatch_cnt' is zero unless [last_]sync_action is "check".
+ 1.6.0	Add discard support (and devices_handle_discard_safely module param).
+ 1.7.0	Add support for MD RAID0 mappings.
+ 1.8.0	Explicitly check for compatible flags in the superblock metadata
+	and reject to start the raid set if any are set by a newer
+	target version, thus avoiding data corruption on a raid set
+	with a reshape in progress.
+ 1.9.0	Add support for RAID level takeover/reshape/region size
+	and set size reduction.
+ 1.9.1	Fix activation of existing RAID 4/10 mapped devices
+ 1.9.2	Don't emit '- -' on the status table line in case the constructor
+	fails reading a superblock. Correctly emit 'maj:min1 maj:min2' and
+	'D' on the status line.  If '- -' is passed into the constructor, emit
+	'- -' on the table line and '-' as the status line health character.
+ 1.10.0	Add support for raid4/5/6 journal device
+ 1.10.1	Fix data corruption on reshape request
+ 1.11.0	Fix table line argument order
+	(wrong raid10_copies/raid10_format sequence)
+ 1.11.1	Add raid4/5/6 journal write-back support via journal_mode option
+ 1.12.1	Fix for MD deadlock between mddev_suspend() and md_write_start() available
+ 1.13.0	Fix dev_health status at end of "recover" (was 'a', now 'A')
+ 1.13.1	Fix deadlock caused by early md_stop_writes().  Also fix size an
+	state races.
+ 1.13.2	Fix raid redundancy validation and avoid keeping raid set frozen
+ 1.14.0	Fix reshape race on small devices.  Fix stripe adding reshape
+	deadlock/potential data corruption.  Update superblock when
+	specific devices are requested via rebuild.  Fix RAID leg
+	rebuild errors.
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
deleted file mode 100644
index 2355bef14653..000000000000
--- a/Documentation/device-mapper/dm-raid.txt
+++ /dev/null
@@ -1,354 +0,0 @@
-dm-raid
-=======
-
-The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
-It allows the MD RAID drivers to be accessed using a device-mapper
-interface.
-
-
-Mapping Table Interface
------------------------
-The target is named "raid" and it accepts the following parameters:
-
-  <raid_type> <#raid_params> <raid_params> \
-    <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>]
-
-<raid_type>:
-  raid0		RAID0 striping (no resilience)
-  raid1		RAID1 mirroring
-  raid4		RAID4 with dedicated last parity disk
-  raid5_n 	RAID5 with dedicated last parity disk supporting takeover
-		Same as raid4
-		-Transitory layout
-  raid5_la	RAID5 left asymmetric
-		- rotating parity 0 with data continuation
-  raid5_ra	RAID5 right asymmetric
-		- rotating parity N with data continuation
-  raid5_ls	RAID5 left symmetric
-		- rotating parity 0 with data restart
-  raid5_rs 	RAID5 right symmetric
-		- rotating parity N with data restart
-  raid6_zr	RAID6 zero restart
-		- rotating parity zero (left-to-right) with data restart
-  raid6_nr	RAID6 N restart
-		- rotating parity N (right-to-left) with data restart
-  raid6_nc	RAID6 N continue
-		- rotating parity N (right-to-left) with data continuation
-  raid6_n_6	RAID6 with dedicate parity disks
-		- parity and Q-syndrome on the last 2 disks;
-		  layout for takeover from/to raid4/raid5_n
-  raid6_la_6	Same as "raid_la" plus dedicated last Q-syndrome disk
-		- layout for takeover from raid5_la from/to raid6
-  raid6_ra_6	Same as "raid5_ra" dedicated last Q-syndrome disk
-		- layout for takeover from raid5_ra from/to raid6
-  raid6_ls_6	Same as "raid5_ls" dedicated last Q-syndrome disk
-		- layout for takeover from raid5_ls from/to raid6
-  raid6_rs_6	Same as "raid5_rs" dedicated last Q-syndrome disk
-		- layout for takeover from raid5_rs from/to raid6
-  raid10        Various RAID10 inspired algorithms chosen by additional params
-		(see raid10_format and raid10_copies below)
-		- RAID10: Striped Mirrors (aka 'Striping on top of mirrors')
-		- RAID1E: Integrated Adjacent Stripe Mirroring
-		- RAID1E: Integrated Offset Stripe Mirroring
-		-  and other similar RAID10 variants
-
-  Reference: Chapter 4 of
-  http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
-
-<#raid_params>: The number of parameters that follow.
-
-<raid_params> consists of
-    Mandatory parameters:
-        <chunk_size>: Chunk size in sectors.  This parameter is often known as
-		      "stripe size".  It is the only mandatory parameter and
-		      is placed first.
-
-    followed by optional parameters (in any order):
-	[sync|nosync]   Force or prevent RAID initialization.
-
-	[rebuild <idx>]	Rebuild drive number 'idx' (first drive is 0).
-
-	[daemon_sleep <ms>]
-		Interval between runs of the bitmap daemon that
-		clear bits.  A longer interval means less bitmap I/O but
-		resyncing after a failure is likely to take longer.
-
-	[min_recovery_rate <kB/sec/disk>]  Throttle RAID initialization
-	[max_recovery_rate <kB/sec/disk>]  Throttle RAID initialization
-	[write_mostly <idx>]		   Mark drive index 'idx' write-mostly.
-	[max_write_behind <sectors>]       See '--write-behind=' (man mdadm)
-	[stripe_cache <sectors>]           Stripe cache size (RAID 4/5/6 only)
-	[region_size <sectors>]
-		The region_size multiplied by the number of regions is the
-		logical size of the array.  The bitmap records the device
-		synchronisation state for each region.
-
-        [raid10_copies   <# copies>]
-        [raid10_format   <near|far|offset>]
-		These two options are used to alter the default layout of
-		a RAID10 configuration.  The number of copies is can be
-		specified, but the default is 2.  There are also three
-		variations to how the copies are laid down - the default
-		is "near".  Near copies are what most people think of with
-		respect to mirroring.  If these options are left unspecified,
-		or 'raid10_copies 2' and/or 'raid10_format near' are given,
-		then the layouts for 2, 3 and 4 devices	are:
-		2 drives         3 drives          4 drives
-		--------         ----------        --------------
-		A1  A1           A1  A1  A2        A1  A1  A2  A2
-		A2  A2           A2  A3  A3        A3  A3  A4  A4
-		A3  A3           A4  A4  A5        A5  A5  A6  A6
-		A4  A4           A5  A6  A6        A7  A7  A8  A8
-		..  ..           ..  ..  ..        ..  ..  ..  ..
-		The 2-device layout is equivalent 2-way RAID1.  The 4-device
-		layout is what a traditional RAID10 would look like.  The
-		3-device layout is what might be called a 'RAID1E - Integrated
-		Adjacent Stripe Mirroring'.
-
-		If 'raid10_copies 2' and 'raid10_format far', then the layouts
-		for 2, 3 and 4 devices are:
-		2 drives             3 drives             4 drives
-		--------             --------------       --------------------
-		A1  A2               A1   A2   A3         A1   A2   A3   A4
-		A3  A4               A4   A5   A6         A5   A6   A7   A8
-		A5  A6               A7   A8   A9         A9   A10  A11  A12
-		..  ..               ..   ..   ..         ..   ..   ..   ..
-		A2  A1               A3   A1   A2         A2   A1   A4   A3
-		A4  A3               A6   A4   A5         A6   A5   A8   A7
-		A6  A5               A9   A7   A8         A10  A9   A12  A11
-		..  ..               ..   ..   ..         ..   ..   ..   ..
-
-		If 'raid10_copies 2' and 'raid10_format offset', then the
-		layouts for 2, 3 and 4 devices are:
-		2 drives       3 drives           4 drives
-		--------       ------------       -----------------
-		A1  A2         A1  A2  A3         A1  A2  A3  A4
-		A2  A1         A3  A1  A2         A2  A1  A4  A3
-		A3  A4         A4  A5  A6         A5  A6  A7  A8
-		A4  A3         A6  A4  A5         A6  A5  A8  A7
-		A5  A6         A7  A8  A9         A9  A10 A11 A12
-		A6  A5         A9  A7  A8         A10 A9  A12 A11
-		..  ..         ..  ..  ..         ..  ..  ..  ..
-		Here we see layouts closely akin to 'RAID1E - Integrated
-		Offset Stripe Mirroring'.
-
-        [delta_disks <N>]
-		The delta_disks option value (-251 < N < +251) triggers
-		device removal (negative value) or device addition (positive
-		value) to any reshape supporting raid levels 4/5/6 and 10.
-		RAID levels 4/5/6 allow for addition of devices (metadata
-		and data device tuple), raid10_near and raid10_offset only
-		allow for device addition. raid10_far does not support any
-		reshaping at all.
-		A minimum of devices have to be kept to enforce resilience,
-		which is 3 devices for raid4/5 and 4 devices for raid6.
-
-        [data_offset <sectors>]
-		This option value defines the offset into each data device
-		where the data starts. This is used to provide out-of-place
-		reshaping space to avoid writing over data while
-		changing the layout of stripes, hence an interruption/crash
-		may happen at any time without the risk of losing data.
-		E.g. when adding devices to an existing raid set during
-		forward reshaping, the out-of-place space will be allocated
-		at the beginning of each raid device. The kernel raid4/5/6/10
-		MD personalities supporting such device addition will read the data from
-		the existing first stripes (those with smaller number of stripes)
-		starting at data_offset to fill up a new stripe with the larger
-		number of stripes, calculate the redundancy blocks (CRC/Q-syndrome)
-		and write that new stripe to offset 0. Same will be applied to all
-		N-1 other new stripes. This out-of-place scheme is used to change
-		the RAID type (i.e. the allocation algorithm) as well, e.g.
-		changing from raid5_ls to raid5_n.
-
-	[journal_dev <dev>]
-		This option adds a journal device to raid4/5/6 raid sets and
-		uses it to close the 'write hole' caused by the non-atomic updates
-		to the component devices which can cause data loss during recovery.
-		The journal device is used as writethrough thus causing writes to
-		be throttled versus non-journaled raid4/5/6 sets.
-		Takeover/reshape is not possible with a raid4/5/6 journal device;
-		it has to be deconfigured before requesting these.
-
-	[journal_mode <mode>]
-		This option sets the caching mode on journaled raid4/5/6 raid sets
-		(see 'journal_dev <dev>' above) to 'writethrough' or 'writeback'.
-		If 'writeback' is selected the journal device has to be resilient
-		and must not suffer from the 'write hole' problem itself (e.g. use
-		raid1 or raid10) to avoid a single point of failure.
-
-<#raid_devs>: The number of devices composing the array.
-	Each device consists of two entries.  The first is the device
-	containing the metadata (if any); the second is the one containing the
-	data. A Maximum of 64 metadata/data device entries are supported
-	up to target version 1.8.0.
-	1.9.0 supports up to 253 which is enforced by the used MD kernel runtime.
-
-	If a drive has failed or is missing at creation time, a '-' can be
-	given for both the metadata and data drives for a given position.
-
-
-Example Tables
---------------
-# RAID4 - 4 data drives, 1 parity (no metadata devices)
-# No metadata devices specified to hold superblock/bitmap info
-# Chunk size of 1MiB
-# (Lines separated for easy reading)
-
-0 1960893648 raid \
-        raid4 1 2048 \
-        5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
-
-# RAID4 - 4 data drives, 1 parity (with metadata devices)
-# Chunk size of 1MiB, force RAID initialization,
-#       min recovery rate at 20 kiB/sec/disk
-
-0 1960893648 raid \
-        raid4 4 2048 sync min_recovery_rate 20 \
-        5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
-
-
-Status Output
--------------
-'dmsetup table' displays the table used to construct the mapping.
-The optional parameters are always printed in the order listed
-above with "sync" or "nosync" always output ahead of the other
-arguments, regardless of the order used when originally loading the table.
-Arguments that can be repeated are ordered by value.
-
-
-'dmsetup status' yields information on the state and health of the array.
-The output is as follows (normally a single line, but expanded here for
-clarity):
-1: <s> <l> raid \
-2:      <raid_type> <#devices> <health_chars> \
-3:      <sync_ratio> <sync_action> <mismatch_cnt>
-
-Line 1 is the standard output produced by device-mapper.
-Line 2 & 3 are produced by the raid target and are best explained by example:
-        0 1960893648 raid raid4 5 AAAAA 2/490221568 init 0
-Here we can see the RAID type is raid4, there are 5 devices - all of
-which are 'A'live, and the array is 2/490221568 complete with its initial
-recovery.  Here is a fuller description of the individual fields:
-	<raid_type>     Same as the <raid_type> used to create the array.
-	<health_chars>  One char for each device, indicating: 'A' = alive and
-			in-sync, 'a' = alive but not in-sync, 'D' = dead/failed.
-	<sync_ratio>    The ratio indicating how much of the array has undergone
-			the process described by 'sync_action'.  If the
-			'sync_action' is "check" or "repair", then the process
-			of "resync" or "recover" can be considered complete.
-	<sync_action>   One of the following possible states:
-			idle    - No synchronization action is being performed.
-			frozen  - The current action has been halted.
-			resync  - Array is undergoing its initial synchronization
-				  or is resynchronizing after an unclean shutdown
-				  (possibly aided by a bitmap).
-			recover - A device in the array is being rebuilt or
-				  replaced.
-			check   - A user-initiated full check of the array is
-				  being performed.  All blocks are read and
-				  checked for consistency.  The number of
-				  discrepancies found are recorded in
-				  <mismatch_cnt>.  No changes are made to the
-				  array by this action.
-			repair  - The same as "check", but discrepancies are
-				  corrected.
-			reshape - The array is undergoing a reshape.
-	<mismatch_cnt>  The number of discrepancies found between mirror copies
-			in RAID1/10 or wrong parity values found in RAID4/5/6.
-			This value is valid only after a "check" of the array
-			is performed.  A healthy array has a 'mismatch_cnt' of 0.
-	<data_offset>   The current data offset to the start of the user data on
-			each component device of a raid set (see the respective
-			raid parameter to support out-of-place reshaping).
-	<journal_char>	'A' - active write-through journal device.
-			'a' - active write-back journal device.
-			'D' - dead journal device.
-			'-' - no journal device.
-
-
-Message Interface
------------------
-The dm-raid target will accept certain actions through the 'message' interface.
-('man dmsetup' for more information on the message interface.)  These actions
-include:
-	"idle"   - Halt the current sync action.
-	"frozen" - Freeze the current sync action.
-	"resync" - Initiate/continue a resync.
-	"recover"- Initiate/continue a recover process.
-	"check"  - Initiate a check (i.e. a "scrub") of the array.
-	"repair" - Initiate a repair of the array.
-
-
-Discard Support
----------------
-The implementation of discard support among hardware vendors varies.
-When a block is discarded, some storage devices will return zeroes when
-the block is read.  These devices set the 'discard_zeroes_data'
-attribute.  Other devices will return random data.  Confusingly, some
-devices that advertise 'discard_zeroes_data' will not reliably return
-zeroes when discarded blocks are read!  Since RAID 4/5/6 uses blocks
-from a number of devices to calculate parity blocks and (for performance
-reasons) relies on 'discard_zeroes_data' being reliable, it is important
-that the devices be consistent.  Blocks may be discarded in the middle
-of a RAID 4/5/6 stripe and if subsequent read results are not
-consistent, the parity blocks may be calculated differently at any time;
-making the parity blocks useless for redundancy.  It is important to
-understand how your hardware behaves with discards if you are going to
-enable discards with RAID 4/5/6.
-
-Since the behavior of storage devices is unreliable in this respect,
-even when reporting 'discard_zeroes_data', by default RAID 4/5/6
-discard support is disabled -- this ensures data integrity at the
-expense of losing some performance.
-
-Storage devices that properly support 'discard_zeroes_data' are
-increasingly whitelisted in the kernel and can thus be trusted.
-
-For trusted devices, the following dm-raid module parameter can be set
-to safely enable discard support for RAID 4/5/6:
-    'devices_handle_discards_safely'
-
-
-Version History
----------------
-1.0.0	Initial version.  Support for RAID 4/5/6
-1.1.0	Added support for RAID 1
-1.2.0	Handle creation of arrays that contain failed devices.
-1.3.0	Added support for RAID 10
-1.3.1	Allow device replacement/rebuild for RAID 10
-1.3.2   Fix/improve redundancy checking for RAID10
-1.4.0	Non-functional change.  Removes arg from mapping function.
-1.4.1   RAID10 fix redundancy validation checks (commit 55ebbb5).
-1.4.2   Add RAID10 "far" and "offset" algorithm support.
-1.5.0   Add message interface to allow manipulation of the sync_action.
-	New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
-1.5.1   Add ability to restore transiently failed devices on resume.
-1.5.2   'mismatch_cnt' is zero unless [last_]sync_action is "check".
-1.6.0   Add discard support (and devices_handle_discard_safely module param).
-1.7.0   Add support for MD RAID0 mappings.
-1.8.0   Explicitly check for compatible flags in the superblock metadata
-	and reject to start the raid set if any are set by a newer
-	target version, thus avoiding data corruption on a raid set
-	with a reshape in progress.
-1.9.0   Add support for RAID level takeover/reshape/region size
-	and set size reduction.
-1.9.1   Fix activation of existing RAID 4/10 mapped devices
-1.9.2   Don't emit '- -' on the status table line in case the constructor
-	fails reading a superblock. Correctly emit 'maj:min1 maj:min2' and
-	'D' on the status line.  If '- -' is passed into the constructor, emit
-	'- -' on the table line and '-' as the status line health character.
-1.10.0  Add support for raid4/5/6 journal device
-1.10.1  Fix data corruption on reshape request
-1.11.0  Fix table line argument order
-	(wrong raid10_copies/raid10_format sequence)
-1.11.1  Add raid4/5/6 journal write-back support via journal_mode option
-1.12.1  Fix for MD deadlock between mddev_suspend() and md_write_start() available
-1.13.0  Fix dev_health status at end of "recover" (was 'a', now 'A')
-1.13.1  Fix deadlock caused by early md_stop_writes().  Also fix size an
-	state races.
-1.13.2  Fix raid redundancy validation and avoid keeping raid set frozen
-1.14.0  Fix reshape race on small devices.  Fix stripe adding reshape
-	deadlock/potential data corruption.  Update superblock when
-	specific devices are requested via rebuild.  Fix RAID leg
-	rebuild errors.
diff --git a/Documentation/device-mapper/dm-service-time.rst b/Documentation/device-mapper/dm-service-time.rst
new file mode 100644
index 000000000000..facf277fc13c
--- /dev/null
+++ b/Documentation/device-mapper/dm-service-time.rst
@@ -0,0 +1,101 @@
+===============
+dm-service-time
+===============
+
+dm-service-time is a path selector module for device-mapper targets,
+which selects a path with the shortest estimated service time for
+the incoming I/O.
+
+The service time for each path is estimated by dividing the total size
+of in-flight I/Os on a path with the performance value of the path.
+The performance value is a relative throughput value among all paths
+in a path-group, and it can be specified as a table argument.
+
+The path selector name is 'service-time'.
+
+Table parameters for each path:
+
+    [<repeat_count> [<relative_throughput>]]
+	<repeat_count>:
+			The number of I/Os to dispatch using the selected
+			path before switching to the next path.
+			If not given, internal default is used.  To check
+			the default value, see the activated table.
+	<relative_throughput>:
+			The relative throughput value of the path
+			among all paths in the path-group.
+			The valid range is 0-100.
+			If not given, minimum value '1' is used.
+			If '0' is given, the path isn't selected while
+			other paths having a positive value are available.
+
+Status for each path:
+
+    <status> <fail-count> <in-flight-size> <relative_throughput>
+	<status>:
+		'A' if the path is active, 'F' if the path is failed.
+	<fail-count>:
+		The number of path failures.
+	<in-flight-size>:
+		The size of in-flight I/Os on the path.
+	<relative_throughput>:
+		The relative throughput value of the path
+		among all paths in the path-group.
+
+
+Algorithm
+=========
+
+dm-service-time adds the I/O size to 'in-flight-size' when the I/O is
+dispatched and subtracts when completed.
+Basically, dm-service-time selects a path having minimum service time
+which is calculated by::
+
+	('in-flight-size' + 'size-of-incoming-io') / 'relative_throughput'
+
+However, some optimizations below are used to reduce the calculation
+as much as possible.
+
+	1. If the paths have the same 'relative_throughput', skip
+	   the division and just compare the 'in-flight-size'.
+
+	2. If the paths have the same 'in-flight-size', skip the division
+	   and just compare the 'relative_throughput'.
+
+	3. If some paths have non-zero 'relative_throughput' and others
+	   have zero 'relative_throughput', ignore those paths with zero
+	   'relative_throughput'.
+
+If such optimizations can't be applied, calculate service time, and
+compare service time.
+If calculated service time is equal, the path having maximum
+'relative_throughput' may be better.  So compare 'relative_throughput'
+then.
+
+
+Examples
+========
+In case that 2 paths (sda and sdb) are used with repeat_count == 128
+and sda has an average throughput 1GB/s and sdb has 4GB/s,
+'relative_throughput' value may be '1' for sda and '4' for sdb::
+
+  # echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4" \
+    dmsetup create test
+  #
+  # dmsetup table
+  test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4
+  #
+  # dmsetup status
+  test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 1 8:16 A 0 0 4
+
+
+Or '2' for sda and '8' for sdb would be also true::
+
+  # echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8" \
+    dmsetup create test
+  #
+  # dmsetup table
+  test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8
+  #
+  # dmsetup status
+  test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 2 8:16 A 0 0 8
diff --git a/Documentation/device-mapper/dm-service-time.txt b/Documentation/device-mapper/dm-service-time.txt
deleted file mode 100644
index fb1d4a0cf122..000000000000
--- a/Documentation/device-mapper/dm-service-time.txt
+++ /dev/null
@@ -1,91 +0,0 @@
-dm-service-time
-===============
-
-dm-service-time is a path selector module for device-mapper targets,
-which selects a path with the shortest estimated service time for
-the incoming I/O.
-
-The service time for each path is estimated by dividing the total size
-of in-flight I/Os on a path with the performance value of the path.
-The performance value is a relative throughput value among all paths
-in a path-group, and it can be specified as a table argument.
-
-The path selector name is 'service-time'.
-
-Table parameters for each path: [<repeat_count> [<relative_throughput>]]
-	<repeat_count>: The number of I/Os to dispatch using the selected
-			path before switching to the next path.
-			If not given, internal default is used.  To check
-			the default value, see the activated table.
-	<relative_throughput>: The relative throughput value of the path
-			among all paths in the path-group.
-			The valid range is 0-100.
-			If not given, minimum value '1' is used.
-			If '0' is given, the path isn't selected while
-			other paths having a positive value are available.
-
-Status for each path: <status> <fail-count> <in-flight-size> \
-		      <relative_throughput>
-	<status>: 'A' if the path is active, 'F' if the path is failed.
-	<fail-count>: The number of path failures.
-	<in-flight-size>: The size of in-flight I/Os on the path.
-	<relative_throughput>: The relative throughput value of the path
-			among all paths in the path-group.
-
-
-Algorithm
-=========
-
-dm-service-time adds the I/O size to 'in-flight-size' when the I/O is
-dispatched and subtracts when completed.
-Basically, dm-service-time selects a path having minimum service time
-which is calculated by:
-
-	('in-flight-size' + 'size-of-incoming-io') / 'relative_throughput'
-
-However, some optimizations below are used to reduce the calculation
-as much as possible.
-
-	1. If the paths have the same 'relative_throughput', skip
-	   the division and just compare the 'in-flight-size'.
-
-	2. If the paths have the same 'in-flight-size', skip the division
-	   and just compare the 'relative_throughput'.
-
-	3. If some paths have non-zero 'relative_throughput' and others
-	   have zero 'relative_throughput', ignore those paths with zero
-	   'relative_throughput'.
-
-If such optimizations can't be applied, calculate service time, and
-compare service time.
-If calculated service time is equal, the path having maximum
-'relative_throughput' may be better.  So compare 'relative_throughput'
-then.
-
-
-Examples
-========
-In case that 2 paths (sda and sdb) are used with repeat_count == 128
-and sda has an average throughput 1GB/s and sdb has 4GB/s,
-'relative_throughput' value may be '1' for sda and '4' for sdb.
-
-# echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4" \
-  dmsetup create test
-#
-# dmsetup table
-test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4
-#
-# dmsetup status
-test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 1 8:16 A 0 0 4
-
-
-Or '2' for sda and '8' for sdb would be also true.
-
-# echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8" \
-  dmsetup create test
-#
-# dmsetup table
-test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8
-#
-# dmsetup status
-test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 2 8:16 A 0 0 8
diff --git a/Documentation/device-mapper/dm-uevent.rst b/Documentation/device-mapper/dm-uevent.rst
new file mode 100644
index 000000000000..4a8ee8d069c9
--- /dev/null
+++ b/Documentation/device-mapper/dm-uevent.rst
@@ -0,0 +1,110 @@
+====================
+device-mapper uevent
+====================
+
+The device-mapper uevent code adds the capability to device-mapper to create
+and send kobject uevents (uevents).  Previously device-mapper events were only
+available through the ioctl interface.  The advantage of the uevents interface
+is the event contains environment attributes providing increased context for
+the event avoiding the need to query the state of the device-mapper device after
+the event is received.
+
+There are two functions currently for device-mapper events.  The first function
+listed creates the event and the second function sends the event(s)::
+
+  void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
+                      const char *path, unsigned nr_valid_paths)
+
+  void dm_send_uevents(struct list_head *events, struct kobject *kobj)
+
+
+The variables added to the uevent environment are:
+
+Variable Name: DM_TARGET
+------------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: string
+:Description:
+:Value: Name of device-mapper target that generated the event.
+
+Variable Name: DM_ACTION
+------------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: string
+:Description:
+:Value: Device-mapper specific action that caused the uevent action.
+	PATH_FAILED - A path has failed;
+	PATH_REINSTATED - A path has been reinstated.
+
+Variable Name: DM_SEQNUM
+------------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: unsigned integer
+:Description: A sequence number for this specific device-mapper device.
+:Value: Valid unsigned integer range.
+
+Variable Name: DM_PATH
+----------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: string
+:Description: Major and minor number of the path device pertaining to this
+	      event.
+:Value: Path name in the form of "Major:Minor"
+
+Variable Name: DM_NR_VALID_PATHS
+--------------------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: unsigned integer
+:Description:
+:Value: Valid unsigned integer range.
+
+Variable Name: DM_NAME
+----------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: string
+:Description: Name of the device-mapper device.
+:Value: Name
+
+Variable Name: DM_UUID
+----------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: string
+:Description: UUID of the device-mapper device.
+:Value: UUID. (Empty string if there isn't one.)
+
+An example of the uevents generated as captured by udevmonitor is shown
+below
+
+1.) Path failure::
+
+	UEVENT[1192521009.711215] change@/block/dm-3
+	ACTION=change
+	DEVPATH=/block/dm-3
+	SUBSYSTEM=block
+	DM_TARGET=multipath
+	DM_ACTION=PATH_FAILED
+	DM_SEQNUM=1
+	DM_PATH=8:32
+	DM_NR_VALID_PATHS=0
+	DM_NAME=mpath2
+	DM_UUID=mpath-35333333000002328
+	MINOR=3
+	MAJOR=253
+	SEQNUM=1130
+
+2.) Path reinstate::
+
+	UEVENT[1192521132.989927] change@/block/dm-3
+	ACTION=change
+	DEVPATH=/block/dm-3
+	SUBSYSTEM=block
+	DM_TARGET=multipath
+	DM_ACTION=PATH_REINSTATED
+	DM_SEQNUM=2
+	DM_PATH=8:32
+	DM_NR_VALID_PATHS=1
+	DM_NAME=mpath2
+	DM_UUID=mpath-35333333000002328
+	MINOR=3
+	MAJOR=253
+	SEQNUM=1131
diff --git a/Documentation/device-mapper/dm-uevent.txt b/Documentation/device-mapper/dm-uevent.txt
deleted file mode 100644
index 07edbd85c714..000000000000
--- a/Documentation/device-mapper/dm-uevent.txt
+++ /dev/null
@@ -1,97 +0,0 @@
-The device-mapper uevent code adds the capability to device-mapper to create
-and send kobject uevents (uevents).  Previously device-mapper events were only
-available through the ioctl interface.  The advantage of the uevents interface
-is the event contains environment attributes providing increased context for
-the event avoiding the need to query the state of the device-mapper device after
-the event is received.
-
-There are two functions currently for device-mapper events.  The first function
-listed creates the event and the second function sends the event(s).
-
-void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
-                    const char *path, unsigned nr_valid_paths)
-
-void dm_send_uevents(struct list_head *events, struct kobject *kobj)
-
-
-The variables added to the uevent environment are:
-
-Variable Name: DM_TARGET
-Uevent Action(s): KOBJ_CHANGE
-Type: string
-Description:
-Value: Name of device-mapper target that generated the event.
-
-Variable Name: DM_ACTION
-Uevent Action(s): KOBJ_CHANGE
-Type: string
-Description:
-Value: Device-mapper specific action that caused the uevent action.
-	PATH_FAILED - A path has failed.
-	PATH_REINSTATED - A path has been reinstated.
-
-Variable Name: DM_SEQNUM
-Uevent Action(s): KOBJ_CHANGE
-Type: unsigned integer
-Description: A sequence number for this specific device-mapper device.
-Value: Valid unsigned integer range.
-
-Variable Name: DM_PATH
-Uevent Action(s): KOBJ_CHANGE
-Type: string
-Description: Major and minor number of the path device pertaining to this
-event.
-Value: Path name in the form of "Major:Minor"
-
-Variable Name: DM_NR_VALID_PATHS
-Uevent Action(s): KOBJ_CHANGE
-Type: unsigned integer
-Description:
-Value: Valid unsigned integer range.
-
-Variable Name: DM_NAME
-Uevent Action(s): KOBJ_CHANGE
-Type: string
-Description: Name of the device-mapper device.
-Value: Name
-
-Variable Name: DM_UUID
-Uevent Action(s): KOBJ_CHANGE
-Type: string
-Description: UUID of the device-mapper device.
-Value: UUID. (Empty string if there isn't one.)
-
-An example of the uevents generated as captured by udevmonitor is shown
-below.
-
-1.) Path failure.
-UEVENT[1192521009.711215] change@/block/dm-3
-ACTION=change
-DEVPATH=/block/dm-3
-SUBSYSTEM=block
-DM_TARGET=multipath
-DM_ACTION=PATH_FAILED
-DM_SEQNUM=1
-DM_PATH=8:32
-DM_NR_VALID_PATHS=0
-DM_NAME=mpath2
-DM_UUID=mpath-35333333000002328
-MINOR=3
-MAJOR=253
-SEQNUM=1130
-
-2.) Path reinstate.
-UEVENT[1192521132.989927] change@/block/dm-3
-ACTION=change
-DEVPATH=/block/dm-3
-SUBSYSTEM=block
-DM_TARGET=multipath
-DM_ACTION=PATH_REINSTATED
-DM_SEQNUM=2
-DM_PATH=8:32
-DM_NR_VALID_PATHS=1
-DM_NAME=mpath2
-DM_UUID=mpath-35333333000002328
-MINOR=3
-MAJOR=253
-SEQNUM=1131
diff --git a/Documentation/device-mapper/dm-zoned.rst b/Documentation/device-mapper/dm-zoned.rst
new file mode 100644
index 000000000000..07f56ebc1730
--- /dev/null
+++ b/Documentation/device-mapper/dm-zoned.rst
@@ -0,0 +1,146 @@
+========
+dm-zoned
+========
+
+The dm-zoned device mapper target exposes a zoned block device (ZBC and
+ZAC compliant devices) as a regular block device without any write
+pattern constraints. In effect, it implements a drive-managed zoned
+block device which hides from the user (a file system or an application
+doing raw block device accesses) the sequential write constraints of
+host-managed zoned block devices and can mitigate the potential
+device-side performance degradation due to excessive random writes on
+host-aware zoned block devices.
+
+For a more detailed description of the zoned block device models and
+their constraints see (for SCSI devices):
+
+http://www.t10.org/drafts.htm#ZBC_Family
+
+and (for ATA devices):
+
+http://www.t13.org/Documents/UploadedDocuments/docs2015/di537r05-Zoned_Device_ATA_Command_Set_ZAC.pdf
+
+The dm-zoned implementation is simple and minimizes system overhead (CPU
+and memory usage as well as storage capacity loss). For a 10TB
+host-managed disk with 256 MB zones, dm-zoned memory usage per disk
+instance is at most 4.5 MB and as little as 5 zones will be used
+internally for storing metadata and performaing reclaim operations.
+
+dm-zoned target devices are formatted and checked using the dmzadm
+utility available at:
+
+https://github.com/hgst/dm-zoned-tools
+
+Algorithm
+=========
+
+dm-zoned implements an on-disk buffering scheme to handle non-sequential
+write accesses to the sequential zones of a zoned block device.
+Conventional zones are used for caching as well as for storing internal
+metadata.
+
+The zones of the device are separated into 2 types:
+
+1) Metadata zones: these are conventional zones used to store metadata.
+Metadata zones are not reported as useable capacity to the user.
+
+2) Data zones: all remaining zones, the vast majority of which will be
+sequential zones used exclusively to store user data. The conventional
+zones of the device may be used also for buffering user random writes.
+Data in these zones may be directly mapped to the conventional zone, but
+later moved to a sequential zone so that the conventional zone can be
+reused for buffering incoming random writes.
+
+dm-zoned exposes a logical device with a sector size of 4096 bytes,
+irrespective of the physical sector size of the backend zoned block
+device being used. This allows reducing the amount of metadata needed to
+manage valid blocks (blocks written).
+
+The on-disk metadata format is as follows:
+
+1) The first block of the first conventional zone found contains the
+super block which describes the on disk amount and position of metadata
+blocks.
+
+2) Following the super block, a set of blocks is used to describe the
+mapping of the logical device blocks. The mapping is done per chunk of
+blocks, with the chunk size equal to the zoned block device size. The
+mapping table is indexed by chunk number and each mapping entry
+indicates the zone number of the device storing the chunk of data. Each
+mapping entry may also indicate if the zone number of a conventional
+zone used to buffer random modification to the data zone.
+
+3) A set of blocks used to store bitmaps indicating the validity of
+blocks in the data zones follows the mapping table. A valid block is
+defined as a block that was written and not discarded. For a buffered
+data chunk, a block is always valid only in the data zone mapping the
+chunk or in the buffer zone of the chunk.
+
+For a logical chunk mapped to a conventional zone, all write operations
+are processed by directly writing to the zone. If the mapping zone is a
+sequential zone, the write operation is processed directly only if the
+write offset within the logical chunk is equal to the write pointer
+offset within of the sequential data zone (i.e. the write operation is
+aligned on the zone write pointer). Otherwise, write operations are
+processed indirectly using a buffer zone. In that case, an unused
+conventional zone is allocated and assigned to the chunk being
+accessed. Writing a block to the buffer zone of a chunk will
+automatically invalidate the same block in the sequential zone mapping
+the chunk. If all blocks of the sequential zone become invalid, the zone
+is freed and the chunk buffer zone becomes the primary zone mapping the
+chunk, resulting in native random write performance similar to a regular
+block device.
+
+Read operations are processed according to the block validity
+information provided by the bitmaps. Valid blocks are read either from
+the sequential zone mapping a chunk, or if the chunk is buffered, from
+the buffer zone assigned. If the accessed chunk has no mapping, or the
+accessed blocks are invalid, the read buffer is zeroed and the read
+operation terminated.
+
+After some time, the limited number of convnetional zones available may
+be exhausted (all used to map chunks or buffer sequential zones) and
+unaligned writes to unbuffered chunks become impossible. To avoid this
+situation, a reclaim process regularly scans used conventional zones and
+tries to reclaim the least recently used zones by copying the valid
+blocks of the buffer zone to a free sequential zone. Once the copy
+completes, the chunk mapping is updated to point to the sequential zone
+and the buffer zone freed for reuse.
+
+Metadata Protection
+===================
+
+To protect metadata against corruption in case of sudden power loss or
+system crash, 2 sets of metadata zones are used. One set, the primary
+set, is used as the main metadata region, while the secondary set is
+used as a staging area. Modified metadata is first written to the
+secondary set and validated by updating the super block in the secondary
+set, a generation counter is used to indicate that this set contains the
+newest metadata. Once this operation completes, in place of metadata
+block updates can be done in the primary metadata set. This ensures that
+one of the set is always consistent (all modifications committed or none
+at all). Flush operations are used as a commit point. Upon reception of
+a flush request, metadata modification activity is temporarily blocked
+(for both incoming BIO processing and reclaim process) and all dirty
+metadata blocks are staged and updated. Normal operation is then
+resumed. Flushing metadata thus only temporarily delays write and
+discard requests. Read requests can be processed concurrently while
+metadata flush is being executed.
+
+Usage
+=====
+
+A zoned block device must first be formatted using the dmzadm tool. This
+will analyze the device zone configuration, determine where to place the
+metadata sets on the device and initialize the metadata sets.
+
+Ex::
+
+	dmzadm --format /dev/sdxx
+
+For a formatted device, the target can be created normally with the
+dmsetup utility. The only parameter that dm-zoned requires is the
+underlying zoned block device name. Ex::
+
+	echo "0 `blockdev --getsize ${dev}` zoned ${dev}" | \
+	dmsetup create dmz-`basename ${dev}`
diff --git a/Documentation/device-mapper/dm-zoned.txt b/Documentation/device-mapper/dm-zoned.txt
deleted file mode 100644
index 736fcc78d193..000000000000
--- a/Documentation/device-mapper/dm-zoned.txt
+++ /dev/null
@@ -1,144 +0,0 @@
-dm-zoned
-========
-
-The dm-zoned device mapper target exposes a zoned block device (ZBC and
-ZAC compliant devices) as a regular block device without any write
-pattern constraints. In effect, it implements a drive-managed zoned
-block device which hides from the user (a file system or an application
-doing raw block device accesses) the sequential write constraints of
-host-managed zoned block devices and can mitigate the potential
-device-side performance degradation due to excessive random writes on
-host-aware zoned block devices.
-
-For a more detailed description of the zoned block device models and
-their constraints see (for SCSI devices):
-
-http://www.t10.org/drafts.htm#ZBC_Family
-
-and (for ATA devices):
-
-http://www.t13.org/Documents/UploadedDocuments/docs2015/di537r05-Zoned_Device_ATA_Command_Set_ZAC.pdf
-
-The dm-zoned implementation is simple and minimizes system overhead (CPU
-and memory usage as well as storage capacity loss). For a 10TB
-host-managed disk with 256 MB zones, dm-zoned memory usage per disk
-instance is at most 4.5 MB and as little as 5 zones will be used
-internally for storing metadata and performaing reclaim operations.
-
-dm-zoned target devices are formatted and checked using the dmzadm
-utility available at:
-
-https://github.com/hgst/dm-zoned-tools
-
-Algorithm
-=========
-
-dm-zoned implements an on-disk buffering scheme to handle non-sequential
-write accesses to the sequential zones of a zoned block device.
-Conventional zones are used for caching as well as for storing internal
-metadata.
-
-The zones of the device are separated into 2 types:
-
-1) Metadata zones: these are conventional zones used to store metadata.
-Metadata zones are not reported as useable capacity to the user.
-
-2) Data zones: all remaining zones, the vast majority of which will be
-sequential zones used exclusively to store user data. The conventional
-zones of the device may be used also for buffering user random writes.
-Data in these zones may be directly mapped to the conventional zone, but
-later moved to a sequential zone so that the conventional zone can be
-reused for buffering incoming random writes.
-
-dm-zoned exposes a logical device with a sector size of 4096 bytes,
-irrespective of the physical sector size of the backend zoned block
-device being used. This allows reducing the amount of metadata needed to
-manage valid blocks (blocks written).
-
-The on-disk metadata format is as follows:
-
-1) The first block of the first conventional zone found contains the
-super block which describes the on disk amount and position of metadata
-blocks.
-
-2) Following the super block, a set of blocks is used to describe the
-mapping of the logical device blocks. The mapping is done per chunk of
-blocks, with the chunk size equal to the zoned block device size. The
-mapping table is indexed by chunk number and each mapping entry
-indicates the zone number of the device storing the chunk of data. Each
-mapping entry may also indicate if the zone number of a conventional
-zone used to buffer random modification to the data zone.
-
-3) A set of blocks used to store bitmaps indicating the validity of
-blocks in the data zones follows the mapping table. A valid block is
-defined as a block that was written and not discarded. For a buffered
-data chunk, a block is always valid only in the data zone mapping the
-chunk or in the buffer zone of the chunk.
-
-For a logical chunk mapped to a conventional zone, all write operations
-are processed by directly writing to the zone. If the mapping zone is a
-sequential zone, the write operation is processed directly only if the
-write offset within the logical chunk is equal to the write pointer
-offset within of the sequential data zone (i.e. the write operation is
-aligned on the zone write pointer). Otherwise, write operations are
-processed indirectly using a buffer zone. In that case, an unused
-conventional zone is allocated and assigned to the chunk being
-accessed. Writing a block to the buffer zone of a chunk will
-automatically invalidate the same block in the sequential zone mapping
-the chunk. If all blocks of the sequential zone become invalid, the zone
-is freed and the chunk buffer zone becomes the primary zone mapping the
-chunk, resulting in native random write performance similar to a regular
-block device.
-
-Read operations are processed according to the block validity
-information provided by the bitmaps. Valid blocks are read either from
-the sequential zone mapping a chunk, or if the chunk is buffered, from
-the buffer zone assigned. If the accessed chunk has no mapping, or the
-accessed blocks are invalid, the read buffer is zeroed and the read
-operation terminated.
-
-After some time, the limited number of convnetional zones available may
-be exhausted (all used to map chunks or buffer sequential zones) and
-unaligned writes to unbuffered chunks become impossible. To avoid this
-situation, a reclaim process regularly scans used conventional zones and
-tries to reclaim the least recently used zones by copying the valid
-blocks of the buffer zone to a free sequential zone. Once the copy
-completes, the chunk mapping is updated to point to the sequential zone
-and the buffer zone freed for reuse.
-
-Metadata Protection
-===================
-
-To protect metadata against corruption in case of sudden power loss or
-system crash, 2 sets of metadata zones are used. One set, the primary
-set, is used as the main metadata region, while the secondary set is
-used as a staging area. Modified metadata is first written to the
-secondary set and validated by updating the super block in the secondary
-set, a generation counter is used to indicate that this set contains the
-newest metadata. Once this operation completes, in place of metadata
-block updates can be done in the primary metadata set. This ensures that
-one of the set is always consistent (all modifications committed or none
-at all). Flush operations are used as a commit point. Upon reception of
-a flush request, metadata modification activity is temporarily blocked
-(for both incoming BIO processing and reclaim process) and all dirty
-metadata blocks are staged and updated. Normal operation is then
-resumed. Flushing metadata thus only temporarily delays write and
-discard requests. Read requests can be processed concurrently while
-metadata flush is being executed.
-
-Usage
-=====
-
-A zoned block device must first be formatted using the dmzadm tool. This
-will analyze the device zone configuration, determine where to place the
-metadata sets on the device and initialize the metadata sets.
-
-Ex:
-
-dmzadm --format /dev/sdxx
-
-For a formatted device, the target can be created normally with the
-dmsetup utility. The only parameter that dm-zoned requires is the
-underlying zoned block device name. Ex:
-
-echo "0 `blockdev --getsize ${dev}` zoned ${dev}" | dmsetup create dmz-`basename ${dev}`
diff --git a/Documentation/device-mapper/era.rst b/Documentation/device-mapper/era.rst
new file mode 100644
index 000000000000..90dd5c670b9f
--- /dev/null
+++ b/Documentation/device-mapper/era.rst
@@ -0,0 +1,116 @@
+======
+dm-era
+======
+
+Introduction
+============
+
+dm-era is a target that behaves similar to the linear target.  In
+addition it keeps track of which blocks were written within a user
+defined period of time called an 'era'.  Each era target instance
+maintains the current era as a monotonically increasing 32-bit
+counter.
+
+Use cases include tracking changed blocks for backup software, and
+partially invalidating the contents of a cache to restore cache
+coherency after rolling back a vendor snapshot.
+
+Constructor
+===========
+
+era <metadata dev> <origin dev> <block size>
+
+ ================ ======================================================
+ metadata dev     fast device holding the persistent metadata
+ origin dev	  device holding data blocks that may change
+ block size       block size of origin data device, granularity that is
+		  tracked by the target
+ ================ ======================================================
+
+Messages
+========
+
+None of the dm messages take any arguments.
+
+checkpoint
+----------
+
+Possibly move to a new era.  You shouldn't assume the era has
+incremented.  After sending this message, you should check the
+current era via the status line.
+
+take_metadata_snap
+------------------
+
+Create a clone of the metadata, to allow a userland process to read it.
+
+drop_metadata_snap
+------------------
+
+Drop the metadata snapshot.
+
+Status
+======
+
+<metadata block size> <#used metadata blocks>/<#total metadata blocks>
+<current era> <held metadata root | '-'>
+
+========================= ==============================================
+metadata block size	  Fixed block size for each metadata block in
+			  sectors
+#used metadata blocks	  Number of metadata blocks used
+#total metadata blocks	  Total number of metadata blocks
+current era		  The current era
+held metadata root	  The location, in blocks, of the metadata root
+			  that has been 'held' for userspace read
+			  access. '-' indicates there is no held root
+========================= ==============================================
+
+Detailed use case
+=================
+
+The scenario of invalidating a cache when rolling back a vendor
+snapshot was the primary use case when developing this target:
+
+Taking a vendor snapshot
+------------------------
+
+- Send a checkpoint message to the era target
+- Make a note of the current era in its status line
+- Take vendor snapshot (the era and snapshot should be forever
+  associated now).
+
+Rolling back to an vendor snapshot
+----------------------------------
+
+- Cache enters passthrough mode (see: dm-cache's docs in cache.txt)
+- Rollback vendor storage
+- Take metadata snapshot
+- Ascertain which blocks have been written since the snapshot was taken
+  by checking each block's era
+- Invalidate those blocks in the caching software
+- Cache returns to writeback/writethrough mode
+
+Memory usage
+============
+
+The target uses a bitset to record writes in the current era.  It also
+has a spare bitset ready for switching over to a new era.  Other than
+that it uses a few 4k blocks for updating metadata::
+
+   (4 * nr_blocks) bytes + buffers
+
+Resilience
+==========
+
+Metadata is updated on disk before a write to a previously unwritten
+block is performed.  As such dm-era should not be effected by a hard
+crash such as power failure.
+
+Userland tools
+==============
+
+Userland tools are found in the increasingly poorly named
+thin-provisioning-tools project:
+
+    https://github.com/jthornber/thin-provisioning-tools
diff --git a/Documentation/device-mapper/era.txt b/Documentation/device-mapper/era.txt
deleted file mode 100644
index 3c6d01be3560..000000000000
--- a/Documentation/device-mapper/era.txt
+++ /dev/null
@@ -1,108 +0,0 @@
-Introduction
-============
-
-dm-era is a target that behaves similar to the linear target.  In
-addition it keeps track of which blocks were written within a user
-defined period of time called an 'era'.  Each era target instance
-maintains the current era as a monotonically increasing 32-bit
-counter.
-
-Use cases include tracking changed blocks for backup software, and
-partially invalidating the contents of a cache to restore cache
-coherency after rolling back a vendor snapshot.
-
-Constructor
-===========
-
- era <metadata dev> <origin dev> <block size>
-
- metadata dev    : fast device holding the persistent metadata
- origin dev	 : device holding data blocks that may change
- block size      : block size of origin data device, granularity that is
-		     tracked by the target
-
-Messages
-========
-
-None of the dm messages take any arguments.
-
-checkpoint
-----------
-
-Possibly move to a new era.  You shouldn't assume the era has
-incremented.  After sending this message, you should check the
-current era via the status line.
-
-take_metadata_snap
-------------------
-
-Create a clone of the metadata, to allow a userland process to read it.
-
-drop_metadata_snap
-------------------
-
-Drop the metadata snapshot.
-
-Status
-======
-
-<metadata block size> <#used metadata blocks>/<#total metadata blocks>
-<current era> <held metadata root | '-'>
-
-metadata block size	 : Fixed block size for each metadata block in
-			     sectors
-#used metadata blocks	 : Number of metadata blocks used
-#total metadata blocks	 : Total number of metadata blocks
-current era		 : The current era
-held metadata root	 : The location, in blocks, of the metadata root
-			     that has been 'held' for userspace read
-			     access. '-' indicates there is no held root
-
-Detailed use case
-=================
-
-The scenario of invalidating a cache when rolling back a vendor
-snapshot was the primary use case when developing this target:
-
-Taking a vendor snapshot
-------------------------
-
-- Send a checkpoint message to the era target
-- Make a note of the current era in its status line
-- Take vendor snapshot (the era and snapshot should be forever
-  associated now).
-
-Rolling back to an vendor snapshot
-----------------------------------
-
-- Cache enters passthrough mode (see: dm-cache's docs in cache.txt)
-- Rollback vendor storage
-- Take metadata snapshot
-- Ascertain which blocks have been written since the snapshot was taken
-  by checking each block's era
-- Invalidate those blocks in the caching software
-- Cache returns to writeback/writethrough mode
-
-Memory usage
-============
-
-The target uses a bitset to record writes in the current era.  It also
-has a spare bitset ready for switching over to a new era.  Other than
-that it uses a few 4k blocks for updating metadata.
-
-   (4 * nr_blocks) bytes + buffers
-
-Resilience
-==========
-
-Metadata is updated on disk before a write to a previously unwritten
-block is performed.  As such dm-era should not be effected by a hard
-crash such as power failure.
-
-Userland tools
-==============
-
-Userland tools are found in the increasingly poorly named
-thin-provisioning-tools project:
-
-    https://github.com/jthornber/thin-provisioning-tools
diff --git a/Documentation/device-mapper/index.rst b/Documentation/device-mapper/index.rst
new file mode 100644
index 000000000000..105e253bc231
--- /dev/null
+++ b/Documentation/device-mapper/index.rst
@@ -0,0 +1,44 @@
+:orphan:
+
+=============
+Device Mapper
+=============
+
+.. toctree::
+    :maxdepth: 1
+
+    cache-policies
+    cache
+    delay
+    dm-crypt
+    dm-flakey
+    dm-init
+    dm-integrity
+    dm-io
+    dm-log
+    dm-queue-length
+    dm-raid
+    dm-service-time
+    dm-uevent
+    dm-zoned
+    era
+    kcopyd
+    linear
+    log-writes
+    persistent-data
+    snapshot
+    statistics
+    striped
+    switch
+    thin-provisioning
+    unstriped
+    verity
+    writecache
+    zero
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/device-mapper/kcopyd.rst b/Documentation/device-mapper/kcopyd.rst
new file mode 100644
index 000000000000..7651d395127f
--- /dev/null
+++ b/Documentation/device-mapper/kcopyd.rst
@@ -0,0 +1,47 @@
+======
+kcopyd
+======
+
+Kcopyd provides the ability to copy a range of sectors from one block-device
+to one or more other block-devices, with an asynchronous completion
+notification. It is used by dm-snapshot and dm-mirror.
+
+Users of kcopyd must first create a client and indicate how many memory pages
+to set aside for their copy jobs. This is done with a call to
+kcopyd_client_create()::
+
+   int kcopyd_client_create(unsigned int num_pages,
+                            struct kcopyd_client **result);
+
+To start a copy job, the user must set up io_region structures to describe
+the source and destinations of the copy. Each io_region indicates a
+block-device along with the starting sector and size of the region. The source
+of the copy is given as one io_region structure, and the destinations of the
+copy are given as an array of io_region structures::
+
+   struct io_region {
+      struct block_device *bdev;
+      sector_t sector;
+      sector_t count;
+   };
+
+To start the copy, the user calls kcopyd_copy(), passing in the client
+pointer, pointers to the source and destination io_regions, the name of a
+completion callback routine, and a pointer to some context data for the copy::
+
+   int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
+                   unsigned int num_dests, struct io_region *dests,
+                   unsigned int flags, kcopyd_notify_fn fn, void *context);
+
+   typedef void (*kcopyd_notify_fn)(int read_err, unsigned int write_err,
+				    void *context);
+
+When the copy completes, kcopyd will call the user's completion routine,
+passing back the user's context pointer. It will also indicate if a read or
+write error occurred during the copy.
+
+When a user is done with all their copy jobs, they should call
+kcopyd_client_destroy() to delete the kcopyd client, which will release the
+associated memory pages::
+
+   void kcopyd_client_destroy(struct kcopyd_client *kc);
diff --git a/Documentation/device-mapper/kcopyd.txt b/Documentation/device-mapper/kcopyd.txt
deleted file mode 100644
index 820382c4cecf..000000000000
--- a/Documentation/device-mapper/kcopyd.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-kcopyd
-======
-
-Kcopyd provides the ability to copy a range of sectors from one block-device
-to one or more other block-devices, with an asynchronous completion
-notification. It is used by dm-snapshot and dm-mirror.
-
-Users of kcopyd must first create a client and indicate how many memory pages
-to set aside for their copy jobs. This is done with a call to
-kcopyd_client_create().
-
-   int kcopyd_client_create(unsigned int num_pages,
-                            struct kcopyd_client **result);
-
-To start a copy job, the user must set up io_region structures to describe
-the source and destinations of the copy. Each io_region indicates a
-block-device along with the starting sector and size of the region. The source
-of the copy is given as one io_region structure, and the destinations of the
-copy are given as an array of io_region structures.
-
-   struct io_region {
-      struct block_device *bdev;
-      sector_t sector;
-      sector_t count;
-   };
-
-To start the copy, the user calls kcopyd_copy(), passing in the client
-pointer, pointers to the source and destination io_regions, the name of a
-completion callback routine, and a pointer to some context data for the copy.
-
-   int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
-                   unsigned int num_dests, struct io_region *dests,
-                   unsigned int flags, kcopyd_notify_fn fn, void *context);
-
-   typedef void (*kcopyd_notify_fn)(int read_err, unsigned int write_err,
-				    void *context);
-
-When the copy completes, kcopyd will call the user's completion routine,
-passing back the user's context pointer. It will also indicate if a read or
-write error occurred during the copy.
-
-When a user is done with all their copy jobs, they should call
-kcopyd_client_destroy() to delete the kcopyd client, which will release the
-associated memory pages.
-
-   void kcopyd_client_destroy(struct kcopyd_client *kc);
-
diff --git a/Documentation/device-mapper/linear.rst b/Documentation/device-mapper/linear.rst
new file mode 100644
index 000000000000..9d17fc6e64a9
--- /dev/null
+++ b/Documentation/device-mapper/linear.rst
@@ -0,0 +1,63 @@
+=========
+dm-linear
+=========
+
+Device-Mapper's "linear" target maps a linear range of the Device-Mapper
+device onto a linear range of another device.  This is the basic building
+block of logical volume managers.
+
+Parameters: <dev path> <offset>
+    <dev path>:
+	Full pathname to the underlying block-device, or a
+        "major:minor" device-number.
+    <offset>:
+	Starting sector within the device.
+
+
+Example scripts
+===============
+
+::
+
+  #!/bin/sh
+  # Create an identity mapping for a device
+  echo "0 `blockdev --getsz $1` linear $1 0" | dmsetup create identity
+
+::
+
+  #!/bin/sh
+  # Join 2 devices together
+  size1=`blockdev --getsz $1`
+  size2=`blockdev --getsz $2`
+  echo "0 $size1 linear $1 0
+  $size1 $size2 linear $2 0" | dmsetup create joined
+
+::
+
+  #!/usr/bin/perl -w
+  # Split a device into 4M chunks and then join them together in reverse order.
+
+  my $name = "reverse";
+  my $extent_size = 4 * 1024 * 2;
+  my $dev = $ARGV[0];
+  my $table = "";
+  my $count = 0;
+
+  if (!defined($dev)) {
+          die("Please specify a device.\n");
+  }
+
+  my $dev_size = `blockdev --getsz $dev`;
+  my $extents = int($dev_size / $extent_size) -
+                (($dev_size % $extent_size) ? 1 : 0);
+
+  while ($extents > 0) {
+          my $this_start = $count * $extent_size;
+          $extents--;
+          $count++;
+          my $this_offset = $extents * $extent_size;
+
+          $table .= "$this_start $extent_size linear $dev $this_offset\n";
+  }
+
+  `echo \"$table\" | dmsetup create $name`;
diff --git a/Documentation/device-mapper/linear.txt b/Documentation/device-mapper/linear.txt
deleted file mode 100644
index 7cb98d89d3f8..000000000000
--- a/Documentation/device-mapper/linear.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-dm-linear
-=========
-
-Device-Mapper's "linear" target maps a linear range of the Device-Mapper
-device onto a linear range of another device.  This is the basic building
-block of logical volume managers.
-
-Parameters: <dev path> <offset>
-    <dev path>: Full pathname to the underlying block-device, or a
-                "major:minor" device-number.
-    <offset>: Starting sector within the device.
-
-
-Example scripts
-===============
-[[
-#!/bin/sh
-# Create an identity mapping for a device
-echo "0 `blockdev --getsz $1` linear $1 0" | dmsetup create identity
-]]
-
-
-[[
-#!/bin/sh
-# Join 2 devices together
-size1=`blockdev --getsz $1`
-size2=`blockdev --getsz $2`
-echo "0 $size1 linear $1 0
-$size1 $size2 linear $2 0" | dmsetup create joined
-]]
-
-
-[[
-#!/usr/bin/perl -w
-# Split a device into 4M chunks and then join them together in reverse order.
-
-my $name = "reverse";
-my $extent_size = 4 * 1024 * 2;
-my $dev = $ARGV[0];
-my $table = "";
-my $count = 0;
-
-if (!defined($dev)) {
-        die("Please specify a device.\n");
-}
-
-my $dev_size = `blockdev --getsz $dev`;
-my $extents = int($dev_size / $extent_size) -
-              (($dev_size % $extent_size) ? 1 : 0);
-
-while ($extents > 0) {
-        my $this_start = $count * $extent_size;
-        $extents--;
-        $count++;
-        my $this_offset = $extents * $extent_size;
-
-        $table .= "$this_start $extent_size linear $dev $this_offset\n";
-}
-
-`echo \"$table\" | dmsetup create $name`;
-]]
diff --git a/Documentation/device-mapper/log-writes.rst b/Documentation/device-mapper/log-writes.rst
new file mode 100644
index 000000000000..23141f2ffb7c
--- /dev/null
+++ b/Documentation/device-mapper/log-writes.rst
@@ -0,0 +1,145 @@
+=============
+dm-log-writes
+=============
+
+This target takes 2 devices, one to pass all IO to normally, and one to log all
+of the write operations to.  This is intended for file system developers wishing
+to verify the integrity of metadata or data as the file system is written to.
+There is a log_write_entry written for every WRITE request and the target is
+able to take arbitrary data from userspace to insert into the log.  The data
+that is in the WRITE requests is copied into the log to make the replay happen
+exactly as it happened originally.
+
+Log Ordering
+============
+
+We log things in order of completion once we are sure the write is no longer in
+cache.  This means that normal WRITE requests are not actually logged until the
+next REQ_PREFLUSH request.  This is to make it easier for userspace to replay
+the log in a way that correlates to what is on disk and not what is in cache,
+to make it easier to detect improper waiting/flushing.
+
+This works by attaching all WRITE requests to a list once the write completes.
+Once we see a REQ_PREFLUSH request we splice this list onto the request and once
+the FLUSH request completes we log all of the WRITEs and then the FLUSH.  Only
+completed WRITEs, at the time the REQ_PREFLUSH is issued, are added in order to
+simulate the worst case scenario with regard to power failures.  Consider the
+following example (W means write, C means complete):
+
+	W1,W2,W3,C3,C2,Wflush,C1,Cflush
+
+The log would show the following:
+
+	W3,W2,flush,W1....
+
+Again this is to simulate what is actually on disk, this allows us to detect
+cases where a power failure at a particular point in time would create an
+inconsistent file system.
+
+Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as
+they complete as those requests will obviously bypass the device cache.
+
+Any REQ_OP_DISCARD requests are treated like WRITE requests.  Otherwise we would
+have all the DISCARD requests, and then the WRITE requests and then the FLUSH
+request.  Consider the following example:
+
+	WRITE block 1, DISCARD block 1, FLUSH
+
+If we logged DISCARD when it completed, the replay would look like this:
+
+	DISCARD 1, WRITE 1, FLUSH
+
+which isn't quite what happened and wouldn't be caught during the log replay.
+
+Target interface
+================
+
+i) Constructor
+
+   log-writes <dev_path> <log_dev_path>
+
+   ============= ==============================================
+   dev_path	 Device that all of the IO will go to normally.
+   log_dev_path  Device where the log entries are written to.
+   ============= ==============================================
+
+ii) Status
+
+    <#logged entries> <highest allocated sector>
+
+    =========================== ========================
+    #logged entries	        Number of logged entries
+    highest allocated sector    Highest allocated sector
+    =========================== ========================
+
+iii) Messages
+
+    mark <description>
+
+	You can use a dmsetup message to set an arbitrary mark in a log.
+	For example say you want to fsck a file system after every
+	write, but first you need to replay up to the mkfs to make sure
+	we're fsck'ing something reasonable, you would do something like
+	this::
+
+	  mkfs.btrfs -f /dev/mapper/log
+	  dmsetup message log 0 mark mkfs
+	  <run test>
+
+	This would allow you to replay the log up to the mkfs mark and
+	then replay from that point on doing the fsck check in the
+	interval that you want.
+
+	Every log has a mark at the end labeled "dm-log-writes-end".
+
+Userspace component
+===================
+
+There is a userspace tool that will replay the log for you in various ways.
+It can be found here: https://github.com/josefbacik/log-writes
+
+Example usage
+=============
+
+Say you want to test fsync on your file system.  You would do something like
+this::
+
+  TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
+  dmsetup create log --table "$TABLE"
+  mkfs.btrfs -f /dev/mapper/log
+  dmsetup message log 0 mark mkfs
+
+  mount /dev/mapper/log /mnt/btrfs-test
+  <some test that does fsync at the end>
+  dmsetup message log 0 mark fsync
+  md5sum /mnt/btrfs-test/foo
+  umount /mnt/btrfs-test
+
+  dmsetup remove log
+  replay-log --log /dev/sdc --replay /dev/sdb --end-mark fsync
+  mount /dev/sdb /mnt/btrfs-test
+  md5sum /mnt/btrfs-test/foo
+  <verify md5sum's are correct>
+
+  Another option is to do a complicated file system operation and verify the file
+  system is consistent during the entire operation.  You could do this with:
+
+  TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
+  dmsetup create log --table "$TABLE"
+  mkfs.btrfs -f /dev/mapper/log
+  dmsetup message log 0 mark mkfs
+
+  mount /dev/mapper/log /mnt/btrfs-test
+  <fsstress to dirty the fs>
+  btrfs filesystem balance /mnt/btrfs-test
+  umount /mnt/btrfs-test
+  dmsetup remove log
+
+  replay-log --log /dev/sdc --replay /dev/sdb --end-mark mkfs
+  btrfsck /dev/sdb
+  replay-log --log /dev/sdc --replay /dev/sdb --start-mark mkfs \
+	--fsck "btrfsck /dev/sdb" --check fua
+
+And that will replay the log until it sees a FUA request, run the fsck command
+and if the fsck passes it will replay to the next FUA, until it is completed or
+the fsck command exists abnormally.
diff --git a/Documentation/device-mapper/log-writes.txt b/Documentation/device-mapper/log-writes.txt
deleted file mode 100644
index b638d124be6a..000000000000
--- a/Documentation/device-mapper/log-writes.txt
+++ /dev/null
@@ -1,140 +0,0 @@
-dm-log-writes
-=============
-
-This target takes 2 devices, one to pass all IO to normally, and one to log all
-of the write operations to.  This is intended for file system developers wishing
-to verify the integrity of metadata or data as the file system is written to.
-There is a log_write_entry written for every WRITE request and the target is
-able to take arbitrary data from userspace to insert into the log.  The data
-that is in the WRITE requests is copied into the log to make the replay happen
-exactly as it happened originally.
-
-Log Ordering
-============
-
-We log things in order of completion once we are sure the write is no longer in
-cache.  This means that normal WRITE requests are not actually logged until the
-next REQ_PREFLUSH request.  This is to make it easier for userspace to replay
-the log in a way that correlates to what is on disk and not what is in cache,
-to make it easier to detect improper waiting/flushing.
-
-This works by attaching all WRITE requests to a list once the write completes.
-Once we see a REQ_PREFLUSH request we splice this list onto the request and once
-the FLUSH request completes we log all of the WRITEs and then the FLUSH.  Only
-completed WRITEs, at the time the REQ_PREFLUSH is issued, are added in order to
-simulate the worst case scenario with regard to power failures.  Consider the
-following example (W means write, C means complete):
-
-W1,W2,W3,C3,C2,Wflush,C1,Cflush
-
-The log would show the following
-
-W3,W2,flush,W1....
-
-Again this is to simulate what is actually on disk, this allows us to detect
-cases where a power failure at a particular point in time would create an
-inconsistent file system.
-
-Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as
-they complete as those requests will obviously bypass the device cache.
-
-Any REQ_OP_DISCARD requests are treated like WRITE requests.  Otherwise we would
-have all the DISCARD requests, and then the WRITE requests and then the FLUSH
-request.  Consider the following example:
-
-WRITE block 1, DISCARD block 1, FLUSH
-
-If we logged DISCARD when it completed, the replay would look like this
-
-DISCARD 1, WRITE 1, FLUSH
-
-which isn't quite what happened and wouldn't be caught during the log replay.
-
-Target interface
-================
-
-i) Constructor
-
-   log-writes <dev_path> <log_dev_path>
-
-   dev_path	: Device that all of the IO will go to normally.
-   log_dev_path : Device where the log entries are written to.
-
-ii) Status
-
-    <#logged entries> <highest allocated sector>
-
-    #logged entries	       : Number of logged entries
-    highest allocated sector   : Highest allocated sector
-
-iii) Messages
-
-    mark <description>
-
-	You can use a dmsetup message to set an arbitrary mark in a log.
-	For example say you want to fsck a file system after every
-	write, but first you need to replay up to the mkfs to make sure
-	we're fsck'ing something reasonable, you would do something like
-	this:
-
-	  mkfs.btrfs -f /dev/mapper/log
-	  dmsetup message log 0 mark mkfs
-	  <run test>
-
-	  This would allow you to replay the log up to the mkfs mark and
-	  then replay from that point on doing the fsck check in the
-	  interval that you want.
-
-	Every log has a mark at the end labeled "dm-log-writes-end".
-
-Userspace component
-===================
-
-There is a userspace tool that will replay the log for you in various ways.
-It can be found here: https://github.com/josefbacik/log-writes
-
-Example usage
-=============
-
-Say you want to test fsync on your file system.  You would do something like
-this:
-
-TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
-dmsetup create log --table "$TABLE"
-mkfs.btrfs -f /dev/mapper/log
-dmsetup message log 0 mark mkfs
-
-mount /dev/mapper/log /mnt/btrfs-test
-<some test that does fsync at the end>
-dmsetup message log 0 mark fsync
-md5sum /mnt/btrfs-test/foo
-umount /mnt/btrfs-test
-
-dmsetup remove log
-replay-log --log /dev/sdc --replay /dev/sdb --end-mark fsync
-mount /dev/sdb /mnt/btrfs-test
-md5sum /mnt/btrfs-test/foo
-<verify md5sum's are correct>
-
-Another option is to do a complicated file system operation and verify the file
-system is consistent during the entire operation.  You could do this with:
-
-TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
-dmsetup create log --table "$TABLE"
-mkfs.btrfs -f /dev/mapper/log
-dmsetup message log 0 mark mkfs
-
-mount /dev/mapper/log /mnt/btrfs-test
-<fsstress to dirty the fs>
-btrfs filesystem balance /mnt/btrfs-test
-umount /mnt/btrfs-test
-dmsetup remove log
-
-replay-log --log /dev/sdc --replay /dev/sdb --end-mark mkfs
-btrfsck /dev/sdb
-replay-log --log /dev/sdc --replay /dev/sdb --start-mark mkfs \
-	--fsck "btrfsck /dev/sdb" --check fua
-
-And that will replay the log until it sees a FUA request, run the fsck command
-and if the fsck passes it will replay to the next FUA, until it is completed or
-the fsck command exists abnormally.
diff --git a/Documentation/device-mapper/persistent-data.rst b/Documentation/device-mapper/persistent-data.rst
new file mode 100644
index 000000000000..2065c3c5a091
--- /dev/null
+++ b/Documentation/device-mapper/persistent-data.rst
@@ -0,0 +1,88 @@
+===============
+Persistent data
+===============
+
+Introduction
+============
+
+The more-sophisticated device-mapper targets require complex metadata
+that is managed in kernel.  In late 2010 we were seeing that various
+different targets were rolling their own data structures, for example:
+
+- Mikulas Patocka's multisnap implementation
+- Heinz Mauelshagen's thin provisioning target
+- Another btree-based caching target posted to dm-devel
+- Another multi-snapshot target based on a design of Daniel Phillips
+
+Maintaining these data structures takes a lot of work, so if possible
+we'd like to reduce the number.
+
+The persistent-data library is an attempt to provide a re-usable
+framework for people who want to store metadata in device-mapper
+targets.  It's currently used by the thin-provisioning target and an
+upcoming hierarchical storage target.
+
+Overview
+========
+
+The main documentation is in the header files which can all be found
+under drivers/md/persistent-data.
+
+The block manager
+-----------------
+
+dm-block-manager.[hc]
+
+This provides access to the data on disk in fixed sized-blocks.  There
+is a read/write locking interface to prevent concurrent accesses, and
+keep data that is being used in the cache.
+
+Clients of persistent-data are unlikely to use this directly.
+
+The transaction manager
+-----------------------
+
+dm-transaction-manager.[hc]
+
+This restricts access to blocks and enforces copy-on-write semantics.
+The only way you can get hold of a writable block through the
+transaction manager is by shadowing an existing block (ie. doing
+copy-on-write) or allocating a fresh one.  Shadowing is elided within
+the same transaction so performance is reasonable.  The commit method
+ensures that all data is flushed before it writes the superblock.
+On power failure your metadata will be as it was when last committed.
+
+The Space Maps
+--------------
+
+dm-space-map.h
+dm-space-map-metadata.[hc]
+dm-space-map-disk.[hc]
+
+On-disk data structures that keep track of reference counts of blocks.
+Also acts as the allocator of new blocks.  Currently two
+implementations: a simpler one for managing blocks on a different
+device (eg. thinly-provisioned data blocks); and one for managing
+the metadata space.  The latter is complicated by the need to store
+its own data within the space it's managing.
+
+The data structures
+-------------------
+
+dm-btree.[hc]
+dm-btree-remove.c
+dm-btree-spine.c
+dm-btree-internal.h
+
+Currently there is only one data structure, a hierarchical btree.
+There are plans to add more.  For example, something with an
+array-like interface would see a lot of use.
+
+The btree is 'hierarchical' in that you can define it to be composed
+of nested btrees, and take multiple keys.  For example, the
+thin-provisioning target uses a btree with two levels of nesting.
+The first maps a device id to a mapping tree, and that in turn maps a
+virtual block to a physical block.
+
+Values stored in the btrees can have arbitrary size.  Keys are always
+64bits, although nesting allows you to use multiple keys.
diff --git a/Documentation/device-mapper/persistent-data.txt b/Documentation/device-mapper/persistent-data.txt
deleted file mode 100644
index a333bcb3a6c2..000000000000
--- a/Documentation/device-mapper/persistent-data.txt
+++ /dev/null
@@ -1,84 +0,0 @@
-Introduction
-============
-
-The more-sophisticated device-mapper targets require complex metadata
-that is managed in kernel.  In late 2010 we were seeing that various
-different targets were rolling their own data structures, for example:
-
-- Mikulas Patocka's multisnap implementation
-- Heinz Mauelshagen's thin provisioning target
-- Another btree-based caching target posted to dm-devel
-- Another multi-snapshot target based on a design of Daniel Phillips
-
-Maintaining these data structures takes a lot of work, so if possible
-we'd like to reduce the number.
-
-The persistent-data library is an attempt to provide a re-usable
-framework for people who want to store metadata in device-mapper
-targets.  It's currently used by the thin-provisioning target and an
-upcoming hierarchical storage target.
-
-Overview
-========
-
-The main documentation is in the header files which can all be found
-under drivers/md/persistent-data.
-
-The block manager
------------------
-
-dm-block-manager.[hc]
-
-This provides access to the data on disk in fixed sized-blocks.  There
-is a read/write locking interface to prevent concurrent accesses, and
-keep data that is being used in the cache.
-
-Clients of persistent-data are unlikely to use this directly.
-
-The transaction manager
------------------------
-
-dm-transaction-manager.[hc]
-
-This restricts access to blocks and enforces copy-on-write semantics.
-The only way you can get hold of a writable block through the
-transaction manager is by shadowing an existing block (ie. doing
-copy-on-write) or allocating a fresh one.  Shadowing is elided within
-the same transaction so performance is reasonable.  The commit method
-ensures that all data is flushed before it writes the superblock.
-On power failure your metadata will be as it was when last committed.
-
-The Space Maps
---------------
-
-dm-space-map.h
-dm-space-map-metadata.[hc]
-dm-space-map-disk.[hc]
-
-On-disk data structures that keep track of reference counts of blocks.
-Also acts as the allocator of new blocks.  Currently two
-implementations: a simpler one for managing blocks on a different
-device (eg. thinly-provisioned data blocks); and one for managing
-the metadata space.  The latter is complicated by the need to store
-its own data within the space it's managing.
-
-The data structures
--------------------
-
-dm-btree.[hc]
-dm-btree-remove.c
-dm-btree-spine.c
-dm-btree-internal.h
-
-Currently there is only one data structure, a hierarchical btree.
-There are plans to add more.  For example, something with an
-array-like interface would see a lot of use.
-
-The btree is 'hierarchical' in that you can define it to be composed
-of nested btrees, and take multiple keys.  For example, the
-thin-provisioning target uses a btree with two levels of nesting.
-The first maps a device id to a mapping tree, and that in turn maps a
-virtual block to a physical block.
-
-Values stored in the btrees can have arbitrary size.  Keys are always
-64bits, although nesting allows you to use multiple keys.
diff --git a/Documentation/device-mapper/snapshot.rst b/Documentation/device-mapper/snapshot.rst
new file mode 100644
index 000000000000..4c53304e72f1
--- /dev/null
+++ b/Documentation/device-mapper/snapshot.rst
@@ -0,0 +1,180 @@
+==============================
+Device-mapper snapshot support
+==============================
+
+Device-mapper allows you, without massive data copying:
+
+-  To create snapshots of any block device i.e. mountable, saved states of
+   the block device which are also writable without interfering with the
+   original content;
+-  To create device "forks", i.e. multiple different versions of the
+   same data stream.
+-  To merge a snapshot of a block device back into the snapshot's origin
+   device.
+
+In the first two cases, dm copies only the chunks of data that get
+changed and uses a separate copy-on-write (COW) block device for
+storage.
+
+For snapshot merge the contents of the COW storage are merged back into
+the origin device.
+
+
+There are three dm targets available:
+snapshot, snapshot-origin, and snapshot-merge.
+
+-  snapshot-origin <origin>
+
+which will normally have one or more snapshots based on it.
+Reads will be mapped directly to the backing device. For each write, the
+original data will be saved in the <COW device> of each snapshot to keep
+its visible content unchanged, at least until the <COW device> fills up.
+
+
+-  snapshot <origin> <COW device> <persistent?> <chunksize>
+
+A snapshot of the <origin> block device is created. Changed chunks of
+<chunksize> sectors will be stored on the <COW device>.  Writes will
+only go to the <COW device>.  Reads will come from the <COW device> or
+from <origin> for unchanged data.  <COW device> will often be
+smaller than the origin and if it fills up the snapshot will become
+useless and be disabled, returning errors.  So it is important to monitor
+the amount of free space and expand the <COW device> before it fills up.
+
+<persistent?> is P (Persistent) or N (Not persistent - will not survive
+after reboot).  O (Overflow) can be added as a persistent store option
+to allow userspace to advertise its support for seeing "Overflow" in the
+snapshot status.  So supported store types are "P", "PO" and "N".
+
+The difference between persistent and transient is with transient
+snapshots less metadata must be saved on disk - they can be kept in
+memory by the kernel.
+
+When loading or unloading the snapshot target, the corresponding
+snapshot-origin or snapshot-merge target must be suspended. A failure to
+suspend the origin target could result in data corruption.
+
+
+* snapshot-merge <origin> <COW device> <persistent> <chunksize>
+
+takes the same table arguments as the snapshot target except it only
+works with persistent snapshots.  This target assumes the role of the
+"snapshot-origin" target and must not be loaded if the "snapshot-origin"
+is still present for <origin>.
+
+Creates a merging snapshot that takes control of the changed chunks
+stored in the <COW device> of an existing snapshot, through a handover
+procedure, and merges these chunks back into the <origin>.  Once merging
+has started (in the background) the <origin> may be opened and the merge
+will continue while I/O is flowing to it.  Changes to the <origin> are
+deferred until the merging snapshot's corresponding chunk(s) have been
+merged.  Once merging has started the snapshot device, associated with
+the "snapshot" target, will return -EIO when accessed.
+
+
+How snapshot is used by LVM2
+============================
+When you create the first LVM2 snapshot of a volume, four dm devices are used:
+
+1) a device containing the original mapping table of the source volume;
+2) a device used as the <COW device>;
+3) a "snapshot" device, combining #1 and #2, which is the visible snapshot
+   volume;
+4) the "original" volume (which uses the device number used by the original
+   source volume), whose table is replaced by a "snapshot-origin" mapping
+   from device #1.
+
+A fixed naming scheme is used, so with the following commands::
+
+  lvcreate -L 1G -n base volumeGroup
+  lvcreate -L 100M --snapshot -n snap volumeGroup/base
+
+we'll have this situation (with volumes in above order)::
+
+  # dmsetup table|grep volumeGroup
+
+  volumeGroup-base-real: 0 2097152 linear 8:19 384
+  volumeGroup-snap-cow: 0 204800 linear 8:19 2097536
+  volumeGroup-snap: 0 2097152 snapshot 254:11 254:12 P 16
+  volumeGroup-base: 0 2097152 snapshot-origin 254:11
+
+  # ls -lL /dev/mapper/volumeGroup-*
+  brw-------  1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
+  brw-------  1 root root 254, 12 29 ago 18:15 /dev/mapper/volumeGroup-snap-cow
+  brw-------  1 root root 254, 13 29 ago 18:15 /dev/mapper/volumeGroup-snap
+  brw-------  1 root root 254, 10 29 ago 18:14 /dev/mapper/volumeGroup-base
+
+
+How snapshot-merge is used by LVM2
+==================================
+A merging snapshot assumes the role of the "snapshot-origin" while
+merging.  As such the "snapshot-origin" is replaced with
+"snapshot-merge".  The "-real" device is not changed and the "-cow"
+device is renamed to <origin name>-cow to aid LVM2's cleanup of the
+merging snapshot after it completes.  The "snapshot" that hands over its
+COW device to the "snapshot-merge" is deactivated (unless using lvchange
+--refresh); but if it is left active it will simply return I/O errors.
+
+A snapshot will merge into its origin with the following command::
+
+  lvconvert --merge volumeGroup/snap
+
+we'll now have this situation::
+
+  # dmsetup table|grep volumeGroup
+
+  volumeGroup-base-real: 0 2097152 linear 8:19 384
+  volumeGroup-base-cow: 0 204800 linear 8:19 2097536
+  volumeGroup-base: 0 2097152 snapshot-merge 254:11 254:12 P 16
+
+  # ls -lL /dev/mapper/volumeGroup-*
+  brw-------  1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
+  brw-------  1 root root 254, 12 29 ago 18:16 /dev/mapper/volumeGroup-base-cow
+  brw-------  1 root root 254, 10 29 ago 18:16 /dev/mapper/volumeGroup-base
+
+
+How to determine when a merging is complete
+===========================================
+The snapshot-merge and snapshot status lines end with:
+
+  <sectors_allocated>/<total_sectors> <metadata_sectors>
+
+Both <sectors_allocated> and <total_sectors> include both data and metadata.
+During merging, the number of sectors allocated gets smaller and
+smaller.  Merging has finished when the number of sectors holding data
+is zero, in other words <sectors_allocated> == <metadata_sectors>.
+
+Here is a practical example (using a hybrid of lvm and dmsetup commands)::
+
+  # lvs
+    LV      VG          Attr   LSize Origin  Snap%  Move Log Copy%  Convert
+    base    volumeGroup owi-a- 4.00g
+    snap    volumeGroup swi-a- 1.00g base  18.97
+
+  # dmsetup status volumeGroup-snap
+  0 8388608 snapshot 397896/2097152 1560
+                                    ^^^^ metadata sectors
+
+  # lvconvert --merge -b volumeGroup/snap
+    Merging of volume snap started.
+
+  # lvs volumeGroup/snap
+    LV      VG          Attr   LSize Origin  Snap%  Move Log Copy%  Convert
+    base    volumeGroup Owi-a- 4.00g          17.23
+
+  # dmsetup status volumeGroup-base
+  0 8388608 snapshot-merge 281688/2097152 1104
+
+  # dmsetup status volumeGroup-base
+  0 8388608 snapshot-merge 180480/2097152 712
+
+  # dmsetup status volumeGroup-base
+  0 8388608 snapshot-merge 16/2097152 16
+
+Merging has finished.
+
+::
+
+  # lvs
+    LV      VG          Attr   LSize Origin  Snap%  Move Log Copy%  Convert
+    base    volumeGroup owi-a- 4.00g
diff --git a/Documentation/device-mapper/snapshot.txt b/Documentation/device-mapper/snapshot.txt
deleted file mode 100644
index b8bbb516f989..000000000000
--- a/Documentation/device-mapper/snapshot.txt
+++ /dev/null
@@ -1,176 +0,0 @@
-Device-mapper snapshot support
-==============================
-
-Device-mapper allows you, without massive data copying:
-
-*) To create snapshots of any block device i.e. mountable, saved states of
-the block device which are also writable without interfering with the
-original content;
-*) To create device "forks", i.e. multiple different versions of the
-same data stream.
-*) To merge a snapshot of a block device back into the snapshot's origin
-device.
-
-In the first two cases, dm copies only the chunks of data that get
-changed and uses a separate copy-on-write (COW) block device for
-storage.
-
-For snapshot merge the contents of the COW storage are merged back into
-the origin device.
-
-
-There are three dm targets available:
-snapshot, snapshot-origin, and snapshot-merge.
-
-*) snapshot-origin <origin>
-
-which will normally have one or more snapshots based on it.
-Reads will be mapped directly to the backing device. For each write, the
-original data will be saved in the <COW device> of each snapshot to keep
-its visible content unchanged, at least until the <COW device> fills up.
-
-
-*) snapshot <origin> <COW device> <persistent?> <chunksize>
-
-A snapshot of the <origin> block device is created. Changed chunks of
-<chunksize> sectors will be stored on the <COW device>.  Writes will
-only go to the <COW device>.  Reads will come from the <COW device> or
-from <origin> for unchanged data.  <COW device> will often be
-smaller than the origin and if it fills up the snapshot will become
-useless and be disabled, returning errors.  So it is important to monitor
-the amount of free space and expand the <COW device> before it fills up.
-
-<persistent?> is P (Persistent) or N (Not persistent - will not survive
-after reboot).  O (Overflow) can be added as a persistent store option
-to allow userspace to advertise its support for seeing "Overflow" in the
-snapshot status.  So supported store types are "P", "PO" and "N".
-
-The difference between persistent and transient is with transient
-snapshots less metadata must be saved on disk - they can be kept in
-memory by the kernel.
-
-When loading or unloading the snapshot target, the corresponding
-snapshot-origin or snapshot-merge target must be suspended. A failure to
-suspend the origin target could result in data corruption.
-
-
-* snapshot-merge <origin> <COW device> <persistent> <chunksize>
-
-takes the same table arguments as the snapshot target except it only
-works with persistent snapshots.  This target assumes the role of the
-"snapshot-origin" target and must not be loaded if the "snapshot-origin"
-is still present for <origin>.
-
-Creates a merging snapshot that takes control of the changed chunks
-stored in the <COW device> of an existing snapshot, through a handover
-procedure, and merges these chunks back into the <origin>.  Once merging
-has started (in the background) the <origin> may be opened and the merge
-will continue while I/O is flowing to it.  Changes to the <origin> are
-deferred until the merging snapshot's corresponding chunk(s) have been
-merged.  Once merging has started the snapshot device, associated with
-the "snapshot" target, will return -EIO when accessed.
-
-
-How snapshot is used by LVM2
-============================
-When you create the first LVM2 snapshot of a volume, four dm devices are used:
-
-1) a device containing the original mapping table of the source volume;
-2) a device used as the <COW device>;
-3) a "snapshot" device, combining #1 and #2, which is the visible snapshot
-   volume;
-4) the "original" volume (which uses the device number used by the original
-   source volume), whose table is replaced by a "snapshot-origin" mapping
-   from device #1.
-
-A fixed naming scheme is used, so with the following commands:
-
-lvcreate -L 1G -n base volumeGroup
-lvcreate -L 100M --snapshot -n snap volumeGroup/base
-
-we'll have this situation (with volumes in above order):
-
-# dmsetup table|grep volumeGroup
-
-volumeGroup-base-real: 0 2097152 linear 8:19 384
-volumeGroup-snap-cow: 0 204800 linear 8:19 2097536
-volumeGroup-snap: 0 2097152 snapshot 254:11 254:12 P 16
-volumeGroup-base: 0 2097152 snapshot-origin 254:11
-
-# ls -lL /dev/mapper/volumeGroup-*
-brw-------  1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
-brw-------  1 root root 254, 12 29 ago 18:15 /dev/mapper/volumeGroup-snap-cow
-brw-------  1 root root 254, 13 29 ago 18:15 /dev/mapper/volumeGroup-snap
-brw-------  1 root root 254, 10 29 ago 18:14 /dev/mapper/volumeGroup-base
-
-
-How snapshot-merge is used by LVM2
-==================================
-A merging snapshot assumes the role of the "snapshot-origin" while
-merging.  As such the "snapshot-origin" is replaced with
-"snapshot-merge".  The "-real" device is not changed and the "-cow"
-device is renamed to <origin name>-cow to aid LVM2's cleanup of the
-merging snapshot after it completes.  The "snapshot" that hands over its
-COW device to the "snapshot-merge" is deactivated (unless using lvchange
---refresh); but if it is left active it will simply return I/O errors.
-
-A snapshot will merge into its origin with the following command:
-
-lvconvert --merge volumeGroup/snap
-
-we'll now have this situation:
-
-# dmsetup table|grep volumeGroup
-
-volumeGroup-base-real: 0 2097152 linear 8:19 384
-volumeGroup-base-cow: 0 204800 linear 8:19 2097536
-volumeGroup-base: 0 2097152 snapshot-merge 254:11 254:12 P 16
-
-# ls -lL /dev/mapper/volumeGroup-*
-brw-------  1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
-brw-------  1 root root 254, 12 29 ago 18:16 /dev/mapper/volumeGroup-base-cow
-brw-------  1 root root 254, 10 29 ago 18:16 /dev/mapper/volumeGroup-base
-
-
-How to determine when a merging is complete
-===========================================
-The snapshot-merge and snapshot status lines end with:
-  <sectors_allocated>/<total_sectors> <metadata_sectors>
-
-Both <sectors_allocated> and <total_sectors> include both data and metadata.
-During merging, the number of sectors allocated gets smaller and
-smaller.  Merging has finished when the number of sectors holding data
-is zero, in other words <sectors_allocated> == <metadata_sectors>.
-
-Here is a practical example (using a hybrid of lvm and dmsetup commands):
-
-# lvs
-  LV      VG          Attr   LSize Origin  Snap%  Move Log Copy%  Convert
-  base    volumeGroup owi-a- 4.00g
-  snap    volumeGroup swi-a- 1.00g base  18.97
-
-# dmsetup status volumeGroup-snap
-0 8388608 snapshot 397896/2097152 1560
-                                  ^^^^ metadata sectors
-
-# lvconvert --merge -b volumeGroup/snap
-  Merging of volume snap started.
-
-# lvs volumeGroup/snap
-  LV      VG          Attr   LSize Origin  Snap%  Move Log Copy%  Convert
-  base    volumeGroup Owi-a- 4.00g          17.23
-
-# dmsetup status volumeGroup-base
-0 8388608 snapshot-merge 281688/2097152 1104
-
-# dmsetup status volumeGroup-base
-0 8388608 snapshot-merge 180480/2097152 712
-
-# dmsetup status volumeGroup-base
-0 8388608 snapshot-merge 16/2097152 16
-
-Merging has finished.
-
-# lvs
-  LV      VG          Attr   LSize Origin  Snap%  Move Log Copy%  Convert
-  base    volumeGroup owi-a- 4.00g
diff --git a/Documentation/device-mapper/statistics.rst b/Documentation/device-mapper/statistics.rst
new file mode 100644
index 000000000000..3d80a9f850cc
--- /dev/null
+++ b/Documentation/device-mapper/statistics.rst
@@ -0,0 +1,225 @@
+=============
+DM statistics
+=============
+
+Device Mapper supports the collection of I/O statistics on user-defined
+regions of a DM device.	 If no regions are defined no statistics are
+collected so there isn't any performance impact.  Only bio-based DM
+devices are currently supported.
+
+Each user-defined region specifies a starting sector, length and step.
+Individual statistics will be collected for each step-sized area within
+the range specified.
+
+The I/O statistics counters for each step-sized area of a region are
+in the same format as `/sys/block/*/stat` or `/proc/diskstats` (see:
+Documentation/iostats.txt).  But two extra counters (12 and 13) are
+provided: total time spent reading and writing.  When the histogram
+argument is used, the 14th parameter is reported that represents the
+histogram of latencies.  All these counters may be accessed by sending
+the @stats_print message to the appropriate DM device via dmsetup.
+
+The reported times are in milliseconds and the granularity depends on
+the kernel ticks.  When the option precise_timestamps is used, the
+reported times are in nanoseconds.
+
+Each region has a corresponding unique identifier, which we call a
+region_id, that is assigned when the region is created.	 The region_id
+must be supplied when querying statistics about the region, deleting the
+region, etc.  Unique region_ids enable multiple userspace programs to
+request and process statistics for the same DM device without stepping
+on each other's data.
+
+The creation of DM statistics will allocate memory via kmalloc or
+fallback to using vmalloc space.  At most, 1/4 of the overall system
+memory may be allocated by DM statistics.  The admin can see how much
+memory is used by reading:
+
+	/sys/module/dm_mod/parameters/stats_current_allocated_bytes
+
+Messages
+========
+
+    @stats_create <range> <step> [<number_of_optional_arguments> <optional_arguments>...] [<program_id> [<aux_data>]]
+	Create a new region and return the region_id.
+
+	<range>
+	  "-"
+		whole device
+	  "<start_sector>+<length>"
+		a range of <length> 512-byte sectors
+		starting with <start_sector>.
+
+	<step>
+	  "<area_size>"
+		the range is subdivided into areas each containing
+		<area_size> sectors.
+	  "/<number_of_areas>"
+		the range is subdivided into the specified
+		number of areas.
+
+	<number_of_optional_arguments>
+	  The number of optional arguments
+
+	<optional_arguments>
+	  The following optional arguments are supported:
+
+	  precise_timestamps
+		use precise timer with nanosecond resolution
+		instead of the "jiffies" variable.  When this argument is
+		used, the resulting times are in nanoseconds instead of
+		milliseconds.  Precise timestamps are a little bit slower
+		to obtain than jiffies-based timestamps.
+	  histogram:n1,n2,n3,n4,...
+		collect histogram of latencies.  The
+		numbers n1, n2, etc are times that represent the boundaries
+		of the histogram.  If precise_timestamps is not used, the
+		times are in milliseconds, otherwise they are in
+		nanoseconds.  For each range, the kernel will report the
+		number of requests that completed within this range. For
+		example, if we use "histogram:10,20,30", the kernel will
+		report four numbers a:b:c:d. a is the number of requests
+		that took 0-10 ms to complete, b is the number of requests
+		that took 10-20 ms to complete, c is the number of requests
+		that took 20-30 ms to complete and d is the number of
+		requests that took more than 30 ms to complete.
+
+	<program_id>
+	  An optional parameter.  A name that uniquely identifies
+	  the userspace owner of the range.  This groups ranges together
+	  so that userspace programs can identify the ranges they
+	  created and ignore those created by others.
+	  The kernel returns this string back in the output of
+	  @stats_list message, but it doesn't use it for anything else.
+	  If we omit the number of optional arguments, program id must not
+	  be a number, otherwise it would be interpreted as the number of
+	  optional arguments.
+
+	<aux_data>
+	  An optional parameter.  A word that provides auxiliary data
+	  that is useful to the client program that created the range.
+	  The kernel returns this string back in the output of
+	  @stats_list message, but it doesn't use this value for anything.
+
+    @stats_delete <region_id>
+	Delete the region with the specified id.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+    @stats_clear <region_id>
+	Clear all the counters except the in-flight i/o counters.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+    @stats_list [<program_id>]
+	List all regions registered with @stats_create.
+
+	<program_id>
+	  An optional parameter.
+	  If this parameter is specified, only matching regions
+	  are returned.
+	  If it is not specified, all regions are returned.
+
+	Output format:
+	  <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
+	        precise_timestamps histogram:n1,n2,n3,...
+
+	The strings "precise_timestamps" and "histogram" are printed only
+	if they were specified when creating the region.
+
+    @stats_print <region_id> [<starting_line> <number_of_lines>]
+	Print counters for each step-sized area of a region.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+	<starting_line>
+	  The index of the starting line in the output.
+	  If omitted, all lines are returned.
+
+	<number_of_lines>
+	  The number of lines to include in the output.
+	  If omitted, all lines are returned.
+
+	Output format for each step-sized area of a region:
+
+	  <start_sector>+<length>
+		counters
+
+	  The first 11 counters have the same meaning as
+	  `/sys/block/*/stat or /proc/diskstats`.
+
+	  Please refer to Documentation/iostats.txt for details.
+
+	  1. the number of reads completed
+	  2. the number of reads merged
+	  3. the number of sectors read
+	  4. the number of milliseconds spent reading
+	  5. the number of writes completed
+	  6. the number of writes merged
+	  7. the number of sectors written
+	  8. the number of milliseconds spent writing
+	  9. the number of I/Os currently in progress
+	  10. the number of milliseconds spent doing I/Os
+	  11. the weighted number of milliseconds spent doing I/Os
+
+	  Additional counters:
+
+	  12. the total time spent reading in milliseconds
+	  13. the total time spent writing in milliseconds
+
+    @stats_print_clear <region_id> [<starting_line> <number_of_lines>]
+	Atomically print and then clear all the counters except the
+	in-flight i/o counters.	 Useful when the client consuming the
+	statistics does not want to lose any statistics (those updated
+	between printing and clearing).
+
+	<region_id>
+	  region_id returned from @stats_create
+
+	<starting_line>
+	  The index of the starting line in the output.
+	  If omitted, all lines are printed and then cleared.
+
+	<number_of_lines>
+	  The number of lines to process.
+	  If omitted, all lines are printed and then cleared.
+
+    @stats_set_aux <region_id> <aux_data>
+	Store auxiliary data aux_data for the specified region.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+	<aux_data>
+	  The string that identifies data which is useful to the client
+	  program that created the range.  The kernel returns this
+	  string back in the output of @stats_list message, but it
+	  doesn't use this value for anything.
+
+Examples
+========
+
+Subdivide the DM device 'vol' into 100 pieces and start collecting
+statistics on them::
+
+  dmsetup message vol 0 @stats_create - /100
+
+Set the auxiliary data string to "foo bar baz" (the escape for each
+space must also be escaped, otherwise the shell will consume them)::
+
+  dmsetup message vol 0 @stats_set_aux 0 foo\\ bar\\ baz
+
+List the statistics::
+
+  dmsetup message vol 0 @stats_list
+
+Print the statistics::
+
+  dmsetup message vol 0 @stats_print 0
+
+Delete the statistics::
+
+  dmsetup message vol 0 @stats_delete 0
diff --git a/Documentation/device-mapper/statistics.txt b/Documentation/device-mapper/statistics.txt
deleted file mode 100644
index 170ac02a1f50..000000000000
--- a/Documentation/device-mapper/statistics.txt
+++ /dev/null
@@ -1,223 +0,0 @@
-DM statistics
-=============
-
-Device Mapper supports the collection of I/O statistics on user-defined
-regions of a DM device.	 If no regions are defined no statistics are
-collected so there isn't any performance impact.  Only bio-based DM
-devices are currently supported.
-
-Each user-defined region specifies a starting sector, length and step.
-Individual statistics will be collected for each step-sized area within
-the range specified.
-
-The I/O statistics counters for each step-sized area of a region are
-in the same format as /sys/block/*/stat or /proc/diskstats (see:
-Documentation/iostats.txt).  But two extra counters (12 and 13) are
-provided: total time spent reading and writing.  When the histogram
-argument is used, the 14th parameter is reported that represents the
-histogram of latencies.  All these counters may be accessed by sending
-the @stats_print message to the appropriate DM device via dmsetup.
-
-The reported times are in milliseconds and the granularity depends on
-the kernel ticks.  When the option precise_timestamps is used, the
-reported times are in nanoseconds.
-
-Each region has a corresponding unique identifier, which we call a
-region_id, that is assigned when the region is created.	 The region_id
-must be supplied when querying statistics about the region, deleting the
-region, etc.  Unique region_ids enable multiple userspace programs to
-request and process statistics for the same DM device without stepping
-on each other's data.
-
-The creation of DM statistics will allocate memory via kmalloc or
-fallback to using vmalloc space.  At most, 1/4 of the overall system
-memory may be allocated by DM statistics.  The admin can see how much
-memory is used by reading
-/sys/module/dm_mod/parameters/stats_current_allocated_bytes
-
-Messages
-========
-
-    @stats_create <range> <step>
-		[<number_of_optional_arguments> <optional_arguments>...]
-		[<program_id> [<aux_data>]]
-
-	Create a new region and return the region_id.
-
-	<range>
-	  "-" - whole device
-	  "<start_sector>+<length>" - a range of <length> 512-byte sectors
-				      starting with <start_sector>.
-
-	<step>
-	  "<area_size>" - the range is subdivided into areas each containing
-			  <area_size> sectors.
-	  "/<number_of_areas>" - the range is subdivided into the specified
-				 number of areas.
-
-	<number_of_optional_arguments>
-	  The number of optional arguments
-
-	<optional_arguments>
-	  The following optional arguments are supported
-	  precise_timestamps - use precise timer with nanosecond resolution
-		instead of the "jiffies" variable.  When this argument is
-		used, the resulting times are in nanoseconds instead of
-		milliseconds.  Precise timestamps are a little bit slower
-		to obtain than jiffies-based timestamps.
-	  histogram:n1,n2,n3,n4,... - collect histogram of latencies.  The
-		numbers n1, n2, etc are times that represent the boundaries
-		of the histogram.  If precise_timestamps is not used, the
-		times are in milliseconds, otherwise they are in
-		nanoseconds.  For each range, the kernel will report the
-		number of requests that completed within this range. For
-		example, if we use "histogram:10,20,30", the kernel will
-		report four numbers a:b:c:d. a is the number of requests
-		that took 0-10 ms to complete, b is the number of requests
-		that took 10-20 ms to complete, c is the number of requests
-		that took 20-30 ms to complete and d is the number of
-		requests that took more than 30 ms to complete.
-
-	<program_id>
-	  An optional parameter.  A name that uniquely identifies
-	  the userspace owner of the range.  This groups ranges together
-	  so that userspace programs can identify the ranges they
-	  created and ignore those created by others.
-	  The kernel returns this string back in the output of
-	  @stats_list message, but it doesn't use it for anything else.
-	  If we omit the number of optional arguments, program id must not
-	  be a number, otherwise it would be interpreted as the number of
-	  optional arguments.
-
-	<aux_data>
-	  An optional parameter.  A word that provides auxiliary data
-	  that is useful to the client program that created the range.
-	  The kernel returns this string back in the output of
-	  @stats_list message, but it doesn't use this value for anything.
-
-    @stats_delete <region_id>
-
-	Delete the region with the specified id.
-
-	<region_id>
-	  region_id returned from @stats_create
-
-    @stats_clear <region_id>
-
-	Clear all the counters except the in-flight i/o counters.
-
-	<region_id>
-	  region_id returned from @stats_create
-
-    @stats_list [<program_id>]
-
-	List all regions registered with @stats_create.
-
-	<program_id>
-	  An optional parameter.
-	  If this parameter is specified, only matching regions
-	  are returned.
-	  If it is not specified, all regions are returned.
-
-	Output format:
-	  <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
-	        precise_timestamps histogram:n1,n2,n3,...
-
-	The strings "precise_timestamps" and "histogram" are printed only
-	if they were specified when creating the region.
-
-    @stats_print <region_id> [<starting_line> <number_of_lines>]
-
-	Print counters for each step-sized area of a region.
-
-	<region_id>
-	  region_id returned from @stats_create
-
-	<starting_line>
-	  The index of the starting line in the output.
-	  If omitted, all lines are returned.
-
-	<number_of_lines>
-	  The number of lines to include in the output.
-	  If omitted, all lines are returned.
-
-	Output format for each step-sized area of a region:
-
-	  <start_sector>+<length> counters
-
-	  The first 11 counters have the same meaning as
-	  /sys/block/*/stat or /proc/diskstats.
-
-	  Please refer to Documentation/iostats.txt for details.
-
-	  1. the number of reads completed
-	  2. the number of reads merged
-	  3. the number of sectors read
-	  4. the number of milliseconds spent reading
-	  5. the number of writes completed
-	  6. the number of writes merged
-	  7. the number of sectors written
-	  8. the number of milliseconds spent writing
-	  9. the number of I/Os currently in progress
-	  10. the number of milliseconds spent doing I/Os
-	  11. the weighted number of milliseconds spent doing I/Os
-
-	  Additional counters:
-	  12. the total time spent reading in milliseconds
-	  13. the total time spent writing in milliseconds
-
-    @stats_print_clear <region_id> [<starting_line> <number_of_lines>]
-
-	Atomically print and then clear all the counters except the
-	in-flight i/o counters.	 Useful when the client consuming the
-	statistics does not want to lose any statistics (those updated
-	between printing and clearing).
-
-	<region_id>
-	  region_id returned from @stats_create
-
-	<starting_line>
-	  The index of the starting line in the output.
-	  If omitted, all lines are printed and then cleared.
-
-	<number_of_lines>
-	  The number of lines to process.
-	  If omitted, all lines are printed and then cleared.
-
-    @stats_set_aux <region_id> <aux_data>
-
-	Store auxiliary data aux_data for the specified region.
-
-	<region_id>
-	  region_id returned from @stats_create
-
-	<aux_data>
-	  The string that identifies data which is useful to the client
-	  program that created the range.  The kernel returns this
-	  string back in the output of @stats_list message, but it
-	  doesn't use this value for anything.
-
-Examples
-========
-
-Subdivide the DM device 'vol' into 100 pieces and start collecting
-statistics on them:
-
-  dmsetup message vol 0 @stats_create - /100
-
-Set the auxiliary data string to "foo bar baz" (the escape for each
-space must also be escaped, otherwise the shell will consume them):
-
-  dmsetup message vol 0 @stats_set_aux 0 foo\\ bar\\ baz
-
-List the statistics:
-
-  dmsetup message vol 0 @stats_list
-
-Print the statistics:
-
-  dmsetup message vol 0 @stats_print 0
-
-Delete the statistics:
-
-  dmsetup message vol 0 @stats_delete 0
diff --git a/Documentation/device-mapper/striped.rst b/Documentation/device-mapper/striped.rst
new file mode 100644
index 000000000000..e9a8da192ae1
--- /dev/null
+++ b/Documentation/device-mapper/striped.rst
@@ -0,0 +1,61 @@
+=========
+dm-stripe
+=========
+
+Device-Mapper's "striped" target is used to create a striped (i.e. RAID-0)
+device across one or more underlying devices. Data is written in "chunks",
+with consecutive chunks rotating among the underlying devices. This can
+potentially provide improved I/O throughput by utilizing several physical
+devices in parallel.
+
+Parameters: <num devs> <chunk size> [<dev path> <offset>]+
+    <num devs>:
+	Number of underlying devices.
+    <chunk size>:
+	Size of each chunk of data. Must be at least as
+        large as the system's PAGE_SIZE.
+    <dev path>:
+	Full pathname to the underlying block-device, or a
+	"major:minor" device-number.
+    <offset>:
+	Starting sector within the device.
+
+One or more underlying devices can be specified. The striped device size must
+be a multiple of the chunk size multiplied by the number of underlying devices.
+
+
+Example scripts
+===============
+
+::
+
+  #!/usr/bin/perl -w
+  # Create a striped device across any number of underlying devices. The device
+  # will be called "stripe_dev" and have a chunk-size of 128k.
+
+  my $chunk_size = 128 * 2;
+  my $dev_name = "stripe_dev";
+  my $num_devs = @ARGV;
+  my @devs = @ARGV;
+  my ($min_dev_size, $stripe_dev_size, $i);
+
+  if (!$num_devs) {
+          die("Specify at least one device\n");
+  }
+
+  $min_dev_size = `blockdev --getsz $devs[0]`;
+  for ($i = 1; $i < $num_devs; $i++) {
+          my $this_size = `blockdev --getsz $devs[$i]`;
+          $min_dev_size = ($min_dev_size < $this_size) ?
+                          $min_dev_size : $this_size;
+  }
+
+  $stripe_dev_size = $min_dev_size * $num_devs;
+  $stripe_dev_size -= $stripe_dev_size % ($chunk_size * $num_devs);
+
+  $table = "0 $stripe_dev_size striped $num_devs $chunk_size";
+  for ($i = 0; $i < $num_devs; $i++) {
+          $table .= " $devs[$i] 0";
+  }
+
+  `echo $table | dmsetup create $dev_name`;
diff --git a/Documentation/device-mapper/striped.txt b/Documentation/device-mapper/striped.txt
deleted file mode 100644
index 07ec492cceee..000000000000
--- a/Documentation/device-mapper/striped.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-dm-stripe
-=========
-
-Device-Mapper's "striped" target is used to create a striped (i.e. RAID-0)
-device across one or more underlying devices. Data is written in "chunks",
-with consecutive chunks rotating among the underlying devices. This can
-potentially provide improved I/O throughput by utilizing several physical
-devices in parallel.
-
-Parameters: <num devs> <chunk size> [<dev path> <offset>]+
-    <num devs>: Number of underlying devices.
-    <chunk size>: Size of each chunk of data. Must be at least as
-                  large as the system's PAGE_SIZE.
-    <dev path>: Full pathname to the underlying block-device, or a
-                "major:minor" device-number.
-    <offset>: Starting sector within the device.
-
-One or more underlying devices can be specified. The striped device size must
-be a multiple of the chunk size multiplied by the number of underlying devices.
-
-
-Example scripts
-===============
-
-[[
-#!/usr/bin/perl -w
-# Create a striped device across any number of underlying devices. The device
-# will be called "stripe_dev" and have a chunk-size of 128k.
-
-my $chunk_size = 128 * 2;
-my $dev_name = "stripe_dev";
-my $num_devs = @ARGV;
-my @devs = @ARGV;
-my ($min_dev_size, $stripe_dev_size, $i);
-
-if (!$num_devs) {
-        die("Specify at least one device\n");
-}
-
-$min_dev_size = `blockdev --getsz $devs[0]`;
-for ($i = 1; $i < $num_devs; $i++) {
-        my $this_size = `blockdev --getsz $devs[$i]`;
-        $min_dev_size = ($min_dev_size < $this_size) ?
-                        $min_dev_size : $this_size;
-}
-
-$stripe_dev_size = $min_dev_size * $num_devs;
-$stripe_dev_size -= $stripe_dev_size % ($chunk_size * $num_devs);
-
-$table = "0 $stripe_dev_size striped $num_devs $chunk_size";
-for ($i = 0; $i < $num_devs; $i++) {
-        $table .= " $devs[$i] 0";
-}
-
-`echo $table | dmsetup create $dev_name`;
-]]
-
diff --git a/Documentation/device-mapper/switch.rst b/Documentation/device-mapper/switch.rst
new file mode 100644
index 000000000000..7dde06be1a4f
--- /dev/null
+++ b/Documentation/device-mapper/switch.rst
@@ -0,0 +1,141 @@
+=========
+dm-switch
+=========
+
+The device-mapper switch target creates a device that supports an
+arbitrary mapping of fixed-size regions of I/O across a fixed set of
+paths.  The path used for any specific region can be switched
+dynamically by sending the target a message.
+
+It maps I/O to underlying block devices efficiently when there is a large
+number of fixed-sized address regions but there is no simple pattern
+that would allow for a compact representation of the mapping such as
+dm-stripe.
+
+Background
+----------
+
+Dell EqualLogic and some other iSCSI storage arrays use a distributed
+frameless architecture.  In this architecture, the storage group
+consists of a number of distinct storage arrays ("members") each having
+independent controllers, disk storage and network adapters.  When a LUN
+is created it is spread across multiple members.  The details of the
+spreading are hidden from initiators connected to this storage system.
+The storage group exposes a single target discovery portal, no matter
+how many members are being used.  When iSCSI sessions are created, each
+session is connected to an eth port on a single member.  Data to a LUN
+can be sent on any iSCSI session, and if the blocks being accessed are
+stored on another member the I/O will be forwarded as required.  This
+forwarding is invisible to the initiator.  The storage layout is also
+dynamic, and the blocks stored on disk may be moved from member to
+member as needed to balance the load.
+
+This architecture simplifies the management and configuration of both
+the storage group and initiators.  In a multipathing configuration, it
+is possible to set up multiple iSCSI sessions to use multiple network
+interfaces on both the host and target to take advantage of the
+increased network bandwidth.  An initiator could use a simple round
+robin algorithm to send I/O across all paths and let the storage array
+members forward it as necessary, but there is a performance advantage to
+sending data directly to the correct member.
+
+A device-mapper table already lets you map different regions of a
+device onto different targets.  However in this architecture the LUN is
+spread with an address region size on the order of 10s of MBs, which
+means the resulting table could have more than a million entries and
+consume far too much memory.
+
+Using this device-mapper switch target we can now build a two-layer
+device hierarchy:
+
+    Upper Tier - Determine which array member the I/O should be sent to.
+    Lower Tier - Load balance amongst paths to a particular member.
+
+The lower tier consists of a single dm multipath device for each member.
+Each of these multipath devices contains the set of paths directly to
+the array member in one priority group, and leverages existing path
+selectors to load balance amongst these paths.  We also build a
+non-preferred priority group containing paths to other array members for
+failover reasons.
+
+The upper tier consists of a single dm-switch device.  This device uses
+a bitmap to look up the location of the I/O and choose the appropriate
+lower tier device to route the I/O.  By using a bitmap we are able to
+use 4 bits for each address range in a 16 member group (which is very
+large for us).  This is a much denser representation than the dm table
+b-tree can achieve.
+
+Construction Parameters
+=======================
+
+    <num_paths> <region_size> <num_optional_args> [<optional_args>...] [<dev_path> <offset>]+
+	<num_paths>
+	    The number of paths across which to distribute the I/O.
+
+	<region_size>
+	    The number of 512-byte sectors in a region. Each region can be redirected
+	    to any of the available paths.
+
+	<num_optional_args>
+	    The number of optional arguments. Currently, no optional arguments
+	    are supported and so this must be zero.
+
+	<dev_path>
+	    The block device that represents a specific path to the device.
+
+	<offset>
+	    The offset of the start of data on the specific <dev_path> (in units
+	    of 512-byte sectors). This number is added to the sector number when
+	    forwarding the request to the specific path. Typically it is zero.
+
+Messages
+========
+
+set_region_mappings <index>:<path_nr> [<index>]:<path_nr> [<index>]:<path_nr>...
+
+Modify the region table by specifying which regions are redirected to
+which paths.
+
+<index>
+    The region number (region size was specified in constructor parameters).
+    If index is omitted, the next region (previous index + 1) is used.
+    Expressed in hexadecimal (WITHOUT any prefix like 0x).
+
+<path_nr>
+    The path number in the range 0 ... (<num_paths> - 1).
+    Expressed in hexadecimal (WITHOUT any prefix like 0x).
+
+R<n>,<m>
+    This parameter allows repetitive patterns to be loaded quickly. <n> and <m>
+    are hexadecimal numbers. The last <n> mappings are repeated in the next <m>
+    slots.
+
+Status
+======
+
+No status line is reported.
+
+Example
+=======
+
+Assume that you have volumes vg1/switch0 vg1/switch1 vg1/switch2 with
+the same size.
+
+Create a switch device with 64kB region size::
+
+    dmsetup create switch --table "0 `blockdev --getsz /dev/vg1/switch0`
+	switch 3 128 0 /dev/vg1/switch0 0 /dev/vg1/switch1 0 /dev/vg1/switch2 0"
+
+Set mappings for the first 7 entries to point to devices switch0, switch1,
+switch2, switch0, switch1, switch2, switch1::
+
+    dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1
+
+Set repetitive mapping. This command::
+
+    dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10
+
+is equivalent to::
+
+    dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \
+	:1 :2 :1 :2 :1 :2 :1 :2 :1 :2
diff --git a/Documentation/device-mapper/switch.txt b/Documentation/device-mapper/switch.txt
deleted file mode 100644
index 5bd4831db4a8..000000000000
--- a/Documentation/device-mapper/switch.txt
+++ /dev/null
@@ -1,138 +0,0 @@
-dm-switch
-=========
-
-The device-mapper switch target creates a device that supports an
-arbitrary mapping of fixed-size regions of I/O across a fixed set of
-paths.  The path used for any specific region can be switched
-dynamically by sending the target a message.
-
-It maps I/O to underlying block devices efficiently when there is a large
-number of fixed-sized address regions but there is no simple pattern
-that would allow for a compact representation of the mapping such as
-dm-stripe.
-
-Background
-----------
-
-Dell EqualLogic and some other iSCSI storage arrays use a distributed
-frameless architecture.  In this architecture, the storage group
-consists of a number of distinct storage arrays ("members") each having
-independent controllers, disk storage and network adapters.  When a LUN
-is created it is spread across multiple members.  The details of the
-spreading are hidden from initiators connected to this storage system.
-The storage group exposes a single target discovery portal, no matter
-how many members are being used.  When iSCSI sessions are created, each
-session is connected to an eth port on a single member.  Data to a LUN
-can be sent on any iSCSI session, and if the blocks being accessed are
-stored on another member the I/O will be forwarded as required.  This
-forwarding is invisible to the initiator.  The storage layout is also
-dynamic, and the blocks stored on disk may be moved from member to
-member as needed to balance the load.
-
-This architecture simplifies the management and configuration of both
-the storage group and initiators.  In a multipathing configuration, it
-is possible to set up multiple iSCSI sessions to use multiple network
-interfaces on both the host and target to take advantage of the
-increased network bandwidth.  An initiator could use a simple round
-robin algorithm to send I/O across all paths and let the storage array
-members forward it as necessary, but there is a performance advantage to
-sending data directly to the correct member.
-
-A device-mapper table already lets you map different regions of a
-device onto different targets.  However in this architecture the LUN is
-spread with an address region size on the order of 10s of MBs, which
-means the resulting table could have more than a million entries and
-consume far too much memory.
-
-Using this device-mapper switch target we can now build a two-layer
-device hierarchy:
-
-    Upper Tier - Determine which array member the I/O should be sent to.
-    Lower Tier - Load balance amongst paths to a particular member.
-
-The lower tier consists of a single dm multipath device for each member.
-Each of these multipath devices contains the set of paths directly to
-the array member in one priority group, and leverages existing path
-selectors to load balance amongst these paths.  We also build a
-non-preferred priority group containing paths to other array members for
-failover reasons.
-
-The upper tier consists of a single dm-switch device.  This device uses
-a bitmap to look up the location of the I/O and choose the appropriate
-lower tier device to route the I/O.  By using a bitmap we are able to
-use 4 bits for each address range in a 16 member group (which is very
-large for us).  This is a much denser representation than the dm table
-b-tree can achieve.
-
-Construction Parameters
-=======================
-
-    <num_paths> <region_size> <num_optional_args> [<optional_args>...]
-    [<dev_path> <offset>]+
-
-<num_paths>
-    The number of paths across which to distribute the I/O.
-
-<region_size>
-    The number of 512-byte sectors in a region. Each region can be redirected
-    to any of the available paths.
-
-<num_optional_args>
-    The number of optional arguments. Currently, no optional arguments
-    are supported and so this must be zero.
-
-<dev_path>
-    The block device that represents a specific path to the device.
-
-<offset>
-    The offset of the start of data on the specific <dev_path> (in units
-    of 512-byte sectors). This number is added to the sector number when
-    forwarding the request to the specific path. Typically it is zero.
-
-Messages
-========
-
-set_region_mappings <index>:<path_nr> [<index>]:<path_nr> [<index>]:<path_nr>...
-
-Modify the region table by specifying which regions are redirected to
-which paths.
-
-<index>
-    The region number (region size was specified in constructor parameters).
-    If index is omitted, the next region (previous index + 1) is used.
-    Expressed in hexadecimal (WITHOUT any prefix like 0x).
-
-<path_nr>
-    The path number in the range 0 ... (<num_paths> - 1).
-    Expressed in hexadecimal (WITHOUT any prefix like 0x).
-
-R<n>,<m>
-    This parameter allows repetitive patterns to be loaded quickly. <n> and <m>
-    are hexadecimal numbers. The last <n> mappings are repeated in the next <m>
-    slots.
-
-Status
-======
-
-No status line is reported.
-
-Example
-=======
-
-Assume that you have volumes vg1/switch0 vg1/switch1 vg1/switch2 with
-the same size.
-
-Create a switch device with 64kB region size:
-    dmsetup create switch --table "0 `blockdev --getsz /dev/vg1/switch0`
-	switch 3 128 0 /dev/vg1/switch0 0 /dev/vg1/switch1 0 /dev/vg1/switch2 0"
-
-Set mappings for the first 7 entries to point to devices switch0, switch1,
-switch2, switch0, switch1, switch2, switch1:
-    dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1
-
-Set repetitive mapping. This command:
-    dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10
-is equivalent to:
-    dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \
-	:1 :2 :1 :2 :1 :2 :1 :2 :1 :2
-
diff --git a/Documentation/device-mapper/thin-provisioning.rst b/Documentation/device-mapper/thin-provisioning.rst
new file mode 100644
index 000000000000..bafebf79da4b
--- /dev/null
+++ b/Documentation/device-mapper/thin-provisioning.rst
@@ -0,0 +1,427 @@
+=================
+Thin provisioning
+=================
+
+Introduction
+============
+
+This document describes a collection of device-mapper targets that
+between them implement thin-provisioning and snapshots.
+
+The main highlight of this implementation, compared to the previous
+implementation of snapshots, is that it allows many virtual devices to
+be stored on the same data volume.  This simplifies administration and
+allows the sharing of data between volumes, thus reducing disk usage.
+
+Another significant feature is support for an arbitrary depth of
+recursive snapshots (snapshots of snapshots of snapshots ...).  The
+previous implementation of snapshots did this by chaining together
+lookup tables, and so performance was O(depth).  This new
+implementation uses a single data structure to avoid this degradation
+with depth.  Fragmentation may still be an issue, however, in some
+scenarios.
+
+Metadata is stored on a separate device from data, giving the
+administrator some freedom, for example to:
+
+- Improve metadata resilience by storing metadata on a mirrored volume
+  but data on a non-mirrored one.
+
+- Improve performance by storing the metadata on SSD.
+
+Status
+======
+
+These targets are considered safe for production use.  But different use
+cases will have different performance characteristics, for example due
+to fragmentation of the data volume.
+
+If you find this software is not performing as expected please mail
+dm-devel@redhat.com with details and we'll try our best to improve
+things for you.
+
+Userspace tools for checking and repairing the metadata have been fully
+developed and are available as 'thin_check' and 'thin_repair'.  The name
+of the package that provides these utilities varies by distribution (on
+a Red Hat distribution it is named 'device-mapper-persistent-data').
+
+Cookbook
+========
+
+This section describes some quick recipes for using thin provisioning.
+They use the dmsetup program to control the device-mapper driver
+directly.  End users will be advised to use a higher-level volume
+manager such as LVM2 once support has been added.
+
+Pool device
+-----------
+
+The pool device ties together the metadata volume and the data volume.
+It maps I/O linearly to the data volume and updates the metadata via
+two mechanisms:
+
+- Function calls from the thin targets
+
+- Device-mapper 'messages' from userspace which control the creation of new
+  virtual devices amongst other things.
+
+Setting up a fresh pool device
+------------------------------
+
+Setting up a pool device requires a valid metadata device, and a
+data device.  If you do not have an existing metadata device you can
+make one by zeroing the first 4k to indicate empty metadata.
+
+    dd if=/dev/zero of=$metadata_dev bs=4096 count=1
+
+The amount of metadata you need will vary according to how many blocks
+are shared between thin devices (i.e. through snapshots).  If you have
+less sharing than average you'll need a larger-than-average metadata device.
+
+As a guide, we suggest you calculate the number of bytes to use in the
+metadata device as 48 * $data_dev_size / $data_block_size but round it up
+to 2MB if the answer is smaller.  If you're creating large numbers of
+snapshots which are recording large amounts of change, you may find you
+need to increase this.
+
+The largest size supported is 16GB: If the device is larger,
+a warning will be issued and the excess space will not be used.
+
+Reloading a pool table
+----------------------
+
+You may reload a pool's table, indeed this is how the pool is resized
+if it runs out of space.  (N.B. While specifying a different metadata
+device when reloading is not forbidden at the moment, things will go
+wrong if it does not route I/O to exactly the same on-disk location as
+previously.)
+
+Using an existing pool device
+-----------------------------
+
+::
+
+    dmsetup create pool \
+	--table "0 20971520 thin-pool $metadata_dev $data_dev \
+		 $data_block_size $low_water_mark"
+
+$data_block_size gives the smallest unit of disk space that can be
+allocated at a time expressed in units of 512-byte sectors.
+$data_block_size must be between 128 (64KB) and 2097152 (1GB) and a
+multiple of 128 (64KB).  $data_block_size cannot be changed after the
+thin-pool is created.  People primarily interested in thin provisioning
+may want to use a value such as 1024 (512KB).  People doing lots of
+snapshotting may want a smaller value such as 128 (64KB).  If you are
+not zeroing newly-allocated data, a larger $data_block_size in the
+region of 256000 (128MB) is suggested.
+
+$low_water_mark is expressed in blocks of size $data_block_size.  If
+free space on the data device drops below this level then a dm event
+will be triggered which a userspace daemon should catch allowing it to
+extend the pool device.  Only one such event will be sent.
+
+No special event is triggered if a just resumed device's free space is below
+the low water mark. However, resuming a device always triggers an
+event; a userspace daemon should verify that free space exceeds the low
+water mark when handling this event.
+
+A low water mark for the metadata device is maintained in the kernel and
+will trigger a dm event if free space on the metadata device drops below
+it.
+
+Updating on-disk metadata
+-------------------------
+
+On-disk metadata is committed every time a FLUSH or FUA bio is written.
+If no such requests are made then commits will occur every second.  This
+means the thin-provisioning target behaves like a physical disk that has
+a volatile write cache.  If power is lost you may lose some recent
+writes.  The metadata should always be consistent in spite of any crash.
+
+If data space is exhausted the pool will either error or queue IO
+according to the configuration (see: error_if_no_space).  If metadata
+space is exhausted or a metadata operation fails: the pool will error IO
+until the pool is taken offline and repair is performed to 1) fix any
+potential inconsistencies and 2) clear the flag that imposes repair.
+Once the pool's metadata device is repaired it may be resized, which
+will allow the pool to return to normal operation.  Note that if a pool
+is flagged as needing repair, the pool's data and metadata devices
+cannot be resized until repair is performed.  It should also be noted
+that when the pool's metadata space is exhausted the current metadata
+transaction is aborted.  Given that the pool will cache IO whose
+completion may have already been acknowledged to upper IO layers
+(e.g. filesystem) it is strongly suggested that consistency checks
+(e.g. fsck) be performed on those layers when repair of the pool is
+required.
+
+Thin provisioning
+-----------------
+
+i) Creating a new thinly-provisioned volume.
+
+  To create a new thinly- provisioned volume you must send a message to an
+  active pool device, /dev/mapper/pool in this example::
+
+    dmsetup message /dev/mapper/pool 0 "create_thin 0"
+
+  Here '0' is an identifier for the volume, a 24-bit number.  It's up
+  to the caller to allocate and manage these identifiers.  If the
+  identifier is already in use, the message will fail with -EEXIST.
+
+ii) Using a thinly-provisioned volume.
+
+  Thinly-provisioned volumes are activated using the 'thin' target::
+
+    dmsetup create thin --table "0 2097152 thin /dev/mapper/pool 0"
+
+  The last parameter is the identifier for the thinp device.
+
+Internal snapshots
+------------------
+
+i) Creating an internal snapshot.
+
+  Snapshots are created with another message to the pool.
+
+  N.B.  If the origin device that you wish to snapshot is active, you
+  must suspend it before creating the snapshot to avoid corruption.
+  This is NOT enforced at the moment, so please be careful!
+
+  ::
+
+    dmsetup suspend /dev/mapper/thin
+    dmsetup message /dev/mapper/pool 0 "create_snap 1 0"
+    dmsetup resume /dev/mapper/thin
+
+  Here '1' is the identifier for the volume, a 24-bit number.  '0' is the
+  identifier for the origin device.
+
+ii) Using an internal snapshot.
+
+  Once created, the user doesn't have to worry about any connection
+  between the origin and the snapshot.  Indeed the snapshot is no
+  different from any other thinly-provisioned device and can be
+  snapshotted itself via the same method.  It's perfectly legal to
+  have only one of them active, and there's no ordering requirement on
+  activating or removing them both.  (This differs from conventional
+  device-mapper snapshots.)
+
+  Activate it exactly the same way as any other thinly-provisioned volume::
+
+    dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 1"
+
+External snapshots
+------------------
+
+You can use an external **read only** device as an origin for a
+thinly-provisioned volume.  Any read to an unprovisioned area of the
+thin device will be passed through to the origin.  Writes trigger
+the allocation of new blocks as usual.
+
+One use case for this is VM hosts that want to run guests on
+thinly-provisioned volumes but have the base image on another device
+(possibly shared between many VMs).
+
+You must not write to the origin device if you use this technique!
+Of course, you may write to the thin device and take internal snapshots
+of the thin volume.
+
+i) Creating a snapshot of an external device
+
+  This is the same as creating a thin device.
+  You don't mention the origin at this stage.
+
+  ::
+
+    dmsetup message /dev/mapper/pool 0 "create_thin 0"
+
+ii) Using a snapshot of an external device.
+
+  Append an extra parameter to the thin target specifying the origin::
+
+    dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 0 /dev/image"
+
+  N.B. All descendants (internal snapshots) of this snapshot require the
+  same extra origin parameter.
+
+Deactivation
+------------
+
+All devices using a pool must be deactivated before the pool itself
+can be.
+
+::
+
+    dmsetup remove thin
+    dmsetup remove snap
+    dmsetup remove pool
+
+Reference
+=========
+
+'thin-pool' target
+------------------
+
+i) Constructor
+
+    ::
+
+      thin-pool <metadata dev> <data dev> <data block size (sectors)> \
+	        <low water mark (blocks)> [<number of feature args> [<arg>]*]
+
+    Optional feature arguments:
+
+      skip_block_zeroing:
+	Skip the zeroing of newly-provisioned blocks.
+
+      ignore_discard:
+	Disable discard support.
+
+      no_discard_passdown:
+	Don't pass discards down to the underlying
+	data device, but just remove the mapping.
+
+      read_only:
+		 Don't allow any changes to be made to the pool
+		 metadata.  This mode is only available after the
+		 thin-pool has been created and first used in full
+		 read/write mode.  It cannot be specified on initial
+		 thin-pool creation.
+
+      error_if_no_space:
+	Error IOs, instead of queueing, if no space.
+
+    Data block size must be between 64KB (128 sectors) and 1GB
+    (2097152 sectors) inclusive.
+
+
+ii) Status
+
+    ::
+
+      <transaction id> <used metadata blocks>/<total metadata blocks>
+      <used data blocks>/<total data blocks> <held metadata root>
+      ro|rw|out_of_data_space [no_]discard_passdown [error|queue]_if_no_space
+      needs_check|- metadata_low_watermark
+
+    transaction id:
+	A 64-bit number used by userspace to help synchronise with metadata
+	from volume managers.
+
+    used data blocks / total data blocks
+	If the number of free blocks drops below the pool's low water mark a
+	dm event will be sent to userspace.  This event is edge-triggered and
+	it will occur only once after each resume so volume manager writers
+	should register for the event and then check the target's status.
+
+    held metadata root:
+	The location, in blocks, of the metadata root that has been
+	'held' for userspace read access.  '-' indicates there is no
+	held root.
+
+    discard_passdown|no_discard_passdown
+	Whether or not discards are actually being passed down to the
+	underlying device.  When this is enabled when loading the table,
+	it can get disabled if the underlying device doesn't support it.
+
+    ro|rw|out_of_data_space
+	If the pool encounters certain types of device failures it will
+	drop into a read-only metadata mode in which no changes to
+	the pool metadata (like allocating new blocks) are permitted.
+
+	In serious cases where even a read-only mode is deemed unsafe
+	no further I/O will be permitted and the status will just
+	contain the string 'Fail'.  The userspace recovery tools
+	should then be used.
+
+    error_if_no_space|queue_if_no_space
+	If the pool runs out of data or metadata space, the pool will
+	either queue or error the IO destined to the data device.  The
+	default is to queue the IO until more space is added or the
+	'no_space_timeout' expires.  The 'no_space_timeout' dm-thin-pool
+	module parameter can be used to change this timeout -- it
+	defaults to 60 seconds but may be disabled using a value of 0.
+
+    needs_check
+	A metadata operation has failed, resulting in the needs_check
+	flag being set in the metadata's superblock.  The metadata
+	device must be deactivated and checked/repaired before the
+	thin-pool can be made fully operational again.  '-' indicates
+	needs_check is not set.
+
+    metadata_low_watermark:
+	Value of metadata low watermark in blocks.  The kernel sets this
+	value internally but userspace needs to know this value to
+	determine if an event was caused by crossing this threshold.
+
+iii) Messages
+
+    create_thin <dev id>
+	Create a new thinly-provisioned device.
+	<dev id> is an arbitrary unique 24-bit identifier chosen by
+	the caller.
+
+    create_snap <dev id> <origin id>
+	Create a new snapshot of another thinly-provisioned device.
+	<dev id> is an arbitrary unique 24-bit identifier chosen by
+	the caller.
+	<origin id> is the identifier of the thinly-provisioned device
+	of which the new device will be a snapshot.
+
+    delete <dev id>
+	Deletes a thin device.  Irreversible.
+
+    set_transaction_id <current id> <new id>
+	Userland volume managers, such as LVM, need a way to
+	synchronise their external metadata with the internal metadata of the
+	pool target.  The thin-pool target offers to store an
+	arbitrary 64-bit transaction id and return it on the target's
+	status line.  To avoid races you must provide what you think
+	the current transaction id is when you change it with this
+	compare-and-swap message.
+
+    reserve_metadata_snap
+        Reserve a copy of the data mapping btree for use by userland.
+        This allows userland to inspect the mappings as they were when
+        this message was executed.  Use the pool's status command to
+        get the root block associated with the metadata snapshot.
+
+    release_metadata_snap
+        Release a previously reserved copy of the data mapping btree.
+
+'thin' target
+-------------
+
+i) Constructor
+
+    ::
+
+        thin <pool dev> <dev id> [<external origin dev>]
+
+    pool dev:
+	the thin-pool device, e.g. /dev/mapper/my_pool or 253:0
+
+    dev id:
+	the internal device identifier of the device to be
+	activated.
+
+    external origin dev:
+	an optional block device outside the pool to be treated as a
+	read-only snapshot origin: reads to unprovisioned areas of the
+	thin target will be mapped to this device.
+
+The pool doesn't store any size against the thin devices.  If you
+load a thin target that is smaller than you've been using previously,
+then you'll have no access to blocks mapped beyond the end.  If you
+load a target that is bigger than before, then extra blocks will be
+provisioned as and when needed.
+
+ii) Status
+
+    <nr mapped sectors> <highest mapped sector>
+	If the pool has encountered device errors and failed, the status
+	will just contain the string 'Fail'.  The userspace recovery
+	tools should then be used.
+
+    In the case where <nr mapped sectors> is 0, there is no highest
+    mapped sector and the value of <highest mapped sector> is unspecified.
diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt
deleted file mode 100644
index 883e7ca5f745..000000000000
--- a/Documentation/device-mapper/thin-provisioning.txt
+++ /dev/null
@@ -1,411 +0,0 @@
-Introduction
-============
-
-This document describes a collection of device-mapper targets that
-between them implement thin-provisioning and snapshots.
-
-The main highlight of this implementation, compared to the previous
-implementation of snapshots, is that it allows many virtual devices to
-be stored on the same data volume.  This simplifies administration and
-allows the sharing of data between volumes, thus reducing disk usage.
-
-Another significant feature is support for an arbitrary depth of
-recursive snapshots (snapshots of snapshots of snapshots ...).  The
-previous implementation of snapshots did this by chaining together
-lookup tables, and so performance was O(depth).  This new
-implementation uses a single data structure to avoid this degradation
-with depth.  Fragmentation may still be an issue, however, in some
-scenarios.
-
-Metadata is stored on a separate device from data, giving the
-administrator some freedom, for example to:
-
-- Improve metadata resilience by storing metadata on a mirrored volume
-  but data on a non-mirrored one.
-
-- Improve performance by storing the metadata on SSD.
-
-Status
-======
-
-These targets are considered safe for production use.  But different use
-cases will have different performance characteristics, for example due
-to fragmentation of the data volume.
-
-If you find this software is not performing as expected please mail
-dm-devel@redhat.com with details and we'll try our best to improve
-things for you.
-
-Userspace tools for checking and repairing the metadata have been fully
-developed and are available as 'thin_check' and 'thin_repair'.  The name
-of the package that provides these utilities varies by distribution (on
-a Red Hat distribution it is named 'device-mapper-persistent-data').
-
-Cookbook
-========
-
-This section describes some quick recipes for using thin provisioning.
-They use the dmsetup program to control the device-mapper driver
-directly.  End users will be advised to use a higher-level volume
-manager such as LVM2 once support has been added.
-
-Pool device
------------
-
-The pool device ties together the metadata volume and the data volume.
-It maps I/O linearly to the data volume and updates the metadata via
-two mechanisms:
-
-- Function calls from the thin targets
-
-- Device-mapper 'messages' from userspace which control the creation of new
-  virtual devices amongst other things.
-
-Setting up a fresh pool device
-------------------------------
-
-Setting up a pool device requires a valid metadata device, and a
-data device.  If you do not have an existing metadata device you can
-make one by zeroing the first 4k to indicate empty metadata.
-
-    dd if=/dev/zero of=$metadata_dev bs=4096 count=1
-
-The amount of metadata you need will vary according to how many blocks
-are shared between thin devices (i.e. through snapshots).  If you have
-less sharing than average you'll need a larger-than-average metadata device.
-
-As a guide, we suggest you calculate the number of bytes to use in the
-metadata device as 48 * $data_dev_size / $data_block_size but round it up
-to 2MB if the answer is smaller.  If you're creating large numbers of
-snapshots which are recording large amounts of change, you may find you
-need to increase this.
-
-The largest size supported is 16GB: If the device is larger,
-a warning will be issued and the excess space will not be used.
-
-Reloading a pool table
-----------------------
-
-You may reload a pool's table, indeed this is how the pool is resized
-if it runs out of space.  (N.B. While specifying a different metadata
-device when reloading is not forbidden at the moment, things will go
-wrong if it does not route I/O to exactly the same on-disk location as
-previously.)
-
-Using an existing pool device
------------------------------
-
-    dmsetup create pool \
-	--table "0 20971520 thin-pool $metadata_dev $data_dev \
-		 $data_block_size $low_water_mark"
-
-$data_block_size gives the smallest unit of disk space that can be
-allocated at a time expressed in units of 512-byte sectors.
-$data_block_size must be between 128 (64KB) and 2097152 (1GB) and a
-multiple of 128 (64KB).  $data_block_size cannot be changed after the
-thin-pool is created.  People primarily interested in thin provisioning
-may want to use a value such as 1024 (512KB).  People doing lots of
-snapshotting may want a smaller value such as 128 (64KB).  If you are
-not zeroing newly-allocated data, a larger $data_block_size in the
-region of 256000 (128MB) is suggested.
-
-$low_water_mark is expressed in blocks of size $data_block_size.  If
-free space on the data device drops below this level then a dm event
-will be triggered which a userspace daemon should catch allowing it to
-extend the pool device.  Only one such event will be sent.
-
-No special event is triggered if a just resumed device's free space is below
-the low water mark. However, resuming a device always triggers an
-event; a userspace daemon should verify that free space exceeds the low
-water mark when handling this event.
-
-A low water mark for the metadata device is maintained in the kernel and
-will trigger a dm event if free space on the metadata device drops below
-it.
-
-Updating on-disk metadata
--------------------------
-
-On-disk metadata is committed every time a FLUSH or FUA bio is written.
-If no such requests are made then commits will occur every second.  This
-means the thin-provisioning target behaves like a physical disk that has
-a volatile write cache.  If power is lost you may lose some recent
-writes.  The metadata should always be consistent in spite of any crash.
-
-If data space is exhausted the pool will either error or queue IO
-according to the configuration (see: error_if_no_space).  If metadata
-space is exhausted or a metadata operation fails: the pool will error IO
-until the pool is taken offline and repair is performed to 1) fix any
-potential inconsistencies and 2) clear the flag that imposes repair.
-Once the pool's metadata device is repaired it may be resized, which
-will allow the pool to return to normal operation.  Note that if a pool
-is flagged as needing repair, the pool's data and metadata devices
-cannot be resized until repair is performed.  It should also be noted
-that when the pool's metadata space is exhausted the current metadata
-transaction is aborted.  Given that the pool will cache IO whose
-completion may have already been acknowledged to upper IO layers
-(e.g. filesystem) it is strongly suggested that consistency checks
-(e.g. fsck) be performed on those layers when repair of the pool is
-required.
-
-Thin provisioning
------------------
-
-i) Creating a new thinly-provisioned volume.
-
-  To create a new thinly- provisioned volume you must send a message to an
-  active pool device, /dev/mapper/pool in this example.
-
-    dmsetup message /dev/mapper/pool 0 "create_thin 0"
-
-  Here '0' is an identifier for the volume, a 24-bit number.  It's up
-  to the caller to allocate and manage these identifiers.  If the
-  identifier is already in use, the message will fail with -EEXIST.
-
-ii) Using a thinly-provisioned volume.
-
-  Thinly-provisioned volumes are activated using the 'thin' target:
-
-    dmsetup create thin --table "0 2097152 thin /dev/mapper/pool 0"
-
-  The last parameter is the identifier for the thinp device.
-
-Internal snapshots
-------------------
-
-i) Creating an internal snapshot.
-
-  Snapshots are created with another message to the pool.
-
-  N.B.  If the origin device that you wish to snapshot is active, you
-  must suspend it before creating the snapshot to avoid corruption.
-  This is NOT enforced at the moment, so please be careful!
-
-    dmsetup suspend /dev/mapper/thin
-    dmsetup message /dev/mapper/pool 0 "create_snap 1 0"
-    dmsetup resume /dev/mapper/thin
-
-  Here '1' is the identifier for the volume, a 24-bit number.  '0' is the
-  identifier for the origin device.
-
-ii) Using an internal snapshot.
-
-  Once created, the user doesn't have to worry about any connection
-  between the origin and the snapshot.  Indeed the snapshot is no
-  different from any other thinly-provisioned device and can be
-  snapshotted itself via the same method.  It's perfectly legal to
-  have only one of them active, and there's no ordering requirement on
-  activating or removing them both.  (This differs from conventional
-  device-mapper snapshots.)
-
-  Activate it exactly the same way as any other thinly-provisioned volume:
-
-    dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 1"
-
-External snapshots
-------------------
-
-You can use an external _read only_ device as an origin for a
-thinly-provisioned volume.  Any read to an unprovisioned area of the
-thin device will be passed through to the origin.  Writes trigger
-the allocation of new blocks as usual.
-
-One use case for this is VM hosts that want to run guests on
-thinly-provisioned volumes but have the base image on another device
-(possibly shared between many VMs).
-
-You must not write to the origin device if you use this technique!
-Of course, you may write to the thin device and take internal snapshots
-of the thin volume.
-
-i) Creating a snapshot of an external device
-
-  This is the same as creating a thin device.
-  You don't mention the origin at this stage.
-
-    dmsetup message /dev/mapper/pool 0 "create_thin 0"
-
-ii) Using a snapshot of an external device.
-
-  Append an extra parameter to the thin target specifying the origin:
-
-    dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 0 /dev/image"
-
-  N.B. All descendants (internal snapshots) of this snapshot require the
-  same extra origin parameter.
-
-Deactivation
-------------
-
-All devices using a pool must be deactivated before the pool itself
-can be.
-
-    dmsetup remove thin
-    dmsetup remove snap
-    dmsetup remove pool
-
-Reference
-=========
-
-'thin-pool' target
-------------------
-
-i) Constructor
-
-    thin-pool <metadata dev> <data dev> <data block size (sectors)> \
-	      <low water mark (blocks)> [<number of feature args> [<arg>]*]
-
-    Optional feature arguments:
-
-      skip_block_zeroing: Skip the zeroing of newly-provisioned blocks.
-
-      ignore_discard: Disable discard support.
-
-      no_discard_passdown: Don't pass discards down to the underlying
-			   data device, but just remove the mapping.
-
-      read_only: Don't allow any changes to be made to the pool
-		 metadata.  This mode is only available after the
-		 thin-pool has been created and first used in full
-		 read/write mode.  It cannot be specified on initial
-		 thin-pool creation.
-
-      error_if_no_space: Error IOs, instead of queueing, if no space.
-
-    Data block size must be between 64KB (128 sectors) and 1GB
-    (2097152 sectors) inclusive.
-
-
-ii) Status
-
-    <transaction id> <used metadata blocks>/<total metadata blocks>
-    <used data blocks>/<total data blocks> <held metadata root>
-    ro|rw|out_of_data_space [no_]discard_passdown [error|queue]_if_no_space
-    needs_check|- metadata_low_watermark
-
-    transaction id:
-	A 64-bit number used by userspace to help synchronise with metadata
-	from volume managers.
-
-    used data blocks / total data blocks
-	If the number of free blocks drops below the pool's low water mark a
-	dm event will be sent to userspace.  This event is edge-triggered and
-	it will occur only once after each resume so volume manager writers
-	should register for the event and then check the target's status.
-
-    held metadata root:
-	The location, in blocks, of the metadata root that has been
-	'held' for userspace read access.  '-' indicates there is no
-	held root.
-
-    discard_passdown|no_discard_passdown
-	Whether or not discards are actually being passed down to the
-	underlying device.  When this is enabled when loading the table,
-	it can get disabled if the underlying device doesn't support it.
-
-    ro|rw|out_of_data_space
-	If the pool encounters certain types of device failures it will
-	drop into a read-only metadata mode in which no changes to
-	the pool metadata (like allocating new blocks) are permitted.
-
-	In serious cases where even a read-only mode is deemed unsafe
-	no further I/O will be permitted and the status will just
-	contain the string 'Fail'.  The userspace recovery tools
-	should then be used.
-
-    error_if_no_space|queue_if_no_space
-	If the pool runs out of data or metadata space, the pool will
-	either queue or error the IO destined to the data device.  The
-	default is to queue the IO until more space is added or the
-	'no_space_timeout' expires.  The 'no_space_timeout' dm-thin-pool
-	module parameter can be used to change this timeout -- it
-	defaults to 60 seconds but may be disabled using a value of 0.
-
-    needs_check
-	A metadata operation has failed, resulting in the needs_check
-	flag being set in the metadata's superblock.  The metadata
-	device must be deactivated and checked/repaired before the
-	thin-pool can be made fully operational again.  '-' indicates
-	needs_check is not set.
-
-    metadata_low_watermark:
-	Value of metadata low watermark in blocks.  The kernel sets this
-	value internally but userspace needs to know this value to
-	determine if an event was caused by crossing this threshold.
-
-iii) Messages
-
-    create_thin <dev id>
-
-	Create a new thinly-provisioned device.
-	<dev id> is an arbitrary unique 24-bit identifier chosen by
-	the caller.
-
-    create_snap <dev id> <origin id>
-
-	Create a new snapshot of another thinly-provisioned device.
-	<dev id> is an arbitrary unique 24-bit identifier chosen by
-	the caller.
-	<origin id> is the identifier of the thinly-provisioned device
-	of which the new device will be a snapshot.
-
-    delete <dev id>
-
-	Deletes a thin device.  Irreversible.
-
-    set_transaction_id <current id> <new id>
-
-	Userland volume managers, such as LVM, need a way to
-	synchronise their external metadata with the internal metadata of the
-	pool target.  The thin-pool target offers to store an
-	arbitrary 64-bit transaction id and return it on the target's
-	status line.  To avoid races you must provide what you think
-	the current transaction id is when you change it with this
-	compare-and-swap message.
-
-    reserve_metadata_snap
-
-        Reserve a copy of the data mapping btree for use by userland.
-        This allows userland to inspect the mappings as they were when
-        this message was executed.  Use the pool's status command to
-        get the root block associated with the metadata snapshot.
-
-    release_metadata_snap
-
-        Release a previously reserved copy of the data mapping btree.
-
-'thin' target
--------------
-
-i) Constructor
-
-    thin <pool dev> <dev id> [<external origin dev>]
-
-    pool dev:
-	the thin-pool device, e.g. /dev/mapper/my_pool or 253:0
-
-    dev id:
-	the internal device identifier of the device to be
-	activated.
-
-    external origin dev:
-	an optional block device outside the pool to be treated as a
-	read-only snapshot origin: reads to unprovisioned areas of the
-	thin target will be mapped to this device.
-
-The pool doesn't store any size against the thin devices.  If you
-load a thin target that is smaller than you've been using previously,
-then you'll have no access to blocks mapped beyond the end.  If you
-load a target that is bigger than before, then extra blocks will be
-provisioned as and when needed.
-
-ii) Status
-
-     <nr mapped sectors> <highest mapped sector>
-
-	If the pool has encountered device errors and failed, the status
-	will just contain the string 'Fail'.  The userspace recovery
-	tools should then be used.
-
-    In the case where <nr mapped sectors> is 0, there is no highest
-    mapped sector and the value of <highest mapped sector> is unspecified.
diff --git a/Documentation/device-mapper/unstriped.rst b/Documentation/device-mapper/unstriped.rst
new file mode 100644
index 000000000000..0a8d3eb3f072
--- /dev/null
+++ b/Documentation/device-mapper/unstriped.rst
@@ -0,0 +1,135 @@
+================================
+Device-mapper "unstriped" target
+================================
+
+Introduction
+============
+
+The device-mapper "unstriped" target provides a transparent mechanism to
+unstripe a device-mapper "striped" target to access the underlying disks
+without having to touch the true backing block-device.  It can also be
+used to unstripe a hardware RAID-0 to access backing disks.
+
+Parameters:
+<number of stripes> <chunk size> <stripe #> <dev_path> <offset>
+
+<number of stripes>
+        The number of stripes in the RAID 0.
+
+<chunk size>
+	The amount of 512B sectors in the chunk striping.
+
+<dev_path>
+	The block device you wish to unstripe.
+
+<stripe #>
+        The stripe number within the device that corresponds to physical
+        drive you wish to unstripe.  This must be 0 indexed.
+
+
+Why use this module?
+====================
+
+An example of undoing an existing dm-stripe
+-------------------------------------------
+
+This small bash script will setup 4 loop devices and use the existing
+striped target to combine the 4 devices into one.  It then will use
+the unstriped target ontop of the striped device to access the
+individual backing loop devices.  We write data to the newly exposed
+unstriped devices and verify the data written matches the correct
+underlying device on the striped array::
+
+  #!/bin/bash
+
+  MEMBER_SIZE=$((128 * 1024 * 1024))
+  NUM=4
+  SEQ_END=$((${NUM}-1))
+  CHUNK=256
+  BS=4096
+
+  RAID_SIZE=$((${MEMBER_SIZE}*${NUM}/512))
+  DM_PARMS="0 ${RAID_SIZE} striped ${NUM} ${CHUNK}"
+  COUNT=$((${MEMBER_SIZE} / ${BS}))
+
+  for i in $(seq 0 ${SEQ_END}); do
+    dd if=/dev/zero of=member-${i} bs=${MEMBER_SIZE} count=1 oflag=direct
+    losetup /dev/loop${i} member-${i}
+    DM_PARMS+=" /dev/loop${i} 0"
+  done
+
+  echo $DM_PARMS | dmsetup create raid0
+  for i in $(seq 0 ${SEQ_END}); do
+    echo "0 1 unstriped ${NUM} ${CHUNK} ${i} /dev/mapper/raid0 0" | dmsetup create set-${i}
+  done;
+
+  for i in $(seq 0 ${SEQ_END}); do
+    dd if=/dev/urandom of=/dev/mapper/set-${i} bs=${BS} count=${COUNT} oflag=direct
+    diff /dev/mapper/set-${i} member-${i}
+  done;
+
+  for i in $(seq 0 ${SEQ_END}); do
+    dmsetup remove set-${i}
+  done
+
+  dmsetup remove raid0
+
+  for i in $(seq 0 ${SEQ_END}); do
+    losetup -d /dev/loop${i}
+    rm -f member-${i}
+  done
+
+Another example
+---------------
+
+Intel NVMe drives contain two cores on the physical device.
+Each core of the drive has segregated access to its LBA range.
+The current LBA model has a RAID 0 128k chunk on each core, resulting
+in a 256k stripe across the two cores::
+
+   Core 0:       Core 1:
+  __________    __________
+  | LBA 512|    | LBA 768|
+  | LBA 0  |    | LBA 256|
+  ----------    ----------
+
+The purpose of this unstriping is to provide better QoS in noisy
+neighbor environments. When two partitions are created on the
+aggregate drive without this unstriping, reads on one partition
+can affect writes on another partition.  This is because the partitions
+are striped across the two cores.  When we unstripe this hardware RAID 0
+and make partitions on each new exposed device the two partitions are now
+physically separated.
+
+With the dm-unstriped target we're able to segregate an fio script that
+has read and write jobs that are independent of each other.  Compared to
+when we run the test on a combined drive with partitions, we were able
+to get a 92% reduction in read latency using this device mapper target.
+
+
+Example dmsetup usage
+=====================
+
+unstriped ontop of Intel NVMe device that has 2 cores
+-----------------------------------------------------
+
+::
+
+  dmsetup create nvmset0 --table '0 512 unstriped 2 256 0 /dev/nvme0n1 0'
+  dmsetup create nvmset1 --table '0 512 unstriped 2 256 1 /dev/nvme0n1 0'
+
+There will now be two devices that expose Intel NVMe core 0 and 1
+respectively::
+
+  /dev/mapper/nvmset0
+  /dev/mapper/nvmset1
+
+unstriped ontop of striped with 4 drives using 128K chunk size
+--------------------------------------------------------------
+
+::
+
+  dmsetup create raid_disk0 --table '0 512 unstriped 4 256 0 /dev/mapper/striped 0'
+  dmsetup create raid_disk1 --table '0 512 unstriped 4 256 1 /dev/mapper/striped 0'
+  dmsetup create raid_disk2 --table '0 512 unstriped 4 256 2 /dev/mapper/striped 0'
+  dmsetup create raid_disk3 --table '0 512 unstriped 4 256 3 /dev/mapper/striped 0'
diff --git a/Documentation/device-mapper/unstriped.txt b/Documentation/device-mapper/unstriped.txt
deleted file mode 100644
index 0b2a306c54ee..000000000000
--- a/Documentation/device-mapper/unstriped.txt
+++ /dev/null
@@ -1,124 +0,0 @@
-Introduction
-============
-
-The device-mapper "unstriped" target provides a transparent mechanism to
-unstripe a device-mapper "striped" target to access the underlying disks
-without having to touch the true backing block-device.  It can also be
-used to unstripe a hardware RAID-0 to access backing disks.
-
-Parameters:
-<number of stripes> <chunk size> <stripe #> <dev_path> <offset>
-
-<number of stripes>
-        The number of stripes in the RAID 0.
-
-<chunk size>
-	The amount of 512B sectors in the chunk striping.
-
-<dev_path>
-	The block device you wish to unstripe.
-
-<stripe #>
-        The stripe number within the device that corresponds to physical
-        drive you wish to unstripe.  This must be 0 indexed.
-
-
-Why use this module?
-====================
-
-An example of undoing an existing dm-stripe
--------------------------------------------
-
-This small bash script will setup 4 loop devices and use the existing
-striped target to combine the 4 devices into one.  It then will use
-the unstriped target ontop of the striped device to access the
-individual backing loop devices.  We write data to the newly exposed
-unstriped devices and verify the data written matches the correct
-underlying device on the striped array.
-
-#!/bin/bash
-
-MEMBER_SIZE=$((128 * 1024 * 1024))
-NUM=4
-SEQ_END=$((${NUM}-1))
-CHUNK=256
-BS=4096
-
-RAID_SIZE=$((${MEMBER_SIZE}*${NUM}/512))
-DM_PARMS="0 ${RAID_SIZE} striped ${NUM} ${CHUNK}"
-COUNT=$((${MEMBER_SIZE} / ${BS}))
-
-for i in $(seq 0 ${SEQ_END}); do
-  dd if=/dev/zero of=member-${i} bs=${MEMBER_SIZE} count=1 oflag=direct
-  losetup /dev/loop${i} member-${i}
-  DM_PARMS+=" /dev/loop${i} 0"
-done
-
-echo $DM_PARMS | dmsetup create raid0
-for i in $(seq 0 ${SEQ_END}); do
-  echo "0 1 unstriped ${NUM} ${CHUNK} ${i} /dev/mapper/raid0 0" | dmsetup create set-${i}
-done;
-
-for i in $(seq 0 ${SEQ_END}); do
-  dd if=/dev/urandom of=/dev/mapper/set-${i} bs=${BS} count=${COUNT} oflag=direct
-  diff /dev/mapper/set-${i} member-${i}
-done;
-
-for i in $(seq 0 ${SEQ_END}); do
-  dmsetup remove set-${i}
-done
-
-dmsetup remove raid0
-
-for i in $(seq 0 ${SEQ_END}); do
-  losetup -d /dev/loop${i}
-  rm -f member-${i}
-done
-
-Another example
----------------
-
-Intel NVMe drives contain two cores on the physical device.
-Each core of the drive has segregated access to its LBA range.
-The current LBA model has a RAID 0 128k chunk on each core, resulting
-in a 256k stripe across the two cores:
-
-   Core 0:       Core 1:
-  __________    __________
-  | LBA 512|    | LBA 768|
-  | LBA 0  |    | LBA 256|
-  ----------    ----------
-
-The purpose of this unstriping is to provide better QoS in noisy
-neighbor environments. When two partitions are created on the
-aggregate drive without this unstriping, reads on one partition
-can affect writes on another partition.  This is because the partitions
-are striped across the two cores.  When we unstripe this hardware RAID 0
-and make partitions on each new exposed device the two partitions are now
-physically separated.
-
-With the dm-unstriped target we're able to segregate an fio script that
-has read and write jobs that are independent of each other.  Compared to
-when we run the test on a combined drive with partitions, we were able
-to get a 92% reduction in read latency using this device mapper target.
-
-
-Example dmsetup usage
-=====================
-
-unstriped ontop of Intel NVMe device that has 2 cores
------------------------------------------------------
-dmsetup create nvmset0 --table '0 512 unstriped 2 256 0 /dev/nvme0n1 0'
-dmsetup create nvmset1 --table '0 512 unstriped 2 256 1 /dev/nvme0n1 0'
-
-There will now be two devices that expose Intel NVMe core 0 and 1
-respectively:
-/dev/mapper/nvmset0
-/dev/mapper/nvmset1
-
-unstriped ontop of striped with 4 drives using 128K chunk size
---------------------------------------------------------------
-dmsetup create raid_disk0 --table '0 512 unstriped 4 256 0 /dev/mapper/striped 0'
-dmsetup create raid_disk1 --table '0 512 unstriped 4 256 1 /dev/mapper/striped 0'
-dmsetup create raid_disk2 --table '0 512 unstriped 4 256 2 /dev/mapper/striped 0'
-dmsetup create raid_disk3 --table '0 512 unstriped 4 256 3 /dev/mapper/striped 0'
diff --git a/Documentation/device-mapper/verity.rst b/Documentation/device-mapper/verity.rst
new file mode 100644
index 000000000000..a4d1c1476d72
--- /dev/null
+++ b/Documentation/device-mapper/verity.rst
@@ -0,0 +1,229 @@
+=========
+dm-verity
+=========
+
+Device-Mapper's "verity" target provides transparent integrity checking of
+block devices using a cryptographic digest provided by the kernel crypto API.
+This target is read-only.
+
+Construction Parameters
+=======================
+
+::
+
+    <version> <dev> <hash_dev>
+    <data_block_size> <hash_block_size>
+    <num_data_blocks> <hash_start_block>
+    <algorithm> <digest> <salt>
+    [<#opt_params> <opt_params>]
+
+<version>
+    This is the type of the on-disk hash format.
+
+    0 is the original format used in the Chromium OS.
+      The salt is appended when hashing, digests are stored continuously and
+      the rest of the block is padded with zeroes.
+
+    1 is the current format that should be used for new devices.
+      The salt is prepended when hashing and each digest is
+      padded with zeroes to the power of two.
+
+<dev>
+    This is the device containing data, the integrity of which needs to be
+    checked.  It may be specified as a path, like /dev/sdaX, or a device number,
+    <major>:<minor>.
+
+<hash_dev>
+    This is the device that supplies the hash tree data.  It may be
+    specified similarly to the device path and may be the same device.  If the
+    same device is used, the hash_start should be outside the configured
+    dm-verity device.
+
+<data_block_size>
+    The block size on a data device in bytes.
+    Each block corresponds to one digest on the hash device.
+
+<hash_block_size>
+    The size of a hash block in bytes.
+
+<num_data_blocks>
+    The number of data blocks on the data device.  Additional blocks are
+    inaccessible.  You can place hashes to the same partition as data, in this
+    case hashes are placed after <num_data_blocks>.
+
+<hash_start_block>
+    This is the offset, in <hash_block_size>-blocks, from the start of hash_dev
+    to the root block of the hash tree.
+
+<algorithm>
+    The cryptographic hash algorithm used for this device.  This should
+    be the name of the algorithm, like "sha1".
+
+<digest>
+    The hexadecimal encoding of the cryptographic hash of the root hash block
+    and the salt.  This hash should be trusted as there is no other authenticity
+    beyond this point.
+
+<salt>
+    The hexadecimal encoding of the salt value.
+
+<#opt_params>
+    Number of optional parameters. If there are no optional parameters,
+    the optional paramaters section can be skipped or #opt_params can be zero.
+    Otherwise #opt_params is the number of following arguments.
+
+    Example of optional parameters section:
+        1 ignore_corruption
+
+ignore_corruption
+    Log corrupted blocks, but allow read operations to proceed normally.
+
+restart_on_corruption
+    Restart the system when a corrupted block is discovered. This option is
+    not compatible with ignore_corruption and requires user space support to
+    avoid restart loops.
+
+ignore_zero_blocks
+    Do not verify blocks that are expected to contain zeroes and always return
+    zeroes instead. This may be useful if the partition contains unused blocks
+    that are not guaranteed to contain zeroes.
+
+use_fec_from_device <fec_dev>
+    Use forward error correction (FEC) to recover from corruption if hash
+    verification fails. Use encoding data from the specified device. This
+    may be the same device where data and hash blocks reside, in which case
+    fec_start must be outside data and hash areas.
+
+    If the encoding data covers additional metadata, it must be accessible
+    on the hash device after the hash blocks.
+
+    Note: block sizes for data and hash devices must match. Also, if the
+    verity <dev> is encrypted the <fec_dev> should be too.
+
+fec_roots <num>
+    Number of generator roots. This equals to the number of parity bytes in
+    the encoding data. For example, in RS(M, N) encoding, the number of roots
+    is M-N.
+
+fec_blocks <num>
+    The number of encoding data blocks on the FEC device. The block size for
+    the FEC device is <data_block_size>.
+
+fec_start <offset>
+    This is the offset, in <data_block_size> blocks, from the start of the
+    FEC device to the beginning of the encoding data.
+
+check_at_most_once
+    Verify data blocks only the first time they are read from the data device,
+    rather than every time.  This reduces the overhead of dm-verity so that it
+    can be used on systems that are memory and/or CPU constrained.  However, it
+    provides a reduced level of security because only offline tampering of the
+    data device's content will be detected, not online tampering.
+
+    Hash blocks are still verified each time they are read from the hash device,
+    since verification of hash blocks is less performance critical than data
+    blocks, and a hash block will not be verified any more after all the data
+    blocks it covers have been verified anyway.
+
+Theory of operation
+===================
+
+dm-verity is meant to be set up as part of a verified boot path.  This
+may be anything ranging from a boot using tboot or trustedgrub to just
+booting from a known-good device (like a USB drive or CD).
+
+When a dm-verity device is configured, it is expected that the caller
+has been authenticated in some way (cryptographic signatures, etc).
+After instantiation, all hashes will be verified on-demand during
+disk access.  If they cannot be verified up to the root node of the
+tree, the root hash, then the I/O will fail.  This should detect
+tampering with any data on the device and the hash data.
+
+Cryptographic hashes are used to assert the integrity of the device on a
+per-block basis. This allows for a lightweight hash computation on first read
+into the page cache. Block hashes are stored linearly, aligned to the nearest
+block size.
+
+If forward error correction (FEC) support is enabled any recovery of
+corrupted data will be verified using the cryptographic hash of the
+corresponding data. This is why combining error correction with
+integrity checking is essential.
+
+Hash Tree
+---------
+
+Each node in the tree is a cryptographic hash.  If it is a leaf node, the hash
+of some data block on disk is calculated. If it is an intermediary node,
+the hash of a number of child nodes is calculated.
+
+Each entry in the tree is a collection of neighboring nodes that fit in one
+block.  The number is determined based on block_size and the size of the
+selected cryptographic digest algorithm.  The hashes are linearly-ordered in
+this entry and any unaligned trailing space is ignored but included when
+calculating the parent node.
+
+The tree looks something like:
+
+	alg = sha256, num_blocks = 32768, block_size = 4096
+
+::
+
+                                 [   root    ]
+                                /    . . .    \
+                     [entry_0]                 [entry_1]
+                    /  . . .  \                 . . .   \
+         [entry_0_0]   . . .  [entry_0_127]    . . . .  [entry_1_127]
+           / ... \             /   . . .  \             /           \
+     blk_0 ... blk_127  blk_16256   blk_16383      blk_32640 . . . blk_32767
+
+
+On-disk format
+==============
+
+The verity kernel code does not read the verity metadata on-disk header.
+It only reads the hash blocks which directly follow the header.
+It is expected that a user-space tool will verify the integrity of the
+verity header.
+
+Alternatively, the header can be omitted and the dmsetup parameters can
+be passed via the kernel command-line in a rooted chain of trust where
+the command-line is verified.
+
+Directly following the header (and with sector number padded to the next hash
+block boundary) are the hash blocks which are stored a depth at a time
+(starting from the root), sorted in order of increasing index.
+
+The full specification of kernel parameters and on-disk metadata format
+is available at the cryptsetup project's wiki page
+
+  https://gitlab.com/cryptsetup/cryptsetup/wikis/DMVerity
+
+Status
+======
+V (for Valid) is returned if every check performed so far was valid.
+If any check failed, C (for Corruption) is returned.
+
+Example
+=======
+Set up a device::
+
+  # dmsetup create vroot --readonly --table \
+    "0 2097152 verity 1 /dev/sda1 /dev/sda2 4096 4096 262144 1 sha256 "\
+    "4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076 "\
+    "1234000000000000000000000000000000000000000000000000000000000000"
+
+A command line tool veritysetup is available to compute or verify
+the hash tree or activate the kernel device. This is available from
+the cryptsetup upstream repository https://gitlab.com/cryptsetup/cryptsetup/
+(as a libcryptsetup extension).
+
+Create hash on the device::
+
+  # veritysetup format /dev/sda1 /dev/sda2
+  ...
+  Root hash: 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
+
+Activate the device::
+
+  # veritysetup create vroot /dev/sda1 /dev/sda2 \
+    4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt
deleted file mode 100644
index b3d2e4a42255..000000000000
--- a/Documentation/device-mapper/verity.txt
+++ /dev/null
@@ -1,219 +0,0 @@
-dm-verity
-==========
-
-Device-Mapper's "verity" target provides transparent integrity checking of
-block devices using a cryptographic digest provided by the kernel crypto API.
-This target is read-only.
-
-Construction Parameters
-=======================
-    <version> <dev> <hash_dev>
-    <data_block_size> <hash_block_size>
-    <num_data_blocks> <hash_start_block>
-    <algorithm> <digest> <salt>
-    [<#opt_params> <opt_params>]
-
-<version>
-    This is the type of the on-disk hash format.
-
-    0 is the original format used in the Chromium OS.
-      The salt is appended when hashing, digests are stored continuously and
-      the rest of the block is padded with zeroes.
-
-    1 is the current format that should be used for new devices.
-      The salt is prepended when hashing and each digest is
-      padded with zeroes to the power of two.
-
-<dev>
-    This is the device containing data, the integrity of which needs to be
-    checked.  It may be specified as a path, like /dev/sdaX, or a device number,
-    <major>:<minor>.
-
-<hash_dev>
-    This is the device that supplies the hash tree data.  It may be
-    specified similarly to the device path and may be the same device.  If the
-    same device is used, the hash_start should be outside the configured
-    dm-verity device.
-
-<data_block_size>
-    The block size on a data device in bytes.
-    Each block corresponds to one digest on the hash device.
-
-<hash_block_size>
-    The size of a hash block in bytes.
-
-<num_data_blocks>
-    The number of data blocks on the data device.  Additional blocks are
-    inaccessible.  You can place hashes to the same partition as data, in this
-    case hashes are placed after <num_data_blocks>.
-
-<hash_start_block>
-    This is the offset, in <hash_block_size>-blocks, from the start of hash_dev
-    to the root block of the hash tree.
-
-<algorithm>
-    The cryptographic hash algorithm used for this device.  This should
-    be the name of the algorithm, like "sha1".
-
-<digest>
-    The hexadecimal encoding of the cryptographic hash of the root hash block
-    and the salt.  This hash should be trusted as there is no other authenticity
-    beyond this point.
-
-<salt>
-    The hexadecimal encoding of the salt value.
-
-<#opt_params>
-    Number of optional parameters. If there are no optional parameters,
-    the optional paramaters section can be skipped or #opt_params can be zero.
-    Otherwise #opt_params is the number of following arguments.
-
-    Example of optional parameters section:
-        1 ignore_corruption
-
-ignore_corruption
-    Log corrupted blocks, but allow read operations to proceed normally.
-
-restart_on_corruption
-    Restart the system when a corrupted block is discovered. This option is
-    not compatible with ignore_corruption and requires user space support to
-    avoid restart loops.
-
-ignore_zero_blocks
-    Do not verify blocks that are expected to contain zeroes and always return
-    zeroes instead. This may be useful if the partition contains unused blocks
-    that are not guaranteed to contain zeroes.
-
-use_fec_from_device <fec_dev>
-    Use forward error correction (FEC) to recover from corruption if hash
-    verification fails. Use encoding data from the specified device. This
-    may be the same device where data and hash blocks reside, in which case
-    fec_start must be outside data and hash areas.
-
-    If the encoding data covers additional metadata, it must be accessible
-    on the hash device after the hash blocks.
-
-    Note: block sizes for data and hash devices must match. Also, if the
-    verity <dev> is encrypted the <fec_dev> should be too.
-
-fec_roots <num>
-    Number of generator roots. This equals to the number of parity bytes in
-    the encoding data. For example, in RS(M, N) encoding, the number of roots
-    is M-N.
-
-fec_blocks <num>
-    The number of encoding data blocks on the FEC device. The block size for
-    the FEC device is <data_block_size>.
-
-fec_start <offset>
-    This is the offset, in <data_block_size> blocks, from the start of the
-    FEC device to the beginning of the encoding data.
-
-check_at_most_once
-    Verify data blocks only the first time they are read from the data device,
-    rather than every time.  This reduces the overhead of dm-verity so that it
-    can be used on systems that are memory and/or CPU constrained.  However, it
-    provides a reduced level of security because only offline tampering of the
-    data device's content will be detected, not online tampering.
-
-    Hash blocks are still verified each time they are read from the hash device,
-    since verification of hash blocks is less performance critical than data
-    blocks, and a hash block will not be verified any more after all the data
-    blocks it covers have been verified anyway.
-
-Theory of operation
-===================
-
-dm-verity is meant to be set up as part of a verified boot path.  This
-may be anything ranging from a boot using tboot or trustedgrub to just
-booting from a known-good device (like a USB drive or CD).
-
-When a dm-verity device is configured, it is expected that the caller
-has been authenticated in some way (cryptographic signatures, etc).
-After instantiation, all hashes will be verified on-demand during
-disk access.  If they cannot be verified up to the root node of the
-tree, the root hash, then the I/O will fail.  This should detect
-tampering with any data on the device and the hash data.
-
-Cryptographic hashes are used to assert the integrity of the device on a
-per-block basis. This allows for a lightweight hash computation on first read
-into the page cache. Block hashes are stored linearly, aligned to the nearest
-block size.
-
-If forward error correction (FEC) support is enabled any recovery of
-corrupted data will be verified using the cryptographic hash of the
-corresponding data. This is why combining error correction with
-integrity checking is essential.
-
-Hash Tree
----------
-
-Each node in the tree is a cryptographic hash.  If it is a leaf node, the hash
-of some data block on disk is calculated. If it is an intermediary node,
-the hash of a number of child nodes is calculated.
-
-Each entry in the tree is a collection of neighboring nodes that fit in one
-block.  The number is determined based on block_size and the size of the
-selected cryptographic digest algorithm.  The hashes are linearly-ordered in
-this entry and any unaligned trailing space is ignored but included when
-calculating the parent node.
-
-The tree looks something like:
-
-alg = sha256, num_blocks = 32768, block_size = 4096
-
-                                 [   root    ]
-                                /    . . .    \
-                     [entry_0]                 [entry_1]
-                    /  . . .  \                 . . .   \
-         [entry_0_0]   . . .  [entry_0_127]    . . . .  [entry_1_127]
-           / ... \             /   . . .  \             /           \
-     blk_0 ... blk_127  blk_16256   blk_16383      blk_32640 . . . blk_32767
-
-
-On-disk format
-==============
-
-The verity kernel code does not read the verity metadata on-disk header.
-It only reads the hash blocks which directly follow the header.
-It is expected that a user-space tool will verify the integrity of the
-verity header.
-
-Alternatively, the header can be omitted and the dmsetup parameters can
-be passed via the kernel command-line in a rooted chain of trust where
-the command-line is verified.
-
-Directly following the header (and with sector number padded to the next hash
-block boundary) are the hash blocks which are stored a depth at a time
-(starting from the root), sorted in order of increasing index.
-
-The full specification of kernel parameters and on-disk metadata format
-is available at the cryptsetup project's wiki page
-  https://gitlab.com/cryptsetup/cryptsetup/wikis/DMVerity
-
-Status
-======
-V (for Valid) is returned if every check performed so far was valid.
-If any check failed, C (for Corruption) is returned.
-
-Example
-=======
-Set up a device:
-  # dmsetup create vroot --readonly --table \
-    "0 2097152 verity 1 /dev/sda1 /dev/sda2 4096 4096 262144 1 sha256 "\
-    "4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076 "\
-    "1234000000000000000000000000000000000000000000000000000000000000"
-
-A command line tool veritysetup is available to compute or verify
-the hash tree or activate the kernel device. This is available from
-the cryptsetup upstream repository https://gitlab.com/cryptsetup/cryptsetup/
-(as a libcryptsetup extension).
-
-Create hash on the device:
-  # veritysetup format /dev/sda1 /dev/sda2
-  ...
-  Root hash: 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
-
-Activate the device:
-  # veritysetup create vroot /dev/sda1 /dev/sda2 \
-    4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
diff --git a/Documentation/device-mapper/writecache.rst b/Documentation/device-mapper/writecache.rst
new file mode 100644
index 000000000000..d3d7690f5e8d
--- /dev/null
+++ b/Documentation/device-mapper/writecache.rst
@@ -0,0 +1,79 @@
+=================
+Writecache target
+=================
+
+The writecache target caches writes on persistent memory or on SSD. It
+doesn't cache reads because reads are supposed to be cached in page cache
+in normal RAM.
+
+When the device is constructed, the first sector should be zeroed or the
+first sector should contain valid superblock from previous invocation.
+
+Constructor parameters:
+
+1. type of the cache device - "p" or "s"
+
+	- p - persistent memory
+	- s - SSD
+2. the underlying device that will be cached
+3. the cache device
+4. block size (4096 is recommended; the maximum block size is the page
+   size)
+5. the number of optional parameters (the parameters with an argument
+   count as two)
+
+	start_sector n		(default: 0)
+		offset from the start of cache device in 512-byte sectors
+	high_watermark n	(default: 50)
+		start writeback when the number of used blocks reach this
+		watermark
+	low_watermark x		(default: 45)
+		stop writeback when the number of used blocks drops below
+		this watermark
+	writeback_jobs n	(default: unlimited)
+		limit the number of blocks that are in flight during
+		writeback. Setting this value reduces writeback
+		throughput, but it may improve latency of read requests
+	autocommit_blocks n	(default: 64 for pmem, 65536 for ssd)
+		when the application writes this amount of blocks without
+		issuing the FLUSH request, the blocks are automatically
+		commited
+	autocommit_time ms	(default: 1000)
+		autocommit time in milliseconds. The data is automatically
+		commited if this time passes and no FLUSH request is
+		received
+	fua			(by default on)
+		applicable only to persistent memory - use the FUA flag
+		when writing data from persistent memory back to the
+		underlying device
+	nofua
+		applicable only to persistent memory - don't use the FUA
+		flag when writing back data and send the FLUSH request
+		afterwards
+
+		- some underlying devices perform better with fua, some
+		  with nofua. The user should test it
+
+Status:
+1. error indicator - 0 if there was no error, otherwise error number
+2. the number of blocks
+3. the number of free blocks
+4. the number of blocks under writeback
+
+Messages:
+	flush
+		flush the cache device. The message returns successfully
+		if the cache device was flushed without an error
+	flush_on_suspend
+		flush the cache device on next suspend. Use this message
+		when you are going to remove the cache device. The proper
+		sequence for removing the cache device is:
+
+		1. send the "flush_on_suspend" message
+		2. load an inactive table with a linear target that maps
+		   to the underlying device
+		3. suspend the device
+		4. ask for status and verify that there are no errors
+		5. resume the device, so that it will use the linear
+		   target
+		6. the cache device is now inactive and it can be deleted
diff --git a/Documentation/device-mapper/writecache.txt b/Documentation/device-mapper/writecache.txt
deleted file mode 100644
index 01532b3008ae..000000000000
--- a/Documentation/device-mapper/writecache.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-The writecache target caches writes on persistent memory or on SSD. It
-doesn't cache reads because reads are supposed to be cached in page cache
-in normal RAM.
-
-When the device is constructed, the first sector should be zeroed or the
-first sector should contain valid superblock from previous invocation.
-
-Constructor parameters:
-1. type of the cache device - "p" or "s"
-	p - persistent memory
-	s - SSD
-2. the underlying device that will be cached
-3. the cache device
-4. block size (4096 is recommended; the maximum block size is the page
-   size)
-5. the number of optional parameters (the parameters with an argument
-   count as two)
-	start_sector n		(default: 0)
-		offset from the start of cache device in 512-byte sectors
-	high_watermark n	(default: 50)
-		start writeback when the number of used blocks reach this
-		watermark
-	low_watermark x		(default: 45)
-		stop writeback when the number of used blocks drops below
-		this watermark
-	writeback_jobs n	(default: unlimited)
-		limit the number of blocks that are in flight during
-		writeback. Setting this value reduces writeback
-		throughput, but it may improve latency of read requests
-	autocommit_blocks n	(default: 64 for pmem, 65536 for ssd)
-		when the application writes this amount of blocks without
-		issuing the FLUSH request, the blocks are automatically
-		commited
-	autocommit_time ms	(default: 1000)
-		autocommit time in milliseconds. The data is automatically
-		commited if this time passes and no FLUSH request is
-		received
-	fua			(by default on)
-		applicable only to persistent memory - use the FUA flag
-		when writing data from persistent memory back to the
-		underlying device
-	nofua
-		applicable only to persistent memory - don't use the FUA
-		flag when writing back data and send the FLUSH request
-		afterwards
-		- some underlying devices perform better with fua, some
-		  with nofua. The user should test it
-
-Status:
-1. error indicator - 0 if there was no error, otherwise error number
-2. the number of blocks
-3. the number of free blocks
-4. the number of blocks under writeback
-
-Messages:
-	flush
-		flush the cache device. The message returns successfully
-		if the cache device was flushed without an error
-	flush_on_suspend
-		flush the cache device on next suspend. Use this message
-		when you are going to remove the cache device. The proper
-		sequence for removing the cache device is:
-		1. send the "flush_on_suspend" message
-		2. load an inactive table with a linear target that maps
-		   to the underlying device
-		3. suspend the device
-		4. ask for status and verify that there are no errors
-		5. resume the device, so that it will use the linear
-		   target
-		6. the cache device is now inactive and it can be deleted
diff --git a/Documentation/device-mapper/zero.rst b/Documentation/device-mapper/zero.rst
new file mode 100644
index 000000000000..11fb5cf4597c
--- /dev/null
+++ b/Documentation/device-mapper/zero.rst
@@ -0,0 +1,37 @@
+=======
+dm-zero
+=======
+
+Device-Mapper's "zero" target provides a block-device that always returns
+zero'd data on reads and silently drops writes. This is similar behavior to
+/dev/zero, but as a block-device instead of a character-device.
+
+Dm-zero has no target-specific parameters.
+
+One very interesting use of dm-zero is for creating "sparse" devices in
+conjunction with dm-snapshot. A sparse device reports a device-size larger
+than the amount of actual storage space available for that device. A user can
+write data anywhere within the sparse device and read it back like a normal
+device. Reads to previously unwritten areas will return a zero'd buffer. When
+enough data has been written to fill up the actual storage space, the sparse
+device is deactivated. This can be very useful for testing device and
+filesystem limitations.
+
+To create a sparse device, start by creating a dm-zero device that's the
+desired size of the sparse device. For this example, we'll assume a 10TB
+sparse device::
+
+  TEN_TERABYTES=`expr 10 \* 1024 \* 1024 \* 1024 \* 2`   # 10 TB in sectors
+  echo "0 $TEN_TERABYTES zero" | dmsetup create zero1
+
+Then create a snapshot of the zero device, using any available block-device as
+the COW device. The size of the COW device will determine the amount of real
+space available to the sparse device. For this example, we'll assume /dev/sdb1
+is an available 10GB partition::
+
+  echo "0 $TEN_TERABYTES snapshot /dev/mapper/zero1 /dev/sdb1 p 128" | \
+     dmsetup create sparse1
+
+This will create a 10TB sparse device called /dev/mapper/sparse1 that has
+10GB of actual storage space available. If more than 10GB of data is written
+to this device, it will start returning I/O errors.
diff --git a/Documentation/device-mapper/zero.txt b/Documentation/device-mapper/zero.txt
deleted file mode 100644
index 20fb38e7fa7e..000000000000
--- a/Documentation/device-mapper/zero.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-dm-zero
-=======
-
-Device-Mapper's "zero" target provides a block-device that always returns
-zero'd data on reads and silently drops writes. This is similar behavior to
-/dev/zero, but as a block-device instead of a character-device.
-
-Dm-zero has no target-specific parameters.
-
-One very interesting use of dm-zero is for creating "sparse" devices in
-conjunction with dm-snapshot. A sparse device reports a device-size larger
-than the amount of actual storage space available for that device. A user can
-write data anywhere within the sparse device and read it back like a normal
-device. Reads to previously unwritten areas will return a zero'd buffer. When
-enough data has been written to fill up the actual storage space, the sparse
-device is deactivated. This can be very useful for testing device and
-filesystem limitations.
-
-To create a sparse device, start by creating a dm-zero device that's the
-desired size of the sparse device. For this example, we'll assume a 10TB
-sparse device.
-
-TEN_TERABYTES=`expr 10 \* 1024 \* 1024 \* 1024 \* 2`   # 10 TB in sectors
-echo "0 $TEN_TERABYTES zero" | dmsetup create zero1
-
-Then create a snapshot of the zero device, using any available block-device as
-the COW device. The size of the COW device will determine the amount of real
-space available to the sparse device. For this example, we'll assume /dev/sdb1
-is an available 10GB partition.
-
-echo "0 $TEN_TERABYTES snapshot /dev/mapper/zero1 /dev/sdb1 p 128" | \
-   dmsetup create sparse1
-
-This will create a 10TB sparse device called /dev/mapper/sparse1 that has
-10GB of actual storage space available. If more than 10GB of data is written
-to this device, it will start returning I/O errors.
-
diff --git a/Documentation/filesystems/ubifs-authentication.md b/Documentation/filesystems/ubifs-authentication.md
index 028b3e2e25f9..23e698167141 100644
--- a/Documentation/filesystems/ubifs-authentication.md
+++ b/Documentation/filesystems/ubifs-authentication.md
@@ -417,9 +417,9 @@ will then have to be provided beforehand in the normal way.
 
 [DMC-CBC-ATTACK]     http://www.jakoblell.com/blog/2013/12/22/practical-malleability-attack-against-cbc-encrypted-luks-partitions/
 
-[DM-INTEGRITY]       https://www.kernel.org/doc/Documentation/device-mapper/dm-integrity.txt
+[DM-INTEGRITY]       https://www.kernel.org/doc/Documentation/device-mapper/dm-integrity.rst
 
-[DM-VERITY]          https://www.kernel.org/doc/Documentation/device-mapper/verity.txt
+[DM-VERITY]          https://www.kernel.org/doc/Documentation/device-mapper/verity.rst
 
 [FSCRYPT-POLICY2]    https://www.spinics.net/lists/linux-ext4/msg58710.html
 
-- 
cgit 


From 10ffebbed5503b1830c7920ef528075785351be6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:44 -0300
Subject: docs: fault-injection: convert docs to ReST and rename to *.rst

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Federico Vaga <federico.vaga@vaga.pv.it>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/fault-injection/fault-injection.rst  | 446 +++++++++++++++++++++
 Documentation/fault-injection/fault-injection.txt  | 435 --------------------
 Documentation/fault-injection/index.rst            |  20 +
 .../fault-injection/notifier-error-inject.rst      |  98 +++++
 .../fault-injection/notifier-error-inject.txt      |  94 -----
 .../fault-injection/nvme-fault-injection.rst       | 120 ++++++
 .../fault-injection/nvme-fault-injection.txt       | 116 ------
 Documentation/fault-injection/provoke-crashes.rst  |  48 +++
 Documentation/fault-injection/provoke-crashes.txt  |  38 --
 Documentation/process/4.Coding.rst                 |   2 +-
 .../translations/it_IT/process/4.Coding.rst        |   2 +-
 .../translations/zh_CN/process/4.Coding.rst        |   2 +-
 12 files changed, 735 insertions(+), 686 deletions(-)
 create mode 100644 Documentation/fault-injection/fault-injection.rst
 delete mode 100644 Documentation/fault-injection/fault-injection.txt
 create mode 100644 Documentation/fault-injection/index.rst
 create mode 100644 Documentation/fault-injection/notifier-error-inject.rst
 delete mode 100644 Documentation/fault-injection/notifier-error-inject.txt
 create mode 100644 Documentation/fault-injection/nvme-fault-injection.rst
 delete mode 100644 Documentation/fault-injection/nvme-fault-injection.txt
 create mode 100644 Documentation/fault-injection/provoke-crashes.rst
 delete mode 100644 Documentation/fault-injection/provoke-crashes.txt

(limited to 'Documentation')

diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst
new file mode 100644
index 000000000000..f51bb21d20e4
--- /dev/null
+++ b/Documentation/fault-injection/fault-injection.rst
@@ -0,0 +1,446 @@
+===========================================
+Fault injection capabilities infrastructure
+===========================================
+
+See also drivers/md/md-faulty.c and "every_nth" module option for scsi_debug.
+
+
+Available fault injection capabilities
+--------------------------------------
+
+- failslab
+
+  injects slab allocation failures. (kmalloc(), kmem_cache_alloc(), ...)
+
+- fail_page_alloc
+
+  injects page allocation failures. (alloc_pages(), get_free_pages(), ...)
+
+- fail_futex
+
+  injects futex deadlock and uaddr fault errors.
+
+- fail_make_request
+
+  injects disk IO errors on devices permitted by setting
+  /sys/block/<device>/make-it-fail or
+  /sys/block/<device>/<partition>/make-it-fail. (generic_make_request())
+
+- fail_mmc_request
+
+  injects MMC data errors on devices permitted by setting
+  debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request
+
+- fail_function
+
+  injects error return on specific functions, which are marked by
+  ALLOW_ERROR_INJECTION() macro, by setting debugfs entries
+  under /sys/kernel/debug/fail_function. No boot option supported.
+
+- NVMe fault injection
+
+  inject NVMe status code and retry flag on devices permitted by setting
+  debugfs entries under /sys/kernel/debug/nvme*/fault_inject. The default
+  status code is NVME_SC_INVALID_OPCODE with no retry. The status code and
+  retry flag can be set via the debugfs.
+
+
+Configure fault-injection capabilities behavior
+-----------------------------------------------
+
+debugfs entries
+^^^^^^^^^^^^^^^
+
+fault-inject-debugfs kernel module provides some debugfs entries for runtime
+configuration of fault-injection capabilities.
+
+- /sys/kernel/debug/fail*/probability:
+
+	likelihood of failure injection, in percent.
+
+	Format: <percent>
+
+	Note that one-failure-per-hundred is a very high error rate
+	for some testcases.  Consider setting probability=100 and configure
+	/sys/kernel/debug/fail*/interval for such testcases.
+
+- /sys/kernel/debug/fail*/interval:
+
+	specifies the interval between failures, for calls to
+	should_fail() that pass all the other tests.
+
+	Note that if you enable this, by setting interval>1, you will
+	probably want to set probability=100.
+
+- /sys/kernel/debug/fail*/times:
+
+	specifies how many times failures may happen at most.
+	A value of -1 means "no limit".
+
+- /sys/kernel/debug/fail*/space:
+
+	specifies an initial resource "budget", decremented by "size"
+	on each call to should_fail(,size).  Failure injection is
+	suppressed until "space" reaches zero.
+
+- /sys/kernel/debug/fail*/verbose
+
+	Format: { 0 | 1 | 2 }
+
+	specifies the verbosity of the messages when failure is
+	injected.  '0' means no messages; '1' will print only a single
+	log line per failure; '2' will print a call trace too -- useful
+	to debug the problems revealed by fault injection.
+
+- /sys/kernel/debug/fail*/task-filter:
+
+	Format: { 'Y' | 'N' }
+
+	A value of 'N' disables filtering by process (default).
+	Any positive value limits failures to only processes indicated by
+	/proc/<pid>/make-it-fail==1.
+
+- /sys/kernel/debug/fail*/require-start,
+  /sys/kernel/debug/fail*/require-end,
+  /sys/kernel/debug/fail*/reject-start,
+  /sys/kernel/debug/fail*/reject-end:
+
+	specifies the range of virtual addresses tested during
+	stacktrace walking.  Failure is injected only if some caller
+	in the walked stacktrace lies within the required range, and
+	none lies within the rejected range.
+	Default required range is [0,ULONG_MAX) (whole of virtual address space).
+	Default rejected range is [0,0).
+
+- /sys/kernel/debug/fail*/stacktrace-depth:
+
+	specifies the maximum stacktrace depth walked during search
+	for a caller within [require-start,require-end) OR
+	[reject-start,reject-end).
+
+- /sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem:
+
+	Format: { 'Y' | 'N' }
+
+	default is 'N', setting it to 'Y' won't inject failures into
+	highmem/user allocations.
+
+- /sys/kernel/debug/failslab/ignore-gfp-wait:
+- /sys/kernel/debug/fail_page_alloc/ignore-gfp-wait:
+
+	Format: { 'Y' | 'N' }
+
+	default is 'N', setting it to 'Y' will inject failures
+	only into non-sleep allocations (GFP_ATOMIC allocations).
+
+- /sys/kernel/debug/fail_page_alloc/min-order:
+
+	specifies the minimum page allocation order to be injected
+	failures.
+
+- /sys/kernel/debug/fail_futex/ignore-private:
+
+	Format: { 'Y' | 'N' }
+
+	default is 'N', setting it to 'Y' will disable failure injections
+	when dealing with private (address space) futexes.
+
+- /sys/kernel/debug/fail_function/inject:
+
+	Format: { 'function-name' | '!function-name' | '' }
+
+	specifies the target function of error injection by name.
+	If the function name leads '!' prefix, given function is
+	removed from injection list. If nothing specified ('')
+	injection list is cleared.
+
+- /sys/kernel/debug/fail_function/injectable:
+
+	(read only) shows error injectable functions and what type of
+	error values can be specified. The error type will be one of
+	below;
+	- NULL:	retval must be 0.
+	- ERRNO: retval must be -1 to -MAX_ERRNO (-4096).
+	- ERR_NULL: retval must be 0 or -1 to -MAX_ERRNO (-4096).
+
+- /sys/kernel/debug/fail_function/<functiuon-name>/retval:
+
+	specifies the "error" return value to inject to the given
+	function for given function. This will be created when
+	user specifies new injection entry.
+
+Boot option
+^^^^^^^^^^^
+
+In order to inject faults while debugfs is not available (early boot time),
+use the boot option::
+
+	failslab=
+	fail_page_alloc=
+	fail_make_request=
+	fail_futex=
+	mmc_core.fail_request=<interval>,<probability>,<space>,<times>
+
+proc entries
+^^^^^^^^^^^^
+
+- /proc/<pid>/fail-nth,
+  /proc/self/task/<tid>/fail-nth:
+
+	Write to this file of integer N makes N-th call in the task fail.
+	Read from this file returns a integer value. A value of '0' indicates
+	that the fault setup with a previous write to this file was injected.
+	A positive integer N indicates that the fault wasn't yet injected.
+	Note that this file enables all types of faults (slab, futex, etc).
+	This setting takes precedence over all other generic debugfs settings
+	like probability, interval, times, etc. But per-capability settings
+	(e.g. fail_futex/ignore-private) take precedence over it.
+
+	This feature is intended for systematic testing of faults in a single
+	system call. See an example below.
+
+How to add new fault injection capability
+-----------------------------------------
+
+- #include <linux/fault-inject.h>
+
+- define the fault attributes
+
+  DECLARE_FAULT_ATTR(name);
+
+  Please see the definition of struct fault_attr in fault-inject.h
+  for details.
+
+- provide a way to configure fault attributes
+
+- boot option
+
+  If you need to enable the fault injection capability from boot time, you can
+  provide boot option to configure it. There is a helper function for it:
+
+	setup_fault_attr(attr, str);
+
+- debugfs entries
+
+  failslab, fail_page_alloc, and fail_make_request use this way.
+  Helper functions:
+
+	fault_create_debugfs_attr(name, parent, attr);
+
+- module parameters
+
+  If the scope of the fault injection capability is limited to a
+  single kernel module, it is better to provide module parameters to
+  configure the fault attributes.
+
+- add a hook to insert failures
+
+  Upon should_fail() returning true, client code should inject a failure:
+
+	should_fail(attr, size);
+
+Application Examples
+--------------------
+
+- Inject slab allocation failures into module init/exit code::
+
+    #!/bin/bash
+
+    FAILTYPE=failslab
+    echo Y > /sys/kernel/debug/$FAILTYPE/task-filter
+    echo 10 > /sys/kernel/debug/$FAILTYPE/probability
+    echo 100 > /sys/kernel/debug/$FAILTYPE/interval
+    echo -1 > /sys/kernel/debug/$FAILTYPE/times
+    echo 0 > /sys/kernel/debug/$FAILTYPE/space
+    echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
+    echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
+
+    faulty_system()
+    {
+	bash -c "echo 1 > /proc/self/make-it-fail && exec $*"
+    }
+
+    if [ $# -eq 0 ]
+    then
+	echo "Usage: $0 modulename [ modulename ... ]"
+	exit 1
+    fi
+
+    for m in $*
+    do
+	echo inserting $m...
+	faulty_system modprobe $m
+
+	echo removing $m...
+	faulty_system modprobe -r $m
+    done
+
+------------------------------------------------------------------------------
+
+- Inject page allocation failures only for a specific module::
+
+    #!/bin/bash
+
+    FAILTYPE=fail_page_alloc
+    module=$1
+
+    if [ -z $module ]
+    then
+	echo "Usage: $0 <modulename>"
+	exit 1
+    fi
+
+    modprobe $module
+
+    if [ ! -d /sys/module/$module/sections ]
+    then
+	echo Module $module is not loaded
+	exit 1
+    fi
+
+    cat /sys/module/$module/sections/.text > /sys/kernel/debug/$FAILTYPE/require-start
+    cat /sys/module/$module/sections/.data > /sys/kernel/debug/$FAILTYPE/require-end
+
+    echo N > /sys/kernel/debug/$FAILTYPE/task-filter
+    echo 10 > /sys/kernel/debug/$FAILTYPE/probability
+    echo 100 > /sys/kernel/debug/$FAILTYPE/interval
+    echo -1 > /sys/kernel/debug/$FAILTYPE/times
+    echo 0 > /sys/kernel/debug/$FAILTYPE/space
+    echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
+    echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
+    echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-highmem
+    echo 10 > /sys/kernel/debug/$FAILTYPE/stacktrace-depth
+
+    trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT
+
+    echo "Injecting errors into the module $module... (interrupt to stop)"
+    sleep 1000000
+
+------------------------------------------------------------------------------
+
+- Inject open_ctree error while btrfs mount::
+
+    #!/bin/bash
+
+    rm -f testfile.img
+    dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
+    DEVICE=$(losetup --show -f testfile.img)
+    mkfs.btrfs -f $DEVICE
+    mkdir -p tmpmnt
+
+    FAILTYPE=fail_function
+    FAILFUNC=open_ctree
+    echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject
+    echo -12 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval
+    echo N > /sys/kernel/debug/$FAILTYPE/task-filter
+    echo 100 > /sys/kernel/debug/$FAILTYPE/probability
+    echo 0 > /sys/kernel/debug/$FAILTYPE/interval
+    echo -1 > /sys/kernel/debug/$FAILTYPE/times
+    echo 0 > /sys/kernel/debug/$FAILTYPE/space
+    echo 1 > /sys/kernel/debug/$FAILTYPE/verbose
+
+    mount -t btrfs $DEVICE tmpmnt
+    if [ $? -ne 0 ]
+    then
+	echo "SUCCESS!"
+    else
+	echo "FAILED!"
+	umount tmpmnt
+    fi
+
+    echo > /sys/kernel/debug/$FAILTYPE/inject
+
+    rmdir tmpmnt
+    losetup -d $DEVICE
+    rm testfile.img
+
+
+Tool to run command with failslab or fail_page_alloc
+----------------------------------------------------
+In order to make it easier to accomplish the tasks mentioned above, we can use
+tools/testing/fault-injection/failcmd.sh.  Please run a command
+"./tools/testing/fault-injection/failcmd.sh --help" for more information and
+see the following examples.
+
+Examples:
+
+Run a command "make -C tools/testing/selftests/ run_tests" with injecting slab
+allocation failure::
+
+	# ./tools/testing/fault-injection/failcmd.sh \
+		-- make -C tools/testing/selftests/ run_tests
+
+Same as above except to specify 100 times failures at most instead of one time
+at most by default::
+
+	# ./tools/testing/fault-injection/failcmd.sh --times=100 \
+		-- make -C tools/testing/selftests/ run_tests
+
+Same as above except to inject page allocation failure instead of slab
+allocation failure::
+
+	# env FAILCMD_TYPE=fail_page_alloc \
+		./tools/testing/fault-injection/failcmd.sh --times=100 \
+		-- make -C tools/testing/selftests/ run_tests
+
+Systematic faults using fail-nth
+---------------------------------
+
+The following code systematically faults 0-th, 1-st, 2-nd and so on
+capabilities in the socketpair() system call::
+
+  #include <sys/types.h>
+  #include <sys/stat.h>
+  #include <sys/socket.h>
+  #include <sys/syscall.h>
+  #include <fcntl.h>
+  #include <unistd.h>
+  #include <string.h>
+  #include <stdlib.h>
+  #include <stdio.h>
+  #include <errno.h>
+
+  int main()
+  {
+	int i, err, res, fail_nth, fds[2];
+	char buf[128];
+
+	system("echo N > /sys/kernel/debug/failslab/ignore-gfp-wait");
+	sprintf(buf, "/proc/self/task/%ld/fail-nth", syscall(SYS_gettid));
+	fail_nth = open(buf, O_RDWR);
+	for (i = 1;; i++) {
+		sprintf(buf, "%d", i);
+		write(fail_nth, buf, strlen(buf));
+		res = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds);
+		err = errno;
+		pread(fail_nth, buf, sizeof(buf), 0);
+		if (res == 0) {
+			close(fds[0]);
+			close(fds[1]);
+		}
+		printf("%d-th fault %c: res=%d/%d\n", i, atoi(buf) ? 'N' : 'Y',
+			res, err);
+		if (atoi(buf))
+			break;
+	}
+	return 0;
+  }
+
+An example output::
+
+	1-th fault Y: res=-1/23
+	2-th fault Y: res=-1/23
+	3-th fault Y: res=-1/12
+	4-th fault Y: res=-1/12
+	5-th fault Y: res=-1/23
+	6-th fault Y: res=-1/23
+	7-th fault Y: res=-1/23
+	8-th fault Y: res=-1/12
+	9-th fault Y: res=-1/12
+	10-th fault Y: res=-1/12
+	11-th fault Y: res=-1/12
+	12-th fault Y: res=-1/12
+	13-th fault Y: res=-1/12
+	14-th fault Y: res=-1/12
+	15-th fault Y: res=-1/12
+	16-th fault N: res=0/12
diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
deleted file mode 100644
index a17517a083c3..000000000000
--- a/Documentation/fault-injection/fault-injection.txt
+++ /dev/null
@@ -1,435 +0,0 @@
-Fault injection capabilities infrastructure
-===========================================
-
-See also drivers/md/md-faulty.c and "every_nth" module option for scsi_debug.
-
-
-Available fault injection capabilities
---------------------------------------
-
-o failslab
-
-  injects slab allocation failures. (kmalloc(), kmem_cache_alloc(), ...)
-
-o fail_page_alloc
-
-  injects page allocation failures. (alloc_pages(), get_free_pages(), ...)
-
-o fail_futex
-
-  injects futex deadlock and uaddr fault errors.
-
-o fail_make_request
-
-  injects disk IO errors on devices permitted by setting
-  /sys/block/<device>/make-it-fail or
-  /sys/block/<device>/<partition>/make-it-fail. (generic_make_request())
-
-o fail_mmc_request
-
-  injects MMC data errors on devices permitted by setting
-  debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request
-
-o fail_function
-
-  injects error return on specific functions, which are marked by
-  ALLOW_ERROR_INJECTION() macro, by setting debugfs entries
-  under /sys/kernel/debug/fail_function. No boot option supported.
-
-o NVMe fault injection
-
-  inject NVMe status code and retry flag on devices permitted by setting
-  debugfs entries under /sys/kernel/debug/nvme*/fault_inject. The default
-  status code is NVME_SC_INVALID_OPCODE with no retry. The status code and
-  retry flag can be set via the debugfs.
-
-
-Configure fault-injection capabilities behavior
------------------------------------------------
-
-o debugfs entries
-
-fault-inject-debugfs kernel module provides some debugfs entries for runtime
-configuration of fault-injection capabilities.
-
-- /sys/kernel/debug/fail*/probability:
-
-	likelihood of failure injection, in percent.
-	Format: <percent>
-
-	Note that one-failure-per-hundred is a very high error rate
-	for some testcases.  Consider setting probability=100 and configure
-	/sys/kernel/debug/fail*/interval for such testcases.
-
-- /sys/kernel/debug/fail*/interval:
-
-	specifies the interval between failures, for calls to
-	should_fail() that pass all the other tests.
-
-	Note that if you enable this, by setting interval>1, you will
-	probably want to set probability=100.
-
-- /sys/kernel/debug/fail*/times:
-
-	specifies how many times failures may happen at most.
-	A value of -1 means "no limit".
-
-- /sys/kernel/debug/fail*/space:
-
-	specifies an initial resource "budget", decremented by "size"
-	on each call to should_fail(,size).  Failure injection is
-	suppressed until "space" reaches zero.
-
-- /sys/kernel/debug/fail*/verbose
-
-	Format: { 0 | 1 | 2 }
-	specifies the verbosity of the messages when failure is
-	injected.  '0' means no messages; '1' will print only a single
-	log line per failure; '2' will print a call trace too -- useful
-	to debug the problems revealed by fault injection.
-
-- /sys/kernel/debug/fail*/task-filter:
-
-	Format: { 'Y' | 'N' }
-	A value of 'N' disables filtering by process (default).
-	Any positive value limits failures to only processes indicated by
-	/proc/<pid>/make-it-fail==1.
-
-- /sys/kernel/debug/fail*/require-start:
-- /sys/kernel/debug/fail*/require-end:
-- /sys/kernel/debug/fail*/reject-start:
-- /sys/kernel/debug/fail*/reject-end:
-
-	specifies the range of virtual addresses tested during
-	stacktrace walking.  Failure is injected only if some caller
-	in the walked stacktrace lies within the required range, and
-	none lies within the rejected range.
-	Default required range is [0,ULONG_MAX) (whole of virtual address space).
-	Default rejected range is [0,0).
-
-- /sys/kernel/debug/fail*/stacktrace-depth:
-
-	specifies the maximum stacktrace depth walked during search
-	for a caller within [require-start,require-end) OR
-	[reject-start,reject-end).
-
-- /sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem:
-
-	Format: { 'Y' | 'N' }
-	default is 'N', setting it to 'Y' won't inject failures into
-	highmem/user allocations.
-
-- /sys/kernel/debug/failslab/ignore-gfp-wait:
-- /sys/kernel/debug/fail_page_alloc/ignore-gfp-wait:
-
-	Format: { 'Y' | 'N' }
-	default is 'N', setting it to 'Y' will inject failures
-	only into non-sleep allocations (GFP_ATOMIC allocations).
-
-- /sys/kernel/debug/fail_page_alloc/min-order:
-
-	specifies the minimum page allocation order to be injected
-	failures.
-
-- /sys/kernel/debug/fail_futex/ignore-private:
-
-	Format: { 'Y' | 'N' }
-	default is 'N', setting it to 'Y' will disable failure injections
-	when dealing with private (address space) futexes.
-
-- /sys/kernel/debug/fail_function/inject:
-
-	Format: { 'function-name' | '!function-name' | '' }
-	specifies the target function of error injection by name.
-	If the function name leads '!' prefix, given function is
-	removed from injection list. If nothing specified ('')
-	injection list is cleared.
-
-- /sys/kernel/debug/fail_function/injectable:
-
-	(read only) shows error injectable functions and what type of
-	error values can be specified. The error type will be one of
-	below;
-	- NULL:	retval must be 0.
-	- ERRNO: retval must be -1 to -MAX_ERRNO (-4096).
-	- ERR_NULL: retval must be 0 or -1 to -MAX_ERRNO (-4096).
-
-- /sys/kernel/debug/fail_function/<functiuon-name>/retval:
-
-	specifies the "error" return value to inject to the given
-	function for given function. This will be created when
-	user specifies new injection entry.
-
-o Boot option
-
-In order to inject faults while debugfs is not available (early boot time),
-use the boot option:
-
-	failslab=
-	fail_page_alloc=
-	fail_make_request=
-	fail_futex=
-	mmc_core.fail_request=<interval>,<probability>,<space>,<times>
-
-o proc entries
-
-- /proc/<pid>/fail-nth:
-- /proc/self/task/<tid>/fail-nth:
-
-	Write to this file of integer N makes N-th call in the task fail.
-	Read from this file returns a integer value. A value of '0' indicates
-	that the fault setup with a previous write to this file was injected.
-	A positive integer N indicates that the fault wasn't yet injected.
-	Note that this file enables all types of faults (slab, futex, etc).
-	This setting takes precedence over all other generic debugfs settings
-	like probability, interval, times, etc. But per-capability settings
-	(e.g. fail_futex/ignore-private) take precedence over it.
-
-	This feature is intended for systematic testing of faults in a single
-	system call. See an example below.
-
-How to add new fault injection capability
------------------------------------------
-
-o #include <linux/fault-inject.h>
-
-o define the fault attributes
-
-  DECLARE_FAULT_ATTR(name);
-
-  Please see the definition of struct fault_attr in fault-inject.h
-  for details.
-
-o provide a way to configure fault attributes
-
-- boot option
-
-  If you need to enable the fault injection capability from boot time, you can
-  provide boot option to configure it. There is a helper function for it:
-
-	setup_fault_attr(attr, str);
-
-- debugfs entries
-
-  failslab, fail_page_alloc, and fail_make_request use this way.
-  Helper functions:
-
-	fault_create_debugfs_attr(name, parent, attr);
-
-- module parameters
-
-  If the scope of the fault injection capability is limited to a
-  single kernel module, it is better to provide module parameters to
-  configure the fault attributes.
-
-o add a hook to insert failures
-
-  Upon should_fail() returning true, client code should inject a failure.
-
-	should_fail(attr, size);
-
-Application Examples
---------------------
-
-o Inject slab allocation failures into module init/exit code
-
-#!/bin/bash
-
-FAILTYPE=failslab
-echo Y > /sys/kernel/debug/$FAILTYPE/task-filter
-echo 10 > /sys/kernel/debug/$FAILTYPE/probability
-echo 100 > /sys/kernel/debug/$FAILTYPE/interval
-echo -1 > /sys/kernel/debug/$FAILTYPE/times
-echo 0 > /sys/kernel/debug/$FAILTYPE/space
-echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
-echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
-
-faulty_system()
-{
-	bash -c "echo 1 > /proc/self/make-it-fail && exec $*"
-}
-
-if [ $# -eq 0 ]
-then
-	echo "Usage: $0 modulename [ modulename ... ]"
-	exit 1
-fi
-
-for m in $*
-do
-	echo inserting $m...
-	faulty_system modprobe $m
-
-	echo removing $m...
-	faulty_system modprobe -r $m
-done
-
-------------------------------------------------------------------------------
-
-o Inject page allocation failures only for a specific module
-
-#!/bin/bash
-
-FAILTYPE=fail_page_alloc
-module=$1
-
-if [ -z $module ]
-then
-	echo "Usage: $0 <modulename>"
-	exit 1
-fi
-
-modprobe $module
-
-if [ ! -d /sys/module/$module/sections ]
-then
-	echo Module $module is not loaded
-	exit 1
-fi
-
-cat /sys/module/$module/sections/.text > /sys/kernel/debug/$FAILTYPE/require-start
-cat /sys/module/$module/sections/.data > /sys/kernel/debug/$FAILTYPE/require-end
-
-echo N > /sys/kernel/debug/$FAILTYPE/task-filter
-echo 10 > /sys/kernel/debug/$FAILTYPE/probability
-echo 100 > /sys/kernel/debug/$FAILTYPE/interval
-echo -1 > /sys/kernel/debug/$FAILTYPE/times
-echo 0 > /sys/kernel/debug/$FAILTYPE/space
-echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
-echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
-echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-highmem
-echo 10 > /sys/kernel/debug/$FAILTYPE/stacktrace-depth
-
-trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT
-
-echo "Injecting errors into the module $module... (interrupt to stop)"
-sleep 1000000
-
-------------------------------------------------------------------------------
-
-o Inject open_ctree error while btrfs mount
-
-#!/bin/bash
-
-rm -f testfile.img
-dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
-DEVICE=$(losetup --show -f testfile.img)
-mkfs.btrfs -f $DEVICE
-mkdir -p tmpmnt
-
-FAILTYPE=fail_function
-FAILFUNC=open_ctree
-echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject
-echo -12 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval
-echo N > /sys/kernel/debug/$FAILTYPE/task-filter
-echo 100 > /sys/kernel/debug/$FAILTYPE/probability
-echo 0 > /sys/kernel/debug/$FAILTYPE/interval
-echo -1 > /sys/kernel/debug/$FAILTYPE/times
-echo 0 > /sys/kernel/debug/$FAILTYPE/space
-echo 1 > /sys/kernel/debug/$FAILTYPE/verbose
-
-mount -t btrfs $DEVICE tmpmnt
-if [ $? -ne 0 ]
-then
-	echo "SUCCESS!"
-else
-	echo "FAILED!"
-	umount tmpmnt
-fi
-
-echo > /sys/kernel/debug/$FAILTYPE/inject
-
-rmdir tmpmnt
-losetup -d $DEVICE
-rm testfile.img
-
-
-Tool to run command with failslab or fail_page_alloc
-----------------------------------------------------
-In order to make it easier to accomplish the tasks mentioned above, we can use
-tools/testing/fault-injection/failcmd.sh.  Please run a command
-"./tools/testing/fault-injection/failcmd.sh --help" for more information and
-see the following examples.
-
-Examples:
-
-Run a command "make -C tools/testing/selftests/ run_tests" with injecting slab
-allocation failure.
-
-	# ./tools/testing/fault-injection/failcmd.sh \
-		-- make -C tools/testing/selftests/ run_tests
-
-Same as above except to specify 100 times failures at most instead of one time
-at most by default.
-
-	# ./tools/testing/fault-injection/failcmd.sh --times=100 \
-		-- make -C tools/testing/selftests/ run_tests
-
-Same as above except to inject page allocation failure instead of slab
-allocation failure.
-
-	# env FAILCMD_TYPE=fail_page_alloc \
-		./tools/testing/fault-injection/failcmd.sh --times=100 \
-                -- make -C tools/testing/selftests/ run_tests
-
-Systematic faults using fail-nth
----------------------------------
-
-The following code systematically faults 0-th, 1-st, 2-nd and so on
-capabilities in the socketpair() system call.
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/syscall.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <errno.h>
-
-int main()
-{
-	int i, err, res, fail_nth, fds[2];
-	char buf[128];
-
-	system("echo N > /sys/kernel/debug/failslab/ignore-gfp-wait");
-	sprintf(buf, "/proc/self/task/%ld/fail-nth", syscall(SYS_gettid));
-	fail_nth = open(buf, O_RDWR);
-	for (i = 1;; i++) {
-		sprintf(buf, "%d", i);
-		write(fail_nth, buf, strlen(buf));
-		res = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds);
-		err = errno;
-		pread(fail_nth, buf, sizeof(buf), 0);
-		if (res == 0) {
-			close(fds[0]);
-			close(fds[1]);
-		}
-		printf("%d-th fault %c: res=%d/%d\n", i, atoi(buf) ? 'N' : 'Y',
-			res, err);
-		if (atoi(buf))
-			break;
-	}
-	return 0;
-}
-
-An example output:
-
-1-th fault Y: res=-1/23
-2-th fault Y: res=-1/23
-3-th fault Y: res=-1/12
-4-th fault Y: res=-1/12
-5-th fault Y: res=-1/23
-6-th fault Y: res=-1/23
-7-th fault Y: res=-1/23
-8-th fault Y: res=-1/12
-9-th fault Y: res=-1/12
-10-th fault Y: res=-1/12
-11-th fault Y: res=-1/12
-12-th fault Y: res=-1/12
-13-th fault Y: res=-1/12
-14-th fault Y: res=-1/12
-15-th fault Y: res=-1/12
-16-th fault N: res=0/12
diff --git a/Documentation/fault-injection/index.rst b/Documentation/fault-injection/index.rst
new file mode 100644
index 000000000000..92b5639ed07a
--- /dev/null
+++ b/Documentation/fault-injection/index.rst
@@ -0,0 +1,20 @@
+:orphan:
+
+===============
+fault-injection
+===============
+
+.. toctree::
+    :maxdepth: 1
+
+    fault-injection
+    notifier-error-inject
+    nvme-fault-injection
+    provoke-crashes
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/fault-injection/notifier-error-inject.rst b/Documentation/fault-injection/notifier-error-inject.rst
new file mode 100644
index 000000000000..1668b6e48d3a
--- /dev/null
+++ b/Documentation/fault-injection/notifier-error-inject.rst
@@ -0,0 +1,98 @@
+Notifier error injection
+========================
+
+Notifier error injection provides the ability to inject artificial errors to
+specified notifier chain callbacks. It is useful to test the error handling of
+notifier call chain failures which is rarely executed.  There are kernel
+modules that can be used to test the following notifiers.
+
+ * PM notifier
+ * Memory hotplug notifier
+ * powerpc pSeries reconfig notifier
+ * Netdevice notifier
+
+PM notifier error injection module
+----------------------------------
+This feature is controlled through debugfs interface
+
+  /sys/kernel/debug/notifier-error-inject/pm/actions/<notifier event>/error
+
+Possible PM notifier events to be failed are:
+
+ * PM_HIBERNATION_PREPARE
+ * PM_SUSPEND_PREPARE
+ * PM_RESTORE_PREPARE
+
+Example: Inject PM suspend error (-12 = -ENOMEM)::
+
+	# cd /sys/kernel/debug/notifier-error-inject/pm/
+	# echo -12 > actions/PM_SUSPEND_PREPARE/error
+	# echo mem > /sys/power/state
+	bash: echo: write error: Cannot allocate memory
+
+Memory hotplug notifier error injection module
+----------------------------------------------
+This feature is controlled through debugfs interface
+
+  /sys/kernel/debug/notifier-error-inject/memory/actions/<notifier event>/error
+
+Possible memory notifier events to be failed are:
+
+ * MEM_GOING_ONLINE
+ * MEM_GOING_OFFLINE
+
+Example: Inject memory hotplug offline error (-12 == -ENOMEM)::
+
+	# cd /sys/kernel/debug/notifier-error-inject/memory
+	# echo -12 > actions/MEM_GOING_OFFLINE/error
+	# echo offline > /sys/devices/system/memory/memoryXXX/state
+	bash: echo: write error: Cannot allocate memory
+
+powerpc pSeries reconfig notifier error injection module
+--------------------------------------------------------
+This feature is controlled through debugfs interface
+
+  /sys/kernel/debug/notifier-error-inject/pSeries-reconfig/actions/<notifier event>/error
+
+Possible pSeries reconfig notifier events to be failed are:
+
+ * PSERIES_RECONFIG_ADD
+ * PSERIES_RECONFIG_REMOVE
+ * PSERIES_DRCONF_MEM_ADD
+ * PSERIES_DRCONF_MEM_REMOVE
+
+Netdevice notifier error injection module
+----------------------------------------------
+This feature is controlled through debugfs interface
+
+  /sys/kernel/debug/notifier-error-inject/netdev/actions/<notifier event>/error
+
+Netdevice notifier events which can be failed are:
+
+ * NETDEV_REGISTER
+ * NETDEV_CHANGEMTU
+ * NETDEV_CHANGENAME
+ * NETDEV_PRE_UP
+ * NETDEV_PRE_TYPE_CHANGE
+ * NETDEV_POST_INIT
+ * NETDEV_PRECHANGEMTU
+ * NETDEV_PRECHANGEUPPER
+ * NETDEV_CHANGEUPPER
+
+Example: Inject netdevice mtu change error (-22 == -EINVAL)::
+
+	# cd /sys/kernel/debug/notifier-error-inject/netdev
+	# echo -22 > actions/NETDEV_CHANGEMTU/error
+	# ip link set eth0 mtu 1024
+	RTNETLINK answers: Invalid argument
+
+For more usage examples
+-----------------------
+There are tools/testing/selftests using the notifier error injection features
+for CPU and memory notifiers.
+
+ * tools/testing/selftests/cpu-hotplug/on-off-test.sh
+ * tools/testing/selftests/memory-hotplug/on-off-test.sh
+
+These scripts first do simple online and offline tests and then do fault
+injection tests if notifier error injection module is available.
diff --git a/Documentation/fault-injection/notifier-error-inject.txt b/Documentation/fault-injection/notifier-error-inject.txt
deleted file mode 100644
index e861d761de24..000000000000
--- a/Documentation/fault-injection/notifier-error-inject.txt
+++ /dev/null
@@ -1,94 +0,0 @@
-Notifier error injection
-========================
-
-Notifier error injection provides the ability to inject artificial errors to
-specified notifier chain callbacks. It is useful to test the error handling of
-notifier call chain failures which is rarely executed.  There are kernel
-modules that can be used to test the following notifiers.
-
- * PM notifier
- * Memory hotplug notifier
- * powerpc pSeries reconfig notifier
- * Netdevice notifier
-
-PM notifier error injection module
-----------------------------------
-This feature is controlled through debugfs interface
-/sys/kernel/debug/notifier-error-inject/pm/actions/<notifier event>/error
-
-Possible PM notifier events to be failed are:
-
- * PM_HIBERNATION_PREPARE
- * PM_SUSPEND_PREPARE
- * PM_RESTORE_PREPARE
-
-Example: Inject PM suspend error (-12 = -ENOMEM)
-
-	# cd /sys/kernel/debug/notifier-error-inject/pm/
-	# echo -12 > actions/PM_SUSPEND_PREPARE/error
-	# echo mem > /sys/power/state
-	bash: echo: write error: Cannot allocate memory
-
-Memory hotplug notifier error injection module
-----------------------------------------------
-This feature is controlled through debugfs interface
-/sys/kernel/debug/notifier-error-inject/memory/actions/<notifier event>/error
-
-Possible memory notifier events to be failed are:
-
- * MEM_GOING_ONLINE
- * MEM_GOING_OFFLINE
-
-Example: Inject memory hotplug offline error (-12 == -ENOMEM)
-
-	# cd /sys/kernel/debug/notifier-error-inject/memory
-	# echo -12 > actions/MEM_GOING_OFFLINE/error
-	# echo offline > /sys/devices/system/memory/memoryXXX/state
-	bash: echo: write error: Cannot allocate memory
-
-powerpc pSeries reconfig notifier error injection module
---------------------------------------------------------
-This feature is controlled through debugfs interface
-/sys/kernel/debug/notifier-error-inject/pSeries-reconfig/actions/<notifier event>/error
-
-Possible pSeries reconfig notifier events to be failed are:
-
- * PSERIES_RECONFIG_ADD
- * PSERIES_RECONFIG_REMOVE
- * PSERIES_DRCONF_MEM_ADD
- * PSERIES_DRCONF_MEM_REMOVE
-
-Netdevice notifier error injection module
-----------------------------------------------
-This feature is controlled through debugfs interface
-/sys/kernel/debug/notifier-error-inject/netdev/actions/<notifier event>/error
-
-Netdevice notifier events which can be failed are:
-
- * NETDEV_REGISTER
- * NETDEV_CHANGEMTU
- * NETDEV_CHANGENAME
- * NETDEV_PRE_UP
- * NETDEV_PRE_TYPE_CHANGE
- * NETDEV_POST_INIT
- * NETDEV_PRECHANGEMTU
- * NETDEV_PRECHANGEUPPER
- * NETDEV_CHANGEUPPER
-
-Example: Inject netdevice mtu change error (-22 == -EINVAL)
-
-	# cd /sys/kernel/debug/notifier-error-inject/netdev
-	# echo -22 > actions/NETDEV_CHANGEMTU/error
-	# ip link set eth0 mtu 1024
-	RTNETLINK answers: Invalid argument
-
-For more usage examples
------------------------
-There are tools/testing/selftests using the notifier error injection features
-for CPU and memory notifiers.
-
- * tools/testing/selftests/cpu-hotplug/on-off-test.sh
- * tools/testing/selftests/memory-hotplug/on-off-test.sh
-
-These scripts first do simple online and offline tests and then do fault
-injection tests if notifier error injection module is available.
diff --git a/Documentation/fault-injection/nvme-fault-injection.rst b/Documentation/fault-injection/nvme-fault-injection.rst
new file mode 100644
index 000000000000..bbb1bf3e8650
--- /dev/null
+++ b/Documentation/fault-injection/nvme-fault-injection.rst
@@ -0,0 +1,120 @@
+NVMe Fault Injection
+====================
+Linux's fault injection framework provides a systematic way to support
+error injection via debugfs in the /sys/kernel/debug directory. When
+enabled, the default NVME_SC_INVALID_OPCODE with no retry will be
+injected into the nvme_end_request. Users can change the default status
+code and no retry flag via the debugfs. The list of Generic Command
+Status can be found in include/linux/nvme.h
+
+Following examples show how to inject an error into the nvme.
+
+First, enable CONFIG_FAULT_INJECTION_DEBUG_FS kernel config,
+recompile the kernel. After booting up the kernel, do the
+following.
+
+Example 1: Inject default status code with no retry
+---------------------------------------------------
+
+::
+
+  mount /dev/nvme0n1 /mnt
+  echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/times
+  echo 100 > /sys/kernel/debug/nvme0n1/fault_inject/probability
+  cp a.file /mnt
+
+Expected Result::
+
+  cp: cannot stat ‘/mnt/a.file’: Input/output error
+
+Message from dmesg::
+
+  FAULT_INJECTION: forcing a failure.
+  name fault_inject, interval 1, probability 100, space 0, times 1
+  CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.15.0-rc8+ #2
+  Hardware name: innotek GmbH VirtualBox/VirtualBox,
+  BIOS VirtualBox 12/01/2006
+  Call Trace:
+    <IRQ>
+    dump_stack+0x5c/0x7d
+    should_fail+0x148/0x170
+    nvme_should_fail+0x2f/0x50 [nvme_core]
+    nvme_process_cq+0xe7/0x1d0 [nvme]
+    nvme_irq+0x1e/0x40 [nvme]
+    __handle_irq_event_percpu+0x3a/0x190
+    handle_irq_event_percpu+0x30/0x70
+    handle_irq_event+0x36/0x60
+    handle_fasteoi_irq+0x78/0x120
+    handle_irq+0xa7/0x130
+    ? tick_irq_enter+0xa8/0xc0
+    do_IRQ+0x43/0xc0
+    common_interrupt+0xa2/0xa2
+    </IRQ>
+  RIP: 0010:native_safe_halt+0x2/0x10
+  RSP: 0018:ffffffff82003e90 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffdd
+  RAX: ffffffff817a10c0 RBX: ffffffff82012480 RCX: 0000000000000000
+  RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
+  RBP: 0000000000000000 R08: 000000008e38ce64 R09: 0000000000000000
+  R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff82012480
+  R13: ffffffff82012480 R14: 0000000000000000 R15: 0000000000000000
+    ? __sched_text_end+0x4/0x4
+    default_idle+0x18/0xf0
+    do_idle+0x150/0x1d0
+    cpu_startup_entry+0x6f/0x80
+    start_kernel+0x4c4/0x4e4
+    ? set_init_arg+0x55/0x55
+    secondary_startup_64+0xa5/0xb0
+    print_req_error: I/O error, dev nvme0n1, sector 9240
+  EXT4-fs error (device nvme0n1): ext4_find_entry:1436:
+  inode #2: comm cp: reading directory lblock 0
+
+Example 2: Inject default status code with retry
+------------------------------------------------
+
+::
+
+  mount /dev/nvme0n1 /mnt
+  echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/times
+  echo 100 > /sys/kernel/debug/nvme0n1/fault_inject/probability
+  echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/status
+  echo 0 > /sys/kernel/debug/nvme0n1/fault_inject/dont_retry
+
+  cp a.file /mnt
+
+Expected Result::
+
+  command success without error
+
+Message from dmesg::
+
+  FAULT_INJECTION: forcing a failure.
+  name fault_inject, interval 1, probability 100, space 0, times 1
+  CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.15.0-rc8+ #4
+  Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+  Call Trace:
+    <IRQ>
+    dump_stack+0x5c/0x7d
+    should_fail+0x148/0x170
+    nvme_should_fail+0x30/0x60 [nvme_core]
+    nvme_loop_queue_response+0x84/0x110 [nvme_loop]
+    nvmet_req_complete+0x11/0x40 [nvmet]
+    nvmet_bio_done+0x28/0x40 [nvmet]
+    blk_update_request+0xb0/0x310
+    blk_mq_end_request+0x18/0x60
+    flush_smp_call_function_queue+0x3d/0xf0
+    smp_call_function_single_interrupt+0x2c/0xc0
+    call_function_single_interrupt+0xa2/0xb0
+    </IRQ>
+  RIP: 0010:native_safe_halt+0x2/0x10
+  RSP: 0018:ffffc9000068bec0 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff04
+  RAX: ffffffff817a10c0 RBX: ffff88011a3c9680 RCX: 0000000000000000
+  RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
+  RBP: 0000000000000001 R08: 000000008e38c131 R09: 0000000000000000
+  R10: 0000000000000000 R11: 0000000000000000 R12: ffff88011a3c9680
+  R13: ffff88011a3c9680 R14: 0000000000000000 R15: 0000000000000000
+    ? __sched_text_end+0x4/0x4
+    default_idle+0x18/0xf0
+    do_idle+0x150/0x1d0
+    cpu_startup_entry+0x6f/0x80
+    start_secondary+0x187/0x1e0
+    secondary_startup_64+0xa5/0xb0
diff --git a/Documentation/fault-injection/nvme-fault-injection.txt b/Documentation/fault-injection/nvme-fault-injection.txt
deleted file mode 100644
index 8fbf3bf60b62..000000000000
--- a/Documentation/fault-injection/nvme-fault-injection.txt
+++ /dev/null
@@ -1,116 +0,0 @@
-NVMe Fault Injection
-====================
-Linux's fault injection framework provides a systematic way to support
-error injection via debugfs in the /sys/kernel/debug directory. When
-enabled, the default NVME_SC_INVALID_OPCODE with no retry will be
-injected into the nvme_end_request. Users can change the default status
-code and no retry flag via the debugfs. The list of Generic Command
-Status can be found in include/linux/nvme.h
-
-Following examples show how to inject an error into the nvme.
-
-First, enable CONFIG_FAULT_INJECTION_DEBUG_FS kernel config,
-recompile the kernel. After booting up the kernel, do the
-following.
-
-Example 1: Inject default status code with no retry
----------------------------------------------------
-
-mount /dev/nvme0n1 /mnt
-echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/times
-echo 100 > /sys/kernel/debug/nvme0n1/fault_inject/probability
-cp a.file /mnt
-
-Expected Result:
-
-cp: cannot stat ‘/mnt/a.file’: Input/output error
-
-Message from dmesg:
-
-FAULT_INJECTION: forcing a failure.
-name fault_inject, interval 1, probability 100, space 0, times 1
-CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.15.0-rc8+ #2
-Hardware name: innotek GmbH VirtualBox/VirtualBox,
-BIOS VirtualBox 12/01/2006
-Call Trace:
-  <IRQ>
-  dump_stack+0x5c/0x7d
-  should_fail+0x148/0x170
-  nvme_should_fail+0x2f/0x50 [nvme_core]
-  nvme_process_cq+0xe7/0x1d0 [nvme]
-  nvme_irq+0x1e/0x40 [nvme]
-  __handle_irq_event_percpu+0x3a/0x190
-  handle_irq_event_percpu+0x30/0x70
-  handle_irq_event+0x36/0x60
-  handle_fasteoi_irq+0x78/0x120
-  handle_irq+0xa7/0x130
-  ? tick_irq_enter+0xa8/0xc0
-  do_IRQ+0x43/0xc0
-  common_interrupt+0xa2/0xa2
-  </IRQ>
-RIP: 0010:native_safe_halt+0x2/0x10
-RSP: 0018:ffffffff82003e90 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffdd
-RAX: ffffffff817a10c0 RBX: ffffffff82012480 RCX: 0000000000000000
-RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
-RBP: 0000000000000000 R08: 000000008e38ce64 R09: 0000000000000000
-R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff82012480
-R13: ffffffff82012480 R14: 0000000000000000 R15: 0000000000000000
-  ? __sched_text_end+0x4/0x4
-  default_idle+0x18/0xf0
-  do_idle+0x150/0x1d0
-  cpu_startup_entry+0x6f/0x80
-  start_kernel+0x4c4/0x4e4
-  ? set_init_arg+0x55/0x55
-  secondary_startup_64+0xa5/0xb0
-  print_req_error: I/O error, dev nvme0n1, sector 9240
-EXT4-fs error (device nvme0n1): ext4_find_entry:1436:
-inode #2: comm cp: reading directory lblock 0
-
-Example 2: Inject default status code with retry
-------------------------------------------------
-
-mount /dev/nvme0n1 /mnt
-echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/times
-echo 100 > /sys/kernel/debug/nvme0n1/fault_inject/probability
-echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/status
-echo 0 > /sys/kernel/debug/nvme0n1/fault_inject/dont_retry
-
-cp a.file /mnt
-
-Expected Result:
-
-command success without error
-
-Message from dmesg:
-
-FAULT_INJECTION: forcing a failure.
-name fault_inject, interval 1, probability 100, space 0, times 1
-CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.15.0-rc8+ #4
-Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
-Call Trace:
-  <IRQ>
-  dump_stack+0x5c/0x7d
-  should_fail+0x148/0x170
-  nvme_should_fail+0x30/0x60 [nvme_core]
-  nvme_loop_queue_response+0x84/0x110 [nvme_loop]
-  nvmet_req_complete+0x11/0x40 [nvmet]
-  nvmet_bio_done+0x28/0x40 [nvmet]
-  blk_update_request+0xb0/0x310
-  blk_mq_end_request+0x18/0x60
-  flush_smp_call_function_queue+0x3d/0xf0
-  smp_call_function_single_interrupt+0x2c/0xc0
-  call_function_single_interrupt+0xa2/0xb0
-  </IRQ>
-RIP: 0010:native_safe_halt+0x2/0x10
-RSP: 0018:ffffc9000068bec0 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff04
-RAX: ffffffff817a10c0 RBX: ffff88011a3c9680 RCX: 0000000000000000
-RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
-RBP: 0000000000000001 R08: 000000008e38c131 R09: 0000000000000000
-R10: 0000000000000000 R11: 0000000000000000 R12: ffff88011a3c9680
-R13: ffff88011a3c9680 R14: 0000000000000000 R15: 0000000000000000
-  ? __sched_text_end+0x4/0x4
-  default_idle+0x18/0xf0
-  do_idle+0x150/0x1d0
-  cpu_startup_entry+0x6f/0x80
-  start_secondary+0x187/0x1e0
-  secondary_startup_64+0xa5/0xb0
diff --git a/Documentation/fault-injection/provoke-crashes.rst b/Documentation/fault-injection/provoke-crashes.rst
new file mode 100644
index 000000000000..9279a3e12278
--- /dev/null
+++ b/Documentation/fault-injection/provoke-crashes.rst
@@ -0,0 +1,48 @@
+===============
+Provoke crashes
+===============
+
+The lkdtm module provides an interface to crash or injure the kernel at
+predefined crashpoints to evaluate the reliability of crash dumps obtained
+using different dumping solutions. The module uses KPROBEs to instrument
+crashing points, but can also crash the kernel directly without KRPOBE
+support.
+
+
+You can provide the way either through module arguments when inserting
+the module, or through a debugfs interface.
+
+Usage::
+
+	insmod lkdtm.ko [recur_count={>0}] cpoint_name=<> cpoint_type=<>
+			[cpoint_count={>0}]
+
+recur_count
+	Recursion level for the stack overflow test. Default is 10.
+
+cpoint_name
+	Crash point where the kernel is to be crashed. It can be
+	one of INT_HARDWARE_ENTRY, INT_HW_IRQ_EN, INT_TASKLET_ENTRY,
+	FS_DEVRW, MEM_SWAPOUT, TIMERADD, SCSI_DISPATCH_CMD,
+	IDE_CORE_CP, DIRECT
+
+cpoint_type
+	Indicates the action to be taken on hitting the crash point.
+	It can be one of PANIC, BUG, EXCEPTION, LOOP, OVERFLOW,
+	CORRUPT_STACK, UNALIGNED_LOAD_STORE_WRITE, OVERWRITE_ALLOCATION,
+	WRITE_AFTER_FREE,
+
+cpoint_count
+	Indicates the number of times the crash point is to be hit
+	to trigger an action. The default is 10.
+
+You can also induce failures by mounting debugfs and writing the type to
+<mountpoint>/provoke-crash/<crashpoint>. E.g.::
+
+  mount -t debugfs debugfs /mnt
+  echo EXCEPTION > /mnt/provoke-crash/INT_HARDWARE_ENTRY
+
+
+A special file is `DIRECT` which will induce the crash directly without
+KPROBE instrumentation. This mode is the only one available when the module
+is built on a kernel without KPROBEs support.
diff --git a/Documentation/fault-injection/provoke-crashes.txt b/Documentation/fault-injection/provoke-crashes.txt
deleted file mode 100644
index 7a9d3d81525b..000000000000
--- a/Documentation/fault-injection/provoke-crashes.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-The lkdtm module provides an interface to crash or injure the kernel at
-predefined crashpoints to evaluate the reliability of crash dumps obtained
-using different dumping solutions. The module uses KPROBEs to instrument
-crashing points, but can also crash the kernel directly without KRPOBE
-support.
-
-
-You can provide the way either through module arguments when inserting
-the module, or through a debugfs interface.
-
-Usage: insmod lkdtm.ko [recur_count={>0}] cpoint_name=<> cpoint_type=<>
-				[cpoint_count={>0}]
-
-  recur_count : Recursion level for the stack overflow test. Default is 10.
-
-  cpoint_name : Crash point where the kernel is to be crashed. It can be
-	 one of INT_HARDWARE_ENTRY, INT_HW_IRQ_EN, INT_TASKLET_ENTRY,
-	 FS_DEVRW, MEM_SWAPOUT, TIMERADD, SCSI_DISPATCH_CMD,
-	 IDE_CORE_CP, DIRECT
-
-  cpoint_type : Indicates the action to be taken on hitting the crash point.
-     It can be one of PANIC, BUG, EXCEPTION, LOOP, OVERFLOW,
-     CORRUPT_STACK, UNALIGNED_LOAD_STORE_WRITE, OVERWRITE_ALLOCATION,
-     WRITE_AFTER_FREE,
-
-  cpoint_count : Indicates the number of times the crash point is to be hit
-    to trigger an action. The default is 10.
-
-You can also induce failures by mounting debugfs and writing the type to
-<mountpoint>/provoke-crash/<crashpoint>. E.g.,
-
-  mount -t debugfs debugfs /mnt
-  echo EXCEPTION > /mnt/provoke-crash/INT_HARDWARE_ENTRY
-
-
-A special file is `DIRECT' which will induce the crash directly without
-KPROBE instrumentation. This mode is the only one available when the module
-is built on a kernel without KPROBEs support.
diff --git a/Documentation/process/4.Coding.rst b/Documentation/process/4.Coding.rst
index 4b7a5ab3cec1..13dd893c9f88 100644
--- a/Documentation/process/4.Coding.rst
+++ b/Documentation/process/4.Coding.rst
@@ -298,7 +298,7 @@ enabled, a configurable percentage of memory allocations will be made to
 fail; these failures can be restricted to a specific range of code.
 Running with fault injection enabled allows the programmer to see how the
 code responds when things go badly.  See
-Documentation/fault-injection/fault-injection.txt for more information on
+Documentation/fault-injection/fault-injection.rst for more information on
 how to use this facility.
 
 Other kinds of errors can be found with the "sparse" static analysis tool.
diff --git a/Documentation/translations/it_IT/process/4.Coding.rst b/Documentation/translations/it_IT/process/4.Coding.rst
index c05b89e616dd..a5e36aa60448 100644
--- a/Documentation/translations/it_IT/process/4.Coding.rst
+++ b/Documentation/translations/it_IT/process/4.Coding.rst
@@ -314,7 +314,7 @@ di allocazione di memoria sarà destinata al fallimento; questi fallimenti
 possono essere ridotti ad uno specifico pezzo di codice.  Procedere con
 l'inserimento dei fallimenti attivo permette al programmatore di verificare
 come il codice risponde quando le cose vanno male.  Consultate:
-Documentation/fault-injection/fault-injection.txt per avere maggiori
+Documentation/fault-injection/fault-injection.rst per avere maggiori
 informazioni su come utilizzare questo strumento.
 
 Altre tipologie di errori possono essere riscontrati con lo strumento di
diff --git a/Documentation/translations/zh_CN/process/4.Coding.rst b/Documentation/translations/zh_CN/process/4.Coding.rst
index 8bb777941394..b82b1dde3122 100644
--- a/Documentation/translations/zh_CN/process/4.Coding.rst
+++ b/Documentation/translations/zh_CN/process/4.Coding.rst
@@ -205,7 +205,7 @@ Linus对这个问题给出了最佳答案:
 启用故障注入后，内存分配的可配置百分比将失败；这些失败可以限制在特定的代码
 范围内。在启用了故障注入的情况下运行，程序员可以看到当情况恶化时代码如何响
 应。有关如何使用此工具的详细信息，请参阅
-Documentation/fault-injection/fault-injection.txt。
+Documentation/fault-injection/fault-injection.rst。
 
 使用“sparse”静态分析工具可以发现其他类型的错误。对于sparse，可以警告程序员
 用户空间和内核空间地址之间的混淆、big endian和small endian数量的混合、在需
-- 
cgit 


From ab42b818954c040fa13639dc031d8541edcecb4b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:45 -0300
Subject: docs: fb: convert docs to ReST and rename to *.rst

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Also, removed the Maintained by, as requested by Geert.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/kernel-parameters.txt |   2 +-
 Documentation/fb/api.rst                        | 307 ++++++++++++++++
 Documentation/fb/api.txt                        | 306 ----------------
 Documentation/fb/arkfb.rst                      |  68 ++++
 Documentation/fb/arkfb.txt                      |  68 ----
 Documentation/fb/aty128fb.rst                   |  75 ++++
 Documentation/fb/aty128fb.txt                   |  72 ----
 Documentation/fb/cirrusfb.rst                   |  94 +++++
 Documentation/fb/cirrusfb.txt                   |  97 ------
 Documentation/fb/cmap_xfbdev.rst                |  56 +++
 Documentation/fb/cmap_xfbdev.txt                |  53 ---
 Documentation/fb/deferred_io.rst                |  79 +++++
 Documentation/fb/deferred_io.txt                |  75 ----
 Documentation/fb/efifb.rst                      |  39 +++
 Documentation/fb/efifb.txt                      |  37 --
 Documentation/fb/ep93xx-fb.rst                  | 140 ++++++++
 Documentation/fb/ep93xx-fb.txt                  | 135 --------
 Documentation/fb/fbcon.rst                      | 350 +++++++++++++++++++
 Documentation/fb/fbcon.txt                      | 347 -------------------
 Documentation/fb/framebuffer.rst                | 353 +++++++++++++++++++
 Documentation/fb/framebuffer.txt                | 343 ------------------
 Documentation/fb/gxfb.rst                       |  54 +++
 Documentation/fb/gxfb.txt                       |  52 ---
 Documentation/fb/index.rst                      |  50 +++
 Documentation/fb/intel810.rst                   | 287 +++++++++++++++
 Documentation/fb/intel810.txt                   | 278 ---------------
 Documentation/fb/intelfb.rst                    | 155 +++++++++
 Documentation/fb/intelfb.txt                    | 149 --------
 Documentation/fb/internals.rst                  |  86 +++++
 Documentation/fb/internals.txt                  |  82 -----
 Documentation/fb/lxfb.rst                       |  55 +++
 Documentation/fb/lxfb.txt                       |  52 ---
 Documentation/fb/matroxfb.rst                   | 443 ++++++++++++++++++++++++
 Documentation/fb/matroxfb.txt                   | 413 ----------------------
 Documentation/fb/metronomefb.rst                |  38 ++
 Documentation/fb/metronomefb.txt                |  36 --
 Documentation/fb/modedb.rst                     | 155 +++++++++
 Documentation/fb/modedb.txt                     | 151 --------
 Documentation/fb/pvr2fb.rst                     |  66 ++++
 Documentation/fb/pvr2fb.txt                     |  65 ----
 Documentation/fb/pxafb.rst                      | 173 +++++++++
 Documentation/fb/pxafb.txt                      | 142 --------
 Documentation/fb/s3fb.rst                       |  82 +++++
 Documentation/fb/s3fb.txt                       |  82 -----
 Documentation/fb/sa1100fb.rst                   |  40 +++
 Documentation/fb/sa1100fb.txt                   |  39 ---
 Documentation/fb/sh7760fb.rst                   | 130 +++++++
 Documentation/fb/sh7760fb.txt                   | 131 -------
 Documentation/fb/sisfb.rst                      | 160 +++++++++
 Documentation/fb/sisfb.txt                      | 158 ---------
 Documentation/fb/sm501.rst                      |  15 +
 Documentation/fb/sm501.txt                      |  10 -
 Documentation/fb/sm712fb.rst                    |  35 ++
 Documentation/fb/sm712fb.txt                    |  31 --
 Documentation/fb/sstfb.rst                      | 207 +++++++++++
 Documentation/fb/sstfb.txt                      | 174 ----------
 Documentation/fb/tgafb.rst                      |  71 ++++
 Documentation/fb/tgafb.txt                      |  69 ----
 Documentation/fb/tridentfb.rst                  |  78 +++++
 Documentation/fb/tridentfb.txt                  |  70 ----
 Documentation/fb/udlfb.rst                      | 162 +++++++++
 Documentation/fb/udlfb.txt                      | 159 ---------
 Documentation/fb/uvesafb.rst                    | 188 ++++++++++
 Documentation/fb/uvesafb.txt                    | 184 ----------
 Documentation/fb/vesafb.rst                     | 192 ++++++++++
 Documentation/fb/vesafb.txt                     | 181 ----------
 Documentation/fb/viafb.rst                      | 297 ++++++++++++++++
 Documentation/fb/viafb.txt                      | 252 --------------
 Documentation/fb/vt8623fb.rst                   |  64 ++++
 Documentation/fb/vt8623fb.txt                   |  64 ----
 70 files changed, 4845 insertions(+), 4558 deletions(-)
 create mode 100644 Documentation/fb/api.rst
 delete mode 100644 Documentation/fb/api.txt
 create mode 100644 Documentation/fb/arkfb.rst
 delete mode 100644 Documentation/fb/arkfb.txt
 create mode 100644 Documentation/fb/aty128fb.rst
 delete mode 100644 Documentation/fb/aty128fb.txt
 create mode 100644 Documentation/fb/cirrusfb.rst
 delete mode 100644 Documentation/fb/cirrusfb.txt
 create mode 100644 Documentation/fb/cmap_xfbdev.rst
 delete mode 100644 Documentation/fb/cmap_xfbdev.txt
 create mode 100644 Documentation/fb/deferred_io.rst
 delete mode 100644 Documentation/fb/deferred_io.txt
 create mode 100644 Documentation/fb/efifb.rst
 delete mode 100644 Documentation/fb/efifb.txt
 create mode 100644 Documentation/fb/ep93xx-fb.rst
 delete mode 100644 Documentation/fb/ep93xx-fb.txt
 create mode 100644 Documentation/fb/fbcon.rst
 delete mode 100644 Documentation/fb/fbcon.txt
 create mode 100644 Documentation/fb/framebuffer.rst
 delete mode 100644 Documentation/fb/framebuffer.txt
 create mode 100644 Documentation/fb/gxfb.rst
 delete mode 100644 Documentation/fb/gxfb.txt
 create mode 100644 Documentation/fb/index.rst
 create mode 100644 Documentation/fb/intel810.rst
 delete mode 100644 Documentation/fb/intel810.txt
 create mode 100644 Documentation/fb/intelfb.rst
 delete mode 100644 Documentation/fb/intelfb.txt
 create mode 100644 Documentation/fb/internals.rst
 delete mode 100644 Documentation/fb/internals.txt
 create mode 100644 Documentation/fb/lxfb.rst
 delete mode 100644 Documentation/fb/lxfb.txt
 create mode 100644 Documentation/fb/matroxfb.rst
 delete mode 100644 Documentation/fb/matroxfb.txt
 create mode 100644 Documentation/fb/metronomefb.rst
 delete mode 100644 Documentation/fb/metronomefb.txt
 create mode 100644 Documentation/fb/modedb.rst
 delete mode 100644 Documentation/fb/modedb.txt
 create mode 100644 Documentation/fb/pvr2fb.rst
 delete mode 100644 Documentation/fb/pvr2fb.txt
 create mode 100644 Documentation/fb/pxafb.rst
 delete mode 100644 Documentation/fb/pxafb.txt
 create mode 100644 Documentation/fb/s3fb.rst
 delete mode 100644 Documentation/fb/s3fb.txt
 create mode 100644 Documentation/fb/sa1100fb.rst
 delete mode 100644 Documentation/fb/sa1100fb.txt
 create mode 100644 Documentation/fb/sh7760fb.rst
 delete mode 100644 Documentation/fb/sh7760fb.txt
 create mode 100644 Documentation/fb/sisfb.rst
 delete mode 100644 Documentation/fb/sisfb.txt
 create mode 100644 Documentation/fb/sm501.rst
 delete mode 100644 Documentation/fb/sm501.txt
 create mode 100644 Documentation/fb/sm712fb.rst
 delete mode 100644 Documentation/fb/sm712fb.txt
 create mode 100644 Documentation/fb/sstfb.rst
 delete mode 100644 Documentation/fb/sstfb.txt
 create mode 100644 Documentation/fb/tgafb.rst
 delete mode 100644 Documentation/fb/tgafb.txt
 create mode 100644 Documentation/fb/tridentfb.rst
 delete mode 100644 Documentation/fb/tridentfb.txt
 create mode 100644 Documentation/fb/udlfb.rst
 delete mode 100644 Documentation/fb/udlfb.txt
 create mode 100644 Documentation/fb/uvesafb.rst
 delete mode 100644 Documentation/fb/uvesafb.txt
 create mode 100644 Documentation/fb/vesafb.rst
 delete mode 100644 Documentation/fb/vesafb.txt
 create mode 100644 Documentation/fb/viafb.rst
 delete mode 100644 Documentation/fb/viafb.txt
 create mode 100644 Documentation/fb/vt8623fb.rst
 delete mode 100644 Documentation/fb/vt8623fb.txt

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9b16b640ce48..83d6560f10f0 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5024,7 +5024,7 @@
 			vector=percpu: enable percpu vector domain
 
 	video=		[FB] Frame buffer configuration
-			See Documentation/fb/modedb.txt.
+			See Documentation/fb/modedb.rst.
 
 	video.brightness_switch_enabled= [0,1]
 			If set to 1, on receiving an ACPI notify event
diff --git a/Documentation/fb/api.rst b/Documentation/fb/api.rst
new file mode 100644
index 000000000000..79ec33dded74
--- /dev/null
+++ b/Documentation/fb/api.rst
@@ -0,0 +1,307 @@
+===========================
+The Frame Buffer Device API
+===========================
+
+Last revised: June 21, 2011
+
+
+0. Introduction
+---------------
+
+This document describes the frame buffer API used by applications to interact
+with frame buffer devices. In-kernel APIs between device drivers and the frame
+buffer core are not described.
+
+Due to a lack of documentation in the original frame buffer API, drivers
+behaviours differ in subtle (and not so subtle) ways. This document describes
+the recommended API implementation, but applications should be prepared to
+deal with different behaviours.
+
+
+1. Capabilities
+---------------
+
+Device and driver capabilities are reported in the fixed screen information
+capabilities field::
+
+  struct fb_fix_screeninfo {
+	...
+	__u16 capabilities;		/* see FB_CAP_*			*/
+	...
+  };
+
+Application should use those capabilities to find out what features they can
+expect from the device and driver.
+
+- FB_CAP_FOURCC
+
+The driver supports the four character code (FOURCC) based format setting API.
+When supported, formats are configured using a FOURCC instead of manually
+specifying color components layout.
+
+
+2. Types and visuals
+--------------------
+
+Pixels are stored in memory in hardware-dependent formats. Applications need
+to be aware of the pixel storage format in order to write image data to the
+frame buffer memory in the format expected by the hardware.
+
+Formats are described by frame buffer types and visuals. Some visuals require
+additional information, which are stored in the variable screen information
+bits_per_pixel, grayscale, red, green, blue and transp fields.
+
+Visuals describe how color information is encoded and assembled to create
+macropixels. Types describe how macropixels are stored in memory. The following
+types and visuals are supported.
+
+- FB_TYPE_PACKED_PIXELS
+
+Macropixels are stored contiguously in a single plane. If the number of bits
+per macropixel is not a multiple of 8, whether macropixels are padded to the
+next multiple of 8 bits or packed together into bytes depends on the visual.
+
+Padding at end of lines may be present and is then reported through the fixed
+screen information line_length field.
+
+- FB_TYPE_PLANES
+
+Macropixels are split across multiple planes. The number of planes is equal to
+the number of bits per macropixel, with plane i'th storing i'th bit from all
+macropixels.
+
+Planes are located contiguously in memory.
+
+- FB_TYPE_INTERLEAVED_PLANES
+
+Macropixels are split across multiple planes. The number of planes is equal to
+the number of bits per macropixel, with plane i'th storing i'th bit from all
+macropixels.
+
+Planes are interleaved in memory. The interleave factor, defined as the
+distance in bytes between the beginning of two consecutive interleaved blocks
+belonging to different planes, is stored in the fixed screen information
+type_aux field.
+
+- FB_TYPE_FOURCC
+
+Macropixels are stored in memory as described by the format FOURCC identifier
+stored in the variable screen information grayscale field.
+
+- FB_VISUAL_MONO01
+
+Pixels are black or white and stored on a number of bits (typically one)
+specified by the variable screen information bpp field.
+
+Black pixels are represented by all bits set to 1 and white pixels by all bits
+set to 0. When the number of bits per pixel is smaller than 8, several pixels
+are packed together in a byte.
+
+FB_VISUAL_MONO01 is currently used with FB_TYPE_PACKED_PIXELS only.
+
+- FB_VISUAL_MONO10
+
+Pixels are black or white and stored on a number of bits (typically one)
+specified by the variable screen information bpp field.
+
+Black pixels are represented by all bits set to 0 and white pixels by all bits
+set to 1. When the number of bits per pixel is smaller than 8, several pixels
+are packed together in a byte.
+
+FB_VISUAL_MONO01 is currently used with FB_TYPE_PACKED_PIXELS only.
+
+- FB_VISUAL_TRUECOLOR
+
+Pixels are broken into red, green and blue components, and each component
+indexes a read-only lookup table for the corresponding value. Lookup tables
+are device-dependent, and provide linear or non-linear ramps.
+
+Each component is stored in a macropixel according to the variable screen
+information red, green, blue and transp fields.
+
+- FB_VISUAL_PSEUDOCOLOR and FB_VISUAL_STATIC_PSEUDOCOLOR
+
+Pixel values are encoded as indices into a colormap that stores red, green and
+blue components. The colormap is read-only for FB_VISUAL_STATIC_PSEUDOCOLOR
+and read-write for FB_VISUAL_PSEUDOCOLOR.
+
+Each pixel value is stored in the number of bits reported by the variable
+screen information bits_per_pixel field.
+
+- FB_VISUAL_DIRECTCOLOR
+
+Pixels are broken into red, green and blue components, and each component
+indexes a programmable lookup table for the corresponding value.
+
+Each component is stored in a macropixel according to the variable screen
+information red, green, blue and transp fields.
+
+- FB_VISUAL_FOURCC
+
+Pixels are encoded and  interpreted as described by the format FOURCC
+identifier stored in the variable screen information grayscale field.
+
+
+3. Screen information
+---------------------
+
+Screen information are queried by applications using the FBIOGET_FSCREENINFO
+and FBIOGET_VSCREENINFO ioctls. Those ioctls take a pointer to a
+fb_fix_screeninfo and fb_var_screeninfo structure respectively.
+
+struct fb_fix_screeninfo stores device independent unchangeable information
+about the frame buffer device and the current format. Those information can't
+be directly modified by applications, but can be changed by the driver when an
+application modifies the format::
+
+  struct fb_fix_screeninfo {
+	char id[16];			/* identification string eg "TT Builtin" */
+	unsigned long smem_start;	/* Start of frame buffer mem */
+					/* (physical address) */
+	__u32 smem_len;			/* Length of frame buffer mem */
+	__u32 type;			/* see FB_TYPE_*		*/
+	__u32 type_aux;			/* Interleave for interleaved Planes */
+	__u32 visual;			/* see FB_VISUAL_*		*/
+	__u16 xpanstep;			/* zero if no hardware panning  */
+	__u16 ypanstep;			/* zero if no hardware panning  */
+	__u16 ywrapstep;		/* zero if no hardware ywrap    */
+	__u32 line_length;		/* length of a line in bytes    */
+	unsigned long mmio_start;	/* Start of Memory Mapped I/O   */
+					/* (physical address) */
+	__u32 mmio_len;			/* Length of Memory Mapped I/O  */
+	__u32 accel;			/* Indicate to driver which	*/
+					/*  specific chip/card we have	*/
+	__u16 capabilities;		/* see FB_CAP_*			*/
+	__u16 reserved[2];		/* Reserved for future compatibility */
+  };
+
+struct fb_var_screeninfo stores device independent changeable information
+about a frame buffer device, its current format and video mode, as well as
+other miscellaneous parameters::
+
+  struct fb_var_screeninfo {
+	__u32 xres;			/* visible resolution		*/
+	__u32 yres;
+	__u32 xres_virtual;		/* virtual resolution		*/
+	__u32 yres_virtual;
+	__u32 xoffset;			/* offset from virtual to visible */
+	__u32 yoffset;			/* resolution			*/
+
+	__u32 bits_per_pixel;		/* guess what			*/
+	__u32 grayscale;		/* 0 = color, 1 = grayscale,	*/
+					/* >1 = FOURCC			*/
+	struct fb_bitfield red;		/* bitfield in fb mem if true color, */
+	struct fb_bitfield green;	/* else only length is significant */
+	struct fb_bitfield blue;
+	struct fb_bitfield transp;	/* transparency			*/
+
+	__u32 nonstd;			/* != 0 Non standard pixel format */
+
+	__u32 activate;			/* see FB_ACTIVATE_*		*/
+
+	__u32 height;			/* height of picture in mm    */
+	__u32 width;			/* width of picture in mm     */
+
+	__u32 accel_flags;		/* (OBSOLETE) see fb_info.flags */
+
+	/* Timing: All values in pixclocks, except pixclock (of course) */
+	__u32 pixclock;			/* pixel clock in ps (pico seconds) */
+	__u32 left_margin;		/* time from sync to picture	*/
+	__u32 right_margin;		/* time from picture to sync	*/
+	__u32 upper_margin;		/* time from sync to picture	*/
+	__u32 lower_margin;
+	__u32 hsync_len;		/* length of horizontal sync	*/
+	__u32 vsync_len;		/* length of vertical sync	*/
+	__u32 sync;			/* see FB_SYNC_*		*/
+	__u32 vmode;			/* see FB_VMODE_*		*/
+	__u32 rotate;			/* angle we rotate counter clockwise */
+	__u32 colorspace;		/* colorspace for FOURCC-based modes */
+	__u32 reserved[4];		/* Reserved for future compatibility */
+  };
+
+To modify variable information, applications call the FBIOPUT_VSCREENINFO
+ioctl with a pointer to a fb_var_screeninfo structure. If the call is
+successful, the driver will update the fixed screen information accordingly.
+
+Instead of filling the complete fb_var_screeninfo structure manually,
+applications should call the FBIOGET_VSCREENINFO ioctl and modify only the
+fields they care about.
+
+
+4. Format configuration
+-----------------------
+
+Frame buffer devices offer two ways to configure the frame buffer format: the
+legacy API and the FOURCC-based API.
+
+
+The legacy API has been the only frame buffer format configuration API for a
+long time and is thus widely used by application. It is the recommended API
+for applications when using RGB and grayscale formats, as well as legacy
+non-standard formats.
+
+To select a format, applications set the fb_var_screeninfo bits_per_pixel field
+to the desired frame buffer depth. Values up to 8 will usually map to
+monochrome, grayscale or pseudocolor visuals, although this is not required.
+
+- For grayscale formats, applications set the grayscale field to one. The red,
+  blue, green and transp fields must be set to 0 by applications and ignored by
+  drivers. Drivers must fill the red, blue and green offsets to 0 and lengths
+  to the bits_per_pixel value.
+
+- For pseudocolor formats, applications set the grayscale field to zero. The
+  red, blue, green and transp fields must be set to 0 by applications and
+  ignored by drivers. Drivers must fill the red, blue and green offsets to 0
+  and lengths to the bits_per_pixel value.
+
+- For truecolor and directcolor formats, applications set the grayscale field
+  to zero, and the red, blue, green and transp fields to describe the layout of
+  color components in memory::
+
+    struct fb_bitfield {
+	__u32 offset;			/* beginning of bitfield	*/
+	__u32 length;			/* length of bitfield		*/
+	__u32 msb_right;		/* != 0 : Most significant bit is */
+					/* right */
+    };
+
+  Pixel values are bits_per_pixel wide and are split in non-overlapping red,
+  green, blue and alpha (transparency) components. Location and size of each
+  component in the pixel value are described by the fb_bitfield offset and
+  length fields. Offset are computed from the right.
+
+  Pixels are always stored in an integer number of bytes. If the number of
+  bits per pixel is not a multiple of 8, pixel values are padded to the next
+  multiple of 8 bits.
+
+Upon successful format configuration, drivers update the fb_fix_screeninfo
+type, visual and line_length fields depending on the selected format.
+
+
+The FOURCC-based API replaces format descriptions by four character codes
+(FOURCC). FOURCCs are abstract identifiers that uniquely define a format
+without explicitly describing it. This is the only API that supports YUV
+formats. Drivers are also encouraged to implement the FOURCC-based API for RGB
+and grayscale formats.
+
+Drivers that support the FOURCC-based API report this capability by setting
+the FB_CAP_FOURCC bit in the fb_fix_screeninfo capabilities field.
+
+FOURCC definitions are located in the linux/videodev2.h header. However, and
+despite starting with the V4L2_PIX_FMT_prefix, they are not restricted to V4L2
+and don't require usage of the V4L2 subsystem. FOURCC documentation is
+available in Documentation/media/uapi/v4l/pixfmt.rst.
+
+To select a format, applications set the grayscale field to the desired FOURCC.
+For YUV formats, they should also select the appropriate colorspace by setting
+the colorspace field to one of the colorspaces listed in linux/videodev2.h and
+documented in Documentation/media/uapi/v4l/colorspaces.rst.
+
+The red, green, blue and transp fields are not used with the FOURCC-based API.
+For forward compatibility reasons applications must zero those fields, and
+drivers must ignore them. Values other than 0 may get a meaning in future
+extensions.
+
+Upon successful format configuration, drivers update the fb_fix_screeninfo
+type, visual and line_length fields depending on the selected format. The type
+and visual fields are set to FB_TYPE_FOURCC and FB_VISUAL_FOURCC respectively.
diff --git a/Documentation/fb/api.txt b/Documentation/fb/api.txt
deleted file mode 100644
index d52cf1e3b975..000000000000
--- a/Documentation/fb/api.txt
+++ /dev/null
@@ -1,306 +0,0 @@
-			The Frame Buffer Device API
-			---------------------------
-
-Last revised: June 21, 2011
-
-
-0. Introduction
----------------
-
-This document describes the frame buffer API used by applications to interact
-with frame buffer devices. In-kernel APIs between device drivers and the frame
-buffer core are not described.
-
-Due to a lack of documentation in the original frame buffer API, drivers
-behaviours differ in subtle (and not so subtle) ways. This document describes
-the recommended API implementation, but applications should be prepared to
-deal with different behaviours.
-
-
-1. Capabilities
----------------
-
-Device and driver capabilities are reported in the fixed screen information
-capabilities field.
-
-struct fb_fix_screeninfo {
-	...
-	__u16 capabilities;		/* see FB_CAP_*			*/
-	...
-};
-
-Application should use those capabilities to find out what features they can
-expect from the device and driver.
-
-- FB_CAP_FOURCC
-
-The driver supports the four character code (FOURCC) based format setting API.
-When supported, formats are configured using a FOURCC instead of manually
-specifying color components layout.
-
-
-2. Types and visuals
---------------------
-
-Pixels are stored in memory in hardware-dependent formats. Applications need
-to be aware of the pixel storage format in order to write image data to the
-frame buffer memory in the format expected by the hardware.
-
-Formats are described by frame buffer types and visuals. Some visuals require
-additional information, which are stored in the variable screen information
-bits_per_pixel, grayscale, red, green, blue and transp fields.
-
-Visuals describe how color information is encoded and assembled to create
-macropixels. Types describe how macropixels are stored in memory. The following
-types and visuals are supported.
-
-- FB_TYPE_PACKED_PIXELS
-
-Macropixels are stored contiguously in a single plane. If the number of bits
-per macropixel is not a multiple of 8, whether macropixels are padded to the
-next multiple of 8 bits or packed together into bytes depends on the visual.
-
-Padding at end of lines may be present and is then reported through the fixed
-screen information line_length field.
-
-- FB_TYPE_PLANES
-
-Macropixels are split across multiple planes. The number of planes is equal to
-the number of bits per macropixel, with plane i'th storing i'th bit from all
-macropixels.
-
-Planes are located contiguously in memory.
-
-- FB_TYPE_INTERLEAVED_PLANES
-
-Macropixels are split across multiple planes. The number of planes is equal to
-the number of bits per macropixel, with plane i'th storing i'th bit from all
-macropixels.
-
-Planes are interleaved in memory. The interleave factor, defined as the
-distance in bytes between the beginning of two consecutive interleaved blocks
-belonging to different planes, is stored in the fixed screen information
-type_aux field.
-
-- FB_TYPE_FOURCC
-
-Macropixels are stored in memory as described by the format FOURCC identifier
-stored in the variable screen information grayscale field.
-
-- FB_VISUAL_MONO01
-
-Pixels are black or white and stored on a number of bits (typically one)
-specified by the variable screen information bpp field.
-
-Black pixels are represented by all bits set to 1 and white pixels by all bits
-set to 0. When the number of bits per pixel is smaller than 8, several pixels
-are packed together in a byte.
-
-FB_VISUAL_MONO01 is currently used with FB_TYPE_PACKED_PIXELS only.
-
-- FB_VISUAL_MONO10
-
-Pixels are black or white and stored on a number of bits (typically one)
-specified by the variable screen information bpp field.
-
-Black pixels are represented by all bits set to 0 and white pixels by all bits
-set to 1. When the number of bits per pixel is smaller than 8, several pixels
-are packed together in a byte.
-
-FB_VISUAL_MONO01 is currently used with FB_TYPE_PACKED_PIXELS only.
-
-- FB_VISUAL_TRUECOLOR
-
-Pixels are broken into red, green and blue components, and each component
-indexes a read-only lookup table for the corresponding value. Lookup tables
-are device-dependent, and provide linear or non-linear ramps.
-
-Each component is stored in a macropixel according to the variable screen
-information red, green, blue and transp fields.
-
-- FB_VISUAL_PSEUDOCOLOR and FB_VISUAL_STATIC_PSEUDOCOLOR
-
-Pixel values are encoded as indices into a colormap that stores red, green and
-blue components. The colormap is read-only for FB_VISUAL_STATIC_PSEUDOCOLOR
-and read-write for FB_VISUAL_PSEUDOCOLOR.
-
-Each pixel value is stored in the number of bits reported by the variable
-screen information bits_per_pixel field.
-
-- FB_VISUAL_DIRECTCOLOR
-
-Pixels are broken into red, green and blue components, and each component
-indexes a programmable lookup table for the corresponding value.
-
-Each component is stored in a macropixel according to the variable screen
-information red, green, blue and transp fields.
-
-- FB_VISUAL_FOURCC
-
-Pixels are encoded and  interpreted as described by the format FOURCC
-identifier stored in the variable screen information grayscale field.
-
-
-3. Screen information
----------------------
-
-Screen information are queried by applications using the FBIOGET_FSCREENINFO
-and FBIOGET_VSCREENINFO ioctls. Those ioctls take a pointer to a
-fb_fix_screeninfo and fb_var_screeninfo structure respectively.
-
-struct fb_fix_screeninfo stores device independent unchangeable information
-about the frame buffer device and the current format. Those information can't
-be directly modified by applications, but can be changed by the driver when an
-application modifies the format.
-
-struct fb_fix_screeninfo {
-	char id[16];			/* identification string eg "TT Builtin" */
-	unsigned long smem_start;	/* Start of frame buffer mem */
-					/* (physical address) */
-	__u32 smem_len;			/* Length of frame buffer mem */
-	__u32 type;			/* see FB_TYPE_*		*/
-	__u32 type_aux;			/* Interleave for interleaved Planes */
-	__u32 visual;			/* see FB_VISUAL_*		*/
-	__u16 xpanstep;			/* zero if no hardware panning  */
-	__u16 ypanstep;			/* zero if no hardware panning  */
-	__u16 ywrapstep;		/* zero if no hardware ywrap    */
-	__u32 line_length;		/* length of a line in bytes    */
-	unsigned long mmio_start;	/* Start of Memory Mapped I/O   */
-					/* (physical address) */
-	__u32 mmio_len;			/* Length of Memory Mapped I/O  */
-	__u32 accel;			/* Indicate to driver which	*/
-					/*  specific chip/card we have	*/
-	__u16 capabilities;		/* see FB_CAP_*			*/
-	__u16 reserved[2];		/* Reserved for future compatibility */
-};
-
-struct fb_var_screeninfo stores device independent changeable information
-about a frame buffer device, its current format and video mode, as well as
-other miscellaneous parameters.
-
-struct fb_var_screeninfo {
-	__u32 xres;			/* visible resolution		*/
-	__u32 yres;
-	__u32 xres_virtual;		/* virtual resolution		*/
-	__u32 yres_virtual;
-	__u32 xoffset;			/* offset from virtual to visible */
-	__u32 yoffset;			/* resolution			*/
-
-	__u32 bits_per_pixel;		/* guess what			*/
-	__u32 grayscale;		/* 0 = color, 1 = grayscale,	*/
-					/* >1 = FOURCC			*/
-	struct fb_bitfield red;		/* bitfield in fb mem if true color, */
-	struct fb_bitfield green;	/* else only length is significant */
-	struct fb_bitfield blue;
-	struct fb_bitfield transp;	/* transparency			*/
-
-	__u32 nonstd;			/* != 0 Non standard pixel format */
-
-	__u32 activate;			/* see FB_ACTIVATE_*		*/
-
-	__u32 height;			/* height of picture in mm    */
-	__u32 width;			/* width of picture in mm     */
-
-	__u32 accel_flags;		/* (OBSOLETE) see fb_info.flags */
-
-	/* Timing: All values in pixclocks, except pixclock (of course) */
-	__u32 pixclock;			/* pixel clock in ps (pico seconds) */
-	__u32 left_margin;		/* time from sync to picture	*/
-	__u32 right_margin;		/* time from picture to sync	*/
-	__u32 upper_margin;		/* time from sync to picture	*/
-	__u32 lower_margin;
-	__u32 hsync_len;		/* length of horizontal sync	*/
-	__u32 vsync_len;		/* length of vertical sync	*/
-	__u32 sync;			/* see FB_SYNC_*		*/
-	__u32 vmode;			/* see FB_VMODE_*		*/
-	__u32 rotate;			/* angle we rotate counter clockwise */
-	__u32 colorspace;		/* colorspace for FOURCC-based modes */
-	__u32 reserved[4];		/* Reserved for future compatibility */
-};
-
-To modify variable information, applications call the FBIOPUT_VSCREENINFO
-ioctl with a pointer to a fb_var_screeninfo structure. If the call is
-successful, the driver will update the fixed screen information accordingly.
-
-Instead of filling the complete fb_var_screeninfo structure manually,
-applications should call the FBIOGET_VSCREENINFO ioctl and modify only the
-fields they care about.
-
-
-4. Format configuration
------------------------
-
-Frame buffer devices offer two ways to configure the frame buffer format: the
-legacy API and the FOURCC-based API.
-
-
-The legacy API has been the only frame buffer format configuration API for a
-long time and is thus widely used by application. It is the recommended API
-for applications when using RGB and grayscale formats, as well as legacy
-non-standard formats.
-
-To select a format, applications set the fb_var_screeninfo bits_per_pixel field
-to the desired frame buffer depth. Values up to 8 will usually map to
-monochrome, grayscale or pseudocolor visuals, although this is not required.
-
-- For grayscale formats, applications set the grayscale field to one. The red,
-  blue, green and transp fields must be set to 0 by applications and ignored by
-  drivers. Drivers must fill the red, blue and green offsets to 0 and lengths
-  to the bits_per_pixel value.
-
-- For pseudocolor formats, applications set the grayscale field to zero. The
-  red, blue, green and transp fields must be set to 0 by applications and
-  ignored by drivers. Drivers must fill the red, blue and green offsets to 0
-  and lengths to the bits_per_pixel value.
-
-- For truecolor and directcolor formats, applications set the grayscale field
-  to zero, and the red, blue, green and transp fields to describe the layout of
-  color components in memory.
-
-struct fb_bitfield {
-	__u32 offset;			/* beginning of bitfield	*/
-	__u32 length;			/* length of bitfield		*/
-	__u32 msb_right;		/* != 0 : Most significant bit is */
-					/* right */
-};
-
-  Pixel values are bits_per_pixel wide and are split in non-overlapping red,
-  green, blue and alpha (transparency) components. Location and size of each
-  component in the pixel value are described by the fb_bitfield offset and
-  length fields. Offset are computed from the right.
-
-  Pixels are always stored in an integer number of bytes. If the number of
-  bits per pixel is not a multiple of 8, pixel values are padded to the next
-  multiple of 8 bits.
-
-Upon successful format configuration, drivers update the fb_fix_screeninfo
-type, visual and line_length fields depending on the selected format.
-
-
-The FOURCC-based API replaces format descriptions by four character codes
-(FOURCC). FOURCCs are abstract identifiers that uniquely define a format
-without explicitly describing it. This is the only API that supports YUV
-formats. Drivers are also encouraged to implement the FOURCC-based API for RGB
-and grayscale formats.
-
-Drivers that support the FOURCC-based API report this capability by setting
-the FB_CAP_FOURCC bit in the fb_fix_screeninfo capabilities field.
-
-FOURCC definitions are located in the linux/videodev2.h header. However, and
-despite starting with the V4L2_PIX_FMT_prefix, they are not restricted to V4L2
-and don't require usage of the V4L2 subsystem. FOURCC documentation is
-available in Documentation/media/uapi/v4l/pixfmt.rst.
-
-To select a format, applications set the grayscale field to the desired FOURCC.
-For YUV formats, they should also select the appropriate colorspace by setting
-the colorspace field to one of the colorspaces listed in linux/videodev2.h and
-documented in Documentation/media/uapi/v4l/colorspaces.rst.
-
-The red, green, blue and transp fields are not used with the FOURCC-based API.
-For forward compatibility reasons applications must zero those fields, and
-drivers must ignore them. Values other than 0 may get a meaning in future
-extensions.
-
-Upon successful format configuration, drivers update the fb_fix_screeninfo
-type, visual and line_length fields depending on the selected format. The type
-and visual fields are set to FB_TYPE_FOURCC and FB_VISUAL_FOURCC respectively.
diff --git a/Documentation/fb/arkfb.rst b/Documentation/fb/arkfb.rst
new file mode 100644
index 000000000000..aeca8773dd7e
--- /dev/null
+++ b/Documentation/fb/arkfb.rst
@@ -0,0 +1,68 @@
+========================================
+arkfb - fbdev driver for ARK Logic chips
+========================================
+
+
+Supported Hardware
+==================
+
+	ARK 2000PV chip
+	ICS 5342 ramdac
+
+	- only BIOS initialized VGA devices supported
+	- probably not working on big endian
+
+
+Supported Features
+==================
+
+	*  4 bpp pseudocolor modes (with 18bit palette, two variants)
+	*  8 bpp pseudocolor mode (with 18bit palette)
+	* 16 bpp truecolor modes (RGB 555 and RGB 565)
+	* 24 bpp truecolor mode (RGB 888)
+	* 32 bpp truecolor mode (RGB 888)
+	* text mode (activated by bpp = 0)
+	* doublescan mode variant (not available in text mode)
+	* panning in both directions
+	* suspend/resume support
+
+Text mode is supported even in higher resolutions, but there is limitation to
+lower pixclocks (i got maximum about 70 MHz, it is dependent on specific
+hardware). This limitation is not enforced by driver. Text mode supports 8bit
+wide fonts only (hardware limitation) and 16bit tall fonts (driver
+limitation). Unfortunately character attributes (like color) in text mode are
+broken for unknown reason, so its usefulness is limited.
+
+There are two 4 bpp modes. First mode (selected if nonstd == 0) is mode with
+packed pixels, high nibble first. Second mode (selected if nonstd == 1) is mode
+with interleaved planes (1 byte interleave), MSB first. Both modes support
+8bit wide fonts only (driver limitation).
+
+Suspend/resume works on systems that initialize video card during resume and
+if device is active (for example used by fbcon).
+
+
+Missing Features
+================
+(alias TODO list)
+
+	* secondary (not initialized by BIOS) device support
+	* big endian support
+	* DPMS support
+	* MMIO support
+	* interlaced mode variant
+	* support for fontwidths != 8 in 4 bpp modes
+	* support for fontheight != 16 in text mode
+	* hardware cursor
+	* vsync synchronization
+	* feature connector support
+	* acceleration support (8514-like 2D)
+
+
+Known bugs
+==========
+
+	* character attributes (and cursor) in text mode are broken
+
+--
+Ondrej Zajicek <santiago@crfreenet.org>
diff --git a/Documentation/fb/arkfb.txt b/Documentation/fb/arkfb.txt
deleted file mode 100644
index e8487a9d6a05..000000000000
--- a/Documentation/fb/arkfb.txt
+++ /dev/null
@@ -1,68 +0,0 @@
-
-	arkfb - fbdev driver for ARK Logic chips
-	========================================
-
-
-Supported Hardware
-==================
-
-	ARK 2000PV chip
-	ICS 5342 ramdac
-
-	- only BIOS initialized VGA devices supported
-	- probably not working on big endian
-
-
-Supported Features
-==================
-
-	*  4 bpp pseudocolor modes (with 18bit palette, two variants)
-	*  8 bpp pseudocolor mode (with 18bit palette)
-	* 16 bpp truecolor modes (RGB 555 and RGB 565)
-	* 24 bpp truecolor mode (RGB 888)
-	* 32 bpp truecolor mode (RGB 888)
-	* text mode (activated by bpp = 0)
-	* doublescan mode variant (not available in text mode)
-	* panning in both directions
-	* suspend/resume support
-
-Text mode is supported even in higher resolutions, but there is limitation to
-lower pixclocks (i got maximum about 70 MHz, it is dependent on specific
-hardware). This limitation is not enforced by driver. Text mode supports 8bit
-wide fonts only (hardware limitation) and 16bit tall fonts (driver
-limitation). Unfortunately character attributes (like color) in text mode are
-broken for unknown reason, so its usefulness is limited.
-
-There are two 4 bpp modes. First mode (selected if nonstd == 0) is mode with
-packed pixels, high nibble first. Second mode (selected if nonstd == 1) is mode
-with interleaved planes (1 byte interleave), MSB first. Both modes support
-8bit wide fonts only (driver limitation).
-
-Suspend/resume works on systems that initialize video card during resume and
-if device is active (for example used by fbcon).
-
-
-Missing Features
-================
-(alias TODO list)
-
-	* secondary (not initialized by BIOS) device support
-   	* big endian support
-	* DPMS support
-	* MMIO support
-	* interlaced mode variant
-	* support for fontwidths != 8 in 4 bpp modes
-	* support for fontheight != 16 in text mode
-	* hardware cursor
-	* vsync synchronization
-	* feature connector support
-	* acceleration support (8514-like 2D)
-
-
-Known bugs
-==========
-
-	* character attributes (and cursor) in text mode are broken
-
---
-Ondrej Zajicek <santiago@crfreenet.org>
diff --git a/Documentation/fb/aty128fb.rst b/Documentation/fb/aty128fb.rst
new file mode 100644
index 000000000000..3f107718f933
--- /dev/null
+++ b/Documentation/fb/aty128fb.rst
@@ -0,0 +1,75 @@
+=================
+What is aty128fb?
+=================
+
+.. [This file is cloned from VesaFB/matroxfb]
+
+This is a driver for a graphic framebuffer for ATI Rage128 based devices
+on Intel and PPC boxes.
+
+Advantages:
+
+ * It provides a nice large console (128 cols + 48 lines with 1024x768)
+   without using tiny, unreadable fonts.
+ * You can run XF68_FBDev on top of /dev/fb0
+ * Most important: boot logo :-)
+
+Disadvantages:
+
+ * graphic mode is slower than text mode... but you should not notice
+   if you use same resolution as you used in textmode.
+ * still experimental.
+
+
+How to use it?
+==============
+
+Switching modes is done using the  video=aty128fb:<resolution>... modedb
+boot parameter or using `fbset` program.
+
+See Documentation/fb/modedb.rst for more information on modedb
+resolutions.
+
+You should compile in both vgacon (to boot if you remove your Rage128 from
+box) and aty128fb (for graphics mode). You should not compile-in vesafb
+unless you have primary display on non-Rage128 VBE2.0 device (see
+Documentation/fb/vesafb.rst for details).
+
+
+X11
+===
+
+XF68_FBDev should generally work fine, but it is non-accelerated. As of
+this document, 8 and 32bpp works fine.  There have been palette issues
+when switching from X to console and back to X.  You will have to restart
+X to fix this.
+
+
+Configuration
+=============
+
+You can pass kernel command line options to vesafb with
+`video=aty128fb:option1,option2:value2,option3` (multiple options should
+be separated by comma, values are separated from options by `:`).
+Accepted options:
+
+========= =======================================================
+noaccel   do not use acceleration engine. It is default.
+accel     use acceleration engine. Not finished.
+vmode:x   chooses PowerMacintosh video mode <x>. Deprecated.
+cmode:x   chooses PowerMacintosh colour mode <x>. Deprecated.
+<XxX@X>   selects startup videomode. See modedb.txt for detailed
+	  explanation. Default is 640x480x8bpp.
+========= =======================================================
+
+
+Limitations
+===========
+
+There are known and unknown bugs, features and misfeatures.
+Currently there are following known bugs:
+
+ - This driver is still experimental and is not finished.  Too many
+   bugs/errata to list here.
+
+Brad Douglas <brad@neruo.com>
diff --git a/Documentation/fb/aty128fb.txt b/Documentation/fb/aty128fb.txt
deleted file mode 100644
index b605204fcfe1..000000000000
--- a/Documentation/fb/aty128fb.txt
+++ /dev/null
@@ -1,72 +0,0 @@
-[This file is cloned from VesaFB/matroxfb]
-
-What is aty128fb?
-=================
-
-This is a driver for a graphic framebuffer for ATI Rage128 based devices
-on Intel and PPC boxes.
-
-Advantages:
-
- * It provides a nice large console (128 cols + 48 lines with 1024x768)
-   without using tiny, unreadable fonts.
- * You can run XF68_FBDev on top of /dev/fb0
- * Most important: boot logo :-)
-
-Disadvantages:
-
- * graphic mode is slower than text mode... but you should not notice
-   if you use same resolution as you used in textmode.
- * still experimental.
-
-
-How to use it?
-==============
-
-Switching modes is done using the  video=aty128fb:<resolution>... modedb
-boot parameter or using `fbset' program.
-
-See Documentation/fb/modedb.txt for more information on modedb
-resolutions.
-
-You should compile in both vgacon (to boot if you remove your Rage128 from
-box) and aty128fb (for graphics mode). You should not compile-in vesafb
-unless you have primary display on non-Rage128 VBE2.0 device (see 
-Documentation/fb/vesafb.txt for details).
-
-
-X11
-===
-
-XF68_FBDev should generally work fine, but it is non-accelerated. As of
-this document, 8 and 32bpp works fine.  There have been palette issues
-when switching from X to console and back to X.  You will have to restart
-X to fix this.
-
-
-Configuration
-=============
-
-You can pass kernel command line options to vesafb with
-`video=aty128fb:option1,option2:value2,option3' (multiple options should
-be separated by comma, values are separated from options by `:'). 
-Accepted options:
-
-noaccel  - do not use acceleration engine. It is default.
-accel    - use acceleration engine. Not finished.
-vmode:x  - chooses PowerMacintosh video mode <x>. Deprecated.
-cmode:x  - chooses PowerMacintosh colour mode <x>. Deprecated.
-<XxX@X>  - selects startup videomode. See modedb.txt for detailed
-	   explanation. Default is 640x480x8bpp.
-
-
-Limitations
-===========
-
-There are known and unknown bugs, features and misfeatures.
-Currently there are following known bugs:
- + This driver is still experimental and is not finished.  Too many
-   bugs/errata to list here.
-
---
-Brad Douglas <brad@neruo.com>
diff --git a/Documentation/fb/cirrusfb.rst b/Documentation/fb/cirrusfb.rst
new file mode 100644
index 000000000000..8c3e6c6cb114
--- /dev/null
+++ b/Documentation/fb/cirrusfb.rst
@@ -0,0 +1,94 @@
+============================================
+Framebuffer driver for Cirrus Logic chipsets
+============================================
+
+Copyright 1999 Jeff Garzik <jgarzik@pobox.com>
+
+
+.. just a little something to get people going; contributors welcome!
+
+
+Chip families supported:
+	- SD64
+	- Piccolo
+	- Picasso
+	- Spectrum
+	- Alpine (GD-543x/4x)
+	- Picasso4 (GD-5446)
+	- GD-5480
+	- Laguna (GD-546x)
+
+Bus's supported:
+	- PCI
+	- Zorro
+
+Architectures supported:
+	- i386
+	- Alpha
+	- PPC (Motorola Powerstack)
+	- m68k (Amiga)
+
+
+
+Default video modes
+-------------------
+At the moment, there are two kernel command line arguments supported:
+
+- mode:640x480
+- mode:800x600
+- mode:1024x768
+
+Full support for startup video modes (modedb) will be integrated soon.
+
+Version 1.9.9.1
+---------------
+* Fix memory detection for 512kB case
+* 800x600 mode
+* Fixed timings
+* Hint for AXP: Use -accel false -vyres -1 when changing resolution
+
+
+Version 1.9.4.4
+---------------
+* Preliminary Laguna support
+* Overhaul color register routines.
+* Associated with the above, console colors are now obtained from a LUT
+  called 'palette' instead of from the VGA registers.  This code was
+  modelled after that in atyfb and matroxfb.
+* Code cleanup, add comments.
+* Overhaul SR07 handling.
+* Bug fixes.
+
+
+Version 1.9.4.3
+---------------
+* Correctly set default startup video mode.
+* Do not override ram size setting.  Define
+  CLGEN_USE_HARDCODED_RAM_SETTINGS if you _do_ want to override the RAM
+  setting.
+* Compile fixes related to new 2.3.x IORESOURCE_IO[PORT] symbol changes.
+* Use new 2.3.x resource allocation.
+* Some code cleanup.
+
+
+Version 1.9.4.2
+---------------
+* Casting fixes.
+* Assertions no longer cause an oops on purpose.
+* Bug fixes.
+
+
+Version 1.9.4.1
+---------------
+* Add compatibility support.  Now requires a 2.1.x, 2.2.x or 2.3.x kernel.
+
+
+Version 1.9.4
+-------------
+* Several enhancements, smaller memory footprint, a few bugfixes.
+* Requires kernel 2.3.14-pre1 or later.
+
+
+Version 1.9.3
+-------------
+* Bundled with kernel 2.3.14-pre1 or later.
diff --git a/Documentation/fb/cirrusfb.txt b/Documentation/fb/cirrusfb.txt
deleted file mode 100644
index f75950d330a4..000000000000
--- a/Documentation/fb/cirrusfb.txt
+++ /dev/null
@@ -1,97 +0,0 @@
-
-		Framebuffer driver for Cirrus Logic chipsets
-		Copyright 1999 Jeff Garzik <jgarzik@pobox.com>
-
-
-
-{ just a little something to get people going; contributors welcome! }
-
-
-
-Chip families supported:
-	SD64
-	Piccolo
-	Picasso
-	Spectrum
-	Alpine (GD-543x/4x)
-	Picasso4 (GD-5446)
-	GD-5480
-	Laguna (GD-546x)
-
-Bus's supported:
-	PCI
-	Zorro
-
-Architectures supported:
-	i386
-	Alpha
-	PPC (Motorola Powerstack)
-	m68k (Amiga)
-
-
-
-Default video modes
--------------------
-At the moment, there are two kernel command line arguments supported:
-
-mode:640x480
-mode:800x600
-	or
-mode:1024x768
-
-Full support for startup video modes (modedb) will be integrated soon.
-
-Version 1.9.9.1
----------------
-* Fix memory detection for 512kB case
-* 800x600 mode
-* Fixed timings
-* Hint for AXP: Use -accel false -vyres -1 when changing resolution
-
-
-Version 1.9.4.4
----------------
-* Preliminary Laguna support
-* Overhaul color register routines.
-* Associated with the above, console colors are now obtained from a LUT
-  called 'palette' instead of from the VGA registers.  This code was
-  modelled after that in atyfb and matroxfb.
-* Code cleanup, add comments.
-* Overhaul SR07 handling.
-* Bug fixes.
-
-
-Version 1.9.4.3
----------------
-* Correctly set default startup video mode.
-* Do not override ram size setting.  Define
-  CLGEN_USE_HARDCODED_RAM_SETTINGS if you _do_ want to override the RAM
-  setting.
-* Compile fixes related to new 2.3.x IORESOURCE_IO[PORT] symbol changes.
-* Use new 2.3.x resource allocation.
-* Some code cleanup.
-
-
-Version 1.9.4.2
----------------
-* Casting fixes.
-* Assertions no longer cause an oops on purpose.
-* Bug fixes.
-
-
-Version 1.9.4.1
----------------
-* Add compatibility support.  Now requires a 2.1.x, 2.2.x or 2.3.x kernel.
-
-
-Version 1.9.4
--------------
-* Several enhancements, smaller memory footprint, a few bugfixes.
-* Requires kernel 2.3.14-pre1 or later.
-
-
-Version 1.9.3
--------------
-* Bundled with kernel 2.3.14-pre1 or later.
-
-
diff --git a/Documentation/fb/cmap_xfbdev.rst b/Documentation/fb/cmap_xfbdev.rst
new file mode 100644
index 000000000000..5db5e9787361
--- /dev/null
+++ b/Documentation/fb/cmap_xfbdev.rst
@@ -0,0 +1,56 @@
+==========================
+Understanding fbdev's cmap
+==========================
+
+These notes explain how X's dix layer uses fbdev's cmap structures.
+
+-  example of relevant structures in fbdev as used for a 3-bit grayscale cmap::
+
+    struct fb_var_screeninfo {
+	    .bits_per_pixel = 8,
+	    .grayscale      = 1,
+	    .red =          { 4, 3, 0 },
+	    .green =        { 0, 0, 0 },
+	    .blue =         { 0, 0, 0 },
+    }
+    struct fb_fix_screeninfo {
+	    .visual =       FB_VISUAL_STATIC_PSEUDOCOLOR,
+    }
+    for (i = 0; i < 8; i++)
+	info->cmap.red[i] = (((2*i)+1)*(0xFFFF))/16;
+    memcpy(info->cmap.green, info->cmap.red, sizeof(u16)*8);
+    memcpy(info->cmap.blue, info->cmap.red, sizeof(u16)*8);
+
+-  X11 apps do something like the following when trying to use grayscale::
+
+    for (i=0; i < 8; i++) {
+	char colorspec[64];
+	memset(colorspec,0,64);
+	sprintf(colorspec, "rgb:%x/%x/%x", i*36,i*36,i*36);
+	if (!XParseColor(outputDisplay, testColormap, colorspec, &wantedColor))
+		printf("Can't get color %s\n",colorspec);
+	XAllocColor(outputDisplay, testColormap, &wantedColor);
+	grays[i] = wantedColor;
+    }
+
+There's also named equivalents like gray1..x provided you have an rgb.txt.
+
+Somewhere in X's callchain, this results in a call to X code that handles the
+colormap. For example, Xfbdev hits the following:
+
+xc-011010/programs/Xserver/dix/colormap.c::
+
+  FindBestPixel(pentFirst, size, prgb, channel)
+
+  dr = (long) pent->co.local.red - prgb->red;
+  dg = (long) pent->co.local.green - prgb->green;
+  db = (long) pent->co.local.blue - prgb->blue;
+  sq = dr * dr;
+  UnsignedToBigNum (sq, &sum);
+  BigNumAdd (&sum, &temp, &sum);
+
+co.local.red are entries that were brought in through FBIOGETCMAP which come
+directly from the info->cmap.red that was listed above. The prgb is the rgb
+that the app wants to match to. The above code is doing what looks like a least
+squares matching function. That's why the cmap entries can't be set to the left
+hand side boundaries of a color range.
diff --git a/Documentation/fb/cmap_xfbdev.txt b/Documentation/fb/cmap_xfbdev.txt
deleted file mode 100644
index 55e1f0a3d2b4..000000000000
--- a/Documentation/fb/cmap_xfbdev.txt
+++ /dev/null
@@ -1,53 +0,0 @@
-Understanding fbdev's cmap
---------------------------
-
-These notes explain how X's dix layer uses fbdev's cmap structures.
-
-*. example of relevant structures in fbdev as used for a 3-bit grayscale cmap
-struct fb_var_screeninfo {
-        .bits_per_pixel = 8,
-        .grayscale      = 1,
-        .red =          { 4, 3, 0 },
-        .green =        { 0, 0, 0 },
-        .blue =         { 0, 0, 0 },
-}
-struct fb_fix_screeninfo {
-        .visual =       FB_VISUAL_STATIC_PSEUDOCOLOR,
-}
-for (i = 0; i < 8; i++)
-	info->cmap.red[i] = (((2*i)+1)*(0xFFFF))/16;
-memcpy(info->cmap.green, info->cmap.red, sizeof(u16)*8);
-memcpy(info->cmap.blue, info->cmap.red, sizeof(u16)*8);
-
-*. X11 apps do something like the following when trying to use grayscale.
-for (i=0; i < 8; i++) {
-	char colorspec[64];
-	memset(colorspec,0,64);
-	sprintf(colorspec, "rgb:%x/%x/%x", i*36,i*36,i*36);
-	if (!XParseColor(outputDisplay, testColormap, colorspec, &wantedColor))
-		printf("Can't get color %s\n",colorspec);
-	XAllocColor(outputDisplay, testColormap, &wantedColor);
-	grays[i] = wantedColor;
-}
-There's also named equivalents like gray1..x provided you have an rgb.txt.
-
-Somewhere in X's callchain, this results in a call to X code that handles the
-colormap. For example, Xfbdev hits the following:
-
-xc-011010/programs/Xserver/dix/colormap.c:
-
-FindBestPixel(pentFirst, size, prgb, channel)
-
-dr = (long) pent->co.local.red - prgb->red;
-dg = (long) pent->co.local.green - prgb->green;
-db = (long) pent->co.local.blue - prgb->blue;
-sq = dr * dr;
-UnsignedToBigNum (sq, &sum);
-BigNumAdd (&sum, &temp, &sum);
-
-co.local.red are entries that were brought in through FBIOGETCMAP which come
-directly from the info->cmap.red that was listed above. The prgb is the rgb
-that the app wants to match to. The above code is doing what looks like a least
-squares matching function. That's why the cmap entries can't be set to the left
-hand side boundaries of a color range.
-
diff --git a/Documentation/fb/deferred_io.rst b/Documentation/fb/deferred_io.rst
new file mode 100644
index 000000000000..7300cff255a3
--- /dev/null
+++ b/Documentation/fb/deferred_io.rst
@@ -0,0 +1,79 @@
+===========
+Deferred IO
+===========
+
+Deferred IO is a way to delay and repurpose IO. It uses host memory as a
+buffer and the MMU pagefault as a pretrigger for when to perform the device
+IO. The following example may be a useful explanation of how one such setup
+works:
+
+- userspace app like Xfbdev mmaps framebuffer
+- deferred IO and driver sets up fault and page_mkwrite handlers
+- userspace app tries to write to mmaped vaddress
+- we get pagefault and reach fault handler
+- fault handler finds and returns physical page
+- we get page_mkwrite where we add this page to a list
+- schedule a workqueue task to be run after a delay
+- app continues writing to that page with no additional cost. this is
+  the key benefit.
+- the workqueue task comes in and mkcleans the pages on the list, then
+  completes the work associated with updating the framebuffer. this is
+  the real work talking to the device.
+- app tries to write to the address (that has now been mkcleaned)
+- get pagefault and the above sequence occurs again
+
+As can be seen from above, one benefit is roughly to allow bursty framebuffer
+writes to occur at minimum cost. Then after some time when hopefully things
+have gone quiet, we go and really update the framebuffer which would be
+a relatively more expensive operation.
+
+For some types of nonvolatile high latency displays, the desired image is
+the final image rather than the intermediate stages which is why it's okay
+to not update for each write that is occurring.
+
+It may be the case that this is useful in other scenarios as well. Paul Mundt
+has mentioned a case where it is beneficial to use the page count to decide
+whether to coalesce and issue SG DMA or to do memory bursts.
+
+Another one may be if one has a device framebuffer that is in an usual format,
+say diagonally shifting RGB, this may then be a mechanism for you to allow
+apps to pretend to have a normal framebuffer but reswizzle for the device
+framebuffer at vsync time based on the touched pagelist.
+
+How to use it: (for applications)
+---------------------------------
+No changes needed. mmap the framebuffer like normal and just use it.
+
+How to use it: (for fbdev drivers)
+----------------------------------
+The following example may be helpful.
+
+1. Setup your structure. Eg::
+
+	static struct fb_deferred_io hecubafb_defio = {
+		.delay		= HZ,
+		.deferred_io	= hecubafb_dpy_deferred_io,
+	};
+
+The delay is the minimum delay between when the page_mkwrite trigger occurs
+and when the deferred_io callback is called. The deferred_io callback is
+explained below.
+
+2. Setup your deferred IO callback. Eg::
+
+	static void hecubafb_dpy_deferred_io(struct fb_info *info,
+					     struct list_head *pagelist)
+
+The deferred_io callback is where you would perform all your IO to the display
+device. You receive the pagelist which is the list of pages that were written
+to during the delay. You must not modify this list. This callback is called
+from a workqueue.
+
+3. Call init::
+
+	info->fbdefio = &hecubafb_defio;
+	fb_deferred_io_init(info);
+
+4. Call cleanup::
+
+	fb_deferred_io_cleanup(info);
diff --git a/Documentation/fb/deferred_io.txt b/Documentation/fb/deferred_io.txt
deleted file mode 100644
index 748328370250..000000000000
--- a/Documentation/fb/deferred_io.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-Deferred IO
------------
-
-Deferred IO is a way to delay and repurpose IO. It uses host memory as a
-buffer and the MMU pagefault as a pretrigger for when to perform the device
-IO. The following example may be a useful explanation of how one such setup
-works:
-
-- userspace app like Xfbdev mmaps framebuffer
-- deferred IO and driver sets up fault and page_mkwrite handlers
-- userspace app tries to write to mmaped vaddress
-- we get pagefault and reach fault handler
-- fault handler finds and returns physical page
-- we get page_mkwrite where we add this page to a list
-- schedule a workqueue task to be run after a delay
-- app continues writing to that page with no additional cost. this is
-  the key benefit.
-- the workqueue task comes in and mkcleans the pages on the list, then
- completes the work associated with updating the framebuffer. this is
-  the real work talking to the device.
-- app tries to write to the address (that has now been mkcleaned)
-- get pagefault and the above sequence occurs again
-
-As can be seen from above, one benefit is roughly to allow bursty framebuffer
-writes to occur at minimum cost. Then after some time when hopefully things
-have gone quiet, we go and really update the framebuffer which would be
-a relatively more expensive operation.
-
-For some types of nonvolatile high latency displays, the desired image is
-the final image rather than the intermediate stages which is why it's okay
-to not update for each write that is occurring.
-
-It may be the case that this is useful in other scenarios as well. Paul Mundt
-has mentioned a case where it is beneficial to use the page count to decide
-whether to coalesce and issue SG DMA or to do memory bursts.
-
-Another one may be if one has a device framebuffer that is in an usual format,
-say diagonally shifting RGB, this may then be a mechanism for you to allow
-apps to pretend to have a normal framebuffer but reswizzle for the device
-framebuffer at vsync time based on the touched pagelist.
-
-How to use it: (for applications)
----------------------------------
-No changes needed. mmap the framebuffer like normal and just use it.
-
-How to use it: (for fbdev drivers)
-----------------------------------
-The following example may be helpful.
-
-1. Setup your structure. Eg:
-
-static struct fb_deferred_io hecubafb_defio = {
-	.delay		= HZ,
-	.deferred_io	= hecubafb_dpy_deferred_io,
-};
-
-The delay is the minimum delay between when the page_mkwrite trigger occurs
-and when the deferred_io callback is called. The deferred_io callback is
-explained below.
-
-2. Setup your deferred IO callback. Eg:
-static void hecubafb_dpy_deferred_io(struct fb_info *info,
-				struct list_head *pagelist)
-
-The deferred_io callback is where you would perform all your IO to the display
-device. You receive the pagelist which is the list of pages that were written
-to during the delay. You must not modify this list. This callback is called
-from a workqueue.
-
-3. Call init
-	info->fbdefio = &hecubafb_defio;
-	fb_deferred_io_init(info);
-
-4. Call cleanup
-	fb_deferred_io_cleanup(info);
diff --git a/Documentation/fb/efifb.rst b/Documentation/fb/efifb.rst
new file mode 100644
index 000000000000..04840331a00e
--- /dev/null
+++ b/Documentation/fb/efifb.rst
@@ -0,0 +1,39 @@
+==============
+What is efifb?
+==============
+
+This is a generic EFI platform driver for Intel based Apple computers.
+efifb is only for EFI booted Intel Macs.
+
+Supported Hardware
+==================
+
+- iMac 17"/20"
+- Macbook
+- Macbook Pro 15"/17"
+- MacMini
+
+How to use it?
+==============
+
+efifb does not have any kind of autodetection of your machine.
+You have to add the following kernel parameters in your elilo.conf::
+
+	Macbook :
+		video=efifb:macbook
+	MacMini :
+		video=efifb:mini
+	Macbook Pro 15", iMac 17" :
+		video=efifb:i17
+	Macbook Pro 17", iMac 20" :
+		video=efifb:i20
+
+Accepted options:
+
+======= ===========================================================
+nowc	Don't map the framebuffer write combined. This can be used
+	to workaround side-effects and slowdowns on other CPU cores
+	when large amounts of console data are written.
+======= ===========================================================
+
+Edgar Hucek <gimli@dark-green.com>
diff --git a/Documentation/fb/efifb.txt b/Documentation/fb/efifb.txt
deleted file mode 100644
index 1a85c1bdaf38..000000000000
--- a/Documentation/fb/efifb.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-
-What is efifb?
-===============
-
-This is a generic EFI platform driver for Intel based Apple computers.
-efifb is only for EFI booted Intel Macs.
-
-Supported Hardware
-==================
-
-iMac 17"/20"
-Macbook
-Macbook Pro 15"/17"
-MacMini
-
-How to use it?
-==============
-
-efifb does not have any kind of autodetection of your machine.
-You have to add the following kernel parameters in your elilo.conf:
-	Macbook :
-		video=efifb:macbook
-	MacMini :
-		video=efifb:mini
-	Macbook Pro 15", iMac 17" :
-		video=efifb:i17
-	Macbook Pro 17", iMac 20" :
-		video=efifb:i20
-
-Accepted options:
-
-nowc	Don't map the framebuffer write combined. This can be used
-	to workaround side-effects and slowdowns on other CPU cores
-	when large amounts of console data are written.
-
---
-Edgar Hucek <gimli@dark-green.com>
diff --git a/Documentation/fb/ep93xx-fb.rst b/Documentation/fb/ep93xx-fb.rst
new file mode 100644
index 000000000000..6f7767926d1a
--- /dev/null
+++ b/Documentation/fb/ep93xx-fb.rst
@@ -0,0 +1,140 @@
+================================
+Driver for EP93xx LCD controller
+================================
+
+The EP93xx LCD controller can drive both standard desktop monitors and
+embedded LCD displays. If you have a standard desktop monitor then you
+can use the standard Linux video mode database. In your board file::
+
+	static struct ep93xxfb_mach_info some_board_fb_info = {
+		.num_modes	= EP93XXFB_USE_MODEDB,
+		.bpp		= 16,
+	};
+
+If you have an embedded LCD display then you need to define a video
+mode for it as follows::
+
+	static struct fb_videomode some_board_video_modes[] = {
+		{
+			.name		= "some_lcd_name",
+			/* Pixel clock, porches, etc */
+		},
+	};
+
+Note that the pixel clock value is in pico-seconds. You can use the
+KHZ2PICOS macro to convert the pixel clock value. Most other values
+are in pixel clocks. See Documentation/fb/framebuffer.rst for further
+details.
+
+The ep93xxfb_mach_info structure for your board should look like the
+following::
+
+	static struct ep93xxfb_mach_info some_board_fb_info = {
+		.num_modes	= ARRAY_SIZE(some_board_video_modes),
+		.modes		= some_board_video_modes,
+		.default_mode	= &some_board_video_modes[0],
+		.bpp		= 16,
+	};
+
+The framebuffer device can be registered by adding the following to
+your board initialisation function::
+
+	ep93xx_register_fb(&some_board_fb_info);
+
+=====================
+Video Attribute Flags
+=====================
+
+The ep93xxfb_mach_info structure has a flags field which can be used
+to configure the controller. The video attributes flags are fully
+documented in section 7 of the EP93xx users' guide. The following
+flags are available:
+
+=============================== ==========================================
+EP93XXFB_PCLK_FALLING		Clock data on the falling edge of the
+				pixel clock. The default is to clock
+				data on the rising edge.
+
+EP93XXFB_SYNC_BLANK_HIGH	Blank signal is active high. By
+				default the blank signal is active low.
+
+EP93XXFB_SYNC_HORIZ_HIGH	Horizontal sync is active high. By
+				default the horizontal sync is active low.
+
+EP93XXFB_SYNC_VERT_HIGH		Vertical sync is active high. By
+				default the vertical sync is active high.
+=============================== ==========================================
+
+The physical address of the framebuffer can be controlled using the
+following flags:
+
+=============================== ======================================
+EP93XXFB_USE_SDCSN0		Use SDCSn[0] for the framebuffer. This
+				is the default setting.
+
+EP93XXFB_USE_SDCSN1		Use SDCSn[1] for the framebuffer.
+
+EP93XXFB_USE_SDCSN2		Use SDCSn[2] for the framebuffer.
+
+EP93XXFB_USE_SDCSN3		Use SDCSn[3] for the framebuffer.
+=============================== ======================================
+
+==================
+Platform callbacks
+==================
+
+The EP93xx framebuffer driver supports three optional platform
+callbacks: setup, teardown and blank. The setup and teardown functions
+are called when the framebuffer driver is installed and removed
+respectively. The blank function is called whenever the display is
+blanked or unblanked.
+
+The setup and teardown devices pass the platform_device structure as
+an argument. The fb_info and ep93xxfb_mach_info structures can be
+obtained as follows::
+
+	static int some_board_fb_setup(struct platform_device *pdev)
+	{
+		struct ep93xxfb_mach_info *mach_info = pdev->dev.platform_data;
+		struct fb_info *fb_info = platform_get_drvdata(pdev);
+
+		/* Board specific framebuffer setup */
+	}
+
+======================
+Setting the video mode
+======================
+
+The video mode is set using the following syntax::
+
+	video=XRESxYRES[-BPP][@REFRESH]
+
+If the EP93xx video driver is built-in then the video mode is set on
+the Linux kernel command line, for example::
+
+	video=ep93xx-fb:800x600-16@60
+
+If the EP93xx video driver is built as a module then the video mode is
+set when the module is installed::
+
+	modprobe ep93xx-fb video=320x240
+
+==============
+Screenpage bug
+==============
+
+At least on the EP9315 there is a silicon bug which causes bit 27 of
+the VIDSCRNPAGE (framebuffer physical offset) to be tied low. There is
+an unofficial errata for this bug at::
+
+	http://marc.info/?l=linux-arm-kernel&m=110061245502000&w=2
+
+By default the EP93xx framebuffer driver checks if the allocated physical
+address has bit 27 set. If it does, then the memory is freed and an
+error is returned. The check can be disabled by adding the following
+option when loading the driver::
+
+      ep93xx-fb.check_screenpage_bug=0
+
+In some cases it may be possible to reconfigure your SDRAM layout to
+avoid this bug. See section 13 of the EP93xx users' guide for details.
diff --git a/Documentation/fb/ep93xx-fb.txt b/Documentation/fb/ep93xx-fb.txt
deleted file mode 100644
index 5af1bd9effae..000000000000
--- a/Documentation/fb/ep93xx-fb.txt
+++ /dev/null
@@ -1,135 +0,0 @@
-================================
-Driver for EP93xx LCD controller
-================================
-
-The EP93xx LCD controller can drive both standard desktop monitors and
-embedded LCD displays. If you have a standard desktop monitor then you
-can use the standard Linux video mode database. In your board file:
-
-	static struct ep93xxfb_mach_info some_board_fb_info = {
-		.num_modes	= EP93XXFB_USE_MODEDB,
-		.bpp		= 16,
-	};
-
-If you have an embedded LCD display then you need to define a video
-mode for it as follows:
-
-	static struct fb_videomode some_board_video_modes[] = {
-		{
-			.name		= "some_lcd_name",
-			/* Pixel clock, porches, etc */
-		},
-	};
-
-Note that the pixel clock value is in pico-seconds. You can use the
-KHZ2PICOS macro to convert the pixel clock value. Most other values
-are in pixel clocks. See Documentation/fb/framebuffer.txt for further
-details.
-
-The ep93xxfb_mach_info structure for your board should look like the
-following:
-
-	static struct ep93xxfb_mach_info some_board_fb_info = {
-		.num_modes	= ARRAY_SIZE(some_board_video_modes),
-		.modes		= some_board_video_modes,
-		.default_mode	= &some_board_video_modes[0],
-		.bpp		= 16,
-	};
-
-The framebuffer device can be registered by adding the following to
-your board initialisation function:
-
-	ep93xx_register_fb(&some_board_fb_info);
-
-=====================
-Video Attribute Flags
-=====================
-
-The ep93xxfb_mach_info structure has a flags field which can be used
-to configure the controller. The video attributes flags are fully
-documented in section 7 of the EP93xx users' guide. The following
-flags are available:
-
-EP93XXFB_PCLK_FALLING		Clock data on the falling edge of the
-				pixel clock. The default is to clock
-				data on the rising edge.
-
-EP93XXFB_SYNC_BLANK_HIGH	Blank signal is active high. By
-				default the blank signal is active low.
-
-EP93XXFB_SYNC_HORIZ_HIGH	Horizontal sync is active high. By
-				default the horizontal sync is active low.
-
-EP93XXFB_SYNC_VERT_HIGH		Vertical sync is active high. By
-				default the vertical sync is active high.
-
-The physical address of the framebuffer can be controlled using the
-following flags:
-
-EP93XXFB_USE_SDCSN0		Use SDCSn[0] for the framebuffer. This
-				is the default setting.
-
-EP93XXFB_USE_SDCSN1		Use SDCSn[1] for the framebuffer.
-
-EP93XXFB_USE_SDCSN2		Use SDCSn[2] for the framebuffer.
-
-EP93XXFB_USE_SDCSN3		Use SDCSn[3] for the framebuffer.
-
-==================
-Platform callbacks
-==================
-
-The EP93xx framebuffer driver supports three optional platform
-callbacks: setup, teardown and blank. The setup and teardown functions
-are called when the framebuffer driver is installed and removed
-respectively. The blank function is called whenever the display is
-blanked or unblanked.
-
-The setup and teardown devices pass the platform_device structure as
-an argument. The fb_info and ep93xxfb_mach_info structures can be
-obtained as follows:
-
-	static int some_board_fb_setup(struct platform_device *pdev)
-	{
-		struct ep93xxfb_mach_info *mach_info = pdev->dev.platform_data;
-		struct fb_info *fb_info = platform_get_drvdata(pdev);
-
-		/* Board specific framebuffer setup */
-	}
-
-======================
-Setting the video mode
-======================
-
-The video mode is set using the following syntax:
-
-	video=XRESxYRES[-BPP][@REFRESH]
-
-If the EP93xx video driver is built-in then the video mode is set on
-the Linux kernel command line, for example:
-
-	video=ep93xx-fb:800x600-16@60
-
-If the EP93xx video driver is built as a module then the video mode is
-set when the module is installed:
-
-	modprobe ep93xx-fb video=320x240
-
-==============
-Screenpage bug
-==============
-
-At least on the EP9315 there is a silicon bug which causes bit 27 of
-the VIDSCRNPAGE (framebuffer physical offset) to be tied low. There is
-an unofficial errata for this bug at:
-	http://marc.info/?l=linux-arm-kernel&m=110061245502000&w=2
-
-By default the EP93xx framebuffer driver checks if the allocated physical
-address has bit 27 set. If it does, then the memory is freed and an
-error is returned. The check can be disabled by adding the following
-option when loading the driver:
-
-      ep93xx-fb.check_screenpage_bug=0
-
-In some cases it may be possible to reconfigure your SDRAM layout to
-avoid this bug. See section 13 of the EP93xx users' guide for details.
diff --git a/Documentation/fb/fbcon.rst b/Documentation/fb/fbcon.rst
new file mode 100644
index 000000000000..cfb9f7c38f18
--- /dev/null
+++ b/Documentation/fb/fbcon.rst
@@ -0,0 +1,350 @@
+=======================
+The Framebuffer Console
+=======================
+
+The framebuffer console (fbcon), as its name implies, is a text
+console running on top of the framebuffer device. It has the functionality of
+any standard text console driver, such as the VGA console, with the added
+features that can be attributed to the graphical nature of the framebuffer.
+
+In the x86 architecture, the framebuffer console is optional, and
+some even treat it as a toy. For other architectures, it is the only available
+display device, text or graphical.
+
+What are the features of fbcon?  The framebuffer console supports
+high resolutions, varying font types, display rotation, primitive multihead,
+etc. Theoretically, multi-colored fonts, blending, aliasing, and any feature
+made available by the underlying graphics card are also possible.
+
+A. Configuration
+================
+
+The framebuffer console can be enabled by using your favorite kernel
+configuration tool.  It is under Device Drivers->Graphics Support->Frame
+buffer Devices->Console display driver support->Framebuffer Console Support.
+Select 'y' to compile support statically or 'm' for module support.  The
+module will be fbcon.
+
+In order for fbcon to activate, at least one framebuffer driver is
+required, so choose from any of the numerous drivers available. For x86
+systems, they almost universally have VGA cards, so vga16fb and vesafb will
+always be available. However, using a chipset-specific driver will give you
+more speed and features, such as the ability to change the video mode
+dynamically.
+
+To display the penguin logo, choose any logo available in Graphics
+support->Bootup logo.
+
+Also, you will need to select at least one compiled-in font, but if
+you don't do anything, the kernel configuration tool will select one for you,
+usually an 8x16 font.
+
+GOTCHA: A common bug report is enabling the framebuffer without enabling the
+framebuffer console.  Depending on the driver, you may get a blanked or
+garbled display, but the system still boots to completion.  If you are
+fortunate to have a driver that does not alter the graphics chip, then you
+will still get a VGA console.
+
+B. Loading
+==========
+
+Possible scenarios:
+
+1. Driver and fbcon are compiled statically
+
+	 Usually, fbcon will automatically take over your console. The notable
+	 exception is vesafb.  It needs to be explicitly activated with the
+	 vga= boot option parameter.
+
+2. Driver is compiled statically, fbcon is compiled as a module
+
+	 Depending on the driver, you either get a standard console, or a
+	 garbled display, as mentioned above.  To get a framebuffer console,
+	 do a 'modprobe fbcon'.
+
+3. Driver is compiled as a module, fbcon is compiled statically
+
+	 You get your standard console.  Once the driver is loaded with
+	 'modprobe xxxfb', fbcon automatically takes over the console with
+	 the possible exception of using the fbcon=map:n option. See below.
+
+4. Driver and fbcon are compiled as a module.
+
+	 You can load them in any order. Once both are loaded, fbcon will take
+	 over the console.
+
+C. Boot options
+
+	 The framebuffer console has several, largely unknown, boot options
+	 that can change its behavior.
+
+1. fbcon=font:<name>
+
+	Select the initial font to use. The value 'name' can be any of the
+	compiled-in fonts: 10x18, 6x10, 7x14, Acorn8x8, MINI4x6,
+	PEARL8x8, ProFont6x11, SUN12x22, SUN8x16, VGA8x16, VGA8x8.
+
+	Note, not all drivers can handle font with widths not divisible by 8,
+	such as vga16fb.
+
+2. fbcon=scrollback:<value>[k]
+
+	The scrollback buffer is memory that is used to preserve display
+	contents that has already scrolled past your view.  This is accessed
+	by using the Shift-PageUp key combination.  The value 'value' is any
+	integer. It defaults to 32KB.  The 'k' suffix is optional, and will
+	multiply the 'value' by 1024.
+
+3. fbcon=map:<0123>
+
+	This is an interesting option. It tells which driver gets mapped to
+	which console. The value '0123' is a sequence that gets repeated until
+	the total length is 64 which is the number of consoles available. In
+	the above example, it is expanded to 012301230123... and the mapping
+	will be::
+
+		tty | 1 2 3 4 5 6 7 8 9 ...
+		fb  | 0 1 2 3 0 1 2 3 0 ...
+
+		('cat /proc/fb' should tell you what the fb numbers are)
+
+	One side effect that may be useful is using a map value that exceeds
+	the number of loaded fb drivers. For example, if only one driver is
+	available, fb0, adding fbcon=map:1 tells fbcon not to take over the
+	console.
+
+	Later on, when you want to map the console the to the framebuffer
+	device, you can use the con2fbmap utility.
+
+4. fbcon=vc:<n1>-<n2>
+
+	This option tells fbcon to take over only a range of consoles as
+	specified by the values 'n1' and 'n2'. The rest of the consoles
+	outside the given range will still be controlled by the standard
+	console driver.
+
+	NOTE: For x86 machines, the standard console is the VGA console which
+	is typically located on the same video card.  Thus, the consoles that
+	are controlled by the VGA console will be garbled.
+
+4. fbcon=rotate:<n>
+
+	This option changes the orientation angle of the console display. The
+	value 'n' accepts the following:
+
+	    - 0 - normal orientation (0 degree)
+	    - 1 - clockwise orientation (90 degrees)
+	    - 2 - upside down orientation (180 degrees)
+	    - 3 - counterclockwise orientation (270 degrees)
+
+	The angle can be changed anytime afterwards by 'echoing' the same
+	numbers to any one of the 2 attributes found in
+	/sys/class/graphics/fbcon:
+
+		- rotate     - rotate the display of the active console
+		- rotate_all - rotate the display of all consoles
+
+	Console rotation will only become available if Framebuffer Console
+	Rotation support is compiled in your kernel.
+
+	NOTE: This is purely console rotation.  Any other applications that
+	use the framebuffer will remain at their 'normal' orientation.
+	Actually, the underlying fb driver is totally ignorant of console
+	rotation.
+
+5. fbcon=margin:<color>
+
+	This option specifies the color of the margins. The margins are the
+	leftover area at the right and the bottom of the screen that are not
+	used by text. By default, this area will be black. The 'color' value
+	is an integer number that depends on the framebuffer driver being used.
+
+6. fbcon=nodefer
+
+	If the kernel is compiled with deferred fbcon takeover support, normally
+	the framebuffer contents, left in place by the firmware/bootloader, will
+	be preserved until there actually is some text is output to the console.
+	This option causes fbcon to bind immediately to the fbdev device.
+
+7. fbcon=logo-pos:<location>
+
+	The only possible 'location' is 'center' (without quotes), and when
+	given, the bootup logo is moved from the default top-left corner
+	location to the center of the framebuffer. If more than one logo is
+	displayed due to multiple CPUs, the collected line of logos is moved
+	as a whole.
+
+C. Attaching, Detaching and Unloading
+
+Before going on to how to attach, detach and unload the framebuffer console, an
+illustration of the dependencies may help.
+
+The console layer, as with most subsystems, needs a driver that interfaces with
+the hardware. Thus, in a VGA console::
+
+	console ---> VGA driver ---> hardware.
+
+Assuming the VGA driver can be unloaded, one must first unbind the VGA driver
+from the console layer before unloading the driver.  The VGA driver cannot be
+unloaded if it is still bound to the console layer. (See
+Documentation/console/console.txt for more information).
+
+This is more complicated in the case of the framebuffer console (fbcon),
+because fbcon is an intermediate layer between the console and the drivers::
+
+	console ---> fbcon ---> fbdev drivers ---> hardware
+
+The fbdev drivers cannot be unloaded if bound to fbcon, and fbcon cannot
+be unloaded if it's bound to the console layer.
+
+So to unload the fbdev drivers, one must first unbind fbcon from the console,
+then unbind the fbdev drivers from fbcon.  Fortunately, unbinding fbcon from
+the console layer will automatically unbind framebuffer drivers from
+fbcon. Thus, there is no need to explicitly unbind the fbdev drivers from
+fbcon.
+
+So, how do we unbind fbcon from the console? Part of the answer is in
+Documentation/console/console.txt. To summarize:
+
+Echo a value to the bind file that represents the framebuffer console
+driver. So assuming vtcon1 represents fbcon, then::
+
+  echo 1 > sys/class/vtconsole/vtcon1/bind - attach framebuffer console to
+					     console layer
+  echo 0 > sys/class/vtconsole/vtcon1/bind - detach framebuffer console from
+					     console layer
+
+If fbcon is detached from the console layer, your boot console driver (which is
+usually VGA text mode) will take over.  A few drivers (rivafb and i810fb) will
+restore VGA text mode for you.  With the rest, before detaching fbcon, you
+must take a few additional steps to make sure that your VGA text mode is
+restored properly. The following is one of the several methods that you can do:
+
+1. Download or install vbetool.  This utility is included with most
+   distributions nowadays, and is usually part of the suspend/resume tool.
+
+2. In your kernel configuration, ensure that CONFIG_FRAMEBUFFER_CONSOLE is set
+   to 'y' or 'm'. Enable one or more of your favorite framebuffer drivers.
+
+3. Boot into text mode and as root run::
+
+	vbetool vbestate save > <vga state file>
+
+   The above command saves the register contents of your graphics
+   hardware to <vga state file>.  You need to do this step only once as
+   the state file can be reused.
+
+4. If fbcon is compiled as a module, load fbcon by doing::
+
+       modprobe fbcon
+
+5. Now to detach fbcon::
+
+       vbetool vbestate restore < <vga state file> && \
+       echo 0 > /sys/class/vtconsole/vtcon1/bind
+
+6. That's it, you're back to VGA mode. And if you compiled fbcon as a module,
+   you can unload it by 'rmmod fbcon'.
+
+7. To reattach fbcon::
+
+       echo 1 > /sys/class/vtconsole/vtcon1/bind
+
+8. Once fbcon is unbound, all drivers registered to the system will also
+become unbound.  This means that fbcon and individual framebuffer drivers
+can be unloaded or reloaded at will. Reloading the drivers or fbcon will
+automatically bind the console, fbcon and the drivers together. Unloading
+all the drivers without unloading fbcon will make it impossible for the
+console to bind fbcon.
+
+Notes for vesafb users:
+=======================
+
+Unfortunately, if your bootline includes a vga=xxx parameter that sets the
+hardware in graphics mode, such as when loading vesafb, vgacon will not load.
+Instead, vgacon will replace the default boot console with dummycon, and you
+won't get any display after detaching fbcon. Your machine is still alive, so
+you can reattach vesafb. However, to reattach vesafb, you need to do one of
+the following:
+
+Variation 1:
+
+    a. Before detaching fbcon, do::
+
+	vbetool vbemode save > <vesa state file> # do once for each vesafb mode,
+						 # the file can be reused
+
+    b. Detach fbcon as in step 5.
+
+    c. Attach fbcon::
+
+	vbetool vbestate restore < <vesa state file> && \
+	echo 1 > /sys/class/vtconsole/vtcon1/bind
+
+Variation 2:
+
+    a. Before detaching fbcon, do::
+
+	echo <ID> > /sys/class/tty/console/bind
+
+	vbetool vbemode get
+
+    b. Take note of the mode number
+
+    b. Detach fbcon as in step 5.
+
+    c. Attach fbcon::
+
+	vbetool vbemode set <mode number> && \
+	echo 1 > /sys/class/vtconsole/vtcon1/bind
+
+Samples:
+========
+
+Here are 2 sample bash scripts that you can use to bind or unbind the
+framebuffer console driver if you are on an X86 box::
+
+  #!/bin/bash
+  # Unbind fbcon
+
+  # Change this to where your actual vgastate file is located
+  # Or Use VGASTATE=$1 to indicate the state file at runtime
+  VGASTATE=/tmp/vgastate
+
+  # path to vbetool
+  VBETOOL=/usr/local/bin
+
+
+  for (( i = 0; i < 16; i++))
+  do
+    if test -x /sys/class/vtconsole/vtcon$i; then
+	if [ `cat /sys/class/vtconsole/vtcon$i/name | grep -c "frame buffer"` \
+	     = 1 ]; then
+	    if test -x $VBETOOL/vbetool; then
+	       echo Unbinding vtcon$i
+	       $VBETOOL/vbetool vbestate restore < $VGASTATE
+	       echo 0 > /sys/class/vtconsole/vtcon$i/bind
+	    fi
+	fi
+    fi
+  done
+
+---------------------------------------------------------------------------
+
+::
+
+  #!/bin/bash
+  # Bind fbcon
+
+  for (( i = 0; i < 16; i++))
+  do
+    if test -x /sys/class/vtconsole/vtcon$i; then
+	if [ `cat /sys/class/vtconsole/vtcon$i/name | grep -c "frame buffer"` \
+	     = 1 ]; then
+	  echo Unbinding vtcon$i
+	  echo 1 > /sys/class/vtconsole/vtcon$i/bind
+	fi
+    fi
+  done
+
+Antonino Daplas <adaplas@pol.net>
diff --git a/Documentation/fb/fbcon.txt b/Documentation/fb/fbcon.txt
deleted file mode 100644
index 60a5ec04e8f0..000000000000
--- a/Documentation/fb/fbcon.txt
+++ /dev/null
@@ -1,347 +0,0 @@
-The Framebuffer Console
-=======================
-
-	The framebuffer console (fbcon), as its name implies, is a text
-console running on top of the framebuffer device. It has the functionality of
-any standard text console driver, such as the VGA console, with the added
-features that can be attributed to the graphical nature of the framebuffer.
-
-	 In the x86 architecture, the framebuffer console is optional, and
-some even treat it as a toy. For other architectures, it is the only available
-display device, text or graphical.
-
-	 What are the features of fbcon?  The framebuffer console supports
-high resolutions, varying font types, display rotation, primitive multihead,
-etc. Theoretically, multi-colored fonts, blending, aliasing, and any feature
-made available by the underlying graphics card are also possible.
-
-A. Configuration
-
-	The framebuffer console can be enabled by using your favorite kernel
-configuration tool.  It is under Device Drivers->Graphics Support->Frame
-buffer Devices->Console display driver support->Framebuffer Console Support.
-Select 'y' to compile support statically or 'm' for module support.  The
-module will be fbcon.
-
-	In order for fbcon to activate, at least one framebuffer driver is
-required, so choose from any of the numerous drivers available. For x86
-systems, they almost universally have VGA cards, so vga16fb and vesafb will
-always be available. However, using a chipset-specific driver will give you
-more speed and features, such as the ability to change the video mode
-dynamically.
-
-	To display the penguin logo, choose any logo available in Graphics
-support->Bootup logo.
-
-	Also, you will need to select at least one compiled-in font, but if
-you don't do anything, the kernel configuration tool will select one for you,
-usually an 8x16 font.
-
-GOTCHA: A common bug report is enabling the framebuffer without enabling the
-framebuffer console.  Depending on the driver, you may get a blanked or
-garbled display, but the system still boots to completion.  If you are
-fortunate to have a driver that does not alter the graphics chip, then you
-will still get a VGA console.
-
-B. Loading
-
-Possible scenarios:
-
-1. Driver and fbcon are compiled statically
-
-	 Usually, fbcon will automatically take over your console. The notable
-	 exception is vesafb.  It needs to be explicitly activated with the
-	 vga= boot option parameter.
-
-2. Driver is compiled statically, fbcon is compiled as a module
-
-	 Depending on the driver, you either get a standard console, or a
-	 garbled display, as mentioned above.  To get a framebuffer console,
-	 do a 'modprobe fbcon'.
-
-3. Driver is compiled as a module, fbcon is compiled statically
-
-	 You get your standard console.  Once the driver is loaded with
-	 'modprobe xxxfb', fbcon automatically takes over the console with
-	 the possible exception of using the fbcon=map:n option. See below.
-
-4. Driver and fbcon are compiled as a module.
-
-	 You can load them in any order. Once both are loaded, fbcon will take
-	 over the console.
-
-C. Boot options
-
-         The framebuffer console has several, largely unknown, boot options
-         that can change its behavior.
-
-1. fbcon=font:<name>
-
-        Select the initial font to use. The value 'name' can be any of the
-        compiled-in fonts: 10x18, 6x10, 7x14, Acorn8x8, MINI4x6,
-        PEARL8x8, ProFont6x11, SUN12x22, SUN8x16, VGA8x16, VGA8x8.
-
-	Note, not all drivers can handle font with widths not divisible by 8,
-        such as vga16fb.
-
-2. fbcon=scrollback:<value>[k]
-
-        The scrollback buffer is memory that is used to preserve display
-        contents that has already scrolled past your view.  This is accessed
-        by using the Shift-PageUp key combination.  The value 'value' is any
-        integer. It defaults to 32KB.  The 'k' suffix is optional, and will
-        multiply the 'value' by 1024.
-
-3. fbcon=map:<0123>
-
-        This is an interesting option. It tells which driver gets mapped to
-        which console. The value '0123' is a sequence that gets repeated until
-        the total length is 64 which is the number of consoles available. In
-        the above example, it is expanded to 012301230123... and the mapping
-        will be:
-
-		tty | 1 2 3 4 5 6 7 8 9 ...
-		fb  | 0 1 2 3 0 1 2 3 0 ...
-
-		('cat /proc/fb' should tell you what the fb numbers are)
-
-	One side effect that may be useful is using a map value that exceeds
-	the number of loaded fb drivers. For example, if only one driver is
-	available, fb0, adding fbcon=map:1 tells fbcon not to take over the
-	console.
-
-	Later on, when you want to map the console the to the framebuffer
-	device, you can use the con2fbmap utility.
-
-4. fbcon=vc:<n1>-<n2>
-
-	This option tells fbcon to take over only a range of consoles as
-	specified by the values 'n1' and 'n2'. The rest of the consoles
-	outside the given range will still be controlled by the standard
-	console driver.
-
-	NOTE: For x86 machines, the standard console is the VGA console which
-	is typically located on the same video card.  Thus, the consoles that
-	are controlled by the VGA console will be garbled.
-
-4. fbcon=rotate:<n>
-
-        This option changes the orientation angle of the console display. The
-        value 'n' accepts the following:
-
-	      0 - normal orientation (0 degree)
-	      1 - clockwise orientation (90 degrees)
-	      2 - upside down orientation (180 degrees)
-	      3 - counterclockwise orientation (270 degrees)
-
-	The angle can be changed anytime afterwards by 'echoing' the same
-	numbers to any one of the 2 attributes found in
-	/sys/class/graphics/fbcon:
-
-		rotate     - rotate the display of the active console
-		rotate_all - rotate the display of all consoles
-
-	Console rotation will only become available if Framebuffer Console
-	Rotation support is compiled in your kernel.
-
-	NOTE: This is purely console rotation.  Any other applications that
-	use the framebuffer will remain at their 'normal' orientation.
-	Actually, the underlying fb driver is totally ignorant of console
-	rotation.
-
-5. fbcon=margin:<color>
-
-	This option specifies the color of the margins. The margins are the
-	leftover area at the right and the bottom of the screen that are not
-	used by text. By default, this area will be black. The 'color' value
-	is an integer number that depends on the framebuffer driver being used.
-
-6. fbcon=nodefer
-
-	If the kernel is compiled with deferred fbcon takeover support, normally
-	the framebuffer contents, left in place by the firmware/bootloader, will
-	be preserved until there actually is some text is output to the console.
-	This option causes fbcon to bind immediately to the fbdev device.
-
-7. fbcon=logo-pos:<location>
-
-	The only possible 'location' is 'center' (without quotes), and when
-	given, the bootup logo is moved from the default top-left corner
-	location to the center of the framebuffer. If more than one logo is
-	displayed due to multiple CPUs, the collected line of logos is moved
-	as a whole.
-
-C. Attaching, Detaching and Unloading
-
-Before going on to how to attach, detach and unload the framebuffer console, an
-illustration of the dependencies may help.
-
-The console layer, as with most subsystems, needs a driver that interfaces with
-the hardware. Thus, in a VGA console:
-
-console ---> VGA driver ---> hardware.
-
-Assuming the VGA driver can be unloaded, one must first unbind the VGA driver
-from the console layer before unloading the driver.  The VGA driver cannot be
-unloaded if it is still bound to the console layer. (See
-Documentation/console/console.txt for more information).
-
-This is more complicated in the case of the framebuffer console (fbcon),
-because fbcon is an intermediate layer between the console and the drivers:
-
-console ---> fbcon ---> fbdev drivers ---> hardware
-
-The fbdev drivers cannot be unloaded if bound to fbcon, and fbcon cannot
-be unloaded if it's bound to the console layer.
-
-So to unload the fbdev drivers, one must first unbind fbcon from the console,
-then unbind the fbdev drivers from fbcon.  Fortunately, unbinding fbcon from
-the console layer will automatically unbind framebuffer drivers from
-fbcon. Thus, there is no need to explicitly unbind the fbdev drivers from
-fbcon.
-
-So, how do we unbind fbcon from the console? Part of the answer is in
-Documentation/console/console.txt. To summarize:
-
-Echo a value to the bind file that represents the framebuffer console
-driver. So assuming vtcon1 represents fbcon, then:
-
-echo 1 > sys/class/vtconsole/vtcon1/bind - attach framebuffer console to
-                                           console layer
-echo 0 > sys/class/vtconsole/vtcon1/bind - detach framebuffer console from
-                                           console layer
-
-If fbcon is detached from the console layer, your boot console driver (which is
-usually VGA text mode) will take over.  A few drivers (rivafb and i810fb) will
-restore VGA text mode for you.  With the rest, before detaching fbcon, you
-must take a few additional steps to make sure that your VGA text mode is
-restored properly. The following is one of the several methods that you can do:
-
-1. Download or install vbetool.  This utility is included with most
-   distributions nowadays, and is usually part of the suspend/resume tool.
-
-2. In your kernel configuration, ensure that CONFIG_FRAMEBUFFER_CONSOLE is set
-   to 'y' or 'm'. Enable one or more of your favorite framebuffer drivers.
-
-3. Boot into text mode and as root run:
-
-	vbetool vbestate save > <vga state file>
-
-	The above command saves the register contents of your graphics
-	hardware to <vga state file>.  You need to do this step only once as
-	the state file can be reused.
-
-4. If fbcon is compiled as a module, load fbcon by doing:
-
-       modprobe fbcon
-
-5. Now to detach fbcon:
-
-       vbetool vbestate restore < <vga state file> && \
-       echo 0 > /sys/class/vtconsole/vtcon1/bind
-
-6. That's it, you're back to VGA mode. And if you compiled fbcon as a module,
-   you can unload it by 'rmmod fbcon'.
-
-7. To reattach fbcon:
-
-       echo 1 > /sys/class/vtconsole/vtcon1/bind
-
-8. Once fbcon is unbound, all drivers registered to the system will also
-become unbound.  This means that fbcon and individual framebuffer drivers
-can be unloaded or reloaded at will. Reloading the drivers or fbcon will
-automatically bind the console, fbcon and the drivers together. Unloading
-all the drivers without unloading fbcon will make it impossible for the
-console to bind fbcon.
-
-Notes for vesafb users:
-=======================
-
-Unfortunately, if your bootline includes a vga=xxx parameter that sets the
-hardware in graphics mode, such as when loading vesafb, vgacon will not load.
-Instead, vgacon will replace the default boot console with dummycon, and you
-won't get any display after detaching fbcon. Your machine is still alive, so
-you can reattach vesafb. However, to reattach vesafb, you need to do one of
-the following:
-
-Variation 1:
-
-    a. Before detaching fbcon, do
-
-       vbetool vbemode save > <vesa state file> # do once for each vesafb mode,
-						# the file can be reused
-
-    b. Detach fbcon as in step 5.
-
-    c. Attach fbcon
-
-        vbetool vbestate restore < <vesa state file> && \
-	echo 1 > /sys/class/vtconsole/vtcon1/bind
-
-Variation 2:
-
-    a. Before detaching fbcon, do:
-	echo <ID> > /sys/class/tty/console/bind
-
-
-       vbetool vbemode get
-
-    b. Take note of the mode number
-
-    b. Detach fbcon as in step 5.
-
-    c. Attach fbcon:
-
-       vbetool vbemode set <mode number> && \
-       echo 1 > /sys/class/vtconsole/vtcon1/bind
-
-Samples:
-========
-
-Here are 2 sample bash scripts that you can use to bind or unbind the
-framebuffer console driver if you are on an X86 box:
-
----------------------------------------------------------------------------
-#!/bin/bash
-# Unbind fbcon
-
-# Change this to where your actual vgastate file is located
-# Or Use VGASTATE=$1 to indicate the state file at runtime
-VGASTATE=/tmp/vgastate
-
-# path to vbetool
-VBETOOL=/usr/local/bin
-
-
-for (( i = 0; i < 16; i++))
-do
-  if test -x /sys/class/vtconsole/vtcon$i; then
-      if [ `cat /sys/class/vtconsole/vtcon$i/name | grep -c "frame buffer"` \
-           = 1 ]; then
-	    if test -x $VBETOOL/vbetool; then
-	       echo Unbinding vtcon$i
-	       $VBETOOL/vbetool vbestate restore < $VGASTATE
-	       echo 0 > /sys/class/vtconsole/vtcon$i/bind
-	    fi
-      fi
-  fi
-done
-
----------------------------------------------------------------------------
-#!/bin/bash
-# Bind fbcon
-
-for (( i = 0; i < 16; i++))
-do
-  if test -x /sys/class/vtconsole/vtcon$i; then
-      if [ `cat /sys/class/vtconsole/vtcon$i/name | grep -c "frame buffer"` \
-           = 1 ]; then
-	  echo Unbinding vtcon$i
-	  echo 1 > /sys/class/vtconsole/vtcon$i/bind
-      fi
-  fi
-done
----------------------------------------------------------------------------
-
---
-Antonino Daplas <adaplas@pol.net>
diff --git a/Documentation/fb/framebuffer.rst b/Documentation/fb/framebuffer.rst
new file mode 100644
index 000000000000..7fe087310c82
--- /dev/null
+++ b/Documentation/fb/framebuffer.rst
@@ -0,0 +1,353 @@
+=======================
+The Frame Buffer Device
+=======================
+
+Last revised: May 10, 2001
+
+
+0. Introduction
+---------------
+
+The frame buffer device provides an abstraction for the graphics hardware. It
+represents the frame buffer of some video hardware and allows application
+software to access the graphics hardware through a well-defined interface, so
+the software doesn't need to know anything about the low-level (hardware
+register) stuff.
+
+The device is accessed through special device nodes, usually located in the
+/dev directory, i.e. /dev/fb*.
+
+
+1. User's View of /dev/fb*
+--------------------------
+
+From the user's point of view, the frame buffer device looks just like any
+other device in /dev. It's a character device using major 29; the minor
+specifies the frame buffer number.
+
+By convention, the following device nodes are used (numbers indicate the device
+minor numbers)::
+
+      0 = /dev/fb0	First frame buffer
+      1 = /dev/fb1	Second frame buffer
+	  ...
+     31 = /dev/fb31	32nd frame buffer
+
+For backwards compatibility, you may want to create the following symbolic
+links::
+
+    /dev/fb0current -> fb0
+    /dev/fb1current -> fb1
+
+and so on...
+
+The frame buffer devices are also `normal` memory devices, this means, you can
+read and write their contents. You can, for example, make a screen snapshot by::
+
+  cp /dev/fb0 myfile
+
+There also can be more than one frame buffer at a time, e.g. if you have a
+graphics card in addition to the built-in hardware. The corresponding frame
+buffer devices (/dev/fb0 and /dev/fb1 etc.) work independently.
+
+Application software that uses the frame buffer device (e.g. the X server) will
+use /dev/fb0 by default (older software uses /dev/fb0current). You can specify
+an alternative frame buffer device by setting the environment variable
+$FRAMEBUFFER to the path name of a frame buffer device, e.g. (for sh/bash
+users)::
+
+    export FRAMEBUFFER=/dev/fb1
+
+or (for csh users)::
+
+    setenv FRAMEBUFFER /dev/fb1
+
+After this the X server will use the second frame buffer.
+
+
+2. Programmer's View of /dev/fb*
+--------------------------------
+
+As you already know, a frame buffer device is a memory device like /dev/mem and
+it has the same features. You can read it, write it, seek to some location in
+it and mmap() it (the main usage). The difference is just that the memory that
+appears in the special file is not the whole memory, but the frame buffer of
+some video hardware.
+
+/dev/fb* also allows several ioctls on it, by which lots of information about
+the hardware can be queried and set. The color map handling works via ioctls,
+too. Look into <linux/fb.h> for more information on what ioctls exist and on
+which data structures they work. Here's just a brief overview:
+
+  - You can request unchangeable information about the hardware, like name,
+    organization of the screen memory (planes, packed pixels, ...) and address
+    and length of the screen memory.
+
+  - You can request and change variable information about the hardware, like
+    visible and virtual geometry, depth, color map format, timing, and so on.
+    If you try to change that information, the driver maybe will round up some
+    values to meet the hardware's capabilities (or return EINVAL if that isn't
+    possible).
+
+  - You can get and set parts of the color map. Communication is done with 16
+    bits per color part (red, green, blue, transparency) to support all
+    existing hardware. The driver does all the computations needed to apply
+    it to the hardware (round it down to less bits, maybe throw away
+    transparency).
+
+All this hardware abstraction makes the implementation of application programs
+easier and more portable. E.g. the X server works completely on /dev/fb* and
+thus doesn't need to know, for example, how the color registers of the concrete
+hardware are organized. XF68_FBDev is a general X server for bitmapped,
+unaccelerated video hardware. The only thing that has to be built into
+application programs is the screen organization (bitplanes or chunky pixels
+etc.), because it works on the frame buffer image data directly.
+
+For the future it is planned that frame buffer drivers for graphics cards and
+the like can be implemented as kernel modules that are loaded at runtime. Such
+a driver just has to call register_framebuffer() and supply some functions.
+Writing and distributing such drivers independently from the kernel will save
+much trouble...
+
+
+3. Frame Buffer Resolution Maintenance
+--------------------------------------
+
+Frame buffer resolutions are maintained using the utility `fbset`. It can
+change the video mode properties of a frame buffer device. Its main usage is
+to change the current video mode, e.g. during boot up in one of your `/etc/rc.*`
+or `/etc/init.d/*` files.
+
+Fbset uses a video mode database stored in a configuration file, so you can
+easily add your own modes and refer to them with a simple identifier.
+
+
+4. The X Server
+---------------
+
+The X server (XF68_FBDev) is the most notable application program for the frame
+buffer device. Starting with XFree86 release 3.2, the X server is part of
+XFree86 and has 2 modes:
+
+  - If the `Display` subsection for the `fbdev` driver in the /etc/XF86Config
+    file contains a::
+
+	Modes "default"
+
+    line, the X server will use the scheme discussed above, i.e. it will start
+    up in the resolution determined by /dev/fb0 (or $FRAMEBUFFER, if set). You
+    still have to specify the color depth (using the Depth keyword) and virtual
+    resolution (using the Virtual keyword) though. This is the default for the
+    configuration file supplied with XFree86. It's the most simple
+    configuration, but it has some limitations.
+
+  - Therefore it's also possible to specify resolutions in the /etc/XF86Config
+    file. This allows for on-the-fly resolution switching while retaining the
+    same virtual desktop size. The frame buffer device that's used is still
+    /dev/fb0current (or $FRAMEBUFFER), but the available resolutions are
+    defined by /etc/XF86Config now. The disadvantage is that you have to
+    specify the timings in a different format (but `fbset -x` may help).
+
+To tune a video mode, you can use fbset or xvidtune. Note that xvidtune doesn't
+work 100% with XF68_FBDev: the reported clock values are always incorrect.
+
+
+5. Video Mode Timings
+---------------------
+
+A monitor draws an image on the screen by using an electron beam (3 electron
+beams for color models, 1 electron beam for monochrome monitors). The front of
+the screen is covered by a pattern of colored phosphors (pixels). If a phosphor
+is hit by an electron, it emits a photon and thus becomes visible.
+
+The electron beam draws horizontal lines (scanlines) from left to right, and
+from the top to the bottom of the screen. By modifying the intensity of the
+electron beam, pixels with various colors and intensities can be shown.
+
+After each scanline the electron beam has to move back to the left side of the
+screen and to the next line: this is called the horizontal retrace. After the
+whole screen (frame) was painted, the beam moves back to the upper left corner:
+this is called the vertical retrace. During both the horizontal and vertical
+retrace, the electron beam is turned off (blanked).
+
+The speed at which the electron beam paints the pixels is determined by the
+dotclock in the graphics board. For a dotclock of e.g. 28.37516 MHz (millions
+of cycles per second), each pixel is 35242 ps (picoseconds) long::
+
+    1/(28.37516E6 Hz) = 35.242E-9 s
+
+If the screen resolution is 640x480, it will take::
+
+    640*35.242E-9 s = 22.555E-6 s
+
+to paint the 640 (xres) pixels on one scanline. But the horizontal retrace
+also takes time (e.g. 272 `pixels`), so a full scanline takes::
+
+    (640+272)*35.242E-9 s = 32.141E-6 s
+
+We'll say that the horizontal scanrate is about 31 kHz::
+
+    1/(32.141E-6 s) = 31.113E3 Hz
+
+A full screen counts 480 (yres) lines, but we have to consider the vertical
+retrace too (e.g. 49 `lines`). So a full screen will take::
+
+    (480+49)*32.141E-6 s = 17.002E-3 s
+
+The vertical scanrate is about 59 Hz::
+
+    1/(17.002E-3 s) = 58.815 Hz
+
+This means the screen data is refreshed about 59 times per second. To have a
+stable picture without visible flicker, VESA recommends a vertical scanrate of
+at least 72 Hz. But the perceived flicker is very human dependent: some people
+can use 50 Hz without any trouble, while I'll notice if it's less than 80 Hz.
+
+Since the monitor doesn't know when a new scanline starts, the graphics board
+will supply a synchronization pulse (horizontal sync or hsync) for each
+scanline.  Similarly it supplies a synchronization pulse (vertical sync or
+vsync) for each new frame. The position of the image on the screen is
+influenced by the moments at which the synchronization pulses occur.
+
+The following picture summarizes all timings. The horizontal retrace time is
+the sum of the left margin, the right margin and the hsync length, while the
+vertical retrace time is the sum of the upper margin, the lower margin and the
+vsync length::
+
+  +----------+---------------------------------------------+----------+-------+
+  |          |                ↑                            |          |       |
+  |          |                |upper_margin                |          |       |
+  |          |                ↓                            |          |       |
+  +----------###############################################----------+-------+
+  |          #                ↑                            #          |       |
+  |          #                |                            #          |       |
+  |          #                |                            #          |       |
+  |          #                |                            #          |       |
+  |   left   #                |                            #  right   | hsync |
+  |  margin  #                |       xres                 #  margin  |  len  |
+  |<-------->#<---------------+--------------------------->#<-------->|<----->|
+  |          #                |                            #          |       |
+  |          #                |                            #          |       |
+  |          #                |                            #          |       |
+  |          #                |yres                        #          |       |
+  |          #                |                            #          |       |
+  |          #                |                            #          |       |
+  |          #                |                            #          |       |
+  |          #                |                            #          |       |
+  |          #                |                            #          |       |
+  |          #                |                            #          |       |
+  |          #                |                            #          |       |
+  |          #                |                            #          |       |
+  |          #                ↓                            #          |       |
+  +----------###############################################----------+-------+
+  |          |                ↑                            |          |       |
+  |          |                |lower_margin                |          |       |
+  |          |                ↓                            |          |       |
+  +----------+---------------------------------------------+----------+-------+
+  |          |                ↑                            |          |       |
+  |          |                |vsync_len                   |          |       |
+  |          |                ↓                            |          |       |
+  +----------+---------------------------------------------+----------+-------+
+
+The frame buffer device expects all horizontal timings in number of dotclocks
+(in picoseconds, 1E-12 s), and vertical timings in number of scanlines.
+
+
+6. Converting XFree86 timing values info frame buffer device timings
+--------------------------------------------------------------------
+
+An XFree86 mode line consists of the following fields::
+
+ "800x600"     50      800  856  976 1040    600  637  643  666
+ < name >     DCF       HR  SH1  SH2  HFL     VR  SV1  SV2  VFL
+
+The frame buffer device uses the following fields:
+
+  - pixclock: pixel clock in ps (pico seconds)
+  - left_margin: time from sync to picture
+  - right_margin: time from picture to sync
+  - upper_margin: time from sync to picture
+  - lower_margin: time from picture to sync
+  - hsync_len: length of horizontal sync
+  - vsync_len: length of vertical sync
+
+1) Pixelclock:
+
+   xfree: in MHz
+
+   fb: in picoseconds (ps)
+
+   pixclock = 1000000 / DCF
+
+2) horizontal timings:
+
+   left_margin = HFL - SH2
+
+   right_margin = SH1 - HR
+
+   hsync_len = SH2 - SH1
+
+3) vertical timings:
+
+   upper_margin = VFL - SV2
+
+   lower_margin = SV1 - VR
+
+   vsync_len = SV2 - SV1
+
+Good examples for VESA timings can be found in the XFree86 source tree,
+under "xc/programs/Xserver/hw/xfree86/doc/modeDB.txt".
+
+
+7. References
+-------------
+
+For more specific information about the frame buffer device and its
+applications, please refer to the Linux-fbdev website:
+
+    http://linux-fbdev.sourceforge.net/
+
+and to the following documentation:
+
+  - The manual pages for fbset: fbset(8), fb.modes(5)
+  - The manual pages for XFree86: XF68_FBDev(1), XF86Config(4/5)
+  - The mighty kernel sources:
+
+      - linux/drivers/video/
+      - linux/include/linux/fb.h
+      - linux/include/video/
+
+
+
+8. Mailing list
+---------------
+
+There is a frame buffer device related mailing list at kernel.org:
+linux-fbdev@vger.kernel.org.
+
+Point your web browser to http://sourceforge.net/projects/linux-fbdev/ for
+subscription information and archive browsing.
+
+
+9. Downloading
+--------------
+
+All necessary files can be found at
+
+    ftp://ftp.uni-erlangen.de/pub/Linux/LOCAL/680x0/
+
+and on its mirrors.
+
+The latest version of fbset can be found at
+
+    http://www.linux-fbdev.org/
+
+
+10. Credits
+-----------
+
+This readme was written by Geert Uytterhoeven, partly based on the original
+`X-framebuffer.README` by Roman Hodek and Martin Schaller. Section 6 was
+provided by Frank Neumann.
+
+The frame buffer device abstraction was designed by Martin Schaller.
diff --git a/Documentation/fb/framebuffer.txt b/Documentation/fb/framebuffer.txt
deleted file mode 100644
index 58c5ae2e9f59..000000000000
--- a/Documentation/fb/framebuffer.txt
+++ /dev/null
@@ -1,343 +0,0 @@
-			The Frame Buffer Device
-			-----------------------
-
-Maintained by Geert Uytterhoeven <geert@linux-m68k.org>
-Last revised: May 10, 2001
-
-
-0. Introduction
----------------
-
-The frame buffer device provides an abstraction for the graphics hardware. It
-represents the frame buffer of some video hardware and allows application
-software to access the graphics hardware through a well-defined interface, so
-the software doesn't need to know anything about the low-level (hardware
-register) stuff.
-
-The device is accessed through special device nodes, usually located in the
-/dev directory, i.e. /dev/fb*.
-
-
-1. User's View of /dev/fb*
---------------------------
-
-From the user's point of view, the frame buffer device looks just like any
-other device in /dev. It's a character device using major 29; the minor
-specifies the frame buffer number.
-
-By convention, the following device nodes are used (numbers indicate the device
-minor numbers):
-
-      0 = /dev/fb0	First frame buffer
-      1 = /dev/fb1	Second frame buffer
-	  ...
-     31 = /dev/fb31	32nd frame buffer
-
-For backwards compatibility, you may want to create the following symbolic
-links:
-
-    /dev/fb0current -> fb0
-    /dev/fb1current -> fb1
-
-and so on...
-
-The frame buffer devices are also `normal' memory devices, this means, you can
-read and write their contents. You can, for example, make a screen snapshot by
-
-  cp /dev/fb0 myfile
-
-There also can be more than one frame buffer at a time, e.g. if you have a
-graphics card in addition to the built-in hardware. The corresponding frame
-buffer devices (/dev/fb0 and /dev/fb1 etc.) work independently.
-
-Application software that uses the frame buffer device (e.g. the X server) will
-use /dev/fb0 by default (older software uses /dev/fb0current). You can specify
-an alternative frame buffer device by setting the environment variable
-$FRAMEBUFFER to the path name of a frame buffer device, e.g. (for sh/bash
-users):
-
-    export FRAMEBUFFER=/dev/fb1
-
-or (for csh users):
-
-    setenv FRAMEBUFFER /dev/fb1
-
-After this the X server will use the second frame buffer.
-
-
-2. Programmer's View of /dev/fb*
---------------------------------
-
-As you already know, a frame buffer device is a memory device like /dev/mem and
-it has the same features. You can read it, write it, seek to some location in
-it and mmap() it (the main usage). The difference is just that the memory that
-appears in the special file is not the whole memory, but the frame buffer of
-some video hardware.
-
-/dev/fb* also allows several ioctls on it, by which lots of information about
-the hardware can be queried and set. The color map handling works via ioctls,
-too. Look into <linux/fb.h> for more information on what ioctls exist and on
-which data structures they work. Here's just a brief overview:
-
-  - You can request unchangeable information about the hardware, like name,
-    organization of the screen memory (planes, packed pixels, ...) and address
-    and length of the screen memory.
-
-  - You can request and change variable information about the hardware, like
-    visible and virtual geometry, depth, color map format, timing, and so on.
-    If you try to change that information, the driver maybe will round up some
-    values to meet the hardware's capabilities (or return EINVAL if that isn't
-    possible).
-
-  - You can get and set parts of the color map. Communication is done with 16
-    bits per color part (red, green, blue, transparency) to support all 
-    existing hardware. The driver does all the computations needed to apply 
-    it to the hardware (round it down to less bits, maybe throw away 
-    transparency).
-
-All this hardware abstraction makes the implementation of application programs
-easier and more portable. E.g. the X server works completely on /dev/fb* and
-thus doesn't need to know, for example, how the color registers of the concrete
-hardware are organized. XF68_FBDev is a general X server for bitmapped,
-unaccelerated video hardware. The only thing that has to be built into
-application programs is the screen organization (bitplanes or chunky pixels
-etc.), because it works on the frame buffer image data directly.
-
-For the future it is planned that frame buffer drivers for graphics cards and
-the like can be implemented as kernel modules that are loaded at runtime. Such
-a driver just has to call register_framebuffer() and supply some functions.
-Writing and distributing such drivers independently from the kernel will save
-much trouble...
-
-
-3. Frame Buffer Resolution Maintenance
---------------------------------------
-
-Frame buffer resolutions are maintained using the utility `fbset'. It can
-change the video mode properties of a frame buffer device. Its main usage is
-to change the current video mode, e.g. during boot up in one of your /etc/rc.*
-or /etc/init.d/* files.
-
-Fbset uses a video mode database stored in a configuration file, so you can
-easily add your own modes and refer to them with a simple identifier.
-
-
-4. The X Server
----------------
-
-The X server (XF68_FBDev) is the most notable application program for the frame
-buffer device. Starting with XFree86 release 3.2, the X server is part of
-XFree86 and has 2 modes:
-
-  - If the `Display' subsection for the `fbdev' driver in the /etc/XF86Config
-    file contains a
-
-	Modes "default"
-
-    line, the X server will use the scheme discussed above, i.e. it will start
-    up in the resolution determined by /dev/fb0 (or $FRAMEBUFFER, if set). You
-    still have to specify the color depth (using the Depth keyword) and virtual
-    resolution (using the Virtual keyword) though. This is the default for the
-    configuration file supplied with XFree86. It's the most simple
-    configuration, but it has some limitations.
-
-  - Therefore it's also possible to specify resolutions in the /etc/XF86Config
-    file. This allows for on-the-fly resolution switching while retaining the
-    same virtual desktop size. The frame buffer device that's used is still
-    /dev/fb0current (or $FRAMEBUFFER), but the available resolutions are
-    defined by /etc/XF86Config now. The disadvantage is that you have to
-    specify the timings in a different format (but `fbset -x' may help).
-
-To tune a video mode, you can use fbset or xvidtune. Note that xvidtune doesn't
-work 100% with XF68_FBDev: the reported clock values are always incorrect.
-
-
-5. Video Mode Timings
----------------------
-
-A monitor draws an image on the screen by using an electron beam (3 electron
-beams for color models, 1 electron beam for monochrome monitors). The front of
-the screen is covered by a pattern of colored phosphors (pixels). If a phosphor
-is hit by an electron, it emits a photon and thus becomes visible.
-
-The electron beam draws horizontal lines (scanlines) from left to right, and
-from the top to the bottom of the screen. By modifying the intensity of the
-electron beam, pixels with various colors and intensities can be shown.
-
-After each scanline the electron beam has to move back to the left side of the
-screen and to the next line: this is called the horizontal retrace. After the
-whole screen (frame) was painted, the beam moves back to the upper left corner:
-this is called the vertical retrace. During both the horizontal and vertical
-retrace, the electron beam is turned off (blanked).
-
-The speed at which the electron beam paints the pixels is determined by the
-dotclock in the graphics board. For a dotclock of e.g. 28.37516 MHz (millions
-of cycles per second), each pixel is 35242 ps (picoseconds) long:
-
-    1/(28.37516E6 Hz) = 35.242E-9 s
-
-If the screen resolution is 640x480, it will take
-
-    640*35.242E-9 s = 22.555E-6 s
-
-to paint the 640 (xres) pixels on one scanline. But the horizontal retrace
-also takes time (e.g. 272 `pixels'), so a full scanline takes
-
-    (640+272)*35.242E-9 s = 32.141E-6 s
-
-We'll say that the horizontal scanrate is about 31 kHz:
-
-    1/(32.141E-6 s) = 31.113E3 Hz
-
-A full screen counts 480 (yres) lines, but we have to consider the vertical
-retrace too (e.g. 49 `lines'). So a full screen will take
-
-    (480+49)*32.141E-6 s = 17.002E-3 s
-
-The vertical scanrate is about 59 Hz:
-
-    1/(17.002E-3 s) = 58.815 Hz
-
-This means the screen data is refreshed about 59 times per second. To have a
-stable picture without visible flicker, VESA recommends a vertical scanrate of
-at least 72 Hz. But the perceived flicker is very human dependent: some people
-can use 50 Hz without any trouble, while I'll notice if it's less than 80 Hz.
-
-Since the monitor doesn't know when a new scanline starts, the graphics board
-will supply a synchronization pulse (horizontal sync or hsync) for each
-scanline.  Similarly it supplies a synchronization pulse (vertical sync or
-vsync) for each new frame. The position of the image on the screen is
-influenced by the moments at which the synchronization pulses occur.
-
-The following picture summarizes all timings. The horizontal retrace time is
-the sum of the left margin, the right margin and the hsync length, while the
-vertical retrace time is the sum of the upper margin, the lower margin and the
-vsync length.
-
-  +----------+---------------------------------------------+----------+-------+
-  |          |                ↑                            |          |       |
-  |          |                |upper_margin                |          |       |
-  |          |                ↓                            |          |       |
-  +----------###############################################----------+-------+
-  |          #                ↑                            #          |       |
-  |          #                |                            #          |       |
-  |          #                |                            #          |       |
-  |          #                |                            #          |       |
-  |   left   #                |                            #  right   | hsync |
-  |  margin  #                |       xres                 #  margin  |  len  |
-  |<-------->#<---------------+--------------------------->#<-------->|<----->|
-  |          #                |                            #          |       |
-  |          #                |                            #          |       |
-  |          #                |                            #          |       |
-  |          #                |yres                        #          |       |
-  |          #                |                            #          |       |
-  |          #                |                            #          |       |
-  |          #                |                            #          |       |
-  |          #                |                            #          |       |
-  |          #                |                            #          |       |
-  |          #                |                            #          |       |
-  |          #                |                            #          |       |
-  |          #                |                            #          |       |
-  |          #                ↓                            #          |       |
-  +----------###############################################----------+-------+
-  |          |                ↑                            |          |       |
-  |          |                |lower_margin                |          |       |
-  |          |                ↓                            |          |       |
-  +----------+---------------------------------------------+----------+-------+
-  |          |                ↑                            |          |       |
-  |          |                |vsync_len                   |          |       |
-  |          |                ↓                            |          |       |
-  +----------+---------------------------------------------+----------+-------+
-
-The frame buffer device expects all horizontal timings in number of dotclocks
-(in picoseconds, 1E-12 s), and vertical timings in number of scanlines.
-
-
-6. Converting XFree86 timing values info frame buffer device timings
---------------------------------------------------------------------
-
-An XFree86 mode line consists of the following fields:
- "800x600"     50      800  856  976 1040    600  637  643  666
- < name >     DCF       HR  SH1  SH2  HFL     VR  SV1  SV2  VFL
-
-The frame buffer device uses the following fields:
-
-  - pixclock: pixel clock in ps (pico seconds)
-  - left_margin: time from sync to picture
-  - right_margin: time from picture to sync
-  - upper_margin: time from sync to picture
-  - lower_margin: time from picture to sync
-  - hsync_len: length of horizontal sync
-  - vsync_len: length of vertical sync
-
-1) Pixelclock:
-   xfree: in MHz
-   fb: in picoseconds (ps)
-
-   pixclock = 1000000 / DCF
-
-2) horizontal timings:
-   left_margin = HFL - SH2
-   right_margin = SH1 - HR
-   hsync_len = SH2 - SH1
-
-3) vertical timings:
-   upper_margin = VFL - SV2
-   lower_margin = SV1 - VR
-   vsync_len = SV2 - SV1
-
-Good examples for VESA timings can be found in the XFree86 source tree,
-under "xc/programs/Xserver/hw/xfree86/doc/modeDB.txt".
-
-
-7. References
--------------
-
-For more specific information about the frame buffer device and its
-applications, please refer to the Linux-fbdev website:
-
-    http://linux-fbdev.sourceforge.net/
-
-and to the following documentation:
-
-  - The manual pages for fbset: fbset(8), fb.modes(5)
-  - The manual pages for XFree86: XF68_FBDev(1), XF86Config(4/5)
-  - The mighty kernel sources:
-      o linux/drivers/video/
-      o linux/include/linux/fb.h
-      o linux/include/video/
-
-
-
-8. Mailing list
----------------
-
-There is a frame buffer device related mailing list at kernel.org:
-linux-fbdev@vger.kernel.org.
-
-Point your web browser to http://sourceforge.net/projects/linux-fbdev/ for
-subscription information and archive browsing.
-
-
-9. Downloading
---------------
-
-All necessary files can be found at
-
-    ftp://ftp.uni-erlangen.de/pub/Linux/LOCAL/680x0/
-
-and on its mirrors.
-
-The latest version of fbset can be found at
-
-    http://www.linux-fbdev.org/ 
-
-  
-10. Credits                                                       
-----------                                                       
-                
-This readme was written by Geert Uytterhoeven, partly based on the original
-`X-framebuffer.README' by Roman Hodek and Martin Schaller. Section 6 was
-provided by Frank Neumann.
-
-The frame buffer device abstraction was designed by Martin Schaller.
diff --git a/Documentation/fb/gxfb.rst b/Documentation/fb/gxfb.rst
new file mode 100644
index 000000000000..5738709bccbb
--- /dev/null
+++ b/Documentation/fb/gxfb.rst
@@ -0,0 +1,54 @@
+=============
+What is gxfb?
+=============
+
+.. [This file is cloned from VesaFB/aty128fb]
+
+This is a graphics framebuffer driver for AMD Geode GX2 based processors.
+
+Advantages:
+
+ * No need to use AMD's VSA code (or other VESA emulation layer) in the
+   BIOS.
+ * It provides a nice large console (128 cols + 48 lines with 1024x768)
+   without using tiny, unreadable fonts.
+ * You can run XF68_FBDev on top of /dev/fb0
+ * Most important: boot logo :-)
+
+Disadvantages:
+
+ * graphic mode is slower than text mode...
+
+
+How to use it?
+==============
+
+Switching modes is done using  gxfb.mode_option=<resolution>... boot
+parameter or using `fbset` program.
+
+See Documentation/fb/modedb.rst for more information on modedb
+resolutions.
+
+
+X11
+===
+
+XF68_FBDev should generally work fine, but it is non-accelerated.
+
+
+Configuration
+=============
+
+You can pass kernel command line options to gxfb with gxfb.<option>.
+For example, gxfb.mode_option=800x600@75.
+Accepted options:
+
+================ ==================================================
+mode_option	 specify the video mode.  Of the form
+		 <x>x<y>[-<bpp>][@<refresh>]
+vram		 size of video ram (normally auto-detected)
+vt_switch	 enable vt switching during suspend/resume.  The vt
+		 switch is slow, but harmless.
+================ ==================================================
+
+Andres Salomon <dilinger@debian.org>
diff --git a/Documentation/fb/gxfb.txt b/Documentation/fb/gxfb.txt
deleted file mode 100644
index 2f640903bbb2..000000000000
--- a/Documentation/fb/gxfb.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-[This file is cloned from VesaFB/aty128fb]
-
-What is gxfb?
-=================
-
-This is a graphics framebuffer driver for AMD Geode GX2 based processors.
-
-Advantages:
-
- * No need to use AMD's VSA code (or other VESA emulation layer) in the
-   BIOS.
- * It provides a nice large console (128 cols + 48 lines with 1024x768)
-   without using tiny, unreadable fonts.
- * You can run XF68_FBDev on top of /dev/fb0
- * Most important: boot logo :-)
-
-Disadvantages:
-
- * graphic mode is slower than text mode...
-
-
-How to use it?
-==============
-
-Switching modes is done using  gxfb.mode_option=<resolution>... boot
-parameter or using `fbset' program.
-
-See Documentation/fb/modedb.txt for more information on modedb
-resolutions.
-
-
-X11
-===
-
-XF68_FBDev should generally work fine, but it is non-accelerated.
-
-
-Configuration
-=============
-
-You can pass kernel command line options to gxfb with gxfb.<option>.
-For example, gxfb.mode_option=800x600@75.
-Accepted options:
-
-mode_option	- specify the video mode.  Of the form
-		  <x>x<y>[-<bpp>][@<refresh>]
-vram		- size of video ram (normally auto-detected)
-vt_switch	- enable vt switching during suspend/resume.  The vt
-		  switch is slow, but harmless.
-
---
-Andres Salomon <dilinger@debian.org>
diff --git a/Documentation/fb/index.rst b/Documentation/fb/index.rst
new file mode 100644
index 000000000000..d47313714635
--- /dev/null
+++ b/Documentation/fb/index.rst
@@ -0,0 +1,50 @@
+:orphan:
+
+============
+Frame Buffer
+============
+
+.. toctree::
+    :maxdepth: 1
+
+    api
+    arkfb
+    aty128fb
+    cirrusfb
+    cmap_xfbdev
+    deferred_io
+    efifb
+    ep93xx-fb
+    fbcon
+    framebuffer
+    gxfb
+    intel810
+    intelfb
+    internals
+    lxfb
+    matroxfb
+    metronomefb
+    modedb
+    pvr2fb
+    pxafb
+    s3fb
+    sa1100fb
+    sh7760fb
+    sisfb
+    sm501
+    sm712fb
+    sstfb
+    tgafb
+    tridentfb
+    udlfb
+    uvesafb
+    vesafb
+    viafb
+    vt8623fb
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/fb/intel810.rst b/Documentation/fb/intel810.rst
new file mode 100644
index 000000000000..eb86098db91f
--- /dev/null
+++ b/Documentation/fb/intel810.rst
@@ -0,0 +1,287 @@
+================================
+Intel 810/815 Framebuffer driver
+================================
+
+Tony Daplas <adaplas@pol.net>
+
+http://i810fb.sourceforge.net
+
+March 17, 2002
+
+First Released: July 2001
+Last Update:    September 12, 2005
+
+A. Introduction
+===============
+
+	This is a framebuffer driver for various Intel 810/815 compatible
+	graphics devices.  These include:
+
+	- Intel 810
+	- Intel 810E
+	- Intel 810-DC100
+	- Intel 815 Internal graphics only, 100Mhz FSB
+	- Intel 815 Internal graphics only
+	- Intel 815 Internal graphics and AGP
+
+B.  Features
+============
+
+	- Choice of using Discrete Video Timings, VESA Generalized Timing
+	  Formula, or a framebuffer specific database to set the video mode
+
+	- Supports a variable range of horizontal and vertical resolution and
+	  vertical refresh rates if the VESA Generalized Timing Formula is
+	  enabled.
+
+	- Supports color depths of 8, 16, 24 and 32 bits per pixel
+
+	- Supports pseudocolor, directcolor, or truecolor visuals
+
+	- Full and optimized hardware acceleration at 8, 16 and 24 bpp
+
+	- Robust video state save and restore
+
+	- MTRR support
+
+	- Utilizes user-entered monitor specifications to automatically
+	  calculate required video mode parameters.
+
+	- Can concurrently run with xfree86 running with native i810 drivers
+
+	- Hardware Cursor Support
+
+	- Supports EDID probing either by DDC/I2C or through the BIOS
+
+C.  List of available options
+=============================
+
+   a. "video=i810fb"
+	enables the i810 driver
+
+	Recommendation: required
+
+   b. "xres:<value>"
+	select horizontal resolution in pixels. (This parameter will be
+	ignored if 'mode_option' is specified.  See 'o' below).
+
+	Recommendation: user preference
+	(default = 640)
+
+   c. "yres:<value>"
+	select vertical resolution in scanlines. If Discrete Video Timings
+	is enabled, this will be ignored and computed as 3*xres/4.  (This
+	parameter will be ignored if 'mode_option' is specified.  See 'o'
+	below)
+
+	Recommendation: user preference
+	(default = 480)
+
+   d. "vyres:<value>"
+	select virtual vertical resolution in scanlines. If (0) or none
+	is specified, this will be computed against maximum available memory.
+
+	Recommendation: do not set
+	(default = 480)
+
+   e. "vram:<value>"
+	select amount of system RAM in MB to allocate for the video memory
+
+	Recommendation: 1 - 4 MB.
+	(default = 4)
+
+   f. "bpp:<value>"
+	select desired pixel depth
+
+	Recommendation: 8
+	(default = 8)
+
+   g. "hsync1/hsync2:<value>"
+	select the minimum and maximum Horizontal Sync Frequency of the
+	monitor in kHz.  If using a fixed frequency monitor, hsync1 must
+	be equal to hsync2. If EDID probing is successful, these will be
+	ignored and values will be taken from the EDID block.
+
+	Recommendation: check monitor manual for correct values
+	(default = 29/30)
+
+   h. "vsync1/vsync2:<value>"
+	select the minimum and maximum Vertical Sync Frequency of the monitor
+	in Hz. You can also use this option to lock your monitor's refresh
+	rate. If EDID probing is successful, these will be ignored and values
+	will be taken from the EDID block.
+
+	Recommendation: check monitor manual for correct values
+	(default = 60/60)
+
+	IMPORTANT:  If you need to clamp your timings, try to give some
+	leeway for computational errors (over/underflows).  Example: if
+	using vsync1/vsync2 = 60/60, make sure hsync1/hsync2 has at least
+	a 1 unit difference, and vice versa.
+
+   i. "voffset:<value>"
+	select at what offset in MB of the logical memory to allocate the
+	framebuffer memory.  The intent is to avoid the memory blocks
+	used by standard graphics applications (XFree86).  The default
+	offset (16 MB for a 64 MB aperture, 8 MB for a 32 MB aperture) will
+	avoid XFree86's usage and allows up to 7 MB/15 MB of framebuffer
+	memory.  Depending on your usage, adjust the value up or down
+	(0 for maximum usage, 31/63 MB for the least amount).  Note, an
+	arbitrary setting may conflict with XFree86.
+
+	Recommendation: do not set
+	(default = 8 or 16 MB)
+
+   j. "accel"
+	enable text acceleration.  This can be enabled/reenabled anytime
+	by using 'fbset -accel true/false'.
+
+	Recommendation: enable
+	(default = not set)
+
+   k. "mtrr"
+	enable MTRR.  This allows data transfers to the framebuffer memory
+	to occur in bursts which can significantly increase performance.
+	Not very helpful with the i810/i815 because of 'shared memory'.
+
+	Recommendation: do not set
+	(default = not set)
+
+   l. "extvga"
+	if specified, secondary/external VGA output will always be enabled.
+	Useful if the BIOS turns off the VGA port when no monitor is attached.
+	The external VGA monitor can then be attached without rebooting.
+
+	Recommendation: do not set
+	(default = not set)
+
+   m. "sync"
+	Forces the hardware engine to do a "sync" or wait for the hardware
+	to finish before starting another instruction. This will produce a
+	more stable setup, but will be slower.
+
+	Recommendation: do not set
+	(default = not set)
+
+   n. "dcolor"
+	Use directcolor visual instead of truecolor for pixel depths greater
+	than 8 bpp.  Useful for color tuning, such as gamma control.
+
+	Recommendation: do not set
+	(default = not set)
+
+   o. <xres>x<yres>[-<bpp>][@<refresh>]
+	The driver will now accept specification of boot mode option.  If this
+	is specified, the options 'xres' and 'yres' will be ignored. See
+	Documentation/fb/modedb.rst for usage.
+
+D. Kernel booting
+=================
+
+Separate each option/option-pair by commas (,) and the option from its value
+with a colon (:) as in the following::
+
+	video=i810fb:option1,option2:value2
+
+Sample Usage
+------------
+
+In /etc/lilo.conf, add the line::
+
+  append="video=i810fb:vram:2,xres:1024,yres:768,bpp:8,hsync1:30,hsync2:55, \
+	  vsync1:50,vsync2:85,accel,mtrr"
+
+This will initialize the framebuffer to 1024x768 at 8bpp.  The framebuffer
+will use 2 MB of System RAM. MTRR support will be enabled. The refresh rate
+will be computed based on the hsync1/hsync2 and vsync1/vsync2 values.
+
+IMPORTANT:
+  You must include hsync1, hsync2, vsync1 and vsync2 to enable video modes
+  better than 640x480 at 60Hz. HOWEVER, if your chipset/display combination
+  supports I2C and has an EDID block, you can safely exclude hsync1, hsync2,
+  vsync1 and vsync2 parameters.  These parameters will be taken from the EDID
+  block.
+
+E.  Module options
+==================
+
+The module parameters are essentially similar to the kernel
+parameters. The main difference is that you need to include a Boolean value
+(1 for TRUE, and 0 for FALSE) for those options which don't need a value.
+
+Example, to enable MTRR, include "mtrr=1".
+
+Sample Usage
+------------
+
+Using the same setup as described above, load the module like this::
+
+	modprobe i810fb vram=2 xres=1024 bpp=8 hsync1=30 hsync2=55 vsync1=50 \
+		 vsync2=85 accel=1 mtrr=1
+
+Or just add the following to a configuration file in /etc/modprobe.d/::
+
+	options i810fb vram=2 xres=1024 bpp=16 hsync1=30 hsync2=55 vsync1=50 \
+	vsync2=85 accel=1 mtrr=1
+
+and just do a::
+
+	modprobe i810fb
+
+
+F.  Setup
+=========
+
+	a. Do your usual method of configuring the kernel
+
+	   make menuconfig/xconfig/config
+
+	b. Under "Code maturity level options" enable "Prompt for development
+	   and/or incomplete code/drivers".
+
+	c. Enable agpgart support for the Intel 810/815 on-board graphics.
+	   This is required.  The option is under "Character Devices".
+
+	d. Under "Graphics Support", select "Intel 810/815" either statically
+	   or as a module.  Choose "use VESA Generalized Timing Formula" if
+	   you need to maximize the capability of your display.  To be on the
+	   safe side, you can leave this unselected.
+
+	e. If you want support for DDC/I2C probing (Plug and Play Displays),
+	   set 'Enable DDC Support' to 'y'. To make this option appear, set
+	   'use VESA Generalized Timing Formula' to 'y'.
+
+	f. If you want a framebuffer console, enable it under "Console
+	   Drivers".
+
+	g. Compile your kernel.
+
+	h. Load the driver as described in sections D and E.
+
+	i.  Try the DirectFB (http://www.directfb.org) + the i810 gfxdriver
+	    patch to see the chipset in action (or inaction :-).
+
+G.  Acknowledgment:
+===================
+
+	1.  Geert Uytterhoeven - his excellent howto and the virtual
+	    framebuffer driver code made this possible.
+
+	2.  Jeff Hartmann for his agpgart code.
+
+	3.  The X developers.  Insights were provided just by reading the
+	    XFree86 source code.
+
+	4.  Intel(c).  For this value-oriented chipset driver and for
+	    providing documentation.
+
+	5. Matt Sottek.  His inputs and ideas  helped in making some
+	   optimizations possible.
+
+H.  Home Page:
+==============
+
+	A more complete, and probably updated information is provided at
+	http://i810fb.sourceforge.net.
+
+Tony
diff --git a/Documentation/fb/intel810.txt b/Documentation/fb/intel810.txt
deleted file mode 100644
index a8e9f5bca6f3..000000000000
--- a/Documentation/fb/intel810.txt
+++ /dev/null
@@ -1,278 +0,0 @@
-Intel 810/815 Framebuffer driver
- 	Tony Daplas <adaplas@pol.net>
-	http://i810fb.sourceforge.net
-
-	March 17, 2002
-
-	First Released: July 2001
-	Last Update:    September 12, 2005
-================================================================
-
-A. Introduction
-
-	This is a framebuffer driver for various Intel 810/815 compatible
-	graphics devices.  These include:
-
-	Intel 810
-	Intel 810E
-	Intel 810-DC100
-	Intel 815 Internal graphics only, 100Mhz FSB
-	Intel 815 Internal graphics only
-	Intel 815 Internal graphics and AGP
-
-B.  Features
-
-	- Choice of using Discrete Video Timings, VESA Generalized Timing
-	  Formula, or a framebuffer specific database to set the video mode
-
-	- Supports a variable range of horizontal and vertical resolution and
-	  vertical refresh rates if the VESA Generalized Timing Formula is
-	  enabled.
-
-	- Supports color depths of 8, 16, 24 and 32 bits per pixel
-
-	- Supports pseudocolor, directcolor, or truecolor visuals
-
-	- Full and optimized hardware acceleration at 8, 16 and 24 bpp
-
-	- Robust video state save and restore
-
-	- MTRR support
-
-	- Utilizes user-entered monitor specifications to automatically
-	  calculate required video mode parameters.
-
-	- Can concurrently run with xfree86 running with native i810 drivers
-
-	- Hardware Cursor Support
- 
-	- Supports EDID probing either by DDC/I2C or through the BIOS
-
-C.  List of available options
-
-   a. "video=i810fb"
-	enables the i810 driver
-
-	Recommendation: required
-
-   b. "xres:<value>"
-	select horizontal resolution in pixels. (This parameter will be
-	ignored if 'mode_option' is specified.  See 'o' below).
-
-	Recommendation: user preference
-	(default = 640)
-
-   c. "yres:<value>"
-	select vertical resolution in scanlines. If Discrete Video Timings
-	is enabled, this will be ignored and computed as 3*xres/4.  (This
-	parameter will be ignored if 'mode_option' is specified.  See 'o'
-	below)
-
-	Recommendation: user preference
-	(default = 480)
-
-   d. "vyres:<value>"
-	select virtual vertical resolution in scanlines. If (0) or none
-	is specified, this will be computed against maximum available memory.
-
-	Recommendation: do not set
-	(default = 480)
-
-   e. "vram:<value>"
-	select amount of system RAM in MB to allocate for the video memory
-
-	Recommendation: 1 - 4 MB.
-	(default = 4)
-
-   f. "bpp:<value>"
-	select desired pixel depth
-
-	Recommendation: 8
-	(default = 8)
-
-   g. "hsync1/hsync2:<value>"
-	select the minimum and maximum Horizontal Sync Frequency of the
-	monitor in kHz.  If using a fixed frequency monitor, hsync1 must
-	be equal to hsync2. If EDID probing is successful, these will be
-	ignored and values will be taken from the EDID block.
-
-	Recommendation: check monitor manual for correct values
-	(default = 29/30)
-
-   h. "vsync1/vsync2:<value>"
-	select the minimum and maximum Vertical Sync Frequency of the monitor
-	in Hz. You can also use this option to lock your monitor's refresh
-	rate. If EDID probing is successful, these will be ignored and values
-	will be taken from the EDID block.
-
-	Recommendation: check monitor manual for correct values
-	(default = 60/60)
-
-	IMPORTANT:  If you need to clamp your timings, try to give some
-	leeway for computational errors (over/underflows).  Example: if
-	using vsync1/vsync2 = 60/60, make sure hsync1/hsync2 has at least
-	a 1 unit difference, and vice versa.
-
-   i. "voffset:<value>"
-	select at what offset in MB of the logical memory to allocate the
-	framebuffer memory.  The intent is to avoid the memory blocks
-	used by standard graphics applications (XFree86).  The default
-	offset (16 MB for a 64 MB aperture, 8 MB for a 32 MB aperture) will
-	avoid XFree86's usage and allows up to 7 MB/15 MB of framebuffer
-	memory.  Depending on your usage, adjust the value up or down
-	(0 for maximum usage, 31/63 MB for the least amount).  Note, an
-	arbitrary setting may conflict with XFree86.
-
-	Recommendation: do not set
-	(default = 8 or 16 MB)
-
-   j. "accel"
-	enable text acceleration.  This can be enabled/reenabled anytime
-	by using 'fbset -accel true/false'.
-
-	Recommendation: enable
-	(default = not set)
-
-   k. "mtrr"
-	enable MTRR.  This allows data transfers to the framebuffer memory
-	to occur in bursts which can significantly increase performance.
-	Not very helpful with the i810/i815 because of 'shared memory'.
-
-	Recommendation: do not set
-	(default = not set)
-
-   l. "extvga"
-	if specified, secondary/external VGA output will always be enabled.
-	Useful if the BIOS turns off the VGA port when no monitor is attached.
-	The external VGA monitor can then be attached without rebooting.
-
-	Recommendation: do not set
-	(default = not set)
-
-   m. "sync"
-	Forces the hardware engine to do a "sync" or wait for the hardware
-	to finish before starting another instruction. This will produce a
-	more stable setup, but will be slower.
-
-	Recommendation: do not set
-	(default = not set)
-
-   n. "dcolor"
-        Use directcolor visual instead of truecolor for pixel depths greater
-	than 8 bpp.  Useful for color tuning, such as gamma control.
-
-	Recommendation: do not set
-	(default = not set)
-
-   o. <xres>x<yres>[-<bpp>][@<refresh>]
-	The driver will now accept specification of boot mode option.  If this
-	is specified, the options 'xres' and 'yres' will be ignored. See
-	Documentation/fb/modedb.txt for usage.
-
-D. Kernel booting
-
-Separate each option/option-pair by commas (,) and the option from its value
-with a colon (:) as in the following:
-
-video=i810fb:option1,option2:value2
-
-Sample Usage
-------------
-
-In /etc/lilo.conf, add the line:
-
-append="video=i810fb:vram:2,xres:1024,yres:768,bpp:8,hsync1:30,hsync2:55, \
-        vsync1:50,vsync2:85,accel,mtrr"
-
-This will initialize the framebuffer to 1024x768 at 8bpp.  The framebuffer
-will use 2 MB of System RAM. MTRR support will be enabled. The refresh rate
-will be computed based on the hsync1/hsync2 and vsync1/vsync2 values.
-
-IMPORTANT:
-You must include hsync1, hsync2, vsync1 and vsync2 to enable video modes
-better than 640x480 at 60Hz. HOWEVER, if your chipset/display combination
-supports I2C and has an EDID block, you can safely exclude hsync1, hsync2,
-vsync1 and vsync2 parameters.  These parameters will be taken from the EDID
-block.
-
-E.  Module options
-
-The module parameters are essentially similar to the kernel
-parameters. The main difference is that you need to include a Boolean value
-(1 for TRUE, and 0 for FALSE) for those options which don't need a value.
-
-Example, to enable MTRR, include "mtrr=1".
-
-Sample Usage
-------------
-
-Using the same setup as described above, load the module like this:
-
-	modprobe i810fb vram=2 xres=1024 bpp=8 hsync1=30 hsync2=55 vsync1=50 \
-	         vsync2=85 accel=1 mtrr=1
-
-Or just add the following to a configuration file in /etc/modprobe.d/
-
-	options i810fb vram=2 xres=1024 bpp=16 hsync1=30 hsync2=55 vsync1=50 \
-	vsync2=85 accel=1 mtrr=1
-
-and just do a
-
-	modprobe i810fb
-
-
-F.  Setup
-
-	a. Do your usual method of configuring the kernel.
-
-	make menuconfig/xconfig/config
-
-	b. Under "Code maturity level options" enable "Prompt for development
-	   and/or incomplete code/drivers".
-
- 	c. Enable agpgart support for the Intel 810/815 on-board graphics.
-	   This is required.  The option is under "Character Devices".
-
-	d. Under "Graphics Support", select "Intel 810/815" either statically
-	   or as a module.  Choose "use VESA Generalized Timing Formula" if
-	   you need to maximize the capability of your display.  To be on the
-	   safe side, you can leave this unselected.
-
-	e. If you want support for DDC/I2C probing (Plug and Play Displays),
-	   set 'Enable DDC Support' to 'y'. To make this option appear, set
-	   'use VESA Generalized Timing Formula' to 'y'.
-
-        f. If you want a framebuffer console, enable it under "Console
-	   Drivers".
-
-	g. Compile your kernel.
-
-	h. Load the driver as described in sections D and E.
-
-	i.  Try the DirectFB (http://www.directfb.org) + the i810 gfxdriver
-	    patch to see the chipset in action (or inaction :-).
-
-G.  Acknowledgment:
-
-	1.  Geert Uytterhoeven - his excellent howto and the virtual
-	    framebuffer driver code made this possible.
-
-	2.  Jeff Hartmann for his agpgart code.
-
-	3.  The X developers.  Insights were provided just by reading the
-	    XFree86 source code.
-
-	4.  Intel(c).  For this value-oriented chipset driver and for
-	    providing documentation.
-
-	5. Matt Sottek.  His inputs and ideas  helped in making some
-	   optimizations possible.
-
-H.  Home Page:
-
-	A more complete, and probably updated information is provided at
-	http://i810fb.sourceforge.net.
-
-###########################
-Tony
-
diff --git a/Documentation/fb/intelfb.rst b/Documentation/fb/intelfb.rst
new file mode 100644
index 000000000000..e2d0903f4efb
--- /dev/null
+++ b/Documentation/fb/intelfb.rst
@@ -0,0 +1,155 @@
+=============================================================
+Intel 830M/845G/852GM/855GM/865G/915G/945G Framebuffer driver
+=============================================================
+
+A. Introduction
+===============
+
+This is a framebuffer driver for various Intel 8xx/9xx compatible
+graphics devices.  These would include:
+
+	- Intel 830M
+	- Intel 845G
+	- Intel 852GM
+	- Intel 855GM
+	- Intel 865G
+	- Intel 915G
+	- Intel 915GM
+	- Intel 945G
+	- Intel 945GM
+	- Intel 945GME
+	- Intel 965G
+	- Intel 965GM
+
+B.  List of available options
+=============================
+
+   a. "video=intelfb"
+	enables the intelfb driver
+
+	Recommendation: required
+
+   b. "mode=<xres>x<yres>[-<bpp>][@<refresh>]"
+	select mode
+
+	Recommendation: user preference
+	(default = 1024x768-32@70)
+
+   c. "vram=<value>"
+	select amount of system RAM in MB to allocate for the video memory
+	if not enough RAM was already allocated by the BIOS.
+
+	Recommendation: 1 - 4 MB.
+	(default = 4 MB)
+
+   d. "voffset=<value>"
+	select at what offset in MB of the logical memory to allocate the
+	framebuffer memory.  The intent is to avoid the memory blocks
+	used by standard graphics applications (XFree86). Depending on your
+	usage, adjust the value up or down, (0 for maximum usage, 63/127 MB
+	for the least amount).  Note, an arbitrary setting may conflict
+	with XFree86.
+
+	Recommendation: do not set
+	(default = 48 MB)
+
+   e. "accel"
+	enable text acceleration.  This can be enabled/reenabled anytime
+	by using 'fbset -accel true/false'.
+
+	Recommendation: enable
+	(default = set)
+
+   f. "hwcursor"
+	enable cursor acceleration.
+
+	Recommendation: enable
+	(default = set)
+
+   g. "mtrr"
+	enable MTRR.  This allows data transfers to the framebuffer memory
+	to occur in bursts which can significantly increase performance.
+	Not very helpful with the intel chips because of 'shared memory'.
+
+	Recommendation: set
+	(default = set)
+
+   h. "fixed"
+	disable mode switching.
+
+	Recommendation: do not set
+	(default = not set)
+
+   The binary parameters can be unset with a "no" prefix, example "noaccel".
+   The default parameter (not named) is the mode.
+
+C. Kernel booting
+=================
+
+Separate each option/option-pair by commas (,) and the option from its value
+with an equals sign (=) as in the following::
+
+	video=intelfb:option1,option2=value2
+
+Sample Usage
+------------
+
+In /etc/lilo.conf, add the line::
+
+	append="video=intelfb:mode=800x600-32@75,accel,hwcursor,vram=8"
+
+This will initialize the framebuffer to 800x600 at 32bpp and 75Hz. The
+framebuffer will use 8 MB of System RAM. hw acceleration of text and cursor
+will be enabled.
+
+Remarks
+-------
+
+If setting this parameter doesn't work (you stay in a 80x25 text-mode),
+you might need to set the "vga=<mode>" parameter too - see vesafb.txt
+in this directory.
+
+
+D.  Module options
+==================
+
+The module parameters are essentially similar to the kernel
+parameters. The main difference is that you need to include a Boolean value
+(1 for TRUE, and 0 for FALSE) for those options which don't need a value.
+
+Example, to enable MTRR, include "mtrr=1".
+
+Sample Usage
+------------
+
+Using the same setup as described above, load the module like this::
+
+	modprobe intelfb mode=800x600-32@75 vram=8 accel=1 hwcursor=1
+
+Or just add the following to a configuration file in /etc/modprobe.d/::
+
+	options intelfb mode=800x600-32@75 vram=8 accel=1 hwcursor=1
+
+and just do a::
+
+	modprobe intelfb
+
+
+E.  Acknowledgment:
+===================
+
+	1.  Geert Uytterhoeven - his excellent howto and the virtual
+	    framebuffer driver code made this possible.
+
+	2.  Jeff Hartmann for his agpgart code.
+
+	3.  David Dawes for his original kernel 2.4 code.
+
+	4.  The X developers.  Insights were provided just by reading the
+	    XFree86 source code.
+
+	5.  Antonino A. Daplas for his inspiring i810fb driver.
+
+	6.  Andrew Morton for his kernel patches maintenance.
+
+Sylvain
diff --git a/Documentation/fb/intelfb.txt b/Documentation/fb/intelfb.txt
deleted file mode 100644
index feac4e4d6968..000000000000
--- a/Documentation/fb/intelfb.txt
+++ /dev/null
@@ -1,149 +0,0 @@
-Intel 830M/845G/852GM/855GM/865G/915G/945G Framebuffer driver
-================================================================
-
-A. Introduction
-	This is a framebuffer driver for various Intel 8xx/9xx compatible
-graphics devices.  These would include:
-
-	Intel 830M
-	Intel 845G
-	Intel 852GM
-	Intel 855GM
-	Intel 865G
-	Intel 915G
-	Intel 915GM
-	Intel 945G
-	Intel 945GM
-	Intel 945GME
-	Intel 965G
-	Intel 965GM
-
-B.  List of available options
-
-   a. "video=intelfb"
-	enables the intelfb driver
-
-	Recommendation: required
-
-   b. "mode=<xres>x<yres>[-<bpp>][@<refresh>]"
-	select mode
-
-	Recommendation: user preference
-	(default = 1024x768-32@70)
-
-   c. "vram=<value>"
-	select amount of system RAM in MB to allocate for the video memory
-	if not enough RAM was already allocated by the BIOS.
-
-	Recommendation: 1 - 4 MB.
-	(default = 4 MB)
-
-   d. "voffset=<value>"
-        select at what offset in MB of the logical memory to allocate the
-	framebuffer memory.  The intent is to avoid the memory blocks
-	used by standard graphics applications (XFree86). Depending on your
-        usage, adjust the value up or down, (0 for maximum usage, 63/127 MB
-        for the least amount).  Note, an arbitrary setting may conflict
-        with XFree86.
-
-	Recommendation: do not set
-	(default = 48 MB)
-
-   e. "accel"
-	enable text acceleration.  This can be enabled/reenabled anytime
-	by using 'fbset -accel true/false'.
-
-	Recommendation: enable
-	(default = set)
-
-   f. "hwcursor"
-	enable cursor acceleration.
-
-	Recommendation: enable
-	(default = set)
-
-   g. "mtrr"
-	enable MTRR.  This allows data transfers to the framebuffer memory
-	to occur in bursts which can significantly increase performance.
-	Not very helpful with the intel chips because of 'shared memory'.
-
-	Recommendation: set
-	(default = set)
-
-   h. "fixed"
-	disable mode switching.
-
-	Recommendation: do not set
-	(default = not set)
-
-   The binary parameters can be unset with a "no" prefix, example "noaccel".
-   The default parameter (not named) is the mode.
-
-C. Kernel booting
-
-Separate each option/option-pair by commas (,) and the option from its value
-with an equals sign (=) as in the following:
-
-video=intelfb:option1,option2=value2
-
-Sample Usage
-------------
-
-In /etc/lilo.conf, add the line:
-
-append="video=intelfb:mode=800x600-32@75,accel,hwcursor,vram=8"
-
-This will initialize the framebuffer to 800x600 at 32bpp and 75Hz. The
-framebuffer will use 8 MB of System RAM. hw acceleration of text and cursor
-will be enabled.
-
-Remarks
--------
-
-If setting this parameter doesn't work (you stay in a 80x25 text-mode),
-you might need to set the "vga=<mode>" parameter too - see vesafb.txt
-in this directory.
-
-
-D.  Module options
-
-	The module parameters are essentially similar to the kernel
-parameters. The main difference is that you need to include a Boolean value
-(1 for TRUE, and 0 for FALSE) for those options which don't need a value.
-
-Example, to enable MTRR, include "mtrr=1".
-
-Sample Usage
-------------
-
-Using the same setup as described above, load the module like this:
-
-	modprobe intelfb mode=800x600-32@75 vram=8 accel=1 hwcursor=1
-
-Or just add the following to a configuration file in /etc/modprobe.d/
-
-	options intelfb mode=800x600-32@75 vram=8 accel=1 hwcursor=1
-
-and just do a
-
-	modprobe intelfb
-
-
-E.  Acknowledgment:
-
-	1.  Geert Uytterhoeven - his excellent howto and the virtual
-                                 framebuffer driver code made this possible.
-
-	2.  Jeff Hartmann for his agpgart code.
-
-	3.  David Dawes for his original kernel 2.4 code.
-
-	4.  The X developers.  Insights were provided just by reading the
-	    XFree86 source code.
-
-	5.  Antonino A. Daplas for his inspiring i810fb driver.
-
-	6.  Andrew Morton for his kernel patches maintenance.
-
-###########################
-Sylvain
diff --git a/Documentation/fb/internals.rst b/Documentation/fb/internals.rst
new file mode 100644
index 000000000000..696b50aa7c24
--- /dev/null
+++ b/Documentation/fb/internals.rst
@@ -0,0 +1,86 @@
+=============================
+Frame Buffer device internals
+=============================
+
+This is a first start for some documentation about frame buffer device
+internals.
+
+Authors:
+
+- Geert Uytterhoeven <geert@linux-m68k.org>, 21 July 1998
+- James Simmons <jsimmons@user.sf.net>, Nov 26 2002
+
+--------------------------------------------------------------------------------
+
+Structures used by the frame buffer device API
+==============================================
+
+The following structures play a role in the game of frame buffer devices. They
+are defined in <linux/fb.h>.
+
+1. Outside the kernel (user space)
+
+  - struct fb_fix_screeninfo
+
+    Device independent unchangeable information about a frame buffer device and
+    a specific video mode. This can be obtained using the FBIOGET_FSCREENINFO
+    ioctl.
+
+  - struct fb_var_screeninfo
+
+    Device independent changeable information about a frame buffer device and a
+    specific video mode. This can be obtained using the FBIOGET_VSCREENINFO
+    ioctl, and updated with the FBIOPUT_VSCREENINFO ioctl. If you want to pan
+    the screen only, you can use the FBIOPAN_DISPLAY ioctl.
+
+  - struct fb_cmap
+
+    Device independent colormap information. You can get and set the colormap
+    using the FBIOGETCMAP and FBIOPUTCMAP ioctls.
+
+
+2. Inside the kernel
+
+  - struct fb_info
+
+    Generic information, API and low level information about a specific frame
+    buffer device instance (slot number, board address, ...).
+
+  - struct `par`
+
+    Device dependent information that uniquely defines the video mode for this
+    particular piece of hardware.
+
+
+Visuals used by the frame buffer device API
+===========================================
+
+
+Monochrome (FB_VISUAL_MONO01 and FB_VISUAL_MONO10)
+--------------------------------------------------
+Each pixel is either black or white.
+
+
+Pseudo color (FB_VISUAL_PSEUDOCOLOR and FB_VISUAL_STATIC_PSEUDOCOLOR)
+---------------------------------------------------------------------
+The whole pixel value is fed through a programmable lookup table that has one
+color (including red, green, and blue intensities) for each possible pixel
+value, and that color is displayed.
+
+
+True color (FB_VISUAL_TRUECOLOR)
+--------------------------------
+The pixel value is broken up into red, green, and blue fields.
+
+
+Direct color (FB_VISUAL_DIRECTCOLOR)
+------------------------------------
+The pixel value is broken up into red, green, and blue fields, each of which
+are looked up in separate red, green, and blue lookup tables.
+
+
+Grayscale displays
+------------------
+Grayscale and static grayscale are special variants of pseudo color and static
+pseudo color, where the red, green and blue components are always equal to
+each other.
diff --git a/Documentation/fb/internals.txt b/Documentation/fb/internals.txt
deleted file mode 100644
index 9b2a2b2f3e57..000000000000
--- a/Documentation/fb/internals.txt
+++ /dev/null
@@ -1,82 +0,0 @@
-
-This is a first start for some documentation about frame buffer device
-internals.
-
-Geert Uytterhoeven <geert@linux-m68k.org>, 21 July 1998
-James Simmons <jsimmons@user.sf.net>, Nov 26 2002
-
---------------------------------------------------------------------------------
-
-	    ***  STRUCTURES USED BY THE FRAME BUFFER DEVICE API  ***
-
-The following structures play a role in the game of frame buffer devices. They
-are defined in <linux/fb.h>.
-
-1. Outside the kernel (user space)
-
-  - struct fb_fix_screeninfo
-
-    Device independent unchangeable information about a frame buffer device and
-    a specific video mode. This can be obtained using the FBIOGET_FSCREENINFO
-    ioctl.
-
-  - struct fb_var_screeninfo
-
-    Device independent changeable information about a frame buffer device and a
-    specific video mode. This can be obtained using the FBIOGET_VSCREENINFO
-    ioctl, and updated with the FBIOPUT_VSCREENINFO ioctl. If you want to pan
-    the screen only, you can use the FBIOPAN_DISPLAY ioctl.
-
-  - struct fb_cmap
-
-    Device independent colormap information. You can get and set the colormap
-    using the FBIOGETCMAP and FBIOPUTCMAP ioctls.
-
-
-2. Inside the kernel
-
-  - struct fb_info
-
-    Generic information, API and low level information about a specific frame
-    buffer device instance (slot number, board address, ...).
-
-  - struct `par'
-
-    Device dependent information that uniquely defines the video mode for this
-    particular piece of hardware.
-
-
---------------------------------------------------------------------------------
-
-	    ***  VISUALS USED BY THE FRAME BUFFER DEVICE API  ***
-
-
-Monochrome (FB_VISUAL_MONO01 and FB_VISUAL_MONO10)
--------------------------------------------------
-Each pixel is either black or white.
-
-
-Pseudo color (FB_VISUAL_PSEUDOCOLOR and FB_VISUAL_STATIC_PSEUDOCOLOR)
----------------------------------------------------------------------
-The whole pixel value is fed through a programmable lookup table that has one
-color (including red, green, and blue intensities) for each possible pixel
-value, and that color is displayed.
-
-
-True color (FB_VISUAL_TRUECOLOR)
---------------------------------
-The pixel value is broken up into red, green, and blue fields.
-
-
-Direct color (FB_VISUAL_DIRECTCOLOR)
-------------------------------------
-The pixel value is broken up into red, green, and blue fields, each of which 
-are looked up in separate red, green, and blue lookup tables.
-
-
-Grayscale displays
-------------------
-Grayscale and static grayscale are special variants of pseudo color and static
-pseudo color, where the red, green and blue components are always equal to
-each other.
-
diff --git a/Documentation/fb/lxfb.rst b/Documentation/fb/lxfb.rst
new file mode 100644
index 000000000000..863e6b98fbae
--- /dev/null
+++ b/Documentation/fb/lxfb.rst
@@ -0,0 +1,55 @@
+=============
+What is lxfb?
+=============
+
+.. [This file is cloned from VesaFB/aty128fb]
+
+
+This is a graphics framebuffer driver for AMD Geode LX based processors.
+
+Advantages:
+
+ * No need to use AMD's VSA code (or other VESA emulation layer) in the
+   BIOS.
+ * It provides a nice large console (128 cols + 48 lines with 1024x768)
+   without using tiny, unreadable fonts.
+ * You can run XF68_FBDev on top of /dev/fb0
+ * Most important: boot logo :-)
+
+Disadvantages:
+
+ * graphic mode is slower than text mode...
+
+
+How to use it?
+==============
+
+Switching modes is done using  lxfb.mode_option=<resolution>... boot
+parameter or using `fbset` program.
+
+See Documentation/fb/modedb.rst for more information on modedb
+resolutions.
+
+
+X11
+===
+
+XF68_FBDev should generally work fine, but it is non-accelerated.
+
+
+Configuration
+=============
+
+You can pass kernel command line options to lxfb with lxfb.<option>.
+For example, lxfb.mode_option=800x600@75.
+Accepted options:
+
+================ ==================================================
+mode_option	 specify the video mode.  Of the form
+		 <x>x<y>[-<bpp>][@<refresh>]
+vram		 size of video ram (normally auto-detected)
+vt_switch	 enable vt switching during suspend/resume.  The vt
+		 switch is slow, but harmless.
+================ ==================================================
+
+Andres Salomon <dilinger@debian.org>
diff --git a/Documentation/fb/lxfb.txt b/Documentation/fb/lxfb.txt
deleted file mode 100644
index 38b3ca6f6ca7..000000000000
--- a/Documentation/fb/lxfb.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-[This file is cloned from VesaFB/aty128fb]
-
-What is lxfb?
-=================
-
-This is a graphics framebuffer driver for AMD Geode LX based processors.
-
-Advantages:
-
- * No need to use AMD's VSA code (or other VESA emulation layer) in the
-   BIOS.
- * It provides a nice large console (128 cols + 48 lines with 1024x768)
-   without using tiny, unreadable fonts.
- * You can run XF68_FBDev on top of /dev/fb0
- * Most important: boot logo :-)
-
-Disadvantages:
-
- * graphic mode is slower than text mode...
-
-
-How to use it?
-==============
-
-Switching modes is done using  lxfb.mode_option=<resolution>... boot
-parameter or using `fbset' program.
-
-See Documentation/fb/modedb.txt for more information on modedb
-resolutions.
-
-
-X11
-===
-
-XF68_FBDev should generally work fine, but it is non-accelerated.
-
-
-Configuration
-=============
-
-You can pass kernel command line options to lxfb with lxfb.<option>.
-For example, lxfb.mode_option=800x600@75.
-Accepted options:
-
-mode_option	- specify the video mode.  Of the form
-		  <x>x<y>[-<bpp>][@<refresh>]
-vram		- size of video ram (normally auto-detected)
-vt_switch	- enable vt switching during suspend/resume.  The vt
-		  switch is slow, but harmless.
-
---
-Andres Salomon <dilinger@debian.org>
diff --git a/Documentation/fb/matroxfb.rst b/Documentation/fb/matroxfb.rst
new file mode 100644
index 000000000000..f1859d98606e
--- /dev/null
+++ b/Documentation/fb/matroxfb.rst
@@ -0,0 +1,443 @@
+=================
+What is matroxfb?
+=================
+
+.. [This file is cloned from VesaFB. Thanks go to Gerd Knorr]
+
+
+This is a driver for a graphic framebuffer for Matrox devices on
+Alpha, Intel and PPC boxes.
+
+Advantages:
+
+ * It provides a nice large console (128 cols + 48 lines with 1024x768)
+   without using tiny, unreadable fonts.
+ * You can run XF{68,86}_FBDev or XFree86 fbdev driver on top of /dev/fb0
+ * Most important: boot logo :-)
+
+Disadvantages:
+
+ * graphic mode is slower than text mode... but you should not notice
+   if you use same resolution as you used in textmode.
+
+
+How to use it?
+==============
+
+Switching modes is done using the video=matroxfb:vesa:... boot parameter
+or using `fbset` program.
+
+If you want, for example, enable a resolution of 1280x1024x24bpp you should
+pass to the kernel this command line: "video=matroxfb:vesa:0x1BB".
+
+You should compile in both vgacon (to boot if you remove you Matrox from
+box) and matroxfb (for graphics mode). You should not compile-in vesafb
+unless you have primary display on non-Matrox VBE2.0 device (see
+Documentation/fb/vesafb.rst for details).
+
+Currently supported video modes are (through vesa:... interface, PowerMac
+has [as addon] compatibility code):
+
+
+Graphic modes
+-------------
+
+===  =======  =======  =======  =======  =======
+bpp  640x400  640x480  768x576  800x600  960x720
+===  =======  =======  =======  =======  =======
+  4             0x12             0x102
+  8   0x100    0x101    0x180    0x103    0x188
+ 15            0x110    0x181    0x113    0x189
+ 16            0x111    0x182    0x114    0x18A
+ 24            0x1B2    0x184    0x1B5    0x18C
+ 32            0x112    0x183    0x115    0x18B
+===  =======  =======  =======  =======  =======
+
+
+Graphic modes (continued)
+-------------------------
+
+===  ======== ======== ========= ========= =========
+bpp  1024x768 1152x864 1280x1024 1408x1056 1600x1200
+===  ======== ======== ========= ========= =========
+  4    0x104             0x106
+  8    0x105    0x190    0x107     0x198     0x11C
+ 15    0x116    0x191    0x119     0x199     0x11D
+ 16    0x117    0x192    0x11A     0x19A     0x11E
+ 24    0x1B8    0x194    0x1BB     0x19C     0x1BF
+ 32    0x118    0x193    0x11B     0x19B
+===  ======== ======== ========= ========= =========
+
+
+Text modes
+----------
+
+==== =======  =======  ========  ========  ========
+text 640x400  640x480  1056x344  1056x400  1056x480
+==== =======  =======  ========  ========  ========
+ 8x8   0x1C0    0x108     0x10A     0x10B     0x10C
+8x16 2, 3, 7                        0x109
+==== =======  =======  ========  ========  ========
+
+You can enter these number either hexadecimal (leading `0x`) or decimal
+(0x100 = 256). You can also use value + 512 to achieve compatibility
+with your old number passed to vesafb.
+
+Non-listed number can be achieved by more complicated command-line, for
+example 1600x1200x32bpp can be specified by `video=matroxfb:vesa:0x11C,depth:32`.
+
+
+X11
+===
+
+XF{68,86}_FBDev should work just fine, but it is non-accelerated. On non-intel
+architectures there are some glitches for 24bpp videomodes. 8, 16 and 32bpp
+works fine.
+
+Running another (accelerated) X-Server like XF86_SVGA works too. But (at least)
+XFree servers have big troubles in multihead configurations (even on first
+head, not even talking about second). Running XFree86 4.x accelerated mga
+driver is possible, but you must not enable DRI - if you do, resolution and
+color depth of your X desktop must match resolution and color depths of your
+virtual consoles, otherwise X will corrupt accelerator settings.
+
+
+SVGALib
+=======
+
+Driver contains SVGALib compatibility code. It is turned on by choosing textual
+mode for console. You can do it at boot time by using videomode
+2,3,7,0x108-0x10C or 0x1C0. At runtime, `fbset -depth 0` does this work.
+Unfortunately, after SVGALib application exits, screen contents is corrupted.
+Switching to another console and back fixes it. I hope that it is SVGALib's
+problem and not mine, but I'm not sure.
+
+
+Configuration
+=============
+
+You can pass kernel command line options to matroxfb with
+`video=matroxfb:option1,option2:value2,option3` (multiple options should be
+separated by comma, values are separated from options by `:`).
+Accepted options:
+
+============ ===================================================================
+mem:X        size of memory (X can be in megabytes, kilobytes or bytes)
+	     You can only decrease value determined by driver because of
+	     it always probe for memory. Default is to use whole detected
+	     memory usable for on-screen display (i.e. max. 8 MB).
+disabled     do not load driver; you can use also `off`, but `disabled`
+	     is here too.
+enabled      load driver, if you have `video=matroxfb:disabled` in LILO
+	     configuration, you can override it by this (you cannot override
+	     `off`). It is default.
+noaccel      do not use acceleration engine. It does not work on Alphas.
+accel        use acceleration engine. It is default.
+nopan        create initial consoles with vyres = yres, thus disabling virtual
+	     scrolling.
+pan          create initial consoles as tall as possible (vyres = memory/vxres).
+	     It is default.
+nopciretry   disable PCI retries. It is needed for some broken chipsets,
+	     it is autodetected for intel's 82437. In this case device does
+	     not comply to PCI 2.1 specs (it will not guarantee that every
+	     transaction terminate with success or retry in 32 PCLK).
+pciretry     enable PCI retries. It is default, except for intel's 82437.
+novga        disables VGA I/O ports. It is default if BIOS did not enable
+	     device. You should not use this option, some boards then do not
+	     restart without power off.
+vga          preserve state of VGA I/O ports. It is default. Driver does not
+	     enable VGA I/O if BIOS did not it (it is not safe to enable it in
+	     most cases).
+nobios       disables BIOS ROM. It is default if BIOS did not enable BIOS
+	     itself. You should not use this option, some boards then do not
+	     restart without power off.
+bios         preserve state of BIOS ROM. It is default. Driver does not enable
+	     BIOS if BIOS was not enabled before.
+noinit       tells driver, that devices were already initialized. You should use
+	     it if you have G100 and/or if driver cannot detect memory, you see
+	     strange pattern on screen and so on. Devices not enabled by BIOS
+	     are still initialized. It is default.
+init         driver initializes every device it knows about.
+memtype      specifies memory type, implies 'init'. This is valid only for G200
+	     and G400 and has following meaning:
+
+	       G200:
+		 -  0 -> 2x128Kx32 chips, 2MB onboard, probably sgram
+		 -  1 -> 2x128Kx32 chips, 4MB onboard, probably sgram
+		 -  2 -> 2x256Kx32 chips, 4MB onboard, probably sgram
+		 -  3 -> 2x256Kx32 chips, 8MB onboard, probably sgram
+		 -  4 -> 2x512Kx16 chips, 8/16MB onboard, probably sdram only
+		 -  5 -> same as above
+		 -  6 -> 4x128Kx32 chips, 4MB onboard, probably sgram
+		 -  7 -> 4x128Kx32 chips, 8MB onboard, probably sgram
+	       G400:
+		 -  0 -> 2x512Kx16 SDRAM, 16/32MB
+		 -	 2x512Kx32 SGRAM, 16/32MB
+		 -  1 -> 2x256Kx32 SGRAM, 8/16MB
+		 -  2 -> 4x128Kx32 SGRAM, 8/16MB
+		 -  3 -> 4x512Kx32 SDRAM, 32MB
+		 -  4 -> 4x256Kx32 SGRAM, 16/32MB
+		 -  5 -> 2x1Mx32 SDRAM, 32MB
+		 -  6 -> reserved
+		 -  7 -> reserved
+
+	     You should use sdram or sgram parameter in addition to memtype
+	     parameter.
+nomtrr       disables write combining on frame buffer. This slows down driver
+	     but there is reported minor incompatibility between GUS DMA and
+	     XFree under high loads if write combining is enabled (sound
+	     dropouts).
+mtrr         enables write combining on frame buffer. It speeds up video
+	     accesses much. It is default. You must have MTRR support enabled
+	     in kernel and your CPU must have MTRR (f.e. Pentium II have them).
+sgram        tells to driver that you have Gxx0 with SGRAM memory. It has no
+	     effect without `init`.
+sdram        tells to driver that you have Gxx0 with SDRAM memory.
+	     It is a default.
+inv24        change timings parameters for 24bpp modes on Millennium and
+	     Millennium II. Specify this if you see strange color shadows
+	     around  characters.
+noinv24      use standard timings. It is the default.
+inverse      invert colors on screen (for LCD displays)
+noinverse    show true colors on screen. It is default.
+dev:X        bind driver to device X. Driver numbers device from 0 up to N,
+	     where device 0 is first `known` device found, 1 second and so on.
+	     lspci lists devices in this order.
+	     Default is `every` known device.
+nohwcursor   disables hardware cursor (use software cursor instead).
+hwcursor     enables hardware cursor. It is default. If you are using
+	     non-accelerated mode (`noaccel` or `fbset -accel false`), software
+	     cursor is used (except for text mode).
+noblink      disables cursor blinking. Cursor in text mode always blinks (hw
+	     limitation).
+blink        enables cursor blinking. It is default.
+nofastfont   disables fastfont feature. It is default.
+fastfont:X   enables fastfont feature. X specifies size of memory reserved for
+	     font data, it must be >= (fontwidth*fontheight*chars_in_font)/8.
+	     It is faster on Gx00 series, but slower on older cards.
+grayscale    enable grayscale summing. It works in PSEUDOCOLOR modes (text,
+	     4bpp, 8bpp). In DIRECTCOLOR modes it is limited to characters
+	     displayed through putc/putcs. Direct accesses to framebuffer
+	     can paint colors.
+nograyscale  disable grayscale summing. It is default.
+cross4MB     enables that pixel line can cross 4MB boundary. It is default for
+	     non-Millennium.
+nocross4MB   pixel line must not cross 4MB boundary. It is default for
+	     Millennium I or II, because of these devices have hardware
+	     limitations which do not allow this. But this option is
+	     incompatible with some (if not all yet released) versions of
+	     XF86_FBDev.
+dfp          enables digital flat panel interface. This option is incompatible
+	     with secondary (TV) output - if DFP is active, TV output must be
+	     inactive and vice versa. DFP always uses same timing as primary
+	     (monitor) output.
+dfp:X        use settings X for digital flat panel interface. X is number from
+	     0 to 0xFF, and meaning of each individual bit is described in
+	     G400 manual, in description of DAC register 0x1F. For normal
+	     operation you should set all bits to zero, except lowest bit. This
+	     lowest bit selects who is source of display clocks, whether G400,
+	     or panel. Default value is now read back from hardware - so you
+	     should specify this value only if you are also using `init`
+	     parameter.
+outputs:XYZ  set mapping between CRTC and outputs. Each letter can have value
+	     of 0 (for no CRTC), 1 (CRTC1) or 2 (CRTC2), and first letter
+	     corresponds to primary analog output, second letter to the
+	     secondary analog output and third letter to the DVI output.
+	     Default setting is 100 for cards below G400 or G400 without DFP,
+	     101 for G400 with DFP, and 111 for G450 and G550. You can set
+	     mapping only on first card, use matroxset for setting up other
+	     devices.
+vesa:X       selects startup videomode. X is number from 0 to 0x1FF, see table
+	     above for detailed explanation. Default is 640x480x8bpp if driver
+	     has 8bpp support. Otherwise first available of 640x350x4bpp,
+	     640x480x15bpp, 640x480x24bpp, 640x480x32bpp or 80x25 text
+	     (80x25 text is always available).
+============ ===================================================================
+
+If you are not satisfied with videomode selected by `vesa` option, you
+can modify it with these options:
+
+============ ===================================================================
+xres:X       horizontal resolution, in pixels. Default is derived from `vesa`
+	     option.
+yres:X       vertical resolution, in pixel lines. Default is derived from `vesa`
+	     option.
+upper:X      top boundary: lines between end of VSYNC pulse and start of first
+	     pixel line of picture. Default is derived from `vesa` option.
+lower:X      bottom boundary: lines between end of picture and start of VSYNC
+	     pulse. Default is derived from `vesa` option.
+vslen:X      length of VSYNC pulse, in lines. Default is derived from `vesa`
+	     option.
+left:X       left boundary: pixels between end of HSYNC pulse and first pixel.
+	     Default is derived from `vesa` option.
+right:X      right boundary: pixels between end of picture and start of HSYNC
+	     pulse. Default is derived from `vesa` option.
+hslen:X      length of HSYNC pulse, in pixels. Default is derived from `vesa`
+	     option.
+pixclock:X   dotclocks, in ps (picoseconds). Default is derived from `vesa`
+	     option and from `fh` and `fv` options.
+sync:X       sync. pulse - bit 0 inverts HSYNC polarity, bit 1 VSYNC polarity.
+	     If bit 3 (value 0x08) is set, composite sync instead of HSYNC is
+	     generated. If bit 5 (value 0x20) is set, sync on green is turned
+	     on. Do not forget that if you want sync on green, you also probably
+	     want composite sync.
+	     Default depends on `vesa`.
+depth:X      Bits per pixel: 0=text, 4,8,15,16,24 or 32. Default depends on
+	     `vesa`.
+============ ===================================================================
+
+If you know capabilities of your monitor, you can specify some (or all) of
+`maxclk`, `fh` and `fv`. In this case, `pixclock` is computed so that
+pixclock <= maxclk, real_fh <= fh and real_fv <= fv.
+
+============ ==================================================================
+maxclk:X     maximum dotclock. X can be specified in MHz, kHz or Hz. Default is
+	     `don`t care`.
+fh:X         maximum horizontal synchronization frequency. X can be specified
+	     in kHz or Hz. Default is `don't care`.
+fv:X         maximum vertical frequency. X must be specified in Hz. Default is
+	     70 for modes derived from `vesa` with yres <= 400, 60Hz for
+	     yres > 400.
+============ ==================================================================
+
+
+Limitations
+===========
+
+There are known and unknown bugs, features and misfeatures.
+Currently there are following known bugs:
+
+ - SVGALib does not restore screen on exit
+ - generic fbcon-cfbX procedures do not work on Alphas. Due to this,
+   `noaccel` (and cfb4 accel) driver does not work on Alpha. So everyone
+   with access to `/dev/fb*` on Alpha can hang machine (you should restrict
+   access to `/dev/fb*` - everyone with access to this device can destroy
+   your monitor, believe me...).
+ - 24bpp does not support correctly XF-FBDev on big-endian architectures.
+ - interlaced text mode is not supported; it looks like hardware limitation,
+   but I'm not sure.
+ - Gxx0 SGRAM/SDRAM is not autodetected.
+ - If you are using more than one framebuffer device, you must boot kernel
+   with 'video=scrollback:0'.
+ - maybe more...
+
+And following misfeatures:
+
+ - SVGALib does not restore screen on exit.
+ - pixclock for text modes is limited by hardware to
+
+    - 83 MHz on G200
+    - 66 MHz on Millennium I
+    - 60 MHz on Millennium II
+
+   Because I have no access to other devices, I do not know specific
+   frequencies for them. So driver does not check this and allows you to
+   set frequency higher that this. It causes sparks, black holes and other
+   pretty effects on screen. Device was not destroyed during tests. :-)
+ - my Millennium G200 oscillator has frequency range from 35 MHz to 380 MHz
+   (and it works with 8bpp on about 320 MHz dotclocks (and changed mclk)).
+   But Matrox says on product sheet that VCO limit is 50-250 MHz, so I believe
+   them (maybe that chip overheats, but it has a very big cooler (G100 has
+   none), so it should work).
+ - special mixed video/graphics videomodes of Mystique and Gx00 - 2G8V16 and
+   G16V16 are not supported
+ - color keying is not supported
+ - feature connector of Mystique and Gx00 is set to VGA mode (it is disabled
+   by BIOS)
+ - DDC (monitor detection) is supported through dualhead driver
+ - some check for input values are not so strict how it should be (you can
+   specify vslen=4000 and so on).
+ - maybe more...
+
+And following features:
+
+ - 4bpp is available only on Millennium I and Millennium II. It is hardware
+   limitation.
+ - selection between 1:5:5:5 and 5:6:5 16bpp videomode is done by -rgba
+   option of fbset: "fbset -depth 16 -rgba 5,5,5" selects 1:5:5:5, anything
+   else selects 5:6:5 mode.
+ - text mode uses 6 bit VGA palette instead of 8 bit (one of 262144 colors
+   instead of one of 16M colors). It is due to hardware limitation of
+   Millennium I/II and SVGALib compatibility.
+
+
+Benchmarks
+==========
+It is time to redraw whole screen 1000 times in 1024x768, 60Hz. It is
+time for draw 6144000 characters on screen through /dev/vcsa
+(for 32bpp it is about 3GB of data (exactly 3000 MB); for 8x16 font in
+16 seconds, i.e. 187 MBps).
+Times were obtained from one older version of driver, now they are about 3%
+faster, it is kernel-space only time on P-II/350 MHz, Millennium I in 33 MHz
+PCI slot, G200 in AGP 2x slot. I did not test vgacon::
+
+  NOACCEL
+	8x16                 12x22
+	Millennium I  G200   Millennium I  G200
+  8bpp    16.42         9.54   12.33         9.13
+  16bpp   21.00        15.70   19.11        15.02
+  24bpp   36.66        36.66   35.00        35.00
+  32bpp   35.00        30.00   33.85        28.66
+
+  ACCEL, nofastfont
+	8x16                 12x22                6x11
+	Millennium I  G200   Millennium I  G200   Millennium I  G200
+  8bpp     7.79         7.24   13.55         7.78   30.00        21.01
+  16bpp    9.13         7.78   16.16         7.78   30.00        21.01
+  24bpp   14.17        10.72   18.69        10.24   34.99        21.01
+  32bpp   16.15	     16.16   18.73        13.09   34.99        21.01
+
+  ACCEL, fastfont
+	8x16                 12x22                6x11
+	Millennium I  G200   Millennium I  G200   Millennium I  G200
+  8bpp     8.41         6.01    6.54         4.37   16.00        10.51
+  16bpp    9.54         9.12    8.76         6.17   17.52        14.01
+  24bpp   15.00        12.36   11.67        10.00   22.01        18.32
+  32bpp   16.18        18.29*  12.71        12.74   24.44        21.00
+
+  TEXT
+	8x16
+	Millennium I  G200
+  TEXT     3.29         1.50
+
+  * Yes, it is slower than Millennium I.
+
+
+Dualhead G400
+=============
+Driver supports dualhead G400 with some limitations:
+ + secondary head shares videomemory with primary head. It is not problem
+   if you have 32MB of videoram, but if you have only 16MB, you may have
+   to think twice before choosing videomode (for example twice 1880x1440x32bpp
+   is not possible).
+ + due to hardware limitation, secondary head can use only 16 and 32bpp
+   videomodes.
+ + secondary head is not accelerated. There were bad problems with accelerated
+   XFree when secondary head used to use acceleration.
+ + secondary head always powerups in 640x480@60-32 videomode. You have to use
+   fbset to change this mode.
+ + secondary head always powerups in monitor mode. You have to use fbmatroxset
+   to change it to TV mode. Also, you must select at least 525 lines for
+   NTSC output and 625 lines for PAL output.
+ + kernel is not fully multihead ready. So some things are impossible to do.
+ + if you compiled it as module, you must insert i2c-matroxfb, matroxfb_maven
+   and matroxfb_crtc2 into kernel.
+
+
+Dualhead G450
+=============
+Driver supports dualhead G450 with some limitations:
+ + secondary head shares videomemory with primary head. It is not problem
+   if you have 32MB of videoram, but if you have only 16MB, you may have
+   to think twice before choosing videomode.
+ + due to hardware limitation, secondary head can use only 16 and 32bpp
+   videomodes.
+ + secondary head is not accelerated.
+ + secondary head always powerups in 640x480@60-32 videomode. You have to use
+   fbset to change this mode.
+ + TV output is not supported
+ + kernel is not fully multihead ready, so some things are impossible to do.
+ + if you compiled it as module, you must insert matroxfb_g450 and matroxfb_crtc2
+   into kernel.
+
+Petr Vandrovec <vandrove@vc.cvut.cz>
diff --git a/Documentation/fb/matroxfb.txt b/Documentation/fb/matroxfb.txt
deleted file mode 100644
index b95f5bb522f2..000000000000
--- a/Documentation/fb/matroxfb.txt
+++ /dev/null
@@ -1,413 +0,0 @@
-[This file is cloned from VesaFB. Thanks go to Gerd Knorr]
-
-What is matroxfb?
-=================
-
-This is a driver for a graphic framebuffer for Matrox devices on
-Alpha, Intel and PPC boxes.
-
-Advantages:
-
- * It provides a nice large console (128 cols + 48 lines with 1024x768)
-   without using tiny, unreadable fonts.
- * You can run XF{68,86}_FBDev or XFree86 fbdev driver on top of /dev/fb0
- * Most important: boot logo :-)
-
-Disadvantages:
-
- * graphic mode is slower than text mode... but you should not notice
-   if you use same resolution as you used in textmode.
-
-
-How to use it?
-==============
-
-Switching modes is done using the video=matroxfb:vesa:... boot parameter
-or using `fbset' program.
-
-If you want, for example, enable a resolution of 1280x1024x24bpp you should
-pass to the kernel this command line: "video=matroxfb:vesa:0x1BB".
-
-You should compile in both vgacon (to boot if you remove you Matrox from
-box) and matroxfb (for graphics mode). You should not compile-in vesafb
-unless you have primary display on non-Matrox VBE2.0 device (see 
-Documentation/fb/vesafb.txt for details).
-
-Currently supported video modes are (through vesa:... interface, PowerMac
-has [as addon] compatibility code):
-
-
-[Graphic modes]
-
-bpp | 640x400  640x480  768x576  800x600  960x720
-----+--------------------------------------------
-  4 |            0x12             0x102            
-  8 |  0x100    0x101    0x180    0x103    0x188   
- 15 |           0x110    0x181    0x113    0x189   
- 16 |           0x111    0x182    0x114    0x18A   
- 24 |           0x1B2    0x184    0x1B5    0x18C   
- 32 |           0x112    0x183    0x115    0x18B   
-
-
-[Graphic modes (continued)]
-
-bpp | 1024x768 1152x864 1280x1024 1408x1056 1600x1200
-----+------------------------------------------------
-  4 |   0x104             0x106
-  8 |   0x105    0x190    0x107     0x198     0x11C
- 15 |   0x116    0x191    0x119     0x199     0x11D
- 16 |   0x117    0x192    0x11A     0x19A     0x11E
- 24 |   0x1B8    0x194    0x1BB     0x19C     0x1BF
- 32 |   0x118    0x193    0x11B     0x19B
-
-
-[Text modes]
-
-text | 640x400  640x480  1056x344  1056x400  1056x480
------+------------------------------------------------
- 8x8 |  0x1C0    0x108     0x10A     0x10B     0x10C
-8x16 | 2, 3, 7                       0x109
-
-You can enter these number either hexadecimal (leading `0x') or decimal
-(0x100 = 256). You can also use value + 512 to achieve compatibility
-with your old number passed to vesafb.
-
-Non-listed number can be achieved by more complicated command-line, for
-example 1600x1200x32bpp can be specified by `video=matroxfb:vesa:0x11C,depth:32'.
-
-
-X11
-===
-
-XF{68,86}_FBDev should work just fine, but it is non-accelerated. On non-intel
-architectures there are some glitches for 24bpp videomodes. 8, 16 and 32bpp
-works fine.
-
-Running another (accelerated) X-Server like XF86_SVGA works too. But (at least)
-XFree servers have big troubles in multihead configurations (even on first
-head, not even talking about second). Running XFree86 4.x accelerated mga 
-driver is possible, but you must not enable DRI - if you do, resolution and
-color depth of your X desktop must match resolution and color depths of your
-virtual consoles, otherwise X will corrupt accelerator settings.
-
-
-SVGALib
-=======
-
-Driver contains SVGALib compatibility code. It is turned on by choosing textual
-mode for console. You can do it at boot time by using videomode
-2,3,7,0x108-0x10C or 0x1C0. At runtime, `fbset -depth 0' does this work.
-Unfortunately, after SVGALib application exits, screen contents is corrupted.
-Switching to another console and back fixes it. I hope that it is SVGALib's
-problem and not mine, but I'm not sure.
-
-
-Configuration
-=============
-
-You can pass kernel command line options to matroxfb with
-`video=matroxfb:option1,option2:value2,option3' (multiple options should be 
-separated by comma, values are separated from options by `:'). 
-Accepted options:
-
-mem:X    - size of memory (X can be in megabytes, kilobytes or bytes)
-           You can only decrease value determined by driver because of
-	   it always probe for memory. Default is to use whole detected
-	   memory usable for on-screen display (i.e. max. 8 MB).
-disabled - do not load driver; you can use also `off', but `disabled'
-           is here too.
-enabled  - load driver, if you have `video=matroxfb:disabled' in LILO
-           configuration, you can override it by this (you cannot override
-	   `off'). It is default.
-noaccel  - do not use acceleration engine. It does not work on Alphas.
-accel    - use acceleration engine. It is default.
-nopan    - create initial consoles with vyres = yres, thus disabling virtual
-           scrolling.
-pan      - create initial consoles as tall as possible (vyres = memory/vxres).
-           It is default.
-nopciretry - disable PCI retries. It is needed for some broken chipsets,
-           it is autodetected for intel's 82437. In this case device does
-	   not comply to PCI 2.1 specs (it will not guarantee that every
-	   transaction terminate with success or retry in 32 PCLK).
-pciretry - enable PCI retries. It is default, except for intel's 82437.
-novga    - disables VGA I/O ports. It is default if BIOS did not enable device.
-           You should not use this option, some boards then do not restart
-	   without power off.
-vga      - preserve state of VGA I/O ports. It is default. Driver does not
-           enable VGA I/O if BIOS did not it (it is not safe to enable it in
-	   most cases).
-nobios   - disables BIOS ROM. It is default if BIOS did not enable BIOS itself.
-           You should not use this option, some boards then do not restart
-	   without power off.
-bios     - preserve state of BIOS ROM. It is default. Driver does not enable
-           BIOS if BIOS was not enabled before.
-noinit   - tells driver, that devices were already initialized. You should use
-           it if you have G100 and/or if driver cannot detect memory, you see
-	   strange pattern on screen and so on. Devices not enabled by BIOS
-	   are still initialized. It is default.
-init     - driver initializes every device it knows about.
-memtype  - specifies memory type, implies 'init'. This is valid only for G200 
-           and G400 and has following meaning:
-             G200: 0 -> 2x128Kx32 chips, 2MB onboard, probably sgram
-                   1 -> 2x128Kx32 chips, 4MB onboard, probably sgram
-                   2 -> 2x256Kx32 chips, 4MB onboard, probably sgram
-                   3 -> 2x256Kx32 chips, 8MB onboard, probably sgram
-                   4 -> 2x512Kx16 chips, 8/16MB onboard, probably sdram only
-                   5 -> same as above
-                   6 -> 4x128Kx32 chips, 4MB onboard, probably sgram
-                   7 -> 4x128Kx32 chips, 8MB onboard, probably sgram
-             G400: 0 -> 2x512Kx16 SDRAM, 16/32MB
-                        2x512Kx32 SGRAM, 16/32MB
-                   1 -> 2x256Kx32 SGRAM, 8/16MB
-                   2 -> 4x128Kx32 SGRAM, 8/16MB
-                   3 -> 4x512Kx32 SDRAM, 32MB
-                   4 -> 4x256Kx32 SGRAM, 16/32MB
-                   5 -> 2x1Mx32 SDRAM, 32MB
-                   6 -> reserved
-                   7 -> reserved
-           You should use sdram or sgram parameter in addition to memtype 
-           parameter.
-nomtrr   - disables write combining on frame buffer. This slows down driver but
-           there is reported minor incompatibility between GUS DMA and XFree
-	   under high loads if write combining is enabled (sound dropouts).
-mtrr     - enables write combining on frame buffer. It speeds up video accesses
-           much. It is default. You must have MTRR support enabled in kernel
-	   and your CPU must have MTRR (f.e. Pentium II have them).
-sgram    - tells to driver that you have Gxx0 with SGRAM memory. It has no
-           effect without `init'.
-sdram    - tells to driver that you have Gxx0 with SDRAM memory.
-           It is a default.
-inv24    - change timings parameters for 24bpp modes on Millennium and
-           Millennium II. Specify this if you see strange color shadows around
-	   characters.
-noinv24  - use standard timings. It is the default.
-inverse  - invert colors on screen (for LCD displays)
-noinverse - show true colors on screen. It is default.
-dev:X    - bind driver to device X. Driver numbers device from 0 up to N,
-           where device 0 is first `known' device found, 1 second and so on.
-	   lspci lists devices in this order.
-	   Default is `every' known device.
-nohwcursor - disables hardware cursor (use software cursor instead).
-hwcursor - enables hardware cursor. It is default. If you are using
-           non-accelerated mode (`noaccel' or `fbset -accel false'), software
-	   cursor is used (except for text mode).
-noblink  - disables cursor blinking. Cursor in text mode always blinks (hw
-           limitation).
-blink    - enables cursor blinking. It is default.
-nofastfont - disables fastfont feature. It is default.
-fastfont:X - enables fastfont feature. X specifies size of memory reserved for
-             font data, it must be >= (fontwidth*fontheight*chars_in_font)/8.
-	     It is faster on Gx00 series, but slower on older cards.
-grayscale - enable grayscale summing. It works in PSEUDOCOLOR modes (text,
-            4bpp, 8bpp). In DIRECTCOLOR modes it is limited to characters
-	    displayed through putc/putcs. Direct accesses to framebuffer
-	    can paint colors.
-nograyscale - disable grayscale summing. It is default.
-cross4MB - enables that pixel line can cross 4MB boundary. It is default for
-           non-Millennium.
-nocross4MB - pixel line must not cross 4MB boundary. It is default for
-             Millennium I or II, because of these devices have hardware
-	     limitations which do not allow this. But this option is
-	     incompatible with some (if not all yet released) versions of
-	     XF86_FBDev.
-dfp      - enables digital flat panel interface. This option is incompatible with
-           secondary (TV) output - if DFP is active, TV output must be
-	   inactive and vice versa. DFP always uses same timing as primary
-	   (monitor) output.
-dfp:X    - use settings X for digital flat panel interface. X is number from
-           0 to 0xFF, and meaning of each individual bit is described in
-	   G400 manual, in description of DAC register 0x1F. For normal operation
-	   you should set all bits to zero, except lowest bit. This lowest bit
-	   selects who is source of display clocks, whether G400, or panel.
-	   Default value is now read back from hardware - so you should specify
-	   this value only if you are also using `init' parameter.
-outputs:XYZ - set mapping between CRTC and outputs. Each letter can have value
-           of 0 (for no CRTC), 1 (CRTC1) or 2 (CRTC2), and first letter corresponds
-	   to primary analog output, second letter to the secondary analog output
-	   and third letter to the DVI output. Default setting is 100 for
-	   cards below G400 or G400 without DFP, 101 for G400 with DFP, and
-	   111 for G450 and G550. You can set mapping only on first card,
-	   use matroxset for setting up other devices.
-vesa:X   - selects startup videomode. X is number from 0 to 0x1FF, see table
-           above for detailed explanation. Default is 640x480x8bpp if driver
-	   has 8bpp support. Otherwise first available of 640x350x4bpp,
-	   640x480x15bpp, 640x480x24bpp, 640x480x32bpp or 80x25 text
-	   (80x25 text is always available).
-
-If you are not satisfied with videomode selected by `vesa' option, you
-can modify it with these options:
-
-xres:X   - horizontal resolution, in pixels. Default is derived from `vesa'
-           option.
-yres:X   - vertical resolution, in pixel lines. Default is derived from `vesa'
-           option.
-upper:X  - top boundary: lines between end of VSYNC pulse and start of first
-           pixel line of picture. Default is derived from `vesa' option.
-lower:X  - bottom boundary: lines between end of picture and start of VSYNC
-           pulse. Default is derived from `vesa' option.
-vslen:X  - length of VSYNC pulse, in lines. Default is derived from `vesa'
-           option.
-left:X   - left boundary: pixels between end of HSYNC pulse and first pixel.
-           Default is derived from `vesa' option.
-right:X  - right boundary: pixels between end of picture and start of HSYNC
-           pulse. Default is derived from `vesa' option.
-hslen:X  - length of HSYNC pulse, in pixels. Default is derived from `vesa'
-           option.
-pixclock:X - dotclocks, in ps (picoseconds). Default is derived from `vesa'
-             option and from `fh' and `fv' options.
-sync:X   - sync. pulse - bit 0 inverts HSYNC polarity, bit 1 VSYNC polarity.
-           If bit 3 (value 0x08) is set, composite sync instead of HSYNC is
-	   generated. If bit 5 (value 0x20) is set, sync on green is turned on.
-	   Do not forget that if you want sync on green, you also probably
-	   want composite sync.
-	   Default depends on `vesa'.
-depth:X  - Bits per pixel: 0=text, 4,8,15,16,24 or 32. Default depends on
-           `vesa'.
-
-If you know capabilities of your monitor, you can specify some (or all) of
-`maxclk', `fh' and `fv'. In this case, `pixclock' is computed so that
-pixclock <= maxclk, real_fh <= fh and real_fv <= fv.
-
-maxclk:X - maximum dotclock. X can be specified in MHz, kHz or Hz. Default is
-           `don't care'.
-fh:X     - maximum horizontal synchronization frequency. X can be specified
-           in kHz or Hz. Default is `don't care'.
-fv:X     - maximum vertical frequency. X must be specified in Hz. Default is
-           70 for modes derived from `vesa' with yres <= 400, 60Hz for
-	   yres > 400.
-
-
-Limitations
-===========
-
-There are known and unknown bugs, features and misfeatures.
-Currently there are following known bugs:
- + SVGALib does not restore screen on exit
- + generic fbcon-cfbX procedures do not work on Alphas. Due to this,
-   `noaccel' (and cfb4 accel) driver does not work on Alpha. So everyone
-   with access to /dev/fb* on Alpha can hang machine (you should restrict
-   access to /dev/fb* - everyone with access to this device can destroy
-   your monitor, believe me...).
- + 24bpp does not support correctly XF-FBDev on big-endian architectures.
- + interlaced text mode is not supported; it looks like hardware limitation,
-   but I'm not sure.
- + Gxx0 SGRAM/SDRAM is not autodetected.
- + If you are using more than one framebuffer device, you must boot kernel
-   with 'video=scrollback:0'.
- + maybe more...
-And following misfeatures:
- + SVGALib does not restore screen on exit.
- + pixclock for text modes is limited by hardware to
-    83 MHz on G200
-    66 MHz on Millennium I
-    60 MHz on Millennium II
-   Because I have no access to other devices, I do not know specific
-   frequencies for them. So driver does not check this and allows you to
-   set frequency higher that this. It causes sparks, black holes and other
-   pretty effects on screen. Device was not destroyed during tests. :-)
- + my Millennium G200 oscillator has frequency range from 35 MHz to 380 MHz
-   (and it works with 8bpp on about 320 MHz dotclocks (and changed mclk)).
-   But Matrox says on product sheet that VCO limit is 50-250 MHz, so I believe
-   them (maybe that chip overheats, but it has a very big cooler (G100 has
-   none), so it should work).
- + special mixed video/graphics videomodes of Mystique and Gx00 - 2G8V16 and
-   G16V16 are not supported
- + color keying is not supported
- + feature connector of Mystique and Gx00 is set to VGA mode (it is disabled
-   by BIOS)
- + DDC (monitor detection) is supported through dualhead driver
- + some check for input values are not so strict how it should be (you can
-   specify vslen=4000 and so on).
- + maybe more...
-And following features:
- + 4bpp is available only on Millennium I and Millennium II. It is hardware
-   limitation.
- + selection between 1:5:5:5 and 5:6:5 16bpp videomode is done by -rgba 
-   option of fbset: "fbset -depth 16 -rgba 5,5,5" selects 1:5:5:5, anything
-   else selects 5:6:5 mode.
- + text mode uses 6 bit VGA palette instead of 8 bit (one of 262144 colors
-   instead of one of 16M colors). It is due to hardware limitation of 
-   Millennium I/II and SVGALib compatibility.
-
-
-Benchmarks
-==========
-It is time to redraw whole screen 1000 times in 1024x768, 60Hz. It is
-time for draw 6144000 characters on screen through /dev/vcsa
-(for 32bpp it is about 3GB of data (exactly 3000 MB); for 8x16 font in 
-16 seconds, i.e. 187 MBps).
-Times were obtained from one older version of driver, now they are about 3%
-faster, it is kernel-space only time on P-II/350 MHz, Millennium I in 33 MHz
-PCI slot, G200 in AGP 2x slot. I did not test vgacon.
-
-NOACCEL
-        8x16                 12x22
-        Millennium I  G200   Millennium I  G200
-8bpp    16.42         9.54   12.33         9.13
-16bpp   21.00        15.70   19.11        15.02
-24bpp   36.66        36.66   35.00        35.00
-32bpp   35.00        30.00   33.85        28.66
-
-ACCEL, nofastfont
-        8x16                 12x22                6x11
-	Millennium I  G200   Millennium I  G200   Millennium I  G200
-8bpp     7.79         7.24   13.55         7.78   30.00        21.01
-16bpp    9.13         7.78   16.16         7.78   30.00        21.01
-24bpp   14.17        10.72   18.69        10.24   34.99        21.01
-32bpp   16.15	     16.16   18.73        13.09   34.99        21.01
-
-ACCEL, fastfont
-        8x16                 12x22                6x11
-	Millennium I  G200   Millennium I  G200   Millennium I  G200
-8bpp     8.41         6.01    6.54         4.37   16.00        10.51
-16bpp    9.54         9.12    8.76         6.17   17.52        14.01
-24bpp   15.00        12.36   11.67        10.00   22.01        18.32
-32bpp   16.18        18.29*  12.71        12.74   24.44        21.00
-
-TEXT
-        8x16
-	Millennium I  G200
-TEXT     3.29         1.50
-
-* Yes, it is slower than Millennium I.
-
-
-Dualhead G400
-=============
-Driver supports dualhead G400 with some limitations:
- + secondary head shares videomemory with primary head. It is not problem
-   if you have 32MB of videoram, but if you have only 16MB, you may have
-   to think twice before choosing videomode (for example twice 1880x1440x32bpp
-   is not possible).
- + due to hardware limitation, secondary head can use only 16 and 32bpp
-   videomodes.
- + secondary head is not accelerated. There were bad problems with accelerated
-   XFree when secondary head used to use acceleration.
- + secondary head always powerups in 640x480@60-32 videomode. You have to use
-   fbset to change this mode.
- + secondary head always powerups in monitor mode. You have to use fbmatroxset
-   to change it to TV mode. Also, you must select at least 525 lines for
-   NTSC output and 625 lines for PAL output.
- + kernel is not fully multihead ready. So some things are impossible to do.
- + if you compiled it as module, you must insert i2c-matroxfb, matroxfb_maven
-   and matroxfb_crtc2 into kernel.
-
-
-Dualhead G450
-=============
-Driver supports dualhead G450 with some limitations:
- + secondary head shares videomemory with primary head. It is not problem
-   if you have 32MB of videoram, but if you have only 16MB, you may have
-   to think twice before choosing videomode.
- + due to hardware limitation, secondary head can use only 16 and 32bpp
-   videomodes.
- + secondary head is not accelerated.
- + secondary head always powerups in 640x480@60-32 videomode. You have to use
-   fbset to change this mode.
- + TV output is not supported
- + kernel is not fully multihead ready, so some things are impossible to do.
- + if you compiled it as module, you must insert matroxfb_g450 and matroxfb_crtc2
-   into kernel.
-	
---
-Petr Vandrovec <vandrove@vc.cvut.cz>
diff --git a/Documentation/fb/metronomefb.rst b/Documentation/fb/metronomefb.rst
new file mode 100644
index 000000000000..63e1d31a7e54
--- /dev/null
+++ b/Documentation/fb/metronomefb.rst
@@ -0,0 +1,38 @@
+===========
+Metronomefb
+===========
+
+Maintained by Jaya Kumar <jayakumar.lkml.gmail.com>
+
+Last revised: Mar 10, 2008
+
+Metronomefb is a driver for the Metronome display controller. The controller
+is from E-Ink Corporation. It is intended to be used to drive the E-Ink
+Vizplex display media. E-Ink hosts some details of this controller and the
+display media here http://www.e-ink.com/products/matrix/metronome.html .
+
+Metronome is interfaced to the host CPU through the AMLCD interface. The
+host CPU generates the control information and the image in a framebuffer
+which is then delivered to the AMLCD interface by a host specific method.
+The display and error status are each pulled through individual GPIOs.
+
+Metronomefb is platform independent and depends on a board specific driver
+to do all physical IO work. Currently, an example is implemented for the
+PXA board used in the AM-200 EPD devkit. This example is am200epd.c
+
+Metronomefb requires waveform information which is delivered via the AMLCD
+interface to the metronome controller. The waveform information is expected to
+be delivered from userspace via the firmware class interface. The waveform file
+can be compressed as long as your udev or hotplug script is aware of the need
+to uncompress it before delivering it. metronomefb will ask for metronome.wbf
+which would typically go into /lib/firmware/metronome.wbf depending on your
+udev/hotplug setup. I have only tested with a single waveform file which was
+originally labeled 23P01201_60_WT0107_MTC. I do not know what it stands for.
+Caution should be exercised when manipulating the waveform as there may be
+a possibility that it could have some permanent effects on the display media.
+I neither have access to nor know exactly what the waveform does in terms of
+the physical media.
+
+Metronomefb uses the deferred IO interface so that it can provide a memory
+mappable frame buffer. It has been tested with tinyx (Xfbdev). It is known
+to work at this time with xeyes, xclock, xloadimage, xpdf.
diff --git a/Documentation/fb/metronomefb.txt b/Documentation/fb/metronomefb.txt
deleted file mode 100644
index 237ca412582d..000000000000
--- a/Documentation/fb/metronomefb.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-			Metronomefb
-			-----------
-Maintained by Jaya Kumar <jayakumar.lkml.gmail.com>
-Last revised: Mar 10, 2008
-
-Metronomefb is a driver for the Metronome display controller. The controller
-is from E-Ink Corporation. It is intended to be used to drive the E-Ink
-Vizplex display media. E-Ink hosts some details of this controller and the
-display media here http://www.e-ink.com/products/matrix/metronome.html .
-
-Metronome is interfaced to the host CPU through the AMLCD interface. The
-host CPU generates the control information and the image in a framebuffer
-which is then delivered to the AMLCD interface by a host specific method.
-The display and error status are each pulled through individual GPIOs.
-
-Metronomefb is platform independent and depends on a board specific driver
-to do all physical IO work. Currently, an example is implemented for the
-PXA board used in the AM-200 EPD devkit. This example is am200epd.c
-
-Metronomefb requires waveform information which is delivered via the AMLCD
-interface to the metronome controller. The waveform information is expected to
-be delivered from userspace via the firmware class interface. The waveform file
-can be compressed as long as your udev or hotplug script is aware of the need
-to uncompress it before delivering it. metronomefb will ask for metronome.wbf
-which would typically go into /lib/firmware/metronome.wbf depending on your
-udev/hotplug setup. I have only tested with a single waveform file which was
-originally labeled 23P01201_60_WT0107_MTC. I do not know what it stands for.
-Caution should be exercised when manipulating the waveform as there may be
-a possibility that it could have some permanent effects on the display media.
-I neither have access to nor know exactly what the waveform does in terms of
-the physical media.
-
-Metronomefb uses the deferred IO interface so that it can provide a memory
-mappable frame buffer. It has been tested with tinyx (Xfbdev). It is known
-to work at this time with xeyes, xclock, xloadimage, xpdf.
-
diff --git a/Documentation/fb/modedb.rst b/Documentation/fb/modedb.rst
new file mode 100644
index 000000000000..3c2397293977
--- /dev/null
+++ b/Documentation/fb/modedb.rst
@@ -0,0 +1,155 @@
+=================================
+modedb default video mode support
+=================================
+
+
+Currently all frame buffer device drivers have their own video mode databases,
+which is a mess and a waste of resources. The main idea of modedb is to have
+
+  - one routine to probe for video modes, which can be used by all frame buffer
+    devices
+  - one generic video mode database with a fair amount of standard videomodes
+    (taken from XFree86)
+  - the possibility to supply your own mode database for graphics hardware that
+    needs non-standard modes, like amifb and Mac frame buffer drivers (which
+    use macmodes.c)
+
+When a frame buffer device receives a video= option it doesn't know, it should
+consider that to be a video mode option. If no frame buffer device is specified
+in a video= option, fbmem considers that to be a global video mode option.
+
+Valid mode specifiers (mode_option argument)::
+
+    <xres>x<yres>[M][R][-<bpp>][@<refresh>][i][m][eDd]
+    <name>[-<bpp>][@<refresh>]
+
+with <xres>, <yres>, <bpp> and <refresh> decimal numbers and <name> a string.
+Things between square brackets are optional.
+
+If 'M' is specified in the mode_option argument (after <yres> and before
+<bpp> and <refresh>, if specified) the timings will be calculated using
+VESA(TM) Coordinated Video Timings instead of looking up the mode from a table.
+If 'R' is specified, do a 'reduced blanking' calculation for digital displays.
+If 'i' is specified, calculate for an interlaced mode.  And if 'm' is
+specified, add margins to the calculation (1.8% of xres rounded down to 8
+pixels and 1.8% of yres).
+
+       Sample usage: 1024x768M@60m - CVT timing with margins
+
+DRM drivers also add options to enable or disable outputs:
+
+'e' will force the display to be enabled, i.e. it will override the detection
+if a display is connected. 'D' will force the display to be enabled and use
+digital output. This is useful for outputs that have both analog and digital
+signals (e.g. HDMI and DVI-I). For other outputs it behaves like 'e'. If 'd'
+is specified the output is disabled.
+
+You can additionally specify which output the options matches to.
+To force the VGA output to be enabled and drive a specific mode say::
+
+    video=VGA-1:1280x1024@60me
+
+Specifying the option multiple times for different ports is possible, e.g.::
+
+    video=LVDS-1:d video=HDMI-1:D
+
+-----------------------------------------------------------------------------
+
+What is the VESA(TM) Coordinated Video Timings (CVT)?
+=====================================================
+
+From the VESA(TM) Website:
+
+     "The purpose of CVT is to provide a method for generating a consistent
+      and coordinated set of standard formats, display refresh rates, and
+      timing specifications for computer display products, both those
+      employing CRTs, and those using other display technologies. The
+      intention of CVT is to give both source and display manufacturers a
+      common set of tools to enable new timings to be developed in a
+      consistent manner that ensures greater compatibility."
+
+This is the third standard approved by VESA(TM) concerning video timings.  The
+first was the Discrete Video Timings (DVT) which is  a collection of
+pre-defined modes approved by VESA(TM).  The second is the Generalized Timing
+Formula (GTF) which is an algorithm to calculate the timings, given the
+pixelclock, the horizontal sync frequency, or the vertical refresh rate.
+
+The GTF is limited by the fact that it is designed mainly for CRT displays.
+It artificially increases the pixelclock because of its high blanking
+requirement. This is inappropriate for digital display interface with its high
+data rate which requires that it conserves the pixelclock as much as possible.
+Also, GTF does not take into account the aspect ratio of the display.
+
+The CVT addresses these limitations.  If used with CRT's, the formula used
+is a derivation of GTF with a few modifications.  If used with digital
+displays, the "reduced blanking" calculation can be used.
+
+From the framebuffer subsystem perspective, new formats need not be added
+to the global mode database whenever a new mode is released by display
+manufacturers. Specifying for CVT will work for most, if not all, relatively
+new CRT displays and probably with most flatpanels, if 'reduced blanking'
+calculation is specified.  (The CVT compatibility of the display can be
+determined from its EDID. The version 1.3 of the EDID has extra 128-byte
+blocks where additional timing information is placed.  As of this time, there
+is no support yet in the layer to parse this additional blocks.)
+
+CVT also introduced a new naming convention (should be seen from dmesg output)::
+
+    <pix>M<a>[-R]
+
+    where: pix = total amount of pixels in MB (xres x yres)
+	   M   = always present
+	   a   = aspect ratio (3 - 4:3; 4 - 5:4; 9 - 15:9, 16:9; A - 16:10)
+	  -R   = reduced blanking
+
+	  example:  .48M3-R - 800x600 with reduced blanking
+
+Note: VESA(TM) has restrictions on what is a standard CVT timing:
+
+      - aspect ratio can only be one of the above values
+      - acceptable refresh rates are 50, 60, 70 or 85 Hz only
+      - if reduced blanking, the refresh rate must be at 60Hz
+
+If one of the above are not satisfied, the kernel will print a warning but the
+timings will still be calculated.
+
+-----------------------------------------------------------------------------
+
+To find a suitable video mode, you just call::
+
+  int __init fb_find_mode(struct fb_var_screeninfo *var,
+			  struct fb_info *info, const char *mode_option,
+			  const struct fb_videomode *db, unsigned int dbsize,
+			  const struct fb_videomode *default_mode,
+			  unsigned int default_bpp)
+
+with db/dbsize your non-standard video mode database, or NULL to use the
+standard video mode database.
+
+fb_find_mode() first tries the specified video mode (or any mode that matches,
+e.g. there can be multiple 640x480 modes, each of them is tried). If that
+fails, the default mode is tried. If that fails, it walks over all modes.
+
+To specify a video mode at bootup, use the following boot options::
+
+    video=<driver>:<xres>x<yres>[-<bpp>][@refresh]
+
+where <driver> is a name from the table below.  Valid default modes can be
+found in linux/drivers/video/modedb.c.  Check your driver's documentation.
+There may be more modes::
+
+    Drivers that support modedb boot options
+    Boot Name	  Cards Supported
+
+    amifb	- Amiga chipset frame buffer
+    aty128fb	- ATI Rage128 / Pro frame buffer
+    atyfb	- ATI Mach64 frame buffer
+    pm2fb	- Permedia 2/2V frame buffer
+    pm3fb	- Permedia 3 frame buffer
+    sstfb	- Voodoo 1/2 (SST1) chipset frame buffer
+    tdfxfb	- 3D Fx frame buffer
+    tridentfb	- Trident (Cyber)blade chipset frame buffer
+    vt8623fb	- VIA 8623 frame buffer
+
+BTW, only a few fb drivers use this at the moment. Others are to follow
+(feel free to send patches). The DRM drivers also support this.
diff --git a/Documentation/fb/modedb.txt b/Documentation/fb/modedb.txt
deleted file mode 100644
index 16aa08453911..000000000000
--- a/Documentation/fb/modedb.txt
+++ /dev/null
@@ -1,151 +0,0 @@
-
-
-			modedb default video mode support
-
-
-Currently all frame buffer device drivers have their own video mode databases,
-which is a mess and a waste of resources. The main idea of modedb is to have
-
-  - one routine to probe for video modes, which can be used by all frame buffer
-    devices
-  - one generic video mode database with a fair amount of standard videomodes
-    (taken from XFree86)
-  - the possibility to supply your own mode database for graphics hardware that
-    needs non-standard modes, like amifb and Mac frame buffer drivers (which
-    use macmodes.c)
-
-When a frame buffer device receives a video= option it doesn't know, it should
-consider that to be a video mode option. If no frame buffer device is specified
-in a video= option, fbmem considers that to be a global video mode option.
-
-Valid mode specifiers (mode_option argument):
-
-    <xres>x<yres>[M][R][-<bpp>][@<refresh>][i][m][eDd]
-    <name>[-<bpp>][@<refresh>]
-
-with <xres>, <yres>, <bpp> and <refresh> decimal numbers and <name> a string.
-Things between square brackets are optional.
-
-If 'M' is specified in the mode_option argument (after <yres> and before
-<bpp> and <refresh>, if specified) the timings will be calculated using
-VESA(TM) Coordinated Video Timings instead of looking up the mode from a table.
-If 'R' is specified, do a 'reduced blanking' calculation for digital displays.
-If 'i' is specified, calculate for an interlaced mode.  And if 'm' is
-specified, add margins to the calculation (1.8% of xres rounded down to 8
-pixels and 1.8% of yres).
-
-       Sample usage: 1024x768M@60m - CVT timing with margins
-
-DRM drivers also add options to enable or disable outputs:
-
-'e' will force the display to be enabled, i.e. it will override the detection
-if a display is connected. 'D' will force the display to be enabled and use
-digital output. This is useful for outputs that have both analog and digital
-signals (e.g. HDMI and DVI-I). For other outputs it behaves like 'e'. If 'd'
-is specified the output is disabled.
-
-You can additionally specify which output the options matches to.
-To force the VGA output to be enabled and drive a specific mode say:
-    video=VGA-1:1280x1024@60me
-
-Specifying the option multiple times for different ports is possible, e.g.:
-    video=LVDS-1:d video=HDMI-1:D
-
-***** oOo ***** oOo ***** oOo ***** oOo ***** oOo ***** oOo ***** oOo *****
-
-What is the VESA(TM) Coordinated Video Timings (CVT)?
-
-From the VESA(TM) Website:
-
-     "The purpose of CVT is to provide a method for generating a consistent
-      and coordinated set of standard formats, display refresh rates, and
-      timing specifications for computer display products, both those
-      employing CRTs, and those using other display technologies. The
-      intention of CVT is to give both source and display manufacturers a
-      common set of tools to enable new timings to be developed in a
-      consistent manner that ensures greater compatibility."
-
-This is the third standard approved by VESA(TM) concerning video timings.  The
-first was the Discrete Video Timings (DVT) which is  a collection of
-pre-defined modes approved by VESA(TM).  The second is the Generalized Timing
-Formula (GTF) which is an algorithm to calculate the timings, given the
-pixelclock, the horizontal sync frequency, or the vertical refresh rate.
-
-The GTF is limited by the fact that it is designed mainly for CRT displays.
-It artificially increases the pixelclock because of its high blanking
-requirement. This is inappropriate for digital display interface with its high
-data rate which requires that it conserves the pixelclock as much as possible.
-Also, GTF does not take into account the aspect ratio of the display.
-
-The CVT addresses these limitations.  If used with CRT's, the formula used
-is a derivation of GTF with a few modifications.  If used with digital
-displays, the "reduced blanking" calculation can be used.
-
-From the framebuffer subsystem perspective, new formats need not be added
-to the global mode database whenever a new mode is released by display
-manufacturers. Specifying for CVT will work for most, if not all, relatively
-new CRT displays and probably with most flatpanels, if 'reduced blanking'
-calculation is specified.  (The CVT compatibility of the display can be
-determined from its EDID. The version 1.3 of the EDID has extra 128-byte
-blocks where additional timing information is placed.  As of this time, there
-is no support yet in the layer to parse this additional blocks.)
-
-CVT also introduced a new naming convention (should be seen from dmesg output):
-
-    <pix>M<a>[-R]
-
-    where: pix = total amount of pixels in MB (xres x yres)
-           M   = always present
-           a   = aspect ratio (3 - 4:3; 4 - 5:4; 9 - 15:9, 16:9; A - 16:10)
-          -R   = reduced blanking
-
-	  example:  .48M3-R - 800x600 with reduced blanking
-
-Note: VESA(TM) has restrictions on what is a standard CVT timing:
-
-      - aspect ratio can only be one of the above values
-      - acceptable refresh rates are 50, 60, 70 or 85 Hz only
-      - if reduced blanking, the refresh rate must be at 60Hz
-
-If one of the above are not satisfied, the kernel will print a warning but the
-timings will still be calculated.
-
-***** oOo ***** oOo ***** oOo ***** oOo ***** oOo ***** oOo ***** oOo *****
-
-To find a suitable video mode, you just call
-
-int __init fb_find_mode(struct fb_var_screeninfo *var,
-                        struct fb_info *info, const char *mode_option,
-                        const struct fb_videomode *db, unsigned int dbsize,
-                        const struct fb_videomode *default_mode,
-                        unsigned int default_bpp)
-
-with db/dbsize your non-standard video mode database, or NULL to use the
-standard video mode database.
-
-fb_find_mode() first tries the specified video mode (or any mode that matches,
-e.g. there can be multiple 640x480 modes, each of them is tried). If that
-fails, the default mode is tried. If that fails, it walks over all modes.
-
-To specify a video mode at bootup, use the following boot options:
-    video=<driver>:<xres>x<yres>[-<bpp>][@refresh]
-
-where <driver> is a name from the table below.  Valid default modes can be
-found in linux/drivers/video/modedb.c.  Check your driver's documentation.
-There may be more modes.
-
-    Drivers that support modedb boot options
-    Boot Name	  Cards Supported
-
-    amifb	- Amiga chipset frame buffer
-    aty128fb	- ATI Rage128 / Pro frame buffer
-    atyfb	- ATI Mach64 frame buffer
-    pm2fb	- Permedia 2/2V frame buffer
-    pm3fb	- Permedia 3 frame buffer
-    sstfb	- Voodoo 1/2 (SST1) chipset frame buffer
-    tdfxfb	- 3D Fx frame buffer
-    tridentfb	- Trident (Cyber)blade chipset frame buffer
-    vt8623fb	- VIA 8623 frame buffer
-
-BTW, only a few fb drivers use this at the moment. Others are to follow
-(feel free to send patches). The DRM drivers also support this.
diff --git a/Documentation/fb/pvr2fb.rst b/Documentation/fb/pvr2fb.rst
new file mode 100644
index 000000000000..fcf2c21c8fcf
--- /dev/null
+++ b/Documentation/fb/pvr2fb.rst
@@ -0,0 +1,66 @@
+===============
+What is pvr2fb?
+===============
+
+This is a driver for PowerVR 2 based graphics frame buffers, such as the
+one found in the Dreamcast.
+
+Advantages:
+
+ * It provides a nice large console (128 cols + 48 lines with 1024x768)
+   without using tiny, unreadable fonts (NOT on the Dreamcast)
+ * You can run XF86_FBDev on top of /dev/fb0
+ * Most important: boot logo :-)
+
+Disadvantages:
+
+ * Driver is largely untested on non-Dreamcast systems.
+
+Configuration
+=============
+
+You can pass kernel command line options to pvr2fb with
+`video=pvr2fb:option1,option2:value2,option3` (multiple options should be
+separated by comma, values are separated from options by `:`).
+
+Accepted options:
+
+==========  ==================================================================
+font:X      default font to use. All fonts are supported, including the
+	    SUN12x22 font which is very nice at high resolutions.
+
+
+mode:X      default video mode with format [xres]x[yres]-<bpp>@<refresh rate>
+	    The following video modes are supported:
+	    640x640-16@60, 640x480-24@60, 640x480-32@60. The Dreamcast
+	    defaults to 640x480-16@60. At the time of writing the
+	    24bpp and 32bpp modes function poorly. Work to fix that is
+	    ongoing
+
+	    Note: the 640x240 mode is currently broken, and should not be
+	    used for any reason. It is only mentioned here as a reference.
+
+inverse     invert colors on screen (for LCD displays)
+
+nomtrr      disables write combining on frame buffer. This slows down driver
+	    but there is reported minor incompatibility between GUS DMA and
+	    XFree under high loads if write combining is enabled (sound
+	    dropouts). MTRR is enabled by default on systems that have it
+	    configured and that support it.
+
+cable:X     cable type. This can be any of the following: vga, rgb, and
+	    composite. If none is specified, we guess.
+
+output:X    output type. This can be any of the following: pal, ntsc, and
+	    vga. If none is specified, we guess.
+==========  ==================================================================
+
+X11
+===
+
+XF86_FBDev has been shown to work on the Dreamcast in the past - though not yet
+on any 2.6 series kernel.
+
+Paul Mundt <lethal@linuxdc.org>
+
+Updated by Adrian McMenamin <adrian@mcmen.demon.co.uk>
diff --git a/Documentation/fb/pvr2fb.txt b/Documentation/fb/pvr2fb.txt
deleted file mode 100644
index 36bdeff585e2..000000000000
--- a/Documentation/fb/pvr2fb.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-$Id: pvr2fb.txt,v 1.1 2001/05/24 05:09:16 mrbrown Exp $
-
-What is pvr2fb?
-===============
-
-This is a driver for PowerVR 2 based graphics frame buffers, such as the
-one found in the Dreamcast.
-
-Advantages:
-
- * It provides a nice large console (128 cols + 48 lines with 1024x768)
-   without using tiny, unreadable fonts (NOT on the Dreamcast)
- * You can run XF86_FBDev on top of /dev/fb0
- * Most important: boot logo :-)
-
-Disadvantages:
-
- * Driver is largely untested on non-Dreamcast systems.
-
-Configuration
-=============
-
-You can pass kernel command line options to pvr2fb with
-`video=pvr2fb:option1,option2:value2,option3' (multiple options should be
-separated by comma, values are separated from options by `:').
-Accepted options:
-
-font:X    - default font to use. All fonts are supported, including the
-            SUN12x22 font which is very nice at high resolutions.
-
-	    
-mode:X    - default video mode with format [xres]x[yres]-<bpp>@<refresh rate>
-            The following video modes are supported:
-            640x640-16@60, 640x480-24@60, 640x480-32@60. The Dreamcast
-            defaults to 640x480-16@60. At the time of writing the
-            24bpp and 32bpp modes function poorly. Work to fix that is
-            ongoing
-
-            Note: the 640x240 mode is currently broken, and should not be
-            used for any reason. It is only mentioned here as a reference.
-
-inverse   - invert colors on screen (for LCD displays)
-
-nomtrr    - disables write combining on frame buffer. This slows down driver
-            but there is reported minor incompatibility between GUS DMA and
-            XFree under high loads if write combining is enabled (sound
-            dropouts). MTRR is enabled by default on systems that have it
-            configured and that support it.
-
-cable:X   - cable type. This can be any of the following: vga, rgb, and
-            composite. If none is specified, we guess.
-
-output:X  - output type. This can be any of the following: pal, ntsc, and
-            vga. If none is specified, we guess.
-
-X11
-===
-
-XF86_FBDev has been shown to work on the Dreamcast in the past - though not yet
-on any 2.6 series kernel.
-
---
-Paul Mundt <lethal@linuxdc.org>
-Updated by Adrian McMenamin <adrian@mcmen.demon.co.uk>
-
diff --git a/Documentation/fb/pxafb.rst b/Documentation/fb/pxafb.rst
new file mode 100644
index 000000000000..90177f5e7e76
--- /dev/null
+++ b/Documentation/fb/pxafb.rst
@@ -0,0 +1,173 @@
+================================
+Driver for PXA25x LCD controller
+================================
+
+The driver supports the following options, either via
+options=<OPTIONS> when modular or video=pxafb:<OPTIONS> when built in.
+
+For example::
+
+	modprobe pxafb options=vmem:2M,mode:640x480-8,passive
+
+or on the kernel command line::
+
+	video=pxafb:vmem:2M,mode:640x480-8,passive
+
+vmem: VIDEO_MEM_SIZE
+
+	Amount of video memory to allocate (can be suffixed with K or M
+	for kilobytes or megabytes)
+
+mode:XRESxYRES[-BPP]
+
+	XRES == LCCR1_PPL + 1
+
+	YRES == LLCR2_LPP + 1
+
+		The resolution of the display in pixels
+
+	BPP == The bit depth. Valid values are 1, 2, 4, 8 and 16.
+
+pixclock:PIXCLOCK
+
+	Pixel clock in picoseconds
+
+left:LEFT == LCCR1_BLW + 1
+
+right:RIGHT == LCCR1_ELW + 1
+
+hsynclen:HSYNC == LCCR1_HSW + 1
+
+upper:UPPER == LCCR2_BFW
+
+lower:LOWER == LCCR2_EFR
+
+vsynclen:VSYNC == LCCR2_VSW + 1
+
+	Display margins and sync times
+
+color | mono => LCCR0_CMS
+
+	umm...
+
+active | passive => LCCR0_PAS
+
+	Active (TFT) or Passive (STN) display
+
+single | dual => LCCR0_SDS
+
+	Single or dual panel passive display
+
+4pix | 8pix => LCCR0_DPD
+
+	4 or 8 pixel monochrome single panel data
+
+hsync:HSYNC, vsync:VSYNC
+
+	Horizontal and vertical sync. 0 => active low, 1 => active
+	high.
+
+dpc:DPC
+
+	Double pixel clock. 1=>true, 0=>false
+
+outputen:POLARITY
+
+	Output Enable Polarity. 0 => active low, 1 => active high
+
+pixclockpol:POLARITY
+
+	pixel clock polarity
+	0 => falling edge, 1 => rising edge
+
+
+Overlay Support for PXA27x and later LCD controllers
+====================================================
+
+  PXA27x and later processors support overlay1 and overlay2 on-top of the
+  base framebuffer (although under-neath the base is also possible). They
+  support palette and no-palette RGB formats, as well as YUV formats (only
+  available on overlay2). These overlays have dedicated DMA channels and
+  behave in a similar way as a framebuffer.
+
+  However, there are some differences between these overlay framebuffers
+  and normal framebuffers, as listed below:
+
+  1. overlay can start at a 32-bit word aligned position within the base
+     framebuffer, which means they have a start (x, y). This information
+     is encoded into var->nonstd (no, var->xoffset and var->yoffset are
+     not for such purpose).
+
+  2. overlay framebuffer is allocated dynamically according to specified
+     'struct fb_var_screeninfo', the amount is decided by::
+
+	var->xres_virtual * var->yres_virtual * bpp
+
+     bpp = 16 -- for RGB565 or RGBT555
+
+     bpp = 24 -- for YUV444 packed
+
+     bpp = 24 -- for YUV444 planar
+
+     bpp = 16 -- for YUV422 planar (1 pixel = 1 Y + 1/2 Cb + 1/2 Cr)
+
+     bpp = 12 -- for YUV420 planar (1 pixel = 1 Y + 1/4 Cb + 1/4 Cr)
+
+     NOTE:
+
+     a. overlay does not support panning in x-direction, thus
+	var->xres_virtual will always be equal to var->xres
+
+     b. line length of overlay(s) must be on a 32-bit word boundary,
+	for YUV planar modes, it is a requirement for the component
+	with minimum bits per pixel,  e.g. for YUV420, Cr component
+	for one pixel is actually 2-bits, it means the line length
+	should be a multiple of 16-pixels
+
+     c. starting horizontal position (XPOS) should start on a 32-bit
+	word boundary, otherwise the fb_check_var() will just fail.
+
+     d. the rectangle of the overlay should be within the base plane,
+	otherwise fail
+
+     Applications should follow the sequence below to operate an overlay
+     framebuffer:
+
+	 a. open("/dev/fb[1-2]", ...)
+	 b. ioctl(fd, FBIOGET_VSCREENINFO, ...)
+	 c. modify 'var' with desired parameters:
+
+	    1) var->xres and var->yres
+	    2) larger var->yres_virtual if more memory is required,
+	       usually for double-buffering
+	    3) var->nonstd for starting (x, y) and color format
+	    4) var->{red, green, blue, transp} if RGB mode is to be used
+
+	 d. ioctl(fd, FBIOPUT_VSCREENINFO, ...)
+	 e. ioctl(fd, FBIOGET_FSCREENINFO, ...)
+	 f. mmap
+	 g. ...
+
+  3. for YUV planar formats, these are actually not supported within the
+     framebuffer framework, application has to take care of the offsets
+     and lengths of each component within the framebuffer.
+
+  4. var->nonstd is used to pass starting (x, y) position and color format,
+     the detailed bit fields are shown below::
+
+      31                23  20         10          0
+       +-----------------+---+----------+----------+
+       |  ... unused ... |FOR|   XPOS   |   YPOS   |
+       +-----------------+---+----------+----------+
+
+     FOR  - color format, as defined by OVERLAY_FORMAT_* in pxafb.h
+
+	  - 0 - RGB
+	  - 1 - YUV444 PACKED
+	  - 2 - YUV444 PLANAR
+	  - 3 - YUV422 PLANAR
+	  - 4 - YUR420 PLANAR
+
+     XPOS - starting horizontal position
+
+     YPOS - starting vertical position
diff --git a/Documentation/fb/pxafb.txt b/Documentation/fb/pxafb.txt
deleted file mode 100644
index d143a0a749f9..000000000000
--- a/Documentation/fb/pxafb.txt
+++ /dev/null
@@ -1,142 +0,0 @@
-Driver for PXA25x LCD controller
-================================
-
-The driver supports the following options, either via
-options=<OPTIONS> when modular or video=pxafb:<OPTIONS> when built in.
-
-For example:
-	modprobe pxafb options=vmem:2M,mode:640x480-8,passive
-or on the kernel command line
-	video=pxafb:vmem:2M,mode:640x480-8,passive
-
-vmem: VIDEO_MEM_SIZE
-	Amount of video memory to allocate (can be suffixed with K or M
-	for kilobytes or megabytes)
-
-mode:XRESxYRES[-BPP]
-	XRES == LCCR1_PPL + 1
-	YRES == LLCR2_LPP + 1
-		The resolution of the display in pixels
-	BPP == The bit depth. Valid values are 1, 2, 4, 8 and 16.
-
-pixclock:PIXCLOCK
-	Pixel clock in picoseconds
-
-left:LEFT == LCCR1_BLW + 1
-right:RIGHT == LCCR1_ELW + 1
-hsynclen:HSYNC == LCCR1_HSW + 1
-upper:UPPER == LCCR2_BFW
-lower:LOWER == LCCR2_EFR
-vsynclen:VSYNC == LCCR2_VSW + 1
-	Display margins and sync times
-
-color | mono => LCCR0_CMS
-	umm...
-
-active | passive => LCCR0_PAS
-	Active (TFT) or Passive (STN) display
-
-single | dual => LCCR0_SDS
-	Single or dual panel passive display
-
-4pix | 8pix => LCCR0_DPD
-	4 or 8 pixel monochrome single panel data
-
-hsync:HSYNC
-vsync:VSYNC
-	Horizontal and vertical sync. 0 => active low, 1 => active
-	high.
-
-dpc:DPC
-	Double pixel clock. 1=>true, 0=>false
-
-outputen:POLARITY
-	Output Enable Polarity. 0 => active low, 1 => active high
-
-pixclockpol:POLARITY
-	pixel clock polarity
-	0 => falling edge, 1 => rising edge
-
-
-Overlay Support for PXA27x and later LCD controllers
-====================================================
-
-  PXA27x and later processors support overlay1 and overlay2 on-top of the
-  base framebuffer (although under-neath the base is also possible). They
-  support palette and no-palette RGB formats, as well as YUV formats (only
-  available on overlay2). These overlays have dedicated DMA channels and
-  behave in a similar way as a framebuffer.
-
-  However, there are some differences between these overlay framebuffers
-  and normal framebuffers, as listed below:
-
-  1. overlay can start at a 32-bit word aligned position within the base
-     framebuffer, which means they have a start (x, y). This information
-     is encoded into var->nonstd (no, var->xoffset and var->yoffset are
-     not for such purpose).
-
-  2. overlay framebuffer is allocated dynamically according to specified
-     'struct fb_var_screeninfo', the amount is decided by:
-
-        var->xres_virtual * var->yres_virtual * bpp
-
-     bpp = 16 -- for RGB565 or RGBT555
-         = 24 -- for YUV444 packed
-         = 24 -- for YUV444 planar
-	 = 16 -- for YUV422 planar (1 pixel = 1 Y + 1/2 Cb + 1/2 Cr)
-	 = 12 -- for YUV420 planar (1 pixel = 1 Y + 1/4 Cb + 1/4 Cr)
-
-     NOTE:
-
-     a. overlay does not support panning in x-direction, thus
-        var->xres_virtual will always be equal to var->xres
-
-     b. line length of overlay(s) must be on a 32-bit word boundary,
-        for YUV planar modes, it is a requirement for the component
-	with minimum bits per pixel,  e.g. for YUV420, Cr component
-	for one pixel is actually 2-bits, it means the line length
-	should be a multiple of 16-pixels
-
-     c. starting horizontal position (XPOS) should start on a 32-bit
-        word boundary, otherwise the fb_check_var() will just fail.
-
-     d. the rectangle of the overlay should be within the base plane,
-        otherwise fail
-
-     Applications should follow the sequence below to operate an overlay
-     framebuffer:
-
-         a. open("/dev/fb[1-2]", ...)
-	 b. ioctl(fd, FBIOGET_VSCREENINFO, ...)
-	 c. modify 'var' with desired parameters:
-	    1) var->xres and var->yres
-	    2) larger var->yres_virtual if more memory is required,
-	       usually for double-buffering
-	    3) var->nonstd for starting (x, y) and color format
-	    4) var->{red, green, blue, transp} if RGB mode is to be used
-	 d. ioctl(fd, FBIOPUT_VSCREENINFO, ...)
-	 e. ioctl(fd, FBIOGET_FSCREENINFO, ...)
-	 f. mmap
-	 g. ...
-
-  3. for YUV planar formats, these are actually not supported within the
-     framebuffer framework, application has to take care of the offsets
-     and lengths of each component within the framebuffer.
-
-  4. var->nonstd is used to pass starting (x, y) position and color format,
-     the detailed bit fields are shown below:
-
-    31                23  20         10          0
-     +-----------------+---+----------+----------+
-     |  ... unused ... |FOR|   XPOS   |   YPOS   |
-     +-----------------+---+----------+----------+
-
-     FOR  - color format, as defined by OVERLAY_FORMAT_* in pxafb.h
-            0 - RGB
-	    1 - YUV444 PACKED
-	    2 - YUV444 PLANAR
-	    3 - YUV422 PLANAR
-	    4 - YUR420 PLANAR
-
-     XPOS - starting horizontal position
-     YPOS - starting vertical position
diff --git a/Documentation/fb/s3fb.rst b/Documentation/fb/s3fb.rst
new file mode 100644
index 000000000000..e809d69c21a7
--- /dev/null
+++ b/Documentation/fb/s3fb.rst
@@ -0,0 +1,82 @@
+===========================================
+s3fb - fbdev driver for S3 Trio/Virge chips
+===========================================
+
+
+Supported Hardware
+==================
+
+	S3 Trio32
+	S3 Trio64 (and variants V+, UV+, V2/DX, V2/GX)
+	S3 Virge  (and variants VX, DX, GX and GX2+)
+	S3 Plato/PX		(completely untested)
+	S3 Aurora64V+		(completely untested)
+
+	- only PCI bus supported
+	- only BIOS initialized VGA devices supported
+	- probably not working on big endian
+
+I tested s3fb on Trio64 (plain, V+ and V2/DX) and Virge (plain, VX, DX),
+all on i386.
+
+
+Supported Features
+==================
+
+	*  4 bpp pseudocolor modes (with 18bit palette, two variants)
+	*  8 bpp pseudocolor mode (with 18bit palette)
+	* 16 bpp truecolor modes (RGB 555 and RGB 565)
+	* 24 bpp truecolor mode (RGB 888) on (only on Virge VX)
+	* 32 bpp truecolor mode (RGB 888) on (not on Virge VX)
+	* text mode (activated by bpp = 0)
+	* interlaced mode variant (not available in text mode)
+	* doublescan mode variant (not available in text mode)
+	* panning in both directions
+	* suspend/resume support
+	* DPMS support
+
+Text mode is supported even in higher resolutions, but there is limitation to
+lower pixclocks (maximum usually between 50-60 MHz, depending on specific
+hardware, i get best results from plain S3 Trio32 card - about 75 MHz). This
+limitation is not enforced by driver. Text mode supports 8bit wide fonts only
+(hardware limitation) and 16bit tall fonts (driver limitation). Text mode
+support is broken on S3 Trio64 V2/DX.
+
+There are two 4 bpp modes. First mode (selected if nonstd == 0) is mode with
+packed pixels, high nibble first. Second mode (selected if nonstd == 1) is mode
+with interleaved planes (1 byte interleave), MSB first. Both modes support
+8bit wide fonts only (driver limitation).
+
+Suspend/resume works on systems that initialize video card during resume and
+if device is active (for example used by fbcon).
+
+
+Missing Features
+================
+(alias TODO list)
+
+	* secondary (not initialized by BIOS) device support
+	* big endian support
+	* Zorro bus support
+	* MMIO support
+	* 24 bpp mode support on more cards
+	* support for fontwidths != 8 in 4 bpp modes
+	* support for fontheight != 16 in text mode
+	* composite and external sync (is anyone able to test this?)
+	* hardware cursor
+	* video overlay support
+	* vsync synchronization
+	* feature connector support
+	* acceleration support (8514-like 2D, Virge 3D, busmaster transfers)
+	* better values for some magic registers (performance issues)
+
+
+Known bugs
+==========
+
+	* cursor disable in text mode doesn't work
+	* text mode broken on S3 Trio64 V2/DX
+
+
+--
+Ondrej Zajicek <santiago@crfreenet.org>
diff --git a/Documentation/fb/s3fb.txt b/Documentation/fb/s3fb.txt
deleted file mode 100644
index 2c97770bdbaa..000000000000
--- a/Documentation/fb/s3fb.txt
+++ /dev/null
@@ -1,82 +0,0 @@
-
-	s3fb - fbdev driver for S3 Trio/Virge chips
-	===========================================
-
-
-Supported Hardware
-==================
-
-	S3 Trio32
-	S3 Trio64 (and variants V+, UV+, V2/DX, V2/GX)
-	S3 Virge  (and variants VX, DX, GX and GX2+)
-	S3 Plato/PX		(completely untested)
-	S3 Aurora64V+		(completely untested)
-
-	- only PCI bus supported
-	- only BIOS initialized VGA devices supported
-	- probably not working on big endian
-
-I tested s3fb on Trio64 (plain, V+ and V2/DX) and Virge (plain, VX, DX),
-all on i386.
-
-
-Supported Features
-==================
-
-	*  4 bpp pseudocolor modes (with 18bit palette, two variants)
-	*  8 bpp pseudocolor mode (with 18bit palette)
-	* 16 bpp truecolor modes (RGB 555 and RGB 565)
-	* 24 bpp truecolor mode (RGB 888) on (only on Virge VX)
-	* 32 bpp truecolor mode (RGB 888) on (not on Virge VX)
-	* text mode (activated by bpp = 0)
-	* interlaced mode variant (not available in text mode)
-	* doublescan mode variant (not available in text mode)
-	* panning in both directions
-	* suspend/resume support
-	* DPMS support
-
-Text mode is supported even in higher resolutions, but there is limitation to
-lower pixclocks (maximum usually between 50-60 MHz, depending on specific
-hardware, i get best results from plain S3 Trio32 card - about 75 MHz). This
-limitation is not enforced by driver. Text mode supports 8bit wide fonts only
-(hardware limitation) and 16bit tall fonts (driver limitation). Text mode
-support is broken on S3 Trio64 V2/DX.
-
-There are two 4 bpp modes. First mode (selected if nonstd == 0) is mode with
-packed pixels, high nibble first. Second mode (selected if nonstd == 1) is mode
-with interleaved planes (1 byte interleave), MSB first. Both modes support
-8bit wide fonts only (driver limitation).
-
-Suspend/resume works on systems that initialize video card during resume and
-if device is active (for example used by fbcon).
-
-
-Missing Features
-================
-(alias TODO list)
-
-	* secondary (not initialized by BIOS) device support
-   	* big endian support
-	* Zorro bus support
-	* MMIO support
-	* 24 bpp mode support on more cards
-	* support for fontwidths != 8 in 4 bpp modes
-	* support for fontheight != 16 in text mode
-	* composite and external sync (is anyone able to test this?)
-	* hardware cursor
-	* video overlay support
-	* vsync synchronization
-	* feature connector support
-	* acceleration support (8514-like 2D, Virge 3D, busmaster transfers)
-	* better values for some magic registers (performance issues)
-
-
-Known bugs
-==========
-
-	* cursor disable in text mode doesn't work
-	* text mode broken on S3 Trio64 V2/DX
-
-
---
-Ondrej Zajicek <santiago@crfreenet.org>
diff --git a/Documentation/fb/sa1100fb.rst b/Documentation/fb/sa1100fb.rst
new file mode 100644
index 000000000000..67e2650e017d
--- /dev/null
+++ b/Documentation/fb/sa1100fb.rst
@@ -0,0 +1,40 @@
+=================
+What is sa1100fb?
+=================
+
+.. [This file is cloned from VesaFB/matroxfb]
+
+
+This is a driver for a graphic framebuffer for the SA-1100 LCD
+controller.
+
+Configuration
+==============
+
+For most common passive displays, giving the option::
+
+  video=sa1100fb:bpp:<value>,lccr0:<value>,lccr1:<value>,lccr2:<value>,lccr3:<value>
+
+on the kernel command line should be enough to configure the
+controller. The bits per pixel (bpp) value should be 4, 8, 12, or
+16. LCCR values are display-specific and should be computed as
+documented in the SA-1100 Developer's Manual, Section 11.7. Dual-panel
+displays are supported as long as the SDS bit is set in LCCR0; GPIO<9:2>
+are used for the lower panel.
+
+For active displays or displays requiring additional configuration
+(controlling backlights, powering on the LCD, etc.), the command line
+options may not be enough to configure the display. Adding sections to
+sa1100fb_init_fbinfo(), sa1100fb_activate_var(),
+sa1100fb_disable_lcd_controller(), and sa1100fb_enable_lcd_controller()
+will probably be necessary.
+
+Accepted options::
+
+	bpp:<value>	Configure for <value> bits per pixel
+	lccr0:<value>	Configure LCD control register 0 (11.7.3)
+	lccr1:<value>	Configure LCD control register 1 (11.7.4)
+	lccr2:<value>	Configure LCD control register 2 (11.7.5)
+	lccr3:<value>	Configure LCD control register 3 (11.7.6)
+
+Mark Huang <mhuang@livetoy.com>
diff --git a/Documentation/fb/sa1100fb.txt b/Documentation/fb/sa1100fb.txt
deleted file mode 100644
index f1b4220464df..000000000000
--- a/Documentation/fb/sa1100fb.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-[This file is cloned from VesaFB/matroxfb]
-
-What is sa1100fb?
-=================
-
-This is a driver for a graphic framebuffer for the SA-1100 LCD
-controller.
-
-Configuration
-==============
-
-For most common passive displays, giving the option
-
-video=sa1100fb:bpp:<value>,lccr0:<value>,lccr1:<value>,lccr2:<value>,lccr3:<value>
-
-on the kernel command line should be enough to configure the
-controller. The bits per pixel (bpp) value should be 4, 8, 12, or
-16. LCCR values are display-specific and should be computed as
-documented in the SA-1100 Developer's Manual, Section 11.7. Dual-panel
-displays are supported as long as the SDS bit is set in LCCR0; GPIO<9:2>
-are used for the lower panel.
-
-For active displays or displays requiring additional configuration
-(controlling backlights, powering on the LCD, etc.), the command line
-options may not be enough to configure the display. Adding sections to
-sa1100fb_init_fbinfo(), sa1100fb_activate_var(),
-sa1100fb_disable_lcd_controller(), and sa1100fb_enable_lcd_controller()
-will probably be necessary.
-
-Accepted options:
-
-bpp:<value>	Configure for <value> bits per pixel
-lccr0:<value>	Configure LCD control register 0 (11.7.3)
-lccr1:<value>	Configure LCD control register 1 (11.7.4)
-lccr2:<value>	Configure LCD control register 2 (11.7.5)
-lccr3:<value>	Configure LCD control register 3 (11.7.6)
-
---
-Mark Huang <mhuang@livetoy.com>
diff --git a/Documentation/fb/sh7760fb.rst b/Documentation/fb/sh7760fb.rst
new file mode 100644
index 000000000000..c3266485f810
--- /dev/null
+++ b/Documentation/fb/sh7760fb.rst
@@ -0,0 +1,130 @@
+================================================
+SH7760/SH7763 integrated LCDC Framebuffer driver
+================================================
+
+0. Overview
+-----------
+The SH7760/SH7763 have an integrated LCD Display controller (LCDC) which
+supports (in theory) resolutions ranging from 1x1 to 1024x1024,
+with color depths ranging from 1 to 16 bits, on STN, DSTN and TFT Panels.
+
+Caveats:
+
+* Framebuffer memory must be a large chunk allocated at the top
+  of Area3 (HW requirement). Because of this requirement you should NOT
+  make the driver a module since at runtime it may become impossible to
+  get a large enough contiguous chunk of memory.
+
+* The driver does not support changing resolution while loaded
+  (displays aren't hotpluggable anyway)
+
+* Heavy flickering may be observed
+  a) if you're using 15/16bit color modes at >= 640x480 px resolutions,
+  b) during PCMCIA (or any other slow bus) activity.
+
+* Rotation works only 90degress clockwise, and only if horizontal
+  resolution is <= 320 pixels.
+
+Files:
+	- drivers/video/sh7760fb.c
+	- include/asm-sh/sh7760fb.h
+	- Documentation/fb/sh7760fb.rst
+
+1. Platform setup
+-----------------
+SH7760:
+ Video data is fetched via the DMABRG DMA engine, so you have to
+ configure the SH DMAC for DMABRG mode (write 0x94808080 to the
+ DMARSRA register somewhere at boot).
+
+ PFC registers PCCR and PCDR must be set to peripheral mode.
+ (write zeros to both).
+
+The driver does NOT do the above for you since board setup is, well, job
+of the board setup code.
+
+2. Panel definitions
+--------------------
+The LCDC must explicitly be told about the type of LCD panel
+attached.  Data must be wrapped in a "struct sh7760fb_platdata" and
+passed to the driver as platform_data.
+
+Suggest you take a closer look at the SH7760 Manual, Section 30.
+(http://documentation.renesas.com/eng/products/mpumcu/e602291_sh7760.pdf)
+
+The following code illustrates what needs to be done to
+get the framebuffer working on a 640x480 TFT::
+
+  #include <linux/fb.h>
+  #include <asm/sh7760fb.h>
+
+  /*
+   * NEC NL6440bc26-01 640x480 TFT
+   * dotclock 25175 kHz
+   * Xres                640     Yres            480
+   * Htotal      800     Vtotal          525
+   * HsynStart   656     VsynStart       490
+   * HsynLenn    30      VsynLenn        2
+   *
+   * The linux framebuffer layer does not use the syncstart/synclen
+   * values but right/left/upper/lower margin values. The comments
+   * for the x_margin explain how to calculate those from given
+   * panel sync timings.
+   */
+  static struct fb_videomode nl6448bc26 = {
+         .name           = "NL6448BC26",
+         .refresh        = 60,
+         .xres           = 640,
+         .yres           = 480,
+         .pixclock       = 39683,        /* in picoseconds! */
+         .hsync_len      = 30,
+         .vsync_len      = 2,
+         .left_margin    = 114,  /* HTOT - (HSYNSLEN + HSYNSTART) */
+         .right_margin   = 16,   /* HSYNSTART - XRES */
+         .upper_margin   = 33,   /* VTOT - (VSYNLEN + VSYNSTART) */
+         .lower_margin   = 10,   /* VSYNSTART - YRES */
+         .sync           = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+         .vmode          = FB_VMODE_NONINTERLACED,
+         .flag           = 0,
+  };
+
+  static struct sh7760fb_platdata sh7760fb_nl6448 = {
+         .def_mode       = &nl6448bc26,
+         .ldmtr          = LDMTR_TFT_COLOR_16,   /* 16bit TFT panel */
+         .lddfr          = LDDFR_8BPP,           /* we want 8bit output */
+         .ldpmmr         = 0x0070,
+         .ldpspr         = 0x0500,
+         .ldaclnr        = 0,
+         .ldickr         = LDICKR_CLKSRC(LCDC_CLKSRC_EXTERNAL) |
+			 LDICKR_CLKDIV(1),
+         .rotate         = 0,
+         .novsync        = 1,
+         .blank          = NULL,
+  };
+
+  /* SH7760:
+   * 0xFE300800: 256 * 4byte xRGB palette ram
+   * 0xFE300C00: 42 bytes ctrl registers
+   */
+  static struct resource sh7760_lcdc_res[] = {
+         [0] = {
+	       .start  = 0xFE300800,
+	       .end    = 0xFE300CFF,
+	       .flags  = IORESOURCE_MEM,
+         },
+         [1] = {
+	       .start  = 65,
+	       .end    = 65,
+	       .flags  = IORESOURCE_IRQ,
+         },
+  };
+
+  static struct platform_device sh7760_lcdc_dev = {
+         .dev    = {
+	       .platform_data = &sh7760fb_nl6448,
+         },
+         .name           = "sh7760-lcdc",
+         .id             = -1,
+         .resource       = sh7760_lcdc_res,
+         .num_resources  = ARRAY_SIZE(sh7760_lcdc_res),
+  };
diff --git a/Documentation/fb/sh7760fb.txt b/Documentation/fb/sh7760fb.txt
deleted file mode 100644
index b994c3b10549..000000000000
--- a/Documentation/fb/sh7760fb.txt
+++ /dev/null
@@ -1,131 +0,0 @@
-SH7760/SH7763 integrated LCDC Framebuffer driver
-================================================
-
-0. Overview
------------
-The SH7760/SH7763 have an integrated LCD Display controller (LCDC) which
-supports (in theory) resolutions ranging from 1x1 to 1024x1024,
-with color depths ranging from 1 to 16 bits, on STN, DSTN and TFT Panels.
-
-Caveats:
-* Framebuffer memory must be a large chunk allocated at the top
-  of Area3 (HW requirement). Because of this requirement you should NOT
-  make the driver a module since at runtime it may become impossible to
-  get a large enough contiguous chunk of memory.
-
-* The driver does not support changing resolution while loaded
-  (displays aren't hotpluggable anyway)
-
-* Heavy flickering may be observed
-  a) if you're using 15/16bit color modes at >= 640x480 px resolutions,
-  b) during PCMCIA (or any other slow bus) activity.
-
-* Rotation works only 90degress clockwise, and only if horizontal
-  resolution is <= 320 pixels.
-
-files:   drivers/video/sh7760fb.c
-        include/asm-sh/sh7760fb.h
-        Documentation/fb/sh7760fb.txt
-
-1. Platform setup
------------------
-SH7760:
- Video data is fetched via the DMABRG DMA engine, so you have to
- configure the SH DMAC for DMABRG mode (write 0x94808080 to the
- DMARSRA register somewhere at boot).
-
- PFC registers PCCR and PCDR must be set to peripheral mode.
- (write zeros to both).
-
-The driver does NOT do the above for you since board setup is, well, job
-of the board setup code.
-
-2. Panel definitions
---------------------
-The LCDC must explicitly be told about the type of LCD panel
-attached.  Data must be wrapped in a "struct sh7760fb_platdata" and
-passed to the driver as platform_data.
-
-Suggest you take a closer look at the SH7760 Manual, Section 30.
-(http://documentation.renesas.com/eng/products/mpumcu/e602291_sh7760.pdf)
-
-The following code illustrates what needs to be done to
-get the framebuffer working on a 640x480 TFT:
-
-====================== cut here ======================================
-
-#include <linux/fb.h>
-#include <asm/sh7760fb.h>
-
-/*
- * NEC NL6440bc26-01 640x480 TFT
- * dotclock 25175 kHz
- * Xres                640     Yres            480
- * Htotal      800     Vtotal          525
- * HsynStart   656     VsynStart       490
- * HsynLenn    30      VsynLenn        2
- *
- * The linux framebuffer layer does not use the syncstart/synclen
- * values but right/left/upper/lower margin values. The comments
- * for the x_margin explain how to calculate those from given
- * panel sync timings.
- */
-static struct fb_videomode nl6448bc26 = {
-       .name           = "NL6448BC26",
-       .refresh        = 60,
-       .xres           = 640,
-       .yres           = 480,
-       .pixclock       = 39683,        /* in picoseconds! */
-       .hsync_len      = 30,
-       .vsync_len      = 2,
-       .left_margin    = 114,  /* HTOT - (HSYNSLEN + HSYNSTART) */
-       .right_margin   = 16,   /* HSYNSTART - XRES */
-       .upper_margin   = 33,   /* VTOT - (VSYNLEN + VSYNSTART) */
-       .lower_margin   = 10,   /* VSYNSTART - YRES */
-       .sync           = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
-       .vmode          = FB_VMODE_NONINTERLACED,
-       .flag           = 0,
-};
-
-static struct sh7760fb_platdata sh7760fb_nl6448 = {
-       .def_mode       = &nl6448bc26,
-       .ldmtr          = LDMTR_TFT_COLOR_16,   /* 16bit TFT panel */
-       .lddfr          = LDDFR_8BPP,           /* we want 8bit output */
-       .ldpmmr         = 0x0070,
-       .ldpspr         = 0x0500,
-       .ldaclnr        = 0,
-       .ldickr         = LDICKR_CLKSRC(LCDC_CLKSRC_EXTERNAL) |
-                         LDICKR_CLKDIV(1),
-       .rotate         = 0,
-       .novsync        = 1,
-       .blank          = NULL,
-};
-
-/* SH7760:
- * 0xFE300800: 256 * 4byte xRGB palette ram
- * 0xFE300C00: 42 bytes ctrl registers
- */
-static struct resource sh7760_lcdc_res[] = {
-       [0] = {
-               .start  = 0xFE300800,
-               .end    = 0xFE300CFF,
-               .flags  = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start  = 65,
-               .end    = 65,
-               .flags  = IORESOURCE_IRQ,
-       },
-};
-
-static struct platform_device sh7760_lcdc_dev = {
-       .dev    = {
-               .platform_data = &sh7760fb_nl6448,
-       },
-       .name           = "sh7760-lcdc",
-       .id             = -1,
-       .resource       = sh7760_lcdc_res,
-       .num_resources  = ARRAY_SIZE(sh7760_lcdc_res),
-};
-
-====================== cut here ======================================
diff --git a/Documentation/fb/sisfb.rst b/Documentation/fb/sisfb.rst
new file mode 100644
index 000000000000..8f4e502ea12e
--- /dev/null
+++ b/Documentation/fb/sisfb.rst
@@ -0,0 +1,160 @@
+==============
+What is sisfb?
+==============
+
+sisfb is a framebuffer device driver for SiS (Silicon Integrated Systems)
+graphics chips. Supported are:
+
+- SiS 300 series: SiS 300/305, 540, 630(S), 730(S)
+- SiS 315 series: SiS 315/H/PRO, 55x, (M)65x, 740, (M)661(F/M)X, (M)741(GX)
+- SiS 330 series: SiS 330 ("Xabre"), (M)760
+
+
+Why do I need a framebuffer driver?
+===================================
+
+sisfb is eg. useful if you want a high-resolution text console. Besides that,
+sisfb is required to run DirectFB (which comes with an additional, dedicated
+driver for the 315 series).
+
+On the 300 series, sisfb on kernels older than 2.6.3 furthermore plays an
+important role in connection with DRM/DRI: Sisfb manages the memory heap
+used by DRM/DRI for 3D texture and other data. This memory management is
+required for using DRI/DRM.
+
+Kernels >= around 2.6.3 do not need sisfb any longer for DRI/DRM memory
+management. The SiS DRM driver has been updated and features a memory manager
+of its own (which will be used if sisfb is not compiled). So unless you want
+a graphical console, you don't need sisfb on kernels >=2.6.3.
+
+Sidenote: Since this seems to be a commonly made mistake: sisfb and vesafb
+cannot be active at the same time! Do only select one of them in your kernel
+configuration.
+
+
+How are parameters passed to sisfb?
+===================================
+
+Well, it depends: If compiled statically into the kernel, use lilo's append
+statement to add the parameters to the kernel command line. Please see lilo's
+(or GRUB's) documentation for more information. If sisfb is a kernel module,
+parameters are given with the modprobe (or insmod) command.
+
+Example for sisfb as part of the static kernel: Add the following line to your
+lilo.conf::
+
+     append="video=sisfb:mode:1024x768x16,mem:12288,rate:75"
+
+Example for sisfb as a module: Start sisfb by typing::
+
+     modprobe sisfb mode=1024x768x16 rate=75 mem=12288
+
+A common mistake is that folks use a wrong parameter format when using the
+driver compiled into the kernel. Please note: If compiled into the kernel,
+the parameter format is video=sisfb:mode:none or video=sisfb:mode:1024x768x16
+(or whatever mode you want to use, alternatively using any other format
+described above or the vesa keyword instead of mode). If compiled as a module,
+the parameter format reads mode=none or mode=1024x768x16 (or whatever mode you
+want to use). Using a "=" for a ":" (and vice versa) is a huge difference!
+Additionally: If you give more than one argument to the in-kernel sisfb, the
+arguments are separated with ",". For example::
+
+   video=sisfb:mode:1024x768x16,rate:75,mem:12288
+
+
+How do I use it?
+================
+
+Preface statement: This file only covers very little of the driver's
+capabilities and features. Please refer to the author's and maintainer's
+website at http://www.winischhofer.net/linuxsisvga.shtml for more
+information. Additionally, "modinfo sisfb" gives an overview over all
+supported options including some explanation.
+
+The desired display mode can be specified using the keyword "mode" with
+a parameter in one of the following formats:
+
+  - XxYxDepth or
+  - XxY-Depth or
+  - XxY-Depth@Rate or
+  - XxY
+  - or simply use the VESA mode number in hexadecimal or decimal.
+
+For example: 1024x768x16, 1024x768-16@75, 1280x1024-16. If no depth is
+specified, it defaults to 8. If no rate is given, it defaults to 60Hz. Depth 32
+means 24bit color depth (but 32 bit framebuffer depth, which is not relevant
+to the user).
+
+Additionally, sisfb understands the keyword "vesa" followed by a VESA mode
+number in decimal or hexadecimal. For example: vesa=791 or vesa=0x117. Please
+use either "mode" or "vesa" but not both.
+
+Linux 2.4 only: If no mode is given, sisfb defaults to "no mode" (mode=none) if
+compiled as a module; if sisfb is statically compiled into the kernel, it
+defaults to 800x600x8 unless CRT2 type is LCD, in which case the LCD's native
+resolution is used. If you want to switch to a different mode, use the fbset
+shell command.
+
+Linux 2.6 only: If no mode is given, sisfb defaults to 800x600x8 unless CRT2
+type is LCD, in which case it defaults to the LCD's native resolution. If
+you want to switch to another mode, use the stty shell command.
+
+You should compile in both vgacon (to boot if you remove you SiS card from
+your system) and sisfb (for graphics mode). Under Linux 2.6, also "Framebuffer
+console support" (fbcon) is needed for a graphical console.
+
+You should *not* compile-in vesafb. And please do not use the "vga=" keyword
+in lilo's or grub's configuration file; mode selection is done using the
+"mode" or "vesa" keywords as a parameter. See above and below.
+
+
+X11
+===
+
+If using XFree86 or X.org, it is recommended that you don't use the "fbdev"
+driver but the dedicated "sis" X driver. The "sis" X driver and sisfb are
+developed by the same person (Thomas Winischhofer) and cooperate well with
+each other.
+
+
+SVGALib
+=======
+
+SVGALib, if directly accessing the hardware, never restores the screen
+correctly, especially on laptops or if the output devices are LCD or TV.
+Therefore, use the chipset "FBDEV" in SVGALib configuration. This will make
+SVGALib use the framebuffer device for mode switches and restoration.
+
+
+Configuration
+=============
+
+(Some) accepted options:
+
+=========  ==================================================================
+off        Disable sisfb. This option is only understood if sisfb is
+	   in-kernel, not a module.
+mem:X      size of memory for the console, rest will be used for DRI/DRM. X
+	   is in kilobytes. On 300 series, the default is 4096, 8192 or
+	   16384 (each in kilobyte) depending on how much video ram the card
+	   has. On 315/330 series, the default is the maximum available ram
+	   (since DRI/DRM is not supported for these chipsets).
+noaccel    do not use 2D acceleration engine. (Default: use acceleration)
+noypan     disable y-panning and scroll by redrawing the entire screen.
+	   This is much slower than y-panning. (Default: use y-panning)
+vesa:X     selects startup videomode. X is number from 0 to 0x1FF and
+	   represents the VESA mode number (can be given in decimal or
+	   hexadecimal form, the latter prefixed with "0x").
+mode:X     selects startup videomode. Please see above for the format of
+	   "X".
+=========  ==================================================================
+
+Boolean options such as "noaccel" or "noypan" are to be given without a
+parameter if sisfb is in-kernel (for example "video=sisfb:noypan). If
+sisfb is a module, these are to be set to 1 (for example "modprobe sisfb
+noypan=1").
+
+
+Thomas Winischhofer <thomas@winischhofer.net>
+
+May 27, 2004
diff --git a/Documentation/fb/sisfb.txt b/Documentation/fb/sisfb.txt
deleted file mode 100644
index 2e68e503e72f..000000000000
--- a/Documentation/fb/sisfb.txt
+++ /dev/null
@@ -1,158 +0,0 @@
-
-What is sisfb?
-==============
-
-sisfb is a framebuffer device driver for SiS (Silicon Integrated Systems)
-graphics chips. Supported are:
-
-- SiS 300 series: SiS 300/305, 540, 630(S), 730(S)
-- SiS 315 series: SiS 315/H/PRO, 55x, (M)65x, 740, (M)661(F/M)X, (M)741(GX)
-- SiS 330 series: SiS 330 ("Xabre"), (M)760
-
-
-Why do I need a framebuffer driver?
-===================================
-
-sisfb is eg. useful if you want a high-resolution text console. Besides that,
-sisfb is required to run DirectFB (which comes with an additional, dedicated
-driver for the 315 series).
-
-On the 300 series, sisfb on kernels older than 2.6.3 furthermore plays an
-important role in connection with DRM/DRI: Sisfb manages the memory heap
-used by DRM/DRI for 3D texture and other data. This memory management is
-required for using DRI/DRM.
-
-Kernels >= around 2.6.3 do not need sisfb any longer for DRI/DRM memory
-management. The SiS DRM driver has been updated and features a memory manager
-of its own (which will be used if sisfb is not compiled). So unless you want
-a graphical console, you don't need sisfb on kernels >=2.6.3.
-
-Sidenote: Since this seems to be a commonly made mistake: sisfb and vesafb
-cannot be active at the same time! Do only select one of them in your kernel
-configuration.
-
-
-How are parameters passed to sisfb?
-===================================
-
-Well, it depends: If compiled statically into the kernel, use lilo's append
-statement to add the parameters to the kernel command line. Please see lilo's
-(or GRUB's) documentation for more information. If sisfb is a kernel module,
-parameters are given with the modprobe (or insmod) command.
-
-Example for sisfb as part of the static kernel: Add the following line to your
-lilo.conf:
-
-     append="video=sisfb:mode:1024x768x16,mem:12288,rate:75"
-
-Example for sisfb as a module: Start sisfb by typing
-
-     modprobe sisfb mode=1024x768x16 rate=75 mem=12288
-
-A common mistake is that folks use a wrong parameter format when using the
-driver compiled into the kernel. Please note: If compiled into the kernel,
-the parameter format is video=sisfb:mode:none or video=sisfb:mode:1024x768x16
-(or whatever mode you want to use, alternatively using any other format
-described above or the vesa keyword instead of mode). If compiled as a module,
-the parameter format reads mode=none or mode=1024x768x16 (or whatever mode you
-want to use). Using a "=" for a ":" (and vice versa) is a huge difference!
-Additionally: If you give more than one argument to the in-kernel sisfb, the
-arguments are separated with ",". For example:
-
-   video=sisfb:mode:1024x768x16,rate:75,mem:12288
-
-
-How do I use it?
-================
-
-Preface statement: This file only covers very little of the driver's
-capabilities and features. Please refer to the author's and maintainer's
-website at http://www.winischhofer.net/linuxsisvga.shtml for more
-information. Additionally, "modinfo sisfb" gives an overview over all
-supported options including some explanation.
-
-The desired display mode can be specified using the keyword "mode" with
-a parameter in one of the following formats:
-  - XxYxDepth or
-  - XxY-Depth or
-  - XxY-Depth@Rate or
-  - XxY
-  - or simply use the VESA mode number in hexadecimal or decimal.
-
-For example: 1024x768x16, 1024x768-16@75, 1280x1024-16. If no depth is
-specified, it defaults to 8. If no rate is given, it defaults to 60Hz. Depth 32
-means 24bit color depth (but 32 bit framebuffer depth, which is not relevant
-to the user).
-
-Additionally, sisfb understands the keyword "vesa" followed by a VESA mode
-number in decimal or hexadecimal. For example: vesa=791 or vesa=0x117. Please
-use either "mode" or "vesa" but not both.
-
-Linux 2.4 only: If no mode is given, sisfb defaults to "no mode" (mode=none) if
-compiled as a module; if sisfb is statically compiled into the kernel, it
-defaults to 800x600x8 unless CRT2 type is LCD, in which case the LCD's native
-resolution is used. If you want to switch to a different mode, use the fbset
-shell command.
-
-Linux 2.6 only: If no mode is given, sisfb defaults to 800x600x8 unless CRT2
-type is LCD, in which case it defaults to the LCD's native resolution. If
-you want to switch to another mode, use the stty shell command.
-
-You should compile in both vgacon (to boot if you remove you SiS card from
-your system) and sisfb (for graphics mode). Under Linux 2.6, also "Framebuffer
-console support" (fbcon) is needed for a graphical console.
-
-You should *not* compile-in vesafb. And please do not use the "vga=" keyword
-in lilo's or grub's configuration file; mode selection is done using the
-"mode" or "vesa" keywords as a parameter. See above and below.
-
-
-X11
-===
-
-If using XFree86 or X.org, it is recommended that you don't use the "fbdev"
-driver but the dedicated "sis" X driver. The "sis" X driver and sisfb are
-developed by the same person (Thomas Winischhofer) and cooperate well with
-each other.
-
-
-SVGALib
-=======
-
-SVGALib, if directly accessing the hardware, never restores the screen
-correctly, especially on laptops or if the output devices are LCD or TV.
-Therefore, use the chipset "FBDEV" in SVGALib configuration. This will make
-SVGALib use the framebuffer device for mode switches and restoration.
-
-
-Configuration
-=============
-
-(Some) accepted options:
-
-off      - Disable sisfb. This option is only understood if sisfb is
-           in-kernel, not a module.
-mem:X    - size of memory for the console, rest will be used for DRI/DRM. X
-           is in kilobytes. On 300 series, the default is 4096, 8192 or
-	   16384 (each in kilobyte) depending on how much video ram the card
-           has. On 315/330 series, the default is the maximum available ram
-	   (since DRI/DRM is not supported for these chipsets).
-noaccel  - do not use 2D acceleration engine. (Default: use acceleration)
-noypan   - disable y-panning and scroll by redrawing the entire screen.
-           This is much slower than y-panning. (Default: use y-panning)
-vesa:X   - selects startup videomode. X is number from 0 to 0x1FF and
-           represents the VESA mode number (can be given in decimal or
-	   hexadecimal form, the latter prefixed with "0x").
-mode:X   - selects startup videomode. Please see above for the format of
-           "X".
-
-Boolean options such as "noaccel" or "noypan" are to be given without a
-parameter if sisfb is in-kernel (for example "video=sisfb:noypan). If
-sisfb is a module, these are to be set to 1 (for example "modprobe sisfb
-noypan=1").
-
---
-Thomas Winischhofer <thomas@winischhofer.net>
-May 27, 2004
-
-
diff --git a/Documentation/fb/sm501.rst b/Documentation/fb/sm501.rst
new file mode 100644
index 000000000000..03e02c8042a7
--- /dev/null
+++ b/Documentation/fb/sm501.rst
@@ -0,0 +1,15 @@
+=======
+sm501fb
+=======
+
+Configuration:
+
+You can pass the following kernel command line options to sm501
+videoframebuffer::
+
+	sm501fb.bpp=	SM501 Display driver:
+			Specify bits-per-pixel if not specified by 'mode'
+
+	sm501fb.mode=	SM501 Display driver:
+			Specify resolution as
+			"<xres>x<yres>[-<bpp>][@<refresh>]"
diff --git a/Documentation/fb/sm501.txt b/Documentation/fb/sm501.txt
deleted file mode 100644
index 187f3b3ccb6c..000000000000
--- a/Documentation/fb/sm501.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-Configuration:
-
-You can pass the following kernel command line options to sm501 videoframebuffer:
-
-	sm501fb.bpp=	SM501 Display driver:
-			Specify bits-per-pixel if not specified by 'mode'
-
-	sm501fb.mode=	SM501 Display driver:
-			Specify resolution as
-			"<xres>x<yres>[-<bpp>][@<refresh>]"
diff --git a/Documentation/fb/sm712fb.rst b/Documentation/fb/sm712fb.rst
new file mode 100644
index 000000000000..994dad3b0238
--- /dev/null
+++ b/Documentation/fb/sm712fb.rst
@@ -0,0 +1,35 @@
+================
+What is sm712fb?
+================
+
+This is a graphics framebuffer driver for Silicon Motion SM712 based processors.
+
+How to use it?
+==============
+
+Switching modes is done using the video=sm712fb:... boot parameter.
+
+If you want, for example, enable a resolution of 1280x1024x24bpp you should
+pass to the kernel this command line: "video=sm712fb:0x31B".
+
+You should not compile-in vesafb.
+
+Currently supported video modes are:
+
+Graphic modes
+-------------
+
+===  =======  =======  ========  =========
+bpp  640x480  800x600  1024x768  1280x1024
+===  =======  =======  ========  =========
+  8  0x301    0x303    0x305     0x307
+ 16  0x311    0x314    0x317     0x31A
+ 24  0x312    0x315    0x318     0x31B
+===  =======  =======  ========  =========
+
+Missing Features
+================
+(alias TODO list)
+
+	* 2D acceleratrion
+	* dual-head support
diff --git a/Documentation/fb/sm712fb.txt b/Documentation/fb/sm712fb.txt
deleted file mode 100644
index c388442edf51..000000000000
--- a/Documentation/fb/sm712fb.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-What is sm712fb?
-=================
-
-This is a graphics framebuffer driver for Silicon Motion SM712 based processors.
-
-How to use it?
-==============
-
-Switching modes is done using the video=sm712fb:... boot parameter.
-
-If you want, for example, enable a resolution of 1280x1024x24bpp you should
-pass to the kernel this command line: "video=sm712fb:0x31B".
-
-You should not compile-in vesafb.
-
-Currently supported video modes are:
-
-[Graphic modes]
-
-bpp | 640x480  800x600  1024x768  1280x1024
-----+--------------------------------------------
-  8 | 0x301    0x303    0x305    0x307
- 16 | 0x311    0x314    0x317    0x31A
- 24 | 0x312    0x315    0x318    0x31B
-
-Missing Features
-================
-(alias TODO list)
-
-	* 2D acceleratrion
-	* dual-head support
diff --git a/Documentation/fb/sstfb.rst b/Documentation/fb/sstfb.rst
new file mode 100644
index 000000000000..8e8c1b940359
--- /dev/null
+++ b/Documentation/fb/sstfb.rst
@@ -0,0 +1,207 @@
+=====
+sstfb
+=====
+
+Introduction
+============
+
+This is a frame buffer device driver for 3dfx' Voodoo Graphics
+(aka voodoo 1, aka sst1) and Voodoo² (aka Voodoo 2, aka CVG) based
+video boards. It's highly experimental code, but is guaranteed to work
+on my computer, with my "Maxi Gamer 3D" and "Maxi Gamer 3d²" boards,
+and with me "between chair and keyboard". Some people tested other
+combinations and it seems that it works.
+The main page is located at <http://sstfb.sourceforge.net>, and if
+you want the latest version, check out the CVS, as the driver is a work
+in progress, I feel uncomfortable with releasing tarballs of something
+not completely working...Don't worry, it's still more than usable
+(I eat my own dog food)
+
+Please read the Bug section, and report any success or failure to me
+(Ghozlane Toumi <gtoumi@laposte.net>).
+BTW, If you have only one monitor , and you don't feel like playing
+with the vga passthrou cable, I can only suggest borrowing a screen
+somewhere...
+
+
+Installation
+============
+
+This driver (should) work on ix86, with "late" 2.2.x kernel (tested
+with x = 19) and "recent" 2.4.x kernel, as a module or compiled in.
+It has been included in mainstream kernel since the infamous 2.4.10.
+You can apply the patches found in `sstfb/kernel/*-2.{2|4}.x.patch`,
+and copy sstfb.c to linux/drivers/video/, or apply a single patch,
+`sstfb/patch-2.{2|4}.x-sstfb-yymmdd` to your linux source tree.
+
+Then configure your kernel as usual: choose "m" or "y" to 3Dfx Voodoo
+Graphics in section "console". Compile, install, have fun... and please
+drop me a report :)
+
+
+Module Usage
+============
+
+.. warning::
+
+       #. You should read completely this section before issuing any command.
+
+       #. If you have only one monitor to play with, once you insmod the
+	  module, the 3dfx takes control of the output, so you'll have to
+	  plug the monitor to the "normal" video board in order to issue
+	  the commands, or you can blindly use sst_dbg_vgapass
+	  in the tools directory (See Tools). The latest solution is pass the
+	  parameter vgapass=1 when insmodding the driver. (See Kernel/Modules
+	  Options)
+
+Module insertion
+----------------
+
+       #. insmod sstfb.o
+
+	  you should see some strange output from the board:
+	  a big blue square, a green and a red small squares and a vertical
+	  white rectangle. why? the function's name is self-explanatory:
+	  "sstfb_test()"...
+	  (if you don't have a second monitor, you'll have to plug your monitor
+	  directly to the 2D videocard to see what you're typing)
+
+       #. con2fb /dev/fbx /dev/ttyx
+
+	  bind a tty to the new frame buffer. if you already have a frame
+	  buffer driver, the voodoo fb will likely be /dev/fb1. if not,
+	  the device will be /dev/fb0. You can check this by doing a
+	  cat /proc/fb. You can find a copy of con2fb in tools/ directory.
+	  if you don't have another fb device, this step is superfluous,
+	  as the console subsystem automagicaly binds ttys to the fb.
+       #. switch to the virtual console you just mapped. "tadaaa" ...
+
+Module removal
+--------------
+
+       #. con2fb /dev/fbx /dev/ttyx
+
+	  bind the tty to the old frame buffer so the module can be removed.
+	  (how does it work with vgacon ? short answer : it doesn't work)
+
+       #. rmmod sstfb
+
+
+Kernel/Modules Options
+----------------------
+
+You can pass some options to the sstfb module, and via the kernel
+command line when the driver is compiled in:
+for module : insmod sstfb.o option1=value1 option2=value2 ...
+in kernel :  video=sstfb:option1,option2:value2,option3 ...
+
+sstfb supports the following options:
+
+=============== =============== ===============================================
+Module		Kernel		Description
+=============== =============== ===============================================
+vgapass=0	vganopass	Enable or disable VGA passthrou cable.
+vgapass=1	vgapass		When enabled, the monitor will get the signal
+				from the VGA board and not from the voodoo.
+
+				Default: nopass
+
+mem=x		mem:x		Force frame buffer memory in MiB
+				allowed values: 0, 1, 2, 4.
+
+				Default: 0 (= autodetect)
+
+inverse=1	inverse		Supposed to enable inverse console.
+				doesn't work yet...
+
+clipping=1	clipping	Enable or disable clipping.
+clipping=0	noclipping	With clipping enabled, all offscreen
+				reads and writes are discarded.
+
+				Default: enable clipping.
+
+gfxclk=x	gfxclk:x	Force graphic clock frequency (in MHz).
+				Be careful with this option, it may be
+				DANGEROUS.
+
+				Default: auto
+
+					- 50Mhz for Voodoo 1,
+					- 75MHz for Voodoo 2.
+
+slowpci=1	fastpci		Enable or disable fast PCI read/writes.
+slowpci=1	slowpci		Default : fastpci
+
+dev=x		dev:x		Attach the driver to device number x.
+				0 is the first compatible board (in
+				lspci order)
+=============== =============== ===============================================
+
+Tools
+=====
+
+These tools are mostly for debugging purposes, but you can
+find some of these interesting:
+
+- `con2fb`, maps a tty to a fbramebuffer::
+
+	con2fb /dev/fb1 /dev/tty5
+
+- `sst_dbg_vgapass`, changes vga passthrou. You have to recompile the
+  driver with SST_DEBUG and SST_DEBUG_IOCTL set to 1::
+
+	sst_dbg_vgapass /dev/fb1 1 (enables vga cable)
+	sst_dbg_vgapass /dev/fb1 0 (disables vga cable)
+
+- `glide_reset`, resets the voodoo using glide
+  use this after rmmoding sstfb, if the module refuses to
+  reinsert.
+
+Bugs
+====
+
+- DO NOT use glide while the sstfb module is in, you'll most likely
+  hang your computer.
+- If you see some artefacts (pixels not cleaning and stuff like that),
+  try turning off clipping (clipping=0), and/or using slowpci
+- the driver don't detect the 4Mb frame buffer voodoos, it seems that
+  the 2 last Mbs wrap around. looking into that .
+- The driver is 16 bpp only, 24/32 won't work.
+- The driver is not your_favorite_toy-safe. this includes SMP...
+
+	[Actually from inspection it seems to be safe - Alan]
+
+- When using XFree86 FBdev (X over fbdev) you may see strange color
+  patterns at the border of your windows (the pixels lose the lowest
+  byte -> basically the blue component and some of the green). I'm unable
+  to reproduce this with XFree86-3.3, but one of the testers has this
+  problem with XFree86-4. Apparently recent Xfree86-4.x solve this
+  problem.
+- I didn't really test changing the palette, so you may find some weird
+  things when playing with that.
+- Sometimes the driver will not recognise the DAC, and the
+  initialisation will fail. This is specifically true for
+  voodoo 2 boards, but it should be solved in recent versions. Please
+  contact me.
+- The 24/32 is not likely to work anytime soon, knowing that the
+  hardware does ... unusual things in 24/32 bpp.
+- When used with another video board, current limitations of the linux
+  console subsystem can cause some troubles, specifically, you should
+  disable software scrollback, as it can oops badly ...
+
+Todo
+====
+
+- Get rid of the previous paragraph.
+- Buy more coffee.
+- test/port to other arch.
+- try to add panning using tweeks with front and back buffer .
+- try to implement accel on voodoo2, this board can actually do a
+  lot in 2D even if it was sold as a 3D only board ...
+
+Ghozlane Toumi <gtoumi@laposte.net>
+
+
+Date: 2002/05/09 20:11:45
+
+http://sstfb.sourceforge.net/README
diff --git a/Documentation/fb/sstfb.txt b/Documentation/fb/sstfb.txt
deleted file mode 100644
index 13db1075e4a5..000000000000
--- a/Documentation/fb/sstfb.txt
+++ /dev/null
@@ -1,174 +0,0 @@
-
-Introduction
-
-	  This is a frame buffer device driver for 3dfx' Voodoo Graphics 
-	(aka voodoo 1, aka sst1) and Voodoo² (aka Voodoo 2, aka CVG) based 
-	video boards. It's highly experimental code, but is guaranteed to work
-	on my computer, with my "Maxi Gamer 3D" and "Maxi Gamer 3d²" boards,
-	and with me "between chair and keyboard". Some people tested other
-	combinations and it seems that it works.
-	  The main page is located at <http://sstfb.sourceforge.net>, and if
-	you want the latest version, check out the CVS, as the driver is a work
-	in progress, I feel uncomfortable with releasing tarballs of something
-	not completely working...Don't worry, it's still more than usable
-	(I eat my own dog food)
-
-	  Please read the Bug section, and report any success or failure to me
-	(Ghozlane Toumi <gtoumi@laposte.net>).
-	  BTW, If you have only one monitor , and you don't feel like playing
-	with the vga passthrou cable, I can only suggest borrowing a screen
-	somewhere... 
-
-
-Installation 
-
-	  This driver (should) work on ix86, with "late" 2.2.x kernel (tested
-	with x = 19) and "recent" 2.4.x kernel, as a module or compiled in.
-	  It has been included in mainstream kernel since the infamous 2.4.10.
-	  You can apply the patches found in sstfb/kernel/*-2.{2|4}.x.patch,
-	and copy sstfb.c to linux/drivers/video/, or apply a single patch, 
-	sstfb/patch-2.{2|4}.x-sstfb-yymmdd to your linux source tree.
-
-	  Then configure your kernel as usual: choose "m" or "y" to 3Dfx Voodoo
-	Graphics in section "console". Compile, install, have fun... and please
-	drop me a report :)
-
-
-Module Usage
-	
-	Warnings.
-	# You should read completely this section before issuing any command.
-	# If you have only one monitor to play with, once you insmod the
-	  module, the 3dfx takes control of the output, so you'll have to
-	  plug the monitor to the "normal" video board in order to issue
-	  the commands, or you can blindly use sst_dbg_vgapass
-          in the tools directory (See Tools). The latest solution is pass the
-	  parameter vgapass=1 when insmodding the driver. (See Kernel/Modules
-	  Options)
-
-	Module insertion:
-	# insmod sstfb.o
-	  you should see some strange output from the board: 
-	  a big blue square, a green and a red small squares and a vertical
-	  white rectangle. why? the function's name is self-explanatory:
-	  "sstfb_test()"...
-	  (if you don't have a second monitor, you'll have to plug your monitor
-	  directly to the 2D videocard to see what you're typing)
-	# con2fb /dev/fbx /dev/ttyx
-	  bind a tty to the new frame buffer. if you already have a frame
-	  buffer driver, the voodoo fb will likely be /dev/fb1. if not, 
-	  the device will be /dev/fb0. You can check this by doing a 
-	  cat /proc/fb. You can find a copy of con2fb in tools/ directory.
-	  if you don't have another fb device, this step is superfluous,
-	  as the console subsystem automagicaly binds ttys to the fb.
-	# switch to the virtual console you just mapped. "tadaaa" ...
-
-	Module removal:
-	# con2fb /dev/fbx /dev/ttyx
-	  bind the tty to the old frame buffer so the module can be removed.
-	  (how does it work with vgacon ? short answer : it doesn't work)
-	# rmmod sstfb
-
-
-Kernel/Modules Options
-
-	You can pass some options to the sstfb module, and via the kernel 
-	command line when the driver is compiled in:
-	for module : insmod sstfb.o option1=value1 option2=value2 ...
-	in kernel :  video=sstfb:option1,option2:value2,option3 ...
-	
-	sstfb supports the following options :
-
-Module		Kernel		Description
-
-vgapass=0	vganopass	Enable or disable VGA passthrou cable.
-vgapass=1	vgapass		When enabled, the monitor will get the signal
-				from the VGA board and not from the voodoo.
-				Default: nopass
-
-mem=x		mem:x		Force frame buffer memory in MiB
-				allowed values: 0, 1, 2, 4.
-				Default: 0 (= autodetect)
-
-inverse=1	inverse		Supposed to enable inverse console.
-				doesn't work yet...
-
-clipping=1	clipping	Enable or disable clipping.
-clipping=0	noclipping	With clipping enabled, all offscreen
-				reads and writes are discarded.
-				Default: enable clipping.
-
-gfxclk=x	gfxclk:x	Force graphic clock frequency (in MHz).
-				Be careful with this option, it may be
-				DANGEROUS.
-				Default: auto 
-					50Mhz for Voodoo 1,
-					75MHz for Voodoo 2. 
-
-slowpci=1	fastpci		Enable or disable fast PCI read/writes.
-slowpci=1	slowpci		Default : fastpci
-
-dev=x		dev:x		Attach the driver to device number x.
-				0 is the first compatible board (in 
-				lspci order)
-
-Tools
-
-	These tools are mostly for debugging purposes, but you can 
-	find some of these interesting :
-	 - con2fb , maps a tty to a fbramebuffer .
-		con2fb /dev/fb1 /dev/tty5
-	 - sst_dbg_vgapass , changes vga passthrou. You have to recompile the
-	driver with SST_DEBUG and SST_DEBUG_IOCTL set to 1
-		sst_dbg_vgapass /dev/fb1 1 (enables vga cable)
-		sst_dbg_vgapass /dev/fb1 0 (disables vga cable)
-	 - glide_reset , resets the voodoo using glide
-		use this after rmmoding sstfb, if the module refuses to
-		reinsert .
-
-Bugs
-
-	- DO NOT use glide while the sstfb module is in, you'll most likely
-	hang your computer.
-	- If you see some artefacts (pixels not cleaning and stuff like that), 
-	try turning off clipping (clipping=0), and/or using slowpci
-	- the driver don't detect the 4Mb frame buffer voodoos, it seems that
-	the 2 last Mbs wrap around. looking into that .
-	- The driver is 16 bpp only, 24/32 won't work.
-	- The driver is not your_favorite_toy-safe. this includes SMP...
-          [Actually from inspection it seems to be safe - Alan]
-	- When using XFree86 FBdev (X over fbdev) you may see strange color
-	patterns at the border of your windows (the pixels lose the lowest
-	byte -> basically the blue component and some of the green). I'm unable
-	to reproduce this with XFree86-3.3, but one of the testers has this
-	problem with XFree86-4. Apparently recent Xfree86-4.x solve this
-	problem.
-	- I didn't really test changing the palette, so you may find some weird
-	things when playing with that.
-	- Sometimes the driver will not recognise the DAC, and the
-        initialisation will fail. This is specifically true for
-	voodoo 2 boards, but it should be solved in recent versions. Please
-	contact me.
-	- The 24/32 is not likely to work anytime soon, knowing that the
-	hardware does ... unusual things in 24/32 bpp.
-	- When used with another video board, current limitations of the linux
-	console subsystem can cause some troubles, specifically, you should
-	disable software scrollback, as it can oops badly ...
-
-Todo
-
-	- Get rid of the previous paragraph.
-	- Buy more coffee.
-	- test/port to other arch.
-	- try to add panning using tweeks with front and back buffer .
-	- try to implement accel on voodoo2, this board can actually do a 
-	  lot in 2D even if it was sold as a 3D only board ...
-
-ghoz.
-
--- 
-Ghozlane Toumi <gtoumi@laposte.net>
-
-
-$Date: 2002/05/09 20:11:45 $
-http://sstfb.sourceforge.net/README
diff --git a/Documentation/fb/tgafb.rst b/Documentation/fb/tgafb.rst
new file mode 100644
index 000000000000..0c50d2134aa4
--- /dev/null
+++ b/Documentation/fb/tgafb.rst
@@ -0,0 +1,71 @@
+==============
+What is tgafb?
+==============
+
+This is a driver for DECChip 21030 based graphics framebuffers, a.k.a. TGA
+cards, which are usually found in older Digital Alpha systems. The
+following models are supported:
+
+- ZLxP-E1 (8bpp, 2 MB VRAM)
+- ZLxP-E2 (32bpp, 8 MB VRAM)
+- ZLxP-E3 (32bpp, 16 MB VRAM, Zbuffer)
+
+This version is an almost complete rewrite of the code written by Geert
+Uytterhoeven, which was based on the original TGA console code written by
+Jay Estabrook.
+
+Major new features since Linux 2.0.x:
+
+ * Support for multiple resolutions
+ * Support for fixed-frequency and other oddball monitors
+   (by allowing the video mode to be set at boot time)
+
+User-visible changes since Linux 2.2.x:
+
+ * Sync-on-green is now handled properly
+ * More useful information is printed on bootup
+   (this helps if people run into problems)
+
+This driver does not (yet) support the TGA2 family of framebuffers, so the
+PowerStorm 3D30/4D20 (also known as PBXGB) cards are not supported. These
+can however be used with the standard VGA Text Console driver.
+
+
+Configuration
+=============
+
+You can pass kernel command line options to tgafb with
+`video=tgafb:option1,option2:value2,option3` (multiple options should be
+separated by comma, values are separated from options by `:`).
+
+Accepted options:
+
+==========  ============================================================
+font:X      default font to use. All fonts are supported, including the
+	    SUN12x22 font which is very nice at high resolutions.
+
+mode:X      default video mode. The following video modes are supported:
+	    640x480-60, 800x600-56, 640x480-72, 800x600-60, 800x600-72,
+	    1024x768-60, 1152x864-60, 1024x768-70, 1024x768-76,
+	    1152x864-70, 1280x1024-61, 1024x768-85, 1280x1024-70,
+	    1152x864-84, 1280x1024-76, 1280x1024-85
+==========  ============================================================
+
+
+Known Issues
+============
+
+The XFree86 FBDev server has been reported not to work, since tgafb doesn't do
+mmap(). Running the standard XF86_TGA server from XFree86 3.3.x works fine for
+me, however this server does not do acceleration, which make certain operations
+quite slow. Support for acceleration is being progressively integrated in
+XFree86 4.x.
+
+When running tgafb in resolutions higher than 640x480, on switching VCs from
+tgafb to XF86_TGA 3.3.x, the entire screen is not re-drawn and must be manually
+refreshed. This is an X server problem, not a tgafb problem, and is fixed in
+XFree86 4.0.
+
+Enjoy!
+
+Martin Lucina <mato@kotelna.sk>
diff --git a/Documentation/fb/tgafb.txt b/Documentation/fb/tgafb.txt
deleted file mode 100644
index 250083ada8fb..000000000000
--- a/Documentation/fb/tgafb.txt
+++ /dev/null
@@ -1,69 +0,0 @@
-$Id: tgafb.txt,v 1.1.2.2 2000/04/04 06:50:18 mato Exp $
-
-What is tgafb?
-===============
-
-This is a driver for DECChip 21030 based graphics framebuffers, a.k.a. TGA
-cards, which are usually found in older Digital Alpha systems. The
-following models are supported:
-
-ZLxP-E1 (8bpp, 2 MB VRAM)
-ZLxP-E2 (32bpp, 8 MB VRAM)
-ZLxP-E3 (32bpp, 16 MB VRAM, Zbuffer)
-
-This version is an almost complete rewrite of the code written by Geert
-Uytterhoeven, which was based on the original TGA console code written by
-Jay Estabrook.
-
-Major new features since Linux 2.0.x:
-
- * Support for multiple resolutions
- * Support for fixed-frequency and other oddball monitors 
-   (by allowing the video mode to be set at boot time)
-
-User-visible changes since Linux 2.2.x:
-
- * Sync-on-green is now handled properly
- * More useful information is printed on bootup
-   (this helps if people run into problems)
-
-This driver does not (yet) support the TGA2 family of framebuffers, so the
-PowerStorm 3D30/4D20 (also known as PBXGB) cards are not supported. These
-can however be used with the standard VGA Text Console driver.
-
-
-Configuration
-=============
-
-You can pass kernel command line options to tgafb with
-`video=tgafb:option1,option2:value2,option3' (multiple options should be
-separated by comma, values are separated from options by `:').
-Accepted options:
-
-font:X    - default font to use. All fonts are supported, including the
-            SUN12x22 font which is very nice at high resolutions.
-
-mode:X    - default video mode. The following video modes are supported:
-            640x480-60, 800x600-56, 640x480-72, 800x600-60, 800x600-72, 
-	    1024x768-60, 1152x864-60, 1024x768-70, 1024x768-76,
-	    1152x864-70, 1280x1024-61, 1024x768-85, 1280x1024-70,
-	    1152x864-84, 1280x1024-76, 1280x1024-85
- 
-
-Known Issues
-============
-
-The XFree86 FBDev server has been reported not to work, since tgafb doesn't do
-mmap(). Running the standard XF86_TGA server from XFree86 3.3.x works fine for
-me, however this server does not do acceleration, which make certain operations
-quite slow. Support for acceleration is being progressively integrated in
-XFree86 4.x.
-
-When running tgafb in resolutions higher than 640x480, on switching VCs from
-tgafb to XF86_TGA 3.3.x, the entire screen is not re-drawn and must be manually
-refreshed. This is an X server problem, not a tgafb problem, and is fixed in
-XFree86 4.0.
-
-Enjoy!
-
-Martin Lucina <mato@kotelna.sk>
diff --git a/Documentation/fb/tridentfb.rst b/Documentation/fb/tridentfb.rst
new file mode 100644
index 000000000000..7921c9dee78c
--- /dev/null
+++ b/Documentation/fb/tridentfb.rst
@@ -0,0 +1,78 @@
+=========
+Tridentfb
+=========
+
+Tridentfb is a framebuffer driver for some Trident chip based cards.
+
+The following list of chips is thought to be supported although not all are
+tested:
+
+those from the TGUI series 9440/96XX and with Cyber in their names
+those from the Image series and with Cyber in their names
+those with Blade in their names (Blade3D,CyberBlade...)
+the newer CyberBladeXP family
+
+All families are accelerated. Only PCI/AGP based cards are supported,
+none of the older Tridents.
+The driver supports 8, 16 and 32 bits per pixel depths.
+The TGUI family requires a line length to be power of 2 if acceleration
+is enabled. This means that range of possible resolutions and bpp is
+limited comparing to the range if acceleration is disabled (see list
+of parameters below).
+
+Known bugs:
+
+1. The driver randomly locks up on 3DImage975 chip with acceleration
+   enabled. The same happens in X11 (Xorg).
+2. The ramdac speeds require some more fine tuning. It is possible to
+   switch resolution which the chip does not support at some depths for
+   older chips.
+
+How to use it?
+==============
+
+When booting you can pass the video parameter::
+
+	video=tridentfb
+
+The parameters for tridentfb are concatenated with a ':' as in this example::
+
+	video=tridentfb:800x600-16@75,noaccel
+
+The second level parameters that tridentfb understands are:
+
+========  =====================================================================
+noaccel   turns off acceleration (when it doesn't work for your card)
+
+fp	  use flat panel related stuff
+crt 	  assume monitor is present instead of fp
+
+center 	  for flat panels and resolutions smaller than native size center the
+	  image, otherwise use
+stretch
+
+memsize   integer value in KB, use if your card's memory size is misdetected.
+	  look at the driver output to see what it says when initializing.
+
+memdiff   integer value in KB, should be nonzero if your card reports
+	  more memory than it actually has. For instance mine is 192K less than
+	  detection says in all three BIOS selectable situations 2M, 4M, 8M.
+	  Only use if your video memory is taken from main memory hence of
+	  configurable size. Otherwise use memsize.
+	  If in some modes which barely fit the memory you see garbage
+	  at the bottom this might help by not letting change to that mode
+	  anymore.
+
+nativex   the width in pixels of the flat panel.If you know it (usually 1024
+	  800 or 1280) and it is not what the driver seems to detect use it.
+
+bpp	  bits per pixel (8,16 or 32)
+mode	  a mode name like 800x600-8@75 as described in
+	  Documentation/fb/modedb.rst
+========  =====================================================================
+
+Using insane values for the above parameters will probably result in driver
+misbehaviour so take care(for instance memsize=12345678 or memdiff=23784 or
+nativex=93)
+
+Contact: jani@astechnix.ro
diff --git a/Documentation/fb/tridentfb.txt b/Documentation/fb/tridentfb.txt
deleted file mode 100644
index 45d9de5b13a3..000000000000
--- a/Documentation/fb/tridentfb.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-Tridentfb is a framebuffer driver for some Trident chip based cards.
-
-The following list of chips is thought to be supported although not all are
-tested:
-
-those from the TGUI series 9440/96XX and with Cyber in their names
-those from the Image series and with Cyber in their names
-those with Blade in their names (Blade3D,CyberBlade...)
-the newer CyberBladeXP family
-
-All families are accelerated. Only PCI/AGP based cards are supported,
-none of the older Tridents.
-The driver supports 8, 16 and 32 bits per pixel depths.
-The TGUI family requires a line length to be power of 2 if acceleration
-is enabled. This means that range of possible resolutions and bpp is
-limited comparing to the range if acceleration is disabled (see list
-of parameters below).
-
-Known bugs:
-1. The driver randomly locks up on 3DImage975 chip with acceleration
-   enabled. The same happens in X11 (Xorg).
-2. The ramdac speeds require some more fine tuning. It is possible to
-   switch resolution which the chip does not support at some depths for
-   older chips.
-
-How to use it?
-==============
-
-When booting you can pass the video parameter.
-video=tridentfb
-
-The parameters for tridentfb are concatenated with a ':' as in this example.
-
-video=tridentfb:800x600-16@75,noaccel
-
-The second level parameters that tridentfb understands are:
-
-noaccel - turns off acceleration (when it doesn't work for your card)
-
-fp	- use flat panel related stuff
-crt 	- assume monitor is present instead of fp
-
-center 	- for flat panels and resolutions smaller than native size center the
-	  image, otherwise use
-stretch
-
-memsize - integer value in KB, use if your card's memory size is misdetected.
-	  look at the driver output to see what it says when initializing.
-
-memdiff - integer value in KB, should be nonzero if your card reports
-	  more memory than it actually has. For instance mine is 192K less than
-	  detection says in all three BIOS selectable situations 2M, 4M, 8M.
-	  Only use if your video memory is taken from main memory hence of
-	  configurable size. Otherwise use memsize.
-	  If in some modes which barely fit the memory you see garbage
-	  at the bottom this might help by not letting change to that mode
-	  anymore.
-
-nativex - the width in pixels of the flat panel.If you know it (usually 1024
-	  800 or 1280) and it is not what the driver seems to detect use it.
-
-bpp	- bits per pixel (8,16 or 32)
-mode	- a mode name like 800x600-8@75 as described in
-	  Documentation/fb/modedb.txt
-
-Using insane values for the above parameters will probably result in driver
-misbehaviour so take care(for instance memsize=12345678 or memdiff=23784 or
-nativex=93)
-
-Contact: jani@astechnix.ro
diff --git a/Documentation/fb/udlfb.rst b/Documentation/fb/udlfb.rst
new file mode 100644
index 000000000000..732b37db3504
--- /dev/null
+++ b/Documentation/fb/udlfb.rst
@@ -0,0 +1,162 @@
+==============
+What is udlfb?
+==============
+
+This is a driver for DisplayLink USB 2.0 era graphics chips.
+
+DisplayLink chips provide simple hline/blit operations with some compression,
+pairing that with a hardware framebuffer (16MB) on the other end of the
+USB wire.  That hardware framebuffer is able to drive the VGA, DVI, or HDMI
+monitor with no CPU involvement until a pixel has to change.
+
+The CPU or other local resource does all the rendering; optionally compares the
+result with a local shadow of the remote hardware framebuffer to identify
+the minimal set of pixels that have changed; and compresses and sends those
+pixels line-by-line via USB bulk transfers.
+
+Because of the efficiency of bulk transfers and a protocol on top that
+does not require any acks - the effect is very low latency that
+can support surprisingly high resolutions with good performance for
+non-gaming and non-video applications.
+
+Mode setting, EDID read, etc are other bulk or control transfers. Mode
+setting is very flexible - able to set nearly arbitrary modes from any timing.
+
+Advantages of USB graphics in general:
+
+ * Ability to add a nearly arbitrary number of displays to any USB 2.0
+   capable system. On Linux, number of displays is limited by fbdev interface
+   (FB_MAX is currently 32). Of course, all USB devices on the same
+   host controller share the same 480Mbs USB 2.0 interface.
+
+Advantages of supporting DisplayLink chips with kernel framebuffer interface:
+
+ * The actual hardware functionality of DisplayLink chips matches nearly
+   one-to-one with the fbdev interface, making the driver quite small and
+   tight relative to the functionality it provides.
+ * X servers and other applications can use the standard fbdev interface
+   from user mode to talk to the device, without needing to know anything
+   about USB or DisplayLink's protocol at all. A "displaylink" X driver
+   and a slightly modified "fbdev" X driver are among those that already do.
+
+Disadvantages:
+
+ * Fbdev's mmap interface assumes a real hardware framebuffer is mapped.
+   In the case of USB graphics, it is just an allocated (virtual) buffer.
+   Writes need to be detected and encoded into USB bulk transfers by the CPU.
+   Accurate damage/changed area notifications work around this problem.
+   In the future, hopefully fbdev will be enhanced with an small standard
+   interface to allow mmap clients to report damage, for the benefit
+   of virtual or remote framebuffers.
+ * Fbdev does not arbitrate client ownership of the framebuffer well.
+ * Fbcon assumes the first framebuffer it finds should be consumed for console.
+ * It's not clear what the future of fbdev is, given the rise of KMS/DRM.
+
+How to use it?
+==============
+
+Udlfb, when loaded as a module, will match against all USB 2.0 generation
+DisplayLink chips (Alex and Ollie family). It will then attempt to read the EDID
+of the monitor, and set the best common mode between the DisplayLink device
+and the monitor's capabilities.
+
+If the DisplayLink device is successful, it will paint a "green screen" which
+means that from a hardware and fbdev software perspective, everything is good.
+
+At that point, a /dev/fb? interface will be present for user-mode applications
+to open and begin writing to the framebuffer of the DisplayLink device using
+standard fbdev calls.  Note that if mmap() is used, by default the user mode
+application must send down damage notifications to trigger repaints of the
+changed regions.  Alternatively, udlfb can be recompiled with experimental
+defio support enabled, to support a page-fault based detection mechanism
+that can work without explicit notification.
+
+The most common client of udlfb is xf86-video-displaylink or a modified
+xf86-video-fbdev X server. These servers have no real DisplayLink specific
+code. They write to the standard framebuffer interface and rely on udlfb
+to do its thing.  The one extra feature they have is the ability to report
+rectangles from the X DAMAGE protocol extension down to udlfb via udlfb's
+damage interface (which will hopefully be standardized for all virtual
+framebuffers that need damage info). These damage notifications allow
+udlfb to efficiently process the changed pixels.
+
+Module Options
+==============
+
+Special configuration for udlfb is usually unnecessary. There are a few
+options, however.
+
+From the command line, pass options to modprobe
+modprobe udlfb fb_defio=0 console=1 shadow=1
+
+Or modify options on the fly at /sys/module/udlfb/parameters directory via
+sudo nano fb_defio
+change the parameter in place, and save the file.
+
+Unplug/replug USB device to apply with new settings
+
+Or for permanent option, create file like /etc/modprobe.d/udlfb.conf with text
+options udlfb fb_defio=0 console=1 shadow=1
+
+Accepted boolean options:
+
+=============== ================================================================
+fb_defio	Make use of the fb_defio (CONFIG_FB_DEFERRED_IO) kernel
+		module to track changed areas of the framebuffer by page faults.
+		Standard fbdev applications that use mmap but that do not
+		report damage, should be able to work with this enabled.
+		Disable when running with X server that supports reporting
+		changed regions via ioctl, as this method is simpler,
+		more stable, and higher performance.
+		default: fb_defio=1
+
+console		Allow fbcon to attach to udlfb provided framebuffers.
+		Can be disabled if fbcon and other clients
+		(e.g. X with --shared-vt) are in conflict.
+		default: console=1
+
+shadow		Allocate a 2nd framebuffer to shadow what's currently across
+		the USB bus in device memory. If any pixels are unchanged,
+		do not transmit. Spends host memory to save USB transfers.
+		Enabled by default. Only disable on very low memory systems.
+		default: shadow=1
+=============== ================================================================
+
+Sysfs Attributes
+================
+
+Udlfb creates several files in /sys/class/graphics/fb?
+Where ? is the sequential framebuffer id of the particular DisplayLink device
+
+======================== ========================================================
+edid			 If a valid EDID blob is written to this file (typically
+			 by a udev rule), then udlfb will use this EDID as a
+			 backup in case reading the actual EDID of the monitor
+			 attached to the DisplayLink device fails. This is
+			 especially useful for fixed panels, etc. that cannot
+			 communicate their capabilities via EDID. Reading
+			 this file returns the current EDID of the attached
+			 monitor (or last backup value written). This is
+			 useful to get the EDID of the attached monitor,
+			 which can be passed to utilities like parse-edid.
+
+metrics_bytes_rendered	 32-bit count of pixel bytes rendered
+
+metrics_bytes_identical  32-bit count of how many of those bytes were found to be
+			 unchanged, based on a shadow framebuffer check
+
+metrics_bytes_sent	 32-bit count of how many bytes were transferred over
+			 USB to communicate the resulting changed pixels to the
+			 hardware. Includes compression and protocol overhead
+
+metrics_cpu_kcycles_used 32-bit count of CPU cycles used in processing the
+			 above pixels (in thousands of cycles).
+
+metrics_reset		 Write-only. Any write to this file resets all metrics
+			 above to zero.  Note that the 32-bit counters above
+			 roll over very quickly. To get reliable results, design
+			 performance tests to start and finish in a very short
+			 period of time (one minute or less is safe).
+======================== ========================================================
+
+Bernie Thompson <bernie@plugable.com>
diff --git a/Documentation/fb/udlfb.txt b/Documentation/fb/udlfb.txt
deleted file mode 100644
index c985cb65dd06..000000000000
--- a/Documentation/fb/udlfb.txt
+++ /dev/null
@@ -1,159 +0,0 @@
-
-What is udlfb?
-===============
-
-This is a driver for DisplayLink USB 2.0 era graphics chips.
-
-DisplayLink chips provide simple hline/blit operations with some compression,
-pairing that with a hardware framebuffer (16MB) on the other end of the
-USB wire.  That hardware framebuffer is able to drive the VGA, DVI, or HDMI
-monitor with no CPU involvement until a pixel has to change.
-
-The CPU or other local resource does all the rendering; optionally compares the
-result with a local shadow of the remote hardware framebuffer to identify
-the minimal set of pixels that have changed; and compresses and sends those
-pixels line-by-line via USB bulk transfers.
-
-Because of the efficiency of bulk transfers and a protocol on top that
-does not require any acks - the effect is very low latency that
-can support surprisingly high resolutions with good performance for
-non-gaming and non-video applications.
-
-Mode setting, EDID read, etc are other bulk or control transfers. Mode
-setting is very flexible - able to set nearly arbitrary modes from any timing.
-
-Advantages of USB graphics in general:
-
- * Ability to add a nearly arbitrary number of displays to any USB 2.0
-   capable system. On Linux, number of displays is limited by fbdev interface
-   (FB_MAX is currently 32). Of course, all USB devices on the same
-   host controller share the same 480Mbs USB 2.0 interface.
-
-Advantages of supporting DisplayLink chips with kernel framebuffer interface:
-
- * The actual hardware functionality of DisplayLink chips matches nearly
-   one-to-one with the fbdev interface, making the driver quite small and
-   tight relative to the functionality it provides.
- * X servers and other applications can use the standard fbdev interface
-   from user mode to talk to the device, without needing to know anything
-   about USB or DisplayLink's protocol at all. A "displaylink" X driver
-   and a slightly modified "fbdev" X driver are among those that already do.
-
-Disadvantages:
-
- * Fbdev's mmap interface assumes a real hardware framebuffer is mapped.
-   In the case of USB graphics, it is just an allocated (virtual) buffer.
-   Writes need to be detected and encoded into USB bulk transfers by the CPU.
-   Accurate damage/changed area notifications work around this problem.
-   In the future, hopefully fbdev will be enhanced with an small standard
-   interface to allow mmap clients to report damage, for the benefit
-   of virtual or remote framebuffers.
- * Fbdev does not arbitrate client ownership of the framebuffer well.
- * Fbcon assumes the first framebuffer it finds should be consumed for console.
- * It's not clear what the future of fbdev is, given the rise of KMS/DRM.
-
-How to use it?
-==============
-
-Udlfb, when loaded as a module, will match against all USB 2.0 generation
-DisplayLink chips (Alex and Ollie family). It will then attempt to read the EDID
-of the monitor, and set the best common mode between the DisplayLink device
-and the monitor's capabilities.
-
-If the DisplayLink device is successful, it will paint a "green screen" which
-means that from a hardware and fbdev software perspective, everything is good.
-
-At that point, a /dev/fb? interface will be present for user-mode applications
-to open and begin writing to the framebuffer of the DisplayLink device using
-standard fbdev calls.  Note that if mmap() is used, by default the user mode
-application must send down damage notifications to trigger repaints of the
-changed regions.  Alternatively, udlfb can be recompiled with experimental
-defio support enabled, to support a page-fault based detection mechanism
-that can work without explicit notification.
-
-The most common client of udlfb is xf86-video-displaylink or a modified
-xf86-video-fbdev X server. These servers have no real DisplayLink specific
-code. They write to the standard framebuffer interface and rely on udlfb
-to do its thing.  The one extra feature they have is the ability to report
-rectangles from the X DAMAGE protocol extension down to udlfb via udlfb's
-damage interface (which will hopefully be standardized for all virtual
-framebuffers that need damage info). These damage notifications allow
-udlfb to efficiently process the changed pixels.
-
-Module Options
-==============
-
-Special configuration for udlfb is usually unnecessary. There are a few
-options, however.
-
-From the command line, pass options to modprobe
-modprobe udlfb fb_defio=0 console=1 shadow=1
-
-Or modify options on the fly at /sys/module/udlfb/parameters directory via
-sudo nano fb_defio
-change the parameter in place, and save the file.
-
-Unplug/replug USB device to apply with new settings
-
-Or for permanent option, create file like /etc/modprobe.d/udlfb.conf with text
-options udlfb fb_defio=0 console=1 shadow=1
-
-Accepted boolean options:
-
-fb_defio	Make use of the fb_defio (CONFIG_FB_DEFERRED_IO) kernel
-		module to track changed areas of the framebuffer by page faults.
-		Standard fbdev applications that use mmap but that do not
-		report damage, should be able to work with this enabled.
-		Disable when running with X server that supports reporting
-		changed regions via ioctl, as this method is simpler,
-		more stable, and higher performance.
-		default: fb_defio=1
-
-console	Allow fbcon to attach to udlfb provided framebuffers.
-		Can be disabled if fbcon and other clients
-		(e.g. X with --shared-vt) are in conflict.
-		default: console=1
-
-shadow		Allocate a 2nd framebuffer to shadow what's currently across
-		the USB bus in device memory. If any pixels are unchanged,
-		do not transmit. Spends host memory to save USB transfers.
-		Enabled by default. Only disable on very low memory systems.
-		default: shadow=1
-
-Sysfs Attributes
-================
-
-Udlfb creates several files in /sys/class/graphics/fb?
-Where ? is the sequential framebuffer id of the particular DisplayLink device
-
-edid	       		If a valid EDID blob is written to this file (typically
-			by a udev rule), then udlfb will use this EDID as a
-			backup in case reading the actual EDID of the monitor
-			attached to the DisplayLink device fails. This is
-			especially useful for fixed panels, etc. that cannot
-			communicate their capabilities via EDID. Reading
-			this file returns the current EDID of the attached
-			monitor (or last backup value written). This is
-			useful to get the EDID of the attached monitor,
-			which can be passed to utilities like parse-edid.
-
-metrics_bytes_rendered	32-bit count of pixel bytes rendered
-
-metrics_bytes_identical 32-bit count of how many of those bytes were found to be
-			unchanged, based on a shadow framebuffer check
-
-metrics_bytes_sent	32-bit count of how many bytes were transferred over
-			USB to communicate the resulting changed pixels to the
-			hardware. Includes compression and protocol overhead
-
-metrics_cpu_kcycles_used 32-bit count of CPU cycles used in processing the
-			above pixels (in thousands of cycles).
-
-metrics_reset		Write-only. Any write to this file resets all metrics
-			above to zero.  Note that the 32-bit counters above
-			roll over very quickly. To get reliable results, design
-			performance tests to start and finish in a very short
-			period of time (one minute or less is safe).
-
---
-Bernie Thompson <bernie@plugable.com>
diff --git a/Documentation/fb/uvesafb.rst b/Documentation/fb/uvesafb.rst
new file mode 100644
index 000000000000..d1c2523fbb33
--- /dev/null
+++ b/Documentation/fb/uvesafb.rst
@@ -0,0 +1,188 @@
+==========================================================
+uvesafb - A Generic Driver for VBE2+ compliant video cards
+==========================================================
+
+1. Requirements
+---------------
+
+uvesafb should work with any video card that has a Video BIOS compliant
+with the VBE 2.0 standard.
+
+Unlike other drivers, uvesafb makes use of a userspace helper called
+v86d.  v86d is used to run the x86 Video BIOS code in a simulated and
+controlled environment.  This allows uvesafb to function on arches other
+than x86.  Check the v86d documentation for a list of currently supported
+arches.
+
+v86d source code can be downloaded from the following website:
+
+  https://github.com/mjanusz/v86d
+
+Please refer to the v86d documentation for detailed configuration and
+installation instructions.
+
+Note that the v86d userspace helper has to be available at all times in
+order for uvesafb to work properly.  If you want to use uvesafb during
+early boot, you will have to include v86d into an initramfs image, and
+either compile it into the kernel or use it as an initrd.
+
+2. Caveats and limitations
+--------------------------
+
+uvesafb is a _generic_ driver which supports a wide variety of video
+cards, but which is ultimately limited by the Video BIOS interface.
+The most important limitations are:
+
+- Lack of any type of acceleration.
+- A strict and limited set of supported video modes.  Often the native
+  or most optimal resolution/refresh rate for your setup will not work
+  with uvesafb, simply because the Video BIOS doesn't support the
+  video mode you want to use.  This can be especially painful with
+  widescreen panels, where native video modes don't have the 4:3 aspect
+  ratio, which is what most BIOS-es are limited to.
+- Adjusting the refresh rate is only possible with a VBE 3.0 compliant
+  Video BIOS.  Note that many nVidia Video BIOS-es claim to be VBE 3.0
+  compliant, while they simply ignore any refresh rate settings.
+
+3. Configuration
+----------------
+
+uvesafb can be compiled either as a module, or directly into the kernel.
+In both cases it supports the same set of configuration options, which
+are either given on the kernel command line or as module parameters, e.g.::
+
+ video=uvesafb:1024x768-32,mtrr:3,ywrap (compiled into the kernel)
+
+ # modprobe uvesafb mode_option=1024x768-32 mtrr=3 scroll=ywrap  (module)
+
+Accepted options:
+
+======= =========================================================
+ypan    Enable display panning using the VESA protected mode
+	interface.  The visible screen is just a window of the
+	video memory, console scrolling is done by changing the
+	start of the window.  This option is available on x86
+	only and is the default option on that architecture.
+
+ywrap   Same as ypan, but assumes your gfx board can wrap-around
+	the video memory (i.e. starts reading from top if it
+	reaches the end of video memory).  Faster than ypan.
+	Available on x86 only.
+
+redraw  Scroll by redrawing the affected part of the screen, this
+	is the default on non-x86.
+======= =========================================================
+
+(If you're using uvesafb as a module, the above three options are
+used a parameter of the scroll option, e.g. scroll=ypan.)
+
+=========== ====================================================================
+vgapal      Use the standard VGA registers for palette changes.
+
+pmipal      Use the protected mode interface for palette changes.
+            This is the default if the protected mode interface is
+            available.  Available on x86 only.
+
+mtrr:n      Setup memory type range registers for the framebuffer
+            where n:
+
+                - 0 - disabled (equivalent to nomtrr)
+                - 3 - write-combining (default)
+
+            Values other than 0 and 3 will result in a warning and will be
+            treated just like 3.
+
+nomtrr      Do not use memory type range registers.
+
+vremap:n
+            Remap 'n' MiB of video RAM.  If 0 or not specified, remap memory
+            according to video mode.
+
+vtotal:n    If the video BIOS of your card incorrectly determines the total
+            amount of video RAM, use this option to override the BIOS (in MiB).
+
+<mode>      The mode you want to set, in the standard modedb format.  Refer to
+            modedb.txt for a detailed description.  When uvesafb is compiled as
+            a module, the mode string should be provided as a value of the
+            'mode_option' option.
+
+vbemode:x   Force the use of VBE mode x.  The mode will only be set if it's
+            found in the VBE-provided list of supported modes.
+            NOTE: The mode number 'x' should be specified in VESA mode number
+            notation, not the Linux kernel one (eg. 257 instead of 769).
+            HINT: If you use this option because normal <mode> parameter does
+            not work for you and you use a X server, you'll probably want to
+            set the 'nocrtc' option to ensure that the video mode is properly
+            restored after console <-> X switches.
+
+nocrtc      Do not use CRTC timings while setting the video mode.  This option
+            has any effect only if the Video BIOS is VBE 3.0 compliant.  Use it
+            if you have problems with modes set the standard way.  Note that
+            using this option implies that any refresh rate adjustments will
+            be ignored and the refresh rate will stay at your BIOS default
+            (60 Hz).
+
+noedid      Do not try to fetch and use EDID-provided modes.
+
+noblank     Disable hardware blanking.
+
+v86d:path   Set path to the v86d executable. This option is only available as
+            a module parameter, and not as a part of the video= string.  If you
+            need to use it and have uvesafb built into the kernel, use
+            uvesafb.v86d="path".
+=========== ====================================================================
+
+Additionally, the following parameters may be provided.  They all override the
+EDID-provided values and BIOS defaults.  Refer to your monitor's specs to get
+the correct values for maxhf, maxvf and maxclk for your hardware.
+
+=========== ======================================
+maxhf:n     Maximum horizontal frequency (in kHz).
+maxvf:n     Maximum vertical frequency (in Hz).
+maxclk:n    Maximum pixel clock (in MHz).
+=========== ======================================
+
+4. The sysfs interface
+----------------------
+
+uvesafb provides several sysfs nodes for configurable parameters and
+additional information.
+
+Driver attributes:
+
+/sys/bus/platform/drivers/uvesafb
+  v86d
+    (default: /sbin/v86d)
+
+    Path to the v86d executable. v86d is started by uvesafb
+    if an instance of the daemon isn't already running.
+
+Device attributes:
+
+/sys/bus/platform/drivers/uvesafb/uvesafb.0
+  nocrtc
+    Use the default refresh rate (60 Hz) if set to 1.
+
+  oem_product_name, oem_product_rev, oem_string, oem_vendor
+    Information about the card and its maker.
+
+  vbe_modes
+    A list of video modes supported by the Video BIOS along with their
+    VBE mode numbers in hex.
+
+  vbe_version
+    A BCD value indicating the implemented VBE standard.
+
+5. Miscellaneous
+----------------
+
+Uvesafb will set a video mode with the default refresh rate and timings
+from the Video BIOS if you set pixclock to 0 in fb_var_screeninfo.
+
+
+
+ Michal Januszewski <spock@gentoo.org>
+
+ Last updated: 2017-10-10
+
+ Documentation of the uvesafb options is loosely based on vesafb.txt.
diff --git a/Documentation/fb/uvesafb.txt b/Documentation/fb/uvesafb.txt
deleted file mode 100644
index aa924196c366..000000000000
--- a/Documentation/fb/uvesafb.txt
+++ /dev/null
@@ -1,184 +0,0 @@
-
-uvesafb - A Generic Driver for VBE2+ compliant video cards
-==========================================================
-
-1. Requirements
----------------
-
-uvesafb should work with any video card that has a Video BIOS compliant
-with the VBE 2.0 standard.
-
-Unlike other drivers, uvesafb makes use of a userspace helper called
-v86d.  v86d is used to run the x86 Video BIOS code in a simulated and
-controlled environment.  This allows uvesafb to function on arches other
-than x86.  Check the v86d documentation for a list of currently supported
-arches.
-
-v86d source code can be downloaded from the following website:
-
-  https://github.com/mjanusz/v86d
-
-Please refer to the v86d documentation for detailed configuration and
-installation instructions.
-
-Note that the v86d userspace helper has to be available at all times in
-order for uvesafb to work properly.  If you want to use uvesafb during
-early boot, you will have to include v86d into an initramfs image, and
-either compile it into the kernel or use it as an initrd.
-
-2. Caveats and limitations
---------------------------
-
-uvesafb is a _generic_ driver which supports a wide variety of video
-cards, but which is ultimately limited by the Video BIOS interface.
-The most important limitations are:
-
-- Lack of any type of acceleration.
-- A strict and limited set of supported video modes.  Often the native
-  or most optimal resolution/refresh rate for your setup will not work
-  with uvesafb, simply because the Video BIOS doesn't support the
-  video mode you want to use.  This can be especially painful with
-  widescreen panels, where native video modes don't have the 4:3 aspect
-  ratio, which is what most BIOS-es are limited to.
-- Adjusting the refresh rate is only possible with a VBE 3.0 compliant
-  Video BIOS.  Note that many nVidia Video BIOS-es claim to be VBE 3.0
-  compliant, while they simply ignore any refresh rate settings.
-
-3. Configuration
-----------------
-
-uvesafb can be compiled either as a module, or directly into the kernel.
-In both cases it supports the same set of configuration options, which
-are either given on the kernel command line or as module parameters, e.g.:
-
- video=uvesafb:1024x768-32,mtrr:3,ywrap (compiled into the kernel)
-
- # modprobe uvesafb mode_option=1024x768-32 mtrr=3 scroll=ywrap  (module)
-
-Accepted options:
-
-ypan    Enable display panning using the VESA protected mode
-        interface.  The visible screen is just a window of the
-        video memory, console scrolling is done by changing the
-        start of the window.  This option is available on x86
-        only and is the default option on that architecture.
-
-ywrap   Same as ypan, but assumes your gfx board can wrap-around
-        the video memory (i.e. starts reading from top if it
-        reaches the end of video memory).  Faster than ypan.
-        Available on x86 only.
-
-redraw  Scroll by redrawing the affected part of the screen, this
-        is the default on non-x86.
-
-(If you're using uvesafb as a module, the above three options are
- used a parameter of the scroll option, e.g. scroll=ypan.)
-
-vgapal  Use the standard VGA registers for palette changes.
-
-pmipal  Use the protected mode interface for palette changes.
-        This is the default if the protected mode interface is
-        available.  Available on x86 only.
-
-mtrr:n  Setup memory type range registers for the framebuffer
-        where n:
-              0 - disabled (equivalent to nomtrr)
-              3 - write-combining (default)
-
-	Values other than 0 and 3 will result in a warning and will be
-	treated just like 3.
-
-nomtrr  Do not use memory type range registers.
-
-vremap:n
-        Remap 'n' MiB of video RAM.  If 0 or not specified, remap memory
-        according to video mode.
-
-vtotal:n
-        If the video BIOS of your card incorrectly determines the total
-        amount of video RAM, use this option to override the BIOS (in MiB).
-
-<mode>  The mode you want to set, in the standard modedb format.  Refer to
-        modedb.txt for a detailed description.  When uvesafb is compiled as
-        a module, the mode string should be provided as a value of the
-        'mode_option' option.
-
-vbemode:x
-        Force the use of VBE mode x.  The mode will only be set if it's
-        found in the VBE-provided list of supported modes.
-        NOTE: The mode number 'x' should be specified in VESA mode number
-        notation, not the Linux kernel one (eg. 257 instead of 769).
-        HINT: If you use this option because normal <mode> parameter does
-        not work for you and you use a X server, you'll probably want to
-        set the 'nocrtc' option to ensure that the video mode is properly
-        restored after console <-> X switches.
-
-nocrtc  Do not use CRTC timings while setting the video mode.  This option
-        has any effect only if the Video BIOS is VBE 3.0 compliant.  Use it
-        if you have problems with modes set the standard way.  Note that
-        using this option implies that any refresh rate adjustments will
-        be ignored and the refresh rate will stay at your BIOS default (60 Hz).
-
-noedid  Do not try to fetch and use EDID-provided modes.
-
-noblank Disable hardware blanking.
-
-v86d:path
-        Set path to the v86d executable. This option is only available as
-        a module parameter, and not as a part of the video= string.  If you
-        need to use it and have uvesafb built into the kernel, use
-        uvesafb.v86d="path".
-
-Additionally, the following parameters may be provided.  They all override the
-EDID-provided values and BIOS defaults.  Refer to your monitor's specs to get
-the correct values for maxhf, maxvf and maxclk for your hardware.
-
-maxhf:n     Maximum horizontal frequency (in kHz).
-maxvf:n     Maximum vertical frequency (in Hz).
-maxclk:n    Maximum pixel clock (in MHz).
-
-4. The sysfs interface
-----------------------
-
-uvesafb provides several sysfs nodes for configurable parameters and
-additional information.
-
-Driver attributes:
-
-/sys/bus/platform/drivers/uvesafb
-  - v86d (default: /sbin/v86d)
-    Path to the v86d executable. v86d is started by uvesafb
-    if an instance of the daemon isn't already running.
-
-Device attributes:
-
-/sys/bus/platform/drivers/uvesafb/uvesafb.0
-  - nocrtc
-    Use the default refresh rate (60 Hz) if set to 1.
-
-  - oem_product_name
-  - oem_product_rev
-  - oem_string
-  - oem_vendor
-    Information about the card and its maker.
-
-  - vbe_modes
-    A list of video modes supported by the Video BIOS along with their
-    VBE mode numbers in hex.
-
-  - vbe_version
-    A BCD value indicating the implemented VBE standard.
-
-5. Miscellaneous
-----------------
-
-Uvesafb will set a video mode with the default refresh rate and timings
-from the Video BIOS if you set pixclock to 0 in fb_var_screeninfo.
-
-
---
- Michal Januszewski <spock@gentoo.org>
- Last updated: 2017-10-10
-
- Documentation of the uvesafb options is loosely based on vesafb.txt.
-
diff --git a/Documentation/fb/vesafb.rst b/Documentation/fb/vesafb.rst
new file mode 100644
index 000000000000..2ed0dfb661cf
--- /dev/null
+++ b/Documentation/fb/vesafb.rst
@@ -0,0 +1,192 @@
+===============
+What is vesafb?
+===============
+
+This is a generic driver for a graphic framebuffer on intel boxes.
+
+The idea is simple:  Turn on graphics mode at boot time with the help
+of the BIOS, and use this as framebuffer device /dev/fb0, like the m68k
+(and other) ports do.
+
+This means we decide at boot time whenever we want to run in text or
+graphics mode.  Switching mode later on (in protected mode) is
+impossible; BIOS calls work in real mode only.  VESA BIOS Extensions
+Version 2.0 are required, because we need a linear frame buffer.
+
+Advantages:
+
+ * It provides a nice large console (128 cols + 48 lines with 1024x768)
+   without using tiny, unreadable fonts.
+ * You can run XF68_FBDev on top of /dev/fb0 (=> non-accelerated X11
+   support for every VBE 2.0 compliant graphics board).
+ * Most important: boot logo :-)
+
+Disadvantages:
+
+ * graphic mode is slower than text mode...
+
+
+How to use it?
+==============
+
+Switching modes is done using the vga=... boot parameter.  Read
+Documentation/svga.txt for details.
+
+You should compile in both vgacon (for text mode) and vesafb (for
+graphics mode). Which of them takes over the console depends on
+whenever the specified mode is text or graphics.
+
+The graphic modes are NOT in the list which you get if you boot with
+vga=ask and hit return. The mode you wish to use is derived from the
+VESA mode number. Here are those VESA mode numbers:
+
+====== =======  =======  ======== =========
+colors 640x480  800x600  1024x768 1280x1024
+====== =======  =======  ======== =========
+256    0x101    0x103    0x105    0x107
+32k    0x110    0x113    0x116    0x119
+64k    0x111    0x114    0x117    0x11A
+16M    0x112    0x115    0x118    0x11B
+====== =======  =======  ======== =========
+
+
+The video mode number of the Linux kernel is the VESA mode number plus
+0x200:
+
+ Linux_kernel_mode_number = VESA_mode_number + 0x200
+
+So the table for the Kernel mode numbers are:
+
+====== =======  =======  ======== =========
+colors 640x480  800x600  1024x768 1280x1024
+====== =======  =======  ======== =========
+256    0x301    0x303    0x305    0x307
+32k    0x310    0x313    0x316    0x319
+64k    0x311    0x314    0x317    0x31A
+16M    0x312    0x315    0x318    0x31B
+====== =======  =======  ======== =========
+
+To enable one of those modes you have to specify "vga=ask" in the
+lilo.conf file and rerun LILO. Then you can type in the desired
+mode at the "vga=ask" prompt. For example if you like to use
+1024x768x256 colors you have to say "305" at this prompt.
+
+If this does not work, this might be because your BIOS does not support
+linear framebuffers or because it does not support this mode at all.
+Even if your board does, it might be the BIOS which does not.  VESA BIOS
+Extensions v2.0 are required, 1.2 is NOT sufficient.  You will get a
+"bad mode number" message if something goes wrong.
+
+1. Note: LILO cannot handle hex, for booting directly with
+   "vga=mode-number" you have to transform the numbers to decimal.
+2. Note: Some newer versions of LILO appear to work with those hex values,
+   if you set the 0x in front of the numbers.
+
+X11
+===
+
+XF68_FBDev should work just fine, but it is non-accelerated.  Running
+another (accelerated) X-Server like XF86_SVGA might or might not work.
+It depends on X-Server and graphics board.
+
+The X-Server must restore the video mode correctly, else you end up
+with a broken console (and vesafb cannot do anything about this).
+
+
+Refresh rates
+=============
+
+There is no way to change the vesafb video mode and/or timings after
+booting linux.  If you are not happy with the 60 Hz refresh rate, you
+have these options:
+
+ * configure and load the DOS-Tools for the graphics board (if
+   available) and boot linux with loadlin.
+ * use a native driver (matroxfb/atyfb) instead if vesafb.  If none
+   is available, write a new one!
+ * VBE 3.0 might work too.  I have neither a gfx board with VBE 3.0
+   support nor the specs, so I have not checked this yet.
+
+
+Configuration
+=============
+
+The VESA BIOS provides protected mode interface for changing
+some parameters.  vesafb can use it for palette changes and
+to pan the display.  It is turned off by default because it
+seems not to work with some BIOS versions, but there are options
+to turn it on.
+
+You can pass options to vesafb using "video=vesafb:option" on
+the kernel command line.  Multiple options should be separated
+by comma, like this: "video=vesafb:ypan,inverse"
+
+Accepted options:
+
+inverse	use inverse color map
+
+========= ======================================================================
+ypan	  enable display panning using the VESA protected mode
+          interface.  The visible screen is just a window of the
+          video memory, console scrolling is done by changing the
+          start of the window.
+
+          pro:
+
+                * scrolling (fullscreen) is fast, because there is
+		  no need to copy around data.
+		* You'll get scrollback (the Shift-PgUp thing),
+		  the video memory can be used as scrollback buffer
+
+          kontra:
+
+		* scrolling only parts of the screen causes some
+		  ugly flicker effects (boot logo flickers for
+		  example).
+
+ywrap	  Same as ypan, but assumes your gfx board can wrap-around
+          the video memory (i.e. starts reading from top if it
+          reaches the end of video memory).  Faster than ypan.
+
+redraw	  Scroll by redrawing the affected part of the screen, this
+          is the safe (and slow) default.
+
+
+vgapal	  Use the standard vga registers for palette changes.
+          This is the default.
+pmipal    Use the protected mode interface for palette changes.
+
+mtrr:n	  Setup memory type range registers for the vesafb framebuffer
+          where n:
+
+              - 0 - disabled (equivalent to nomtrr) (default)
+              - 1 - uncachable
+              - 2 - write-back
+              - 3 - write-combining
+              - 4 - write-through
+
+          If you see the following in dmesg, choose the type that matches the
+          old one. In this example, use "mtrr:2".
+...
+mtrr:     type mismatch for e0000000,8000000 old: write-back new:
+	  write-combining
+...
+
+nomtrr    disable mtrr
+
+vremap:n
+          Remap 'n' MiB of video RAM. If 0 or not specified, remap memory
+          according to video mode. (2.5.66 patch/idea by Antonino Daplas
+          reversed to give override possibility (allocate more fb memory
+          than the kernel would) to 2.4 by tmb@iki.fi)
+
+vtotal:n  If the video BIOS of your card incorrectly determines the total
+          amount of video RAM, use this option to override the BIOS (in MiB).
+========= ======================================================================
+
+Have fun!
+
+Gerd Knorr <kraxel@goldbach.in-berlin.de>
+
+Minor (mostly typo) changes
+by Nico Schmoigl <schmoigl@rumms.uni-mannheim.de>
diff --git a/Documentation/fb/vesafb.txt b/Documentation/fb/vesafb.txt
deleted file mode 100644
index 413bb73235be..000000000000
--- a/Documentation/fb/vesafb.txt
+++ /dev/null
@@ -1,181 +0,0 @@
-
-What is vesafb?
-===============
-
-This is a generic driver for a graphic framebuffer on intel boxes.
-
-The idea is simple:  Turn on graphics mode at boot time with the help
-of the BIOS, and use this as framebuffer device /dev/fb0, like the m68k
-(and other) ports do.
-
-This means we decide at boot time whenever we want to run in text or
-graphics mode.  Switching mode later on (in protected mode) is
-impossible; BIOS calls work in real mode only.  VESA BIOS Extensions
-Version 2.0 are required, because we need a linear frame buffer.
-
-Advantages:
-
- * It provides a nice large console (128 cols + 48 lines with 1024x768)
-   without using tiny, unreadable fonts.
- * You can run XF68_FBDev on top of /dev/fb0 (=> non-accelerated X11
-   support for every VBE 2.0 compliant graphics board).
- * Most important: boot logo :-)
-
-Disadvantages:
-
- * graphic mode is slower than text mode...
-
-
-How to use it?
-==============
-
-Switching modes is done using the vga=... boot parameter.  Read
-Documentation/svga.txt for details.
-
-You should compile in both vgacon (for text mode) and vesafb (for
-graphics mode). Which of them takes over the console depends on
-whenever the specified mode is text or graphics.
-
-The graphic modes are NOT in the list which you get if you boot with
-vga=ask and hit return. The mode you wish to use is derived from the
-VESA mode number. Here are those VESA mode numbers:
-
-    | 640x480  800x600  1024x768 1280x1024
-----+-------------------------------------
-256 |  0x101    0x103    0x105    0x107   
-32k |  0x110    0x113    0x116    0x119   
-64k |  0x111    0x114    0x117    0x11A   
-16M |  0x112    0x115    0x118    0x11B   
-
-The video mode number of the Linux kernel is the VESA mode number plus
-0x200.
- 
- Linux_kernel_mode_number = VESA_mode_number + 0x200
-
-So the table for the Kernel mode numbers are:
-
-    | 640x480  800x600  1024x768 1280x1024
-----+-------------------------------------
-256 |  0x301    0x303    0x305    0x307   
-32k |  0x310    0x313    0x316    0x319   
-64k |  0x311    0x314    0x317    0x31A   
-16M |  0x312    0x315    0x318    0x31B   
-
-To enable one of those modes you have to specify "vga=ask" in the
-lilo.conf file and rerun LILO. Then you can type in the desired
-mode at the "vga=ask" prompt. For example if you like to use 
-1024x768x256 colors you have to say "305" at this prompt.
-
-If this does not work, this might be because your BIOS does not support
-linear framebuffers or because it does not support this mode at all.
-Even if your board does, it might be the BIOS which does not.  VESA BIOS
-Extensions v2.0 are required, 1.2 is NOT sufficient.  You will get a
-"bad mode number" message if something goes wrong.
-
-1. Note: LILO cannot handle hex, for booting directly with 
-         "vga=mode-number" you have to transform the numbers to decimal.
-2. Note: Some newer versions of LILO appear to work with those hex values,
-         if you set the 0x in front of the numbers.
-
-X11
-===
-
-XF68_FBDev should work just fine, but it is non-accelerated.  Running
-another (accelerated) X-Server like XF86_SVGA might or might not work.
-It depends on X-Server and graphics board.
-
-The X-Server must restore the video mode correctly, else you end up
-with a broken console (and vesafb cannot do anything about this).
-
-
-Refresh rates
-=============
-
-There is no way to change the vesafb video mode and/or timings after
-booting linux.  If you are not happy with the 60 Hz refresh rate, you
-have these options:
-
- * configure and load the DOS-Tools for the graphics board (if
-   available) and boot linux with loadlin.
- * use a native driver (matroxfb/atyfb) instead if vesafb.  If none
-   is available, write a new one!
- * VBE 3.0 might work too.  I have neither a gfx board with VBE 3.0
-   support nor the specs, so I have not checked this yet.
-
-
-Configuration
-=============
-
-The VESA BIOS provides protected mode interface for changing
-some parameters.  vesafb can use it for palette changes and
-to pan the display.  It is turned off by default because it
-seems not to work with some BIOS versions, but there are options
-to turn it on.
-
-You can pass options to vesafb using "video=vesafb:option" on
-the kernel command line.  Multiple options should be separated
-by comma, like this: "video=vesafb:ypan,inverse"
-
-Accepted options:
-
-inverse	use inverse color map
-
-ypan	enable display panning using the VESA protected mode 
-	interface.  The visible screen is just a window of the
-	video memory, console scrolling is done by changing the
-	start of the window.
-	pro:	* scrolling (fullscreen) is fast, because there is
-		  no need to copy around data.
-		* You'll get scrollback (the Shift-PgUp thing),
-		  the video memory can be used as scrollback buffer
-	kontra: * scrolling only parts of the screen causes some
-		  ugly flicker effects (boot logo flickers for
-		  example).
-
-ywrap	Same as ypan, but assumes your gfx board can wrap-around 
-	the video memory (i.e. starts reading from top if it
-	reaches the end of video memory).  Faster than ypan.
-
-redraw	scroll by redrawing the affected part of the screen, this
-	is the safe (and slow) default.
-
-
-vgapal	Use the standard vga registers for palette changes.
-	This is the default.
-pmipal	Use the protected mode interface for palette changes.
-
-mtrr:n	setup memory type range registers for the vesafb framebuffer
-	where n:
-	      0 - disabled (equivalent to nomtrr) (default)
-	      1 - uncachable
-	      2 - write-back
-	      3 - write-combining
-	      4 - write-through
-
-	If you see the following in dmesg, choose the type that matches the
-	old one. In this example, use "mtrr:2".
-...
-mtrr: type mismatch for e0000000,8000000 old: write-back new: write-combining
-...
-
-nomtrr  disable mtrr
-
-vremap:n
-        remap 'n' MiB of video RAM. If 0 or not specified, remap memory
-	according to video mode. (2.5.66 patch/idea by Antonino Daplas
-	reversed to give override possibility (allocate more fb memory
-	than the kernel would) to 2.4 by tmb@iki.fi)
-
-vtotal:n
-        if the video BIOS of your card incorrectly determines the total
-        amount of video RAM, use this option to override the BIOS (in MiB).
-
-Have fun!
-
-  Gerd
-
---
-Gerd Knorr <kraxel@goldbach.in-berlin.de>
-
-Minor (mostly typo) changes 
-by Nico Schmoigl <schmoigl@rumms.uni-mannheim.de>
diff --git a/Documentation/fb/viafb.rst b/Documentation/fb/viafb.rst
new file mode 100644
index 000000000000..8eb7a3bb068c
--- /dev/null
+++ b/Documentation/fb/viafb.rst
@@ -0,0 +1,297 @@
+=======================================================
+VIA Integration Graphic Chip Console Framebuffer Driver
+=======================================================
+
+Platform
+--------
+    The console framebuffer driver is for graphics chips of
+    VIA UniChrome Family
+    (CLE266, PM800 / CN400 / CN300,
+    P4M800CE / P4M800Pro / CN700 / VN800,
+    CX700 / VX700, K8M890, P4M890,
+    CN896 / P4M900, VX800, VX855)
+
+Driver features
+---------------
+    Device: CRT, LCD, DVI
+
+    Support viafb_mode::
+
+	CRT:
+	    640x480(60, 75, 85, 100, 120 Hz), 720x480(60 Hz),
+	    720x576(60 Hz), 800x600(60, 75, 85, 100, 120 Hz),
+	    848x480(60 Hz), 856x480(60 Hz), 1024x512(60 Hz),
+	    1024x768(60, 75, 85, 100 Hz), 1152x864(75 Hz),
+	    1280x768(60 Hz), 1280x960(60 Hz), 1280x1024(60, 75, 85 Hz),
+	    1440x1050(60 Hz), 1600x1200(60, 75 Hz), 1280x720(60 Hz),
+	    1920x1080(60 Hz), 1400x1050(60 Hz), 800x480(60 Hz)
+
+    color depth: 8 bpp, 16 bpp, 32 bpp supports.
+
+    Support 2D hardware accelerator.
+
+Using the viafb module
+----------------------
+    Start viafb with default settings::
+
+	#modprobe viafb
+
+    Start viafb with user options::
+
+	#modprobe viafb viafb_mode=800x600 viafb_bpp=16 viafb_refresh=60
+		  viafb_active_dev=CRT+DVI viafb_dvi_port=DVP1
+		  viafb_mode1=1024x768 viafb_bpp=16 viafb_refresh1=60
+		  viafb_SAMM_ON=1
+
+    viafb_mode:
+	- 640x480 (default)
+	- 720x480
+	- 800x600
+	- 1024x768
+
+    viafb_bpp:
+	- 8, 16, 32 (default:32)
+
+    viafb_refresh:
+	- 60, 75, 85, 100, 120 (default:60)
+
+    viafb_lcd_dsp_method:
+	- 0 : expansion (default)
+	- 1 : centering
+
+    viafb_lcd_mode:
+	0 : LCD panel with LSB data format input (default)
+	1 : LCD panel with MSB data format input
+
+    viafb_lcd_panel_id:
+	- 0 : Resolution: 640x480, Channel: single, Dithering: Enable
+	- 1 : Resolution: 800x600, Channel: single, Dithering: Enable
+	- 2 : Resolution: 1024x768, Channel: single, Dithering: Enable (default)
+	- 3 : Resolution: 1280x768, Channel: single, Dithering: Enable
+	- 4 : Resolution: 1280x1024, Channel: dual, Dithering: Enable
+	- 5 : Resolution: 1400x1050, Channel: dual, Dithering: Enable
+	- 6 : Resolution: 1600x1200, Channel: dual, Dithering: Enable
+
+	- 8 : Resolution: 800x480, Channel: single, Dithering: Enable
+	- 9 : Resolution: 1024x768, Channel: dual, Dithering: Enable
+	- 10: Resolution: 1024x768, Channel: single, Dithering: Disable
+	- 11: Resolution: 1024x768, Channel: dual, Dithering: Disable
+	- 12: Resolution: 1280x768, Channel: single, Dithering: Disable
+	- 13: Resolution: 1280x1024, Channel: dual, Dithering: Disable
+	- 14: Resolution: 1400x1050, Channel: dual, Dithering: Disable
+	- 15: Resolution: 1600x1200, Channel: dual, Dithering: Disable
+	- 16: Resolution: 1366x768, Channel: single, Dithering: Disable
+	- 17: Resolution: 1024x600, Channel: single, Dithering: Enable
+	- 18: Resolution: 1280x768, Channel: dual, Dithering: Enable
+	- 19: Resolution: 1280x800, Channel: single, Dithering: Enable
+
+    viafb_accel:
+	- 0 : No 2D Hardware Acceleration
+	- 1 : 2D Hardware Acceleration (default)
+
+    viafb_SAMM_ON:
+	- 0 : viafb_SAMM_ON disable (default)
+	- 1 : viafb_SAMM_ON enable
+
+    viafb_mode1: (secondary display device)
+	- 640x480 (default)
+	- 720x480
+	- 800x600
+	- 1024x768
+
+    viafb_bpp1: (secondary display device)
+	- 8, 16, 32 (default:32)
+
+    viafb_refresh1: (secondary display device)
+	- 60, 75, 85, 100, 120 (default:60)
+
+    viafb_active_dev:
+	This option is used to specify active devices.(CRT, DVI, CRT+LCD...)
+	DVI stands for DVI or HDMI, E.g., If you want to enable HDMI,
+	set viafb_active_dev=DVI. In SAMM case, the previous of
+	viafb_active_dev is primary device, and the following is
+	secondary device.
+
+	For example:
+
+	To enable one device, such as DVI only, we can use::
+
+	    modprobe viafb viafb_active_dev=DVI
+
+	To enable two devices, such as CRT+DVI::
+
+	    modprobe viafb viafb_active_dev=CRT+DVI;
+
+	For DuoView case, we can use::
+
+	    modprobe viafb viafb_active_dev=CRT+DVI
+
+	OR::
+
+	    modprobe viafb viafb_active_dev=DVI+CRT...
+
+	For SAMM case:
+
+	If CRT is primary and DVI is secondary, we should use::
+
+	    modprobe viafb viafb_active_dev=CRT+DVI viafb_SAMM_ON=1...
+
+	If DVI is primary and CRT is secondary, we should use::
+
+	    modprobe viafb viafb_active_dev=DVI+CRT viafb_SAMM_ON=1...
+
+    viafb_display_hardware_layout:
+	This option is used to specify display hardware layout for CX700 chip.
+
+	- 1 : LCD only
+	- 2 : DVI only
+	- 3 : LCD+DVI (default)
+	- 4 : LCD1+LCD2 (internal + internal)
+	- 16: LCD1+ExternalLCD2 (internal + external)
+
+    viafb_second_size:
+	This option is used to set second device memory size(MB) in SAMM case.
+	The minimal size is 16.
+
+    viafb_platform_epia_dvi:
+	This option is used to enable DVI on EPIA - M
+
+	- 0 : No DVI on EPIA - M (default)
+	- 1 : DVI on EPIA - M
+
+    viafb_bus_width:
+	When using 24 - Bit Bus Width Digital Interface,
+	this option should be set.
+
+	- 12: 12-Bit LVDS or 12-Bit TMDS (default)
+	- 24: 24-Bit LVDS or 24-Bit TMDS
+
+    viafb_device_lcd_dualedge:
+	When using Dual Edge Panel, this option should be set.
+
+	- 0 : No Dual Edge Panel (default)
+	- 1 : Dual Edge Panel
+
+    viafb_lcd_port:
+	This option is used to specify LCD output port,
+	available values are "DVP0" "DVP1" "DFP_HIGHLOW" "DFP_HIGH" "DFP_LOW".
+
+	for external LCD + external DVI on CX700(External LCD is on DVP0),
+	we should use::
+
+	    modprobe viafb viafb_lcd_port=DVP0...
+
+Notes:
+    1. CRT may not display properly for DuoView CRT & DVI display at
+       the "640x480" PAL mode with DVI overscan enabled.
+    2. SAMM stands for single adapter multi monitors. It is different from
+       multi-head since SAMM support multi monitor at driver layers, thus fbcon
+       layer doesn't even know about it; SAMM's second screen doesn't have a
+       device node file, thus a user mode application can't access it directly.
+       When SAMM is enabled, viafb_mode and viafb_mode1, viafb_bpp and
+       viafb_bpp1, viafb_refresh and viafb_refresh1 can be different.
+    3. When console is depending on viafbinfo1, dynamically change resolution
+       and bpp, need to call VIAFB specified ioctl interface VIAFB_SET_DEVICE
+       instead of calling common ioctl function FBIOPUT_VSCREENINFO since
+       viafb doesn't support multi-head well, or it will cause screen crush.
+
+
+Configure viafb with "fbset" tool
+---------------------------------
+
+    "fbset" is an inbox utility of Linux.
+
+    1. Inquire current viafb information, type::
+
+	   # fbset -i
+
+    2. Set various resolutions and viafb_refresh rates::
+
+	   # fbset <resolution-vertical_sync>
+
+       example::
+
+	   # fbset "1024x768-75"
+
+       or::
+
+	   # fbset -g 1024 768 1024 768 32
+
+       Check the file "/etc/fb.modes" to find display modes available.
+
+    3. Set the color depth::
+
+	   # fbset -depth <value>
+
+       example::
+
+	   # fbset -depth 16
+
+
+Configure viafb via /proc
+-------------------------
+    The following files exist in /proc/viafb
+
+    supported_output_devices
+	This read-only file contains a full ',' separated list containing all
+	output devices that could be available on your platform. It is likely
+	that not all of those have a connector on your hardware but it should
+	provide a good starting point to figure out which of those names match
+	a real connector.
+
+	Example::
+
+		# cat /proc/viafb/supported_output_devices
+
+    iga1/output_devices, iga2/output_devices
+	These two files are readable and writable. iga1 and iga2 are the two
+	independent units that produce the screen image. Those images can be
+	forwarded to one or more output devices. Reading those files is a way
+	to query which output devices are currently used by an iga.
+
+	Example::
+
+		# cat /proc/viafb/iga1/output_devices
+
+	If there are no output devices printed the output of this iga is lost.
+	This can happen for example if only one (the other) iga is used.
+	Writing to these files allows adjusting the output devices during
+	runtime. One can add new devices, remove existing ones or switch
+	between igas. Essentially you can write a ',' separated list of device
+	names (or a single one) in the same format as the output to those
+	files. You can add a '+' or '-' as a prefix allowing simple addition
+	and removal of devices. So a prefix '+' adds the devices from your list
+	to the already existing ones, '-' removes the listed devices from the
+	existing ones and if no prefix is given it replaces all existing ones
+	with the listed ones. If you remove devices they are expected to turn
+	off. If you add devices that are already part of the other iga they are
+	removed there and added to the new one.
+
+	Examples:
+
+	Add CRT as output device to iga1::
+
+		# echo +CRT > /proc/viafb/iga1/output_devices
+
+	Remove (turn off) DVP1 and LVDS1 as output devices of iga2::
+
+		# echo -DVP1,LVDS1 > /proc/viafb/iga2/output_devices
+
+	Replace all iga1 output devices by CRT::
+
+		# echo CRT > /proc/viafb/iga1/output_devices
+
+
+Bootup with viafb
+-----------------
+
+Add the following line to your grub.conf::
+
+    append = "video=viafb:viafb_mode=1024x768,viafb_bpp=32,viafb_refresh=85"
+
+
+VIA Framebuffer modes
+=====================
+
+.. include:: viafb.modes
+   :literal:
diff --git a/Documentation/fb/viafb.txt b/Documentation/fb/viafb.txt
deleted file mode 100644
index 1cb2462a71ce..000000000000
--- a/Documentation/fb/viafb.txt
+++ /dev/null
@@ -1,252 +0,0 @@
-
-        VIA Integration Graphic Chip Console Framebuffer Driver
-
-[Platform]
------------------------
-    The console framebuffer driver is for graphics chips of
-    VIA UniChrome Family(CLE266, PM800 / CN400 / CN300,
-                        P4M800CE / P4M800Pro / CN700 / VN800,
-                        CX700 / VX700, K8M890, P4M890,
-                        CN896 / P4M900, VX800, VX855)
-
-[Driver features]
-------------------------
-    Device: CRT, LCD, DVI
-
-    Support viafb_mode:
-        CRT:
-            640x480(60, 75, 85, 100, 120 Hz), 720x480(60 Hz),
-            720x576(60 Hz), 800x600(60, 75, 85, 100, 120 Hz),
-            848x480(60 Hz), 856x480(60 Hz), 1024x512(60 Hz),
-            1024x768(60, 75, 85, 100 Hz), 1152x864(75 Hz),
-            1280x768(60 Hz), 1280x960(60 Hz), 1280x1024(60, 75, 85 Hz),
-            1440x1050(60 Hz), 1600x1200(60, 75 Hz), 1280x720(60 Hz),
-            1920x1080(60 Hz), 1400x1050(60 Hz), 800x480(60 Hz)
-
-    color depth: 8 bpp, 16 bpp, 32 bpp supports.
-
-    Support 2D hardware accelerator.
-
-[Using the viafb module]
--- -- --------------------
-    Start viafb with default settings:
-        #modprobe viafb
-
-    Start viafb with user options:
-        #modprobe viafb viafb_mode=800x600 viafb_bpp=16 viafb_refresh=60
-                  viafb_active_dev=CRT+DVI viafb_dvi_port=DVP1
-                  viafb_mode1=1024x768 viafb_bpp=16 viafb_refresh1=60
-                  viafb_SAMM_ON=1
-
-    viafb_mode:
-        640x480 (default)
-        720x480
-        800x600
-        1024x768
-        ......
-
-    viafb_bpp:
-        8, 16, 32 (default:32)
-
-    viafb_refresh:
-        60, 75, 85, 100, 120 (default:60)
-
-    viafb_lcd_dsp_method:
-        0 : expansion (default)
-        1 : centering
-
-    viafb_lcd_mode:
-        0 : LCD panel with LSB data format input (default)
-        1 : LCD panel with MSB data format input
-
-    viafb_lcd_panel_id:
-        0 : Resolution: 640x480, Channel: single, Dithering: Enable
-        1 : Resolution: 800x600, Channel: single, Dithering: Enable
-        2 : Resolution: 1024x768, Channel: single, Dithering: Enable (default)
-        3 : Resolution: 1280x768, Channel: single, Dithering: Enable
-        4 : Resolution: 1280x1024, Channel: dual, Dithering: Enable
-        5 : Resolution: 1400x1050, Channel: dual, Dithering: Enable
-        6 : Resolution: 1600x1200, Channel: dual, Dithering: Enable
-
-        8 : Resolution: 800x480, Channel: single, Dithering: Enable
-        9 : Resolution: 1024x768, Channel: dual, Dithering: Enable
-        10: Resolution: 1024x768, Channel: single, Dithering: Disable
-        11: Resolution: 1024x768, Channel: dual, Dithering: Disable
-        12: Resolution: 1280x768, Channel: single, Dithering: Disable
-        13: Resolution: 1280x1024, Channel: dual, Dithering: Disable
-        14: Resolution: 1400x1050, Channel: dual, Dithering: Disable
-        15: Resolution: 1600x1200, Channel: dual, Dithering: Disable
-        16: Resolution: 1366x768, Channel: single, Dithering: Disable
-        17: Resolution: 1024x600, Channel: single, Dithering: Enable
-        18: Resolution: 1280x768, Channel: dual, Dithering: Enable
-        19: Resolution: 1280x800, Channel: single, Dithering: Enable
-
-    viafb_accel:
-        0 : No 2D Hardware Acceleration
-        1 : 2D Hardware Acceleration (default)
-
-    viafb_SAMM_ON:
-        0 : viafb_SAMM_ON disable (default)
-        1 : viafb_SAMM_ON enable
-
-    viafb_mode1: (secondary display device)
-        640x480 (default)
-        720x480
-        800x600
-        1024x768
-        ... ...
-
-    viafb_bpp1: (secondary display device)
-        8, 16, 32 (default:32)
-
-    viafb_refresh1: (secondary display device)
-        60, 75, 85, 100, 120 (default:60)
-
-    viafb_active_dev:
-        This option is used to specify active devices.(CRT, DVI, CRT+LCD...)
-        DVI stands for DVI or HDMI, E.g., If you want to enable HDMI,
-        set viafb_active_dev=DVI. In SAMM case, the previous of
-        viafb_active_dev is primary device, and the following is
-        secondary device.
-
-        For example:
-        To enable one device, such as DVI only, we can use:
-            modprobe viafb viafb_active_dev=DVI
-        To enable two devices, such as CRT+DVI:
-            modprobe viafb viafb_active_dev=CRT+DVI;
-
-        For DuoView case, we can use:
-            modprobe viafb viafb_active_dev=CRT+DVI
-            OR
-            modprobe viafb viafb_active_dev=DVI+CRT...
-
-        For SAMM case:
-        If CRT is primary and DVI is secondary, we should use:
-            modprobe viafb viafb_active_dev=CRT+DVI viafb_SAMM_ON=1...
-        If DVI is primary and CRT is secondary, we should use:
-            modprobe viafb viafb_active_dev=DVI+CRT viafb_SAMM_ON=1...
-
-    viafb_display_hardware_layout:
-        This option is used to specify display hardware layout for CX700 chip.
-        1 : LCD only
-        2 : DVI only
-        3 : LCD+DVI (default)
-        4 : LCD1+LCD2 (internal + internal)
-        16: LCD1+ExternalLCD2 (internal + external)
-
-    viafb_second_size:
-        This option is used to set second device memory size(MB) in SAMM case.
-        The minimal size is 16.
-
-    viafb_platform_epia_dvi:
-        This option is used to enable DVI on EPIA - M
-        0 : No DVI on EPIA - M (default)
-        1 : DVI on EPIA - M
-
-    viafb_bus_width:
-        When using 24 - Bit Bus Width Digital Interface,
-        this option should be set.
-        12: 12-Bit LVDS or 12-Bit TMDS (default)
-        24: 24-Bit LVDS or 24-Bit TMDS
-
-    viafb_device_lcd_dualedge:
-        When using Dual Edge Panel, this option should be set.
-        0 : No Dual Edge Panel (default)
-        1 : Dual Edge Panel
-
-    viafb_lcd_port:
-        This option is used to specify LCD output port,
-        available values are "DVP0" "DVP1" "DFP_HIGHLOW" "DFP_HIGH" "DFP_LOW".
-        for external LCD + external DVI on CX700(External LCD is on DVP0),
-        we should use:
-            modprobe viafb viafb_lcd_port=DVP0...
-
-Notes:
-    1. CRT may not display properly for DuoView CRT & DVI display at
-       the "640x480" PAL mode with DVI overscan enabled.
-    2. SAMM stands for single adapter multi monitors. It is different from
-       multi-head since SAMM support multi monitor at driver layers, thus fbcon
-       layer doesn't even know about it; SAMM's second screen doesn't have a
-       device node file, thus a user mode application can't access it directly.
-       When SAMM is enabled, viafb_mode and viafb_mode1, viafb_bpp and
-       viafb_bpp1, viafb_refresh and viafb_refresh1 can be different.
-    3. When console is depending on viafbinfo1, dynamically change resolution
-       and bpp, need to call VIAFB specified ioctl interface VIAFB_SET_DEVICE
-       instead of calling common ioctl function FBIOPUT_VSCREENINFO since
-       viafb doesn't support multi-head well, or it will cause screen crush.
-
-
-[Configure viafb with "fbset" tool]
------------------------------------
-    "fbset" is an inbox utility of Linux.
-    1. Inquire current viafb information, type,
-           # fbset -i
-
-    2. Set various resolutions and viafb_refresh rates,
-           # fbset <resolution-vertical_sync>
-
-       example,
-           # fbset "1024x768-75"
-       or
-           # fbset -g 1024 768 1024 768 32
-       Check the file "/etc/fb.modes" to find display modes available.
-
-    3. Set the color depth,
-           # fbset -depth <value>
-
-       example,
-           # fbset -depth 16
-
-
-[Configure viafb via /proc]
----------------------------
-    The following files exist in /proc/viafb
-
-    supported_output_devices
-
-        This read-only file contains a full ',' separated list containing all
-        output devices that could be available on your platform. It is likely
-        that not all of those have a connector on your hardware but it should
-        provide a good starting point to figure out which of those names match
-        a real connector.
-        Example:
-        # cat /proc/viafb/supported_output_devices
-
-    iga1/output_devices
-    iga2/output_devices
-
-        These two files are readable and writable. iga1 and iga2 are the two
-        independent units that produce the screen image. Those images can be
-        forwarded to one or more output devices. Reading those files is a way
-        to query which output devices are currently used by an iga.
-        Example:
-        # cat /proc/viafb/iga1/output_devices
-        If there are no output devices printed the output of this iga is lost.
-        This can happen for example if only one (the other) iga is used.
-        Writing to these files allows adjusting the output devices during
-        runtime. One can add new devices, remove existing ones or switch
-        between igas. Essentially you can write a ',' separated list of device
-        names (or a single one) in the same format as the output to those
-        files. You can add a '+' or '-' as a prefix allowing simple addition
-        and removal of devices. So a prefix '+' adds the devices from your list
-        to the already existing ones, '-' removes the listed devices from the
-        existing ones and if no prefix is given it replaces all existing ones
-        with the listed ones. If you remove devices they are expected to turn
-        off. If you add devices that are already part of the other iga they are
-        removed there and added to the new one.
-        Examples:
-        Add CRT as output device to iga1
-        # echo +CRT > /proc/viafb/iga1/output_devices
-
-        Remove (turn off) DVP1 and LVDS1 as output devices of iga2
-        # echo -DVP1,LVDS1 > /proc/viafb/iga2/output_devices
-
-        Replace all iga1 output devices by CRT
-        # echo CRT > /proc/viafb/iga1/output_devices
-
-
-[Bootup with viafb]:
---------------------
-    Add the following line to your grub.conf:
-    append = "video=viafb:viafb_mode=1024x768,viafb_bpp=32,viafb_refresh=85"
-
diff --git a/Documentation/fb/vt8623fb.rst b/Documentation/fb/vt8623fb.rst
new file mode 100644
index 000000000000..ba1730937dd8
--- /dev/null
+++ b/Documentation/fb/vt8623fb.rst
@@ -0,0 +1,64 @@
+===============================================================
+vt8623fb - fbdev driver for graphics core in VIA VT8623 chipset
+===============================================================
+
+
+Supported Hardware
+==================
+
+VIA VT8623 [CLE266] chipset and	its graphics core
+(known as CastleRock or Unichrome)
+
+I tested vt8623fb on VIA EPIA ML-6000
+
+
+Supported Features
+==================
+
+	*  4 bpp pseudocolor modes (with 18bit palette, two variants)
+	*  8 bpp pseudocolor mode (with 18bit palette)
+	* 16 bpp truecolor mode (RGB 565)
+	* 32 bpp truecolor mode (RGB 888)
+	* text mode (activated by bpp = 0)
+	* doublescan mode variant (not available in text mode)
+	* panning in both directions
+	* suspend/resume support
+	* DPMS support
+
+Text mode is supported even in higher resolutions, but there is limitation to
+lower pixclocks (maximum about 100 MHz). This limitation is not enforced by
+driver. Text mode supports 8bit wide fonts only (hardware limitation) and
+16bit tall fonts (driver limitation).
+
+There are two 4 bpp modes. First mode (selected if nonstd == 0) is mode with
+packed pixels, high nibble first. Second mode (selected if nonstd == 1) is mode
+with interleaved planes (1 byte interleave), MSB first. Both modes support
+8bit wide fonts only (driver limitation).
+
+Suspend/resume works on systems that initialize video card during resume and
+if device is active (for example used by fbcon).
+
+
+Missing Features
+================
+(alias TODO list)
+
+	* secondary (not initialized by BIOS) device support
+	* MMIO support
+	* interlaced mode variant
+	* support for fontwidths != 8 in 4 bpp modes
+	* support for fontheight != 16 in text mode
+	* hardware cursor
+	* video overlay support
+	* vsync synchronization
+	* acceleration support (8514-like 2D, busmaster transfers)
+
+
+Known bugs
+==========
+
+	* cursor disable in text mode doesn't work
+
+
+--
+Ondrej Zajicek <santiago@crfreenet.org>
diff --git a/Documentation/fb/vt8623fb.txt b/Documentation/fb/vt8623fb.txt
deleted file mode 100644
index f654576c56b7..000000000000
--- a/Documentation/fb/vt8623fb.txt
+++ /dev/null
@@ -1,64 +0,0 @@
-
-	vt8623fb - fbdev driver for graphics core in VIA VT8623 chipset
-	===============================================================
-
-
-Supported Hardware
-==================
-
-	VIA VT8623 [CLE266] chipset and	its graphics core
-		(known as CastleRock or Unichrome)
-
-I tested vt8623fb on VIA EPIA ML-6000
-
-
-Supported Features
-==================
-
-	*  4 bpp pseudocolor modes (with 18bit palette, two variants)
-	*  8 bpp pseudocolor mode (with 18bit palette)
-	* 16 bpp truecolor mode (RGB 565)
-	* 32 bpp truecolor mode (RGB 888)
-	* text mode (activated by bpp = 0)
-	* doublescan mode variant (not available in text mode)
-	* panning in both directions
-	* suspend/resume support
-	* DPMS support
-
-Text mode is supported even in higher resolutions, but there is limitation to
-lower pixclocks (maximum about 100 MHz). This limitation is not enforced by
-driver. Text mode supports 8bit wide fonts only (hardware limitation) and
-16bit tall fonts (driver limitation).
-
-There are two 4 bpp modes. First mode (selected if nonstd == 0) is mode with
-packed pixels, high nibble first. Second mode (selected if nonstd == 1) is mode
-with interleaved planes (1 byte interleave), MSB first. Both modes support
-8bit wide fonts only (driver limitation).
-
-Suspend/resume works on systems that initialize video card during resume and
-if device is active (for example used by fbcon).
-
-
-Missing Features
-================
-(alias TODO list)
-
-	* secondary (not initialized by BIOS) device support
-	* MMIO support
-	* interlaced mode variant
-	* support for fontwidths != 8 in 4 bpp modes
-	* support for fontheight != 16 in text mode
-	* hardware cursor
-	* video overlay support
-	* vsync synchronization
-	* acceleration support (8514-like 2D, busmaster transfers)
-
-
-Known bugs
-==========
-
-	* cursor disable in text mode doesn't work
-
-
---
-Ondrej Zajicek <santiago@crfreenet.org>
-- 
cgit 


From c220a1fae6c5df52ed3a02f88b86a27830ea0210 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:46 -0300
Subject: docs: fpga: convert docs to ReST and rename to *.rst

The dfl.txt file is almost there. It needs just a few
adjustments to be properly parsed.

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/fpga/dfl.rst   | 291 +++++++++++++++++++++++++++++++++++++++++++
 Documentation/fpga/dfl.txt   | 285 ------------------------------------------
 Documentation/fpga/index.rst |  17 +++
 3 files changed, 308 insertions(+), 285 deletions(-)
 create mode 100644 Documentation/fpga/dfl.rst
 delete mode 100644 Documentation/fpga/dfl.txt
 create mode 100644 Documentation/fpga/index.rst

(limited to 'Documentation')

diff --git a/Documentation/fpga/dfl.rst b/Documentation/fpga/dfl.rst
new file mode 100644
index 000000000000..2f125abd777f
--- /dev/null
+++ b/Documentation/fpga/dfl.rst
@@ -0,0 +1,291 @@
+=================================================
+FPGA Device Feature List (DFL) Framework Overview
+=================================================
+
+Authors:
+
+- Enno Luebbers <enno.luebbers@intel.com>
+- Xiao Guangrong <guangrong.xiao@linux.intel.com>
+- Wu Hao <hao.wu@intel.com>
+
+The Device Feature List (DFL) FPGA framework (and drivers according to this
+this framework) hides the very details of low layer hardwares and provides
+unified interfaces to userspace. Applications could use these interfaces to
+configure, enumerate, open and access FPGA accelerators on platforms which
+implement the DFL in the device memory. Besides this, the DFL framework
+enables system level management functions such as FPGA reconfiguration.
+
+
+Device Feature List (DFL) Overview
+==================================
+Device Feature List (DFL) defines a linked list of feature headers within the
+device MMIO space to provide an extensible way of adding features. Software can
+walk through these predefined data structures to enumerate FPGA features:
+FPGA Interface Unit (FIU), Accelerated Function Unit (AFU) and Private Features,
+as illustrated below::
+
+    Header            Header            Header            Header
+ +----------+  +-->+----------+  +-->+----------+  +-->+----------+
+ |   Type   |  |   |  Type    |  |   |  Type    |  |   |  Type    |
+ |   FIU    |  |   | Private  |  |   | Private  |  |   | Private  |
+ +----------+  |   | Feature  |  |   | Feature  |  |   | Feature  |
+ | Next_DFH |--+   +----------+  |   +----------+  |   +----------+
+ +----------+      | Next_DFH |--+   | Next_DFH |--+   | Next_DFH |--> NULL
+ |    ID    |      +----------+      +----------+      +----------+
+ +----------+      |    ID    |      |    ID    |      |    ID    |
+ | Next_AFU |--+   +----------+      +----------+      +----------+
+ +----------+  |   | Feature  |      | Feature  |      | Feature  |
+ |  Header  |  |   | Register |      | Register |      | Register |
+ | Register |  |   |   Set    |      |   Set    |      |   Set    |
+ |   Set    |  |   +----------+      +----------+      +----------+
+ +----------+  |      Header
+               +-->+----------+
+                   |   Type   |
+                   |   AFU    |
+                   +----------+
+                   | Next_DFH |--> NULL
+                   +----------+
+                   |   GUID   |
+                   +----------+
+                   |  Header  |
+                   | Register |
+                   |   Set    |
+                   +----------+
+
+FPGA Interface Unit (FIU) represents a standalone functional unit for the
+interface to FPGA, e.g. the FPGA Management Engine (FME) and Port (more
+descriptions on FME and Port in later sections).
+
+Accelerated Function Unit (AFU) represents a FPGA programmable region and
+always connects to a FIU (e.g. a Port) as its child as illustrated above.
+
+Private Features represent sub features of the FIU and AFU. They could be
+various function blocks with different IDs, but all private features which
+belong to the same FIU or AFU, must be linked to one list via the Next Device
+Feature Header (Next_DFH) pointer.
+
+Each FIU, AFU and Private Feature could implement its own functional registers.
+The functional register set for FIU and AFU, is named as Header Register Set,
+e.g. FME Header Register Set, and the one for Private Feature, is named as
+Feature Register Set, e.g. FME Partial Reconfiguration Feature Register Set.
+
+This Device Feature List provides a way of linking features together, it's
+convenient for software to locate each feature by walking through this list,
+and can be implemented in register regions of any FPGA device.
+
+
+FIU - FME (FPGA Management Engine)
+==================================
+The FPGA Management Engine performs reconfiguration and other infrastructure
+functions. Each FPGA device only has one FME.
+
+User-space applications can acquire exclusive access to the FME using open(),
+and release it using close().
+
+The following functions are exposed through ioctls:
+
+- Get driver API version (DFL_FPGA_GET_API_VERSION)
+- Check for extensions (DFL_FPGA_CHECK_EXTENSION)
+- Program bitstream (DFL_FPGA_FME_PORT_PR)
+
+More functions are exposed through sysfs
+(/sys/class/fpga_region/regionX/dfl-fme.n/):
+
+ Read bitstream ID (bitstream_id)
+     bitstream_id indicates version of the static FPGA region.
+
+ Read bitstream metadata (bitstream_metadata)
+     bitstream_metadata includes detailed information of static FPGA region,
+     e.g. synthesis date and seed.
+
+ Read number of ports (ports_num)
+     one FPGA device may have more than one port, this sysfs interface indicates
+     how many ports the FPGA device has.
+
+
+FIU - PORT
+==========
+A port represents the interface between the static FPGA fabric and a partially
+reconfigurable region containing an AFU. It controls the communication from SW
+to the accelerator and exposes features such as reset and debug. Each FPGA
+device may have more than one port, but always one AFU per port.
+
+
+AFU
+===
+An AFU is attached to a port FIU and exposes a fixed length MMIO region to be
+used for accelerator-specific control registers.
+
+User-space applications can acquire exclusive access to an AFU attached to a
+port by using open() on the port device node and release it using close().
+
+The following functions are exposed through ioctls:
+
+- Get driver API version (DFL_FPGA_GET_API_VERSION)
+- Check for extensions (DFL_FPGA_CHECK_EXTENSION)
+- Get port info (DFL_FPGA_PORT_GET_INFO)
+- Get MMIO region info (DFL_FPGA_PORT_GET_REGION_INFO)
+- Map DMA buffer (DFL_FPGA_PORT_DMA_MAP)
+- Unmap DMA buffer (DFL_FPGA_PORT_DMA_UNMAP)
+- Reset AFU (DFL_FPGA_PORT_RESET)
+
+DFL_FPGA_PORT_RESET:
+  reset the FPGA Port and its AFU. Userspace can do Port
+  reset at any time, e.g. during DMA or Partial Reconfiguration. But it should
+  never cause any system level issue, only functional failure (e.g. DMA or PR
+  operation failure) and be recoverable from the failure.
+
+User-space applications can also mmap() accelerator MMIO regions.
+
+More functions are exposed through sysfs:
+(/sys/class/fpga_region/<regionX>/<dfl-port.m>/):
+
+ Read Accelerator GUID (afu_id)
+     afu_id indicates which PR bitstream is programmed to this AFU.
+
+
+DFL Framework Overview
+======================
+
+::
+
+         +----------+    +--------+ +--------+ +--------+
+         |   FME    |    |  AFU   | |  AFU   | |  AFU   |
+         |  Module  |    | Module | | Module | | Module |
+         +----------+    +--------+ +--------+ +--------+
+                 +-----------------------+
+                 | FPGA Container Device |    Device Feature List
+                 |  (FPGA Base Region)   |         Framework
+                 +-----------------------+
+  ------------------------------------------------------------------
+               +----------------------------+
+               |   FPGA DFL Device Module   |
+               | (e.g. PCIE/Platform Device)|
+               +----------------------------+
+                 +------------------------+
+                 |  FPGA Hardware Device  |
+                 +------------------------+
+
+DFL framework in kernel provides common interfaces to create container device
+(FPGA base region), discover feature devices and their private features from the
+given Device Feature Lists and create platform devices for feature devices
+(e.g. FME, Port and AFU) with related resources under the container device. It
+also abstracts operations for the private features and exposes common ops to
+feature device drivers.
+
+The FPGA DFL Device could be different hardwares, e.g. PCIe device, platform
+device and etc. Its driver module is always loaded first once the device is
+created by the system. This driver plays an infrastructural role in the
+driver architecture. It locates the DFLs in the device memory, handles them
+and related resources to common interfaces from DFL framework for enumeration.
+(Please refer to drivers/fpga/dfl.c for detailed enumeration APIs).
+
+The FPGA Management Engine (FME) driver is a platform driver which is loaded
+automatically after FME platform device creation from the DFL device module. It
+provides the key features for FPGA management, including:
+
+	a) Expose static FPGA region information, e.g. version and metadata.
+	   Users can read related information via sysfs interfaces exposed
+	   by FME driver.
+
+	b) Partial Reconfiguration. The FME driver creates FPGA manager, FPGA
+	   bridges and FPGA regions during PR sub feature initialization. Once
+	   it receives a DFL_FPGA_FME_PORT_PR ioctl from user, it invokes the
+	   common interface function from FPGA Region to complete the partial
+	   reconfiguration of the PR bitstream to the given port.
+
+Similar to the FME driver, the FPGA Accelerated Function Unit (AFU) driver is
+probed once the AFU platform device is created. The main function of this module
+is to provide an interface for userspace applications to access the individual
+accelerators, including basic reset control on port, AFU MMIO region export, dma
+buffer mapping service functions.
+
+After feature platform devices creation, matched platform drivers will be loaded
+automatically to handle different functionalities. Please refer to next sections
+for detailed information on functional units which have been already implemented
+under this DFL framework.
+
+
+Partial Reconfiguration
+=======================
+As mentioned above, accelerators can be reconfigured through partial
+reconfiguration of a PR bitstream file. The PR bitstream file must have been
+generated for the exact static FPGA region and targeted reconfigurable region
+(port) of the FPGA, otherwise, the reconfiguration operation will fail and
+possibly cause system instability. This compatibility can be checked by
+comparing the compatibility ID noted in the header of PR bitstream file against
+the compat_id exposed by the target FPGA region. This check is usually done by
+userspace before calling the reconfiguration IOCTL.
+
+
+Device enumeration
+==================
+This section introduces how applications enumerate the fpga device from
+the sysfs hierarchy under /sys/class/fpga_region.
+
+In the example below, two DFL based FPGA devices are installed in the host. Each
+fpga device has one FME and two ports (AFUs).
+
+FPGA regions are created under /sys/class/fpga_region/::
+
+	/sys/class/fpga_region/region0
+	/sys/class/fpga_region/region1
+	/sys/class/fpga_region/region2
+	...
+
+Application needs to search each regionX folder, if feature device is found,
+(e.g. "dfl-port.n" or "dfl-fme.m" is found), then it's the base
+fpga region which represents the FPGA device.
+
+Each base region has one FME and two ports (AFUs) as child devices::
+
+	/sys/class/fpga_region/region0/dfl-fme.0
+	/sys/class/fpga_region/region0/dfl-port.0
+	/sys/class/fpga_region/region0/dfl-port.1
+	...
+
+	/sys/class/fpga_region/region3/dfl-fme.1
+	/sys/class/fpga_region/region3/dfl-port.2
+	/sys/class/fpga_region/region3/dfl-port.3
+	...
+
+In general, the FME/AFU sysfs interfaces are named as follows::
+
+	/sys/class/fpga_region/<regionX>/<dfl-fme.n>/
+	/sys/class/fpga_region/<regionX>/<dfl-port.m>/
+
+with 'n' consecutively numbering all FMEs and 'm' consecutively numbering all
+ports.
+
+The device nodes used for ioctl() or mmap() can be referenced through::
+
+	/sys/class/fpga_region/<regionX>/<dfl-fme.n>/dev
+	/sys/class/fpga_region/<regionX>/<dfl-port.n>/dev
+
+
+Add new FIUs support
+====================
+It's possible that developers made some new function blocks (FIUs) under this
+DFL framework, then new platform device driver needs to be developed for the
+new feature dev (FIU) following the same way as existing feature dev drivers
+(e.g. FME and Port/AFU platform device driver). Besides that, it requires
+modification on DFL framework enumeration code too, for new FIU type detection
+and related platform devices creation.
+
+
+Add new private features support
+================================
+In some cases, we may need to add some new private features to existing FIUs
+(e.g. FME or Port). Developers don't need to touch enumeration code in DFL
+framework, as each private feature will be parsed automatically and related
+mmio resources can be found under FIU platform device created by DFL framework.
+Developer only needs to provide a sub feature driver with matched feature id.
+FME Partial Reconfiguration Sub Feature driver (see drivers/fpga/dfl-fme-pr.c)
+could be a reference.
+
+
+Open discussion
+===============
+FME driver exports one ioctl (DFL_FPGA_FME_PORT_PR) for partial reconfiguration
+to user now. In the future, if unified user interfaces for reconfiguration are
+added, FME driver should switch to them from ioctl interface.
diff --git a/Documentation/fpga/dfl.txt b/Documentation/fpga/dfl.txt
deleted file mode 100644
index 6df4621c3f2a..000000000000
--- a/Documentation/fpga/dfl.txt
+++ /dev/null
@@ -1,285 +0,0 @@
-===============================================================================
-              FPGA Device Feature List (DFL) Framework Overview
--------------------------------------------------------------------------------
-                Enno Luebbers <enno.luebbers@intel.com>
-                Xiao Guangrong <guangrong.xiao@linux.intel.com>
-                Wu Hao <hao.wu@intel.com>
-
-The Device Feature List (DFL) FPGA framework (and drivers according to this
-this framework) hides the very details of low layer hardwares and provides
-unified interfaces to userspace. Applications could use these interfaces to
-configure, enumerate, open and access FPGA accelerators on platforms which
-implement the DFL in the device memory. Besides this, the DFL framework
-enables system level management functions such as FPGA reconfiguration.
-
-
-Device Feature List (DFL) Overview
-==================================
-Device Feature List (DFL) defines a linked list of feature headers within the
-device MMIO space to provide an extensible way of adding features. Software can
-walk through these predefined data structures to enumerate FPGA features:
-FPGA Interface Unit (FIU), Accelerated Function Unit (AFU) and Private Features,
-as illustrated below:
-
-    Header            Header            Header            Header
- +----------+  +-->+----------+  +-->+----------+  +-->+----------+
- |   Type   |  |   |  Type    |  |   |  Type    |  |   |  Type    |
- |   FIU    |  |   | Private  |  |   | Private  |  |   | Private  |
- +----------+  |   | Feature  |  |   | Feature  |  |   | Feature  |
- | Next_DFH |--+   +----------+  |   +----------+  |   +----------+
- +----------+      | Next_DFH |--+   | Next_DFH |--+   | Next_DFH |--> NULL
- |    ID    |      +----------+      +----------+      +----------+
- +----------+      |    ID    |      |    ID    |      |    ID    |
- | Next_AFU |--+   +----------+      +----------+      +----------+
- +----------+  |   | Feature  |      | Feature  |      | Feature  |
- |  Header  |  |   | Register |      | Register |      | Register |
- | Register |  |   |   Set    |      |   Set    |      |   Set    |
- |   Set    |  |   +----------+      +----------+      +----------+
- +----------+  |      Header
-               +-->+----------+
-                   |   Type   |
-                   |   AFU    |
-                   +----------+
-                   | Next_DFH |--> NULL
-                   +----------+
-                   |   GUID   |
-                   +----------+
-                   |  Header  |
-                   | Register |
-                   |   Set    |
-                   +----------+
-
-FPGA Interface Unit (FIU) represents a standalone functional unit for the
-interface to FPGA, e.g. the FPGA Management Engine (FME) and Port (more
-descriptions on FME and Port in later sections).
-
-Accelerated Function Unit (AFU) represents a FPGA programmable region and
-always connects to a FIU (e.g. a Port) as its child as illustrated above.
-
-Private Features represent sub features of the FIU and AFU. They could be
-various function blocks with different IDs, but all private features which
-belong to the same FIU or AFU, must be linked to one list via the Next Device
-Feature Header (Next_DFH) pointer.
-
-Each FIU, AFU and Private Feature could implement its own functional registers.
-The functional register set for FIU and AFU, is named as Header Register Set,
-e.g. FME Header Register Set, and the one for Private Feature, is named as
-Feature Register Set, e.g. FME Partial Reconfiguration Feature Register Set.
-
-This Device Feature List provides a way of linking features together, it's
-convenient for software to locate each feature by walking through this list,
-and can be implemented in register regions of any FPGA device.
-
-
-FIU - FME (FPGA Management Engine)
-==================================
-The FPGA Management Engine performs reconfiguration and other infrastructure
-functions. Each FPGA device only has one FME.
-
-User-space applications can acquire exclusive access to the FME using open(),
-and release it using close().
-
-The following functions are exposed through ioctls:
-
- Get driver API version (DFL_FPGA_GET_API_VERSION)
- Check for extensions (DFL_FPGA_CHECK_EXTENSION)
- Program bitstream (DFL_FPGA_FME_PORT_PR)
-
-More functions are exposed through sysfs
-(/sys/class/fpga_region/regionX/dfl-fme.n/):
-
- Read bitstream ID (bitstream_id)
-     bitstream_id indicates version of the static FPGA region.
-
- Read bitstream metadata (bitstream_metadata)
-     bitstream_metadata includes detailed information of static FPGA region,
-     e.g. synthesis date and seed.
-
- Read number of ports (ports_num)
-     one FPGA device may have more than one port, this sysfs interface indicates
-     how many ports the FPGA device has.
-
-
-FIU - PORT
-==========
-A port represents the interface between the static FPGA fabric and a partially
-reconfigurable region containing an AFU. It controls the communication from SW
-to the accelerator and exposes features such as reset and debug. Each FPGA
-device may have more than one port, but always one AFU per port.
-
-
-AFU
-===
-An AFU is attached to a port FIU and exposes a fixed length MMIO region to be
-used for accelerator-specific control registers.
-
-User-space applications can acquire exclusive access to an AFU attached to a
-port by using open() on the port device node and release it using close().
-
-The following functions are exposed through ioctls:
-
- Get driver API version (DFL_FPGA_GET_API_VERSION)
- Check for extensions (DFL_FPGA_CHECK_EXTENSION)
- Get port info (DFL_FPGA_PORT_GET_INFO)
- Get MMIO region info (DFL_FPGA_PORT_GET_REGION_INFO)
- Map DMA buffer (DFL_FPGA_PORT_DMA_MAP)
- Unmap DMA buffer (DFL_FPGA_PORT_DMA_UNMAP)
- Reset AFU (*DFL_FPGA_PORT_RESET)
-
-*DFL_FPGA_PORT_RESET: reset the FPGA Port and its AFU. Userspace can do Port
-reset at any time, e.g. during DMA or Partial Reconfiguration. But it should
-never cause any system level issue, only functional failure (e.g. DMA or PR
-operation failure) and be recoverable from the failure.
-
-User-space applications can also mmap() accelerator MMIO regions.
-
-More functions are exposed through sysfs:
-(/sys/class/fpga_region/<regionX>/<dfl-port.m>/):
-
- Read Accelerator GUID (afu_id)
-     afu_id indicates which PR bitstream is programmed to this AFU.
-
-
-DFL Framework Overview
-======================
-
-         +----------+    +--------+ +--------+ +--------+
-         |   FME    |    |  AFU   | |  AFU   | |  AFU   |
-         |  Module  |    | Module | | Module | | Module |
-         +----------+    +--------+ +--------+ +--------+
-                 +-----------------------+
-                 | FPGA Container Device |    Device Feature List
-                 |  (FPGA Base Region)   |         Framework
-                 +-----------------------+
---------------------------------------------------------------------
-               +----------------------------+
-               |   FPGA DFL Device Module   |
-               | (e.g. PCIE/Platform Device)|
-               +----------------------------+
-                 +------------------------+
-                 |  FPGA Hardware Device  |
-                 +------------------------+
-
-DFL framework in kernel provides common interfaces to create container device
-(FPGA base region), discover feature devices and their private features from the
-given Device Feature Lists and create platform devices for feature devices
-(e.g. FME, Port and AFU) with related resources under the container device. It
-also abstracts operations for the private features and exposes common ops to
-feature device drivers.
-
-The FPGA DFL Device could be different hardwares, e.g. PCIe device, platform
-device and etc. Its driver module is always loaded first once the device is
-created by the system. This driver plays an infrastructural role in the
-driver architecture. It locates the DFLs in the device memory, handles them
-and related resources to common interfaces from DFL framework for enumeration.
-(Please refer to drivers/fpga/dfl.c for detailed enumeration APIs).
-
-The FPGA Management Engine (FME) driver is a platform driver which is loaded
-automatically after FME platform device creation from the DFL device module. It
-provides the key features for FPGA management, including:
-
-	a) Expose static FPGA region information, e.g. version and metadata.
-	   Users can read related information via sysfs interfaces exposed
-	   by FME driver.
-
-	b) Partial Reconfiguration. The FME driver creates FPGA manager, FPGA
-	   bridges and FPGA regions during PR sub feature initialization. Once
-	   it receives a DFL_FPGA_FME_PORT_PR ioctl from user, it invokes the
-	   common interface function from FPGA Region to complete the partial
-	   reconfiguration of the PR bitstream to the given port.
-
-Similar to the FME driver, the FPGA Accelerated Function Unit (AFU) driver is
-probed once the AFU platform device is created. The main function of this module
-is to provide an interface for userspace applications to access the individual
-accelerators, including basic reset control on port, AFU MMIO region export, dma
-buffer mapping service functions.
-
-After feature platform devices creation, matched platform drivers will be loaded
-automatically to handle different functionalities. Please refer to next sections
-for detailed information on functional units which have been already implemented
-under this DFL framework.
-
-
-Partial Reconfiguration
-=======================
-As mentioned above, accelerators can be reconfigured through partial
-reconfiguration of a PR bitstream file. The PR bitstream file must have been
-generated for the exact static FPGA region and targeted reconfigurable region
-(port) of the FPGA, otherwise, the reconfiguration operation will fail and
-possibly cause system instability. This compatibility can be checked by
-comparing the compatibility ID noted in the header of PR bitstream file against
-the compat_id exposed by the target FPGA region. This check is usually done by
-userspace before calling the reconfiguration IOCTL.
-
-
-Device enumeration
-==================
-This section introduces how applications enumerate the fpga device from
-the sysfs hierarchy under /sys/class/fpga_region.
-
-In the example below, two DFL based FPGA devices are installed in the host. Each
-fpga device has one FME and two ports (AFUs).
-
-FPGA regions are created under /sys/class/fpga_region/
-
-	/sys/class/fpga_region/region0
-	/sys/class/fpga_region/region1
-	/sys/class/fpga_region/region2
-	...
-
-Application needs to search each regionX folder, if feature device is found,
-(e.g. "dfl-port.n" or "dfl-fme.m" is found), then it's the base
-fpga region which represents the FPGA device.
-
-Each base region has one FME and two ports (AFUs) as child devices:
-
-	/sys/class/fpga_region/region0/dfl-fme.0
-	/sys/class/fpga_region/region0/dfl-port.0
-	/sys/class/fpga_region/region0/dfl-port.1
-	...
-
-	/sys/class/fpga_region/region3/dfl-fme.1
-	/sys/class/fpga_region/region3/dfl-port.2
-	/sys/class/fpga_region/region3/dfl-port.3
-	...
-
-In general, the FME/AFU sysfs interfaces are named as follows:
-
-	/sys/class/fpga_region/<regionX>/<dfl-fme.n>/
-	/sys/class/fpga_region/<regionX>/<dfl-port.m>/
-
-with 'n' consecutively numbering all FMEs and 'm' consecutively numbering all
-ports.
-
-The device nodes used for ioctl() or mmap() can be referenced through:
-
-	/sys/class/fpga_region/<regionX>/<dfl-fme.n>/dev
-	/sys/class/fpga_region/<regionX>/<dfl-port.n>/dev
-
-
-Add new FIUs support
-====================
-It's possible that developers made some new function blocks (FIUs) under this
-DFL framework, then new platform device driver needs to be developed for the
-new feature dev (FIU) following the same way as existing feature dev drivers
-(e.g. FME and Port/AFU platform device driver). Besides that, it requires
-modification on DFL framework enumeration code too, for new FIU type detection
-and related platform devices creation.
-
-
-Add new private features support
-================================
-In some cases, we may need to add some new private features to existing FIUs
-(e.g. FME or Port). Developers don't need to touch enumeration code in DFL
-framework, as each private feature will be parsed automatically and related
-mmio resources can be found under FIU platform device created by DFL framework.
-Developer only needs to provide a sub feature driver with matched feature id.
-FME Partial Reconfiguration Sub Feature driver (see drivers/fpga/dfl-fme-pr.c)
-could be a reference.
-
-
-Open discussion
-===============
-FME driver exports one ioctl (DFL_FPGA_FME_PORT_PR) for partial reconfiguration
-to user now. In the future, if unified user interfaces for reconfiguration are
-added, FME driver should switch to them from ioctl interface.
diff --git a/Documentation/fpga/index.rst b/Documentation/fpga/index.rst
new file mode 100644
index 000000000000..2c87d1ea084f
--- /dev/null
+++ b/Documentation/fpga/index.rst
@@ -0,0 +1,17 @@
+:orphan:
+
+====
+fpga
+====
+
+.. toctree::
+    :maxdepth: 1
+
+    dfl
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
-- 
cgit 


From d7b461c5e82fc5f5e4261f3b0228ecda58eb9f1a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:47 -0300
Subject: docs: ide: convert docs to ReST and rename to *.rst

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/kernel-parameters.txt |   2 +-
 Documentation/cdrom/ide-cd.rst                  |  18 +-
 Documentation/ide/changelogs.rst                |  17 ++
 Documentation/ide/ide-tape.rst                  |  68 ++++++
 Documentation/ide/ide-tape.txt                  |  65 ------
 Documentation/ide/ide.rst                       | 265 ++++++++++++++++++++++++
 Documentation/ide/ide.txt                       | 256 -----------------------
 Documentation/ide/index.rst                     |  21 ++
 Documentation/ide/warm-plug-howto.rst           |  18 ++
 Documentation/ide/warm-plug-howto.txt           |  18 --
 10 files changed, 399 insertions(+), 349 deletions(-)
 create mode 100644 Documentation/ide/changelogs.rst
 create mode 100644 Documentation/ide/ide-tape.rst
 delete mode 100644 Documentation/ide/ide-tape.txt
 create mode 100644 Documentation/ide/ide.rst
 delete mode 100644 Documentation/ide/ide.txt
 create mode 100644 Documentation/ide/index.rst
 create mode 100644 Documentation/ide/warm-plug-howto.rst
 delete mode 100644 Documentation/ide/warm-plug-howto.txt

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 83d6560f10f0..81c168b25b20 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1504,7 +1504,7 @@
 			Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc
 			.vlb_clock .pci_clock .noflush .nohpa .noprobe .nowerr
 			.cdrom .chs .ignore_cable are additional options
-			See Documentation/ide/ide.txt.
+			See Documentation/ide/ide.rst.
 
 	ide-generic.probe-mask= [HW] (E)IDE subsystem
 			Format: <int>
diff --git a/Documentation/cdrom/ide-cd.rst b/Documentation/cdrom/ide-cd.rst
index dadc94ef6b6c..bdccb74fc92d 100644
--- a/Documentation/cdrom/ide-cd.rst
+++ b/Documentation/cdrom/ide-cd.rst
@@ -47,7 +47,7 @@ This driver provides the following features:
 ---------------
 
 0. The ide-cd relies on the ide disk driver.  See
-   Documentation/ide/ide.txt for up-to-date information on the ide
+   Documentation/ide/ide.rst for up-to-date information on the ide
    driver.
 
 1. Make sure that the ide and ide-cd drivers are compiled into the
@@ -62,7 +62,7 @@ This driver provides the following features:
 
    Depending on what type of IDE interface you have, you may need to
    specify additional configuration options.  See
-   Documentation/ide/ide.txt.
+   Documentation/ide/ide.rst.
 
 2. You should also ensure that the iso9660 filesystem is either
    compiled into the kernel or available as a loadable module.  You
@@ -82,7 +82,7 @@ This driver provides the following features:
    on the primary IDE interface are called `hda` and `hdb`,
    respectively.  The drives on the secondary interface are called
    `hdc` and `hdd`.  (Interfaces at other locations get other letters
-   in the third position; see Documentation/ide/ide.txt.)
+   in the third position; see Documentation/ide/ide.rst.)
 
    If you want your CDROM drive to be found automatically by the
    driver, you should make sure your IDE interface uses either the
@@ -91,7 +91,7 @@ This driver provides the following features:
    be jumpered as `master`.  (If for some reason you cannot configure
    your system in this manner, you can probably still use the driver.
    You may have to pass extra configuration information to the kernel
-   when you boot, however.  See Documentation/ide/ide.txt for more
+   when you boot, however.  See Documentation/ide/ide.rst for more
    information.)
 
 4. Boot the system.  If the drive is recognized, you should see a
@@ -163,7 +163,7 @@ to change.  If the slot number is -1, the drive is unloaded.
 This section discusses some common problems encountered when trying to
 use the driver, and some possible solutions.  Note that if you are
 experiencing problems, you should probably also review
-Documentation/ide/ide.txt for current information about the underlying
+Documentation/ide/ide.rst for current information about the underlying
 IDE support code.  Some of these items apply only to earlier versions
 of the driver, but are mentioned here for completeness.
 
@@ -173,7 +173,7 @@ from the driver.
 a. Drive is not detected during booting.
 
    - Review the configuration instructions above and in
-     Documentation/ide/ide.txt, and check how your hardware is
+     Documentation/ide/ide.rst, and check how your hardware is
      configured.
 
    - If your drive is the only device on an IDE interface, it should
@@ -181,7 +181,7 @@ a. Drive is not detected during booting.
 
    - If your IDE interface is not at the standard addresses of 0x170
      or 0x1f0, you'll need to explicitly inform the driver using a
-     lilo option.  See Documentation/ide/ide.txt.  (This feature was
+     lilo option.  See Documentation/ide/ide.rst.  (This feature was
      added around kernel version 1.3.30.)
 
    - If the autoprobing is not finding your drive, you can tell the
@@ -207,7 +207,7 @@ a. Drive is not detected during booting.
      Support for some interfaces needing extra initialization is
      provided in later 1.3.x kernels.  You may need to turn on
      additional kernel configuration options to get them to work;
-     see Documentation/ide/ide.txt.
+     see Documentation/ide/ide.rst.
 
      Even if support is not available for your interface, you may be
      able to get it to work with the following procedure.  First boot
@@ -261,7 +261,7 @@ c. System hangups.
     be worked around by specifying the `serialize` option when
     booting.  Recent kernels should be able to detect the need for
     this automatically in most cases, but the detection is not
-    foolproof.  See Documentation/ide/ide.txt for more information
+    foolproof.  See Documentation/ide/ide.rst for more information
     about the `serialize` option and the CMD640B.
 
   - Note that many MS-DOS CDROM drivers will work with such buggy
diff --git a/Documentation/ide/changelogs.rst b/Documentation/ide/changelogs.rst
new file mode 100644
index 000000000000..fdf9d0fb8027
--- /dev/null
+++ b/Documentation/ide/changelogs.rst
@@ -0,0 +1,17 @@
+Changelog for ide cd
+--------------------
+
+ .. include:: ChangeLog.ide-cd.1994-2004
+    :literal:
+
+Changelog for ide floppy
+------------------------
+
+ .. include:: ChangeLog.ide-floppy.1996-2002
+    :literal:
+
+Changelog for ide tape
+----------------------
+
+ .. include:: ChangeLog.ide-tape.1995-2002
+    :literal:
diff --git a/Documentation/ide/ide-tape.rst b/Documentation/ide/ide-tape.rst
new file mode 100644
index 000000000000..3e061d9c0e38
--- /dev/null
+++ b/Documentation/ide/ide-tape.rst
@@ -0,0 +1,68 @@
+===============================
+IDE ATAPI streaming tape driver
+===============================
+
+This driver is a part of the Linux ide driver.
+
+The driver, in co-operation with ide.c, basically traverses the
+request-list for the block device interface. The character device
+interface, on the other hand, creates new requests, adds them
+to the request-list of the block device, and waits for their completion.
+
+The block device major and minor numbers are determined from the
+tape's relative position in the ide interfaces, as explained in ide.c.
+
+The character device interface consists of the following devices::
+
+  ht0		major 37, minor 0	first  IDE tape, rewind on close.
+  ht1		major 37, minor 1	second IDE tape, rewind on close.
+  ...
+  nht0		major 37, minor 128	first  IDE tape, no rewind on close.
+  nht1		major 37, minor 129	second IDE tape, no rewind on close.
+  ...
+
+The general magnetic tape commands compatible interface, as defined by
+include/linux/mtio.h, is accessible through the character device.
+
+General ide driver configuration options, such as the interrupt-unmask
+flag, can be configured by issuing an ioctl to the block device interface,
+as any other ide device.
+
+Our own ide-tape ioctl's can be issued to either the block device or
+the character device interface.
+
+Maximal throughput with minimal bus load will usually be achieved in the
+following scenario:
+
+     1.	ide-tape is operating in the pipelined operation mode.
+     2.	No buffering is performed by the user backup program.
+
+Testing was done with a 2 GB CONNER CTMA 4000 IDE ATAPI Streaming Tape Drive.
+
+Here are some words from the first releases of hd.c, which are quoted
+in ide.c and apply here as well:
+
+* Special care is recommended.  Have Fun!
+
+Possible improvements
+=====================
+
+1. Support for the ATAPI overlap protocol.
+
+In order to maximize bus throughput, we currently use the DSC
+overlap method which enables ide.c to service requests from the
+other device while the tape is busy executing a command. The
+DSC overlap method involves polling the tape's status register
+for the DSC bit, and servicing the other device while the tape
+isn't ready.
+
+In the current QIC development standard (December 1995),
+it is recommended that new tape drives will *in addition*
+implement the ATAPI overlap protocol, which is used for the
+same purpose - efficient use of the IDE bus, but is interrupt
+driven and thus has much less CPU overhead.
+
+ATAPI overlap is likely to be supported in most new ATAPI
+devices, including new ATAPI cdroms, and thus provides us
+a method by which we can achieve higher throughput when
+sharing a (fast) ATA-2 disk with any (slow) new ATAPI device.
diff --git a/Documentation/ide/ide-tape.txt b/Documentation/ide/ide-tape.txt
deleted file mode 100644
index 3f348a0b21d8..000000000000
--- a/Documentation/ide/ide-tape.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-IDE ATAPI streaming tape driver.
-
-This driver is a part of the Linux ide driver.
-
-The driver, in co-operation with ide.c, basically traverses the
-request-list for the block device interface. The character device
-interface, on the other hand, creates new requests, adds them
-to the request-list of the block device, and waits for their completion.
-
-The block device major and minor numbers are determined from the
-tape's relative position in the ide interfaces, as explained in ide.c.
-
-The character device interface consists of the following devices:
-
-ht0		major 37, minor 0	first  IDE tape, rewind on close.
-ht1		major 37, minor 1	second IDE tape, rewind on close.
-...
-nht0		major 37, minor 128	first  IDE tape, no rewind on close.
-nht1		major 37, minor 129	second IDE tape, no rewind on close.
-...
-
-The general magnetic tape commands compatible interface, as defined by
-include/linux/mtio.h, is accessible through the character device.
-
-General ide driver configuration options, such as the interrupt-unmask
-flag, can be configured by issuing an ioctl to the block device interface,
-as any other ide device.
-
-Our own ide-tape ioctl's can be issued to either the block device or
-the character device interface.
-
-Maximal throughput with minimal bus load will usually be achieved in the
-following scenario:
-
-     1.	ide-tape is operating in the pipelined operation mode.
-     2.	No buffering is performed by the user backup program.
-
-Testing was done with a 2 GB CONNER CTMA 4000 IDE ATAPI Streaming Tape Drive.
-
-Here are some words from the first releases of hd.c, which are quoted
-in ide.c and apply here as well:
-
-| Special care is recommended.  Have Fun!
-
-Possible improvements:
-
-1. Support for the ATAPI overlap protocol.
-
-In order to maximize bus throughput, we currently use the DSC
-overlap method which enables ide.c to service requests from the
-other device while the tape is busy executing a command. The
-DSC overlap method involves polling the tape's status register
-for the DSC bit, and servicing the other device while the tape
-isn't ready.
-
-In the current QIC development standard (December 1995),
-it is recommended that new tape drives will *in addition*
-implement the ATAPI overlap protocol, which is used for the
-same purpose - efficient use of the IDE bus, but is interrupt
-driven and thus has much less CPU overhead.
-
-ATAPI overlap is likely to be supported in most new ATAPI
-devices, including new ATAPI cdroms, and thus provides us
-a method by which we can achieve higher throughput when
-sharing a (fast) ATA-2 disk with any (slow) new ATAPI device.
diff --git a/Documentation/ide/ide.rst b/Documentation/ide/ide.rst
new file mode 100644
index 000000000000..88bdcba92f7d
--- /dev/null
+++ b/Documentation/ide/ide.rst
@@ -0,0 +1,265 @@
+============================================
+Information regarding the Enhanced IDE drive
+============================================
+
+   The hdparm utility can be used to control various IDE features on a
+   running system. It is packaged separately.  Please Look for it on popular
+   linux FTP sites.
+
+-------------------------------------------------------------------------------
+
+.. important::
+
+   BUGGY IDE CHIPSETS CAN CORRUPT DATA!!
+
+    PCI versions of the CMD640 and RZ1000 interfaces are now detected
+    automatically at startup when PCI BIOS support is configured.
+
+    Linux disables the "prefetch" ("readahead") mode of the RZ1000
+    to prevent data corruption possible due to hardware design flaws.
+
+    For the CMD640, linux disables "IRQ unmasking" (hdparm -u1) on any
+    drive for which the "prefetch" mode of the CMD640 is turned on.
+    If "prefetch" is disabled (hdparm -p8), then "IRQ unmasking" can be
+    used again.
+
+    For the CMD640, linux disables "32bit I/O" (hdparm -c1) on any drive
+    for which the "prefetch" mode of the CMD640 is turned off.
+    If "prefetch" is enabled (hdparm -p9), then "32bit I/O" can be
+    used again.
+
+    The CMD640 is also used on some Vesa Local Bus (VLB) cards, and is *NOT*
+    automatically detected by Linux.  For safe, reliable operation with such
+    interfaces, one *MUST* use the "cmd640.probe_vlb" kernel option.
+
+    Use of the "serialize" option is no longer necessary.
+
+-------------------------------------------------------------------------------
+
+Common pitfalls
+===============
+
+- 40-conductor IDE cables are capable of transferring data in DMA modes up to
+  udma2, but no faster.
+
+- If possible devices should be attached to separate channels if they are
+  available. Typically the disk on the first and CD-ROM on the second.
+
+- If you mix devices on the same cable, please consider using similar devices
+  in respect of the data transfer mode they support.
+
+- Even better try to stick to the same vendor and device type on the same
+  cable.
+
+This is the multiple IDE interface driver, as evolved from hd.c
+===============================================================
+
+It supports up to 9 IDE interfaces per default, on one or more IRQs (usually
+14 & 15).  There can be up to two drives per interface, as per the ATA-6 spec.::
+
+  Primary:    ide0, port 0x1f0; major=3;  hda is minor=0; hdb is minor=64
+  Secondary:  ide1, port 0x170; major=22; hdc is minor=0; hdd is minor=64
+  Tertiary:   ide2, port 0x1e8; major=33; hde is minor=0; hdf is minor=64
+  Quaternary: ide3, port 0x168; major=34; hdg is minor=0; hdh is minor=64
+  fifth..     ide4, usually PCI, probed
+  sixth..     ide5, usually PCI, probed
+
+To access devices on interfaces > ide0, device entries please make sure that
+device files for them are present in /dev.  If not, please create such
+entries, by using /dev/MAKEDEV.
+
+This driver automatically probes for most IDE interfaces (including all PCI
+ones), for the drives/geometries attached to those interfaces, and for the IRQ
+lines being used by the interfaces (normally 14, 15 for ide0/ide1).
+
+Any number of interfaces may share a single IRQ if necessary, at a slight
+performance penalty, whether on separate cards or a single VLB card.
+The IDE driver automatically detects and handles this.  However, this may
+or may not be harmful to your hardware.. two or more cards driving the same IRQ
+can potentially burn each other's bus driver, though in practice this
+seldom occurs.  Be careful, and if in doubt, don't do it!
+
+Drives are normally found by auto-probing and/or examining the CMOS/BIOS data.
+For really weird situations, the apparent (fdisk) geometry can also be specified
+on the kernel "command line" using LILO.  The format of such lines is::
+
+	ide_core.chs=[interface_number.device_number]:cyls,heads,sects
+
+or::
+
+	ide_core.cdrom=[interface_number.device_number]
+
+For example::
+
+	ide_core.chs=1.0:1050,32,64  ide_core.cdrom=1.1
+
+The results of successful auto-probing may override the physical geometry/irq
+specified, though the "original" geometry may be retained as the "logical"
+geometry for partitioning purposes (fdisk).
+
+If the auto-probing during boot time confuses a drive (ie. the drive works
+with hd.c but not with ide.c), then an command line option may be specified
+for each drive for which you'd like the drive to skip the hardware
+probe/identification sequence.  For example::
+
+	ide_core.noprobe=0.1
+
+or::
+
+	ide_core.chs=1.0:768,16,32
+	ide_core.noprobe=1.0
+
+Note that when only one IDE device is attached to an interface, it should be
+jumpered as "single" or "master", *not* "slave".  Many folks have had
+"trouble" with cdroms because of this requirement, so the driver now probes
+for both units, though success is more likely when the drive is jumpered
+correctly.
+
+Courtesy of Scott Snyder and others, the driver supports ATAPI cdrom drives
+such as the NEC-260 and the new MITSUMI triple/quad speed drives.
+Such drives will be identified at boot time, just like a hard disk.
+
+If for some reason your cdrom drive is *not* found at boot time, you can force
+the probe to look harder by supplying a kernel command line parameter
+via LILO, such as:::
+
+	ide_core.cdrom=1.0	/* "master" on second interface (hdc) */
+
+or::
+
+	ide_core.cdrom=1.1	/* "slave" on second interface (hdd) */
+
+For example, a GW2000 system might have a hard drive on the primary
+interface (/dev/hda) and an IDE cdrom drive on the secondary interface
+(/dev/hdc).  To mount a CD in the cdrom drive, one would use something like::
+
+	ln -sf /dev/hdc /dev/cdrom
+	mkdir /mnt/cdrom
+	mount /dev/cdrom /mnt/cdrom -t iso9660 -o ro
+
+If, after doing all of the above, mount doesn't work and you see
+errors from the driver (with dmesg) complaining about `status=0xff`,
+this means that the hardware is not responding to the driver's attempts
+to read it.  One of the following is probably the problem:
+
+  - Your hardware is broken.
+
+  - You are using the wrong address for the device, or you have the
+    drive jumpered wrong.  Review the configuration instructions above.
+
+  - Your IDE controller requires some nonstandard initialization sequence
+    before it will work properly.  If this is the case, there will often
+    be a separate MS-DOS driver just for the controller.  IDE interfaces
+    on sound cards usually fall into this category.  Such configurations
+    can often be made to work by first booting MS-DOS, loading the
+    appropriate drivers, and then warm-booting linux (without powering
+    off).  This can be automated using loadlin in the MS-DOS autoexec.
+
+If you always get timeout errors, interrupts from the drive are probably
+not making it to the host.  Check how you have the hardware jumpered
+and make sure it matches what the driver expects (see the configuration
+instructions above).  If you have a PCI system, also check the BIOS
+setup; I've had one report of a system which was shipped with IRQ 15
+disabled by the BIOS.
+
+The kernel is able to execute binaries directly off of the cdrom,
+provided it is mounted with the default block size of 1024 (as above).
+
+Please pass on any feedback on any of this stuff to the maintainer,
+whose address can be found in linux/MAINTAINERS.
+
+The IDE driver is modularized.  The high level disk/CD-ROM/tape/floppy
+drivers can always be compiled as loadable modules, the chipset drivers
+can only be compiled into the kernel, and the core code (ide.c) can be
+compiled as a loadable module provided no chipset support is needed.
+
+When using ide.c as a module in combination with kmod, add::
+
+	alias block-major-3 ide-probe
+
+to a configuration file in /etc/modprobe.d/.
+
+When ide.c is used as a module, you can pass command line parameters to the
+driver using the "options=" keyword to insmod, while replacing any ',' with
+';'.
+
+
+Summary of ide driver parameters for kernel command line
+========================================================
+
+For legacy IDE VLB host drivers (ali14xx/dtc2278/ht6560b/qd65xx/umc8672)
+you need to explicitly enable probing by using "probe" kernel parameter,
+i.e. to enable probing for ALI M14xx chipsets (ali14xx host driver) use:
+
+* "ali14xx.probe" boot option when ali14xx driver is built-in the kernel
+
+* "probe" module parameter when ali14xx driver is compiled as module
+  ("modprobe ali14xx probe")
+
+Also for legacy CMD640 host driver (cmd640) you need to use "probe_vlb"
+kernel paremeter to enable probing for VLB version of the chipset (PCI ones
+are detected automatically).
+
+You also need to use "probe" kernel parameter for ide-4drives driver
+(support for IDE generic chipset with four drives on one port).
+
+To enable support for IDE doublers on Amiga use "doubler" kernel parameter
+for gayle host driver (i.e. "gayle.doubler" if the driver is built-in).
+
+To force ignoring cable detection (this should be needed only if you're using
+short 40-wires cable which cannot be automatically detected - if this is not
+a case please report it as a bug instead) use "ignore_cable" kernel parameter:
+
+* "ide_core.ignore_cable=[interface_number]" boot option if IDE is built-in
+  (i.e. "ide_core.ignore_cable=1" to force ignoring cable for "ide1")
+
+* "ignore_cable=[interface_number]" module parameter (for ide_core module)
+  if IDE is compiled as module
+
+Other kernel parameters for ide_core are:
+
+* "nodma=[interface_number.device_number]" to disallow DMA for a device
+
+* "noflush=[interface_number.device_number]" to disable flush requests
+
+* "nohpa=[interface_number.device_number]" to disable Host Protected Area
+
+* "noprobe=[interface_number.device_number]" to skip probing
+
+* "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit
+
+* "cdrom=[interface_number.device_number]" to force device as a CD-ROM
+
+* "chs=[interface_number.device_number]" to force device as a disk (using CHS)
+
+
+Some Terminology
+================
+
+IDE
+  Integrated Drive Electronics, meaning that each drive has a built-in
+  controller, which is why an "IDE interface card" is not a "controller card".
+
+ATA
+  AT (the old IBM 286 computer) Attachment Interface, a draft American
+  National Standard for connecting hard drives to PCs.  This is the official
+  name for "IDE".
+
+  The latest standards define some enhancements, known as the ATA-6 spec,
+  which grew out of vendor-specific "Enhanced IDE" (EIDE) implementations.
+
+ATAPI
+  ATA Packet Interface, a new protocol for controlling the drives,
+  similar to SCSI protocols, created at the same time as the ATA2 standard.
+  ATAPI is currently used for controlling CDROM, TAPE and FLOPPY (ZIP or
+  LS120/240) devices, removable R/W cartridges, and for high capacity hard disk
+  drives.
+
+mlord@pobox.com
+
+
+Wed Apr 17 22:52:44 CEST 2002 edited by Marcin Dalecki, the current
+maintainer.
+
+Wed Aug 20 22:31:29 CEST 2003 updated ide boot options to current ide.c
+comments at 2.6.0-test4 time. Maciej Soltysiak <solt@dns.toxicfilms.tv>
diff --git a/Documentation/ide/ide.txt b/Documentation/ide/ide.txt
deleted file mode 100644
index 7aca987c23d9..000000000000
--- a/Documentation/ide/ide.txt
+++ /dev/null
@@ -1,256 +0,0 @@
-
-	Information regarding the Enhanced IDE drive in Linux 2.6
-
-==============================================================================
-
-
-   The hdparm utility can be used to control various IDE features on a
-   running system. It is packaged separately.  Please Look for it on popular
-   linux FTP sites.
-
-
-
-***  IMPORTANT NOTICES:  BUGGY IDE CHIPSETS CAN CORRUPT DATA!!
-***  =================
-***  PCI versions of the CMD640 and RZ1000 interfaces are now detected
-***  automatically at startup when PCI BIOS support is configured.
-***
-***  Linux disables the "prefetch" ("readahead") mode of the RZ1000
-***  to prevent data corruption possible due to hardware design flaws.
-***
-***  For the CMD640, linux disables "IRQ unmasking" (hdparm -u1) on any
-***  drive for which the "prefetch" mode of the CMD640 is turned on.
-***  If "prefetch" is disabled (hdparm -p8), then "IRQ unmasking" can be
-***  used again.
-***
-***  For the CMD640, linux disables "32bit I/O" (hdparm -c1) on any drive
-***  for which the "prefetch" mode of the CMD640 is turned off.
-***  If "prefetch" is enabled (hdparm -p9), then "32bit I/O" can be
-***  used again.
-***
-***  The CMD640 is also used on some Vesa Local Bus (VLB) cards, and is *NOT*
-***  automatically detected by Linux.  For safe, reliable operation with such
-***  interfaces, one *MUST* use the "cmd640.probe_vlb" kernel option.
-***
-***  Use of the "serialize" option is no longer necessary.
-
-================================================================================
-Common pitfalls:
-
-- 40-conductor IDE cables are capable of transferring data in DMA modes up to
-  udma2, but no faster.
-
-- If possible devices should be attached to separate channels if they are
-  available. Typically the disk on the first and CD-ROM on the second.
-
-- If you mix devices on the same cable, please consider using similar devices
-  in respect of the data transfer mode they support.
-
-- Even better try to stick to the same vendor and device type on the same
-  cable.
-
-================================================================================
-
-This is the multiple IDE interface driver, as evolved from hd.c.
-
-It supports up to 9 IDE interfaces per default, on one or more IRQs (usually
-14 & 15).  There can be up to two drives per interface, as per the ATA-6 spec.
-
-Primary:    ide0, port 0x1f0; major=3;  hda is minor=0; hdb is minor=64
-Secondary:  ide1, port 0x170; major=22; hdc is minor=0; hdd is minor=64
-Tertiary:   ide2, port 0x1e8; major=33; hde is minor=0; hdf is minor=64
-Quaternary: ide3, port 0x168; major=34; hdg is minor=0; hdh is minor=64
-fifth..     ide4, usually PCI, probed
-sixth..     ide5, usually PCI, probed
-
-To access devices on interfaces > ide0, device entries please make sure that
-device files for them are present in /dev.  If not, please create such
-entries, by using /dev/MAKEDEV.
-
-This driver automatically probes for most IDE interfaces (including all PCI
-ones), for the drives/geometries attached to those interfaces, and for the IRQ
-lines being used by the interfaces (normally 14, 15 for ide0/ide1).
-
-Any number of interfaces may share a single IRQ if necessary, at a slight
-performance penalty, whether on separate cards or a single VLB card.
-The IDE driver automatically detects and handles this.  However, this may
-or may not be harmful to your hardware.. two or more cards driving the same IRQ
-can potentially burn each other's bus driver, though in practice this
-seldom occurs.  Be careful, and if in doubt, don't do it!
-
-Drives are normally found by auto-probing and/or examining the CMOS/BIOS data.
-For really weird situations, the apparent (fdisk) geometry can also be specified
-on the kernel "command line" using LILO.  The format of such lines is:
-
-	ide_core.chs=[interface_number.device_number]:cyls,heads,sects
-or	ide_core.cdrom=[interface_number.device_number]
-
-For example:
-
-	ide_core.chs=1.0:1050,32,64  ide_core.cdrom=1.1
-
-The results of successful auto-probing may override the physical geometry/irq
-specified, though the "original" geometry may be retained as the "logical"
-geometry for partitioning purposes (fdisk).
-
-If the auto-probing during boot time confuses a drive (ie. the drive works
-with hd.c but not with ide.c), then an command line option may be specified
-for each drive for which you'd like the drive to skip the hardware
-probe/identification sequence.  For example:
-
-	ide_core.noprobe=0.1
-or
-	ide_core.chs=1.0:768,16,32
-	ide_core.noprobe=1.0
-
-Note that when only one IDE device is attached to an interface, it should be
-jumpered as "single" or "master", *not* "slave".  Many folks have had
-"trouble" with cdroms because of this requirement, so the driver now probes
-for both units, though success is more likely when the drive is jumpered
-correctly.
-
-Courtesy of Scott Snyder and others, the driver supports ATAPI cdrom drives
-such as the NEC-260 and the new MITSUMI triple/quad speed drives.
-Such drives will be identified at boot time, just like a hard disk.
-
-If for some reason your cdrom drive is *not* found at boot time, you can force
-the probe to look harder by supplying a kernel command line parameter
-via LILO, such as:
-
-	ide_core.cdrom=1.0	/* "master" on second interface (hdc) */
-or
-	ide_core.cdrom=1.1	/* "slave" on second interface (hdd) */
-
-For example, a GW2000 system might have a hard drive on the primary
-interface (/dev/hda) and an IDE cdrom drive on the secondary interface
-(/dev/hdc).  To mount a CD in the cdrom drive, one would use something like:
-
-	ln -sf /dev/hdc /dev/cdrom
-	mkdir /mnt/cdrom
-	mount /dev/cdrom /mnt/cdrom -t iso9660 -o ro
-
-If, after doing all of the above, mount doesn't work and you see
-errors from the driver (with dmesg) complaining about `status=0xff',
-this means that the hardware is not responding to the driver's attempts
-to read it.  One of the following is probably the problem:
-
-  - Your hardware is broken.
-
-  - You are using the wrong address for the device, or you have the
-    drive jumpered wrong.  Review the configuration instructions above.
-
-  - Your IDE controller requires some nonstandard initialization sequence
-    before it will work properly.  If this is the case, there will often
-    be a separate MS-DOS driver just for the controller.  IDE interfaces
-    on sound cards usually fall into this category.  Such configurations
-    can often be made to work by first booting MS-DOS, loading the
-    appropriate drivers, and then warm-booting linux (without powering
-    off).  This can be automated using loadlin in the MS-DOS autoexec.
-
-If you always get timeout errors, interrupts from the drive are probably
-not making it to the host.  Check how you have the hardware jumpered
-and make sure it matches what the driver expects (see the configuration
-instructions above).  If you have a PCI system, also check the BIOS
-setup; I've had one report of a system which was shipped with IRQ 15
-disabled by the BIOS.
-
-The kernel is able to execute binaries directly off of the cdrom,
-provided it is mounted with the default block size of 1024 (as above).
-
-Please pass on any feedback on any of this stuff to the maintainer,
-whose address can be found in linux/MAINTAINERS.
-
-The IDE driver is modularized.  The high level disk/CD-ROM/tape/floppy
-drivers can always be compiled as loadable modules, the chipset drivers
-can only be compiled into the kernel, and the core code (ide.c) can be
-compiled as a loadable module provided no chipset support is needed.
-
-When using ide.c as a module in combination with kmod, add:
-
-	alias block-major-3 ide-probe
-
-to a configuration file in /etc/modprobe.d/.
-
-When ide.c is used as a module, you can pass command line parameters to the
-driver using the "options=" keyword to insmod, while replacing any ',' with
-';'.
-
-
-================================================================================
-
-Summary of ide driver parameters for kernel command line
---------------------------------------------------------
-
-For legacy IDE VLB host drivers (ali14xx/dtc2278/ht6560b/qd65xx/umc8672)
-you need to explicitly enable probing by using "probe" kernel parameter,
-i.e. to enable probing for ALI M14xx chipsets (ali14xx host driver) use:
-
-* "ali14xx.probe" boot option when ali14xx driver is built-in the kernel
-
-* "probe" module parameter when ali14xx driver is compiled as module
-  ("modprobe ali14xx probe")
-
-Also for legacy CMD640 host driver (cmd640) you need to use "probe_vlb"
-kernel paremeter to enable probing for VLB version of the chipset (PCI ones
-are detected automatically).
-
-You also need to use "probe" kernel parameter for ide-4drives driver
-(support for IDE generic chipset with four drives on one port).
-
-To enable support for IDE doublers on Amiga use "doubler" kernel parameter
-for gayle host driver (i.e. "gayle.doubler" if the driver is built-in).
-
-To force ignoring cable detection (this should be needed only if you're using
-short 40-wires cable which cannot be automatically detected - if this is not
-a case please report it as a bug instead) use "ignore_cable" kernel parameter:
-
-* "ide_core.ignore_cable=[interface_number]" boot option if IDE is built-in
-  (i.e. "ide_core.ignore_cable=1" to force ignoring cable for "ide1")
-
-* "ignore_cable=[interface_number]" module parameter (for ide_core module)
-  if IDE is compiled as module
-
-Other kernel parameters for ide_core are:
-
-* "nodma=[interface_number.device_number]" to disallow DMA for a device
-
-* "noflush=[interface_number.device_number]" to disable flush requests
-
-* "nohpa=[interface_number.device_number]" to disable Host Protected Area
-
-* "noprobe=[interface_number.device_number]" to skip probing
-
-* "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit
-
-* "cdrom=[interface_number.device_number]" to force device as a CD-ROM
-
-* "chs=[interface_number.device_number]" to force device as a disk (using CHS)
-
-================================================================================
-
-Some Terminology
-----------------
-IDE = Integrated Drive Electronics, meaning that each drive has a built-in
-controller, which is why an "IDE interface card" is not a "controller card".
-
-ATA = AT (the old IBM 286 computer) Attachment Interface, a draft American
-National Standard for connecting hard drives to PCs.  This is the official
-name for "IDE".
-
-The latest standards define some enhancements, known as the ATA-6 spec,
-which grew out of vendor-specific "Enhanced IDE" (EIDE) implementations.
-
-ATAPI = ATA Packet Interface, a new protocol for controlling the drives,
-similar to SCSI protocols, created at the same time as the ATA2 standard.
-ATAPI is currently used for controlling CDROM, TAPE and FLOPPY (ZIP or
-LS120/240) devices, removable R/W cartridges, and for high capacity hard disk
-drives.
-
-mlord@pobox.com
---
-
-Wed Apr 17 22:52:44 CEST 2002 edited by Marcin Dalecki, the current
-maintainer.
-
-Wed Aug 20 22:31:29 CEST 2003 updated ide boot options to current ide.c
-comments at 2.6.0-test4 time. Maciej Soltysiak <solt@dns.toxicfilms.tv>
diff --git a/Documentation/ide/index.rst b/Documentation/ide/index.rst
new file mode 100644
index 000000000000..45bc12d3957f
--- /dev/null
+++ b/Documentation/ide/index.rst
@@ -0,0 +1,21 @@
+:orphan:
+
+==================================
+Integrated Drive Electronics (IDE)
+==================================
+
+.. toctree::
+    :maxdepth: 1
+
+    ide
+    ide-tape
+    warm-plug-howto
+
+    changelogs
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/ide/warm-plug-howto.rst b/Documentation/ide/warm-plug-howto.rst
new file mode 100644
index 000000000000..c245242ef2f1
--- /dev/null
+++ b/Documentation/ide/warm-plug-howto.rst
@@ -0,0 +1,18 @@
+===================
+IDE warm-plug HOWTO
+===================
+
+To warm-plug devices on a port 'idex'::
+
+	# echo -n "1" > /sys/class/ide_port/idex/delete_devices
+
+unplug old device(s) and plug new device(s)::
+
+	# echo -n "1" > /sys/class/ide_port/idex/scan
+
+done
+
+NOTE: please make sure that partitions are unmounted and that there are
+no other active references to devices before doing "delete_devices" step,
+also do not attempt "scan" step on devices currently in use -- otherwise
+results may be unpredictable and lead to data loss if you're unlucky
diff --git a/Documentation/ide/warm-plug-howto.txt b/Documentation/ide/warm-plug-howto.txt
deleted file mode 100644
index 98152bcd515a..000000000000
--- a/Documentation/ide/warm-plug-howto.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-
-IDE warm-plug HOWTO
-===================
-
-To warm-plug devices on a port 'idex':
-
-# echo -n "1" > /sys/class/ide_port/idex/delete_devices
-
-unplug old device(s) and plug new device(s)
-
-# echo -n "1" > /sys/class/ide_port/idex/scan
-
-done
-
-NOTE: please make sure that partitions are unmounted and that there are
-no other active references to devices before doing "delete_devices" step,
-also do not attempt "scan" step on devices currently in use -- otherwise
-results may be unpredictable and lead to data loss if you're unlucky
-- 
cgit 


From cd238effefa28fac177e51dcf5e9d1a8b59c3c6b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:48 -0300
Subject: docs: kbuild: convert docs to ReST and rename to *.rst

The kbuild documentation clearly shows that the documents
there are written at different times: some use markdown,
some use their own peculiar logic to split sections.

Convert everything to ReST without affecting too much
the author's style and avoiding adding uneeded markups.

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/README.rst               |    2 +-
 Documentation/kbuild/headers_install.rst           |   51 +
 Documentation/kbuild/headers_install.txt           |   50 -
 Documentation/kbuild/index.rst                     |   27 +
 Documentation/kbuild/issues.rst                    |   11 +
 Documentation/kbuild/kbuild.rst                    |  265 ++++
 Documentation/kbuild/kbuild.txt                    |  248 ----
 Documentation/kbuild/kconfig-language.rst          |  689 +++++++++
 Documentation/kbuild/kconfig-language.txt          |  669 ---------
 Documentation/kbuild/kconfig-macro-language.rst    |  247 ++++
 Documentation/kbuild/kconfig-macro-language.txt    |  242 ----
 Documentation/kbuild/kconfig.rst                   |  300 ++++
 Documentation/kbuild/kconfig.txt                   |  272 ----
 Documentation/kbuild/makefiles.rst                 | 1509 ++++++++++++++++++++
 Documentation/kbuild/makefiles.txt                 | 1369 ------------------
 Documentation/kbuild/modules.rst                   |  571 ++++++++
 Documentation/kbuild/modules.txt                   |  541 -------
 Documentation/kernel-hacking/hacking.rst           |    4 +-
 Documentation/process/coding-style.rst             |    2 +-
 Documentation/process/submit-checklist.rst         |    2 +-
 .../translations/it_IT/kernel-hacking/hacking.rst  |    4 +-
 .../translations/it_IT/process/coding-style.rst    |    2 +-
 .../it_IT/process/submit-checklist.rst             |    2 +-
 .../translations/zh_CN/process/coding-style.rst    |    2 +-
 .../zh_CN/process/submit-checklist.rst             |    2 +-
 25 files changed, 3681 insertions(+), 3402 deletions(-)
 create mode 100644 Documentation/kbuild/headers_install.rst
 delete mode 100644 Documentation/kbuild/headers_install.txt
 create mode 100644 Documentation/kbuild/index.rst
 create mode 100644 Documentation/kbuild/issues.rst
 create mode 100644 Documentation/kbuild/kbuild.rst
 delete mode 100644 Documentation/kbuild/kbuild.txt
 create mode 100644 Documentation/kbuild/kconfig-language.rst
 delete mode 100644 Documentation/kbuild/kconfig-language.txt
 create mode 100644 Documentation/kbuild/kconfig-macro-language.rst
 delete mode 100644 Documentation/kbuild/kconfig-macro-language.txt
 create mode 100644 Documentation/kbuild/kconfig.rst
 delete mode 100644 Documentation/kbuild/kconfig.txt
 create mode 100644 Documentation/kbuild/makefiles.rst
 delete mode 100644 Documentation/kbuild/makefiles.txt
 create mode 100644 Documentation/kbuild/modules.rst
 delete mode 100644 Documentation/kbuild/modules.txt

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst
index a582c780c3bd..cc6151fc0845 100644
--- a/Documentation/admin-guide/README.rst
+++ b/Documentation/admin-guide/README.rst
@@ -227,7 +227,7 @@ Configuring the kernel
      "make tinyconfig"  Configure the tiniest possible kernel.
 
    You can find more information on using the Linux kernel config tools
-   in Documentation/kbuild/kconfig.txt.
+   in Documentation/kbuild/kconfig.rst.
 
  - NOTES on ``make config``:
 
diff --git a/Documentation/kbuild/headers_install.rst b/Documentation/kbuild/headers_install.rst
new file mode 100644
index 000000000000..1ab7294e41ac
--- /dev/null
+++ b/Documentation/kbuild/headers_install.rst
@@ -0,0 +1,51 @@
+=============================================
+Exporting kernel headers for use by userspace
+=============================================
+
+The "make headers_install" command exports the kernel's header files in a
+form suitable for use by userspace programs.
+
+The linux kernel's exported header files describe the API for user space
+programs attempting to use kernel services.  These kernel header files are
+used by the system's C library (such as glibc or uClibc) to define available
+system calls, as well as constants and structures to be used with these
+system calls.  The C library's header files include the kernel header files
+from the "linux" subdirectory.  The system's libc headers are usually
+installed at the default location /usr/include and the kernel headers in
+subdirectories under that (most notably /usr/include/linux and
+/usr/include/asm).
+
+Kernel headers are backwards compatible, but not forwards compatible.  This
+means that a program built against a C library using older kernel headers
+should run on a newer kernel (although it may not have access to new
+features), but a program built against newer kernel headers may not work on an
+older kernel.
+
+The "make headers_install" command can be run in the top level directory of the
+kernel source code (or using a standard out-of-tree build).  It takes two
+optional arguments::
+
+  make headers_install ARCH=i386 INSTALL_HDR_PATH=/usr
+
+ARCH indicates which architecture to produce headers for, and defaults to the
+current architecture.  The linux/asm directory of the exported kernel headers
+is platform-specific, to see a complete list of supported architectures use
+the command::
+
+  ls -d include/asm-* | sed 's/.*-//'
+
+INSTALL_HDR_PATH indicates where to install the headers. It defaults to
+"./usr".
+
+An 'include' directory is automatically created inside INSTALL_HDR_PATH and
+headers are installed in 'INSTALL_HDR_PATH/include'.
+
+The command "make headers_install_all" exports headers for all architectures
+simultaneously.  (This is mostly of interest to distribution maintainers,
+who create an architecture-independent tarball from the resulting include
+directory.)  You also can use HDR_ARCH_LIST to specify list of architectures.
+Remember to provide the appropriate linux/asm directory via "mv" or "ln -s"
+before building a C library with headers exported this way.
+
+The kernel header export infrastructure is maintained by David Woodhouse
+<dwmw2@infradead.org>.
diff --git a/Documentation/kbuild/headers_install.txt b/Documentation/kbuild/headers_install.txt
deleted file mode 100644
index f0153adb95e2..000000000000
--- a/Documentation/kbuild/headers_install.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-Exporting kernel headers for use by userspace
-=============================================
-
-The "make headers_install" command exports the kernel's header files in a
-form suitable for use by userspace programs.
-
-The linux kernel's exported header files describe the API for user space
-programs attempting to use kernel services.  These kernel header files are
-used by the system's C library (such as glibc or uClibc) to define available
-system calls, as well as constants and structures to be used with these
-system calls.  The C library's header files include the kernel header files
-from the "linux" subdirectory.  The system's libc headers are usually
-installed at the default location /usr/include and the kernel headers in
-subdirectories under that (most notably /usr/include/linux and
-/usr/include/asm).
-
-Kernel headers are backwards compatible, but not forwards compatible.  This
-means that a program built against a C library using older kernel headers
-should run on a newer kernel (although it may not have access to new
-features), but a program built against newer kernel headers may not work on an
-older kernel.
-
-The "make headers_install" command can be run in the top level directory of the
-kernel source code (or using a standard out-of-tree build).  It takes two
-optional arguments:
-
-  make headers_install ARCH=i386 INSTALL_HDR_PATH=/usr
-
-ARCH indicates which architecture to produce headers for, and defaults to the
-current architecture.  The linux/asm directory of the exported kernel headers
-is platform-specific, to see a complete list of supported architectures use
-the command:
-
-  ls -d include/asm-* | sed 's/.*-//'
-
-INSTALL_HDR_PATH indicates where to install the headers. It defaults to
-"./usr".
-
-An 'include' directory is automatically created inside INSTALL_HDR_PATH and
-headers are installed in 'INSTALL_HDR_PATH/include'.
-
-The command "make headers_install_all" exports headers for all architectures
-simultaneously.  (This is mostly of interest to distribution maintainers,
-who create an architecture-independent tarball from the resulting include
-directory.)  You also can use HDR_ARCH_LIST to specify list of architectures.
-Remember to provide the appropriate linux/asm directory via "mv" or "ln -s"
-before building a C library with headers exported this way.
-
-The kernel header export infrastructure is maintained by David Woodhouse
-<dwmw2@infradead.org>.
diff --git a/Documentation/kbuild/index.rst b/Documentation/kbuild/index.rst
new file mode 100644
index 000000000000..42d4cbe4460c
--- /dev/null
+++ b/Documentation/kbuild/index.rst
@@ -0,0 +1,27 @@
+:orphan:
+
+===================
+Kernel Build System
+===================
+
+.. toctree::
+    :maxdepth: 1
+
+    kconfig-language
+    kconfig-macro-language
+
+    kbuild
+    kconfig
+    makefiles
+    modules
+
+    headers_install
+
+    issues
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/kbuild/issues.rst b/Documentation/kbuild/issues.rst
new file mode 100644
index 000000000000..9fdded4b681c
--- /dev/null
+++ b/Documentation/kbuild/issues.rst
@@ -0,0 +1,11 @@
+Recursion issue #1
+------------------
+
+ .. include:: Kconfig.recursion-issue-01
+    :literal:
+
+Recursion issue #2
+------------------
+
+ .. include:: Kconfig.recursion-issue-02
+    :literal:
diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst
new file mode 100644
index 000000000000..e774e760522d
--- /dev/null
+++ b/Documentation/kbuild/kbuild.rst
@@ -0,0 +1,265 @@
+======
+Kbuild
+======
+
+
+Output files
+============
+
+modules.order
+-------------
+This file records the order in which modules appear in Makefiles. This
+is used by modprobe to deterministically resolve aliases that match
+multiple modules.
+
+modules.builtin
+---------------
+This file lists all modules that are built into the kernel. This is used
+by modprobe to not fail when trying to load something builtin.
+
+modules.builtin.modinfo
+--------------------------------------------------
+This file contains modinfo from all modules that are built into the kernel.
+Unlike modinfo of a separate module, all fields are prefixed with module name.
+
+
+Environment variables
+=====================
+
+KCPPFLAGS
+---------
+Additional options to pass when preprocessing. The preprocessing options
+will be used in all cases where kbuild does preprocessing including
+building C files and assembler files.
+
+KAFLAGS
+-------
+Additional options to the assembler (for built-in and modules).
+
+AFLAGS_MODULE
+-------------
+Additional module specific options to use for $(AS).
+
+AFLAGS_KERNEL
+-------------
+Additional options for $(AS) when used for assembler
+code for code that is compiled as built-in.
+
+KCFLAGS
+-------
+Additional options to the C compiler (for built-in and modules).
+
+CFLAGS_KERNEL
+-------------
+Additional options for $(CC) when used to compile
+code that is compiled as built-in.
+
+CFLAGS_MODULE
+-------------
+Additional module specific options to use for $(CC).
+
+LDFLAGS_MODULE
+--------------
+Additional options used for $(LD) when linking modules.
+
+HOSTCFLAGS
+----------
+Additional flags to be passed to $(HOSTCC) when building host programs.
+
+HOSTCXXFLAGS
+------------
+Additional flags to be passed to $(HOSTCXX) when building host programs.
+
+HOSTLDFLAGS
+-----------
+Additional flags to be passed when linking host programs.
+
+HOSTLDLIBS
+----------
+Additional libraries to link against when building host programs.
+
+KBUILD_KCONFIG
+--------------
+Set the top-level Kconfig file to the value of this environment
+variable.  The default name is "Kconfig".
+
+KBUILD_VERBOSE
+--------------
+Set the kbuild verbosity. Can be assigned same values as "V=...".
+
+See make help for the full list.
+
+Setting "V=..." takes precedence over KBUILD_VERBOSE.
+
+KBUILD_EXTMOD
+-------------
+Set the directory to look for the kernel source when building external
+modules.
+
+Setting "M=..." takes precedence over KBUILD_EXTMOD.
+
+KBUILD_OUTPUT
+-------------
+Specify the output directory when building the kernel.
+
+The output directory can also be specified using "O=...".
+
+Setting "O=..." takes precedence over KBUILD_OUTPUT.
+
+KBUILD_DEBARCH
+--------------
+For the deb-pkg target, allows overriding the normal heuristics deployed by
+deb-pkg. Normally deb-pkg attempts to guess the right architecture based on
+the UTS_MACHINE variable, and on some architectures also the kernel config.
+The value of KBUILD_DEBARCH is assumed (not checked) to be a valid Debian
+architecture.
+
+ARCH
+----
+Set ARCH to the architecture to be built.
+
+In most cases the name of the architecture is the same as the
+directory name found in the arch/ directory.
+
+But some architectures such as x86 and sparc have aliases.
+
+- x86: i386 for 32 bit, x86_64 for 64 bit
+- sh: sh for 32 bit, sh64 for 64 bit
+- sparc: sparc32 for 32 bit, sparc64 for 64 bit
+
+CROSS_COMPILE
+-------------
+Specify an optional fixed part of the binutils filename.
+CROSS_COMPILE can be a part of the filename or the full path.
+
+CROSS_COMPILE is also used for ccache in some setups.
+
+CF
+--
+Additional options for sparse.
+
+CF is often used on the command-line like this::
+
+    make CF=-Wbitwise C=2
+
+INSTALL_PATH
+------------
+INSTALL_PATH specifies where to place the updated kernel and system map
+images. Default is /boot, but you can set it to other values.
+
+INSTALLKERNEL
+-------------
+Install script called when using "make install".
+The default name is "installkernel".
+
+The script will be called with the following arguments:
+   - $1 - kernel version
+   - $2 - kernel image file
+   - $3 - kernel map file
+   - $4 - default install path (use root directory if blank)
+
+The implementation of "make install" is architecture specific
+and it may differ from the above.
+
+INSTALLKERNEL is provided to enable the possibility to
+specify a custom installer when cross compiling a kernel.
+
+MODLIB
+------
+Specify where to install modules.
+The default value is::
+
+     $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE)
+
+The value can be overridden in which case the default value is ignored.
+
+INSTALL_MOD_PATH
+----------------
+INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory
+relocations required by build roots.  This is not defined in the
+makefile but the argument can be passed to make if needed.
+
+INSTALL_MOD_STRIP
+-----------------
+INSTALL_MOD_STRIP, if defined, will cause modules to be
+stripped after they are installed.  If INSTALL_MOD_STRIP is '1', then
+the default option --strip-debug will be used.  Otherwise,
+INSTALL_MOD_STRIP value will be used as the options to the strip command.
+
+INSTALL_HDR_PATH
+----------------
+INSTALL_HDR_PATH specifies where to install user space headers when
+executing "make headers_*".
+
+The default value is::
+
+    $(objtree)/usr
+
+$(objtree) is the directory where output files are saved.
+The output directory is often set using "O=..." on the commandline.
+
+The value can be overridden in which case the default value is ignored.
+
+KBUILD_SIGN_PIN
+---------------
+This variable allows a passphrase or PIN to be passed to the sign-file
+utility when signing kernel modules, if the private key requires such.
+
+KBUILD_MODPOST_WARN
+-------------------
+KBUILD_MODPOST_WARN can be set to avoid errors in case of undefined
+symbols in the final module linking stage. It changes such errors
+into warnings.
+
+KBUILD_MODPOST_NOFINAL
+----------------------
+KBUILD_MODPOST_NOFINAL can be set to skip the final link of modules.
+This is solely useful to speed up test compiles.
+
+KBUILD_EXTRA_SYMBOLS
+--------------------
+For modules that use symbols from other modules.
+See more details in modules.txt.
+
+ALLSOURCE_ARCHS
+---------------
+For tags/TAGS/cscope targets, you can specify more than one arch
+to be included in the databases, separated by blank space. E.g.::
+
+    $ make ALLSOURCE_ARCHS="x86 mips arm" tags
+
+To get all available archs you can also specify all. E.g.::
+
+    $ make ALLSOURCE_ARCHS=all tags
+
+KBUILD_ENABLE_EXTRA_GCC_CHECKS
+------------------------------
+If enabled over the make command line with "W=1", it turns on additional
+gcc -W... options for more extensive build-time checking.
+
+KBUILD_BUILD_TIMESTAMP
+----------------------
+Setting this to a date string overrides the timestamp used in the
+UTS_VERSION definition (uname -v in the running kernel). The value has to
+be a string that can be passed to date -d. The default value
+is the output of the date command at one point during build.
+
+KBUILD_BUILD_USER, KBUILD_BUILD_HOST
+------------------------------------
+These two variables allow to override the user@host string displayed during
+boot and in /proc/version. The default value is the output of the commands
+whoami and host, respectively.
+
+KBUILD_LDS
+----------
+The linker script with full path. Assigned by the top-level Makefile.
+
+KBUILD_VMLINUX_OBJS
+-------------------
+All object files for vmlinux. They are linked to vmlinux in the same
+order as listed in KBUILD_VMLINUX_OBJS.
+
+KBUILD_VMLINUX_LIBS
+-------------------
+All .a "lib" files for vmlinux. KBUILD_VMLINUX_OBJS and KBUILD_VMLINUX_LIBS
+together specify all the object files used to link vmlinux.
diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt
deleted file mode 100644
index 9c230ea71963..000000000000
--- a/Documentation/kbuild/kbuild.txt
+++ /dev/null
@@ -1,248 +0,0 @@
-Output files
-
-modules.order
---------------------------------------------------
-This file records the order in which modules appear in Makefiles. This
-is used by modprobe to deterministically resolve aliases that match
-multiple modules.
-
-modules.builtin
---------------------------------------------------
-This file lists all modules that are built into the kernel. This is used
-by modprobe to not fail when trying to load something builtin.
-
-modules.builtin.modinfo
---------------------------------------------------
-This file contains modinfo from all modules that are built into the kernel.
-Unlike modinfo of a separate module, all fields are prefixed with module name.
-
-
-Environment variables
-
-KCPPFLAGS
---------------------------------------------------
-Additional options to pass when preprocessing. The preprocessing options
-will be used in all cases where kbuild does preprocessing including
-building C files and assembler files.
-
-KAFLAGS
---------------------------------------------------
-Additional options to the assembler (for built-in and modules).
-
-AFLAGS_MODULE
---------------------------------------------------
-Additional module specific options to use for $(AS).
-
-AFLAGS_KERNEL
---------------------------------------------------
-Additional options for $(AS) when used for assembler
-code for code that is compiled as built-in.
-
-KCFLAGS
---------------------------------------------------
-Additional options to the C compiler (for built-in and modules).
-
-CFLAGS_KERNEL
---------------------------------------------------
-Additional options for $(CC) when used to compile
-code that is compiled as built-in.
-
-CFLAGS_MODULE
---------------------------------------------------
-Additional module specific options to use for $(CC).
-
-LDFLAGS_MODULE
---------------------------------------------------
-Additional options used for $(LD) when linking modules.
-
-HOSTCFLAGS
---------------------------------------------------
-Additional flags to be passed to $(HOSTCC) when building host programs.
-
-HOSTCXXFLAGS
---------------------------------------------------
-Additional flags to be passed to $(HOSTCXX) when building host programs.
-
-HOSTLDFLAGS
---------------------------------------------------
-Additional flags to be passed when linking host programs.
-
-HOSTLDLIBS
---------------------------------------------------
-Additional libraries to link against when building host programs.
-
-KBUILD_KCONFIG
---------------------------------------------------
-Set the top-level Kconfig file to the value of this environment
-variable.  The default name is "Kconfig".
-
-KBUILD_VERBOSE
---------------------------------------------------
-Set the kbuild verbosity. Can be assigned same values as "V=...".
-See make help for the full list.
-Setting "V=..." takes precedence over KBUILD_VERBOSE.
-
-KBUILD_EXTMOD
---------------------------------------------------
-Set the directory to look for the kernel source when building external
-modules.
-Setting "M=..." takes precedence over KBUILD_EXTMOD.
-
-KBUILD_OUTPUT
---------------------------------------------------
-Specify the output directory when building the kernel.
-The output directory can also be specified using "O=...".
-Setting "O=..." takes precedence over KBUILD_OUTPUT.
-
-KBUILD_DEBARCH
---------------------------------------------------
-For the deb-pkg target, allows overriding the normal heuristics deployed by
-deb-pkg. Normally deb-pkg attempts to guess the right architecture based on
-the UTS_MACHINE variable, and on some architectures also the kernel config.
-The value of KBUILD_DEBARCH is assumed (not checked) to be a valid Debian
-architecture.
-
-ARCH
---------------------------------------------------
-Set ARCH to the architecture to be built.
-In most cases the name of the architecture is the same as the
-directory name found in the arch/ directory.
-But some architectures such as x86 and sparc have aliases.
-x86: i386 for 32 bit, x86_64 for 64 bit
-sh: sh for 32 bit, sh64 for 64 bit
-sparc: sparc32 for 32 bit, sparc64 for 64 bit
-
-CROSS_COMPILE
---------------------------------------------------
-Specify an optional fixed part of the binutils filename.
-CROSS_COMPILE can be a part of the filename or the full path.
-
-CROSS_COMPILE is also used for ccache in some setups.
-
-CF
---------------------------------------------------
-Additional options for sparse.
-CF is often used on the command-line like this:
-
-    make CF=-Wbitwise C=2
-
-INSTALL_PATH
---------------------------------------------------
-INSTALL_PATH specifies where to place the updated kernel and system map
-images. Default is /boot, but you can set it to other values.
-
-INSTALLKERNEL
---------------------------------------------------
-Install script called when using "make install".
-The default name is "installkernel".
-
-The script will be called with the following arguments:
-    $1 - kernel version
-    $2 - kernel image file
-    $3 - kernel map file
-    $4 - default install path (use root directory if blank)
-
-The implementation of "make install" is architecture specific
-and it may differ from the above.
-
-INSTALLKERNEL is provided to enable the possibility to
-specify a custom installer when cross compiling a kernel.
-
-MODLIB
---------------------------------------------------
-Specify where to install modules.
-The default value is:
-
-     $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE)
-
-The value can be overridden in which case the default value is ignored.
-
-INSTALL_MOD_PATH
---------------------------------------------------
-INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory
-relocations required by build roots.  This is not defined in the
-makefile but the argument can be passed to make if needed.
-
-INSTALL_MOD_STRIP
---------------------------------------------------
-INSTALL_MOD_STRIP, if defined, will cause modules to be
-stripped after they are installed.  If INSTALL_MOD_STRIP is '1', then
-the default option --strip-debug will be used.  Otherwise,
-INSTALL_MOD_STRIP value will be used as the options to the strip command.
-
-INSTALL_HDR_PATH
---------------------------------------------------
-INSTALL_HDR_PATH specifies where to install user space headers when
-executing "make headers_*".
-The default value is:
-
-    $(objtree)/usr
-
-$(objtree) is the directory where output files are saved.
-The output directory is often set using "O=..." on the commandline.
-
-The value can be overridden in which case the default value is ignored.
-
-KBUILD_SIGN_PIN
---------------------------------------------------
-This variable allows a passphrase or PIN to be passed to the sign-file
-utility when signing kernel modules, if the private key requires such.
-
-KBUILD_MODPOST_WARN
---------------------------------------------------
-KBUILD_MODPOST_WARN can be set to avoid errors in case of undefined
-symbols in the final module linking stage. It changes such errors
-into warnings.
-
-KBUILD_MODPOST_NOFINAL
---------------------------------------------------
-KBUILD_MODPOST_NOFINAL can be set to skip the final link of modules.
-This is solely useful to speed up test compiles.
-
-KBUILD_EXTRA_SYMBOLS
---------------------------------------------------
-For modules that use symbols from other modules.
-See more details in modules.txt.
-
-ALLSOURCE_ARCHS
---------------------------------------------------
-For tags/TAGS/cscope targets, you can specify more than one arch
-to be included in the databases, separated by blank space. E.g.:
-
-    $ make ALLSOURCE_ARCHS="x86 mips arm" tags
-
-To get all available archs you can also specify all. E.g.:
-
-    $ make ALLSOURCE_ARCHS=all tags
-
-KBUILD_ENABLE_EXTRA_GCC_CHECKS
---------------------------------------------------
-If enabled over the make command line with "W=1", it turns on additional
-gcc -W... options for more extensive build-time checking.
-
-KBUILD_BUILD_TIMESTAMP
---------------------------------------------------
-Setting this to a date string overrides the timestamp used in the
-UTS_VERSION definition (uname -v in the running kernel). The value has to
-be a string that can be passed to date -d. The default value
-is the output of the date command at one point during build.
-
-KBUILD_BUILD_USER, KBUILD_BUILD_HOST
---------------------------------------------------
-These two variables allow to override the user@host string displayed during
-boot and in /proc/version. The default value is the output of the commands
-whoami and host, respectively.
-
-KBUILD_LDS
---------------------------------------------------
-The linker script with full path. Assigned by the top-level Makefile.
-
-KBUILD_VMLINUX_OBJS
---------------------------------------------------
-All object files for vmlinux. They are linked to vmlinux in the same
-order as listed in KBUILD_VMLINUX_OBJS.
-
-KBUILD_VMLINUX_LIBS
---------------------------------------------------
-All .a "lib" files for vmlinux. KBUILD_VMLINUX_OBJS and KBUILD_VMLINUX_LIBS
-together specify all the object files used to link vmlinux.
diff --git a/Documentation/kbuild/kconfig-language.rst b/Documentation/kbuild/kconfig-language.rst
new file mode 100644
index 000000000000..2bc8a7803365
--- /dev/null
+++ b/Documentation/kbuild/kconfig-language.rst
@@ -0,0 +1,689 @@
+================
+Kconfig Language
+================
+
+Introduction
+------------
+
+The configuration database is a collection of configuration options
+organized in a tree structure::
+
+	+- Code maturity level options
+	|  +- Prompt for development and/or incomplete code/drivers
+	+- General setup
+	|  +- Networking support
+	|  +- System V IPC
+	|  +- BSD Process Accounting
+	|  +- Sysctl support
+	+- Loadable module support
+	|  +- Enable loadable module support
+	|     +- Set version information on all module symbols
+	|     +- Kernel module loader
+	+- ...
+
+Every entry has its own dependencies. These dependencies are used
+to determine the visibility of an entry. Any child entry is only
+visible if its parent entry is also visible.
+
+Menu entries
+------------
+
+Most entries define a config option; all other entries help to organize
+them. A single configuration option is defined like this::
+
+  config MODVERSIONS
+	bool "Set version information on all module symbols"
+	depends on MODULES
+	help
+	  Usually, modules have to be recompiled whenever you switch to a new
+	  kernel.  ...
+
+Every line starts with a key word and can be followed by multiple
+arguments.  "config" starts a new config entry. The following lines
+define attributes for this config option. Attributes can be the type of
+the config option, input prompt, dependencies, help text and default
+values. A config option can be defined multiple times with the same
+name, but every definition can have only a single input prompt and the
+type must not conflict.
+
+Menu attributes
+---------------
+
+A menu entry can have a number of attributes. Not all of them are
+applicable everywhere (see syntax).
+
+- type definition: "bool"/"tristate"/"string"/"hex"/"int"
+  Every config option must have a type. There are only two basic types:
+  tristate and string; the other types are based on these two. The type
+  definition optionally accepts an input prompt, so these two examples
+  are equivalent::
+
+	bool "Networking support"
+
+  and::
+
+	bool
+	prompt "Networking support"
+
+- input prompt: "prompt" <prompt> ["if" <expr>]
+  Every menu entry can have at most one prompt, which is used to display
+  to the user. Optionally dependencies only for this prompt can be added
+  with "if".
+
+- default value: "default" <expr> ["if" <expr>]
+  A config option can have any number of default values. If multiple
+  default values are visible, only the first defined one is active.
+  Default values are not limited to the menu entry where they are
+  defined. This means the default can be defined somewhere else or be
+  overridden by an earlier definition.
+  The default value is only assigned to the config symbol if no other
+  value was set by the user (via the input prompt above). If an input
+  prompt is visible the default value is presented to the user and can
+  be overridden by him.
+  Optionally, dependencies only for this default value can be added with
+  "if".
+
+ The default value deliberately defaults to 'n' in order to avoid bloating the
+ build. With few exceptions, new config options should not change this. The
+ intent is for "make oldconfig" to add as little as possible to the config from
+ release to release.
+
+ Note:
+	Things that merit "default y/m" include:
+
+	a) A new Kconfig option for something that used to always be built
+	   should be "default y".
+
+	b) A new gatekeeping Kconfig option that hides/shows other Kconfig
+	   options (but does not generate any code of its own), should be
+	   "default y" so people will see those other options.
+
+	c) Sub-driver behavior or similar options for a driver that is
+	   "default n". This allows you to provide sane defaults.
+
+	d) Hardware or infrastructure that everybody expects, such as CONFIG_NET
+	   or CONFIG_BLOCK. These are rare exceptions.
+
+- type definition + default value::
+
+	"def_bool"/"def_tristate" <expr> ["if" <expr>]
+
+  This is a shorthand notation for a type definition plus a value.
+  Optionally dependencies for this default value can be added with "if".
+
+- dependencies: "depends on" <expr>
+  This defines a dependency for this menu entry. If multiple
+  dependencies are defined, they are connected with '&&'. Dependencies
+  are applied to all other options within this menu entry (which also
+  accept an "if" expression), so these two examples are equivalent::
+
+	bool "foo" if BAR
+	default y if BAR
+
+  and::
+
+	depends on BAR
+	bool "foo"
+	default y
+
+- reverse dependencies: "select" <symbol> ["if" <expr>]
+  While normal dependencies reduce the upper limit of a symbol (see
+  below), reverse dependencies can be used to force a lower limit of
+  another symbol. The value of the current menu symbol is used as the
+  minimal value <symbol> can be set to. If <symbol> is selected multiple
+  times, the limit is set to the largest selection.
+  Reverse dependencies can only be used with boolean or tristate
+  symbols.
+
+  Note:
+	select should be used with care. select will force
+	a symbol to a value without visiting the dependencies.
+	By abusing select you are able to select a symbol FOO even
+	if FOO depends on BAR that is not set.
+	In general use select only for non-visible symbols
+	(no prompts anywhere) and for symbols with no dependencies.
+	That will limit the usefulness but on the other hand avoid
+	the illegal configurations all over.
+
+- weak reverse dependencies: "imply" <symbol> ["if" <expr>]
+  This is similar to "select" as it enforces a lower limit on another
+  symbol except that the "implied" symbol's value may still be set to n
+  from a direct dependency or with a visible prompt.
+
+  Given the following example::
+
+    config FOO
+	tristate
+	imply BAZ
+
+    config BAZ
+	tristate
+	depends on BAR
+
+  The following values are possible:
+
+	===		===		=============	==============
+	FOO		BAR		BAZ's default	choice for BAZ
+	===		===		=============	==============
+	n		y		n		N/m/y
+	m		y		m		M/y/n
+	y		y		y		Y/n
+	y		n		*		N
+	===		===		=============	==============
+
+  This is useful e.g. with multiple drivers that want to indicate their
+  ability to hook into a secondary subsystem while allowing the user to
+  configure that subsystem out without also having to unset these drivers.
+
+- limiting menu display: "visible if" <expr>
+  This attribute is only applicable to menu blocks, if the condition is
+  false, the menu block is not displayed to the user (the symbols
+  contained there can still be selected by other symbols, though). It is
+  similar to a conditional "prompt" attribute for individual menu
+  entries. Default value of "visible" is true.
+
+- numerical ranges: "range" <symbol> <symbol> ["if" <expr>]
+  This allows to limit the range of possible input values for int
+  and hex symbols. The user can only input a value which is larger than
+  or equal to the first symbol and smaller than or equal to the second
+  symbol.
+
+- help text: "help" or "---help---"
+  This defines a help text. The end of the help text is determined by
+  the indentation level, this means it ends at the first line which has
+  a smaller indentation than the first line of the help text.
+  "---help---" and "help" do not differ in behaviour, "---help---" is
+  used to help visually separate configuration logic from help within
+  the file as an aid to developers.
+
+- misc options: "option" <symbol>[=<value>]
+  Various less common options can be defined via this option syntax,
+  which can modify the behaviour of the menu entry and its config
+  symbol. These options are currently possible:
+
+  - "defconfig_list"
+    This declares a list of default entries which can be used when
+    looking for the default configuration (which is used when the main
+    .config doesn't exists yet.)
+
+  - "modules"
+    This declares the symbol to be used as the MODULES symbol, which
+    enables the third modular state for all config symbols.
+    At most one symbol may have the "modules" option set.
+
+  - "allnoconfig_y"
+    This declares the symbol as one that should have the value y when
+    using "allnoconfig". Used for symbols that hide other symbols.
+
+Menu dependencies
+-----------------
+
+Dependencies define the visibility of a menu entry and can also reduce
+the input range of tristate symbols. The tristate logic used in the
+expressions uses one more state than normal boolean logic to express the
+module state. Dependency expressions have the following syntax::
+
+  <expr> ::= <symbol>                           (1)
+           <symbol> '=' <symbol>                (2)
+           <symbol> '!=' <symbol>               (3)
+           <symbol1> '<' <symbol2>              (4)
+           <symbol1> '>' <symbol2>              (4)
+           <symbol1> '<=' <symbol2>             (4)
+           <symbol1> '>=' <symbol2>             (4)
+           '(' <expr> ')'                       (5)
+           '!' <expr>                           (6)
+           <expr> '&&' <expr>                   (7)
+           <expr> '||' <expr>                   (8)
+
+Expressions are listed in decreasing order of precedence.
+
+(1) Convert the symbol into an expression. Boolean and tristate symbols
+    are simply converted into the respective expression values. All
+    other symbol types result in 'n'.
+(2) If the values of both symbols are equal, it returns 'y',
+    otherwise 'n'.
+(3) If the values of both symbols are equal, it returns 'n',
+    otherwise 'y'.
+(4) If value of <symbol1> is respectively lower, greater, lower-or-equal,
+    or greater-or-equal than value of <symbol2>, it returns 'y',
+    otherwise 'n'.
+(5) Returns the value of the expression. Used to override precedence.
+(6) Returns the result of (2-/expr/).
+(7) Returns the result of min(/expr/, /expr/).
+(8) Returns the result of max(/expr/, /expr/).
+
+An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2
+respectively for calculations). A menu entry becomes visible when its
+expression evaluates to 'm' or 'y'.
+
+There are two types of symbols: constant and non-constant symbols.
+Non-constant symbols are the most common ones and are defined with the
+'config' statement. Non-constant symbols consist entirely of alphanumeric
+characters or underscores.
+Constant symbols are only part of expressions. Constant symbols are
+always surrounded by single or double quotes. Within the quote, any
+other character is allowed and the quotes can be escaped using '\'.
+
+Menu structure
+--------------
+
+The position of a menu entry in the tree is determined in two ways. First
+it can be specified explicitly::
+
+  menu "Network device support"
+	depends on NET
+
+  config NETDEVICES
+	...
+
+  endmenu
+
+All entries within the "menu" ... "endmenu" block become a submenu of
+"Network device support". All subentries inherit the dependencies from
+the menu entry, e.g. this means the dependency "NET" is added to the
+dependency list of the config option NETDEVICES.
+
+The other way to generate the menu structure is done by analyzing the
+dependencies. If a menu entry somehow depends on the previous entry, it
+can be made a submenu of it. First, the previous (parent) symbol must
+be part of the dependency list and then one of these two conditions
+must be true:
+
+- the child entry must become invisible, if the parent is set to 'n'
+- the child entry must only be visible, if the parent is visible::
+
+    config MODULES
+	bool "Enable loadable module support"
+
+    config MODVERSIONS
+	bool "Set version information on all module symbols"
+	depends on MODULES
+
+    comment "module support disabled"
+	depends on !MODULES
+
+MODVERSIONS directly depends on MODULES, this means it's only visible if
+MODULES is different from 'n'. The comment on the other hand is only
+visible when MODULES is set to 'n'.
+
+
+Kconfig syntax
+--------------
+
+The configuration file describes a series of menu entries, where every
+line starts with a keyword (except help texts). The following keywords
+end a menu entry:
+
+- config
+- menuconfig
+- choice/endchoice
+- comment
+- menu/endmenu
+- if/endif
+- source
+
+The first five also start the definition of a menu entry.
+
+config::
+	"config" <symbol>
+	<config options>
+
+This defines a config symbol <symbol> and accepts any of above
+attributes as options.
+
+menuconfig::
+	"menuconfig" <symbol>
+	<config options>
+
+This is similar to the simple config entry above, but it also gives a
+hint to front ends, that all suboptions should be displayed as a
+separate list of options. To make sure all the suboptions will really
+show up under the menuconfig entry and not outside of it, every item
+from the <config options> list must depend on the menuconfig symbol.
+In practice, this is achieved by using one of the next two constructs::
+
+  (1):
+  menuconfig M
+  if M
+      config C1
+      config C2
+  endif
+
+  (2):
+  menuconfig M
+  config C1
+      depends on M
+  config C2
+      depends on M
+
+In the following examples (3) and (4), C1 and C2 still have the M
+dependency, but will not appear under menuconfig M anymore, because
+of C0, which doesn't depend on M::
+
+  (3):
+  menuconfig M
+      config C0
+  if M
+      config C1
+      config C2
+  endif
+
+  (4):
+  menuconfig M
+  config C0
+  config C1
+      depends on M
+  config C2
+      depends on M
+
+choices::
+
+	"choice" [symbol]
+	<choice options>
+	<choice block>
+	"endchoice"
+
+This defines a choice group and accepts any of the above attributes as
+options. A choice can only be of type bool or tristate.  If no type is
+specified for a choice, its type will be determined by the type of
+the first choice element in the group or remain unknown if none of the
+choice elements have a type specified, as well.
+
+While a boolean choice only allows a single config entry to be
+selected, a tristate choice also allows any number of config entries
+to be set to 'm'. This can be used if multiple drivers for a single
+hardware exists and only a single driver can be compiled/loaded into
+the kernel, but all drivers can be compiled as modules.
+
+A choice accepts another option "optional", which allows to set the
+choice to 'n' and no entry needs to be selected.
+If no [symbol] is associated with a choice, then you can not have multiple
+definitions of that choice. If a [symbol] is associated to the choice,
+then you may define the same choice (i.e. with the same entries) in another
+place.
+
+comment::
+
+	"comment" <prompt>
+	<comment options>
+
+This defines a comment which is displayed to the user during the
+configuration process and is also echoed to the output files. The only
+possible options are dependencies.
+
+menu::
+
+	"menu" <prompt>
+	<menu options>
+	<menu block>
+	"endmenu"
+
+This defines a menu block, see "Menu structure" above for more
+information. The only possible options are dependencies and "visible"
+attributes.
+
+if::
+
+	"if" <expr>
+	<if block>
+	"endif"
+
+This defines an if block. The dependency expression <expr> is appended
+to all enclosed menu entries.
+
+source::
+
+	"source" <prompt>
+
+This reads the specified configuration file. This file is always parsed.
+
+mainmenu::
+
+	"mainmenu" <prompt>
+
+This sets the config program's title bar if the config program chooses
+to use it. It should be placed at the top of the configuration, before any
+other statement.
+
+'#' Kconfig source file comment:
+
+An unquoted '#' character anywhere in a source file line indicates
+the beginning of a source file comment.  The remainder of that line
+is a comment.
+
+
+Kconfig hints
+-------------
+This is a collection of Kconfig tips, most of which aren't obvious at
+first glance and most of which have become idioms in several Kconfig
+files.
+
+Adding common features and make the usage configurable
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It is a common idiom to implement a feature/functionality that are
+relevant for some architectures but not all.
+The recommended way to do so is to use a config variable named HAVE_*
+that is defined in a common Kconfig file and selected by the relevant
+architectures.
+An example is the generic IOMAP functionality.
+
+We would in lib/Kconfig see::
+
+  # Generic IOMAP is used to ...
+  config HAVE_GENERIC_IOMAP
+
+  config GENERIC_IOMAP
+	depends on HAVE_GENERIC_IOMAP && FOO
+
+And in lib/Makefile we would see::
+
+	obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
+
+For each architecture using the generic IOMAP functionality we would see::
+
+  config X86
+	select ...
+	select HAVE_GENERIC_IOMAP
+	select ...
+
+Note: we use the existing config option and avoid creating a new
+config variable to select HAVE_GENERIC_IOMAP.
+
+Note: the use of the internal config variable HAVE_GENERIC_IOMAP, it is
+introduced to overcome the limitation of select which will force a
+config option to 'y' no matter the dependencies.
+The dependencies are moved to the symbol GENERIC_IOMAP and we avoid the
+situation where select forces a symbol equals to 'y'.
+
+Adding features that need compiler support
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+There are several features that need compiler support. The recommended way
+to describe the dependency on the compiler feature is to use "depends on"
+followed by a test macro::
+
+  config STACKPROTECTOR
+	bool "Stack Protector buffer overflow detection"
+	depends on $(cc-option,-fstack-protector)
+	...
+
+If you need to expose a compiler capability to makefiles and/or C source files,
+`CC_HAS_` is the recommended prefix for the config option::
+
+  config CC_HAS_STACKPROTECTOR_NONE
+	def_bool $(cc-option,-fno-stack-protector)
+
+Build as module only
+~~~~~~~~~~~~~~~~~~~~
+To restrict a component build to module-only, qualify its config symbol
+with "depends on m".  E.g.::
+
+  config FOO
+	depends on BAR && m
+
+limits FOO to module (=m) or disabled (=n).
+
+Kconfig recursive dependency limitations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If you've hit the Kconfig error: "recursive dependency detected" you've run
+into a recursive dependency issue with Kconfig, a recursive dependency can be
+summarized as a circular dependency. The kconfig tools need to ensure that
+Kconfig files comply with specified configuration requirements. In order to do
+that kconfig must determine the values that are possible for all Kconfig
+symbols, this is currently not possible if there is a circular relation
+between two or more Kconfig symbols. For more details refer to the "Simple
+Kconfig recursive issue" subsection below. Kconfig does not do recursive
+dependency resolution; this has a few implications for Kconfig file writers.
+We'll first explain why this issues exists and then provide an example
+technical limitation which this brings upon Kconfig developers. Eager
+developers wishing to try to address this limitation should read the next
+subsections.
+
+Simple Kconfig recursive issue
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Read: Documentation/kbuild/Kconfig.recursion-issue-01
+
+Test with::
+
+  make KBUILD_KCONFIG=Documentation/kbuild/Kconfig.recursion-issue-01 allnoconfig
+
+Cumulative Kconfig recursive issue
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Read: Documentation/kbuild/Kconfig.recursion-issue-02
+
+Test with::
+
+  make KBUILD_KCONFIG=Documentation/kbuild/Kconfig.recursion-issue-02 allnoconfig
+
+Practical solutions to kconfig recursive issue
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Developers who run into the recursive Kconfig issue have two options
+at their disposal. We document them below and also provide a list of
+historical issues resolved through these different solutions.
+
+  a) Remove any superfluous "select FOO" or "depends on FOO"
+  b) Match dependency semantics:
+
+	b1) Swap all "select FOO" to "depends on FOO" or,
+
+	b2) Swap all "depends on FOO" to "select FOO"
+
+The resolution to a) can be tested with the sample Kconfig file
+Documentation/kbuild/Kconfig.recursion-issue-01 through the removal
+of the "select CORE" from CORE_BELL_A_ADVANCED as that is implicit already
+since CORE_BELL_A depends on CORE. At times it may not be possible to remove
+some dependency criteria, for such cases you can work with solution b).
+
+The two different resolutions for b) can be tested in the sample Kconfig file
+Documentation/kbuild/Kconfig.recursion-issue-02.
+
+Below is a list of examples of prior fixes for these types of recursive issues;
+all errors appear to involve one or more select's and one or more "depends on".
+
+============    ===================================
+commit          fix
+============    ===================================
+06b718c01208    select A -> depends on A
+c22eacfe82f9    depends on A -> depends on B
+6a91e854442c    select A -> depends on A
+118c565a8f2e    select A -> select B
+f004e5594705    select A -> depends on A
+c7861f37b4c6    depends on A -> (null)
+80c69915e5fb    select A -> (null)              (1)
+c2218e26c0d0    select A -> depends on A        (1)
+d6ae99d04e1c    select A -> depends on A
+95ca19cf8cbf    select A -> depends on A
+8f057d7bca54    depends on A -> (null)
+8f057d7bca54    depends on A -> select A
+a0701f04846e    select A -> depends on A
+0c8b92f7f259    depends on A -> (null)
+e4e9e0540928    select A -> depends on A        (2)
+7453ea886e87    depends on A > (null)           (1)
+7b1fff7e4fdf    select A -> depends on A
+86c747d2a4f0    select A -> depends on A
+d9f9ab51e55e    select A -> depends on A
+0c51a4d8abd6    depends on A -> select A        (3)
+e98062ed6dc4    select A -> depends on A        (3)
+91e5d284a7f1    select A -> (null)
+============    ===================================
+
+(1) Partial (or no) quote of error.
+(2) That seems to be the gist of that fix.
+(3) Same error.
+
+Future kconfig work
+~~~~~~~~~~~~~~~~~~~
+
+Work on kconfig is welcomed on both areas of clarifying semantics and on
+evaluating the use of a full SAT solver for it. A full SAT solver can be
+desirable to enable more complex dependency mappings and / or queries,
+for instance on possible use case for a SAT solver could be that of handling
+the current known recursive dependency issues. It is not known if this would
+address such issues but such evaluation is desirable. If support for a full SAT
+solver proves too complex or that it cannot address recursive dependency issues
+Kconfig should have at least clear and well defined semantics which also
+addresses and documents limitations or requirements such as the ones dealing
+with recursive dependencies.
+
+Further work on both of these areas is welcomed on Kconfig. We elaborate
+on both of these in the next two subsections.
+
+Semantics of Kconfig
+~~~~~~~~~~~~~~~~~~~~
+
+The use of Kconfig is broad, Linux is now only one of Kconfig's users:
+one study has completed a broad analysis of Kconfig use in 12 projects [0]_.
+Despite its widespread use, and although this document does a reasonable job
+in documenting basic Kconfig syntax a more precise definition of Kconfig
+semantics is welcomed. One project deduced Kconfig semantics through
+the use of the xconfig configurator [1]_. Work should be done to confirm if
+the deduced semantics matches our intended Kconfig design goals.
+
+Having well defined semantics can be useful for tools for practical
+evaluation of depenencies, for instance one such use known case was work to
+express in boolean abstraction of the inferred semantics of Kconfig to
+translate Kconfig logic into boolean formulas and run a SAT solver on this to
+find dead code / features (always inactive), 114 dead features were found in
+Linux using this methodology [1]_ (Section 8: Threats to validity).
+
+Confirming this could prove useful as Kconfig stands as one of the the leading
+industrial variability modeling languages [1]_ [2]_. Its study would help
+evaluate practical uses of such languages, their use was only theoretical
+and real world requirements were not well understood. As it stands though
+only reverse engineering techniques have been used to deduce semantics from
+variability modeling languages such as Kconfig [3]_.
+
+.. [0] http://www.eng.uwaterloo.ca/~shshe/kconfig_semantics.pdf
+.. [1] http://gsd.uwaterloo.ca/sites/default/files/vm-2013-berger.pdf
+.. [2] http://gsd.uwaterloo.ca/sites/default/files/ase241-berger_0.pdf
+.. [3] http://gsd.uwaterloo.ca/sites/default/files/icse2011.pdf
+
+Full SAT solver for Kconfig
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Although SAT solvers [4]_ haven't yet been used by Kconfig directly, as noted
+in the previous subsection, work has been done however to express in boolean
+abstraction the inferred semantics of Kconfig to translate Kconfig logic into
+boolean formulas and run a SAT solver on it [5]_. Another known related project
+is CADOS [6]_ (former VAMOS [7]_) and the tools, mainly undertaker [8]_, which
+has been introduced first with [9]_.  The basic concept of undertaker is to
+exract variability models from Kconfig, and put them together with a
+propositional formula extracted from CPP #ifdefs and build-rules into a SAT
+solver in order to find dead code, dead files, and dead symbols. If using a SAT
+solver is desirable on Kconfig one approach would be to evaluate repurposing
+such efforts somehow on Kconfig. There is enough interest from mentors of
+existing projects to not only help advise how to integrate this work upstream
+but also help maintain it long term. Interested developers should visit:
+
+http://kernelnewbies.org/KernelProjects/kconfig-sat
+
+.. [4] http://www.cs.cornell.edu/~sabhar/chapters/SATSolvers-KR-Handbook.pdf
+.. [5] http://gsd.uwaterloo.ca/sites/default/files/vm-2013-berger.pdf
+.. [6] https://cados.cs.fau.de
+.. [7] https://vamos.cs.fau.de
+.. [8] https://undertaker.cs.fau.de
+.. [9] https://www4.cs.fau.de/Publications/2011/tartler_11_eurosys.pdf
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
deleted file mode 100644
index 864e740811da..000000000000
--- a/Documentation/kbuild/kconfig-language.txt
+++ /dev/null
@@ -1,669 +0,0 @@
-Introduction
-------------
-
-The configuration database is a collection of configuration options
-organized in a tree structure:
-
-	+- Code maturity level options
-	|  +- Prompt for development and/or incomplete code/drivers
-	+- General setup
-	|  +- Networking support
-	|  +- System V IPC
-	|  +- BSD Process Accounting
-	|  +- Sysctl support
-	+- Loadable module support
-	|  +- Enable loadable module support
-	|     +- Set version information on all module symbols
-	|     +- Kernel module loader
-	+- ...
-
-Every entry has its own dependencies. These dependencies are used
-to determine the visibility of an entry. Any child entry is only
-visible if its parent entry is also visible.
-
-Menu entries
-------------
-
-Most entries define a config option; all other entries help to organize
-them. A single configuration option is defined like this:
-
-config MODVERSIONS
-	bool "Set version information on all module symbols"
-	depends on MODULES
-	help
-	  Usually, modules have to be recompiled whenever you switch to a new
-	  kernel.  ...
-
-Every line starts with a key word and can be followed by multiple
-arguments.  "config" starts a new config entry. The following lines
-define attributes for this config option. Attributes can be the type of
-the config option, input prompt, dependencies, help text and default
-values. A config option can be defined multiple times with the same
-name, but every definition can have only a single input prompt and the
-type must not conflict.
-
-Menu attributes
----------------
-
-A menu entry can have a number of attributes. Not all of them are
-applicable everywhere (see syntax).
-
-- type definition: "bool"/"tristate"/"string"/"hex"/"int"
-  Every config option must have a type. There are only two basic types:
-  tristate and string; the other types are based on these two. The type
-  definition optionally accepts an input prompt, so these two examples
-  are equivalent:
-
-	bool "Networking support"
-  and
-	bool
-	prompt "Networking support"
-
-- input prompt: "prompt" <prompt> ["if" <expr>]
-  Every menu entry can have at most one prompt, which is used to display
-  to the user. Optionally dependencies only for this prompt can be added
-  with "if".
-
-- default value: "default" <expr> ["if" <expr>]
-  A config option can have any number of default values. If multiple
-  default values are visible, only the first defined one is active.
-  Default values are not limited to the menu entry where they are
-  defined. This means the default can be defined somewhere else or be
-  overridden by an earlier definition.
-  The default value is only assigned to the config symbol if no other
-  value was set by the user (via the input prompt above). If an input
-  prompt is visible the default value is presented to the user and can
-  be overridden by him.
-  Optionally, dependencies only for this default value can be added with
-  "if".
-
- The default value deliberately defaults to 'n' in order to avoid bloating the
- build. With few exceptions, new config options should not change this. The
- intent is for "make oldconfig" to add as little as possible to the config from
- release to release.
-
- Note:
-	Things that merit "default y/m" include:
-
-	a) A new Kconfig option for something that used to always be built
-	   should be "default y".
-
-	b) A new gatekeeping Kconfig option that hides/shows other Kconfig
-	   options (but does not generate any code of its own), should be
-	   "default y" so people will see those other options.
-
-	c) Sub-driver behavior or similar options for a driver that is
-	   "default n". This allows you to provide sane defaults.
-
-	d) Hardware or infrastructure that everybody expects, such as CONFIG_NET
-	   or CONFIG_BLOCK. These are rare exceptions.
-
-- type definition + default value:
-	"def_bool"/"def_tristate" <expr> ["if" <expr>]
-  This is a shorthand notation for a type definition plus a value.
-  Optionally dependencies for this default value can be added with "if".
-
-- dependencies: "depends on" <expr>
-  This defines a dependency for this menu entry. If multiple
-  dependencies are defined, they are connected with '&&'. Dependencies
-  are applied to all other options within this menu entry (which also
-  accept an "if" expression), so these two examples are equivalent:
-
-	bool "foo" if BAR
-	default y if BAR
-  and
-	depends on BAR
-	bool "foo"
-	default y
-
-- reverse dependencies: "select" <symbol> ["if" <expr>]
-  While normal dependencies reduce the upper limit of a symbol (see
-  below), reverse dependencies can be used to force a lower limit of
-  another symbol. The value of the current menu symbol is used as the
-  minimal value <symbol> can be set to. If <symbol> is selected multiple
-  times, the limit is set to the largest selection.
-  Reverse dependencies can only be used with boolean or tristate
-  symbols.
-  Note:
-	select should be used with care. select will force
-	a symbol to a value without visiting the dependencies.
-	By abusing select you are able to select a symbol FOO even
-	if FOO depends on BAR that is not set.
-	In general use select only for non-visible symbols
-	(no prompts anywhere) and for symbols with no dependencies.
-	That will limit the usefulness but on the other hand avoid
-	the illegal configurations all over.
-
-- weak reverse dependencies: "imply" <symbol> ["if" <expr>]
-  This is similar to "select" as it enforces a lower limit on another
-  symbol except that the "implied" symbol's value may still be set to n
-  from a direct dependency or with a visible prompt.
-
-  Given the following example:
-
-  config FOO
-	tristate
-	imply BAZ
-
-  config BAZ
-	tristate
-	depends on BAR
-
-  The following values are possible:
-
-	FOO		BAR		BAZ's default	choice for BAZ
-	---		---		-------------	--------------
-	n		y		n		N/m/y
-	m		y		m		M/y/n
-	y		y		y		Y/n
-	y		n		*		N
-
-  This is useful e.g. with multiple drivers that want to indicate their
-  ability to hook into a secondary subsystem while allowing the user to
-  configure that subsystem out without also having to unset these drivers.
-
-- limiting menu display: "visible if" <expr>
-  This attribute is only applicable to menu blocks, if the condition is
-  false, the menu block is not displayed to the user (the symbols
-  contained there can still be selected by other symbols, though). It is
-  similar to a conditional "prompt" attribute for individual menu
-  entries. Default value of "visible" is true.
-
-- numerical ranges: "range" <symbol> <symbol> ["if" <expr>]
-  This allows to limit the range of possible input values for int
-  and hex symbols. The user can only input a value which is larger than
-  or equal to the first symbol and smaller than or equal to the second
-  symbol.
-
-- help text: "help" or "---help---"
-  This defines a help text. The end of the help text is determined by
-  the indentation level, this means it ends at the first line which has
-  a smaller indentation than the first line of the help text.
-  "---help---" and "help" do not differ in behaviour, "---help---" is
-  used to help visually separate configuration logic from help within
-  the file as an aid to developers.
-
-- misc options: "option" <symbol>[=<value>]
-  Various less common options can be defined via this option syntax,
-  which can modify the behaviour of the menu entry and its config
-  symbol. These options are currently possible:
-
-  - "defconfig_list"
-    This declares a list of default entries which can be used when
-    looking for the default configuration (which is used when the main
-    .config doesn't exists yet.)
-
-  - "modules"
-    This declares the symbol to be used as the MODULES symbol, which
-    enables the third modular state for all config symbols.
-    At most one symbol may have the "modules" option set.
-
-  - "allnoconfig_y"
-    This declares the symbol as one that should have the value y when
-    using "allnoconfig". Used for symbols that hide other symbols.
-
-Menu dependencies
------------------
-
-Dependencies define the visibility of a menu entry and can also reduce
-the input range of tristate symbols. The tristate logic used in the
-expressions uses one more state than normal boolean logic to express the
-module state. Dependency expressions have the following syntax:
-
-<expr> ::= <symbol>                             (1)
-           <symbol> '=' <symbol>                (2)
-           <symbol> '!=' <symbol>               (3)
-           <symbol1> '<' <symbol2>              (4)
-           <symbol1> '>' <symbol2>              (4)
-           <symbol1> '<=' <symbol2>             (4)
-           <symbol1> '>=' <symbol2>             (4)
-           '(' <expr> ')'                       (5)
-           '!' <expr>                           (6)
-           <expr> '&&' <expr>                   (7)
-           <expr> '||' <expr>                   (8)
-
-Expressions are listed in decreasing order of precedence. 
-
-(1) Convert the symbol into an expression. Boolean and tristate symbols
-    are simply converted into the respective expression values. All
-    other symbol types result in 'n'.
-(2) If the values of both symbols are equal, it returns 'y',
-    otherwise 'n'.
-(3) If the values of both symbols are equal, it returns 'n',
-    otherwise 'y'.
-(4) If value of <symbol1> is respectively lower, greater, lower-or-equal,
-    or greater-or-equal than value of <symbol2>, it returns 'y',
-    otherwise 'n'.
-(5) Returns the value of the expression. Used to override precedence.
-(6) Returns the result of (2-/expr/).
-(7) Returns the result of min(/expr/, /expr/).
-(8) Returns the result of max(/expr/, /expr/).
-
-An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2
-respectively for calculations). A menu entry becomes visible when its
-expression evaluates to 'm' or 'y'.
-
-There are two types of symbols: constant and non-constant symbols.
-Non-constant symbols are the most common ones and are defined with the
-'config' statement. Non-constant symbols consist entirely of alphanumeric
-characters or underscores.
-Constant symbols are only part of expressions. Constant symbols are
-always surrounded by single or double quotes. Within the quote, any
-other character is allowed and the quotes can be escaped using '\'.
-
-Menu structure
---------------
-
-The position of a menu entry in the tree is determined in two ways. First
-it can be specified explicitly:
-
-menu "Network device support"
-	depends on NET
-
-config NETDEVICES
-	...
-
-endmenu
-
-All entries within the "menu" ... "endmenu" block become a submenu of
-"Network device support". All subentries inherit the dependencies from
-the menu entry, e.g. this means the dependency "NET" is added to the
-dependency list of the config option NETDEVICES.
-
-The other way to generate the menu structure is done by analyzing the
-dependencies. If a menu entry somehow depends on the previous entry, it
-can be made a submenu of it. First, the previous (parent) symbol must
-be part of the dependency list and then one of these two conditions
-must be true:
-- the child entry must become invisible, if the parent is set to 'n'
-- the child entry must only be visible, if the parent is visible
-
-config MODULES
-	bool "Enable loadable module support"
-
-config MODVERSIONS
-	bool "Set version information on all module symbols"
-	depends on MODULES
-
-comment "module support disabled"
-	depends on !MODULES
-
-MODVERSIONS directly depends on MODULES, this means it's only visible if
-MODULES is different from 'n'. The comment on the other hand is only
-visible when MODULES is set to 'n'.
-
-
-Kconfig syntax
---------------
-
-The configuration file describes a series of menu entries, where every
-line starts with a keyword (except help texts). The following keywords
-end a menu entry:
-- config
-- menuconfig
-- choice/endchoice
-- comment
-- menu/endmenu
-- if/endif
-- source
-The first five also start the definition of a menu entry.
-
-config:
-
-	"config" <symbol>
-	<config options>
-
-This defines a config symbol <symbol> and accepts any of above
-attributes as options.
-
-menuconfig:
-	"menuconfig" <symbol>
-	<config options>
-
-This is similar to the simple config entry above, but it also gives a
-hint to front ends, that all suboptions should be displayed as a
-separate list of options. To make sure all the suboptions will really
-show up under the menuconfig entry and not outside of it, every item
-from the <config options> list must depend on the menuconfig symbol.
-In practice, this is achieved by using one of the next two constructs:
-
-(1):
-menuconfig M
-if M
-    config C1
-    config C2
-endif
-
-(2):
-menuconfig M
-config C1
-    depends on M
-config C2
-    depends on M
-
-In the following examples (3) and (4), C1 and C2 still have the M
-dependency, but will not appear under menuconfig M anymore, because
-of C0, which doesn't depend on M:
-
-(3):
-menuconfig M
-    config C0
-if M
-    config C1
-    config C2
-endif
-
-(4):
-menuconfig M
-config C0
-config C1
-    depends on M
-config C2
-    depends on M
-
-choices:
-
-	"choice" [symbol]
-	<choice options>
-	<choice block>
-	"endchoice"
-
-This defines a choice group and accepts any of the above attributes as
-options. A choice can only be of type bool or tristate.  If no type is
-specified for a choice, its type will be determined by the type of
-the first choice element in the group or remain unknown if none of the
-choice elements have a type specified, as well.
-
-While a boolean choice only allows a single config entry to be
-selected, a tristate choice also allows any number of config entries
-to be set to 'm'. This can be used if multiple drivers for a single
-hardware exists and only a single driver can be compiled/loaded into
-the kernel, but all drivers can be compiled as modules.
-
-A choice accepts another option "optional", which allows to set the
-choice to 'n' and no entry needs to be selected.
-If no [symbol] is associated with a choice, then you can not have multiple
-definitions of that choice. If a [symbol] is associated to the choice,
-then you may define the same choice (i.e. with the same entries) in another
-place.
-
-comment:
-
-	"comment" <prompt>
-	<comment options>
-
-This defines a comment which is displayed to the user during the
-configuration process and is also echoed to the output files. The only
-possible options are dependencies.
-
-menu:
-
-	"menu" <prompt>
-	<menu options>
-	<menu block>
-	"endmenu"
-
-This defines a menu block, see "Menu structure" above for more
-information. The only possible options are dependencies and "visible"
-attributes.
-
-if:
-
-	"if" <expr>
-	<if block>
-	"endif"
-
-This defines an if block. The dependency expression <expr> is appended
-to all enclosed menu entries.
-
-source:
-
-	"source" <prompt>
-
-This reads the specified configuration file. This file is always parsed.
-
-mainmenu:
-
-	"mainmenu" <prompt>
-
-This sets the config program's title bar if the config program chooses
-to use it. It should be placed at the top of the configuration, before any
-other statement.
-
-'#' Kconfig source file comment:
-
-An unquoted '#' character anywhere in a source file line indicates
-the beginning of a source file comment.  The remainder of that line
-is a comment.
-
-
-Kconfig hints
--------------
-This is a collection of Kconfig tips, most of which aren't obvious at
-first glance and most of which have become idioms in several Kconfig
-files.
-
-Adding common features and make the usage configurable
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-It is a common idiom to implement a feature/functionality that are
-relevant for some architectures but not all.
-The recommended way to do so is to use a config variable named HAVE_*
-that is defined in a common Kconfig file and selected by the relevant
-architectures.
-An example is the generic IOMAP functionality.
-
-We would in lib/Kconfig see:
-
-# Generic IOMAP is used to ...
-config HAVE_GENERIC_IOMAP
-
-config GENERIC_IOMAP
-	depends on HAVE_GENERIC_IOMAP && FOO
-
-And in lib/Makefile we would see:
-obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
-
-For each architecture using the generic IOMAP functionality we would see:
-
-config X86
-	select ...
-	select HAVE_GENERIC_IOMAP
-	select ...
-
-Note: we use the existing config option and avoid creating a new
-config variable to select HAVE_GENERIC_IOMAP.
-
-Note: the use of the internal config variable HAVE_GENERIC_IOMAP, it is
-introduced to overcome the limitation of select which will force a
-config option to 'y' no matter the dependencies.
-The dependencies are moved to the symbol GENERIC_IOMAP and we avoid the
-situation where select forces a symbol equals to 'y'.
-
-Adding features that need compiler support
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-There are several features that need compiler support. The recommended way
-to describe the dependency on the compiler feature is to use "depends on"
-followed by a test macro.
-
-config STACKPROTECTOR
-	bool "Stack Protector buffer overflow detection"
-	depends on $(cc-option,-fstack-protector)
-	...
-
-If you need to expose a compiler capability to makefiles and/or C source files,
-CC_HAS_ is the recommended prefix for the config option.
-
-config CC_HAS_STACKPROTECTOR_NONE
-	def_bool $(cc-option,-fno-stack-protector)
-
-Build as module only
-~~~~~~~~~~~~~~~~~~~~
-To restrict a component build to module-only, qualify its config symbol
-with "depends on m".  E.g.:
-
-config FOO
-	depends on BAR && m
-
-limits FOO to module (=m) or disabled (=n).
-
-Kconfig recursive dependency limitations
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-If you've hit the Kconfig error: "recursive dependency detected" you've run
-into a recursive dependency issue with Kconfig, a recursive dependency can be
-summarized as a circular dependency. The kconfig tools need to ensure that
-Kconfig files comply with specified configuration requirements. In order to do
-that kconfig must determine the values that are possible for all Kconfig
-symbols, this is currently not possible if there is a circular relation
-between two or more Kconfig symbols. For more details refer to the "Simple
-Kconfig recursive issue" subsection below. Kconfig does not do recursive
-dependency resolution; this has a few implications for Kconfig file writers.
-We'll first explain why this issues exists and then provide an example
-technical limitation which this brings upon Kconfig developers. Eager
-developers wishing to try to address this limitation should read the next
-subsections.
-
-Simple Kconfig recursive issue
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Read: Documentation/kbuild/Kconfig.recursion-issue-01
-
-Test with:
-
-make KBUILD_KCONFIG=Documentation/kbuild/Kconfig.recursion-issue-01 allnoconfig
-
-Cumulative Kconfig recursive issue
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Read: Documentation/kbuild/Kconfig.recursion-issue-02
-
-Test with:
-
-make KBUILD_KCONFIG=Documentation/kbuild/Kconfig.recursion-issue-02 allnoconfig
-
-Practical solutions to kconfig recursive issue
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Developers who run into the recursive Kconfig issue have two options
-at their disposal. We document them below and also provide a list of
-historical issues resolved through these different solutions.
-
-  a) Remove any superfluous "select FOO" or "depends on FOO"
-  b) Match dependency semantics:
-	b1) Swap all "select FOO" to "depends on FOO" or,
-	b2) Swap all "depends on FOO" to "select FOO"
-
-The resolution to a) can be tested with the sample Kconfig file
-Documentation/kbuild/Kconfig.recursion-issue-01 through the removal
-of the "select CORE" from CORE_BELL_A_ADVANCED as that is implicit already
-since CORE_BELL_A depends on CORE. At times it may not be possible to remove
-some dependency criteria, for such cases you can work with solution b).
-
-The two different resolutions for b) can be tested in the sample Kconfig file
-Documentation/kbuild/Kconfig.recursion-issue-02.
-
-Below is a list of examples of prior fixes for these types of recursive issues;
-all errors appear to involve one or more select's and one or more "depends on".
-
-commit          fix
-======          ===
-06b718c01208    select A -> depends on A
-c22eacfe82f9    depends on A -> depends on B
-6a91e854442c    select A -> depends on A
-118c565a8f2e    select A -> select B
-f004e5594705    select A -> depends on A
-c7861f37b4c6    depends on A -> (null)
-80c69915e5fb    select A -> (null)              (1)
-c2218e26c0d0    select A -> depends on A        (1)
-d6ae99d04e1c    select A -> depends on A
-95ca19cf8cbf    select A -> depends on A
-8f057d7bca54    depends on A -> (null)
-8f057d7bca54    depends on A -> select A
-a0701f04846e    select A -> depends on A
-0c8b92f7f259    depends on A -> (null)
-e4e9e0540928    select A -> depends on A        (2)
-7453ea886e87    depends on A > (null)           (1)
-7b1fff7e4fdf    select A -> depends on A
-86c747d2a4f0    select A -> depends on A
-d9f9ab51e55e    select A -> depends on A
-0c51a4d8abd6    depends on A -> select A        (3)
-e98062ed6dc4    select A -> depends on A        (3)
-91e5d284a7f1    select A -> (null)
-
-(1) Partial (or no) quote of error.
-(2) That seems to be the gist of that fix.
-(3) Same error.
-
-Future kconfig work
-~~~~~~~~~~~~~~~~~~~
-
-Work on kconfig is welcomed on both areas of clarifying semantics and on
-evaluating the use of a full SAT solver for it. A full SAT solver can be
-desirable to enable more complex dependency mappings and / or queries,
-for instance on possible use case for a SAT solver could be that of handling
-the current known recursive dependency issues. It is not known if this would
-address such issues but such evaluation is desirable. If support for a full SAT
-solver proves too complex or that it cannot address recursive dependency issues
-Kconfig should have at least clear and well defined semantics which also
-addresses and documents limitations or requirements such as the ones dealing
-with recursive dependencies.
-
-Further work on both of these areas is welcomed on Kconfig. We elaborate
-on both of these in the next two subsections.
-
-Semantics of Kconfig
-~~~~~~~~~~~~~~~~~~~~
-
-The use of Kconfig is broad, Linux is now only one of Kconfig's users:
-one study has completed a broad analysis of Kconfig use in 12 projects [0].
-Despite its widespread use, and although this document does a reasonable job
-in documenting basic Kconfig syntax a more precise definition of Kconfig
-semantics is welcomed. One project deduced Kconfig semantics through
-the use of the xconfig configurator [1]. Work should be done to confirm if
-the deduced semantics matches our intended Kconfig design goals.
-
-Having well defined semantics can be useful for tools for practical
-evaluation of depenencies, for instance one such use known case was work to
-express in boolean abstraction of the inferred semantics of Kconfig to
-translate Kconfig logic into boolean formulas and run a SAT solver on this to
-find dead code / features (always inactive), 114 dead features were found in
-Linux using this methodology [1] (Section 8: Threats to validity).
-
-Confirming this could prove useful as Kconfig stands as one of the the leading
-industrial variability modeling languages [1] [2]. Its study would help
-evaluate practical uses of such languages, their use was only theoretical
-and real world requirements were not well understood. As it stands though
-only reverse engineering techniques have been used to deduce semantics from
-variability modeling languages such as Kconfig [3].
-
-[0] http://www.eng.uwaterloo.ca/~shshe/kconfig_semantics.pdf
-[1] http://gsd.uwaterloo.ca/sites/default/files/vm-2013-berger.pdf
-[2] http://gsd.uwaterloo.ca/sites/default/files/ase241-berger_0.pdf
-[3] http://gsd.uwaterloo.ca/sites/default/files/icse2011.pdf
-
-Full SAT solver for Kconfig
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Although SAT solvers [0] haven't yet been used by Kconfig directly, as noted in
-the previous subsection, work has been done however to express in boolean
-abstraction the inferred semantics of Kconfig to translate Kconfig logic into
-boolean formulas and run a SAT solver on it [1]. Another known related project
-is CADOS [2] (former VAMOS [3]) and the tools, mainly undertaker [4], which has
-been introduced first with [5].  The basic concept of undertaker is to exract
-variability models from Kconfig, and put them together with a propositional
-formula extracted from CPP #ifdefs and build-rules into a SAT solver in order
-to find dead code, dead files, and dead symbols. If using a SAT solver is
-desirable on Kconfig one approach would be to evaluate repurposing such efforts
-somehow on Kconfig. There is enough interest from mentors of existing projects
-to not only help advise how to integrate this work upstream but also help
-maintain it long term. Interested developers should visit:
-
-http://kernelnewbies.org/KernelProjects/kconfig-sat
-
-[0] http://www.cs.cornell.edu/~sabhar/chapters/SATSolvers-KR-Handbook.pdf
-[1] http://gsd.uwaterloo.ca/sites/default/files/vm-2013-berger.pdf
-[2] https://cados.cs.fau.de
-[3] https://vamos.cs.fau.de
-[4] https://undertaker.cs.fau.de
-[5] https://www4.cs.fau.de/Publications/2011/tartler_11_eurosys.pdf
diff --git a/Documentation/kbuild/kconfig-macro-language.rst b/Documentation/kbuild/kconfig-macro-language.rst
new file mode 100644
index 000000000000..35b3263b7e40
--- /dev/null
+++ b/Documentation/kbuild/kconfig-macro-language.rst
@@ -0,0 +1,247 @@
+======================
+Kconfig macro language
+======================
+
+Concept
+-------
+
+The basic idea was inspired by Make. When we look at Make, we notice sort of
+two languages in one. One language describes dependency graphs consisting of
+targets and prerequisites. The other is a macro language for performing textual
+substitution.
+
+There is clear distinction between the two language stages. For example, you
+can write a makefile like follows::
+
+    APP := foo
+    SRC := foo.c
+    CC := gcc
+
+    $(APP): $(SRC)
+            $(CC) -o $(APP) $(SRC)
+
+The macro language replaces the variable references with their expanded form,
+and handles as if the source file were input like follows::
+
+    foo: foo.c
+            gcc -o foo foo.c
+
+Then, Make analyzes the dependency graph and determines the targets to be
+updated.
+
+The idea is quite similar in Kconfig - it is possible to describe a Kconfig
+file like this::
+
+    CC := gcc
+
+    config CC_HAS_FOO
+            def_bool $(shell, $(srctree)/scripts/gcc-check-foo.sh $(CC))
+
+The macro language in Kconfig processes the source file into the following
+intermediate::
+
+    config CC_HAS_FOO
+            def_bool y
+
+Then, Kconfig moves onto the evaluation stage to resolve inter-symbol
+dependency as explained in kconfig-language.txt.
+
+
+Variables
+---------
+
+Like in Make, a variable in Kconfig works as a macro variable.  A macro
+variable is expanded "in place" to yield a text string that may then be
+expanded further. To get the value of a variable, enclose the variable name in
+$( ). The parentheses are required even for single-letter variable names; $X is
+a syntax error. The curly brace form as in ${CC} is not supported either.
+
+There are two types of variables: simply expanded variables and recursively
+expanded variables.
+
+A simply expanded variable is defined using the := assignment operator. Its
+righthand side is expanded immediately upon reading the line from the Kconfig
+file.
+
+A recursively expanded variable is defined using the = assignment operator.
+Its righthand side is simply stored as the value of the variable without
+expanding it in any way. Instead, the expansion is performed when the variable
+is used.
+
+There is another type of assignment operator; += is used to append text to a
+variable. The righthand side of += is expanded immediately if the lefthand
+side was originally defined as a simple variable. Otherwise, its evaluation is
+deferred.
+
+The variable reference can take parameters, in the following form::
+
+  $(name,arg1,arg2,arg3)
+
+You can consider the parameterized reference as a function. (more precisely,
+"user-defined function" in contrast to "built-in function" listed below).
+
+Useful functions must be expanded when they are used since the same function is
+expanded differently if different parameters are passed. Hence, a user-defined
+function is defined using the = assignment operator. The parameters are
+referenced within the body definition with $(1), $(2), etc.
+
+In fact, recursively expanded variables and user-defined functions are the same
+internally. (In other words, "variable" is "function with zero argument".)
+When we say "variable" in a broad sense, it includes "user-defined function".
+
+
+Built-in functions
+------------------
+
+Like Make, Kconfig provides several built-in functions. Every function takes a
+particular number of arguments.
+
+In Make, every built-in function takes at least one argument. Kconfig allows
+zero argument for built-in functions, such as $(fileno), $(lineno). You could
+consider those as "built-in variable", but it is just a matter of how we call
+it after all. Let's say "built-in function" here to refer to natively supported
+functionality.
+
+Kconfig currently supports the following built-in functions.
+
+ - $(shell,command)
+
+  The "shell" function accepts a single argument that is expanded and passed
+  to a subshell for execution. The standard output of the command is then read
+  and returned as the value of the function. Every newline in the output is
+  replaced with a space. Any trailing newlines are deleted. The standard error
+  is not returned, nor is any program exit status.
+
+ - $(info,text)
+
+  The "info" function takes a single argument and prints it to stdout.
+  It evaluates to an empty string.
+
+ - $(warning-if,condition,text)
+
+  The "warning-if" function takes two arguments. If the condition part is "y",
+  the text part is sent to stderr. The text is prefixed with the name of the
+  current Kconfig file and the current line number.
+
+ - $(error-if,condition,text)
+
+  The "error-if" function is similar to "warning-if", but it terminates the
+  parsing immediately if the condition part is "y".
+
+ - $(filename)
+
+  The 'filename' takes no argument, and $(filename) is expanded to the file
+  name being parsed.
+
+ - $(lineno)
+
+  The 'lineno' takes no argument, and $(lineno) is expanded to the line number
+  being parsed.
+
+
+Make vs Kconfig
+---------------
+
+Kconfig adopts Make-like macro language, but the function call syntax is
+slightly different.
+
+A function call in Make looks like this::
+
+  $(func-name arg1,arg2,arg3)
+
+The function name and the first argument are separated by at least one
+whitespace. Then, leading whitespaces are trimmed from the first argument,
+while whitespaces in the other arguments are kept. You need to use a kind of
+trick to start the first parameter with spaces. For example, if you want
+to make "info" function print "  hello", you can write like follows::
+
+  empty :=
+  space := $(empty) $(empty)
+  $(info $(space)$(space)hello)
+
+Kconfig uses only commas for delimiters, and keeps all whitespaces in the
+function call. Some people prefer putting a space after each comma delimiter::
+
+  $(func-name, arg1, arg2, arg3)
+
+In this case, "func-name" will receive " arg1", " arg2", " arg3". The presence
+of leading spaces may matter depending on the function. The same applies to
+Make - for example, $(subst .c, .o, $(sources)) is a typical mistake; it
+replaces ".c" with " .o".
+
+In Make, a user-defined function is referenced by using a built-in function,
+'call', like this::
+
+    $(call my-func,arg1,arg2,arg3)
+
+Kconfig invokes user-defined functions and built-in functions in the same way.
+The omission of 'call' makes the syntax shorter.
+
+In Make, some functions treat commas verbatim instead of argument separators.
+For example, $(shell echo hello, world) runs the command "echo hello, world".
+Likewise, $(info hello, world) prints "hello, world" to stdout. You could say
+this is _useful_ inconsistency.
+
+In Kconfig, for simpler implementation and grammatical consistency, commas that
+appear in the $( ) context are always delimiters. It means::
+
+  $(shell, echo hello, world)
+
+is an error because it is passing two parameters where the 'shell' function
+accepts only one. To pass commas in arguments, you can use the following trick::
+
+  comma := ,
+  $(shell, echo hello$(comma) world)
+
+
+Caveats
+-------
+
+A variable (or function) cannot be expanded across tokens. So, you cannot use
+a variable as a shorthand for an expression that consists of multiple tokens.
+The following works::
+
+    RANGE_MIN := 1
+    RANGE_MAX := 3
+
+    config FOO
+            int "foo"
+            range $(RANGE_MIN) $(RANGE_MAX)
+
+But, the following does not work::
+
+    RANGES := 1 3
+
+    config FOO
+            int "foo"
+            range $(RANGES)
+
+A variable cannot be expanded to any keyword in Kconfig.  The following does
+not work::
+
+    MY_TYPE := tristate
+
+    config FOO
+            $(MY_TYPE) "foo"
+            default y
+
+Obviously from the design, $(shell command) is expanded in the textual
+substitution phase. You cannot pass symbols to the 'shell' function.
+
+The following does not work as expected::
+
+    config ENDIAN_FLAG
+            string
+            default "-mbig-endian" if CPU_BIG_ENDIAN
+            default "-mlittle-endian" if CPU_LITTLE_ENDIAN
+
+    config CC_HAS_ENDIAN_FLAG
+            def_bool $(shell $(srctree)/scripts/gcc-check-flag ENDIAN_FLAG)
+
+Instead, you can do like follows so that any function call is statically
+expanded::
+
+    config CC_HAS_ENDIAN_FLAG
+            bool
+            default $(shell $(srctree)/scripts/gcc-check-flag -mbig-endian) if CPU_BIG_ENDIAN
+            default $(shell $(srctree)/scripts/gcc-check-flag -mlittle-endian) if CPU_LITTLE_ENDIAN
diff --git a/Documentation/kbuild/kconfig-macro-language.txt b/Documentation/kbuild/kconfig-macro-language.txt
deleted file mode 100644
index 07da2ea68dce..000000000000
--- a/Documentation/kbuild/kconfig-macro-language.txt
+++ /dev/null
@@ -1,242 +0,0 @@
-Concept
--------
-
-The basic idea was inspired by Make. When we look at Make, we notice sort of
-two languages in one. One language describes dependency graphs consisting of
-targets and prerequisites. The other is a macro language for performing textual
-substitution.
-
-There is clear distinction between the two language stages. For example, you
-can write a makefile like follows:
-
-    APP := foo
-    SRC := foo.c
-    CC := gcc
-
-    $(APP): $(SRC)
-            $(CC) -o $(APP) $(SRC)
-
-The macro language replaces the variable references with their expanded form,
-and handles as if the source file were input like follows:
-
-    foo: foo.c
-            gcc -o foo foo.c
-
-Then, Make analyzes the dependency graph and determines the targets to be
-updated.
-
-The idea is quite similar in Kconfig - it is possible to describe a Kconfig
-file like this:
-
-    CC := gcc
-
-    config CC_HAS_FOO
-            def_bool $(shell, $(srctree)/scripts/gcc-check-foo.sh $(CC))
-
-The macro language in Kconfig processes the source file into the following
-intermediate:
-
-    config CC_HAS_FOO
-            def_bool y
-
-Then, Kconfig moves onto the evaluation stage to resolve inter-symbol
-dependency as explained in kconfig-language.txt.
-
-
-Variables
----------
-
-Like in Make, a variable in Kconfig works as a macro variable.  A macro
-variable is expanded "in place" to yield a text string that may then be
-expanded further. To get the value of a variable, enclose the variable name in
-$( ). The parentheses are required even for single-letter variable names; $X is
-a syntax error. The curly brace form as in ${CC} is not supported either.
-
-There are two types of variables: simply expanded variables and recursively
-expanded variables.
-
-A simply expanded variable is defined using the := assignment operator. Its
-righthand side is expanded immediately upon reading the line from the Kconfig
-file.
-
-A recursively expanded variable is defined using the = assignment operator.
-Its righthand side is simply stored as the value of the variable without
-expanding it in any way. Instead, the expansion is performed when the variable
-is used.
-
-There is another type of assignment operator; += is used to append text to a
-variable. The righthand side of += is expanded immediately if the lefthand
-side was originally defined as a simple variable. Otherwise, its evaluation is
-deferred.
-
-The variable reference can take parameters, in the following form:
-
-  $(name,arg1,arg2,arg3)
-
-You can consider the parameterized reference as a function. (more precisely,
-"user-defined function" in contrast to "built-in function" listed below).
-
-Useful functions must be expanded when they are used since the same function is
-expanded differently if different parameters are passed. Hence, a user-defined
-function is defined using the = assignment operator. The parameters are
-referenced within the body definition with $(1), $(2), etc.
-
-In fact, recursively expanded variables and user-defined functions are the same
-internally. (In other words, "variable" is "function with zero argument".)
-When we say "variable" in a broad sense, it includes "user-defined function".
-
-
-Built-in functions
-------------------
-
-Like Make, Kconfig provides several built-in functions. Every function takes a
-particular number of arguments.
-
-In Make, every built-in function takes at least one argument. Kconfig allows
-zero argument for built-in functions, such as $(fileno), $(lineno). You could
-consider those as "built-in variable", but it is just a matter of how we call
-it after all. Let's say "built-in function" here to refer to natively supported
-functionality.
-
-Kconfig currently supports the following built-in functions.
-
- - $(shell,command)
-
-  The "shell" function accepts a single argument that is expanded and passed
-  to a subshell for execution. The standard output of the command is then read
-  and returned as the value of the function. Every newline in the output is
-  replaced with a space. Any trailing newlines are deleted. The standard error
-  is not returned, nor is any program exit status.
-
- - $(info,text)
-
-  The "info" function takes a single argument and prints it to stdout.
-  It evaluates to an empty string.
-
- - $(warning-if,condition,text)
-
-  The "warning-if" function takes two arguments. If the condition part is "y",
-  the text part is sent to stderr. The text is prefixed with the name of the
-  current Kconfig file and the current line number.
-
- - $(error-if,condition,text)
-
-  The "error-if" function is similar to "warning-if", but it terminates the
-  parsing immediately if the condition part is "y".
-
- - $(filename)
-
-  The 'filename' takes no argument, and $(filename) is expanded to the file
-  name being parsed.
-
- - $(lineno)
-
-  The 'lineno' takes no argument, and $(lineno) is expanded to the line number
-  being parsed.
-
-
-Make vs Kconfig
----------------
-
-Kconfig adopts Make-like macro language, but the function call syntax is
-slightly different.
-
-A function call in Make looks like this:
-
-  $(func-name arg1,arg2,arg3)
-
-The function name and the first argument are separated by at least one
-whitespace. Then, leading whitespaces are trimmed from the first argument,
-while whitespaces in the other arguments are kept. You need to use a kind of
-trick to start the first parameter with spaces. For example, if you want
-to make "info" function print "  hello", you can write like follows:
-
-  empty :=
-  space := $(empty) $(empty)
-  $(info $(space)$(space)hello)
-
-Kconfig uses only commas for delimiters, and keeps all whitespaces in the
-function call. Some people prefer putting a space after each comma delimiter:
-
-  $(func-name, arg1, arg2, arg3)
-
-In this case, "func-name" will receive " arg1", " arg2", " arg3". The presence
-of leading spaces may matter depending on the function. The same applies to
-Make - for example, $(subst .c, .o, $(sources)) is a typical mistake; it
-replaces ".c" with " .o".
-
-In Make, a user-defined function is referenced by using a built-in function,
-'call', like this:
-
-    $(call my-func,arg1,arg2,arg3)
-
-Kconfig invokes user-defined functions and built-in functions in the same way.
-The omission of 'call' makes the syntax shorter.
-
-In Make, some functions treat commas verbatim instead of argument separators.
-For example, $(shell echo hello, world) runs the command "echo hello, world".
-Likewise, $(info hello, world) prints "hello, world" to stdout. You could say
-this is _useful_ inconsistency.
-
-In Kconfig, for simpler implementation and grammatical consistency, commas that
-appear in the $( ) context are always delimiters. It means
-
-  $(shell, echo hello, world)
-
-is an error because it is passing two parameters where the 'shell' function
-accepts only one. To pass commas in arguments, you can use the following trick:
-
-  comma := ,
-  $(shell, echo hello$(comma) world)
-
-
-Caveats
--------
-
-A variable (or function) cannot be expanded across tokens. So, you cannot use
-a variable as a shorthand for an expression that consists of multiple tokens.
-The following works:
-
-    RANGE_MIN := 1
-    RANGE_MAX := 3
-
-    config FOO
-            int "foo"
-            range $(RANGE_MIN) $(RANGE_MAX)
-
-But, the following does not work:
-
-    RANGES := 1 3
-
-    config FOO
-            int "foo"
-            range $(RANGES)
-
-A variable cannot be expanded to any keyword in Kconfig.  The following does
-not work:
-
-    MY_TYPE := tristate
-
-    config FOO
-            $(MY_TYPE) "foo"
-            default y
-
-Obviously from the design, $(shell command) is expanded in the textual
-substitution phase. You cannot pass symbols to the 'shell' function.
-The following does not work as expected.
-
-    config ENDIAN_FLAG
-            string
-            default "-mbig-endian" if CPU_BIG_ENDIAN
-            default "-mlittle-endian" if CPU_LITTLE_ENDIAN
-
-    config CC_HAS_ENDIAN_FLAG
-            def_bool $(shell $(srctree)/scripts/gcc-check-flag ENDIAN_FLAG)
-
-Instead, you can do like follows so that any function call is statically
-expanded.
-
-    config CC_HAS_ENDIAN_FLAG
-            bool
-            default $(shell $(srctree)/scripts/gcc-check-flag -mbig-endian) if CPU_BIG_ENDIAN
-            default $(shell $(srctree)/scripts/gcc-check-flag -mlittle-endian) if CPU_LITTLE_ENDIAN
diff --git a/Documentation/kbuild/kconfig.rst b/Documentation/kbuild/kconfig.rst
new file mode 100644
index 000000000000..88129af7e539
--- /dev/null
+++ b/Documentation/kbuild/kconfig.rst
@@ -0,0 +1,300 @@
+===================
+Kconfig make config
+===================
+
+This file contains some assistance for using `make *config`.
+
+Use "make help" to list all of the possible configuration targets.
+
+The xconfig ('qconf'), menuconfig ('mconf'), and nconfig ('nconf')
+programs also have embedded help text.  Be sure to check that for
+navigation, search, and other general help text.
+
+General
+-------
+
+New kernel releases often introduce new config symbols.  Often more
+important, new kernel releases may rename config symbols.  When
+this happens, using a previously working .config file and running
+"make oldconfig" won't necessarily produce a working new kernel
+for you, so you may find that you need to see what NEW kernel
+symbols have been introduced.
+
+To see a list of new config symbols, use::
+
+	cp user/some/old.config .config
+	make listnewconfig
+
+and the config program will list any new symbols, one per line.
+
+Alternatively, you can use the brute force method::
+
+	make oldconfig
+	scripts/diffconfig .config.old .config | less
+
+----------------------------------------------------------------------
+
+Environment variables for `*config`
+
+KCONFIG_CONFIG
+--------------
+This environment variable can be used to specify a default kernel config
+file name to override the default name of ".config".
+
+KCONFIG_OVERWRITECONFIG
+-----------------------
+If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not
+break symlinks when .config is a symlink to somewhere else.
+
+`CONFIG_`
+---------
+If you set `CONFIG_` in the environment, Kconfig will prefix all symbols
+with its value when saving the configuration, instead of using the default,
+`CONFIG_`.
+
+----------------------------------------------------------------------
+
+Environment variables for '{allyes/allmod/allno/rand}config'
+
+KCONFIG_ALLCONFIG
+-----------------
+(partially based on lkml email from/by Rob Landley, re: miniconfig)
+
+--------------------------------------------------
+
+The allyesconfig/allmodconfig/allnoconfig/randconfig variants can also
+use the environment variable KCONFIG_ALLCONFIG as a flag or a filename
+that contains config symbols that the user requires to be set to a
+specific value.  If KCONFIG_ALLCONFIG is used without a filename where
+KCONFIG_ALLCONFIG == "" or KCONFIG_ALLCONFIG == "1", `make *config`
+checks for a file named "all{yes/mod/no/def/random}.config"
+(corresponding to the `*config` command that was used) for symbol values
+that are to be forced.  If this file is not found, it checks for a
+file named "all.config" to contain forced values.
+
+This enables you to create "miniature" config (miniconfig) or custom
+config files containing just the config symbols that you are interested
+in.  Then the kernel config system generates the full .config file,
+including symbols of your miniconfig file.
+
+This 'KCONFIG_ALLCONFIG' file is a config file which contains
+(usually a subset of all) preset config symbols.  These variable
+settings are still subject to normal dependency checks.
+
+Examples::
+
+	KCONFIG_ALLCONFIG=custom-notebook.config make allnoconfig
+
+or::
+
+	KCONFIG_ALLCONFIG=mini.config make allnoconfig
+
+or::
+
+	make KCONFIG_ALLCONFIG=mini.config allnoconfig
+
+These examples will disable most options (allnoconfig) but enable or
+disable the options that are explicitly listed in the specified
+mini-config files.
+
+----------------------------------------------------------------------
+
+Environment variables for 'randconfig'
+
+KCONFIG_SEED
+------------
+You can set this to the integer value used to seed the RNG, if you want
+to somehow debug the behaviour of the kconfig parser/frontends.
+If not set, the current time will be used.
+
+KCONFIG_PROBABILITY
+-------------------
+This variable can be used to skew the probabilities. This variable can
+be unset or empty, or set to three different formats:
+
+    =======================     ==================  =====================
+	KCONFIG_PROBABILITY     y:n split           y:m:n split
+    =======================     ==================  =====================
+	unset or empty          50  : 50            33  : 33  : 34
+	N                        N  : 100-N         N/2 : N/2 : 100-N
+    [1] N:M                     N+M : 100-(N+M)      N  :  M  : 100-(N+M)
+    [2] N:M:L                    N  : 100-N          M  :  L  : 100-(M+L)
+    =======================     ==================  =====================
+
+where N, M and L are integers (in base 10) in the range [0,100], and so
+that:
+
+    [1] N+M is in the range [0,100]
+
+    [2] M+L is in the range [0,100]
+
+Examples::
+
+	KCONFIG_PROBABILITY=10
+		10% of booleans will be set to 'y', 90% to 'n'
+		5% of tristates will be set to 'y', 5% to 'm', 90% to 'n'
+	KCONFIG_PROBABILITY=15:25
+		40% of booleans will be set to 'y', 60% to 'n'
+		15% of tristates will be set to 'y', 25% to 'm', 60% to 'n'
+	KCONFIG_PROBABILITY=10:15:15
+		10% of booleans will be set to 'y', 90% to 'n'
+		15% of tristates will be set to 'y', 15% to 'm', 70% to 'n'
+
+----------------------------------------------------------------------
+
+Environment variables for 'syncconfig'
+
+KCONFIG_NOSILENTUPDATE
+----------------------
+If this variable has a non-blank value, it prevents silent kernel
+config updates (requires explicit updates).
+
+KCONFIG_AUTOCONFIG
+------------------
+This environment variable can be set to specify the path & name of the
+"auto.conf" file.  Its default value is "include/config/auto.conf".
+
+KCONFIG_TRISTATE
+----------------
+This environment variable can be set to specify the path & name of the
+"tristate.conf" file.  Its default value is "include/config/tristate.conf".
+
+KCONFIG_AUTOHEADER
+------------------
+This environment variable can be set to specify the path & name of the
+"autoconf.h" (header) file.
+Its default value is "include/generated/autoconf.h".
+
+
+----------------------------------------------------------------------
+
+menuconfig
+----------
+
+SEARCHING for CONFIG symbols
+
+Searching in menuconfig:
+
+	The Search function searches for kernel configuration symbol
+	names, so you have to know something close to what you are
+	looking for.
+
+	Example::
+
+		/hotplug
+		This lists all config symbols that contain "hotplug",
+		e.g., HOTPLUG_CPU, MEMORY_HOTPLUG.
+
+	For search help, enter / followed by TAB-TAB (to highlight
+	<Help>) and Enter.  This will tell you that you can also use
+	regular expressions (regexes) in the search string, so if you
+	are not interested in MEMORY_HOTPLUG, you could try::
+
+		/^hotplug
+
+	When searching, symbols are sorted thus:
+
+	  - first, exact matches, sorted alphabetically (an exact match
+	    is when the search matches the complete symbol name);
+	  - then, other matches, sorted alphabetically.
+
+	For example: ^ATH.K matches:
+
+	    ATH5K ATH9K ATH5K_AHB ATH5K_DEBUG [...] ATH6KL ATH6KL_DEBUG
+	    [...] ATH9K_AHB ATH9K_BTCOEX_SUPPORT ATH9K_COMMON [...]
+
+	of which only ATH5K and ATH9K match exactly and so are sorted
+	first (and in alphabetical order), then come all other symbols,
+	sorted in alphabetical order.
+
+----------------------------------------------------------------------
+
+User interface options for 'menuconfig'
+
+MENUCONFIG_COLOR
+----------------
+It is possible to select different color themes using the variable
+MENUCONFIG_COLOR.  To select a theme use::
+
+	make MENUCONFIG_COLOR=<theme> menuconfig
+
+Available themes are::
+
+  - mono       => selects colors suitable for monochrome displays
+  - blackbg    => selects a color scheme with black background
+  - classic    => theme with blue background. The classic look
+  - bluetitle  => a LCD friendly version of classic. (default)
+
+MENUCONFIG_MODE
+---------------
+This mode shows all sub-menus in one large tree.
+
+Example::
+
+	make MENUCONFIG_MODE=single_menu menuconfig
+
+----------------------------------------------------------------------
+
+nconfig
+-------
+
+nconfig is an alternate text-based configurator.  It lists function
+keys across the bottom of the terminal (window) that execute commands.
+You can also just use the corresponding numeric key to execute the
+commands unless you are in a data entry window.  E.g., instead of F6
+for Save, you can just press 6.
+
+Use F1 for Global help or F3 for the Short help menu.
+
+Searching in nconfig:
+
+	You can search either in the menu entry "prompt" strings
+	or in the configuration symbols.
+
+	Use / to begin a search through the menu entries.  This does
+	not support regular expressions.  Use <Down> or <Up> for
+	Next hit and Previous hit, respectively.  Use <Esc> to
+	terminate the search mode.
+
+	F8 (SymSearch) searches the configuration symbols for the
+	given string or regular expression (regex).
+
+NCONFIG_MODE
+------------
+This mode shows all sub-menus in one large tree.
+
+Example::
+	make NCONFIG_MODE=single_menu nconfig
+
+----------------------------------------------------------------------
+
+xconfig
+-------
+
+Searching in xconfig:
+
+	The Search function searches for kernel configuration symbol
+	names, so you have to know something close to what you are
+	looking for.
+
+	Example:
+		Ctrl-F hotplug
+	or
+		Menu: File, Search, hotplug
+
+	lists all config symbol entries that contain "hotplug" in
+	the symbol name.  In this Search dialog, you may change the
+	config setting for any of the entries that are not grayed out.
+	You can also enter a different search string without having
+	to return to the main menu.
+
+
+----------------------------------------------------------------------
+
+gconfig
+-------
+
+Searching in gconfig:
+
+	There is no search command in gconfig.  However, gconfig does
+	have several different viewing choices, modes, and options.
diff --git a/Documentation/kbuild/kconfig.txt b/Documentation/kbuild/kconfig.txt
deleted file mode 100644
index 68c82914c0f3..000000000000
--- a/Documentation/kbuild/kconfig.txt
+++ /dev/null
@@ -1,272 +0,0 @@
-This file contains some assistance for using "make *config".
-
-Use "make help" to list all of the possible configuration targets.
-
-The xconfig ('qconf'), menuconfig ('mconf'), and nconfig ('nconf')
-programs also have embedded help text.  Be sure to check that for
-navigation, search, and other general help text.
-
-======================================================================
-General
---------------------------------------------------
-
-New kernel releases often introduce new config symbols.  Often more
-important, new kernel releases may rename config symbols.  When
-this happens, using a previously working .config file and running
-"make oldconfig" won't necessarily produce a working new kernel
-for you, so you may find that you need to see what NEW kernel
-symbols have been introduced.
-
-To see a list of new config symbols, use
-
-	cp user/some/old.config .config
-	make listnewconfig
-
-and the config program will list any new symbols, one per line.
-
-Alternatively, you can use the brute force method:
-
-	make oldconfig
-	scripts/diffconfig .config.old .config | less
-
-______________________________________________________________________
-Environment variables for '*config'
-
-KCONFIG_CONFIG
---------------------------------------------------
-This environment variable can be used to specify a default kernel config
-file name to override the default name of ".config".
-
-KCONFIG_OVERWRITECONFIG
---------------------------------------------------
-If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not
-break symlinks when .config is a symlink to somewhere else.
-
-CONFIG_
---------------------------------------------------
-If you set CONFIG_ in the environment, Kconfig will prefix all symbols
-with its value when saving the configuration, instead of using the default,
-"CONFIG_".
-
-______________________________________________________________________
-Environment variables for '{allyes/allmod/allno/rand}config'
-
-KCONFIG_ALLCONFIG
---------------------------------------------------
-(partially based on lkml email from/by Rob Landley, re: miniconfig)
---------------------------------------------------
-The allyesconfig/allmodconfig/allnoconfig/randconfig variants can also
-use the environment variable KCONFIG_ALLCONFIG as a flag or a filename
-that contains config symbols that the user requires to be set to a
-specific value.  If KCONFIG_ALLCONFIG is used without a filename where
-KCONFIG_ALLCONFIG == "" or KCONFIG_ALLCONFIG == "1", "make *config"
-checks for a file named "all{yes/mod/no/def/random}.config"
-(corresponding to the *config command that was used) for symbol values
-that are to be forced.  If this file is not found, it checks for a
-file named "all.config" to contain forced values.
-
-This enables you to create "miniature" config (miniconfig) or custom
-config files containing just the config symbols that you are interested
-in.  Then the kernel config system generates the full .config file,
-including symbols of your miniconfig file.
-
-This 'KCONFIG_ALLCONFIG' file is a config file which contains
-(usually a subset of all) preset config symbols.  These variable
-settings are still subject to normal dependency checks.
-
-Examples:
-	KCONFIG_ALLCONFIG=custom-notebook.config make allnoconfig
-or
-	KCONFIG_ALLCONFIG=mini.config make allnoconfig
-or
-	make KCONFIG_ALLCONFIG=mini.config allnoconfig
-
-These examples will disable most options (allnoconfig) but enable or
-disable the options that are explicitly listed in the specified
-mini-config files.
-
-______________________________________________________________________
-Environment variables for 'randconfig'
-
-KCONFIG_SEED
---------------------------------------------------
-You can set this to the integer value used to seed the RNG, if you want
-to somehow debug the behaviour of the kconfig parser/frontends.
-If not set, the current time will be used.
-
-KCONFIG_PROBABILITY
---------------------------------------------------
-This variable can be used to skew the probabilities. This variable can
-be unset or empty, or set to three different formats:
-	KCONFIG_PROBABILITY     y:n split           y:m:n split
-	-----------------------------------------------------------------
-	unset or empty          50  : 50            33  : 33  : 34
-	N                        N  : 100-N         N/2 : N/2 : 100-N
-    [1] N:M                     N+M : 100-(N+M)      N  :  M  : 100-(N+M)
-    [2] N:M:L                    N  : 100-N          M  :  L  : 100-(M+L)
-
-where N, M and L are integers (in base 10) in the range [0,100], and so
-that:
-    [1] N+M is in the range [0,100]
-    [2] M+L is in the range [0,100]
-
-Examples:
-	KCONFIG_PROBABILITY=10
-		10% of booleans will be set to 'y', 90% to 'n'
-		5% of tristates will be set to 'y', 5% to 'm', 90% to 'n'
-	KCONFIG_PROBABILITY=15:25
-		40% of booleans will be set to 'y', 60% to 'n'
-		15% of tristates will be set to 'y', 25% to 'm', 60% to 'n'
-	KCONFIG_PROBABILITY=10:15:15
-		10% of booleans will be set to 'y', 90% to 'n'
-		15% of tristates will be set to 'y', 15% to 'm', 70% to 'n'
-
-______________________________________________________________________
-Environment variables for 'syncconfig'
-
-KCONFIG_NOSILENTUPDATE
---------------------------------------------------
-If this variable has a non-blank value, it prevents silent kernel
-config updates (requires explicit updates).
-
-KCONFIG_AUTOCONFIG
---------------------------------------------------
-This environment variable can be set to specify the path & name of the
-"auto.conf" file.  Its default value is "include/config/auto.conf".
-
-KCONFIG_TRISTATE
---------------------------------------------------
-This environment variable can be set to specify the path & name of the
-"tristate.conf" file.  Its default value is "include/config/tristate.conf".
-
-KCONFIG_AUTOHEADER
---------------------------------------------------
-This environment variable can be set to specify the path & name of the
-"autoconf.h" (header) file.
-Its default value is "include/generated/autoconf.h".
-
-
-======================================================================
-menuconfig
---------------------------------------------------
-
-SEARCHING for CONFIG symbols
-
-Searching in menuconfig:
-
-	The Search function searches for kernel configuration symbol
-	names, so you have to know something close to what you are
-	looking for.
-
-	Example:
-		/hotplug
-		This lists all config symbols that contain "hotplug",
-		e.g., HOTPLUG_CPU, MEMORY_HOTPLUG.
-
-	For search help, enter / followed by TAB-TAB (to highlight
-	<Help>) and Enter.  This will tell you that you can also use
-	regular expressions (regexes) in the search string, so if you
-	are not interested in MEMORY_HOTPLUG, you could try
-
-		/^hotplug
-
-	When searching, symbols are sorted thus:
-	  - first, exact matches, sorted alphabetically (an exact match
-	    is when the search matches the complete symbol name);
-	  - then, other matches, sorted alphabetically.
-	For example: ^ATH.K matches:
-	    ATH5K ATH9K ATH5K_AHB ATH5K_DEBUG [...] ATH6KL ATH6KL_DEBUG
-	    [...] ATH9K_AHB ATH9K_BTCOEX_SUPPORT ATH9K_COMMON [...]
-	of which only ATH5K and ATH9K match exactly and so are sorted
-	first (and in alphabetical order), then come all other symbols,
-	sorted in alphabetical order.
-
-______________________________________________________________________
-User interface options for 'menuconfig'
-
-MENUCONFIG_COLOR
---------------------------------------------------
-It is possible to select different color themes using the variable
-MENUCONFIG_COLOR.  To select a theme use:
-
-	make MENUCONFIG_COLOR=<theme> menuconfig
-
-Available themes are:
-  mono       => selects colors suitable for monochrome displays
-  blackbg    => selects a color scheme with black background
-  classic    => theme with blue background. The classic look
-  bluetitle  => a LCD friendly version of classic. (default)
-
-MENUCONFIG_MODE
---------------------------------------------------
-This mode shows all sub-menus in one large tree.
-
-Example:
-	make MENUCONFIG_MODE=single_menu menuconfig
-
-
-======================================================================
-nconfig
---------------------------------------------------
-
-nconfig is an alternate text-based configurator.  It lists function
-keys across the bottom of the terminal (window) that execute commands.
-You can also just use the corresponding numeric key to execute the
-commands unless you are in a data entry window.  E.g., instead of F6
-for Save, you can just press 6.
-
-Use F1 for Global help or F3 for the Short help menu.
-
-Searching in nconfig:
-
-	You can search either in the menu entry "prompt" strings
-	or in the configuration symbols.
-
-	Use / to begin a search through the menu entries.  This does
-	not support regular expressions.  Use <Down> or <Up> for
-	Next hit and Previous hit, respectively.  Use <Esc> to
-	terminate the search mode.
-
-	F8 (SymSearch) searches the configuration symbols for the
-	given string or regular expression (regex).
-
-NCONFIG_MODE
---------------------------------------------------
-This mode shows all sub-menus in one large tree.
-
-Example:
-	make NCONFIG_MODE=single_menu nconfig
-
-
-======================================================================
-xconfig
---------------------------------------------------
-
-Searching in xconfig:
-
-	The Search function searches for kernel configuration symbol
-	names, so you have to know something close to what you are
-	looking for.
-
-	Example:
-		Ctrl-F hotplug
-	or
-		Menu: File, Search, hotplug
-
-	lists all config symbol entries that contain "hotplug" in
-	the symbol name.  In this Search dialog, you may change the
-	config setting for any of the entries that are not grayed out.
-	You can also enter a different search string without having
-	to return to the main menu.
-
-
-======================================================================
-gconfig
---------------------------------------------------
-
-Searching in gconfig:
-
-	There is no search command in gconfig.  However, gconfig does
-	have several different viewing choices, modes, and options.
-
-###
diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst
new file mode 100644
index 000000000000..9274cdcc9bd2
--- /dev/null
+++ b/Documentation/kbuild/makefiles.rst
@@ -0,0 +1,1509 @@
+======================
+Linux Kernel Makefiles
+======================
+
+This document describes the Linux kernel Makefiles.
+
+.. Table of Contents
+
+	=== 1 Overview
+	=== 2 Who does what
+	=== 3 The kbuild files
+	   --- 3.1 Goal definitions
+	   --- 3.2 Built-in object goals - obj-y
+	   --- 3.3 Loadable module goals - obj-m
+	   --- 3.4 Objects which export symbols
+	   --- 3.5 Library file goals - lib-y
+	   --- 3.6 Descending down in directories
+	   --- 3.7 Compilation flags
+	   --- 3.8 Command line dependency
+	   --- 3.9 Dependency tracking
+	   --- 3.10 Special Rules
+	   --- 3.11 $(CC) support functions
+	   --- 3.12 $(LD) support functions
+
+	=== 4 Host Program support
+	   --- 4.1 Simple Host Program
+	   --- 4.2 Composite Host Programs
+	   --- 4.3 Using C++ for host programs
+	   --- 4.4 Controlling compiler options for host programs
+	   --- 4.5 When host programs are actually built
+	   --- 4.6 Using hostprogs-$(CONFIG_FOO)
+
+	=== 5 Kbuild clean infrastructure
+
+	=== 6 Architecture Makefiles
+	   --- 6.1 Set variables to tweak the build to the architecture
+	   --- 6.2 Add prerequisites to archheaders:
+	   --- 6.3 Add prerequisites to archprepare:
+	   --- 6.4 List directories to visit when descending
+	   --- 6.5 Architecture-specific boot images
+	   --- 6.6 Building non-kbuild targets
+	   --- 6.7 Commands useful for building a boot image
+	   --- 6.8 Custom kbuild commands
+	   --- 6.9 Preprocessing linker scripts
+	   --- 6.10 Generic header files
+	   --- 6.11 Post-link pass
+
+	=== 7 Kbuild syntax for exported headers
+		--- 7.1 no-export-headers
+		--- 7.2 generic-y
+		--- 7.3 generated-y
+		--- 7.4 mandatory-y
+
+	=== 8 Kbuild Variables
+	=== 9 Makefile language
+	=== 10 Credits
+	=== 11 TODO
+
+1 Overview
+==========
+
+The Makefiles have five parts::
+
+	Makefile		the top Makefile.
+	.config			the kernel configuration file.
+	arch/$(ARCH)/Makefile	the arch Makefile.
+	scripts/Makefile.*	common rules etc. for all kbuild Makefiles.
+	kbuild Makefiles	there are about 500 of these.
+
+The top Makefile reads the .config file, which comes from the kernel
+configuration process.
+
+The top Makefile is responsible for building two major products: vmlinux
+(the resident kernel image) and modules (any module files).
+It builds these goals by recursively descending into the subdirectories of
+the kernel source tree.
+The list of subdirectories which are visited depends upon the kernel
+configuration. The top Makefile textually includes an arch Makefile
+with the name arch/$(ARCH)/Makefile. The arch Makefile supplies
+architecture-specific information to the top Makefile.
+
+Each subdirectory has a kbuild Makefile which carries out the commands
+passed down from above. The kbuild Makefile uses information from the
+.config file to construct various file lists used by kbuild to build
+any built-in or modular targets.
+
+scripts/Makefile.* contains all the definitions/rules etc. that
+are used to build the kernel based on the kbuild makefiles.
+
+
+2 Who does what
+===============
+
+People have four different relationships with the kernel Makefiles.
+
+*Users* are people who build kernels.  These people type commands such as
+"make menuconfig" or "make".  They usually do not read or edit
+any kernel Makefiles (or any other source files).
+
+*Normal developers* are people who work on features such as device
+drivers, file systems, and network protocols.  These people need to
+maintain the kbuild Makefiles for the subsystem they are
+working on.  In order to do this effectively, they need some overall
+knowledge about the kernel Makefiles, plus detailed knowledge about the
+public interface for kbuild.
+
+*Arch developers* are people who work on an entire architecture, such
+as sparc or ia64.  Arch developers need to know about the arch Makefile
+as well as kbuild Makefiles.
+
+*Kbuild developers* are people who work on the kernel build system itself.
+These people need to know about all aspects of the kernel Makefiles.
+
+This document is aimed towards normal developers and arch developers.
+
+
+3 The kbuild files
+==================
+
+Most Makefiles within the kernel are kbuild Makefiles that use the
+kbuild infrastructure. This chapter introduces the syntax used in the
+kbuild makefiles.
+The preferred name for the kbuild files are 'Makefile' but 'Kbuild' can
+be used and if both a 'Makefile' and a 'Kbuild' file exists, then the 'Kbuild'
+file will be used.
+
+Section 3.1 "Goal definitions" is a quick intro, further chapters provide
+more details, with real examples.
+
+3.1 Goal definitions
+--------------------
+
+	Goal definitions are the main part (heart) of the kbuild Makefile.
+	These lines define the files to be built, any special compilation
+	options, and any subdirectories to be entered recursively.
+
+	The most simple kbuild makefile contains one line:
+
+	Example::
+
+		obj-y += foo.o
+
+	This tells kbuild that there is one object in that directory, named
+	foo.o. foo.o will be built from foo.c or foo.S.
+
+	If foo.o shall be built as a module, the variable obj-m is used.
+	Therefore the following pattern is often used:
+
+	Example::
+
+		obj-$(CONFIG_FOO) += foo.o
+
+	$(CONFIG_FOO) evaluates to either y (for built-in) or m (for module).
+	If CONFIG_FOO is neither y nor m, then the file will not be compiled
+	nor linked.
+
+3.2 Built-in object goals - obj-y
+---------------------------------
+
+	The kbuild Makefile specifies object files for vmlinux
+	in the $(obj-y) lists.  These lists depend on the kernel
+	configuration.
+
+	Kbuild compiles all the $(obj-y) files.  It then calls
+	"$(AR) rcSTP" to merge these files into one built-in.a file.
+	This is a thin archive without a symbol table. It will be later
+	linked into vmlinux by scripts/link-vmlinux.sh
+
+	The order of files in $(obj-y) is significant.  Duplicates in
+	the lists are allowed: the first instance will be linked into
+	built-in.a and succeeding instances will be ignored.
+
+	Link order is significant, because certain functions
+	(module_init() / __initcall) will be called during boot in the
+	order they appear. So keep in mind that changing the link
+	order may e.g. change the order in which your SCSI
+	controllers are detected, and thus your disks are renumbered.
+
+	Example::
+
+		#drivers/isdn/i4l/Makefile
+		# Makefile for the kernel ISDN subsystem and device drivers.
+		# Each configuration option enables a list of files.
+		obj-$(CONFIG_ISDN_I4L)         += isdn.o
+		obj-$(CONFIG_ISDN_PPP_BSDCOMP) += isdn_bsdcomp.o
+
+3.3 Loadable module goals - obj-m
+---------------------------------
+
+	$(obj-m) specifies object files which are built as loadable
+	kernel modules.
+
+	A module may be built from one source file or several source
+	files. In the case of one source file, the kbuild makefile
+	simply adds the file to $(obj-m).
+
+	Example::
+
+		#drivers/isdn/i4l/Makefile
+		obj-$(CONFIG_ISDN_PPP_BSDCOMP) += isdn_bsdcomp.o
+
+	Note: In this example $(CONFIG_ISDN_PPP_BSDCOMP) evaluates to 'm'
+
+	If a kernel module is built from several source files, you specify
+	that you want to build a module in the same way as above; however,
+	kbuild needs to know which object files you want to build your
+	module from, so you have to tell it by setting a $(<module_name>-y)
+	variable.
+
+	Example::
+
+		#drivers/isdn/i4l/Makefile
+		obj-$(CONFIG_ISDN_I4L) += isdn.o
+		isdn-y := isdn_net_lib.o isdn_v110.o isdn_common.o
+
+	In this example, the module name will be isdn.o. Kbuild will
+	compile the objects listed in $(isdn-y) and then run
+	"$(LD) -r" on the list of these files to generate isdn.o.
+
+	Due to kbuild recognizing $(<module_name>-y) for composite objects,
+	you can use the value of a `CONFIG_` symbol to optionally include an
+	object file as part of a composite object.
+
+	Example::
+
+		#fs/ext2/Makefile
+	        obj-$(CONFIG_EXT2_FS) += ext2.o
+		ext2-y := balloc.o dir.o file.o ialloc.o inode.o ioctl.o \
+			  namei.o super.o symlink.o
+	        ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o \
+						xattr_trusted.o
+
+	In this example, xattr.o, xattr_user.o and xattr_trusted.o are only
+	part of the composite object ext2.o if $(CONFIG_EXT2_FS_XATTR)
+	evaluates to 'y'.
+
+	Note: Of course, when you are building objects into the kernel,
+	the syntax above will also work. So, if you have CONFIG_EXT2_FS=y,
+	kbuild will build an ext2.o file for you out of the individual
+	parts and then link this into built-in.a, as you would expect.
+
+3.4 Objects which export symbols
+--------------------------------
+
+	No special notation is required in the makefiles for
+	modules exporting symbols.
+
+3.5 Library file goals - lib-y
+------------------------------
+
+	Objects listed with obj-* are used for modules, or
+	combined in a built-in.a for that specific directory.
+	There is also the possibility to list objects that will
+	be included in a library, lib.a.
+	All objects listed with lib-y are combined in a single
+	library for that directory.
+	Objects that are listed in obj-y and additionally listed in
+	lib-y will not be included in the library, since they will
+	be accessible anyway.
+	For consistency, objects listed in lib-m will be included in lib.a.
+
+	Note that the same kbuild makefile may list files to be built-in
+	and to be part of a library. Therefore the same directory
+	may contain both a built-in.a and a lib.a file.
+
+	Example::
+
+		#arch/x86/lib/Makefile
+		lib-y    := delay.o
+
+	This will create a library lib.a based on delay.o. For kbuild to
+	actually recognize that there is a lib.a being built, the directory
+	shall be listed in libs-y.
+
+	See also "6.4 List directories to visit when descending".
+
+	Use of lib-y is normally restricted to `lib/` and `arch/*/lib`.
+
+3.6 Descending down in directories
+----------------------------------
+
+	A Makefile is only responsible for building objects in its own
+	directory. Files in subdirectories should be taken care of by
+	Makefiles in these subdirs. The build system will automatically
+	invoke make recursively in subdirectories, provided you let it know of
+	them.
+
+	To do so, obj-y and obj-m are used.
+	ext2 lives in a separate directory, and the Makefile present in fs/
+	tells kbuild to descend down using the following assignment.
+
+	Example::
+
+		#fs/Makefile
+		obj-$(CONFIG_EXT2_FS) += ext2/
+
+	If CONFIG_EXT2_FS is set to either 'y' (built-in) or 'm' (modular)
+	the corresponding obj- variable will be set, and kbuild will descend
+	down in the ext2 directory.
+	Kbuild only uses this information to decide that it needs to visit
+	the directory, it is the Makefile in the subdirectory that
+	specifies what is modular and what is built-in.
+
+	It is good practice to use a `CONFIG_` variable when assigning directory
+	names. This allows kbuild to totally skip the directory if the
+	corresponding `CONFIG_` option is neither 'y' nor 'm'.
+
+3.7 Compilation flags
+---------------------
+
+    ccflags-y, asflags-y and ldflags-y
+	These three flags apply only to the kbuild makefile in which they
+	are assigned. They are used for all the normal cc, as and ld
+	invocations happening during a recursive build.
+	Note: Flags with the same behaviour were previously named:
+	EXTRA_CFLAGS, EXTRA_AFLAGS and EXTRA_LDFLAGS.
+	They are still supported but their usage is deprecated.
+
+	ccflags-y specifies options for compiling with $(CC).
+
+	Example::
+
+		# drivers/acpi/acpica/Makefile
+		ccflags-y			:= -Os -D_LINUX -DBUILDING_ACPICA
+		ccflags-$(CONFIG_ACPI_DEBUG)	+= -DACPI_DEBUG_OUTPUT
+
+	This variable is necessary because the top Makefile owns the
+	variable $(KBUILD_CFLAGS) and uses it for compilation flags for the
+	entire tree.
+
+	asflags-y specifies options for assembling with $(AS).
+
+	Example::
+
+		#arch/sparc/kernel/Makefile
+		asflags-y := -ansi
+
+	ldflags-y specifies options for linking with $(LD).
+
+	Example::
+
+		#arch/cris/boot/compressed/Makefile
+		ldflags-y += -T $(srctree)/$(src)/decompress_$(arch-y).lds
+
+    subdir-ccflags-y, subdir-asflags-y
+	The two flags listed above are similar to ccflags-y and asflags-y.
+	The difference is that the subdir- variants have effect for the kbuild
+	file where they are present and all subdirectories.
+	Options specified using subdir-* are added to the commandline before
+	the options specified using the non-subdir variants.
+
+	Example::
+
+		subdir-ccflags-y := -Werror
+
+    CFLAGS_$@, AFLAGS_$@
+	CFLAGS_$@ and AFLAGS_$@ only apply to commands in current
+	kbuild makefile.
+
+	$(CFLAGS_$@) specifies per-file options for $(CC).  The $@
+	part has a literal value which specifies the file that it is for.
+
+	Example::
+
+		# drivers/scsi/Makefile
+		CFLAGS_aha152x.o =   -DAHA152X_STAT -DAUTOCONF
+		CFLAGS_gdth.o    = # -DDEBUG_GDTH=2 -D__SERIAL__ -D__COM2__ \
+				     -DGDTH_STATISTICS
+
+	These two lines specify compilation flags for aha152x.o and gdth.o.
+
+	$(AFLAGS_$@) is a similar feature for source files in assembly
+	languages.
+
+	Example::
+
+		# arch/arm/kernel/Makefile
+		AFLAGS_head.o        := -DTEXT_OFFSET=$(TEXT_OFFSET)
+		AFLAGS_crunch-bits.o := -Wa,-mcpu=ep9312
+		AFLAGS_iwmmxt.o      := -Wa,-mcpu=iwmmxt
+
+
+3.9 Dependency tracking
+-----------------------
+
+	Kbuild tracks dependencies on the following:
+	1) All prerequisite files (both `*.c` and `*.h`)
+	2) `CONFIG_` options used in all prerequisite files
+	3) Command-line used to compile target
+
+	Thus, if you change an option to $(CC) all affected files will
+	be re-compiled.
+
+3.10 Special Rules
+------------------
+
+	Special rules are used when the kbuild infrastructure does
+	not provide the required support. A typical example is
+	header files generated during the build process.
+	Another example are the architecture-specific Makefiles which
+	need special rules to prepare boot images etc.
+
+	Special rules are written as normal Make rules.
+	Kbuild is not executing in the directory where the Makefile is
+	located, so all special rules shall provide a relative
+	path to prerequisite files and target files.
+
+	Two variables are used when defining special rules:
+
+	$(src)
+	    $(src) is a relative path which points to the directory
+	    where the Makefile is located. Always use $(src) when
+	    referring to files located in the src tree.
+
+	$(obj)
+	    $(obj) is a relative path which points to the directory
+	    where the target is saved. Always use $(obj) when
+	    referring to generated files.
+
+	    Example::
+
+		#drivers/scsi/Makefile
+		$(obj)/53c8xx_d.h: $(src)/53c7,8xx.scr $(src)/script_asm.pl
+			$(CPP) -DCHIP=810 - < $< | ... $(src)/script_asm.pl
+
+	    This is a special rule, following the normal syntax
+	    required by make.
+
+	    The target file depends on two prerequisite files. References
+	    to the target file are prefixed with $(obj), references
+	    to prerequisites are referenced with $(src) (because they are not
+	    generated files).
+
+	$(kecho)
+	    echoing information to user in a rule is often a good practice
+	    but when execution "make -s" one does not expect to see any output
+	    except for warnings/errors.
+	    To support this kbuild defines $(kecho) which will echo out the
+	    text following $(kecho) to stdout except if "make -s" is used.
+
+	Example::
+
+		#arch/blackfin/boot/Makefile
+		$(obj)/vmImage: $(obj)/vmlinux.gz
+			$(call if_changed,uimage)
+			@$(kecho) 'Kernel: $@ is ready'
+
+
+3.11 $(CC) support functions
+----------------------------
+
+	The kernel may be built with several different versions of
+	$(CC), each supporting a unique set of features and options.
+	kbuild provides basic support to check for valid options for $(CC).
+	$(CC) is usually the gcc compiler, but other alternatives are
+	available.
+
+    as-option
+	as-option is used to check if $(CC) -- when used to compile
+	assembler (`*.S`) files -- supports the given option. An optional
+	second option may be specified if the first option is not supported.
+
+	Example::
+
+		#arch/sh/Makefile
+		cflags-y += $(call as-option,-Wa$(comma)-isa=$(isa-y),)
+
+	In the above example, cflags-y will be assigned the option
+	-Wa$(comma)-isa=$(isa-y) if it is supported by $(CC).
+	The second argument is optional, and if supplied will be used
+	if first argument is not supported.
+
+    cc-ldoption
+	cc-ldoption is used to check if $(CC) when used to link object files
+	supports the given option.  An optional second option may be
+	specified if first option are not supported.
+
+	Example::
+
+		#arch/x86/kernel/Makefile
+		vsyscall-flags += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+
+	In the above example, vsyscall-flags will be assigned the option
+	-Wl$(comma)--hash-style=sysv if it is supported by $(CC).
+	The second argument is optional, and if supplied will be used
+	if first argument is not supported.
+
+    as-instr
+	as-instr checks if the assembler reports a specific instruction
+	and then outputs either option1 or option2
+	C escapes are supported in the test instruction
+	Note: as-instr-option uses KBUILD_AFLAGS for $(AS) options
+
+    cc-option
+	cc-option is used to check if $(CC) supports a given option, and if
+	not supported to use an optional second option.
+
+	Example::
+
+		#arch/x86/Makefile
+		cflags-y += $(call cc-option,-march=pentium-mmx,-march=i586)
+
+	In the above example, cflags-y will be assigned the option
+	-march=pentium-mmx if supported by $(CC), otherwise -march=i586.
+	The second argument to cc-option is optional, and if omitted,
+	cflags-y will be assigned no value if first option is not supported.
+	Note: cc-option uses KBUILD_CFLAGS for $(CC) options
+
+   cc-option-yn
+	cc-option-yn is used to check if gcc supports a given option
+	and return 'y' if supported, otherwise 'n'.
+
+	Example::
+
+		#arch/ppc/Makefile
+		biarch := $(call cc-option-yn, -m32)
+		aflags-$(biarch) += -a32
+		cflags-$(biarch) += -m32
+
+	In the above example, $(biarch) is set to y if $(CC) supports the -m32
+	option. When $(biarch) equals 'y', the expanded variables $(aflags-y)
+	and $(cflags-y) will be assigned the values -a32 and -m32,
+	respectively.
+	Note: cc-option-yn uses KBUILD_CFLAGS for $(CC) options
+
+    cc-disable-warning
+	cc-disable-warning checks if gcc supports a given warning and returns
+	the commandline switch to disable it. This special function is needed,
+	because gcc 4.4 and later accept any unknown -Wno-* option and only
+	warn about it if there is another warning in the source file.
+
+	Example::
+
+		KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+
+	In the above example, -Wno-unused-but-set-variable will be added to
+	KBUILD_CFLAGS only if gcc really accepts it.
+
+    cc-ifversion
+	cc-ifversion tests the version of $(CC) and equals the fourth parameter
+	if version expression is true, or the fifth (if given) if the version
+	expression is false.
+
+	Example::
+
+		#fs/reiserfs/Makefile
+		ccflags-y := $(call cc-ifversion, -lt, 0402, -O1)
+
+	In this example, ccflags-y will be assigned the value -O1 if the
+	$(CC) version is less than 4.2.
+	cc-ifversion takes all the shell operators:
+	-eq, -ne, -lt, -le, -gt, and -ge
+	The third parameter may be a text as in this example, but it may also
+	be an expanded variable or a macro.
+
+    cc-cross-prefix
+	cc-cross-prefix is used to check if there exists a $(CC) in path with
+	one of the listed prefixes. The first prefix where there exist a
+	prefix$(CC) in the PATH is returned - and if no prefix$(CC) is found
+	then nothing is returned.
+	Additional prefixes are separated by a single space in the
+	call of cc-cross-prefix.
+	This functionality is useful for architecture Makefiles that try
+	to set CROSS_COMPILE to well-known values but may have several
+	values to select between.
+	It is recommended only to try to set CROSS_COMPILE if it is a cross
+	build (host arch is different from target arch). And if CROSS_COMPILE
+	is already set then leave it with the old value.
+
+	Example::
+
+		#arch/m68k/Makefile
+		ifneq ($(SUBARCH),$(ARCH))
+		        ifeq ($(CROSS_COMPILE),)
+		               CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu-)
+			endif
+		endif
+
+3.12 $(LD) support functions
+----------------------------
+
+    ld-option
+	ld-option is used to check if $(LD) supports the supplied option.
+	ld-option takes two options as arguments.
+	The second argument is an optional option that can be used if the
+	first option is not supported by $(LD).
+
+	Example::
+
+		#Makefile
+		LDFLAGS_vmlinux += $(call ld-option, -X)
+
+
+4 Host Program support
+======================
+
+Kbuild supports building executables on the host for use during the
+compilation stage.
+Two steps are required in order to use a host executable.
+
+The first step is to tell kbuild that a host program exists. This is
+done utilising the variable hostprogs-y.
+
+The second step is to add an explicit dependency to the executable.
+This can be done in two ways. Either add the dependency in a rule,
+or utilise the variable $(always).
+Both possibilities are described in the following.
+
+4.1 Simple Host Program
+-----------------------
+
+	In some cases there is a need to compile and run a program on the
+	computer where the build is running.
+	The following line tells kbuild that the program bin2hex shall be
+	built on the build host.
+
+	Example::
+
+		hostprogs-y := bin2hex
+
+	Kbuild assumes in the above example that bin2hex is made from a single
+	c-source file named bin2hex.c located in the same directory as
+	the Makefile.
+
+4.2 Composite Host Programs
+---------------------------
+
+	Host programs can be made up based on composite objects.
+	The syntax used to define composite objects for host programs is
+	similar to the syntax used for kernel objects.
+	$(<executable>-objs) lists all objects used to link the final
+	executable.
+
+	Example::
+
+		#scripts/lxdialog/Makefile
+		hostprogs-y   := lxdialog
+		lxdialog-objs := checklist.o lxdialog.o
+
+	Objects with extension .o are compiled from the corresponding .c
+	files. In the above example, checklist.c is compiled to checklist.o
+	and lxdialog.c is compiled to lxdialog.o.
+
+	Finally, the two .o files are linked to the executable, lxdialog.
+	Note: The syntax <executable>-y is not permitted for host-programs.
+
+4.3 Using C++ for host programs
+-------------------------------
+
+	kbuild offers support for host programs written in C++. This was
+	introduced solely to support kconfig, and is not recommended
+	for general use.
+
+	Example::
+
+		#scripts/kconfig/Makefile
+		hostprogs-y   := qconf
+		qconf-cxxobjs := qconf.o
+
+	In the example above the executable is composed of the C++ file
+	qconf.cc - identified by $(qconf-cxxobjs).
+
+	If qconf is composed of a mixture of .c and .cc files, then an
+	additional line can be used to identify this.
+
+	Example::
+
+		#scripts/kconfig/Makefile
+		hostprogs-y   := qconf
+		qconf-cxxobjs := qconf.o
+		qconf-objs    := check.o
+
+4.4 Controlling compiler options for host programs
+--------------------------------------------------
+
+	When compiling host programs, it is possible to set specific flags.
+	The programs will always be compiled utilising $(HOSTCC) passed
+	the options specified in $(KBUILD_HOSTCFLAGS).
+	To set flags that will take effect for all host programs created
+	in that Makefile, use the variable HOST_EXTRACFLAGS.
+
+	Example::
+
+		#scripts/lxdialog/Makefile
+		HOST_EXTRACFLAGS += -I/usr/include/ncurses
+
+	To set specific flags for a single file the following construction
+	is used:
+
+	Example::
+
+		#arch/ppc64/boot/Makefile
+		HOSTCFLAGS_piggyback.o := -DKERNELBASE=$(KERNELBASE)
+
+	It is also possible to specify additional options to the linker.
+
+	Example::
+
+		#scripts/kconfig/Makefile
+		HOSTLDLIBS_qconf := -L$(QTDIR)/lib
+
+	When linking qconf, it will be passed the extra option
+	"-L$(QTDIR)/lib".
+
+4.5 When host programs are actually built
+-----------------------------------------
+
+	Kbuild will only build host-programs when they are referenced
+	as a prerequisite.
+	This is possible in two ways:
+
+	(1) List the prerequisite explicitly in a special rule.
+
+	Example::
+
+		#drivers/pci/Makefile
+		hostprogs-y := gen-devlist
+		$(obj)/devlist.h: $(src)/pci.ids $(obj)/gen-devlist
+			( cd $(obj); ./gen-devlist ) < $<
+
+	The target $(obj)/devlist.h will not be built before
+	$(obj)/gen-devlist is updated. Note that references to
+	the host programs in special rules must be prefixed with $(obj).
+
+	(2) Use $(always)
+
+	When there is no suitable special rule, and the host program
+	shall be built when a makefile is entered, the $(always)
+	variable shall be used.
+
+	Example::
+
+		#scripts/lxdialog/Makefile
+		hostprogs-y   := lxdialog
+		always        := $(hostprogs-y)
+
+	This will tell kbuild to build lxdialog even if not referenced in
+	any rule.
+
+4.6 Using hostprogs-$(CONFIG_FOO)
+---------------------------------
+
+	A typical pattern in a Kbuild file looks like this:
+
+	Example::
+
+		#scripts/Makefile
+		hostprogs-$(CONFIG_KALLSYMS) += kallsyms
+
+	Kbuild knows about both 'y' for built-in and 'm' for module.
+	So if a config symbol evaluates to 'm', kbuild will still build
+	the binary. In other words, Kbuild handles hostprogs-m exactly
+	like hostprogs-y. But only hostprogs-y is recommended to be used
+	when no CONFIG symbols are involved.
+
+5 Kbuild clean infrastructure
+=============================
+
+"make clean" deletes most generated files in the obj tree where the kernel
+is compiled. This includes generated files such as host programs.
+Kbuild knows targets listed in $(hostprogs-y), $(hostprogs-m), $(always),
+$(extra-y) and $(targets). They are all deleted during "make clean".
+Files matching the patterns "*.[oas]", "*.ko", plus some additional files
+generated by kbuild are deleted all over the kernel src tree when
+"make clean" is executed.
+
+Additional files can be specified in kbuild makefiles by use of $(clean-files).
+
+	Example::
+
+		#lib/Makefile
+		clean-files := crc32table.h
+
+When executing "make clean", the file "crc32table.h" will be deleted.
+Kbuild will assume files to be in the same relative directory as the
+Makefile, except if prefixed with $(objtree).
+
+To delete a directory hierarchy use:
+
+	Example::
+
+		#scripts/package/Makefile
+		clean-dirs := $(objtree)/debian/
+
+This will delete the directory debian in the toplevel directory, including all
+subdirectories.
+
+To exclude certain files from make clean, use the $(no-clean-files) variable.
+This is only a special case used in the top level Kbuild file:
+
+	Example::
+
+		#Kbuild
+		no-clean-files := $(bounds-file) $(offsets-file)
+
+Usually kbuild descends down in subdirectories due to "obj-* := dir/",
+but in the architecture makefiles where the kbuild infrastructure
+is not sufficient this sometimes needs to be explicit.
+
+	Example::
+
+		#arch/x86/boot/Makefile
+		subdir- := compressed/
+
+The above assignment instructs kbuild to descend down in the
+directory compressed/ when "make clean" is executed.
+
+To support the clean infrastructure in the Makefiles that build the
+final bootimage there is an optional target named archclean:
+
+	Example::
+
+		#arch/x86/Makefile
+		archclean:
+			$(Q)$(MAKE) $(clean)=arch/x86/boot
+
+When "make clean" is executed, make will descend down in arch/x86/boot,
+and clean as usual. The Makefile located in arch/x86/boot/ may use
+the subdir- trick to descend further down.
+
+Note 1: arch/$(ARCH)/Makefile cannot use "subdir-", because that file is
+included in the top level makefile, and the kbuild infrastructure
+is not operational at that point.
+
+Note 2: All directories listed in core-y, libs-y, drivers-y and net-y will
+be visited during "make clean".
+
+6 Architecture Makefiles
+========================
+
+The top level Makefile sets up the environment and does the preparation,
+before starting to descend down in the individual directories.
+The top level makefile contains the generic part, whereas
+arch/$(ARCH)/Makefile contains what is required to set up kbuild
+for said architecture.
+To do so, arch/$(ARCH)/Makefile sets up a number of variables and defines
+a few targets.
+
+When kbuild executes, the following steps are followed (roughly):
+
+1) Configuration of the kernel => produce .config
+2) Store kernel version in include/linux/version.h
+3) Updating all other prerequisites to the target prepare:
+   - Additional prerequisites are specified in arch/$(ARCH)/Makefile
+4) Recursively descend down in all directories listed in
+   init-* core* drivers-* net-* libs-* and build all targets.
+   - The values of the above variables are expanded in arch/$(ARCH)/Makefile.
+5) All object files are then linked and the resulting file vmlinux is
+   located at the root of the obj tree.
+   The very first objects linked are listed in head-y, assigned by
+   arch/$(ARCH)/Makefile.
+6) Finally, the architecture-specific part does any required post processing
+   and builds the final bootimage.
+   - This includes building boot records
+   - Preparing initrd images and the like
+
+
+6.1 Set variables to tweak the build to the architecture
+--------------------------------------------------------
+
+    LDFLAGS
+	Generic $(LD) options
+
+	Flags used for all invocations of the linker.
+	Often specifying the emulation is sufficient.
+
+	Example::
+
+		#arch/s390/Makefile
+		LDFLAGS         := -m elf_s390
+
+	Note: ldflags-y can be used to further customise
+	the flags used. See chapter 3.7.
+
+    LDFLAGS_vmlinux
+	Options for $(LD) when linking vmlinux
+
+	LDFLAGS_vmlinux is used to specify additional flags to pass to
+	the linker when linking the final vmlinux image.
+	LDFLAGS_vmlinux uses the LDFLAGS_$@ support.
+
+	Example::
+
+		#arch/x86/Makefile
+		LDFLAGS_vmlinux := -e stext
+
+    OBJCOPYFLAGS
+	objcopy flags
+
+	When $(call if_changed,objcopy) is used to translate a .o file,
+	the flags specified in OBJCOPYFLAGS will be used.
+	$(call if_changed,objcopy) is often used to generate raw binaries on
+	vmlinux.
+
+	Example::
+
+		#arch/s390/Makefile
+		OBJCOPYFLAGS := -O binary
+
+		#arch/s390/boot/Makefile
+		$(obj)/image: vmlinux FORCE
+			$(call if_changed,objcopy)
+
+	In this example, the binary $(obj)/image is a binary version of
+	vmlinux. The usage of $(call if_changed,xxx) will be described later.
+
+    KBUILD_AFLAGS
+	$(AS) assembler flags
+
+	Default value - see top level Makefile
+	Append or modify as required per architecture.
+
+	Example::
+
+		#arch/sparc64/Makefile
+		KBUILD_AFLAGS += -m64 -mcpu=ultrasparc
+
+    KBUILD_CFLAGS
+	$(CC) compiler flags
+
+	Default value - see top level Makefile
+	Append or modify as required per architecture.
+
+	Often, the KBUILD_CFLAGS variable depends on the configuration.
+
+	Example::
+
+		#arch/x86/boot/compressed/Makefile
+		cflags-$(CONFIG_X86_32) := -march=i386
+		cflags-$(CONFIG_X86_64) := -mcmodel=small
+		KBUILD_CFLAGS += $(cflags-y)
+
+	Many arch Makefiles dynamically run the target C compiler to
+	probe supported options::
+
+		#arch/x86/Makefile
+
+		...
+		cflags-$(CONFIG_MPENTIUMII)     += $(call cc-option,\
+						-march=pentium2,-march=i686)
+		...
+		# Disable unit-at-a-time mode ...
+		KBUILD_CFLAGS += $(call cc-option,-fno-unit-at-a-time)
+		...
+
+
+	The first example utilises the trick that a config option expands
+	to 'y' when selected.
+
+    KBUILD_AFLAGS_KERNEL
+	$(AS) options specific for built-in
+
+	$(KBUILD_AFLAGS_KERNEL) contains extra C compiler flags used to compile
+	resident kernel code.
+
+    KBUILD_AFLAGS_MODULE
+	Options for $(AS) when building modules
+
+	$(KBUILD_AFLAGS_MODULE) is used to add arch-specific options that
+	are used for $(AS).
+
+	From commandline AFLAGS_MODULE shall be used (see kbuild.txt).
+
+    KBUILD_CFLAGS_KERNEL
+	$(CC) options specific for built-in
+
+	$(KBUILD_CFLAGS_KERNEL) contains extra C compiler flags used to compile
+	resident kernel code.
+
+    KBUILD_CFLAGS_MODULE
+	Options for $(CC) when building modules
+
+	$(KBUILD_CFLAGS_MODULE) is used to add arch-specific options that
+	are used for $(CC).
+	From commandline CFLAGS_MODULE shall be used (see kbuild.txt).
+
+    KBUILD_LDFLAGS_MODULE
+	Options for $(LD) when linking modules
+
+	$(KBUILD_LDFLAGS_MODULE) is used to add arch-specific options
+	used when linking modules. This is often a linker script.
+
+	From commandline LDFLAGS_MODULE shall be used (see kbuild.txt).
+
+    KBUILD_ARFLAGS   Options for $(AR) when creating archives
+
+	$(KBUILD_ARFLAGS) set by the top level Makefile to "D" (deterministic
+	mode) if this option is supported by $(AR).
+
+    ARCH_CPPFLAGS, ARCH_AFLAGS, ARCH_CFLAGS   Overrides the kbuild defaults
+
+	These variables are appended to the KBUILD_CPPFLAGS,
+	KBUILD_AFLAGS, and KBUILD_CFLAGS, respectively, after the
+	top-level Makefile has set any other flags. This provides a
+	means for an architecture to override the defaults.
+
+
+6.2 Add prerequisites to archheaders
+------------------------------------
+
+	The archheaders: rule is used to generate header files that
+	may be installed into user space by "make header_install" or
+	"make headers_install_all".  In order to support
+	"make headers_install_all", this target has to be able to run
+	on an unconfigured tree, or a tree configured for another
+	architecture.
+
+	It is run before "make archprepare" when run on the
+	architecture itself.
+
+
+6.3 Add prerequisites to archprepare
+------------------------------------
+
+	The archprepare: rule is used to list prerequisites that need to be
+	built before starting to descend down in the subdirectories.
+	This is usually used for header files containing assembler constants.
+
+	Example::
+
+		#arch/arm/Makefile
+		archprepare: maketools
+
+	In this example, the file target maketools will be processed
+	before descending down in the subdirectories.
+	See also chapter XXX-TODO that describe how kbuild supports
+	generating offset header files.
+
+
+6.4 List directories to visit when descending
+---------------------------------------------
+
+	An arch Makefile cooperates with the top Makefile to define variables
+	which specify how to build the vmlinux file.  Note that there is no
+	corresponding arch-specific section for modules; the module-building
+	machinery is all architecture-independent.
+
+
+	head-y, init-y, core-y, libs-y, drivers-y, net-y
+	    $(head-y) lists objects to be linked first in vmlinux.
+
+	    $(libs-y) lists directories where a lib.a archive can be located.
+
+	    The rest list directories where a built-in.a object file can be
+	    located.
+
+	    $(init-y) objects will be located after $(head-y).
+
+	    Then the rest follows in this order:
+
+		$(core-y), $(libs-y), $(drivers-y) and $(net-y).
+
+	    The top level Makefile defines values for all generic directories,
+	    and arch/$(ARCH)/Makefile only adds architecture-specific
+	    directories.
+
+	    Example::
+
+		#arch/sparc64/Makefile
+		core-y += arch/sparc64/kernel/
+		libs-y += arch/sparc64/prom/ arch/sparc64/lib/
+		drivers-$(CONFIG_OPROFILE)  += arch/sparc64/oprofile/
+
+
+6.5 Architecture-specific boot images
+-------------------------------------
+
+	An arch Makefile specifies goals that take the vmlinux file, compress
+	it, wrap it in bootstrapping code, and copy the resulting files
+	somewhere. This includes various kinds of installation commands.
+	The actual goals are not standardized across architectures.
+
+	It is common to locate any additional processing in a boot/
+	directory below arch/$(ARCH)/.
+
+	Kbuild does not provide any smart way to support building a
+	target specified in boot/. Therefore arch/$(ARCH)/Makefile shall
+	call make manually to build a target in boot/.
+
+	The recommended approach is to include shortcuts in
+	arch/$(ARCH)/Makefile, and use the full path when calling down
+	into the arch/$(ARCH)/boot/Makefile.
+
+	Example::
+
+		#arch/x86/Makefile
+		boot := arch/x86/boot
+		bzImage: vmlinux
+			$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+	"$(Q)$(MAKE) $(build)=<dir>" is the recommended way to invoke
+	make in a subdirectory.
+
+	There are no rules for naming architecture-specific targets,
+	but executing "make help" will list all relevant targets.
+	To support this, $(archhelp) must be defined.
+
+	Example::
+
+		#arch/x86/Makefile
+		define archhelp
+		  echo  '* bzImage      - Image (arch/$(ARCH)/boot/bzImage)'
+		endif
+
+	When make is executed without arguments, the first goal encountered
+	will be built. In the top level Makefile the first goal present
+	is all:.
+	An architecture shall always, per default, build a bootable image.
+	In "make help", the default goal is highlighted with a '*'.
+	Add a new prerequisite to all: to select a default goal different
+	from vmlinux.
+
+	Example::
+
+		#arch/x86/Makefile
+		all: bzImage
+
+	When "make" is executed without arguments, bzImage will be built.
+
+6.6 Building non-kbuild targets
+-------------------------------
+
+    extra-y
+	extra-y specifies additional targets created in the current
+	directory, in addition to any targets specified by `obj-*`.
+
+	Listing all targets in extra-y is required for two purposes:
+
+	1) Enable kbuild to check changes in command lines
+
+	   - When $(call if_changed,xxx) is used
+
+	2) kbuild knows what files to delete during "make clean"
+
+	Example::
+
+		#arch/x86/kernel/Makefile
+		extra-y := head.o init_task.o
+
+	In this example, extra-y is used to list object files that
+	shall be built, but shall not be linked as part of built-in.a.
+
+
+6.7 Commands useful for building a boot image
+---------------------------------------------
+
+    Kbuild provides a few macros that are useful when building a
+    boot image.
+
+    if_changed
+	if_changed is the infrastructure used for the following commands.
+
+	Usage::
+
+		target: source(s) FORCE
+			$(call if_changed,ld/objcopy/gzip/...)
+
+	When the rule is evaluated, it is checked to see if any files
+	need an update, or the command line has changed since the last
+	invocation. The latter will force a rebuild if any options
+	to the executable have changed.
+	Any target that utilises if_changed must be listed in $(targets),
+	otherwise the command line check will fail, and the target will
+	always be built.
+	Assignments to $(targets) are without $(obj)/ prefix.
+	if_changed may be used in conjunction with custom commands as
+	defined in 6.8 "Custom kbuild commands".
+
+	Note: It is a typical mistake to forget the FORCE prerequisite.
+	Another common pitfall is that whitespace is sometimes
+	significant; for instance, the below will fail (note the extra space
+	after the comma)::
+
+		target: source(s) FORCE
+
+	**WRONG!**	$(call if_changed, ld/objcopy/gzip/...)
+
+        Note:
+	      if_changed should not be used more than once per target.
+              It stores the executed command in a corresponding .cmd
+
+        file and multiple calls would result in overwrites and
+        unwanted results when the target is up to date and only the
+        tests on changed commands trigger execution of commands.
+
+    ld
+	Link target. Often, LDFLAGS_$@ is used to set specific options to ld.
+
+	Example::
+
+		#arch/x86/boot/Makefile
+		LDFLAGS_bootsect := -Ttext 0x0 -s --oformat binary
+		LDFLAGS_setup    := -Ttext 0x0 -s --oformat binary -e begtext
+
+		targets += setup setup.o bootsect bootsect.o
+		$(obj)/setup $(obj)/bootsect: %: %.o FORCE
+			$(call if_changed,ld)
+
+	In this example, there are two possible targets, requiring different
+	options to the linker. The linker options are specified using the
+	LDFLAGS_$@ syntax - one for each potential target.
+	$(targets) are assigned all potential targets, by which kbuild knows
+	the targets and will:
+
+		1) check for commandline changes
+		2) delete target during make clean
+
+	The ": %: %.o" part of the prerequisite is a shorthand that
+	frees us from listing the setup.o and bootsect.o files.
+
+	Note:
+	      It is a common mistake to forget the "targets :=" assignment,
+	      resulting in the target file being recompiled for no
+	      obvious reason.
+
+    objcopy
+	Copy binary. Uses OBJCOPYFLAGS usually specified in
+	arch/$(ARCH)/Makefile.
+	OBJCOPYFLAGS_$@ may be used to set additional options.
+
+    gzip
+	Compress target. Use maximum compression to compress target.
+
+	Example::
+
+		#arch/x86/boot/compressed/Makefile
+		$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
+			$(call if_changed,gzip)
+
+    dtc
+	Create flattened device tree blob object suitable for linking
+	into vmlinux. Device tree blobs linked into vmlinux are placed
+	in an init section in the image. Platform code *must* copy the
+	blob to non-init memory prior to calling unflatten_device_tree().
+
+	To use this command, simply add `*.dtb` into obj-y or targets, or make
+	some other target depend on `%.dtb`
+
+	A central rule exists to create `$(obj)/%.dtb` from `$(src)/%.dts`;
+	architecture Makefiles do no need to explicitly write out that rule.
+
+	Example::
+
+		targets += $(dtb-y)
+		DTC_FLAGS ?= -p 1024
+
+6.8 Custom kbuild commands
+--------------------------
+
+	When kbuild is executing with KBUILD_VERBOSE=0, then only a shorthand
+	of a command is normally displayed.
+	To enable this behaviour for custom commands kbuild requires
+	two variables to be set::
+
+		quiet_cmd_<command>	- what shall be echoed
+		      cmd_<command>	- the command to execute
+
+	Example::
+
+		#
+		quiet_cmd_image = BUILD   $@
+		      cmd_image = $(obj)/tools/build $(BUILDFLAGS) \
+		                                     $(obj)/vmlinux.bin > $@
+
+		targets += bzImage
+		$(obj)/bzImage: $(obj)/vmlinux.bin $(obj)/tools/build FORCE
+			$(call if_changed,image)
+			@echo 'Kernel: $@ is ready'
+
+	When updating the $(obj)/bzImage target, the line:
+
+		BUILD    arch/x86/boot/bzImage
+
+	will be displayed with "make KBUILD_VERBOSE=0".
+
+
+--- 6.9 Preprocessing linker scripts
+
+	When the vmlinux image is built, the linker script
+	arch/$(ARCH)/kernel/vmlinux.lds is used.
+	The script is a preprocessed variant of the file vmlinux.lds.S
+	located in the same directory.
+	kbuild knows .lds files and includes a rule `*lds.S` -> `*lds`.
+
+	Example::
+
+		#arch/x86/kernel/Makefile
+		always := vmlinux.lds
+
+		#Makefile
+		export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH)
+
+	The assignment to $(always) is used to tell kbuild to build the
+	target vmlinux.lds.
+	The assignment to $(CPPFLAGS_vmlinux.lds) tells kbuild to use the
+	specified options when building the target vmlinux.lds.
+
+	When building the `*.lds` target, kbuild uses the variables::
+
+		KBUILD_CPPFLAGS	: Set in top-level Makefile
+		cppflags-y	: May be set in the kbuild makefile
+		CPPFLAGS_$(@F)  : Target-specific flags.
+				Note that the full filename is used in this
+				assignment.
+
+	The kbuild infrastructure for `*lds` files is used in several
+	architecture-specific files.
+
+6.10 Generic header files
+-------------------------
+
+	The directory include/asm-generic contains the header files
+	that may be shared between individual architectures.
+	The recommended approach how to use a generic header file is
+	to list the file in the Kbuild file.
+	See "7.2 generic-y" for further info on syntax etc.
+
+6.11 Post-link pass
+-------------------
+
+	If the file arch/xxx/Makefile.postlink exists, this makefile
+	will be invoked for post-link objects (vmlinux and modules.ko)
+	for architectures to run post-link passes on. Must also handle
+	the clean target.
+
+	This pass runs after kallsyms generation. If the architecture
+	needs to modify symbol locations, rather than manipulate the
+	kallsyms, it may be easier to add another postlink target for
+	.tmp_vmlinux? targets to be called from link-vmlinux.sh.
+
+	For example, powerpc uses this to check relocation sanity of
+	the linked vmlinux file.
+
+7 Kbuild syntax for exported headers
+------------------------------------
+
+The kernel includes a set of headers that is exported to userspace.
+Many headers can be exported as-is but other headers require a
+minimal pre-processing before they are ready for user-space.
+The pre-processing does:
+
+- drop kernel-specific annotations
+- drop include of compiler.h
+- drop all sections that are kernel internal (guarded by `ifdef __KERNEL__`)
+
+All headers under include/uapi/, include/generated/uapi/,
+arch/<arch>/include/uapi/ and arch/<arch>/include/generated/uapi/
+are exported.
+
+A Kbuild file may be defined under arch/<arch>/include/uapi/asm/ and
+arch/<arch>/include/asm/ to list asm files coming from asm-generic.
+See subsequent chapter for the syntax of the Kbuild file.
+
+7.1 no-export-headers
+---------------------
+
+	no-export-headers is essentially used by include/uapi/linux/Kbuild to
+	avoid exporting specific headers (e.g. kvm.h) on architectures that do
+	not support it. It should be avoided as much as possible.
+
+7.2 generic-y
+-------------
+
+	If an architecture uses a verbatim copy of a header from
+	include/asm-generic then this is listed in the file
+	arch/$(ARCH)/include/asm/Kbuild like this:
+
+		Example::
+
+			#arch/x86/include/asm/Kbuild
+			generic-y += termios.h
+			generic-y += rtc.h
+
+	During the prepare phase of the build a wrapper include
+	file is generated in the directory::
+
+		arch/$(ARCH)/include/generated/asm
+
+	When a header is exported where the architecture uses
+	the generic header a similar wrapper is generated as part
+	of the set of exported headers in the directory::
+
+		usr/include/asm
+
+	The generated wrapper will in both cases look like the following:
+
+		Example: termios.h::
+
+			#include <asm-generic/termios.h>
+
+7.3 generated-y
+---------------
+
+	If an architecture generates other header files alongside generic-y
+	wrappers, generated-y specifies them.
+
+	This prevents them being treated as stale asm-generic wrappers and
+	removed.
+
+		Example::
+
+			#arch/x86/include/asm/Kbuild
+			generated-y += syscalls_32.h
+
+7.4 mandatory-y
+---------------
+
+	mandatory-y is essentially used by include/(uapi/)asm-generic/Kbuild
+	to define the minimum set of ASM headers that all architectures must have.
+
+	This works like optional generic-y. If a mandatory header is missing
+	in arch/$(ARCH)/include/(uapi/)/asm, Kbuild will automatically generate
+	a wrapper of the asm-generic one.
+
+	The convention is to list one subdir per line and
+	preferably in alphabetic order.
+
+8 Kbuild Variables
+==================
+
+The top Makefile exports the following variables:
+
+    VERSION, PATCHLEVEL, SUBLEVEL, EXTRAVERSION
+	These variables define the current kernel version.  A few arch
+	Makefiles actually use these values directly; they should use
+	$(KERNELRELEASE) instead.
+
+	$(VERSION), $(PATCHLEVEL), and $(SUBLEVEL) define the basic
+	three-part version number, such as "2", "4", and "0".  These three
+	values are always numeric.
+
+	$(EXTRAVERSION) defines an even tinier sublevel for pre-patches
+	or additional patches.	It is usually some non-numeric string
+	such as "-pre4", and is often blank.
+
+    KERNELRELEASE
+	$(KERNELRELEASE) is a single string such as "2.4.0-pre4", suitable
+	for constructing installation directory names or showing in
+	version strings.  Some arch Makefiles use it for this purpose.
+
+    ARCH
+	This variable defines the target architecture, such as "i386",
+	"arm", or "sparc". Some kbuild Makefiles test $(ARCH) to
+	determine which files to compile.
+
+	By default, the top Makefile sets $(ARCH) to be the same as the
+	host system architecture.  For a cross build, a user may
+	override the value of $(ARCH) on the command line::
+
+	    make ARCH=m68k ...
+
+
+    INSTALL_PATH
+	This variable defines a place for the arch Makefiles to install
+	the resident kernel image and System.map file.
+	Use this for architecture-specific install targets.
+
+    INSTALL_MOD_PATH, MODLIB
+	$(INSTALL_MOD_PATH) specifies a prefix to $(MODLIB) for module
+	installation.  This variable is not defined in the Makefile but
+	may be passed in by the user if desired.
+
+	$(MODLIB) specifies the directory for module installation.
+	The top Makefile defines $(MODLIB) to
+	$(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE).  The user may
+	override this value on the command line if desired.
+
+    INSTALL_MOD_STRIP
+	If this variable is specified, it will cause modules to be stripped
+	after they are installed.  If INSTALL_MOD_STRIP is '1', then the
+	default option --strip-debug will be used.  Otherwise, the
+	INSTALL_MOD_STRIP value will be used as the option(s) to the strip
+	command.
+
+
+9 Makefile language
+===================
+
+The kernel Makefiles are designed to be run with GNU Make.  The Makefiles
+use only the documented features of GNU Make, but they do use many
+GNU extensions.
+
+GNU Make supports elementary list-processing functions.  The kernel
+Makefiles use a novel style of list building and manipulation with few
+"if" statements.
+
+GNU Make has two assignment operators, ":=" and "=".  ":=" performs
+immediate evaluation of the right-hand side and stores an actual string
+into the left-hand side.  "=" is like a formula definition; it stores the
+right-hand side in an unevaluated form and then evaluates this form each
+time the left-hand side is used.
+
+There are some cases where "=" is appropriate.  Usually, though, ":="
+is the right choice.
+
+10 Credits
+==========
+
+- Original version made by Michael Elizabeth Chastain, <mailto:mec@shout.net>
+- Updates by Kai Germaschewski <kai@tp1.ruhr-uni-bochum.de>
+- Updates by Sam Ravnborg <sam@ravnborg.org>
+- Language QA by Jan Engelhardt <jengelh@gmx.de>
+
+11 TODO
+=======
+
+- Describe how kbuild supports shipped files with _shipped.
+- Generating offset header files.
+- Add more variables to section 7?
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
deleted file mode 100644
index d65ad5746f94..000000000000
--- a/Documentation/kbuild/makefiles.txt
+++ /dev/null
@@ -1,1369 +0,0 @@
-Linux Kernel Makefiles
-
-This document describes the Linux kernel Makefiles.
-
-=== Table of Contents
-
-	=== 1 Overview
-	=== 2 Who does what
-	=== 3 The kbuild files
-	   --- 3.1 Goal definitions
-	   --- 3.2 Built-in object goals - obj-y
-	   --- 3.3 Loadable module goals - obj-m
-	   --- 3.4 Objects which export symbols
-	   --- 3.5 Library file goals - lib-y
-	   --- 3.6 Descending down in directories
-	   --- 3.7 Compilation flags
-	   --- 3.8 Command line dependency
-	   --- 3.9 Dependency tracking
-	   --- 3.10 Special Rules
-	   --- 3.11 $(CC) support functions
-	   --- 3.12 $(LD) support functions
-
-	=== 4 Host Program support
-	   --- 4.1 Simple Host Program
-	   --- 4.2 Composite Host Programs
-	   --- 4.3 Using C++ for host programs
-	   --- 4.4 Controlling compiler options for host programs
-	   --- 4.5 When host programs are actually built
-	   --- 4.6 Using hostprogs-$(CONFIG_FOO)
-
-	=== 5 Kbuild clean infrastructure
-
-	=== 6 Architecture Makefiles
-	   --- 6.1 Set variables to tweak the build to the architecture
-	   --- 6.2 Add prerequisites to archheaders:
-	   --- 6.3 Add prerequisites to archprepare:
-	   --- 6.4 List directories to visit when descending
-	   --- 6.5 Architecture-specific boot images
-	   --- 6.6 Building non-kbuild targets
-	   --- 6.7 Commands useful for building a boot image
-	   --- 6.8 Custom kbuild commands
-	   --- 6.9 Preprocessing linker scripts
-	   --- 6.10 Generic header files
-	   --- 6.11 Post-link pass
-
-	=== 7 Kbuild syntax for exported headers
-		--- 7.1 no-export-headers
-		--- 7.2 generic-y
-		--- 7.3 generated-y
-		--- 7.4 mandatory-y
-
-	=== 8 Kbuild Variables
-	=== 9 Makefile language
-	=== 10 Credits
-	=== 11 TODO
-
-=== 1 Overview
-
-The Makefiles have five parts:
-
-	Makefile		the top Makefile.
-	.config			the kernel configuration file.
-	arch/$(ARCH)/Makefile	the arch Makefile.
-	scripts/Makefile.*	common rules etc. for all kbuild Makefiles.
-	kbuild Makefiles	there are about 500 of these.
-
-The top Makefile reads the .config file, which comes from the kernel
-configuration process.
-
-The top Makefile is responsible for building two major products: vmlinux
-(the resident kernel image) and modules (any module files).
-It builds these goals by recursively descending into the subdirectories of
-the kernel source tree.
-The list of subdirectories which are visited depends upon the kernel
-configuration. The top Makefile textually includes an arch Makefile
-with the name arch/$(ARCH)/Makefile. The arch Makefile supplies
-architecture-specific information to the top Makefile.
-
-Each subdirectory has a kbuild Makefile which carries out the commands
-passed down from above. The kbuild Makefile uses information from the
-.config file to construct various file lists used by kbuild to build
-any built-in or modular targets.
-
-scripts/Makefile.* contains all the definitions/rules etc. that
-are used to build the kernel based on the kbuild makefiles.
-
-
-=== 2 Who does what
-
-People have four different relationships with the kernel Makefiles.
-
-*Users* are people who build kernels.  These people type commands such as
-"make menuconfig" or "make".  They usually do not read or edit
-any kernel Makefiles (or any other source files).
-
-*Normal developers* are people who work on features such as device
-drivers, file systems, and network protocols.  These people need to
-maintain the kbuild Makefiles for the subsystem they are
-working on.  In order to do this effectively, they need some overall
-knowledge about the kernel Makefiles, plus detailed knowledge about the
-public interface for kbuild.
-
-*Arch developers* are people who work on an entire architecture, such
-as sparc or ia64.  Arch developers need to know about the arch Makefile
-as well as kbuild Makefiles.
-
-*Kbuild developers* are people who work on the kernel build system itself.
-These people need to know about all aspects of the kernel Makefiles.
-
-This document is aimed towards normal developers and arch developers.
-
-
-=== 3 The kbuild files
-
-Most Makefiles within the kernel are kbuild Makefiles that use the
-kbuild infrastructure. This chapter introduces the syntax used in the
-kbuild makefiles.
-The preferred name for the kbuild files are 'Makefile' but 'Kbuild' can
-be used and if both a 'Makefile' and a 'Kbuild' file exists, then the 'Kbuild'
-file will be used.
-
-Section 3.1 "Goal definitions" is a quick intro, further chapters provide
-more details, with real examples.
-
---- 3.1 Goal definitions
-
-	Goal definitions are the main part (heart) of the kbuild Makefile.
-	These lines define the files to be built, any special compilation
-	options, and any subdirectories to be entered recursively.
-
-	The most simple kbuild makefile contains one line:
-
-	Example:
-		obj-y += foo.o
-
-	This tells kbuild that there is one object in that directory, named
-	foo.o. foo.o will be built from foo.c or foo.S.
-
-	If foo.o shall be built as a module, the variable obj-m is used.
-	Therefore the following pattern is often used:
-
-	Example:
-		obj-$(CONFIG_FOO) += foo.o
-
-	$(CONFIG_FOO) evaluates to either y (for built-in) or m (for module).
-	If CONFIG_FOO is neither y nor m, then the file will not be compiled
-	nor linked.
-
---- 3.2 Built-in object goals - obj-y
-
-	The kbuild Makefile specifies object files for vmlinux
-	in the $(obj-y) lists.  These lists depend on the kernel
-	configuration.
-
-	Kbuild compiles all the $(obj-y) files.  It then calls
-	"$(AR) rcSTP" to merge these files into one built-in.a file.
-	This is a thin archive without a symbol table. It will be later
-	linked into vmlinux by scripts/link-vmlinux.sh
-
-	The order of files in $(obj-y) is significant.  Duplicates in
-	the lists are allowed: the first instance will be linked into
-	built-in.a and succeeding instances will be ignored.
-
-	Link order is significant, because certain functions
-	(module_init() / __initcall) will be called during boot in the
-	order they appear. So keep in mind that changing the link
-	order may e.g. change the order in which your SCSI
-	controllers are detected, and thus your disks are renumbered.
-
-	Example:
-		#drivers/isdn/i4l/Makefile
-		# Makefile for the kernel ISDN subsystem and device drivers.
-		# Each configuration option enables a list of files.
-		obj-$(CONFIG_ISDN_I4L)         += isdn.o
-		obj-$(CONFIG_ISDN_PPP_BSDCOMP) += isdn_bsdcomp.o
-
---- 3.3 Loadable module goals - obj-m
-
-	$(obj-m) specifies object files which are built as loadable
-	kernel modules.
-
-	A module may be built from one source file or several source
-	files. In the case of one source file, the kbuild makefile
-	simply adds the file to $(obj-m).
-
-	Example:
-		#drivers/isdn/i4l/Makefile
-		obj-$(CONFIG_ISDN_PPP_BSDCOMP) += isdn_bsdcomp.o
-
-	Note: In this example $(CONFIG_ISDN_PPP_BSDCOMP) evaluates to 'm'
-
-	If a kernel module is built from several source files, you specify
-	that you want to build a module in the same way as above; however,
-	kbuild needs to know which object files you want to build your
-	module from, so you have to tell it by setting a $(<module_name>-y)
-	variable.
-
-	Example:
-		#drivers/isdn/i4l/Makefile
-		obj-$(CONFIG_ISDN_I4L) += isdn.o
-		isdn-y := isdn_net_lib.o isdn_v110.o isdn_common.o
-
-	In this example, the module name will be isdn.o. Kbuild will
-	compile the objects listed in $(isdn-y) and then run
-	"$(LD) -r" on the list of these files to generate isdn.o.
-
-	Due to kbuild recognizing $(<module_name>-y) for composite objects,
-	you can use the value of a CONFIG_ symbol to optionally include an
-	object file as part of a composite object.
-
-	Example:
-		#fs/ext2/Makefile
-	        obj-$(CONFIG_EXT2_FS) += ext2.o
-		ext2-y := balloc.o dir.o file.o ialloc.o inode.o ioctl.o \
-			  namei.o super.o symlink.o
-	        ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o \
-						xattr_trusted.o
-
-	In this example, xattr.o, xattr_user.o and xattr_trusted.o are only
-	part of the composite object ext2.o if $(CONFIG_EXT2_FS_XATTR)
-	evaluates to 'y'.
-
-	Note: Of course, when you are building objects into the kernel,
-	the syntax above will also work. So, if you have CONFIG_EXT2_FS=y,
-	kbuild will build an ext2.o file for you out of the individual
-	parts and then link this into built-in.a, as you would expect.
-
---- 3.4 Objects which export symbols
-
-	No special notation is required in the makefiles for
-	modules exporting symbols.
-
---- 3.5 Library file goals - lib-y
-
-	Objects listed with obj-* are used for modules, or
-	combined in a built-in.a for that specific directory.
-	There is also the possibility to list objects that will
-	be included in a library, lib.a.
-	All objects listed with lib-y are combined in a single
-	library for that directory.
-	Objects that are listed in obj-y and additionally listed in
-	lib-y will not be included in the library, since they will
-	be accessible anyway.
-	For consistency, objects listed in lib-m will be included in lib.a.
-
-	Note that the same kbuild makefile may list files to be built-in
-	and to be part of a library. Therefore the same directory
-	may contain both a built-in.a and a lib.a file.
-
-	Example:
-		#arch/x86/lib/Makefile
-		lib-y    := delay.o
-
-	This will create a library lib.a based on delay.o. For kbuild to
-	actually recognize that there is a lib.a being built, the directory
-	shall be listed in libs-y.
-	See also "6.4 List directories to visit when descending".
-
-	Use of lib-y is normally restricted to lib/ and arch/*/lib.
-
---- 3.6 Descending down in directories
-
-	A Makefile is only responsible for building objects in its own
-	directory. Files in subdirectories should be taken care of by
-	Makefiles in these subdirs. The build system will automatically
-	invoke make recursively in subdirectories, provided you let it know of
-	them.
-
-	To do so, obj-y and obj-m are used.
-	ext2 lives in a separate directory, and the Makefile present in fs/
-	tells kbuild to descend down using the following assignment.
-
-	Example:
-		#fs/Makefile
-		obj-$(CONFIG_EXT2_FS) += ext2/
-
-	If CONFIG_EXT2_FS is set to either 'y' (built-in) or 'm' (modular)
-	the corresponding obj- variable will be set, and kbuild will descend
-	down in the ext2 directory.
-	Kbuild only uses this information to decide that it needs to visit
-	the directory, it is the Makefile in the subdirectory that
-	specifies what is modular and what is built-in.
-
-	It is good practice to use a CONFIG_ variable when assigning directory
-	names. This allows kbuild to totally skip the directory if the
-	corresponding CONFIG_ option is neither 'y' nor 'm'.
-
---- 3.7 Compilation flags
-
-    ccflags-y, asflags-y and ldflags-y
-	These three flags apply only to the kbuild makefile in which they
-	are assigned. They are used for all the normal cc, as and ld
-	invocations happening during a recursive build.
-	Note: Flags with the same behaviour were previously named:
-	EXTRA_CFLAGS, EXTRA_AFLAGS and EXTRA_LDFLAGS.
-	They are still supported but their usage is deprecated.
-
-	ccflags-y specifies options for compiling with $(CC).
-
-	Example:
-		# drivers/acpi/acpica/Makefile
-		ccflags-y			:= -Os -D_LINUX -DBUILDING_ACPICA
-		ccflags-$(CONFIG_ACPI_DEBUG)	+= -DACPI_DEBUG_OUTPUT
-
-	This variable is necessary because the top Makefile owns the
-	variable $(KBUILD_CFLAGS) and uses it for compilation flags for the
-	entire tree.
-
-	asflags-y specifies options for assembling with $(AS).
-
-	Example:
-		#arch/sparc/kernel/Makefile
-		asflags-y := -ansi
-
-	ldflags-y specifies options for linking with $(LD).
-
-	Example:
-		#arch/cris/boot/compressed/Makefile
-		ldflags-y += -T $(srctree)/$(src)/decompress_$(arch-y).lds
-
-    subdir-ccflags-y, subdir-asflags-y
-	The two flags listed above are similar to ccflags-y and asflags-y.
-	The difference is that the subdir- variants have effect for the kbuild
-	file where they are present and all subdirectories.
-	Options specified using subdir-* are added to the commandline before
-	the options specified using the non-subdir variants.
-
-	Example:
-		subdir-ccflags-y := -Werror
-
-    CFLAGS_$@, AFLAGS_$@
-
-	CFLAGS_$@ and AFLAGS_$@ only apply to commands in current
-	kbuild makefile.
-
-	$(CFLAGS_$@) specifies per-file options for $(CC).  The $@
-	part has a literal value which specifies the file that it is for.
-
-	Example:
-		# drivers/scsi/Makefile
-		CFLAGS_aha152x.o =   -DAHA152X_STAT -DAUTOCONF
-		CFLAGS_gdth.o    = # -DDEBUG_GDTH=2 -D__SERIAL__ -D__COM2__ \
-				     -DGDTH_STATISTICS
-
-	These two lines specify compilation flags for aha152x.o and gdth.o.
-
-	$(AFLAGS_$@) is a similar feature for source files in assembly
-	languages.
-
-	Example:
-		# arch/arm/kernel/Makefile
-		AFLAGS_head.o        := -DTEXT_OFFSET=$(TEXT_OFFSET)
-		AFLAGS_crunch-bits.o := -Wa,-mcpu=ep9312
-		AFLAGS_iwmmxt.o      := -Wa,-mcpu=iwmmxt
-
-
---- 3.9 Dependency tracking
-
-	Kbuild tracks dependencies on the following:
-	1) All prerequisite files (both *.c and *.h)
-	2) CONFIG_ options used in all prerequisite files
-	3) Command-line used to compile target
-
-	Thus, if you change an option to $(CC) all affected files will
-	be re-compiled.
-
---- 3.10 Special Rules
-
-	Special rules are used when the kbuild infrastructure does
-	not provide the required support. A typical example is
-	header files generated during the build process.
-	Another example are the architecture-specific Makefiles which
-	need special rules to prepare boot images etc.
-
-	Special rules are written as normal Make rules.
-	Kbuild is not executing in the directory where the Makefile is
-	located, so all special rules shall provide a relative
-	path to prerequisite files and target files.
-
-	Two variables are used when defining special rules:
-
-    $(src)
-	$(src) is a relative path which points to the directory
-	where the Makefile is located. Always use $(src) when
-	referring to files located in the src tree.
-
-    $(obj)
-	$(obj) is a relative path which points to the directory
-	where the target is saved. Always use $(obj) when
-	referring to generated files.
-
-	Example:
-		#drivers/scsi/Makefile
-		$(obj)/53c8xx_d.h: $(src)/53c7,8xx.scr $(src)/script_asm.pl
-			$(CPP) -DCHIP=810 - < $< | ... $(src)/script_asm.pl
-
-	This is a special rule, following the normal syntax
-	required by make.
-	The target file depends on two prerequisite files. References
-	to the target file are prefixed with $(obj), references
-	to prerequisites are referenced with $(src) (because they are not
-	generated files).
-
-    $(kecho)
-	echoing information to user in a rule is often a good practice
-	but when execution "make -s" one does not expect to see any output
-	except for warnings/errors.
-	To support this kbuild defines $(kecho) which will echo out the
-	text following $(kecho) to stdout except if "make -s" is used.
-
-	Example:
-		#arch/blackfin/boot/Makefile
-		$(obj)/vmImage: $(obj)/vmlinux.gz
-			$(call if_changed,uimage)
-			@$(kecho) 'Kernel: $@ is ready'
-
-
---- 3.11 $(CC) support functions
-
-	The kernel may be built with several different versions of
-	$(CC), each supporting a unique set of features and options.
-	kbuild provides basic support to check for valid options for $(CC).
-	$(CC) is usually the gcc compiler, but other alternatives are
-	available.
-
-    as-option
-	as-option is used to check if $(CC) -- when used to compile
-	assembler (*.S) files -- supports the given option. An optional
-	second option may be specified if the first option is not supported.
-
-	Example:
-		#arch/sh/Makefile
-		cflags-y += $(call as-option,-Wa$(comma)-isa=$(isa-y),)
-
-	In the above example, cflags-y will be assigned the option
-	-Wa$(comma)-isa=$(isa-y) if it is supported by $(CC).
-	The second argument is optional, and if supplied will be used
-	if first argument is not supported.
-
-    as-instr
-	as-instr checks if the assembler reports a specific instruction
-	and then outputs either option1 or option2
-	C escapes are supported in the test instruction
-	Note: as-instr-option uses KBUILD_AFLAGS for $(AS) options
-
-    cc-option
-	cc-option is used to check if $(CC) supports a given option, and if
-	not supported to use an optional second option.
-
-	Example:
-		#arch/x86/Makefile
-		cflags-y += $(call cc-option,-march=pentium-mmx,-march=i586)
-
-	In the above example, cflags-y will be assigned the option
-	-march=pentium-mmx if supported by $(CC), otherwise -march=i586.
-	The second argument to cc-option is optional, and if omitted,
-	cflags-y will be assigned no value if first option is not supported.
-	Note: cc-option uses KBUILD_CFLAGS for $(CC) options
-
-   cc-option-yn
-	cc-option-yn is used to check if gcc supports a given option
-	and return 'y' if supported, otherwise 'n'.
-
-	Example:
-		#arch/ppc/Makefile
-		biarch := $(call cc-option-yn, -m32)
-		aflags-$(biarch) += -a32
-		cflags-$(biarch) += -m32
-
-	In the above example, $(biarch) is set to y if $(CC) supports the -m32
-	option. When $(biarch) equals 'y', the expanded variables $(aflags-y)
-	and $(cflags-y) will be assigned the values -a32 and -m32,
-	respectively.
-	Note: cc-option-yn uses KBUILD_CFLAGS for $(CC) options
-
-    cc-disable-warning
-	cc-disable-warning checks if gcc supports a given warning and returns
-	the commandline switch to disable it. This special function is needed,
-	because gcc 4.4 and later accept any unknown -Wno-* option and only
-	warn about it if there is another warning in the source file.
-
-	Example:
-		KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
-
-	In the above example, -Wno-unused-but-set-variable will be added to
-	KBUILD_CFLAGS only if gcc really accepts it.
-
-    cc-ifversion
-	cc-ifversion tests the version of $(CC) and equals the fourth parameter
-	if version expression is true, or the fifth (if given) if the version
-	expression is false.
-
-	Example:
-		#fs/reiserfs/Makefile
-		ccflags-y := $(call cc-ifversion, -lt, 0402, -O1)
-
-	In this example, ccflags-y will be assigned the value -O1 if the
-	$(CC) version is less than 4.2.
-	cc-ifversion takes all the shell operators:
-	-eq, -ne, -lt, -le, -gt, and -ge
-	The third parameter may be a text as in this example, but it may also
-	be an expanded variable or a macro.
-
-    cc-cross-prefix
-	cc-cross-prefix is used to check if there exists a $(CC) in path with
-	one of the listed prefixes. The first prefix where there exist a
-	prefix$(CC) in the PATH is returned - and if no prefix$(CC) is found
-	then nothing is returned.
-	Additional prefixes are separated by a single space in the
-	call of cc-cross-prefix.
-	This functionality is useful for architecture Makefiles that try
-	to set CROSS_COMPILE to well-known values but may have several
-	values to select between.
-	It is recommended only to try to set CROSS_COMPILE if it is a cross
-	build (host arch is different from target arch). And if CROSS_COMPILE
-	is already set then leave it with the old value.
-
-	Example:
-		#arch/m68k/Makefile
-		ifneq ($(SUBARCH),$(ARCH))
-		        ifeq ($(CROSS_COMPILE),)
-		               CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu-)
-			endif
-		endif
-
---- 3.12 $(LD) support functions
-
-    ld-option
-	ld-option is used to check if $(LD) supports the supplied option.
-	ld-option takes two options as arguments.
-	The second argument is an optional option that can be used if the
-	first option is not supported by $(LD).
-
-	Example:
-		#Makefile
-		LDFLAGS_vmlinux += $(call ld-option, -X)
-
-
-=== 4 Host Program support
-
-Kbuild supports building executables on the host for use during the
-compilation stage.
-Two steps are required in order to use a host executable.
-
-The first step is to tell kbuild that a host program exists. This is
-done utilising the variable hostprogs-y.
-
-The second step is to add an explicit dependency to the executable.
-This can be done in two ways. Either add the dependency in a rule,
-or utilise the variable $(always).
-Both possibilities are described in the following.
-
---- 4.1 Simple Host Program
-
-	In some cases there is a need to compile and run a program on the
-	computer where the build is running.
-	The following line tells kbuild that the program bin2hex shall be
-	built on the build host.
-
-	Example:
-		hostprogs-y := bin2hex
-
-	Kbuild assumes in the above example that bin2hex is made from a single
-	c-source file named bin2hex.c located in the same directory as
-	the Makefile.
-
---- 4.2 Composite Host Programs
-
-	Host programs can be made up based on composite objects.
-	The syntax used to define composite objects for host programs is
-	similar to the syntax used for kernel objects.
-	$(<executable>-objs) lists all objects used to link the final
-	executable.
-
-	Example:
-		#scripts/lxdialog/Makefile
-		hostprogs-y   := lxdialog
-		lxdialog-objs := checklist.o lxdialog.o
-
-	Objects with extension .o are compiled from the corresponding .c
-	files. In the above example, checklist.c is compiled to checklist.o
-	and lxdialog.c is compiled to lxdialog.o.
-	Finally, the two .o files are linked to the executable, lxdialog.
-	Note: The syntax <executable>-y is not permitted for host-programs.
-
---- 4.3 Using C++ for host programs
-
-	kbuild offers support for host programs written in C++. This was
-	introduced solely to support kconfig, and is not recommended
-	for general use.
-
-	Example:
-		#scripts/kconfig/Makefile
-		hostprogs-y   := qconf
-		qconf-cxxobjs := qconf.o
-
-	In the example above the executable is composed of the C++ file
-	qconf.cc - identified by $(qconf-cxxobjs).
-
-	If qconf is composed of a mixture of .c and .cc files, then an
-	additional line can be used to identify this.
-
-	Example:
-		#scripts/kconfig/Makefile
-		hostprogs-y   := qconf
-		qconf-cxxobjs := qconf.o
-		qconf-objs    := check.o
-
---- 4.4 Controlling compiler options for host programs
-
-	When compiling host programs, it is possible to set specific flags.
-	The programs will always be compiled utilising $(HOSTCC) passed
-	the options specified in $(KBUILD_HOSTCFLAGS).
-	To set flags that will take effect for all host programs created
-	in that Makefile, use the variable HOST_EXTRACFLAGS.
-
-	Example:
-		#scripts/lxdialog/Makefile
-		HOST_EXTRACFLAGS += -I/usr/include/ncurses
-
-	To set specific flags for a single file the following construction
-	is used:
-
-	Example:
-		#arch/ppc64/boot/Makefile
-		HOSTCFLAGS_piggyback.o := -DKERNELBASE=$(KERNELBASE)
-
-	It is also possible to specify additional options to the linker.
-
-	Example:
-		#scripts/kconfig/Makefile
-		HOSTLDLIBS_qconf := -L$(QTDIR)/lib
-
-	When linking qconf, it will be passed the extra option
-	"-L$(QTDIR)/lib".
-
---- 4.5 When host programs are actually built
-
-	Kbuild will only build host-programs when they are referenced
-	as a prerequisite.
-	This is possible in two ways:
-
-	(1) List the prerequisite explicitly in a special rule.
-
-	Example:
-		#drivers/pci/Makefile
-		hostprogs-y := gen-devlist
-		$(obj)/devlist.h: $(src)/pci.ids $(obj)/gen-devlist
-			( cd $(obj); ./gen-devlist ) < $<
-
-	The target $(obj)/devlist.h will not be built before
-	$(obj)/gen-devlist is updated. Note that references to
-	the host programs in special rules must be prefixed with $(obj).
-
-	(2) Use $(always)
-	When there is no suitable special rule, and the host program
-	shall be built when a makefile is entered, the $(always)
-	variable shall be used.
-
-	Example:
-		#scripts/lxdialog/Makefile
-		hostprogs-y   := lxdialog
-		always        := $(hostprogs-y)
-
-	This will tell kbuild to build lxdialog even if not referenced in
-	any rule.
-
---- 4.6 Using hostprogs-$(CONFIG_FOO)
-
-	A typical pattern in a Kbuild file looks like this:
-
-	Example:
-		#scripts/Makefile
-		hostprogs-$(CONFIG_KALLSYMS) += kallsyms
-
-	Kbuild knows about both 'y' for built-in and 'm' for module.
-	So if a config symbol evaluates to 'm', kbuild will still build
-	the binary. In other words, Kbuild handles hostprogs-m exactly
-	like hostprogs-y. But only hostprogs-y is recommended to be used
-	when no CONFIG symbols are involved.
-
-=== 5 Kbuild clean infrastructure
-
-"make clean" deletes most generated files in the obj tree where the kernel
-is compiled. This includes generated files such as host programs.
-Kbuild knows targets listed in $(hostprogs-y), $(hostprogs-m), $(always),
-$(extra-y) and $(targets). They are all deleted during "make clean".
-Files matching the patterns "*.[oas]", "*.ko", plus some additional files
-generated by kbuild are deleted all over the kernel src tree when
-"make clean" is executed.
-
-Additional files can be specified in kbuild makefiles by use of $(clean-files).
-
-	Example:
-		#lib/Makefile
-		clean-files := crc32table.h
-
-When executing "make clean", the file "crc32table.h" will be deleted.
-Kbuild will assume files to be in the same relative directory as the
-Makefile, except if prefixed with $(objtree).
-
-To delete a directory hierarchy use:
-
-	Example:
-		#scripts/package/Makefile
-		clean-dirs := $(objtree)/debian/
-
-This will delete the directory debian in the toplevel directory, including all
-subdirectories.
-
-To exclude certain files from make clean, use the $(no-clean-files) variable.
-This is only a special case used in the top level Kbuild file:
-
-	Example:
-		#Kbuild
-		no-clean-files := $(bounds-file) $(offsets-file)
-
-Usually kbuild descends down in subdirectories due to "obj-* := dir/",
-but in the architecture makefiles where the kbuild infrastructure
-is not sufficient this sometimes needs to be explicit.
-
-	Example:
-		#arch/x86/boot/Makefile
-		subdir- := compressed/
-
-The above assignment instructs kbuild to descend down in the
-directory compressed/ when "make clean" is executed.
-
-To support the clean infrastructure in the Makefiles that build the
-final bootimage there is an optional target named archclean:
-
-	Example:
-		#arch/x86/Makefile
-		archclean:
-			$(Q)$(MAKE) $(clean)=arch/x86/boot
-
-When "make clean" is executed, make will descend down in arch/x86/boot,
-and clean as usual. The Makefile located in arch/x86/boot/ may use
-the subdir- trick to descend further down.
-
-Note 1: arch/$(ARCH)/Makefile cannot use "subdir-", because that file is
-included in the top level makefile, and the kbuild infrastructure
-is not operational at that point.
-
-Note 2: All directories listed in core-y, libs-y, drivers-y and net-y will
-be visited during "make clean".
-
-=== 6 Architecture Makefiles
-
-The top level Makefile sets up the environment and does the preparation,
-before starting to descend down in the individual directories.
-The top level makefile contains the generic part, whereas
-arch/$(ARCH)/Makefile contains what is required to set up kbuild
-for said architecture.
-To do so, arch/$(ARCH)/Makefile sets up a number of variables and defines
-a few targets.
-
-When kbuild executes, the following steps are followed (roughly):
-1) Configuration of the kernel => produce .config
-2) Store kernel version in include/linux/version.h
-3) Updating all other prerequisites to the target prepare:
-   - Additional prerequisites are specified in arch/$(ARCH)/Makefile
-4) Recursively descend down in all directories listed in
-   init-* core* drivers-* net-* libs-* and build all targets.
-   - The values of the above variables are expanded in arch/$(ARCH)/Makefile.
-5) All object files are then linked and the resulting file vmlinux is
-   located at the root of the obj tree.
-   The very first objects linked are listed in head-y, assigned by
-   arch/$(ARCH)/Makefile.
-6) Finally, the architecture-specific part does any required post processing
-   and builds the final bootimage.
-   - This includes building boot records
-   - Preparing initrd images and the like
-
-
---- 6.1 Set variables to tweak the build to the architecture
-
-    LDFLAGS		Generic $(LD) options
-
-	Flags used for all invocations of the linker.
-	Often specifying the emulation is sufficient.
-
-	Example:
-		#arch/s390/Makefile
-		LDFLAGS         := -m elf_s390
-	Note: ldflags-y can be used to further customise
-	the flags used. See chapter 3.7.
-
-    LDFLAGS_vmlinux	Options for $(LD) when linking vmlinux
-
-	LDFLAGS_vmlinux is used to specify additional flags to pass to
-	the linker when linking the final vmlinux image.
-	LDFLAGS_vmlinux uses the LDFLAGS_$@ support.
-
-	Example:
-		#arch/x86/Makefile
-		LDFLAGS_vmlinux := -e stext
-
-    OBJCOPYFLAGS	objcopy flags
-
-	When $(call if_changed,objcopy) is used to translate a .o file,
-	the flags specified in OBJCOPYFLAGS will be used.
-	$(call if_changed,objcopy) is often used to generate raw binaries on
-	vmlinux.
-
-	Example:
-		#arch/s390/Makefile
-		OBJCOPYFLAGS := -O binary
-
-		#arch/s390/boot/Makefile
-		$(obj)/image: vmlinux FORCE
-			$(call if_changed,objcopy)
-
-	In this example, the binary $(obj)/image is a binary version of
-	vmlinux. The usage of $(call if_changed,xxx) will be described later.
-
-    KBUILD_AFLAGS		$(AS) assembler flags
-
-	Default value - see top level Makefile
-	Append or modify as required per architecture.
-
-	Example:
-		#arch/sparc64/Makefile
-		KBUILD_AFLAGS += -m64 -mcpu=ultrasparc
-
-    KBUILD_CFLAGS		$(CC) compiler flags
-
-	Default value - see top level Makefile
-	Append or modify as required per architecture.
-
-	Often, the KBUILD_CFLAGS variable depends on the configuration.
-
-	Example:
-		#arch/x86/boot/compressed/Makefile
-		cflags-$(CONFIG_X86_32) := -march=i386
-		cflags-$(CONFIG_X86_64) := -mcmodel=small
-		KBUILD_CFLAGS += $(cflags-y)
-
-	Many arch Makefiles dynamically run the target C compiler to
-	probe supported options:
-
-		#arch/x86/Makefile
-
-		...
-		cflags-$(CONFIG_MPENTIUMII)     += $(call cc-option,\
-						-march=pentium2,-march=i686)
-		...
-		# Disable unit-at-a-time mode ...
-		KBUILD_CFLAGS += $(call cc-option,-fno-unit-at-a-time)
-		...
-
-
-	The first example utilises the trick that a config option expands
-	to 'y' when selected.
-
-    KBUILD_AFLAGS_KERNEL	$(AS) options specific for built-in
-
-	$(KBUILD_AFLAGS_KERNEL) contains extra C compiler flags used to compile
-	resident kernel code.
-
-    KBUILD_AFLAGS_MODULE   Options for $(AS) when building modules
-
-	$(KBUILD_AFLAGS_MODULE) is used to add arch-specific options that
-	are used for $(AS).
-	From commandline AFLAGS_MODULE shall be used (see kbuild.txt).
-
-    KBUILD_CFLAGS_KERNEL	$(CC) options specific for built-in
-
-	$(KBUILD_CFLAGS_KERNEL) contains extra C compiler flags used to compile
-	resident kernel code.
-
-    KBUILD_CFLAGS_MODULE   Options for $(CC) when building modules
-
-	$(KBUILD_CFLAGS_MODULE) is used to add arch-specific options that
-	are used for $(CC).
-	From commandline CFLAGS_MODULE shall be used (see kbuild.txt).
-
-    KBUILD_LDFLAGS_MODULE   Options for $(LD) when linking modules
-
-	$(KBUILD_LDFLAGS_MODULE) is used to add arch-specific options
-	used when linking modules. This is often a linker script.
-	From commandline LDFLAGS_MODULE shall be used (see kbuild.txt).
-
-    KBUILD_ARFLAGS   Options for $(AR) when creating archives
-
-	$(KBUILD_ARFLAGS) set by the top level Makefile to "D" (deterministic
-	mode) if this option is supported by $(AR).
-
-    ARCH_CPPFLAGS, ARCH_AFLAGS, ARCH_CFLAGS   Overrides the kbuild defaults
-
-	These variables are appended to the KBUILD_CPPFLAGS,
-	KBUILD_AFLAGS, and KBUILD_CFLAGS, respectively, after the
-	top-level Makefile has set any other flags. This provides a
-	means for an architecture to override the defaults.
-
-
---- 6.2 Add prerequisites to archheaders:
-
-	The archheaders: rule is used to generate header files that
-	may be installed into user space by "make header_install" or
-	"make headers_install_all".  In order to support
-	"make headers_install_all", this target has to be able to run
-	on an unconfigured tree, or a tree configured for another
-	architecture.
-
-	It is run before "make archprepare" when run on the
-	architecture itself.
-
-
---- 6.3 Add prerequisites to archprepare:
-
-	The archprepare: rule is used to list prerequisites that need to be
-	built before starting to descend down in the subdirectories.
-	This is usually used for header files containing assembler constants.
-
-		Example:
-		#arch/arm/Makefile
-		archprepare: maketools
-
-	In this example, the file target maketools will be processed
-	before descending down in the subdirectories.
-	See also chapter XXX-TODO that describe how kbuild supports
-	generating offset header files.
-
-
---- 6.4 List directories to visit when descending
-
-	An arch Makefile cooperates with the top Makefile to define variables
-	which specify how to build the vmlinux file.  Note that there is no
-	corresponding arch-specific section for modules; the module-building
-	machinery is all architecture-independent.
-
-
-    head-y, init-y, core-y, libs-y, drivers-y, net-y
-
-	$(head-y) lists objects to be linked first in vmlinux.
-	$(libs-y) lists directories where a lib.a archive can be located.
-	The rest list directories where a built-in.a object file can be
-	located.
-
-	$(init-y) objects will be located after $(head-y).
-	Then the rest follows in this order:
-	$(core-y), $(libs-y), $(drivers-y) and $(net-y).
-
-	The top level Makefile defines values for all generic directories,
-	and arch/$(ARCH)/Makefile only adds architecture-specific directories.
-
-	Example:
-		#arch/sparc64/Makefile
-		core-y += arch/sparc64/kernel/
-		libs-y += arch/sparc64/prom/ arch/sparc64/lib/
-		drivers-$(CONFIG_OPROFILE)  += arch/sparc64/oprofile/
-
-
---- 6.5 Architecture-specific boot images
-
-	An arch Makefile specifies goals that take the vmlinux file, compress
-	it, wrap it in bootstrapping code, and copy the resulting files
-	somewhere. This includes various kinds of installation commands.
-	The actual goals are not standardized across architectures.
-
-	It is common to locate any additional processing in a boot/
-	directory below arch/$(ARCH)/.
-
-	Kbuild does not provide any smart way to support building a
-	target specified in boot/. Therefore arch/$(ARCH)/Makefile shall
-	call make manually to build a target in boot/.
-
-	The recommended approach is to include shortcuts in
-	arch/$(ARCH)/Makefile, and use the full path when calling down
-	into the arch/$(ARCH)/boot/Makefile.
-
-	Example:
-		#arch/x86/Makefile
-		boot := arch/x86/boot
-		bzImage: vmlinux
-			$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-
-	"$(Q)$(MAKE) $(build)=<dir>" is the recommended way to invoke
-	make in a subdirectory.
-
-	There are no rules for naming architecture-specific targets,
-	but executing "make help" will list all relevant targets.
-	To support this, $(archhelp) must be defined.
-
-	Example:
-		#arch/x86/Makefile
-		define archhelp
-		  echo  '* bzImage      - Image (arch/$(ARCH)/boot/bzImage)'
-		endif
-
-	When make is executed without arguments, the first goal encountered
-	will be built. In the top level Makefile the first goal present
-	is all:.
-	An architecture shall always, per default, build a bootable image.
-	In "make help", the default goal is highlighted with a '*'.
-	Add a new prerequisite to all: to select a default goal different
-	from vmlinux.
-
-	Example:
-		#arch/x86/Makefile
-		all: bzImage
-
-	When "make" is executed without arguments, bzImage will be built.
-
---- 6.6 Building non-kbuild targets
-
-    extra-y
-
-	extra-y specifies additional targets created in the current
-	directory, in addition to any targets specified by obj-*.
-
-	Listing all targets in extra-y is required for two purposes:
-	1) Enable kbuild to check changes in command lines
-	   - When $(call if_changed,xxx) is used
-	2) kbuild knows what files to delete during "make clean"
-
-	Example:
-		#arch/x86/kernel/Makefile
-		extra-y := head.o init_task.o
-
-	In this example, extra-y is used to list object files that
-	shall be built, but shall not be linked as part of built-in.a.
-
-
---- 6.7 Commands useful for building a boot image
-
-	Kbuild provides a few macros that are useful when building a
-	boot image.
-
-    if_changed
-
-	if_changed is the infrastructure used for the following commands.
-
-	Usage:
-		target: source(s) FORCE
-			$(call if_changed,ld/objcopy/gzip/...)
-
-	When the rule is evaluated, it is checked to see if any files
-	need an update, or the command line has changed since the last
-	invocation. The latter will force a rebuild if any options
-	to the executable have changed.
-	Any target that utilises if_changed must be listed in $(targets),
-	otherwise the command line check will fail, and the target will
-	always be built.
-	Assignments to $(targets) are without $(obj)/ prefix.
-	if_changed may be used in conjunction with custom commands as
-	defined in 6.8 "Custom kbuild commands".
-
-	Note: It is a typical mistake to forget the FORCE prerequisite.
-	Another common pitfall is that whitespace is sometimes
-	significant; for instance, the below will fail (note the extra space
-	after the comma):
-		target: source(s) FORCE
-	#WRONG!#	$(call if_changed, ld/objcopy/gzip/...)
-
-        Note: if_changed should not be used more than once per target.
-              It stores the executed command in a corresponding .cmd
-        file and multiple calls would result in overwrites and
-        unwanted results when the target is up to date and only the
-        tests on changed commands trigger execution of commands.
-
-    ld
-	Link target. Often, LDFLAGS_$@ is used to set specific options to ld.
-
-	Example:
-		#arch/x86/boot/Makefile
-		LDFLAGS_bootsect := -Ttext 0x0 -s --oformat binary
-		LDFLAGS_setup    := -Ttext 0x0 -s --oformat binary -e begtext
-
-		targets += setup setup.o bootsect bootsect.o
-		$(obj)/setup $(obj)/bootsect: %: %.o FORCE
-			$(call if_changed,ld)
-
-	In this example, there are two possible targets, requiring different
-	options to the linker. The linker options are specified using the
-	LDFLAGS_$@ syntax - one for each potential target.
-	$(targets) are assigned all potential targets, by which kbuild knows
-	the targets and will:
-		1) check for commandline changes
-		2) delete target during make clean
-
-	The ": %: %.o" part of the prerequisite is a shorthand that
-	frees us from listing the setup.o and bootsect.o files.
-	Note: It is a common mistake to forget the "targets :=" assignment,
-	      resulting in the target file being recompiled for no
-	      obvious reason.
-
-    objcopy
-	Copy binary. Uses OBJCOPYFLAGS usually specified in
-	arch/$(ARCH)/Makefile.
-	OBJCOPYFLAGS_$@ may be used to set additional options.
-
-    gzip
-	Compress target. Use maximum compression to compress target.
-
-	Example:
-		#arch/x86/boot/compressed/Makefile
-		$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
-			$(call if_changed,gzip)
-
-    dtc
-	Create flattened device tree blob object suitable for linking
-	into vmlinux. Device tree blobs linked into vmlinux are placed
-	in an init section in the image. Platform code *must* copy the
-	blob to non-init memory prior to calling unflatten_device_tree().
-
-	To use this command, simply add *.dtb into obj-y or targets, or make
-	some other target depend on %.dtb
-
-	A central rule exists to create $(obj)/%.dtb from $(src)/%.dts;
-	architecture Makefiles do no need to explicitly write out that rule.
-
-	Example:
-		targets += $(dtb-y)
-		DTC_FLAGS ?= -p 1024
-
---- 6.8 Custom kbuild commands
-
-	When kbuild is executing with KBUILD_VERBOSE=0, then only a shorthand
-	of a command is normally displayed.
-	To enable this behaviour for custom commands kbuild requires
-	two variables to be set:
-	quiet_cmd_<command>	- what shall be echoed
-	      cmd_<command>	- the command to execute
-
-	Example:
-		#
-		quiet_cmd_image = BUILD   $@
-		      cmd_image = $(obj)/tools/build $(BUILDFLAGS) \
-		                                     $(obj)/vmlinux.bin > $@
-
-		targets += bzImage
-		$(obj)/bzImage: $(obj)/vmlinux.bin $(obj)/tools/build FORCE
-			$(call if_changed,image)
-			@echo 'Kernel: $@ is ready'
-
-	When updating the $(obj)/bzImage target, the line
-
-	BUILD    arch/x86/boot/bzImage
-
-	will be displayed with "make KBUILD_VERBOSE=0".
-
-
---- 6.9 Preprocessing linker scripts
-
-	When the vmlinux image is built, the linker script
-	arch/$(ARCH)/kernel/vmlinux.lds is used.
-	The script is a preprocessed variant of the file vmlinux.lds.S
-	located in the same directory.
-	kbuild knows .lds files and includes a rule *lds.S -> *lds.
-
-	Example:
-		#arch/x86/kernel/Makefile
-		always := vmlinux.lds
-
-		#Makefile
-		export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH)
-
-	The assignment to $(always) is used to tell kbuild to build the
-	target vmlinux.lds.
-	The assignment to $(CPPFLAGS_vmlinux.lds) tells kbuild to use the
-	specified options when building the target vmlinux.lds.
-
-	When building the *.lds target, kbuild uses the variables:
-	KBUILD_CPPFLAGS	: Set in top-level Makefile
-	cppflags-y	: May be set in the kbuild makefile
-	CPPFLAGS_$(@F)  : Target-specific flags.
-	                  Note that the full filename is used in this
-	                  assignment.
-
-	The kbuild infrastructure for *lds files is used in several
-	architecture-specific files.
-
---- 6.10 Generic header files
-
-	The directory include/asm-generic contains the header files
-	that may be shared between individual architectures.
-	The recommended approach how to use a generic header file is
-	to list the file in the Kbuild file.
-	See "7.2 generic-y" for further info on syntax etc.
-
---- 6.11 Post-link pass
-
-	If the file arch/xxx/Makefile.postlink exists, this makefile
-	will be invoked for post-link objects (vmlinux and modules.ko)
-	for architectures to run post-link passes on. Must also handle
-	the clean target.
-
-	This pass runs after kallsyms generation. If the architecture
-	needs to modify symbol locations, rather than manipulate the
-	kallsyms, it may be easier to add another postlink target for
-	.tmp_vmlinux? targets to be called from link-vmlinux.sh.
-
-	For example, powerpc uses this to check relocation sanity of
-	the linked vmlinux file.
-
-=== 7 Kbuild syntax for exported headers
-
-The kernel includes a set of headers that is exported to userspace.
-Many headers can be exported as-is but other headers require a
-minimal pre-processing before they are ready for user-space.
-The pre-processing does:
-- drop kernel-specific annotations
-- drop include of compiler.h
-- drop all sections that are kernel internal (guarded by ifdef __KERNEL__)
-
-All headers under include/uapi/, include/generated/uapi/,
-arch/<arch>/include/uapi/ and arch/<arch>/include/generated/uapi/
-are exported.
-
-A Kbuild file may be defined under arch/<arch>/include/uapi/asm/ and
-arch/<arch>/include/asm/ to list asm files coming from asm-generic.
-See subsequent chapter for the syntax of the Kbuild file.
-
---- 7.1 no-export-headers
-
-	no-export-headers is essentially used by include/uapi/linux/Kbuild to
-	avoid exporting specific headers (e.g. kvm.h) on architectures that do
-	not support it. It should be avoided as much as possible.
-
---- 7.2 generic-y
-
-	If an architecture uses a verbatim copy of a header from
-	include/asm-generic then this is listed in the file
-	arch/$(ARCH)/include/asm/Kbuild like this:
-
-		Example:
-			#arch/x86/include/asm/Kbuild
-			generic-y += termios.h
-			generic-y += rtc.h
-
-	During the prepare phase of the build a wrapper include
-	file is generated in the directory:
-
-		arch/$(ARCH)/include/generated/asm
-
-	When a header is exported where the architecture uses
-	the generic header a similar wrapper is generated as part
-	of the set of exported headers in the directory:
-
-		usr/include/asm
-
-	The generated wrapper will in both cases look like the following:
-
-		Example: termios.h
-			#include <asm-generic/termios.h>
-
---- 7.3 generated-y
-
-	If an architecture generates other header files alongside generic-y
-	wrappers, generated-y specifies them.
-
-	This prevents them being treated as stale asm-generic wrappers and
-	removed.
-
-		Example:
-			#arch/x86/include/asm/Kbuild
-			generated-y += syscalls_32.h
-
---- 7.4 mandatory-y
-
-	mandatory-y is essentially used by include/(uapi/)asm-generic/Kbuild
-	to define the minimum set of ASM headers that all architectures must have.
-
-	This works like optional generic-y. If a mandatory header is missing
-	in arch/$(ARCH)/include/(uapi/)/asm, Kbuild will automatically generate
-	a wrapper of the asm-generic one.
-
-	The convention is to list one subdir per line and
-	preferably in alphabetic order.
-
-=== 8 Kbuild Variables
-
-The top Makefile exports the following variables:
-
-    VERSION, PATCHLEVEL, SUBLEVEL, EXTRAVERSION
-
-	These variables define the current kernel version.  A few arch
-	Makefiles actually use these values directly; they should use
-	$(KERNELRELEASE) instead.
-
-	$(VERSION), $(PATCHLEVEL), and $(SUBLEVEL) define the basic
-	three-part version number, such as "2", "4", and "0".  These three
-	values are always numeric.
-
-	$(EXTRAVERSION) defines an even tinier sublevel for pre-patches
-	or additional patches.	It is usually some non-numeric string
-	such as "-pre4", and is often blank.
-
-    KERNELRELEASE
-
-	$(KERNELRELEASE) is a single string such as "2.4.0-pre4", suitable
-	for constructing installation directory names or showing in
-	version strings.  Some arch Makefiles use it for this purpose.
-
-    ARCH
-
-	This variable defines the target architecture, such as "i386",
-	"arm", or "sparc". Some kbuild Makefiles test $(ARCH) to
-	determine which files to compile.
-
-	By default, the top Makefile sets $(ARCH) to be the same as the
-	host system architecture.  For a cross build, a user may
-	override the value of $(ARCH) on the command line:
-
-	    make ARCH=m68k ...
-
-
-    INSTALL_PATH
-
-	This variable defines a place for the arch Makefiles to install
-	the resident kernel image and System.map file.
-	Use this for architecture-specific install targets.
-
-    INSTALL_MOD_PATH, MODLIB
-
-	$(INSTALL_MOD_PATH) specifies a prefix to $(MODLIB) for module
-	installation.  This variable is not defined in the Makefile but
-	may be passed in by the user if desired.
-
-	$(MODLIB) specifies the directory for module installation.
-	The top Makefile defines $(MODLIB) to
-	$(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE).  The user may
-	override this value on the command line if desired.
-
-    INSTALL_MOD_STRIP
-
-	If this variable is specified, it will cause modules to be stripped
-	after they are installed.  If INSTALL_MOD_STRIP is '1', then the
-	default option --strip-debug will be used.  Otherwise, the
-	INSTALL_MOD_STRIP value will be used as the option(s) to the strip
-	command.
-
-
-=== 9 Makefile language
-
-The kernel Makefiles are designed to be run with GNU Make.  The Makefiles
-use only the documented features of GNU Make, but they do use many
-GNU extensions.
-
-GNU Make supports elementary list-processing functions.  The kernel
-Makefiles use a novel style of list building and manipulation with few
-"if" statements.
-
-GNU Make has two assignment operators, ":=" and "=".  ":=" performs
-immediate evaluation of the right-hand side and stores an actual string
-into the left-hand side.  "=" is like a formula definition; it stores the
-right-hand side in an unevaluated form and then evaluates this form each
-time the left-hand side is used.
-
-There are some cases where "=" is appropriate.  Usually, though, ":="
-is the right choice.
-
-=== 10 Credits
-
-Original version made by Michael Elizabeth Chastain, <mailto:mec@shout.net>
-Updates by Kai Germaschewski <kai@tp1.ruhr-uni-bochum.de>
-Updates by Sam Ravnborg <sam@ravnborg.org>
-Language QA by Jan Engelhardt <jengelh@gmx.de>
-
-=== 11 TODO
-
-- Describe how kbuild supports shipped files with _shipped.
-- Generating offset header files.
-- Add more variables to section 7?
-
-
-
diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst
new file mode 100644
index 000000000000..24e763482650
--- /dev/null
+++ b/Documentation/kbuild/modules.rst
@@ -0,0 +1,571 @@
+=========================
+Building External Modules
+=========================
+
+This document describes how to build an out-of-tree kernel module.
+
+.. Table of Contents
+
+	=== 1 Introduction
+	=== 2 How to Build External Modules
+	   --- 2.1 Command Syntax
+	   --- 2.2 Options
+	   --- 2.3 Targets
+	   --- 2.4 Building Separate Files
+	=== 3. Creating a Kbuild File for an External Module
+	   --- 3.1 Shared Makefile
+	   --- 3.2 Separate Kbuild file and Makefile
+	   --- 3.3 Binary Blobs
+	   --- 3.4 Building Multiple Modules
+	=== 4. Include Files
+	   --- 4.1 Kernel Includes
+	   --- 4.2 Single Subdirectory
+	   --- 4.3 Several Subdirectories
+	=== 5. Module Installation
+	   --- 5.1 INSTALL_MOD_PATH
+	   --- 5.2 INSTALL_MOD_DIR
+	=== 6. Module Versioning
+	   --- 6.1 Symbols From the Kernel (vmlinux + modules)
+	   --- 6.2 Symbols and External Modules
+	   --- 6.3 Symbols From Another External Module
+	=== 7. Tips & Tricks
+	   --- 7.1 Testing for CONFIG_FOO_BAR
+
+
+
+1. Introduction
+===============
+
+"kbuild" is the build system used by the Linux kernel. Modules must use
+kbuild to stay compatible with changes in the build infrastructure and
+to pick up the right flags to "gcc." Functionality for building modules
+both in-tree and out-of-tree is provided. The method for building
+either is similar, and all modules are initially developed and built
+out-of-tree.
+
+Covered in this document is information aimed at developers interested
+in building out-of-tree (or "external") modules. The author of an
+external module should supply a makefile that hides most of the
+complexity, so one only has to type "make" to build the module. This is
+easily accomplished, and a complete example will be presented in
+section 3.
+
+
+2. How to Build External Modules
+================================
+
+To build external modules, you must have a prebuilt kernel available
+that contains the configuration and header files used in the build.
+Also, the kernel must have been built with modules enabled. If you are
+using a distribution kernel, there will be a package for the kernel you
+are running provided by your distribution.
+
+An alternative is to use the "make" target "modules_prepare." This will
+make sure the kernel contains the information required. The target
+exists solely as a simple way to prepare a kernel source tree for
+building external modules.
+
+NOTE: "modules_prepare" will not build Module.symvers even if
+CONFIG_MODVERSIONS is set; therefore, a full kernel build needs to be
+executed to make module versioning work.
+
+2.1 Command Syntax
+==================
+
+	The command to build an external module is::
+
+		$ make -C <path_to_kernel_src> M=$PWD
+
+	The kbuild system knows that an external module is being built
+	due to the "M=<dir>" option given in the command.
+
+	To build against the running kernel use::
+
+		$ make -C /lib/modules/`uname -r`/build M=$PWD
+
+	Then to install the module(s) just built, add the target
+	"modules_install" to the command::
+
+		$ make -C /lib/modules/`uname -r`/build M=$PWD modules_install
+
+2.2 Options
+===========
+
+	($KDIR refers to the path of the kernel source directory.)
+
+	make -C $KDIR M=$PWD
+
+	-C $KDIR
+		The directory where the kernel source is located.
+		"make" will actually change to the specified directory
+		when executing and will change back when finished.
+
+	M=$PWD
+		Informs kbuild that an external module is being built.
+		The value given to "M" is the absolute path of the
+		directory where the external module (kbuild file) is
+		located.
+
+2.3 Targets
+===========
+
+	When building an external module, only a subset of the "make"
+	targets are available.
+
+	make -C $KDIR M=$PWD [target]
+
+	The default will build the module(s) located in the current
+	directory, so a target does not need to be specified. All
+	output files will also be generated in this directory. No
+	attempts are made to update the kernel source, and it is a
+	precondition that a successful "make" has been executed for the
+	kernel.
+
+	modules
+		The default target for external modules. It has the
+		same functionality as if no target was specified. See
+		description above.
+
+	modules_install
+		Install the external module(s). The default location is
+		/lib/modules/<kernel_release>/extra/, but a prefix may
+		be added with INSTALL_MOD_PATH (discussed in section 5).
+
+	clean
+		Remove all generated files in the module directory only.
+
+	help
+		List the available targets for external modules.
+
+2.4 Building Separate Files
+===========================
+
+	It is possible to build single files that are part of a module.
+	This works equally well for the kernel, a module, and even for
+	external modules.
+
+	Example (The module foo.ko, consist of bar.o and baz.o)::
+
+		make -C $KDIR M=$PWD bar.lst
+		make -C $KDIR M=$PWD baz.o
+		make -C $KDIR M=$PWD foo.ko
+		make -C $KDIR M=$PWD ./
+
+
+3. Creating a Kbuild File for an External Module
+================================================
+
+In the last section we saw the command to build a module for the
+running kernel. The module is not actually built, however, because a
+build file is required. Contained in this file will be the name of
+the module(s) being built, along with the list of requisite source
+files. The file may be as simple as a single line::
+
+	obj-m := <module_name>.o
+
+The kbuild system will build <module_name>.o from <module_name>.c,
+and, after linking, will result in the kernel module <module_name>.ko.
+The above line can be put in either a "Kbuild" file or a "Makefile."
+When the module is built from multiple sources, an additional line is
+needed listing the files::
+
+	<module_name>-y := <src1>.o <src2>.o ...
+
+NOTE: Further documentation describing the syntax used by kbuild is
+located in Documentation/kbuild/makefiles.rst.
+
+The examples below demonstrate how to create a build file for the
+module 8123.ko, which is built from the following files::
+
+	8123_if.c
+	8123_if.h
+	8123_pci.c
+	8123_bin.o_shipped	<= Binary blob
+
+--- 3.1 Shared Makefile
+
+	An external module always includes a wrapper makefile that
+	supports building the module using "make" with no arguments.
+	This target is not used by kbuild; it is only for convenience.
+	Additional functionality, such as test targets, can be included
+	but should be filtered out from kbuild due to possible name
+	clashes.
+
+	Example 1::
+
+		--> filename: Makefile
+		ifneq ($(KERNELRELEASE),)
+		# kbuild part of makefile
+		obj-m  := 8123.o
+		8123-y := 8123_if.o 8123_pci.o 8123_bin.o
+
+		else
+		# normal makefile
+		KDIR ?= /lib/modules/`uname -r`/build
+
+		default:
+			$(MAKE) -C $(KDIR) M=$$PWD
+
+		# Module specific targets
+		genbin:
+			echo "X" > 8123_bin.o_shipped
+
+		endif
+
+	The check for KERNELRELEASE is used to separate the two parts
+	of the makefile. In the example, kbuild will only see the two
+	assignments, whereas "make" will see everything except these
+	two assignments. This is due to two passes made on the file:
+	the first pass is by the "make" instance run on the command
+	line; the second pass is by the kbuild system, which is
+	initiated by the parameterized "make" in the default target.
+
+3.2 Separate Kbuild File and Makefile
+-------------------------------------
+
+	In newer versions of the kernel, kbuild will first look for a
+	file named "Kbuild," and only if that is not found, will it
+	then look for a makefile. Utilizing a "Kbuild" file allows us
+	to split up the makefile from example 1 into two files:
+
+	Example 2::
+
+		--> filename: Kbuild
+		obj-m  := 8123.o
+		8123-y := 8123_if.o 8123_pci.o 8123_bin.o
+
+		--> filename: Makefile
+		KDIR ?= /lib/modules/`uname -r`/build
+
+		default:
+			$(MAKE) -C $(KDIR) M=$$PWD
+
+		# Module specific targets
+		genbin:
+			echo "X" > 8123_bin.o_shipped
+
+	The split in example 2 is questionable due to the simplicity of
+	each file; however, some external modules use makefiles
+	consisting of several hundred lines, and here it really pays
+	off to separate the kbuild part from the rest.
+
+	The next example shows a backward compatible version.
+
+	Example 3::
+
+		--> filename: Kbuild
+		obj-m  := 8123.o
+		8123-y := 8123_if.o 8123_pci.o 8123_bin.o
+
+		--> filename: Makefile
+		ifneq ($(KERNELRELEASE),)
+		# kbuild part of makefile
+		include Kbuild
+
+		else
+		# normal makefile
+		KDIR ?= /lib/modules/`uname -r`/build
+
+		default:
+			$(MAKE) -C $(KDIR) M=$$PWD
+
+		# Module specific targets
+		genbin:
+			echo "X" > 8123_bin.o_shipped
+
+		endif
+
+	Here the "Kbuild" file is included from the makefile. This
+	allows an older version of kbuild, which only knows of
+	makefiles, to be used when the "make" and kbuild parts are
+	split into separate files.
+
+3.3 Binary Blobs
+----------------
+
+	Some external modules need to include an object file as a blob.
+	kbuild has support for this, but requires the blob file to be
+	named <filename>_shipped. When the kbuild rules kick in, a copy
+	of <filename>_shipped is created with _shipped stripped off,
+	giving us <filename>. This shortened filename can be used in
+	the assignment to the module.
+
+	Throughout this section, 8123_bin.o_shipped has been used to
+	build the kernel module 8123.ko; it has been included as
+	8123_bin.o::
+
+		8123-y := 8123_if.o 8123_pci.o 8123_bin.o
+
+	Although there is no distinction between the ordinary source
+	files and the binary file, kbuild will pick up different rules
+	when creating the object file for the module.
+
+3.4 Building Multiple Modules
+=============================
+
+	kbuild supports building multiple modules with a single build
+	file. For example, if you wanted to build two modules, foo.ko
+	and bar.ko, the kbuild lines would be::
+
+		obj-m := foo.o bar.o
+		foo-y := <foo_srcs>
+		bar-y := <bar_srcs>
+
+	It is that simple!
+
+
+4. Include Files
+================
+
+Within the kernel, header files are kept in standard locations
+according to the following rule:
+
+	* If the header file only describes the internal interface of a
+	  module, then the file is placed in the same directory as the
+	  source files.
+	* If the header file describes an interface used by other parts
+	  of the kernel that are located in different directories, then
+	  the file is placed in include/linux/.
+
+	  NOTE:
+	      There are two notable exceptions to this rule: larger
+	      subsystems have their own directory under include/, such as
+	      include/scsi; and architecture specific headers are located
+	      under arch/$(ARCH)/include/.
+
+4.1 Kernel Includes
+-------------------
+
+	To include a header file located under include/linux/, simply
+	use::
+
+		#include <linux/module.h>
+
+	kbuild will add options to "gcc" so the relevant directories
+	are searched.
+
+4.2 Single Subdirectory
+-----------------------
+
+	External modules tend to place header files in a separate
+	include/ directory where their source is located, although this
+	is not the usual kernel style. To inform kbuild of the
+	directory, use either ccflags-y or CFLAGS_<filename>.o.
+
+	Using the example from section 3, if we moved 8123_if.h to a
+	subdirectory named include, the resulting kbuild file would
+	look like::
+
+		--> filename: Kbuild
+		obj-m := 8123.o
+
+		ccflags-y := -Iinclude
+		8123-y := 8123_if.o 8123_pci.o 8123_bin.o
+
+	Note that in the assignment there is no space between -I and
+	the path. This is a limitation of kbuild: there must be no
+	space present.
+
+4.3 Several Subdirectories
+--------------------------
+
+	kbuild can handle files that are spread over several directories.
+	Consider the following example::
+
+		.
+		|__ src
+		|   |__ complex_main.c
+		|   |__ hal
+		|	|__ hardwareif.c
+		|	|__ include
+		|	    |__ hardwareif.h
+		|__ include
+		|__ complex.h
+
+	To build the module complex.ko, we then need the following
+	kbuild file::
+
+		--> filename: Kbuild
+		obj-m := complex.o
+		complex-y := src/complex_main.o
+		complex-y += src/hal/hardwareif.o
+
+		ccflags-y := -I$(src)/include
+		ccflags-y += -I$(src)/src/hal/include
+
+	As you can see, kbuild knows how to handle object files located
+	in other directories. The trick is to specify the directory
+	relative to the kbuild file's location. That being said, this
+	is NOT recommended practice.
+
+	For the header files, kbuild must be explicitly told where to
+	look. When kbuild executes, the current directory is always the
+	root of the kernel tree (the argument to "-C") and therefore an
+	absolute path is needed. $(src) provides the absolute path by
+	pointing to the directory where the currently executing kbuild
+	file is located.
+
+
+5. Module Installation
+======================
+
+Modules which are included in the kernel are installed in the
+directory:
+
+	/lib/modules/$(KERNELRELEASE)/kernel/
+
+And external modules are installed in:
+
+	/lib/modules/$(KERNELRELEASE)/extra/
+
+5.1 INSTALL_MOD_PATH
+--------------------
+
+	Above are the default directories but as always some level of
+	customization is possible. A prefix can be added to the
+	installation path using the variable INSTALL_MOD_PATH::
+
+		$ make INSTALL_MOD_PATH=/frodo modules_install
+		=> Install dir: /frodo/lib/modules/$(KERNELRELEASE)/kernel/
+
+	INSTALL_MOD_PATH may be set as an ordinary shell variable or,
+	as shown above, can be specified on the command line when
+	calling "make." This has effect when installing both in-tree
+	and out-of-tree modules.
+
+5.2 INSTALL_MOD_DIR
+-------------------
+
+	External modules are by default installed to a directory under
+	/lib/modules/$(KERNELRELEASE)/extra/, but you may wish to
+	locate modules for a specific functionality in a separate
+	directory. For this purpose, use INSTALL_MOD_DIR to specify an
+	alternative name to "extra."::
+
+		$ make INSTALL_MOD_DIR=gandalf -C $KDIR \
+		       M=$PWD modules_install
+		=> Install dir: /lib/modules/$(KERNELRELEASE)/gandalf/
+
+
+6. Module Versioning
+====================
+
+Module versioning is enabled by the CONFIG_MODVERSIONS tag, and is used
+as a simple ABI consistency check. A CRC value of the full prototype
+for an exported symbol is created. When a module is loaded/used, the
+CRC values contained in the kernel are compared with similar values in
+the module; if they are not equal, the kernel refuses to load the
+module.
+
+Module.symvers contains a list of all exported symbols from a kernel
+build.
+
+6.1 Symbols From the Kernel (vmlinux + modules)
+-----------------------------------------------
+
+	During a kernel build, a file named Module.symvers will be
+	generated. Module.symvers contains all exported symbols from
+	the kernel and compiled modules. For each symbol, the
+	corresponding CRC value is also stored.
+
+	The syntax of the Module.symvers file is::
+
+		<CRC>	    <Symbol>	       <module>
+
+		0x2d036834  scsi_remove_host   drivers/scsi/scsi_mod
+
+	For a kernel build without CONFIG_MODVERSIONS enabled, the CRC
+	would read 0x00000000.
+
+	Module.symvers serves two purposes:
+
+	1) It lists all exported symbols from vmlinux and all modules.
+	2) It lists the CRC if CONFIG_MODVERSIONS is enabled.
+
+6.2 Symbols and External Modules
+--------------------------------
+
+	When building an external module, the build system needs access
+	to the symbols from the kernel to check if all external symbols
+	are defined. This is done in the MODPOST step. modpost obtains
+	the symbols by reading Module.symvers from the kernel source
+	tree. If a Module.symvers file is present in the directory
+	where the external module is being built, this file will be
+	read too. During the MODPOST step, a new Module.symvers file
+	will be written containing all exported symbols that were not
+	defined in the kernel.
+
+--- 6.3 Symbols From Another External Module
+
+	Sometimes, an external module uses exported symbols from
+	another external module. kbuild needs to have full knowledge of
+	all symbols to avoid spitting out warnings about undefined
+	symbols. Three solutions exist for this situation.
+
+	NOTE: The method with a top-level kbuild file is recommended
+	but may be impractical in certain situations.
+
+	Use a top-level kbuild file
+		If you have two modules, foo.ko and bar.ko, where
+		foo.ko needs symbols from bar.ko, you can use a
+		common top-level kbuild file so both modules are
+		compiled in the same build. Consider the following
+		directory layout::
+
+			./foo/ <= contains foo.ko
+			./bar/ <= contains bar.ko
+
+		The top-level kbuild file would then look like::
+
+			#./Kbuild (or ./Makefile):
+				obj-y := foo/ bar/
+
+		And executing::
+
+			$ make -C $KDIR M=$PWD
+
+		will then do the expected and compile both modules with
+		full knowledge of symbols from either module.
+
+	Use an extra Module.symvers file
+		When an external module is built, a Module.symvers file
+		is generated containing all exported symbols which are
+		not defined in the kernel. To get access to symbols
+		from bar.ko, copy the Module.symvers file from the
+		compilation of bar.ko to the directory where foo.ko is
+		built. During the module build, kbuild will read the
+		Module.symvers file in the directory of the external
+		module, and when the build is finished, a new
+		Module.symvers file is created containing the sum of
+		all symbols defined and not part of the kernel.
+
+	Use "make" variable KBUILD_EXTRA_SYMBOLS
+		If it is impractical to copy Module.symvers from
+		another module, you can assign a space separated list
+		of files to KBUILD_EXTRA_SYMBOLS in your build file.
+		These files will be loaded by modpost during the
+		initialization of its symbol tables.
+
+
+7. Tips & Tricks
+================
+
+7.1 Testing for CONFIG_FOO_BAR
+------------------------------
+
+	Modules often need to check for certain `CONFIG_` options to
+	decide if a specific feature is included in the module. In
+	kbuild this is done by referencing the `CONFIG_` variable
+	directly::
+
+		#fs/ext2/Makefile
+		obj-$(CONFIG_EXT2_FS) += ext2.o
+
+		ext2-y := balloc.o bitmap.o dir.o
+		ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o
+
+	External modules have traditionally used "grep" to check for
+	specific `CONFIG_` settings directly in .config. This usage is
+	broken. As introduced before, external modules should use
+	kbuild for building and can therefore use the same methods as
+	in-tree modules when testing for `CONFIG_` definitions.
diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt
deleted file mode 100644
index 80295c613e37..000000000000
--- a/Documentation/kbuild/modules.txt
+++ /dev/null
@@ -1,541 +0,0 @@
-Building External Modules
-
-This document describes how to build an out-of-tree kernel module.
-
-=== Table of Contents
-
-	=== 1 Introduction
-	=== 2 How to Build External Modules
-	   --- 2.1 Command Syntax
-	   --- 2.2 Options
-	   --- 2.3 Targets
-	   --- 2.4 Building Separate Files
-	=== 3. Creating a Kbuild File for an External Module
-	   --- 3.1 Shared Makefile
-	   --- 3.2 Separate Kbuild file and Makefile
-	   --- 3.3 Binary Blobs
-	   --- 3.4 Building Multiple Modules
-	=== 4. Include Files
-	   --- 4.1 Kernel Includes
-	   --- 4.2 Single Subdirectory
-	   --- 4.3 Several Subdirectories
-	=== 5. Module Installation
-	   --- 5.1 INSTALL_MOD_PATH
-	   --- 5.2 INSTALL_MOD_DIR
-	=== 6. Module Versioning
-	   --- 6.1 Symbols From the Kernel (vmlinux + modules)
-	   --- 6.2 Symbols and External Modules
-	   --- 6.3 Symbols From Another External Module
-	=== 7. Tips & Tricks
-	   --- 7.1 Testing for CONFIG_FOO_BAR
-
-
-
-=== 1. Introduction
-
-"kbuild" is the build system used by the Linux kernel. Modules must use
-kbuild to stay compatible with changes in the build infrastructure and
-to pick up the right flags to "gcc." Functionality for building modules
-both in-tree and out-of-tree is provided. The method for building
-either is similar, and all modules are initially developed and built
-out-of-tree.
-
-Covered in this document is information aimed at developers interested
-in building out-of-tree (or "external") modules. The author of an
-external module should supply a makefile that hides most of the
-complexity, so one only has to type "make" to build the module. This is
-easily accomplished, and a complete example will be presented in
-section 3.
-
-
-=== 2. How to Build External Modules
-
-To build external modules, you must have a prebuilt kernel available
-that contains the configuration and header files used in the build.
-Also, the kernel must have been built with modules enabled. If you are
-using a distribution kernel, there will be a package for the kernel you
-are running provided by your distribution.
-
-An alternative is to use the "make" target "modules_prepare." This will
-make sure the kernel contains the information required. The target
-exists solely as a simple way to prepare a kernel source tree for
-building external modules.
-
-NOTE: "modules_prepare" will not build Module.symvers even if
-CONFIG_MODVERSIONS is set; therefore, a full kernel build needs to be
-executed to make module versioning work.
-
---- 2.1 Command Syntax
-
-	The command to build an external module is:
-
-		$ make -C <path_to_kernel_src> M=$PWD
-
-	The kbuild system knows that an external module is being built
-	due to the "M=<dir>" option given in the command.
-
-	To build against the running kernel use:
-
-		$ make -C /lib/modules/`uname -r`/build M=$PWD
-
-	Then to install the module(s) just built, add the target
-	"modules_install" to the command:
-
-		$ make -C /lib/modules/`uname -r`/build M=$PWD modules_install
-
---- 2.2 Options
-
-	($KDIR refers to the path of the kernel source directory.)
-
-	make -C $KDIR M=$PWD
-
-	-C $KDIR
-		The directory where the kernel source is located.
-		"make" will actually change to the specified directory
-		when executing and will change back when finished.
-
-	M=$PWD
-		Informs kbuild that an external module is being built.
-		The value given to "M" is the absolute path of the
-		directory where the external module (kbuild file) is
-		located.
-
---- 2.3 Targets
-
-	When building an external module, only a subset of the "make"
-	targets are available.
-
-	make -C $KDIR M=$PWD [target]
-
-	The default will build the module(s) located in the current
-	directory, so a target does not need to be specified. All
-	output files will also be generated in this directory. No
-	attempts are made to update the kernel source, and it is a
-	precondition that a successful "make" has been executed for the
-	kernel.
-
-	modules
-		The default target for external modules. It has the
-		same functionality as if no target was specified. See
-		description above.
-
-	modules_install
-		Install the external module(s). The default location is
-		/lib/modules/<kernel_release>/extra/, but a prefix may
-		be added with INSTALL_MOD_PATH (discussed in section 5).
-
-	clean
-		Remove all generated files in the module directory only.
-
-	help
-		List the available targets for external modules.
-
---- 2.4 Building Separate Files
-
-	It is possible to build single files that are part of a module.
-	This works equally well for the kernel, a module, and even for
-	external modules.
-
-	Example (The module foo.ko, consist of bar.o and baz.o):
-		make -C $KDIR M=$PWD bar.lst
-		make -C $KDIR M=$PWD baz.o
-		make -C $KDIR M=$PWD foo.ko
-		make -C $KDIR M=$PWD ./
-
-
-=== 3. Creating a Kbuild File for an External Module
-
-In the last section we saw the command to build a module for the
-running kernel. The module is not actually built, however, because a
-build file is required. Contained in this file will be the name of
-the module(s) being built, along with the list of requisite source
-files. The file may be as simple as a single line:
-
-	obj-m := <module_name>.o
-
-The kbuild system will build <module_name>.o from <module_name>.c,
-and, after linking, will result in the kernel module <module_name>.ko.
-The above line can be put in either a "Kbuild" file or a "Makefile."
-When the module is built from multiple sources, an additional line is
-needed listing the files:
-
-	<module_name>-y := <src1>.o <src2>.o ...
-
-NOTE: Further documentation describing the syntax used by kbuild is
-located in Documentation/kbuild/makefiles.txt.
-
-The examples below demonstrate how to create a build file for the
-module 8123.ko, which is built from the following files:
-
-	8123_if.c
-	8123_if.h
-	8123_pci.c
-	8123_bin.o_shipped	<= Binary blob
-
---- 3.1 Shared Makefile
-
-	An external module always includes a wrapper makefile that
-	supports building the module using "make" with no arguments.
-	This target is not used by kbuild; it is only for convenience.
-	Additional functionality, such as test targets, can be included
-	but should be filtered out from kbuild due to possible name
-	clashes.
-
-	Example 1:
-		--> filename: Makefile
-		ifneq ($(KERNELRELEASE),)
-		# kbuild part of makefile
-		obj-m  := 8123.o
-		8123-y := 8123_if.o 8123_pci.o 8123_bin.o
-
-		else
-		# normal makefile
-		KDIR ?= /lib/modules/`uname -r`/build
-
-		default:
-			$(MAKE) -C $(KDIR) M=$$PWD
-
-		# Module specific targets
-		genbin:
-			echo "X" > 8123_bin.o_shipped
-
-		endif
-
-	The check for KERNELRELEASE is used to separate the two parts
-	of the makefile. In the example, kbuild will only see the two
-	assignments, whereas "make" will see everything except these
-	two assignments. This is due to two passes made on the file:
-	the first pass is by the "make" instance run on the command
-	line; the second pass is by the kbuild system, which is
-	initiated by the parameterized "make" in the default target.
-
---- 3.2 Separate Kbuild File and Makefile
-
-	In newer versions of the kernel, kbuild will first look for a
-	file named "Kbuild," and only if that is not found, will it
-	then look for a makefile. Utilizing a "Kbuild" file allows us
-	to split up the makefile from example 1 into two files:
-
-	Example 2:
-		--> filename: Kbuild
-		obj-m  := 8123.o
-		8123-y := 8123_if.o 8123_pci.o 8123_bin.o
-
-		--> filename: Makefile
-		KDIR ?= /lib/modules/`uname -r`/build
-
-		default:
-			$(MAKE) -C $(KDIR) M=$$PWD
-
-		# Module specific targets
-		genbin:
-			echo "X" > 8123_bin.o_shipped
-
-	The split in example 2 is questionable due to the simplicity of
-	each file; however, some external modules use makefiles
-	consisting of several hundred lines, and here it really pays
-	off to separate the kbuild part from the rest.
-
-	The next example shows a backward compatible version.
-
-	Example 3:
-		--> filename: Kbuild
-		obj-m  := 8123.o
-		8123-y := 8123_if.o 8123_pci.o 8123_bin.o
-
-		--> filename: Makefile
-		ifneq ($(KERNELRELEASE),)
-		# kbuild part of makefile
-		include Kbuild
-
-		else
-		# normal makefile
-		KDIR ?= /lib/modules/`uname -r`/build
-
-		default:
-			$(MAKE) -C $(KDIR) M=$$PWD
-
-		# Module specific targets
-		genbin:
-			echo "X" > 8123_bin.o_shipped
-
-		endif
-
-	Here the "Kbuild" file is included from the makefile. This
-	allows an older version of kbuild, which only knows of
-	makefiles, to be used when the "make" and kbuild parts are
-	split into separate files.
-
---- 3.3 Binary Blobs
-
-	Some external modules need to include an object file as a blob.
-	kbuild has support for this, but requires the blob file to be
-	named <filename>_shipped. When the kbuild rules kick in, a copy
-	of <filename>_shipped is created with _shipped stripped off,
-	giving us <filename>. This shortened filename can be used in
-	the assignment to the module.
-
-	Throughout this section, 8123_bin.o_shipped has been used to
-	build the kernel module 8123.ko; it has been included as
-	8123_bin.o.
-
-		8123-y := 8123_if.o 8123_pci.o 8123_bin.o
-
-	Although there is no distinction between the ordinary source
-	files and the binary file, kbuild will pick up different rules
-	when creating the object file for the module.
-
---- 3.4 Building Multiple Modules
-
-	kbuild supports building multiple modules with a single build
-	file. For example, if you wanted to build two modules, foo.ko
-	and bar.ko, the kbuild lines would be:
-
-		obj-m := foo.o bar.o
-		foo-y := <foo_srcs>
-		bar-y := <bar_srcs>
-
-	It is that simple!
-
-
-=== 4. Include Files
-
-Within the kernel, header files are kept in standard locations
-according to the following rule:
-
-	* If the header file only describes the internal interface of a
-	  module, then the file is placed in the same directory as the
-	  source files.
-	* If the header file describes an interface used by other parts
-	  of the kernel that are located in different directories, then
-	  the file is placed in include/linux/.
-
-	  NOTE: There are two notable exceptions to this rule: larger
-	  subsystems have their own directory under include/, such as
-	  include/scsi; and architecture specific headers are located
-	  under arch/$(ARCH)/include/.
-
---- 4.1 Kernel Includes
-
-	To include a header file located under include/linux/, simply
-	use:
-
-		#include <linux/module.h>
-
-	kbuild will add options to "gcc" so the relevant directories
-	are searched.
-
---- 4.2 Single Subdirectory
-
-	External modules tend to place header files in a separate
-	include/ directory where their source is located, although this
-	is not the usual kernel style. To inform kbuild of the
-	directory, use either ccflags-y or CFLAGS_<filename>.o.
-
-	Using the example from section 3, if we moved 8123_if.h to a
-	subdirectory named include, the resulting kbuild file would
-	look like:
-
-		--> filename: Kbuild
-		obj-m := 8123.o
-
-		ccflags-y := -Iinclude
-		8123-y := 8123_if.o 8123_pci.o 8123_bin.o
-
-	Note that in the assignment there is no space between -I and
-	the path. This is a limitation of kbuild: there must be no
-	space present.
-
---- 4.3 Several Subdirectories
-
-	kbuild can handle files that are spread over several directories.
-	Consider the following example:
-
-	.
-	|__ src
-	|   |__ complex_main.c
-	|   |__ hal
-	|	|__ hardwareif.c
-	|	|__ include
-	|	    |__ hardwareif.h
-	|__ include
-	    |__ complex.h
-
-	To build the module complex.ko, we then need the following
-	kbuild file:
-
-		--> filename: Kbuild
-		obj-m := complex.o
-		complex-y := src/complex_main.o
-		complex-y += src/hal/hardwareif.o
-
-		ccflags-y := -I$(src)/include
-		ccflags-y += -I$(src)/src/hal/include
-
-	As you can see, kbuild knows how to handle object files located
-	in other directories. The trick is to specify the directory
-	relative to the kbuild file's location. That being said, this
-	is NOT recommended practice.
-
-	For the header files, kbuild must be explicitly told where to
-	look. When kbuild executes, the current directory is always the
-	root of the kernel tree (the argument to "-C") and therefore an
-	absolute path is needed. $(src) provides the absolute path by
-	pointing to the directory where the currently executing kbuild
-	file is located.
-
-
-=== 5. Module Installation
-
-Modules which are included in the kernel are installed in the
-directory:
-
-	/lib/modules/$(KERNELRELEASE)/kernel/
-
-And external modules are installed in:
-
-	/lib/modules/$(KERNELRELEASE)/extra/
-
---- 5.1 INSTALL_MOD_PATH
-
-	Above are the default directories but as always some level of
-	customization is possible. A prefix can be added to the
-	installation path using the variable INSTALL_MOD_PATH:
-
-		$ make INSTALL_MOD_PATH=/frodo modules_install
-		=> Install dir: /frodo/lib/modules/$(KERNELRELEASE)/kernel/
-
-	INSTALL_MOD_PATH may be set as an ordinary shell variable or,
-	as shown above, can be specified on the command line when
-	calling "make." This has effect when installing both in-tree
-	and out-of-tree modules.
-
---- 5.2 INSTALL_MOD_DIR
-
-	External modules are by default installed to a directory under
-	/lib/modules/$(KERNELRELEASE)/extra/, but you may wish to
-	locate modules for a specific functionality in a separate
-	directory. For this purpose, use INSTALL_MOD_DIR to specify an
-	alternative name to "extra."
-
-		$ make INSTALL_MOD_DIR=gandalf -C $KDIR \
-		       M=$PWD modules_install
-		=> Install dir: /lib/modules/$(KERNELRELEASE)/gandalf/
-
-
-=== 6. Module Versioning
-
-Module versioning is enabled by the CONFIG_MODVERSIONS tag, and is used
-as a simple ABI consistency check. A CRC value of the full prototype
-for an exported symbol is created. When a module is loaded/used, the
-CRC values contained in the kernel are compared with similar values in
-the module; if they are not equal, the kernel refuses to load the
-module.
-
-Module.symvers contains a list of all exported symbols from a kernel
-build.
-
---- 6.1 Symbols From the Kernel (vmlinux + modules)
-
-	During a kernel build, a file named Module.symvers will be
-	generated. Module.symvers contains all exported symbols from
-	the kernel and compiled modules. For each symbol, the
-	corresponding CRC value is also stored.
-
-	The syntax of the Module.symvers file is:
-		<CRC>	    <Symbol>	       <module>
-
-		0x2d036834  scsi_remove_host   drivers/scsi/scsi_mod
-
-	For a kernel build without CONFIG_MODVERSIONS enabled, the CRC
-	would read 0x00000000.
-
-	Module.symvers serves two purposes:
-	1) It lists all exported symbols from vmlinux and all modules.
-	2) It lists the CRC if CONFIG_MODVERSIONS is enabled.
-
---- 6.2 Symbols and External Modules
-
-	When building an external module, the build system needs access
-	to the symbols from the kernel to check if all external symbols
-	are defined. This is done in the MODPOST step. modpost obtains
-	the symbols by reading Module.symvers from the kernel source
-	tree. If a Module.symvers file is present in the directory
-	where the external module is being built, this file will be
-	read too. During the MODPOST step, a new Module.symvers file
-	will be written containing all exported symbols that were not
-	defined in the kernel.
-
---- 6.3 Symbols From Another External Module
-
-	Sometimes, an external module uses exported symbols from
-	another external module. kbuild needs to have full knowledge of
-	all symbols to avoid spitting out warnings about undefined
-	symbols. Three solutions exist for this situation.
-
-	NOTE: The method with a top-level kbuild file is recommended
-	but may be impractical in certain situations.
-
-	Use a top-level kbuild file
-		If you have two modules, foo.ko and bar.ko, where
-		foo.ko needs symbols from bar.ko, you can use a
-		common top-level kbuild file so both modules are
-		compiled in the same build. Consider the following
-		directory layout:
-
-		./foo/ <= contains foo.ko
-		./bar/ <= contains bar.ko
-
-		The top-level kbuild file would then look like:
-
-		#./Kbuild (or ./Makefile):
-			obj-y := foo/ bar/
-
-		And executing
-
-			$ make -C $KDIR M=$PWD
-
-		will then do the expected and compile both modules with
-		full knowledge of symbols from either module.
-
-	Use an extra Module.symvers file
-		When an external module is built, a Module.symvers file
-		is generated containing all exported symbols which are
-		not defined in the kernel. To get access to symbols
-		from bar.ko, copy the Module.symvers file from the
-		compilation of bar.ko to the directory where foo.ko is
-		built. During the module build, kbuild will read the
-		Module.symvers file in the directory of the external
-		module, and when the build is finished, a new
-		Module.symvers file is created containing the sum of
-		all symbols defined and not part of the kernel.
-
-	Use "make" variable KBUILD_EXTRA_SYMBOLS
-		If it is impractical to copy Module.symvers from
-		another module, you can assign a space separated list
-		of files to KBUILD_EXTRA_SYMBOLS in your build file.
-		These files will be loaded by modpost during the
-		initialization of its symbol tables.
-
-
-=== 7. Tips & Tricks
-
---- 7.1 Testing for CONFIG_FOO_BAR
-
-	Modules often need to check for certain CONFIG_ options to
-	decide if a specific feature is included in the module. In
-	kbuild this is done by referencing the CONFIG_ variable
-	directly.
-
-		#fs/ext2/Makefile
-		obj-$(CONFIG_EXT2_FS) += ext2.o
-
-		ext2-y := balloc.o bitmap.o dir.o
-		ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o
-
-	External modules have traditionally used "grep" to check for
-	specific CONFIG_ settings directly in .config. This usage is
-	broken. As introduced before, external modules should use
-	kbuild for building and can therefore use the same methods as
-	in-tree modules when testing for CONFIG_ definitions.
-
diff --git a/Documentation/kernel-hacking/hacking.rst b/Documentation/kernel-hacking/hacking.rst
index d824e4feaff3..5891a701a159 100644
--- a/Documentation/kernel-hacking/hacking.rst
+++ b/Documentation/kernel-hacking/hacking.rst
@@ -718,7 +718,7 @@ make a neat patch, there's administrative work to be done:
 -  Usually you want a configuration option for your kernel hack. Edit
    ``Kconfig`` in the appropriate directory. The Config language is
    simple to use by cut and paste, and there's complete documentation in
-   ``Documentation/kbuild/kconfig-language.txt``.
+   ``Documentation/kbuild/kconfig-language.rst``.
 
    In your description of the option, make sure you address both the
    expert user and the user who knows nothing about your feature.
@@ -728,7 +728,7 @@ make a neat patch, there's administrative work to be done:
 
 -  Edit the ``Makefile``: the CONFIG variables are exported here so you
    can usually just add a "obj-$(CONFIG_xxx) += xxx.o" line. The syntax
-   is documented in ``Documentation/kbuild/makefiles.txt``.
+   is documented in ``Documentation/kbuild/makefiles.rst``.
 
 -  Put yourself in ``CREDITS`` if you've done something noteworthy,
    usually beyond a single file (your name should be at the top of the
diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst
index fa864a51e6ea..f4a2198187f9 100644
--- a/Documentation/process/coding-style.rst
+++ b/Documentation/process/coding-style.rst
@@ -686,7 +686,7 @@ filesystems) should advertise this prominently in their prompt string::
 	...
 
 For full documentation on the configuration files, see the file
-Documentation/kbuild/kconfig-language.txt.
+Documentation/kbuild/kconfig-language.rst.
 
 
 11) Data structures
diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst
index c88867b173d9..365efc9e4aa8 100644
--- a/Documentation/process/submit-checklist.rst
+++ b/Documentation/process/submit-checklist.rst
@@ -39,7 +39,7 @@ and elsewhere regarding submitting Linux kernel patches.
 
 6) Any new or modified ``CONFIG`` options do not muck up the config menu and
    default to off unless they meet the exception criteria documented in
-   ``Documentation/kbuild/kconfig-language.txt`` Menu attributes: default value.
+   ``Documentation/kbuild/kconfig-language.rst`` Menu attributes: default value.
 
 7) All new ``Kconfig`` options have help text.
 
diff --git a/Documentation/translations/it_IT/kernel-hacking/hacking.rst b/Documentation/translations/it_IT/kernel-hacking/hacking.rst
index 7178e517af0a..24c592852bf1 100644
--- a/Documentation/translations/it_IT/kernel-hacking/hacking.rst
+++ b/Documentation/translations/it_IT/kernel-hacking/hacking.rst
@@ -755,7 +755,7 @@ anche per avere patch pulite, c'è del lavoro amministrativo da fare:
 -  Solitamente vorrete un'opzione di configurazione per la vostra modifica
    al kernel. Modificate ``Kconfig`` nella cartella giusta. Il linguaggio
    Config è facile con copia ed incolla, e c'è una completa documentazione
-   nel file ``Documentation/kbuild/kconfig-language.txt``.
+   nel file ``Documentation/kbuild/kconfig-language.rst``.
 
    Nella descrizione della vostra opzione, assicuratevi di parlare sia agli
    utenti esperti sia agli utente che non sanno nulla del vostro lavoro.
@@ -767,7 +767,7 @@ anche per avere patch pulite, c'è del lavoro amministrativo da fare:
 -  Modificate il file ``Makefile``: le variabili CONFIG sono esportate qui,
    quindi potete solitamente aggiungere una riga come la seguete
    "obj-$(CONFIG_xxx) += xxx.o". La sintassi è documentata nel file
-   ``Documentation/kbuild/makefiles.txt``.
+   ``Documentation/kbuild/makefiles.rst``.
 
 -  Aggiungete voi stessi in ``CREDITS`` se avete fatto qualcosa di notevole,
    solitamente qualcosa che supera il singolo file (comunque il vostro nome
diff --git a/Documentation/translations/it_IT/process/coding-style.rst b/Documentation/translations/it_IT/process/coding-style.rst
index a6559d25a23d..8995d2d19f20 100644
--- a/Documentation/translations/it_IT/process/coding-style.rst
+++ b/Documentation/translations/it_IT/process/coding-style.rst
@@ -696,7 +696,7 @@ nella stringa di titolo::
 	...
 
 Per la documentazione completa sui file di configurazione, consultate
-il documento Documentation/kbuild/kconfig-language.txt
+il documento Documentation/kbuild/kconfig-language.rst
 
 
 11) Strutture dati
diff --git a/Documentation/translations/it_IT/process/submit-checklist.rst b/Documentation/translations/it_IT/process/submit-checklist.rst
index 70e65a7b3620..ea74cae958d7 100644
--- a/Documentation/translations/it_IT/process/submit-checklist.rst
+++ b/Documentation/translations/it_IT/process/submit-checklist.rst
@@ -43,7 +43,7 @@ sottomissione delle patch, in particolare
 
 6) Le opzioni ``CONFIG``, nuove o modificate, non scombussolano il menu
    di configurazione e sono preimpostate come disabilitate a meno che non
-   soddisfino i criteri descritti in ``Documentation/kbuild/kconfig-language.txt``
+   soddisfino i criteri descritti in ``Documentation/kbuild/kconfig-language.rst``
    alla punto "Voci di menu: valori predefiniti".
 
 7) Tutte le nuove opzioni ``Kconfig`` hanno un messaggio di aiuto.
diff --git a/Documentation/translations/zh_CN/process/coding-style.rst b/Documentation/translations/zh_CN/process/coding-style.rst
index 5479c591c2f7..4f6237392e65 100644
--- a/Documentation/translations/zh_CN/process/coding-style.rst
+++ b/Documentation/translations/zh_CN/process/coding-style.rst
@@ -599,7 +599,7 @@ Documentation/doc-guide/ 和 scripts/kernel-doc 以获得详细信息。
 	depends on ADFS_FS
 	...
 
-要查看配置文件的完整文档，请看 Documentation/kbuild/kconfig-language.txt。
+要查看配置文件的完整文档，请看 Documentation/kbuild/kconfig-language.rst。
 
 
 11) 数据结构
diff --git a/Documentation/translations/zh_CN/process/submit-checklist.rst b/Documentation/translations/zh_CN/process/submit-checklist.rst
index 89061aa8fdbe..f4785d2b0491 100644
--- a/Documentation/translations/zh_CN/process/submit-checklist.rst
+++ b/Documentation/translations/zh_CN/process/submit-checklist.rst
@@ -38,7 +38,7 @@ Linux内核补丁提交清单
    违规行为。
 
 6) 任何新的或修改过的 ``CONFIG`` 选项都不会弄脏配置菜单，并默认为关闭，除非
-   它们符合 ``Documentation/kbuild/kconfig-language.txt`` 中记录的异常条件,
+   它们符合 ``Documentation/kbuild/kconfig-language.rst`` 中记录的异常条件,
    菜单属性：默认值.
 
 7) 所有新的 ``kconfig`` 选项都有帮助文本。
-- 
cgit 


From d67297ad343ec02a88f947b45526c92d2870aed3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:49 -0300
Subject: docs: kdump: convert docs to ReST and rename to *.rst

Convert kdump documentation to ReST and add it to the
user faced manual, as the documents are mainly focused on
sysadmins that would be enabling kdump.

Note: the vmcoreinfo.rst has one very long title on one of its
sub-sections:

	PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_slab|PG_hwpoision|PG_head_mask|PAGE_BUDDY_MAPCOUNT_VALUE(~PG_buddy)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_offline)

I opted to break this one, into two entries with the same content,
in order to make it easier to display after being parsed in html and PDF.

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/bug-hunting.rst         |   2 +-
 Documentation/admin-guide/kernel-parameters.txt   |   6 +-
 Documentation/kdump/index.rst                     |  21 +
 Documentation/kdump/kdump.rst                     | 534 ++++++++++++++++++++++
 Documentation/kdump/kdump.txt                     | 509 ---------------------
 Documentation/kdump/vmcoreinfo.rst                | 488 ++++++++++++++++++++
 Documentation/kdump/vmcoreinfo.txt                | 495 --------------------
 Documentation/powerpc/firmware-assisted-dump.txt  |   2 +-
 Documentation/translations/zh_CN/oops-tracing.txt |   2 +-
 Documentation/watchdog/hpwdt.txt                  |   2 +-
 10 files changed, 1050 insertions(+), 1011 deletions(-)
 create mode 100644 Documentation/kdump/index.rst
 create mode 100644 Documentation/kdump/kdump.rst
 delete mode 100644 Documentation/kdump/kdump.txt
 create mode 100644 Documentation/kdump/vmcoreinfo.rst
 delete mode 100644 Documentation/kdump/vmcoreinfo.txt

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/bug-hunting.rst b/Documentation/admin-guide/bug-hunting.rst
index f278b289e260..b761aa2a51d2 100644
--- a/Documentation/admin-guide/bug-hunting.rst
+++ b/Documentation/admin-guide/bug-hunting.rst
@@ -90,7 +90,7 @@ the disk is not available then you have three options:
     run a null modem to a second machine and capture the output there
     using your favourite communication program.  Minicom works well.
 
-(3) Use Kdump (see Documentation/kdump/kdump.txt),
+(3) Use Kdump (see Documentation/kdump/kdump.rst),
     extract the kernel ring buffer from old memory with using dmesg
     gdbmacro in Documentation/kdump/gdbmacros.txt.
 
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 81c168b25b20..2148fd289851 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -708,14 +708,14 @@
 			[KNL, x86_64] select a region under 4G first, and
 			fall back to reserve region above 4G when '@offset'
 			hasn't been specified.
-			See Documentation/kdump/kdump.txt for further details.
+			See Documentation/kdump/kdump.rst for further details.
 
 	crashkernel=range1:size1[,range2:size2,...][@offset]
 			[KNL] Same as above, but depends on the memory
 			in the running system. The syntax of range is
 			start-[end] where start and end are both
 			a memory unit (amount[KMG]). See also
-			Documentation/kdump/kdump.txt for an example.
+			Documentation/kdump/kdump.rst for an example.
 
 	crashkernel=size[KMG],high
 			[KNL, x86_64] range could be above 4G. Allow kernel
@@ -1209,7 +1209,7 @@
 			Specifies physical address of start of kernel core
 			image elf header and optionally the size. Generally
 			kexec loader will pass this option to capture kernel.
-			See Documentation/kdump/kdump.txt for details.
+			See Documentation/kdump/kdump.rst for details.
 
 	enable_mtrr_cleanup [X86]
 			The kernel tries to adjust MTRR layout from continuous
diff --git a/Documentation/kdump/index.rst b/Documentation/kdump/index.rst
new file mode 100644
index 000000000000..2b17fcf6867a
--- /dev/null
+++ b/Documentation/kdump/index.rst
@@ -0,0 +1,21 @@
+:orphan:
+
+================================================================
+Documentation for Kdump - The kexec-based Crash Dumping Solution
+================================================================
+
+This document includes overview, setup and installation, and analysis
+information.
+
+.. toctree::
+    :maxdepth: 1
+
+    kdump
+    vmcoreinfo
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/kdump/kdump.rst b/Documentation/kdump/kdump.rst
new file mode 100644
index 000000000000..ac7e131d2935
--- /dev/null
+++ b/Documentation/kdump/kdump.rst
@@ -0,0 +1,534 @@
+================================================================
+Documentation for Kdump - The kexec-based Crash Dumping Solution
+================================================================
+
+This document includes overview, setup and installation, and analysis
+information.
+
+Overview
+========
+
+Kdump uses kexec to quickly boot to a dump-capture kernel whenever a
+dump of the system kernel's memory needs to be taken (for example, when
+the system panics). The system kernel's memory image is preserved across
+the reboot and is accessible to the dump-capture kernel.
+
+You can use common commands, such as cp and scp, to copy the
+memory image to a dump file on the local disk, or across the network to
+a remote system.
+
+Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
+s390x, arm and arm64 architectures.
+
+When the system kernel boots, it reserves a small section of memory for
+the dump-capture kernel. This ensures that ongoing Direct Memory Access
+(DMA) from the system kernel does not corrupt the dump-capture kernel.
+The kexec -p command loads the dump-capture kernel into this reserved
+memory.
+
+On x86 machines, the first 640 KB of physical memory is needed to boot,
+regardless of where the kernel loads. Therefore, kexec backs up this
+region just before rebooting into the dump-capture kernel.
+
+Similarly on PPC64 machines first 32KB of physical memory is needed for
+booting regardless of where the kernel is loaded and to support 64K page
+size kexec backs up the first 64KB memory.
+
+For s390x, when kdump is triggered, the crashkernel region is exchanged
+with the region [0, crashkernel region size] and then the kdump kernel
+runs in [0, crashkernel region size]. Therefore no relocatable kernel is
+needed for s390x.
+
+All of the necessary information about the system kernel's core image is
+encoded in the ELF format, and stored in a reserved area of memory
+before a crash. The physical address of the start of the ELF header is
+passed to the dump-capture kernel through the elfcorehdr= boot
+parameter. Optionally the size of the ELF header can also be passed
+when using the elfcorehdr=[size[KMG]@]offset[KMG] syntax.
+
+
+With the dump-capture kernel, you can access the memory image through
+/proc/vmcore. This exports the dump as an ELF-format file that you can
+write out using file copy commands such as cp or scp. Further, you can
+use analysis tools such as the GNU Debugger (GDB) and the Crash tool to
+debug the dump file. This method ensures that the dump pages are correctly
+ordered.
+
+
+Setup and Installation
+======================
+
+Install kexec-tools
+-------------------
+
+1) Login as the root user.
+
+2) Download the kexec-tools user-space package from the following URL:
+
+http://kernel.org/pub/linux/utils/kernel/kexec/kexec-tools.tar.gz
+
+This is a symlink to the latest version.
+
+The latest kexec-tools git tree is available at:
+
+- git://git.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git
+- http://www.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git
+
+There is also a gitweb interface available at
+http://www.kernel.org/git/?p=utils/kernel/kexec/kexec-tools.git
+
+More information about kexec-tools can be found at
+http://horms.net/projects/kexec/
+
+3) Unpack the tarball with the tar command, as follows::
+
+	tar xvpzf kexec-tools.tar.gz
+
+4) Change to the kexec-tools directory, as follows::
+
+	cd kexec-tools-VERSION
+
+5) Configure the package, as follows::
+
+	./configure
+
+6) Compile the package, as follows::
+
+	make
+
+7) Install the package, as follows::
+
+	make install
+
+
+Build the system and dump-capture kernels
+-----------------------------------------
+There are two possible methods of using Kdump.
+
+1) Build a separate custom dump-capture kernel for capturing the
+   kernel core dump.
+
+2) Or use the system kernel binary itself as dump-capture kernel and there is
+   no need to build a separate dump-capture kernel. This is possible
+   only with the architectures which support a relocatable kernel. As
+   of today, i386, x86_64, ppc64, ia64, arm and arm64 architectures support
+   relocatable kernel.
+
+Building a relocatable kernel is advantageous from the point of view that
+one does not have to build a second kernel for capturing the dump. But
+at the same time one might want to build a custom dump capture kernel
+suitable to his needs.
+
+Following are the configuration setting required for system and
+dump-capture kernels for enabling kdump support.
+
+System kernel config options
+----------------------------
+
+1) Enable "kexec system call" in "Processor type and features."::
+
+	CONFIG_KEXEC=y
+
+2) Enable "sysfs file system support" in "Filesystem" -> "Pseudo
+   filesystems." This is usually enabled by default::
+
+	CONFIG_SYSFS=y
+
+   Note that "sysfs file system support" might not appear in the "Pseudo
+   filesystems" menu if "Configure standard kernel features (for small
+   systems)" is not enabled in "General Setup." In this case, check the
+   .config file itself to ensure that sysfs is turned on, as follows::
+
+	grep 'CONFIG_SYSFS' .config
+
+3) Enable "Compile the kernel with debug info" in "Kernel hacking."::
+
+	CONFIG_DEBUG_INFO=Y
+
+   This causes the kernel to be built with debug symbols. The dump
+   analysis tools require a vmlinux with debug symbols in order to read
+   and analyze a dump file.
+
+Dump-capture kernel config options (Arch Independent)
+-----------------------------------------------------
+
+1) Enable "kernel crash dumps" support under "Processor type and
+   features"::
+
+	CONFIG_CRASH_DUMP=y
+
+2) Enable "/proc/vmcore support" under "Filesystems" -> "Pseudo filesystems"::
+
+	CONFIG_PROC_VMCORE=y
+
+   (CONFIG_PROC_VMCORE is set by default when CONFIG_CRASH_DUMP is selected.)
+
+Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
+--------------------------------------------------------------------
+
+1) On i386, enable high memory support under "Processor type and
+   features"::
+
+	CONFIG_HIGHMEM64G=y
+
+   or::
+
+	CONFIG_HIGHMEM4G
+
+2) On i386 and x86_64, disable symmetric multi-processing support
+   under "Processor type and features"::
+
+	CONFIG_SMP=n
+
+   (If CONFIG_SMP=y, then specify maxcpus=1 on the kernel command line
+   when loading the dump-capture kernel, see section "Load the Dump-capture
+   Kernel".)
+
+3) If one wants to build and use a relocatable kernel,
+   Enable "Build a relocatable kernel" support under "Processor type and
+   features"::
+
+	CONFIG_RELOCATABLE=y
+
+4) Use a suitable value for "Physical address where the kernel is
+   loaded" (under "Processor type and features"). This only appears when
+   "kernel crash dumps" is enabled. A suitable value depends upon
+   whether kernel is relocatable or not.
+
+   If you are using a relocatable kernel use CONFIG_PHYSICAL_START=0x100000
+   This will compile the kernel for physical address 1MB, but given the fact
+   kernel is relocatable, it can be run from any physical address hence
+   kexec boot loader will load it in memory region reserved for dump-capture
+   kernel.
+
+   Otherwise it should be the start of memory region reserved for
+   second kernel using boot parameter "crashkernel=Y@X". Here X is
+   start of memory region reserved for dump-capture kernel.
+   Generally X is 16MB (0x1000000). So you can set
+   CONFIG_PHYSICAL_START=0x1000000
+
+5) Make and install the kernel and its modules. DO NOT add this kernel
+   to the boot loader configuration files.
+
+Dump-capture kernel config options (Arch Dependent, ppc64)
+----------------------------------------------------------
+
+1) Enable "Build a kdump crash kernel" support under "Kernel" options::
+
+	CONFIG_CRASH_DUMP=y
+
+2)   Enable "Build a relocatable kernel" support::
+
+	CONFIG_RELOCATABLE=y
+
+   Make and install the kernel and its modules.
+
+Dump-capture kernel config options (Arch Dependent, ia64)
+----------------------------------------------------------
+
+- No specific options are required to create a dump-capture kernel
+  for ia64, other than those specified in the arch independent section
+  above. This means that it is possible to use the system kernel
+  as a dump-capture kernel if desired.
+
+  The crashkernel region can be automatically placed by the system
+  kernel at run time. This is done by specifying the base address as 0,
+  or omitting it all together::
+
+	crashkernel=256M@0
+
+  or::
+
+	crashkernel=256M
+
+  If the start address is specified, note that the start address of the
+  kernel will be aligned to 64Mb, so if the start address is not then
+  any space below the alignment point will be wasted.
+
+Dump-capture kernel config options (Arch Dependent, arm)
+----------------------------------------------------------
+
+-   To use a relocatable kernel,
+    Enable "AUTO_ZRELADDR" support under "Boot" options::
+
+	AUTO_ZRELADDR=y
+
+Dump-capture kernel config options (Arch Dependent, arm64)
+----------------------------------------------------------
+
+- Please note that kvm of the dump-capture kernel will not be enabled
+  on non-VHE systems even if it is configured. This is because the CPU
+  will not be reset to EL2 on panic.
+
+Extended crashkernel syntax
+===========================
+
+While the "crashkernel=size[@offset]" syntax is sufficient for most
+configurations, sometimes it's handy to have the reserved memory dependent
+on the value of System RAM -- that's mostly for distributors that pre-setup
+the kernel command line to avoid a unbootable system after some memory has
+been removed from the machine.
+
+The syntax is::
+
+    crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
+    range=start-[end]
+
+For example::
+
+    crashkernel=512M-2G:64M,2G-:128M
+
+This would mean:
+
+    1) if the RAM is smaller than 512M, then don't reserve anything
+       (this is the "rescue" case)
+    2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M
+    3) if the RAM size is larger than 2G, then reserve 128M
+
+
+
+Boot into System Kernel
+=======================
+
+1) Update the boot loader (such as grub, yaboot, or lilo) configuration
+   files as necessary.
+
+2) Boot the system kernel with the boot parameter "crashkernel=Y@X",
+   where Y specifies how much memory to reserve for the dump-capture kernel
+   and X specifies the beginning of this reserved memory. For example,
+   "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory
+   starting at physical address 0x01000000 (16MB) for the dump-capture kernel.
+
+   On x86 and x86_64, use "crashkernel=64M@16M".
+
+   On ppc64, use "crashkernel=128M@32M".
+
+   On ia64, 256M@256M is a generous value that typically works.
+   The region may be automatically placed on ia64, see the
+   dump-capture kernel config option notes above.
+   If use sparse memory, the size should be rounded to GRANULE boundaries.
+
+   On s390x, typically use "crashkernel=xxM". The value of xx is dependent
+   on the memory consumption of the kdump system. In general this is not
+   dependent on the memory size of the production system.
+
+   On arm, the use of "crashkernel=Y@X" is no longer necessary; the
+   kernel will automatically locate the crash kernel image within the
+   first 512MB of RAM if X is not given.
+
+   On arm64, use "crashkernel=Y[@X]".  Note that the start address of
+   the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).
+
+Load the Dump-capture Kernel
+============================
+
+After booting to the system kernel, dump-capture kernel needs to be
+loaded.
+
+Based on the architecture and type of image (relocatable or not), one
+can choose to load the uncompressed vmlinux or compressed bzImage/vmlinuz
+of dump-capture kernel. Following is the summary.
+
+For i386 and x86_64:
+
+	- Use vmlinux if kernel is not relocatable.
+	- Use bzImage/vmlinuz if kernel is relocatable.
+
+For ppc64:
+
+	- Use vmlinux
+
+For ia64:
+
+	- Use vmlinux or vmlinuz.gz
+
+For s390x:
+
+	- Use image or bzImage
+
+For arm:
+
+	- Use zImage
+
+For arm64:
+
+	- Use vmlinux or Image
+
+If you are using an uncompressed vmlinux image then use following command
+to load dump-capture kernel::
+
+   kexec -p <dump-capture-kernel-vmlinux-image> \
+   --initrd=<initrd-for-dump-capture-kernel> --args-linux \
+   --append="root=<root-dev> <arch-specific-options>"
+
+If you are using a compressed bzImage/vmlinuz, then use following command
+to load dump-capture kernel::
+
+   kexec -p <dump-capture-kernel-bzImage> \
+   --initrd=<initrd-for-dump-capture-kernel> \
+   --append="root=<root-dev> <arch-specific-options>"
+
+If you are using a compressed zImage, then use following command
+to load dump-capture kernel::
+
+   kexec --type zImage -p <dump-capture-kernel-bzImage> \
+   --initrd=<initrd-for-dump-capture-kernel> \
+   --dtb=<dtb-for-dump-capture-kernel> \
+   --append="root=<root-dev> <arch-specific-options>"
+
+If you are using an uncompressed Image, then use following command
+to load dump-capture kernel::
+
+   kexec -p <dump-capture-kernel-Image> \
+   --initrd=<initrd-for-dump-capture-kernel> \
+   --append="root=<root-dev> <arch-specific-options>"
+
+Please note, that --args-linux does not need to be specified for ia64.
+It is planned to make this a no-op on that architecture, but for now
+it should be omitted
+
+Following are the arch specific command line options to be used while
+loading dump-capture kernel.
+
+For i386, x86_64 and ia64:
+
+	"1 irqpoll maxcpus=1 reset_devices"
+
+For ppc64:
+
+	"1 maxcpus=1 noirqdistrib reset_devices"
+
+For s390x:
+
+	"1 maxcpus=1 cgroup_disable=memory"
+
+For arm:
+
+	"1 maxcpus=1 reset_devices"
+
+For arm64:
+
+	"1 maxcpus=1 reset_devices"
+
+Notes on loading the dump-capture kernel:
+
+* By default, the ELF headers are stored in ELF64 format to support
+  systems with more than 4GB memory. On i386, kexec automatically checks if
+  the physical RAM size exceeds the 4 GB limit and if not, uses ELF32.
+  So, on non-PAE systems, ELF32 is always used.
+
+  The --elf32-core-headers option can be used to force the generation of ELF32
+  headers. This is necessary because GDB currently cannot open vmcore files
+  with ELF64 headers on 32-bit systems.
+
+* The "irqpoll" boot parameter reduces driver initialization failures
+  due to shared interrupts in the dump-capture kernel.
+
+* You must specify <root-dev> in the format corresponding to the root
+  device name in the output of mount command.
+
+* Boot parameter "1" boots the dump-capture kernel into single-user
+  mode without networking. If you want networking, use "3".
+
+* We generally don't have to bring up a SMP kernel just to capture the
+  dump. Hence generally it is useful either to build a UP dump-capture
+  kernel or specify maxcpus=1 option while loading dump-capture kernel.
+  Note, though maxcpus always works, you had better replace it with
+  nr_cpus to save memory if supported by the current ARCH, such as x86.
+
+* You should enable multi-cpu support in dump-capture kernel if you intend
+  to use multi-thread programs with it, such as parallel dump feature of
+  makedumpfile. Otherwise, the multi-thread program may have a great
+  performance degradation. To enable multi-cpu support, you should bring up an
+  SMP dump-capture kernel and specify maxcpus/nr_cpus, disable_cpu_apicid=[X]
+  options while loading it.
+
+* For s390x there are two kdump modes: If a ELF header is specified with
+  the elfcorehdr= kernel parameter, it is used by the kdump kernel as it
+  is done on all other architectures. If no elfcorehdr= kernel parameter is
+  specified, the s390x kdump kernel dynamically creates the header. The
+  second mode has the advantage that for CPU and memory hotplug, kdump has
+  not to be reloaded with kexec_load().
+
+* For s390x systems with many attached devices the "cio_ignore" kernel
+  parameter should be used for the kdump kernel in order to prevent allocation
+  of kernel memory for devices that are not relevant for kdump. The same
+  applies to systems that use SCSI/FCP devices. In that case the
+  "allow_lun_scan" zfcp module parameter should be set to zero before
+  setting FCP devices online.
+
+Kernel Panic
+============
+
+After successfully loading the dump-capture kernel as previously
+described, the system will reboot into the dump-capture kernel if a
+system crash is triggered.  Trigger points are located in panic(),
+die(), die_nmi() and in the sysrq handler (ALT-SysRq-c).
+
+The following conditions will execute a crash trigger point:
+
+If a hard lockup is detected and "NMI watchdog" is configured, the system
+will boot into the dump-capture kernel ( die_nmi() ).
+
+If die() is called, and it happens to be a thread with pid 0 or 1, or die()
+is called inside interrupt context or die() is called and panic_on_oops is set,
+the system will boot into the dump-capture kernel.
+
+On powerpc systems when a soft-reset is generated, die() is called by all cpus
+and the system will boot into the dump-capture kernel.
+
+For testing purposes, you can trigger a crash by using "ALT-SysRq-c",
+"echo c > /proc/sysrq-trigger" or write a module to force the panic.
+
+Write Out the Dump File
+=======================
+
+After the dump-capture kernel is booted, write out the dump file with
+the following command::
+
+   cp /proc/vmcore <dump-file>
+
+
+Analysis
+========
+
+Before analyzing the dump image, you should reboot into a stable kernel.
+
+You can do limited analysis using GDB on the dump file copied out of
+/proc/vmcore. Use the debug vmlinux built with -g and run the following
+command::
+
+   gdb vmlinux <dump-file>
+
+Stack trace for the task on processor 0, register display, and memory
+display work fine.
+
+Note: GDB cannot analyze core files generated in ELF64 format for x86.
+On systems with a maximum of 4GB of memory, you can generate
+ELF32-format headers using the --elf32-core-headers kernel option on the
+dump kernel.
+
+You can also use the Crash utility to analyze dump files in Kdump
+format. Crash is available on Dave Anderson's site at the following URL:
+
+   http://people.redhat.com/~anderson/
+
+Trigger Kdump on WARN()
+=======================
+
+The kernel parameter, panic_on_warn, calls panic() in all WARN() paths.  This
+will cause a kdump to occur at the panic() call.  In cases where a user wants
+to specify this during runtime, /proc/sys/kernel/panic_on_warn can be set to 1
+to achieve the same behaviour.
+
+Contact
+=======
+
+- Vivek Goyal (vgoyal@redhat.com)
+- Maneesh Soni (maneesh@in.ibm.com)
+
+GDB macros
+==========
+
+.. include:: gdbmacros.txt
+   :literal:
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
deleted file mode 100644
index 3162eeb8c262..000000000000
--- a/Documentation/kdump/kdump.txt
+++ /dev/null
@@ -1,509 +0,0 @@
-================================================================
-Documentation for Kdump - The kexec-based Crash Dumping Solution
-================================================================
-
-This document includes overview, setup and installation, and analysis
-information.
-
-Overview
-========
-
-Kdump uses kexec to quickly boot to a dump-capture kernel whenever a
-dump of the system kernel's memory needs to be taken (for example, when
-the system panics). The system kernel's memory image is preserved across
-the reboot and is accessible to the dump-capture kernel.
-
-You can use common commands, such as cp and scp, to copy the
-memory image to a dump file on the local disk, or across the network to
-a remote system.
-
-Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
-s390x, arm and arm64 architectures.
-
-When the system kernel boots, it reserves a small section of memory for
-the dump-capture kernel. This ensures that ongoing Direct Memory Access
-(DMA) from the system kernel does not corrupt the dump-capture kernel.
-The kexec -p command loads the dump-capture kernel into this reserved
-memory.
-
-On x86 machines, the first 640 KB of physical memory is needed to boot,
-regardless of where the kernel loads. Therefore, kexec backs up this
-region just before rebooting into the dump-capture kernel.
-
-Similarly on PPC64 machines first 32KB of physical memory is needed for
-booting regardless of where the kernel is loaded and to support 64K page
-size kexec backs up the first 64KB memory.
-
-For s390x, when kdump is triggered, the crashkernel region is exchanged
-with the region [0, crashkernel region size] and then the kdump kernel
-runs in [0, crashkernel region size]. Therefore no relocatable kernel is
-needed for s390x.
-
-All of the necessary information about the system kernel's core image is
-encoded in the ELF format, and stored in a reserved area of memory
-before a crash. The physical address of the start of the ELF header is
-passed to the dump-capture kernel through the elfcorehdr= boot
-parameter. Optionally the size of the ELF header can also be passed
-when using the elfcorehdr=[size[KMG]@]offset[KMG] syntax.
-
-
-With the dump-capture kernel, you can access the memory image through
-/proc/vmcore. This exports the dump as an ELF-format file that you can
-write out using file copy commands such as cp or scp. Further, you can
-use analysis tools such as the GNU Debugger (GDB) and the Crash tool to
-debug the dump file. This method ensures that the dump pages are correctly
-ordered.
-
-
-Setup and Installation
-======================
-
-Install kexec-tools
--------------------
-
-1) Login as the root user.
-
-2) Download the kexec-tools user-space package from the following URL:
-
-http://kernel.org/pub/linux/utils/kernel/kexec/kexec-tools.tar.gz
-
-This is a symlink to the latest version.
-
-The latest kexec-tools git tree is available at:
-
-git://git.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git
-and
-http://www.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git
-
-There is also a gitweb interface available at
-http://www.kernel.org/git/?p=utils/kernel/kexec/kexec-tools.git
-
-More information about kexec-tools can be found at
-http://horms.net/projects/kexec/
-
-3) Unpack the tarball with the tar command, as follows:
-
-   tar xvpzf kexec-tools.tar.gz
-
-4) Change to the kexec-tools directory, as follows:
-
-   cd kexec-tools-VERSION
-
-5) Configure the package, as follows:
-
-   ./configure
-
-6) Compile the package, as follows:
-
-   make
-
-7) Install the package, as follows:
-
-   make install
-
-
-Build the system and dump-capture kernels
------------------------------------------
-There are two possible methods of using Kdump.
-
-1) Build a separate custom dump-capture kernel for capturing the
-   kernel core dump.
-
-2) Or use the system kernel binary itself as dump-capture kernel and there is
-   no need to build a separate dump-capture kernel. This is possible
-   only with the architectures which support a relocatable kernel. As
-   of today, i386, x86_64, ppc64, ia64, arm and arm64 architectures support
-   relocatable kernel.
-
-Building a relocatable kernel is advantageous from the point of view that
-one does not have to build a second kernel for capturing the dump. But
-at the same time one might want to build a custom dump capture kernel
-suitable to his needs.
-
-Following are the configuration setting required for system and
-dump-capture kernels for enabling kdump support.
-
-System kernel config options
-----------------------------
-
-1) Enable "kexec system call" in "Processor type and features."
-
-   CONFIG_KEXEC=y
-
-2) Enable "sysfs file system support" in "Filesystem" -> "Pseudo
-   filesystems." This is usually enabled by default.
-
-   CONFIG_SYSFS=y
-
-   Note that "sysfs file system support" might not appear in the "Pseudo
-   filesystems" menu if "Configure standard kernel features (for small
-   systems)" is not enabled in "General Setup." In this case, check the
-   .config file itself to ensure that sysfs is turned on, as follows:
-
-   grep 'CONFIG_SYSFS' .config
-
-3) Enable "Compile the kernel with debug info" in "Kernel hacking."
-
-   CONFIG_DEBUG_INFO=Y
-
-   This causes the kernel to be built with debug symbols. The dump
-   analysis tools require a vmlinux with debug symbols in order to read
-   and analyze a dump file.
-
-Dump-capture kernel config options (Arch Independent)
------------------------------------------------------
-
-1) Enable "kernel crash dumps" support under "Processor type and
-   features":
-
-   CONFIG_CRASH_DUMP=y
-
-2) Enable "/proc/vmcore support" under "Filesystems" -> "Pseudo filesystems".
-
-   CONFIG_PROC_VMCORE=y
-   (CONFIG_PROC_VMCORE is set by default when CONFIG_CRASH_DUMP is selected.)
-
-Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
---------------------------------------------------------------------
-
-1) On i386, enable high memory support under "Processor type and
-   features":
-
-   CONFIG_HIGHMEM64G=y
-   or
-   CONFIG_HIGHMEM4G
-
-2) On i386 and x86_64, disable symmetric multi-processing support
-   under "Processor type and features":
-
-   CONFIG_SMP=n
-
-   (If CONFIG_SMP=y, then specify maxcpus=1 on the kernel command line
-   when loading the dump-capture kernel, see section "Load the Dump-capture
-   Kernel".)
-
-3) If one wants to build and use a relocatable kernel,
-   Enable "Build a relocatable kernel" support under "Processor type and
-   features"
-
-   CONFIG_RELOCATABLE=y
-
-4) Use a suitable value for "Physical address where the kernel is
-   loaded" (under "Processor type and features"). This only appears when
-   "kernel crash dumps" is enabled. A suitable value depends upon
-   whether kernel is relocatable or not.
-
-   If you are using a relocatable kernel use CONFIG_PHYSICAL_START=0x100000
-   This will compile the kernel for physical address 1MB, but given the fact
-   kernel is relocatable, it can be run from any physical address hence
-   kexec boot loader will load it in memory region reserved for dump-capture
-   kernel.
-
-   Otherwise it should be the start of memory region reserved for
-   second kernel using boot parameter "crashkernel=Y@X". Here X is
-   start of memory region reserved for dump-capture kernel.
-   Generally X is 16MB (0x1000000). So you can set
-   CONFIG_PHYSICAL_START=0x1000000
-
-5) Make and install the kernel and its modules. DO NOT add this kernel
-   to the boot loader configuration files.
-
-Dump-capture kernel config options (Arch Dependent, ppc64)
-----------------------------------------------------------
-
-1) Enable "Build a kdump crash kernel" support under "Kernel" options:
-
-   CONFIG_CRASH_DUMP=y
-
-2)   Enable "Build a relocatable kernel" support
-
-   CONFIG_RELOCATABLE=y
-
-   Make and install the kernel and its modules.
-
-Dump-capture kernel config options (Arch Dependent, ia64)
-----------------------------------------------------------
-
-- No specific options are required to create a dump-capture kernel
-  for ia64, other than those specified in the arch independent section
-  above. This means that it is possible to use the system kernel
-  as a dump-capture kernel if desired.
-
-  The crashkernel region can be automatically placed by the system
-  kernel at run time. This is done by specifying the base address as 0,
-  or omitting it all together.
-
-  crashkernel=256M@0
-  or
-  crashkernel=256M
-
-  If the start address is specified, note that the start address of the
-  kernel will be aligned to 64Mb, so if the start address is not then
-  any space below the alignment point will be wasted.
-
-Dump-capture kernel config options (Arch Dependent, arm)
-----------------------------------------------------------
-
--   To use a relocatable kernel,
-    Enable "AUTO_ZRELADDR" support under "Boot" options:
-
-    AUTO_ZRELADDR=y
-
-Dump-capture kernel config options (Arch Dependent, arm64)
-----------------------------------------------------------
-
-- Please note that kvm of the dump-capture kernel will not be enabled
-  on non-VHE systems even if it is configured. This is because the CPU
-  will not be reset to EL2 on panic.
-
-Extended crashkernel syntax
-===========================
-
-While the "crashkernel=size[@offset]" syntax is sufficient for most
-configurations, sometimes it's handy to have the reserved memory dependent
-on the value of System RAM -- that's mostly for distributors that pre-setup
-the kernel command line to avoid a unbootable system after some memory has
-been removed from the machine.
-
-The syntax is:
-
-    crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
-    range=start-[end]
-
-For example:
-
-    crashkernel=512M-2G:64M,2G-:128M
-
-This would mean:
-
-    1) if the RAM is smaller than 512M, then don't reserve anything
-       (this is the "rescue" case)
-    2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M
-    3) if the RAM size is larger than 2G, then reserve 128M
-
-
-
-Boot into System Kernel
-=======================
-
-1) Update the boot loader (such as grub, yaboot, or lilo) configuration
-   files as necessary.
-
-2) Boot the system kernel with the boot parameter "crashkernel=Y@X",
-   where Y specifies how much memory to reserve for the dump-capture kernel
-   and X specifies the beginning of this reserved memory. For example,
-   "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory
-   starting at physical address 0x01000000 (16MB) for the dump-capture kernel.
-
-   On x86 and x86_64, use "crashkernel=64M@16M".
-
-   On ppc64, use "crashkernel=128M@32M".
-
-   On ia64, 256M@256M is a generous value that typically works.
-   The region may be automatically placed on ia64, see the
-   dump-capture kernel config option notes above.
-   If use sparse memory, the size should be rounded to GRANULE boundaries.
-
-   On s390x, typically use "crashkernel=xxM". The value of xx is dependent
-   on the memory consumption of the kdump system. In general this is not
-   dependent on the memory size of the production system.
-
-   On arm, the use of "crashkernel=Y@X" is no longer necessary; the
-   kernel will automatically locate the crash kernel image within the
-   first 512MB of RAM if X is not given.
-
-   On arm64, use "crashkernel=Y[@X]".  Note that the start address of
-   the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).
-
-Load the Dump-capture Kernel
-============================
-
-After booting to the system kernel, dump-capture kernel needs to be
-loaded.
-
-Based on the architecture and type of image (relocatable or not), one
-can choose to load the uncompressed vmlinux or compressed bzImage/vmlinuz
-of dump-capture kernel. Following is the summary.
-
-For i386 and x86_64:
-	- Use vmlinux if kernel is not relocatable.
-	- Use bzImage/vmlinuz if kernel is relocatable.
-For ppc64:
-	- Use vmlinux
-For ia64:
-	- Use vmlinux or vmlinuz.gz
-For s390x:
-	- Use image or bzImage
-For arm:
-	- Use zImage
-For arm64:
-	- Use vmlinux or Image
-
-If you are using an uncompressed vmlinux image then use following command
-to load dump-capture kernel.
-
-   kexec -p <dump-capture-kernel-vmlinux-image> \
-   --initrd=<initrd-for-dump-capture-kernel> --args-linux \
-   --append="root=<root-dev> <arch-specific-options>"
-
-If you are using a compressed bzImage/vmlinuz, then use following command
-to load dump-capture kernel.
-
-   kexec -p <dump-capture-kernel-bzImage> \
-   --initrd=<initrd-for-dump-capture-kernel> \
-   --append="root=<root-dev> <arch-specific-options>"
-
-If you are using a compressed zImage, then use following command
-to load dump-capture kernel.
-
-   kexec --type zImage -p <dump-capture-kernel-bzImage> \
-   --initrd=<initrd-for-dump-capture-kernel> \
-   --dtb=<dtb-for-dump-capture-kernel> \
-   --append="root=<root-dev> <arch-specific-options>"
-
-If you are using an uncompressed Image, then use following command
-to load dump-capture kernel.
-
-   kexec -p <dump-capture-kernel-Image> \
-   --initrd=<initrd-for-dump-capture-kernel> \
-   --append="root=<root-dev> <arch-specific-options>"
-
-Please note, that --args-linux does not need to be specified for ia64.
-It is planned to make this a no-op on that architecture, but for now
-it should be omitted
-
-Following are the arch specific command line options to be used while
-loading dump-capture kernel.
-
-For i386, x86_64 and ia64:
-	"1 irqpoll maxcpus=1 reset_devices"
-
-For ppc64:
-	"1 maxcpus=1 noirqdistrib reset_devices"
-
-For s390x:
-	"1 maxcpus=1 cgroup_disable=memory"
-
-For arm:
-	"1 maxcpus=1 reset_devices"
-
-For arm64:
-	"1 maxcpus=1 reset_devices"
-
-Notes on loading the dump-capture kernel:
-
-* By default, the ELF headers are stored in ELF64 format to support
-  systems with more than 4GB memory. On i386, kexec automatically checks if
-  the physical RAM size exceeds the 4 GB limit and if not, uses ELF32.
-  So, on non-PAE systems, ELF32 is always used.
-
-  The --elf32-core-headers option can be used to force the generation of ELF32
-  headers. This is necessary because GDB currently cannot open vmcore files
-  with ELF64 headers on 32-bit systems.
-
-* The "irqpoll" boot parameter reduces driver initialization failures
-  due to shared interrupts in the dump-capture kernel.
-
-* You must specify <root-dev> in the format corresponding to the root
-  device name in the output of mount command.
-
-* Boot parameter "1" boots the dump-capture kernel into single-user
-  mode without networking. If you want networking, use "3".
-
-* We generally don't have to bring up a SMP kernel just to capture the
-  dump. Hence generally it is useful either to build a UP dump-capture
-  kernel or specify maxcpus=1 option while loading dump-capture kernel.
-  Note, though maxcpus always works, you had better replace it with
-  nr_cpus to save memory if supported by the current ARCH, such as x86.
-
-* You should enable multi-cpu support in dump-capture kernel if you intend
-  to use multi-thread programs with it, such as parallel dump feature of
-  makedumpfile. Otherwise, the multi-thread program may have a great
-  performance degradation. To enable multi-cpu support, you should bring up an
-  SMP dump-capture kernel and specify maxcpus/nr_cpus, disable_cpu_apicid=[X]
-  options while loading it.
-
-* For s390x there are two kdump modes: If a ELF header is specified with
-  the elfcorehdr= kernel parameter, it is used by the kdump kernel as it
-  is done on all other architectures. If no elfcorehdr= kernel parameter is
-  specified, the s390x kdump kernel dynamically creates the header. The
-  second mode has the advantage that for CPU and memory hotplug, kdump has
-  not to be reloaded with kexec_load().
-
-* For s390x systems with many attached devices the "cio_ignore" kernel
-  parameter should be used for the kdump kernel in order to prevent allocation
-  of kernel memory for devices that are not relevant for kdump. The same
-  applies to systems that use SCSI/FCP devices. In that case the
-  "allow_lun_scan" zfcp module parameter should be set to zero before
-  setting FCP devices online.
-
-Kernel Panic
-============
-
-After successfully loading the dump-capture kernel as previously
-described, the system will reboot into the dump-capture kernel if a
-system crash is triggered.  Trigger points are located in panic(),
-die(), die_nmi() and in the sysrq handler (ALT-SysRq-c).
-
-The following conditions will execute a crash trigger point:
-
-If a hard lockup is detected and "NMI watchdog" is configured, the system
-will boot into the dump-capture kernel ( die_nmi() ).
-
-If die() is called, and it happens to be a thread with pid 0 or 1, or die()
-is called inside interrupt context or die() is called and panic_on_oops is set,
-the system will boot into the dump-capture kernel.
-
-On powerpc systems when a soft-reset is generated, die() is called by all cpus
-and the system will boot into the dump-capture kernel.
-
-For testing purposes, you can trigger a crash by using "ALT-SysRq-c",
-"echo c > /proc/sysrq-trigger" or write a module to force the panic.
-
-Write Out the Dump File
-=======================
-
-After the dump-capture kernel is booted, write out the dump file with
-the following command:
-
-   cp /proc/vmcore <dump-file>
-
-
-Analysis
-========
-
-Before analyzing the dump image, you should reboot into a stable kernel.
-
-You can do limited analysis using GDB on the dump file copied out of
-/proc/vmcore. Use the debug vmlinux built with -g and run the following
-command:
-
-   gdb vmlinux <dump-file>
-
-Stack trace for the task on processor 0, register display, and memory
-display work fine.
-
-Note: GDB cannot analyze core files generated in ELF64 format for x86.
-On systems with a maximum of 4GB of memory, you can generate
-ELF32-format headers using the --elf32-core-headers kernel option on the
-dump kernel.
-
-You can also use the Crash utility to analyze dump files in Kdump
-format. Crash is available on Dave Anderson's site at the following URL:
-
-   http://people.redhat.com/~anderson/
-
-Trigger Kdump on WARN()
-=======================
-
-The kernel parameter, panic_on_warn, calls panic() in all WARN() paths.  This
-will cause a kdump to occur at the panic() call.  In cases where a user wants
-to specify this during runtime, /proc/sys/kernel/panic_on_warn can be set to 1
-to achieve the same behaviour.
-
-Contact
-=======
-
-Vivek Goyal (vgoyal@redhat.com)
-Maneesh Soni (maneesh@in.ibm.com)
-
diff --git a/Documentation/kdump/vmcoreinfo.rst b/Documentation/kdump/vmcoreinfo.rst
new file mode 100644
index 000000000000..007a6b86e0ee
--- /dev/null
+++ b/Documentation/kdump/vmcoreinfo.rst
@@ -0,0 +1,488 @@
+==========
+VMCOREINFO
+==========
+
+What is it?
+===========
+
+VMCOREINFO is a special ELF note section. It contains various
+information from the kernel like structure size, page size, symbol
+values, field offsets, etc. These data are packed into an ELF note
+section and used by user-space tools like crash and makedumpfile to
+analyze a kernel's memory layout.
+
+Common variables
+================
+
+init_uts_ns.name.release
+------------------------
+
+The version of the Linux kernel. Used to find the corresponding source
+code from which the kernel has been built. For example, crash uses it to
+find the corresponding vmlinux in order to process vmcore.
+
+PAGE_SIZE
+---------
+
+The size of a page. It is the smallest unit of data used by the memory
+management facilities. It is usually 4096 bytes of size and a page is
+aligned on 4096 bytes. Used for computing page addresses.
+
+init_uts_ns
+-----------
+
+The UTS namespace which is used to isolate two specific elements of the
+system that relate to the uname(2) system call. It is named after the
+data structure used to store information returned by the uname(2) system
+call.
+
+User-space tools can get the kernel name, host name, kernel release
+number, kernel version, architecture name and OS type from it.
+
+node_online_map
+---------------
+
+An array node_states[N_ONLINE] which represents the set of online nodes
+in a system, one bit position per node number. Used to keep track of
+which nodes are in the system and online.
+
+swapper_pg_dir
+--------------
+
+The global page directory pointer of the kernel. Used to translate
+virtual to physical addresses.
+
+_stext
+------
+
+Defines the beginning of the text section. In general, _stext indicates
+the kernel start address. Used to convert a virtual address from the
+direct kernel map to a physical address.
+
+vmap_area_list
+--------------
+
+Stores the virtual area list. makedumpfile gets the vmalloc start value
+from this variable and its value is necessary for vmalloc translation.
+
+mem_map
+-------
+
+Physical addresses are translated to struct pages by treating them as
+an index into the mem_map array. Right-shifting a physical address
+PAGE_SHIFT bits converts it into a page frame number which is an index
+into that mem_map array.
+
+Used to map an address to the corresponding struct page.
+
+contig_page_data
+----------------
+
+Makedumpfile gets the pglist_data structure from this symbol, which is
+used to describe the memory layout.
+
+User-space tools use this to exclude free pages when dumping memory.
+
+mem_section|(mem_section, NR_SECTION_ROOTS)|(mem_section, section_mem_map)
+--------------------------------------------------------------------------
+
+The address of the mem_section array, its length, structure size, and
+the section_mem_map offset.
+
+It exists in the sparse memory mapping model, and it is also somewhat
+similar to the mem_map variable, both of them are used to translate an
+address.
+
+page
+----
+
+The size of a page structure. struct page is an important data structure
+and it is widely used to compute contiguous memory.
+
+pglist_data
+-----------
+
+The size of a pglist_data structure. This value is used to check if the
+pglist_data structure is valid. It is also used for checking the memory
+type.
+
+zone
+----
+
+The size of a zone structure. This value is used to check if the zone
+structure has been found. It is also used for excluding free pages.
+
+free_area
+---------
+
+The size of a free_area structure. It indicates whether the free_area
+structure is valid or not. Useful when excluding free pages.
+
+list_head
+---------
+
+The size of a list_head structure. Used when iterating lists in a
+post-mortem analysis session.
+
+nodemask_t
+----------
+
+The size of a nodemask_t type. Used to compute the number of online
+nodes.
+
+(page, flags|_refcount|mapping|lru|_mapcount|private|compound_dtor|compound_order|compound_head)
+-------------------------------------------------------------------------------------------------
+
+User-space tools compute their values based on the offset of these
+variables. The variables are used when excluding unnecessary pages.
+
+(pglist_data, node_zones|nr_zones|node_mem_map|node_start_pfn|node_spanned_pages|node_id)
+-----------------------------------------------------------------------------------------
+
+On NUMA machines, each NUMA node has a pg_data_t to describe its memory
+layout. On UMA machines there is a single pglist_data which describes the
+whole memory.
+
+These values are used to check the memory type and to compute the
+virtual address for memory map.
+
+(zone, free_area|vm_stat|spanned_pages)
+---------------------------------------
+
+Each node is divided into a number of blocks called zones which
+represent ranges within memory. A zone is described by a structure zone.
+
+User-space tools compute required values based on the offset of these
+variables.
+
+(free_area, free_list)
+----------------------
+
+Offset of the free_list's member. This value is used to compute the number
+of free pages.
+
+Each zone has a free_area structure array called free_area[MAX_ORDER].
+The free_list represents a linked list of free page blocks.
+
+(list_head, next|prev)
+----------------------
+
+Offsets of the list_head's members. list_head is used to define a
+circular linked list. User-space tools need these in order to traverse
+lists.
+
+(vmap_area, va_start|list)
+--------------------------
+
+Offsets of the vmap_area's members. They carry vmalloc-specific
+information. Makedumpfile gets the start address of the vmalloc region
+from this.
+
+(zone.free_area, MAX_ORDER)
+---------------------------
+
+Free areas descriptor. User-space tools use this value to iterate the
+free_area ranges. MAX_ORDER is used by the zone buddy allocator.
+
+log_first_idx
+-------------
+
+Index of the first record stored in the buffer log_buf. Used by
+user-space tools to read the strings in the log_buf.
+
+log_buf
+-------
+
+Console output is written to the ring buffer log_buf at index
+log_first_idx. Used to get the kernel log.
+
+log_buf_len
+-----------
+
+log_buf's length.
+
+clear_idx
+---------
+
+The index that the next printk() record to read after the last clear
+command. It indicates the first record after the last SYSLOG_ACTION
+_CLEAR, like issued by 'dmesg -c'. Used by user-space tools to dump
+the dmesg log.
+
+log_next_idx
+------------
+
+The index of the next record to store in the buffer log_buf. Used to
+compute the index of the current buffer position.
+
+printk_log
+----------
+
+The size of a structure printk_log. Used to compute the size of
+messages, and extract dmesg log. It encapsulates header information for
+log_buf, such as timestamp, syslog level, etc.
+
+(printk_log, ts_nsec|len|text_len|dict_len)
+-------------------------------------------
+
+It represents field offsets in struct printk_log. User space tools
+parse it and check whether the values of printk_log's members have been
+changed.
+
+(free_area.free_list, MIGRATE_TYPES)
+------------------------------------
+
+The number of migrate types for pages. The free_list is described by the
+array. Used by tools to compute the number of free pages.
+
+NR_FREE_PAGES
+-------------
+
+On linux-2.6.21 or later, the number of free pages is in
+vm_stat[NR_FREE_PAGES]. Used to get the number of free pages.
+
+PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_slab|PG_hwpoision|PG_head_mask
+------------------------------------------------------------------------------
+
+Page attributes. These flags are used to filter various unnecessary for
+dumping pages.
+
+PAGE_BUDDY_MAPCOUNT_VALUE(~PG_buddy)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_offline)
+-----------------------------------------------------------------------------
+
+More page attributes. These flags are used to filter various unnecessary for
+dumping pages.
+
+
+HUGETLB_PAGE_DTOR
+-----------------
+
+The HUGETLB_PAGE_DTOR flag denotes hugetlbfs pages. Makedumpfile
+excludes these pages.
+
+x86_64
+======
+
+phys_base
+---------
+
+Used to convert the virtual address of an exported kernel symbol to its
+corresponding physical address.
+
+init_top_pgt
+------------
+
+Used to walk through the whole page table and convert virtual addresses
+to physical addresses. The init_top_pgt is somewhat similar to
+swapper_pg_dir, but it is only used in x86_64.
+
+pgtable_l5_enabled
+------------------
+
+User-space tools need to know whether the crash kernel was in 5-level
+paging mode.
+
+node_data
+---------
+
+This is a struct pglist_data array and stores all NUMA nodes
+information. Makedumpfile gets the pglist_data structure from it.
+
+(node_data, MAX_NUMNODES)
+-------------------------
+
+The maximum number of nodes in system.
+
+KERNELOFFSET
+------------
+
+The kernel randomization offset. Used to compute the page offset. If
+KASLR is disabled, this value is zero.
+
+KERNEL_IMAGE_SIZE
+-----------------
+
+Currently unused by Makedumpfile. Used to compute the module virtual
+address by Crash.
+
+sme_mask
+--------
+
+AMD-specific with SME support: it indicates the secure memory encryption
+mask. Makedumpfile tools need to know whether the crash kernel was
+encrypted. If SME is enabled in the first kernel, the crash kernel's
+page table entries (pgd/pud/pmd/pte) contain the memory encryption
+mask. This is used to remove the SME mask and obtain the true physical
+address.
+
+Currently, sme_mask stores the value of the C-bit position. If needed,
+additional SME-relevant info can be placed in that variable.
+
+For example::
+
+  [ misc	        ][ enc bit  ][ other misc SME info       ]
+  0000_0000_0000_0000_1000_0000_0000_0000_0000_0000_..._0000
+  63   59   55   51   47   43   39   35   31   27   ... 3
+
+x86_32
+======
+
+X86_PAE
+-------
+
+Denotes whether physical address extensions are enabled. It has the cost
+of a higher page table lookup overhead, and also consumes more page
+table space per process. Used to check whether PAE was enabled in the
+crash kernel when converting virtual addresses to physical addresses.
+
+ia64
+====
+
+pgdat_list|(pgdat_list, MAX_NUMNODES)
+-------------------------------------
+
+pg_data_t array storing all NUMA nodes information. MAX_NUMNODES
+indicates the number of the nodes.
+
+node_memblk|(node_memblk, NR_NODE_MEMBLKS)
+------------------------------------------
+
+List of node memory chunks. Filled when parsing the SRAT table to obtain
+information about memory nodes. NR_NODE_MEMBLKS indicates the number of
+node memory chunks.
+
+These values are used to compute the number of nodes the crashed kernel used.
+
+node_memblk_s|(node_memblk_s, start_paddr)|(node_memblk_s, size)
+----------------------------------------------------------------
+
+The size of a struct node_memblk_s and the offsets of the
+node_memblk_s's members. Used to compute the number of nodes.
+
+PGTABLE_3|PGTABLE_4
+-------------------
+
+User-space tools need to know whether the crash kernel was in 3-level or
+4-level paging mode. Used to distinguish the page table.
+
+ARM64
+=====
+
+VA_BITS
+-------
+
+The maximum number of bits for virtual addresses. Used to compute the
+virtual memory ranges.
+
+kimage_voffset
+--------------
+
+The offset between the kernel virtual and physical mappings. Used to
+translate virtual to physical addresses.
+
+PHYS_OFFSET
+-----------
+
+Indicates the physical address of the start of memory. Similar to
+kimage_voffset, which is used to translate virtual to physical
+addresses.
+
+KERNELOFFSET
+------------
+
+The kernel randomization offset. Used to compute the page offset. If
+KASLR is disabled, this value is zero.
+
+arm
+===
+
+ARM_LPAE
+--------
+
+It indicates whether the crash kernel supports large physical address
+extensions. Used to translate virtual to physical addresses.
+
+s390
+====
+
+lowcore_ptr
+-----------
+
+An array with a pointer to the lowcore of every CPU. Used to print the
+psw and all registers information.
+
+high_memory
+-----------
+
+Used to get the vmalloc_start address from the high_memory symbol.
+
+(lowcore_ptr, NR_CPUS)
+----------------------
+
+The maximum number of CPUs.
+
+powerpc
+=======
+
+
+node_data|(node_data, MAX_NUMNODES)
+-----------------------------------
+
+See above.
+
+contig_page_data
+----------------
+
+See above.
+
+vmemmap_list
+------------
+
+The vmemmap_list maintains the entire vmemmap physical mapping. Used
+to get vmemmap list count and populated vmemmap regions info. If the
+vmemmap address translation information is stored in the crash kernel,
+it is used to translate vmemmap kernel virtual addresses.
+
+mmu_vmemmap_psize
+-----------------
+
+The size of a page. Used to translate virtual to physical addresses.
+
+mmu_psize_defs
+--------------
+
+Page size definitions, i.e. 4k, 64k, or 16M.
+
+Used to make vtop translations.
+
+vmemmap_backing|(vmemmap_backing, list)|(vmemmap_backing, phys)|(vmemmap_backing, virt_addr)
+--------------------------------------------------------------------------------------------
+
+The vmemmap virtual address space management does not have a traditional
+page table to track which virtual struct pages are backed by a physical
+mapping. The virtual to physical mappings are tracked in a simple linked
+list format.
+
+User-space tools need to know the offset of list, phys and virt_addr
+when computing the count of vmemmap regions.
+
+mmu_psize_def|(mmu_psize_def, shift)
+------------------------------------
+
+The size of a struct mmu_psize_def and the offset of mmu_psize_def's
+member.
+
+Used in vtop translations.
+
+sh
+==
+
+node_data|(node_data, MAX_NUMNODES)
+-----------------------------------
+
+See above.
+
+X2TLB
+-----
+
+Indicates whether the crashed kernel enabled SH extended mode.
diff --git a/Documentation/kdump/vmcoreinfo.txt b/Documentation/kdump/vmcoreinfo.txt
deleted file mode 100644
index bb94a4bd597a..000000000000
--- a/Documentation/kdump/vmcoreinfo.txt
+++ /dev/null
@@ -1,495 +0,0 @@
-================================================================
-			VMCOREINFO
-================================================================
-
-===========
-What is it?
-===========
-
-VMCOREINFO is a special ELF note section. It contains various
-information from the kernel like structure size, page size, symbol
-values, field offsets, etc. These data are packed into an ELF note
-section and used by user-space tools like crash and makedumpfile to
-analyze a kernel's memory layout.
-
-================
-Common variables
-================
-
-init_uts_ns.name.release
-------------------------
-
-The version of the Linux kernel. Used to find the corresponding source
-code from which the kernel has been built. For example, crash uses it to
-find the corresponding vmlinux in order to process vmcore.
-
-PAGE_SIZE
----------
-
-The size of a page. It is the smallest unit of data used by the memory
-management facilities. It is usually 4096 bytes of size and a page is
-aligned on 4096 bytes. Used for computing page addresses.
-
-init_uts_ns
------------
-
-The UTS namespace which is used to isolate two specific elements of the
-system that relate to the uname(2) system call. It is named after the
-data structure used to store information returned by the uname(2) system
-call.
-
-User-space tools can get the kernel name, host name, kernel release
-number, kernel version, architecture name and OS type from it.
-
-node_online_map
----------------
-
-An array node_states[N_ONLINE] which represents the set of online nodes
-in a system, one bit position per node number. Used to keep track of
-which nodes are in the system and online.
-
-swapper_pg_dir
--------------
-
-The global page directory pointer of the kernel. Used to translate
-virtual to physical addresses.
-
-_stext
-------
-
-Defines the beginning of the text section. In general, _stext indicates
-the kernel start address. Used to convert a virtual address from the
-direct kernel map to a physical address.
-
-vmap_area_list
---------------
-
-Stores the virtual area list. makedumpfile gets the vmalloc start value
-from this variable and its value is necessary for vmalloc translation.
-
-mem_map
--------
-
-Physical addresses are translated to struct pages by treating them as
-an index into the mem_map array. Right-shifting a physical address
-PAGE_SHIFT bits converts it into a page frame number which is an index
-into that mem_map array.
-
-Used to map an address to the corresponding struct page.
-
-contig_page_data
-----------------
-
-Makedumpfile gets the pglist_data structure from this symbol, which is
-used to describe the memory layout.
-
-User-space tools use this to exclude free pages when dumping memory.
-
-mem_section|(mem_section, NR_SECTION_ROOTS)|(mem_section, section_mem_map)
---------------------------------------------------------------------------
-
-The address of the mem_section array, its length, structure size, and
-the section_mem_map offset.
-
-It exists in the sparse memory mapping model, and it is also somewhat
-similar to the mem_map variable, both of them are used to translate an
-address.
-
-page
-----
-
-The size of a page structure. struct page is an important data structure
-and it is widely used to compute contiguous memory.
-
-pglist_data
------------
-
-The size of a pglist_data structure. This value is used to check if the
-pglist_data structure is valid. It is also used for checking the memory
-type.
-
-zone
-----
-
-The size of a zone structure. This value is used to check if the zone
-structure has been found. It is also used for excluding free pages.
-
-free_area
----------
-
-The size of a free_area structure. It indicates whether the free_area
-structure is valid or not. Useful when excluding free pages.
-
-list_head
----------
-
-The size of a list_head structure. Used when iterating lists in a
-post-mortem analysis session.
-
-nodemask_t
-----------
-
-The size of a nodemask_t type. Used to compute the number of online
-nodes.
-
-(page, flags|_refcount|mapping|lru|_mapcount|private|compound_dtor|
-       compound_order|compound_head)
--------------------------------------------------------------------
-
-User-space tools compute their values based on the offset of these
-variables. The variables are used when excluding unnecessary pages.
-
-(pglist_data, node_zones|nr_zones|node_mem_map|node_start_pfn|node_
-              spanned_pages|node_id)
--------------------------------------------------------------------
-
-On NUMA machines, each NUMA node has a pg_data_t to describe its memory
-layout. On UMA machines there is a single pglist_data which describes the
-whole memory.
-
-These values are used to check the memory type and to compute the
-virtual address for memory map.
-
-(zone, free_area|vm_stat|spanned_pages)
----------------------------------------
-
-Each node is divided into a number of blocks called zones which
-represent ranges within memory. A zone is described by a structure zone.
-
-User-space tools compute required values based on the offset of these
-variables.
-
-(free_area, free_list)
-----------------------
-
-Offset of the free_list's member. This value is used to compute the number
-of free pages.
-
-Each zone has a free_area structure array called free_area[MAX_ORDER].
-The free_list represents a linked list of free page blocks.
-
-(list_head, next|prev)
-----------------------
-
-Offsets of the list_head's members. list_head is used to define a
-circular linked list. User-space tools need these in order to traverse
-lists.
-
-(vmap_area, va_start|list)
---------------------------
-
-Offsets of the vmap_area's members. They carry vmalloc-specific
-information. Makedumpfile gets the start address of the vmalloc region
-from this.
-
-(zone.free_area, MAX_ORDER)
----------------------------
-
-Free areas descriptor. User-space tools use this value to iterate the
-free_area ranges. MAX_ORDER is used by the zone buddy allocator.
-
-log_first_idx
--------------
-
-Index of the first record stored in the buffer log_buf. Used by
-user-space tools to read the strings in the log_buf.
-
-log_buf
--------
-
-Console output is written to the ring buffer log_buf at index
-log_first_idx. Used to get the kernel log.
-
-log_buf_len
------------
-
-log_buf's length.
-
-clear_idx
----------
-
-The index that the next printk() record to read after the last clear
-command. It indicates the first record after the last SYSLOG_ACTION
-_CLEAR, like issued by 'dmesg -c'. Used by user-space tools to dump
-the dmesg log.
-
-log_next_idx
-------------
-
-The index of the next record to store in the buffer log_buf. Used to
-compute the index of the current buffer position.
-
-printk_log
-----------
-
-The size of a structure printk_log. Used to compute the size of
-messages, and extract dmesg log. It encapsulates header information for
-log_buf, such as timestamp, syslog level, etc.
-
-(printk_log, ts_nsec|len|text_len|dict_len)
--------------------------------------------
-
-It represents field offsets in struct printk_log. User space tools
-parse it and check whether the values of printk_log's members have been
-changed.
-
-(free_area.free_list, MIGRATE_TYPES)
-------------------------------------
-
-The number of migrate types for pages. The free_list is described by the
-array. Used by tools to compute the number of free pages.
-
-NR_FREE_PAGES
--------------
-
-On linux-2.6.21 or later, the number of free pages is in
-vm_stat[NR_FREE_PAGES]. Used to get the number of free pages.
-
-PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_slab|PG_hwpoision
-|PG_head_mask|PAGE_BUDDY_MAPCOUNT_VALUE(~PG_buddy)
-|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_offline)
------------------------------------------------------------------
-
-Page attributes. These flags are used to filter various unnecessary for
-dumping pages.
-
-HUGETLB_PAGE_DTOR
------------------
-
-The HUGETLB_PAGE_DTOR flag denotes hugetlbfs pages. Makedumpfile
-excludes these pages.
-
-======
-x86_64
-======
-
-phys_base
----------
-
-Used to convert the virtual address of an exported kernel symbol to its
-corresponding physical address.
-
-init_top_pgt
-------------
-
-Used to walk through the whole page table and convert virtual addresses
-to physical addresses. The init_top_pgt is somewhat similar to
-swapper_pg_dir, but it is only used in x86_64.
-
-pgtable_l5_enabled
-------------------
-
-User-space tools need to know whether the crash kernel was in 5-level
-paging mode.
-
-node_data
----------
-
-This is a struct pglist_data array and stores all NUMA nodes
-information. Makedumpfile gets the pglist_data structure from it.
-
-(node_data, MAX_NUMNODES)
--------------------------
-
-The maximum number of nodes in system.
-
-KERNELOFFSET
-------------
-
-The kernel randomization offset. Used to compute the page offset. If
-KASLR is disabled, this value is zero.
-
-KERNEL_IMAGE_SIZE
------------------
-
-Currently unused by Makedumpfile. Used to compute the module virtual
-address by Crash.
-
-sme_mask
---------
-
-AMD-specific with SME support: it indicates the secure memory encryption
-mask. Makedumpfile tools need to know whether the crash kernel was
-encrypted. If SME is enabled in the first kernel, the crash kernel's
-page table entries (pgd/pud/pmd/pte) contain the memory encryption
-mask. This is used to remove the SME mask and obtain the true physical
-address.
-
-Currently, sme_mask stores the value of the C-bit position. If needed,
-additional SME-relevant info can be placed in that variable.
-
-For example:
-[ misc	        ][ enc bit  ][ other misc SME info       ]
-0000_0000_0000_0000_1000_0000_0000_0000_0000_0000_..._0000
-63   59   55   51   47   43   39   35   31   27   ... 3
-
-======
-x86_32
-======
-
-X86_PAE
--------
-
-Denotes whether physical address extensions are enabled. It has the cost
-of a higher page table lookup overhead, and also consumes more page
-table space per process. Used to check whether PAE was enabled in the
-crash kernel when converting virtual addresses to physical addresses.
-
-====
-ia64
-====
-
-pgdat_list|(pgdat_list, MAX_NUMNODES)
--------------------------------------
-
-pg_data_t array storing all NUMA nodes information. MAX_NUMNODES
-indicates the number of the nodes.
-
-node_memblk|(node_memblk, NR_NODE_MEMBLKS)
-------------------------------------------
-
-List of node memory chunks. Filled when parsing the SRAT table to obtain
-information about memory nodes. NR_NODE_MEMBLKS indicates the number of
-node memory chunks.
-
-These values are used to compute the number of nodes the crashed kernel used.
-
-node_memblk_s|(node_memblk_s, start_paddr)|(node_memblk_s, size)
-----------------------------------------------------------------
-
-The size of a struct node_memblk_s and the offsets of the
-node_memblk_s's members. Used to compute the number of nodes.
-
-PGTABLE_3|PGTABLE_4
--------------------
-
-User-space tools need to know whether the crash kernel was in 3-level or
-4-level paging mode. Used to distinguish the page table.
-
-=====
-ARM64
-=====
-
-VA_BITS
--------
-
-The maximum number of bits for virtual addresses. Used to compute the
-virtual memory ranges.
-
-kimage_voffset
---------------
-
-The offset between the kernel virtual and physical mappings. Used to
-translate virtual to physical addresses.
-
-PHYS_OFFSET
------------
-
-Indicates the physical address of the start of memory. Similar to
-kimage_voffset, which is used to translate virtual to physical
-addresses.
-
-KERNELOFFSET
-------------
-
-The kernel randomization offset. Used to compute the page offset. If
-KASLR is disabled, this value is zero.
-
-====
-arm
-====
-
-ARM_LPAE
---------
-
-It indicates whether the crash kernel supports large physical address
-extensions. Used to translate virtual to physical addresses.
-
-====
-s390
-====
-
-lowcore_ptr
-----------
-
-An array with a pointer to the lowcore of every CPU. Used to print the
-psw and all registers information.
-
-high_memory
------------
-
-Used to get the vmalloc_start address from the high_memory symbol.
-
-(lowcore_ptr, NR_CPUS)
-----------------------
-
-The maximum number of CPUs.
-
-=======
-powerpc
-=======
-
-
-node_data|(node_data, MAX_NUMNODES)
------------------------------------
-
-See above.
-
-contig_page_data
-----------------
-
-See above.
-
-vmemmap_list
-------------
-
-The vmemmap_list maintains the entire vmemmap physical mapping. Used
-to get vmemmap list count and populated vmemmap regions info. If the
-vmemmap address translation information is stored in the crash kernel,
-it is used to translate vmemmap kernel virtual addresses.
-
-mmu_vmemmap_psize
------------------
-
-The size of a page. Used to translate virtual to physical addresses.
-
-mmu_psize_defs
---------------
-
-Page size definitions, i.e. 4k, 64k, or 16M.
-
-Used to make vtop translations.
-
-vmemmap_backing|(vmemmap_backing, list)|(vmemmap_backing, phys)|
-(vmemmap_backing, virt_addr)
-----------------------------------------------------------------
-
-The vmemmap virtual address space management does not have a traditional
-page table to track which virtual struct pages are backed by a physical
-mapping. The virtual to physical mappings are tracked in a simple linked
-list format.
-
-User-space tools need to know the offset of list, phys and virt_addr
-when computing the count of vmemmap regions.
-
-mmu_psize_def|(mmu_psize_def, shift)
-------------------------------------
-
-The size of a struct mmu_psize_def and the offset of mmu_psize_def's
-member.
-
-Used in vtop translations.
-
-==
-sh
-==
-
-node_data|(node_data, MAX_NUMNODES)
------------------------------------
-
-See above.
-
-X2TLB
------
-
-Indicates whether the crashed kernel enabled SH extended mode.
diff --git a/Documentation/powerpc/firmware-assisted-dump.txt b/Documentation/powerpc/firmware-assisted-dump.txt
index 18c5feef2577..0c41d6d463f3 100644
--- a/Documentation/powerpc/firmware-assisted-dump.txt
+++ b/Documentation/powerpc/firmware-assisted-dump.txt
@@ -59,7 +59,7 @@ as follows:
          the default calculated size. Use this option if default
          boot memory size is not sufficient for second kernel to
          boot successfully. For syntax of crashkernel= parameter,
-         refer to Documentation/kdump/kdump.txt. If any offset is
+         refer to Documentation/kdump/kdump.rst. If any offset is
          provided in crashkernel= parameter, it will be ignored
          as fadump uses a predefined offset to reserve memory
          for boot memory dump preservation in case of a crash.
diff --git a/Documentation/translations/zh_CN/oops-tracing.txt b/Documentation/translations/zh_CN/oops-tracing.txt
index 93fa061cf9e4..368ddd05b304 100644
--- a/Documentation/translations/zh_CN/oops-tracing.txt
+++ b/Documentation/translations/zh_CN/oops-tracing.txt
@@ -53,7 +53,7 @@ cat /proc/kmsg > file， 然而你必须介入中止传输， kmsg是一个“
 （2）用串口终端启动（请参看Documentation/admin-guide/serial-console.rst），运行一个null
 modem到另一台机器并用你喜欢的通讯工具获取输出。Minicom工作地很好。
 
-（3）使用Kdump（请参看Documentation/kdump/kdump.txt），
+（3）使用Kdump（请参看Documentation/kdump/kdump.rst），
 使用在Documentation/kdump/gdbmacros.txt中定义的dmesg gdb宏，从旧的内存中提取内核
 环形缓冲区。
 
diff --git a/Documentation/watchdog/hpwdt.txt b/Documentation/watchdog/hpwdt.txt
index 55df692c5595..aaa9e4b4bdcd 100644
--- a/Documentation/watchdog/hpwdt.txt
+++ b/Documentation/watchdog/hpwdt.txt
@@ -51,7 +51,7 @@ Last reviewed: 08/20/2018
  and loop forever.  This is generally not what a watchdog user wants.
 
  For those wishing to learn more please see:
-	Documentation/kdump/kdump.txt
+	Documentation/kdump/kdump.rst
 	Documentation/admin-guide/kernel-parameters.txt (panic=)
 	Your Linux Distribution specific documentation.
 
-- 
cgit 


From 09bbf055c3329008522b4a9814afe412c202daa7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:51 -0300
Subject: docs: mic: convert docs to ReST and rename to *.rst

Convert Intel Many Integrated Core architecture docs to ReST.

The conversion is trivial: just add title and literal block
markups, and adjust some identation.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/mic/index.rst         |  18 ++++++
 Documentation/mic/mic_overview.rst  |  85 ++++++++++++++++++++++++++++
 Documentation/mic/mic_overview.txt  |  81 ---------------------------
 Documentation/mic/scif_overview.rst | 108 ++++++++++++++++++++++++++++++++++++
 Documentation/mic/scif_overview.txt |  98 --------------------------------
 5 files changed, 211 insertions(+), 179 deletions(-)
 create mode 100644 Documentation/mic/index.rst
 create mode 100644 Documentation/mic/mic_overview.rst
 delete mode 100644 Documentation/mic/mic_overview.txt
 create mode 100644 Documentation/mic/scif_overview.rst
 delete mode 100644 Documentation/mic/scif_overview.txt

(limited to 'Documentation')

diff --git a/Documentation/mic/index.rst b/Documentation/mic/index.rst
new file mode 100644
index 000000000000..082fa8f6a260
--- /dev/null
+++ b/Documentation/mic/index.rst
@@ -0,0 +1,18 @@
+:orphan:
+
+=============================================
+Intel Many Integrated Core (MIC) architecture
+=============================================
+
+.. toctree::
+    :maxdepth: 1
+
+    mic_overview
+    scif_overview
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/mic/mic_overview.rst b/Documentation/mic/mic_overview.rst
new file mode 100644
index 000000000000..17d956bdaf7c
--- /dev/null
+++ b/Documentation/mic/mic_overview.rst
@@ -0,0 +1,85 @@
+======================================================
+Intel Many Integrated Core (MIC) architecture overview
+======================================================
+
+An Intel MIC X100 device is a PCIe form factor add-in coprocessor
+card based on the Intel Many Integrated Core (MIC) architecture
+that runs a Linux OS. It is a PCIe endpoint in a platform and therefore
+implements the three required standard address spaces i.e. configuration,
+memory and I/O. The host OS loads a device driver as is typical for
+PCIe devices. The card itself runs a bootstrap after reset that
+transfers control to the card OS downloaded from the host driver. The
+host driver supports OSPM suspend and resume operations. It shuts down
+the card during suspend and reboots the card OS during resume.
+The card OS as shipped by Intel is a Linux kernel with modifications
+for the X100 devices.
+
+Since it is a PCIe card, it does not have the ability to host hardware
+devices for networking, storage and console. We provide these devices
+on X100 coprocessors thus enabling a self-bootable equivalent
+environment for applications. A key benefit of our solution is that it
+leverages the standard virtio framework for network, disk and console
+devices, though in our case the virtio framework is used across a PCIe
+bus. A Virtio Over PCIe (VOP) driver allows creating user space
+backends or devices on the host which are used to probe virtio drivers
+for these devices on the MIC card. The existing VRINGH infrastructure
+in the kernel is used to access virtio rings from the host. The card
+VOP driver allows card virtio drivers to communicate with their user
+space backends on the host via a device page. Ring 3 apps on the host
+can add, remove and configure virtio devices. A thin MIC specific
+virtio_config_ops is implemented which is borrowed heavily from
+previous similar implementations in lguest and s390.
+
+MIC PCIe card has a dma controller with 8 channels. These channels are
+shared between the host s/w and the card s/w. 0 to 3 are used by host
+and 4 to 7 by card. As the dma device doesn't show up as PCIe device,
+a virtual bus called mic bus is created and virtual dma devices are
+created on it by the host/card drivers. On host the channels are private
+and used only by the host driver to transfer data for the virtio devices.
+
+The Symmetric Communication Interface (SCIF (pronounced as skiff)) is a
+low level communications API across PCIe currently implemented for MIC.
+More details are available at scif_overview.txt.
+
+The Coprocessor State Management (COSM) driver on the host allows for
+boot, shutdown and reset of Intel MIC devices. It communicates with a COSM
+"client" driver on the MIC cards over SCIF to perform these functions.
+
+Here is a block diagram of the various components described above. The
+virtio backends are situated on the host rather than the card given better
+single threaded performance for the host compared to MIC, the ability of
+the host to initiate DMA's to/from the card using the MIC DMA engine and
+the fact that the virtio block storage backend can only be on the host::
+
+               +----------+           |             +----------+
+               | Card OS  |           |             | Host OS  |
+               +----------+           |             +----------+
+                                      |
+        +-------+ +--------+ +------+ | +---------+  +--------+ +--------+
+        | Virtio| |Virtio  | |Virtio| | |Virtio   |  |Virtio  | |Virtio  |
+        | Net   | |Console | |Block | | |Net      |  |Console | |Block   |
+        | Driver| |Driver  | |Driver| | |backend  |  |backend | |backend |
+        +---+---+ +---+----+ +--+---+ | +---------+  +----+---+ +--------+
+            |         |         |     |      |            |         |
+            |         |         |     |User  |            |         |
+            |         |         |     |------|------------|--+------|-------
+            +---------+---------+     |Kernel                |
+                      |               |                      |
+  +---------+     +---+----+ +------+ | +------+ +------+ +--+---+  +-------+
+  |MIC DMA  |     |  VOP   | | SCIF | | | SCIF | | COSM | | VOP  |  |MIC DMA|
+  +---+-----+     +---+----+ +--+---+ | +--+---+ +--+---+ +------+  +----+--+
+      |               |         |     |    |        |                    |
+  +---+-----+     +---+----+ +--+---+ | +--+---+ +--+---+ +------+  +----+--+
+  |MIC      |     |  VOP   | |SCIF  | | |SCIF  | | COSM | | VOP  |  | MIC   |
+  |HW Bus   |     |  HW Bus| |HW Bus| | |HW Bus| | Bus  | |HW Bus|  |HW Bus |
+  +---------+     +--------+ +--+---+ | +--+---+ +------+ +------+  +-------+
+      |               |         |     |       |     |                    |
+      |   +-----------+--+      |     |       |    +---------------+     |
+      |   |Intel MIC     |      |     |       |    |Intel MIC      |     |
+      |   |Card Driver   |      |     |       |    |Host Driver    |     |
+      +---+--------------+------+     |       +----+---------------+-----+
+                 |                    |                   |
+             +-------------------------------------------------------------+
+             |                                                             |
+             |                    PCIe Bus                                 |
+             +-------------------------------------------------------------+
diff --git a/Documentation/mic/mic_overview.txt b/Documentation/mic/mic_overview.txt
deleted file mode 100644
index 074adbdf83a4..000000000000
--- a/Documentation/mic/mic_overview.txt
+++ /dev/null
@@ -1,81 +0,0 @@
-An Intel MIC X100 device is a PCIe form factor add-in coprocessor
-card based on the Intel Many Integrated Core (MIC) architecture
-that runs a Linux OS. It is a PCIe endpoint in a platform and therefore
-implements the three required standard address spaces i.e. configuration,
-memory and I/O. The host OS loads a device driver as is typical for
-PCIe devices. The card itself runs a bootstrap after reset that
-transfers control to the card OS downloaded from the host driver. The
-host driver supports OSPM suspend and resume operations. It shuts down
-the card during suspend and reboots the card OS during resume.
-The card OS as shipped by Intel is a Linux kernel with modifications
-for the X100 devices.
-
-Since it is a PCIe card, it does not have the ability to host hardware
-devices for networking, storage and console. We provide these devices
-on X100 coprocessors thus enabling a self-bootable equivalent
-environment for applications. A key benefit of our solution is that it
-leverages the standard virtio framework for network, disk and console
-devices, though in our case the virtio framework is used across a PCIe
-bus. A Virtio Over PCIe (VOP) driver allows creating user space
-backends or devices on the host which are used to probe virtio drivers
-for these devices on the MIC card. The existing VRINGH infrastructure
-in the kernel is used to access virtio rings from the host. The card
-VOP driver allows card virtio drivers to communicate with their user
-space backends on the host via a device page. Ring 3 apps on the host
-can add, remove and configure virtio devices. A thin MIC specific
-virtio_config_ops is implemented which is borrowed heavily from
-previous similar implementations in lguest and s390.
-
-MIC PCIe card has a dma controller with 8 channels. These channels are
-shared between the host s/w and the card s/w. 0 to 3 are used by host
-and 4 to 7 by card. As the dma device doesn't show up as PCIe device,
-a virtual bus called mic bus is created and virtual dma devices are
-created on it by the host/card drivers. On host the channels are private
-and used only by the host driver to transfer data for the virtio devices.
-
-The Symmetric Communication Interface (SCIF (pronounced as skiff)) is a
-low level communications API across PCIe currently implemented for MIC.
-More details are available at scif_overview.txt.
-
-The Coprocessor State Management (COSM) driver on the host allows for
-boot, shutdown and reset of Intel MIC devices. It communicates with a COSM
-"client" driver on the MIC cards over SCIF to perform these functions.
-
-Here is a block diagram of the various components described above. The
-virtio backends are situated on the host rather than the card given better
-single threaded performance for the host compared to MIC, the ability of
-the host to initiate DMA's to/from the card using the MIC DMA engine and
-the fact that the virtio block storage backend can only be on the host.
-
-               +----------+           |             +----------+
-               | Card OS  |           |             | Host OS  |
-               +----------+           |             +----------+
-                                      |
-        +-------+ +--------+ +------+ | +---------+  +--------+ +--------+
-        | Virtio| |Virtio  | |Virtio| | |Virtio   |  |Virtio  | |Virtio  |
-        | Net   | |Console | |Block | | |Net      |  |Console | |Block   |
-        | Driver| |Driver  | |Driver| | |backend  |  |backend | |backend |
-        +---+---+ +---+----+ +--+---+ | +---------+  +----+---+ +--------+
-            |         |         |     |      |            |         |
-            |         |         |     |User  |            |         |
-            |         |         |     |------|------------|--+------|-------
-            +---------+---------+     |Kernel                |
-                      |               |                      |
-  +---------+     +---+----+ +------+ | +------+ +------+ +--+---+  +-------+
-  |MIC DMA  |     |  VOP   | | SCIF | | | SCIF | | COSM | | VOP  |  |MIC DMA|
-  +---+-----+     +---+----+ +--+---+ | +--+---+ +--+---+ +------+  +----+--+
-      |               |         |     |    |        |                    |
-  +---+-----+     +---+----+ +--+---+ | +--+---+ +--+---+ +------+  +----+--+
-  |MIC      |     |  VOP   | |SCIF  | | |SCIF  | | COSM | | VOP  |  | MIC   |
-  |HW Bus   |     |  HW Bus| |HW Bus| | |HW Bus| | Bus  | |HW Bus|  |HW Bus |
-  +---------+     +--------+ +--+---+ | +--+---+ +------+ +------+  +-------+
-      |               |         |     |       |     |                    |
-      |   +-----------+--+      |     |       |    +---------------+     |
-      |   |Intel MIC     |      |     |       |    |Intel MIC      |     |
-      |   |Card Driver   |      |     |       |    |Host Driver    |     |
-      +---+--------------+------+     |       +----+---------------+-----+
-                 |                    |                   |
-             +-------------------------------------------------------------+
-             |                                                             |
-             |                    PCIe Bus                                 |
-             +-------------------------------------------------------------+
diff --git a/Documentation/mic/scif_overview.rst b/Documentation/mic/scif_overview.rst
new file mode 100644
index 000000000000..4c8ad9e43706
--- /dev/null
+++ b/Documentation/mic/scif_overview.rst
@@ -0,0 +1,108 @@
+========================================
+Symmetric Communication Interface (SCIF)
+========================================
+
+The Symmetric Communication Interface (SCIF (pronounced as skiff)) is a low
+level communications API across PCIe currently implemented for MIC. Currently
+SCIF provides inter-node communication within a single host platform, where a
+node is a MIC Coprocessor or Xeon based host. SCIF abstracts the details of
+communicating over the PCIe bus while providing an API that is symmetric
+across all the nodes in the PCIe network. An important design objective for SCIF
+is to deliver the maximum possible performance given the communication
+abilities of the hardware. SCIF has been used to implement an offload compiler
+runtime and OFED support for MPI implementations for MIC coprocessors.
+
+SCIF API Components
+===================
+
+The SCIF API has the following parts:
+
+1. Connection establishment using a client server model
+2. Byte stream messaging intended for short messages
+3. Node enumeration to determine online nodes
+4. Poll semantics for detection of incoming connections and messages
+5. Memory registration to pin down pages
+6. Remote memory mapping for low latency CPU accesses via mmap
+7. Remote DMA (RDMA) for high bandwidth DMA transfers
+8. Fence APIs for RDMA synchronization
+
+SCIF exposes the notion of a connection which can be used by peer processes on
+nodes in a SCIF PCIe "network" to share memory "windows" and to communicate. A
+process in a SCIF node initiates a SCIF connection to a peer process on a
+different node via a SCIF "endpoint". SCIF endpoints support messaging APIs
+which are similar to connection oriented socket APIs. Connected SCIF endpoints
+can also register local memory which is followed by data transfer using either
+DMA, CPU copies or remote memory mapping via mmap. SCIF supports both user and
+kernel mode clients which are functionally equivalent.
+
+SCIF Performance for MIC
+========================
+
+DMA bandwidth comparison between the TCP (over ethernet over PCIe) stack versus
+SCIF shows the performance advantages of SCIF for HPC applications and
+runtimes::
+
+             Comparison of TCP and SCIF based BW
+
+  Throughput (GB/sec)
+    8 +                                             PCIe Bandwidth ******
+      +                                                        TCP ######
+    7 +    **************************************             SCIF %%%%%%
+      |                       %%%%%%%%%%%%%%%%%%%
+    6 +                   %%%%
+      |                 %%
+      |               %%%
+    5 +              %%
+      |            %%
+    4 +           %%
+      |          %%
+    3 +         %%
+      |        %
+    2 +      %%
+      |     %%
+      |    %
+    1 +
+      +    ######################################
+    0 +++---+++--+--+-+--+--+-++-+--+-++-+--+-++-+-
+      1       10     100      1000   10000   100000
+                   Transfer Size (KBytes)
+
+SCIF allows memory sharing via mmap(..) between processes on different PCIe
+nodes and thus provides bare-metal PCIe latency. The round trip SCIF mmap
+latency from the host to an x100 MIC for an 8 byte message is 0.44 usecs.
+
+SCIF has a user space library which is a thin IOCTL wrapper providing a user
+space API similar to the kernel API in scif.h. The SCIF user space library
+is distributed @ https://software.intel.com/en-us/mic-developer
+
+Here is some pseudo code for an example of how two applications on two PCIe
+nodes would typically use the SCIF API::
+
+  Process A (on node A)			Process B (on node B)
+
+  /* get online node information */
+  scif_get_node_ids(..)			scif_get_node_ids(..)
+  scif_open(..)				scif_open(..)
+  scif_bind(..)				scif_bind(..)
+  scif_listen(..)
+  scif_accept(..)				scif_connect(..)
+  /* SCIF connection established */
+
+  /* Send and receive short messages */
+  scif_send(..)/scif_recv(..)		scif_send(..)/scif_recv(..)
+
+  /* Register memory */
+  scif_register(..)			scif_register(..)
+
+  /* RDMA */
+  scif_readfrom(..)/scif_writeto(..)	scif_readfrom(..)/scif_writeto(..)
+
+  /* Fence DMAs */
+  scif_fence_signal(..)			scif_fence_signal(..)
+
+  mmap(..)				mmap(..)
+
+  /* Access remote registered memory */
+
+  /* Close the endpoints */
+  scif_close(..)				scif_close(..)
diff --git a/Documentation/mic/scif_overview.txt b/Documentation/mic/scif_overview.txt
deleted file mode 100644
index 0a280d986731..000000000000
--- a/Documentation/mic/scif_overview.txt
+++ /dev/null
@@ -1,98 +0,0 @@
-The Symmetric Communication Interface (SCIF (pronounced as skiff)) is a low
-level communications API across PCIe currently implemented for MIC. Currently
-SCIF provides inter-node communication within a single host platform, where a
-node is a MIC Coprocessor or Xeon based host. SCIF abstracts the details of
-communicating over the PCIe bus while providing an API that is symmetric
-across all the nodes in the PCIe network. An important design objective for SCIF
-is to deliver the maximum possible performance given the communication
-abilities of the hardware. SCIF has been used to implement an offload compiler
-runtime and OFED support for MPI implementations for MIC coprocessors.
-
-==== SCIF API Components ====
-The SCIF API has the following parts:
-1. Connection establishment using a client server model
-2. Byte stream messaging intended for short messages
-3. Node enumeration to determine online nodes
-4. Poll semantics for detection of incoming connections and messages
-5. Memory registration to pin down pages
-6. Remote memory mapping for low latency CPU accesses via mmap
-7. Remote DMA (RDMA) for high bandwidth DMA transfers
-8. Fence APIs for RDMA synchronization
-
-SCIF exposes the notion of a connection which can be used by peer processes on
-nodes in a SCIF PCIe "network" to share memory "windows" and to communicate. A
-process in a SCIF node initiates a SCIF connection to a peer process on a
-different node via a SCIF "endpoint". SCIF endpoints support messaging APIs
-which are similar to connection oriented socket APIs. Connected SCIF endpoints
-can also register local memory which is followed by data transfer using either
-DMA, CPU copies or remote memory mapping via mmap. SCIF supports both user and
-kernel mode clients which are functionally equivalent.
-
-==== SCIF Performance for MIC ====
-DMA bandwidth comparison between the TCP (over ethernet over PCIe) stack versus
-SCIF shows the performance advantages of SCIF for HPC applications and runtimes.
-
-             Comparison of TCP and SCIF based BW
-
-  Throughput (GB/sec)
-    8 +                                             PCIe Bandwidth ******
-      +                                                        TCP ######
-    7 +    **************************************             SCIF %%%%%%
-      |                       %%%%%%%%%%%%%%%%%%%
-    6 +                   %%%%
-      |                 %%
-      |               %%%
-    5 +              %%
-      |            %%
-    4 +           %%
-      |          %%
-    3 +         %%
-      |        %
-    2 +      %%
-      |     %%
-      |    %
-    1 +
-      +    ######################################
-    0 +++---+++--+--+-+--+--+-++-+--+-++-+--+-++-+-
-      1       10     100      1000   10000   100000
-                   Transfer Size (KBytes)
-
-SCIF allows memory sharing via mmap(..) between processes on different PCIe
-nodes and thus provides bare-metal PCIe latency. The round trip SCIF mmap
-latency from the host to an x100 MIC for an 8 byte message is 0.44 usecs.
-
-SCIF has a user space library which is a thin IOCTL wrapper providing a user
-space API similar to the kernel API in scif.h. The SCIF user space library
-is distributed @ https://software.intel.com/en-us/mic-developer
-
-Here is some pseudo code for an example of how two applications on two PCIe
-nodes would typically use the SCIF API:
-
-Process A (on node A)			Process B (on node B)
-
-/* get online node information */
-scif_get_node_ids(..)			scif_get_node_ids(..)
-scif_open(..)				scif_open(..)
-scif_bind(..)				scif_bind(..)
-scif_listen(..)
-scif_accept(..)				scif_connect(..)
-/* SCIF connection established */
-
-/* Send and receive short messages */
-scif_send(..)/scif_recv(..)		scif_send(..)/scif_recv(..)
-
-/* Register memory */
-scif_register(..)			scif_register(..)
-
-/* RDMA */
-scif_readfrom(..)/scif_writeto(..)	scif_readfrom(..)/scif_writeto(..)
-
-/* Fence DMAs */
-scif_fence_signal(..)			scif_fence_signal(..)
-
-mmap(..)				mmap(..)
-
-/* Access remote registered memory */
-
-/* Close the endpoints */
-scif_close(..)				scif_close(..)
-- 
cgit 


From 593733ab80ac2c607acc1fc3fbaba5031d38253a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:52 -0300
Subject: docs: netlabel: convert docs to ReST and rename to *.rst

Convert netlabel documentation to ReST.

This was trivial: just add proper title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/netlabel/cipso_ipv4.rst    | 56 ++++++++++++++++++++++++++++++++
 Documentation/netlabel/cipso_ipv4.txt    | 49 ----------------------------
 Documentation/netlabel/draft_ietf.rst    |  5 +++
 Documentation/netlabel/index.rst         | 21 ++++++++++++
 Documentation/netlabel/introduction.rst  | 52 +++++++++++++++++++++++++++++
 Documentation/netlabel/introduction.txt  | 46 --------------------------
 Documentation/netlabel/lsm_interface.rst | 53 ++++++++++++++++++++++++++++++
 Documentation/netlabel/lsm_interface.txt | 47 ---------------------------
 8 files changed, 187 insertions(+), 142 deletions(-)
 create mode 100644 Documentation/netlabel/cipso_ipv4.rst
 delete mode 100644 Documentation/netlabel/cipso_ipv4.txt
 create mode 100644 Documentation/netlabel/draft_ietf.rst
 create mode 100644 Documentation/netlabel/index.rst
 create mode 100644 Documentation/netlabel/introduction.rst
 delete mode 100644 Documentation/netlabel/introduction.txt
 create mode 100644 Documentation/netlabel/lsm_interface.rst
 delete mode 100644 Documentation/netlabel/lsm_interface.txt

(limited to 'Documentation')

diff --git a/Documentation/netlabel/cipso_ipv4.rst b/Documentation/netlabel/cipso_ipv4.rst
new file mode 100644
index 000000000000..cbd3f3231221
--- /dev/null
+++ b/Documentation/netlabel/cipso_ipv4.rst
@@ -0,0 +1,56 @@
+===================================
+NetLabel CIPSO/IPv4 Protocol Engine
+===================================
+
+Paul Moore, paul.moore@hp.com
+
+May 17, 2006
+
+Overview
+========
+
+The NetLabel CIPSO/IPv4 protocol engine is based on the IETF Commercial
+IP Security Option (CIPSO) draft from July 16, 1992.  A copy of this
+draft can be found in this directory
+(draft-ietf-cipso-ipsecurity-01.txt).  While the IETF draft never made
+it to an RFC standard it has become a de-facto standard for labeled
+networking and is used in many trusted operating systems.
+
+Outbound Packet Processing
+==========================
+
+The CIPSO/IPv4 protocol engine applies the CIPSO IP option to packets by
+adding the CIPSO label to the socket.  This causes all packets leaving the
+system through the socket to have the CIPSO IP option applied.  The socket's
+CIPSO label can be changed at any point in time, however, it is recommended
+that it is set upon the socket's creation.  The LSM can set the socket's CIPSO
+label by using the NetLabel security module API; if the NetLabel "domain" is
+configured to use CIPSO for packet labeling then a CIPSO IP option will be
+generated and attached to the socket.
+
+Inbound Packet Processing
+=========================
+
+The CIPSO/IPv4 protocol engine validates every CIPSO IP option it finds at the
+IP layer without any special handling required by the LSM.  However, in order
+to decode and translate the CIPSO label on the packet the LSM must use the
+NetLabel security module API to extract the security attributes of the packet.
+This is typically done at the socket layer using the 'socket_sock_rcv_skb()'
+LSM hook.
+
+Label Translation
+=================
+
+The CIPSO/IPv4 protocol engine contains a mechanism to translate CIPSO security
+attributes such as sensitivity level and category to values which are
+appropriate for the host.  These mappings are defined as part of a CIPSO
+Domain Of Interpretation (DOI) definition and are configured through the
+NetLabel user space communication layer.  Each DOI definition can have a
+different security attribute mapping table.
+
+Label Translation Cache
+=======================
+
+The NetLabel system provides a framework for caching security attribute
+mappings from the network labels to the corresponding LSM identifiers.  The
+CIPSO/IPv4 protocol engine supports this caching mechanism.
diff --git a/Documentation/netlabel/cipso_ipv4.txt b/Documentation/netlabel/cipso_ipv4.txt
deleted file mode 100644
index a6075481fd60..000000000000
--- a/Documentation/netlabel/cipso_ipv4.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-NetLabel CIPSO/IPv4 Protocol Engine
-==============================================================================
-Paul Moore, paul.moore@hp.com
-
-May 17, 2006
-
- * Overview
-
-The NetLabel CIPSO/IPv4 protocol engine is based on the IETF Commercial
-IP Security Option (CIPSO) draft from July 16, 1992.  A copy of this
-draft can be found in this directory
-(draft-ietf-cipso-ipsecurity-01.txt).  While the IETF draft never made
-it to an RFC standard it has become a de-facto standard for labeled
-networking and is used in many trusted operating systems.
-
- * Outbound Packet Processing
-
-The CIPSO/IPv4 protocol engine applies the CIPSO IP option to packets by
-adding the CIPSO label to the socket.  This causes all packets leaving the
-system through the socket to have the CIPSO IP option applied.  The socket's
-CIPSO label can be changed at any point in time, however, it is recommended
-that it is set upon the socket's creation.  The LSM can set the socket's CIPSO
-label by using the NetLabel security module API; if the NetLabel "domain" is
-configured to use CIPSO for packet labeling then a CIPSO IP option will be
-generated and attached to the socket.
-
- * Inbound Packet Processing
-
-The CIPSO/IPv4 protocol engine validates every CIPSO IP option it finds at the
-IP layer without any special handling required by the LSM.  However, in order
-to decode and translate the CIPSO label on the packet the LSM must use the
-NetLabel security module API to extract the security attributes of the packet.
-This is typically done at the socket layer using the 'socket_sock_rcv_skb()'
-LSM hook.
-
- * Label Translation
-
-The CIPSO/IPv4 protocol engine contains a mechanism to translate CIPSO security
-attributes such as sensitivity level and category to values which are
-appropriate for the host.  These mappings are defined as part of a CIPSO
-Domain Of Interpretation (DOI) definition and are configured through the
-NetLabel user space communication layer.  Each DOI definition can have a
-different security attribute mapping table.
-
- * Label Translation Cache
-
-The NetLabel system provides a framework for caching security attribute
-mappings from the network labels to the corresponding LSM identifiers.  The
-CIPSO/IPv4 protocol engine supports this caching mechanism.
diff --git a/Documentation/netlabel/draft_ietf.rst b/Documentation/netlabel/draft_ietf.rst
new file mode 100644
index 000000000000..5ed39ab8234b
--- /dev/null
+++ b/Documentation/netlabel/draft_ietf.rst
@@ -0,0 +1,5 @@
+Draft IETF CIPSO IP Security
+----------------------------
+
+ .. include:: draft-ietf-cipso-ipsecurity-01.txt
+    :literal:
diff --git a/Documentation/netlabel/index.rst b/Documentation/netlabel/index.rst
new file mode 100644
index 000000000000..47f1e0e5acd1
--- /dev/null
+++ b/Documentation/netlabel/index.rst
@@ -0,0 +1,21 @@
+:orphan:
+
+========
+NetLabel
+========
+
+.. toctree::
+    :maxdepth: 1
+
+    introduction
+    cipso_ipv4
+    lsm_interface
+
+    draft_ietf
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/netlabel/introduction.rst b/Documentation/netlabel/introduction.rst
new file mode 100644
index 000000000000..9333bbb0adc1
--- /dev/null
+++ b/Documentation/netlabel/introduction.rst
@@ -0,0 +1,52 @@
+=====================
+NetLabel Introduction
+=====================
+
+Paul Moore, paul.moore@hp.com
+
+August 2, 2006
+
+Overview
+========
+
+NetLabel is a mechanism which can be used by kernel security modules to attach
+security attributes to outgoing network packets generated from user space
+applications and read security attributes from incoming network packets.  It
+is composed of three main components, the protocol engines, the communication
+layer, and the kernel security module API.
+
+Protocol Engines
+================
+
+The protocol engines are responsible for both applying and retrieving the
+network packet's security attributes.  If any translation between the network
+security attributes and those on the host are required then the protocol
+engine will handle those tasks as well.  Other kernel subsystems should
+refrain from calling the protocol engines directly, instead they should use
+the NetLabel kernel security module API described below.
+
+Detailed information about each NetLabel protocol engine can be found in this
+directory.
+
+Communication Layer
+===================
+
+The communication layer exists to allow NetLabel configuration and monitoring
+from user space.  The NetLabel communication layer uses a message based
+protocol built on top of the Generic NETLINK transport mechanism.  The exact
+formatting of these NetLabel messages as well as the Generic NETLINK family
+names can be found in the 'net/netlabel/' directory as comments in the
+header files as well as in 'include/net/netlabel.h'.
+
+Security Module API
+===================
+
+The purpose of the NetLabel security module API is to provide a protocol
+independent interface to the underlying NetLabel protocol engines.  In addition
+to protocol independence, the security module API is designed to be completely
+LSM independent which should allow multiple LSMs to leverage the same code
+base.
+
+Detailed information about the NetLabel security module API can be found in the
+'include/net/netlabel.h' header file as well as the 'lsm_interface.txt' file
+found in this directory.
diff --git a/Documentation/netlabel/introduction.txt b/Documentation/netlabel/introduction.txt
deleted file mode 100644
index 3caf77bcff0f..000000000000
--- a/Documentation/netlabel/introduction.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-NetLabel Introduction
-==============================================================================
-Paul Moore, paul.moore@hp.com
-
-August 2, 2006
-
- * Overview
-
-NetLabel is a mechanism which can be used by kernel security modules to attach
-security attributes to outgoing network packets generated from user space
-applications and read security attributes from incoming network packets.  It
-is composed of three main components, the protocol engines, the communication
-layer, and the kernel security module API.
-
- * Protocol Engines
-
-The protocol engines are responsible for both applying and retrieving the
-network packet's security attributes.  If any translation between the network
-security attributes and those on the host are required then the protocol
-engine will handle those tasks as well.  Other kernel subsystems should
-refrain from calling the protocol engines directly, instead they should use
-the NetLabel kernel security module API described below.
-
-Detailed information about each NetLabel protocol engine can be found in this
-directory.
-
- * Communication Layer
-
-The communication layer exists to allow NetLabel configuration and monitoring
-from user space.  The NetLabel communication layer uses a message based
-protocol built on top of the Generic NETLINK transport mechanism.  The exact
-formatting of these NetLabel messages as well as the Generic NETLINK family
-names can be found in the 'net/netlabel/' directory as comments in the
-header files as well as in 'include/net/netlabel.h'.
-
- * Security Module API
-
-The purpose of the NetLabel security module API is to provide a protocol
-independent interface to the underlying NetLabel protocol engines.  In addition
-to protocol independence, the security module API is designed to be completely
-LSM independent which should allow multiple LSMs to leverage the same code
-base.
-
-Detailed information about the NetLabel security module API can be found in the
-'include/net/netlabel.h' header file as well as the 'lsm_interface.txt' file
-found in this directory.
diff --git a/Documentation/netlabel/lsm_interface.rst b/Documentation/netlabel/lsm_interface.rst
new file mode 100644
index 000000000000..026fc267f798
--- /dev/null
+++ b/Documentation/netlabel/lsm_interface.rst
@@ -0,0 +1,53 @@
+========================================
+NetLabel Linux Security Module Interface
+========================================
+
+Paul Moore, paul.moore@hp.com
+
+May 17, 2006
+
+Overview
+========
+
+NetLabel is a mechanism which can set and retrieve security attributes from
+network packets.  It is intended to be used by LSM developers who want to make
+use of a common code base for several different packet labeling protocols.
+The NetLabel security module API is defined in 'include/net/netlabel.h' but a
+brief overview is given below.
+
+NetLabel Security Attributes
+============================
+
+Since NetLabel supports multiple different packet labeling protocols and LSMs
+it uses the concept of security attributes to refer to the packet's security
+labels.  The NetLabel security attributes are defined by the
+'netlbl_lsm_secattr' structure in the NetLabel header file.  Internally the
+NetLabel subsystem converts the security attributes to and from the correct
+low-level packet label depending on the NetLabel build time and run time
+configuration.  It is up to the LSM developer to translate the NetLabel
+security attributes into whatever security identifiers are in use for their
+particular LSM.
+
+NetLabel LSM Protocol Operations
+================================
+
+These are the functions which allow the LSM developer to manipulate the labels
+on outgoing packets as well as read the labels on incoming packets.  Functions
+exist to operate both on sockets as well as the sk_buffs directly.  These high
+level functions are translated into low level protocol operations based on how
+the administrator has configured the NetLabel subsystem.
+
+NetLabel Label Mapping Cache Operations
+=======================================
+
+Depending on the exact configuration, translation between the network packet
+label and the internal LSM security identifier can be time consuming.  The
+NetLabel label mapping cache is a caching mechanism which can be used to
+sidestep much of this overhead once a mapping has been established.  Once the
+LSM has received a packet, used NetLabel to decode its security attributes,
+and translated the security attributes into a LSM internal identifier the LSM
+can use the NetLabel caching functions to associate the LSM internal
+identifier with the network packet's label.  This means that in the future
+when a incoming packet matches a cached value not only are the internal
+NetLabel translation mechanisms bypassed but the LSM translation mechanisms are
+bypassed as well which should result in a significant reduction in overhead.
diff --git a/Documentation/netlabel/lsm_interface.txt b/Documentation/netlabel/lsm_interface.txt
deleted file mode 100644
index 638c74f7de7f..000000000000
--- a/Documentation/netlabel/lsm_interface.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-NetLabel Linux Security Module Interface
-==============================================================================
-Paul Moore, paul.moore@hp.com
-
-May 17, 2006
-
- * Overview
-
-NetLabel is a mechanism which can set and retrieve security attributes from
-network packets.  It is intended to be used by LSM developers who want to make
-use of a common code base for several different packet labeling protocols.
-The NetLabel security module API is defined in 'include/net/netlabel.h' but a
-brief overview is given below.
-
- * NetLabel Security Attributes
-
-Since NetLabel supports multiple different packet labeling protocols and LSMs
-it uses the concept of security attributes to refer to the packet's security
-labels.  The NetLabel security attributes are defined by the
-'netlbl_lsm_secattr' structure in the NetLabel header file.  Internally the
-NetLabel subsystem converts the security attributes to and from the correct
-low-level packet label depending on the NetLabel build time and run time
-configuration.  It is up to the LSM developer to translate the NetLabel
-security attributes into whatever security identifiers are in use for their
-particular LSM.
-
- * NetLabel LSM Protocol Operations
-
-These are the functions which allow the LSM developer to manipulate the labels
-on outgoing packets as well as read the labels on incoming packets.  Functions
-exist to operate both on sockets as well as the sk_buffs directly.  These high
-level functions are translated into low level protocol operations based on how
-the administrator has configured the NetLabel subsystem.
-
- * NetLabel Label Mapping Cache Operations
-
-Depending on the exact configuration, translation between the network packet
-label and the internal LSM security identifier can be time consuming.  The
-NetLabel label mapping cache is a caching mechanism which can be used to
-sidestep much of this overhead once a mapping has been established.  Once the
-LSM has received a packet, used NetLabel to decode its security attributes,
-and translated the security attributes into a LSM internal identifier the LSM
-can use the NetLabel caching functions to associate the LSM internal
-identifier with the network packet's label.  This means that in the future
-when a incoming packet matches a cached value not only are the internal
-NetLabel translation mechanisms bypassed but the LSM translation mechanisms are
-bypassed as well which should result in a significant reduction in overhead.
-- 
cgit 


From 3bdab16c55f57a24245c97d707241dd9b48d1a91 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:53 -0300
Subject: docs: pcmcia: convert docs to ReST and rename to *.rst

Convert the pcmcia docs to ReST format. Most of the changes here
are trivial.

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/pcmcia/devicetable.rst    |  37 ++++++++
 Documentation/pcmcia/devicetable.txt    |  33 -------
 Documentation/pcmcia/driver-changes.rst | 160 ++++++++++++++++++++++++++++++++
 Documentation/pcmcia/driver-changes.txt | 149 -----------------------------
 Documentation/pcmcia/driver.rst         |  30 ++++++
 Documentation/pcmcia/driver.txt         |  30 ------
 Documentation/pcmcia/index.rst          |  20 ++++
 Documentation/pcmcia/locking.rst        | 133 ++++++++++++++++++++++++++
 Documentation/pcmcia/locking.txt        | 118 -----------------------
 9 files changed, 380 insertions(+), 330 deletions(-)
 create mode 100644 Documentation/pcmcia/devicetable.rst
 delete mode 100644 Documentation/pcmcia/devicetable.txt
 create mode 100644 Documentation/pcmcia/driver-changes.rst
 delete mode 100644 Documentation/pcmcia/driver-changes.txt
 create mode 100644 Documentation/pcmcia/driver.rst
 delete mode 100644 Documentation/pcmcia/driver.txt
 create mode 100644 Documentation/pcmcia/index.rst
 create mode 100644 Documentation/pcmcia/locking.rst
 delete mode 100644 Documentation/pcmcia/locking.txt

(limited to 'Documentation')

diff --git a/Documentation/pcmcia/devicetable.rst b/Documentation/pcmcia/devicetable.rst
new file mode 100644
index 000000000000..fd1d60d12ca1
--- /dev/null
+++ b/Documentation/pcmcia/devicetable.rst
@@ -0,0 +1,37 @@
+============
+Device table
+============
+
+Matching of PCMCIA devices to drivers is done using one or more of the
+following criteria:
+
+- manufactor ID
+- card ID
+- product ID strings _and_ hashes of these strings
+- function ID
+- device function (actual and pseudo)
+
+You should use the helpers in include/pcmcia/device_id.h for generating the
+struct pcmcia_device_id[] entries which match devices to drivers.
+
+If you want to match product ID strings, you also need to pass the crc32
+hashes of the string to the macro, e.g. if you want to match the product ID
+string 1, you need to use
+
+PCMCIA_DEVICE_PROD_ID1("some_string", 0x(hash_of_some_string)),
+
+If the hash is incorrect, the kernel will inform you about this in "dmesg"
+upon module initialization, and tell you of the correct hash.
+
+You can determine the hash of the product ID strings by catting the file
+"modalias" in the sysfs directory of the PCMCIA device. It generates a string
+in the following form:
+pcmcia:m0149cC1ABf06pfn00fn00pa725B842DpbF1EFEE84pc0877B627pd00000000
+
+The hex value after "pa" is the hash of product ID string 1, after "pb" for
+string 2 and so on.
+
+Alternatively, you can use crc32hash (see tools/pcmcia/crc32hash.c)
+to determine the crc32 hash.  Simply pass the string you want to evaluate
+as argument to this program, e.g.:
+$ tools/pcmcia/crc32hash "Dual Speed"
diff --git a/Documentation/pcmcia/devicetable.txt b/Documentation/pcmcia/devicetable.txt
deleted file mode 100644
index 5f3e00ab54c4..000000000000
--- a/Documentation/pcmcia/devicetable.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Matching of PCMCIA devices to drivers is done using one or more of the
-following criteria:
-
-- manufactor ID
-- card ID
-- product ID strings _and_ hashes of these strings
-- function ID
-- device function (actual and pseudo)
-
-You should use the helpers in include/pcmcia/device_id.h for generating the
-struct pcmcia_device_id[] entries which match devices to drivers.
-
-If you want to match product ID strings, you also need to pass the crc32
-hashes of the string to the macro, e.g. if you want to match the product ID
-string 1, you need to use
-
-PCMCIA_DEVICE_PROD_ID1("some_string", 0x(hash_of_some_string)),
-
-If the hash is incorrect, the kernel will inform you about this in "dmesg"
-upon module initialization, and tell you of the correct hash.
-
-You can determine the hash of the product ID strings by catting the file
-"modalias" in the sysfs directory of the PCMCIA device. It generates a string
-in the following form:
-pcmcia:m0149cC1ABf06pfn00fn00pa725B842DpbF1EFEE84pc0877B627pd00000000
-
-The hex value after "pa" is the hash of product ID string 1, after "pb" for
-string 2 and so on.
-
-Alternatively, you can use crc32hash (see tools/pcmcia/crc32hash.c)
-to determine the crc32 hash.  Simply pass the string you want to evaluate
-as argument to this program, e.g.:
-$ tools/pcmcia/crc32hash "Dual Speed"
diff --git a/Documentation/pcmcia/driver-changes.rst b/Documentation/pcmcia/driver-changes.rst
new file mode 100644
index 000000000000..33fe9ebec049
--- /dev/null
+++ b/Documentation/pcmcia/driver-changes.rst
@@ -0,0 +1,160 @@
+==============
+Driver changes
+==============
+
+This file details changes in 2.6 which affect PCMCIA card driver authors:
+
+* pcmcia_loop_config() and autoconfiguration (as of 2.6.36)
+   If `struct pcmcia_device *p_dev->config_flags` is set accordingly,
+   pcmcia_loop_config() now sets up certain configuration values
+   automatically, though the driver may still override the settings
+   in the callback function. The following autoconfiguration options
+   are provided at the moment:
+
+	- CONF_AUTO_CHECK_VCC : check for matching Vcc
+	- CONF_AUTO_SET_VPP   : set Vpp
+	- CONF_AUTO_AUDIO     : auto-enable audio line, if required
+	- CONF_AUTO_SET_IO    : set ioport resources (->resource[0,1])
+	- CONF_AUTO_SET_IOMEM : set first iomem resource (->resource[2])
+
+* pcmcia_request_configuration -> pcmcia_enable_device (as of 2.6.36)
+   pcmcia_request_configuration() got renamed to pcmcia_enable_device(),
+   as it mirrors pcmcia_disable_device(). Configuration settings are now
+   stored in struct pcmcia_device, e.g. in the fields config_flags,
+   config_index, config_base, vpp.
+
+* pcmcia_request_window changes (as of 2.6.36)
+   Instead of win_req_t, drivers are now requested to fill out
+   `struct pcmcia_device *p_dev->resource[2,3,4,5]` for up to four ioport
+   ranges. After a call to pcmcia_request_window(), the regions found there
+   are reserved and may be used immediately -- until pcmcia_release_window()
+   is called.
+
+* pcmcia_request_io changes (as of 2.6.36)
+   Instead of io_req_t, drivers are now requested to fill out
+   `struct pcmcia_device *p_dev->resource[0,1]` for up to two ioport
+   ranges. After a call to pcmcia_request_io(), the ports found there
+   are reserved, after calling pcmcia_request_configuration(), they may
+   be used.
+
+* No dev_info_t, no cs_types.h (as of 2.6.36)
+   dev_info_t and a few other typedefs are removed. No longer use them
+   in PCMCIA device drivers. Also, do not include pcmcia/cs_types.h, as
+   this file is gone.
+
+* No dev_node_t (as of 2.6.35)
+   There is no more need to fill out a "dev_node_t" structure.
+
+* New IRQ request rules (as of 2.6.35)
+   Instead of the old pcmcia_request_irq() interface, drivers may now
+   choose between:
+
+   - calling request_irq/free_irq directly. Use the IRQ from `*p_dev->irq`.
+   - use pcmcia_request_irq(p_dev, handler_t); the PCMCIA core will
+     clean up automatically on calls to pcmcia_disable_device() or
+     device ejection.
+
+* no cs_error / CS_CHECK / CONFIG_PCMCIA_DEBUG (as of 2.6.33)
+   Instead of the cs_error() callback or the CS_CHECK() macro, please use
+   Linux-style checking of return values, and -- if necessary -- debug
+   messages using "dev_dbg()" or "pr_debug()".
+
+* New CIS tuple access (as of 2.6.33)
+   Instead of pcmcia_get_{first,next}_tuple(), pcmcia_get_tuple_data() and
+   pcmcia_parse_tuple(), a driver shall use "pcmcia_get_tuple()" if it is
+   only interested in one (raw) tuple, or "pcmcia_loop_tuple()" if it is
+   interested in all tuples of one type. To decode the MAC from CISTPL_FUNCE,
+   a new helper "pcmcia_get_mac_from_cis()" was added.
+
+* New configuration loop helper (as of 2.6.28)
+   By calling pcmcia_loop_config(), a driver can iterate over all available
+   configuration options. During a driver's probe() phase, one doesn't need
+   to use pcmcia_get_{first,next}_tuple, pcmcia_get_tuple_data and
+   pcmcia_parse_tuple directly in most if not all cases.
+
+* New release helper (as of 2.6.17)
+   Instead of calling pcmcia_release_{configuration,io,irq,win}, all that's
+   necessary now is calling pcmcia_disable_device. As there is no valid
+   reason left to call pcmcia_release_io and pcmcia_release_irq, the
+   exports for them were removed.
+
+* Unify detach and REMOVAL event code, as well as attach and INSERTION
+  code (as of 2.6.16)::
+
+       void (*remove)          (struct pcmcia_device *dev);
+       int (*probe)            (struct pcmcia_device *dev);
+
+* Move suspend, resume and reset out of event handler (as of 2.6.16)::
+
+       int (*suspend)          (struct pcmcia_device *dev);
+       int (*resume)           (struct pcmcia_device *dev);
+
+  should be initialized in struct pcmcia_driver, and handle
+  (SUSPEND == RESET_PHYSICAL) and (RESUME == CARD_RESET) events
+
+* event handler initialization in struct pcmcia_driver (as of 2.6.13)
+   The event handler is notified of all events, and must be initialized
+   as the event() callback in the driver's struct pcmcia_driver.
+
+* pcmcia/version.h should not be used (as of 2.6.13)
+   This file will be removed eventually.
+
+* in-kernel device<->driver matching (as of 2.6.13)
+   PCMCIA devices and their correct drivers can now be matched in
+   kernelspace. See 'devicetable.txt' for details.
+
+* Device model integration (as of 2.6.11)
+   A struct pcmcia_device is registered with the device model core,
+   and can be used (e.g. for SET_NETDEV_DEV) by using
+   handle_to_dev(client_handle_t * handle).
+
+* Convert internal I/O port addresses to unsigned int (as of 2.6.11)
+   ioaddr_t should be replaced by unsigned int in PCMCIA card drivers.
+
+* irq_mask and irq_list parameters (as of 2.6.11)
+   The irq_mask and irq_list parameters should no longer be used in
+   PCMCIA card drivers. Instead, it is the job of the PCMCIA core to
+   determine which IRQ should be used. Therefore, link->irq.IRQInfo2
+   is ignored.
+
+* client->PendingEvents is gone (as of 2.6.11)
+   client->PendingEvents is no longer available.
+
+* client->Attributes are gone (as of 2.6.11)
+   client->Attributes is unused, therefore it is removed from all
+   PCMCIA card drivers
+
+* core functions no longer available (as of 2.6.11)
+   The following functions have been removed from the kernel source
+   because they are unused by all in-kernel drivers, and no external
+   driver was reported to rely on them::
+
+	pcmcia_get_first_region()
+	pcmcia_get_next_region()
+	pcmcia_modify_window()
+	pcmcia_set_event_mask()
+	pcmcia_get_first_window()
+	pcmcia_get_next_window()
+
+* device list iteration upon module removal (as of 2.6.10)
+   It is no longer necessary to iterate on the driver's internal
+   client list and call the ->detach() function upon module removal.
+
+* Resource management. (as of 2.6.8)
+   Although the PCMCIA subsystem will allocate resources for cards,
+   it no longer marks these resources busy. This means that driver
+   authors are now responsible for claiming your resources as per
+   other drivers in Linux. You should use request_region() to mark
+   your IO regions in-use, and request_mem_region() to mark your
+   memory regions in-use. The name argument should be a pointer to
+   your driver name. Eg, for pcnet_cs, name should point to the
+   string "pcnet_cs".
+
+* CardServices is gone
+  CardServices() in 2.4 is just a big switch statement to call various
+  services.  In 2.6, all of those entry points are exported and called
+  directly (except for pcmcia_report_error(), just use cs_error() instead).
+
+* struct pcmcia_driver
+  You need to use struct pcmcia_driver and pcmcia_{un,}register_driver
+  instead of {un,}register_pccard_driver
diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt
deleted file mode 100644
index 78355c4c268a..000000000000
--- a/Documentation/pcmcia/driver-changes.txt
+++ /dev/null
@@ -1,149 +0,0 @@
-This file details changes in 2.6 which affect PCMCIA card driver authors:
-* pcmcia_loop_config() and autoconfiguration (as of 2.6.36)
-   If struct pcmcia_device *p_dev->config_flags is set accordingly,
-   pcmcia_loop_config() now sets up certain configuration values
-   automatically, though the driver may still override the settings
-   in the callback function. The following autoconfiguration options
-   are provided at the moment:
-	CONF_AUTO_CHECK_VCC : check for matching Vcc
-	CONF_AUTO_SET_VPP   : set Vpp
-	CONF_AUTO_AUDIO     : auto-enable audio line, if required
-	CONF_AUTO_SET_IO    : set ioport resources (->resource[0,1])
-	CONF_AUTO_SET_IOMEM : set first iomem resource (->resource[2])
-
-* pcmcia_request_configuration -> pcmcia_enable_device (as of 2.6.36)
-   pcmcia_request_configuration() got renamed to pcmcia_enable_device(),
-   as it mirrors pcmcia_disable_device(). Configuration settings are now
-   stored in struct pcmcia_device, e.g. in the fields config_flags,
-   config_index, config_base, vpp.
-
-* pcmcia_request_window changes (as of 2.6.36)
-   Instead of win_req_t, drivers are now requested to fill out
-   struct pcmcia_device *p_dev->resource[2,3,4,5] for up to four ioport
-   ranges. After a call to pcmcia_request_window(), the regions found there
-   are reserved and may be used immediately -- until pcmcia_release_window()
-   is called.
-
-* pcmcia_request_io changes (as of 2.6.36)
-   Instead of io_req_t, drivers are now requested to fill out
-   struct pcmcia_device *p_dev->resource[0,1] for up to two ioport
-   ranges. After a call to pcmcia_request_io(), the ports found there
-   are reserved, after calling pcmcia_request_configuration(), they may
-   be used.
-
-* No dev_info_t, no cs_types.h (as of 2.6.36)
-   dev_info_t and a few other typedefs are removed. No longer use them
-   in PCMCIA device drivers. Also, do not include pcmcia/cs_types.h, as
-   this file is gone.
-
-* No dev_node_t (as of 2.6.35)
-   There is no more need to fill out a "dev_node_t" structure.
-
-* New IRQ request rules (as of 2.6.35)
-   Instead of the old pcmcia_request_irq() interface, drivers may now
-   choose between:
-   - calling request_irq/free_irq directly. Use the IRQ from *p_dev->irq.
-   - use pcmcia_request_irq(p_dev, handler_t); the PCMCIA core will
-     clean up automatically on calls to pcmcia_disable_device() or
-     device ejection.
-
-* no cs_error / CS_CHECK / CONFIG_PCMCIA_DEBUG (as of 2.6.33)
-   Instead of the cs_error() callback or the CS_CHECK() macro, please use
-   Linux-style checking of return values, and -- if necessary -- debug
-   messages using "dev_dbg()" or "pr_debug()".
-
-* New CIS tuple access (as of 2.6.33)
-   Instead of pcmcia_get_{first,next}_tuple(), pcmcia_get_tuple_data() and
-   pcmcia_parse_tuple(), a driver shall use "pcmcia_get_tuple()" if it is
-   only interested in one (raw) tuple, or "pcmcia_loop_tuple()" if it is
-   interested in all tuples of one type. To decode the MAC from CISTPL_FUNCE,
-   a new helper "pcmcia_get_mac_from_cis()" was added.
-
-* New configuration loop helper (as of 2.6.28)
-   By calling pcmcia_loop_config(), a driver can iterate over all available
-   configuration options. During a driver's probe() phase, one doesn't need
-   to use pcmcia_get_{first,next}_tuple, pcmcia_get_tuple_data and
-   pcmcia_parse_tuple directly in most if not all cases.
-
-* New release helper (as of 2.6.17)
-   Instead of calling pcmcia_release_{configuration,io,irq,win}, all that's
-   necessary now is calling pcmcia_disable_device. As there is no valid
-   reason left to call pcmcia_release_io and pcmcia_release_irq, the
-   exports for them were removed.
-
-* Unify detach and REMOVAL event code, as well as attach and INSERTION
-  code (as of 2.6.16)
-       void (*remove)          (struct pcmcia_device *dev);
-       int (*probe)            (struct pcmcia_device *dev);
-
-* Move suspend, resume and reset out of event handler (as of 2.6.16)
-       int (*suspend)          (struct pcmcia_device *dev);
-       int (*resume)           (struct pcmcia_device *dev);
-  should be initialized in struct pcmcia_driver, and handle
-  (SUSPEND == RESET_PHYSICAL) and (RESUME == CARD_RESET) events
-
-* event handler initialization in struct pcmcia_driver (as of 2.6.13)
-   The event handler is notified of all events, and must be initialized
-   as the event() callback in the driver's struct pcmcia_driver.
-
-* pcmcia/version.h should not be used (as of 2.6.13)
-   This file will be removed eventually.
-
-* in-kernel device<->driver matching (as of 2.6.13)
-   PCMCIA devices and their correct drivers can now be matched in
-   kernelspace. See 'devicetable.txt' for details.
-
-* Device model integration (as of 2.6.11)
-   A struct pcmcia_device is registered with the device model core,
-   and can be used (e.g. for SET_NETDEV_DEV) by using
-   handle_to_dev(client_handle_t * handle).
-
-* Convert internal I/O port addresses to unsigned int (as of 2.6.11)
-   ioaddr_t should be replaced by unsigned int in PCMCIA card drivers.
-
-* irq_mask and irq_list parameters (as of 2.6.11)
-   The irq_mask and irq_list parameters should no longer be used in
-   PCMCIA card drivers. Instead, it is the job of the PCMCIA core to
-   determine which IRQ should be used. Therefore, link->irq.IRQInfo2
-   is ignored.
-
-* client->PendingEvents is gone (as of 2.6.11)
-   client->PendingEvents is no longer available.
-
-* client->Attributes are gone (as of 2.6.11)
-   client->Attributes is unused, therefore it is removed from all
-   PCMCIA card drivers
-
-* core functions no longer available (as of 2.6.11)
-   The following functions have been removed from the kernel source
-   because they are unused by all in-kernel drivers, and no external
-   driver was reported to rely on them:
-	pcmcia_get_first_region()
-	pcmcia_get_next_region()
-	pcmcia_modify_window()
-	pcmcia_set_event_mask()
-	pcmcia_get_first_window()
-	pcmcia_get_next_window()
-
-* device list iteration upon module removal (as of 2.6.10)
-   It is no longer necessary to iterate on the driver's internal
-   client list and call the ->detach() function upon module removal.
-
-* Resource management. (as of 2.6.8)
-   Although the PCMCIA subsystem will allocate resources for cards,
-   it no longer marks these resources busy. This means that driver
-   authors are now responsible for claiming your resources as per
-   other drivers in Linux. You should use request_region() to mark
-   your IO regions in-use, and request_mem_region() to mark your
-   memory regions in-use. The name argument should be a pointer to
-   your driver name. Eg, for pcnet_cs, name should point to the
-   string "pcnet_cs".
-
-* CardServices is gone
-  CardServices() in 2.4 is just a big switch statement to call various
-  services.  In 2.6, all of those entry points are exported and called
-  directly (except for pcmcia_report_error(), just use cs_error() instead).
-
-* struct pcmcia_driver
-  You need to use struct pcmcia_driver and pcmcia_{un,}register_driver
-  instead of {un,}register_pccard_driver
diff --git a/Documentation/pcmcia/driver.rst b/Documentation/pcmcia/driver.rst
new file mode 100644
index 000000000000..5c4fe84d51c1
--- /dev/null
+++ b/Documentation/pcmcia/driver.rst
@@ -0,0 +1,30 @@
+=============
+PCMCIA Driver
+=============
+
+sysfs
+-----
+
+New PCMCIA IDs may be added to a device driver pcmcia_device_id table at
+runtime as shown below::
+
+  echo "match_flags manf_id card_id func_id function device_no \
+  prod_id_hash[0] prod_id_hash[1] prod_id_hash[2] prod_id_hash[3]" > \
+  /sys/bus/pcmcia/drivers/{driver}/new_id
+
+All fields are passed in as hexadecimal values (no leading 0x).
+The meaning is described in the PCMCIA specification, the match_flags is
+a bitwise or-ed combination from PCMCIA_DEV_ID_MATCH_* constants
+defined in include/linux/mod_devicetable.h.
+
+Once added, the driver probe routine will be invoked for any unclaimed
+PCMCIA device listed in its (newly updated) pcmcia_device_id list.
+
+A common use-case is to add a new device according to the manufacturer ID
+and the card ID (form the manf_id and card_id file in the device tree).
+For this, just use::
+
+  echo "0x3 manf_id card_id 0 0 0 0 0 0 0" > \
+    /sys/bus/pcmcia/drivers/{driver}/new_id
+
+after loading the driver.
diff --git a/Documentation/pcmcia/driver.txt b/Documentation/pcmcia/driver.txt
deleted file mode 100644
index 0ac167920778..000000000000
--- a/Documentation/pcmcia/driver.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-PCMCIA Driver
--------------
-
-
-sysfs
------
-
-New PCMCIA IDs may be added to a device driver pcmcia_device_id table at
-runtime as shown below:
-
-echo "match_flags manf_id card_id func_id function device_no \
-prod_id_hash[0] prod_id_hash[1] prod_id_hash[2] prod_id_hash[3]" > \
-/sys/bus/pcmcia/drivers/{driver}/new_id
-
-All fields are passed in as hexadecimal values (no leading 0x).
-The meaning is described in the PCMCIA specification, the match_flags is
-a bitwise or-ed combination from PCMCIA_DEV_ID_MATCH_* constants
-defined in include/linux/mod_devicetable.h.
-
-Once added, the driver probe routine will be invoked for any unclaimed
-PCMCIA device listed in its (newly updated) pcmcia_device_id list.
-
-A common use-case is to add a new device according to the manufacturer ID
-and the card ID (form the manf_id and card_id file in the device tree).
-For this, just use:
-
-echo "0x3 manf_id card_id 0 0 0 0 0 0 0" > \
-        /sys/bus/pcmcia/drivers/{driver}/new_id
-
-after loading the driver.
diff --git a/Documentation/pcmcia/index.rst b/Documentation/pcmcia/index.rst
new file mode 100644
index 000000000000..779c8527109e
--- /dev/null
+++ b/Documentation/pcmcia/index.rst
@@ -0,0 +1,20 @@
+:orphan:
+
+======
+pcmcia
+======
+
+.. toctree::
+    :maxdepth: 1
+
+    driver
+    devicetable
+    locking
+    driver-changes
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/pcmcia/locking.rst b/Documentation/pcmcia/locking.rst
new file mode 100644
index 000000000000..e35257139c89
--- /dev/null
+++ b/Documentation/pcmcia/locking.rst
@@ -0,0 +1,133 @@
+=======
+Locking
+=======
+
+This file explains the locking and exclusion scheme used in the PCCARD
+and PCMCIA subsystems.
+
+
+A) Overview, Locking Hierarchy:
+===============================
+
+pcmcia_socket_list_rwsem
+	- protects only the list of sockets
+
+- skt_mutex
+	- serializes card insert / ejection
+
+  - ops_mutex
+	- serializes socket operation
+
+
+B) Exclusion
+============
+
+The following functions and callbacks to struct pcmcia_socket must
+be called with "skt_mutex" held::
+
+	socket_detect_change()
+	send_event()
+	socket_reset()
+	socket_shutdown()
+	socket_setup()
+	socket_remove()
+	socket_insert()
+	socket_early_resume()
+	socket_late_resume()
+	socket_resume()
+	socket_suspend()
+
+	struct pcmcia_callback	*callback
+
+The following functions and callbacks to struct pcmcia_socket must
+be called with "ops_mutex" held::
+
+	socket_reset()
+	socket_setup()
+
+	struct pccard_operations	*ops
+	struct pccard_resource_ops	*resource_ops;
+
+Note that send_event() and `struct pcmcia_callback *callback` must not be
+called with "ops_mutex" held.
+
+
+C) Protection
+=============
+
+1. Global Data:
+---------------
+struct list_head	pcmcia_socket_list;
+
+protected by pcmcia_socket_list_rwsem;
+
+
+2. Per-Socket Data:
+-------------------
+The resource_ops and their data are protected by ops_mutex.
+
+The "main" struct pcmcia_socket is protected as follows (read-only fields
+or single-use fields not mentioned):
+
+- by pcmcia_socket_list_rwsem::
+
+	struct list_head	socket_list;
+
+- by thread_lock::
+
+	unsigned int		thread_events;
+
+- by skt_mutex::
+
+	u_int			suspended_state;
+	void			(*tune_bridge);
+	struct pcmcia_callback	*callback;
+	int			resume_status;
+
+- by ops_mutex::
+
+	socket_state_t		socket;
+	u_int			state;
+	u_short			lock_count;
+	pccard_mem_map		cis_mem;
+	void __iomem 		*cis_virt;
+	struct { }		irq;
+	io_window_t		io[];
+	pccard_mem_map		win[];
+	struct list_head	cis_cache;
+	size_t			fake_cis_len;
+	u8			*fake_cis;
+	u_int			irq_mask;
+	void 			(*zoom_video);
+	int 			(*power_hook);
+	u8			resource...;
+	struct list_head	devices_list;
+	u8			device_count;
+	struct 			pcmcia_state;
+
+
+3. Per PCMCIA-device Data:
+--------------------------
+
+The "main" struct pcmcia_device is protected as follows (read-only fields
+or single-use fields not mentioned):
+
+
+- by pcmcia_socket->ops_mutex::
+
+	struct list_head	socket_device_list;
+	struct config_t		*function_config;
+	u16			_irq:1;
+	u16			_io:1;
+	u16			_win:4;
+	u16			_locked:1;
+	u16			allow_func_id_match:1;
+	u16			suspended:1;
+	u16			_removed:1;
+
+- by the PCMCIA driver::
+
+	io_req_t		io;
+	irq_req_t		irq;
+	config_req_t		conf;
+	window_handle_t		win;
diff --git a/Documentation/pcmcia/locking.txt b/Documentation/pcmcia/locking.txt
deleted file mode 100644
index b2c9b478906b..000000000000
--- a/Documentation/pcmcia/locking.txt
+++ /dev/null
@@ -1,118 +0,0 @@
-This file explains the locking and exclusion scheme used in the PCCARD
-and PCMCIA subsystems.
-
-
-A) Overview, Locking Hierarchy:
-===============================
-
-pcmcia_socket_list_rwsem	- protects only the list of sockets
-- skt_mutex			- serializes card insert / ejection
-  - ops_mutex			- serializes socket operation
-
-
-B) Exclusion
-============
-
-The following functions and callbacks to struct pcmcia_socket must
-be called with "skt_mutex" held:
-
-	socket_detect_change()
-	send_event()
-	socket_reset()
-	socket_shutdown()
-	socket_setup()
-	socket_remove()
-	socket_insert()
-	socket_early_resume()
-	socket_late_resume()
-	socket_resume()
-	socket_suspend()
-
-	struct pcmcia_callback	*callback
-
-The following functions and callbacks to struct pcmcia_socket must
-be called with "ops_mutex" held:
-
-	socket_reset()
-	socket_setup()
-
-	struct pccard_operations	*ops
-	struct pccard_resource_ops	*resource_ops;
-
-Note that send_event() and struct pcmcia_callback *callback must not be
-called with "ops_mutex" held.
-
-
-C) Protection
-=============
-
-1. Global Data:
----------------
-struct list_head	pcmcia_socket_list;
-
-protected by pcmcia_socket_list_rwsem;
-
-
-2. Per-Socket Data:
--------------------
-The resource_ops and their data are protected by ops_mutex.
-
-The "main" struct pcmcia_socket is protected as follows (read-only fields
-or single-use fields not mentioned):
-
-- by pcmcia_socket_list_rwsem:
-	struct list_head	socket_list;
-
-- by thread_lock:
-	unsigned int		thread_events;
-
-- by skt_mutex:
-	u_int			suspended_state;
-	void			(*tune_bridge);
-	struct pcmcia_callback	*callback;
-	int			resume_status;
-
-- by ops_mutex:
-	socket_state_t		socket;
-	u_int			state;
-	u_short			lock_count;
-	pccard_mem_map		cis_mem;
-	void __iomem 		*cis_virt;
-	struct { }		irq;
-	io_window_t		io[];
-	pccard_mem_map		win[];
-	struct list_head	cis_cache;
-	size_t			fake_cis_len;
-	u8			*fake_cis;
-	u_int			irq_mask;
-	void 			(*zoom_video);
-	int 			(*power_hook);
-	u8			resource...;
-	struct list_head	devices_list;
-	u8			device_count;
-	struct 			pcmcia_state;
-
-
-3. Per PCMCIA-device Data:
---------------------------
-
-The "main" struct pcmcia_device is protected as follows (read-only fields
-or single-use fields not mentioned):
-
-
-- by pcmcia_socket->ops_mutex:
-	struct list_head	socket_device_list;
-	struct config_t		*function_config;
-	u16			_irq:1;
-	u16			_io:1;
-	u16			_win:4;
-	u16			_locked:1;
-	u16			allow_func_id_match:1;
-	u16			suspended:1;
-	u16			_removed:1;
-
-- by the PCMCIA driver:
-	io_req_t		io;
-	irq_req_t		irq;
-	config_req_t		conf;
-	window_handle_t		win;
-- 
cgit 


From 99c8b231ae6c6ca4ca2fd1c0b3701071f589661f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:41 -0300
Subject: docs: cgroup-v1: convert docs to ReST and rename to *.rst

Convert the cgroup-v1 files to ReST format, in order to
allow a later addition to the admin-guide.

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 Documentation/admin-guide/hw-vuln/l1tf.rst         |    2 +-
 Documentation/admin-guide/kernel-parameters.txt    |    4 +-
 .../admin-guide/mm/numa_memory_policy.rst          |    2 +-
 Documentation/block/bfq-iosched.txt                |    2 +-
 Documentation/cgroup-v1/blkio-controller.rst       |  391 ++++++++
 Documentation/cgroup-v1/blkio-controller.txt       |  375 --------
 Documentation/cgroup-v1/cgroups.rst                |  695 ++++++++++++++
 Documentation/cgroup-v1/cgroups.txt                |  677 -------------
 Documentation/cgroup-v1/cpuacct.rst                |   50 +
 Documentation/cgroup-v1/cpuacct.txt                |   49 -
 Documentation/cgroup-v1/cpusets.rst                |  866 +++++++++++++++++
 Documentation/cgroup-v1/cpusets.txt                |  839 ----------------
 Documentation/cgroup-v1/devices.rst                |  132 +++
 Documentation/cgroup-v1/devices.txt                |  116 ---
 Documentation/cgroup-v1/freezer-subsystem.rst      |  127 +++
 Documentation/cgroup-v1/freezer-subsystem.txt      |  123 ---
 Documentation/cgroup-v1/hugetlb.rst                |   50 +
 Documentation/cgroup-v1/hugetlb.txt                |   49 -
 Documentation/cgroup-v1/index.rst                  |   30 +
 Documentation/cgroup-v1/memcg_test.rst             |  355 +++++++
 Documentation/cgroup-v1/memcg_test.txt             |  280 ------
 Documentation/cgroup-v1/memory.rst                 | 1003 ++++++++++++++++++++
 Documentation/cgroup-v1/memory.txt                 |  892 -----------------
 Documentation/cgroup-v1/net_cls.rst                |   44 +
 Documentation/cgroup-v1/net_cls.txt                |   39 -
 Documentation/cgroup-v1/net_prio.rst               |   57 ++
 Documentation/cgroup-v1/net_prio.txt               |   55 --
 Documentation/cgroup-v1/pids.rst                   |   92 ++
 Documentation/cgroup-v1/pids.txt                   |   88 --
 Documentation/cgroup-v1/rdma.rst                   |  117 +++
 Documentation/cgroup-v1/rdma.txt                   |  109 ---
 Documentation/filesystems/tmpfs.txt                |    2 +-
 Documentation/scheduler/sched-deadline.txt         |    2 +-
 Documentation/scheduler/sched-design-CFS.txt       |    2 +-
 Documentation/scheduler/sched-rt-group.txt         |    2 +-
 Documentation/vm/numa.rst                          |    4 +-
 Documentation/vm/page_migration.rst                |    2 +-
 Documentation/vm/unevictable-lru.rst               |    2 +-
 Documentation/x86/x86_64/fake-numa-for-cpusets.rst |    4 +-
 39 files changed, 4024 insertions(+), 3706 deletions(-)
 create mode 100644 Documentation/cgroup-v1/blkio-controller.rst
 delete mode 100644 Documentation/cgroup-v1/blkio-controller.txt
 create mode 100644 Documentation/cgroup-v1/cgroups.rst
 delete mode 100644 Documentation/cgroup-v1/cgroups.txt
 create mode 100644 Documentation/cgroup-v1/cpuacct.rst
 delete mode 100644 Documentation/cgroup-v1/cpuacct.txt
 create mode 100644 Documentation/cgroup-v1/cpusets.rst
 delete mode 100644 Documentation/cgroup-v1/cpusets.txt
 create mode 100644 Documentation/cgroup-v1/devices.rst
 delete mode 100644 Documentation/cgroup-v1/devices.txt
 create mode 100644 Documentation/cgroup-v1/freezer-subsystem.rst
 delete mode 100644 Documentation/cgroup-v1/freezer-subsystem.txt
 create mode 100644 Documentation/cgroup-v1/hugetlb.rst
 delete mode 100644 Documentation/cgroup-v1/hugetlb.txt
 create mode 100644 Documentation/cgroup-v1/index.rst
 create mode 100644 Documentation/cgroup-v1/memcg_test.rst
 delete mode 100644 Documentation/cgroup-v1/memcg_test.txt
 create mode 100644 Documentation/cgroup-v1/memory.rst
 delete mode 100644 Documentation/cgroup-v1/memory.txt
 create mode 100644 Documentation/cgroup-v1/net_cls.rst
 delete mode 100644 Documentation/cgroup-v1/net_cls.txt
 create mode 100644 Documentation/cgroup-v1/net_prio.rst
 delete mode 100644 Documentation/cgroup-v1/net_prio.txt
 create mode 100644 Documentation/cgroup-v1/pids.rst
 delete mode 100644 Documentation/cgroup-v1/pids.txt
 create mode 100644 Documentation/cgroup-v1/rdma.rst
 delete mode 100644 Documentation/cgroup-v1/rdma.txt

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/hw-vuln/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst
index 31653a9f0e1b..656aee262e23 100644
--- a/Documentation/admin-guide/hw-vuln/l1tf.rst
+++ b/Documentation/admin-guide/hw-vuln/l1tf.rst
@@ -241,7 +241,7 @@ Guest mitigation mechanisms
    For further information about confining guests to a single or to a group
    of cores consult the cpusets documentation:
 
-   https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt
+   https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.rst
 
 .. _interrupt_isolation:
 
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..da0e84ecee32 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4078,7 +4078,7 @@
 
 	relax_domain_level=
 			[KNL, SMP] Set scheduler's default relax_domain_level.
-			See Documentation/cgroup-v1/cpusets.txt.
+			See Documentation/cgroup-v1/cpusets.rst.
 
 	reserve=	[KNL,BUGS] Force kernel to ignore I/O ports or memory
 			Format: <base1>,<size1>[,<base2>,<size2>,...]
@@ -4588,7 +4588,7 @@
 	swapaccount=[0|1]
 			[KNL] Enable accounting of swap in memory resource
 			controller if no parameter or 1 is given or disable
-			it if 0 is given (See Documentation/cgroup-v1/memory.txt)
+			it if 0 is given (See Documentation/cgroup-v1/memory.rst)
 
 	swiotlb=	[ARM,IA-64,PPC,MIPS,X86]
 			Format: { <int> | force | noforce }
diff --git a/Documentation/admin-guide/mm/numa_memory_policy.rst b/Documentation/admin-guide/mm/numa_memory_policy.rst
index d78c5b315f72..546f174e5d6a 100644
--- a/Documentation/admin-guide/mm/numa_memory_policy.rst
+++ b/Documentation/admin-guide/mm/numa_memory_policy.rst
@@ -15,7 +15,7 @@ document attempts to describe the concepts and APIs of the 2.6 memory policy
 support.
 
 Memory policies should not be confused with cpusets
-(``Documentation/cgroup-v1/cpusets.txt``)
+(``Documentation/cgroup-v1/cpusets.rst``)
 which is an administrative mechanism for restricting the nodes from which
 memory may be allocated by a set of processes. Memory policies are a
 programming interface that a NUMA-aware application can take advantage of.  When
diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 1a0f2ac02eb6..b2265cf6c9c3 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -539,7 +539,7 @@ As for cgroups-v1 (blkio controller), the exact set of stat files
 created, and kept up-to-date by bfq, depends on whether
 CONFIG_DEBUG_BLK_CGROUP is set. If it is set, then bfq creates all
 the stat files documented in
-Documentation/cgroup-v1/blkio-controller.txt. If, instead,
+Documentation/cgroup-v1/blkio-controller.rst. If, instead,
 CONFIG_DEBUG_BLK_CGROUP is not set, then bfq creates only the files
 blkio.bfq.io_service_bytes
 blkio.bfq.io_service_bytes_recursive
diff --git a/Documentation/cgroup-v1/blkio-controller.rst b/Documentation/cgroup-v1/blkio-controller.rst
new file mode 100644
index 000000000000..2c1b907afc14
--- /dev/null
+++ b/Documentation/cgroup-v1/blkio-controller.rst
@@ -0,0 +1,391 @@
+===================
+Block IO Controller
+===================
+
+Overview
+========
+cgroup subsys "blkio" implements the block io controller. There seems to be
+a need of various kinds of IO control policies (like proportional BW, max BW)
+both at leaf nodes as well as at intermediate nodes in a storage hierarchy.
+Plan is to use the same cgroup based management interface for blkio controller
+and based on user options switch IO policies in the background.
+
+Currently two IO control policies are implemented. First one is proportional
+weight time based division of disk policy. It is implemented in CFQ. Hence
+this policy takes effect only on leaf nodes when CFQ is being used. The second
+one is throttling policy which can be used to specify upper IO rate limits
+on devices. This policy is implemented in generic block layer and can be
+used on leaf nodes as well as higher level logical devices like device mapper.
+
+HOWTO
+=====
+Proportional Weight division of bandwidth
+-----------------------------------------
+You can do a very simple testing of running two dd threads in two different
+cgroups. Here is what you can do.
+
+- Enable Block IO controller::
+
+	CONFIG_BLK_CGROUP=y
+
+- Enable group scheduling in CFQ:
+
+
+	CONFIG_CFQ_GROUP_IOSCHED=y
+
+- Compile and boot into kernel and mount IO controller (blkio); see
+  cgroups.txt, Why are cgroups needed?.
+
+  ::
+
+	mount -t tmpfs cgroup_root /sys/fs/cgroup
+	mkdir /sys/fs/cgroup/blkio
+	mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
+
+- Create two cgroups::
+
+	mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2
+
+- Set weights of group test1 and test2::
+
+	echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight
+	echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight
+
+- Create two same size files (say 512MB each) on same disk (file1, file2) and
+  launch two dd threads in different cgroup to read those files::
+
+	sync
+	echo 3 > /proc/sys/vm/drop_caches
+
+	dd if=/mnt/sdb/zerofile1 of=/dev/null &
+	echo $! > /sys/fs/cgroup/blkio/test1/tasks
+	cat /sys/fs/cgroup/blkio/test1/tasks
+
+	dd if=/mnt/sdb/zerofile2 of=/dev/null &
+	echo $! > /sys/fs/cgroup/blkio/test2/tasks
+	cat /sys/fs/cgroup/blkio/test2/tasks
+
+- At macro level, first dd should finish first. To get more precise data, keep
+  on looking at (with the help of script), at blkio.disk_time and
+  blkio.disk_sectors files of both test1 and test2 groups. This will tell how
+  much disk time (in milliseconds), each group got and how many sectors each
+  group dispatched to the disk. We provide fairness in terms of disk time, so
+  ideally io.disk_time of cgroups should be in proportion to the weight.
+
+Throttling/Upper Limit policy
+-----------------------------
+- Enable Block IO controller::
+
+	CONFIG_BLK_CGROUP=y
+
+- Enable throttling in block layer::
+
+	CONFIG_BLK_DEV_THROTTLING=y
+
+- Mount blkio controller (see cgroups.txt, Why are cgroups needed?)::
+
+        mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
+
+- Specify a bandwidth rate on particular device for root group. The format
+  for policy is "<major>:<minor>  <bytes_per_second>"::
+
+        echo "8:16  1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device
+
+  Above will put a limit of 1MB/second on reads happening for root group
+  on device having major/minor number 8:16.
+
+- Run dd to read a file and see if rate is throttled to 1MB/s or not::
+
+        # dd iflag=direct if=/mnt/common/zerofile of=/dev/null bs=4K count=1024
+        1024+0 records in
+        1024+0 records out
+        4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s
+
+ Limits for writes can be put using blkio.throttle.write_bps_device file.
+
+Hierarchical Cgroups
+====================
+
+Both CFQ and throttling implement hierarchy support; however,
+throttling's hierarchy support is enabled iff "sane_behavior" is
+enabled from cgroup side, which currently is a development option and
+not publicly available.
+
+If somebody created a hierarchy like as follows::
+
+			root
+			/  \
+		     test1 test2
+			|
+		     test3
+
+CFQ by default and throttling with "sane_behavior" will handle the
+hierarchy correctly.  For details on CFQ hierarchy support, refer to
+Documentation/block/cfq-iosched.txt.  For throttling, all limits apply
+to the whole subtree while all statistics are local to the IOs
+directly generated by tasks in that cgroup.
+
+Throttling without "sane_behavior" enabled from cgroup side will
+practically treat all groups at same level as if it looks like the
+following::
+
+				pivot
+			     /  /   \  \
+			root  test1 test2  test3
+
+Various user visible config options
+===================================
+CONFIG_BLK_CGROUP
+	- Block IO controller.
+
+CONFIG_DEBUG_BLK_CGROUP
+	- Debug help. Right now some additional stats file show up in cgroup
+	  if this option is enabled.
+
+CONFIG_CFQ_GROUP_IOSCHED
+	- Enables group scheduling in CFQ. Currently only 1 level of group
+	  creation is allowed.
+
+CONFIG_BLK_DEV_THROTTLING
+	- Enable block device throttling support in block layer.
+
+Details of cgroup files
+=======================
+Proportional weight policy files
+--------------------------------
+- blkio.weight
+	- Specifies per cgroup weight. This is default weight of the group
+	  on all the devices until and unless overridden by per device rule.
+	  (See blkio.weight_device).
+	  Currently allowed range of weights is from 10 to 1000.
+
+- blkio.weight_device
+	- One can specify per cgroup per device rules using this interface.
+	  These rules override the default value of group weight as specified
+	  by blkio.weight.
+
+	  Following is the format::
+
+	    # echo dev_maj:dev_minor weight > blkio.weight_device
+
+	  Configure weight=300 on /dev/sdb (8:16) in this cgroup::
+
+	    # echo 8:16 300 > blkio.weight_device
+	    # cat blkio.weight_device
+	    dev     weight
+	    8:16    300
+
+	  Configure weight=500 on /dev/sda (8:0) in this cgroup::
+
+	    # echo 8:0 500 > blkio.weight_device
+	    # cat blkio.weight_device
+	    dev     weight
+	    8:0     500
+	    8:16    300
+
+	  Remove specific weight for /dev/sda in this cgroup::
+
+	    # echo 8:0 0 > blkio.weight_device
+	    # cat blkio.weight_device
+	    dev     weight
+	    8:16    300
+
+- blkio.leaf_weight[_device]
+	- Equivalents of blkio.weight[_device] for the purpose of
+          deciding how much weight tasks in the given cgroup has while
+          competing with the cgroup's child cgroups. For details,
+          please refer to Documentation/block/cfq-iosched.txt.
+
+- blkio.time
+	- disk time allocated to cgroup per device in milliseconds. First
+	  two fields specify the major and minor number of the device and
+	  third field specifies the disk time allocated to group in
+	  milliseconds.
+
+- blkio.sectors
+	- number of sectors transferred to/from disk by the group. First
+	  two fields specify the major and minor number of the device and
+	  third field specifies the number of sectors transferred by the
+	  group to/from the device.
+
+- blkio.io_service_bytes
+	- Number of bytes transferred to/from the disk by the group. These
+	  are further divided by the type of operation - read or write, sync
+	  or async. First two fields specify the major and minor number of the
+	  device, third field specifies the operation type and the fourth field
+	  specifies the number of bytes.
+
+- blkio.io_serviced
+	- Number of IOs (bio) issued to the disk by the group. These
+	  are further divided by the type of operation - read or write, sync
+	  or async. First two fields specify the major and minor number of the
+	  device, third field specifies the operation type and the fourth field
+	  specifies the number of IOs.
+
+- blkio.io_service_time
+	- Total amount of time between request dispatch and request completion
+	  for the IOs done by this cgroup. This is in nanoseconds to make it
+	  meaningful for flash devices too. For devices with queue depth of 1,
+	  this time represents the actual service time. When queue_depth > 1,
+	  that is no longer true as requests may be served out of order. This
+	  may cause the service time for a given IO to include the service time
+	  of multiple IOs when served out of order which may result in total
+	  io_service_time > actual time elapsed. This time is further divided by
+	  the type of operation - read or write, sync or async. First two fields
+	  specify the major and minor number of the device, third field
+	  specifies the operation type and the fourth field specifies the
+	  io_service_time in ns.
+
+- blkio.io_wait_time
+	- Total amount of time the IOs for this cgroup spent waiting in the
+	  scheduler queues for service. This can be greater than the total time
+	  elapsed since it is cumulative io_wait_time for all IOs. It is not a
+	  measure of total time the cgroup spent waiting but rather a measure of
+	  the wait_time for its individual IOs. For devices with queue_depth > 1
+	  this metric does not include the time spent waiting for service once
+	  the IO is dispatched to the device but till it actually gets serviced
+	  (there might be a time lag here due to re-ordering of requests by the
+	  device). This is in nanoseconds to make it meaningful for flash
+	  devices too. This time is further divided by the type of operation -
+	  read or write, sync or async. First two fields specify the major and
+	  minor number of the device, third field specifies the operation type
+	  and the fourth field specifies the io_wait_time in ns.
+
+- blkio.io_merged
+	- Total number of bios/requests merged into requests belonging to this
+	  cgroup. This is further divided by the type of operation - read or
+	  write, sync or async.
+
+- blkio.io_queued
+	- Total number of requests queued up at any given instant for this
+	  cgroup. This is further divided by the type of operation - read or
+	  write, sync or async.
+
+- blkio.avg_queue_size
+	- Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
+	  The average queue size for this cgroup over the entire time of this
+	  cgroup's existence. Queue size samples are taken each time one of the
+	  queues of this cgroup gets a timeslice.
+
+- blkio.group_wait_time
+	- Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
+	  This is the amount of time the cgroup had to wait since it became busy
+	  (i.e., went from 0 to 1 request queued) to get a timeslice for one of
+	  its queues. This is different from the io_wait_time which is the
+	  cumulative total of the amount of time spent by each IO in that cgroup
+	  waiting in the scheduler queue. This is in nanoseconds. If this is
+	  read when the cgroup is in a waiting (for timeslice) state, the stat
+	  will only report the group_wait_time accumulated till the last time it
+	  got a timeslice and will not include the current delta.
+
+- blkio.empty_time
+	- Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
+	  This is the amount of time a cgroup spends without any pending
+	  requests when not being served, i.e., it does not include any time
+	  spent idling for one of the queues of the cgroup. This is in
+	  nanoseconds. If this is read when the cgroup is in an empty state,
+	  the stat will only report the empty_time accumulated till the last
+	  time it had a pending request and will not include the current delta.
+
+- blkio.idle_time
+	- Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
+	  This is the amount of time spent by the IO scheduler idling for a
+	  given cgroup in anticipation of a better request than the existing ones
+	  from other queues/cgroups. This is in nanoseconds. If this is read
+	  when the cgroup is in an idling state, the stat will only report the
+	  idle_time accumulated till the last idle period and will not include
+	  the current delta.
+
+- blkio.dequeue
+	- Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. This
+	  gives the statistics about how many a times a group was dequeued
+	  from service tree of the device. First two fields specify the major
+	  and minor number of the device and third field specifies the number
+	  of times a group was dequeued from a particular device.
+
+- blkio.*_recursive
+	- Recursive version of various stats. These files show the
+          same information as their non-recursive counterparts but
+          include stats from all the descendant cgroups.
+
+Throttling/Upper limit policy files
+-----------------------------------
+- blkio.throttle.read_bps_device
+	- Specifies upper limit on READ rate from the device. IO rate is
+	  specified in bytes per second. Rules are per device. Following is
+	  the format::
+
+	    echo "<major>:<minor>  <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
+
+- blkio.throttle.write_bps_device
+	- Specifies upper limit on WRITE rate to the device. IO rate is
+	  specified in bytes per second. Rules are per device. Following is
+	  the format::
+
+	    echo "<major>:<minor>  <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
+
+- blkio.throttle.read_iops_device
+	- Specifies upper limit on READ rate from the device. IO rate is
+	  specified in IO per second. Rules are per device. Following is
+	  the format::
+
+	   echo "<major>:<minor>  <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
+
+- blkio.throttle.write_iops_device
+	- Specifies upper limit on WRITE rate to the device. IO rate is
+	  specified in io per second. Rules are per device. Following is
+	  the format::
+
+	    echo "<major>:<minor>  <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
+
+Note: If both BW and IOPS rules are specified for a device, then IO is
+      subjected to both the constraints.
+
+- blkio.throttle.io_serviced
+	- Number of IOs (bio) issued to the disk by the group. These
+	  are further divided by the type of operation - read or write, sync
+	  or async. First two fields specify the major and minor number of the
+	  device, third field specifies the operation type and the fourth field
+	  specifies the number of IOs.
+
+- blkio.throttle.io_service_bytes
+	- Number of bytes transferred to/from the disk by the group. These
+	  are further divided by the type of operation - read or write, sync
+	  or async. First two fields specify the major and minor number of the
+	  device, third field specifies the operation type and the fourth field
+	  specifies the number of bytes.
+
+Common files among various policies
+-----------------------------------
+- blkio.reset_stats
+	- Writing an int to this file will result in resetting all the stats
+	  for that cgroup.
+
+CFQ sysfs tunable
+=================
+/sys/block/<disk>/queue/iosched/slice_idle
+------------------------------------------
+On a faster hardware CFQ can be slow, especially with sequential workload.
+This happens because CFQ idles on a single queue and single queue might not
+drive deeper request queue depths to keep the storage busy. In such scenarios
+one can try setting slice_idle=0 and that would switch CFQ to IOPS
+(IO operations per second) mode on NCQ supporting hardware.
+
+That means CFQ will not idle between cfq queues of a cfq group and hence be
+able to driver higher queue depth and achieve better throughput. That also
+means that cfq provides fairness among groups in terms of IOPS and not in
+terms of disk time.
+
+/sys/block/<disk>/queue/iosched/group_idle
+------------------------------------------
+If one disables idling on individual cfq queues and cfq service trees by
+setting slice_idle=0, group_idle kicks in. That means CFQ will still idle
+on the group in an attempt to provide fairness among groups.
+
+By default group_idle is same as slice_idle and does not do anything if
+slice_idle is enabled.
+
+One can experience an overall throughput drop if you have created multiple
+groups and put applications in that group which are not driving enough
+IO to keep disk busy. In that case set group_idle=0, and CFQ will not idle
+on individual groups and throughput should improve.
diff --git a/Documentation/cgroup-v1/blkio-controller.txt b/Documentation/cgroup-v1/blkio-controller.txt
deleted file mode 100644
index 673dc34d3f78..000000000000
--- a/Documentation/cgroup-v1/blkio-controller.txt
+++ /dev/null
@@ -1,375 +0,0 @@
-				Block IO Controller
-				===================
-Overview
-========
-cgroup subsys "blkio" implements the block io controller. There seems to be
-a need of various kinds of IO control policies (like proportional BW, max BW)
-both at leaf nodes as well as at intermediate nodes in a storage hierarchy.
-Plan is to use the same cgroup based management interface for blkio controller
-and based on user options switch IO policies in the background.
-
-Currently two IO control policies are implemented. First one is proportional
-weight time based division of disk policy. It is implemented in CFQ. Hence
-this policy takes effect only on leaf nodes when CFQ is being used. The second
-one is throttling policy which can be used to specify upper IO rate limits
-on devices. This policy is implemented in generic block layer and can be
-used on leaf nodes as well as higher level logical devices like device mapper.
-
-HOWTO
-=====
-Proportional Weight division of bandwidth
------------------------------------------
-You can do a very simple testing of running two dd threads in two different
-cgroups. Here is what you can do.
-
-- Enable Block IO controller
-	CONFIG_BLK_CGROUP=y
-
-- Enable group scheduling in CFQ
-	CONFIG_CFQ_GROUP_IOSCHED=y
-
-- Compile and boot into kernel and mount IO controller (blkio); see
-  cgroups.txt, Why are cgroups needed?.
-
-	mount -t tmpfs cgroup_root /sys/fs/cgroup
-	mkdir /sys/fs/cgroup/blkio
-	mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
-
-- Create two cgroups
-	mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2
-
-- Set weights of group test1 and test2
-	echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight
-	echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight
-
-- Create two same size files (say 512MB each) on same disk (file1, file2) and
-  launch two dd threads in different cgroup to read those files.
-
-	sync
-	echo 3 > /proc/sys/vm/drop_caches
-
-	dd if=/mnt/sdb/zerofile1 of=/dev/null &
-	echo $! > /sys/fs/cgroup/blkio/test1/tasks
-	cat /sys/fs/cgroup/blkio/test1/tasks
-
-	dd if=/mnt/sdb/zerofile2 of=/dev/null &
-	echo $! > /sys/fs/cgroup/blkio/test2/tasks
-	cat /sys/fs/cgroup/blkio/test2/tasks
-
-- At macro level, first dd should finish first. To get more precise data, keep
-  on looking at (with the help of script), at blkio.disk_time and
-  blkio.disk_sectors files of both test1 and test2 groups. This will tell how
-  much disk time (in milliseconds), each group got and how many sectors each
-  group dispatched to the disk. We provide fairness in terms of disk time, so
-  ideally io.disk_time of cgroups should be in proportion to the weight.
-
-Throttling/Upper Limit policy
------------------------------
-- Enable Block IO controller
-	CONFIG_BLK_CGROUP=y
-
-- Enable throttling in block layer
-	CONFIG_BLK_DEV_THROTTLING=y
-
-- Mount blkio controller (see cgroups.txt, Why are cgroups needed?)
-        mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
-
-- Specify a bandwidth rate on particular device for root group. The format
-  for policy is "<major>:<minor>  <bytes_per_second>".
-
-        echo "8:16  1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device
-
-  Above will put a limit of 1MB/second on reads happening for root group
-  on device having major/minor number 8:16.
-
-- Run dd to read a file and see if rate is throttled to 1MB/s or not.
-
-        # dd iflag=direct if=/mnt/common/zerofile of=/dev/null bs=4K count=1024
-        1024+0 records in
-        1024+0 records out
-        4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s
-
- Limits for writes can be put using blkio.throttle.write_bps_device file.
-
-Hierarchical Cgroups
-====================
-
-Both CFQ and throttling implement hierarchy support; however,
-throttling's hierarchy support is enabled iff "sane_behavior" is
-enabled from cgroup side, which currently is a development option and
-not publicly available.
-
-If somebody created a hierarchy like as follows.
-
-			root
-			/  \
-		     test1 test2
-			|
-		     test3
-
-CFQ by default and throttling with "sane_behavior" will handle the
-hierarchy correctly.  For details on CFQ hierarchy support, refer to
-Documentation/block/cfq-iosched.txt.  For throttling, all limits apply
-to the whole subtree while all statistics are local to the IOs
-directly generated by tasks in that cgroup.
-
-Throttling without "sane_behavior" enabled from cgroup side will
-practically treat all groups at same level as if it looks like the
-following.
-
-				pivot
-			     /  /   \  \
-			root  test1 test2  test3
-
-Various user visible config options
-===================================
-CONFIG_BLK_CGROUP
-	- Block IO controller.
-
-CONFIG_DEBUG_BLK_CGROUP
-	- Debug help. Right now some additional stats file show up in cgroup
-	  if this option is enabled.
-
-CONFIG_CFQ_GROUP_IOSCHED
-	- Enables group scheduling in CFQ. Currently only 1 level of group
-	  creation is allowed.
-
-CONFIG_BLK_DEV_THROTTLING
-	- Enable block device throttling support in block layer.
-
-Details of cgroup files
-=======================
-Proportional weight policy files
---------------------------------
-- blkio.weight
-	- Specifies per cgroup weight. This is default weight of the group
-	  on all the devices until and unless overridden by per device rule.
-	  (See blkio.weight_device).
-	  Currently allowed range of weights is from 10 to 1000.
-
-- blkio.weight_device
-	- One can specify per cgroup per device rules using this interface.
-	  These rules override the default value of group weight as specified
-	  by blkio.weight.
-
-	  Following is the format.
-
-	  # echo dev_maj:dev_minor weight > blkio.weight_device
-	  Configure weight=300 on /dev/sdb (8:16) in this cgroup
-	  # echo 8:16 300 > blkio.weight_device
-	  # cat blkio.weight_device
-	  dev     weight
-	  8:16    300
-
-	  Configure weight=500 on /dev/sda (8:0) in this cgroup
-	  # echo 8:0 500 > blkio.weight_device
-	  # cat blkio.weight_device
-	  dev     weight
-	  8:0     500
-	  8:16    300
-
-	  Remove specific weight for /dev/sda in this cgroup
-	  # echo 8:0 0 > blkio.weight_device
-	  # cat blkio.weight_device
-	  dev     weight
-	  8:16    300
-
-- blkio.leaf_weight[_device]
-	- Equivalents of blkio.weight[_device] for the purpose of
-          deciding how much weight tasks in the given cgroup has while
-          competing with the cgroup's child cgroups. For details,
-          please refer to Documentation/block/cfq-iosched.txt.
-
-- blkio.time
-	- disk time allocated to cgroup per device in milliseconds. First
-	  two fields specify the major and minor number of the device and
-	  third field specifies the disk time allocated to group in
-	  milliseconds.
-
-- blkio.sectors
-	- number of sectors transferred to/from disk by the group. First
-	  two fields specify the major and minor number of the device and
-	  third field specifies the number of sectors transferred by the
-	  group to/from the device.
-
-- blkio.io_service_bytes
-	- Number of bytes transferred to/from the disk by the group. These
-	  are further divided by the type of operation - read or write, sync
-	  or async. First two fields specify the major and minor number of the
-	  device, third field specifies the operation type and the fourth field
-	  specifies the number of bytes.
-
-- blkio.io_serviced
-	- Number of IOs (bio) issued to the disk by the group. These
-	  are further divided by the type of operation - read or write, sync
-	  or async. First two fields specify the major and minor number of the
-	  device, third field specifies the operation type and the fourth field
-	  specifies the number of IOs.
-
-- blkio.io_service_time
-	- Total amount of time between request dispatch and request completion
-	  for the IOs done by this cgroup. This is in nanoseconds to make it
-	  meaningful for flash devices too. For devices with queue depth of 1,
-	  this time represents the actual service time. When queue_depth > 1,
-	  that is no longer true as requests may be served out of order. This
-	  may cause the service time for a given IO to include the service time
-	  of multiple IOs when served out of order which may result in total
-	  io_service_time > actual time elapsed. This time is further divided by
-	  the type of operation - read or write, sync or async. First two fields
-	  specify the major and minor number of the device, third field
-	  specifies the operation type and the fourth field specifies the
-	  io_service_time in ns.
-
-- blkio.io_wait_time
-	- Total amount of time the IOs for this cgroup spent waiting in the
-	  scheduler queues for service. This can be greater than the total time
-	  elapsed since it is cumulative io_wait_time for all IOs. It is not a
-	  measure of total time the cgroup spent waiting but rather a measure of
-	  the wait_time for its individual IOs. For devices with queue_depth > 1
-	  this metric does not include the time spent waiting for service once
-	  the IO is dispatched to the device but till it actually gets serviced
-	  (there might be a time lag here due to re-ordering of requests by the
-	  device). This is in nanoseconds to make it meaningful for flash
-	  devices too. This time is further divided by the type of operation -
-	  read or write, sync or async. First two fields specify the major and
-	  minor number of the device, third field specifies the operation type
-	  and the fourth field specifies the io_wait_time in ns.
-
-- blkio.io_merged
-	- Total number of bios/requests merged into requests belonging to this
-	  cgroup. This is further divided by the type of operation - read or
-	  write, sync or async.
-
-- blkio.io_queued
-	- Total number of requests queued up at any given instant for this
-	  cgroup. This is further divided by the type of operation - read or
-	  write, sync or async.
-
-- blkio.avg_queue_size
-	- Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
-	  The average queue size for this cgroup over the entire time of this
-	  cgroup's existence. Queue size samples are taken each time one of the
-	  queues of this cgroup gets a timeslice.
-
-- blkio.group_wait_time
-	- Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
-	  This is the amount of time the cgroup had to wait since it became busy
-	  (i.e., went from 0 to 1 request queued) to get a timeslice for one of
-	  its queues. This is different from the io_wait_time which is the
-	  cumulative total of the amount of time spent by each IO in that cgroup
-	  waiting in the scheduler queue. This is in nanoseconds. If this is
-	  read when the cgroup is in a waiting (for timeslice) state, the stat
-	  will only report the group_wait_time accumulated till the last time it
-	  got a timeslice and will not include the current delta.
-
-- blkio.empty_time
-	- Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
-	  This is the amount of time a cgroup spends without any pending
-	  requests when not being served, i.e., it does not include any time
-	  spent idling for one of the queues of the cgroup. This is in
-	  nanoseconds. If this is read when the cgroup is in an empty state,
-	  the stat will only report the empty_time accumulated till the last
-	  time it had a pending request and will not include the current delta.
-
-- blkio.idle_time
-	- Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
-	  This is the amount of time spent by the IO scheduler idling for a
-	  given cgroup in anticipation of a better request than the existing ones
-	  from other queues/cgroups. This is in nanoseconds. If this is read
-	  when the cgroup is in an idling state, the stat will only report the
-	  idle_time accumulated till the last idle period and will not include
-	  the current delta.
-
-- blkio.dequeue
-	- Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. This
-	  gives the statistics about how many a times a group was dequeued
-	  from service tree of the device. First two fields specify the major
-	  and minor number of the device and third field specifies the number
-	  of times a group was dequeued from a particular device.
-
-- blkio.*_recursive
-	- Recursive version of various stats. These files show the
-          same information as their non-recursive counterparts but
-          include stats from all the descendant cgroups.
-
-Throttling/Upper limit policy files
------------------------------------
-- blkio.throttle.read_bps_device
-	- Specifies upper limit on READ rate from the device. IO rate is
-	  specified in bytes per second. Rules are per device. Following is
-	  the format.
-
-  echo "<major>:<minor>  <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
-
-- blkio.throttle.write_bps_device
-	- Specifies upper limit on WRITE rate to the device. IO rate is
-	  specified in bytes per second. Rules are per device. Following is
-	  the format.
-
-  echo "<major>:<minor>  <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
-
-- blkio.throttle.read_iops_device
-	- Specifies upper limit on READ rate from the device. IO rate is
-	  specified in IO per second. Rules are per device. Following is
-	  the format.
-
-  echo "<major>:<minor>  <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
-
-- blkio.throttle.write_iops_device
-	- Specifies upper limit on WRITE rate to the device. IO rate is
-	  specified in io per second. Rules are per device. Following is
-	  the format.
-
-  echo "<major>:<minor>  <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
-
-Note: If both BW and IOPS rules are specified for a device, then IO is
-      subjected to both the constraints.
-
-- blkio.throttle.io_serviced
-	- Number of IOs (bio) issued to the disk by the group. These
-	  are further divided by the type of operation - read or write, sync
-	  or async. First two fields specify the major and minor number of the
-	  device, third field specifies the operation type and the fourth field
-	  specifies the number of IOs.
-
-- blkio.throttle.io_service_bytes
-	- Number of bytes transferred to/from the disk by the group. These
-	  are further divided by the type of operation - read or write, sync
-	  or async. First two fields specify the major and minor number of the
-	  device, third field specifies the operation type and the fourth field
-	  specifies the number of bytes.
-
-Common files among various policies
------------------------------------
-- blkio.reset_stats
-	- Writing an int to this file will result in resetting all the stats
-	  for that cgroup.
-
-CFQ sysfs tunable
-=================
-/sys/block/<disk>/queue/iosched/slice_idle
-------------------------------------------
-On a faster hardware CFQ can be slow, especially with sequential workload.
-This happens because CFQ idles on a single queue and single queue might not
-drive deeper request queue depths to keep the storage busy. In such scenarios
-one can try setting slice_idle=0 and that would switch CFQ to IOPS
-(IO operations per second) mode on NCQ supporting hardware.
-
-That means CFQ will not idle between cfq queues of a cfq group and hence be
-able to driver higher queue depth and achieve better throughput. That also
-means that cfq provides fairness among groups in terms of IOPS and not in
-terms of disk time.
-
-/sys/block/<disk>/queue/iosched/group_idle
-------------------------------------------
-If one disables idling on individual cfq queues and cfq service trees by
-setting slice_idle=0, group_idle kicks in. That means CFQ will still idle
-on the group in an attempt to provide fairness among groups.
-
-By default group_idle is same as slice_idle and does not do anything if
-slice_idle is enabled.
-
-One can experience an overall throughput drop if you have created multiple
-groups and put applications in that group which are not driving enough
-IO to keep disk busy. In that case set group_idle=0, and CFQ will not idle
-on individual groups and throughput should improve.
diff --git a/Documentation/cgroup-v1/cgroups.rst b/Documentation/cgroup-v1/cgroups.rst
new file mode 100644
index 000000000000..46bbe7e022d4
--- /dev/null
+++ b/Documentation/cgroup-v1/cgroups.rst
@@ -0,0 +1,695 @@
+==============
+Control Groups
+==============
+
+Written by Paul Menage <menage@google.com> based on
+Documentation/cgroup-v1/cpusets.rst
+
+Original copyright statements from cpusets.txt:
+
+Portions Copyright (C) 2004 BULL SA.
+
+Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
+
+Modified by Paul Jackson <pj@sgi.com>
+
+Modified by Christoph Lameter <cl@linux.com>
+
+.. CONTENTS:
+
+	1. Control Groups
+	1.1 What are cgroups ?
+	1.2 Why are cgroups needed ?
+	1.3 How are cgroups implemented ?
+	1.4 What does notify_on_release do ?
+	1.5 What does clone_children do ?
+	1.6 How do I use cgroups ?
+	2. Usage Examples and Syntax
+	2.1 Basic Usage
+	2.2 Attaching processes
+	2.3 Mounting hierarchies by name
+	3. Kernel API
+	3.1 Overview
+	3.2 Synchronization
+	3.3 Subsystem API
+	4. Extended attributes usage
+	5. Questions
+
+1. Control Groups
+=================
+
+1.1 What are cgroups ?
+----------------------
+
+Control Groups provide a mechanism for aggregating/partitioning sets of
+tasks, and all their future children, into hierarchical groups with
+specialized behaviour.
+
+Definitions:
+
+A *cgroup* associates a set of tasks with a set of parameters for one
+or more subsystems.
+
+A *subsystem* is a module that makes use of the task grouping
+facilities provided by cgroups to treat groups of tasks in
+particular ways. A subsystem is typically a "resource controller" that
+schedules a resource or applies per-cgroup limits, but it may be
+anything that wants to act on a group of processes, e.g. a
+virtualization subsystem.
+
+A *hierarchy* is a set of cgroups arranged in a tree, such that
+every task in the system is in exactly one of the cgroups in the
+hierarchy, and a set of subsystems; each subsystem has system-specific
+state attached to each cgroup in the hierarchy.  Each hierarchy has
+an instance of the cgroup virtual filesystem associated with it.
+
+At any one time there may be multiple active hierarchies of task
+cgroups. Each hierarchy is a partition of all tasks in the system.
+
+User-level code may create and destroy cgroups by name in an
+instance of the cgroup virtual file system, specify and query to
+which cgroup a task is assigned, and list the task PIDs assigned to
+a cgroup. Those creations and assignments only affect the hierarchy
+associated with that instance of the cgroup file system.
+
+On their own, the only use for cgroups is for simple job
+tracking. The intention is that other subsystems hook into the generic
+cgroup support to provide new attributes for cgroups, such as
+accounting/limiting the resources which processes in a cgroup can
+access. For example, cpusets (see Documentation/cgroup-v1/cpusets.rst) allow
+you to associate a set of CPUs and a set of memory nodes with the
+tasks in each cgroup.
+
+1.2 Why are cgroups needed ?
+----------------------------
+
+There are multiple efforts to provide process aggregations in the
+Linux kernel, mainly for resource-tracking purposes. Such efforts
+include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server
+namespaces. These all require the basic notion of a
+grouping/partitioning of processes, with newly forked processes ending
+up in the same group (cgroup) as their parent process.
+
+The kernel cgroup patch provides the minimum essential kernel
+mechanisms required to efficiently implement such groups. It has
+minimal impact on the system fast paths, and provides hooks for
+specific subsystems such as cpusets to provide additional behaviour as
+desired.
+
+Multiple hierarchy support is provided to allow for situations where
+the division of tasks into cgroups is distinctly different for
+different subsystems - having parallel hierarchies allows each
+hierarchy to be a natural division of tasks, without having to handle
+complex combinations of tasks that would be present if several
+unrelated subsystems needed to be forced into the same tree of
+cgroups.
+
+At one extreme, each resource controller or subsystem could be in a
+separate hierarchy; at the other extreme, all subsystems
+would be attached to the same hierarchy.
+
+As an example of a scenario (originally proposed by vatsa@in.ibm.com)
+that can benefit from multiple hierarchies, consider a large
+university server with various users - students, professors, system
+tasks etc. The resource planning for this server could be along the
+following lines::
+
+       CPU :          "Top cpuset"
+                       /       \
+               CPUSet1         CPUSet2
+                  |               |
+               (Professors)    (Students)
+
+               In addition (system tasks) are attached to topcpuset (so
+               that they can run anywhere) with a limit of 20%
+
+       Memory : Professors (50%), Students (30%), system (20%)
+
+       Disk : Professors (50%), Students (30%), system (20%)
+
+       Network : WWW browsing (20%), Network File System (60%), others (20%)
+                               / \
+               Professors (15%)  students (5%)
+
+Browsers like Firefox/Lynx go into the WWW network class, while (k)nfsd goes
+into the NFS network class.
+
+At the same time Firefox/Lynx will share an appropriate CPU/Memory class
+depending on who launched it (prof/student).
+
+With the ability to classify tasks differently for different resources
+(by putting those resource subsystems in different hierarchies),
+the admin can easily set up a script which receives exec notifications
+and depending on who is launching the browser he can::
+
+    # echo browser_pid > /sys/fs/cgroup/<restype>/<userclass>/tasks
+
+With only a single hierarchy, he now would potentially have to create
+a separate cgroup for every browser launched and associate it with
+appropriate network and other resource class.  This may lead to
+proliferation of such cgroups.
+
+Also let's say that the administrator would like to give enhanced network
+access temporarily to a student's browser (since it is night and the user
+wants to do online gaming :))  OR give one of the student's simulation
+apps enhanced CPU power.
+
+With ability to write PIDs directly to resource classes, it's just a
+matter of::
+
+       # echo pid > /sys/fs/cgroup/network/<new_class>/tasks
+       (after some time)
+       # echo pid > /sys/fs/cgroup/network/<orig_class>/tasks
+
+Without this ability, the administrator would have to split the cgroup into
+multiple separate ones and then associate the new cgroups with the
+new resource classes.
+
+
+
+1.3 How are cgroups implemented ?
+---------------------------------
+
+Control Groups extends the kernel as follows:
+
+ - Each task in the system has a reference-counted pointer to a
+   css_set.
+
+ - A css_set contains a set of reference-counted pointers to
+   cgroup_subsys_state objects, one for each cgroup subsystem
+   registered in the system. There is no direct link from a task to
+   the cgroup of which it's a member in each hierarchy, but this
+   can be determined by following pointers through the
+   cgroup_subsys_state objects. This is because accessing the
+   subsystem state is something that's expected to happen frequently
+   and in performance-critical code, whereas operations that require a
+   task's actual cgroup assignments (in particular, moving between
+   cgroups) are less common. A linked list runs through the cg_list
+   field of each task_struct using the css_set, anchored at
+   css_set->tasks.
+
+ - A cgroup hierarchy filesystem can be mounted for browsing and
+   manipulation from user space.
+
+ - You can list all the tasks (by PID) attached to any cgroup.
+
+The implementation of cgroups requires a few, simple hooks
+into the rest of the kernel, none in performance-critical paths:
+
+ - in init/main.c, to initialize the root cgroups and initial
+   css_set at system boot.
+
+ - in fork and exit, to attach and detach a task from its css_set.
+
+In addition, a new file system of type "cgroup" may be mounted, to
+enable browsing and modifying the cgroups presently known to the
+kernel.  When mounting a cgroup hierarchy, you may specify a
+comma-separated list of subsystems to mount as the filesystem mount
+options.  By default, mounting the cgroup filesystem attempts to
+mount a hierarchy containing all registered subsystems.
+
+If an active hierarchy with exactly the same set of subsystems already
+exists, it will be reused for the new mount. If no existing hierarchy
+matches, and any of the requested subsystems are in use in an existing
+hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy
+is activated, associated with the requested subsystems.
+
+It's not currently possible to bind a new subsystem to an active
+cgroup hierarchy, or to unbind a subsystem from an active cgroup
+hierarchy. This may be possible in future, but is fraught with nasty
+error-recovery issues.
+
+When a cgroup filesystem is unmounted, if there are any
+child cgroups created below the top-level cgroup, that hierarchy
+will remain active even though unmounted; if there are no
+child cgroups then the hierarchy will be deactivated.
+
+No new system calls are added for cgroups - all support for
+querying and modifying cgroups is via this cgroup file system.
+
+Each task under /proc has an added file named 'cgroup' displaying,
+for each active hierarchy, the subsystem names and the cgroup name
+as the path relative to the root of the cgroup file system.
+
+Each cgroup is represented by a directory in the cgroup file system
+containing the following files describing that cgroup:
+
+ - tasks: list of tasks (by PID) attached to that cgroup.  This list
+   is not guaranteed to be sorted.  Writing a thread ID into this file
+   moves the thread into this cgroup.
+ - cgroup.procs: list of thread group IDs in the cgroup.  This list is
+   not guaranteed to be sorted or free of duplicate TGIDs, and userspace
+   should sort/uniquify the list if this property is required.
+   Writing a thread group ID into this file moves all threads in that
+   group into this cgroup.
+ - notify_on_release flag: run the release agent on exit?
+ - release_agent: the path to use for release notifications (this file
+   exists in the top cgroup only)
+
+Other subsystems such as cpusets may add additional files in each
+cgroup dir.
+
+New cgroups are created using the mkdir system call or shell
+command.  The properties of a cgroup, such as its flags, are
+modified by writing to the appropriate file in that cgroups
+directory, as listed above.
+
+The named hierarchical structure of nested cgroups allows partitioning
+a large system into nested, dynamically changeable, "soft-partitions".
+
+The attachment of each task, automatically inherited at fork by any
+children of that task, to a cgroup allows organizing the work load
+on a system into related sets of tasks.  A task may be re-attached to
+any other cgroup, if allowed by the permissions on the necessary
+cgroup file system directories.
+
+When a task is moved from one cgroup to another, it gets a new
+css_set pointer - if there's an already existing css_set with the
+desired collection of cgroups then that group is reused, otherwise a new
+css_set is allocated. The appropriate existing css_set is located by
+looking into a hash table.
+
+To allow access from a cgroup to the css_sets (and hence tasks)
+that comprise it, a set of cg_cgroup_link objects form a lattice;
+each cg_cgroup_link is linked into a list of cg_cgroup_links for
+a single cgroup on its cgrp_link_list field, and a list of
+cg_cgroup_links for a single css_set on its cg_link_list.
+
+Thus the set of tasks in a cgroup can be listed by iterating over
+each css_set that references the cgroup, and sub-iterating over
+each css_set's task set.
+
+The use of a Linux virtual file system (vfs) to represent the
+cgroup hierarchy provides for a familiar permission and name space
+for cgroups, with a minimum of additional kernel code.
+
+1.4 What does notify_on_release do ?
+------------------------------------
+
+If the notify_on_release flag is enabled (1) in a cgroup, then
+whenever the last task in the cgroup leaves (exits or attaches to
+some other cgroup) and the last child cgroup of that cgroup
+is removed, then the kernel runs the command specified by the contents
+of the "release_agent" file in that hierarchy's root directory,
+supplying the pathname (relative to the mount point of the cgroup
+file system) of the abandoned cgroup.  This enables automatic
+removal of abandoned cgroups.  The default value of
+notify_on_release in the root cgroup at system boot is disabled
+(0).  The default value of other cgroups at creation is the current
+value of their parents' notify_on_release settings. The default value of
+a cgroup hierarchy's release_agent path is empty.
+
+1.5 What does clone_children do ?
+---------------------------------
+
+This flag only affects the cpuset controller. If the clone_children
+flag is enabled (1) in a cgroup, a new cpuset cgroup will copy its
+configuration from the parent during initialization.
+
+1.6 How do I use cgroups ?
+--------------------------
+
+To start a new job that is to be contained within a cgroup, using
+the "cpuset" cgroup subsystem, the steps are something like::
+
+ 1) mount -t tmpfs cgroup_root /sys/fs/cgroup
+ 2) mkdir /sys/fs/cgroup/cpuset
+ 3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+ 4) Create the new cgroup by doing mkdir's and write's (or echo's) in
+    the /sys/fs/cgroup/cpuset virtual file system.
+ 5) Start a task that will be the "founding father" of the new job.
+ 6) Attach that task to the new cgroup by writing its PID to the
+    /sys/fs/cgroup/cpuset tasks file for that cgroup.
+ 7) fork, exec or clone the job tasks from this founding father task.
+
+For example, the following sequence of commands will setup a cgroup
+named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
+and then start a subshell 'sh' in that cgroup::
+
+  mount -t tmpfs cgroup_root /sys/fs/cgroup
+  mkdir /sys/fs/cgroup/cpuset
+  mount -t cgroup cpuset -ocpuset /sys/fs/cgroup/cpuset
+  cd /sys/fs/cgroup/cpuset
+  mkdir Charlie
+  cd Charlie
+  /bin/echo 2-3 > cpuset.cpus
+  /bin/echo 1 > cpuset.mems
+  /bin/echo $$ > tasks
+  sh
+  # The subshell 'sh' is now running in cgroup Charlie
+  # The next line should display '/Charlie'
+  cat /proc/self/cgroup
+
+2. Usage Examples and Syntax
+============================
+
+2.1 Basic Usage
+---------------
+
+Creating, modifying, using cgroups can be done through the cgroup
+virtual filesystem.
+
+To mount a cgroup hierarchy with all available subsystems, type::
+
+  # mount -t cgroup xxx /sys/fs/cgroup
+
+The "xxx" is not interpreted by the cgroup code, but will appear in
+/proc/mounts so may be any useful identifying string that you like.
+
+Note: Some subsystems do not work without some user input first.  For instance,
+if cpusets are enabled the user will have to populate the cpus and mems files
+for each new cgroup created before that group can be used.
+
+As explained in section `1.2 Why are cgroups needed?` you should create
+different hierarchies of cgroups for each single resource or group of
+resources you want to control. Therefore, you should mount a tmpfs on
+/sys/fs/cgroup and create directories for each cgroup resource or resource
+group::
+
+  # mount -t tmpfs cgroup_root /sys/fs/cgroup
+  # mkdir /sys/fs/cgroup/rg1
+
+To mount a cgroup hierarchy with just the cpuset and memory
+subsystems, type::
+
+  # mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1
+
+While remounting cgroups is currently supported, it is not recommend
+to use it. Remounting allows changing bound subsystems and
+release_agent. Rebinding is hardly useful as it only works when the
+hierarchy is empty and release_agent itself should be replaced with
+conventional fsnotify. The support for remounting will be removed in
+the future.
+
+To Specify a hierarchy's release_agent::
+
+  # mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \
+    xxx /sys/fs/cgroup/rg1
+
+Note that specifying 'release_agent' more than once will return failure.
+
+Note that changing the set of subsystems is currently only supported
+when the hierarchy consists of a single (root) cgroup. Supporting
+the ability to arbitrarily bind/unbind subsystems from an existing
+cgroup hierarchy is intended to be implemented in the future.
+
+Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the
+tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1
+is the cgroup that holds the whole system.
+
+If you want to change the value of release_agent::
+
+  # echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent
+
+It can also be changed via remount.
+
+If you want to create a new cgroup under /sys/fs/cgroup/rg1::
+
+  # cd /sys/fs/cgroup/rg1
+  # mkdir my_cgroup
+
+Now you want to do something with this cgroup:
+
+  # cd my_cgroup
+
+In this directory you can find several files::
+
+  # ls
+  cgroup.procs notify_on_release tasks
+  (plus whatever files added by the attached subsystems)
+
+Now attach your shell to this cgroup::
+
+  # /bin/echo $$ > tasks
+
+You can also create cgroups inside your cgroup by using mkdir in this
+directory::
+
+  # mkdir my_sub_cs
+
+To remove a cgroup, just use rmdir::
+
+  # rmdir my_sub_cs
+
+This will fail if the cgroup is in use (has cgroups inside, or
+has processes attached, or is held alive by other subsystem-specific
+reference).
+
+2.2 Attaching processes
+-----------------------
+
+::
+
+  # /bin/echo PID > tasks
+
+Note that it is PID, not PIDs. You can only attach ONE task at a time.
+If you have several tasks to attach, you have to do it one after another::
+
+  # /bin/echo PID1 > tasks
+  # /bin/echo PID2 > tasks
+	  ...
+  # /bin/echo PIDn > tasks
+
+You can attach the current shell task by echoing 0::
+
+  # echo 0 > tasks
+
+You can use the cgroup.procs file instead of the tasks file to move all
+threads in a threadgroup at once. Echoing the PID of any task in a
+threadgroup to cgroup.procs causes all tasks in that threadgroup to be
+attached to the cgroup. Writing 0 to cgroup.procs moves all tasks
+in the writing task's threadgroup.
+
+Note: Since every task is always a member of exactly one cgroup in each
+mounted hierarchy, to remove a task from its current cgroup you must
+move it into a new cgroup (possibly the root cgroup) by writing to the
+new cgroup's tasks file.
+
+Note: Due to some restrictions enforced by some cgroup subsystems, moving
+a process to another cgroup can fail.
+
+2.3 Mounting hierarchies by name
+--------------------------------
+
+Passing the name=<x> option when mounting a cgroups hierarchy
+associates the given name with the hierarchy.  This can be used when
+mounting a pre-existing hierarchy, in order to refer to it by name
+rather than by its set of active subsystems.  Each hierarchy is either
+nameless, or has a unique name.
+
+The name should match [\w.-]+
+
+When passing a name=<x> option for a new hierarchy, you need to
+specify subsystems manually; the legacy behaviour of mounting all
+subsystems when none are explicitly specified is not supported when
+you give a subsystem a name.
+
+The name of the subsystem appears as part of the hierarchy description
+in /proc/mounts and /proc/<pid>/cgroups.
+
+
+3. Kernel API
+=============
+
+3.1 Overview
+------------
+
+Each kernel subsystem that wants to hook into the generic cgroup
+system needs to create a cgroup_subsys object. This contains
+various methods, which are callbacks from the cgroup system, along
+with a subsystem ID which will be assigned by the cgroup system.
+
+Other fields in the cgroup_subsys object include:
+
+- subsys_id: a unique array index for the subsystem, indicating which
+  entry in cgroup->subsys[] this subsystem should be managing.
+
+- name: should be initialized to a unique subsystem name. Should be
+  no longer than MAX_CGROUP_TYPE_NAMELEN.
+
+- early_init: indicate if the subsystem needs early initialization
+  at system boot.
+
+Each cgroup object created by the system has an array of pointers,
+indexed by subsystem ID; this pointer is entirely managed by the
+subsystem; the generic cgroup code will never touch this pointer.
+
+3.2 Synchronization
+-------------------
+
+There is a global mutex, cgroup_mutex, used by the cgroup
+system. This should be taken by anything that wants to modify a
+cgroup. It may also be taken to prevent cgroups from being
+modified, but more specific locks may be more appropriate in that
+situation.
+
+See kernel/cgroup.c for more details.
+
+Subsystems can take/release the cgroup_mutex via the functions
+cgroup_lock()/cgroup_unlock().
+
+Accessing a task's cgroup pointer may be done in the following ways:
+- while holding cgroup_mutex
+- while holding the task's alloc_lock (via task_lock())
+- inside an rcu_read_lock() section via rcu_dereference()
+
+3.3 Subsystem API
+-----------------
+
+Each subsystem should:
+
+- add an entry in linux/cgroup_subsys.h
+- define a cgroup_subsys object called <name>_cgrp_subsys
+
+Each subsystem may export the following methods. The only mandatory
+methods are css_alloc/free. Any others that are null are presumed to
+be successful no-ops.
+
+``struct cgroup_subsys_state *css_alloc(struct cgroup *cgrp)``
+(cgroup_mutex held by caller)
+
+Called to allocate a subsystem state object for a cgroup. The
+subsystem should allocate its subsystem state object for the passed
+cgroup, returning a pointer to the new object on success or a
+ERR_PTR() value. On success, the subsystem pointer should point to
+a structure of type cgroup_subsys_state (typically embedded in a
+larger subsystem-specific object), which will be initialized by the
+cgroup system. Note that this will be called at initialization to
+create the root subsystem state for this subsystem; this case can be
+identified by the passed cgroup object having a NULL parent (since
+it's the root of the hierarchy) and may be an appropriate place for
+initialization code.
+
+``int css_online(struct cgroup *cgrp)``
+(cgroup_mutex held by caller)
+
+Called after @cgrp successfully completed all allocations and made
+visible to cgroup_for_each_child/descendant_*() iterators. The
+subsystem may choose to fail creation by returning -errno. This
+callback can be used to implement reliable state sharing and
+propagation along the hierarchy. See the comment on
+cgroup_for_each_descendant_pre() for details.
+
+``void css_offline(struct cgroup *cgrp);``
+(cgroup_mutex held by caller)
+
+This is the counterpart of css_online() and called iff css_online()
+has succeeded on @cgrp. This signifies the beginning of the end of
+@cgrp. @cgrp is being removed and the subsystem should start dropping
+all references it's holding on @cgrp. When all references are dropped,
+cgroup removal will proceed to the next step - css_free(). After this
+callback, @cgrp should be considered dead to the subsystem.
+
+``void css_free(struct cgroup *cgrp)``
+(cgroup_mutex held by caller)
+
+The cgroup system is about to free @cgrp; the subsystem should free
+its subsystem state object. By the time this method is called, @cgrp
+is completely unused; @cgrp->parent is still valid. (Note - can also
+be called for a newly-created cgroup if an error occurs after this
+subsystem's create() method has been called for the new cgroup).
+
+``int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)``
+(cgroup_mutex held by caller)
+
+Called prior to moving one or more tasks into a cgroup; if the
+subsystem returns an error, this will abort the attach operation.
+@tset contains the tasks to be attached and is guaranteed to have at
+least one task in it.
+
+If there are multiple tasks in the taskset, then:
+  - it's guaranteed that all are from the same thread group
+  - @tset contains all tasks from the thread group whether or not
+    they're switching cgroups
+  - the first task is the leader
+
+Each @tset entry also contains the task's old cgroup and tasks which
+aren't switching cgroup can be skipped easily using the
+cgroup_taskset_for_each() iterator. Note that this isn't called on a
+fork. If this method returns 0 (success) then this should remain valid
+while the caller holds cgroup_mutex and it is ensured that either
+attach() or cancel_attach() will be called in future.
+
+``void css_reset(struct cgroup_subsys_state *css)``
+(cgroup_mutex held by caller)
+
+An optional operation which should restore @css's configuration to the
+initial state.  This is currently only used on the unified hierarchy
+when a subsystem is disabled on a cgroup through
+"cgroup.subtree_control" but should remain enabled because other
+subsystems depend on it.  cgroup core makes such a css invisible by
+removing the associated interface files and invokes this callback so
+that the hidden subsystem can return to the initial neutral state.
+This prevents unexpected resource control from a hidden css and
+ensures that the configuration is in the initial state when it is made
+visible again later.
+
+``void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)``
+(cgroup_mutex held by caller)
+
+Called when a task attach operation has failed after can_attach() has succeeded.
+A subsystem whose can_attach() has some side-effects should provide this
+function, so that the subsystem can implement a rollback. If not, not necessary.
+This will be called only about subsystems whose can_attach() operation have
+succeeded. The parameters are identical to can_attach().
+
+``void attach(struct cgroup *cgrp, struct cgroup_taskset *tset)``
+(cgroup_mutex held by caller)
+
+Called after the task has been attached to the cgroup, to allow any
+post-attachment activity that requires memory allocations or blocking.
+The parameters are identical to can_attach().
+
+``void fork(struct task_struct *task)``
+
+Called when a task is forked into a cgroup.
+
+``void exit(struct task_struct *task)``
+
+Called during task exit.
+
+``void free(struct task_struct *task)``
+
+Called when the task_struct is freed.
+
+``void bind(struct cgroup *root)``
+(cgroup_mutex held by caller)
+
+Called when a cgroup subsystem is rebound to a different hierarchy
+and root cgroup. Currently this will only involve movement between
+the default hierarchy (which never has sub-cgroups) and a hierarchy
+that is being created/destroyed (and hence has no sub-cgroups).
+
+4. Extended attribute usage
+===========================
+
+cgroup filesystem supports certain types of extended attributes in its
+directories and files.  The current supported types are:
+
+	- Trusted (XATTR_TRUSTED)
+	- Security (XATTR_SECURITY)
+
+Both require CAP_SYS_ADMIN capability to set.
+
+Like in tmpfs, the extended attributes in cgroup filesystem are stored
+using kernel memory and it's advised to keep the usage at minimum.  This
+is the reason why user defined extended attributes are not supported, since
+any user can do it and there's no limit in the value size.
+
+The current known users for this feature are SELinux to limit cgroup usage
+in containers and systemd for assorted meta data like main PID in a cgroup
+(systemd creates a cgroup per service).
+
+5. Questions
+============
+
+::
+
+  Q: what's up with this '/bin/echo' ?
+  A: bash's builtin 'echo' command does not check calls to write() against
+     errors. If you use it in the cgroup file system, you won't be
+     able to tell whether a command succeeded or failed.
+
+  Q: When I attach processes, only the first of the line gets really attached !
+  A: We can only return one error code per call to write(). So you should also
+     put only ONE PID.
diff --git a/Documentation/cgroup-v1/cgroups.txt b/Documentation/cgroup-v1/cgroups.txt
deleted file mode 100644
index 059f7063eea6..000000000000
--- a/Documentation/cgroup-v1/cgroups.txt
+++ /dev/null
@@ -1,677 +0,0 @@
-				CGROUPS
-				-------
-
-Written by Paul Menage <menage@google.com> based on
-Documentation/cgroup-v1/cpusets.txt
-
-Original copyright statements from cpusets.txt:
-Portions Copyright (C) 2004 BULL SA.
-Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
-Modified by Paul Jackson <pj@sgi.com>
-Modified by Christoph Lameter <cl@linux.com>
-
-CONTENTS:
-=========
-
-1. Control Groups
-  1.1 What are cgroups ?
-  1.2 Why are cgroups needed ?
-  1.3 How are cgroups implemented ?
-  1.4 What does notify_on_release do ?
-  1.5 What does clone_children do ?
-  1.6 How do I use cgroups ?
-2. Usage Examples and Syntax
-  2.1 Basic Usage
-  2.2 Attaching processes
-  2.3 Mounting hierarchies by name
-3. Kernel API
-  3.1 Overview
-  3.2 Synchronization
-  3.3 Subsystem API
-4. Extended attributes usage
-5. Questions
-
-1. Control Groups
-=================
-
-1.1 What are cgroups ?
-----------------------
-
-Control Groups provide a mechanism for aggregating/partitioning sets of
-tasks, and all their future children, into hierarchical groups with
-specialized behaviour.
-
-Definitions:
-
-A *cgroup* associates a set of tasks with a set of parameters for one
-or more subsystems.
-
-A *subsystem* is a module that makes use of the task grouping
-facilities provided by cgroups to treat groups of tasks in
-particular ways. A subsystem is typically a "resource controller" that
-schedules a resource or applies per-cgroup limits, but it may be
-anything that wants to act on a group of processes, e.g. a
-virtualization subsystem.
-
-A *hierarchy* is a set of cgroups arranged in a tree, such that
-every task in the system is in exactly one of the cgroups in the
-hierarchy, and a set of subsystems; each subsystem has system-specific
-state attached to each cgroup in the hierarchy.  Each hierarchy has
-an instance of the cgroup virtual filesystem associated with it.
-
-At any one time there may be multiple active hierarchies of task
-cgroups. Each hierarchy is a partition of all tasks in the system.
-
-User-level code may create and destroy cgroups by name in an
-instance of the cgroup virtual file system, specify and query to
-which cgroup a task is assigned, and list the task PIDs assigned to
-a cgroup. Those creations and assignments only affect the hierarchy
-associated with that instance of the cgroup file system.
-
-On their own, the only use for cgroups is for simple job
-tracking. The intention is that other subsystems hook into the generic
-cgroup support to provide new attributes for cgroups, such as
-accounting/limiting the resources which processes in a cgroup can
-access. For example, cpusets (see Documentation/cgroup-v1/cpusets.txt) allow
-you to associate a set of CPUs and a set of memory nodes with the
-tasks in each cgroup.
-
-1.2 Why are cgroups needed ?
-----------------------------
-
-There are multiple efforts to provide process aggregations in the
-Linux kernel, mainly for resource-tracking purposes. Such efforts
-include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server
-namespaces. These all require the basic notion of a
-grouping/partitioning of processes, with newly forked processes ending
-up in the same group (cgroup) as their parent process.
-
-The kernel cgroup patch provides the minimum essential kernel
-mechanisms required to efficiently implement such groups. It has
-minimal impact on the system fast paths, and provides hooks for
-specific subsystems such as cpusets to provide additional behaviour as
-desired.
-
-Multiple hierarchy support is provided to allow for situations where
-the division of tasks into cgroups is distinctly different for
-different subsystems - having parallel hierarchies allows each
-hierarchy to be a natural division of tasks, without having to handle
-complex combinations of tasks that would be present if several
-unrelated subsystems needed to be forced into the same tree of
-cgroups.
-
-At one extreme, each resource controller or subsystem could be in a
-separate hierarchy; at the other extreme, all subsystems
-would be attached to the same hierarchy.
-
-As an example of a scenario (originally proposed by vatsa@in.ibm.com)
-that can benefit from multiple hierarchies, consider a large
-university server with various users - students, professors, system
-tasks etc. The resource planning for this server could be along the
-following lines:
-
-       CPU :          "Top cpuset"
-                       /       \
-               CPUSet1         CPUSet2
-                  |               |
-               (Professors)    (Students)
-
-               In addition (system tasks) are attached to topcpuset (so
-               that they can run anywhere) with a limit of 20%
-
-       Memory : Professors (50%), Students (30%), system (20%)
-
-       Disk : Professors (50%), Students (30%), system (20%)
-
-       Network : WWW browsing (20%), Network File System (60%), others (20%)
-                               / \
-               Professors (15%)  students (5%)
-
-Browsers like Firefox/Lynx go into the WWW network class, while (k)nfsd goes
-into the NFS network class.
-
-At the same time Firefox/Lynx will share an appropriate CPU/Memory class
-depending on who launched it (prof/student).
-
-With the ability to classify tasks differently for different resources
-(by putting those resource subsystems in different hierarchies),
-the admin can easily set up a script which receives exec notifications
-and depending on who is launching the browser he can
-
-    # echo browser_pid > /sys/fs/cgroup/<restype>/<userclass>/tasks
-
-With only a single hierarchy, he now would potentially have to create
-a separate cgroup for every browser launched and associate it with
-appropriate network and other resource class.  This may lead to
-proliferation of such cgroups.
-
-Also let's say that the administrator would like to give enhanced network
-access temporarily to a student's browser (since it is night and the user
-wants to do online gaming :))  OR give one of the student's simulation
-apps enhanced CPU power.
-
-With ability to write PIDs directly to resource classes, it's just a
-matter of:
-
-       # echo pid > /sys/fs/cgroup/network/<new_class>/tasks
-       (after some time)
-       # echo pid > /sys/fs/cgroup/network/<orig_class>/tasks
-
-Without this ability, the administrator would have to split the cgroup into
-multiple separate ones and then associate the new cgroups with the
-new resource classes.
-
-
-
-1.3 How are cgroups implemented ?
----------------------------------
-
-Control Groups extends the kernel as follows:
-
- - Each task in the system has a reference-counted pointer to a
-   css_set.
-
- - A css_set contains a set of reference-counted pointers to
-   cgroup_subsys_state objects, one for each cgroup subsystem
-   registered in the system. There is no direct link from a task to
-   the cgroup of which it's a member in each hierarchy, but this
-   can be determined by following pointers through the
-   cgroup_subsys_state objects. This is because accessing the
-   subsystem state is something that's expected to happen frequently
-   and in performance-critical code, whereas operations that require a
-   task's actual cgroup assignments (in particular, moving between
-   cgroups) are less common. A linked list runs through the cg_list
-   field of each task_struct using the css_set, anchored at
-   css_set->tasks.
-
- - A cgroup hierarchy filesystem can be mounted for browsing and
-   manipulation from user space.
-
- - You can list all the tasks (by PID) attached to any cgroup.
-
-The implementation of cgroups requires a few, simple hooks
-into the rest of the kernel, none in performance-critical paths:
-
- - in init/main.c, to initialize the root cgroups and initial
-   css_set at system boot.
-
- - in fork and exit, to attach and detach a task from its css_set.
-
-In addition, a new file system of type "cgroup" may be mounted, to
-enable browsing and modifying the cgroups presently known to the
-kernel.  When mounting a cgroup hierarchy, you may specify a
-comma-separated list of subsystems to mount as the filesystem mount
-options.  By default, mounting the cgroup filesystem attempts to
-mount a hierarchy containing all registered subsystems.
-
-If an active hierarchy with exactly the same set of subsystems already
-exists, it will be reused for the new mount. If no existing hierarchy
-matches, and any of the requested subsystems are in use in an existing
-hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy
-is activated, associated with the requested subsystems.
-
-It's not currently possible to bind a new subsystem to an active
-cgroup hierarchy, or to unbind a subsystem from an active cgroup
-hierarchy. This may be possible in future, but is fraught with nasty
-error-recovery issues.
-
-When a cgroup filesystem is unmounted, if there are any
-child cgroups created below the top-level cgroup, that hierarchy
-will remain active even though unmounted; if there are no
-child cgroups then the hierarchy will be deactivated.
-
-No new system calls are added for cgroups - all support for
-querying and modifying cgroups is via this cgroup file system.
-
-Each task under /proc has an added file named 'cgroup' displaying,
-for each active hierarchy, the subsystem names and the cgroup name
-as the path relative to the root of the cgroup file system.
-
-Each cgroup is represented by a directory in the cgroup file system
-containing the following files describing that cgroup:
-
- - tasks: list of tasks (by PID) attached to that cgroup.  This list
-   is not guaranteed to be sorted.  Writing a thread ID into this file
-   moves the thread into this cgroup.
- - cgroup.procs: list of thread group IDs in the cgroup.  This list is
-   not guaranteed to be sorted or free of duplicate TGIDs, and userspace
-   should sort/uniquify the list if this property is required.
-   Writing a thread group ID into this file moves all threads in that
-   group into this cgroup.
- - notify_on_release flag: run the release agent on exit?
- - release_agent: the path to use for release notifications (this file
-   exists in the top cgroup only)
-
-Other subsystems such as cpusets may add additional files in each
-cgroup dir.
-
-New cgroups are created using the mkdir system call or shell
-command.  The properties of a cgroup, such as its flags, are
-modified by writing to the appropriate file in that cgroups
-directory, as listed above.
-
-The named hierarchical structure of nested cgroups allows partitioning
-a large system into nested, dynamically changeable, "soft-partitions".
-
-The attachment of each task, automatically inherited at fork by any
-children of that task, to a cgroup allows organizing the work load
-on a system into related sets of tasks.  A task may be re-attached to
-any other cgroup, if allowed by the permissions on the necessary
-cgroup file system directories.
-
-When a task is moved from one cgroup to another, it gets a new
-css_set pointer - if there's an already existing css_set with the
-desired collection of cgroups then that group is reused, otherwise a new
-css_set is allocated. The appropriate existing css_set is located by
-looking into a hash table.
-
-To allow access from a cgroup to the css_sets (and hence tasks)
-that comprise it, a set of cg_cgroup_link objects form a lattice;
-each cg_cgroup_link is linked into a list of cg_cgroup_links for
-a single cgroup on its cgrp_link_list field, and a list of
-cg_cgroup_links for a single css_set on its cg_link_list.
-
-Thus the set of tasks in a cgroup can be listed by iterating over
-each css_set that references the cgroup, and sub-iterating over
-each css_set's task set.
-
-The use of a Linux virtual file system (vfs) to represent the
-cgroup hierarchy provides for a familiar permission and name space
-for cgroups, with a minimum of additional kernel code.
-
-1.4 What does notify_on_release do ?
-------------------------------------
-
-If the notify_on_release flag is enabled (1) in a cgroup, then
-whenever the last task in the cgroup leaves (exits or attaches to
-some other cgroup) and the last child cgroup of that cgroup
-is removed, then the kernel runs the command specified by the contents
-of the "release_agent" file in that hierarchy's root directory,
-supplying the pathname (relative to the mount point of the cgroup
-file system) of the abandoned cgroup.  This enables automatic
-removal of abandoned cgroups.  The default value of
-notify_on_release in the root cgroup at system boot is disabled
-(0).  The default value of other cgroups at creation is the current
-value of their parents' notify_on_release settings. The default value of
-a cgroup hierarchy's release_agent path is empty.
-
-1.5 What does clone_children do ?
----------------------------------
-
-This flag only affects the cpuset controller. If the clone_children
-flag is enabled (1) in a cgroup, a new cpuset cgroup will copy its
-configuration from the parent during initialization.
-
-1.6 How do I use cgroups ?
---------------------------
-
-To start a new job that is to be contained within a cgroup, using
-the "cpuset" cgroup subsystem, the steps are something like:
-
- 1) mount -t tmpfs cgroup_root /sys/fs/cgroup
- 2) mkdir /sys/fs/cgroup/cpuset
- 3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
- 4) Create the new cgroup by doing mkdir's and write's (or echo's) in
-    the /sys/fs/cgroup/cpuset virtual file system.
- 5) Start a task that will be the "founding father" of the new job.
- 6) Attach that task to the new cgroup by writing its PID to the
-    /sys/fs/cgroup/cpuset tasks file for that cgroup.
- 7) fork, exec or clone the job tasks from this founding father task.
-
-For example, the following sequence of commands will setup a cgroup
-named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
-and then start a subshell 'sh' in that cgroup:
-
-  mount -t tmpfs cgroup_root /sys/fs/cgroup
-  mkdir /sys/fs/cgroup/cpuset
-  mount -t cgroup cpuset -ocpuset /sys/fs/cgroup/cpuset
-  cd /sys/fs/cgroup/cpuset
-  mkdir Charlie
-  cd Charlie
-  /bin/echo 2-3 > cpuset.cpus
-  /bin/echo 1 > cpuset.mems
-  /bin/echo $$ > tasks
-  sh
-  # The subshell 'sh' is now running in cgroup Charlie
-  # The next line should display '/Charlie'
-  cat /proc/self/cgroup
-
-2. Usage Examples and Syntax
-============================
-
-2.1 Basic Usage
----------------
-
-Creating, modifying, using cgroups can be done through the cgroup
-virtual filesystem.
-
-To mount a cgroup hierarchy with all available subsystems, type:
-# mount -t cgroup xxx /sys/fs/cgroup
-
-The "xxx" is not interpreted by the cgroup code, but will appear in
-/proc/mounts so may be any useful identifying string that you like.
-
-Note: Some subsystems do not work without some user input first.  For instance,
-if cpusets are enabled the user will have to populate the cpus and mems files
-for each new cgroup created before that group can be used.
-
-As explained in section `1.2 Why are cgroups needed?' you should create
-different hierarchies of cgroups for each single resource or group of
-resources you want to control. Therefore, you should mount a tmpfs on
-/sys/fs/cgroup and create directories for each cgroup resource or resource
-group.
-
-# mount -t tmpfs cgroup_root /sys/fs/cgroup
-# mkdir /sys/fs/cgroup/rg1
-
-To mount a cgroup hierarchy with just the cpuset and memory
-subsystems, type:
-# mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1
-
-While remounting cgroups is currently supported, it is not recommend
-to use it. Remounting allows changing bound subsystems and
-release_agent. Rebinding is hardly useful as it only works when the
-hierarchy is empty and release_agent itself should be replaced with
-conventional fsnotify. The support for remounting will be removed in
-the future.
-
-To Specify a hierarchy's release_agent:
-# mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \
-  xxx /sys/fs/cgroup/rg1
-
-Note that specifying 'release_agent' more than once will return failure.
-
-Note that changing the set of subsystems is currently only supported
-when the hierarchy consists of a single (root) cgroup. Supporting
-the ability to arbitrarily bind/unbind subsystems from an existing
-cgroup hierarchy is intended to be implemented in the future.
-
-Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the
-tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1
-is the cgroup that holds the whole system.
-
-If you want to change the value of release_agent:
-# echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent
-
-It can also be changed via remount.
-
-If you want to create a new cgroup under /sys/fs/cgroup/rg1:
-# cd /sys/fs/cgroup/rg1
-# mkdir my_cgroup
-
-Now you want to do something with this cgroup.
-# cd my_cgroup
-
-In this directory you can find several files:
-# ls
-cgroup.procs notify_on_release tasks
-(plus whatever files added by the attached subsystems)
-
-Now attach your shell to this cgroup:
-# /bin/echo $$ > tasks
-
-You can also create cgroups inside your cgroup by using mkdir in this
-directory.
-# mkdir my_sub_cs
-
-To remove a cgroup, just use rmdir:
-# rmdir my_sub_cs
-
-This will fail if the cgroup is in use (has cgroups inside, or
-has processes attached, or is held alive by other subsystem-specific
-reference).
-
-2.2 Attaching processes
------------------------
-
-# /bin/echo PID > tasks
-
-Note that it is PID, not PIDs. You can only attach ONE task at a time.
-If you have several tasks to attach, you have to do it one after another:
-
-# /bin/echo PID1 > tasks
-# /bin/echo PID2 > tasks
-	...
-# /bin/echo PIDn > tasks
-
-You can attach the current shell task by echoing 0:
-
-# echo 0 > tasks
-
-You can use the cgroup.procs file instead of the tasks file to move all
-threads in a threadgroup at once. Echoing the PID of any task in a
-threadgroup to cgroup.procs causes all tasks in that threadgroup to be
-attached to the cgroup. Writing 0 to cgroup.procs moves all tasks
-in the writing task's threadgroup.
-
-Note: Since every task is always a member of exactly one cgroup in each
-mounted hierarchy, to remove a task from its current cgroup you must
-move it into a new cgroup (possibly the root cgroup) by writing to the
-new cgroup's tasks file.
-
-Note: Due to some restrictions enforced by some cgroup subsystems, moving
-a process to another cgroup can fail.
-
-2.3 Mounting hierarchies by name
---------------------------------
-
-Passing the name=<x> option when mounting a cgroups hierarchy
-associates the given name with the hierarchy.  This can be used when
-mounting a pre-existing hierarchy, in order to refer to it by name
-rather than by its set of active subsystems.  Each hierarchy is either
-nameless, or has a unique name.
-
-The name should match [\w.-]+
-
-When passing a name=<x> option for a new hierarchy, you need to
-specify subsystems manually; the legacy behaviour of mounting all
-subsystems when none are explicitly specified is not supported when
-you give a subsystem a name.
-
-The name of the subsystem appears as part of the hierarchy description
-in /proc/mounts and /proc/<pid>/cgroups.
-
-
-3. Kernel API
-=============
-
-3.1 Overview
-------------
-
-Each kernel subsystem that wants to hook into the generic cgroup
-system needs to create a cgroup_subsys object. This contains
-various methods, which are callbacks from the cgroup system, along
-with a subsystem ID which will be assigned by the cgroup system.
-
-Other fields in the cgroup_subsys object include:
-
-- subsys_id: a unique array index for the subsystem, indicating which
-  entry in cgroup->subsys[] this subsystem should be managing.
-
-- name: should be initialized to a unique subsystem name. Should be
-  no longer than MAX_CGROUP_TYPE_NAMELEN.
-
-- early_init: indicate if the subsystem needs early initialization
-  at system boot.
-
-Each cgroup object created by the system has an array of pointers,
-indexed by subsystem ID; this pointer is entirely managed by the
-subsystem; the generic cgroup code will never touch this pointer.
-
-3.2 Synchronization
--------------------
-
-There is a global mutex, cgroup_mutex, used by the cgroup
-system. This should be taken by anything that wants to modify a
-cgroup. It may also be taken to prevent cgroups from being
-modified, but more specific locks may be more appropriate in that
-situation.
-
-See kernel/cgroup.c for more details.
-
-Subsystems can take/release the cgroup_mutex via the functions
-cgroup_lock()/cgroup_unlock().
-
-Accessing a task's cgroup pointer may be done in the following ways:
-- while holding cgroup_mutex
-- while holding the task's alloc_lock (via task_lock())
-- inside an rcu_read_lock() section via rcu_dereference()
-
-3.3 Subsystem API
------------------
-
-Each subsystem should:
-
-- add an entry in linux/cgroup_subsys.h
-- define a cgroup_subsys object called <name>_cgrp_subsys
-
-Each subsystem may export the following methods. The only mandatory
-methods are css_alloc/free. Any others that are null are presumed to
-be successful no-ops.
-
-struct cgroup_subsys_state *css_alloc(struct cgroup *cgrp)
-(cgroup_mutex held by caller)
-
-Called to allocate a subsystem state object for a cgroup. The
-subsystem should allocate its subsystem state object for the passed
-cgroup, returning a pointer to the new object on success or a
-ERR_PTR() value. On success, the subsystem pointer should point to
-a structure of type cgroup_subsys_state (typically embedded in a
-larger subsystem-specific object), which will be initialized by the
-cgroup system. Note that this will be called at initialization to
-create the root subsystem state for this subsystem; this case can be
-identified by the passed cgroup object having a NULL parent (since
-it's the root of the hierarchy) and may be an appropriate place for
-initialization code.
-
-int css_online(struct cgroup *cgrp)
-(cgroup_mutex held by caller)
-
-Called after @cgrp successfully completed all allocations and made
-visible to cgroup_for_each_child/descendant_*() iterators. The
-subsystem may choose to fail creation by returning -errno. This
-callback can be used to implement reliable state sharing and
-propagation along the hierarchy. See the comment on
-cgroup_for_each_descendant_pre() for details.
-
-void css_offline(struct cgroup *cgrp);
-(cgroup_mutex held by caller)
-
-This is the counterpart of css_online() and called iff css_online()
-has succeeded on @cgrp. This signifies the beginning of the end of
-@cgrp. @cgrp is being removed and the subsystem should start dropping
-all references it's holding on @cgrp. When all references are dropped,
-cgroup removal will proceed to the next step - css_free(). After this
-callback, @cgrp should be considered dead to the subsystem.
-
-void css_free(struct cgroup *cgrp)
-(cgroup_mutex held by caller)
-
-The cgroup system is about to free @cgrp; the subsystem should free
-its subsystem state object. By the time this method is called, @cgrp
-is completely unused; @cgrp->parent is still valid. (Note - can also
-be called for a newly-created cgroup if an error occurs after this
-subsystem's create() method has been called for the new cgroup).
-
-int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
-(cgroup_mutex held by caller)
-
-Called prior to moving one or more tasks into a cgroup; if the
-subsystem returns an error, this will abort the attach operation.
-@tset contains the tasks to be attached and is guaranteed to have at
-least one task in it.
-
-If there are multiple tasks in the taskset, then:
-  - it's guaranteed that all are from the same thread group
-  - @tset contains all tasks from the thread group whether or not
-    they're switching cgroups
-  - the first task is the leader
-
-Each @tset entry also contains the task's old cgroup and tasks which
-aren't switching cgroup can be skipped easily using the
-cgroup_taskset_for_each() iterator. Note that this isn't called on a
-fork. If this method returns 0 (success) then this should remain valid
-while the caller holds cgroup_mutex and it is ensured that either
-attach() or cancel_attach() will be called in future.
-
-void css_reset(struct cgroup_subsys_state *css)
-(cgroup_mutex held by caller)
-
-An optional operation which should restore @css's configuration to the
-initial state.  This is currently only used on the unified hierarchy
-when a subsystem is disabled on a cgroup through
-"cgroup.subtree_control" but should remain enabled because other
-subsystems depend on it.  cgroup core makes such a css invisible by
-removing the associated interface files and invokes this callback so
-that the hidden subsystem can return to the initial neutral state.
-This prevents unexpected resource control from a hidden css and
-ensures that the configuration is in the initial state when it is made
-visible again later.
-
-void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
-(cgroup_mutex held by caller)
-
-Called when a task attach operation has failed after can_attach() has succeeded.
-A subsystem whose can_attach() has some side-effects should provide this
-function, so that the subsystem can implement a rollback. If not, not necessary.
-This will be called only about subsystems whose can_attach() operation have
-succeeded. The parameters are identical to can_attach().
-
-void attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
-(cgroup_mutex held by caller)
-
-Called after the task has been attached to the cgroup, to allow any
-post-attachment activity that requires memory allocations or blocking.
-The parameters are identical to can_attach().
-
-void fork(struct task_struct *task)
-
-Called when a task is forked into a cgroup.
-
-void exit(struct task_struct *task)
-
-Called during task exit.
-
-void free(struct task_struct *task)
-
-Called when the task_struct is freed.
-
-void bind(struct cgroup *root)
-(cgroup_mutex held by caller)
-
-Called when a cgroup subsystem is rebound to a different hierarchy
-and root cgroup. Currently this will only involve movement between
-the default hierarchy (which never has sub-cgroups) and a hierarchy
-that is being created/destroyed (and hence has no sub-cgroups).
-
-4. Extended attribute usage
-===========================
-
-cgroup filesystem supports certain types of extended attributes in its
-directories and files.  The current supported types are:
-	- Trusted (XATTR_TRUSTED)
-	- Security (XATTR_SECURITY)
-
-Both require CAP_SYS_ADMIN capability to set.
-
-Like in tmpfs, the extended attributes in cgroup filesystem are stored
-using kernel memory and it's advised to keep the usage at minimum.  This
-is the reason why user defined extended attributes are not supported, since
-any user can do it and there's no limit in the value size.
-
-The current known users for this feature are SELinux to limit cgroup usage
-in containers and systemd for assorted meta data like main PID in a cgroup
-(systemd creates a cgroup per service).
-
-5. Questions
-============
-
-Q: what's up with this '/bin/echo' ?
-A: bash's builtin 'echo' command does not check calls to write() against
-   errors. If you use it in the cgroup file system, you won't be
-   able to tell whether a command succeeded or failed.
-
-Q: When I attach processes, only the first of the line gets really attached !
-A: We can only return one error code per call to write(). So you should also
-   put only ONE PID.
-
diff --git a/Documentation/cgroup-v1/cpuacct.rst b/Documentation/cgroup-v1/cpuacct.rst
new file mode 100644
index 000000000000..d30ed81d2ad7
--- /dev/null
+++ b/Documentation/cgroup-v1/cpuacct.rst
@@ -0,0 +1,50 @@
+=========================
+CPU Accounting Controller
+=========================
+
+The CPU accounting controller is used to group tasks using cgroups and
+account the CPU usage of these groups of tasks.
+
+The CPU accounting controller supports multi-hierarchy groups. An accounting
+group accumulates the CPU usage of all of its child groups and the tasks
+directly present in its group.
+
+Accounting groups can be created by first mounting the cgroup filesystem::
+
+  # mount -t cgroup -ocpuacct none /sys/fs/cgroup
+
+With the above step, the initial or the parent accounting group becomes
+visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
+the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
+/sys/fs/cgroup/cpuacct.usage gives the CPU time (in nanoseconds) obtained
+by this group which is essentially the CPU time obtained by all the tasks
+in the system.
+
+New accounting groups can be created under the parent group /sys/fs/cgroup::
+
+  # cd /sys/fs/cgroup
+  # mkdir g1
+  # echo $$ > g1/tasks
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it. CPU time consumed by this bash and its children
+can be obtained from g1/cpuacct.usage and the same is accumulated in
+/sys/fs/cgroup/cpuacct.usage also.
+
+cpuacct.stat file lists a few statistics which further divide the
+CPU time obtained by the cgroup into user and system times. Currently
+the following statistics are supported:
+
+user: Time spent by tasks of the cgroup in user mode.
+system: Time spent by tasks of the cgroup in kernel mode.
+
+user and system are in USER_HZ unit.
+
+cpuacct controller uses percpu_counter interface to collect user and
+system times. This has two side effects:
+
+- It is theoretically possible to see wrong values for user and system times.
+  This is because percpu_counter_read() on 32bit systems isn't safe
+  against concurrent writes.
+- It is possible to see slightly outdated values for user and system times
+  due to the batch processing nature of percpu_counter.
diff --git a/Documentation/cgroup-v1/cpuacct.txt b/Documentation/cgroup-v1/cpuacct.txt
deleted file mode 100644
index 9d73cc0cadb9..000000000000
--- a/Documentation/cgroup-v1/cpuacct.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-CPU Accounting Controller
--------------------------
-
-The CPU accounting controller is used to group tasks using cgroups and
-account the CPU usage of these groups of tasks.
-
-The CPU accounting controller supports multi-hierarchy groups. An accounting
-group accumulates the CPU usage of all of its child groups and the tasks
-directly present in its group.
-
-Accounting groups can be created by first mounting the cgroup filesystem.
-
-# mount -t cgroup -ocpuacct none /sys/fs/cgroup
-
-With the above step, the initial or the parent accounting group becomes
-visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
-the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
-/sys/fs/cgroup/cpuacct.usage gives the CPU time (in nanoseconds) obtained
-by this group which is essentially the CPU time obtained by all the tasks
-in the system.
-
-New accounting groups can be created under the parent group /sys/fs/cgroup.
-
-# cd /sys/fs/cgroup
-# mkdir g1
-# echo $$ > g1/tasks
-
-The above steps create a new group g1 and move the current shell
-process (bash) into it. CPU time consumed by this bash and its children
-can be obtained from g1/cpuacct.usage and the same is accumulated in
-/sys/fs/cgroup/cpuacct.usage also.
-
-cpuacct.stat file lists a few statistics which further divide the
-CPU time obtained by the cgroup into user and system times. Currently
-the following statistics are supported:
-
-user: Time spent by tasks of the cgroup in user mode.
-system: Time spent by tasks of the cgroup in kernel mode.
-
-user and system are in USER_HZ unit.
-
-cpuacct controller uses percpu_counter interface to collect user and
-system times. This has two side effects:
-
-- It is theoretically possible to see wrong values for user and system times.
-  This is because percpu_counter_read() on 32bit systems isn't safe
-  against concurrent writes.
-- It is possible to see slightly outdated values for user and system times
-  due to the batch processing nature of percpu_counter.
diff --git a/Documentation/cgroup-v1/cpusets.rst b/Documentation/cgroup-v1/cpusets.rst
new file mode 100644
index 000000000000..b6a42cdea72b
--- /dev/null
+++ b/Documentation/cgroup-v1/cpusets.rst
@@ -0,0 +1,866 @@
+=======
+CPUSETS
+=======
+
+Copyright (C) 2004 BULL SA.
+
+Written by Simon.Derr@bull.net
+
+- Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
+- Modified by Paul Jackson <pj@sgi.com>
+- Modified by Christoph Lameter <cl@linux.com>
+- Modified by Paul Menage <menage@google.com>
+- Modified by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+
+.. CONTENTS:
+
+   1. Cpusets
+     1.1 What are cpusets ?
+     1.2 Why are cpusets needed ?
+     1.3 How are cpusets implemented ?
+     1.4 What are exclusive cpusets ?
+     1.5 What is memory_pressure ?
+     1.6 What is memory spread ?
+     1.7 What is sched_load_balance ?
+     1.8 What is sched_relax_domain_level ?
+     1.9 How do I use cpusets ?
+   2. Usage Examples and Syntax
+     2.1 Basic Usage
+     2.2 Adding/removing cpus
+     2.3 Setting flags
+     2.4 Attaching processes
+   3. Questions
+   4. Contact
+
+1. Cpusets
+==========
+
+1.1 What are cpusets ?
+----------------------
+
+Cpusets provide a mechanism for assigning a set of CPUs and Memory
+Nodes to a set of tasks.   In this document "Memory Node" refers to
+an on-line node that contains memory.
+
+Cpusets constrain the CPU and Memory placement of tasks to only
+the resources within a task's current cpuset.  They form a nested
+hierarchy visible in a virtual file system.  These are the essential
+hooks, beyond what is already present, required to manage dynamic
+job placement on large systems.
+
+Cpusets use the generic cgroup subsystem described in
+Documentation/cgroup-v1/cgroups.rst.
+
+Requests by a task, using the sched_setaffinity(2) system call to
+include CPUs in its CPU affinity mask, and using the mbind(2) and
+set_mempolicy(2) system calls to include Memory Nodes in its memory
+policy, are both filtered through that task's cpuset, filtering out any
+CPUs or Memory Nodes not in that cpuset.  The scheduler will not
+schedule a task on a CPU that is not allowed in its cpus_allowed
+vector, and the kernel page allocator will not allocate a page on a
+node that is not allowed in the requesting task's mems_allowed vector.
+
+User level code may create and destroy cpusets by name in the cgroup
+virtual file system, manage the attributes and permissions of these
+cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
+specify and query to which cpuset a task is assigned, and list the
+task pids assigned to a cpuset.
+
+
+1.2 Why are cpusets needed ?
+----------------------------
+
+The management of large computer systems, with many processors (CPUs),
+complex memory cache hierarchies and multiple Memory Nodes having
+non-uniform access times (NUMA) presents additional challenges for
+the efficient scheduling and memory placement of processes.
+
+Frequently more modest sized systems can be operated with adequate
+efficiency just by letting the operating system automatically share
+the available CPU and Memory resources amongst the requesting tasks.
+
+But larger systems, which benefit more from careful processor and
+memory placement to reduce memory access times and contention,
+and which typically represent a larger investment for the customer,
+can benefit from explicitly placing jobs on properly sized subsets of
+the system.
+
+This can be especially valuable on:
+
+    * Web Servers running multiple instances of the same web application,
+    * Servers running different applications (for instance, a web server
+      and a database), or
+    * NUMA systems running large HPC applications with demanding
+      performance characteristics.
+
+These subsets, or "soft partitions" must be able to be dynamically
+adjusted, as the job mix changes, without impacting other concurrently
+executing jobs. The location of the running jobs pages may also be moved
+when the memory locations are changed.
+
+The kernel cpuset patch provides the minimum essential kernel
+mechanisms required to efficiently implement such subsets.  It
+leverages existing CPU and Memory Placement facilities in the Linux
+kernel to avoid any additional impact on the critical scheduler or
+memory allocator code.
+
+
+1.3 How are cpusets implemented ?
+---------------------------------
+
+Cpusets provide a Linux kernel mechanism to constrain which CPUs and
+Memory Nodes are used by a process or set of processes.
+
+The Linux kernel already has a pair of mechanisms to specify on which
+CPUs a task may be scheduled (sched_setaffinity) and on which Memory
+Nodes it may obtain memory (mbind, set_mempolicy).
+
+Cpusets extends these two mechanisms as follows:
+
+ - Cpusets are sets of allowed CPUs and Memory Nodes, known to the
+   kernel.
+ - Each task in the system is attached to a cpuset, via a pointer
+   in the task structure to a reference counted cgroup structure.
+ - Calls to sched_setaffinity are filtered to just those CPUs
+   allowed in that task's cpuset.
+ - Calls to mbind and set_mempolicy are filtered to just
+   those Memory Nodes allowed in that task's cpuset.
+ - The root cpuset contains all the systems CPUs and Memory
+   Nodes.
+ - For any cpuset, one can define child cpusets containing a subset
+   of the parents CPU and Memory Node resources.
+ - The hierarchy of cpusets can be mounted at /dev/cpuset, for
+   browsing and manipulation from user space.
+ - A cpuset may be marked exclusive, which ensures that no other
+   cpuset (except direct ancestors and descendants) may contain
+   any overlapping CPUs or Memory Nodes.
+ - You can list all the tasks (by pid) attached to any cpuset.
+
+The implementation of cpusets requires a few, simple hooks
+into the rest of the kernel, none in performance critical paths:
+
+ - in init/main.c, to initialize the root cpuset at system boot.
+ - in fork and exit, to attach and detach a task from its cpuset.
+ - in sched_setaffinity, to mask the requested CPUs by what's
+   allowed in that task's cpuset.
+ - in sched.c migrate_live_tasks(), to keep migrating tasks within
+   the CPUs allowed by their cpuset, if possible.
+ - in the mbind and set_mempolicy system calls, to mask the requested
+   Memory Nodes by what's allowed in that task's cpuset.
+ - in page_alloc.c, to restrict memory to allowed nodes.
+ - in vmscan.c, to restrict page recovery to the current cpuset.
+
+You should mount the "cgroup" filesystem type in order to enable
+browsing and modifying the cpusets presently known to the kernel.  No
+new system calls are added for cpusets - all support for querying and
+modifying cpusets is via this cpuset file system.
+
+The /proc/<pid>/status file for each task has four added lines,
+displaying the task's cpus_allowed (on which CPUs it may be scheduled)
+and mems_allowed (on which Memory Nodes it may obtain memory),
+in the two formats seen in the following example::
+
+  Cpus_allowed:   ffffffff,ffffffff,ffffffff,ffffffff
+  Cpus_allowed_list:      0-127
+  Mems_allowed:   ffffffff,ffffffff
+  Mems_allowed_list:      0-63
+
+Each cpuset is represented by a directory in the cgroup file system
+containing (on top of the standard cgroup files) the following
+files describing that cpuset:
+
+ - cpuset.cpus: list of CPUs in that cpuset
+ - cpuset.mems: list of Memory Nodes in that cpuset
+ - cpuset.memory_migrate flag: if set, move pages to cpusets nodes
+ - cpuset.cpu_exclusive flag: is cpu placement exclusive?
+ - cpuset.mem_exclusive flag: is memory placement exclusive?
+ - cpuset.mem_hardwall flag:  is memory allocation hardwalled
+ - cpuset.memory_pressure: measure of how much paging pressure in cpuset
+ - cpuset.memory_spread_page flag: if set, spread page cache evenly on allowed nodes
+ - cpuset.memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes
+ - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset
+ - cpuset.sched_relax_domain_level: the searching range when migrating tasks
+
+In addition, only the root cpuset has the following file:
+
+ - cpuset.memory_pressure_enabled flag: compute memory_pressure?
+
+New cpusets are created using the mkdir system call or shell
+command.  The properties of a cpuset, such as its flags, allowed
+CPUs and Memory Nodes, and attached tasks, are modified by writing
+to the appropriate file in that cpusets directory, as listed above.
+
+The named hierarchical structure of nested cpusets allows partitioning
+a large system into nested, dynamically changeable, "soft-partitions".
+
+The attachment of each task, automatically inherited at fork by any
+children of that task, to a cpuset allows organizing the work load
+on a system into related sets of tasks such that each set is constrained
+to using the CPUs and Memory Nodes of a particular cpuset.  A task
+may be re-attached to any other cpuset, if allowed by the permissions
+on the necessary cpuset file system directories.
+
+Such management of a system "in the large" integrates smoothly with
+the detailed placement done on individual tasks and memory regions
+using the sched_setaffinity, mbind and set_mempolicy system calls.
+
+The following rules apply to each cpuset:
+
+ - Its CPUs and Memory Nodes must be a subset of its parents.
+ - It can't be marked exclusive unless its parent is.
+ - If its cpu or memory is exclusive, they may not overlap any sibling.
+
+These rules, and the natural hierarchy of cpusets, enable efficient
+enforcement of the exclusive guarantee, without having to scan all
+cpusets every time any of them change to ensure nothing overlaps a
+exclusive cpuset.  Also, the use of a Linux virtual file system (vfs)
+to represent the cpuset hierarchy provides for a familiar permission
+and name space for cpusets, with a minimum of additional kernel code.
+
+The cpus and mems files in the root (top_cpuset) cpuset are
+read-only.  The cpus file automatically tracks the value of
+cpu_online_mask using a CPU hotplug notifier, and the mems file
+automatically tracks the value of node_states[N_MEMORY]--i.e.,
+nodes with memory--using the cpuset_track_online_nodes() hook.
+
+
+1.4 What are exclusive cpusets ?
+--------------------------------
+
+If a cpuset is cpu or mem exclusive, no other cpuset, other than
+a direct ancestor or descendant, may share any of the same CPUs or
+Memory Nodes.
+
+A cpuset that is cpuset.mem_exclusive *or* cpuset.mem_hardwall is "hardwalled",
+i.e. it restricts kernel allocations for page, buffer and other data
+commonly shared by the kernel across multiple users.  All cpusets,
+whether hardwalled or not, restrict allocations of memory for user
+space.  This enables configuring a system so that several independent
+jobs can share common kernel data, such as file system pages, while
+isolating each job's user allocation in its own cpuset.  To do this,
+construct a large mem_exclusive cpuset to hold all the jobs, and
+construct child, non-mem_exclusive cpusets for each individual job.
+Only a small amount of typical kernel memory, such as requests from
+interrupt handlers, is allowed to be taken outside even a
+mem_exclusive cpuset.
+
+
+1.5 What is memory_pressure ?
+-----------------------------
+The memory_pressure of a cpuset provides a simple per-cpuset metric
+of the rate that the tasks in a cpuset are attempting to free up in
+use memory on the nodes of the cpuset to satisfy additional memory
+requests.
+
+This enables batch managers monitoring jobs running in dedicated
+cpusets to efficiently detect what level of memory pressure that job
+is causing.
+
+This is useful both on tightly managed systems running a wide mix of
+submitted jobs, which may choose to terminate or re-prioritize jobs that
+are trying to use more memory than allowed on the nodes assigned to them,
+and with tightly coupled, long running, massively parallel scientific
+computing jobs that will dramatically fail to meet required performance
+goals if they start to use more memory than allowed to them.
+
+This mechanism provides a very economical way for the batch manager
+to monitor a cpuset for signs of memory pressure.  It's up to the
+batch manager or other user code to decide what to do about it and
+take action.
+
+==>
+    Unless this feature is enabled by writing "1" to the special file
+    /dev/cpuset/memory_pressure_enabled, the hook in the rebalance
+    code of __alloc_pages() for this metric reduces to simply noticing
+    that the cpuset_memory_pressure_enabled flag is zero.  So only
+    systems that enable this feature will compute the metric.
+
+Why a per-cpuset, running average:
+
+    Because this meter is per-cpuset, rather than per-task or mm,
+    the system load imposed by a batch scheduler monitoring this
+    metric is sharply reduced on large systems, because a scan of
+    the tasklist can be avoided on each set of queries.
+
+    Because this meter is a running average, instead of an accumulating
+    counter, a batch scheduler can detect memory pressure with a
+    single read, instead of having to read and accumulate results
+    for a period of time.
+
+    Because this meter is per-cpuset rather than per-task or mm,
+    the batch scheduler can obtain the key information, memory
+    pressure in a cpuset, with a single read, rather than having to
+    query and accumulate results over all the (dynamically changing)
+    set of tasks in the cpuset.
+
+A per-cpuset simple digital filter (requires a spinlock and 3 words
+of data per-cpuset) is kept, and updated by any task attached to that
+cpuset, if it enters the synchronous (direct) page reclaim code.
+
+A per-cpuset file provides an integer number representing the recent
+(half-life of 10 seconds) rate of direct page reclaims caused by
+the tasks in the cpuset, in units of reclaims attempted per second,
+times 1000.
+
+
+1.6 What is memory spread ?
+---------------------------
+There are two boolean flag files per cpuset that control where the
+kernel allocates pages for the file system buffers and related in
+kernel data structures.  They are called 'cpuset.memory_spread_page' and
+'cpuset.memory_spread_slab'.
+
+If the per-cpuset boolean flag file 'cpuset.memory_spread_page' is set, then
+the kernel will spread the file system buffers (page cache) evenly
+over all the nodes that the faulting task is allowed to use, instead
+of preferring to put those pages on the node where the task is running.
+
+If the per-cpuset boolean flag file 'cpuset.memory_spread_slab' is set,
+then the kernel will spread some file system related slab caches,
+such as for inodes and dentries evenly over all the nodes that the
+faulting task is allowed to use, instead of preferring to put those
+pages on the node where the task is running.
+
+The setting of these flags does not affect anonymous data segment or
+stack segment pages of a task.
+
+By default, both kinds of memory spreading are off, and memory
+pages are allocated on the node local to where the task is running,
+except perhaps as modified by the task's NUMA mempolicy or cpuset
+configuration, so long as sufficient free memory pages are available.
+
+When new cpusets are created, they inherit the memory spread settings
+of their parent.
+
+Setting memory spreading causes allocations for the affected page
+or slab caches to ignore the task's NUMA mempolicy and be spread
+instead.    Tasks using mbind() or set_mempolicy() calls to set NUMA
+mempolicies will not notice any change in these calls as a result of
+their containing task's memory spread settings.  If memory spreading
+is turned off, then the currently specified NUMA mempolicy once again
+applies to memory page allocations.
+
+Both 'cpuset.memory_spread_page' and 'cpuset.memory_spread_slab' are boolean flag
+files.  By default they contain "0", meaning that the feature is off
+for that cpuset.  If a "1" is written to that file, then that turns
+the named feature on.
+
+The implementation is simple.
+
+Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag
+PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently
+joins that cpuset.  The page allocation calls for the page cache
+is modified to perform an inline check for this PFA_SPREAD_PAGE task
+flag, and if set, a call to a new routine cpuset_mem_spread_node()
+returns the node to prefer for the allocation.
+
+Similarly, setting 'cpuset.memory_spread_slab' turns on the flag
+PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate
+pages from the node returned by cpuset_mem_spread_node().
+
+The cpuset_mem_spread_node() routine is also simple.  It uses the
+value of a per-task rotor cpuset_mem_spread_rotor to select the next
+node in the current task's mems_allowed to prefer for the allocation.
+
+This memory placement policy is also known (in other contexts) as
+round-robin or interleave.
+
+This policy can provide substantial improvements for jobs that need
+to place thread local data on the corresponding node, but that need
+to access large file system data sets that need to be spread across
+the several nodes in the jobs cpuset in order to fit.  Without this
+policy, especially for jobs that might have one thread reading in the
+data set, the memory allocation across the nodes in the jobs cpuset
+can become very uneven.
+
+1.7 What is sched_load_balance ?
+--------------------------------
+
+The kernel scheduler (kernel/sched/core.c) automatically load balances
+tasks.  If one CPU is underutilized, kernel code running on that
+CPU will look for tasks on other more overloaded CPUs and move those
+tasks to itself, within the constraints of such placement mechanisms
+as cpusets and sched_setaffinity.
+
+The algorithmic cost of load balancing and its impact on key shared
+kernel data structures such as the task list increases more than
+linearly with the number of CPUs being balanced.  So the scheduler
+has support to partition the systems CPUs into a number of sched
+domains such that it only load balances within each sched domain.
+Each sched domain covers some subset of the CPUs in the system;
+no two sched domains overlap; some CPUs might not be in any sched
+domain and hence won't be load balanced.
+
+Put simply, it costs less to balance between two smaller sched domains
+than one big one, but doing so means that overloads in one of the
+two domains won't be load balanced to the other one.
+
+By default, there is one sched domain covering all CPUs, including those
+marked isolated using the kernel boot time "isolcpus=" argument. However,
+the isolated CPUs will not participate in load balancing, and will not
+have tasks running on them unless explicitly assigned.
+
+This default load balancing across all CPUs is not well suited for
+the following two situations:
+
+ 1) On large systems, load balancing across many CPUs is expensive.
+    If the system is managed using cpusets to place independent jobs
+    on separate sets of CPUs, full load balancing is unnecessary.
+ 2) Systems supporting realtime on some CPUs need to minimize
+    system overhead on those CPUs, including avoiding task load
+    balancing if that is not needed.
+
+When the per-cpuset flag "cpuset.sched_load_balance" is enabled (the default
+setting), it requests that all the CPUs in that cpusets allowed 'cpuset.cpus'
+be contained in a single sched domain, ensuring that load balancing
+can move a task (not otherwised pinned, as by sched_setaffinity)
+from any CPU in that cpuset to any other.
+
+When the per-cpuset flag "cpuset.sched_load_balance" is disabled, then the
+scheduler will avoid load balancing across the CPUs in that cpuset,
+--except-- in so far as is necessary because some overlapping cpuset
+has "sched_load_balance" enabled.
+
+So, for example, if the top cpuset has the flag "cpuset.sched_load_balance"
+enabled, then the scheduler will have one sched domain covering all
+CPUs, and the setting of the "cpuset.sched_load_balance" flag in any other
+cpusets won't matter, as we're already fully load balancing.
+
+Therefore in the above two situations, the top cpuset flag
+"cpuset.sched_load_balance" should be disabled, and only some of the smaller,
+child cpusets have this flag enabled.
+
+When doing this, you don't usually want to leave any unpinned tasks in
+the top cpuset that might use non-trivial amounts of CPU, as such tasks
+may be artificially constrained to some subset of CPUs, depending on
+the particulars of this flag setting in descendant cpusets.  Even if
+such a task could use spare CPU cycles in some other CPUs, the kernel
+scheduler might not consider the possibility of load balancing that
+task to that underused CPU.
+
+Of course, tasks pinned to a particular CPU can be left in a cpuset
+that disables "cpuset.sched_load_balance" as those tasks aren't going anywhere
+else anyway.
+
+There is an impedance mismatch here, between cpusets and sched domains.
+Cpusets are hierarchical and nest.  Sched domains are flat; they don't
+overlap and each CPU is in at most one sched domain.
+
+It is necessary for sched domains to be flat because load balancing
+across partially overlapping sets of CPUs would risk unstable dynamics
+that would be beyond our understanding.  So if each of two partially
+overlapping cpusets enables the flag 'cpuset.sched_load_balance', then we
+form a single sched domain that is a superset of both.  We won't move
+a task to a CPU outside its cpuset, but the scheduler load balancing
+code might waste some compute cycles considering that possibility.
+
+This mismatch is why there is not a simple one-to-one relation
+between which cpusets have the flag "cpuset.sched_load_balance" enabled,
+and the sched domain configuration.  If a cpuset enables the flag, it
+will get balancing across all its CPUs, but if it disables the flag,
+it will only be assured of no load balancing if no other overlapping
+cpuset enables the flag.
+
+If two cpusets have partially overlapping 'cpuset.cpus' allowed, and only
+one of them has this flag enabled, then the other may find its
+tasks only partially load balanced, just on the overlapping CPUs.
+This is just the general case of the top_cpuset example given a few
+paragraphs above.  In the general case, as in the top cpuset case,
+don't leave tasks that might use non-trivial amounts of CPU in
+such partially load balanced cpusets, as they may be artificially
+constrained to some subset of the CPUs allowed to them, for lack of
+load balancing to the other CPUs.
+
+CPUs in "cpuset.isolcpus" were excluded from load balancing by the
+isolcpus= kernel boot option, and will never be load balanced regardless
+of the value of "cpuset.sched_load_balance" in any cpuset.
+
+1.7.1 sched_load_balance implementation details.
+------------------------------------------------
+
+The per-cpuset flag 'cpuset.sched_load_balance' defaults to enabled (contrary
+to most cpuset flags.)  When enabled for a cpuset, the kernel will
+ensure that it can load balance across all the CPUs in that cpuset
+(makes sure that all the CPUs in the cpus_allowed of that cpuset are
+in the same sched domain.)
+
+If two overlapping cpusets both have 'cpuset.sched_load_balance' enabled,
+then they will be (must be) both in the same sched domain.
+
+If, as is the default, the top cpuset has 'cpuset.sched_load_balance' enabled,
+then by the above that means there is a single sched domain covering
+the whole system, regardless of any other cpuset settings.
+
+The kernel commits to user space that it will avoid load balancing
+where it can.  It will pick as fine a granularity partition of sched
+domains as it can while still providing load balancing for any set
+of CPUs allowed to a cpuset having 'cpuset.sched_load_balance' enabled.
+
+The internal kernel cpuset to scheduler interface passes from the
+cpuset code to the scheduler code a partition of the load balanced
+CPUs in the system. This partition is a set of subsets (represented
+as an array of struct cpumask) of CPUs, pairwise disjoint, that cover
+all the CPUs that must be load balanced.
+
+The cpuset code builds a new such partition and passes it to the
+scheduler sched domain setup code, to have the sched domains rebuilt
+as necessary, whenever:
+
+ - the 'cpuset.sched_load_balance' flag of a cpuset with non-empty CPUs changes,
+ - or CPUs come or go from a cpuset with this flag enabled,
+ - or 'cpuset.sched_relax_domain_level' value of a cpuset with non-empty CPUs
+   and with this flag enabled changes,
+ - or a cpuset with non-empty CPUs and with this flag enabled is removed,
+ - or a cpu is offlined/onlined.
+
+This partition exactly defines what sched domains the scheduler should
+setup - one sched domain for each element (struct cpumask) in the
+partition.
+
+The scheduler remembers the currently active sched domain partitions.
+When the scheduler routine partition_sched_domains() is invoked from
+the cpuset code to update these sched domains, it compares the new
+partition requested with the current, and updates its sched domains,
+removing the old and adding the new, for each change.
+
+
+1.8 What is sched_relax_domain_level ?
+--------------------------------------
+
+In sched domain, the scheduler migrates tasks in 2 ways; periodic load
+balance on tick, and at time of some schedule events.
+
+When a task is woken up, scheduler try to move the task on idle CPU.
+For example, if a task A running on CPU X activates another task B
+on the same CPU X, and if CPU Y is X's sibling and performing idle,
+then scheduler migrate task B to CPU Y so that task B can start on
+CPU Y without waiting task A on CPU X.
+
+And if a CPU run out of tasks in its runqueue, the CPU try to pull
+extra tasks from other busy CPUs to help them before it is going to
+be idle.
+
+Of course it takes some searching cost to find movable tasks and/or
+idle CPUs, the scheduler might not search all CPUs in the domain
+every time.  In fact, in some architectures, the searching ranges on
+events are limited in the same socket or node where the CPU locates,
+while the load balance on tick searches all.
+
+For example, assume CPU Z is relatively far from CPU X.  Even if CPU Z
+is idle while CPU X and the siblings are busy, scheduler can't migrate
+woken task B from X to Z since it is out of its searching range.
+As the result, task B on CPU X need to wait task A or wait load balance
+on the next tick.  For some applications in special situation, waiting
+1 tick may be too long.
+
+The 'cpuset.sched_relax_domain_level' file allows you to request changing
+this searching range as you like.  This file takes int value which
+indicates size of searching range in levels ideally as follows,
+otherwise initial value -1 that indicates the cpuset has no request.
+
+====== ===========================================================
+  -1   no request. use system default or follow request of others.
+   0   no search.
+   1   search siblings (hyperthreads in a core).
+   2   search cores in a package.
+   3   search cpus in a node [= system wide on non-NUMA system]
+   4   search nodes in a chunk of node [on NUMA system]
+   5   search system wide [on NUMA system]
+====== ===========================================================
+
+The system default is architecture dependent.  The system default
+can be changed using the relax_domain_level= boot parameter.
+
+This file is per-cpuset and affect the sched domain where the cpuset
+belongs to.  Therefore if the flag 'cpuset.sched_load_balance' of a cpuset
+is disabled, then 'cpuset.sched_relax_domain_level' have no effect since
+there is no sched domain belonging the cpuset.
+
+If multiple cpusets are overlapping and hence they form a single sched
+domain, the largest value among those is used.  Be careful, if one
+requests 0 and others are -1 then 0 is used.
+
+Note that modifying this file will have both good and bad effects,
+and whether it is acceptable or not depends on your situation.
+Don't modify this file if you are not sure.
+
+If your situation is:
+
+ - The migration costs between each cpu can be assumed considerably
+   small(for you) due to your special application's behavior or
+   special hardware support for CPU cache etc.
+ - The searching cost doesn't have impact(for you) or you can make
+   the searching cost enough small by managing cpuset to compact etc.
+ - The latency is required even it sacrifices cache hit rate etc.
+   then increasing 'sched_relax_domain_level' would benefit you.
+
+
+1.9 How do I use cpusets ?
+--------------------------
+
+In order to minimize the impact of cpusets on critical kernel
+code, such as the scheduler, and due to the fact that the kernel
+does not support one task updating the memory placement of another
+task directly, the impact on a task of changing its cpuset CPU
+or Memory Node placement, or of changing to which cpuset a task
+is attached, is subtle.
+
+If a cpuset has its Memory Nodes modified, then for each task attached
+to that cpuset, the next time that the kernel attempts to allocate
+a page of memory for that task, the kernel will notice the change
+in the task's cpuset, and update its per-task memory placement to
+remain within the new cpusets memory placement.  If the task was using
+mempolicy MPOL_BIND, and the nodes to which it was bound overlap with
+its new cpuset, then the task will continue to use whatever subset
+of MPOL_BIND nodes are still allowed in the new cpuset.  If the task
+was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed
+in the new cpuset, then the task will be essentially treated as if it
+was MPOL_BIND bound to the new cpuset (even though its NUMA placement,
+as queried by get_mempolicy(), doesn't change).  If a task is moved
+from one cpuset to another, then the kernel will adjust the task's
+memory placement, as above, the next time that the kernel attempts
+to allocate a page of memory for that task.
+
+If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset
+will have its allowed CPU placement changed immediately.  Similarly,
+if a task's pid is written to another cpuset's 'tasks' file, then its
+allowed CPU placement is changed immediately.  If such a task had been
+bound to some subset of its cpuset using the sched_setaffinity() call,
+the task will be allowed to run on any CPU allowed in its new cpuset,
+negating the effect of the prior sched_setaffinity() call.
+
+In summary, the memory placement of a task whose cpuset is changed is
+updated by the kernel, on the next allocation of a page for that task,
+and the processor placement is updated immediately.
+
+Normally, once a page is allocated (given a physical page
+of main memory) then that page stays on whatever node it
+was allocated, so long as it remains allocated, even if the
+cpusets memory placement policy 'cpuset.mems' subsequently changes.
+If the cpuset flag file 'cpuset.memory_migrate' is set true, then when
+tasks are attached to that cpuset, any pages that task had
+allocated to it on nodes in its previous cpuset are migrated
+to the task's new cpuset. The relative placement of the page within
+the cpuset is preserved during these migration operations if possible.
+For example if the page was on the second valid node of the prior cpuset
+then the page will be placed on the second valid node of the new cpuset.
+
+Also if 'cpuset.memory_migrate' is set true, then if that cpuset's
+'cpuset.mems' file is modified, pages allocated to tasks in that
+cpuset, that were on nodes in the previous setting of 'cpuset.mems',
+will be moved to nodes in the new setting of 'mems.'
+Pages that were not in the task's prior cpuset, or in the cpuset's
+prior 'cpuset.mems' setting, will not be moved.
+
+There is an exception to the above.  If hotplug functionality is used
+to remove all the CPUs that are currently assigned to a cpuset,
+then all the tasks in that cpuset will be moved to the nearest ancestor
+with non-empty cpus.  But the moving of some (or all) tasks might fail if
+cpuset is bound with another cgroup subsystem which has some restrictions
+on task attaching.  In this failing case, those tasks will stay
+in the original cpuset, and the kernel will automatically update
+their cpus_allowed to allow all online CPUs.  When memory hotplug
+functionality for removing Memory Nodes is available, a similar exception
+is expected to apply there as well.  In general, the kernel prefers to
+violate cpuset placement, over starving a task that has had all
+its allowed CPUs or Memory Nodes taken offline.
+
+There is a second exception to the above.  GFP_ATOMIC requests are
+kernel internal allocations that must be satisfied, immediately.
+The kernel may drop some request, in rare cases even panic, if a
+GFP_ATOMIC alloc fails.  If the request cannot be satisfied within
+the current task's cpuset, then we relax the cpuset, and look for
+memory anywhere we can find it.  It's better to violate the cpuset
+than stress the kernel.
+
+To start a new job that is to be contained within a cpuset, the steps are:
+
+ 1) mkdir /sys/fs/cgroup/cpuset
+ 2) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+ 3) Create the new cpuset by doing mkdir's and write's (or echo's) in
+    the /sys/fs/cgroup/cpuset virtual file system.
+ 4) Start a task that will be the "founding father" of the new job.
+ 5) Attach that task to the new cpuset by writing its pid to the
+    /sys/fs/cgroup/cpuset tasks file for that cpuset.
+ 6) fork, exec or clone the job tasks from this founding father task.
+
+For example, the following sequence of commands will setup a cpuset
+named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
+and then start a subshell 'sh' in that cpuset::
+
+  mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+  cd /sys/fs/cgroup/cpuset
+  mkdir Charlie
+  cd Charlie
+  /bin/echo 2-3 > cpuset.cpus
+  /bin/echo 1 > cpuset.mems
+  /bin/echo $$ > tasks
+  sh
+  # The subshell 'sh' is now running in cpuset Charlie
+  # The next line should display '/Charlie'
+  cat /proc/self/cpuset
+
+There are ways to query or modify cpusets:
+
+ - via the cpuset file system directly, using the various cd, mkdir, echo,
+   cat, rmdir commands from the shell, or their equivalent from C.
+ - via the C library libcpuset.
+ - via the C library libcgroup.
+   (http://sourceforge.net/projects/libcg/)
+ - via the python application cset.
+   (http://code.google.com/p/cpuset/)
+
+The sched_setaffinity calls can also be done at the shell prompt using
+SGI's runon or Robert Love's taskset.  The mbind and set_mempolicy
+calls can be done at the shell prompt using the numactl command
+(part of Andi Kleen's numa package).
+
+2. Usage Examples and Syntax
+============================
+
+2.1 Basic Usage
+---------------
+
+Creating, modifying, using the cpusets can be done through the cpuset
+virtual filesystem.
+
+To mount it, type:
+# mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset
+
+Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the
+tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset
+is the cpuset that holds the whole system.
+
+If you want to create a new cpuset under /sys/fs/cgroup/cpuset::
+
+  # cd /sys/fs/cgroup/cpuset
+  # mkdir my_cpuset
+
+Now you want to do something with this cpuset::
+
+  # cd my_cpuset
+
+In this directory you can find several files::
+
+  # ls
+  cgroup.clone_children  cpuset.memory_pressure
+  cgroup.event_control   cpuset.memory_spread_page
+  cgroup.procs           cpuset.memory_spread_slab
+  cpuset.cpu_exclusive   cpuset.mems
+  cpuset.cpus            cpuset.sched_load_balance
+  cpuset.mem_exclusive   cpuset.sched_relax_domain_level
+  cpuset.mem_hardwall    notify_on_release
+  cpuset.memory_migrate  tasks
+
+Reading them will give you information about the state of this cpuset:
+the CPUs and Memory Nodes it can use, the processes that are using
+it, its properties.  By writing to these files you can manipulate
+the cpuset.
+
+Set some flags::
+
+  # /bin/echo 1 > cpuset.cpu_exclusive
+
+Add some cpus::
+
+  # /bin/echo 0-7 > cpuset.cpus
+
+Add some mems::
+
+  # /bin/echo 0-7 > cpuset.mems
+
+Now attach your shell to this cpuset::
+
+  # /bin/echo $$ > tasks
+
+You can also create cpusets inside your cpuset by using mkdir in this
+directory::
+
+  # mkdir my_sub_cs
+
+To remove a cpuset, just use rmdir::
+
+  # rmdir my_sub_cs
+
+This will fail if the cpuset is in use (has cpusets inside, or has
+processes attached).
+
+Note that for legacy reasons, the "cpuset" filesystem exists as a
+wrapper around the cgroup filesystem.
+
+The command::
+
+  mount -t cpuset X /sys/fs/cgroup/cpuset
+
+is equivalent to::
+
+  mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset
+  echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent
+
+2.2 Adding/removing cpus
+------------------------
+
+This is the syntax to use when writing in the cpus or mems files
+in cpuset directories::
+
+  # /bin/echo 1-4 > cpuset.cpus		-> set cpus list to cpus 1,2,3,4
+  # /bin/echo 1,2,3,4 > cpuset.cpus	-> set cpus list to cpus 1,2,3,4
+
+To add a CPU to a cpuset, write the new list of CPUs including the
+CPU to be added. To add 6 to the above cpuset::
+
+  # /bin/echo 1-4,6 > cpuset.cpus	-> set cpus list to cpus 1,2,3,4,6
+
+Similarly to remove a CPU from a cpuset, write the new list of CPUs
+without the CPU to be removed.
+
+To remove all the CPUs::
+
+  # /bin/echo "" > cpuset.cpus		-> clear cpus list
+
+2.3 Setting flags
+-----------------
+
+The syntax is very simple::
+
+  # /bin/echo 1 > cpuset.cpu_exclusive 	-> set flag 'cpuset.cpu_exclusive'
+  # /bin/echo 0 > cpuset.cpu_exclusive 	-> unset flag 'cpuset.cpu_exclusive'
+
+2.4 Attaching processes
+-----------------------
+
+::
+
+  # /bin/echo PID > tasks
+
+Note that it is PID, not PIDs. You can only attach ONE task at a time.
+If you have several tasks to attach, you have to do it one after another::
+
+  # /bin/echo PID1 > tasks
+  # /bin/echo PID2 > tasks
+	...
+  # /bin/echo PIDn > tasks
+
+
+3. Questions
+============
+
+Q:
+   what's up with this '/bin/echo' ?
+
+A:
+   bash's builtin 'echo' command does not check calls to write() against
+   errors. If you use it in the cpuset file system, you won't be
+   able to tell whether a command succeeded or failed.
+
+Q:
+   When I attach processes, only the first of the line gets really attached !
+
+A:
+   We can only return one error code per call to write(). So you should also
+   put only ONE pid.
+
+4. Contact
+==========
+
+Web: http://www.bullopensource.org/cpuset
diff --git a/Documentation/cgroup-v1/cpusets.txt b/Documentation/cgroup-v1/cpusets.txt
deleted file mode 100644
index 8402dd6de8df..000000000000
--- a/Documentation/cgroup-v1/cpusets.txt
+++ /dev/null
@@ -1,839 +0,0 @@
-				CPUSETS
-				-------
-
-Copyright (C) 2004 BULL SA.
-Written by Simon.Derr@bull.net
-
-Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
-Modified by Paul Jackson <pj@sgi.com>
-Modified by Christoph Lameter <cl@linux.com>
-Modified by Paul Menage <menage@google.com>
-Modified by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
-
-CONTENTS:
-=========
-
-1. Cpusets
-  1.1 What are cpusets ?
-  1.2 Why are cpusets needed ?
-  1.3 How are cpusets implemented ?
-  1.4 What are exclusive cpusets ?
-  1.5 What is memory_pressure ?
-  1.6 What is memory spread ?
-  1.7 What is sched_load_balance ?
-  1.8 What is sched_relax_domain_level ?
-  1.9 How do I use cpusets ?
-2. Usage Examples and Syntax
-  2.1 Basic Usage
-  2.2 Adding/removing cpus
-  2.3 Setting flags
-  2.4 Attaching processes
-3. Questions
-4. Contact
-
-1. Cpusets
-==========
-
-1.1 What are cpusets ?
-----------------------
-
-Cpusets provide a mechanism for assigning a set of CPUs and Memory
-Nodes to a set of tasks.   In this document "Memory Node" refers to
-an on-line node that contains memory.
-
-Cpusets constrain the CPU and Memory placement of tasks to only
-the resources within a task's current cpuset.  They form a nested
-hierarchy visible in a virtual file system.  These are the essential
-hooks, beyond what is already present, required to manage dynamic
-job placement on large systems.
-
-Cpusets use the generic cgroup subsystem described in
-Documentation/cgroup-v1/cgroups.txt.
-
-Requests by a task, using the sched_setaffinity(2) system call to
-include CPUs in its CPU affinity mask, and using the mbind(2) and
-set_mempolicy(2) system calls to include Memory Nodes in its memory
-policy, are both filtered through that task's cpuset, filtering out any
-CPUs or Memory Nodes not in that cpuset.  The scheduler will not
-schedule a task on a CPU that is not allowed in its cpus_allowed
-vector, and the kernel page allocator will not allocate a page on a
-node that is not allowed in the requesting task's mems_allowed vector.
-
-User level code may create and destroy cpusets by name in the cgroup
-virtual file system, manage the attributes and permissions of these
-cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
-specify and query to which cpuset a task is assigned, and list the
-task pids assigned to a cpuset.
-
-
-1.2 Why are cpusets needed ?
-----------------------------
-
-The management of large computer systems, with many processors (CPUs),
-complex memory cache hierarchies and multiple Memory Nodes having
-non-uniform access times (NUMA) presents additional challenges for
-the efficient scheduling and memory placement of processes.
-
-Frequently more modest sized systems can be operated with adequate
-efficiency just by letting the operating system automatically share
-the available CPU and Memory resources amongst the requesting tasks.
-
-But larger systems, which benefit more from careful processor and
-memory placement to reduce memory access times and contention,
-and which typically represent a larger investment for the customer,
-can benefit from explicitly placing jobs on properly sized subsets of
-the system.
-
-This can be especially valuable on:
-
-    * Web Servers running multiple instances of the same web application,
-    * Servers running different applications (for instance, a web server
-      and a database), or
-    * NUMA systems running large HPC applications with demanding
-      performance characteristics.
-
-These subsets, or "soft partitions" must be able to be dynamically
-adjusted, as the job mix changes, without impacting other concurrently
-executing jobs. The location of the running jobs pages may also be moved
-when the memory locations are changed.
-
-The kernel cpuset patch provides the minimum essential kernel
-mechanisms required to efficiently implement such subsets.  It
-leverages existing CPU and Memory Placement facilities in the Linux
-kernel to avoid any additional impact on the critical scheduler or
-memory allocator code.
-
-
-1.3 How are cpusets implemented ?
----------------------------------
-
-Cpusets provide a Linux kernel mechanism to constrain which CPUs and
-Memory Nodes are used by a process or set of processes.
-
-The Linux kernel already has a pair of mechanisms to specify on which
-CPUs a task may be scheduled (sched_setaffinity) and on which Memory
-Nodes it may obtain memory (mbind, set_mempolicy).
-
-Cpusets extends these two mechanisms as follows:
-
- - Cpusets are sets of allowed CPUs and Memory Nodes, known to the
-   kernel.
- - Each task in the system is attached to a cpuset, via a pointer
-   in the task structure to a reference counted cgroup structure.
- - Calls to sched_setaffinity are filtered to just those CPUs
-   allowed in that task's cpuset.
- - Calls to mbind and set_mempolicy are filtered to just
-   those Memory Nodes allowed in that task's cpuset.
- - The root cpuset contains all the systems CPUs and Memory
-   Nodes.
- - For any cpuset, one can define child cpusets containing a subset
-   of the parents CPU and Memory Node resources.
- - The hierarchy of cpusets can be mounted at /dev/cpuset, for
-   browsing and manipulation from user space.
- - A cpuset may be marked exclusive, which ensures that no other
-   cpuset (except direct ancestors and descendants) may contain
-   any overlapping CPUs or Memory Nodes.
- - You can list all the tasks (by pid) attached to any cpuset.
-
-The implementation of cpusets requires a few, simple hooks
-into the rest of the kernel, none in performance critical paths:
-
- - in init/main.c, to initialize the root cpuset at system boot.
- - in fork and exit, to attach and detach a task from its cpuset.
- - in sched_setaffinity, to mask the requested CPUs by what's
-   allowed in that task's cpuset.
- - in sched.c migrate_live_tasks(), to keep migrating tasks within
-   the CPUs allowed by their cpuset, if possible.
- - in the mbind and set_mempolicy system calls, to mask the requested
-   Memory Nodes by what's allowed in that task's cpuset.
- - in page_alloc.c, to restrict memory to allowed nodes.
- - in vmscan.c, to restrict page recovery to the current cpuset.
-
-You should mount the "cgroup" filesystem type in order to enable
-browsing and modifying the cpusets presently known to the kernel.  No
-new system calls are added for cpusets - all support for querying and
-modifying cpusets is via this cpuset file system.
-
-The /proc/<pid>/status file for each task has four added lines,
-displaying the task's cpus_allowed (on which CPUs it may be scheduled)
-and mems_allowed (on which Memory Nodes it may obtain memory),
-in the two formats seen in the following example:
-
-  Cpus_allowed:   ffffffff,ffffffff,ffffffff,ffffffff
-  Cpus_allowed_list:      0-127
-  Mems_allowed:   ffffffff,ffffffff
-  Mems_allowed_list:      0-63
-
-Each cpuset is represented by a directory in the cgroup file system
-containing (on top of the standard cgroup files) the following
-files describing that cpuset:
-
- - cpuset.cpus: list of CPUs in that cpuset
- - cpuset.mems: list of Memory Nodes in that cpuset
- - cpuset.memory_migrate flag: if set, move pages to cpusets nodes
- - cpuset.cpu_exclusive flag: is cpu placement exclusive?
- - cpuset.mem_exclusive flag: is memory placement exclusive?
- - cpuset.mem_hardwall flag:  is memory allocation hardwalled
- - cpuset.memory_pressure: measure of how much paging pressure in cpuset
- - cpuset.memory_spread_page flag: if set, spread page cache evenly on allowed nodes
- - cpuset.memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes
- - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset
- - cpuset.sched_relax_domain_level: the searching range when migrating tasks
-
-In addition, only the root cpuset has the following file:
- - cpuset.memory_pressure_enabled flag: compute memory_pressure?
-
-New cpusets are created using the mkdir system call or shell
-command.  The properties of a cpuset, such as its flags, allowed
-CPUs and Memory Nodes, and attached tasks, are modified by writing
-to the appropriate file in that cpusets directory, as listed above.
-
-The named hierarchical structure of nested cpusets allows partitioning
-a large system into nested, dynamically changeable, "soft-partitions".
-
-The attachment of each task, automatically inherited at fork by any
-children of that task, to a cpuset allows organizing the work load
-on a system into related sets of tasks such that each set is constrained
-to using the CPUs and Memory Nodes of a particular cpuset.  A task
-may be re-attached to any other cpuset, if allowed by the permissions
-on the necessary cpuset file system directories.
-
-Such management of a system "in the large" integrates smoothly with
-the detailed placement done on individual tasks and memory regions
-using the sched_setaffinity, mbind and set_mempolicy system calls.
-
-The following rules apply to each cpuset:
-
- - Its CPUs and Memory Nodes must be a subset of its parents.
- - It can't be marked exclusive unless its parent is.
- - If its cpu or memory is exclusive, they may not overlap any sibling.
-
-These rules, and the natural hierarchy of cpusets, enable efficient
-enforcement of the exclusive guarantee, without having to scan all
-cpusets every time any of them change to ensure nothing overlaps a
-exclusive cpuset.  Also, the use of a Linux virtual file system (vfs)
-to represent the cpuset hierarchy provides for a familiar permission
-and name space for cpusets, with a minimum of additional kernel code.
-
-The cpus and mems files in the root (top_cpuset) cpuset are
-read-only.  The cpus file automatically tracks the value of
-cpu_online_mask using a CPU hotplug notifier, and the mems file
-automatically tracks the value of node_states[N_MEMORY]--i.e.,
-nodes with memory--using the cpuset_track_online_nodes() hook.
-
-
-1.4 What are exclusive cpusets ?
---------------------------------
-
-If a cpuset is cpu or mem exclusive, no other cpuset, other than
-a direct ancestor or descendant, may share any of the same CPUs or
-Memory Nodes.
-
-A cpuset that is cpuset.mem_exclusive *or* cpuset.mem_hardwall is "hardwalled",
-i.e. it restricts kernel allocations for page, buffer and other data
-commonly shared by the kernel across multiple users.  All cpusets,
-whether hardwalled or not, restrict allocations of memory for user
-space.  This enables configuring a system so that several independent
-jobs can share common kernel data, such as file system pages, while
-isolating each job's user allocation in its own cpuset.  To do this,
-construct a large mem_exclusive cpuset to hold all the jobs, and
-construct child, non-mem_exclusive cpusets for each individual job.
-Only a small amount of typical kernel memory, such as requests from
-interrupt handlers, is allowed to be taken outside even a
-mem_exclusive cpuset.
-
-
-1.5 What is memory_pressure ?
------------------------------
-The memory_pressure of a cpuset provides a simple per-cpuset metric
-of the rate that the tasks in a cpuset are attempting to free up in
-use memory on the nodes of the cpuset to satisfy additional memory
-requests.
-
-This enables batch managers monitoring jobs running in dedicated
-cpusets to efficiently detect what level of memory pressure that job
-is causing.
-
-This is useful both on tightly managed systems running a wide mix of
-submitted jobs, which may choose to terminate or re-prioritize jobs that
-are trying to use more memory than allowed on the nodes assigned to them,
-and with tightly coupled, long running, massively parallel scientific
-computing jobs that will dramatically fail to meet required performance
-goals if they start to use more memory than allowed to them.
-
-This mechanism provides a very economical way for the batch manager
-to monitor a cpuset for signs of memory pressure.  It's up to the
-batch manager or other user code to decide what to do about it and
-take action.
-
-==> Unless this feature is enabled by writing "1" to the special file
-    /dev/cpuset/memory_pressure_enabled, the hook in the rebalance
-    code of __alloc_pages() for this metric reduces to simply noticing
-    that the cpuset_memory_pressure_enabled flag is zero.  So only
-    systems that enable this feature will compute the metric.
-
-Why a per-cpuset, running average:
-
-    Because this meter is per-cpuset, rather than per-task or mm,
-    the system load imposed by a batch scheduler monitoring this
-    metric is sharply reduced on large systems, because a scan of
-    the tasklist can be avoided on each set of queries.
-
-    Because this meter is a running average, instead of an accumulating
-    counter, a batch scheduler can detect memory pressure with a
-    single read, instead of having to read and accumulate results
-    for a period of time.
-
-    Because this meter is per-cpuset rather than per-task or mm,
-    the batch scheduler can obtain the key information, memory
-    pressure in a cpuset, with a single read, rather than having to
-    query and accumulate results over all the (dynamically changing)
-    set of tasks in the cpuset.
-
-A per-cpuset simple digital filter (requires a spinlock and 3 words
-of data per-cpuset) is kept, and updated by any task attached to that
-cpuset, if it enters the synchronous (direct) page reclaim code.
-
-A per-cpuset file provides an integer number representing the recent
-(half-life of 10 seconds) rate of direct page reclaims caused by
-the tasks in the cpuset, in units of reclaims attempted per second,
-times 1000.
-
-
-1.6 What is memory spread ?
----------------------------
-There are two boolean flag files per cpuset that control where the
-kernel allocates pages for the file system buffers and related in
-kernel data structures.  They are called 'cpuset.memory_spread_page' and
-'cpuset.memory_spread_slab'.
-
-If the per-cpuset boolean flag file 'cpuset.memory_spread_page' is set, then
-the kernel will spread the file system buffers (page cache) evenly
-over all the nodes that the faulting task is allowed to use, instead
-of preferring to put those pages on the node where the task is running.
-
-If the per-cpuset boolean flag file 'cpuset.memory_spread_slab' is set,
-then the kernel will spread some file system related slab caches,
-such as for inodes and dentries evenly over all the nodes that the
-faulting task is allowed to use, instead of preferring to put those
-pages on the node where the task is running.
-
-The setting of these flags does not affect anonymous data segment or
-stack segment pages of a task.
-
-By default, both kinds of memory spreading are off, and memory
-pages are allocated on the node local to where the task is running,
-except perhaps as modified by the task's NUMA mempolicy or cpuset
-configuration, so long as sufficient free memory pages are available.
-
-When new cpusets are created, they inherit the memory spread settings
-of their parent.
-
-Setting memory spreading causes allocations for the affected page
-or slab caches to ignore the task's NUMA mempolicy and be spread
-instead.    Tasks using mbind() or set_mempolicy() calls to set NUMA
-mempolicies will not notice any change in these calls as a result of
-their containing task's memory spread settings.  If memory spreading
-is turned off, then the currently specified NUMA mempolicy once again
-applies to memory page allocations.
-
-Both 'cpuset.memory_spread_page' and 'cpuset.memory_spread_slab' are boolean flag
-files.  By default they contain "0", meaning that the feature is off
-for that cpuset.  If a "1" is written to that file, then that turns
-the named feature on.
-
-The implementation is simple.
-
-Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag
-PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently
-joins that cpuset.  The page allocation calls for the page cache
-is modified to perform an inline check for this PFA_SPREAD_PAGE task
-flag, and if set, a call to a new routine cpuset_mem_spread_node()
-returns the node to prefer for the allocation.
-
-Similarly, setting 'cpuset.memory_spread_slab' turns on the flag
-PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate
-pages from the node returned by cpuset_mem_spread_node().
-
-The cpuset_mem_spread_node() routine is also simple.  It uses the
-value of a per-task rotor cpuset_mem_spread_rotor to select the next
-node in the current task's mems_allowed to prefer for the allocation.
-
-This memory placement policy is also known (in other contexts) as
-round-robin or interleave.
-
-This policy can provide substantial improvements for jobs that need
-to place thread local data on the corresponding node, but that need
-to access large file system data sets that need to be spread across
-the several nodes in the jobs cpuset in order to fit.  Without this
-policy, especially for jobs that might have one thread reading in the
-data set, the memory allocation across the nodes in the jobs cpuset
-can become very uneven.
-
-1.7 What is sched_load_balance ?
---------------------------------
-
-The kernel scheduler (kernel/sched/core.c) automatically load balances
-tasks.  If one CPU is underutilized, kernel code running on that
-CPU will look for tasks on other more overloaded CPUs and move those
-tasks to itself, within the constraints of such placement mechanisms
-as cpusets and sched_setaffinity.
-
-The algorithmic cost of load balancing and its impact on key shared
-kernel data structures such as the task list increases more than
-linearly with the number of CPUs being balanced.  So the scheduler
-has support to partition the systems CPUs into a number of sched
-domains such that it only load balances within each sched domain.
-Each sched domain covers some subset of the CPUs in the system;
-no two sched domains overlap; some CPUs might not be in any sched
-domain and hence won't be load balanced.
-
-Put simply, it costs less to balance between two smaller sched domains
-than one big one, but doing so means that overloads in one of the
-two domains won't be load balanced to the other one.
-
-By default, there is one sched domain covering all CPUs, including those
-marked isolated using the kernel boot time "isolcpus=" argument. However,
-the isolated CPUs will not participate in load balancing, and will not
-have tasks running on them unless explicitly assigned.
-
-This default load balancing across all CPUs is not well suited for
-the following two situations:
- 1) On large systems, load balancing across many CPUs is expensive.
-    If the system is managed using cpusets to place independent jobs
-    on separate sets of CPUs, full load balancing is unnecessary.
- 2) Systems supporting realtime on some CPUs need to minimize
-    system overhead on those CPUs, including avoiding task load
-    balancing if that is not needed.
-
-When the per-cpuset flag "cpuset.sched_load_balance" is enabled (the default
-setting), it requests that all the CPUs in that cpusets allowed 'cpuset.cpus'
-be contained in a single sched domain, ensuring that load balancing
-can move a task (not otherwised pinned, as by sched_setaffinity)
-from any CPU in that cpuset to any other.
-
-When the per-cpuset flag "cpuset.sched_load_balance" is disabled, then the
-scheduler will avoid load balancing across the CPUs in that cpuset,
---except-- in so far as is necessary because some overlapping cpuset
-has "sched_load_balance" enabled.
-
-So, for example, if the top cpuset has the flag "cpuset.sched_load_balance"
-enabled, then the scheduler will have one sched domain covering all
-CPUs, and the setting of the "cpuset.sched_load_balance" flag in any other
-cpusets won't matter, as we're already fully load balancing.
-
-Therefore in the above two situations, the top cpuset flag
-"cpuset.sched_load_balance" should be disabled, and only some of the smaller,
-child cpusets have this flag enabled.
-
-When doing this, you don't usually want to leave any unpinned tasks in
-the top cpuset that might use non-trivial amounts of CPU, as such tasks
-may be artificially constrained to some subset of CPUs, depending on
-the particulars of this flag setting in descendant cpusets.  Even if
-such a task could use spare CPU cycles in some other CPUs, the kernel
-scheduler might not consider the possibility of load balancing that
-task to that underused CPU.
-
-Of course, tasks pinned to a particular CPU can be left in a cpuset
-that disables "cpuset.sched_load_balance" as those tasks aren't going anywhere
-else anyway.
-
-There is an impedance mismatch here, between cpusets and sched domains.
-Cpusets are hierarchical and nest.  Sched domains are flat; they don't
-overlap and each CPU is in at most one sched domain.
-
-It is necessary for sched domains to be flat because load balancing
-across partially overlapping sets of CPUs would risk unstable dynamics
-that would be beyond our understanding.  So if each of two partially
-overlapping cpusets enables the flag 'cpuset.sched_load_balance', then we
-form a single sched domain that is a superset of both.  We won't move
-a task to a CPU outside its cpuset, but the scheduler load balancing
-code might waste some compute cycles considering that possibility.
-
-This mismatch is why there is not a simple one-to-one relation
-between which cpusets have the flag "cpuset.sched_load_balance" enabled,
-and the sched domain configuration.  If a cpuset enables the flag, it
-will get balancing across all its CPUs, but if it disables the flag,
-it will only be assured of no load balancing if no other overlapping
-cpuset enables the flag.
-
-If two cpusets have partially overlapping 'cpuset.cpus' allowed, and only
-one of them has this flag enabled, then the other may find its
-tasks only partially load balanced, just on the overlapping CPUs.
-This is just the general case of the top_cpuset example given a few
-paragraphs above.  In the general case, as in the top cpuset case,
-don't leave tasks that might use non-trivial amounts of CPU in
-such partially load balanced cpusets, as they may be artificially
-constrained to some subset of the CPUs allowed to them, for lack of
-load balancing to the other CPUs.
-
-CPUs in "cpuset.isolcpus" were excluded from load balancing by the
-isolcpus= kernel boot option, and will never be load balanced regardless
-of the value of "cpuset.sched_load_balance" in any cpuset.
-
-1.7.1 sched_load_balance implementation details.
-------------------------------------------------
-
-The per-cpuset flag 'cpuset.sched_load_balance' defaults to enabled (contrary
-to most cpuset flags.)  When enabled for a cpuset, the kernel will
-ensure that it can load balance across all the CPUs in that cpuset
-(makes sure that all the CPUs in the cpus_allowed of that cpuset are
-in the same sched domain.)
-
-If two overlapping cpusets both have 'cpuset.sched_load_balance' enabled,
-then they will be (must be) both in the same sched domain.
-
-If, as is the default, the top cpuset has 'cpuset.sched_load_balance' enabled,
-then by the above that means there is a single sched domain covering
-the whole system, regardless of any other cpuset settings.
-
-The kernel commits to user space that it will avoid load balancing
-where it can.  It will pick as fine a granularity partition of sched
-domains as it can while still providing load balancing for any set
-of CPUs allowed to a cpuset having 'cpuset.sched_load_balance' enabled.
-
-The internal kernel cpuset to scheduler interface passes from the
-cpuset code to the scheduler code a partition of the load balanced
-CPUs in the system. This partition is a set of subsets (represented
-as an array of struct cpumask) of CPUs, pairwise disjoint, that cover
-all the CPUs that must be load balanced.
-
-The cpuset code builds a new such partition and passes it to the
-scheduler sched domain setup code, to have the sched domains rebuilt
-as necessary, whenever:
- - the 'cpuset.sched_load_balance' flag of a cpuset with non-empty CPUs changes,
- - or CPUs come or go from a cpuset with this flag enabled,
- - or 'cpuset.sched_relax_domain_level' value of a cpuset with non-empty CPUs
-   and with this flag enabled changes,
- - or a cpuset with non-empty CPUs and with this flag enabled is removed,
- - or a cpu is offlined/onlined.
-
-This partition exactly defines what sched domains the scheduler should
-setup - one sched domain for each element (struct cpumask) in the
-partition.
-
-The scheduler remembers the currently active sched domain partitions.
-When the scheduler routine partition_sched_domains() is invoked from
-the cpuset code to update these sched domains, it compares the new
-partition requested with the current, and updates its sched domains,
-removing the old and adding the new, for each change.
-
-
-1.8 What is sched_relax_domain_level ?
---------------------------------------
-
-In sched domain, the scheduler migrates tasks in 2 ways; periodic load
-balance on tick, and at time of some schedule events.
-
-When a task is woken up, scheduler try to move the task on idle CPU.
-For example, if a task A running on CPU X activates another task B
-on the same CPU X, and if CPU Y is X's sibling and performing idle,
-then scheduler migrate task B to CPU Y so that task B can start on
-CPU Y without waiting task A on CPU X.
-
-And if a CPU run out of tasks in its runqueue, the CPU try to pull
-extra tasks from other busy CPUs to help them before it is going to
-be idle.
-
-Of course it takes some searching cost to find movable tasks and/or
-idle CPUs, the scheduler might not search all CPUs in the domain
-every time.  In fact, in some architectures, the searching ranges on
-events are limited in the same socket or node where the CPU locates,
-while the load balance on tick searches all.
-
-For example, assume CPU Z is relatively far from CPU X.  Even if CPU Z
-is idle while CPU X and the siblings are busy, scheduler can't migrate
-woken task B from X to Z since it is out of its searching range.
-As the result, task B on CPU X need to wait task A or wait load balance
-on the next tick.  For some applications in special situation, waiting
-1 tick may be too long.
-
-The 'cpuset.sched_relax_domain_level' file allows you to request changing
-this searching range as you like.  This file takes int value which
-indicates size of searching range in levels ideally as follows,
-otherwise initial value -1 that indicates the cpuset has no request.
-
-  -1  : no request. use system default or follow request of others.
-   0  : no search.
-   1  : search siblings (hyperthreads in a core).
-   2  : search cores in a package.
-   3  : search cpus in a node [= system wide on non-NUMA system]
-   4  : search nodes in a chunk of node [on NUMA system]
-   5  : search system wide [on NUMA system]
-
-The system default is architecture dependent.  The system default
-can be changed using the relax_domain_level= boot parameter.
-
-This file is per-cpuset and affect the sched domain where the cpuset
-belongs to.  Therefore if the flag 'cpuset.sched_load_balance' of a cpuset
-is disabled, then 'cpuset.sched_relax_domain_level' have no effect since
-there is no sched domain belonging the cpuset.
-
-If multiple cpusets are overlapping and hence they form a single sched
-domain, the largest value among those is used.  Be careful, if one
-requests 0 and others are -1 then 0 is used.
-
-Note that modifying this file will have both good and bad effects,
-and whether it is acceptable or not depends on your situation.
-Don't modify this file if you are not sure.
-
-If your situation is:
- - The migration costs between each cpu can be assumed considerably
-   small(for you) due to your special application's behavior or
-   special hardware support for CPU cache etc.
- - The searching cost doesn't have impact(for you) or you can make
-   the searching cost enough small by managing cpuset to compact etc.
- - The latency is required even it sacrifices cache hit rate etc.
-then increasing 'sched_relax_domain_level' would benefit you.
-
-
-1.9 How do I use cpusets ?
---------------------------
-
-In order to minimize the impact of cpusets on critical kernel
-code, such as the scheduler, and due to the fact that the kernel
-does not support one task updating the memory placement of another
-task directly, the impact on a task of changing its cpuset CPU
-or Memory Node placement, or of changing to which cpuset a task
-is attached, is subtle.
-
-If a cpuset has its Memory Nodes modified, then for each task attached
-to that cpuset, the next time that the kernel attempts to allocate
-a page of memory for that task, the kernel will notice the change
-in the task's cpuset, and update its per-task memory placement to
-remain within the new cpusets memory placement.  If the task was using
-mempolicy MPOL_BIND, and the nodes to which it was bound overlap with
-its new cpuset, then the task will continue to use whatever subset
-of MPOL_BIND nodes are still allowed in the new cpuset.  If the task
-was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed
-in the new cpuset, then the task will be essentially treated as if it
-was MPOL_BIND bound to the new cpuset (even though its NUMA placement,
-as queried by get_mempolicy(), doesn't change).  If a task is moved
-from one cpuset to another, then the kernel will adjust the task's
-memory placement, as above, the next time that the kernel attempts
-to allocate a page of memory for that task.
-
-If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset
-will have its allowed CPU placement changed immediately.  Similarly,
-if a task's pid is written to another cpuset's 'tasks' file, then its
-allowed CPU placement is changed immediately.  If such a task had been
-bound to some subset of its cpuset using the sched_setaffinity() call,
-the task will be allowed to run on any CPU allowed in its new cpuset,
-negating the effect of the prior sched_setaffinity() call.
-
-In summary, the memory placement of a task whose cpuset is changed is
-updated by the kernel, on the next allocation of a page for that task,
-and the processor placement is updated immediately.
-
-Normally, once a page is allocated (given a physical page
-of main memory) then that page stays on whatever node it
-was allocated, so long as it remains allocated, even if the
-cpusets memory placement policy 'cpuset.mems' subsequently changes.
-If the cpuset flag file 'cpuset.memory_migrate' is set true, then when
-tasks are attached to that cpuset, any pages that task had
-allocated to it on nodes in its previous cpuset are migrated
-to the task's new cpuset. The relative placement of the page within
-the cpuset is preserved during these migration operations if possible.
-For example if the page was on the second valid node of the prior cpuset
-then the page will be placed on the second valid node of the new cpuset.
-
-Also if 'cpuset.memory_migrate' is set true, then if that cpuset's
-'cpuset.mems' file is modified, pages allocated to tasks in that
-cpuset, that were on nodes in the previous setting of 'cpuset.mems',
-will be moved to nodes in the new setting of 'mems.'
-Pages that were not in the task's prior cpuset, or in the cpuset's
-prior 'cpuset.mems' setting, will not be moved.
-
-There is an exception to the above.  If hotplug functionality is used
-to remove all the CPUs that are currently assigned to a cpuset,
-then all the tasks in that cpuset will be moved to the nearest ancestor
-with non-empty cpus.  But the moving of some (or all) tasks might fail if
-cpuset is bound with another cgroup subsystem which has some restrictions
-on task attaching.  In this failing case, those tasks will stay
-in the original cpuset, and the kernel will automatically update
-their cpus_allowed to allow all online CPUs.  When memory hotplug
-functionality for removing Memory Nodes is available, a similar exception
-is expected to apply there as well.  In general, the kernel prefers to
-violate cpuset placement, over starving a task that has had all
-its allowed CPUs or Memory Nodes taken offline.
-
-There is a second exception to the above.  GFP_ATOMIC requests are
-kernel internal allocations that must be satisfied, immediately.
-The kernel may drop some request, in rare cases even panic, if a
-GFP_ATOMIC alloc fails.  If the request cannot be satisfied within
-the current task's cpuset, then we relax the cpuset, and look for
-memory anywhere we can find it.  It's better to violate the cpuset
-than stress the kernel.
-
-To start a new job that is to be contained within a cpuset, the steps are:
-
- 1) mkdir /sys/fs/cgroup/cpuset
- 2) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
- 3) Create the new cpuset by doing mkdir's and write's (or echo's) in
-    the /sys/fs/cgroup/cpuset virtual file system.
- 4) Start a task that will be the "founding father" of the new job.
- 5) Attach that task to the new cpuset by writing its pid to the
-    /sys/fs/cgroup/cpuset tasks file for that cpuset.
- 6) fork, exec or clone the job tasks from this founding father task.
-
-For example, the following sequence of commands will setup a cpuset
-named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
-and then start a subshell 'sh' in that cpuset:
-
-  mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
-  cd /sys/fs/cgroup/cpuset
-  mkdir Charlie
-  cd Charlie
-  /bin/echo 2-3 > cpuset.cpus
-  /bin/echo 1 > cpuset.mems
-  /bin/echo $$ > tasks
-  sh
-  # The subshell 'sh' is now running in cpuset Charlie
-  # The next line should display '/Charlie'
-  cat /proc/self/cpuset
-
-There are ways to query or modify cpusets:
- - via the cpuset file system directly, using the various cd, mkdir, echo,
-   cat, rmdir commands from the shell, or their equivalent from C.
- - via the C library libcpuset.
- - via the C library libcgroup.
-   (http://sourceforge.net/projects/libcg/)
- - via the python application cset.
-   (http://code.google.com/p/cpuset/)
-
-The sched_setaffinity calls can also be done at the shell prompt using
-SGI's runon or Robert Love's taskset.  The mbind and set_mempolicy
-calls can be done at the shell prompt using the numactl command
-(part of Andi Kleen's numa package).
-
-2. Usage Examples and Syntax
-============================
-
-2.1 Basic Usage
----------------
-
-Creating, modifying, using the cpusets can be done through the cpuset
-virtual filesystem.
-
-To mount it, type:
-# mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset
-
-Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the
-tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset
-is the cpuset that holds the whole system.
-
-If you want to create a new cpuset under /sys/fs/cgroup/cpuset:
-# cd /sys/fs/cgroup/cpuset
-# mkdir my_cpuset
-
-Now you want to do something with this cpuset.
-# cd my_cpuset
-
-In this directory you can find several files:
-# ls
-cgroup.clone_children  cpuset.memory_pressure
-cgroup.event_control   cpuset.memory_spread_page
-cgroup.procs           cpuset.memory_spread_slab
-cpuset.cpu_exclusive   cpuset.mems
-cpuset.cpus            cpuset.sched_load_balance
-cpuset.mem_exclusive   cpuset.sched_relax_domain_level
-cpuset.mem_hardwall    notify_on_release
-cpuset.memory_migrate  tasks
-
-Reading them will give you information about the state of this cpuset:
-the CPUs and Memory Nodes it can use, the processes that are using
-it, its properties.  By writing to these files you can manipulate
-the cpuset.
-
-Set some flags:
-# /bin/echo 1 > cpuset.cpu_exclusive
-
-Add some cpus:
-# /bin/echo 0-7 > cpuset.cpus
-
-Add some mems:
-# /bin/echo 0-7 > cpuset.mems
-
-Now attach your shell to this cpuset:
-# /bin/echo $$ > tasks
-
-You can also create cpusets inside your cpuset by using mkdir in this
-directory.
-# mkdir my_sub_cs
-
-To remove a cpuset, just use rmdir:
-# rmdir my_sub_cs
-This will fail if the cpuset is in use (has cpusets inside, or has
-processes attached).
-
-Note that for legacy reasons, the "cpuset" filesystem exists as a
-wrapper around the cgroup filesystem.
-
-The command
-
-mount -t cpuset X /sys/fs/cgroup/cpuset
-
-is equivalent to
-
-mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset
-echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent
-
-2.2 Adding/removing cpus
-------------------------
-
-This is the syntax to use when writing in the cpus or mems files
-in cpuset directories:
-
-# /bin/echo 1-4 > cpuset.cpus		-> set cpus list to cpus 1,2,3,4
-# /bin/echo 1,2,3,4 > cpuset.cpus	-> set cpus list to cpus 1,2,3,4
-
-To add a CPU to a cpuset, write the new list of CPUs including the
-CPU to be added. To add 6 to the above cpuset:
-
-# /bin/echo 1-4,6 > cpuset.cpus	-> set cpus list to cpus 1,2,3,4,6
-
-Similarly to remove a CPU from a cpuset, write the new list of CPUs
-without the CPU to be removed.
-
-To remove all the CPUs:
-
-# /bin/echo "" > cpuset.cpus		-> clear cpus list
-
-2.3 Setting flags
------------------
-
-The syntax is very simple:
-
-# /bin/echo 1 > cpuset.cpu_exclusive 	-> set flag 'cpuset.cpu_exclusive'
-# /bin/echo 0 > cpuset.cpu_exclusive 	-> unset flag 'cpuset.cpu_exclusive'
-
-2.4 Attaching processes
------------------------
-
-# /bin/echo PID > tasks
-
-Note that it is PID, not PIDs. You can only attach ONE task at a time.
-If you have several tasks to attach, you have to do it one after another:
-
-# /bin/echo PID1 > tasks
-# /bin/echo PID2 > tasks
-	...
-# /bin/echo PIDn > tasks
-
-
-3. Questions
-============
-
-Q: what's up with this '/bin/echo' ?
-A: bash's builtin 'echo' command does not check calls to write() against
-   errors. If you use it in the cpuset file system, you won't be
-   able to tell whether a command succeeded or failed.
-
-Q: When I attach processes, only the first of the line gets really attached !
-A: We can only return one error code per call to write(). So you should also
-   put only ONE pid.
-
-4. Contact
-==========
-
-Web: http://www.bullopensource.org/cpuset
diff --git a/Documentation/cgroup-v1/devices.rst b/Documentation/cgroup-v1/devices.rst
new file mode 100644
index 000000000000..e1886783961e
--- /dev/null
+++ b/Documentation/cgroup-v1/devices.rst
@@ -0,0 +1,132 @@
+===========================
+Device Whitelist Controller
+===========================
+
+1. Description
+==============
+
+Implement a cgroup to track and enforce open and mknod restrictions
+on device files.  A device cgroup associates a device access
+whitelist with each cgroup.  A whitelist entry has 4 fields.
+'type' is a (all), c (char), or b (block).  'all' means it applies
+to all types and all major and minor numbers.  Major and minor are
+either an integer or * for all.  Access is a composition of r
+(read), w (write), and m (mknod).
+
+The root device cgroup starts with rwm to 'all'.  A child device
+cgroup gets a copy of the parent.  Administrators can then remove
+devices from the whitelist or add new entries.  A child cgroup can
+never receive a device access which is denied by its parent.
+
+2. User Interface
+=================
+
+An entry is added using devices.allow, and removed using
+devices.deny.  For instance::
+
+	echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow
+
+allows cgroup 1 to read and mknod the device usually known as
+/dev/null.  Doing::
+
+	echo a > /sys/fs/cgroup/1/devices.deny
+
+will remove the default 'a *:* rwm' entry. Doing::
+
+	echo a > /sys/fs/cgroup/1/devices.allow
+
+will add the 'a *:* rwm' entry to the whitelist.
+
+3. Security
+===========
+
+Any task can move itself between cgroups.  This clearly won't
+suffice, but we can decide the best way to adequately restrict
+movement as people get some experience with this.  We may just want
+to require CAP_SYS_ADMIN, which at least is a separate bit from
+CAP_MKNOD.  We may want to just refuse moving to a cgroup which
+isn't a descendant of the current one.  Or we may want to use
+CAP_MAC_ADMIN, since we really are trying to lock down root.
+
+CAP_SYS_ADMIN is needed to modify the whitelist or move another
+task to a new cgroup.  (Again we'll probably want to change that).
+
+A cgroup may not be granted more permissions than the cgroup's
+parent has.
+
+4. Hierarchy
+============
+
+device cgroups maintain hierarchy by making sure a cgroup never has more
+access permissions than its parent.  Every time an entry is written to
+a cgroup's devices.deny file, all its children will have that entry removed
+from their whitelist and all the locally set whitelist entries will be
+re-evaluated.  In case one of the locally set whitelist entries would provide
+more access than the cgroup's parent, it'll be removed from the whitelist.
+
+Example::
+
+      A
+     / \
+        B
+
+    group        behavior	exceptions
+    A            allow		"b 8:* rwm", "c 116:1 rw"
+    B            deny		"c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"
+
+If a device is denied in group A::
+
+	# echo "c 116:* r" > A/devices.deny
+
+it'll propagate down and after revalidating B's entries, the whitelist entry
+"c 116:2 rwm" will be removed::
+
+    group        whitelist entries                        denied devices
+    A            all                                      "b 8:* rwm", "c 116:* rw"
+    B            "c 1:3 rwm", "b 3:* rwm"                 all the rest
+
+In case parent's exceptions change and local exceptions are not allowed
+anymore, they'll be deleted.
+
+Notice that new whitelist entries will not be propagated::
+
+      A
+     / \
+        B
+
+    group        whitelist entries                        denied devices
+    A            "c 1:3 rwm", "c 1:5 r"                   all the rest
+    B            "c 1:3 rwm", "c 1:5 r"                   all the rest
+
+when adding ``c *:3 rwm``::
+
+	# echo "c *:3 rwm" >A/devices.allow
+
+the result::
+
+    group        whitelist entries                        denied devices
+    A            "c *:3 rwm", "c 1:5 r"                   all the rest
+    B            "c 1:3 rwm", "c 1:5 r"                   all the rest
+
+but now it'll be possible to add new entries to B::
+
+	# echo "c 2:3 rwm" >B/devices.allow
+	# echo "c 50:3 r" >B/devices.allow
+
+or even::
+
+	# echo "c *:3 rwm" >B/devices.allow
+
+Allowing or denying all by writing 'a' to devices.allow or devices.deny will
+not be possible once the device cgroups has children.
+
+4.1 Hierarchy (internal implementation)
+---------------------------------------
+
+device cgroups is implemented internally using a behavior (ALLOW, DENY) and a
+list of exceptions.  The internal state is controlled using the same user
+interface to preserve compatibility with the previous whitelist-only
+implementation.  Removal or addition of exceptions that will reduce the access
+to devices will be propagated down the hierarchy.
+For every propagated exception, the effective rules will be re-evaluated based
+on current parent's access rules.
diff --git a/Documentation/cgroup-v1/devices.txt b/Documentation/cgroup-v1/devices.txt
deleted file mode 100644
index 3c1095ca02ea..000000000000
--- a/Documentation/cgroup-v1/devices.txt
+++ /dev/null
@@ -1,116 +0,0 @@
-Device Whitelist Controller
-
-1. Description:
-
-Implement a cgroup to track and enforce open and mknod restrictions
-on device files.  A device cgroup associates a device access
-whitelist with each cgroup.  A whitelist entry has 4 fields.
-'type' is a (all), c (char), or b (block).  'all' means it applies
-to all types and all major and minor numbers.  Major and minor are
-either an integer or * for all.  Access is a composition of r
-(read), w (write), and m (mknod).
-
-The root device cgroup starts with rwm to 'all'.  A child device
-cgroup gets a copy of the parent.  Administrators can then remove
-devices from the whitelist or add new entries.  A child cgroup can
-never receive a device access which is denied by its parent.
-
-2. User Interface
-
-An entry is added using devices.allow, and removed using
-devices.deny.  For instance
-
-	echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow
-
-allows cgroup 1 to read and mknod the device usually known as
-/dev/null.  Doing
-
-	echo a > /sys/fs/cgroup/1/devices.deny
-
-will remove the default 'a *:* rwm' entry. Doing
-
-	echo a > /sys/fs/cgroup/1/devices.allow
-
-will add the 'a *:* rwm' entry to the whitelist.
-
-3. Security
-
-Any task can move itself between cgroups.  This clearly won't
-suffice, but we can decide the best way to adequately restrict
-movement as people get some experience with this.  We may just want
-to require CAP_SYS_ADMIN, which at least is a separate bit from
-CAP_MKNOD.  We may want to just refuse moving to a cgroup which
-isn't a descendant of the current one.  Or we may want to use
-CAP_MAC_ADMIN, since we really are trying to lock down root.
-
-CAP_SYS_ADMIN is needed to modify the whitelist or move another
-task to a new cgroup.  (Again we'll probably want to change that).
-
-A cgroup may not be granted more permissions than the cgroup's
-parent has.
-
-4. Hierarchy
-
-device cgroups maintain hierarchy by making sure a cgroup never has more
-access permissions than its parent.  Every time an entry is written to
-a cgroup's devices.deny file, all its children will have that entry removed
-from their whitelist and all the locally set whitelist entries will be
-re-evaluated.  In case one of the locally set whitelist entries would provide
-more access than the cgroup's parent, it'll be removed from the whitelist.
-
-Example:
-      A
-     / \
-        B
-
-    group        behavior	exceptions
-    A            allow		"b 8:* rwm", "c 116:1 rw"
-    B            deny		"c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"
-
-If a device is denied in group A:
-	# echo "c 116:* r" > A/devices.deny
-it'll propagate down and after revalidating B's entries, the whitelist entry
-"c 116:2 rwm" will be removed:
-
-    group        whitelist entries                        denied devices
-    A            all                                      "b 8:* rwm", "c 116:* rw"
-    B            "c 1:3 rwm", "b 3:* rwm"                 all the rest
-
-In case parent's exceptions change and local exceptions are not allowed
-anymore, they'll be deleted.
-
-Notice that new whitelist entries will not be propagated:
-      A
-     / \
-        B
-
-    group        whitelist entries                        denied devices
-    A            "c 1:3 rwm", "c 1:5 r"                   all the rest
-    B            "c 1:3 rwm", "c 1:5 r"                   all the rest
-
-when adding "c *:3 rwm":
-	# echo "c *:3 rwm" >A/devices.allow
-
-the result:
-    group        whitelist entries                        denied devices
-    A            "c *:3 rwm", "c 1:5 r"                   all the rest
-    B            "c 1:3 rwm", "c 1:5 r"                   all the rest
-
-but now it'll be possible to add new entries to B:
-	# echo "c 2:3 rwm" >B/devices.allow
-	# echo "c 50:3 r" >B/devices.allow
-or even
-	# echo "c *:3 rwm" >B/devices.allow
-
-Allowing or denying all by writing 'a' to devices.allow or devices.deny will
-not be possible once the device cgroups has children.
-
-4.1 Hierarchy (internal implementation)
-
-device cgroups is implemented internally using a behavior (ALLOW, DENY) and a
-list of exceptions.  The internal state is controlled using the same user
-interface to preserve compatibility with the previous whitelist-only
-implementation.  Removal or addition of exceptions that will reduce the access
-to devices will be propagated down the hierarchy.
-For every propagated exception, the effective rules will be re-evaluated based
-on current parent's access rules.
diff --git a/Documentation/cgroup-v1/freezer-subsystem.rst b/Documentation/cgroup-v1/freezer-subsystem.rst
new file mode 100644
index 000000000000..582d3427de3f
--- /dev/null
+++ b/Documentation/cgroup-v1/freezer-subsystem.rst
@@ -0,0 +1,127 @@
+==============
+Cgroup Freezer
+==============
+
+The cgroup freezer is useful to batch job management system which start
+and stop sets of tasks in order to schedule the resources of a machine
+according to the desires of a system administrator. This sort of program
+is often used on HPC clusters to schedule access to the cluster as a
+whole. The cgroup freezer uses cgroups to describe the set of tasks to
+be started/stopped by the batch job management system. It also provides
+a means to start and stop the tasks composing the job.
+
+The cgroup freezer will also be useful for checkpointing running groups
+of tasks. The freezer allows the checkpoint code to obtain a consistent
+image of the tasks by attempting to force the tasks in a cgroup into a
+quiescent state. Once the tasks are quiescent another task can
+walk /proc or invoke a kernel interface to gather information about the
+quiesced tasks. Checkpointed tasks can be restarted later should a
+recoverable error occur. This also allows the checkpointed tasks to be
+migrated between nodes in a cluster by copying the gathered information
+to another node and restarting the tasks there.
+
+Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping
+and resuming tasks in userspace. Both of these signals are observable
+from within the tasks we wish to freeze. While SIGSTOP cannot be caught,
+blocked, or ignored it can be seen by waiting or ptracing parent tasks.
+SIGCONT is especially unsuitable since it can be caught by the task. Any
+programs designed to watch for SIGSTOP and SIGCONT could be broken by
+attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can
+demonstrate this problem using nested bash shells::
+
+	$ echo $$
+	16644
+	$ bash
+	$ echo $$
+	16690
+
+	From a second, unrelated bash shell:
+	$ kill -SIGSTOP 16690
+	$ kill -SIGCONT 16690
+
+	<at this point 16690 exits and causes 16644 to exit too>
+
+This happens because bash can observe both signals and choose how it
+responds to them.
+
+Another example of a program which catches and responds to these
+signals is gdb. In fact any program designed to use ptrace is likely to
+have a problem with this method of stopping and resuming tasks.
+
+In contrast, the cgroup freezer uses the kernel freezer code to
+prevent the freeze/unfreeze cycle from becoming visible to the tasks
+being frozen. This allows the bash example above and gdb to run as
+expected.
+
+The cgroup freezer is hierarchical. Freezing a cgroup freezes all
+tasks belonging to the cgroup and all its descendant cgroups. Each
+cgroup has its own state (self-state) and the state inherited from the
+parent (parent-state). Iff both states are THAWED, the cgroup is
+THAWED.
+
+The following cgroupfs files are created by cgroup freezer.
+
+* freezer.state: Read-write.
+
+  When read, returns the effective state of the cgroup - "THAWED",
+  "FREEZING" or "FROZEN". This is the combined self and parent-states.
+  If any is freezing, the cgroup is freezing (FREEZING or FROZEN).
+
+  FREEZING cgroup transitions into FROZEN state when all tasks
+  belonging to the cgroup and its descendants become frozen. Note that
+  a cgroup reverts to FREEZING from FROZEN after a new task is added
+  to the cgroup or one of its descendant cgroups until the new task is
+  frozen.
+
+  When written, sets the self-state of the cgroup. Two values are
+  allowed - "FROZEN" and "THAWED". If FROZEN is written, the cgroup,
+  if not already freezing, enters FREEZING state along with all its
+  descendant cgroups.
+
+  If THAWED is written, the self-state of the cgroup is changed to
+  THAWED.  Note that the effective state may not change to THAWED if
+  the parent-state is still freezing. If a cgroup's effective state
+  becomes THAWED, all its descendants which are freezing because of
+  the cgroup also leave the freezing state.
+
+* freezer.self_freezing: Read only.
+
+  Shows the self-state. 0 if the self-state is THAWED; otherwise, 1.
+  This value is 1 iff the last write to freezer.state was "FROZEN".
+
+* freezer.parent_freezing: Read only.
+
+  Shows the parent-state.  0 if none of the cgroup's ancestors is
+  frozen; otherwise, 1.
+
+The root cgroup is non-freezable and the above interface files don't
+exist.
+
+* Examples of usage::
+
+   # mkdir /sys/fs/cgroup/freezer
+   # mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer
+   # mkdir /sys/fs/cgroup/freezer/0
+   # echo $some_pid > /sys/fs/cgroup/freezer/0/tasks
+
+to get status of the freezer subsystem::
+
+   # cat /sys/fs/cgroup/freezer/0/freezer.state
+   THAWED
+
+to freeze all tasks in the container::
+
+   # echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state
+   # cat /sys/fs/cgroup/freezer/0/freezer.state
+   FREEZING
+   # cat /sys/fs/cgroup/freezer/0/freezer.state
+   FROZEN
+
+to unfreeze all tasks in the container::
+
+   # echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state
+   # cat /sys/fs/cgroup/freezer/0/freezer.state
+   THAWED
+
+This is the basic mechanism which should do the right thing for user space task
+in a simple scenario.
diff --git a/Documentation/cgroup-v1/freezer-subsystem.txt b/Documentation/cgroup-v1/freezer-subsystem.txt
deleted file mode 100644
index e831cb2b8394..000000000000
--- a/Documentation/cgroup-v1/freezer-subsystem.txt
+++ /dev/null
@@ -1,123 +0,0 @@
-The cgroup freezer is useful to batch job management system which start
-and stop sets of tasks in order to schedule the resources of a machine
-according to the desires of a system administrator. This sort of program
-is often used on HPC clusters to schedule access to the cluster as a
-whole. The cgroup freezer uses cgroups to describe the set of tasks to
-be started/stopped by the batch job management system. It also provides
-a means to start and stop the tasks composing the job.
-
-The cgroup freezer will also be useful for checkpointing running groups
-of tasks. The freezer allows the checkpoint code to obtain a consistent
-image of the tasks by attempting to force the tasks in a cgroup into a
-quiescent state. Once the tasks are quiescent another task can
-walk /proc or invoke a kernel interface to gather information about the
-quiesced tasks. Checkpointed tasks can be restarted later should a
-recoverable error occur. This also allows the checkpointed tasks to be
-migrated between nodes in a cluster by copying the gathered information
-to another node and restarting the tasks there.
-
-Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping
-and resuming tasks in userspace. Both of these signals are observable
-from within the tasks we wish to freeze. While SIGSTOP cannot be caught,
-blocked, or ignored it can be seen by waiting or ptracing parent tasks.
-SIGCONT is especially unsuitable since it can be caught by the task. Any
-programs designed to watch for SIGSTOP and SIGCONT could be broken by
-attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can
-demonstrate this problem using nested bash shells:
-
-	$ echo $$
-	16644
-	$ bash
-	$ echo $$
-	16690
-
-	From a second, unrelated bash shell:
-	$ kill -SIGSTOP 16690
-	$ kill -SIGCONT 16690
-
-	<at this point 16690 exits and causes 16644 to exit too>
-
-This happens because bash can observe both signals and choose how it
-responds to them.
-
-Another example of a program which catches and responds to these
-signals is gdb. In fact any program designed to use ptrace is likely to
-have a problem with this method of stopping and resuming tasks.
-
-In contrast, the cgroup freezer uses the kernel freezer code to
-prevent the freeze/unfreeze cycle from becoming visible to the tasks
-being frozen. This allows the bash example above and gdb to run as
-expected.
-
-The cgroup freezer is hierarchical. Freezing a cgroup freezes all
-tasks belonging to the cgroup and all its descendant cgroups. Each
-cgroup has its own state (self-state) and the state inherited from the
-parent (parent-state). Iff both states are THAWED, the cgroup is
-THAWED.
-
-The following cgroupfs files are created by cgroup freezer.
-
-* freezer.state: Read-write.
-
-  When read, returns the effective state of the cgroup - "THAWED",
-  "FREEZING" or "FROZEN". This is the combined self and parent-states.
-  If any is freezing, the cgroup is freezing (FREEZING or FROZEN).
-
-  FREEZING cgroup transitions into FROZEN state when all tasks
-  belonging to the cgroup and its descendants become frozen. Note that
-  a cgroup reverts to FREEZING from FROZEN after a new task is added
-  to the cgroup or one of its descendant cgroups until the new task is
-  frozen.
-
-  When written, sets the self-state of the cgroup. Two values are
-  allowed - "FROZEN" and "THAWED". If FROZEN is written, the cgroup,
-  if not already freezing, enters FREEZING state along with all its
-  descendant cgroups.
-
-  If THAWED is written, the self-state of the cgroup is changed to
-  THAWED.  Note that the effective state may not change to THAWED if
-  the parent-state is still freezing. If a cgroup's effective state
-  becomes THAWED, all its descendants which are freezing because of
-  the cgroup also leave the freezing state.
-
-* freezer.self_freezing: Read only.
-
-  Shows the self-state. 0 if the self-state is THAWED; otherwise, 1.
-  This value is 1 iff the last write to freezer.state was "FROZEN".
-
-* freezer.parent_freezing: Read only.
-
-  Shows the parent-state.  0 if none of the cgroup's ancestors is
-  frozen; otherwise, 1.
-
-The root cgroup is non-freezable and the above interface files don't
-exist.
-
-* Examples of usage :
-
-   # mkdir /sys/fs/cgroup/freezer
-   # mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer
-   # mkdir /sys/fs/cgroup/freezer/0
-   # echo $some_pid > /sys/fs/cgroup/freezer/0/tasks
-
-to get status of the freezer subsystem :
-
-   # cat /sys/fs/cgroup/freezer/0/freezer.state
-   THAWED
-
-to freeze all tasks in the container :
-
-   # echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state
-   # cat /sys/fs/cgroup/freezer/0/freezer.state
-   FREEZING
-   # cat /sys/fs/cgroup/freezer/0/freezer.state
-   FROZEN
-
-to unfreeze all tasks in the container :
-
-   # echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state
-   # cat /sys/fs/cgroup/freezer/0/freezer.state
-   THAWED
-
-This is the basic mechanism which should do the right thing for user space task
-in a simple scenario.
diff --git a/Documentation/cgroup-v1/hugetlb.rst b/Documentation/cgroup-v1/hugetlb.rst
new file mode 100644
index 000000000000..a3902aa253a9
--- /dev/null
+++ b/Documentation/cgroup-v1/hugetlb.rst
@@ -0,0 +1,50 @@
+==================
+HugeTLB Controller
+==================
+
+The HugeTLB controller allows to limit the HugeTLB usage per control group and
+enforces the controller limit during page fault. Since HugeTLB doesn't
+support page reclaim, enforcing the limit at page fault time implies that,
+the application will get SIGBUS signal if it tries to access HugeTLB pages
+beyond its limit. This requires the application to know beforehand how much
+HugeTLB pages it would require for its use.
+
+HugeTLB controller can be created by first mounting the cgroup filesystem.
+
+# mount -t cgroup -o hugetlb none /sys/fs/cgroup
+
+With the above step, the initial or the parent HugeTLB group becomes
+visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
+the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
+
+New groups can be created under the parent group /sys/fs/cgroup::
+
+  # cd /sys/fs/cgroup
+  # mkdir g1
+  # echo $$ > g1/tasks
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it.
+
+Brief summary of control files::
+
+ hugetlb.<hugepagesize>.limit_in_bytes     # set/show limit of "hugepagesize" hugetlb usage
+ hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb  usage recorded
+ hugetlb.<hugepagesize>.usage_in_bytes     # show current usage for "hugepagesize" hugetlb
+ hugetlb.<hugepagesize>.failcnt		   # show the number of allocation failure due to HugeTLB limit
+
+For a system supporting three hugepage sizes (64k, 32M and 1G), the control
+files include::
+
+  hugetlb.1GB.limit_in_bytes
+  hugetlb.1GB.max_usage_in_bytes
+  hugetlb.1GB.usage_in_bytes
+  hugetlb.1GB.failcnt
+  hugetlb.64KB.limit_in_bytes
+  hugetlb.64KB.max_usage_in_bytes
+  hugetlb.64KB.usage_in_bytes
+  hugetlb.64KB.failcnt
+  hugetlb.32MB.limit_in_bytes
+  hugetlb.32MB.max_usage_in_bytes
+  hugetlb.32MB.usage_in_bytes
+  hugetlb.32MB.failcnt
diff --git a/Documentation/cgroup-v1/hugetlb.txt b/Documentation/cgroup-v1/hugetlb.txt
deleted file mode 100644
index 1260e5369b9b..000000000000
--- a/Documentation/cgroup-v1/hugetlb.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-HugeTLB Controller
--------------------
-
-The HugeTLB controller allows to limit the HugeTLB usage per control group and
-enforces the controller limit during page fault. Since HugeTLB doesn't
-support page reclaim, enforcing the limit at page fault time implies that,
-the application will get SIGBUS signal if it tries to access HugeTLB pages
-beyond its limit. This requires the application to know beforehand how much
-HugeTLB pages it would require for its use.
-
-HugeTLB controller can be created by first mounting the cgroup filesystem.
-
-# mount -t cgroup -o hugetlb none /sys/fs/cgroup
-
-With the above step, the initial or the parent HugeTLB group becomes
-visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
-the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
-
-New groups can be created under the parent group /sys/fs/cgroup.
-
-# cd /sys/fs/cgroup
-# mkdir g1
-# echo $$ > g1/tasks
-
-The above steps create a new group g1 and move the current shell
-process (bash) into it.
-
-Brief summary of control files
-
- hugetlb.<hugepagesize>.limit_in_bytes     # set/show limit of "hugepagesize" hugetlb usage
- hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb  usage recorded
- hugetlb.<hugepagesize>.usage_in_bytes     # show current usage for "hugepagesize" hugetlb
- hugetlb.<hugepagesize>.failcnt		   # show the number of allocation failure due to HugeTLB limit
-
-For a system supporting three hugepage sizes (64k, 32M and 1G), the control
-files include:
-
-hugetlb.1GB.limit_in_bytes
-hugetlb.1GB.max_usage_in_bytes
-hugetlb.1GB.usage_in_bytes
-hugetlb.1GB.failcnt
-hugetlb.64KB.limit_in_bytes
-hugetlb.64KB.max_usage_in_bytes
-hugetlb.64KB.usage_in_bytes
-hugetlb.64KB.failcnt
-hugetlb.32MB.limit_in_bytes
-hugetlb.32MB.max_usage_in_bytes
-hugetlb.32MB.usage_in_bytes
-hugetlb.32MB.failcnt
diff --git a/Documentation/cgroup-v1/index.rst b/Documentation/cgroup-v1/index.rst
new file mode 100644
index 000000000000..fe76d42edc11
--- /dev/null
+++ b/Documentation/cgroup-v1/index.rst
@@ -0,0 +1,30 @@
+:orphan:
+
+========================
+Control Groups version 1
+========================
+
+.. toctree::
+    :maxdepth: 1
+
+    cgroups
+
+    blkio-controller
+    cpuacct
+    cpusets
+    devices
+    freezer-subsystem
+    hugetlb
+    memcg_test
+    memory
+    net_cls
+    net_prio
+    pids
+    rdma
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/cgroup-v1/memcg_test.rst b/Documentation/cgroup-v1/memcg_test.rst
new file mode 100644
index 000000000000..91bd18c6a514
--- /dev/null
+++ b/Documentation/cgroup-v1/memcg_test.rst
@@ -0,0 +1,355 @@
+=====================================================
+Memory Resource Controller(Memcg) Implementation Memo
+=====================================================
+
+Last Updated: 2010/2
+
+Base Kernel Version: based on 2.6.33-rc7-mm(candidate for 34).
+
+Because VM is getting complex (one of reasons is memcg...), memcg's behavior
+is complex. This is a document for memcg's internal behavior.
+Please note that implementation details can be changed.
+
+(*) Topics on API should be in Documentation/cgroup-v1/memory.rst)
+
+0. How to record usage ?
+========================
+
+   2 objects are used.
+
+   page_cgroup ....an object per page.
+
+	Allocated at boot or memory hotplug. Freed at memory hot removal.
+
+   swap_cgroup ... an entry per swp_entry.
+
+	Allocated at swapon(). Freed at swapoff().
+
+   The page_cgroup has USED bit and double count against a page_cgroup never
+   occurs. swap_cgroup is used only when a charged page is swapped-out.
+
+1. Charge
+=========
+
+   a page/swp_entry may be charged (usage += PAGE_SIZE) at
+
+	mem_cgroup_try_charge()
+
+2. Uncharge
+===========
+
+  a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
+
+	mem_cgroup_uncharge()
+	  Called when a page's refcount goes down to 0.
+
+	mem_cgroup_uncharge_swap()
+	  Called when swp_entry's refcnt goes down to 0. A charge against swap
+	  disappears.
+
+3. charge-commit-cancel
+=======================
+
+	Memcg pages are charged in two steps:
+
+		- mem_cgroup_try_charge()
+		- mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
+
+	At try_charge(), there are no flags to say "this page is charged".
+	at this point, usage += PAGE_SIZE.
+
+	At commit(), the page is associated with the memcg.
+
+	At cancel(), simply usage -= PAGE_SIZE.
+
+Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
+
+4. Anonymous
+============
+
+	Anonymous page is newly allocated at
+		  - page fault into MAP_ANONYMOUS mapping.
+		  - Copy-On-Write.
+
+	4.1 Swap-in.
+	At swap-in, the page is taken from swap-cache. There are 2 cases.
+
+	(a) If the SwapCache is newly allocated and read, it has no charges.
+	(b) If the SwapCache has been mapped by processes, it has been
+	    charged already.
+
+	4.2 Swap-out.
+	At swap-out, typical state transition is below.
+
+	(a) add to swap cache. (marked as SwapCache)
+	    swp_entry's refcnt += 1.
+	(b) fully unmapped.
+	    swp_entry's refcnt += # of ptes.
+	(c) write back to swap.
+	(d) delete from swap cache. (remove from SwapCache)
+	    swp_entry's refcnt -= 1.
+
+
+	Finally, at task exit,
+	(e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
+
+5. Page Cache
+=============
+
+	Page Cache is charged at
+	- add_to_page_cache_locked().
+
+	The logic is very clear. (About migration, see below)
+
+	Note:
+	  __remove_from_page_cache() is called by remove_from_page_cache()
+	  and __remove_mapping().
+
+6. Shmem(tmpfs) Page Cache
+===========================
+
+	The best way to understand shmem's page state transition is to read
+	mm/shmem.c.
+
+	But brief explanation of the behavior of memcg around shmem will be
+	helpful to understand the logic.
+
+	Shmem's page (just leaf page, not direct/indirect block) can be on
+
+		- radix-tree of shmem's inode.
+		- SwapCache.
+		- Both on radix-tree and SwapCache. This happens at swap-in
+		  and swap-out,
+
+	It's charged when...
+
+	- A new page is added to shmem's radix-tree.
+	- A swp page is read. (move a charge from swap_cgroup to page_cgroup)
+
+7. Page Migration
+=================
+
+	mem_cgroup_migrate()
+
+8. LRU
+======
+        Each memcg has its own private LRU. Now, its handling is under global
+	VM's control (means that it's handled under global pgdat->lru_lock).
+	Almost all routines around memcg's LRU is called by global LRU's
+	list management functions under pgdat->lru_lock.
+
+	A special function is mem_cgroup_isolate_pages(). This scans
+	memcg's private LRU and call __isolate_lru_page() to extract a page
+	from LRU.
+
+	(By __isolate_lru_page(), the page is removed from both of global and
+	private LRU.)
+
+
+9. Typical Tests.
+=================
+
+ Tests for racy cases.
+
+9.1 Small limit to memcg.
+-------------------------
+
+	When you do test to do racy case, it's good test to set memcg's limit
+	to be very small rather than GB. Many races found in the test under
+	xKB or xxMB limits.
+
+	(Memory behavior under GB and Memory behavior under MB shows very
+	different situation.)
+
+9.2 Shmem
+---------
+
+	Historically, memcg's shmem handling was poor and we saw some amount
+	of troubles here. This is because shmem is page-cache but can be
+	SwapCache. Test with shmem/tmpfs is always good test.
+
+9.3 Migration
+-------------
+
+	For NUMA, migration is an another special case. To do easy test, cpuset
+	is useful. Following is a sample script to do migration::
+
+		mount -t cgroup -o cpuset none /opt/cpuset
+
+		mkdir /opt/cpuset/01
+		echo 1 > /opt/cpuset/01/cpuset.cpus
+		echo 0 > /opt/cpuset/01/cpuset.mems
+		echo 1 > /opt/cpuset/01/cpuset.memory_migrate
+		mkdir /opt/cpuset/02
+		echo 1 > /opt/cpuset/02/cpuset.cpus
+		echo 1 > /opt/cpuset/02/cpuset.mems
+		echo 1 > /opt/cpuset/02/cpuset.memory_migrate
+
+	In above set, when you moves a task from 01 to 02, page migration to
+	node 0 to node 1 will occur. Following is a script to migrate all
+	under cpuset.::
+
+		--
+		move_task()
+		{
+		for pid in $1
+		do
+			/bin/echo $pid >$2/tasks 2>/dev/null
+			echo -n $pid
+			echo -n " "
+		done
+		echo END
+		}
+
+		G1_TASK=`cat ${G1}/tasks`
+		G2_TASK=`cat ${G2}/tasks`
+		move_task "${G1_TASK}" ${G2} &
+		--
+
+9.4 Memory hotplug
+------------------
+
+	memory hotplug test is one of good test.
+
+	to offline memory, do following::
+
+		# echo offline > /sys/devices/system/memory/memoryXXX/state
+
+	(XXX is the place of memory)
+
+	This is an easy way to test page migration, too.
+
+9.5 mkdir/rmdir
+---------------
+
+	When using hierarchy, mkdir/rmdir test should be done.
+	Use tests like the following::
+
+		echo 1 >/opt/cgroup/01/memory/use_hierarchy
+		mkdir /opt/cgroup/01/child_a
+		mkdir /opt/cgroup/01/child_b
+
+		set limit to 01.
+		add limit to 01/child_b
+		run jobs under child_a and child_b
+
+	create/delete following groups at random while jobs are running::
+
+		/opt/cgroup/01/child_a/child_aa
+		/opt/cgroup/01/child_b/child_bb
+		/opt/cgroup/01/child_c
+
+	running new jobs in new group is also good.
+
+9.6 Mount with other subsystems
+-------------------------------
+
+	Mounting with other subsystems is a good test because there is a
+	race and lock dependency with other cgroup subsystems.
+
+	example::
+
+		# mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices
+
+	and do task move, mkdir, rmdir etc...under this.
+
+9.7 swapoff
+-----------
+
+	Besides management of swap is one of complicated parts of memcg,
+	call path of swap-in at swapoff is not same as usual swap-in path..
+	It's worth to be tested explicitly.
+
+	For example, test like following is good:
+
+	(Shell-A)::
+
+		# mount -t cgroup none /cgroup -o memory
+		# mkdir /cgroup/test
+		# echo 40M > /cgroup/test/memory.limit_in_bytes
+		# echo 0 > /cgroup/test/tasks
+
+	Run malloc(100M) program under this. You'll see 60M of swaps.
+
+	(Shell-B)::
+
+		# move all tasks in /cgroup/test to /cgroup
+		# /sbin/swapoff -a
+		# rmdir /cgroup/test
+		# kill malloc task.
+
+	Of course, tmpfs v.s. swapoff test should be tested, too.
+
+9.8 OOM-Killer
+--------------
+
+	Out-of-memory caused by memcg's limit will kill tasks under
+	the memcg. When hierarchy is used, a task under hierarchy
+	will be killed by the kernel.
+
+	In this case, panic_on_oom shouldn't be invoked and tasks
+	in other groups shouldn't be killed.
+
+	It's not difficult to cause OOM under memcg as following.
+
+	Case A) when you can swapoff::
+
+		#swapoff -a
+		#echo 50M > /memory.limit_in_bytes
+
+	run 51M of malloc
+
+	Case B) when you use mem+swap limitation::
+
+		#echo 50M > memory.limit_in_bytes
+		#echo 50M > memory.memsw.limit_in_bytes
+
+	run 51M of malloc
+
+9.9 Move charges at task migration
+----------------------------------
+
+	Charges associated with a task can be moved along with task migration.
+
+	(Shell-A)::
+
+		#mkdir /cgroup/A
+		#echo $$ >/cgroup/A/tasks
+
+	run some programs which uses some amount of memory in /cgroup/A.
+
+	(Shell-B)::
+
+		#mkdir /cgroup/B
+		#echo 1 >/cgroup/B/memory.move_charge_at_immigrate
+		#echo "pid of the program running in group A" >/cgroup/B/tasks
+
+	You can see charges have been moved by reading ``*.usage_in_bytes`` or
+	memory.stat of both A and B.
+
+	See 8.2 of Documentation/cgroup-v1/memory.rst to see what value should
+	be written to move_charge_at_immigrate.
+
+9.10 Memory thresholds
+----------------------
+
+	Memory controller implements memory thresholds using cgroups notification
+	API. You can use tools/cgroup/cgroup_event_listener.c to test it.
+
+	(Shell-A) Create cgroup and run event listener::
+
+		# mkdir /cgroup/A
+		# ./cgroup_event_listener /cgroup/A/memory.usage_in_bytes 5M
+
+	(Shell-B) Add task to cgroup and try to allocate and free memory::
+
+		# echo $$ >/cgroup/A/tasks
+		# a="$(dd if=/dev/zero bs=1M count=10)"
+		# a=
+
+	You will see message from cgroup_event_listener every time you cross
+	the thresholds.
+
+	Use /cgroup/A/memory.memsw.usage_in_bytes to test memsw thresholds.
+
+	It's good idea to test root cgroup as well.
diff --git a/Documentation/cgroup-v1/memcg_test.txt b/Documentation/cgroup-v1/memcg_test.txt
deleted file mode 100644
index 621e29ffb358..000000000000
--- a/Documentation/cgroup-v1/memcg_test.txt
+++ /dev/null
@@ -1,280 +0,0 @@
-Memory Resource Controller(Memcg)  Implementation Memo.
-Last Updated: 2010/2
-Base Kernel Version: based on 2.6.33-rc7-mm(candidate for 34).
-
-Because VM is getting complex (one of reasons is memcg...), memcg's behavior
-is complex. This is a document for memcg's internal behavior.
-Please note that implementation details can be changed.
-
-(*) Topics on API should be in Documentation/cgroup-v1/memory.txt)
-
-0. How to record usage ?
-   2 objects are used.
-
-   page_cgroup ....an object per page.
-	Allocated at boot or memory hotplug. Freed at memory hot removal.
-
-   swap_cgroup ... an entry per swp_entry.
-	Allocated at swapon(). Freed at swapoff().
-
-   The page_cgroup has USED bit and double count against a page_cgroup never
-   occurs. swap_cgroup is used only when a charged page is swapped-out.
-
-1. Charge
-
-   a page/swp_entry may be charged (usage += PAGE_SIZE) at
-
-	mem_cgroup_try_charge()
-
-2. Uncharge
-  a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
-
-	mem_cgroup_uncharge()
-	  Called when a page's refcount goes down to 0.
-
-	mem_cgroup_uncharge_swap()
-	  Called when swp_entry's refcnt goes down to 0. A charge against swap
-	  disappears.
-
-3. charge-commit-cancel
-	Memcg pages are charged in two steps:
-		mem_cgroup_try_charge()
-		mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
-
-	At try_charge(), there are no flags to say "this page is charged".
-	at this point, usage += PAGE_SIZE.
-
-	At commit(), the page is associated with the memcg.
-
-	At cancel(), simply usage -= PAGE_SIZE.
-
-Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
-
-4. Anonymous
-	Anonymous page is newly allocated at
-		  - page fault into MAP_ANONYMOUS mapping.
-		  - Copy-On-Write.
-
-	4.1 Swap-in.
-	At swap-in, the page is taken from swap-cache. There are 2 cases.
-
-	(a) If the SwapCache is newly allocated and read, it has no charges.
-	(b) If the SwapCache has been mapped by processes, it has been
-	    charged already.
-
-	4.2 Swap-out.
-	At swap-out, typical state transition is below.
-
-	(a) add to swap cache. (marked as SwapCache)
-	    swp_entry's refcnt += 1.
-	(b) fully unmapped.
-	    swp_entry's refcnt += # of ptes.
-	(c) write back to swap.
-	(d) delete from swap cache. (remove from SwapCache)
-	    swp_entry's refcnt -= 1.
-
-
-	Finally, at task exit,
-	(e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
-
-5. Page Cache
-   	Page Cache is charged at
-	- add_to_page_cache_locked().
-
-	The logic is very clear. (About migration, see below)
-	Note: __remove_from_page_cache() is called by remove_from_page_cache()
-	and __remove_mapping().
-
-6. Shmem(tmpfs) Page Cache
-	The best way to understand shmem's page state transition is to read
-	mm/shmem.c.
-	But brief explanation of the behavior of memcg around shmem will be
-	helpful to understand the logic.
-
-	Shmem's page (just leaf page, not direct/indirect block) can be on
-		- radix-tree of shmem's inode.
-		- SwapCache.
-		- Both on radix-tree and SwapCache. This happens at swap-in
-		  and swap-out,
-
-	It's charged when...
-	- A new page is added to shmem's radix-tree.
-	- A swp page is read. (move a charge from swap_cgroup to page_cgroup)
-
-7. Page Migration
-
-	mem_cgroup_migrate()
-
-8. LRU
-        Each memcg has its own private LRU. Now, its handling is under global
-	VM's control (means that it's handled under global pgdat->lru_lock).
-	Almost all routines around memcg's LRU is called by global LRU's
-	list management functions under pgdat->lru_lock.
-
-	A special function is mem_cgroup_isolate_pages(). This scans
-	memcg's private LRU and call __isolate_lru_page() to extract a page
-	from LRU.
-	(By __isolate_lru_page(), the page is removed from both of global and
-	 private LRU.)
-
-
-9. Typical Tests.
-
- Tests for racy cases.
-
- 9.1 Small limit to memcg.
-	When you do test to do racy case, it's good test to set memcg's limit
-	to be very small rather than GB. Many races found in the test under
-	xKB or xxMB limits.
-	(Memory behavior under GB and Memory behavior under MB shows very
-	 different situation.)
-
- 9.2 Shmem
-	Historically, memcg's shmem handling was poor and we saw some amount
-	of troubles here. This is because shmem is page-cache but can be
-	SwapCache. Test with shmem/tmpfs is always good test.
-
- 9.3 Migration
-	For NUMA, migration is an another special case. To do easy test, cpuset
-	is useful. Following is a sample script to do migration.
-
-	mount -t cgroup -o cpuset none /opt/cpuset
-
-	mkdir /opt/cpuset/01
-	echo 1 > /opt/cpuset/01/cpuset.cpus
-	echo 0 > /opt/cpuset/01/cpuset.mems
-	echo 1 > /opt/cpuset/01/cpuset.memory_migrate
-	mkdir /opt/cpuset/02
-	echo 1 > /opt/cpuset/02/cpuset.cpus
-	echo 1 > /opt/cpuset/02/cpuset.mems
-	echo 1 > /opt/cpuset/02/cpuset.memory_migrate
-
-	In above set, when you moves a task from 01 to 02, page migration to
-	node 0 to node 1 will occur. Following is a script to migrate all
-	under cpuset.
-	--
-	move_task()
-	{
-	for pid in $1
-        do
-                /bin/echo $pid >$2/tasks 2>/dev/null
-		echo -n $pid
-		echo -n " "
-        done
-	echo END
-	}
-
-	G1_TASK=`cat ${G1}/tasks`
-	G2_TASK=`cat ${G2}/tasks`
-	move_task "${G1_TASK}" ${G2} &
-	--
- 9.4 Memory hotplug.
-	memory hotplug test is one of good test.
-	to offline memory, do following.
-	# echo offline > /sys/devices/system/memory/memoryXXX/state
-	(XXX is the place of memory)
-	This is an easy way to test page migration, too.
-
- 9.5 mkdir/rmdir
-	When using hierarchy, mkdir/rmdir test should be done.
-	Use tests like the following.
-
-	echo 1 >/opt/cgroup/01/memory/use_hierarchy
-	mkdir /opt/cgroup/01/child_a
-	mkdir /opt/cgroup/01/child_b
-
-	set limit to 01.
-	add limit to 01/child_b
-	run jobs under child_a and child_b
-
-	create/delete following groups at random while jobs are running.
-	/opt/cgroup/01/child_a/child_aa
-	/opt/cgroup/01/child_b/child_bb
-	/opt/cgroup/01/child_c
-
-	running new jobs in new group is also good.
-
- 9.6 Mount with other subsystems.
-	Mounting with other subsystems is a good test because there is a
-	race and lock dependency with other cgroup subsystems.
-
-	example)
-	# mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices
-
-	and do task move, mkdir, rmdir etc...under this.
-
- 9.7 swapoff.
-	Besides management of swap is one of complicated parts of memcg,
-	call path of swap-in at swapoff is not same as usual swap-in path..
-	It's worth to be tested explicitly.
-
-	For example, test like following is good.
-	(Shell-A)
-	# mount -t cgroup none /cgroup -o memory
-	# mkdir /cgroup/test
-	# echo 40M > /cgroup/test/memory.limit_in_bytes
-	# echo 0 > /cgroup/test/tasks
-	Run malloc(100M) program under this. You'll see 60M of swaps.
-	(Shell-B)
-	# move all tasks in /cgroup/test to /cgroup
-	# /sbin/swapoff -a
-	# rmdir /cgroup/test
-	# kill malloc task.
-
-	Of course, tmpfs v.s. swapoff test should be tested, too.
-
- 9.8 OOM-Killer
-	Out-of-memory caused by memcg's limit will kill tasks under
-	the memcg. When hierarchy is used, a task under hierarchy
-	will be killed by the kernel.
-	In this case, panic_on_oom shouldn't be invoked and tasks
-	in other groups shouldn't be killed.
-
-	It's not difficult to cause OOM under memcg as following.
-	Case A) when you can swapoff
-	#swapoff -a
-	#echo 50M > /memory.limit_in_bytes
-	run 51M of malloc
-
-	Case B) when you use mem+swap limitation.
-	#echo 50M > memory.limit_in_bytes
-	#echo 50M > memory.memsw.limit_in_bytes
-	run 51M of malloc
-
- 9.9 Move charges at task migration
-	Charges associated with a task can be moved along with task migration.
-
-	(Shell-A)
-	#mkdir /cgroup/A
-	#echo $$ >/cgroup/A/tasks
-	run some programs which uses some amount of memory in /cgroup/A.
-
-	(Shell-B)
-	#mkdir /cgroup/B
-	#echo 1 >/cgroup/B/memory.move_charge_at_immigrate
-	#echo "pid of the program running in group A" >/cgroup/B/tasks
-
-	You can see charges have been moved by reading *.usage_in_bytes or
-	memory.stat of both A and B.
-	See 8.2 of Documentation/cgroup-v1/memory.txt to see what value should be
-	written to move_charge_at_immigrate.
-
- 9.10 Memory thresholds
-	Memory controller implements memory thresholds using cgroups notification
-	API. You can use tools/cgroup/cgroup_event_listener.c to test it.
-
-	(Shell-A) Create cgroup and run event listener
-	# mkdir /cgroup/A
-	# ./cgroup_event_listener /cgroup/A/memory.usage_in_bytes 5M
-
-	(Shell-B) Add task to cgroup and try to allocate and free memory
-	# echo $$ >/cgroup/A/tasks
-	# a="$(dd if=/dev/zero bs=1M count=10)"
-	# a=
-
-	You will see message from cgroup_event_listener every time you cross
-	the thresholds.
-
-	Use /cgroup/A/memory.memsw.usage_in_bytes to test memsw thresholds.
-
-	It's good idea to test root cgroup as well.
diff --git a/Documentation/cgroup-v1/memory.rst b/Documentation/cgroup-v1/memory.rst
new file mode 100644
index 000000000000..41bdc038dad9
--- /dev/null
+++ b/Documentation/cgroup-v1/memory.rst
@@ -0,0 +1,1003 @@
+==========================
+Memory Resource Controller
+==========================
+
+NOTE:
+      This document is hopelessly outdated and it asks for a complete
+      rewrite. It still contains a useful information so we are keeping it
+      here but make sure to check the current code if you need a deeper
+      understanding.
+
+NOTE:
+      The Memory Resource Controller has generically been referred to as the
+      memory controller in this document. Do not confuse memory controller
+      used here with the memory controller that is used in hardware.
+
+(For editors) In this document:
+      When we mention a cgroup (cgroupfs's directory) with memory controller,
+      we call it "memory cgroup". When you see git-log and source code, you'll
+      see patch's title and function names tend to use "memcg".
+      In this document, we avoid using it.
+
+Benefits and Purpose of the memory controller
+=============================================
+
+The memory controller isolates the memory behaviour of a group of tasks
+from the rest of the system. The article on LWN [12] mentions some probable
+uses of the memory controller. The memory controller can be used to
+
+a. Isolate an application or a group of applications
+   Memory-hungry applications can be isolated and limited to a smaller
+   amount of memory.
+b. Create a cgroup with a limited amount of memory; this can be used
+   as a good alternative to booting with mem=XXXX.
+c. Virtualization solutions can control the amount of memory they want
+   to assign to a virtual machine instance.
+d. A CD/DVD burner could control the amount of memory used by the
+   rest of the system to ensure that burning does not fail due to lack
+   of available memory.
+e. There are several other use cases; find one or use the controller just
+   for fun (to learn and hack on the VM subsystem).
+
+Current Status: linux-2.6.34-mmotm(development version of 2010/April)
+
+Features:
+
+ - accounting anonymous pages, file caches, swap caches usage and limiting them.
+ - pages are linked to per-memcg LRU exclusively, and there is no global LRU.
+ - optionally, memory+swap usage can be accounted and limited.
+ - hierarchical accounting
+ - soft limit
+ - moving (recharging) account at moving a task is selectable.
+ - usage threshold notifier
+ - memory pressure notifier
+ - oom-killer disable knob and oom-notifier
+ - Root cgroup has no limit controls.
+
+ Kernel memory support is a work in progress, and the current version provides
+ basically functionality. (See Section 2.7)
+
+Brief summary of control files.
+
+==================================== ==========================================
+ tasks				     attach a task(thread) and show list of
+				     threads
+ cgroup.procs			     show list of processes
+ cgroup.event_control		     an interface for event_fd()
+ memory.usage_in_bytes		     show current usage for memory
+				     (See 5.5 for details)
+ memory.memsw.usage_in_bytes	     show current usage for memory+Swap
+				     (See 5.5 for details)
+ memory.limit_in_bytes		     set/show limit of memory usage
+ memory.memsw.limit_in_bytes	     set/show limit of memory+Swap usage
+ memory.failcnt			     show the number of memory usage hits limits
+ memory.memsw.failcnt		     show the number of memory+Swap hits limits
+ memory.max_usage_in_bytes	     show max memory usage recorded
+ memory.memsw.max_usage_in_bytes     show max memory+Swap usage recorded
+ memory.soft_limit_in_bytes	     set/show soft limit of memory usage
+ memory.stat			     show various statistics
+ memory.use_hierarchy		     set/show hierarchical account enabled
+ memory.force_empty		     trigger forced page reclaim
+ memory.pressure_level		     set memory pressure notifications
+ memory.swappiness		     set/show swappiness parameter of vmscan
+				     (See sysctl's vm.swappiness)
+ memory.move_charge_at_immigrate     set/show controls of moving charges
+ memory.oom_control		     set/show oom controls.
+ memory.numa_stat		     show the number of memory usage per numa
+				     node
+
+ memory.kmem.limit_in_bytes          set/show hard limit for kernel memory
+ memory.kmem.usage_in_bytes          show current kernel memory allocation
+ memory.kmem.failcnt                 show the number of kernel memory usage
+				     hits limits
+ memory.kmem.max_usage_in_bytes      show max kernel memory usage recorded
+
+ memory.kmem.tcp.limit_in_bytes      set/show hard limit for tcp buf memory
+ memory.kmem.tcp.usage_in_bytes      show current tcp buf memory allocation
+ memory.kmem.tcp.failcnt             show the number of tcp buf memory usage
+				     hits limits
+ memory.kmem.tcp.max_usage_in_bytes  show max tcp buf memory usage recorded
+==================================== ==========================================
+
+1. History
+==========
+
+The memory controller has a long history. A request for comments for the memory
+controller was posted by Balbir Singh [1]. At the time the RFC was posted
+there were several implementations for memory control. The goal of the
+RFC was to build consensus and agreement for the minimal features required
+for memory control. The first RSS controller was posted by Balbir Singh[2]
+in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the
+RSS controller. At OLS, at the resource management BoF, everyone suggested
+that we handle both page cache and RSS together. Another request was raised
+to allow user space handling of OOM. The current memory controller is
+at version 6; it combines both mapped (RSS) and unmapped Page
+Cache Control [11].
+
+2. Memory Control
+=================
+
+Memory is a unique resource in the sense that it is present in a limited
+amount. If a task requires a lot of CPU processing, the task can spread
+its processing over a period of hours, days, months or years, but with
+memory, the same physical memory needs to be reused to accomplish the task.
+
+The memory controller implementation has been divided into phases. These
+are:
+
+1. Memory controller
+2. mlock(2) controller
+3. Kernel user memory accounting and slab control
+4. user mappings length controller
+
+The memory controller is the first controller developed.
+
+2.1. Design
+-----------
+
+The core of the design is a counter called the page_counter. The
+page_counter tracks the current memory usage and limit of the group of
+processes associated with the controller. Each cgroup has a memory controller
+specific data structure (mem_cgroup) associated with it.
+
+2.2. Accounting
+---------------
+
+::
+
+		+--------------------+
+		|  mem_cgroup        |
+		|  (page_counter)    |
+		+--------------------+
+		 /            ^      \
+		/             |       \
+           +---------------+  |        +---------------+
+           | mm_struct     |  |....    | mm_struct     |
+           |               |  |        |               |
+           +---------------+  |        +---------------+
+                              |
+                              + --------------+
+                                              |
+           +---------------+           +------+--------+
+           | page          +---------->  page_cgroup|
+           |               |           |               |
+           +---------------+           +---------------+
+
+             (Figure 1: Hierarchy of Accounting)
+
+
+Figure 1 shows the important aspects of the controller
+
+1. Accounting happens per cgroup
+2. Each mm_struct knows about which cgroup it belongs to
+3. Each page has a pointer to the page_cgroup, which in turn knows the
+   cgroup it belongs to
+
+The accounting is done as follows: mem_cgroup_charge_common() is invoked to
+set up the necessary data structures and check if the cgroup that is being
+charged is over its limit. If it is, then reclaim is invoked on the cgroup.
+More details can be found in the reclaim section of this document.
+If everything goes well, a page meta-data-structure called page_cgroup is
+updated. page_cgroup has its own LRU on cgroup.
+(*) page_cgroup structure is allocated at boot/memory-hotplug time.
+
+2.2.1 Accounting details
+------------------------
+
+All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
+Some pages which are never reclaimable and will not be on the LRU
+are not accounted. We just account pages under usual VM management.
+
+RSS pages are accounted at page_fault unless they've already been accounted
+for earlier. A file page will be accounted for as Page Cache when it's
+inserted into inode (radix-tree). While it's mapped into the page tables of
+processes, duplicate accounting is carefully avoided.
+
+An RSS page is unaccounted when it's fully unmapped. A PageCache page is
+unaccounted when it's removed from radix-tree. Even if RSS pages are fully
+unmapped (by kswapd), they may exist as SwapCache in the system until they
+are really freed. Such SwapCaches are also accounted.
+A swapped-in page is not accounted until it's mapped.
+
+Note: The kernel does swapin-readahead and reads multiple swaps at once.
+This means swapped-in pages may contain pages for other tasks than a task
+causing page fault. So, we avoid accounting at swap-in I/O.
+
+At page migration, accounting information is kept.
+
+Note: we just account pages-on-LRU because our purpose is to control amount
+of used pages; not-on-LRU pages tend to be out-of-control from VM view.
+
+2.3 Shared Page Accounting
+--------------------------
+
+Shared pages are accounted on the basis of the first touch approach. The
+cgroup that first touches a page is accounted for the page. The principle
+behind this approach is that a cgroup that aggressively uses a shared
+page will eventually get charged for it (once it is uncharged from
+the cgroup that brought it in -- this will happen on memory pressure).
+
+But see section 8.2: when moving a task to another cgroup, its pages may
+be recharged to the new cgroup, if move_charge_at_immigrate has been chosen.
+
+Exception: If CONFIG_MEMCG_SWAP is not used.
+When you do swapoff and make swapped-out pages of shmem(tmpfs) to
+be backed into memory in force, charges for pages are accounted against the
+caller of swapoff rather than the users of shmem.
+
+2.4 Swap Extension (CONFIG_MEMCG_SWAP)
+--------------------------------------
+
+Swap Extension allows you to record charge for swap. A swapped-in page is
+charged back to original page allocator if possible.
+
+When swap is accounted, following files are added.
+
+ - memory.memsw.usage_in_bytes.
+ - memory.memsw.limit_in_bytes.
+
+memsw means memory+swap. Usage of memory+swap is limited by
+memsw.limit_in_bytes.
+
+Example: Assume a system with 4G of swap. A task which allocates 6G of memory
+(by mistake) under 2G memory limitation will use all swap.
+In this case, setting memsw.limit_in_bytes=3G will prevent bad use of swap.
+By using the memsw limit, you can avoid system OOM which can be caused by swap
+shortage.
+
+**why 'memory+swap' rather than swap**
+
+The global LRU(kswapd) can swap out arbitrary pages. Swap-out means
+to move account from memory to swap...there is no change in usage of
+memory+swap. In other words, when we want to limit the usage of swap without
+affecting global LRU, memory+swap limit is better than just limiting swap from
+an OS point of view.
+
+**What happens when a cgroup hits memory.memsw.limit_in_bytes**
+
+When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out
+in this cgroup. Then, swap-out will not be done by cgroup routine and file
+caches are dropped. But as mentioned above, global LRU can do swapout memory
+from it for sanity of the system's memory management state. You can't forbid
+it by cgroup.
+
+2.5 Reclaim
+-----------
+
+Each cgroup maintains a per cgroup LRU which has the same structure as
+global VM. When a cgroup goes over its limit, we first try
+to reclaim memory from the cgroup so as to make space for the new
+pages that the cgroup has touched. If the reclaim is unsuccessful,
+an OOM routine is invoked to select and kill the bulkiest task in the
+cgroup. (See 10. OOM Control below.)
+
+The reclaim algorithm has not been modified for cgroups, except that
+pages that are selected for reclaiming come from the per-cgroup LRU
+list.
+
+NOTE:
+  Reclaim does not work for the root cgroup, since we cannot set any
+  limits on the root cgroup.
+
+Note2:
+  When panic_on_oom is set to "2", the whole system will panic.
+
+When oom event notifier is registered, event will be delivered.
+(See oom_control section)
+
+2.6 Locking
+-----------
+
+   lock_page_cgroup()/unlock_page_cgroup() should not be called under
+   the i_pages lock.
+
+   Other lock order is following:
+
+   PG_locked.
+     mm->page_table_lock
+         pgdat->lru_lock
+	   lock_page_cgroup.
+
+  In many cases, just lock_page_cgroup() is called.
+
+  per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
+  pgdat->lru_lock, it has no lock of its own.
+
+2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)
+-----------------------------------------------
+
+With the Kernel memory extension, the Memory Controller is able to limit
+the amount of kernel memory used by the system. Kernel memory is fundamentally
+different than user memory, since it can't be swapped out, which makes it
+possible to DoS the system by consuming too much of this precious resource.
+
+Kernel memory accounting is enabled for all memory cgroups by default. But
+it can be disabled system-wide by passing cgroup.memory=nokmem to the kernel
+at boot time. In this case, kernel memory will not be accounted at all.
+
+Kernel memory limits are not imposed for the root cgroup. Usage for the root
+cgroup may or may not be accounted. The memory used is accumulated into
+memory.kmem.usage_in_bytes, or in a separate counter when it makes sense.
+(currently only for tcp).
+
+The main "kmem" counter is fed into the main counter, so kmem charges will
+also be visible from the user counter.
+
+Currently no soft limit is implemented for kernel memory. It is future work
+to trigger slab reclaim when those limits are reached.
+
+2.7.1 Current Kernel Memory resources accounted
+-----------------------------------------------
+
+stack pages:
+  every process consumes some stack pages. By accounting into
+  kernel memory, we prevent new processes from being created when the kernel
+  memory usage is too high.
+
+slab pages:
+  pages allocated by the SLAB or SLUB allocator are tracked. A copy
+  of each kmem_cache is created every time the cache is touched by the first time
+  from inside the memcg. The creation is done lazily, so some objects can still be
+  skipped while the cache is being created. All objects in a slab page should
+  belong to the same memcg. This only fails to hold when a task is migrated to a
+  different memcg during the page allocation by the cache.
+
+sockets memory pressure:
+  some sockets protocols have memory pressure
+  thresholds. The Memory Controller allows them to be controlled individually
+  per cgroup, instead of globally.
+
+tcp memory pressure:
+  sockets memory pressure for the tcp protocol.
+
+2.7.2 Common use cases
+----------------------
+
+Because the "kmem" counter is fed to the main user counter, kernel memory can
+never be limited completely independently of user memory. Say "U" is the user
+limit, and "K" the kernel limit. There are three possible ways limits can be
+set:
+
+U != 0, K = unlimited:
+    This is the standard memcg limitation mechanism already present before kmem
+    accounting. Kernel memory is completely ignored.
+
+U != 0, K < U:
+    Kernel memory is a subset of the user memory. This setup is useful in
+    deployments where the total amount of memory per-cgroup is overcommited.
+    Overcommiting kernel memory limits is definitely not recommended, since the
+    box can still run out of non-reclaimable memory.
+    In this case, the admin could set up K so that the sum of all groups is
+    never greater than the total memory, and freely set U at the cost of his
+    QoS.
+
+WARNING:
+    In the current implementation, memory reclaim will NOT be
+    triggered for a cgroup when it hits K while staying below U, which makes
+    this setup impractical.
+
+U != 0, K >= U:
+    Since kmem charges will also be fed to the user counter and reclaim will be
+    triggered for the cgroup for both kinds of memory. This setup gives the
+    admin a unified view of memory, and it is also useful for people who just
+    want to track kernel memory usage.
+
+3. User Interface
+=================
+
+3.0. Configuration
+------------------
+
+a. Enable CONFIG_CGROUPS
+b. Enable CONFIG_MEMCG
+c. Enable CONFIG_MEMCG_SWAP (to use swap extension)
+d. Enable CONFIG_MEMCG_KMEM (to use kmem extension)
+
+3.1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?)
+-------------------------------------------------------------------
+
+::
+
+	# mount -t tmpfs none /sys/fs/cgroup
+	# mkdir /sys/fs/cgroup/memory
+	# mount -t cgroup none /sys/fs/cgroup/memory -o memory
+
+3.2. Make the new group and move bash into it::
+
+	# mkdir /sys/fs/cgroup/memory/0
+	# echo $$ > /sys/fs/cgroup/memory/0/tasks
+
+Since now we're in the 0 cgroup, we can alter the memory limit::
+
+	# echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes
+
+NOTE:
+  We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
+  mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes,
+  Gibibytes.)
+
+NOTE:
+  We can write "-1" to reset the ``*.limit_in_bytes(unlimited)``.
+
+NOTE:
+  We cannot set limits on the root cgroup any more.
+
+::
+
+  # cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes
+  4194304
+
+We can check the usage::
+
+  # cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes
+  1216512
+
+A successful write to this file does not guarantee a successful setting of
+this limit to the value written into the file. This can be due to a
+number of factors, such as rounding up to page boundaries or the total
+availability of memory on the system. The user is required to re-read
+this file after a write to guarantee the value committed by the kernel::
+
+  # echo 1 > memory.limit_in_bytes
+  # cat memory.limit_in_bytes
+  4096
+
+The memory.failcnt field gives the number of times that the cgroup limit was
+exceeded.
+
+The memory.stat file gives accounting information. Now, the number of
+caches, RSS and Active pages/Inactive pages are shown.
+
+4. Testing
+==========
+
+For testing features and implementation, see memcg_test.txt.
+
+Performance test is also important. To see pure memory controller's overhead,
+testing on tmpfs will give you good numbers of small overheads.
+Example: do kernel make on tmpfs.
+
+Page-fault scalability is also important. At measuring parallel
+page fault test, multi-process test may be better than multi-thread
+test because it has noise of shared objects/status.
+
+But the above two are testing extreme situations.
+Trying usual test under memory controller is always helpful.
+
+4.1 Troubleshooting
+-------------------
+
+Sometimes a user might find that the application under a cgroup is
+terminated by the OOM killer. There are several causes for this:
+
+1. The cgroup limit is too low (just too low to do anything useful)
+2. The user is using anonymous memory and swap is turned off or too low
+
+A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of
+some of the pages cached in the cgroup (page cache pages).
+
+To know what happens, disabling OOM_Kill as per "10. OOM Control" (below) and
+seeing what happens will be helpful.
+
+4.2 Task migration
+------------------
+
+When a task migrates from one cgroup to another, its charge is not
+carried forward by default. The pages allocated from the original cgroup still
+remain charged to it, the charge is dropped when the page is freed or
+reclaimed.
+
+You can move charges of a task along with task migration.
+See 8. "Move charges at task migration"
+
+4.3 Removing a cgroup
+---------------------
+
+A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
+cgroup might have some charge associated with it, even though all
+tasks have migrated away from it. (because we charge against pages, not
+against tasks.)
+
+We move the stats to root (if use_hierarchy==0) or parent (if
+use_hierarchy==1), and no change on the charge except uncharging
+from the child.
+
+Charges recorded in swap information is not updated at removal of cgroup.
+Recorded information is discarded and a cgroup which uses swap (swapcache)
+will be charged as a new owner of it.
+
+About use_hierarchy, see Section 6.
+
+5. Misc. interfaces
+===================
+
+5.1 force_empty
+---------------
+  memory.force_empty interface is provided to make cgroup's memory usage empty.
+  When writing anything to this::
+
+    # echo 0 > memory.force_empty
+
+  the cgroup will be reclaimed and as many pages reclaimed as possible.
+
+  The typical use case for this interface is before calling rmdir().
+  Though rmdir() offlines memcg, but the memcg may still stay there due to
+  charged file caches. Some out-of-use page caches may keep charged until
+  memory pressure happens. If you want to avoid that, force_empty will be useful.
+
+  Also, note that when memory.kmem.limit_in_bytes is set the charges due to
+  kernel pages will still be seen. This is not considered a failure and the
+  write will still return success. In this case, it is expected that
+  memory.kmem.usage_in_bytes == memory.usage_in_bytes.
+
+  About use_hierarchy, see Section 6.
+
+5.2 stat file
+-------------
+
+memory.stat file includes following statistics
+
+per-memory cgroup local status
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+=============== ===============================================================
+cache		# of bytes of page cache memory.
+rss		# of bytes of anonymous and swap cache memory (includes
+		transparent hugepages).
+rss_huge	# of bytes of anonymous transparent hugepages.
+mapped_file	# of bytes of mapped file (includes tmpfs/shmem)
+pgpgin		# of charging events to the memory cgroup. The charging
+		event happens each time a page is accounted as either mapped
+		anon page(RSS) or cache page(Page Cache) to the cgroup.
+pgpgout		# of uncharging events to the memory cgroup. The uncharging
+		event happens each time a page is unaccounted from the cgroup.
+swap		# of bytes of swap usage
+dirty		# of bytes that are waiting to get written back to the disk.
+writeback	# of bytes of file/anon cache that are queued for syncing to
+		disk.
+inactive_anon	# of bytes of anonymous and swap cache memory on inactive
+		LRU list.
+active_anon	# of bytes of anonymous and swap cache memory on active
+		LRU list.
+inactive_file	# of bytes of file-backed memory on inactive LRU list.
+active_file	# of bytes of file-backed memory on active LRU list.
+unevictable	# of bytes of memory that cannot be reclaimed (mlocked etc).
+=============== ===============================================================
+
+status considering hierarchy (see memory.use_hierarchy settings)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+========================= ===================================================
+hierarchical_memory_limit # of bytes of memory limit with regard to hierarchy
+			  under which the memory cgroup is
+hierarchical_memsw_limit  # of bytes of memory+swap limit with regard to
+			  hierarchy under which memory cgroup is.
+
+total_<counter>		  # hierarchical version of <counter>, which in
+			  addition to the cgroup's own value includes the
+			  sum of all hierarchical children's values of
+			  <counter>, i.e. total_cache
+========================= ===================================================
+
+The following additional stats are dependent on CONFIG_DEBUG_VM
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+========================= ========================================
+recent_rotated_anon	  VM internal parameter. (see mm/vmscan.c)
+recent_rotated_file	  VM internal parameter. (see mm/vmscan.c)
+recent_scanned_anon	  VM internal parameter. (see mm/vmscan.c)
+recent_scanned_file	  VM internal parameter. (see mm/vmscan.c)
+========================= ========================================
+
+Memo:
+	recent_rotated means recent frequency of LRU rotation.
+	recent_scanned means recent # of scans to LRU.
+	showing for better debug please see the code for meanings.
+
+Note:
+	Only anonymous and swap cache memory is listed as part of 'rss' stat.
+	This should not be confused with the true 'resident set size' or the
+	amount of physical memory used by the cgroup.
+
+	'rss + mapped_file" will give you resident set size of cgroup.
+
+	(Note: file and shmem may be shared among other cgroups. In that case,
+	mapped_file is accounted only when the memory cgroup is owner of page
+	cache.)
+
+5.3 swappiness
+--------------
+
+Overrides /proc/sys/vm/swappiness for the particular group. The tunable
+in the root cgroup corresponds to the global swappiness setting.
+
+Please note that unlike during the global reclaim, limit reclaim
+enforces that 0 swappiness really prevents from any swapping even if
+there is a swap storage available. This might lead to memcg OOM killer
+if there are no file pages to reclaim.
+
+5.4 failcnt
+-----------
+
+A memory cgroup provides memory.failcnt and memory.memsw.failcnt files.
+This failcnt(== failure count) shows the number of times that a usage counter
+hit its limit. When a memory cgroup hits a limit, failcnt increases and
+memory under it will be reclaimed.
+
+You can reset failcnt by writing 0 to failcnt file::
+
+	# echo 0 > .../memory.failcnt
+
+5.5 usage_in_bytes
+------------------
+
+For efficiency, as other kernel components, memory cgroup uses some optimization
+to avoid unnecessary cacheline false sharing. usage_in_bytes is affected by the
+method and doesn't show 'exact' value of memory (and swap) usage, it's a fuzz
+value for efficient access. (Of course, when necessary, it's synchronized.)
+If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP)
+value in memory.stat(see 5.2).
+
+5.6 numa_stat
+-------------
+
+This is similar to numa_maps but operates on a per-memcg basis.  This is
+useful for providing visibility into the numa locality information within
+an memcg since the pages are allowed to be allocated from any physical
+node.  One of the use cases is evaluating application performance by
+combining this information with the application's CPU allocation.
+
+Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable"
+per-node page counts including "hierarchical_<counter>" which sums up all
+hierarchical children's values in addition to the memcg's own value.
+
+The output format of memory.numa_stat is::
+
+  total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
+  file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
+  anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
+  unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
+  hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
+
+The "total" count is sum of file + anon + unevictable.
+
+6. Hierarchy support
+====================
+
+The memory controller supports a deep hierarchy and hierarchical accounting.
+The hierarchy is created by creating the appropriate cgroups in the
+cgroup filesystem. Consider for example, the following cgroup filesystem
+hierarchy::
+
+	       root
+	     /  |   \
+            /	|    \
+	   a	b     c
+		      | \
+		      |  \
+		      d   e
+
+In the diagram above, with hierarchical accounting enabled, all memory
+usage of e, is accounted to its ancestors up until the root (i.e, c and root),
+that has memory.use_hierarchy enabled. If one of the ancestors goes over its
+limit, the reclaim algorithm reclaims from the tasks in the ancestor and the
+children of the ancestor.
+
+6.1 Enabling hierarchical accounting and reclaim
+------------------------------------------------
+
+A memory cgroup by default disables the hierarchy feature. Support
+can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup::
+
+	# echo 1 > memory.use_hierarchy
+
+The feature can be disabled by::
+
+	# echo 0 > memory.use_hierarchy
+
+NOTE1:
+       Enabling/disabling will fail if either the cgroup already has other
+       cgroups created below it, or if the parent cgroup has use_hierarchy
+       enabled.
+
+NOTE2:
+       When panic_on_oom is set to "2", the whole system will panic in
+       case of an OOM event in any cgroup.
+
+7. Soft limits
+==============
+
+Soft limits allow for greater sharing of memory. The idea behind soft limits
+is to allow control groups to use as much of the memory as needed, provided
+
+a. There is no memory contention
+b. They do not exceed their hard limit
+
+When the system detects memory contention or low memory, control groups
+are pushed back to their soft limits. If the soft limit of each control
+group is very high, they are pushed back as much as possible to make
+sure that one control group does not starve the others of memory.
+
+Please note that soft limits is a best-effort feature; it comes with
+no guarantees, but it does its best to make sure that when memory is
+heavily contended for, memory is allocated based on the soft limit
+hints/setup. Currently soft limit based reclaim is set up such that
+it gets invoked from balance_pgdat (kswapd).
+
+7.1 Interface
+-------------
+
+Soft limits can be setup by using the following commands (in this example we
+assume a soft limit of 256 MiB)::
+
+	# echo 256M > memory.soft_limit_in_bytes
+
+If we want to change this to 1G, we can at any time use::
+
+	# echo 1G > memory.soft_limit_in_bytes
+
+NOTE1:
+       Soft limits take effect over a long period of time, since they involve
+       reclaiming memory for balancing between memory cgroups
+NOTE2:
+       It is recommended to set the soft limit always below the hard limit,
+       otherwise the hard limit will take precedence.
+
+8. Move charges at task migration
+=================================
+
+Users can move charges associated with a task along with task migration, that
+is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
+This feature is not supported in !CONFIG_MMU environments because of lack of
+page tables.
+
+8.1 Interface
+-------------
+
+This feature is disabled by default. It can be enabled (and disabled again) by
+writing to memory.move_charge_at_immigrate of the destination cgroup.
+
+If you want to enable it::
+
+	# echo (some positive value) > memory.move_charge_at_immigrate
+
+Note:
+      Each bits of move_charge_at_immigrate has its own meaning about what type
+      of charges should be moved. See 8.2 for details.
+Note:
+      Charges are moved only when you move mm->owner, in other words,
+      a leader of a thread group.
+Note:
+      If we cannot find enough space for the task in the destination cgroup, we
+      try to make space by reclaiming memory. Task migration may fail if we
+      cannot make enough space.
+Note:
+      It can take several seconds if you move charges much.
+
+And if you want disable it again::
+
+	# echo 0 > memory.move_charge_at_immigrate
+
+8.2 Type of charges which can be moved
+--------------------------------------
+
+Each bit in move_charge_at_immigrate has its own meaning about what type of
+charges should be moved. But in any case, it must be noted that an account of
+a page or a swap can be moved only when it is charged to the task's current
+(old) memory cgroup.
+
++---+--------------------------------------------------------------------------+
+|bit| what type of charges would be moved ?                                    |
++===+==========================================================================+
+| 0 | A charge of an anonymous page (or swap of it) used by the target task.   |
+|   | You must enable Swap Extension (see 2.4) to enable move of swap charges. |
++---+--------------------------------------------------------------------------+
+| 1 | A charge of file pages (normal file, tmpfs file (e.g. ipc shared memory) |
+|   | and swaps of tmpfs file) mmapped by the target task. Unlike the case of  |
+|   | anonymous pages, file pages (and swaps) in the range mmapped by the task |
+|   | will be moved even if the task hasn't done page fault, i.e. they might   |
+|   | not be the task's "RSS", but other task's "RSS" that maps the same file. |
+|   | And mapcount of the page is ignored (the page can be moved even if       |
+|   | page_mapcount(page) > 1). You must enable Swap Extension (see 2.4) to    |
+|   | enable move of swap charges.                                             |
++---+--------------------------------------------------------------------------+
+
+8.3 TODO
+--------
+
+- All of moving charge operations are done under cgroup_mutex. It's not good
+  behavior to hold the mutex too long, so we may need some trick.
+
+9. Memory thresholds
+====================
+
+Memory cgroup implements memory thresholds using the cgroups notification
+API (see cgroups.txt). It allows to register multiple memory and memsw
+thresholds and gets notifications when it crosses.
+
+To register a threshold, an application must:
+
+- create an eventfd using eventfd(2);
+- open memory.usage_in_bytes or memory.memsw.usage_in_bytes;
+- write string like "<event_fd> <fd of memory.usage_in_bytes> <threshold>" to
+  cgroup.event_control.
+
+Application will be notified through eventfd when memory usage crosses
+threshold in any direction.
+
+It's applicable for root and non-root cgroup.
+
+10. OOM Control
+===============
+
+memory.oom_control file is for OOM notification and other controls.
+
+Memory cgroup implements OOM notifier using the cgroup notification
+API (See cgroups.txt). It allows to register multiple OOM notification
+delivery and gets notification when OOM happens.
+
+To register a notifier, an application must:
+
+ - create an eventfd using eventfd(2)
+ - open memory.oom_control file
+ - write string like "<event_fd> <fd of memory.oom_control>" to
+   cgroup.event_control
+
+The application will be notified through eventfd when OOM happens.
+OOM notification doesn't work for the root cgroup.
+
+You can disable the OOM-killer by writing "1" to memory.oom_control file, as:
+
+	#echo 1 > memory.oom_control
+
+If OOM-killer is disabled, tasks under cgroup will hang/sleep
+in memory cgroup's OOM-waitqueue when they request accountable memory.
+
+For running them, you have to relax the memory cgroup's OOM status by
+
+	* enlarge limit or reduce usage.
+
+To reduce usage,
+
+	* kill some tasks.
+	* move some tasks to other group with account migration.
+	* remove some files (on tmpfs?)
+
+Then, stopped tasks will work again.
+
+At reading, current status of OOM is shown.
+
+	- oom_kill_disable 0 or 1
+	  (if 1, oom-killer is disabled)
+	- under_oom	   0 or 1
+	  (if 1, the memory cgroup is under OOM, tasks may be stopped.)
+
+11. Memory Pressure
+===================
+
+The pressure level notifications can be used to monitor the memory
+allocation cost; based on the pressure, applications can implement
+different strategies of managing their memory resources. The pressure
+levels are defined as following:
+
+The "low" level means that the system is reclaiming memory for new
+allocations. Monitoring this reclaiming activity might be useful for
+maintaining cache level. Upon notification, the program (typically
+"Activity Manager") might analyze vmstat and act in advance (i.e.
+prematurely shutdown unimportant services).
+
+The "medium" level means that the system is experiencing medium memory
+pressure, the system might be making swap, paging out active file caches,
+etc. Upon this event applications may decide to further analyze
+vmstat/zoneinfo/memcg or internal memory usage statistics and free any
+resources that can be easily reconstructed or re-read from a disk.
+
+The "critical" level means that the system is actively thrashing, it is
+about to out of memory (OOM) or even the in-kernel OOM killer is on its
+way to trigger. Applications should do whatever they can to help the
+system. It might be too late to consult with vmstat or any other
+statistics, so it's advisable to take an immediate action.
+
+By default, events are propagated upward until the event is handled, i.e. the
+events are not pass-through. For example, you have three cgroups: A->B->C. Now
+you set up an event listener on cgroups A, B and C, and suppose group C
+experiences some pressure. In this situation, only group C will receive the
+notification, i.e. groups A and B will not receive it. This is done to avoid
+excessive "broadcasting" of messages, which disturbs the system and which is
+especially bad if we are low on memory or thrashing. Group B, will receive
+notification only if there are no event listers for group C.
+
+There are three optional modes that specify different propagation behavior:
+
+ - "default": this is the default behavior specified above. This mode is the
+   same as omitting the optional mode parameter, preserved by backwards
+   compatibility.
+
+ - "hierarchy": events always propagate up to the root, similar to the default
+   behavior, except that propagation continues regardless of whether there are
+   event listeners at each level, with the "hierarchy" mode. In the above
+   example, groups A, B, and C will receive notification of memory pressure.
+
+ - "local": events are pass-through, i.e. they only receive notifications when
+   memory pressure is experienced in the memcg for which the notification is
+   registered. In the above example, group C will receive notification if
+   registered for "local" notification and the group experiences memory
+   pressure. However, group B will never receive notification, regardless if
+   there is an event listener for group C or not, if group B is registered for
+   local notification.
+
+The level and event notification mode ("hierarchy" or "local", if necessary) are
+specified by a comma-delimited string, i.e. "low,hierarchy" specifies
+hierarchical, pass-through, notification for all ancestor memcgs. Notification
+that is the default, non pass-through behavior, does not specify a mode.
+"medium,local" specifies pass-through notification for the medium level.
+
+The file memory.pressure_level is only used to setup an eventfd. To
+register a notification, an application must:
+
+- create an eventfd using eventfd(2);
+- open memory.pressure_level;
+- write string as "<event_fd> <fd of memory.pressure_level> <level[,mode]>"
+  to cgroup.event_control.
+
+Application will be notified through eventfd when memory pressure is at
+the specific level (or higher). Read/write operations to
+memory.pressure_level are no implemented.
+
+Test:
+
+   Here is a small script example that makes a new cgroup, sets up a
+   memory limit, sets up a notification in the cgroup and then makes child
+   cgroup experience a critical pressure::
+
+	# cd /sys/fs/cgroup/memory/
+	# mkdir foo
+	# cd foo
+	# cgroup_event_listener memory.pressure_level low,hierarchy &
+	# echo 8000000 > memory.limit_in_bytes
+	# echo 8000000 > memory.memsw.limit_in_bytes
+	# echo $$ > tasks
+	# dd if=/dev/zero | read x
+
+   (Expect a bunch of notifications, and eventually, the oom-killer will
+   trigger.)
+
+12. TODO
+========
+
+1. Make per-cgroup scanner reclaim not-shared pages first
+2. Teach controller to account for shared-pages
+3. Start reclamation in the background when the limit is
+   not yet hit but the usage is getting closer
+
+Summary
+=======
+
+Overall, the memory controller has been a stable controller and has been
+commented and discussed quite extensively in the community.
+
+References
+==========
+
+1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/
+2. Singh, Balbir. Memory Controller (RSS Control),
+   http://lwn.net/Articles/222762/
+3. Emelianov, Pavel. Resource controllers based on process cgroups
+   http://lkml.org/lkml/2007/3/6/198
+4. Emelianov, Pavel. RSS controller based on process cgroups (v2)
+   http://lkml.org/lkml/2007/4/9/78
+5. Emelianov, Pavel. RSS controller based on process cgroups (v3)
+   http://lkml.org/lkml/2007/5/30/244
+6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/
+7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control
+   subsystem (v3), http://lwn.net/Articles/235534/
+8. Singh, Balbir. RSS controller v2 test results (lmbench),
+   http://lkml.org/lkml/2007/5/17/232
+9. Singh, Balbir. RSS controller v2 AIM9 results
+   http://lkml.org/lkml/2007/5/18/1
+10. Singh, Balbir. Memory controller v6 test results,
+    http://lkml.org/lkml/2007/8/19/36
+11. Singh, Balbir. Memory controller introduction (v6),
+    http://lkml.org/lkml/2007/8/17/69
+12. Corbet, Jonathan, Controlling memory use in cgroups,
+    http://lwn.net/Articles/243795/
diff --git a/Documentation/cgroup-v1/memory.txt b/Documentation/cgroup-v1/memory.txt
deleted file mode 100644
index a33cedf85427..000000000000
--- a/Documentation/cgroup-v1/memory.txt
+++ /dev/null
@@ -1,892 +0,0 @@
-Memory Resource Controller
-
-NOTE: This document is hopelessly outdated and it asks for a complete
-      rewrite. It still contains a useful information so we are keeping it
-      here but make sure to check the current code if you need a deeper
-      understanding.
-
-NOTE: The Memory Resource Controller has generically been referred to as the
-      memory controller in this document. Do not confuse memory controller
-      used here with the memory controller that is used in hardware.
-
-(For editors)
-In this document:
-      When we mention a cgroup (cgroupfs's directory) with memory controller,
-      we call it "memory cgroup". When you see git-log and source code, you'll
-      see patch's title and function names tend to use "memcg".
-      In this document, we avoid using it.
-
-Benefits and Purpose of the memory controller
-
-The memory controller isolates the memory behaviour of a group of tasks
-from the rest of the system. The article on LWN [12] mentions some probable
-uses of the memory controller. The memory controller can be used to
-
-a. Isolate an application or a group of applications
-   Memory-hungry applications can be isolated and limited to a smaller
-   amount of memory.
-b. Create a cgroup with a limited amount of memory; this can be used
-   as a good alternative to booting with mem=XXXX.
-c. Virtualization solutions can control the amount of memory they want
-   to assign to a virtual machine instance.
-d. A CD/DVD burner could control the amount of memory used by the
-   rest of the system to ensure that burning does not fail due to lack
-   of available memory.
-e. There are several other use cases; find one or use the controller just
-   for fun (to learn and hack on the VM subsystem).
-
-Current Status: linux-2.6.34-mmotm(development version of 2010/April)
-
-Features:
- - accounting anonymous pages, file caches, swap caches usage and limiting them.
- - pages are linked to per-memcg LRU exclusively, and there is no global LRU.
- - optionally, memory+swap usage can be accounted and limited.
- - hierarchical accounting
- - soft limit
- - moving (recharging) account at moving a task is selectable.
- - usage threshold notifier
- - memory pressure notifier
- - oom-killer disable knob and oom-notifier
- - Root cgroup has no limit controls.
-
- Kernel memory support is a work in progress, and the current version provides
- basically functionality. (See Section 2.7)
-
-Brief summary of control files.
-
- tasks				 # attach a task(thread) and show list of threads
- cgroup.procs			 # show list of processes
- cgroup.event_control		 # an interface for event_fd()
- memory.usage_in_bytes		 # show current usage for memory
-				 (See 5.5 for details)
- memory.memsw.usage_in_bytes	 # show current usage for memory+Swap
-				 (See 5.5 for details)
- memory.limit_in_bytes		 # set/show limit of memory usage
- memory.memsw.limit_in_bytes	 # set/show limit of memory+Swap usage
- memory.failcnt			 # show the number of memory usage hits limits
- memory.memsw.failcnt		 # show the number of memory+Swap hits limits
- memory.max_usage_in_bytes	 # show max memory usage recorded
- memory.memsw.max_usage_in_bytes # show max memory+Swap usage recorded
- memory.soft_limit_in_bytes	 # set/show soft limit of memory usage
- memory.stat			 # show various statistics
- memory.use_hierarchy		 # set/show hierarchical account enabled
- memory.force_empty		 # trigger forced page reclaim
- memory.pressure_level		 # set memory pressure notifications
- memory.swappiness		 # set/show swappiness parameter of vmscan
-				 (See sysctl's vm.swappiness)
- memory.move_charge_at_immigrate # set/show controls of moving charges
- memory.oom_control		 # set/show oom controls.
- memory.numa_stat		 # show the number of memory usage per numa node
-
- memory.kmem.limit_in_bytes      # set/show hard limit for kernel memory
- memory.kmem.usage_in_bytes      # show current kernel memory allocation
- memory.kmem.failcnt             # show the number of kernel memory usage hits limits
- memory.kmem.max_usage_in_bytes  # show max kernel memory usage recorded
-
- memory.kmem.tcp.limit_in_bytes  # set/show hard limit for tcp buf memory
- memory.kmem.tcp.usage_in_bytes  # show current tcp buf memory allocation
- memory.kmem.tcp.failcnt            # show the number of tcp buf memory usage hits limits
- memory.kmem.tcp.max_usage_in_bytes # show max tcp buf memory usage recorded
-
-1. History
-
-The memory controller has a long history. A request for comments for the memory
-controller was posted by Balbir Singh [1]. At the time the RFC was posted
-there were several implementations for memory control. The goal of the
-RFC was to build consensus and agreement for the minimal features required
-for memory control. The first RSS controller was posted by Balbir Singh[2]
-in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the
-RSS controller. At OLS, at the resource management BoF, everyone suggested
-that we handle both page cache and RSS together. Another request was raised
-to allow user space handling of OOM. The current memory controller is
-at version 6; it combines both mapped (RSS) and unmapped Page
-Cache Control [11].
-
-2. Memory Control
-
-Memory is a unique resource in the sense that it is present in a limited
-amount. If a task requires a lot of CPU processing, the task can spread
-its processing over a period of hours, days, months or years, but with
-memory, the same physical memory needs to be reused to accomplish the task.
-
-The memory controller implementation has been divided into phases. These
-are:
-
-1. Memory controller
-2. mlock(2) controller
-3. Kernel user memory accounting and slab control
-4. user mappings length controller
-
-The memory controller is the first controller developed.
-
-2.1. Design
-
-The core of the design is a counter called the page_counter. The
-page_counter tracks the current memory usage and limit of the group of
-processes associated with the controller. Each cgroup has a memory controller
-specific data structure (mem_cgroup) associated with it.
-
-2.2. Accounting
-
-		+--------------------+
-		|  mem_cgroup        |
-		|  (page_counter)    |
-		+--------------------+
-		 /            ^      \
-		/             |       \
-           +---------------+  |        +---------------+
-           | mm_struct     |  |....    | mm_struct     |
-           |               |  |        |               |
-           +---------------+  |        +---------------+
-                              |
-                              + --------------+
-                                              |
-           +---------------+           +------+--------+
-           | page          +---------->  page_cgroup|
-           |               |           |               |
-           +---------------+           +---------------+
-
-             (Figure 1: Hierarchy of Accounting)
-
-
-Figure 1 shows the important aspects of the controller
-
-1. Accounting happens per cgroup
-2. Each mm_struct knows about which cgroup it belongs to
-3. Each page has a pointer to the page_cgroup, which in turn knows the
-   cgroup it belongs to
-
-The accounting is done as follows: mem_cgroup_charge_common() is invoked to
-set up the necessary data structures and check if the cgroup that is being
-charged is over its limit. If it is, then reclaim is invoked on the cgroup.
-More details can be found in the reclaim section of this document.
-If everything goes well, a page meta-data-structure called page_cgroup is
-updated. page_cgroup has its own LRU on cgroup.
-(*) page_cgroup structure is allocated at boot/memory-hotplug time.
-
-2.2.1 Accounting details
-
-All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
-Some pages which are never reclaimable and will not be on the LRU
-are not accounted. We just account pages under usual VM management.
-
-RSS pages are accounted at page_fault unless they've already been accounted
-for earlier. A file page will be accounted for as Page Cache when it's
-inserted into inode (radix-tree). While it's mapped into the page tables of
-processes, duplicate accounting is carefully avoided.
-
-An RSS page is unaccounted when it's fully unmapped. A PageCache page is
-unaccounted when it's removed from radix-tree. Even if RSS pages are fully
-unmapped (by kswapd), they may exist as SwapCache in the system until they
-are really freed. Such SwapCaches are also accounted.
-A swapped-in page is not accounted until it's mapped.
-
-Note: The kernel does swapin-readahead and reads multiple swaps at once.
-This means swapped-in pages may contain pages for other tasks than a task
-causing page fault. So, we avoid accounting at swap-in I/O.
-
-At page migration, accounting information is kept.
-
-Note: we just account pages-on-LRU because our purpose is to control amount
-of used pages; not-on-LRU pages tend to be out-of-control from VM view.
-
-2.3 Shared Page Accounting
-
-Shared pages are accounted on the basis of the first touch approach. The
-cgroup that first touches a page is accounted for the page. The principle
-behind this approach is that a cgroup that aggressively uses a shared
-page will eventually get charged for it (once it is uncharged from
-the cgroup that brought it in -- this will happen on memory pressure).
-
-But see section 8.2: when moving a task to another cgroup, its pages may
-be recharged to the new cgroup, if move_charge_at_immigrate has been chosen.
-
-Exception: If CONFIG_MEMCG_SWAP is not used.
-When you do swapoff and make swapped-out pages of shmem(tmpfs) to
-be backed into memory in force, charges for pages are accounted against the
-caller of swapoff rather than the users of shmem.
-
-2.4 Swap Extension (CONFIG_MEMCG_SWAP)
-
-Swap Extension allows you to record charge for swap. A swapped-in page is
-charged back to original page allocator if possible.
-
-When swap is accounted, following files are added.
- - memory.memsw.usage_in_bytes.
- - memory.memsw.limit_in_bytes.
-
-memsw means memory+swap. Usage of memory+swap is limited by
-memsw.limit_in_bytes.
-
-Example: Assume a system with 4G of swap. A task which allocates 6G of memory
-(by mistake) under 2G memory limitation will use all swap.
-In this case, setting memsw.limit_in_bytes=3G will prevent bad use of swap.
-By using the memsw limit, you can avoid system OOM which can be caused by swap
-shortage.
-
-* why 'memory+swap' rather than swap.
-The global LRU(kswapd) can swap out arbitrary pages. Swap-out means
-to move account from memory to swap...there is no change in usage of
-memory+swap. In other words, when we want to limit the usage of swap without
-affecting global LRU, memory+swap limit is better than just limiting swap from
-an OS point of view.
-
-* What happens when a cgroup hits memory.memsw.limit_in_bytes
-When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out
-in this cgroup. Then, swap-out will not be done by cgroup routine and file
-caches are dropped. But as mentioned above, global LRU can do swapout memory
-from it for sanity of the system's memory management state. You can't forbid
-it by cgroup.
-
-2.5 Reclaim
-
-Each cgroup maintains a per cgroup LRU which has the same structure as
-global VM. When a cgroup goes over its limit, we first try
-to reclaim memory from the cgroup so as to make space for the new
-pages that the cgroup has touched. If the reclaim is unsuccessful,
-an OOM routine is invoked to select and kill the bulkiest task in the
-cgroup. (See 10. OOM Control below.)
-
-The reclaim algorithm has not been modified for cgroups, except that
-pages that are selected for reclaiming come from the per-cgroup LRU
-list.
-
-NOTE: Reclaim does not work for the root cgroup, since we cannot set any
-limits on the root cgroup.
-
-Note2: When panic_on_oom is set to "2", the whole system will panic.
-
-When oom event notifier is registered, event will be delivered.
-(See oom_control section)
-
-2.6 Locking
-
-   lock_page_cgroup()/unlock_page_cgroup() should not be called under
-   the i_pages lock.
-
-   Other lock order is following:
-   PG_locked.
-   mm->page_table_lock
-       pgdat->lru_lock
-	  lock_page_cgroup.
-  In many cases, just lock_page_cgroup() is called.
-  per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
-  pgdat->lru_lock, it has no lock of its own.
-
-2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)
-
-With the Kernel memory extension, the Memory Controller is able to limit
-the amount of kernel memory used by the system. Kernel memory is fundamentally
-different than user memory, since it can't be swapped out, which makes it
-possible to DoS the system by consuming too much of this precious resource.
-
-Kernel memory accounting is enabled for all memory cgroups by default. But
-it can be disabled system-wide by passing cgroup.memory=nokmem to the kernel
-at boot time. In this case, kernel memory will not be accounted at all.
-
-Kernel memory limits are not imposed for the root cgroup. Usage for the root
-cgroup may or may not be accounted. The memory used is accumulated into
-memory.kmem.usage_in_bytes, or in a separate counter when it makes sense.
-(currently only for tcp).
-The main "kmem" counter is fed into the main counter, so kmem charges will
-also be visible from the user counter.
-
-Currently no soft limit is implemented for kernel memory. It is future work
-to trigger slab reclaim when those limits are reached.
-
-2.7.1 Current Kernel Memory resources accounted
-
-* stack pages: every process consumes some stack pages. By accounting into
-kernel memory, we prevent new processes from being created when the kernel
-memory usage is too high.
-
-* slab pages: pages allocated by the SLAB or SLUB allocator are tracked. A copy
-of each kmem_cache is created every time the cache is touched by the first time
-from inside the memcg. The creation is done lazily, so some objects can still be
-skipped while the cache is being created. All objects in a slab page should
-belong to the same memcg. This only fails to hold when a task is migrated to a
-different memcg during the page allocation by the cache.
-
-* sockets memory pressure: some sockets protocols have memory pressure
-thresholds. The Memory Controller allows them to be controlled individually
-per cgroup, instead of globally.
-
-* tcp memory pressure: sockets memory pressure for the tcp protocol.
-
-2.7.2 Common use cases
-
-Because the "kmem" counter is fed to the main user counter, kernel memory can
-never be limited completely independently of user memory. Say "U" is the user
-limit, and "K" the kernel limit. There are three possible ways limits can be
-set:
-
-    U != 0, K = unlimited:
-    This is the standard memcg limitation mechanism already present before kmem
-    accounting. Kernel memory is completely ignored.
-
-    U != 0, K < U:
-    Kernel memory is a subset of the user memory. This setup is useful in
-    deployments where the total amount of memory per-cgroup is overcommited.
-    Overcommiting kernel memory limits is definitely not recommended, since the
-    box can still run out of non-reclaimable memory.
-    In this case, the admin could set up K so that the sum of all groups is
-    never greater than the total memory, and freely set U at the cost of his
-    QoS.
-    WARNING: In the current implementation, memory reclaim will NOT be
-    triggered for a cgroup when it hits K while staying below U, which makes
-    this setup impractical.
-
-    U != 0, K >= U:
-    Since kmem charges will also be fed to the user counter and reclaim will be
-    triggered for the cgroup for both kinds of memory. This setup gives the
-    admin a unified view of memory, and it is also useful for people who just
-    want to track kernel memory usage.
-
-3. User Interface
-
-3.0. Configuration
-
-a. Enable CONFIG_CGROUPS
-b. Enable CONFIG_MEMCG
-c. Enable CONFIG_MEMCG_SWAP (to use swap extension)
-d. Enable CONFIG_MEMCG_KMEM (to use kmem extension)
-
-3.1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?)
-# mount -t tmpfs none /sys/fs/cgroup
-# mkdir /sys/fs/cgroup/memory
-# mount -t cgroup none /sys/fs/cgroup/memory -o memory
-
-3.2. Make the new group and move bash into it
-# mkdir /sys/fs/cgroup/memory/0
-# echo $$ > /sys/fs/cgroup/memory/0/tasks
-
-Since now we're in the 0 cgroup, we can alter the memory limit:
-# echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes
-
-NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
-mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.)
-
-NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
-NOTE: We cannot set limits on the root cgroup any more.
-
-# cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes
-4194304
-
-We can check the usage:
-# cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes
-1216512
-
-A successful write to this file does not guarantee a successful setting of
-this limit to the value written into the file. This can be due to a
-number of factors, such as rounding up to page boundaries or the total
-availability of memory on the system. The user is required to re-read
-this file after a write to guarantee the value committed by the kernel.
-
-# echo 1 > memory.limit_in_bytes
-# cat memory.limit_in_bytes
-4096
-
-The memory.failcnt field gives the number of times that the cgroup limit was
-exceeded.
-
-The memory.stat file gives accounting information. Now, the number of
-caches, RSS and Active pages/Inactive pages are shown.
-
-4. Testing
-
-For testing features and implementation, see memcg_test.txt.
-
-Performance test is also important. To see pure memory controller's overhead,
-testing on tmpfs will give you good numbers of small overheads.
-Example: do kernel make on tmpfs.
-
-Page-fault scalability is also important. At measuring parallel
-page fault test, multi-process test may be better than multi-thread
-test because it has noise of shared objects/status.
-
-But the above two are testing extreme situations.
-Trying usual test under memory controller is always helpful.
-
-4.1 Troubleshooting
-
-Sometimes a user might find that the application under a cgroup is
-terminated by the OOM killer. There are several causes for this:
-
-1. The cgroup limit is too low (just too low to do anything useful)
-2. The user is using anonymous memory and swap is turned off or too low
-
-A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of
-some of the pages cached in the cgroup (page cache pages).
-
-To know what happens, disabling OOM_Kill as per "10. OOM Control" (below) and
-seeing what happens will be helpful.
-
-4.2 Task migration
-
-When a task migrates from one cgroup to another, its charge is not
-carried forward by default. The pages allocated from the original cgroup still
-remain charged to it, the charge is dropped when the page is freed or
-reclaimed.
-
-You can move charges of a task along with task migration.
-See 8. "Move charges at task migration"
-
-4.3 Removing a cgroup
-
-A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
-cgroup might have some charge associated with it, even though all
-tasks have migrated away from it. (because we charge against pages, not
-against tasks.)
-
-We move the stats to root (if use_hierarchy==0) or parent (if
-use_hierarchy==1), and no change on the charge except uncharging
-from the child.
-
-Charges recorded in swap information is not updated at removal of cgroup.
-Recorded information is discarded and a cgroup which uses swap (swapcache)
-will be charged as a new owner of it.
-
-About use_hierarchy, see Section 6.
-
-5. Misc. interfaces.
-
-5.1 force_empty
-  memory.force_empty interface is provided to make cgroup's memory usage empty.
-  When writing anything to this
-
-  # echo 0 > memory.force_empty
-
-  the cgroup will be reclaimed and as many pages reclaimed as possible.
-
-  The typical use case for this interface is before calling rmdir().
-  Though rmdir() offlines memcg, but the memcg may still stay there due to
-  charged file caches. Some out-of-use page caches may keep charged until
-  memory pressure happens. If you want to avoid that, force_empty will be useful.
-
-  Also, note that when memory.kmem.limit_in_bytes is set the charges due to
-  kernel pages will still be seen. This is not considered a failure and the
-  write will still return success. In this case, it is expected that
-  memory.kmem.usage_in_bytes == memory.usage_in_bytes.
-
-  About use_hierarchy, see Section 6.
-
-5.2 stat file
-
-memory.stat file includes following statistics
-
-# per-memory cgroup local status
-cache		- # of bytes of page cache memory.
-rss		- # of bytes of anonymous and swap cache memory (includes
-		transparent hugepages).
-rss_huge	- # of bytes of anonymous transparent hugepages.
-mapped_file	- # of bytes of mapped file (includes tmpfs/shmem)
-pgpgin		- # of charging events to the memory cgroup. The charging
-		event happens each time a page is accounted as either mapped
-		anon page(RSS) or cache page(Page Cache) to the cgroup.
-pgpgout		- # of uncharging events to the memory cgroup. The uncharging
-		event happens each time a page is unaccounted from the cgroup.
-swap		- # of bytes of swap usage
-dirty		- # of bytes that are waiting to get written back to the disk.
-writeback	- # of bytes of file/anon cache that are queued for syncing to
-		disk.
-inactive_anon	- # of bytes of anonymous and swap cache memory on inactive
-		LRU list.
-active_anon	- # of bytes of anonymous and swap cache memory on active
-		LRU list.
-inactive_file	- # of bytes of file-backed memory on inactive LRU list.
-active_file	- # of bytes of file-backed memory on active LRU list.
-unevictable	- # of bytes of memory that cannot be reclaimed (mlocked etc).
-
-# status considering hierarchy (see memory.use_hierarchy settings)
-
-hierarchical_memory_limit - # of bytes of memory limit with regard to hierarchy
-			under which the memory cgroup is
-hierarchical_memsw_limit - # of bytes of memory+swap limit with regard to
-			hierarchy under which memory cgroup is.
-
-total_<counter>		- # hierarchical version of <counter>, which in
-			addition to the cgroup's own value includes the
-			sum of all hierarchical children's values of
-			<counter>, i.e. total_cache
-
-# The following additional stats are dependent on CONFIG_DEBUG_VM.
-
-recent_rotated_anon	- VM internal parameter. (see mm/vmscan.c)
-recent_rotated_file	- VM internal parameter. (see mm/vmscan.c)
-recent_scanned_anon	- VM internal parameter. (see mm/vmscan.c)
-recent_scanned_file	- VM internal parameter. (see mm/vmscan.c)
-
-Memo:
-	recent_rotated means recent frequency of LRU rotation.
-	recent_scanned means recent # of scans to LRU.
-	showing for better debug please see the code for meanings.
-
-Note:
-	Only anonymous and swap cache memory is listed as part of 'rss' stat.
-	This should not be confused with the true 'resident set size' or the
-	amount of physical memory used by the cgroup.
-	'rss + mapped_file" will give you resident set size of cgroup.
-	(Note: file and shmem may be shared among other cgroups. In that case,
-	 mapped_file is accounted only when the memory cgroup is owner of page
-	 cache.)
-
-5.3 swappiness
-
-Overrides /proc/sys/vm/swappiness for the particular group. The tunable
-in the root cgroup corresponds to the global swappiness setting.
-
-Please note that unlike during the global reclaim, limit reclaim
-enforces that 0 swappiness really prevents from any swapping even if
-there is a swap storage available. This might lead to memcg OOM killer
-if there are no file pages to reclaim.
-
-5.4 failcnt
-
-A memory cgroup provides memory.failcnt and memory.memsw.failcnt files.
-This failcnt(== failure count) shows the number of times that a usage counter
-hit its limit. When a memory cgroup hits a limit, failcnt increases and
-memory under it will be reclaimed.
-
-You can reset failcnt by writing 0 to failcnt file.
-# echo 0 > .../memory.failcnt
-
-5.5 usage_in_bytes
-
-For efficiency, as other kernel components, memory cgroup uses some optimization
-to avoid unnecessary cacheline false sharing. usage_in_bytes is affected by the
-method and doesn't show 'exact' value of memory (and swap) usage, it's a fuzz
-value for efficient access. (Of course, when necessary, it's synchronized.)
-If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP)
-value in memory.stat(see 5.2).
-
-5.6 numa_stat
-
-This is similar to numa_maps but operates on a per-memcg basis.  This is
-useful for providing visibility into the numa locality information within
-an memcg since the pages are allowed to be allocated from any physical
-node.  One of the use cases is evaluating application performance by
-combining this information with the application's CPU allocation.
-
-Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable"
-per-node page counts including "hierarchical_<counter>" which sums up all
-hierarchical children's values in addition to the memcg's own value.
-
-The output format of memory.numa_stat is:
-
-total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
-file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
-anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
-unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
-hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
-
-The "total" count is sum of file + anon + unevictable.
-
-6. Hierarchy support
-
-The memory controller supports a deep hierarchy and hierarchical accounting.
-The hierarchy is created by creating the appropriate cgroups in the
-cgroup filesystem. Consider for example, the following cgroup filesystem
-hierarchy
-
-	       root
-	     /  |   \
-            /	|    \
-	   a	b     c
-		      | \
-		      |  \
-		      d   e
-
-In the diagram above, with hierarchical accounting enabled, all memory
-usage of e, is accounted to its ancestors up until the root (i.e, c and root),
-that has memory.use_hierarchy enabled. If one of the ancestors goes over its
-limit, the reclaim algorithm reclaims from the tasks in the ancestor and the
-children of the ancestor.
-
-6.1 Enabling hierarchical accounting and reclaim
-
-A memory cgroup by default disables the hierarchy feature. Support
-can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup
-
-# echo 1 > memory.use_hierarchy
-
-The feature can be disabled by
-
-# echo 0 > memory.use_hierarchy
-
-NOTE1: Enabling/disabling will fail if either the cgroup already has other
-       cgroups created below it, or if the parent cgroup has use_hierarchy
-       enabled.
-
-NOTE2: When panic_on_oom is set to "2", the whole system will panic in
-       case of an OOM event in any cgroup.
-
-7. Soft limits
-
-Soft limits allow for greater sharing of memory. The idea behind soft limits
-is to allow control groups to use as much of the memory as needed, provided
-
-a. There is no memory contention
-b. They do not exceed their hard limit
-
-When the system detects memory contention or low memory, control groups
-are pushed back to their soft limits. If the soft limit of each control
-group is very high, they are pushed back as much as possible to make
-sure that one control group does not starve the others of memory.
-
-Please note that soft limits is a best-effort feature; it comes with
-no guarantees, but it does its best to make sure that when memory is
-heavily contended for, memory is allocated based on the soft limit
-hints/setup. Currently soft limit based reclaim is set up such that
-it gets invoked from balance_pgdat (kswapd).
-
-7.1 Interface
-
-Soft limits can be setup by using the following commands (in this example we
-assume a soft limit of 256 MiB)
-
-# echo 256M > memory.soft_limit_in_bytes
-
-If we want to change this to 1G, we can at any time use
-
-# echo 1G > memory.soft_limit_in_bytes
-
-NOTE1: Soft limits take effect over a long period of time, since they involve
-       reclaiming memory for balancing between memory cgroups
-NOTE2: It is recommended to set the soft limit always below the hard limit,
-       otherwise the hard limit will take precedence.
-
-8. Move charges at task migration
-
-Users can move charges associated with a task along with task migration, that
-is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
-This feature is not supported in !CONFIG_MMU environments because of lack of
-page tables.
-
-8.1 Interface
-
-This feature is disabled by default. It can be enabled (and disabled again) by
-writing to memory.move_charge_at_immigrate of the destination cgroup.
-
-If you want to enable it:
-
-# echo (some positive value) > memory.move_charge_at_immigrate
-
-Note: Each bits of move_charge_at_immigrate has its own meaning about what type
-      of charges should be moved. See 8.2 for details.
-Note: Charges are moved only when you move mm->owner, in other words,
-      a leader of a thread group.
-Note: If we cannot find enough space for the task in the destination cgroup, we
-      try to make space by reclaiming memory. Task migration may fail if we
-      cannot make enough space.
-Note: It can take several seconds if you move charges much.
-
-And if you want disable it again:
-
-# echo 0 > memory.move_charge_at_immigrate
-
-8.2 Type of charges which can be moved
-
-Each bit in move_charge_at_immigrate has its own meaning about what type of
-charges should be moved. But in any case, it must be noted that an account of
-a page or a swap can be moved only when it is charged to the task's current
-(old) memory cgroup.
-
-  bit | what type of charges would be moved ?
- -----+------------------------------------------------------------------------
-   0  | A charge of an anonymous page (or swap of it) used by the target task.
-      | You must enable Swap Extension (see 2.4) to enable move of swap charges.
- -----+------------------------------------------------------------------------
-   1  | A charge of file pages (normal file, tmpfs file (e.g. ipc shared memory)
-      | and swaps of tmpfs file) mmapped by the target task. Unlike the case of
-      | anonymous pages, file pages (and swaps) in the range mmapped by the task
-      | will be moved even if the task hasn't done page fault, i.e. they might
-      | not be the task's "RSS", but other task's "RSS" that maps the same file.
-      | And mapcount of the page is ignored (the page can be moved even if
-      | page_mapcount(page) > 1). You must enable Swap Extension (see 2.4) to
-      | enable move of swap charges.
-
-8.3 TODO
-
-- All of moving charge operations are done under cgroup_mutex. It's not good
-  behavior to hold the mutex too long, so we may need some trick.
-
-9. Memory thresholds
-
-Memory cgroup implements memory thresholds using the cgroups notification
-API (see cgroups.txt). It allows to register multiple memory and memsw
-thresholds and gets notifications when it crosses.
-
-To register a threshold, an application must:
-- create an eventfd using eventfd(2);
-- open memory.usage_in_bytes or memory.memsw.usage_in_bytes;
-- write string like "<event_fd> <fd of memory.usage_in_bytes> <threshold>" to
-  cgroup.event_control.
-
-Application will be notified through eventfd when memory usage crosses
-threshold in any direction.
-
-It's applicable for root and non-root cgroup.
-
-10. OOM Control
-
-memory.oom_control file is for OOM notification and other controls.
-
-Memory cgroup implements OOM notifier using the cgroup notification
-API (See cgroups.txt). It allows to register multiple OOM notification
-delivery and gets notification when OOM happens.
-
-To register a notifier, an application must:
- - create an eventfd using eventfd(2)
- - open memory.oom_control file
- - write string like "<event_fd> <fd of memory.oom_control>" to
-   cgroup.event_control
-
-The application will be notified through eventfd when OOM happens.
-OOM notification doesn't work for the root cgroup.
-
-You can disable the OOM-killer by writing "1" to memory.oom_control file, as:
-
-	#echo 1 > memory.oom_control
-
-If OOM-killer is disabled, tasks under cgroup will hang/sleep
-in memory cgroup's OOM-waitqueue when they request accountable memory.
-
-For running them, you have to relax the memory cgroup's OOM status by
-	* enlarge limit or reduce usage.
-To reduce usage,
-	* kill some tasks.
-	* move some tasks to other group with account migration.
-	* remove some files (on tmpfs?)
-
-Then, stopped tasks will work again.
-
-At reading, current status of OOM is shown.
-	oom_kill_disable 0 or 1 (if 1, oom-killer is disabled)
-	under_oom	 0 or 1 (if 1, the memory cgroup is under OOM, tasks may
-				 be stopped.)
-
-11. Memory Pressure
-
-The pressure level notifications can be used to monitor the memory
-allocation cost; based on the pressure, applications can implement
-different strategies of managing their memory resources. The pressure
-levels are defined as following:
-
-The "low" level means that the system is reclaiming memory for new
-allocations. Monitoring this reclaiming activity might be useful for
-maintaining cache level. Upon notification, the program (typically
-"Activity Manager") might analyze vmstat and act in advance (i.e.
-prematurely shutdown unimportant services).
-
-The "medium" level means that the system is experiencing medium memory
-pressure, the system might be making swap, paging out active file caches,
-etc. Upon this event applications may decide to further analyze
-vmstat/zoneinfo/memcg or internal memory usage statistics and free any
-resources that can be easily reconstructed or re-read from a disk.
-
-The "critical" level means that the system is actively thrashing, it is
-about to out of memory (OOM) or even the in-kernel OOM killer is on its
-way to trigger. Applications should do whatever they can to help the
-system. It might be too late to consult with vmstat or any other
-statistics, so it's advisable to take an immediate action.
-
-By default, events are propagated upward until the event is handled, i.e. the
-events are not pass-through. For example, you have three cgroups: A->B->C. Now
-you set up an event listener on cgroups A, B and C, and suppose group C
-experiences some pressure. In this situation, only group C will receive the
-notification, i.e. groups A and B will not receive it. This is done to avoid
-excessive "broadcasting" of messages, which disturbs the system and which is
-especially bad if we are low on memory or thrashing. Group B, will receive
-notification only if there are no event listers for group C.
-
-There are three optional modes that specify different propagation behavior:
-
- - "default": this is the default behavior specified above. This mode is the
-   same as omitting the optional mode parameter, preserved by backwards
-   compatibility.
-
- - "hierarchy": events always propagate up to the root, similar to the default
-   behavior, except that propagation continues regardless of whether there are
-   event listeners at each level, with the "hierarchy" mode. In the above
-   example, groups A, B, and C will receive notification of memory pressure.
-
- - "local": events are pass-through, i.e. they only receive notifications when
-   memory pressure is experienced in the memcg for which the notification is
-   registered. In the above example, group C will receive notification if
-   registered for "local" notification and the group experiences memory
-   pressure. However, group B will never receive notification, regardless if
-   there is an event listener for group C or not, if group B is registered for
-   local notification.
-
-The level and event notification mode ("hierarchy" or "local", if necessary) are
-specified by a comma-delimited string, i.e. "low,hierarchy" specifies
-hierarchical, pass-through, notification for all ancestor memcgs. Notification
-that is the default, non pass-through behavior, does not specify a mode.
-"medium,local" specifies pass-through notification for the medium level.
-
-The file memory.pressure_level is only used to setup an eventfd. To
-register a notification, an application must:
-
-- create an eventfd using eventfd(2);
-- open memory.pressure_level;
-- write string as "<event_fd> <fd of memory.pressure_level> <level[,mode]>"
-  to cgroup.event_control.
-
-Application will be notified through eventfd when memory pressure is at
-the specific level (or higher). Read/write operations to
-memory.pressure_level are no implemented.
-
-Test:
-
-   Here is a small script example that makes a new cgroup, sets up a
-   memory limit, sets up a notification in the cgroup and then makes child
-   cgroup experience a critical pressure:
-
-   # cd /sys/fs/cgroup/memory/
-   # mkdir foo
-   # cd foo
-   # cgroup_event_listener memory.pressure_level low,hierarchy &
-   # echo 8000000 > memory.limit_in_bytes
-   # echo 8000000 > memory.memsw.limit_in_bytes
-   # echo $$ > tasks
-   # dd if=/dev/zero | read x
-
-   (Expect a bunch of notifications, and eventually, the oom-killer will
-   trigger.)
-
-12. TODO
-
-1. Make per-cgroup scanner reclaim not-shared pages first
-2. Teach controller to account for shared-pages
-3. Start reclamation in the background when the limit is
-   not yet hit but the usage is getting closer
-
-Summary
-
-Overall, the memory controller has been a stable controller and has been
-commented and discussed quite extensively in the community.
-
-References
-
-1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/
-2. Singh, Balbir. Memory Controller (RSS Control),
-   http://lwn.net/Articles/222762/
-3. Emelianov, Pavel. Resource controllers based on process cgroups
-   http://lkml.org/lkml/2007/3/6/198
-4. Emelianov, Pavel. RSS controller based on process cgroups (v2)
-   http://lkml.org/lkml/2007/4/9/78
-5. Emelianov, Pavel. RSS controller based on process cgroups (v3)
-   http://lkml.org/lkml/2007/5/30/244
-6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/
-7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control
-   subsystem (v3), http://lwn.net/Articles/235534/
-8. Singh, Balbir. RSS controller v2 test results (lmbench),
-   http://lkml.org/lkml/2007/5/17/232
-9. Singh, Balbir. RSS controller v2 AIM9 results
-   http://lkml.org/lkml/2007/5/18/1
-10. Singh, Balbir. Memory controller v6 test results,
-    http://lkml.org/lkml/2007/8/19/36
-11. Singh, Balbir. Memory controller introduction (v6),
-    http://lkml.org/lkml/2007/8/17/69
-12. Corbet, Jonathan, Controlling memory use in cgroups,
-    http://lwn.net/Articles/243795/
diff --git a/Documentation/cgroup-v1/net_cls.rst b/Documentation/cgroup-v1/net_cls.rst
new file mode 100644
index 000000000000..a2cf272af7a0
--- /dev/null
+++ b/Documentation/cgroup-v1/net_cls.rst
@@ -0,0 +1,44 @@
+=========================
+Network classifier cgroup
+=========================
+
+The Network classifier cgroup provides an interface to
+tag network packets with a class identifier (classid).
+
+The Traffic Controller (tc) can be used to assign
+different priorities to packets from different cgroups.
+Also, Netfilter (iptables) can use this tag to perform
+actions on such packets.
+
+Creating a net_cls cgroups instance creates a net_cls.classid file.
+This net_cls.classid value is initialized to 0.
+
+You can write hexadecimal values to net_cls.classid; the format for these
+values is 0xAAAABBBB; AAAA is the major handle number and BBBB
+is the minor handle number.
+Reading net_cls.classid yields a decimal result.
+
+Example::
+
+	mkdir /sys/fs/cgroup/net_cls
+	mount -t cgroup -onet_cls net_cls /sys/fs/cgroup/net_cls
+	mkdir /sys/fs/cgroup/net_cls/0
+	echo 0x100001 >  /sys/fs/cgroup/net_cls/0/net_cls.classid
+
+- setting a 10:1 handle::
+
+	cat /sys/fs/cgroup/net_cls/0/net_cls.classid
+	1048577
+
+- configuring tc::
+
+	tc qdisc add dev eth0 root handle 10: htb
+	tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit
+
+- creating traffic class 10:1::
+
+	tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup
+
+configuring iptables, basic example::
+
+	iptables -A OUTPUT -m cgroup ! --cgroup 0x100001 -j DROP
diff --git a/Documentation/cgroup-v1/net_cls.txt b/Documentation/cgroup-v1/net_cls.txt
deleted file mode 100644
index ec182346dea2..000000000000
--- a/Documentation/cgroup-v1/net_cls.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-Network classifier cgroup
--------------------------
-
-The Network classifier cgroup provides an interface to
-tag network packets with a class identifier (classid).
-
-The Traffic Controller (tc) can be used to assign
-different priorities to packets from different cgroups.
-Also, Netfilter (iptables) can use this tag to perform
-actions on such packets.
-
-Creating a net_cls cgroups instance creates a net_cls.classid file.
-This net_cls.classid value is initialized to 0.
-
-You can write hexadecimal values to net_cls.classid; the format for these
-values is 0xAAAABBBB; AAAA is the major handle number and BBBB
-is the minor handle number.
-Reading net_cls.classid yields a decimal result.
-
-Example:
-mkdir /sys/fs/cgroup/net_cls
-mount -t cgroup -onet_cls net_cls /sys/fs/cgroup/net_cls
-mkdir /sys/fs/cgroup/net_cls/0
-echo 0x100001 >  /sys/fs/cgroup/net_cls/0/net_cls.classid
-	- setting a 10:1 handle.
-
-cat /sys/fs/cgroup/net_cls/0/net_cls.classid
-1048577
-
-configuring tc:
-tc qdisc add dev eth0 root handle 10: htb
-
-tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit
- - creating traffic class 10:1
-
-tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup
-
-configuring iptables, basic example:
-iptables -A OUTPUT -m cgroup ! --cgroup 0x100001 -j DROP
diff --git a/Documentation/cgroup-v1/net_prio.rst b/Documentation/cgroup-v1/net_prio.rst
new file mode 100644
index 000000000000..b40905871c64
--- /dev/null
+++ b/Documentation/cgroup-v1/net_prio.rst
@@ -0,0 +1,57 @@
+=======================
+Network priority cgroup
+=======================
+
+The Network priority cgroup provides an interface to allow an administrator to
+dynamically set the priority of network traffic generated by various
+applications
+
+Nominally, an application would set the priority of its traffic via the
+SO_PRIORITY socket option.  This however, is not always possible because:
+
+1) The application may not have been coded to set this value
+2) The priority of application traffic is often a site-specific administrative
+   decision rather than an application defined one.
+
+This cgroup allows an administrator to assign a process to a group which defines
+the priority of egress traffic on a given interface. Network priority groups can
+be created by first mounting the cgroup filesystem::
+
+	# mount -t cgroup -onet_prio none /sys/fs/cgroup/net_prio
+
+With the above step, the initial group acting as the parent accounting group
+becomes visible at '/sys/fs/cgroup/net_prio'.  This group includes all tasks in
+the system. '/sys/fs/cgroup/net_prio/tasks' lists the tasks in this cgroup.
+
+Each net_prio cgroup contains two files that are subsystem specific
+
+net_prio.prioidx
+  This file is read-only, and is simply informative.  It contains a unique
+  integer value that the kernel uses as an internal representation of this
+  cgroup.
+
+net_prio.ifpriomap
+  This file contains a map of the priorities assigned to traffic originating
+  from processes in this group and egressing the system on various interfaces.
+  It contains a list of tuples in the form <ifname priority>.  Contents of this
+  file can be modified by echoing a string into the file using the same tuple
+  format. For example::
+
+	echo "eth0 5" > /sys/fs/cgroups/net_prio/iscsi/net_prio.ifpriomap
+
+This command would force any traffic originating from processes belonging to the
+iscsi net_prio cgroup and egressing on interface eth0 to have the priority of
+said traffic set to the value 5. The parent accounting group also has a
+writeable 'net_prio.ifpriomap' file that can be used to set a system default
+priority.
+
+Priorities are set immediately prior to queueing a frame to the device
+queueing discipline (qdisc) so priorities will be assigned prior to the hardware
+queue selection being made.
+
+One usage for the net_prio cgroup is with mqprio qdisc allowing application
+traffic to be steered to hardware/driver based traffic classes. These mappings
+can then be managed by administrators or other networking protocols such as
+DCBX.
+
+A new net_prio cgroup inherits the parent's configuration.
diff --git a/Documentation/cgroup-v1/net_prio.txt b/Documentation/cgroup-v1/net_prio.txt
deleted file mode 100644
index a82cbd28ea8a..000000000000
--- a/Documentation/cgroup-v1/net_prio.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-Network priority cgroup
--------------------------
-
-The Network priority cgroup provides an interface to allow an administrator to
-dynamically set the priority of network traffic generated by various
-applications
-
-Nominally, an application would set the priority of its traffic via the
-SO_PRIORITY socket option.  This however, is not always possible because:
-
-1) The application may not have been coded to set this value
-2) The priority of application traffic is often a site-specific administrative
-   decision rather than an application defined one.
-
-This cgroup allows an administrator to assign a process to a group which defines
-the priority of egress traffic on a given interface. Network priority groups can
-be created by first mounting the cgroup filesystem.
-
-# mount -t cgroup -onet_prio none /sys/fs/cgroup/net_prio
-
-With the above step, the initial group acting as the parent accounting group
-becomes visible at '/sys/fs/cgroup/net_prio'.  This group includes all tasks in
-the system. '/sys/fs/cgroup/net_prio/tasks' lists the tasks in this cgroup.
-
-Each net_prio cgroup contains two files that are subsystem specific
-
-net_prio.prioidx
-This file is read-only, and is simply informative.  It contains a unique integer
-value that the kernel uses as an internal representation of this cgroup.
-
-net_prio.ifpriomap
-This file contains a map of the priorities assigned to traffic originating from
-processes in this group and egressing the system on various interfaces. It
-contains a list of tuples in the form <ifname priority>.  Contents of this file
-can be modified by echoing a string into the file using the same tuple format.
-for example:
-
-echo "eth0 5" > /sys/fs/cgroups/net_prio/iscsi/net_prio.ifpriomap
-
-This command would force any traffic originating from processes belonging to the
-iscsi net_prio cgroup and egressing on interface eth0 to have the priority of
-said traffic set to the value 5. The parent accounting group also has a
-writeable 'net_prio.ifpriomap' file that can be used to set a system default
-priority.
-
-Priorities are set immediately prior to queueing a frame to the device
-queueing discipline (qdisc) so priorities will be assigned prior to the hardware
-queue selection being made.
-
-One usage for the net_prio cgroup is with mqprio qdisc allowing application
-traffic to be steered to hardware/driver based traffic classes. These mappings
-can then be managed by administrators or other networking protocols such as
-DCBX.
-
-A new net_prio cgroup inherits the parent's configuration.
diff --git a/Documentation/cgroup-v1/pids.rst b/Documentation/cgroup-v1/pids.rst
new file mode 100644
index 000000000000..6acebd9e72c8
--- /dev/null
+++ b/Documentation/cgroup-v1/pids.rst
@@ -0,0 +1,92 @@
+=========================
+Process Number Controller
+=========================
+
+Abstract
+--------
+
+The process number controller is used to allow a cgroup hierarchy to stop any
+new tasks from being fork()'d or clone()'d after a certain limit is reached.
+
+Since it is trivial to hit the task limit without hitting any kmemcg limits in
+place, PIDs are a fundamental resource. As such, PID exhaustion must be
+preventable in the scope of a cgroup hierarchy by allowing resource limiting of
+the number of tasks in a cgroup.
+
+Usage
+-----
+
+In order to use the `pids` controller, set the maximum number of tasks in
+pids.max (this is not available in the root cgroup for obvious reasons). The
+number of processes currently in the cgroup is given by pids.current.
+
+Organisational operations are not blocked by cgroup policies, so it is possible
+to have pids.current > pids.max. This can be done by either setting the limit to
+be smaller than pids.current, or attaching enough processes to the cgroup such
+that pids.current > pids.max. However, it is not possible to violate a cgroup
+policy through fork() or clone(). fork() and clone() will return -EAGAIN if the
+creation of a new process would cause a cgroup policy to be violated.
+
+To set a cgroup to have no limit, set pids.max to "max". This is the default for
+all new cgroups (N.B. that PID limits are hierarchical, so the most stringent
+limit in the hierarchy is followed).
+
+pids.current tracks all child cgroup hierarchies, so parent/pids.current is a
+superset of parent/child/pids.current.
+
+The pids.events file contains event counters:
+
+  - max: Number of times fork failed because limit was hit.
+
+Example
+-------
+
+First, we mount the pids controller::
+
+	# mkdir -p /sys/fs/cgroup/pids
+	# mount -t cgroup -o pids none /sys/fs/cgroup/pids
+
+Then we create a hierarchy, set limits and attach processes to it::
+
+	# mkdir -p /sys/fs/cgroup/pids/parent/child
+	# echo 2 > /sys/fs/cgroup/pids/parent/pids.max
+	# echo $$ > /sys/fs/cgroup/pids/parent/cgroup.procs
+	# cat /sys/fs/cgroup/pids/parent/pids.current
+	2
+	#
+
+It should be noted that attempts to overcome the set limit (2 in this case) will
+fail::
+
+	# cat /sys/fs/cgroup/pids/parent/pids.current
+	2
+	# ( /bin/echo "Here's some processes for you." | cat )
+	sh: fork: Resource temporary unavailable
+	#
+
+Even if we migrate to a child cgroup (which doesn't have a set limit), we will
+not be able to overcome the most stringent limit in the hierarchy (in this case,
+parent's)::
+
+	# echo $$ > /sys/fs/cgroup/pids/parent/child/cgroup.procs
+	# cat /sys/fs/cgroup/pids/parent/pids.current
+	2
+	# cat /sys/fs/cgroup/pids/parent/child/pids.current
+	2
+	# cat /sys/fs/cgroup/pids/parent/child/pids.max
+	max
+	# ( /bin/echo "Here's some processes for you." | cat )
+	sh: fork: Resource temporary unavailable
+	#
+
+We can set a limit that is smaller than pids.current, which will stop any new
+processes from being forked at all (note that the shell itself counts towards
+pids.current)::
+
+	# echo 1 > /sys/fs/cgroup/pids/parent/pids.max
+	# /bin/echo "We can't even spawn a single process now."
+	sh: fork: Resource temporary unavailable
+	# echo 0 > /sys/fs/cgroup/pids/parent/pids.max
+	# /bin/echo "We can't even spawn a single process now."
+	sh: fork: Resource temporary unavailable
+	#
diff --git a/Documentation/cgroup-v1/pids.txt b/Documentation/cgroup-v1/pids.txt
deleted file mode 100644
index e105d708ccde..000000000000
--- a/Documentation/cgroup-v1/pids.txt
+++ /dev/null
@@ -1,88 +0,0 @@
-						   Process Number Controller
-						   =========================
-
-Abstract
---------
-
-The process number controller is used to allow a cgroup hierarchy to stop any
-new tasks from being fork()'d or clone()'d after a certain limit is reached.
-
-Since it is trivial to hit the task limit without hitting any kmemcg limits in
-place, PIDs are a fundamental resource. As such, PID exhaustion must be
-preventable in the scope of a cgroup hierarchy by allowing resource limiting of
-the number of tasks in a cgroup.
-
-Usage
------
-
-In order to use the `pids` controller, set the maximum number of tasks in
-pids.max (this is not available in the root cgroup for obvious reasons). The
-number of processes currently in the cgroup is given by pids.current.
-
-Organisational operations are not blocked by cgroup policies, so it is possible
-to have pids.current > pids.max. This can be done by either setting the limit to
-be smaller than pids.current, or attaching enough processes to the cgroup such
-that pids.current > pids.max. However, it is not possible to violate a cgroup
-policy through fork() or clone(). fork() and clone() will return -EAGAIN if the
-creation of a new process would cause a cgroup policy to be violated.
-
-To set a cgroup to have no limit, set pids.max to "max". This is the default for
-all new cgroups (N.B. that PID limits are hierarchical, so the most stringent
-limit in the hierarchy is followed).
-
-pids.current tracks all child cgroup hierarchies, so parent/pids.current is a
-superset of parent/child/pids.current.
-
-The pids.events file contains event counters:
-  - max: Number of times fork failed because limit was hit.
-
-Example
--------
-
-First, we mount the pids controller:
-# mkdir -p /sys/fs/cgroup/pids
-# mount -t cgroup -o pids none /sys/fs/cgroup/pids
-
-Then we create a hierarchy, set limits and attach processes to it:
-# mkdir -p /sys/fs/cgroup/pids/parent/child
-# echo 2 > /sys/fs/cgroup/pids/parent/pids.max
-# echo $$ > /sys/fs/cgroup/pids/parent/cgroup.procs
-# cat /sys/fs/cgroup/pids/parent/pids.current
-2
-#
-
-It should be noted that attempts to overcome the set limit (2 in this case) will
-fail:
-
-# cat /sys/fs/cgroup/pids/parent/pids.current
-2
-# ( /bin/echo "Here's some processes for you." | cat )
-sh: fork: Resource temporary unavailable
-#
-
-Even if we migrate to a child cgroup (which doesn't have a set limit), we will
-not be able to overcome the most stringent limit in the hierarchy (in this case,
-parent's):
-
-# echo $$ > /sys/fs/cgroup/pids/parent/child/cgroup.procs
-# cat /sys/fs/cgroup/pids/parent/pids.current
-2
-# cat /sys/fs/cgroup/pids/parent/child/pids.current
-2
-# cat /sys/fs/cgroup/pids/parent/child/pids.max
-max
-# ( /bin/echo "Here's some processes for you." | cat )
-sh: fork: Resource temporary unavailable
-#
-
-We can set a limit that is smaller than pids.current, which will stop any new
-processes from being forked at all (note that the shell itself counts towards
-pids.current):
-
-# echo 1 > /sys/fs/cgroup/pids/parent/pids.max
-# /bin/echo "We can't even spawn a single process now."
-sh: fork: Resource temporary unavailable
-# echo 0 > /sys/fs/cgroup/pids/parent/pids.max
-# /bin/echo "We can't even spawn a single process now."
-sh: fork: Resource temporary unavailable
-#
diff --git a/Documentation/cgroup-v1/rdma.rst b/Documentation/cgroup-v1/rdma.rst
new file mode 100644
index 000000000000..2fcb0a9bf790
--- /dev/null
+++ b/Documentation/cgroup-v1/rdma.rst
@@ -0,0 +1,117 @@
+===============
+RDMA Controller
+===============
+
+.. Contents
+
+   1. Overview
+     1-1. What is RDMA controller?
+     1-2. Why RDMA controller needed?
+     1-3. How is RDMA controller implemented?
+   2. Usage Examples
+
+1. Overview
+===========
+
+1-1. What is RDMA controller?
+-----------------------------
+
+RDMA controller allows user to limit RDMA/IB specific resources that a given
+set of processes can use. These processes are grouped using RDMA controller.
+
+RDMA controller defines two resources which can be limited for processes of a
+cgroup.
+
+1-2. Why RDMA controller needed?
+--------------------------------
+
+Currently user space applications can easily take away all the rdma verb
+specific resources such as AH, CQ, QP, MR etc. Due to which other applications
+in other cgroup or kernel space ULPs may not even get chance to allocate any
+rdma resources. This can lead to service unavailability.
+
+Therefore RDMA controller is needed through which resource consumption
+of processes can be limited. Through this controller different rdma
+resources can be accounted.
+
+1-3. How is RDMA controller implemented?
+----------------------------------------
+
+RDMA cgroup allows limit configuration of resources. Rdma cgroup maintains
+resource accounting per cgroup, per device using resource pool structure.
+Each such resource pool is limited up to 64 resources in given resource pool
+by rdma cgroup, which can be extended later if required.
+
+This resource pool object is linked to the cgroup css. Typically there
+are 0 to 4 resource pool instances per cgroup, per device in most use cases.
+But nothing limits to have it more. At present hundreds of RDMA devices per
+single cgroup may not be handled optimally, however there is no
+known use case or requirement for such configuration either.
+
+Since RDMA resources can be allocated from any process and can be freed by any
+of the child processes which shares the address space, rdma resources are
+always owned by the creator cgroup css. This allows process migration from one
+to other cgroup without major complexity of transferring resource ownership;
+because such ownership is not really present due to shared nature of
+rdma resources. Linking resources around css also ensures that cgroups can be
+deleted after processes migrated. This allow progress migration as well with
+active resources, even though that is not a primary use case.
+
+Whenever RDMA resource charging occurs, owner rdma cgroup is returned to
+the caller. Same rdma cgroup should be passed while uncharging the resource.
+This also allows process migrated with active RDMA resource to charge
+to new owner cgroup for new resource. It also allows to uncharge resource of
+a process from previously charged cgroup which is migrated to new cgroup,
+even though that is not a primary use case.
+
+Resource pool object is created in following situations.
+(a) User sets the limit and no previous resource pool exist for the device
+of interest for the cgroup.
+(b) No resource limits were configured, but IB/RDMA stack tries to
+charge the resource. So that it correctly uncharge them when applications are
+running without limits and later on when limits are enforced during uncharging,
+otherwise usage count will drop to negative.
+
+Resource pool is destroyed if all the resource limits are set to max and
+it is the last resource getting deallocated.
+
+User should set all the limit to max value if it intents to remove/unconfigure
+the resource pool for a particular device.
+
+IB stack honors limits enforced by the rdma controller. When application
+query about maximum resource limits of IB device, it returns minimum of
+what is configured by user for a given cgroup and what is supported by
+IB device.
+
+Following resources can be accounted by rdma controller.
+
+  ==========    =============================
+  hca_handle	Maximum number of HCA Handles
+  hca_object 	Maximum number of HCA Objects
+  ==========    =============================
+
+2. Usage Examples
+=================
+
+(a) Configure resource limit::
+
+	echo mlx4_0 hca_handle=2 hca_object=2000 > /sys/fs/cgroup/rdma/1/rdma.max
+	echo ocrdma1 hca_handle=3 > /sys/fs/cgroup/rdma/2/rdma.max
+
+(b) Query resource limit::
+
+	cat /sys/fs/cgroup/rdma/2/rdma.max
+	#Output:
+	mlx4_0 hca_handle=2 hca_object=2000
+	ocrdma1 hca_handle=3 hca_object=max
+
+(c) Query current usage::
+
+	cat /sys/fs/cgroup/rdma/2/rdma.current
+	#Output:
+	mlx4_0 hca_handle=1 hca_object=20
+	ocrdma1 hca_handle=1 hca_object=23
+
+(d) Delete resource limit::
+
+	echo echo mlx4_0 hca_handle=max hca_object=max > /sys/fs/cgroup/rdma/1/rdma.max
diff --git a/Documentation/cgroup-v1/rdma.txt b/Documentation/cgroup-v1/rdma.txt
deleted file mode 100644
index 9bdb7fd03f83..000000000000
--- a/Documentation/cgroup-v1/rdma.txt
+++ /dev/null
@@ -1,109 +0,0 @@
-				RDMA Controller
-				----------------
-
-Contents
---------
-
-1. Overview
-  1-1. What is RDMA controller?
-  1-2. Why RDMA controller needed?
-  1-3. How is RDMA controller implemented?
-2. Usage Examples
-
-1. Overview
-
-1-1. What is RDMA controller?
------------------------------
-
-RDMA controller allows user to limit RDMA/IB specific resources that a given
-set of processes can use. These processes are grouped using RDMA controller.
-
-RDMA controller defines two resources which can be limited for processes of a
-cgroup.
-
-1-2. Why RDMA controller needed?
---------------------------------
-
-Currently user space applications can easily take away all the rdma verb
-specific resources such as AH, CQ, QP, MR etc. Due to which other applications
-in other cgroup or kernel space ULPs may not even get chance to allocate any
-rdma resources. This can lead to service unavailability.
-
-Therefore RDMA controller is needed through which resource consumption
-of processes can be limited. Through this controller different rdma
-resources can be accounted.
-
-1-3. How is RDMA controller implemented?
-----------------------------------------
-
-RDMA cgroup allows limit configuration of resources. Rdma cgroup maintains
-resource accounting per cgroup, per device using resource pool structure.
-Each such resource pool is limited up to 64 resources in given resource pool
-by rdma cgroup, which can be extended later if required.
-
-This resource pool object is linked to the cgroup css. Typically there
-are 0 to 4 resource pool instances per cgroup, per device in most use cases.
-But nothing limits to have it more. At present hundreds of RDMA devices per
-single cgroup may not be handled optimally, however there is no
-known use case or requirement for such configuration either.
-
-Since RDMA resources can be allocated from any process and can be freed by any
-of the child processes which shares the address space, rdma resources are
-always owned by the creator cgroup css. This allows process migration from one
-to other cgroup without major complexity of transferring resource ownership;
-because such ownership is not really present due to shared nature of
-rdma resources. Linking resources around css also ensures that cgroups can be
-deleted after processes migrated. This allow progress migration as well with
-active resources, even though that is not a primary use case.
-
-Whenever RDMA resource charging occurs, owner rdma cgroup is returned to
-the caller. Same rdma cgroup should be passed while uncharging the resource.
-This also allows process migrated with active RDMA resource to charge
-to new owner cgroup for new resource. It also allows to uncharge resource of
-a process from previously charged cgroup which is migrated to new cgroup,
-even though that is not a primary use case.
-
-Resource pool object is created in following situations.
-(a) User sets the limit and no previous resource pool exist for the device
-of interest for the cgroup.
-(b) No resource limits were configured, but IB/RDMA stack tries to
-charge the resource. So that it correctly uncharge them when applications are
-running without limits and later on when limits are enforced during uncharging,
-otherwise usage count will drop to negative.
-
-Resource pool is destroyed if all the resource limits are set to max and
-it is the last resource getting deallocated.
-
-User should set all the limit to max value if it intents to remove/unconfigure
-the resource pool for a particular device.
-
-IB stack honors limits enforced by the rdma controller. When application
-query about maximum resource limits of IB device, it returns minimum of
-what is configured by user for a given cgroup and what is supported by
-IB device.
-
-Following resources can be accounted by rdma controller.
-  hca_handle	Maximum number of HCA Handles
-  hca_object 	Maximum number of HCA Objects
-
-2. Usage Examples
------------------
-
-(a) Configure resource limit:
-echo mlx4_0 hca_handle=2 hca_object=2000 > /sys/fs/cgroup/rdma/1/rdma.max
-echo ocrdma1 hca_handle=3 > /sys/fs/cgroup/rdma/2/rdma.max
-
-(b) Query resource limit:
-cat /sys/fs/cgroup/rdma/2/rdma.max
-#Output:
-mlx4_0 hca_handle=2 hca_object=2000
-ocrdma1 hca_handle=3 hca_object=max
-
-(c) Query current usage:
-cat /sys/fs/cgroup/rdma/2/rdma.current
-#Output:
-mlx4_0 hca_handle=1 hca_object=20
-ocrdma1 hca_handle=1 hca_object=23
-
-(d) Delete resource limit:
-echo echo mlx4_0 hca_handle=max hca_object=max > /sys/fs/cgroup/rdma/1/rdma.max
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index d06e9a59a9f4..cad797a8a39e 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt
@@ -98,7 +98,7 @@ A memory policy with a valid NodeList will be saved, as specified, for
 use at file creation time.  When a task allocates a file in the file
 system, the mount option memory policy will be applied with a NodeList,
 if any, modified by the calling task's cpuset constraints
-[See Documentation/cgroup-v1/cpusets.txt] and any optional flags, listed
+[See Documentation/cgroup-v1/cpusets.rst] and any optional flags, listed
 below.  If the resulting NodeLists is the empty set, the effective memory
 policy for the file will revert to "default" policy.
 
diff --git a/Documentation/scheduler/sched-deadline.txt b/Documentation/scheduler/sched-deadline.txt
index b14e03ff3528..a7514343b660 100644
--- a/Documentation/scheduler/sched-deadline.txt
+++ b/Documentation/scheduler/sched-deadline.txt
@@ -652,7 +652,7 @@ CONTENTS
 
  -deadline tasks cannot have an affinity mask smaller that the entire
  root_domain they are created on. However, affinities can be specified
- through the cpuset facility (Documentation/cgroup-v1/cpusets.txt).
+ through the cpuset facility (Documentation/cgroup-v1/cpusets.rst).
 
 5.1 SCHED_DEADLINE and cpusets HOWTO
 ------------------------------------
diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt
index edd861c94c1b..d1328890ef28 100644
--- a/Documentation/scheduler/sched-design-CFS.txt
+++ b/Documentation/scheduler/sched-design-CFS.txt
@@ -215,7 +215,7 @@ SCHED_BATCH) tasks.
 
    These options need CONFIG_CGROUPS to be defined, and let the administrator
    create arbitrary groups of tasks, using the "cgroup" pseudo filesystem.  See
-   Documentation/cgroup-v1/cgroups.txt for more information about this filesystem.
+   Documentation/cgroup-v1/cgroups.rst for more information about this filesystem.
 
 When CONFIG_FAIR_GROUP_SCHED is defined, a "cpu.shares" file is created for each
 group created using the pseudo filesystem.  See example steps below to create
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
index d8fce3e78457..c09f7a3fee66 100644
--- a/Documentation/scheduler/sched-rt-group.txt
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -133,7 +133,7 @@ This uses the cgroup virtual file system and "<cgroup>/cpu.rt_runtime_us"
 to control the CPU time reserved for each control group.
 
 For more information on working with control groups, you should read
-Documentation/cgroup-v1/cgroups.txt as well.
+Documentation/cgroup-v1/cgroups.rst as well.
 
 Group settings are checked against the following limits in order to keep the
 configuration schedulable:
diff --git a/Documentation/vm/numa.rst b/Documentation/vm/numa.rst
index 5cae13e9a08b..0d830edae8fe 100644
--- a/Documentation/vm/numa.rst
+++ b/Documentation/vm/numa.rst
@@ -67,7 +67,7 @@ nodes.  Each emulated node will manage a fraction of the underlying cells'
 physical memory.  NUMA emluation is useful for testing NUMA kernel and
 application features on non-NUMA platforms, and as a sort of memory resource
 management mechanism when used together with cpusets.
-[see Documentation/cgroup-v1/cpusets.txt]
+[see Documentation/cgroup-v1/cpusets.rst]
 
 For each node with memory, Linux constructs an independent memory management
 subsystem, complete with its own free page lists, in-use page lists, usage
@@ -114,7 +114,7 @@ allocation behavior using Linux NUMA memory policy. [see
 
 System administrators can restrict the CPUs and nodes' memories that a non-
 privileged user can specify in the scheduling or NUMA commands and functions
-using control groups and CPUsets.  [see Documentation/cgroup-v1/cpusets.txt]
+using control groups and CPUsets.  [see Documentation/cgroup-v1/cpusets.rst]
 
 On architectures that do not hide memoryless nodes, Linux will include only
 zones [nodes] with memory in the zonelists.  This means that for a memoryless
diff --git a/Documentation/vm/page_migration.rst b/Documentation/vm/page_migration.rst
index f68d61335abb..35bba27d5fff 100644
--- a/Documentation/vm/page_migration.rst
+++ b/Documentation/vm/page_migration.rst
@@ -41,7 +41,7 @@ locations.
 Larger installations usually partition the system using cpusets into
 sections of nodes. Paul Jackson has equipped cpusets with the ability to
 move pages when a task is moved to another cpuset (See
-Documentation/cgroup-v1/cpusets.txt).
+Documentation/cgroup-v1/cpusets.rst).
 Cpusets allows the automation of process locality. If a task is moved to
 a new cpuset then also all its pages are moved with it so that the
 performance of the process does not sink dramatically. Also the pages
diff --git a/Documentation/vm/unevictable-lru.rst b/Documentation/vm/unevictable-lru.rst
index b8e29f977f2d..c6d94118fbcc 100644
--- a/Documentation/vm/unevictable-lru.rst
+++ b/Documentation/vm/unevictable-lru.rst
@@ -98,7 +98,7 @@ Memory Control Group Interaction
 --------------------------------
 
 The unevictable LRU facility interacts with the memory control group [aka
-memory controller; see Documentation/cgroup-v1/memory.txt] by extending the
+memory controller; see Documentation/cgroup-v1/memory.rst] by extending the
 lru_list enum.
 
 The memory controller data structure automatically gets a per-zone unevictable
diff --git a/Documentation/x86/x86_64/fake-numa-for-cpusets.rst b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
index 74fbb78b3c67..a6926cd40f70 100644
--- a/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
+++ b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
@@ -15,7 +15,7 @@ assign them to cpusets and their attached tasks.  This is a way of limiting the
 amount of system memory that are available to a certain class of tasks.
 
 For more information on the features of cpusets, see
-Documentation/cgroup-v1/cpusets.txt.
+Documentation/cgroup-v1/cpusets.rst.
 There are a number of different configurations you can use for your needs.  For
 more information on the numa=fake command line option and its various ways of
 configuring fake nodes, see Documentation/x86/x86_64/boot-options.txt.
@@ -40,7 +40,7 @@ A machine may be split as follows with "numa=fake=4*512," as reported by dmesg::
 	On node 3 totalpages: 131072
 
 Now following the instructions for mounting the cpusets filesystem from
-Documentation/cgroup-v1/cpusets.txt, you can assign fake nodes (i.e. contiguous memory
+Documentation/cgroup-v1/cpusets.rst, you can assign fake nodes (i.e. contiguous memory
 address spaces) to individual cpusets::
 
 	[root@xroads /]# mkdir exampleset
-- 
cgit 


From 28aedd7ee214eb63a2e6924b5ec2b081aa7b3953 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:56 -0300
Subject: docs: pps.txt: convert to ReST and rename to pps.rst

This file is already in a good shape: just its title and
adding some literal block markups is needed for it to be
part of the document.

While it has a small chapter with sysfs stuff, most of
the document is focused on driver development.

As it describes a kernel API, move it to the driver-api
directory.

In order to avoid conflicts, let's add an :orphan: tag
to it, to be removed when added to the driver-api book.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Rodolfo Giometti <giometti@enneenne.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/pps.rst | 242 +++++++++++++++++++++++++++++++++++++++
 Documentation/pps/pps.txt        | 239 --------------------------------------
 2 files changed, 242 insertions(+), 239 deletions(-)
 create mode 100644 Documentation/driver-api/pps.rst
 delete mode 100644 Documentation/pps/pps.txt

(limited to 'Documentation')

diff --git a/Documentation/driver-api/pps.rst b/Documentation/driver-api/pps.rst
new file mode 100644
index 000000000000..1456d2c32ebd
--- /dev/null
+++ b/Documentation/driver-api/pps.rst
@@ -0,0 +1,242 @@
+:orphan:
+
+======================
+PPS - Pulse Per Second
+======================
+
+Copyright (C) 2007 Rodolfo Giometti <giometti@enneenne.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+
+
+Overview
+--------
+
+LinuxPPS provides a programming interface (API) to define in the
+system several PPS sources.
+
+PPS means "pulse per second" and a PPS source is just a device which
+provides a high precision signal each second so that an application
+can use it to adjust system clock time.
+
+A PPS source can be connected to a serial port (usually to the Data
+Carrier Detect pin) or to a parallel port (ACK-pin) or to a special
+CPU's GPIOs (this is the common case in embedded systems) but in each
+case when a new pulse arrives the system must apply to it a timestamp
+and record it for userland.
+
+Common use is the combination of the NTPD as userland program, with a
+GPS receiver as PPS source, to obtain a wallclock-time with
+sub-millisecond synchronisation to UTC.
+
+
+RFC considerations
+------------------
+
+While implementing a PPS API as RFC 2783 defines and using an embedded
+CPU GPIO-Pin as physical link to the signal, I encountered a deeper
+problem:
+
+   At startup it needs a file descriptor as argument for the function
+   time_pps_create().
+
+This implies that the source has a /dev/... entry. This assumption is
+OK for the serial and parallel port, where you can do something
+useful besides(!) the gathering of timestamps as it is the central
+task for a PPS API. But this assumption does not work for a single
+purpose GPIO line. In this case even basic file-related functionality
+(like read() and write()) makes no sense at all and should not be a
+precondition for the use of a PPS API.
+
+The problem can be simply solved if you consider that a PPS source is
+not always connected with a GPS data source.
+
+So your programs should check if the GPS data source (the serial port
+for instance) is a PPS source too, and if not they should provide the
+possibility to open another device as PPS source.
+
+In LinuxPPS the PPS sources are simply char devices usually mapped
+into files /dev/pps0, /dev/pps1, etc.
+
+
+PPS with USB to serial devices
+------------------------------
+
+It is possible to grab the PPS from an USB to serial device. However,
+you should take into account the latencies and jitter introduced by
+the USB stack. Users have reported clock instability around +-1ms when
+synchronized with PPS through USB. With USB 2.0, jitter may decrease
+down to the order of 125 microseconds.
+
+This may be suitable for time server synchronization with NTP because
+of its undersampling and algorithms.
+
+If your device doesn't report PPS, you can check that the feature is
+supported by its driver. Most of the time, you only need to add a call
+to usb_serial_handle_dcd_change after checking the DCD status (see
+ch341 and pl2303 examples).
+
+
+Coding example
+--------------
+
+To register a PPS source into the kernel you should define a struct
+pps_source_info as follows::
+
+    static struct pps_source_info pps_ktimer_info = {
+	    .name         = "ktimer",
+	    .path         = "",
+	    .mode         = PPS_CAPTUREASSERT | PPS_OFFSETASSERT |
+			    PPS_ECHOASSERT |
+			    PPS_CANWAIT | PPS_TSFMT_TSPEC,
+	    .echo         = pps_ktimer_echo,
+	    .owner        = THIS_MODULE,
+    };
+
+and then calling the function pps_register_source() in your
+initialization routine as follows::
+
+    source = pps_register_source(&pps_ktimer_info,
+			PPS_CAPTUREASSERT | PPS_OFFSETASSERT);
+
+The pps_register_source() prototype is::
+
+  int pps_register_source(struct pps_source_info *info, int default_params)
+
+where "info" is a pointer to a structure that describes a particular
+PPS source, "default_params" tells the system what the initial default
+parameters for the device should be (it is obvious that these parameters
+must be a subset of ones defined in the struct
+pps_source_info which describe the capabilities of the driver).
+
+Once you have registered a new PPS source into the system you can
+signal an assert event (for example in the interrupt handler routine)
+just using::
+
+    pps_event(source, &ts, PPS_CAPTUREASSERT, ptr)
+
+where "ts" is the event's timestamp.
+
+The same function may also run the defined echo function
+(pps_ktimer_echo(), passing to it the "ptr" pointer) if the user
+asked for that... etc..
+
+Please see the file drivers/pps/clients/pps-ktimer.c for example code.
+
+
+SYSFS support
+-------------
+
+If the SYSFS filesystem is enabled in the kernel it provides a new class::
+
+   $ ls /sys/class/pps/
+   pps0/  pps1/  pps2/
+
+Every directory is the ID of a PPS sources defined in the system and
+inside you find several files::
+
+   $ ls -F /sys/class/pps/pps0/
+   assert     dev        mode       path       subsystem@
+   clear      echo       name       power/     uevent
+
+
+Inside each "assert" and "clear" file you can find the timestamp and a
+sequence number::
+
+   $ cat /sys/class/pps/pps0/assert
+   1170026870.983207967#8
+
+Where before the "#" is the timestamp in seconds; after it is the
+sequence number. Other files are:
+
+ * echo: reports if the PPS source has an echo function or not;
+
+ * mode: reports available PPS functioning modes;
+
+ * name: reports the PPS source's name;
+
+ * path: reports the PPS source's device path, that is the device the
+   PPS source is connected to (if it exists).
+
+
+Testing the PPS support
+-----------------------
+
+In order to test the PPS support even without specific hardware you can use
+the pps-ktimer driver (see the client subsection in the PPS configuration menu)
+and the userland tools available in your distribution's pps-tools package,
+http://linuxpps.org , or https://github.com/redlab-i/pps-tools.
+
+Once you have enabled the compilation of pps-ktimer just modprobe it (if
+not statically compiled)::
+
+   # modprobe pps-ktimer
+
+and the run ppstest as follow::
+
+   $ ./ppstest /dev/pps1
+   trying PPS source "/dev/pps1"
+   found PPS source "/dev/pps1"
+   ok, found 1 source(s), now start fetching data...
+   source 0 - assert 1186592699.388832443, sequence: 364 - clear  0.000000000, sequence: 0
+   source 0 - assert 1186592700.388931295, sequence: 365 - clear  0.000000000, sequence: 0
+   source 0 - assert 1186592701.389032765, sequence: 366 - clear  0.000000000, sequence: 0
+
+Please note that to compile userland programs, you need the file timepps.h.
+This is available in the pps-tools repository mentioned above.
+
+
+Generators
+----------
+
+Sometimes one needs to be able not only to catch PPS signals but to produce
+them also. For example, running a distributed simulation, which requires
+computers' clock to be synchronized very tightly. One way to do this is to
+invent some complicated hardware solutions but it may be neither necessary
+nor affordable. The cheap way is to load a PPS generator on one of the
+computers (master) and PPS clients on others (slaves), and use very simple
+cables to deliver signals using parallel ports, for example.
+
+Parallel port cable pinout::
+
+	pin	name	master      slave
+	1	STROBE	  *------     *
+	2	D0	  *     |     *
+	3	D1	  *     |     *
+	4	D2	  *     |     *
+	5	D3	  *     |     *
+	6	D4	  *     |     *
+	7	D5	  *     |     *
+	8	D6	  *     |     *
+	9	D7	  *     |     *
+	10	ACK	  *     ------*
+	11	BUSY	  *           *
+	12	PE	  *           *
+	13	SEL	  *           *
+	14	AUTOFD	  *           *
+	15	ERROR	  *           *
+	16	INIT	  *           *
+	17	SELIN	  *           *
+	18-25	GND	  *-----------*
+
+Please note that parallel port interrupt occurs only on high->low transition,
+so it is used for PPS assert edge. PPS clear edge can be determined only
+using polling in the interrupt handler which actually can be done way more
+precisely because interrupt handling delays can be quite big and random. So
+current parport PPS generator implementation (pps_gen_parport module) is
+geared towards using the clear edge for time synchronization.
+
+Clear edge polling is done with disabled interrupts so it's better to select
+delay between assert and clear edge as small as possible to reduce system
+latencies. But if it is too small slave won't be able to capture clear edge
+transition. The default of 30us should be good enough in most situations.
+The delay can be selected using 'delay' pps_gen_parport module parameter.
diff --git a/Documentation/pps/pps.txt b/Documentation/pps/pps.txt
deleted file mode 100644
index 99f5d8c4c652..000000000000
--- a/Documentation/pps/pps.txt
+++ /dev/null
@@ -1,239 +0,0 @@
-
-			PPS - Pulse Per Second
-			----------------------
-
-(C) Copyright 2007 Rodolfo Giometti <giometti@enneenne.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-
-
-Overview
---------
-
-LinuxPPS provides a programming interface (API) to define in the
-system several PPS sources.
-
-PPS means "pulse per second" and a PPS source is just a device which
-provides a high precision signal each second so that an application
-can use it to adjust system clock time.
-
-A PPS source can be connected to a serial port (usually to the Data
-Carrier Detect pin) or to a parallel port (ACK-pin) or to a special
-CPU's GPIOs (this is the common case in embedded systems) but in each
-case when a new pulse arrives the system must apply to it a timestamp
-and record it for userland.
-
-Common use is the combination of the NTPD as userland program, with a
-GPS receiver as PPS source, to obtain a wallclock-time with
-sub-millisecond synchronisation to UTC.
-
-
-RFC considerations
-------------------
-
-While implementing a PPS API as RFC 2783 defines and using an embedded
-CPU GPIO-Pin as physical link to the signal, I encountered a deeper
-problem:
-
-   At startup it needs a file descriptor as argument for the function
-   time_pps_create().
-
-This implies that the source has a /dev/... entry. This assumption is
-OK for the serial and parallel port, where you can do something
-useful besides(!) the gathering of timestamps as it is the central
-task for a PPS API. But this assumption does not work for a single
-purpose GPIO line. In this case even basic file-related functionality
-(like read() and write()) makes no sense at all and should not be a
-precondition for the use of a PPS API.
-
-The problem can be simply solved if you consider that a PPS source is
-not always connected with a GPS data source.
-
-So your programs should check if the GPS data source (the serial port
-for instance) is a PPS source too, and if not they should provide the
-possibility to open another device as PPS source.
-
-In LinuxPPS the PPS sources are simply char devices usually mapped
-into files /dev/pps0, /dev/pps1, etc.
-
-
-PPS with USB to serial devices
-------------------------------
-
-It is possible to grab the PPS from an USB to serial device. However,
-you should take into account the latencies and jitter introduced by
-the USB stack. Users have reported clock instability around +-1ms when
-synchronized with PPS through USB. With USB 2.0, jitter may decrease
-down to the order of 125 microseconds.
-
-This may be suitable for time server synchronization with NTP because
-of its undersampling and algorithms.
-
-If your device doesn't report PPS, you can check that the feature is
-supported by its driver. Most of the time, you only need to add a call
-to usb_serial_handle_dcd_change after checking the DCD status (see
-ch341 and pl2303 examples).
-
-
-Coding example
---------------
-
-To register a PPS source into the kernel you should define a struct
-pps_source_info as follows:
-
-    static struct pps_source_info pps_ktimer_info = {
-	    .name         = "ktimer",
-	    .path         = "",
-	    .mode         = PPS_CAPTUREASSERT | PPS_OFFSETASSERT |
-			    PPS_ECHOASSERT |
-			    PPS_CANWAIT | PPS_TSFMT_TSPEC,
-	    .echo         = pps_ktimer_echo,
-	    .owner        = THIS_MODULE,
-    };
-
-and then calling the function pps_register_source() in your
-initialization routine as follows:
-
-    source = pps_register_source(&pps_ktimer_info,
-			PPS_CAPTUREASSERT | PPS_OFFSETASSERT);
-
-The pps_register_source() prototype is:
-
-  int pps_register_source(struct pps_source_info *info, int default_params)
-
-where "info" is a pointer to a structure that describes a particular
-PPS source, "default_params" tells the system what the initial default
-parameters for the device should be (it is obvious that these parameters
-must be a subset of ones defined in the struct
-pps_source_info which describe the capabilities of the driver).
-
-Once you have registered a new PPS source into the system you can
-signal an assert event (for example in the interrupt handler routine)
-just using:
-
-    pps_event(source, &ts, PPS_CAPTUREASSERT, ptr)
-
-where "ts" is the event's timestamp.
-
-The same function may also run the defined echo function
-(pps_ktimer_echo(), passing to it the "ptr" pointer) if the user
-asked for that... etc..
-
-Please see the file drivers/pps/clients/pps-ktimer.c for example code.
-
-
-SYSFS support
--------------
-
-If the SYSFS filesystem is enabled in the kernel it provides a new class:
-
-   $ ls /sys/class/pps/
-   pps0/  pps1/  pps2/
-
-Every directory is the ID of a PPS sources defined in the system and
-inside you find several files:
-
-   $ ls -F /sys/class/pps/pps0/
-   assert     dev        mode       path       subsystem@
-   clear      echo       name       power/     uevent
-
-
-Inside each "assert" and "clear" file you can find the timestamp and a
-sequence number:
-
-   $ cat /sys/class/pps/pps0/assert
-   1170026870.983207967#8
-
-Where before the "#" is the timestamp in seconds; after it is the
-sequence number. Other files are:
-
- * echo: reports if the PPS source has an echo function or not;
-
- * mode: reports available PPS functioning modes;
-
- * name: reports the PPS source's name;
-
- * path: reports the PPS source's device path, that is the device the
-   PPS source is connected to (if it exists).
-
-
-Testing the PPS support
------------------------
-
-In order to test the PPS support even without specific hardware you can use
-the pps-ktimer driver (see the client subsection in the PPS configuration menu)
-and the userland tools available in your distribution's pps-tools package,
-http://linuxpps.org , or https://github.com/redlab-i/pps-tools.
-
-Once you have enabled the compilation of pps-ktimer just modprobe it (if
-not statically compiled):
-
-   # modprobe pps-ktimer
-
-and the run ppstest as follow:
-
-   $ ./ppstest /dev/pps1
-   trying PPS source "/dev/pps1"
-   found PPS source "/dev/pps1"
-   ok, found 1 source(s), now start fetching data...
-   source 0 - assert 1186592699.388832443, sequence: 364 - clear  0.000000000, sequence: 0
-   source 0 - assert 1186592700.388931295, sequence: 365 - clear  0.000000000, sequence: 0
-   source 0 - assert 1186592701.389032765, sequence: 366 - clear  0.000000000, sequence: 0
-
-Please note that to compile userland programs, you need the file timepps.h.
-This is available in the pps-tools repository mentioned above.
-
-
-Generators
-----------
-
-Sometimes one needs to be able not only to catch PPS signals but to produce
-them also. For example, running a distributed simulation, which requires
-computers' clock to be synchronized very tightly. One way to do this is to
-invent some complicated hardware solutions but it may be neither necessary
-nor affordable. The cheap way is to load a PPS generator on one of the
-computers (master) and PPS clients on others (slaves), and use very simple
-cables to deliver signals using parallel ports, for example.
-
-Parallel port cable pinout:
-pin	name	master      slave
-1	STROBE	  *------     *
-2	D0	  *     |     *
-3	D1	  *     |     *
-4	D2	  *     |     *
-5	D3	  *     |     *
-6	D4	  *     |     *
-7	D5	  *     |     *
-8	D6	  *     |     *
-9	D7	  *     |     *
-10	ACK	  *     ------*
-11	BUSY	  *           *
-12	PE	  *           *
-13	SEL	  *           *
-14	AUTOFD	  *           *
-15	ERROR	  *           *
-16	INIT	  *           *
-17	SELIN	  *           *
-18-25	GND	  *-----------*
-
-Please note that parallel port interrupt occurs only on high->low transition,
-so it is used for PPS assert edge. PPS clear edge can be determined only
-using polling in the interrupt handler which actually can be done way more
-precisely because interrupt handling delays can be quite big and random. So
-current parport PPS generator implementation (pps_gen_parport module) is
-geared towards using the clear edge for time synchronization.
-
-Clear edge polling is done with disabled interrupts so it's better to select
-delay between assert and clear edge as small as possible to reduce system
-latencies. But if it is too small slave won't be able to capture clear edge
-transition. The default of 30us should be good enough in most situations.
-The delay can be selected using 'delay' pps_gen_parport module parameter.
-- 
cgit 


From 329f00415a424063c23f75ff77f7d9c67916324d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:57 -0300
Subject: docs: ptp.txt: convert to ReST and move to driver-api

The conversion is trivial: just adjust title markups.

In order to avoid conflicts, let's add an :orphan: tag
to it, to be removed when this file gets added to the
driver-api book.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/ptp.rst          | 96 +++++++++++++++++++++++++++++++
 Documentation/networking/timestamping.txt |  2 +-
 Documentation/ptp/ptp.txt                 | 86 ---------------------------
 3 files changed, 97 insertions(+), 87 deletions(-)
 create mode 100644 Documentation/driver-api/ptp.rst
 delete mode 100644 Documentation/ptp/ptp.txt

(limited to 'Documentation')

diff --git a/Documentation/driver-api/ptp.rst b/Documentation/driver-api/ptp.rst
new file mode 100644
index 000000000000..b6e65d66d37a
--- /dev/null
+++ b/Documentation/driver-api/ptp.rst
@@ -0,0 +1,96 @@
+:orphan:
+
+===========================================
+PTP hardware clock infrastructure for Linux
+===========================================
+
+  This patch set introduces support for IEEE 1588 PTP clocks in
+  Linux. Together with the SO_TIMESTAMPING socket options, this
+  presents a standardized method for developing PTP user space
+  programs, synchronizing Linux with external clocks, and using the
+  ancillary features of PTP hardware clocks.
+
+  A new class driver exports a kernel interface for specific clock
+  drivers and a user space interface. The infrastructure supports a
+  complete set of PTP hardware clock functionality.
+
+  + Basic clock operations
+    - Set time
+    - Get time
+    - Shift the clock by a given offset atomically
+    - Adjust clock frequency
+
+  + Ancillary clock features
+    - Time stamp external events
+    - Period output signals configurable from user space
+    - Synchronization of the Linux system time via the PPS subsystem
+
+PTP hardware clock kernel API
+=============================
+
+   A PTP clock driver registers itself with the class driver. The
+   class driver handles all of the dealings with user space. The
+   author of a clock driver need only implement the details of
+   programming the clock hardware. The clock driver notifies the class
+   driver of asynchronous events (alarms and external time stamps) via
+   a simple message passing interface.
+
+   The class driver supports multiple PTP clock drivers. In normal use
+   cases, only one PTP clock is needed. However, for testing and
+   development, it can be useful to have more than one clock in a
+   single system, in order to allow performance comparisons.
+
+PTP hardware clock user space API
+=================================
+
+   The class driver also creates a character device for each
+   registered clock. User space can use an open file descriptor from
+   the character device as a POSIX clock id and may call
+   clock_gettime, clock_settime, and clock_adjtime.  These calls
+   implement the basic clock operations.
+
+   User space programs may control the clock using standardized
+   ioctls. A program may query, enable, configure, and disable the
+   ancillary clock features. User space can receive time stamped
+   events via blocking read() and poll().
+
+Writing clock drivers
+=====================
+
+   Clock drivers include include/linux/ptp_clock_kernel.h and register
+   themselves by presenting a 'struct ptp_clock_info' to the
+   registration method. Clock drivers must implement all of the
+   functions in the interface. If a clock does not offer a particular
+   ancillary feature, then the driver should just return -EOPNOTSUPP
+   from those functions.
+
+   Drivers must ensure that all of the methods in interface are
+   reentrant. Since most hardware implementations treat the time value
+   as a 64 bit integer accessed as two 32 bit registers, drivers
+   should use spin_lock_irqsave/spin_unlock_irqrestore to protect
+   against concurrent access. This locking cannot be accomplished in
+   class driver, since the lock may also be needed by the clock
+   driver's interrupt service routine.
+
+Supported hardware
+==================
+
+   * Freescale eTSEC gianfar
+
+     - 2 Time stamp external triggers, programmable polarity (opt. interrupt)
+     - 2 Alarm registers (optional interrupt)
+     - 3 Periodic signals (optional interrupt)
+
+   * National DP83640
+
+     - 6 GPIOs programmable as inputs or outputs
+     - 6 GPIOs with dedicated functions (LED/JTAG/clock) can also be
+       used as general inputs or outputs
+     - GPIO inputs can time stamp external triggers
+     - GPIO outputs can produce periodic signals
+     - 1 interrupt pin
+
+   * Intel IXP465
+
+     - Auxiliary Slave/Master Mode Snapshot (optional interrupt)
+     - Target Time (optional interrupt)
diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
index bbdaf8990031..8dd6333c3270 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -368,7 +368,7 @@ ts[1] used to hold hardware timestamps converted to system time.
 Instead, expose the hardware clock device on the NIC directly as
 a HW PTP clock source, to allow time conversion in userspace and
 optionally synchronize system time with a userspace PTP stack such
-as linuxptp. For the PTP clock API, see Documentation/ptp/ptp.txt.
+as linuxptp. For the PTP clock API, see Documentation/driver-api/ptp.rst.
 
 Note that if the SO_TIMESTAMP or SO_TIMESTAMPNS option is enabled
 together with SO_TIMESTAMPING using SOF_TIMESTAMPING_SOFTWARE, a false
diff --git a/Documentation/ptp/ptp.txt b/Documentation/ptp/ptp.txt
deleted file mode 100644
index 11e904ee073f..000000000000
--- a/Documentation/ptp/ptp.txt
+++ /dev/null
@@ -1,86 +0,0 @@
-
-* PTP hardware clock infrastructure for Linux
-
-  This patch set introduces support for IEEE 1588 PTP clocks in
-  Linux. Together with the SO_TIMESTAMPING socket options, this
-  presents a standardized method for developing PTP user space
-  programs, synchronizing Linux with external clocks, and using the
-  ancillary features of PTP hardware clocks.
-
-  A new class driver exports a kernel interface for specific clock
-  drivers and a user space interface. The infrastructure supports a
-  complete set of PTP hardware clock functionality.
-
-  + Basic clock operations
-    - Set time
-    - Get time
-    - Shift the clock by a given offset atomically
-    - Adjust clock frequency
-
-  + Ancillary clock features
-    - Time stamp external events
-    - Period output signals configurable from user space
-    - Synchronization of the Linux system time via the PPS subsystem
-
-** PTP hardware clock kernel API
-
-   A PTP clock driver registers itself with the class driver. The
-   class driver handles all of the dealings with user space. The
-   author of a clock driver need only implement the details of
-   programming the clock hardware. The clock driver notifies the class
-   driver of asynchronous events (alarms and external time stamps) via
-   a simple message passing interface.
-
-   The class driver supports multiple PTP clock drivers. In normal use
-   cases, only one PTP clock is needed. However, for testing and
-   development, it can be useful to have more than one clock in a
-   single system, in order to allow performance comparisons.
-
-** PTP hardware clock user space API
-
-   The class driver also creates a character device for each
-   registered clock. User space can use an open file descriptor from
-   the character device as a POSIX clock id and may call
-   clock_gettime, clock_settime, and clock_adjtime.  These calls
-   implement the basic clock operations.
-
-   User space programs may control the clock using standardized
-   ioctls. A program may query, enable, configure, and disable the
-   ancillary clock features. User space can receive time stamped
-   events via blocking read() and poll().
-
-** Writing clock drivers
-
-   Clock drivers include include/linux/ptp_clock_kernel.h and register
-   themselves by presenting a 'struct ptp_clock_info' to the
-   registration method. Clock drivers must implement all of the
-   functions in the interface. If a clock does not offer a particular
-   ancillary feature, then the driver should just return -EOPNOTSUPP
-   from those functions.
-
-   Drivers must ensure that all of the methods in interface are
-   reentrant. Since most hardware implementations treat the time value
-   as a 64 bit integer accessed as two 32 bit registers, drivers
-   should use spin_lock_irqsave/spin_unlock_irqrestore to protect
-   against concurrent access. This locking cannot be accomplished in
-   class driver, since the lock may also be needed by the clock
-   driver's interrupt service routine.
-
-** Supported hardware
-
-   + Freescale eTSEC gianfar
-     - 2 Time stamp external triggers, programmable polarity (opt. interrupt)
-     - 2 Alarm registers (optional interrupt)
-     - 3 Periodic signals (optional interrupt)
-
-   + National DP83640
-     - 6 GPIOs programmable as inputs or outputs
-     - 6 GPIOs with dedicated functions (LED/JTAG/clock) can also be
-       used as general inputs or outputs
-     - GPIO inputs can time stamp external triggers
-     - GPIO outputs can produce periodic signals
-     - 1 interrupt pin
-
-   + Intel IXP465
-     - Auxiliary Slave/Master Mode Snapshot (optional interrupt)
-     - Target Time (optional interrupt)
-- 
cgit 


From bdf3a950fb46600a2c34e5f3d9810c274e59a031 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:58 -0300
Subject: docs: riscv: convert docs to ReST and rename to *.rst

The conversion here is trivial:
 - Adjust the document title's markup
 - Do some whitespace alignment;
 - mark literal blocks;
 - Use ReST way to markup indented lists.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/riscv/index.rst |  17 +++
 Documentation/riscv/pmu.rst   | 255 ++++++++++++++++++++++++++++++++++++++++++
 Documentation/riscv/pmu.txt   | 249 -----------------------------------------
 3 files changed, 272 insertions(+), 249 deletions(-)
 create mode 100644 Documentation/riscv/index.rst
 create mode 100644 Documentation/riscv/pmu.rst
 delete mode 100644 Documentation/riscv/pmu.txt

(limited to 'Documentation')

diff --git a/Documentation/riscv/index.rst b/Documentation/riscv/index.rst
new file mode 100644
index 000000000000..c4b906d9b5a7
--- /dev/null
+++ b/Documentation/riscv/index.rst
@@ -0,0 +1,17 @@
+:orphan:
+
+===================
+RISC-V architecture
+===================
+
+.. toctree::
+    :maxdepth: 1
+
+    pmu
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/riscv/pmu.rst b/Documentation/riscv/pmu.rst
new file mode 100644
index 000000000000..acb216b99c26
--- /dev/null
+++ b/Documentation/riscv/pmu.rst
@@ -0,0 +1,255 @@
+===================================
+Supporting PMUs on RISC-V platforms
+===================================
+
+Alan Kao <alankao@andestech.com>, Mar 2018
+
+Introduction
+------------
+
+As of this writing, perf_event-related features mentioned in The RISC-V ISA
+Privileged Version 1.10 are as follows:
+(please check the manual for more details)
+
+* [m|s]counteren
+* mcycle[h], cycle[h]
+* minstret[h], instret[h]
+* mhpeventx, mhpcounterx[h]
+
+With such function set only, porting perf would require a lot of work, due to
+the lack of the following general architectural performance monitoring features:
+
+* Enabling/Disabling counters
+  Counters are just free-running all the time in our case.
+* Interrupt caused by counter overflow
+  No such feature in the spec.
+* Interrupt indicator
+  It is not possible to have many interrupt ports for all counters, so an
+  interrupt indicator is required for software to tell which counter has
+  just overflowed.
+* Writing to counters
+  There will be an SBI to support this since the kernel cannot modify the
+  counters [1].  Alternatively, some vendor considers to implement
+  hardware-extension for M-S-U model machines to write counters directly.
+
+This document aims to provide developers a quick guide on supporting their
+PMUs in the kernel.  The following sections briefly explain perf' mechanism
+and todos.
+
+You may check previous discussions here [1][2].  Also, it might be helpful
+to check the appendix for related kernel structures.
+
+
+1. Initialization
+-----------------
+
+*riscv_pmu* is a global pointer of type *struct riscv_pmu*, which contains
+various methods according to perf's internal convention and PMU-specific
+parameters.  One should declare such instance to represent the PMU.  By default,
+*riscv_pmu* points to a constant structure *riscv_base_pmu*, which has very
+basic support to a baseline QEMU model.
+
+Then he/she can either assign the instance's pointer to *riscv_pmu* so that
+the minimal and already-implemented logic can be leveraged, or invent his/her
+own *riscv_init_platform_pmu* implementation.
+
+In other words, existing sources of *riscv_base_pmu* merely provide a
+reference implementation.  Developers can flexibly decide how many parts they
+can leverage, and in the most extreme case, they can customize every function
+according to their needs.
+
+
+2. Event Initialization
+-----------------------
+
+When a user launches a perf command to monitor some events, it is first
+interpreted by the userspace perf tool into multiple *perf_event_open*
+system calls, and then each of them calls to the body of *event_init*
+member function that was assigned in the previous step.  In *riscv_base_pmu*'s
+case, it is *riscv_event_init*.
+
+The main purpose of this function is to translate the event provided by user
+into bitmap, so that HW-related control registers or counters can directly be
+manipulated.  The translation is based on the mappings and methods provided in
+*riscv_pmu*.
+
+Note that some features can be done in this stage as well:
+
+(1) interrupt setting, which is stated in the next section;
+(2) privilege level setting (user space only, kernel space only, both);
+(3) destructor setting.  Normally it is sufficient to apply *riscv_destroy_event*;
+(4) tweaks for non-sampling events, which will be utilized by functions such as
+    *perf_adjust_period*, usually something like the follows::
+
+      if (!is_sampling_event(event)) {
+              hwc->sample_period = x86_pmu.max_period;
+              hwc->last_period = hwc->sample_period;
+              local64_set(&hwc->period_left, hwc->sample_period);
+      }
+
+In the case of *riscv_base_pmu*, only (3) is provided for now.
+
+
+3. Interrupt
+------------
+
+3.1. Interrupt Initialization
+
+This often occurs at the beginning of the *event_init* method. In common
+practice, this should be a code segment like::
+
+  int x86_reserve_hardware(void)
+  {
+        int err = 0;
+
+        if (!atomic_inc_not_zero(&pmc_refcount)) {
+                mutex_lock(&pmc_reserve_mutex);
+                if (atomic_read(&pmc_refcount) == 0) {
+                        if (!reserve_pmc_hardware())
+                                err = -EBUSY;
+                        else
+                                reserve_ds_buffers();
+                }
+                if (!err)
+                        atomic_inc(&pmc_refcount);
+                mutex_unlock(&pmc_reserve_mutex);
+        }
+
+        return err;
+  }
+
+And the magic is in *reserve_pmc_hardware*, which usually does atomic
+operations to make implemented IRQ accessible from some global function pointer.
+*release_pmc_hardware* serves the opposite purpose, and it is used in event
+destructors mentioned in previous section.
+
+(Note: From the implementations in all the architectures, the *reserve/release*
+pair are always IRQ settings, so the *pmc_hardware* seems somehow misleading.
+It does NOT deal with the binding between an event and a physical counter,
+which will be introduced in the next section.)
+
+3.2. IRQ Structure
+
+Basically, a IRQ runs the following pseudo code::
+
+  for each hardware counter that triggered this overflow
+
+      get the event of this counter
+
+      // following two steps are defined as *read()*,
+      // check the section Reading/Writing Counters for details.
+      count the delta value since previous interrupt
+      update the event->count (# event occurs) by adding delta, and
+                 event->hw.period_left by subtracting delta
+
+      if the event overflows
+          sample data
+          set the counter appropriately for the next overflow
+
+          if the event overflows again
+              too frequently, throttle this event
+          fi
+      fi
+
+  end for
+
+However as of this writing, none of the RISC-V implementations have designed an
+interrupt for perf, so the details are to be completed in the future.
+
+4. Reading/Writing Counters
+---------------------------
+
+They seem symmetric but perf treats them quite differently.  For reading, there
+is a *read* interface in *struct pmu*, but it serves more than just reading.
+According to the context, the *read* function not only reads the content of the
+counter (event->count), but also updates the left period to the next interrupt
+(event->hw.period_left).
+
+But the core of perf does not need direct write to counters.  Writing counters
+is hidden behind the abstraction of 1) *pmu->start*, literally start counting so one
+has to set the counter to a good value for the next interrupt; 2) inside the IRQ
+it should set the counter to the same resonable value.
+
+Reading is not a problem in RISC-V but writing would need some effort, since
+counters are not allowed to be written by S-mode.
+
+
+5. add()/del()/start()/stop()
+-----------------------------
+
+Basic idea: add()/del() adds/deletes events to/from a PMU, and start()/stop()
+starts/stop the counter of some event in the PMU.  All of them take the same
+arguments: *struct perf_event *event* and *int flag*.
+
+Consider perf as a state machine, then you will find that these functions serve
+as the state transition process between those states.
+Three states (event->hw.state) are defined:
+
+* PERF_HES_STOPPED:	the counter is stopped
+* PERF_HES_UPTODATE:	the event->count is up-to-date
+* PERF_HES_ARCH:	arch-dependent usage ... we don't need this for now
+
+A normal flow of these state transitions are as follows:
+
+* A user launches a perf event, resulting in calling to *event_init*.
+* When being context-switched in, *add* is called by the perf core, with a flag
+  PERF_EF_START, which means that the event should be started after it is added.
+  At this stage, a general event is bound to a physical counter, if any.
+  The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE, because it is now
+  stopped, and the (software) event count does not need updating.
+
+  - *start* is then called, and the counter is enabled.
+    With flag PERF_EF_RELOAD, it writes an appropriate value to the counter (check
+    previous section for detail).
+    Nothing is written if the flag does not contain PERF_EF_RELOAD.
+    The state now is reset to none, because it is neither stopped nor updated
+    (the counting already started)
+
+* When being context-switched out, *del* is called.  It then checks out all the
+  events in the PMU and calls *stop* to update their counts.
+
+  - *stop* is called by *del*
+    and the perf core with flag PERF_EF_UPDATE, and it often shares the same
+    subroutine as *read* with the same logic.
+    The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE, again.
+
+  - Life cycle of these two pairs: *add* and *del* are called repeatedly as
+    tasks switch in-and-out; *start* and *stop* is also called when the perf core
+    needs a quick stop-and-start, for instance, when the interrupt period is being
+    adjusted.
+
+Current implementation is sufficient for now and can be easily extended to
+features in the future.
+
+A. Related Structures
+---------------------
+
+* struct pmu: include/linux/perf_event.h
+* struct riscv_pmu: arch/riscv/include/asm/perf_event.h
+
+  Both structures are designed to be read-only.
+
+  *struct pmu* defines some function pointer interfaces, and most of them take
+  *struct perf_event* as a main argument, dealing with perf events according to
+  perf's internal state machine (check kernel/events/core.c for details).
+
+  *struct riscv_pmu* defines PMU-specific parameters.  The naming follows the
+  convention of all other architectures.
+
+* struct perf_event: include/linux/perf_event.h
+* struct hw_perf_event
+
+  The generic structure that represents perf events, and the hardware-related
+  details.
+
+* struct riscv_hw_events: arch/riscv/include/asm/perf_event.h
+
+  The structure that holds the status of events, has two fixed members:
+  the number of events and the array of the events.
+
+References
+----------
+
+[1] https://github.com/riscv/riscv-linux/pull/124
+
+[2] https://groups.google.com/a/groups.riscv.org/forum/#!topic/sw-dev/f19TmCNP6yA
diff --git a/Documentation/riscv/pmu.txt b/Documentation/riscv/pmu.txt
deleted file mode 100644
index b29f03a6d82f..000000000000
--- a/Documentation/riscv/pmu.txt
+++ /dev/null
@@ -1,249 +0,0 @@
-Supporting PMUs on RISC-V platforms
-==========================================
-Alan Kao <alankao@andestech.com>, Mar 2018
-
-Introduction
-------------
-
-As of this writing, perf_event-related features mentioned in The RISC-V ISA
-Privileged Version 1.10 are as follows:
-(please check the manual for more details)
-
-* [m|s]counteren
-* mcycle[h], cycle[h]
-* minstret[h], instret[h]
-* mhpeventx, mhpcounterx[h]
-
-With such function set only, porting perf would require a lot of work, due to
-the lack of the following general architectural performance monitoring features:
-
-* Enabling/Disabling counters
-  Counters are just free-running all the time in our case.
-* Interrupt caused by counter overflow
-  No such feature in the spec.
-* Interrupt indicator
-  It is not possible to have many interrupt ports for all counters, so an
-  interrupt indicator is required for software to tell which counter has
-  just overflowed.
-* Writing to counters
-  There will be an SBI to support this since the kernel cannot modify the
-  counters [1].  Alternatively, some vendor considers to implement
-  hardware-extension for M-S-U model machines to write counters directly.
-
-This document aims to provide developers a quick guide on supporting their
-PMUs in the kernel.  The following sections briefly explain perf' mechanism
-and todos.
-
-You may check previous discussions here [1][2].  Also, it might be helpful
-to check the appendix for related kernel structures.
-
-
-1. Initialization
------------------
-
-*riscv_pmu* is a global pointer of type *struct riscv_pmu*, which contains
-various methods according to perf's internal convention and PMU-specific
-parameters.  One should declare such instance to represent the PMU.  By default,
-*riscv_pmu* points to a constant structure *riscv_base_pmu*, which has very
-basic support to a baseline QEMU model.
-
-Then he/she can either assign the instance's pointer to *riscv_pmu* so that
-the minimal and already-implemented logic can be leveraged, or invent his/her
-own *riscv_init_platform_pmu* implementation.
-
-In other words, existing sources of *riscv_base_pmu* merely provide a
-reference implementation.  Developers can flexibly decide how many parts they
-can leverage, and in the most extreme case, they can customize every function
-according to their needs.
-
-
-2. Event Initialization
------------------------
-
-When a user launches a perf command to monitor some events, it is first
-interpreted by the userspace perf tool into multiple *perf_event_open*
-system calls, and then each of them calls to the body of *event_init*
-member function that was assigned in the previous step.  In *riscv_base_pmu*'s
-case, it is *riscv_event_init*.
-
-The main purpose of this function is to translate the event provided by user
-into bitmap, so that HW-related control registers or counters can directly be
-manipulated.  The translation is based on the mappings and methods provided in
-*riscv_pmu*.
-
-Note that some features can be done in this stage as well:
-
-(1) interrupt setting, which is stated in the next section;
-(2) privilege level setting (user space only, kernel space only, both);
-(3) destructor setting.  Normally it is sufficient to apply *riscv_destroy_event*;
-(4) tweaks for non-sampling events, which will be utilized by functions such as
-*perf_adjust_period*, usually something like the follows:
-
-if (!is_sampling_event(event)) {
-        hwc->sample_period = x86_pmu.max_period;
-        hwc->last_period = hwc->sample_period;
-        local64_set(&hwc->period_left, hwc->sample_period);
-}
-
-In the case of *riscv_base_pmu*, only (3) is provided for now.
-
-
-3. Interrupt
-------------
-
-3.1. Interrupt Initialization
-
-This often occurs at the beginning of the *event_init* method. In common
-practice, this should be a code segment like
-
-int x86_reserve_hardware(void)
-{
-        int err = 0;
-
-        if (!atomic_inc_not_zero(&pmc_refcount)) {
-                mutex_lock(&pmc_reserve_mutex);
-                if (atomic_read(&pmc_refcount) == 0) {
-                        if (!reserve_pmc_hardware())
-                                err = -EBUSY;
-                        else
-                                reserve_ds_buffers();
-                }
-                if (!err)
-                        atomic_inc(&pmc_refcount);
-                mutex_unlock(&pmc_reserve_mutex);
-        }
-
-        return err;
-}
-
-And the magic is in *reserve_pmc_hardware*, which usually does atomic
-operations to make implemented IRQ accessible from some global function pointer.
-*release_pmc_hardware* serves the opposite purpose, and it is used in event
-destructors mentioned in previous section.
-
-(Note: From the implementations in all the architectures, the *reserve/release*
-pair are always IRQ settings, so the *pmc_hardware* seems somehow misleading.
-It does NOT deal with the binding between an event and a physical counter,
-which will be introduced in the next section.)
-
-3.2. IRQ Structure
-
-Basically, a IRQ runs the following pseudo code:
-
-for each hardware counter that triggered this overflow
-
-    get the event of this counter
-
-    // following two steps are defined as *read()*,
-    // check the section Reading/Writing Counters for details.
-    count the delta value since previous interrupt
-    update the event->count (# event occurs) by adding delta, and
-               event->hw.period_left by subtracting delta
-
-    if the event overflows
-        sample data
-        set the counter appropriately for the next overflow
-
-        if the event overflows again
-            too frequently, throttle this event
-        fi
-    fi
-
-end for
-
-However as of this writing, none of the RISC-V implementations have designed an
-interrupt for perf, so the details are to be completed in the future.
-
-4. Reading/Writing Counters
----------------------------
-
-They seem symmetric but perf treats them quite differently.  For reading, there
-is a *read* interface in *struct pmu*, but it serves more than just reading.
-According to the context, the *read* function not only reads the content of the
-counter (event->count), but also updates the left period to the next interrupt
-(event->hw.period_left).
-
-But the core of perf does not need direct write to counters.  Writing counters
-is hidden behind the abstraction of 1) *pmu->start*, literally start counting so one
-has to set the counter to a good value for the next interrupt; 2) inside the IRQ
-it should set the counter to the same resonable value.
-
-Reading is not a problem in RISC-V but writing would need some effort, since
-counters are not allowed to be written by S-mode.
-
-
-5. add()/del()/start()/stop()
------------------------------
-
-Basic idea: add()/del() adds/deletes events to/from a PMU, and start()/stop()
-starts/stop the counter of some event in the PMU.  All of them take the same
-arguments: *struct perf_event *event* and *int flag*.
-
-Consider perf as a state machine, then you will find that these functions serve
-as the state transition process between those states.
-Three states (event->hw.state) are defined:
-
-* PERF_HES_STOPPED:	the counter is stopped
-* PERF_HES_UPTODATE:	the event->count is up-to-date
-* PERF_HES_ARCH:	arch-dependent usage ... we don't need this for now
-
-A normal flow of these state transitions are as follows:
-
-* A user launches a perf event, resulting in calling to *event_init*.
-* When being context-switched in, *add* is called by the perf core, with a flag
-  PERF_EF_START, which means that the event should be started after it is added.
-  At this stage, a general event is bound to a physical counter, if any.
-  The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE, because it is now
-  stopped, and the (software) event count does not need updating.
-** *start* is then called, and the counter is enabled.
-   With flag PERF_EF_RELOAD, it writes an appropriate value to the counter (check
-   previous section for detail).
-   Nothing is written if the flag does not contain PERF_EF_RELOAD.
-   The state now is reset to none, because it is neither stopped nor updated
-   (the counting already started)
-* When being context-switched out, *del* is called.  It then checks out all the
-  events in the PMU and calls *stop* to update their counts.
-** *stop* is called by *del*
-   and the perf core with flag PERF_EF_UPDATE, and it often shares the same
-   subroutine as *read* with the same logic.
-   The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE, again.
-
-** Life cycle of these two pairs: *add* and *del* are called repeatedly as
-  tasks switch in-and-out; *start* and *stop* is also called when the perf core
-  needs a quick stop-and-start, for instance, when the interrupt period is being
-  adjusted.
-
-Current implementation is sufficient for now and can be easily extended to
-features in the future.
-
-A. Related Structures
----------------------
-
-* struct pmu: include/linux/perf_event.h
-* struct riscv_pmu: arch/riscv/include/asm/perf_event.h
-
-  Both structures are designed to be read-only.
-
-  *struct pmu* defines some function pointer interfaces, and most of them take
-*struct perf_event* as a main argument, dealing with perf events according to
-perf's internal state machine (check kernel/events/core.c for details).
-
-  *struct riscv_pmu* defines PMU-specific parameters.  The naming follows the
-convention of all other architectures.
-
-* struct perf_event: include/linux/perf_event.h
-* struct hw_perf_event
-
-  The generic structure that represents perf events, and the hardware-related
-details.
-
-* struct riscv_hw_events: arch/riscv/include/asm/perf_event.h
-
-  The structure that holds the status of events, has two fixed members:
-the number of events and the array of the events.
-
-References
-----------
-
-[1] https://github.com/riscv/riscv-linux/pull/124
-[2] https://groups.google.com/a/groups.riscv.org/forum/#!topic/sw-dev/f19TmCNP6yA
-- 
cgit 


From 4ca9bc225e46eb7bc040dd948be7cb68975d80d3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:52:59 -0300
Subject: docs: target: convert docs to ReST and rename to *.rst

Convert the TCM docs to ReST format and add them to the
bookset.

This has a mix of userspace-faced and Kernelspace faced
docs. Still, it sounds a better candidate to be added at
the kernel API set of docs.

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/target/index.rst           |  19 ++
 Documentation/target/scripts.rst         |  11 +
 Documentation/target/tcm_mod_builder.rst | 149 ++++++++++++
 Documentation/target/tcm_mod_builder.txt | 145 -----------
 Documentation/target/tcmu-design.rst     | 405 +++++++++++++++++++++++++++++++
 Documentation/target/tcmu-design.txt     | 381 -----------------------------
 6 files changed, 584 insertions(+), 526 deletions(-)
 create mode 100644 Documentation/target/index.rst
 create mode 100644 Documentation/target/scripts.rst
 create mode 100644 Documentation/target/tcm_mod_builder.rst
 delete mode 100644 Documentation/target/tcm_mod_builder.txt
 create mode 100644 Documentation/target/tcmu-design.rst
 delete mode 100644 Documentation/target/tcmu-design.txt

(limited to 'Documentation')

diff --git a/Documentation/target/index.rst b/Documentation/target/index.rst
new file mode 100644
index 000000000000..b68f48982392
--- /dev/null
+++ b/Documentation/target/index.rst
@@ -0,0 +1,19 @@
+:orphan:
+
+==================
+TCM Virtual Device
+==================
+
+.. toctree::
+    :maxdepth: 1
+
+    tcmu-design
+    tcm_mod_builder
+    scripts
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/target/scripts.rst b/Documentation/target/scripts.rst
new file mode 100644
index 000000000000..172d42b522e4
--- /dev/null
+++ b/Documentation/target/scripts.rst
@@ -0,0 +1,11 @@
+TCM mod builder script
+----------------------
+
+.. literalinclude:: tcm_mod_builder.py
+    :language: perl
+
+Target export device script
+---------------------------
+
+.. literalinclude:: target-export-device
+    :language: shell
diff --git a/Documentation/target/tcm_mod_builder.rst b/Documentation/target/tcm_mod_builder.rst
new file mode 100644
index 000000000000..9bfc9822e2bd
--- /dev/null
+++ b/Documentation/target/tcm_mod_builder.rst
@@ -0,0 +1,149 @@
+=========================================
+The TCM v4 fabric module script generator
+=========================================
+
+Greetings all,
+
+This document is intended to be a mini-HOWTO for using the tcm_mod_builder.py
+script to generate a brand new functional TCM v4 fabric .ko module of your very own,
+that once built can be immediately be loaded to start access the new TCM/ConfigFS
+fabric skeleton, by simply using::
+
+	modprobe $TCM_NEW_MOD
+	mkdir -p /sys/kernel/config/target/$TCM_NEW_MOD
+
+This script will create a new drivers/target/$TCM_NEW_MOD/, and will do the following
+
+	1) Generate new API callers for drivers/target/target_core_fabric_configs.c logic
+	   ->make_tpg(), ->drop_tpg(), ->make_wwn(), ->drop_wwn().  These are created
+	   into $TCM_NEW_MOD/$TCM_NEW_MOD_configfs.c
+	2) Generate basic infrastructure for loading/unloading LKMs and TCM/ConfigFS fabric module
+	   using a skeleton struct target_core_fabric_ops API template.
+	3) Based on user defined T10 Proto_Ident for the new fabric module being built,
+	   the TransportID / Initiator and Target WWPN related handlers for
+	   SPC-3 persistent reservation are automatically generated in $TCM_NEW_MOD/$TCM_NEW_MOD_fabric.c
+	   using drivers/target/target_core_fabric_lib.c logic.
+	4) NOP API calls for all other Data I/O path and fabric dependent attribute logic
+	   in $TCM_NEW_MOD/$TCM_NEW_MOD_fabric.c
+
+tcm_mod_builder.py depends upon the mandatory '-p $PROTO_IDENT' and '-m
+$FABRIC_MOD_name' parameters, and actually running the script looks like::
+
+  target:/mnt/sdb/lio-core-2.6.git/Documentation/target# python tcm_mod_builder.py -p iSCSI -m tcm_nab5000
+  tcm_dir: /mnt/sdb/lio-core-2.6.git/Documentation/target/../../
+  Set fabric_mod_name: tcm_nab5000
+  Set fabric_mod_dir:
+  /mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000
+  Using proto_ident: iSCSI
+  Creating fabric_mod_dir:
+  /mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000
+  Writing file:
+  /mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/tcm_nab5000_base.h
+  Using tcm_mod_scan_fabric_ops:
+  /mnt/sdb/lio-core-2.6.git/Documentation/target/../../include/target/target_core_fabric_ops.h
+  Writing file:
+  /mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/tcm_nab5000_fabric.c
+  Writing file:
+  /mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/tcm_nab5000_fabric.h
+  Writing file:
+  /mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/tcm_nab5000_configfs.c
+  Writing file:
+  /mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/Kbuild
+  Writing file:
+  /mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/Kconfig
+  Would you like to add tcm_nab5000to drivers/target/Kbuild..? [yes,no]: yes
+  Would you like to add tcm_nab5000to drivers/target/Kconfig..? [yes,no]: yes
+
+At the end of tcm_mod_builder.py. the script will ask to add the following
+line to drivers/target/Kbuild::
+
+	obj-$(CONFIG_TCM_NAB5000)       += tcm_nab5000/
+
+and the same for drivers/target/Kconfig::
+
+	source "drivers/target/tcm_nab5000/Kconfig"
+
+#) Run 'make menuconfig' and select the new CONFIG_TCM_NAB5000 item::
+
+	<M>   TCM_NAB5000 fabric module
+
+#) Build using 'make modules', once completed you will have::
+
+    target:/mnt/sdb/lio-core-2.6.git# ls -la drivers/target/tcm_nab5000/
+    total 1348
+    drwxr-xr-x 2 root root   4096 2010-10-05 03:23 .
+    drwxr-xr-x 9 root root   4096 2010-10-05 03:22 ..
+    -rw-r--r-- 1 root root    282 2010-10-05 03:22 Kbuild
+    -rw-r--r-- 1 root root    171 2010-10-05 03:22 Kconfig
+    -rw-r--r-- 1 root root     49 2010-10-05 03:23 modules.order
+    -rw-r--r-- 1 root root    738 2010-10-05 03:22 tcm_nab5000_base.h
+    -rw-r--r-- 1 root root   9096 2010-10-05 03:22 tcm_nab5000_configfs.c
+    -rw-r--r-- 1 root root 191200 2010-10-05 03:23 tcm_nab5000_configfs.o
+    -rw-r--r-- 1 root root  40504 2010-10-05 03:23 .tcm_nab5000_configfs.o.cmd
+    -rw-r--r-- 1 root root   5414 2010-10-05 03:22 tcm_nab5000_fabric.c
+    -rw-r--r-- 1 root root   2016 2010-10-05 03:22 tcm_nab5000_fabric.h
+    -rw-r--r-- 1 root root 190932 2010-10-05 03:23 tcm_nab5000_fabric.o
+    -rw-r--r-- 1 root root  40713 2010-10-05 03:23 .tcm_nab5000_fabric.o.cmd
+    -rw-r--r-- 1 root root 401861 2010-10-05 03:23 tcm_nab5000.ko
+    -rw-r--r-- 1 root root    265 2010-10-05 03:23 .tcm_nab5000.ko.cmd
+    -rw-r--r-- 1 root root    459 2010-10-05 03:23 tcm_nab5000.mod.c
+    -rw-r--r-- 1 root root  23896 2010-10-05 03:23 tcm_nab5000.mod.o
+    -rw-r--r-- 1 root root  22655 2010-10-05 03:23 .tcm_nab5000.mod.o.cmd
+    -rw-r--r-- 1 root root 379022 2010-10-05 03:23 tcm_nab5000.o
+    -rw-r--r-- 1 root root    211 2010-10-05 03:23 .tcm_nab5000.o.cmd
+
+#) Load the new module, create a lun_0 configfs group, and add new TCM Core
+   IBLOCK backstore symlink to port::
+
+    target:/mnt/sdb/lio-core-2.6.git# insmod drivers/target/tcm_nab5000.ko
+    target:/mnt/sdb/lio-core-2.6.git# mkdir -p /sys/kernel/config/target/nab5000/iqn.foo/tpgt_1/lun/lun_0
+    target:/mnt/sdb/lio-core-2.6.git# cd /sys/kernel/config/target/nab5000/iqn.foo/tpgt_1/lun/lun_0/
+    target:/sys/kernel/config/target/nab5000/iqn.foo/tpgt_1/lun/lun_0# ln -s /sys/kernel/config/target/core/iblock_0/lvm_test0 nab5000_port
+
+    target:/sys/kernel/config/target/nab5000/iqn.foo/tpgt_1/lun/lun_0# cd -
+    target:/mnt/sdb/lio-core-2.6.git# tree /sys/kernel/config/target/nab5000/
+    /sys/kernel/config/target/nab5000/
+    |-- discovery_auth
+    |-- iqn.foo
+    |   `-- tpgt_1
+    |       |-- acls
+    |       |-- attrib
+    |       |-- lun
+    |       |   `-- lun_0
+    |       |       |-- alua_tg_pt_gp
+    |       |       |-- alua_tg_pt_offline
+    |       |       |-- alua_tg_pt_status
+    |       |       |-- alua_tg_pt_write_md
+    |	|	`-- nab5000_port -> ../../../../../../target/core/iblock_0/lvm_test0
+    |       |-- np
+    |       `-- param
+    `-- version
+
+    target:/mnt/sdb/lio-core-2.6.git# lsmod
+    Module                  Size  Used by
+    tcm_nab5000             3935  4
+    iscsi_target_mod      193211  0
+    target_core_stgt        8090  0
+    target_core_pscsi      11122  1
+    target_core_file        9172  2
+    target_core_iblock      9280  1
+    target_core_mod       228575  31
+    tcm_nab5000,iscsi_target_mod,target_core_stgt,target_core_pscsi,target_core_file,target_core_iblock
+    libfc                  73681  0
+    scsi_debug             56265  0
+    scsi_tgt                8666  1 target_core_stgt
+    configfs               20644  2 target_core_mod
+
+----------------------------------------------------------------------
+
+Future TODO items
+=================
+
+	1) Add more T10 proto_idents
+	2) Make tcm_mod_dump_fabric_ops() smarter and generate function pointer
+	   defs directly from include/target/target_core_fabric_ops.h:struct target_core_fabric_ops
+	   structure members.
+
+October 5th, 2010
+
+Nicholas A. Bellinger <nab@linux-iscsi.org>
diff --git a/Documentation/target/tcm_mod_builder.txt b/Documentation/target/tcm_mod_builder.txt
deleted file mode 100644
index ae22f7005540..000000000000
--- a/Documentation/target/tcm_mod_builder.txt
+++ /dev/null
@@ -1,145 +0,0 @@
->>>>>>>>>> The TCM v4 fabric module script generator <<<<<<<<<<
-
-Greetings all,
-
-This document is intended to be a mini-HOWTO for using the tcm_mod_builder.py
-script to generate a brand new functional TCM v4 fabric .ko module of your very own,
-that once built can be immediately be loaded to start access the new TCM/ConfigFS
-fabric skeleton, by simply using:
-
-	modprobe $TCM_NEW_MOD
-	mkdir -p /sys/kernel/config/target/$TCM_NEW_MOD
-
-This script will create a new drivers/target/$TCM_NEW_MOD/, and will do the following
-
-	*) Generate new API callers for drivers/target/target_core_fabric_configs.c logic
-	   ->make_tpg(), ->drop_tpg(), ->make_wwn(), ->drop_wwn().  These are created
-	   into $TCM_NEW_MOD/$TCM_NEW_MOD_configfs.c
-	*) Generate basic infrastructure for loading/unloading LKMs and TCM/ConfigFS fabric module
-	   using a skeleton struct target_core_fabric_ops API template.
-	*) Based on user defined T10 Proto_Ident for the new fabric module being built,
-	   the TransportID / Initiator and Target WWPN related handlers for
-	   SPC-3 persistent reservation are automatically generated in $TCM_NEW_MOD/$TCM_NEW_MOD_fabric.c
-	   using drivers/target/target_core_fabric_lib.c logic.
-	*) NOP API calls for all other Data I/O path and fabric dependent attribute logic
-	   in $TCM_NEW_MOD/$TCM_NEW_MOD_fabric.c
-
-tcm_mod_builder.py depends upon the mandatory '-p $PROTO_IDENT' and '-m
-$FABRIC_MOD_name' parameters, and actually running the script looks like:
-
-target:/mnt/sdb/lio-core-2.6.git/Documentation/target# python tcm_mod_builder.py -p iSCSI -m tcm_nab5000
-tcm_dir: /mnt/sdb/lio-core-2.6.git/Documentation/target/../../
-Set fabric_mod_name: tcm_nab5000
-Set fabric_mod_dir:
-/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000
-Using proto_ident: iSCSI
-Creating fabric_mod_dir:
-/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000
-Writing file:
-/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/tcm_nab5000_base.h
-Using tcm_mod_scan_fabric_ops:
-/mnt/sdb/lio-core-2.6.git/Documentation/target/../../include/target/target_core_fabric_ops.h
-Writing file:
-/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/tcm_nab5000_fabric.c
-Writing file:
-/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/tcm_nab5000_fabric.h
-Writing file:
-/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/tcm_nab5000_configfs.c
-Writing file:
-/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/Kbuild
-Writing file:
-/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/Kconfig
-Would you like to add tcm_nab5000to drivers/target/Kbuild..? [yes,no]: yes
-Would you like to add tcm_nab5000to drivers/target/Kconfig..? [yes,no]: yes
-
-At the end of tcm_mod_builder.py. the script will ask to add the following
-line to drivers/target/Kbuild:
-
-	obj-$(CONFIG_TCM_NAB5000)       += tcm_nab5000/
-
-and the same for drivers/target/Kconfig:
-
-	source "drivers/target/tcm_nab5000/Kconfig"
-
-*) Run 'make menuconfig' and select the new CONFIG_TCM_NAB5000 item:
-
-	<M>   TCM_NAB5000 fabric module
-
-*) Build using 'make modules', once completed you will have:
-
-target:/mnt/sdb/lio-core-2.6.git# ls -la drivers/target/tcm_nab5000/
-total 1348
-drwxr-xr-x 2 root root   4096 2010-10-05 03:23 .
-drwxr-xr-x 9 root root   4096 2010-10-05 03:22 ..
--rw-r--r-- 1 root root    282 2010-10-05 03:22 Kbuild
--rw-r--r-- 1 root root    171 2010-10-05 03:22 Kconfig
--rw-r--r-- 1 root root     49 2010-10-05 03:23 modules.order
--rw-r--r-- 1 root root    738 2010-10-05 03:22 tcm_nab5000_base.h
--rw-r--r-- 1 root root   9096 2010-10-05 03:22 tcm_nab5000_configfs.c
--rw-r--r-- 1 root root 191200 2010-10-05 03:23 tcm_nab5000_configfs.o
--rw-r--r-- 1 root root  40504 2010-10-05 03:23 .tcm_nab5000_configfs.o.cmd
--rw-r--r-- 1 root root   5414 2010-10-05 03:22 tcm_nab5000_fabric.c
--rw-r--r-- 1 root root   2016 2010-10-05 03:22 tcm_nab5000_fabric.h
--rw-r--r-- 1 root root 190932 2010-10-05 03:23 tcm_nab5000_fabric.o
--rw-r--r-- 1 root root  40713 2010-10-05 03:23 .tcm_nab5000_fabric.o.cmd
--rw-r--r-- 1 root root 401861 2010-10-05 03:23 tcm_nab5000.ko
--rw-r--r-- 1 root root    265 2010-10-05 03:23 .tcm_nab5000.ko.cmd
--rw-r--r-- 1 root root    459 2010-10-05 03:23 tcm_nab5000.mod.c
--rw-r--r-- 1 root root  23896 2010-10-05 03:23 tcm_nab5000.mod.o
--rw-r--r-- 1 root root  22655 2010-10-05 03:23 .tcm_nab5000.mod.o.cmd
--rw-r--r-- 1 root root 379022 2010-10-05 03:23 tcm_nab5000.o
--rw-r--r-- 1 root root    211 2010-10-05 03:23 .tcm_nab5000.o.cmd
-
-*) Load the new module, create a lun_0 configfs group, and add new TCM Core
-   IBLOCK backstore symlink to port:
-
-target:/mnt/sdb/lio-core-2.6.git# insmod drivers/target/tcm_nab5000.ko
-target:/mnt/sdb/lio-core-2.6.git# mkdir -p /sys/kernel/config/target/nab5000/iqn.foo/tpgt_1/lun/lun_0
-target:/mnt/sdb/lio-core-2.6.git# cd /sys/kernel/config/target/nab5000/iqn.foo/tpgt_1/lun/lun_0/
-target:/sys/kernel/config/target/nab5000/iqn.foo/tpgt_1/lun/lun_0# ln -s /sys/kernel/config/target/core/iblock_0/lvm_test0 nab5000_port
-
-target:/sys/kernel/config/target/nab5000/iqn.foo/tpgt_1/lun/lun_0# cd -
-target:/mnt/sdb/lio-core-2.6.git# tree /sys/kernel/config/target/nab5000/
-/sys/kernel/config/target/nab5000/
-|-- discovery_auth
-|-- iqn.foo
-|   `-- tpgt_1
-|       |-- acls
-|       |-- attrib
-|       |-- lun
-|       |   `-- lun_0
-|       |       |-- alua_tg_pt_gp
-|       |       |-- alua_tg_pt_offline
-|       |       |-- alua_tg_pt_status
-|       |       |-- alua_tg_pt_write_md
-|	|	`-- nab5000_port -> ../../../../../../target/core/iblock_0/lvm_test0
-|       |-- np
-|       `-- param
-`-- version
-
-target:/mnt/sdb/lio-core-2.6.git# lsmod
-Module                  Size  Used by
-tcm_nab5000             3935  4
-iscsi_target_mod      193211  0
-target_core_stgt        8090  0
-target_core_pscsi      11122  1
-target_core_file        9172  2
-target_core_iblock      9280  1
-target_core_mod       228575  31
-tcm_nab5000,iscsi_target_mod,target_core_stgt,target_core_pscsi,target_core_file,target_core_iblock
-libfc                  73681  0
-scsi_debug             56265  0
-scsi_tgt                8666  1 target_core_stgt
-configfs               20644  2 target_core_mod
-
-----------------------------------------------------------------------
-
-Future TODO items:
-
-	*) Add more T10 proto_idents
-	*) Make tcm_mod_dump_fabric_ops() smarter and generate function pointer
-	   defs directly from include/target/target_core_fabric_ops.h:struct target_core_fabric_ops
-	   structure members.
-
-October 5th, 2010
-Nicholas A. Bellinger <nab@linux-iscsi.org>
diff --git a/Documentation/target/tcmu-design.rst b/Documentation/target/tcmu-design.rst
new file mode 100644
index 000000000000..a7b426707bf6
--- /dev/null
+++ b/Documentation/target/tcmu-design.rst
@@ -0,0 +1,405 @@
+====================
+TCM Userspace Design
+====================
+
+
+.. Contents:
+
+   1) TCM Userspace Design
+     a) Background
+     b) Benefits
+     c) Design constraints
+     d) Implementation overview
+        i. Mailbox
+        ii. Command ring
+        iii. Data Area
+     e) Device discovery
+     f) Device events
+     g) Other contingencies
+   2) Writing a user pass-through handler
+     a) Discovering and configuring TCMU uio devices
+     b) Waiting for events on the device(s)
+     c) Managing the command ring
+   3) A final note
+
+
+TCM Userspace Design
+====================
+
+TCM is another name for LIO, an in-kernel iSCSI target (server).
+Existing TCM targets run in the kernel.  TCMU (TCM in Userspace)
+allows userspace programs to be written which act as iSCSI targets.
+This document describes the design.
+
+The existing kernel provides modules for different SCSI transport
+protocols.  TCM also modularizes the data storage.  There are existing
+modules for file, block device, RAM or using another SCSI device as
+storage.  These are called "backstores" or "storage engines".  These
+built-in modules are implemented entirely as kernel code.
+
+Background
+----------
+
+In addition to modularizing the transport protocol used for carrying
+SCSI commands ("fabrics"), the Linux kernel target, LIO, also modularizes
+the actual data storage as well. These are referred to as "backstores"
+or "storage engines". The target comes with backstores that allow a
+file, a block device, RAM, or another SCSI device to be used for the
+local storage needed for the exported SCSI LUN. Like the rest of LIO,
+these are implemented entirely as kernel code.
+
+These backstores cover the most common use cases, but not all. One new
+use case that other non-kernel target solutions, such as tgt, are able
+to support is using Gluster's GLFS or Ceph's RBD as a backstore. The
+target then serves as a translator, allowing initiators to store data
+in these non-traditional networked storage systems, while still only
+using standard protocols themselves.
+
+If the target is a userspace process, supporting these is easy. tgt,
+for example, needs only a small adapter module for each, because the
+modules just use the available userspace libraries for RBD and GLFS.
+
+Adding support for these backstores in LIO is considerably more
+difficult, because LIO is entirely kernel code. Instead of undertaking
+the significant work to port the GLFS or RBD APIs and protocols to the
+kernel, another approach is to create a userspace pass-through
+backstore for LIO, "TCMU".
+
+
+Benefits
+--------
+
+In addition to allowing relatively easy support for RBD and GLFS, TCMU
+will also allow easier development of new backstores. TCMU combines
+with the LIO loopback fabric to become something similar to FUSE
+(Filesystem in Userspace), but at the SCSI layer instead of the
+filesystem layer. A SUSE, if you will.
+
+The disadvantage is there are more distinct components to configure, and
+potentially to malfunction. This is unavoidable, but hopefully not
+fatal if we're careful to keep things as simple as possible.
+
+Design constraints
+------------------
+
+- Good performance: high throughput, low latency
+- Cleanly handle if userspace:
+
+   1) never attaches
+   2) hangs
+   3) dies
+   4) misbehaves
+
+- Allow future flexibility in user & kernel implementations
+- Be reasonably memory-efficient
+- Simple to configure & run
+- Simple to write a userspace backend
+
+
+Implementation overview
+-----------------------
+
+The core of the TCMU interface is a memory region that is shared
+between kernel and userspace. Within this region is: a control area
+(mailbox); a lockless producer/consumer circular buffer for commands
+to be passed up, and status returned; and an in/out data buffer area.
+
+TCMU uses the pre-existing UIO subsystem. UIO allows device driver
+development in userspace, and this is conceptually very close to the
+TCMU use case, except instead of a physical device, TCMU implements a
+memory-mapped layout designed for SCSI commands. Using UIO also
+benefits TCMU by handling device introspection (e.g. a way for
+userspace to determine how large the shared region is) and signaling
+mechanisms in both directions.
+
+There are no embedded pointers in the memory region. Everything is
+expressed as an offset from the region's starting address. This allows
+the ring to still work if the user process dies and is restarted with
+the region mapped at a different virtual address.
+
+See target_core_user.h for the struct definitions.
+
+The Mailbox
+-----------
+
+The mailbox is always at the start of the shared memory region, and
+contains a version, details about the starting offset and size of the
+command ring, and head and tail pointers to be used by the kernel and
+userspace (respectively) to put commands on the ring, and indicate
+when the commands are completed.
+
+version - 1 (userspace should abort if otherwise)
+
+flags:
+    - TCMU_MAILBOX_FLAG_CAP_OOOC:
+	indicates out-of-order completion is supported.
+	See "The Command Ring" for details.
+
+cmdr_off
+	The offset of the start of the command ring from the start
+	of the memory region, to account for the mailbox size.
+cmdr_size
+	The size of the command ring. This does *not* need to be a
+	power of two.
+cmd_head
+	Modified by the kernel to indicate when a command has been
+	placed on the ring.
+cmd_tail
+	Modified by userspace to indicate when it has completed
+	processing of a command.
+
+The Command Ring
+----------------
+
+Commands are placed on the ring by the kernel incrementing
+mailbox.cmd_head by the size of the command, modulo cmdr_size, and
+then signaling userspace via uio_event_notify(). Once the command is
+completed, userspace updates mailbox.cmd_tail in the same way and
+signals the kernel via a 4-byte write(). When cmd_head equals
+cmd_tail, the ring is empty -- no commands are currently waiting to be
+processed by userspace.
+
+TCMU commands are 8-byte aligned. They start with a common header
+containing "len_op", a 32-bit value that stores the length, as well as
+the opcode in the lowest unused bits. It also contains cmd_id and
+flags fields for setting by the kernel (kflags) and userspace
+(uflags).
+
+Currently only two opcodes are defined, TCMU_OP_CMD and TCMU_OP_PAD.
+
+When the opcode is CMD, the entry in the command ring is a struct
+tcmu_cmd_entry. Userspace finds the SCSI CDB (Command Data Block) via
+tcmu_cmd_entry.req.cdb_off. This is an offset from the start of the
+overall shared memory region, not the entry. The data in/out buffers
+are accessible via tht req.iov[] array. iov_cnt contains the number of
+entries in iov[] needed to describe either the Data-In or Data-Out
+buffers. For bidirectional commands, iov_cnt specifies how many iovec
+entries cover the Data-Out area, and iov_bidi_cnt specifies how many
+iovec entries immediately after that in iov[] cover the Data-In
+area. Just like other fields, iov.iov_base is an offset from the start
+of the region.
+
+When completing a command, userspace sets rsp.scsi_status, and
+rsp.sense_buffer if necessary. Userspace then increments
+mailbox.cmd_tail by entry.hdr.length (mod cmdr_size) and signals the
+kernel via the UIO method, a 4-byte write to the file descriptor.
+
+If TCMU_MAILBOX_FLAG_CAP_OOOC is set for mailbox->flags, kernel is
+capable of handling out-of-order completions. In this case, userspace can
+handle command in different order other than original. Since kernel would
+still process the commands in the same order it appeared in the command
+ring, userspace need to update the cmd->id when completing the
+command(a.k.a steal the original command's entry).
+
+When the opcode is PAD, userspace only updates cmd_tail as above --
+it's a no-op. (The kernel inserts PAD entries to ensure each CMD entry
+is contiguous within the command ring.)
+
+More opcodes may be added in the future. If userspace encounters an
+opcode it does not handle, it must set UNKNOWN_OP bit (bit 0) in
+hdr.uflags, update cmd_tail, and proceed with processing additional
+commands, if any.
+
+The Data Area
+-------------
+
+This is shared-memory space after the command ring. The organization
+of this area is not defined in the TCMU interface, and userspace
+should access only the parts referenced by pending iovs.
+
+
+Device Discovery
+----------------
+
+Other devices may be using UIO besides TCMU. Unrelated user processes
+may also be handling different sets of TCMU devices. TCMU userspace
+processes must find their devices by scanning sysfs
+class/uio/uio*/name. For TCMU devices, these names will be of the
+format::
+
+	tcm-user/<hba_num>/<device_name>/<subtype>/<path>
+
+where "tcm-user" is common for all TCMU-backed UIO devices. <hba_num>
+and <device_name> allow userspace to find the device's path in the
+kernel target's configfs tree. Assuming the usual mount point, it is
+found at::
+
+	/sys/kernel/config/target/core/user_<hba_num>/<device_name>
+
+This location contains attributes such as "hw_block_size", that
+userspace needs to know for correct operation.
+
+<subtype> will be a userspace-process-unique string to identify the
+TCMU device as expecting to be backed by a certain handler, and <path>
+will be an additional handler-specific string for the user process to
+configure the device, if needed. The name cannot contain ':', due to
+LIO limitations.
+
+For all devices so discovered, the user handler opens /dev/uioX and
+calls mmap()::
+
+	mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0)
+
+where size must be equal to the value read from
+/sys/class/uio/uioX/maps/map0/size.
+
+
+Device Events
+-------------
+
+If a new device is added or removed, a notification will be broadcast
+over netlink, using a generic netlink family name of "TCM-USER" and a
+multicast group named "config". This will include the UIO name as
+described in the previous section, as well as the UIO minor
+number. This should allow userspace to identify both the UIO device and
+the LIO device, so that after determining the device is supported
+(based on subtype) it can take the appropriate action.
+
+
+Other contingencies
+-------------------
+
+Userspace handler process never attaches:
+
+- TCMU will post commands, and then abort them after a timeout period
+  (30 seconds.)
+
+Userspace handler process is killed:
+
+- It is still possible to restart and re-connect to TCMU
+  devices. Command ring is preserved. However, after the timeout period,
+  the kernel will abort pending tasks.
+
+Userspace handler process hangs:
+
+- The kernel will abort pending tasks after a timeout period.
+
+Userspace handler process is malicious:
+
+- The process can trivially break the handling of devices it controls,
+  but should not be able to access kernel memory outside its shared
+  memory areas.
+
+
+Writing a user pass-through handler (with example code)
+=======================================================
+
+A user process handing a TCMU device must support the following:
+
+a) Discovering and configuring TCMU uio devices
+b) Waiting for events on the device(s)
+c) Managing the command ring: Parsing operations and commands,
+   performing work as needed, setting response fields (scsi_status and
+   possibly sense_buffer), updating cmd_tail, and notifying the kernel
+   that work has been finished
+
+First, consider instead writing a plugin for tcmu-runner. tcmu-runner
+implements all of this, and provides a higher-level API for plugin
+authors.
+
+TCMU is designed so that multiple unrelated processes can manage TCMU
+devices separately. All handlers should make sure to only open their
+devices, based opon a known subtype string.
+
+a) Discovering and configuring TCMU UIO devices::
+
+      /* error checking omitted for brevity */
+
+      int fd, dev_fd;
+      char buf[256];
+      unsigned long long map_len;
+      void *map;
+
+      fd = open("/sys/class/uio/uio0/name", O_RDONLY);
+      ret = read(fd, buf, sizeof(buf));
+      close(fd);
+      buf[ret-1] = '\0'; /* null-terminate and chop off the \n */
+
+      /* we only want uio devices whose name is a format we expect */
+      if (strncmp(buf, "tcm-user", 8))
+	exit(-1);
+
+      /* Further checking for subtype also needed here */
+
+      fd = open(/sys/class/uio/%s/maps/map0/size, O_RDONLY);
+      ret = read(fd, buf, sizeof(buf));
+      close(fd);
+      str_buf[ret-1] = '\0'; /* null-terminate and chop off the \n */
+
+      map_len = strtoull(buf, NULL, 0);
+
+      dev_fd = open("/dev/uio0", O_RDWR);
+      map = mmap(NULL, map_len, PROT_READ|PROT_WRITE, MAP_SHARED, dev_fd, 0);
+
+
+      b) Waiting for events on the device(s)
+
+      while (1) {
+        char buf[4];
+
+        int ret = read(dev_fd, buf, 4); /* will block */
+
+        handle_device_events(dev_fd, map);
+      }
+
+
+c) Managing the command ring::
+
+      #include <linux/target_core_user.h>
+
+      int handle_device_events(int fd, void *map)
+      {
+        struct tcmu_mailbox *mb = map;
+        struct tcmu_cmd_entry *ent = (void *) mb + mb->cmdr_off + mb->cmd_tail;
+        int did_some_work = 0;
+
+        /* Process events from cmd ring until we catch up with cmd_head */
+        while (ent != (void *)mb + mb->cmdr_off + mb->cmd_head) {
+
+          if (tcmu_hdr_get_op(ent->hdr.len_op) == TCMU_OP_CMD) {
+            uint8_t *cdb = (void *)mb + ent->req.cdb_off;
+            bool success = true;
+
+            /* Handle command here. */
+            printf("SCSI opcode: 0x%x\n", cdb[0]);
+
+            /* Set response fields */
+            if (success)
+              ent->rsp.scsi_status = SCSI_NO_SENSE;
+            else {
+              /* Also fill in rsp->sense_buffer here */
+              ent->rsp.scsi_status = SCSI_CHECK_CONDITION;
+            }
+          }
+          else if (tcmu_hdr_get_op(ent->hdr.len_op) != TCMU_OP_PAD) {
+            /* Tell the kernel we didn't handle unknown opcodes */
+            ent->hdr.uflags |= TCMU_UFLAG_UNKNOWN_OP;
+          }
+          else {
+            /* Do nothing for PAD entries except update cmd_tail */
+          }
+
+          /* update cmd_tail */
+          mb->cmd_tail = (mb->cmd_tail + tcmu_hdr_get_len(&ent->hdr)) % mb->cmdr_size;
+          ent = (void *) mb + mb->cmdr_off + mb->cmd_tail;
+          did_some_work = 1;
+        }
+
+        /* Notify the kernel that work has been finished */
+        if (did_some_work) {
+          uint32_t buf = 0;
+
+          write(fd, &buf, 4);
+        }
+
+        return 0;
+      }
+
+
+A final note
+============
+
+Please be careful to return codes as defined by the SCSI
+specifications. These are different than some values defined in the
+scsi/scsi.h include file. For example, CHECK CONDITION's status code
+is 2, not 1.
diff --git a/Documentation/target/tcmu-design.txt b/Documentation/target/tcmu-design.txt
deleted file mode 100644
index 4cebc1ebf99a..000000000000
--- a/Documentation/target/tcmu-design.txt
+++ /dev/null
@@ -1,381 +0,0 @@
-Contents:
-
-1) TCM Userspace Design
-  a) Background
-  b) Benefits
-  c) Design constraints
-  d) Implementation overview
-     i. Mailbox
-     ii. Command ring
-     iii. Data Area
-  e) Device discovery
-  f) Device events
-  g) Other contingencies
-2) Writing a user pass-through handler
-  a) Discovering and configuring TCMU uio devices
-  b) Waiting for events on the device(s)
-  c) Managing the command ring
-3) A final note
-
-
-TCM Userspace Design
---------------------
-
-TCM is another name for LIO, an in-kernel iSCSI target (server).
-Existing TCM targets run in the kernel.  TCMU (TCM in Userspace)
-allows userspace programs to be written which act as iSCSI targets.
-This document describes the design.
-
-The existing kernel provides modules for different SCSI transport
-protocols.  TCM also modularizes the data storage.  There are existing
-modules for file, block device, RAM or using another SCSI device as
-storage.  These are called "backstores" or "storage engines".  These
-built-in modules are implemented entirely as kernel code.
-
-Background:
-
-In addition to modularizing the transport protocol used for carrying
-SCSI commands ("fabrics"), the Linux kernel target, LIO, also modularizes
-the actual data storage as well. These are referred to as "backstores"
-or "storage engines". The target comes with backstores that allow a
-file, a block device, RAM, or another SCSI device to be used for the
-local storage needed for the exported SCSI LUN. Like the rest of LIO,
-these are implemented entirely as kernel code.
-
-These backstores cover the most common use cases, but not all. One new
-use case that other non-kernel target solutions, such as tgt, are able
-to support is using Gluster's GLFS or Ceph's RBD as a backstore. The
-target then serves as a translator, allowing initiators to store data
-in these non-traditional networked storage systems, while still only
-using standard protocols themselves.
-
-If the target is a userspace process, supporting these is easy. tgt,
-for example, needs only a small adapter module for each, because the
-modules just use the available userspace libraries for RBD and GLFS.
-
-Adding support for these backstores in LIO is considerably more
-difficult, because LIO is entirely kernel code. Instead of undertaking
-the significant work to port the GLFS or RBD APIs and protocols to the
-kernel, another approach is to create a userspace pass-through
-backstore for LIO, "TCMU".
-
-
-Benefits:
-
-In addition to allowing relatively easy support for RBD and GLFS, TCMU
-will also allow easier development of new backstores. TCMU combines
-with the LIO loopback fabric to become something similar to FUSE
-(Filesystem in Userspace), but at the SCSI layer instead of the
-filesystem layer. A SUSE, if you will.
-
-The disadvantage is there are more distinct components to configure, and
-potentially to malfunction. This is unavoidable, but hopefully not
-fatal if we're careful to keep things as simple as possible.
-
-Design constraints:
-
-- Good performance: high throughput, low latency
-- Cleanly handle if userspace:
-   1) never attaches
-   2) hangs
-   3) dies
-   4) misbehaves
-- Allow future flexibility in user & kernel implementations
-- Be reasonably memory-efficient
-- Simple to configure & run
-- Simple to write a userspace backend
-
-
-Implementation overview:
-
-The core of the TCMU interface is a memory region that is shared
-between kernel and userspace. Within this region is: a control area
-(mailbox); a lockless producer/consumer circular buffer for commands
-to be passed up, and status returned; and an in/out data buffer area.
-
-TCMU uses the pre-existing UIO subsystem. UIO allows device driver
-development in userspace, and this is conceptually very close to the
-TCMU use case, except instead of a physical device, TCMU implements a
-memory-mapped layout designed for SCSI commands. Using UIO also
-benefits TCMU by handling device introspection (e.g. a way for
-userspace to determine how large the shared region is) and signaling
-mechanisms in both directions.
-
-There are no embedded pointers in the memory region. Everything is
-expressed as an offset from the region's starting address. This allows
-the ring to still work if the user process dies and is restarted with
-the region mapped at a different virtual address.
-
-See target_core_user.h for the struct definitions.
-
-The Mailbox:
-
-The mailbox is always at the start of the shared memory region, and
-contains a version, details about the starting offset and size of the
-command ring, and head and tail pointers to be used by the kernel and
-userspace (respectively) to put commands on the ring, and indicate
-when the commands are completed.
-
-version - 1 (userspace should abort if otherwise)
-flags:
-- TCMU_MAILBOX_FLAG_CAP_OOOC: indicates out-of-order completion is
-  supported.  See "The Command Ring" for details.
-cmdr_off - The offset of the start of the command ring from the start
-of the memory region, to account for the mailbox size.
-cmdr_size - The size of the command ring. This does *not* need to be a
-power of two.
-cmd_head - Modified by the kernel to indicate when a command has been
-placed on the ring.
-cmd_tail - Modified by userspace to indicate when it has completed
-processing of a command.
-
-The Command Ring:
-
-Commands are placed on the ring by the kernel incrementing
-mailbox.cmd_head by the size of the command, modulo cmdr_size, and
-then signaling userspace via uio_event_notify(). Once the command is
-completed, userspace updates mailbox.cmd_tail in the same way and
-signals the kernel via a 4-byte write(). When cmd_head equals
-cmd_tail, the ring is empty -- no commands are currently waiting to be
-processed by userspace.
-
-TCMU commands are 8-byte aligned. They start with a common header
-containing "len_op", a 32-bit value that stores the length, as well as
-the opcode in the lowest unused bits. It also contains cmd_id and
-flags fields for setting by the kernel (kflags) and userspace
-(uflags).
-
-Currently only two opcodes are defined, TCMU_OP_CMD and TCMU_OP_PAD.
-
-When the opcode is CMD, the entry in the command ring is a struct
-tcmu_cmd_entry. Userspace finds the SCSI CDB (Command Data Block) via
-tcmu_cmd_entry.req.cdb_off. This is an offset from the start of the
-overall shared memory region, not the entry. The data in/out buffers
-are accessible via tht req.iov[] array. iov_cnt contains the number of
-entries in iov[] needed to describe either the Data-In or Data-Out
-buffers. For bidirectional commands, iov_cnt specifies how many iovec
-entries cover the Data-Out area, and iov_bidi_cnt specifies how many
-iovec entries immediately after that in iov[] cover the Data-In
-area. Just like other fields, iov.iov_base is an offset from the start
-of the region.
-
-When completing a command, userspace sets rsp.scsi_status, and
-rsp.sense_buffer if necessary. Userspace then increments
-mailbox.cmd_tail by entry.hdr.length (mod cmdr_size) and signals the
-kernel via the UIO method, a 4-byte write to the file descriptor.
-
-If TCMU_MAILBOX_FLAG_CAP_OOOC is set for mailbox->flags, kernel is
-capable of handling out-of-order completions. In this case, userspace can
-handle command in different order other than original. Since kernel would
-still process the commands in the same order it appeared in the command
-ring, userspace need to update the cmd->id when completing the
-command(a.k.a steal the original command's entry).
-
-When the opcode is PAD, userspace only updates cmd_tail as above --
-it's a no-op. (The kernel inserts PAD entries to ensure each CMD entry
-is contiguous within the command ring.)
-
-More opcodes may be added in the future. If userspace encounters an
-opcode it does not handle, it must set UNKNOWN_OP bit (bit 0) in
-hdr.uflags, update cmd_tail, and proceed with processing additional
-commands, if any.
-
-The Data Area:
-
-This is shared-memory space after the command ring. The organization
-of this area is not defined in the TCMU interface, and userspace
-should access only the parts referenced by pending iovs.
-
-
-Device Discovery:
-
-Other devices may be using UIO besides TCMU. Unrelated user processes
-may also be handling different sets of TCMU devices. TCMU userspace
-processes must find their devices by scanning sysfs
-class/uio/uio*/name. For TCMU devices, these names will be of the
-format:
-
-tcm-user/<hba_num>/<device_name>/<subtype>/<path>
-
-where "tcm-user" is common for all TCMU-backed UIO devices. <hba_num>
-and <device_name> allow userspace to find the device's path in the
-kernel target's configfs tree. Assuming the usual mount point, it is
-found at:
-
-/sys/kernel/config/target/core/user_<hba_num>/<device_name>
-
-This location contains attributes such as "hw_block_size", that
-userspace needs to know for correct operation.
-
-<subtype> will be a userspace-process-unique string to identify the
-TCMU device as expecting to be backed by a certain handler, and <path>
-will be an additional handler-specific string for the user process to
-configure the device, if needed. The name cannot contain ':', due to
-LIO limitations.
-
-For all devices so discovered, the user handler opens /dev/uioX and
-calls mmap():
-
-mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0)
-
-where size must be equal to the value read from
-/sys/class/uio/uioX/maps/map0/size.
-
-
-Device Events:
-
-If a new device is added or removed, a notification will be broadcast
-over netlink, using a generic netlink family name of "TCM-USER" and a
-multicast group named "config". This will include the UIO name as
-described in the previous section, as well as the UIO minor
-number. This should allow userspace to identify both the UIO device and
-the LIO device, so that after determining the device is supported
-(based on subtype) it can take the appropriate action.
-
-
-Other contingencies:
-
-Userspace handler process never attaches:
-
-- TCMU will post commands, and then abort them after a timeout period
-  (30 seconds.)
-
-Userspace handler process is killed:
-
-- It is still possible to restart and re-connect to TCMU
-  devices. Command ring is preserved. However, after the timeout period,
-  the kernel will abort pending tasks.
-
-Userspace handler process hangs:
-
-- The kernel will abort pending tasks after a timeout period.
-
-Userspace handler process is malicious:
-
-- The process can trivially break the handling of devices it controls,
-  but should not be able to access kernel memory outside its shared
-  memory areas.
-
-
-Writing a user pass-through handler (with example code)
--------------------------------------------------------
-
-A user process handing a TCMU device must support the following:
-
-a) Discovering and configuring TCMU uio devices
-b) Waiting for events on the device(s)
-c) Managing the command ring: Parsing operations and commands,
-   performing work as needed, setting response fields (scsi_status and
-   possibly sense_buffer), updating cmd_tail, and notifying the kernel
-   that work has been finished
-
-First, consider instead writing a plugin for tcmu-runner. tcmu-runner
-implements all of this, and provides a higher-level API for plugin
-authors.
-
-TCMU is designed so that multiple unrelated processes can manage TCMU
-devices separately. All handlers should make sure to only open their
-devices, based opon a known subtype string.
-
-a) Discovering and configuring TCMU UIO devices:
-
-(error checking omitted for brevity)
-
-int fd, dev_fd;
-char buf[256];
-unsigned long long map_len;
-void *map;
-
-fd = open("/sys/class/uio/uio0/name", O_RDONLY);
-ret = read(fd, buf, sizeof(buf));
-close(fd);
-buf[ret-1] = '\0'; /* null-terminate and chop off the \n */
-
-/* we only want uio devices whose name is a format we expect */
-if (strncmp(buf, "tcm-user", 8))
-	exit(-1);
-
-/* Further checking for subtype also needed here */
-
-fd = open(/sys/class/uio/%s/maps/map0/size, O_RDONLY);
-ret = read(fd, buf, sizeof(buf));
-close(fd);
-str_buf[ret-1] = '\0'; /* null-terminate and chop off the \n */
-
-map_len = strtoull(buf, NULL, 0);
-
-dev_fd = open("/dev/uio0", O_RDWR);
-map = mmap(NULL, map_len, PROT_READ|PROT_WRITE, MAP_SHARED, dev_fd, 0);
-
-
-b) Waiting for events on the device(s)
-
-while (1) {
-  char buf[4];
-
-  int ret = read(dev_fd, buf, 4); /* will block */
-
-  handle_device_events(dev_fd, map);
-}
-
-
-c) Managing the command ring
-
-#include <linux/target_core_user.h>
-
-int handle_device_events(int fd, void *map)
-{
-  struct tcmu_mailbox *mb = map;
-  struct tcmu_cmd_entry *ent = (void *) mb + mb->cmdr_off + mb->cmd_tail;
-  int did_some_work = 0;
-
-  /* Process events from cmd ring until we catch up with cmd_head */
-  while (ent != (void *)mb + mb->cmdr_off + mb->cmd_head) {
-
-    if (tcmu_hdr_get_op(ent->hdr.len_op) == TCMU_OP_CMD) {
-      uint8_t *cdb = (void *)mb + ent->req.cdb_off;
-      bool success = true;
-
-      /* Handle command here. */
-      printf("SCSI opcode: 0x%x\n", cdb[0]);
-
-      /* Set response fields */
-      if (success)
-        ent->rsp.scsi_status = SCSI_NO_SENSE;
-      else {
-        /* Also fill in rsp->sense_buffer here */
-        ent->rsp.scsi_status = SCSI_CHECK_CONDITION;
-      }
-    }
-    else if (tcmu_hdr_get_op(ent->hdr.len_op) != TCMU_OP_PAD) {
-      /* Tell the kernel we didn't handle unknown opcodes */
-      ent->hdr.uflags |= TCMU_UFLAG_UNKNOWN_OP;
-    }
-    else {
-      /* Do nothing for PAD entries except update cmd_tail */
-    }
-
-    /* update cmd_tail */
-    mb->cmd_tail = (mb->cmd_tail + tcmu_hdr_get_len(&ent->hdr)) % mb->cmdr_size;
-    ent = (void *) mb + mb->cmdr_off + mb->cmd_tail;
-    did_some_work = 1;
-  }
-
-  /* Notify the kernel that work has been finished */
-  if (did_some_work) {
-    uint32_t buf = 0;
-
-    write(fd, &buf, 4);
-  }
-
-  return 0;
-}
-
-
-A final note
-------------
-
-Please be careful to return codes as defined by the SCSI
-specifications. These are different than some values defined in the
-scsi/scsi.h include file. For example, CHECK CONDITION's status code
-is 2, not 1.
-- 
cgit 


From 458f69ef36656dc74679667380422dd8063eabfb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:53:00 -0300
Subject: docs: timers: convert docs to ReST and rename to *.rst

The conversion here is really trivial: just a bunch of title
markups and very few puntual changes is enough to make it to
be parsed by Sphinx and generate a nice html.

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/timers/NO_HZ.txt        | 318 ---------------------------------
 Documentation/timers/highres.rst      | 250 ++++++++++++++++++++++++++
 Documentation/timers/highres.txt      | 249 --------------------------
 Documentation/timers/hpet.rst         |  30 ++++
 Documentation/timers/hpet.txt         |  28 ---
 Documentation/timers/hrtimers.rst     | 178 +++++++++++++++++++
 Documentation/timers/hrtimers.txt     | 178 -------------------
 Documentation/timers/index.rst        |  22 +++
 Documentation/timers/no_hz.rst        | 326 ++++++++++++++++++++++++++++++++++
 Documentation/timers/timekeeping.rst  | 180 +++++++++++++++++++
 Documentation/timers/timekeeping.txt  | 179 -------------------
 Documentation/timers/timers-howto.rst | 112 ++++++++++++
 Documentation/timers/timers-howto.txt | 105 -----------
 13 files changed, 1098 insertions(+), 1057 deletions(-)
 delete mode 100644 Documentation/timers/NO_HZ.txt
 create mode 100644 Documentation/timers/highres.rst
 delete mode 100644 Documentation/timers/highres.txt
 create mode 100644 Documentation/timers/hpet.rst
 delete mode 100644 Documentation/timers/hpet.txt
 create mode 100644 Documentation/timers/hrtimers.rst
 delete mode 100644 Documentation/timers/hrtimers.txt
 create mode 100644 Documentation/timers/index.rst
 create mode 100644 Documentation/timers/no_hz.rst
 create mode 100644 Documentation/timers/timekeeping.rst
 delete mode 100644 Documentation/timers/timekeeping.txt
 create mode 100644 Documentation/timers/timers-howto.rst
 delete mode 100644 Documentation/timers/timers-howto.txt

(limited to 'Documentation')

diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt
deleted file mode 100644
index 9591092da5e0..000000000000
--- a/Documentation/timers/NO_HZ.txt
+++ /dev/null
@@ -1,318 +0,0 @@
-		NO_HZ: Reducing Scheduling-Clock Ticks
-
-
-This document describes Kconfig options and boot parameters that can
-reduce the number of scheduling-clock interrupts, thereby improving energy
-efficiency and reducing OS jitter.  Reducing OS jitter is important for
-some types of computationally intensive high-performance computing (HPC)
-applications and for real-time applications.
-
-There are three main ways of managing scheduling-clock interrupts
-(also known as "scheduling-clock ticks" or simply "ticks"):
-
-1.	Never omit scheduling-clock ticks (CONFIG_HZ_PERIODIC=y or
-	CONFIG_NO_HZ=n for older kernels).  You normally will -not-
-	want to choose this option.
-
-2.	Omit scheduling-clock ticks on idle CPUs (CONFIG_NO_HZ_IDLE=y or
-	CONFIG_NO_HZ=y for older kernels).  This is the most common
-	approach, and should be the default.
-
-3.	Omit scheduling-clock ticks on CPUs that are either idle or that
-	have only one runnable task (CONFIG_NO_HZ_FULL=y).  Unless you
-	are running realtime applications or certain types of HPC
-	workloads, you will normally -not- want this option.
-
-These three cases are described in the following three sections, followed
-by a third section on RCU-specific considerations, a fourth section
-discussing testing, and a fifth and final section listing known issues.
-
-
-NEVER OMIT SCHEDULING-CLOCK TICKS
-
-Very old versions of Linux from the 1990s and the very early 2000s
-are incapable of omitting scheduling-clock ticks.  It turns out that
-there are some situations where this old-school approach is still the
-right approach, for example, in heavy workloads with lots of tasks
-that use short bursts of CPU, where there are very frequent idle
-periods, but where these idle periods are also quite short (tens or
-hundreds of microseconds).  For these types of workloads, scheduling
-clock interrupts will normally be delivered any way because there
-will frequently be multiple runnable tasks per CPU.  In these cases,
-attempting to turn off the scheduling clock interrupt will have no effect
-other than increasing the overhead of switching to and from idle and
-transitioning between user and kernel execution.
-
-This mode of operation can be selected using CONFIG_HZ_PERIODIC=y (or
-CONFIG_NO_HZ=n for older kernels).
-
-However, if you are instead running a light workload with long idle
-periods, failing to omit scheduling-clock interrupts will result in
-excessive power consumption.  This is especially bad on battery-powered
-devices, where it results in extremely short battery lifetimes.  If you
-are running light workloads, you should therefore read the following
-section.
-
-In addition, if you are running either a real-time workload or an HPC
-workload with short iterations, the scheduling-clock interrupts can
-degrade your applications performance.  If this describes your workload,
-you should read the following two sections.
-
-
-OMIT SCHEDULING-CLOCK TICKS FOR IDLE CPUs
-
-If a CPU is idle, there is little point in sending it a scheduling-clock
-interrupt.  After all, the primary purpose of a scheduling-clock interrupt
-is to force a busy CPU to shift its attention among multiple duties,
-and an idle CPU has no duties to shift its attention among.
-
-The CONFIG_NO_HZ_IDLE=y Kconfig option causes the kernel to avoid sending
-scheduling-clock interrupts to idle CPUs, which is critically important
-both to battery-powered devices and to highly virtualized mainframes.
-A battery-powered device running a CONFIG_HZ_PERIODIC=y kernel would
-drain its battery very quickly, easily 2-3 times as fast as would the
-same device running a CONFIG_NO_HZ_IDLE=y kernel.  A mainframe running
-1,500 OS instances might find that half of its CPU time was consumed by
-unnecessary scheduling-clock interrupts.  In these situations, there
-is strong motivation to avoid sending scheduling-clock interrupts to
-idle CPUs.  That said, dyntick-idle mode is not free:
-
-1.	It increases the number of instructions executed on the path
-	to and from the idle loop.
-
-2.	On many architectures, dyntick-idle mode also increases the
-	number of expensive clock-reprogramming operations.
-
-Therefore, systems with aggressive real-time response constraints often
-run CONFIG_HZ_PERIODIC=y kernels (or CONFIG_NO_HZ=n for older kernels)
-in order to avoid degrading from-idle transition latencies.
-
-An idle CPU that is not receiving scheduling-clock interrupts is said to
-be "dyntick-idle", "in dyntick-idle mode", "in nohz mode", or "running
-tickless".  The remainder of this document will use "dyntick-idle mode".
-
-There is also a boot parameter "nohz=" that can be used to disable
-dyntick-idle mode in CONFIG_NO_HZ_IDLE=y kernels by specifying "nohz=off".
-By default, CONFIG_NO_HZ_IDLE=y kernels boot with "nohz=on", enabling
-dyntick-idle mode.
-
-
-OMIT SCHEDULING-CLOCK TICKS FOR CPUs WITH ONLY ONE RUNNABLE TASK
-
-If a CPU has only one runnable task, there is little point in sending it
-a scheduling-clock interrupt because there is no other task to switch to.
-Note that omitting scheduling-clock ticks for CPUs with only one runnable
-task implies also omitting them for idle CPUs.
-
-The CONFIG_NO_HZ_FULL=y Kconfig option causes the kernel to avoid
-sending scheduling-clock interrupts to CPUs with a single runnable task,
-and such CPUs are said to be "adaptive-ticks CPUs".  This is important
-for applications with aggressive real-time response constraints because
-it allows them to improve their worst-case response times by the maximum
-duration of a scheduling-clock interrupt.  It is also important for
-computationally intensive short-iteration workloads:  If any CPU is
-delayed during a given iteration, all the other CPUs will be forced to
-wait idle while the delayed CPU finishes.  Thus, the delay is multiplied
-by one less than the number of CPUs.  In these situations, there is
-again strong motivation to avoid sending scheduling-clock interrupts.
-
-By default, no CPU will be an adaptive-ticks CPU.  The "nohz_full="
-boot parameter specifies the adaptive-ticks CPUs.  For example,
-"nohz_full=1,6-8" says that CPUs 1, 6, 7, and 8 are to be adaptive-ticks
-CPUs.  Note that you are prohibited from marking all of the CPUs as
-adaptive-tick CPUs:  At least one non-adaptive-tick CPU must remain
-online to handle timekeeping tasks in order to ensure that system
-calls like gettimeofday() returns accurate values on adaptive-tick CPUs.
-(This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running
-user processes to observe slight drifts in clock rate.)  Therefore, the
-boot CPU is prohibited from entering adaptive-ticks mode.  Specifying a
-"nohz_full=" mask that includes the boot CPU will result in a boot-time
-error message, and the boot CPU will be removed from the mask.  Note that
-this means that your system must have at least two CPUs in order for
-CONFIG_NO_HZ_FULL=y to do anything for you.
-
-Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.
-This is covered in the "RCU IMPLICATIONS" section below.
-
-Normally, a CPU remains in adaptive-ticks mode as long as possible.
-In particular, transitioning to kernel mode does not automatically change
-the mode.  Instead, the CPU will exit adaptive-ticks mode only if needed,
-for example, if that CPU enqueues an RCU callback.
-
-Just as with dyntick-idle mode, the benefits of adaptive-tick mode do
-not come for free:
-
-1.	CONFIG_NO_HZ_FULL selects CONFIG_NO_HZ_COMMON, so you cannot run
-	adaptive ticks without also running dyntick idle.  This dependency
-	extends down into the implementation, so that all of the costs
-	of CONFIG_NO_HZ_IDLE are also incurred by CONFIG_NO_HZ_FULL.
-
-2.	The user/kernel transitions are slightly more expensive due
-	to the need to inform kernel subsystems (such as RCU) about
-	the change in mode.
-
-3.	POSIX CPU timers prevent CPUs from entering adaptive-tick mode.
-	Real-time applications needing to take actions based on CPU time
-	consumption need to use other means of doing so.
-
-4.	If there are more perf events pending than the hardware can
-	accommodate, they are normally round-robined so as to collect
-	all of them over time.  Adaptive-tick mode may prevent this
-	round-robining from happening.  This will likely be fixed by
-	preventing CPUs with large numbers of perf events pending from
-	entering adaptive-tick mode.
-
-5.	Scheduler statistics for adaptive-tick CPUs may be computed
-	slightly differently than those for non-adaptive-tick CPUs.
-	This might in turn perturb load-balancing of real-time tasks.
-
-6.	The LB_BIAS scheduler feature is disabled by adaptive ticks.
-
-Although improvements are expected over time, adaptive ticks is quite
-useful for many types of real-time and compute-intensive applications.
-However, the drawbacks listed above mean that adaptive ticks should not
-(yet) be enabled by default.
-
-
-RCU IMPLICATIONS
-
-There are situations in which idle CPUs cannot be permitted to
-enter either dyntick-idle mode or adaptive-tick mode, the most
-common being when that CPU has RCU callbacks pending.
-
-The CONFIG_RCU_FAST_NO_HZ=y Kconfig option may be used to cause such CPUs
-to enter dyntick-idle mode or adaptive-tick mode anyway.  In this case,
-a timer will awaken these CPUs every four jiffies in order to ensure
-that the RCU callbacks are processed in a timely fashion.
-
-Another approach is to offload RCU callback processing to "rcuo" kthreads
-using the CONFIG_RCU_NOCB_CPU=y Kconfig option.  The specific CPUs to
-offload may be selected using The "rcu_nocbs=" kernel boot parameter,
-which takes a comma-separated list of CPUs and CPU ranges, for example,
-"1,3-5" selects CPUs 1, 3, 4, and 5.
-
-The offloaded CPUs will never queue RCU callbacks, and therefore RCU
-never prevents offloaded CPUs from entering either dyntick-idle mode
-or adaptive-tick mode.  That said, note that it is up to userspace to
-pin the "rcuo" kthreads to specific CPUs if desired.  Otherwise, the
-scheduler will decide where to run them, which might or might not be
-where you want them to run.
-
-
-TESTING
-
-So you enable all the OS-jitter features described in this document,
-but do not see any change in your workload's behavior.  Is this because
-your workload isn't affected that much by OS jitter, or is it because
-something else is in the way?  This section helps answer this question
-by providing a simple OS-jitter test suite, which is available on branch
-master of the following git archive:
-
-git://git.kernel.org/pub/scm/linux/kernel/git/frederic/dynticks-testing.git
-
-Clone this archive and follow the instructions in the README file.
-This test procedure will produce a trace that will allow you to evaluate
-whether or not you have succeeded in removing OS jitter from your system.
-If this trace shows that you have removed OS jitter as much as is
-possible, then you can conclude that your workload is not all that
-sensitive to OS jitter.
-
-Note: this test requires that your system have at least two CPUs.
-We do not currently have a good way to remove OS jitter from single-CPU
-systems.
-
-
-KNOWN ISSUES
-
-o	Dyntick-idle slows transitions to and from idle slightly.
-	In practice, this has not been a problem except for the most
-	aggressive real-time workloads, which have the option of disabling
-	dyntick-idle mode, an option that most of them take.  However,
-	some workloads will no doubt want to use adaptive ticks to
-	eliminate scheduling-clock interrupt latencies.  Here are some
-	options for these workloads:
-
-	a.	Use PMQOS from userspace to inform the kernel of your
-		latency requirements (preferred).
-
-	b.	On x86 systems, use the "idle=mwait" boot parameter.
-
-	c.	On x86 systems, use the "intel_idle.max_cstate=" to limit
-	`	the maximum C-state depth.
-
-	d.	On x86 systems, use the "idle=poll" boot parameter.
-		However, please note that use of this parameter can cause
-		your CPU to overheat, which may cause thermal throttling
-		to degrade your latencies -- and that this degradation can
-		be even worse than that of dyntick-idle.  Furthermore,
-		this parameter effectively disables Turbo Mode on Intel
-		CPUs, which can significantly reduce maximum performance.
-
-o	Adaptive-ticks slows user/kernel transitions slightly.
-	This is not expected to be a problem for computationally intensive
-	workloads, which have few such transitions.  Careful benchmarking
-	will be required to determine whether or not other workloads
-	are significantly affected by this effect.
-
-o	Adaptive-ticks does not do anything unless there is only one
-	runnable task for a given CPU, even though there are a number
-	of other situations where the scheduling-clock tick is not
-	needed.  To give but one example, consider a CPU that has one
-	runnable high-priority SCHED_FIFO task and an arbitrary number
-	of low-priority SCHED_OTHER tasks.  In this case, the CPU is
-	required to run the SCHED_FIFO task until it either blocks or
-	some other higher-priority task awakens on (or is assigned to)
-	this CPU, so there is no point in sending a scheduling-clock
-	interrupt to this CPU.	However, the current implementation
-	nevertheless sends scheduling-clock interrupts to CPUs having a
-	single runnable SCHED_FIFO task and multiple runnable SCHED_OTHER
-	tasks, even though these interrupts are unnecessary.
-
-	And even when there are multiple runnable tasks on a given CPU,
-	there is little point in interrupting that CPU until the current
-	running task's timeslice expires, which is almost always way
-	longer than the time of the next scheduling-clock interrupt.
-
-	Better handling of these sorts of situations is future work.
-
-o	A reboot is required to reconfigure both adaptive idle and RCU
-	callback offloading.  Runtime reconfiguration could be provided
-	if needed, however, due to the complexity of reconfiguring RCU at
-	runtime, there would need to be an earthshakingly good reason.
-	Especially given that you have the straightforward option of
-	simply offloading RCU callbacks from all CPUs and pinning them
-	where you want them whenever you want them pinned.
-
-o	Additional configuration is required to deal with other sources
-	of OS jitter, including interrupts and system-utility tasks
-	and processes.  This configuration normally involves binding
-	interrupts and tasks to particular CPUs.
-
-o	Some sources of OS jitter can currently be eliminated only by
-	constraining the workload.  For example, the only way to eliminate
-	OS jitter due to global TLB shootdowns is to avoid the unmapping
-	operations (such as kernel module unload operations) that
-	result in these shootdowns.  For another example, page faults
-	and TLB misses can be reduced (and in some cases eliminated) by
-	using huge pages and by constraining the amount of memory used
-	by the application.  Pre-faulting the working set can also be
-	helpful, especially when combined with the mlock() and mlockall()
-	system calls.
-
-o	Unless all CPUs are idle, at least one CPU must keep the
-	scheduling-clock interrupt going in order to support accurate
-	timekeeping.
-
-o	If there might potentially be some adaptive-ticks CPUs, there
-	will be at least one CPU keeping the scheduling-clock interrupt
-	going, even if all CPUs are otherwise idle.
-
-	Better handling of this situation is ongoing work.
-
-o	Some process-handling operations still require the occasional
-	scheduling-clock tick.	These operations include calculating CPU
-	load, maintaining sched average, computing CFS entity vruntime,
-	computing avenrun, and carrying out load balancing.  They are
-	currently accommodated by scheduling-clock tick every second
-	or so.	On-going work will eliminate the need even for these
-	infrequent scheduling-clock ticks.
diff --git a/Documentation/timers/highres.rst b/Documentation/timers/highres.rst
new file mode 100644
index 000000000000..bde5eb7e5c9e
--- /dev/null
+++ b/Documentation/timers/highres.rst
@@ -0,0 +1,250 @@
+=====================================================
+High resolution timers and dynamic ticks design notes
+=====================================================
+
+Further information can be found in the paper of the OLS 2006 talk "hrtimers
+and beyond". The paper is part of the OLS 2006 Proceedings Volume 1, which can
+be found on the OLS website:
+https://www.kernel.org/doc/ols/2006/ols2006v1-pages-333-346.pdf
+
+The slides to this talk are available from:
+http://www.cs.columbia.edu/~nahum/w6998/papers/ols2006-hrtimers-slides.pdf
+
+The slides contain five figures (pages 2, 15, 18, 20, 22), which illustrate the
+changes in the time(r) related Linux subsystems. Figure #1 (p. 2) shows the
+design of the Linux time(r) system before hrtimers and other building blocks
+got merged into mainline.
+
+Note: the paper and the slides are talking about "clock event source", while we
+switched to the name "clock event devices" in meantime.
+
+The design contains the following basic building blocks:
+
+- hrtimer base infrastructure
+- timeofday and clock source management
+- clock event management
+- high resolution timer functionality
+- dynamic ticks
+
+
+hrtimer base infrastructure
+---------------------------
+
+The hrtimer base infrastructure was merged into the 2.6.16 kernel. Details of
+the base implementation are covered in Documentation/timers/hrtimers.rst. See
+also figure #2 (OLS slides p. 15)
+
+The main differences to the timer wheel, which holds the armed timer_list type
+timers are:
+
+       - time ordered enqueueing into a rb-tree
+       - independent of ticks (the processing is based on nanoseconds)
+
+
+timeofday and clock source management
+-------------------------------------
+
+John Stultz's Generic Time Of Day (GTOD) framework moves a large portion of
+code out of the architecture-specific areas into a generic management
+framework, as illustrated in figure #3 (OLS slides p. 18). The architecture
+specific portion is reduced to the low level hardware details of the clock
+sources, which are registered in the framework and selected on a quality based
+decision. The low level code provides hardware setup and readout routines and
+initializes data structures, which are used by the generic time keeping code to
+convert the clock ticks to nanosecond based time values. All other time keeping
+related functionality is moved into the generic code. The GTOD base patch got
+merged into the 2.6.18 kernel.
+
+Further information about the Generic Time Of Day framework is available in the
+OLS 2005 Proceedings Volume 1:
+
+	http://www.linuxsymposium.org/2005/linuxsymposium_procv1.pdf
+
+The paper "We Are Not Getting Any Younger: A New Approach to Time and
+Timers" was written by J. Stultz, D.V. Hart, & N. Aravamudan.
+
+Figure #3 (OLS slides p.18) illustrates the transformation.
+
+
+clock event management
+----------------------
+
+While clock sources provide read access to the monotonically increasing time
+value, clock event devices are used to schedule the next event
+interrupt(s). The next event is currently defined to be periodic, with its
+period defined at compile time. The setup and selection of the event device
+for various event driven functionalities is hardwired into the architecture
+dependent code. This results in duplicated code across all architectures and
+makes it extremely difficult to change the configuration of the system to use
+event interrupt devices other than those already built into the
+architecture. Another implication of the current design is that it is necessary
+to touch all the architecture-specific implementations in order to provide new
+functionality like high resolution timers or dynamic ticks.
+
+The clock events subsystem tries to address this problem by providing a generic
+solution to manage clock event devices and their usage for the various clock
+event driven kernel functionalities. The goal of the clock event subsystem is
+to minimize the clock event related architecture dependent code to the pure
+hardware related handling and to allow easy addition and utilization of new
+clock event devices. It also minimizes the duplicated code across the
+architectures as it provides generic functionality down to the interrupt
+service handler, which is almost inherently hardware dependent.
+
+Clock event devices are registered either by the architecture dependent boot
+code or at module insertion time. Each clock event device fills a data
+structure with clock-specific property parameters and callback functions. The
+clock event management decides, by using the specified property parameters, the
+set of system functions a clock event device will be used to support. This
+includes the distinction of per-CPU and per-system global event devices.
+
+System-level global event devices are used for the Linux periodic tick. Per-CPU
+event devices are used to provide local CPU functionality such as process
+accounting, profiling, and high resolution timers.
+
+The management layer assigns one or more of the following functions to a clock
+event device:
+
+      - system global periodic tick (jiffies update)
+      - cpu local update_process_times
+      - cpu local profiling
+      - cpu local next event interrupt (non periodic mode)
+
+The clock event device delegates the selection of those timer interrupt related
+functions completely to the management layer. The clock management layer stores
+a function pointer in the device description structure, which has to be called
+from the hardware level handler. This removes a lot of duplicated code from the
+architecture specific timer interrupt handlers and hands the control over the
+clock event devices and the assignment of timer interrupt related functionality
+to the core code.
+
+The clock event layer API is rather small. Aside from the clock event device
+registration interface it provides functions to schedule the next event
+interrupt, clock event device notification service and support for suspend and
+resume.
+
+The framework adds about 700 lines of code which results in a 2KB increase of
+the kernel binary size. The conversion of i386 removes about 100 lines of
+code. The binary size decrease is in the range of 400 byte. We believe that the
+increase of flexibility and the avoidance of duplicated code across
+architectures justifies the slight increase of the binary size.
+
+The conversion of an architecture has no functional impact, but allows to
+utilize the high resolution and dynamic tick functionalities without any change
+to the clock event device and timer interrupt code. After the conversion the
+enabling of high resolution timers and dynamic ticks is simply provided by
+adding the kernel/time/Kconfig file to the architecture specific Kconfig and
+adding the dynamic tick specific calls to the idle routine (a total of 3 lines
+added to the idle function and the Kconfig file)
+
+Figure #4 (OLS slides p.20) illustrates the transformation.
+
+
+high resolution timer functionality
+-----------------------------------
+
+During system boot it is not possible to use the high resolution timer
+functionality, while making it possible would be difficult and would serve no
+useful function. The initialization of the clock event device framework, the
+clock source framework (GTOD) and hrtimers itself has to be done and
+appropriate clock sources and clock event devices have to be registered before
+the high resolution functionality can work. Up to the point where hrtimers are
+initialized, the system works in the usual low resolution periodic mode. The
+clock source and the clock event device layers provide notification functions
+which inform hrtimers about availability of new hardware. hrtimers validates
+the usability of the registered clock sources and clock event devices before
+switching to high resolution mode. This ensures also that a kernel which is
+configured for high resolution timers can run on a system which lacks the
+necessary hardware support.
+
+The high resolution timer code does not support SMP machines which have only
+global clock event devices. The support of such hardware would involve IPI
+calls when an interrupt happens. The overhead would be much larger than the
+benefit. This is the reason why we currently disable high resolution and
+dynamic ticks on i386 SMP systems which stop the local APIC in C3 power
+state. A workaround is available as an idea, but the problem has not been
+tackled yet.
+
+The time ordered insertion of timers provides all the infrastructure to decide
+whether the event device has to be reprogrammed when a timer is added. The
+decision is made per timer base and synchronized across per-cpu timer bases in
+a support function. The design allows the system to utilize separate per-CPU
+clock event devices for the per-CPU timer bases, but currently only one
+reprogrammable clock event device per-CPU is utilized.
+
+When the timer interrupt happens, the next event interrupt handler is called
+from the clock event distribution code and moves expired timers from the
+red-black tree to a separate double linked list and invokes the softirq
+handler. An additional mode field in the hrtimer structure allows the system to
+execute callback functions directly from the next event interrupt handler. This
+is restricted to code which can safely be executed in the hard interrupt
+context. This applies, for example, to the common case of a wakeup function as
+used by nanosleep. The advantage of executing the handler in the interrupt
+context is the avoidance of up to two context switches - from the interrupted
+context to the softirq and to the task which is woken up by the expired
+timer.
+
+Once a system has switched to high resolution mode, the periodic tick is
+switched off. This disables the per system global periodic clock event device -
+e.g. the PIT on i386 SMP systems.
+
+The periodic tick functionality is provided by an per-cpu hrtimer. The callback
+function is executed in the next event interrupt context and updates jiffies
+and calls update_process_times and profiling. The implementation of the hrtimer
+based periodic tick is designed to be extended with dynamic tick functionality.
+This allows to use a single clock event device to schedule high resolution
+timer and periodic events (jiffies tick, profiling, process accounting) on UP
+systems. This has been proved to work with the PIT on i386 and the Incrementer
+on PPC.
+
+The softirq for running the hrtimer queues and executing the callbacks has been
+separated from the tick bound timer softirq to allow accurate delivery of high
+resolution timer signals which are used by itimer and POSIX interval
+timers. The execution of this softirq can still be delayed by other softirqs,
+but the overall latencies have been significantly improved by this separation.
+
+Figure #5 (OLS slides p.22) illustrates the transformation.
+
+
+dynamic ticks
+-------------
+
+Dynamic ticks are the logical consequence of the hrtimer based periodic tick
+replacement (sched_tick). The functionality of the sched_tick hrtimer is
+extended by three functions:
+
+- hrtimer_stop_sched_tick
+- hrtimer_restart_sched_tick
+- hrtimer_update_jiffies
+
+hrtimer_stop_sched_tick() is called when a CPU goes into idle state. The code
+evaluates the next scheduled timer event (from both hrtimers and the timer
+wheel) and in case that the next event is further away than the next tick it
+reprograms the sched_tick to this future event, to allow longer idle sleeps
+without worthless interruption by the periodic tick. The function is also
+called when an interrupt happens during the idle period, which does not cause a
+reschedule. The call is necessary as the interrupt handler might have armed a
+new timer whose expiry time is before the time which was identified as the
+nearest event in the previous call to hrtimer_stop_sched_tick.
+
+hrtimer_restart_sched_tick() is called when the CPU leaves the idle state before
+it calls schedule(). hrtimer_restart_sched_tick() resumes the periodic tick,
+which is kept active until the next call to hrtimer_stop_sched_tick().
+
+hrtimer_update_jiffies() is called from irq_enter() when an interrupt happens
+in the idle period to make sure that jiffies are up to date and the interrupt
+handler has not to deal with an eventually stale jiffy value.
+
+The dynamic tick feature provides statistical values which are exported to
+userspace via /proc/stat and can be made available for enhanced power
+management control.
+
+The implementation leaves room for further development like full tickless
+systems, where the time slice is controlled by the scheduler, variable
+frequency profiling, and a complete removal of jiffies in the future.
+
+
+Aside the current initial submission of i386 support, the patchset has been
+extended to x86_64 and ARM already. Initial (work in progress) support is also
+available for MIPS and PowerPC.
+
+	  Thomas, Ingo
diff --git a/Documentation/timers/highres.txt b/Documentation/timers/highres.txt
deleted file mode 100644
index 8f9741592123..000000000000
--- a/Documentation/timers/highres.txt
+++ /dev/null
@@ -1,249 +0,0 @@
-High resolution timers and dynamic ticks design notes
------------------------------------------------------
-
-Further information can be found in the paper of the OLS 2006 talk "hrtimers
-and beyond". The paper is part of the OLS 2006 Proceedings Volume 1, which can
-be found on the OLS website:
-https://www.kernel.org/doc/ols/2006/ols2006v1-pages-333-346.pdf
-
-The slides to this talk are available from:
-http://www.cs.columbia.edu/~nahum/w6998/papers/ols2006-hrtimers-slides.pdf
-
-The slides contain five figures (pages 2, 15, 18, 20, 22), which illustrate the
-changes in the time(r) related Linux subsystems. Figure #1 (p. 2) shows the
-design of the Linux time(r) system before hrtimers and other building blocks
-got merged into mainline.
-
-Note: the paper and the slides are talking about "clock event source", while we
-switched to the name "clock event devices" in meantime.
-
-The design contains the following basic building blocks:
-
-- hrtimer base infrastructure
-- timeofday and clock source management
-- clock event management
-- high resolution timer functionality
-- dynamic ticks
-
-
-hrtimer base infrastructure
----------------------------
-
-The hrtimer base infrastructure was merged into the 2.6.16 kernel. Details of
-the base implementation are covered in Documentation/timers/hrtimers.txt. See
-also figure #2 (OLS slides p. 15)
-
-The main differences to the timer wheel, which holds the armed timer_list type
-timers are:
-       - time ordered enqueueing into a rb-tree
-       - independent of ticks (the processing is based on nanoseconds)
-
-
-timeofday and clock source management
--------------------------------------
-
-John Stultz's Generic Time Of Day (GTOD) framework moves a large portion of
-code out of the architecture-specific areas into a generic management
-framework, as illustrated in figure #3 (OLS slides p. 18). The architecture
-specific portion is reduced to the low level hardware details of the clock
-sources, which are registered in the framework and selected on a quality based
-decision. The low level code provides hardware setup and readout routines and
-initializes data structures, which are used by the generic time keeping code to
-convert the clock ticks to nanosecond based time values. All other time keeping
-related functionality is moved into the generic code. The GTOD base patch got
-merged into the 2.6.18 kernel.
-
-Further information about the Generic Time Of Day framework is available in the
-OLS 2005 Proceedings Volume 1:
-http://www.linuxsymposium.org/2005/linuxsymposium_procv1.pdf
-
-The paper "We Are Not Getting Any Younger: A New Approach to Time and
-Timers" was written by J. Stultz, D.V. Hart, & N. Aravamudan.
-
-Figure #3 (OLS slides p.18) illustrates the transformation.
-
-
-clock event management
-----------------------
-
-While clock sources provide read access to the monotonically increasing time
-value, clock event devices are used to schedule the next event
-interrupt(s). The next event is currently defined to be periodic, with its
-period defined at compile time. The setup and selection of the event device
-for various event driven functionalities is hardwired into the architecture
-dependent code. This results in duplicated code across all architectures and
-makes it extremely difficult to change the configuration of the system to use
-event interrupt devices other than those already built into the
-architecture. Another implication of the current design is that it is necessary
-to touch all the architecture-specific implementations in order to provide new
-functionality like high resolution timers or dynamic ticks.
-
-The clock events subsystem tries to address this problem by providing a generic
-solution to manage clock event devices and their usage for the various clock
-event driven kernel functionalities. The goal of the clock event subsystem is
-to minimize the clock event related architecture dependent code to the pure
-hardware related handling and to allow easy addition and utilization of new
-clock event devices. It also minimizes the duplicated code across the
-architectures as it provides generic functionality down to the interrupt
-service handler, which is almost inherently hardware dependent.
-
-Clock event devices are registered either by the architecture dependent boot
-code or at module insertion time. Each clock event device fills a data
-structure with clock-specific property parameters and callback functions. The
-clock event management decides, by using the specified property parameters, the
-set of system functions a clock event device will be used to support. This
-includes the distinction of per-CPU and per-system global event devices.
-
-System-level global event devices are used for the Linux periodic tick. Per-CPU
-event devices are used to provide local CPU functionality such as process
-accounting, profiling, and high resolution timers.
-
-The management layer assigns one or more of the following functions to a clock
-event device:
-      - system global periodic tick (jiffies update)
-      - cpu local update_process_times
-      - cpu local profiling
-      - cpu local next event interrupt (non periodic mode)
-
-The clock event device delegates the selection of those timer interrupt related
-functions completely to the management layer. The clock management layer stores
-a function pointer in the device description structure, which has to be called
-from the hardware level handler. This removes a lot of duplicated code from the
-architecture specific timer interrupt handlers and hands the control over the
-clock event devices and the assignment of timer interrupt related functionality
-to the core code.
-
-The clock event layer API is rather small. Aside from the clock event device
-registration interface it provides functions to schedule the next event
-interrupt, clock event device notification service and support for suspend and
-resume.
-
-The framework adds about 700 lines of code which results in a 2KB increase of
-the kernel binary size. The conversion of i386 removes about 100 lines of
-code. The binary size decrease is in the range of 400 byte. We believe that the
-increase of flexibility and the avoidance of duplicated code across
-architectures justifies the slight increase of the binary size.
-
-The conversion of an architecture has no functional impact, but allows to
-utilize the high resolution and dynamic tick functionalities without any change
-to the clock event device and timer interrupt code. After the conversion the
-enabling of high resolution timers and dynamic ticks is simply provided by
-adding the kernel/time/Kconfig file to the architecture specific Kconfig and
-adding the dynamic tick specific calls to the idle routine (a total of 3 lines
-added to the idle function and the Kconfig file)
-
-Figure #4 (OLS slides p.20) illustrates the transformation.
-
-
-high resolution timer functionality
------------------------------------
-
-During system boot it is not possible to use the high resolution timer
-functionality, while making it possible would be difficult and would serve no
-useful function. The initialization of the clock event device framework, the
-clock source framework (GTOD) and hrtimers itself has to be done and
-appropriate clock sources and clock event devices have to be registered before
-the high resolution functionality can work. Up to the point where hrtimers are
-initialized, the system works in the usual low resolution periodic mode. The
-clock source and the clock event device layers provide notification functions
-which inform hrtimers about availability of new hardware. hrtimers validates
-the usability of the registered clock sources and clock event devices before
-switching to high resolution mode. This ensures also that a kernel which is
-configured for high resolution timers can run on a system which lacks the
-necessary hardware support.
-
-The high resolution timer code does not support SMP machines which have only
-global clock event devices. The support of such hardware would involve IPI
-calls when an interrupt happens. The overhead would be much larger than the
-benefit. This is the reason why we currently disable high resolution and
-dynamic ticks on i386 SMP systems which stop the local APIC in C3 power
-state. A workaround is available as an idea, but the problem has not been
-tackled yet.
-
-The time ordered insertion of timers provides all the infrastructure to decide
-whether the event device has to be reprogrammed when a timer is added. The
-decision is made per timer base and synchronized across per-cpu timer bases in
-a support function. The design allows the system to utilize separate per-CPU
-clock event devices for the per-CPU timer bases, but currently only one
-reprogrammable clock event device per-CPU is utilized.
-
-When the timer interrupt happens, the next event interrupt handler is called
-from the clock event distribution code and moves expired timers from the
-red-black tree to a separate double linked list and invokes the softirq
-handler. An additional mode field in the hrtimer structure allows the system to
-execute callback functions directly from the next event interrupt handler. This
-is restricted to code which can safely be executed in the hard interrupt
-context. This applies, for example, to the common case of a wakeup function as
-used by nanosleep. The advantage of executing the handler in the interrupt
-context is the avoidance of up to two context switches - from the interrupted
-context to the softirq and to the task which is woken up by the expired
-timer.
-
-Once a system has switched to high resolution mode, the periodic tick is
-switched off. This disables the per system global periodic clock event device -
-e.g. the PIT on i386 SMP systems.
-
-The periodic tick functionality is provided by an per-cpu hrtimer. The callback
-function is executed in the next event interrupt context and updates jiffies
-and calls update_process_times and profiling. The implementation of the hrtimer
-based periodic tick is designed to be extended with dynamic tick functionality.
-This allows to use a single clock event device to schedule high resolution
-timer and periodic events (jiffies tick, profiling, process accounting) on UP
-systems. This has been proved to work with the PIT on i386 and the Incrementer
-on PPC.
-
-The softirq for running the hrtimer queues and executing the callbacks has been
-separated from the tick bound timer softirq to allow accurate delivery of high
-resolution timer signals which are used by itimer and POSIX interval
-timers. The execution of this softirq can still be delayed by other softirqs,
-but the overall latencies have been significantly improved by this separation.
-
-Figure #5 (OLS slides p.22) illustrates the transformation.
-
-
-dynamic ticks
--------------
-
-Dynamic ticks are the logical consequence of the hrtimer based periodic tick
-replacement (sched_tick). The functionality of the sched_tick hrtimer is
-extended by three functions:
-
-- hrtimer_stop_sched_tick
-- hrtimer_restart_sched_tick
-- hrtimer_update_jiffies
-
-hrtimer_stop_sched_tick() is called when a CPU goes into idle state. The code
-evaluates the next scheduled timer event (from both hrtimers and the timer
-wheel) and in case that the next event is further away than the next tick it
-reprograms the sched_tick to this future event, to allow longer idle sleeps
-without worthless interruption by the periodic tick. The function is also
-called when an interrupt happens during the idle period, which does not cause a
-reschedule. The call is necessary as the interrupt handler might have armed a
-new timer whose expiry time is before the time which was identified as the
-nearest event in the previous call to hrtimer_stop_sched_tick.
-
-hrtimer_restart_sched_tick() is called when the CPU leaves the idle state before
-it calls schedule(). hrtimer_restart_sched_tick() resumes the periodic tick,
-which is kept active until the next call to hrtimer_stop_sched_tick().
-
-hrtimer_update_jiffies() is called from irq_enter() when an interrupt happens
-in the idle period to make sure that jiffies are up to date and the interrupt
-handler has not to deal with an eventually stale jiffy value.
-
-The dynamic tick feature provides statistical values which are exported to
-userspace via /proc/stat and can be made available for enhanced power
-management control.
-
-The implementation leaves room for further development like full tickless
-systems, where the time slice is controlled by the scheduler, variable
-frequency profiling, and a complete removal of jiffies in the future.
-
-
-Aside the current initial submission of i386 support, the patchset has been
-extended to x86_64 and ARM already. Initial (work in progress) support is also
-available for MIPS and PowerPC.
-
-	  Thomas, Ingo
-
-
-
diff --git a/Documentation/timers/hpet.rst b/Documentation/timers/hpet.rst
new file mode 100644
index 000000000000..c9d05d3caaca
--- /dev/null
+++ b/Documentation/timers/hpet.rst
@@ -0,0 +1,30 @@
+===========================================
+High Precision Event Timer Driver for Linux
+===========================================
+
+The High Precision Event Timer (HPET) hardware follows a specification
+by Intel and Microsoft, revision 1.
+
+Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision")
+and up to 32 comparators.  Normally three or more comparators are provided,
+each of which can generate oneshot interrupts and at least one of which has
+additional hardware to support periodic interrupts.  The comparators are
+also called "timers", which can be misleading since usually timers are
+independent of each other ... these share a counter, complicating resets.
+
+HPET devices can support two interrupt routing modes.  In one mode, the
+comparators are additional interrupt sources with no particular system
+role.  Many x86 BIOS writers don't route HPET interrupts at all, which
+prevents use of that mode.  They support the other "legacy replacement"
+mode where the first two comparators block interrupts from 8254 timers
+and from the RTC.
+
+The driver supports detection of HPET driver allocation and initialization
+of the HPET before the driver module_init routine is called.  This enables
+platform code which uses timer 0 or 1 as the main timer to intercept HPET
+initialization.  An example of this initialization can be found in
+arch/x86/kernel/hpet.c.
+
+The driver provides a userspace API which resembles the API found in the
+RTC driver framework.  An example user space program is provided in
+file:samples/timers/hpet_example.c
diff --git a/Documentation/timers/hpet.txt b/Documentation/timers/hpet.txt
deleted file mode 100644
index 895345ec513b..000000000000
--- a/Documentation/timers/hpet.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-		High Precision Event Timer Driver for Linux
-
-The High Precision Event Timer (HPET) hardware follows a specification
-by Intel and Microsoft, revision 1.
-
-Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision")
-and up to 32 comparators.  Normally three or more comparators are provided,
-each of which can generate oneshot interrupts and at least one of which has
-additional hardware to support periodic interrupts.  The comparators are
-also called "timers", which can be misleading since usually timers are
-independent of each other ... these share a counter, complicating resets.
-
-HPET devices can support two interrupt routing modes.  In one mode, the
-comparators are additional interrupt sources with no particular system
-role.  Many x86 BIOS writers don't route HPET interrupts at all, which
-prevents use of that mode.  They support the other "legacy replacement"
-mode where the first two comparators block interrupts from 8254 timers
-and from the RTC.
-
-The driver supports detection of HPET driver allocation and initialization
-of the HPET before the driver module_init routine is called.  This enables
-platform code which uses timer 0 or 1 as the main timer to intercept HPET
-initialization.  An example of this initialization can be found in
-arch/x86/kernel/hpet.c.
-
-The driver provides a userspace API which resembles the API found in the
-RTC driver framework.  An example user space program is provided in
-file:samples/timers/hpet_example.c
diff --git a/Documentation/timers/hrtimers.rst b/Documentation/timers/hrtimers.rst
new file mode 100644
index 000000000000..c1c20a693e8f
--- /dev/null
+++ b/Documentation/timers/hrtimers.rst
@@ -0,0 +1,178 @@
+======================================================
+hrtimers - subsystem for high-resolution kernel timers
+======================================================
+
+This patch introduces a new subsystem for high-resolution kernel timers.
+
+One might ask the question: we already have a timer subsystem
+(kernel/timers.c), why do we need two timer subsystems? After a lot of
+back and forth trying to integrate high-resolution and high-precision
+features into the existing timer framework, and after testing various
+such high-resolution timer implementations in practice, we came to the
+conclusion that the timer wheel code is fundamentally not suitable for
+such an approach. We initially didn't believe this ('there must be a way
+to solve this'), and spent a considerable effort trying to integrate
+things into the timer wheel, but we failed. In hindsight, there are
+several reasons why such integration is hard/impossible:
+
+- the forced handling of low-resolution and high-resolution timers in
+  the same way leads to a lot of compromises, macro magic and #ifdef
+  mess. The timers.c code is very "tightly coded" around jiffies and
+  32-bitness assumptions, and has been honed and micro-optimized for a
+  relatively narrow use case (jiffies in a relatively narrow HZ range)
+  for many years - and thus even small extensions to it easily break
+  the wheel concept, leading to even worse compromises. The timer wheel
+  code is very good and tight code, there's zero problems with it in its
+  current usage - but it is simply not suitable to be extended for
+  high-res timers.
+
+- the unpredictable [O(N)] overhead of cascading leads to delays which
+  necessitate a more complex handling of high resolution timers, which
+  in turn decreases robustness. Such a design still leads to rather large
+  timing inaccuracies. Cascading is a fundamental property of the timer
+  wheel concept, it cannot be 'designed out' without inevitably
+  degrading other portions of the timers.c code in an unacceptable way.
+
+- the implementation of the current posix-timer subsystem on top of
+  the timer wheel has already introduced a quite complex handling of
+  the required readjusting of absolute CLOCK_REALTIME timers at
+  settimeofday or NTP time - further underlying our experience by
+  example: that the timer wheel data structure is too rigid for high-res
+  timers.
+
+- the timer wheel code is most optimal for use cases which can be
+  identified as "timeouts". Such timeouts are usually set up to cover
+  error conditions in various I/O paths, such as networking and block
+  I/O. The vast majority of those timers never expire and are rarely
+  recascaded because the expected correct event arrives in time so they
+  can be removed from the timer wheel before any further processing of
+  them becomes necessary. Thus the users of these timeouts can accept
+  the granularity and precision tradeoffs of the timer wheel, and
+  largely expect the timer subsystem to have near-zero overhead.
+  Accurate timing for them is not a core purpose - in fact most of the
+  timeout values used are ad-hoc. For them it is at most a necessary
+  evil to guarantee the processing of actual timeout completions
+  (because most of the timeouts are deleted before completion), which
+  should thus be as cheap and unintrusive as possible.
+
+The primary users of precision timers are user-space applications that
+utilize nanosleep, posix-timers and itimer interfaces. Also, in-kernel
+users like drivers and subsystems which require precise timed events
+(e.g. multimedia) can benefit from the availability of a separate
+high-resolution timer subsystem as well.
+
+While this subsystem does not offer high-resolution clock sources just
+yet, the hrtimer subsystem can be easily extended with high-resolution
+clock capabilities, and patches for that exist and are maturing quickly.
+The increasing demand for realtime and multimedia applications along
+with other potential users for precise timers gives another reason to
+separate the "timeout" and "precise timer" subsystems.
+
+Another potential benefit is that such a separation allows even more
+special-purpose optimization of the existing timer wheel for the low
+resolution and low precision use cases - once the precision-sensitive
+APIs are separated from the timer wheel and are migrated over to
+hrtimers. E.g. we could decrease the frequency of the timeout subsystem
+from 250 Hz to 100 HZ (or even smaller).
+
+hrtimer subsystem implementation details
+----------------------------------------
+
+the basic design considerations were:
+
+- simplicity
+
+- data structure not bound to jiffies or any other granularity. All the
+  kernel logic works at 64-bit nanoseconds resolution - no compromises.
+
+- simplification of existing, timing related kernel code
+
+another basic requirement was the immediate enqueueing and ordering of
+timers at activation time. After looking at several possible solutions
+such as radix trees and hashes, we chose the red black tree as the basic
+data structure. Rbtrees are available as a library in the kernel and are
+used in various performance-critical areas of e.g. memory management and
+file systems. The rbtree is solely used for time sorted ordering, while
+a separate list is used to give the expiry code fast access to the
+queued timers, without having to walk the rbtree.
+
+(This separate list is also useful for later when we'll introduce
+high-resolution clocks, where we need separate pending and expired
+queues while keeping the time-order intact.)
+
+Time-ordered enqueueing is not purely for the purposes of
+high-resolution clocks though, it also simplifies the handling of
+absolute timers based on a low-resolution CLOCK_REALTIME. The existing
+implementation needed to keep an extra list of all armed absolute
+CLOCK_REALTIME timers along with complex locking. In case of
+settimeofday and NTP, all the timers (!) had to be dequeued, the
+time-changing code had to fix them up one by one, and all of them had to
+be enqueued again. The time-ordered enqueueing and the storage of the
+expiry time in absolute time units removes all this complex and poorly
+scaling code from the posix-timer implementation - the clock can simply
+be set without having to touch the rbtree. This also makes the handling
+of posix-timers simpler in general.
+
+The locking and per-CPU behavior of hrtimers was mostly taken from the
+existing timer wheel code, as it is mature and well suited. Sharing code
+was not really a win, due to the different data structures. Also, the
+hrtimer functions now have clearer behavior and clearer names - such as
+hrtimer_try_to_cancel() and hrtimer_cancel() [which are roughly
+equivalent to del_timer() and del_timer_sync()] - so there's no direct
+1:1 mapping between them on the algorithmic level, and thus no real
+potential for code sharing either.
+
+Basic data types: every time value, absolute or relative, is in a
+special nanosecond-resolution type: ktime_t. The kernel-internal
+representation of ktime_t values and operations is implemented via
+macros and inline functions, and can be switched between a "hybrid
+union" type and a plain "scalar" 64bit nanoseconds representation (at
+compile time). The hybrid union type optimizes time conversions on 32bit
+CPUs. This build-time-selectable ktime_t storage format was implemented
+to avoid the performance impact of 64-bit multiplications and divisions
+on 32bit CPUs. Such operations are frequently necessary to convert
+between the storage formats provided by kernel and userspace interfaces
+and the internal time format. (See include/linux/ktime.h for further
+details.)
+
+hrtimers - rounding of timer values
+-----------------------------------
+
+the hrtimer code will round timer events to lower-resolution clocks
+because it has to. Otherwise it will do no artificial rounding at all.
+
+one question is, what resolution value should be returned to the user by
+the clock_getres() interface. This will return whatever real resolution
+a given clock has - be it low-res, high-res, or artificially-low-res.
+
+hrtimers - testing and verification
+-----------------------------------
+
+We used the high-resolution clock subsystem ontop of hrtimers to verify
+the hrtimer implementation details in praxis, and we also ran the posix
+timer tests in order to ensure specification compliance. We also ran
+tests on low-resolution clocks.
+
+The hrtimer patch converts the following kernel functionality to use
+hrtimers:
+
+ - nanosleep
+ - itimers
+ - posix-timers
+
+The conversion of nanosleep and posix-timers enabled the unification of
+nanosleep and clock_nanosleep.
+
+The code was successfully compiled for the following platforms:
+
+ i386, x86_64, ARM, PPC, PPC64, IA64
+
+The code was run-tested on the following platforms:
+
+ i386(UP/SMP), x86_64(UP/SMP), ARM, PPC
+
+hrtimers were also integrated into the -rt tree, along with a
+hrtimers-based high-resolution clock implementation, so the hrtimers
+code got a healthy amount of testing and use in practice.
+
+	Thomas Gleixner, Ingo Molnar
diff --git a/Documentation/timers/hrtimers.txt b/Documentation/timers/hrtimers.txt
deleted file mode 100644
index 588d85724f10..000000000000
--- a/Documentation/timers/hrtimers.txt
+++ /dev/null
@@ -1,178 +0,0 @@
-
-hrtimers - subsystem for high-resolution kernel timers
-----------------------------------------------------
-
-This patch introduces a new subsystem for high-resolution kernel timers.
-
-One might ask the question: we already have a timer subsystem
-(kernel/timers.c), why do we need two timer subsystems? After a lot of
-back and forth trying to integrate high-resolution and high-precision
-features into the existing timer framework, and after testing various
-such high-resolution timer implementations in practice, we came to the
-conclusion that the timer wheel code is fundamentally not suitable for
-such an approach. We initially didn't believe this ('there must be a way
-to solve this'), and spent a considerable effort trying to integrate
-things into the timer wheel, but we failed. In hindsight, there are
-several reasons why such integration is hard/impossible:
-
-- the forced handling of low-resolution and high-resolution timers in
-  the same way leads to a lot of compromises, macro magic and #ifdef
-  mess. The timers.c code is very "tightly coded" around jiffies and
-  32-bitness assumptions, and has been honed and micro-optimized for a
-  relatively narrow use case (jiffies in a relatively narrow HZ range)
-  for many years - and thus even small extensions to it easily break
-  the wheel concept, leading to even worse compromises. The timer wheel
-  code is very good and tight code, there's zero problems with it in its
-  current usage - but it is simply not suitable to be extended for
-  high-res timers.
-
-- the unpredictable [O(N)] overhead of cascading leads to delays which
-  necessitate a more complex handling of high resolution timers, which
-  in turn decreases robustness. Such a design still leads to rather large
-  timing inaccuracies. Cascading is a fundamental property of the timer
-  wheel concept, it cannot be 'designed out' without inevitably
-  degrading other portions of the timers.c code in an unacceptable way.
-
-- the implementation of the current posix-timer subsystem on top of
-  the timer wheel has already introduced a quite complex handling of
-  the required readjusting of absolute CLOCK_REALTIME timers at
-  settimeofday or NTP time - further underlying our experience by
-  example: that the timer wheel data structure is too rigid for high-res
-  timers.
-
-- the timer wheel code is most optimal for use cases which can be
-  identified as "timeouts". Such timeouts are usually set up to cover
-  error conditions in various I/O paths, such as networking and block
-  I/O. The vast majority of those timers never expire and are rarely
-  recascaded because the expected correct event arrives in time so they
-  can be removed from the timer wheel before any further processing of
-  them becomes necessary. Thus the users of these timeouts can accept
-  the granularity and precision tradeoffs of the timer wheel, and
-  largely expect the timer subsystem to have near-zero overhead.
-  Accurate timing for them is not a core purpose - in fact most of the
-  timeout values used are ad-hoc. For them it is at most a necessary
-  evil to guarantee the processing of actual timeout completions
-  (because most of the timeouts are deleted before completion), which
-  should thus be as cheap and unintrusive as possible.
-
-The primary users of precision timers are user-space applications that
-utilize nanosleep, posix-timers and itimer interfaces. Also, in-kernel
-users like drivers and subsystems which require precise timed events
-(e.g. multimedia) can benefit from the availability of a separate
-high-resolution timer subsystem as well.
-
-While this subsystem does not offer high-resolution clock sources just
-yet, the hrtimer subsystem can be easily extended with high-resolution
-clock capabilities, and patches for that exist and are maturing quickly.
-The increasing demand for realtime and multimedia applications along
-with other potential users for precise timers gives another reason to
-separate the "timeout" and "precise timer" subsystems.
-
-Another potential benefit is that such a separation allows even more
-special-purpose optimization of the existing timer wheel for the low
-resolution and low precision use cases - once the precision-sensitive
-APIs are separated from the timer wheel and are migrated over to
-hrtimers. E.g. we could decrease the frequency of the timeout subsystem
-from 250 Hz to 100 HZ (or even smaller).
-
-hrtimer subsystem implementation details
-----------------------------------------
-
-the basic design considerations were:
-
-- simplicity
-
-- data structure not bound to jiffies or any other granularity. All the
-  kernel logic works at 64-bit nanoseconds resolution - no compromises.
-
-- simplification of existing, timing related kernel code
-
-another basic requirement was the immediate enqueueing and ordering of
-timers at activation time. After looking at several possible solutions
-such as radix trees and hashes, we chose the red black tree as the basic
-data structure. Rbtrees are available as a library in the kernel and are
-used in various performance-critical areas of e.g. memory management and
-file systems. The rbtree is solely used for time sorted ordering, while
-a separate list is used to give the expiry code fast access to the
-queued timers, without having to walk the rbtree.
-
-(This separate list is also useful for later when we'll introduce
-high-resolution clocks, where we need separate pending and expired
-queues while keeping the time-order intact.)
-
-Time-ordered enqueueing is not purely for the purposes of
-high-resolution clocks though, it also simplifies the handling of
-absolute timers based on a low-resolution CLOCK_REALTIME. The existing
-implementation needed to keep an extra list of all armed absolute
-CLOCK_REALTIME timers along with complex locking. In case of
-settimeofday and NTP, all the timers (!) had to be dequeued, the
-time-changing code had to fix them up one by one, and all of them had to
-be enqueued again. The time-ordered enqueueing and the storage of the
-expiry time in absolute time units removes all this complex and poorly
-scaling code from the posix-timer implementation - the clock can simply
-be set without having to touch the rbtree. This also makes the handling
-of posix-timers simpler in general.
-
-The locking and per-CPU behavior of hrtimers was mostly taken from the
-existing timer wheel code, as it is mature and well suited. Sharing code
-was not really a win, due to the different data structures. Also, the
-hrtimer functions now have clearer behavior and clearer names - such as
-hrtimer_try_to_cancel() and hrtimer_cancel() [which are roughly
-equivalent to del_timer() and del_timer_sync()] - so there's no direct
-1:1 mapping between them on the algorithmic level, and thus no real
-potential for code sharing either.
-
-Basic data types: every time value, absolute or relative, is in a
-special nanosecond-resolution type: ktime_t. The kernel-internal
-representation of ktime_t values and operations is implemented via
-macros and inline functions, and can be switched between a "hybrid
-union" type and a plain "scalar" 64bit nanoseconds representation (at
-compile time). The hybrid union type optimizes time conversions on 32bit
-CPUs. This build-time-selectable ktime_t storage format was implemented
-to avoid the performance impact of 64-bit multiplications and divisions
-on 32bit CPUs. Such operations are frequently necessary to convert
-between the storage formats provided by kernel and userspace interfaces
-and the internal time format. (See include/linux/ktime.h for further
-details.)
-
-hrtimers - rounding of timer values
------------------------------------
-
-the hrtimer code will round timer events to lower-resolution clocks
-because it has to. Otherwise it will do no artificial rounding at all.
-
-one question is, what resolution value should be returned to the user by
-the clock_getres() interface. This will return whatever real resolution
-a given clock has - be it low-res, high-res, or artificially-low-res.
-
-hrtimers - testing and verification
-----------------------------------
-
-We used the high-resolution clock subsystem ontop of hrtimers to verify
-the hrtimer implementation details in praxis, and we also ran the posix
-timer tests in order to ensure specification compliance. We also ran
-tests on low-resolution clocks.
-
-The hrtimer patch converts the following kernel functionality to use
-hrtimers:
-
- - nanosleep
- - itimers
- - posix-timers
-
-The conversion of nanosleep and posix-timers enabled the unification of
-nanosleep and clock_nanosleep.
-
-The code was successfully compiled for the following platforms:
-
- i386, x86_64, ARM, PPC, PPC64, IA64
-
-The code was run-tested on the following platforms:
-
- i386(UP/SMP), x86_64(UP/SMP), ARM, PPC
-
-hrtimers were also integrated into the -rt tree, along with a
-hrtimers-based high-resolution clock implementation, so the hrtimers
-code got a healthy amount of testing and use in practice.
-
-	Thomas Gleixner, Ingo Molnar
diff --git a/Documentation/timers/index.rst b/Documentation/timers/index.rst
new file mode 100644
index 000000000000..91f6f8263c48
--- /dev/null
+++ b/Documentation/timers/index.rst
@@ -0,0 +1,22 @@
+:orphan:
+
+======
+timers
+======
+
+.. toctree::
+    :maxdepth: 1
+
+    highres
+    hpet
+    hrtimers
+    no_hz
+    timekeeping
+    timers-howto
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/timers/no_hz.rst b/Documentation/timers/no_hz.rst
new file mode 100644
index 000000000000..065db217cb04
--- /dev/null
+++ b/Documentation/timers/no_hz.rst
@@ -0,0 +1,326 @@
+﻿======================================
+NO_HZ: Reducing Scheduling-Clock Ticks
+======================================
+
+
+This document describes Kconfig options and boot parameters that can
+reduce the number of scheduling-clock interrupts, thereby improving energy
+efficiency and reducing OS jitter.  Reducing OS jitter is important for
+some types of computationally intensive high-performance computing (HPC)
+applications and for real-time applications.
+
+There are three main ways of managing scheduling-clock interrupts
+(also known as "scheduling-clock ticks" or simply "ticks"):
+
+1.	Never omit scheduling-clock ticks (CONFIG_HZ_PERIODIC=y or
+	CONFIG_NO_HZ=n for older kernels).  You normally will -not-
+	want to choose this option.
+
+2.	Omit scheduling-clock ticks on idle CPUs (CONFIG_NO_HZ_IDLE=y or
+	CONFIG_NO_HZ=y for older kernels).  This is the most common
+	approach, and should be the default.
+
+3.	Omit scheduling-clock ticks on CPUs that are either idle or that
+	have only one runnable task (CONFIG_NO_HZ_FULL=y).  Unless you
+	are running realtime applications or certain types of HPC
+	workloads, you will normally -not- want this option.
+
+These three cases are described in the following three sections, followed
+by a third section on RCU-specific considerations, a fourth section
+discussing testing, and a fifth and final section listing known issues.
+
+
+Never Omit Scheduling-Clock Ticks
+=================================
+
+Very old versions of Linux from the 1990s and the very early 2000s
+are incapable of omitting scheduling-clock ticks.  It turns out that
+there are some situations where this old-school approach is still the
+right approach, for example, in heavy workloads with lots of tasks
+that use short bursts of CPU, where there are very frequent idle
+periods, but where these idle periods are also quite short (tens or
+hundreds of microseconds).  For these types of workloads, scheduling
+clock interrupts will normally be delivered any way because there
+will frequently be multiple runnable tasks per CPU.  In these cases,
+attempting to turn off the scheduling clock interrupt will have no effect
+other than increasing the overhead of switching to and from idle and
+transitioning between user and kernel execution.
+
+This mode of operation can be selected using CONFIG_HZ_PERIODIC=y (or
+CONFIG_NO_HZ=n for older kernels).
+
+However, if you are instead running a light workload with long idle
+periods, failing to omit scheduling-clock interrupts will result in
+excessive power consumption.  This is especially bad on battery-powered
+devices, where it results in extremely short battery lifetimes.  If you
+are running light workloads, you should therefore read the following
+section.
+
+In addition, if you are running either a real-time workload or an HPC
+workload with short iterations, the scheduling-clock interrupts can
+degrade your applications performance.  If this describes your workload,
+you should read the following two sections.
+
+
+Omit Scheduling-Clock Ticks For Idle CPUs
+=========================================
+
+If a CPU is idle, there is little point in sending it a scheduling-clock
+interrupt.  After all, the primary purpose of a scheduling-clock interrupt
+is to force a busy CPU to shift its attention among multiple duties,
+and an idle CPU has no duties to shift its attention among.
+
+The CONFIG_NO_HZ_IDLE=y Kconfig option causes the kernel to avoid sending
+scheduling-clock interrupts to idle CPUs, which is critically important
+both to battery-powered devices and to highly virtualized mainframes.
+A battery-powered device running a CONFIG_HZ_PERIODIC=y kernel would
+drain its battery very quickly, easily 2-3 times as fast as would the
+same device running a CONFIG_NO_HZ_IDLE=y kernel.  A mainframe running
+1,500 OS instances might find that half of its CPU time was consumed by
+unnecessary scheduling-clock interrupts.  In these situations, there
+is strong motivation to avoid sending scheduling-clock interrupts to
+idle CPUs.  That said, dyntick-idle mode is not free:
+
+1.	It increases the number of instructions executed on the path
+	to and from the idle loop.
+
+2.	On many architectures, dyntick-idle mode also increases the
+	number of expensive clock-reprogramming operations.
+
+Therefore, systems with aggressive real-time response constraints often
+run CONFIG_HZ_PERIODIC=y kernels (or CONFIG_NO_HZ=n for older kernels)
+in order to avoid degrading from-idle transition latencies.
+
+An idle CPU that is not receiving scheduling-clock interrupts is said to
+be "dyntick-idle", "in dyntick-idle mode", "in nohz mode", or "running
+tickless".  The remainder of this document will use "dyntick-idle mode".
+
+There is also a boot parameter "nohz=" that can be used to disable
+dyntick-idle mode in CONFIG_NO_HZ_IDLE=y kernels by specifying "nohz=off".
+By default, CONFIG_NO_HZ_IDLE=y kernels boot with "nohz=on", enabling
+dyntick-idle mode.
+
+
+Omit Scheduling-Clock Ticks For CPUs With Only One Runnable Task
+================================================================
+
+If a CPU has only one runnable task, there is little point in sending it
+a scheduling-clock interrupt because there is no other task to switch to.
+Note that omitting scheduling-clock ticks for CPUs with only one runnable
+task implies also omitting them for idle CPUs.
+
+The CONFIG_NO_HZ_FULL=y Kconfig option causes the kernel to avoid
+sending scheduling-clock interrupts to CPUs with a single runnable task,
+and such CPUs are said to be "adaptive-ticks CPUs".  This is important
+for applications with aggressive real-time response constraints because
+it allows them to improve their worst-case response times by the maximum
+duration of a scheduling-clock interrupt.  It is also important for
+computationally intensive short-iteration workloads:  If any CPU is
+delayed during a given iteration, all the other CPUs will be forced to
+wait idle while the delayed CPU finishes.  Thus, the delay is multiplied
+by one less than the number of CPUs.  In these situations, there is
+again strong motivation to avoid sending scheduling-clock interrupts.
+
+By default, no CPU will be an adaptive-ticks CPU.  The "nohz_full="
+boot parameter specifies the adaptive-ticks CPUs.  For example,
+"nohz_full=1,6-8" says that CPUs 1, 6, 7, and 8 are to be adaptive-ticks
+CPUs.  Note that you are prohibited from marking all of the CPUs as
+adaptive-tick CPUs:  At least one non-adaptive-tick CPU must remain
+online to handle timekeeping tasks in order to ensure that system
+calls like gettimeofday() returns accurate values on adaptive-tick CPUs.
+(This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running
+user processes to observe slight drifts in clock rate.)  Therefore, the
+boot CPU is prohibited from entering adaptive-ticks mode.  Specifying a
+"nohz_full=" mask that includes the boot CPU will result in a boot-time
+error message, and the boot CPU will be removed from the mask.  Note that
+this means that your system must have at least two CPUs in order for
+CONFIG_NO_HZ_FULL=y to do anything for you.
+
+Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.
+This is covered in the "RCU IMPLICATIONS" section below.
+
+Normally, a CPU remains in adaptive-ticks mode as long as possible.
+In particular, transitioning to kernel mode does not automatically change
+the mode.  Instead, the CPU will exit adaptive-ticks mode only if needed,
+for example, if that CPU enqueues an RCU callback.
+
+Just as with dyntick-idle mode, the benefits of adaptive-tick mode do
+not come for free:
+
+1.	CONFIG_NO_HZ_FULL selects CONFIG_NO_HZ_COMMON, so you cannot run
+	adaptive ticks without also running dyntick idle.  This dependency
+	extends down into the implementation, so that all of the costs
+	of CONFIG_NO_HZ_IDLE are also incurred by CONFIG_NO_HZ_FULL.
+
+2.	The user/kernel transitions are slightly more expensive due
+	to the need to inform kernel subsystems (such as RCU) about
+	the change in mode.
+
+3.	POSIX CPU timers prevent CPUs from entering adaptive-tick mode.
+	Real-time applications needing to take actions based on CPU time
+	consumption need to use other means of doing so.
+
+4.	If there are more perf events pending than the hardware can
+	accommodate, they are normally round-robined so as to collect
+	all of them over time.  Adaptive-tick mode may prevent this
+	round-robining from happening.  This will likely be fixed by
+	preventing CPUs with large numbers of perf events pending from
+	entering adaptive-tick mode.
+
+5.	Scheduler statistics for adaptive-tick CPUs may be computed
+	slightly differently than those for non-adaptive-tick CPUs.
+	This might in turn perturb load-balancing of real-time tasks.
+
+6.	The LB_BIAS scheduler feature is disabled by adaptive ticks.
+
+Although improvements are expected over time, adaptive ticks is quite
+useful for many types of real-time and compute-intensive applications.
+However, the drawbacks listed above mean that adaptive ticks should not
+(yet) be enabled by default.
+
+
+RCU Implications
+================
+
+There are situations in which idle CPUs cannot be permitted to
+enter either dyntick-idle mode or adaptive-tick mode, the most
+common being when that CPU has RCU callbacks pending.
+
+The CONFIG_RCU_FAST_NO_HZ=y Kconfig option may be used to cause such CPUs
+to enter dyntick-idle mode or adaptive-tick mode anyway.  In this case,
+a timer will awaken these CPUs every four jiffies in order to ensure
+that the RCU callbacks are processed in a timely fashion.
+
+Another approach is to offload RCU callback processing to "rcuo" kthreads
+using the CONFIG_RCU_NOCB_CPU=y Kconfig option.  The specific CPUs to
+offload may be selected using The "rcu_nocbs=" kernel boot parameter,
+which takes a comma-separated list of CPUs and CPU ranges, for example,
+"1,3-5" selects CPUs 1, 3, 4, and 5.
+
+The offloaded CPUs will never queue RCU callbacks, and therefore RCU
+never prevents offloaded CPUs from entering either dyntick-idle mode
+or adaptive-tick mode.  That said, note that it is up to userspace to
+pin the "rcuo" kthreads to specific CPUs if desired.  Otherwise, the
+scheduler will decide where to run them, which might or might not be
+where you want them to run.
+
+
+Testing
+=======
+
+So you enable all the OS-jitter features described in this document,
+but do not see any change in your workload's behavior.  Is this because
+your workload isn't affected that much by OS jitter, or is it because
+something else is in the way?  This section helps answer this question
+by providing a simple OS-jitter test suite, which is available on branch
+master of the following git archive:
+
+git://git.kernel.org/pub/scm/linux/kernel/git/frederic/dynticks-testing.git
+
+Clone this archive and follow the instructions in the README file.
+This test procedure will produce a trace that will allow you to evaluate
+whether or not you have succeeded in removing OS jitter from your system.
+If this trace shows that you have removed OS jitter as much as is
+possible, then you can conclude that your workload is not all that
+sensitive to OS jitter.
+
+Note: this test requires that your system have at least two CPUs.
+We do not currently have a good way to remove OS jitter from single-CPU
+systems.
+
+
+Known Issues
+============
+
+*	Dyntick-idle slows transitions to and from idle slightly.
+	In practice, this has not been a problem except for the most
+	aggressive real-time workloads, which have the option of disabling
+	dyntick-idle mode, an option that most of them take.  However,
+	some workloads will no doubt want to use adaptive ticks to
+	eliminate scheduling-clock interrupt latencies.  Here are some
+	options for these workloads:
+
+	a.	Use PMQOS from userspace to inform the kernel of your
+		latency requirements (preferred).
+
+	b.	On x86 systems, use the "idle=mwait" boot parameter.
+
+	c.	On x86 systems, use the "intel_idle.max_cstate=" to limit
+	`	the maximum C-state depth.
+
+	d.	On x86 systems, use the "idle=poll" boot parameter.
+		However, please note that use of this parameter can cause
+		your CPU to overheat, which may cause thermal throttling
+		to degrade your latencies -- and that this degradation can
+		be even worse than that of dyntick-idle.  Furthermore,
+		this parameter effectively disables Turbo Mode on Intel
+		CPUs, which can significantly reduce maximum performance.
+
+*	Adaptive-ticks slows user/kernel transitions slightly.
+	This is not expected to be a problem for computationally intensive
+	workloads, which have few such transitions.  Careful benchmarking
+	will be required to determine whether or not other workloads
+	are significantly affected by this effect.
+
+*	Adaptive-ticks does not do anything unless there is only one
+	runnable task for a given CPU, even though there are a number
+	of other situations where the scheduling-clock tick is not
+	needed.  To give but one example, consider a CPU that has one
+	runnable high-priority SCHED_FIFO task and an arbitrary number
+	of low-priority SCHED_OTHER tasks.  In this case, the CPU is
+	required to run the SCHED_FIFO task until it either blocks or
+	some other higher-priority task awakens on (or is assigned to)
+	this CPU, so there is no point in sending a scheduling-clock
+	interrupt to this CPU.	However, the current implementation
+	nevertheless sends scheduling-clock interrupts to CPUs having a
+	single runnable SCHED_FIFO task and multiple runnable SCHED_OTHER
+	tasks, even though these interrupts are unnecessary.
+
+	And even when there are multiple runnable tasks on a given CPU,
+	there is little point in interrupting that CPU until the current
+	running task's timeslice expires, which is almost always way
+	longer than the time of the next scheduling-clock interrupt.
+
+	Better handling of these sorts of situations is future work.
+
+*	A reboot is required to reconfigure both adaptive idle and RCU
+	callback offloading.  Runtime reconfiguration could be provided
+	if needed, however, due to the complexity of reconfiguring RCU at
+	runtime, there would need to be an earthshakingly good reason.
+	Especially given that you have the straightforward option of
+	simply offloading RCU callbacks from all CPUs and pinning them
+	where you want them whenever you want them pinned.
+
+*	Additional configuration is required to deal with other sources
+	of OS jitter, including interrupts and system-utility tasks
+	and processes.  This configuration normally involves binding
+	interrupts and tasks to particular CPUs.
+
+*	Some sources of OS jitter can currently be eliminated only by
+	constraining the workload.  For example, the only way to eliminate
+	OS jitter due to global TLB shootdowns is to avoid the unmapping
+	operations (such as kernel module unload operations) that
+	result in these shootdowns.  For another example, page faults
+	and TLB misses can be reduced (and in some cases eliminated) by
+	using huge pages and by constraining the amount of memory used
+	by the application.  Pre-faulting the working set can also be
+	helpful, especially when combined with the mlock() and mlockall()
+	system calls.
+
+*	Unless all CPUs are idle, at least one CPU must keep the
+	scheduling-clock interrupt going in order to support accurate
+	timekeeping.
+
+*	If there might potentially be some adaptive-ticks CPUs, there
+	will be at least one CPU keeping the scheduling-clock interrupt
+	going, even if all CPUs are otherwise idle.
+
+	Better handling of this situation is ongoing work.
+
+*	Some process-handling operations still require the occasional
+	scheduling-clock tick.	These operations include calculating CPU
+	load, maintaining sched average, computing CFS entity vruntime,
+	computing avenrun, and carrying out load balancing.  They are
+	currently accommodated by scheduling-clock tick every second
+	or so.	On-going work will eliminate the need even for these
+	infrequent scheduling-clock ticks.
diff --git a/Documentation/timers/timekeeping.rst b/Documentation/timers/timekeeping.rst
new file mode 100644
index 000000000000..f83e98852e2c
--- /dev/null
+++ b/Documentation/timers/timekeeping.rst
@@ -0,0 +1,180 @@
+===========================================================
+Clock sources, Clock events, sched_clock() and delay timers
+===========================================================
+
+This document tries to briefly explain some basic kernel timekeeping
+abstractions. It partly pertains to the drivers usually found in
+drivers/clocksource in the kernel tree, but the code may be spread out
+across the kernel.
+
+If you grep through the kernel source you will find a number of architecture-
+specific implementations of clock sources, clockevents and several likewise
+architecture-specific overrides of the sched_clock() function and some
+delay timers.
+
+To provide timekeeping for your platform, the clock source provides
+the basic timeline, whereas clock events shoot interrupts on certain points
+on this timeline, providing facilities such as high-resolution timers.
+sched_clock() is used for scheduling and timestamping, and delay timers
+provide an accurate delay source using hardware counters.
+
+
+Clock sources
+-------------
+
+The purpose of the clock source is to provide a timeline for the system that
+tells you where you are in time. For example issuing the command 'date' on
+a Linux system will eventually read the clock source to determine exactly
+what time it is.
+
+Typically the clock source is a monotonic, atomic counter which will provide
+n bits which count from 0 to (2^n)-1 and then wraps around to 0 and start over.
+It will ideally NEVER stop ticking as long as the system is running. It
+may stop during system suspend.
+
+The clock source shall have as high resolution as possible, and the frequency
+shall be as stable and correct as possible as compared to a real-world wall
+clock. It should not move unpredictably back and forth in time or miss a few
+cycles here and there.
+
+It must be immune to the kind of effects that occur in hardware where e.g.
+the counter register is read in two phases on the bus lowest 16 bits first
+and the higher 16 bits in a second bus cycle with the counter bits
+potentially being updated in between leading to the risk of very strange
+values from the counter.
+
+When the wall-clock accuracy of the clock source isn't satisfactory, there
+are various quirks and layers in the timekeeping code for e.g. synchronizing
+the user-visible time to RTC clocks in the system or against networked time
+servers using NTP, but all they do basically is update an offset against
+the clock source, which provides the fundamental timeline for the system.
+These measures does not affect the clock source per se, they only adapt the
+system to the shortcomings of it.
+
+The clock source struct shall provide means to translate the provided counter
+into a nanosecond value as an unsigned long long (unsigned 64 bit) number.
+Since this operation may be invoked very often, doing this in a strict
+mathematical sense is not desirable: instead the number is taken as close as
+possible to a nanosecond value using only the arithmetic operations
+multiply and shift, so in clocksource_cyc2ns() you find:
+
+  ns ~= (clocksource * mult) >> shift
+
+You will find a number of helper functions in the clock source code intended
+to aid in providing these mult and shift values, such as
+clocksource_khz2mult(), clocksource_hz2mult() that help determine the
+mult factor from a fixed shift, and clocksource_register_hz() and
+clocksource_register_khz() which will help out assigning both shift and mult
+factors using the frequency of the clock source as the only input.
+
+For real simple clock sources accessed from a single I/O memory location
+there is nowadays even clocksource_mmio_init() which will take a memory
+location, bit width, a parameter telling whether the counter in the
+register counts up or down, and the timer clock rate, and then conjure all
+necessary parameters.
+
+Since a 32-bit counter at say 100 MHz will wrap around to zero after some 43
+seconds, the code handling the clock source will have to compensate for this.
+That is the reason why the clock source struct also contains a 'mask'
+member telling how many bits of the source are valid. This way the timekeeping
+code knows when the counter will wrap around and can insert the necessary
+compensation code on both sides of the wrap point so that the system timeline
+remains monotonic.
+
+
+Clock events
+------------
+
+Clock events are the conceptual reverse of clock sources: they take a
+desired time specification value and calculate the values to poke into
+hardware timer registers.
+
+Clock events are orthogonal to clock sources. The same hardware
+and register range may be used for the clock event, but it is essentially
+a different thing. The hardware driving clock events has to be able to
+fire interrupts, so as to trigger events on the system timeline. On an SMP
+system, it is ideal (and customary) to have one such event driving timer per
+CPU core, so that each core can trigger events independently of any other
+core.
+
+You will notice that the clock event device code is based on the same basic
+idea about translating counters to nanoseconds using mult and shift
+arithmetic, and you find the same family of helper functions again for
+assigning these values. The clock event driver does not need a 'mask'
+attribute however: the system will not try to plan events beyond the time
+horizon of the clock event.
+
+
+sched_clock()
+-------------
+
+In addition to the clock sources and clock events there is a special weak
+function in the kernel called sched_clock(). This function shall return the
+number of nanoseconds since the system was started. An architecture may or
+may not provide an implementation of sched_clock() on its own. If a local
+implementation is not provided, the system jiffy counter will be used as
+sched_clock().
+
+As the name suggests, sched_clock() is used for scheduling the system,
+determining the absolute timeslice for a certain process in the CFS scheduler
+for example. It is also used for printk timestamps when you have selected to
+include time information in printk for things like bootcharts.
+
+Compared to clock sources, sched_clock() has to be very fast: it is called
+much more often, especially by the scheduler. If you have to do trade-offs
+between accuracy compared to the clock source, you may sacrifice accuracy
+for speed in sched_clock(). It however requires some of the same basic
+characteristics as the clock source, i.e. it should be monotonic.
+
+The sched_clock() function may wrap only on unsigned long long boundaries,
+i.e. after 64 bits. Since this is a nanosecond value this will mean it wraps
+after circa 585 years. (For most practical systems this means "never".)
+
+If an architecture does not provide its own implementation of this function,
+it will fall back to using jiffies, making its maximum resolution 1/HZ of the
+jiffy frequency for the architecture. This will affect scheduling accuracy
+and will likely show up in system benchmarks.
+
+The clock driving sched_clock() may stop or reset to zero during system
+suspend/sleep. This does not matter to the function it serves of scheduling
+events on the system. However it may result in interesting timestamps in
+printk().
+
+The sched_clock() function should be callable in any context, IRQ- and
+NMI-safe and return a sane value in any context.
+
+Some architectures may have a limited set of time sources and lack a nice
+counter to derive a 64-bit nanosecond value, so for example on the ARM
+architecture, special helper functions have been created to provide a
+sched_clock() nanosecond base from a 16- or 32-bit counter. Sometimes the
+same counter that is also used as clock source is used for this purpose.
+
+On SMP systems, it is crucial for performance that sched_clock() can be called
+independently on each CPU without any synchronization performance hits.
+Some hardware (such as the x86 TSC) will cause the sched_clock() function to
+drift between the CPUs on the system. The kernel can work around this by
+enabling the CONFIG_HAVE_UNSTABLE_SCHED_CLOCK option. This is another aspect
+that makes sched_clock() different from the ordinary clock source.
+
+
+Delay timers (some architectures only)
+--------------------------------------
+
+On systems with variable CPU frequency, the various kernel delay() functions
+will sometimes behave strangely. Basically these delays usually use a hard
+loop to delay a certain number of jiffy fractions using a "lpj" (loops per
+jiffy) value, calibrated on boot.
+
+Let's hope that your system is running on maximum frequency when this value
+is calibrated: as an effect when the frequency is geared down to half the
+full frequency, any delay() will be twice as long. Usually this does not
+hurt, as you're commonly requesting that amount of delay *or more*. But
+basically the semantics are quite unpredictable on such systems.
+
+Enter timer-based delays. Using these, a timer read may be used instead of
+a hard-coded loop for providing the desired delay.
+
+This is done by declaring a struct delay_timer and assigning the appropriate
+function pointers and rate settings for this delay timer.
+
+This is available on some architectures like OpenRISC or ARM.
diff --git a/Documentation/timers/timekeeping.txt b/Documentation/timers/timekeeping.txt
deleted file mode 100644
index 2d1732b0a868..000000000000
--- a/Documentation/timers/timekeeping.txt
+++ /dev/null
@@ -1,179 +0,0 @@
-Clock sources, Clock events, sched_clock() and delay timers
------------------------------------------------------------
-
-This document tries to briefly explain some basic kernel timekeeping
-abstractions. It partly pertains to the drivers usually found in
-drivers/clocksource in the kernel tree, but the code may be spread out
-across the kernel.
-
-If you grep through the kernel source you will find a number of architecture-
-specific implementations of clock sources, clockevents and several likewise
-architecture-specific overrides of the sched_clock() function and some
-delay timers.
-
-To provide timekeeping for your platform, the clock source provides
-the basic timeline, whereas clock events shoot interrupts on certain points
-on this timeline, providing facilities such as high-resolution timers.
-sched_clock() is used for scheduling and timestamping, and delay timers
-provide an accurate delay source using hardware counters.
-
-
-Clock sources
--------------
-
-The purpose of the clock source is to provide a timeline for the system that
-tells you where you are in time. For example issuing the command 'date' on
-a Linux system will eventually read the clock source to determine exactly
-what time it is.
-
-Typically the clock source is a monotonic, atomic counter which will provide
-n bits which count from 0 to (2^n)-1 and then wraps around to 0 and start over.
-It will ideally NEVER stop ticking as long as the system is running. It
-may stop during system suspend.
-
-The clock source shall have as high resolution as possible, and the frequency
-shall be as stable and correct as possible as compared to a real-world wall
-clock. It should not move unpredictably back and forth in time or miss a few
-cycles here and there.
-
-It must be immune to the kind of effects that occur in hardware where e.g.
-the counter register is read in two phases on the bus lowest 16 bits first
-and the higher 16 bits in a second bus cycle with the counter bits
-potentially being updated in between leading to the risk of very strange
-values from the counter.
-
-When the wall-clock accuracy of the clock source isn't satisfactory, there
-are various quirks and layers in the timekeeping code for e.g. synchronizing
-the user-visible time to RTC clocks in the system or against networked time
-servers using NTP, but all they do basically is update an offset against
-the clock source, which provides the fundamental timeline for the system.
-These measures does not affect the clock source per se, they only adapt the
-system to the shortcomings of it.
-
-The clock source struct shall provide means to translate the provided counter
-into a nanosecond value as an unsigned long long (unsigned 64 bit) number.
-Since this operation may be invoked very often, doing this in a strict
-mathematical sense is not desirable: instead the number is taken as close as
-possible to a nanosecond value using only the arithmetic operations
-multiply and shift, so in clocksource_cyc2ns() you find:
-
-  ns ~= (clocksource * mult) >> shift
-
-You will find a number of helper functions in the clock source code intended
-to aid in providing these mult and shift values, such as
-clocksource_khz2mult(), clocksource_hz2mult() that help determine the
-mult factor from a fixed shift, and clocksource_register_hz() and
-clocksource_register_khz() which will help out assigning both shift and mult
-factors using the frequency of the clock source as the only input.
-
-For real simple clock sources accessed from a single I/O memory location
-there is nowadays even clocksource_mmio_init() which will take a memory
-location, bit width, a parameter telling whether the counter in the
-register counts up or down, and the timer clock rate, and then conjure all
-necessary parameters.
-
-Since a 32-bit counter at say 100 MHz will wrap around to zero after some 43
-seconds, the code handling the clock source will have to compensate for this.
-That is the reason why the clock source struct also contains a 'mask'
-member telling how many bits of the source are valid. This way the timekeeping
-code knows when the counter will wrap around and can insert the necessary
-compensation code on both sides of the wrap point so that the system timeline
-remains monotonic.
-
-
-Clock events
-------------
-
-Clock events are the conceptual reverse of clock sources: they take a
-desired time specification value and calculate the values to poke into
-hardware timer registers.
-
-Clock events are orthogonal to clock sources. The same hardware
-and register range may be used for the clock event, but it is essentially
-a different thing. The hardware driving clock events has to be able to
-fire interrupts, so as to trigger events on the system timeline. On an SMP
-system, it is ideal (and customary) to have one such event driving timer per
-CPU core, so that each core can trigger events independently of any other
-core.
-
-You will notice that the clock event device code is based on the same basic
-idea about translating counters to nanoseconds using mult and shift
-arithmetic, and you find the same family of helper functions again for
-assigning these values. The clock event driver does not need a 'mask'
-attribute however: the system will not try to plan events beyond the time
-horizon of the clock event.
-
-
-sched_clock()
--------------
-
-In addition to the clock sources and clock events there is a special weak
-function in the kernel called sched_clock(). This function shall return the
-number of nanoseconds since the system was started. An architecture may or
-may not provide an implementation of sched_clock() on its own. If a local
-implementation is not provided, the system jiffy counter will be used as
-sched_clock().
-
-As the name suggests, sched_clock() is used for scheduling the system,
-determining the absolute timeslice for a certain process in the CFS scheduler
-for example. It is also used for printk timestamps when you have selected to
-include time information in printk for things like bootcharts.
-
-Compared to clock sources, sched_clock() has to be very fast: it is called
-much more often, especially by the scheduler. If you have to do trade-offs
-between accuracy compared to the clock source, you may sacrifice accuracy
-for speed in sched_clock(). It however requires some of the same basic
-characteristics as the clock source, i.e. it should be monotonic.
-
-The sched_clock() function may wrap only on unsigned long long boundaries,
-i.e. after 64 bits. Since this is a nanosecond value this will mean it wraps
-after circa 585 years. (For most practical systems this means "never".)
-
-If an architecture does not provide its own implementation of this function,
-it will fall back to using jiffies, making its maximum resolution 1/HZ of the
-jiffy frequency for the architecture. This will affect scheduling accuracy
-and will likely show up in system benchmarks.
-
-The clock driving sched_clock() may stop or reset to zero during system
-suspend/sleep. This does not matter to the function it serves of scheduling
-events on the system. However it may result in interesting timestamps in
-printk().
-
-The sched_clock() function should be callable in any context, IRQ- and
-NMI-safe and return a sane value in any context.
-
-Some architectures may have a limited set of time sources and lack a nice
-counter to derive a 64-bit nanosecond value, so for example on the ARM
-architecture, special helper functions have been created to provide a
-sched_clock() nanosecond base from a 16- or 32-bit counter. Sometimes the
-same counter that is also used as clock source is used for this purpose.
-
-On SMP systems, it is crucial for performance that sched_clock() can be called
-independently on each CPU without any synchronization performance hits.
-Some hardware (such as the x86 TSC) will cause the sched_clock() function to
-drift between the CPUs on the system. The kernel can work around this by
-enabling the CONFIG_HAVE_UNSTABLE_SCHED_CLOCK option. This is another aspect
-that makes sched_clock() different from the ordinary clock source.
-
-
-Delay timers (some architectures only)
---------------------------------------
-
-On systems with variable CPU frequency, the various kernel delay() functions
-will sometimes behave strangely. Basically these delays usually use a hard
-loop to delay a certain number of jiffy fractions using a "lpj" (loops per
-jiffy) value, calibrated on boot.
-
-Let's hope that your system is running on maximum frequency when this value
-is calibrated: as an effect when the frequency is geared down to half the
-full frequency, any delay() will be twice as long. Usually this does not
-hurt, as you're commonly requesting that amount of delay *or more*. But
-basically the semantics are quite unpredictable on such systems.
-
-Enter timer-based delays. Using these, a timer read may be used instead of
-a hard-coded loop for providing the desired delay.
-
-This is done by declaring a struct delay_timer and assigning the appropriate
-function pointers and rate settings for this delay timer.
-
-This is available on some architectures like OpenRISC or ARM.
diff --git a/Documentation/timers/timers-howto.rst b/Documentation/timers/timers-howto.rst
new file mode 100644
index 000000000000..7e3167bec2b1
--- /dev/null
+++ b/Documentation/timers/timers-howto.rst
@@ -0,0 +1,112 @@
+===================================================================
+delays - Information on the various kernel delay / sleep mechanisms
+===================================================================
+
+This document seeks to answer the common question: "What is the
+RightWay (TM) to insert a delay?"
+
+This question is most often faced by driver writers who have to
+deal with hardware delays and who may not be the most intimately
+familiar with the inner workings of the Linux Kernel.
+
+
+Inserting Delays
+----------------
+
+The first, and most important, question you need to ask is "Is my
+code in an atomic context?"  This should be followed closely by "Does
+it really need to delay in atomic context?" If so...
+
+ATOMIC CONTEXT:
+	You must use the `*delay` family of functions. These
+	functions use the jiffie estimation of clock speed
+	and will busy wait for enough loop cycles to achieve
+	the desired delay:
+
+	ndelay(unsigned long nsecs)
+	udelay(unsigned long usecs)
+	mdelay(unsigned long msecs)
+
+	udelay is the generally preferred API; ndelay-level
+	precision may not actually exist on many non-PC devices.
+
+	mdelay is macro wrapper around udelay, to account for
+	possible overflow when passing large arguments to udelay.
+	In general, use of mdelay is discouraged and code should
+	be refactored to allow for the use of msleep.
+
+NON-ATOMIC CONTEXT:
+	You should use the `*sleep[_range]` family of functions.
+	There are a few more options here, while any of them may
+	work correctly, using the "right" sleep function will
+	help the scheduler, power management, and just make your
+	driver better :)
+
+	-- Backed by busy-wait loop:
+
+		udelay(unsigned long usecs)
+
+	-- Backed by hrtimers:
+
+		usleep_range(unsigned long min, unsigned long max)
+
+	-- Backed by jiffies / legacy_timers
+
+		msleep(unsigned long msecs)
+		msleep_interruptible(unsigned long msecs)
+
+	Unlike the `*delay` family, the underlying mechanism
+	driving each of these calls varies, thus there are
+	quirks you should be aware of.
+
+
+	SLEEPING FOR "A FEW" USECS ( < ~10us? ):
+		* Use udelay
+
+		- Why not usleep?
+			On slower systems, (embedded, OR perhaps a speed-
+			stepped PC!) the overhead of setting up the hrtimers
+			for usleep *may* not be worth it. Such an evaluation
+			will obviously depend on your specific situation, but
+			it is something to be aware of.
+
+	SLEEPING FOR ~USECS OR SMALL MSECS ( 10us - 20ms):
+		* Use usleep_range
+
+		- Why not msleep for (1ms - 20ms)?
+			Explained originally here:
+				http://lkml.org/lkml/2007/8/3/250
+
+			msleep(1~20) may not do what the caller intends, and
+			will often sleep longer (~20 ms actual sleep for any
+			value given in the 1~20ms range). In many cases this
+			is not the desired behavior.
+
+		- Why is there no "usleep" / What is a good range?
+			Since usleep_range is built on top of hrtimers, the
+			wakeup will be very precise (ish), thus a simple
+			usleep function would likely introduce a large number
+			of undesired interrupts.
+
+			With the introduction of a range, the scheduler is
+			free to coalesce your wakeup with any other wakeup
+			that may have happened for other reasons, or at the
+			worst case, fire an interrupt for your upper bound.
+
+			The larger a range you supply, the greater a chance
+			that you will not trigger an interrupt; this should
+			be balanced with what is an acceptable upper bound on
+			delay / performance for your specific code path. Exact
+			tolerances here are very situation specific, thus it
+			is left to the caller to determine a reasonable range.
+
+	SLEEPING FOR LARGER MSECS ( 10ms+ )
+		* Use msleep or possibly msleep_interruptible
+
+		- What's the difference?
+			msleep sets the current task to TASK_UNINTERRUPTIBLE
+			whereas msleep_interruptible sets the current task to
+			TASK_INTERRUPTIBLE before scheduling the sleep. In
+			short, the difference is whether the sleep can be ended
+			early by a signal. In general, just use msleep unless
+			you know you have a need for the interruptible variant.
diff --git a/Documentation/timers/timers-howto.txt b/Documentation/timers/timers-howto.txt
deleted file mode 100644
index 038f8c77a076..000000000000
--- a/Documentation/timers/timers-howto.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-delays - Information on the various kernel delay / sleep mechanisms
--------------------------------------------------------------------
-
-This document seeks to answer the common question: "What is the
-RightWay (TM) to insert a delay?"
-
-This question is most often faced by driver writers who have to
-deal with hardware delays and who may not be the most intimately
-familiar with the inner workings of the Linux Kernel.
-
-
-Inserting Delays
-----------------
-
-The first, and most important, question you need to ask is "Is my
-code in an atomic context?"  This should be followed closely by "Does
-it really need to delay in atomic context?" If so...
-
-ATOMIC CONTEXT:
-	You must use the *delay family of functions. These
-	functions use the jiffie estimation of clock speed
-	and will busy wait for enough loop cycles to achieve
-	the desired delay:
-
-	ndelay(unsigned long nsecs)
-	udelay(unsigned long usecs)
-	mdelay(unsigned long msecs)
-
-	udelay is the generally preferred API; ndelay-level
-	precision may not actually exist on many non-PC devices.
-
-	mdelay is macro wrapper around udelay, to account for
-	possible overflow when passing large arguments to udelay.
-	In general, use of mdelay is discouraged and code should
-	be refactored to allow for the use of msleep.
-
-NON-ATOMIC CONTEXT:
-	You should use the *sleep[_range] family of functions.
-	There are a few more options here, while any of them may
-	work correctly, using the "right" sleep function will
-	help the scheduler, power management, and just make your
-	driver better :)
-
-	-- Backed by busy-wait loop:
-		udelay(unsigned long usecs)
-	-- Backed by hrtimers:
-		usleep_range(unsigned long min, unsigned long max)
-	-- Backed by jiffies / legacy_timers
-		msleep(unsigned long msecs)
-		msleep_interruptible(unsigned long msecs)
-
-	Unlike the *delay family, the underlying mechanism
-	driving each of these calls varies, thus there are
-	quirks you should be aware of.
-
-
-	SLEEPING FOR "A FEW" USECS ( < ~10us? ):
-		* Use udelay
-
-		- Why not usleep?
-			On slower systems, (embedded, OR perhaps a speed-
-			stepped PC!) the overhead of setting up the hrtimers
-			for usleep *may* not be worth it. Such an evaluation
-			will obviously depend on your specific situation, but
-			it is something to be aware of.
-
-	SLEEPING FOR ~USECS OR SMALL MSECS ( 10us - 20ms):
-		* Use usleep_range
-
-		- Why not msleep for (1ms - 20ms)?
-			Explained originally here:
-				http://lkml.org/lkml/2007/8/3/250
-			msleep(1~20) may not do what the caller intends, and
-			will often sleep longer (~20 ms actual sleep for any
-			value given in the 1~20ms range). In many cases this
-			is not the desired behavior.
-
-		- Why is there no "usleep" / What is a good range?
-			Since usleep_range is built on top of hrtimers, the
-			wakeup will be very precise (ish), thus a simple
-			usleep function would likely introduce a large number
-			of undesired interrupts.
-
-			With the introduction of a range, the scheduler is
-			free to coalesce your wakeup with any other wakeup
-			that may have happened for other reasons, or at the
-			worst case, fire an interrupt for your upper bound.
-
-			The larger a range you supply, the greater a chance
-			that you will not trigger an interrupt; this should
-			be balanced with what is an acceptable upper bound on
-			delay / performance for your specific code path. Exact
-			tolerances here are very situation specific, thus it
-			is left to the caller to determine a reasonable range.
-
-	SLEEPING FOR LARGER MSECS ( 10ms+ )
-		* Use msleep or possibly msleep_interruptible
-
-		- What's the difference?
-			msleep sets the current task to TASK_UNINTERRUPTIBLE
-			whereas msleep_interruptible sets the current task to
-			TASK_INTERRUPTIBLE before scheduling the sleep. In
-			short, the difference is whether the sleep can be ended
-			early by a signal. In general, just use msleep unless
-			you know you have a need for the interruptible variant.
-- 
cgit 


From cc2a2d19f896d174cad16c2348100bec49c00958 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:53:01 -0300
Subject: docs: watchdog: convert docs to ReST and rename to *.rst

Convert those documents and prepare them to be part of the kernel
API book, as most of the stuff there are related to the
Kernel interfaces.

Still, in the future, it would make sense to split the docs,
as some of the stuff is clearly focused on sysadmin tasks.

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/kernel-parameters.txt    |   2 +-
 Documentation/kernel-per-CPU-kthreads.txt          |   2 +-
 .../watchdog/convert_drivers_to_kernel_api.rst     | 219 ++++++
 .../watchdog/convert_drivers_to_kernel_api.txt     | 218 ------
 Documentation/watchdog/hpwdt.rst                   |  73 ++
 Documentation/watchdog/hpwdt.txt                   |  66 --
 Documentation/watchdog/index.rst                   |  25 +
 Documentation/watchdog/mlx-wdt.rst                 |  56 ++
 Documentation/watchdog/mlx-wdt.txt                 |  52 --
 Documentation/watchdog/pcwd-watchdog.rst           |  71 ++
 Documentation/watchdog/pcwd-watchdog.txt           |  66 --
 Documentation/watchdog/watchdog-api.rst            | 271 ++++++++
 Documentation/watchdog/watchdog-api.txt            | 237 -------
 Documentation/watchdog/watchdog-kernel-api.rst     | 338 ++++++++++
 Documentation/watchdog/watchdog-kernel-api.txt     | 305 ---------
 Documentation/watchdog/watchdog-parameters.rst     | 736 +++++++++++++++++++++
 Documentation/watchdog/watchdog-parameters.txt     | 410 ------------
 Documentation/watchdog/watchdog-pm.rst             |  22 +
 Documentation/watchdog/watchdog-pm.txt             |  19 -
 Documentation/watchdog/wdt.rst                     |  63 ++
 Documentation/watchdog/wdt.txt                     |  50 --
 21 files changed, 1876 insertions(+), 1425 deletions(-)
 create mode 100644 Documentation/watchdog/convert_drivers_to_kernel_api.rst
 delete mode 100644 Documentation/watchdog/convert_drivers_to_kernel_api.txt
 create mode 100644 Documentation/watchdog/hpwdt.rst
 delete mode 100644 Documentation/watchdog/hpwdt.txt
 create mode 100644 Documentation/watchdog/index.rst
 create mode 100644 Documentation/watchdog/mlx-wdt.rst
 delete mode 100644 Documentation/watchdog/mlx-wdt.txt
 create mode 100644 Documentation/watchdog/pcwd-watchdog.rst
 delete mode 100644 Documentation/watchdog/pcwd-watchdog.txt
 create mode 100644 Documentation/watchdog/watchdog-api.rst
 delete mode 100644 Documentation/watchdog/watchdog-api.txt
 create mode 100644 Documentation/watchdog/watchdog-kernel-api.rst
 delete mode 100644 Documentation/watchdog/watchdog-kernel-api.txt
 create mode 100644 Documentation/watchdog/watchdog-parameters.rst
 delete mode 100644 Documentation/watchdog/watchdog-parameters.txt
 create mode 100644 Documentation/watchdog/watchdog-pm.rst
 delete mode 100644 Documentation/watchdog/watchdog-pm.txt
 create mode 100644 Documentation/watchdog/wdt.rst
 delete mode 100644 Documentation/watchdog/wdt.txt

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2148fd289851..9ac37fcca3ee 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5160,7 +5160,7 @@
 			Default: 3 = cyan.
 
 	watchdog timers	[HW,WDT] For information on watchdog timers,
-			see Documentation/watchdog/watchdog-parameters.txt
+			see Documentation/watchdog/watchdog-parameters.rst
 			or other driver-specific files in the
 			Documentation/watchdog/ directory.
 
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
index 23b0c8b20cd1..5623b9916411 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -348,7 +348,7 @@ To reduce its OS jitter, do at least one of the following:
 2.	Boot with "nosoftlockup=0", which will also prevent these kthreads
 	from being created.  Other related watchdog and softlockup boot
 	parameters may be found in Documentation/admin-guide/kernel-parameters.rst
-	and Documentation/watchdog/watchdog-parameters.txt.
+	and Documentation/watchdog/watchdog-parameters.rst.
 3.	Echo a zero to /proc/sys/kernel/watchdog to disable the
 	watchdog timer.
 4.	Echo a large number of /proc/sys/kernel/watchdog_thresh in
diff --git a/Documentation/watchdog/convert_drivers_to_kernel_api.rst b/Documentation/watchdog/convert_drivers_to_kernel_api.rst
new file mode 100644
index 000000000000..dd934cc08e40
--- /dev/null
+++ b/Documentation/watchdog/convert_drivers_to_kernel_api.rst
@@ -0,0 +1,219 @@
+=========================================================
+Converting old watchdog drivers to the watchdog framework
+=========================================================
+
+by Wolfram Sang <w.sang@pengutronix.de>
+
+Before the watchdog framework came into the kernel, every driver had to
+implement the API on its own. Now, as the framework factored out the common
+components, those drivers can be lightened making it a user of the framework.
+This document shall guide you for this task. The necessary steps are described
+as well as things to look out for.
+
+
+Remove the file_operations struct
+---------------------------------
+
+Old drivers define their own file_operations for actions like open(), write(),
+etc... These are now handled by the framework and just call the driver when
+needed. So, in general, the 'file_operations' struct and assorted functions can
+go. Only very few driver-specific details have to be moved to other functions.
+Here is a overview of the functions and probably needed actions:
+
+- open: Everything dealing with resource management (file-open checks, magic
+  close preparations) can simply go. Device specific stuff needs to go to the
+  driver specific start-function. Note that for some drivers, the start-function
+  also serves as the ping-function. If that is the case and you need start/stop
+  to be balanced (clocks!), you are better off refactoring a separate start-function.
+
+- close: Same hints as for open apply.
+
+- write: Can simply go, all defined behaviour is taken care of by the framework,
+  i.e. ping on write and magic char ('V') handling.
+
+- ioctl: While the driver is allowed to have extensions to the IOCTL interface,
+  the most common ones are handled by the framework, supported by some assistance
+  from the driver:
+
+	WDIOC_GETSUPPORT:
+		Returns the mandatory watchdog_info struct from the driver
+
+	WDIOC_GETSTATUS:
+		Needs the status-callback defined, otherwise returns 0
+
+	WDIOC_GETBOOTSTATUS:
+		Needs the bootstatus member properly set. Make sure it is 0 if you
+		don't have further support!
+
+	WDIOC_SETOPTIONS:
+		No preparations needed
+
+	WDIOC_KEEPALIVE:
+		If wanted, options in watchdog_info need to have WDIOF_KEEPALIVEPING
+		set
+
+	WDIOC_SETTIMEOUT:
+		Options in watchdog_info need to have WDIOF_SETTIMEOUT set
+		and a set_timeout-callback has to be defined. The core will also
+		do limit-checking, if min_timeout and max_timeout in the watchdog
+		device are set. All is optional.
+
+	WDIOC_GETTIMEOUT:
+		No preparations needed
+
+	WDIOC_GETTIMELEFT:
+		It needs get_timeleft() callback to be defined. Otherwise it
+		will return EOPNOTSUPP
+
+  Other IOCTLs can be served using the ioctl-callback. Note that this is mainly
+  intended for porting old drivers; new drivers should not invent private IOCTLs.
+  Private IOCTLs are processed first. When the callback returns with
+  -ENOIOCTLCMD, the IOCTLs of the framework will be tried, too. Any other error
+  is directly given to the user.
+
+Example conversion::
+
+  -static const struct file_operations s3c2410wdt_fops = {
+  -       .owner          = THIS_MODULE,
+  -       .llseek         = no_llseek,
+  -       .write          = s3c2410wdt_write,
+  -       .unlocked_ioctl = s3c2410wdt_ioctl,
+  -       .open           = s3c2410wdt_open,
+  -       .release        = s3c2410wdt_release,
+  -};
+
+Check the functions for device-specific stuff and keep it for later
+refactoring. The rest can go.
+
+
+Remove the miscdevice
+---------------------
+
+Since the file_operations are gone now, you can also remove the 'struct
+miscdevice'. The framework will create it on watchdog_dev_register() called by
+watchdog_register_device()::
+
+  -static struct miscdevice s3c2410wdt_miscdev = {
+  -       .minor          = WATCHDOG_MINOR,
+  -       .name           = "watchdog",
+  -       .fops           = &s3c2410wdt_fops,
+  -};
+
+
+Remove obsolete includes and defines
+------------------------------------
+
+Because of the simplifications, a few defines are probably unused now. Remove
+them. Includes can be removed, too. For example::
+
+  - #include <linux/fs.h>
+  - #include <linux/miscdevice.h> (if MODULE_ALIAS_MISCDEV is not used)
+  - #include <linux/uaccess.h> (if no custom IOCTLs are used)
+
+
+Add the watchdog operations
+---------------------------
+
+All possible callbacks are defined in 'struct watchdog_ops'. You can find it
+explained in 'watchdog-kernel-api.txt' in this directory. start(), stop() and
+owner must be set, the rest are optional. You will easily find corresponding
+functions in the old driver. Note that you will now get a pointer to the
+watchdog_device as a parameter to these functions, so you probably have to
+change the function header. Other changes are most likely not needed, because
+here simply happens the direct hardware access. If you have device-specific
+code left from the above steps, it should be refactored into these callbacks.
+
+Here is a simple example::
+
+  +static struct watchdog_ops s3c2410wdt_ops = {
+  +       .owner = THIS_MODULE,
+  +       .start = s3c2410wdt_start,
+  +       .stop = s3c2410wdt_stop,
+  +       .ping = s3c2410wdt_keepalive,
+  +       .set_timeout = s3c2410wdt_set_heartbeat,
+  +};
+
+A typical function-header change looks like::
+
+  -static void s3c2410wdt_keepalive(void)
+  +static int s3c2410wdt_keepalive(struct watchdog_device *wdd)
+   {
+  ...
+  +
+  +       return 0;
+   }
+
+  ...
+
+  -       s3c2410wdt_keepalive();
+  +       s3c2410wdt_keepalive(&s3c2410_wdd);
+
+
+Add the watchdog device
+-----------------------
+
+Now we need to create a 'struct watchdog_device' and populate it with the
+necessary information for the framework. The struct is also explained in detail
+in 'watchdog-kernel-api.txt' in this directory. We pass it the mandatory
+watchdog_info struct and the newly created watchdog_ops. Often, old drivers
+have their own record-keeping for things like bootstatus and timeout using
+static variables. Those have to be converted to use the members in
+watchdog_device. Note that the timeout values are unsigned int. Some drivers
+use signed int, so this has to be converted, too.
+
+Here is a simple example for a watchdog device::
+
+  +static struct watchdog_device s3c2410_wdd = {
+  +       .info = &s3c2410_wdt_ident,
+  +       .ops = &s3c2410wdt_ops,
+  +};
+
+
+Handle the 'nowayout' feature
+-----------------------------
+
+A few drivers use nowayout statically, i.e. there is no module parameter for it
+and only CONFIG_WATCHDOG_NOWAYOUT determines if the feature is going to be
+used. This needs to be converted by initializing the status variable of the
+watchdog_device like this::
+
+        .status = WATCHDOG_NOWAYOUT_INIT_STATUS,
+
+Most drivers, however, also allow runtime configuration of nowayout, usually
+by adding a module parameter. The conversion for this would be something like::
+
+	watchdog_set_nowayout(&s3c2410_wdd, nowayout);
+
+The module parameter itself needs to stay, everything else related to nowayout
+can go, though. This will likely be some code in open(), close() or write().
+
+
+Register the watchdog device
+----------------------------
+
+Replace misc_register(&miscdev) with watchdog_register_device(&watchdog_dev).
+Make sure the return value gets checked and the error message, if present,
+still fits. Also convert the unregister case::
+
+  -       ret = misc_register(&s3c2410wdt_miscdev);
+  +       ret = watchdog_register_device(&s3c2410_wdd);
+
+  ...
+
+  -       misc_deregister(&s3c2410wdt_miscdev);
+  +       watchdog_unregister_device(&s3c2410_wdd);
+
+
+Update the Kconfig-entry
+------------------------
+
+The entry for the driver now needs to select WATCHDOG_CORE:
+
+  +       select WATCHDOG_CORE
+
+
+Create a patch and send it to upstream
+--------------------------------------
+
+Make sure you understood Documentation/process/submitting-patches.rst and send your patch to
+linux-watchdog@vger.kernel.org. We are looking forward to it :)
diff --git a/Documentation/watchdog/convert_drivers_to_kernel_api.txt b/Documentation/watchdog/convert_drivers_to_kernel_api.txt
deleted file mode 100644
index 9fffb2958d13..000000000000
--- a/Documentation/watchdog/convert_drivers_to_kernel_api.txt
+++ /dev/null
@@ -1,218 +0,0 @@
-Converting old watchdog drivers to the watchdog framework
-by Wolfram Sang <w.sang@pengutronix.de>
-=========================================================
-
-Before the watchdog framework came into the kernel, every driver had to
-implement the API on its own. Now, as the framework factored out the common
-components, those drivers can be lightened making it a user of the framework.
-This document shall guide you for this task. The necessary steps are described
-as well as things to look out for.
-
-
-Remove the file_operations struct
----------------------------------
-
-Old drivers define their own file_operations for actions like open(), write(),
-etc... These are now handled by the framework and just call the driver when
-needed. So, in general, the 'file_operations' struct and assorted functions can
-go. Only very few driver-specific details have to be moved to other functions.
-Here is a overview of the functions and probably needed actions:
-
-- open: Everything dealing with resource management (file-open checks, magic
-  close preparations) can simply go. Device specific stuff needs to go to the
-  driver specific start-function. Note that for some drivers, the start-function
-  also serves as the ping-function. If that is the case and you need start/stop
-  to be balanced (clocks!), you are better off refactoring a separate start-function.
-
-- close: Same hints as for open apply.
-
-- write: Can simply go, all defined behaviour is taken care of by the framework,
-  i.e. ping on write and magic char ('V') handling.
-
-- ioctl: While the driver is allowed to have extensions to the IOCTL interface,
-  the most common ones are handled by the framework, supported by some assistance
-  from the driver:
-
-	WDIOC_GETSUPPORT:
-		Returns the mandatory watchdog_info struct from the driver
-
-	WDIOC_GETSTATUS:
-		Needs the status-callback defined, otherwise returns 0
-
-	WDIOC_GETBOOTSTATUS:
-		Needs the bootstatus member properly set. Make sure it is 0 if you
-		don't have further support!
-
-	WDIOC_SETOPTIONS:
-		No preparations needed
-
-	WDIOC_KEEPALIVE:
-		If wanted, options in watchdog_info need to have WDIOF_KEEPALIVEPING
-		set
-
-	WDIOC_SETTIMEOUT:
-		Options in watchdog_info need to have WDIOF_SETTIMEOUT set
-		and a set_timeout-callback has to be defined. The core will also
-		do limit-checking, if min_timeout and max_timeout in the watchdog
-		device are set. All is optional.
-
-	WDIOC_GETTIMEOUT:
-		No preparations needed
-
-	WDIOC_GETTIMELEFT:
-		It needs get_timeleft() callback to be defined. Otherwise it
-		will return EOPNOTSUPP
-
-  Other IOCTLs can be served using the ioctl-callback. Note that this is mainly
-  intended for porting old drivers; new drivers should not invent private IOCTLs.
-  Private IOCTLs are processed first. When the callback returns with
-  -ENOIOCTLCMD, the IOCTLs of the framework will be tried, too. Any other error
-  is directly given to the user.
-
-Example conversion:
-
--static const struct file_operations s3c2410wdt_fops = {
--       .owner          = THIS_MODULE,
--       .llseek         = no_llseek,
--       .write          = s3c2410wdt_write,
--       .unlocked_ioctl = s3c2410wdt_ioctl,
--       .open           = s3c2410wdt_open,
--       .release        = s3c2410wdt_release,
--};
-
-Check the functions for device-specific stuff and keep it for later
-refactoring. The rest can go.
-
-
-Remove the miscdevice
----------------------
-
-Since the file_operations are gone now, you can also remove the 'struct
-miscdevice'. The framework will create it on watchdog_dev_register() called by
-watchdog_register_device().
-
--static struct miscdevice s3c2410wdt_miscdev = {
--       .minor          = WATCHDOG_MINOR,
--       .name           = "watchdog",
--       .fops           = &s3c2410wdt_fops,
--};
-
-
-Remove obsolete includes and defines
-------------------------------------
-
-Because of the simplifications, a few defines are probably unused now. Remove
-them. Includes can be removed, too. For example:
-
-- #include <linux/fs.h>
-- #include <linux/miscdevice.h> (if MODULE_ALIAS_MISCDEV is not used)
-- #include <linux/uaccess.h> (if no custom IOCTLs are used)
-
-
-Add the watchdog operations
----------------------------
-
-All possible callbacks are defined in 'struct watchdog_ops'. You can find it
-explained in 'watchdog-kernel-api.txt' in this directory. start(), stop() and
-owner must be set, the rest are optional. You will easily find corresponding
-functions in the old driver. Note that you will now get a pointer to the
-watchdog_device as a parameter to these functions, so you probably have to
-change the function header. Other changes are most likely not needed, because
-here simply happens the direct hardware access. If you have device-specific
-code left from the above steps, it should be refactored into these callbacks.
-
-Here is a simple example:
-
-+static struct watchdog_ops s3c2410wdt_ops = {
-+       .owner = THIS_MODULE,
-+       .start = s3c2410wdt_start,
-+       .stop = s3c2410wdt_stop,
-+       .ping = s3c2410wdt_keepalive,
-+       .set_timeout = s3c2410wdt_set_heartbeat,
-+};
-
-A typical function-header change looks like:
-
--static void s3c2410wdt_keepalive(void)
-+static int s3c2410wdt_keepalive(struct watchdog_device *wdd)
- {
-...
-+
-+       return 0;
- }
-
-...
-
--       s3c2410wdt_keepalive();
-+       s3c2410wdt_keepalive(&s3c2410_wdd);
-
-
-Add the watchdog device
------------------------
-
-Now we need to create a 'struct watchdog_device' and populate it with the
-necessary information for the framework. The struct is also explained in detail
-in 'watchdog-kernel-api.txt' in this directory. We pass it the mandatory
-watchdog_info struct and the newly created watchdog_ops. Often, old drivers
-have their own record-keeping for things like bootstatus and timeout using
-static variables. Those have to be converted to use the members in
-watchdog_device. Note that the timeout values are unsigned int. Some drivers
-use signed int, so this has to be converted, too.
-
-Here is a simple example for a watchdog device:
-
-+static struct watchdog_device s3c2410_wdd = {
-+       .info = &s3c2410_wdt_ident,
-+       .ops = &s3c2410wdt_ops,
-+};
-
-
-Handle the 'nowayout' feature
------------------------------
-
-A few drivers use nowayout statically, i.e. there is no module parameter for it
-and only CONFIG_WATCHDOG_NOWAYOUT determines if the feature is going to be
-used. This needs to be converted by initializing the status variable of the
-watchdog_device like this:
-
-        .status = WATCHDOG_NOWAYOUT_INIT_STATUS,
-
-Most drivers, however, also allow runtime configuration of nowayout, usually
-by adding a module parameter. The conversion for this would be something like:
-
-	watchdog_set_nowayout(&s3c2410_wdd, nowayout);
-
-The module parameter itself needs to stay, everything else related to nowayout
-can go, though. This will likely be some code in open(), close() or write().
-
-
-Register the watchdog device
-----------------------------
-
-Replace misc_register(&miscdev) with watchdog_register_device(&watchdog_dev).
-Make sure the return value gets checked and the error message, if present,
-still fits. Also convert the unregister case.
-
--       ret = misc_register(&s3c2410wdt_miscdev);
-+       ret = watchdog_register_device(&s3c2410_wdd);
-
-...
-
--       misc_deregister(&s3c2410wdt_miscdev);
-+       watchdog_unregister_device(&s3c2410_wdd);
-
-
-Update the Kconfig-entry
-------------------------
-
-The entry for the driver now needs to select WATCHDOG_CORE:
-
-+       select WATCHDOG_CORE
-
-
-Create a patch and send it to upstream
---------------------------------------
-
-Make sure you understood Documentation/process/submitting-patches.rst and send your patch to
-linux-watchdog@vger.kernel.org. We are looking forward to it :)
-
diff --git a/Documentation/watchdog/hpwdt.rst b/Documentation/watchdog/hpwdt.rst
new file mode 100644
index 000000000000..94a96371113e
--- /dev/null
+++ b/Documentation/watchdog/hpwdt.rst
@@ -0,0 +1,73 @@
+===========================
+HPE iLO NMI Watchdog Driver
+===========================
+
+for iLO based ProLiant Servers
+==============================
+
+Last reviewed: 08/20/2018
+
+
+ The HPE iLO NMI Watchdog driver is a kernel module that provides basic
+ watchdog functionality and handler for the iLO "Generate NMI to System"
+ virtual button.
+
+ All references to iLO in this document imply it also works on iLO2 and all
+ subsequent generations.
+
+ Watchdog functionality is enabled like any other common watchdog driver. That
+ is, an application needs to be started that kicks off the watchdog timer. A
+ basic application exists in tools/testing/selftests/watchdog/ named
+ watchdog-test.c. Simply compile the C file and kick it off. If the system
+ gets into a bad state and hangs, the HPE ProLiant iLO timer register will
+ not be updated in a timely fashion and a hardware system reset (also known as
+ an Automatic Server Recovery (ASR)) event will occur.
+
+ The hpwdt driver also has the following module parameters:
+
+ ============  ================================================================
+ soft_margin   allows the user to set the watchdog timer value.
+               Default value is 30 seconds.
+ timeout       an alias of soft_margin.
+ pretimeout    allows the user to set the watchdog pretimeout value.
+               This is the number of seconds before timeout when an
+               NMI is delivered to the system. Setting the value to
+               zero disables the pretimeout NMI.
+               Default value is 9 seconds.
+ nowayout      basic watchdog parameter that does not allow the timer to
+               be restarted or an impending ASR to be escaped.
+               Default value is set when compiling the kernel. If it is set
+               to "Y", then there is no way of disabling the watchdog once
+               it has been started.
+ ============  ================================================================
+
+ NOTE:
+       More information about watchdog drivers in general, including the ioctl
+       interface to /dev/watchdog can be found in
+       Documentation/watchdog/watchdog-api.rst and Documentation/IPMI.txt.
+
+ Due to limitations in the iLO hardware, the NMI pretimeout if enabled,
+ can only be set to 9 seconds.  Attempts to set pretimeout to other
+ non-zero values will be rounded, possibly to zero.  Users should verify
+ the pretimeout value after attempting to set pretimeout or timeout.
+
+ Upon receipt of an NMI from the iLO, the hpwdt driver will initiate a
+ panic. This is to allow for a crash dump to be collected.  It is incumbent
+ upon the user to have properly configured the system for kdump.
+
+ The default Linux kernel behavior upon panic is to print a kernel tombstone
+ and loop forever.  This is generally not what a watchdog user wants.
+
+ For those wishing to learn more please see:
+	Documentation/kdump/kdump.rst
+	Documentation/admin-guide/kernel-parameters.txt (panic=)
+	Your Linux Distribution specific documentation.
+
+ If the hpwdt does not receive the NMI associated with an expiring timer,
+ the iLO will proceed to reset the system at timeout if the timer hasn't
+ been updated.
+
+--
+
+ The HPE iLO NMI Watchdog Driver and documentation were originally developed
+ by Tom Mingarelli.
diff --git a/Documentation/watchdog/hpwdt.txt b/Documentation/watchdog/hpwdt.txt
deleted file mode 100644
index aaa9e4b4bdcd..000000000000
--- a/Documentation/watchdog/hpwdt.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-Last reviewed: 08/20/2018
-
-                     HPE iLO NMI Watchdog Driver
-                   for iLO based ProLiant Servers
-
- The HPE iLO NMI Watchdog driver is a kernel module that provides basic
- watchdog functionality and handler for the iLO "Generate NMI to System"
- virtual button.
-
- All references to iLO in this document imply it also works on iLO2 and all
- subsequent generations.
-
- Watchdog functionality is enabled like any other common watchdog driver. That
- is, an application needs to be started that kicks off the watchdog timer. A
- basic application exists in tools/testing/selftests/watchdog/ named
- watchdog-test.c. Simply compile the C file and kick it off. If the system
- gets into a bad state and hangs, the HPE ProLiant iLO timer register will
- not be updated in a timely fashion and a hardware system reset (also known as
- an Automatic Server Recovery (ASR)) event will occur.
-
- The hpwdt driver also has the following module parameters:
-
- soft_margin - allows the user to set the watchdog timer value.
-               Default value is 30 seconds.
- timeout     - an alias of soft_margin.
- pretimeout  - allows the user to set the watchdog pretimeout value.
-               This is the number of seconds before timeout when an
-               NMI is delivered to the system. Setting the value to
-               zero disables the pretimeout NMI.
-               Default value is 9 seconds.
- nowayout    - basic watchdog parameter that does not allow the timer to
-               be restarted or an impending ASR to be escaped.
-               Default value is set when compiling the kernel. If it is set
-               to "Y", then there is no way of disabling the watchdog once
-               it has been started.
-
- NOTE: More information about watchdog drivers in general, including the ioctl
-       interface to /dev/watchdog can be found in
-       Documentation/watchdog/watchdog-api.txt and Documentation/IPMI.txt.
-
- Due to limitations in the iLO hardware, the NMI pretimeout if enabled,
- can only be set to 9 seconds.  Attempts to set pretimeout to other
- non-zero values will be rounded, possibly to zero.  Users should verify
- the pretimeout value after attempting to set pretimeout or timeout.
-
- Upon receipt of an NMI from the iLO, the hpwdt driver will initiate a
- panic. This is to allow for a crash dump to be collected.  It is incumbent
- upon the user to have properly configured the system for kdump.
-
- The default Linux kernel behavior upon panic is to print a kernel tombstone
- and loop forever.  This is generally not what a watchdog user wants.
-
- For those wishing to learn more please see:
-	Documentation/kdump/kdump.rst
-	Documentation/admin-guide/kernel-parameters.txt (panic=)
-	Your Linux Distribution specific documentation.
-
- If the hpwdt does not receive the NMI associated with an expiring timer,
- the iLO will proceed to reset the system at timeout if the timer hasn't
- been updated.
-
---
-
- The HPE iLO NMI Watchdog Driver and documentation were originally developed
- by Tom Mingarelli.
-
diff --git a/Documentation/watchdog/index.rst b/Documentation/watchdog/index.rst
new file mode 100644
index 000000000000..33a0de631e84
--- /dev/null
+++ b/Documentation/watchdog/index.rst
@@ -0,0 +1,25 @@
+:orphan:
+
+======================
+Linux Watchdog Support
+======================
+
+.. toctree::
+    :maxdepth: 1
+
+    hpwdt
+    mlx-wdt
+    pcwd-watchdog
+    watchdog-api
+    watchdog-kernel-api
+    watchdog-parameters
+    watchdog-pm
+    wdt
+    convert_drivers_to_kernel_api
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/watchdog/mlx-wdt.rst b/Documentation/watchdog/mlx-wdt.rst
new file mode 100644
index 000000000000..bf5bafac47f0
--- /dev/null
+++ b/Documentation/watchdog/mlx-wdt.rst
@@ -0,0 +1,56 @@
+=========================
+Mellanox watchdog drivers
+=========================
+
+for x86 based system switches
+=============================
+
+This driver provides watchdog functionality for various Mellanox
+Ethernet and Infiniband switch systems.
+
+Mellanox watchdog device is implemented in a programmable logic device.
+
+There are 2 types of HW watchdog implementations.
+
+Type 1:
+  Actual HW timeout can be defined as a power of 2 msec.
+  e.g. timeout 20 sec will be rounded up to 32768 msec.
+  The maximum timeout period is 32 sec (32768 msec.),
+  Get time-left isn't supported
+
+Type 2:
+  Actual HW timeout is defined in sec. and it's the same as
+  a user-defined timeout.
+  Maximum timeout is 255 sec.
+  Get time-left is supported.
+
+Type 1 HW watchdog implementation exist in old systems and
+all new systems have type 2 HW watchdog.
+Two types of HW implementation have also different register map.
+
+Mellanox system can have 2 watchdogs: main and auxiliary.
+Main and auxiliary watchdog devices can be enabled together
+on the same system.
+There are several actions that can be defined in the watchdog:
+system reset, start fans on full speed and increase register counter.
+The last 2 actions are performed without a system reset.
+Actions without reset are provided for auxiliary watchdog device,
+which is optional.
+Watchdog can be started during a probe, in this case it will be
+pinged by watchdog core before watchdog device will be opened by
+user space application.
+Watchdog can be initialised in nowayout way, i.e. oncse started
+it can't be stopped.
+
+This mlx-wdt driver supports both HW watchdog implementations.
+
+Watchdog driver is probed from the common mlx_platform driver.
+Mlx_platform driver provides an appropriate set of registers for
+Mellanox watchdog device, identity name (mlx-wdt-main or mlx-wdt-aux),
+initial timeout, performed action in expiration and configuration flags.
+watchdog configuration flags: nowayout and start_at_boot, hw watchdog
+version - type1 or type2.
+The driver checks during initialization if the previous system reset
+was done by the watchdog. If yes, it makes a notification about this event.
+
+Access to HW registers is performed through a generic regmap interface.
diff --git a/Documentation/watchdog/mlx-wdt.txt b/Documentation/watchdog/mlx-wdt.txt
deleted file mode 100644
index 66eeb78505c3..000000000000
--- a/Documentation/watchdog/mlx-wdt.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-		Mellanox watchdog drivers
-		for x86 based system switches
-
-This driver provides watchdog functionality for various Mellanox
-Ethernet and Infiniband switch systems.
-
-Mellanox watchdog device is implemented in a programmable logic device.
-
-There are 2 types of HW watchdog implementations.
-
-Type 1:
-Actual HW timeout can be defined as a power of 2 msec.
-e.g. timeout 20 sec will be rounded up to 32768 msec.
-The maximum timeout period is 32 sec (32768 msec.),
-Get time-left isn't supported
-
-Type 2:
-Actual HW timeout is defined in sec. and it's the same as
-a user-defined timeout.
-Maximum timeout is 255 sec.
-Get time-left is supported.
-
-Type 1 HW watchdog implementation exist in old systems and
-all new systems have type 2 HW watchdog.
-Two types of HW implementation have also different register map.
-
-Mellanox system can have 2 watchdogs: main and auxiliary.
-Main and auxiliary watchdog devices can be enabled together
-on the same system.
-There are several actions that can be defined in the watchdog:
-system reset, start fans on full speed and increase register counter.
-The last 2 actions are performed without a system reset.
-Actions without reset are provided for auxiliary watchdog device,
-which is optional.
-Watchdog can be started during a probe, in this case it will be
-pinged by watchdog core before watchdog device will be opened by
-user space application.
-Watchdog can be initialised in nowayout way, i.e. oncse started
-it can't be stopped.
-
-This mlx-wdt driver supports both HW watchdog implementations.
-
-Watchdog driver is probed from the common mlx_platform driver.
-Mlx_platform driver provides an appropriate set of registers for
-Mellanox watchdog device, identity name (mlx-wdt-main or mlx-wdt-aux),
-initial timeout, performed action in expiration and configuration flags.
-watchdog configuration flags: nowayout and start_at_boot, hw watchdog
-version - type1 or type2.
-The driver checks during initialization if the previous system reset
-was done by the watchdog. If yes, it makes a notification about this event.
-
-Access to HW registers is performed through a generic regmap interface.
diff --git a/Documentation/watchdog/pcwd-watchdog.rst b/Documentation/watchdog/pcwd-watchdog.rst
new file mode 100644
index 000000000000..405e2a370082
--- /dev/null
+++ b/Documentation/watchdog/pcwd-watchdog.rst
@@ -0,0 +1,71 @@
+===================================
+Berkshire Products PC Watchdog Card
+===================================
+
+Last reviewed: 10/05/2007
+
+Support for ISA Cards  Revision A and C
+=======================================
+
+Documentation and Driver by Ken Hollis <kenji@bitgate.com>
+
+ The PC Watchdog is a card that offers the same type of functionality that
+ the WDT card does, only it doesn't require an IRQ to run.  Furthermore,
+ the Revision C card allows you to monitor any IO Port to automatically
+ trigger the card into being reset.  This way you can make the card
+ monitor hard drive status, or anything else you need.
+
+ The Watchdog Driver has one basic role: to talk to the card and send
+ signals to it so it doesn't reset your computer ... at least during
+ normal operation.
+
+ The Watchdog Driver will automatically find your watchdog card, and will
+ attach a running driver for use with that card.  After the watchdog
+ drivers have initialized, you can then talk to the card using a PC
+ Watchdog program.
+
+ I suggest putting a "watchdog -d" before the beginning of an fsck, and
+ a "watchdog -e -t 1" immediately after the end of an fsck.  (Remember
+ to run the program with an "&" to run it in the background!)
+
+ If you want to write a program to be compatible with the PC Watchdog
+ driver, simply use of modify the watchdog test program:
+ tools/testing/selftests/watchdog/watchdog-test.c
+
+
+ Other IOCTL functions include:
+
+	WDIOC_GETSUPPORT
+		This returns the support of the card itself.  This
+		returns in structure "PCWDS" which returns:
+
+			options = WDIOS_TEMPPANIC
+				  (This card supports temperature)
+			firmware_version = xxxx
+				  (Firmware version of the card)
+
+	WDIOC_GETSTATUS
+		This returns the status of the card, with the bits of
+		WDIOF_* bitwise-anded into the value.  (The comments
+		are in linux/pcwd.h)
+
+	WDIOC_GETBOOTSTATUS
+		This returns the status of the card that was reported
+		at bootup.
+
+	WDIOC_GETTEMP
+		This returns the temperature of the card.  (You can also
+		read /dev/watchdog, which gives a temperature update
+		every second.)
+
+	WDIOC_SETOPTIONS
+		This lets you set the options of the card.  You can either
+		enable or disable the card this way.
+
+	WDIOC_KEEPALIVE
+		This pings the card to tell it not to reset your computer.
+
+ And that's all she wrote!
+
+ -- Ken Hollis
+    (kenji@bitgate.com)
diff --git a/Documentation/watchdog/pcwd-watchdog.txt b/Documentation/watchdog/pcwd-watchdog.txt
deleted file mode 100644
index b8e60a441a43..000000000000
--- a/Documentation/watchdog/pcwd-watchdog.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-Last reviewed: 10/05/2007
-
-                     Berkshire Products PC Watchdog Card
-                   Support for ISA Cards  Revision A and C
-           Documentation and Driver by Ken Hollis <kenji@bitgate.com>
-
- The PC Watchdog is a card that offers the same type of functionality that
- the WDT card does, only it doesn't require an IRQ to run.  Furthermore,
- the Revision C card allows you to monitor any IO Port to automatically
- trigger the card into being reset.  This way you can make the card
- monitor hard drive status, or anything else you need.
-
- The Watchdog Driver has one basic role: to talk to the card and send
- signals to it so it doesn't reset your computer ... at least during
- normal operation.
-
- The Watchdog Driver will automatically find your watchdog card, and will
- attach a running driver for use with that card.  After the watchdog
- drivers have initialized, you can then talk to the card using a PC
- Watchdog program.
-
- I suggest putting a "watchdog -d" before the beginning of an fsck, and
- a "watchdog -e -t 1" immediately after the end of an fsck.  (Remember
- to run the program with an "&" to run it in the background!)
-
- If you want to write a program to be compatible with the PC Watchdog
- driver, simply use of modify the watchdog test program:
- tools/testing/selftests/watchdog/watchdog-test.c
-
-
- Other IOCTL functions include:
-
-	WDIOC_GETSUPPORT
-		This returns the support of the card itself.  This
-		returns in structure "PCWDS" which returns:
-			options = WDIOS_TEMPPANIC
-				  (This card supports temperature)
-			firmware_version = xxxx
-				  (Firmware version of the card)
-
-	WDIOC_GETSTATUS
-		This returns the status of the card, with the bits of
-		WDIOF_* bitwise-anded into the value.  (The comments
-		are in linux/pcwd.h)
-
-	WDIOC_GETBOOTSTATUS
-		This returns the status of the card that was reported
-		at bootup.
-
-	WDIOC_GETTEMP
-		This returns the temperature of the card.  (You can also
-		read /dev/watchdog, which gives a temperature update
-		every second.)
-
-	WDIOC_SETOPTIONS
-		This lets you set the options of the card.  You can either
-		enable or disable the card this way.
-
-	WDIOC_KEEPALIVE
-		This pings the card to tell it not to reset your computer.
-
- And that's all she wrote!
-
- -- Ken Hollis
-    (kenji@bitgate.com)
-
diff --git a/Documentation/watchdog/watchdog-api.rst b/Documentation/watchdog/watchdog-api.rst
new file mode 100644
index 000000000000..c6c1e9fa9f73
--- /dev/null
+++ b/Documentation/watchdog/watchdog-api.rst
@@ -0,0 +1,271 @@
+=============================
+The Linux Watchdog driver API
+=============================
+
+Last reviewed: 10/05/2007
+
+
+
+Copyright 2002 Christer Weingel <wingel@nano-system.com>
+
+Some parts of this document are copied verbatim from the sbc60xxwdt
+driver which is (c) Copyright 2000 Jakob Oestergaard <jakob@ostenfeld.dk>
+
+This document describes the state of the Linux 2.4.18 kernel.
+
+Introduction
+============
+
+A Watchdog Timer (WDT) is a hardware circuit that can reset the
+computer system in case of a software fault.  You probably knew that
+already.
+
+Usually a userspace daemon will notify the kernel watchdog driver via the
+/dev/watchdog special device file that userspace is still alive, at
+regular intervals.  When such a notification occurs, the driver will
+usually tell the hardware watchdog that everything is in order, and
+that the watchdog should wait for yet another little while to reset
+the system.  If userspace fails (RAM error, kernel bug, whatever), the
+notifications cease to occur, and the hardware watchdog will reset the
+system (causing a reboot) after the timeout occurs.
+
+The Linux watchdog API is a rather ad-hoc construction and different
+drivers implement different, and sometimes incompatible, parts of it.
+This file is an attempt to document the existing usage and allow
+future driver writers to use it as a reference.
+
+The simplest API
+================
+
+All drivers support the basic mode of operation, where the watchdog
+activates as soon as /dev/watchdog is opened and will reboot unless
+the watchdog is pinged within a certain time, this time is called the
+timeout or margin.  The simplest way to ping the watchdog is to write
+some data to the device.  So a very simple watchdog daemon would look
+like this source file:  see samples/watchdog/watchdog-simple.c
+
+A more advanced driver could for example check that a HTTP server is
+still responding before doing the write call to ping the watchdog.
+
+When the device is closed, the watchdog is disabled, unless the "Magic
+Close" feature is supported (see below).  This is not always such a
+good idea, since if there is a bug in the watchdog daemon and it
+crashes the system will not reboot.  Because of this, some of the
+drivers support the configuration option "Disable watchdog shutdown on
+close", CONFIG_WATCHDOG_NOWAYOUT.  If it is set to Y when compiling
+the kernel, there is no way of disabling the watchdog once it has been
+started.  So, if the watchdog daemon crashes, the system will reboot
+after the timeout has passed. Watchdog devices also usually support
+the nowayout module parameter so that this option can be controlled at
+runtime.
+
+Magic Close feature
+===================
+
+If a driver supports "Magic Close", the driver will not disable the
+watchdog unless a specific magic character 'V' has been sent to
+/dev/watchdog just before closing the file.  If the userspace daemon
+closes the file without sending this special character, the driver
+will assume that the daemon (and userspace in general) died, and will
+stop pinging the watchdog without disabling it first.  This will then
+cause a reboot if the watchdog is not re-opened in sufficient time.
+
+The ioctl API
+=============
+
+All conforming drivers also support an ioctl API.
+
+Pinging the watchdog using an ioctl:
+
+All drivers that have an ioctl interface support at least one ioctl,
+KEEPALIVE.  This ioctl does exactly the same thing as a write to the
+watchdog device, so the main loop in the above program could be
+replaced with::
+
+	while (1) {
+		ioctl(fd, WDIOC_KEEPALIVE, 0);
+		sleep(10);
+	}
+
+the argument to the ioctl is ignored.
+
+Setting and getting the timeout
+===============================
+
+For some drivers it is possible to modify the watchdog timeout on the
+fly with the SETTIMEOUT ioctl, those drivers have the WDIOF_SETTIMEOUT
+flag set in their option field.  The argument is an integer
+representing the timeout in seconds.  The driver returns the real
+timeout used in the same variable, and this timeout might differ from
+the requested one due to limitation of the hardware::
+
+    int timeout = 45;
+    ioctl(fd, WDIOC_SETTIMEOUT, &timeout);
+    printf("The timeout was set to %d seconds\n", timeout);
+
+This example might actually print "The timeout was set to 60 seconds"
+if the device has a granularity of minutes for its timeout.
+
+Starting with the Linux 2.4.18 kernel, it is possible to query the
+current timeout using the GETTIMEOUT ioctl::
+
+    ioctl(fd, WDIOC_GETTIMEOUT, &timeout);
+    printf("The timeout was is %d seconds\n", timeout);
+
+Pretimeouts
+===========
+
+Some watchdog timers can be set to have a trigger go off before the
+actual time they will reset the system.  This can be done with an NMI,
+interrupt, or other mechanism.  This allows Linux to record useful
+information (like panic information and kernel coredumps) before it
+resets::
+
+    pretimeout = 10;
+    ioctl(fd, WDIOC_SETPRETIMEOUT, &pretimeout);
+
+Note that the pretimeout is the number of seconds before the time
+when the timeout will go off.  It is not the number of seconds until
+the pretimeout.  So, for instance, if you set the timeout to 60 seconds
+and the pretimeout to 10 seconds, the pretimeout will go off in 50
+seconds.  Setting a pretimeout to zero disables it.
+
+There is also a get function for getting the pretimeout::
+
+    ioctl(fd, WDIOC_GETPRETIMEOUT, &timeout);
+    printf("The pretimeout was is %d seconds\n", timeout);
+
+Not all watchdog drivers will support a pretimeout.
+
+Get the number of seconds before reboot
+=======================================
+
+Some watchdog drivers have the ability to report the remaining time
+before the system will reboot. The WDIOC_GETTIMELEFT is the ioctl
+that returns the number of seconds before reboot::
+
+    ioctl(fd, WDIOC_GETTIMELEFT, &timeleft);
+    printf("The timeout was is %d seconds\n", timeleft);
+
+Environmental monitoring
+========================
+
+All watchdog drivers are required return more information about the system,
+some do temperature, fan and power level monitoring, some can tell you
+the reason for the last reboot of the system.  The GETSUPPORT ioctl is
+available to ask what the device can do::
+
+	struct watchdog_info ident;
+	ioctl(fd, WDIOC_GETSUPPORT, &ident);
+
+the fields returned in the ident struct are:
+
+	================	=============================================
+        identity		a string identifying the watchdog driver
+	firmware_version	the firmware version of the card if available
+	options			a flags describing what the device supports
+	================	=============================================
+
+the options field can have the following bits set, and describes what
+kind of information that the GET_STATUS and GET_BOOT_STATUS ioctls can
+return.   [FIXME -- Is this correct?]
+
+	================	=========================
+	WDIOF_OVERHEAT		Reset due to CPU overheat
+	================	=========================
+
+The machine was last rebooted by the watchdog because the thermal limit was
+exceeded:
+
+	==============		==========
+	WDIOF_FANFAULT		Fan failed
+	==============		==========
+
+A system fan monitored by the watchdog card has failed
+
+	=============		================
+	WDIOF_EXTERN1		External relay 1
+	=============		================
+
+External monitoring relay/source 1 was triggered. Controllers intended for
+real world applications include external monitoring pins that will trigger
+a reset.
+
+	=============		================
+	WDIOF_EXTERN2		External relay 2
+	=============		================
+
+External monitoring relay/source 2 was triggered
+
+	================	=====================
+	WDIOF_POWERUNDER	Power bad/power fault
+	================	=====================
+
+The machine is showing an undervoltage status
+
+	===============		=============================
+	WDIOF_CARDRESET		Card previously reset the CPU
+	===============		=============================
+
+The last reboot was caused by the watchdog card
+
+	================	=====================
+	WDIOF_POWEROVER		Power over voltage
+	================	=====================
+
+The machine is showing an overvoltage status. Note that if one level is
+under and one over both bits will be set - this may seem odd but makes
+sense.
+
+	===================	=====================
+	WDIOF_KEEPALIVEPING	Keep alive ping reply
+	===================	=====================
+
+The watchdog saw a keepalive ping since it was last queried.
+
+	================	=======================
+	WDIOF_SETTIMEOUT	Can set/get the timeout
+	================	=======================
+
+The watchdog can do pretimeouts.
+
+	================	================================
+	WDIOF_PRETIMEOUT	Pretimeout (in seconds), get/set
+	================	================================
+
+
+For those drivers that return any bits set in the option field, the
+GETSTATUS and GETBOOTSTATUS ioctls can be used to ask for the current
+status, and the status at the last reboot, respectively::
+
+    int flags;
+    ioctl(fd, WDIOC_GETSTATUS, &flags);
+
+    or
+
+    ioctl(fd, WDIOC_GETBOOTSTATUS, &flags);
+
+Note that not all devices support these two calls, and some only
+support the GETBOOTSTATUS call.
+
+Some drivers can measure the temperature using the GETTEMP ioctl.  The
+returned value is the temperature in degrees fahrenheit::
+
+    int temperature;
+    ioctl(fd, WDIOC_GETTEMP, &temperature);
+
+Finally the SETOPTIONS ioctl can be used to control some aspects of
+the cards operation::
+
+    int options = 0;
+    ioctl(fd, WDIOC_SETOPTIONS, &options);
+
+The following options are available:
+
+	=================	================================
+	WDIOS_DISABLECARD	Turn off the watchdog timer
+	WDIOS_ENABLECARD	Turn on the watchdog timer
+	WDIOS_TEMPPANIC		Kernel panic on temperature trip
+	=================	================================
+
+[FIXME -- better explanations]
diff --git a/Documentation/watchdog/watchdog-api.txt b/Documentation/watchdog/watchdog-api.txt
deleted file mode 100644
index 0e62ba33b7fb..000000000000
--- a/Documentation/watchdog/watchdog-api.txt
+++ /dev/null
@@ -1,237 +0,0 @@
-Last reviewed: 10/05/2007
-
-
-The Linux Watchdog driver API.
-
-Copyright 2002 Christer Weingel <wingel@nano-system.com>
-
-Some parts of this document are copied verbatim from the sbc60xxwdt
-driver which is (c) Copyright 2000 Jakob Oestergaard <jakob@ostenfeld.dk>
-
-This document describes the state of the Linux 2.4.18 kernel.
-
-Introduction:
-
-A Watchdog Timer (WDT) is a hardware circuit that can reset the
-computer system in case of a software fault.  You probably knew that
-already.
-
-Usually a userspace daemon will notify the kernel watchdog driver via the
-/dev/watchdog special device file that userspace is still alive, at
-regular intervals.  When such a notification occurs, the driver will
-usually tell the hardware watchdog that everything is in order, and
-that the watchdog should wait for yet another little while to reset
-the system.  If userspace fails (RAM error, kernel bug, whatever), the
-notifications cease to occur, and the hardware watchdog will reset the
-system (causing a reboot) after the timeout occurs.
-
-The Linux watchdog API is a rather ad-hoc construction and different
-drivers implement different, and sometimes incompatible, parts of it.
-This file is an attempt to document the existing usage and allow
-future driver writers to use it as a reference.
-
-The simplest API:
-
-All drivers support the basic mode of operation, where the watchdog
-activates as soon as /dev/watchdog is opened and will reboot unless
-the watchdog is pinged within a certain time, this time is called the
-timeout or margin.  The simplest way to ping the watchdog is to write
-some data to the device.  So a very simple watchdog daemon would look
-like this source file:  see samples/watchdog/watchdog-simple.c
-
-A more advanced driver could for example check that a HTTP server is
-still responding before doing the write call to ping the watchdog.
-
-When the device is closed, the watchdog is disabled, unless the "Magic
-Close" feature is supported (see below).  This is not always such a
-good idea, since if there is a bug in the watchdog daemon and it
-crashes the system will not reboot.  Because of this, some of the
-drivers support the configuration option "Disable watchdog shutdown on
-close", CONFIG_WATCHDOG_NOWAYOUT.  If it is set to Y when compiling
-the kernel, there is no way of disabling the watchdog once it has been
-started.  So, if the watchdog daemon crashes, the system will reboot
-after the timeout has passed. Watchdog devices also usually support
-the nowayout module parameter so that this option can be controlled at
-runtime.
-
-Magic Close feature:
-
-If a driver supports "Magic Close", the driver will not disable the
-watchdog unless a specific magic character 'V' has been sent to
-/dev/watchdog just before closing the file.  If the userspace daemon
-closes the file without sending this special character, the driver
-will assume that the daemon (and userspace in general) died, and will
-stop pinging the watchdog without disabling it first.  This will then
-cause a reboot if the watchdog is not re-opened in sufficient time.
-
-The ioctl API:
-
-All conforming drivers also support an ioctl API.
-
-Pinging the watchdog using an ioctl:
-
-All drivers that have an ioctl interface support at least one ioctl,
-KEEPALIVE.  This ioctl does exactly the same thing as a write to the
-watchdog device, so the main loop in the above program could be
-replaced with:
-
-	while (1) {
-		ioctl(fd, WDIOC_KEEPALIVE, 0);
-		sleep(10);
-	}
-
-the argument to the ioctl is ignored.
-
-Setting and getting the timeout:
-
-For some drivers it is possible to modify the watchdog timeout on the
-fly with the SETTIMEOUT ioctl, those drivers have the WDIOF_SETTIMEOUT
-flag set in their option field.  The argument is an integer
-representing the timeout in seconds.  The driver returns the real
-timeout used in the same variable, and this timeout might differ from
-the requested one due to limitation of the hardware.
-
-    int timeout = 45;
-    ioctl(fd, WDIOC_SETTIMEOUT, &timeout);
-    printf("The timeout was set to %d seconds\n", timeout);
-
-This example might actually print "The timeout was set to 60 seconds"
-if the device has a granularity of minutes for its timeout.
-
-Starting with the Linux 2.4.18 kernel, it is possible to query the
-current timeout using the GETTIMEOUT ioctl.
-
-    ioctl(fd, WDIOC_GETTIMEOUT, &timeout);
-    printf("The timeout was is %d seconds\n", timeout);
-
-Pretimeouts:
-
-Some watchdog timers can be set to have a trigger go off before the
-actual time they will reset the system.  This can be done with an NMI,
-interrupt, or other mechanism.  This allows Linux to record useful
-information (like panic information and kernel coredumps) before it
-resets.
-
-    pretimeout = 10;
-    ioctl(fd, WDIOC_SETPRETIMEOUT, &pretimeout);
-
-Note that the pretimeout is the number of seconds before the time
-when the timeout will go off.  It is not the number of seconds until
-the pretimeout.  So, for instance, if you set the timeout to 60 seconds
-and the pretimeout to 10 seconds, the pretimeout will go off in 50
-seconds.  Setting a pretimeout to zero disables it.
-
-There is also a get function for getting the pretimeout:
-
-    ioctl(fd, WDIOC_GETPRETIMEOUT, &timeout);
-    printf("The pretimeout was is %d seconds\n", timeout);
-
-Not all watchdog drivers will support a pretimeout.
-
-Get the number of seconds before reboot:
-
-Some watchdog drivers have the ability to report the remaining time
-before the system will reboot. The WDIOC_GETTIMELEFT is the ioctl
-that returns the number of seconds before reboot.
-
-    ioctl(fd, WDIOC_GETTIMELEFT, &timeleft);
-    printf("The timeout was is %d seconds\n", timeleft);
-
-Environmental monitoring:
-
-All watchdog drivers are required return more information about the system,
-some do temperature, fan and power level monitoring, some can tell you
-the reason for the last reboot of the system.  The GETSUPPORT ioctl is
-available to ask what the device can do:
-
-	struct watchdog_info ident;
-	ioctl(fd, WDIOC_GETSUPPORT, &ident);
-
-the fields returned in the ident struct are:
-
-        identity		a string identifying the watchdog driver
-	firmware_version	the firmware version of the card if available
-	options			a flags describing what the device supports
-
-the options field can have the following bits set, and describes what
-kind of information that the GET_STATUS and GET_BOOT_STATUS ioctls can
-return.   [FIXME -- Is this correct?]
-
-	WDIOF_OVERHEAT		Reset due to CPU overheat
-
-The machine was last rebooted by the watchdog because the thermal limit was
-exceeded
-
-	WDIOF_FANFAULT		Fan failed
-
-A system fan monitored by the watchdog card has failed
-
-	WDIOF_EXTERN1		External relay 1
-
-External monitoring relay/source 1 was triggered. Controllers intended for
-real world applications include external monitoring pins that will trigger
-a reset.
-
-	WDIOF_EXTERN2		External relay 2
-
-External monitoring relay/source 2 was triggered
-
-	WDIOF_POWERUNDER	Power bad/power fault
-
-The machine is showing an undervoltage status
-
-	WDIOF_CARDRESET		Card previously reset the CPU
-
-The last reboot was caused by the watchdog card
-
-	WDIOF_POWEROVER		Power over voltage
-
-The machine is showing an overvoltage status. Note that if one level is
-under and one over both bits will be set - this may seem odd but makes
-sense.
-
-	WDIOF_KEEPALIVEPING	Keep alive ping reply
-
-The watchdog saw a keepalive ping since it was last queried.
-
-	WDIOF_SETTIMEOUT	Can set/get the timeout
-
-The watchdog can do pretimeouts.
-
-	WDIOF_PRETIMEOUT	Pretimeout (in seconds), get/set
-
-
-For those drivers that return any bits set in the option field, the
-GETSTATUS and GETBOOTSTATUS ioctls can be used to ask for the current
-status, and the status at the last reboot, respectively.  
-
-    int flags;
-    ioctl(fd, WDIOC_GETSTATUS, &flags);
-
-    or
-
-    ioctl(fd, WDIOC_GETBOOTSTATUS, &flags);
-
-Note that not all devices support these two calls, and some only
-support the GETBOOTSTATUS call.
-
-Some drivers can measure the temperature using the GETTEMP ioctl.  The
-returned value is the temperature in degrees fahrenheit.
-
-    int temperature;
-    ioctl(fd, WDIOC_GETTEMP, &temperature);
-
-Finally the SETOPTIONS ioctl can be used to control some aspects of
-the cards operation.
-
-    int options = 0;
-    ioctl(fd, WDIOC_SETOPTIONS, &options);
-
-The following options are available:
-
-	WDIOS_DISABLECARD	Turn off the watchdog timer
-	WDIOS_ENABLECARD	Turn on the watchdog timer
-	WDIOS_TEMPPANIC		Kernel panic on temperature trip
-
-[FIXME -- better explanations]
-
diff --git a/Documentation/watchdog/watchdog-kernel-api.rst b/Documentation/watchdog/watchdog-kernel-api.rst
new file mode 100644
index 000000000000..864edbe932c1
--- /dev/null
+++ b/Documentation/watchdog/watchdog-kernel-api.rst
@@ -0,0 +1,338 @@
+===============================================
+The Linux WatchDog Timer Driver Core kernel API
+===============================================
+
+Last reviewed: 12-Feb-2013
+
+Wim Van Sebroeck <wim@iguana.be>
+
+Introduction
+------------
+This document does not describe what a WatchDog Timer (WDT) Driver or Device is.
+It also does not describe the API which can be used by user space to communicate
+with a WatchDog Timer. If you want to know this then please read the following
+file: Documentation/watchdog/watchdog-api.rst .
+
+So what does this document describe? It describes the API that can be used by
+WatchDog Timer Drivers that want to use the WatchDog Timer Driver Core
+Framework. This framework provides all interfacing towards user space so that
+the same code does not have to be reproduced each time. This also means that
+a watchdog timer driver then only needs to provide the different routines
+(operations) that control the watchdog timer (WDT).
+
+The API
+-------
+Each watchdog timer driver that wants to use the WatchDog Timer Driver Core
+must #include <linux/watchdog.h> (you would have to do this anyway when
+writing a watchdog device driver). This include file contains following
+register/unregister routines::
+
+	extern int watchdog_register_device(struct watchdog_device *);
+	extern void watchdog_unregister_device(struct watchdog_device *);
+
+The watchdog_register_device routine registers a watchdog timer device.
+The parameter of this routine is a pointer to a watchdog_device structure.
+This routine returns zero on success and a negative errno code for failure.
+
+The watchdog_unregister_device routine deregisters a registered watchdog timer
+device. The parameter of this routine is the pointer to the registered
+watchdog_device structure.
+
+The watchdog subsystem includes an registration deferral mechanism,
+which allows you to register an watchdog as early as you wish during
+the boot process.
+
+The watchdog device structure looks like this::
+
+  struct watchdog_device {
+	int id;
+	struct device *parent;
+	const struct attribute_group **groups;
+	const struct watchdog_info *info;
+	const struct watchdog_ops *ops;
+	const struct watchdog_governor *gov;
+	unsigned int bootstatus;
+	unsigned int timeout;
+	unsigned int pretimeout;
+	unsigned int min_timeout;
+	unsigned int max_timeout;
+	unsigned int min_hw_heartbeat_ms;
+	unsigned int max_hw_heartbeat_ms;
+	struct notifier_block reboot_nb;
+	struct notifier_block restart_nb;
+	void *driver_data;
+	struct watchdog_core_data *wd_data;
+	unsigned long status;
+	struct list_head deferred;
+  };
+
+It contains following fields:
+
+* id: set by watchdog_register_device, id 0 is special. It has both a
+  /dev/watchdog0 cdev (dynamic major, minor 0) as well as the old
+  /dev/watchdog miscdev. The id is set automatically when calling
+  watchdog_register_device.
+* parent: set this to the parent device (or NULL) before calling
+  watchdog_register_device.
+* groups: List of sysfs attribute groups to create when creating the watchdog
+  device.
+* info: a pointer to a watchdog_info structure. This structure gives some
+  additional information about the watchdog timer itself. (Like it's unique name)
+* ops: a pointer to the list of watchdog operations that the watchdog supports.
+* gov: a pointer to the assigned watchdog device pretimeout governor or NULL.
+* timeout: the watchdog timer's timeout value (in seconds).
+  This is the time after which the system will reboot if user space does
+  not send a heartbeat request if WDOG_ACTIVE is set.
+* pretimeout: the watchdog timer's pretimeout value (in seconds).
+* min_timeout: the watchdog timer's minimum timeout value (in seconds).
+  If set, the minimum configurable value for 'timeout'.
+* max_timeout: the watchdog timer's maximum timeout value (in seconds),
+  as seen from userspace. If set, the maximum configurable value for
+  'timeout'. Not used if max_hw_heartbeat_ms is non-zero.
+* min_hw_heartbeat_ms: Hardware limit for minimum time between heartbeats,
+  in milli-seconds. This value is normally 0; it should only be provided
+  if the hardware can not tolerate lower intervals between heartbeats.
+* max_hw_heartbeat_ms: Maximum hardware heartbeat, in milli-seconds.
+  If set, the infrastructure will send heartbeats to the watchdog driver
+  if 'timeout' is larger than max_hw_heartbeat_ms, unless WDOG_ACTIVE
+  is set and userspace failed to send a heartbeat for at least 'timeout'
+  seconds. max_hw_heartbeat_ms must be set if a driver does not implement
+  the stop function.
+* reboot_nb: notifier block that is registered for reboot notifications, for
+  internal use only. If the driver calls watchdog_stop_on_reboot, watchdog core
+  will stop the watchdog on such notifications.
+* restart_nb: notifier block that is registered for machine restart, for
+  internal use only. If a watchdog is capable of restarting the machine, it
+  should define ops->restart. Priority can be changed through
+  watchdog_set_restart_priority.
+* bootstatus: status of the device after booting (reported with watchdog
+  WDIOF_* status bits).
+* driver_data: a pointer to the drivers private data of a watchdog device.
+  This data should only be accessed via the watchdog_set_drvdata and
+  watchdog_get_drvdata routines.
+* wd_data: a pointer to watchdog core internal data.
+* status: this field contains a number of status bits that give extra
+  information about the status of the device (Like: is the watchdog timer
+  running/active, or is the nowayout bit set).
+* deferred: entry in wtd_deferred_reg_list which is used to
+  register early initialized watchdogs.
+
+The list of watchdog operations is defined as::
+
+  struct watchdog_ops {
+	struct module *owner;
+	/* mandatory operations */
+	int (*start)(struct watchdog_device *);
+	int (*stop)(struct watchdog_device *);
+	/* optional operations */
+	int (*ping)(struct watchdog_device *);
+	unsigned int (*status)(struct watchdog_device *);
+	int (*set_timeout)(struct watchdog_device *, unsigned int);
+	int (*set_pretimeout)(struct watchdog_device *, unsigned int);
+	unsigned int (*get_timeleft)(struct watchdog_device *);
+	int (*restart)(struct watchdog_device *);
+	long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long);
+  };
+
+It is important that you first define the module owner of the watchdog timer
+driver's operations. This module owner will be used to lock the module when
+the watchdog is active. (This to avoid a system crash when you unload the
+module and /dev/watchdog is still open).
+
+Some operations are mandatory and some are optional. The mandatory operations
+are:
+
+* start: this is a pointer to the routine that starts the watchdog timer
+  device.
+  The routine needs a pointer to the watchdog timer device structure as a
+  parameter. It returns zero on success or a negative errno code for failure.
+
+Not all watchdog timer hardware supports the same functionality. That's why
+all other routines/operations are optional. They only need to be provided if
+they are supported. These optional routines/operations are:
+
+* stop: with this routine the watchdog timer device is being stopped.
+
+  The routine needs a pointer to the watchdog timer device structure as a
+  parameter. It returns zero on success or a negative errno code for failure.
+  Some watchdog timer hardware can only be started and not be stopped. A
+  driver supporting such hardware does not have to implement the stop routine.
+
+  If a driver has no stop function, the watchdog core will set WDOG_HW_RUNNING
+  and start calling the driver's keepalive pings function after the watchdog
+  device is closed.
+
+  If a watchdog driver does not implement the stop function, it must set
+  max_hw_heartbeat_ms.
+* ping: this is the routine that sends a keepalive ping to the watchdog timer
+  hardware.
+
+  The routine needs a pointer to the watchdog timer device structure as a
+  parameter. It returns zero on success or a negative errno code for failure.
+
+  Most hardware that does not support this as a separate function uses the
+  start function to restart the watchdog timer hardware. And that's also what
+  the watchdog timer driver core does: to send a keepalive ping to the watchdog
+  timer hardware it will either use the ping operation (when available) or the
+  start operation (when the ping operation is not available).
+
+  (Note: the WDIOC_KEEPALIVE ioctl call will only be active when the
+  WDIOF_KEEPALIVEPING bit has been set in the option field on the watchdog's
+  info structure).
+* status: this routine checks the status of the watchdog timer device. The
+  status of the device is reported with watchdog WDIOF_* status flags/bits.
+
+  WDIOF_MAGICCLOSE and WDIOF_KEEPALIVEPING are reported by the watchdog core;
+  it is not necessary to report those bits from the driver. Also, if no status
+  function is provided by the driver, the watchdog core reports the status bits
+  provided in the bootstatus variable of struct watchdog_device.
+
+* set_timeout: this routine checks and changes the timeout of the watchdog
+  timer device. It returns 0 on success, -EINVAL for "parameter out of range"
+  and -EIO for "could not write value to the watchdog". On success this
+  routine should set the timeout value of the watchdog_device to the
+  achieved timeout value (which may be different from the requested one
+  because the watchdog does not necessarily have a 1 second resolution).
+
+  Drivers implementing max_hw_heartbeat_ms set the hardware watchdog heartbeat
+  to the minimum of timeout and max_hw_heartbeat_ms. Those drivers set the
+  timeout value of the watchdog_device either to the requested timeout value
+  (if it is larger than max_hw_heartbeat_ms), or to the achieved timeout value.
+  (Note: the WDIOF_SETTIMEOUT needs to be set in the options field of the
+  watchdog's info structure).
+
+  If the watchdog driver does not have to perform any action but setting the
+  watchdog_device.timeout, this callback can be omitted.
+
+  If set_timeout is not provided but, WDIOF_SETTIMEOUT is set, the watchdog
+  infrastructure updates the timeout value of the watchdog_device internally
+  to the requested value.
+
+  If the pretimeout feature is used (WDIOF_PRETIMEOUT), then set_timeout must
+  also take care of checking if pretimeout is still valid and set up the timer
+  accordingly. This can't be done in the core without races, so it is the
+  duty of the driver.
+* set_pretimeout: this routine checks and changes the pretimeout value of
+  the watchdog. It is optional because not all watchdogs support pretimeout
+  notification. The timeout value is not an absolute time, but the number of
+  seconds before the actual timeout would happen. It returns 0 on success,
+  -EINVAL for "parameter out of range" and -EIO for "could not write value to
+  the watchdog". A value of 0 disables pretimeout notification.
+
+  (Note: the WDIOF_PRETIMEOUT needs to be set in the options field of the
+  watchdog's info structure).
+
+  If the watchdog driver does not have to perform any action but setting the
+  watchdog_device.pretimeout, this callback can be omitted. That means if
+  set_pretimeout is not provided but WDIOF_PRETIMEOUT is set, the watchdog
+  infrastructure updates the pretimeout value of the watchdog_device internally
+  to the requested value.
+
+* get_timeleft: this routines returns the time that's left before a reset.
+* restart: this routine restarts the machine. It returns 0 on success or a
+  negative errno code for failure.
+* ioctl: if this routine is present then it will be called first before we do
+  our own internal ioctl call handling. This routine should return -ENOIOCTLCMD
+  if a command is not supported. The parameters that are passed to the ioctl
+  call are: watchdog_device, cmd and arg.
+
+The status bits should (preferably) be set with the set_bit and clear_bit alike
+bit-operations. The status bits that are defined are:
+
+* WDOG_ACTIVE: this status bit indicates whether or not a watchdog timer device
+  is active or not from user perspective. User space is expected to send
+  heartbeat requests to the driver while this flag is set.
+* WDOG_NO_WAY_OUT: this bit stores the nowayout setting for the watchdog.
+  If this bit is set then the watchdog timer will not be able to stop.
+* WDOG_HW_RUNNING: Set by the watchdog driver if the hardware watchdog is
+  running. The bit must be set if the watchdog timer hardware can not be
+  stopped. The bit may also be set if the watchdog timer is running after
+  booting, before the watchdog device is opened. If set, the watchdog
+  infrastructure will send keepalives to the watchdog hardware while
+  WDOG_ACTIVE is not set.
+  Note: when you register the watchdog timer device with this bit set,
+  then opening /dev/watchdog will skip the start operation but send a keepalive
+  request instead.
+
+  To set the WDOG_NO_WAY_OUT status bit (before registering your watchdog
+  timer device) you can either:
+
+  * set it statically in your watchdog_device struct with
+
+	.status = WATCHDOG_NOWAYOUT_INIT_STATUS,
+
+    (this will set the value the same as CONFIG_WATCHDOG_NOWAYOUT) or
+  * use the following helper function::
+
+	static inline void watchdog_set_nowayout(struct watchdog_device *wdd,
+						 int nowayout)
+
+Note:
+   The WatchDog Timer Driver Core supports the magic close feature and
+   the nowayout feature. To use the magic close feature you must set the
+   WDIOF_MAGICCLOSE bit in the options field of the watchdog's info structure.
+
+The nowayout feature will overrule the magic close feature.
+
+To get or set driver specific data the following two helper functions should be
+used::
+
+  static inline void watchdog_set_drvdata(struct watchdog_device *wdd,
+					  void *data)
+  static inline void *watchdog_get_drvdata(struct watchdog_device *wdd)
+
+The watchdog_set_drvdata function allows you to add driver specific data. The
+arguments of this function are the watchdog device where you want to add the
+driver specific data to and a pointer to the data itself.
+
+The watchdog_get_drvdata function allows you to retrieve driver specific data.
+The argument of this function is the watchdog device where you want to retrieve
+data from. The function returns the pointer to the driver specific data.
+
+To initialize the timeout field, the following function can be used::
+
+  extern int watchdog_init_timeout(struct watchdog_device *wdd,
+                                   unsigned int timeout_parm,
+                                   struct device *dev);
+
+The watchdog_init_timeout function allows you to initialize the timeout field
+using the module timeout parameter or by retrieving the timeout-sec property from
+the device tree (if the module timeout parameter is invalid). Best practice is
+to set the default timeout value as timeout value in the watchdog_device and
+then use this function to set the user "preferred" timeout value.
+This routine returns zero on success and a negative errno code for failure.
+
+To disable the watchdog on reboot, the user must call the following helper::
+
+  static inline void watchdog_stop_on_reboot(struct watchdog_device *wdd);
+
+To disable the watchdog when unregistering the watchdog, the user must call
+the following helper. Note that this will only stop the watchdog if the
+nowayout flag is not set.
+
+::
+
+  static inline void watchdog_stop_on_unregister(struct watchdog_device *wdd);
+
+To change the priority of the restart handler the following helper should be
+used::
+
+  void watchdog_set_restart_priority(struct watchdog_device *wdd, int priority);
+
+User should follow the following guidelines for setting the priority:
+
+* 0: should be called in last resort, has limited restart capabilities
+* 128: default restart handler, use if no other handler is expected to be
+  available, and/or if restart is sufficient to restart the entire system
+* 255: highest priority, will preempt all other restart handlers
+
+To raise a pretimeout notification, the following function should be used::
+
+  void watchdog_notify_pretimeout(struct watchdog_device *wdd)
+
+The function can be called in the interrupt context. If watchdog pretimeout
+governor framework (kbuild CONFIG_WATCHDOG_PRETIMEOUT_GOV symbol) is enabled,
+an action is taken by a preconfigured pretimeout governor preassigned to
+the watchdog device. If watchdog pretimeout governor framework is not
+enabled, watchdog_notify_pretimeout() prints a notification message to
+the kernel log buffer.
diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt
deleted file mode 100644
index 3a91ef5af044..000000000000
--- a/Documentation/watchdog/watchdog-kernel-api.txt
+++ /dev/null
@@ -1,305 +0,0 @@
-The Linux WatchDog Timer Driver Core kernel API.
-===============================================
-Last reviewed: 12-Feb-2013
-
-Wim Van Sebroeck <wim@iguana.be>
-
-Introduction
-------------
-This document does not describe what a WatchDog Timer (WDT) Driver or Device is.
-It also does not describe the API which can be used by user space to communicate
-with a WatchDog Timer. If you want to know this then please read the following
-file: Documentation/watchdog/watchdog-api.txt .
-
-So what does this document describe? It describes the API that can be used by
-WatchDog Timer Drivers that want to use the WatchDog Timer Driver Core
-Framework. This framework provides all interfacing towards user space so that
-the same code does not have to be reproduced each time. This also means that
-a watchdog timer driver then only needs to provide the different routines
-(operations) that control the watchdog timer (WDT).
-
-The API
--------
-Each watchdog timer driver that wants to use the WatchDog Timer Driver Core
-must #include <linux/watchdog.h> (you would have to do this anyway when
-writing a watchdog device driver). This include file contains following
-register/unregister routines:
-
-extern int watchdog_register_device(struct watchdog_device *);
-extern void watchdog_unregister_device(struct watchdog_device *);
-
-The watchdog_register_device routine registers a watchdog timer device.
-The parameter of this routine is a pointer to a watchdog_device structure.
-This routine returns zero on success and a negative errno code for failure.
-
-The watchdog_unregister_device routine deregisters a registered watchdog timer
-device. The parameter of this routine is the pointer to the registered
-watchdog_device structure.
-
-The watchdog subsystem includes an registration deferral mechanism,
-which allows you to register an watchdog as early as you wish during
-the boot process.
-
-The watchdog device structure looks like this:
-
-struct watchdog_device {
-	int id;
-	struct device *parent;
-	const struct attribute_group **groups;
-	const struct watchdog_info *info;
-	const struct watchdog_ops *ops;
-	const struct watchdog_governor *gov;
-	unsigned int bootstatus;
-	unsigned int timeout;
-	unsigned int pretimeout;
-	unsigned int min_timeout;
-	unsigned int max_timeout;
-	unsigned int min_hw_heartbeat_ms;
-	unsigned int max_hw_heartbeat_ms;
-	struct notifier_block reboot_nb;
-	struct notifier_block restart_nb;
-	void *driver_data;
-	struct watchdog_core_data *wd_data;
-	unsigned long status;
-	struct list_head deferred;
-};
-
-It contains following fields:
-* id: set by watchdog_register_device, id 0 is special. It has both a
-  /dev/watchdog0 cdev (dynamic major, minor 0) as well as the old
-  /dev/watchdog miscdev. The id is set automatically when calling
-  watchdog_register_device.
-* parent: set this to the parent device (or NULL) before calling
-  watchdog_register_device.
-* groups: List of sysfs attribute groups to create when creating the watchdog
-  device.
-* info: a pointer to a watchdog_info structure. This structure gives some
-  additional information about the watchdog timer itself. (Like it's unique name)
-* ops: a pointer to the list of watchdog operations that the watchdog supports.
-* gov: a pointer to the assigned watchdog device pretimeout governor or NULL.
-* timeout: the watchdog timer's timeout value (in seconds).
-  This is the time after which the system will reboot if user space does
-  not send a heartbeat request if WDOG_ACTIVE is set.
-* pretimeout: the watchdog timer's pretimeout value (in seconds).
-* min_timeout: the watchdog timer's minimum timeout value (in seconds).
-  If set, the minimum configurable value for 'timeout'.
-* max_timeout: the watchdog timer's maximum timeout value (in seconds),
-  as seen from userspace. If set, the maximum configurable value for
-  'timeout'. Not used if max_hw_heartbeat_ms is non-zero.
-* min_hw_heartbeat_ms: Hardware limit for minimum time between heartbeats,
-  in milli-seconds. This value is normally 0; it should only be provided
-  if the hardware can not tolerate lower intervals between heartbeats.
-* max_hw_heartbeat_ms: Maximum hardware heartbeat, in milli-seconds.
-  If set, the infrastructure will send heartbeats to the watchdog driver
-  if 'timeout' is larger than max_hw_heartbeat_ms, unless WDOG_ACTIVE
-  is set and userspace failed to send a heartbeat for at least 'timeout'
-  seconds. max_hw_heartbeat_ms must be set if a driver does not implement
-  the stop function.
-* reboot_nb: notifier block that is registered for reboot notifications, for
-  internal use only. If the driver calls watchdog_stop_on_reboot, watchdog core
-  will stop the watchdog on such notifications.
-* restart_nb: notifier block that is registered for machine restart, for
-  internal use only. If a watchdog is capable of restarting the machine, it
-  should define ops->restart. Priority can be changed through
-  watchdog_set_restart_priority.
-* bootstatus: status of the device after booting (reported with watchdog
-  WDIOF_* status bits).
-* driver_data: a pointer to the drivers private data of a watchdog device.
-  This data should only be accessed via the watchdog_set_drvdata and
-  watchdog_get_drvdata routines.
-* wd_data: a pointer to watchdog core internal data.
-* status: this field contains a number of status bits that give extra
-  information about the status of the device (Like: is the watchdog timer
-  running/active, or is the nowayout bit set).
-* deferred: entry in wtd_deferred_reg_list which is used to
-  register early initialized watchdogs.
-
-The list of watchdog operations is defined as:
-
-struct watchdog_ops {
-	struct module *owner;
-	/* mandatory operations */
-	int (*start)(struct watchdog_device *);
-	int (*stop)(struct watchdog_device *);
-	/* optional operations */
-	int (*ping)(struct watchdog_device *);
-	unsigned int (*status)(struct watchdog_device *);
-	int (*set_timeout)(struct watchdog_device *, unsigned int);
-	int (*set_pretimeout)(struct watchdog_device *, unsigned int);
-	unsigned int (*get_timeleft)(struct watchdog_device *);
-	int (*restart)(struct watchdog_device *);
-	long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long);
-};
-
-It is important that you first define the module owner of the watchdog timer
-driver's operations. This module owner will be used to lock the module when
-the watchdog is active. (This to avoid a system crash when you unload the
-module and /dev/watchdog is still open).
-
-Some operations are mandatory and some are optional. The mandatory operations
-are:
-* start: this is a pointer to the routine that starts the watchdog timer
-  device.
-  The routine needs a pointer to the watchdog timer device structure as a
-  parameter. It returns zero on success or a negative errno code for failure.
-
-Not all watchdog timer hardware supports the same functionality. That's why
-all other routines/operations are optional. They only need to be provided if
-they are supported. These optional routines/operations are:
-* stop: with this routine the watchdog timer device is being stopped.
-  The routine needs a pointer to the watchdog timer device structure as a
-  parameter. It returns zero on success or a negative errno code for failure.
-  Some watchdog timer hardware can only be started and not be stopped. A
-  driver supporting such hardware does not have to implement the stop routine.
-  If a driver has no stop function, the watchdog core will set WDOG_HW_RUNNING
-  and start calling the driver's keepalive pings function after the watchdog
-  device is closed.
-  If a watchdog driver does not implement the stop function, it must set
-  max_hw_heartbeat_ms.
-* ping: this is the routine that sends a keepalive ping to the watchdog timer
-  hardware.
-  The routine needs a pointer to the watchdog timer device structure as a
-  parameter. It returns zero on success or a negative errno code for failure.
-  Most hardware that does not support this as a separate function uses the
-  start function to restart the watchdog timer hardware. And that's also what
-  the watchdog timer driver core does: to send a keepalive ping to the watchdog
-  timer hardware it will either use the ping operation (when available) or the
-  start operation (when the ping operation is not available).
-  (Note: the WDIOC_KEEPALIVE ioctl call will only be active when the
-  WDIOF_KEEPALIVEPING bit has been set in the option field on the watchdog's
-  info structure).
-* status: this routine checks the status of the watchdog timer device. The
-  status of the device is reported with watchdog WDIOF_* status flags/bits.
-  WDIOF_MAGICCLOSE and WDIOF_KEEPALIVEPING are reported by the watchdog core;
-  it is not necessary to report those bits from the driver. Also, if no status
-  function is provided by the driver, the watchdog core reports the status bits
-  provided in the bootstatus variable of struct watchdog_device.
-* set_timeout: this routine checks and changes the timeout of the watchdog
-  timer device. It returns 0 on success, -EINVAL for "parameter out of range"
-  and -EIO for "could not write value to the watchdog". On success this
-  routine should set the timeout value of the watchdog_device to the
-  achieved timeout value (which may be different from the requested one
-  because the watchdog does not necessarily have a 1 second resolution).
-  Drivers implementing max_hw_heartbeat_ms set the hardware watchdog heartbeat
-  to the minimum of timeout and max_hw_heartbeat_ms. Those drivers set the
-  timeout value of the watchdog_device either to the requested timeout value
-  (if it is larger than max_hw_heartbeat_ms), or to the achieved timeout value.
-  (Note: the WDIOF_SETTIMEOUT needs to be set in the options field of the
-  watchdog's info structure).
-  If the watchdog driver does not have to perform any action but setting the
-  watchdog_device.timeout, this callback can be omitted.
-  If set_timeout is not provided but, WDIOF_SETTIMEOUT is set, the watchdog
-  infrastructure updates the timeout value of the watchdog_device internally
-  to the requested value.
-  If the pretimeout feature is used (WDIOF_PRETIMEOUT), then set_timeout must
-  also take care of checking if pretimeout is still valid and set up the timer
-  accordingly. This can't be done in the core without races, so it is the
-  duty of the driver.
-* set_pretimeout: this routine checks and changes the pretimeout value of
-  the watchdog. It is optional because not all watchdogs support pretimeout
-  notification. The timeout value is not an absolute time, but the number of
-  seconds before the actual timeout would happen. It returns 0 on success,
-  -EINVAL for "parameter out of range" and -EIO for "could not write value to
-  the watchdog". A value of 0 disables pretimeout notification.
-  (Note: the WDIOF_PRETIMEOUT needs to be set in the options field of the
-  watchdog's info structure).
-  If the watchdog driver does not have to perform any action but setting the
-  watchdog_device.pretimeout, this callback can be omitted. That means if
-  set_pretimeout is not provided but WDIOF_PRETIMEOUT is set, the watchdog
-  infrastructure updates the pretimeout value of the watchdog_device internally
-  to the requested value.
-* get_timeleft: this routines returns the time that's left before a reset.
-* restart: this routine restarts the machine. It returns 0 on success or a
-  negative errno code for failure.
-* ioctl: if this routine is present then it will be called first before we do
-  our own internal ioctl call handling. This routine should return -ENOIOCTLCMD
-  if a command is not supported. The parameters that are passed to the ioctl
-  call are: watchdog_device, cmd and arg.
-
-The status bits should (preferably) be set with the set_bit and clear_bit alike
-bit-operations. The status bits that are defined are:
-* WDOG_ACTIVE: this status bit indicates whether or not a watchdog timer device
-  is active or not from user perspective. User space is expected to send
-  heartbeat requests to the driver while this flag is set.
-* WDOG_NO_WAY_OUT: this bit stores the nowayout setting for the watchdog.
-  If this bit is set then the watchdog timer will not be able to stop.
-* WDOG_HW_RUNNING: Set by the watchdog driver if the hardware watchdog is
-  running. The bit must be set if the watchdog timer hardware can not be
-  stopped. The bit may also be set if the watchdog timer is running after
-  booting, before the watchdog device is opened. If set, the watchdog
-  infrastructure will send keepalives to the watchdog hardware while
-  WDOG_ACTIVE is not set.
-  Note: when you register the watchdog timer device with this bit set,
-  then opening /dev/watchdog will skip the start operation but send a keepalive
-  request instead.
-
-  To set the WDOG_NO_WAY_OUT status bit (before registering your watchdog
-  timer device) you can either:
-  * set it statically in your watchdog_device struct with
-	.status = WATCHDOG_NOWAYOUT_INIT_STATUS,
-    (this will set the value the same as CONFIG_WATCHDOG_NOWAYOUT) or
-  * use the following helper function:
-  static inline void watchdog_set_nowayout(struct watchdog_device *wdd, int nowayout)
-
-Note: The WatchDog Timer Driver Core supports the magic close feature and
-the nowayout feature. To use the magic close feature you must set the
-WDIOF_MAGICCLOSE bit in the options field of the watchdog's info structure.
-The nowayout feature will overrule the magic close feature.
-
-To get or set driver specific data the following two helper functions should be
-used:
-
-static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data)
-static inline void *watchdog_get_drvdata(struct watchdog_device *wdd)
-
-The watchdog_set_drvdata function allows you to add driver specific data. The
-arguments of this function are the watchdog device where you want to add the
-driver specific data to and a pointer to the data itself.
-
-The watchdog_get_drvdata function allows you to retrieve driver specific data.
-The argument of this function is the watchdog device where you want to retrieve
-data from. The function returns the pointer to the driver specific data.
-
-To initialize the timeout field, the following function can be used:
-
-extern int watchdog_init_timeout(struct watchdog_device *wdd,
-                                  unsigned int timeout_parm, struct device *dev);
-
-The watchdog_init_timeout function allows you to initialize the timeout field
-using the module timeout parameter or by retrieving the timeout-sec property from
-the device tree (if the module timeout parameter is invalid). Best practice is
-to set the default timeout value as timeout value in the watchdog_device and
-then use this function to set the user "preferred" timeout value.
-This routine returns zero on success and a negative errno code for failure.
-
-To disable the watchdog on reboot, the user must call the following helper:
-
-static inline void watchdog_stop_on_reboot(struct watchdog_device *wdd);
-
-To disable the watchdog when unregistering the watchdog, the user must call
-the following helper. Note that this will only stop the watchdog if the
-nowayout flag is not set.
-
-static inline void watchdog_stop_on_unregister(struct watchdog_device *wdd);
-
-To change the priority of the restart handler the following helper should be
-used:
-
-void watchdog_set_restart_priority(struct watchdog_device *wdd, int priority);
-
-User should follow the following guidelines for setting the priority:
-* 0: should be called in last resort, has limited restart capabilities
-* 128: default restart handler, use if no other handler is expected to be
-  available, and/or if restart is sufficient to restart the entire system
-* 255: highest priority, will preempt all other restart handlers
-
-To raise a pretimeout notification, the following function should be used:
-
-void watchdog_notify_pretimeout(struct watchdog_device *wdd)
-
-The function can be called in the interrupt context. If watchdog pretimeout
-governor framework (kbuild CONFIG_WATCHDOG_PRETIMEOUT_GOV symbol) is enabled,
-an action is taken by a preconfigured pretimeout governor preassigned to
-the watchdog device. If watchdog pretimeout governor framework is not
-enabled, watchdog_notify_pretimeout() prints a notification message to
-the kernel log buffer.
diff --git a/Documentation/watchdog/watchdog-parameters.rst b/Documentation/watchdog/watchdog-parameters.rst
new file mode 100644
index 000000000000..b121caae7798
--- /dev/null
+++ b/Documentation/watchdog/watchdog-parameters.rst
@@ -0,0 +1,736 @@
+==========================
+WatchDog Module Parameters
+==========================
+
+This file provides information on the module parameters of many of
+the Linux watchdog drivers.  Watchdog driver parameter specs should
+be listed here unless the driver has its own driver-specific information
+file.
+
+See Documentation/admin-guide/kernel-parameters.rst for information on
+providing kernel parameters for builtin drivers versus loadable
+modules.
+
+-------------------------------------------------
+
+acquirewdt:
+    wdt_stop:
+	Acquire WDT 'stop' io port (default 0x43)
+    wdt_start:
+	Acquire WDT 'start' io port (default 0x443)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+advantechwdt:
+    wdt_stop:
+	Advantech WDT 'stop' io port (default 0x443)
+    wdt_start:
+	Advantech WDT 'start' io port (default 0x443)
+    timeout:
+	Watchdog timeout in seconds. 1<= timeout <=63, default=60.
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+alim1535_wdt:
+    timeout:
+	Watchdog timeout in seconds. (0 < timeout < 18000, default=60
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+alim7101_wdt:
+    timeout:
+	Watchdog timeout in seconds. (1<=timeout<=3600, default=30
+    use_gpio:
+	Use the gpio watchdog (required by old cobalt boards).
+	default=0/off/no
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+ar7_wdt:
+    margin:
+	Watchdog margin in seconds (default=60)
+    nowayout:
+	Disable watchdog shutdown on close
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+armada_37xx_wdt:
+    timeout:
+	Watchdog timeout in seconds. (default=120)
+    nowayout:
+	Disable watchdog shutdown on close
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+at91rm9200_wdt:
+    wdt_time:
+	Watchdog time in seconds. (default=5)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+at91sam9_wdt:
+    heartbeat:
+	Watchdog heartbeats in seconds. (default = 15)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+bcm47xx_wdt:
+    wdt_time:
+	Watchdog time in seconds. (default=30)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+coh901327_wdt:
+    margin:
+	Watchdog margin in seconds (default 60s)
+
+-------------------------------------------------
+
+cpu5wdt:
+    port:
+	base address of watchdog card, default is 0x91
+    verbose:
+	be verbose, default is 0 (no)
+    ticks:
+	count down ticks, default is 10000
+
+-------------------------------------------------
+
+cpwd:
+    wd0_timeout:
+	Default watchdog0 timeout in 1/10secs
+    wd1_timeout:
+	Default watchdog1 timeout in 1/10secs
+    wd2_timeout:
+	Default watchdog2 timeout in 1/10secs
+
+-------------------------------------------------
+
+da9052wdt:
+    timeout:
+	Watchdog timeout in seconds. 2<= timeout <=131, default=2.048s
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+davinci_wdt:
+    heartbeat:
+	Watchdog heartbeat period in seconds from 1 to 600, default 60
+
+-------------------------------------------------
+
+ebc-c384_wdt:
+    timeout:
+	Watchdog timeout in seconds. (1<=timeout<=15300, default=60)
+    nowayout:
+	Watchdog cannot be stopped once started
+
+-------------------------------------------------
+
+ep93xx_wdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+    timeout:
+	Watchdog timeout in seconds. (1<=timeout<=3600, default=TBD)
+
+-------------------------------------------------
+
+eurotechwdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+    io:
+	Eurotech WDT io port (default=0x3f0)
+    irq:
+	Eurotech WDT irq (default=10)
+    ev:
+	Eurotech WDT event type (default is `int`)
+
+-------------------------------------------------
+
+gef_wdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+geodewdt:
+    timeout:
+	Watchdog timeout in seconds. 1<= timeout <=131, default=60.
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+i6300esb:
+    heartbeat:
+	Watchdog heartbeat in seconds. (1<heartbeat<2046, default=30)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+iTCO_wdt:
+    heartbeat:
+	Watchdog heartbeat in seconds.
+	(2<heartbeat<39 (TCO v1) or 613 (TCO v2), default=30)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+iTCO_vendor_support:
+    vendorsupport:
+	iTCO vendor specific support mode, default=0 (none),
+	1=SuperMicro Pent3, 2=SuperMicro Pent4+, 911=Broken SMI BIOS
+
+-------------------------------------------------
+
+ib700wdt:
+    timeout:
+	Watchdog timeout in seconds. 0<= timeout <=30, default=30.
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+ibmasr:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+imx2_wdt:
+    timeout:
+	Watchdog timeout in seconds (default 60 s)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+indydog:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+iop_wdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+it8712f_wdt:
+    margin:
+	Watchdog margin in seconds (default 60)
+    nowayout:
+	Disable watchdog shutdown on close
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+it87_wdt:
+    nogameport:
+	Forbid the activation of game port, default=0
+    nocir:
+	Forbid the use of CIR (workaround for some buggy setups); set to 1 if
+system resets despite watchdog daemon running, default=0
+    exclusive:
+	Watchdog exclusive device open, default=1
+    timeout:
+	Watchdog timeout in seconds, default=60
+    testmode:
+	Watchdog test mode (1 = no reboot), default=0
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+ixp4xx_wdt:
+    heartbeat:
+	Watchdog heartbeat in seconds (default 60s)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+ks8695_wdt:
+    wdt_time:
+	Watchdog time in seconds. (default=5)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+machzwd:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+    action:
+	after watchdog resets, generate:
+	0 = RESET(*)  1 = SMI  2 = NMI  3 = SCI
+
+-------------------------------------------------
+
+max63xx_wdt:
+    heartbeat:
+	Watchdog heartbeat period in seconds from 1 to 60, default 60
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+    nodelay:
+	Force selection of a timeout setting without initial delay
+	(max6373/74 only, default=0)
+
+-------------------------------------------------
+
+mixcomwd:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+mpc8xxx_wdt:
+    timeout:
+	Watchdog timeout in ticks. (0<timeout<65536, default=65535)
+    reset:
+	Watchdog Interrupt/Reset Mode. 0 = interrupt, 1 = reset
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+mv64x60_wdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+ni903x_wdt:
+    timeout:
+	Initial watchdog timeout in seconds (0<timeout<516, default=60)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+nic7018_wdt:
+    timeout:
+	Initial watchdog timeout in seconds (0<timeout<464, default=80)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+nuc900_wdt:
+    heartbeat:
+	Watchdog heartbeats in seconds.
+	(default = 15)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+omap_wdt:
+    timer_margin:
+	initial watchdog timeout (in seconds)
+    early_enable:
+	Watchdog is started on module insertion (default=0
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+orion_wdt:
+    heartbeat:
+	Initial watchdog heartbeat in seconds
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+pc87413_wdt:
+    io:
+	pc87413 WDT I/O port (default: io).
+    timeout:
+	Watchdog timeout in minutes (default=timeout).
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+pika_wdt:
+    heartbeat:
+	Watchdog heartbeats in seconds. (default = 15)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+pnx4008_wdt:
+    heartbeat:
+	Watchdog heartbeat period in seconds from 1 to 60, default 19
+    nowayout:
+	Set to 1 to keep watchdog running after device release
+
+-------------------------------------------------
+
+pnx833x_wdt:
+    timeout:
+	Watchdog timeout in Mhz. (68Mhz clock), default=2040000000 (30 seconds)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+    start_enabled:
+	Watchdog is started on module insertion (default=1)
+
+-------------------------------------------------
+
+rc32434_wdt:
+    timeout:
+	Watchdog timeout value, in seconds (default=20)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+riowd:
+    riowd_timeout:
+	Watchdog timeout in minutes (default=1)
+
+-------------------------------------------------
+
+s3c2410_wdt:
+    tmr_margin:
+	Watchdog tmr_margin in seconds. (default=15)
+    tmr_atboot:
+	Watchdog is started at boot time if set to 1, default=0
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+    soft_noboot:
+	Watchdog action, set to 1 to ignore reboots, 0 to reboot
+    debug:
+	Watchdog debug, set to >1 for debug, (default 0)
+
+-------------------------------------------------
+
+sa1100_wdt:
+    margin:
+	Watchdog margin in seconds (default 60s)
+
+-------------------------------------------------
+
+sb_wdog:
+    timeout:
+	Watchdog timeout in microseconds (max/default 8388607 or 8.3ish secs)
+
+-------------------------------------------------
+
+sbc60xxwdt:
+    wdt_stop:
+	SBC60xx WDT 'stop' io port (default 0x45)
+    wdt_start:
+	SBC60xx WDT 'start' io port (default 0x443)
+    timeout:
+	Watchdog timeout in seconds. (1<=timeout<=3600, default=30)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+sbc7240_wdt:
+    timeout:
+	Watchdog timeout in seconds. (1<=timeout<=255, default=30)
+    nowayout:
+	Disable watchdog when closing device file
+
+-------------------------------------------------
+
+sbc8360:
+    timeout:
+	Index into timeout table (0-63) (default=27 (60s))
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+sbc_epx_c3:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+sbc_fitpc2_wdt:
+    margin:
+	Watchdog margin in seconds (default 60s)
+    nowayout:
+	Watchdog cannot be stopped once started
+
+-------------------------------------------------
+
+sbsa_gwdt:
+    timeout:
+	Watchdog timeout in seconds. (default 10s)
+    action:
+	Watchdog action at the first stage timeout,
+	set to 0 to ignore, 1 to panic. (default=0)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+sc1200wdt:
+    isapnp:
+	When set to 0 driver ISA PnP support will be disabled (default=1)
+    io:
+	io port
+    timeout:
+	range is 0-255 minutes, default is 1
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+sc520_wdt:
+    timeout:
+	Watchdog timeout in seconds. (1 <= timeout <= 3600, default=30)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+sch311x_wdt:
+    force_id:
+	Override the detected device ID
+    therm_trip:
+	Should a ThermTrip trigger the reset generator
+    timeout:
+	Watchdog timeout in seconds. 1<= timeout <=15300, default=60
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+scx200_wdt:
+    margin:
+	Watchdog margin in seconds
+    nowayout:
+	Disable watchdog shutdown on close
+
+-------------------------------------------------
+
+shwdt:
+    clock_division_ratio:
+	Clock division ratio. Valid ranges are from 0x5 (1.31ms)
+	to 0x7 (5.25ms). (default=7)
+    heartbeat:
+	Watchdog heartbeat in seconds. (1 <= heartbeat <= 3600, default=30
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+smsc37b787_wdt:
+    timeout:
+	range is 1-255 units, default is 60
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+softdog:
+    soft_margin:
+	Watchdog soft_margin in seconds.
+	(0 < soft_margin < 65536, default=60)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+    soft_noboot:
+	Softdog action, set to 1 to ignore reboots, 0 to reboot
+	(default=0)
+
+-------------------------------------------------
+
+stmp3xxx_wdt:
+    heartbeat:
+	Watchdog heartbeat period in seconds from 1 to 4194304, default 19
+
+-------------------------------------------------
+
+tegra_wdt:
+    heartbeat:
+	Watchdog heartbeats in seconds. (default = 120)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+ts72xx_wdt:
+    timeout:
+	Watchdog timeout in seconds. (1 <= timeout <= 8, default=8)
+    nowayout:
+	Disable watchdog shutdown on close
+
+-------------------------------------------------
+
+twl4030_wdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+txx9wdt:
+    timeout:
+	Watchdog timeout in seconds. (0<timeout<N, default=60)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+uniphier_wdt:
+    timeout:
+	Watchdog timeout in power of two seconds.
+	(1 <= timeout <= 128, default=64)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+w83627hf_wdt:
+    wdt_io:
+	w83627hf/thf WDT io port (default 0x2E)
+    timeout:
+	Watchdog timeout in seconds. 1 <= timeout <= 255, default=60.
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+w83877f_wdt:
+    timeout:
+	Watchdog timeout in seconds. (1<=timeout<=3600, default=30)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+w83977f_wdt:
+    timeout:
+	Watchdog timeout in seconds (15..7635), default=45)
+    testmode:
+	Watchdog testmode (1 = no reboot), default=0
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+wafer5823wdt:
+    timeout:
+	Watchdog timeout in seconds. 1 <= timeout <= 255, default=60.
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+wdt285:
+    soft_margin:
+	Watchdog timeout in seconds (default=60)
+
+-------------------------------------------------
+
+wdt977:
+    timeout:
+	Watchdog timeout in seconds (60..15300, default=60)
+    testmode:
+	Watchdog testmode (1 = no reboot), default=0
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+wm831x_wdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+wm8350_wdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+sun4v_wdt:
+    timeout_ms:
+	Watchdog timeout in milliseconds 1..180000, default=60000)
+    nowayout:
+	Watchdog cannot be stopped once started
diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt
deleted file mode 100644
index 0b88e333f9e1..000000000000
--- a/Documentation/watchdog/watchdog-parameters.txt
+++ /dev/null
@@ -1,410 +0,0 @@
-This file provides information on the module parameters of many of
-the Linux watchdog drivers.  Watchdog driver parameter specs should
-be listed here unless the driver has its own driver-specific information
-file.
-
-
-See Documentation/admin-guide/kernel-parameters.rst for information on
-providing kernel parameters for builtin drivers versus loadable
-modules.
-
-
--------------------------------------------------
-acquirewdt:
-wdt_stop: Acquire WDT 'stop' io port (default 0x43)
-wdt_start: Acquire WDT 'start' io port (default 0x443)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-advantechwdt:
-wdt_stop: Advantech WDT 'stop' io port (default 0x443)
-wdt_start: Advantech WDT 'start' io port (default 0x443)
-timeout: Watchdog timeout in seconds. 1<= timeout <=63, default=60.
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-alim1535_wdt:
-timeout: Watchdog timeout in seconds. (0 < timeout < 18000, default=60
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-alim7101_wdt:
-timeout: Watchdog timeout in seconds. (1<=timeout<=3600, default=30
-use_gpio: Use the gpio watchdog (required by old cobalt boards).
-	default=0/off/no
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-ar7_wdt:
-margin: Watchdog margin in seconds (default=60)
-nowayout: Disable watchdog shutdown on close
-	(default=kernel config parameter)
--------------------------------------------------
-armada_37xx_wdt:
-timeout: Watchdog timeout in seconds. (default=120)
-nowayout: Disable watchdog shutdown on close
-	(default=kernel config parameter)
--------------------------------------------------
-at91rm9200_wdt:
-wdt_time: Watchdog time in seconds. (default=5)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-at91sam9_wdt:
-heartbeat: Watchdog heartbeats in seconds. (default = 15)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-bcm47xx_wdt:
-wdt_time: Watchdog time in seconds. (default=30)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-coh901327_wdt:
-margin: Watchdog margin in seconds (default 60s)
--------------------------------------------------
-cpu5wdt:
-port: base address of watchdog card, default is 0x91
-verbose: be verbose, default is 0 (no)
-ticks: count down ticks, default is 10000
--------------------------------------------------
-cpwd:
-wd0_timeout: Default watchdog0 timeout in 1/10secs
-wd1_timeout: Default watchdog1 timeout in 1/10secs
-wd2_timeout: Default watchdog2 timeout in 1/10secs
--------------------------------------------------
-da9052wdt:
-timeout: Watchdog timeout in seconds. 2<= timeout <=131, default=2.048s
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-davinci_wdt:
-heartbeat: Watchdog heartbeat period in seconds from 1 to 600, default 60
--------------------------------------------------
-ebc-c384_wdt:
-timeout: Watchdog timeout in seconds. (1<=timeout<=15300, default=60)
-nowayout: Watchdog cannot be stopped once started
--------------------------------------------------
-ep93xx_wdt:
-nowayout: Watchdog cannot be stopped once started
-timeout: Watchdog timeout in seconds. (1<=timeout<=3600, default=TBD)
--------------------------------------------------
-eurotechwdt:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
-io: Eurotech WDT io port (default=0x3f0)
-irq: Eurotech WDT irq (default=10)
-ev: Eurotech WDT event type (default is `int')
--------------------------------------------------
-gef_wdt:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-geodewdt:
-timeout: Watchdog timeout in seconds. 1<= timeout <=131, default=60.
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-i6300esb:
-heartbeat: Watchdog heartbeat in seconds. (1<heartbeat<2046, default=30)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-iTCO_wdt:
-heartbeat: Watchdog heartbeat in seconds.
-	(2<heartbeat<39 (TCO v1) or 613 (TCO v2), default=30)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-iTCO_vendor_support:
-vendorsupport: iTCO vendor specific support mode, default=0 (none),
-	1=SuperMicro Pent3, 2=SuperMicro Pent4+, 911=Broken SMI BIOS
--------------------------------------------------
-ib700wdt:
-timeout: Watchdog timeout in seconds. 0<= timeout <=30, default=30.
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-ibmasr:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-imx2_wdt:
-timeout: Watchdog timeout in seconds (default 60 s)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-indydog:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-iop_wdt:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-it8712f_wdt:
-margin: Watchdog margin in seconds (default 60)
-nowayout: Disable watchdog shutdown on close
-	(default=kernel config parameter)
--------------------------------------------------
-it87_wdt:
-nogameport: Forbid the activation of game port, default=0
-nocir: Forbid the use of CIR (workaround for some buggy setups); set to 1 if
-system resets despite watchdog daemon running, default=0
-exclusive: Watchdog exclusive device open, default=1
-timeout: Watchdog timeout in seconds, default=60
-testmode: Watchdog test mode (1 = no reboot), default=0
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-ixp4xx_wdt:
-heartbeat: Watchdog heartbeat in seconds (default 60s)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-ks8695_wdt:
-wdt_time: Watchdog time in seconds. (default=5)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-machzwd:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
-action: after watchdog resets, generate:
-	0 = RESET(*)  1 = SMI  2 = NMI  3 = SCI
--------------------------------------------------
-max63xx_wdt:
-heartbeat: Watchdog heartbeat period in seconds from 1 to 60, default 60
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
-nodelay: Force selection of a timeout setting without initial delay
-	(max6373/74 only, default=0)
--------------------------------------------------
-mixcomwd:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-mpc8xxx_wdt:
-timeout: Watchdog timeout in ticks. (0<timeout<65536, default=65535)
-reset: Watchdog Interrupt/Reset Mode. 0 = interrupt, 1 = reset
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-mv64x60_wdt:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-ni903x_wdt:
-timeout: Initial watchdog timeout in seconds (0<timeout<516, default=60)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-nic7018_wdt:
-timeout: Initial watchdog timeout in seconds (0<timeout<464, default=80)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-nuc900_wdt:
-heartbeat: Watchdog heartbeats in seconds.
-	(default = 15)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-omap_wdt:
-timer_margin: initial watchdog timeout (in seconds)
-early_enable: Watchdog is started on module insertion (default=0
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-orion_wdt:
-heartbeat: Initial watchdog heartbeat in seconds
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-pc87413_wdt:
-io: pc87413 WDT I/O port (default: io).
-timeout: Watchdog timeout in minutes (default=timeout).
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-pika_wdt:
-heartbeat: Watchdog heartbeats in seconds. (default = 15)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-pnx4008_wdt:
-heartbeat: Watchdog heartbeat period in seconds from 1 to 60, default 19
-nowayout: Set to 1 to keep watchdog running after device release
--------------------------------------------------
-pnx833x_wdt:
-timeout: Watchdog timeout in Mhz. (68Mhz clock), default=2040000000 (30 seconds)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
-start_enabled: Watchdog is started on module insertion (default=1)
--------------------------------------------------
-rc32434_wdt:
-timeout: Watchdog timeout value, in seconds (default=20)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-riowd:
-riowd_timeout: Watchdog timeout in minutes (default=1)
--------------------------------------------------
-s3c2410_wdt:
-tmr_margin: Watchdog tmr_margin in seconds. (default=15)
-tmr_atboot: Watchdog is started at boot time if set to 1, default=0
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
-soft_noboot: Watchdog action, set to 1 to ignore reboots, 0 to reboot
-debug: Watchdog debug, set to >1 for debug, (default 0)
--------------------------------------------------
-sa1100_wdt:
-margin: Watchdog margin in seconds (default 60s)
--------------------------------------------------
-sb_wdog:
-timeout: Watchdog timeout in microseconds (max/default 8388607 or 8.3ish secs)
--------------------------------------------------
-sbc60xxwdt:
-wdt_stop: SBC60xx WDT 'stop' io port (default 0x45)
-wdt_start: SBC60xx WDT 'start' io port (default 0x443)
-timeout: Watchdog timeout in seconds. (1<=timeout<=3600, default=30)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-sbc7240_wdt:
-timeout: Watchdog timeout in seconds. (1<=timeout<=255, default=30)
-nowayout: Disable watchdog when closing device file
--------------------------------------------------
-sbc8360:
-timeout: Index into timeout table (0-63) (default=27 (60s))
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-sbc_epx_c3:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-sbc_fitpc2_wdt:
-margin: Watchdog margin in seconds (default 60s)
-nowayout: Watchdog cannot be stopped once started
--------------------------------------------------
-sbsa_gwdt:
-timeout: Watchdog timeout in seconds. (default 10s)
-action: Watchdog action at the first stage timeout,
-	set to 0 to ignore, 1 to panic. (default=0)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-sc1200wdt:
-isapnp: When set to 0 driver ISA PnP support will be disabled (default=1)
-io: io port
-timeout: range is 0-255 minutes, default is 1
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-sc520_wdt:
-timeout: Watchdog timeout in seconds. (1 <= timeout <= 3600, default=30)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-sch311x_wdt:
-force_id: Override the detected device ID
-therm_trip: Should a ThermTrip trigger the reset generator
-timeout: Watchdog timeout in seconds. 1<= timeout <=15300, default=60
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-scx200_wdt:
-margin: Watchdog margin in seconds
-nowayout: Disable watchdog shutdown on close
--------------------------------------------------
-shwdt:
-clock_division_ratio: Clock division ratio. Valid ranges are from 0x5 (1.31ms)
-	to 0x7 (5.25ms). (default=7)
-heartbeat: Watchdog heartbeat in seconds. (1 <= heartbeat <= 3600, default=30
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-smsc37b787_wdt:
-timeout: range is 1-255 units, default is 60
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-softdog:
-soft_margin: Watchdog soft_margin in seconds.
-	(0 < soft_margin < 65536, default=60)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
-soft_noboot: Softdog action, set to 1 to ignore reboots, 0 to reboot
-	(default=0)
--------------------------------------------------
-stmp3xxx_wdt:
-heartbeat: Watchdog heartbeat period in seconds from 1 to 4194304, default 19
--------------------------------------------------
-tegra_wdt:
-heartbeat: Watchdog heartbeats in seconds. (default = 120)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-ts72xx_wdt:
-timeout: Watchdog timeout in seconds. (1 <= timeout <= 8, default=8)
-nowayout: Disable watchdog shutdown on close
--------------------------------------------------
-twl4030_wdt:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-txx9wdt:
-timeout: Watchdog timeout in seconds. (0<timeout<N, default=60)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-uniphier_wdt:
-timeout: Watchdog timeout in power of two seconds.
-	(1 <= timeout <= 128, default=64)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-w83627hf_wdt:
-wdt_io: w83627hf/thf WDT io port (default 0x2E)
-timeout: Watchdog timeout in seconds. 1 <= timeout <= 255, default=60.
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-w83877f_wdt:
-timeout: Watchdog timeout in seconds. (1<=timeout<=3600, default=30)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-w83977f_wdt:
-timeout: Watchdog timeout in seconds (15..7635), default=45)
-testmode: Watchdog testmode (1 = no reboot), default=0
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-wafer5823wdt:
-timeout: Watchdog timeout in seconds. 1 <= timeout <= 255, default=60.
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-wdt285:
-soft_margin: Watchdog timeout in seconds (default=60)
--------------------------------------------------
-wdt977:
-timeout: Watchdog timeout in seconds (60..15300, default=60)
-testmode: Watchdog testmode (1 = no reboot), default=0
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-wm831x_wdt:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-wm8350_wdt:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-sun4v_wdt:
-timeout_ms: Watchdog timeout in milliseconds 1..180000, default=60000)
-nowayout: Watchdog cannot be stopped once started
--------------------------------------------------
diff --git a/Documentation/watchdog/watchdog-pm.rst b/Documentation/watchdog/watchdog-pm.rst
new file mode 100644
index 000000000000..646e1f28f31f
--- /dev/null
+++ b/Documentation/watchdog/watchdog-pm.rst
@@ -0,0 +1,22 @@
+===============================================
+The Linux WatchDog Timer Power Management Guide
+===============================================
+
+Last reviewed: 17-Dec-2018
+
+Wolfram Sang <wsa+renesas@sang-engineering.com>
+
+Introduction
+------------
+This document states rules about watchdog devices and their power management
+handling to ensure a uniform behaviour for Linux systems.
+
+
+Ping on resume
+--------------
+On resume, a watchdog timer shall be reset to its selected value to give
+userspace enough time to resume. [1] [2]
+
+[1] https://patchwork.kernel.org/patch/10252209/
+
+[2] https://patchwork.kernel.org/patch/10711625/
diff --git a/Documentation/watchdog/watchdog-pm.txt b/Documentation/watchdog/watchdog-pm.txt
deleted file mode 100644
index 7a4dd46e0d24..000000000000
--- a/Documentation/watchdog/watchdog-pm.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-The Linux WatchDog Timer Power Management Guide
-===============================================
-Last reviewed: 17-Dec-2018
-
-Wolfram Sang <wsa+renesas@sang-engineering.com>
-
-Introduction
-------------
-This document states rules about watchdog devices and their power management
-handling to ensure a uniform behaviour for Linux systems.
-
-
-Ping on resume
---------------
-On resume, a watchdog timer shall be reset to its selected value to give
-userspace enough time to resume. [1] [2]
-
-[1] https://patchwork.kernel.org/patch/10252209/
-[2] https://patchwork.kernel.org/patch/10711625/
diff --git a/Documentation/watchdog/wdt.rst b/Documentation/watchdog/wdt.rst
new file mode 100644
index 000000000000..d97b0361535b
--- /dev/null
+++ b/Documentation/watchdog/wdt.rst
@@ -0,0 +1,63 @@
+============================================================
+WDT Watchdog Timer Interfaces For The Linux Operating System
+============================================================
+
+Last Reviewed: 10/05/2007
+
+Alan Cox <alan@lxorguk.ukuu.org.uk>
+
+	- ICS	WDT501-P
+	- ICS	WDT501-P (no fan tachometer)
+	- ICS	WDT500-P
+
+All the interfaces provide /dev/watchdog, which when open must be written
+to within a timeout or the machine will reboot. Each write delays the reboot
+time another timeout. In the case of the software watchdog the ability to
+reboot will depend on the state of the machines and interrupts. The hardware
+boards physically pull the machine down off their own onboard timers and
+will reboot from almost anything.
+
+A second temperature monitoring interface is available on the WDT501P cards.
+This provides /dev/temperature. This is the machine internal temperature in
+degrees Fahrenheit. Each read returns a single byte giving the temperature.
+
+The third interface logs kernel messages on additional alert events.
+
+The ICS ISA-bus wdt card cannot be safely probed for. Instead you need to
+pass IO address and IRQ boot parameters.  E.g.::
+
+	wdt.io=0x240 wdt.irq=11
+
+Other "wdt" driver parameters are:
+
+	===========	======================================================
+	heartbeat	Watchdog heartbeat in seconds (default 60)
+	nowayout	Watchdog cannot be stopped once started (kernel
+			build parameter)
+	tachometer	WDT501-P Fan Tachometer support (0=disable, default=0)
+	type		WDT501-P Card type (500 or 501, default=500)
+	===========	======================================================
+
+Features
+--------
+
+================   =======	   =======
+		   WDT501P	   WDT500P
+================   =======	   =======
+Reboot Timer	   X               X
+External Reboot	   X	           X
+I/O Port Monitor   o		   o
+Temperature	   X		   o
+Fan Speed          X		   o
+Power Under	   X               o
+Power Over         X               o
+Overheat           X               o
+================   =======	   =======
+
+The external event interfaces on the WDT boards are not currently supported.
+Minor numbers are however allocated for it.
+
+
+Example Watchdog Driver:
+
+	see samples/watchdog/watchdog-simple.c
diff --git a/Documentation/watchdog/wdt.txt b/Documentation/watchdog/wdt.txt
deleted file mode 100644
index ed2f0b860869..000000000000
--- a/Documentation/watchdog/wdt.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-Last Reviewed: 10/05/2007
-
-	WDT Watchdog Timer Interfaces For The Linux Operating System
-		Alan Cox <alan@lxorguk.ukuu.org.uk>
-
-	ICS	WDT501-P
-	ICS	WDT501-P (no fan tachometer)
-	ICS	WDT500-P
-
-All the interfaces provide /dev/watchdog, which when open must be written
-to within a timeout or the machine will reboot. Each write delays the reboot
-time another timeout. In the case of the software watchdog the ability to
-reboot will depend on the state of the machines and interrupts. The hardware
-boards physically pull the machine down off their own onboard timers and
-will reboot from almost anything.
-
-A second temperature monitoring interface is available on the WDT501P cards.
-This provides /dev/temperature. This is the machine internal temperature in
-degrees Fahrenheit. Each read returns a single byte giving the temperature.
-
-The third interface logs kernel messages on additional alert events.
-
-The ICS ISA-bus wdt card cannot be safely probed for. Instead you need to
-pass IO address and IRQ boot parameters.  E.g.:
-	wdt.io=0x240 wdt.irq=11
-
-Other "wdt" driver parameters are:
-	heartbeat	Watchdog heartbeat in seconds (default 60)
-	nowayout	Watchdog cannot be stopped once started (kernel
-				build parameter)
-	tachometer	WDT501-P Fan Tachometer support (0=disable, default=0)
-	type		WDT501-P Card type (500 or 501, default=500)
-
-Features
---------
-		WDT501P		WDT500P
-Reboot Timer	   X               X
-External Reboot	   X	           X
-I/O Port Monitor   o		   o
-Temperature	   X		   o
-Fan Speed          X		   o
-Power Under	   X               o
-Power Over         X               o
-Overheat           X               o
-
-The external event interfaces on the WDT boards are not currently supported.
-Minor numbers are however allocated for it.
-
-
-Example Watchdog Driver:  see samples/watchdog/watchdog-simple.c
-- 
cgit 


From d223884089734cc637c4e5458870d69f6ded9f89 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:53:02 -0300
Subject: docs: xilinx: convert eemi.txt to eemi.rst

This is a very trivial conversion: adjust the title markup
and add a few literal block markups to produce a better
visual when parsed and avoid warnings.

As newer documents related to xilinx could be added in the future,
create a new index file for it.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/xilinx/eemi.rst  | 67 ++++++++++++++++++++++++++++++++++++++++++
 Documentation/xilinx/eemi.txt  | 67 ------------------------------------------
 Documentation/xilinx/index.rst | 17 +++++++++++
 3 files changed, 84 insertions(+), 67 deletions(-)
 create mode 100644 Documentation/xilinx/eemi.rst
 delete mode 100644 Documentation/xilinx/eemi.txt
 create mode 100644 Documentation/xilinx/index.rst

(limited to 'Documentation')

diff --git a/Documentation/xilinx/eemi.rst b/Documentation/xilinx/eemi.rst
new file mode 100644
index 000000000000..9dcbc6f18d75
--- /dev/null
+++ b/Documentation/xilinx/eemi.rst
@@ -0,0 +1,67 @@
+====================================
+Xilinx Zynq MPSoC EEMI Documentation
+====================================
+
+Xilinx Zynq MPSoC Firmware Interface
+-------------------------------------
+The zynqmp-firmware node describes the interface to platform firmware.
+ZynqMP has an interface to communicate with secure firmware. Firmware
+driver provides an interface to firmware APIs. Interface APIs can be
+used by any driver to communicate with PMC(Platform Management Controller).
+
+Embedded Energy Management Interface (EEMI)
+----------------------------------------------
+The embedded energy management interface is used to allow software
+components running across different processing clusters on a chip or
+device to communicate with a power management controller (PMC) on a
+device to issue or respond to power management requests.
+
+EEMI ops is a structure containing all eemi APIs supported by Zynq MPSoC.
+The zynqmp-firmware driver maintain all EEMI APIs in zynqmp_eemi_ops
+structure. Any driver who want to communicate with PMC using EEMI APIs
+can call zynqmp_pm_get_eemi_ops().
+
+Example of EEMI ops::
+
+	/* zynqmp-firmware driver maintain all EEMI APIs */
+	struct zynqmp_eemi_ops {
+		int (*get_api_version)(u32 *version);
+		int (*query_data)(struct zynqmp_pm_query_data qdata, u32 *out);
+	};
+
+	static const struct zynqmp_eemi_ops eemi_ops = {
+		.get_api_version = zynqmp_pm_get_api_version,
+		.query_data = zynqmp_pm_query_data,
+	};
+
+Example of EEMI ops usage::
+
+	static const struct zynqmp_eemi_ops *eemi_ops;
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	eemi_ops = zynqmp_pm_get_eemi_ops();
+	if (IS_ERR(eemi_ops))
+		return PTR_ERR(eemi_ops);
+
+	ret = eemi_ops->query_data(qdata, ret_payload);
+
+IOCTL
+------
+IOCTL API is for device control and configuration. It is not a system
+IOCTL but it is an EEMI API. This API can be used by master to control
+any device specific configuration. IOCTL definitions can be platform
+specific. This API also manage shared device configuration.
+
+The following IOCTL IDs are valid for device control:
+- IOCTL_SET_PLL_FRAC_MODE	8
+- IOCTL_GET_PLL_FRAC_MODE	9
+- IOCTL_SET_PLL_FRAC_DATA	10
+- IOCTL_GET_PLL_FRAC_DATA	11
+
+Refer EEMI API guide [0] for IOCTL specific parameters and other EEMI APIs.
+
+References
+----------
+[0] Embedded Energy Management Interface (EEMI) API guide:
+    https://www.xilinx.com/support/documentation/user_guides/ug1200-eemi-api.pdf
diff --git a/Documentation/xilinx/eemi.txt b/Documentation/xilinx/eemi.txt
deleted file mode 100644
index 5f39b4ffdcd4..000000000000
--- a/Documentation/xilinx/eemi.txt
+++ /dev/null
@@ -1,67 +0,0 @@
----------------------------------------------------------------------
-Xilinx Zynq MPSoC EEMI Documentation
----------------------------------------------------------------------
-
-Xilinx Zynq MPSoC Firmware Interface
--------------------------------------
-The zynqmp-firmware node describes the interface to platform firmware.
-ZynqMP has an interface to communicate with secure firmware. Firmware
-driver provides an interface to firmware APIs. Interface APIs can be
-used by any driver to communicate with PMC(Platform Management Controller).
-
-Embedded Energy Management Interface (EEMI)
-----------------------------------------------
-The embedded energy management interface is used to allow software
-components running across different processing clusters on a chip or
-device to communicate with a power management controller (PMC) on a
-device to issue or respond to power management requests.
-
-EEMI ops is a structure containing all eemi APIs supported by Zynq MPSoC.
-The zynqmp-firmware driver maintain all EEMI APIs in zynqmp_eemi_ops
-structure. Any driver who want to communicate with PMC using EEMI APIs
-can call zynqmp_pm_get_eemi_ops().
-
-Example of EEMI ops:
-
-	/* zynqmp-firmware driver maintain all EEMI APIs */
-	struct zynqmp_eemi_ops {
-		int (*get_api_version)(u32 *version);
-		int (*query_data)(struct zynqmp_pm_query_data qdata, u32 *out);
-	};
-
-	static const struct zynqmp_eemi_ops eemi_ops = {
-		.get_api_version = zynqmp_pm_get_api_version,
-		.query_data = zynqmp_pm_query_data,
-	};
-
-Example of EEMI ops usage:
-
-	static const struct zynqmp_eemi_ops *eemi_ops;
-	u32 ret_payload[PAYLOAD_ARG_CNT];
-	int ret;
-
-	eemi_ops = zynqmp_pm_get_eemi_ops();
-	if (IS_ERR(eemi_ops))
-		return PTR_ERR(eemi_ops);
-
-	ret = eemi_ops->query_data(qdata, ret_payload);
-
-IOCTL
-------
-IOCTL API is for device control and configuration. It is not a system
-IOCTL but it is an EEMI API. This API can be used by master to control
-any device specific configuration. IOCTL definitions can be platform
-specific. This API also manage shared device configuration.
-
-The following IOCTL IDs are valid for device control:
-- IOCTL_SET_PLL_FRAC_MODE	8
-- IOCTL_GET_PLL_FRAC_MODE	9
-- IOCTL_SET_PLL_FRAC_DATA	10
-- IOCTL_GET_PLL_FRAC_DATA	11
-
-Refer EEMI API guide [0] for IOCTL specific parameters and other EEMI APIs.
-
-References
-----------
-[0] Embedded Energy Management Interface (EEMI) API guide:
-    https://www.xilinx.com/support/documentation/user_guides/ug1200-eemi-api.pdf
diff --git a/Documentation/xilinx/index.rst b/Documentation/xilinx/index.rst
new file mode 100644
index 000000000000..01cc1a0714df
--- /dev/null
+++ b/Documentation/xilinx/index.rst
@@ -0,0 +1,17 @@
+:orphan:
+
+===========
+Xilinx FPGA
+===========
+
+.. toctree::
+    :maxdepth: 1
+
+    eemi
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
-- 
cgit 


From d6a3b247627a3bc0551504eb305d624cc6fb5453 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:53:03 -0300
Subject: docs: scheduler: convert docs to ReST and rename to *.rst

In order to prepare to add them to the Kernel API book,
convert the files to ReST format.

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/ABI/testing/sysfs-kernel-uids   |   2 +-
 Documentation/scheduler/completion.rst        | 293 +++++++++
 Documentation/scheduler/completion.txt        | 291 ---------
 Documentation/scheduler/index.rst             |  29 +
 Documentation/scheduler/sched-arch.rst        |  76 +++
 Documentation/scheduler/sched-arch.txt        |  74 ---
 Documentation/scheduler/sched-bwc.rst         | 128 ++++
 Documentation/scheduler/sched-bwc.txt         | 122 ----
 Documentation/scheduler/sched-deadline.rst    | 888 ++++++++++++++++++++++++++
 Documentation/scheduler/sched-deadline.txt    | 871 -------------------------
 Documentation/scheduler/sched-design-CFS.rst  | 249 ++++++++
 Documentation/scheduler/sched-design-CFS.txt  | 242 -------
 Documentation/scheduler/sched-domains.rst     |  83 +++
 Documentation/scheduler/sched-domains.txt     |  77 ---
 Documentation/scheduler/sched-energy.rst      | 430 +++++++++++++
 Documentation/scheduler/sched-energy.txt      | 425 ------------
 Documentation/scheduler/sched-nice-design.rst | 112 ++++
 Documentation/scheduler/sched-nice-design.txt | 108 ----
 Documentation/scheduler/sched-rt-group.rst    | 185 ++++++
 Documentation/scheduler/sched-rt-group.txt    | 183 ------
 Documentation/scheduler/sched-stats.rst       | 167 +++++
 Documentation/scheduler/sched-stats.txt       | 154 -----
 Documentation/scheduler/text_files.rst        |   5 +
 Documentation/vm/numa.rst                     |   2 +-
 24 files changed, 2647 insertions(+), 2549 deletions(-)
 create mode 100644 Documentation/scheduler/completion.rst
 delete mode 100644 Documentation/scheduler/completion.txt
 create mode 100644 Documentation/scheduler/index.rst
 create mode 100644 Documentation/scheduler/sched-arch.rst
 delete mode 100644 Documentation/scheduler/sched-arch.txt
 create mode 100644 Documentation/scheduler/sched-bwc.rst
 delete mode 100644 Documentation/scheduler/sched-bwc.txt
 create mode 100644 Documentation/scheduler/sched-deadline.rst
 delete mode 100644 Documentation/scheduler/sched-deadline.txt
 create mode 100644 Documentation/scheduler/sched-design-CFS.rst
 delete mode 100644 Documentation/scheduler/sched-design-CFS.txt
 create mode 100644 Documentation/scheduler/sched-domains.rst
 delete mode 100644 Documentation/scheduler/sched-domains.txt
 create mode 100644 Documentation/scheduler/sched-energy.rst
 delete mode 100644 Documentation/scheduler/sched-energy.txt
 create mode 100644 Documentation/scheduler/sched-nice-design.rst
 delete mode 100644 Documentation/scheduler/sched-nice-design.txt
 create mode 100644 Documentation/scheduler/sched-rt-group.rst
 delete mode 100644 Documentation/scheduler/sched-rt-group.txt
 create mode 100644 Documentation/scheduler/sched-stats.rst
 delete mode 100644 Documentation/scheduler/sched-stats.txt
 create mode 100644 Documentation/scheduler/text_files.rst

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-kernel-uids b/Documentation/ABI/testing/sysfs-kernel-uids
index 28f14695a852..4182b7061816 100644
--- a/Documentation/ABI/testing/sysfs-kernel-uids
+++ b/Documentation/ABI/testing/sysfs-kernel-uids
@@ -11,4 +11,4 @@ Description:
 		example would be, if User A has shares = 1024 and user
 		B has shares = 2048, User B will get twice the CPU
 		bandwidth user A will. For more details refer
-		Documentation/scheduler/sched-design-CFS.txt
+		Documentation/scheduler/sched-design-CFS.rst
diff --git a/Documentation/scheduler/completion.rst b/Documentation/scheduler/completion.rst
new file mode 100644
index 000000000000..9f039b4f4b09
--- /dev/null
+++ b/Documentation/scheduler/completion.rst
@@ -0,0 +1,293 @@
+================================================
+Completions - "wait for completion" barrier APIs
+================================================
+
+Introduction:
+-------------
+
+If you have one or more threads that must wait for some kernel activity
+to have reached a point or a specific state, completions can provide a
+race-free solution to this problem. Semantically they are somewhat like a
+pthread_barrier() and have similar use-cases.
+
+Completions are a code synchronization mechanism which is preferable to any
+misuse of locks/semaphores and busy-loops. Any time you think of using
+yield() or some quirky msleep(1) loop to allow something else to proceed,
+you probably want to look into using one of the wait_for_completion*()
+calls and complete() instead.
+
+The advantage of using completions is that they have a well defined, focused
+purpose which makes it very easy to see the intent of the code, but they
+also result in more efficient code as all threads can continue execution
+until the result is actually needed, and both the waiting and the signalling
+is highly efficient using low level scheduler sleep/wakeup facilities.
+
+Completions are built on top of the waitqueue and wakeup infrastructure of
+the Linux scheduler. The event the threads on the waitqueue are waiting for
+is reduced to a simple flag in 'struct completion', appropriately called "done".
+
+As completions are scheduling related, the code can be found in
+kernel/sched/completion.c.
+
+
+Usage:
+------
+
+There are three main parts to using completions:
+
+ - the initialization of the 'struct completion' synchronization object
+ - the waiting part through a call to one of the variants of wait_for_completion(),
+ - the signaling side through a call to complete() or complete_all().
+
+There are also some helper functions for checking the state of completions.
+Note that while initialization must happen first, the waiting and signaling
+part can happen in any order. I.e. it's entirely normal for a thread
+to have marked a completion as 'done' before another thread checks whether
+it has to wait for it.
+
+To use completions you need to #include <linux/completion.h> and
+create a static or dynamic variable of type 'struct completion',
+which has only two fields::
+
+	struct completion {
+		unsigned int done;
+		wait_queue_head_t wait;
+	};
+
+This provides the ->wait waitqueue to place tasks on for waiting (if any), and
+the ->done completion flag for indicating whether it's completed or not.
+
+Completions should be named to refer to the event that is being synchronized on.
+A good example is::
+
+	wait_for_completion(&early_console_added);
+
+	complete(&early_console_added);
+
+Good, intuitive naming (as always) helps code readability. Naming a completion
+'complete' is not helpful unless the purpose is super obvious...
+
+
+Initializing completions:
+-------------------------
+
+Dynamically allocated completion objects should preferably be embedded in data
+structures that are assured to be alive for the life-time of the function/driver,
+to prevent races with asynchronous complete() calls from occurring.
+
+Particular care should be taken when using the _timeout() or _killable()/_interruptible()
+variants of wait_for_completion(), as it must be assured that memory de-allocation
+does not happen until all related activities (complete() or reinit_completion())
+have taken place, even if these wait functions return prematurely due to a timeout
+or a signal triggering.
+
+Initializing of dynamically allocated completion objects is done via a call to
+init_completion()::
+
+	init_completion(&dynamic_object->done);
+
+In this call we initialize the waitqueue and set ->done to 0, i.e. "not completed"
+or "not done".
+
+The re-initialization function, reinit_completion(), simply resets the
+->done field to 0 ("not done"), without touching the waitqueue.
+Callers of this function must make sure that there are no racy
+wait_for_completion() calls going on in parallel.
+
+Calling init_completion() on the same completion object twice is
+most likely a bug as it re-initializes the queue to an empty queue and
+enqueued tasks could get "lost" - use reinit_completion() in that case,
+but be aware of other races.
+
+For static declaration and initialization, macros are available.
+
+For static (or global) declarations in file scope you can use
+DECLARE_COMPLETION()::
+
+	static DECLARE_COMPLETION(setup_done);
+	DECLARE_COMPLETION(setup_done);
+
+Note that in this case the completion is boot time (or module load time)
+initialized to 'not done' and doesn't require an init_completion() call.
+
+When a completion is declared as a local variable within a function,
+then the initialization should always use DECLARE_COMPLETION_ONSTACK()
+explicitly, not just to make lockdep happy, but also to make it clear
+that limited scope had been considered and is intentional::
+
+	DECLARE_COMPLETION_ONSTACK(setup_done)
+
+Note that when using completion objects as local variables you must be
+acutely aware of the short life time of the function stack: the function
+must not return to a calling context until all activities (such as waiting
+threads) have ceased and the completion object is completely unused.
+
+To emphasise this again: in particular when using some of the waiting API variants
+with more complex outcomes, such as the timeout or signalling (_timeout(),
+_killable() and _interruptible()) variants, the wait might complete
+prematurely while the object might still be in use by another thread - and a return
+from the wait_on_completion*() caller function will deallocate the function
+stack and cause subtle data corruption if a complete() is done in some
+other thread. Simple testing might not trigger these kinds of races.
+
+If unsure, use dynamically allocated completion objects, preferably embedded
+in some other long lived object that has a boringly long life time which
+exceeds the life time of any helper threads using the completion object,
+or has a lock or other synchronization mechanism to make sure complete()
+is not called on a freed object.
+
+A naive DECLARE_COMPLETION() on the stack triggers a lockdep warning.
+
+Waiting for completions:
+------------------------
+
+For a thread to wait for some concurrent activity to finish, it
+calls wait_for_completion() on the initialized completion structure::
+
+	void wait_for_completion(struct completion *done)
+
+A typical usage scenario is::
+
+	CPU#1					CPU#2
+
+	struct completion setup_done;
+
+	init_completion(&setup_done);
+	initialize_work(...,&setup_done,...);
+
+	/* run non-dependent code */		/* do setup */
+
+	wait_for_completion(&setup_done);	complete(setup_done);
+
+This is not implying any particular order between wait_for_completion() and
+the call to complete() - if the call to complete() happened before the call
+to wait_for_completion() then the waiting side simply will continue
+immediately as all dependencies are satisfied; if not, it will block until
+completion is signaled by complete().
+
+Note that wait_for_completion() is calling spin_lock_irq()/spin_unlock_irq(),
+so it can only be called safely when you know that interrupts are enabled.
+Calling it from IRQs-off atomic contexts will result in hard-to-detect
+spurious enabling of interrupts.
+
+The default behavior is to wait without a timeout and to mark the task as
+uninterruptible. wait_for_completion() and its variants are only safe
+in process context (as they can sleep) but not in atomic context,
+interrupt context, with disabled IRQs, or preemption is disabled - see also
+try_wait_for_completion() below for handling completion in atomic/interrupt
+context.
+
+As all variants of wait_for_completion() can (obviously) block for a long
+time depending on the nature of the activity they are waiting for, so in
+most cases you probably don't want to call this with held mutexes.
+
+
+wait_for_completion*() variants available:
+------------------------------------------
+
+The below variants all return status and this status should be checked in
+most(/all) cases - in cases where the status is deliberately not checked you
+probably want to make a note explaining this (e.g. see
+arch/arm/kernel/smp.c:__cpu_up()).
+
+A common problem that occurs is to have unclean assignment of return types,
+so take care to assign return-values to variables of the proper type.
+
+Checking for the specific meaning of return values also has been found
+to be quite inaccurate, e.g. constructs like::
+
+	if (!wait_for_completion_interruptible_timeout(...))
+
+... would execute the same code path for successful completion and for the
+interrupted case - which is probably not what you want::
+
+	int wait_for_completion_interruptible(struct completion *done)
+
+This function marks the task TASK_INTERRUPTIBLE while it is waiting.
+If a signal was received while waiting it will return -ERESTARTSYS; 0 otherwise::
+
+	unsigned long wait_for_completion_timeout(struct completion *done, unsigned long timeout)
+
+The task is marked as TASK_UNINTERRUPTIBLE and will wait at most 'timeout'
+jiffies. If a timeout occurs it returns 0, else the remaining time in
+jiffies (but at least 1).
+
+Timeouts are preferably calculated with msecs_to_jiffies() or usecs_to_jiffies(),
+to make the code largely HZ-invariant.
+
+If the returned timeout value is deliberately ignored a comment should probably explain
+why (e.g. see drivers/mfd/wm8350-core.c wm8350_read_auxadc())::
+
+	long wait_for_completion_interruptible_timeout(struct completion *done, unsigned long timeout)
+
+This function passes a timeout in jiffies and marks the task as
+TASK_INTERRUPTIBLE. If a signal was received it will return -ERESTARTSYS;
+otherwise it returns 0 if the completion timed out, or the remaining time in
+jiffies if completion occurred.
+
+Further variants include _killable which uses TASK_KILLABLE as the
+designated tasks state and will return -ERESTARTSYS if it is interrupted,
+or 0 if completion was achieved.  There is a _timeout variant as well::
+
+	long wait_for_completion_killable(struct completion *done)
+	long wait_for_completion_killable_timeout(struct completion *done, unsigned long timeout)
+
+The _io variants wait_for_completion_io() behave the same as the non-_io
+variants, except for accounting waiting time as 'waiting on IO', which has
+an impact on how the task is accounted in scheduling/IO stats::
+
+	void wait_for_completion_io(struct completion *done)
+	unsigned long wait_for_completion_io_timeout(struct completion *done, unsigned long timeout)
+
+
+Signaling completions:
+----------------------
+
+A thread that wants to signal that the conditions for continuation have been
+achieved calls complete() to signal exactly one of the waiters that it can
+continue::
+
+	void complete(struct completion *done)
+
+... or calls complete_all() to signal all current and future waiters::
+
+	void complete_all(struct completion *done)
+
+The signaling will work as expected even if completions are signaled before
+a thread starts waiting. This is achieved by the waiter "consuming"
+(decrementing) the done field of 'struct completion'. Waiting threads
+wakeup order is the same in which they were enqueued (FIFO order).
+
+If complete() is called multiple times then this will allow for that number
+of waiters to continue - each call to complete() will simply increment the
+done field. Calling complete_all() multiple times is a bug though. Both
+complete() and complete_all() can be called in IRQ/atomic context safely.
+
+There can only be one thread calling complete() or complete_all() on a
+particular 'struct completion' at any time - serialized through the wait
+queue spinlock. Any such concurrent calls to complete() or complete_all()
+probably are a design bug.
+
+Signaling completion from IRQ context is fine as it will appropriately
+lock with spin_lock_irqsave()/spin_unlock_irqrestore() and it will never
+sleep.
+
+
+try_wait_for_completion()/completion_done():
+--------------------------------------------
+
+The try_wait_for_completion() function will not put the thread on the wait
+queue but rather returns false if it would need to enqueue (block) the thread,
+else it consumes one posted completion and returns true::
+
+	bool try_wait_for_completion(struct completion *done)
+
+Finally, to check the state of a completion without changing it in any way,
+call completion_done(), which returns false if there are no posted
+completions that were not yet consumed by waiters (implying that there are
+waiters) and true otherwise::
+
+	bool completion_done(struct completion *done)
+
+Both try_wait_for_completion() and completion_done() are safe to be called in
+IRQ or atomic context.
diff --git a/Documentation/scheduler/completion.txt b/Documentation/scheduler/completion.txt
deleted file mode 100644
index e5b9df4d8078..000000000000
--- a/Documentation/scheduler/completion.txt
+++ /dev/null
@@ -1,291 +0,0 @@
-Completions - "wait for completion" barrier APIs
-================================================
-
-Introduction:
--------------
-
-If you have one or more threads that must wait for some kernel activity
-to have reached a point or a specific state, completions can provide a
-race-free solution to this problem. Semantically they are somewhat like a
-pthread_barrier() and have similar use-cases.
-
-Completions are a code synchronization mechanism which is preferable to any
-misuse of locks/semaphores and busy-loops. Any time you think of using
-yield() or some quirky msleep(1) loop to allow something else to proceed,
-you probably want to look into using one of the wait_for_completion*()
-calls and complete() instead.
-
-The advantage of using completions is that they have a well defined, focused
-purpose which makes it very easy to see the intent of the code, but they
-also result in more efficient code as all threads can continue execution
-until the result is actually needed, and both the waiting and the signalling
-is highly efficient using low level scheduler sleep/wakeup facilities.
-
-Completions are built on top of the waitqueue and wakeup infrastructure of
-the Linux scheduler. The event the threads on the waitqueue are waiting for
-is reduced to a simple flag in 'struct completion', appropriately called "done".
-
-As completions are scheduling related, the code can be found in
-kernel/sched/completion.c.
-
-
-Usage:
-------
-
-There are three main parts to using completions:
-
- - the initialization of the 'struct completion' synchronization object
- - the waiting part through a call to one of the variants of wait_for_completion(),
- - the signaling side through a call to complete() or complete_all().
-
-There are also some helper functions for checking the state of completions.
-Note that while initialization must happen first, the waiting and signaling
-part can happen in any order. I.e. it's entirely normal for a thread
-to have marked a completion as 'done' before another thread checks whether
-it has to wait for it.
-
-To use completions you need to #include <linux/completion.h> and
-create a static or dynamic variable of type 'struct completion',
-which has only two fields:
-
-	struct completion {
-		unsigned int done;
-		wait_queue_head_t wait;
-	};
-
-This provides the ->wait waitqueue to place tasks on for waiting (if any), and
-the ->done completion flag for indicating whether it's completed or not.
-
-Completions should be named to refer to the event that is being synchronized on.
-A good example is:
-
-	wait_for_completion(&early_console_added);
-
-	complete(&early_console_added);
-
-Good, intuitive naming (as always) helps code readability. Naming a completion
-'complete' is not helpful unless the purpose is super obvious...
-
-
-Initializing completions:
--------------------------
-
-Dynamically allocated completion objects should preferably be embedded in data
-structures that are assured to be alive for the life-time of the function/driver,
-to prevent races with asynchronous complete() calls from occurring.
-
-Particular care should be taken when using the _timeout() or _killable()/_interruptible()
-variants of wait_for_completion(), as it must be assured that memory de-allocation
-does not happen until all related activities (complete() or reinit_completion())
-have taken place, even if these wait functions return prematurely due to a timeout
-or a signal triggering.
-
-Initializing of dynamically allocated completion objects is done via a call to
-init_completion():
-
-	init_completion(&dynamic_object->done);
-
-In this call we initialize the waitqueue and set ->done to 0, i.e. "not completed"
-or "not done".
-
-The re-initialization function, reinit_completion(), simply resets the
-->done field to 0 ("not done"), without touching the waitqueue.
-Callers of this function must make sure that there are no racy
-wait_for_completion() calls going on in parallel.
-
-Calling init_completion() on the same completion object twice is
-most likely a bug as it re-initializes the queue to an empty queue and
-enqueued tasks could get "lost" - use reinit_completion() in that case,
-but be aware of other races.
-
-For static declaration and initialization, macros are available.
-
-For static (or global) declarations in file scope you can use DECLARE_COMPLETION():
-
-	static DECLARE_COMPLETION(setup_done);
-	DECLARE_COMPLETION(setup_done);
-
-Note that in this case the completion is boot time (or module load time)
-initialized to 'not done' and doesn't require an init_completion() call.
-
-When a completion is declared as a local variable within a function,
-then the initialization should always use DECLARE_COMPLETION_ONSTACK()
-explicitly, not just to make lockdep happy, but also to make it clear
-that limited scope had been considered and is intentional:
-
-	DECLARE_COMPLETION_ONSTACK(setup_done)
-
-Note that when using completion objects as local variables you must be
-acutely aware of the short life time of the function stack: the function
-must not return to a calling context until all activities (such as waiting
-threads) have ceased and the completion object is completely unused.
-
-To emphasise this again: in particular when using some of the waiting API variants
-with more complex outcomes, such as the timeout or signalling (_timeout(),
-_killable() and _interruptible()) variants, the wait might complete
-prematurely while the object might still be in use by another thread - and a return
-from the wait_on_completion*() caller function will deallocate the function
-stack and cause subtle data corruption if a complete() is done in some
-other thread. Simple testing might not trigger these kinds of races.
-
-If unsure, use dynamically allocated completion objects, preferably embedded
-in some other long lived object that has a boringly long life time which
-exceeds the life time of any helper threads using the completion object,
-or has a lock or other synchronization mechanism to make sure complete()
-is not called on a freed object.
-
-A naive DECLARE_COMPLETION() on the stack triggers a lockdep warning.
-
-Waiting for completions:
-------------------------
-
-For a thread to wait for some concurrent activity to finish, it
-calls wait_for_completion() on the initialized completion structure:
-
-	void wait_for_completion(struct completion *done)
-
-A typical usage scenario is:
-
-	CPU#1					CPU#2
-
-	struct completion setup_done;
-
-	init_completion(&setup_done);
-	initialize_work(...,&setup_done,...);
-
-	/* run non-dependent code */		/* do setup */
-
-	wait_for_completion(&setup_done);	complete(setup_done);
-
-This is not implying any particular order between wait_for_completion() and
-the call to complete() - if the call to complete() happened before the call
-to wait_for_completion() then the waiting side simply will continue
-immediately as all dependencies are satisfied; if not, it will block until
-completion is signaled by complete().
-
-Note that wait_for_completion() is calling spin_lock_irq()/spin_unlock_irq(),
-so it can only be called safely when you know that interrupts are enabled.
-Calling it from IRQs-off atomic contexts will result in hard-to-detect
-spurious enabling of interrupts.
-
-The default behavior is to wait without a timeout and to mark the task as
-uninterruptible. wait_for_completion() and its variants are only safe
-in process context (as they can sleep) but not in atomic context,
-interrupt context, with disabled IRQs, or preemption is disabled - see also
-try_wait_for_completion() below for handling completion in atomic/interrupt
-context.
-
-As all variants of wait_for_completion() can (obviously) block for a long
-time depending on the nature of the activity they are waiting for, so in
-most cases you probably don't want to call this with held mutexes.
-
-
-wait_for_completion*() variants available:
-------------------------------------------
-
-The below variants all return status and this status should be checked in
-most(/all) cases - in cases where the status is deliberately not checked you
-probably want to make a note explaining this (e.g. see
-arch/arm/kernel/smp.c:__cpu_up()).
-
-A common problem that occurs is to have unclean assignment of return types,
-so take care to assign return-values to variables of the proper type.
-
-Checking for the specific meaning of return values also has been found
-to be quite inaccurate, e.g. constructs like:
-
-	if (!wait_for_completion_interruptible_timeout(...))
-
-... would execute the same code path for successful completion and for the
-interrupted case - which is probably not what you want.
-
-	int wait_for_completion_interruptible(struct completion *done)
-
-This function marks the task TASK_INTERRUPTIBLE while it is waiting.
-If a signal was received while waiting it will return -ERESTARTSYS; 0 otherwise.
-
-	unsigned long wait_for_completion_timeout(struct completion *done, unsigned long timeout)
-
-The task is marked as TASK_UNINTERRUPTIBLE and will wait at most 'timeout'
-jiffies. If a timeout occurs it returns 0, else the remaining time in
-jiffies (but at least 1).
-
-Timeouts are preferably calculated with msecs_to_jiffies() or usecs_to_jiffies(),
-to make the code largely HZ-invariant.
-
-If the returned timeout value is deliberately ignored a comment should probably explain
-why (e.g. see drivers/mfd/wm8350-core.c wm8350_read_auxadc()).
-
-	long wait_for_completion_interruptible_timeout(struct completion *done, unsigned long timeout)
-
-This function passes a timeout in jiffies and marks the task as
-TASK_INTERRUPTIBLE. If a signal was received it will return -ERESTARTSYS;
-otherwise it returns 0 if the completion timed out, or the remaining time in
-jiffies if completion occurred.
-
-Further variants include _killable which uses TASK_KILLABLE as the
-designated tasks state and will return -ERESTARTSYS if it is interrupted,
-or 0 if completion was achieved.  There is a _timeout variant as well:
-
-	long wait_for_completion_killable(struct completion *done)
-	long wait_for_completion_killable_timeout(struct completion *done, unsigned long timeout)
-
-The _io variants wait_for_completion_io() behave the same as the non-_io
-variants, except for accounting waiting time as 'waiting on IO', which has
-an impact on how the task is accounted in scheduling/IO stats:
-
-	void wait_for_completion_io(struct completion *done)
-	unsigned long wait_for_completion_io_timeout(struct completion *done, unsigned long timeout)
-
-
-Signaling completions:
-----------------------
-
-A thread that wants to signal that the conditions for continuation have been
-achieved calls complete() to signal exactly one of the waiters that it can
-continue:
-
-	void complete(struct completion *done)
-
-... or calls complete_all() to signal all current and future waiters:
-
-	void complete_all(struct completion *done)
-
-The signaling will work as expected even if completions are signaled before
-a thread starts waiting. This is achieved by the waiter "consuming"
-(decrementing) the done field of 'struct completion'. Waiting threads
-wakeup order is the same in which they were enqueued (FIFO order).
-
-If complete() is called multiple times then this will allow for that number
-of waiters to continue - each call to complete() will simply increment the
-done field. Calling complete_all() multiple times is a bug though. Both
-complete() and complete_all() can be called in IRQ/atomic context safely.
-
-There can only be one thread calling complete() or complete_all() on a
-particular 'struct completion' at any time - serialized through the wait
-queue spinlock. Any such concurrent calls to complete() or complete_all()
-probably are a design bug.
-
-Signaling completion from IRQ context is fine as it will appropriately
-lock with spin_lock_irqsave()/spin_unlock_irqrestore() and it will never
-sleep. 
-
-
-try_wait_for_completion()/completion_done():
---------------------------------------------
-
-The try_wait_for_completion() function will not put the thread on the wait
-queue but rather returns false if it would need to enqueue (block) the thread,
-else it consumes one posted completion and returns true.
-
-	bool try_wait_for_completion(struct completion *done)
-
-Finally, to check the state of a completion without changing it in any way,
-call completion_done(), which returns false if there are no posted
-completions that were not yet consumed by waiters (implying that there are
-waiters) and true otherwise;
-
-	bool completion_done(struct completion *done)
-
-Both try_wait_for_completion() and completion_done() are safe to be called in
-IRQ or atomic context.
diff --git a/Documentation/scheduler/index.rst b/Documentation/scheduler/index.rst
new file mode 100644
index 000000000000..058be77a4c34
--- /dev/null
+++ b/Documentation/scheduler/index.rst
@@ -0,0 +1,29 @@
+:orphan:
+
+===============
+Linux Scheduler
+===============
+
+.. toctree::
+    :maxdepth: 1
+
+
+    completion
+    sched-arch
+    sched-bwc
+    sched-deadline
+    sched-design-CFS
+    sched-domains
+    sched-energy
+    sched-nice-design
+    sched-rt-group
+    sched-stats
+
+    text_files
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/scheduler/sched-arch.rst b/Documentation/scheduler/sched-arch.rst
new file mode 100644
index 000000000000..0eaec669790a
--- /dev/null
+++ b/Documentation/scheduler/sched-arch.rst
@@ -0,0 +1,76 @@
+=================================================================
+CPU Scheduler implementation hints for architecture specific code
+=================================================================
+
+	Nick Piggin, 2005
+
+Context switch
+==============
+1. Runqueue locking
+By default, the switch_to arch function is called with the runqueue
+locked. This is usually not a problem unless switch_to may need to
+take the runqueue lock. This is usually due to a wake up operation in
+the context switch. See arch/ia64/include/asm/switch_to.h for an example.
+
+To request the scheduler call switch_to with the runqueue unlocked,
+you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
+(typically the one where switch_to is defined).
+
+Unlocked context switches introduce only a very minor performance
+penalty to the core scheduler implementation in the CONFIG_SMP case.
+
+CPU idle
+========
+Your cpu_idle routines need to obey the following rules:
+
+1. Preempt should now disabled over idle routines. Should only
+   be enabled to call schedule() then disabled again.
+
+2. need_resched/TIF_NEED_RESCHED is only ever set, and will never
+   be cleared until the running task has called schedule(). Idle
+   threads need only ever query need_resched, and may never set or
+   clear it.
+
+3. When cpu_idle finds (need_resched() == 'true'), it should call
+   schedule(). It should not call schedule() otherwise.
+
+4. The only time interrupts need to be disabled when checking
+   need_resched is if we are about to sleep the processor until
+   the next interrupt (this doesn't provide any protection of
+   need_resched, it prevents losing an interrupt):
+
+	4a. Common problem with this type of sleep appears to be::
+
+	        local_irq_disable();
+	        if (!need_resched()) {
+	                local_irq_enable();
+	                *** resched interrupt arrives here ***
+	                __asm__("sleep until next interrupt");
+	        }
+
+5. TIF_POLLING_NRFLAG can be set by idle routines that do not
+   need an interrupt to wake them up when need_resched goes high.
+   In other words, they must be periodically polling need_resched,
+   although it may be reasonable to do some background work or enter
+   a low CPU priority.
+
+      - 5a. If TIF_POLLING_NRFLAG is set, and we do decide to enter
+	an interrupt sleep, it needs to be cleared then a memory
+	barrier issued (followed by a test of need_resched with
+	interrupts disabled, as explained in 3).
+
+arch/x86/kernel/process.c has examples of both polling and
+sleeping idle functions.
+
+
+Possible arch/ problems
+=======================
+
+Possible arch problems I found (and either tried to fix or didn't):
+
+ia64 - is safe_halt call racy vs interrupts? (does it sleep?) (See #4a)
+
+sh64 - Is sleeping racy vs interrupts? (See #4a)
+
+sparc - IRQs on at this point(?), change local_irq_save to _disable.
+      - TODO: needs secondary CPUs to disable preempt (See #1)
diff --git a/Documentation/scheduler/sched-arch.txt b/Documentation/scheduler/sched-arch.txt
deleted file mode 100644
index a2f27bbf2cba..000000000000
--- a/Documentation/scheduler/sched-arch.txt
+++ /dev/null
@@ -1,74 +0,0 @@
-	CPU Scheduler implementation hints for architecture specific code
-
-	Nick Piggin, 2005
-
-Context switch
-==============
-1. Runqueue locking
-By default, the switch_to arch function is called with the runqueue
-locked. This is usually not a problem unless switch_to may need to
-take the runqueue lock. This is usually due to a wake up operation in
-the context switch. See arch/ia64/include/asm/switch_to.h for an example.
-
-To request the scheduler call switch_to with the runqueue unlocked,
-you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
-(typically the one where switch_to is defined).
-
-Unlocked context switches introduce only a very minor performance
-penalty to the core scheduler implementation in the CONFIG_SMP case.
-
-CPU idle
-========
-Your cpu_idle routines need to obey the following rules:
-
-1. Preempt should now disabled over idle routines. Should only
-   be enabled to call schedule() then disabled again.
-
-2. need_resched/TIF_NEED_RESCHED is only ever set, and will never
-   be cleared until the running task has called schedule(). Idle
-   threads need only ever query need_resched, and may never set or
-   clear it.
-
-3. When cpu_idle finds (need_resched() == 'true'), it should call
-   schedule(). It should not call schedule() otherwise.
-
-4. The only time interrupts need to be disabled when checking
-   need_resched is if we are about to sleep the processor until
-   the next interrupt (this doesn't provide any protection of
-   need_resched, it prevents losing an interrupt).
-
-	4a. Common problem with this type of sleep appears to be:
-	        local_irq_disable();
-	        if (!need_resched()) {
-	                local_irq_enable();
-	                *** resched interrupt arrives here ***
-	                __asm__("sleep until next interrupt");
-	        }
-
-5. TIF_POLLING_NRFLAG can be set by idle routines that do not
-   need an interrupt to wake them up when need_resched goes high.
-   In other words, they must be periodically polling need_resched,
-   although it may be reasonable to do some background work or enter
-   a low CPU priority.
-
-   	5a. If TIF_POLLING_NRFLAG is set, and we do decide to enter
-	    an interrupt sleep, it needs to be cleared then a memory
-	    barrier issued (followed by a test of need_resched with
-	    interrupts disabled, as explained in 3).
-
-arch/x86/kernel/process.c has examples of both polling and
-sleeping idle functions.
-
-
-Possible arch/ problems
-=======================
-
-Possible arch problems I found (and either tried to fix or didn't):
-
-ia64 - is safe_halt call racy vs interrupts? (does it sleep?) (See #4a)
-
-sh64 - Is sleeping racy vs interrupts? (See #4a)
-
-sparc - IRQs on at this point(?), change local_irq_save to _disable.
-      - TODO: needs secondary CPUs to disable preempt (See #1)
-
diff --git a/Documentation/scheduler/sched-bwc.rst b/Documentation/scheduler/sched-bwc.rst
new file mode 100644
index 000000000000..3a9064219656
--- /dev/null
+++ b/Documentation/scheduler/sched-bwc.rst
@@ -0,0 +1,128 @@
+=====================
+CFS Bandwidth Control
+=====================
+
+[ This document only discusses CPU bandwidth control for SCHED_NORMAL.
+  The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.rst ]
+
+CFS bandwidth control is a CONFIG_FAIR_GROUP_SCHED extension which allows the
+specification of the maximum CPU bandwidth available to a group or hierarchy.
+
+The bandwidth allowed for a group is specified using a quota and period. Within
+each given "period" (microseconds), a group is allowed to consume only up to
+"quota" microseconds of CPU time.  When the CPU bandwidth consumption of a
+group exceeds this limit (for that period), the tasks belonging to its
+hierarchy will be throttled and are not allowed to run again until the next
+period.
+
+A group's unused runtime is globally tracked, being refreshed with quota units
+above at each period boundary.  As threads consume this bandwidth it is
+transferred to cpu-local "silos" on a demand basis.  The amount transferred
+within each of these updates is tunable and described as the "slice".
+
+Management
+----------
+Quota and period are managed within the cpu subsystem via cgroupfs.
+
+cpu.cfs_quota_us: the total available run-time within a period (in microseconds)
+cpu.cfs_period_us: the length of a period (in microseconds)
+cpu.stat: exports throttling statistics [explained further below]
+
+The default values are::
+
+	cpu.cfs_period_us=100ms
+	cpu.cfs_quota=-1
+
+A value of -1 for cpu.cfs_quota_us indicates that the group does not have any
+bandwidth restriction in place, such a group is described as an unconstrained
+bandwidth group.  This represents the traditional work-conserving behavior for
+CFS.
+
+Writing any (valid) positive value(s) will enact the specified bandwidth limit.
+The minimum quota allowed for the quota or period is 1ms.  There is also an
+upper bound on the period length of 1s.  Additional restrictions exist when
+bandwidth limits are used in a hierarchical fashion, these are explained in
+more detail below.
+
+Writing any negative value to cpu.cfs_quota_us will remove the bandwidth limit
+and return the group to an unconstrained state once more.
+
+Any updates to a group's bandwidth specification will result in it becoming
+unthrottled if it is in a constrained state.
+
+System wide settings
+--------------------
+For efficiency run-time is transferred between the global pool and CPU local
+"silos" in a batch fashion.  This greatly reduces global accounting pressure
+on large systems.  The amount transferred each time such an update is required
+is described as the "slice".
+
+This is tunable via procfs::
+
+	/proc/sys/kernel/sched_cfs_bandwidth_slice_us (default=5ms)
+
+Larger slice values will reduce transfer overheads, while smaller values allow
+for more fine-grained consumption.
+
+Statistics
+----------
+A group's bandwidth statistics are exported via 3 fields in cpu.stat.
+
+cpu.stat:
+
+- nr_periods: Number of enforcement intervals that have elapsed.
+- nr_throttled: Number of times the group has been throttled/limited.
+- throttled_time: The total time duration (in nanoseconds) for which entities
+  of the group have been throttled.
+
+This interface is read-only.
+
+Hierarchical considerations
+---------------------------
+The interface enforces that an individual entity's bandwidth is always
+attainable, that is: max(c_i) <= C. However, over-subscription in the
+aggregate case is explicitly allowed to enable work-conserving semantics
+within a hierarchy:
+
+  e.g. \Sum (c_i) may exceed C
+
+[ Where C is the parent's bandwidth, and c_i its children ]
+
+
+There are two ways in which a group may become throttled:
+
+	a. it fully consumes its own quota within a period
+	b. a parent's quota is fully consumed within its period
+
+In case b) above, even though the child may have runtime remaining it will not
+be allowed to until the parent's runtime is refreshed.
+
+Examples
+--------
+1. Limit a group to 1 CPU worth of runtime::
+
+	If period is 250ms and quota is also 250ms, the group will get
+	1 CPU worth of runtime every 250ms.
+
+	# echo 250000 > cpu.cfs_quota_us /* quota = 250ms */
+	# echo 250000 > cpu.cfs_period_us /* period = 250ms */
+
+2. Limit a group to 2 CPUs worth of runtime on a multi-CPU machine
+
+   With 500ms period and 1000ms quota, the group can get 2 CPUs worth of
+   runtime every 500ms::
+
+	# echo 1000000 > cpu.cfs_quota_us /* quota = 1000ms */
+	# echo 500000 > cpu.cfs_period_us /* period = 500ms */
+
+	The larger period here allows for increased burst capacity.
+
+3. Limit a group to 20% of 1 CPU.
+
+   With 50ms period, 10ms quota will be equivalent to 20% of 1 CPU::
+
+	# echo 10000 > cpu.cfs_quota_us /* quota = 10ms */
+	# echo 50000 > cpu.cfs_period_us /* period = 50ms */
+
+   By using a small period here we are ensuring a consistent latency
+   response at the expense of burst capacity.
diff --git a/Documentation/scheduler/sched-bwc.txt b/Documentation/scheduler/sched-bwc.txt
deleted file mode 100644
index f6b1873f68ab..000000000000
--- a/Documentation/scheduler/sched-bwc.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-CFS Bandwidth Control
-=====================
-
-[ This document only discusses CPU bandwidth control for SCHED_NORMAL.
-  The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.txt ]
-
-CFS bandwidth control is a CONFIG_FAIR_GROUP_SCHED extension which allows the
-specification of the maximum CPU bandwidth available to a group or hierarchy.
-
-The bandwidth allowed for a group is specified using a quota and period. Within
-each given "period" (microseconds), a group is allowed to consume only up to
-"quota" microseconds of CPU time.  When the CPU bandwidth consumption of a
-group exceeds this limit (for that period), the tasks belonging to its
-hierarchy will be throttled and are not allowed to run again until the next
-period.
-
-A group's unused runtime is globally tracked, being refreshed with quota units
-above at each period boundary.  As threads consume this bandwidth it is
-transferred to cpu-local "silos" on a demand basis.  The amount transferred
-within each of these updates is tunable and described as the "slice".
-
-Management
-----------
-Quota and period are managed within the cpu subsystem via cgroupfs.
-
-cpu.cfs_quota_us: the total available run-time within a period (in microseconds)
-cpu.cfs_period_us: the length of a period (in microseconds)
-cpu.stat: exports throttling statistics [explained further below]
-
-The default values are:
-	cpu.cfs_period_us=100ms
-	cpu.cfs_quota=-1
-
-A value of -1 for cpu.cfs_quota_us indicates that the group does not have any
-bandwidth restriction in place, such a group is described as an unconstrained
-bandwidth group.  This represents the traditional work-conserving behavior for
-CFS.
-
-Writing any (valid) positive value(s) will enact the specified bandwidth limit.
-The minimum quota allowed for the quota or period is 1ms.  There is also an
-upper bound on the period length of 1s.  Additional restrictions exist when
-bandwidth limits are used in a hierarchical fashion, these are explained in
-more detail below.
-
-Writing any negative value to cpu.cfs_quota_us will remove the bandwidth limit
-and return the group to an unconstrained state once more.
-
-Any updates to a group's bandwidth specification will result in it becoming
-unthrottled if it is in a constrained state.
-
-System wide settings
---------------------
-For efficiency run-time is transferred between the global pool and CPU local
-"silos" in a batch fashion.  This greatly reduces global accounting pressure
-on large systems.  The amount transferred each time such an update is required
-is described as the "slice".
-
-This is tunable via procfs:
-	/proc/sys/kernel/sched_cfs_bandwidth_slice_us (default=5ms)
-
-Larger slice values will reduce transfer overheads, while smaller values allow
-for more fine-grained consumption.
-
-Statistics
-----------
-A group's bandwidth statistics are exported via 3 fields in cpu.stat.
-
-cpu.stat:
-- nr_periods: Number of enforcement intervals that have elapsed.
-- nr_throttled: Number of times the group has been throttled/limited.
-- throttled_time: The total time duration (in nanoseconds) for which entities
-  of the group have been throttled.
-
-This interface is read-only.
-
-Hierarchical considerations
----------------------------
-The interface enforces that an individual entity's bandwidth is always
-attainable, that is: max(c_i) <= C. However, over-subscription in the
-aggregate case is explicitly allowed to enable work-conserving semantics
-within a hierarchy.
-  e.g. \Sum (c_i) may exceed C
-[ Where C is the parent's bandwidth, and c_i its children ]
-
-
-There are two ways in which a group may become throttled:
-	a. it fully consumes its own quota within a period
-	b. a parent's quota is fully consumed within its period
-
-In case b) above, even though the child may have runtime remaining it will not
-be allowed to until the parent's runtime is refreshed.
-
-Examples
---------
-1. Limit a group to 1 CPU worth of runtime.
-
-	If period is 250ms and quota is also 250ms, the group will get
-	1 CPU worth of runtime every 250ms.
-
-	# echo 250000 > cpu.cfs_quota_us /* quota = 250ms */
-	# echo 250000 > cpu.cfs_period_us /* period = 250ms */
-
-2. Limit a group to 2 CPUs worth of runtime on a multi-CPU machine.
-
-	With 500ms period and 1000ms quota, the group can get 2 CPUs worth of
-	runtime every 500ms.
-
-	# echo 1000000 > cpu.cfs_quota_us /* quota = 1000ms */
-	# echo 500000 > cpu.cfs_period_us /* period = 500ms */
-
-	The larger period here allows for increased burst capacity.
-
-3. Limit a group to 20% of 1 CPU.
-
-	With 50ms period, 10ms quota will be equivalent to 20% of 1 CPU.
-
-	# echo 10000 > cpu.cfs_quota_us /* quota = 10ms */
-	# echo 50000 > cpu.cfs_period_us /* period = 50ms */
-
-	By using a small period here we are ensuring a consistent latency
-	response at the expense of burst capacity.
-
diff --git a/Documentation/scheduler/sched-deadline.rst b/Documentation/scheduler/sched-deadline.rst
new file mode 100644
index 000000000000..873fb2775ca6
--- /dev/null
+++ b/Documentation/scheduler/sched-deadline.rst
@@ -0,0 +1,888 @@
+========================
+Deadline Task Scheduling
+========================
+
+.. CONTENTS
+
+    0. WARNING
+    1. Overview
+    2. Scheduling algorithm
+      2.1 Main algorithm
+      2.2 Bandwidth reclaiming
+    3. Scheduling Real-Time Tasks
+      3.1 Definitions
+      3.2 Schedulability Analysis for Uniprocessor Systems
+      3.3 Schedulability Analysis for Multiprocessor Systems
+      3.4 Relationship with SCHED_DEADLINE Parameters
+    4. Bandwidth management
+      4.1 System-wide settings
+      4.2 Task interface
+      4.3 Default behavior
+      4.4 Behavior of sched_yield()
+    5. Tasks CPU affinity
+      5.1 SCHED_DEADLINE and cpusets HOWTO
+    6. Future plans
+    A. Test suite
+    B. Minimal main()
+
+
+0. WARNING
+==========
+
+ Fiddling with these settings can result in an unpredictable or even unstable
+ system behavior. As for -rt (group) scheduling, it is assumed that root users
+ know what they're doing.
+
+
+1. Overview
+===========
+
+ The SCHED_DEADLINE policy contained inside the sched_dl scheduling class is
+ basically an implementation of the Earliest Deadline First (EDF) scheduling
+ algorithm, augmented with a mechanism (called Constant Bandwidth Server, CBS)
+ that makes it possible to isolate the behavior of tasks between each other.
+
+
+2. Scheduling algorithm
+=======================
+
+2.1 Main algorithm
+------------------
+
+ SCHED_DEADLINE [18] uses three parameters, named "runtime", "period", and
+ "deadline", to schedule tasks. A SCHED_DEADLINE task should receive
+ "runtime" microseconds of execution time every "period" microseconds, and
+ these "runtime" microseconds are available within "deadline" microseconds
+ from the beginning of the period.  In order to implement this behavior,
+ every time the task wakes up, the scheduler computes a "scheduling deadline"
+ consistent with the guarantee (using the CBS[2,3] algorithm). Tasks are then
+ scheduled using EDF[1] on these scheduling deadlines (the task with the
+ earliest scheduling deadline is selected for execution). Notice that the
+ task actually receives "runtime" time units within "deadline" if a proper
+ "admission control" strategy (see Section "4. Bandwidth management") is used
+ (clearly, if the system is overloaded this guarantee cannot be respected).
+
+ Summing up, the CBS[2,3] algorithm assigns scheduling deadlines to tasks so
+ that each task runs for at most its runtime every period, avoiding any
+ interference between different tasks (bandwidth isolation), while the EDF[1]
+ algorithm selects the task with the earliest scheduling deadline as the one
+ to be executed next. Thanks to this feature, tasks that do not strictly comply
+ with the "traditional" real-time task model (see Section 3) can effectively
+ use the new policy.
+
+ In more details, the CBS algorithm assigns scheduling deadlines to
+ tasks in the following way:
+
+  - Each SCHED_DEADLINE task is characterized by the "runtime",
+    "deadline", and "period" parameters;
+
+  - The state of the task is described by a "scheduling deadline", and
+    a "remaining runtime". These two parameters are initially set to 0;
+
+  - When a SCHED_DEADLINE task wakes up (becomes ready for execution),
+    the scheduler checks if::
+
+                 remaining runtime                  runtime
+        ----------------------------------    >    ---------
+        scheduling deadline - current time           period
+
+    then, if the scheduling deadline is smaller than the current time, or
+    this condition is verified, the scheduling deadline and the
+    remaining runtime are re-initialized as
+
+         scheduling deadline = current time + deadline
+         remaining runtime = runtime
+
+    otherwise, the scheduling deadline and the remaining runtime are
+    left unchanged;
+
+  - When a SCHED_DEADLINE task executes for an amount of time t, its
+    remaining runtime is decreased as::
+
+         remaining runtime = remaining runtime - t
+
+    (technically, the runtime is decreased at every tick, or when the
+    task is descheduled / preempted);
+
+  - When the remaining runtime becomes less or equal than 0, the task is
+    said to be "throttled" (also known as "depleted" in real-time literature)
+    and cannot be scheduled until its scheduling deadline. The "replenishment
+    time" for this task (see next item) is set to be equal to the current
+    value of the scheduling deadline;
+
+  - When the current time is equal to the replenishment time of a
+    throttled task, the scheduling deadline and the remaining runtime are
+    updated as::
+
+         scheduling deadline = scheduling deadline + period
+         remaining runtime = remaining runtime + runtime
+
+ The SCHED_FLAG_DL_OVERRUN flag in sched_attr's sched_flags field allows a task
+ to get informed about runtime overruns through the delivery of SIGXCPU
+ signals.
+
+
+2.2 Bandwidth reclaiming
+------------------------
+
+ Bandwidth reclaiming for deadline tasks is based on the GRUB (Greedy
+ Reclamation of Unused Bandwidth) algorithm [15, 16, 17] and it is enabled
+ when flag SCHED_FLAG_RECLAIM is set.
+
+ The following diagram illustrates the state names for tasks handled by GRUB::
+
+                             ------------
+                 (d)        |   Active   |
+              ------------->|            |
+              |             | Contending |
+              |              ------------
+              |                A      |
+          ----------           |      |
+         |          |          |      |
+         | Inactive |          |(b)   | (a)
+         |          |          |      |
+          ----------           |      |
+              A                |      V
+              |              ------------
+              |             |   Active   |
+              --------------|     Non    |
+                 (c)        | Contending |
+                             ------------
+
+ A task can be in one of the following states:
+
+  - ActiveContending: if it is ready for execution (or executing);
+
+  - ActiveNonContending: if it just blocked and has not yet surpassed the 0-lag
+    time;
+
+  - Inactive: if it is blocked and has surpassed the 0-lag time.
+
+ State transitions:
+
+  (a) When a task blocks, it does not become immediately inactive since its
+      bandwidth cannot be immediately reclaimed without breaking the
+      real-time guarantees. It therefore enters a transitional state called
+      ActiveNonContending. The scheduler arms the "inactive timer" to fire at
+      the 0-lag time, when the task's bandwidth can be reclaimed without
+      breaking the real-time guarantees.
+
+      The 0-lag time for a task entering the ActiveNonContending state is
+      computed as::
+
+                        (runtime * dl_period)
+             deadline - ---------------------
+                             dl_runtime
+
+      where runtime is the remaining runtime, while dl_runtime and dl_period
+      are the reservation parameters.
+
+  (b) If the task wakes up before the inactive timer fires, the task re-enters
+      the ActiveContending state and the "inactive timer" is canceled.
+      In addition, if the task wakes up on a different runqueue, then
+      the task's utilization must be removed from the previous runqueue's active
+      utilization and must be added to the new runqueue's active utilization.
+      In order to avoid races between a task waking up on a runqueue while the
+      "inactive timer" is running on a different CPU, the "dl_non_contending"
+      flag is used to indicate that a task is not on a runqueue but is active
+      (so, the flag is set when the task blocks and is cleared when the
+      "inactive timer" fires or when the task  wakes up).
+
+  (c) When the "inactive timer" fires, the task enters the Inactive state and
+      its utilization is removed from the runqueue's active utilization.
+
+  (d) When an inactive task wakes up, it enters the ActiveContending state and
+      its utilization is added to the active utilization of the runqueue where
+      it has been enqueued.
+
+ For each runqueue, the algorithm GRUB keeps track of two different bandwidths:
+
+  - Active bandwidth (running_bw): this is the sum of the bandwidths of all
+    tasks in active state (i.e., ActiveContending or ActiveNonContending);
+
+  - Total bandwidth (this_bw): this is the sum of all tasks "belonging" to the
+    runqueue, including the tasks in Inactive state.
+
+
+ The algorithm reclaims the bandwidth of the tasks in Inactive state.
+ It does so by decrementing the runtime of the executing task Ti at a pace equal
+ to
+
+           dq = -max{ Ui / Umax, (1 - Uinact - Uextra) } dt
+
+ where:
+
+  - Ui is the bandwidth of task Ti;
+  - Umax is the maximum reclaimable utilization (subjected to RT throttling
+    limits);
+  - Uinact is the (per runqueue) inactive utilization, computed as
+    (this_bq - running_bw);
+  - Uextra is the (per runqueue) extra reclaimable utilization
+    (subjected to RT throttling limits).
+
+
+ Let's now see a trivial example of two deadline tasks with runtime equal
+ to 4 and period equal to 8 (i.e., bandwidth equal to 0.5)::
+
+         A            Task T1
+         |
+         |                               |
+         |                               |
+         |--------                       |----
+         |       |                       V
+         |---|---|---|---|---|---|---|---|--------->t
+         0   1   2   3   4   5   6   7   8
+
+
+         A            Task T2
+         |
+         |                               |
+         |                               |
+         |       ------------------------|
+         |       |                       V
+         |---|---|---|---|---|---|---|---|--------->t
+         0   1   2   3   4   5   6   7   8
+
+
+         A            running_bw
+         |
+       1 -----------------               ------
+         |               |               |
+      0.5-               -----------------
+         |                               |
+         |---|---|---|---|---|---|---|---|--------->t
+         0   1   2   3   4   5   6   7   8
+
+
+  - Time t = 0:
+
+    Both tasks are ready for execution and therefore in ActiveContending state.
+    Suppose Task T1 is the first task to start execution.
+    Since there are no inactive tasks, its runtime is decreased as dq = -1 dt.
+
+  - Time t = 2:
+
+    Suppose that task T1 blocks
+    Task T1 therefore enters the ActiveNonContending state. Since its remaining
+    runtime is equal to 2, its 0-lag time is equal to t = 4.
+    Task T2 start execution, with runtime still decreased as dq = -1 dt since
+    there are no inactive tasks.
+
+  - Time t = 4:
+
+    This is the 0-lag time for Task T1. Since it didn't woken up in the
+    meantime, it enters the Inactive state. Its bandwidth is removed from
+    running_bw.
+    Task T2 continues its execution. However, its runtime is now decreased as
+    dq = - 0.5 dt because Uinact = 0.5.
+    Task T2 therefore reclaims the bandwidth unused by Task T1.
+
+  - Time t = 8:
+
+    Task T1 wakes up. It enters the ActiveContending state again, and the
+    running_bw is incremented.
+
+
+2.3 Energy-aware scheduling
+---------------------------
+
+ When cpufreq's schedutil governor is selected, SCHED_DEADLINE implements the
+ GRUB-PA [19] algorithm, reducing the CPU operating frequency to the minimum
+ value that still allows to meet the deadlines. This behavior is currently
+ implemented only for ARM architectures.
+
+ A particular care must be taken in case the time needed for changing frequency
+ is of the same order of magnitude of the reservation period. In such cases,
+ setting a fixed CPU frequency results in a lower amount of deadline misses.
+
+
+3. Scheduling Real-Time Tasks
+=============================
+
+
+
+ ..  BIG FAT WARNING ******************************************************
+
+ .. warning::
+
+   This section contains a (not-thorough) summary on classical deadline
+   scheduling theory, and how it applies to SCHED_DEADLINE.
+   The reader can "safely" skip to Section 4 if only interested in seeing
+   how the scheduling policy can be used. Anyway, we strongly recommend
+   to come back here and continue reading (once the urge for testing is
+   satisfied :P) to be sure of fully understanding all technical details.
+
+ .. ************************************************************************
+
+ There are no limitations on what kind of task can exploit this new
+ scheduling discipline, even if it must be said that it is particularly
+ suited for periodic or sporadic real-time tasks that need guarantees on their
+ timing behavior, e.g., multimedia, streaming, control applications, etc.
+
+3.1 Definitions
+------------------------
+
+ A typical real-time task is composed of a repetition of computation phases
+ (task instances, or jobs) which are activated on a periodic or sporadic
+ fashion.
+ Each job J_j (where J_j is the j^th job of the task) is characterized by an
+ arrival time r_j (the time when the job starts), an amount of computation
+ time c_j needed to finish the job, and a job absolute deadline d_j, which
+ is the time within which the job should be finished. The maximum execution
+ time max{c_j} is called "Worst Case Execution Time" (WCET) for the task.
+ A real-time task can be periodic with period P if r_{j+1} = r_j + P, or
+ sporadic with minimum inter-arrival time P is r_{j+1} >= r_j + P. Finally,
+ d_j = r_j + D, where D is the task's relative deadline.
+ Summing up, a real-time task can be described as
+
+	Task = (WCET, D, P)
+
+ The utilization of a real-time task is defined as the ratio between its
+ WCET and its period (or minimum inter-arrival time), and represents
+ the fraction of CPU time needed to execute the task.
+
+ If the total utilization U=sum(WCET_i/P_i) is larger than M (with M equal
+ to the number of CPUs), then the scheduler is unable to respect all the
+ deadlines.
+ Note that total utilization is defined as the sum of the utilizations
+ WCET_i/P_i over all the real-time tasks in the system. When considering
+ multiple real-time tasks, the parameters of the i-th task are indicated
+ with the "_i" suffix.
+ Moreover, if the total utilization is larger than M, then we risk starving
+ non- real-time tasks by real-time tasks.
+ If, instead, the total utilization is smaller than M, then non real-time
+ tasks will not be starved and the system might be able to respect all the
+ deadlines.
+ As a matter of fact, in this case it is possible to provide an upper bound
+ for tardiness (defined as the maximum between 0 and the difference
+ between the finishing time of a job and its absolute deadline).
+ More precisely, it can be proven that using a global EDF scheduler the
+ maximum tardiness of each task is smaller or equal than
+
+	((M − 1) · WCET_max − WCET_min)/(M − (M − 2) · U_max) + WCET_max
+
+ where WCET_max = max{WCET_i} is the maximum WCET, WCET_min=min{WCET_i}
+ is the minimum WCET, and U_max = max{WCET_i/P_i} is the maximum
+ utilization[12].
+
+3.2 Schedulability Analysis for Uniprocessor Systems
+----------------------------------------------------
+
+ If M=1 (uniprocessor system), or in case of partitioned scheduling (each
+ real-time task is statically assigned to one and only one CPU), it is
+ possible to formally check if all the deadlines are respected.
+ If D_i = P_i for all tasks, then EDF is able to respect all the deadlines
+ of all the tasks executing on a CPU if and only if the total utilization
+ of the tasks running on such a CPU is smaller or equal than 1.
+ If D_i != P_i for some task, then it is possible to define the density of
+ a task as WCET_i/min{D_i,P_i}, and EDF is able to respect all the deadlines
+ of all the tasks running on a CPU if the sum of the densities of the tasks
+ running on such a CPU is smaller or equal than 1:
+
+	sum(WCET_i / min{D_i, P_i}) <= 1
+
+ It is important to notice that this condition is only sufficient, and not
+ necessary: there are task sets that are schedulable, but do not respect the
+ condition. For example, consider the task set {Task_1,Task_2} composed by
+ Task_1=(50ms,50ms,100ms) and Task_2=(10ms,100ms,100ms).
+ EDF is clearly able to schedule the two tasks without missing any deadline
+ (Task_1 is scheduled as soon as it is released, and finishes just in time
+ to respect its deadline; Task_2 is scheduled immediately after Task_1, hence
+ its response time cannot be larger than 50ms + 10ms = 60ms) even if
+
+	50 / min{50,100} + 10 / min{100, 100} = 50 / 50 + 10 / 100 = 1.1
+
+ Of course it is possible to test the exact schedulability of tasks with
+ D_i != P_i (checking a condition that is both sufficient and necessary),
+ but this cannot be done by comparing the total utilization or density with
+ a constant. Instead, the so called "processor demand" approach can be used,
+ computing the total amount of CPU time h(t) needed by all the tasks to
+ respect all of their deadlines in a time interval of size t, and comparing
+ such a time with the interval size t. If h(t) is smaller than t (that is,
+ the amount of time needed by the tasks in a time interval of size t is
+ smaller than the size of the interval) for all the possible values of t, then
+ EDF is able to schedule the tasks respecting all of their deadlines. Since
+ performing this check for all possible values of t is impossible, it has been
+ proven[4,5,6] that it is sufficient to perform the test for values of t
+ between 0 and a maximum value L. The cited papers contain all of the
+ mathematical details and explain how to compute h(t) and L.
+ In any case, this kind of analysis is too complex as well as too
+ time-consuming to be performed on-line. Hence, as explained in Section
+ 4 Linux uses an admission test based on the tasks' utilizations.
+
+3.3 Schedulability Analysis for Multiprocessor Systems
+------------------------------------------------------
+
+ On multiprocessor systems with global EDF scheduling (non partitioned
+ systems), a sufficient test for schedulability can not be based on the
+ utilizations or densities: it can be shown that even if D_i = P_i task
+ sets with utilizations slightly larger than 1 can miss deadlines regardless
+ of the number of CPUs.
+
+ Consider a set {Task_1,...Task_{M+1}} of M+1 tasks on a system with M
+ CPUs, with the first task Task_1=(P,P,P) having period, relative deadline
+ and WCET equal to P. The remaining M tasks Task_i=(e,P-1,P-1) have an
+ arbitrarily small worst case execution time (indicated as "e" here) and a
+ period smaller than the one of the first task. Hence, if all the tasks
+ activate at the same time t, global EDF schedules these M tasks first
+ (because their absolute deadlines are equal to t + P - 1, hence they are
+ smaller than the absolute deadline of Task_1, which is t + P). As a
+ result, Task_1 can be scheduled only at time t + e, and will finish at
+ time t + e + P, after its absolute deadline. The total utilization of the
+ task set is U = M · e / (P - 1) + P / P = M · e / (P - 1) + 1, and for small
+ values of e this can become very close to 1. This is known as "Dhall's
+ effect"[7]. Note: the example in the original paper by Dhall has been
+ slightly simplified here (for example, Dhall more correctly computed
+ lim_{e->0}U).
+
+ More complex schedulability tests for global EDF have been developed in
+ real-time literature[8,9], but they are not based on a simple comparison
+ between total utilization (or density) and a fixed constant. If all tasks
+ have D_i = P_i, a sufficient schedulability condition can be expressed in
+ a simple way:
+
+	sum(WCET_i / P_i) <= M - (M - 1) · U_max
+
+ where U_max = max{WCET_i / P_i}[10]. Notice that for U_max = 1,
+ M - (M - 1) · U_max becomes M - M + 1 = 1 and this schedulability condition
+ just confirms the Dhall's effect. A more complete survey of the literature
+ about schedulability tests for multi-processor real-time scheduling can be
+ found in [11].
+
+ As seen, enforcing that the total utilization is smaller than M does not
+ guarantee that global EDF schedules the tasks without missing any deadline
+ (in other words, global EDF is not an optimal scheduling algorithm). However,
+ a total utilization smaller than M is enough to guarantee that non real-time
+ tasks are not starved and that the tardiness of real-time tasks has an upper
+ bound[12] (as previously noted). Different bounds on the maximum tardiness
+ experienced by real-time tasks have been developed in various papers[13,14],
+ but the theoretical result that is important for SCHED_DEADLINE is that if
+ the total utilization is smaller or equal than M then the response times of
+ the tasks are limited.
+
+3.4 Relationship with SCHED_DEADLINE Parameters
+-----------------------------------------------
+
+ Finally, it is important to understand the relationship between the
+ SCHED_DEADLINE scheduling parameters described in Section 2 (runtime,
+ deadline and period) and the real-time task parameters (WCET, D, P)
+ described in this section. Note that the tasks' temporal constraints are
+ represented by its absolute deadlines d_j = r_j + D described above, while
+ SCHED_DEADLINE schedules the tasks according to scheduling deadlines (see
+ Section 2).
+ If an admission test is used to guarantee that the scheduling deadlines
+ are respected, then SCHED_DEADLINE can be used to schedule real-time tasks
+ guaranteeing that all the jobs' deadlines of a task are respected.
+ In order to do this, a task must be scheduled by setting:
+
+  - runtime >= WCET
+  - deadline = D
+  - period <= P
+
+ IOW, if runtime >= WCET and if period is <= P, then the scheduling deadlines
+ and the absolute deadlines (d_j) coincide, so a proper admission control
+ allows to respect the jobs' absolute deadlines for this task (this is what is
+ called "hard schedulability property" and is an extension of Lemma 1 of [2]).
+ Notice that if runtime > deadline the admission control will surely reject
+ this task, as it is not possible to respect its temporal constraints.
+
+ References:
+
+  1 - C. L. Liu and J. W. Layland. Scheduling algorithms for multiprogram-
+      ming in a hard-real-time environment. Journal of the Association for
+      Computing Machinery, 20(1), 1973.
+  2 - L. Abeni , G. Buttazzo. Integrating Multimedia Applications in Hard
+      Real-Time Systems. Proceedings of the 19th IEEE Real-time Systems
+      Symposium, 1998. http://retis.sssup.it/~giorgio/paps/1998/rtss98-cbs.pdf
+  3 - L. Abeni. Server Mechanisms for Multimedia Applications. ReTiS Lab
+      Technical Report. http://disi.unitn.it/~abeni/tr-98-01.pdf
+  4 - J. Y. Leung and M.L. Merril. A Note on Preemptive Scheduling of
+      Periodic, Real-Time Tasks. Information Processing Letters, vol. 11,
+      no. 3, pp. 115-118, 1980.
+  5 - S. K. Baruah, A. K. Mok and L. E. Rosier. Preemptively Scheduling
+      Hard-Real-Time Sporadic Tasks on One Processor. Proceedings of the
+      11th IEEE Real-time Systems Symposium, 1990.
+  6 - S. K. Baruah, L. E. Rosier and R. R. Howell. Algorithms and Complexity
+      Concerning the Preemptive Scheduling of Periodic Real-Time tasks on
+      One Processor. Real-Time Systems Journal, vol. 4, no. 2, pp 301-324,
+      1990.
+  7 - S. J. Dhall and C. L. Liu. On a real-time scheduling problem. Operations
+      research, vol. 26, no. 1, pp 127-140, 1978.
+  8 - T. Baker. Multiprocessor EDF and Deadline Monotonic Schedulability
+      Analysis. Proceedings of the 24th IEEE Real-Time Systems Symposium, 2003.
+  9 - T. Baker. An Analysis of EDF Schedulability on a Multiprocessor.
+      IEEE Transactions on Parallel and Distributed Systems, vol. 16, no. 8,
+      pp 760-768, 2005.
+  10 - J. Goossens, S. Funk and S. Baruah, Priority-Driven Scheduling of
+       Periodic Task Systems on Multiprocessors. Real-Time Systems Journal,
+       vol. 25, no. 2–3, pp. 187–205, 2003.
+  11 - R. Davis and A. Burns. A Survey of Hard Real-Time Scheduling for
+       Multiprocessor Systems. ACM Computing Surveys, vol. 43, no. 4, 2011.
+       http://www-users.cs.york.ac.uk/~robdavis/papers/MPSurveyv5.0.pdf
+  12 - U. C. Devi and J. H. Anderson. Tardiness Bounds under Global EDF
+       Scheduling on a Multiprocessor. Real-Time Systems Journal, vol. 32,
+       no. 2, pp 133-189, 2008.
+  13 - P. Valente and G. Lipari. An Upper Bound to the Lateness of Soft
+       Real-Time Tasks Scheduled by EDF on Multiprocessors. Proceedings of
+       the 26th IEEE Real-Time Systems Symposium, 2005.
+  14 - J. Erickson, U. Devi and S. Baruah. Improved tardiness bounds for
+       Global EDF. Proceedings of the 22nd Euromicro Conference on
+       Real-Time Systems, 2010.
+  15 - G. Lipari, S. Baruah, Greedy reclamation of unused bandwidth in
+       constant-bandwidth servers, 12th IEEE Euromicro Conference on Real-Time
+       Systems, 2000.
+  16 - L. Abeni, J. Lelli, C. Scordino, L. Palopoli, Greedy CPU reclaiming for
+       SCHED DEADLINE. In Proceedings of the Real-Time Linux Workshop (RTLWS),
+       Dusseldorf, Germany, 2014.
+  17 - L. Abeni, G. Lipari, A. Parri, Y. Sun, Multicore CPU reclaiming: parallel
+       or sequential?. In Proceedings of the 31st Annual ACM Symposium on Applied
+       Computing, 2016.
+  18 - J. Lelli, C. Scordino, L. Abeni, D. Faggioli, Deadline scheduling in the
+       Linux kernel, Software: Practice and Experience, 46(6): 821-839, June
+       2016.
+  19 - C. Scordino, L. Abeni, J. Lelli, Energy-Aware Real-Time Scheduling in
+       the Linux Kernel, 33rd ACM/SIGAPP Symposium On Applied Computing (SAC
+       2018), Pau, France, April 2018.
+
+
+4. Bandwidth management
+=======================
+
+ As previously mentioned, in order for -deadline scheduling to be
+ effective and useful (that is, to be able to provide "runtime" time units
+ within "deadline"), it is important to have some method to keep the allocation
+ of the available fractions of CPU time to the various tasks under control.
+ This is usually called "admission control" and if it is not performed, then
+ no guarantee can be given on the actual scheduling of the -deadline tasks.
+
+ As already stated in Section 3, a necessary condition to be respected to
+ correctly schedule a set of real-time tasks is that the total utilization
+ is smaller than M. When talking about -deadline tasks, this requires that
+ the sum of the ratio between runtime and period for all tasks is smaller
+ than M. Notice that the ratio runtime/period is equivalent to the utilization
+ of a "traditional" real-time task, and is also often referred to as
+ "bandwidth".
+ The interface used to control the CPU bandwidth that can be allocated
+ to -deadline tasks is similar to the one already used for -rt
+ tasks with real-time group scheduling (a.k.a. RT-throttling - see
+ Documentation/scheduler/sched-rt-group.rst), and is based on readable/
+ writable control files located in procfs (for system wide settings).
+ Notice that per-group settings (controlled through cgroupfs) are still not
+ defined for -deadline tasks, because more discussion is needed in order to
+ figure out how we want to manage SCHED_DEADLINE bandwidth at the task group
+ level.
+
+ A main difference between deadline bandwidth management and RT-throttling
+ is that -deadline tasks have bandwidth on their own (while -rt ones don't!),
+ and thus we don't need a higher level throttling mechanism to enforce the
+ desired bandwidth. In other words, this means that interface parameters are
+ only used at admission control time (i.e., when the user calls
+ sched_setattr()). Scheduling is then performed considering actual tasks'
+ parameters, so that CPU bandwidth is allocated to SCHED_DEADLINE tasks
+ respecting their needs in terms of granularity. Therefore, using this simple
+ interface we can put a cap on total utilization of -deadline tasks (i.e.,
+ \Sum (runtime_i / period_i) < global_dl_utilization_cap).
+
+4.1 System wide settings
+------------------------
+
+ The system wide settings are configured under the /proc virtual file system.
+
+ For now the -rt knobs are used for -deadline admission control and the
+ -deadline runtime is accounted against the -rt runtime. We realize that this
+ isn't entirely desirable; however, it is better to have a small interface for
+ now, and be able to change it easily later. The ideal situation (see 5.) is to
+ run -rt tasks from a -deadline server; in which case the -rt bandwidth is a
+ direct subset of dl_bw.
+
+ This means that, for a root_domain comprising M CPUs, -deadline tasks
+ can be created while the sum of their bandwidths stays below:
+
+   M * (sched_rt_runtime_us / sched_rt_period_us)
+
+ It is also possible to disable this bandwidth management logic, and
+ be thus free of oversubscribing the system up to any arbitrary level.
+ This is done by writing -1 in /proc/sys/kernel/sched_rt_runtime_us.
+
+
+4.2 Task interface
+------------------
+
+ Specifying a periodic/sporadic task that executes for a given amount of
+ runtime at each instance, and that is scheduled according to the urgency of
+ its own timing constraints needs, in general, a way of declaring:
+
+  - a (maximum/typical) instance execution time,
+  - a minimum interval between consecutive instances,
+  - a time constraint by which each instance must be completed.
+
+ Therefore:
+
+  * a new struct sched_attr, containing all the necessary fields is
+    provided;
+  * the new scheduling related syscalls that manipulate it, i.e.,
+    sched_setattr() and sched_getattr() are implemented.
+
+ For debugging purposes, the leftover runtime and absolute deadline of a
+ SCHED_DEADLINE task can be retrieved through /proc/<pid>/sched (entries
+ dl.runtime and dl.deadline, both values in ns). A programmatic way to
+ retrieve these values from production code is under discussion.
+
+
+4.3 Default behavior
+---------------------
+
+ The default value for SCHED_DEADLINE bandwidth is to have rt_runtime equal to
+ 950000. With rt_period equal to 1000000, by default, it means that -deadline
+ tasks can use at most 95%, multiplied by the number of CPUs that compose the
+ root_domain, for each root_domain.
+ This means that non -deadline tasks will receive at least 5% of the CPU time,
+ and that -deadline tasks will receive their runtime with a guaranteed
+ worst-case delay respect to the "deadline" parameter. If "deadline" = "period"
+ and the cpuset mechanism is used to implement partitioned scheduling (see
+ Section 5), then this simple setting of the bandwidth management is able to
+ deterministically guarantee that -deadline tasks will receive their runtime
+ in a period.
+
+ Finally, notice that in order not to jeopardize the admission control a
+ -deadline task cannot fork.
+
+
+4.4 Behavior of sched_yield()
+-----------------------------
+
+ When a SCHED_DEADLINE task calls sched_yield(), it gives up its
+ remaining runtime and is immediately throttled, until the next
+ period, when its runtime will be replenished (a special flag
+ dl_yielded is set and used to handle correctly throttling and runtime
+ replenishment after a call to sched_yield()).
+
+ This behavior of sched_yield() allows the task to wake-up exactly at
+ the beginning of the next period. Also, this may be useful in the
+ future with bandwidth reclaiming mechanisms, where sched_yield() will
+ make the leftoever runtime available for reclamation by other
+ SCHED_DEADLINE tasks.
+
+
+5. Tasks CPU affinity
+=====================
+
+ -deadline tasks cannot have an affinity mask smaller that the entire
+ root_domain they are created on. However, affinities can be specified
+ through the cpuset facility (Documentation/cgroup-v1/cpusets.txt).
+
+5.1 SCHED_DEADLINE and cpusets HOWTO
+------------------------------------
+
+ An example of a simple configuration (pin a -deadline task to CPU0)
+ follows (rt-app is used to create a -deadline task)::
+
+   mkdir /dev/cpuset
+   mount -t cgroup -o cpuset cpuset /dev/cpuset
+   cd /dev/cpuset
+   mkdir cpu0
+   echo 0 > cpu0/cpuset.cpus
+   echo 0 > cpu0/cpuset.mems
+   echo 1 > cpuset.cpu_exclusive
+   echo 0 > cpuset.sched_load_balance
+   echo 1 > cpu0/cpuset.cpu_exclusive
+   echo 1 > cpu0/cpuset.mem_exclusive
+   echo $$ > cpu0/tasks
+   rt-app -t 100000:10000:d:0 -D5 # it is now actually superfluous to specify
+				  # task affinity
+
+6. Future plans
+===============
+
+ Still missing:
+
+  - programmatic way to retrieve current runtime and absolute deadline
+  - refinements to deadline inheritance, especially regarding the possibility
+    of retaining bandwidth isolation among non-interacting tasks. This is
+    being studied from both theoretical and practical points of view, and
+    hopefully we should be able to produce some demonstrative code soon;
+  - (c)group based bandwidth management, and maybe scheduling;
+  - access control for non-root users (and related security concerns to
+    address), which is the best way to allow unprivileged use of the mechanisms
+    and how to prevent non-root users "cheat" the system?
+
+ As already discussed, we are planning also to merge this work with the EDF
+ throttling patches [https://lkml.org/lkml/2010/2/23/239] but we still are in
+ the preliminary phases of the merge and we really seek feedback that would
+ help us decide on the direction it should take.
+
+Appendix A. Test suite
+======================
+
+ The SCHED_DEADLINE policy can be easily tested using two applications that
+ are part of a wider Linux Scheduler validation suite. The suite is
+ available as a GitHub repository: https://github.com/scheduler-tools.
+
+ The first testing application is called rt-app and can be used to
+ start multiple threads with specific parameters. rt-app supports
+ SCHED_{OTHER,FIFO,RR,DEADLINE} scheduling policies and their related
+ parameters (e.g., niceness, priority, runtime/deadline/period). rt-app
+ is a valuable tool, as it can be used to synthetically recreate certain
+ workloads (maybe mimicking real use-cases) and evaluate how the scheduler
+ behaves under such workloads. In this way, results are easily reproducible.
+ rt-app is available at: https://github.com/scheduler-tools/rt-app.
+
+ Thread parameters can be specified from the command line, with something like
+ this::
+
+  # rt-app -t 100000:10000:d -t 150000:20000:f:10 -D5
+
+ The above creates 2 threads. The first one, scheduled by SCHED_DEADLINE,
+ executes for 10ms every 100ms. The second one, scheduled at SCHED_FIFO
+ priority 10, executes for 20ms every 150ms. The test will run for a total
+ of 5 seconds.
+
+ More interestingly, configurations can be described with a json file that
+ can be passed as input to rt-app with something like this::
+
+  # rt-app my_config.json
+
+ The parameters that can be specified with the second method are a superset
+ of the command line options. Please refer to rt-app documentation for more
+ details (`<rt-app-sources>/doc/*.json`).
+
+ The second testing application is a modification of schedtool, called
+ schedtool-dl, which can be used to setup SCHED_DEADLINE parameters for a
+ certain pid/application. schedtool-dl is available at:
+ https://github.com/scheduler-tools/schedtool-dl.git.
+
+ The usage is straightforward::
+
+  # schedtool -E -t 10000000:100000000 -e ./my_cpuhog_app
+
+ With this, my_cpuhog_app is put to run inside a SCHED_DEADLINE reservation
+ of 10ms every 100ms (note that parameters are expressed in microseconds).
+ You can also use schedtool to create a reservation for an already running
+ application, given that you know its pid::
+
+  # schedtool -E -t 10000000:100000000 my_app_pid
+
+Appendix B. Minimal main()
+==========================
+
+ We provide in what follows a simple (ugly) self-contained code snippet
+ showing how SCHED_DEADLINE reservations can be created by a real-time
+ application developer::
+
+   #define _GNU_SOURCE
+   #include <unistd.h>
+   #include <stdio.h>
+   #include <stdlib.h>
+   #include <string.h>
+   #include <time.h>
+   #include <linux/unistd.h>
+   #include <linux/kernel.h>
+   #include <linux/types.h>
+   #include <sys/syscall.h>
+   #include <pthread.h>
+
+   #define gettid() syscall(__NR_gettid)
+
+   #define SCHED_DEADLINE	6
+
+   /* XXX use the proper syscall numbers */
+   #ifdef __x86_64__
+   #define __NR_sched_setattr		314
+   #define __NR_sched_getattr		315
+   #endif
+
+   #ifdef __i386__
+   #define __NR_sched_setattr		351
+   #define __NR_sched_getattr		352
+   #endif
+
+   #ifdef __arm__
+   #define __NR_sched_setattr		380
+   #define __NR_sched_getattr		381
+   #endif
+
+   static volatile int done;
+
+   struct sched_attr {
+	__u32 size;
+
+	__u32 sched_policy;
+	__u64 sched_flags;
+
+	/* SCHED_NORMAL, SCHED_BATCH */
+	__s32 sched_nice;
+
+	/* SCHED_FIFO, SCHED_RR */
+	__u32 sched_priority;
+
+	/* SCHED_DEADLINE (nsec) */
+	__u64 sched_runtime;
+	__u64 sched_deadline;
+	__u64 sched_period;
+   };
+
+   int sched_setattr(pid_t pid,
+		  const struct sched_attr *attr,
+		  unsigned int flags)
+   {
+	return syscall(__NR_sched_setattr, pid, attr, flags);
+   }
+
+   int sched_getattr(pid_t pid,
+		  struct sched_attr *attr,
+		  unsigned int size,
+		  unsigned int flags)
+   {
+	return syscall(__NR_sched_getattr, pid, attr, size, flags);
+   }
+
+   void *run_deadline(void *data)
+   {
+	struct sched_attr attr;
+	int x = 0;
+	int ret;
+	unsigned int flags = 0;
+
+	printf("deadline thread started [%ld]\n", gettid());
+
+	attr.size = sizeof(attr);
+	attr.sched_flags = 0;
+	attr.sched_nice = 0;
+	attr.sched_priority = 0;
+
+	/* This creates a 10ms/30ms reservation */
+	attr.sched_policy = SCHED_DEADLINE;
+	attr.sched_runtime = 10 * 1000 * 1000;
+	attr.sched_period = attr.sched_deadline = 30 * 1000 * 1000;
+
+	ret = sched_setattr(0, &attr, flags);
+	if (ret < 0) {
+		done = 0;
+		perror("sched_setattr");
+		exit(-1);
+	}
+
+	while (!done) {
+		x++;
+	}
+
+	printf("deadline thread dies [%ld]\n", gettid());
+	return NULL;
+   }
+
+   int main (int argc, char **argv)
+   {
+	pthread_t thread;
+
+	printf("main thread [%ld]\n", gettid());
+
+	pthread_create(&thread, NULL, run_deadline, NULL);
+
+	sleep(10);
+
+	done = 1;
+	pthread_join(thread, NULL);
+
+	printf("main dies [%ld]\n", gettid());
+	return 0;
+   }
diff --git a/Documentation/scheduler/sched-deadline.txt b/Documentation/scheduler/sched-deadline.txt
deleted file mode 100644
index b14e03ff3528..000000000000
--- a/Documentation/scheduler/sched-deadline.txt
+++ /dev/null
@@ -1,871 +0,0 @@
-			  Deadline Task Scheduling
-			  ------------------------
-
-CONTENTS
-========
-
- 0. WARNING
- 1. Overview
- 2. Scheduling algorithm
-   2.1 Main algorithm
-   2.2 Bandwidth reclaiming
- 3. Scheduling Real-Time Tasks
-   3.1 Definitions
-   3.2 Schedulability Analysis for Uniprocessor Systems
-   3.3 Schedulability Analysis for Multiprocessor Systems
-   3.4 Relationship with SCHED_DEADLINE Parameters
- 4. Bandwidth management
-   4.1 System-wide settings
-   4.2 Task interface
-   4.3 Default behavior
-   4.4 Behavior of sched_yield()
- 5. Tasks CPU affinity
-   5.1 SCHED_DEADLINE and cpusets HOWTO
- 6. Future plans
- A. Test suite
- B. Minimal main()
-
-
-0. WARNING
-==========
-
- Fiddling with these settings can result in an unpredictable or even unstable
- system behavior. As for -rt (group) scheduling, it is assumed that root users
- know what they're doing.
-
-
-1. Overview
-===========
-
- The SCHED_DEADLINE policy contained inside the sched_dl scheduling class is
- basically an implementation of the Earliest Deadline First (EDF) scheduling
- algorithm, augmented with a mechanism (called Constant Bandwidth Server, CBS)
- that makes it possible to isolate the behavior of tasks between each other.
-
-
-2. Scheduling algorithm
-==================
-
-2.1 Main algorithm
-------------------
-
- SCHED_DEADLINE [18] uses three parameters, named "runtime", "period", and
- "deadline", to schedule tasks. A SCHED_DEADLINE task should receive
- "runtime" microseconds of execution time every "period" microseconds, and
- these "runtime" microseconds are available within "deadline" microseconds
- from the beginning of the period.  In order to implement this behavior,
- every time the task wakes up, the scheduler computes a "scheduling deadline"
- consistent with the guarantee (using the CBS[2,3] algorithm). Tasks are then
- scheduled using EDF[1] on these scheduling deadlines (the task with the
- earliest scheduling deadline is selected for execution). Notice that the
- task actually receives "runtime" time units within "deadline" if a proper
- "admission control" strategy (see Section "4. Bandwidth management") is used
- (clearly, if the system is overloaded this guarantee cannot be respected).
-
- Summing up, the CBS[2,3] algorithm assigns scheduling deadlines to tasks so
- that each task runs for at most its runtime every period, avoiding any
- interference between different tasks (bandwidth isolation), while the EDF[1]
- algorithm selects the task with the earliest scheduling deadline as the one
- to be executed next. Thanks to this feature, tasks that do not strictly comply
- with the "traditional" real-time task model (see Section 3) can effectively
- use the new policy.
-
- In more details, the CBS algorithm assigns scheduling deadlines to
- tasks in the following way:
-
-  - Each SCHED_DEADLINE task is characterized by the "runtime",
-    "deadline", and "period" parameters;
-
-  - The state of the task is described by a "scheduling deadline", and
-    a "remaining runtime". These two parameters are initially set to 0;
-
-  - When a SCHED_DEADLINE task wakes up (becomes ready for execution),
-    the scheduler checks if
-
-                 remaining runtime                  runtime
-        ----------------------------------    >    ---------
-        scheduling deadline - current time           period
-
-    then, if the scheduling deadline is smaller than the current time, or
-    this condition is verified, the scheduling deadline and the
-    remaining runtime are re-initialized as
-
-         scheduling deadline = current time + deadline
-         remaining runtime = runtime
-
-    otherwise, the scheduling deadline and the remaining runtime are
-    left unchanged;
-
-  - When a SCHED_DEADLINE task executes for an amount of time t, its
-    remaining runtime is decreased as
-
-         remaining runtime = remaining runtime - t
-
-    (technically, the runtime is decreased at every tick, or when the
-    task is descheduled / preempted);
-
-  - When the remaining runtime becomes less or equal than 0, the task is
-    said to be "throttled" (also known as "depleted" in real-time literature)
-    and cannot be scheduled until its scheduling deadline. The "replenishment
-    time" for this task (see next item) is set to be equal to the current
-    value of the scheduling deadline;
-
-  - When the current time is equal to the replenishment time of a
-    throttled task, the scheduling deadline and the remaining runtime are
-    updated as
-
-         scheduling deadline = scheduling deadline + period
-         remaining runtime = remaining runtime + runtime
-
- The SCHED_FLAG_DL_OVERRUN flag in sched_attr's sched_flags field allows a task
- to get informed about runtime overruns through the delivery of SIGXCPU
- signals.
-
-
-2.2 Bandwidth reclaiming
-------------------------
-
- Bandwidth reclaiming for deadline tasks is based on the GRUB (Greedy
- Reclamation of Unused Bandwidth) algorithm [15, 16, 17] and it is enabled
- when flag SCHED_FLAG_RECLAIM is set.
-
- The following diagram illustrates the state names for tasks handled by GRUB:
-
-                             ------------
-                 (d)        |   Active   |
-              ------------->|            |
-              |             | Contending |
-              |              ------------
-              |                A      |
-          ----------           |      |
-         |          |          |      |
-         | Inactive |          |(b)   | (a)
-         |          |          |      |
-          ----------           |      |
-              A                |      V
-              |              ------------
-              |             |   Active   |
-              --------------|     Non    |
-                 (c)        | Contending |
-                             ------------
-
- A task can be in one of the following states:
-
-  - ActiveContending: if it is ready for execution (or executing);
-
-  - ActiveNonContending: if it just blocked and has not yet surpassed the 0-lag
-    time;
-
-  - Inactive: if it is blocked and has surpassed the 0-lag time.
-
- State transitions:
-
-  (a) When a task blocks, it does not become immediately inactive since its
-      bandwidth cannot be immediately reclaimed without breaking the
-      real-time guarantees. It therefore enters a transitional state called
-      ActiveNonContending. The scheduler arms the "inactive timer" to fire at
-      the 0-lag time, when the task's bandwidth can be reclaimed without
-      breaking the real-time guarantees.
-
-      The 0-lag time for a task entering the ActiveNonContending state is
-      computed as
-
-                        (runtime * dl_period)
-             deadline - ---------------------
-                             dl_runtime
-
-      where runtime is the remaining runtime, while dl_runtime and dl_period
-      are the reservation parameters.
-
-  (b) If the task wakes up before the inactive timer fires, the task re-enters
-      the ActiveContending state and the "inactive timer" is canceled.
-      In addition, if the task wakes up on a different runqueue, then
-      the task's utilization must be removed from the previous runqueue's active
-      utilization and must be added to the new runqueue's active utilization.
-      In order to avoid races between a task waking up on a runqueue while the
-       "inactive timer" is running on a different CPU, the "dl_non_contending"
-      flag is used to indicate that a task is not on a runqueue but is active
-      (so, the flag is set when the task blocks and is cleared when the
-      "inactive timer" fires or when the task  wakes up).
-
-  (c) When the "inactive timer" fires, the task enters the Inactive state and
-      its utilization is removed from the runqueue's active utilization.
-
-  (d) When an inactive task wakes up, it enters the ActiveContending state and
-      its utilization is added to the active utilization of the runqueue where
-      it has been enqueued.
-
- For each runqueue, the algorithm GRUB keeps track of two different bandwidths:
-
-  - Active bandwidth (running_bw): this is the sum of the bandwidths of all
-    tasks in active state (i.e., ActiveContending or ActiveNonContending);
-
-  - Total bandwidth (this_bw): this is the sum of all tasks "belonging" to the
-    runqueue, including the tasks in Inactive state.
-
-
- The algorithm reclaims the bandwidth of the tasks in Inactive state.
- It does so by decrementing the runtime of the executing task Ti at a pace equal
- to
-
-           dq = -max{ Ui / Umax, (1 - Uinact - Uextra) } dt
-
- where:
-
-  - Ui is the bandwidth of task Ti;
-  - Umax is the maximum reclaimable utilization (subjected to RT throttling
-    limits);
-  - Uinact is the (per runqueue) inactive utilization, computed as
-    (this_bq - running_bw);
-  - Uextra is the (per runqueue) extra reclaimable utilization
-    (subjected to RT throttling limits).
-
-
- Let's now see a trivial example of two deadline tasks with runtime equal
- to 4 and period equal to 8 (i.e., bandwidth equal to 0.5):
-
-     A            Task T1
-     |
-     |                               |
-     |                               |
-     |--------                       |----
-     |       |                       V
-     |---|---|---|---|---|---|---|---|--------->t
-     0   1   2   3   4   5   6   7   8
-
-
-     A            Task T2
-     |
-     |                               |
-     |                               |
-     |       ------------------------|
-     |       |                       V
-     |---|---|---|---|---|---|---|---|--------->t
-     0   1   2   3   4   5   6   7   8
-
-
-     A            running_bw
-     |
-   1 -----------------               ------
-     |               |               |
-  0.5-               -----------------
-     |                               |
-     |---|---|---|---|---|---|---|---|--------->t
-     0   1   2   3   4   5   6   7   8
-
-
-  - Time t = 0:
-
-    Both tasks are ready for execution and therefore in ActiveContending state.
-    Suppose Task T1 is the first task to start execution.
-    Since there are no inactive tasks, its runtime is decreased as dq = -1 dt.
-
-  - Time t = 2:
-
-    Suppose that task T1 blocks
-    Task T1 therefore enters the ActiveNonContending state. Since its remaining
-    runtime is equal to 2, its 0-lag time is equal to t = 4.
-    Task T2 start execution, with runtime still decreased as dq = -1 dt since
-    there are no inactive tasks.
-
-  - Time t = 4:
-
-    This is the 0-lag time for Task T1. Since it didn't woken up in the
-    meantime, it enters the Inactive state. Its bandwidth is removed from
-    running_bw.
-    Task T2 continues its execution. However, its runtime is now decreased as
-    dq = - 0.5 dt because Uinact = 0.5.
-    Task T2 therefore reclaims the bandwidth unused by Task T1.
-
-  - Time t = 8:
-
-    Task T1 wakes up. It enters the ActiveContending state again, and the
-    running_bw is incremented.
-
-
-2.3 Energy-aware scheduling
-------------------------
-
- When cpufreq's schedutil governor is selected, SCHED_DEADLINE implements the
- GRUB-PA [19] algorithm, reducing the CPU operating frequency to the minimum
- value that still allows to meet the deadlines. This behavior is currently
- implemented only for ARM architectures.
-
- A particular care must be taken in case the time needed for changing frequency
- is of the same order of magnitude of the reservation period. In such cases,
- setting a fixed CPU frequency results in a lower amount of deadline misses.
-
-
-3. Scheduling Real-Time Tasks
-=============================
-
- * BIG FAT WARNING ******************************************************
- *
- * This section contains a (not-thorough) summary on classical deadline
- * scheduling theory, and how it applies to SCHED_DEADLINE.
- * The reader can "safely" skip to Section 4 if only interested in seeing
- * how the scheduling policy can be used. Anyway, we strongly recommend
- * to come back here and continue reading (once the urge for testing is
- * satisfied :P) to be sure of fully understanding all technical details.
- ************************************************************************
-
- There are no limitations on what kind of task can exploit this new
- scheduling discipline, even if it must be said that it is particularly
- suited for periodic or sporadic real-time tasks that need guarantees on their
- timing behavior, e.g., multimedia, streaming, control applications, etc.
-
-3.1 Definitions
-------------------------
-
- A typical real-time task is composed of a repetition of computation phases
- (task instances, or jobs) which are activated on a periodic or sporadic
- fashion.
- Each job J_j (where J_j is the j^th job of the task) is characterized by an
- arrival time r_j (the time when the job starts), an amount of computation
- time c_j needed to finish the job, and a job absolute deadline d_j, which
- is the time within which the job should be finished. The maximum execution
- time max{c_j} is called "Worst Case Execution Time" (WCET) for the task.
- A real-time task can be periodic with period P if r_{j+1} = r_j + P, or
- sporadic with minimum inter-arrival time P is r_{j+1} >= r_j + P. Finally,
- d_j = r_j + D, where D is the task's relative deadline.
- Summing up, a real-time task can be described as
-	Task = (WCET, D, P)
-
- The utilization of a real-time task is defined as the ratio between its
- WCET and its period (or minimum inter-arrival time), and represents
- the fraction of CPU time needed to execute the task.
-
- If the total utilization U=sum(WCET_i/P_i) is larger than M (with M equal
- to the number of CPUs), then the scheduler is unable to respect all the
- deadlines.
- Note that total utilization is defined as the sum of the utilizations
- WCET_i/P_i over all the real-time tasks in the system. When considering
- multiple real-time tasks, the parameters of the i-th task are indicated
- with the "_i" suffix.
- Moreover, if the total utilization is larger than M, then we risk starving
- non- real-time tasks by real-time tasks.
- If, instead, the total utilization is smaller than M, then non real-time
- tasks will not be starved and the system might be able to respect all the
- deadlines.
- As a matter of fact, in this case it is possible to provide an upper bound
- for tardiness (defined as the maximum between 0 and the difference
- between the finishing time of a job and its absolute deadline).
- More precisely, it can be proven that using a global EDF scheduler the
- maximum tardiness of each task is smaller or equal than
-	((M − 1) · WCET_max − WCET_min)/(M − (M − 2) · U_max) + WCET_max
- where WCET_max = max{WCET_i} is the maximum WCET, WCET_min=min{WCET_i}
- is the minimum WCET, and U_max = max{WCET_i/P_i} is the maximum
- utilization[12].
-
-3.2 Schedulability Analysis for Uniprocessor Systems
-------------------------
-
- If M=1 (uniprocessor system), or in case of partitioned scheduling (each
- real-time task is statically assigned to one and only one CPU), it is
- possible to formally check if all the deadlines are respected.
- If D_i = P_i for all tasks, then EDF is able to respect all the deadlines
- of all the tasks executing on a CPU if and only if the total utilization
- of the tasks running on such a CPU is smaller or equal than 1.
- If D_i != P_i for some task, then it is possible to define the density of
- a task as WCET_i/min{D_i,P_i}, and EDF is able to respect all the deadlines
- of all the tasks running on a CPU if the sum of the densities of the tasks
- running on such a CPU is smaller or equal than 1:
-	sum(WCET_i / min{D_i, P_i}) <= 1
- It is important to notice that this condition is only sufficient, and not
- necessary: there are task sets that are schedulable, but do not respect the
- condition. For example, consider the task set {Task_1,Task_2} composed by
- Task_1=(50ms,50ms,100ms) and Task_2=(10ms,100ms,100ms).
- EDF is clearly able to schedule the two tasks without missing any deadline
- (Task_1 is scheduled as soon as it is released, and finishes just in time
- to respect its deadline; Task_2 is scheduled immediately after Task_1, hence
- its response time cannot be larger than 50ms + 10ms = 60ms) even if
-	50 / min{50,100} + 10 / min{100, 100} = 50 / 50 + 10 / 100 = 1.1
- Of course it is possible to test the exact schedulability of tasks with
- D_i != P_i (checking a condition that is both sufficient and necessary),
- but this cannot be done by comparing the total utilization or density with
- a constant. Instead, the so called "processor demand" approach can be used,
- computing the total amount of CPU time h(t) needed by all the tasks to
- respect all of their deadlines in a time interval of size t, and comparing
- such a time with the interval size t. If h(t) is smaller than t (that is,
- the amount of time needed by the tasks in a time interval of size t is
- smaller than the size of the interval) for all the possible values of t, then
- EDF is able to schedule the tasks respecting all of their deadlines. Since
- performing this check for all possible values of t is impossible, it has been
- proven[4,5,6] that it is sufficient to perform the test for values of t
- between 0 and a maximum value L. The cited papers contain all of the
- mathematical details and explain how to compute h(t) and L.
- In any case, this kind of analysis is too complex as well as too
- time-consuming to be performed on-line. Hence, as explained in Section
- 4 Linux uses an admission test based on the tasks' utilizations.
-
-3.3 Schedulability Analysis for Multiprocessor Systems
-------------------------
-
- On multiprocessor systems with global EDF scheduling (non partitioned
- systems), a sufficient test for schedulability can not be based on the
- utilizations or densities: it can be shown that even if D_i = P_i task
- sets with utilizations slightly larger than 1 can miss deadlines regardless
- of the number of CPUs.
-
- Consider a set {Task_1,...Task_{M+1}} of M+1 tasks on a system with M
- CPUs, with the first task Task_1=(P,P,P) having period, relative deadline
- and WCET equal to P. The remaining M tasks Task_i=(e,P-1,P-1) have an
- arbitrarily small worst case execution time (indicated as "e" here) and a
- period smaller than the one of the first task. Hence, if all the tasks
- activate at the same time t, global EDF schedules these M tasks first
- (because their absolute deadlines are equal to t + P - 1, hence they are
- smaller than the absolute deadline of Task_1, which is t + P). As a
- result, Task_1 can be scheduled only at time t + e, and will finish at
- time t + e + P, after its absolute deadline. The total utilization of the
- task set is U = M · e / (P - 1) + P / P = M · e / (P - 1) + 1, and for small
- values of e this can become very close to 1. This is known as "Dhall's
- effect"[7]. Note: the example in the original paper by Dhall has been
- slightly simplified here (for example, Dhall more correctly computed
- lim_{e->0}U).
-
- More complex schedulability tests for global EDF have been developed in
- real-time literature[8,9], but they are not based on a simple comparison
- between total utilization (or density) and a fixed constant. If all tasks
- have D_i = P_i, a sufficient schedulability condition can be expressed in
- a simple way:
-	sum(WCET_i / P_i) <= M - (M - 1) · U_max
- where U_max = max{WCET_i / P_i}[10]. Notice that for U_max = 1,
- M - (M - 1) · U_max becomes M - M + 1 = 1 and this schedulability condition
- just confirms the Dhall's effect. A more complete survey of the literature
- about schedulability tests for multi-processor real-time scheduling can be
- found in [11].
-
- As seen, enforcing that the total utilization is smaller than M does not
- guarantee that global EDF schedules the tasks without missing any deadline
- (in other words, global EDF is not an optimal scheduling algorithm). However,
- a total utilization smaller than M is enough to guarantee that non real-time
- tasks are not starved and that the tardiness of real-time tasks has an upper
- bound[12] (as previously noted). Different bounds on the maximum tardiness
- experienced by real-time tasks have been developed in various papers[13,14],
- but the theoretical result that is important for SCHED_DEADLINE is that if
- the total utilization is smaller or equal than M then the response times of
- the tasks are limited.
-
-3.4 Relationship with SCHED_DEADLINE Parameters
-------------------------
-
- Finally, it is important to understand the relationship between the
- SCHED_DEADLINE scheduling parameters described in Section 2 (runtime,
- deadline and period) and the real-time task parameters (WCET, D, P)
- described in this section. Note that the tasks' temporal constraints are
- represented by its absolute deadlines d_j = r_j + D described above, while
- SCHED_DEADLINE schedules the tasks according to scheduling deadlines (see
- Section 2).
- If an admission test is used to guarantee that the scheduling deadlines
- are respected, then SCHED_DEADLINE can be used to schedule real-time tasks
- guaranteeing that all the jobs' deadlines of a task are respected.
- In order to do this, a task must be scheduled by setting:
-
-  - runtime >= WCET
-  - deadline = D
-  - period <= P
-
- IOW, if runtime >= WCET and if period is <= P, then the scheduling deadlines
- and the absolute deadlines (d_j) coincide, so a proper admission control
- allows to respect the jobs' absolute deadlines for this task (this is what is
- called "hard schedulability property" and is an extension of Lemma 1 of [2]).
- Notice that if runtime > deadline the admission control will surely reject
- this task, as it is not possible to respect its temporal constraints.
-
- References:
-  1 - C. L. Liu and J. W. Layland. Scheduling algorithms for multiprogram-
-      ming in a hard-real-time environment. Journal of the Association for
-      Computing Machinery, 20(1), 1973.
-  2 - L. Abeni , G. Buttazzo. Integrating Multimedia Applications in Hard
-      Real-Time Systems. Proceedings of the 19th IEEE Real-time Systems
-      Symposium, 1998. http://retis.sssup.it/~giorgio/paps/1998/rtss98-cbs.pdf
-  3 - L. Abeni. Server Mechanisms for Multimedia Applications. ReTiS Lab
-      Technical Report. http://disi.unitn.it/~abeni/tr-98-01.pdf
-  4 - J. Y. Leung and M.L. Merril. A Note on Preemptive Scheduling of
-      Periodic, Real-Time Tasks. Information Processing Letters, vol. 11,
-      no. 3, pp. 115-118, 1980.
-  5 - S. K. Baruah, A. K. Mok and L. E. Rosier. Preemptively Scheduling
-      Hard-Real-Time Sporadic Tasks on One Processor. Proceedings of the
-      11th IEEE Real-time Systems Symposium, 1990.
-  6 - S. K. Baruah, L. E. Rosier and R. R. Howell. Algorithms and Complexity
-      Concerning the Preemptive Scheduling of Periodic Real-Time tasks on
-      One Processor. Real-Time Systems Journal, vol. 4, no. 2, pp 301-324,
-      1990.
-  7 - S. J. Dhall and C. L. Liu. On a real-time scheduling problem. Operations
-      research, vol. 26, no. 1, pp 127-140, 1978.
-  8 - T. Baker. Multiprocessor EDF and Deadline Monotonic Schedulability
-      Analysis. Proceedings of the 24th IEEE Real-Time Systems Symposium, 2003.
-  9 - T. Baker. An Analysis of EDF Schedulability on a Multiprocessor.
-      IEEE Transactions on Parallel and Distributed Systems, vol. 16, no. 8,
-      pp 760-768, 2005.
-  10 - J. Goossens, S. Funk and S. Baruah, Priority-Driven Scheduling of
-       Periodic Task Systems on Multiprocessors. Real-Time Systems Journal,
-       vol. 25, no. 2–3, pp. 187–205, 2003.
-  11 - R. Davis and A. Burns. A Survey of Hard Real-Time Scheduling for
-       Multiprocessor Systems. ACM Computing Surveys, vol. 43, no. 4, 2011.
-       http://www-users.cs.york.ac.uk/~robdavis/papers/MPSurveyv5.0.pdf
-  12 - U. C. Devi and J. H. Anderson. Tardiness Bounds under Global EDF
-       Scheduling on a Multiprocessor. Real-Time Systems Journal, vol. 32,
-       no. 2, pp 133-189, 2008.
-  13 - P. Valente and G. Lipari. An Upper Bound to the Lateness of Soft
-       Real-Time Tasks Scheduled by EDF on Multiprocessors. Proceedings of
-       the 26th IEEE Real-Time Systems Symposium, 2005.
-  14 - J. Erickson, U. Devi and S. Baruah. Improved tardiness bounds for
-       Global EDF. Proceedings of the 22nd Euromicro Conference on
-       Real-Time Systems, 2010.
-  15 - G. Lipari, S. Baruah, Greedy reclamation of unused bandwidth in
-       constant-bandwidth servers, 12th IEEE Euromicro Conference on Real-Time
-       Systems, 2000.
-  16 - L. Abeni, J. Lelli, C. Scordino, L. Palopoli, Greedy CPU reclaiming for
-       SCHED DEADLINE. In Proceedings of the Real-Time Linux Workshop (RTLWS),
-       Dusseldorf, Germany, 2014.
-  17 - L. Abeni, G. Lipari, A. Parri, Y. Sun, Multicore CPU reclaiming: parallel
-       or sequential?. In Proceedings of the 31st Annual ACM Symposium on Applied
-       Computing, 2016.
-  18 - J. Lelli, C. Scordino, L. Abeni, D. Faggioli, Deadline scheduling in the
-       Linux kernel, Software: Practice and Experience, 46(6): 821-839, June
-       2016.
-  19 - C. Scordino, L. Abeni, J. Lelli, Energy-Aware Real-Time Scheduling in
-       the Linux Kernel, 33rd ACM/SIGAPP Symposium On Applied Computing (SAC
-       2018), Pau, France, April 2018.
-
-
-4. Bandwidth management
-=======================
-
- As previously mentioned, in order for -deadline scheduling to be
- effective and useful (that is, to be able to provide "runtime" time units
- within "deadline"), it is important to have some method to keep the allocation
- of the available fractions of CPU time to the various tasks under control.
- This is usually called "admission control" and if it is not performed, then
- no guarantee can be given on the actual scheduling of the -deadline tasks.
-
- As already stated in Section 3, a necessary condition to be respected to
- correctly schedule a set of real-time tasks is that the total utilization
- is smaller than M. When talking about -deadline tasks, this requires that
- the sum of the ratio between runtime and period for all tasks is smaller
- than M. Notice that the ratio runtime/period is equivalent to the utilization
- of a "traditional" real-time task, and is also often referred to as
- "bandwidth".
- The interface used to control the CPU bandwidth that can be allocated
- to -deadline tasks is similar to the one already used for -rt
- tasks with real-time group scheduling (a.k.a. RT-throttling - see
- Documentation/scheduler/sched-rt-group.txt), and is based on readable/
- writable control files located in procfs (for system wide settings).
- Notice that per-group settings (controlled through cgroupfs) are still not
- defined for -deadline tasks, because more discussion is needed in order to
- figure out how we want to manage SCHED_DEADLINE bandwidth at the task group
- level.
-
- A main difference between deadline bandwidth management and RT-throttling
- is that -deadline tasks have bandwidth on their own (while -rt ones don't!),
- and thus we don't need a higher level throttling mechanism to enforce the
- desired bandwidth. In other words, this means that interface parameters are
- only used at admission control time (i.e., when the user calls
- sched_setattr()). Scheduling is then performed considering actual tasks'
- parameters, so that CPU bandwidth is allocated to SCHED_DEADLINE tasks
- respecting their needs in terms of granularity. Therefore, using this simple
- interface we can put a cap on total utilization of -deadline tasks (i.e.,
- \Sum (runtime_i / period_i) < global_dl_utilization_cap).
-
-4.1 System wide settings
-------------------------
-
- The system wide settings are configured under the /proc virtual file system.
-
- For now the -rt knobs are used for -deadline admission control and the
- -deadline runtime is accounted against the -rt runtime. We realize that this
- isn't entirely desirable; however, it is better to have a small interface for
- now, and be able to change it easily later. The ideal situation (see 5.) is to
- run -rt tasks from a -deadline server; in which case the -rt bandwidth is a
- direct subset of dl_bw.
-
- This means that, for a root_domain comprising M CPUs, -deadline tasks
- can be created while the sum of their bandwidths stays below:
-
-   M * (sched_rt_runtime_us / sched_rt_period_us)
-
- It is also possible to disable this bandwidth management logic, and
- be thus free of oversubscribing the system up to any arbitrary level.
- This is done by writing -1 in /proc/sys/kernel/sched_rt_runtime_us.
-
-
-4.2 Task interface
-------------------
-
- Specifying a periodic/sporadic task that executes for a given amount of
- runtime at each instance, and that is scheduled according to the urgency of
- its own timing constraints needs, in general, a way of declaring:
-  - a (maximum/typical) instance execution time,
-  - a minimum interval between consecutive instances,
-  - a time constraint by which each instance must be completed.
-
- Therefore:
-  * a new struct sched_attr, containing all the necessary fields is
-    provided;
-  * the new scheduling related syscalls that manipulate it, i.e.,
-    sched_setattr() and sched_getattr() are implemented.
-
- For debugging purposes, the leftover runtime and absolute deadline of a
- SCHED_DEADLINE task can be retrieved through /proc/<pid>/sched (entries
- dl.runtime and dl.deadline, both values in ns). A programmatic way to
- retrieve these values from production code is under discussion.
-
-
-4.3 Default behavior
----------------------
-
- The default value for SCHED_DEADLINE bandwidth is to have rt_runtime equal to
- 950000. With rt_period equal to 1000000, by default, it means that -deadline
- tasks can use at most 95%, multiplied by the number of CPUs that compose the
- root_domain, for each root_domain.
- This means that non -deadline tasks will receive at least 5% of the CPU time,
- and that -deadline tasks will receive their runtime with a guaranteed
- worst-case delay respect to the "deadline" parameter. If "deadline" = "period"
- and the cpuset mechanism is used to implement partitioned scheduling (see
- Section 5), then this simple setting of the bandwidth management is able to
- deterministically guarantee that -deadline tasks will receive their runtime
- in a period.
-
- Finally, notice that in order not to jeopardize the admission control a
- -deadline task cannot fork.
-
-
-4.4 Behavior of sched_yield()
------------------------------
-
- When a SCHED_DEADLINE task calls sched_yield(), it gives up its
- remaining runtime and is immediately throttled, until the next
- period, when its runtime will be replenished (a special flag
- dl_yielded is set and used to handle correctly throttling and runtime
- replenishment after a call to sched_yield()).
-
- This behavior of sched_yield() allows the task to wake-up exactly at
- the beginning of the next period. Also, this may be useful in the
- future with bandwidth reclaiming mechanisms, where sched_yield() will
- make the leftoever runtime available for reclamation by other
- SCHED_DEADLINE tasks.
-
-
-5. Tasks CPU affinity
-=====================
-
- -deadline tasks cannot have an affinity mask smaller that the entire
- root_domain they are created on. However, affinities can be specified
- through the cpuset facility (Documentation/cgroup-v1/cpusets.txt).
-
-5.1 SCHED_DEADLINE and cpusets HOWTO
-------------------------------------
-
- An example of a simple configuration (pin a -deadline task to CPU0)
- follows (rt-app is used to create a -deadline task).
-
- mkdir /dev/cpuset
- mount -t cgroup -o cpuset cpuset /dev/cpuset
- cd /dev/cpuset
- mkdir cpu0
- echo 0 > cpu0/cpuset.cpus
- echo 0 > cpu0/cpuset.mems
- echo 1 > cpuset.cpu_exclusive
- echo 0 > cpuset.sched_load_balance
- echo 1 > cpu0/cpuset.cpu_exclusive
- echo 1 > cpu0/cpuset.mem_exclusive
- echo $$ > cpu0/tasks
- rt-app -t 100000:10000:d:0 -D5 (it is now actually superfluous to specify
- task affinity)
-
-6. Future plans
-===============
-
- Still missing:
-
-  - programmatic way to retrieve current runtime and absolute deadline
-  - refinements to deadline inheritance, especially regarding the possibility
-    of retaining bandwidth isolation among non-interacting tasks. This is
-    being studied from both theoretical and practical points of view, and
-    hopefully we should be able to produce some demonstrative code soon;
-  - (c)group based bandwidth management, and maybe scheduling;
-  - access control for non-root users (and related security concerns to
-    address), which is the best way to allow unprivileged use of the mechanisms
-    and how to prevent non-root users "cheat" the system?
-
- As already discussed, we are planning also to merge this work with the EDF
- throttling patches [https://lkml.org/lkml/2010/2/23/239] but we still are in
- the preliminary phases of the merge and we really seek feedback that would
- help us decide on the direction it should take.
-
-Appendix A. Test suite
-======================
-
- The SCHED_DEADLINE policy can be easily tested using two applications that
- are part of a wider Linux Scheduler validation suite. The suite is
- available as a GitHub repository: https://github.com/scheduler-tools.
-
- The first testing application is called rt-app and can be used to
- start multiple threads with specific parameters. rt-app supports
- SCHED_{OTHER,FIFO,RR,DEADLINE} scheduling policies and their related
- parameters (e.g., niceness, priority, runtime/deadline/period). rt-app
- is a valuable tool, as it can be used to synthetically recreate certain
- workloads (maybe mimicking real use-cases) and evaluate how the scheduler
- behaves under such workloads. In this way, results are easily reproducible.
- rt-app is available at: https://github.com/scheduler-tools/rt-app.
-
- Thread parameters can be specified from the command line, with something like
- this:
-
-  # rt-app -t 100000:10000:d -t 150000:20000:f:10 -D5
-
- The above creates 2 threads. The first one, scheduled by SCHED_DEADLINE,
- executes for 10ms every 100ms. The second one, scheduled at SCHED_FIFO
- priority 10, executes for 20ms every 150ms. The test will run for a total
- of 5 seconds.
-
- More interestingly, configurations can be described with a json file that
- can be passed as input to rt-app with something like this:
-
-  # rt-app my_config.json
-
- The parameters that can be specified with the second method are a superset
- of the command line options. Please refer to rt-app documentation for more
- details (<rt-app-sources>/doc/*.json).
-
- The second testing application is a modification of schedtool, called
- schedtool-dl, which can be used to setup SCHED_DEADLINE parameters for a
- certain pid/application. schedtool-dl is available at:
- https://github.com/scheduler-tools/schedtool-dl.git.
-
- The usage is straightforward:
-
-  # schedtool -E -t 10000000:100000000 -e ./my_cpuhog_app
-
- With this, my_cpuhog_app is put to run inside a SCHED_DEADLINE reservation
- of 10ms every 100ms (note that parameters are expressed in microseconds).
- You can also use schedtool to create a reservation for an already running
- application, given that you know its pid:
-
-  # schedtool -E -t 10000000:100000000 my_app_pid
-
-Appendix B. Minimal main()
-==========================
-
- We provide in what follows a simple (ugly) self-contained code snippet
- showing how SCHED_DEADLINE reservations can be created by a real-time
- application developer.
-
- #define _GNU_SOURCE
- #include <unistd.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <time.h>
- #include <linux/unistd.h>
- #include <linux/kernel.h>
- #include <linux/types.h>
- #include <sys/syscall.h>
- #include <pthread.h>
-
- #define gettid() syscall(__NR_gettid)
-
- #define SCHED_DEADLINE	6
-
- /* XXX use the proper syscall numbers */
- #ifdef __x86_64__
- #define __NR_sched_setattr		314
- #define __NR_sched_getattr		315
- #endif
-
- #ifdef __i386__
- #define __NR_sched_setattr		351
- #define __NR_sched_getattr		352
- #endif
-
- #ifdef __arm__
- #define __NR_sched_setattr		380
- #define __NR_sched_getattr		381
- #endif
-
- static volatile int done;
-
- struct sched_attr {
-	__u32 size;
-
-	__u32 sched_policy;
-	__u64 sched_flags;
-
-	/* SCHED_NORMAL, SCHED_BATCH */
-	__s32 sched_nice;
-
-	/* SCHED_FIFO, SCHED_RR */
-	__u32 sched_priority;
-
-	/* SCHED_DEADLINE (nsec) */
-	__u64 sched_runtime;
-	__u64 sched_deadline;
-	__u64 sched_period;
- };
-
- int sched_setattr(pid_t pid,
-		  const struct sched_attr *attr,
-		  unsigned int flags)
- {
-	return syscall(__NR_sched_setattr, pid, attr, flags);
- }
-
- int sched_getattr(pid_t pid,
-		  struct sched_attr *attr,
-		  unsigned int size,
-		  unsigned int flags)
- {
-	return syscall(__NR_sched_getattr, pid, attr, size, flags);
- }
-
- void *run_deadline(void *data)
- {
-	struct sched_attr attr;
-	int x = 0;
-	int ret;
-	unsigned int flags = 0;
-
-	printf("deadline thread started [%ld]\n", gettid());
-
-	attr.size = sizeof(attr);
-	attr.sched_flags = 0;
-	attr.sched_nice = 0;
-	attr.sched_priority = 0;
-
-	/* This creates a 10ms/30ms reservation */
-	attr.sched_policy = SCHED_DEADLINE;
-	attr.sched_runtime = 10 * 1000 * 1000;
-	attr.sched_period = attr.sched_deadline = 30 * 1000 * 1000;
-
-	ret = sched_setattr(0, &attr, flags);
-	if (ret < 0) {
-		done = 0;
-		perror("sched_setattr");
-		exit(-1);
-	}
-
-	while (!done) {
-		x++;
-	}
-
-	printf("deadline thread dies [%ld]\n", gettid());
-	return NULL;
- }
-
- int main (int argc, char **argv)
- {
-	pthread_t thread;
-
-	printf("main thread [%ld]\n", gettid());
-
-	pthread_create(&thread, NULL, run_deadline, NULL);
-
-	sleep(10);
-
-	done = 1;
-	pthread_join(thread, NULL);
-
-	printf("main dies [%ld]\n", gettid());
-	return 0;
- }
diff --git a/Documentation/scheduler/sched-design-CFS.rst b/Documentation/scheduler/sched-design-CFS.rst
new file mode 100644
index 000000000000..82406685365a
--- /dev/null
+++ b/Documentation/scheduler/sched-design-CFS.rst
@@ -0,0 +1,249 @@
+=============
+CFS Scheduler
+=============
+
+
+1.  OVERVIEW
+============
+
+CFS stands for "Completely Fair Scheduler," and is the new "desktop" process
+scheduler implemented by Ingo Molnar and merged in Linux 2.6.23.  It is the
+replacement for the previous vanilla scheduler's SCHED_OTHER interactivity
+code.
+
+80% of CFS's design can be summed up in a single sentence: CFS basically models
+an "ideal, precise multi-tasking CPU" on real hardware.
+
+"Ideal multi-tasking CPU" is a (non-existent  :-)) CPU that has 100% physical
+power and which can run each task at precise equal speed, in parallel, each at
+1/nr_running speed.  For example: if there are 2 tasks running, then it runs
+each at 50% physical power --- i.e., actually in parallel.
+
+On real hardware, we can run only a single task at once, so we have to
+introduce the concept of "virtual runtime."  The virtual runtime of a task
+specifies when its next timeslice would start execution on the ideal
+multi-tasking CPU described above.  In practice, the virtual runtime of a task
+is its actual runtime normalized to the total number of running tasks.
+
+
+
+2.  FEW IMPLEMENTATION DETAILS
+==============================
+
+In CFS the virtual runtime is expressed and tracked via the per-task
+p->se.vruntime (nanosec-unit) value.  This way, it's possible to accurately
+timestamp and measure the "expected CPU time" a task should have gotten.
+
+[ small detail: on "ideal" hardware, at any time all tasks would have the same
+  p->se.vruntime value --- i.e., tasks would execute simultaneously and no task
+  would ever get "out of balance" from the "ideal" share of CPU time.  ]
+
+CFS's task picking logic is based on this p->se.vruntime value and it is thus
+very simple: it always tries to run the task with the smallest p->se.vruntime
+value (i.e., the task which executed least so far).  CFS always tries to split
+up CPU time between runnable tasks as close to "ideal multitasking hardware" as
+possible.
+
+Most of the rest of CFS's design just falls out of this really simple concept,
+with a few add-on embellishments like nice levels, multiprocessing and various
+algorithm variants to recognize sleepers.
+
+
+
+3.  THE RBTREE
+==============
+
+CFS's design is quite radical: it does not use the old data structures for the
+runqueues, but it uses a time-ordered rbtree to build a "timeline" of future
+task execution, and thus has no "array switch" artifacts (by which both the
+previous vanilla scheduler and RSDL/SD are affected).
+
+CFS also maintains the rq->cfs.min_vruntime value, which is a monotonic
+increasing value tracking the smallest vruntime among all tasks in the
+runqueue.  The total amount of work done by the system is tracked using
+min_vruntime; that value is used to place newly activated entities on the left
+side of the tree as much as possible.
+
+The total number of running tasks in the runqueue is accounted through the
+rq->cfs.load value, which is the sum of the weights of the tasks queued on the
+runqueue.
+
+CFS maintains a time-ordered rbtree, where all runnable tasks are sorted by the
+p->se.vruntime key. CFS picks the "leftmost" task from this tree and sticks to it.
+As the system progresses forwards, the executed tasks are put into the tree
+more and more to the right --- slowly but surely giving a chance for every task
+to become the "leftmost task" and thus get on the CPU within a deterministic
+amount of time.
+
+Summing up, CFS works like this: it runs a task a bit, and when the task
+schedules (or a scheduler tick happens) the task's CPU usage is "accounted
+for": the (small) time it just spent using the physical CPU is added to
+p->se.vruntime.  Once p->se.vruntime gets high enough so that another task
+becomes the "leftmost task" of the time-ordered rbtree it maintains (plus a
+small amount of "granularity" distance relative to the leftmost task so that we
+do not over-schedule tasks and trash the cache), then the new leftmost task is
+picked and the current task is preempted.
+
+
+
+4.  SOME FEATURES OF CFS
+========================
+
+CFS uses nanosecond granularity accounting and does not rely on any jiffies or
+other HZ detail.  Thus the CFS scheduler has no notion of "timeslices" in the
+way the previous scheduler had, and has no heuristics whatsoever.  There is
+only one central tunable (you have to switch on CONFIG_SCHED_DEBUG):
+
+   /proc/sys/kernel/sched_min_granularity_ns
+
+which can be used to tune the scheduler from "desktop" (i.e., low latencies) to
+"server" (i.e., good batching) workloads.  It defaults to a setting suitable
+for desktop workloads.  SCHED_BATCH is handled by the CFS scheduler module too.
+
+Due to its design, the CFS scheduler is not prone to any of the "attacks" that
+exist today against the heuristics of the stock scheduler: fiftyp.c, thud.c,
+chew.c, ring-test.c, massive_intr.c all work fine and do not impact
+interactivity and produce the expected behavior.
+
+The CFS scheduler has a much stronger handling of nice levels and SCHED_BATCH
+than the previous vanilla scheduler: both types of workloads are isolated much
+more aggressively.
+
+SMP load-balancing has been reworked/sanitized: the runqueue-walking
+assumptions are gone from the load-balancing code now, and iterators of the
+scheduling modules are used.  The balancing code got quite a bit simpler as a
+result.
+
+
+
+5. Scheduling policies
+======================
+
+CFS implements three scheduling policies:
+
+  - SCHED_NORMAL (traditionally called SCHED_OTHER): The scheduling
+    policy that is used for regular tasks.
+
+  - SCHED_BATCH: Does not preempt nearly as often as regular tasks
+    would, thereby allowing tasks to run longer and make better use of
+    caches but at the cost of interactivity. This is well suited for
+    batch jobs.
+
+  - SCHED_IDLE: This is even weaker than nice 19, but its not a true
+    idle timer scheduler in order to avoid to get into priority
+    inversion problems which would deadlock the machine.
+
+SCHED_FIFO/_RR are implemented in sched/rt.c and are as specified by
+POSIX.
+
+The command chrt from util-linux-ng 2.13.1.1 can set all of these except
+SCHED_IDLE.
+
+
+
+6.  SCHEDULING CLASSES
+======================
+
+The new CFS scheduler has been designed in such a way to introduce "Scheduling
+Classes," an extensible hierarchy of scheduler modules.  These modules
+encapsulate scheduling policy details and are handled by the scheduler core
+without the core code assuming too much about them.
+
+sched/fair.c implements the CFS scheduler described above.
+
+sched/rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler way than
+the previous vanilla scheduler did.  It uses 100 runqueues (for all 100 RT
+priority levels, instead of 140 in the previous scheduler) and it needs no
+expired array.
+
+Scheduling classes are implemented through the sched_class structure, which
+contains hooks to functions that must be called whenever an interesting event
+occurs.
+
+This is the (partial) list of the hooks:
+
+ - enqueue_task(...)
+
+   Called when a task enters a runnable state.
+   It puts the scheduling entity (task) into the red-black tree and
+   increments the nr_running variable.
+
+ - dequeue_task(...)
+
+   When a task is no longer runnable, this function is called to keep the
+   corresponding scheduling entity out of the red-black tree.  It decrements
+   the nr_running variable.
+
+ - yield_task(...)
+
+   This function is basically just a dequeue followed by an enqueue, unless the
+   compat_yield sysctl is turned on; in that case, it places the scheduling
+   entity at the right-most end of the red-black tree.
+
+ - check_preempt_curr(...)
+
+   This function checks if a task that entered the runnable state should
+   preempt the currently running task.
+
+ - pick_next_task(...)
+
+   This function chooses the most appropriate task eligible to run next.
+
+ - set_curr_task(...)
+
+   This function is called when a task changes its scheduling class or changes
+   its task group.
+
+ - task_tick(...)
+
+   This function is mostly called from time tick functions; it might lead to
+   process switch.  This drives the running preemption.
+
+
+
+
+7.  GROUP SCHEDULER EXTENSIONS TO CFS
+=====================================
+
+Normally, the scheduler operates on individual tasks and strives to provide
+fair CPU time to each task.  Sometimes, it may be desirable to group tasks and
+provide fair CPU time to each such task group.  For example, it may be
+desirable to first provide fair CPU time to each user on the system and then to
+each task belonging to a user.
+
+CONFIG_CGROUP_SCHED strives to achieve exactly that.  It lets tasks to be
+grouped and divides CPU time fairly among such groups.
+
+CONFIG_RT_GROUP_SCHED permits to group real-time (i.e., SCHED_FIFO and
+SCHED_RR) tasks.
+
+CONFIG_FAIR_GROUP_SCHED permits to group CFS (i.e., SCHED_NORMAL and
+SCHED_BATCH) tasks.
+
+   These options need CONFIG_CGROUPS to be defined, and let the administrator
+   create arbitrary groups of tasks, using the "cgroup" pseudo filesystem.  See
+   Documentation/cgroup-v1/cgroups.txt for more information about this filesystem.
+
+When CONFIG_FAIR_GROUP_SCHED is defined, a "cpu.shares" file is created for each
+group created using the pseudo filesystem.  See example steps below to create
+task groups and modify their CPU share using the "cgroups" pseudo filesystem::
+
+	# mount -t tmpfs cgroup_root /sys/fs/cgroup
+	# mkdir /sys/fs/cgroup/cpu
+	# mount -t cgroup -ocpu none /sys/fs/cgroup/cpu
+	# cd /sys/fs/cgroup/cpu
+
+	# mkdir multimedia	# create "multimedia" group of tasks
+	# mkdir browser		# create "browser" group of tasks
+
+	# #Configure the multimedia group to receive twice the CPU bandwidth
+	# #that of browser group
+
+	# echo 2048 > multimedia/cpu.shares
+	# echo 1024 > browser/cpu.shares
+
+	# firefox &	# Launch firefox and move it to "browser" group
+	# echo <firefox_pid> > browser/tasks
+
+	# #Launch gmplayer (or your favourite movie player)
+	# echo <movie_player_pid> > multimedia/tasks
diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt
deleted file mode 100644
index edd861c94c1b..000000000000
--- a/Documentation/scheduler/sched-design-CFS.txt
+++ /dev/null
@@ -1,242 +0,0 @@
-                      =============
-                      CFS Scheduler
-                      =============
-
-
-1.  OVERVIEW
-
-CFS stands for "Completely Fair Scheduler," and is the new "desktop" process
-scheduler implemented by Ingo Molnar and merged in Linux 2.6.23.  It is the
-replacement for the previous vanilla scheduler's SCHED_OTHER interactivity
-code.
-
-80% of CFS's design can be summed up in a single sentence: CFS basically models
-an "ideal, precise multi-tasking CPU" on real hardware.
-
-"Ideal multi-tasking CPU" is a (non-existent  :-)) CPU that has 100% physical
-power and which can run each task at precise equal speed, in parallel, each at
-1/nr_running speed.  For example: if there are 2 tasks running, then it runs
-each at 50% physical power --- i.e., actually in parallel.
-
-On real hardware, we can run only a single task at once, so we have to
-introduce the concept of "virtual runtime."  The virtual runtime of a task
-specifies when its next timeslice would start execution on the ideal
-multi-tasking CPU described above.  In practice, the virtual runtime of a task
-is its actual runtime normalized to the total number of running tasks.
-
-
-
-2.  FEW IMPLEMENTATION DETAILS
-
-In CFS the virtual runtime is expressed and tracked via the per-task
-p->se.vruntime (nanosec-unit) value.  This way, it's possible to accurately
-timestamp and measure the "expected CPU time" a task should have gotten.
-
-[ small detail: on "ideal" hardware, at any time all tasks would have the same
-  p->se.vruntime value --- i.e., tasks would execute simultaneously and no task
-  would ever get "out of balance" from the "ideal" share of CPU time.  ]
-
-CFS's task picking logic is based on this p->se.vruntime value and it is thus
-very simple: it always tries to run the task with the smallest p->se.vruntime
-value (i.e., the task which executed least so far).  CFS always tries to split
-up CPU time between runnable tasks as close to "ideal multitasking hardware" as
-possible.
-
-Most of the rest of CFS's design just falls out of this really simple concept,
-with a few add-on embellishments like nice levels, multiprocessing and various
-algorithm variants to recognize sleepers.
-
-
-
-3.  THE RBTREE
-
-CFS's design is quite radical: it does not use the old data structures for the
-runqueues, but it uses a time-ordered rbtree to build a "timeline" of future
-task execution, and thus has no "array switch" artifacts (by which both the
-previous vanilla scheduler and RSDL/SD are affected).
-
-CFS also maintains the rq->cfs.min_vruntime value, which is a monotonic
-increasing value tracking the smallest vruntime among all tasks in the
-runqueue.  The total amount of work done by the system is tracked using
-min_vruntime; that value is used to place newly activated entities on the left
-side of the tree as much as possible.
-
-The total number of running tasks in the runqueue is accounted through the
-rq->cfs.load value, which is the sum of the weights of the tasks queued on the
-runqueue.
-
-CFS maintains a time-ordered rbtree, where all runnable tasks are sorted by the
-p->se.vruntime key. CFS picks the "leftmost" task from this tree and sticks to it.
-As the system progresses forwards, the executed tasks are put into the tree
-more and more to the right --- slowly but surely giving a chance for every task
-to become the "leftmost task" and thus get on the CPU within a deterministic
-amount of time.
-
-Summing up, CFS works like this: it runs a task a bit, and when the task
-schedules (or a scheduler tick happens) the task's CPU usage is "accounted
-for": the (small) time it just spent using the physical CPU is added to
-p->se.vruntime.  Once p->se.vruntime gets high enough so that another task
-becomes the "leftmost task" of the time-ordered rbtree it maintains (plus a
-small amount of "granularity" distance relative to the leftmost task so that we
-do not over-schedule tasks and trash the cache), then the new leftmost task is
-picked and the current task is preempted.
-
-
-
-4.  SOME FEATURES OF CFS
-
-CFS uses nanosecond granularity accounting and does not rely on any jiffies or
-other HZ detail.  Thus the CFS scheduler has no notion of "timeslices" in the
-way the previous scheduler had, and has no heuristics whatsoever.  There is
-only one central tunable (you have to switch on CONFIG_SCHED_DEBUG):
-
-   /proc/sys/kernel/sched_min_granularity_ns
-
-which can be used to tune the scheduler from "desktop" (i.e., low latencies) to
-"server" (i.e., good batching) workloads.  It defaults to a setting suitable
-for desktop workloads.  SCHED_BATCH is handled by the CFS scheduler module too.
-
-Due to its design, the CFS scheduler is not prone to any of the "attacks" that
-exist today against the heuristics of the stock scheduler: fiftyp.c, thud.c,
-chew.c, ring-test.c, massive_intr.c all work fine and do not impact
-interactivity and produce the expected behavior.
-
-The CFS scheduler has a much stronger handling of nice levels and SCHED_BATCH
-than the previous vanilla scheduler: both types of workloads are isolated much
-more aggressively.
-
-SMP load-balancing has been reworked/sanitized: the runqueue-walking
-assumptions are gone from the load-balancing code now, and iterators of the
-scheduling modules are used.  The balancing code got quite a bit simpler as a
-result.
-
-
-
-5. Scheduling policies
-
-CFS implements three scheduling policies:
-
-  - SCHED_NORMAL (traditionally called SCHED_OTHER): The scheduling
-    policy that is used for regular tasks.
-
-  - SCHED_BATCH: Does not preempt nearly as often as regular tasks
-    would, thereby allowing tasks to run longer and make better use of
-    caches but at the cost of interactivity. This is well suited for
-    batch jobs.
-
-  - SCHED_IDLE: This is even weaker than nice 19, but its not a true
-    idle timer scheduler in order to avoid to get into priority
-    inversion problems which would deadlock the machine.
-
-SCHED_FIFO/_RR are implemented in sched/rt.c and are as specified by
-POSIX.
-
-The command chrt from util-linux-ng 2.13.1.1 can set all of these except
-SCHED_IDLE.
-
-
-
-6.  SCHEDULING CLASSES
-
-The new CFS scheduler has been designed in such a way to introduce "Scheduling
-Classes," an extensible hierarchy of scheduler modules.  These modules
-encapsulate scheduling policy details and are handled by the scheduler core
-without the core code assuming too much about them.
-
-sched/fair.c implements the CFS scheduler described above.
-
-sched/rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler way than
-the previous vanilla scheduler did.  It uses 100 runqueues (for all 100 RT
-priority levels, instead of 140 in the previous scheduler) and it needs no
-expired array.
-
-Scheduling classes are implemented through the sched_class structure, which
-contains hooks to functions that must be called whenever an interesting event
-occurs.
-
-This is the (partial) list of the hooks:
-
- - enqueue_task(...)
-
-   Called when a task enters a runnable state.
-   It puts the scheduling entity (task) into the red-black tree and
-   increments the nr_running variable.
-
- - dequeue_task(...)
-
-   When a task is no longer runnable, this function is called to keep the
-   corresponding scheduling entity out of the red-black tree.  It decrements
-   the nr_running variable.
-
- - yield_task(...)
-
-   This function is basically just a dequeue followed by an enqueue, unless the
-   compat_yield sysctl is turned on; in that case, it places the scheduling
-   entity at the right-most end of the red-black tree.
-
- - check_preempt_curr(...)
-
-   This function checks if a task that entered the runnable state should
-   preempt the currently running task.
-
- - pick_next_task(...)
-
-   This function chooses the most appropriate task eligible to run next.
-
- - set_curr_task(...)
-
-   This function is called when a task changes its scheduling class or changes
-   its task group.
-
- - task_tick(...)
-
-   This function is mostly called from time tick functions; it might lead to
-   process switch.  This drives the running preemption.
-
-
-
-
-7.  GROUP SCHEDULER EXTENSIONS TO CFS
-
-Normally, the scheduler operates on individual tasks and strives to provide
-fair CPU time to each task.  Sometimes, it may be desirable to group tasks and
-provide fair CPU time to each such task group.  For example, it may be
-desirable to first provide fair CPU time to each user on the system and then to
-each task belonging to a user.
-
-CONFIG_CGROUP_SCHED strives to achieve exactly that.  It lets tasks to be
-grouped and divides CPU time fairly among such groups.
-
-CONFIG_RT_GROUP_SCHED permits to group real-time (i.e., SCHED_FIFO and
-SCHED_RR) tasks.
-
-CONFIG_FAIR_GROUP_SCHED permits to group CFS (i.e., SCHED_NORMAL and
-SCHED_BATCH) tasks.
-
-   These options need CONFIG_CGROUPS to be defined, and let the administrator
-   create arbitrary groups of tasks, using the "cgroup" pseudo filesystem.  See
-   Documentation/cgroup-v1/cgroups.txt for more information about this filesystem.
-
-When CONFIG_FAIR_GROUP_SCHED is defined, a "cpu.shares" file is created for each
-group created using the pseudo filesystem.  See example steps below to create
-task groups and modify their CPU share using the "cgroups" pseudo filesystem.
-
-	# mount -t tmpfs cgroup_root /sys/fs/cgroup
-	# mkdir /sys/fs/cgroup/cpu
-	# mount -t cgroup -ocpu none /sys/fs/cgroup/cpu
-	# cd /sys/fs/cgroup/cpu
-
-	# mkdir multimedia	# create "multimedia" group of tasks
-	# mkdir browser		# create "browser" group of tasks
-
-	# #Configure the multimedia group to receive twice the CPU bandwidth
-	# #that of browser group
-
-	# echo 2048 > multimedia/cpu.shares
-	# echo 1024 > browser/cpu.shares
-
-	# firefox &	# Launch firefox and move it to "browser" group
-	# echo <firefox_pid> > browser/tasks
-
-	# #Launch gmplayer (or your favourite movie player)
-	# echo <movie_player_pid> > multimedia/tasks
diff --git a/Documentation/scheduler/sched-domains.rst b/Documentation/scheduler/sched-domains.rst
new file mode 100644
index 000000000000..f7504226f445
--- /dev/null
+++ b/Documentation/scheduler/sched-domains.rst
@@ -0,0 +1,83 @@
+=================
+Scheduler Domains
+=================
+
+Each CPU has a "base" scheduling domain (struct sched_domain). The domain
+hierarchy is built from these base domains via the ->parent pointer. ->parent
+MUST be NULL terminated, and domain structures should be per-CPU as they are
+locklessly updated.
+
+Each scheduling domain spans a number of CPUs (stored in the ->span field).
+A domain's span MUST be a superset of it child's span (this restriction could
+be relaxed if the need arises), and a base domain for CPU i MUST span at least
+i. The top domain for each CPU will generally span all CPUs in the system
+although strictly it doesn't have to, but this could lead to a case where some
+CPUs will never be given tasks to run unless the CPUs allowed mask is
+explicitly set. A sched domain's span means "balance process load among these
+CPUs".
+
+Each scheduling domain must have one or more CPU groups (struct sched_group)
+which are organised as a circular one way linked list from the ->groups
+pointer. The union of cpumasks of these groups MUST be the same as the
+domain's span. The intersection of cpumasks from any two of these groups
+MUST be the empty set. The group pointed to by the ->groups pointer MUST
+contain the CPU to which the domain belongs. Groups may be shared among
+CPUs as they contain read only data after they have been set up.
+
+Balancing within a sched domain occurs between groups. That is, each group
+is treated as one entity. The load of a group is defined as the sum of the
+load of each of its member CPUs, and only when the load of a group becomes
+out of balance are tasks moved between groups.
+
+In kernel/sched/core.c, trigger_load_balance() is run periodically on each CPU
+through scheduler_tick(). It raises a softirq after the next regularly scheduled
+rebalancing event for the current runqueue has arrived. The actual load
+balancing workhorse, run_rebalance_domains()->rebalance_domains(), is then run
+in softirq context (SCHED_SOFTIRQ).
+
+The latter function takes two arguments: the current CPU and whether it was idle
+at the time the scheduler_tick() happened and iterates over all sched domains
+our CPU is on, starting from its base domain and going up the ->parent chain.
+While doing that, it checks to see if the current domain has exhausted its
+rebalance interval. If so, it runs load_balance() on that domain. It then checks
+the parent sched_domain (if it exists), and the parent of the parent and so
+forth.
+
+Initially, load_balance() finds the busiest group in the current sched domain.
+If it succeeds, it looks for the busiest runqueue of all the CPUs' runqueues in
+that group. If it manages to find such a runqueue, it locks both our initial
+CPU's runqueue and the newly found busiest one and starts moving tasks from it
+to our runqueue. The exact number of tasks amounts to an imbalance previously
+computed while iterating over this sched domain's groups.
+
+Implementing sched domains
+==========================
+
+The "base" domain will "span" the first level of the hierarchy. In the case
+of SMT, you'll span all siblings of the physical CPU, with each group being
+a single virtual CPU.
+
+In SMP, the parent of the base domain will span all physical CPUs in the
+node. Each group being a single physical CPU. Then with NUMA, the parent
+of the SMP domain will span the entire machine, with each group having the
+cpumask of a node. Or, you could do multi-level NUMA or Opteron, for example,
+might have just one domain covering its one NUMA level.
+
+The implementor should read comments in include/linux/sched.h:
+struct sched_domain fields, SD_FLAG_*, SD_*_INIT to get an idea of
+the specifics and what to tune.
+
+Architectures may retain the regular override the default SD_*_INIT flags
+while using the generic domain builder in kernel/sched/core.c if they wish to
+retain the traditional SMT->SMP->NUMA topology (or some subset of that). This
+can be done by #define'ing ARCH_HASH_SCHED_TUNE.
+
+Alternatively, the architecture may completely override the generic domain
+builder by #define'ing ARCH_HASH_SCHED_DOMAIN, and exporting your
+arch_init_sched_domains function. This function will attach domains to all
+CPUs using cpu_attach_domain.
+
+The sched-domains debugging infrastructure can be enabled by enabling
+CONFIG_SCHED_DEBUG. This enables an error checking parse of the sched domains
+which should catch most possible errors (described above). It also prints out
+the domain structure in a visual format.
diff --git a/Documentation/scheduler/sched-domains.txt b/Documentation/scheduler/sched-domains.txt
deleted file mode 100644
index 4af80b1c05aa..000000000000
--- a/Documentation/scheduler/sched-domains.txt
+++ /dev/null
@@ -1,77 +0,0 @@
-Each CPU has a "base" scheduling domain (struct sched_domain). The domain
-hierarchy is built from these base domains via the ->parent pointer. ->parent
-MUST be NULL terminated, and domain structures should be per-CPU as they are
-locklessly updated.
-
-Each scheduling domain spans a number of CPUs (stored in the ->span field).
-A domain's span MUST be a superset of it child's span (this restriction could
-be relaxed if the need arises), and a base domain for CPU i MUST span at least
-i. The top domain for each CPU will generally span all CPUs in the system
-although strictly it doesn't have to, but this could lead to a case where some
-CPUs will never be given tasks to run unless the CPUs allowed mask is
-explicitly set. A sched domain's span means "balance process load among these
-CPUs".
-
-Each scheduling domain must have one or more CPU groups (struct sched_group)
-which are organised as a circular one way linked list from the ->groups
-pointer. The union of cpumasks of these groups MUST be the same as the
-domain's span. The intersection of cpumasks from any two of these groups
-MUST be the empty set. The group pointed to by the ->groups pointer MUST
-contain the CPU to which the domain belongs. Groups may be shared among
-CPUs as they contain read only data after they have been set up.
-
-Balancing within a sched domain occurs between groups. That is, each group
-is treated as one entity. The load of a group is defined as the sum of the
-load of each of its member CPUs, and only when the load of a group becomes
-out of balance are tasks moved between groups.
-
-In kernel/sched/core.c, trigger_load_balance() is run periodically on each CPU
-through scheduler_tick(). It raises a softirq after the next regularly scheduled
-rebalancing event for the current runqueue has arrived. The actual load
-balancing workhorse, run_rebalance_domains()->rebalance_domains(), is then run
-in softirq context (SCHED_SOFTIRQ).
-
-The latter function takes two arguments: the current CPU and whether it was idle
-at the time the scheduler_tick() happened and iterates over all sched domains
-our CPU is on, starting from its base domain and going up the ->parent chain.
-While doing that, it checks to see if the current domain has exhausted its
-rebalance interval. If so, it runs load_balance() on that domain. It then checks
-the parent sched_domain (if it exists), and the parent of the parent and so
-forth.
-
-Initially, load_balance() finds the busiest group in the current sched domain.
-If it succeeds, it looks for the busiest runqueue of all the CPUs' runqueues in
-that group. If it manages to find such a runqueue, it locks both our initial
-CPU's runqueue and the newly found busiest one and starts moving tasks from it
-to our runqueue. The exact number of tasks amounts to an imbalance previously
-computed while iterating over this sched domain's groups.
-
-*** Implementing sched domains ***
-The "base" domain will "span" the first level of the hierarchy. In the case
-of SMT, you'll span all siblings of the physical CPU, with each group being
-a single virtual CPU.
-
-In SMP, the parent of the base domain will span all physical CPUs in the
-node. Each group being a single physical CPU. Then with NUMA, the parent
-of the SMP domain will span the entire machine, with each group having the
-cpumask of a node. Or, you could do multi-level NUMA or Opteron, for example,
-might have just one domain covering its one NUMA level.
-
-The implementor should read comments in include/linux/sched.h:
-struct sched_domain fields, SD_FLAG_*, SD_*_INIT to get an idea of
-the specifics and what to tune.
-
-Architectures may retain the regular override the default SD_*_INIT flags
-while using the generic domain builder in kernel/sched/core.c if they wish to
-retain the traditional SMT->SMP->NUMA topology (or some subset of that). This
-can be done by #define'ing ARCH_HASH_SCHED_TUNE.
-
-Alternatively, the architecture may completely override the generic domain
-builder by #define'ing ARCH_HASH_SCHED_DOMAIN, and exporting your
-arch_init_sched_domains function. This function will attach domains to all
-CPUs using cpu_attach_domain.
-
-The sched-domains debugging infrastructure can be enabled by enabling
-CONFIG_SCHED_DEBUG. This enables an error checking parse of the sched domains
-which should catch most possible errors (described above). It also prints out
-the domain structure in a visual format.
diff --git a/Documentation/scheduler/sched-energy.rst b/Documentation/scheduler/sched-energy.rst
new file mode 100644
index 000000000000..fce5858c9082
--- /dev/null
+++ b/Documentation/scheduler/sched-energy.rst
@@ -0,0 +1,430 @@
+=======================
+Energy Aware Scheduling
+=======================
+
+1. Introduction
+---------------
+
+Energy Aware Scheduling (or EAS) gives the scheduler the ability to predict
+the impact of its decisions on the energy consumed by CPUs. EAS relies on an
+Energy Model (EM) of the CPUs to select an energy efficient CPU for each task,
+with a minimal impact on throughput. This document aims at providing an
+introduction on how EAS works, what are the main design decisions behind it, and
+details what is needed to get it to run.
+
+Before going any further, please note that at the time of writing::
+
+   /!\ EAS does not support platforms with symmetric CPU topologies /!\
+
+EAS operates only on heterogeneous CPU topologies (such as Arm big.LITTLE)
+because this is where the potential for saving energy through scheduling is
+the highest.
+
+The actual EM used by EAS is _not_ maintained by the scheduler, but by a
+dedicated framework. For details about this framework and what it provides,
+please refer to its documentation (see Documentation/power/energy-model.txt).
+
+
+2. Background and Terminology
+-----------------------------
+
+To make it clear from the start:
+ - energy = [joule] (resource like a battery on powered devices)
+ - power = energy/time = [joule/second] = [watt]
+
+The goal of EAS is to minimize energy, while still getting the job done. That
+is, we want to maximize::
+
+	performance [inst/s]
+	--------------------
+	    power [W]
+
+which is equivalent to minimizing::
+
+	energy [J]
+	-----------
+	instruction
+
+while still getting 'good' performance. It is essentially an alternative
+optimization objective to the current performance-only objective for the
+scheduler. This alternative considers two objectives: energy-efficiency and
+performance.
+
+The idea behind introducing an EM is to allow the scheduler to evaluate the
+implications of its decisions rather than blindly applying energy-saving
+techniques that may have positive effects only on some platforms. At the same
+time, the EM must be as simple as possible to minimize the scheduler latency
+impact.
+
+In short, EAS changes the way CFS tasks are assigned to CPUs. When it is time
+for the scheduler to decide where a task should run (during wake-up), the EM
+is used to break the tie between several good CPU candidates and pick the one
+that is predicted to yield the best energy consumption without harming the
+system's throughput. The predictions made by EAS rely on specific elements of
+knowledge about the platform's topology, which include the 'capacity' of CPUs,
+and their respective energy costs.
+
+
+3. Topology information
+-----------------------
+
+EAS (as well as the rest of the scheduler) uses the notion of 'capacity' to
+differentiate CPUs with different computing throughput. The 'capacity' of a CPU
+represents the amount of work it can absorb when running at its highest
+frequency compared to the most capable CPU of the system. Capacity values are
+normalized in a 1024 range, and are comparable with the utilization signals of
+tasks and CPUs computed by the Per-Entity Load Tracking (PELT) mechanism. Thanks
+to capacity and utilization values, EAS is able to estimate how big/busy a
+task/CPU is, and to take this into consideration when evaluating performance vs
+energy trade-offs. The capacity of CPUs is provided via arch-specific code
+through the arch_scale_cpu_capacity() callback.
+
+The rest of platform knowledge used by EAS is directly read from the Energy
+Model (EM) framework. The EM of a platform is composed of a power cost table
+per 'performance domain' in the system (see Documentation/power/energy-model.txt
+for futher details about performance domains).
+
+The scheduler manages references to the EM objects in the topology code when the
+scheduling domains are built, or re-built. For each root domain (rd), the
+scheduler maintains a singly linked list of all performance domains intersecting
+the current rd->span. Each node in the list contains a pointer to a struct
+em_perf_domain as provided by the EM framework.
+
+The lists are attached to the root domains in order to cope with exclusive
+cpuset configurations. Since the boundaries of exclusive cpusets do not
+necessarily match those of performance domains, the lists of different root
+domains can contain duplicate elements.
+
+Example 1.
+    Let us consider a platform with 12 CPUs, split in 3 performance domains
+    (pd0, pd4 and pd8), organized as follows::
+
+	          CPUs:   0 1 2 3 4 5 6 7 8 9 10 11
+	          PDs:   |--pd0--|--pd4--|---pd8---|
+	          RDs:   |----rd1----|-----rd2-----|
+
+    Now, consider that userspace decided to split the system with two
+    exclusive cpusets, hence creating two independent root domains, each
+    containing 6 CPUs. The two root domains are denoted rd1 and rd2 in the
+    above figure. Since pd4 intersects with both rd1 and rd2, it will be
+    present in the linked list '->pd' attached to each of them:
+
+       * rd1->pd: pd0 -> pd4
+       * rd2->pd: pd4 -> pd8
+
+    Please note that the scheduler will create two duplicate list nodes for
+    pd4 (one for each list). However, both just hold a pointer to the same
+    shared data structure of the EM framework.
+
+Since the access to these lists can happen concurrently with hotplug and other
+things, they are protected by RCU, like the rest of topology structures
+manipulated by the scheduler.
+
+EAS also maintains a static key (sched_energy_present) which is enabled when at
+least one root domain meets all conditions for EAS to start. Those conditions
+are summarized in Section 6.
+
+
+4. Energy-Aware task placement
+------------------------------
+
+EAS overrides the CFS task wake-up balancing code. It uses the EM of the
+platform and the PELT signals to choose an energy-efficient target CPU during
+wake-up balance. When EAS is enabled, select_task_rq_fair() calls
+find_energy_efficient_cpu() to do the placement decision. This function looks
+for the CPU with the highest spare capacity (CPU capacity - CPU utilization) in
+each performance domain since it is the one which will allow us to keep the
+frequency the lowest. Then, the function checks if placing the task there could
+save energy compared to leaving it on prev_cpu, i.e. the CPU where the task ran
+in its previous activation.
+
+find_energy_efficient_cpu() uses compute_energy() to estimate what will be the
+energy consumed by the system if the waking task was migrated. compute_energy()
+looks at the current utilization landscape of the CPUs and adjusts it to
+'simulate' the task migration. The EM framework provides the em_pd_energy() API
+which computes the expected energy consumption of each performance domain for
+the given utilization landscape.
+
+An example of energy-optimized task placement decision is detailed below.
+
+Example 2.
+    Let us consider a (fake) platform with 2 independent performance domains
+    composed of two CPUs each. CPU0 and CPU1 are little CPUs; CPU2 and CPU3
+    are big.
+
+    The scheduler must decide where to place a task P whose util_avg = 200
+    and prev_cpu = 0.
+
+    The current utilization landscape of the CPUs is depicted on the graph
+    below. CPUs 0-3 have a util_avg of 400, 100, 600 and 500 respectively
+    Each performance domain has three Operating Performance Points (OPPs).
+    The CPU capacity and power cost associated with each OPP is listed in
+    the Energy Model table. The util_avg of P is shown on the figures
+    below as 'PP'::
+
+     CPU util.
+      1024                 - - - - - - -              Energy Model
+                                               +-----------+-------------+
+                                               |  Little   |     Big     |
+       768                 =============       +-----+-----+------+------+
+                                               | Cap | Pwr | Cap  | Pwr  |
+                                               +-----+-----+------+------+
+       512  ===========    - ##- - - - -       | 170 | 50  | 512  | 400  |
+                             ##     ##         | 341 | 150 | 768  | 800  |
+       341  -PP - - - -      ##     ##         | 512 | 300 | 1024 | 1700 |
+             PP              ##     ##         +-----+-----+------+------+
+       170  -## - - - -      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+      Current OPP: =====       Other OPP: - - -     util_avg (100 each): ##
+
+
+    find_energy_efficient_cpu() will first look for the CPUs with the
+    maximum spare capacity in the two performance domains. In this example,
+    CPU1 and CPU3. Then it will estimate the energy of the system if P was
+    placed on either of them, and check if that would save some energy
+    compared to leaving P on CPU0. EAS assumes that OPPs follow utilization
+    (which is coherent with the behaviour of the schedutil CPUFreq
+    governor, see Section 6. for more details on this topic).
+
+    **Case 1. P is migrated to CPU1**::
+
+      1024                 - - - - - - -
+
+                                            Energy calculation:
+       768                 =============     * CPU0: 200 / 341 * 150 = 88
+                                             * CPU1: 300 / 341 * 150 = 131
+                                             * CPU2: 600 / 768 * 800 = 625
+       512  - - - - - -    - ##- - - - -     * CPU3: 500 / 768 * 800 = 520
+                             ##     ##          => total_energy = 1364
+       341  ===========      ##     ##
+                    PP       ##     ##
+       170  -## - - PP-      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+
+    **Case 2. P is migrated to CPU3**::
+
+      1024                 - - - - - - -
+
+                                            Energy calculation:
+       768                 =============     * CPU0: 200 / 341 * 150 = 88
+                                             * CPU1: 100 / 341 * 150 = 43
+                                    PP       * CPU2: 600 / 768 * 800 = 625
+       512  - - - - - -    - ##- - -PP -     * CPU3: 700 / 768 * 800 = 729
+                             ##     ##          => total_energy = 1485
+       341  ===========      ##     ##
+                             ##     ##
+       170  -## - - - -      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+
+    **Case 3. P stays on prev_cpu / CPU 0**::
+
+      1024                 - - - - - - -
+
+                                            Energy calculation:
+       768                 =============     * CPU0: 400 / 512 * 300 = 234
+                                             * CPU1: 100 / 512 * 300 = 58
+                                             * CPU2: 600 / 768 * 800 = 625
+       512  ===========    - ##- - - - -     * CPU3: 500 / 768 * 800 = 520
+                             ##     ##          => total_energy = 1437
+       341  -PP - - - -      ##     ##
+             PP              ##     ##
+       170  -## - - - -      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+
+    From these calculations, the Case 1 has the lowest total energy. So CPU 1
+    is be the best candidate from an energy-efficiency standpoint.
+
+Big CPUs are generally more power hungry than the little ones and are thus used
+mainly when a task doesn't fit the littles. However, little CPUs aren't always
+necessarily more energy-efficient than big CPUs. For some systems, the high OPPs
+of the little CPUs can be less energy-efficient than the lowest OPPs of the
+bigs, for example. So, if the little CPUs happen to have enough utilization at
+a specific point in time, a small task waking up at that moment could be better
+of executing on the big side in order to save energy, even though it would fit
+on the little side.
+
+And even in the case where all OPPs of the big CPUs are less energy-efficient
+than those of the little, using the big CPUs for a small task might still, under
+specific conditions, save energy. Indeed, placing a task on a little CPU can
+result in raising the OPP of the entire performance domain, and that will
+increase the cost of the tasks already running there. If the waking task is
+placed on a big CPU, its own execution cost might be higher than if it was
+running on a little, but it won't impact the other tasks of the little CPUs
+which will keep running at a lower OPP. So, when considering the total energy
+consumed by CPUs, the extra cost of running that one task on a big core can be
+smaller than the cost of raising the OPP on the little CPUs for all the other
+tasks.
+
+The examples above would be nearly impossible to get right in a generic way, and
+for all platforms, without knowing the cost of running at different OPPs on all
+CPUs of the system. Thanks to its EM-based design, EAS should cope with them
+correctly without too many troubles. However, in order to ensure a minimal
+impact on throughput for high-utilization scenarios, EAS also implements another
+mechanism called 'over-utilization'.
+
+
+5. Over-utilization
+-------------------
+
+From a general standpoint, the use-cases where EAS can help the most are those
+involving a light/medium CPU utilization. Whenever long CPU-bound tasks are
+being run, they will require all of the available CPU capacity, and there isn't
+much that can be done by the scheduler to save energy without severly harming
+throughput. In order to avoid hurting performance with EAS, CPUs are flagged as
+'over-utilized' as soon as they are used at more than 80% of their compute
+capacity. As long as no CPUs are over-utilized in a root domain, load balancing
+is disabled and EAS overridess the wake-up balancing code. EAS is likely to load
+the most energy efficient CPUs of the system more than the others if that can be
+done without harming throughput. So, the load-balancer is disabled to prevent
+it from breaking the energy-efficient task placement found by EAS. It is safe to
+do so when the system isn't overutilized since being below the 80% tipping point
+implies that:
+
+    a. there is some idle time on all CPUs, so the utilization signals used by
+       EAS are likely to accurately represent the 'size' of the various tasks
+       in the system;
+    b. all tasks should already be provided with enough CPU capacity,
+       regardless of their nice values;
+    c. since there is spare capacity all tasks must be blocking/sleeping
+       regularly and balancing at wake-up is sufficient.
+
+As soon as one CPU goes above the 80% tipping point, at least one of the three
+assumptions above becomes incorrect. In this scenario, the 'overutilized' flag
+is raised for the entire root domain, EAS is disabled, and the load-balancer is
+re-enabled. By doing so, the scheduler falls back onto load-based algorithms for
+wake-up and load balance under CPU-bound conditions. This provides a better
+respect of the nice values of tasks.
+
+Since the notion of overutilization largely relies on detecting whether or not
+there is some idle time in the system, the CPU capacity 'stolen' by higher
+(than CFS) scheduling classes (as well as IRQ) must be taken into account. As
+such, the detection of overutilization accounts for the capacity used not only
+by CFS tasks, but also by the other scheduling classes and IRQ.
+
+
+6. Dependencies and requirements for EAS
+----------------------------------------
+
+Energy Aware Scheduling depends on the CPUs of the system having specific
+hardware properties and on other features of the kernel being enabled. This
+section lists these dependencies and provides hints as to how they can be met.
+
+
+6.1 - Asymmetric CPU topology
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+As mentioned in the introduction, EAS is only supported on platforms with
+asymmetric CPU topologies for now. This requirement is checked at run-time by
+looking for the presence of the SD_ASYM_CPUCAPACITY flag when the scheduling
+domains are built.
+
+The flag is set/cleared automatically by the scheduler topology code whenever
+there are CPUs with different capacities in a root domain. The capacities of
+CPUs are provided by arch-specific code through the arch_scale_cpu_capacity()
+callback. As an example, arm and arm64 share an implementation of this callback
+which uses a combination of CPUFreq data and device-tree bindings to compute the
+capacity of CPUs (see drivers/base/arch_topology.c for more details).
+
+So, in order to use EAS on your platform your architecture must implement the
+arch_scale_cpu_capacity() callback, and some of the CPUs must have a lower
+capacity than others.
+
+Please note that EAS is not fundamentally incompatible with SMP, but no
+significant savings on SMP platforms have been observed yet. This restriction
+could be amended in the future if proven otherwise.
+
+
+6.2 - Energy Model presence
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+EAS uses the EM of a platform to estimate the impact of scheduling decisions on
+energy. So, your platform must provide power cost tables to the EM framework in
+order to make EAS start. To do so, please refer to documentation of the
+independent EM framework in Documentation/power/energy-model.txt.
+
+Please also note that the scheduling domains need to be re-built after the
+EM has been registered in order to start EAS.
+
+
+6.3 - Energy Model complexity
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The task wake-up path is very latency-sensitive. When the EM of a platform is
+too complex (too many CPUs, too many performance domains, too many performance
+states, ...), the cost of using it in the wake-up path can become prohibitive.
+The energy-aware wake-up algorithm has a complexity of:
+
+	C = Nd * (Nc + Ns)
+
+with: Nd the number of performance domains; Nc the number of CPUs; and Ns the
+total number of OPPs (ex: for two perf. domains with 4 OPPs each, Ns = 8).
+
+A complexity check is performed at the root domain level, when scheduling
+domains are built. EAS will not start on a root domain if its C happens to be
+higher than the completely arbitrary EM_MAX_COMPLEXITY threshold (2048 at the
+time of writing).
+
+If you really want to use EAS but the complexity of your platform's Energy
+Model is too high to be used with a single root domain, you're left with only
+two possible options:
+
+    1. split your system into separate, smaller, root domains using exclusive
+       cpusets and enable EAS locally on each of them. This option has the
+       benefit to work out of the box but the drawback of preventing load
+       balance between root domains, which can result in an unbalanced system
+       overall;
+    2. submit patches to reduce the complexity of the EAS wake-up algorithm,
+       hence enabling it to cope with larger EMs in reasonable time.
+
+
+6.4 - Schedutil governor
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+EAS tries to predict at which OPP will the CPUs be running in the close future
+in order to estimate their energy consumption. To do so, it is assumed that OPPs
+of CPUs follow their utilization.
+
+Although it is very difficult to provide hard guarantees regarding the accuracy
+of this assumption in practice (because the hardware might not do what it is
+told to do, for example), schedutil as opposed to other CPUFreq governors at
+least _requests_ frequencies calculated using the utilization signals.
+Consequently, the only sane governor to use together with EAS is schedutil,
+because it is the only one providing some degree of consistency between
+frequency requests and energy predictions.
+
+Using EAS with any other governor than schedutil is not supported.
+
+
+6.5 Scale-invariant utilization signals
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In order to make accurate prediction across CPUs and for all performance
+states, EAS needs frequency-invariant and CPU-invariant PELT signals. These can
+be obtained using the architecture-defined arch_scale{cpu,freq}_capacity()
+callbacks.
+
+Using EAS on a platform that doesn't implement these two callbacks is not
+supported.
+
+
+6.6 Multithreading (SMT)
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+EAS in its current form is SMT unaware and is not able to leverage
+multithreaded hardware to save energy. EAS considers threads as independent
+CPUs, which can actually be counter-productive for both performance and energy.
+
+EAS on SMT is not supported.
diff --git a/Documentation/scheduler/sched-energy.txt b/Documentation/scheduler/sched-energy.txt
deleted file mode 100644
index 197d81f4b836..000000000000
--- a/Documentation/scheduler/sched-energy.txt
+++ /dev/null
@@ -1,425 +0,0 @@
-			   =======================
-			   Energy Aware Scheduling
-			   =======================
-
-1. Introduction
----------------
-
-Energy Aware Scheduling (or EAS) gives the scheduler the ability to predict
-the impact of its decisions on the energy consumed by CPUs. EAS relies on an
-Energy Model (EM) of the CPUs to select an energy efficient CPU for each task,
-with a minimal impact on throughput. This document aims at providing an
-introduction on how EAS works, what are the main design decisions behind it, and
-details what is needed to get it to run.
-
-Before going any further, please note that at the time of writing:
-
-   /!\ EAS does not support platforms with symmetric CPU topologies /!\
-
-EAS operates only on heterogeneous CPU topologies (such as Arm big.LITTLE)
-because this is where the potential for saving energy through scheduling is
-the highest.
-
-The actual EM used by EAS is _not_ maintained by the scheduler, but by a
-dedicated framework. For details about this framework and what it provides,
-please refer to its documentation (see Documentation/power/energy-model.txt).
-
-
-2. Background and Terminology
------------------------------
-
-To make it clear from the start:
- - energy = [joule] (resource like a battery on powered devices)
- - power = energy/time = [joule/second] = [watt]
-
-The goal of EAS is to minimize energy, while still getting the job done. That
-is, we want to maximize:
-
-	performance [inst/s]
-	--------------------
-	    power [W]
-
-which is equivalent to minimizing:
-
-	energy [J]
-	-----------
-	instruction
-
-while still getting 'good' performance. It is essentially an alternative
-optimization objective to the current performance-only objective for the
-scheduler. This alternative considers two objectives: energy-efficiency and
-performance.
-
-The idea behind introducing an EM is to allow the scheduler to evaluate the
-implications of its decisions rather than blindly applying energy-saving
-techniques that may have positive effects only on some platforms. At the same
-time, the EM must be as simple as possible to minimize the scheduler latency
-impact.
-
-In short, EAS changes the way CFS tasks are assigned to CPUs. When it is time
-for the scheduler to decide where a task should run (during wake-up), the EM
-is used to break the tie between several good CPU candidates and pick the one
-that is predicted to yield the best energy consumption without harming the
-system's throughput. The predictions made by EAS rely on specific elements of
-knowledge about the platform's topology, which include the 'capacity' of CPUs,
-and their respective energy costs.
-
-
-3. Topology information
------------------------
-
-EAS (as well as the rest of the scheduler) uses the notion of 'capacity' to
-differentiate CPUs with different computing throughput. The 'capacity' of a CPU
-represents the amount of work it can absorb when running at its highest
-frequency compared to the most capable CPU of the system. Capacity values are
-normalized in a 1024 range, and are comparable with the utilization signals of
-tasks and CPUs computed by the Per-Entity Load Tracking (PELT) mechanism. Thanks
-to capacity and utilization values, EAS is able to estimate how big/busy a
-task/CPU is, and to take this into consideration when evaluating performance vs
-energy trade-offs. The capacity of CPUs is provided via arch-specific code
-through the arch_scale_cpu_capacity() callback.
-
-The rest of platform knowledge used by EAS is directly read from the Energy
-Model (EM) framework. The EM of a platform is composed of a power cost table
-per 'performance domain' in the system (see Documentation/power/energy-model.txt
-for futher details about performance domains).
-
-The scheduler manages references to the EM objects in the topology code when the
-scheduling domains are built, or re-built. For each root domain (rd), the
-scheduler maintains a singly linked list of all performance domains intersecting
-the current rd->span. Each node in the list contains a pointer to a struct
-em_perf_domain as provided by the EM framework.
-
-The lists are attached to the root domains in order to cope with exclusive
-cpuset configurations. Since the boundaries of exclusive cpusets do not
-necessarily match those of performance domains, the lists of different root
-domains can contain duplicate elements.
-
-Example 1.
-    Let us consider a platform with 12 CPUs, split in 3 performance domains
-    (pd0, pd4 and pd8), organized as follows:
-
-	          CPUs:   0 1 2 3 4 5 6 7 8 9 10 11
-	          PDs:   |--pd0--|--pd4--|---pd8---|
-	          RDs:   |----rd1----|-----rd2-----|
-
-    Now, consider that userspace decided to split the system with two
-    exclusive cpusets, hence creating two independent root domains, each
-    containing 6 CPUs. The two root domains are denoted rd1 and rd2 in the
-    above figure. Since pd4 intersects with both rd1 and rd2, it will be
-    present in the linked list '->pd' attached to each of them:
-       * rd1->pd: pd0 -> pd4
-       * rd2->pd: pd4 -> pd8
-
-    Please note that the scheduler will create two duplicate list nodes for
-    pd4 (one for each list). However, both just hold a pointer to the same
-    shared data structure of the EM framework.
-
-Since the access to these lists can happen concurrently with hotplug and other
-things, they are protected by RCU, like the rest of topology structures
-manipulated by the scheduler.
-
-EAS also maintains a static key (sched_energy_present) which is enabled when at
-least one root domain meets all conditions for EAS to start. Those conditions
-are summarized in Section 6.
-
-
-4. Energy-Aware task placement
-------------------------------
-
-EAS overrides the CFS task wake-up balancing code. It uses the EM of the
-platform and the PELT signals to choose an energy-efficient target CPU during
-wake-up balance. When EAS is enabled, select_task_rq_fair() calls
-find_energy_efficient_cpu() to do the placement decision. This function looks
-for the CPU with the highest spare capacity (CPU capacity - CPU utilization) in
-each performance domain since it is the one which will allow us to keep the
-frequency the lowest. Then, the function checks if placing the task there could
-save energy compared to leaving it on prev_cpu, i.e. the CPU where the task ran
-in its previous activation.
-
-find_energy_efficient_cpu() uses compute_energy() to estimate what will be the
-energy consumed by the system if the waking task was migrated. compute_energy()
-looks at the current utilization landscape of the CPUs and adjusts it to
-'simulate' the task migration. The EM framework provides the em_pd_energy() API
-which computes the expected energy consumption of each performance domain for
-the given utilization landscape.
-
-An example of energy-optimized task placement decision is detailed below.
-
-Example 2.
-    Let us consider a (fake) platform with 2 independent performance domains
-    composed of two CPUs each. CPU0 and CPU1 are little CPUs; CPU2 and CPU3
-    are big.
-
-    The scheduler must decide where to place a task P whose util_avg = 200
-    and prev_cpu = 0.
-
-    The current utilization landscape of the CPUs is depicted on the graph
-    below. CPUs 0-3 have a util_avg of 400, 100, 600 and 500 respectively
-    Each performance domain has three Operating Performance Points (OPPs).
-    The CPU capacity and power cost associated with each OPP is listed in
-    the Energy Model table. The util_avg of P is shown on the figures
-    below as 'PP'.
-
-    CPU util.
-      1024                 - - - - - - -              Energy Model
-                                               +-----------+-------------+
-                                               |  Little   |     Big     |
-       768                 =============       +-----+-----+------+------+
-                                               | Cap | Pwr | Cap  | Pwr  |
-                                               +-----+-----+------+------+
-       512  ===========    - ##- - - - -       | 170 | 50  | 512  | 400  |
-                             ##     ##         | 341 | 150 | 768  | 800  |
-       341  -PP - - - -      ##     ##         | 512 | 300 | 1024 | 1700 |
-             PP              ##     ##         +-----+-----+------+------+
-       170  -## - - - -      ##     ##
-             ##     ##       ##     ##
-           ------------    -------------
-            CPU0   CPU1     CPU2   CPU3
-
-      Current OPP: =====       Other OPP: - - -     util_avg (100 each): ##
-
-
-    find_energy_efficient_cpu() will first look for the CPUs with the
-    maximum spare capacity in the two performance domains. In this example,
-    CPU1 and CPU3. Then it will estimate the energy of the system if P was
-    placed on either of them, and check if that would save some energy
-    compared to leaving P on CPU0. EAS assumes that OPPs follow utilization
-    (which is coherent with the behaviour of the schedutil CPUFreq
-    governor, see Section 6. for more details on this topic).
-
-    Case 1. P is migrated to CPU1
-    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-      1024                 - - - - - - -
-
-                                            Energy calculation:
-       768                 =============     * CPU0: 200 / 341 * 150 = 88
-                                             * CPU1: 300 / 341 * 150 = 131
-                                             * CPU2: 600 / 768 * 800 = 625
-       512  - - - - - -    - ##- - - - -     * CPU3: 500 / 768 * 800 = 520
-                             ##     ##          => total_energy = 1364
-       341  ===========      ##     ##
-                    PP       ##     ##
-       170  -## - - PP-      ##     ##
-             ##     ##       ##     ##
-           ------------    -------------
-            CPU0   CPU1     CPU2   CPU3
-
-
-    Case 2. P is migrated to CPU3
-    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-      1024                 - - - - - - -
-
-                                            Energy calculation:
-       768                 =============     * CPU0: 200 / 341 * 150 = 88
-                                             * CPU1: 100 / 341 * 150 = 43
-                                    PP       * CPU2: 600 / 768 * 800 = 625
-       512  - - - - - -    - ##- - -PP -     * CPU3: 700 / 768 * 800 = 729
-                             ##     ##          => total_energy = 1485
-       341  ===========      ##     ##
-                             ##     ##
-       170  -## - - - -      ##     ##
-             ##     ##       ##     ##
-           ------------    -------------
-            CPU0   CPU1     CPU2   CPU3
-
-
-    Case 3. P stays on prev_cpu / CPU 0
-    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-      1024                 - - - - - - -
-
-                                            Energy calculation:
-       768                 =============     * CPU0: 400 / 512 * 300 = 234
-                                             * CPU1: 100 / 512 * 300 = 58
-                                             * CPU2: 600 / 768 * 800 = 625
-       512  ===========    - ##- - - - -     * CPU3: 500 / 768 * 800 = 520
-                             ##     ##          => total_energy = 1437
-       341  -PP - - - -      ##     ##
-             PP              ##     ##
-       170  -## - - - -      ##     ##
-             ##     ##       ##     ##
-           ------------    -------------
-            CPU0   CPU1     CPU2   CPU3
-
-
-    From these calculations, the Case 1 has the lowest total energy. So CPU 1
-    is be the best candidate from an energy-efficiency standpoint.
-
-Big CPUs are generally more power hungry than the little ones and are thus used
-mainly when a task doesn't fit the littles. However, little CPUs aren't always
-necessarily more energy-efficient than big CPUs. For some systems, the high OPPs
-of the little CPUs can be less energy-efficient than the lowest OPPs of the
-bigs, for example. So, if the little CPUs happen to have enough utilization at
-a specific point in time, a small task waking up at that moment could be better
-of executing on the big side in order to save energy, even though it would fit
-on the little side.
-
-And even in the case where all OPPs of the big CPUs are less energy-efficient
-than those of the little, using the big CPUs for a small task might still, under
-specific conditions, save energy. Indeed, placing a task on a little CPU can
-result in raising the OPP of the entire performance domain, and that will
-increase the cost of the tasks already running there. If the waking task is
-placed on a big CPU, its own execution cost might be higher than if it was
-running on a little, but it won't impact the other tasks of the little CPUs
-which will keep running at a lower OPP. So, when considering the total energy
-consumed by CPUs, the extra cost of running that one task on a big core can be
-smaller than the cost of raising the OPP on the little CPUs for all the other
-tasks.
-
-The examples above would be nearly impossible to get right in a generic way, and
-for all platforms, without knowing the cost of running at different OPPs on all
-CPUs of the system. Thanks to its EM-based design, EAS should cope with them
-correctly without too many troubles. However, in order to ensure a minimal
-impact on throughput for high-utilization scenarios, EAS also implements another
-mechanism called 'over-utilization'.
-
-
-5. Over-utilization
--------------------
-
-From a general standpoint, the use-cases where EAS can help the most are those
-involving a light/medium CPU utilization. Whenever long CPU-bound tasks are
-being run, they will require all of the available CPU capacity, and there isn't
-much that can be done by the scheduler to save energy without severly harming
-throughput. In order to avoid hurting performance with EAS, CPUs are flagged as
-'over-utilized' as soon as they are used at more than 80% of their compute
-capacity. As long as no CPUs are over-utilized in a root domain, load balancing
-is disabled and EAS overridess the wake-up balancing code. EAS is likely to load
-the most energy efficient CPUs of the system more than the others if that can be
-done without harming throughput. So, the load-balancer is disabled to prevent
-it from breaking the energy-efficient task placement found by EAS. It is safe to
-do so when the system isn't overutilized since being below the 80% tipping point
-implies that:
-
-    a. there is some idle time on all CPUs, so the utilization signals used by
-       EAS are likely to accurately represent the 'size' of the various tasks
-       in the system;
-    b. all tasks should already be provided with enough CPU capacity,
-       regardless of their nice values;
-    c. since there is spare capacity all tasks must be blocking/sleeping
-       regularly and balancing at wake-up is sufficient.
-
-As soon as one CPU goes above the 80% tipping point, at least one of the three
-assumptions above becomes incorrect. In this scenario, the 'overutilized' flag
-is raised for the entire root domain, EAS is disabled, and the load-balancer is
-re-enabled. By doing so, the scheduler falls back onto load-based algorithms for
-wake-up and load balance under CPU-bound conditions. This provides a better
-respect of the nice values of tasks.
-
-Since the notion of overutilization largely relies on detecting whether or not
-there is some idle time in the system, the CPU capacity 'stolen' by higher
-(than CFS) scheduling classes (as well as IRQ) must be taken into account. As
-such, the detection of overutilization accounts for the capacity used not only
-by CFS tasks, but also by the other scheduling classes and IRQ.
-
-
-6. Dependencies and requirements for EAS
-----------------------------------------
-
-Energy Aware Scheduling depends on the CPUs of the system having specific
-hardware properties and on other features of the kernel being enabled. This
-section lists these dependencies and provides hints as to how they can be met.
-
-
-  6.1 - Asymmetric CPU topology
-
-As mentioned in the introduction, EAS is only supported on platforms with
-asymmetric CPU topologies for now. This requirement is checked at run-time by
-looking for the presence of the SD_ASYM_CPUCAPACITY flag when the scheduling
-domains are built.
-
-The flag is set/cleared automatically by the scheduler topology code whenever
-there are CPUs with different capacities in a root domain. The capacities of
-CPUs are provided by arch-specific code through the arch_scale_cpu_capacity()
-callback. As an example, arm and arm64 share an implementation of this callback
-which uses a combination of CPUFreq data and device-tree bindings to compute the
-capacity of CPUs (see drivers/base/arch_topology.c for more details).
-
-So, in order to use EAS on your platform your architecture must implement the
-arch_scale_cpu_capacity() callback, and some of the CPUs must have a lower
-capacity than others.
-
-Please note that EAS is not fundamentally incompatible with SMP, but no
-significant savings on SMP platforms have been observed yet. This restriction
-could be amended in the future if proven otherwise.
-
-
-  6.2 - Energy Model presence
-
-EAS uses the EM of a platform to estimate the impact of scheduling decisions on
-energy. So, your platform must provide power cost tables to the EM framework in
-order to make EAS start. To do so, please refer to documentation of the
-independent EM framework in Documentation/power/energy-model.txt.
-
-Please also note that the scheduling domains need to be re-built after the
-EM has been registered in order to start EAS.
-
-
-  6.3 - Energy Model complexity
-
-The task wake-up path is very latency-sensitive. When the EM of a platform is
-too complex (too many CPUs, too many performance domains, too many performance
-states, ...), the cost of using it in the wake-up path can become prohibitive.
-The energy-aware wake-up algorithm has a complexity of:
-
-	C = Nd * (Nc + Ns)
-
-with: Nd the number of performance domains; Nc the number of CPUs; and Ns the
-total number of OPPs (ex: for two perf. domains with 4 OPPs each, Ns = 8).
-
-A complexity check is performed at the root domain level, when scheduling
-domains are built. EAS will not start on a root domain if its C happens to be
-higher than the completely arbitrary EM_MAX_COMPLEXITY threshold (2048 at the
-time of writing).
-
-If you really want to use EAS but the complexity of your platform's Energy
-Model is too high to be used with a single root domain, you're left with only
-two possible options:
-
-    1. split your system into separate, smaller, root domains using exclusive
-       cpusets and enable EAS locally on each of them. This option has the
-       benefit to work out of the box but the drawback of preventing load
-       balance between root domains, which can result in an unbalanced system
-       overall;
-    2. submit patches to reduce the complexity of the EAS wake-up algorithm,
-       hence enabling it to cope with larger EMs in reasonable time.
-
-
-  6.4 - Schedutil governor
-
-EAS tries to predict at which OPP will the CPUs be running in the close future
-in order to estimate their energy consumption. To do so, it is assumed that OPPs
-of CPUs follow their utilization.
-
-Although it is very difficult to provide hard guarantees regarding the accuracy
-of this assumption in practice (because the hardware might not do what it is
-told to do, for example), schedutil as opposed to other CPUFreq governors at
-least _requests_ frequencies calculated using the utilization signals.
-Consequently, the only sane governor to use together with EAS is schedutil,
-because it is the only one providing some degree of consistency between
-frequency requests and energy predictions.
-
-Using EAS with any other governor than schedutil is not supported.
-
-
-  6.5 Scale-invariant utilization signals
-
-In order to make accurate prediction across CPUs and for all performance
-states, EAS needs frequency-invariant and CPU-invariant PELT signals. These can
-be obtained using the architecture-defined arch_scale{cpu,freq}_capacity()
-callbacks.
-
-Using EAS on a platform that doesn't implement these two callbacks is not
-supported.
-
-
-  6.6 Multithreading (SMT)
-
-EAS in its current form is SMT unaware and is not able to leverage
-multithreaded hardware to save energy. EAS considers threads as independent
-CPUs, which can actually be counter-productive for both performance and energy.
-
-EAS on SMT is not supported.
diff --git a/Documentation/scheduler/sched-nice-design.rst b/Documentation/scheduler/sched-nice-design.rst
new file mode 100644
index 000000000000..0571f1b47e64
--- /dev/null
+++ b/Documentation/scheduler/sched-nice-design.rst
@@ -0,0 +1,112 @@
+=====================
+Scheduler Nice Design
+=====================
+
+This document explains the thinking about the revamped and streamlined
+nice-levels implementation in the new Linux scheduler.
+
+Nice levels were always pretty weak under Linux and people continuously
+pestered us to make nice +19 tasks use up much less CPU time.
+
+Unfortunately that was not that easy to implement under the old
+scheduler, (otherwise we'd have done it long ago) because nice level
+support was historically coupled to timeslice length, and timeslice
+units were driven by the HZ tick, so the smallest timeslice was 1/HZ.
+
+In the O(1) scheduler (in 2003) we changed negative nice levels to be
+much stronger than they were before in 2.4 (and people were happy about
+that change), and we also intentionally calibrated the linear timeslice
+rule so that nice +19 level would be _exactly_ 1 jiffy. To better
+understand it, the timeslice graph went like this (cheesy ASCII art
+alert!)::
+
+
+                   A
+             \     | [timeslice length]
+              \    |
+               \   |
+                \  |
+                 \ |
+                  \|___100msecs
+                   |^ . _
+                   |      ^ . _
+                   |            ^ . _
+ -*----------------------------------*-----> [nice level]
+ -20               |                +19
+                   |
+                   |
+
+So that if someone wanted to really renice tasks, +19 would give a much
+bigger hit than the normal linear rule would do. (The solution of
+changing the ABI to extend priorities was discarded early on.)
+
+This approach worked to some degree for some time, but later on with
+HZ=1000 it caused 1 jiffy to be 1 msec, which meant 0.1% CPU usage which
+we felt to be a bit excessive. Excessive _not_ because it's too small of
+a CPU utilization, but because it causes too frequent (once per
+millisec) rescheduling. (and would thus trash the cache, etc. Remember,
+this was long ago when hardware was weaker and caches were smaller, and
+people were running number crunching apps at nice +19.)
+
+So for HZ=1000 we changed nice +19 to 5msecs, because that felt like the
+right minimal granularity - and this translates to 5% CPU utilization.
+But the fundamental HZ-sensitive property for nice+19 still remained,
+and we never got a single complaint about nice +19 being too _weak_ in
+terms of CPU utilization, we only got complaints about it (still) being
+too _strong_ :-)
+
+To sum it up: we always wanted to make nice levels more consistent, but
+within the constraints of HZ and jiffies and their nasty design level
+coupling to timeslices and granularity it was not really viable.
+
+The second (less frequent but still periodically occurring) complaint
+about Linux's nice level support was its assymetry around the origo
+(which you can see demonstrated in the picture above), or more
+accurately: the fact that nice level behavior depended on the _absolute_
+nice level as well, while the nice API itself is fundamentally
+"relative":
+
+   int nice(int inc);
+
+   asmlinkage long sys_nice(int increment)
+
+(the first one is the glibc API, the second one is the syscall API.)
+Note that the 'inc' is relative to the current nice level. Tools like
+bash's "nice" command mirror this relative API.
+
+With the old scheduler, if you for example started a niced task with +1
+and another task with +2, the CPU split between the two tasks would
+depend on the nice level of the parent shell - if it was at nice -10 the
+CPU split was different than if it was at +5 or +10.
+
+A third complaint against Linux's nice level support was that negative
+nice levels were not 'punchy enough', so lots of people had to resort to
+run audio (and other multimedia) apps under RT priorities such as
+SCHED_FIFO. But this caused other problems: SCHED_FIFO is not starvation
+proof, and a buggy SCHED_FIFO app can also lock up the system for good.
+
+The new scheduler in v2.6.23 addresses all three types of complaints:
+
+To address the first complaint (of nice levels being not "punchy"
+enough), the scheduler was decoupled from 'time slice' and HZ concepts
+(and granularity was made a separate concept from nice levels) and thus
+it was possible to implement better and more consistent nice +19
+support: with the new scheduler nice +19 tasks get a HZ-independent
+1.5%, instead of the variable 3%-5%-9% range they got in the old
+scheduler.
+
+To address the second complaint (of nice levels not being consistent),
+the new scheduler makes nice(1) have the same CPU utilization effect on
+tasks, regardless of their absolute nice levels. So on the new
+scheduler, running a nice +10 and a nice 11 task has the same CPU
+utilization "split" between them as running a nice -5 and a nice -4
+task. (one will get 55% of the CPU, the other 45%.) That is why nice
+levels were changed to be "multiplicative" (or exponential) - that way
+it does not matter which nice level you start out from, the 'relative
+result' will always be the same.
+
+The third complaint (of negative nice levels not being "punchy" enough
+and forcing audio apps to run under the more dangerous SCHED_FIFO
+scheduling policy) is addressed by the new scheduler almost
+automatically: stronger negative nice levels are an automatic
+side-effect of the recalibrated dynamic range of nice levels.
diff --git a/Documentation/scheduler/sched-nice-design.txt b/Documentation/scheduler/sched-nice-design.txt
deleted file mode 100644
index 3ac1e46d5365..000000000000
--- a/Documentation/scheduler/sched-nice-design.txt
+++ /dev/null
@@ -1,108 +0,0 @@
-This document explains the thinking about the revamped and streamlined
-nice-levels implementation in the new Linux scheduler.
-
-Nice levels were always pretty weak under Linux and people continuously
-pestered us to make nice +19 tasks use up much less CPU time.
-
-Unfortunately that was not that easy to implement under the old
-scheduler, (otherwise we'd have done it long ago) because nice level
-support was historically coupled to timeslice length, and timeslice
-units were driven by the HZ tick, so the smallest timeslice was 1/HZ.
-
-In the O(1) scheduler (in 2003) we changed negative nice levels to be
-much stronger than they were before in 2.4 (and people were happy about
-that change), and we also intentionally calibrated the linear timeslice
-rule so that nice +19 level would be _exactly_ 1 jiffy. To better
-understand it, the timeslice graph went like this (cheesy ASCII art
-alert!):
-
-
-                   A
-             \     | [timeslice length]
-              \    |
-               \   |
-                \  |
-                 \ |
-                  \|___100msecs
-                   |^ . _
-                   |      ^ . _
-                   |            ^ . _
- -*----------------------------------*-----> [nice level]
- -20               |                +19
-                   |
-                   |
-
-So that if someone wanted to really renice tasks, +19 would give a much
-bigger hit than the normal linear rule would do. (The solution of
-changing the ABI to extend priorities was discarded early on.)
-
-This approach worked to some degree for some time, but later on with
-HZ=1000 it caused 1 jiffy to be 1 msec, which meant 0.1% CPU usage which
-we felt to be a bit excessive. Excessive _not_ because it's too small of
-a CPU utilization, but because it causes too frequent (once per
-millisec) rescheduling. (and would thus trash the cache, etc. Remember,
-this was long ago when hardware was weaker and caches were smaller, and
-people were running number crunching apps at nice +19.)
-
-So for HZ=1000 we changed nice +19 to 5msecs, because that felt like the
-right minimal granularity - and this translates to 5% CPU utilization.
-But the fundamental HZ-sensitive property for nice+19 still remained,
-and we never got a single complaint about nice +19 being too _weak_ in
-terms of CPU utilization, we only got complaints about it (still) being
-too _strong_ :-)
-
-To sum it up: we always wanted to make nice levels more consistent, but
-within the constraints of HZ and jiffies and their nasty design level
-coupling to timeslices and granularity it was not really viable.
-
-The second (less frequent but still periodically occurring) complaint
-about Linux's nice level support was its assymetry around the origo
-(which you can see demonstrated in the picture above), or more
-accurately: the fact that nice level behavior depended on the _absolute_
-nice level as well, while the nice API itself is fundamentally
-"relative":
-
-   int nice(int inc);
-
-   asmlinkage long sys_nice(int increment)
-
-(the first one is the glibc API, the second one is the syscall API.)
-Note that the 'inc' is relative to the current nice level. Tools like
-bash's "nice" command mirror this relative API.
-
-With the old scheduler, if you for example started a niced task with +1
-and another task with +2, the CPU split between the two tasks would
-depend on the nice level of the parent shell - if it was at nice -10 the
-CPU split was different than if it was at +5 or +10.
-
-A third complaint against Linux's nice level support was that negative
-nice levels were not 'punchy enough', so lots of people had to resort to
-run audio (and other multimedia) apps under RT priorities such as
-SCHED_FIFO. But this caused other problems: SCHED_FIFO is not starvation
-proof, and a buggy SCHED_FIFO app can also lock up the system for good.
-
-The new scheduler in v2.6.23 addresses all three types of complaints:
-
-To address the first complaint (of nice levels being not "punchy"
-enough), the scheduler was decoupled from 'time slice' and HZ concepts
-(and granularity was made a separate concept from nice levels) and thus
-it was possible to implement better and more consistent nice +19
-support: with the new scheduler nice +19 tasks get a HZ-independent
-1.5%, instead of the variable 3%-5%-9% range they got in the old
-scheduler.
-
-To address the second complaint (of nice levels not being consistent),
-the new scheduler makes nice(1) have the same CPU utilization effect on
-tasks, regardless of their absolute nice levels. So on the new
-scheduler, running a nice +10 and a nice 11 task has the same CPU
-utilization "split" between them as running a nice -5 and a nice -4
-task. (one will get 55% of the CPU, the other 45%.) That is why nice
-levels were changed to be "multiplicative" (or exponential) - that way
-it does not matter which nice level you start out from, the 'relative
-result' will always be the same.
-
-The third complaint (of negative nice levels not being "punchy" enough
-and forcing audio apps to run under the more dangerous SCHED_FIFO
-scheduling policy) is addressed by the new scheduler almost
-automatically: stronger negative nice levels are an automatic
-side-effect of the recalibrated dynamic range of nice levels.
diff --git a/Documentation/scheduler/sched-rt-group.rst b/Documentation/scheduler/sched-rt-group.rst
new file mode 100644
index 000000000000..79b30a21c51a
--- /dev/null
+++ b/Documentation/scheduler/sched-rt-group.rst
@@ -0,0 +1,185 @@
+==========================
+Real-Time group scheduling
+==========================
+
+.. CONTENTS
+
+   0. WARNING
+   1. Overview
+     1.1 The problem
+     1.2 The solution
+   2. The interface
+     2.1 System-wide settings
+     2.2 Default behaviour
+     2.3 Basis for grouping tasks
+   3. Future plans
+
+
+0. WARNING
+==========
+
+ Fiddling with these settings can result in an unstable system, the knobs are
+ root only and assumes root knows what he is doing.
+
+Most notable:
+
+ * very small values in sched_rt_period_us can result in an unstable
+   system when the period is smaller than either the available hrtimer
+   resolution, or the time it takes to handle the budget refresh itself.
+
+ * very small values in sched_rt_runtime_us can result in an unstable
+   system when the runtime is so small the system has difficulty making
+   forward progress (NOTE: the migration thread and kstopmachine both
+   are real-time processes).
+
+1. Overview
+===========
+
+
+1.1 The problem
+---------------
+
+Realtime scheduling is all about determinism, a group has to be able to rely on
+the amount of bandwidth (eg. CPU time) being constant. In order to schedule
+multiple groups of realtime tasks, each group must be assigned a fixed portion
+of the CPU time available.  Without a minimum guarantee a realtime group can
+obviously fall short. A fuzzy upper limit is of no use since it cannot be
+relied upon. Which leaves us with just the single fixed portion.
+
+1.2 The solution
+----------------
+
+CPU time is divided by means of specifying how much time can be spent running
+in a given period. We allocate this "run time" for each realtime group which
+the other realtime groups will not be permitted to use.
+
+Any time not allocated to a realtime group will be used to run normal priority
+tasks (SCHED_OTHER). Any allocated run time not used will also be picked up by
+SCHED_OTHER.
+
+Let's consider an example: a frame fixed realtime renderer must deliver 25
+frames a second, which yields a period of 0.04s per frame. Now say it will also
+have to play some music and respond to input, leaving it with around 80% CPU
+time dedicated for the graphics. We can then give this group a run time of 0.8
+* 0.04s = 0.032s.
+
+This way the graphics group will have a 0.04s period with a 0.032s run time
+limit. Now if the audio thread needs to refill the DMA buffer every 0.005s, but
+needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s =
+0.00015s. So this group can be scheduled with a period of 0.005s and a run time
+of 0.00015s.
+
+The remaining CPU time will be used for user input and other tasks. Because
+realtime tasks have explicitly allocated the CPU time they need to perform
+their tasks, buffer underruns in the graphics or audio can be eliminated.
+
+NOTE: the above example is not fully implemented yet. We still
+lack an EDF scheduler to make non-uniform periods usable.
+
+
+2. The Interface
+================
+
+
+2.1 System wide settings
+------------------------
+
+The system wide settings are configured under the /proc virtual file system:
+
+/proc/sys/kernel/sched_rt_period_us:
+  The scheduling period that is equivalent to 100% CPU bandwidth
+
+/proc/sys/kernel/sched_rt_runtime_us:
+  A global limit on how much time realtime scheduling may use.  Even without
+  CONFIG_RT_GROUP_SCHED enabled, this will limit time reserved to realtime
+  processes. With CONFIG_RT_GROUP_SCHED it signifies the total bandwidth
+  available to all realtime groups.
+
+  * Time is specified in us because the interface is s32. This gives an
+    operating range from 1us to about 35 minutes.
+  * sched_rt_period_us takes values from 1 to INT_MAX.
+  * sched_rt_runtime_us takes values from -1 to (INT_MAX - 1).
+  * A run time of -1 specifies runtime == period, ie. no limit.
+
+
+2.2 Default behaviour
+---------------------
+
+The default values for sched_rt_period_us (1000000 or 1s) and
+sched_rt_runtime_us (950000 or 0.95s).  This gives 0.05s to be used by
+SCHED_OTHER (non-RT tasks). These defaults were chosen so that a run-away
+realtime tasks will not lock up the machine but leave a little time to recover
+it.  By setting runtime to -1 you'd get the old behaviour back.
+
+By default all bandwidth is assigned to the root group and new groups get the
+period from /proc/sys/kernel/sched_rt_period_us and a run time of 0. If you
+want to assign bandwidth to another group, reduce the root group's bandwidth
+and assign some or all of the difference to another group.
+
+Realtime group scheduling means you have to assign a portion of total CPU
+bandwidth to the group before it will accept realtime tasks. Therefore you will
+not be able to run realtime tasks as any user other than root until you have
+done that, even if the user has the rights to run processes with realtime
+priority!
+
+
+2.3 Basis for grouping tasks
+----------------------------
+
+Enabling CONFIG_RT_GROUP_SCHED lets you explicitly allocate real
+CPU bandwidth to task groups.
+
+This uses the cgroup virtual file system and "<cgroup>/cpu.rt_runtime_us"
+to control the CPU time reserved for each control group.
+
+For more information on working with control groups, you should read
+Documentation/cgroup-v1/cgroups.txt as well.
+
+Group settings are checked against the following limits in order to keep the
+configuration schedulable:
+
+   \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period
+
+For now, this can be simplified to just the following (but see Future plans):
+
+   \Sum_{i} runtime_{i} <= global_runtime
+
+
+3. Future plans
+===============
+
+There is work in progress to make the scheduling period for each group
+("<cgroup>/cpu.rt_period_us") configurable as well.
+
+The constraint on the period is that a subgroup must have a smaller or
+equal period to its parent. But realistically its not very useful _yet_
+as its prone to starvation without deadline scheduling.
+
+Consider two sibling groups A and B; both have 50% bandwidth, but A's
+period is twice the length of B's.
+
+* group A: period=100000us, runtime=50000us
+
+	- this runs for 0.05s once every 0.1s
+
+* group B: period= 50000us, runtime=25000us
+
+	- this runs for 0.025s twice every 0.1s (or once every 0.05 sec).
+
+This means that currently a while (1) loop in A will run for the full period of
+B and can starve B's tasks (assuming they are of lower priority) for a whole
+period.
+
+The next project will be SCHED_EDF (Earliest Deadline First scheduling) to bring
+full deadline scheduling to the linux kernel. Deadline scheduling the above
+groups and treating end of the period as a deadline will ensure that they both
+get their allocated time.
+
+Implementing SCHED_EDF might take a while to complete. Priority Inheritance is
+the biggest challenge as the current linux PI infrastructure is geared towards
+the limited static priority levels 0-99. With deadline scheduling you need to
+do deadline inheritance (since priority is inversely proportional to the
+deadline delta (deadline - now)).
+
+This means the whole PI machinery will have to be reworked - and that is one of
+the most complex pieces of code we have.
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
deleted file mode 100644
index d8fce3e78457..000000000000
--- a/Documentation/scheduler/sched-rt-group.txt
+++ /dev/null
@@ -1,183 +0,0 @@
-				Real-Time group scheduling
-				--------------------------
-
-CONTENTS
-========
-
-0. WARNING
-1. Overview
-  1.1 The problem
-  1.2 The solution
-2. The interface
-  2.1 System-wide settings
-  2.2 Default behaviour
-  2.3 Basis for grouping tasks
-3. Future plans
-
-
-0. WARNING
-==========
-
- Fiddling with these settings can result in an unstable system, the knobs are
- root only and assumes root knows what he is doing.
-
-Most notable:
-
- * very small values in sched_rt_period_us can result in an unstable
-   system when the period is smaller than either the available hrtimer
-   resolution, or the time it takes to handle the budget refresh itself.
-
- * very small values in sched_rt_runtime_us can result in an unstable
-   system when the runtime is so small the system has difficulty making
-   forward progress (NOTE: the migration thread and kstopmachine both
-   are real-time processes).
-
-1. Overview
-===========
-
-
-1.1 The problem
----------------
-
-Realtime scheduling is all about determinism, a group has to be able to rely on
-the amount of bandwidth (eg. CPU time) being constant. In order to schedule
-multiple groups of realtime tasks, each group must be assigned a fixed portion
-of the CPU time available.  Without a minimum guarantee a realtime group can
-obviously fall short. A fuzzy upper limit is of no use since it cannot be
-relied upon. Which leaves us with just the single fixed portion.
-
-1.2 The solution
-----------------
-
-CPU time is divided by means of specifying how much time can be spent running
-in a given period. We allocate this "run time" for each realtime group which
-the other realtime groups will not be permitted to use.
-
-Any time not allocated to a realtime group will be used to run normal priority
-tasks (SCHED_OTHER). Any allocated run time not used will also be picked up by
-SCHED_OTHER.
-
-Let's consider an example: a frame fixed realtime renderer must deliver 25
-frames a second, which yields a period of 0.04s per frame. Now say it will also
-have to play some music and respond to input, leaving it with around 80% CPU
-time dedicated for the graphics. We can then give this group a run time of 0.8
-* 0.04s = 0.032s.
-
-This way the graphics group will have a 0.04s period with a 0.032s run time
-limit. Now if the audio thread needs to refill the DMA buffer every 0.005s, but
-needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s =
-0.00015s. So this group can be scheduled with a period of 0.005s and a run time
-of 0.00015s.
-
-The remaining CPU time will be used for user input and other tasks. Because
-realtime tasks have explicitly allocated the CPU time they need to perform
-their tasks, buffer underruns in the graphics or audio can be eliminated.
-
-NOTE: the above example is not fully implemented yet. We still
-lack an EDF scheduler to make non-uniform periods usable.
-
-
-2. The Interface
-================
-
-
-2.1 System wide settings
-------------------------
-
-The system wide settings are configured under the /proc virtual file system:
-
-/proc/sys/kernel/sched_rt_period_us:
-  The scheduling period that is equivalent to 100% CPU bandwidth
-
-/proc/sys/kernel/sched_rt_runtime_us:
-  A global limit on how much time realtime scheduling may use.  Even without
-  CONFIG_RT_GROUP_SCHED enabled, this will limit time reserved to realtime
-  processes. With CONFIG_RT_GROUP_SCHED it signifies the total bandwidth
-  available to all realtime groups.
-
-  * Time is specified in us because the interface is s32. This gives an
-    operating range from 1us to about 35 minutes.
-  * sched_rt_period_us takes values from 1 to INT_MAX.
-  * sched_rt_runtime_us takes values from -1 to (INT_MAX - 1).
-  * A run time of -1 specifies runtime == period, ie. no limit.
-
-
-2.2 Default behaviour
----------------------
-
-The default values for sched_rt_period_us (1000000 or 1s) and
-sched_rt_runtime_us (950000 or 0.95s).  This gives 0.05s to be used by
-SCHED_OTHER (non-RT tasks). These defaults were chosen so that a run-away
-realtime tasks will not lock up the machine but leave a little time to recover
-it.  By setting runtime to -1 you'd get the old behaviour back.
-
-By default all bandwidth is assigned to the root group and new groups get the
-period from /proc/sys/kernel/sched_rt_period_us and a run time of 0. If you
-want to assign bandwidth to another group, reduce the root group's bandwidth
-and assign some or all of the difference to another group.
-
-Realtime group scheduling means you have to assign a portion of total CPU
-bandwidth to the group before it will accept realtime tasks. Therefore you will
-not be able to run realtime tasks as any user other than root until you have
-done that, even if the user has the rights to run processes with realtime
-priority!
-
-
-2.3 Basis for grouping tasks
-----------------------------
-
-Enabling CONFIG_RT_GROUP_SCHED lets you explicitly allocate real
-CPU bandwidth to task groups.
-
-This uses the cgroup virtual file system and "<cgroup>/cpu.rt_runtime_us"
-to control the CPU time reserved for each control group.
-
-For more information on working with control groups, you should read
-Documentation/cgroup-v1/cgroups.txt as well.
-
-Group settings are checked against the following limits in order to keep the
-configuration schedulable:
-
-   \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period
-
-For now, this can be simplified to just the following (but see Future plans):
-
-   \Sum_{i} runtime_{i} <= global_runtime
-
-
-3. Future plans
-===============
-
-There is work in progress to make the scheduling period for each group
-("<cgroup>/cpu.rt_period_us") configurable as well.
-
-The constraint on the period is that a subgroup must have a smaller or
-equal period to its parent. But realistically its not very useful _yet_
-as its prone to starvation without deadline scheduling.
-
-Consider two sibling groups A and B; both have 50% bandwidth, but A's
-period is twice the length of B's.
-
-* group A: period=100000us, runtime=50000us
-	- this runs for 0.05s once every 0.1s
-
-* group B: period= 50000us, runtime=25000us
-	- this runs for 0.025s twice every 0.1s (or once every 0.05 sec).
-
-This means that currently a while (1) loop in A will run for the full period of
-B and can starve B's tasks (assuming they are of lower priority) for a whole
-period.
-
-The next project will be SCHED_EDF (Earliest Deadline First scheduling) to bring
-full deadline scheduling to the linux kernel. Deadline scheduling the above
-groups and treating end of the period as a deadline will ensure that they both
-get their allocated time.
-
-Implementing SCHED_EDF might take a while to complete. Priority Inheritance is
-the biggest challenge as the current linux PI infrastructure is geared towards
-the limited static priority levels 0-99. With deadline scheduling you need to
-do deadline inheritance (since priority is inversely proportional to the
-deadline delta (deadline - now)).
-
-This means the whole PI machinery will have to be reworked - and that is one of
-the most complex pieces of code we have.
diff --git a/Documentation/scheduler/sched-stats.rst b/Documentation/scheduler/sched-stats.rst
new file mode 100644
index 000000000000..0cb0aa714545
--- /dev/null
+++ b/Documentation/scheduler/sched-stats.rst
@@ -0,0 +1,167 @@
+====================
+Scheduler Statistics
+====================
+
+Version 15 of schedstats dropped counters for some sched_yield:
+yld_exp_empty, yld_act_empty and yld_both_empty. Otherwise, it is
+identical to version 14.
+
+Version 14 of schedstats includes support for sched_domains, which hit the
+mainline kernel in 2.6.20 although it is identical to the stats from version
+12 which was in the kernel from 2.6.13-2.6.19 (version 13 never saw a kernel
+release).  Some counters make more sense to be per-runqueue; other to be
+per-domain.  Note that domains (and their associated information) will only
+be pertinent and available on machines utilizing CONFIG_SMP.
+
+In version 14 of schedstat, there is at least one level of domain
+statistics for each cpu listed, and there may well be more than one
+domain.  Domains have no particular names in this implementation, but
+the highest numbered one typically arbitrates balancing across all the
+cpus on the machine, while domain0 is the most tightly focused domain,
+sometimes balancing only between pairs of cpus.  At this time, there
+are no architectures which need more than three domain levels. The first
+field in the domain stats is a bit map indicating which cpus are affected
+by that domain.
+
+These fields are counters, and only increment.  Programs which make use
+of these will need to start with a baseline observation and then calculate
+the change in the counters at each subsequent observation.  A perl script
+which does this for many of the fields is available at
+
+    http://eaglet.rain.com/rick/linux/schedstat/
+
+Note that any such script will necessarily be version-specific, as the main
+reason to change versions is changes in the output format.  For those wishing
+to write their own scripts, the fields are described here.
+
+CPU statistics
+--------------
+cpu<N> 1 2 3 4 5 6 7 8 9
+
+First field is a sched_yield() statistic:
+
+     1) # of times sched_yield() was called
+
+Next three are schedule() statistics:
+
+     2) This field is a legacy array expiration count field used in the O(1)
+	scheduler. We kept it for ABI compatibility, but it is always set to zero.
+     3) # of times schedule() was called
+     4) # of times schedule() left the processor idle
+
+Next two are try_to_wake_up() statistics:
+
+     5) # of times try_to_wake_up() was called
+     6) # of times try_to_wake_up() was called to wake up the local cpu
+
+Next three are statistics describing scheduling latency:
+
+     7) sum of all time spent running by tasks on this processor (in jiffies)
+     8) sum of all time spent waiting to run by tasks on this processor (in
+        jiffies)
+     9) # of timeslices run on this cpu
+
+
+Domain statistics
+-----------------
+One of these is produced per domain for each cpu described. (Note that if
+CONFIG_SMP is not defined, *no* domains are utilized and these lines
+will not appear in the output.)
+
+domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
+
+The first field is a bit mask indicating what cpus this domain operates over.
+
+The next 24 are a variety of load_balance() statistics in grouped into types
+of idleness (idle, busy, and newly idle):
+
+    1)  # of times in this domain load_balance() was called when the
+        cpu was idle
+    2)  # of times in this domain load_balance() checked but found
+        the load did not require balancing when the cpu was idle
+    3)  # of times in this domain load_balance() tried to move one or
+        more tasks and failed, when the cpu was idle
+    4)  sum of imbalances discovered (if any) with each call to
+        load_balance() in this domain when the cpu was idle
+    5)  # of times in this domain pull_task() was called when the cpu
+        was idle
+    6)  # of times in this domain pull_task() was called even though
+        the target task was cache-hot when idle
+    7)  # of times in this domain load_balance() was called but did
+        not find a busier queue while the cpu was idle
+    8)  # of times in this domain a busier queue was found while the
+        cpu was idle but no busier group was found
+    9)  # of times in this domain load_balance() was called when the
+        cpu was busy
+    10) # of times in this domain load_balance() checked but found the
+        load did not require balancing when busy
+    11) # of times in this domain load_balance() tried to move one or
+        more tasks and failed, when the cpu was busy
+    12) sum of imbalances discovered (if any) with each call to
+        load_balance() in this domain when the cpu was busy
+    13) # of times in this domain pull_task() was called when busy
+    14) # of times in this domain pull_task() was called even though the
+        target task was cache-hot when busy
+    15) # of times in this domain load_balance() was called but did not
+        find a busier queue while the cpu was busy
+    16) # of times in this domain a busier queue was found while the cpu
+        was busy but no busier group was found
+
+    17) # of times in this domain load_balance() was called when the
+        cpu was just becoming idle
+    18) # of times in this domain load_balance() checked but found the
+        load did not require balancing when the cpu was just becoming idle
+    19) # of times in this domain load_balance() tried to move one or more
+        tasks and failed, when the cpu was just becoming idle
+    20) sum of imbalances discovered (if any) with each call to
+        load_balance() in this domain when the cpu was just becoming idle
+    21) # of times in this domain pull_task() was called when newly idle
+    22) # of times in this domain pull_task() was called even though the
+        target task was cache-hot when just becoming idle
+    23) # of times in this domain load_balance() was called but did not
+        find a busier queue while the cpu was just becoming idle
+    24) # of times in this domain a busier queue was found while the cpu
+        was just becoming idle but no busier group was found
+
+   Next three are active_load_balance() statistics:
+
+    25) # of times active_load_balance() was called
+    26) # of times active_load_balance() tried to move a task and failed
+    27) # of times active_load_balance() successfully moved a task
+
+   Next three are sched_balance_exec() statistics:
+
+    28) sbe_cnt is not used
+    29) sbe_balanced is not used
+    30) sbe_pushed is not used
+
+   Next three are sched_balance_fork() statistics:
+
+    31) sbf_cnt is not used
+    32) sbf_balanced is not used
+    33) sbf_pushed is not used
+
+   Next three are try_to_wake_up() statistics:
+
+    34) # of times in this domain try_to_wake_up() awoke a task that
+        last ran on a different cpu in this domain
+    35) # of times in this domain try_to_wake_up() moved a task to the
+        waking cpu because it was cache-cold on its own cpu anyway
+    36) # of times in this domain try_to_wake_up() started passive balancing
+
+/proc/<pid>/schedstat
+---------------------
+schedstats also adds a new /proc/<pid>/schedstat file to include some of
+the same information on a per-process level.  There are three fields in
+this file correlating for that process to:
+
+     1) time spent on the cpu
+     2) time spent waiting on a runqueue
+     3) # of timeslices run on this cpu
+
+A program could be easily written to make use of these extra fields to
+report on how well a particular process or set of processes is faring
+under the scheduler's policies.  A simple version of such a program is
+available at
+
+    http://eaglet.rain.com/rick/linux/schedstat/v12/latency.c
diff --git a/Documentation/scheduler/sched-stats.txt b/Documentation/scheduler/sched-stats.txt
deleted file mode 100644
index 8259b34a66ae..000000000000
--- a/Documentation/scheduler/sched-stats.txt
+++ /dev/null
@@ -1,154 +0,0 @@
-Version 15 of schedstats dropped counters for some sched_yield:
-yld_exp_empty, yld_act_empty and yld_both_empty. Otherwise, it is
-identical to version 14.
-
-Version 14 of schedstats includes support for sched_domains, which hit the
-mainline kernel in 2.6.20 although it is identical to the stats from version
-12 which was in the kernel from 2.6.13-2.6.19 (version 13 never saw a kernel
-release).  Some counters make more sense to be per-runqueue; other to be
-per-domain.  Note that domains (and their associated information) will only
-be pertinent and available on machines utilizing CONFIG_SMP.
-
-In version 14 of schedstat, there is at least one level of domain
-statistics for each cpu listed, and there may well be more than one
-domain.  Domains have no particular names in this implementation, but
-the highest numbered one typically arbitrates balancing across all the
-cpus on the machine, while domain0 is the most tightly focused domain,
-sometimes balancing only between pairs of cpus.  At this time, there
-are no architectures which need more than three domain levels. The first
-field in the domain stats is a bit map indicating which cpus are affected
-by that domain.
-
-These fields are counters, and only increment.  Programs which make use
-of these will need to start with a baseline observation and then calculate
-the change in the counters at each subsequent observation.  A perl script
-which does this for many of the fields is available at
-
-    http://eaglet.rain.com/rick/linux/schedstat/
-
-Note that any such script will necessarily be version-specific, as the main
-reason to change versions is changes in the output format.  For those wishing
-to write their own scripts, the fields are described here.
-
-CPU statistics
---------------
-cpu<N> 1 2 3 4 5 6 7 8 9
-
-First field is a sched_yield() statistic:
-     1) # of times sched_yield() was called
-
-Next three are schedule() statistics:
-     2) This field is a legacy array expiration count field used in the O(1)
-	scheduler. We kept it for ABI compatibility, but it is always set to zero.
-     3) # of times schedule() was called
-     4) # of times schedule() left the processor idle
-
-Next two are try_to_wake_up() statistics:
-     5) # of times try_to_wake_up() was called
-     6) # of times try_to_wake_up() was called to wake up the local cpu
-
-Next three are statistics describing scheduling latency:
-     7) sum of all time spent running by tasks on this processor (in jiffies)
-     8) sum of all time spent waiting to run by tasks on this processor (in
-        jiffies)
-     9) # of timeslices run on this cpu
-
-
-Domain statistics
------------------
-One of these is produced per domain for each cpu described. (Note that if
-CONFIG_SMP is not defined, *no* domains are utilized and these lines
-will not appear in the output.)
-
-domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
-
-The first field is a bit mask indicating what cpus this domain operates over.
-
-The next 24 are a variety of load_balance() statistics in grouped into types
-of idleness (idle, busy, and newly idle):
-
-     1) # of times in this domain load_balance() was called when the
-        cpu was idle
-     2) # of times in this domain load_balance() checked but found
-        the load did not require balancing when the cpu was idle
-     3) # of times in this domain load_balance() tried to move one or
-        more tasks and failed, when the cpu was idle
-     4) sum of imbalances discovered (if any) with each call to
-        load_balance() in this domain when the cpu was idle
-     5) # of times in this domain pull_task() was called when the cpu
-        was idle
-     6) # of times in this domain pull_task() was called even though
-        the target task was cache-hot when idle
-     7) # of times in this domain load_balance() was called but did
-        not find a busier queue while the cpu was idle
-     8) # of times in this domain a busier queue was found while the
-        cpu was idle but no busier group was found
-
-     9) # of times in this domain load_balance() was called when the
-        cpu was busy
-    10) # of times in this domain load_balance() checked but found the
-        load did not require balancing when busy
-    11) # of times in this domain load_balance() tried to move one or
-        more tasks and failed, when the cpu was busy
-    12) sum of imbalances discovered (if any) with each call to
-        load_balance() in this domain when the cpu was busy
-    13) # of times in this domain pull_task() was called when busy
-    14) # of times in this domain pull_task() was called even though the
-        target task was cache-hot when busy
-    15) # of times in this domain load_balance() was called but did not
-        find a busier queue while the cpu was busy
-    16) # of times in this domain a busier queue was found while the cpu
-        was busy but no busier group was found
-
-    17) # of times in this domain load_balance() was called when the
-        cpu was just becoming idle
-    18) # of times in this domain load_balance() checked but found the
-        load did not require balancing when the cpu was just becoming idle
-    19) # of times in this domain load_balance() tried to move one or more
-        tasks and failed, when the cpu was just becoming idle
-    20) sum of imbalances discovered (if any) with each call to
-        load_balance() in this domain when the cpu was just becoming idle
-    21) # of times in this domain pull_task() was called when newly idle
-    22) # of times in this domain pull_task() was called even though the
-        target task was cache-hot when just becoming idle
-    23) # of times in this domain load_balance() was called but did not
-        find a busier queue while the cpu was just becoming idle
-    24) # of times in this domain a busier queue was found while the cpu
-        was just becoming idle but no busier group was found
-
-   Next three are active_load_balance() statistics:
-    25) # of times active_load_balance() was called
-    26) # of times active_load_balance() tried to move a task and failed
-    27) # of times active_load_balance() successfully moved a task
-
-   Next three are sched_balance_exec() statistics:
-    28) sbe_cnt is not used
-    29) sbe_balanced is not used
-    30) sbe_pushed is not used
-
-   Next three are sched_balance_fork() statistics:
-    31) sbf_cnt is not used
-    32) sbf_balanced is not used
-    33) sbf_pushed is not used
-
-   Next three are try_to_wake_up() statistics:
-    34) # of times in this domain try_to_wake_up() awoke a task that
-        last ran on a different cpu in this domain
-    35) # of times in this domain try_to_wake_up() moved a task to the
-        waking cpu because it was cache-cold on its own cpu anyway
-    36) # of times in this domain try_to_wake_up() started passive balancing
-
-/proc/<pid>/schedstat
-----------------
-schedstats also adds a new /proc/<pid>/schedstat file to include some of
-the same information on a per-process level.  There are three fields in
-this file correlating for that process to:
-     1) time spent on the cpu
-     2) time spent waiting on a runqueue
-     3) # of timeslices run on this cpu
-
-A program could be easily written to make use of these extra fields to
-report on how well a particular process or set of processes is faring
-under the scheduler's policies.  A simple version of such a program is
-available at
-    http://eaglet.rain.com/rick/linux/schedstat/v12/latency.c
diff --git a/Documentation/scheduler/text_files.rst b/Documentation/scheduler/text_files.rst
new file mode 100644
index 000000000000..0bc50307b241
--- /dev/null
+++ b/Documentation/scheduler/text_files.rst
@@ -0,0 +1,5 @@
+Scheduler pelt c program
+------------------------
+
+.. literalinclude:: sched-pelt.c
+    :language: c
diff --git a/Documentation/vm/numa.rst b/Documentation/vm/numa.rst
index 5cae13e9a08b..461d5d57cd4f 100644
--- a/Documentation/vm/numa.rst
+++ b/Documentation/vm/numa.rst
@@ -99,7 +99,7 @@ Local allocation will tend to keep subsequent access to the allocated memory
 as long as the task on whose behalf the kernel allocated some memory does not
 later migrate away from that memory.  The Linux scheduler is aware of the
 NUMA topology of the platform--embodied in the "scheduling domains" data
-structures [see Documentation/scheduler/sched-domains.txt]--and the scheduler
+structures [see Documentation/scheduler/sched-domains.rst]--and the scheduler
 attempts to minimize task migration to distant scheduling domains.  However,
 the scheduler does not take a task's NUMA footprint into account directly.
 Thus, under sufficient imbalance, tasks can migrate between nodes, remote
-- 
cgit 


From a2f405a5269fc7d705926298971dcb5e76054e8a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:53:04 -0300
Subject: docs: EDID/HOWTO.txt: convert it and rename to howto.rst

Sphinx need to know when a paragraph ends. So, do some adjustments
at the file for it to be properly parsed.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

that's said, I believe that this file should be moved to the
GPU/DRM documentation.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/EDID/HOWTO.txt                    | 49 ---------------------
 Documentation/EDID/howto.rst                    | 58 +++++++++++++++++++++++++
 Documentation/admin-guide/kernel-parameters.txt |  2 +-
 3 files changed, 59 insertions(+), 50 deletions(-)
 delete mode 100644 Documentation/EDID/HOWTO.txt
 create mode 100644 Documentation/EDID/howto.rst

(limited to 'Documentation')

diff --git a/Documentation/EDID/HOWTO.txt b/Documentation/EDID/HOWTO.txt
deleted file mode 100644
index 539871c3b785..000000000000
--- a/Documentation/EDID/HOWTO.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-In the good old days when graphics parameters were configured explicitly
-in a file called xorg.conf, even broken hardware could be managed.
-
-Today, with the advent of Kernel Mode Setting, a graphics board is
-either correctly working because all components follow the standards -
-or the computer is unusable, because the screen remains dark after
-booting or it displays the wrong area. Cases when this happens are:
-- The graphics board does not recognize the monitor.
-- The graphics board is unable to detect any EDID data.
-- The graphics board incorrectly forwards EDID data to the driver.
-- The monitor sends no or bogus EDID data.
-- A KVM sends its own EDID data instead of querying the connected monitor.
-Adding the kernel parameter "nomodeset" helps in most cases, but causes
-restrictions later on.
-
-As a remedy for such situations, the kernel configuration item
-CONFIG_DRM_LOAD_EDID_FIRMWARE was introduced. It allows to provide an
-individually prepared or corrected EDID data set in the /lib/firmware
-directory from where it is loaded via the firmware interface. The code
-(see drivers/gpu/drm/drm_edid_load.c) contains built-in data sets for
-commonly used screen resolutions (800x600, 1024x768, 1280x1024, 1600x1200,
-1680x1050, 1920x1080) as binary blobs, but the kernel source tree does
-not contain code to create these data. In order to elucidate the origin
-of the built-in binary EDID blobs and to facilitate the creation of
-individual data for a specific misbehaving monitor, commented sources
-and a Makefile environment are given here.
-
-To create binary EDID and C source code files from the existing data
-material, simply type "make".
-
-If you want to create your own EDID file, copy the file 1024x768.S,
-replace the settings with your own data and add a new target to the
-Makefile. Please note that the EDID data structure expects the timing
-values in a different way as compared to the standard X11 format.
-
-X11:
-HTimings:  hdisp hsyncstart hsyncend htotal
-VTimings:  vdisp vsyncstart vsyncend vtotal
-
-EDID:
-#define XPIX hdisp
-#define XBLANK htotal-hdisp
-#define XOFFSET hsyncstart-hdisp
-#define XPULSE hsyncend-hsyncstart
-
-#define YPIX vdisp
-#define YBLANK vtotal-vdisp
-#define YOFFSET vsyncstart-vdisp
-#define YPULSE vsyncend-vsyncstart
diff --git a/Documentation/EDID/howto.rst b/Documentation/EDID/howto.rst
new file mode 100644
index 000000000000..725fd49a88ca
--- /dev/null
+++ b/Documentation/EDID/howto.rst
@@ -0,0 +1,58 @@
+:orphan:
+
+====
+EDID
+====
+
+In the good old days when graphics parameters were configured explicitly
+in a file called xorg.conf, even broken hardware could be managed.
+
+Today, with the advent of Kernel Mode Setting, a graphics board is
+either correctly working because all components follow the standards -
+or the computer is unusable, because the screen remains dark after
+booting or it displays the wrong area. Cases when this happens are:
+- The graphics board does not recognize the monitor.
+- The graphics board is unable to detect any EDID data.
+- The graphics board incorrectly forwards EDID data to the driver.
+- The monitor sends no or bogus EDID data.
+- A KVM sends its own EDID data instead of querying the connected monitor.
+Adding the kernel parameter "nomodeset" helps in most cases, but causes
+restrictions later on.
+
+As a remedy for such situations, the kernel configuration item
+CONFIG_DRM_LOAD_EDID_FIRMWARE was introduced. It allows to provide an
+individually prepared or corrected EDID data set in the /lib/firmware
+directory from where it is loaded via the firmware interface. The code
+(see drivers/gpu/drm/drm_edid_load.c) contains built-in data sets for
+commonly used screen resolutions (800x600, 1024x768, 1280x1024, 1600x1200,
+1680x1050, 1920x1080) as binary blobs, but the kernel source tree does
+not contain code to create these data. In order to elucidate the origin
+of the built-in binary EDID blobs and to facilitate the creation of
+individual data for a specific misbehaving monitor, commented sources
+and a Makefile environment are given here.
+
+To create binary EDID and C source code files from the existing data
+material, simply type "make".
+
+If you want to create your own EDID file, copy the file 1024x768.S,
+replace the settings with your own data and add a new target to the
+Makefile. Please note that the EDID data structure expects the timing
+values in a different way as compared to the standard X11 format.
+
+X11:
+  HTimings:
+    hdisp hsyncstart hsyncend htotal
+  VTimings:
+    vdisp vsyncstart vsyncend vtotal
+
+EDID::
+
+  #define XPIX hdisp
+  #define XBLANK htotal-hdisp
+  #define XOFFSET hsyncstart-hdisp
+  #define XPULSE hsyncend-hsyncstart
+
+  #define YPIX vdisp
+  #define YBLANK vtotal-vdisp
+  #define YOFFSET vsyncstart-vdisp
+  #define YPULSE vsyncend-vsyncstart
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9ac37fcca3ee..4edf67801420 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -932,7 +932,7 @@
 			edid/1680x1050.bin, or edid/1920x1080.bin is given
 			and no file with the same name exists. Details and
 			instructions how to build your own EDID data are
-			available in Documentation/EDID/HOWTO.txt. An EDID
+			available in Documentation/EDID/howto.rst. An EDID
 			data set will only be used for a particular connector,
 			if its name and a colon are prepended to the EDID
 			name. Each connector may use a unique EDID data
-- 
cgit 


From 83e8b971f81cebe4f9a84cc76d328ac955b62a7a Mon Sep 17 00:00:00 2001
From: André Almeida <andrealmeid@collabora.com>
Date: Tue, 11 Jun 2019 17:03:16 -0300
Subject: sphinx.rst: Add note about code snippets embedded in the text
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There's a paragraph that explains how to create fixed width text block,
but it doesn't explains how to create fixed width text inline, although
this feature is really used through the documentation. Fix that adding a
quick note about it.

Signed-off-by: André Almeida <andrealmeid@collabora.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/doc-guide/sphinx.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst
index 4ba081f43e98..e60a56640c63 100644
--- a/Documentation/doc-guide/sphinx.rst
+++ b/Documentation/doc-guide/sphinx.rst
@@ -217,7 +217,7 @@ Here are some specific guidelines for the kernel documentation:
   examples, etc.), use ``::`` for anything that doesn't really benefit
   from syntax highlighting, especially short snippets. Use
   ``.. code-block:: <language>`` for longer code blocks that benefit
-  from highlighting.
+  from highlighting. For a short snippet of code embedded in the text, use \`\`.
 
 
 the C domain
-- 
cgit 


From cd84d63a2983ee2d386ff5a020c2c36562e4ef68 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Mon, 10 Jun 2019 19:02:42 +0100
Subject: Documentation: coresight: Update the generic device names

Update the documentation to reflect the new naming scheme with
latest changes.

Reported-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/trace/coresight.txt | 82 ++++++++++++++++++++++++++++++++-------
 1 file changed, 67 insertions(+), 15 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/trace/coresight.txt b/Documentation/trace/coresight.txt
index efbc832146e7..b027d61b27a6 100644
--- a/Documentation/trace/coresight.txt
+++ b/Documentation/trace/coresight.txt
@@ -188,6 +188,49 @@ specific to that component only.  "Implementation defined" customisations are
 expected to be accessed and controlled using those entries.
 
 
+Device Naming scheme
+------------------------
+The devices that appear on the "coresight" bus were named the same as their
+parent devices, i.e, the real devices that appears on AMBA bus or the platform bus.
+Thus the names were based on the Linux Open Firmware layer naming convention,
+which follows the base physical address of the device followed by the device
+type. e.g:
+
+root:~# ls /sys/bus/coresight/devices/
+ 20010000.etf  20040000.funnel      20100000.stm     22040000.etm
+ 22140000.etm  230c0000.funnel      23240000.etm     20030000.tpiu
+ 20070000.etr  20120000.replicator  220c0000.funnel
+ 23040000.etm  23140000.etm         23340000.etm
+
+However, with the introduction of ACPI support, the names of the real
+devices are a bit cryptic and non-obvious. Thus, a new naming scheme was
+introduced to use more generic names based on the type of the device. The
+following rules apply:
+
+  1) Devices that are bound to CPUs, are named based on the CPU logical
+     number.
+
+     e.g, ETM bound to CPU0 is named "etm0"
+
+  2) All other devices follow a pattern, "<device_type_prefix>N", where :
+
+	<device_type_prefix> 	- A prefix specific to the type of the device
+	N			- a sequential number assigned based on the order
+				  of probing.
+
+	e.g, tmc_etf0, tmc_etr0, funnel0, funnel1
+
+Thus, with the new scheme the devices could appear as :
+
+root:~# ls /sys/bus/coresight/devices/
+ etm0     etm1     etm2         etm3  etm4      etm5      funnel0
+ funnel1  funnel2  replicator0  stm0  tmc_etf0  tmc_etr0  tpiu0
+
+Some of the examples below might refer to old naming scheme and some
+to the newer scheme, to give a confirmation that what you see on your
+system is not unexpected. One must use the "names" as they appear on
+the system under specified locations.
+
 How to use the tracer modules
 -----------------------------
 
@@ -326,16 +369,25 @@ amount of processor cores), the "cs_etm" PMU will be listed only once.
 A Coresight PMU works the same way as any other PMU, i.e the name of the PMU is
 listed along with configuration options within forward slashes '/'.  Since a
 Coresight system will typically have more than one sink, the name of the sink to
-work with needs to be specified as an event option.  Names for sink to choose
-from are listed in sysFS under ($SYSFS)/bus/coresight/devices:
+work with needs to be specified as an event option.
+On newer kernels the available sinks are listed in sysFS under:
+($SYSFS)/bus/event_source/devices/cs_etm/sinks/
+
+	root@localhost:/sys/bus/event_source/devices/cs_etm/sinks# ls
+	tmc_etf0  tmc_etr0  tpiu0
+
+On older kernels, this may need to be found from the list of coresight devices,
+available under ($SYSFS)/bus/coresight/devices/:
+
+	root:~# ls /sys/bus/coresight/devices/
+	 etm0     etm1     etm2         etm3  etm4      etm5      funnel0
+	 funnel1  funnel2  replicator0  stm0  tmc_etf0  tmc_etr0  tpiu0
 
-	root@linaro-nano:~# ls /sys/bus/coresight/devices/
-		20010000.etf   20040000.funnel  20100000.stm  22040000.etm
-		22140000.etm  230c0000.funnel  23240000.etm 20030000.tpiu
-		20070000.etr     20120000.replicator  220c0000.funnel
-		23040000.etm  23140000.etm     23340000.etm
+	root@linaro-nano:~# perf record -e cs_etm/@tmc_etr0/u --per-thread program
 
-	root@linaro-nano:~# perf record -e cs_etm/@20070000.etr/u --per-thread program
+As mentioned above in section "Device Naming scheme", the names of the devices could
+look different from what is used in the example above. One must use the device names
+as it appears under the sysFS.
 
 The syntax within the forward slashes '/' is important.  The '@' character
 tells the parser that a sink is about to be specified and that this is the sink
@@ -352,7 +404,7 @@ perf can be used to record and analyze trace of programs.
 Execution can be recorded using 'perf record' with the cs_etm event,
 specifying the name of the sink to record to, e.g:
 
-    perf record -e cs_etm/@20070000.etr/u --per-thread
+    perf record -e cs_etm/@tmc_etr0/u --per-thread
 
 The 'perf report' and 'perf script' commands can be used to analyze execution,
 synthesizing instruction and branch events from the instruction trace.
@@ -381,7 +433,7 @@ sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tuto
 	Bubble sorting array of 30000 elements
 	5910 ms
 
-	$ perf record -e cs_etm/@20070000.etr/u --per-thread taskset -c 2 ./sort
+	$ perf record -e cs_etm/@tmc_etr0/u --per-thread taskset -c 2 ./sort
 	Bubble sorting array of 30000 elements
 	12543 ms
 	[ perf record: Woken up 35 times to write data ]
@@ -405,7 +457,7 @@ than the program flow through the code.
 As with any other CoreSight component, specifics about the STM tracer can be
 found in sysfs with more information on each entry being found in [1]:
 
-root@genericarmv8:~# ls /sys/bus/coresight/devices/20100000.stm
+root@genericarmv8:~# ls /sys/bus/coresight/devices/stm0
 enable_source   hwevent_select  port_enable     subsystem       uevent
 hwevent_enable  mgmt            port_select     traceid
 root@genericarmv8:~#
@@ -413,14 +465,14 @@ root@genericarmv8:~#
 Like any other source a sink needs to be identified and the STM enabled before
 being used:
 
-root@genericarmv8:~# echo 1 > /sys/bus/coresight/devices/20010000.etf/enable_sink
-root@genericarmv8:~# echo 1 > /sys/bus/coresight/devices/20100000.stm/enable_source
+root@genericarmv8:~# echo 1 > /sys/bus/coresight/devices/tmc_etf0/enable_sink
+root@genericarmv8:~# echo 1 > /sys/bus/coresight/devices/stm0/enable_source
 
 From there user space applications can request and use channels using the devfs
 interface provided for that purpose by the generic STM API:
 
-root@genericarmv8:~# ls -l /dev/20100000.stm
-crw-------    1 root     root       10,  61 Jan  3 18:11 /dev/20100000.stm
+root@genericarmv8:~# ls -l /dev/stm0
+crw-------    1 root     root       10,  61 Jan  3 18:11 /dev/stm0
 root@genericarmv8:~#
 
 Details on how to use the generic STM API can be found here [2].
-- 
cgit 


From 31753202325dd4f9bba27fbbad05f189881c60bc Mon Sep 17 00:00:00 2001
From: Bhupesh Sharma <bhsharma@redhat.com>
Date: Mon, 10 Jun 2019 15:33:39 +0530
Subject: Documentation/stackprotector: powerpc supports stack protector

powerpc architecture (both 64-bit and 32-bit) supports stack protector
mechanism since some time now [see commit 06ec27aea9fc ("powerpc/64:
add stack protector support")].

Update stackprotector arch support documentation to reflect the same.

Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Bhupesh Sharma <bhsharma@redhat.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/features/debug/stackprotector/arch-support.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/features/debug/stackprotector/arch-support.txt b/Documentation/features/debug/stackprotector/arch-support.txt
index 9999ea521f3e..32bbdfc64c32 100644
--- a/Documentation/features/debug/stackprotector/arch-support.txt
+++ b/Documentation/features/debug/stackprotector/arch-support.txt
@@ -22,7 +22,7 @@
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
-    |     powerpc: | TODO |
+    |     powerpc: |  ok  |
     |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: |  ok  |
-- 
cgit 


From 9d9b889540c380d8f56f7a79edbaae2fff8684d1 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Sun, 9 Jun 2019 14:14:36 +0300
Subject: block: document iostat changes for disk busy time accounting

Since commit 5b18b5a73760 ("block: delete part_round_stats and switch to
less precise counting") io_ticks is approximated by adding one at each
start and end of requests if jiffies has changed.

This works perfectly for requests shorter than a jiffy. If requests runs
more than 2 jiffies some I/O time will not be accounted unless there are
other reuqests.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/iostats.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/iostats.txt b/Documentation/iostats.txt
index 49df45f90e8a..5d63b18bd6d1 100644
--- a/Documentation/iostats.txt
+++ b/Documentation/iostats.txt
@@ -97,6 +97,10 @@ Field  9 -- # of I/Os currently in progress
 Field 10 -- # of milliseconds spent doing I/Os
     This field increases so long as field 9 is nonzero.
 
+    Since 5.0 this field counts jiffies when at least one request was
+    started or completed. If request runs more than 2 jiffies then some
+    I/O time will not be accounted unless there are other requests.
+
 Field 11 -- weighted # of milliseconds spent doing I/Os
     This field is incremented at each I/O start, I/O completion, I/O
     merge, or read of these stats by the number of I/Os in progress
-- 
cgit 


From 151f4e2bdc7a04020ae5c533896fb91a16e1f501 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 13 Jun 2019 07:10:36 -0300
Subject: docs: power: convert docs to ReST and rename to *.rst

Convert the PM documents to ReST, in order to allow them to
build with Sphinx.

The conversion is actually:
  - add blank lines and indentation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
---
 Documentation/ABI/testing/sysfs-class-powercap     |    2 +-
 Documentation/admin-guide/kernel-parameters.txt    |    6 +-
 Documentation/cpu-freq/core.txt                    |    2 +-
 Documentation/driver-api/pm/devices.rst            |    6 +-
 Documentation/driver-api/usb/power-management.rst  |    2 +-
 Documentation/power/apm-acpi.rst                   |   36 +
 Documentation/power/apm-acpi.txt                   |   32 -
 Documentation/power/basic-pm-debugging.rst         |  269 +++++
 Documentation/power/basic-pm-debugging.txt         |  254 -----
 Documentation/power/charger-manager.rst            |  205 ++++
 Documentation/power/charger-manager.txt            |  200 ----
 Documentation/power/drivers-testing.rst            |   51 +
 Documentation/power/drivers-testing.txt            |   46 -
 Documentation/power/energy-model.rst               |  147 +++
 Documentation/power/energy-model.txt               |  144 ---
 Documentation/power/freezing-of-tasks.rst          |  244 +++++
 Documentation/power/freezing-of-tasks.txt          |  231 ----
 Documentation/power/index.rst                      |   46 +
 Documentation/power/interface.rst                  |   79 ++
 Documentation/power/interface.txt                  |   77 --
 Documentation/power/opp.rst                        |  379 +++++++
 Documentation/power/opp.txt                        |  342 ------
 Documentation/power/pci.rst                        | 1135 ++++++++++++++++++++
 Documentation/power/pci.txt                        | 1094 -------------------
 Documentation/power/pm_qos_interface.rst           |  225 ++++
 Documentation/power/pm_qos_interface.txt           |  212 ----
 Documentation/power/power_supply_class.rst         |  282 +++++
 Documentation/power/power_supply_class.txt         |  231 ----
 Documentation/power/powercap/powercap.rst          |  257 +++++
 Documentation/power/powercap/powercap.txt          |  236 ----
 Documentation/power/regulator/consumer.rst         |  229 ++++
 Documentation/power/regulator/consumer.txt         |  218 ----
 Documentation/power/regulator/design.rst           |   38 +
 Documentation/power/regulator/design.txt           |   33 -
 Documentation/power/regulator/machine.rst          |   97 ++
 Documentation/power/regulator/machine.txt          |   96 --
 Documentation/power/regulator/overview.rst         |  178 +++
 Documentation/power/regulator/overview.txt         |  171 ---
 Documentation/power/regulator/regulator.rst        |   32 +
 Documentation/power/regulator/regulator.txt        |   30 -
 Documentation/power/runtime_pm.rst                 |  940 ++++++++++++++++
 Documentation/power/runtime_pm.txt                 |  928 ----------------
 Documentation/power/s2ram.rst                      |   87 ++
 Documentation/power/s2ram.txt                      |   85 --
 Documentation/power/suspend-and-cpuhotplug.rst     |  286 +++++
 Documentation/power/suspend-and-cpuhotplug.txt     |  274 -----
 Documentation/power/suspend-and-interrupts.rst     |  137 +++
 Documentation/power/suspend-and-interrupts.txt     |  135 ---
 Documentation/power/swsusp-and-swap-files.rst      |   63 ++
 Documentation/power/swsusp-and-swap-files.txt      |   60 --
 Documentation/power/swsusp-dmcrypt.rst             |  140 +++
 Documentation/power/swsusp-dmcrypt.txt             |  138 ---
 Documentation/power/swsusp.rst                     |  501 +++++++++
 Documentation/power/swsusp.txt                     |  446 --------
 Documentation/power/tricks.rst                     |   29 +
 Documentation/power/tricks.txt                     |   27 -
 Documentation/power/userland-swsusp.rst            |  191 ++++
 Documentation/power/userland-swsusp.txt            |  170 ---
 Documentation/power/video.rst                      |  213 ++++
 Documentation/power/video.txt                      |  185 ----
 Documentation/process/submitting-drivers.rst       |    2 +-
 Documentation/scheduler/sched-energy.txt           |    6 +-
 Documentation/trace/coresight-cpu-debug.txt        |    2 +-
 .../zh_CN/process/submitting-drivers.rst           |    2 +-
 64 files changed, 6531 insertions(+), 6110 deletions(-)
 create mode 100644 Documentation/power/apm-acpi.rst
 delete mode 100644 Documentation/power/apm-acpi.txt
 create mode 100644 Documentation/power/basic-pm-debugging.rst
 delete mode 100644 Documentation/power/basic-pm-debugging.txt
 create mode 100644 Documentation/power/charger-manager.rst
 delete mode 100644 Documentation/power/charger-manager.txt
 create mode 100644 Documentation/power/drivers-testing.rst
 delete mode 100644 Documentation/power/drivers-testing.txt
 create mode 100644 Documentation/power/energy-model.rst
 delete mode 100644 Documentation/power/energy-model.txt
 create mode 100644 Documentation/power/freezing-of-tasks.rst
 delete mode 100644 Documentation/power/freezing-of-tasks.txt
 create mode 100644 Documentation/power/index.rst
 create mode 100644 Documentation/power/interface.rst
 delete mode 100644 Documentation/power/interface.txt
 create mode 100644 Documentation/power/opp.rst
 delete mode 100644 Documentation/power/opp.txt
 create mode 100644 Documentation/power/pci.rst
 delete mode 100644 Documentation/power/pci.txt
 create mode 100644 Documentation/power/pm_qos_interface.rst
 delete mode 100644 Documentation/power/pm_qos_interface.txt
 create mode 100644 Documentation/power/power_supply_class.rst
 delete mode 100644 Documentation/power/power_supply_class.txt
 create mode 100644 Documentation/power/powercap/powercap.rst
 delete mode 100644 Documentation/power/powercap/powercap.txt
 create mode 100644 Documentation/power/regulator/consumer.rst
 delete mode 100644 Documentation/power/regulator/consumer.txt
 create mode 100644 Documentation/power/regulator/design.rst
 delete mode 100644 Documentation/power/regulator/design.txt
 create mode 100644 Documentation/power/regulator/machine.rst
 delete mode 100644 Documentation/power/regulator/machine.txt
 create mode 100644 Documentation/power/regulator/overview.rst
 delete mode 100644 Documentation/power/regulator/overview.txt
 create mode 100644 Documentation/power/regulator/regulator.rst
 delete mode 100644 Documentation/power/regulator/regulator.txt
 create mode 100644 Documentation/power/runtime_pm.rst
 delete mode 100644 Documentation/power/runtime_pm.txt
 create mode 100644 Documentation/power/s2ram.rst
 delete mode 100644 Documentation/power/s2ram.txt
 create mode 100644 Documentation/power/suspend-and-cpuhotplug.rst
 delete mode 100644 Documentation/power/suspend-and-cpuhotplug.txt
 create mode 100644 Documentation/power/suspend-and-interrupts.rst
 delete mode 100644 Documentation/power/suspend-and-interrupts.txt
 create mode 100644 Documentation/power/swsusp-and-swap-files.rst
 delete mode 100644 Documentation/power/swsusp-and-swap-files.txt
 create mode 100644 Documentation/power/swsusp-dmcrypt.rst
 delete mode 100644 Documentation/power/swsusp-dmcrypt.txt
 create mode 100644 Documentation/power/swsusp.rst
 delete mode 100644 Documentation/power/swsusp.txt
 create mode 100644 Documentation/power/tricks.rst
 delete mode 100644 Documentation/power/tricks.txt
 create mode 100644 Documentation/power/userland-swsusp.rst
 delete mode 100644 Documentation/power/userland-swsusp.txt
 create mode 100644 Documentation/power/video.rst
 delete mode 100644 Documentation/power/video.txt

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-class-powercap b/Documentation/ABI/testing/sysfs-class-powercap
index db3b3ff70d84..742dfd966592 100644
--- a/Documentation/ABI/testing/sysfs-class-powercap
+++ b/Documentation/ABI/testing/sysfs-class-powercap
@@ -5,7 +5,7 @@ Contact:	linux-pm@vger.kernel.org
 Description:
 		The powercap/ class sub directory belongs to the power cap
 		subsystem. Refer to
-		Documentation/power/powercap/powercap.txt for details.
+		Documentation/power/powercap/powercap.rst for details.
 
 What:		/sys/class/powercap/<control type>
 Date:		September 2013
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..7f5ca6e7c4d3 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -13,7 +13,7 @@
 			For ARM64, ONLY "acpi=off", "acpi=on" or "acpi=force"
 			are available
 
-			See also Documentation/power/runtime_pm.txt, pci=noacpi
+			See also Documentation/power/runtime_pm.rst, pci=noacpi
 
 	acpi_apic_instance=	[ACPI, IOAPIC]
 			Format: <int>
@@ -223,7 +223,7 @@
 	acpi_sleep=	[HW,ACPI] Sleep options
 			Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
 				  old_ordering, nonvs, sci_force_enable, nobl }
-			See Documentation/power/video.txt for information on
+			See Documentation/power/video.rst for information on
 			s3_bios and s3_mode.
 			s3_beep is for debugging; it makes the PC's speaker beep
 			as soon as the kernel's real-mode entry point is called.
@@ -4108,7 +4108,7 @@
 			Specify the offset from the beginning of the partition
 			given by "resume=" at which the swap header is located,
 			in <PAGE_SIZE> units (needed only for swap files).
-			See  Documentation/power/swsusp-and-swap-files.txt
+			See  Documentation/power/swsusp-and-swap-files.rst
 
 	resumedelay=	[HIBERNATION] Delay (in seconds) to pause before attempting to
 			read the resume files
diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt
index 073f128af5a7..55193e680250 100644
--- a/Documentation/cpu-freq/core.txt
+++ b/Documentation/cpu-freq/core.txt
@@ -95,7 +95,7 @@ flags	- flags of the cpufreq driver
 
 3. CPUFreq Table Generation with Operating Performance Point (OPP)
 ==================================================================
-For details about OPP, see Documentation/power/opp.txt
+For details about OPP, see Documentation/power/opp.rst
 
 dev_pm_opp_init_cpufreq_table -
 	This function provides a ready to use conversion routine to translate
diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst
index 30835683616a..f66c7b9126ea 100644
--- a/Documentation/driver-api/pm/devices.rst
+++ b/Documentation/driver-api/pm/devices.rst
@@ -225,7 +225,7 @@ system-wide transition to a sleep state even though its :c:member:`runtime_auto`
 flag is clear.
 
 For more information about the runtime power management framework, refer to
-:file:`Documentation/power/runtime_pm.txt`.
+:file:`Documentation/power/runtime_pm.rst`.
 
 
 Calling Drivers to Enter and Leave System Sleep States
@@ -728,7 +728,7 @@ it into account in any way.
 
 Devices may be defined as IRQ-safe which indicates to the PM core that their
 runtime PM callbacks may be invoked with disabled interrupts (see
-:file:`Documentation/power/runtime_pm.txt` for more information).  If an
+:file:`Documentation/power/runtime_pm.rst` for more information).  If an
 IRQ-safe device belongs to a PM domain, the runtime PM of the domain will be
 disallowed, unless the domain itself is defined as IRQ-safe. However, it
 makes sense to define a PM domain as IRQ-safe only if all the devices in it
@@ -795,7 +795,7 @@ so on) and the final state of the device must reflect the "active" runtime PM
 status in that case.
 
 During system-wide resume from a sleep state it's easiest to put devices into
-the full-power state, as explained in :file:`Documentation/power/runtime_pm.txt`.
+the full-power state, as explained in :file:`Documentation/power/runtime_pm.rst`.
 [Refer to that document for more information regarding this particular issue as
 well as for information on the device runtime power management framework in
 general.]
diff --git a/Documentation/driver-api/usb/power-management.rst b/Documentation/driver-api/usb/power-management.rst
index 4a74cf6f2797..2525c3622cae 100644
--- a/Documentation/driver-api/usb/power-management.rst
+++ b/Documentation/driver-api/usb/power-management.rst
@@ -46,7 +46,7 @@ device is turned off while the system as a whole remains running, we
 call it a "dynamic suspend" (also known as a "runtime suspend" or
 "selective suspend").  This document concentrates mostly on how
 dynamic PM is implemented in the USB subsystem, although system PM is
-covered to some extent (see ``Documentation/power/*.txt`` for more
+covered to some extent (see ``Documentation/power/*.rst`` for more
 information about system PM).
 
 System PM support is present only if the kernel was built with
diff --git a/Documentation/power/apm-acpi.rst b/Documentation/power/apm-acpi.rst
new file mode 100644
index 000000000000..5b90d947126d
--- /dev/null
+++ b/Documentation/power/apm-acpi.rst
@@ -0,0 +1,36 @@
+============
+APM or ACPI?
+============
+
+If you have a relatively recent x86 mobile, desktop, or server system,
+odds are it supports either Advanced Power Management (APM) or
+Advanced Configuration and Power Interface (ACPI).  ACPI is the newer
+of the two technologies and puts power management in the hands of the
+operating system, allowing for more intelligent power management than
+is possible with BIOS controlled APM.
+
+The best way to determine which, if either, your system supports is to
+build a kernel with both ACPI and APM enabled (as of 2.3.x ACPI is
+enabled by default).  If a working ACPI implementation is found, the
+ACPI driver will override and disable APM, otherwise the APM driver
+will be used.
+
+No, sorry, you cannot have both ACPI and APM enabled and running at
+once.  Some people with broken ACPI or broken APM implementations
+would like to use both to get a full set of working features, but you
+simply cannot mix and match the two.  Only one power management
+interface can be in control of the machine at once.  Think about it..
+
+User-space Daemons
+------------------
+Both APM and ACPI rely on user-space daemons, apmd and acpid
+respectively, to be completely functional.  Obtain both of these
+daemons from your Linux distribution or from the Internet (see below)
+and be sure that they are started sometime in the system boot process.
+Go ahead and start both.  If ACPI or APM is not available on your
+system the associated daemon will exit gracefully.
+
+  =====  =======================================
+  apmd   http://ftp.debian.org/pool/main/a/apmd/
+  acpid  http://acpid.sf.net/
+  =====  =======================================
diff --git a/Documentation/power/apm-acpi.txt b/Documentation/power/apm-acpi.txt
deleted file mode 100644
index 6cc423d3662e..000000000000
--- a/Documentation/power/apm-acpi.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-APM or ACPI?
-------------
-If you have a relatively recent x86 mobile, desktop, or server system,
-odds are it supports either Advanced Power Management (APM) or
-Advanced Configuration and Power Interface (ACPI).  ACPI is the newer
-of the two technologies and puts power management in the hands of the
-operating system, allowing for more intelligent power management than
-is possible with BIOS controlled APM.
-
-The best way to determine which, if either, your system supports is to
-build a kernel with both ACPI and APM enabled (as of 2.3.x ACPI is
-enabled by default).  If a working ACPI implementation is found, the
-ACPI driver will override and disable APM, otherwise the APM driver
-will be used.
-
-No, sorry, you cannot have both ACPI and APM enabled and running at
-once.  Some people with broken ACPI or broken APM implementations
-would like to use both to get a full set of working features, but you
-simply cannot mix and match the two.  Only one power management
-interface can be in control of the machine at once.  Think about it..
-
-User-space Daemons
-------------------
-Both APM and ACPI rely on user-space daemons, apmd and acpid
-respectively, to be completely functional.  Obtain both of these
-daemons from your Linux distribution or from the Internet (see below)
-and be sure that they are started sometime in the system boot process.
-Go ahead and start both.  If ACPI or APM is not available on your
-system the associated daemon will exit gracefully.
-
-  apmd:   http://ftp.debian.org/pool/main/a/apmd/
-  acpid:  http://acpid.sf.net/
diff --git a/Documentation/power/basic-pm-debugging.rst b/Documentation/power/basic-pm-debugging.rst
new file mode 100644
index 000000000000..69862e759c30
--- /dev/null
+++ b/Documentation/power/basic-pm-debugging.rst
@@ -0,0 +1,269 @@
+=================================
+Debugging hibernation and suspend
+=================================
+
+	(C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
+
+1. Testing hibernation (aka suspend to disk or STD)
+===================================================
+
+To check if hibernation works, you can try to hibernate in the "reboot" mode::
+
+	# echo reboot > /sys/power/disk
+	# echo disk > /sys/power/state
+
+and the system should create a hibernation image, reboot, resume and get back to
+the command prompt where you have started the transition.  If that happens,
+hibernation is most likely to work correctly.  Still, you need to repeat the
+test at least a couple of times in a row for confidence.  [This is necessary,
+because some problems only show up on a second attempt at suspending and
+resuming the system.]  Moreover, hibernating in the "reboot" and "shutdown"
+modes causes the PM core to skip some platform-related callbacks which on ACPI
+systems might be necessary to make hibernation work.  Thus, if your machine
+fails to hibernate or resume in the "reboot" mode, you should try the
+"platform" mode::
+
+	# echo platform > /sys/power/disk
+	# echo disk > /sys/power/state
+
+which is the default and recommended mode of hibernation.
+
+Unfortunately, the "platform" mode of hibernation does not work on some systems
+with broken BIOSes.  In such cases the "shutdown" mode of hibernation might
+work::
+
+	# echo shutdown > /sys/power/disk
+	# echo disk > /sys/power/state
+
+(it is similar to the "reboot" mode, but it requires you to press the power
+button to make the system resume).
+
+If neither "platform" nor "shutdown" hibernation mode works, you will need to
+identify what goes wrong.
+
+a) Test modes of hibernation
+----------------------------
+
+To find out why hibernation fails on your system, you can use a special testing
+facility available if the kernel is compiled with CONFIG_PM_DEBUG set.  Then,
+there is the file /sys/power/pm_test that can be used to make the hibernation
+core run in a test mode.  There are 5 test modes available:
+
+freezer
+	- test the freezing of processes
+
+devices
+	- test the freezing of processes and suspending of devices
+
+platform
+	- test the freezing of processes, suspending of devices and platform
+	  global control methods [1]_
+
+processors
+	- test the freezing of processes, suspending of devices, platform
+	  global control methods [1]_ and the disabling of nonboot CPUs
+
+core
+	- test the freezing of processes, suspending of devices, platform global
+	  control methods\ [1]_, the disabling of nonboot CPUs and suspending
+	  of platform/system devices
+
+.. [1]
+
+    the platform global control methods are only available on ACPI systems
+    and are only tested if the hibernation mode is set to "platform"
+
+To use one of them it is necessary to write the corresponding string to
+/sys/power/pm_test (eg. "devices" to test the freezing of processes and
+suspending devices) and issue the standard hibernation commands.  For example,
+to use the "devices" test mode along with the "platform" mode of hibernation,
+you should do the following::
+
+	# echo devices > /sys/power/pm_test
+	# echo platform > /sys/power/disk
+	# echo disk > /sys/power/state
+
+Then, the kernel will try to freeze processes, suspend devices, wait a few
+seconds (5 by default, but configurable by the suspend.pm_test_delay module
+parameter), resume devices and thaw processes.  If "platform" is written to
+/sys/power/pm_test , then after suspending devices the kernel will additionally
+invoke the global control methods (eg. ACPI global control methods) used to
+prepare the platform firmware for hibernation.  Next, it will wait a
+configurable number of seconds and invoke the platform (eg. ACPI) global
+methods used to cancel hibernation etc.
+
+Writing "none" to /sys/power/pm_test causes the kernel to switch to the normal
+hibernation/suspend operations.  Also, when open for reading, /sys/power/pm_test
+contains a space-separated list of all available tests (including "none" that
+represents the normal functionality) in which the current test level is
+indicated by square brackets.
+
+Generally, as you can see, each test level is more "invasive" than the previous
+one and the "core" level tests the hardware and drivers as deeply as possible
+without creating a hibernation image.  Obviously, if the "devices" test fails,
+the "platform" test will fail as well and so on.  Thus, as a rule of thumb, you
+should try the test modes starting from "freezer", through "devices", "platform"
+and "processors" up to "core" (repeat the test on each level a couple of times
+to make sure that any random factors are avoided).
+
+If the "freezer" test fails, there is a task that cannot be frozen (in that case
+it usually is possible to identify the offending task by analysing the output of
+dmesg obtained after the failing test).  Failure at this level usually means
+that there is a problem with the tasks freezer subsystem that should be
+reported.
+
+If the "devices" test fails, most likely there is a driver that cannot suspend
+or resume its device (in the latter case the system may hang or become unstable
+after the test, so please take that into consideration).  To find this driver,
+you can carry out a binary search according to the rules:
+
+- if the test fails, unload a half of the drivers currently loaded and repeat
+  (that would probably involve rebooting the system, so always note what drivers
+  have been loaded before the test),
+- if the test succeeds, load a half of the drivers you have unloaded most
+  recently and repeat.
+
+Once you have found the failing driver (there can be more than just one of
+them), you have to unload it every time before hibernation.  In that case please
+make sure to report the problem with the driver.
+
+It is also possible that the "devices" test will still fail after you have
+unloaded all modules. In that case, you may want to look in your kernel
+configuration for the drivers that can be compiled as modules (and test again
+with these drivers compiled as modules).  You may also try to use some special
+kernel command line options such as "noapic", "noacpi" or even "acpi=off".
+
+If the "platform" test fails, there is a problem with the handling of the
+platform (eg. ACPI) firmware on your system.  In that case the "platform" mode
+of hibernation is not likely to work.  You can try the "shutdown" mode, but that
+is rather a poor man's workaround.
+
+If the "processors" test fails, the disabling/enabling of nonboot CPUs does not
+work (of course, this only may be an issue on SMP systems) and the problem
+should be reported.  In that case you can also try to switch the nonboot CPUs
+off and on using the /sys/devices/system/cpu/cpu*/online sysfs attributes and
+see if that works.
+
+If the "core" test fails, which means that suspending of the system/platform
+devices has failed (these devices are suspended on one CPU with interrupts off),
+the problem is most probably hardware-related and serious, so it should be
+reported.
+
+A failure of any of the "platform", "processors" or "core" tests may cause your
+system to hang or become unstable, so please beware.  Such a failure usually
+indicates a serious problem that very well may be related to the hardware, but
+please report it anyway.
+
+b) Testing minimal configuration
+--------------------------------
+
+If all of the hibernation test modes work, you can boot the system with the
+"init=/bin/bash" command line parameter and attempt to hibernate in the
+"reboot", "shutdown" and "platform" modes.  If that does not work, there
+probably is a problem with a driver statically compiled into the kernel and you
+can try to compile more drivers as modules, so that they can be tested
+individually.  Otherwise, there is a problem with a modular driver and you can
+find it by loading a half of the modules you normally use and binary searching
+in accordance with the algorithm:
+- if there are n modules loaded and the attempt to suspend and resume fails,
+unload n/2 of the modules and try again (that would probably involve rebooting
+the system),
+- if there are n modules loaded and the attempt to suspend and resume succeeds,
+load n/2 modules more and try again.
+
+Again, if you find the offending module(s), it(they) must be unloaded every time
+before hibernation, and please report the problem with it(them).
+
+c) Using the "test_resume" hibernation option
+---------------------------------------------
+
+/sys/power/disk generally tells the kernel what to do after creating a
+hibernation image.  One of the available options is "test_resume" which
+causes the just created image to be used for immediate restoration.  Namely,
+after doing::
+
+	# echo test_resume > /sys/power/disk
+	# echo disk > /sys/power/state
+
+a hibernation image will be created and a resume from it will be triggered
+immediately without involving the platform firmware in any way.
+
+That test can be used to check if failures to resume from hibernation are
+related to bad interactions with the platform firmware.  That is, if the above
+works every time, but resume from actual hibernation does not work or is
+unreliable, the platform firmware may be responsible for the failures.
+
+On architectures and platforms that support using different kernels to restore
+hibernation images (that is, the kernel used to read the image from storage and
+load it into memory is different from the one included in the image) or support
+kernel address space randomization, it also can be used to check if failures
+to resume may be related to the differences between the restore and image
+kernels.
+
+d) Advanced debugging
+---------------------
+
+In case that hibernation does not work on your system even in the minimal
+configuration and compiling more drivers as modules is not practical or some
+modules cannot be unloaded, you can use one of the more advanced debugging
+techniques to find the problem.  First, if there is a serial port in your box,
+you can boot the kernel with the 'no_console_suspend' parameter and try to log
+kernel messages using the serial console.  This may provide you with some
+information about the reasons of the suspend (resume) failure.  Alternatively,
+it may be possible to use a FireWire port for debugging with firescope
+(http://v3.sk/~lkundrak/firescope/).  On x86 it is also possible to
+use the PM_TRACE mechanism documented in Documentation/power/s2ram.rst .
+
+2. Testing suspend to RAM (STR)
+===============================
+
+To verify that the STR works, it is generally more convenient to use the s2ram
+tool available from http://suspend.sf.net and documented at
+http://en.opensuse.org/SDB:Suspend_to_RAM (S2RAM_LINK).
+
+Namely, after writing "freezer", "devices", "platform", "processors", or "core"
+into /sys/power/pm_test (available if the kernel is compiled with
+CONFIG_PM_DEBUG set) the suspend code will work in the test mode corresponding
+to given string.  The STR test modes are defined in the same way as for
+hibernation, so please refer to Section 1 for more information about them.  In
+particular, the "core" test allows you to test everything except for the actual
+invocation of the platform firmware in order to put the system into the sleep
+state.
+
+Among other things, the testing with the help of /sys/power/pm_test may allow
+you to identify drivers that fail to suspend or resume their devices.  They
+should be unloaded every time before an STR transition.
+
+Next, you can follow the instructions at S2RAM_LINK to test the system, but if
+it does not work "out of the box", you may need to boot it with
+"init=/bin/bash" and test s2ram in the minimal configuration.  In that case,
+you may be able to search for failing drivers by following the procedure
+analogous to the one described in section 1.  If you find some failing drivers,
+you will have to unload them every time before an STR transition (ie. before
+you run s2ram), and please report the problems with them.
+
+There is a debugfs entry which shows the suspend to RAM statistics. Here is an
+example of its output::
+
+	# mount -t debugfs none /sys/kernel/debug
+	# cat /sys/kernel/debug/suspend_stats
+	success: 20
+	fail: 5
+	failed_freeze: 0
+	failed_prepare: 0
+	failed_suspend: 5
+	failed_suspend_noirq: 0
+	failed_resume: 0
+	failed_resume_noirq: 0
+	failures:
+	  last_failed_dev:	alarm
+				adc
+	  last_failed_errno:	-16
+				-16
+	  last_failed_step:	suspend
+				suspend
+
+Field success means the success number of suspend to RAM, and field fail means
+the failure number. Others are the failure number of different steps of suspend
+to RAM. suspend_stats just lists the last 2 failed devices, error number and
+failed step of suspend.
diff --git a/Documentation/power/basic-pm-debugging.txt b/Documentation/power/basic-pm-debugging.txt
deleted file mode 100644
index 708f87f78a75..000000000000
--- a/Documentation/power/basic-pm-debugging.txt
+++ /dev/null
@@ -1,254 +0,0 @@
-Debugging hibernation and suspend
-	(C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
-
-1. Testing hibernation (aka suspend to disk or STD)
-
-To check if hibernation works, you can try to hibernate in the "reboot" mode:
-
-# echo reboot > /sys/power/disk
-# echo disk > /sys/power/state
-
-and the system should create a hibernation image, reboot, resume and get back to
-the command prompt where you have started the transition.  If that happens,
-hibernation is most likely to work correctly.  Still, you need to repeat the
-test at least a couple of times in a row for confidence.  [This is necessary,
-because some problems only show up on a second attempt at suspending and
-resuming the system.]  Moreover, hibernating in the "reboot" and "shutdown"
-modes causes the PM core to skip some platform-related callbacks which on ACPI
-systems might be necessary to make hibernation work.  Thus, if your machine fails
-to hibernate or resume in the "reboot" mode, you should try the "platform" mode:
-
-# echo platform > /sys/power/disk
-# echo disk > /sys/power/state
-
-which is the default and recommended mode of hibernation.
-
-Unfortunately, the "platform" mode of hibernation does not work on some systems
-with broken BIOSes.  In such cases the "shutdown" mode of hibernation might
-work:
-
-# echo shutdown > /sys/power/disk
-# echo disk > /sys/power/state
-
-(it is similar to the "reboot" mode, but it requires you to press the power
-button to make the system resume).
-
-If neither "platform" nor "shutdown" hibernation mode works, you will need to
-identify what goes wrong.
-
-a) Test modes of hibernation
-
-To find out why hibernation fails on your system, you can use a special testing
-facility available if the kernel is compiled with CONFIG_PM_DEBUG set.  Then,
-there is the file /sys/power/pm_test that can be used to make the hibernation
-core run in a test mode.  There are 5 test modes available:
-
-freezer
-- test the freezing of processes
-
-devices
-- test the freezing of processes and suspending of devices
-
-platform
-- test the freezing of processes, suspending of devices and platform
-  global control methods(*)
-
-processors
-- test the freezing of processes, suspending of devices, platform
-  global control methods(*) and the disabling of nonboot CPUs
-
-core
-- test the freezing of processes, suspending of devices, platform global
-  control methods(*), the disabling of nonboot CPUs and suspending of
-  platform/system devices
-
-(*) the platform global control methods are only available on ACPI systems
-    and are only tested if the hibernation mode is set to "platform"
-
-To use one of them it is necessary to write the corresponding string to
-/sys/power/pm_test (eg. "devices" to test the freezing of processes and
-suspending devices) and issue the standard hibernation commands.  For example,
-to use the "devices" test mode along with the "platform" mode of hibernation,
-you should do the following:
-
-# echo devices > /sys/power/pm_test
-# echo platform > /sys/power/disk
-# echo disk > /sys/power/state
-
-Then, the kernel will try to freeze processes, suspend devices, wait a few
-seconds (5 by default, but configurable by the suspend.pm_test_delay module
-parameter), resume devices and thaw processes.  If "platform" is written to
-/sys/power/pm_test , then after suspending devices the kernel will additionally
-invoke the global control methods (eg. ACPI global control methods) used to
-prepare the platform firmware for hibernation.  Next, it will wait a
-configurable number of seconds and invoke the platform (eg. ACPI) global
-methods used to cancel hibernation etc.
-
-Writing "none" to /sys/power/pm_test causes the kernel to switch to the normal
-hibernation/suspend operations.  Also, when open for reading, /sys/power/pm_test
-contains a space-separated list of all available tests (including "none" that
-represents the normal functionality) in which the current test level is
-indicated by square brackets.
-
-Generally, as you can see, each test level is more "invasive" than the previous
-one and the "core" level tests the hardware and drivers as deeply as possible
-without creating a hibernation image.  Obviously, if the "devices" test fails,
-the "platform" test will fail as well and so on.  Thus, as a rule of thumb, you
-should try the test modes starting from "freezer", through "devices", "platform"
-and "processors" up to "core" (repeat the test on each level a couple of times
-to make sure that any random factors are avoided).
-
-If the "freezer" test fails, there is a task that cannot be frozen (in that case
-it usually is possible to identify the offending task by analysing the output of
-dmesg obtained after the failing test).  Failure at this level usually means
-that there is a problem with the tasks freezer subsystem that should be
-reported.
-
-If the "devices" test fails, most likely there is a driver that cannot suspend
-or resume its device (in the latter case the system may hang or become unstable
-after the test, so please take that into consideration).  To find this driver,
-you can carry out a binary search according to the rules:
-- if the test fails, unload a half of the drivers currently loaded and repeat
-(that would probably involve rebooting the system, so always note what drivers
-have been loaded before the test),
-- if the test succeeds, load a half of the drivers you have unloaded most
-recently and repeat.
-
-Once you have found the failing driver (there can be more than just one of
-them), you have to unload it every time before hibernation.  In that case please
-make sure to report the problem with the driver.
-
-It is also possible that the "devices" test will still fail after you have
-unloaded all modules. In that case, you may want to look in your kernel
-configuration for the drivers that can be compiled as modules (and test again
-with these drivers compiled as modules).  You may also try to use some special
-kernel command line options such as "noapic", "noacpi" or even "acpi=off".
-
-If the "platform" test fails, there is a problem with the handling of the
-platform (eg. ACPI) firmware on your system.  In that case the "platform" mode
-of hibernation is not likely to work.  You can try the "shutdown" mode, but that
-is rather a poor man's workaround.
-
-If the "processors" test fails, the disabling/enabling of nonboot CPUs does not
-work (of course, this only may be an issue on SMP systems) and the problem
-should be reported.  In that case you can also try to switch the nonboot CPUs
-off and on using the /sys/devices/system/cpu/cpu*/online sysfs attributes and
-see if that works.
-
-If the "core" test fails, which means that suspending of the system/platform
-devices has failed (these devices are suspended on one CPU with interrupts off),
-the problem is most probably hardware-related and serious, so it should be
-reported.
-
-A failure of any of the "platform", "processors" or "core" tests may cause your
-system to hang or become unstable, so please beware.  Such a failure usually
-indicates a serious problem that very well may be related to the hardware, but
-please report it anyway.
-
-b) Testing minimal configuration
-
-If all of the hibernation test modes work, you can boot the system with the
-"init=/bin/bash" command line parameter and attempt to hibernate in the
-"reboot", "shutdown" and "platform" modes.  If that does not work, there
-probably is a problem with a driver statically compiled into the kernel and you
-can try to compile more drivers as modules, so that they can be tested
-individually.  Otherwise, there is a problem with a modular driver and you can
-find it by loading a half of the modules you normally use and binary searching
-in accordance with the algorithm:
-- if there are n modules loaded and the attempt to suspend and resume fails,
-unload n/2 of the modules and try again (that would probably involve rebooting
-the system),
-- if there are n modules loaded and the attempt to suspend and resume succeeds,
-load n/2 modules more and try again.
-
-Again, if you find the offending module(s), it(they) must be unloaded every time
-before hibernation, and please report the problem with it(them).
-
-c) Using the "test_resume" hibernation option
-
-/sys/power/disk generally tells the kernel what to do after creating a
-hibernation image.  One of the available options is "test_resume" which
-causes the just created image to be used for immediate restoration.  Namely,
-after doing:
-
-# echo test_resume > /sys/power/disk
-# echo disk > /sys/power/state
-
-a hibernation image will be created and a resume from it will be triggered
-immediately without involving the platform firmware in any way.
-
-That test can be used to check if failures to resume from hibernation are
-related to bad interactions with the platform firmware.  That is, if the above
-works every time, but resume from actual hibernation does not work or is
-unreliable, the platform firmware may be responsible for the failures.
-
-On architectures and platforms that support using different kernels to restore
-hibernation images (that is, the kernel used to read the image from storage and
-load it into memory is different from the one included in the image) or support
-kernel address space randomization, it also can be used to check if failures
-to resume may be related to the differences between the restore and image
-kernels.
-
-d) Advanced debugging
-
-In case that hibernation does not work on your system even in the minimal
-configuration and compiling more drivers as modules is not practical or some
-modules cannot be unloaded, you can use one of the more advanced debugging
-techniques to find the problem.  First, if there is a serial port in your box,
-you can boot the kernel with the 'no_console_suspend' parameter and try to log
-kernel messages using the serial console.  This may provide you with some
-information about the reasons of the suspend (resume) failure.  Alternatively,
-it may be possible to use a FireWire port for debugging with firescope
-(http://v3.sk/~lkundrak/firescope/).  On x86 it is also possible to
-use the PM_TRACE mechanism documented in Documentation/power/s2ram.txt .
-
-2. Testing suspend to RAM (STR)
-
-To verify that the STR works, it is generally more convenient to use the s2ram
-tool available from http://suspend.sf.net and documented at
-http://en.opensuse.org/SDB:Suspend_to_RAM (S2RAM_LINK).
-
-Namely, after writing "freezer", "devices", "platform", "processors", or "core"
-into /sys/power/pm_test (available if the kernel is compiled with
-CONFIG_PM_DEBUG set) the suspend code will work in the test mode corresponding
-to given string.  The STR test modes are defined in the same way as for
-hibernation, so please refer to Section 1 for more information about them.  In
-particular, the "core" test allows you to test everything except for the actual
-invocation of the platform firmware in order to put the system into the sleep
-state.
-
-Among other things, the testing with the help of /sys/power/pm_test may allow
-you to identify drivers that fail to suspend or resume their devices.  They
-should be unloaded every time before an STR transition.
-
-Next, you can follow the instructions at S2RAM_LINK to test the system, but if
-it does not work "out of the box", you may need to boot it with
-"init=/bin/bash" and test s2ram in the minimal configuration.  In that case,
-you may be able to search for failing drivers by following the procedure
-analogous to the one described in section 1.  If you find some failing drivers,
-you will have to unload them every time before an STR transition (ie. before
-you run s2ram), and please report the problems with them.
-
-There is a debugfs entry which shows the suspend to RAM statistics. Here is an
-example of its output.
-	# mount -t debugfs none /sys/kernel/debug
-	# cat /sys/kernel/debug/suspend_stats
-	success: 20
-	fail: 5
-	failed_freeze: 0
-	failed_prepare: 0
-	failed_suspend: 5
-	failed_suspend_noirq: 0
-	failed_resume: 0
-	failed_resume_noirq: 0
-	failures:
-	  last_failed_dev:	alarm
-				adc
-	  last_failed_errno:	-16
-				-16
-	  last_failed_step:	suspend
-				suspend
-Field success means the success number of suspend to RAM, and field fail means
-the failure number. Others are the failure number of different steps of suspend
-to RAM. suspend_stats just lists the last 2 failed devices, error number and
-failed step of suspend.
diff --git a/Documentation/power/charger-manager.rst b/Documentation/power/charger-manager.rst
new file mode 100644
index 000000000000..84fab9376792
--- /dev/null
+++ b/Documentation/power/charger-manager.rst
@@ -0,0 +1,205 @@
+===============
+Charger Manager
+===============
+
+	(C) 2011 MyungJoo Ham <myungjoo.ham@samsung.com>, GPL
+
+Charger Manager provides in-kernel battery charger management that
+requires temperature monitoring during suspend-to-RAM state
+and where each battery may have multiple chargers attached and the userland
+wants to look at the aggregated information of the multiple chargers.
+
+Charger Manager is a platform_driver with power-supply-class entries.
+An instance of Charger Manager (a platform-device created with Charger-Manager)
+represents an independent battery with chargers. If there are multiple
+batteries with their own chargers acting independently in a system,
+the system may need multiple instances of Charger Manager.
+
+1. Introduction
+===============
+
+Charger Manager supports the following:
+
+* Support for multiple chargers (e.g., a device with USB, AC, and solar panels)
+	A system may have multiple chargers (or power sources) and some of
+	they may be activated at the same time. Each charger may have its
+	own power-supply-class and each power-supply-class can provide
+	different information about the battery status. This framework
+	aggregates charger-related information from multiple sources and
+	shows combined information as a single power-supply-class.
+
+* Support for in suspend-to-RAM polling (with suspend_again callback)
+	While the battery is being charged and the system is in suspend-to-RAM,
+	we may need to monitor the battery health by looking at the ambient or
+	battery temperature. We can accomplish this by waking up the system
+	periodically. However, such a method wakes up devices unnecessarily for
+	monitoring the battery health and tasks, and user processes that are
+	supposed to be kept suspended. That, in turn, incurs unnecessary power
+	consumption and slow down charging process. Or even, such peak power
+	consumption can stop chargers in the middle of charging
+	(external power input < device power consumption), which not
+	only affects the charging time, but the lifespan of the battery.
+
+	Charger Manager provides a function "cm_suspend_again" that can be
+	used as suspend_again callback of platform_suspend_ops. If the platform
+	requires tasks other than cm_suspend_again, it may implement its own
+	suspend_again callback that calls cm_suspend_again in the middle.
+	Normally, the platform will need to resume and suspend some devices
+	that are used by Charger Manager.
+
+* Support for premature full-battery event handling
+	If the battery voltage drops by "fullbatt_vchkdrop_uV" after
+	"fullbatt_vchkdrop_ms" from the full-battery event, the framework
+	restarts charging. This check is also performed while suspended by
+	setting wakeup time accordingly and using suspend_again.
+
+* Support for uevent-notify
+	With the charger-related events, the device sends
+	notification to users with UEVENT.
+
+2. Global Charger-Manager Data related with suspend_again
+=========================================================
+In order to setup Charger Manager with suspend-again feature
+(in-suspend monitoring), the user should provide charger_global_desc
+with setup_charger_manager(`struct charger_global_desc *`).
+This charger_global_desc data for in-suspend monitoring is global
+as the name suggests. Thus, the user needs to provide only once even
+if there are multiple batteries. If there are multiple batteries, the
+multiple instances of Charger Manager share the same charger_global_desc
+and it will manage in-suspend monitoring for all instances of Charger Manager.
+
+The user needs to provide all the three entries to `struct charger_global_desc`
+properly in order to activate in-suspend monitoring:
+
+`char *rtc_name;`
+	The name of rtc (e.g., "rtc0") used to wakeup the system from
+	suspend for Charger Manager. The alarm interrupt (AIE) of the rtc
+	should be able to wake up the system from suspend. Charger Manager
+	saves and restores the alarm value and use the previously-defined
+	alarm if it is going to go off earlier than Charger Manager so that
+	Charger Manager does not interfere with previously-defined alarms.
+
+`bool (*rtc_only_wakeup)(void);`
+	This callback should let CM know whether
+	the wakeup-from-suspend is caused only by the alarm of "rtc" in the
+	same struct. If there is any other wakeup source triggered the
+	wakeup, it should return false. If the "rtc" is the only wakeup
+	reason, it should return true.
+
+`bool assume_timer_stops_in_suspend;`
+	if true, Charger Manager assumes that
+	the timer (CM uses jiffies as timer) stops during suspend. Then, CM
+	assumes that the suspend-duration is same as the alarm length.
+
+
+3. How to setup suspend_again
+=============================
+Charger Manager provides a function "extern bool cm_suspend_again(void)".
+When cm_suspend_again is called, it monitors every battery. The suspend_ops
+callback of the system's platform_suspend_ops can call cm_suspend_again
+function to know whether Charger Manager wants to suspend again or not.
+If there are no other devices or tasks that want to use suspend_again
+feature, the platform_suspend_ops may directly refer to cm_suspend_again
+for its suspend_again callback.
+
+The cm_suspend_again() returns true (meaning "I want to suspend again")
+if the system was woken up by Charger Manager and the polling
+(in-suspend monitoring) results in "normal".
+
+4. Charger-Manager Data (struct charger_desc)
+=============================================
+For each battery charged independently from other batteries (if a series of
+batteries are charged by a single charger, they are counted as one independent
+battery), an instance of Charger Manager is attached to it. The following
+
+struct charger_desc elements:
+
+`char *psy_name;`
+	The power-supply-class name of the battery. Default is
+	"battery" if psy_name is NULL. Users can access the psy entries
+	at "/sys/class/power_supply/[psy_name]/".
+
+`enum polling_modes polling_mode;`
+	  CM_POLL_DISABLE:
+		do not poll this battery.
+	  CM_POLL_ALWAYS:
+		always poll this battery.
+	  CM_POLL_EXTERNAL_POWER_ONLY:
+		poll this battery if and only if an external power
+		source is attached.
+	  CM_POLL_CHARGING_ONLY:
+		poll this battery if and only if the battery is being charged.
+
+`unsigned int fullbatt_vchkdrop_ms; / unsigned int fullbatt_vchkdrop_uV;`
+	If both have non-zero values, Charger Manager will check the
+	battery voltage drop fullbatt_vchkdrop_ms after the battery is fully
+	charged. If the voltage drop is over fullbatt_vchkdrop_uV, Charger
+	Manager will try to recharge the battery by disabling and enabling
+	chargers. Recharge with voltage drop condition only (without delay
+	condition) is needed to be implemented with hardware interrupts from
+	fuel gauges or charger devices/chips.
+
+`unsigned int fullbatt_uV;`
+	If specified with a non-zero value, Charger Manager assumes
+	that the battery is full (capacity = 100) if the battery is not being
+	charged and the battery voltage is equal to or greater than
+	fullbatt_uV.
+
+`unsigned int polling_interval_ms;`
+	Required polling interval in ms. Charger Manager will poll
+	this battery every polling_interval_ms or more frequently.
+
+`enum data_source battery_present;`
+	CM_BATTERY_PRESENT:
+		assume that the battery exists.
+	CM_NO_BATTERY:
+		assume that the battery does not exists.
+	CM_FUEL_GAUGE:
+		get battery presence information from fuel gauge.
+	CM_CHARGER_STAT:
+		get battery presence from chargers.
+
+`char **psy_charger_stat;`
+	An array ending with NULL that has power-supply-class names of
+	chargers. Each power-supply-class should provide "PRESENT" (if
+	battery_present is "CM_CHARGER_STAT"), "ONLINE" (shows whether an
+	external power source is attached or not), and "STATUS" (shows whether
+	the battery is {"FULL" or not FULL} or {"FULL", "Charging",
+	"Discharging", "NotCharging"}).
+
+`int num_charger_regulators; / struct regulator_bulk_data *charger_regulators;`
+	Regulators representing the chargers in the form for
+	regulator framework's bulk functions.
+
+`char *psy_fuel_gauge;`
+	Power-supply-class name of the fuel gauge.
+
+`int (*temperature_out_of_range)(int *mC); / bool measure_battery_temp;`
+	This callback returns 0 if the temperature is safe for charging,
+	a positive number if it is too hot to charge, and a negative number
+	if it is too cold to charge. With the variable mC, the callback returns
+	the temperature in 1/1000 of centigrade.
+	The source of temperature can be battery or ambient one according to
+	the value of measure_battery_temp.
+
+
+5. Notify Charger-Manager of charger events: cm_notify_event()
+==============================================================
+If there is an charger event is required to notify
+Charger Manager, a charger device driver that triggers the event can call
+cm_notify_event(psy, type, msg) to notify the corresponding Charger Manager.
+In the function, psy is the charger driver's power_supply pointer, which is
+associated with Charger-Manager. The parameter "type"
+is the same as irq's type (enum cm_event_types). The event message "msg" is
+optional and is effective only if the event type is "UNDESCRIBED" or "OTHERS".
+
+6. Other Considerations
+=======================
+
+At the charger/battery-related events such as battery-pulled-out,
+charger-pulled-out, charger-inserted, DCIN-over/under-voltage, charger-stopped,
+and others critical to chargers, the system should be configured to wake up.
+At least the following should wake up the system from a suspend:
+a) charger-on/off b) external-power-in/out c) battery-in/out (while charging)
+
+It is usually accomplished by configuring the PMIC as a wakeup source.
diff --git a/Documentation/power/charger-manager.txt b/Documentation/power/charger-manager.txt
deleted file mode 100644
index 9ff1105e58d6..000000000000
--- a/Documentation/power/charger-manager.txt
+++ /dev/null
@@ -1,200 +0,0 @@
-Charger Manager
-	(C) 2011 MyungJoo Ham <myungjoo.ham@samsung.com>, GPL
-
-Charger Manager provides in-kernel battery charger management that
-requires temperature monitoring during suspend-to-RAM state
-and where each battery may have multiple chargers attached and the userland
-wants to look at the aggregated information of the multiple chargers.
-
-Charger Manager is a platform_driver with power-supply-class entries.
-An instance of Charger Manager (a platform-device created with Charger-Manager)
-represents an independent battery with chargers. If there are multiple
-batteries with their own chargers acting independently in a system,
-the system may need multiple instances of Charger Manager.
-
-1. Introduction
-===============
-
-Charger Manager supports the following:
-
-* Support for multiple chargers (e.g., a device with USB, AC, and solar panels)
-	A system may have multiple chargers (or power sources) and some of
-	they may be activated at the same time. Each charger may have its
-	own power-supply-class and each power-supply-class can provide
-	different information about the battery status. This framework
-	aggregates charger-related information from multiple sources and
-	shows combined information as a single power-supply-class.
-
-* Support for in suspend-to-RAM polling (with suspend_again callback)
-	While the battery is being charged and the system is in suspend-to-RAM,
-	we may need to monitor the battery health by looking at the ambient or
-	battery temperature. We can accomplish this by waking up the system
-	periodically. However, such a method wakes up devices unnecessarily for
-	monitoring the battery health and tasks, and user processes that are
-	supposed to be kept suspended. That, in turn, incurs unnecessary power
-	consumption and slow down charging process. Or even, such peak power
-	consumption can stop chargers in the middle of charging
-	(external power input < device power consumption), which not
-	only affects the charging time, but the lifespan of the battery.
-
-	Charger Manager provides a function "cm_suspend_again" that can be
-	used as suspend_again callback of platform_suspend_ops. If the platform
-	requires tasks other than cm_suspend_again, it may implement its own
-	suspend_again callback that calls cm_suspend_again in the middle.
-	Normally, the platform will need to resume and suspend some devices
-	that are used by Charger Manager.
-
-* Support for premature full-battery event handling
-	If the battery voltage drops by "fullbatt_vchkdrop_uV" after
-	"fullbatt_vchkdrop_ms" from the full-battery event, the framework
-	restarts charging. This check is also performed while suspended by
-	setting wakeup time accordingly and using suspend_again.
-
-* Support for uevent-notify
-	With the charger-related events, the device sends
-	notification to users with UEVENT.
-
-2. Global Charger-Manager Data related with suspend_again
-========================================================
-In order to setup Charger Manager with suspend-again feature
-(in-suspend monitoring), the user should provide charger_global_desc
-with setup_charger_manager(struct charger_global_desc *).
-This charger_global_desc data for in-suspend monitoring is global
-as the name suggests. Thus, the user needs to provide only once even
-if there are multiple batteries. If there are multiple batteries, the
-multiple instances of Charger Manager share the same charger_global_desc
-and it will manage in-suspend monitoring for all instances of Charger Manager.
-
-The user needs to provide all the three entries properly in order to activate
-in-suspend monitoring:
-
-struct charger_global_desc {
-
-char *rtc_name;
-	: The name of rtc (e.g., "rtc0") used to wakeup the system from
-	suspend for Charger Manager. The alarm interrupt (AIE) of the rtc
-	should be able to wake up the system from suspend. Charger Manager
-	saves and restores the alarm value and use the previously-defined
-	alarm if it is going to go off earlier than Charger Manager so that
-	Charger Manager does not interfere with previously-defined alarms.
-
-bool (*rtc_only_wakeup)(void);
-	: This callback should let CM know whether
-	the wakeup-from-suspend is caused only by the alarm of "rtc" in the
-	same struct. If there is any other wakeup source triggered the
-	wakeup, it should return false. If the "rtc" is the only wakeup
-	reason, it should return true.
-
-bool assume_timer_stops_in_suspend;
-	: if true, Charger Manager assumes that
-	the timer (CM uses jiffies as timer) stops during suspend. Then, CM
-	assumes that the suspend-duration is same as the alarm length.
-};
-
-3. How to setup suspend_again
-=============================
-Charger Manager provides a function "extern bool cm_suspend_again(void)".
-When cm_suspend_again is called, it monitors every battery. The suspend_ops
-callback of the system's platform_suspend_ops can call cm_suspend_again
-function to know whether Charger Manager wants to suspend again or not.
-If there are no other devices or tasks that want to use suspend_again
-feature, the platform_suspend_ops may directly refer to cm_suspend_again
-for its suspend_again callback.
-
-The cm_suspend_again() returns true (meaning "I want to suspend again")
-if the system was woken up by Charger Manager and the polling
-(in-suspend monitoring) results in "normal".
-
-4. Charger-Manager Data (struct charger_desc)
-=============================================
-For each battery charged independently from other batteries (if a series of
-batteries are charged by a single charger, they are counted as one independent
-battery), an instance of Charger Manager is attached to it.
-
-struct charger_desc {
-
-char *psy_name;
-	: The power-supply-class name of the battery. Default is
-	"battery" if psy_name is NULL. Users can access the psy entries
-	at "/sys/class/power_supply/[psy_name]/".
-
-enum polling_modes polling_mode;
-	: CM_POLL_DISABLE: do not poll this battery.
-	  CM_POLL_ALWAYS: always poll this battery.
-	  CM_POLL_EXTERNAL_POWER_ONLY: poll this battery if and only if
-				       an external power source is attached.
-	  CM_POLL_CHARGING_ONLY: poll this battery if and only if the
-				 battery is being charged.
-
-unsigned int fullbatt_vchkdrop_ms;
-unsigned int fullbatt_vchkdrop_uV;
-	: If both have non-zero values, Charger Manager will check the
-	battery voltage drop fullbatt_vchkdrop_ms after the battery is fully
-	charged. If the voltage drop is over fullbatt_vchkdrop_uV, Charger
-	Manager will try to recharge the battery by disabling and enabling
-	chargers. Recharge with voltage drop condition only (without delay
-	condition) is needed to be implemented with hardware interrupts from
-	fuel gauges or charger devices/chips.
-
-unsigned int fullbatt_uV;
-	: If specified with a non-zero value, Charger Manager assumes
-	that the battery is full (capacity = 100) if the battery is not being
-	charged and the battery voltage is equal to or greater than
-	fullbatt_uV.
-
-unsigned int polling_interval_ms;
-	: Required polling interval in ms. Charger Manager will poll
-	this battery every polling_interval_ms or more frequently.
-
-enum data_source battery_present;
-	: CM_BATTERY_PRESENT: assume that the battery exists.
-	CM_NO_BATTERY: assume that the battery does not exists.
-	CM_FUEL_GAUGE: get battery presence information from fuel gauge.
-	CM_CHARGER_STAT: get battery presence from chargers.
-
-char **psy_charger_stat;
-	: An array ending with NULL that has power-supply-class names of
-	chargers. Each power-supply-class should provide "PRESENT" (if
-	battery_present is "CM_CHARGER_STAT"), "ONLINE" (shows whether an
-	external power source is attached or not), and "STATUS" (shows whether
-	the battery is {"FULL" or not FULL} or {"FULL", "Charging",
-	"Discharging", "NotCharging"}).
-
-int num_charger_regulators;
-struct regulator_bulk_data *charger_regulators;
-	: Regulators representing the chargers in the form for
-	regulator framework's bulk functions.
-
-char *psy_fuel_gauge;
-	: Power-supply-class name of the fuel gauge.
-
-int (*temperature_out_of_range)(int *mC);
-bool measure_battery_temp;
-	: This callback returns 0 if the temperature is safe for charging,
-	a positive number if it is too hot to charge, and a negative number
-	if it is too cold to charge. With the variable mC, the callback returns
-	the temperature in 1/1000 of centigrade.
-	The source of temperature can be battery or ambient one according to
-	the value of measure_battery_temp.
-};
-
-5. Notify Charger-Manager of charger events: cm_notify_event()
-=========================================================
-If there is an charger event is required to notify
-Charger Manager, a charger device driver that triggers the event can call
-cm_notify_event(psy, type, msg) to notify the corresponding Charger Manager.
-In the function, psy is the charger driver's power_supply pointer, which is
-associated with Charger-Manager. The parameter "type"
-is the same as irq's type (enum cm_event_types). The event message "msg" is
-optional and is effective only if the event type is "UNDESCRIBED" or "OTHERS".
-
-6. Other Considerations
-=======================
-
-At the charger/battery-related events such as battery-pulled-out,
-charger-pulled-out, charger-inserted, DCIN-over/under-voltage, charger-stopped,
-and others critical to chargers, the system should be configured to wake up.
-At least the following should wake up the system from a suspend:
-a) charger-on/off b) external-power-in/out c) battery-in/out (while charging)
-
-It is usually accomplished by configuring the PMIC as a wakeup source.
diff --git a/Documentation/power/drivers-testing.rst b/Documentation/power/drivers-testing.rst
new file mode 100644
index 000000000000..e53f1999fc39
--- /dev/null
+++ b/Documentation/power/drivers-testing.rst
@@ -0,0 +1,51 @@
+====================================================
+Testing suspend and resume support in device drivers
+====================================================
+
+	(C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
+
+1. Preparing the test system
+============================
+
+Unfortunately, to effectively test the support for the system-wide suspend and
+resume transitions in a driver, it is necessary to suspend and resume a fully
+functional system with this driver loaded.  Moreover, that should be done
+several times, preferably several times in a row, and separately for hibernation
+(aka suspend to disk or STD) and suspend to RAM (STR), because each of these
+cases involves slightly different operations and different interactions with
+the machine's BIOS.
+
+Of course, for this purpose the test system has to be known to suspend and
+resume without the driver being tested.  Thus, if possible, you should first
+resolve all suspend/resume-related problems in the test system before you start
+testing the new driver.  Please see Documentation/power/basic-pm-debugging.rst
+for more information about the debugging of suspend/resume functionality.
+
+2. Testing the driver
+=====================
+
+Once you have resolved the suspend/resume-related problems with your test system
+without the new driver, you are ready to test it:
+
+a) Build the driver as a module, load it and try the test modes of hibernation
+   (see: Documentation/power/basic-pm-debugging.rst, 1).
+
+b) Load the driver and attempt to hibernate in the "reboot", "shutdown" and
+   "platform" modes (see: Documentation/power/basic-pm-debugging.rst, 1).
+
+c) Compile the driver directly into the kernel and try the test modes of
+   hibernation.
+
+d) Attempt to hibernate with the driver compiled directly into the kernel
+   in the "reboot", "shutdown" and "platform" modes.
+
+e) Try the test modes of suspend (see: Documentation/power/basic-pm-debugging.rst,
+   2).  [As far as the STR tests are concerned, it should not matter whether or
+   not the driver is built as a module.]
+
+f) Attempt to suspend to RAM using the s2ram tool with the driver loaded
+   (see: Documentation/power/basic-pm-debugging.rst, 2).
+
+Each of the above tests should be repeated several times and the STD tests
+should be mixed with the STR tests.  If any of them fails, the driver cannot be
+regarded as suspend/resume-safe.
diff --git a/Documentation/power/drivers-testing.txt b/Documentation/power/drivers-testing.txt
deleted file mode 100644
index 638afdf4d6b8..000000000000
--- a/Documentation/power/drivers-testing.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-Testing suspend and resume support in device drivers
-	(C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
-
-1. Preparing the test system
-
-Unfortunately, to effectively test the support for the system-wide suspend and
-resume transitions in a driver, it is necessary to suspend and resume a fully
-functional system with this driver loaded.  Moreover, that should be done
-several times, preferably several times in a row, and separately for hibernation
-(aka suspend to disk or STD) and suspend to RAM (STR), because each of these
-cases involves slightly different operations and different interactions with
-the machine's BIOS.
-
-Of course, for this purpose the test system has to be known to suspend and
-resume without the driver being tested.  Thus, if possible, you should first
-resolve all suspend/resume-related problems in the test system before you start
-testing the new driver.  Please see Documentation/power/basic-pm-debugging.txt
-for more information about the debugging of suspend/resume functionality.
-
-2. Testing the driver
-
-Once you have resolved the suspend/resume-related problems with your test system
-without the new driver, you are ready to test it:
-
-a) Build the driver as a module, load it and try the test modes of hibernation
-   (see: Documentation/power/basic-pm-debugging.txt, 1).
-
-b) Load the driver and attempt to hibernate in the "reboot", "shutdown" and
-   "platform" modes (see: Documentation/power/basic-pm-debugging.txt, 1).
-
-c) Compile the driver directly into the kernel and try the test modes of
-   hibernation.
-
-d) Attempt to hibernate with the driver compiled directly into the kernel
-   in the "reboot", "shutdown" and "platform" modes.
-
-e) Try the test modes of suspend (see: Documentation/power/basic-pm-debugging.txt,
-   2).  [As far as the STR tests are concerned, it should not matter whether or
-   not the driver is built as a module.]
-
-f) Attempt to suspend to RAM using the s2ram tool with the driver loaded
-   (see: Documentation/power/basic-pm-debugging.txt, 2).
-
-Each of the above tests should be repeated several times and the STD tests
-should be mixed with the STR tests.  If any of them fails, the driver cannot be
-regarded as suspend/resume-safe.
diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst
new file mode 100644
index 000000000000..90a345d57ae9
--- /dev/null
+++ b/Documentation/power/energy-model.rst
@@ -0,0 +1,147 @@
+====================
+Energy Model of CPUs
+====================
+
+1. Overview
+-----------
+
+The Energy Model (EM) framework serves as an interface between drivers knowing
+the power consumed by CPUs at various performance levels, and the kernel
+subsystems willing to use that information to make energy-aware decisions.
+
+The source of the information about the power consumed by CPUs can vary greatly
+from one platform to another. These power costs can be estimated using
+devicetree data in some cases. In others, the firmware will know better.
+Alternatively, userspace might be best positioned. And so on. In order to avoid
+each and every client subsystem to re-implement support for each and every
+possible source of information on its own, the EM framework intervenes as an
+abstraction layer which standardizes the format of power cost tables in the
+kernel, hence enabling to avoid redundant work.
+
+The figure below depicts an example of drivers (Arm-specific here, but the
+approach is applicable to any architecture) providing power costs to the EM
+framework, and interested clients reading the data from it::
+
+       +---------------+  +-----------------+  +---------------+
+       | Thermal (IPA) |  | Scheduler (EAS) |  |     Other     |
+       +---------------+  +-----------------+  +---------------+
+               |                   | em_pd_energy()    |
+               |                   | em_cpu_get()      |
+               +---------+         |         +---------+
+                         |         |         |
+                         v         v         v
+                        +---------------------+
+                        |    Energy Model     |
+                        |     Framework       |
+                        +---------------------+
+                           ^       ^       ^
+                           |       |       | em_register_perf_domain()
+                +----------+       |       +---------+
+                |                  |                 |
+        +---------------+  +---------------+  +--------------+
+        |  cpufreq-dt   |  |   arm_scmi    |  |    Other     |
+        +---------------+  +---------------+  +--------------+
+                ^                  ^                 ^
+                |                  |                 |
+        +--------------+   +---------------+  +--------------+
+        | Device Tree  |   |   Firmware    |  |      ?       |
+        +--------------+   +---------------+  +--------------+
+
+The EM framework manages power cost tables per 'performance domain' in the
+system. A performance domain is a group of CPUs whose performance is scaled
+together. Performance domains generally have a 1-to-1 mapping with CPUFreq
+policies. All CPUs in a performance domain are required to have the same
+micro-architecture. CPUs in different performance domains can have different
+micro-architectures.
+
+
+2. Core APIs
+------------
+
+2.1 Config options
+^^^^^^^^^^^^^^^^^^
+
+CONFIG_ENERGY_MODEL must be enabled to use the EM framework.
+
+
+2.2 Registration of performance domains
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Drivers are expected to register performance domains into the EM framework by
+calling the following API::
+
+  int em_register_perf_domain(cpumask_t *span, unsigned int nr_states,
+			      struct em_data_callback *cb);
+
+Drivers must specify the CPUs of the performance domains using the cpumask
+argument, and provide a callback function returning <frequency, power> tuples
+for each capacity state. The callback function provided by the driver is free
+to fetch data from any relevant location (DT, firmware, ...), and by any mean
+deemed necessary. See Section 3. for an example of driver implementing this
+callback, and kernel/power/energy_model.c for further documentation on this
+API.
+
+
+2.3 Accessing performance domains
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Subsystems interested in the energy model of a CPU can retrieve it using the
+em_cpu_get() API. The energy model tables are allocated once upon creation of
+the performance domains, and kept in memory untouched.
+
+The energy consumed by a performance domain can be estimated using the
+em_pd_energy() API. The estimation is performed assuming that the schedutil
+CPUfreq governor is in use.
+
+More details about the above APIs can be found in include/linux/energy_model.h.
+
+
+3. Example driver
+-----------------
+
+This section provides a simple example of a CPUFreq driver registering a
+performance domain in the Energy Model framework using the (fake) 'foo'
+protocol. The driver implements an est_power() function to be provided to the
+EM framework::
+
+  -> drivers/cpufreq/foo_cpufreq.c
+
+  01	static int est_power(unsigned long *mW, unsigned long *KHz, int cpu)
+  02	{
+  03		long freq, power;
+  04
+  05		/* Use the 'foo' protocol to ceil the frequency */
+  06		freq = foo_get_freq_ceil(cpu, *KHz);
+  07		if (freq < 0);
+  08			return freq;
+  09
+  10		/* Estimate the power cost for the CPU at the relevant freq. */
+  11		power = foo_estimate_power(cpu, freq);
+  12		if (power < 0);
+  13			return power;
+  14
+  15		/* Return the values to the EM framework */
+  16		*mW = power;
+  17		*KHz = freq;
+  18
+  19		return 0;
+  20	}
+  21
+  22	static int foo_cpufreq_init(struct cpufreq_policy *policy)
+  23	{
+  24		struct em_data_callback em_cb = EM_DATA_CB(est_power);
+  25		int nr_opp, ret;
+  26
+  27		/* Do the actual CPUFreq init work ... */
+  28		ret = do_foo_cpufreq_init(policy);
+  29		if (ret)
+  30			return ret;
+  31
+  32		/* Find the number of OPPs for this policy */
+  33		nr_opp = foo_get_nr_opp(policy);
+  34
+  35		/* And register the new performance domain */
+  36		em_register_perf_domain(policy->cpus, nr_opp, &em_cb);
+  37
+  38	        return 0;
+  39	}
diff --git a/Documentation/power/energy-model.txt b/Documentation/power/energy-model.txt
deleted file mode 100644
index a2b0ae4c76bd..000000000000
--- a/Documentation/power/energy-model.txt
+++ /dev/null
@@ -1,144 +0,0 @@
-                           ====================
-                           Energy Model of CPUs
-                           ====================
-
-1. Overview
------------
-
-The Energy Model (EM) framework serves as an interface between drivers knowing
-the power consumed by CPUs at various performance levels, and the kernel
-subsystems willing to use that information to make energy-aware decisions.
-
-The source of the information about the power consumed by CPUs can vary greatly
-from one platform to another. These power costs can be estimated using
-devicetree data in some cases. In others, the firmware will know better.
-Alternatively, userspace might be best positioned. And so on. In order to avoid
-each and every client subsystem to re-implement support for each and every
-possible source of information on its own, the EM framework intervenes as an
-abstraction layer which standardizes the format of power cost tables in the
-kernel, hence enabling to avoid redundant work.
-
-The figure below depicts an example of drivers (Arm-specific here, but the
-approach is applicable to any architecture) providing power costs to the EM
-framework, and interested clients reading the data from it.
-
-       +---------------+  +-----------------+  +---------------+
-       | Thermal (IPA) |  | Scheduler (EAS) |  |     Other     |
-       +---------------+  +-----------------+  +---------------+
-               |                   | em_pd_energy()    |
-               |                   | em_cpu_get()      |
-               +---------+         |         +---------+
-                         |         |         |
-                         v         v         v
-                        +---------------------+
-                        |    Energy Model     |
-                        |     Framework       |
-                        +---------------------+
-                           ^       ^       ^
-                           |       |       | em_register_perf_domain()
-                +----------+       |       +---------+
-                |                  |                 |
-        +---------------+  +---------------+  +--------------+
-        |  cpufreq-dt   |  |   arm_scmi    |  |    Other     |
-        +---------------+  +---------------+  +--------------+
-                ^                  ^                 ^
-                |                  |                 |
-        +--------------+   +---------------+  +--------------+
-        | Device Tree  |   |   Firmware    |  |      ?       |
-        +--------------+   +---------------+  +--------------+
-
-The EM framework manages power cost tables per 'performance domain' in the
-system. A performance domain is a group of CPUs whose performance is scaled
-together. Performance domains generally have a 1-to-1 mapping with CPUFreq
-policies. All CPUs in a performance domain are required to have the same
-micro-architecture. CPUs in different performance domains can have different
-micro-architectures.
-
-
-2. Core APIs
-------------
-
-  2.1 Config options
-
-CONFIG_ENERGY_MODEL must be enabled to use the EM framework.
-
-
-  2.2 Registration of performance domains
-
-Drivers are expected to register performance domains into the EM framework by
-calling the following API:
-
-  int em_register_perf_domain(cpumask_t *span, unsigned int nr_states,
-			      struct em_data_callback *cb);
-
-Drivers must specify the CPUs of the performance domains using the cpumask
-argument, and provide a callback function returning <frequency, power> tuples
-for each capacity state. The callback function provided by the driver is free
-to fetch data from any relevant location (DT, firmware, ...), and by any mean
-deemed necessary. See Section 3. for an example of driver implementing this
-callback, and kernel/power/energy_model.c for further documentation on this
-API.
-
-
-  2.3 Accessing performance domains
-
-Subsystems interested in the energy model of a CPU can retrieve it using the
-em_cpu_get() API. The energy model tables are allocated once upon creation of
-the performance domains, and kept in memory untouched.
-
-The energy consumed by a performance domain can be estimated using the
-em_pd_energy() API. The estimation is performed assuming that the schedutil
-CPUfreq governor is in use.
-
-More details about the above APIs can be found in include/linux/energy_model.h.
-
-
-3. Example driver
------------------
-
-This section provides a simple example of a CPUFreq driver registering a
-performance domain in the Energy Model framework using the (fake) 'foo'
-protocol. The driver implements an est_power() function to be provided to the
-EM framework.
-
- -> drivers/cpufreq/foo_cpufreq.c
-
-01	static int est_power(unsigned long *mW, unsigned long *KHz, int cpu)
-02	{
-03		long freq, power;
-04
-05		/* Use the 'foo' protocol to ceil the frequency */
-06		freq = foo_get_freq_ceil(cpu, *KHz);
-07		if (freq < 0);
-08			return freq;
-09
-10		/* Estimate the power cost for the CPU at the relevant freq. */
-11		power = foo_estimate_power(cpu, freq);
-12		if (power < 0);
-13			return power;
-14
-15		/* Return the values to the EM framework */
-16		*mW = power;
-17		*KHz = freq;
-18
-19		return 0;
-20	}
-21
-22	static int foo_cpufreq_init(struct cpufreq_policy *policy)
-23	{
-24		struct em_data_callback em_cb = EM_DATA_CB(est_power);
-25		int nr_opp, ret;
-26
-27		/* Do the actual CPUFreq init work ... */
-28		ret = do_foo_cpufreq_init(policy);
-29		if (ret)
-30			return ret;
-31
-32		/* Find the number of OPPs for this policy */
-33		nr_opp = foo_get_nr_opp(policy);
-34
-35		/* And register the new performance domain */
-36		em_register_perf_domain(policy->cpus, nr_opp, &em_cb);
-37
-38	        return 0;
-39	}
diff --git a/Documentation/power/freezing-of-tasks.rst b/Documentation/power/freezing-of-tasks.rst
new file mode 100644
index 000000000000..ef110fe55e82
--- /dev/null
+++ b/Documentation/power/freezing-of-tasks.rst
@@ -0,0 +1,244 @@
+=================
+Freezing of tasks
+=================
+
+(C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
+
+I. What is the freezing of tasks?
+=================================
+
+The freezing of tasks is a mechanism by which user space processes and some
+kernel threads are controlled during hibernation or system-wide suspend (on some
+architectures).
+
+II. How does it work?
+=====================
+
+There are three per-task flags used for that, PF_NOFREEZE, PF_FROZEN
+and PF_FREEZER_SKIP (the last one is auxiliary).  The tasks that have
+PF_NOFREEZE unset (all user space processes and some kernel threads) are
+regarded as 'freezable' and treated in a special way before the system enters a
+suspend state as well as before a hibernation image is created (in what follows
+we only consider hibernation, but the description also applies to suspend).
+
+Namely, as the first step of the hibernation procedure the function
+freeze_processes() (defined in kernel/power/process.c) is called.  A system-wide
+variable system_freezing_cnt (as opposed to a per-task flag) is used to indicate
+whether the system is to undergo a freezing operation. And freeze_processes()
+sets this variable.  After this, it executes try_to_freeze_tasks() that sends a
+fake signal to all user space processes, and wakes up all the kernel threads.
+All freezable tasks must react to that by calling try_to_freeze(), which
+results in a call to __refrigerator() (defined in kernel/freezer.c), which sets
+the task's PF_FROZEN flag, changes its state to TASK_UNINTERRUPTIBLE and makes
+it loop until PF_FROZEN is cleared for it. Then, we say that the task is
+'frozen' and therefore the set of functions handling this mechanism is referred
+to as 'the freezer' (these functions are defined in kernel/power/process.c,
+kernel/freezer.c & include/linux/freezer.h). User space processes are generally
+frozen before kernel threads.
+
+__refrigerator() must not be called directly.  Instead, use the
+try_to_freeze() function (defined in include/linux/freezer.h), that checks
+if the task is to be frozen and makes the task enter __refrigerator().
+
+For user space processes try_to_freeze() is called automatically from the
+signal-handling code, but the freezable kernel threads need to call it
+explicitly in suitable places or use the wait_event_freezable() or
+wait_event_freezable_timeout() macros (defined in include/linux/freezer.h)
+that combine interruptible sleep with checking if the task is to be frozen and
+calling try_to_freeze().  The main loop of a freezable kernel thread may look
+like the following one::
+
+	set_freezable();
+	do {
+		hub_events();
+		wait_event_freezable(khubd_wait,
+				!list_empty(&hub_event_list) ||
+				kthread_should_stop());
+	} while (!kthread_should_stop() || !list_empty(&hub_event_list));
+
+(from drivers/usb/core/hub.c::hub_thread()).
+
+If a freezable kernel thread fails to call try_to_freeze() after the freezer has
+initiated a freezing operation, the freezing of tasks will fail and the entire
+hibernation operation will be cancelled.  For this reason, freezable kernel
+threads must call try_to_freeze() somewhere or use one of the
+wait_event_freezable() and wait_event_freezable_timeout() macros.
+
+After the system memory state has been restored from a hibernation image and
+devices have been reinitialized, the function thaw_processes() is called in
+order to clear the PF_FROZEN flag for each frozen task.  Then, the tasks that
+have been frozen leave __refrigerator() and continue running.
+
+
+Rationale behind the functions dealing with freezing and thawing of tasks
+-------------------------------------------------------------------------
+
+freeze_processes():
+  - freezes only userspace tasks
+
+freeze_kernel_threads():
+  - freezes all tasks (including kernel threads) because we can't freeze
+    kernel threads without freezing userspace tasks
+
+thaw_kernel_threads():
+  - thaws only kernel threads; this is particularly useful if we need to do
+    anything special in between thawing of kernel threads and thawing of
+    userspace tasks, or if we want to postpone the thawing of userspace tasks
+
+thaw_processes():
+  - thaws all tasks (including kernel threads) because we can't thaw userspace
+    tasks without thawing kernel threads
+
+
+III. Which kernel threads are freezable?
+========================================
+
+Kernel threads are not freezable by default.  However, a kernel thread may clear
+PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE
+directly is not allowed).  From this point it is regarded as freezable
+and must call try_to_freeze() in a suitable place.
+
+IV. Why do we do that?
+======================
+
+Generally speaking, there is a couple of reasons to use the freezing of tasks:
+
+1. The principal reason is to prevent filesystems from being damaged after
+   hibernation.  At the moment we have no simple means of checkpointing
+   filesystems, so if there are any modifications made to filesystem data and/or
+   metadata on disks, we cannot bring them back to the state from before the
+   modifications.  At the same time each hibernation image contains some
+   filesystem-related information that must be consistent with the state of the
+   on-disk data and metadata after the system memory state has been restored
+   from the image (otherwise the filesystems will be damaged in a nasty way,
+   usually making them almost impossible to repair).  We therefore freeze
+   tasks that might cause the on-disk filesystems' data and metadata to be
+   modified after the hibernation image has been created and before the
+   system is finally powered off. The majority of these are user space
+   processes, but if any of the kernel threads may cause something like this
+   to happen, they have to be freezable.
+
+2. Next, to create the hibernation image we need to free a sufficient amount of
+   memory (approximately 50% of available RAM) and we need to do that before
+   devices are deactivated, because we generally need them for swapping out.
+   Then, after the memory for the image has been freed, we don't want tasks
+   to allocate additional memory and we prevent them from doing that by
+   freezing them earlier. [Of course, this also means that device drivers
+   should not allocate substantial amounts of memory from their .suspend()
+   callbacks before hibernation, but this is a separate issue.]
+
+3. The third reason is to prevent user space processes and some kernel threads
+   from interfering with the suspending and resuming of devices.  A user space
+   process running on a second CPU while we are suspending devices may, for
+   example, be troublesome and without the freezing of tasks we would need some
+   safeguards against race conditions that might occur in such a case.
+
+Although Linus Torvalds doesn't like the freezing of tasks, he said this in one
+of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608):
+
+"RJW:> Why we freeze tasks at all or why we freeze kernel threads?
+
+Linus: In many ways, 'at all'.
+
+I **do** realize the IO request queue issues, and that we cannot actually do
+s2ram with some devices in the middle of a DMA.  So we want to be able to
+avoid *that*, there's no question about that.  And I suspect that stopping
+user threads and then waiting for a sync is practically one of the easier
+ways to do so.
+
+So in practice, the 'at all' may become a 'why freeze kernel threads?' and
+freezing user threads I don't find really objectionable."
+
+Still, there are kernel threads that may want to be freezable.  For example, if
+a kernel thread that belongs to a device driver accesses the device directly, it
+in principle needs to know when the device is suspended, so that it doesn't try
+to access it at that time.  However, if the kernel thread is freezable, it will
+be frozen before the driver's .suspend() callback is executed and it will be
+thawed after the driver's .resume() callback has run, so it won't be accessing
+the device while it's suspended.
+
+4. Another reason for freezing tasks is to prevent user space processes from
+   realizing that hibernation (or suspend) operation takes place.  Ideally, user
+   space processes should not notice that such a system-wide operation has
+   occurred and should continue running without any problems after the restore
+   (or resume from suspend).  Unfortunately, in the most general case this
+   is quite difficult to achieve without the freezing of tasks.  Consider,
+   for example, a process that depends on all CPUs being online while it's
+   running.  Since we need to disable nonboot CPUs during the hibernation,
+   if this process is not frozen, it may notice that the number of CPUs has
+   changed and may start to work incorrectly because of that.
+
+V. Are there any problems related to the freezing of tasks?
+===========================================================
+
+Yes, there are.
+
+First of all, the freezing of kernel threads may be tricky if they depend one
+on another.  For example, if kernel thread A waits for a completion (in the
+TASK_UNINTERRUPTIBLE state) that needs to be done by freezable kernel thread B
+and B is frozen in the meantime, then A will be blocked until B is thawed, which
+may be undesirable.  That's why kernel threads are not freezable by default.
+
+Second, there are the following two problems related to the freezing of user
+space processes:
+
+1. Putting processes into an uninterruptible sleep distorts the load average.
+2. Now that we have FUSE, plus the framework for doing device drivers in
+   userspace, it gets even more complicated because some userspace processes are
+   now doing the sorts of things that kernel threads do
+   (https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012309.html).
+
+The problem 1. seems to be fixable, although it hasn't been fixed so far.  The
+other one is more serious, but it seems that we can work around it by using
+hibernation (and suspend) notifiers (in that case, though, we won't be able to
+avoid the realization by the user space processes that the hibernation is taking
+place).
+
+There are also problems that the freezing of tasks tends to expose, although
+they are not directly related to it.  For example, if request_firmware() is
+called from a device driver's .resume() routine, it will timeout and eventually
+fail, because the user land process that should respond to the request is frozen
+at this point.  So, seemingly, the failure is due to the freezing of tasks.
+Suppose, however, that the firmware file is located on a filesystem accessible
+only through another device that hasn't been resumed yet.  In that case,
+request_firmware() will fail regardless of whether or not the freezing of tasks
+is used.  Consequently, the problem is not really related to the freezing of
+tasks, since it generally exists anyway.
+
+A driver must have all firmwares it may need in RAM before suspend() is called.
+If keeping them is not practical, for example due to their size, they must be
+requested early enough using the suspend notifier API described in
+Documentation/driver-api/pm/notifiers.rst.
+
+VI. Are there any precautions to be taken to prevent freezing failures?
+=======================================================================
+
+Yes, there are.
+
+First of all, grabbing the 'system_transition_mutex' lock to mutually exclude a piece of code
+from system-wide sleep such as suspend/hibernation is not encouraged.
+If possible, that piece of code must instead hook onto the suspend/hibernation
+notifiers to achieve mutual exclusion. Look at the CPU-Hotplug code
+(kernel/cpu.c) for an example.
+
+However, if that is not feasible, and grabbing 'system_transition_mutex' is deemed necessary,
+it is strongly discouraged to directly call mutex_[un]lock(&system_transition_mutex) since
+that could lead to freezing failures, because if the suspend/hibernate code
+successfully acquired the 'system_transition_mutex' lock, and hence that other entity failed
+to acquire the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE
+state. As a consequence, the freezer would not be able to freeze that task,
+leading to freezing failure.
+
+However, the [un]lock_system_sleep() APIs are safe to use in this scenario,
+since they ask the freezer to skip freezing this task, since it is anyway
+"frozen enough" as it is blocked on 'system_transition_mutex', which will be released
+only after the entire suspend/hibernation sequence is complete.
+So, to summarize, use [un]lock_system_sleep() instead of directly using
+mutex_[un]lock(&system_transition_mutex). That would prevent freezing failures.
+
+V. Miscellaneous
+================
+
+/sys/power/pm_freeze_timeout controls how long it will cost at most to freeze
+all user space processes or all freezable kernel threads, in unit of millisecond.
+The default value is 20000, with range of unsigned integer.
diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt
deleted file mode 100644
index cd283190855a..000000000000
--- a/Documentation/power/freezing-of-tasks.txt
+++ /dev/null
@@ -1,231 +0,0 @@
-Freezing of tasks
-	(C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
-
-I. What is the freezing of tasks?
-
-The freezing of tasks is a mechanism by which user space processes and some
-kernel threads are controlled during hibernation or system-wide suspend (on some
-architectures).
-
-II. How does it work?
-
-There are three per-task flags used for that, PF_NOFREEZE, PF_FROZEN
-and PF_FREEZER_SKIP (the last one is auxiliary).  The tasks that have
-PF_NOFREEZE unset (all user space processes and some kernel threads) are
-regarded as 'freezable' and treated in a special way before the system enters a
-suspend state as well as before a hibernation image is created (in what follows
-we only consider hibernation, but the description also applies to suspend).
-
-Namely, as the first step of the hibernation procedure the function
-freeze_processes() (defined in kernel/power/process.c) is called.  A system-wide
-variable system_freezing_cnt (as opposed to a per-task flag) is used to indicate
-whether the system is to undergo a freezing operation. And freeze_processes()
-sets this variable.  After this, it executes try_to_freeze_tasks() that sends a
-fake signal to all user space processes, and wakes up all the kernel threads.
-All freezable tasks must react to that by calling try_to_freeze(), which
-results in a call to __refrigerator() (defined in kernel/freezer.c), which sets
-the task's PF_FROZEN flag, changes its state to TASK_UNINTERRUPTIBLE and makes
-it loop until PF_FROZEN is cleared for it. Then, we say that the task is
-'frozen' and therefore the set of functions handling this mechanism is referred
-to as 'the freezer' (these functions are defined in kernel/power/process.c,
-kernel/freezer.c & include/linux/freezer.h). User space processes are generally
-frozen before kernel threads.
-
-__refrigerator() must not be called directly.  Instead, use the
-try_to_freeze() function (defined in include/linux/freezer.h), that checks
-if the task is to be frozen and makes the task enter __refrigerator().
-
-For user space processes try_to_freeze() is called automatically from the
-signal-handling code, but the freezable kernel threads need to call it
-explicitly in suitable places or use the wait_event_freezable() or
-wait_event_freezable_timeout() macros (defined in include/linux/freezer.h)
-that combine interruptible sleep with checking if the task is to be frozen and
-calling try_to_freeze().  The main loop of a freezable kernel thread may look
-like the following one:
-
-	set_freezable();
-	do {
-		hub_events();
-		wait_event_freezable(khubd_wait,
-				!list_empty(&hub_event_list) ||
-				kthread_should_stop());
-	} while (!kthread_should_stop() || !list_empty(&hub_event_list));
-
-(from drivers/usb/core/hub.c::hub_thread()).
-
-If a freezable kernel thread fails to call try_to_freeze() after the freezer has
-initiated a freezing operation, the freezing of tasks will fail and the entire
-hibernation operation will be cancelled.  For this reason, freezable kernel
-threads must call try_to_freeze() somewhere or use one of the
-wait_event_freezable() and wait_event_freezable_timeout() macros.
-
-After the system memory state has been restored from a hibernation image and
-devices have been reinitialized, the function thaw_processes() is called in
-order to clear the PF_FROZEN flag for each frozen task.  Then, the tasks that
-have been frozen leave __refrigerator() and continue running.
-
-
-Rationale behind the functions dealing with freezing and thawing of tasks:
--------------------------------------------------------------------------
-
-freeze_processes():
-  - freezes only userspace tasks
-
-freeze_kernel_threads():
-  - freezes all tasks (including kernel threads) because we can't freeze
-    kernel threads without freezing userspace tasks
-
-thaw_kernel_threads():
-  - thaws only kernel threads; this is particularly useful if we need to do
-    anything special in between thawing of kernel threads and thawing of
-    userspace tasks, or if we want to postpone the thawing of userspace tasks
-
-thaw_processes():
-  - thaws all tasks (including kernel threads) because we can't thaw userspace
-    tasks without thawing kernel threads
-
-
-III. Which kernel threads are freezable?
-
-Kernel threads are not freezable by default.  However, a kernel thread may clear
-PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE
-directly is not allowed).  From this point it is regarded as freezable
-and must call try_to_freeze() in a suitable place.
-
-IV. Why do we do that?
-
-Generally speaking, there is a couple of reasons to use the freezing of tasks:
-
-1. The principal reason is to prevent filesystems from being damaged after
-hibernation.  At the moment we have no simple means of checkpointing
-filesystems, so if there are any modifications made to filesystem data and/or
-metadata on disks, we cannot bring them back to the state from before the
-modifications.  At the same time each hibernation image contains some
-filesystem-related information that must be consistent with the state of the
-on-disk data and metadata after the system memory state has been restored from
-the image (otherwise the filesystems will be damaged in a nasty way, usually
-making them almost impossible to repair).  We therefore freeze tasks that might
-cause the on-disk filesystems' data and metadata to be modified after the
-hibernation image has been created and before the system is finally powered off.
-The majority of these are user space processes, but if any of the kernel threads
-may cause something like this to happen, they have to be freezable.
-
-2. Next, to create the hibernation image we need to free a sufficient amount of
-memory (approximately 50% of available RAM) and we need to do that before
-devices are deactivated, because we generally need them for swapping out.  Then,
-after the memory for the image has been freed, we don't want tasks to allocate
-additional memory and we prevent them from doing that by freezing them earlier.
-[Of course, this also means that device drivers should not allocate substantial
-amounts of memory from their .suspend() callbacks before hibernation, but this
-is a separate issue.]
-
-3. The third reason is to prevent user space processes and some kernel threads
-from interfering with the suspending and resuming of devices.  A user space
-process running on a second CPU while we are suspending devices may, for
-example, be troublesome and without the freezing of tasks we would need some
-safeguards against race conditions that might occur in such a case.
-
-Although Linus Torvalds doesn't like the freezing of tasks, he said this in one
-of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608):
-
-"RJW:> Why we freeze tasks at all or why we freeze kernel threads?
-
-Linus: In many ways, 'at all'.
-
-I _do_ realize the IO request queue issues, and that we cannot actually do
-s2ram with some devices in the middle of a DMA.  So we want to be able to
-avoid *that*, there's no question about that.  And I suspect that stopping
-user threads and then waiting for a sync is practically one of the easier
-ways to do so.
-
-So in practice, the 'at all' may become a 'why freeze kernel threads?' and
-freezing user threads I don't find really objectionable."
-
-Still, there are kernel threads that may want to be freezable.  For example, if
-a kernel thread that belongs to a device driver accesses the device directly, it
-in principle needs to know when the device is suspended, so that it doesn't try
-to access it at that time.  However, if the kernel thread is freezable, it will
-be frozen before the driver's .suspend() callback is executed and it will be
-thawed after the driver's .resume() callback has run, so it won't be accessing
-the device while it's suspended.
-
-4. Another reason for freezing tasks is to prevent user space processes from
-realizing that hibernation (or suspend) operation takes place.  Ideally, user
-space processes should not notice that such a system-wide operation has occurred
-and should continue running without any problems after the restore (or resume
-from suspend).  Unfortunately, in the most general case this is quite difficult
-to achieve without the freezing of tasks.  Consider, for example, a process
-that depends on all CPUs being online while it's running.  Since we need to
-disable nonboot CPUs during the hibernation, if this process is not frozen, it
-may notice that the number of CPUs has changed and may start to work incorrectly
-because of that.
-
-V. Are there any problems related to the freezing of tasks?
-
-Yes, there are.
-
-First of all, the freezing of kernel threads may be tricky if they depend one
-on another.  For example, if kernel thread A waits for a completion (in the
-TASK_UNINTERRUPTIBLE state) that needs to be done by freezable kernel thread B
-and B is frozen in the meantime, then A will be blocked until B is thawed, which
-may be undesirable.  That's why kernel threads are not freezable by default.
-
-Second, there are the following two problems related to the freezing of user
-space processes:
-1. Putting processes into an uninterruptible sleep distorts the load average.
-2. Now that we have FUSE, plus the framework for doing device drivers in
-userspace, it gets even more complicated because some userspace processes are
-now doing the sorts of things that kernel threads do
-(https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012309.html).
-
-The problem 1. seems to be fixable, although it hasn't been fixed so far.  The
-other one is more serious, but it seems that we can work around it by using
-hibernation (and suspend) notifiers (in that case, though, we won't be able to
-avoid the realization by the user space processes that the hibernation is taking
-place).
-
-There are also problems that the freezing of tasks tends to expose, although
-they are not directly related to it.  For example, if request_firmware() is
-called from a device driver's .resume() routine, it will timeout and eventually
-fail, because the user land process that should respond to the request is frozen
-at this point.  So, seemingly, the failure is due to the freezing of tasks.
-Suppose, however, that the firmware file is located on a filesystem accessible
-only through another device that hasn't been resumed yet.  In that case,
-request_firmware() will fail regardless of whether or not the freezing of tasks
-is used.  Consequently, the problem is not really related to the freezing of
-tasks, since it generally exists anyway.
-
-A driver must have all firmwares it may need in RAM before suspend() is called.
-If keeping them is not practical, for example due to their size, they must be
-requested early enough using the suspend notifier API described in
-Documentation/driver-api/pm/notifiers.rst.
-
-VI. Are there any precautions to be taken to prevent freezing failures?
-
-Yes, there are.
-
-First of all, grabbing the 'system_transition_mutex' lock to mutually exclude a piece of code
-from system-wide sleep such as suspend/hibernation is not encouraged.
-If possible, that piece of code must instead hook onto the suspend/hibernation
-notifiers to achieve mutual exclusion. Look at the CPU-Hotplug code
-(kernel/cpu.c) for an example.
-
-However, if that is not feasible, and grabbing 'system_transition_mutex' is deemed necessary,
-it is strongly discouraged to directly call mutex_[un]lock(&system_transition_mutex) since
-that could lead to freezing failures, because if the suspend/hibernate code
-successfully acquired the 'system_transition_mutex' lock, and hence that other entity failed
-to acquire the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE
-state. As a consequence, the freezer would not be able to freeze that task,
-leading to freezing failure.
-
-However, the [un]lock_system_sleep() APIs are safe to use in this scenario,
-since they ask the freezer to skip freezing this task, since it is anyway
-"frozen enough" as it is blocked on 'system_transition_mutex', which will be released
-only after the entire suspend/hibernation sequence is complete.
-So, to summarize, use [un]lock_system_sleep() instead of directly using
-mutex_[un]lock(&system_transition_mutex). That would prevent freezing failures.
-
-V. Miscellaneous
-/sys/power/pm_freeze_timeout controls how long it will cost at most to freeze
-all user space processes or all freezable kernel threads, in unit of millisecond.
-The default value is 20000, with range of unsigned integer.
diff --git a/Documentation/power/index.rst b/Documentation/power/index.rst
new file mode 100644
index 000000000000..20415f21e48a
--- /dev/null
+++ b/Documentation/power/index.rst
@@ -0,0 +1,46 @@
+:orphan:
+
+================
+Power Management
+================
+
+.. toctree::
+    :maxdepth: 1
+
+    apm-acpi
+    basic-pm-debugging
+    charger-manager
+    drivers-testing
+    energy-model
+    freezing-of-tasks
+    interface
+    opp
+    pci
+    pm_qos_interface
+    power_supply_class
+    runtime_pm
+    s2ram
+    suspend-and-cpuhotplug
+    suspend-and-interrupts
+    swsusp-and-swap-files
+    swsusp-dmcrypt
+    swsusp
+    video
+    tricks
+
+    userland-swsusp
+
+    powercap/powercap
+
+    regulator/consumer
+    regulator/design
+    regulator/machine
+    regulator/overview
+    regulator/regulator
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/power/interface.rst b/Documentation/power/interface.rst
new file mode 100644
index 000000000000..8d270ed27228
--- /dev/null
+++ b/Documentation/power/interface.rst
@@ -0,0 +1,79 @@
+===========================================
+Power Management Interface for System Sleep
+===========================================
+
+Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+The power management subsystem provides userspace with a unified sysfs interface
+for system sleep regardless of the underlying system architecture or platform.
+The interface is located in the /sys/power/ directory (assuming that sysfs is
+mounted at /sys).
+
+/sys/power/state is the system sleep state control file.
+
+Reading from it returns a list of supported sleep states, encoded as:
+
+- 'freeze' (Suspend-to-Idle)
+- 'standby' (Power-On Suspend)
+- 'mem' (Suspend-to-RAM)
+- 'disk' (Suspend-to-Disk)
+
+Suspend-to-Idle is always supported.  Suspend-to-Disk is always supported
+too as long the kernel has been configured to support hibernation at all
+(ie. CONFIG_HIBERNATION is set in the kernel configuration file).  Support
+for Suspend-to-RAM and Power-On Suspend depends on the capabilities of the
+platform.
+
+If one of the strings listed in /sys/power/state is written to it, the system
+will attempt to transition into the corresponding sleep state.  Refer to
+Documentation/admin-guide/pm/sleep-states.rst for a description of each of
+those states.
+
+/sys/power/disk controls the operating mode of hibernation (Suspend-to-Disk).
+Specifically, it tells the kernel what to do after creating a hibernation image.
+
+Reading from it returns a list of supported options encoded as:
+
+- 'platform' (put the system into sleep using a platform-provided method)
+- 'shutdown' (shut the system down)
+- 'reboot' (reboot the system)
+- 'suspend' (trigger a Suspend-to-RAM transition)
+- 'test_resume' (resume-after-hibernation test mode)
+
+The currently selected option is printed in square brackets.
+
+The 'platform' option is only available if the platform provides a special
+mechanism to put the system to sleep after creating a hibernation image (ACPI
+does that, for example).  The 'suspend' option is available if Suspend-to-RAM
+is supported.  Refer to Documentation/power/basic-pm-debugging.rst for the
+description of the 'test_resume' option.
+
+To select an option, write the string representing it to /sys/power/disk.
+
+/sys/power/image_size controls the size of hibernation images.
+
+It can be written a string representing a non-negative integer that will be
+used as a best-effort upper limit of the image size, in bytes.  The hibernation
+core will do its best to ensure that the image size will not exceed that number.
+However, if that turns out to be impossible to achieve, a hibernation image will
+still be created and its size will be as small as possible.  In particular,
+writing '0' to this file will enforce hibernation images to be as small as
+possible.
+
+Reading from this file returns the current image size limit, which is set to
+around 2/5 of available RAM by default.
+
+/sys/power/pm_trace controls the PM trace mechanism saving the last suspend
+or resume event point in the RTC across reboots.
+
+It helps to debug hard lockups or reboots due to device driver failures that
+occur during system suspend or resume (which is more common) more effectively.
+
+If /sys/power/pm_trace contains '1', the fingerprint of each suspend/resume
+event point in turn will be stored in the RTC memory (overwriting the actual
+RTC information), so it will survive a system crash if one occurs right after
+storing it and it can be used later to identify the driver that caused the crash
+to happen (see Documentation/power/s2ram.rst for more information).
+
+Initially it contains '0' which may be changed to '1' by writing a string
+representing a nonzero integer into it.
diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt
deleted file mode 100644
index 27df7f98668a..000000000000
--- a/Documentation/power/interface.txt
+++ /dev/null
@@ -1,77 +0,0 @@
-Power Management Interface for System Sleep
-
-Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-
-The power management subsystem provides userspace with a unified sysfs interface
-for system sleep regardless of the underlying system architecture or platform.
-The interface is located in the /sys/power/ directory (assuming that sysfs is
-mounted at /sys).
-
-/sys/power/state is the system sleep state control file.
-
-Reading from it returns a list of supported sleep states, encoded as:
-
-'freeze' (Suspend-to-Idle)
-'standby' (Power-On Suspend)
-'mem' (Suspend-to-RAM)
-'disk' (Suspend-to-Disk)
-
-Suspend-to-Idle is always supported.  Suspend-to-Disk is always supported
-too as long the kernel has been configured to support hibernation at all
-(ie. CONFIG_HIBERNATION is set in the kernel configuration file).  Support
-for Suspend-to-RAM and Power-On Suspend depends on the capabilities of the
-platform.
-
-If one of the strings listed in /sys/power/state is written to it, the system
-will attempt to transition into the corresponding sleep state.  Refer to
-Documentation/admin-guide/pm/sleep-states.rst for a description of each of
-those states.
-
-/sys/power/disk controls the operating mode of hibernation (Suspend-to-Disk).
-Specifically, it tells the kernel what to do after creating a hibernation image.
-
-Reading from it returns a list of supported options encoded as:
-
-'platform' (put the system into sleep using a platform-provided method)
-'shutdown' (shut the system down)
-'reboot' (reboot the system)
-'suspend' (trigger a Suspend-to-RAM transition)
-'test_resume' (resume-after-hibernation test mode)
-
-The currently selected option is printed in square brackets.
-
-The 'platform' option is only available if the platform provides a special
-mechanism to put the system to sleep after creating a hibernation image (ACPI
-does that, for example).  The 'suspend' option is available if Suspend-to-RAM
-is supported.  Refer to Documentation/power/basic-pm-debugging.txt for the
-description of the 'test_resume' option.
-
-To select an option, write the string representing it to /sys/power/disk.
-
-/sys/power/image_size controls the size of hibernation images.
-
-It can be written a string representing a non-negative integer that will be
-used as a best-effort upper limit of the image size, in bytes.  The hibernation
-core will do its best to ensure that the image size will not exceed that number.
-However, if that turns out to be impossible to achieve, a hibernation image will
-still be created and its size will be as small as possible.  In particular,
-writing '0' to this file will enforce hibernation images to be as small as
-possible.
-
-Reading from this file returns the current image size limit, which is set to
-around 2/5 of available RAM by default.
-
-/sys/power/pm_trace controls the PM trace mechanism saving the last suspend
-or resume event point in the RTC across reboots.
-
-It helps to debug hard lockups or reboots due to device driver failures that
-occur during system suspend or resume (which is more common) more effectively.
-
-If /sys/power/pm_trace contains '1', the fingerprint of each suspend/resume
-event point in turn will be stored in the RTC memory (overwriting the actual
-RTC information), so it will survive a system crash if one occurs right after
-storing it and it can be used later to identify the driver that caused the crash
-to happen (see Documentation/power/s2ram.txt for more information).
-
-Initially it contains '0' which may be changed to '1' by writing a string
-representing a nonzero integer into it.
diff --git a/Documentation/power/opp.rst b/Documentation/power/opp.rst
new file mode 100644
index 000000000000..b3cf1def9dee
--- /dev/null
+++ b/Documentation/power/opp.rst
@@ -0,0 +1,379 @@
+==========================================
+Operating Performance Points (OPP) Library
+==========================================
+
+(C) 2009-2010 Nishanth Menon <nm@ti.com>, Texas Instruments Incorporated
+
+.. Contents
+
+  1. Introduction
+  2. Initial OPP List Registration
+  3. OPP Search Functions
+  4. OPP Availability Control Functions
+  5. OPP Data Retrieval Functions
+  6. Data Structures
+
+1. Introduction
+===============
+
+1.1 What is an Operating Performance Point (OPP)?
+-------------------------------------------------
+
+Complex SoCs of today consists of a multiple sub-modules working in conjunction.
+In an operational system executing varied use cases, not all modules in the SoC
+need to function at their highest performing frequency all the time. To
+facilitate this, sub-modules in a SoC are grouped into domains, allowing some
+domains to run at lower voltage and frequency while other domains run at
+voltage/frequency pairs that are higher.
+
+The set of discrete tuples consisting of frequency and voltage pairs that
+the device will support per domain are called Operating Performance Points or
+OPPs.
+
+As an example:
+
+Let us consider an MPU device which supports the following:
+{300MHz at minimum voltage of 1V}, {800MHz at minimum voltage of 1.2V},
+{1GHz at minimum voltage of 1.3V}
+
+We can represent these as three OPPs as the following {Hz, uV} tuples:
+
+- {300000000, 1000000}
+- {800000000, 1200000}
+- {1000000000, 1300000}
+
+1.2 Operating Performance Points Library
+----------------------------------------
+
+OPP library provides a set of helper functions to organize and query the OPP
+information. The library is located in drivers/base/power/opp.c and the header
+is located in include/linux/pm_opp.h. OPP library can be enabled by enabling
+CONFIG_PM_OPP from power management menuconfig menu. OPP library depends on
+CONFIG_PM as certain SoCs such as Texas Instrument's OMAP framework allows to
+optionally boot at a certain OPP without needing cpufreq.
+
+Typical usage of the OPP library is as follows::
+
+ (users)	-> registers a set of default OPPs		-> (library)
+ SoC framework	-> modifies on required cases certain OPPs	-> OPP layer
+		-> queries to search/retrieve information	->
+
+OPP layer expects each domain to be represented by a unique device pointer. SoC
+framework registers a set of initial OPPs per device with the OPP layer. This
+list is expected to be an optimally small number typically around 5 per device.
+This initial list contains a set of OPPs that the framework expects to be safely
+enabled by default in the system.
+
+Note on OPP Availability
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+As the system proceeds to operate, SoC framework may choose to make certain
+OPPs available or not available on each device based on various external
+factors. Example usage: Thermal management or other exceptional situations where
+SoC framework might choose to disable a higher frequency OPP to safely continue
+operations until that OPP could be re-enabled if possible.
+
+OPP library facilitates this concept in it's implementation. The following
+operational functions operate only on available opps:
+opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, dev_pm_opp_get_freq, dev_pm_opp_get_opp_count
+
+dev_pm_opp_find_freq_exact is meant to be used to find the opp pointer which can then
+be used for dev_pm_opp_enable/disable functions to make an opp available as required.
+
+WARNING: Users of OPP library should refresh their availability count using
+get_opp_count if dev_pm_opp_enable/disable functions are invoked for a device, the
+exact mechanism to trigger these or the notification mechanism to other
+dependent subsystems such as cpufreq are left to the discretion of the SoC
+specific framework which uses the OPP library. Similar care needs to be taken
+care to refresh the cpufreq table in cases of these operations.
+
+2. Initial OPP List Registration
+================================
+The SoC implementation calls dev_pm_opp_add function iteratively to add OPPs per
+device. It is expected that the SoC framework will register the OPP entries
+optimally- typical numbers range to be less than 5. The list generated by
+registering the OPPs is maintained by OPP library throughout the device
+operation. The SoC framework can subsequently control the availability of the
+OPPs dynamically using the dev_pm_opp_enable / disable functions.
+
+dev_pm_opp_add
+	Add a new OPP for a specific domain represented by the device pointer.
+	The OPP is defined using the frequency and voltage. Once added, the OPP
+	is assumed to be available and control of it's availability can be done
+	with the dev_pm_opp_enable/disable functions. OPP library internally stores
+	and manages this information in the opp struct. This function may be
+	used by SoC framework to define a optimal list as per the demands of
+	SoC usage environment.
+
+	WARNING:
+		Do not use this function in interrupt context.
+
+	Example::
+
+	 soc_pm_init()
+	 {
+		/* Do things */
+		r = dev_pm_opp_add(mpu_dev, 1000000, 900000);
+		if (!r) {
+			pr_err("%s: unable to register mpu opp(%d)\n", r);
+			goto no_cpufreq;
+		}
+		/* Do cpufreq things */
+	 no_cpufreq:
+		/* Do remaining things */
+	 }
+
+3. OPP Search Functions
+=======================
+High level framework such as cpufreq operates on frequencies. To map the
+frequency back to the corresponding OPP, OPP library provides handy functions
+to search the OPP list that OPP library internally manages. These search
+functions return the matching pointer representing the opp if a match is
+found, else returns error. These errors are expected to be handled by standard
+error checks such as IS_ERR() and appropriate actions taken by the caller.
+
+Callers of these functions shall call dev_pm_opp_put() after they have used the
+OPP. Otherwise the memory for the OPP will never get freed and result in
+memleak.
+
+dev_pm_opp_find_freq_exact
+	Search for an OPP based on an *exact* frequency and
+	availability. This function is especially useful to enable an OPP which
+	is not available by default.
+	Example: In a case when SoC framework detects a situation where a
+	higher frequency could be made available, it can use this function to
+	find the OPP prior to call the dev_pm_opp_enable to actually make
+	it available::
+
+	 opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
+	 dev_pm_opp_put(opp);
+	 /* dont operate on the pointer.. just do a sanity check.. */
+	 if (IS_ERR(opp)) {
+		pr_err("frequency not disabled!\n");
+		/* trigger appropriate actions.. */
+	 } else {
+		dev_pm_opp_enable(dev,1000000000);
+	 }
+
+	NOTE:
+	  This is the only search function that operates on OPPs which are
+	  not available.
+
+dev_pm_opp_find_freq_floor
+	Search for an available OPP which is *at most* the
+	provided frequency. This function is useful while searching for a lesser
+	match OR operating on OPP information in the order of decreasing
+	frequency.
+	Example: To find the highest opp for a device::
+
+	 freq = ULONG_MAX;
+	 opp = dev_pm_opp_find_freq_floor(dev, &freq);
+	 dev_pm_opp_put(opp);
+
+dev_pm_opp_find_freq_ceil
+	Search for an available OPP which is *at least* the
+	provided frequency. This function is useful while searching for a
+	higher match OR operating on OPP information in the order of increasing
+	frequency.
+	Example 1: To find the lowest opp for a device::
+
+	 freq = 0;
+	 opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+	 dev_pm_opp_put(opp);
+
+	Example 2: A simplified implementation of a SoC cpufreq_driver->target::
+
+	 soc_cpufreq_target(..)
+	 {
+		/* Do stuff like policy checks etc. */
+		/* Find the best frequency match for the req */
+		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+		dev_pm_opp_put(opp);
+		if (!IS_ERR(opp))
+			soc_switch_to_freq_voltage(freq);
+		else
+			/* do something when we can't satisfy the req */
+		/* do other stuff */
+	 }
+
+4. OPP Availability Control Functions
+=====================================
+A default OPP list registered with the OPP library may not cater to all possible
+situation. The OPP library provides a set of functions to modify the
+availability of a OPP within the OPP list. This allows SoC frameworks to have
+fine grained dynamic control of which sets of OPPs are operationally available.
+These functions are intended to *temporarily* remove an OPP in conditions such
+as thermal considerations (e.g. don't use OPPx until the temperature drops).
+
+WARNING:
+	Do not use these functions in interrupt context.
+
+dev_pm_opp_enable
+	Make a OPP available for operation.
+	Example: Lets say that 1GHz OPP is to be made available only if the
+	SoC temperature is lower than a certain threshold. The SoC framework
+	implementation might choose to do something as follows::
+
+	 if (cur_temp < temp_low_thresh) {
+		/* Enable 1GHz if it was disabled */
+		opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
+		dev_pm_opp_put(opp);
+		/* just error check */
+		if (!IS_ERR(opp))
+			ret = dev_pm_opp_enable(dev, 1000000000);
+		else
+			goto try_something_else;
+	 }
+
+dev_pm_opp_disable
+	Make an OPP to be not available for operation
+	Example: Lets say that 1GHz OPP is to be disabled if the temperature
+	exceeds a threshold value. The SoC framework implementation might
+	choose to do something as follows::
+
+	 if (cur_temp > temp_high_thresh) {
+		/* Disable 1GHz if it was enabled */
+		opp = dev_pm_opp_find_freq_exact(dev, 1000000000, true);
+		dev_pm_opp_put(opp);
+		/* just error check */
+		if (!IS_ERR(opp))
+			ret = dev_pm_opp_disable(dev, 1000000000);
+		else
+			goto try_something_else;
+	 }
+
+5. OPP Data Retrieval Functions
+===============================
+Since OPP library abstracts away the OPP information, a set of functions to pull
+information from the OPP structure is necessary. Once an OPP pointer is
+retrieved using the search functions, the following functions can be used by SoC
+framework to retrieve the information represented inside the OPP layer.
+
+dev_pm_opp_get_voltage
+	Retrieve the voltage represented by the opp pointer.
+	Example: At a cpufreq transition to a different frequency, SoC
+	framework requires to set the voltage represented by the OPP using
+	the regulator framework to the Power Management chip providing the
+	voltage::
+
+	 soc_switch_to_freq_voltage(freq)
+	 {
+		/* do things */
+		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+		v = dev_pm_opp_get_voltage(opp);
+		dev_pm_opp_put(opp);
+		if (v)
+			regulator_set_voltage(.., v);
+		/* do other things */
+	 }
+
+dev_pm_opp_get_freq
+	Retrieve the freq represented by the opp pointer.
+	Example: Lets say the SoC framework uses a couple of helper functions
+	we could pass opp pointers instead of doing additional parameters to
+	handle quiet a bit of data parameters::
+
+	 soc_cpufreq_target(..)
+	 {
+		/* do things.. */
+		 max_freq = ULONG_MAX;
+		 max_opp = dev_pm_opp_find_freq_floor(dev,&max_freq);
+		 requested_opp = dev_pm_opp_find_freq_ceil(dev,&freq);
+		 if (!IS_ERR(max_opp) && !IS_ERR(requested_opp))
+			r = soc_test_validity(max_opp, requested_opp);
+		 dev_pm_opp_put(max_opp);
+		 dev_pm_opp_put(requested_opp);
+		/* do other things */
+	 }
+	 soc_test_validity(..)
+	 {
+		 if(dev_pm_opp_get_voltage(max_opp) < dev_pm_opp_get_voltage(requested_opp))
+			 return -EINVAL;
+		 if(dev_pm_opp_get_freq(max_opp) < dev_pm_opp_get_freq(requested_opp))
+			 return -EINVAL;
+		/* do things.. */
+	 }
+
+dev_pm_opp_get_opp_count
+	Retrieve the number of available opps for a device
+	Example: Lets say a co-processor in the SoC needs to know the available
+	frequencies in a table, the main processor can notify as following::
+
+	 soc_notify_coproc_available_frequencies()
+	 {
+		/* Do things */
+		num_available = dev_pm_opp_get_opp_count(dev);
+		speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL);
+		/* populate the table in increasing order */
+		freq = 0;
+		while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) {
+			speeds[i] = freq;
+			freq++;
+			i++;
+			dev_pm_opp_put(opp);
+		}
+
+		soc_notify_coproc(AVAILABLE_FREQs, speeds, num_available);
+		/* Do other things */
+	 }
+
+6. Data Structures
+==================
+Typically an SoC contains multiple voltage domains which are variable. Each
+domain is represented by a device pointer. The relationship to OPP can be
+represented as follows::
+
+  SoC
+   |- device 1
+   |	|- opp 1 (availability, freq, voltage)
+   |	|- opp 2 ..
+   ...	...
+   |	`- opp n ..
+   |- device 2
+   ...
+   `- device m
+
+OPP library maintains a internal list that the SoC framework populates and
+accessed by various functions as described above. However, the structures
+representing the actual OPPs and domains are internal to the OPP library itself
+to allow for suitable abstraction reusable across systems.
+
+struct dev_pm_opp
+	The internal data structure of OPP library which is used to
+	represent an OPP. In addition to the freq, voltage, availability
+	information, it also contains internal book keeping information required
+	for the OPP library to operate on.  Pointer to this structure is
+	provided back to the users such as SoC framework to be used as a
+	identifier for OPP in the interactions with OPP layer.
+
+	WARNING:
+	  The struct dev_pm_opp pointer should not be parsed or modified by the
+	  users. The defaults of for an instance is populated by
+	  dev_pm_opp_add, but the availability of the OPP can be modified
+	  by dev_pm_opp_enable/disable functions.
+
+struct device
+	This is used to identify a domain to the OPP layer. The
+	nature of the device and it's implementation is left to the user of
+	OPP library such as the SoC framework.
+
+Overall, in a simplistic view, the data structure operations is represented as
+following::
+
+  Initialization / modification:
+              +-----+        /- dev_pm_opp_enable
+  dev_pm_opp_add --> | opp | <-------
+    |         +-----+        \- dev_pm_opp_disable
+    \-------> domain_info(device)
+
+  Search functions:
+               /-- dev_pm_opp_find_freq_ceil  ---\   +-----+
+  domain_info<---- dev_pm_opp_find_freq_exact -----> | opp |
+               \-- dev_pm_opp_find_freq_floor ---/   +-----+
+
+  Retrieval functions:
+  +-----+     /- dev_pm_opp_get_voltage
+  | opp | <---
+  +-----+     \- dev_pm_opp_get_freq
+
+  domain_info <- dev_pm_opp_get_opp_count
diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt
deleted file mode 100644
index 0c007e250cd1..000000000000
--- a/Documentation/power/opp.txt
+++ /dev/null
@@ -1,342 +0,0 @@
-Operating Performance Points (OPP) Library
-==========================================
-
-(C) 2009-2010 Nishanth Menon <nm@ti.com>, Texas Instruments Incorporated
-
-Contents
---------
-1. Introduction
-2. Initial OPP List Registration
-3. OPP Search Functions
-4. OPP Availability Control Functions
-5. OPP Data Retrieval Functions
-6. Data Structures
-
-1. Introduction
-===============
-1.1 What is an Operating Performance Point (OPP)?
-
-Complex SoCs of today consists of a multiple sub-modules working in conjunction.
-In an operational system executing varied use cases, not all modules in the SoC
-need to function at their highest performing frequency all the time. To
-facilitate this, sub-modules in a SoC are grouped into domains, allowing some
-domains to run at lower voltage and frequency while other domains run at
-voltage/frequency pairs that are higher.
-
-The set of discrete tuples consisting of frequency and voltage pairs that
-the device will support per domain are called Operating Performance Points or
-OPPs.
-
-As an example:
-Let us consider an MPU device which supports the following:
-{300MHz at minimum voltage of 1V}, {800MHz at minimum voltage of 1.2V},
-{1GHz at minimum voltage of 1.3V}
-
-We can represent these as three OPPs as the following {Hz, uV} tuples:
-{300000000, 1000000}
-{800000000, 1200000}
-{1000000000, 1300000}
-
-1.2 Operating Performance Points Library
-
-OPP library provides a set of helper functions to organize and query the OPP
-information. The library is located in drivers/base/power/opp.c and the header
-is located in include/linux/pm_opp.h. OPP library can be enabled by enabling
-CONFIG_PM_OPP from power management menuconfig menu. OPP library depends on
-CONFIG_PM as certain SoCs such as Texas Instrument's OMAP framework allows to
-optionally boot at a certain OPP without needing cpufreq.
-
-Typical usage of the OPP library is as follows:
-(users)		-> registers a set of default OPPs		-> (library)
-SoC framework	-> modifies on required cases certain OPPs	-> OPP layer
-		-> queries to search/retrieve information	->
-
-OPP layer expects each domain to be represented by a unique device pointer. SoC
-framework registers a set of initial OPPs per device with the OPP layer. This
-list is expected to be an optimally small number typically around 5 per device.
-This initial list contains a set of OPPs that the framework expects to be safely
-enabled by default in the system.
-
-Note on OPP Availability:
-------------------------
-As the system proceeds to operate, SoC framework may choose to make certain
-OPPs available or not available on each device based on various external
-factors. Example usage: Thermal management or other exceptional situations where
-SoC framework might choose to disable a higher frequency OPP to safely continue
-operations until that OPP could be re-enabled if possible.
-
-OPP library facilitates this concept in it's implementation. The following
-operational functions operate only on available opps:
-opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, dev_pm_opp_get_freq, dev_pm_opp_get_opp_count
-
-dev_pm_opp_find_freq_exact is meant to be used to find the opp pointer which can then
-be used for dev_pm_opp_enable/disable functions to make an opp available as required.
-
-WARNING: Users of OPP library should refresh their availability count using
-get_opp_count if dev_pm_opp_enable/disable functions are invoked for a device, the
-exact mechanism to trigger these or the notification mechanism to other
-dependent subsystems such as cpufreq are left to the discretion of the SoC
-specific framework which uses the OPP library. Similar care needs to be taken
-care to refresh the cpufreq table in cases of these operations.
-
-2. Initial OPP List Registration
-================================
-The SoC implementation calls dev_pm_opp_add function iteratively to add OPPs per
-device. It is expected that the SoC framework will register the OPP entries
-optimally- typical numbers range to be less than 5. The list generated by
-registering the OPPs is maintained by OPP library throughout the device
-operation. The SoC framework can subsequently control the availability of the
-OPPs dynamically using the dev_pm_opp_enable / disable functions.
-
-dev_pm_opp_add - Add a new OPP for a specific domain represented by the device pointer.
-	The OPP is defined using the frequency and voltage. Once added, the OPP
-	is assumed to be available and control of it's availability can be done
-	with the dev_pm_opp_enable/disable functions. OPP library internally stores
-	and manages this information in the opp struct. This function may be
-	used by SoC framework to define a optimal list as per the demands of
-	SoC usage environment.
-
-	WARNING: Do not use this function in interrupt context.
-
-	Example:
-	 soc_pm_init()
-	 {
-		/* Do things */
-		r = dev_pm_opp_add(mpu_dev, 1000000, 900000);
-		if (!r) {
-			pr_err("%s: unable to register mpu opp(%d)\n", r);
-			goto no_cpufreq;
-		}
-		/* Do cpufreq things */
-	 no_cpufreq:
-		/* Do remaining things */
-	 }
-
-3. OPP Search Functions
-=======================
-High level framework such as cpufreq operates on frequencies. To map the
-frequency back to the corresponding OPP, OPP library provides handy functions
-to search the OPP list that OPP library internally manages. These search
-functions return the matching pointer representing the opp if a match is
-found, else returns error. These errors are expected to be handled by standard
-error checks such as IS_ERR() and appropriate actions taken by the caller.
-
-Callers of these functions shall call dev_pm_opp_put() after they have used the
-OPP. Otherwise the memory for the OPP will never get freed and result in
-memleak.
-
-dev_pm_opp_find_freq_exact - Search for an OPP based on an *exact* frequency and
-	availability. This function is especially useful to enable an OPP which
-	is not available by default.
-	Example: In a case when SoC framework detects a situation where a
-	higher frequency could be made available, it can use this function to
-	find the OPP prior to call the dev_pm_opp_enable to actually make it available.
-	 opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
-	 dev_pm_opp_put(opp);
-	 /* dont operate on the pointer.. just do a sanity check.. */
-	 if (IS_ERR(opp)) {
-		pr_err("frequency not disabled!\n");
-		/* trigger appropriate actions.. */
-	 } else {
-		dev_pm_opp_enable(dev,1000000000);
-	 }
-
-	NOTE: This is the only search function that operates on OPPs which are
-	not available.
-
-dev_pm_opp_find_freq_floor - Search for an available OPP which is *at most* the
-	provided frequency. This function is useful while searching for a lesser
-	match OR operating on OPP information in the order of decreasing
-	frequency.
-	Example: To find the highest opp for a device:
-	 freq = ULONG_MAX;
-	 opp = dev_pm_opp_find_freq_floor(dev, &freq);
-	 dev_pm_opp_put(opp);
-
-dev_pm_opp_find_freq_ceil - Search for an available OPP which is *at least* the
-	provided frequency. This function is useful while searching for a
-	higher match OR operating on OPP information in the order of increasing
-	frequency.
-	Example 1: To find the lowest opp for a device:
-	 freq = 0;
-	 opp = dev_pm_opp_find_freq_ceil(dev, &freq);
-	 dev_pm_opp_put(opp);
-	Example 2: A simplified implementation of a SoC cpufreq_driver->target:
-	 soc_cpufreq_target(..)
-	 {
-		/* Do stuff like policy checks etc. */
-		/* Find the best frequency match for the req */
-		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
-		dev_pm_opp_put(opp);
-		if (!IS_ERR(opp))
-			soc_switch_to_freq_voltage(freq);
-		else
-			/* do something when we can't satisfy the req */
-		/* do other stuff */
-	 }
-
-4. OPP Availability Control Functions
-=====================================
-A default OPP list registered with the OPP library may not cater to all possible
-situation. The OPP library provides a set of functions to modify the
-availability of a OPP within the OPP list. This allows SoC frameworks to have
-fine grained dynamic control of which sets of OPPs are operationally available.
-These functions are intended to *temporarily* remove an OPP in conditions such
-as thermal considerations (e.g. don't use OPPx until the temperature drops).
-
-WARNING: Do not use these functions in interrupt context.
-
-dev_pm_opp_enable - Make a OPP available for operation.
-	Example: Lets say that 1GHz OPP is to be made available only if the
-	SoC temperature is lower than a certain threshold. The SoC framework
-	implementation might choose to do something as follows:
-	 if (cur_temp < temp_low_thresh) {
-		/* Enable 1GHz if it was disabled */
-		opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
-		dev_pm_opp_put(opp);
-		/* just error check */
-		if (!IS_ERR(opp))
-			ret = dev_pm_opp_enable(dev, 1000000000);
-		else
-			goto try_something_else;
-	 }
-
-dev_pm_opp_disable - Make an OPP to be not available for operation
-	Example: Lets say that 1GHz OPP is to be disabled if the temperature
-	exceeds a threshold value. The SoC framework implementation might
-	choose to do something as follows:
-	 if (cur_temp > temp_high_thresh) {
-		/* Disable 1GHz if it was enabled */
-		opp = dev_pm_opp_find_freq_exact(dev, 1000000000, true);
-		dev_pm_opp_put(opp);
-		/* just error check */
-		if (!IS_ERR(opp))
-			ret = dev_pm_opp_disable(dev, 1000000000);
-		else
-			goto try_something_else;
-	 }
-
-5. OPP Data Retrieval Functions
-===============================
-Since OPP library abstracts away the OPP information, a set of functions to pull
-information from the OPP structure is necessary. Once an OPP pointer is
-retrieved using the search functions, the following functions can be used by SoC
-framework to retrieve the information represented inside the OPP layer.
-
-dev_pm_opp_get_voltage - Retrieve the voltage represented by the opp pointer.
-	Example: At a cpufreq transition to a different frequency, SoC
-	framework requires to set the voltage represented by the OPP using
-	the regulator framework to the Power Management chip providing the
-	voltage.
-	 soc_switch_to_freq_voltage(freq)
-	 {
-		/* do things */
-		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
-		v = dev_pm_opp_get_voltage(opp);
-		dev_pm_opp_put(opp);
-		if (v)
-			regulator_set_voltage(.., v);
-		/* do other things */
-	 }
-
-dev_pm_opp_get_freq - Retrieve the freq represented by the opp pointer.
-	Example: Lets say the SoC framework uses a couple of helper functions
-	we could pass opp pointers instead of doing additional parameters to
-	handle quiet a bit of data parameters.
-	 soc_cpufreq_target(..)
-	 {
-		/* do things.. */
-		 max_freq = ULONG_MAX;
-		 max_opp = dev_pm_opp_find_freq_floor(dev,&max_freq);
-		 requested_opp = dev_pm_opp_find_freq_ceil(dev,&freq);
-		 if (!IS_ERR(max_opp) && !IS_ERR(requested_opp))
-			r = soc_test_validity(max_opp, requested_opp);
-		 dev_pm_opp_put(max_opp);
-		 dev_pm_opp_put(requested_opp);
-		/* do other things */
-	 }
-	 soc_test_validity(..)
-	 {
-		 if(dev_pm_opp_get_voltage(max_opp) < dev_pm_opp_get_voltage(requested_opp))
-			 return -EINVAL;
-		 if(dev_pm_opp_get_freq(max_opp) < dev_pm_opp_get_freq(requested_opp))
-			 return -EINVAL;
-		/* do things.. */
-	 }
-
-dev_pm_opp_get_opp_count - Retrieve the number of available opps for a device
-	Example: Lets say a co-processor in the SoC needs to know the available
-	frequencies in a table, the main processor can notify as following:
-	 soc_notify_coproc_available_frequencies()
-	 {
-		/* Do things */
-		num_available = dev_pm_opp_get_opp_count(dev);
-		speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL);
-		/* populate the table in increasing order */
-		freq = 0;
-		while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) {
-			speeds[i] = freq;
-			freq++;
-			i++;
-			dev_pm_opp_put(opp);
-		}
-
-		soc_notify_coproc(AVAILABLE_FREQs, speeds, num_available);
-		/* Do other things */
-	 }
-
-6. Data Structures
-==================
-Typically an SoC contains multiple voltage domains which are variable. Each
-domain is represented by a device pointer. The relationship to OPP can be
-represented as follows:
-SoC
- |- device 1
- |	|- opp 1 (availability, freq, voltage)
- |	|- opp 2 ..
- ...	...
- |	`- opp n ..
- |- device 2
- ...
- `- device m
-
-OPP library maintains a internal list that the SoC framework populates and
-accessed by various functions as described above. However, the structures
-representing the actual OPPs and domains are internal to the OPP library itself
-to allow for suitable abstraction reusable across systems.
-
-struct dev_pm_opp - The internal data structure of OPP library which is used to
-	represent an OPP. In addition to the freq, voltage, availability
-	information, it also contains internal book keeping information required
-	for the OPP library to operate on.  Pointer to this structure is
-	provided back to the users such as SoC framework to be used as a
-	identifier for OPP in the interactions with OPP layer.
-
-	WARNING: The struct dev_pm_opp pointer should not be parsed or modified by the
-	users. The defaults of for an instance is populated by dev_pm_opp_add, but the
-	availability of the OPP can be modified by dev_pm_opp_enable/disable functions.
-
-struct device - This is used to identify a domain to the OPP layer. The
-	nature of the device and it's implementation is left to the user of
-	OPP library such as the SoC framework.
-
-Overall, in a simplistic view, the data structure operations is represented as
-following:
-
-Initialization / modification:
-            +-----+        /- dev_pm_opp_enable
-dev_pm_opp_add --> | opp | <-------
-  |         +-----+        \- dev_pm_opp_disable
-  \-------> domain_info(device)
-
-Search functions:
-             /-- dev_pm_opp_find_freq_ceil  ---\   +-----+
-domain_info<---- dev_pm_opp_find_freq_exact -----> | opp |
-             \-- dev_pm_opp_find_freq_floor ---/   +-----+
-
-Retrieval functions:
-+-----+     /- dev_pm_opp_get_voltage
-| opp | <---
-+-----+     \- dev_pm_opp_get_freq
-
-domain_info <- dev_pm_opp_get_opp_count
diff --git a/Documentation/power/pci.rst b/Documentation/power/pci.rst
new file mode 100644
index 000000000000..0e2ef7429304
--- /dev/null
+++ b/Documentation/power/pci.rst
@@ -0,0 +1,1135 @@
+====================
+PCI Power Management
+====================
+
+Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+
+An overview of concepts and the Linux kernel's interfaces related to PCI power
+management.  Based on previous work by Patrick Mochel <mochel@transmeta.com>
+(and others).
+
+This document only covers the aspects of power management specific to PCI
+devices.  For general description of the kernel's interfaces related to device
+power management refer to Documentation/driver-api/pm/devices.rst and
+Documentation/power/runtime_pm.rst.
+
+.. contents:
+
+   1. Hardware and Platform Support for PCI Power Management
+   2. PCI Subsystem and Device Power Management
+   3. PCI Device Drivers and Power Management
+   4. Resources
+
+
+1. Hardware and Platform Support for PCI Power Management
+=========================================================
+
+1.1. Native and Platform-Based Power Management
+-----------------------------------------------
+
+In general, power management is a feature allowing one to save energy by putting
+devices into states in which they draw less power (low-power states) at the
+price of reduced functionality or performance.
+
+Usually, a device is put into a low-power state when it is underutilized or
+completely inactive.  However, when it is necessary to use the device once
+again, it has to be put back into the "fully functional" state (full-power
+state).  This may happen when there are some data for the device to handle or
+as a result of an external event requiring the device to be active, which may
+be signaled by the device itself.
+
+PCI devices may be put into low-power states in two ways, by using the device
+capabilities introduced by the PCI Bus Power Management Interface Specification,
+or with the help of platform firmware, such as an ACPI BIOS.  In the first
+approach, that is referred to as the native PCI power management (native PCI PM)
+in what follows, the device power state is changed as a result of writing a
+specific value into one of its standard configuration registers.  The second
+approach requires the platform firmware to provide special methods that may be
+used by the kernel to change the device's power state.
+
+Devices supporting the native PCI PM usually can generate wakeup signals called
+Power Management Events (PMEs) to let the kernel know about external events
+requiring the device to be active.  After receiving a PME the kernel is supposed
+to put the device that sent it into the full-power state.  However, the PCI Bus
+Power Management Interface Specification doesn't define any standard method of
+delivering the PME from the device to the CPU and the operating system kernel.
+It is assumed that the platform firmware will perform this task and therefore,
+even though a PCI device is set up to generate PMEs, it also may be necessary to
+prepare the platform firmware for notifying the CPU of the PMEs coming from the
+device (e.g. by generating interrupts).
+
+In turn, if the methods provided by the platform firmware are used for changing
+the power state of a device, usually the platform also provides a method for
+preparing the device to generate wakeup signals.  In that case, however, it
+often also is necessary to prepare the device for generating PMEs using the
+native PCI PM mechanism, because the method provided by the platform depends on
+that.
+
+Thus in many situations both the native and the platform-based power management
+mechanisms have to be used simultaneously to obtain the desired result.
+
+1.2. Native PCI Power Management
+--------------------------------
+
+The PCI Bus Power Management Interface Specification (PCI PM Spec) was
+introduced between the PCI 2.1 and PCI 2.2 Specifications.  It defined a
+standard interface for performing various operations related to power
+management.
+
+The implementation of the PCI PM Spec is optional for conventional PCI devices,
+but it is mandatory for PCI Express devices.  If a device supports the PCI PM
+Spec, it has an 8 byte power management capability field in its PCI
+configuration space.  This field is used to describe and control the standard
+features related to the native PCI power management.
+
+The PCI PM Spec defines 4 operating states for devices (D0-D3) and for buses
+(B0-B3).  The higher the number, the less power is drawn by the device or bus
+in that state.  However, the higher the number, the longer the latency for
+the device or bus to return to the full-power state (D0 or B0, respectively).
+
+There are two variants of the D3 state defined by the specification.  The first
+one is D3hot, referred to as the software accessible D3, because devices can be
+programmed to go into it.  The second one, D3cold, is the state that PCI devices
+are in when the supply voltage (Vcc) is removed from them.  It is not possible
+to program a PCI device to go into D3cold, although there may be a programmable
+interface for putting the bus the device is on into a state in which Vcc is
+removed from all devices on the bus.
+
+PCI bus power management, however, is not supported by the Linux kernel at the
+time of this writing and therefore it is not covered by this document.
+
+Note that every PCI device can be in the full-power state (D0) or in D3cold,
+regardless of whether or not it implements the PCI PM Spec.  In addition to
+that, if the PCI PM Spec is implemented by the device, it must support D3hot
+as well as D0.  The support for the D1 and D2 power states is optional.
+
+PCI devices supporting the PCI PM Spec can be programmed to go to any of the
+supported low-power states (except for D3cold).  While in D1-D3hot the
+standard configuration registers of the device must be accessible to software
+(i.e. the device is required to respond to PCI configuration accesses), although
+its I/O and memory spaces are then disabled.  This allows the device to be
+programmatically put into D0.  Thus the kernel can switch the device back and
+forth between D0 and the supported low-power states (except for D3cold) and the
+possible power state transitions the device can undergo are the following:
+
++----------------------------+
+| Current State | New State  |
++----------------------------+
+| D0            | D1, D2, D3 |
++----------------------------+
+| D1            | D2, D3     |
++----------------------------+
+| D2            | D3         |
++----------------------------+
+| D1, D2, D3    | D0         |
++----------------------------+
+
+The transition from D3cold to D0 occurs when the supply voltage is provided to
+the device (i.e. power is restored).  In that case the device returns to D0 with
+a full power-on reset sequence and the power-on defaults are restored to the
+device by hardware just as at initial power up.
+
+PCI devices supporting the PCI PM Spec can be programmed to generate PMEs
+while in a low-power state (D1-D3), but they are not required to be capable
+of generating PMEs from all supported low-power states.  In particular, the
+capability of generating PMEs from D3cold is optional and depends on the
+presence of additional voltage (3.3Vaux) allowing the device to remain
+sufficiently active to generate a wakeup signal.
+
+1.3. ACPI Device Power Management
+---------------------------------
+
+The platform firmware support for the power management of PCI devices is
+system-specific.  However, if the system in question is compliant with the
+Advanced Configuration and Power Interface (ACPI) Specification, like the
+majority of x86-based systems, it is supposed to implement device power
+management interfaces defined by the ACPI standard.
+
+For this purpose the ACPI BIOS provides special functions called "control
+methods" that may be executed by the kernel to perform specific tasks, such as
+putting a device into a low-power state.  These control methods are encoded
+using special byte-code language called the ACPI Machine Language (AML) and
+stored in the machine's BIOS.  The kernel loads them from the BIOS and executes
+them as needed using an AML interpreter that translates the AML byte code into
+computations and memory or I/O space accesses.  This way, in theory, a BIOS
+writer can provide the kernel with a means to perform actions depending
+on the system design in a system-specific fashion.
+
+ACPI control methods may be divided into global control methods, that are not
+associated with any particular devices, and device control methods, that have
+to be defined separately for each device supposed to be handled with the help of
+the platform.  This means, in particular, that ACPI device control methods can
+only be used to handle devices that the BIOS writer knew about in advance.  The
+ACPI methods used for device power management fall into that category.
+
+The ACPI specification assumes that devices can be in one of four power states
+labeled as D0, D1, D2, and D3 that roughly correspond to the native PCI PM
+D0-D3 states (although the difference between D3hot and D3cold is not taken
+into account by ACPI).  Moreover, for each power state of a device there is a
+set of power resources that have to be enabled for the device to be put into
+that state.  These power resources are controlled (i.e. enabled or disabled)
+with the help of their own control methods, _ON and _OFF, that have to be
+defined individually for each of them.
+
+To put a device into the ACPI power state Dx (where x is a number between 0 and
+3 inclusive) the kernel is supposed to (1) enable the power resources required
+by the device in this state using their _ON control methods and (2) execute the
+_PSx control method defined for the device.  In addition to that, if the device
+is going to be put into a low-power state (D1-D3) and is supposed to generate
+wakeup signals from that state, the _DSW (or _PSW, replaced with _DSW by ACPI
+3.0) control method defined for it has to be executed before _PSx.  Power
+resources that are not required by the device in the target power state and are
+not required any more by any other device should be disabled (by executing their
+_OFF control methods).  If the current power state of the device is D3, it can
+only be put into D0 this way.
+
+However, quite often the power states of devices are changed during a
+system-wide transition into a sleep state or back into the working state.  ACPI
+defines four system sleep states, S1, S2, S3, and S4, and denotes the system
+working state as S0.  In general, the target system sleep (or working) state
+determines the highest power (lowest number) state the device can be put
+into and the kernel is supposed to obtain this information by executing the
+device's _SxD control method (where x is a number between 0 and 4 inclusive).
+If the device is required to wake up the system from the target sleep state, the
+lowest power (highest number) state it can be put into is also determined by the
+target state of the system.  The kernel is then supposed to use the device's
+_SxW control method to obtain the number of that state.  It also is supposed to
+use the device's _PRW control method to learn which power resources need to be
+enabled for the device to be able to generate wakeup signals.
+
+1.4. Wakeup Signaling
+---------------------
+
+Wakeup signals generated by PCI devices, either as native PCI PMEs, or as
+a result of the execution of the _DSW (or _PSW) ACPI control method before
+putting the device into a low-power state, have to be caught and handled as
+appropriate.  If they are sent while the system is in the working state
+(ACPI S0), they should be translated into interrupts so that the kernel can
+put the devices generating them into the full-power state and take care of the
+events that triggered them.  In turn, if they are sent while the system is
+sleeping, they should cause the system's core logic to trigger wakeup.
+
+On ACPI-based systems wakeup signals sent by conventional PCI devices are
+converted into ACPI General-Purpose Events (GPEs) which are hardware signals
+from the system core logic generated in response to various events that need to
+be acted upon.  Every GPE is associated with one or more sources of potentially
+interesting events.  In particular, a GPE may be associated with a PCI device
+capable of signaling wakeup.  The information on the connections between GPEs
+and event sources is recorded in the system's ACPI BIOS from where it can be
+read by the kernel.
+
+If a PCI device known to the system's ACPI BIOS signals wakeup, the GPE
+associated with it (if there is one) is triggered.  The GPEs associated with PCI
+bridges may also be triggered in response to a wakeup signal from one of the
+devices below the bridge (this also is the case for root bridges) and, for
+example, native PCI PMEs from devices unknown to the system's ACPI BIOS may be
+handled this way.
+
+A GPE may be triggered when the system is sleeping (i.e. when it is in one of
+the ACPI S1-S4 states), in which case system wakeup is started by its core logic
+(the device that was the source of the signal causing the system wakeup to occur
+may be identified later).  The GPEs used in such situations are referred to as
+wakeup GPEs.
+
+Usually, however, GPEs are also triggered when the system is in the working
+state (ACPI S0) and in that case the system's core logic generates a System
+Control Interrupt (SCI) to notify the kernel of the event.  Then, the SCI
+handler identifies the GPE that caused the interrupt to be generated which,
+in turn, allows the kernel to identify the source of the event (that may be
+a PCI device signaling wakeup).  The GPEs used for notifying the kernel of
+events occurring while the system is in the working state are referred to as
+runtime GPEs.
+
+Unfortunately, there is no standard way of handling wakeup signals sent by
+conventional PCI devices on systems that are not ACPI-based, but there is one
+for PCI Express devices.  Namely, the PCI Express Base Specification introduced
+a native mechanism for converting native PCI PMEs into interrupts generated by
+root ports.  For conventional PCI devices native PMEs are out-of-band, so they
+are routed separately and they need not pass through bridges (in principle they
+may be routed directly to the system's core logic), but for PCI Express devices
+they are in-band messages that have to pass through the PCI Express hierarchy,
+including the root port on the path from the device to the Root Complex.  Thus
+it was possible to introduce a mechanism by which a root port generates an
+interrupt whenever it receives a PME message from one of the devices below it.
+The PCI Express Requester ID of the device that sent the PME message is then
+recorded in one of the root port's configuration registers from where it may be
+read by the interrupt handler allowing the device to be identified.  [PME
+messages sent by PCI Express endpoints integrated with the Root Complex don't
+pass through root ports, but instead they cause a Root Complex Event Collector
+(if there is one) to generate interrupts.]
+
+In principle the native PCI Express PME signaling may also be used on ACPI-based
+systems along with the GPEs, but to use it the kernel has to ask the system's
+ACPI BIOS to release control of root port configuration registers.  The ACPI
+BIOS, however, is not required to allow the kernel to control these registers
+and if it doesn't do that, the kernel must not modify their contents.  Of course
+the native PCI Express PME signaling cannot be used by the kernel in that case.
+
+
+2. PCI Subsystem and Device Power Management
+============================================
+
+2.1. Device Power Management Callbacks
+--------------------------------------
+
+The PCI Subsystem participates in the power management of PCI devices in a
+number of ways.  First of all, it provides an intermediate code layer between
+the device power management core (PM core) and PCI device drivers.
+Specifically, the pm field of the PCI subsystem's struct bus_type object,
+pci_bus_type, points to a struct dev_pm_ops object, pci_dev_pm_ops, containing
+pointers to several device power management callbacks::
+
+  const struct dev_pm_ops pci_dev_pm_ops = {
+	.prepare = pci_pm_prepare,
+	.complete = pci_pm_complete,
+	.suspend = pci_pm_suspend,
+	.resume = pci_pm_resume,
+	.freeze = pci_pm_freeze,
+	.thaw = pci_pm_thaw,
+	.poweroff = pci_pm_poweroff,
+	.restore = pci_pm_restore,
+	.suspend_noirq = pci_pm_suspend_noirq,
+	.resume_noirq = pci_pm_resume_noirq,
+	.freeze_noirq = pci_pm_freeze_noirq,
+	.thaw_noirq = pci_pm_thaw_noirq,
+	.poweroff_noirq = pci_pm_poweroff_noirq,
+	.restore_noirq = pci_pm_restore_noirq,
+	.runtime_suspend = pci_pm_runtime_suspend,
+	.runtime_resume = pci_pm_runtime_resume,
+	.runtime_idle = pci_pm_runtime_idle,
+  };
+
+These callbacks are executed by the PM core in various situations related to
+device power management and they, in turn, execute power management callbacks
+provided by PCI device drivers.  They also perform power management operations
+involving some standard configuration registers of PCI devices that device
+drivers need not know or care about.
+
+The structure representing a PCI device, struct pci_dev, contains several fields
+that these callbacks operate on::
+
+  struct pci_dev {
+	...
+	pci_power_t     current_state;  /* Current operating state. */
+	int		pm_cap;		/* PM capability offset in the
+					   configuration space */
+	unsigned int	pme_support:5;	/* Bitmask of states from which PME#
+					   can be generated */
+	unsigned int	pme_interrupt:1;/* Is native PCIe PME signaling used? */
+	unsigned int	d1_support:1;	/* Low power state D1 is supported */
+	unsigned int	d2_support:1;	/* Low power state D2 is supported */
+	unsigned int	no_d1d2:1;	/* D1 and D2 are forbidden */
+	unsigned int	wakeup_prepared:1;  /* Device prepared for wake up */
+	unsigned int	d3_delay;	/* D3->D0 transition time in ms */
+	...
+  };
+
+They also indirectly use some fields of the struct device that is embedded in
+struct pci_dev.
+
+2.2. Device Initialization
+--------------------------
+
+The PCI subsystem's first task related to device power management is to
+prepare the device for power management and initialize the fields of struct
+pci_dev used for this purpose.  This happens in two functions defined in
+drivers/pci/pci.c, pci_pm_init() and platform_pci_wakeup_init().
+
+The first of these functions checks if the device supports native PCI PM
+and if that's the case the offset of its power management capability structure
+in the configuration space is stored in the pm_cap field of the device's struct
+pci_dev object.  Next, the function checks which PCI low-power states are
+supported by the device and from which low-power states the device can generate
+native PCI PMEs.  The power management fields of the device's struct pci_dev and
+the struct device embedded in it are updated accordingly and the generation of
+PMEs by the device is disabled.
+
+The second function checks if the device can be prepared to signal wakeup with
+the help of the platform firmware, such as the ACPI BIOS.  If that is the case,
+the function updates the wakeup fields in struct device embedded in the
+device's struct pci_dev and uses the firmware-provided method to prevent the
+device from signaling wakeup.
+
+At this point the device is ready for power management.  For driverless devices,
+however, this functionality is limited to a few basic operations carried out
+during system-wide transitions to a sleep state and back to the working state.
+
+2.3. Runtime Device Power Management
+------------------------------------
+
+The PCI subsystem plays a vital role in the runtime power management of PCI
+devices.  For this purpose it uses the general runtime power management
+(runtime PM) framework described in Documentation/power/runtime_pm.rst.
+Namely, it provides subsystem-level callbacks::
+
+	pci_pm_runtime_suspend()
+	pci_pm_runtime_resume()
+	pci_pm_runtime_idle()
+
+that are executed by the core runtime PM routines.  It also implements the
+entire mechanics necessary for handling runtime wakeup signals from PCI devices
+in low-power states, which at the time of this writing works for both the native
+PCI Express PME signaling and the ACPI GPE-based wakeup signaling described in
+Section 1.
+
+First, a PCI device is put into a low-power state, or suspended, with the help
+of pm_schedule_suspend() or pm_runtime_suspend() which for PCI devices call
+pci_pm_runtime_suspend() to do the actual job.  For this to work, the device's
+driver has to provide a pm->runtime_suspend() callback (see below), which is
+run by pci_pm_runtime_suspend() as the first action.  If the driver's callback
+returns successfully, the device's standard configuration registers are saved,
+the device is prepared to generate wakeup signals and, finally, it is put into
+the target low-power state.
+
+The low-power state to put the device into is the lowest-power (highest number)
+state from which it can signal wakeup.  The exact method of signaling wakeup is
+system-dependent and is determined by the PCI subsystem on the basis of the
+reported capabilities of the device and the platform firmware.  To prepare the
+device for signaling wakeup and put it into the selected low-power state, the
+PCI subsystem can use the platform firmware as well as the device's native PCI
+PM capabilities, if supported.
+
+It is expected that the device driver's pm->runtime_suspend() callback will
+not attempt to prepare the device for signaling wakeup or to put it into a
+low-power state.  The driver ought to leave these tasks to the PCI subsystem
+that has all of the information necessary to perform them.
+
+A suspended device is brought back into the "active" state, or resumed,
+with the help of pm_request_resume() or pm_runtime_resume() which both call
+pci_pm_runtime_resume() for PCI devices.  Again, this only works if the device's
+driver provides a pm->runtime_resume() callback (see below).  However, before
+the driver's callback is executed, pci_pm_runtime_resume() brings the device
+back into the full-power state, prevents it from signaling wakeup while in that
+state and restores its standard configuration registers.  Thus the driver's
+callback need not worry about the PCI-specific aspects of the device resume.
+
+Note that generally pci_pm_runtime_resume() may be called in two different
+situations.  First, it may be called at the request of the device's driver, for
+example if there are some data for it to process.  Second, it may be called
+as a result of a wakeup signal from the device itself (this sometimes is
+referred to as "remote wakeup").  Of course, for this purpose the wakeup signal
+is handled in one of the ways described in Section 1 and finally converted into
+a notification for the PCI subsystem after the source device has been
+identified.
+
+The pci_pm_runtime_idle() function, called for PCI devices by pm_runtime_idle()
+and pm_request_idle(), executes the device driver's pm->runtime_idle()
+callback, if defined, and if that callback doesn't return error code (or is not
+present at all), suspends the device with the help of pm_runtime_suspend().
+Sometimes pci_pm_runtime_idle() is called automatically by the PM core (for
+example, it is called right after the device has just been resumed), in which
+cases it is expected to suspend the device if that makes sense.  Usually,
+however, the PCI subsystem doesn't really know if the device really can be
+suspended, so it lets the device's driver decide by running its
+pm->runtime_idle() callback.
+
+2.4. System-Wide Power Transitions
+----------------------------------
+There are a few different types of system-wide power transitions, described in
+Documentation/driver-api/pm/devices.rst.  Each of them requires devices to be handled
+in a specific way and the PM core executes subsystem-level power management
+callbacks for this purpose.  They are executed in phases such that each phase
+involves executing the same subsystem-level callback for every device belonging
+to the given subsystem before the next phase begins.  These phases always run
+after tasks have been frozen.
+
+2.4.1. System Suspend
+^^^^^^^^^^^^^^^^^^^^^
+
+When the system is going into a sleep state in which the contents of memory will
+be preserved, such as one of the ACPI sleep states S1-S3, the phases are:
+
+	prepare, suspend, suspend_noirq.
+
+The following PCI bus type's callbacks, respectively, are used in these phases::
+
+	pci_pm_prepare()
+	pci_pm_suspend()
+	pci_pm_suspend_noirq()
+
+The pci_pm_prepare() routine first puts the device into the "fully functional"
+state with the help of pm_runtime_resume().  Then, it executes the device
+driver's pm->prepare() callback if defined (i.e. if the driver's struct
+dev_pm_ops object is present and the prepare pointer in that object is valid).
+
+The pci_pm_suspend() routine first checks if the device's driver implements
+legacy PCI suspend routines (see Section 3), in which case the driver's legacy
+suspend callback is executed, if present, and its result is returned.  Next, if
+the device's driver doesn't provide a struct dev_pm_ops object (containing
+pointers to the driver's callbacks), pci_pm_default_suspend() is called, which
+simply turns off the device's bus master capability and runs
+pcibios_disable_device() to disable it, unless the device is a bridge (PCI
+bridges are ignored by this routine).  Next, the device driver's pm->suspend()
+callback is executed, if defined, and its result is returned if it fails.
+Finally, pci_fixup_device() is called to apply hardware suspend quirks related
+to the device if necessary.
+
+Note that the suspend phase is carried out asynchronously for PCI devices, so
+the pci_pm_suspend() callback may be executed in parallel for any pair of PCI
+devices that don't depend on each other in a known way (i.e. none of the paths
+in the device tree from the root bridge to a leaf device contains both of them).
+
+The pci_pm_suspend_noirq() routine is executed after suspend_device_irqs() has
+been called, which means that the device driver's interrupt handler won't be
+invoked while this routine is running.  It first checks if the device's driver
+implements legacy PCI suspends routines (Section 3), in which case the legacy
+late suspend routine is called and its result is returned (the standard
+configuration registers of the device are saved if the driver's callback hasn't
+done that).  Second, if the device driver's struct dev_pm_ops object is not
+present, the device's standard configuration registers are saved and the routine
+returns success.  Otherwise the device driver's pm->suspend_noirq() callback is
+executed, if present, and its result is returned if it fails.  Next, if the
+device's standard configuration registers haven't been saved yet (one of the
+device driver's callbacks executed before might do that), pci_pm_suspend_noirq()
+saves them, prepares the device to signal wakeup (if necessary) and puts it into
+a low-power state.
+
+The low-power state to put the device into is the lowest-power (highest number)
+state from which it can signal wakeup while the system is in the target sleep
+state.  Just like in the runtime PM case described above, the mechanism of
+signaling wakeup is system-dependent and determined by the PCI subsystem, which
+is also responsible for preparing the device to signal wakeup from the system's
+target sleep state as appropriate.
+
+PCI device drivers (that don't implement legacy power management callbacks) are
+generally not expected to prepare devices for signaling wakeup or to put them
+into low-power states.  However, if one of the driver's suspend callbacks
+(pm->suspend() or pm->suspend_noirq()) saves the device's standard configuration
+registers, pci_pm_suspend_noirq() will assume that the device has been prepared
+to signal wakeup and put into a low-power state by the driver (the driver is
+then assumed to have used the helper functions provided by the PCI subsystem for
+this purpose).  PCI device drivers are not encouraged to do that, but in some
+rare cases doing that in the driver may be the optimum approach.
+
+2.4.2. System Resume
+^^^^^^^^^^^^^^^^^^^^
+
+When the system is undergoing a transition from a sleep state in which the
+contents of memory have been preserved, such as one of the ACPI sleep states
+S1-S3, into the working state (ACPI S0), the phases are:
+
+	resume_noirq, resume, complete.
+
+The following PCI bus type's callbacks, respectively, are executed in these
+phases::
+
+	pci_pm_resume_noirq()
+	pci_pm_resume()
+	pci_pm_complete()
+
+The pci_pm_resume_noirq() routine first puts the device into the full-power
+state, restores its standard configuration registers and applies early resume
+hardware quirks related to the device, if necessary.  This is done
+unconditionally, regardless of whether or not the device's driver implements
+legacy PCI power management callbacks (this way all PCI devices are in the
+full-power state and their standard configuration registers have been restored
+when their interrupt handlers are invoked for the first time during resume,
+which allows the kernel to avoid problems with the handling of shared interrupts
+by drivers whose devices are still suspended).  If legacy PCI power management
+callbacks (see Section 3) are implemented by the device's driver, the legacy
+early resume callback is executed and its result is returned.  Otherwise, the
+device driver's pm->resume_noirq() callback is executed, if defined, and its
+result is returned.
+
+The pci_pm_resume() routine first checks if the device's standard configuration
+registers have been restored and restores them if that's not the case (this
+only is necessary in the error path during a failing suspend).  Next, resume
+hardware quirks related to the device are applied, if necessary, and if the
+device's driver implements legacy PCI power management callbacks (see
+Section 3), the driver's legacy resume callback is executed and its result is
+returned.  Otherwise, the device's wakeup signaling mechanisms are blocked and
+its driver's pm->resume() callback is executed, if defined (the callback's
+result is then returned).
+
+The resume phase is carried out asynchronously for PCI devices, like the
+suspend phase described above, which means that if two PCI devices don't depend
+on each other in a known way, the pci_pm_resume() routine may be executed for
+the both of them in parallel.
+
+The pci_pm_complete() routine only executes the device driver's pm->complete()
+callback, if defined.
+
+2.4.3. System Hibernation
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+System hibernation is more complicated than system suspend, because it requires
+a system image to be created and written into a persistent storage medium.  The
+image is created atomically and all devices are quiesced, or frozen, before that
+happens.
+
+The freezing of devices is carried out after enough memory has been freed (at
+the time of this writing the image creation requires at least 50% of system RAM
+to be free) in the following three phases:
+
+	prepare, freeze, freeze_noirq
+
+that correspond to the PCI bus type's callbacks::
+
+	pci_pm_prepare()
+	pci_pm_freeze()
+	pci_pm_freeze_noirq()
+
+This means that the prepare phase is exactly the same as for system suspend.
+The other two phases, however, are different.
+
+The pci_pm_freeze() routine is quite similar to pci_pm_suspend(), but it runs
+the device driver's pm->freeze() callback, if defined, instead of pm->suspend(),
+and it doesn't apply the suspend-related hardware quirks.  It is executed
+asynchronously for different PCI devices that don't depend on each other in a
+known way.
+
+The pci_pm_freeze_noirq() routine, in turn, is similar to
+pci_pm_suspend_noirq(), but it calls the device driver's pm->freeze_noirq()
+routine instead of pm->suspend_noirq().  It also doesn't attempt to prepare the
+device for signaling wakeup and put it into a low-power state.  Still, it saves
+the device's standard configuration registers if they haven't been saved by one
+of the driver's callbacks.
+
+Once the image has been created, it has to be saved.  However, at this point all
+devices are frozen and they cannot handle I/O, while their ability to handle
+I/O is obviously necessary for the image saving.  Thus they have to be brought
+back to the fully functional state and this is done in the following phases:
+
+	thaw_noirq, thaw, complete
+
+using the following PCI bus type's callbacks::
+
+	pci_pm_thaw_noirq()
+	pci_pm_thaw()
+	pci_pm_complete()
+
+respectively.
+
+The first of them, pci_pm_thaw_noirq(), is analogous to pci_pm_resume_noirq(),
+but it doesn't put the device into the full power state and doesn't attempt to
+restore its standard configuration registers.  It also executes the device
+driver's pm->thaw_noirq() callback, if defined, instead of pm->resume_noirq().
+
+The pci_pm_thaw() routine is similar to pci_pm_resume(), but it runs the device
+driver's pm->thaw() callback instead of pm->resume().  It is executed
+asynchronously for different PCI devices that don't depend on each other in a
+known way.
+
+The complete phase it the same as for system resume.
+
+After saving the image, devices need to be powered down before the system can
+enter the target sleep state (ACPI S4 for ACPI-based systems).  This is done in
+three phases:
+
+	prepare, poweroff, poweroff_noirq
+
+where the prepare phase is exactly the same as for system suspend.  The other
+two phases are analogous to the suspend and suspend_noirq phases, respectively.
+The PCI subsystem-level callbacks they correspond to::
+
+	pci_pm_poweroff()
+	pci_pm_poweroff_noirq()
+
+work in analogy with pci_pm_suspend() and pci_pm_poweroff_noirq(), respectively,
+although they don't attempt to save the device's standard configuration
+registers.
+
+2.4.4. System Restore
+^^^^^^^^^^^^^^^^^^^^^
+
+System restore requires a hibernation image to be loaded into memory and the
+pre-hibernation memory contents to be restored before the pre-hibernation system
+activity can be resumed.
+
+As described in Documentation/driver-api/pm/devices.rst, the hibernation image is loaded
+into memory by a fresh instance of the kernel, called the boot kernel, which in
+turn is loaded and run by a boot loader in the usual way.  After the boot kernel
+has loaded the image, it needs to replace its own code and data with the code
+and data of the "hibernated" kernel stored within the image, called the image
+kernel.  For this purpose all devices are frozen just like before creating
+the image during hibernation, in the
+
+	prepare, freeze, freeze_noirq
+
+phases described above.  However, the devices affected by these phases are only
+those having drivers in the boot kernel; other devices will still be in whatever
+state the boot loader left them.
+
+Should the restoration of the pre-hibernation memory contents fail, the boot
+kernel would go through the "thawing" procedure described above, using the
+thaw_noirq, thaw, and complete phases (that will only affect the devices having
+drivers in the boot kernel), and then continue running normally.
+
+If the pre-hibernation memory contents are restored successfully, which is the
+usual situation, control is passed to the image kernel, which then becomes
+responsible for bringing the system back to the working state.  To achieve this,
+it must restore the devices' pre-hibernation functionality, which is done much
+like waking up from the memory sleep state, although it involves different
+phases:
+
+	restore_noirq, restore, complete
+
+The first two of these are analogous to the resume_noirq and resume phases
+described above, respectively, and correspond to the following PCI subsystem
+callbacks::
+
+	pci_pm_restore_noirq()
+	pci_pm_restore()
+
+These callbacks work in analogy with pci_pm_resume_noirq() and pci_pm_resume(),
+respectively, but they execute the device driver's pm->restore_noirq() and
+pm->restore() callbacks, if available.
+
+The complete phase is carried out in exactly the same way as during system
+resume.
+
+
+3. PCI Device Drivers and Power Management
+==========================================
+
+3.1. Power Management Callbacks
+-------------------------------
+
+PCI device drivers participate in power management by providing callbacks to be
+executed by the PCI subsystem's power management routines described above and by
+controlling the runtime power management of their devices.
+
+At the time of this writing there are two ways to define power management
+callbacks for a PCI device driver, the recommended one, based on using a
+dev_pm_ops structure described in Documentation/driver-api/pm/devices.rst, and the
+"legacy" one, in which the .suspend(), .suspend_late(), .resume_early(), and
+.resume() callbacks from struct pci_driver are used.  The legacy approach,
+however, doesn't allow one to define runtime power management callbacks and is
+not really suitable for any new drivers.  Therefore it is not covered by this
+document (refer to the source code to learn more about it).
+
+It is recommended that all PCI device drivers define a struct dev_pm_ops object
+containing pointers to power management (PM) callbacks that will be executed by
+the PCI subsystem's PM routines in various circumstances.  A pointer to the
+driver's struct dev_pm_ops object has to be assigned to the driver.pm field in
+its struct pci_driver object.  Once that has happened, the "legacy" PM callbacks
+in struct pci_driver are ignored (even if they are not NULL).
+
+The PM callbacks in struct dev_pm_ops are not mandatory and if they are not
+defined (i.e. the respective fields of struct dev_pm_ops are unset) the PCI
+subsystem will handle the device in a simplified default manner.  If they are
+defined, though, they are expected to behave as described in the following
+subsections.
+
+3.1.1. prepare()
+^^^^^^^^^^^^^^^^
+
+The prepare() callback is executed during system suspend, during hibernation
+(when a hibernation image is about to be created), during power-off after
+saving a hibernation image and during system restore, when a hibernation image
+has just been loaded into memory.
+
+This callback is only necessary if the driver's device has children that in
+general may be registered at any time.  In that case the role of the prepare()
+callback is to prevent new children of the device from being registered until
+one of the resume_noirq(), thaw_noirq(), or restore_noirq() callbacks is run.
+
+In addition to that the prepare() callback may carry out some operations
+preparing the device to be suspended, although it should not allocate memory
+(if additional memory is required to suspend the device, it has to be
+preallocated earlier, for example in a suspend/hibernate notifier as described
+in Documentation/driver-api/pm/notifiers.rst).
+
+3.1.2. suspend()
+^^^^^^^^^^^^^^^^
+
+The suspend() callback is only executed during system suspend, after prepare()
+callbacks have been executed for all devices in the system.
+
+This callback is expected to quiesce the device and prepare it to be put into a
+low-power state by the PCI subsystem.  It is not required (in fact it even is
+not recommended) that a PCI driver's suspend() callback save the standard
+configuration registers of the device, prepare it for waking up the system, or
+put it into a low-power state.  All of these operations can very well be taken
+care of by the PCI subsystem, without the driver's participation.
+
+However, in some rare case it is convenient to carry out these operations in
+a PCI driver.  Then, pci_save_state(), pci_prepare_to_sleep(), and
+pci_set_power_state() should be used to save the device's standard configuration
+registers, to prepare it for system wakeup (if necessary), and to put it into a
+low-power state, respectively.  Moreover, if the driver calls pci_save_state(),
+the PCI subsystem will not execute either pci_prepare_to_sleep(), or
+pci_set_power_state() for its device, so the driver is then responsible for
+handling the device as appropriate.
+
+While the suspend() callback is being executed, the driver's interrupt handler
+can be invoked to handle an interrupt from the device, so all suspend-related
+operations relying on the driver's ability to handle interrupts should be
+carried out in this callback.
+
+3.1.3. suspend_noirq()
+^^^^^^^^^^^^^^^^^^^^^^
+
+The suspend_noirq() callback is only executed during system suspend, after
+suspend() callbacks have been executed for all devices in the system and
+after device interrupts have been disabled by the PM core.
+
+The difference between suspend_noirq() and suspend() is that the driver's
+interrupt handler will not be invoked while suspend_noirq() is running.  Thus
+suspend_noirq() can carry out operations that would cause race conditions to
+arise if they were performed in suspend().
+
+3.1.4. freeze()
+^^^^^^^^^^^^^^^
+
+The freeze() callback is hibernation-specific and is executed in two situations,
+during hibernation, after prepare() callbacks have been executed for all devices
+in preparation for the creation of a system image, and during restore,
+after a system image has been loaded into memory from persistent storage and the
+prepare() callbacks have been executed for all devices.
+
+The role of this callback is analogous to the role of the suspend() callback
+described above.  In fact, they only need to be different in the rare cases when
+the driver takes the responsibility for putting the device into a low-power
+state.
+
+In that cases the freeze() callback should not prepare the device system wakeup
+or put it into a low-power state.  Still, either it or freeze_noirq() should
+save the device's standard configuration registers using pci_save_state().
+
+3.1.5. freeze_noirq()
+^^^^^^^^^^^^^^^^^^^^^
+
+The freeze_noirq() callback is hibernation-specific.  It is executed during
+hibernation, after prepare() and freeze() callbacks have been executed for all
+devices in preparation for the creation of a system image, and during restore,
+after a system image has been loaded into memory and after prepare() and
+freeze() callbacks have been executed for all devices.  It is always executed
+after device interrupts have been disabled by the PM core.
+
+The role of this callback is analogous to the role of the suspend_noirq()
+callback described above and it very rarely is necessary to define
+freeze_noirq().
+
+The difference between freeze_noirq() and freeze() is analogous to the
+difference between suspend_noirq() and suspend().
+
+3.1.6. poweroff()
+^^^^^^^^^^^^^^^^^
+
+The poweroff() callback is hibernation-specific.  It is executed when the system
+is about to be powered off after saving a hibernation image to a persistent
+storage.  prepare() callbacks are executed for all devices before poweroff() is
+called.
+
+The role of this callback is analogous to the role of the suspend() and freeze()
+callbacks described above, although it does not need to save the contents of
+the device's registers.  In particular, if the driver wants to put the device
+into a low-power state itself instead of allowing the PCI subsystem to do that,
+the poweroff() callback should use pci_prepare_to_sleep() and
+pci_set_power_state() to prepare the device for system wakeup and to put it
+into a low-power state, respectively, but it need not save the device's standard
+configuration registers.
+
+3.1.7. poweroff_noirq()
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The poweroff_noirq() callback is hibernation-specific.  It is executed after
+poweroff() callbacks have been executed for all devices in the system.
+
+The role of this callback is analogous to the role of the suspend_noirq() and
+freeze_noirq() callbacks described above, but it does not need to save the
+contents of the device's registers.
+
+The difference between poweroff_noirq() and poweroff() is analogous to the
+difference between suspend_noirq() and suspend().
+
+3.1.8. resume_noirq()
+^^^^^^^^^^^^^^^^^^^^^
+
+The resume_noirq() callback is only executed during system resume, after the
+PM core has enabled the non-boot CPUs.  The driver's interrupt handler will not
+be invoked while resume_noirq() is running, so this callback can carry out
+operations that might race with the interrupt handler.
+
+Since the PCI subsystem unconditionally puts all devices into the full power
+state in the resume_noirq phase of system resume and restores their standard
+configuration registers, resume_noirq() is usually not necessary.  In general
+it should only be used for performing operations that would lead to race
+conditions if carried out by resume().
+
+3.1.9. resume()
+^^^^^^^^^^^^^^^
+
+The resume() callback is only executed during system resume, after
+resume_noirq() callbacks have been executed for all devices in the system and
+device interrupts have been enabled by the PM core.
+
+This callback is responsible for restoring the pre-suspend configuration of the
+device and bringing it back to the fully functional state.  The device should be
+able to process I/O in a usual way after resume() has returned.
+
+3.1.10. thaw_noirq()
+^^^^^^^^^^^^^^^^^^^^
+
+The thaw_noirq() callback is hibernation-specific.  It is executed after a
+system image has been created and the non-boot CPUs have been enabled by the PM
+core, in the thaw_noirq phase of hibernation.  It also may be executed if the
+loading of a hibernation image fails during system restore (it is then executed
+after enabling the non-boot CPUs).  The driver's interrupt handler will not be
+invoked while thaw_noirq() is running.
+
+The role of this callback is analogous to the role of resume_noirq().  The
+difference between these two callbacks is that thaw_noirq() is executed after
+freeze() and freeze_noirq(), so in general it does not need to modify the
+contents of the device's registers.
+
+3.1.11. thaw()
+^^^^^^^^^^^^^^
+
+The thaw() callback is hibernation-specific.  It is executed after thaw_noirq()
+callbacks have been executed for all devices in the system and after device
+interrupts have been enabled by the PM core.
+
+This callback is responsible for restoring the pre-freeze configuration of
+the device, so that it will work in a usual way after thaw() has returned.
+
+3.1.12. restore_noirq()
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The restore_noirq() callback is hibernation-specific.  It is executed in the
+restore_noirq phase of hibernation, when the boot kernel has passed control to
+the image kernel and the non-boot CPUs have been enabled by the image kernel's
+PM core.
+
+This callback is analogous to resume_noirq() with the exception that it cannot
+make any assumption on the previous state of the device, even if the BIOS (or
+generally the platform firmware) is known to preserve that state over a
+suspend-resume cycle.
+
+For the vast majority of PCI device drivers there is no difference between
+resume_noirq() and restore_noirq().
+
+3.1.13. restore()
+^^^^^^^^^^^^^^^^^
+
+The restore() callback is hibernation-specific.  It is executed after
+restore_noirq() callbacks have been executed for all devices in the system and
+after the PM core has enabled device drivers' interrupt handlers to be invoked.
+
+This callback is analogous to resume(), just like restore_noirq() is analogous
+to resume_noirq().  Consequently, the difference between restore_noirq() and
+restore() is analogous to the difference between resume_noirq() and resume().
+
+For the vast majority of PCI device drivers there is no difference between
+resume() and restore().
+
+3.1.14. complete()
+^^^^^^^^^^^^^^^^^^
+
+The complete() callback is executed in the following situations:
+
+  - during system resume, after resume() callbacks have been executed for all
+    devices,
+  - during hibernation, before saving the system image, after thaw() callbacks
+    have been executed for all devices,
+  - during system restore, when the system is going back to its pre-hibernation
+    state, after restore() callbacks have been executed for all devices.
+
+It also may be executed if the loading of a hibernation image into memory fails
+(in that case it is run after thaw() callbacks have been executed for all
+devices that have drivers in the boot kernel).
+
+This callback is entirely optional, although it may be necessary if the
+prepare() callback performs operations that need to be reversed.
+
+3.1.15. runtime_suspend()
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The runtime_suspend() callback is specific to device runtime power management
+(runtime PM).  It is executed by the PM core's runtime PM framework when the
+device is about to be suspended (i.e. quiesced and put into a low-power state)
+at run time.
+
+This callback is responsible for freezing the device and preparing it to be
+put into a low-power state, but it must allow the PCI subsystem to perform all
+of the PCI-specific actions necessary for suspending the device.
+
+3.1.16. runtime_resume()
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+The runtime_resume() callback is specific to device runtime PM.  It is executed
+by the PM core's runtime PM framework when the device is about to be resumed
+(i.e. put into the full-power state and programmed to process I/O normally) at
+run time.
+
+This callback is responsible for restoring the normal functionality of the
+device after it has been put into the full-power state by the PCI subsystem.
+The device is expected to be able to process I/O in the usual way after
+runtime_resume() has returned.
+
+3.1.17. runtime_idle()
+^^^^^^^^^^^^^^^^^^^^^^
+
+The runtime_idle() callback is specific to device runtime PM.  It is executed
+by the PM core's runtime PM framework whenever it may be desirable to suspend
+the device according to the PM core's information.  In particular, it is
+automatically executed right after runtime_resume() has returned in case the
+resume of the device has happened as a result of a spurious event.
+
+This callback is optional, but if it is not implemented or if it returns 0, the
+PCI subsystem will call pm_runtime_suspend() for the device, which in turn will
+cause the driver's runtime_suspend() callback to be executed.
+
+3.1.18. Pointing Multiple Callback Pointers to One Routine
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Although in principle each of the callbacks described in the previous
+subsections can be defined as a separate function, it often is convenient to
+point two or more members of struct dev_pm_ops to the same routine.  There are
+a few convenience macros that can be used for this purpose.
+
+The SIMPLE_DEV_PM_OPS macro declares a struct dev_pm_ops object with one
+suspend routine pointed to by the .suspend(), .freeze(), and .poweroff()
+members and one resume routine pointed to by the .resume(), .thaw(), and
+.restore() members.  The other function pointers in this struct dev_pm_ops are
+unset.
+
+The UNIVERSAL_DEV_PM_OPS macro is similar to SIMPLE_DEV_PM_OPS, but it
+additionally sets the .runtime_resume() pointer to the same value as
+.resume() (and .thaw(), and .restore()) and the .runtime_suspend() pointer to
+the same value as .suspend() (and .freeze() and .poweroff()).
+
+The SET_SYSTEM_SLEEP_PM_OPS can be used inside of a declaration of struct
+dev_pm_ops to indicate that one suspend routine is to be pointed to by the
+.suspend(), .freeze(), and .poweroff() members and one resume routine is to
+be pointed to by the .resume(), .thaw(), and .restore() members.
+
+3.1.19. Driver Flags for Power Management
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The PM core allows device drivers to set flags that influence the handling of
+power management for the devices by the core itself and by middle layer code
+including the PCI bus type.  The flags should be set once at the driver probe
+time with the help of the dev_pm_set_driver_flags() function and they should not
+be updated directly afterwards.
+
+The DPM_FLAG_NEVER_SKIP flag prevents the PM core from using the direct-complete
+mechanism allowing device suspend/resume callbacks to be skipped if the device
+is in runtime suspend when the system suspend starts.  That also affects all of
+the ancestors of the device, so this flag should only be used if absolutely
+necessary.
+
+The DPM_FLAG_SMART_PREPARE flag instructs the PCI bus type to only return a
+positive value from pci_pm_prepare() if the ->prepare callback provided by the
+driver of the device returns a positive value.  That allows the driver to opt
+out from using the direct-complete mechanism dynamically.
+
+The DPM_FLAG_SMART_SUSPEND flag tells the PCI bus type that from the driver's
+perspective the device can be safely left in runtime suspend during system
+suspend.  That causes pci_pm_suspend(), pci_pm_freeze() and pci_pm_poweroff()
+to skip resuming the device from runtime suspend unless there are PCI-specific
+reasons for doing that.  Also, it causes pci_pm_suspend_late/noirq(),
+pci_pm_freeze_late/noirq() and pci_pm_poweroff_late/noirq() to return early
+if the device remains in runtime suspend in the beginning of the "late" phase
+of the system-wide transition under way.  Moreover, if the device is in
+runtime suspend in pci_pm_resume_noirq() or pci_pm_restore_noirq(), its runtime
+power management status will be changed to "active" (as it is going to be put
+into D0 going forward), but if it is in runtime suspend in pci_pm_thaw_noirq(),
+the function will set the power.direct_complete flag for it (to make the PM core
+skip the subsequent "thaw" callbacks for it) and return.
+
+Setting the DPM_FLAG_LEAVE_SUSPENDED flag means that the driver prefers the
+device to be left in suspend after system-wide transitions to the working state.
+This flag is checked by the PM core, but the PCI bus type informs the PM core
+which devices may be left in suspend from its perspective (that happens during
+the "noirq" phase of system-wide suspend and analogous transitions) and next it
+uses the dev_pm_may_skip_resume() helper to decide whether or not to return from
+pci_pm_resume_noirq() early, as the PM core will skip the remaining resume
+callbacks for the device during the transition under way and will set its
+runtime PM status to "suspended" if dev_pm_may_skip_resume() returns "true" for
+it.
+
+3.2. Device Runtime Power Management
+------------------------------------
+
+In addition to providing device power management callbacks PCI device drivers
+are responsible for controlling the runtime power management (runtime PM) of
+their devices.
+
+The PCI device runtime PM is optional, but it is recommended that PCI device
+drivers implement it at least in the cases where there is a reliable way of
+verifying that the device is not used (like when the network cable is detached
+from an Ethernet adapter or there are no devices attached to a USB controller).
+
+To support the PCI runtime PM the driver first needs to implement the
+runtime_suspend() and runtime_resume() callbacks.  It also may need to implement
+the runtime_idle() callback to prevent the device from being suspended again
+every time right after the runtime_resume() callback has returned
+(alternatively, the runtime_suspend() callback will have to check if the
+device should really be suspended and return -EAGAIN if that is not the case).
+
+The runtime PM of PCI devices is enabled by default by the PCI core.  PCI
+device drivers do not need to enable it and should not attempt to do so.
+However, it is blocked by pci_pm_init() that runs the pm_runtime_forbid()
+helper function.  In addition to that, the runtime PM usage counter of
+each PCI device is incremented by local_pci_probe() before executing the
+probe callback provided by the device's driver.
+
+If a PCI driver implements the runtime PM callbacks and intends to use the
+runtime PM framework provided by the PM core and the PCI subsystem, it needs
+to decrement the device's runtime PM usage counter in its probe callback
+function.  If it doesn't do that, the counter will always be different from
+zero for the device and it will never be runtime-suspended.  The simplest
+way to do that is by calling pm_runtime_put_noidle(), but if the driver
+wants to schedule an autosuspend right away, for example, it may call
+pm_runtime_put_autosuspend() instead for this purpose.  Generally, it
+just needs to call a function that decrements the devices usage counter
+from its probe routine to make runtime PM work for the device.
+
+It is important to remember that the driver's runtime_suspend() callback
+may be executed right after the usage counter has been decremented, because
+user space may already have caused the pm_runtime_allow() helper function
+unblocking the runtime PM of the device to run via sysfs, so the driver must
+be prepared to cope with that.
+
+The driver itself should not call pm_runtime_allow(), though.  Instead, it
+should let user space or some platform-specific code do that (user space can
+do it via sysfs as stated above), but it must be prepared to handle the
+runtime PM of the device correctly as soon as pm_runtime_allow() is called
+(which may happen at any time, even before the driver is loaded).
+
+When the driver's remove callback runs, it has to balance the decrementation
+of the device's runtime PM usage counter at the probe time.  For this reason,
+if it has decremented the counter in its probe callback, it must run
+pm_runtime_get_noresume() in its remove callback.  [Since the core carries
+out a runtime resume of the device and bumps up the device's usage counter
+before running the driver's remove callback, the runtime PM of the device
+is effectively disabled for the duration of the remove execution and all
+runtime PM helper functions incrementing the device's usage counter are
+then effectively equivalent to pm_runtime_get_noresume().]
+
+The runtime PM framework works by processing requests to suspend or resume
+devices, or to check if they are idle (in which cases it is reasonable to
+subsequently request that they be suspended).  These requests are represented
+by work items put into the power management workqueue, pm_wq.  Although there
+are a few situations in which power management requests are automatically
+queued by the PM core (for example, after processing a request to resume a
+device the PM core automatically queues a request to check if the device is
+idle), device drivers are generally responsible for queuing power management
+requests for their devices.  For this purpose they should use the runtime PM
+helper functions provided by the PM core, discussed in
+Documentation/power/runtime_pm.rst.
+
+Devices can also be suspended and resumed synchronously, without placing a
+request into pm_wq.  In the majority of cases this also is done by their
+drivers that use helper functions provided by the PM core for this purpose.
+
+For more information on the runtime PM of devices refer to
+Documentation/power/runtime_pm.rst.
+
+
+4. Resources
+============
+
+PCI Local Bus Specification, Rev. 3.0
+
+PCI Bus Power Management Interface Specification, Rev. 1.2
+
+Advanced Configuration and Power Interface (ACPI) Specification, Rev. 3.0b
+
+PCI Express Base Specification, Rev. 2.0
+
+Documentation/driver-api/pm/devices.rst
+
+Documentation/power/runtime_pm.rst
diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt
deleted file mode 100644
index 8eaf9ee24d43..000000000000
--- a/Documentation/power/pci.txt
+++ /dev/null
@@ -1,1094 +0,0 @@
-PCI Power Management
-
-Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
-
-An overview of concepts and the Linux kernel's interfaces related to PCI power
-management.  Based on previous work by Patrick Mochel <mochel@transmeta.com>
-(and others).
-
-This document only covers the aspects of power management specific to PCI
-devices.  For general description of the kernel's interfaces related to device
-power management refer to Documentation/driver-api/pm/devices.rst and
-Documentation/power/runtime_pm.txt.
-
----------------------------------------------------------------------------
-
-1. Hardware and Platform Support for PCI Power Management
-2. PCI Subsystem and Device Power Management
-3. PCI Device Drivers and Power Management
-4. Resources
-
-
-1. Hardware and Platform Support for PCI Power Management
-=========================================================
-
-1.1. Native and Platform-Based Power Management
------------------------------------------------
-In general, power management is a feature allowing one to save energy by putting
-devices into states in which they draw less power (low-power states) at the
-price of reduced functionality or performance.
-
-Usually, a device is put into a low-power state when it is underutilized or
-completely inactive.  However, when it is necessary to use the device once
-again, it has to be put back into the "fully functional" state (full-power
-state).  This may happen when there are some data for the device to handle or
-as a result of an external event requiring the device to be active, which may
-be signaled by the device itself.
-
-PCI devices may be put into low-power states in two ways, by using the device
-capabilities introduced by the PCI Bus Power Management Interface Specification,
-or with the help of platform firmware, such as an ACPI BIOS.  In the first
-approach, that is referred to as the native PCI power management (native PCI PM)
-in what follows, the device power state is changed as a result of writing a
-specific value into one of its standard configuration registers.  The second
-approach requires the platform firmware to provide special methods that may be
-used by the kernel to change the device's power state.
-
-Devices supporting the native PCI PM usually can generate wakeup signals called
-Power Management Events (PMEs) to let the kernel know about external events
-requiring the device to be active.  After receiving a PME the kernel is supposed
-to put the device that sent it into the full-power state.  However, the PCI Bus
-Power Management Interface Specification doesn't define any standard method of
-delivering the PME from the device to the CPU and the operating system kernel.
-It is assumed that the platform firmware will perform this task and therefore,
-even though a PCI device is set up to generate PMEs, it also may be necessary to
-prepare the platform firmware for notifying the CPU of the PMEs coming from the
-device (e.g. by generating interrupts).
-
-In turn, if the methods provided by the platform firmware are used for changing
-the power state of a device, usually the platform also provides a method for
-preparing the device to generate wakeup signals.  In that case, however, it
-often also is necessary to prepare the device for generating PMEs using the
-native PCI PM mechanism, because the method provided by the platform depends on
-that.
-
-Thus in many situations both the native and the platform-based power management
-mechanisms have to be used simultaneously to obtain the desired result.
-
-1.2. Native PCI Power Management
---------------------------------
-The PCI Bus Power Management Interface Specification (PCI PM Spec) was
-introduced between the PCI 2.1 and PCI 2.2 Specifications.  It defined a
-standard interface for performing various operations related to power
-management.
-
-The implementation of the PCI PM Spec is optional for conventional PCI devices,
-but it is mandatory for PCI Express devices.  If a device supports the PCI PM
-Spec, it has an 8 byte power management capability field in its PCI
-configuration space.  This field is used to describe and control the standard
-features related to the native PCI power management.
-
-The PCI PM Spec defines 4 operating states for devices (D0-D3) and for buses
-(B0-B3).  The higher the number, the less power is drawn by the device or bus
-in that state.  However, the higher the number, the longer the latency for
-the device or bus to return to the full-power state (D0 or B0, respectively).
-
-There are two variants of the D3 state defined by the specification.  The first
-one is D3hot, referred to as the software accessible D3, because devices can be
-programmed to go into it.  The second one, D3cold, is the state that PCI devices
-are in when the supply voltage (Vcc) is removed from them.  It is not possible
-to program a PCI device to go into D3cold, although there may be a programmable
-interface for putting the bus the device is on into a state in which Vcc is
-removed from all devices on the bus.
-
-PCI bus power management, however, is not supported by the Linux kernel at the
-time of this writing and therefore it is not covered by this document.
-
-Note that every PCI device can be in the full-power state (D0) or in D3cold,
-regardless of whether or not it implements the PCI PM Spec.  In addition to
-that, if the PCI PM Spec is implemented by the device, it must support D3hot
-as well as D0.  The support for the D1 and D2 power states is optional.
-
-PCI devices supporting the PCI PM Spec can be programmed to go to any of the
-supported low-power states (except for D3cold).  While in D1-D3hot the
-standard configuration registers of the device must be accessible to software
-(i.e. the device is required to respond to PCI configuration accesses), although
-its I/O and memory spaces are then disabled.  This allows the device to be
-programmatically put into D0.  Thus the kernel can switch the device back and
-forth between D0 and the supported low-power states (except for D3cold) and the
-possible power state transitions the device can undergo are the following:
-
-+----------------------------+
-| Current State | New State  |
-+----------------------------+
-| D0            | D1, D2, D3 |
-+----------------------------+
-| D1            | D2, D3     |
-+----------------------------+
-| D2            | D3         |
-+----------------------------+
-| D1, D2, D3    | D0         |
-+----------------------------+
-
-The transition from D3cold to D0 occurs when the supply voltage is provided to
-the device (i.e. power is restored).  In that case the device returns to D0 with
-a full power-on reset sequence and the power-on defaults are restored to the
-device by hardware just as at initial power up.
-
-PCI devices supporting the PCI PM Spec can be programmed to generate PMEs
-while in a low-power state (D1-D3), but they are not required to be capable
-of generating PMEs from all supported low-power states.  In particular, the
-capability of generating PMEs from D3cold is optional and depends on the
-presence of additional voltage (3.3Vaux) allowing the device to remain
-sufficiently active to generate a wakeup signal.
-
-1.3. ACPI Device Power Management
----------------------------------
-The platform firmware support for the power management of PCI devices is
-system-specific.  However, if the system in question is compliant with the
-Advanced Configuration and Power Interface (ACPI) Specification, like the
-majority of x86-based systems, it is supposed to implement device power
-management interfaces defined by the ACPI standard.
-
-For this purpose the ACPI BIOS provides special functions called "control
-methods" that may be executed by the kernel to perform specific tasks, such as
-putting a device into a low-power state.  These control methods are encoded
-using special byte-code language called the ACPI Machine Language (AML) and
-stored in the machine's BIOS.  The kernel loads them from the BIOS and executes
-them as needed using an AML interpreter that translates the AML byte code into
-computations and memory or I/O space accesses.  This way, in theory, a BIOS
-writer can provide the kernel with a means to perform actions depending
-on the system design in a system-specific fashion.
-
-ACPI control methods may be divided into global control methods, that are not
-associated with any particular devices, and device control methods, that have
-to be defined separately for each device supposed to be handled with the help of
-the platform.  This means, in particular, that ACPI device control methods can
-only be used to handle devices that the BIOS writer knew about in advance.  The
-ACPI methods used for device power management fall into that category.
-
-The ACPI specification assumes that devices can be in one of four power states
-labeled as D0, D1, D2, and D3 that roughly correspond to the native PCI PM
-D0-D3 states (although the difference between D3hot and D3cold is not taken
-into account by ACPI).  Moreover, for each power state of a device there is a
-set of power resources that have to be enabled for the device to be put into
-that state.  These power resources are controlled (i.e. enabled or disabled)
-with the help of their own control methods, _ON and _OFF, that have to be
-defined individually for each of them.
-
-To put a device into the ACPI power state Dx (where x is a number between 0 and
-3 inclusive) the kernel is supposed to (1) enable the power resources required
-by the device in this state using their _ON control methods and (2) execute the
-_PSx control method defined for the device.  In addition to that, if the device
-is going to be put into a low-power state (D1-D3) and is supposed to generate
-wakeup signals from that state, the _DSW (or _PSW, replaced with _DSW by ACPI
-3.0) control method defined for it has to be executed before _PSx.  Power
-resources that are not required by the device in the target power state and are
-not required any more by any other device should be disabled (by executing their
-_OFF control methods).  If the current power state of the device is D3, it can
-only be put into D0 this way.
-
-However, quite often the power states of devices are changed during a
-system-wide transition into a sleep state or back into the working state.  ACPI
-defines four system sleep states, S1, S2, S3, and S4, and denotes the system
-working state as S0.  In general, the target system sleep (or working) state
-determines the highest power (lowest number) state the device can be put
-into and the kernel is supposed to obtain this information by executing the
-device's _SxD control method (where x is a number between 0 and 4 inclusive).
-If the device is required to wake up the system from the target sleep state, the
-lowest power (highest number) state it can be put into is also determined by the
-target state of the system.  The kernel is then supposed to use the device's
-_SxW control method to obtain the number of that state.  It also is supposed to
-use the device's _PRW control method to learn which power resources need to be
-enabled for the device to be able to generate wakeup signals.
-
-1.4. Wakeup Signaling
----------------------
-Wakeup signals generated by PCI devices, either as native PCI PMEs, or as
-a result of the execution of the _DSW (or _PSW) ACPI control method before
-putting the device into a low-power state, have to be caught and handled as
-appropriate.  If they are sent while the system is in the working state
-(ACPI S0), they should be translated into interrupts so that the kernel can
-put the devices generating them into the full-power state and take care of the
-events that triggered them.  In turn, if they are sent while the system is
-sleeping, they should cause the system's core logic to trigger wakeup.
-
-On ACPI-based systems wakeup signals sent by conventional PCI devices are
-converted into ACPI General-Purpose Events (GPEs) which are hardware signals
-from the system core logic generated in response to various events that need to
-be acted upon.  Every GPE is associated with one or more sources of potentially
-interesting events.  In particular, a GPE may be associated with a PCI device
-capable of signaling wakeup.  The information on the connections between GPEs
-and event sources is recorded in the system's ACPI BIOS from where it can be
-read by the kernel.
-
-If a PCI device known to the system's ACPI BIOS signals wakeup, the GPE
-associated with it (if there is one) is triggered.  The GPEs associated with PCI
-bridges may also be triggered in response to a wakeup signal from one of the
-devices below the bridge (this also is the case for root bridges) and, for
-example, native PCI PMEs from devices unknown to the system's ACPI BIOS may be
-handled this way.
-
-A GPE may be triggered when the system is sleeping (i.e. when it is in one of
-the ACPI S1-S4 states), in which case system wakeup is started by its core logic
-(the device that was the source of the signal causing the system wakeup to occur
-may be identified later).  The GPEs used in such situations are referred to as
-wakeup GPEs.
-
-Usually, however, GPEs are also triggered when the system is in the working
-state (ACPI S0) and in that case the system's core logic generates a System
-Control Interrupt (SCI) to notify the kernel of the event.  Then, the SCI
-handler identifies the GPE that caused the interrupt to be generated which,
-in turn, allows the kernel to identify the source of the event (that may be
-a PCI device signaling wakeup).  The GPEs used for notifying the kernel of
-events occurring while the system is in the working state are referred to as
-runtime GPEs.
-
-Unfortunately, there is no standard way of handling wakeup signals sent by
-conventional PCI devices on systems that are not ACPI-based, but there is one
-for PCI Express devices.  Namely, the PCI Express Base Specification introduced
-a native mechanism for converting native PCI PMEs into interrupts generated by
-root ports.  For conventional PCI devices native PMEs are out-of-band, so they
-are routed separately and they need not pass through bridges (in principle they
-may be routed directly to the system's core logic), but for PCI Express devices
-they are in-band messages that have to pass through the PCI Express hierarchy,
-including the root port on the path from the device to the Root Complex.  Thus
-it was possible to introduce a mechanism by which a root port generates an
-interrupt whenever it receives a PME message from one of the devices below it.
-The PCI Express Requester ID of the device that sent the PME message is then
-recorded in one of the root port's configuration registers from where it may be
-read by the interrupt handler allowing the device to be identified.  [PME
-messages sent by PCI Express endpoints integrated with the Root Complex don't
-pass through root ports, but instead they cause a Root Complex Event Collector
-(if there is one) to generate interrupts.]
-
-In principle the native PCI Express PME signaling may also be used on ACPI-based
-systems along with the GPEs, but to use it the kernel has to ask the system's
-ACPI BIOS to release control of root port configuration registers.  The ACPI
-BIOS, however, is not required to allow the kernel to control these registers
-and if it doesn't do that, the kernel must not modify their contents.  Of course
-the native PCI Express PME signaling cannot be used by the kernel in that case.
-
-
-2. PCI Subsystem and Device Power Management
-============================================
-
-2.1. Device Power Management Callbacks
---------------------------------------
-The PCI Subsystem participates in the power management of PCI devices in a
-number of ways.  First of all, it provides an intermediate code layer between
-the device power management core (PM core) and PCI device drivers.
-Specifically, the pm field of the PCI subsystem's struct bus_type object,
-pci_bus_type, points to a struct dev_pm_ops object, pci_dev_pm_ops, containing
-pointers to several device power management callbacks:
-
-const struct dev_pm_ops pci_dev_pm_ops = {
-	.prepare = pci_pm_prepare,
-	.complete = pci_pm_complete,
-	.suspend = pci_pm_suspend,
-	.resume = pci_pm_resume,
-	.freeze = pci_pm_freeze,
-	.thaw = pci_pm_thaw,
-	.poweroff = pci_pm_poweroff,
-	.restore = pci_pm_restore,
-	.suspend_noirq = pci_pm_suspend_noirq,
-	.resume_noirq = pci_pm_resume_noirq,
-	.freeze_noirq = pci_pm_freeze_noirq,
-	.thaw_noirq = pci_pm_thaw_noirq,
-	.poweroff_noirq = pci_pm_poweroff_noirq,
-	.restore_noirq = pci_pm_restore_noirq,
-	.runtime_suspend = pci_pm_runtime_suspend,
-	.runtime_resume = pci_pm_runtime_resume,
-	.runtime_idle = pci_pm_runtime_idle,
-};
-
-These callbacks are executed by the PM core in various situations related to
-device power management and they, in turn, execute power management callbacks
-provided by PCI device drivers.  They also perform power management operations
-involving some standard configuration registers of PCI devices that device
-drivers need not know or care about.
-
-The structure representing a PCI device, struct pci_dev, contains several fields
-that these callbacks operate on:
-
-struct pci_dev {
-	...
-	pci_power_t     current_state;  /* Current operating state. */
-	int		pm_cap;		/* PM capability offset in the
-					   configuration space */
-	unsigned int	pme_support:5;	/* Bitmask of states from which PME#
-					   can be generated */
-	unsigned int	pme_interrupt:1;/* Is native PCIe PME signaling used? */
-	unsigned int	d1_support:1;	/* Low power state D1 is supported */
-	unsigned int	d2_support:1;	/* Low power state D2 is supported */
-	unsigned int	no_d1d2:1;	/* D1 and D2 are forbidden */
-	unsigned int	wakeup_prepared:1;  /* Device prepared for wake up */
-	unsigned int	d3_delay;	/* D3->D0 transition time in ms */
-	...
-};
-
-They also indirectly use some fields of the struct device that is embedded in
-struct pci_dev.
-
-2.2. Device Initialization
---------------------------
-The PCI subsystem's first task related to device power management is to
-prepare the device for power management and initialize the fields of struct
-pci_dev used for this purpose.  This happens in two functions defined in
-drivers/pci/pci.c, pci_pm_init() and platform_pci_wakeup_init().
-
-The first of these functions checks if the device supports native PCI PM
-and if that's the case the offset of its power management capability structure
-in the configuration space is stored in the pm_cap field of the device's struct
-pci_dev object.  Next, the function checks which PCI low-power states are
-supported by the device and from which low-power states the device can generate
-native PCI PMEs.  The power management fields of the device's struct pci_dev and
-the struct device embedded in it are updated accordingly and the generation of
-PMEs by the device is disabled.
-
-The second function checks if the device can be prepared to signal wakeup with
-the help of the platform firmware, such as the ACPI BIOS.  If that is the case,
-the function updates the wakeup fields in struct device embedded in the
-device's struct pci_dev and uses the firmware-provided method to prevent the
-device from signaling wakeup.
-
-At this point the device is ready for power management.  For driverless devices,
-however, this functionality is limited to a few basic operations carried out
-during system-wide transitions to a sleep state and back to the working state.
-
-2.3. Runtime Device Power Management
-------------------------------------
-The PCI subsystem plays a vital role in the runtime power management of PCI
-devices.  For this purpose it uses the general runtime power management
-(runtime PM) framework described in Documentation/power/runtime_pm.txt.
-Namely, it provides subsystem-level callbacks:
-
-	pci_pm_runtime_suspend()
-	pci_pm_runtime_resume()
-	pci_pm_runtime_idle()
-
-that are executed by the core runtime PM routines.  It also implements the
-entire mechanics necessary for handling runtime wakeup signals from PCI devices
-in low-power states, which at the time of this writing works for both the native
-PCI Express PME signaling and the ACPI GPE-based wakeup signaling described in
-Section 1.
-
-First, a PCI device is put into a low-power state, or suspended, with the help
-of pm_schedule_suspend() or pm_runtime_suspend() which for PCI devices call
-pci_pm_runtime_suspend() to do the actual job.  For this to work, the device's
-driver has to provide a pm->runtime_suspend() callback (see below), which is
-run by pci_pm_runtime_suspend() as the first action.  If the driver's callback
-returns successfully, the device's standard configuration registers are saved,
-the device is prepared to generate wakeup signals and, finally, it is put into
-the target low-power state.
-
-The low-power state to put the device into is the lowest-power (highest number)
-state from which it can signal wakeup.  The exact method of signaling wakeup is
-system-dependent and is determined by the PCI subsystem on the basis of the
-reported capabilities of the device and the platform firmware.  To prepare the
-device for signaling wakeup and put it into the selected low-power state, the
-PCI subsystem can use the platform firmware as well as the device's native PCI
-PM capabilities, if supported.
-
-It is expected that the device driver's pm->runtime_suspend() callback will
-not attempt to prepare the device for signaling wakeup or to put it into a
-low-power state.  The driver ought to leave these tasks to the PCI subsystem
-that has all of the information necessary to perform them.
-
-A suspended device is brought back into the "active" state, or resumed,
-with the help of pm_request_resume() or pm_runtime_resume() which both call
-pci_pm_runtime_resume() for PCI devices.  Again, this only works if the device's
-driver provides a pm->runtime_resume() callback (see below).  However, before
-the driver's callback is executed, pci_pm_runtime_resume() brings the device
-back into the full-power state, prevents it from signaling wakeup while in that
-state and restores its standard configuration registers.  Thus the driver's
-callback need not worry about the PCI-specific aspects of the device resume.
-
-Note that generally pci_pm_runtime_resume() may be called in two different
-situations.  First, it may be called at the request of the device's driver, for
-example if there are some data for it to process.  Second, it may be called
-as a result of a wakeup signal from the device itself (this sometimes is
-referred to as "remote wakeup").  Of course, for this purpose the wakeup signal
-is handled in one of the ways described in Section 1 and finally converted into
-a notification for the PCI subsystem after the source device has been
-identified.
-
-The pci_pm_runtime_idle() function, called for PCI devices by pm_runtime_idle()
-and pm_request_idle(), executes the device driver's pm->runtime_idle()
-callback, if defined, and if that callback doesn't return error code (or is not
-present at all), suspends the device with the help of pm_runtime_suspend().
-Sometimes pci_pm_runtime_idle() is called automatically by the PM core (for
-example, it is called right after the device has just been resumed), in which
-cases it is expected to suspend the device if that makes sense.  Usually,
-however, the PCI subsystem doesn't really know if the device really can be
-suspended, so it lets the device's driver decide by running its
-pm->runtime_idle() callback.
-
-2.4. System-Wide Power Transitions
-----------------------------------
-There are a few different types of system-wide power transitions, described in
-Documentation/driver-api/pm/devices.rst.  Each of them requires devices to be handled
-in a specific way and the PM core executes subsystem-level power management
-callbacks for this purpose.  They are executed in phases such that each phase
-involves executing the same subsystem-level callback for every device belonging
-to the given subsystem before the next phase begins.  These phases always run
-after tasks have been frozen.
-
-2.4.1. System Suspend
-
-When the system is going into a sleep state in which the contents of memory will
-be preserved, such as one of the ACPI sleep states S1-S3, the phases are:
-
-	prepare, suspend, suspend_noirq.
-
-The following PCI bus type's callbacks, respectively, are used in these phases:
-
-	pci_pm_prepare()
-	pci_pm_suspend()
-	pci_pm_suspend_noirq()
-
-The pci_pm_prepare() routine first puts the device into the "fully functional"
-state with the help of pm_runtime_resume().  Then, it executes the device
-driver's pm->prepare() callback if defined (i.e. if the driver's struct
-dev_pm_ops object is present and the prepare pointer in that object is valid).
-
-The pci_pm_suspend() routine first checks if the device's driver implements
-legacy PCI suspend routines (see Section 3), in which case the driver's legacy
-suspend callback is executed, if present, and its result is returned.  Next, if
-the device's driver doesn't provide a struct dev_pm_ops object (containing
-pointers to the driver's callbacks), pci_pm_default_suspend() is called, which
-simply turns off the device's bus master capability and runs
-pcibios_disable_device() to disable it, unless the device is a bridge (PCI
-bridges are ignored by this routine).  Next, the device driver's pm->suspend()
-callback is executed, if defined, and its result is returned if it fails.
-Finally, pci_fixup_device() is called to apply hardware suspend quirks related
-to the device if necessary.
-
-Note that the suspend phase is carried out asynchronously for PCI devices, so
-the pci_pm_suspend() callback may be executed in parallel for any pair of PCI
-devices that don't depend on each other in a known way (i.e. none of the paths
-in the device tree from the root bridge to a leaf device contains both of them).
-
-The pci_pm_suspend_noirq() routine is executed after suspend_device_irqs() has
-been called, which means that the device driver's interrupt handler won't be
-invoked while this routine is running.  It first checks if the device's driver
-implements legacy PCI suspends routines (Section 3), in which case the legacy
-late suspend routine is called and its result is returned (the standard
-configuration registers of the device are saved if the driver's callback hasn't
-done that).  Second, if the device driver's struct dev_pm_ops object is not
-present, the device's standard configuration registers are saved and the routine
-returns success.  Otherwise the device driver's pm->suspend_noirq() callback is
-executed, if present, and its result is returned if it fails.  Next, if the
-device's standard configuration registers haven't been saved yet (one of the
-device driver's callbacks executed before might do that), pci_pm_suspend_noirq()
-saves them, prepares the device to signal wakeup (if necessary) and puts it into
-a low-power state.
-
-The low-power state to put the device into is the lowest-power (highest number)
-state from which it can signal wakeup while the system is in the target sleep
-state.  Just like in the runtime PM case described above, the mechanism of
-signaling wakeup is system-dependent and determined by the PCI subsystem, which
-is also responsible for preparing the device to signal wakeup from the system's
-target sleep state as appropriate.
-
-PCI device drivers (that don't implement legacy power management callbacks) are
-generally not expected to prepare devices for signaling wakeup or to put them
-into low-power states.  However, if one of the driver's suspend callbacks
-(pm->suspend() or pm->suspend_noirq()) saves the device's standard configuration
-registers, pci_pm_suspend_noirq() will assume that the device has been prepared
-to signal wakeup and put into a low-power state by the driver (the driver is
-then assumed to have used the helper functions provided by the PCI subsystem for
-this purpose).  PCI device drivers are not encouraged to do that, but in some
-rare cases doing that in the driver may be the optimum approach.
-
-2.4.2. System Resume
-
-When the system is undergoing a transition from a sleep state in which the
-contents of memory have been preserved, such as one of the ACPI sleep states
-S1-S3, into the working state (ACPI S0), the phases are:
-
-	resume_noirq, resume, complete.
-
-The following PCI bus type's callbacks, respectively, are executed in these
-phases:
-
-	pci_pm_resume_noirq()
-	pci_pm_resume()
-	pci_pm_complete()
-
-The pci_pm_resume_noirq() routine first puts the device into the full-power
-state, restores its standard configuration registers and applies early resume
-hardware quirks related to the device, if necessary.  This is done
-unconditionally, regardless of whether or not the device's driver implements
-legacy PCI power management callbacks (this way all PCI devices are in the
-full-power state and their standard configuration registers have been restored
-when their interrupt handlers are invoked for the first time during resume,
-which allows the kernel to avoid problems with the handling of shared interrupts
-by drivers whose devices are still suspended).  If legacy PCI power management
-callbacks (see Section 3) are implemented by the device's driver, the legacy
-early resume callback is executed and its result is returned.  Otherwise, the
-device driver's pm->resume_noirq() callback is executed, if defined, and its
-result is returned.
-
-The pci_pm_resume() routine first checks if the device's standard configuration
-registers have been restored and restores them if that's not the case (this
-only is necessary in the error path during a failing suspend).  Next, resume
-hardware quirks related to the device are applied, if necessary, and if the
-device's driver implements legacy PCI power management callbacks (see
-Section 3), the driver's legacy resume callback is executed and its result is
-returned.  Otherwise, the device's wakeup signaling mechanisms are blocked and
-its driver's pm->resume() callback is executed, if defined (the callback's
-result is then returned).
-
-The resume phase is carried out asynchronously for PCI devices, like the
-suspend phase described above, which means that if two PCI devices don't depend
-on each other in a known way, the pci_pm_resume() routine may be executed for
-the both of them in parallel.
-
-The pci_pm_complete() routine only executes the device driver's pm->complete()
-callback, if defined.
-
-2.4.3. System Hibernation
-
-System hibernation is more complicated than system suspend, because it requires
-a system image to be created and written into a persistent storage medium.  The
-image is created atomically and all devices are quiesced, or frozen, before that
-happens.
-
-The freezing of devices is carried out after enough memory has been freed (at
-the time of this writing the image creation requires at least 50% of system RAM
-to be free) in the following three phases:
-
-	prepare, freeze, freeze_noirq
-
-that correspond to the PCI bus type's callbacks:
-
-	pci_pm_prepare()
-	pci_pm_freeze()
-	pci_pm_freeze_noirq()
-
-This means that the prepare phase is exactly the same as for system suspend.
-The other two phases, however, are different.
-
-The pci_pm_freeze() routine is quite similar to pci_pm_suspend(), but it runs
-the device driver's pm->freeze() callback, if defined, instead of pm->suspend(),
-and it doesn't apply the suspend-related hardware quirks.  It is executed
-asynchronously for different PCI devices that don't depend on each other in a
-known way.
-
-The pci_pm_freeze_noirq() routine, in turn, is similar to
-pci_pm_suspend_noirq(), but it calls the device driver's pm->freeze_noirq()
-routine instead of pm->suspend_noirq().  It also doesn't attempt to prepare the
-device for signaling wakeup and put it into a low-power state.  Still, it saves
-the device's standard configuration registers if they haven't been saved by one
-of the driver's callbacks.
-
-Once the image has been created, it has to be saved.  However, at this point all
-devices are frozen and they cannot handle I/O, while their ability to handle
-I/O is obviously necessary for the image saving.  Thus they have to be brought
-back to the fully functional state and this is done in the following phases:
-
-	thaw_noirq, thaw, complete
-
-using the following PCI bus type's callbacks:
-
-	pci_pm_thaw_noirq()
-	pci_pm_thaw()
-	pci_pm_complete()
-
-respectively.
-
-The first of them, pci_pm_thaw_noirq(), is analogous to pci_pm_resume_noirq(),
-but it doesn't put the device into the full power state and doesn't attempt to
-restore its standard configuration registers.  It also executes the device
-driver's pm->thaw_noirq() callback, if defined, instead of pm->resume_noirq().
-
-The pci_pm_thaw() routine is similar to pci_pm_resume(), but it runs the device
-driver's pm->thaw() callback instead of pm->resume().  It is executed
-asynchronously for different PCI devices that don't depend on each other in a
-known way.
-
-The complete phase it the same as for system resume.
-
-After saving the image, devices need to be powered down before the system can
-enter the target sleep state (ACPI S4 for ACPI-based systems).  This is done in
-three phases:
-
-	prepare, poweroff, poweroff_noirq
-
-where the prepare phase is exactly the same as for system suspend.  The other
-two phases are analogous to the suspend and suspend_noirq phases, respectively.
-The PCI subsystem-level callbacks they correspond to
-
-	pci_pm_poweroff()
-	pci_pm_poweroff_noirq()
-
-work in analogy with pci_pm_suspend() and pci_pm_poweroff_noirq(), respectively,
-although they don't attempt to save the device's standard configuration
-registers.
-
-2.4.4. System Restore
-
-System restore requires a hibernation image to be loaded into memory and the
-pre-hibernation memory contents to be restored before the pre-hibernation system
-activity can be resumed.
-
-As described in Documentation/driver-api/pm/devices.rst, the hibernation image is loaded
-into memory by a fresh instance of the kernel, called the boot kernel, which in
-turn is loaded and run by a boot loader in the usual way.  After the boot kernel
-has loaded the image, it needs to replace its own code and data with the code
-and data of the "hibernated" kernel stored within the image, called the image
-kernel.  For this purpose all devices are frozen just like before creating
-the image during hibernation, in the
-
-	prepare, freeze, freeze_noirq
-
-phases described above.  However, the devices affected by these phases are only
-those having drivers in the boot kernel; other devices will still be in whatever
-state the boot loader left them.
-
-Should the restoration of the pre-hibernation memory contents fail, the boot
-kernel would go through the "thawing" procedure described above, using the
-thaw_noirq, thaw, and complete phases (that will only affect the devices having
-drivers in the boot kernel), and then continue running normally.
-
-If the pre-hibernation memory contents are restored successfully, which is the
-usual situation, control is passed to the image kernel, which then becomes
-responsible for bringing the system back to the working state.  To achieve this,
-it must restore the devices' pre-hibernation functionality, which is done much
-like waking up from the memory sleep state, although it involves different
-phases:
-
-	restore_noirq, restore, complete
-
-The first two of these are analogous to the resume_noirq and resume phases
-described above, respectively, and correspond to the following PCI subsystem
-callbacks:
-
-	pci_pm_restore_noirq()
-	pci_pm_restore()
-
-These callbacks work in analogy with pci_pm_resume_noirq() and pci_pm_resume(),
-respectively, but they execute the device driver's pm->restore_noirq() and
-pm->restore() callbacks, if available.
-
-The complete phase is carried out in exactly the same way as during system
-resume.
-
-
-3. PCI Device Drivers and Power Management
-==========================================
-
-3.1. Power Management Callbacks
--------------------------------
-PCI device drivers participate in power management by providing callbacks to be
-executed by the PCI subsystem's power management routines described above and by
-controlling the runtime power management of their devices.
-
-At the time of this writing there are two ways to define power management
-callbacks for a PCI device driver, the recommended one, based on using a
-dev_pm_ops structure described in Documentation/driver-api/pm/devices.rst, and the
-"legacy" one, in which the .suspend(), .suspend_late(), .resume_early(), and
-.resume() callbacks from struct pci_driver are used.  The legacy approach,
-however, doesn't allow one to define runtime power management callbacks and is
-not really suitable for any new drivers.  Therefore it is not covered by this
-document (refer to the source code to learn more about it).
-
-It is recommended that all PCI device drivers define a struct dev_pm_ops object
-containing pointers to power management (PM) callbacks that will be executed by
-the PCI subsystem's PM routines in various circumstances.  A pointer to the
-driver's struct dev_pm_ops object has to be assigned to the driver.pm field in
-its struct pci_driver object.  Once that has happened, the "legacy" PM callbacks
-in struct pci_driver are ignored (even if they are not NULL).
-
-The PM callbacks in struct dev_pm_ops are not mandatory and if they are not
-defined (i.e. the respective fields of struct dev_pm_ops are unset) the PCI
-subsystem will handle the device in a simplified default manner.  If they are
-defined, though, they are expected to behave as described in the following
-subsections.
-
-3.1.1. prepare()
-
-The prepare() callback is executed during system suspend, during hibernation
-(when a hibernation image is about to be created), during power-off after
-saving a hibernation image and during system restore, when a hibernation image
-has just been loaded into memory.
-
-This callback is only necessary if the driver's device has children that in
-general may be registered at any time.  In that case the role of the prepare()
-callback is to prevent new children of the device from being registered until
-one of the resume_noirq(), thaw_noirq(), or restore_noirq() callbacks is run.
-
-In addition to that the prepare() callback may carry out some operations
-preparing the device to be suspended, although it should not allocate memory
-(if additional memory is required to suspend the device, it has to be
-preallocated earlier, for example in a suspend/hibernate notifier as described
-in Documentation/driver-api/pm/notifiers.rst).
-
-3.1.2. suspend()
-
-The suspend() callback is only executed during system suspend, after prepare()
-callbacks have been executed for all devices in the system.
-
-This callback is expected to quiesce the device and prepare it to be put into a
-low-power state by the PCI subsystem.  It is not required (in fact it even is
-not recommended) that a PCI driver's suspend() callback save the standard
-configuration registers of the device, prepare it for waking up the system, or
-put it into a low-power state.  All of these operations can very well be taken
-care of by the PCI subsystem, without the driver's participation.
-
-However, in some rare case it is convenient to carry out these operations in
-a PCI driver.  Then, pci_save_state(), pci_prepare_to_sleep(), and
-pci_set_power_state() should be used to save the device's standard configuration
-registers, to prepare it for system wakeup (if necessary), and to put it into a
-low-power state, respectively.  Moreover, if the driver calls pci_save_state(),
-the PCI subsystem will not execute either pci_prepare_to_sleep(), or
-pci_set_power_state() for its device, so the driver is then responsible for
-handling the device as appropriate.
-
-While the suspend() callback is being executed, the driver's interrupt handler
-can be invoked to handle an interrupt from the device, so all suspend-related
-operations relying on the driver's ability to handle interrupts should be
-carried out in this callback.
-
-3.1.3. suspend_noirq()
-
-The suspend_noirq() callback is only executed during system suspend, after
-suspend() callbacks have been executed for all devices in the system and
-after device interrupts have been disabled by the PM core.
-
-The difference between suspend_noirq() and suspend() is that the driver's
-interrupt handler will not be invoked while suspend_noirq() is running.  Thus
-suspend_noirq() can carry out operations that would cause race conditions to
-arise if they were performed in suspend().
-
-3.1.4. freeze()
-
-The freeze() callback is hibernation-specific and is executed in two situations,
-during hibernation, after prepare() callbacks have been executed for all devices
-in preparation for the creation of a system image, and during restore,
-after a system image has been loaded into memory from persistent storage and the
-prepare() callbacks have been executed for all devices.
-
-The role of this callback is analogous to the role of the suspend() callback
-described above.  In fact, they only need to be different in the rare cases when
-the driver takes the responsibility for putting the device into a low-power
-state.
-
-In that cases the freeze() callback should not prepare the device system wakeup
-or put it into a low-power state.  Still, either it or freeze_noirq() should
-save the device's standard configuration registers using pci_save_state().
-
-3.1.5. freeze_noirq()
-
-The freeze_noirq() callback is hibernation-specific.  It is executed during
-hibernation, after prepare() and freeze() callbacks have been executed for all
-devices in preparation for the creation of a system image, and during restore,
-after a system image has been loaded into memory and after prepare() and
-freeze() callbacks have been executed for all devices.  It is always executed
-after device interrupts have been disabled by the PM core.
-
-The role of this callback is analogous to the role of the suspend_noirq()
-callback described above and it very rarely is necessary to define
-freeze_noirq().
-
-The difference between freeze_noirq() and freeze() is analogous to the
-difference between suspend_noirq() and suspend().
-
-3.1.6. poweroff()
-
-The poweroff() callback is hibernation-specific.  It is executed when the system
-is about to be powered off after saving a hibernation image to a persistent
-storage.  prepare() callbacks are executed for all devices before poweroff() is
-called.
-
-The role of this callback is analogous to the role of the suspend() and freeze()
-callbacks described above, although it does not need to save the contents of
-the device's registers.  In particular, if the driver wants to put the device
-into a low-power state itself instead of allowing the PCI subsystem to do that,
-the poweroff() callback should use pci_prepare_to_sleep() and
-pci_set_power_state() to prepare the device for system wakeup and to put it
-into a low-power state, respectively, but it need not save the device's standard
-configuration registers.
-
-3.1.7. poweroff_noirq()
-
-The poweroff_noirq() callback is hibernation-specific.  It is executed after
-poweroff() callbacks have been executed for all devices in the system.
-
-The role of this callback is analogous to the role of the suspend_noirq() and
-freeze_noirq() callbacks described above, but it does not need to save the
-contents of the device's registers.
-
-The difference between poweroff_noirq() and poweroff() is analogous to the
-difference between suspend_noirq() and suspend().
-
-3.1.8. resume_noirq()
-
-The resume_noirq() callback is only executed during system resume, after the
-PM core has enabled the non-boot CPUs.  The driver's interrupt handler will not
-be invoked while resume_noirq() is running, so this callback can carry out
-operations that might race with the interrupt handler.
-
-Since the PCI subsystem unconditionally puts all devices into the full power
-state in the resume_noirq phase of system resume and restores their standard
-configuration registers, resume_noirq() is usually not necessary.  In general
-it should only be used for performing operations that would lead to race
-conditions if carried out by resume().
-
-3.1.9. resume()
-
-The resume() callback is only executed during system resume, after
-resume_noirq() callbacks have been executed for all devices in the system and
-device interrupts have been enabled by the PM core.
-
-This callback is responsible for restoring the pre-suspend configuration of the
-device and bringing it back to the fully functional state.  The device should be
-able to process I/O in a usual way after resume() has returned.
-
-3.1.10. thaw_noirq()
-
-The thaw_noirq() callback is hibernation-specific.  It is executed after a
-system image has been created and the non-boot CPUs have been enabled by the PM
-core, in the thaw_noirq phase of hibernation.  It also may be executed if the
-loading of a hibernation image fails during system restore (it is then executed
-after enabling the non-boot CPUs).  The driver's interrupt handler will not be
-invoked while thaw_noirq() is running.
-
-The role of this callback is analogous to the role of resume_noirq().  The
-difference between these two callbacks is that thaw_noirq() is executed after
-freeze() and freeze_noirq(), so in general it does not need to modify the
-contents of the device's registers.
-
-3.1.11. thaw()
-
-The thaw() callback is hibernation-specific.  It is executed after thaw_noirq()
-callbacks have been executed for all devices in the system and after device
-interrupts have been enabled by the PM core.
-
-This callback is responsible for restoring the pre-freeze configuration of
-the device, so that it will work in a usual way after thaw() has returned.
-
-3.1.12. restore_noirq()
-
-The restore_noirq() callback is hibernation-specific.  It is executed in the
-restore_noirq phase of hibernation, when the boot kernel has passed control to
-the image kernel and the non-boot CPUs have been enabled by the image kernel's
-PM core.
-
-This callback is analogous to resume_noirq() with the exception that it cannot
-make any assumption on the previous state of the device, even if the BIOS (or
-generally the platform firmware) is known to preserve that state over a
-suspend-resume cycle.
-
-For the vast majority of PCI device drivers there is no difference between
-resume_noirq() and restore_noirq().
-
-3.1.13. restore()
-
-The restore() callback is hibernation-specific.  It is executed after
-restore_noirq() callbacks have been executed for all devices in the system and
-after the PM core has enabled device drivers' interrupt handlers to be invoked.
-
-This callback is analogous to resume(), just like restore_noirq() is analogous
-to resume_noirq().  Consequently, the difference between restore_noirq() and
-restore() is analogous to the difference between resume_noirq() and resume().
-
-For the vast majority of PCI device drivers there is no difference between
-resume() and restore().
-
-3.1.14. complete()
-
-The complete() callback is executed in the following situations:
-  - during system resume, after resume() callbacks have been executed for all
-    devices,
-  - during hibernation, before saving the system image, after thaw() callbacks
-    have been executed for all devices,
-  - during system restore, when the system is going back to its pre-hibernation
-    state, after restore() callbacks have been executed for all devices.
-It also may be executed if the loading of a hibernation image into memory fails
-(in that case it is run after thaw() callbacks have been executed for all
-devices that have drivers in the boot kernel).
-
-This callback is entirely optional, although it may be necessary if the
-prepare() callback performs operations that need to be reversed.
-
-3.1.15. runtime_suspend()
-
-The runtime_suspend() callback is specific to device runtime power management
-(runtime PM).  It is executed by the PM core's runtime PM framework when the
-device is about to be suspended (i.e. quiesced and put into a low-power state)
-at run time.
-
-This callback is responsible for freezing the device and preparing it to be
-put into a low-power state, but it must allow the PCI subsystem to perform all
-of the PCI-specific actions necessary for suspending the device.
-
-3.1.16. runtime_resume()
-
-The runtime_resume() callback is specific to device runtime PM.  It is executed
-by the PM core's runtime PM framework when the device is about to be resumed
-(i.e. put into the full-power state and programmed to process I/O normally) at
-run time.
-
-This callback is responsible for restoring the normal functionality of the
-device after it has been put into the full-power state by the PCI subsystem.
-The device is expected to be able to process I/O in the usual way after
-runtime_resume() has returned.
-
-3.1.17. runtime_idle()
-
-The runtime_idle() callback is specific to device runtime PM.  It is executed
-by the PM core's runtime PM framework whenever it may be desirable to suspend
-the device according to the PM core's information.  In particular, it is
-automatically executed right after runtime_resume() has returned in case the
-resume of the device has happened as a result of a spurious event.
-
-This callback is optional, but if it is not implemented or if it returns 0, the
-PCI subsystem will call pm_runtime_suspend() for the device, which in turn will
-cause the driver's runtime_suspend() callback to be executed.
-
-3.1.18. Pointing Multiple Callback Pointers to One Routine
-
-Although in principle each of the callbacks described in the previous
-subsections can be defined as a separate function, it often is convenient to
-point two or more members of struct dev_pm_ops to the same routine.  There are
-a few convenience macros that can be used for this purpose.
-
-The SIMPLE_DEV_PM_OPS macro declares a struct dev_pm_ops object with one
-suspend routine pointed to by the .suspend(), .freeze(), and .poweroff()
-members and one resume routine pointed to by the .resume(), .thaw(), and
-.restore() members.  The other function pointers in this struct dev_pm_ops are
-unset.
-
-The UNIVERSAL_DEV_PM_OPS macro is similar to SIMPLE_DEV_PM_OPS, but it
-additionally sets the .runtime_resume() pointer to the same value as
-.resume() (and .thaw(), and .restore()) and the .runtime_suspend() pointer to
-the same value as .suspend() (and .freeze() and .poweroff()).
-
-The SET_SYSTEM_SLEEP_PM_OPS can be used inside of a declaration of struct
-dev_pm_ops to indicate that one suspend routine is to be pointed to by the
-.suspend(), .freeze(), and .poweroff() members and one resume routine is to
-be pointed to by the .resume(), .thaw(), and .restore() members.
-
-3.1.19. Driver Flags for Power Management
-
-The PM core allows device drivers to set flags that influence the handling of
-power management for the devices by the core itself and by middle layer code
-including the PCI bus type.  The flags should be set once at the driver probe
-time with the help of the dev_pm_set_driver_flags() function and they should not
-be updated directly afterwards.
-
-The DPM_FLAG_NEVER_SKIP flag prevents the PM core from using the direct-complete
-mechanism allowing device suspend/resume callbacks to be skipped if the device
-is in runtime suspend when the system suspend starts.  That also affects all of
-the ancestors of the device, so this flag should only be used if absolutely
-necessary.
-
-The DPM_FLAG_SMART_PREPARE flag instructs the PCI bus type to only return a
-positive value from pci_pm_prepare() if the ->prepare callback provided by the
-driver of the device returns a positive value.  That allows the driver to opt
-out from using the direct-complete mechanism dynamically.
-
-The DPM_FLAG_SMART_SUSPEND flag tells the PCI bus type that from the driver's
-perspective the device can be safely left in runtime suspend during system
-suspend.  That causes pci_pm_suspend(), pci_pm_freeze() and pci_pm_poweroff()
-to skip resuming the device from runtime suspend unless there are PCI-specific
-reasons for doing that.  Also, it causes pci_pm_suspend_late/noirq(),
-pci_pm_freeze_late/noirq() and pci_pm_poweroff_late/noirq() to return early
-if the device remains in runtime suspend in the beginning of the "late" phase
-of the system-wide transition under way.  Moreover, if the device is in
-runtime suspend in pci_pm_resume_noirq() or pci_pm_restore_noirq(), its runtime
-power management status will be changed to "active" (as it is going to be put
-into D0 going forward), but if it is in runtime suspend in pci_pm_thaw_noirq(),
-the function will set the power.direct_complete flag for it (to make the PM core
-skip the subsequent "thaw" callbacks for it) and return.
-
-Setting the DPM_FLAG_LEAVE_SUSPENDED flag means that the driver prefers the
-device to be left in suspend after system-wide transitions to the working state.
-This flag is checked by the PM core, but the PCI bus type informs the PM core
-which devices may be left in suspend from its perspective (that happens during
-the "noirq" phase of system-wide suspend and analogous transitions) and next it
-uses the dev_pm_may_skip_resume() helper to decide whether or not to return from
-pci_pm_resume_noirq() early, as the PM core will skip the remaining resume
-callbacks for the device during the transition under way and will set its
-runtime PM status to "suspended" if dev_pm_may_skip_resume() returns "true" for
-it.
-
-3.2. Device Runtime Power Management
-------------------------------------
-In addition to providing device power management callbacks PCI device drivers
-are responsible for controlling the runtime power management (runtime PM) of
-their devices.
-
-The PCI device runtime PM is optional, but it is recommended that PCI device
-drivers implement it at least in the cases where there is a reliable way of
-verifying that the device is not used (like when the network cable is detached
-from an Ethernet adapter or there are no devices attached to a USB controller).
-
-To support the PCI runtime PM the driver first needs to implement the
-runtime_suspend() and runtime_resume() callbacks.  It also may need to implement
-the runtime_idle() callback to prevent the device from being suspended again
-every time right after the runtime_resume() callback has returned
-(alternatively, the runtime_suspend() callback will have to check if the
-device should really be suspended and return -EAGAIN if that is not the case).
-
-The runtime PM of PCI devices is enabled by default by the PCI core.  PCI
-device drivers do not need to enable it and should not attempt to do so.
-However, it is blocked by pci_pm_init() that runs the pm_runtime_forbid()
-helper function.  In addition to that, the runtime PM usage counter of
-each PCI device is incremented by local_pci_probe() before executing the
-probe callback provided by the device's driver.
-
-If a PCI driver implements the runtime PM callbacks and intends to use the
-runtime PM framework provided by the PM core and the PCI subsystem, it needs
-to decrement the device's runtime PM usage counter in its probe callback
-function.  If it doesn't do that, the counter will always be different from
-zero for the device and it will never be runtime-suspended.  The simplest
-way to do that is by calling pm_runtime_put_noidle(), but if the driver
-wants to schedule an autosuspend right away, for example, it may call
-pm_runtime_put_autosuspend() instead for this purpose.  Generally, it
-just needs to call a function that decrements the devices usage counter
-from its probe routine to make runtime PM work for the device.
-
-It is important to remember that the driver's runtime_suspend() callback
-may be executed right after the usage counter has been decremented, because
-user space may already have caused the pm_runtime_allow() helper function
-unblocking the runtime PM of the device to run via sysfs, so the driver must
-be prepared to cope with that.
-
-The driver itself should not call pm_runtime_allow(), though.  Instead, it
-should let user space or some platform-specific code do that (user space can
-do it via sysfs as stated above), but it must be prepared to handle the
-runtime PM of the device correctly as soon as pm_runtime_allow() is called
-(which may happen at any time, even before the driver is loaded).
-
-When the driver's remove callback runs, it has to balance the decrementation
-of the device's runtime PM usage counter at the probe time.  For this reason,
-if it has decremented the counter in its probe callback, it must run
-pm_runtime_get_noresume() in its remove callback.  [Since the core carries
-out a runtime resume of the device and bumps up the device's usage counter
-before running the driver's remove callback, the runtime PM of the device
-is effectively disabled for the duration of the remove execution and all
-runtime PM helper functions incrementing the device's usage counter are
-then effectively equivalent to pm_runtime_get_noresume().]
-
-The runtime PM framework works by processing requests to suspend or resume
-devices, or to check if they are idle (in which cases it is reasonable to
-subsequently request that they be suspended).  These requests are represented
-by work items put into the power management workqueue, pm_wq.  Although there
-are a few situations in which power management requests are automatically
-queued by the PM core (for example, after processing a request to resume a
-device the PM core automatically queues a request to check if the device is
-idle), device drivers are generally responsible for queuing power management
-requests for their devices.  For this purpose they should use the runtime PM
-helper functions provided by the PM core, discussed in
-Documentation/power/runtime_pm.txt.
-
-Devices can also be suspended and resumed synchronously, without placing a
-request into pm_wq.  In the majority of cases this also is done by their
-drivers that use helper functions provided by the PM core for this purpose.
-
-For more information on the runtime PM of devices refer to
-Documentation/power/runtime_pm.txt.
-
-
-4. Resources
-============
-
-PCI Local Bus Specification, Rev. 3.0
-PCI Bus Power Management Interface Specification, Rev. 1.2
-Advanced Configuration and Power Interface (ACPI) Specification, Rev. 3.0b
-PCI Express Base Specification, Rev. 2.0
-Documentation/driver-api/pm/devices.rst
-Documentation/power/runtime_pm.txt
diff --git a/Documentation/power/pm_qos_interface.rst b/Documentation/power/pm_qos_interface.rst
new file mode 100644
index 000000000000..945fc6d760c9
--- /dev/null
+++ b/Documentation/power/pm_qos_interface.rst
@@ -0,0 +1,225 @@
+===============================
+PM Quality Of Service Interface
+===============================
+
+This interface provides a kernel and user mode interface for registering
+performance expectations by drivers, subsystems and user space applications on
+one of the parameters.
+
+Two different PM QoS frameworks are available:
+1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput,
+memory_bandwidth.
+2. the per-device PM QoS framework provides the API to manage the per-device latency
+constraints and PM QoS flags.
+
+Each parameters have defined units:
+
+ * latency: usec
+ * timeout: usec
+ * throughput: kbs (kilo bit / sec)
+ * memory bandwidth: mbs (mega bit / sec)
+
+
+1. PM QoS framework
+===================
+
+The infrastructure exposes multiple misc device nodes one per implemented
+parameter.  The set of parameters implement is defined by pm_qos_power_init()
+and pm_qos_params.h.  This is done because having the available parameters
+being runtime configurable or changeable from a driver was seen as too easy to
+abuse.
+
+For each parameter a list of performance requests is maintained along with
+an aggregated target value.  The aggregated target value is updated with
+changes to the request list or elements of the list.  Typically the
+aggregated target value is simply the max or min of the request values held
+in the parameter list elements.
+Note: the aggregated target value is implemented as an atomic variable so that
+reading the aggregated value does not require any locking mechanism.
+
+
+From kernel mode the use of this interface is simple:
+
+void pm_qos_add_request(handle, param_class, target_value):
+  Will insert an element into the list for that identified PM QoS class with the
+  target value.  Upon change to this list the new target is recomputed and any
+  registered notifiers are called only if the target value is now different.
+  Clients of pm_qos need to save the returned handle for future use in other
+  pm_qos API functions.
+
+void pm_qos_update_request(handle, new_target_value):
+  Will update the list element pointed to by the handle with the new target value
+  and recompute the new aggregated target, calling the notification tree if the
+  target is changed.
+
+void pm_qos_remove_request(handle):
+  Will remove the element.  After removal it will update the aggregate target and
+  call the notification tree if the target was changed as a result of removing
+  the request.
+
+int pm_qos_request(param_class):
+  Returns the aggregated value for a given PM QoS class.
+
+int pm_qos_request_active(handle):
+  Returns if the request is still active, i.e. it has not been removed from a
+  PM QoS class constraints list.
+
+int pm_qos_add_notifier(param_class, notifier):
+  Adds a notification callback function to the PM QoS class. The callback is
+  called when the aggregated value for the PM QoS class is changed.
+
+int pm_qos_remove_notifier(int param_class, notifier):
+  Removes the notification callback function for the PM QoS class.
+
+
+From user mode:
+
+Only processes can register a pm_qos request.  To provide for automatic
+cleanup of a process, the interface requires the process to register its
+parameter requests in the following way:
+
+To register the default pm_qos target for the specific parameter, the process
+must open one of /dev/[cpu_dma_latency, network_latency, network_throughput]
+
+As long as the device node is held open that process has a registered
+request on the parameter.
+
+To change the requested target value the process needs to write an s32 value to
+the open device node.  Alternatively the user mode program could write a hex
+string for the value using 10 char long format e.g. "0x12345678".  This
+translates to a pm_qos_update_request call.
+
+To remove the user mode request for a target value simply close the device
+node.
+
+
+2. PM QoS per-device latency and flags framework
+================================================
+
+For each device, there are three lists of PM QoS requests. Two of them are
+maintained along with the aggregated targets of resume latency and active
+state latency tolerance (in microseconds) and the third one is for PM QoS flags.
+Values are updated in response to changes of the request list.
+
+The target values of resume latency and active state latency tolerance are
+simply the minimum of the request values held in the parameter list elements.
+The PM QoS flags aggregate value is a gather (bitwise OR) of all list elements'
+values.  One device PM QoS flag is defined currently: PM_QOS_FLAG_NO_POWER_OFF.
+
+Note: The aggregated target values are implemented in such a way that reading
+the aggregated value does not require any locking mechanism.
+
+
+From kernel mode the use of this interface is the following:
+
+int dev_pm_qos_add_request(device, handle, type, value):
+  Will insert an element into the list for that identified device with the
+  target value.  Upon change to this list the new target is recomputed and any
+  registered notifiers are called only if the target value is now different.
+  Clients of dev_pm_qos need to save the handle for future use in other
+  dev_pm_qos API functions.
+
+int dev_pm_qos_update_request(handle, new_value):
+  Will update the list element pointed to by the handle with the new target
+  value and recompute the new aggregated target, calling the notification
+  trees if the target is changed.
+
+int dev_pm_qos_remove_request(handle):
+  Will remove the element.  After removal it will update the aggregate target
+  and call the notification trees if the target was changed as a result of
+  removing the request.
+
+s32 dev_pm_qos_read_value(device):
+  Returns the aggregated value for a given device's constraints list.
+
+enum pm_qos_flags_status dev_pm_qos_flags(device, mask)
+  Check PM QoS flags of the given device against the given mask of flags.
+  The meaning of the return values is as follows:
+
+	PM_QOS_FLAGS_ALL:
+		All flags from the mask are set
+	PM_QOS_FLAGS_SOME:
+		Some flags from the mask are set
+	PM_QOS_FLAGS_NONE:
+		No flags from the mask are set
+	PM_QOS_FLAGS_UNDEFINED:
+		The device's PM QoS structure has not been initialized
+		or the list of requests is empty.
+
+int dev_pm_qos_add_ancestor_request(dev, handle, type, value)
+  Add a PM QoS request for the first direct ancestor of the given device whose
+  power.ignore_children flag is unset (for DEV_PM_QOS_RESUME_LATENCY requests)
+  or whose power.set_latency_tolerance callback pointer is not NULL (for
+  DEV_PM_QOS_LATENCY_TOLERANCE requests).
+
+int dev_pm_qos_expose_latency_limit(device, value)
+  Add a request to the device's PM QoS list of resume latency constraints and
+  create a sysfs attribute pm_qos_resume_latency_us under the device's power
+  directory allowing user space to manipulate that request.
+
+void dev_pm_qos_hide_latency_limit(device)
+  Drop the request added by dev_pm_qos_expose_latency_limit() from the device's
+  PM QoS list of resume latency constraints and remove sysfs attribute
+  pm_qos_resume_latency_us from the device's power directory.
+
+int dev_pm_qos_expose_flags(device, value)
+  Add a request to the device's PM QoS list of flags and create sysfs attribute
+  pm_qos_no_power_off under the device's power directory allowing user space to
+  change the value of the PM_QOS_FLAG_NO_POWER_OFF flag.
+
+void dev_pm_qos_hide_flags(device)
+  Drop the request added by dev_pm_qos_expose_flags() from the device's PM QoS list
+  of flags and remove sysfs attribute pm_qos_no_power_off from the device's power
+  directory.
+
+Notification mechanisms:
+
+The per-device PM QoS framework has a per-device notification tree.
+
+int dev_pm_qos_add_notifier(device, notifier):
+  Adds a notification callback function for the device.
+  The callback is called when the aggregated value of the device constraints list
+  is changed (for resume latency device PM QoS only).
+
+int dev_pm_qos_remove_notifier(device, notifier):
+  Removes the notification callback function for the device.
+
+
+Active state latency tolerance
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This device PM QoS type is used to support systems in which hardware may switch
+to energy-saving operation modes on the fly.  In those systems, if the operation
+mode chosen by the hardware attempts to save energy in an overly aggressive way,
+it may cause excess latencies to be visible to software, causing it to miss
+certain protocol requirements or target frame or sample rates etc.
+
+If there is a latency tolerance control mechanism for a given device available
+to software, the .set_latency_tolerance callback in that device's dev_pm_info
+structure should be populated.  The routine pointed to by it is should implement
+whatever is necessary to transfer the effective requirement value to the
+hardware.
+
+Whenever the effective latency tolerance changes for the device, its
+.set_latency_tolerance() callback will be executed and the effective value will
+be passed to it.  If that value is negative, which means that the list of
+latency tolerance requirements for the device is empty, the callback is expected
+to switch the underlying hardware latency tolerance control mechanism to an
+autonomous mode if available.  If that value is PM_QOS_LATENCY_ANY, in turn, and
+the hardware supports a special "no requirement" setting, the callback is
+expected to use it.  That allows software to prevent the hardware from
+automatically updating the device's latency tolerance in response to its power
+state changes (e.g. during transitions from D3cold to D0), which generally may
+be done in the autonomous latency tolerance control mode.
+
+If .set_latency_tolerance() is present for the device, sysfs attribute
+pm_qos_latency_tolerance_us will be present in the devivce's power directory.
+Then, user space can use that attribute to specify its latency tolerance
+requirement for the device, if any.  Writing "any" to it means "no requirement,
+but do not let the hardware control latency tolerance" and writing "auto" to it
+allows the hardware to be switched to the autonomous mode if there are no other
+requirements from the kernel side in the device's list.
+
+Kernel code can use the functions described above along with the
+DEV_PM_QOS_LATENCY_TOLERANCE device PM QoS type to add, remove and update
+latency tolerance requirements for devices.
diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt
deleted file mode 100644
index 19c5f7b1a7ba..000000000000
--- a/Documentation/power/pm_qos_interface.txt
+++ /dev/null
@@ -1,212 +0,0 @@
-PM Quality Of Service Interface.
-
-This interface provides a kernel and user mode interface for registering
-performance expectations by drivers, subsystems and user space applications on
-one of the parameters.
-
-Two different PM QoS frameworks are available:
-1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput,
-memory_bandwidth.
-2. the per-device PM QoS framework provides the API to manage the per-device latency
-constraints and PM QoS flags.
-
-Each parameters have defined units:
- * latency: usec
- * timeout: usec
- * throughput: kbs (kilo bit / sec)
- * memory bandwidth: mbs (mega bit / sec)
-
-
-1. PM QoS framework
-
-The infrastructure exposes multiple misc device nodes one per implemented
-parameter.  The set of parameters implement is defined by pm_qos_power_init()
-and pm_qos_params.h.  This is done because having the available parameters
-being runtime configurable or changeable from a driver was seen as too easy to
-abuse.
-
-For each parameter a list of performance requests is maintained along with
-an aggregated target value.  The aggregated target value is updated with
-changes to the request list or elements of the list.  Typically the
-aggregated target value is simply the max or min of the request values held
-in the parameter list elements.
-Note: the aggregated target value is implemented as an atomic variable so that
-reading the aggregated value does not require any locking mechanism.
-
-
-From kernel mode the use of this interface is simple:
-
-void pm_qos_add_request(handle, param_class, target_value):
-Will insert an element into the list for that identified PM QoS class with the
-target value.  Upon change to this list the new target is recomputed and any
-registered notifiers are called only if the target value is now different.
-Clients of pm_qos need to save the returned handle for future use in other
-pm_qos API functions.
-
-void pm_qos_update_request(handle, new_target_value):
-Will update the list element pointed to by the handle with the new target value
-and recompute the new aggregated target, calling the notification tree if the
-target is changed.
-
-void pm_qos_remove_request(handle):
-Will remove the element.  After removal it will update the aggregate target and
-call the notification tree if the target was changed as a result of removing
-the request.
-
-int pm_qos_request(param_class):
-Returns the aggregated value for a given PM QoS class.
-
-int pm_qos_request_active(handle):
-Returns if the request is still active, i.e. it has not been removed from a
-PM QoS class constraints list.
-
-int pm_qos_add_notifier(param_class, notifier):
-Adds a notification callback function to the PM QoS class. The callback is
-called when the aggregated value for the PM QoS class is changed.
-
-int pm_qos_remove_notifier(int param_class, notifier):
-Removes the notification callback function for the PM QoS class.
-
-
-From user mode:
-Only processes can register a pm_qos request.  To provide for automatic
-cleanup of a process, the interface requires the process to register its
-parameter requests in the following way:
-
-To register the default pm_qos target for the specific parameter, the process
-must open one of /dev/[cpu_dma_latency, network_latency, network_throughput]
-
-As long as the device node is held open that process has a registered
-request on the parameter.
-
-To change the requested target value the process needs to write an s32 value to
-the open device node.  Alternatively the user mode program could write a hex
-string for the value using 10 char long format e.g. "0x12345678".  This
-translates to a pm_qos_update_request call.
-
-To remove the user mode request for a target value simply close the device
-node.
-
-
-2. PM QoS per-device latency and flags framework
-
-For each device, there are three lists of PM QoS requests. Two of them are
-maintained along with the aggregated targets of resume latency and active
-state latency tolerance (in microseconds) and the third one is for PM QoS flags.
-Values are updated in response to changes of the request list.
-
-The target values of resume latency and active state latency tolerance are
-simply the minimum of the request values held in the parameter list elements.
-The PM QoS flags aggregate value is a gather (bitwise OR) of all list elements'
-values.  One device PM QoS flag is defined currently: PM_QOS_FLAG_NO_POWER_OFF.
-
-Note: The aggregated target values are implemented in such a way that reading
-the aggregated value does not require any locking mechanism.
-
-
-From kernel mode the use of this interface is the following:
-
-int dev_pm_qos_add_request(device, handle, type, value):
-Will insert an element into the list for that identified device with the
-target value.  Upon change to this list the new target is recomputed and any
-registered notifiers are called only if the target value is now different.
-Clients of dev_pm_qos need to save the handle for future use in other
-dev_pm_qos API functions.
-
-int dev_pm_qos_update_request(handle, new_value):
-Will update the list element pointed to by the handle with the new target value
-and recompute the new aggregated target, calling the notification trees if the
-target is changed.
-
-int dev_pm_qos_remove_request(handle):
-Will remove the element.  After removal it will update the aggregate target and
-call the notification trees if the target was changed as a result of removing
-the request.
-
-s32 dev_pm_qos_read_value(device):
-Returns the aggregated value for a given device's constraints list.
-
-enum pm_qos_flags_status dev_pm_qos_flags(device, mask)
-Check PM QoS flags of the given device against the given mask of flags.
-The meaning of the return values is as follows:
-	PM_QOS_FLAGS_ALL: All flags from the mask are set
-	PM_QOS_FLAGS_SOME: Some flags from the mask are set
-	PM_QOS_FLAGS_NONE: No flags from the mask are set
-	PM_QOS_FLAGS_UNDEFINED: The device's PM QoS structure has not been
-			initialized or the list of requests is empty.
-
-int dev_pm_qos_add_ancestor_request(dev, handle, type, value)
-Add a PM QoS request for the first direct ancestor of the given device whose
-power.ignore_children flag is unset (for DEV_PM_QOS_RESUME_LATENCY requests)
-or whose power.set_latency_tolerance callback pointer is not NULL (for
-DEV_PM_QOS_LATENCY_TOLERANCE requests).
-
-int dev_pm_qos_expose_latency_limit(device, value)
-Add a request to the device's PM QoS list of resume latency constraints and
-create a sysfs attribute pm_qos_resume_latency_us under the device's power
-directory allowing user space to manipulate that request.
-
-void dev_pm_qos_hide_latency_limit(device)
-Drop the request added by dev_pm_qos_expose_latency_limit() from the device's
-PM QoS list of resume latency constraints and remove sysfs attribute
-pm_qos_resume_latency_us from the device's power directory.
-
-int dev_pm_qos_expose_flags(device, value)
-Add a request to the device's PM QoS list of flags and create sysfs attribute
-pm_qos_no_power_off under the device's power directory allowing user space to
-change the value of the PM_QOS_FLAG_NO_POWER_OFF flag.
-
-void dev_pm_qos_hide_flags(device)
-Drop the request added by dev_pm_qos_expose_flags() from the device's PM QoS list
-of flags and remove sysfs attribute pm_qos_no_power_off from the device's power
-directory.
-
-Notification mechanisms:
-The per-device PM QoS framework has a per-device notification tree.
-
-int dev_pm_qos_add_notifier(device, notifier):
-Adds a notification callback function for the device.
-The callback is called when the aggregated value of the device constraints list
-is changed (for resume latency device PM QoS only).
-
-int dev_pm_qos_remove_notifier(device, notifier):
-Removes the notification callback function for the device.
-
-
-Active state latency tolerance
-
-This device PM QoS type is used to support systems in which hardware may switch
-to energy-saving operation modes on the fly.  In those systems, if the operation
-mode chosen by the hardware attempts to save energy in an overly aggressive way,
-it may cause excess latencies to be visible to software, causing it to miss
-certain protocol requirements or target frame or sample rates etc.
-
-If there is a latency tolerance control mechanism for a given device available
-to software, the .set_latency_tolerance callback in that device's dev_pm_info
-structure should be populated.  The routine pointed to by it is should implement
-whatever is necessary to transfer the effective requirement value to the
-hardware.
-
-Whenever the effective latency tolerance changes for the device, its
-.set_latency_tolerance() callback will be executed and the effective value will
-be passed to it.  If that value is negative, which means that the list of
-latency tolerance requirements for the device is empty, the callback is expected
-to switch the underlying hardware latency tolerance control mechanism to an
-autonomous mode if available.  If that value is PM_QOS_LATENCY_ANY, in turn, and
-the hardware supports a special "no requirement" setting, the callback is
-expected to use it.  That allows software to prevent the hardware from
-automatically updating the device's latency tolerance in response to its power
-state changes (e.g. during transitions from D3cold to D0), which generally may
-be done in the autonomous latency tolerance control mode.
-
-If .set_latency_tolerance() is present for the device, sysfs attribute
-pm_qos_latency_tolerance_us will be present in the devivce's power directory.
-Then, user space can use that attribute to specify its latency tolerance
-requirement for the device, if any.  Writing "any" to it means "no requirement,
-but do not let the hardware control latency tolerance" and writing "auto" to it
-allows the hardware to be switched to the autonomous mode if there are no other
-requirements from the kernel side in the device's list.
-
-Kernel code can use the functions described above along with the
-DEV_PM_QOS_LATENCY_TOLERANCE device PM QoS type to add, remove and update
-latency tolerance requirements for devices.
diff --git a/Documentation/power/power_supply_class.rst b/Documentation/power/power_supply_class.rst
new file mode 100644
index 000000000000..3f2c3fe38a61
--- /dev/null
+++ b/Documentation/power/power_supply_class.rst
@@ -0,0 +1,282 @@
+========================
+Linux power supply class
+========================
+
+Synopsis
+~~~~~~~~
+Power supply class used to represent battery, UPS, AC or DC power supply
+properties to user-space.
+
+It defines core set of attributes, which should be applicable to (almost)
+every power supply out there. Attributes are available via sysfs and uevent
+interfaces.
+
+Each attribute has well defined meaning, up to unit of measure used. While
+the attributes provided are believed to be universally applicable to any
+power supply, specific monitoring hardware may not be able to provide them
+all, so any of them may be skipped.
+
+Power supply class is extensible, and allows to define drivers own attributes.
+The core attribute set is subject to the standard Linux evolution (i.e.
+if it will be found that some attribute is applicable to many power supply
+types or their drivers, it can be added to the core set).
+
+It also integrates with LED framework, for the purpose of providing
+typically expected feedback of battery charging/fully charged status and
+AC/USB power supply online status. (Note that specific details of the
+indication (including whether to use it at all) are fully controllable by
+user and/or specific machine defaults, per design principles of LED
+framework).
+
+
+Attributes/properties
+~~~~~~~~~~~~~~~~~~~~~
+Power supply class has predefined set of attributes, this eliminates code
+duplication across drivers. Power supply class insist on reusing its
+predefined attributes *and* their units.
+
+So, userspace gets predictable set of attributes and their units for any
+kind of power supply, and can process/present them to a user in consistent
+manner. Results for different power supplies and machines are also directly
+comparable.
+
+See drivers/power/supply/ds2760_battery.c and drivers/power/supply/pda_power.c
+for the example how to declare and handle attributes.
+
+
+Units
+~~~~~
+Quoting include/linux/power_supply.h:
+
+  All voltages, currents, charges, energies, time and temperatures in µV,
+  µA, µAh, µWh, seconds and tenths of degree Celsius unless otherwise
+  stated. It's driver's job to convert its raw values to units in which
+  this class operates.
+
+
+Attributes/properties detailed
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
++--------------------------------------------------------------------------+
+|               **Charge/Energy/Capacity - how to not confuse**            |
++--------------------------------------------------------------------------+
+| **Because both "charge" (µAh) and "energy" (µWh) represents "capacity"   |
+| of battery, this class distinguish these terms. Don't mix them!**        |
+|                                                                          |
+| - `CHARGE_*`                                                             |
+|	attributes represents capacity in µAh only.                        |
+| - `ENERGY_*`                                                             |
+|	attributes represents capacity in µWh only.                        |
+| - `CAPACITY`                                                             |
+|	attribute represents capacity in *percents*, from 0 to 100.        |
++--------------------------------------------------------------------------+
+
+Postfixes:
+
+_AVG
+  *hardware* averaged value, use it if your hardware is really able to
+  report averaged values.
+_NOW
+  momentary/instantaneous values.
+
+STATUS
+  this attribute represents operating status (charging, full,
+  discharging (i.e. powering a load), etc.). This corresponds to
+  `BATTERY_STATUS_*` values, as defined in battery.h.
+
+CHARGE_TYPE
+  batteries can typically charge at different rates.
+  This defines trickle and fast charges.  For batteries that
+  are already charged or discharging, 'n/a' can be displayed (or
+  'unknown', if the status is not known).
+
+AUTHENTIC
+  indicates the power supply (battery or charger) connected
+  to the platform is authentic(1) or non authentic(0).
+
+HEALTH
+  represents health of the battery, values corresponds to
+  POWER_SUPPLY_HEALTH_*, defined in battery.h.
+
+VOLTAGE_OCV
+  open circuit voltage of the battery.
+
+VOLTAGE_MAX_DESIGN, VOLTAGE_MIN_DESIGN
+  design values for maximal and minimal power supply voltages.
+  Maximal/minimal means values of voltages when battery considered
+  "full"/"empty" at normal conditions. Yes, there is no direct relation
+  between voltage and battery capacity, but some dumb
+  batteries use voltage for very approximated calculation of capacity.
+  Battery driver also can use this attribute just to inform userspace
+  about maximal and minimal voltage thresholds of a given battery.
+
+VOLTAGE_MAX, VOLTAGE_MIN
+  same as _DESIGN voltage values except that these ones should be used
+  if hardware could only guess (measure and retain) the thresholds of a
+  given power supply.
+
+VOLTAGE_BOOT
+  Reports the voltage measured during boot
+
+CURRENT_BOOT
+  Reports the current measured during boot
+
+CHARGE_FULL_DESIGN, CHARGE_EMPTY_DESIGN
+  design charge values, when battery considered full/empty.
+
+ENERGY_FULL_DESIGN, ENERGY_EMPTY_DESIGN
+  same as above but for energy.
+
+CHARGE_FULL, CHARGE_EMPTY
+  These attributes means "last remembered value of charge when battery
+  became full/empty". It also could mean "value of charge when battery
+  considered full/empty at given conditions (temperature, age)".
+  I.e. these attributes represents real thresholds, not design values.
+
+ENERGY_FULL, ENERGY_EMPTY
+  same as above but for energy.
+
+CHARGE_COUNTER
+  the current charge counter (in µAh).  This could easily
+  be negative; there is no empty or full value.  It is only useful for
+  relative, time-based measurements.
+
+PRECHARGE_CURRENT
+  the maximum charge current during precharge phase of charge cycle
+  (typically 20% of battery capacity).
+
+CHARGE_TERM_CURRENT
+  Charge termination current. The charge cycle terminates when battery
+  voltage is above recharge threshold, and charge current is below
+  this setting (typically 10% of battery capacity).
+
+CONSTANT_CHARGE_CURRENT
+  constant charge current programmed by charger.
+
+
+CONSTANT_CHARGE_CURRENT_MAX
+  maximum charge current supported by the power supply object.
+
+CONSTANT_CHARGE_VOLTAGE
+  constant charge voltage programmed by charger.
+CONSTANT_CHARGE_VOLTAGE_MAX
+  maximum charge voltage supported by the power supply object.
+
+INPUT_CURRENT_LIMIT
+  input current limit programmed by charger. Indicates
+  the current drawn from a charging source.
+
+CHARGE_CONTROL_LIMIT
+  current charge control limit setting
+CHARGE_CONTROL_LIMIT_MAX
+  maximum charge control limit setting
+
+CALIBRATE
+  battery or coulomb counter calibration status
+
+CAPACITY
+  capacity in percents.
+CAPACITY_ALERT_MIN
+  minimum capacity alert value in percents.
+CAPACITY_ALERT_MAX
+  maximum capacity alert value in percents.
+CAPACITY_LEVEL
+  capacity level. This corresponds to POWER_SUPPLY_CAPACITY_LEVEL_*.
+
+TEMP
+  temperature of the power supply.
+TEMP_ALERT_MIN
+  minimum battery temperature alert.
+TEMP_ALERT_MAX
+  maximum battery temperature alert.
+TEMP_AMBIENT
+  ambient temperature.
+TEMP_AMBIENT_ALERT_MIN
+  minimum ambient temperature alert.
+TEMP_AMBIENT_ALERT_MAX
+  maximum ambient temperature alert.
+TEMP_MIN
+  minimum operatable temperature
+TEMP_MAX
+  maximum operatable temperature
+
+TIME_TO_EMPTY
+  seconds left for battery to be considered empty
+  (i.e. while battery powers a load)
+TIME_TO_FULL
+  seconds left for battery to be considered full
+  (i.e. while battery is charging)
+
+
+Battery <-> external power supply interaction
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Often power supplies are acting as supplies and supplicants at the same
+time. Batteries are good example. So, batteries usually care if they're
+externally powered or not.
+
+For that case, power supply class implements notification mechanism for
+batteries.
+
+External power supply (AC) lists supplicants (batteries) names in
+"supplied_to" struct member, and each power_supply_changed() call
+issued by external power supply will notify supplicants via
+external_power_changed callback.
+
+
+Devicetree battery characteristics
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Drivers should call power_supply_get_battery_info() to obtain battery
+characteristics from a devicetree battery node, defined in
+Documentation/devicetree/bindings/power/supply/battery.txt. This is
+implemented in drivers/power/supply/bq27xxx_battery.c.
+
+Properties in struct power_supply_battery_info and their counterparts in the
+battery node have names corresponding to elements in enum power_supply_property,
+for naming consistency between sysfs attributes and battery node properties.
+
+
+QA
+~~
+
+Q:
+   Where is POWER_SUPPLY_PROP_XYZ attribute?
+A:
+   If you cannot find attribute suitable for your driver needs, feel free
+   to add it and send patch along with your driver.
+
+   The attributes available currently are the ones currently provided by the
+   drivers written.
+
+   Good candidates to add in future: model/part#, cycle_time, manufacturer,
+   etc.
+
+
+Q:
+   I have some very specific attribute (e.g. battery color), should I add
+   this attribute to standard ones?
+A:
+   Most likely, no. Such attribute can be placed in the driver itself, if
+   it is useful. Of course, if the attribute in question applicable to
+   large set of batteries, provided by many drivers, and/or comes from
+   some general battery specification/standard, it may be a candidate to
+   be added to the core attribute set.
+
+
+Q:
+   Suppose, my battery monitoring chip/firmware does not provides capacity
+   in percents, but provides charge_{now,full,empty}. Should I calculate
+   percentage capacity manually, inside the driver, and register CAPACITY
+   attribute? The same question about time_to_empty/time_to_full.
+A:
+   Most likely, no. This class is designed to export properties which are
+   directly measurable by the specific hardware available.
+
+   Inferring not available properties using some heuristics or mathematical
+   model is not subject of work for a battery driver. Such functionality
+   should be factored out, and in fact, apm_power, the driver to serve
+   legacy APM API on top of power supply class, uses a simple heuristic of
+   approximating remaining battery capacity based on its charge, current,
+   voltage and so on. But full-fledged battery model is likely not subject
+   for kernel at all, as it would require floating point calculation to deal
+   with things like differential equations and Kalman filters. This is
+   better be handled by batteryd/libbattery, yet to be written.
diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt
deleted file mode 100644
index 300d37896e51..000000000000
--- a/Documentation/power/power_supply_class.txt
+++ /dev/null
@@ -1,231 +0,0 @@
-Linux power supply class
-========================
-
-Synopsis
-~~~~~~~~
-Power supply class used to represent battery, UPS, AC or DC power supply
-properties to user-space.
-
-It defines core set of attributes, which should be applicable to (almost)
-every power supply out there. Attributes are available via sysfs and uevent
-interfaces.
-
-Each attribute has well defined meaning, up to unit of measure used. While
-the attributes provided are believed to be universally applicable to any
-power supply, specific monitoring hardware may not be able to provide them
-all, so any of them may be skipped.
-
-Power supply class is extensible, and allows to define drivers own attributes.
-The core attribute set is subject to the standard Linux evolution (i.e.
-if it will be found that some attribute is applicable to many power supply
-types or their drivers, it can be added to the core set).
-
-It also integrates with LED framework, for the purpose of providing
-typically expected feedback of battery charging/fully charged status and
-AC/USB power supply online status. (Note that specific details of the
-indication (including whether to use it at all) are fully controllable by
-user and/or specific machine defaults, per design principles of LED
-framework).
-
-
-Attributes/properties
-~~~~~~~~~~~~~~~~~~~~~
-Power supply class has predefined set of attributes, this eliminates code
-duplication across drivers. Power supply class insist on reusing its
-predefined attributes *and* their units.
-
-So, userspace gets predictable set of attributes and their units for any
-kind of power supply, and can process/present them to a user in consistent
-manner. Results for different power supplies and machines are also directly
-comparable.
-
-See drivers/power/supply/ds2760_battery.c and drivers/power/supply/pda_power.c
-for the example how to declare and handle attributes.
-
-
-Units
-~~~~~
-Quoting include/linux/power_supply.h:
-
-  All voltages, currents, charges, energies, time and temperatures in µV,
-  µA, µAh, µWh, seconds and tenths of degree Celsius unless otherwise
-  stated. It's driver's job to convert its raw values to units in which
-  this class operates.
-
-
-Attributes/properties detailed
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-~ ~ ~ ~ ~ ~ ~  Charge/Energy/Capacity - how to not confuse  ~ ~ ~ ~ ~ ~ ~
-~                                                                       ~
-~ Because both "charge" (µAh) and "energy" (µWh) represents "capacity"  ~
-~ of battery, this class distinguish these terms. Don't mix them!       ~
-~                                                                       ~
-~ CHARGE_* attributes represents capacity in µAh only.                  ~
-~ ENERGY_* attributes represents capacity in µWh only.                  ~
-~ CAPACITY attribute represents capacity in *percents*, from 0 to 100.  ~
-~                                                                       ~
-~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
-
-Postfixes:
-_AVG - *hardware* averaged value, use it if your hardware is really able to
-report averaged values.
-_NOW - momentary/instantaneous values.
-
-STATUS - this attribute represents operating status (charging, full,
-discharging (i.e. powering a load), etc.). This corresponds to
-BATTERY_STATUS_* values, as defined in battery.h.
-
-CHARGE_TYPE - batteries can typically charge at different rates.
-This defines trickle and fast charges.  For batteries that
-are already charged or discharging, 'n/a' can be displayed (or
-'unknown', if the status is not known).
-
-AUTHENTIC - indicates the power supply (battery or charger) connected
-to the platform is authentic(1) or non authentic(0).
-
-HEALTH - represents health of the battery, values corresponds to
-POWER_SUPPLY_HEALTH_*, defined in battery.h.
-
-VOLTAGE_OCV - open circuit voltage of the battery.
-
-VOLTAGE_MAX_DESIGN, VOLTAGE_MIN_DESIGN - design values for maximal and
-minimal power supply voltages. Maximal/minimal means values of voltages
-when battery considered "full"/"empty" at normal conditions. Yes, there is
-no direct relation between voltage and battery capacity, but some dumb
-batteries use voltage for very approximated calculation of capacity.
-Battery driver also can use this attribute just to inform userspace
-about maximal and minimal voltage thresholds of a given battery.
-
-VOLTAGE_MAX, VOLTAGE_MIN - same as _DESIGN voltage values except that
-these ones should be used if hardware could only guess (measure and
-retain) the thresholds of a given power supply.
-
-VOLTAGE_BOOT - Reports the voltage measured during boot
-
-CURRENT_BOOT - Reports the current measured during boot
-
-CHARGE_FULL_DESIGN, CHARGE_EMPTY_DESIGN - design charge values, when
-battery considered full/empty.
-
-ENERGY_FULL_DESIGN, ENERGY_EMPTY_DESIGN - same as above but for energy.
-
-CHARGE_FULL, CHARGE_EMPTY - These attributes means "last remembered value
-of charge when battery became full/empty". It also could mean "value of
-charge when battery considered full/empty at given conditions (temperature,
-age)". I.e. these attributes represents real thresholds, not design values.
-
-ENERGY_FULL, ENERGY_EMPTY - same as above but for energy.
-
-CHARGE_COUNTER - the current charge counter (in µAh).  This could easily
-be negative; there is no empty or full value.  It is only useful for
-relative, time-based measurements.
-
-PRECHARGE_CURRENT - the maximum charge current during precharge phase
-of charge cycle (typically 20% of battery capacity).
-CHARGE_TERM_CURRENT - Charge termination current. The charge cycle
-terminates when battery voltage is above recharge threshold, and charge
-current is below this setting (typically 10% of battery capacity).
-
-CONSTANT_CHARGE_CURRENT - constant charge current programmed by charger.
-CONSTANT_CHARGE_CURRENT_MAX - maximum charge current supported by the
-power supply object.
-
-CONSTANT_CHARGE_VOLTAGE - constant charge voltage programmed by charger.
-CONSTANT_CHARGE_VOLTAGE_MAX - maximum charge voltage supported by the
-power supply object.
-
-INPUT_CURRENT_LIMIT - input current limit programmed by charger. Indicates
-the current drawn from a charging source.
-
-CHARGE_CONTROL_LIMIT - current charge control limit setting
-CHARGE_CONTROL_LIMIT_MAX - maximum charge control limit setting
-
-CALIBRATE - battery or coulomb counter calibration status
-
-CAPACITY - capacity in percents.
-CAPACITY_ALERT_MIN - minimum capacity alert value in percents.
-CAPACITY_ALERT_MAX - maximum capacity alert value in percents.
-CAPACITY_LEVEL - capacity level. This corresponds to
-POWER_SUPPLY_CAPACITY_LEVEL_*.
-
-TEMP - temperature of the power supply.
-TEMP_ALERT_MIN - minimum battery temperature alert.
-TEMP_ALERT_MAX - maximum battery temperature alert.
-TEMP_AMBIENT - ambient temperature.
-TEMP_AMBIENT_ALERT_MIN - minimum ambient temperature alert.
-TEMP_AMBIENT_ALERT_MAX - maximum ambient temperature alert.
-TEMP_MIN - minimum operatable temperature
-TEMP_MAX - maximum operatable temperature
-
-TIME_TO_EMPTY - seconds left for battery to be considered empty (i.e.
-while battery powers a load)
-TIME_TO_FULL - seconds left for battery to be considered full (i.e.
-while battery is charging)
-
-
-Battery <-> external power supply interaction
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Often power supplies are acting as supplies and supplicants at the same
-time. Batteries are good example. So, batteries usually care if they're
-externally powered or not.
-
-For that case, power supply class implements notification mechanism for
-batteries.
-
-External power supply (AC) lists supplicants (batteries) names in
-"supplied_to" struct member, and each power_supply_changed() call
-issued by external power supply will notify supplicants via
-external_power_changed callback.
-
-
-Devicetree battery characteristics
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Drivers should call power_supply_get_battery_info() to obtain battery
-characteristics from a devicetree battery node, defined in
-Documentation/devicetree/bindings/power/supply/battery.txt. This is
-implemented in drivers/power/supply/bq27xxx_battery.c.
-
-Properties in struct power_supply_battery_info and their counterparts in the
-battery node have names corresponding to elements in enum power_supply_property,
-for naming consistency between sysfs attributes and battery node properties.
-
-
-QA
-~~
-Q: Where is POWER_SUPPLY_PROP_XYZ attribute?
-A: If you cannot find attribute suitable for your driver needs, feel free
-   to add it and send patch along with your driver.
-
-   The attributes available currently are the ones currently provided by the
-   drivers written.
-
-   Good candidates to add in future: model/part#, cycle_time, manufacturer,
-   etc.
-
-
-Q: I have some very specific attribute (e.g. battery color), should I add
-   this attribute to standard ones?
-A: Most likely, no. Such attribute can be placed in the driver itself, if
-   it is useful. Of course, if the attribute in question applicable to
-   large set of batteries, provided by many drivers, and/or comes from
-   some general battery specification/standard, it may be a candidate to
-   be added to the core attribute set.
-
-
-Q: Suppose, my battery monitoring chip/firmware does not provides capacity
-   in percents, but provides charge_{now,full,empty}. Should I calculate
-   percentage capacity manually, inside the driver, and register CAPACITY
-   attribute? The same question about time_to_empty/time_to_full.
-A: Most likely, no. This class is designed to export properties which are
-   directly measurable by the specific hardware available.
-
-   Inferring not available properties using some heuristics or mathematical
-   model is not subject of work for a battery driver. Such functionality
-   should be factored out, and in fact, apm_power, the driver to serve
-   legacy APM API on top of power supply class, uses a simple heuristic of
-   approximating remaining battery capacity based on its charge, current,
-   voltage and so on. But full-fledged battery model is likely not subject
-   for kernel at all, as it would require floating point calculation to deal
-   with things like differential equations and Kalman filters. This is
-   better be handled by batteryd/libbattery, yet to be written.
diff --git a/Documentation/power/powercap/powercap.rst b/Documentation/power/powercap/powercap.rst
new file mode 100644
index 000000000000..7ae3b44c7624
--- /dev/null
+++ b/Documentation/power/powercap/powercap.rst
@@ -0,0 +1,257 @@
+=======================
+Power Capping Framework
+=======================
+
+The power capping framework provides a consistent interface between the kernel
+and the user space that allows power capping drivers to expose the settings to
+user space in a uniform way.
+
+Terminology
+===========
+
+The framework exposes power capping devices to user space via sysfs in the
+form of a tree of objects. The objects at the root level of the tree represent
+'control types', which correspond to different methods of power capping.  For
+example, the intel-rapl control type represents the Intel "Running Average
+Power Limit" (RAPL) technology, whereas the 'idle-injection' control type
+corresponds to the use of idle injection for controlling power.
+
+Power zones represent different parts of the system, which can be controlled and
+monitored using the power capping method determined by the control type the
+given zone belongs to. They each contain attributes for monitoring power, as
+well as controls represented in the form of power constraints.  If the parts of
+the system represented by different power zones are hierarchical (that is, one
+bigger part consists of multiple smaller parts that each have their own power
+controls), those power zones may also be organized in a hierarchy with one
+parent power zone containing multiple subzones and so on to reflect the power
+control topology of the system.  In that case, it is possible to apply power
+capping to a set of devices together using the parent power zone and if more
+fine grained control is required, it can be applied through the subzones.
+
+
+Example sysfs interface tree::
+
+  /sys/devices/virtual/powercap
+  └──intel-rapl
+      ├──intel-rapl:0
+      │   ├──constraint_0_name
+      │   ├──constraint_0_power_limit_uw
+      │   ├──constraint_0_time_window_us
+      │   ├──constraint_1_name
+      │   ├──constraint_1_power_limit_uw
+      │   ├──constraint_1_time_window_us
+      │   ├──device -> ../../intel-rapl
+      │   ├──energy_uj
+      │   ├──intel-rapl:0:0
+      │   │   ├──constraint_0_name
+      │   │   ├──constraint_0_power_limit_uw
+      │   │   ├──constraint_0_time_window_us
+      │   │   ├──constraint_1_name
+      │   │   ├──constraint_1_power_limit_uw
+      │   │   ├──constraint_1_time_window_us
+      │   │   ├──device -> ../../intel-rapl:0
+      │   │   ├──energy_uj
+      │   │   ├──max_energy_range_uj
+      │   │   ├──name
+      │   │   ├──enabled
+      │   │   ├──power
+      │   │   │   ├──async
+      │   │   │   []
+      │   │   ├──subsystem -> ../../../../../../class/power_cap
+      │   │   └──uevent
+      │   ├──intel-rapl:0:1
+      │   │   ├──constraint_0_name
+      │   │   ├──constraint_0_power_limit_uw
+      │   │   ├──constraint_0_time_window_us
+      │   │   ├──constraint_1_name
+      │   │   ├──constraint_1_power_limit_uw
+      │   │   ├──constraint_1_time_window_us
+      │   │   ├──device -> ../../intel-rapl:0
+      │   │   ├──energy_uj
+      │   │   ├──max_energy_range_uj
+      │   │   ├──name
+      │   │   ├──enabled
+      │   │   ├──power
+      │   │   │   ├──async
+      │   │   │   []
+      │   │   ├──subsystem -> ../../../../../../class/power_cap
+      │   │   └──uevent
+      │   ├──max_energy_range_uj
+      │   ├──max_power_range_uw
+      │   ├──name
+      │   ├──enabled
+      │   ├──power
+      │   │   ├──async
+      │   │   []
+      │   ├──subsystem -> ../../../../../class/power_cap
+      │   ├──enabled
+      │   ├──uevent
+      ├──intel-rapl:1
+      │   ├──constraint_0_name
+      │   ├──constraint_0_power_limit_uw
+      │   ├──constraint_0_time_window_us
+      │   ├──constraint_1_name
+      │   ├──constraint_1_power_limit_uw
+      │   ├──constraint_1_time_window_us
+      │   ├──device -> ../../intel-rapl
+      │   ├──energy_uj
+      │   ├──intel-rapl:1:0
+      │   │   ├──constraint_0_name
+      │   │   ├──constraint_0_power_limit_uw
+      │   │   ├──constraint_0_time_window_us
+      │   │   ├──constraint_1_name
+      │   │   ├──constraint_1_power_limit_uw
+      │   │   ├──constraint_1_time_window_us
+      │   │   ├──device -> ../../intel-rapl:1
+      │   │   ├──energy_uj
+      │   │   ├──max_energy_range_uj
+      │   │   ├──name
+      │   │   ├──enabled
+      │   │   ├──power
+      │   │   │   ├──async
+      │   │   │   []
+      │   │   ├──subsystem -> ../../../../../../class/power_cap
+      │   │   └──uevent
+      │   ├──intel-rapl:1:1
+      │   │   ├──constraint_0_name
+      │   │   ├──constraint_0_power_limit_uw
+      │   │   ├──constraint_0_time_window_us
+      │   │   ├──constraint_1_name
+      │   │   ├──constraint_1_power_limit_uw
+      │   │   ├──constraint_1_time_window_us
+      │   │   ├──device -> ../../intel-rapl:1
+      │   │   ├──energy_uj
+      │   │   ├──max_energy_range_uj
+      │   │   ├──name
+      │   │   ├──enabled
+      │   │   ├──power
+      │   │   │   ├──async
+      │   │   │   []
+      │   │   ├──subsystem -> ../../../../../../class/power_cap
+      │   │   └──uevent
+      │   ├──max_energy_range_uj
+      │   ├──max_power_range_uw
+      │   ├──name
+      │   ├──enabled
+      │   ├──power
+      │   │   ├──async
+      │   │   []
+      │   ├──subsystem -> ../../../../../class/power_cap
+      │   ├──uevent
+      ├──power
+      │   ├──async
+      │   []
+      ├──subsystem -> ../../../../class/power_cap
+      ├──enabled
+      └──uevent
+
+The above example illustrates a case in which the Intel RAPL technology,
+available in Intel® IA-64 and IA-32 Processor Architectures, is used. There is one
+control type called intel-rapl which contains two power zones, intel-rapl:0 and
+intel-rapl:1, representing CPU packages.  Each of these power zones contains
+two subzones, intel-rapl:j:0 and intel-rapl:j:1 (j = 0, 1), representing the
+"core" and the "uncore" parts of the given CPU package, respectively.  All of
+the zones and subzones contain energy monitoring attributes (energy_uj,
+max_energy_range_uj) and constraint attributes (constraint_*) allowing controls
+to be applied (the constraints in the 'package' power zones apply to the whole
+CPU packages and the subzone constraints only apply to the respective parts of
+the given package individually). Since Intel RAPL doesn't provide instantaneous
+power value, there is no power_uw attribute.
+
+In addition to that, each power zone contains a name attribute, allowing the
+part of the system represented by that zone to be identified.
+For example::
+
+	cat /sys/class/power_cap/intel-rapl/intel-rapl:0/name
+
+package-0
+---------
+
+The Intel RAPL technology allows two constraints, short term and long term,
+with two different time windows to be applied to each power zone.  Thus for
+each zone there are 2 attributes representing the constraint names, 2 power
+limits and 2 attributes representing the sizes of the time windows. Such that,
+constraint_j_* attributes correspond to the jth constraint (j = 0,1).
+
+For example::
+
+	constraint_0_name
+	constraint_0_power_limit_uw
+	constraint_0_time_window_us
+	constraint_1_name
+	constraint_1_power_limit_uw
+	constraint_1_time_window_us
+
+Power Zone Attributes
+=====================
+
+Monitoring attributes
+---------------------
+
+energy_uj (rw)
+	Current energy counter in micro joules. Write "0" to reset.
+	If the counter can not be reset, then this attribute is read only.
+
+max_energy_range_uj (ro)
+	Range of the above energy counter in micro-joules.
+
+power_uw (ro)
+	Current power in micro watts.
+
+max_power_range_uw (ro)
+	Range of the above power value in micro-watts.
+
+name (ro)
+	Name of this power zone.
+
+It is possible that some domains have both power ranges and energy counter ranges;
+however, only one is mandatory.
+
+Constraints
+-----------
+
+constraint_X_power_limit_uw (rw)
+	Power limit in micro watts, which should be applicable for the
+	time window specified by "constraint_X_time_window_us".
+
+constraint_X_time_window_us (rw)
+	Time window in micro seconds.
+
+constraint_X_name (ro)
+	An optional name of the constraint
+
+constraint_X_max_power_uw(ro)
+	Maximum allowed power in micro watts.
+
+constraint_X_min_power_uw(ro)
+	Minimum allowed power in micro watts.
+
+constraint_X_max_time_window_us(ro)
+	Maximum allowed time window in micro seconds.
+
+constraint_X_min_time_window_us(ro)
+	Minimum allowed time window in micro seconds.
+
+Except power_limit_uw and time_window_us other fields are optional.
+
+Common zone and control type attributes
+---------------------------------------
+
+enabled (rw): Enable/Disable controls at zone level or for all zones using
+a control type.
+
+Power Cap Client Driver Interface
+=================================
+
+The API summary:
+
+Call powercap_register_control_type() to register control type object.
+Call powercap_register_zone() to register a power zone (under a given
+control type), either as a top-level power zone or as a subzone of another
+power zone registered earlier.
+The number of constraints in a power zone and the corresponding callbacks have
+to be defined prior to calling powercap_register_zone() to register that zone.
+
+To Free a power zone call powercap_unregister_zone().
+To free a control type object call powercap_unregister_control_type().
+Detailed API can be generated using kernel-doc on include/linux/powercap.h.
diff --git a/Documentation/power/powercap/powercap.txt b/Documentation/power/powercap/powercap.txt
deleted file mode 100644
index 1e6ef164e07a..000000000000
--- a/Documentation/power/powercap/powercap.txt
+++ /dev/null
@@ -1,236 +0,0 @@
-Power Capping Framework
-==================================
-
-The power capping framework provides a consistent interface between the kernel
-and the user space that allows power capping drivers to expose the settings to
-user space in a uniform way.
-
-Terminology
-=========================
-The framework exposes power capping devices to user space via sysfs in the
-form of a tree of objects. The objects at the root level of the tree represent
-'control types', which correspond to different methods of power capping.  For
-example, the intel-rapl control type represents the Intel "Running Average
-Power Limit" (RAPL) technology, whereas the 'idle-injection' control type
-corresponds to the use of idle injection for controlling power.
-
-Power zones represent different parts of the system, which can be controlled and
-monitored using the power capping method determined by the control type the
-given zone belongs to. They each contain attributes for monitoring power, as
-well as controls represented in the form of power constraints.  If the parts of
-the system represented by different power zones are hierarchical (that is, one
-bigger part consists of multiple smaller parts that each have their own power
-controls), those power zones may also be organized in a hierarchy with one
-parent power zone containing multiple subzones and so on to reflect the power
-control topology of the system.  In that case, it is possible to apply power
-capping to a set of devices together using the parent power zone and if more
-fine grained control is required, it can be applied through the subzones.
-
-
-Example sysfs interface tree:
-
-/sys/devices/virtual/powercap
-??? intel-rapl
-    ??? intel-rapl:0
-    ?   ??? constraint_0_name
-    ?   ??? constraint_0_power_limit_uw
-    ?   ??? constraint_0_time_window_us
-    ?   ??? constraint_1_name
-    ?   ??? constraint_1_power_limit_uw
-    ?   ??? constraint_1_time_window_us
-    ?   ??? device -> ../../intel-rapl
-    ?   ??? energy_uj
-    ?   ??? intel-rapl:0:0
-    ?   ?   ??? constraint_0_name
-    ?   ?   ??? constraint_0_power_limit_uw
-    ?   ?   ??? constraint_0_time_window_us
-    ?   ?   ??? constraint_1_name
-    ?   ?   ??? constraint_1_power_limit_uw
-    ?   ?   ??? constraint_1_time_window_us
-    ?   ?   ??? device -> ../../intel-rapl:0
-    ?   ?   ??? energy_uj
-    ?   ?   ??? max_energy_range_uj
-    ?   ?   ??? name
-    ?   ?   ??? enabled
-    ?   ?   ??? power
-    ?   ?   ?   ??? async
-    ?   ?   ?   []
-    ?   ?   ??? subsystem -> ../../../../../../class/power_cap
-    ?   ?   ??? uevent
-    ?   ??? intel-rapl:0:1
-    ?   ?   ??? constraint_0_name
-    ?   ?   ??? constraint_0_power_limit_uw
-    ?   ?   ??? constraint_0_time_window_us
-    ?   ?   ??? constraint_1_name
-    ?   ?   ??? constraint_1_power_limit_uw
-    ?   ?   ??? constraint_1_time_window_us
-    ?   ?   ??? device -> ../../intel-rapl:0
-    ?   ?   ??? energy_uj
-    ?   ?   ??? max_energy_range_uj
-    ?   ?   ??? name
-    ?   ?   ??? enabled
-    ?   ?   ??? power
-    ?   ?   ?   ??? async
-    ?   ?   ?   []
-    ?   ?   ??? subsystem -> ../../../../../../class/power_cap
-    ?   ?   ??? uevent
-    ?   ??? max_energy_range_uj
-    ?   ??? max_power_range_uw
-    ?   ??? name
-    ?   ??? enabled
-    ?   ??? power
-    ?   ?   ??? async
-    ?   ?   []
-    ?   ??? subsystem -> ../../../../../class/power_cap
-    ?   ??? enabled
-    ?   ??? uevent
-    ??? intel-rapl:1
-    ?   ??? constraint_0_name
-    ?   ??? constraint_0_power_limit_uw
-    ?   ??? constraint_0_time_window_us
-    ?   ??? constraint_1_name
-    ?   ??? constraint_1_power_limit_uw
-    ?   ??? constraint_1_time_window_us
-    ?   ??? device -> ../../intel-rapl
-    ?   ??? energy_uj
-    ?   ??? intel-rapl:1:0
-    ?   ?   ??? constraint_0_name
-    ?   ?   ??? constraint_0_power_limit_uw
-    ?   ?   ??? constraint_0_time_window_us
-    ?   ?   ??? constraint_1_name
-    ?   ?   ??? constraint_1_power_limit_uw
-    ?   ?   ??? constraint_1_time_window_us
-    ?   ?   ??? device -> ../../intel-rapl:1
-    ?   ?   ??? energy_uj
-    ?   ?   ??? max_energy_range_uj
-    ?   ?   ??? name
-    ?   ?   ??? enabled
-    ?   ?   ??? power
-    ?   ?   ?   ??? async
-    ?   ?   ?   []
-    ?   ?   ??? subsystem -> ../../../../../../class/power_cap
-    ?   ?   ??? uevent
-    ?   ??? intel-rapl:1:1
-    ?   ?   ??? constraint_0_name
-    ?   ?   ??? constraint_0_power_limit_uw
-    ?   ?   ??? constraint_0_time_window_us
-    ?   ?   ??? constraint_1_name
-    ?   ?   ??? constraint_1_power_limit_uw
-    ?   ?   ??? constraint_1_time_window_us
-    ?   ?   ??? device -> ../../intel-rapl:1
-    ?   ?   ??? energy_uj
-    ?   ?   ??? max_energy_range_uj
-    ?   ?   ??? name
-    ?   ?   ??? enabled
-    ?   ?   ??? power
-    ?   ?   ?   ??? async
-    ?   ?   ?   []
-    ?   ?   ??? subsystem -> ../../../../../../class/power_cap
-    ?   ?   ??? uevent
-    ?   ??? max_energy_range_uj
-    ?   ??? max_power_range_uw
-    ?   ??? name
-    ?   ??? enabled
-    ?   ??? power
-    ?   ?   ??? async
-    ?   ?   []
-    ?   ??? subsystem -> ../../../../../class/power_cap
-    ?   ??? uevent
-    ??? power
-    ?   ??? async
-    ?   []
-    ??? subsystem -> ../../../../class/power_cap
-    ??? enabled
-    ??? uevent
-
-The above example illustrates a case in which the Intel RAPL technology,
-available in Intel® IA-64 and IA-32 Processor Architectures, is used. There is one
-control type called intel-rapl which contains two power zones, intel-rapl:0 and
-intel-rapl:1, representing CPU packages.  Each of these power zones contains
-two subzones, intel-rapl:j:0 and intel-rapl:j:1 (j = 0, 1), representing the
-"core" and the "uncore" parts of the given CPU package, respectively.  All of
-the zones and subzones contain energy monitoring attributes (energy_uj,
-max_energy_range_uj) and constraint attributes (constraint_*) allowing controls
-to be applied (the constraints in the 'package' power zones apply to the whole
-CPU packages and the subzone constraints only apply to the respective parts of
-the given package individually). Since Intel RAPL doesn't provide instantaneous
-power value, there is no power_uw attribute.
-
-In addition to that, each power zone contains a name attribute, allowing the
-part of the system represented by that zone to be identified.
-For example:
-
-cat /sys/class/power_cap/intel-rapl/intel-rapl:0/name
-package-0
-
-The Intel RAPL technology allows two constraints, short term and long term,
-with two different time windows to be applied to each power zone.  Thus for
-each zone there are 2 attributes representing the constraint names, 2 power
-limits and 2 attributes representing the sizes of the time windows. Such that,
-constraint_j_* attributes correspond to the jth constraint (j = 0,1).
-
-For example:
-	constraint_0_name
-	constraint_0_power_limit_uw
-	constraint_0_time_window_us
-	constraint_1_name
-	constraint_1_power_limit_uw
-	constraint_1_time_window_us
-
-Power Zone Attributes
-=================================
-Monitoring attributes
-----------------------
-
-energy_uj (rw): Current energy counter in micro joules. Write "0" to reset.
-If the counter can not be reset, then this attribute is read only.
-
-max_energy_range_uj (ro): Range of the above energy counter in micro-joules.
-
-power_uw (ro): Current power in micro watts.
-
-max_power_range_uw (ro): Range of the above power value in micro-watts.
-
-name (ro): Name of this power zone.
-
-It is possible that some domains have both power ranges and energy counter ranges;
-however, only one is mandatory.
-
-Constraints
-----------------
-constraint_X_power_limit_uw (rw): Power limit in micro watts, which should be
-applicable for the time window specified by "constraint_X_time_window_us".
-
-constraint_X_time_window_us (rw): Time window in micro seconds.
-
-constraint_X_name (ro): An optional name of the constraint
-
-constraint_X_max_power_uw(ro): Maximum allowed power in micro watts.
-
-constraint_X_min_power_uw(ro): Minimum allowed power in micro watts.
-
-constraint_X_max_time_window_us(ro): Maximum allowed time window in micro seconds.
-
-constraint_X_min_time_window_us(ro): Minimum allowed time window in micro seconds.
-
-Except power_limit_uw and time_window_us other fields are optional.
-
-Common zone and control type attributes
-----------------------------------------
-enabled (rw): Enable/Disable controls at zone level or for all zones using
-a control type.
-
-Power Cap Client Driver Interface
-==================================
-The API summary:
-
-Call powercap_register_control_type() to register control type object.
-Call powercap_register_zone() to register a power zone (under a given
-control type), either as a top-level power zone or as a subzone of another
-power zone registered earlier.
-The number of constraints in a power zone and the corresponding callbacks have
-to be defined prior to calling powercap_register_zone() to register that zone.
-
-To Free a power zone call powercap_unregister_zone().
-To free a control type object call powercap_unregister_control_type().
-Detailed API can be generated using kernel-doc on include/linux/powercap.h.
diff --git a/Documentation/power/regulator/consumer.rst b/Documentation/power/regulator/consumer.rst
new file mode 100644
index 000000000000..0cd8cc1275a7
--- /dev/null
+++ b/Documentation/power/regulator/consumer.rst
@@ -0,0 +1,229 @@
+===================================
+Regulator Consumer Driver Interface
+===================================
+
+This text describes the regulator interface for consumer device drivers.
+Please see overview.txt for a description of the terms used in this text.
+
+
+1. Consumer Regulator Access (static & dynamic drivers)
+=======================================================
+
+A consumer driver can get access to its supply regulator by calling ::
+
+	regulator = regulator_get(dev, "Vcc");
+
+The consumer passes in its struct device pointer and power supply ID. The core
+then finds the correct regulator by consulting a machine specific lookup table.
+If the lookup is successful then this call will return a pointer to the struct
+regulator that supplies this consumer.
+
+To release the regulator the consumer driver should call ::
+
+	regulator_put(regulator);
+
+Consumers can be supplied by more than one regulator e.g. codec consumer with
+analog and digital supplies ::
+
+	digital = regulator_get(dev, "Vcc");  /* digital core */
+	analog = regulator_get(dev, "Avdd");  /* analog */
+
+The regulator access functions regulator_get() and regulator_put() will
+usually be called in your device drivers probe() and remove() respectively.
+
+
+2. Regulator Output Enable & Disable (static & dynamic drivers)
+===============================================================
+
+
+A consumer can enable its power supply by calling::
+
+	int regulator_enable(regulator);
+
+NOTE:
+  The supply may already be enabled before regulator_enabled() is called.
+  This may happen if the consumer shares the regulator or the regulator has been
+  previously enabled by bootloader or kernel board initialization code.
+
+A consumer can determine if a regulator is enabled by calling::
+
+	int regulator_is_enabled(regulator);
+
+This will return > zero when the regulator is enabled.
+
+
+A consumer can disable its supply when no longer needed by calling::
+
+	int regulator_disable(regulator);
+
+NOTE:
+  This may not disable the supply if it's shared with other consumers. The
+  regulator will only be disabled when the enabled reference count is zero.
+
+Finally, a regulator can be forcefully disabled in the case of an emergency::
+
+	int regulator_force_disable(regulator);
+
+NOTE:
+  this will immediately and forcefully shutdown the regulator output. All
+  consumers will be powered off.
+
+
+3. Regulator Voltage Control & Status (dynamic drivers)
+=======================================================
+
+Some consumer drivers need to be able to dynamically change their supply
+voltage to match system operating points. e.g. CPUfreq drivers can scale
+voltage along with frequency to save power, SD drivers may need to select the
+correct card voltage, etc.
+
+Consumers can control their supply voltage by calling::
+
+	int regulator_set_voltage(regulator, min_uV, max_uV);
+
+Where min_uV and max_uV are the minimum and maximum acceptable voltages in
+microvolts.
+
+NOTE: this can be called when the regulator is enabled or disabled. If called
+when enabled, then the voltage changes instantly, otherwise the voltage
+configuration changes and the voltage is physically set when the regulator is
+next enabled.
+
+The regulators configured voltage output can be found by calling::
+
+	int regulator_get_voltage(regulator);
+
+NOTE:
+  get_voltage() will return the configured output voltage whether the
+  regulator is enabled or disabled and should NOT be used to determine regulator
+  output state. However this can be used in conjunction with is_enabled() to
+  determine the regulator physical output voltage.
+
+
+4. Regulator Current Limit Control & Status (dynamic drivers)
+=============================================================
+
+Some consumer drivers need to be able to dynamically change their supply
+current limit to match system operating points. e.g. LCD backlight driver can
+change the current limit to vary the backlight brightness, USB drivers may want
+to set the limit to 500mA when supplying power.
+
+Consumers can control their supply current limit by calling::
+
+	int regulator_set_current_limit(regulator, min_uA, max_uA);
+
+Where min_uA and max_uA are the minimum and maximum acceptable current limit in
+microamps.
+
+NOTE:
+  this can be called when the regulator is enabled or disabled. If called
+  when enabled, then the current limit changes instantly, otherwise the current
+  limit configuration changes and the current limit is physically set when the
+  regulator is next enabled.
+
+A regulators current limit can be found by calling::
+
+	int regulator_get_current_limit(regulator);
+
+NOTE:
+  get_current_limit() will return the current limit whether the regulator
+  is enabled or disabled and should not be used to determine regulator current
+  load.
+
+
+5. Regulator Operating Mode Control & Status (dynamic drivers)
+==============================================================
+
+Some consumers can further save system power by changing the operating mode of
+their supply regulator to be more efficient when the consumers operating state
+changes. e.g. consumer driver is idle and subsequently draws less current
+
+Regulator operating mode can be changed indirectly or directly.
+
+Indirect operating mode control.
+--------------------------------
+Consumer drivers can request a change in their supply regulator operating mode
+by calling::
+
+	int regulator_set_load(struct regulator *regulator, int load_uA);
+
+This will cause the core to recalculate the total load on the regulator (based
+on all its consumers) and change operating mode (if necessary and permitted)
+to best match the current operating load.
+
+The load_uA value can be determined from the consumer's datasheet. e.g. most
+datasheets have tables showing the maximum current consumed in certain
+situations.
+
+Most consumers will use indirect operating mode control since they have no
+knowledge of the regulator or whether the regulator is shared with other
+consumers.
+
+Direct operating mode control.
+------------------------------
+
+Bespoke or tightly coupled drivers may want to directly control regulator
+operating mode depending on their operating point. This can be achieved by
+calling::
+
+	int regulator_set_mode(struct regulator *regulator, unsigned int mode);
+	unsigned int regulator_get_mode(struct regulator *regulator);
+
+Direct mode will only be used by consumers that *know* about the regulator and
+are not sharing the regulator with other consumers.
+
+
+6. Regulator Events
+===================
+
+Regulators can notify consumers of external events. Events could be received by
+consumers under regulator stress or failure conditions.
+
+Consumers can register interest in regulator events by calling::
+
+	int regulator_register_notifier(struct regulator *regulator,
+					struct notifier_block *nb);
+
+Consumers can unregister interest by calling::
+
+	int regulator_unregister_notifier(struct regulator *regulator,
+					  struct notifier_block *nb);
+
+Regulators use the kernel notifier framework to send event to their interested
+consumers.
+
+7. Regulator Direct Register Access
+===================================
+
+Some kinds of power management hardware or firmware are designed such that
+they need to do low-level hardware access to regulators, with no involvement
+from the kernel. Examples of such devices are:
+
+- clocksource with a voltage-controlled oscillator and control logic to change
+  the supply voltage over I2C to achieve a desired output clock rate
+- thermal management firmware that can issue an arbitrary I2C transaction to
+  perform system poweroff during overtemperature conditions
+
+To set up such a device/firmware, various parameters like I2C address of the
+regulator, addresses of various regulator registers etc. need to be configured
+to it. The regulator framework provides the following helpers for querying
+these details.
+
+Bus-specific details, like I2C addresses or transfer rates are handled by the
+regmap framework. To get the regulator's regmap (if supported), use::
+
+	struct regmap *regulator_get_regmap(struct regulator *regulator);
+
+To obtain the hardware register offset and bitmask for the regulator's voltage
+selector register, use::
+
+	int regulator_get_hardware_vsel_register(struct regulator *regulator,
+						 unsigned *vsel_reg,
+						 unsigned *vsel_mask);
+
+To convert a regulator framework voltage selector code (used by
+regulator_list_voltage) to a hardware-specific voltage selector that can be
+directly written to the voltage selector register, use::
+
+	int regulator_list_hardware_vsel(struct regulator *regulator,
+					 unsigned selector);
diff --git a/Documentation/power/regulator/consumer.txt b/Documentation/power/regulator/consumer.txt
deleted file mode 100644
index e51564c1a140..000000000000
--- a/Documentation/power/regulator/consumer.txt
+++ /dev/null
@@ -1,218 +0,0 @@
-Regulator Consumer Driver Interface
-===================================
-
-This text describes the regulator interface for consumer device drivers.
-Please see overview.txt for a description of the terms used in this text.
-
-
-1. Consumer Regulator Access (static & dynamic drivers)
-=======================================================
-
-A consumer driver can get access to its supply regulator by calling :-
-
-regulator = regulator_get(dev, "Vcc");
-
-The consumer passes in its struct device pointer and power supply ID. The core
-then finds the correct regulator by consulting a machine specific lookup table.
-If the lookup is successful then this call will return a pointer to the struct
-regulator that supplies this consumer.
-
-To release the regulator the consumer driver should call :-
-
-regulator_put(regulator);
-
-Consumers can be supplied by more than one regulator e.g. codec consumer with
-analog and digital supplies :-
-
-digital = regulator_get(dev, "Vcc");  /* digital core */
-analog = regulator_get(dev, "Avdd");  /* analog */
-
-The regulator access functions regulator_get() and regulator_put() will
-usually be called in your device drivers probe() and remove() respectively.
-
-
-2. Regulator Output Enable & Disable (static & dynamic drivers)
-====================================================================
-
-A consumer can enable its power supply by calling:-
-
-int regulator_enable(regulator);
-
-NOTE: The supply may already be enabled before regulator_enabled() is called.
-This may happen if the consumer shares the regulator or the regulator has been
-previously enabled by bootloader or kernel board initialization code.
-
-A consumer can determine if a regulator is enabled by calling :-
-
-int regulator_is_enabled(regulator);
-
-This will return > zero when the regulator is enabled.
-
-
-A consumer can disable its supply when no longer needed by calling :-
-
-int regulator_disable(regulator);
-
-NOTE: This may not disable the supply if it's shared with other consumers. The
-regulator will only be disabled when the enabled reference count is zero.
-
-Finally, a regulator can be forcefully disabled in the case of an emergency :-
-
-int regulator_force_disable(regulator);
-
-NOTE: this will immediately and forcefully shutdown the regulator output. All
-consumers will be powered off.
-
-
-3. Regulator Voltage Control & Status (dynamic drivers)
-======================================================
-
-Some consumer drivers need to be able to dynamically change their supply
-voltage to match system operating points. e.g. CPUfreq drivers can scale
-voltage along with frequency to save power, SD drivers may need to select the
-correct card voltage, etc.
-
-Consumers can control their supply voltage by calling :-
-
-int regulator_set_voltage(regulator, min_uV, max_uV);
-
-Where min_uV and max_uV are the minimum and maximum acceptable voltages in
-microvolts.
-
-NOTE: this can be called when the regulator is enabled or disabled. If called
-when enabled, then the voltage changes instantly, otherwise the voltage
-configuration changes and the voltage is physically set when the regulator is
-next enabled.
-
-The regulators configured voltage output can be found by calling :-
-
-int regulator_get_voltage(regulator);
-
-NOTE: get_voltage() will return the configured output voltage whether the
-regulator is enabled or disabled and should NOT be used to determine regulator
-output state. However this can be used in conjunction with is_enabled() to
-determine the regulator physical output voltage.
-
-
-4. Regulator Current Limit Control & Status (dynamic drivers)
-===========================================================
-
-Some consumer drivers need to be able to dynamically change their supply
-current limit to match system operating points. e.g. LCD backlight driver can
-change the current limit to vary the backlight brightness, USB drivers may want
-to set the limit to 500mA when supplying power.
-
-Consumers can control their supply current limit by calling :-
-
-int regulator_set_current_limit(regulator, min_uA, max_uA);
-
-Where min_uA and max_uA are the minimum and maximum acceptable current limit in
-microamps.
-
-NOTE: this can be called when the regulator is enabled or disabled. If called
-when enabled, then the current limit changes instantly, otherwise the current
-limit configuration changes and the current limit is physically set when the
-regulator is next enabled.
-
-A regulators current limit can be found by calling :-
-
-int regulator_get_current_limit(regulator);
-
-NOTE: get_current_limit() will return the current limit whether the regulator
-is enabled or disabled and should not be used to determine regulator current
-load.
-
-
-5. Regulator Operating Mode Control & Status (dynamic drivers)
-=============================================================
-
-Some consumers can further save system power by changing the operating mode of
-their supply regulator to be more efficient when the consumers operating state
-changes. e.g. consumer driver is idle and subsequently draws less current
-
-Regulator operating mode can be changed indirectly or directly.
-
-Indirect operating mode control.
---------------------------------
-Consumer drivers can request a change in their supply regulator operating mode
-by calling :-
-
-int regulator_set_load(struct regulator *regulator, int load_uA);
-
-This will cause the core to recalculate the total load on the regulator (based
-on all its consumers) and change operating mode (if necessary and permitted)
-to best match the current operating load.
-
-The load_uA value can be determined from the consumer's datasheet. e.g. most
-datasheets have tables showing the maximum current consumed in certain
-situations.
-
-Most consumers will use indirect operating mode control since they have no
-knowledge of the regulator or whether the regulator is shared with other
-consumers.
-
-Direct operating mode control.
-------------------------------
-Bespoke or tightly coupled drivers may want to directly control regulator
-operating mode depending on their operating point. This can be achieved by
-calling :-
-
-int regulator_set_mode(struct regulator *regulator, unsigned int mode);
-unsigned int regulator_get_mode(struct regulator *regulator);
-
-Direct mode will only be used by consumers that *know* about the regulator and
-are not sharing the regulator with other consumers.
-
-
-6. Regulator Events
-===================
-Regulators can notify consumers of external events. Events could be received by
-consumers under regulator stress or failure conditions.
-
-Consumers can register interest in regulator events by calling :-
-
-int regulator_register_notifier(struct regulator *regulator,
-			      struct notifier_block *nb);
-
-Consumers can unregister interest by calling :-
-
-int regulator_unregister_notifier(struct regulator *regulator,
-				struct notifier_block *nb);
-
-Regulators use the kernel notifier framework to send event to their interested
-consumers.
-
-7. Regulator Direct Register Access
-===================================
-Some kinds of power management hardware or firmware are designed such that
-they need to do low-level hardware access to regulators, with no involvement
-from the kernel. Examples of such devices are:
-
-- clocksource with a voltage-controlled oscillator and control logic to change
-  the supply voltage over I2C to achieve a desired output clock rate
-- thermal management firmware that can issue an arbitrary I2C transaction to
-  perform system poweroff during overtemperature conditions
-
-To set up such a device/firmware, various parameters like I2C address of the
-regulator, addresses of various regulator registers etc. need to be configured
-to it. The regulator framework provides the following helpers for querying
-these details.
-
-Bus-specific details, like I2C addresses or transfer rates are handled by the
-regmap framework. To get the regulator's regmap (if supported), use :-
-
-struct regmap *regulator_get_regmap(struct regulator *regulator);
-
-To obtain the hardware register offset and bitmask for the regulator's voltage
-selector register, use :-
-
-int regulator_get_hardware_vsel_register(struct regulator *regulator,
-					 unsigned *vsel_reg,
-					 unsigned *vsel_mask);
-
-To convert a regulator framework voltage selector code (used by
-regulator_list_voltage) to a hardware-specific voltage selector that can be
-directly written to the voltage selector register, use :-
-
-int regulator_list_hardware_vsel(struct regulator *regulator,
-				 unsigned selector);
diff --git a/Documentation/power/regulator/design.rst b/Documentation/power/regulator/design.rst
new file mode 100644
index 000000000000..3b09c6841dc4
--- /dev/null
+++ b/Documentation/power/regulator/design.rst
@@ -0,0 +1,38 @@
+==========================
+Regulator API design notes
+==========================
+
+This document provides a brief, partially structured, overview of some
+of the design considerations which impact the regulator API design.
+
+Safety
+------
+
+ - Errors in regulator configuration can have very serious consequences
+   for the system, potentially including lasting hardware damage.
+ - It is not possible to automatically determine the power configuration
+   of the system - software-equivalent variants of the same chip may
+   have different power requirements, and not all components with power
+   requirements are visible to software.
+
+.. note::
+
+     The API should make no changes to the hardware state unless it has
+     specific knowledge that these changes are safe to perform on this
+     particular system.
+
+Consumer use cases
+------------------
+
+ - The overwhelming majority of devices in a system will have no
+   requirement to do any runtime configuration of their power beyond
+   being able to turn it on or off.
+
+ - Many of the power supplies in the system will be shared between many
+   different consumers.
+
+.. note::
+
+     The consumer API should be structured so that these use cases are
+     very easy to handle and so that consumers will work with shared
+     supplies without any additional effort.
diff --git a/Documentation/power/regulator/design.txt b/Documentation/power/regulator/design.txt
deleted file mode 100644
index fdd919b96830..000000000000
--- a/Documentation/power/regulator/design.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Regulator API design notes
-==========================
-
-This document provides a brief, partially structured, overview of some
-of the design considerations which impact the regulator API design.
-
-Safety
-------
-
- - Errors in regulator configuration can have very serious consequences
-   for the system, potentially including lasting hardware damage.
- - It is not possible to automatically determine the power configuration
-   of the system - software-equivalent variants of the same chip may
-   have different power requirements, and not all components with power
-   requirements are visible to software.
-
-  => The API should make no changes to the hardware state unless it has
-     specific knowledge that these changes are safe to perform on this
-     particular system.
-
-Consumer use cases
-------------------
-
- - The overwhelming majority of devices in a system will have no
-   requirement to do any runtime configuration of their power beyond
-   being able to turn it on or off.
-
- - Many of the power supplies in the system will be shared between many
-   different consumers.
-
-  => The consumer API should be structured so that these use cases are
-     very easy to handle and so that consumers will work with shared
-     supplies without any additional effort.
diff --git a/Documentation/power/regulator/machine.rst b/Documentation/power/regulator/machine.rst
new file mode 100644
index 000000000000..22fffefaa3ad
--- /dev/null
+++ b/Documentation/power/regulator/machine.rst
@@ -0,0 +1,97 @@
+==================================
+Regulator Machine Driver Interface
+==================================
+
+The regulator machine driver interface is intended for board/machine specific
+initialisation code to configure the regulator subsystem.
+
+Consider the following machine::
+
+  Regulator-1 -+-> Regulator-2 --> [Consumer A @ 1.8 - 2.0V]
+               |
+               +-> [Consumer B @ 3.3V]
+
+The drivers for consumers A & B must be mapped to the correct regulator in
+order to control their power supplies. This mapping can be achieved in machine
+initialisation code by creating a struct regulator_consumer_supply for
+each regulator::
+
+  struct regulator_consumer_supply {
+	const char *dev_name;	/* consumer dev_name() */
+	const char *supply;	/* consumer supply - e.g. "vcc" */
+  };
+
+e.g. for the machine above::
+
+  static struct regulator_consumer_supply regulator1_consumers[] = {
+	REGULATOR_SUPPLY("Vcc", "consumer B"),
+  };
+
+  static struct regulator_consumer_supply regulator2_consumers[] = {
+	REGULATOR_SUPPLY("Vcc", "consumer A"),
+  };
+
+This maps Regulator-1 to the 'Vcc' supply for Consumer B and maps Regulator-2
+to the 'Vcc' supply for Consumer A.
+
+Constraints can now be registered by defining a struct regulator_init_data
+for each regulator power domain. This structure also maps the consumers
+to their supply regulators::
+
+  static struct regulator_init_data regulator1_data = {
+	.constraints = {
+		.name = "Regulator-1",
+		.min_uV = 3300000,
+		.max_uV = 3300000,
+		.valid_modes_mask = REGULATOR_MODE_NORMAL,
+	},
+	.num_consumer_supplies = ARRAY_SIZE(regulator1_consumers),
+	.consumer_supplies = regulator1_consumers,
+  };
+
+The name field should be set to something that is usefully descriptive
+for the board for configuration of supplies for other regulators and
+for use in logging and other diagnostic output.  Normally the name
+used for the supply rail in the schematic is a good choice.  If no
+name is provided then the subsystem will choose one.
+
+Regulator-1 supplies power to Regulator-2. This relationship must be registered
+with the core so that Regulator-1 is also enabled when Consumer A enables its
+supply (Regulator-2). The supply regulator is set by the supply_regulator
+field below and co::
+
+  static struct regulator_init_data regulator2_data = {
+	.supply_regulator = "Regulator-1",
+	.constraints = {
+		.min_uV = 1800000,
+		.max_uV = 2000000,
+		.valid_ops_mask = REGULATOR_CHANGE_VOLTAGE,
+		.valid_modes_mask = REGULATOR_MODE_NORMAL,
+	},
+	.num_consumer_supplies = ARRAY_SIZE(regulator2_consumers),
+	.consumer_supplies = regulator2_consumers,
+  };
+
+Finally the regulator devices must be registered in the usual manner::
+
+  static struct platform_device regulator_devices[] = {
+	{
+		.name = "regulator",
+		.id = DCDC_1,
+		.dev = {
+			.platform_data = &regulator1_data,
+		},
+	},
+	{
+		.name = "regulator",
+		.id = DCDC_2,
+		.dev = {
+			.platform_data = &regulator2_data,
+		},
+	},
+  };
+  /* register regulator 1 device */
+  platform_device_register(&regulator_devices[0]);
+
+  /* register regulator 2 device */
+  platform_device_register(&regulator_devices[1]);
diff --git a/Documentation/power/regulator/machine.txt b/Documentation/power/regulator/machine.txt
deleted file mode 100644
index eff4dcaaa252..000000000000
--- a/Documentation/power/regulator/machine.txt
+++ /dev/null
@@ -1,96 +0,0 @@
-Regulator Machine Driver Interface
-===================================
-
-The regulator machine driver interface is intended for board/machine specific
-initialisation code to configure the regulator subsystem.
-
-Consider the following machine :-
-
-  Regulator-1 -+-> Regulator-2 --> [Consumer A @ 1.8 - 2.0V]
-               |
-               +-> [Consumer B @ 3.3V]
-
-The drivers for consumers A & B must be mapped to the correct regulator in
-order to control their power supplies. This mapping can be achieved in machine
-initialisation code by creating a struct regulator_consumer_supply for
-each regulator.
-
-struct regulator_consumer_supply {
-	const char *dev_name;	/* consumer dev_name() */
-	const char *supply;	/* consumer supply - e.g. "vcc" */
-};
-
-e.g. for the machine above
-
-static struct regulator_consumer_supply regulator1_consumers[] = {
-	REGULATOR_SUPPLY("Vcc", "consumer B"),
-};
-
-static struct regulator_consumer_supply regulator2_consumers[] = {
-	REGULATOR_SUPPLY("Vcc", "consumer A"),
-};
-
-This maps Regulator-1 to the 'Vcc' supply for Consumer B and maps Regulator-2
-to the 'Vcc' supply for Consumer A.
-
-Constraints can now be registered by defining a struct regulator_init_data
-for each regulator power domain. This structure also maps the consumers
-to their supply regulators :-
-
-static struct regulator_init_data regulator1_data = {
-	.constraints = {
-		.name = "Regulator-1",
-		.min_uV = 3300000,
-		.max_uV = 3300000,
-		.valid_modes_mask = REGULATOR_MODE_NORMAL,
-	},
-	.num_consumer_supplies = ARRAY_SIZE(regulator1_consumers),
-	.consumer_supplies = regulator1_consumers,
-};
-
-The name field should be set to something that is usefully descriptive
-for the board for configuration of supplies for other regulators and
-for use in logging and other diagnostic output.  Normally the name
-used for the supply rail in the schematic is a good choice.  If no
-name is provided then the subsystem will choose one.
-
-Regulator-1 supplies power to Regulator-2. This relationship must be registered
-with the core so that Regulator-1 is also enabled when Consumer A enables its
-supply (Regulator-2). The supply regulator is set by the supply_regulator
-field below and co:-
-
-static struct regulator_init_data regulator2_data = {
-	.supply_regulator = "Regulator-1",
-	.constraints = {
-		.min_uV = 1800000,
-		.max_uV = 2000000,
-		.valid_ops_mask = REGULATOR_CHANGE_VOLTAGE,
-		.valid_modes_mask = REGULATOR_MODE_NORMAL,
-	},
-	.num_consumer_supplies = ARRAY_SIZE(regulator2_consumers),
-	.consumer_supplies = regulator2_consumers,
-};
-
-Finally the regulator devices must be registered in the usual manner.
-
-static struct platform_device regulator_devices[] = {
-	{
-		.name = "regulator",
-		.id = DCDC_1,
-		.dev = {
-			.platform_data = &regulator1_data,
-		},
-	},
-	{
-		.name = "regulator",
-		.id = DCDC_2,
-		.dev = {
-			.platform_data = &regulator2_data,
-		},
-	},
-};
-/* register regulator 1 device */
-platform_device_register(&regulator_devices[0]);
-
-/* register regulator 2 device */
-platform_device_register(&regulator_devices[1]);
diff --git a/Documentation/power/regulator/overview.rst b/Documentation/power/regulator/overview.rst
new file mode 100644
index 000000000000..ee494c70a7c4
--- /dev/null
+++ b/Documentation/power/regulator/overview.rst
@@ -0,0 +1,178 @@
+=============================================
+Linux voltage and current regulator framework
+=============================================
+
+About
+=====
+
+This framework is designed to provide a standard kernel interface to control
+voltage and current regulators.
+
+The intention is to allow systems to dynamically control regulator power output
+in order to save power and prolong battery life. This applies to both voltage
+regulators (where voltage output is controllable) and current sinks (where
+current limit is controllable).
+
+(C) 2008  Wolfson Microelectronics PLC.
+
+Author: Liam Girdwood <lrg@slimlogic.co.uk>
+
+
+Nomenclature
+============
+
+Some terms used in this document:
+
+  - Regulator
+                 - Electronic device that supplies power to other devices.
+                   Most regulators can enable and disable their output while
+                   some can control their output voltage and or current.
+
+                   Input Voltage -> Regulator -> Output Voltage
+
+
+  - PMIC
+                 - Power Management IC. An IC that contains numerous
+                   regulators and often contains other subsystems.
+
+
+  - Consumer
+                 - Electronic device that is supplied power by a regulator.
+                   Consumers can be classified into two types:-
+
+                   Static: consumer does not change its supply voltage or
+                   current limit. It only needs to enable or disable its
+                   power supply. Its supply voltage is set by the hardware,
+                   bootloader, firmware or kernel board initialisation code.
+
+                   Dynamic: consumer needs to change its supply voltage or
+                   current limit to meet operation demands.
+
+
+  - Power Domain
+                 - Electronic circuit that is supplied its input power by the
+                   output power of a regulator, switch or by another power
+                   domain.
+
+                   The supply regulator may be behind a switch(s). i.e.::
+
+                     Regulator -+-> Switch-1 -+-> Switch-2 --> [Consumer A]
+                                |             |
+                                |             +-> [Consumer B], [Consumer C]
+                                |
+                                +-> [Consumer D], [Consumer E]
+
+                   That is one regulator and three power domains:
+
+                   - Domain 1: Switch-1, Consumers D & E.
+                   - Domain 2: Switch-2, Consumers B & C.
+                   - Domain 3: Consumer A.
+
+                   and this represents a "supplies" relationship:
+
+                   Domain-1 --> Domain-2 --> Domain-3.
+
+                   A power domain may have regulators that are supplied power
+                   by other regulators. i.e.::
+
+                     Regulator-1 -+-> Regulator-2 -+-> [Consumer A]
+                                  |
+                                  +-> [Consumer B]
+
+                   This gives us two regulators and two power domains:
+
+                   - Domain 1: Regulator-2, Consumer B.
+                   - Domain 2: Consumer A.
+
+                   and a "supplies" relationship:
+
+                   Domain-1 --> Domain-2
+
+
+  - Constraints
+                 - Constraints are used to define power levels for performance
+                   and hardware protection. Constraints exist at three levels:
+
+                   Regulator Level: This is defined by the regulator hardware
+                   operating parameters and is specified in the regulator
+                   datasheet. i.e.
+
+                     - voltage output is in the range 800mV -> 3500mV.
+                     - regulator current output limit is 20mA @ 5V but is
+                       10mA @ 10V.
+
+                   Power Domain Level: This is defined in software by kernel
+                   level board initialisation code. It is used to constrain a
+                   power domain to a particular power range. i.e.
+
+                     - Domain-1 voltage is 3300mV
+                     - Domain-2 voltage is 1400mV -> 1600mV
+                     - Domain-3 current limit is 0mA -> 20mA.
+
+                   Consumer Level: This is defined by consumer drivers
+                   dynamically setting voltage or current limit levels.
+
+                   e.g. a consumer backlight driver asks for a current increase
+                   from 5mA to 10mA to increase LCD illumination. This passes
+                   to through the levels as follows :-
+
+                   Consumer: need to increase LCD brightness. Lookup and
+                   request next current mA value in brightness table (the
+                   consumer driver could be used on several different
+                   personalities based upon the same reference device).
+
+                   Power Domain: is the new current limit within the domain
+                   operating limits for this domain and system state (e.g.
+                   battery power, USB power)
+
+                   Regulator Domains: is the new current limit within the
+                   regulator operating parameters for input/output voltage.
+
+                   If the regulator request passes all the constraint tests
+                   then the new regulator value is applied.
+
+
+Design
+======
+
+The framework is designed and targeted at SoC based devices but may also be
+relevant to non SoC devices and is split into the following four interfaces:-
+
+
+   1. Consumer driver interface.
+
+      This uses a similar API to the kernel clock interface in that consumer
+      drivers can get and put a regulator (like they can with clocks atm) and
+      get/set voltage, current limit, mode, enable and disable. This should
+      allow consumers complete control over their supply voltage and current
+      limit. This also compiles out if not in use so drivers can be reused in
+      systems with no regulator based power control.
+
+        See Documentation/power/regulator/consumer.rst
+
+   2. Regulator driver interface.
+
+      This allows regulator drivers to register their regulators and provide
+      operations to the core. It also has a notifier call chain for propagating
+      regulator events to clients.
+
+        See Documentation/power/regulator/regulator.rst
+
+   3. Machine interface.
+
+      This interface is for machine specific code and allows the creation of
+      voltage/current domains (with constraints) for each regulator. It can
+      provide regulator constraints that will prevent device damage through
+      overvoltage or overcurrent caused by buggy client drivers. It also
+      allows the creation of a regulator tree whereby some regulators are
+      supplied by others (similar to a clock tree).
+
+        See Documentation/power/regulator/machine.rst
+
+   4. Userspace ABI.
+
+      The framework also exports a lot of useful voltage/current/opmode data to
+      userspace via sysfs. This could be used to help monitor device power
+      consumption and status.
+
+        See Documentation/ABI/testing/sysfs-class-regulator
diff --git a/Documentation/power/regulator/overview.txt b/Documentation/power/regulator/overview.txt
deleted file mode 100644
index 721b4739ec32..000000000000
--- a/Documentation/power/regulator/overview.txt
+++ /dev/null
@@ -1,171 +0,0 @@
-Linux voltage and current regulator framework
-=============================================
-
-About
-=====
-
-This framework is designed to provide a standard kernel interface to control
-voltage and current regulators.
-
-The intention is to allow systems to dynamically control regulator power output
-in order to save power and prolong battery life. This applies to both voltage
-regulators (where voltage output is controllable) and current sinks (where
-current limit is controllable).
-
-(C) 2008  Wolfson Microelectronics PLC.
-Author: Liam Girdwood <lrg@slimlogic.co.uk>
-
-
-Nomenclature
-============
-
-Some terms used in this document:-
-
-  o Regulator    - Electronic device that supplies power to other devices.
-                   Most regulators can enable and disable their output while
-                   some can control their output voltage and or current.
-
-                   Input Voltage -> Regulator -> Output Voltage
-
-
-  o PMIC         - Power Management IC. An IC that contains numerous regulators
-                   and often contains other subsystems.
-
-
-  o Consumer     - Electronic device that is supplied power by a regulator.
-                   Consumers can be classified into two types:-
-
-                   Static: consumer does not change its supply voltage or
-                   current limit. It only needs to enable or disable its
-                   power supply. Its supply voltage is set by the hardware,
-                   bootloader, firmware or kernel board initialisation code.
-
-                   Dynamic: consumer needs to change its supply voltage or
-                   current limit to meet operation demands.
-
-
-  o Power Domain - Electronic circuit that is supplied its input power by the
-                   output power of a regulator, switch or by another power
-                   domain.
-
-                   The supply regulator may be behind a switch(s). i.e.
-
-                   Regulator -+-> Switch-1 -+-> Switch-2 --> [Consumer A]
-                              |             |
-                              |             +-> [Consumer B], [Consumer C]
-                              |
-                              +-> [Consumer D], [Consumer E]
-
-                   That is one regulator and three power domains:
-
-                   Domain 1: Switch-1, Consumers D & E.
-                   Domain 2: Switch-2, Consumers B & C.
-                   Domain 3: Consumer A.
-
-                   and this represents a "supplies" relationship:
-
-                   Domain-1 --> Domain-2 --> Domain-3.
-
-                   A power domain may have regulators that are supplied power
-                   by other regulators. i.e.
-
-                   Regulator-1 -+-> Regulator-2 -+-> [Consumer A]
-                                |
-                                +-> [Consumer B]
-
-                   This gives us two regulators and two power domains:
-
-                   Domain 1: Regulator-2, Consumer B.
-                   Domain 2: Consumer A.
-
-                   and a "supplies" relationship:
-
-                   Domain-1 --> Domain-2
-
-
-  o Constraints  - Constraints are used to define power levels for performance
-                   and hardware protection. Constraints exist at three levels:
-
-                   Regulator Level: This is defined by the regulator hardware
-                   operating parameters and is specified in the regulator
-                   datasheet. i.e.
-
-                     - voltage output is in the range 800mV -> 3500mV.
-                     - regulator current output limit is 20mA @ 5V but is
-                       10mA @ 10V.
-
-                   Power Domain Level: This is defined in software by kernel
-                   level board initialisation code. It is used to constrain a
-                   power domain to a particular power range. i.e.
-
-                     - Domain-1 voltage is 3300mV
-                     - Domain-2 voltage is 1400mV -> 1600mV
-                     - Domain-3 current limit is 0mA -> 20mA.
-
-                   Consumer Level: This is defined by consumer drivers
-                   dynamically setting voltage or current limit levels.
-
-                   e.g. a consumer backlight driver asks for a current increase
-                   from 5mA to 10mA to increase LCD illumination. This passes
-                   to through the levels as follows :-
-
-                   Consumer: need to increase LCD brightness. Lookup and
-                   request next current mA value in brightness table (the
-                   consumer driver could be used on several different
-                   personalities based upon the same reference device).
-
-                   Power Domain: is the new current limit within the domain
-                   operating limits for this domain and system state (e.g.
-                   battery power, USB power)
-
-                   Regulator Domains: is the new current limit within the
-                   regulator operating parameters for input/output voltage.
-
-                   If the regulator request passes all the constraint tests
-                   then the new regulator value is applied.
-
-
-Design
-======
-
-The framework is designed and targeted at SoC based devices but may also be
-relevant to non SoC devices and is split into the following four interfaces:-
-
-
-   1. Consumer driver interface.
-
-      This uses a similar API to the kernel clock interface in that consumer
-      drivers can get and put a regulator (like they can with clocks atm) and
-      get/set voltage, current limit, mode, enable and disable. This should
-      allow consumers complete control over their supply voltage and current
-      limit. This also compiles out if not in use so drivers can be reused in
-      systems with no regulator based power control.
-
-        See Documentation/power/regulator/consumer.txt
-
-   2. Regulator driver interface.
-
-      This allows regulator drivers to register their regulators and provide
-      operations to the core. It also has a notifier call chain for propagating
-      regulator events to clients.
-
-        See Documentation/power/regulator/regulator.txt
-
-   3. Machine interface.
-
-      This interface is for machine specific code and allows the creation of
-      voltage/current domains (with constraints) for each regulator. It can
-      provide regulator constraints that will prevent device damage through
-      overvoltage or overcurrent caused by buggy client drivers. It also
-      allows the creation of a regulator tree whereby some regulators are
-      supplied by others (similar to a clock tree).
-
-        See Documentation/power/regulator/machine.txt
-
-   4. Userspace ABI.
-
-      The framework also exports a lot of useful voltage/current/opmode data to
-      userspace via sysfs. This could be used to help monitor device power
-      consumption and status.
-
-        See Documentation/ABI/testing/sysfs-class-regulator
diff --git a/Documentation/power/regulator/regulator.rst b/Documentation/power/regulator/regulator.rst
new file mode 100644
index 000000000000..794b3256fbb9
--- /dev/null
+++ b/Documentation/power/regulator/regulator.rst
@@ -0,0 +1,32 @@
+==========================
+Regulator Driver Interface
+==========================
+
+The regulator driver interface is relatively simple and designed to allow
+regulator drivers to register their services with the core framework.
+
+
+Registration
+============
+
+Drivers can register a regulator by calling::
+
+  struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
+					   const struct regulator_config *config);
+
+This will register the regulator's capabilities and operations to the regulator
+core.
+
+Regulators can be unregistered by calling::
+
+  void regulator_unregister(struct regulator_dev *rdev);
+
+
+Regulator Events
+================
+
+Regulators can send events (e.g. overtemperature, undervoltage, etc) to
+consumer drivers by calling::
+
+  int regulator_notifier_call_chain(struct regulator_dev *rdev,
+				    unsigned long event, void *data);
diff --git a/Documentation/power/regulator/regulator.txt b/Documentation/power/regulator/regulator.txt
deleted file mode 100644
index b17e5833ce21..000000000000
--- a/Documentation/power/regulator/regulator.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Regulator Driver Interface
-==========================
-
-The regulator driver interface is relatively simple and designed to allow
-regulator drivers to register their services with the core framework.
-
-
-Registration
-============
-
-Drivers can register a regulator by calling :-
-
-struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
-					 const struct regulator_config *config);
-
-This will register the regulator's capabilities and operations to the regulator
-core.
-
-Regulators can be unregistered by calling :-
-
-void regulator_unregister(struct regulator_dev *rdev);
-
-
-Regulator Events
-================
-Regulators can send events (e.g. overtemperature, undervoltage, etc) to
-consumer drivers by calling :-
-
-int regulator_notifier_call_chain(struct regulator_dev *rdev,
-				  unsigned long event, void *data);
diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst
new file mode 100644
index 000000000000..2c2ec99b5088
--- /dev/null
+++ b/Documentation/power/runtime_pm.rst
@@ -0,0 +1,940 @@
+==================================================
+Runtime Power Management Framework for I/O Devices
+==================================================
+
+(C) 2009-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+
+(C) 2010 Alan Stern <stern@rowland.harvard.edu>
+
+(C) 2014 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+1. Introduction
+===============
+
+Support for runtime power management (runtime PM) of I/O devices is provided
+at the power management core (PM core) level by means of:
+
+* The power management workqueue pm_wq in which bus types and device drivers can
+  put their PM-related work items.  It is strongly recommended that pm_wq be
+  used for queuing all work items related to runtime PM, because this allows
+  them to be synchronized with system-wide power transitions (suspend to RAM,
+  hibernation and resume from system sleep states).  pm_wq is declared in
+  include/linux/pm_runtime.h and defined in kernel/power/main.c.
+
+* A number of runtime PM fields in the 'power' member of 'struct device' (which
+  is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can
+  be used for synchronizing runtime PM operations with one another.
+
+* Three device runtime PM callbacks in 'struct dev_pm_ops' (defined in
+  include/linux/pm.h).
+
+* A set of helper functions defined in drivers/base/power/runtime.c that can be
+  used for carrying out runtime PM operations in such a way that the
+  synchronization between them is taken care of by the PM core.  Bus types and
+  device drivers are encouraged to use these functions.
+
+The runtime PM callbacks present in 'struct dev_pm_ops', the device runtime PM
+fields of 'struct dev_pm_info' and the core helper functions provided for
+runtime PM are described below.
+
+2. Device Runtime PM Callbacks
+==============================
+
+There are three device runtime PM callbacks defined in 'struct dev_pm_ops'::
+
+  struct dev_pm_ops {
+	...
+	int (*runtime_suspend)(struct device *dev);
+	int (*runtime_resume)(struct device *dev);
+	int (*runtime_idle)(struct device *dev);
+	...
+  };
+
+The ->runtime_suspend(), ->runtime_resume() and ->runtime_idle() callbacks
+are executed by the PM core for the device's subsystem that may be either of
+the following:
+
+  1. PM domain of the device, if the device's PM domain object, dev->pm_domain,
+     is present.
+
+  2. Device type of the device, if both dev->type and dev->type->pm are present.
+
+  3. Device class of the device, if both dev->class and dev->class->pm are
+     present.
+
+  4. Bus type of the device, if both dev->bus and dev->bus->pm are present.
+
+If the subsystem chosen by applying the above rules doesn't provide the relevant
+callback, the PM core will invoke the corresponding driver callback stored in
+dev->driver->pm directly (if present).
+
+The PM core always checks which callback to use in the order given above, so the
+priority order of callbacks from high to low is: PM domain, device type, class
+and bus type.  Moreover, the high-priority one will always take precedence over
+a low-priority one.  The PM domain, bus type, device type and class callbacks
+are referred to as subsystem-level callbacks in what follows.
+
+By default, the callbacks are always invoked in process context with interrupts
+enabled.  However, the pm_runtime_irq_safe() helper function can be used to tell
+the PM core that it is safe to run the ->runtime_suspend(), ->runtime_resume()
+and ->runtime_idle() callbacks for the given device in atomic context with
+interrupts disabled.  This implies that the callback routines in question must
+not block or sleep, but it also means that the synchronous helper functions
+listed at the end of Section 4 may be used for that device within an interrupt
+handler or generally in an atomic context.
+
+The subsystem-level suspend callback, if present, is _entirely_ _responsible_
+for handling the suspend of the device as appropriate, which may, but need not
+include executing the device driver's own ->runtime_suspend() callback (from the
+PM core's point of view it is not necessary to implement a ->runtime_suspend()
+callback in a device driver as long as the subsystem-level suspend callback
+knows what to do to handle the device).
+
+  * Once the subsystem-level suspend callback (or the driver suspend callback,
+    if invoked directly) has completed successfully for the given device, the PM
+    core regards the device as suspended, which need not mean that it has been
+    put into a low power state.  It is supposed to mean, however, that the
+    device will not process data and will not communicate with the CPU(s) and
+    RAM until the appropriate resume callback is executed for it.  The runtime
+    PM status of a device after successful execution of the suspend callback is
+    'suspended'.
+
+  * If the suspend callback returns -EBUSY or -EAGAIN, the device's runtime PM
+    status remains 'active', which means that the device _must_ be fully
+    operational afterwards.
+
+  * If the suspend callback returns an error code different from -EBUSY and
+    -EAGAIN, the PM core regards this as a fatal error and will refuse to run
+    the helper functions described in Section 4 for the device until its status
+    is directly set to  either 'active', or 'suspended' (the PM core provides
+    special helper functions for this purpose).
+
+In particular, if the driver requires remote wakeup capability (i.e. hardware
+mechanism allowing the device to request a change of its power state, such as
+PCI PME) for proper functioning and device_can_wakeup() returns 'false' for the
+device, then ->runtime_suspend() should return -EBUSY.  On the other hand, if
+device_can_wakeup() returns 'true' for the device and the device is put into a
+low-power state during the execution of the suspend callback, it is expected
+that remote wakeup will be enabled for the device.  Generally, remote wakeup
+should be enabled for all input devices put into low-power states at run time.
+
+The subsystem-level resume callback, if present, is **entirely responsible** for
+handling the resume of the device as appropriate, which may, but need not
+include executing the device driver's own ->runtime_resume() callback (from the
+PM core's point of view it is not necessary to implement a ->runtime_resume()
+callback in a device driver as long as the subsystem-level resume callback knows
+what to do to handle the device).
+
+  * Once the subsystem-level resume callback (or the driver resume callback, if
+    invoked directly) has completed successfully, the PM core regards the device
+    as fully operational, which means that the device _must_ be able to complete
+    I/O operations as needed.  The runtime PM status of the device is then
+    'active'.
+
+  * If the resume callback returns an error code, the PM core regards this as a
+    fatal error and will refuse to run the helper functions described in Section
+    4 for the device, until its status is directly set to either 'active', or
+    'suspended' (by means of special helper functions provided by the PM core
+    for this purpose).
+
+The idle callback (a subsystem-level one, if present, or the driver one) is
+executed by the PM core whenever the device appears to be idle, which is
+indicated to the PM core by two counters, the device's usage counter and the
+counter of 'active' children of the device.
+
+  * If any of these counters is decreased using a helper function provided by
+    the PM core and it turns out to be equal to zero, the other counter is
+    checked.  If that counter also is equal to zero, the PM core executes the
+    idle callback with the device as its argument.
+
+The action performed by the idle callback is totally dependent on the subsystem
+(or driver) in question, but the expected and recommended action is to check
+if the device can be suspended (i.e. if all of the conditions necessary for
+suspending the device are satisfied) and to queue up a suspend request for the
+device in that case.  If there is no idle callback, or if the callback returns
+0, then the PM core will attempt to carry out a runtime suspend of the device,
+also respecting devices configured for autosuspend.  In essence this means a
+call to pm_runtime_autosuspend() (do note that drivers needs to update the
+device last busy mark, pm_runtime_mark_last_busy(), to control the delay under
+this circumstance).  To prevent this (for example, if the callback routine has
+started a delayed suspend), the routine must return a non-zero value.  Negative
+error return codes are ignored by the PM core.
+
+The helper functions provided by the PM core, described in Section 4, guarantee
+that the following constraints are met with respect to runtime PM callbacks for
+one device:
+
+(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute
+    ->runtime_suspend() in parallel with ->runtime_resume() or with another
+    instance of ->runtime_suspend() for the same device) with the exception that
+    ->runtime_suspend() or ->runtime_resume() can be executed in parallel with
+    ->runtime_idle() (although ->runtime_idle() will not be started while any
+    of the other callbacks is being executed for the same device).
+
+(2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active'
+    devices (i.e. the PM core will only execute ->runtime_idle() or
+    ->runtime_suspend() for the devices the runtime PM status of which is
+    'active').
+
+(3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device
+    the usage counter of which is equal to zero _and_ either the counter of
+    'active' children of which is equal to zero, or the 'power.ignore_children'
+    flag of which is set.
+
+(4) ->runtime_resume() can only be executed for 'suspended' devices  (i.e. the
+    PM core will only execute ->runtime_resume() for the devices the runtime
+    PM status of which is 'suspended').
+
+Additionally, the helper functions provided by the PM core obey the following
+rules:
+
+  * If ->runtime_suspend() is about to be executed or there's a pending request
+    to execute it, ->runtime_idle() will not be executed for the same device.
+
+  * A request to execute or to schedule the execution of ->runtime_suspend()
+    will cancel any pending requests to execute ->runtime_idle() for the same
+    device.
+
+  * If ->runtime_resume() is about to be executed or there's a pending request
+    to execute it, the other callbacks will not be executed for the same device.
+
+  * A request to execute ->runtime_resume() will cancel any pending or
+    scheduled requests to execute the other callbacks for the same device,
+    except for scheduled autosuspends.
+
+3. Runtime PM Device Fields
+===========================
+
+The following device runtime PM fields are present in 'struct dev_pm_info', as
+defined in include/linux/pm.h:
+
+  `struct timer_list suspend_timer;`
+    - timer used for scheduling (delayed) suspend and autosuspend requests
+
+  `unsigned long timer_expires;`
+    - timer expiration time, in jiffies (if this is different from zero, the
+      timer is running and will expire at that time, otherwise the timer is not
+      running)
+
+  `struct work_struct work;`
+    - work structure used for queuing up requests (i.e. work items in pm_wq)
+
+  `wait_queue_head_t wait_queue;`
+    - wait queue used if any of the helper functions needs to wait for another
+      one to complete
+
+  `spinlock_t lock;`
+    - lock used for synchronization
+
+  `atomic_t usage_count;`
+    - the usage counter of the device
+
+  `atomic_t child_count;`
+    - the count of 'active' children of the device
+
+  `unsigned int ignore_children;`
+    - if set, the value of child_count is ignored (but still updated)
+
+  `unsigned int disable_depth;`
+    - used for disabling the helper functions (they work normally if this is
+      equal to zero); the initial value of it is 1 (i.e. runtime PM is
+      initially disabled for all devices)
+
+  `int runtime_error;`
+    - if set, there was a fatal error (one of the callbacks returned error code
+      as described in Section 2), so the helper functions will not work until
+      this flag is cleared; this is the error code returned by the failing
+      callback
+
+  `unsigned int idle_notification;`
+    - if set, ->runtime_idle() is being executed
+
+  `unsigned int request_pending;`
+    - if set, there's a pending request (i.e. a work item queued up into pm_wq)
+
+  `enum rpm_request request;`
+    - type of request that's pending (valid if request_pending is set)
+
+  `unsigned int deferred_resume;`
+    - set if ->runtime_resume() is about to be run while ->runtime_suspend() is
+      being executed for that device and it is not practical to wait for the
+      suspend to complete; means "start a resume as soon as you've suspended"
+
+  `enum rpm_status runtime_status;`
+    - the runtime PM status of the device; this field's initial value is
+      RPM_SUSPENDED, which means that each device is initially regarded by the
+      PM core as 'suspended', regardless of its real hardware status
+
+  `unsigned int runtime_auto;`
+    - if set, indicates that the user space has allowed the device driver to
+      power manage the device at run time via the /sys/devices/.../power/control
+      `interface;` it may only be modified with the help of the pm_runtime_allow()
+      and pm_runtime_forbid() helper functions
+
+  `unsigned int no_callbacks;`
+    - indicates that the device does not use the runtime PM callbacks (see
+      Section 8); it may be modified only by the pm_runtime_no_callbacks()
+      helper function
+
+  `unsigned int irq_safe;`
+    - indicates that the ->runtime_suspend() and ->runtime_resume() callbacks
+      will be invoked with the spinlock held and interrupts disabled
+
+  `unsigned int use_autosuspend;`
+    - indicates that the device's driver supports delayed autosuspend (see
+      Section 9); it may be modified only by the
+      pm_runtime{_dont}_use_autosuspend() helper functions
+
+  `unsigned int timer_autosuspends;`
+    - indicates that the PM core should attempt to carry out an autosuspend
+      when the timer expires rather than a normal suspend
+
+  `int autosuspend_delay;`
+    - the delay time (in milliseconds) to be used for autosuspend
+
+  `unsigned long last_busy;`
+    - the time (in jiffies) when the pm_runtime_mark_last_busy() helper
+      function was last called for this device; used in calculating inactivity
+      periods for autosuspend
+
+All of the above fields are members of the 'power' member of 'struct device'.
+
+4. Runtime PM Device Helper Functions
+=====================================
+
+The following runtime PM helper functions are defined in
+drivers/base/power/runtime.c and include/linux/pm_runtime.h:
+
+  `void pm_runtime_init(struct device *dev);`
+    - initialize the device runtime PM fields in 'struct dev_pm_info'
+
+  `void pm_runtime_remove(struct device *dev);`
+    - make sure that the runtime PM of the device will be disabled after
+      removing the device from device hierarchy
+
+  `int pm_runtime_idle(struct device *dev);`
+    - execute the subsystem-level idle callback for the device; returns an
+      error code on failure, where -EINPROGRESS means that ->runtime_idle() is
+      already being executed; if there is no callback or the callback returns 0
+      then run pm_runtime_autosuspend(dev) and return its result
+
+  `int pm_runtime_suspend(struct device *dev);`
+    - execute the subsystem-level suspend callback for the device; returns 0 on
+      success, 1 if the device's runtime PM status was already 'suspended', or
+      error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt
+      to suspend the device again in future and -EACCES means that
+      'power.disable_depth' is different from 0
+
+  `int pm_runtime_autosuspend(struct device *dev);`
+    - same as pm_runtime_suspend() except that the autosuspend delay is taken
+      `into account;` if pm_runtime_autosuspend_expiration() says the delay has
+      not yet expired then an autosuspend is scheduled for the appropriate time
+      and 0 is returned
+
+  `int pm_runtime_resume(struct device *dev);`
+    - execute the subsystem-level resume callback for the device; returns 0 on
+      success, 1 if the device's runtime PM status was already 'active' or
+      error code on failure, where -EAGAIN means it may be safe to attempt to
+      resume the device again in future, but 'power.runtime_error' should be
+      checked additionally, and -EACCES means that 'power.disable_depth' is
+      different from 0
+
+  `int pm_request_idle(struct device *dev);`
+    - submit a request to execute the subsystem-level idle callback for the
+      device (the request is represented by a work item in pm_wq); returns 0 on
+      success or error code if the request has not been queued up
+
+  `int pm_request_autosuspend(struct device *dev);`
+    - schedule the execution of the subsystem-level suspend callback for the
+      device when the autosuspend delay has expired; if the delay has already
+      expired then the work item is queued up immediately
+
+  `int pm_schedule_suspend(struct device *dev, unsigned int delay);`
+    - schedule the execution of the subsystem-level suspend callback for the
+      device in future, where 'delay' is the time to wait before queuing up a
+      suspend work item in pm_wq, in milliseconds (if 'delay' is zero, the work
+      item is queued up immediately); returns 0 on success, 1 if the device's PM
+      runtime status was already 'suspended', or error code if the request
+      hasn't been scheduled (or queued up if 'delay' is 0); if the execution of
+      ->runtime_suspend() is already scheduled and not yet expired, the new
+      value of 'delay' will be used as the time to wait
+
+  `int pm_request_resume(struct device *dev);`
+    - submit a request to execute the subsystem-level resume callback for the
+      device (the request is represented by a work item in pm_wq); returns 0 on
+      success, 1 if the device's runtime PM status was already 'active', or
+      error code if the request hasn't been queued up
+
+  `void pm_runtime_get_noresume(struct device *dev);`
+    - increment the device's usage counter
+
+  `int pm_runtime_get(struct device *dev);`
+    - increment the device's usage counter, run pm_request_resume(dev) and
+      return its result
+
+  `int pm_runtime_get_sync(struct device *dev);`
+    - increment the device's usage counter, run pm_runtime_resume(dev) and
+      return its result
+
+  `int pm_runtime_get_if_in_use(struct device *dev);`
+    - return -EINVAL if 'power.disable_depth' is nonzero; otherwise, if the
+      runtime PM status is RPM_ACTIVE and the runtime PM usage counter is
+      nonzero, increment the counter and return 1; otherwise return 0 without
+      changing the counter
+
+  `void pm_runtime_put_noidle(struct device *dev);`
+    - decrement the device's usage counter
+
+  `int pm_runtime_put(struct device *dev);`
+    - decrement the device's usage counter; if the result is 0 then run
+      pm_request_idle(dev) and return its result
+
+  `int pm_runtime_put_autosuspend(struct device *dev);`
+    - decrement the device's usage counter; if the result is 0 then run
+      pm_request_autosuspend(dev) and return its result
+
+  `int pm_runtime_put_sync(struct device *dev);`
+    - decrement the device's usage counter; if the result is 0 then run
+      pm_runtime_idle(dev) and return its result
+
+  `int pm_runtime_put_sync_suspend(struct device *dev);`
+    - decrement the device's usage counter; if the result is 0 then run
+      pm_runtime_suspend(dev) and return its result
+
+  `int pm_runtime_put_sync_autosuspend(struct device *dev);`
+    - decrement the device's usage counter; if the result is 0 then run
+      pm_runtime_autosuspend(dev) and return its result
+
+  `void pm_runtime_enable(struct device *dev);`
+    - decrement the device's 'power.disable_depth' field; if that field is equal
+      to zero, the runtime PM helper functions can execute subsystem-level
+      callbacks described in Section 2 for the device
+
+  `int pm_runtime_disable(struct device *dev);`
+    - increment the device's 'power.disable_depth' field (if the value of that
+      field was previously zero, this prevents subsystem-level runtime PM
+      callbacks from being run for the device), make sure that all of the
+      pending runtime PM operations on the device are either completed or
+      canceled; returns 1 if there was a resume request pending and it was
+      necessary to execute the subsystem-level resume callback for the device
+      to satisfy that request, otherwise 0 is returned
+
+  `int pm_runtime_barrier(struct device *dev);`
+    - check if there's a resume request pending for the device and resume it
+      (synchronously) in that case, cancel any other pending runtime PM requests
+      regarding it and wait for all runtime PM operations on it in progress to
+      complete; returns 1 if there was a resume request pending and it was
+      necessary to execute the subsystem-level resume callback for the device to
+      satisfy that request, otherwise 0 is returned
+
+  `void pm_suspend_ignore_children(struct device *dev, bool enable);`
+    - set/unset the power.ignore_children flag of the device
+
+  `int pm_runtime_set_active(struct device *dev);`
+    - clear the device's 'power.runtime_error' flag, set the device's runtime
+      PM status to 'active' and update its parent's counter of 'active'
+      children as appropriate (it is only valid to use this function if
+      'power.runtime_error' is set or 'power.disable_depth' is greater than
+      zero); it will fail and return error code if the device has a parent
+      which is not active and the 'power.ignore_children' flag of which is unset
+
+  `void pm_runtime_set_suspended(struct device *dev);`
+    - clear the device's 'power.runtime_error' flag, set the device's runtime
+      PM status to 'suspended' and update its parent's counter of 'active'
+      children as appropriate (it is only valid to use this function if
+      'power.runtime_error' is set or 'power.disable_depth' is greater than
+      zero)
+
+  `bool pm_runtime_active(struct device *dev);`
+    - return true if the device's runtime PM status is 'active' or its
+      'power.disable_depth' field is not equal to zero, or false otherwise
+
+  `bool pm_runtime_suspended(struct device *dev);`
+    - return true if the device's runtime PM status is 'suspended' and its
+      'power.disable_depth' field is equal to zero, or false otherwise
+
+  `bool pm_runtime_status_suspended(struct device *dev);`
+    - return true if the device's runtime PM status is 'suspended'
+
+  `void pm_runtime_allow(struct device *dev);`
+    - set the power.runtime_auto flag for the device and decrease its usage
+      counter (used by the /sys/devices/.../power/control interface to
+      effectively allow the device to be power managed at run time)
+
+  `void pm_runtime_forbid(struct device *dev);`
+    - unset the power.runtime_auto flag for the device and increase its usage
+      counter (used by the /sys/devices/.../power/control interface to
+      effectively prevent the device from being power managed at run time)
+
+  `void pm_runtime_no_callbacks(struct device *dev);`
+    - set the power.no_callbacks flag for the device and remove the runtime
+      PM attributes from /sys/devices/.../power (or prevent them from being
+      added when the device is registered)
+
+  `void pm_runtime_irq_safe(struct device *dev);`
+    - set the power.irq_safe flag for the device, causing the runtime-PM
+      callbacks to be invoked with interrupts off
+
+  `bool pm_runtime_is_irq_safe(struct device *dev);`
+    - return true if power.irq_safe flag was set for the device, causing
+      the runtime-PM callbacks to be invoked with interrupts off
+
+  `void pm_runtime_mark_last_busy(struct device *dev);`
+    - set the power.last_busy field to the current time
+
+  `void pm_runtime_use_autosuspend(struct device *dev);`
+    - set the power.use_autosuspend flag, enabling autosuspend delays; call
+      pm_runtime_get_sync if the flag was previously cleared and
+      power.autosuspend_delay is negative
+
+  `void pm_runtime_dont_use_autosuspend(struct device *dev);`
+    - clear the power.use_autosuspend flag, disabling autosuspend delays;
+      decrement the device's usage counter if the flag was previously set and
+      power.autosuspend_delay is negative; call pm_runtime_idle
+
+  `void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);`
+    - set the power.autosuspend_delay value to 'delay' (expressed in
+      milliseconds); if 'delay' is negative then runtime suspends are
+      prevented; if power.use_autosuspend is set, pm_runtime_get_sync may be
+      called or the device's usage counter may be decremented and
+      pm_runtime_idle called depending on if power.autosuspend_delay is
+      changed to or from a negative value; if power.use_autosuspend is clear,
+      pm_runtime_idle is called
+
+  `unsigned long pm_runtime_autosuspend_expiration(struct device *dev);`
+    - calculate the time when the current autosuspend delay period will expire,
+      based on power.last_busy and power.autosuspend_delay; if the delay time
+      is 1000 ms or larger then the expiration time is rounded up to the
+      nearest second; returns 0 if the delay period has already expired or
+      power.use_autosuspend isn't set, otherwise returns the expiration time
+      in jiffies
+
+It is safe to execute the following helper functions from interrupt context:
+
+- pm_request_idle()
+- pm_request_autosuspend()
+- pm_schedule_suspend()
+- pm_request_resume()
+- pm_runtime_get_noresume()
+- pm_runtime_get()
+- pm_runtime_put_noidle()
+- pm_runtime_put()
+- pm_runtime_put_autosuspend()
+- pm_runtime_enable()
+- pm_suspend_ignore_children()
+- pm_runtime_set_active()
+- pm_runtime_set_suspended()
+- pm_runtime_suspended()
+- pm_runtime_mark_last_busy()
+- pm_runtime_autosuspend_expiration()
+
+If pm_runtime_irq_safe() has been called for a device then the following helper
+functions may also be used in interrupt context:
+
+- pm_runtime_idle()
+- pm_runtime_suspend()
+- pm_runtime_autosuspend()
+- pm_runtime_resume()
+- pm_runtime_get_sync()
+- pm_runtime_put_sync()
+- pm_runtime_put_sync_suspend()
+- pm_runtime_put_sync_autosuspend()
+
+5. Runtime PM Initialization, Device Probing and Removal
+========================================================
+
+Initially, the runtime PM is disabled for all devices, which means that the
+majority of the runtime PM helper functions described in Section 4 will return
+-EAGAIN until pm_runtime_enable() is called for the device.
+
+In addition to that, the initial runtime PM status of all devices is
+'suspended', but it need not reflect the actual physical state of the device.
+Thus, if the device is initially active (i.e. it is able to process I/O), its
+runtime PM status must be changed to 'active', with the help of
+pm_runtime_set_active(), before pm_runtime_enable() is called for the device.
+
+However, if the device has a parent and the parent's runtime PM is enabled,
+calling pm_runtime_set_active() for the device will affect the parent, unless
+the parent's 'power.ignore_children' flag is set.  Namely, in that case the
+parent won't be able to suspend at run time, using the PM core's helper
+functions, as long as the child's status is 'active', even if the child's
+runtime PM is still disabled (i.e. pm_runtime_enable() hasn't been called for
+the child yet or pm_runtime_disable() has been called for it).  For this reason,
+once pm_runtime_set_active() has been called for the device, pm_runtime_enable()
+should be called for it too as soon as reasonably possible or its runtime PM
+status should be changed back to 'suspended' with the help of
+pm_runtime_set_suspended().
+
+If the default initial runtime PM status of the device (i.e. 'suspended')
+reflects the actual state of the device, its bus type's or its driver's
+->probe() callback will likely need to wake it up using one of the PM core's
+helper functions described in Section 4.  In that case, pm_runtime_resume()
+should be used.  Of course, for this purpose the device's runtime PM has to be
+enabled earlier by calling pm_runtime_enable().
+
+Note, if the device may execute pm_runtime calls during the probe (such as
+if it is registers with a subsystem that may call back in) then the
+pm_runtime_get_sync() call paired with a pm_runtime_put() call will be
+appropriate to ensure that the device is not put back to sleep during the
+probe. This can happen with systems such as the network device layer.
+
+It may be desirable to suspend the device once ->probe() has finished.
+Therefore the driver core uses the asynchronous pm_request_idle() to submit a
+request to execute the subsystem-level idle callback for the device at that
+time.  A driver that makes use of the runtime autosuspend feature, may want to
+update the last busy mark before returning from ->probe().
+
+Moreover, the driver core prevents runtime PM callbacks from racing with the bus
+notifier callback in __device_release_driver(), which is necessary, because the
+notifier is used by some subsystems to carry out operations affecting the
+runtime PM functionality.  It does so by calling pm_runtime_get_sync() before
+driver_sysfs_remove() and the BUS_NOTIFY_UNBIND_DRIVER notifications.  This
+resumes the device if it's in the suspended state and prevents it from
+being suspended again while those routines are being executed.
+
+To allow bus types and drivers to put devices into the suspended state by
+calling pm_runtime_suspend() from their ->remove() routines, the driver core
+executes pm_runtime_put_sync() after running the BUS_NOTIFY_UNBIND_DRIVER
+notifications in __device_release_driver().  This requires bus types and
+drivers to make their ->remove() callbacks avoid races with runtime PM directly,
+but also it allows of more flexibility in the handling of devices during the
+removal of their drivers.
+
+Drivers in ->remove() callback should undo the runtime PM changes done
+in ->probe(). Usually this means calling pm_runtime_disable(),
+pm_runtime_dont_use_autosuspend() etc.
+
+The user space can effectively disallow the driver of the device to power manage
+it at run time by changing the value of its /sys/devices/.../power/control
+attribute to "on", which causes pm_runtime_forbid() to be called.  In principle,
+this mechanism may also be used by the driver to effectively turn off the
+runtime power management of the device until the user space turns it on.
+Namely, during the initialization the driver can make sure that the runtime PM
+status of the device is 'active' and call pm_runtime_forbid().  It should be
+noted, however, that if the user space has already intentionally changed the
+value of /sys/devices/.../power/control to "auto" to allow the driver to power
+manage the device at run time, the driver may confuse it by using
+pm_runtime_forbid() this way.
+
+6. Runtime PM and System Sleep
+==============================
+
+Runtime PM and system sleep (i.e., system suspend and hibernation, also known
+as suspend-to-RAM and suspend-to-disk) interact with each other in a couple of
+ways.  If a device is active when a system sleep starts, everything is
+straightforward.  But what should happen if the device is already suspended?
+
+The device may have different wake-up settings for runtime PM and system sleep.
+For example, remote wake-up may be enabled for runtime suspend but disallowed
+for system sleep (device_may_wakeup(dev) returns 'false').  When this happens,
+the subsystem-level system suspend callback is responsible for changing the
+device's wake-up setting (it may leave that to the device driver's system
+suspend routine).  It may be necessary to resume the device and suspend it again
+in order to do so.  The same is true if the driver uses different power levels
+or other settings for runtime suspend and system sleep.
+
+During system resume, the simplest approach is to bring all devices back to full
+power, even if they had been suspended before the system suspend began.  There
+are several reasons for this, including:
+
+  * The device might need to switch power levels, wake-up settings, etc.
+
+  * Remote wake-up events might have been lost by the firmware.
+
+  * The device's children may need the device to be at full power in order
+    to resume themselves.
+
+  * The driver's idea of the device state may not agree with the device's
+    physical state.  This can happen during resume from hibernation.
+
+  * The device might need to be reset.
+
+  * Even though the device was suspended, if its usage counter was > 0 then most
+    likely it would need a runtime resume in the near future anyway.
+
+If the device had been suspended before the system suspend began and it's
+brought back to full power during resume, then its runtime PM status will have
+to be updated to reflect the actual post-system sleep status.  The way to do
+this is:
+
+	 - pm_runtime_disable(dev);
+	 - pm_runtime_set_active(dev);
+	 - pm_runtime_enable(dev);
+
+The PM core always increments the runtime usage counter before calling the
+->suspend() callback and decrements it after calling the ->resume() callback.
+Hence disabling runtime PM temporarily like this will not cause any runtime
+suspend attempts to be permanently lost.  If the usage count goes to zero
+following the return of the ->resume() callback, the ->runtime_idle() callback
+will be invoked as usual.
+
+On some systems, however, system sleep is not entered through a global firmware
+or hardware operation.  Instead, all hardware components are put into low-power
+states directly by the kernel in a coordinated way.  Then, the system sleep
+state effectively follows from the states the hardware components end up in
+and the system is woken up from that state by a hardware interrupt or a similar
+mechanism entirely under the kernel's control.  As a result, the kernel never
+gives control away and the states of all devices during resume are precisely
+known to it.  If that is the case and none of the situations listed above takes
+place (in particular, if the system is not waking up from hibernation), it may
+be more efficient to leave the devices that had been suspended before the system
+suspend began in the suspended state.
+
+To this end, the PM core provides a mechanism allowing some coordination between
+different levels of device hierarchy.  Namely, if a system suspend .prepare()
+callback returns a positive number for a device, that indicates to the PM core
+that the device appears to be runtime-suspended and its state is fine, so it
+may be left in runtime suspend provided that all of its descendants are also
+left in runtime suspend.  If that happens, the PM core will not execute any
+system suspend and resume callbacks for all of those devices, except for the
+complete callback, which is then entirely responsible for handling the device
+as appropriate.  This only applies to system suspend transitions that are not
+related to hibernation (see Documentation/driver-api/pm/devices.rst for more
+information).
+
+The PM core does its best to reduce the probability of race conditions between
+the runtime PM and system suspend/resume (and hibernation) callbacks by carrying
+out the following operations:
+
+  * During system suspend pm_runtime_get_noresume() is called for every device
+    right before executing the subsystem-level .prepare() callback for it and
+    pm_runtime_barrier() is called for every device right before executing the
+    subsystem-level .suspend() callback for it.  In addition to that the PM core
+    calls  __pm_runtime_disable() with 'false' as the second argument for every
+    device right before executing the subsystem-level .suspend_late() callback
+    for it.
+
+  * During system resume pm_runtime_enable() and pm_runtime_put() are called for
+    every device right after executing the subsystem-level .resume_early()
+    callback and right after executing the subsystem-level .complete() callback
+    for it, respectively.
+
+7. Generic subsystem callbacks
+
+Subsystems may wish to conserve code space by using the set of generic power
+management callbacks provided by the PM core, defined in
+driver/base/power/generic_ops.c:
+
+  `int pm_generic_runtime_suspend(struct device *dev);`
+    - invoke the ->runtime_suspend() callback provided by the driver of this
+      device and return its result, or return 0 if not defined
+
+  `int pm_generic_runtime_resume(struct device *dev);`
+    - invoke the ->runtime_resume() callback provided by the driver of this
+      device and return its result, or return 0 if not defined
+
+  `int pm_generic_suspend(struct device *dev);`
+    - if the device has not been suspended at run time, invoke the ->suspend()
+      callback provided by its driver and return its result, or return 0 if not
+      defined
+
+  `int pm_generic_suspend_noirq(struct device *dev);`
+    - if pm_runtime_suspended(dev) returns "false", invoke the ->suspend_noirq()
+      callback provided by the device's driver and return its result, or return
+      0 if not defined
+
+  `int pm_generic_resume(struct device *dev);`
+    - invoke the ->resume() callback provided by the driver of this device and,
+      if successful, change the device's runtime PM status to 'active'
+
+  `int pm_generic_resume_noirq(struct device *dev);`
+    - invoke the ->resume_noirq() callback provided by the driver of this device
+
+  `int pm_generic_freeze(struct device *dev);`
+    - if the device has not been suspended at run time, invoke the ->freeze()
+      callback provided by its driver and return its result, or return 0 if not
+      defined
+
+  `int pm_generic_freeze_noirq(struct device *dev);`
+    - if pm_runtime_suspended(dev) returns "false", invoke the ->freeze_noirq()
+      callback provided by the device's driver and return its result, or return
+      0 if not defined
+
+  `int pm_generic_thaw(struct device *dev);`
+    - if the device has not been suspended at run time, invoke the ->thaw()
+      callback provided by its driver and return its result, or return 0 if not
+      defined
+
+  `int pm_generic_thaw_noirq(struct device *dev);`
+    - if pm_runtime_suspended(dev) returns "false", invoke the ->thaw_noirq()
+      callback provided by the device's driver and return its result, or return
+      0 if not defined
+
+  `int pm_generic_poweroff(struct device *dev);`
+    - if the device has not been suspended at run time, invoke the ->poweroff()
+      callback provided by its driver and return its result, or return 0 if not
+      defined
+
+  `int pm_generic_poweroff_noirq(struct device *dev);`
+    - if pm_runtime_suspended(dev) returns "false", run the ->poweroff_noirq()
+      callback provided by the device's driver and return its result, or return
+      0 if not defined
+
+  `int pm_generic_restore(struct device *dev);`
+    - invoke the ->restore() callback provided by the driver of this device and,
+      if successful, change the device's runtime PM status to 'active'
+
+  `int pm_generic_restore_noirq(struct device *dev);`
+    - invoke the ->restore_noirq() callback provided by the device's driver
+
+These functions are the defaults used by the PM core, if a subsystem doesn't
+provide its own callbacks for ->runtime_idle(), ->runtime_suspend(),
+->runtime_resume(), ->suspend(), ->suspend_noirq(), ->resume(),
+->resume_noirq(), ->freeze(), ->freeze_noirq(), ->thaw(), ->thaw_noirq(),
+->poweroff(), ->poweroff_noirq(), ->restore(), ->restore_noirq() in the
+subsystem-level dev_pm_ops structure.
+
+Device drivers that wish to use the same function as a system suspend, freeze,
+poweroff and runtime suspend callback, and similarly for system resume, thaw,
+restore, and runtime resume, can achieve this with the help of the
+UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its
+last argument to NULL).
+
+8. "No-Callback" Devices
+========================
+
+Some "devices" are only logical sub-devices of their parent and cannot be
+power-managed on their own.  (The prototype example is a USB interface.  Entire
+USB devices can go into low-power mode or send wake-up requests, but neither is
+possible for individual interfaces.)  The drivers for these devices have no
+need of runtime PM callbacks; if the callbacks did exist, ->runtime_suspend()
+and ->runtime_resume() would always return 0 without doing anything else and
+->runtime_idle() would always call pm_runtime_suspend().
+
+Subsystems can tell the PM core about these devices by calling
+pm_runtime_no_callbacks().  This should be done after the device structure is
+initialized and before it is registered (although after device registration is
+also okay).  The routine will set the device's power.no_callbacks flag and
+prevent the non-debugging runtime PM sysfs attributes from being created.
+
+When power.no_callbacks is set, the PM core will not invoke the
+->runtime_idle(), ->runtime_suspend(), or ->runtime_resume() callbacks.
+Instead it will assume that suspends and resumes always succeed and that idle
+devices should be suspended.
+
+As a consequence, the PM core will never directly inform the device's subsystem
+or driver about runtime power changes.  Instead, the driver for the device's
+parent must take responsibility for telling the device's driver when the
+parent's power state changes.
+
+9. Autosuspend, or automatically-delayed suspends
+=================================================
+
+Changing a device's power state isn't free; it requires both time and energy.
+A device should be put in a low-power state only when there's some reason to
+think it will remain in that state for a substantial time.  A common heuristic
+says that a device which hasn't been used for a while is liable to remain
+unused; following this advice, drivers should not allow devices to be suspended
+at runtime until they have been inactive for some minimum period.  Even when
+the heuristic ends up being non-optimal, it will still prevent devices from
+"bouncing" too rapidly between low-power and full-power states.
+
+The term "autosuspend" is an historical remnant.  It doesn't mean that the
+device is automatically suspended (the subsystem or driver still has to call
+the appropriate PM routines); rather it means that runtime suspends will
+automatically be delayed until the desired period of inactivity has elapsed.
+
+Inactivity is determined based on the power.last_busy field.  Drivers should
+call pm_runtime_mark_last_busy() to update this field after carrying out I/O,
+typically just before calling pm_runtime_put_autosuspend().  The desired length
+of the inactivity period is a matter of policy.  Subsystems can set this length
+initially by calling pm_runtime_set_autosuspend_delay(), but after device
+registration the length should be controlled by user space, using the
+/sys/devices/.../power/autosuspend_delay_ms attribute.
+
+In order to use autosuspend, subsystems or drivers must call
+pm_runtime_use_autosuspend() (preferably before registering the device), and
+thereafter they should use the various `*_autosuspend()` helper functions
+instead of the non-autosuspend counterparts::
+
+	Instead of: pm_runtime_suspend    use: pm_runtime_autosuspend;
+	Instead of: pm_schedule_suspend   use: pm_request_autosuspend;
+	Instead of: pm_runtime_put        use: pm_runtime_put_autosuspend;
+	Instead of: pm_runtime_put_sync   use: pm_runtime_put_sync_autosuspend.
+
+Drivers may also continue to use the non-autosuspend helper functions; they
+will behave normally, which means sometimes taking the autosuspend delay into
+account (see pm_runtime_idle).
+
+Under some circumstances a driver or subsystem may want to prevent a device
+from autosuspending immediately, even though the usage counter is zero and the
+autosuspend delay time has expired.  If the ->runtime_suspend() callback
+returns -EAGAIN or -EBUSY, and if the next autosuspend delay expiration time is
+in the future (as it normally would be if the callback invoked
+pm_runtime_mark_last_busy()), the PM core will automatically reschedule the
+autosuspend.  The ->runtime_suspend() callback can't do this rescheduling
+itself because no suspend requests of any kind are accepted while the device is
+suspending (i.e., while the callback is running).
+
+The implementation is well suited for asynchronous use in interrupt contexts.
+However such use inevitably involves races, because the PM core can't
+synchronize ->runtime_suspend() callbacks with the arrival of I/O requests.
+This synchronization must be handled by the driver, using its private lock.
+Here is a schematic pseudo-code example::
+
+	foo_read_or_write(struct foo_priv *foo, void *data)
+	{
+		lock(&foo->private_lock);
+		add_request_to_io_queue(foo, data);
+		if (foo->num_pending_requests++ == 0)
+			pm_runtime_get(&foo->dev);
+		if (!foo->is_suspended)
+			foo_process_next_request(foo);
+		unlock(&foo->private_lock);
+	}
+
+	foo_io_completion(struct foo_priv *foo, void *req)
+	{
+		lock(&foo->private_lock);
+		if (--foo->num_pending_requests == 0) {
+			pm_runtime_mark_last_busy(&foo->dev);
+			pm_runtime_put_autosuspend(&foo->dev);
+		} else {
+			foo_process_next_request(foo);
+		}
+		unlock(&foo->private_lock);
+		/* Send req result back to the user ... */
+	}
+
+	int foo_runtime_suspend(struct device *dev)
+	{
+		struct foo_priv foo = container_of(dev, ...);
+		int ret = 0;
+
+		lock(&foo->private_lock);
+		if (foo->num_pending_requests > 0) {
+			ret = -EBUSY;
+		} else {
+			/* ... suspend the device ... */
+			foo->is_suspended = 1;
+		}
+		unlock(&foo->private_lock);
+		return ret;
+	}
+
+	int foo_runtime_resume(struct device *dev)
+	{
+		struct foo_priv foo = container_of(dev, ...);
+
+		lock(&foo->private_lock);
+		/* ... resume the device ... */
+		foo->is_suspended = 0;
+		pm_runtime_mark_last_busy(&foo->dev);
+		if (foo->num_pending_requests > 0)
+			foo_process_next_request(foo);
+		unlock(&foo->private_lock);
+		return 0;
+	}
+
+The important point is that after foo_io_completion() asks for an autosuspend,
+the foo_runtime_suspend() callback may race with foo_read_or_write().
+Therefore foo_runtime_suspend() has to check whether there are any pending I/O
+requests (while holding the private lock) before allowing the suspend to
+proceed.
+
+In addition, the power.autosuspend_delay field can be changed by user space at
+any time.  If a driver cares about this, it can call
+pm_runtime_autosuspend_expiration() from within the ->runtime_suspend()
+callback while holding its private lock.  If the function returns a nonzero
+value then the delay has not yet expired and the callback should return
+-EAGAIN.
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
deleted file mode 100644
index 937e33c46211..000000000000
--- a/Documentation/power/runtime_pm.txt
+++ /dev/null
@@ -1,928 +0,0 @@
-Runtime Power Management Framework for I/O Devices
-
-(C) 2009-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
-(C) 2010 Alan Stern <stern@rowland.harvard.edu>
-(C) 2014 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-
-1. Introduction
-
-Support for runtime power management (runtime PM) of I/O devices is provided
-at the power management core (PM core) level by means of:
-
-* The power management workqueue pm_wq in which bus types and device drivers can
-  put their PM-related work items.  It is strongly recommended that pm_wq be
-  used for queuing all work items related to runtime PM, because this allows
-  them to be synchronized with system-wide power transitions (suspend to RAM,
-  hibernation and resume from system sleep states).  pm_wq is declared in
-  include/linux/pm_runtime.h and defined in kernel/power/main.c.
-
-* A number of runtime PM fields in the 'power' member of 'struct device' (which
-  is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can
-  be used for synchronizing runtime PM operations with one another.
-
-* Three device runtime PM callbacks in 'struct dev_pm_ops' (defined in
-  include/linux/pm.h).
-
-* A set of helper functions defined in drivers/base/power/runtime.c that can be
-  used for carrying out runtime PM operations in such a way that the
-  synchronization between them is taken care of by the PM core.  Bus types and
-  device drivers are encouraged to use these functions.
-
-The runtime PM callbacks present in 'struct dev_pm_ops', the device runtime PM
-fields of 'struct dev_pm_info' and the core helper functions provided for
-runtime PM are described below.
-
-2. Device Runtime PM Callbacks
-
-There are three device runtime PM callbacks defined in 'struct dev_pm_ops':
-
-struct dev_pm_ops {
-	...
-	int (*runtime_suspend)(struct device *dev);
-	int (*runtime_resume)(struct device *dev);
-	int (*runtime_idle)(struct device *dev);
-	...
-};
-
-The ->runtime_suspend(), ->runtime_resume() and ->runtime_idle() callbacks
-are executed by the PM core for the device's subsystem that may be either of
-the following:
-
-  1. PM domain of the device, if the device's PM domain object, dev->pm_domain,
-     is present.
-
-  2. Device type of the device, if both dev->type and dev->type->pm are present.
-
-  3. Device class of the device, if both dev->class and dev->class->pm are
-     present.
-
-  4. Bus type of the device, if both dev->bus and dev->bus->pm are present.
-
-If the subsystem chosen by applying the above rules doesn't provide the relevant
-callback, the PM core will invoke the corresponding driver callback stored in
-dev->driver->pm directly (if present).
-
-The PM core always checks which callback to use in the order given above, so the
-priority order of callbacks from high to low is: PM domain, device type, class
-and bus type.  Moreover, the high-priority one will always take precedence over
-a low-priority one.  The PM domain, bus type, device type and class callbacks
-are referred to as subsystem-level callbacks in what follows.
-
-By default, the callbacks are always invoked in process context with interrupts
-enabled.  However, the pm_runtime_irq_safe() helper function can be used to tell
-the PM core that it is safe to run the ->runtime_suspend(), ->runtime_resume()
-and ->runtime_idle() callbacks for the given device in atomic context with
-interrupts disabled.  This implies that the callback routines in question must
-not block or sleep, but it also means that the synchronous helper functions
-listed at the end of Section 4 may be used for that device within an interrupt
-handler or generally in an atomic context.
-
-The subsystem-level suspend callback, if present, is _entirely_ _responsible_
-for handling the suspend of the device as appropriate, which may, but need not
-include executing the device driver's own ->runtime_suspend() callback (from the
-PM core's point of view it is not necessary to implement a ->runtime_suspend()
-callback in a device driver as long as the subsystem-level suspend callback
-knows what to do to handle the device).
-
-  * Once the subsystem-level suspend callback (or the driver suspend callback,
-    if invoked directly) has completed successfully for the given device, the PM
-    core regards the device as suspended, which need not mean that it has been
-    put into a low power state.  It is supposed to mean, however, that the
-    device will not process data and will not communicate with the CPU(s) and
-    RAM until the appropriate resume callback is executed for it.  The runtime
-    PM status of a device after successful execution of the suspend callback is
-    'suspended'.
-
-  * If the suspend callback returns -EBUSY or -EAGAIN, the device's runtime PM
-    status remains 'active', which means that the device _must_ be fully
-    operational afterwards.
-
-  * If the suspend callback returns an error code different from -EBUSY and
-    -EAGAIN, the PM core regards this as a fatal error and will refuse to run
-    the helper functions described in Section 4 for the device until its status
-    is directly set to  either 'active', or 'suspended' (the PM core provides
-    special helper functions for this purpose).
-
-In particular, if the driver requires remote wakeup capability (i.e. hardware
-mechanism allowing the device to request a change of its power state, such as
-PCI PME) for proper functioning and device_can_wakeup() returns 'false' for the
-device, then ->runtime_suspend() should return -EBUSY.  On the other hand, if
-device_can_wakeup() returns 'true' for the device and the device is put into a
-low-power state during the execution of the suspend callback, it is expected
-that remote wakeup will be enabled for the device.  Generally, remote wakeup
-should be enabled for all input devices put into low-power states at run time.
-
-The subsystem-level resume callback, if present, is _entirely_ _responsible_ for
-handling the resume of the device as appropriate, which may, but need not
-include executing the device driver's own ->runtime_resume() callback (from the
-PM core's point of view it is not necessary to implement a ->runtime_resume()
-callback in a device driver as long as the subsystem-level resume callback knows
-what to do to handle the device).
-
-  * Once the subsystem-level resume callback (or the driver resume callback, if
-    invoked directly) has completed successfully, the PM core regards the device
-    as fully operational, which means that the device _must_ be able to complete
-    I/O operations as needed.  The runtime PM status of the device is then
-    'active'.
-
-  * If the resume callback returns an error code, the PM core regards this as a
-    fatal error and will refuse to run the helper functions described in Section
-    4 for the device, until its status is directly set to either 'active', or
-    'suspended' (by means of special helper functions provided by the PM core
-    for this purpose).
-
-The idle callback (a subsystem-level one, if present, or the driver one) is
-executed by the PM core whenever the device appears to be idle, which is
-indicated to the PM core by two counters, the device's usage counter and the
-counter of 'active' children of the device.
-
-  * If any of these counters is decreased using a helper function provided by
-    the PM core and it turns out to be equal to zero, the other counter is
-    checked.  If that counter also is equal to zero, the PM core executes the
-    idle callback with the device as its argument.
-
-The action performed by the idle callback is totally dependent on the subsystem
-(or driver) in question, but the expected and recommended action is to check
-if the device can be suspended (i.e. if all of the conditions necessary for
-suspending the device are satisfied) and to queue up a suspend request for the
-device in that case.  If there is no idle callback, or if the callback returns
-0, then the PM core will attempt to carry out a runtime suspend of the device,
-also respecting devices configured for autosuspend.  In essence this means a
-call to pm_runtime_autosuspend() (do note that drivers needs to update the
-device last busy mark, pm_runtime_mark_last_busy(), to control the delay under
-this circumstance).  To prevent this (for example, if the callback routine has
-started a delayed suspend), the routine must return a non-zero value.  Negative
-error return codes are ignored by the PM core.
-
-The helper functions provided by the PM core, described in Section 4, guarantee
-that the following constraints are met with respect to runtime PM callbacks for
-one device:
-
-(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute
-    ->runtime_suspend() in parallel with ->runtime_resume() or with another
-    instance of ->runtime_suspend() for the same device) with the exception that
-    ->runtime_suspend() or ->runtime_resume() can be executed in parallel with
-    ->runtime_idle() (although ->runtime_idle() will not be started while any
-    of the other callbacks is being executed for the same device).
-
-(2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active'
-    devices (i.e. the PM core will only execute ->runtime_idle() or
-    ->runtime_suspend() for the devices the runtime PM status of which is
-    'active').
-
-(3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device
-    the usage counter of which is equal to zero _and_ either the counter of
-    'active' children of which is equal to zero, or the 'power.ignore_children'
-    flag of which is set.
-
-(4) ->runtime_resume() can only be executed for 'suspended' devices  (i.e. the
-    PM core will only execute ->runtime_resume() for the devices the runtime
-    PM status of which is 'suspended').
-
-Additionally, the helper functions provided by the PM core obey the following
-rules:
-
-  * If ->runtime_suspend() is about to be executed or there's a pending request
-    to execute it, ->runtime_idle() will not be executed for the same device.
-
-  * A request to execute or to schedule the execution of ->runtime_suspend()
-    will cancel any pending requests to execute ->runtime_idle() for the same
-    device.
-
-  * If ->runtime_resume() is about to be executed or there's a pending request
-    to execute it, the other callbacks will not be executed for the same device.
-
-  * A request to execute ->runtime_resume() will cancel any pending or
-    scheduled requests to execute the other callbacks for the same device,
-    except for scheduled autosuspends.
-
-3. Runtime PM Device Fields
-
-The following device runtime PM fields are present in 'struct dev_pm_info', as
-defined in include/linux/pm.h:
-
-  struct timer_list suspend_timer;
-    - timer used for scheduling (delayed) suspend and autosuspend requests
-
-  unsigned long timer_expires;
-    - timer expiration time, in jiffies (if this is different from zero, the
-      timer is running and will expire at that time, otherwise the timer is not
-      running)
-
-  struct work_struct work;
-    - work structure used for queuing up requests (i.e. work items in pm_wq)
-
-  wait_queue_head_t wait_queue;
-    - wait queue used if any of the helper functions needs to wait for another
-      one to complete
-
-  spinlock_t lock;
-    - lock used for synchronization
-
-  atomic_t usage_count;
-    - the usage counter of the device
-
-  atomic_t child_count;
-    - the count of 'active' children of the device
-
-  unsigned int ignore_children;
-    - if set, the value of child_count is ignored (but still updated)
-
-  unsigned int disable_depth;
-    - used for disabling the helper functions (they work normally if this is
-      equal to zero); the initial value of it is 1 (i.e. runtime PM is
-      initially disabled for all devices)
-
-  int runtime_error;
-    - if set, there was a fatal error (one of the callbacks returned error code
-      as described in Section 2), so the helper functions will not work until
-      this flag is cleared; this is the error code returned by the failing
-      callback
-
-  unsigned int idle_notification;
-    - if set, ->runtime_idle() is being executed
-
-  unsigned int request_pending;
-    - if set, there's a pending request (i.e. a work item queued up into pm_wq)
-
-  enum rpm_request request;
-    - type of request that's pending (valid if request_pending is set)
-
-  unsigned int deferred_resume;
-    - set if ->runtime_resume() is about to be run while ->runtime_suspend() is
-      being executed for that device and it is not practical to wait for the
-      suspend to complete; means "start a resume as soon as you've suspended"
-
-  enum rpm_status runtime_status;
-    - the runtime PM status of the device; this field's initial value is
-      RPM_SUSPENDED, which means that each device is initially regarded by the
-      PM core as 'suspended', regardless of its real hardware status
-
-  unsigned int runtime_auto;
-    - if set, indicates that the user space has allowed the device driver to
-      power manage the device at run time via the /sys/devices/.../power/control
-      interface; it may only be modified with the help of the pm_runtime_allow()
-      and pm_runtime_forbid() helper functions
-
-  unsigned int no_callbacks;
-    - indicates that the device does not use the runtime PM callbacks (see
-      Section 8); it may be modified only by the pm_runtime_no_callbacks()
-      helper function
-
-  unsigned int irq_safe;
-    - indicates that the ->runtime_suspend() and ->runtime_resume() callbacks
-      will be invoked with the spinlock held and interrupts disabled
-
-  unsigned int use_autosuspend;
-    - indicates that the device's driver supports delayed autosuspend (see
-      Section 9); it may be modified only by the
-      pm_runtime{_dont}_use_autosuspend() helper functions
-
-  unsigned int timer_autosuspends;
-    - indicates that the PM core should attempt to carry out an autosuspend
-      when the timer expires rather than a normal suspend
-
-  int autosuspend_delay;
-    - the delay time (in milliseconds) to be used for autosuspend
-
-  unsigned long last_busy;
-    - the time (in jiffies) when the pm_runtime_mark_last_busy() helper
-      function was last called for this device; used in calculating inactivity
-      periods for autosuspend
-
-All of the above fields are members of the 'power' member of 'struct device'.
-
-4. Runtime PM Device Helper Functions
-
-The following runtime PM helper functions are defined in
-drivers/base/power/runtime.c and include/linux/pm_runtime.h:
-
-  void pm_runtime_init(struct device *dev);
-    - initialize the device runtime PM fields in 'struct dev_pm_info'
-
-  void pm_runtime_remove(struct device *dev);
-    - make sure that the runtime PM of the device will be disabled after
-      removing the device from device hierarchy
-
-  int pm_runtime_idle(struct device *dev);
-    - execute the subsystem-level idle callback for the device; returns an
-      error code on failure, where -EINPROGRESS means that ->runtime_idle() is
-      already being executed; if there is no callback or the callback returns 0
-      then run pm_runtime_autosuspend(dev) and return its result
-
-  int pm_runtime_suspend(struct device *dev);
-    - execute the subsystem-level suspend callback for the device; returns 0 on
-      success, 1 if the device's runtime PM status was already 'suspended', or
-      error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt
-      to suspend the device again in future and -EACCES means that
-      'power.disable_depth' is different from 0
-
-  int pm_runtime_autosuspend(struct device *dev);
-    - same as pm_runtime_suspend() except that the autosuspend delay is taken
-      into account; if pm_runtime_autosuspend_expiration() says the delay has
-      not yet expired then an autosuspend is scheduled for the appropriate time
-      and 0 is returned
-
-  int pm_runtime_resume(struct device *dev);
-    - execute the subsystem-level resume callback for the device; returns 0 on
-      success, 1 if the device's runtime PM status was already 'active' or
-      error code on failure, where -EAGAIN means it may be safe to attempt to
-      resume the device again in future, but 'power.runtime_error' should be
-      checked additionally, and -EACCES means that 'power.disable_depth' is
-      different from 0
-
-  int pm_request_idle(struct device *dev);
-    - submit a request to execute the subsystem-level idle callback for the
-      device (the request is represented by a work item in pm_wq); returns 0 on
-      success or error code if the request has not been queued up
-
-  int pm_request_autosuspend(struct device *dev);
-    - schedule the execution of the subsystem-level suspend callback for the
-      device when the autosuspend delay has expired; if the delay has already
-      expired then the work item is queued up immediately
-
-  int pm_schedule_suspend(struct device *dev, unsigned int delay);
-    - schedule the execution of the subsystem-level suspend callback for the
-      device in future, where 'delay' is the time to wait before queuing up a
-      suspend work item in pm_wq, in milliseconds (if 'delay' is zero, the work
-      item is queued up immediately); returns 0 on success, 1 if the device's PM
-      runtime status was already 'suspended', or error code if the request
-      hasn't been scheduled (or queued up if 'delay' is 0); if the execution of
-      ->runtime_suspend() is already scheduled and not yet expired, the new
-      value of 'delay' will be used as the time to wait
-
-  int pm_request_resume(struct device *dev);
-    - submit a request to execute the subsystem-level resume callback for the
-      device (the request is represented by a work item in pm_wq); returns 0 on
-      success, 1 if the device's runtime PM status was already 'active', or
-      error code if the request hasn't been queued up
-
-  void pm_runtime_get_noresume(struct device *dev);
-    - increment the device's usage counter
-
-  int pm_runtime_get(struct device *dev);
-    - increment the device's usage counter, run pm_request_resume(dev) and
-      return its result
-
-  int pm_runtime_get_sync(struct device *dev);
-    - increment the device's usage counter, run pm_runtime_resume(dev) and
-      return its result
-
-  int pm_runtime_get_if_in_use(struct device *dev);
-    - return -EINVAL if 'power.disable_depth' is nonzero; otherwise, if the
-      runtime PM status is RPM_ACTIVE and the runtime PM usage counter is
-      nonzero, increment the counter and return 1; otherwise return 0 without
-      changing the counter
-
-  void pm_runtime_put_noidle(struct device *dev);
-    - decrement the device's usage counter
-
-  int pm_runtime_put(struct device *dev);
-    - decrement the device's usage counter; if the result is 0 then run
-      pm_request_idle(dev) and return its result
-
-  int pm_runtime_put_autosuspend(struct device *dev);
-    - decrement the device's usage counter; if the result is 0 then run
-      pm_request_autosuspend(dev) and return its result
-
-  int pm_runtime_put_sync(struct device *dev);
-    - decrement the device's usage counter; if the result is 0 then run
-      pm_runtime_idle(dev) and return its result
-
-  int pm_runtime_put_sync_suspend(struct device *dev);
-    - decrement the device's usage counter; if the result is 0 then run
-      pm_runtime_suspend(dev) and return its result
-
-  int pm_runtime_put_sync_autosuspend(struct device *dev);
-    - decrement the device's usage counter; if the result is 0 then run
-      pm_runtime_autosuspend(dev) and return its result
-
-  void pm_runtime_enable(struct device *dev);
-    - decrement the device's 'power.disable_depth' field; if that field is equal
-      to zero, the runtime PM helper functions can execute subsystem-level
-      callbacks described in Section 2 for the device
-
-  int pm_runtime_disable(struct device *dev);
-    - increment the device's 'power.disable_depth' field (if the value of that
-      field was previously zero, this prevents subsystem-level runtime PM
-      callbacks from being run for the device), make sure that all of the
-      pending runtime PM operations on the device are either completed or
-      canceled; returns 1 if there was a resume request pending and it was
-      necessary to execute the subsystem-level resume callback for the device
-      to satisfy that request, otherwise 0 is returned
-
-  int pm_runtime_barrier(struct device *dev);
-    - check if there's a resume request pending for the device and resume it
-      (synchronously) in that case, cancel any other pending runtime PM requests
-      regarding it and wait for all runtime PM operations on it in progress to
-      complete; returns 1 if there was a resume request pending and it was
-      necessary to execute the subsystem-level resume callback for the device to
-      satisfy that request, otherwise 0 is returned
-
-  void pm_suspend_ignore_children(struct device *dev, bool enable);
-    - set/unset the power.ignore_children flag of the device
-
-  int pm_runtime_set_active(struct device *dev);
-    - clear the device's 'power.runtime_error' flag, set the device's runtime
-      PM status to 'active' and update its parent's counter of 'active'
-      children as appropriate (it is only valid to use this function if
-      'power.runtime_error' is set or 'power.disable_depth' is greater than
-      zero); it will fail and return error code if the device has a parent
-      which is not active and the 'power.ignore_children' flag of which is unset
-
-  void pm_runtime_set_suspended(struct device *dev);
-    - clear the device's 'power.runtime_error' flag, set the device's runtime
-      PM status to 'suspended' and update its parent's counter of 'active'
-      children as appropriate (it is only valid to use this function if
-      'power.runtime_error' is set or 'power.disable_depth' is greater than
-      zero)
-
-  bool pm_runtime_active(struct device *dev);
-    - return true if the device's runtime PM status is 'active' or its
-      'power.disable_depth' field is not equal to zero, or false otherwise
-
-  bool pm_runtime_suspended(struct device *dev);
-    - return true if the device's runtime PM status is 'suspended' and its
-      'power.disable_depth' field is equal to zero, or false otherwise
-
-  bool pm_runtime_status_suspended(struct device *dev);
-    - return true if the device's runtime PM status is 'suspended'
-
-  void pm_runtime_allow(struct device *dev);
-    - set the power.runtime_auto flag for the device and decrease its usage
-      counter (used by the /sys/devices/.../power/control interface to
-      effectively allow the device to be power managed at run time)
-
-  void pm_runtime_forbid(struct device *dev);
-    - unset the power.runtime_auto flag for the device and increase its usage
-      counter (used by the /sys/devices/.../power/control interface to
-      effectively prevent the device from being power managed at run time)
-
-  void pm_runtime_no_callbacks(struct device *dev);
-    - set the power.no_callbacks flag for the device and remove the runtime
-      PM attributes from /sys/devices/.../power (or prevent them from being
-      added when the device is registered)
-
-  void pm_runtime_irq_safe(struct device *dev);
-    - set the power.irq_safe flag for the device, causing the runtime-PM
-      callbacks to be invoked with interrupts off
-
-  bool pm_runtime_is_irq_safe(struct device *dev);
-    - return true if power.irq_safe flag was set for the device, causing
-      the runtime-PM callbacks to be invoked with interrupts off
-
-  void pm_runtime_mark_last_busy(struct device *dev);
-    - set the power.last_busy field to the current time
-
-  void pm_runtime_use_autosuspend(struct device *dev);
-    - set the power.use_autosuspend flag, enabling autosuspend delays; call
-      pm_runtime_get_sync if the flag was previously cleared and
-      power.autosuspend_delay is negative
-
-  void pm_runtime_dont_use_autosuspend(struct device *dev);
-    - clear the power.use_autosuspend flag, disabling autosuspend delays;
-      decrement the device's usage counter if the flag was previously set and
-      power.autosuspend_delay is negative; call pm_runtime_idle
-
-  void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
-    - set the power.autosuspend_delay value to 'delay' (expressed in
-      milliseconds); if 'delay' is negative then runtime suspends are
-      prevented; if power.use_autosuspend is set, pm_runtime_get_sync may be
-      called or the device's usage counter may be decremented and
-      pm_runtime_idle called depending on if power.autosuspend_delay is
-      changed to or from a negative value; if power.use_autosuspend is clear,
-      pm_runtime_idle is called
-
-  unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
-    - calculate the time when the current autosuspend delay period will expire,
-      based on power.last_busy and power.autosuspend_delay; if the delay time
-      is 1000 ms or larger then the expiration time is rounded up to the
-      nearest second; returns 0 if the delay period has already expired or
-      power.use_autosuspend isn't set, otherwise returns the expiration time
-      in jiffies
-
-It is safe to execute the following helper functions from interrupt context:
-
-pm_request_idle()
-pm_request_autosuspend()
-pm_schedule_suspend()
-pm_request_resume()
-pm_runtime_get_noresume()
-pm_runtime_get()
-pm_runtime_put_noidle()
-pm_runtime_put()
-pm_runtime_put_autosuspend()
-pm_runtime_enable()
-pm_suspend_ignore_children()
-pm_runtime_set_active()
-pm_runtime_set_suspended()
-pm_runtime_suspended()
-pm_runtime_mark_last_busy()
-pm_runtime_autosuspend_expiration()
-
-If pm_runtime_irq_safe() has been called for a device then the following helper
-functions may also be used in interrupt context:
-
-pm_runtime_idle()
-pm_runtime_suspend()
-pm_runtime_autosuspend()
-pm_runtime_resume()
-pm_runtime_get_sync()
-pm_runtime_put_sync()
-pm_runtime_put_sync_suspend()
-pm_runtime_put_sync_autosuspend()
-
-5. Runtime PM Initialization, Device Probing and Removal
-
-Initially, the runtime PM is disabled for all devices, which means that the
-majority of the runtime PM helper functions described in Section 4 will return
--EAGAIN until pm_runtime_enable() is called for the device.
-
-In addition to that, the initial runtime PM status of all devices is
-'suspended', but it need not reflect the actual physical state of the device.
-Thus, if the device is initially active (i.e. it is able to process I/O), its
-runtime PM status must be changed to 'active', with the help of
-pm_runtime_set_active(), before pm_runtime_enable() is called for the device.
-
-However, if the device has a parent and the parent's runtime PM is enabled,
-calling pm_runtime_set_active() for the device will affect the parent, unless
-the parent's 'power.ignore_children' flag is set.  Namely, in that case the
-parent won't be able to suspend at run time, using the PM core's helper
-functions, as long as the child's status is 'active', even if the child's
-runtime PM is still disabled (i.e. pm_runtime_enable() hasn't been called for
-the child yet or pm_runtime_disable() has been called for it).  For this reason,
-once pm_runtime_set_active() has been called for the device, pm_runtime_enable()
-should be called for it too as soon as reasonably possible or its runtime PM
-status should be changed back to 'suspended' with the help of
-pm_runtime_set_suspended().
-
-If the default initial runtime PM status of the device (i.e. 'suspended')
-reflects the actual state of the device, its bus type's or its driver's
-->probe() callback will likely need to wake it up using one of the PM core's
-helper functions described in Section 4.  In that case, pm_runtime_resume()
-should be used.  Of course, for this purpose the device's runtime PM has to be
-enabled earlier by calling pm_runtime_enable().
-
-Note, if the device may execute pm_runtime calls during the probe (such as
-if it is registers with a subsystem that may call back in) then the
-pm_runtime_get_sync() call paired with a pm_runtime_put() call will be
-appropriate to ensure that the device is not put back to sleep during the
-probe. This can happen with systems such as the network device layer.
-
-It may be desirable to suspend the device once ->probe() has finished.
-Therefore the driver core uses the asynchronous pm_request_idle() to submit a
-request to execute the subsystem-level idle callback for the device at that
-time.  A driver that makes use of the runtime autosuspend feature, may want to
-update the last busy mark before returning from ->probe().
-
-Moreover, the driver core prevents runtime PM callbacks from racing with the bus
-notifier callback in __device_release_driver(), which is necessary, because the
-notifier is used by some subsystems to carry out operations affecting the
-runtime PM functionality.  It does so by calling pm_runtime_get_sync() before
-driver_sysfs_remove() and the BUS_NOTIFY_UNBIND_DRIVER notifications.  This
-resumes the device if it's in the suspended state and prevents it from
-being suspended again while those routines are being executed.
-
-To allow bus types and drivers to put devices into the suspended state by
-calling pm_runtime_suspend() from their ->remove() routines, the driver core
-executes pm_runtime_put_sync() after running the BUS_NOTIFY_UNBIND_DRIVER
-notifications in __device_release_driver().  This requires bus types and
-drivers to make their ->remove() callbacks avoid races with runtime PM directly,
-but also it allows of more flexibility in the handling of devices during the
-removal of their drivers.
-
-Drivers in ->remove() callback should undo the runtime PM changes done
-in ->probe(). Usually this means calling pm_runtime_disable(),
-pm_runtime_dont_use_autosuspend() etc.
-
-The user space can effectively disallow the driver of the device to power manage
-it at run time by changing the value of its /sys/devices/.../power/control
-attribute to "on", which causes pm_runtime_forbid() to be called.  In principle,
-this mechanism may also be used by the driver to effectively turn off the
-runtime power management of the device until the user space turns it on.
-Namely, during the initialization the driver can make sure that the runtime PM
-status of the device is 'active' and call pm_runtime_forbid().  It should be
-noted, however, that if the user space has already intentionally changed the
-value of /sys/devices/.../power/control to "auto" to allow the driver to power
-manage the device at run time, the driver may confuse it by using
-pm_runtime_forbid() this way.
-
-6. Runtime PM and System Sleep
-
-Runtime PM and system sleep (i.e., system suspend and hibernation, also known
-as suspend-to-RAM and suspend-to-disk) interact with each other in a couple of
-ways.  If a device is active when a system sleep starts, everything is
-straightforward.  But what should happen if the device is already suspended?
-
-The device may have different wake-up settings for runtime PM and system sleep.
-For example, remote wake-up may be enabled for runtime suspend but disallowed
-for system sleep (device_may_wakeup(dev) returns 'false').  When this happens,
-the subsystem-level system suspend callback is responsible for changing the
-device's wake-up setting (it may leave that to the device driver's system
-suspend routine).  It may be necessary to resume the device and suspend it again
-in order to do so.  The same is true if the driver uses different power levels
-or other settings for runtime suspend and system sleep.
-
-During system resume, the simplest approach is to bring all devices back to full
-power, even if they had been suspended before the system suspend began.  There
-are several reasons for this, including:
-
-  * The device might need to switch power levels, wake-up settings, etc.
-
-  * Remote wake-up events might have been lost by the firmware.
-
-  * The device's children may need the device to be at full power in order
-    to resume themselves.
-
-  * The driver's idea of the device state may not agree with the device's
-    physical state.  This can happen during resume from hibernation.
-
-  * The device might need to be reset.
-
-  * Even though the device was suspended, if its usage counter was > 0 then most
-    likely it would need a runtime resume in the near future anyway.
-
-If the device had been suspended before the system suspend began and it's
-brought back to full power during resume, then its runtime PM status will have
-to be updated to reflect the actual post-system sleep status.  The way to do
-this is:
-
-	pm_runtime_disable(dev);
-	pm_runtime_set_active(dev);
-	pm_runtime_enable(dev);
-
-The PM core always increments the runtime usage counter before calling the
-->suspend() callback and decrements it after calling the ->resume() callback.
-Hence disabling runtime PM temporarily like this will not cause any runtime
-suspend attempts to be permanently lost.  If the usage count goes to zero
-following the return of the ->resume() callback, the ->runtime_idle() callback
-will be invoked as usual.
-
-On some systems, however, system sleep is not entered through a global firmware
-or hardware operation.  Instead, all hardware components are put into low-power
-states directly by the kernel in a coordinated way.  Then, the system sleep
-state effectively follows from the states the hardware components end up in
-and the system is woken up from that state by a hardware interrupt or a similar
-mechanism entirely under the kernel's control.  As a result, the kernel never
-gives control away and the states of all devices during resume are precisely
-known to it.  If that is the case and none of the situations listed above takes
-place (in particular, if the system is not waking up from hibernation), it may
-be more efficient to leave the devices that had been suspended before the system
-suspend began in the suspended state.
-
-To this end, the PM core provides a mechanism allowing some coordination between
-different levels of device hierarchy.  Namely, if a system suspend .prepare()
-callback returns a positive number for a device, that indicates to the PM core
-that the device appears to be runtime-suspended and its state is fine, so it
-may be left in runtime suspend provided that all of its descendants are also
-left in runtime suspend.  If that happens, the PM core will not execute any
-system suspend and resume callbacks for all of those devices, except for the
-complete callback, which is then entirely responsible for handling the device
-as appropriate.  This only applies to system suspend transitions that are not
-related to hibernation (see Documentation/driver-api/pm/devices.rst for more
-information).
-
-The PM core does its best to reduce the probability of race conditions between
-the runtime PM and system suspend/resume (and hibernation) callbacks by carrying
-out the following operations:
-
-  * During system suspend pm_runtime_get_noresume() is called for every device
-    right before executing the subsystem-level .prepare() callback for it and
-    pm_runtime_barrier() is called for every device right before executing the
-    subsystem-level .suspend() callback for it.  In addition to that the PM core
-    calls  __pm_runtime_disable() with 'false' as the second argument for every
-    device right before executing the subsystem-level .suspend_late() callback
-    for it.
-
-  * During system resume pm_runtime_enable() and pm_runtime_put() are called for
-    every device right after executing the subsystem-level .resume_early()
-    callback and right after executing the subsystem-level .complete() callback
-    for it, respectively.
-
-7. Generic subsystem callbacks
-
-Subsystems may wish to conserve code space by using the set of generic power
-management callbacks provided by the PM core, defined in
-driver/base/power/generic_ops.c:
-
-  int pm_generic_runtime_suspend(struct device *dev);
-    - invoke the ->runtime_suspend() callback provided by the driver of this
-      device and return its result, or return 0 if not defined
-
-  int pm_generic_runtime_resume(struct device *dev);
-    - invoke the ->runtime_resume() callback provided by the driver of this
-      device and return its result, or return 0 if not defined
-
-  int pm_generic_suspend(struct device *dev);
-    - if the device has not been suspended at run time, invoke the ->suspend()
-      callback provided by its driver and return its result, or return 0 if not
-      defined
-
-  int pm_generic_suspend_noirq(struct device *dev);
-    - if pm_runtime_suspended(dev) returns "false", invoke the ->suspend_noirq()
-      callback provided by the device's driver and return its result, or return
-      0 if not defined
-
-  int pm_generic_resume(struct device *dev);
-    - invoke the ->resume() callback provided by the driver of this device and,
-      if successful, change the device's runtime PM status to 'active'
-
-  int pm_generic_resume_noirq(struct device *dev);
-    - invoke the ->resume_noirq() callback provided by the driver of this device
-
-  int pm_generic_freeze(struct device *dev);
-    - if the device has not been suspended at run time, invoke the ->freeze()
-      callback provided by its driver and return its result, or return 0 if not
-      defined
-
-  int pm_generic_freeze_noirq(struct device *dev);
-    - if pm_runtime_suspended(dev) returns "false", invoke the ->freeze_noirq()
-      callback provided by the device's driver and return its result, or return
-      0 if not defined
-
-  int pm_generic_thaw(struct device *dev);
-    - if the device has not been suspended at run time, invoke the ->thaw()
-      callback provided by its driver and return its result, or return 0 if not
-      defined
-
-  int pm_generic_thaw_noirq(struct device *dev);
-    - if pm_runtime_suspended(dev) returns "false", invoke the ->thaw_noirq()
-      callback provided by the device's driver and return its result, or return
-      0 if not defined
-
-  int pm_generic_poweroff(struct device *dev);
-    - if the device has not been suspended at run time, invoke the ->poweroff()
-      callback provided by its driver and return its result, or return 0 if not
-      defined
-
-  int pm_generic_poweroff_noirq(struct device *dev);
-    - if pm_runtime_suspended(dev) returns "false", run the ->poweroff_noirq()
-      callback provided by the device's driver and return its result, or return
-      0 if not defined
-
-  int pm_generic_restore(struct device *dev);
-    - invoke the ->restore() callback provided by the driver of this device and,
-      if successful, change the device's runtime PM status to 'active'
-
-  int pm_generic_restore_noirq(struct device *dev);
-    - invoke the ->restore_noirq() callback provided by the device's driver
-
-These functions are the defaults used by the PM core, if a subsystem doesn't
-provide its own callbacks for ->runtime_idle(), ->runtime_suspend(),
-->runtime_resume(), ->suspend(), ->suspend_noirq(), ->resume(),
-->resume_noirq(), ->freeze(), ->freeze_noirq(), ->thaw(), ->thaw_noirq(),
-->poweroff(), ->poweroff_noirq(), ->restore(), ->restore_noirq() in the
-subsystem-level dev_pm_ops structure.
-
-Device drivers that wish to use the same function as a system suspend, freeze,
-poweroff and runtime suspend callback, and similarly for system resume, thaw,
-restore, and runtime resume, can achieve this with the help of the
-UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its
-last argument to NULL).
-
-8. "No-Callback" Devices
-
-Some "devices" are only logical sub-devices of their parent and cannot be
-power-managed on their own.  (The prototype example is a USB interface.  Entire
-USB devices can go into low-power mode or send wake-up requests, but neither is
-possible for individual interfaces.)  The drivers for these devices have no
-need of runtime PM callbacks; if the callbacks did exist, ->runtime_suspend()
-and ->runtime_resume() would always return 0 without doing anything else and
-->runtime_idle() would always call pm_runtime_suspend().
-
-Subsystems can tell the PM core about these devices by calling
-pm_runtime_no_callbacks().  This should be done after the device structure is
-initialized and before it is registered (although after device registration is
-also okay).  The routine will set the device's power.no_callbacks flag and
-prevent the non-debugging runtime PM sysfs attributes from being created.
-
-When power.no_callbacks is set, the PM core will not invoke the
-->runtime_idle(), ->runtime_suspend(), or ->runtime_resume() callbacks.
-Instead it will assume that suspends and resumes always succeed and that idle
-devices should be suspended.
-
-As a consequence, the PM core will never directly inform the device's subsystem
-or driver about runtime power changes.  Instead, the driver for the device's
-parent must take responsibility for telling the device's driver when the
-parent's power state changes.
-
-9. Autosuspend, or automatically-delayed suspends
-
-Changing a device's power state isn't free; it requires both time and energy.
-A device should be put in a low-power state only when there's some reason to
-think it will remain in that state for a substantial time.  A common heuristic
-says that a device which hasn't been used for a while is liable to remain
-unused; following this advice, drivers should not allow devices to be suspended
-at runtime until they have been inactive for some minimum period.  Even when
-the heuristic ends up being non-optimal, it will still prevent devices from
-"bouncing" too rapidly between low-power and full-power states.
-
-The term "autosuspend" is an historical remnant.  It doesn't mean that the
-device is automatically suspended (the subsystem or driver still has to call
-the appropriate PM routines); rather it means that runtime suspends will
-automatically be delayed until the desired period of inactivity has elapsed.
-
-Inactivity is determined based on the power.last_busy field.  Drivers should
-call pm_runtime_mark_last_busy() to update this field after carrying out I/O,
-typically just before calling pm_runtime_put_autosuspend().  The desired length
-of the inactivity period is a matter of policy.  Subsystems can set this length
-initially by calling pm_runtime_set_autosuspend_delay(), but after device
-registration the length should be controlled by user space, using the
-/sys/devices/.../power/autosuspend_delay_ms attribute.
-
-In order to use autosuspend, subsystems or drivers must call
-pm_runtime_use_autosuspend() (preferably before registering the device), and
-thereafter they should use the various *_autosuspend() helper functions instead
-of the non-autosuspend counterparts:
-
-	Instead of: pm_runtime_suspend    use: pm_runtime_autosuspend;
-	Instead of: pm_schedule_suspend   use: pm_request_autosuspend;
-	Instead of: pm_runtime_put        use: pm_runtime_put_autosuspend;
-	Instead of: pm_runtime_put_sync   use: pm_runtime_put_sync_autosuspend.
-
-Drivers may also continue to use the non-autosuspend helper functions; they
-will behave normally, which means sometimes taking the autosuspend delay into
-account (see pm_runtime_idle).
-
-Under some circumstances a driver or subsystem may want to prevent a device
-from autosuspending immediately, even though the usage counter is zero and the
-autosuspend delay time has expired.  If the ->runtime_suspend() callback
-returns -EAGAIN or -EBUSY, and if the next autosuspend delay expiration time is
-in the future (as it normally would be if the callback invoked
-pm_runtime_mark_last_busy()), the PM core will automatically reschedule the
-autosuspend.  The ->runtime_suspend() callback can't do this rescheduling
-itself because no suspend requests of any kind are accepted while the device is
-suspending (i.e., while the callback is running).
-
-The implementation is well suited for asynchronous use in interrupt contexts.
-However such use inevitably involves races, because the PM core can't
-synchronize ->runtime_suspend() callbacks with the arrival of I/O requests.
-This synchronization must be handled by the driver, using its private lock.
-Here is a schematic pseudo-code example:
-
-	foo_read_or_write(struct foo_priv *foo, void *data)
-	{
-		lock(&foo->private_lock);
-		add_request_to_io_queue(foo, data);
-		if (foo->num_pending_requests++ == 0)
-			pm_runtime_get(&foo->dev);
-		if (!foo->is_suspended)
-			foo_process_next_request(foo);
-		unlock(&foo->private_lock);
-	}
-
-	foo_io_completion(struct foo_priv *foo, void *req)
-	{
-		lock(&foo->private_lock);
-		if (--foo->num_pending_requests == 0) {
-			pm_runtime_mark_last_busy(&foo->dev);
-			pm_runtime_put_autosuspend(&foo->dev);
-		} else {
-			foo_process_next_request(foo);
-		}
-		unlock(&foo->private_lock);
-		/* Send req result back to the user ... */
-	}
-
-	int foo_runtime_suspend(struct device *dev)
-	{
-		struct foo_priv foo = container_of(dev, ...);
-		int ret = 0;
-
-		lock(&foo->private_lock);
-		if (foo->num_pending_requests > 0) {
-			ret = -EBUSY;
-		} else {
-			/* ... suspend the device ... */
-			foo->is_suspended = 1;
-		}
-		unlock(&foo->private_lock);
-		return ret;
-	}
-
-	int foo_runtime_resume(struct device *dev)
-	{
-		struct foo_priv foo = container_of(dev, ...);
-
-		lock(&foo->private_lock);
-		/* ... resume the device ... */
-		foo->is_suspended = 0;
-		pm_runtime_mark_last_busy(&foo->dev);
-		if (foo->num_pending_requests > 0)
-			foo_process_next_request(foo);
-		unlock(&foo->private_lock);
-		return 0;
-	}
-
-The important point is that after foo_io_completion() asks for an autosuspend,
-the foo_runtime_suspend() callback may race with foo_read_or_write().
-Therefore foo_runtime_suspend() has to check whether there are any pending I/O
-requests (while holding the private lock) before allowing the suspend to
-proceed.
-
-In addition, the power.autosuspend_delay field can be changed by user space at
-any time.  If a driver cares about this, it can call
-pm_runtime_autosuspend_expiration() from within the ->runtime_suspend()
-callback while holding its private lock.  If the function returns a nonzero
-value then the delay has not yet expired and the callback should return
--EAGAIN.
diff --git a/Documentation/power/s2ram.rst b/Documentation/power/s2ram.rst
new file mode 100644
index 000000000000..d739aa7c742c
--- /dev/null
+++ b/Documentation/power/s2ram.rst
@@ -0,0 +1,87 @@
+========================
+How to get s2ram working
+========================
+
+2006 Linus Torvalds
+2006 Pavel Machek
+
+1) Check suspend.sf.net, program s2ram there has long whitelist of
+   "known ok" machines, along with tricks to use on each one.
+
+2) If that does not help, try reading tricks.txt and
+   video.txt. Perhaps problem is as simple as broken module, and
+   simple module unload can fix it.
+
+3) You can use Linus' TRACE_RESUME infrastructure, described below.
+
+Using TRACE_RESUME
+~~~~~~~~~~~~~~~~~~
+
+I've been working at making the machines I have able to STR, and almost
+always it's a driver that is buggy. Thank God for the suspend/resume
+debugging - the thing that Chuck tried to disable. That's often the _only_
+way to debug these things, and it's actually pretty powerful (but
+time-consuming - having to insert TRACE_RESUME() markers into the device
+driver that doesn't resume and recompile and reboot).
+
+Anyway, the way to debug this for people who are interested (have a
+machine that doesn't boot) is:
+
+ - enable PM_DEBUG, and PM_TRACE
+
+ - use a script like this::
+
+	#!/bin/sh
+	sync
+	echo 1 > /sys/power/pm_trace
+	echo mem > /sys/power/state
+
+   to suspend
+
+ - if it doesn't come back up (which is usually the problem), reboot by
+   holding the power button down, and look at the dmesg output for things
+   like::
+
+	Magic number: 4:156:725
+	hash matches drivers/base/power/resume.c:28
+	hash matches device 0000:01:00.0
+
+   which means that the last trace event was just before trying to resume
+   device 0000:01:00.0. Then figure out what driver is controlling that
+   device (lspci and /sys/devices/pci* is your friend), and see if you can
+   fix it, disable it, or trace into its resume function.
+
+   If no device matches the hash (or any matches appear to be false positives),
+   the culprit may be a device from a loadable kernel module that is not loaded
+   until after the hash is checked. You can check the hash against the current
+   devices again after more modules are loaded using sysfs::
+
+	cat /sys/power/pm_trace_dev_match
+
+For example, the above happens to be the VGA device on my EVO, which I
+used to run with "radeonfb" (it's an ATI Radeon mobility). It turns out
+that "radeonfb" simply cannot resume that device - it tries to set the
+PLL's, and it just _hangs_. Using the regular VGA console and letting X
+resume it instead works fine.
+
+NOTE
+====
+pm_trace uses the system's Real Time Clock (RTC) to save the magic number.
+Reason for this is that the RTC is the only reliably available piece of
+hardware during resume operations where a value can be set that will
+survive a reboot.
+
+pm_trace is not compatible with asynchronous suspend, so it turns
+asynchronous suspend off (which may work around timing or
+ordering-sensitive bugs).
+
+Consequence is that after a resume (even if it is successful) your system
+clock will have a value corresponding to the magic number instead of the
+correct date/time! It is therefore advisable to use a program like ntp-date
+or rdate to reset the correct date/time from an external time source when
+using this trace option.
+
+As the clock keeps ticking it is also essential that the reboot is done
+quickly after the resume failure. The trace option does not use the seconds
+or the low order bits of the minutes of the RTC, but a too long delay will
+corrupt the magic value.
diff --git a/Documentation/power/s2ram.txt b/Documentation/power/s2ram.txt
deleted file mode 100644
index 4685aee197fd..000000000000
--- a/Documentation/power/s2ram.txt
+++ /dev/null
@@ -1,85 +0,0 @@
-			How to get s2ram working
-			~~~~~~~~~~~~~~~~~~~~~~~~
-			2006 Linus Torvalds
-			2006 Pavel Machek
-
-1) Check suspend.sf.net, program s2ram there has long whitelist of
-   "known ok" machines, along with tricks to use on each one.
-
-2) If that does not help, try reading tricks.txt and
-   video.txt. Perhaps problem is as simple as broken module, and
-   simple module unload can fix it.
-
-3) You can use Linus' TRACE_RESUME infrastructure, described below.
-
-		      Using TRACE_RESUME
-		      ~~~~~~~~~~~~~~~~~~
-
-I've been working at making the machines I have able to STR, and almost
-always it's a driver that is buggy. Thank God for the suspend/resume
-debugging - the thing that Chuck tried to disable. That's often the _only_
-way to debug these things, and it's actually pretty powerful (but
-time-consuming - having to insert TRACE_RESUME() markers into the device
-driver that doesn't resume and recompile and reboot).
-
-Anyway, the way to debug this for people who are interested (have a
-machine that doesn't boot) is:
-
- - enable PM_DEBUG, and PM_TRACE
-
- - use a script like this:
-
-	#!/bin/sh
-	sync
-	echo 1 > /sys/power/pm_trace
-	echo mem > /sys/power/state
-
-   to suspend
-
- - if it doesn't come back up (which is usually the problem), reboot by
-   holding the power button down, and look at the dmesg output for things
-   like
-
-	Magic number: 4:156:725
-	hash matches drivers/base/power/resume.c:28
-	hash matches device 0000:01:00.0
-
-   which means that the last trace event was just before trying to resume
-   device 0000:01:00.0. Then figure out what driver is controlling that
-   device (lspci and /sys/devices/pci* is your friend), and see if you can
-   fix it, disable it, or trace into its resume function.
-
-   If no device matches the hash (or any matches appear to be false positives),
-   the culprit may be a device from a loadable kernel module that is not loaded
-   until after the hash is checked. You can check the hash against the current
-   devices again after more modules are loaded using sysfs:
-
-	cat /sys/power/pm_trace_dev_match
-
-For example, the above happens to be the VGA device on my EVO, which I
-used to run with "radeonfb" (it's an ATI Radeon mobility). It turns out
-that "radeonfb" simply cannot resume that device - it tries to set the
-PLL's, and it just _hangs_. Using the regular VGA console and letting X
-resume it instead works fine.
-
-NOTE
-====
-pm_trace uses the system's Real Time Clock (RTC) to save the magic number.
-Reason for this is that the RTC is the only reliably available piece of
-hardware during resume operations where a value can be set that will
-survive a reboot.
-
-pm_trace is not compatible with asynchronous suspend, so it turns
-asynchronous suspend off (which may work around timing or
-ordering-sensitive bugs).
-
-Consequence is that after a resume (even if it is successful) your system
-clock will have a value corresponding to the magic number instead of the
-correct date/time! It is therefore advisable to use a program like ntp-date
-or rdate to reset the correct date/time from an external time source when
-using this trace option.
-
-As the clock keeps ticking it is also essential that the reboot is done
-quickly after the resume failure. The trace option does not use the seconds
-or the low order bits of the minutes of the RTC, but a too long delay will
-corrupt the magic value.
diff --git a/Documentation/power/suspend-and-cpuhotplug.rst b/Documentation/power/suspend-and-cpuhotplug.rst
new file mode 100644
index 000000000000..7ac8e1f549f4
--- /dev/null
+++ b/Documentation/power/suspend-and-cpuhotplug.rst
@@ -0,0 +1,286 @@
+====================================================================
+Interaction of Suspend code (S3) with the CPU hotplug infrastructure
+====================================================================
+
+(C) 2011 - 2014 Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
+
+
+I. Differences between CPU hotplug and Suspend-to-RAM
+======================================================
+
+How does the regular CPU hotplug code differ from how the Suspend-to-RAM
+infrastructure uses it internally? And where do they share common code?
+
+Well, a picture is worth a thousand words... So ASCII art follows :-)
+
+[This depicts the current design in the kernel, and focusses only on the
+interactions involving the freezer and CPU hotplug and also tries to explain
+the locking involved. It outlines the notifications involved as well.
+But please note that here, only the call paths are illustrated, with the aim
+of describing where they take different paths and where they share code.
+What happens when regular CPU hotplug and Suspend-to-RAM race with each other
+is not depicted here.]
+
+On a high level, the suspend-resume cycle goes like this::
+
+  |Freeze| -> |Disable nonboot| -> |Do suspend| -> |Enable nonboot| -> |Thaw |
+  |tasks |    |     cpus      |    |          |    |     cpus     |    |tasks|
+
+
+More details follow::
+
+                                Suspend call path
+                                -----------------
+
+                                  Write 'mem' to
+                                /sys/power/state
+                                    sysfs file
+                                        |
+                                        v
+                               Acquire system_transition_mutex lock
+                                        |
+                                        v
+                             Send PM_SUSPEND_PREPARE
+                                   notifications
+                                        |
+                                        v
+                                   Freeze tasks
+                                        |
+                                        |
+                                        v
+                              disable_nonboot_cpus()
+                                   /* start */
+                                        |
+                                        v
+                            Acquire cpu_add_remove_lock
+                                        |
+                                        v
+                             Iterate over CURRENTLY
+                                   online CPUs
+                                        |
+                                        |
+                                        |                ----------
+                                        v                          | L
+             ======>               _cpu_down()                     |
+            |              [This takes cpuhotplug.lock             |
+  Common    |               before taking down the CPU             |
+   code     |               and releases it when done]             | O
+            |            While it is at it, notifications          |
+            |            are sent when notable events occur,       |
+             ======>     by running all registered callbacks.      |
+                                        |                          | O
+                                        |                          |
+                                        |                          |
+                                        v                          |
+                            Note down these cpus in                | P
+                                frozen_cpus mask         ----------
+                                        |
+                                        v
+                           Disable regular cpu hotplug
+                        by increasing cpu_hotplug_disabled
+                                        |
+                                        v
+                            Release cpu_add_remove_lock
+                                        |
+                                        v
+                       /* disable_nonboot_cpus() complete */
+                                        |
+                                        v
+                                   Do suspend
+
+
+
+Resuming back is likewise, with the counterparts being (in the order of
+execution during resume):
+
+* enable_nonboot_cpus() which involves::
+
+   |  Acquire cpu_add_remove_lock
+   |  Decrease cpu_hotplug_disabled, thereby enabling regular cpu hotplug
+   |  Call _cpu_up() [for all those cpus in the frozen_cpus mask, in a loop]
+   |  Release cpu_add_remove_lock
+   v
+
+* thaw tasks
+* send PM_POST_SUSPEND notifications
+* Release system_transition_mutex lock.
+
+
+It is to be noted here that the system_transition_mutex lock is acquired at the very
+beginning, when we are just starting out to suspend, and then released only
+after the entire cycle is complete (i.e., suspend + resume).
+
+::
+
+
+
+                          Regular CPU hotplug call path
+                          -----------------------------
+
+                                Write 0 (or 1) to
+                       /sys/devices/system/cpu/cpu*/online
+                                    sysfs file
+                                        |
+                                        |
+                                        v
+                                    cpu_down()
+                                        |
+                                        v
+                           Acquire cpu_add_remove_lock
+                                        |
+                                        v
+                          If cpu_hotplug_disabled > 0
+                                return gracefully
+                                        |
+                                        |
+                                        v
+             ======>                _cpu_down()
+            |              [This takes cpuhotplug.lock
+  Common    |               before taking down the CPU
+   code     |               and releases it when done]
+            |            While it is at it, notifications
+            |           are sent when notable events occur,
+             ======>    by running all registered callbacks.
+                                        |
+                                        |
+                                        v
+                          Release cpu_add_remove_lock
+                               [That's it!, for
+                              regular CPU hotplug]
+
+
+
+So, as can be seen from the two diagrams (the parts marked as "Common code"),
+regular CPU hotplug and the suspend code path converge at the _cpu_down() and
+_cpu_up() functions. They differ in the arguments passed to these functions,
+in that during regular CPU hotplug, 0 is passed for the 'tasks_frozen'
+argument. But during suspend, since the tasks are already frozen by the time
+the non-boot CPUs are offlined or onlined, the _cpu_*() functions are called
+with the 'tasks_frozen' argument set to 1.
+[See below for some known issues regarding this.]
+
+
+Important files and functions/entry points:
+-------------------------------------------
+
+- kernel/power/process.c : freeze_processes(), thaw_processes()
+- kernel/power/suspend.c : suspend_prepare(), suspend_enter(), suspend_finish()
+- kernel/cpu.c: cpu_[up|down](), _cpu_[up|down](), [disable|enable]_nonboot_cpus()
+
+
+
+II. What are the issues involved in CPU hotplug?
+------------------------------------------------
+
+There are some interesting situations involving CPU hotplug and microcode
+update on the CPUs, as discussed below:
+
+[Please bear in mind that the kernel requests the microcode images from
+userspace, using the request_firmware() function defined in
+drivers/base/firmware_loader/main.c]
+
+
+a. When all the CPUs are identical:
+
+   This is the most common situation and it is quite straightforward: we want
+   to apply the same microcode revision to each of the CPUs.
+   To give an example of x86, the collect_cpu_info() function defined in
+   arch/x86/kernel/microcode_core.c helps in discovering the type of the CPU
+   and thereby in applying the correct microcode revision to it.
+   But note that the kernel does not maintain a common microcode image for the
+   all CPUs, in order to handle case 'b' described below.
+
+
+b. When some of the CPUs are different than the rest:
+
+   In this case since we probably need to apply different microcode revisions
+   to different CPUs, the kernel maintains a copy of the correct microcode
+   image for each CPU (after appropriate CPU type/model discovery using
+   functions such as collect_cpu_info()).
+
+
+c. When a CPU is physically hot-unplugged and a new (and possibly different
+   type of) CPU is hot-plugged into the system:
+
+   In the current design of the kernel, whenever a CPU is taken offline during
+   a regular CPU hotplug operation, upon receiving the CPU_DEAD notification
+   (which is sent by the CPU hotplug code), the microcode update driver's
+   callback for that event reacts by freeing the kernel's copy of the
+   microcode image for that CPU.
+
+   Hence, when a new CPU is brought online, since the kernel finds that it
+   doesn't have the microcode image, it does the CPU type/model discovery
+   afresh and then requests the userspace for the appropriate microcode image
+   for that CPU, which is subsequently applied.
+
+   For example, in x86, the mc_cpu_callback() function (which is the microcode
+   update driver's callback registered for CPU hotplug events) calls
+   microcode_update_cpu() which would call microcode_init_cpu() in this case,
+   instead of microcode_resume_cpu() when it finds that the kernel doesn't
+   have a valid microcode image. This ensures that the CPU type/model
+   discovery is performed and the right microcode is applied to the CPU after
+   getting it from userspace.
+
+
+d. Handling microcode update during suspend/hibernate:
+
+   Strictly speaking, during a CPU hotplug operation which does not involve
+   physically removing or inserting CPUs, the CPUs are not actually powered
+   off during a CPU offline. They are just put to the lowest C-states possible.
+   Hence, in such a case, it is not really necessary to re-apply microcode
+   when the CPUs are brought back online, since they wouldn't have lost the
+   image during the CPU offline operation.
+
+   This is the usual scenario encountered during a resume after a suspend.
+   However, in the case of hibernation, since all the CPUs are completely
+   powered off, during restore it becomes necessary to apply the microcode
+   images to all the CPUs.
+
+   [Note that we don't expect someone to physically pull out nodes and insert
+   nodes with a different type of CPUs in-between a suspend-resume or a
+   hibernate/restore cycle.]
+
+   In the current design of the kernel however, during a CPU offline operation
+   as part of the suspend/hibernate cycle (cpuhp_tasks_frozen is set),
+   the existing copy of microcode image in the kernel is not freed up.
+   And during the CPU online operations (during resume/restore), since the
+   kernel finds that it already has copies of the microcode images for all the
+   CPUs, it just applies them to the CPUs, avoiding any re-discovery of CPU
+   type/model and the need for validating whether the microcode revisions are
+   right for the CPUs or not (due to the above assumption that physical CPU
+   hotplug will not be done in-between suspend/resume or hibernate/restore
+   cycles).
+
+
+III. Known problems
+===================
+
+Are there any known problems when regular CPU hotplug and suspend race
+with each other?
+
+Yes, they are listed below:
+
+1. When invoking regular CPU hotplug, the 'tasks_frozen' argument passed to
+   the _cpu_down() and _cpu_up() functions is *always* 0.
+   This might not reflect the true current state of the system, since the
+   tasks could have been frozen by an out-of-band event such as a suspend
+   operation in progress. Hence, the cpuhp_tasks_frozen variable will not
+   reflect the frozen state and the CPU hotplug callbacks which evaluate
+   that variable might execute the wrong code path.
+
+2. If a regular CPU hotplug stress test happens to race with the freezer due
+   to a suspend operation in progress at the same time, then we could hit the
+   situation described below:
+
+    * A regular cpu online operation continues its journey from userspace
+      into the kernel, since the freezing has not yet begun.
+    * Then freezer gets to work and freezes userspace.
+    * If cpu online has not yet completed the microcode update stuff by now,
+      it will now start waiting on the frozen userspace in the
+      TASK_UNINTERRUPTIBLE state, in order to get the microcode image.
+    * Now the freezer continues and tries to freeze the remaining tasks. But
+      due to this wait mentioned above, the freezer won't be able to freeze
+      the cpu online hotplug task and hence freezing of tasks fails.
+
+   As a result of this task freezing failure, the suspend operation gets
+   aborted.
diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt
deleted file mode 100644
index a8751b8df10e..000000000000
--- a/Documentation/power/suspend-and-cpuhotplug.txt
+++ /dev/null
@@ -1,274 +0,0 @@
-Interaction of Suspend code (S3) with the CPU hotplug infrastructure
-
-     (C) 2011 - 2014 Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
-
-
-I. How does the regular CPU hotplug code differ from how the Suspend-to-RAM
-   infrastructure uses it internally? And where do they share common code?
-
-Well, a picture is worth a thousand words... So ASCII art follows :-)
-
-[This depicts the current design in the kernel, and focusses only on the
-interactions involving the freezer and CPU hotplug and also tries to explain
-the locking involved. It outlines the notifications involved as well.
-But please note that here, only the call paths are illustrated, with the aim
-of describing where they take different paths and where they share code.
-What happens when regular CPU hotplug and Suspend-to-RAM race with each other
-is not depicted here.]
-
-On a high level, the suspend-resume cycle goes like this:
-
-|Freeze| -> |Disable nonboot| -> |Do suspend| -> |Enable nonboot| -> |Thaw |
-|tasks |    |     cpus      |    |          |    |     cpus     |    |tasks|
-
-
-More details follow:
-
-                                Suspend call path
-                                -----------------
-
-                                  Write 'mem' to
-                                /sys/power/state
-                                    sysfs file
-                                        |
-                                        v
-                               Acquire system_transition_mutex lock
-                                        |
-                                        v
-                             Send PM_SUSPEND_PREPARE
-                                   notifications
-                                        |
-                                        v
-                                   Freeze tasks
-                                        |
-                                        |
-                                        v
-                              disable_nonboot_cpus()
-                                   /* start */
-                                        |
-                                        v
-                            Acquire cpu_add_remove_lock
-                                        |
-                                        v
-                             Iterate over CURRENTLY
-                                   online CPUs
-                                        |
-                                        |
-                                        |                ----------
-                                        v                          | L
-             ======>               _cpu_down()                     |
-            |              [This takes cpuhotplug.lock             |
-  Common    |               before taking down the CPU             |
-   code     |               and releases it when done]             | O
-            |            While it is at it, notifications          |
-            |            are sent when notable events occur,       |
-             ======>     by running all registered callbacks.      |
-                                        |                          | O
-                                        |                          |
-                                        |                          |
-                                        v                          |
-                            Note down these cpus in                | P
-                                frozen_cpus mask         ----------
-                                        |
-                                        v
-                           Disable regular cpu hotplug
-                        by increasing cpu_hotplug_disabled
-                                        |
-                                        v
-                            Release cpu_add_remove_lock
-                                        |
-                                        v
-                       /* disable_nonboot_cpus() complete */
-                                        |
-                                        v
-                                   Do suspend
-
-
-
-Resuming back is likewise, with the counterparts being (in the order of
-execution during resume):
-* enable_nonboot_cpus() which involves:
-   |  Acquire cpu_add_remove_lock
-   |  Decrease cpu_hotplug_disabled, thereby enabling regular cpu hotplug
-   |  Call _cpu_up() [for all those cpus in the frozen_cpus mask, in a loop]
-   |  Release cpu_add_remove_lock
-   v
-
-* thaw tasks
-* send PM_POST_SUSPEND notifications
-* Release system_transition_mutex lock.
-
-
-It is to be noted here that the system_transition_mutex lock is acquired at the very
-beginning, when we are just starting out to suspend, and then released only
-after the entire cycle is complete (i.e., suspend + resume).
-
-
-
-                          Regular CPU hotplug call path
-                          -----------------------------
-
-                                Write 0 (or 1) to
-                       /sys/devices/system/cpu/cpu*/online
-                                    sysfs file
-                                        |
-                                        |
-                                        v
-                                    cpu_down()
-                                        |
-                                        v
-                           Acquire cpu_add_remove_lock
-                                        |
-                                        v
-                          If cpu_hotplug_disabled > 0
-                                return gracefully
-                                        |
-                                        |
-                                        v
-             ======>                _cpu_down()
-            |              [This takes cpuhotplug.lock
-  Common    |               before taking down the CPU
-   code     |               and releases it when done]
-            |            While it is at it, notifications
-            |           are sent when notable events occur,
-             ======>    by running all registered callbacks.
-                                        |
-                                        |
-                                        v
-                          Release cpu_add_remove_lock
-                               [That's it!, for
-                              regular CPU hotplug]
-
-
-
-So, as can be seen from the two diagrams (the parts marked as "Common code"),
-regular CPU hotplug and the suspend code path converge at the _cpu_down() and
-_cpu_up() functions. They differ in the arguments passed to these functions,
-in that during regular CPU hotplug, 0 is passed for the 'tasks_frozen'
-argument. But during suspend, since the tasks are already frozen by the time
-the non-boot CPUs are offlined or onlined, the _cpu_*() functions are called
-with the 'tasks_frozen' argument set to 1.
-[See below for some known issues regarding this.]
-
-
-Important files and functions/entry points:
-------------------------------------------
-
-kernel/power/process.c : freeze_processes(), thaw_processes()
-kernel/power/suspend.c : suspend_prepare(), suspend_enter(), suspend_finish()
-kernel/cpu.c: cpu_[up|down](), _cpu_[up|down](), [disable|enable]_nonboot_cpus()
-
-
-
-II. What are the issues involved in CPU hotplug?
-    -------------------------------------------
-
-There are some interesting situations involving CPU hotplug and microcode
-update on the CPUs, as discussed below:
-
-[Please bear in mind that the kernel requests the microcode images from
-userspace, using the request_firmware() function defined in
-drivers/base/firmware_loader/main.c]
-
-
-a. When all the CPUs are identical:
-
-   This is the most common situation and it is quite straightforward: we want
-   to apply the same microcode revision to each of the CPUs.
-   To give an example of x86, the collect_cpu_info() function defined in
-   arch/x86/kernel/microcode_core.c helps in discovering the type of the CPU
-   and thereby in applying the correct microcode revision to it.
-   But note that the kernel does not maintain a common microcode image for the
-   all CPUs, in order to handle case 'b' described below.
-
-
-b. When some of the CPUs are different than the rest:
-
-   In this case since we probably need to apply different microcode revisions
-   to different CPUs, the kernel maintains a copy of the correct microcode
-   image for each CPU (after appropriate CPU type/model discovery using
-   functions such as collect_cpu_info()).
-
-
-c. When a CPU is physically hot-unplugged and a new (and possibly different
-   type of) CPU is hot-plugged into the system:
-
-   In the current design of the kernel, whenever a CPU is taken offline during
-   a regular CPU hotplug operation, upon receiving the CPU_DEAD notification
-   (which is sent by the CPU hotplug code), the microcode update driver's
-   callback for that event reacts by freeing the kernel's copy of the
-   microcode image for that CPU.
-
-   Hence, when a new CPU is brought online, since the kernel finds that it
-   doesn't have the microcode image, it does the CPU type/model discovery
-   afresh and then requests the userspace for the appropriate microcode image
-   for that CPU, which is subsequently applied.
-
-   For example, in x86, the mc_cpu_callback() function (which is the microcode
-   update driver's callback registered for CPU hotplug events) calls
-   microcode_update_cpu() which would call microcode_init_cpu() in this case,
-   instead of microcode_resume_cpu() when it finds that the kernel doesn't
-   have a valid microcode image. This ensures that the CPU type/model
-   discovery is performed and the right microcode is applied to the CPU after
-   getting it from userspace.
-
-
-d. Handling microcode update during suspend/hibernate:
-
-   Strictly speaking, during a CPU hotplug operation which does not involve
-   physically removing or inserting CPUs, the CPUs are not actually powered
-   off during a CPU offline. They are just put to the lowest C-states possible.
-   Hence, in such a case, it is not really necessary to re-apply microcode
-   when the CPUs are brought back online, since they wouldn't have lost the
-   image during the CPU offline operation.
-
-   This is the usual scenario encountered during a resume after a suspend.
-   However, in the case of hibernation, since all the CPUs are completely
-   powered off, during restore it becomes necessary to apply the microcode
-   images to all the CPUs.
-
-   [Note that we don't expect someone to physically pull out nodes and insert
-   nodes with a different type of CPUs in-between a suspend-resume or a
-   hibernate/restore cycle.]
-
-   In the current design of the kernel however, during a CPU offline operation
-   as part of the suspend/hibernate cycle (cpuhp_tasks_frozen is set),
-   the existing copy of microcode image in the kernel is not freed up.
-   And during the CPU online operations (during resume/restore), since the
-   kernel finds that it already has copies of the microcode images for all the
-   CPUs, it just applies them to the CPUs, avoiding any re-discovery of CPU
-   type/model and the need for validating whether the microcode revisions are
-   right for the CPUs or not (due to the above assumption that physical CPU
-   hotplug will not be done in-between suspend/resume or hibernate/restore
-   cycles).
-
-
-III. Are there any known problems when regular CPU hotplug and suspend race
-     with each other?
-
-Yes, they are listed below:
-
-1. When invoking regular CPU hotplug, the 'tasks_frozen' argument passed to
-   the _cpu_down() and _cpu_up() functions is *always* 0.
-   This might not reflect the true current state of the system, since the
-   tasks could have been frozen by an out-of-band event such as a suspend
-   operation in progress. Hence, the cpuhp_tasks_frozen variable will not
-   reflect the frozen state and the CPU hotplug callbacks which evaluate
-   that variable might execute the wrong code path.
-
-2. If a regular CPU hotplug stress test happens to race with the freezer due
-   to a suspend operation in progress at the same time, then we could hit the
-   situation described below:
-
-    * A regular cpu online operation continues its journey from userspace
-      into the kernel, since the freezing has not yet begun.
-    * Then freezer gets to work and freezes userspace.
-    * If cpu online has not yet completed the microcode update stuff by now,
-      it will now start waiting on the frozen userspace in the
-      TASK_UNINTERRUPTIBLE state, in order to get the microcode image.
-    * Now the freezer continues and tries to freeze the remaining tasks. But
-      due to this wait mentioned above, the freezer won't be able to freeze
-      the cpu online hotplug task and hence freezing of tasks fails.
-
-   As a result of this task freezing failure, the suspend operation gets
-   aborted.
diff --git a/Documentation/power/suspend-and-interrupts.rst b/Documentation/power/suspend-and-interrupts.rst
new file mode 100644
index 000000000000..4cda6617709a
--- /dev/null
+++ b/Documentation/power/suspend-and-interrupts.rst
@@ -0,0 +1,137 @@
+====================================
+System Suspend and Device Interrupts
+====================================
+
+Copyright (C) 2014 Intel Corp.
+Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+
+Suspending and Resuming Device IRQs
+-----------------------------------
+
+Device interrupt request lines (IRQs) are generally disabled during system
+suspend after the "late" phase of suspending devices (that is, after all of the
+->prepare, ->suspend and ->suspend_late callbacks have been executed for all
+devices).  That is done by suspend_device_irqs().
+
+The rationale for doing so is that after the "late" phase of device suspend
+there is no legitimate reason why any interrupts from suspended devices should
+trigger and if any devices have not been suspended properly yet, it is better to
+block interrupts from them anyway.  Also, in the past we had problems with
+interrupt handlers for shared IRQs that device drivers implementing them were
+not prepared for interrupts triggering after their devices had been suspended.
+In some cases they would attempt to access, for example, memory address spaces
+of suspended devices and cause unpredictable behavior to ensue as a result.
+Unfortunately, such problems are very difficult to debug and the introduction
+of suspend_device_irqs(), along with the "noirq" phase of device suspend and
+resume, was the only practical way to mitigate them.
+
+Device IRQs are re-enabled during system resume, right before the "early" phase
+of resuming devices (that is, before starting to execute ->resume_early
+callbacks for devices).  The function doing that is resume_device_irqs().
+
+
+The IRQF_NO_SUSPEND Flag
+------------------------
+
+There are interrupts that can legitimately trigger during the entire system
+suspend-resume cycle, including the "noirq" phases of suspending and resuming
+devices as well as during the time when nonboot CPUs are taken offline and
+brought back online.  That applies to timer interrupts in the first place,
+but also to IPIs and to some other special-purpose interrupts.
+
+The IRQF_NO_SUSPEND flag is used to indicate that to the IRQ subsystem when
+requesting a special-purpose interrupt.  It causes suspend_device_irqs() to
+leave the corresponding IRQ enabled so as to allow the interrupt to work as
+expected during the suspend-resume cycle, but does not guarantee that the
+interrupt will wake the system from a suspended state -- for such cases it is
+necessary to use enable_irq_wake().
+
+Note that the IRQF_NO_SUSPEND flag affects the entire IRQ and not just one
+user of it.  Thus, if the IRQ is shared, all of the interrupt handlers installed
+for it will be executed as usual after suspend_device_irqs(), even if the
+IRQF_NO_SUSPEND flag was not passed to request_irq() (or equivalent) by some of
+the IRQ's users.  For this reason, using IRQF_NO_SUSPEND and IRQF_SHARED at the
+same time should be avoided.
+
+
+System Wakeup Interrupts, enable_irq_wake() and disable_irq_wake()
+------------------------------------------------------------------
+
+System wakeup interrupts generally need to be configured to wake up the system
+from sleep states, especially if they are used for different purposes (e.g. as
+I/O interrupts) in the working state.
+
+That may involve turning on a special signal handling logic within the platform
+(such as an SoC) so that signals from a given line are routed in a different way
+during system sleep so as to trigger a system wakeup when needed.  For example,
+the platform may include a dedicated interrupt controller used specifically for
+handling system wakeup events.  Then, if a given interrupt line is supposed to
+wake up the system from sleep sates, the corresponding input of that interrupt
+controller needs to be enabled to receive signals from the line in question.
+After wakeup, it generally is better to disable that input to prevent the
+dedicated controller from triggering interrupts unnecessarily.
+
+The IRQ subsystem provides two helper functions to be used by device drivers for
+those purposes.  Namely, enable_irq_wake() turns on the platform's logic for
+handling the given IRQ as a system wakeup interrupt line and disable_irq_wake()
+turns that logic off.
+
+Calling enable_irq_wake() causes suspend_device_irqs() to treat the given IRQ
+in a special way.  Namely, the IRQ remains enabled, by on the first interrupt
+it will be disabled, marked as pending and "suspended" so that it will be
+re-enabled by resume_device_irqs() during the subsequent system resume.  Also
+the PM core is notified about the event which causes the system suspend in
+progress to be aborted (that doesn't have to happen immediately, but at one
+of the points where the suspend thread looks for pending wakeup events).
+
+This way every interrupt from a wakeup interrupt source will either cause the
+system suspend currently in progress to be aborted or wake up the system if
+already suspended.  However, after suspend_device_irqs() interrupt handlers are
+not executed for system wakeup IRQs.  They are only executed for IRQF_NO_SUSPEND
+IRQs at that time, but those IRQs should not be configured for system wakeup
+using enable_irq_wake().
+
+
+Interrupts and Suspend-to-Idle
+------------------------------
+
+Suspend-to-idle (also known as the "freeze" sleep state) is a relatively new
+system sleep state that works by idling all of the processors and waiting for
+interrupts right after the "noirq" phase of suspending devices.
+
+Of course, this means that all of the interrupts with the IRQF_NO_SUSPEND flag
+set will bring CPUs out of idle while in that state, but they will not cause the
+IRQ subsystem to trigger a system wakeup.
+
+System wakeup interrupts, in turn, will trigger wakeup from suspend-to-idle in
+analogy with what they do in the full system suspend case.  The only difference
+is that the wakeup from suspend-to-idle is signaled using the usual working
+state interrupt delivery mechanisms and doesn't require the platform to use
+any special interrupt handling logic for it to work.
+
+
+IRQF_NO_SUSPEND and enable_irq_wake()
+-------------------------------------
+
+There are very few valid reasons to use both enable_irq_wake() and the
+IRQF_NO_SUSPEND flag on the same IRQ, and it is never valid to use both for the
+same device.
+
+First of all, if the IRQ is not shared, the rules for handling IRQF_NO_SUSPEND
+interrupts (interrupt handlers are invoked after suspend_device_irqs()) are
+directly at odds with the rules for handling system wakeup interrupts (interrupt
+handlers are not invoked after suspend_device_irqs()).
+
+Second, both enable_irq_wake() and IRQF_NO_SUSPEND apply to entire IRQs and not
+to individual interrupt handlers, so sharing an IRQ between a system wakeup
+interrupt source and an IRQF_NO_SUSPEND interrupt source does not generally
+make sense.
+
+In rare cases an IRQ can be shared between a wakeup device driver and an
+IRQF_NO_SUSPEND user. In order for this to be safe, the wakeup device driver
+must be able to discern spurious IRQs from genuine wakeup events (signalling
+the latter to the core with pm_system_wakeup()), must use enable_irq_wake() to
+ensure that the IRQ will function as a wakeup source, and must request the IRQ
+with IRQF_COND_SUSPEND to tell the core that it meets these requirements. If
+these requirements are not met, it is not valid to use IRQF_COND_SUSPEND.
diff --git a/Documentation/power/suspend-and-interrupts.txt b/Documentation/power/suspend-and-interrupts.txt
deleted file mode 100644
index 8afb29a8604a..000000000000
--- a/Documentation/power/suspend-and-interrupts.txt
+++ /dev/null
@@ -1,135 +0,0 @@
-System Suspend and Device Interrupts
-
-Copyright (C) 2014 Intel Corp.
-Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-
-
-Suspending and Resuming Device IRQs
------------------------------------
-
-Device interrupt request lines (IRQs) are generally disabled during system
-suspend after the "late" phase of suspending devices (that is, after all of the
-->prepare, ->suspend and ->suspend_late callbacks have been executed for all
-devices).  That is done by suspend_device_irqs().
-
-The rationale for doing so is that after the "late" phase of device suspend
-there is no legitimate reason why any interrupts from suspended devices should
-trigger and if any devices have not been suspended properly yet, it is better to
-block interrupts from them anyway.  Also, in the past we had problems with
-interrupt handlers for shared IRQs that device drivers implementing them were
-not prepared for interrupts triggering after their devices had been suspended.
-In some cases they would attempt to access, for example, memory address spaces
-of suspended devices and cause unpredictable behavior to ensue as a result.
-Unfortunately, such problems are very difficult to debug and the introduction
-of suspend_device_irqs(), along with the "noirq" phase of device suspend and
-resume, was the only practical way to mitigate them.
-
-Device IRQs are re-enabled during system resume, right before the "early" phase
-of resuming devices (that is, before starting to execute ->resume_early
-callbacks for devices).  The function doing that is resume_device_irqs().
-
-
-The IRQF_NO_SUSPEND Flag
-------------------------
-
-There are interrupts that can legitimately trigger during the entire system
-suspend-resume cycle, including the "noirq" phases of suspending and resuming
-devices as well as during the time when nonboot CPUs are taken offline and
-brought back online.  That applies to timer interrupts in the first place,
-but also to IPIs and to some other special-purpose interrupts.
-
-The IRQF_NO_SUSPEND flag is used to indicate that to the IRQ subsystem when
-requesting a special-purpose interrupt.  It causes suspend_device_irqs() to
-leave the corresponding IRQ enabled so as to allow the interrupt to work as
-expected during the suspend-resume cycle, but does not guarantee that the
-interrupt will wake the system from a suspended state -- for such cases it is
-necessary to use enable_irq_wake().
-
-Note that the IRQF_NO_SUSPEND flag affects the entire IRQ and not just one
-user of it.  Thus, if the IRQ is shared, all of the interrupt handlers installed
-for it will be executed as usual after suspend_device_irqs(), even if the
-IRQF_NO_SUSPEND flag was not passed to request_irq() (or equivalent) by some of
-the IRQ's users.  For this reason, using IRQF_NO_SUSPEND and IRQF_SHARED at the
-same time should be avoided.
-
-
-System Wakeup Interrupts, enable_irq_wake() and disable_irq_wake()
-------------------------------------------------------------------
-
-System wakeup interrupts generally need to be configured to wake up the system
-from sleep states, especially if they are used for different purposes (e.g. as
-I/O interrupts) in the working state.
-
-That may involve turning on a special signal handling logic within the platform
-(such as an SoC) so that signals from a given line are routed in a different way
-during system sleep so as to trigger a system wakeup when needed.  For example,
-the platform may include a dedicated interrupt controller used specifically for
-handling system wakeup events.  Then, if a given interrupt line is supposed to
-wake up the system from sleep sates, the corresponding input of that interrupt
-controller needs to be enabled to receive signals from the line in question.
-After wakeup, it generally is better to disable that input to prevent the
-dedicated controller from triggering interrupts unnecessarily.
-
-The IRQ subsystem provides two helper functions to be used by device drivers for
-those purposes.  Namely, enable_irq_wake() turns on the platform's logic for
-handling the given IRQ as a system wakeup interrupt line and disable_irq_wake()
-turns that logic off.
-
-Calling enable_irq_wake() causes suspend_device_irqs() to treat the given IRQ
-in a special way.  Namely, the IRQ remains enabled, by on the first interrupt
-it will be disabled, marked as pending and "suspended" so that it will be
-re-enabled by resume_device_irqs() during the subsequent system resume.  Also
-the PM core is notified about the event which causes the system suspend in
-progress to be aborted (that doesn't have to happen immediately, but at one
-of the points where the suspend thread looks for pending wakeup events).
-
-This way every interrupt from a wakeup interrupt source will either cause the
-system suspend currently in progress to be aborted or wake up the system if
-already suspended.  However, after suspend_device_irqs() interrupt handlers are
-not executed for system wakeup IRQs.  They are only executed for IRQF_NO_SUSPEND
-IRQs at that time, but those IRQs should not be configured for system wakeup
-using enable_irq_wake().
-
-
-Interrupts and Suspend-to-Idle
-------------------------------
-
-Suspend-to-idle (also known as the "freeze" sleep state) is a relatively new
-system sleep state that works by idling all of the processors and waiting for
-interrupts right after the "noirq" phase of suspending devices.
-
-Of course, this means that all of the interrupts with the IRQF_NO_SUSPEND flag
-set will bring CPUs out of idle while in that state, but they will not cause the
-IRQ subsystem to trigger a system wakeup.
-
-System wakeup interrupts, in turn, will trigger wakeup from suspend-to-idle in
-analogy with what they do in the full system suspend case.  The only difference
-is that the wakeup from suspend-to-idle is signaled using the usual working
-state interrupt delivery mechanisms and doesn't require the platform to use
-any special interrupt handling logic for it to work.
-
-
-IRQF_NO_SUSPEND and enable_irq_wake()
--------------------------------------
-
-There are very few valid reasons to use both enable_irq_wake() and the
-IRQF_NO_SUSPEND flag on the same IRQ, and it is never valid to use both for the
-same device.
-
-First of all, if the IRQ is not shared, the rules for handling IRQF_NO_SUSPEND
-interrupts (interrupt handlers are invoked after suspend_device_irqs()) are
-directly at odds with the rules for handling system wakeup interrupts (interrupt
-handlers are not invoked after suspend_device_irqs()).
-
-Second, both enable_irq_wake() and IRQF_NO_SUSPEND apply to entire IRQs and not
-to individual interrupt handlers, so sharing an IRQ between a system wakeup
-interrupt source and an IRQF_NO_SUSPEND interrupt source does not generally
-make sense.
-
-In rare cases an IRQ can be shared between a wakeup device driver and an
-IRQF_NO_SUSPEND user. In order for this to be safe, the wakeup device driver
-must be able to discern spurious IRQs from genuine wakeup events (signalling
-the latter to the core with pm_system_wakeup()), must use enable_irq_wake() to
-ensure that the IRQ will function as a wakeup source, and must request the IRQ
-with IRQF_COND_SUSPEND to tell the core that it meets these requirements. If
-these requirements are not met, it is not valid to use IRQF_COND_SUSPEND.
diff --git a/Documentation/power/swsusp-and-swap-files.rst b/Documentation/power/swsusp-and-swap-files.rst
new file mode 100644
index 000000000000..a33a2919dbe4
--- /dev/null
+++ b/Documentation/power/swsusp-and-swap-files.rst
@@ -0,0 +1,63 @@
+===============================================
+Using swap files with software suspend (swsusp)
+===============================================
+
+	(C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+
+The Linux kernel handles swap files almost in the same way as it handles swap
+partitions and there are only two differences between these two types of swap
+areas:
+(1) swap files need not be contiguous,
+(2) the header of a swap file is not in the first block of the partition that
+holds it.  From the swsusp's point of view (1) is not a problem, because it is
+already taken care of by the swap-handling code, but (2) has to be taken into
+consideration.
+
+In principle the location of a swap file's header may be determined with the
+help of appropriate filesystem driver.  Unfortunately, however, it requires the
+filesystem holding the swap file to be mounted, and if this filesystem is
+journaled, it cannot be mounted during resume from disk.  For this reason to
+identify a swap file swsusp uses the name of the partition that holds the file
+and the offset from the beginning of the partition at which the swap file's
+header is located.  For convenience, this offset is expressed in <PAGE_SIZE>
+units.
+
+In order to use a swap file with swsusp, you need to:
+
+1) Create the swap file and make it active, eg.::
+
+    # dd if=/dev/zero of=<swap_file_path> bs=1024 count=<swap_file_size_in_k>
+    # mkswap <swap_file_path>
+    # swapon <swap_file_path>
+
+2) Use an application that will bmap the swap file with the help of the
+FIBMAP ioctl and determine the location of the file's swap header, as the
+offset, in <PAGE_SIZE> units, from the beginning of the partition which
+holds the swap file.
+
+3) Add the following parameters to the kernel command line::
+
+    resume=<swap_file_partition> resume_offset=<swap_file_offset>
+
+where <swap_file_partition> is the partition on which the swap file is located
+and <swap_file_offset> is the offset of the swap header determined by the
+application in 2) (of course, this step may be carried out automatically
+by the same application that determines the swap file's header offset using the
+FIBMAP ioctl)
+
+OR
+
+Use a userland suspend application that will set the partition and offset
+with the help of the SNAPSHOT_SET_SWAP_AREA ioctl described in
+Documentation/power/userland-swsusp.rst (this is the only method to suspend
+to a swap file allowing the resume to be initiated from an initrd or initramfs
+image).
+
+Now, swsusp will use the swap file in the same way in which it would use a swap
+partition.  In particular, the swap file has to be active (ie. be present in
+/proc/swaps) so that it can be used for suspending.
+
+Note that if the swap file used for suspending is deleted and recreated,
+the location of its header need not be the same as before.  Thus every time
+this happens the value of the "resume_offset=" kernel command line parameter
+has to be updated.
diff --git a/Documentation/power/swsusp-and-swap-files.txt b/Documentation/power/swsusp-and-swap-files.txt
deleted file mode 100644
index f281886de490..000000000000
--- a/Documentation/power/swsusp-and-swap-files.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-Using swap files with software suspend (swsusp)
-	(C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
-
-The Linux kernel handles swap files almost in the same way as it handles swap
-partitions and there are only two differences between these two types of swap
-areas:
-(1) swap files need not be contiguous,
-(2) the header of a swap file is not in the first block of the partition that
-holds it.  From the swsusp's point of view (1) is not a problem, because it is
-already taken care of by the swap-handling code, but (2) has to be taken into
-consideration.
-
-In principle the location of a swap file's header may be determined with the
-help of appropriate filesystem driver.  Unfortunately, however, it requires the
-filesystem holding the swap file to be mounted, and if this filesystem is
-journaled, it cannot be mounted during resume from disk.  For this reason to
-identify a swap file swsusp uses the name of the partition that holds the file
-and the offset from the beginning of the partition at which the swap file's
-header is located.  For convenience, this offset is expressed in <PAGE_SIZE>
-units.
-
-In order to use a swap file with swsusp, you need to:
-
-1) Create the swap file and make it active, eg.
-
-# dd if=/dev/zero of=<swap_file_path> bs=1024 count=<swap_file_size_in_k>
-# mkswap <swap_file_path>
-# swapon <swap_file_path>
-
-2) Use an application that will bmap the swap file with the help of the
-FIBMAP ioctl and determine the location of the file's swap header, as the
-offset, in <PAGE_SIZE> units, from the beginning of the partition which
-holds the swap file.
-
-3) Add the following parameters to the kernel command line:
-
-resume=<swap_file_partition> resume_offset=<swap_file_offset>
-
-where <swap_file_partition> is the partition on which the swap file is located
-and <swap_file_offset> is the offset of the swap header determined by the
-application in 2) (of course, this step may be carried out automatically
-by the same application that determines the swap file's header offset using the
-FIBMAP ioctl)
-
-OR
-
-Use a userland suspend application that will set the partition and offset
-with the help of the SNAPSHOT_SET_SWAP_AREA ioctl described in
-Documentation/power/userland-swsusp.txt (this is the only method to suspend
-to a swap file allowing the resume to be initiated from an initrd or initramfs
-image).
-
-Now, swsusp will use the swap file in the same way in which it would use a swap
-partition.  In particular, the swap file has to be active (ie. be present in
-/proc/swaps) so that it can be used for suspending.
-
-Note that if the swap file used for suspending is deleted and recreated,
-the location of its header need not be the same as before.  Thus every time
-this happens the value of the "resume_offset=" kernel command line parameter
-has to be updated.
diff --git a/Documentation/power/swsusp-dmcrypt.rst b/Documentation/power/swsusp-dmcrypt.rst
new file mode 100644
index 000000000000..426df59172cd
--- /dev/null
+++ b/Documentation/power/swsusp-dmcrypt.rst
@@ -0,0 +1,140 @@
+=======================================
+How to use dm-crypt and swsusp together
+=======================================
+
+Author: Andreas Steinmetz <ast@domdv.de>
+
+
+
+Some prerequisites:
+You know how dm-crypt works. If not, visit the following web page:
+http://www.saout.de/misc/dm-crypt/
+You have read Documentation/power/swsusp.rst and understand it.
+You did read Documentation/admin-guide/initrd.rst and know how an initrd works.
+You know how to create or how to modify an initrd.
+
+Now your system is properly set up, your disk is encrypted except for
+the swap device(s) and the boot partition which may contain a mini
+system for crypto setup and/or rescue purposes. You may even have
+an initrd that does your current crypto setup already.
+
+At this point you want to encrypt your swap, too. Still you want to
+be able to suspend using swsusp. This, however, means that you
+have to be able to either enter a passphrase or that you read
+the key(s) from an external device like a pcmcia flash disk
+or an usb stick prior to resume. So you need an initrd, that sets
+up dm-crypt and then asks swsusp to resume from the encrypted
+swap device.
+
+The most important thing is that you set up dm-crypt in such
+a way that the swap device you suspend to/resume from has
+always the same major/minor within the initrd as well as
+within your running system. The easiest way to achieve this is
+to always set up this swap device first with dmsetup, so that
+it will always look like the following::
+
+  brw-------  1 root root 254, 0 Jul 28 13:37 /dev/mapper/swap0
+
+Now set up your kernel to use /dev/mapper/swap0 as the default
+resume partition, so your kernel .config contains::
+
+  CONFIG_PM_STD_PARTITION="/dev/mapper/swap0"
+
+Prepare your boot loader to use the initrd you will create or
+modify. For lilo the simplest setup looks like the following
+lines::
+
+  image=/boot/vmlinuz
+  initrd=/boot/initrd.gz
+  label=linux
+  append="root=/dev/ram0 init=/linuxrc rw"
+
+Finally you need to create or modify your initrd. Lets assume
+you create an initrd that reads the required dm-crypt setup
+from a pcmcia flash disk card. The card is formatted with an ext2
+fs which resides on /dev/hde1 when the card is inserted. The
+card contains at least the encrypted swap setup in a file
+named "swapkey". /etc/fstab of your initrd contains something
+like the following::
+
+  /dev/hda1   /mnt    ext3      ro                            0 0
+  none        /proc   proc      defaults,noatime,nodiratime   0 0
+  none        /sys    sysfs     defaults,noatime,nodiratime   0 0
+
+/dev/hda1 contains an unencrypted mini system that sets up all
+of your crypto devices, again by reading the setup from the
+pcmcia flash disk. What follows now is a /linuxrc for your
+initrd that allows you to resume from encrypted swap and that
+continues boot with your mini system on /dev/hda1 if resume
+does not happen::
+
+  #!/bin/sh
+  PATH=/sbin:/bin:/usr/sbin:/usr/bin
+  mount /proc
+  mount /sys
+  mapped=0
+  noresume=`grep -c noresume /proc/cmdline`
+  if [ "$*" != "" ]
+  then
+    noresume=1
+  fi
+  dmesg -n 1
+  /sbin/cardmgr -q
+  for i in 1 2 3 4 5 6 7 8 9 0
+  do
+    if [ -f /proc/ide/hde/media ]
+    then
+      usleep 500000
+      mount -t ext2 -o ro /dev/hde1 /mnt
+      if [ -f /mnt/swapkey ]
+      then
+        dmsetup create swap0 /mnt/swapkey > /dev/null 2>&1 && mapped=1
+      fi
+      umount /mnt
+      break
+    fi
+    usleep 500000
+  done
+  killproc /sbin/cardmgr
+  dmesg -n 6
+  if [ $mapped = 1 ]
+  then
+    if [ $noresume != 0 ]
+    then
+      mkswap /dev/mapper/swap0 > /dev/null 2>&1
+    fi
+    echo 254:0 > /sys/power/resume
+    dmsetup remove swap0
+  fi
+  umount /sys
+  mount /mnt
+  umount /proc
+  cd /mnt
+  pivot_root . mnt
+  mount /proc
+  umount -l /mnt
+  umount /proc
+  exec chroot . /sbin/init $* < dev/console > dev/console 2>&1
+
+Please don't mind the weird loop above, busybox's msh doesn't know
+the let statement. Now, what is happening in the script?
+First we have to decide if we want to try to resume, or not.
+We will not resume if booting with "noresume" or any parameters
+for init like "single" or "emergency" as boot parameters.
+
+Then we need to set up dmcrypt with the setup data from the
+pcmcia flash disk. If this succeeds we need to reset the swap
+device if we don't want to resume. The line "echo 254:0 > /sys/power/resume"
+then attempts to resume from the first device mapper device.
+Note that it is important to set the device in /sys/power/resume,
+regardless if resuming or not, otherwise later suspend will fail.
+If resume starts, script execution terminates here.
+
+Otherwise we just remove the encrypted swap device and leave it to the
+mini system on /dev/hda1 to set the whole crypto up (it is up to
+you to modify this to your taste).
+
+What then follows is the well known process to change the root
+file system and continue booting from there. I prefer to unmount
+the initrd prior to continue booting but it is up to you to modify
+this.
diff --git a/Documentation/power/swsusp-dmcrypt.txt b/Documentation/power/swsusp-dmcrypt.txt
deleted file mode 100644
index b802fbfd95ef..000000000000
--- a/Documentation/power/swsusp-dmcrypt.txt
+++ /dev/null
@@ -1,138 +0,0 @@
-Author: Andreas Steinmetz <ast@domdv.de>
-
-
-How to use dm-crypt and swsusp together:
-========================================
-
-Some prerequisites:
-You know how dm-crypt works. If not, visit the following web page:
-http://www.saout.de/misc/dm-crypt/
-You have read Documentation/power/swsusp.txt and understand it.
-You did read Documentation/admin-guide/initrd.rst and know how an initrd works.
-You know how to create or how to modify an initrd.
-
-Now your system is properly set up, your disk is encrypted except for
-the swap device(s) and the boot partition which may contain a mini
-system for crypto setup and/or rescue purposes. You may even have
-an initrd that does your current crypto setup already.
-
-At this point you want to encrypt your swap, too. Still you want to
-be able to suspend using swsusp. This, however, means that you
-have to be able to either enter a passphrase or that you read
-the key(s) from an external device like a pcmcia flash disk
-or an usb stick prior to resume. So you need an initrd, that sets
-up dm-crypt and then asks swsusp to resume from the encrypted
-swap device.
-
-The most important thing is that you set up dm-crypt in such
-a way that the swap device you suspend to/resume from has
-always the same major/minor within the initrd as well as
-within your running system. The easiest way to achieve this is
-to always set up this swap device first with dmsetup, so that
-it will always look like the following:
-
-brw-------  1 root root 254, 0 Jul 28 13:37 /dev/mapper/swap0
-
-Now set up your kernel to use /dev/mapper/swap0 as the default
-resume partition, so your kernel .config contains:
-
-CONFIG_PM_STD_PARTITION="/dev/mapper/swap0"
-
-Prepare your boot loader to use the initrd you will create or
-modify. For lilo the simplest setup looks like the following
-lines:
-
-image=/boot/vmlinuz
-initrd=/boot/initrd.gz
-label=linux
-append="root=/dev/ram0 init=/linuxrc rw"
-
-Finally you need to create or modify your initrd. Lets assume
-you create an initrd that reads the required dm-crypt setup
-from a pcmcia flash disk card. The card is formatted with an ext2
-fs which resides on /dev/hde1 when the card is inserted. The
-card contains at least the encrypted swap setup in a file
-named "swapkey". /etc/fstab of your initrd contains something
-like the following:
-
-/dev/hda1   /mnt    ext3      ro                            0 0
-none        /proc   proc      defaults,noatime,nodiratime   0 0
-none        /sys    sysfs     defaults,noatime,nodiratime   0 0
-
-/dev/hda1 contains an unencrypted mini system that sets up all
-of your crypto devices, again by reading the setup from the
-pcmcia flash disk. What follows now is a /linuxrc for your
-initrd that allows you to resume from encrypted swap and that
-continues boot with your mini system on /dev/hda1 if resume
-does not happen:
-
-#!/bin/sh
-PATH=/sbin:/bin:/usr/sbin:/usr/bin
-mount /proc
-mount /sys
-mapped=0
-noresume=`grep -c noresume /proc/cmdline`
-if [ "$*" != "" ]
-then
-  noresume=1
-fi
-dmesg -n 1
-/sbin/cardmgr -q
-for i in 1 2 3 4 5 6 7 8 9 0
-do
-  if [ -f /proc/ide/hde/media ]
-  then
-    usleep 500000
-    mount -t ext2 -o ro /dev/hde1 /mnt
-    if [ -f /mnt/swapkey ]
-    then
-      dmsetup create swap0 /mnt/swapkey > /dev/null 2>&1 && mapped=1
-    fi
-    umount /mnt
-    break
-  fi
-  usleep 500000
-done
-killproc /sbin/cardmgr
-dmesg -n 6
-if [ $mapped = 1 ]
-then
-  if [ $noresume != 0 ]
-  then
-    mkswap /dev/mapper/swap0 > /dev/null 2>&1
-  fi
-  echo 254:0 > /sys/power/resume
-  dmsetup remove swap0
-fi
-umount /sys
-mount /mnt
-umount /proc
-cd /mnt
-pivot_root . mnt
-mount /proc
-umount -l /mnt
-umount /proc
-exec chroot . /sbin/init $* < dev/console > dev/console 2>&1
-
-Please don't mind the weird loop above, busybox's msh doesn't know
-the let statement. Now, what is happening in the script?
-First we have to decide if we want to try to resume, or not.
-We will not resume if booting with "noresume" or any parameters
-for init like "single" or "emergency" as boot parameters.
-
-Then we need to set up dmcrypt with the setup data from the
-pcmcia flash disk. If this succeeds we need to reset the swap
-device if we don't want to resume. The line "echo 254:0 > /sys/power/resume"
-then attempts to resume from the first device mapper device.
-Note that it is important to set the device in /sys/power/resume,
-regardless if resuming or not, otherwise later suspend will fail.
-If resume starts, script execution terminates here.
-
-Otherwise we just remove the encrypted swap device and leave it to the
-mini system on /dev/hda1 to set the whole crypto up (it is up to
-you to modify this to your taste).
-
-What then follows is the well known process to change the root
-file system and continue booting from there. I prefer to unmount
-the initrd prior to continue booting but it is up to you to modify
-this.
diff --git a/Documentation/power/swsusp.rst b/Documentation/power/swsusp.rst
new file mode 100644
index 000000000000..d000312f6965
--- /dev/null
+++ b/Documentation/power/swsusp.rst
@@ -0,0 +1,501 @@
+============
+Swap suspend
+============
+
+Some warnings, first.
+
+.. warning::
+
+   **BIG FAT WARNING**
+
+   If you touch anything on disk between suspend and resume...
+				...kiss your data goodbye.
+
+   If you do resume from initrd after your filesystems are mounted...
+				...bye bye root partition.
+
+			[this is actually same case as above]
+
+   If you have unsupported ( ) devices using DMA, you may have some
+   problems. If your disk driver does not support suspend... (IDE does),
+   it may cause some problems, too. If you change kernel command line
+   between suspend and resume, it may do something wrong. If you change
+   your hardware while system is suspended... well, it was not good idea;
+   but it will probably only crash.
+
+   ( ) suspend/resume support is needed to make it safe.
+
+   If you have any filesystems on USB devices mounted before software suspend,
+   they won't be accessible after resume and you may lose data, as though
+   you have unplugged the USB devices with mounted filesystems on them;
+   see the FAQ below for details.  (This is not true for more traditional
+   power states like "standby", which normally don't turn USB off.)
+
+Swap partition:
+  You need to append resume=/dev/your_swap_partition to kernel command
+  line or specify it using /sys/power/resume.
+
+Swap file:
+  If using a swapfile you can also specify a resume offset using
+  resume_offset=<number> on the kernel command line or specify it
+  in /sys/power/resume_offset.
+
+After preparing then you suspend by::
+
+	echo shutdown > /sys/power/disk; echo disk > /sys/power/state
+
+- If you feel ACPI works pretty well on your system, you might try::
+
+	echo platform > /sys/power/disk; echo disk > /sys/power/state
+
+- If you would like to write hibernation image to swap and then suspend
+  to RAM (provided your platform supports it), you can try::
+
+	echo suspend > /sys/power/disk; echo disk > /sys/power/state
+
+- If you have SATA disks, you'll need recent kernels with SATA suspend
+  support. For suspend and resume to work, make sure your disk drivers
+  are built into kernel -- not modules. [There's way to make
+  suspend/resume with modular disk drivers, see FAQ, but you probably
+  should not do that.]
+
+If you want to limit the suspend image size to N bytes, do::
+
+	echo N > /sys/power/image_size
+
+before suspend (it is limited to around 2/5 of available RAM by default).
+
+- The resume process checks for the presence of the resume device,
+  if found, it then checks the contents for the hibernation image signature.
+  If both are found, it resumes the hibernation image.
+
+- The resume process may be triggered in two ways:
+
+  1) During lateinit:  If resume=/dev/your_swap_partition is specified on
+     the kernel command line, lateinit runs the resume process.  If the
+     resume device has not been probed yet, the resume process fails and
+     bootup continues.
+  2) Manually from an initrd or initramfs:  May be run from
+     the init script by using the /sys/power/resume file.  It is vital
+     that this be done prior to remounting any filesystems (even as
+     read-only) otherwise data may be corrupted.
+
+Article about goals and implementation of Software Suspend for Linux
+====================================================================
+
+Author: Gábor Kuti
+Last revised: 2003-10-20 by Pavel Machek
+
+Idea and goals to achieve
+-------------------------
+
+Nowadays it is common in several laptops that they have a suspend button. It
+saves the state of the machine to a filesystem or to a partition and switches
+to standby mode. Later resuming the machine the saved state is loaded back to
+ram and the machine can continue its work. It has two real benefits. First we
+save ourselves the time machine goes down and later boots up, energy costs
+are real high when running from batteries. The other gain is that we don't have
+to interrupt our programs so processes that are calculating something for a long
+time shouldn't need to be written interruptible.
+
+swsusp saves the state of the machine into active swaps and then reboots or
+powerdowns.  You must explicitly specify the swap partition to resume from with
+`resume=` kernel option. If signature is found it loads and restores saved
+state. If the option `noresume` is specified as a boot parameter, it skips
+the resuming.  If the option `hibernate=nocompress` is specified as a boot
+parameter, it saves hibernation image without compression.
+
+In the meantime while the system is suspended you should not add/remove any
+of the hardware, write to the filesystems, etc.
+
+Sleep states summary
+====================
+
+There are three different interfaces you can use, /proc/acpi should
+work like this:
+
+In a really perfect world::
+
+  echo 1 > /proc/acpi/sleep       # for standby
+  echo 2 > /proc/acpi/sleep       # for suspend to ram
+  echo 3 > /proc/acpi/sleep       # for suspend to ram, but with more power conservative
+  echo 4 > /proc/acpi/sleep       # for suspend to disk
+  echo 5 > /proc/acpi/sleep       # for shutdown unfriendly the system
+
+and perhaps::
+
+  echo 4b > /proc/acpi/sleep      # for suspend to disk via s4bios
+
+Frequently Asked Questions
+==========================
+
+Q:
+  well, suspending a server is IMHO a really stupid thing,
+  but... (Diego Zuccato):
+
+A:
+  You bought new UPS for your server. How do you install it without
+  bringing machine down? Suspend to disk, rearrange power cables,
+  resume.
+
+  You have your server on UPS. Power died, and UPS is indicating 30
+  seconds to failure. What do you do? Suspend to disk.
+
+
+Q:
+  Maybe I'm missing something, but why don't the regular I/O paths work?
+
+A:
+  We do use the regular I/O paths. However we cannot restore the data
+  to its original location as we load it. That would create an
+  inconsistent kernel state which would certainly result in an oops.
+  Instead, we load the image into unused memory and then atomically copy
+  it back to it original location. This implies, of course, a maximum
+  image size of half the amount of memory.
+
+  There are two solutions to this:
+
+  * require half of memory to be free during suspend. That way you can
+    read "new" data onto free spots, then cli and copy
+
+  * assume we had special "polling" ide driver that only uses memory
+    between 0-640KB. That way, I'd have to make sure that 0-640KB is free
+    during suspending, but otherwise it would work...
+
+  suspend2 shares this fundamental limitation, but does not include user
+  data and disk caches into "used memory" by saving them in
+  advance. That means that the limitation goes away in practice.
+
+Q:
+  Does linux support ACPI S4?
+
+A:
+  Yes. That's what echo platform > /sys/power/disk does.
+
+Q:
+  What is 'suspend2'?
+
+A:
+  suspend2 is 'Software Suspend 2', a forked implementation of
+  suspend-to-disk which is available as separate patches for 2.4 and 2.6
+  kernels from swsusp.sourceforge.net. It includes support for SMP, 4GB
+  highmem and preemption. It also has a extensible architecture that
+  allows for arbitrary transformations on the image (compression,
+  encryption) and arbitrary backends for writing the image (eg to swap
+  or an NFS share[Work In Progress]). Questions regarding suspend2
+  should be sent to the mailing list available through the suspend2
+  website, and not to the Linux Kernel Mailing List. We are working
+  toward merging suspend2 into the mainline kernel.
+
+Q:
+  What is the freezing of tasks and why are we using it?
+
+A:
+  The freezing of tasks is a mechanism by which user space processes and some
+  kernel threads are controlled during hibernation or system-wide suspend (on some
+  architectures).  See freezing-of-tasks.txt for details.
+
+Q:
+  What is the difference between "platform" and "shutdown"?
+
+A:
+  shutdown:
+	save state in linux, then tell bios to powerdown
+
+  platform:
+	save state in linux, then tell bios to powerdown and blink
+        "suspended led"
+
+  "platform" is actually right thing to do where supported, but
+  "shutdown" is most reliable (except on ACPI systems).
+
+Q:
+  I do not understand why you have such strong objections to idea of
+  selective suspend.
+
+A:
+  Do selective suspend during runtime power management, that's okay. But
+  it's useless for suspend-to-disk. (And I do not see how you could use
+  it for suspend-to-ram, I hope you do not want that).
+
+  Lets see, so you suggest to
+
+  * SUSPEND all but swap device and parents
+  * Snapshot
+  * Write image to disk
+  * SUSPEND swap device and parents
+  * Powerdown
+
+  Oh no, that does not work, if swap device or its parents uses DMA,
+  you've corrupted data. You'd have to do
+
+  * SUSPEND all but swap device and parents
+  * FREEZE swap device and parents
+  * Snapshot
+  * UNFREEZE swap device and parents
+  * Write
+  * SUSPEND swap device and parents
+
+  Which means that you still need that FREEZE state, and you get more
+  complicated code. (And I have not yet introduce details like system
+  devices).
+
+Q:
+  There don't seem to be any generally useful behavioral
+  distinctions between SUSPEND and FREEZE.
+
+A:
+  Doing SUSPEND when you are asked to do FREEZE is always correct,
+  but it may be unnecessarily slow. If you want your driver to stay simple,
+  slowness may not matter to you. It can always be fixed later.
+
+  For devices like disk it does matter, you do not want to spindown for
+  FREEZE.
+
+Q:
+  After resuming, system is paging heavily, leading to very bad interactivity.
+
+A:
+  Try running::
+
+    cat /proc/[0-9]*/maps | grep / | sed 's:.* /:/:' | sort -u | while read file
+    do
+      test -f "$file" && cat "$file" > /dev/null
+    done
+
+  after resume. swapoff -a; swapon -a may also be useful.
+
+Q:
+  What happens to devices during swsusp? They seem to be resumed
+  during system suspend?
+
+A:
+  That's correct. We need to resume them if we want to write image to
+  disk. Whole sequence goes like
+
+      **Suspend part**
+
+      running system, user asks for suspend-to-disk
+
+      user processes are stopped
+
+      suspend(PMSG_FREEZE): devices are frozen so that they don't interfere
+      with state snapshot
+
+      state snapshot: copy of whole used memory is taken with interrupts disabled
+
+      resume(): devices are woken up so that we can write image to swap
+
+      write image to swap
+
+      suspend(PMSG_SUSPEND): suspend devices so that we can power off
+
+      turn the power off
+
+      **Resume part**
+
+      (is actually pretty similar)
+
+      running system, user asks for suspend-to-disk
+
+      user processes are stopped (in common case there are none,
+      but with resume-from-initrd, no one knows)
+
+      read image from disk
+
+      suspend(PMSG_FREEZE): devices are frozen so that they don't interfere
+      with image restoration
+
+      image restoration: rewrite memory with image
+
+      resume(): devices are woken up so that system can continue
+
+      thaw all user processes
+
+Q:
+  What is this 'Encrypt suspend image' for?
+
+A:
+  First of all: it is not a replacement for dm-crypt encrypted swap.
+  It cannot protect your computer while it is suspended. Instead it does
+  protect from leaking sensitive data after resume from suspend.
+
+  Think of the following: you suspend while an application is running
+  that keeps sensitive data in memory. The application itself prevents
+  the data from being swapped out. Suspend, however, must write these
+  data to swap to be able to resume later on. Without suspend encryption
+  your sensitive data are then stored in plaintext on disk.  This means
+  that after resume your sensitive data are accessible to all
+  applications having direct access to the swap device which was used
+  for suspend. If you don't need swap after resume these data can remain
+  on disk virtually forever. Thus it can happen that your system gets
+  broken in weeks later and sensitive data which you thought were
+  encrypted and protected are retrieved and stolen from the swap device.
+  To prevent this situation you should use 'Encrypt suspend image'.
+
+  During suspend a temporary key is created and this key is used to
+  encrypt the data written to disk. When, during resume, the data was
+  read back into memory the temporary key is destroyed which simply
+  means that all data written to disk during suspend are then
+  inaccessible so they can't be stolen later on.  The only thing that
+  you must then take care of is that you call 'mkswap' for the swap
+  partition used for suspend as early as possible during regular
+  boot. This asserts that any temporary key from an oopsed suspend or
+  from a failed or aborted resume is erased from the swap device.
+
+  As a rule of thumb use encrypted swap to protect your data while your
+  system is shut down or suspended. Additionally use the encrypted
+  suspend image to prevent sensitive data from being stolen after
+  resume.
+
+Q:
+  Can I suspend to a swap file?
+
+A:
+  Generally, yes, you can.  However, it requires you to use the "resume=" and
+  "resume_offset=" kernel command line parameters, so the resume from a swap file
+  cannot be initiated from an initrd or initramfs image.  See
+  swsusp-and-swap-files.txt for details.
+
+Q:
+  Is there a maximum system RAM size that is supported by swsusp?
+
+A:
+  It should work okay with highmem.
+
+Q:
+  Does swsusp (to disk) use only one swap partition or can it use
+  multiple swap partitions (aggregate them into one logical space)?
+
+A:
+  Only one swap partition, sorry.
+
+Q:
+  If my application(s) causes lots of memory & swap space to be used
+  (over half of the total system RAM), is it correct that it is likely
+  to be useless to try to suspend to disk while that app is running?
+
+A:
+  No, it should work okay, as long as your app does not mlock()
+  it. Just prepare big enough swap partition.
+
+Q:
+  What information is useful for debugging suspend-to-disk problems?
+
+A:
+  Well, last messages on the screen are always useful. If something
+  is broken, it is usually some kernel driver, therefore trying with as
+  little as possible modules loaded helps a lot. I also prefer people to
+  suspend from console, preferably without X running. Booting with
+  init=/bin/bash, then swapon and starting suspend sequence manually
+  usually does the trick. Then it is good idea to try with latest
+  vanilla kernel.
+
+Q:
+  How can distributions ship a swsusp-supporting kernel with modular
+  disk drivers (especially SATA)?
+
+A:
+  Well, it can be done, load the drivers, then do echo into
+  /sys/power/resume file from initrd. Be sure not to mount
+  anything, not even read-only mount, or you are going to lose your
+  data.
+
+Q:
+  How do I make suspend more verbose?
+
+A:
+  If you want to see any non-error kernel messages on the virtual
+  terminal the kernel switches to during suspend, you have to set the
+  kernel console loglevel to at least 4 (KERN_WARNING), for example by
+  doing::
+
+	# save the old loglevel
+	read LOGLEVEL DUMMY < /proc/sys/kernel/printk
+	# set the loglevel so we see the progress bar.
+	# if the level is higher than needed, we leave it alone.
+	if [ $LOGLEVEL -lt 5 ]; then
+	        echo 5 > /proc/sys/kernel/printk
+		fi
+
+        IMG_SZ=0
+        read IMG_SZ < /sys/power/image_size
+        echo -n disk > /sys/power/state
+        RET=$?
+        #
+        # the logic here is:
+        # if image_size > 0 (without kernel support, IMG_SZ will be zero),
+        # then try again with image_size set to zero.
+	if [ $RET -ne 0 -a $IMG_SZ -ne 0 ]; then # try again with minimal image size
+                echo 0 > /sys/power/image_size
+                echo -n disk > /sys/power/state
+                RET=$?
+        fi
+
+	# restore previous loglevel
+	echo $LOGLEVEL > /proc/sys/kernel/printk
+	exit $RET
+
+Q:
+  Is this true that if I have a mounted filesystem on a USB device and
+  I suspend to disk, I can lose data unless the filesystem has been mounted
+  with "sync"?
+
+A:
+  That's right ... if you disconnect that device, you may lose data.
+  In fact, even with "-o sync" you can lose data if your programs have
+  information in buffers they haven't written out to a disk you disconnect,
+  or if you disconnect before the device finished saving data you wrote.
+
+  Software suspend normally powers down USB controllers, which is equivalent
+  to disconnecting all USB devices attached to your system.
+
+  Your system might well support low-power modes for its USB controllers
+  while the system is asleep, maintaining the connection, using true sleep
+  modes like "suspend-to-RAM" or "standby".  (Don't write "disk" to the
+  /sys/power/state file; write "standby" or "mem".)  We've not seen any
+  hardware that can use these modes through software suspend, although in
+  theory some systems might support "platform" modes that won't break the
+  USB connections.
+
+  Remember that it's always a bad idea to unplug a disk drive containing a
+  mounted filesystem.  That's true even when your system is asleep!  The
+  safest thing is to unmount all filesystems on removable media (such USB,
+  Firewire, CompactFlash, MMC, external SATA, or even IDE hotplug bays)
+  before suspending; then remount them after resuming.
+
+  There is a work-around for this problem.  For more information, see
+  Documentation/driver-api/usb/persist.rst.
+
+Q:
+  Can I suspend-to-disk using a swap partition under LVM?
+
+A:
+  Yes and No.  You can suspend successfully, but the kernel will not be able
+  to resume on its own.  You need an initramfs that can recognize the resume
+  situation, activate the logical volume containing the swap volume (but not
+  touch any filesystems!), and eventually call::
+
+    echo -n "$major:$minor" > /sys/power/resume
+
+  where $major and $minor are the respective major and minor device numbers of
+  the swap volume.
+
+  uswsusp works with LVM, too.  See http://suspend.sourceforge.net/
+
+Q:
+  I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were
+  compiled with the similar configuration files. Anyway I found that
+  suspend to disk (and resume) is much slower on 2.6.16 compared to
+  2.6.15. Any idea for why that might happen or how can I speed it up?
+
+A:
+  This is because the size of the suspend image is now greater than
+  for 2.6.15 (by saving more data we can get more responsive system
+  after resume).
+
+  There's the /sys/power/image_size knob that controls the size of the
+  image.  If you set it to 0 (eg. by echo 0 > /sys/power/image_size as
+  root), the 2.6.15 behavior should be restored.  If it is still too
+  slow, take a look at suspend.sf.net -- userland suspend is faster and
+  supports LZF compression to speed it up further.
diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt
deleted file mode 100644
index 236d1fb13640..000000000000
--- a/Documentation/power/swsusp.txt
+++ /dev/null
@@ -1,446 +0,0 @@
-Some warnings, first.
-
- * BIG FAT WARNING *********************************************************
- *
- * If you touch anything on disk between suspend and resume...
- *				...kiss your data goodbye.
- *
- * If you do resume from initrd after your filesystems are mounted...
- *				...bye bye root partition.
- *			[this is actually same case as above]
- *
- * If you have unsupported (*) devices using DMA, you may have some
- * problems. If your disk driver does not support suspend... (IDE does),
- * it may cause some problems, too. If you change kernel command line
- * between suspend and resume, it may do something wrong. If you change
- * your hardware while system is suspended... well, it was not good idea;
- * but it will probably only crash.
- *
- * (*) suspend/resume support is needed to make it safe.
- *
- * If you have any filesystems on USB devices mounted before software suspend,
- * they won't be accessible after resume and you may lose data, as though
- * you have unplugged the USB devices with mounted filesystems on them;
- * see the FAQ below for details.  (This is not true for more traditional
- * power states like "standby", which normally don't turn USB off.)
-
-Swap partition:
-You need to append resume=/dev/your_swap_partition to kernel command
-line or specify it using /sys/power/resume.
-
-Swap file:
-If using a swapfile you can also specify a resume offset using
-resume_offset=<number> on the kernel command line or specify it
-in /sys/power/resume_offset.
-
-After preparing then you suspend by
-
-echo shutdown > /sys/power/disk; echo disk > /sys/power/state
-
-. If you feel ACPI works pretty well on your system, you might try
-
-echo platform > /sys/power/disk; echo disk > /sys/power/state
-
-. If you would like to write hibernation image to swap and then suspend
-to RAM (provided your platform supports it), you can try
-
-echo suspend > /sys/power/disk; echo disk > /sys/power/state
-
-. If you have SATA disks, you'll need recent kernels with SATA suspend
-support. For suspend and resume to work, make sure your disk drivers
-are built into kernel -- not modules. [There's way to make
-suspend/resume with modular disk drivers, see FAQ, but you probably
-should not do that.]
-
-If you want to limit the suspend image size to N bytes, do
-
-echo N > /sys/power/image_size
-
-before suspend (it is limited to around 2/5 of available RAM by default).
-
-. The resume process checks for the presence of the resume device,
-if found, it then checks the contents for the hibernation image signature.
-If both are found, it resumes the hibernation image.
-
-. The resume process may be triggered in two ways:
-  1) During lateinit:  If resume=/dev/your_swap_partition is specified on
-     the kernel command line, lateinit runs the resume process.  If the
-     resume device has not been probed yet, the resume process fails and
-     bootup continues.
-  2) Manually from an initrd or initramfs:  May be run from
-     the init script by using the /sys/power/resume file.  It is vital
-     that this be done prior to remounting any filesystems (even as
-     read-only) otherwise data may be corrupted.
-
-Article about goals and implementation of Software Suspend for Linux
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Author: Gábor Kuti
-Last revised: 2003-10-20 by Pavel Machek
-
-Idea and goals to achieve
-
-Nowadays it is common in several laptops that they have a suspend button. It
-saves the state of the machine to a filesystem or to a partition and switches
-to standby mode. Later resuming the machine the saved state is loaded back to
-ram and the machine can continue its work. It has two real benefits. First we
-save ourselves the time machine goes down and later boots up, energy costs
-are real high when running from batteries. The other gain is that we don't have to
-interrupt our programs so processes that are calculating something for a long
-time shouldn't need to be written interruptible.
-
-swsusp saves the state of the machine into active swaps and then reboots or
-powerdowns.  You must explicitly specify the swap partition to resume from with
-``resume='' kernel option. If signature is found it loads and restores saved
-state. If the option ``noresume'' is specified as a boot parameter, it skips
-the resuming.  If the option ``hibernate=nocompress'' is specified as a boot
-parameter, it saves hibernation image without compression.
-
-In the meantime while the system is suspended you should not add/remove any
-of the hardware, write to the filesystems, etc.
-
-Sleep states summary
-====================
-
-There are three different interfaces you can use, /proc/acpi should
-work like this:
-
-In a really perfect world:
-echo 1 > /proc/acpi/sleep       # for standby
-echo 2 > /proc/acpi/sleep       # for suspend to ram
-echo 3 > /proc/acpi/sleep       # for suspend to ram, but with more power conservative
-echo 4 > /proc/acpi/sleep       # for suspend to disk
-echo 5 > /proc/acpi/sleep       # for shutdown unfriendly the system
-
-and perhaps
-echo 4b > /proc/acpi/sleep      # for suspend to disk via s4bios
-
-Frequently Asked Questions
-==========================
-
-Q: well, suspending a server is IMHO a really stupid thing,
-but... (Diego Zuccato):
-
-A: You bought new UPS for your server. How do you install it without
-bringing machine down? Suspend to disk, rearrange power cables,
-resume.
-
-You have your server on UPS. Power died, and UPS is indicating 30
-seconds to failure. What do you do? Suspend to disk.
-
-
-Q: Maybe I'm missing something, but why don't the regular I/O paths work?
-
-A: We do use the regular I/O paths. However we cannot restore the data
-to its original location as we load it. That would create an
-inconsistent kernel state which would certainly result in an oops.
-Instead, we load the image into unused memory and then atomically copy
-it back to it original location. This implies, of course, a maximum
-image size of half the amount of memory.
-
-There are two solutions to this:
-
-* require half of memory to be free during suspend. That way you can
-read "new" data onto free spots, then cli and copy
-
-* assume we had special "polling" ide driver that only uses memory
-between 0-640KB. That way, I'd have to make sure that 0-640KB is free
-during suspending, but otherwise it would work...
-
-suspend2 shares this fundamental limitation, but does not include user
-data and disk caches into "used memory" by saving them in
-advance. That means that the limitation goes away in practice.
-
-Q: Does linux support ACPI S4?
-
-A: Yes. That's what echo platform > /sys/power/disk does.
-
-Q: What is 'suspend2'?
-
-A: suspend2 is 'Software Suspend 2', a forked implementation of
-suspend-to-disk which is available as separate patches for 2.4 and 2.6
-kernels from swsusp.sourceforge.net. It includes support for SMP, 4GB
-highmem and preemption. It also has a extensible architecture that
-allows for arbitrary transformations on the image (compression,
-encryption) and arbitrary backends for writing the image (eg to swap
-or an NFS share[Work In Progress]). Questions regarding suspend2
-should be sent to the mailing list available through the suspend2
-website, and not to the Linux Kernel Mailing List. We are working
-toward merging suspend2 into the mainline kernel.
-
-Q: What is the freezing of tasks and why are we using it?
-
-A: The freezing of tasks is a mechanism by which user space processes and some
-kernel threads are controlled during hibernation or system-wide suspend (on some
-architectures).  See freezing-of-tasks.txt for details.
-
-Q: What is the difference between "platform" and "shutdown"?
-
-A:
-
-shutdown: save state in linux, then tell bios to powerdown
-
-platform: save state in linux, then tell bios to powerdown and blink
-          "suspended led"
-
-"platform" is actually right thing to do where supported, but
-"shutdown" is most reliable (except on ACPI systems).
-
-Q: I do not understand why you have such strong objections to idea of
-selective suspend.
-
-A: Do selective suspend during runtime power management, that's okay. But
-it's useless for suspend-to-disk. (And I do not see how you could use
-it for suspend-to-ram, I hope you do not want that).
-
-Lets see, so you suggest to
-
-* SUSPEND all but swap device and parents
-* Snapshot
-* Write image to disk
-* SUSPEND swap device and parents
-* Powerdown
-
-Oh no, that does not work, if swap device or its parents uses DMA,
-you've corrupted data. You'd have to do
-
-* SUSPEND all but swap device and parents
-* FREEZE swap device and parents
-* Snapshot
-* UNFREEZE swap device and parents
-* Write
-* SUSPEND swap device and parents
-
-Which means that you still need that FREEZE state, and you get more
-complicated code. (And I have not yet introduce details like system
-devices).
-
-Q: There don't seem to be any generally useful behavioral
-distinctions between SUSPEND and FREEZE.
-
-A: Doing SUSPEND when you are asked to do FREEZE is always correct,
-but it may be unnecessarily slow. If you want your driver to stay simple,
-slowness may not matter to you. It can always be fixed later.
-
-For devices like disk it does matter, you do not want to spindown for
-FREEZE.
-
-Q: After resuming, system is paging heavily, leading to very bad interactivity.
-
-A: Try running
-
-cat /proc/[0-9]*/maps | grep / | sed 's:.* /:/:' | sort -u | while read file
-do
-  test -f "$file" && cat "$file" > /dev/null
-done
-
-after resume. swapoff -a; swapon -a may also be useful.
-
-Q: What happens to devices during swsusp? They seem to be resumed
-during system suspend?
-
-A: That's correct. We need to resume them if we want to write image to
-disk. Whole sequence goes like
-
-      Suspend part
-      ~~~~~~~~~~~~
-      running system, user asks for suspend-to-disk
-
-      user processes are stopped
-
-      suspend(PMSG_FREEZE): devices are frozen so that they don't interfere
-      		      with state snapshot
-
-      state snapshot: copy of whole used memory is taken with interrupts disabled
-
-      resume(): devices are woken up so that we can write image to swap
-
-      write image to swap
-
-      suspend(PMSG_SUSPEND): suspend devices so that we can power off
-
-      turn the power off
-
-      Resume part
-      ~~~~~~~~~~~
-      (is actually pretty similar)
-
-      running system, user asks for suspend-to-disk
-
-      user processes are stopped (in common case there are none, but with resume-from-initrd, no one knows)
-
-      read image from disk
-
-      suspend(PMSG_FREEZE): devices are frozen so that they don't interfere
-      		      with image restoration
-
-      image restoration: rewrite memory with image
-
-      resume(): devices are woken up so that system can continue
-
-      thaw all user processes
-
-Q: What is this 'Encrypt suspend image' for?
-
-A: First of all: it is not a replacement for dm-crypt encrypted swap.
-It cannot protect your computer while it is suspended. Instead it does
-protect from leaking sensitive data after resume from suspend.
-
-Think of the following: you suspend while an application is running
-that keeps sensitive data in memory. The application itself prevents
-the data from being swapped out. Suspend, however, must write these
-data to swap to be able to resume later on. Without suspend encryption
-your sensitive data are then stored in plaintext on disk.  This means
-that after resume your sensitive data are accessible to all
-applications having direct access to the swap device which was used
-for suspend. If you don't need swap after resume these data can remain
-on disk virtually forever. Thus it can happen that your system gets
-broken in weeks later and sensitive data which you thought were
-encrypted and protected are retrieved and stolen from the swap device.
-To prevent this situation you should use 'Encrypt suspend image'.
-
-During suspend a temporary key is created and this key is used to
-encrypt the data written to disk. When, during resume, the data was
-read back into memory the temporary key is destroyed which simply
-means that all data written to disk during suspend are then
-inaccessible so they can't be stolen later on.  The only thing that
-you must then take care of is that you call 'mkswap' for the swap
-partition used for suspend as early as possible during regular
-boot. This asserts that any temporary key from an oopsed suspend or
-from a failed or aborted resume is erased from the swap device.
-
-As a rule of thumb use encrypted swap to protect your data while your
-system is shut down or suspended. Additionally use the encrypted
-suspend image to prevent sensitive data from being stolen after
-resume.
-
-Q: Can I suspend to a swap file?
-
-A: Generally, yes, you can.  However, it requires you to use the "resume=" and
-"resume_offset=" kernel command line parameters, so the resume from a swap file
-cannot be initiated from an initrd or initramfs image.  See
-swsusp-and-swap-files.txt for details.
-
-Q: Is there a maximum system RAM size that is supported by swsusp?
-
-A: It should work okay with highmem.
-
-Q: Does swsusp (to disk) use only one swap partition or can it use
-multiple swap partitions (aggregate them into one logical space)?
-
-A: Only one swap partition, sorry.
-
-Q: If my application(s) causes lots of memory & swap space to be used
-(over half of the total system RAM), is it correct that it is likely
-to be useless to try to suspend to disk while that app is running?
-
-A: No, it should work okay, as long as your app does not mlock()
-it. Just prepare big enough swap partition.
-
-Q: What information is useful for debugging suspend-to-disk problems?
-
-A: Well, last messages on the screen are always useful. If something
-is broken, it is usually some kernel driver, therefore trying with as
-little as possible modules loaded helps a lot. I also prefer people to
-suspend from console, preferably without X running. Booting with
-init=/bin/bash, then swapon and starting suspend sequence manually
-usually does the trick. Then it is good idea to try with latest
-vanilla kernel.
-
-Q: How can distributions ship a swsusp-supporting kernel with modular
-disk drivers (especially SATA)?
-
-A: Well, it can be done, load the drivers, then do echo into
-/sys/power/resume file from initrd. Be sure not to mount
-anything, not even read-only mount, or you are going to lose your
-data.
-
-Q: How do I make suspend more verbose?
-
-A: If you want to see any non-error kernel messages on the virtual
-terminal the kernel switches to during suspend, you have to set the
-kernel console loglevel to at least 4 (KERN_WARNING), for example by
-doing
-
-	# save the old loglevel
-	read LOGLEVEL DUMMY < /proc/sys/kernel/printk
-	# set the loglevel so we see the progress bar.
-	# if the level is higher than needed, we leave it alone.
-	if [ $LOGLEVEL -lt 5 ]; then
-	        echo 5 > /proc/sys/kernel/printk
-		fi
-
-        IMG_SZ=0
-        read IMG_SZ < /sys/power/image_size
-        echo -n disk > /sys/power/state
-        RET=$?
-        #
-        # the logic here is:
-        # if image_size > 0 (without kernel support, IMG_SZ will be zero),
-        # then try again with image_size set to zero.
-	if [ $RET -ne 0 -a $IMG_SZ -ne 0 ]; then # try again with minimal image size
-                echo 0 > /sys/power/image_size
-                echo -n disk > /sys/power/state
-                RET=$?
-        fi
-
-	# restore previous loglevel
-	echo $LOGLEVEL > /proc/sys/kernel/printk
-	exit $RET
-
-Q: Is this true that if I have a mounted filesystem on a USB device and
-I suspend to disk, I can lose data unless the filesystem has been mounted
-with "sync"?
-
-A: That's right ... if you disconnect that device, you may lose data.
-In fact, even with "-o sync" you can lose data if your programs have
-information in buffers they haven't written out to a disk you disconnect,
-or if you disconnect before the device finished saving data you wrote.
-
-Software suspend normally powers down USB controllers, which is equivalent
-to disconnecting all USB devices attached to your system.
-
-Your system might well support low-power modes for its USB controllers
-while the system is asleep, maintaining the connection, using true sleep
-modes like "suspend-to-RAM" or "standby".  (Don't write "disk" to the
-/sys/power/state file; write "standby" or "mem".)  We've not seen any
-hardware that can use these modes through software suspend, although in
-theory some systems might support "platform" modes that won't break the
-USB connections.
-
-Remember that it's always a bad idea to unplug a disk drive containing a
-mounted filesystem.  That's true even when your system is asleep!  The
-safest thing is to unmount all filesystems on removable media (such USB,
-Firewire, CompactFlash, MMC, external SATA, or even IDE hotplug bays)
-before suspending; then remount them after resuming.
-
-There is a work-around for this problem.  For more information, see
-Documentation/driver-api/usb/persist.rst.
-
-Q: Can I suspend-to-disk using a swap partition under LVM?
-
-A: Yes and No.  You can suspend successfully, but the kernel will not be able
-to resume on its own.  You need an initramfs that can recognize the resume
-situation, activate the logical volume containing the swap volume (but not
-touch any filesystems!), and eventually call
-
-echo -n "$major:$minor" > /sys/power/resume
-
-where $major and $minor are the respective major and minor device numbers of
-the swap volume.
-
-uswsusp works with LVM, too.  See http://suspend.sourceforge.net/
-
-Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were
-compiled with the similar configuration files. Anyway I found that
-suspend to disk (and resume) is much slower on 2.6.16 compared to
-2.6.15. Any idea for why that might happen or how can I speed it up?
-
-A: This is because the size of the suspend image is now greater than
-for 2.6.15 (by saving more data we can get more responsive system
-after resume).
-
-There's the /sys/power/image_size knob that controls the size of the
-image.  If you set it to 0 (eg. by echo 0 > /sys/power/image_size as
-root), the 2.6.15 behavior should be restored.  If it is still too
-slow, take a look at suspend.sf.net -- userland suspend is faster and
-supports LZF compression to speed it up further.
diff --git a/Documentation/power/tricks.rst b/Documentation/power/tricks.rst
new file mode 100644
index 000000000000..ca787f142c3f
--- /dev/null
+++ b/Documentation/power/tricks.rst
@@ -0,0 +1,29 @@
+================
+swsusp/S3 tricks
+================
+
+Pavel Machek <pavel@ucw.cz>
+
+If you want to trick swsusp/S3 into working, you might want to try:
+
+* go with minimal config, turn off drivers like USB, AGP you don't
+  really need
+
+* turn off APIC and preempt
+
+* use ext2. At least it has working fsck. [If something seems to go
+  wrong, force fsck when you have a chance]
+
+* turn off modules
+
+* use vga text console, shut down X. [If you really want X, you might
+  want to try vesafb later]
+
+* try running as few processes as possible, preferably go to single
+  user mode.
+
+* due to video issues, swsusp should be easier to get working than
+  S3. Try that first.
+
+When you make it work, try to find out what exactly was it that broke
+suspend, and preferably fix that.
diff --git a/Documentation/power/tricks.txt b/Documentation/power/tricks.txt
deleted file mode 100644
index a1b8f7249f4c..000000000000
--- a/Documentation/power/tricks.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-	swsusp/S3 tricks
-	~~~~~~~~~~~~~~~~
-Pavel Machek <pavel@ucw.cz>
-
-If you want to trick swsusp/S3 into working, you might want to try:
-
-* go with minimal config, turn off drivers like USB, AGP you don't
-  really need
-
-* turn off APIC and preempt
-
-* use ext2. At least it has working fsck. [If something seems to go
-  wrong, force fsck when you have a chance]
-
-* turn off modules
-
-* use vga text console, shut down X. [If you really want X, you might
-  want to try vesafb later]
-
-* try running as few processes as possible, preferably go to single
-  user mode.
-
-* due to video issues, swsusp should be easier to get working than
-  S3. Try that first.
-
-When you make it work, try to find out what exactly was it that broke
-suspend, and preferably fix that.
diff --git a/Documentation/power/userland-swsusp.rst b/Documentation/power/userland-swsusp.rst
new file mode 100644
index 000000000000..a0fa51bb1a4d
--- /dev/null
+++ b/Documentation/power/userland-swsusp.rst
@@ -0,0 +1,191 @@
+=====================================================
+Documentation for userland software suspend interface
+=====================================================
+
+	(C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+
+First, the warnings at the beginning of swsusp.txt still apply.
+
+Second, you should read the FAQ in swsusp.txt _now_ if you have not
+done it already.
+
+Now, to use the userland interface for software suspend you need special
+utilities that will read/write the system memory snapshot from/to the
+kernel.  Such utilities are available, for example, from
+<http://suspend.sourceforge.net>.  You may want to have a look at them if you
+are going to develop your own suspend/resume utilities.
+
+The interface consists of a character device providing the open(),
+release(), read(), and write() operations as well as several ioctl()
+commands defined in include/linux/suspend_ioctls.h .  The major and minor
+numbers of the device are, respectively, 10 and 231, and they can
+be read from /sys/class/misc/snapshot/dev.
+
+The device can be open either for reading or for writing.  If open for
+reading, it is considered to be in the suspend mode.  Otherwise it is
+assumed to be in the resume mode.  The device cannot be open for simultaneous
+reading and writing.  It is also impossible to have the device open more than
+once at a time.
+
+Even opening the device has side effects. Data structures are
+allocated, and PM_HIBERNATION_PREPARE / PM_RESTORE_PREPARE chains are
+called.
+
+The ioctl() commands recognized by the device are:
+
+SNAPSHOT_FREEZE
+	freeze user space processes (the current process is
+	not frozen); this is required for SNAPSHOT_CREATE_IMAGE
+	and SNAPSHOT_ATOMIC_RESTORE to succeed
+
+SNAPSHOT_UNFREEZE
+	thaw user space processes frozen by SNAPSHOT_FREEZE
+
+SNAPSHOT_CREATE_IMAGE
+	create a snapshot of the system memory; the
+	last argument of ioctl() should be a pointer to an int variable,
+	the value of which will indicate whether the call returned after
+	creating the snapshot (1) or after restoring the system memory state
+	from it (0) (after resume the system finds itself finishing the
+	SNAPSHOT_CREATE_IMAGE ioctl() again); after the snapshot
+	has been created the read() operation can be used to transfer
+	it out of the kernel
+
+SNAPSHOT_ATOMIC_RESTORE
+	restore the system memory state from the
+	uploaded snapshot image; before calling it you should transfer
+	the system memory snapshot back to the kernel using the write()
+	operation; this call will not succeed if the snapshot
+	image is not available to the kernel
+
+SNAPSHOT_FREE
+	free memory allocated for the snapshot image
+
+SNAPSHOT_PREF_IMAGE_SIZE
+	set the preferred maximum size of the image
+	(the kernel will do its best to ensure the image size will not exceed
+	this number, but if it turns out to be impossible, the kernel will
+	create the smallest image possible)
+
+SNAPSHOT_GET_IMAGE_SIZE
+	return the actual size of the hibernation image
+
+SNAPSHOT_AVAIL_SWAP_SIZE
+	return the amount of available swap in bytes (the
+	last argument should be a pointer to an unsigned int variable that will
+	contain the result if the call is successful).
+
+SNAPSHOT_ALLOC_SWAP_PAGE
+	allocate a swap page from the resume partition
+	(the last argument should be a pointer to a loff_t variable that
+	will contain the swap page offset if the call is successful)
+
+SNAPSHOT_FREE_SWAP_PAGES
+	free all swap pages allocated by
+	SNAPSHOT_ALLOC_SWAP_PAGE
+
+SNAPSHOT_SET_SWAP_AREA
+	set the resume partition and the offset (in <PAGE_SIZE>
+	units) from the beginning of the partition at which the swap header is
+	located (the last ioctl() argument should point to a struct
+	resume_swap_area, as defined in kernel/power/suspend_ioctls.h,
+	containing the resume device specification and the offset); for swap
+	partitions the offset is always 0, but it is different from zero for
+	swap files (see Documentation/power/swsusp-and-swap-files.rst for
+	details).
+
+SNAPSHOT_PLATFORM_SUPPORT
+	enable/disable the hibernation platform support,
+	depending on the argument value (enable, if the argument is nonzero)
+
+SNAPSHOT_POWER_OFF
+	make the kernel transition the system to the hibernation
+	state (eg. ACPI S4) using the platform (eg. ACPI) driver
+
+SNAPSHOT_S2RAM
+	suspend to RAM; using this call causes the kernel to
+	immediately enter the suspend-to-RAM state, so this call must always
+	be preceded by the SNAPSHOT_FREEZE call and it is also necessary
+	to use the SNAPSHOT_UNFREEZE call after the system wakes up.  This call
+	is needed to implement the suspend-to-both mechanism in which the
+	suspend image is first created, as though the system had been suspended
+	to disk, and then the system is suspended to RAM (this makes it possible
+	to resume the system from RAM if there's enough battery power or restore
+	its state on the basis of the saved suspend image otherwise)
+
+The device's read() operation can be used to transfer the snapshot image from
+the kernel.  It has the following limitations:
+
+- you cannot read() more than one virtual memory page at a time
+- read()s across page boundaries are impossible (ie. if you read() 1/2 of
+  a page in the previous call, you will only be able to read()
+  **at most** 1/2 of the page in the next call)
+
+The device's write() operation is used for uploading the system memory snapshot
+into the kernel.  It has the same limitations as the read() operation.
+
+The release() operation frees all memory allocated for the snapshot image
+and all swap pages allocated with SNAPSHOT_ALLOC_SWAP_PAGE (if any).
+Thus it is not necessary to use either SNAPSHOT_FREE or
+SNAPSHOT_FREE_SWAP_PAGES before closing the device (in fact it will also
+unfreeze user space processes frozen by SNAPSHOT_UNFREEZE if they are
+still frozen when the device is being closed).
+
+Currently it is assumed that the userland utilities reading/writing the
+snapshot image from/to the kernel will use a swap partition, called the resume
+partition, or a swap file as storage space (if a swap file is used, the resume
+partition is the partition that holds this file).  However, this is not really
+required, as they can use, for example, a special (blank) suspend partition or
+a file on a partition that is unmounted before SNAPSHOT_CREATE_IMAGE and
+mounted afterwards.
+
+These utilities MUST NOT make any assumptions regarding the ordering of
+data within the snapshot image.  The contents of the image are entirely owned
+by the kernel and its structure may be changed in future kernel releases.
+
+The snapshot image MUST be written to the kernel unaltered (ie. all of the image
+data, metadata and header MUST be written in _exactly_ the same amount, form
+and order in which they have been read).  Otherwise, the behavior of the
+resumed system may be totally unpredictable.
+
+While executing SNAPSHOT_ATOMIC_RESTORE the kernel checks if the
+structure of the snapshot image is consistent with the information stored
+in the image header.  If any inconsistencies are detected,
+SNAPSHOT_ATOMIC_RESTORE will not succeed.  Still, this is not a fool-proof
+mechanism and the userland utilities using the interface SHOULD use additional
+means, such as checksums, to ensure the integrity of the snapshot image.
+
+The suspending and resuming utilities MUST lock themselves in memory,
+preferably using mlockall(), before calling SNAPSHOT_FREEZE.
+
+The suspending utility MUST check the value stored by SNAPSHOT_CREATE_IMAGE
+in the memory location pointed to by the last argument of ioctl() and proceed
+in accordance with it:
+
+1. 	If the value is 1 (ie. the system memory snapshot has just been
+	created and the system is ready for saving it):
+
+	(a)	The suspending utility MUST NOT close the snapshot device
+		_unless_ the whole suspend procedure is to be cancelled, in
+		which case, if the snapshot image has already been saved, the
+		suspending utility SHOULD destroy it, preferably by zapping
+		its header.  If the suspend is not to be cancelled, the
+		system MUST be powered off or rebooted after the snapshot
+		image has been saved.
+	(b)	The suspending utility SHOULD NOT attempt to perform any
+		file system operations (including reads) on the file systems
+		that were mounted before SNAPSHOT_CREATE_IMAGE has been
+		called.  However, it MAY mount a file system that was not
+		mounted at that time and perform some operations on it (eg.
+		use it for saving the image).
+
+2.	If the value is 0 (ie. the system state has just been restored from
+	the snapshot image), the suspending utility MUST close the snapshot
+	device.  Afterwards it will be treated as a regular userland process,
+	so it need not exit.
+
+The resuming utility SHOULD NOT attempt to mount any file systems that could
+be mounted before suspend and SHOULD NOT attempt to perform any operations
+involving such file systems.
+
+For details, please refer to the source code.
diff --git a/Documentation/power/userland-swsusp.txt b/Documentation/power/userland-swsusp.txt
deleted file mode 100644
index bbfcd1bbedc5..000000000000
--- a/Documentation/power/userland-swsusp.txt
+++ /dev/null
@@ -1,170 +0,0 @@
-Documentation for userland software suspend interface
-	(C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
-
-First, the warnings at the beginning of swsusp.txt still apply.
-
-Second, you should read the FAQ in swsusp.txt _now_ if you have not
-done it already.
-
-Now, to use the userland interface for software suspend you need special
-utilities that will read/write the system memory snapshot from/to the
-kernel.  Such utilities are available, for example, from
-<http://suspend.sourceforge.net>.  You may want to have a look at them if you
-are going to develop your own suspend/resume utilities.
-
-The interface consists of a character device providing the open(),
-release(), read(), and write() operations as well as several ioctl()
-commands defined in include/linux/suspend_ioctls.h .  The major and minor
-numbers of the device are, respectively, 10 and 231, and they can
-be read from /sys/class/misc/snapshot/dev.
-
-The device can be open either for reading or for writing.  If open for
-reading, it is considered to be in the suspend mode.  Otherwise it is
-assumed to be in the resume mode.  The device cannot be open for simultaneous
-reading and writing.  It is also impossible to have the device open more than
-once at a time.
-
-Even opening the device has side effects. Data structures are
-allocated, and PM_HIBERNATION_PREPARE / PM_RESTORE_PREPARE chains are
-called.
-
-The ioctl() commands recognized by the device are:
-
-SNAPSHOT_FREEZE - freeze user space processes (the current process is
-	not frozen); this is required for SNAPSHOT_CREATE_IMAGE
-	and SNAPSHOT_ATOMIC_RESTORE to succeed
-
-SNAPSHOT_UNFREEZE - thaw user space processes frozen by SNAPSHOT_FREEZE
-
-SNAPSHOT_CREATE_IMAGE - create a snapshot of the system memory; the
-	last argument of ioctl() should be a pointer to an int variable,
-	the value of which will indicate whether the call returned after
-	creating the snapshot (1) or after restoring the system memory state
-	from it (0) (after resume the system finds itself finishing the
-	SNAPSHOT_CREATE_IMAGE ioctl() again); after the snapshot
-	has been created the read() operation can be used to transfer
-	it out of the kernel
-
-SNAPSHOT_ATOMIC_RESTORE - restore the system memory state from the
-	uploaded snapshot image; before calling it you should transfer
-	the system memory snapshot back to the kernel using the write()
-	operation; this call will not succeed if the snapshot
-	image is not available to the kernel
-
-SNAPSHOT_FREE - free memory allocated for the snapshot image
-
-SNAPSHOT_PREF_IMAGE_SIZE - set the preferred maximum size of the image
-	(the kernel will do its best to ensure the image size will not exceed
-	this number, but if it turns out to be impossible, the kernel will
-	create the smallest image possible)
-
-SNAPSHOT_GET_IMAGE_SIZE - return the actual size of the hibernation image
-
-SNAPSHOT_AVAIL_SWAP_SIZE - return the amount of available swap in bytes (the
-	last argument should be a pointer to an unsigned int variable that will
-	contain the result if the call is successful).
-
-SNAPSHOT_ALLOC_SWAP_PAGE - allocate a swap page from the resume partition
-	(the last argument should be a pointer to a loff_t variable that
-	will contain the swap page offset if the call is successful)
-
-SNAPSHOT_FREE_SWAP_PAGES - free all swap pages allocated by
-	SNAPSHOT_ALLOC_SWAP_PAGE
-
-SNAPSHOT_SET_SWAP_AREA - set the resume partition and the offset (in <PAGE_SIZE>
-	units) from the beginning of the partition at which the swap header is
-	located (the last ioctl() argument should point to a struct
-	resume_swap_area, as defined in kernel/power/suspend_ioctls.h,
-	containing the resume device specification and the offset); for swap
-	partitions the offset is always 0, but it is different from zero for
-	swap files (see Documentation/power/swsusp-and-swap-files.txt for
-	details).
-
-SNAPSHOT_PLATFORM_SUPPORT - enable/disable the hibernation platform support,
-	depending on the argument value (enable, if the argument is nonzero)
-
-SNAPSHOT_POWER_OFF - make the kernel transition the system to the hibernation
-	state (eg. ACPI S4) using the platform (eg. ACPI) driver
-
-SNAPSHOT_S2RAM - suspend to RAM; using this call causes the kernel to
-	immediately enter the suspend-to-RAM state, so this call must always
-	be preceded by the SNAPSHOT_FREEZE call and it is also necessary
-	to use the SNAPSHOT_UNFREEZE call after the system wakes up.  This call
-	is needed to implement the suspend-to-both mechanism in which the
-	suspend image is first created, as though the system had been suspended
-	to disk, and then the system is suspended to RAM (this makes it possible
-	to resume the system from RAM if there's enough battery power or restore
-	its state on the basis of the saved suspend image otherwise)
-
-The device's read() operation can be used to transfer the snapshot image from
-the kernel.  It has the following limitations:
-- you cannot read() more than one virtual memory page at a time
-- read()s across page boundaries are impossible (ie. if you read() 1/2 of
-	a page in the previous call, you will only be able to read()
-	_at_ _most_ 1/2 of the page in the next call)
-
-The device's write() operation is used for uploading the system memory snapshot
-into the kernel.  It has the same limitations as the read() operation.
-
-The release() operation frees all memory allocated for the snapshot image
-and all swap pages allocated with SNAPSHOT_ALLOC_SWAP_PAGE (if any).
-Thus it is not necessary to use either SNAPSHOT_FREE or
-SNAPSHOT_FREE_SWAP_PAGES before closing the device (in fact it will also
-unfreeze user space processes frozen by SNAPSHOT_UNFREEZE if they are
-still frozen when the device is being closed).
-
-Currently it is assumed that the userland utilities reading/writing the
-snapshot image from/to the kernel will use a swap partition, called the resume
-partition, or a swap file as storage space (if a swap file is used, the resume
-partition is the partition that holds this file).  However, this is not really
-required, as they can use, for example, a special (blank) suspend partition or
-a file on a partition that is unmounted before SNAPSHOT_CREATE_IMAGE and
-mounted afterwards.
-
-These utilities MUST NOT make any assumptions regarding the ordering of
-data within the snapshot image.  The contents of the image are entirely owned
-by the kernel and its structure may be changed in future kernel releases.
-
-The snapshot image MUST be written to the kernel unaltered (ie. all of the image
-data, metadata and header MUST be written in _exactly_ the same amount, form
-and order in which they have been read).  Otherwise, the behavior of the
-resumed system may be totally unpredictable.
-
-While executing SNAPSHOT_ATOMIC_RESTORE the kernel checks if the
-structure of the snapshot image is consistent with the information stored
-in the image header.  If any inconsistencies are detected,
-SNAPSHOT_ATOMIC_RESTORE will not succeed.  Still, this is not a fool-proof
-mechanism and the userland utilities using the interface SHOULD use additional
-means, such as checksums, to ensure the integrity of the snapshot image.
-
-The suspending and resuming utilities MUST lock themselves in memory,
-preferably using mlockall(), before calling SNAPSHOT_FREEZE.
-
-The suspending utility MUST check the value stored by SNAPSHOT_CREATE_IMAGE
-in the memory location pointed to by the last argument of ioctl() and proceed
-in accordance with it:
-1. 	If the value is 1 (ie. the system memory snapshot has just been
-	created and the system is ready for saving it):
-	(a)	The suspending utility MUST NOT close the snapshot device
-		_unless_ the whole suspend procedure is to be cancelled, in
-		which case, if the snapshot image has already been saved, the
-		suspending utility SHOULD destroy it, preferably by zapping
-		its header.  If the suspend is not to be cancelled, the
-		system MUST be powered off or rebooted after the snapshot
-		image has been saved.
-	(b)	The suspending utility SHOULD NOT attempt to perform any
-		file system operations (including reads) on the file systems
-		that were mounted before SNAPSHOT_CREATE_IMAGE has been
-		called.  However, it MAY mount a file system that was not
-		mounted at that time and perform some operations on it (eg.
-		use it for saving the image).
-2.	If the value is 0 (ie. the system state has just been restored from
-	the snapshot image), the suspending utility MUST close the snapshot
-	device.  Afterwards it will be treated as a regular userland process,
-	so it need not exit.
-
-The resuming utility SHOULD NOT attempt to mount any file systems that could
-be mounted before suspend and SHOULD NOT attempt to perform any operations
-involving such file systems.
-
-For details, please refer to the source code.
diff --git a/Documentation/power/video.rst b/Documentation/power/video.rst
new file mode 100644
index 000000000000..337a2ba9f32f
--- /dev/null
+++ b/Documentation/power/video.rst
@@ -0,0 +1,213 @@
+===========================
+Video issues with S3 resume
+===========================
+
+2003-2006, Pavel Machek
+
+During S3 resume, hardware needs to be reinitialized. For most
+devices, this is easy, and kernel driver knows how to do
+it. Unfortunately there's one exception: video card. Those are usually
+initialized by BIOS, and kernel does not have enough information to
+boot video card. (Kernel usually does not even contain video card
+driver -- vesafb and vgacon are widely used).
+
+This is not problem for swsusp, because during swsusp resume, BIOS is
+run normally so video card is normally initialized. It should not be
+problem for S1 standby, because hardware should retain its state over
+that.
+
+We either have to run video BIOS during early resume, or interpret it
+using vbetool later, or maybe nothing is necessary on particular
+system because video state is preserved. Unfortunately different
+methods work on different systems, and no known method suits all of
+them.
+
+Userland application called s2ram has been developed; it contains long
+whitelist of systems, and automatically selects working method for a
+given system. It can be downloaded from CVS at
+www.sf.net/projects/suspend . If you get a system that is not in the
+whitelist, please try to find a working solution, and submit whitelist
+entry so that work does not need to be repeated.
+
+Currently, VBE_SAVE method (6 below) works on most
+systems. Unfortunately, vbetool only runs after userland is resumed,
+so it makes debugging of early resume problems
+hard/impossible. Methods that do not rely on userland are preferable.
+
+Details
+~~~~~~~
+
+There are a few types of systems where video works after S3 resume:
+
+(1) systems where video state is preserved over S3.
+
+(2) systems where it is possible to call the video BIOS during S3
+    resume. Unfortunately, it is not correct to call the video BIOS at
+    that point, but it happens to work on some machines. Use
+    acpi_sleep=s3_bios.
+
+(3) systems that initialize video card into vga text mode and where
+    the BIOS works well enough to be able to set video mode. Use
+    acpi_sleep=s3_mode on these.
+
+(4) on some systems s3_bios kicks video into text mode, and
+    acpi_sleep=s3_bios,s3_mode is needed.
+
+(5) radeon systems, where X can soft-boot your video card. You'll need
+    a new enough X, and a plain text console (no vesafb or radeonfb). See
+    http://www.doesi.gmxhome.de/linux/tm800s3/s3.html for more information.
+    Alternatively, you should use vbetool (6) instead.
+
+(6) other radeon systems, where vbetool is enough to bring system back
+    to life. It needs text console to be working. Do vbetool vbestate
+    save > /tmp/delme; echo 3 > /proc/acpi/sleep; vbetool post; vbetool
+    vbestate restore < /tmp/delme; setfont <whatever>, and your video
+    should work.
+
+(7) on some systems, it is possible to boot most of kernel, and then
+    POSTing bios works. Ole Rohne has patch to do just that at
+    http://dev.gentoo.org/~marineam/patch-radeonfb-2.6.11-rc2-mm2.
+
+(8) on some systems, you can use the video_post utility and or
+    do echo 3 > /sys/power/state  && /usr/sbin/video_post - which will
+    initialize the display in console mode. If you are in X, you can switch
+    to a virtual terminal and back to X using  CTRL+ALT+F1 - CTRL+ALT+F7 to get
+    the display working in graphical mode again.
+
+Now, if you pass acpi_sleep=something, and it does not work with your
+bios, you'll get a hard crash during resume. Be careful. Also it is
+safest to do your experiments with plain old VGA console. The vesafb
+and radeonfb (etc) drivers have a tendency to crash the machine during
+resume.
+
+You may have a system where none of above works. At that point you
+either invent another ugly hack that works, or write proper driver for
+your video card (good luck getting docs :-(). Maybe suspending from X
+(proper X, knowing your hardware, not XF68_FBcon) might have better
+chance of working.
+
+Table of known working notebooks:
+
+
+=============================== ===============================================
+Model                           hack (or "how to do it")
+=============================== ===============================================
+Acer Aspire 1406LC		ole's late BIOS init (7), turn off DRI
+Acer TM 230			s3_bios (2)
+Acer TM 242FX			vbetool (6)
+Acer TM C110			video_post (8)
+Acer TM C300                    vga=normal (only suspend on console, not in X),
+				vbetool (6) or video_post (8)
+Acer TM 4052LCi		        s3_bios (2)
+Acer TM 636Lci			s3_bios,s3_mode (4)
+Acer TM 650 (Radeon M7)		vga=normal plus boot-radeon (5) gets text
+				console back
+Acer TM 660			??? [#f1]_
+Acer TM 800			vga=normal, X patches, see webpage (5)
+				or vbetool (6)
+Acer TM 803			vga=normal, X patches, see webpage (5)
+				or vbetool (6)
+Acer TM 803LCi			vga=normal, vbetool (6)
+Arima W730a			vbetool needed (6)
+Asus L2400D                     s3_mode (3) [#f2]_ (S1 also works OK)
+Asus L3350M (SiS 740)           (6)
+Asus L3800C (Radeon M7)		s3_bios (2) (S1 also works OK)
+Asus M6887Ne			vga=normal, s3_bios (2), use radeon driver
+				instead of fglrx in x.org
+Athlon64 desktop prototype	s3_bios (2)
+Compal CL-50			??? [#f1]_
+Compaq Armada E500 - P3-700     none (1) (S1 also works OK)
+Compaq Evo N620c		vga=normal, s3_bios (2)
+Dell 600m, ATI R250 Lf		none (1), but needs xorg-x11-6.8.1.902-1
+Dell D600, ATI RV250            vga=normal and X, or try vbestate (6)
+Dell D610			vga=normal and X (possibly vbestate (6) too,
+				but not tested)
+Dell Inspiron 4000		??? [#f1]_
+Dell Inspiron 500m		??? [#f1]_
+Dell Inspiron 510m		???
+Dell Inspiron 5150		vbetool needed (6)
+Dell Inspiron 600m		??? [#f1]_
+Dell Inspiron 8200		??? [#f1]_
+Dell Inspiron 8500		??? [#f1]_
+Dell Inspiron 8600		??? [#f1]_
+eMachines athlon64 machines	vbetool needed (6) (someone please get
+				me model #s)
+HP NC6000			s3_bios, may not use radeonfb (2);
+				or vbetool (6)
+HP NX7000			??? [#f1]_
+HP Pavilion ZD7000		vbetool post needed, need open-source nv
+				driver for X
+HP Omnibook XE3	athlon version	none (1)
+HP Omnibook XE3GC		none (1), video is S3 Savage/IX-MV
+HP Omnibook XE3L-GF		vbetool (6)
+HP Omnibook 5150		none (1), (S1 also works OK)
+IBM TP T20, model 2647-44G	none (1), video is S3 Inc. 86C270-294
+				Savage/IX-MV, vesafb gets "interesting"
+				but X work.
+IBM TP A31 / Type 2652-M5G      s3_mode (3) [works ok with
+				BIOS 1.04 2002-08-23, but not at all with
+				BIOS 1.11 2004-11-05 :-(]
+IBM TP R32 / Type 2658-MMG      none (1)
+IBM TP R40 2722B3G		??? [#f1]_
+IBM TP R50p / Type 1832-22U     s3_bios (2)
+IBM TP R51			none (1)
+IBM TP T30	236681A		??? [#f1]_
+IBM TP T40 / Type 2373-MU4      none (1)
+IBM TP T40p			none (1)
+IBM TP R40p			s3_bios (2)
+IBM TP T41p			s3_bios (2), switch to X after resume
+IBM TP T42			s3_bios (2)
+IBM ThinkPad T42p (2373-GTG)	s3_bios (2)
+IBM TP X20			??? [#f1]_
+IBM TP X30			s3_bios, s3_mode (4)
+IBM TP X31 / Type 2672-XXH      none (1), use radeontool
+				(http://fdd.com/software/radeon/) to
+				turn off backlight.
+IBM TP X32			none (1), but backlight is on and video is
+				trashed after long suspend. s3_bios,
+				s3_mode (4) works too. Perhaps that gets
+				better results?
+IBM Thinkpad X40 Type 2371-7JG  s3_bios,s3_mode (4)
+IBM TP 600e			none(1), but a switch to console and
+				back to X is needed
+Medion MD4220			??? [#f1]_
+Samsung P35			vbetool needed (6)
+Sharp PC-AR10 (ATI rage)	none (1), backlight does not switch off
+Sony Vaio PCG-C1VRX/K		s3_bios (2)
+Sony Vaio PCG-F403		??? [#f1]_
+Sony Vaio PCG-GRT995MP		none (1), works with 'nv' X driver
+Sony Vaio PCG-GR7/K		none (1), but needs radeonfb, use
+				radeontool (http://fdd.com/software/radeon/)
+				to turn off backlight.
+Sony Vaio PCG-N505SN		??? [#f1]_
+Sony Vaio vgn-s260		X or boot-radeon can init it (5)
+Sony Vaio vgn-S580BH		vga=normal, but suspend from X. Console will
+				be blank unless you return to X.
+Sony Vaio vgn-FS115B		s3_bios (2),s3_mode (4)
+Toshiba Libretto L5		none (1)
+Toshiba Libretto 100CT/110CT    vbetool (6)
+Toshiba Portege 3020CT		s3_mode (3)
+Toshiba Satellite 4030CDT	s3_mode (3) (S1 also works OK)
+Toshiba Satellite 4080XCDT      s3_mode (3) (S1 also works OK)
+Toshiba Satellite 4090XCDT      ??? [#f1]_
+Toshiba Satellite P10-554       s3_bios,s3_mode (4)[#f3]_
+Toshiba M30                     (2) xor X with nvidia driver using internal AGP
+Uniwill 244IIO			??? [#f1]_
+=============================== ===============================================
+
+Known working desktop systems
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+=================== ============================= ========================
+Mainboard	    Graphics card                 hack (or "how to do it")
+=================== ============================= ========================
+Asus A7V8X	    nVidia RIVA TNT2 model 64	  s3_bios,s3_mode (4)
+=================== ============================= ========================
+
+
+.. [#f1] from https://wiki.ubuntu.com/HoaryPMResults, not sure
+         which options to use. If you know, please tell me.
+
+.. [#f2] To be tested with a newer kernel.
+
+.. [#f3] Not with SMP kernel, UP only.
diff --git a/Documentation/power/video.txt b/Documentation/power/video.txt
deleted file mode 100644
index 3e6272bc4472..000000000000
--- a/Documentation/power/video.txt
+++ /dev/null
@@ -1,185 +0,0 @@
-
-		Video issues with S3 resume
-		~~~~~~~~~~~~~~~~~~~~~~~~~~~
-		  2003-2006, Pavel Machek
-
-During S3 resume, hardware needs to be reinitialized. For most
-devices, this is easy, and kernel driver knows how to do
-it. Unfortunately there's one exception: video card. Those are usually
-initialized by BIOS, and kernel does not have enough information to
-boot video card. (Kernel usually does not even contain video card
-driver -- vesafb and vgacon are widely used).
-
-This is not problem for swsusp, because during swsusp resume, BIOS is
-run normally so video card is normally initialized. It should not be
-problem for S1 standby, because hardware should retain its state over
-that.
-
-We either have to run video BIOS during early resume, or interpret it
-using vbetool later, or maybe nothing is necessary on particular
-system because video state is preserved. Unfortunately different
-methods work on different systems, and no known method suits all of
-them.
-
-Userland application called s2ram has been developed; it contains long
-whitelist of systems, and automatically selects working method for a
-given system. It can be downloaded from CVS at
-www.sf.net/projects/suspend . If you get a system that is not in the
-whitelist, please try to find a working solution, and submit whitelist
-entry so that work does not need to be repeated.
-
-Currently, VBE_SAVE method (6 below) works on most
-systems. Unfortunately, vbetool only runs after userland is resumed,
-so it makes debugging of early resume problems
-hard/impossible. Methods that do not rely on userland are preferable.
-
-Details
-~~~~~~~
-
-There are a few types of systems where video works after S3 resume:
-
-(1) systems where video state is preserved over S3.
-
-(2) systems where it is possible to call the video BIOS during S3
-  resume. Unfortunately, it is not correct to call the video BIOS at
-  that point, but it happens to work on some machines. Use
-  acpi_sleep=s3_bios.
-
-(3) systems that initialize video card into vga text mode and where
-  the BIOS works well enough to be able to set video mode. Use
-  acpi_sleep=s3_mode on these.
-
-(4) on some systems s3_bios kicks video into text mode, and
-  acpi_sleep=s3_bios,s3_mode is needed.
-
-(5) radeon systems, where X can soft-boot your video card. You'll need
-  a new enough X, and a plain text console (no vesafb or radeonfb). See
-  http://www.doesi.gmxhome.de/linux/tm800s3/s3.html for more information.
-  Alternatively, you should use vbetool (6) instead.
-
-(6) other radeon systems, where vbetool is enough to bring system back
-  to life. It needs text console to be working. Do vbetool vbestate
-  save > /tmp/delme; echo 3 > /proc/acpi/sleep; vbetool post; vbetool
-  vbestate restore < /tmp/delme; setfont <whatever>, and your video
-  should work.
-
-(7) on some systems, it is possible to boot most of kernel, and then
-  POSTing bios works. Ole Rohne has patch to do just that at
-  http://dev.gentoo.org/~marineam/patch-radeonfb-2.6.11-rc2-mm2.
-
-(8) on some systems, you can use the video_post utility and or 
-  do echo 3 > /sys/power/state  && /usr/sbin/video_post - which will 
-  initialize the display in console mode. If you are in X, you can switch
-  to a virtual terminal and back to X using  CTRL+ALT+F1 - CTRL+ALT+F7 to get
-  the display working in graphical mode again.
-
-Now, if you pass acpi_sleep=something, and it does not work with your
-bios, you'll get a hard crash during resume. Be careful. Also it is
-safest to do your experiments with plain old VGA console. The vesafb
-and radeonfb (etc) drivers have a tendency to crash the machine during
-resume.
-
-You may have a system where none of above works. At that point you
-either invent another ugly hack that works, or write proper driver for
-your video card (good luck getting docs :-(). Maybe suspending from X
-(proper X, knowing your hardware, not XF68_FBcon) might have better
-chance of working.
-
-Table of known working notebooks:
-
-Model                           hack (or "how to do it")
-------------------------------------------------------------------------------
-Acer Aspire 1406LC		ole's late BIOS init (7), turn off DRI
-Acer TM 230			s3_bios (2)
-Acer TM 242FX			vbetool (6)
-Acer TM C110			video_post (8)
-Acer TM C300                    vga=normal (only suspend on console, not in X), vbetool (6) or video_post (8)
-Acer TM 4052LCi		        s3_bios (2)
-Acer TM 636Lci			s3_bios,s3_mode (4)
-Acer TM 650 (Radeon M7)		vga=normal plus boot-radeon (5) gets text console back
-Acer TM 660			??? (*)
-Acer TM 800			vga=normal, X patches, see webpage (5) or vbetool (6)
-Acer TM 803			vga=normal, X patches, see webpage (5) or vbetool (6)
-Acer TM 803LCi			vga=normal, vbetool (6)
-Arima W730a			vbetool needed (6)
-Asus L2400D                     s3_mode (3)(***) (S1 also works OK)
-Asus L3350M (SiS 740)           (6)
-Asus L3800C (Radeon M7)		s3_bios (2) (S1 also works OK)
-Asus M6887Ne			vga=normal, s3_bios (2), use radeon driver instead of fglrx in x.org
-Athlon64 desktop prototype	s3_bios (2)
-Compal CL-50			??? (*)
-Compaq Armada E500 - P3-700     none (1) (S1 also works OK)
-Compaq Evo N620c		vga=normal, s3_bios (2)
-Dell 600m, ATI R250 Lf		none (1), but needs xorg-x11-6.8.1.902-1
-Dell D600, ATI RV250            vga=normal and X, or try vbestate (6)
-Dell D610			vga=normal and X (possibly vbestate (6) too, but not tested)
-Dell Inspiron 4000		??? (*)
-Dell Inspiron 500m		??? (*)
-Dell Inspiron 510m		???
-Dell Inspiron 5150		vbetool needed (6)
-Dell Inspiron 600m		??? (*)
-Dell Inspiron 8200		??? (*)
-Dell Inspiron 8500		??? (*)
-Dell Inspiron 8600		??? (*)
-eMachines athlon64 machines	vbetool needed (6) (someone please get me model #s)
-HP NC6000			s3_bios, may not use radeonfb (2); or vbetool (6)
-HP NX7000			??? (*)
-HP Pavilion ZD7000		vbetool post needed, need open-source nv driver for X
-HP Omnibook XE3	athlon version	none (1)
-HP Omnibook XE3GC		none (1), video is S3 Savage/IX-MV
-HP Omnibook XE3L-GF		vbetool (6)
-HP Omnibook 5150		none (1), (S1 also works OK)
-IBM TP T20, model 2647-44G	none (1), video is S3 Inc. 86C270-294 Savage/IX-MV, vesafb gets "interesting" but X work.
-IBM TP A31 / Type 2652-M5G      s3_mode (3) [works ok with BIOS 1.04 2002-08-23, but not at all with BIOS 1.11 2004-11-05 :-(]
-IBM TP R32 / Type 2658-MMG      none (1)
-IBM TP R40 2722B3G		??? (*)
-IBM TP R50p / Type 1832-22U     s3_bios (2)
-IBM TP R51			none (1)
-IBM TP T30	236681A		??? (*)
-IBM TP T40 / Type 2373-MU4      none (1)
-IBM TP T40p			none (1)
-IBM TP R40p			s3_bios (2)
-IBM TP T41p			s3_bios (2), switch to X after resume
-IBM TP T42			s3_bios (2)
-IBM ThinkPad T42p (2373-GTG)	s3_bios (2)
-IBM TP X20			??? (*)
-IBM TP X30			s3_bios, s3_mode (4)
-IBM TP X31 / Type 2672-XXH      none (1), use radeontool (http://fdd.com/software/radeon/) to turn off backlight.
-IBM TP X32			none (1), but backlight is on and video is trashed after long suspend. s3_bios,s3_mode (4) works too. Perhaps that gets better results?
-IBM Thinkpad X40 Type 2371-7JG  s3_bios,s3_mode (4)
-IBM TP 600e			none(1), but a switch to console and back to X is needed
-Medion MD4220			??? (*)
-Samsung P35			vbetool needed (6)
-Sharp PC-AR10 (ATI rage)	none (1), backlight does not switch off
-Sony Vaio PCG-C1VRX/K		s3_bios (2)
-Sony Vaio PCG-F403		??? (*)
-Sony Vaio PCG-GRT995MP		none (1), works with 'nv' X driver
-Sony Vaio PCG-GR7/K		none (1), but needs radeonfb, use radeontool (http://fdd.com/software/radeon/) to turn off backlight.
-Sony Vaio PCG-N505SN		??? (*)
-Sony Vaio vgn-s260		X or boot-radeon can init it (5)
-Sony Vaio vgn-S580BH		vga=normal, but suspend from X. Console will be blank unless you return to X.
-Sony Vaio vgn-FS115B		s3_bios (2),s3_mode (4)
-Toshiba Libretto L5		none (1)
-Toshiba Libretto 100CT/110CT    vbetool (6)
-Toshiba Portege 3020CT		s3_mode (3)
-Toshiba Satellite 4030CDT	s3_mode (3) (S1 also works OK)
-Toshiba Satellite 4080XCDT      s3_mode (3) (S1 also works OK)
-Toshiba Satellite 4090XCDT      ??? (*)
-Toshiba Satellite P10-554       s3_bios,s3_mode (4)(****)
-Toshiba M30                     (2) xor X with nvidia driver using internal AGP
-Uniwill 244IIO			??? (*)
-
-Known working desktop systems
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Mainboard	    Graphics card                 hack (or "how to do it")
-------------------------------------------------------------------------------
-Asus A7V8X	    nVidia RIVA TNT2 model 64	  s3_bios,s3_mode (4)
-
-
-(*) from https://wiki.ubuntu.com/HoaryPMResults, not sure
-    which options to use. If you know, please tell me.
-
-(***) To be tested with a newer kernel.
-
-(****) Not with SMP kernel, UP only.
diff --git a/Documentation/process/submitting-drivers.rst b/Documentation/process/submitting-drivers.rst
index 58bc047e7b95..1acaa14903d6 100644
--- a/Documentation/process/submitting-drivers.rst
+++ b/Documentation/process/submitting-drivers.rst
@@ -117,7 +117,7 @@ PM support:
 		implemented") error.  You should also try to make sure that your
 		driver uses as little power as possible when it's not doing
 		anything.  For the driver testing instructions see
-		Documentation/power/drivers-testing.txt and for a relatively
+		Documentation/power/drivers-testing.rst and for a relatively
 		complete overview of the power management issues related to
 		drivers see :ref:`Documentation/driver-api/pm/devices.rst <driverapi_pm_devices>`.
 
diff --git a/Documentation/scheduler/sched-energy.txt b/Documentation/scheduler/sched-energy.txt
index 197d81f4b836..d97207b9accb 100644
--- a/Documentation/scheduler/sched-energy.txt
+++ b/Documentation/scheduler/sched-energy.txt
@@ -22,7 +22,7 @@ the highest.
 
 The actual EM used by EAS is _not_ maintained by the scheduler, but by a
 dedicated framework. For details about this framework and what it provides,
-please refer to its documentation (see Documentation/power/energy-model.txt).
+please refer to its documentation (see Documentation/power/energy-model.rst).
 
 
 2. Background and Terminology
@@ -81,7 +81,7 @@ through the arch_scale_cpu_capacity() callback.
 
 The rest of platform knowledge used by EAS is directly read from the Energy
 Model (EM) framework. The EM of a platform is composed of a power cost table
-per 'performance domain' in the system (see Documentation/power/energy-model.txt
+per 'performance domain' in the system (see Documentation/power/energy-model.rst
 for futher details about performance domains).
 
 The scheduler manages references to the EM objects in the topology code when the
@@ -352,7 +352,7 @@ could be amended in the future if proven otherwise.
 EAS uses the EM of a platform to estimate the impact of scheduling decisions on
 energy. So, your platform must provide power cost tables to the EM framework in
 order to make EAS start. To do so, please refer to documentation of the
-independent EM framework in Documentation/power/energy-model.txt.
+independent EM framework in Documentation/power/energy-model.rst.
 
 Please also note that the scheduling domains need to be re-built after the
 EM has been registered in order to start EAS.
diff --git a/Documentation/trace/coresight-cpu-debug.txt b/Documentation/trace/coresight-cpu-debug.txt
index f07e38094b40..1a660a39e3c0 100644
--- a/Documentation/trace/coresight-cpu-debug.txt
+++ b/Documentation/trace/coresight-cpu-debug.txt
@@ -151,7 +151,7 @@ At the runtime you can disable idle states with below methods:
 
 It is possible to disable CPU idle states by way of the PM QoS
 subsystem, more specifically by using the "/dev/cpu_dma_latency"
-interface (see Documentation/power/pm_qos_interface.txt for more
+interface (see Documentation/power/pm_qos_interface.rst for more
 details).  As specified in the PM QoS documentation the requested
 parameter will stay in effect until the file descriptor is released.
 For example:
diff --git a/Documentation/translations/zh_CN/process/submitting-drivers.rst b/Documentation/translations/zh_CN/process/submitting-drivers.rst
index 72c6cd935821..f1c3906c69a8 100644
--- a/Documentation/translations/zh_CN/process/submitting-drivers.rst
+++ b/Documentation/translations/zh_CN/process/submitting-drivers.rst
@@ -97,7 +97,7 @@ Linux 2.6:
 		函数定义成返回 -ENOSYS（功能未实现）错误。你还应该尝试确
 		保你的驱动在什么都不干的情况下将耗电降到最低。要获得驱动
 		程序测试的指导，请参阅
-		Documentation/power/drivers-testing.txt。有关驱动程序电
+		Documentation/power/drivers-testing.rst。有关驱动程序电
 		源管理问题相对全面的概述，请参阅
 		Documentation/driver-api/pm/devices.rst。
 
-- 
cgit 


From 7ea651ec392ecbd52227e009a482b571ce7b3bce Mon Sep 17 00:00:00 2001
From: Sagar Shrikant Kadam <sagar.kadam@sifive.com>
Date: Sat, 1 Jun 2019 11:41:13 +0530
Subject: dt-bindings: i2c: extend existing opencore bindings

Reformatted compatibility strings to one valid combination on
each line.
Add FU540-C000 specific device tree bindings to already available
i2-ocores file. This device is available on
HiFive Unleashed Rev A00 board. Move interrupt under optional
property list as this can be optional.

The FU540-C000 SoC from sifive, has an Opencore's I2C block
reimplementation.

The DT compatibility string for this IP is present in HDL and available at.
https://github.com/sifive/sifive-blocks/blob/master/src/main/scala/devices/i2c/I2C.scala#L73

Signed-off-by: Sagar Shrikant Kadam <sagar.kadam@sifive.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/devicetree/bindings/i2c/i2c-ocores.txt | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/i2c/i2c-ocores.txt b/Documentation/devicetree/bindings/i2c/i2c-ocores.txt
index 17bef9a34e50..6b25a80ae8d3 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-ocores.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-ocores.txt
@@ -1,9 +1,13 @@
 Device tree configuration for i2c-ocores
 
 Required properties:
-- compatible      : "opencores,i2c-ocores" or "aeroflexgaisler,i2cmst"
+- compatible      : "opencores,i2c-ocores"
+                    "aeroflexgaisler,i2cmst"
+                    "sifive,fu540-c000-i2c", "sifive,i2c0"
+                    For Opencore based I2C IP block reimplemented in
+                    FU540-C000 SoC. Please refer to sifive-blocks-ip-versioning.txt
+                    for additional details.
 - reg             : bus address start and address range size of device
-- interrupts      : interrupt number
 - clocks          : handle to the controller clock; see the note below.
                     Mutually exclusive with opencores,ip-clock-frequency
 - opencores,ip-clock-frequency: frequency of the controller clock in Hz;
@@ -12,6 +16,7 @@ Required properties:
 - #size-cells     : should be <0>
 
 Optional properties:
+- interrupts      : interrupt number.
 - clock-frequency : frequency of bus clock in Hz; see the note below.
                     Defaults to 100 KHz when the property is not specified
 - reg-shift       : device register offsets are shifted by this value
-- 
cgit 


From 36815b416fa48766ac5a98e4b2dc3ebc5887222e Mon Sep 17 00:00:00 2001
From: Reinhard Speyerer <rspmn@arcor.de>
Date: Wed, 12 Jun 2019 19:03:50 +0200
Subject: qmi_wwan: extend permitted QMAP mux_id value range

Permit mux_id values up to 254 to be used in qmimux_register_device()
for compatibility with ip(8) and the rmnet driver.

Fixes: c6adf77953bc ("net: usb: qmi_wwan: add qmap mux protocol support")
Cc: Daniele Palmas <dnlplm@gmail.com>
Signed-off-by: Reinhard Speyerer <rspmn@arcor.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/ABI/testing/sysfs-class-net-qmi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-class-net-qmi b/Documentation/ABI/testing/sysfs-class-net-qmi
index 7122d6264c49..c310db4ccbc2 100644
--- a/Documentation/ABI/testing/sysfs-class-net-qmi
+++ b/Documentation/ABI/testing/sysfs-class-net-qmi
@@ -29,7 +29,7 @@ Contact:	Bjørn Mork <bjorn@mork.no>
 Description:
 		Unsigned integer.
 
-		Write a number ranging from 1 to 127 to add a qmap mux
+		Write a number ranging from 1 to 254 to add a qmap mux
 		based network device, supported by recent Qualcomm based
 		modems.
 
@@ -46,5 +46,5 @@ Contact:	Bjørn Mork <bjorn@mork.no>
 Description:
 		Unsigned integer.
 
-		Write a number ranging from 1 to 127 to delete a previously
+		Write a number ranging from 1 to 254 to delete a previously
 		created qmap mux based network device.
-- 
cgit 


From 79c8bd15b61eca456c49d20d083f02a2614b4d12 Mon Sep 17 00:00:00 2001
From: Robert Hancock <hancock@sedsystems.ca>
Date: Wed, 12 Jun 2019 14:49:06 -0600
Subject: net: dsa: microchip: Support optional 125MHz SYNCLKO output

The KSZ9477 series chips have a SYNCLKO pin which by default outputs a
25MHz clock, but some board setups require a 125MHz clock instead. Added
a microchip,synclko-125 device tree property to allow indicating a
125MHz clock output is required.

Signed-off-by: Robert Hancock <hancock@sedsystems.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/dsa/ksz.txt | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/dsa/ksz.txt b/Documentation/devicetree/bindings/net/dsa/ksz.txt
index e7db7268fd0f..4ac21cef370e 100644
--- a/Documentation/devicetree/bindings/net/dsa/ksz.txt
+++ b/Documentation/devicetree/bindings/net/dsa/ksz.txt
@@ -16,6 +16,8 @@ Required properties:
 Optional properties:
 
 - reset-gpios		: Should be a gpio specifier for a reset line
+- microchip,synclko-125 : Set if the output SYNCLKO frequency should be set to
+			  125MHz instead of 25MHz.
 
 See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional
 required and optional properties.
-- 
cgit 


From 363887a2cdfeb6af52a9b78d84697662adf6f8d5 Mon Sep 17 00:00:00 2001
From: Stephen Suryaputra <ssuryaextr@gmail.com>
Date: Thu, 13 Jun 2019 14:38:58 -0400
Subject: ipv4: Support multipath hashing on inner IP pkts for GRE tunnel

Multipath hash policy value of 0 isn't distributing since the outer IP
dest and src aren't varied eventhough the inner ones are. Since the flow
is on the inner ones in the case of tunneled traffic, hashing on them is
desired.

This is done mainly for IP over GRE, hence only tested for that. But
anything else supported by flow dissection should work.

v2: Use skb_flow_dissect_flow_keys() directly so that other tunneling
    can be supported through flow dissection (per Nikolay Aleksandrov).
v3: Remove accidental inclusion of ports in the hash keys and clarify
    the documentation (Nikolay Alexandrov).
Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index f4b1043e92ed..dc473354d90b 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -80,6 +80,7 @@ fib_multipath_hash_policy - INTEGER
 	Possible values:
 	0 - Layer 3
 	1 - Layer 4
+	2 - Layer 3 or inner Layer 3 if present
 
 fib_sync_mem - UNSIGNED INTEGER
 	Amount of dirty memory from fib entries that can be backlogged before
-- 
cgit 


From ede61ca474a0348b975d9824565b66c7595461de Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 14 Jun 2019 16:22:19 -0700
Subject: tcp: add tcp_rx_skb_cache sysctl

Instead of relying on rps_needed, it is safer to use a separate
static key, since we do not want to enable TCP rx_skb_cache
by default. This feature can cause huge increase of memory
usage on hosts with millions of sockets.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 14fe93049d28..288aa264ac26 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -772,6 +772,14 @@ tcp_challenge_ack_limit - INTEGER
 	in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks)
 	Default: 100
 
+tcp_rx_skb_cache - BOOLEAN
+	Controls a per TCP socket cache of one skb, that might help
+	performance of some workloads. This might be dangerous
+	on systems with a lot of TCP sockets, since it increases
+	memory usage.
+
+	Default: 0 (disabled)
+
 UDP variables:
 
 udp_l3mdev_accept - BOOLEAN
-- 
cgit 


From abf313b5a8b72302062dd407ed7e470d67d389bb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 14 Jun 2019 14:52:15 -0300
Subject: ABI: sysfs-bus-pci-devices-aer_stats uses an invalid tag

According with Documentation/ABI/, the right tag to describe
an ABI symbol is "What:", and not "Where:".

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
index 4b0318c99507..ff229d71961c 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
@@ -9,7 +9,7 @@ errors may be "seen" / reported by the link partner and not the
 problematic endpoint itself (which may report all counters as 0 as it never
 saw any problems).
 
-Where:		/sys/bus/pci/devices/<dev>/aer_dev_correctable
+What:		/sys/bus/pci/devices/<dev>/aer_dev_correctable
 Date:		July 2018
 Kernel Version: 4.19.0
 Contact:	linux-pci@vger.kernel.org, rajatja@google.com
@@ -31,7 +31,7 @@ Header Log Overflow 0
 TOTAL_ERR_COR 2
 -------------------------------------------------------------------------
 
-Where:		/sys/bus/pci/devices/<dev>/aer_dev_fatal
+What:		/sys/bus/pci/devices/<dev>/aer_dev_fatal
 Date:		July 2018
 Kernel Version: 4.19.0
 Contact:	linux-pci@vger.kernel.org, rajatja@google.com
@@ -62,7 +62,7 @@ TLP Prefix Blocked Error 0
 TOTAL_ERR_FATAL 0
 -------------------------------------------------------------------------
 
-Where:		/sys/bus/pci/devices/<dev>/aer_dev_nonfatal
+What:		/sys/bus/pci/devices/<dev>/aer_dev_nonfatal
 Date:		July 2018
 Kernel Version: 4.19.0
 Contact:	linux-pci@vger.kernel.org, rajatja@google.com
@@ -103,19 +103,19 @@ collectors) that are AER capable. These indicate the number of error messages as
 device, so these counters include them and are thus cumulative of all the error
 messages on the PCI hierarchy originating at that root port.
 
-Where:		/sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_cor
+What:		/sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_cor
 Date:		July 2018
 Kernel Version: 4.19.0
 Contact:	linux-pci@vger.kernel.org, rajatja@google.com
 Description:	Total number of ERR_COR messages reported to rootport.
 
-Where:	    /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_fatal
+What:	    /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_fatal
 Date:		July 2018
 Kernel Version: 4.19.0
 Contact:	linux-pci@vger.kernel.org, rajatja@google.com
 Description:	Total number of ERR_FATAL messages reported to rootport.
 
-Where:	    /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_nonfatal
+What:	    /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_nonfatal
 Date:		July 2018
 Kernel Version: 4.19.0
 Contact:	linux-pci@vger.kernel.org, rajatja@google.com
-- 
cgit 


From 129fb4cb3b6a352a1ba3e9b4d2c41fbfa06f28a2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 14 Jun 2019 14:52:16 -0300
Subject: ABI: Fix KernelVersion tags

It is "KernelVersion:" and not "Kernel Version:".

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/pstore                   |  2 +-
 .../testing/sysfs-bus-event_source-devices-format  |  2 +-
 .../ABI/testing/sysfs-bus-i2c-devices-hm6352       |  6 +++---
 .../ABI/testing/sysfs-bus-pci-devices-aer_stats    | 12 ++++++------
 .../ABI/testing/sysfs-bus-pci-devices-cciss        | 22 +++++++++++-----------
 .../ABI/testing/sysfs-bus-usb-devices-usbsevseg    | 10 +++++-----
 Documentation/ABI/testing/sysfs-driver-altera-cvp  |  2 +-
 Documentation/ABI/testing/sysfs-driver-ppi         |  2 +-
 Documentation/ABI/testing/sysfs-driver-st          |  2 +-
 Documentation/ABI/testing/sysfs-driver-wacom       |  2 +-
 10 files changed, 31 insertions(+), 31 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/pstore b/Documentation/ABI/testing/pstore
index 8d6e48f4e8ef..d45209abdb1b 100644
--- a/Documentation/ABI/testing/pstore
+++ b/Documentation/ABI/testing/pstore
@@ -1,6 +1,6 @@
 What:		/sys/fs/pstore/... (or /dev/pstore/...)
 Date:		March 2011
-Kernel Version: 2.6.39
+KernelVersion: 2.6.39
 Contact:	tony.luck@intel.com
 Description:	Generic interface to platform dependent persistent storage.
 
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-format b/Documentation/ABI/testing/sysfs-bus-event_source-devices-format
index b6f8748e0200..5bb793ec926c 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-format
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-format
@@ -1,6 +1,6 @@
 What:		/sys/bus/event_source/devices/<dev>/format
 Date:		January 2012
-Kernel Version: 3.3
+KernelVersion: 3.3
 Contact:	Jiri Olsa <jolsa@redhat.com>
 Description:
 		Attribute group to describe the magic bits that go into
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-hm6352 b/Documentation/ABI/testing/sysfs-bus-i2c-devices-hm6352
index 29bd447e50a0..4a251b7f11e4 100644
--- a/Documentation/ABI/testing/sysfs-bus-i2c-devices-hm6352
+++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-hm6352
@@ -1,20 +1,20 @@
 What:		/sys/bus/i2c/devices/.../heading0_input
 Date:		April 2010
-Kernel Version: 2.6.36?
+KernelVersion: 2.6.36?
 Contact:	alan.cox@intel.com
 Description:	Reports the current heading from the compass as a floating
 		point value in degrees.
 
 What:		/sys/bus/i2c/devices/.../power_state
 Date:		April 2010
-Kernel Version: 2.6.36?
+KernelVersion: 2.6.36?
 Contact:	alan.cox@intel.com
 Description:	Sets the power state of the device. 0 sets the device into
 		sleep mode, 1 wakes it up.
 
 What:		/sys/bus/i2c/devices/.../calibration
 Date:		April 2010
-Kernel Version: 2.6.36?
+KernelVersion: 2.6.36?
 Contact:	alan.cox@intel.com
 Description:	Sets the calibration on or off (1 = on, 0 = off). See the
 		chip data sheet.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
index ff229d71961c..3c9a8c4a25eb 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
@@ -11,7 +11,7 @@ saw any problems).
 
 What:		/sys/bus/pci/devices/<dev>/aer_dev_correctable
 Date:		July 2018
-Kernel Version: 4.19.0
+KernelVersion: 4.19.0
 Contact:	linux-pci@vger.kernel.org, rajatja@google.com
 Description:	List of correctable errors seen and reported by this
 		PCI device using ERR_COR. Note that since multiple errors may
@@ -33,7 +33,7 @@ TOTAL_ERR_COR 2
 
 What:		/sys/bus/pci/devices/<dev>/aer_dev_fatal
 Date:		July 2018
-Kernel Version: 4.19.0
+KernelVersion: 4.19.0
 Contact:	linux-pci@vger.kernel.org, rajatja@google.com
 Description:	List of uncorrectable fatal errors seen and reported by this
 		PCI device using ERR_FATAL. Note that since multiple errors may
@@ -64,7 +64,7 @@ TOTAL_ERR_FATAL 0
 
 What:		/sys/bus/pci/devices/<dev>/aer_dev_nonfatal
 Date:		July 2018
-Kernel Version: 4.19.0
+KernelVersion: 4.19.0
 Contact:	linux-pci@vger.kernel.org, rajatja@google.com
 Description:	List of uncorrectable nonfatal errors seen and reported by this
 		PCI device using ERR_NONFATAL. Note that since multiple errors
@@ -105,18 +105,18 @@ messages on the PCI hierarchy originating at that root port.
 
 What:		/sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_cor
 Date:		July 2018
-Kernel Version: 4.19.0
+KernelVersion: 4.19.0
 Contact:	linux-pci@vger.kernel.org, rajatja@google.com
 Description:	Total number of ERR_COR messages reported to rootport.
 
 What:	    /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_fatal
 Date:		July 2018
-Kernel Version: 4.19.0
+KernelVersion: 4.19.0
 Contact:	linux-pci@vger.kernel.org, rajatja@google.com
 Description:	Total number of ERR_FATAL messages reported to rootport.
 
 What:	    /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_nonfatal
 Date:		July 2018
-Kernel Version: 4.19.0
+KernelVersion: 4.19.0
 Contact:	linux-pci@vger.kernel.org, rajatja@google.com
 Description:	Total number of ERR_NONFATAL messages reported to rootport.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
index eb449169c30b..92a94e1068c2 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
@@ -1,68 +1,68 @@
 What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/model
 Date:		March 2009
-Kernel Version: 2.6.30
+KernelVersion: 2.6.30
 Contact:	iss_storagedev@hp.com
 Description:	Displays the SCSI INQUIRY page 0 model for logical drive
 		Y of controller X.
 
 What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/rev
 Date:		March 2009
-Kernel Version: 2.6.30
+KernelVersion: 2.6.30
 Contact:	iss_storagedev@hp.com
 Description:	Displays the SCSI INQUIRY page 0 revision for logical
 		drive Y of controller X.
 
 What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/unique_id
 Date:		March 2009
-Kernel Version: 2.6.30
+KernelVersion: 2.6.30
 Contact:	iss_storagedev@hp.com
 Description:	Displays the SCSI INQUIRY page 83 serial number for logical
 		drive Y of controller X.
 
 What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/vendor
 Date:		March 2009
-Kernel Version: 2.6.30
+KernelVersion: 2.6.30
 Contact:	iss_storagedev@hp.com
 Description:	Displays the SCSI INQUIRY page 0 vendor for logical drive
 		Y of controller X.
 
 What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/block:cciss!cXdY
 Date:		March 2009
-Kernel Version: 2.6.30
+KernelVersion: 2.6.30
 Contact:	iss_storagedev@hp.com
 Description:	A symbolic link to /sys/block/cciss!cXdY
 
 What:		/sys/bus/pci/devices/<dev>/ccissX/rescan
 Date:		August 2009
-Kernel Version:	2.6.31
+KernelVersion:	2.6.31
 Contact:	iss_storagedev@hp.com
 Description:	Kicks of a rescan of the controller to discover logical
 		drive topology changes.
 
 What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/lunid
 Date:		August 2009
-Kernel Version: 2.6.31
+KernelVersion: 2.6.31
 Contact:	iss_storagedev@hp.com
 Description:	Displays the 8-byte LUN ID used to address logical
 		drive Y of controller X.
 
 What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/raid_level
 Date:		August 2009
-Kernel Version: 2.6.31
+KernelVersion: 2.6.31
 Contact:	iss_storagedev@hp.com
 Description:	Displays the RAID level of logical drive Y of
 		controller X.
 
 What:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/usage_count
 Date:		August 2009
-Kernel Version: 2.6.31
+KernelVersion: 2.6.31
 Contact:	iss_storagedev@hp.com
 Description:	Displays the usage count (number of opens) of logical drive Y
 		of controller X.
 
 What:		/sys/bus/pci/devices/<dev>/ccissX/resettable
 Date:		February 2011
-Kernel Version:	2.6.38
+KernelVersion:	2.6.38
 Contact:	iss_storagedev@hp.com
 Description:	Value of 1 indicates the controller can honor the reset_devices
 		kernel parameter.  Value of 0 indicates reset_devices cannot be
@@ -73,7 +73,7 @@ Description:	Value of 1 indicates the controller can honor the reset_devices
 
 What:		/sys/bus/pci/devices/<dev>/ccissX/transport_mode
 Date:		July 2011
-Kernel Version:	3.0
+KernelVersion:	3.0
 Contact:	iss_storagedev@hp.com
 Description:	Value of "simple" indicates that the controller has been placed
 		in "simple mode". Value of "performant" indicates that the
diff --git a/Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg b/Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg
index f6199b314196..9ade80f81f96 100644
--- a/Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg
+++ b/Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg
@@ -1,6 +1,6 @@
 What:		/sys/bus/usb/.../powered
 Date:		August 2008
-Kernel Version:	2.6.26
+KernelVersion:	2.6.26
 Contact:	Harrison Metzger <harrisonmetz@gmail.com>
 Description:	Controls whether the device's display will powered.
 		A value of 0 is off and a non-zero value is on.
@@ -8,7 +8,7 @@ Description:	Controls whether the device's display will powered.
 What:		/sys/bus/usb/.../mode_msb
 What:		/sys/bus/usb/.../mode_lsb
 Date:		August 2008
-Kernel Version:	2.6.26
+KernelVersion:	2.6.26
 Contact:	Harrison Metzger <harrisonmetz@gmail.com>
 Description:	Controls the devices display mode.
 		For a 6 character display the values are
@@ -18,7 +18,7 @@ Description:	Controls the devices display mode.
 
 What:		/sys/bus/usb/.../textmode
 Date:		August 2008
-Kernel Version:	2.6.26
+KernelVersion:	2.6.26
 Contact:	Harrison Metzger <harrisonmetz@gmail.com>
 Description:	Controls the way the device interprets its text buffer.
 		raw:	each character controls its segment manually
@@ -27,13 +27,13 @@ Description:	Controls the way the device interprets its text buffer.
 
 What:		/sys/bus/usb/.../text
 Date:		August 2008
-Kernel Version:	2.6.26
+KernelVersion:	2.6.26
 Contact:	Harrison Metzger <harrisonmetz@gmail.com>
 Description:	The text (or data) for the device to display
 
 What:		/sys/bus/usb/.../decimals
 Date:		August 2008
-Kernel Version:	2.6.26
+KernelVersion:	2.6.26
 Contact:	Harrison Metzger <harrisonmetz@gmail.com>
 Description:	Controls the decimal places on the device.
 		To set the nth decimal place, give this field
diff --git a/Documentation/ABI/testing/sysfs-driver-altera-cvp b/Documentation/ABI/testing/sysfs-driver-altera-cvp
index 8cde64a71edb..fbd8078fd7ad 100644
--- a/Documentation/ABI/testing/sysfs-driver-altera-cvp
+++ b/Documentation/ABI/testing/sysfs-driver-altera-cvp
@@ -1,6 +1,6 @@
 What:		/sys/bus/pci/drivers/altera-cvp/chkcfg
 Date:		May 2017
-Kernel Version:	4.13
+KernelVersion:	4.13
 Contact:	Anatolij Gustschin <agust@denx.de>
 Description:
 		Contains either 1 or 0 and controls if configuration
diff --git a/Documentation/ABI/testing/sysfs-driver-ppi b/Documentation/ABI/testing/sysfs-driver-ppi
index 9921ef285899..1a56fc507689 100644
--- a/Documentation/ABI/testing/sysfs-driver-ppi
+++ b/Documentation/ABI/testing/sysfs-driver-ppi
@@ -1,6 +1,6 @@
 What:		/sys/class/tpm/tpmX/ppi/
 Date:		August 2012
-Kernel Version:	3.6
+KernelVersion:	3.6
 Contact:	xiaoyan.zhang@intel.com
 Description:
 		This folder includes the attributes related with PPI (Physical
diff --git a/Documentation/ABI/testing/sysfs-driver-st b/Documentation/ABI/testing/sysfs-driver-st
index ba5d77008a85..88cab66fd77f 100644
--- a/Documentation/ABI/testing/sysfs-driver-st
+++ b/Documentation/ABI/testing/sysfs-driver-st
@@ -1,6 +1,6 @@
 What:		/sys/bus/scsi/drivers/st/debug_flag
 Date:		October 2015
-Kernel Version:	?.?
+KernelVersion:	?.?
 Contact:	shane.seymour@hpe.com
 Description:
 		This file allows you to turn debug output from the st driver
diff --git a/Documentation/ABI/testing/sysfs-driver-wacom b/Documentation/ABI/testing/sysfs-driver-wacom
index 2aa5503ee200..afc48fc163b5 100644
--- a/Documentation/ABI/testing/sysfs-driver-wacom
+++ b/Documentation/ABI/testing/sysfs-driver-wacom
@@ -1,6 +1,6 @@
 What:		/sys/bus/hid/devices/<bus>:<vid>:<pid>.<n>/speed
 Date:		April 2010
-Kernel Version:	2.6.35
+KernelVersion:	2.6.35
 Contact:	linux-bluetooth@vger.kernel.org
 Description:
 		The /sys/bus/hid/devices/<bus>:<vid>:<pid>.<n>/speed file
-- 
cgit 


From 70d22b78d3235303555c921246e3c1ec37b0a29c Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Thu, 23 May 2019 08:01:53 -0700
Subject: soc: qcom: apr: Don't use reg for domain id

The reg property represents the address and size on the bus that a
device lives, but for APR the parent is a rpmsg bus, which does not have
numerical addresses. Simply defining #address/#size-cells to 1 and 0,
respectively, to silence the compiler is not an appropriate solution.

Replace the use of "reg" with an APR specific property.

Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 Documentation/devicetree/bindings/soc/qcom/qcom,apr.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,apr.txt b/Documentation/devicetree/bindings/soc/qcom/qcom,apr.txt
index bcc612cc7423..db501269f47b 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom,apr.txt
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,apr.txt
@@ -9,7 +9,7 @@ used for audio/voice services on the QDSP.
 	Value type: <stringlist>
 	Definition: must be "qcom,apr-v<VERSION-NUMBER>", example "qcom,apr-v2"
 
-- reg
+- qcom,apr-domain
 	Usage: required
 	Value type: <u32>
 	Definition: Destination processor ID.
@@ -49,9 +49,9 @@ by the individual bindings for the specific service
 The following example represents a QDSP based sound card on a MSM8996 device
 which uses apr as communication between Apps and QDSP.
 
-	apr@4 {
+	apr {
 		compatible = "qcom,apr-v2";
-		reg = <APR_DOMAIN_ADSP>;
+		qcom,apr-domain = <APR_DOMAIN_ADSP>;
 
 		q6core@3 {
 			compatible = "qcom,q6core";
-- 
cgit 


From 2a368ed7b5df04b4543602fe07e49445750ee0b7 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 17 May 2019 10:42:02 -0500
Subject: dt-bindings: arm: Convert MediaTek board/soc bindings to json-schema

Convert MediaTek SoC bindings to DT schema format using json-schema.

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: devicetree@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-mediatek@lists.infradead.org
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 Documentation/devicetree/bindings/arm/mediatek.txt | 89 ---------------------
 .../devicetree/bindings/arm/mediatek.yaml          | 91 ++++++++++++++++++++++
 2 files changed, 91 insertions(+), 89 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/mediatek.txt
 create mode 100644 Documentation/devicetree/bindings/arm/mediatek.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/mediatek.txt b/Documentation/devicetree/bindings/arm/mediatek.txt
deleted file mode 100644
index 56ac7896d6d8..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek.txt
+++ /dev/null
@@ -1,89 +0,0 @@
-MediaTek SoC based Platforms Device Tree Bindings
-
-Boards with a MediaTek SoC shall have the following property:
-
-Required root node property:
-
-compatible: Must contain one of
-   "mediatek,mt2701"
-   "mediatek,mt2712"
-   "mediatek,mt6580"
-   "mediatek,mt6589"
-   "mediatek,mt6592"
-   "mediatek,mt6755"
-   "mediatek,mt6765"
-   "mediatek,mt6795"
-   "mediatek,mt6797"
-   "mediatek,mt7622"
-   "mediatek,mt7623"
-   "mediatek,mt7629"
-   "mediatek,mt8127"
-   "mediatek,mt8135"
-   "mediatek,mt8173"
-   "mediatek,mt8183"
-
-
-Supported boards:
-
-- Evaluation board for MT2701:
-    Required root node properties:
-      - compatible = "mediatek,mt2701-evb", "mediatek,mt2701";
-- Evaluation board for MT2712:
-    Required root node properties:
-      - compatible = "mediatek,mt2712-evb", "mediatek,mt2712";
-- Evaluation board for MT6580:
-    Required root node properties:
-      - compatible = "mediatek,mt6580-evbp1", "mediatek,mt6580";
-- bq Aquaris5 smart phone:
-    Required root node properties:
-      - compatible = "mundoreader,bq-aquaris5", "mediatek,mt6589";
-- Evaluation board for MT6592:
-    Required root node properties:
-      - compatible = "mediatek,mt6592-evb", "mediatek,mt6592";
-- Evaluation phone for MT6755(Helio P10):
-    Required root node properties:
-      - compatible = "mediatek,mt6755-evb", "mediatek,mt6755";
-- Evaluation board for MT6765(Helio P22):
-    Required root node properties:
-      - compatible = "mediatek,mt6765-evb", "mediatek,mt6765";
-- Evaluation board for MT6795(Helio X10):
-    Required root node properties:
-      - compatible = "mediatek,mt6795-evb", "mediatek,mt6795";
-- Evaluation board for MT6797(Helio X20):
-    Required root node properties:
-      - compatible = "mediatek,mt6797-evb", "mediatek,mt6797";
-- Mediatek X20 Development Board:
-    Required root node properties:
-      - compatible = "archermind,mt6797-x20-dev", "mediatek,mt6797";
-- Reference board variant 1 for MT7622:
-    Required root node properties:
-      - compatible = "mediatek,mt7622-rfb1", "mediatek,mt7622";
-- Bananapi BPI-R64 for MT7622:
-    Required root node properties:
-      - compatible = "bananapi,bpi-r64", "mediatek,mt7622";
-- Reference board for MT7623a with eMMC:
-    Required root node properties:
-      - compatible = "mediatek,mt7623a-rfb-emmc", "mediatek,mt7623";
-- Reference board for MT7623a with NAND:
-    Required root node properties:
-      - compatible = "mediatek,mt7623a-rfb-nand", "mediatek,mt7623";
-- Reference board for MT7623n with eMMC:
-    Required root node properties:
-      - compatible = "mediatek,mt7623n-rfb-emmc", "mediatek,mt7623";
-- Bananapi BPI-R2 board:
-      - compatible = "bananapi,bpi-r2", "mediatek,mt7623";
-- Reference board for MT7629:
-    Required root node properties:
-      - compatible = "mediatek,mt7629-rfb", "mediatek,mt7629";
-- MTK mt8127 tablet moose EVB:
-    Required root node properties:
-      - compatible = "mediatek,mt8127-moose", "mediatek,mt8127";
-- MTK mt8135 tablet EVB:
-    Required root node properties:
-      - compatible = "mediatek,mt8135-evbp1", "mediatek,mt8135";
-- MTK mt8173 tablet EVB:
-    Required root node properties:
-      - compatible = "mediatek,mt8173-evb", "mediatek,mt8173";
-- Evaluation board for MT8183:
-    Required root node properties:
-      - compatible = "mediatek,mt8183-evb", "mediatek,mt8183";
diff --git a/Documentation/devicetree/bindings/arm/mediatek.yaml b/Documentation/devicetree/bindings/arm/mediatek.yaml
new file mode 100644
index 000000000000..a4ad2eb926f9
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek.yaml
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/mediatek.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek SoC based Platforms Device Tree Bindings
+
+maintainers:
+  - Sean Wang <sean.wang@mediatek.com>
+  - Matthias Brugger <matthias.bgg@gmail.com>
+description: |
+  Boards with a MediaTek SoC shall have the following properties.
+
+properties:
+  $nodename:
+    const: '/'
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - mediatek,mt2701-evb
+          - const: mediatek,mt2701
+
+      - items:
+          - enum:
+              - mediatek,mt2712-evb
+          - const: mediatek,mt2712
+      - items:
+          - enum:
+              - mediatek,mt6580-evbp1
+          - const: mediatek,mt6580
+      - items:
+          - enum:
+              - mundoreader,bq-aquaris5
+          - const: mediatek,mt6589
+      - items:
+          - enum:
+              - mediatek,mt6592-evb
+          - const: mediatek,mt6592
+      - items:
+          - enum:
+              - mediatek,mt6755-evb
+          - const: mediatek,mt6755
+      - items:
+          - enum:
+              - mediatek,mt6765-evb
+          - const: mediatek,mt6765
+      - items:
+          - enum:
+              - mediatek,mt6795-evb
+          - const: mediatek,mt6795
+      - items:
+          - enum:
+              - archermind,mt6797-x20-dev
+              - mediatek,mt6797-evb
+          - const: mediatek,mt6797
+      - items:
+          - enum:
+              - bananapi,bpi-r64
+              - mediatek,mt7622-rfb1
+          - const: mediatek,mt7622
+      - items:
+          - enum:
+              - mediatek,mt7623a-rfb-emmc
+              - mediatek,mt7623a-rfb-nand
+              - mediatek,mt7623n-rfb-emmc
+              - bananapi,bpi-r2
+          - const: mediatek,mt7623
+
+      - items:
+          - enum:
+              - mediatek,mt7629-rfb
+          - const: mediatek,mt7629
+      - items:
+          - enum:
+              - mediatek,mt8127-moose
+          - const: mediatek,mt8127
+      - items:
+          - enum:
+              - mediatek,mt8135-evbp1
+          - const: mediatek,mt8135
+      - items:
+          - enum:
+              - mediatek,mt8173-evb
+          - const: mediatek,mt8173
+      - items:
+          - enum:
+              - mediatek,mt8183-evb
+          - const: mediatek,mt8183
+...
-- 
cgit 


From f3c8d4c7a7280cdc6622adb4f8e39d51b3786d68 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 4 Jun 2019 19:13:55 +0900
Subject: kbuild: remove headers_{install,check}_all

headers_install_all does not make much sense any more because different
architectures export different set of uapi/linux/ headers. As you see
in include/uapi/linux/Kbuild, the installation of a.out.h, kvm.h, and
kvm_para.h is arch-dependent. So, headers_install_all repeats the
installation/removal of them.

If somebody really thinks it is useful to do headers_install for all
architectures, it would be possible by small shell-scripting, but
the top Makefile does not have to provide entry targets just for that
purpose.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
---
 Documentation/kbuild/headers_install.txt | 7 -------
 Documentation/kbuild/makefiles.txt       | 6 +-----
 2 files changed, 1 insertion(+), 12 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/kbuild/headers_install.txt b/Documentation/kbuild/headers_install.txt
index f0153adb95e2..f07e34eab70b 100644
--- a/Documentation/kbuild/headers_install.txt
+++ b/Documentation/kbuild/headers_install.txt
@@ -39,12 +39,5 @@ INSTALL_HDR_PATH indicates where to install the headers. It defaults to
 An 'include' directory is automatically created inside INSTALL_HDR_PATH and
 headers are installed in 'INSTALL_HDR_PATH/include'.
 
-The command "make headers_install_all" exports headers for all architectures
-simultaneously.  (This is mostly of interest to distribution maintainers,
-who create an architecture-independent tarball from the resulting include
-directory.)  You also can use HDR_ARCH_LIST to specify list of architectures.
-Remember to provide the appropriate linux/asm directory via "mv" or "ln -s"
-before building a C library with headers exported this way.
-
 The kernel header export infrastructure is maintained by David Woodhouse
 <dwmw2@infradead.org>.
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index d65ad5746f94..bac301a73a86 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -897,11 +897,7 @@ When kbuild executes, the following steps are followed (roughly):
 --- 6.2 Add prerequisites to archheaders:
 
 	The archheaders: rule is used to generate header files that
-	may be installed into user space by "make header_install" or
-	"make headers_install_all".  In order to support
-	"make headers_install_all", this target has to be able to run
-	on an unconfigured tree, or a tree configured for another
-	architecture.
+	may be installed into user space by "make header_install".
 
 	It is run before "make archprepare" when run on the
 	architecture itself.
-- 
cgit 


From e846f0dc57f441e5e93194d39bc9b8ac2ab5e0a4 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 4 Jun 2019 15:42:48 +0300
Subject: kbuild: add support for ensuring headers are self-contained

Sometimes it's useful to be able to explicitly ensure certain headers
remain self-contained, i.e. that they are compilable as standalone
units, by including and/or forward declaring everything they depend on.

Add special target header-test-y where individual Makefiles can add
headers to be tested if CONFIG_HEADER_TEST is enabled. This will
generate a dummy C file per header that gets built as part of extra-y.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Documentation/dontdiff             | 1 +
 Documentation/kbuild/makefiles.txt | 7 +++++++
 2 files changed, 8 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 5eba889ea84d..554dfe4883d2 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -19,6 +19,7 @@
 *.grep
 *.grp
 *.gz
+*.hdrtest.c
 *.html
 *.i
 *.jpeg
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index bac301a73a86..ca4b24ec0399 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -1018,6 +1018,13 @@ When kbuild executes, the following steps are followed (roughly):
 	In this example, extra-y is used to list object files that
 	shall be built, but shall not be linked as part of built-in.a.
 
+    header-test-y
+
+	header-test-y specifies headers (*.h) in the current directory that
+	should be compile tested to ensure they are self-contained,
+	i.e. compilable as standalone units. If CONFIG_HEADER_TEST is enabled,
+	this autogenerates dummy sources to include the headers, and builds them
+	as part of extra-y.
 
 --- 6.7 Commands useful for building a boot image
 
-- 
cgit 


From d2563290dbe124afc3ac5cebeb0231dabe694845 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Fri, 14 Jun 2019 18:40:52 +0800
Subject: dt-binding: ptp_qoriq: support DPAA2 PTP compatible

Add a new compatible for DPAA2 PTP.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/ptp/ptp-qoriq.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
index 6ec053449278..d48f9eb3636e 100644
--- a/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
+++ b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
@@ -4,7 +4,8 @@ General Properties:
 
   - compatible   Should be "fsl,etsec-ptp" for eTSEC
                  Should be "fsl,fman-ptp-timer" for DPAA FMan
-		 Should be "fsl,enetc-ptp" for ENETC
+                 Should be "fsl,dpaa2-ptp" for DPAA2
+                 Should be "fsl,enetc-ptp" for ENETC
   - reg          Offset and length of the register set for the device
   - interrupts   There should be at least two interrupts. Some devices
                  have as many as four PTP related interrupts.
-- 
cgit 


From 5f3e2bf008c2221478101ee72f5cb4654b9fc363 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 6 Jun 2019 09:15:31 -0700
Subject: tcp: add tcp_min_snd_mss sysctl

Some TCP peers announce a very small MSS option in their SYN and/or
SYN/ACK messages.

This forces the stack to send packets with a very high network/cpu
overhead.

Linux has enforced a minimal value of 48. Since this value includes
the size of TCP options, and that the options can consume up to 40
bytes, this means that each segment can include only 8 bytes of payload.

In some cases, it can be useful to increase the minimal value
to a saner value.

We still let the default to 48 (TCP_MIN_SND_MSS), for compatibility
reasons.

Note that TCP_MAXSEG socket option enforces a minimal value
of (TCP_MIN_MSS). David Miller increased this minimal value
in commit c39508d6f118 ("tcp: Make TCP_MAXSEG minimum more correct.")
from 64 to 88.

We might in the future merge TCP_MIN_SND_MSS and TCP_MIN_MSS.

CVE-2019-11479 -- tcp mss hardcoded to 48

Signed-off-by: Eric Dumazet <edumazet@google.com>
Suggested-by: Jonathan Looney <jtl@netflix.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Tyler Hicks <tyhicks@canonical.com>
Cc: Bruce Curtis <brucec@netflix.com>
Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 288aa264ac26..22f6b8b1110a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -255,6 +255,14 @@ tcp_base_mss - INTEGER
 	Path MTU discovery (MTU probing).  If MTU probing is enabled,
 	this is the initial MSS used by the connection.
 
+tcp_min_snd_mss - INTEGER
+	TCP SYN and SYNACK messages usually advertise an ADVMSS option,
+	as described in RFC 1122 and RFC 6691.
+	If this ADVMSS option is smaller than tcp_min_snd_mss,
+	it is silently capped to tcp_min_snd_mss.
+
+	Default : 48 (at least 8 bytes of payload per segment)
+
 tcp_congestion_control - STRING
 	Set the congestion control algorithm to be used for new
 	connections. The algorithm "reno" is always available, but
-- 
cgit 


From 4a0ce7764b09044fdb5f92dcf03ec5d8657296f4 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Sun, 16 Jun 2019 13:48:29 +0000
Subject: habanalabs: Allow accessing host mapped addresses via debugfs

Allows using the addr/data32 debugfs nodes to access a device VA of a
host mapped memory when the IOMMU is disabled.

Due to the possible large amount of a user host mapped memory, the
driver doesn't maintain a database with the host addresses per device VA.
When the IOMMU is disabled, this missing info is being overcome by
simply using phys_to_virt(). However, this is not useful when the IOMMU
is enabled, and thus the enforced limitation.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 Documentation/ABI/testing/debugfs-driver-habanalabs | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index 2f5b80be07a3..18191c2becab 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -3,7 +3,10 @@ Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        oded.gabbay@gmail.com
 Description:    Sets the device address to be used for read or write through
-                PCI bar. The acceptable value is a string that starts with "0x"
+                PCI bar, or the device VA of a host mapped memory to be read or
+                written directly from the host. The latter option is allowed
+                only when the IOMMU is disabled.
+                The acceptable value is a string that starts with "0x"
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/command_buffers
 Date:           Jan 2019
@@ -33,10 +36,12 @@ Contact:        oded.gabbay@gmail.com
 Description:    Allows the root user to read or write directly through the
                 device's PCI bar. Writing to this file generates a write
                 transaction while reading from the file generates a read
-                transcation. This custom interface is needed (instead of using
+                transaction. This custom interface is needed (instead of using
                 the generic Linux user-space PCI mapping) because the DDR bar
                 is very small compared to the DDR memory and only the driver can
-                move the bar before and after the transaction
+                move the bar before and after the transaction.
+                If the IOMMU is disabled, it also allows the root user to read
+                or write from the host a device VA of a host mapped memory
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/device
 Date:           Jan 2019
-- 
cgit 


From 8f69a686e2355c03d4e36e9b4c71f9580bf6aa47 Mon Sep 17 00:00:00 2001
From: Renato Lui Geh <renatogeh@gmail.com>
Date: Fri, 24 May 2019 22:26:30 -0300
Subject: dt-bindings: iio: adc: add adi,ad7780.yaml binding

This patch adds a YAML binding for the Analog Devices AD7780/1 and
AD7170/1 analog-to-digital converters.

Signed-off-by: Renato Lui Geh <renatogeh@gmail.com>
Acked-by: Michael Hennerich <michael.hennerich@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/adc/adi,ad7780.txt     | 48 ------------
 .../devicetree/bindings/iio/adc/adi,ad7780.yaml    | 87 ++++++++++++++++++++++
 2 files changed, 87 insertions(+), 48 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/iio/adc/adi,ad7780.txt
 create mode 100644 Documentation/devicetree/bindings/iio/adc/adi,ad7780.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7780.txt b/Documentation/devicetree/bindings/iio/adc/adi,ad7780.txt
deleted file mode 100644
index 440e52555349..000000000000
--- a/Documentation/devicetree/bindings/iio/adc/adi,ad7780.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-* Analog Devices AD7170/AD7171/AD7780/AD7781
-
-Data sheets:
-
-- AD7170:
-	* https://www.analog.com/media/en/technical-documentation/data-sheets/AD7170.pdf
-- AD7171:
-	* https://www.analog.com/media/en/technical-documentation/data-sheets/AD7171.pdf
-- AD7780:
-	* https://www.analog.com/media/en/technical-documentation/data-sheets/ad7780.pdf
-- AD7781:
-	* https://www.analog.com/media/en/technical-documentation/data-sheets/AD7781.pdf
-
-Required properties:
-
-- compatible: should be one of
-	* "adi,ad7170"
-	* "adi,ad7171"
-	* "adi,ad7780"
-	* "adi,ad7781"
-- reg: spi chip select number for the device
-- vref-supply: the regulator supply for the ADC reference voltage
-
-Optional properties:
-
-- powerdown-gpios:  must be the device tree identifier of the PDRST pin. If
-		    specified, it will be asserted during driver probe. As the
-		    line is active high, it should be marked GPIO_ACTIVE_HIGH.
-- adi,gain-gpios:   must be the device tree identifier of the GAIN pin. Only for
-		    the ad778x chips. If specified, it will be asserted during
-		    driver probe. As the line is active low, it should be marked
-		    GPIO_ACTIVE_LOW.
-- adi,filter-gpios: must be the device tree identifier of the FILTER pin. Only
-		    for the ad778x chips. If specified, it will be asserted
-		    during driver probe. As the line is active low, it should be
-		    marked GPIO_ACTIVE_LOW.
-
-Example:
-
-adc@0 {
-	compatible =  "adi,ad7780";
-	reg =	      <0>;
-	vref-supply = <&vdd_supply>
-
-	powerdown-gpios  = <&gpio 12 GPIO_ACTIVE_HIGH>;
-	adi,gain-gpios   = <&gpio  5 GPIO_ACTIVE_LOW>;
-	adi,filter-gpios = <&gpio 15 GPIO_ACTIVE_LOW>;
-};
diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7780.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7780.yaml
new file mode 100644
index 000000000000..d1109416963c
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7780.yaml
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/adc/adi,ad7780.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices AD7170/AD7171/AD7780/AD7781 analog to digital converters
+
+maintainers:
+  - Michael Hennerich <michael.hennerich@analog.com>
+
+description: |
+  The ad7780 is a sigma-delta analog to digital converter. This driver provides
+  reading voltage values and status bits from both the ad778x and ad717x series.
+  Its interface also allows writing on the FILTER and GAIN GPIO pins on the
+  ad778x.
+
+  Specifications on the converters can be found at:
+    AD7170:
+      https://www.analog.com/media/en/technical-documentation/data-sheets/AD7170.pdf
+    AD7171:
+      https://www.analog.com/media/en/technical-documentation/data-sheets/AD7171.pdf
+    AD7780:
+      https://www.analog.com/media/en/technical-documentation/data-sheets/ad7780.pdf
+    AD7781:
+      https://www.analog.com/media/en/technical-documentation/data-sheets/AD7781.pdf
+
+properties:
+  compatible:
+    enum:
+      - adi,ad7170
+      - adi,ad7171
+      - adi,ad7780
+      - adi,ad7781
+
+  reg:
+    maxItems: 1
+
+  avdd-supply:
+    description:
+      The regulator supply for the ADC reference voltage.
+    maxItems: 1
+
+  powerdown-gpios:
+    description:
+      Must be the device tree identifier of the PDRST pin. If
+      specified, it will be asserted during driver probe. As the
+      line is active high, it should be marked GPIO_ACTIVE_HIGH.
+    maxItems: 1
+
+  adi,gain-gpios:
+    description:
+      Must be the device tree identifier of the GAIN pin. Only for
+      the ad778x chips. If specified, it will be asserted during
+      driver probe. As the line is active low, it should be marked
+      GPIO_ACTIVE_LOW.
+    maxItems: 1
+
+  adi,filter-gpios:
+    description:
+      Must be the device tree identifier of the FILTER pin. Only
+      for the ad778x chips. If specified, it will be asserted
+      during driver probe. As the line is active low, it should be
+      marked GPIO_ACTIVE_LOW.
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    spi0 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        adc@0 {
+            compatible = "adi,ad7780";
+            reg = <0>;
+
+            avdd-supply      = <&vdd_supply>;
+            powerdown-gpios  = <&gpio0 12 GPIO_ACTIVE_HIGH>;
+            adi,gain-gpios   = <&gpio1  5 GPIO_ACTIVE_LOW>;
+            adi,filter-gpios = <&gpio2 15 GPIO_ACTIVE_LOW>;
+        };
+    };
-- 
cgit 


From c88dd606a1cd9edeb6bb7e6f6a45e1248c3a6453 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Wed, 15 May 2019 15:38:46 +0530
Subject: dt-bindings: mfd: lp87565: Add LP87561 configuration

lp87561 is a single output 4-phase regulator configuration.
Add support for the same.

Data Sheet: https://www.ti.com/lit/ds/symlink/lp87561-q1.pdf

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/devicetree/bindings/mfd/lp87565.txt | 36 +++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mfd/lp87565.txt b/Documentation/devicetree/bindings/mfd/lp87565.txt
index a48df7c08ab0..41671e0dc26b 100644
--- a/Documentation/devicetree/bindings/mfd/lp87565.txt
+++ b/Documentation/devicetree/bindings/mfd/lp87565.txt
@@ -41,3 +41,39 @@ lp87565_pmic: pmic@60 {
 		};
 	};
 };
+
+TI LP87561 PMIC:
+
+This is a single output 4-phase regulator configuration
+
+Required properties:
+  - compatible:	"ti,lp87561-q1"
+  - reg:		I2C slave address.
+  - gpio-controller:	Marks the device node as a GPIO Controller.
+  - #gpio-cells:	Should be two.  The first cell is the pin number and
+			the second cell is used to specify flags.
+			See ../gpio/gpio.txt for more information.
+  - xxx-in-supply:	Phandle to parent supply node of each regulator
+			populated under regulators node. xxx should match
+			the supply_name populated in driver.
+Example:
+
+lp87561_pmic: pmic@62 {
+	compatible = "ti,lp87561-q1";
+	reg = <0x62>;
+	gpio-controller;
+	#gpio-cells = <2>;
+
+	buck3210-in-supply = <&vsys_3v3>;
+
+	regulators: regulators {
+		buck3210_reg: buck3210 {
+			/* VDD_CORE */
+			regulator-name = "buck3210";
+			regulator-min-microvolt = <800000>;
+			regulator-max-microvolt = <800000>;
+			regulator-always-on;
+			regulator-boot-on;
+		};
+	};
+};
-- 
cgit 


From c7af55981789299547c51caf5db38e6872e7841f Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul.walmsley@sifive.com>
Date: Mon, 20 May 2019 09:19:40 -0700
Subject: dt-bindings: riscv: sifive: add YAML documentation for the SiFive
 FU540

Add YAML DT binding documentation for the SiFive FU540 SoC.  This
SoC is documented at:

    https://static.dev.sifive.com/FU540-C000-v1.0.pdf

Passes dt-doc-validate, as of yaml-bindings commit 4c79d42e9216.

Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
Signed-off-by: Paul Walmsley <paul@pwsan.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: devicetree@vger.kernel.org
Cc: linux-riscv@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
 .../devicetree/bindings/riscv/sifive.yaml          | 25 ++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/riscv/sifive.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/riscv/sifive.yaml b/Documentation/devicetree/bindings/riscv/sifive.yaml
new file mode 100644
index 000000000000..9d17dc2f3f84
--- /dev/null
+++ b/Documentation/devicetree/bindings/riscv/sifive.yaml
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: (GPL-2.0 OR MIT)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/riscv/sifive.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SiFive SoC-based boards
+
+maintainers:
+  - Paul Walmsley <paul.walmsley@sifive.com>
+  - Palmer Dabbelt <palmer@sifive.com>
+
+description:
+  SiFive SoC-based boards
+
+properties:
+  $nodename:
+    const: '/'
+  compatible:
+    items:
+      - enum:
+          - sifive,freedom-unleashed-a00
+      - const: sifive,fu540-c000
+      - const: sifive,fu540
+...
-- 
cgit 


From 4fd669a8c4873b6ad54e77bc45aa37b03edc5587 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul.walmsley@sifive.com>
Date: Mon, 20 May 2019 09:19:40 -0700
Subject: dt-bindings: riscv: convert cpu binding to json-schema

At Rob's request, we're starting to migrate our DT binding
documentation to json-schema YAML format.  Start by converting our cpu
binding documentation.  While doing so, document more properties and
nodes.  This includes adding binding documentation support for the E51
and U54 CPU cores ("harts") that are present on this SoC.  These cores
are described in:

    https://static.dev.sifive.com/FU540-C000-v1.0.pdf

This cpus.yaml file is intended to be a starting point and to
evolve over time.  It passes dt-doc-validate as of the yaml-bindings
commit 4c79d42e9216.

This patch was originally based on the ARM json-schema binding
documentation as added by commit 672951cbd1b7 ("dt-bindings: arm: Convert
cpu binding to json-schema").

Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
Signed-off-by: Paul Walmsley <paul@pwsan.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: devicetree@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-riscv@lists.infradead.org
---
 Documentation/devicetree/bindings/riscv/cpus.yaml | 168 ++++++++++++++++++++++
 1 file changed, 168 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/riscv/cpus.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml
new file mode 100644
index 000000000000..27f02ec4bb45
--- /dev/null
+++ b/Documentation/devicetree/bindings/riscv/cpus.yaml
@@ -0,0 +1,168 @@
+# SPDX-License-Identifier: (GPL-2.0 OR MIT)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/riscv/cpus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: RISC-V bindings for 'cpus' DT nodes
+
+maintainers:
+  - Paul Walmsley <paul.walmsley@sifive.com>
+  - Palmer Dabbelt <palmer@sifive.com>
+
+allOf:
+  - $ref: /schemas/cpus.yaml#
+
+properties:
+  $nodename:
+    const: cpus
+    description: Container of cpu nodes
+
+  '#address-cells':
+    const: 1
+    description: |
+      A single unsigned 32-bit integer uniquely identifies each RISC-V
+      hart in a system.  (See the "reg" node under the "cpu" node,
+      below).
+
+  '#size-cells':
+    const: 0
+
+patternProperties:
+  '^cpu@[0-9a-f]+$':
+    properties:
+      compatible:
+        type: array
+        items:
+          - enum:
+              - sifive,rocket0
+              - sifive,e5
+              - sifive,e51
+              - sifive,u54-mc
+              - sifive,u54
+              - sifive,u5
+          - const: riscv
+        description:
+          Identifies that the hart uses the RISC-V instruction set
+          and identifies the type of the hart.
+
+      mmu-type:
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/string"
+          - enum:
+              - riscv,sv32
+              - riscv,sv39
+              - riscv,sv48
+        description:
+          Identifies the MMU address translation mode used on this
+          hart.  These values originate from the RISC-V Privileged
+          Specification document, available from
+          https://riscv.org/specifications/
+
+      riscv,isa:
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/string"
+          - enum:
+              - rv64imac
+              - rv64imafdc
+        description:
+          Identifies the specific RISC-V instruction set architecture
+          supported by the hart.  These are documented in the RISC-V
+          User-Level ISA document, available from
+          https://riscv.org/specifications/
+
+      timebase-frequency:
+        type: integer
+        minimum: 1
+        description:
+          Specifies the clock frequency of the system timer in Hz.
+          This value is common to all harts on a single system image.
+
+      interrupt-controller:
+        type: object
+        description: Describes the CPU's local interrupt controller
+
+        properties:
+          '#interrupt-cells':
+            const: 1
+
+          compatible:
+            const: riscv,cpu-intc
+
+          interrupt-controller: true
+
+        required:
+          - '#interrupt-cells'
+          - compatible
+          - interrupt-controller
+
+    required:
+      - riscv,isa
+      - timebase-frequency
+      - interrupt-controller
+
+examples:
+  - |
+    // Example 1: SiFive Freedom U540G Development Kit
+    cpus {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        timebase-frequency = <1000000>;
+        cpu@0 {
+                clock-frequency = <0>;
+                compatible = "sifive,rocket0", "riscv";
+                device_type = "cpu";
+                i-cache-block-size = <64>;
+                i-cache-sets = <128>;
+                i-cache-size = <16384>;
+                reg = <0>;
+                riscv,isa = "rv64imac";
+                cpu_intc0: interrupt-controller {
+                        #interrupt-cells = <1>;
+                        compatible = "riscv,cpu-intc";
+                        interrupt-controller;
+                };
+        };
+        cpu@1 {
+                clock-frequency = <0>;
+                compatible = "sifive,rocket0", "riscv";
+                d-cache-block-size = <64>;
+                d-cache-sets = <64>;
+                d-cache-size = <32768>;
+                d-tlb-sets = <1>;
+                d-tlb-size = <32>;
+                device_type = "cpu";
+                i-cache-block-size = <64>;
+                i-cache-sets = <64>;
+                i-cache-size = <32768>;
+                i-tlb-sets = <1>;
+                i-tlb-size = <32>;
+                mmu-type = "riscv,sv39";
+                reg = <1>;
+                riscv,isa = "rv64imafdc";
+                tlb-split;
+                cpu_intc1: interrupt-controller {
+                        #interrupt-cells = <1>;
+                        compatible = "riscv,cpu-intc";
+                        interrupt-controller;
+                };
+        };
+    };
+
+  - |
+    // Example 2: Spike ISA Simulator with 1 Hart
+    cpus {
+            cpu@0 {
+                    device_type = "cpu";
+                    reg = <0>;
+                    compatible = "riscv";
+                    riscv,isa = "rv64imafdc";
+                    mmu-type = "riscv,sv48";
+                    interrupt-controller {
+                            #interrupt-cells = <1>;
+                            interrupt-controller;
+                            compatible = "riscv,cpu-intc";
+                    };
+            };
+    };
+...
-- 
cgit 


From d04824b263480212771da4efbb34b49dc1a88d9f Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Thu, 9 May 2019 20:20:20 +0100
Subject: dt-bindings: can: rcar_canfd: document r8a77965 support

Document the support for rcar_canfd on R8A77965 SoC devices.

Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 Documentation/devicetree/bindings/net/can/rcar_canfd.txt | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
index ac71daa46195..4720e916fbdd 100644
--- a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
+++ b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
@@ -6,6 +6,7 @@ Required properties:
   - "renesas,rcar-gen3-canfd" for R-Car Gen3 compatible controller.
   - "renesas,r8a7795-canfd" for R8A7795 (R-Car H3) compatible controller.
   - "renesas,r8a7796-canfd" for R8A7796 (R-Car M3-W) compatible controller.
+  - "renesas,r8a77965-canfd" for R8A77965 (R-Car M3-N) compatible controller.
   - "renesas,r8a77970-canfd" for R8A77970 (R-Car V3M) compatible controller.
   - "renesas,r8a77980-canfd" for R8A77980 (R-Car V3H) compatible controller.
 
@@ -26,12 +27,12 @@ The name of the child nodes are "channel0" and "channel1" respectively. Each
 child node supports the "status" property only, which is used to
 enable/disable the respective channel.
 
-Required properties for "renesas,r8a7795-canfd" and "renesas,r8a7796-canfd"
-compatible:
-In R8A7795 and R8A7796 SoCs, canfd clock is a div6 clock and can be used by both
-CAN and CAN FD controller at the same time. It needs to be scaled to maximum
-frequency if any of these controllers use it. This is done using the below
-properties:
+Required properties for "renesas,r8a7795-canfd", "renesas,r8a7796-canfd" and
+"renesas,r8a77965-canfd" compatible:
+In R8A7795, R8A7796 and R8A77965 SoCs, canfd clock is a div6 clock and can
+be used by both CAN and CAN FD controller at the same time. It needs to be
+scaled to maximum frequency if any of these controllers use it. This is done
+using the below properties:
 
 - assigned-clocks: phandle of canfd clock.
 - assigned-clock-rates: maximum frequency of this clock.
-- 
cgit 


From 485dccb48f7590939c6dee794e0a30512926c830 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Thu, 9 May 2019 20:20:21 +0100
Subject: dt-bindings: can: rcar_canfd: document r8a77990 support

Document the support for rcar_canfd on R8A77990 SoC devices.

Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 Documentation/devicetree/bindings/net/can/rcar_canfd.txt | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
index 4720e916fbdd..41049fed5872 100644
--- a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
+++ b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
@@ -9,6 +9,7 @@ Required properties:
   - "renesas,r8a77965-canfd" for R8A77965 (R-Car M3-N) compatible controller.
   - "renesas,r8a77970-canfd" for R8A77970 (R-Car V3M) compatible controller.
   - "renesas,r8a77980-canfd" for R8A77980 (R-Car V3H) compatible controller.
+  - "renesas,r8a77990-canfd" for R8A77990 (R-Car E3) compatible controller.
 
   When compatible with the generic version, nodes must list the
   SoC-specific version corresponding to the platform first, followed by the
@@ -27,12 +28,12 @@ The name of the child nodes are "channel0" and "channel1" respectively. Each
 child node supports the "status" property only, which is used to
 enable/disable the respective channel.
 
-Required properties for "renesas,r8a7795-canfd", "renesas,r8a7796-canfd" and
-"renesas,r8a77965-canfd" compatible:
-In R8A7795, R8A7796 and R8A77965 SoCs, canfd clock is a div6 clock and can
-be used by both CAN and CAN FD controller at the same time. It needs to be
-scaled to maximum frequency if any of these controllers use it. This is done
-using the below properties:
+Required properties for "renesas,r8a7795-canfd", "renesas,r8a7796-canfd",
+"renesas,r8a77965-canfd" and "renesas,r8a77990-canfd" compatible:
+In R8A7795, R8A7796, R8A77965 and R8A77990 SoCs, canfd clock is a div6 clock
+and can be used by both CAN and CAN FD controller at the same time. It needs
+to be scaled to maximum frequency if any of these controllers use it. This is
+done using the below properties:
 
 - assigned-clocks: phandle of canfd clock.
 - assigned-clock-rates: maximum frequency of this clock.
-- 
cgit 


From c908fd08692b8b4d87524762218652df6922386d Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Date: Thu, 9 May 2019 20:20:22 +0100
Subject: dt-bindings: can: rcar_canfd: document r8a774c0 support

Document the support for rcar_canfd on R8A774C0 SoC devices.

Signed-off-by: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Reviewed-by: Chris Paterson <Chris.Paterson2@renesas.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 Documentation/devicetree/bindings/net/can/rcar_canfd.txt | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
index 41049fed5872..32f051f6d338 100644
--- a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
+++ b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
@@ -3,7 +3,8 @@ Renesas R-Car CAN FD controller Device Tree Bindings
 
 Required properties:
 - compatible: Must contain one or more of the following:
-  - "renesas,rcar-gen3-canfd" for R-Car Gen3 compatible controller.
+  - "renesas,rcar-gen3-canfd" for R-Car Gen3 and RZ/G2 compatible controllers.
+  - "renesas,r8a774c0-canfd" for R8A774C0 (RZ/G2E) compatible controller.
   - "renesas,r8a7795-canfd" for R8A7795 (R-Car H3) compatible controller.
   - "renesas,r8a7796-canfd" for R8A7796 (R-Car M3-W) compatible controller.
   - "renesas,r8a77965-canfd" for R8A77965 (R-Car M3-N) compatible controller.
@@ -28,12 +29,13 @@ The name of the child nodes are "channel0" and "channel1" respectively. Each
 child node supports the "status" property only, which is used to
 enable/disable the respective channel.
 
-Required properties for "renesas,r8a7795-canfd", "renesas,r8a7796-canfd",
-"renesas,r8a77965-canfd" and "renesas,r8a77990-canfd" compatible:
-In R8A7795, R8A7796, R8A77965 and R8A77990 SoCs, canfd clock is a div6 clock
-and can be used by both CAN and CAN FD controller at the same time. It needs
-to be scaled to maximum frequency if any of these controllers use it. This is
-done using the below properties:
+Required properties for "renesas,r8a774c0-canfd", "renesas,r8a7795-canfd",
+"renesas,r8a7796-canfd", "renesas,r8a77965-canfd", and "renesas,r8a77990-canfd"
+compatible:
+In R8A774C0, R8A7795, R8A7796, R8A77965, and R8A77990 SoCs, canfd clock is a
+div6 clock and can be used by both CAN and CAN FD controller at the same time.
+It needs to be scaled to maximum frequency if any of these controllers use it.
+This is done using the below properties:
 
 - assigned-clocks: phandle of canfd clock.
 - assigned-clock-rates: maximum frequency of this clock.
-- 
cgit 


From ac97aea7df7b97ff56d50861404c7a8491b03d87 Mon Sep 17 00:00:00 2001
From: Cao Van Dong <cv-dong@jinso.co.jp>
Date: Thu, 9 May 2019 14:29:49 +0200
Subject: dt-bindings: timer: renesas, cmt: Document r8a779{5|65|90} CMT
 support

Document SoC specific bindings for R-Car H3/M3-N/E3 SoCs.

Signed-off-by: Cao Van Dong <cv-dong@jinso.co.jp>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 Documentation/devicetree/bindings/timer/renesas,cmt.txt | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/timer/renesas,cmt.txt b/Documentation/devicetree/bindings/timer/renesas,cmt.txt
index c0594450e9ef..c5220bcd852b 100644
--- a/Documentation/devicetree/bindings/timer/renesas,cmt.txt
+++ b/Documentation/devicetree/bindings/timer/renesas,cmt.txt
@@ -42,12 +42,18 @@ Required Properties:
     - "renesas,r8a7793-cmt1" for the 48-bit CMT1 device included in r8a7793.
     - "renesas,r8a7794-cmt0" for the 32-bit CMT0 device included in r8a7794.
     - "renesas,r8a7794-cmt1" for the 48-bit CMT1 device included in r8a7794.
+    - "renesas,r8a7795-cmt0" for the 32-bit CMT0 device included in r8a7795.
+    - "renesas,r8a7795-cmt1" for the 48-bit CMT1 device included in r8a7795.
     - "renesas,r8a7796-cmt0" for the 32-bit CMT0 device included in r8a7796.
     - "renesas,r8a7796-cmt1" for the 48-bit CMT1 device included in r8a7796.
+    - "renesas,r8a77965-cmt0" for the 32-bit CMT0 device included in r8a77965.
+    - "renesas,r8a77965-cmt1" for the 48-bit CMT1 device included in r8a77965.
     - "renesas,r8a77970-cmt0" for the 32-bit CMT0 device included in r8a77970.
     - "renesas,r8a77970-cmt1" for the 48-bit CMT1 device included in r8a77970.
     - "renesas,r8a77980-cmt0" for the 32-bit CMT0 device included in r8a77980.
     - "renesas,r8a77980-cmt1" for the 48-bit CMT1 device included in r8a77980.
+    - "renesas,r8a77990-cmt0" for the 32-bit CMT0 device included in r8a77990.
+    - "renesas,r8a77990-cmt1" for the 48-bit CMT1 device included in r8a77990.
 
     - "renesas,rcar-gen2-cmt0" for 32-bit CMT0 devices included in R-Car Gen2
 		and RZ/G1.
-- 
cgit 


From a457b70904bbf9a96d39fa01ce4d04a1d10c5e44 Mon Sep 17 00:00:00 2001
From: Faiz Abbas <faiz_abbas@ti.com>
Date: Tue, 4 Jun 2019 11:39:11 +0530
Subject: dt-bindings: mmc: sdhci-am654: Document bindings for the host
 controllers on TI's J721E devices.

Add binding documentation for mmc host controllers present on TI's J721E
SOC. The 4 bit IP on J721E doesn't have a phy DLL so make DLL related
properties as optional for that compatible. Also add an optional
strobe-sel property used for HS400 speed mode.

Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/sdhci-am654.txt | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mmc/sdhci-am654.txt b/Documentation/devicetree/bindings/mmc/sdhci-am654.txt
index 15dbbbace27e..50e87df47971 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-am654.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-am654.txt
@@ -8,7 +8,10 @@ Only deviations are documented here.
   [3] Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
 
 Required Properties:
-	- compatible: should be "ti,am654-sdhci-5.1"
+	- compatible: should be one of:
+			"ti,am654-sdhci-5.1": SDHCI on AM654 device.
+			"ti,j721e-sdhci-8bit": 8 bit SDHCI on J721E device.
+			"ti,j721e-sdhci-4bit": 4 bit SDHCI on J721E device.
 	- reg: Must be two entries.
 		- The first should be the sdhci register space
 		- The second should the subsystem/phy register space
@@ -16,9 +19,13 @@ Required Properties:
 	- clock-names: Tuple including "clk_xin" and "clk_ahb"
 	- interrupts: Interrupt specifiers
 	- ti,otap-del-sel: Output Tap Delay select
+
+Optional Properties (Required for ti,am654-sdhci-5.1 and ti,j721e-sdhci-8bit):
 	- ti,trm-icp: DLL trim select
 	- ti,driver-strength-ohm: driver strength in ohms.
 				  Valid values are 33, 40, 50, 66 and 100 ohms.
+Optional Properties:
+	- ti,strobe-sel: strobe select delay for HS400 speed mode. Default value: 0x0.
 
 Example:
 
-- 
cgit 


From 69d927bba39517d0980462efc051875b7f4db185 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 24 Apr 2019 13:38:23 +0200
Subject: x86/atomic: Fix smp_mb__{before,after}_atomic()

Recent probing at the Linux Kernel Memory Model uncovered a
'surprise'. Strongly ordered architectures where the atomic RmW
primitive implies full memory ordering and
smp_mb__{before,after}_atomic() are a simple barrier() (such as x86)
fail for:

	*x = 1;
	atomic_inc(u);
	smp_mb__after_atomic();
	r0 = *y;

Because, while the atomic_inc() implies memory order, it
(surprisingly) does not provide a compiler barrier. This then allows
the compiler to re-order like so:

	atomic_inc(u);
	*x = 1;
	smp_mb__after_atomic();
	r0 = *y;

Which the CPU is then allowed to re-order (under TSO rules) like:

	atomic_inc(u);
	r0 = *y;
	*x = 1;

And this very much was not intended. Therefore strengthen the atomic
RmW ops to include a compiler barrier.

NOTE: atomic_{or,and,xor} and the bitops already had the compiler
barrier.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/atomic_t.txt | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt
index 89eae7f6b360..d439a0fdbe47 100644
--- a/Documentation/atomic_t.txt
+++ b/Documentation/atomic_t.txt
@@ -196,6 +196,9 @@ These helper barriers exist because architectures have varying implicit
 ordering on their SMP atomic primitives. For example our TSO architectures
 provide full ordered atomics and these barriers are no-ops.
 
+NOTE: when the atomic RmW ops are fully ordered, they should also imply a
+compiler barrier.
+
 Thus:
 
   atomic_fetch_add();
-- 
cgit 


From 509466b7d480bc5d22e90b9fbe6122ae0e2fbe39 Mon Sep 17 00:00:00 2001
From: Qian Cai <cai@lca.pw>
Date: Mon, 3 Jun 2019 17:11:44 -0400
Subject: sched/fair: Fix "runnable_avg_yN_inv" not used warnings

runnable_avg_yN_inv[] is only used in kernel/sched/pelt.c but was
included in several other places because they need other macros all
came from kernel/sched/sched-pelt.h which was generated by
Documentation/scheduler/sched-pelt. As the result, it causes compilation
a lot of warnings,

  kernel/sched/sched-pelt.h:4:18: warning: 'runnable_avg_yN_inv' defined but not used [-Wunused-const-variable=]
  kernel/sched/sched-pelt.h:4:18: warning: 'runnable_avg_yN_inv' defined but not used [-Wunused-const-variable=]
  kernel/sched/sched-pelt.h:4:18: warning: 'runnable_avg_yN_inv' defined but not used [-Wunused-const-variable=]
  ...

Silence it by appending the __maybe_unused attribute for it, so all
generated variables and macros can still be kept in the same file.

Signed-off-by: Qian Cai <cai@lca.pw>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/1559596304-31581-1-git-send-email-cai@lca.pw
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/scheduler/sched-pelt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/scheduler/sched-pelt.c b/Documentation/scheduler/sched-pelt.c
index e4219139386a..7238b355919c 100644
--- a/Documentation/scheduler/sched-pelt.c
+++ b/Documentation/scheduler/sched-pelt.c
@@ -20,7 +20,8 @@ void calc_runnable_avg_yN_inv(void)
 	int i;
 	unsigned int x;
 
-	printf("static const u32 runnable_avg_yN_inv[] = {");
+	/* To silence -Wunused-but-set-variable warnings. */
+	printf("static const u32 runnable_avg_yN_inv[] __maybe_unused = {");
 	for (i = 0; i < HALFLIFE; i++) {
 		x = ((1UL<<32)-1)*pow(y, i);
 
-- 
cgit 


From b096f626a6827ad2ced5ebdbdc04e62422d463f6 Mon Sep 17 00:00:00 2001
From: Yurii Pavlovskyi <yurii.pavlovskyi@gmail.com>
Date: Tue, 14 May 2019 21:07:05 +0200
Subject: platform/x86: asus-wmi: Switch fan boost mode

The WMI exposes a write-only device ID where up to three fan modes can be
switched on some laptops (TUF Gaming FX505GM). There is a hotkey
combination Fn-F5 that does have a fan icon, which is designed to toggle
between fan modes. The DSTS of the device ID returns information about the
presence of this capability and the presence of each of the two additional
fan modes as a bitmask (0x01 - overboost present, 0x02 - silent present)
[1].

Add a SysFS entry that reads the last written value and updates value in
WMI on write and a hotkey handler that toggles the modes taking into
account their availability according to DSTS.

Modes:
* 0x00 - normal or balanced,
* 0x01 - overboost, increased fan RPM,
* 0x02 - silent, decreased fan RPM

[1] Link: https://lkml.org/lkml/2019/4/12/110

Signed-off-by: Yurii Pavlovskyi <yurii.pavlovskyi@gmail.com>
Suggested-by: Daniel Drake <drake@endlessm.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 Documentation/ABI/testing/sysfs-platform-asus-wmi | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-platform-asus-wmi b/Documentation/ABI/testing/sysfs-platform-asus-wmi
index 019e1e29370e..87ae5cc983bf 100644
--- a/Documentation/ABI/testing/sysfs-platform-asus-wmi
+++ b/Documentation/ABI/testing/sysfs-platform-asus-wmi
@@ -36,3 +36,13 @@ KernelVersion:	3.5
 Contact:	"AceLan Kao" <acelan.kao@canonical.com>
 Description:
 		Resume on lid open. 1 means on, 0 means off.
+
+What:		/sys/devices/platform/<platform>/fan_mode
+Date:		Apr 2019
+KernelVersion:	5.2
+Contact:	"Yurii Pavlovskyi" <yurii.pavlovskyi@gmail.com>
+Description:
+		Fan boost mode:
+			* 0 - normal,
+			* 1 - overboost,
+			* 2 - silent
-- 
cgit 


From 59d7d4c5178d18b58414194271ad85e30ecaff92 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms+renesas@verge.net.au>
Date: Mon, 17 Jun 2019 11:06:02 +0200
Subject: dt-bindings: usb: renesas_usbhs: Rename bindings documentation file

For consistency with the naming of (most) other documentation files for DT
bindings for Renesas IP blocks rename the Renesas USBHS documentation file
from renesas-usbhs.txt to renesas,usbhs.txt.

Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 .../devicetree/bindings/usb/renesas,usbhs.txt      | 57 ++++++++++++++++++++++
 .../devicetree/bindings/usb/renesas_usbhs.txt      | 57 ----------------------
 2 files changed, 57 insertions(+), 57 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/usb/renesas,usbhs.txt
 delete mode 100644 Documentation/devicetree/bindings/usb/renesas_usbhs.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/usb/renesas,usbhs.txt b/Documentation/devicetree/bindings/usb/renesas,usbhs.txt
new file mode 100644
index 000000000000..e39255ea6e4f
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/renesas,usbhs.txt
@@ -0,0 +1,57 @@
+Renesas Electronics USBHS driver
+
+Required properties:
+  - compatible: Must contain one or more of the following:
+
+	- "renesas,usbhs-r8a7743" for r8a7743 (RZ/G1M) compatible device
+	- "renesas,usbhs-r8a7744" for r8a7744 (RZ/G1N) compatible device
+	- "renesas,usbhs-r8a7745" for r8a7745 (RZ/G1E) compatible device
+	- "renesas,usbhs-r8a77470" for r8a77470 (RZ/G1C) compatible device
+	- "renesas,usbhs-r8a774a1" for r8a774a1 (RZ/G2M) compatible device
+	- "renesas,usbhs-r8a774c0" for r8a774c0 (RZ/G2E) compatible device
+	- "renesas,usbhs-r8a7790" for r8a7790 (R-Car H2) compatible device
+	- "renesas,usbhs-r8a7791" for r8a7791 (R-Car M2-W) compatible device
+	- "renesas,usbhs-r8a7792" for r8a7792 (R-Car V2H) compatible device
+	- "renesas,usbhs-r8a7793" for r8a7793 (R-Car M2-N) compatible device
+	- "renesas,usbhs-r8a7794" for r8a7794 (R-Car E2) compatible device
+	- "renesas,usbhs-r8a7795" for r8a7795 (R-Car H3) compatible device
+	- "renesas,usbhs-r8a7796" for r8a7796 (R-Car M3-W) compatible device
+	- "renesas,usbhs-r8a77965" for r8a77965 (R-Car M3-N) compatible device
+	- "renesas,usbhs-r8a77990" for r8a77990 (R-Car E3) compatible device
+	- "renesas,usbhs-r8a77995" for r8a77995 (R-Car D3) compatible device
+	- "renesas,usbhs-r7s72100" for r7s72100 (RZ/A1) compatible device
+	- "renesas,usbhs-r7s9210" for r7s9210 (RZ/A2) compatible device
+	- "renesas,rcar-gen2-usbhs" for R-Car Gen2 or RZ/G1 compatible devices
+	- "renesas,rcar-gen3-usbhs" for R-Car Gen3 or RZ/G2 compatible devices
+	- "renesas,rza1-usbhs" for RZ/A1 compatible device
+	- "renesas,rza2-usbhs" for RZ/A2 compatible device
+
+	When compatible with the generic version, nodes must list the
+	SoC-specific version corresponding to the platform first followed
+	by the generic version.
+
+  - reg: Base address and length of the register for the USBHS
+  - interrupts: Interrupt specifier for the USBHS
+  - clocks: A list of phandle + clock specifier pairs.
+	    - In case of "renesas,rcar-gen3-usbhs", two clocks are required.
+	      First clock should be peripheral and second one should be host.
+	    - In case of except above, one clock is required. First clock
+	      should be peripheral.
+
+Optional properties:
+  - renesas,buswait: Integer to use BUSWAIT register
+  - renesas,enable-gpio: A gpio specifier to check GPIO determining if USB
+			 function should be enabled
+  - phys: phandle + phy specifier pair
+  - phy-names: must be "usb"
+  - dmas: Must contain a list of references to DMA specifiers.
+  - dma-names : named "ch%d", where %d is the channel number ranging from zero
+                to the number of channels (DnFIFOs) minus one.
+
+Example:
+	usbhs: usb@e6590000 {
+		compatible = "renesas,usbhs-r8a7790", "renesas,rcar-gen2-usbhs";
+		reg = <0 0xe6590000 0 0x100>;
+		interrupts = <0 107 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&mstp7_clks R8A7790_CLK_HSUSB>;
+	};
diff --git a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
deleted file mode 100644
index e39255ea6e4f..000000000000
--- a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-Renesas Electronics USBHS driver
-
-Required properties:
-  - compatible: Must contain one or more of the following:
-
-	- "renesas,usbhs-r8a7743" for r8a7743 (RZ/G1M) compatible device
-	- "renesas,usbhs-r8a7744" for r8a7744 (RZ/G1N) compatible device
-	- "renesas,usbhs-r8a7745" for r8a7745 (RZ/G1E) compatible device
-	- "renesas,usbhs-r8a77470" for r8a77470 (RZ/G1C) compatible device
-	- "renesas,usbhs-r8a774a1" for r8a774a1 (RZ/G2M) compatible device
-	- "renesas,usbhs-r8a774c0" for r8a774c0 (RZ/G2E) compatible device
-	- "renesas,usbhs-r8a7790" for r8a7790 (R-Car H2) compatible device
-	- "renesas,usbhs-r8a7791" for r8a7791 (R-Car M2-W) compatible device
-	- "renesas,usbhs-r8a7792" for r8a7792 (R-Car V2H) compatible device
-	- "renesas,usbhs-r8a7793" for r8a7793 (R-Car M2-N) compatible device
-	- "renesas,usbhs-r8a7794" for r8a7794 (R-Car E2) compatible device
-	- "renesas,usbhs-r8a7795" for r8a7795 (R-Car H3) compatible device
-	- "renesas,usbhs-r8a7796" for r8a7796 (R-Car M3-W) compatible device
-	- "renesas,usbhs-r8a77965" for r8a77965 (R-Car M3-N) compatible device
-	- "renesas,usbhs-r8a77990" for r8a77990 (R-Car E3) compatible device
-	- "renesas,usbhs-r8a77995" for r8a77995 (R-Car D3) compatible device
-	- "renesas,usbhs-r7s72100" for r7s72100 (RZ/A1) compatible device
-	- "renesas,usbhs-r7s9210" for r7s9210 (RZ/A2) compatible device
-	- "renesas,rcar-gen2-usbhs" for R-Car Gen2 or RZ/G1 compatible devices
-	- "renesas,rcar-gen3-usbhs" for R-Car Gen3 or RZ/G2 compatible devices
-	- "renesas,rza1-usbhs" for RZ/A1 compatible device
-	- "renesas,rza2-usbhs" for RZ/A2 compatible device
-
-	When compatible with the generic version, nodes must list the
-	SoC-specific version corresponding to the platform first followed
-	by the generic version.
-
-  - reg: Base address and length of the register for the USBHS
-  - interrupts: Interrupt specifier for the USBHS
-  - clocks: A list of phandle + clock specifier pairs.
-	    - In case of "renesas,rcar-gen3-usbhs", two clocks are required.
-	      First clock should be peripheral and second one should be host.
-	    - In case of except above, one clock is required. First clock
-	      should be peripheral.
-
-Optional properties:
-  - renesas,buswait: Integer to use BUSWAIT register
-  - renesas,enable-gpio: A gpio specifier to check GPIO determining if USB
-			 function should be enabled
-  - phys: phandle + phy specifier pair
-  - phy-names: must be "usb"
-  - dmas: Must contain a list of references to DMA specifiers.
-  - dma-names : named "ch%d", where %d is the channel number ranging from zero
-                to the number of channels (DnFIFOs) minus one.
-
-Example:
-	usbhs: usb@e6590000 {
-		compatible = "renesas,usbhs-r8a7790", "renesas,rcar-gen2-usbhs";
-		reg = <0 0xe6590000 0 0x100>;
-		interrupts = <0 107 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&mstp7_clks R8A7790_CLK_HSUSB>;
-	};
-- 
cgit 


From 23c46801d14cb647afefc9ec9a7f785777251e76 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms+renesas@verge.net.au>
Date: Mon, 17 Jun 2019 11:06:03 +0200
Subject: dt-bindings: usb: renesas_gen3: Rename bindings documentation file

For consistency with the naming of (most) other documentation files for DT
bindings for Renesas IP blocks rename the Renesas USB3.0 peripheral
documentation file from renesas-gen3.txt to renesas,gen3.txt.

Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 .../devicetree/bindings/usb/renesas,usb3.txt       | 41 ++++++++++++++++++++++
 .../devicetree/bindings/usb/renesas_usb3.txt       | 41 ----------------------
 2 files changed, 41 insertions(+), 41 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/usb/renesas,usb3.txt
 delete mode 100644 Documentation/devicetree/bindings/usb/renesas_usb3.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/usb/renesas,usb3.txt b/Documentation/devicetree/bindings/usb/renesas,usb3.txt
new file mode 100644
index 000000000000..35039e720515
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/renesas,usb3.txt
@@ -0,0 +1,41 @@
+Renesas Electronics USB3.0 Peripheral driver
+
+Required properties:
+  - compatible: Must contain one of the following:
+	- "renesas,r8a774a1-usb3-peri"
+	- "renesas,r8a774c0-usb3-peri"
+	- "renesas,r8a7795-usb3-peri"
+	- "renesas,r8a7796-usb3-peri"
+	- "renesas,r8a77965-usb3-peri"
+	- "renesas,r8a77990-usb3-peri"
+	- "renesas,rcar-gen3-usb3-peri" for a generic R-Car Gen3 or RZ/G2
+	  compatible device
+
+    When compatible with the generic version, nodes must list the
+    SoC-specific version corresponding to the platform first
+    followed by the generic version.
+
+  - reg: Base address and length of the register for the USB3.0 Peripheral
+  - interrupts: Interrupt specifier for the USB3.0 Peripheral
+  - clocks: clock phandle and specifier pair
+
+Optional properties:
+  - phys: phandle + phy specifier pair
+  - phy-names: must be "usb"
+
+Example of R-Car H3 ES1.x:
+	usb3_peri0: usb@ee020000 {
+		compatible = "renesas,r8a7795-usb3-peri",
+			     "renesas,rcar-gen3-usb3-peri";
+		reg = <0 0xee020000 0 0x400>;
+		interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cpg CPG_MOD 328>;
+	};
+
+	usb3_peri1: usb@ee060000 {
+		compatible = "renesas,r8a7795-usb3-peri",
+			     "renesas,rcar-gen3-usb3-peri";
+		reg = <0 0xee060000 0 0x400>;
+		interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cpg CPG_MOD 327>;
+	};
diff --git a/Documentation/devicetree/bindings/usb/renesas_usb3.txt b/Documentation/devicetree/bindings/usb/renesas_usb3.txt
deleted file mode 100644
index 35039e720515..000000000000
--- a/Documentation/devicetree/bindings/usb/renesas_usb3.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-Renesas Electronics USB3.0 Peripheral driver
-
-Required properties:
-  - compatible: Must contain one of the following:
-	- "renesas,r8a774a1-usb3-peri"
-	- "renesas,r8a774c0-usb3-peri"
-	- "renesas,r8a7795-usb3-peri"
-	- "renesas,r8a7796-usb3-peri"
-	- "renesas,r8a77965-usb3-peri"
-	- "renesas,r8a77990-usb3-peri"
-	- "renesas,rcar-gen3-usb3-peri" for a generic R-Car Gen3 or RZ/G2
-	  compatible device
-
-    When compatible with the generic version, nodes must list the
-    SoC-specific version corresponding to the platform first
-    followed by the generic version.
-
-  - reg: Base address and length of the register for the USB3.0 Peripheral
-  - interrupts: Interrupt specifier for the USB3.0 Peripheral
-  - clocks: clock phandle and specifier pair
-
-Optional properties:
-  - phys: phandle + phy specifier pair
-  - phy-names: must be "usb"
-
-Example of R-Car H3 ES1.x:
-	usb3_peri0: usb@ee020000 {
-		compatible = "renesas,r8a7795-usb3-peri",
-			     "renesas,rcar-gen3-usb3-peri";
-		reg = <0 0xee020000 0 0x400>;
-		interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&cpg CPG_MOD 328>;
-	};
-
-	usb3_peri1: usb@ee060000 {
-		compatible = "renesas,r8a7795-usb3-peri",
-			     "renesas,rcar-gen3-usb3-peri";
-		reg = <0 0xee060000 0 0x400>;
-		interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&cpg CPG_MOD 327>;
-	};
-- 
cgit 


From 7f699bd14913423ce971f7b8d725448093eaa51a Mon Sep 17 00:00:00 2001
From: Stefan Popa <stefan.popa@analog.com>
Date: Tue, 4 Jun 2019 17:58:02 +0300
Subject: iio: frequency: adf4371: Add support for ADF4371 PLL

The ADF4371 is a frequency synthesizer with an integrated voltage
controlled oscillator (VCO) for phase-locked loops (PLLs). The ADF4371
has an integrated VCO with a fundamental output frequency ranging from
4000 MHz to 8000 MHz. In addition, the VCO frequency is connected to
divide by 1, 2, 4, 8, 16, 32, or 64 circuits that allows the user to
generate radio frequency (RF) output frequencies as low as 62.5 MHz at
RF8x. A frequency multiplier at RF16x generates from 8 GHz to 16 GHz. A
frequency quadrupler generates frequencies from 16 GHz to 32 GHz at RF32x.
RFAUX8x duplicates the frequency range of RF8x or permits direct access to
the VCO output.

The driver takes the reference input frequency from the device tree and
uses it to calculate and maximize the PFD frequency (frequency of the phase
frequency detector). The PFD frequency is further used to calculate the
timeouts: synthesizer lock, VCO band selection, automatic level
calibration (ALC) and PLL settling time.

This initial driver exposes the attributes for setting the frequency and
enabling/disabling the different adf4371 channels.

Datasheet:
Link: https://www.analog.com/media/en/technical-documentation/data-sheets/adf4371.pdf

Signed-off-by: Stefan Popa <stefan.popa@analog.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../ABI/testing/sysfs-bus-iio-frequency-adf4371    | 44 ++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-iio-frequency-adf4371

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio-frequency-adf4371 b/Documentation/ABI/testing/sysfs-bus-iio-frequency-adf4371
new file mode 100644
index 000000000000..302de64cb424
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-frequency-adf4371
@@ -0,0 +1,44 @@
+What:		/sys/bus/iio/devices/iio:deviceX/out_altvoltageY_frequency
+KernelVersion:
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Stores the PLL frequency in Hz for channel Y.
+		Reading returns the actual frequency in Hz.
+		The ADF4371 has an integrated VCO with fundamendal output
+		frequency ranging from 4000000000 Hz 8000000000 Hz.
+
+		out_altvoltage0_frequency:
+			A divide by 1, 2, 4, 8, 16, 32 or circuit generates
+			frequencies from 62500000 Hz to 8000000000 Hz.
+		out_altvoltage1_frequency:
+			This channel duplicates the channel 0 frequency
+		out_altvoltage2_frequency:
+			A frequency doubler generates frequencies from
+			8000000000 Hz to 16000000000 Hz.
+		out_altvoltage3_frequency:
+			A frequency quadrupler generates frequencies from
+			16000000000 Hz to 32000000000 Hz.
+
+		Note: writes to one of the channels will affect the frequency of
+		all the other channels, since it involves changing the VCO
+		fundamental output frequency.
+
+What:		/sys/bus/iio/devices/iio:deviceX/out_altvoltageY_name
+KernelVersion:
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Reading returns the datasheet name for channel Y:
+
+		out_altvoltage0_name: RF8x
+		out_altvoltage1_name: RFAUX8x
+		out_altvoltage2_name: RF16x
+		out_altvoltage3_name: RF32x
+
+What:		/sys/bus/iio/devices/iio:deviceX/out_altvoltageY_powerdown
+KernelVersion:
+Contact:	linux-iio@vger.kernel.org
+Description:
+		This attribute allows the user to power down the PLL and it's
+		RFOut buffers.
+		Writing 1 causes the specified channel to power down.
+		Clearing returns to normal operation.
-- 
cgit 


From 4b65e3ba97ca1a8ad487a40fe2be807bea324d10 Mon Sep 17 00:00:00 2001
From: Stefan Popa <stefan.popa@analog.com>
Date: Tue, 4 Jun 2019 17:58:21 +0300
Subject: dt-bindings: iio: frequency: Add docs for ADF4371 PLL

Document support for Analog Devices ADF4371 SPI Wideband Synthesizer.

Signed-off-by: Stefan Popa <stefan.popa@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/frequency/adf4371.yaml | 54 ++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/iio/frequency/adf4371.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/frequency/adf4371.yaml b/Documentation/devicetree/bindings/iio/frequency/adf4371.yaml
new file mode 100644
index 000000000000..d7adf07421cf
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/frequency/adf4371.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/frequency/adf4371.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices ADF4371 Wideband Synthesizer
+
+maintainers:
+  - Popa Stefan <stefan.popa@analog.com>
+
+description: |
+  Analog Devices ADF4371 SPI Wideband Synthesizer
+  https://www.analog.com/media/en/technical-documentation/data-sheets/adf4371.pdf
+
+properties:
+  compatible:
+    enum:
+      - adi,adf4371
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    description:
+      Definition of the external clock (see clock/clock-bindings.txt)
+    maxItems: 1
+
+  clock-names:
+    description:
+      Must be "clkin"
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+
+examples:
+  - |
+    spi0 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        frequency@0 {
+                compatible = "adi,adf4371";
+                reg = <0>;
+                spi-max-frequency = <1000000>;
+                clocks = <&adf4371_clkin>;
+                clock-names = "clkin";
+        };
+    };
+...
-- 
cgit 


From 925120108860a61e3825f149433714e6a0549748 Mon Sep 17 00:00:00 2001
From: Lucas Oshiro <lucasseikioshiro@gmail.com>
Date: Fri, 31 May 2019 17:11:14 -0300
Subject: dt-bindings: iio: accel: adxl372: switch to YAML bindings

Convert the old device tree documentation to yaml format.

Signed-off-by: Lucas Oshiro <lucasseikioshiro@gmail.com>
Signed-off-by: Rodrigo Ribeiro <rodrigorsdc@gmail.com>
Co-developed-by: Rodrigo Ribeiro <rodrigorsdc@gmail.com>
Reviewed-by: Matheus Tavares <matheus.bernardino@usp.br>
Reviewed-by: Marcelo Schmitt <marcelo.schmitt1@gmail.com>
Reviewed-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/accel/adi,adxl372.yaml | 63 ++++++++++++++++++++++
 .../devicetree/bindings/iio/accel/adxl372.txt      | 33 ------------
 2 files changed, 63 insertions(+), 33 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/iio/accel/adi,adxl372.yaml
 delete mode 100644 Documentation/devicetree/bindings/iio/accel/adxl372.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/accel/adi,adxl372.yaml b/Documentation/devicetree/bindings/iio/accel/adi,adxl372.yaml
new file mode 100644
index 000000000000..a7fafb9bf5c6
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/accel/adi,adxl372.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/accelerometers/adi,adxl372.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices ADXL372 3-Axis, +/-(200g) Digital Accelerometer
+
+maintainers:
+  - Stefan Popa <stefan.popa@analog.com>
+
+description: |
+  Analog Devices ADXL372 3-Axis, +/-(200g) Digital Accelerometer that supports
+  both I2C & SPI interfaces
+    https://www.analog.com/en/products/adxl372.html
+
+properties:
+  compatible:
+    enum:
+      - adi,adxl372
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+examples:
+  - |
+        #include <dt-bindings/gpio/gpio.h>
+        #include <dt-bindings/interrupt-controller/irq.h>
+        i2c0 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                /* Example for a I2C device node */
+                accelerometer@53 {
+                        compatible = "adi,adxl372";
+                        reg = <0x53>;
+                        interrupt-parent = <&gpio>;
+                        interrupts = <25 IRQ_TYPE_EDGE_FALLING>;
+                };
+        };
+  - |
+        #include <dt-bindings/gpio/gpio.h>
+        #include <dt-bindings/interrupt-controller/irq.h>
+        spi0 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                accelerometer@0 {
+                        compatible = "adi,adxl372";
+                        reg = <0>;
+                        spi-max-frequency = <1000000>;
+                        interrupt-parent = <&gpio>;
+                        interrupts = <25 IRQ_TYPE_EDGE_FALLING>;
+                };
+        };
diff --git a/Documentation/devicetree/bindings/iio/accel/adxl372.txt b/Documentation/devicetree/bindings/iio/accel/adxl372.txt
deleted file mode 100644
index a289964756a7..000000000000
--- a/Documentation/devicetree/bindings/iio/accel/adxl372.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Analog Devices ADXL372 3-Axis, +/-(200g) Digital Accelerometer
-
-http://www.analog.com/media/en/technical-documentation/data-sheets/adxl372.pdf
-
-Required properties:
- - compatible : should be "adi,adxl372"
- - reg: the I2C address or SPI chip select number for the device
-
-Required properties for SPI bus usage:
- - spi-max-frequency: Max SPI frequency to use
-
-Optional properties:
- - interrupts: interrupt mapping for IRQ as documented in
-   Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
-
-Example for a I2C device node:
-
-	accelerometer@53 {
-		compatible = "adi,adxl372";
-		reg = <0x53>;
-		interrupt-parent = <&gpio>;
-		interrupts = <25 IRQ_TYPE_EDGE_FALLING>;
-	};
-
-Example for a SPI device node:
-
-	accelerometer@0 {
-		compatible = "adi,adxl372";
-		reg = <0>;
-		spi-max-frequency = <1000000>;
-		interrupt-parent = <&gpio>;
-		interrupts = <25 IRQ_TYPE_EDGE_FALLING>;
-	};
-- 
cgit 


From 6800d9a53532b9fd602b4f9b89bff5cf61c45dbe Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Mon, 17 Jun 2019 13:29:44 +0300
Subject: Documentation/i915: fix file references after display/ subdir renames
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the plethora of Sphinx build errors after moving the display files
under a subdirectory.

Fixes: 379bc100232a ("drm/i915: move modesetting output/encoder code under display/")
Fixes: df0566a641f9 ("drm/i915: move modesetting core code under display/")
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190617102944.25129-1-jani.nikula@intel.com
---
 Documentation/gpu/i915.rst | 66 +++++++++++++++++++++++-----------------------
 1 file changed, 33 insertions(+), 33 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 300220c4b498..c38ef0dda605 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -82,13 +82,13 @@ change.
 Frontbuffer Tracking
 --------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_frontbuffer.c
    :doc: frontbuffer tracking
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.h
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_frontbuffer.h
    :internal:
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_frontbuffer.c
    :internal:
 
 .. kernel-doc:: drivers/gpu/drm/i915/i915_gem.c
@@ -97,10 +97,10 @@ Frontbuffer Tracking
 Display FIFO Underrun Reporting
 -------------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_fifo_underrun.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_fifo_underrun.c
    :doc: fifo underrun handling
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_fifo_underrun.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_fifo_underrun.c
    :internal:
 
 Plane Configuration
@@ -115,10 +115,10 @@ panel self refresh.
 Atomic Plane Helpers
 --------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_atomic_plane.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_atomic_plane.c
    :doc: atomic plane helpers
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_atomic_plane.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_atomic_plane.c
    :internal:
 
 Output Probing
@@ -132,19 +132,19 @@ probing, so those sections fully apply.
 Hotplug
 -------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_hotplug.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_hotplug.c
    :doc: Hotplug
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_hotplug.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_hotplug.c
    :internal:
 
 High Definition Audio
 ---------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_audio.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_audio.c
    :doc: High Definition Audio over HDMI and Display Port
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_audio.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_audio.c
    :internal:
 
 .. kernel-doc:: include/drm/i915_component.h
@@ -153,58 +153,58 @@ High Definition Audio
 Intel HDMI LPE Audio Support
 ----------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_lpe_audio.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_lpe_audio.c
    :doc: LPE Audio integration for HDMI or DP playback
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_lpe_audio.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_lpe_audio.c
    :internal:
 
 Panel Self Refresh PSR (PSR/SRD)
 --------------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_psr.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_psr.c
    :doc: Panel Self Refresh (PSR/SRD)
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_psr.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_psr.c
    :internal:
 
 Frame Buffer Compression (FBC)
 ------------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_fbc.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_fbc.c
    :doc: Frame Buffer Compression (FBC)
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_fbc.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_fbc.c
    :internal:
 
 Display Refresh Rate Switching (DRRS)
 -------------------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
    :doc: Display Refresh Rate Switching (DRRS)
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
    :functions: intel_dp_set_drrs_state
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
    :functions: intel_edp_drrs_enable
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
    :functions: intel_edp_drrs_disable
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
    :functions: intel_edp_drrs_invalidate
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
    :functions: intel_edp_drrs_flush
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
    :functions: intel_dp_drrs_init
 
 DPIO
 ----
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dpio_phy.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dpio_phy.c
    :doc: DPIO
 
 CSR firmware support for DMC
@@ -219,34 +219,34 @@ CSR firmware support for DMC
 Video BIOS Table (VBT)
 ----------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_bios.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_bios.c
    :doc: Video BIOS Table (VBT)
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_bios.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_bios.c
    :internal:
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_vbt_defs.h
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_vbt_defs.h
    :internal:
 
 Display clocks
 --------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_cdclk.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_cdclk.c
    :doc: CDCLK / RAWCLK
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_cdclk.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_cdclk.c
    :internal:
 
 Display PLLs
 ------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dpll_mgr.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dpll_mgr.c
    :doc: Display PLLs
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dpll_mgr.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dpll_mgr.c
    :internal:
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dpll_mgr.h
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dpll_mgr.h
    :internal:
 
 Memory Management and Command Submission
-- 
cgit 


From bee0aa5704d8c9353b4c230f2613906aadd979c2 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Thu, 13 Jun 2019 18:57:04 +0530
Subject: dt-bindings: arm: Document 96Boards Meerkat96 devicetree binding

Document 96Boards Meerkat96 devicetree binding based on i.MX7D SoC.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 Documentation/devicetree/bindings/arm/fsl.yaml | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml
index 407138ebc0d0..c82e0185227c 100644
--- a/Documentation/devicetree/bindings/arm/fsl.yaml
+++ b/Documentation/devicetree/bindings/arm/fsl.yaml
@@ -156,6 +156,7 @@ properties:
         items:
           - enum:
               - fsl,imx7d-sdb             # i.MX7 SabreSD Board
+              - novtech,imx7d-meerkat96   # i.MX7 Meerkat96 Board
               - tq,imx7d-mba7             # i.MX7D TQ MBa7 with TQMa7D SoM
               - zii,imx7d-rpu2            # ZII RPU2 Board
           - const: fsl,imx7d
-- 
cgit 


From e90716a6612150218aaff1fd47ca6de954100a06 Mon Sep 17 00:00:00 2001
From: Tim Wawrzynczak <twawrzynczak@chromium.org>
Date: Thu, 13 Jun 2019 11:57:36 -0600
Subject: platform/chrome: cros_ec_debugfs: Add debugfs entry to retrieve EC
 uptime

The new debugfs entry 'uptime' is being made available to userspace so that
a userspace daemon can synchronize EC logs with host time.

Signed-off-by: Tim Wawrzynczak <twawrzynczak@chromium.org>
[rework based on Tim's first approach]
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Tested-by: Tim Wawrzynczak <twawrzynczak@chromium.org>
---
 Documentation/ABI/testing/debugfs-cros-ec | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 Documentation/ABI/testing/debugfs-cros-ec

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/debugfs-cros-ec b/Documentation/ABI/testing/debugfs-cros-ec
new file mode 100644
index 000000000000..c91da2d374aa
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-cros-ec
@@ -0,0 +1,8 @@
+What:		/sys/kernel/debug/<cros-ec-device>/uptime
+Date:		June 2019
+KernelVersion:	5.3
+Description:
+		A u32 providing the time since EC booted in ms. This is
+		is used for synchronizing the AP host time with the EC
+		log. An error is returned if the command is not supported
+		by the EC or there is a communication problem.
-- 
cgit 


From 1fbc6ec2f35e28a23c9f0e9bef199bbbcfb4635e Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Fri, 14 Jun 2019 12:23:47 +0200
Subject: platform/chrome: cros_ec_debugfs: Add debugfs ABI documentation

Add the missing ABI documentation for the already available debugfs
entries: console_log, panicinfo and pdinfo.

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
---
 Documentation/ABI/testing/debugfs-cros-ec | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/debugfs-cros-ec b/Documentation/ABI/testing/debugfs-cros-ec
index c91da2d374aa..573a82d23c89 100644
--- a/Documentation/ABI/testing/debugfs-cros-ec
+++ b/Documentation/ABI/testing/debugfs-cros-ec
@@ -1,3 +1,29 @@
+What:		/sys/kernel/debug/<cros-ec-device>/console_log
+Date:		September 2017
+KernelVersion:	4.13
+Description:
+		If the EC supports the CONSOLE_READ command type, this file
+		can be used to grab the EC logs. The kernel polls for the log
+		and keeps its own buffer but userspace should grab this and
+		write it out to some logs.
+
+What:		/sys/kernel/debug/<cros-ec-device>/panicinfo
+Date:		September 2017
+KernelVersion:	4.13
+Description:
+		This file dumps the EC panic information from the previous
+		reboot. This file will only exist if the PANIC_INFO command
+		type is supported by the EC.
+
+What:		/sys/kernel/debug/<cros-ec-device>/pdinfo
+Date:		June 2018
+KernelVersion:	4.17
+Description:
+		This file provides the port role, muxes and power debug
+		information for all the USB PD/type-C ports available. If
+		the are no ports available, this file will be just an empty
+		file.
+
 What:		/sys/kernel/debug/<cros-ec-device>/uptime
 Date:		June 2019
 KernelVersion:	5.3
-- 
cgit 


From cd5f9726773b654f8e1cfd6ec1a989c558fa164b Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Mon, 20 May 2019 10:56:03 -0700
Subject: Documentation: dt-bindings: Add snps,need-phy-for-wake for dwc2 USB

Some SoCs with a dwc2 USB controller may need to keep the PHY on to
support remote wakeup.  Allow specifying this as a device tree
property.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 Documentation/devicetree/bindings/usb/dwc2.txt | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/usb/dwc2.txt b/Documentation/devicetree/bindings/usb/dwc2.txt
index 49eac0dc86b0..aafff3a6904d 100644
--- a/Documentation/devicetree/bindings/usb/dwc2.txt
+++ b/Documentation/devicetree/bindings/usb/dwc2.txt
@@ -42,6 +42,8 @@ Refer to phy/phy-bindings.txt for generic phy consumer properties
 - g-rx-fifo-size: size of rx fifo size in gadget mode.
 - g-np-tx-fifo-size: size of non-periodic tx fifo size in gadget mode.
 - g-tx-fifo-size: size of periodic tx fifo per endpoint (except ep0) in gadget mode.
+- snps,need-phy-for-wake: If present indicates that the phy needs to be left
+                          on for remote wakeup during suspend.
 - snps,reset-phy-on-wake: If present indicates that we need to reset the PHY when
                           we detect a wakeup.  This is due to a hardware errata.
 
@@ -58,4 +60,5 @@ Example:
 		clock-names = "otg";
 		phys = <&usbphy>;
 		phy-names = "usb2-phy";
+		snps,need-phy-for-wake;
         };
-- 
cgit 


From 34cc761bdcc84fa7b9c5fd4e1f8548df6979f194 Mon Sep 17 00:00:00 2001
From: Anurag Kumar Vulisha <anurag.kumar.vulisha@xilinx.com>
Date: Fri, 10 May 2019 12:37:26 +0530
Subject: doc: dt: bindings: usb: dwc3: Update entries for disabling U1 and U2

This patch updates the documentation with the information related
to the quirks that needs to be added for disabling the link entering
into the U1 and U2 states

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Anurag Kumar Vulisha <anurag.kumar.vulisha@xilinx.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 Documentation/devicetree/bindings/usb/dwc3.txt | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/usb/dwc3.txt b/Documentation/devicetree/bindings/usb/dwc3.txt
index 8e5265e9f658..66780a47ad85 100644
--- a/Documentation/devicetree/bindings/usb/dwc3.txt
+++ b/Documentation/devicetree/bindings/usb/dwc3.txt
@@ -64,6 +64,8 @@ Optional properties:
  - snps,dis_u2_susphy_quirk: when set core will disable USB2 suspend phy.
  - snps,dis_enblslpm_quirk: when set clears the enblslpm in GUSB2PHYCFG,
 			disabling the suspend signal to the PHY.
+ - snps,dis-u1-entry-quirk: set if link entering into U1 needs to be disabled.
+ - snps,dis-u2-entry-quirk: set if link entering into U2 needs to be disabled.
  - snps,dis_rxdet_inp3_quirk: when set core will disable receiver detection
 			in PHY P3 power state.
  - snps,dis-u2-freeclk-exists-quirk: when set, clear the u2_freeclk_exists
-- 
cgit 


From 2d5ba19bdfef4dd06add144eb04287ee98409f75 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 3 Jun 2019 19:52:44 -0300
Subject: kvm: x86: add host poll control msrs

Add an MSRs which allows the guest to disable
host polling (specifically the cpuidle-haltpoll,
when performing polling in the guest, disables
host side polling).

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/msr.txt | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index f3f0d57ced8e..df1f4338b3ca 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -273,3 +273,12 @@ MSR_KVM_EOI_EN: 0x4b564d04
 	guest must both read the least significant bit in the memory area and
 	clear it using a single CPU instruction, such as test and clear, or
 	compare and exchange.
+
+MSR_KVM_POLL_CONTROL: 0x4b564d05
+	Control host-side polling.
+
+	data: Bit 0 enables (1) or disables (0) host-side HLT polling logic.
+
+	KVM guests can request the host not to poll on HLT, for example if
+	they are performing polling themselves.
+
-- 
cgit 


From 781f9f30312625a13c999e2cff429989e2275e55 Mon Sep 17 00:00:00 2001
From: Peng Ma <peng.ma@nxp.com>
Date: Mon, 6 May 2019 09:03:44 +0000
Subject: dt-bindings: fsl-qdma: Add LS1028A qDMA bindings

Add LS1028A qDMA controller bindings to fsl-qdma bindings.

Signed-off-by: Peng Ma <peng.ma@nxp.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 Documentation/devicetree/bindings/dma/fsl-qdma.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/dma/fsl-qdma.txt b/Documentation/devicetree/bindings/dma/fsl-qdma.txt
index 6a0ff9059e72..da371c4d406c 100644
--- a/Documentation/devicetree/bindings/dma/fsl-qdma.txt
+++ b/Documentation/devicetree/bindings/dma/fsl-qdma.txt
@@ -7,6 +7,7 @@ Required properties:
 
 - compatible:		Must be one of
 			 "fsl,ls1021a-qdma": for LS1021A Board
+			 "fsl,ls1028a-qdma": for LS1028A Board
 			 "fsl,ls1043a-qdma": for ls1043A Board
 			 "fsl,ls1046a-qdma": for ls1046A Board
 - reg:			Should contain the register's base address and length.
-- 
cgit 


From ac013c7e2dc256dad465da539f7b072167770018 Mon Sep 17 00:00:00 2001
From: "Angus Ainslie (Purism)" <angus@akkea.ca>
Date: Mon, 17 Jun 2019 07:52:14 -0600
Subject: dt-bindings: Add an entry for Purism SPC

Add an entry for Purism, SPC

Signed-off-by: Angus Ainslie (Purism) <angus@akkea.ca>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 33a65a45e319..104595f55f34 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -683,6 +683,8 @@ patternProperties:
     description: PROBOX2 (by W2COMP Co., Ltd.)
   "^pulsedlight,.*":
     description: PulsedLight, Inc
+  "^purism,.*":
+    description: Purism, SPC
   "^qca,.*":
     description: Qualcomm Atheros, Inc.
   "^qcom,.*":
-- 
cgit 


From e126417ff1b172cba599d39e3e8583bf12e651eb Mon Sep 17 00:00:00 2001
From: "Angus Ainslie (Purism)" <angus@akkea.ca>
Date: Mon, 17 Jun 2019 07:52:15 -0600
Subject: dt-bindings: arm: fsl: Add the imx8mq boards

Add an entry for imx8mq based boards

Signed-off-by: Angus Ainslie (Purism) <angus@akkea.ca>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 Documentation/devicetree/bindings/arm/fsl.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml
index c82e0185227c..113f148edf86 100644
--- a/Documentation/devicetree/bindings/arm/fsl.yaml
+++ b/Documentation/devicetree/bindings/arm/fsl.yaml
@@ -178,6 +178,13 @@ properties:
               - fsl,imx8mm-evk            # i.MX8MM EVK Board
           - const: fsl,imx8mm
 
+      - description: i.MX8MQ based Boards
+        items:
+          - enum:
+              - fsl,imx8mq-evk            # i.MX8MQ EVK Board
+              - purism,librem5-devkit     # Purism Librem5 devkit
+          - const: fsl,imx8mq
+
       - description: i.MX8QXP based Boards
         items:
           - enum:
-- 
cgit 


From fbfe9ba4599f8941b02bc4fc701d6df9f04ebf02 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 17 Jun 2019 14:44:23 -0600
Subject: dt-bindings: arm: Move Emtrion i.MX6 board bindings to schema

The Emtrion board bindings landed when the i.MX board/SoC bindings were
being converted to DT schema. Add them to the schema and remove the
separate file.

Cc: Jan Tuerk <jan.tuerk@emtrion.com>
Cc: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 Documentation/devicetree/bindings/arm/emtrion.txt | 12 ------------
 Documentation/devicetree/bindings/arm/fsl.yaml    |  4 ++++
 2 files changed, 4 insertions(+), 12 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/emtrion.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/emtrion.txt b/Documentation/devicetree/bindings/arm/emtrion.txt
deleted file mode 100644
index 83329aefc483..000000000000
--- a/Documentation/devicetree/bindings/arm/emtrion.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-Emtrion Devicetree Bindings
-===========================
-
-emCON Series:
--------------
-
-Required root node properties
-	- compatible:
-	- "emtrion,emcon-mx6", "fsl,imx6q"; 		: emCON-MX6D or emCON-MX6Q SoM
-	- "emtrion,emcon-mx6-avari", "fsl,imx6q";	: emCON-MX6D or emCON-MX6Q SoM on Avari Base
-	- "emtrion,emcon-mx6", "fsl,imx6dl"; 		: emCON-MX6S or emCON-MX6DL SoM
-	- "emtrion,emcon-mx6-avari", "fsl,imx6dl";	: emCON-MX6S or emCON-MX6DL SoM on Avari Base
diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml
index 113f148edf86..f933f841d8ad 100644
--- a/Documentation/devicetree/bindings/arm/fsl.yaml
+++ b/Documentation/devicetree/bindings/arm/fsl.yaml
@@ -80,6 +80,8 @@ properties:
       - description: i.MX6Q based Boards
         items:
           - enum:
+              - emtrion,emcon-mx6         # emCON-MX6D or emCON-MX6Q SoM
+              - emtrion,emcon-mx6-avari   # emCON-MX6D or emCON-MX6Q SoM on Avari Base
               - fsl,imx6q-arm2
               - fsl,imx6q-sabreauto
               - fsl,imx6q-sabrelite
@@ -99,6 +101,8 @@ properties:
         items:
           - enum:
               - eckelmann,imx6dl-ci4x10
+              - emtrion,emcon-mx6         # emCON-MX6S or emCON-MX6DL SoM
+              - emtrion,emcon-mx6-avari   # emCON-MX6S or emCON-MX6DL SoM on Avari Base
               - fsl,imx6dl-sabreauto      # i.MX6 DualLite/Solo SABRE Automotive Board
               - fsl,imx6dl-sabresd        # i.MX6 DualLite SABRE Smart Device Board
               - technologic,imx6dl-ts4900
-- 
cgit 


From f37545c59bf3b92f05a030b4a3c0cfb8d3a003c7 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 17 Jun 2019 16:40:28 -0600
Subject: dt-bindings: arm: fsl: Add back missing i.MX7ULP binding

In the conversion to DT schema, the addition of the i.MX7ULP binding got
dropped. Add it to the binding schema.

Fixes: a1a38e1f4d1d ("dt-bindings: arm: Convert FSL board/soc bindings to json-schema")
Cc: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 Documentation/devicetree/bindings/arm/fsl.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml
index f933f841d8ad..3842d701a386 100644
--- a/Documentation/devicetree/bindings/arm/fsl.yaml
+++ b/Documentation/devicetree/bindings/arm/fsl.yaml
@@ -176,6 +176,12 @@ properties:
           - const: compulab,cl-som-imx7
           - const: fsl,imx7d
 
+      - description: i.MX7ULP based Boards
+        items:
+          - enum:
+              - fsl,imx7ulp-evk           # i.MX7ULP Evaluation Kit
+          - const: fsl,imx7ulp
+
       - description: i.MX8MM based Boards
         items:
           - enum:
-- 
cgit 


From a10ecf624b7b7c1ec6c263d4562bd6631d380d2f Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 17 Jun 2019 16:42:44 -0600
Subject: dt-bindings: arm: fsl: Add missing schemas for i.MX1/31/35

The SoC/board bindings for i.MX1/31/35 are undocumented. Add the missing
bindings to the schema.

Cc: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 Documentation/devicetree/bindings/arm/fsl.yaml | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml
index 3842d701a386..7294ac36f4c0 100644
--- a/Documentation/devicetree/bindings/arm/fsl.yaml
+++ b/Documentation/devicetree/bindings/arm/fsl.yaml
@@ -15,6 +15,13 @@ properties:
     const: '/'
   compatible:
     oneOf:
+      - description: i.MX1 based Boards
+        items:
+          - enum:
+              - armadeus,imx1-apf9328
+              - fsl,imx1ads
+          - const: fsl,imx1
+
       - description: i.MX23 based Boards
         items:
           - enum:
@@ -51,6 +58,25 @@ properties:
           - const: i2se,duckbill-2
           - const: fsl,imx28
 
+      - description: i.MX31 based Boards
+        items:
+          - enum:
+              - buglabs,imx31-bug
+              - logicpd,imx31-lite
+          - const: fsl,imx31
+
+      - description: i.MX35 based Boards
+        items:
+          - enum:
+              - fsl,imx35-pdk
+          - const: fsl,imx35
+
+      - description: i.MX35 Eukrea CPUIMX35 Board
+        items:
+          - const: eukrea,mbimxsd35-baseboard
+          - const: eukrea,cpuimx35
+          - const: fsl,imx35
+
       - description: i.MX50 based Boards
         items:
           - enum:
-- 
cgit 


From 2554fcb8cc4028f1ea0269c23265ce693d7de965 Mon Sep 17 00:00:00 2001
From: Marco Felsch <m.felsch@pengutronix.de>
Date: Mon, 17 Jun 2019 18:14:30 +0200
Subject: dt-bindings: add Kontron vendor prefix

Kontron is a leading embedded computer supplier. More information can be
found on: https://www.kontron.de/

Signed-off-by: Marco Felsch <m.felsch@pengutronix.de>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index e0a3e5ff3825..52e84257363b 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -479,6 +479,8 @@ patternProperties:
     description: Rakuten Kobo Inc.
   "^koe,.*":
     description: Kaohsiung Opto-Electronics Inc.
+  "^kontron,.*":
+    description: Kontron S&T AG
   "^kosagi,.*":
     description: Sutajio Ko-Usagi PTE Ltd.
   "^kyo,.*":
-- 
cgit 


From 1bdd44579a54f73b87aff21d3b53f0f4e20b679c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 17 Jun 2019 16:33:22 +0200
Subject: dt-bindings: Add missing newline at end of file

"git diff" says:

    \ No newline at end of file

after modifying the files.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/ipmi/npcm7xx-kcs-bmc.txt            | 2 +-
 Documentation/devicetree/bindings/pinctrl/nuvoton,npcm7xx-pinctrl.txt | 2 +-
 Documentation/devicetree/bindings/regulator/pv88060.txt               | 2 +-
 Documentation/devicetree/bindings/sound/cs42l73.txt                   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/ipmi/npcm7xx-kcs-bmc.txt b/Documentation/devicetree/bindings/ipmi/npcm7xx-kcs-bmc.txt
index 3538a214fff1..352f5e9c759b 100644
--- a/Documentation/devicetree/bindings/ipmi/npcm7xx-kcs-bmc.txt
+++ b/Documentation/devicetree/bindings/ipmi/npcm7xx-kcs-bmc.txt
@@ -36,4 +36,4 @@ Example:
             kcs_chan = <2>;
             status = "disabled";
         };
-    };
\ No newline at end of file
+    };
diff --git a/Documentation/devicetree/bindings/pinctrl/nuvoton,npcm7xx-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/nuvoton,npcm7xx-pinctrl.txt
index 83f4bbac94bb..a1264cc8660d 100644
--- a/Documentation/devicetree/bindings/pinctrl/nuvoton,npcm7xx-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/nuvoton,npcm7xx-pinctrl.txt
@@ -213,4 +213,4 @@ pinctrl: pinctrl@f0800000 {
 		groups = "clkreq";
 		function = "clkreq";
 	};
-};
\ No newline at end of file
+};
diff --git a/Documentation/devicetree/bindings/regulator/pv88060.txt b/Documentation/devicetree/bindings/regulator/pv88060.txt
index 10a6dadc008e..6a7c8a92fdb0 100644
--- a/Documentation/devicetree/bindings/regulator/pv88060.txt
+++ b/Documentation/devicetree/bindings/regulator/pv88060.txt
@@ -121,4 +121,4 @@ Example
 				regulator-max-microvolt = <5000000>;
 			};
 		};
-	};
\ No newline at end of file
+	};
diff --git a/Documentation/devicetree/bindings/sound/cs42l73.txt b/Documentation/devicetree/bindings/sound/cs42l73.txt
index 80ae910dbf6c..47b868b5ab01 100644
--- a/Documentation/devicetree/bindings/sound/cs42l73.txt
+++ b/Documentation/devicetree/bindings/sound/cs42l73.txt
@@ -19,4 +19,4 @@ codec: cs42l73@4a {
 	reg = <0x4a>;
 	reset_gpio = <&gpio 10 0>;
 	chgfreq = <0x05>;
-};
\ No newline at end of file
+};
-- 
cgit 


From d95ea1a4e1fb7c7ec44969afaf0d983f8170ebef Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Tue, 28 May 2019 17:12:51 -0600
Subject: docs: Add a document on repository management

Every merge window seems to involve at least one episode where subsystem
maintainers don't manage their trees as Linus would like.  Document the
expectations so that at least he has something to point people to.

Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/maintainer/index.rst                |   1 +
 Documentation/maintainer/rebasing-and-merging.rst | 226 ++++++++++++++++++++++
 2 files changed, 227 insertions(+)
 create mode 100644 Documentation/maintainer/rebasing-and-merging.rst

(limited to 'Documentation')

diff --git a/Documentation/maintainer/index.rst b/Documentation/maintainer/index.rst
index 2a14916930cb..56e2c09dfa39 100644
--- a/Documentation/maintainer/index.rst
+++ b/Documentation/maintainer/index.rst
@@ -10,5 +10,6 @@ additions to this manual.
    :maxdepth: 2
 
    configure-git
+   rebasing-and-merging
    pull-requests
 
diff --git a/Documentation/maintainer/rebasing-and-merging.rst b/Documentation/maintainer/rebasing-and-merging.rst
new file mode 100644
index 000000000000..09f988e7fa71
--- /dev/null
+++ b/Documentation/maintainer/rebasing-and-merging.rst
@@ -0,0 +1,226 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+Rebasing and merging
+====================
+
+Maintaining a subsystem, as a general rule, requires a familiarity with the
+Git source-code management system.  Git is a powerful tool with a lot of
+features; as is often the case with such tools, there are right and wrong
+ways to use those features.  This document looks in particular at the use
+of rebasing and merging.  Maintainers often get in trouble when they use
+those tools incorrectly, but avoiding problems is not actually all that
+hard.
+
+One thing to be aware of in general is that, unlike many other projects,
+the kernel community is not scared by seeing merge commits in its
+development history.  Indeed, given the scale of the project, avoiding
+merges would be nearly impossible.  Some problems encountered by
+maintainers result from a desire to avoid merges, while others come from
+merging a little too often.
+
+Rebasing
+========
+
+"Rebasing" is the process of changing the history of a series of commits
+within a repository.  There are two different types of operations that are
+referred to as rebasing since both are done with the ``git rebase``
+command, but there are significant differences between them:
+
+ - Changing the parent (starting) commit upon which a series of patches is
+   built.  For example, a rebase operation could take a patch set built on
+   the previous kernel release and base it, instead, on the current
+   release.  We'll call this operation "reparenting" in the discussion
+   below.
+
+ - Changing the history of a set of patches by fixing (or deleting) broken
+   commits, adding patches, adding tags to commit changelogs, or changing
+   the order in which commits are applied.  In the following text, this
+   type of operation will be referred to as "history modification"
+
+The term "rebasing" will be used to refer to both of the above operations.
+Used properly, rebasing can yield a cleaner and clearer development
+history; used improperly, it can obscure that history and introduce bugs.
+
+There are a few rules of thumb that can help developers to avoid the worst
+perils of rebasing:
+
+ - History that has been exposed to the world beyond your private system
+   should usually not be changed.  Others may have pulled a copy of your
+   tree and built on it; modifying your tree will create pain for them.  If
+   work is in need of rebasing, that is usually a sign that it is not yet
+   ready to be committed to a public repository.
+
+   That said, there are always exceptions.  Some trees (linux-next being
+   a significant example) are frequently rebased by their nature, and
+   developers know not to base work on them.  Developers will sometimes
+   expose an unstable branch for others to test with or for automated
+   testing services.  If you do expose a branch that may be unstable in
+   this way, be sure that prospective users know not to base work on it.
+
+ - Do not rebase a branch that contains history created by others.  If you
+   have pulled changes from another developer's repository, you are now a
+   custodian of their history.  You should not change it.  With few
+   exceptions, for example, a broken commit in a tree like this should be
+   explicitly reverted rather than disappeared via history modification.
+
+ - Do not reparent a tree without a good reason to do so.  Just being on a
+   newer base or avoiding a merge with an upstream repository is not
+   generally a good reason.
+
+ - If you must reparent a repository, do not pick some random kernel commit
+   as the new base.  The kernel is often in a relatively unstable state
+   between release points; basing development on one of those points
+   increases the chances of running into surprising bugs.  When a patch
+   series must move to a new base, pick a stable point (such as one of
+   the -rc releases) to move to.
+
+ - Realize that reparenting a patch series (or making significant history
+   modifications) changes the environment in which it was developed and,
+   likely, invalidates much of the testing that was done.  A reparented
+   patch series should, as a general rule, be treated like new code and
+   retested from the beginning.
+
+A frequent cause of merge-window trouble is when Linus is presented with a
+patch series that has clearly been reparented, often to a random commit,
+shortly before the pull request was sent.  The chances of such a series
+having been adequately tested are relatively low - as are the chances of
+the pull request being acted upon.
+
+If, instead, rebasing is limited to private trees, commits are based on a
+well-known starting point, and they are well tested, the potential for
+trouble is low.
+
+Merging
+=======
+
+Merging is a common operation in the kernel development process; the 5.1
+development cycle included 1,126 merge commits - nearly 9% of the total.
+Kernel work is accumulated in over 100 different subsystem trees, each of
+which may contain multiple topic branches; each branch is usually developed
+independently of the others.  So naturally, at least one merge will be
+required before any given branch finds its way into an upstream repository.
+
+Many projects require that branches in pull requests be based on the
+current trunk so that no merge commits appear in the history.  The kernel
+is not such a project; any rebasing of branches to avoid merges will, most
+likely, lead to trouble.
+
+Subsystem maintainers find themselves having to do two types of merges:
+from lower-level subsystem trees and from others, either sibling trees or
+the mainline.  The best practices to follow differ in those two situations.
+
+Merging from lower-level trees
+------------------------------
+
+Larger subsystems tend to have multiple levels of maintainers, with the
+lower-level maintainers sending pull requests to the higher levels.  Acting
+on such a pull request will almost certainly generate a merge commit; that
+is as it should be.  In fact, subsystem maintainers may want to use
+the --no-ff flag to force the addition of a merge commit in the rare cases
+where one would not normally be created so that the reasons for the merge
+can be recorded.  The changelog for the merge should, for any kind of
+merge, say *why* the merge is being done.  For a lower-level tree, "why" is
+usually a summary of the changes that will come with that pull.
+
+Maintainers at all levels should be using signed tags on their pull
+requests, and upstream maintainers should verify the tags when pulling
+branches.  Failure to do so threatens the security of the development
+process as a whole.
+
+As per the rules outlined above, once you have merged somebody else's
+history into your tree, you cannot rebase that branch, even if you
+otherwise would be able to.
+
+Merging from sibling or upstream trees
+--------------------------------------
+
+While merges from downstream are common and unremarkable, merges from other
+trees tend to be a red flag when it comes time to push a branch upstream.
+Such merges need to be carefully thought about and well justified, or
+there's a good chance that a subsequent pull request will be rejected.
+
+It is natural to want to merge the master branch into a repository; this
+type of merge is often called a "back merge".  Back merges can help to make
+sure that there are no conflicts with parallel development and generally
+gives a warm, fuzzy feeling of being up-to-date.  But this temptation
+should be avoided almost all of the time.
+
+Why is that?  Back merges will muddy the development history of your own
+branch.  They will significantly increase your chances of encountering bugs
+from elsewhere in the community and make it hard to ensure that the work
+you are managing is stable and ready for upstream.  Frequent merges can
+also obscure problems with the development process in your tree; they can
+hide interactions with other trees that should not be happening (often) in
+a well-managed branch.
+
+That said, back merges are occasionally required; when that happens, be
+sure to document *why* it was required in the commit message.  As always,
+merge to a well-known stable point, rather than to some random commit.
+Even then, you should not back merge a tree above your immediate upstream
+tree; if a higher-level back merge is really required, the upstream tree
+should do it first.
+
+One of the most frequent causes of merge-related trouble is when a
+maintainer merges with the upstream in order to resolve merge conflicts
+before sending a pull request.  Again, this temptation is easy enough to
+understand, but it should absolutely be avoided.  This is especially true
+for the final pull request: Linus is adamant that he would much rather see
+merge conflicts than unnecessary back merges.  Seeing the conflicts lets
+him know where potential problem areas are.  He does a lot of merges (382
+in the 5.1 development cycle) and has gotten quite good at conflict
+resolution - often better than the developers involved.
+
+So what should a maintainer do when there is a conflict between their
+subsystem branch and the mainline?  The most important step is to warn
+Linus in the pull request that the conflict will happen; if nothing else,
+that demonstrates an awareness of how your branch fits into the whole.  For
+especially difficult conflicts, create and push a *separate* branch to show
+how you would resolve things.  Mention that branch in your pull request,
+but the pull request itself should be for the unmerged branch.
+
+Even in the absence of known conflicts, doing a test merge before sending a
+pull request is a good idea.  It may alert you to problems that you somehow
+didn't see from linux-next and helps to understand exactly what you are
+asking upstream to do.
+
+Another reason for doing merges of upstream or another subsystem tree is to
+resolve dependencies.  These dependency issues do happen at times, and
+sometimes a cross-merge with another tree is the best way to resolve them;
+as always, in such situations, the merge commit should explain why the
+merge has been done.  Take a moment to do it right; people will read those
+changelogs.
+
+Often, though, dependency issues indicate that a change of approach is
+needed.  Merging another subsystem tree to resolve a dependency risks
+bringing in other bugs and should almost never be done.  If that subsystem
+tree fails to be pulled upstream, whatever problems it had will block the
+merging of your tree as well.  Preferable alternatives include agreeing
+with the maintainer to carry both sets of changes in one of the trees or
+creating a topic branch dedicated to the prerequisite commits that can be
+merged into both trees.  If the dependency is related to major
+infrastructural changes, the right solution might be to hold the dependent
+commits for one development cycle so that those changes have time to
+stabilize in the mainline.
+
+Finally
+=======
+
+It is relatively common to merge with the mainline toward the beginning of
+the development cycle in order to pick up changes and fixes done elsewhere
+in the tree.  As always, such a merge should pick a well-known release
+point rather than some random spot.  If your upstream-bound branch has
+emptied entirely into the mainline during the merge window, you can pull it
+forward with a command like::
+
+  git merge v5.2-rc1^0
+
+The "^0" will cause Git to do a fast-forward merge (which should be
+possible in this situation), thus avoiding the addition of a spurious merge
+commit.
+
+The guidelines laid out above are just that: guidelines.  There will always
+be situations that call out for a different solution, and these guidelines
+should not prevent developers from doing the right thing when the need
+arises.  But one should always think about whether the need has truly
+arisen and be prepared to explain why something abnormal needs to be done. 
-- 
cgit 


From 76e3bcdb61cad2c58ad88b2e4c70c69769026d37 Mon Sep 17 00:00:00 2001
From: Dennis Restle <derestle@htwg-konstanz.de>
Date: Tue, 30 Apr 2019 01:01:49 +0200
Subject: KVM: fix typo in documentation

The documentation mentions a non-existing capability KVM_CAP_USER_MEM.s
The right name is KVM_CAP_USER_MEMORY.

Signed-off-by: Dennis Restle <derestle@htwg-konstanz.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index ba6c42c576dd..f5616b441af8 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1079,7 +1079,7 @@ yet and must be cleared on entry.
 
 4.35 KVM_SET_USER_MEMORY_REGION
 
-Capability: KVM_CAP_USER_MEM
+Capability: KVM_CAP_USER_MEMORY
 Architectures: all
 Type: vm ioctl
 Parameters: struct kvm_userspace_memory_region (in)
-- 
cgit 


From 4fe0676b04edc5032ffdc3fed00b670e1cfef049 Mon Sep 17 00:00:00 2001
From: Jeffrey Hugo <jeffrey.l.hugo@gmail.com>
Date: Mon, 17 Jun 2019 11:37:16 -0700
Subject: dt-bindings: qcom_spmi: Document PM8005 regulators

Document the dt bindings for the PM8005 regulators which are usually used
for VDD of standalone blocks on a SoC like the GPU.

Signed-off-by: Jeffrey Hugo <jeffrey.l.hugo@gmail.com>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
index 406f2e570c50..ba94bc2d407a 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
@@ -4,6 +4,7 @@ Qualcomm SPMI Regulators
 	Usage: required
 	Value type: <string>
 	Definition: must be one of:
+			"qcom,pm8005-regulators"
 			"qcom,pm8841-regulators"
 			"qcom,pm8916-regulators"
 			"qcom,pm8941-regulators"
@@ -120,6 +121,9 @@ The regulator node houses sub-nodes for each regulator within the device. Each
 sub-node is identified using the node's name, with valid values listed for each
 of the PMICs below.
 
+pm8005:
+	s1, s2, s3, s4
+
 pm8841:
 	s1, s2, s3, s4, s5, s6, s7, s8
 
-- 
cgit 


From bbd7992e6a32a3a52f3c26c86eba81037651c5c6 Mon Sep 17 00:00:00 2001
From: Jorge Ramirez <jorge.ramirez-ortiz@linaro.org>
Date: Mon, 17 Jun 2019 11:38:15 -0700
Subject: dt-bindings: qcom_spmi: Document pms405 support

The PMS405 supports 5 SMPS and 13 LDO regulators.

Signed-off-by: Jorge Ramirez-Ortiz <jorge.ramirez-ortiz@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jeffrey Hugo <jeffrey.l.hugo@gmail.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/qcom,spmi-regulator.txt         | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
index ba94bc2d407a..430b8622bda1 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
@@ -10,6 +10,7 @@ Qualcomm SPMI Regulators
 			"qcom,pm8941-regulators"
 			"qcom,pm8994-regulators"
 			"qcom,pmi8994-regulators"
+			"qcom,pms405-regulators"
 
 - interrupts:
 	Usage: optional
@@ -111,6 +112,23 @@ Qualcomm SPMI Regulators
 	Definition: Reference to regulator supplying the input pin, as
 		    described in the data sheet.
 
+- vdd_l1_l2-supply:
+- vdd_l3_l8-supply:
+- vdd_l4-supply:
+- vdd_l5_l6-supply:
+- vdd_l10_l11_l12_l13-supply:
+- vdd_l7-supply:
+- vdd_l9-supply:
+- vdd_s1-supply:
+- vdd_s2-supply:
+- vdd_s3-supply:
+- vdd_s4-supply:
+- vdd_s5-supply
+	Usage: optional (pms405 only)
+	Value type: <phandle>
+	Definition: Reference to regulator supplying the input pin, as
+		    described in the data sheet.
+
 - qcom,saw-reg:
 	Usage: optional
 	Value type: <phandle>
-- 
cgit 


From 590714e5a3704c7db21a4f82932d3f2699edb35c Mon Sep 17 00:00:00 2001
From: Jeffrey Hugo <jeffrey.l.hugo@gmail.com>
Date: Thu, 30 May 2019 09:00:23 -0700
Subject: dt-bindings: msm/dsi: Add 10nm phy for msm8998 compatible

The DSI phy on MSM8998 is a 10nm design like SDM845, however it has some
slightly different quirks which need to be handled by drivers.  Provide
a separate compatible to assist in handling the specifics.

Signed-off-by: Jeffrey Hugo <jeffrey.l.hugo@gmail.com>
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 Documentation/devicetree/bindings/display/msm/dsi.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/display/msm/dsi.txt b/Documentation/devicetree/bindings/display/msm/dsi.txt
index 9ae946942720..af95586c898f 100644
--- a/Documentation/devicetree/bindings/display/msm/dsi.txt
+++ b/Documentation/devicetree/bindings/display/msm/dsi.txt
@@ -88,6 +88,7 @@ Required properties:
   * "qcom,dsi-phy-28nm-8960"
   * "qcom,dsi-phy-14nm"
   * "qcom,dsi-phy-10nm"
+  * "qcom,dsi-phy-10nm-8998"
 - reg: Physical base address and length of the registers of PLL, PHY. Some
   revisions require the PHY regulator base address, whereas others require the
   PHY lane base address. See below for each PHY revision.
-- 
cgit 


From 4e3ea141b5cb79ded7f4a67bb32e5b23a06a784a Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.com>
Date: Mon, 6 May 2019 08:19:15 +0200
Subject: scsi: osst: kill obsolete driver

The osst driver is becoming obsolete, as the manufacturer went out of
business ages ago, and the maintainer has no means of testing any
improvements anymore.  Plus these days flash drives are cheaper and offer a
higher capacity.  So drop it completely.

Cc: Willem Riede <osst@riede.org>
Signed-off-by: Hannes Reinece <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 Documentation/scsi/osst.txt | 218 --------------------------------------------
 1 file changed, 218 deletions(-)
 delete mode 100644 Documentation/scsi/osst.txt

(limited to 'Documentation')

diff --git a/Documentation/scsi/osst.txt b/Documentation/scsi/osst.txt
deleted file mode 100644
index 00c8ebb2fd18..000000000000
--- a/Documentation/scsi/osst.txt
+++ /dev/null
@@ -1,218 +0,0 @@
-README file for the osst driver
-===============================
-(w) Kurt Garloff <garloff@suse.de> 12/2000
-
-This file describes the osst driver as of version 0.8.x/0.9.x, the released
-version of the osst driver.
-It is intended to help advanced users to understand the role of osst and to
-get them started using (and maybe debugging) it.
-It won't address issues like "How do I compile a kernel?" or "How do I load
-a module?", as these are too basic.
-Once the OnStream got merged into the official kernel, the distro makers
-will provide the OnStream support for those who are not familiar with
-hacking their kernels.
-
-
-Purpose
--------
-The osst driver was developed, because the standard SCSI tape driver in
-Linux, st, does not support the OnStream SC-x0 SCSI tape. The st is not to
-blame for that, as the OnStream tape drives do not support the standard SCSI
-command set for Serial Access Storage Devices (SASDs), which basically
-corresponds to the QIC-157 spec.
-Nevertheless, the OnStream tapes are nice pieces of hardware and therefore
-the osst driver has been written to make these tape devs supported by Linux.
-The driver is free software. It's released under the GNU GPL and planned to
-be integrated into the mainstream kernel.
-
-
-Implementation
---------------
-The osst is a new high-level SCSI driver, just like st, sr, sd and sg. It
-can be compiled into the kernel or loaded as a module.
-As it represents a new device, it got assigned a new device node: /dev/osstX
-are character devices with major no 206 and minor numbers like the /dev/stX
-devices. If those are not present, you may create them by calling
-Makedevs.sh as root (see below).
-The driver started being a copy of st and as such, the osst devices'
-behavior looks very much the same as st to the userspace applications.
-
-
-History
--------
-In the first place, osst shared its identity very much with st. That meant
-that it used the same kernel structures and the same device node as st.
-So you could only have either of them being present in the kernel. This has
-been fixed by registering an own device, now.
-st and osst can coexist, each only accessing the devices it can support by
-themselves.
-
-
-Installation
-------------
-osst got integrated into the linux kernel. Select it during kernel
-configuration as module or compile statically into the kernel.
-Compile your kernel and install the modules.
-
-Now, your osst driver is inside the kernel or available as a module,
-depending on your choice during kernel config. You may still need to create
-the device nodes by calling the Makedevs.sh script (see below) manually.
-
-To load your module, you may use the command 
-modprobe osst
-as root. dmesg should show you, whether your OnStream tapes have been
-recognized.
-
-If you want to have the module autoloaded on access to /dev/osst, you may
-add something like
-alias char-major-206 osst
-to a file under /etc/modprobe.d/ directory.
-
-You may find it convenient to create a symbolic link 
-ln -s nosst0 /dev/tape
-to make programs assuming a default name of /dev/tape more convenient to
-use.
-
-The device nodes for osst have to be created. Use the Makedevs.sh script
-attached to this file.
-
-
-Using it
---------
-You may use the OnStream tape driver with your standard backup software,
-which may be tar, cpio, amanda, arkeia, BRU, Lone Tar, ...
-by specifying /dev/(n)osst0 as the tape device to use or using the above
-symlink trick. The IOCTLs to control tape operation are also mostly
-supported and you may try the mt (or mt_st) program to jump between
-filemarks, eject the tape, ...
-
-There's one limitation: You need to use a block size of 32kB.
-
-(This limitation is worked on and will be fixed in version 0.8.8 of
- this driver.)
-
-If you just want to get started with standard software, here is an example
-for creating and restoring a full backup:
-# Backup
-tar cvf - / --exclude /proc | buffer -s 32k -m 24M -B -t -o /dev/nosst0
-# Restore
-buffer -s 32k -m 8M -B -t -i /dev/osst0 | tar xvf - -C /
-
-The buffer command has been used to buffer the data before it goes to the
-tape (or the file system) in order to smooth out the data stream and prevent
-the tape from needing to stop and rewind. The OnStream does have an internal
-buffer and a variable speed which help this, but especially on writing, the
-buffering still proves useful in most cases. It also pads the data to
-guarantees the block size of 32k. (Otherwise you may pass the -b64 option to
-tar.)
-Expect something like 1.8MB/s for the SC-x0 drives and 0.9MB/s for the DI-30.
-The USB drive will give you about 0.7MB/s.
-On a fast machine, you may profit from software data compression (z flag for
-tar).
-
-
-USB and IDE
------------
-Via the SCSI emulation layers usb-storage and ide-scsi, you can also use the
-osst driver to drive the USB-30 and the DI-30 drives. (Unfortunately, there
-is no such layer for the parallel port, otherwise the DP-30 would work as
-well.) For the USB support, you need the latest 2.4.0-test kernels and the 
-latest usb-storage driver from 
-http://www.linux-usb.org/
-http://sourceforge.net/cvs/?group_id=3581
-
-Note that the ide-tape driver as of 1.16f uses a slightly outdated on-tape
-format and therefore is not completely interoperable with osst tapes.
-
-The ADR-x0 line is fully SCSI-2 compliant and is supported by st, not osst.
-The on-tape format is supposed to be compatible with the one used by osst.
-
-
-Feedback and updates
---------------------
-The driver development is coordinated through a mailing list
-<osst@linux1.onstream.nl>
-a CVS repository and some web pages. 
-The tester's pages which contain recent news and updated drivers to download
-can be found on
-http://sourceforge.net/projects/osst/
-
-If you find any problems, please have a look at the tester's page in order
-to see whether the problem is already known and solved. Otherwise, please
-report it to the mailing list. Your feedback is welcome. (This holds also
-for reports of successful usage, of course.) 
-In case of trouble, please do always provide the following info:
-* driver and kernel version used (see syslog)
-* driver messages (syslog)
-* SCSI config and OnStream Firmware (/proc/scsi/scsi)
-* description of error. Is it reproducible?
-* software and commands used
-
-You may subscribe to the mailing list, BTW, it's a majordomo list.
-
-
-Status
-------
-0.8.0 was the first widespread BETA release. Since then a lot of reports
-have been sent, but mostly reported success or only minor trouble.
-All the issues have been addressed.
-Check the web pages for more info about the current developments.
-0.9.x is the tree for the 2.3/2.4 kernel.
-
-
-Acknowledgments
-----------------
-The driver has been started by making a copy of Kai Makisara's st driver.
-Most of the development has been done by Willem Riede. The presence of the
-userspace program osg (onstreamsg) from Terry Hardie has been rather
-helpful. The same holds for Gadi Oxman's ide-tape support for the DI-30.
-I did add some patches to those drivers as well and coordinated things a
-little bit. 
-Note that most of them did mostly spend their spare time for the creation of
-this driver.
-The people from OnStream, especially Jack Bombeeck did support this project
-and always tried to answer HW or FW related questions. Furthermore, he
-pushed the FW developers to do the right things.
-SuSE did support this project by allowing me to work on it during my working
-time for them and by integrating the driver into their distro.
-
-More people did help by sending useful comments. Sorry to those who have
-been forgotten. Thanks to all the GNU/FSF and Linux developers who made this
-platform such an interesting, nice and stable platform.
-Thanks go to those who tested the drivers and did send useful reports. Your
-help is needed!
-
-
-Makedevs.sh
------------
-#!/bin/sh
-# Script to create OnStream SC-x0 device nodes (major 206)
-# Usage: Makedevs.sh [nos [path to dev]]
-# $Id: README.osst.kernel,v 1.4 2000/12/20 14:13:15 garloff Exp $
-major=206
-nrs=4
-dir=/dev
-test -z "$1" || nrs=$1
-test -z "$2" || dir=$2
-declare -i nr
-nr=0
-test -d $dir || mkdir -p $dir
-while test $nr -lt $nrs; do
-  mknod $dir/osst$nr c $major $nr
-  chown 0.disk $dir/osst$nr; chmod 660 $dir/osst$nr;
-  mknod $dir/nosst$nr c $major $[nr+128]
-  chown 0.disk $dir/nosst$nr; chmod 660 $dir/nosst$nr;
-  mknod $dir/osst${nr}l c $major $[nr+32]
-  chown 0.disk $dir/osst${nr}l; chmod 660 $dir/osst${nr}l;
-  mknod $dir/nosst${nr}l c $major $[nr+160]
-  chown 0.disk $dir/nosst${nr}l; chmod 660 $dir/nosst${nr}l;
-  mknod $dir/osst${nr}m c $major $[nr+64]
-  chown 0.disk $dir/osst${nr}m; chmod 660 $dir/osst${nr}m;
-  mknod $dir/nosst${nr}m c $major $[nr+192]
-  chown 0.disk $dir/nosst${nr}m; chmod 660 $dir/nosst${nr}m;
-  mknod $dir/osst${nr}a c $major $[nr+96]
-  chown 0.disk $dir/osst${nr}a; chmod 660 $dir/osst${nr}a;
-  mknod $dir/nosst${nr}a c $major $[nr+224]
-  chown 0.disk $dir/nosst${nr}a; chmod 660 $dir/nosst${nr}a;
-  let nr+=1
-done
-- 
cgit 


From d4993e19da6e7eb18eeb509aeebb6796650a3c1f Mon Sep 17 00:00:00 2001
From: Yash Shah <yash.shah@sifive.com>
Date: Tue, 18 Jun 2019 13:26:07 +0530
Subject: macb: bindings doc: add sifive fu540-c000 binding

Add the compatibility string documentation for SiFive FU540-C0000
interface.
On the FU540, this driver also needs to read and write registers in a
management IP block that monitors or drives boundary signals for the
GEMGXL IP block that are not directly mapped to GEMGXL registers.
Therefore, add additional range to "reg" property for SiFive GEMGXL
management IP registers.

Signed-off-by: Yash Shah <yash.shah@sifive.com>
Reviewed-by: Paul Walmsley <paul.walmsley@sifive.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/macb.txt | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt
index 9c5e94482b5f..63c73fafe26d 100644
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -15,8 +15,11 @@ Required properties:
   Use "atmel,sama5d4-gem" for the GEM IP (10/100) available on Atmel sama5d4 SoCs.
   Use "cdns,zynq-gem" Xilinx Zynq-7xxx SoC.
   Use "cdns,zynqmp-gem" for Zynq Ultrascale+ MPSoC.
+  Use "sifive,fu540-macb" for SiFive FU540-C000 SoC.
   Or the generic form: "cdns,emac".
 - reg: Address and length of the register set for the device
+	For "sifive,fu540-macb", second range is required to specify the
+	address and length of the registers for GEMGXL Management block.
 - interrupts: Should contain macb interrupt
 - phy-mode: See ethernet.txt file in the same directory.
 - clock-names: Tuple listing input clock names.
-- 
cgit 


From 7c42f43c29e2613536f34a39bf42df6fb838989a Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Wed, 22 May 2019 11:19:16 -0500
Subject: dt-bindings: arm: ti: Add bindings for J721E SoC

The J721E SoC belongs to the K3 Multicore SoC architecture platform,
providing advanced system integration to enable lower system costs
of automotive applications such as infotainment, cluster, premium
Audio, Gateway, industrial and a range of broad market applications.
This SoC is designed around reducing the system cost by eliminating
the need of an external system MCU and is targeted towards ASIL-B/C
certification/requirements in addition to allowing complex software
and system use-cases.

Some highlights of this SoC are:
* Dual Cortex-A72s in a single cluster, three clusters of lockstep
  capable dual Cortex-R5F MCUs, Deep-learning Matrix Multiply Accelerator(MMA),
  C7x floating point Vector DSP, Two C66x floating point DSPs.
* 3D GPU PowerVR Rogue 8XE GE8430
* Vision Processing Accelerator (VPAC) with image signal processor and Depth
  and Motion Processing Accelerator (DMPAC)
* Two Gigabit Industrial Communication Subsystems (ICSSG), each with dual
  PRUs and dual RTUs
* Two CSI2.0 4L RX plus one CSI2.0 4L TX, one eDP/DP, One DSI Tx, and
  up to two DPI interfaces.
* Integrated Ethernet switch supporting up to a total of 8 external ports in
  addition to legacy Ethernet switch of up to 2 ports.
* System MMU (SMMU) Version 3.0 and advanced virtualisation
  capabilities.
* Upto 4 PCIe-GEN3 controllers, 2 USB3.0 Dual-role device subsystems,
  16 MCANs, 12 McASP, eMMC and SD, UFS, OSPI/HyperBus memory controller, QSPI,
  I3C and I2C, eCAP/eQEP, eHRPWM, MLB among other peripherals.
* Two hardware accelerator block containing AES/DES/SHA/MD5 called SA2UL
  management.
* Configurable L3 Cache and IO-coherent architecture with high data throughput
  capable distributed DMA architecture under NAVSS
* Centralized System Controller for Security, Power, and Resource
  Management (DMSC)

See J721E Technical Reference Manual (SPRUIL1, May 2019)
for further details: http://www.ti.com/lit/pdf/spruil1

Signed-off-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 Documentation/devicetree/bindings/arm/ti/k3.txt | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/ti/k3.txt b/Documentation/devicetree/bindings/arm/ti/k3.txt
index 6a059cabb2da..333e7256126a 100644
--- a/Documentation/devicetree/bindings/arm/ti/k3.txt
+++ b/Documentation/devicetree/bindings/arm/ti/k3.txt
@@ -13,6 +13,9 @@ architecture it uses, using one of the following compatible values:
 - AM654
   compatible = "ti,am654";
 
+- J721E
+  compatible = "ti,j721e";
+
 Boards
 ------
 
-- 
cgit 


From e28c6d941dacbdd0d6fdda2dfe009fb72f6b1f8f Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Wed, 22 May 2019 11:19:17 -0500
Subject: dt-bindings: serial: 8250_omap: Add compatible for J721E UART
 controller

J721e uses a UART controller that is compatible with AM654 UART.
Introduce a specific compatible to help handle the differences if
necessary.

Cc: Sekhar Nori <nsekhar@ti.com>
Cc: Vignesh R <vigneshr@ti.com>
Signed-off-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 Documentation/devicetree/bindings/serial/omap_serial.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/serial/omap_serial.txt b/Documentation/devicetree/bindings/serial/omap_serial.txt
index 0a9b5444f4e6..dcba86b0a0d0 100644
--- a/Documentation/devicetree/bindings/serial/omap_serial.txt
+++ b/Documentation/devicetree/bindings/serial/omap_serial.txt
@@ -1,6 +1,7 @@
 OMAP UART controller
 
 Required properties:
+- compatible : should be "ti,j721e-uart", "ti,am654-uart" for J721E controllers
 - compatible : should be "ti,am654-uart" for AM654 controllers
 - compatible : should be "ti,omap2-uart" for OMAP2 controllers
 - compatible : should be "ti,omap3-uart" for OMAP3 controllers
-- 
cgit 


From d909f9109c301f4e9e41678025c45d719ab8f7d7 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 10 Jun 2019 13:08:18 +1000
Subject: powerpc/64s/radix: Enable HAVE_ARCH_HUGE_VMAP

This sets the HAVE_ARCH_HUGE_VMAP option, and defines the required
page table functions.

This enables huge (2MB and 1GB) ioremap mappings. I don't have a
benchmark for this change, but huge vmap will be used by a later core
kernel change to enable huge vmalloc memory mappings. This improves
cached `git diff` performance by about 5% on a 2-node POWER9 with 32MB
size dentry cache hash.

  Profiling git diff dTLB misses with a vanilla kernel:

  81.75%  git      [kernel.vmlinux]    [k] __d_lookup_rcu
   7.21%  git      [kernel.vmlinux]    [k] strncpy_from_user
   1.77%  git      [kernel.vmlinux]    [k] find_get_entry
   1.59%  git      [kernel.vmlinux]    [k] kmem_cache_free

            40,168      dTLB-miss
       0.100342754 seconds time elapsed

  With powerpc huge vmalloc:

             2,987      dTLB-miss
       0.095933138 seconds time elapsed

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 Documentation/admin-guide/kernel-parameters.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..a4c3538857e9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2927,7 +2927,7 @@
 			register save and restore. The kernel will only save
 			legacy floating-point registers on task switch.
 
-	nohugeiomap	[KNL,x86] Disable kernel huge I/O mappings.
+	nohugeiomap	[KNL,x86,PPC] Disable kernel huge I/O mappings.
 
 	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
 			Equivalent to smt=1.
-- 
cgit 


From 6af386db05635db7cc3fac4d1d38a83b953f7251 Mon Sep 17 00:00:00 2001
From: "Lowry Li (Arm Technology China)" <Lowry.Li@arm.com>
Date: Thu, 6 Jun 2019 10:53:10 +0100
Subject: dt/bindings: drm/komeda: Adds SMMU support for D71 devicetree

Updates the device-tree doc about how to enable SMMU by devicetree.

Signed-off-by: Lowry Li (Arm Technology China) <lowry.li@arm.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: James Qian Wang (Arm Technology China) <james.qian.wang@arm.com>
Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
---
 Documentation/devicetree/bindings/display/arm,komeda.txt | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/display/arm,komeda.txt b/Documentation/devicetree/bindings/display/arm,komeda.txt
index 02b226532ebd..b12c0453a421 100644
--- a/Documentation/devicetree/bindings/display/arm,komeda.txt
+++ b/Documentation/devicetree/bindings/display/arm,komeda.txt
@@ -11,6 +11,10 @@ Required properties:
       - "pclk": for the APB interface clock
 - #address-cells: Must be 1
 - #size-cells: Must be 0
+- iommus: configure the stream id to IOMMU, Must be configured if want to
+    enable iommu in display. for how to configure this node please reference
+        devicetree/bindings/iommu/arm,smmu-v3.txt,
+        devicetree/bindings/iommu/iommu.txt
 
 Required properties for sub-node: pipeline@nq
 Each device contains one or two pipeline sub-nodes (at least one), each
@@ -44,6 +48,9 @@ Example:
 		interrupts = <0 168 4>;
 		clocks = <&dpu_mclk>, <&dpu_aclk>;
 		clock-names = "mclk", "pclk";
+		iommus = <&smmu 0>, <&smmu 1>, <&smmu 2>, <&smmu 3>,
+			<&smmu 4>, <&smmu 5>, <&smmu 6>, <&smmu 7>,
+			<&smmu 8>, <&smmu 9>;
 
 		dp0_pipe0: pipeline@0 {
 			clocks = <&fpgaosc2>, <&dpu_aclk>;
-- 
cgit 


From 619053220a7d5620d26d60d1848ca9c681bea830 Mon Sep 17 00:00:00 2001
From: "james qian wang (Arm Technology China)" <james.qian.wang@arm.com>
Date: Wed, 5 Jun 2019 11:35:45 +0100
Subject: dt/bindings: drm/komeda: Unify mclk/pclk/pipeline->aclk to one ACLK

Current komeda driver uses three dedicated clks for a specific purpose:
- mclk: main engine clock
- pclk: APB clock
- pipeline->aclk: AXI clock.

But per spec the komeda HW only has three input clks:
- ACLK: used for AXI masters, APB slave and most pipeline processing
- PXCLK for pipeline 0: output pixel clock for pipeline 0
- PXCLK for pipeline 1: output pixel clock for pipeline 1

So one ACLK is enough, no need to split it to three mclk/pclk/axiclk.

Signed-off-by: James Qian Wang (Arm Technology China) <james.qian.wang@arm.com>
Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
---
 Documentation/devicetree/bindings/display/arm,komeda.txt | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/display/arm,komeda.txt b/Documentation/devicetree/bindings/display/arm,komeda.txt
index b12c0453a421..8513695ee47f 100644
--- a/Documentation/devicetree/bindings/display/arm,komeda.txt
+++ b/Documentation/devicetree/bindings/display/arm,komeda.txt
@@ -7,8 +7,7 @@ Required properties:
 - clocks: A list of phandle + clock-specifier pairs, one for each entry
     in 'clock-names'
 - clock-names: A list of clock names. It should contain:
-      - "mclk": for the main processor clock
-      - "pclk": for the APB interface clock
+      - "aclk": for the main processor clock
 - #address-cells: Must be 1
 - #size-cells: Must be 0
 - iommus: configure the stream id to IOMMU, Must be configured if want to
@@ -24,7 +23,6 @@ pipeline node should provide properties:
     in 'clock-names'
 - clock-names: should contain:
       - "pxclk": pixel clock
-      - "aclk": AXI interface clock
 
 - port: each pipeline connect to an encoder input port. The connection is
     modeled using the OF graph bindings specified in
@@ -46,15 +44,15 @@ Example:
 		compatible = "arm,mali-d71";
 		reg = <0xc00000 0x20000>;
 		interrupts = <0 168 4>;
-		clocks = <&dpu_mclk>, <&dpu_aclk>;
-		clock-names = "mclk", "pclk";
+		clocks = <&dpu_aclk>;
+		clock-names = "aclk";
 		iommus = <&smmu 0>, <&smmu 1>, <&smmu 2>, <&smmu 3>,
 			<&smmu 4>, <&smmu 5>, <&smmu 6>, <&smmu 7>,
 			<&smmu 8>, <&smmu 9>;
 
 		dp0_pipe0: pipeline@0 {
-			clocks = <&fpgaosc2>, <&dpu_aclk>;
-			clock-names = "pxclk", "aclk";
+			clocks = <&fpgaosc2>;
+			clock-names = "pxclk";
 			reg = <0>;
 
 			port {
@@ -65,8 +63,8 @@ Example:
 		};
 
 		dp0_pipe1: pipeline@1 {
-			clocks = <&fpgaosc2>, <&dpu_aclk>;
-			clock-names = "pxclk", "aclk";
+			clocks = <&fpgaosc2>;
+			clock-names = "pxclk";
 			reg = <1>;
 
 			port {
-- 
cgit 


From 74a22e8f4350a9e096c84fc9e88cf72abf71887c Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Date: Tue, 18 Jun 2019 16:18:37 +0100
Subject: dt-bindings: display: renesas: Add r8a774a1 support

Document RZ/G2M (R8A774A1) SoC bindings.

Signed-off-by: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt b/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
index a41d280c3f9f..db680413e89c 100644
--- a/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
+++ b/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
@@ -12,10 +12,12 @@ following device-specific properties.
 Required properties:
 
 - compatible : Shall contain one or more of
+  - "renesas,r8a774a1-hdmi" for R8A774A1 (RZ/G2M) compatible HDMI TX
   - "renesas,r8a7795-hdmi" for R8A7795 (R-Car H3) compatible HDMI TX
   - "renesas,r8a7796-hdmi" for R8A7796 (R-Car M3-W) compatible HDMI TX
   - "renesas,r8a77965-hdmi" for R8A77965 (R-Car M3-N) compatible HDMI TX
-  - "renesas,rcar-gen3-hdmi" for the generic R-Car Gen3 compatible HDMI TX
+  - "renesas,rcar-gen3-hdmi" for the generic R-Car Gen3 and RZ/G2 compatible
+			     HDMI TX
 
     When compatible with generic versions, nodes must list the SoC-specific
     version corresponding to the platform first, followed by the
-- 
cgit 


From 6ca00dfafda731d6eafdc164326e7336cdf42d74 Mon Sep 17 00:00:00 2001
From: Liran Alon <liran.alon@oracle.com>
Date: Sun, 16 Jun 2019 15:03:10 +0300
Subject: KVM: x86: Modify struct kvm_nested_state to have explicit fields for
 data

Improve the KVM_{GET,SET}_NESTED_STATE structs by detailing the format
of VMX nested state data in a struct.

In order to avoid changing the ioctl values of
KVM_{GET,SET}_NESTED_STATE, there is a need to preserve
sizeof(struct kvm_nested_state). This is done by defining the data
struct as "data.vmx[0]". It was the most elegant way I found to
preserve struct size while still keeping struct readable and easy to
maintain. It does have a misfortunate side-effect that now it has to be
accessed as "data.vmx[0]" rather than just "data.vmx".

Because we are already modifying these structs, I also modified the
following:
* Define the "format" field values as macros.
* Rename vmcs_pa to vmcs12_pa for better readability.

Signed-off-by: Liran Alon <liran.alon@oracle.com>
[Remove SVM stubs, add KVM_STATE_NESTED_VMX_VMCS12_SIZE. - Paolo]
Reviewed-by: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 46 ++++++++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 15 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index f5616b441af8..2a4531bb06bd 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3857,43 +3857,59 @@ Type: vcpu ioctl
 Parameters: struct kvm_nested_state (in/out)
 Returns: 0 on success, -1 on error
 Errors:
-  E2BIG:     the total state size (including the fixed-size part of struct
-             kvm_nested_state) exceeds the value of 'size' specified by
+  E2BIG:     the total state size exceeds the value of 'size' specified by
              the user; the size required will be written into size.
 
 struct kvm_nested_state {
 	__u16 flags;
 	__u16 format;
 	__u32 size;
+
 	union {
-		struct kvm_vmx_nested_state vmx;
-		struct kvm_svm_nested_state svm;
+		struct kvm_vmx_nested_state_hdr vmx;
+		struct kvm_svm_nested_state_hdr svm;
+
+		/* Pad the header to 128 bytes.  */
 		__u8 pad[120];
-	};
-	__u8 data[0];
+	} hdr;
+
+	union {
+		struct kvm_vmx_nested_state_data vmx[0];
+		struct kvm_svm_nested_state_data svm[0];
+	} data;
 };
 
 #define KVM_STATE_NESTED_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
+#define KVM_STATE_NESTED_EVMCS		0x00000004
 
-#define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
-#define KVM_STATE_NESTED_SMM_VMXON	0x00000002
+#define KVM_STATE_NESTED_FORMAT_VMX		0
+#define KVM_STATE_NESTED_FORMAT_SVM		1
 
-struct kvm_vmx_nested_state {
+#define KVM_STATE_NESTED_VMX_VMCS_SIZE		0x1000
+
+#define KVM_STATE_NESTED_VMX_SMM_GUEST_MODE	0x00000001
+#define KVM_STATE_NESTED_VMX_SMM_VMXON		0x00000002
+
+struct kvm_vmx_nested_state_hdr {
 	__u64 vmxon_pa;
-	__u64 vmcs_pa;
+	__u64 vmcs12_pa;
 
 	struct {
 		__u16 flags;
 	} smm;
 };
 
+struct kvm_vmx_nested_state_data {
+	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+};
+
 This ioctl copies the vcpu's nested virtualization state from the kernel to
 userspace.
 
-The maximum size of the state, including the fixed-size part of struct
-kvm_nested_state, can be retrieved by passing KVM_CAP_NESTED_STATE to
-the KVM_CHECK_EXTENSION ioctl().
+The maximum size of the state can be retrieved by passing KVM_CAP_NESTED_STATE
+to the KVM_CHECK_EXTENSION ioctl().
 
 4.115 KVM_SET_NESTED_STATE
 
@@ -3903,8 +3919,8 @@ Type: vcpu ioctl
 Parameters: struct kvm_nested_state (in)
 Returns: 0 on success, -1 on error
 
-This copies the vcpu's kvm_nested_state struct from userspace to the kernel.  For
-the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
+This copies the vcpu's kvm_nested_state struct from userspace to the kernel.
+For the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
 
 4.116 KVM_(UN)REGISTER_COALESCED_MMIO
 
-- 
cgit 


From 504b69eb3c95180bc59f1ae9096ad4b10bbbf254 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 19 Jun 2019 16:10:15 +0100
Subject: keys: Fix request_key() lack of Link perm check on found key

The request_key() syscall allows a process to gain access to the 'possessor'
permits of any key that grants it Search permission by virtue of request_key()
not checking whether a key it finds grants Link permission to the caller.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/security/keys/core.rst | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index 823d29bf44f7..82dd457ff78d 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -433,6 +433,10 @@ The main syscalls are:
      /sbin/request-key will be invoked in an attempt to obtain a key. The
      callout_info string will be passed as an argument to the program.
 
+     To link a key into the destination keyring the key must grant link
+     permission on the key to the caller and the keyring must grant write
+     permission.
+
      See also Documentation/security/keys/request-key.rst.
 
 
-- 
cgit 


From e59428f721ee096d8a020504ea908a6f0d952735 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 19 Jun 2019 16:10:15 +0100
Subject: keys: Move the RCU locks outwards from the keyring search functions

Move the RCU locks outwards from the keyring search functions so that it
will become possible to provide an RCU-capable partial request_key()
function in a later commit.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/security/keys/request-key.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/security/keys/request-key.rst b/Documentation/security/keys/request-key.rst
index 600ad67d1707..07af991463b5 100644
--- a/Documentation/security/keys/request-key.rst
+++ b/Documentation/security/keys/request-key.rst
@@ -148,7 +148,7 @@ The Search Algorithm
 
 A search of any particular keyring proceeds in the following fashion:
 
-  1) When the key management code searches for a key (keyring_search_aux) it
+  1) When the key management code searches for a key (keyring_search_rcu) it
      firstly calls key_permission(SEARCH) on the keyring it's starting with,
      if this denies permission, it doesn't search further.
 
-- 
cgit 


From 896f1950e5944532b971d880a6bae7fba3b6a8d3 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 19 Jun 2019 16:10:15 +0100
Subject: keys: Provide request_key_rcu()

Provide a request_key_rcu() function that can be used to request a key
under RCU conditions.  It can only search and check permissions; it cannot
allocate a new key, upcall or wait for an upcall to complete.  It may
return a partially constructed key.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/security/keys/core.rst        | 10 ++++++++++
 Documentation/security/keys/request-key.rst |  9 +++++++++
 2 files changed, 19 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index 82dd457ff78d..003f1452a5b7 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -1147,6 +1147,16 @@ payload contents" for more information.
     case error ERESTARTSYS will be returned.
 
 
+ *  To search for a key under RCU conditions, call::
+
+	struct key *request_key_rcu(const struct key_type *type,
+				    const char *description);
+
+    which is similar to request_key() except that it does not check for keys
+    that are under construction and it will not call out to userspace to
+    construct a key if it can't find a match.
+
+
  *  When it is no longer required, the key should be released using::
 
 	void key_put(struct key *key);
diff --git a/Documentation/security/keys/request-key.rst b/Documentation/security/keys/request-key.rst
index 07af991463b5..7caedc4d29f1 100644
--- a/Documentation/security/keys/request-key.rst
+++ b/Documentation/security/keys/request-key.rst
@@ -36,6 +36,11 @@ or::
 					     	   size_t callout_len,
 						   void *aux);
 
+or::
+
+	struct key *request_key_rcu(const struct key_type *type,
+				    const char *description);
+
 Or by userspace invoking the request_key system call::
 
 	key_serial_t request_key(const char *type,
@@ -57,6 +62,10 @@ The two async in-kernel calls may return keys that are still in the process of
 being constructed.  The two non-async ones will wait for construction to
 complete first.
 
+The request_key_rcu() call is like the in-kernel request_key() call, except
+that it doesn't check for keys that are under construction and doesn't attempt
+to construct missing keys.
+
 The userspace interface links the key to a keyring associated with the process
 to prevent the key from going away, and returns the serial number of the key to
 the caller.
-- 
cgit 


From 7743c48e54ee9be9c799cbf3b8e3e9f2b8d19e72 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 19 Jun 2019 16:10:15 +0100
Subject: keys: Cache result of request_key*() temporarily in task_struct

If a filesystem uses keys to hold authentication tokens, then it needs a
token for each VFS operation that might perform an authentication check -
either by passing it to the server, or using to perform a check based on
authentication data cached locally.

For open files this isn't a problem, since the key should be cached in the
file struct since it represents the subject performing operations on that
file descriptor.

During pathwalk, however, there isn't anywhere to cache the key, except
perhaps in the nameidata struct - but that isn't exposed to the
filesystems.  Further, a pathwalk can incur a lot of operations, calling
one or more of the following, for instance:

	->lookup()
	->permission()
	->d_revalidate()
	->d_automount()
	->get_acl()
	->getxattr()

on each dentry/inode it encounters - and each one may need to call
request_key().  And then, at the end of pathwalk, it will call the actual
operation:

	->mkdir()
	->mknod()
	->getattr()
	->open()
	...

which may need to go and get the token again.

However, it is very likely that all of the operations on a single
dentry/inode - and quite possibly a sequence of them - will all want to use
the same authentication token, which suggests that caching it would be a
good idea.

To this end:

 (1) Make it so that a positive result of request_key() and co. that didn't
     require upcalling to userspace is cached temporarily in task_struct.

 (2) The cache is 1 deep, so a new result displaces the old one.

 (3) The key is released by exit and by notify-resume.

 (4) The cache is cleared in a newly forked process.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/security/keys/request-key.rst | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/security/keys/request-key.rst b/Documentation/security/keys/request-key.rst
index 7caedc4d29f1..45049abdf290 100644
--- a/Documentation/security/keys/request-key.rst
+++ b/Documentation/security/keys/request-key.rst
@@ -176,6 +176,9 @@ The process stops immediately a valid key is found with permission granted to
 use it.  Any error from a previous match attempt is discarded and the key is
 returned.
 
+When request_key() is invoked, if CONFIG_KEYS_REQUEST_CACHE=y, a per-task
+one-key cache is first checked for a match.
+
 When search_process_keyrings() is invoked, it performs the following searches
 until one succeeds:
 
@@ -195,7 +198,9 @@ until one succeeds:
       c) The calling process's session keyring is searched.
 
 The moment one succeeds, all pending errors are discarded and the found key is
-returned.
+returned.  If CONFIG_KEYS_REQUEST_CACHE=y, then that key is placed in the
+per-task cache, displacing the previous key.  The cache is cleared on exit or
+just prior to resumption of userspace.
 
 Only if all these fail does the whole thing fail with the highest priority
 error.  Note that several errors may have come from LSM.
-- 
cgit 


From 191fa92b344831e3d1b15e0bf698dfe9755a81d2 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 17 Jun 2019 14:02:40 +0200
Subject: s390/sclp: remove call home support

This feature has never been used, so remove it.

Acked-by: Vasily Gorbik <gor@linux.ibm.com>
Acked-by: Hendrik Brueckner <brueckner@linux.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 Documentation/sysctl/kernel.txt | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index f0c86fbb3b48..5af8b131ccbc 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -23,7 +23,6 @@ show up in /proc/sys/kernel:
 - auto_msgmni
 - bootloader_type	     [ X86 only ]
 - bootloader_version	     [ X86 only ]
-- callhome		     [ S390 only ]
 - cap_last_cap
 - core_pattern
 - core_pipe_limit
@@ -171,21 +170,6 @@ Documentation/x86/boot.txt for additional information.
 
 ==============================================================
 
-callhome:
-
-Controls the kernel's callhome behavior in case of a kernel panic.
-
-The s390 hardware allows an operating system to send a notification
-to a service organization (callhome) in case of an operating system panic.
-
-When the value in this file is 0 (which is the default behavior)
-nothing happens in case of a kernel panic. If this value is set to "1"
-the complete kernel oops message is send to the IBM customer service
-organization in case the mainframe the Linux operating system is running
-on has a service contract with IBM.
-
-==============================================================
-
 cap_last_cap
 
 Highest valid capability of the running kernel.  Exports
-- 
cgit 


From 2966f8d440c3d2b6ef6c44093a9f9c42701a077a Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 3 May 2019 13:13:44 -0400
Subject: Documentation: atomic_t.txt: Explain ordering provided by
 smp_mb__{before,after}_atomic()

The description of smp_mb__before_atomic() and smp_mb__after_atomic()
in Documentation/atomic_t.txt is slightly terse and misleading.  It
does not clearly state which other instructions are ordered by these
barriers.

This improves the text to make the actual ordering implications clear,
and also to explain how these barriers differ from a RELEASE or
ACQUIRE ordering.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Acked-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 Documentation/atomic_t.txt | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt
index dca3fb0554db..b3afe69d03a1 100644
--- a/Documentation/atomic_t.txt
+++ b/Documentation/atomic_t.txt
@@ -187,8 +187,14 @@ The barriers:
 
   smp_mb__{before,after}_atomic()
 
-only apply to the RMW ops and can be used to augment/upgrade the ordering
-inherent to the used atomic op. These barriers provide a full smp_mb().
+only apply to the RMW atomic ops and can be used to augment/upgrade the
+ordering inherent to the op. These barriers act almost like a full smp_mb():
+smp_mb__before_atomic() orders all earlier accesses against the RMW op
+itself and all accesses following it, and smp_mb__after_atomic() orders all
+later accesses against the RMW op and all accesses preceding it. However,
+accesses between the smp_mb__{before,after}_atomic() and the RMW op are not
+ordered, so it is advisable to place the barrier right next to the RMW atomic
+op whenever possible.
 
 These helper barriers exist because architectures have varying implicit
 ordering on their SMP atomic primitives. For example our TSO architectures
@@ -212,7 +218,9 @@ Further, while something like:
   atomic_dec(&X);
 
 is a 'typical' RELEASE pattern, the barrier is strictly stronger than
-a RELEASE. Similarly for something like:
+a RELEASE because it orders preceding instructions against both the read
+and write parts of the atomic_dec(), and against all following instructions
+as well. Similarly, something like:
 
   atomic_inc(&X);
   smp_mb__after_atomic();
@@ -244,7 +252,8 @@ strictly stronger than ACQUIRE. As illustrated:
 
 This should not happen; but a hypothetical atomic_inc_acquire() --
 (void)atomic_fetch_inc_acquire() for instance -- would allow the outcome,
-since then:
+because it would not order the W part of the RMW against the following
+WRITE_ONCE.  Thus:
 
   P1			P2
 
-- 
cgit 


From fce677d7e8f0cab84864674d7fc74d1f389eee6f Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 19 Jun 2019 07:39:43 +0200
Subject: docs: fb: Add TER16x32 to the available font names

The new font is available since recently.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/fb/fbcon.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/fb/fbcon.txt b/Documentation/fb/fbcon.txt
index 60a5ec04e8f0..5a865437b33f 100644
--- a/Documentation/fb/fbcon.txt
+++ b/Documentation/fb/fbcon.txt
@@ -79,7 +79,7 @@ C. Boot options
 
         Select the initial font to use. The value 'name' can be any of the
         compiled-in fonts: 10x18, 6x10, 7x14, Acorn8x8, MINI4x6,
-        PEARL8x8, ProFont6x11, SUN12x22, SUN8x16, VGA8x16, VGA8x8.
+        PEARL8x8, ProFont6x11, SUN12x22, SUN8x16, TER16x32, VGA8x16, VGA8x8.
 
 	Note, not all drivers can handle font with widths not divisible by 8,
         such as vga16fb.
-- 
cgit 


From 75d7627f49c54e2eed4b785112d0104e2954d27c Mon Sep 17 00:00:00 2001
From: Gavin Schenk <g.schenk@eckelmann.de>
Date: Tue, 18 Jun 2019 08:04:12 +0200
Subject: MAINTAINERS / Documentation: Thorsten Scherer is the successor of
 Gavin Schenk
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Due to new challenges in my life I can no longer take care of SIOX.
Thorsten takes over my SIOX tasks.

Signed-off-by: Gavin Schenk <g.schenk@eckelmann.de>
Acked-by: Thorsten Scherer <t.scherer@eckelmann.de>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-bus-siox | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-bus-siox b/Documentation/ABI/testing/sysfs-bus-siox
index fed7c3765a4e..c2a403f20b90 100644
--- a/Documentation/ABI/testing/sysfs-bus-siox
+++ b/Documentation/ABI/testing/sysfs-bus-siox
@@ -1,6 +1,6 @@
 What:		/sys/bus/siox/devices/siox-X/active
 KernelVersion:	4.16
-Contact:	Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact:	Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
 Description:
 		On reading represents the current state of the bus. If it
 		contains a "0" the bus is stopped and connected devices are
@@ -12,7 +12,7 @@ Description:
 
 What:		/sys/bus/siox/devices/siox-X/device_add
 KernelVersion:	4.16
-Contact:	Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact:	Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
 Description:
 		Write-only file. Write
 
@@ -27,13 +27,13 @@ Description:
 
 What:		/sys/bus/siox/devices/siox-X/device_remove
 KernelVersion:	4.16
-Contact:	Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact:	Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
 Description:
 		Write-only file. A single write removes the last device in the siox chain.
 
 What:		/sys/bus/siox/devices/siox-X/poll_interval_ns
 KernelVersion:	4.16
-Contact:	Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact:	Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
 Description:
 		Defines the interval between two poll cycles in nano seconds.
 		Note this is rounded to jiffies on writing. On reading the current value
@@ -41,33 +41,33 @@ Description:
 
 What:		/sys/bus/siox/devices/siox-X-Y/connected
 KernelVersion:	4.16
-Contact:	Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact:	Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
 Description:
 		Read-only value. "0" means the Yth device on siox bus X isn't "connected" i.e.
 		communication with it is not ensured. "1" signals a working connection.
 
 What:		/sys/bus/siox/devices/siox-X-Y/inbytes
 KernelVersion:	4.16
-Contact:	Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact:	Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
 Description:
 		Read-only value reporting the inbytes value provided to siox-X/device_add
 
 What:		/sys/bus/siox/devices/siox-X-Y/status_errors
 KernelVersion:	4.16
-Contact:	Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact:	Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
 Description:
 		Counts the number of time intervals when the read status byte doesn't yield the
 		expected value.
 
 What:		/sys/bus/siox/devices/siox-X-Y/type
 KernelVersion:	4.16
-Contact:	Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact:	Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
 Description:
 		Read-only value reporting the type value provided to siox-X/device_add.
 
 What:		/sys/bus/siox/devices/siox-X-Y/watchdog
 KernelVersion:	4.16
-Contact:	Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact:	Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
 Description:
 		Read-only value reporting if the watchdog of the siox device is
 		active. "0" means the watchdog is not active and the device is expected to
@@ -75,13 +75,13 @@ Description:
 
 What:		/sys/bus/siox/devices/siox-X-Y/watchdog_errors
 KernelVersion:	4.16
-Contact:	Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact:	Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
 Description:
 		Read-only value reporting the number to time intervals when the
 		watchdog was active.
 
 What:		/sys/bus/siox/devices/siox-X-Y/outbytes
 KernelVersion:	4.16
-Contact:	Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact:	Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
 Description:
 		Read-only value reporting the outbytes value provided to siox-X/device_add.
-- 
cgit 


From 6ad805b82dc5fc0ffd2de1d1f0de47214a050278 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 15 Jun 2019 17:41:29 +0800
Subject: doc: fix documentation about UIO_MEM_LOGICAL using

After commit d4fc5069a394 ("mm: switch s_mem and slab_cache in struct page")
page->mapping will be re-used by slab allocations and page->mapping->host
will be used in balance_dirty_pages_ratelimited() as an inode member but
it's not an inode in fact and leads an oops.

[  159.906493] Unable to handle kernel paging request at virtual address ffff200012d90be8
[  159.908029] Mem abort info:
[  159.908552]   ESR = 0x96000007
[  159.909138]   Exception class = DABT (current EL), IL = 32 bits
[  159.910155]   SET = 0, FnV = 0
[  159.910690]   EA = 0, S1PTW = 0
[  159.911241] Data abort info:
[  159.911846]   ISV = 0, ISS = 0x00000007
[  159.912567]   CM = 0, WnR = 0
[  159.913105] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000042acd000
[  159.914269] [ffff200012d90be8] pgd=000000043ffff003, pud=000000043fffe003, pmd=000000043fffa003, pte=0000000000000000
[  159.916280] Internal error: Oops: 96000007 [#1] SMP
[  159.917195] Dumping ftrace buffer:
[  159.917845]    (ftrace buffer empty)
[  159.918521] Modules linked in: uio_dev(OE)
[  159.919276] CPU: 1 PID: 295 Comm: uio_test Tainted: G           OE     5.2.0-rc4+ #46
[  159.920859] Hardware name: linux,dummy-virt (DT)
[  159.921815] pstate: 60000005 (nZCv daif -PAN -UAO)
[  159.922809] pc : balance_dirty_pages_ratelimited+0x68/0xc38
[  159.923965] lr : fault_dirty_shared_page.isra.8+0xe4/0x100
[  159.925134] sp : ffff800368a77ae0
[  159.925824] x29: ffff800368a77ae0 x28: 1ffff0006d14ce1a
[  159.926906] x27: ffff800368a670d0 x26: ffff800368a67120
[  159.927985] x25: 1ffff0006d10f5fe x24: ffff200012d90be8
[  159.929089] x23: ffff200013732000 x22: ffff80036ec03200
[  159.930172] x21: ffff200012d90bc0 x20: 1fffe400025b217d
[  159.931253] x19: ffff80036ec03200 x18: 0000000000000000
[  159.932348] x17: 0000000000000000 x16: 0ffffe0000010208
[  159.933439] x15: 0000000000000000 x14: 0000000000000000
[  159.934518] x13: 0000000000000000 x12: 0000000000000000
[  159.935596] x11: 1fffefc001b452c0 x10: ffff0fc001b452c0
[  159.936697] x9 : dfff200000000000 x8 : dfff200000000001
[  159.937781] x7 : ffff7e000da29607 x6 : ffff0fc001b452c1
[  159.938859] x5 : ffff0fc001b452c1 x4 : ffff0fc001b452c1
[  159.939944] x3 : ffff200010523ad4 x2 : 1fffe400026e659b
[  159.941065] x1 : dfff200000000000 x0 : ffff200013732cd8
[  159.942205] Call trace:
[  159.942732]  balance_dirty_pages_ratelimited+0x68/0xc38
[  159.943797]  fault_dirty_shared_page.isra.8+0xe4/0x100
[  159.944867]  do_fault+0x608/0x1250
[  159.945571]  __handle_mm_fault+0x93c/0xfb8
[  159.946412]  handle_mm_fault+0x1c0/0x360
[  159.947224]  do_page_fault+0x358/0x8d0
[  159.947993]  do_translation_fault+0xf8/0x124
[  159.948884]  do_mem_abort+0x70/0x190
[  159.949624]  el0_da+0x24/0x28

According another commit 5e901d0b15c0 ("scsi: qedi: Fix bad pte call trace
when iscsiuio is stopped."), using kmalloc also cause other problem.

But the documentation about UIO_MEM_LOGICAL allows using kmalloc(), remove
and don't allow using kmalloc() in documentation.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/uio-howto.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/uio-howto.rst b/Documentation/driver-api/uio-howto.rst
index 25f50eace28b..8fecfa11d4ff 100644
--- a/Documentation/driver-api/uio-howto.rst
+++ b/Documentation/driver-api/uio-howto.rst
@@ -276,8 +276,8 @@ fields of ``struct uio_mem``:
 -  ``int memtype``: Required if the mapping is used. Set this to
    ``UIO_MEM_PHYS`` if you you have physical memory on your card to be
    mapped. Use ``UIO_MEM_LOGICAL`` for logical memory (e.g. allocated
-   with :c:func:`kmalloc()`). There's also ``UIO_MEM_VIRTUAL`` for
-   virtual memory.
+   with :c:func:`__get_free_pages()` but not kmalloc()). There's also
+   ``UIO_MEM_VIRTUAL`` for virtual memory.
 
 -  ``phys_addr_t addr``: Required if the mapping is used. Fill in the
    address of your memory block. This address is the one that appears in
-- 
cgit 


From 19453ce0bcfbdf7332a104eebf5d835977af7284 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <matthewgarrett@google.com>
Date: Wed, 19 Jun 2019 15:46:11 -0700
Subject: IMA: support for per policy rule template formats

Admins may wish to log different measurements using different IMA
templates. Add support for overriding the default template on a per-rule
basis.

Inspired-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 Documentation/ABI/testing/ima_policy | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index 74c6702de74e..b383c1763610 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -24,8 +24,7 @@ Description:
 				[euid=] [fowner=] [fsname=]]
 			lsm:	[[subj_user=] [subj_role=] [subj_type=]
 				 [obj_user=] [obj_role=] [obj_type=]]
-			option:	[[appraise_type=]] [permit_directio]
-
+			option:	[[appraise_type=]] [template=] [permit_directio]
 		base: 	func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK]
 				[FIRMWARE_CHECK]
 				[KEXEC_KERNEL_CHECK] [KEXEC_INITRAMFS_CHECK]
@@ -38,6 +37,8 @@ Description:
 			fowner:= decimal value
 		lsm:  	are LSM specific
 		option:	appraise_type:= [imasig]
+			template:= name of a defined IMA template type
+			(eg, ima-ng). Only valid when action is "measure".
 			pcr:= decimal value
 
 		default policy:
-- 
cgit 


From 6223949a1531f31f4191a40257e6cb37af1af75f Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 10 May 2019 10:31:10 -0700
Subject: dt-bindings: rng: Document BCM7211 RNG compatible string

BCM7211 features a RNG200 block, document its compatible string.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/devicetree/bindings/rng/brcm,iproc-rng200.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/rng/brcm,iproc-rng200.txt b/Documentation/devicetree/bindings/rng/brcm,iproc-rng200.txt
index 0014da9145af..c223e54452da 100644
--- a/Documentation/devicetree/bindings/rng/brcm,iproc-rng200.txt
+++ b/Documentation/devicetree/bindings/rng/brcm,iproc-rng200.txt
@@ -2,6 +2,7 @@ HWRNG support for the iproc-rng200 driver
 
 Required properties:
 - compatible : Must be one of:
+	       "brcm,bcm7211-rng200"
 	       "brcm,bcm7278-rng200"
 	       "brcm,iproc-rng200"
 - reg : base address and size of control register block
-- 
cgit 


From 271839b0a819cbb76ef3ce5c7237d6cb624b3eba Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 17 May 2019 10:39:11 -0500
Subject: dt-bindings: arm: Convert Atmel board/soc bindings to json-schema

Convert Atmel SoC bindings to DT schema format using json-schema.

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Nicolas Ferre <nicolas.ferre@microchip.com>
Cc: devicetree@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 .../devicetree/bindings/arm/atmel-at91.txt         |  73 -----------
 .../devicetree/bindings/arm/atmel-at91.yaml        | 134 +++++++++++++++++++++
 2 files changed, 134 insertions(+), 73 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/atmel-at91.txt
 create mode 100644 Documentation/devicetree/bindings/arm/atmel-at91.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.txt b/Documentation/devicetree/bindings/arm/atmel-at91.txt
deleted file mode 100644
index 99dee23c74a4..000000000000
--- a/Documentation/devicetree/bindings/arm/atmel-at91.txt
+++ /dev/null
@@ -1,73 +0,0 @@
-Atmel AT91 device tree bindings.
-================================
-
-Boards with a SoC of the Atmel AT91 or SMART family shall have the following
-properties:
-
-Required root node properties:
-compatible: must be one of:
- * "atmel,at91rm9200"
-
- * "atmel,at91sam9" for SoCs using an ARM926EJ-S core, shall be extended with
-   the specific SoC family or compatible:
-    o "atmel,at91sam9260"
-    o "atmel,at91sam9261"
-    o "atmel,at91sam9263"
-    o "atmel,at91sam9x5" for the 5 series, shall be extended with the specific
-      SoC compatible:
-       - "atmel,at91sam9g15"
-       - "atmel,at91sam9g25"
-       - "atmel,at91sam9g35"
-       - "atmel,at91sam9x25"
-       - "atmel,at91sam9x35"
-    o "atmel,at91sam9g20"
-    o "atmel,at91sam9g45"
-    o "atmel,at91sam9n12"
-    o "atmel,at91sam9rl"
-    o "atmel,at91sam9xe"
-    o "microchip,sam9x60"
- * "atmel,sama5" for SoCs using a Cortex-A5, shall be extended with the specific
-   SoC family:
-    o "atmel,sama5d2" shall be extended with the specific SoC compatible:
-       - "atmel,sama5d27"
-    o "atmel,sama5d3" shall be extended with the specific SoC compatible:
-       - "atmel,sama5d31"
-       - "atmel,sama5d33"
-       - "atmel,sama5d34"
-       - "atmel,sama5d35"
-       - "atmel,sama5d36"
-    o "atmel,sama5d4" shall be extended with the specific SoC compatible:
-       - "atmel,sama5d41"
-       - "atmel,sama5d42"
-       - "atmel,sama5d43"
-       - "atmel,sama5d44"
-
- * "atmel,samv7" for MCUs using a Cortex-M7, shall be extended with the specific
-   SoC family:
-    o "atmel,sams70" shall be extended with the specific MCU compatible:
-       - "atmel,sams70j19"
-       - "atmel,sams70j20"
-       - "atmel,sams70j21"
-       - "atmel,sams70n19"
-       - "atmel,sams70n20"
-       - "atmel,sams70n21"
-       - "atmel,sams70q19"
-       - "atmel,sams70q20"
-       - "atmel,sams70q21"
-    o "atmel,samv70" shall be extended with the specific MCU compatible:
-       - "atmel,samv70j19"
-       - "atmel,samv70j20"
-       - "atmel,samv70n19"
-       - "atmel,samv70n20"
-       - "atmel,samv70q19"
-       - "atmel,samv70q20"
-    o "atmel,samv71" shall be extended with the specific MCU compatible:
-       - "atmel,samv71j19"
-       - "atmel,samv71j20"
-       - "atmel,samv71j21"
-       - "atmel,samv71n19"
-       - "atmel,samv71n20"
-       - "atmel,samv71n21"
-       - "atmel,samv71q19"
-       - "atmel,samv71q20"
-       - "atmel,samv71q21"
diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.yaml b/Documentation/devicetree/bindings/arm/atmel-at91.yaml
new file mode 100644
index 000000000000..6e168abcd4d1
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/atmel-at91.yaml
@@ -0,0 +1,134 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/atmel-at91.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Atmel AT91 device tree bindings.
+
+maintainers:
+  - Alexandre Belloni <alexandre.belloni@bootlin.com>
+  - Ludovic Desroches <ludovic.desroches@microchip.com>
+
+description: |
+  Boards with a SoC of the Atmel AT91 or SMART family shall have the following
+
+properties:
+  $nodename:
+    const: '/'
+  compatible:
+    oneOf:
+      - items:
+          - const: atmel,at91rm9200
+      - items:
+          - enum:
+              - olimex,sam9-l9260
+          - enum:
+              - atmel,at91sam9260
+              - atmel,at91sam9261
+              - atmel,at91sam9263
+              - atmel,at91sam9g20
+              - atmel,at91sam9g45
+              - atmel,at91sam9n12
+              - atmel,at91sam9rl
+              - atmel,at91sam9xe
+              - atmel,at91sam9x60
+          - const: atmel,at91sam9
+
+      - items:
+          - enum:
+              - atmel,at91sam9g15
+              - atmel,at91sam9g25
+              - atmel,at91sam9g35
+              - atmel,at91sam9x25
+              - atmel,at91sam9x35
+          - const: atmel,at91sam9x5
+          - const: atmel,at91sam9
+
+      - items:
+          - const: atmel,sama5d27
+          - const: atmel,sama5d2
+          - const: atmel,sama5
+
+      - description: Nattis v2 board with Natte v2 power board
+        items:
+          - const: axentia,nattis-2
+          - const: axentia,natte-2
+          - const: axentia,linea
+          - const: atmel,sama5d31
+          - const: atmel,sama5d3
+          - const: atmel,sama5
+
+      - description: TSE-850 v3 board
+        items:
+          - const: axentia,tse850v3
+          - const: axentia,linea
+          - const: atmel,sama5d31
+          - const: atmel,sama5d3
+          - const: atmel,sama5
+
+      - items:
+          - const: axentia,linea
+          - const: atmel,sama5d31
+          - const: atmel,sama5d3
+          - const: atmel,sama5
+
+      - items:
+          - enum:
+              - atmel,sama5d31
+              - atmel,sama5d33
+              - atmel,sama5d34
+              - atmel,sama5d35
+              - atmel,sama5d36
+          - const: atmel,sama5d3
+          - const: atmel,sama5
+
+      - items:
+          - enum:
+              - atmel,sama5d41
+              - atmel,sama5d42
+              - atmel,sama5d43
+              - atmel,sama5d44
+          - const: atmel,sama5d4
+          - const: atmel,sama5
+
+      - items:
+          - enum:
+              - atmel,sams70j19
+              - atmel,sams70j20
+              - atmel,sams70j21
+              - atmel,sams70n19
+              - atmel,sams70n20
+              - atmel,sams70n21
+              - atmel,sams70q19
+              - atmel,sams70q20
+              - atmel,sams70q21
+          - const: atmel,sams70
+          - const: atmel,samv7
+
+      - items:
+          - enum:
+              - atmel,samv70j19
+              - atmel,samv70j20
+              - atmel,samv70n19
+              - atmel,samv70n20
+              - atmel,samv70q19
+              - atmel,samv70q20
+          - const: atmel,samv70
+          - const: atmel,samv7
+
+      - items:
+          - enum:
+              - atmel,samv71j19
+              - atmel,samv71j20
+              - atmel,samv71j21
+              - atmel,samv71n19
+              - atmel,samv71n20
+              - atmel,samv71n21
+              - atmel,samv71q19
+              - atmel,samv71q20
+              - atmel,samv71q21
+          - const: atmel,samv71
+          - const: atmel,samv7
+
+...
-- 
cgit 


From f0b1f5f08dfbc043fb3759a3ed6d0a249d55e8ec Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Wed, 19 Jun 2019 14:41:55 +0100
Subject: ASoC: madera: Add DT bindings for Cirrus Logic Madera codecs

The Cirrus Logic Madera codecs are a family of related codecs with
extensive digital and analogue I/O, digital mixing and routing,
signal processing and programmable DSPs.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/sound/madera.txt | 67 ++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/sound/madera.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/sound/madera.txt b/Documentation/devicetree/bindings/sound/madera.txt
new file mode 100644
index 000000000000..5e669ce552f4
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/madera.txt
@@ -0,0 +1,67 @@
+Cirrus Logic Madera class audio codecs
+
+This describes audio configuration bindings for these codecs.
+
+See also the core bindings for the parent MFD driver:
+See Documentation/devicetree/bindings/mfd/madera.txt
+
+and defines for values used in these bindings:
+include/dt-bindings/sound/madera.h
+
+These properties are all contained in the parent MFD node.
+
+Optional properties:
+  - cirrus,dmic-ref : Indicates how the MICBIAS pins have been externally
+    connected to DMICs on each input, one cell per input.
+    <IN1 IN2 IN3 ...>
+    A value of 0 indicates MICVDD and is the default, other values depend on the
+    codec:
+    For CS47L35 one of the CS47L35_DMIC_REF_xxx values
+    For all other codecs one of the MADERA_DMIC_REF_xxx values
+    Also see the datasheet for a description of the INn_DMIC_SUP field.
+
+  - cirrus,inmode : A list of input mode settings for each input. A maximum of
+    16 cells, with four cells per input in the order INnAL, INnAR INnBL INnBR.
+    For non-muxed inputs the first two cells for that input set the mode for
+    the left and right channel and the second two cells must be 0.
+    For muxed inputs the first two cells for that input set the mode of the
+    left and right A inputs and the second two cells set the mode of the left
+    and right B inputs.
+    Valid mode values are one of the MADERA_INMODE_xxx. If the array is shorter
+    than the number of inputs the unspecified inputs default to
+    MADERA_INMODE_DIFF.
+
+  - cirrus,out-mono : Mono bit for each output, maximum of six cells if the
+    array is shorter outputs will be set to stereo.
+
+  - cirrus,max-channels-clocked : Maximum number of channels that I2S clocks
+    will be generated for. Useful when clock master for systems where the I2S
+    bus has multiple data lines.
+    One cell for each AIF, use a value of zero for AIFs that should be handled
+    normally.
+
+  - cirrus,pdm-fmt : PDM speaker data format, must contain 2 cells
+    (OUT5 and OUT6). See the PDM_SPKn_FMT field in the datasheet for a
+    description of this value.
+    The second cell is ignored for codecs that do not have OUT6.
+
+  - cirrus,pdm-mute : PDM mute format, must contain 2 cells
+    (OUT5 and OUT6). See the PDM_SPKn_CTRL_1 register in the datasheet for a
+    description of this value.
+    The second cell is ignored for codecs that do not have OUT6.
+
+Example:
+
+cs47l35@0 {
+	compatible = "cirrus,cs47l35";
+
+	cirrus,dmic-ref = <0 0 CS47L35_DMIC_REF_MICBIAS1B 0>;
+	cirrus,inmode = <
+		MADERA_INMODE_DMIC MADERA_INMODE_DMIC /* IN1A digital */
+		MADERA_INMODE_SE   MADERA_INMODE_SE   /* IN1B single-ended */
+		MADERA_INMODE_DIFF MADERA_INMODE_DIFF /* IN2 differential */
+		0 0 	/* not used on this codec */
+	>;
+	cirrus,out-mono = <0 0 0 0 0 0>;
+	cirrus,max-channels-clocked = <2 0 0>;
+};
-- 
cgit 


From ecefae6db042283bf88ef3777f2381b18df8ed46 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 18:05:38 -0300
Subject: docs: usb: rename files to .rst and add them to drivers-api

While there are a mix of things here, most of the stuff
were written from Kernel developer's PoV. So, add them to
the driver-api book.

A follow up for this patch would be to move documents from
there that are specific to sysadmins, adding them to the
admin-guide.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Johan Hovold <johan@kernel.org>
Acked-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/index.rst                    |   1 +
 Documentation/index.rst.rej                |  10 +
 Documentation/usb/WUSB-Design-overview.txt | 457 --------------
 Documentation/usb/acm.rst                  | 132 ++++
 Documentation/usb/acm.txt                  | 132 ----
 Documentation/usb/authorization.rst        | 132 ++++
 Documentation/usb/authorization.txt        | 132 ----
 Documentation/usb/chipidea.rst             | 133 ++++
 Documentation/usb/chipidea.txt             | 133 ----
 Documentation/usb/dwc3.rst                 |  53 ++
 Documentation/usb/dwc3.txt                 |  53 --
 Documentation/usb/ehci.rst                 | 230 +++++++
 Documentation/usb/ehci.txt                 | 230 -------
 Documentation/usb/functionfs.rst           |  68 +++
 Documentation/usb/functionfs.txt           |  68 ---
 Documentation/usb/gadget-testing.rst       | 934 +++++++++++++++++++++++++++++
 Documentation/usb/gadget-testing.txt       | 934 -----------------------------
 Documentation/usb/gadget_configfs.rst      | 390 ++++++++++++
 Documentation/usb/gadget_configfs.txt      | 390 ------------
 Documentation/usb/gadget_hid.rst           | 457 ++++++++++++++
 Documentation/usb/gadget_hid.txt           | 457 --------------
 Documentation/usb/gadget_multi.rst         | 165 +++++
 Documentation/usb/gadget_multi.txt         | 165 -----
 Documentation/usb/gadget_printer.rst       | 523 ++++++++++++++++
 Documentation/usb/gadget_printer.txt       | 523 ----------------
 Documentation/usb/gadget_serial.rst        | 289 +++++++++
 Documentation/usb/gadget_serial.txt        | 289 ---------
 Documentation/usb/index.rst                |  39 ++
 Documentation/usb/iuu_phoenix.rst          |  94 +++
 Documentation/usb/iuu_phoenix.txt          |  94 ---
 Documentation/usb/mass-storage.rst         | 234 ++++++++
 Documentation/usb/mass-storage.txt         | 234 --------
 Documentation/usb/misc_usbsevseg.rst       |  51 ++
 Documentation/usb/misc_usbsevseg.txt       |  51 --
 Documentation/usb/mtouchusb.rst            |  84 +++
 Documentation/usb/mtouchusb.txt            |  84 ---
 Documentation/usb/ohci.rst                 |  35 ++
 Documentation/usb/ohci.txt                 |  35 --
 Documentation/usb/rio.rst                  | 109 ++++
 Documentation/usb/rio.txt                  | 109 ----
 Documentation/usb/text_files.rst           |  29 +
 Documentation/usb/usb-help.rst             |  17 +
 Documentation/usb/usb-help.txt             |  17 -
 Documentation/usb/usb-serial.rst           | 537 +++++++++++++++++
 Documentation/usb/usb-serial.txt           | 537 -----------------
 Documentation/usb/usbip_protocol.rst       | 411 +++++++++++++
 Documentation/usb/usbip_protocol.txt       | 411 -------------
 Documentation/usb/usbmon.rst               | 375 ++++++++++++
 Documentation/usb/usbmon.txt               | 375 ------------
 Documentation/usb/wusb-design-overview.rst | 457 ++++++++++++++
 50 files changed, 5989 insertions(+), 5910 deletions(-)
 create mode 100644 Documentation/index.rst.rej
 delete mode 100644 Documentation/usb/WUSB-Design-overview.txt
 create mode 100644 Documentation/usb/acm.rst
 delete mode 100644 Documentation/usb/acm.txt
 create mode 100644 Documentation/usb/authorization.rst
 delete mode 100644 Documentation/usb/authorization.txt
 create mode 100644 Documentation/usb/chipidea.rst
 delete mode 100644 Documentation/usb/chipidea.txt
 create mode 100644 Documentation/usb/dwc3.rst
 delete mode 100644 Documentation/usb/dwc3.txt
 create mode 100644 Documentation/usb/ehci.rst
 delete mode 100644 Documentation/usb/ehci.txt
 create mode 100644 Documentation/usb/functionfs.rst
 delete mode 100644 Documentation/usb/functionfs.txt
 create mode 100644 Documentation/usb/gadget-testing.rst
 delete mode 100644 Documentation/usb/gadget-testing.txt
 create mode 100644 Documentation/usb/gadget_configfs.rst
 delete mode 100644 Documentation/usb/gadget_configfs.txt
 create mode 100644 Documentation/usb/gadget_hid.rst
 delete mode 100644 Documentation/usb/gadget_hid.txt
 create mode 100644 Documentation/usb/gadget_multi.rst
 delete mode 100644 Documentation/usb/gadget_multi.txt
 create mode 100644 Documentation/usb/gadget_printer.rst
 delete mode 100644 Documentation/usb/gadget_printer.txt
 create mode 100644 Documentation/usb/gadget_serial.rst
 delete mode 100644 Documentation/usb/gadget_serial.txt
 create mode 100644 Documentation/usb/index.rst
 create mode 100644 Documentation/usb/iuu_phoenix.rst
 delete mode 100644 Documentation/usb/iuu_phoenix.txt
 create mode 100644 Documentation/usb/mass-storage.rst
 delete mode 100644 Documentation/usb/mass-storage.txt
 create mode 100644 Documentation/usb/misc_usbsevseg.rst
 delete mode 100644 Documentation/usb/misc_usbsevseg.txt
 create mode 100644 Documentation/usb/mtouchusb.rst
 delete mode 100644 Documentation/usb/mtouchusb.txt
 create mode 100644 Documentation/usb/ohci.rst
 delete mode 100644 Documentation/usb/ohci.txt
 create mode 100644 Documentation/usb/rio.rst
 delete mode 100644 Documentation/usb/rio.txt
 create mode 100644 Documentation/usb/text_files.rst
 create mode 100644 Documentation/usb/usb-help.rst
 delete mode 100644 Documentation/usb/usb-help.txt
 create mode 100644 Documentation/usb/usb-serial.rst
 delete mode 100644 Documentation/usb/usb-serial.txt
 create mode 100644 Documentation/usb/usbip_protocol.rst
 delete mode 100644 Documentation/usb/usbip_protocol.txt
 create mode 100644 Documentation/usb/usbmon.rst
 delete mode 100644 Documentation/usb/usbmon.txt
 create mode 100644 Documentation/usb/wusb-design-overview.rst

(limited to 'Documentation')

diff --git a/Documentation/index.rst b/Documentation/index.rst
index a7566ef62411..91055adde327 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -101,6 +101,7 @@ needed).
    filesystems/index
    vm/index
    bpf/index
+   usb/index
    misc-devices/index
 
 Architecture-specific documentation
diff --git a/Documentation/index.rst.rej b/Documentation/index.rst.rej
new file mode 100644
index 000000000000..b1af6eab22f3
--- /dev/null
+++ b/Documentation/index.rst.rej
@@ -0,0 +1,10 @@
+--- Documentation/index.rst
++++ Documentation/index.rst
+@@ -103,6 +103,7 @@ needed).
+    vm/index
+    bpf/index
+    PCI/index
++   usb/index
+    misc-devices/index
+ 
+ Architecture-specific documentation
diff --git a/Documentation/usb/WUSB-Design-overview.txt b/Documentation/usb/WUSB-Design-overview.txt
deleted file mode 100644
index dc5e21609bb5..000000000000
--- a/Documentation/usb/WUSB-Design-overview.txt
+++ /dev/null
@@ -1,457 +0,0 @@
-================================
-Linux UWB + Wireless USB + WiNET
-================================
-
-   Copyright (C) 2005-2006 Intel Corporation
-
-   Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License version
-   2 as published by the Free Software Foundation.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA.
-
-
-Please visit http://bughost.org/thewiki/Design-overview.txt-1.8 for
-updated content.
-
-    * Design-overview.txt-1.8
-
-This code implements a Ultra Wide Band stack for Linux, as well as
-drivers for the USB based UWB radio controllers defined in the
-Wireless USB 1.0 specification (including Wireless USB host controller
-and an Intel WiNET controller).
-
-.. Contents
-   1. Introduction
-         1. HWA: Host Wire adapters, your Wireless USB dongle
-
-         2. DWA: Device Wired Adaptor, a Wireless USB hub for wired
-            devices
-         3. WHCI: Wireless Host Controller Interface, the PCI WUSB host
-            adapter
-   2. The UWB stack
-         1. Devices and hosts: the basic structure
-
-         2. Host Controller life cycle
-
-         3. On the air: beacons and enumerating the radio neighborhood
-
-         4. Device lists
-         5. Bandwidth allocation
-
-   3. Wireless USB Host Controller drivers
-
-   4. Glossary
-
-
-Introduction
-============
-
-UWB is a wide-band communication protocol that is to serve also as the
-low-level protocol for others (much like TCP sits on IP). Currently
-these others are Wireless USB and TCP/IP, but seems Bluetooth and
-Firewire/1394 are coming along.
-
-UWB uses a band from roughly 3 to 10 GHz, transmitting at a max of
-~-41dB (or 0.074 uW/MHz--geography specific data is still being
-negotiated w/ regulators, so watch for changes). That band is divided in
-a bunch of ~1.5 GHz wide channels (or band groups) composed of three
-subbands/subchannels (528 MHz each). Each channel is independent of each
-other, so you could consider them different "busses". Initially this
-driver considers them all a single one.
-
-Radio time is divided in 65536 us long /superframes/, each one divided
-in 256 256us long /MASs/ (Media Allocation Slots), which are the basic
-time/media allocation units for transferring data. At the beginning of
-each superframe there is a Beacon Period (BP), where every device
-transmit its beacon on a single MAS. The length of the BP depends on how
-many devices are present and the length of their beacons.
-
-Devices have a MAC (fixed, 48 bit address) and a device (changeable, 16
-bit address) and send periodic beacons to advertise themselves and pass
-info on what they are and do. They advertise their capabilities and a
-bunch of other stuff.
-
-The different logical parts of this driver are:
-
-    *
-
-      *UWB*: the Ultra-Wide-Band stack -- manages the radio and
-      associated spectrum to allow for devices sharing it. Allows to
-      control bandwidth assignment, beaconing, scanning, etc
-
-    *
-
-      *WUSB*: the layer that sits on top of UWB to provide Wireless USB.
-      The Wireless USB spec defines means to control a UWB radio and to
-      do the actual WUSB.
-
-
-HWA: Host Wire adapters, your Wireless USB dongle
--------------------------------------------------
-
-WUSB also defines a device called a Host Wire Adaptor (HWA), which in
-mere terms is a USB dongle that enables your PC to have UWB and Wireless
-USB. The Wireless USB Host Controller in a HWA looks to the host like a
-[Wireless] USB controller connected via USB (!)
-
-The HWA itself is broken in two or three main interfaces:
-
-    *
-
-      *RC*: Radio control -- this implements an interface to the
-      Ultra-Wide-Band radio controller. The driver for this implements a
-      USB-based UWB Radio Controller to the UWB stack.
-
-    *
-
-      *HC*: the wireless USB host controller. It looks like a USB host
-      whose root port is the radio and the WUSB devices connect to it.
-      To the system it looks like a separate USB host. The driver (will)
-      implement a USB host controller (similar to UHCI, OHCI or EHCI)
-      for which the root hub is the radio...To reiterate: it is a USB
-      controller that is connected via USB instead of PCI.
-
-    *
-
-      *WINET*: some HW provide a WiNET interface (IP over UWB). This
-      package provides a driver for it (it looks like a network
-      interface, winetX). The driver detects when there is a link up for
-      their type and kick into gear.
-
-
-DWA: Device Wired Adaptor, a Wireless USB hub for wired devices
----------------------------------------------------------------
-
-These are the complement to HWAs. They are a USB host for connecting
-wired devices, but it is connected to your PC connected via Wireless
-USB. To the system it looks like yet another USB host. To the untrained
-eye, it looks like a hub that connects upstream wirelessly.
-
-We still offer no support for this; however, it should share a lot of
-code with the HWA-RC driver; there is a bunch of factorization work that
-has been done to support that in upcoming releases.
-
-
-WHCI: Wireless Host Controller Interface, the PCI WUSB host adapter
--------------------------------------------------------------------
-
-This is your usual PCI device that implements WHCI. Similar in concept
-to EHCI, it allows your wireless USB devices (including DWAs) to connect
-to your host via a PCI interface. As in the case of the HWA, it has a
-Radio Control interface and the WUSB Host Controller interface per se.
-
-There is still no driver support for this, but will be in upcoming
-releases.
-
-
-The UWB stack
-=============
-
-The main mission of the UWB stack is to keep a tally of which devices
-are in radio proximity to allow drivers to connect to them. As well, it
-provides an API for controlling the local radio controllers (RCs from
-now on), such as to start/stop beaconing, scan, allocate bandwidth, etc.
-
-
-Devices and hosts: the basic structure
---------------------------------------
-
-The main building block here is the UWB device (struct uwb_dev). For
-each device that pops up in radio presence (ie: the UWB host receives a
-beacon from it) you get a struct uwb_dev that will show up in
-/sys/bus/uwb/devices.
-
-For each RC that is detected, a new struct uwb_rc and struct uwb_dev are
-created. An entry is also created in /sys/class/uwb_rc for each RC.
-
-Each RC driver is implemented by a separate driver that plugs into the
-interface that the UWB stack provides through a struct uwb_rc_ops. The
-spec creators have been nice enough to make the message format the same
-for HWA and WHCI RCs, so the driver is really a very thin transport that
-moves the requests from the UWB API to the device [/uwb_rc_ops->cmd()/]
-and sends the replies and notifications back to the API
-[/uwb_rc_neh_grok()/]. Notifications are handled to the UWB daemon, that
-is chartered, among other things, to keep the tab of how the UWB radio
-neighborhood looks, creating and destroying devices as they show up or
-disappear.
-
-Command execution is very simple: a command block is sent and a event
-block or reply is expected back. For sending/receiving command/events, a
-handle called /neh/ (Notification/Event Handle) is opened with
-/uwb_rc_neh_open()/.
-
-The HWA-RC (USB dongle) driver (drivers/uwb/hwa-rc.c) does this job for
-the USB connected HWA. Eventually, drivers/whci-rc.c will do the same
-for the PCI connected WHCI controller.
-
-
-Host Controller life cycle
---------------------------
-
-So let's say we connect a dongle to the system: it is detected and
-firmware uploaded if needed [for Intel's i1480
-/drivers/uwb/ptc/usb.c:ptc_usb_probe()/] and then it is reenumerated.
-Now we have a real HWA device connected and
-/drivers/uwb/hwa-rc.c:hwarc_probe()/ picks it up, that will set up the
-Wire-Adaptor environment and then suck it into the UWB stack's vision of
-the world [/drivers/uwb/lc-rc.c:uwb_rc_add()/].
-
-    *
-
-      [*] The stack should put a new RC to scan for devices
-      [/uwb_rc_scan()/] so it finds what's available around and tries to
-      connect to them, but this is policy stuff and should be driven
-      from user space. As of now, the operator is expected to do it
-      manually; see the release notes for documentation on the procedure.
-
-When a dongle is disconnected, /drivers/uwb/hwa-rc.c:hwarc_disconnect()/
-takes time of tearing everything down safely (or not...).
-
-
-On the air: beacons and enumerating the radio neighborhood
-----------------------------------------------------------
-
-So assuming we have devices and we have agreed for a channel to connect
-on (let's say 9), we put the new RC to beacon:
-
-    *
-
-            $ echo 9 0 > /sys/class/uwb_rc/uwb0/beacon
-
-Now it is visible. If there were other devices in the same radio channel
-and beacon group (that's what the zero is for), the dongle's radio
-control interface will send beacon notifications on its
-notification/event endpoint (NEEP). The beacon notifications are part of
-the event stream that is funneled into the API with
-/drivers/uwb/neh.c:uwb_rc_neh_grok()/ and delivered to the UWBD, the UWB
-daemon through a notification list.
-
-UWBD wakes up and scans the event list; finds a beacon and adds it to
-the BEACON CACHE (/uwb_beca/). If he receives a number of beacons from
-the same device, he considers it to be 'onair' and creates a new device
-[/drivers/uwb/lc-dev.c:uwbd_dev_onair()/]. Similarly, when no beacons
-are received in some time, the device is considered gone and wiped out
-[uwbd calls periodically /uwb/beacon.c:uwb_beca_purge()/ that will purge
-the beacon cache of dead devices].
-
-
-Device lists
-------------
-
-All UWB devices are kept in the list of the struct bus_type uwb_bus_type.
-
-
-Bandwidth allocation
---------------------
-
-The UWB stack maintains a local copy of DRP availability through
-processing of incoming *DRP Availability Change* notifications. This
-local copy is currently used to present the current bandwidth
-availability to the user through the sysfs file
-/sys/class/uwb_rc/uwbx/bw_avail. In the future the bandwidth
-availability information will be used by the bandwidth reservation
-routines.
-
-The bandwidth reservation routines are in progress and are thus not
-present in the current release. When completed they will enable a user
-to initiate DRP reservation requests through interaction with sysfs. DRP
-reservation requests from remote UWB devices will also be handled. The
-bandwidth management done by the UWB stack will include callbacks to the
-higher layers will enable the higher layers to use the reservations upon
-completion. [Note: The bandwidth reservation work is in progress and
-subject to change.]
-
-
-Wireless USB Host Controller drivers
-====================================
-
-*WARNING* This section needs a lot of work!
-
-As explained above, there are three different types of HCs in the WUSB
-world: HWA-HC, DWA-HC and WHCI-HC.
-
-HWA-HC and DWA-HC share that they are Wire-Adapters (USB or WUSB
-connected controllers), and their transfer management system is almost
-identical. So is their notification delivery system.
-
-HWA-HC and WHCI-HC share that they are both WUSB host controllers, so
-they have to deal with WUSB device life cycle and maintenance, wireless
-root-hub
-
-HWA exposes a Host Controller interface (HWA-HC 0xe0/02/02). This has
-three endpoints (Notifications, Data Transfer In and Data Transfer
-Out--known as NEP, DTI and DTO in the code).
-
-We reserve UWB bandwidth for our Wireless USB Cluster, create a Cluster
-ID and tell the HC to use all that. Then we start it. This means the HC
-starts sending MMCs.
-
-    *
-
-      The MMCs are blocks of data defined somewhere in the WUSB1.0 spec
-      that define a stream in the UWB channel time allocated for sending
-      WUSB IEs (host to device commands/notifications) and Device
-      Notifications (device initiated to host). Each host defines a
-      unique Wireless USB cluster through MMCs. Devices can connect to a
-      single cluster at the time. The IEs are Information Elements, and
-      among them are the bandwidth allocations that tell each device
-      when can they transmit or receive.
-
-Now it all depends on external stimuli.
-
-New device connection
----------------------
-
-A new device pops up, it scans the radio looking for MMCs that give out
-the existence of Wireless USB channels. Once one (or more) are found,
-selects which one to connect to. Sends a /DN_Connect/ (device
-notification connect) during the DNTS (Device Notification Time
-Slot--announced in the MMCs
-
-HC picks the /DN_Connect/ out (nep module sends to notif.c for delivery
-into /devconnect/). This process starts the authentication process for
-the device. First we allocate a /fake port/ and assign an
-unauthenticated address (128 to 255--what we really do is
-0x80 | fake_port_idx). We fiddle with the fake port status and /hub_wq/
-sees a new connection, so he moves on to enable the fake port with a reset.
-
-So now we are in the reset path -- we know we have a non-yet enumerated
-device with an unauthorized address; we ask user space to authenticate
-(FIXME: not yet done, similar to bluetooth pairing), then we do the key
-exchange (FIXME: not yet done) and issue a /set address 0/ to bring the
-device to the default state. Device is authenticated.
-
-From here, the USB stack takes control through the usb_hcd ops. hub_wq
-has seen the port status changes, as we have been toggling them. It will
-start enumerating and doing transfers through usb_hcd->urb_enqueue() to
-read descriptors and move our data.
-
-Device life cycle and keep alives
----------------------------------
-
-Every time there is a successful transfer to/from a device, we update a
-per-device activity timestamp. If not, every now and then we check and
-if the activity timestamp gets old, we ping the device by sending it a
-Keep Alive IE; it responds with a /DN_Alive/ pong during the DNTS (this
-arrives to us as a notification through
-devconnect.c:wusb_handle_dn_alive(). If a device times out, we
-disconnect it from the system (cleaning up internal information and
-toggling the bits in the fake hub port, which kicks hub_wq into removing
-the rest of the stuff).
-
-This is done through devconnect:__wusb_check_devs(), which will scan the
-device list looking for whom needs refreshing.
-
-If the device wants to disconnect, it will either die (ugly) or send a
-/DN_Disconnect/ that will prompt a disconnection from the system.
-
-Sending and receiving data
---------------------------
-
-Data is sent and received through /Remote Pipes/ (rpipes). An rpipe is
-/aimed/ at an endpoint in a WUSB device. This is the same for HWAs and
-DWAs.
-
-Each HC has a number of rpipes and buffers that can be assigned to them;
-when doing a data transfer (xfer), first the rpipe has to be aimed and
-prepared (buffers assigned), then we can start queueing requests for
-data in or out.
-
-Data buffers have to be segmented out before sending--so we send first a
-header (segment request) and then if there is any data, a data buffer
-immediately after to the DTI interface (yep, even the request). If our
-buffer is bigger than the max segment size, then we just do multiple
-requests.
-
-[This sucks, because doing USB scatter gatter in Linux is resource
-intensive, if any...not that the current approach is not. It just has to
-be cleaned up a lot :)].
-
-If reading, we don't send data buffers, just the segment headers saying
-we want to read segments.
-
-When the xfer is executed, we receive a notification that says data is
-ready in the DTI endpoint (handled through
-xfer.c:wa_handle_notif_xfer()). In there we read from the DTI endpoint a
-descriptor that gives us the status of the transfer, its identification
-(given when we issued it) and the segment number. If it was a data read,
-we issue another URB to read into the destination buffer the chunk of
-data coming out of the remote endpoint. Done, wait for the next guy. The
-callbacks for the URBs issued from here are the ones that will declare
-the xfer complete at some point and call its callback.
-
-Seems simple, but the implementation is not trivial.
-
-    *
-
-      *WARNING* Old!!
-
-The main xfer descriptor, wa_xfer (equivalent to a URB) contains an
-array of segments, tallys on segments and buffers and callback
-information. Buried in there is a lot of URBs for executing the segments
-and buffer transfers.
-
-For OUT xfers, there is an array of segments, one URB for each, another
-one of buffer URB. When submitting, we submit URBs for segment request
-1, buffer 1, segment 2, buffer 2...etc. Then we wait on the DTI for xfer
-result data; when all the segments are complete, we call the callback to
-finalize the transfer.
-
-For IN xfers, we only issue URBs for the segments we want to read and
-then wait for the xfer result data.
-
-URB mapping into xfers
-^^^^^^^^^^^^^^^^^^^^^^
-
-This is done by hwahc_op_urb_[en|de]queue(). In enqueue() we aim an
-rpipe to the endpoint where we have to transmit, create a transfer
-context (wa_xfer) and submit it. When the xfer is done, our callback is
-called and we assign the status bits and release the xfer resources.
-
-In dequeue() we are basically cancelling/aborting the transfer. We issue
-a xfer abort request to the HC, cancel all the URBs we had submitted
-and not yet done and when all that is done, the xfer callback will be
-called--this will call the URB callback.
-
-
-Glossary
-========
-
-*DWA* -- Device Wire Adapter
-
-USB host, wired for downstream devices, upstream connects wirelessly
-with Wireless USB.
-
-*EVENT* -- Response to a command on the NEEP
-
-*HWA* -- Host Wire Adapter / USB dongle for UWB and Wireless USB
-
-*NEH* -- Notification/Event Handle
-
-Handle/file descriptor for receiving notifications or events. The WA
-code requires you to get one of this to listen for notifications or
-events on the NEEP.
-
-*NEEP* -- Notification/Event EndPoint
-
-Stuff related to the management of the first endpoint of a HWA USB
-dongle that is used to deliver an stream of events and notifications to
-the host.
-
-*NOTIFICATION* -- Message coming in the NEEP as response to something.
-
-*RC* -- Radio Control
-
-Design-overview.txt-1.8 (last edited 2006-11-04 12:22:24 by
-InakyPerezGonzalez)
diff --git a/Documentation/usb/acm.rst b/Documentation/usb/acm.rst
new file mode 100644
index 000000000000..e8bda98e9b51
--- /dev/null
+++ b/Documentation/usb/acm.rst
@@ -0,0 +1,132 @@
+======================
+Linux ACM driver v0.16
+======================
+
+Copyright (c) 1999 Vojtech Pavlik <vojtech@suse.cz>
+
+Sponsored by SuSE
+
+0. Disclaimer
+~~~~~~~~~~~~~
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2 of the License, or (at your option)
+any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc., 59
+Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Should you need to contact me, the author, you can do so either by e-mail -
+mail your message to <vojtech@suse.cz>, or by paper mail: Vojtech Pavlik,
+Ucitelska 1576, Prague 8, 182 00 Czech Republic
+
+For your convenience, the GNU General Public License version 2 is included
+in the package: See the file COPYING.
+
+1. Usage
+~~~~~~~~
+The drivers/usb/class/cdc-acm.c drivers works with USB modems and USB ISDN terminal
+adapters that conform to the Universal Serial Bus Communication Device Class
+Abstract Control Model (USB CDC ACM) specification.
+
+Many modems do, here is a list of those I know of:
+
+	- 3Com OfficeConnect 56k
+	- 3Com Voice FaxModem Pro
+	- 3Com Sportster
+	- MultiTech MultiModem 56k
+	- Zoom 2986L FaxModem
+	- Compaq 56k FaxModem
+	- ELSA Microlink 56k
+
+I know of one ISDN TA that does work with the acm driver:
+
+	- 3Com USR ISDN Pro TA
+
+Some cell phones also connect via USB. I know the following phones work:
+
+	- SonyEricsson K800i
+
+Unfortunately many modems and most ISDN TAs use proprietary interfaces and
+thus won't work with this drivers. Check for ACM compliance before buying.
+
+To use the modems you need these modules loaded::
+
+	usbcore.ko
+	uhci-hcd.ko ohci-hcd.ko or ehci-hcd.ko
+	cdc-acm.ko
+
+After that, the modem[s] should be accessible. You should be able to use
+minicom, ppp and mgetty with them.
+
+2. Verifying that it works
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The first step would be to check /sys/kernel/debug/usb/devices, it should look
+like this::
+
+  T:  Bus=01 Lev=00 Prnt=00 Port=00 Cnt=00 Dev#=  1 Spd=12  MxCh= 2
+  B:  Alloc=  0/900 us ( 0%), #Int=  0, #Iso=  0
+  D:  Ver= 1.00 Cls=09(hub  ) Sub=00 Prot=00 MxPS= 8 #Cfgs=  1
+  P:  Vendor=0000 ProdID=0000 Rev= 0.00
+  S:  Product=USB UHCI Root Hub
+  S:  SerialNumber=6800
+  C:* #Ifs= 1 Cfg#= 1 Atr=40 MxPwr=  0mA
+  I:  If#= 0 Alt= 0 #EPs= 1 Cls=09(hub  ) Sub=00 Prot=00 Driver=hub
+  E:  Ad=81(I) Atr=03(Int.) MxPS=   8 Ivl=255ms
+  T:  Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#=  2 Spd=12  MxCh= 0
+  D:  Ver= 1.00 Cls=02(comm.) Sub=00 Prot=00 MxPS= 8 #Cfgs=  2
+  P:  Vendor=04c1 ProdID=008f Rev= 2.07
+  S:  Manufacturer=3Com Inc.
+  S:  Product=3Com U.S. Robotics Pro ISDN TA
+  S:  SerialNumber=UFT53A49BVT7
+  C:  #Ifs= 1 Cfg#= 1 Atr=60 MxPwr=  0mA
+  I:  If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=acm
+  E:  Ad=85(I) Atr=02(Bulk) MxPS=  64 Ivl=  0ms
+  E:  Ad=04(O) Atr=02(Bulk) MxPS=  64 Ivl=  0ms
+  E:  Ad=81(I) Atr=03(Int.) MxPS=  16 Ivl=128ms
+  C:* #Ifs= 2 Cfg#= 2 Atr=60 MxPwr=  0mA
+  I:  If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=02 Prot=01 Driver=acm
+  E:  Ad=81(I) Atr=03(Int.) MxPS=  16 Ivl=128ms
+  I:  If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=acm
+  E:  Ad=85(I) Atr=02(Bulk) MxPS=  64 Ivl=  0ms
+  E:  Ad=04(O) Atr=02(Bulk) MxPS=  64 Ivl=  0ms
+
+The presence of these three lines (and the Cls= 'comm' and 'data' classes)
+is important, it means it's an ACM device. The Driver=acm means the acm
+driver is used for the device. If you see only Cls=ff(vend.) then you're out
+of luck, you have a device with vendor specific-interface::
+
+  D:  Ver= 1.00 Cls=02(comm.) Sub=00 Prot=00 MxPS= 8 #Cfgs=  2
+  I:  If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=02 Prot=01 Driver=acm
+  I:  If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=acm
+
+In the system log you should see::
+
+  usb.c: USB new device connect, assigned device number 2
+  usb.c: kmalloc IF c7691fa0, numif 1
+  usb.c: kmalloc IF c7b5f3e0, numif 2
+  usb.c: skipped 4 class/vendor specific interface descriptors
+  usb.c: new device strings: Mfr=1, Product=2, SerialNumber=3
+  usb.c: USB device number 2 default language ID 0x409
+  Manufacturer: 3Com Inc.
+  Product: 3Com U.S. Robotics Pro ISDN TA
+  SerialNumber: UFT53A49BVT7
+  acm.c: probing config 1
+  acm.c: probing config 2
+  ttyACM0: USB ACM device
+  acm.c: acm_control_msg: rq: 0x22 val: 0x0 len: 0x0 result: 0
+  acm.c: acm_control_msg: rq: 0x20 val: 0x0 len: 0x7 result: 7
+  usb.c: acm driver claimed interface c7b5f3e0
+  usb.c: acm driver claimed interface c7b5f3f8
+  usb.c: acm driver claimed interface c7691fa0
+
+If all this seems to be OK, fire up minicom and set it to talk to the ttyACM
+device and try typing 'at'. If it responds with 'OK', then everything is
+working.
diff --git a/Documentation/usb/acm.txt b/Documentation/usb/acm.txt
deleted file mode 100644
index e8bda98e9b51..000000000000
--- a/Documentation/usb/acm.txt
+++ /dev/null
@@ -1,132 +0,0 @@
-======================
-Linux ACM driver v0.16
-======================
-
-Copyright (c) 1999 Vojtech Pavlik <vojtech@suse.cz>
-
-Sponsored by SuSE
-
-0. Disclaimer
-~~~~~~~~~~~~~
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2 of the License, or (at your option)
-any later version.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc., 59
-Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-Should you need to contact me, the author, you can do so either by e-mail -
-mail your message to <vojtech@suse.cz>, or by paper mail: Vojtech Pavlik,
-Ucitelska 1576, Prague 8, 182 00 Czech Republic
-
-For your convenience, the GNU General Public License version 2 is included
-in the package: See the file COPYING.
-
-1. Usage
-~~~~~~~~
-The drivers/usb/class/cdc-acm.c drivers works with USB modems and USB ISDN terminal
-adapters that conform to the Universal Serial Bus Communication Device Class
-Abstract Control Model (USB CDC ACM) specification.
-
-Many modems do, here is a list of those I know of:
-
-	- 3Com OfficeConnect 56k
-	- 3Com Voice FaxModem Pro
-	- 3Com Sportster
-	- MultiTech MultiModem 56k
-	- Zoom 2986L FaxModem
-	- Compaq 56k FaxModem
-	- ELSA Microlink 56k
-
-I know of one ISDN TA that does work with the acm driver:
-
-	- 3Com USR ISDN Pro TA
-
-Some cell phones also connect via USB. I know the following phones work:
-
-	- SonyEricsson K800i
-
-Unfortunately many modems and most ISDN TAs use proprietary interfaces and
-thus won't work with this drivers. Check for ACM compliance before buying.
-
-To use the modems you need these modules loaded::
-
-	usbcore.ko
-	uhci-hcd.ko ohci-hcd.ko or ehci-hcd.ko
-	cdc-acm.ko
-
-After that, the modem[s] should be accessible. You should be able to use
-minicom, ppp and mgetty with them.
-
-2. Verifying that it works
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The first step would be to check /sys/kernel/debug/usb/devices, it should look
-like this::
-
-  T:  Bus=01 Lev=00 Prnt=00 Port=00 Cnt=00 Dev#=  1 Spd=12  MxCh= 2
-  B:  Alloc=  0/900 us ( 0%), #Int=  0, #Iso=  0
-  D:  Ver= 1.00 Cls=09(hub  ) Sub=00 Prot=00 MxPS= 8 #Cfgs=  1
-  P:  Vendor=0000 ProdID=0000 Rev= 0.00
-  S:  Product=USB UHCI Root Hub
-  S:  SerialNumber=6800
-  C:* #Ifs= 1 Cfg#= 1 Atr=40 MxPwr=  0mA
-  I:  If#= 0 Alt= 0 #EPs= 1 Cls=09(hub  ) Sub=00 Prot=00 Driver=hub
-  E:  Ad=81(I) Atr=03(Int.) MxPS=   8 Ivl=255ms
-  T:  Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#=  2 Spd=12  MxCh= 0
-  D:  Ver= 1.00 Cls=02(comm.) Sub=00 Prot=00 MxPS= 8 #Cfgs=  2
-  P:  Vendor=04c1 ProdID=008f Rev= 2.07
-  S:  Manufacturer=3Com Inc.
-  S:  Product=3Com U.S. Robotics Pro ISDN TA
-  S:  SerialNumber=UFT53A49BVT7
-  C:  #Ifs= 1 Cfg#= 1 Atr=60 MxPwr=  0mA
-  I:  If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=acm
-  E:  Ad=85(I) Atr=02(Bulk) MxPS=  64 Ivl=  0ms
-  E:  Ad=04(O) Atr=02(Bulk) MxPS=  64 Ivl=  0ms
-  E:  Ad=81(I) Atr=03(Int.) MxPS=  16 Ivl=128ms
-  C:* #Ifs= 2 Cfg#= 2 Atr=60 MxPwr=  0mA
-  I:  If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=02 Prot=01 Driver=acm
-  E:  Ad=81(I) Atr=03(Int.) MxPS=  16 Ivl=128ms
-  I:  If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=acm
-  E:  Ad=85(I) Atr=02(Bulk) MxPS=  64 Ivl=  0ms
-  E:  Ad=04(O) Atr=02(Bulk) MxPS=  64 Ivl=  0ms
-
-The presence of these three lines (and the Cls= 'comm' and 'data' classes)
-is important, it means it's an ACM device. The Driver=acm means the acm
-driver is used for the device. If you see only Cls=ff(vend.) then you're out
-of luck, you have a device with vendor specific-interface::
-
-  D:  Ver= 1.00 Cls=02(comm.) Sub=00 Prot=00 MxPS= 8 #Cfgs=  2
-  I:  If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=02 Prot=01 Driver=acm
-  I:  If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=acm
-
-In the system log you should see::
-
-  usb.c: USB new device connect, assigned device number 2
-  usb.c: kmalloc IF c7691fa0, numif 1
-  usb.c: kmalloc IF c7b5f3e0, numif 2
-  usb.c: skipped 4 class/vendor specific interface descriptors
-  usb.c: new device strings: Mfr=1, Product=2, SerialNumber=3
-  usb.c: USB device number 2 default language ID 0x409
-  Manufacturer: 3Com Inc.
-  Product: 3Com U.S. Robotics Pro ISDN TA
-  SerialNumber: UFT53A49BVT7
-  acm.c: probing config 1
-  acm.c: probing config 2
-  ttyACM0: USB ACM device
-  acm.c: acm_control_msg: rq: 0x22 val: 0x0 len: 0x0 result: 0
-  acm.c: acm_control_msg: rq: 0x20 val: 0x0 len: 0x7 result: 7
-  usb.c: acm driver claimed interface c7b5f3e0
-  usb.c: acm driver claimed interface c7b5f3f8
-  usb.c: acm driver claimed interface c7691fa0
-
-If all this seems to be OK, fire up minicom and set it to talk to the ttyACM
-device and try typing 'at'. If it responds with 'OK', then everything is
-working.
diff --git a/Documentation/usb/authorization.rst b/Documentation/usb/authorization.rst
new file mode 100644
index 000000000000..9e53909d04c2
--- /dev/null
+++ b/Documentation/usb/authorization.rst
@@ -0,0 +1,132 @@
+==============================================================
+Authorizing (or not) your USB devices to connect to the system
+==============================================================
+
+Copyright (C) 2007 Inaky Perez-Gonzalez <inaky@linux.intel.com> Intel Corporation
+
+This feature allows you to control if a USB device can be used (or
+not) in a system. This feature will allow you to implement a lock-down
+of USB devices, fully controlled by user space.
+
+As of now, when a USB device is connected it is configured and
+its interfaces are immediately made available to the users.  With this
+modification, only if root authorizes the device to be configured will
+then it be possible to use it.
+
+Usage
+=====
+
+Authorize a device to connect::
+
+	$ echo 1 > /sys/bus/usb/devices/DEVICE/authorized
+
+De-authorize a device::
+
+	$ echo 0 > /sys/bus/usb/devices/DEVICE/authorized
+
+Set new devices connected to hostX to be deauthorized by default (ie:
+lock down)::
+
+	$ echo 0 > /sys/bus/usb/devices/usbX/authorized_default
+
+Remove the lock down::
+
+	$ echo 1 > /sys/bus/usb/devices/usbX/authorized_default
+
+By default, Wired USB devices are authorized by default to
+connect. Wireless USB hosts deauthorize by default all new connected
+devices (this is so because we need to do an authentication phase
+before authorizing). Writing "2" to the authorized_default attribute
+causes kernel to only authorize by default devices connected to internal
+USB ports.
+
+
+Example system lockdown (lame)
+------------------------------
+
+Imagine you want to implement a lockdown so only devices of type XYZ
+can be connected (for example, it is a kiosk machine with a visible
+USB port)::
+
+  boot up
+  rc.local ->
+
+   for host in /sys/bus/usb/devices/usb*
+   do
+      echo 0 > $host/authorized_default
+   done
+
+Hookup an script to udev, for new USB devices::
+
+ if device_is_my_type $DEV
+ then
+   echo 1 > $device_path/authorized
+ done
+
+
+Now, device_is_my_type() is where the juice for a lockdown is. Just
+checking if the class, type and protocol match something is the worse
+security verification you can make (or the best, for someone willing
+to break it). If you need something secure, use crypto and Certificate
+Authentication or stuff like that. Something simple for an storage key
+could be::
+
+ function device_is_my_type()
+ {
+   echo 1 > authorized		# temporarily authorize it
+                                # FIXME: make sure none can mount it
+   mount DEVICENODE /mntpoint
+   sum=$(md5sum /mntpoint/.signature)
+   if [ $sum = $(cat /etc/lockdown/keysum) ]
+   then
+        echo "We are good, connected"
+        umount /mntpoint
+        # Other stuff so others can use it
+   else
+        echo 0 > authorized
+   fi
+ }
+
+
+Of course, this is lame, you'd want to do a real certificate
+verification stuff with PKI, so you don't depend on a shared secret,
+etc, but you get the idea. Anybody with access to a device gadget kit
+can fake descriptors and device info. Don't trust that. You are
+welcome.
+
+
+Interface authorization
+-----------------------
+
+There is a similar approach to allow or deny specific USB interfaces.
+That allows to block only a subset of an USB device.
+
+Authorize an interface::
+
+	$ echo 1 > /sys/bus/usb/devices/INTERFACE/authorized
+
+Deauthorize an interface::
+
+	$ echo 0 > /sys/bus/usb/devices/INTERFACE/authorized
+
+The default value for new interfaces
+on a particular USB bus can be changed, too.
+
+Allow interfaces per default::
+
+	$ echo 1 > /sys/bus/usb/devices/usbX/interface_authorized_default
+
+Deny interfaces per default::
+
+	$ echo 0 > /sys/bus/usb/devices/usbX/interface_authorized_default
+
+Per default the interface_authorized_default bit is 1.
+So all interfaces would authorized per default.
+
+Note:
+  If a deauthorized interface will be authorized so the driver probing must
+  be triggered manually by writing INTERFACE to /sys/bus/usb/drivers_probe
+
+For drivers that need multiple interfaces all needed interfaces should be
+authorized first. After that the drivers should be probed.
+This avoids side effects.
diff --git a/Documentation/usb/authorization.txt b/Documentation/usb/authorization.txt
deleted file mode 100644
index 9e53909d04c2..000000000000
--- a/Documentation/usb/authorization.txt
+++ /dev/null
@@ -1,132 +0,0 @@
-==============================================================
-Authorizing (or not) your USB devices to connect to the system
-==============================================================
-
-Copyright (C) 2007 Inaky Perez-Gonzalez <inaky@linux.intel.com> Intel Corporation
-
-This feature allows you to control if a USB device can be used (or
-not) in a system. This feature will allow you to implement a lock-down
-of USB devices, fully controlled by user space.
-
-As of now, when a USB device is connected it is configured and
-its interfaces are immediately made available to the users.  With this
-modification, only if root authorizes the device to be configured will
-then it be possible to use it.
-
-Usage
-=====
-
-Authorize a device to connect::
-
-	$ echo 1 > /sys/bus/usb/devices/DEVICE/authorized
-
-De-authorize a device::
-
-	$ echo 0 > /sys/bus/usb/devices/DEVICE/authorized
-
-Set new devices connected to hostX to be deauthorized by default (ie:
-lock down)::
-
-	$ echo 0 > /sys/bus/usb/devices/usbX/authorized_default
-
-Remove the lock down::
-
-	$ echo 1 > /sys/bus/usb/devices/usbX/authorized_default
-
-By default, Wired USB devices are authorized by default to
-connect. Wireless USB hosts deauthorize by default all new connected
-devices (this is so because we need to do an authentication phase
-before authorizing). Writing "2" to the authorized_default attribute
-causes kernel to only authorize by default devices connected to internal
-USB ports.
-
-
-Example system lockdown (lame)
-------------------------------
-
-Imagine you want to implement a lockdown so only devices of type XYZ
-can be connected (for example, it is a kiosk machine with a visible
-USB port)::
-
-  boot up
-  rc.local ->
-
-   for host in /sys/bus/usb/devices/usb*
-   do
-      echo 0 > $host/authorized_default
-   done
-
-Hookup an script to udev, for new USB devices::
-
- if device_is_my_type $DEV
- then
-   echo 1 > $device_path/authorized
- done
-
-
-Now, device_is_my_type() is where the juice for a lockdown is. Just
-checking if the class, type and protocol match something is the worse
-security verification you can make (or the best, for someone willing
-to break it). If you need something secure, use crypto and Certificate
-Authentication or stuff like that. Something simple for an storage key
-could be::
-
- function device_is_my_type()
- {
-   echo 1 > authorized		# temporarily authorize it
-                                # FIXME: make sure none can mount it
-   mount DEVICENODE /mntpoint
-   sum=$(md5sum /mntpoint/.signature)
-   if [ $sum = $(cat /etc/lockdown/keysum) ]
-   then
-        echo "We are good, connected"
-        umount /mntpoint
-        # Other stuff so others can use it
-   else
-        echo 0 > authorized
-   fi
- }
-
-
-Of course, this is lame, you'd want to do a real certificate
-verification stuff with PKI, so you don't depend on a shared secret,
-etc, but you get the idea. Anybody with access to a device gadget kit
-can fake descriptors and device info. Don't trust that. You are
-welcome.
-
-
-Interface authorization
------------------------
-
-There is a similar approach to allow or deny specific USB interfaces.
-That allows to block only a subset of an USB device.
-
-Authorize an interface::
-
-	$ echo 1 > /sys/bus/usb/devices/INTERFACE/authorized
-
-Deauthorize an interface::
-
-	$ echo 0 > /sys/bus/usb/devices/INTERFACE/authorized
-
-The default value for new interfaces
-on a particular USB bus can be changed, too.
-
-Allow interfaces per default::
-
-	$ echo 1 > /sys/bus/usb/devices/usbX/interface_authorized_default
-
-Deny interfaces per default::
-
-	$ echo 0 > /sys/bus/usb/devices/usbX/interface_authorized_default
-
-Per default the interface_authorized_default bit is 1.
-So all interfaces would authorized per default.
-
-Note:
-  If a deauthorized interface will be authorized so the driver probing must
-  be triggered manually by writing INTERFACE to /sys/bus/usb/drivers_probe
-
-For drivers that need multiple interfaces all needed interfaces should be
-authorized first. After that the drivers should be probed.
-This avoids side effects.
diff --git a/Documentation/usb/chipidea.rst b/Documentation/usb/chipidea.rst
new file mode 100644
index 000000000000..68473abe2823
--- /dev/null
+++ b/Documentation/usb/chipidea.rst
@@ -0,0 +1,133 @@
+==============================================
+ChipIdea Highspeed Dual Role Controller Driver
+==============================================
+
+1. How to test OTG FSM(HNP and SRP)
+-----------------------------------
+
+To show how to demo OTG HNP and SRP functions via sys input files
+with 2 Freescale i.MX6Q sabre SD boards.
+
+1.1 How to enable OTG FSM
+-------------------------
+
+1.1.1 Select CONFIG_USB_OTG_FSM in menuconfig, rebuild kernel
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Image and modules. If you want to check some internal
+variables for otg fsm, mount debugfs, there are 2 files
+which can show otg fsm variables and some controller registers value::
+
+	cat /sys/kernel/debug/ci_hdrc.0/otg
+	cat /sys/kernel/debug/ci_hdrc.0/registers
+
+1.1.2 Add below entries in your dts file for your controller node
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+	otg-rev = <0x0200>;
+	adp-disable;
+
+1.2 Test operations
+-------------------
+
+1) Power up 2 Freescale i.MX6Q sabre SD boards with gadget class driver loaded
+   (e.g. g_mass_storage).
+
+2) Connect 2 boards with usb cable with one end is micro A plug, the other end
+   is micro B plug.
+
+   The A-device(with micro A plug inserted) should enumerate B-device.
+
+3) Role switch
+
+   On B-device::
+
+	echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
+
+   B-device should take host role and enumerate A-device.
+
+4) A-device switch back to host.
+
+   On B-device::
+
+	echo 0 > /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
+
+   or, by introducing HNP polling, B-Host can know when A-peripheral wish
+   to be host role, so this role switch also can be trigged in A-peripheral
+   side by answering the polling from B-Host, this can be done on A-device::
+
+	echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_req
+
+   A-device should switch back to host and enumerate B-device.
+
+5) Remove B-device(unplug micro B plug) and insert again in 10 seconds,
+   A-device should enumerate B-device again.
+
+6) Remove B-device(unplug micro B plug) and insert again after 10 seconds,
+   A-device should NOT enumerate B-device.
+
+   if A-device wants to use bus:
+
+   On A-device::
+
+	echo 0 > /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_drop
+	echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_req
+
+   if B-device wants to use bus:
+
+   On B-device::
+
+	echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
+
+7) A-device power down the bus.
+
+   On A-device::
+
+	echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_drop
+
+   A-device should disconnect with B-device and power down the bus.
+
+8) B-device does data pulse for SRP.
+
+   On B-device::
+
+	echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
+
+   A-device should resume usb bus and enumerate B-device.
+
+1.3 Reference document
+----------------------
+"On-The-Go and Embedded Host Supplement to the USB Revision 2.0 Specification
+July 27, 2012 Revision 2.0 version 1.1a"
+
+2. How to enable USB as system wakeup source
+--------------------------------------------
+Below is the example for how to enable USB as system wakeup source
+at imx6 platform.
+
+2.1 Enable core's wakeup::
+
+	echo enabled > /sys/bus/platform/devices/ci_hdrc.0/power/wakeup
+
+2.2 Enable glue layer's wakeup::
+
+	echo enabled > /sys/bus/platform/devices/2184000.usb/power/wakeup
+
+2.3 Enable PHY's wakeup (optional)::
+
+	echo enabled > /sys/bus/platform/devices/20c9000.usbphy/power/wakeup
+
+2.4 Enable roothub's wakeup::
+
+	echo enabled > /sys/bus/usb/devices/usb1/power/wakeup
+
+2.5 Enable related device's wakeup::
+
+	echo enabled > /sys/bus/usb/devices/1-1/power/wakeup
+
+If the system has only one usb port, and you want usb wakeup at this port, you
+can use below script to enable usb wakeup::
+
+	for i in $(find /sys -name wakeup | grep usb);do echo enabled > $i;done;
diff --git a/Documentation/usb/chipidea.txt b/Documentation/usb/chipidea.txt
deleted file mode 100644
index 68473abe2823..000000000000
--- a/Documentation/usb/chipidea.txt
+++ /dev/null
@@ -1,133 +0,0 @@
-==============================================
-ChipIdea Highspeed Dual Role Controller Driver
-==============================================
-
-1. How to test OTG FSM(HNP and SRP)
------------------------------------
-
-To show how to demo OTG HNP and SRP functions via sys input files
-with 2 Freescale i.MX6Q sabre SD boards.
-
-1.1 How to enable OTG FSM
--------------------------
-
-1.1.1 Select CONFIG_USB_OTG_FSM in menuconfig, rebuild kernel
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Image and modules. If you want to check some internal
-variables for otg fsm, mount debugfs, there are 2 files
-which can show otg fsm variables and some controller registers value::
-
-	cat /sys/kernel/debug/ci_hdrc.0/otg
-	cat /sys/kernel/debug/ci_hdrc.0/registers
-
-1.1.2 Add below entries in your dts file for your controller node
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-::
-
-	otg-rev = <0x0200>;
-	adp-disable;
-
-1.2 Test operations
--------------------
-
-1) Power up 2 Freescale i.MX6Q sabre SD boards with gadget class driver loaded
-   (e.g. g_mass_storage).
-
-2) Connect 2 boards with usb cable with one end is micro A plug, the other end
-   is micro B plug.
-
-   The A-device(with micro A plug inserted) should enumerate B-device.
-
-3) Role switch
-
-   On B-device::
-
-	echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
-
-   B-device should take host role and enumerate A-device.
-
-4) A-device switch back to host.
-
-   On B-device::
-
-	echo 0 > /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
-
-   or, by introducing HNP polling, B-Host can know when A-peripheral wish
-   to be host role, so this role switch also can be trigged in A-peripheral
-   side by answering the polling from B-Host, this can be done on A-device::
-
-	echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_req
-
-   A-device should switch back to host and enumerate B-device.
-
-5) Remove B-device(unplug micro B plug) and insert again in 10 seconds,
-   A-device should enumerate B-device again.
-
-6) Remove B-device(unplug micro B plug) and insert again after 10 seconds,
-   A-device should NOT enumerate B-device.
-
-   if A-device wants to use bus:
-
-   On A-device::
-
-	echo 0 > /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_drop
-	echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_req
-
-   if B-device wants to use bus:
-
-   On B-device::
-
-	echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
-
-7) A-device power down the bus.
-
-   On A-device::
-
-	echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_drop
-
-   A-device should disconnect with B-device and power down the bus.
-
-8) B-device does data pulse for SRP.
-
-   On B-device::
-
-	echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
-
-   A-device should resume usb bus and enumerate B-device.
-
-1.3 Reference document
-----------------------
-"On-The-Go and Embedded Host Supplement to the USB Revision 2.0 Specification
-July 27, 2012 Revision 2.0 version 1.1a"
-
-2. How to enable USB as system wakeup source
---------------------------------------------
-Below is the example for how to enable USB as system wakeup source
-at imx6 platform.
-
-2.1 Enable core's wakeup::
-
-	echo enabled > /sys/bus/platform/devices/ci_hdrc.0/power/wakeup
-
-2.2 Enable glue layer's wakeup::
-
-	echo enabled > /sys/bus/platform/devices/2184000.usb/power/wakeup
-
-2.3 Enable PHY's wakeup (optional)::
-
-	echo enabled > /sys/bus/platform/devices/20c9000.usbphy/power/wakeup
-
-2.4 Enable roothub's wakeup::
-
-	echo enabled > /sys/bus/usb/devices/usb1/power/wakeup
-
-2.5 Enable related device's wakeup::
-
-	echo enabled > /sys/bus/usb/devices/1-1/power/wakeup
-
-If the system has only one usb port, and you want usb wakeup at this port, you
-can use below script to enable usb wakeup::
-
-	for i in $(find /sys -name wakeup | grep usb);do echo enabled > $i;done;
diff --git a/Documentation/usb/dwc3.rst b/Documentation/usb/dwc3.rst
new file mode 100644
index 000000000000..f94a7ba16573
--- /dev/null
+++ b/Documentation/usb/dwc3.rst
@@ -0,0 +1,53 @@
+===========
+DWC3 driver
+===========
+
+
+TODO
+~~~~
+
+Please pick something while reading :)
+
+- Convert interrupt handler to per-ep-thread-irq
+
+  As it turns out some DWC3-commands ~1ms to complete. Currently we spin
+  until the command completes which is bad.
+
+  Implementation idea:
+
+  - dwc core implements a demultiplexing irq chip for interrupts per
+    endpoint. The interrupt numbers are allocated during probe and belong
+    to the device. If MSI provides per-endpoint interrupt this dummy
+    interrupt chip can be replaced with "real" interrupts.
+  - interrupts are requested / allocated on usb_ep_enable() and removed on
+    usb_ep_disable(). Worst case are 32 interrupts, the lower limit is two
+    for ep0/1.
+  - dwc3_send_gadget_ep_cmd() will sleep in wait_for_completion_timeout()
+    until the command completes.
+  - the interrupt handler is split into the following pieces:
+
+    - primary handler of the device
+      goes through every event and calls generic_handle_irq() for event
+      it. On return from generic_handle_irq() in acknowledges the event
+      counter so interrupt goes away (eventually).
+
+    - threaded handler of the device
+      none
+
+    - primary handler of the EP-interrupt
+      reads the event and tries to process it. Everything that requires
+      sleeping is handed over to the Thread. The event is saved in an
+      per-endpoint data-structure.
+      We probably have to pay attention not to process events once we
+      handed something to thread so we don't process event X prio Y
+      where X > Y.
+
+    - threaded handler of the EP-interrupt
+      handles the remaining EP work which might sleep such as waiting
+      for command completion.
+
+  Latency:
+
+   There should be no increase in latency since the interrupt-thread has a
+   high priority and will be run before an average task in user land
+   (except the user changed priorities).
diff --git a/Documentation/usb/dwc3.txt b/Documentation/usb/dwc3.txt
deleted file mode 100644
index f94a7ba16573..000000000000
--- a/Documentation/usb/dwc3.txt
+++ /dev/null
@@ -1,53 +0,0 @@
-===========
-DWC3 driver
-===========
-
-
-TODO
-~~~~
-
-Please pick something while reading :)
-
-- Convert interrupt handler to per-ep-thread-irq
-
-  As it turns out some DWC3-commands ~1ms to complete. Currently we spin
-  until the command completes which is bad.
-
-  Implementation idea:
-
-  - dwc core implements a demultiplexing irq chip for interrupts per
-    endpoint. The interrupt numbers are allocated during probe and belong
-    to the device. If MSI provides per-endpoint interrupt this dummy
-    interrupt chip can be replaced with "real" interrupts.
-  - interrupts are requested / allocated on usb_ep_enable() and removed on
-    usb_ep_disable(). Worst case are 32 interrupts, the lower limit is two
-    for ep0/1.
-  - dwc3_send_gadget_ep_cmd() will sleep in wait_for_completion_timeout()
-    until the command completes.
-  - the interrupt handler is split into the following pieces:
-
-    - primary handler of the device
-      goes through every event and calls generic_handle_irq() for event
-      it. On return from generic_handle_irq() in acknowledges the event
-      counter so interrupt goes away (eventually).
-
-    - threaded handler of the device
-      none
-
-    - primary handler of the EP-interrupt
-      reads the event and tries to process it. Everything that requires
-      sleeping is handed over to the Thread. The event is saved in an
-      per-endpoint data-structure.
-      We probably have to pay attention not to process events once we
-      handed something to thread so we don't process event X prio Y
-      where X > Y.
-
-    - threaded handler of the EP-interrupt
-      handles the remaining EP work which might sleep such as waiting
-      for command completion.
-
-  Latency:
-
-   There should be no increase in latency since the interrupt-thread has a
-   high priority and will be run before an average task in user land
-   (except the user changed priorities).
diff --git a/Documentation/usb/ehci.rst b/Documentation/usb/ehci.rst
new file mode 100644
index 000000000000..31f650e7c1b4
--- /dev/null
+++ b/Documentation/usb/ehci.rst
@@ -0,0 +1,230 @@
+﻿===========
+EHCI driver
+===========
+
+27-Dec-2002
+
+The EHCI driver is used to talk to high speed USB 2.0 devices using
+USB 2.0-capable host controller hardware.  The USB 2.0 standard is
+compatible with the USB 1.1 standard. It defines three transfer speeds:
+
+    - "High Speed" 480 Mbit/sec (60 MByte/sec)
+    - "Full Speed" 12 Mbit/sec (1.5 MByte/sec)
+    - "Low Speed" 1.5 Mbit/sec
+
+USB 1.1 only addressed full speed and low speed.  High speed devices
+can be used on USB 1.1 systems, but they slow down to USB 1.1 speeds.
+
+USB 1.1 devices may also be used on USB 2.0 systems.  When plugged
+into an EHCI controller, they are given to a USB 1.1 "companion"
+controller, which is a OHCI or UHCI controller as normally used with
+such devices.  When USB 1.1 devices plug into USB 2.0 hubs, they
+interact with the EHCI controller through a "Transaction Translator"
+(TT) in the hub, which turns low or full speed transactions into
+high speed "split transactions" that don't waste transfer bandwidth.
+
+At this writing, this driver has been seen to work with implementations
+of EHCI from (in alphabetical order):  Intel, NEC, Philips, and VIA.
+Other EHCI implementations are becoming available from other vendors;
+you should expect this driver to work with them too.
+
+While usb-storage devices have been available since mid-2001 (working
+quite speedily on the 2.4 version of this driver), hubs have only
+been available since late 2001, and other kinds of high speed devices
+appear to be on hold until more systems come with USB 2.0 built-in.
+Such new systems have been available since early 2002, and became much
+more typical in the second half of 2002.
+
+Note that USB 2.0 support involves more than just EHCI.  It requires
+other changes to the Linux-USB core APIs, including the hub driver,
+but those changes haven't needed to really change the basic "usbcore"
+APIs exposed to USB device drivers.
+
+- David Brownell
+  <dbrownell@users.sourceforge.net>
+
+
+Functionality
+=============
+
+This driver is regularly tested on x86 hardware, and has also been
+used on PPC hardware so big/little endianness issues should be gone.
+It's believed to do all the right PCI magic so that I/O works even on
+systems with interesting DMA mapping issues.
+
+Transfer Types
+--------------
+
+At this writing the driver should comfortably handle all control, bulk,
+and interrupt transfers, including requests to USB 1.1 devices through
+transaction translators (TTs) in USB 2.0 hubs.  But you may find bugs.
+
+High Speed Isochronous (ISO) transfer support is also functional, but
+at this writing no Linux drivers have been using that support.
+
+Full Speed Isochronous transfer support, through transaction translators,
+is not yet available.  Note that split transaction support for ISO
+transfers can't share much code with the code for high speed ISO transfers,
+since EHCI represents these with a different data structure.  So for now,
+most USB audio and video devices can't be connected to high speed buses.
+
+Driver Behavior
+---------------
+
+Transfers of all types can be queued.  This means that control transfers
+from a driver on one interface (or through usbfs) won't interfere with
+ones from another driver, and that interrupt transfers can use periods
+of one frame without risking data loss due to interrupt processing costs.
+
+The EHCI root hub code hands off USB 1.1 devices to its companion
+controller.  This driver doesn't need to know anything about those
+drivers; a OHCI or UHCI driver that works already doesn't need to change
+just because the EHCI driver is also present.
+
+There are some issues with power management; suspend/resume doesn't
+behave quite right at the moment.
+
+Also, some shortcuts have been taken with the scheduling periodic
+transactions (interrupt and isochronous transfers).  These place some
+limits on the number of periodic transactions that can be scheduled,
+and prevent use of polling intervals of less than one frame.
+
+
+Use by
+======
+
+Assuming you have an EHCI controller (on a PCI card or motherboard)
+and have compiled this driver as a module, load this like::
+
+    # modprobe ehci-hcd
+
+and remove it by::
+
+    # rmmod ehci-hcd
+
+You should also have a driver for a "companion controller", such as
+"ohci-hcd"  or "uhci-hcd".  In case of any trouble with the EHCI driver,
+remove its module and then the driver for that companion controller will
+take over (at lower speed) all the devices that were previously handled
+by the EHCI driver.
+
+Module parameters (pass to "modprobe") include:
+
+    log2_irq_thresh (default 0):
+	Log2 of default interrupt delay, in microframes.  The default
+	value is 0, indicating 1 microframe (125 usec).  Maximum value
+	is 6, indicating 2^6 = 64 microframes.  This controls how often
+	the EHCI controller can issue interrupts.
+
+If you're using this driver on a 2.5 kernel, and you've enabled USB
+debugging support, you'll see three files in the "sysfs" directory for
+any EHCI controller:
+
+	"async"
+		dumps the asynchronous schedule, used for control
+		and bulk transfers.  Shows each active qh and the qtds
+		pending, usually one qtd per urb.  (Look at it with
+		usb-storage doing disk I/O; watch the request queues!)
+	"periodic"
+		dumps the periodic schedule, used for interrupt
+		and isochronous transfers.  Doesn't show qtds.
+	"registers"
+		show controller register state, and
+
+The contents of those files can help identify driver problems.
+
+
+Device drivers shouldn't care whether they're running over EHCI or not,
+but they may want to check for "usb_device->speed == USB_SPEED_HIGH".
+High speed devices can do things that full speed (or low speed) ones
+can't, such as "high bandwidth" periodic (interrupt or ISO) transfers.
+Also, some values in device descriptors (such as polling intervals for
+periodic transfers) use different encodings when operating at high speed.
+
+However, do make a point of testing device drivers through USB 2.0 hubs.
+Those hubs report some failures, such as disconnections, differently when
+transaction translators are in use; some drivers have been seen to behave
+badly when they see different faults than OHCI or UHCI report.
+
+
+Performance
+===========
+
+USB 2.0 throughput is gated by two main factors:  how fast the host
+controller can process requests, and how fast devices can respond to
+them.  The 480 Mbit/sec "raw transfer rate" is obeyed by all devices,
+but aggregate throughput is also affected by issues like delays between
+individual high speed packets, driver intelligence, and of course the
+overall system load.  Latency is also a performance concern.
+
+Bulk transfers are most often used where throughput is an issue.  It's
+good to keep in mind that bulk transfers are always in 512 byte packets,
+and at most 13 of those fit into one USB 2.0 microframe.  Eight USB 2.0
+microframes fit in a USB 1.1 frame; a microframe is 1 msec/8 = 125 usec.
+
+So more than 50 MByte/sec is available for bulk transfers, when both
+hardware and device driver software allow it.  Periodic transfer modes
+(isochronous and interrupt) allow the larger packet sizes which let you
+approach the quoted 480 MBit/sec transfer rate.
+
+Hardware Performance
+--------------------
+
+At this writing, individual USB 2.0 devices tend to max out at around
+20 MByte/sec transfer rates.  This is of course subject to change;
+and some devices now go faster, while others go slower.
+
+The first NEC implementation of EHCI seems to have a hardware bottleneck
+at around 28 MByte/sec aggregate transfer rate.  While this is clearly
+enough for a single device at 20 MByte/sec, putting three such devices
+onto one bus does not get you 60 MByte/sec.  The issue appears to be
+that the controller hardware won't do concurrent USB and PCI access,
+so that it's only trying six (or maybe seven) USB transactions each
+microframe rather than thirteen.  (Seems like a reasonable trade off
+for a product that beat all the others to market by over a year!)
+
+It's expected that newer implementations will better this, throwing
+more silicon real estate at the problem so that new motherboard chip
+sets will get closer to that 60 MByte/sec target.  That includes an
+updated implementation from NEC, as well as other vendors' silicon.
+
+There's a minimum latency of one microframe (125 usec) for the host
+to receive interrupts from the EHCI controller indicating completion
+of requests.  That latency is tunable; there's a module option.  By
+default ehci-hcd driver uses the minimum latency, which means that if
+you issue a control or bulk request you can often expect to learn that
+it completed in less than 250 usec (depending on transfer size).
+
+Software Performance
+--------------------
+
+To get even 20 MByte/sec transfer rates, Linux-USB device drivers will
+need to keep the EHCI queue full.  That means issuing large requests,
+or using bulk queuing if a series of small requests needs to be issued.
+When drivers don't do that, their performance results will show it.
+
+In typical situations, a usb_bulk_msg() loop writing out 4 KB chunks is
+going to waste more than half the USB 2.0 bandwidth.  Delays between the
+I/O completion and the driver issuing the next request will take longer
+than the I/O.  If that same loop used 16 KB chunks, it'd be better; a
+sequence of 128 KB chunks would waste a lot less.
+
+But rather than depending on such large I/O buffers to make synchronous
+I/O be efficient, it's better to just queue up several (bulk) requests
+to the HC, and wait for them all to complete (or be canceled on error).
+Such URB queuing should work with all the USB 1.1 HC drivers too.
+
+In the Linux 2.5 kernels, new usb_sg_*() api calls have been defined; they
+queue all the buffers from a scatterlist.  They also use scatterlist DMA
+mapping (which might apply an IOMMU) and IRQ reduction, all of which will
+help make high speed transfers run as fast as they can.
+
+
+TBD:
+   Interrupt and ISO transfer performance issues.  Those periodic
+   transfers are fully scheduled, so the main issue is likely to be how
+   to trigger "high bandwidth" modes.
+
+TBD:
+   More than standard 80% periodic bandwidth allocation is possible
+   through sysfs uframe_periodic_max parameter. Describe that.
diff --git a/Documentation/usb/ehci.txt b/Documentation/usb/ehci.txt
deleted file mode 100644
index 31f650e7c1b4..000000000000
--- a/Documentation/usb/ehci.txt
+++ /dev/null
@@ -1,230 +0,0 @@
-﻿===========
-EHCI driver
-===========
-
-27-Dec-2002
-
-The EHCI driver is used to talk to high speed USB 2.0 devices using
-USB 2.0-capable host controller hardware.  The USB 2.0 standard is
-compatible with the USB 1.1 standard. It defines three transfer speeds:
-
-    - "High Speed" 480 Mbit/sec (60 MByte/sec)
-    - "Full Speed" 12 Mbit/sec (1.5 MByte/sec)
-    - "Low Speed" 1.5 Mbit/sec
-
-USB 1.1 only addressed full speed and low speed.  High speed devices
-can be used on USB 1.1 systems, but they slow down to USB 1.1 speeds.
-
-USB 1.1 devices may also be used on USB 2.0 systems.  When plugged
-into an EHCI controller, they are given to a USB 1.1 "companion"
-controller, which is a OHCI or UHCI controller as normally used with
-such devices.  When USB 1.1 devices plug into USB 2.0 hubs, they
-interact with the EHCI controller through a "Transaction Translator"
-(TT) in the hub, which turns low or full speed transactions into
-high speed "split transactions" that don't waste transfer bandwidth.
-
-At this writing, this driver has been seen to work with implementations
-of EHCI from (in alphabetical order):  Intel, NEC, Philips, and VIA.
-Other EHCI implementations are becoming available from other vendors;
-you should expect this driver to work with them too.
-
-While usb-storage devices have been available since mid-2001 (working
-quite speedily on the 2.4 version of this driver), hubs have only
-been available since late 2001, and other kinds of high speed devices
-appear to be on hold until more systems come with USB 2.0 built-in.
-Such new systems have been available since early 2002, and became much
-more typical in the second half of 2002.
-
-Note that USB 2.0 support involves more than just EHCI.  It requires
-other changes to the Linux-USB core APIs, including the hub driver,
-but those changes haven't needed to really change the basic "usbcore"
-APIs exposed to USB device drivers.
-
-- David Brownell
-  <dbrownell@users.sourceforge.net>
-
-
-Functionality
-=============
-
-This driver is regularly tested on x86 hardware, and has also been
-used on PPC hardware so big/little endianness issues should be gone.
-It's believed to do all the right PCI magic so that I/O works even on
-systems with interesting DMA mapping issues.
-
-Transfer Types
---------------
-
-At this writing the driver should comfortably handle all control, bulk,
-and interrupt transfers, including requests to USB 1.1 devices through
-transaction translators (TTs) in USB 2.0 hubs.  But you may find bugs.
-
-High Speed Isochronous (ISO) transfer support is also functional, but
-at this writing no Linux drivers have been using that support.
-
-Full Speed Isochronous transfer support, through transaction translators,
-is not yet available.  Note that split transaction support for ISO
-transfers can't share much code with the code for high speed ISO transfers,
-since EHCI represents these with a different data structure.  So for now,
-most USB audio and video devices can't be connected to high speed buses.
-
-Driver Behavior
----------------
-
-Transfers of all types can be queued.  This means that control transfers
-from a driver on one interface (or through usbfs) won't interfere with
-ones from another driver, and that interrupt transfers can use periods
-of one frame without risking data loss due to interrupt processing costs.
-
-The EHCI root hub code hands off USB 1.1 devices to its companion
-controller.  This driver doesn't need to know anything about those
-drivers; a OHCI or UHCI driver that works already doesn't need to change
-just because the EHCI driver is also present.
-
-There are some issues with power management; suspend/resume doesn't
-behave quite right at the moment.
-
-Also, some shortcuts have been taken with the scheduling periodic
-transactions (interrupt and isochronous transfers).  These place some
-limits on the number of periodic transactions that can be scheduled,
-and prevent use of polling intervals of less than one frame.
-
-
-Use by
-======
-
-Assuming you have an EHCI controller (on a PCI card or motherboard)
-and have compiled this driver as a module, load this like::
-
-    # modprobe ehci-hcd
-
-and remove it by::
-
-    # rmmod ehci-hcd
-
-You should also have a driver for a "companion controller", such as
-"ohci-hcd"  or "uhci-hcd".  In case of any trouble with the EHCI driver,
-remove its module and then the driver for that companion controller will
-take over (at lower speed) all the devices that were previously handled
-by the EHCI driver.
-
-Module parameters (pass to "modprobe") include:
-
-    log2_irq_thresh (default 0):
-	Log2 of default interrupt delay, in microframes.  The default
-	value is 0, indicating 1 microframe (125 usec).  Maximum value
-	is 6, indicating 2^6 = 64 microframes.  This controls how often
-	the EHCI controller can issue interrupts.
-
-If you're using this driver on a 2.5 kernel, and you've enabled USB
-debugging support, you'll see three files in the "sysfs" directory for
-any EHCI controller:
-
-	"async"
-		dumps the asynchronous schedule, used for control
-		and bulk transfers.  Shows each active qh and the qtds
-		pending, usually one qtd per urb.  (Look at it with
-		usb-storage doing disk I/O; watch the request queues!)
-	"periodic"
-		dumps the periodic schedule, used for interrupt
-		and isochronous transfers.  Doesn't show qtds.
-	"registers"
-		show controller register state, and
-
-The contents of those files can help identify driver problems.
-
-
-Device drivers shouldn't care whether they're running over EHCI or not,
-but they may want to check for "usb_device->speed == USB_SPEED_HIGH".
-High speed devices can do things that full speed (or low speed) ones
-can't, such as "high bandwidth" periodic (interrupt or ISO) transfers.
-Also, some values in device descriptors (such as polling intervals for
-periodic transfers) use different encodings when operating at high speed.
-
-However, do make a point of testing device drivers through USB 2.0 hubs.
-Those hubs report some failures, such as disconnections, differently when
-transaction translators are in use; some drivers have been seen to behave
-badly when they see different faults than OHCI or UHCI report.
-
-
-Performance
-===========
-
-USB 2.0 throughput is gated by two main factors:  how fast the host
-controller can process requests, and how fast devices can respond to
-them.  The 480 Mbit/sec "raw transfer rate" is obeyed by all devices,
-but aggregate throughput is also affected by issues like delays between
-individual high speed packets, driver intelligence, and of course the
-overall system load.  Latency is also a performance concern.
-
-Bulk transfers are most often used where throughput is an issue.  It's
-good to keep in mind that bulk transfers are always in 512 byte packets,
-and at most 13 of those fit into one USB 2.0 microframe.  Eight USB 2.0
-microframes fit in a USB 1.1 frame; a microframe is 1 msec/8 = 125 usec.
-
-So more than 50 MByte/sec is available for bulk transfers, when both
-hardware and device driver software allow it.  Periodic transfer modes
-(isochronous and interrupt) allow the larger packet sizes which let you
-approach the quoted 480 MBit/sec transfer rate.
-
-Hardware Performance
---------------------
-
-At this writing, individual USB 2.0 devices tend to max out at around
-20 MByte/sec transfer rates.  This is of course subject to change;
-and some devices now go faster, while others go slower.
-
-The first NEC implementation of EHCI seems to have a hardware bottleneck
-at around 28 MByte/sec aggregate transfer rate.  While this is clearly
-enough for a single device at 20 MByte/sec, putting three such devices
-onto one bus does not get you 60 MByte/sec.  The issue appears to be
-that the controller hardware won't do concurrent USB and PCI access,
-so that it's only trying six (or maybe seven) USB transactions each
-microframe rather than thirteen.  (Seems like a reasonable trade off
-for a product that beat all the others to market by over a year!)
-
-It's expected that newer implementations will better this, throwing
-more silicon real estate at the problem so that new motherboard chip
-sets will get closer to that 60 MByte/sec target.  That includes an
-updated implementation from NEC, as well as other vendors' silicon.
-
-There's a minimum latency of one microframe (125 usec) for the host
-to receive interrupts from the EHCI controller indicating completion
-of requests.  That latency is tunable; there's a module option.  By
-default ehci-hcd driver uses the minimum latency, which means that if
-you issue a control or bulk request you can often expect to learn that
-it completed in less than 250 usec (depending on transfer size).
-
-Software Performance
---------------------
-
-To get even 20 MByte/sec transfer rates, Linux-USB device drivers will
-need to keep the EHCI queue full.  That means issuing large requests,
-or using bulk queuing if a series of small requests needs to be issued.
-When drivers don't do that, their performance results will show it.
-
-In typical situations, a usb_bulk_msg() loop writing out 4 KB chunks is
-going to waste more than half the USB 2.0 bandwidth.  Delays between the
-I/O completion and the driver issuing the next request will take longer
-than the I/O.  If that same loop used 16 KB chunks, it'd be better; a
-sequence of 128 KB chunks would waste a lot less.
-
-But rather than depending on such large I/O buffers to make synchronous
-I/O be efficient, it's better to just queue up several (bulk) requests
-to the HC, and wait for them all to complete (or be canceled on error).
-Such URB queuing should work with all the USB 1.1 HC drivers too.
-
-In the Linux 2.5 kernels, new usb_sg_*() api calls have been defined; they
-queue all the buffers from a scatterlist.  They also use scatterlist DMA
-mapping (which might apply an IOMMU) and IRQ reduction, all of which will
-help make high speed transfers run as fast as they can.
-
-
-TBD:
-   Interrupt and ISO transfer performance issues.  Those periodic
-   transfers are fully scheduled, so the main issue is likely to be how
-   to trigger "high bandwidth" modes.
-
-TBD:
-   More than standard 80% periodic bandwidth allocation is possible
-   through sysfs uframe_periodic_max parameter. Describe that.
diff --git a/Documentation/usb/functionfs.rst b/Documentation/usb/functionfs.rst
new file mode 100644
index 000000000000..7fdc6d840ac5
--- /dev/null
+++ b/Documentation/usb/functionfs.rst
@@ -0,0 +1,68 @@
+====================
+How FunctionFS works
+====================
+
+From kernel point of view it is just a composite function with some
+unique behaviour.  It may be added to an USB configuration only after
+the user space driver has registered by writing descriptors and
+strings (the user space program has to provide the same information
+that kernel level composite functions provide when they are added to
+the configuration).
+
+This in particular means that the composite initialisation functions
+may not be in init section (ie. may not use the __init tag).
+
+From user space point of view it is a file system which when
+mounted provides an "ep0" file.  User space driver need to
+write descriptors and strings to that file.  It does not need
+to worry about endpoints, interfaces or strings numbers but
+simply provide descriptors such as if the function was the
+only one (endpoints and strings numbers starting from one and
+interface numbers starting from zero).  The FunctionFS changes
+them as needed also handling situation when numbers differ in
+different configurations.
+
+When descriptors and strings are written "ep#" files appear
+(one for each declared endpoint) which handle communication on
+a single endpoint.  Again, FunctionFS takes care of the real
+numbers and changing of the configuration (which means that
+"ep1" file may be really mapped to (say) endpoint 3 (and when
+configuration changes to (say) endpoint 2)).  "ep0" is used
+for receiving events and handling setup requests.
+
+When all files are closed the function disables itself.
+
+What I also want to mention is that the FunctionFS is designed in such
+a way that it is possible to mount it several times so in the end
+a gadget could use several FunctionFS functions. The idea is that
+each FunctionFS instance is identified by the device name used
+when mounting.
+
+One can imagine a gadget that has an Ethernet, MTP and HID interfaces
+where the last two are implemented via FunctionFS.  On user space
+level it would look like this::
+
+  $ insmod g_ffs.ko idVendor=<ID> iSerialNumber=<string> functions=mtp,hid
+  $ mkdir /dev/ffs-mtp && mount -t functionfs mtp /dev/ffs-mtp
+  $ ( cd /dev/ffs-mtp && mtp-daemon ) &
+  $ mkdir /dev/ffs-hid && mount -t functionfs hid /dev/ffs-hid
+  $ ( cd /dev/ffs-hid && hid-daemon ) &
+
+On kernel level the gadget checks ffs_data->dev_name to identify
+whether it's FunctionFS designed for MTP ("mtp") or HID ("hid").
+
+If no "functions" module parameters is supplied, the driver accepts
+just one function with any name.
+
+When "functions" module parameter is supplied, only functions
+with listed names are accepted. In particular, if the "functions"
+parameter's value is just a one-element list, then the behaviour
+is similar to when there is no "functions" at all; however,
+only a function with the specified name is accepted.
+
+The gadget is registered only after all the declared function
+filesystems have been mounted and USB descriptors of all functions
+have been written to their ep0's.
+
+Conversely, the gadget is unregistered after the first USB function
+closes its endpoints.
diff --git a/Documentation/usb/functionfs.txt b/Documentation/usb/functionfs.txt
deleted file mode 100644
index 7fdc6d840ac5..000000000000
--- a/Documentation/usb/functionfs.txt
+++ /dev/null
@@ -1,68 +0,0 @@
-====================
-How FunctionFS works
-====================
-
-From kernel point of view it is just a composite function with some
-unique behaviour.  It may be added to an USB configuration only after
-the user space driver has registered by writing descriptors and
-strings (the user space program has to provide the same information
-that kernel level composite functions provide when they are added to
-the configuration).
-
-This in particular means that the composite initialisation functions
-may not be in init section (ie. may not use the __init tag).
-
-From user space point of view it is a file system which when
-mounted provides an "ep0" file.  User space driver need to
-write descriptors and strings to that file.  It does not need
-to worry about endpoints, interfaces or strings numbers but
-simply provide descriptors such as if the function was the
-only one (endpoints and strings numbers starting from one and
-interface numbers starting from zero).  The FunctionFS changes
-them as needed also handling situation when numbers differ in
-different configurations.
-
-When descriptors and strings are written "ep#" files appear
-(one for each declared endpoint) which handle communication on
-a single endpoint.  Again, FunctionFS takes care of the real
-numbers and changing of the configuration (which means that
-"ep1" file may be really mapped to (say) endpoint 3 (and when
-configuration changes to (say) endpoint 2)).  "ep0" is used
-for receiving events and handling setup requests.
-
-When all files are closed the function disables itself.
-
-What I also want to mention is that the FunctionFS is designed in such
-a way that it is possible to mount it several times so in the end
-a gadget could use several FunctionFS functions. The idea is that
-each FunctionFS instance is identified by the device name used
-when mounting.
-
-One can imagine a gadget that has an Ethernet, MTP and HID interfaces
-where the last two are implemented via FunctionFS.  On user space
-level it would look like this::
-
-  $ insmod g_ffs.ko idVendor=<ID> iSerialNumber=<string> functions=mtp,hid
-  $ mkdir /dev/ffs-mtp && mount -t functionfs mtp /dev/ffs-mtp
-  $ ( cd /dev/ffs-mtp && mtp-daemon ) &
-  $ mkdir /dev/ffs-hid && mount -t functionfs hid /dev/ffs-hid
-  $ ( cd /dev/ffs-hid && hid-daemon ) &
-
-On kernel level the gadget checks ffs_data->dev_name to identify
-whether it's FunctionFS designed for MTP ("mtp") or HID ("hid").
-
-If no "functions" module parameters is supplied, the driver accepts
-just one function with any name.
-
-When "functions" module parameter is supplied, only functions
-with listed names are accepted. In particular, if the "functions"
-parameter's value is just a one-element list, then the behaviour
-is similar to when there is no "functions" at all; however,
-only a function with the specified name is accepted.
-
-The gadget is registered only after all the declared function
-filesystems have been mounted and USB descriptors of all functions
-have been written to their ep0's.
-
-Conversely, the gadget is unregistered after the first USB function
-closes its endpoints.
diff --git a/Documentation/usb/gadget-testing.rst b/Documentation/usb/gadget-testing.rst
new file mode 100644
index 000000000000..2eeb3e9299e4
--- /dev/null
+++ b/Documentation/usb/gadget-testing.rst
@@ -0,0 +1,934 @@
+==============
+Gadget Testing
+==============
+
+This file summarizes information on basic testing of USB functions
+provided by gadgets.
+
+.. contents
+
+   1. ACM function
+   2. ECM function
+   3. ECM subset function
+   4. EEM function
+   5. FFS function
+   6. HID function
+   7. LOOPBACK function
+   8. MASS STORAGE function
+   9. MIDI function
+   10. NCM function
+   11. OBEX function
+   12. PHONET function
+   13. RNDIS function
+   14. SERIAL function
+   15. SOURCESINK function
+   16. UAC1 function (legacy implementation)
+   17. UAC2 function
+   18. UVC function
+   19. PRINTER function
+   20. UAC1 function (new API)
+
+
+1. ACM function
+===============
+
+The function is provided by usb_f_acm.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "acm".
+The ACM function provides just one attribute in its function directory:
+
+	port_num
+
+The attribute is read-only.
+
+There can be at most 4 ACM/generic serial/OBEX ports in the system.
+
+
+Testing the ACM function
+------------------------
+
+On the host::
+
+	cat > /dev/ttyACM<X>
+
+On the device::
+
+	cat /dev/ttyGS<Y>
+
+then the other way round
+
+On the device::
+
+	cat > /dev/ttyGS<Y>
+
+On the host::
+
+	cat /dev/ttyACM<X>
+
+2. ECM function
+===============
+
+The function is provided by usb_f_ecm.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "ecm".
+The ECM function provides these attributes in its function directory:
+
+	=============== ==================================================
+	ifname		network device interface name associated with this
+			function instance
+	qmult		queue length multiplier for high and super speed
+	host_addr	MAC address of host's end of this
+			Ethernet over USB link
+	dev_addr	MAC address of device's end of this
+			Ethernet over USB link
+	=============== ==================================================
+
+and after creating the functions/ecm.<instance name> they contain default
+values: qmult is 5, dev_addr and host_addr are randomly selected.
+Except for ifname they can be written to until the function is linked to a
+configuration. The ifname is read-only and contains the name of the interface
+which was assigned by the net core, e. g. usb0.
+
+Testing the ECM function
+------------------------
+
+Configure IP addresses of the device and the host. Then:
+
+On the device::
+
+	ping <host's IP>
+
+On the host::
+
+	ping <device's IP>
+
+3. ECM subset function
+======================
+
+The function is provided by usb_f_ecm_subset.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "geth".
+The ECM subset function provides these attributes in its function directory:
+
+	=============== ==================================================
+	ifname		network device interface name associated with this
+			function instance
+	qmult		queue length multiplier for high and super speed
+	host_addr	MAC address of host's end of this
+			Ethernet over USB link
+	dev_addr	MAC address of device's end of this
+			Ethernet over USB link
+	=============== ==================================================
+
+and after creating the functions/ecm.<instance name> they contain default
+values: qmult is 5, dev_addr and host_addr are randomly selected.
+Except for ifname they can be written to until the function is linked to a
+configuration. The ifname is read-only and contains the name of the interface
+which was assigned by the net core, e. g. usb0.
+
+Testing the ECM subset function
+-------------------------------
+
+Configure IP addresses of the device and the host. Then:
+
+On the device::
+
+	ping <host's IP>
+
+On the host::
+
+	ping <device's IP>
+
+4. EEM function
+===============
+
+The function is provided by usb_f_eem.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "eem".
+The EEM function provides these attributes in its function directory:
+
+	=============== ==================================================
+	ifname		network device interface name associated with this
+			function instance
+	qmult		queue length multiplier for high and super speed
+	host_addr	MAC address of host's end of this
+			Ethernet over USB link
+	dev_addr	MAC address of device's end of this
+			Ethernet over USB link
+	=============== ==================================================
+
+and after creating the functions/eem.<instance name> they contain default
+values: qmult is 5, dev_addr and host_addr are randomly selected.
+Except for ifname they can be written to until the function is linked to a
+configuration. The ifname is read-only and contains the name of the interface
+which was assigned by the net core, e. g. usb0.
+
+Testing the EEM function
+------------------------
+
+Configure IP addresses of the device and the host. Then:
+
+On the device::
+
+	ping <host's IP>
+
+On the host::
+
+	ping <device's IP>
+
+5. FFS function
+===============
+
+The function is provided by usb_f_fs.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "ffs".
+The function directory is intentionally empty and not modifiable.
+
+After creating the directory there is a new instance (a "device") of FunctionFS
+available in the system. Once a "device" is available, the user should follow
+the standard procedure for using FunctionFS (mount it, run the userspace
+process which implements the function proper). The gadget should be enabled
+by writing a suitable string to usb_gadget/<gadget>/UDC.
+
+Testing the FFS function
+------------------------
+
+On the device: start the function's userspace daemon, enable the gadget
+
+On the host: use the USB function provided by the device
+
+6. HID function
+===============
+
+The function is provided by usb_f_hid.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "hid".
+The HID function provides these attributes in its function directory:
+
+	=============== ===========================================
+	protocol	HID protocol to use
+	report_desc	data to be used in HID reports, except data
+			passed with /dev/hidg<X>
+	report_length	HID report length
+	subclass	HID subclass to use
+	=============== ===========================================
+
+For a keyboard the protocol and the subclass are 1, the report_length is 8,
+while the report_desc is::
+
+  $ hd my_report_desc
+  00000000  05 01 09 06 a1 01 05 07  19 e0 29 e7 15 00 25 01  |..........)...%.|
+  00000010  75 01 95 08 81 02 95 01  75 08 81 03 95 05 75 01  |u.......u.....u.|
+  00000020  05 08 19 01 29 05 91 02  95 01 75 03 91 03 95 06  |....).....u.....|
+  00000030  75 08 15 00 25 65 05 07  19 00 29 65 81 00 c0     |u...%e....)e...|
+  0000003f
+
+Such a sequence of bytes can be stored to the attribute with echo::
+
+  $ echo -ne \\x05\\x01\\x09\\x06\\xa1.....
+
+Testing the HID function
+------------------------
+
+Device:
+
+- create the gadget
+- connect the gadget to a host, preferably not the one used
+  to control the gadget
+- run a program which writes to /dev/hidg<N>, e.g.
+  a userspace program found in Documentation/usb/gadget_hid.rst::
+
+	$ ./hid_gadget_test /dev/hidg0 keyboard
+
+Host:
+
+- observe the keystrokes from the gadget
+
+7. LOOPBACK function
+====================
+
+The function is provided by usb_f_ss_lb.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "Loopback".
+The LOOPBACK function provides these attributes in its function directory:
+
+	=============== =======================
+	qlen		depth of loopback queue
+	bulk_buflen	buffer length
+	=============== =======================
+
+Testing the LOOPBACK function
+-----------------------------
+
+device: run the gadget
+
+host: test-usb (tools/usb/testusb.c)
+
+8. MASS STORAGE function
+========================
+
+The function is provided by usb_f_mass_storage.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "mass_storage".
+The MASS STORAGE function provides these attributes in its directory:
+files:
+
+	=============== ==============================================
+	stall		Set to permit function to halt bulk endpoints.
+			Disabled on some USB devices known not to work
+			correctly. You should set it to true.
+	num_buffers	Number of pipeline buffers. Valid numbers
+			are 2..4. Available only if
+			CONFIG_USB_GADGET_DEBUG_FILES is set.
+	=============== ==============================================
+
+and a default lun.0 directory corresponding to SCSI LUN #0.
+
+A new lun can be added with mkdir::
+
+	$ mkdir functions/mass_storage.0/partition.5
+
+Lun numbering does not have to be continuous, except for lun #0 which is
+created by default. A maximum of 8 luns can be specified and they all must be
+named following the <name>.<number> scheme. The numbers can be 0..8.
+Probably a good convention is to name the luns "lun.<number>",
+although it is not mandatory.
+
+In each lun directory there are the following attribute files:
+
+	=============== ==============================================
+	file		The path to the backing file for the LUN.
+			Required if LUN is not marked as removable.
+	ro		Flag specifying access to the LUN shall be
+			read-only. This is implied if CD-ROM emulation
+			is enabled as well as when it was impossible
+			to open "filename" in R/W mode.
+	removable	Flag specifying that LUN shall be indicated as
+			being removable.
+	cdrom		Flag specifying that LUN shall be reported as
+			being a CD-ROM.
+	nofua		Flag specifying that FUA flag
+			in SCSI WRITE(10,12)
+	=============== ==============================================
+
+Testing the MASS STORAGE function
+---------------------------------
+
+device: connect the gadget, enable it
+host: dmesg, see the USB drives appear (if system configured to automatically
+mount)
+
+9. MIDI function
+================
+
+The function is provided by usb_f_midi.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "midi".
+The MIDI function provides these attributes in its function directory:
+
+	=============== ====================================
+	buflen		MIDI buffer length
+	id		ID string for the USB MIDI adapter
+	in_ports	number of MIDI input ports
+	index		index value for the USB MIDI adapter
+	out_ports	number of MIDI output ports
+	qlen		USB read request queue length
+	=============== ====================================
+
+Testing the MIDI function
+-------------------------
+
+There are two cases: playing a mid from the gadget to
+the host and playing a mid from the host to the gadget.
+
+1) Playing a mid from the gadget to the host:
+
+host::
+
+  $ arecordmidi -l
+   Port    Client name                      Port name
+   14:0    Midi Through                     Midi Through Port-0
+   24:0    MIDI Gadget                      MIDI Gadget MIDI 1
+  $ arecordmidi -p 24:0 from_gadget.mid
+
+gadget::
+
+  $ aplaymidi -l
+   Port    Client name                      Port name
+   20:0    f_midi                           f_midi
+
+  $ aplaymidi -p 20:0 to_host.mid
+
+2) Playing a mid from the host to the gadget
+
+gadget::
+
+  $ arecordmidi -l
+   Port    Client name                      Port name
+   20:0    f_midi                           f_midi
+
+  $ arecordmidi -p 20:0 from_host.mid
+
+host::
+
+  $ aplaymidi -l
+   Port    Client name                      Port name
+   14:0    Midi Through                     Midi Through Port-0
+   24:0    MIDI Gadget                      MIDI Gadget MIDI 1
+
+  $ aplaymidi -p24:0 to_gadget.mid
+
+The from_gadget.mid should sound identical to the to_host.mid.
+
+The from_host.id should sound identical to the to_gadget.mid.
+
+MIDI files can be played to speakers/headphones with e.g. timidity installed::
+
+  $ aplaymidi -l
+   Port    Client name                      Port name
+   14:0    Midi Through                     Midi Through Port-0
+   24:0    MIDI Gadget                      MIDI Gadget MIDI 1
+  128:0    TiMidity                         TiMidity port 0
+  128:1    TiMidity                         TiMidity port 1
+  128:2    TiMidity                         TiMidity port 2
+  128:3    TiMidity                         TiMidity port 3
+
+  $ aplaymidi -p 128:0 file.mid
+
+MIDI ports can be logically connected using the aconnect utility, e.g.::
+
+  $ aconnect 24:0 128:0 # try it on the host
+
+After the gadget's MIDI port is connected to timidity's MIDI port,
+whatever is played at the gadget side with aplaymidi -l is audible
+in host's speakers/headphones.
+
+10. NCM function
+================
+
+The function is provided by usb_f_ncm.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "ncm".
+The NCM function provides these attributes in its function directory:
+
+	=============== ==================================================
+	ifname		network device interface name associated with this
+			function instance
+	qmult		queue length multiplier for high and super speed
+	host_addr	MAC address of host's end of this
+			Ethernet over USB link
+	dev_addr	MAC address of device's end of this
+			Ethernet over USB link
+	=============== ==================================================
+
+and after creating the functions/ncm.<instance name> they contain default
+values: qmult is 5, dev_addr and host_addr are randomly selected.
+Except for ifname they can be written to until the function is linked to a
+configuration. The ifname is read-only and contains the name of the interface
+which was assigned by the net core, e. g. usb0.
+
+Testing the NCM function
+------------------------
+
+Configure IP addresses of the device and the host. Then:
+
+On the device::
+
+	ping <host's IP>
+
+On the host::
+
+	ping <device's IP>
+
+11. OBEX function
+=================
+
+The function is provided by usb_f_obex.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "obex".
+The OBEX function provides just one attribute in its function directory:
+
+	port_num
+
+The attribute is read-only.
+
+There can be at most 4 ACM/generic serial/OBEX ports in the system.
+
+Testing the OBEX function
+-------------------------
+
+On device::
+
+	seriald -f /dev/ttyGS<Y> -s 1024
+
+On host::
+
+	serialc -v <vendorID> -p <productID> -i<interface#> -a1 -s1024 \
+                -t<out endpoint addr> -r<in endpoint addr>
+
+where seriald and serialc are Felipe's utilities found here:
+
+	https://github.com/felipebalbi/usb-tools.git master
+
+12. PHONET function
+===================
+
+The function is provided by usb_f_phonet.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "phonet".
+The PHONET function provides just one attribute in its function directory:
+
+	=============== ==================================================
+	ifname		network device interface name associated with this
+			function instance
+	=============== ==================================================
+
+Testing the PHONET function
+---------------------------
+
+It is not possible to test the SOCK_STREAM protocol without a specific piece
+of hardware, so only SOCK_DGRAM has been tested. For the latter to work,
+in the past I had to apply the patch mentioned here:
+
+http://www.spinics.net/lists/linux-usb/msg85689.html
+
+These tools are required:
+
+git://git.gitorious.org/meego-cellular/phonet-utils.git
+
+On the host::
+
+	$ ./phonet -a 0x10 -i usbpn0
+	$ ./pnroute add 0x6c usbpn0
+	$./pnroute add 0x10 usbpn0
+	$ ifconfig usbpn0 up
+
+On the device::
+
+	$ ./phonet -a 0x6c -i upnlink0
+	$ ./pnroute add 0x10 upnlink0
+	$ ifconfig upnlink0 up
+
+Then a test program can be used::
+
+	http://www.spinics.net/lists/linux-usb/msg85690.html
+
+On the device::
+
+	$ ./pnxmit -a 0x6c -r
+
+On the host::
+
+	$ ./pnxmit -a 0x10 -s 0x6c
+
+As a result some data should be sent from host to device.
+Then the other way round:
+
+On the host::
+
+	$ ./pnxmit -a 0x10 -r
+
+On the device::
+
+	$ ./pnxmit -a 0x6c -s 0x10
+
+13. RNDIS function
+==================
+
+The function is provided by usb_f_rndis.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "rndis".
+The RNDIS function provides these attributes in its function directory:
+
+	=============== ==================================================
+	ifname		network device interface name associated with this
+			function instance
+	qmult		queue length multiplier for high and super speed
+	host_addr	MAC address of host's end of this
+			Ethernet over USB link
+	dev_addr	MAC address of device's end of this
+			Ethernet over USB link
+	=============== ==================================================
+
+and after creating the functions/rndis.<instance name> they contain default
+values: qmult is 5, dev_addr and host_addr are randomly selected.
+Except for ifname they can be written to until the function is linked to a
+configuration. The ifname is read-only and contains the name of the interface
+which was assigned by the net core, e. g. usb0.
+
+Testing the RNDIS function
+--------------------------
+
+Configure IP addresses of the device and the host. Then:
+
+On the device::
+
+	ping <host's IP>
+
+On the host::
+
+	ping <device's IP>
+
+14. SERIAL function
+===================
+
+The function is provided by usb_f_gser.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "gser".
+The SERIAL function provides just one attribute in its function directory:
+
+	port_num
+
+The attribute is read-only.
+
+There can be at most 4 ACM/generic serial/OBEX ports in the system.
+
+Testing the SERIAL function
+---------------------------
+
+On host::
+
+	insmod usbserial
+	echo VID PID >/sys/bus/usb-serial/drivers/generic/new_id
+
+On host::
+
+	cat > /dev/ttyUSB<X>
+
+On target::
+
+	cat /dev/ttyGS<Y>
+
+then the other way round
+
+On target::
+
+	cat > /dev/ttyGS<Y>
+
+On host::
+
+	cat /dev/ttyUSB<X>
+
+15. SOURCESINK function
+=======================
+
+The function is provided by usb_f_ss_lb.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "SourceSink".
+The SOURCESINK function provides these attributes in its function directory:
+
+	=============== ==================================
+	pattern		0 (all zeros), 1 (mod63), 2 (none)
+	isoc_interval	1..16
+	isoc_maxpacket	0 - 1023 (fs), 0 - 1024 (hs/ss)
+	isoc_mult	0..2 (hs/ss only)
+	isoc_maxburst	0..15 (ss only)
+	bulk_buflen	buffer length
+	bulk_qlen	depth of queue for bulk
+	iso_qlen	depth of queue for iso
+	=============== ==================================
+
+Testing the SOURCESINK function
+-------------------------------
+
+device: run the gadget
+
+host: test-usb (tools/usb/testusb.c)
+
+
+16. UAC1 function (legacy implementation)
+=========================================
+
+The function is provided by usb_f_uac1_legacy.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory
+is "uac1_legacy".
+The uac1 function provides these attributes in its function directory:
+
+	=============== ====================================
+	audio_buf_size	audio buffer size
+	fn_cap		capture pcm device file name
+	fn_cntl		control device file name
+	fn_play		playback pcm device file name
+	req_buf_size	ISO OUT endpoint request buffer size
+	req_count	ISO OUT endpoint request count
+	=============== ====================================
+
+The attributes have sane default values.
+
+Testing the UAC1 function
+-------------------------
+
+device: run the gadget
+
+host::
+
+	aplay -l # should list our USB Audio Gadget
+
+17. UAC2 function
+=================
+
+The function is provided by usb_f_uac2.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "uac2".
+The uac2 function provides these attributes in its function directory:
+
+	=============== ====================================================
+	c_chmask	capture channel mask
+	c_srate		capture sampling rate
+	c_ssize		capture sample size (bytes)
+	p_chmask	playback channel mask
+	p_srate		playback sampling rate
+	p_ssize		playback sample size (bytes)
+	req_number	the number of pre-allocated request for both capture
+			and playback
+	=============== ====================================================
+
+The attributes have sane default values.
+
+Testing the UAC2 function
+-------------------------
+
+device: run the gadget
+host: aplay -l # should list our USB Audio Gadget
+
+This function does not require real hardware support, it just
+sends a stream of audio data to/from the host. In order to
+actually hear something at the device side, a command similar
+to this must be used at the device side::
+
+	$ arecord -f dat -t wav -D hw:2,0 | aplay -D hw:0,0 &
+
+e.g.::
+
+	$ arecord -f dat -t wav -D hw:CARD=UAC2Gadget,DEV=0 | \
+	  aplay -D default:CARD=OdroidU3
+
+18. UVC function
+================
+
+The function is provided by usb_f_uvc.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "uvc".
+The uvc function provides these attributes in its function directory:
+
+	=================== ================================================
+	streaming_interval  interval for polling endpoint for data transfers
+	streaming_maxburst  bMaxBurst for super speed companion descriptor
+	streaming_maxpacket maximum packet size this endpoint is capable of
+			    sending or receiving when this configuration is
+			    selected
+	=================== ================================================
+
+There are also "control" and "streaming" subdirectories, each of which contain
+a number of their subdirectories. There are some sane defaults provided, but
+the user must provide the following:
+
+	================== ====================================================
+	control header     create in control/header, link from control/class/fs
+			   and/or control/class/ss
+	streaming header   create in streaming/header, link from
+			   streaming/class/fs and/or streaming/class/hs and/or
+			   streaming/class/ss
+	format description create in streaming/mjpeg and/or
+			   streaming/uncompressed
+	frame description  create in streaming/mjpeg/<format> and/or in
+			   streaming/uncompressed/<format>
+	================== ====================================================
+
+Each frame description contains frame interval specification, and each
+such specification consists of a number of lines with an inverval value
+in each line. The rules stated above are best illustrated with an example::
+
+  # mkdir functions/uvc.usb0/control/header/h
+  # cd functions/uvc.usb0/control/
+  # ln -s header/h class/fs
+  # ln -s header/h class/ss
+  # mkdir -p functions/uvc.usb0/streaming/uncompressed/u/360p
+  # cat <<EOF > functions/uvc.usb0/streaming/uncompressed/u/360p/dwFrameInterval
+  666666
+  1000000
+  5000000
+  EOF
+  # cd $GADGET_CONFIGFS_ROOT
+  # mkdir functions/uvc.usb0/streaming/header/h
+  # cd functions/uvc.usb0/streaming/header/h
+  # ln -s ../../uncompressed/u
+  # cd ../../class/fs
+  # ln -s ../../header/h
+  # cd ../../class/hs
+  # ln -s ../../header/h
+  # cd ../../class/ss
+  # ln -s ../../header/h
+
+
+Testing the UVC function
+------------------------
+
+device: run the gadget, modprobe vivid::
+
+  # uvc-gadget -u /dev/video<uvc video node #> -v /dev/video<vivid video node #>
+
+where uvc-gadget is this program:
+	http://git.ideasonboard.org/uvc-gadget.git
+
+with these patches:
+
+	http://www.spinics.net/lists/linux-usb/msg99220.html
+
+host::
+
+	luvcview -f yuv
+
+19. PRINTER function
+====================
+
+The function is provided by usb_f_printer.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "printer".
+The printer function provides these attributes in its function directory:
+
+	==========	===========================================
+	pnp_string	Data to be passed to the host in pnp string
+	q_len		Number of requests per endpoint
+	==========	===========================================
+
+Testing the PRINTER function
+----------------------------
+
+The most basic testing:
+
+device: run the gadget::
+
+	# ls -l /devices/virtual/usb_printer_gadget/
+
+should show g_printer<number>.
+
+If udev is active, then /dev/g_printer<number> should appear automatically.
+
+host:
+
+If udev is active, then e.g. /dev/usb/lp0 should appear.
+
+host->device transmission:
+
+device::
+
+	# cat /dev/g_printer<number>
+
+host::
+
+	# cat > /dev/usb/lp0
+
+device->host transmission::
+
+	# cat > /dev/g_printer<number>
+
+host::
+
+	# cat /dev/usb/lp0
+
+More advanced testing can be done with the prn_example
+described in Documentation/usb/gadget_printer.rst.
+
+
+20. UAC1 function (virtual ALSA card, using u_audio API)
+========================================================
+
+The function is provided by usb_f_uac1.ko module.
+It will create a virtual ALSA card and the audio streams are simply
+sinked to and sourced from it.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "uac1".
+The uac1 function provides these attributes in its function directory:
+
+	========== ====================================================
+	c_chmask   capture channel mask
+	c_srate    capture sampling rate
+	c_ssize    capture sample size (bytes)
+	p_chmask   playback channel mask
+	p_srate    playback sampling rate
+	p_ssize    playback sample size (bytes)
+	req_number the number of pre-allocated request for both capture
+		   and playback
+	========== ====================================================
+
+The attributes have sane default values.
+
+Testing the UAC1 function
+-------------------------
+
+device: run the gadget
+host: aplay -l # should list our USB Audio Gadget
+
+This function does not require real hardware support, it just
+sends a stream of audio data to/from the host. In order to
+actually hear something at the device side, a command similar
+to this must be used at the device side::
+
+	$ arecord -f dat -t wav -D hw:2,0 | aplay -D hw:0,0 &
+
+e.g.::
+
+	$ arecord -f dat -t wav -D hw:CARD=UAC1Gadget,DEV=0 | \
+	  aplay -D default:CARD=OdroidU3
diff --git a/Documentation/usb/gadget-testing.txt b/Documentation/usb/gadget-testing.txt
deleted file mode 100644
index 7d7f2340af42..000000000000
--- a/Documentation/usb/gadget-testing.txt
+++ /dev/null
@@ -1,934 +0,0 @@
-==============
-Gadget Testing
-==============
-
-This file summarizes information on basic testing of USB functions
-provided by gadgets.
-
-.. contents
-
-   1. ACM function
-   2. ECM function
-   3. ECM subset function
-   4. EEM function
-   5. FFS function
-   6. HID function
-   7. LOOPBACK function
-   8. MASS STORAGE function
-   9. MIDI function
-   10. NCM function
-   11. OBEX function
-   12. PHONET function
-   13. RNDIS function
-   14. SERIAL function
-   15. SOURCESINK function
-   16. UAC1 function (legacy implementation)
-   17. UAC2 function
-   18. UVC function
-   19. PRINTER function
-   20. UAC1 function (new API)
-
-
-1. ACM function
-===============
-
-The function is provided by usb_f_acm.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "acm".
-The ACM function provides just one attribute in its function directory:
-
-	port_num
-
-The attribute is read-only.
-
-There can be at most 4 ACM/generic serial/OBEX ports in the system.
-
-
-Testing the ACM function
-------------------------
-
-On the host::
-
-	cat > /dev/ttyACM<X>
-
-On the device::
-
-	cat /dev/ttyGS<Y>
-
-then the other way round
-
-On the device::
-
-	cat > /dev/ttyGS<Y>
-
-On the host::
-
-	cat /dev/ttyACM<X>
-
-2. ECM function
-===============
-
-The function is provided by usb_f_ecm.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "ecm".
-The ECM function provides these attributes in its function directory:
-
-	=============== ==================================================
-	ifname		network device interface name associated with this
-			function instance
-	qmult		queue length multiplier for high and super speed
-	host_addr	MAC address of host's end of this
-			Ethernet over USB link
-	dev_addr	MAC address of device's end of this
-			Ethernet over USB link
-	=============== ==================================================
-
-and after creating the functions/ecm.<instance name> they contain default
-values: qmult is 5, dev_addr and host_addr are randomly selected.
-Except for ifname they can be written to until the function is linked to a
-configuration. The ifname is read-only and contains the name of the interface
-which was assigned by the net core, e. g. usb0.
-
-Testing the ECM function
-------------------------
-
-Configure IP addresses of the device and the host. Then:
-
-On the device::
-
-	ping <host's IP>
-
-On the host::
-
-	ping <device's IP>
-
-3. ECM subset function
-======================
-
-The function is provided by usb_f_ecm_subset.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "geth".
-The ECM subset function provides these attributes in its function directory:
-
-	=============== ==================================================
-	ifname		network device interface name associated with this
-			function instance
-	qmult		queue length multiplier for high and super speed
-	host_addr	MAC address of host's end of this
-			Ethernet over USB link
-	dev_addr	MAC address of device's end of this
-			Ethernet over USB link
-	=============== ==================================================
-
-and after creating the functions/ecm.<instance name> they contain default
-values: qmult is 5, dev_addr and host_addr are randomly selected.
-Except for ifname they can be written to until the function is linked to a
-configuration. The ifname is read-only and contains the name of the interface
-which was assigned by the net core, e. g. usb0.
-
-Testing the ECM subset function
--------------------------------
-
-Configure IP addresses of the device and the host. Then:
-
-On the device::
-
-	ping <host's IP>
-
-On the host::
-
-	ping <device's IP>
-
-4. EEM function
-===============
-
-The function is provided by usb_f_eem.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "eem".
-The EEM function provides these attributes in its function directory:
-
-	=============== ==================================================
-	ifname		network device interface name associated with this
-			function instance
-	qmult		queue length multiplier for high and super speed
-	host_addr	MAC address of host's end of this
-			Ethernet over USB link
-	dev_addr	MAC address of device's end of this
-			Ethernet over USB link
-	=============== ==================================================
-
-and after creating the functions/eem.<instance name> they contain default
-values: qmult is 5, dev_addr and host_addr are randomly selected.
-Except for ifname they can be written to until the function is linked to a
-configuration. The ifname is read-only and contains the name of the interface
-which was assigned by the net core, e. g. usb0.
-
-Testing the EEM function
-------------------------
-
-Configure IP addresses of the device and the host. Then:
-
-On the device::
-
-	ping <host's IP>
-
-On the host::
-
-	ping <device's IP>
-
-5. FFS function
-===============
-
-The function is provided by usb_f_fs.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "ffs".
-The function directory is intentionally empty and not modifiable.
-
-After creating the directory there is a new instance (a "device") of FunctionFS
-available in the system. Once a "device" is available, the user should follow
-the standard procedure for using FunctionFS (mount it, run the userspace
-process which implements the function proper). The gadget should be enabled
-by writing a suitable string to usb_gadget/<gadget>/UDC.
-
-Testing the FFS function
-------------------------
-
-On the device: start the function's userspace daemon, enable the gadget
-
-On the host: use the USB function provided by the device
-
-6. HID function
-===============
-
-The function is provided by usb_f_hid.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "hid".
-The HID function provides these attributes in its function directory:
-
-	=============== ===========================================
-	protocol	HID protocol to use
-	report_desc	data to be used in HID reports, except data
-			passed with /dev/hidg<X>
-	report_length	HID report length
-	subclass	HID subclass to use
-	=============== ===========================================
-
-For a keyboard the protocol and the subclass are 1, the report_length is 8,
-while the report_desc is::
-
-  $ hd my_report_desc
-  00000000  05 01 09 06 a1 01 05 07  19 e0 29 e7 15 00 25 01  |..........)...%.|
-  00000010  75 01 95 08 81 02 95 01  75 08 81 03 95 05 75 01  |u.......u.....u.|
-  00000020  05 08 19 01 29 05 91 02  95 01 75 03 91 03 95 06  |....).....u.....|
-  00000030  75 08 15 00 25 65 05 07  19 00 29 65 81 00 c0     |u...%e....)e...|
-  0000003f
-
-Such a sequence of bytes can be stored to the attribute with echo::
-
-  $ echo -ne \\x05\\x01\\x09\\x06\\xa1.....
-
-Testing the HID function
-------------------------
-
-Device:
-
-- create the gadget
-- connect the gadget to a host, preferably not the one used
-  to control the gadget
-- run a program which writes to /dev/hidg<N>, e.g.
-  a userspace program found in Documentation/usb/gadget_hid.txt::
-
-	$ ./hid_gadget_test /dev/hidg0 keyboard
-
-Host:
-
-- observe the keystrokes from the gadget
-
-7. LOOPBACK function
-====================
-
-The function is provided by usb_f_ss_lb.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "Loopback".
-The LOOPBACK function provides these attributes in its function directory:
-
-	=============== =======================
-	qlen		depth of loopback queue
-	bulk_buflen	buffer length
-	=============== =======================
-
-Testing the LOOPBACK function
------------------------------
-
-device: run the gadget
-
-host: test-usb (tools/usb/testusb.c)
-
-8. MASS STORAGE function
-========================
-
-The function is provided by usb_f_mass_storage.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "mass_storage".
-The MASS STORAGE function provides these attributes in its directory:
-files:
-
-	=============== ==============================================
-	stall		Set to permit function to halt bulk endpoints.
-			Disabled on some USB devices known not to work
-			correctly. You should set it to true.
-	num_buffers	Number of pipeline buffers. Valid numbers
-			are 2..4. Available only if
-			CONFIG_USB_GADGET_DEBUG_FILES is set.
-	=============== ==============================================
-
-and a default lun.0 directory corresponding to SCSI LUN #0.
-
-A new lun can be added with mkdir::
-
-	$ mkdir functions/mass_storage.0/partition.5
-
-Lun numbering does not have to be continuous, except for lun #0 which is
-created by default. A maximum of 8 luns can be specified and they all must be
-named following the <name>.<number> scheme. The numbers can be 0..8.
-Probably a good convention is to name the luns "lun.<number>",
-although it is not mandatory.
-
-In each lun directory there are the following attribute files:
-
-	=============== ==============================================
-	file		The path to the backing file for the LUN.
-			Required if LUN is not marked as removable.
-	ro		Flag specifying access to the LUN shall be
-			read-only. This is implied if CD-ROM emulation
-			is enabled as well as when it was impossible
-			to open "filename" in R/W mode.
-	removable	Flag specifying that LUN shall be indicated as
-			being removable.
-	cdrom		Flag specifying that LUN shall be reported as
-			being a CD-ROM.
-	nofua		Flag specifying that FUA flag
-			in SCSI WRITE(10,12)
-	=============== ==============================================
-
-Testing the MASS STORAGE function
----------------------------------
-
-device: connect the gadget, enable it
-host: dmesg, see the USB drives appear (if system configured to automatically
-mount)
-
-9. MIDI function
-================
-
-The function is provided by usb_f_midi.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "midi".
-The MIDI function provides these attributes in its function directory:
-
-	=============== ====================================
-	buflen		MIDI buffer length
-	id		ID string for the USB MIDI adapter
-	in_ports	number of MIDI input ports
-	index		index value for the USB MIDI adapter
-	out_ports	number of MIDI output ports
-	qlen		USB read request queue length
-	=============== ====================================
-
-Testing the MIDI function
--------------------------
-
-There are two cases: playing a mid from the gadget to
-the host and playing a mid from the host to the gadget.
-
-1) Playing a mid from the gadget to the host:
-
-host::
-
-  $ arecordmidi -l
-   Port    Client name                      Port name
-   14:0    Midi Through                     Midi Through Port-0
-   24:0    MIDI Gadget                      MIDI Gadget MIDI 1
-  $ arecordmidi -p 24:0 from_gadget.mid
-
-gadget::
-
-  $ aplaymidi -l
-   Port    Client name                      Port name
-   20:0    f_midi                           f_midi
-
-  $ aplaymidi -p 20:0 to_host.mid
-
-2) Playing a mid from the host to the gadget
-
-gadget::
-
-  $ arecordmidi -l
-   Port    Client name                      Port name
-   20:0    f_midi                           f_midi
-
-  $ arecordmidi -p 20:0 from_host.mid
-
-host::
-
-  $ aplaymidi -l
-   Port    Client name                      Port name
-   14:0    Midi Through                     Midi Through Port-0
-   24:0    MIDI Gadget                      MIDI Gadget MIDI 1
-
-  $ aplaymidi -p24:0 to_gadget.mid
-
-The from_gadget.mid should sound identical to the to_host.mid.
-
-The from_host.id should sound identical to the to_gadget.mid.
-
-MIDI files can be played to speakers/headphones with e.g. timidity installed::
-
-  $ aplaymidi -l
-   Port    Client name                      Port name
-   14:0    Midi Through                     Midi Through Port-0
-   24:0    MIDI Gadget                      MIDI Gadget MIDI 1
-  128:0    TiMidity                         TiMidity port 0
-  128:1    TiMidity                         TiMidity port 1
-  128:2    TiMidity                         TiMidity port 2
-  128:3    TiMidity                         TiMidity port 3
-
-  $ aplaymidi -p 128:0 file.mid
-
-MIDI ports can be logically connected using the aconnect utility, e.g.::
-
-  $ aconnect 24:0 128:0 # try it on the host
-
-After the gadget's MIDI port is connected to timidity's MIDI port,
-whatever is played at the gadget side with aplaymidi -l is audible
-in host's speakers/headphones.
-
-10. NCM function
-================
-
-The function is provided by usb_f_ncm.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "ncm".
-The NCM function provides these attributes in its function directory:
-
-	=============== ==================================================
-	ifname		network device interface name associated with this
-			function instance
-	qmult		queue length multiplier for high and super speed
-	host_addr	MAC address of host's end of this
-			Ethernet over USB link
-	dev_addr	MAC address of device's end of this
-			Ethernet over USB link
-	=============== ==================================================
-
-and after creating the functions/ncm.<instance name> they contain default
-values: qmult is 5, dev_addr and host_addr are randomly selected.
-Except for ifname they can be written to until the function is linked to a
-configuration. The ifname is read-only and contains the name of the interface
-which was assigned by the net core, e. g. usb0.
-
-Testing the NCM function
-------------------------
-
-Configure IP addresses of the device and the host. Then:
-
-On the device::
-
-	ping <host's IP>
-
-On the host::
-
-	ping <device's IP>
-
-11. OBEX function
-=================
-
-The function is provided by usb_f_obex.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "obex".
-The OBEX function provides just one attribute in its function directory:
-
-	port_num
-
-The attribute is read-only.
-
-There can be at most 4 ACM/generic serial/OBEX ports in the system.
-
-Testing the OBEX function
--------------------------
-
-On device::
-
-	seriald -f /dev/ttyGS<Y> -s 1024
-
-On host::
-
-	serialc -v <vendorID> -p <productID> -i<interface#> -a1 -s1024 \
-                -t<out endpoint addr> -r<in endpoint addr>
-
-where seriald and serialc are Felipe's utilities found here:
-
-	https://github.com/felipebalbi/usb-tools.git master
-
-12. PHONET function
-===================
-
-The function is provided by usb_f_phonet.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "phonet".
-The PHONET function provides just one attribute in its function directory:
-
-	=============== ==================================================
-	ifname		network device interface name associated with this
-			function instance
-	=============== ==================================================
-
-Testing the PHONET function
----------------------------
-
-It is not possible to test the SOCK_STREAM protocol without a specific piece
-of hardware, so only SOCK_DGRAM has been tested. For the latter to work,
-in the past I had to apply the patch mentioned here:
-
-http://www.spinics.net/lists/linux-usb/msg85689.html
-
-These tools are required:
-
-git://git.gitorious.org/meego-cellular/phonet-utils.git
-
-On the host::
-
-	$ ./phonet -a 0x10 -i usbpn0
-	$ ./pnroute add 0x6c usbpn0
-	$./pnroute add 0x10 usbpn0
-	$ ifconfig usbpn0 up
-
-On the device::
-
-	$ ./phonet -a 0x6c -i upnlink0
-	$ ./pnroute add 0x10 upnlink0
-	$ ifconfig upnlink0 up
-
-Then a test program can be used::
-
-	http://www.spinics.net/lists/linux-usb/msg85690.html
-
-On the device::
-
-	$ ./pnxmit -a 0x6c -r
-
-On the host::
-
-	$ ./pnxmit -a 0x10 -s 0x6c
-
-As a result some data should be sent from host to device.
-Then the other way round:
-
-On the host::
-
-	$ ./pnxmit -a 0x10 -r
-
-On the device::
-
-	$ ./pnxmit -a 0x6c -s 0x10
-
-13. RNDIS function
-==================
-
-The function is provided by usb_f_rndis.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "rndis".
-The RNDIS function provides these attributes in its function directory:
-
-	=============== ==================================================
-	ifname		network device interface name associated with this
-			function instance
-	qmult		queue length multiplier for high and super speed
-	host_addr	MAC address of host's end of this
-			Ethernet over USB link
-	dev_addr	MAC address of device's end of this
-			Ethernet over USB link
-	=============== ==================================================
-
-and after creating the functions/rndis.<instance name> they contain default
-values: qmult is 5, dev_addr and host_addr are randomly selected.
-Except for ifname they can be written to until the function is linked to a
-configuration. The ifname is read-only and contains the name of the interface
-which was assigned by the net core, e. g. usb0.
-
-Testing the RNDIS function
---------------------------
-
-Configure IP addresses of the device and the host. Then:
-
-On the device::
-
-	ping <host's IP>
-
-On the host::
-
-	ping <device's IP>
-
-14. SERIAL function
-===================
-
-The function is provided by usb_f_gser.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "gser".
-The SERIAL function provides just one attribute in its function directory:
-
-	port_num
-
-The attribute is read-only.
-
-There can be at most 4 ACM/generic serial/OBEX ports in the system.
-
-Testing the SERIAL function
----------------------------
-
-On host::
-
-	insmod usbserial
-	echo VID PID >/sys/bus/usb-serial/drivers/generic/new_id
-
-On host::
-
-	cat > /dev/ttyUSB<X>
-
-On target::
-
-	cat /dev/ttyGS<Y>
-
-then the other way round
-
-On target::
-
-	cat > /dev/ttyGS<Y>
-
-On host::
-
-	cat /dev/ttyUSB<X>
-
-15. SOURCESINK function
-=======================
-
-The function is provided by usb_f_ss_lb.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "SourceSink".
-The SOURCESINK function provides these attributes in its function directory:
-
-	=============== ==================================
-	pattern		0 (all zeros), 1 (mod63), 2 (none)
-	isoc_interval	1..16
-	isoc_maxpacket	0 - 1023 (fs), 0 - 1024 (hs/ss)
-	isoc_mult	0..2 (hs/ss only)
-	isoc_maxburst	0..15 (ss only)
-	bulk_buflen	buffer length
-	bulk_qlen	depth of queue for bulk
-	iso_qlen	depth of queue for iso
-	=============== ==================================
-
-Testing the SOURCESINK function
--------------------------------
-
-device: run the gadget
-
-host: test-usb (tools/usb/testusb.c)
-
-
-16. UAC1 function (legacy implementation)
-=========================================
-
-The function is provided by usb_f_uac1_legacy.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory
-is "uac1_legacy".
-The uac1 function provides these attributes in its function directory:
-
-	=============== ====================================
-	audio_buf_size	audio buffer size
-	fn_cap		capture pcm device file name
-	fn_cntl		control device file name
-	fn_play		playback pcm device file name
-	req_buf_size	ISO OUT endpoint request buffer size
-	req_count	ISO OUT endpoint request count
-	=============== ====================================
-
-The attributes have sane default values.
-
-Testing the UAC1 function
--------------------------
-
-device: run the gadget
-
-host::
-
-	aplay -l # should list our USB Audio Gadget
-
-17. UAC2 function
-=================
-
-The function is provided by usb_f_uac2.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "uac2".
-The uac2 function provides these attributes in its function directory:
-
-	=============== ====================================================
-	c_chmask	capture channel mask
-	c_srate		capture sampling rate
-	c_ssize		capture sample size (bytes)
-	p_chmask	playback channel mask
-	p_srate		playback sampling rate
-	p_ssize		playback sample size (bytes)
-	req_number	the number of pre-allocated request for both capture
-			and playback
-	=============== ====================================================
-
-The attributes have sane default values.
-
-Testing the UAC2 function
--------------------------
-
-device: run the gadget
-host: aplay -l # should list our USB Audio Gadget
-
-This function does not require real hardware support, it just
-sends a stream of audio data to/from the host. In order to
-actually hear something at the device side, a command similar
-to this must be used at the device side::
-
-	$ arecord -f dat -t wav -D hw:2,0 | aplay -D hw:0,0 &
-
-e.g.::
-
-	$ arecord -f dat -t wav -D hw:CARD=UAC2Gadget,DEV=0 | \
-	  aplay -D default:CARD=OdroidU3
-
-18. UVC function
-================
-
-The function is provided by usb_f_uvc.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "uvc".
-The uvc function provides these attributes in its function directory:
-
-	=================== ================================================
-	streaming_interval  interval for polling endpoint for data transfers
-	streaming_maxburst  bMaxBurst for super speed companion descriptor
-	streaming_maxpacket maximum packet size this endpoint is capable of
-			    sending or receiving when this configuration is
-			    selected
-	=================== ================================================
-
-There are also "control" and "streaming" subdirectories, each of which contain
-a number of their subdirectories. There are some sane defaults provided, but
-the user must provide the following:
-
-	================== ====================================================
-	control header     create in control/header, link from control/class/fs
-			   and/or control/class/ss
-	streaming header   create in streaming/header, link from
-			   streaming/class/fs and/or streaming/class/hs and/or
-			   streaming/class/ss
-	format description create in streaming/mjpeg and/or
-			   streaming/uncompressed
-	frame description  create in streaming/mjpeg/<format> and/or in
-			   streaming/uncompressed/<format>
-	================== ====================================================
-
-Each frame description contains frame interval specification, and each
-such specification consists of a number of lines with an inverval value
-in each line. The rules stated above are best illustrated with an example::
-
-  # mkdir functions/uvc.usb0/control/header/h
-  # cd functions/uvc.usb0/control/
-  # ln -s header/h class/fs
-  # ln -s header/h class/ss
-  # mkdir -p functions/uvc.usb0/streaming/uncompressed/u/360p
-  # cat <<EOF > functions/uvc.usb0/streaming/uncompressed/u/360p/dwFrameInterval
-  666666
-  1000000
-  5000000
-  EOF
-  # cd $GADGET_CONFIGFS_ROOT
-  # mkdir functions/uvc.usb0/streaming/header/h
-  # cd functions/uvc.usb0/streaming/header/h
-  # ln -s ../../uncompressed/u
-  # cd ../../class/fs
-  # ln -s ../../header/h
-  # cd ../../class/hs
-  # ln -s ../../header/h
-  # cd ../../class/ss
-  # ln -s ../../header/h
-
-
-Testing the UVC function
-------------------------
-
-device: run the gadget, modprobe vivid::
-
-  # uvc-gadget -u /dev/video<uvc video node #> -v /dev/video<vivid video node #>
-
-where uvc-gadget is this program:
-	http://git.ideasonboard.org/uvc-gadget.git
-
-with these patches:
-
-	http://www.spinics.net/lists/linux-usb/msg99220.html
-
-host::
-
-	luvcview -f yuv
-
-19. PRINTER function
-====================
-
-The function is provided by usb_f_printer.ko module.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "printer".
-The printer function provides these attributes in its function directory:
-
-	==========	===========================================
-	pnp_string	Data to be passed to the host in pnp string
-	q_len		Number of requests per endpoint
-	==========	===========================================
-
-Testing the PRINTER function
-----------------------------
-
-The most basic testing:
-
-device: run the gadget::
-
-	# ls -l /devices/virtual/usb_printer_gadget/
-
-should show g_printer<number>.
-
-If udev is active, then /dev/g_printer<number> should appear automatically.
-
-host:
-
-If udev is active, then e.g. /dev/usb/lp0 should appear.
-
-host->device transmission:
-
-device::
-
-	# cat /dev/g_printer<number>
-
-host::
-
-	# cat > /dev/usb/lp0
-
-device->host transmission::
-
-	# cat > /dev/g_printer<number>
-
-host::
-
-	# cat /dev/usb/lp0
-
-More advanced testing can be done with the prn_example
-described in Documentation/usb/gadget_printer.txt.
-
-
-20. UAC1 function (virtual ALSA card, using u_audio API)
-========================================================
-
-The function is provided by usb_f_uac1.ko module.
-It will create a virtual ALSA card and the audio streams are simply
-sinked to and sourced from it.
-
-Function-specific configfs interface
-------------------------------------
-
-The function name to use when creating the function directory is "uac1".
-The uac1 function provides these attributes in its function directory:
-
-	========== ====================================================
-	c_chmask   capture channel mask
-	c_srate    capture sampling rate
-	c_ssize    capture sample size (bytes)
-	p_chmask   playback channel mask
-	p_srate    playback sampling rate
-	p_ssize    playback sample size (bytes)
-	req_number the number of pre-allocated request for both capture
-		   and playback
-	========== ====================================================
-
-The attributes have sane default values.
-
-Testing the UAC1 function
--------------------------
-
-device: run the gadget
-host: aplay -l # should list our USB Audio Gadget
-
-This function does not require real hardware support, it just
-sends a stream of audio data to/from the host. In order to
-actually hear something at the device side, a command similar
-to this must be used at the device side::
-
-	$ arecord -f dat -t wav -D hw:2,0 | aplay -D hw:0,0 &
-
-e.g.::
-
-	$ arecord -f dat -t wav -D hw:CARD=UAC1Gadget,DEV=0 | \
-	  aplay -D default:CARD=OdroidU3
diff --git a/Documentation/usb/gadget_configfs.rst b/Documentation/usb/gadget_configfs.rst
new file mode 100644
index 000000000000..54fb08baae22
--- /dev/null
+++ b/Documentation/usb/gadget_configfs.rst
@@ -0,0 +1,390 @@
+============================================
+Linux USB gadget configured through configfs
+============================================
+
+
+25th April 2013
+
+
+
+
+Overview
+========
+
+A USB Linux Gadget is a device which has a UDC (USB Device Controller) and can
+be connected to a USB Host to extend it with additional functions like a serial
+port or a mass storage capability.
+
+A gadget is seen by its host as a set of configurations, each of which contains
+a number of interfaces which, from the gadget's perspective, are known as
+functions, each function representing e.g. a serial connection or a SCSI disk.
+
+Linux provides a number of functions for gadgets to use.
+
+Creating a gadget means deciding what configurations there will be
+and which functions each configuration will provide.
+
+Configfs (please see `Documentation/filesystems/configfs/*`) lends itself nicely
+for the purpose of telling the kernel about the above mentioned decision.
+This document is about how to do it.
+
+It also describes how configfs integration into gadget is designed.
+
+
+
+
+Requirements
+============
+
+In order for this to work configfs must be available, so CONFIGFS_FS must be
+'y' or 'm' in .config. As of this writing USB_LIBCOMPOSITE selects CONFIGFS_FS.
+
+
+
+
+Usage
+=====
+
+(The original post describing the first function
+made available through configfs can be seen here:
+http://www.spinics.net/lists/linux-usb/msg76388.html)
+
+::
+
+	$ modprobe libcomposite
+	$ mount none $CONFIGFS_HOME -t configfs
+
+where CONFIGFS_HOME is the mount point for configfs
+
+1. Creating the gadgets
+-----------------------
+
+For each gadget to be created its corresponding directory must be created::
+
+	$ mkdir $CONFIGFS_HOME/usb_gadget/<gadget name>
+
+e.g.::
+
+	$ mkdir $CONFIGFS_HOME/usb_gadget/g1
+
+	...
+	...
+	...
+
+	$ cd $CONFIGFS_HOME/usb_gadget/g1
+
+Each gadget needs to have its vendor id <VID> and product id <PID> specified::
+
+	$ echo <VID> > idVendor
+	$ echo <PID> > idProduct
+
+A gadget also needs its serial number, manufacturer and product strings.
+In order to have a place to store them, a strings subdirectory must be created
+for each language, e.g.::
+
+	$ mkdir strings/0x409
+
+Then the strings can be specified::
+
+	$ echo <serial number> > strings/0x409/serialnumber
+	$ echo <manufacturer> > strings/0x409/manufacturer
+	$ echo <product> > strings/0x409/product
+
+2. Creating the configurations
+------------------------------
+
+Each gadget will consist of a number of configurations, their corresponding
+directories must be created:
+
+$ mkdir configs/<name>.<number>
+
+where <name> can be any string which is legal in a filesystem and the
+<number> is the configuration's number, e.g.::
+
+	$ mkdir configs/c.1
+
+	...
+	...
+	...
+
+Each configuration also needs its strings, so a subdirectory must be created
+for each language, e.g.::
+
+	$ mkdir configs/c.1/strings/0x409
+
+Then the configuration string can be specified::
+
+	$ echo <configuration> > configs/c.1/strings/0x409/configuration
+
+Some attributes can also be set for a configuration, e.g.::
+
+	$ echo 120 > configs/c.1/MaxPower
+
+3. Creating the functions
+-------------------------
+
+The gadget will provide some functions, for each function its corresponding
+directory must be created::
+
+	$ mkdir functions/<name>.<instance name>
+
+where <name> corresponds to one of allowed function names and instance name
+is an arbitrary string allowed in a filesystem, e.g.::
+
+  $ mkdir functions/ncm.usb0 # usb_f_ncm.ko gets loaded with request_module()
+
+  ...
+  ...
+  ...
+
+Each function provides its specific set of attributes, with either read-only
+or read-write access. Where applicable they need to be written to as
+appropriate.
+Please refer to Documentation/ABI/*/configfs-usb-gadget* for more information.
+
+4. Associating the functions with their configurations
+------------------------------------------------------
+
+At this moment a number of gadgets is created, each of which has a number of
+configurations specified and a number of functions available. What remains
+is specifying which function is available in which configuration (the same
+function can be used in multiple configurations). This is achieved with
+creating symbolic links::
+
+	$ ln -s functions/<name>.<instance name> configs/<name>.<number>
+
+e.g.::
+
+	$ ln -s functions/ncm.usb0 configs/c.1
+
+	...
+	...
+	...
+
+5. Enabling the gadget
+----------------------
+
+All the above steps serve the purpose of composing the gadget of
+configurations and functions.
+
+An example directory structure might look like this::
+
+  .
+  ./strings
+  ./strings/0x409
+  ./strings/0x409/serialnumber
+  ./strings/0x409/product
+  ./strings/0x409/manufacturer
+  ./configs
+  ./configs/c.1
+  ./configs/c.1/ncm.usb0 -> ../../../../usb_gadget/g1/functions/ncm.usb0
+  ./configs/c.1/strings
+  ./configs/c.1/strings/0x409
+  ./configs/c.1/strings/0x409/configuration
+  ./configs/c.1/bmAttributes
+  ./configs/c.1/MaxPower
+  ./functions
+  ./functions/ncm.usb0
+  ./functions/ncm.usb0/ifname
+  ./functions/ncm.usb0/qmult
+  ./functions/ncm.usb0/host_addr
+  ./functions/ncm.usb0/dev_addr
+  ./UDC
+  ./bcdUSB
+  ./bcdDevice
+  ./idProduct
+  ./idVendor
+  ./bMaxPacketSize0
+  ./bDeviceProtocol
+  ./bDeviceSubClass
+  ./bDeviceClass
+
+
+Such a gadget must be finally enabled so that the USB host can enumerate it.
+
+In order to enable the gadget it must be bound to a UDC (USB Device
+Controller)::
+
+	$ echo <udc name> > UDC
+
+where <udc name> is one of those found in /sys/class/udc/*
+e.g.::
+
+	$ echo s3c-hsotg > UDC
+
+
+6. Disabling the gadget
+-----------------------
+
+::
+
+	$ echo "" > UDC
+
+7. Cleaning up
+--------------
+
+Remove functions from configurations::
+
+	$ rm configs/<config name>.<number>/<function>
+
+where <config name>.<number> specify the configuration and <function> is
+a symlink to a function being removed from the configuration, e.g.::
+
+	$ rm configs/c.1/ncm.usb0
+
+	...
+	...
+	...
+
+Remove strings directories in configurations:
+
+	$ rmdir configs/<config name>.<number>/strings/<lang>
+
+e.g.::
+
+	$ rmdir configs/c.1/strings/0x409
+
+	...
+	...
+	...
+
+and remove the configurations::
+
+	$ rmdir configs/<config name>.<number>
+
+e.g.::
+
+	rmdir configs/c.1
+
+	...
+	...
+	...
+
+Remove functions (function modules are not unloaded, though):
+
+	$ rmdir functions/<name>.<instance name>
+
+e.g.::
+
+	$ rmdir functions/ncm.usb0
+
+	...
+	...
+	...
+
+Remove strings directories in the gadget::
+
+	$ rmdir strings/<lang>
+
+e.g.::
+
+	$ rmdir strings/0x409
+
+and finally remove the gadget::
+
+	$ cd ..
+	$ rmdir <gadget name>
+
+e.g.::
+
+	$ rmdir g1
+
+
+
+
+Implementation design
+=====================
+
+Below the idea of how configfs works is presented.
+In configfs there are items and groups, both represented as directories.
+The difference between an item and a group is that a group can contain
+other groups. In the picture below only an item is shown.
+Both items and groups can have attributes, which are represented as files.
+The user can create and remove directories, but cannot remove files,
+which can be read-only or read-write, depending on what they represent.
+
+The filesystem part of configfs operates on config_items/groups and
+configfs_attributes which are generic and of the same type for all
+configured elements. However, they are embedded in usage-specific
+larger structures. In the picture below there is a "cs" which contains
+a config_item and an "sa" which contains a configfs_attribute.
+
+The filesystem view would be like this::
+
+  ./
+  ./cs        (directory)
+     |
+     +--sa    (file)
+     |
+     .
+     .
+     .
+
+Whenever a user reads/writes the "sa" file, a function is called
+which accepts a struct config_item and a struct configfs_attribute.
+In the said function the "cs" and "sa" are retrieved using the well
+known container_of technique and an appropriate sa's function (show or
+store) is called and passed the "cs" and a character buffer. The "show"
+is for displaying the file's contents (copy data from the cs to the
+buffer), while the "store" is for modifying the file's contents (copy data
+from the buffer to the cs), but it is up to the implementer of the
+two functions to decide what they actually do.
+
+::
+
+  typedef struct configured_structure cs;
+  typedef struct specific_attribute sa;
+
+                                         sa
+                         +----------------------------------+
+          cs             |  (*show)(cs *, buffer);          |
+  +-----------------+    |  (*store)(cs *, buffer, length); |
+  |                 |    |                                  |
+  | +-------------+ |    |       +------------------+       |
+  | | struct      |-|----|------>|struct            |       |
+  | | config_item | |    |       |configfs_attribute|       |
+  | +-------------+ |    |       +------------------+       |
+  |                 |    +----------------------------------+
+  | data to be set  |                .
+  |                 |                .
+  +-----------------+                .
+
+The file names are decided by the config item/group designer, while
+the directories in general can be named at will. A group can have
+a number of its default sub-groups created automatically.
+
+For more information on configfs please see
+`Documentation/filesystems/configfs/*`.
+
+The concepts described above translate to USB gadgets like this:
+
+1. A gadget has its config group, which has some attributes (idVendor,
+idProduct etc) and default sub-groups (configs, functions, strings).
+Writing to the attributes causes the information to be stored in
+appropriate locations. In the configs, functions and strings sub-groups
+a user can create their sub-groups to represent configurations, functions,
+and groups of strings in a given language.
+
+2. The user creates configurations and functions, in the configurations
+creates symbolic links to functions. This information is used when the
+gadget's UDC attribute is written to, which means binding the gadget
+to the UDC. The code in drivers/usb/gadget/configfs.c iterates over
+all configurations, and in each configuration it iterates over all
+functions and binds them. This way the whole gadget is bound.
+
+3. The file drivers/usb/gadget/configfs.c contains code for
+
+	- gadget's config_group
+	- gadget's default groups (configs, functions, strings)
+	- associating functions with configurations (symlinks)
+
+4. Each USB function naturally has its own view of what it wants
+configured, so config_groups for particular functions are defined
+in the functions implementation files drivers/usb/gadget/f_*.c.
+
+5. Function's code is written in such a way that it uses
+
+usb_get_function_instance(), which, in turn, calls request_module.
+So, provided that modprobe works, modules for particular functions
+are loaded automatically. Please note that the converse is not true:
+after a gadget is disabled and torn down, the modules remain loaded.
diff --git a/Documentation/usb/gadget_configfs.txt b/Documentation/usb/gadget_configfs.txt
deleted file mode 100644
index 54fb08baae22..000000000000
--- a/Documentation/usb/gadget_configfs.txt
+++ /dev/null
@@ -1,390 +0,0 @@
-============================================
-Linux USB gadget configured through configfs
-============================================
-
-
-25th April 2013
-
-
-
-
-Overview
-========
-
-A USB Linux Gadget is a device which has a UDC (USB Device Controller) and can
-be connected to a USB Host to extend it with additional functions like a serial
-port or a mass storage capability.
-
-A gadget is seen by its host as a set of configurations, each of which contains
-a number of interfaces which, from the gadget's perspective, are known as
-functions, each function representing e.g. a serial connection or a SCSI disk.
-
-Linux provides a number of functions for gadgets to use.
-
-Creating a gadget means deciding what configurations there will be
-and which functions each configuration will provide.
-
-Configfs (please see `Documentation/filesystems/configfs/*`) lends itself nicely
-for the purpose of telling the kernel about the above mentioned decision.
-This document is about how to do it.
-
-It also describes how configfs integration into gadget is designed.
-
-
-
-
-Requirements
-============
-
-In order for this to work configfs must be available, so CONFIGFS_FS must be
-'y' or 'm' in .config. As of this writing USB_LIBCOMPOSITE selects CONFIGFS_FS.
-
-
-
-
-Usage
-=====
-
-(The original post describing the first function
-made available through configfs can be seen here:
-http://www.spinics.net/lists/linux-usb/msg76388.html)
-
-::
-
-	$ modprobe libcomposite
-	$ mount none $CONFIGFS_HOME -t configfs
-
-where CONFIGFS_HOME is the mount point for configfs
-
-1. Creating the gadgets
------------------------
-
-For each gadget to be created its corresponding directory must be created::
-
-	$ mkdir $CONFIGFS_HOME/usb_gadget/<gadget name>
-
-e.g.::
-
-	$ mkdir $CONFIGFS_HOME/usb_gadget/g1
-
-	...
-	...
-	...
-
-	$ cd $CONFIGFS_HOME/usb_gadget/g1
-
-Each gadget needs to have its vendor id <VID> and product id <PID> specified::
-
-	$ echo <VID> > idVendor
-	$ echo <PID> > idProduct
-
-A gadget also needs its serial number, manufacturer and product strings.
-In order to have a place to store them, a strings subdirectory must be created
-for each language, e.g.::
-
-	$ mkdir strings/0x409
-
-Then the strings can be specified::
-
-	$ echo <serial number> > strings/0x409/serialnumber
-	$ echo <manufacturer> > strings/0x409/manufacturer
-	$ echo <product> > strings/0x409/product
-
-2. Creating the configurations
-------------------------------
-
-Each gadget will consist of a number of configurations, their corresponding
-directories must be created:
-
-$ mkdir configs/<name>.<number>
-
-where <name> can be any string which is legal in a filesystem and the
-<number> is the configuration's number, e.g.::
-
-	$ mkdir configs/c.1
-
-	...
-	...
-	...
-
-Each configuration also needs its strings, so a subdirectory must be created
-for each language, e.g.::
-
-	$ mkdir configs/c.1/strings/0x409
-
-Then the configuration string can be specified::
-
-	$ echo <configuration> > configs/c.1/strings/0x409/configuration
-
-Some attributes can also be set for a configuration, e.g.::
-
-	$ echo 120 > configs/c.1/MaxPower
-
-3. Creating the functions
--------------------------
-
-The gadget will provide some functions, for each function its corresponding
-directory must be created::
-
-	$ mkdir functions/<name>.<instance name>
-
-where <name> corresponds to one of allowed function names and instance name
-is an arbitrary string allowed in a filesystem, e.g.::
-
-  $ mkdir functions/ncm.usb0 # usb_f_ncm.ko gets loaded with request_module()
-
-  ...
-  ...
-  ...
-
-Each function provides its specific set of attributes, with either read-only
-or read-write access. Where applicable they need to be written to as
-appropriate.
-Please refer to Documentation/ABI/*/configfs-usb-gadget* for more information.
-
-4. Associating the functions with their configurations
-------------------------------------------------------
-
-At this moment a number of gadgets is created, each of which has a number of
-configurations specified and a number of functions available. What remains
-is specifying which function is available in which configuration (the same
-function can be used in multiple configurations). This is achieved with
-creating symbolic links::
-
-	$ ln -s functions/<name>.<instance name> configs/<name>.<number>
-
-e.g.::
-
-	$ ln -s functions/ncm.usb0 configs/c.1
-
-	...
-	...
-	...
-
-5. Enabling the gadget
-----------------------
-
-All the above steps serve the purpose of composing the gadget of
-configurations and functions.
-
-An example directory structure might look like this::
-
-  .
-  ./strings
-  ./strings/0x409
-  ./strings/0x409/serialnumber
-  ./strings/0x409/product
-  ./strings/0x409/manufacturer
-  ./configs
-  ./configs/c.1
-  ./configs/c.1/ncm.usb0 -> ../../../../usb_gadget/g1/functions/ncm.usb0
-  ./configs/c.1/strings
-  ./configs/c.1/strings/0x409
-  ./configs/c.1/strings/0x409/configuration
-  ./configs/c.1/bmAttributes
-  ./configs/c.1/MaxPower
-  ./functions
-  ./functions/ncm.usb0
-  ./functions/ncm.usb0/ifname
-  ./functions/ncm.usb0/qmult
-  ./functions/ncm.usb0/host_addr
-  ./functions/ncm.usb0/dev_addr
-  ./UDC
-  ./bcdUSB
-  ./bcdDevice
-  ./idProduct
-  ./idVendor
-  ./bMaxPacketSize0
-  ./bDeviceProtocol
-  ./bDeviceSubClass
-  ./bDeviceClass
-
-
-Such a gadget must be finally enabled so that the USB host can enumerate it.
-
-In order to enable the gadget it must be bound to a UDC (USB Device
-Controller)::
-
-	$ echo <udc name> > UDC
-
-where <udc name> is one of those found in /sys/class/udc/*
-e.g.::
-
-	$ echo s3c-hsotg > UDC
-
-
-6. Disabling the gadget
------------------------
-
-::
-
-	$ echo "" > UDC
-
-7. Cleaning up
---------------
-
-Remove functions from configurations::
-
-	$ rm configs/<config name>.<number>/<function>
-
-where <config name>.<number> specify the configuration and <function> is
-a symlink to a function being removed from the configuration, e.g.::
-
-	$ rm configs/c.1/ncm.usb0
-
-	...
-	...
-	...
-
-Remove strings directories in configurations:
-
-	$ rmdir configs/<config name>.<number>/strings/<lang>
-
-e.g.::
-
-	$ rmdir configs/c.1/strings/0x409
-
-	...
-	...
-	...
-
-and remove the configurations::
-
-	$ rmdir configs/<config name>.<number>
-
-e.g.::
-
-	rmdir configs/c.1
-
-	...
-	...
-	...
-
-Remove functions (function modules are not unloaded, though):
-
-	$ rmdir functions/<name>.<instance name>
-
-e.g.::
-
-	$ rmdir functions/ncm.usb0
-
-	...
-	...
-	...
-
-Remove strings directories in the gadget::
-
-	$ rmdir strings/<lang>
-
-e.g.::
-
-	$ rmdir strings/0x409
-
-and finally remove the gadget::
-
-	$ cd ..
-	$ rmdir <gadget name>
-
-e.g.::
-
-	$ rmdir g1
-
-
-
-
-Implementation design
-=====================
-
-Below the idea of how configfs works is presented.
-In configfs there are items and groups, both represented as directories.
-The difference between an item and a group is that a group can contain
-other groups. In the picture below only an item is shown.
-Both items and groups can have attributes, which are represented as files.
-The user can create and remove directories, but cannot remove files,
-which can be read-only or read-write, depending on what they represent.
-
-The filesystem part of configfs operates on config_items/groups and
-configfs_attributes which are generic and of the same type for all
-configured elements. However, they are embedded in usage-specific
-larger structures. In the picture below there is a "cs" which contains
-a config_item and an "sa" which contains a configfs_attribute.
-
-The filesystem view would be like this::
-
-  ./
-  ./cs        (directory)
-     |
-     +--sa    (file)
-     |
-     .
-     .
-     .
-
-Whenever a user reads/writes the "sa" file, a function is called
-which accepts a struct config_item and a struct configfs_attribute.
-In the said function the "cs" and "sa" are retrieved using the well
-known container_of technique and an appropriate sa's function (show or
-store) is called and passed the "cs" and a character buffer. The "show"
-is for displaying the file's contents (copy data from the cs to the
-buffer), while the "store" is for modifying the file's contents (copy data
-from the buffer to the cs), but it is up to the implementer of the
-two functions to decide what they actually do.
-
-::
-
-  typedef struct configured_structure cs;
-  typedef struct specific_attribute sa;
-
-                                         sa
-                         +----------------------------------+
-          cs             |  (*show)(cs *, buffer);          |
-  +-----------------+    |  (*store)(cs *, buffer, length); |
-  |                 |    |                                  |
-  | +-------------+ |    |       +------------------+       |
-  | | struct      |-|----|------>|struct            |       |
-  | | config_item | |    |       |configfs_attribute|       |
-  | +-------------+ |    |       +------------------+       |
-  |                 |    +----------------------------------+
-  | data to be set  |                .
-  |                 |                .
-  +-----------------+                .
-
-The file names are decided by the config item/group designer, while
-the directories in general can be named at will. A group can have
-a number of its default sub-groups created automatically.
-
-For more information on configfs please see
-`Documentation/filesystems/configfs/*`.
-
-The concepts described above translate to USB gadgets like this:
-
-1. A gadget has its config group, which has some attributes (idVendor,
-idProduct etc) and default sub-groups (configs, functions, strings).
-Writing to the attributes causes the information to be stored in
-appropriate locations. In the configs, functions and strings sub-groups
-a user can create their sub-groups to represent configurations, functions,
-and groups of strings in a given language.
-
-2. The user creates configurations and functions, in the configurations
-creates symbolic links to functions. This information is used when the
-gadget's UDC attribute is written to, which means binding the gadget
-to the UDC. The code in drivers/usb/gadget/configfs.c iterates over
-all configurations, and in each configuration it iterates over all
-functions and binds them. This way the whole gadget is bound.
-
-3. The file drivers/usb/gadget/configfs.c contains code for
-
-	- gadget's config_group
-	- gadget's default groups (configs, functions, strings)
-	- associating functions with configurations (symlinks)
-
-4. Each USB function naturally has its own view of what it wants
-configured, so config_groups for particular functions are defined
-in the functions implementation files drivers/usb/gadget/f_*.c.
-
-5. Function's code is written in such a way that it uses
-
-usb_get_function_instance(), which, in turn, calls request_module.
-So, provided that modprobe works, modules for particular functions
-are loaded automatically. Please note that the converse is not true:
-after a gadget is disabled and torn down, the modules remain loaded.
diff --git a/Documentation/usb/gadget_hid.rst b/Documentation/usb/gadget_hid.rst
new file mode 100644
index 000000000000..098d563040cc
--- /dev/null
+++ b/Documentation/usb/gadget_hid.rst
@@ -0,0 +1,457 @@
+===========================
+Linux USB HID gadget driver
+===========================
+
+Introduction
+============
+
+The HID Gadget driver provides emulation of USB Human Interface
+Devices (HID). The basic HID handling is done in the kernel,
+and HID reports can be sent/received through I/O on the
+/dev/hidgX character devices.
+
+For more details about HID, see the developer page on
+http://www.usb.org/developers/hidpage/
+
+Configuration
+=============
+
+g_hid is a platform driver, so to use it you need to add
+struct platform_device(s) to your platform code defining the
+HID function descriptors you want to use - E.G. something
+like::
+
+  #include <linux/platform_device.h>
+  #include <linux/usb/g_hid.h>
+
+  /* hid descriptor for a keyboard */
+  static struct hidg_func_descriptor my_hid_data = {
+	.subclass		= 0, /* No subclass */
+	.protocol		= 1, /* Keyboard */
+	.report_length		= 8,
+	.report_desc_length	= 63,
+	.report_desc		= {
+		0x05, 0x01,	/* USAGE_PAGE (Generic Desktop)	          */
+		0x09, 0x06,	/* USAGE (Keyboard)                       */
+		0xa1, 0x01,	/* COLLECTION (Application)               */
+		0x05, 0x07,	/*   USAGE_PAGE (Keyboard)                */
+		0x19, 0xe0,	/*   USAGE_MINIMUM (Keyboard LeftControl) */
+		0x29, 0xe7,	/*   USAGE_MAXIMUM (Keyboard Right GUI)   */
+		0x15, 0x00,	/*   LOGICAL_MINIMUM (0)                  */
+		0x25, 0x01,	/*   LOGICAL_MAXIMUM (1)                  */
+		0x75, 0x01,	/*   REPORT_SIZE (1)                      */
+		0x95, 0x08,	/*   REPORT_COUNT (8)                     */
+		0x81, 0x02,	/*   INPUT (Data,Var,Abs)                 */
+		0x95, 0x01,	/*   REPORT_COUNT (1)                     */
+		0x75, 0x08,	/*   REPORT_SIZE (8)                      */
+		0x81, 0x03,	/*   INPUT (Cnst,Var,Abs)                 */
+		0x95, 0x05,	/*   REPORT_COUNT (5)                     */
+		0x75, 0x01,	/*   REPORT_SIZE (1)                      */
+		0x05, 0x08,	/*   USAGE_PAGE (LEDs)                    */
+		0x19, 0x01,	/*   USAGE_MINIMUM (Num Lock)             */
+		0x29, 0x05,	/*   USAGE_MAXIMUM (Kana)                 */
+		0x91, 0x02,	/*   OUTPUT (Data,Var,Abs)                */
+		0x95, 0x01,	/*   REPORT_COUNT (1)                     */
+		0x75, 0x03,	/*   REPORT_SIZE (3)                      */
+		0x91, 0x03,	/*   OUTPUT (Cnst,Var,Abs)                */
+		0x95, 0x06,	/*   REPORT_COUNT (6)                     */
+		0x75, 0x08,	/*   REPORT_SIZE (8)                      */
+		0x15, 0x00,	/*   LOGICAL_MINIMUM (0)                  */
+		0x25, 0x65,	/*   LOGICAL_MAXIMUM (101)                */
+		0x05, 0x07,	/*   USAGE_PAGE (Keyboard)                */
+		0x19, 0x00,	/*   USAGE_MINIMUM (Reserved)             */
+		0x29, 0x65,	/*   USAGE_MAXIMUM (Keyboard Application) */
+		0x81, 0x00,	/*   INPUT (Data,Ary,Abs)                 */
+		0xc0		/* END_COLLECTION                         */
+	}
+  };
+
+  static struct platform_device my_hid = {
+	.name			= "hidg",
+	.id			= 0,
+	.num_resources		= 0,
+	.resource		= 0,
+	.dev.platform_data	= &my_hid_data,
+  };
+
+You can add as many HID functions as you want, only limited by
+the amount of interrupt endpoints your gadget driver supports.
+
+Configuration with configfs
+===========================
+
+Instead of adding fake platform devices and drivers in order to pass
+some data to the kernel, if HID is a part of a gadget composed with
+configfs the hidg_func_descriptor.report_desc is passed to the kernel
+by writing the appropriate stream of bytes to a configfs attribute.
+
+Send and receive HID reports
+============================
+
+HID reports can be sent/received using read/write on the
+/dev/hidgX character devices. See below for an example program
+to do this.
+
+hid_gadget_test is a small interactive program to test the HID
+gadget driver. To use, point it at a hidg device and set the
+device type (keyboard / mouse / joystick) - E.G.::
+
+	# hid_gadget_test /dev/hidg0 keyboard
+
+You are now in the prompt of hid_gadget_test. You can type any
+combination of options and values. Available options and
+values are listed at program start. In keyboard mode you can
+send up to six values.
+
+For example type: g i s t r --left-shift
+
+Hit return and the corresponding report will be sent by the
+HID gadget.
+
+Another interesting example is the caps lock test. Type
+--caps-lock and hit return. A report is then sent by the
+gadget and you should receive the host answer, corresponding
+to the caps lock LED status::
+
+	--caps-lock
+	recv report:2
+
+With this command::
+
+	# hid_gadget_test /dev/hidg1 mouse
+
+You can test the mouse emulation. Values are two signed numbers.
+
+
+Sample code::
+
+    /* hid_gadget_test */
+
+    #include <pthread.h>
+    #include <string.h>
+    #include <stdio.h>
+    #include <ctype.h>
+    #include <fcntl.h>
+    #include <errno.h>
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <unistd.h>
+
+    #define BUF_LEN 512
+
+    struct options {
+	const char    *opt;
+	unsigned char val;
+  };
+
+  static struct options kmod[] = {
+	{.opt = "--left-ctrl",		.val = 0x01},
+	{.opt = "--right-ctrl",		.val = 0x10},
+	{.opt = "--left-shift",		.val = 0x02},
+	{.opt = "--right-shift",	.val = 0x20},
+	{.opt = "--left-alt",		.val = 0x04},
+	{.opt = "--right-alt",		.val = 0x40},
+	{.opt = "--left-meta",		.val = 0x08},
+	{.opt = "--right-meta",		.val = 0x80},
+	{.opt = NULL}
+  };
+
+  static struct options kval[] = {
+	{.opt = "--return",	.val = 0x28},
+	{.opt = "--esc",	.val = 0x29},
+	{.opt = "--bckspc",	.val = 0x2a},
+	{.opt = "--tab",	.val = 0x2b},
+	{.opt = "--spacebar",	.val = 0x2c},
+	{.opt = "--caps-lock",	.val = 0x39},
+	{.opt = "--f1",		.val = 0x3a},
+	{.opt = "--f2",		.val = 0x3b},
+	{.opt = "--f3",		.val = 0x3c},
+	{.opt = "--f4",		.val = 0x3d},
+	{.opt = "--f5",		.val = 0x3e},
+	{.opt = "--f6",		.val = 0x3f},
+	{.opt = "--f7",		.val = 0x40},
+	{.opt = "--f8",		.val = 0x41},
+	{.opt = "--f9",		.val = 0x42},
+	{.opt = "--f10",	.val = 0x43},
+	{.opt = "--f11",	.val = 0x44},
+	{.opt = "--f12",	.val = 0x45},
+	{.opt = "--insert",	.val = 0x49},
+	{.opt = "--home",	.val = 0x4a},
+	{.opt = "--pageup",	.val = 0x4b},
+	{.opt = "--del",	.val = 0x4c},
+	{.opt = "--end",	.val = 0x4d},
+	{.opt = "--pagedown",	.val = 0x4e},
+	{.opt = "--right",	.val = 0x4f},
+	{.opt = "--left",	.val = 0x50},
+	{.opt = "--down",	.val = 0x51},
+	{.opt = "--kp-enter",	.val = 0x58},
+	{.opt = "--up",		.val = 0x52},
+	{.opt = "--num-lock",	.val = 0x53},
+	{.opt = NULL}
+  };
+
+  int keyboard_fill_report(char report[8], char buf[BUF_LEN], int *hold)
+  {
+	char *tok = strtok(buf, " ");
+	int key = 0;
+	int i = 0;
+
+	for (; tok != NULL; tok = strtok(NULL, " ")) {
+
+		if (strcmp(tok, "--quit") == 0)
+			return -1;
+
+		if (strcmp(tok, "--hold") == 0) {
+			*hold = 1;
+			continue;
+		}
+
+		if (key < 6) {
+			for (i = 0; kval[i].opt != NULL; i++)
+				if (strcmp(tok, kval[i].opt) == 0) {
+					report[2 + key++] = kval[i].val;
+					break;
+				}
+			if (kval[i].opt != NULL)
+				continue;
+		}
+
+		if (key < 6)
+			if (islower(tok[0])) {
+				report[2 + key++] = (tok[0] - ('a' - 0x04));
+				continue;
+			}
+
+		for (i = 0; kmod[i].opt != NULL; i++)
+			if (strcmp(tok, kmod[i].opt) == 0) {
+				report[0] = report[0] | kmod[i].val;
+				break;
+			}
+		if (kmod[i].opt != NULL)
+			continue;
+
+		if (key < 6)
+			fprintf(stderr, "unknown option: %s\n", tok);
+	}
+	return 8;
+  }
+
+  static struct options mmod[] = {
+	{.opt = "--b1", .val = 0x01},
+	{.opt = "--b2", .val = 0x02},
+	{.opt = "--b3", .val = 0x04},
+	{.opt = NULL}
+  };
+
+  int mouse_fill_report(char report[8], char buf[BUF_LEN], int *hold)
+  {
+	char *tok = strtok(buf, " ");
+	int mvt = 0;
+	int i = 0;
+	for (; tok != NULL; tok = strtok(NULL, " ")) {
+
+		if (strcmp(tok, "--quit") == 0)
+			return -1;
+
+		if (strcmp(tok, "--hold") == 0) {
+			*hold = 1;
+			continue;
+		}
+
+		for (i = 0; mmod[i].opt != NULL; i++)
+			if (strcmp(tok, mmod[i].opt) == 0) {
+				report[0] = report[0] | mmod[i].val;
+				break;
+			}
+		if (mmod[i].opt != NULL)
+			continue;
+
+		if (!(tok[0] == '-' && tok[1] == '-') && mvt < 2) {
+			errno = 0;
+			report[1 + mvt++] = (char)strtol(tok, NULL, 0);
+			if (errno != 0) {
+				fprintf(stderr, "Bad value:'%s'\n", tok);
+				report[1 + mvt--] = 0;
+			}
+			continue;
+		}
+
+		fprintf(stderr, "unknown option: %s\n", tok);
+	}
+	return 3;
+  }
+
+  static struct options jmod[] = {
+	{.opt = "--b1",		.val = 0x10},
+	{.opt = "--b2",		.val = 0x20},
+	{.opt = "--b3",		.val = 0x40},
+	{.opt = "--b4",		.val = 0x80},
+	{.opt = "--hat1",	.val = 0x00},
+	{.opt = "--hat2",	.val = 0x01},
+	{.opt = "--hat3",	.val = 0x02},
+	{.opt = "--hat4",	.val = 0x03},
+	{.opt = "--hatneutral",	.val = 0x04},
+	{.opt = NULL}
+  };
+
+  int joystick_fill_report(char report[8], char buf[BUF_LEN], int *hold)
+  {
+	char *tok = strtok(buf, " ");
+	int mvt = 0;
+	int i = 0;
+
+	*hold = 1;
+
+	/* set default hat position: neutral */
+	report[3] = 0x04;
+
+	for (; tok != NULL; tok = strtok(NULL, " ")) {
+
+		if (strcmp(tok, "--quit") == 0)
+			return -1;
+
+		for (i = 0; jmod[i].opt != NULL; i++)
+			if (strcmp(tok, jmod[i].opt) == 0) {
+				report[3] = (report[3] & 0xF0) | jmod[i].val;
+				break;
+			}
+		if (jmod[i].opt != NULL)
+			continue;
+
+		if (!(tok[0] == '-' && tok[1] == '-') && mvt < 3) {
+			errno = 0;
+			report[mvt++] = (char)strtol(tok, NULL, 0);
+			if (errno != 0) {
+				fprintf(stderr, "Bad value:'%s'\n", tok);
+				report[mvt--] = 0;
+			}
+			continue;
+		}
+
+		fprintf(stderr, "unknown option: %s\n", tok);
+	}
+	return 4;
+  }
+
+  void print_options(char c)
+  {
+	int i = 0;
+
+	if (c == 'k') {
+		printf("	keyboard options:\n"
+		       "		--hold\n");
+		for (i = 0; kmod[i].opt != NULL; i++)
+			printf("\t\t%s\n", kmod[i].opt);
+		printf("\n	keyboard values:\n"
+		       "		[a-z] or\n");
+		for (i = 0; kval[i].opt != NULL; i++)
+			printf("\t\t%-8s%s", kval[i].opt, i % 2 ? "\n" : "");
+		printf("\n");
+	} else if (c == 'm') {
+		printf("	mouse options:\n"
+		       "		--hold\n");
+		for (i = 0; mmod[i].opt != NULL; i++)
+			printf("\t\t%s\n", mmod[i].opt);
+		printf("\n	mouse values:\n"
+		       "		Two signed numbers\n"
+		       "--quit to close\n");
+	} else {
+		printf("	joystick options:\n");
+		for (i = 0; jmod[i].opt != NULL; i++)
+			printf("\t\t%s\n", jmod[i].opt);
+		printf("\n	joystick values:\n"
+		       "		three signed numbers\n"
+		       "--quit to close\n");
+	}
+  }
+
+  int main(int argc, const char *argv[])
+  {
+	const char *filename = NULL;
+	int fd = 0;
+	char buf[BUF_LEN];
+	int cmd_len;
+	char report[8];
+	int to_send = 8;
+	int hold = 0;
+	fd_set rfds;
+	int retval, i;
+
+	if (argc < 3) {
+		fprintf(stderr, "Usage: %s devname mouse|keyboard|joystick\n",
+			argv[0]);
+		return 1;
+	}
+
+	if (argv[2][0] != 'k' && argv[2][0] != 'm' && argv[2][0] != 'j')
+	  return 2;
+
+	filename = argv[1];
+
+	if ((fd = open(filename, O_RDWR, 0666)) == -1) {
+		perror(filename);
+		return 3;
+	}
+
+	print_options(argv[2][0]);
+
+	while (42) {
+
+		FD_ZERO(&rfds);
+		FD_SET(STDIN_FILENO, &rfds);
+		FD_SET(fd, &rfds);
+
+		retval = select(fd + 1, &rfds, NULL, NULL, NULL);
+		if (retval == -1 && errno == EINTR)
+			continue;
+		if (retval < 0) {
+			perror("select()");
+			return 4;
+		}
+
+		if (FD_ISSET(fd, &rfds)) {
+			cmd_len = read(fd, buf, BUF_LEN - 1);
+			printf("recv report:");
+			for (i = 0; i < cmd_len; i++)
+				printf(" %02x", buf[i]);
+			printf("\n");
+		}
+
+		if (FD_ISSET(STDIN_FILENO, &rfds)) {
+			memset(report, 0x0, sizeof(report));
+			cmd_len = read(STDIN_FILENO, buf, BUF_LEN - 1);
+
+			if (cmd_len == 0)
+				break;
+
+			buf[cmd_len - 1] = '\0';
+			hold = 0;
+
+			memset(report, 0x0, sizeof(report));
+			if (argv[2][0] == 'k')
+				to_send = keyboard_fill_report(report, buf, &hold);
+			else if (argv[2][0] == 'm')
+				to_send = mouse_fill_report(report, buf, &hold);
+			else
+				to_send = joystick_fill_report(report, buf, &hold);
+
+			if (to_send == -1)
+				break;
+
+			if (write(fd, report, to_send) != to_send) {
+				perror(filename);
+				return 5;
+			}
+			if (!hold) {
+				memset(report, 0x0, sizeof(report));
+				if (write(fd, report, to_send) != to_send) {
+					perror(filename);
+					return 6;
+				}
+			}
+		}
+	}
+
+	close(fd);
+	return 0;
+  }
diff --git a/Documentation/usb/gadget_hid.txt b/Documentation/usb/gadget_hid.txt
deleted file mode 100644
index 098d563040cc..000000000000
--- a/Documentation/usb/gadget_hid.txt
+++ /dev/null
@@ -1,457 +0,0 @@
-===========================
-Linux USB HID gadget driver
-===========================
-
-Introduction
-============
-
-The HID Gadget driver provides emulation of USB Human Interface
-Devices (HID). The basic HID handling is done in the kernel,
-and HID reports can be sent/received through I/O on the
-/dev/hidgX character devices.
-
-For more details about HID, see the developer page on
-http://www.usb.org/developers/hidpage/
-
-Configuration
-=============
-
-g_hid is a platform driver, so to use it you need to add
-struct platform_device(s) to your platform code defining the
-HID function descriptors you want to use - E.G. something
-like::
-
-  #include <linux/platform_device.h>
-  #include <linux/usb/g_hid.h>
-
-  /* hid descriptor for a keyboard */
-  static struct hidg_func_descriptor my_hid_data = {
-	.subclass		= 0, /* No subclass */
-	.protocol		= 1, /* Keyboard */
-	.report_length		= 8,
-	.report_desc_length	= 63,
-	.report_desc		= {
-		0x05, 0x01,	/* USAGE_PAGE (Generic Desktop)	          */
-		0x09, 0x06,	/* USAGE (Keyboard)                       */
-		0xa1, 0x01,	/* COLLECTION (Application)               */
-		0x05, 0x07,	/*   USAGE_PAGE (Keyboard)                */
-		0x19, 0xe0,	/*   USAGE_MINIMUM (Keyboard LeftControl) */
-		0x29, 0xe7,	/*   USAGE_MAXIMUM (Keyboard Right GUI)   */
-		0x15, 0x00,	/*   LOGICAL_MINIMUM (0)                  */
-		0x25, 0x01,	/*   LOGICAL_MAXIMUM (1)                  */
-		0x75, 0x01,	/*   REPORT_SIZE (1)                      */
-		0x95, 0x08,	/*   REPORT_COUNT (8)                     */
-		0x81, 0x02,	/*   INPUT (Data,Var,Abs)                 */
-		0x95, 0x01,	/*   REPORT_COUNT (1)                     */
-		0x75, 0x08,	/*   REPORT_SIZE (8)                      */
-		0x81, 0x03,	/*   INPUT (Cnst,Var,Abs)                 */
-		0x95, 0x05,	/*   REPORT_COUNT (5)                     */
-		0x75, 0x01,	/*   REPORT_SIZE (1)                      */
-		0x05, 0x08,	/*   USAGE_PAGE (LEDs)                    */
-		0x19, 0x01,	/*   USAGE_MINIMUM (Num Lock)             */
-		0x29, 0x05,	/*   USAGE_MAXIMUM (Kana)                 */
-		0x91, 0x02,	/*   OUTPUT (Data,Var,Abs)                */
-		0x95, 0x01,	/*   REPORT_COUNT (1)                     */
-		0x75, 0x03,	/*   REPORT_SIZE (3)                      */
-		0x91, 0x03,	/*   OUTPUT (Cnst,Var,Abs)                */
-		0x95, 0x06,	/*   REPORT_COUNT (6)                     */
-		0x75, 0x08,	/*   REPORT_SIZE (8)                      */
-		0x15, 0x00,	/*   LOGICAL_MINIMUM (0)                  */
-		0x25, 0x65,	/*   LOGICAL_MAXIMUM (101)                */
-		0x05, 0x07,	/*   USAGE_PAGE (Keyboard)                */
-		0x19, 0x00,	/*   USAGE_MINIMUM (Reserved)             */
-		0x29, 0x65,	/*   USAGE_MAXIMUM (Keyboard Application) */
-		0x81, 0x00,	/*   INPUT (Data,Ary,Abs)                 */
-		0xc0		/* END_COLLECTION                         */
-	}
-  };
-
-  static struct platform_device my_hid = {
-	.name			= "hidg",
-	.id			= 0,
-	.num_resources		= 0,
-	.resource		= 0,
-	.dev.platform_data	= &my_hid_data,
-  };
-
-You can add as many HID functions as you want, only limited by
-the amount of interrupt endpoints your gadget driver supports.
-
-Configuration with configfs
-===========================
-
-Instead of adding fake platform devices and drivers in order to pass
-some data to the kernel, if HID is a part of a gadget composed with
-configfs the hidg_func_descriptor.report_desc is passed to the kernel
-by writing the appropriate stream of bytes to a configfs attribute.
-
-Send and receive HID reports
-============================
-
-HID reports can be sent/received using read/write on the
-/dev/hidgX character devices. See below for an example program
-to do this.
-
-hid_gadget_test is a small interactive program to test the HID
-gadget driver. To use, point it at a hidg device and set the
-device type (keyboard / mouse / joystick) - E.G.::
-
-	# hid_gadget_test /dev/hidg0 keyboard
-
-You are now in the prompt of hid_gadget_test. You can type any
-combination of options and values. Available options and
-values are listed at program start. In keyboard mode you can
-send up to six values.
-
-For example type: g i s t r --left-shift
-
-Hit return and the corresponding report will be sent by the
-HID gadget.
-
-Another interesting example is the caps lock test. Type
---caps-lock and hit return. A report is then sent by the
-gadget and you should receive the host answer, corresponding
-to the caps lock LED status::
-
-	--caps-lock
-	recv report:2
-
-With this command::
-
-	# hid_gadget_test /dev/hidg1 mouse
-
-You can test the mouse emulation. Values are two signed numbers.
-
-
-Sample code::
-
-    /* hid_gadget_test */
-
-    #include <pthread.h>
-    #include <string.h>
-    #include <stdio.h>
-    #include <ctype.h>
-    #include <fcntl.h>
-    #include <errno.h>
-    #include <stdio.h>
-    #include <stdlib.h>
-    #include <unistd.h>
-
-    #define BUF_LEN 512
-
-    struct options {
-	const char    *opt;
-	unsigned char val;
-  };
-
-  static struct options kmod[] = {
-	{.opt = "--left-ctrl",		.val = 0x01},
-	{.opt = "--right-ctrl",		.val = 0x10},
-	{.opt = "--left-shift",		.val = 0x02},
-	{.opt = "--right-shift",	.val = 0x20},
-	{.opt = "--left-alt",		.val = 0x04},
-	{.opt = "--right-alt",		.val = 0x40},
-	{.opt = "--left-meta",		.val = 0x08},
-	{.opt = "--right-meta",		.val = 0x80},
-	{.opt = NULL}
-  };
-
-  static struct options kval[] = {
-	{.opt = "--return",	.val = 0x28},
-	{.opt = "--esc",	.val = 0x29},
-	{.opt = "--bckspc",	.val = 0x2a},
-	{.opt = "--tab",	.val = 0x2b},
-	{.opt = "--spacebar",	.val = 0x2c},
-	{.opt = "--caps-lock",	.val = 0x39},
-	{.opt = "--f1",		.val = 0x3a},
-	{.opt = "--f2",		.val = 0x3b},
-	{.opt = "--f3",		.val = 0x3c},
-	{.opt = "--f4",		.val = 0x3d},
-	{.opt = "--f5",		.val = 0x3e},
-	{.opt = "--f6",		.val = 0x3f},
-	{.opt = "--f7",		.val = 0x40},
-	{.opt = "--f8",		.val = 0x41},
-	{.opt = "--f9",		.val = 0x42},
-	{.opt = "--f10",	.val = 0x43},
-	{.opt = "--f11",	.val = 0x44},
-	{.opt = "--f12",	.val = 0x45},
-	{.opt = "--insert",	.val = 0x49},
-	{.opt = "--home",	.val = 0x4a},
-	{.opt = "--pageup",	.val = 0x4b},
-	{.opt = "--del",	.val = 0x4c},
-	{.opt = "--end",	.val = 0x4d},
-	{.opt = "--pagedown",	.val = 0x4e},
-	{.opt = "--right",	.val = 0x4f},
-	{.opt = "--left",	.val = 0x50},
-	{.opt = "--down",	.val = 0x51},
-	{.opt = "--kp-enter",	.val = 0x58},
-	{.opt = "--up",		.val = 0x52},
-	{.opt = "--num-lock",	.val = 0x53},
-	{.opt = NULL}
-  };
-
-  int keyboard_fill_report(char report[8], char buf[BUF_LEN], int *hold)
-  {
-	char *tok = strtok(buf, " ");
-	int key = 0;
-	int i = 0;
-
-	for (; tok != NULL; tok = strtok(NULL, " ")) {
-
-		if (strcmp(tok, "--quit") == 0)
-			return -1;
-
-		if (strcmp(tok, "--hold") == 0) {
-			*hold = 1;
-			continue;
-		}
-
-		if (key < 6) {
-			for (i = 0; kval[i].opt != NULL; i++)
-				if (strcmp(tok, kval[i].opt) == 0) {
-					report[2 + key++] = kval[i].val;
-					break;
-				}
-			if (kval[i].opt != NULL)
-				continue;
-		}
-
-		if (key < 6)
-			if (islower(tok[0])) {
-				report[2 + key++] = (tok[0] - ('a' - 0x04));
-				continue;
-			}
-
-		for (i = 0; kmod[i].opt != NULL; i++)
-			if (strcmp(tok, kmod[i].opt) == 0) {
-				report[0] = report[0] | kmod[i].val;
-				break;
-			}
-		if (kmod[i].opt != NULL)
-			continue;
-
-		if (key < 6)
-			fprintf(stderr, "unknown option: %s\n", tok);
-	}
-	return 8;
-  }
-
-  static struct options mmod[] = {
-	{.opt = "--b1", .val = 0x01},
-	{.opt = "--b2", .val = 0x02},
-	{.opt = "--b3", .val = 0x04},
-	{.opt = NULL}
-  };
-
-  int mouse_fill_report(char report[8], char buf[BUF_LEN], int *hold)
-  {
-	char *tok = strtok(buf, " ");
-	int mvt = 0;
-	int i = 0;
-	for (; tok != NULL; tok = strtok(NULL, " ")) {
-
-		if (strcmp(tok, "--quit") == 0)
-			return -1;
-
-		if (strcmp(tok, "--hold") == 0) {
-			*hold = 1;
-			continue;
-		}
-
-		for (i = 0; mmod[i].opt != NULL; i++)
-			if (strcmp(tok, mmod[i].opt) == 0) {
-				report[0] = report[0] | mmod[i].val;
-				break;
-			}
-		if (mmod[i].opt != NULL)
-			continue;
-
-		if (!(tok[0] == '-' && tok[1] == '-') && mvt < 2) {
-			errno = 0;
-			report[1 + mvt++] = (char)strtol(tok, NULL, 0);
-			if (errno != 0) {
-				fprintf(stderr, "Bad value:'%s'\n", tok);
-				report[1 + mvt--] = 0;
-			}
-			continue;
-		}
-
-		fprintf(stderr, "unknown option: %s\n", tok);
-	}
-	return 3;
-  }
-
-  static struct options jmod[] = {
-	{.opt = "--b1",		.val = 0x10},
-	{.opt = "--b2",		.val = 0x20},
-	{.opt = "--b3",		.val = 0x40},
-	{.opt = "--b4",		.val = 0x80},
-	{.opt = "--hat1",	.val = 0x00},
-	{.opt = "--hat2",	.val = 0x01},
-	{.opt = "--hat3",	.val = 0x02},
-	{.opt = "--hat4",	.val = 0x03},
-	{.opt = "--hatneutral",	.val = 0x04},
-	{.opt = NULL}
-  };
-
-  int joystick_fill_report(char report[8], char buf[BUF_LEN], int *hold)
-  {
-	char *tok = strtok(buf, " ");
-	int mvt = 0;
-	int i = 0;
-
-	*hold = 1;
-
-	/* set default hat position: neutral */
-	report[3] = 0x04;
-
-	for (; tok != NULL; tok = strtok(NULL, " ")) {
-
-		if (strcmp(tok, "--quit") == 0)
-			return -1;
-
-		for (i = 0; jmod[i].opt != NULL; i++)
-			if (strcmp(tok, jmod[i].opt) == 0) {
-				report[3] = (report[3] & 0xF0) | jmod[i].val;
-				break;
-			}
-		if (jmod[i].opt != NULL)
-			continue;
-
-		if (!(tok[0] == '-' && tok[1] == '-') && mvt < 3) {
-			errno = 0;
-			report[mvt++] = (char)strtol(tok, NULL, 0);
-			if (errno != 0) {
-				fprintf(stderr, "Bad value:'%s'\n", tok);
-				report[mvt--] = 0;
-			}
-			continue;
-		}
-
-		fprintf(stderr, "unknown option: %s\n", tok);
-	}
-	return 4;
-  }
-
-  void print_options(char c)
-  {
-	int i = 0;
-
-	if (c == 'k') {
-		printf("	keyboard options:\n"
-		       "		--hold\n");
-		for (i = 0; kmod[i].opt != NULL; i++)
-			printf("\t\t%s\n", kmod[i].opt);
-		printf("\n	keyboard values:\n"
-		       "		[a-z] or\n");
-		for (i = 0; kval[i].opt != NULL; i++)
-			printf("\t\t%-8s%s", kval[i].opt, i % 2 ? "\n" : "");
-		printf("\n");
-	} else if (c == 'm') {
-		printf("	mouse options:\n"
-		       "		--hold\n");
-		for (i = 0; mmod[i].opt != NULL; i++)
-			printf("\t\t%s\n", mmod[i].opt);
-		printf("\n	mouse values:\n"
-		       "		Two signed numbers\n"
-		       "--quit to close\n");
-	} else {
-		printf("	joystick options:\n");
-		for (i = 0; jmod[i].opt != NULL; i++)
-			printf("\t\t%s\n", jmod[i].opt);
-		printf("\n	joystick values:\n"
-		       "		three signed numbers\n"
-		       "--quit to close\n");
-	}
-  }
-
-  int main(int argc, const char *argv[])
-  {
-	const char *filename = NULL;
-	int fd = 0;
-	char buf[BUF_LEN];
-	int cmd_len;
-	char report[8];
-	int to_send = 8;
-	int hold = 0;
-	fd_set rfds;
-	int retval, i;
-
-	if (argc < 3) {
-		fprintf(stderr, "Usage: %s devname mouse|keyboard|joystick\n",
-			argv[0]);
-		return 1;
-	}
-
-	if (argv[2][0] != 'k' && argv[2][0] != 'm' && argv[2][0] != 'j')
-	  return 2;
-
-	filename = argv[1];
-
-	if ((fd = open(filename, O_RDWR, 0666)) == -1) {
-		perror(filename);
-		return 3;
-	}
-
-	print_options(argv[2][0]);
-
-	while (42) {
-
-		FD_ZERO(&rfds);
-		FD_SET(STDIN_FILENO, &rfds);
-		FD_SET(fd, &rfds);
-
-		retval = select(fd + 1, &rfds, NULL, NULL, NULL);
-		if (retval == -1 && errno == EINTR)
-			continue;
-		if (retval < 0) {
-			perror("select()");
-			return 4;
-		}
-
-		if (FD_ISSET(fd, &rfds)) {
-			cmd_len = read(fd, buf, BUF_LEN - 1);
-			printf("recv report:");
-			for (i = 0; i < cmd_len; i++)
-				printf(" %02x", buf[i]);
-			printf("\n");
-		}
-
-		if (FD_ISSET(STDIN_FILENO, &rfds)) {
-			memset(report, 0x0, sizeof(report));
-			cmd_len = read(STDIN_FILENO, buf, BUF_LEN - 1);
-
-			if (cmd_len == 0)
-				break;
-
-			buf[cmd_len - 1] = '\0';
-			hold = 0;
-
-			memset(report, 0x0, sizeof(report));
-			if (argv[2][0] == 'k')
-				to_send = keyboard_fill_report(report, buf, &hold);
-			else if (argv[2][0] == 'm')
-				to_send = mouse_fill_report(report, buf, &hold);
-			else
-				to_send = joystick_fill_report(report, buf, &hold);
-
-			if (to_send == -1)
-				break;
-
-			if (write(fd, report, to_send) != to_send) {
-				perror(filename);
-				return 5;
-			}
-			if (!hold) {
-				memset(report, 0x0, sizeof(report));
-				if (write(fd, report, to_send) != to_send) {
-					perror(filename);
-					return 6;
-				}
-			}
-		}
-	}
-
-	close(fd);
-	return 0;
-  }
diff --git a/Documentation/usb/gadget_multi.rst b/Documentation/usb/gadget_multi.rst
new file mode 100644
index 000000000000..9806b55af301
--- /dev/null
+++ b/Documentation/usb/gadget_multi.rst
@@ -0,0 +1,165 @@
+==============================
+Multifunction Composite Gadget
+==============================
+
+Overview
+========
+
+The Multifunction Composite Gadget (or g_multi) is a composite gadget
+that makes extensive use of the composite framework to provide
+a... multifunction gadget.
+
+In it's standard configuration it provides a single USB configuration
+with RNDIS[1] (that is Ethernet), USB CDC[2] ACM (that is serial) and
+USB Mass Storage functions.
+
+A CDC ECM (Ethernet) function may be turned on via a Kconfig option
+and RNDIS can be turned off.  If they are both enabled the gadget will
+have two configurations -- one with RNDIS and another with CDC ECM[3].
+
+Please note that if you use non-standard configuration (that is enable
+CDC ECM) you may need to change vendor and/or product ID.
+
+Host drivers
+============
+
+To make use of the gadget one needs to make it work on host side --
+without that there's no hope of achieving anything with the gadget.
+As one might expect, things one need to do very from system to system.
+
+Linux host drivers
+------------------
+
+Since the gadget uses standard composite framework and appears as such
+to Linux host it does not need any additional drivers on Linux host
+side.  All the functions are handled by respective drivers developed
+for them.
+
+This is also true for two configuration set-up with RNDIS
+configuration being the first one.  Linux host will use the second
+configuration with CDC ECM which should work better under Linux.
+
+Windows host drivers
+--------------------
+
+For the gadget to work under Windows two conditions have to be met:
+
+Detecting as composite gadget
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+First of all, Windows need to detect the gadget as an USB composite
+gadget which on its own have some conditions[4].  If they are met,
+Windows lets USB Generic Parent Driver[5] handle the device which then
+tries to match drivers for each individual interface (sort of, don't
+get into too many details).
+
+The good news is: you do not have to worry about most of the
+conditions!
+
+The only thing to worry is that the gadget has to have a single
+configuration so a dual RNDIS and CDC ECM gadget won't work unless you
+create a proper INF -- and of course, if you do submit it!
+
+Installing drivers for each function
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The other, trickier thing is making Windows install drivers for each
+individual function.
+
+For mass storage it is trivial since Windows detect it's an interface
+implementing USB Mass Storage class and selects appropriate driver.
+
+Things are harder with RDNIS and CDC ACM.
+
+RNDIS
+.....
+
+To make Windows select RNDIS drivers for the first function in the
+gadget, one needs to use the [[file:linux.inf]] file provided with this
+document.  It "attaches" Window's RNDIS driver to the first interface
+of the gadget.
+
+Please note, that while testing we encountered some issues[6] when
+RNDIS was not the first interface.  You do not need to worry abut it
+unless you are trying to develop your own gadget in which case watch
+out for this bug.
+
+CDC ACM
+.......
+
+Similarly, [[file:linux-cdc-acm.inf]] is provided for CDC ACM.
+
+Customising the gadget
+......................
+
+If you intend to hack the g_multi gadget be advised that rearranging
+functions will obviously change interface numbers for each of the
+functionality.  As an effect provided INFs won't work since they have
+interface numbers hard-coded in them (it's not hard to change those
+though[7]).
+
+This also means, that after experimenting with g_multi and changing
+provided functions one should change gadget's vendor and/or product ID
+so there will be no collision with other customised gadgets or the
+original gadget.
+
+Failing to comply may cause brain damage after wondering for hours why
+things don't work as intended before realising Windows have cached
+some drivers information (changing USB port may sometimes help plus
+you might try using USBDeview[8] to remove the phantom device).
+
+INF testing
+...........
+
+Provided INF files have been tested on Windows XP SP3, Windows Vista
+and Windows 7, all 32-bit versions.  It should work on 64-bit versions
+as well.  It most likely won't work on Windows prior to Windows XP
+SP2.
+
+Other systems
+-------------
+
+At this moment, drivers for any other systems have not been tested.
+Knowing how MacOS is based on BSD and BSD is an Open Source it is
+believed that it should (read: "I have no idea whether it will") work
+out-of-the-box.
+
+For more exotic systems I have even less to say...
+
+Any testing and drivers *are* *welcome*!
+
+Authors
+=======
+
+This document has been written by Michal Nazarewicz
+([[mailto:mina86@mina86.com]]).  INF files have been hacked with
+support of Marek Szyprowski ([[mailto:m.szyprowski@samsung.com]]) and
+Xiaofan Chen ([[mailto:xiaofanc@gmail.com]]) basing on the MS RNDIS
+template[9], Microchip's CDC ACM INF file and David Brownell's
+([[mailto:dbrownell@users.sourceforge.net]]) original INF files.
+
+Footnotes
+=========
+
+[1] Remote Network Driver Interface Specification,
+[[http://msdn.microsoft.com/en-us/library/ee484414.aspx]].
+
+[2] Communications Device Class Abstract Control Model, spec for this
+and other USB classes can be found at
+[[http://www.usb.org/developers/devclass_docs/]].
+
+[3] CDC Ethernet Control Model.
+
+[4] [[http://msdn.microsoft.com/en-us/library/ff537109(v=VS.85).aspx]]
+
+[5] [[http://msdn.microsoft.com/en-us/library/ff539234(v=VS.85).aspx]]
+
+[6] To put it in some other nice words, Windows failed to respond to
+any user input.
+
+[7] You may find [[http://www.cygnal.org/ubb/Forum9/HTML/001050.html]]
+useful.
+
+[8] http://www.nirsoft.net/utils/usb_devices_view.html
+
+[9] [[http://msdn.microsoft.com/en-us/library/ff570620.aspx]]
diff --git a/Documentation/usb/gadget_multi.txt b/Documentation/usb/gadget_multi.txt
deleted file mode 100644
index 9806b55af301..000000000000
--- a/Documentation/usb/gadget_multi.txt
+++ /dev/null
@@ -1,165 +0,0 @@
-==============================
-Multifunction Composite Gadget
-==============================
-
-Overview
-========
-
-The Multifunction Composite Gadget (or g_multi) is a composite gadget
-that makes extensive use of the composite framework to provide
-a... multifunction gadget.
-
-In it's standard configuration it provides a single USB configuration
-with RNDIS[1] (that is Ethernet), USB CDC[2] ACM (that is serial) and
-USB Mass Storage functions.
-
-A CDC ECM (Ethernet) function may be turned on via a Kconfig option
-and RNDIS can be turned off.  If they are both enabled the gadget will
-have two configurations -- one with RNDIS and another with CDC ECM[3].
-
-Please note that if you use non-standard configuration (that is enable
-CDC ECM) you may need to change vendor and/or product ID.
-
-Host drivers
-============
-
-To make use of the gadget one needs to make it work on host side --
-without that there's no hope of achieving anything with the gadget.
-As one might expect, things one need to do very from system to system.
-
-Linux host drivers
-------------------
-
-Since the gadget uses standard composite framework and appears as such
-to Linux host it does not need any additional drivers on Linux host
-side.  All the functions are handled by respective drivers developed
-for them.
-
-This is also true for two configuration set-up with RNDIS
-configuration being the first one.  Linux host will use the second
-configuration with CDC ECM which should work better under Linux.
-
-Windows host drivers
---------------------
-
-For the gadget to work under Windows two conditions have to be met:
-
-Detecting as composite gadget
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-First of all, Windows need to detect the gadget as an USB composite
-gadget which on its own have some conditions[4].  If they are met,
-Windows lets USB Generic Parent Driver[5] handle the device which then
-tries to match drivers for each individual interface (sort of, don't
-get into too many details).
-
-The good news is: you do not have to worry about most of the
-conditions!
-
-The only thing to worry is that the gadget has to have a single
-configuration so a dual RNDIS and CDC ECM gadget won't work unless you
-create a proper INF -- and of course, if you do submit it!
-
-Installing drivers for each function
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The other, trickier thing is making Windows install drivers for each
-individual function.
-
-For mass storage it is trivial since Windows detect it's an interface
-implementing USB Mass Storage class and selects appropriate driver.
-
-Things are harder with RDNIS and CDC ACM.
-
-RNDIS
-.....
-
-To make Windows select RNDIS drivers for the first function in the
-gadget, one needs to use the [[file:linux.inf]] file provided with this
-document.  It "attaches" Window's RNDIS driver to the first interface
-of the gadget.
-
-Please note, that while testing we encountered some issues[6] when
-RNDIS was not the first interface.  You do not need to worry abut it
-unless you are trying to develop your own gadget in which case watch
-out for this bug.
-
-CDC ACM
-.......
-
-Similarly, [[file:linux-cdc-acm.inf]] is provided for CDC ACM.
-
-Customising the gadget
-......................
-
-If you intend to hack the g_multi gadget be advised that rearranging
-functions will obviously change interface numbers for each of the
-functionality.  As an effect provided INFs won't work since they have
-interface numbers hard-coded in them (it's not hard to change those
-though[7]).
-
-This also means, that after experimenting with g_multi and changing
-provided functions one should change gadget's vendor and/or product ID
-so there will be no collision with other customised gadgets or the
-original gadget.
-
-Failing to comply may cause brain damage after wondering for hours why
-things don't work as intended before realising Windows have cached
-some drivers information (changing USB port may sometimes help plus
-you might try using USBDeview[8] to remove the phantom device).
-
-INF testing
-...........
-
-Provided INF files have been tested on Windows XP SP3, Windows Vista
-and Windows 7, all 32-bit versions.  It should work on 64-bit versions
-as well.  It most likely won't work on Windows prior to Windows XP
-SP2.
-
-Other systems
--------------
-
-At this moment, drivers for any other systems have not been tested.
-Knowing how MacOS is based on BSD and BSD is an Open Source it is
-believed that it should (read: "I have no idea whether it will") work
-out-of-the-box.
-
-For more exotic systems I have even less to say...
-
-Any testing and drivers *are* *welcome*!
-
-Authors
-=======
-
-This document has been written by Michal Nazarewicz
-([[mailto:mina86@mina86.com]]).  INF files have been hacked with
-support of Marek Szyprowski ([[mailto:m.szyprowski@samsung.com]]) and
-Xiaofan Chen ([[mailto:xiaofanc@gmail.com]]) basing on the MS RNDIS
-template[9], Microchip's CDC ACM INF file and David Brownell's
-([[mailto:dbrownell@users.sourceforge.net]]) original INF files.
-
-Footnotes
-=========
-
-[1] Remote Network Driver Interface Specification,
-[[http://msdn.microsoft.com/en-us/library/ee484414.aspx]].
-
-[2] Communications Device Class Abstract Control Model, spec for this
-and other USB classes can be found at
-[[http://www.usb.org/developers/devclass_docs/]].
-
-[3] CDC Ethernet Control Model.
-
-[4] [[http://msdn.microsoft.com/en-us/library/ff537109(v=VS.85).aspx]]
-
-[5] [[http://msdn.microsoft.com/en-us/library/ff539234(v=VS.85).aspx]]
-
-[6] To put it in some other nice words, Windows failed to respond to
-any user input.
-
-[7] You may find [[http://www.cygnal.org/ubb/Forum9/HTML/001050.html]]
-useful.
-
-[8] http://www.nirsoft.net/utils/usb_devices_view.html
-
-[9] [[http://msdn.microsoft.com/en-us/library/ff570620.aspx]]
diff --git a/Documentation/usb/gadget_printer.rst b/Documentation/usb/gadget_printer.rst
new file mode 100644
index 000000000000..5e5516c69075
--- /dev/null
+++ b/Documentation/usb/gadget_printer.rst
@@ -0,0 +1,523 @@
+﻿===============================
+Linux USB Printer Gadget Driver
+===============================
+
+06/04/2007
+
+Copyright (C) 2007 Craig W. Nadler <craig@nadler.us>
+
+
+
+General
+=======
+
+This driver may be used if you are writing printer firmware using Linux as
+the embedded OS. This driver has nothing to do with using a printer with
+your Linux host system.
+
+You will need a USB device controller and a Linux driver for it that accepts
+a gadget / "device class" driver using the Linux USB Gadget API. After the
+USB device controller driver is loaded then load the printer gadget driver.
+This will present a printer interface to the USB Host that your USB Device
+port is connected to.
+
+This driver is structured for printer firmware that runs in user mode. The
+user mode printer firmware will read and write data from the kernel mode
+printer gadget driver using a device file. The printer returns a printer status
+byte when the USB HOST sends a device request to get the printer status.  The
+user space firmware can read or write this status byte using a device file
+/dev/g_printer . Both blocking and non-blocking read/write calls are supported.
+
+
+
+
+Howto Use This Driver
+=====================
+
+To load the USB device controller driver and the printer gadget driver. The
+following example uses the Netchip 2280 USB device controller driver::
+
+	modprobe net2280
+	modprobe g_printer
+
+
+The follow command line parameter can be used when loading the printer gadget
+(ex: modprobe g_printer idVendor=0x0525 idProduct=0xa4a8 ):
+
+idVendor
+	This is the Vendor ID used in the device descriptor. The default is
+	the Netchip vendor id 0x0525. YOU MUST CHANGE TO YOUR OWN VENDOR ID
+	BEFORE RELEASING A PRODUCT. If you plan to release a product and don't
+	already have a Vendor ID please see www.usb.org for details on how to
+	get one.
+
+idProduct
+	This is the Product ID used in the device descriptor. The default
+	is 0xa4a8, you should change this to an ID that's not used by any of
+	your other USB products if you have any. It would be a good idea to
+	start numbering your products starting with say 0x0001.
+
+bcdDevice
+	This is the version number of your product. It would be a good idea
+	to put your firmware version here.
+
+iManufacturer
+	A string containing the name of the Vendor.
+
+iProduct
+	A string containing the Product Name.
+
+iSerialNum
+	A string containing the Serial Number. This should be changed for
+	each unit of your product.
+
+iPNPstring
+	The PNP ID string used for this printer. You will want to set
+	either on the command line or hard code the PNP ID string used for
+	your printer product.
+
+qlen
+	The number of 8k buffers to use per endpoint. The default is 10, you
+	should tune this for your product. You may also want to tune the
+	size of each buffer for your product.
+
+
+
+
+Using The Example Code
+======================
+
+This example code talks to stdout, instead of a print engine.
+
+To compile the test code below:
+
+1) save it to a file called prn_example.c
+2) compile the code with the follow command::
+
+	 gcc prn_example.c -o prn_example
+
+
+
+To read printer data from the host to stdout::
+
+	# prn_example -read_data
+
+
+To write printer data from a file (data_file) to the host::
+
+	# cat data_file | prn_example -write_data
+
+
+To get the current printer status for the gadget driver:::
+
+	# prn_example -get_status
+
+	Printer status is:
+	     Printer is NOT Selected
+	     Paper is Out
+	     Printer OK
+
+
+To set printer to Selected/On-line::
+
+	# prn_example -selected
+
+
+To set printer to Not Selected/Off-line::
+
+	# prn_example -not_selected
+
+
+To set paper status to paper out::
+
+	# prn_example -paper_out
+
+
+To set paper status to paper loaded::
+
+	# prn_example -paper_loaded
+
+
+To set error status to printer OK::
+
+	# prn_example -no_error
+
+
+To set error status to ERROR::
+
+	# prn_example -error
+
+
+
+
+Example Code
+============
+
+::
+
+
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <fcntl.h>
+  #include <linux/poll.h>
+  #include <sys/ioctl.h>
+  #include <linux/usb/g_printer.h>
+
+  #define PRINTER_FILE			"/dev/g_printer"
+  #define BUF_SIZE			512
+
+
+  /*
+   * 'usage()' - Show program usage.
+   */
+
+  static void
+  usage(const char *option)		/* I - Option string or NULL */
+  {
+	if (option) {
+		fprintf(stderr,"prn_example: Unknown option \"%s\"!\n",
+				option);
+	}
+
+	fputs("\n", stderr);
+	fputs("Usage: prn_example -[options]\n", stderr);
+	fputs("Options:\n", stderr);
+	fputs("\n", stderr);
+	fputs("-get_status    Get the current printer status.\n", stderr);
+	fputs("-selected      Set the selected status to selected.\n", stderr);
+	fputs("-not_selected  Set the selected status to NOT selected.\n",
+			stderr);
+	fputs("-error         Set the error status to error.\n", stderr);
+	fputs("-no_error      Set the error status to NO error.\n", stderr);
+	fputs("-paper_out     Set the paper status to paper out.\n", stderr);
+	fputs("-paper_loaded  Set the paper status to paper loaded.\n",
+			stderr);
+	fputs("-read_data     Read printer data from driver.\n", stderr);
+	fputs("-write_data    Write printer sata to driver.\n", stderr);
+	fputs("-NB_read_data  (Non-Blocking) Read printer data from driver.\n",
+			stderr);
+	fputs("\n\n", stderr);
+
+	exit(1);
+  }
+
+
+  static int
+  read_printer_data()
+  {
+	struct pollfd	fd[1];
+
+	/* Open device file for printer gadget. */
+	fd[0].fd = open(PRINTER_FILE, O_RDWR);
+	if (fd[0].fd < 0) {
+		printf("Error %d opening %s\n", fd[0].fd, PRINTER_FILE);
+		close(fd[0].fd);
+		return(-1);
+	}
+
+	fd[0].events = POLLIN | POLLRDNORM;
+
+	while (1) {
+		static char buf[BUF_SIZE];
+		int bytes_read;
+		int retval;
+
+		/* Wait for up to 1 second for data. */
+		retval = poll(fd, 1, 1000);
+
+		if (retval && (fd[0].revents & POLLRDNORM)) {
+
+			/* Read data from printer gadget driver. */
+			bytes_read = read(fd[0].fd, buf, BUF_SIZE);
+
+			if (bytes_read < 0) {
+				printf("Error %d reading from %s\n",
+						fd[0].fd, PRINTER_FILE);
+				close(fd[0].fd);
+				return(-1);
+			} else if (bytes_read > 0) {
+				/* Write data to standard OUTPUT (stdout). */
+				fwrite(buf, 1, bytes_read, stdout);
+				fflush(stdout);
+			}
+
+		}
+
+	}
+
+	/* Close the device file. */
+	close(fd[0].fd);
+
+	return 0;
+  }
+
+
+  static int
+  write_printer_data()
+  {
+	struct pollfd	fd[1];
+
+	/* Open device file for printer gadget. */
+	fd[0].fd = open (PRINTER_FILE, O_RDWR);
+	if (fd[0].fd < 0) {
+		printf("Error %d opening %s\n", fd[0].fd, PRINTER_FILE);
+		close(fd[0].fd);
+		return(-1);
+	}
+
+	fd[0].events = POLLOUT | POLLWRNORM;
+
+	while (1) {
+		int retval;
+		static char buf[BUF_SIZE];
+		/* Read data from standard INPUT (stdin). */
+		int bytes_read = fread(buf, 1, BUF_SIZE, stdin);
+
+		if (!bytes_read) {
+			break;
+		}
+
+		while (bytes_read) {
+
+			/* Wait for up to 1 second to sent data. */
+			retval = poll(fd, 1, 1000);
+
+			/* Write data to printer gadget driver. */
+			if (retval && (fd[0].revents & POLLWRNORM)) {
+				retval = write(fd[0].fd, buf, bytes_read);
+				if (retval < 0) {
+					printf("Error %d writing to %s\n",
+							fd[0].fd,
+							PRINTER_FILE);
+					close(fd[0].fd);
+					return(-1);
+				} else {
+					bytes_read -= retval;
+				}
+
+			}
+
+		}
+
+	}
+
+	/* Wait until the data has been sent. */
+	fsync(fd[0].fd);
+
+	/* Close the device file. */
+	close(fd[0].fd);
+
+	return 0;
+  }
+
+
+  static int
+  read_NB_printer_data()
+  {
+	int		fd;
+	static char	buf[BUF_SIZE];
+	int		bytes_read;
+
+	/* Open device file for printer gadget. */
+	fd = open(PRINTER_FILE, O_RDWR|O_NONBLOCK);
+	if (fd < 0) {
+		printf("Error %d opening %s\n", fd, PRINTER_FILE);
+		close(fd);
+		return(-1);
+	}
+
+	while (1) {
+		/* Read data from printer gadget driver. */
+		bytes_read = read(fd, buf, BUF_SIZE);
+		if (bytes_read <= 0) {
+			break;
+		}
+
+		/* Write data to standard OUTPUT (stdout). */
+		fwrite(buf, 1, bytes_read, stdout);
+		fflush(stdout);
+	}
+
+	/* Close the device file. */
+	close(fd);
+
+	return 0;
+  }
+
+
+  static int
+  get_printer_status()
+  {
+	int	retval;
+	int	fd;
+
+	/* Open device file for printer gadget. */
+	fd = open(PRINTER_FILE, O_RDWR);
+	if (fd < 0) {
+		printf("Error %d opening %s\n", fd, PRINTER_FILE);
+		close(fd);
+		return(-1);
+	}
+
+	/* Make the IOCTL call. */
+	retval = ioctl(fd, GADGET_GET_PRINTER_STATUS);
+	if (retval < 0) {
+		fprintf(stderr, "ERROR: Failed to set printer status\n");
+		return(-1);
+	}
+
+	/* Close the device file. */
+	close(fd);
+
+	return(retval);
+  }
+
+
+  static int
+  set_printer_status(unsigned char buf, int clear_printer_status_bit)
+  {
+	int	retval;
+	int	fd;
+
+	retval = get_printer_status();
+	if (retval < 0) {
+		fprintf(stderr, "ERROR: Failed to get printer status\n");
+		return(-1);
+	}
+
+	/* Open device file for printer gadget. */
+	fd = open(PRINTER_FILE, O_RDWR);
+
+	if (fd < 0) {
+		printf("Error %d opening %s\n", fd, PRINTER_FILE);
+		close(fd);
+		return(-1);
+	}
+
+	if (clear_printer_status_bit) {
+		retval &= ~buf;
+	} else {
+		retval |= buf;
+	}
+
+	/* Make the IOCTL call. */
+	if (ioctl(fd, GADGET_SET_PRINTER_STATUS, (unsigned char)retval)) {
+		fprintf(stderr, "ERROR: Failed to set printer status\n");
+		return(-1);
+	}
+
+	/* Close the device file. */
+	close(fd);
+
+	return 0;
+  }
+
+
+  static int
+  display_printer_status()
+  {
+	char	printer_status;
+
+	printer_status = get_printer_status();
+	if (printer_status < 0) {
+		fprintf(stderr, "ERROR: Failed to get printer status\n");
+		return(-1);
+	}
+
+	printf("Printer status is:\n");
+	if (printer_status & PRINTER_SELECTED) {
+		printf("     Printer is Selected\n");
+	} else {
+		printf("     Printer is NOT Selected\n");
+	}
+	if (printer_status & PRINTER_PAPER_EMPTY) {
+		printf("     Paper is Out\n");
+	} else {
+		printf("     Paper is Loaded\n");
+	}
+	if (printer_status & PRINTER_NOT_ERROR) {
+		printf("     Printer OK\n");
+	} else {
+		printf("     Printer ERROR\n");
+	}
+
+	return(0);
+  }
+
+
+  int
+  main(int  argc, char *argv[])
+  {
+	int	i;		/* Looping var */
+	int	retval = 0;
+
+	/* No Args */
+	if (argc == 1) {
+		usage(0);
+		exit(0);
+	}
+
+	for (i = 1; i < argc && !retval; i ++) {
+
+		if (argv[i][0] != '-') {
+			continue;
+		}
+
+		if (!strcmp(argv[i], "-get_status")) {
+			if (display_printer_status()) {
+				retval = 1;
+			}
+
+		} else if (!strcmp(argv[i], "-paper_loaded")) {
+			if (set_printer_status(PRINTER_PAPER_EMPTY, 1)) {
+				retval = 1;
+			}
+
+		} else if (!strcmp(argv[i], "-paper_out")) {
+			if (set_printer_status(PRINTER_PAPER_EMPTY, 0)) {
+				retval = 1;
+			}
+
+		} else if (!strcmp(argv[i], "-selected")) {
+			if (set_printer_status(PRINTER_SELECTED, 0)) {
+				retval = 1;
+			}
+
+		} else if (!strcmp(argv[i], "-not_selected")) {
+			if (set_printer_status(PRINTER_SELECTED, 1)) {
+				retval = 1;
+			}
+
+		} else if (!strcmp(argv[i], "-error")) {
+			if (set_printer_status(PRINTER_NOT_ERROR, 1)) {
+				retval = 1;
+			}
+
+		} else if (!strcmp(argv[i], "-no_error")) {
+			if (set_printer_status(PRINTER_NOT_ERROR, 0)) {
+				retval = 1;
+			}
+
+		} else if (!strcmp(argv[i], "-read_data")) {
+			if (read_printer_data()) {
+				retval = 1;
+			}
+
+		} else if (!strcmp(argv[i], "-write_data")) {
+			if (write_printer_data()) {
+				retval = 1;
+			}
+
+		} else if (!strcmp(argv[i], "-NB_read_data")) {
+			if (read_NB_printer_data()) {
+				retval = 1;
+			}
+
+		} else {
+			usage(argv[i]);
+			retval = 1;
+		}
+	}
+
+	exit(retval);
+  }
diff --git a/Documentation/usb/gadget_printer.txt b/Documentation/usb/gadget_printer.txt
deleted file mode 100644
index 5e5516c69075..000000000000
--- a/Documentation/usb/gadget_printer.txt
+++ /dev/null
@@ -1,523 +0,0 @@
-﻿===============================
-Linux USB Printer Gadget Driver
-===============================
-
-06/04/2007
-
-Copyright (C) 2007 Craig W. Nadler <craig@nadler.us>
-
-
-
-General
-=======
-
-This driver may be used if you are writing printer firmware using Linux as
-the embedded OS. This driver has nothing to do with using a printer with
-your Linux host system.
-
-You will need a USB device controller and a Linux driver for it that accepts
-a gadget / "device class" driver using the Linux USB Gadget API. After the
-USB device controller driver is loaded then load the printer gadget driver.
-This will present a printer interface to the USB Host that your USB Device
-port is connected to.
-
-This driver is structured for printer firmware that runs in user mode. The
-user mode printer firmware will read and write data from the kernel mode
-printer gadget driver using a device file. The printer returns a printer status
-byte when the USB HOST sends a device request to get the printer status.  The
-user space firmware can read or write this status byte using a device file
-/dev/g_printer . Both blocking and non-blocking read/write calls are supported.
-
-
-
-
-Howto Use This Driver
-=====================
-
-To load the USB device controller driver and the printer gadget driver. The
-following example uses the Netchip 2280 USB device controller driver::
-
-	modprobe net2280
-	modprobe g_printer
-
-
-The follow command line parameter can be used when loading the printer gadget
-(ex: modprobe g_printer idVendor=0x0525 idProduct=0xa4a8 ):
-
-idVendor
-	This is the Vendor ID used in the device descriptor. The default is
-	the Netchip vendor id 0x0525. YOU MUST CHANGE TO YOUR OWN VENDOR ID
-	BEFORE RELEASING A PRODUCT. If you plan to release a product and don't
-	already have a Vendor ID please see www.usb.org for details on how to
-	get one.
-
-idProduct
-	This is the Product ID used in the device descriptor. The default
-	is 0xa4a8, you should change this to an ID that's not used by any of
-	your other USB products if you have any. It would be a good idea to
-	start numbering your products starting with say 0x0001.
-
-bcdDevice
-	This is the version number of your product. It would be a good idea
-	to put your firmware version here.
-
-iManufacturer
-	A string containing the name of the Vendor.
-
-iProduct
-	A string containing the Product Name.
-
-iSerialNum
-	A string containing the Serial Number. This should be changed for
-	each unit of your product.
-
-iPNPstring
-	The PNP ID string used for this printer. You will want to set
-	either on the command line or hard code the PNP ID string used for
-	your printer product.
-
-qlen
-	The number of 8k buffers to use per endpoint. The default is 10, you
-	should tune this for your product. You may also want to tune the
-	size of each buffer for your product.
-
-
-
-
-Using The Example Code
-======================
-
-This example code talks to stdout, instead of a print engine.
-
-To compile the test code below:
-
-1) save it to a file called prn_example.c
-2) compile the code with the follow command::
-
-	 gcc prn_example.c -o prn_example
-
-
-
-To read printer data from the host to stdout::
-
-	# prn_example -read_data
-
-
-To write printer data from a file (data_file) to the host::
-
-	# cat data_file | prn_example -write_data
-
-
-To get the current printer status for the gadget driver:::
-
-	# prn_example -get_status
-
-	Printer status is:
-	     Printer is NOT Selected
-	     Paper is Out
-	     Printer OK
-
-
-To set printer to Selected/On-line::
-
-	# prn_example -selected
-
-
-To set printer to Not Selected/Off-line::
-
-	# prn_example -not_selected
-
-
-To set paper status to paper out::
-
-	# prn_example -paper_out
-
-
-To set paper status to paper loaded::
-
-	# prn_example -paper_loaded
-
-
-To set error status to printer OK::
-
-	# prn_example -no_error
-
-
-To set error status to ERROR::
-
-	# prn_example -error
-
-
-
-
-Example Code
-============
-
-::
-
-
-  #include <stdio.h>
-  #include <stdlib.h>
-  #include <fcntl.h>
-  #include <linux/poll.h>
-  #include <sys/ioctl.h>
-  #include <linux/usb/g_printer.h>
-
-  #define PRINTER_FILE			"/dev/g_printer"
-  #define BUF_SIZE			512
-
-
-  /*
-   * 'usage()' - Show program usage.
-   */
-
-  static void
-  usage(const char *option)		/* I - Option string or NULL */
-  {
-	if (option) {
-		fprintf(stderr,"prn_example: Unknown option \"%s\"!\n",
-				option);
-	}
-
-	fputs("\n", stderr);
-	fputs("Usage: prn_example -[options]\n", stderr);
-	fputs("Options:\n", stderr);
-	fputs("\n", stderr);
-	fputs("-get_status    Get the current printer status.\n", stderr);
-	fputs("-selected      Set the selected status to selected.\n", stderr);
-	fputs("-not_selected  Set the selected status to NOT selected.\n",
-			stderr);
-	fputs("-error         Set the error status to error.\n", stderr);
-	fputs("-no_error      Set the error status to NO error.\n", stderr);
-	fputs("-paper_out     Set the paper status to paper out.\n", stderr);
-	fputs("-paper_loaded  Set the paper status to paper loaded.\n",
-			stderr);
-	fputs("-read_data     Read printer data from driver.\n", stderr);
-	fputs("-write_data    Write printer sata to driver.\n", stderr);
-	fputs("-NB_read_data  (Non-Blocking) Read printer data from driver.\n",
-			stderr);
-	fputs("\n\n", stderr);
-
-	exit(1);
-  }
-
-
-  static int
-  read_printer_data()
-  {
-	struct pollfd	fd[1];
-
-	/* Open device file for printer gadget. */
-	fd[0].fd = open(PRINTER_FILE, O_RDWR);
-	if (fd[0].fd < 0) {
-		printf("Error %d opening %s\n", fd[0].fd, PRINTER_FILE);
-		close(fd[0].fd);
-		return(-1);
-	}
-
-	fd[0].events = POLLIN | POLLRDNORM;
-
-	while (1) {
-		static char buf[BUF_SIZE];
-		int bytes_read;
-		int retval;
-
-		/* Wait for up to 1 second for data. */
-		retval = poll(fd, 1, 1000);
-
-		if (retval && (fd[0].revents & POLLRDNORM)) {
-
-			/* Read data from printer gadget driver. */
-			bytes_read = read(fd[0].fd, buf, BUF_SIZE);
-
-			if (bytes_read < 0) {
-				printf("Error %d reading from %s\n",
-						fd[0].fd, PRINTER_FILE);
-				close(fd[0].fd);
-				return(-1);
-			} else if (bytes_read > 0) {
-				/* Write data to standard OUTPUT (stdout). */
-				fwrite(buf, 1, bytes_read, stdout);
-				fflush(stdout);
-			}
-
-		}
-
-	}
-
-	/* Close the device file. */
-	close(fd[0].fd);
-
-	return 0;
-  }
-
-
-  static int
-  write_printer_data()
-  {
-	struct pollfd	fd[1];
-
-	/* Open device file for printer gadget. */
-	fd[0].fd = open (PRINTER_FILE, O_RDWR);
-	if (fd[0].fd < 0) {
-		printf("Error %d opening %s\n", fd[0].fd, PRINTER_FILE);
-		close(fd[0].fd);
-		return(-1);
-	}
-
-	fd[0].events = POLLOUT | POLLWRNORM;
-
-	while (1) {
-		int retval;
-		static char buf[BUF_SIZE];
-		/* Read data from standard INPUT (stdin). */
-		int bytes_read = fread(buf, 1, BUF_SIZE, stdin);
-
-		if (!bytes_read) {
-			break;
-		}
-
-		while (bytes_read) {
-
-			/* Wait for up to 1 second to sent data. */
-			retval = poll(fd, 1, 1000);
-
-			/* Write data to printer gadget driver. */
-			if (retval && (fd[0].revents & POLLWRNORM)) {
-				retval = write(fd[0].fd, buf, bytes_read);
-				if (retval < 0) {
-					printf("Error %d writing to %s\n",
-							fd[0].fd,
-							PRINTER_FILE);
-					close(fd[0].fd);
-					return(-1);
-				} else {
-					bytes_read -= retval;
-				}
-
-			}
-
-		}
-
-	}
-
-	/* Wait until the data has been sent. */
-	fsync(fd[0].fd);
-
-	/* Close the device file. */
-	close(fd[0].fd);
-
-	return 0;
-  }
-
-
-  static int
-  read_NB_printer_data()
-  {
-	int		fd;
-	static char	buf[BUF_SIZE];
-	int		bytes_read;
-
-	/* Open device file for printer gadget. */
-	fd = open(PRINTER_FILE, O_RDWR|O_NONBLOCK);
-	if (fd < 0) {
-		printf("Error %d opening %s\n", fd, PRINTER_FILE);
-		close(fd);
-		return(-1);
-	}
-
-	while (1) {
-		/* Read data from printer gadget driver. */
-		bytes_read = read(fd, buf, BUF_SIZE);
-		if (bytes_read <= 0) {
-			break;
-		}
-
-		/* Write data to standard OUTPUT (stdout). */
-		fwrite(buf, 1, bytes_read, stdout);
-		fflush(stdout);
-	}
-
-	/* Close the device file. */
-	close(fd);
-
-	return 0;
-  }
-
-
-  static int
-  get_printer_status()
-  {
-	int	retval;
-	int	fd;
-
-	/* Open device file for printer gadget. */
-	fd = open(PRINTER_FILE, O_RDWR);
-	if (fd < 0) {
-		printf("Error %d opening %s\n", fd, PRINTER_FILE);
-		close(fd);
-		return(-1);
-	}
-
-	/* Make the IOCTL call. */
-	retval = ioctl(fd, GADGET_GET_PRINTER_STATUS);
-	if (retval < 0) {
-		fprintf(stderr, "ERROR: Failed to set printer status\n");
-		return(-1);
-	}
-
-	/* Close the device file. */
-	close(fd);
-
-	return(retval);
-  }
-
-
-  static int
-  set_printer_status(unsigned char buf, int clear_printer_status_bit)
-  {
-	int	retval;
-	int	fd;
-
-	retval = get_printer_status();
-	if (retval < 0) {
-		fprintf(stderr, "ERROR: Failed to get printer status\n");
-		return(-1);
-	}
-
-	/* Open device file for printer gadget. */
-	fd = open(PRINTER_FILE, O_RDWR);
-
-	if (fd < 0) {
-		printf("Error %d opening %s\n", fd, PRINTER_FILE);
-		close(fd);
-		return(-1);
-	}
-
-	if (clear_printer_status_bit) {
-		retval &= ~buf;
-	} else {
-		retval |= buf;
-	}
-
-	/* Make the IOCTL call. */
-	if (ioctl(fd, GADGET_SET_PRINTER_STATUS, (unsigned char)retval)) {
-		fprintf(stderr, "ERROR: Failed to set printer status\n");
-		return(-1);
-	}
-
-	/* Close the device file. */
-	close(fd);
-
-	return 0;
-  }
-
-
-  static int
-  display_printer_status()
-  {
-	char	printer_status;
-
-	printer_status = get_printer_status();
-	if (printer_status < 0) {
-		fprintf(stderr, "ERROR: Failed to get printer status\n");
-		return(-1);
-	}
-
-	printf("Printer status is:\n");
-	if (printer_status & PRINTER_SELECTED) {
-		printf("     Printer is Selected\n");
-	} else {
-		printf("     Printer is NOT Selected\n");
-	}
-	if (printer_status & PRINTER_PAPER_EMPTY) {
-		printf("     Paper is Out\n");
-	} else {
-		printf("     Paper is Loaded\n");
-	}
-	if (printer_status & PRINTER_NOT_ERROR) {
-		printf("     Printer OK\n");
-	} else {
-		printf("     Printer ERROR\n");
-	}
-
-	return(0);
-  }
-
-
-  int
-  main(int  argc, char *argv[])
-  {
-	int	i;		/* Looping var */
-	int	retval = 0;
-
-	/* No Args */
-	if (argc == 1) {
-		usage(0);
-		exit(0);
-	}
-
-	for (i = 1; i < argc && !retval; i ++) {
-
-		if (argv[i][0] != '-') {
-			continue;
-		}
-
-		if (!strcmp(argv[i], "-get_status")) {
-			if (display_printer_status()) {
-				retval = 1;
-			}
-
-		} else if (!strcmp(argv[i], "-paper_loaded")) {
-			if (set_printer_status(PRINTER_PAPER_EMPTY, 1)) {
-				retval = 1;
-			}
-
-		} else if (!strcmp(argv[i], "-paper_out")) {
-			if (set_printer_status(PRINTER_PAPER_EMPTY, 0)) {
-				retval = 1;
-			}
-
-		} else if (!strcmp(argv[i], "-selected")) {
-			if (set_printer_status(PRINTER_SELECTED, 0)) {
-				retval = 1;
-			}
-
-		} else if (!strcmp(argv[i], "-not_selected")) {
-			if (set_printer_status(PRINTER_SELECTED, 1)) {
-				retval = 1;
-			}
-
-		} else if (!strcmp(argv[i], "-error")) {
-			if (set_printer_status(PRINTER_NOT_ERROR, 1)) {
-				retval = 1;
-			}
-
-		} else if (!strcmp(argv[i], "-no_error")) {
-			if (set_printer_status(PRINTER_NOT_ERROR, 0)) {
-				retval = 1;
-			}
-
-		} else if (!strcmp(argv[i], "-read_data")) {
-			if (read_printer_data()) {
-				retval = 1;
-			}
-
-		} else if (!strcmp(argv[i], "-write_data")) {
-			if (write_printer_data()) {
-				retval = 1;
-			}
-
-		} else if (!strcmp(argv[i], "-NB_read_data")) {
-			if (read_NB_printer_data()) {
-				retval = 1;
-			}
-
-		} else {
-			usage(argv[i]);
-			retval = 1;
-		}
-	}
-
-	exit(retval);
-  }
diff --git a/Documentation/usb/gadget_serial.rst b/Documentation/usb/gadget_serial.rst
new file mode 100644
index 000000000000..dce8bc1fb1f2
--- /dev/null
+++ b/Documentation/usb/gadget_serial.rst
@@ -0,0 +1,289 @@
+===============================
+Linux Gadget Serial Driver v2.0
+===============================
+
+11/20/2004
+
+(updated 8-May-2008 for v2.3)
+
+
+License and Disclaimer
+----------------------
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of
+the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public
+License along with this program; if not, write to the Free
+Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+MA 02111-1307 USA.
+
+This document and the gadget serial driver itself are
+Copyright (C) 2004 by Al Borchers (alborchers@steinerpoint.com).
+
+If you have questions, problems, or suggestions for this driver
+please contact Al Borchers at alborchers@steinerpoint.com.
+
+
+Prerequisites
+-------------
+Versions of the gadget serial driver are available for the
+2.4 Linux kernels, but this document assumes you are using
+version 2.3 or later of the gadget serial driver in a 2.6
+Linux kernel.
+
+This document assumes that you are familiar with Linux and
+Windows and know how to configure and build Linux kernels, run
+standard utilities, use minicom and HyperTerminal, and work with
+USB and serial devices.  It also assumes you configure the Linux
+gadget and usb drivers as modules.
+
+With version 2.3 of the driver, major and minor device nodes are
+no longer statically defined.  Your Linux based system should mount
+sysfs in /sys, and use "mdev" (in Busybox) or "udev" to make the
+/dev nodes matching the sysfs /sys/class/tty files.
+
+
+
+Overview
+--------
+The gadget serial driver is a Linux USB gadget driver, a USB device
+side driver.  It runs on a Linux system that has USB device side
+hardware; for example, a PDA, an embedded Linux system, or a PC
+with a USB development card.
+
+The gadget serial driver talks over USB to either a CDC ACM driver
+or a generic USB serial driver running on a host PC::
+
+   Host
+   --------------------------------------
+  | Host-Side   CDC ACM       USB Host   |
+  | Operating |   or        | Controller |   USB
+  | System    | Generic USB | Driver     |--------
+  | (Linux or | Serial      | and        |        |
+  | Windows)    Driver        USB Stack  |        |
+   --------------------------------------         |
+                                                  |
+                                                  |
+                                                  |
+   Gadget                                         |
+   --------------------------------------         |
+  | Gadget                   USB Periph. |        |
+  | Device-Side |  Gadget  | Controller  |        |
+  | Linux       |  Serial  | Driver      |--------
+  | Operating   |  Driver  | and         |
+  | System                   USB Stack   |
+   --------------------------------------
+
+On the device-side Linux system, the gadget serial driver looks
+like a serial device.
+
+On the host-side system, the gadget serial device looks like a
+CDC ACM compliant class device or a simple vendor specific device
+with bulk in and bulk out endpoints, and it is treated similarly
+to other serial devices.
+
+The host side driver can potentially be any ACM compliant driver
+or any driver that can talk to a device with a simple bulk in/out
+interface.  Gadget serial has been tested with the Linux ACM driver,
+the Windows usbser.sys ACM driver, and the Linux USB generic serial
+driver.
+
+With the gadget serial driver and the host side ACM or generic
+serial driver running, you should be able to communicate between
+the host and the gadget side systems as if they were connected by a
+serial cable.
+
+The gadget serial driver only provides simple unreliable data
+communication.  It does not yet handle flow control or many other
+features of normal serial devices.
+
+
+Installing the Gadget Serial Driver
+-----------------------------------
+To use the gadget serial driver you must configure the Linux gadget
+side kernel for "Support for USB Gadgets", for a "USB Peripheral
+Controller" (for example, net2280), and for the "Serial Gadget"
+driver.  All this are listed under "USB Gadget Support" when
+configuring the kernel.  Then rebuild and install the kernel or
+modules.
+
+Then you must load the gadget serial driver.  To load it as an
+ACM device (recommended for interoperability), do this::
+
+  modprobe g_serial
+
+To load it as a vendor specific bulk in/out device, do this::
+
+  modprobe g_serial use_acm=0
+
+This will also automatically load the underlying gadget peripheral
+controller driver.  This must be done each time you reboot the gadget
+side Linux system.  You can add this to the start up scripts, if
+desired.
+
+Your system should use mdev (from busybox) or udev to make the
+device nodes.  After this gadget driver has been set up you should
+then see a /dev/ttyGS0 node::
+
+  # ls -l /dev/ttyGS0 | cat
+  crw-rw----    1 root     root     253,   0 May  8 14:10 /dev/ttyGS0
+  #
+
+Note that the major number (253, above) is system-specific.  If
+you need to create /dev nodes by hand, the right numbers to use
+will be in the /sys/class/tty/ttyGS0/dev file.
+
+When you link this gadget driver early, perhaps even statically,
+you may want to set up an /etc/inittab entry to run "getty" on it.
+The /dev/ttyGS0 line should work like most any other serial port.
+
+
+If gadget serial is loaded as an ACM device you will want to use
+either the Windows or Linux ACM driver on the host side.  If gadget
+serial is loaded as a bulk in/out device, you will want to use the
+Linux generic serial driver on the host side.  Follow the appropriate
+instructions below to install the host side driver.
+
+
+Installing the Windows Host ACM Driver
+--------------------------------------
+To use the Windows ACM driver you must have the "linux-cdc-acm.inf"
+file (provided along this document) which supports all recent versions
+of Windows.
+
+When the gadget serial driver is loaded and the USB device connected
+to the Windows host with a USB cable, Windows should recognize the
+gadget serial device and ask for a driver.  Tell Windows to find the
+driver in the folder that contains the "linux-cdc-acm.inf" file.
+
+For example, on Windows XP, when the gadget serial device is first
+plugged in, the "Found New Hardware Wizard" starts up.  Select
+"Install from a list or specific location (Advanced)", then on the
+next screen select "Include this location in the search" and enter the
+path or browse to the folder containing the "linux-cdc-acm.inf" file.
+Windows will complain that the Gadget Serial driver has not passed
+Windows Logo testing, but select "Continue anyway" and finish the
+driver installation.
+
+On Windows XP, in the "Device Manager" (under "Control Panel",
+"System", "Hardware") expand the "Ports (COM & LPT)" entry and you
+should see "Gadget Serial" listed as the driver for one of the COM
+ports.
+
+To uninstall the Windows XP driver for "Gadget Serial", right click
+on the "Gadget Serial" entry in the "Device Manager" and select
+"Uninstall".
+
+
+Installing the Linux Host ACM Driver
+------------------------------------
+To use the Linux ACM driver you must configure the Linux host side
+kernel for "Support for Host-side USB" and for "USB Modem (CDC ACM)
+support".
+
+Once the gadget serial driver is loaded and the USB device connected
+to the Linux host with a USB cable, the host system should recognize
+the gadget serial device.  For example, the command::
+
+  cat /sys/kernel/debug/usb/devices
+
+should show something like this:::
+
+  T:  Bus=01 Lev=01 Prnt=01 Port=01 Cnt=02 Dev#=  5 Spd=480 MxCh= 0
+  D:  Ver= 2.00 Cls=02(comm.) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
+  P:  Vendor=0525 ProdID=a4a7 Rev= 2.01
+  S:  Manufacturer=Linux 2.6.8.1 with net2280
+  S:  Product=Gadget Serial
+  S:  SerialNumber=0
+  C:* #Ifs= 2 Cfg#= 2 Atr=c0 MxPwr=  2mA
+  I:  If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=02 Prot=01 Driver=acm
+  E:  Ad=83(I) Atr=03(Int.) MxPS=   8 Ivl=32ms
+  I:  If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=acm
+  E:  Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
+  E:  Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
+
+If the host side Linux system is configured properly, the ACM driver
+should be loaded automatically.  The command "lsmod" should show the
+"acm" module is loaded.
+
+
+Installing the Linux Host Generic USB Serial Driver
+---------------------------------------------------
+To use the Linux generic USB serial driver you must configure the
+Linux host side kernel for "Support for Host-side USB", for "USB
+Serial Converter support", and for the "USB Generic Serial Driver".
+
+Once the gadget serial driver is loaded and the USB device connected
+to the Linux host with a USB cable, the host system should recognize
+the gadget serial device.  For example, the command::
+
+  cat /sys/kernel/debug/usb/devices
+
+should show something like this:::
+
+  T:  Bus=01 Lev=01 Prnt=01 Port=01 Cnt=02 Dev#=  6 Spd=480 MxCh= 0
+  D:  Ver= 2.00 Cls=ff(vend.) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
+  P:  Vendor=0525 ProdID=a4a6 Rev= 2.01
+  S:  Manufacturer=Linux 2.6.8.1 with net2280
+  S:  Product=Gadget Serial
+  S:  SerialNumber=0
+  C:* #Ifs= 1 Cfg#= 1 Atr=c0 MxPwr=  2mA
+  I:  If#= 0 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=serial
+  E:  Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
+  E:  Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
+
+You must load the usbserial driver and explicitly set its parameters
+to configure it to recognize the gadget serial device, like this::
+
+  echo 0x0525 0xA4A6 >/sys/bus/usb-serial/drivers/generic/new_id
+
+The legacy way is to use module parameters::
+
+  modprobe usbserial vendor=0x0525 product=0xA4A6
+
+If everything is working, usbserial will print a message in the
+system log saying something like "Gadget Serial converter now
+attached to ttyUSB0".
+
+
+Testing with Minicom or HyperTerminal
+-------------------------------------
+Once the gadget serial driver and the host driver are both installed,
+and a USB cable connects the gadget device to the host, you should
+be able to communicate over USB between the gadget and host systems.
+You can use minicom or HyperTerminal to try this out.
+
+On the gadget side run "minicom -s" to configure a new minicom
+session.  Under "Serial port setup" set "/dev/ttygserial" as the
+"Serial Device".  Set baud rate, data bits, parity, and stop bits,
+to 9600, 8, none, and 1--these settings mostly do not matter.
+Under "Modem and dialing" erase all the modem and dialing strings.
+
+On a Linux host running the ACM driver, configure minicom similarly
+but use "/dev/ttyACM0" as the "Serial Device".  (If you have other
+ACM devices connected, change the device name appropriately.)
+
+On a Linux host running the USB generic serial driver, configure
+minicom similarly, but use "/dev/ttyUSB0" as the "Serial Device".
+(If you have other USB serial devices connected, change the device
+name appropriately.)
+
+On a Windows host configure a new HyperTerminal session to use the
+COM port assigned to Gadget Serial.  The "Port Settings" will be
+set automatically when HyperTerminal connects to the gadget serial
+device, so you can leave them set to the default values--these
+settings mostly do not matter.
+
+With minicom configured and running on the gadget side and with
+minicom or HyperTerminal configured and running on the host side,
+you should be able to send data back and forth between the gadget
+side and host side systems.  Anything you type on the terminal
+window on the gadget side should appear in the terminal window on
+the host side and vice versa.
diff --git a/Documentation/usb/gadget_serial.txt b/Documentation/usb/gadget_serial.txt
deleted file mode 100644
index dce8bc1fb1f2..000000000000
--- a/Documentation/usb/gadget_serial.txt
+++ /dev/null
@@ -1,289 +0,0 @@
-===============================
-Linux Gadget Serial Driver v2.0
-===============================
-
-11/20/2004
-
-(updated 8-May-2008 for v2.3)
-
-
-License and Disclaimer
-----------------------
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of
-the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public
-License along with this program; if not, write to the Free
-Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
-MA 02111-1307 USA.
-
-This document and the gadget serial driver itself are
-Copyright (C) 2004 by Al Borchers (alborchers@steinerpoint.com).
-
-If you have questions, problems, or suggestions for this driver
-please contact Al Borchers at alborchers@steinerpoint.com.
-
-
-Prerequisites
--------------
-Versions of the gadget serial driver are available for the
-2.4 Linux kernels, but this document assumes you are using
-version 2.3 or later of the gadget serial driver in a 2.6
-Linux kernel.
-
-This document assumes that you are familiar with Linux and
-Windows and know how to configure and build Linux kernels, run
-standard utilities, use minicom and HyperTerminal, and work with
-USB and serial devices.  It also assumes you configure the Linux
-gadget and usb drivers as modules.
-
-With version 2.3 of the driver, major and minor device nodes are
-no longer statically defined.  Your Linux based system should mount
-sysfs in /sys, and use "mdev" (in Busybox) or "udev" to make the
-/dev nodes matching the sysfs /sys/class/tty files.
-
-
-
-Overview
---------
-The gadget serial driver is a Linux USB gadget driver, a USB device
-side driver.  It runs on a Linux system that has USB device side
-hardware; for example, a PDA, an embedded Linux system, or a PC
-with a USB development card.
-
-The gadget serial driver talks over USB to either a CDC ACM driver
-or a generic USB serial driver running on a host PC::
-
-   Host
-   --------------------------------------
-  | Host-Side   CDC ACM       USB Host   |
-  | Operating |   or        | Controller |   USB
-  | System    | Generic USB | Driver     |--------
-  | (Linux or | Serial      | and        |        |
-  | Windows)    Driver        USB Stack  |        |
-   --------------------------------------         |
-                                                  |
-                                                  |
-                                                  |
-   Gadget                                         |
-   --------------------------------------         |
-  | Gadget                   USB Periph. |        |
-  | Device-Side |  Gadget  | Controller  |        |
-  | Linux       |  Serial  | Driver      |--------
-  | Operating   |  Driver  | and         |
-  | System                   USB Stack   |
-   --------------------------------------
-
-On the device-side Linux system, the gadget serial driver looks
-like a serial device.
-
-On the host-side system, the gadget serial device looks like a
-CDC ACM compliant class device or a simple vendor specific device
-with bulk in and bulk out endpoints, and it is treated similarly
-to other serial devices.
-
-The host side driver can potentially be any ACM compliant driver
-or any driver that can talk to a device with a simple bulk in/out
-interface.  Gadget serial has been tested with the Linux ACM driver,
-the Windows usbser.sys ACM driver, and the Linux USB generic serial
-driver.
-
-With the gadget serial driver and the host side ACM or generic
-serial driver running, you should be able to communicate between
-the host and the gadget side systems as if they were connected by a
-serial cable.
-
-The gadget serial driver only provides simple unreliable data
-communication.  It does not yet handle flow control or many other
-features of normal serial devices.
-
-
-Installing the Gadget Serial Driver
------------------------------------
-To use the gadget serial driver you must configure the Linux gadget
-side kernel for "Support for USB Gadgets", for a "USB Peripheral
-Controller" (for example, net2280), and for the "Serial Gadget"
-driver.  All this are listed under "USB Gadget Support" when
-configuring the kernel.  Then rebuild and install the kernel or
-modules.
-
-Then you must load the gadget serial driver.  To load it as an
-ACM device (recommended for interoperability), do this::
-
-  modprobe g_serial
-
-To load it as a vendor specific bulk in/out device, do this::
-
-  modprobe g_serial use_acm=0
-
-This will also automatically load the underlying gadget peripheral
-controller driver.  This must be done each time you reboot the gadget
-side Linux system.  You can add this to the start up scripts, if
-desired.
-
-Your system should use mdev (from busybox) or udev to make the
-device nodes.  After this gadget driver has been set up you should
-then see a /dev/ttyGS0 node::
-
-  # ls -l /dev/ttyGS0 | cat
-  crw-rw----    1 root     root     253,   0 May  8 14:10 /dev/ttyGS0
-  #
-
-Note that the major number (253, above) is system-specific.  If
-you need to create /dev nodes by hand, the right numbers to use
-will be in the /sys/class/tty/ttyGS0/dev file.
-
-When you link this gadget driver early, perhaps even statically,
-you may want to set up an /etc/inittab entry to run "getty" on it.
-The /dev/ttyGS0 line should work like most any other serial port.
-
-
-If gadget serial is loaded as an ACM device you will want to use
-either the Windows or Linux ACM driver on the host side.  If gadget
-serial is loaded as a bulk in/out device, you will want to use the
-Linux generic serial driver on the host side.  Follow the appropriate
-instructions below to install the host side driver.
-
-
-Installing the Windows Host ACM Driver
---------------------------------------
-To use the Windows ACM driver you must have the "linux-cdc-acm.inf"
-file (provided along this document) which supports all recent versions
-of Windows.
-
-When the gadget serial driver is loaded and the USB device connected
-to the Windows host with a USB cable, Windows should recognize the
-gadget serial device and ask for a driver.  Tell Windows to find the
-driver in the folder that contains the "linux-cdc-acm.inf" file.
-
-For example, on Windows XP, when the gadget serial device is first
-plugged in, the "Found New Hardware Wizard" starts up.  Select
-"Install from a list or specific location (Advanced)", then on the
-next screen select "Include this location in the search" and enter the
-path or browse to the folder containing the "linux-cdc-acm.inf" file.
-Windows will complain that the Gadget Serial driver has not passed
-Windows Logo testing, but select "Continue anyway" and finish the
-driver installation.
-
-On Windows XP, in the "Device Manager" (under "Control Panel",
-"System", "Hardware") expand the "Ports (COM & LPT)" entry and you
-should see "Gadget Serial" listed as the driver for one of the COM
-ports.
-
-To uninstall the Windows XP driver for "Gadget Serial", right click
-on the "Gadget Serial" entry in the "Device Manager" and select
-"Uninstall".
-
-
-Installing the Linux Host ACM Driver
-------------------------------------
-To use the Linux ACM driver you must configure the Linux host side
-kernel for "Support for Host-side USB" and for "USB Modem (CDC ACM)
-support".
-
-Once the gadget serial driver is loaded and the USB device connected
-to the Linux host with a USB cable, the host system should recognize
-the gadget serial device.  For example, the command::
-
-  cat /sys/kernel/debug/usb/devices
-
-should show something like this:::
-
-  T:  Bus=01 Lev=01 Prnt=01 Port=01 Cnt=02 Dev#=  5 Spd=480 MxCh= 0
-  D:  Ver= 2.00 Cls=02(comm.) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
-  P:  Vendor=0525 ProdID=a4a7 Rev= 2.01
-  S:  Manufacturer=Linux 2.6.8.1 with net2280
-  S:  Product=Gadget Serial
-  S:  SerialNumber=0
-  C:* #Ifs= 2 Cfg#= 2 Atr=c0 MxPwr=  2mA
-  I:  If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=02 Prot=01 Driver=acm
-  E:  Ad=83(I) Atr=03(Int.) MxPS=   8 Ivl=32ms
-  I:  If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=acm
-  E:  Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
-  E:  Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
-
-If the host side Linux system is configured properly, the ACM driver
-should be loaded automatically.  The command "lsmod" should show the
-"acm" module is loaded.
-
-
-Installing the Linux Host Generic USB Serial Driver
----------------------------------------------------
-To use the Linux generic USB serial driver you must configure the
-Linux host side kernel for "Support for Host-side USB", for "USB
-Serial Converter support", and for the "USB Generic Serial Driver".
-
-Once the gadget serial driver is loaded and the USB device connected
-to the Linux host with a USB cable, the host system should recognize
-the gadget serial device.  For example, the command::
-
-  cat /sys/kernel/debug/usb/devices
-
-should show something like this:::
-
-  T:  Bus=01 Lev=01 Prnt=01 Port=01 Cnt=02 Dev#=  6 Spd=480 MxCh= 0
-  D:  Ver= 2.00 Cls=ff(vend.) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
-  P:  Vendor=0525 ProdID=a4a6 Rev= 2.01
-  S:  Manufacturer=Linux 2.6.8.1 with net2280
-  S:  Product=Gadget Serial
-  S:  SerialNumber=0
-  C:* #Ifs= 1 Cfg#= 1 Atr=c0 MxPwr=  2mA
-  I:  If#= 0 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=serial
-  E:  Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
-  E:  Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
-
-You must load the usbserial driver and explicitly set its parameters
-to configure it to recognize the gadget serial device, like this::
-
-  echo 0x0525 0xA4A6 >/sys/bus/usb-serial/drivers/generic/new_id
-
-The legacy way is to use module parameters::
-
-  modprobe usbserial vendor=0x0525 product=0xA4A6
-
-If everything is working, usbserial will print a message in the
-system log saying something like "Gadget Serial converter now
-attached to ttyUSB0".
-
-
-Testing with Minicom or HyperTerminal
--------------------------------------
-Once the gadget serial driver and the host driver are both installed,
-and a USB cable connects the gadget device to the host, you should
-be able to communicate over USB between the gadget and host systems.
-You can use minicom or HyperTerminal to try this out.
-
-On the gadget side run "minicom -s" to configure a new minicom
-session.  Under "Serial port setup" set "/dev/ttygserial" as the
-"Serial Device".  Set baud rate, data bits, parity, and stop bits,
-to 9600, 8, none, and 1--these settings mostly do not matter.
-Under "Modem and dialing" erase all the modem and dialing strings.
-
-On a Linux host running the ACM driver, configure minicom similarly
-but use "/dev/ttyACM0" as the "Serial Device".  (If you have other
-ACM devices connected, change the device name appropriately.)
-
-On a Linux host running the USB generic serial driver, configure
-minicom similarly, but use "/dev/ttyUSB0" as the "Serial Device".
-(If you have other USB serial devices connected, change the device
-name appropriately.)
-
-On a Windows host configure a new HyperTerminal session to use the
-COM port assigned to Gadget Serial.  The "Port Settings" will be
-set automatically when HyperTerminal connects to the gadget serial
-device, so you can leave them set to the default values--these
-settings mostly do not matter.
-
-With minicom configured and running on the gadget side and with
-minicom or HyperTerminal configured and running on the host side,
-you should be able to send data back and forth between the gadget
-side and host side systems.  Anything you type on the terminal
-window on the gadget side should appear in the terminal window on
-the host side and vice versa.
diff --git a/Documentation/usb/index.rst b/Documentation/usb/index.rst
new file mode 100644
index 000000000000..e55386a4abfb
--- /dev/null
+++ b/Documentation/usb/index.rst
@@ -0,0 +1,39 @@
+===========
+USB support
+===========
+
+.. toctree::
+    :maxdepth: 1
+
+    acm
+    authorization
+    chipidea
+    dwc3
+    ehci
+    functionfs
+    gadget_configfs
+    gadget_hid
+    gadget_multi
+    gadget_printer
+    gadget_serial
+    gadget-testing
+    iuu_phoenix
+    mass-storage
+    misc_usbsevseg
+    mtouchusb
+    ohci
+    rio
+    usbip_protocol
+    usbmon
+    usb-serial
+    wusb-design-overview
+
+    usb-help
+    text_files
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/usb/iuu_phoenix.rst b/Documentation/usb/iuu_phoenix.rst
new file mode 100644
index 000000000000..b76268728450
--- /dev/null
+++ b/Documentation/usb/iuu_phoenix.rst
@@ -0,0 +1,94 @@
+=============================
+Infinity Usb Unlimited Readme
+=============================
+
+Hi all,
+
+
+This module provide a serial interface to use your
+IUU unit in phoenix mode. Loading this module will
+bring a ttyUSB[0-x] interface. This driver must be
+used by your favorite application to pilot the IUU
+
+This driver is still in beta stage, so bugs can
+occur and your system may freeze. As far I now,
+I never had any problem with it, but I'm not a real
+guru, so don't blame me if your system is unstable
+
+You can plug more than one IUU. Every unit will
+have his own device file(/dev/ttyUSB0,/dev/ttyUSB1,...)
+
+
+
+How to tune the reader speed?
+=============================
+
+ A few parameters can be used at load time
+ To use parameters, just unload the module if it is
+ already loaded and use modprobe iuu_phoenix param=value.
+ In case of prebuilt module, use the command
+ insmod iuu_phoenix param=value.
+
+ Example::
+
+	modprobe iuu_phoenix clockmode=3
+
+ The parameters are:
+
+clockmode:
+	1=3Mhz579,2=3Mhz680,3=6Mhz (int)
+boost:
+	overclock boost percent 100 to 500 (int)
+cdmode:
+	Card detect mode
+	0=none, 1=CD, 2=!CD, 3=DSR, 4=!DSR, 5=CTS, 6=!CTS, 7=RING, 8=!RING (int)
+xmas:
+	xmas color enabled or not (bool)
+debug:
+	Debug enabled or not (bool)
+
+-  clockmode will provide 3 different base settings commonly adopted by
+   different software:
+
+	1. 3Mhz579
+	2. 3Mhz680
+	3. 6Mhz
+
+-  boost provide a way to overclock the reader ( my favorite :-)  )
+   For example to have best performance than a simple clockmode=3, try this::
+
+      modprobe boost=195
+
+   This will put the reader in a base of 3Mhz579 but boosted a 195 % !
+   the real clock will be now : 6979050 Hz ( 6Mhz979 ) and will increase
+   the speed to a score 10 to 20% better than the simple clockmode=3 !!!
+
+
+-  cdmode permit to setup the signal used to inform the userland ( ioctl answer )
+   if the card is present or not. Eight signals are possible.
+
+-  xmas is completely useless except for your eyes. This is one of my friend who was
+   so sad to have a nice device like the iuu without seeing all color range available.
+   So I have added this option to permit him to see a lot of color ( each activity change the color
+   and the frequency randomly )
+
+-  debug will produce a lot of debugging messages...
+
+
+Last notes
+==========
+
+ Don't worry about the serial settings, the serial emulation
+ is an abstraction, so use any speed or parity setting will
+ work. ( This will not change anything ).Later I will perhaps
+ use this settings to deduce de boost but is that feature
+ really necessary ?
+ The autodetect feature used is the serial CD. If that doesn't
+ work for your software, disable detection mechanism in it.
+
+
+ Have fun !
+
+ Alain Degreffe
+
+ eczema(at)ecze.com
diff --git a/Documentation/usb/iuu_phoenix.txt b/Documentation/usb/iuu_phoenix.txt
deleted file mode 100644
index b76268728450..000000000000
--- a/Documentation/usb/iuu_phoenix.txt
+++ /dev/null
@@ -1,94 +0,0 @@
-=============================
-Infinity Usb Unlimited Readme
-=============================
-
-Hi all,
-
-
-This module provide a serial interface to use your
-IUU unit in phoenix mode. Loading this module will
-bring a ttyUSB[0-x] interface. This driver must be
-used by your favorite application to pilot the IUU
-
-This driver is still in beta stage, so bugs can
-occur and your system may freeze. As far I now,
-I never had any problem with it, but I'm not a real
-guru, so don't blame me if your system is unstable
-
-You can plug more than one IUU. Every unit will
-have his own device file(/dev/ttyUSB0,/dev/ttyUSB1,...)
-
-
-
-How to tune the reader speed?
-=============================
-
- A few parameters can be used at load time
- To use parameters, just unload the module if it is
- already loaded and use modprobe iuu_phoenix param=value.
- In case of prebuilt module, use the command
- insmod iuu_phoenix param=value.
-
- Example::
-
-	modprobe iuu_phoenix clockmode=3
-
- The parameters are:
-
-clockmode:
-	1=3Mhz579,2=3Mhz680,3=6Mhz (int)
-boost:
-	overclock boost percent 100 to 500 (int)
-cdmode:
-	Card detect mode
-	0=none, 1=CD, 2=!CD, 3=DSR, 4=!DSR, 5=CTS, 6=!CTS, 7=RING, 8=!RING (int)
-xmas:
-	xmas color enabled or not (bool)
-debug:
-	Debug enabled or not (bool)
-
--  clockmode will provide 3 different base settings commonly adopted by
-   different software:
-
-	1. 3Mhz579
-	2. 3Mhz680
-	3. 6Mhz
-
--  boost provide a way to overclock the reader ( my favorite :-)  )
-   For example to have best performance than a simple clockmode=3, try this::
-
-      modprobe boost=195
-
-   This will put the reader in a base of 3Mhz579 but boosted a 195 % !
-   the real clock will be now : 6979050 Hz ( 6Mhz979 ) and will increase
-   the speed to a score 10 to 20% better than the simple clockmode=3 !!!
-
-
--  cdmode permit to setup the signal used to inform the userland ( ioctl answer )
-   if the card is present or not. Eight signals are possible.
-
--  xmas is completely useless except for your eyes. This is one of my friend who was
-   so sad to have a nice device like the iuu without seeing all color range available.
-   So I have added this option to permit him to see a lot of color ( each activity change the color
-   and the frequency randomly )
-
--  debug will produce a lot of debugging messages...
-
-
-Last notes
-==========
-
- Don't worry about the serial settings, the serial emulation
- is an abstraction, so use any speed or parity setting will
- work. ( This will not change anything ).Later I will perhaps
- use this settings to deduce de boost but is that feature
- really necessary ?
- The autodetect feature used is the serial CD. If that doesn't
- work for your software, disable detection mechanism in it.
-
-
- Have fun !
-
- Alain Degreffe
-
- eczema(at)ecze.com
diff --git a/Documentation/usb/mass-storage.rst b/Documentation/usb/mass-storage.rst
new file mode 100644
index 000000000000..d181b47c3cb6
--- /dev/null
+++ b/Documentation/usb/mass-storage.rst
@@ -0,0 +1,234 @@
+=========================
+Mass Storage Gadget (MSG)
+=========================
+
+Overview
+========
+
+  Mass Storage Gadget (or MSG) acts as a USB Mass Storage device,
+  appearing to the host as a disk or a CD-ROM drive.  It supports
+  multiple logical units (LUNs).  Backing storage for each LUN is
+  provided by a regular file or a block device, access can be limited
+  to read-only, and gadget can indicate that it is removable and/or
+  CD-ROM (the latter implies read-only access).
+
+  Its requirements are modest; only a bulk-in and a bulk-out endpoint
+  are needed.  The memory requirement amounts to two 16K buffers.
+  Support is included for full-speed, high-speed and SuperSpeed
+  operation.
+
+  Note that the driver is slightly non-portable in that it assumes
+  a single memory/DMA buffer will be usable for bulk-in and bulk-out
+  endpoints.  With most device controllers this is not an issue, but
+  there may be some with hardware restrictions that prevent a buffer
+  from being used by more than one endpoint.
+
+  This document describes how to use the gadget from user space, its
+  relation to mass storage function (or MSF) and different gadgets
+  using it, and how it differs from File Storage Gadget (or FSG)
+  (which is no longer included in Linux).  It will talk only briefly
+  about how to use MSF within composite gadgets.
+
+Module parameters
+=================
+
+  The mass storage gadget accepts the following mass storage specific
+  module parameters:
+
+  - file=filename[,filename...]
+
+    This parameter lists paths to files or block devices used for
+    backing storage for each logical unit.  There may be at most
+    FSG_MAX_LUNS (8) LUNs set.  If more files are specified, they will
+    be silently ignored.  See also “luns” parameter.
+
+    *BEWARE* that if a file is used as a backing storage, it may not
+    be modified by any other process.  This is because the host
+    assumes the data does not change without its knowledge.  It may be
+    read, but (if the logical unit is writable) due to buffering on
+    the host side, the contents are not well defined.
+
+    The size of the logical unit will be rounded down to a full
+    logical block.  The logical block size is 2048 bytes for LUNs
+    simulating CD-ROM, block size of the device if the backing file is
+    a block device, or 512 bytes otherwise.
+
+  - removable=b[,b...]
+
+    This parameter specifies whether each logical unit should be
+    removable.  “b” here is either “y”, “Y” or “1” for true or “n”,
+    “N” or “0” for false.
+
+    If this option is set for a logical unit, gadget will accept an
+    “eject” SCSI request (Start/Stop Unit).  When it is sent, the
+    backing file will be closed to simulate ejection and the logical
+    unit will not be mountable by the host until a new backing file is
+    specified by userspace on the device (see “sysfs entries”
+    section).
+
+    If a logical unit is not removable (the default), a backing file
+    must be specified for it with the “file” parameter as the module
+    is loaded.  The same applies if the module is built in, no
+    exceptions.
+
+    The default value of the flag is false, *HOWEVER* it used to be
+    true.  This has been changed to better match File Storage Gadget
+    and because it seems like a saner default after all.  Thus to
+    maintain compatibility with older kernels, it's best to specify
+    the default values.  Also, if one relied on old default, explicit
+    “n” needs to be specified now.
+
+    Note that “removable” means the logical unit's media can be
+    ejected or removed (as is true for a CD-ROM drive or a card
+    reader).  It does *not* mean that the entire gadget can be
+    unplugged from the host; the proper term for that is
+    “hot-unpluggable”.
+
+  - cdrom=b[,b...]
+
+    This parameter specifies whether each logical unit should simulate
+    CD-ROM.  The default is false.
+
+  - ro=b[,b...]
+
+    This parameter specifies whether each logical unit should be
+    reported as read only.  This will prevent host from modifying the
+    backing files.
+
+    Note that if this flag for given logical unit is false but the
+    backing file could not be opened in read/write mode, the gadget
+    will fall back to read only mode anyway.
+
+    The default value for non-CD-ROM logical units is false; for
+    logical units simulating CD-ROM it is forced to true.
+
+  - nofua=b[,b...]
+
+    This parameter specifies whether FUA flag should be ignored in SCSI
+    Write10 and Write12 commands sent to given logical units.
+
+    MS Windows mounts removable storage in “Removal optimised mode” by
+    default.  All the writes to the media are synchronous, which is
+    achieved by setting the FUA (Force Unit Access) bit in SCSI
+    Write(10,12) commands.  This forces each write to wait until the
+    data has actually been written out and prevents I/O requests
+    aggregation in block layer dramatically decreasing performance.
+
+    Note that this may mean that if the device is powered from USB and
+    the user unplugs the device without unmounting it first (which at
+    least some Windows users do), the data may be lost.
+
+    The default value is false.
+
+  - luns=N
+
+    This parameter specifies number of logical units the gadget will
+    have.  It is limited by FSG_MAX_LUNS (8) and higher value will be
+    capped.
+
+    If this parameter is provided, and the number of files specified
+    in “file” argument is greater then the value of “luns”, all excess
+    files will be ignored.
+
+    If this parameter is not present, the number of logical units will
+    be deduced from the number of files specified in the “file”
+    parameter.  If the file parameter is missing as well, one is
+    assumed.
+
+  - stall=b
+
+    Specifies whether the gadget is allowed to halt bulk endpoints.
+    The default is determined according to the type of USB device
+    controller, but usually true.
+
+  In addition to the above, the gadget also accepts the following
+  parameters defined by the composite framework (they are common to
+  all composite gadgets so just a quick listing):
+
+  - idVendor      -- USB Vendor ID (16 bit integer)
+  - idProduct     -- USB Product ID (16 bit integer)
+  - bcdDevice     -- USB Device version (BCD) (16 bit integer)
+  - iManufacturer -- USB Manufacturer string (string)
+  - iProduct      -- USB Product string (string)
+  - iSerialNumber -- SerialNumber string (sting)
+
+sysfs entries
+=============
+
+  For each logical unit, the gadget creates a directory in the sysfs
+  hierarchy.  Inside of it the following three files are created:
+
+  - file
+
+    When read it returns the path to the backing file for the given
+    logical unit.  If there is no backing file (possible only if the
+    logical unit is removable), the content is empty.
+
+    When written into, it changes the backing file for given logical
+    unit.  This change can be performed even if given logical unit is
+    not specified as removable (but that may look strange to the
+    host).  It may fail, however, if host disallowed medium removal
+    with the Prevent-Allow Medium Removal SCSI command.
+
+  - ro
+
+    Reflects the state of ro flag for the given logical unit.  It can
+    be read any time, and written to when there is no backing file
+    open for given logical unit.
+
+  - nofua
+
+    Reflects the state of nofua flag for given logical unit.  It can
+    be read and written.
+
+  Other then those, as usual, the values of module parameters can be
+  read from /sys/module/g_mass_storage/parameters/* files.
+
+Other gadgets using mass storage function
+=========================================
+
+  The Mass Storage Gadget uses the Mass Storage Function to handle
+  mass storage protocol.  As a composite function, MSF may be used by
+  other gadgets as well (eg. g_multi and acm_ms).
+
+  All of the information in previous sections are valid for other
+  gadgets using MSF, except that support for mass storage related
+  module parameters may be missing, or the parameters may have
+  a prefix.  To figure out whether any of this is true one needs to
+  consult the gadget's documentation or its source code.
+
+  For examples of how to include mass storage function in gadgets, one
+  may take a look at mass_storage.c, acm_ms.c and multi.c (sorted by
+  complexity).
+
+Relation to file storage gadget
+===============================
+
+  The Mass Storage Function and thus the Mass Storage Gadget has been
+  based on the File Storage Gadget.  The difference between the two is
+  that MSG is a composite gadget (ie. uses the composite framework)
+  while file storage gadget was a traditional gadget.  From userspace
+  point of view this distinction does not really matter, but from
+  kernel hacker's point of view, this means that (i) MSG does not
+  duplicate code needed for handling basic USB protocol commands and
+  (ii) MSF can be used in any other composite gadget.
+
+  Because of that, File Storage Gadget has been removed in Linux 3.8.
+  All users need to transition to the Mass Storage Gadget.  The two
+  gadgets behave mostly the same from the outside except:
+
+  1. In FSG the “removable” and “cdrom” module parameters set the flag
+     for all logical units whereas in MSG they accept a list of y/n
+     values for each logical unit.  If one uses only a single logical
+     unit this does not matter, but if there are more, the y/n value
+     needs to be repeated for each logical unit.
+
+  2. FSG's “serial”, “vendor”, “product” and “release” module
+     parameters are handled in MSG by the composite layer's parameters
+     named respectively: “iSerialnumber”, “idVendor”, “idProduct” and
+     “bcdDevice”.
+
+  3. MSG does not support FSG's test mode, thus “transport”,
+     “protocol” and “buflen” FSG's module parameters are not
+     supported.  MSG always uses SCSI protocol with bulk only
+     transport mode and 16 KiB buffers.
diff --git a/Documentation/usb/mass-storage.txt b/Documentation/usb/mass-storage.txt
deleted file mode 100644
index d181b47c3cb6..000000000000
--- a/Documentation/usb/mass-storage.txt
+++ /dev/null
@@ -1,234 +0,0 @@
-=========================
-Mass Storage Gadget (MSG)
-=========================
-
-Overview
-========
-
-  Mass Storage Gadget (or MSG) acts as a USB Mass Storage device,
-  appearing to the host as a disk or a CD-ROM drive.  It supports
-  multiple logical units (LUNs).  Backing storage for each LUN is
-  provided by a regular file or a block device, access can be limited
-  to read-only, and gadget can indicate that it is removable and/or
-  CD-ROM (the latter implies read-only access).
-
-  Its requirements are modest; only a bulk-in and a bulk-out endpoint
-  are needed.  The memory requirement amounts to two 16K buffers.
-  Support is included for full-speed, high-speed and SuperSpeed
-  operation.
-
-  Note that the driver is slightly non-portable in that it assumes
-  a single memory/DMA buffer will be usable for bulk-in and bulk-out
-  endpoints.  With most device controllers this is not an issue, but
-  there may be some with hardware restrictions that prevent a buffer
-  from being used by more than one endpoint.
-
-  This document describes how to use the gadget from user space, its
-  relation to mass storage function (or MSF) and different gadgets
-  using it, and how it differs from File Storage Gadget (or FSG)
-  (which is no longer included in Linux).  It will talk only briefly
-  about how to use MSF within composite gadgets.
-
-Module parameters
-=================
-
-  The mass storage gadget accepts the following mass storage specific
-  module parameters:
-
-  - file=filename[,filename...]
-
-    This parameter lists paths to files or block devices used for
-    backing storage for each logical unit.  There may be at most
-    FSG_MAX_LUNS (8) LUNs set.  If more files are specified, they will
-    be silently ignored.  See also “luns” parameter.
-
-    *BEWARE* that if a file is used as a backing storage, it may not
-    be modified by any other process.  This is because the host
-    assumes the data does not change without its knowledge.  It may be
-    read, but (if the logical unit is writable) due to buffering on
-    the host side, the contents are not well defined.
-
-    The size of the logical unit will be rounded down to a full
-    logical block.  The logical block size is 2048 bytes for LUNs
-    simulating CD-ROM, block size of the device if the backing file is
-    a block device, or 512 bytes otherwise.
-
-  - removable=b[,b...]
-
-    This parameter specifies whether each logical unit should be
-    removable.  “b” here is either “y”, “Y” or “1” for true or “n”,
-    “N” or “0” for false.
-
-    If this option is set for a logical unit, gadget will accept an
-    “eject” SCSI request (Start/Stop Unit).  When it is sent, the
-    backing file will be closed to simulate ejection and the logical
-    unit will not be mountable by the host until a new backing file is
-    specified by userspace on the device (see “sysfs entries”
-    section).
-
-    If a logical unit is not removable (the default), a backing file
-    must be specified for it with the “file” parameter as the module
-    is loaded.  The same applies if the module is built in, no
-    exceptions.
-
-    The default value of the flag is false, *HOWEVER* it used to be
-    true.  This has been changed to better match File Storage Gadget
-    and because it seems like a saner default after all.  Thus to
-    maintain compatibility with older kernels, it's best to specify
-    the default values.  Also, if one relied on old default, explicit
-    “n” needs to be specified now.
-
-    Note that “removable” means the logical unit's media can be
-    ejected or removed (as is true for a CD-ROM drive or a card
-    reader).  It does *not* mean that the entire gadget can be
-    unplugged from the host; the proper term for that is
-    “hot-unpluggable”.
-
-  - cdrom=b[,b...]
-
-    This parameter specifies whether each logical unit should simulate
-    CD-ROM.  The default is false.
-
-  - ro=b[,b...]
-
-    This parameter specifies whether each logical unit should be
-    reported as read only.  This will prevent host from modifying the
-    backing files.
-
-    Note that if this flag for given logical unit is false but the
-    backing file could not be opened in read/write mode, the gadget
-    will fall back to read only mode anyway.
-
-    The default value for non-CD-ROM logical units is false; for
-    logical units simulating CD-ROM it is forced to true.
-
-  - nofua=b[,b...]
-
-    This parameter specifies whether FUA flag should be ignored in SCSI
-    Write10 and Write12 commands sent to given logical units.
-
-    MS Windows mounts removable storage in “Removal optimised mode” by
-    default.  All the writes to the media are synchronous, which is
-    achieved by setting the FUA (Force Unit Access) bit in SCSI
-    Write(10,12) commands.  This forces each write to wait until the
-    data has actually been written out and prevents I/O requests
-    aggregation in block layer dramatically decreasing performance.
-
-    Note that this may mean that if the device is powered from USB and
-    the user unplugs the device without unmounting it first (which at
-    least some Windows users do), the data may be lost.
-
-    The default value is false.
-
-  - luns=N
-
-    This parameter specifies number of logical units the gadget will
-    have.  It is limited by FSG_MAX_LUNS (8) and higher value will be
-    capped.
-
-    If this parameter is provided, and the number of files specified
-    in “file” argument is greater then the value of “luns”, all excess
-    files will be ignored.
-
-    If this parameter is not present, the number of logical units will
-    be deduced from the number of files specified in the “file”
-    parameter.  If the file parameter is missing as well, one is
-    assumed.
-
-  - stall=b
-
-    Specifies whether the gadget is allowed to halt bulk endpoints.
-    The default is determined according to the type of USB device
-    controller, but usually true.
-
-  In addition to the above, the gadget also accepts the following
-  parameters defined by the composite framework (they are common to
-  all composite gadgets so just a quick listing):
-
-  - idVendor      -- USB Vendor ID (16 bit integer)
-  - idProduct     -- USB Product ID (16 bit integer)
-  - bcdDevice     -- USB Device version (BCD) (16 bit integer)
-  - iManufacturer -- USB Manufacturer string (string)
-  - iProduct      -- USB Product string (string)
-  - iSerialNumber -- SerialNumber string (sting)
-
-sysfs entries
-=============
-
-  For each logical unit, the gadget creates a directory in the sysfs
-  hierarchy.  Inside of it the following three files are created:
-
-  - file
-
-    When read it returns the path to the backing file for the given
-    logical unit.  If there is no backing file (possible only if the
-    logical unit is removable), the content is empty.
-
-    When written into, it changes the backing file for given logical
-    unit.  This change can be performed even if given logical unit is
-    not specified as removable (but that may look strange to the
-    host).  It may fail, however, if host disallowed medium removal
-    with the Prevent-Allow Medium Removal SCSI command.
-
-  - ro
-
-    Reflects the state of ro flag for the given logical unit.  It can
-    be read any time, and written to when there is no backing file
-    open for given logical unit.
-
-  - nofua
-
-    Reflects the state of nofua flag for given logical unit.  It can
-    be read and written.
-
-  Other then those, as usual, the values of module parameters can be
-  read from /sys/module/g_mass_storage/parameters/* files.
-
-Other gadgets using mass storage function
-=========================================
-
-  The Mass Storage Gadget uses the Mass Storage Function to handle
-  mass storage protocol.  As a composite function, MSF may be used by
-  other gadgets as well (eg. g_multi and acm_ms).
-
-  All of the information in previous sections are valid for other
-  gadgets using MSF, except that support for mass storage related
-  module parameters may be missing, or the parameters may have
-  a prefix.  To figure out whether any of this is true one needs to
-  consult the gadget's documentation or its source code.
-
-  For examples of how to include mass storage function in gadgets, one
-  may take a look at mass_storage.c, acm_ms.c and multi.c (sorted by
-  complexity).
-
-Relation to file storage gadget
-===============================
-
-  The Mass Storage Function and thus the Mass Storage Gadget has been
-  based on the File Storage Gadget.  The difference between the two is
-  that MSG is a composite gadget (ie. uses the composite framework)
-  while file storage gadget was a traditional gadget.  From userspace
-  point of view this distinction does not really matter, but from
-  kernel hacker's point of view, this means that (i) MSG does not
-  duplicate code needed for handling basic USB protocol commands and
-  (ii) MSF can be used in any other composite gadget.
-
-  Because of that, File Storage Gadget has been removed in Linux 3.8.
-  All users need to transition to the Mass Storage Gadget.  The two
-  gadgets behave mostly the same from the outside except:
-
-  1. In FSG the “removable” and “cdrom” module parameters set the flag
-     for all logical units whereas in MSG they accept a list of y/n
-     values for each logical unit.  If one uses only a single logical
-     unit this does not matter, but if there are more, the y/n value
-     needs to be repeated for each logical unit.
-
-  2. FSG's “serial”, “vendor”, “product” and “release” module
-     parameters are handled in MSG by the composite layer's parameters
-     named respectively: “iSerialnumber”, “idVendor”, “idProduct” and
-     “bcdDevice”.
-
-  3. MSG does not support FSG's test mode, thus “transport”,
-     “protocol” and “buflen” FSG's module parameters are not
-     supported.  MSG always uses SCSI protocol with bulk only
-     transport mode and 16 KiB buffers.
diff --git a/Documentation/usb/misc_usbsevseg.rst b/Documentation/usb/misc_usbsevseg.rst
new file mode 100644
index 000000000000..6274aee083ed
--- /dev/null
+++ b/Documentation/usb/misc_usbsevseg.rst
@@ -0,0 +1,51 @@
+=============================
+USB 7-Segment Numeric Display
+=============================
+
+Manufactured by Delcom Engineering
+
+Device Information
+------------------
+USB VENDOR_ID	0x0fc5
+USB PRODUCT_ID	0x1227
+Both the 6 character and 8 character displays have PRODUCT_ID,
+and according to Delcom Engineering no queryable information
+can be obtained from the device to tell them apart.
+
+Device Modes
+------------
+By default, the driver assumes the display is only 6 characters
+The mode for 6 characters is:
+
+	MSB 0x06; LSB 0x3f
+
+For the 8 character display:
+
+	MSB 0x08; LSB 0xff
+
+The device can accept "text" either in raw, hex, or ascii textmode.
+raw controls each segment manually,
+hex expects a value between 0-15 per character,
+ascii expects a value between '0'-'9' and 'A'-'F'.
+The default is ascii.
+
+Device Operation
+----------------
+1.	Turn on the device:
+	echo 1 > /sys/bus/usb/.../powered
+2.	Set the device's mode:
+	echo $mode_msb > /sys/bus/usb/.../mode_msb
+	echo $mode_lsb > /sys/bus/usb/.../mode_lsb
+3.	Set the textmode:
+	echo $textmode > /sys/bus/usb/.../textmode
+4.	set the text (for example):
+	echo "123ABC" > /sys/bus/usb/.../text (ascii)
+	echo "A1B2" > /sys/bus/usb/.../text (ascii)
+	echo -ne "\x01\x02\x03" > /sys/bus/usb/.../text (hex)
+5.	Set the decimal places.
+	The device has either 6 or 8 decimal points.
+	to set the nth decimal place calculate 10 ** n
+	and echo it in to /sys/bus/usb/.../decimals
+	To set multiple decimals points sum up each power.
+	For example, to set the 0th and 3rd decimal place
+	echo 1001 > /sys/bus/usb/.../decimals
diff --git a/Documentation/usb/misc_usbsevseg.txt b/Documentation/usb/misc_usbsevseg.txt
deleted file mode 100644
index 6274aee083ed..000000000000
--- a/Documentation/usb/misc_usbsevseg.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-=============================
-USB 7-Segment Numeric Display
-=============================
-
-Manufactured by Delcom Engineering
-
-Device Information
-------------------
-USB VENDOR_ID	0x0fc5
-USB PRODUCT_ID	0x1227
-Both the 6 character and 8 character displays have PRODUCT_ID,
-and according to Delcom Engineering no queryable information
-can be obtained from the device to tell them apart.
-
-Device Modes
-------------
-By default, the driver assumes the display is only 6 characters
-The mode for 6 characters is:
-
-	MSB 0x06; LSB 0x3f
-
-For the 8 character display:
-
-	MSB 0x08; LSB 0xff
-
-The device can accept "text" either in raw, hex, or ascii textmode.
-raw controls each segment manually,
-hex expects a value between 0-15 per character,
-ascii expects a value between '0'-'9' and 'A'-'F'.
-The default is ascii.
-
-Device Operation
-----------------
-1.	Turn on the device:
-	echo 1 > /sys/bus/usb/.../powered
-2.	Set the device's mode:
-	echo $mode_msb > /sys/bus/usb/.../mode_msb
-	echo $mode_lsb > /sys/bus/usb/.../mode_lsb
-3.	Set the textmode:
-	echo $textmode > /sys/bus/usb/.../textmode
-4.	set the text (for example):
-	echo "123ABC" > /sys/bus/usb/.../text (ascii)
-	echo "A1B2" > /sys/bus/usb/.../text (ascii)
-	echo -ne "\x01\x02\x03" > /sys/bus/usb/.../text (hex)
-5.	Set the decimal places.
-	The device has either 6 or 8 decimal points.
-	to set the nth decimal place calculate 10 ** n
-	and echo it in to /sys/bus/usb/.../decimals
-	To set multiple decimals points sum up each power.
-	For example, to set the 0th and 3rd decimal place
-	echo 1001 > /sys/bus/usb/.../decimals
diff --git a/Documentation/usb/mtouchusb.rst b/Documentation/usb/mtouchusb.rst
new file mode 100644
index 000000000000..d1111b74bf75
--- /dev/null
+++ b/Documentation/usb/mtouchusb.rst
@@ -0,0 +1,84 @@
+﻿================
+mtouchusb driver
+================
+
+Changes
+=======
+
+- 0.3 - Created based off of scanner & INSTALL from the original touchscreen
+  driver on freecode (http://freecode.com/projects/3mtouchscreendriver)
+- Amended for linux-2.4.18, then 2.4.19
+
+- 0.5 - Complete rewrite using Linux Input in 2.6.3
+  Unfortunately no calibration support at this time
+
+- 1.4 - Multiple changes to support the EXII 5000UC and house cleaning
+  Changed reset from standard USB dev reset to vendor reset
+  Changed data sent to host from compensated to raw coordinates
+  Eliminated vendor/product module params
+  Performed multiple successful tests with an EXII-5010UC
+
+Supported Hardware
+==================
+
+::
+
+        All controllers have the Vendor: 0x0596 & Product: 0x0001
+
+
+        Controller Description          Part Number
+        ------------------------------------------------------
+
+        USB Capacitive - Pearl Case     14-205  (Discontinued)
+        USB Capacitive - Black Case     14-124  (Discontinued)
+        USB Capacitive - No Case        14-206  (Discontinued)
+
+        USB Capacitive - Pearl Case     EXII-5010UC
+        USB Capacitive - Black Case     EXII-5030UC
+        USB Capacitive - No Case        EXII-5050UC
+
+Driver Notes
+============
+
+Installation is simple, you only need to add Linux Input, Linux USB, and the
+driver to the kernel.  The driver can also be optionally built as a module.
+
+This driver appears to be one of possible 2 Linux USB Input Touchscreen
+drivers.  Although 3M produces a binary only driver available for
+download, I persist in updating this driver since I would like to use the
+touchscreen for embedded apps using QTEmbedded, DirectFB, etc. So I feel the
+logical choice is to use Linux Input.
+
+Currently there is no way to calibrate the device via this driver.  Even if
+the device could be calibrated, the driver pulls to raw coordinate data from
+the controller.  This means calibration must be performed within the
+userspace.
+
+The controller screen resolution is now 0 to 16384 for both X and Y reporting
+the raw touch data.  This is the same for the old and new capacitive USB
+controllers.
+
+Perhaps at some point an abstract function will be placed into evdev so
+generic functions like calibrations, resets, and vendor information can be
+requested from the userspace (And the drivers would handle the vendor specific
+tasks).
+
+TODO
+====
+
+Implement a control urb again to handle requests to and from the device
+such as calibration, etc once/if it becomes available.
+
+Disclaimer
+==========
+
+I am not a MicroTouch/3M employee, nor have I ever been.  3M does not support
+this driver!  If you want touch drivers only supported within X, please go to:
+
+http://www.3m.com/3MTouchSystems/
+
+Thanks
+======
+
+A huge thank you to 3M Touch Systems for the EXII-5010UC controllers for
+testing!
diff --git a/Documentation/usb/mtouchusb.txt b/Documentation/usb/mtouchusb.txt
deleted file mode 100644
index d1111b74bf75..000000000000
--- a/Documentation/usb/mtouchusb.txt
+++ /dev/null
@@ -1,84 +0,0 @@
-﻿================
-mtouchusb driver
-================
-
-Changes
-=======
-
-- 0.3 - Created based off of scanner & INSTALL from the original touchscreen
-  driver on freecode (http://freecode.com/projects/3mtouchscreendriver)
-- Amended for linux-2.4.18, then 2.4.19
-
-- 0.5 - Complete rewrite using Linux Input in 2.6.3
-  Unfortunately no calibration support at this time
-
-- 1.4 - Multiple changes to support the EXII 5000UC and house cleaning
-  Changed reset from standard USB dev reset to vendor reset
-  Changed data sent to host from compensated to raw coordinates
-  Eliminated vendor/product module params
-  Performed multiple successful tests with an EXII-5010UC
-
-Supported Hardware
-==================
-
-::
-
-        All controllers have the Vendor: 0x0596 & Product: 0x0001
-
-
-        Controller Description          Part Number
-        ------------------------------------------------------
-
-        USB Capacitive - Pearl Case     14-205  (Discontinued)
-        USB Capacitive - Black Case     14-124  (Discontinued)
-        USB Capacitive - No Case        14-206  (Discontinued)
-
-        USB Capacitive - Pearl Case     EXII-5010UC
-        USB Capacitive - Black Case     EXII-5030UC
-        USB Capacitive - No Case        EXII-5050UC
-
-Driver Notes
-============
-
-Installation is simple, you only need to add Linux Input, Linux USB, and the
-driver to the kernel.  The driver can also be optionally built as a module.
-
-This driver appears to be one of possible 2 Linux USB Input Touchscreen
-drivers.  Although 3M produces a binary only driver available for
-download, I persist in updating this driver since I would like to use the
-touchscreen for embedded apps using QTEmbedded, DirectFB, etc. So I feel the
-logical choice is to use Linux Input.
-
-Currently there is no way to calibrate the device via this driver.  Even if
-the device could be calibrated, the driver pulls to raw coordinate data from
-the controller.  This means calibration must be performed within the
-userspace.
-
-The controller screen resolution is now 0 to 16384 for both X and Y reporting
-the raw touch data.  This is the same for the old and new capacitive USB
-controllers.
-
-Perhaps at some point an abstract function will be placed into evdev so
-generic functions like calibrations, resets, and vendor information can be
-requested from the userspace (And the drivers would handle the vendor specific
-tasks).
-
-TODO
-====
-
-Implement a control urb again to handle requests to and from the device
-such as calibration, etc once/if it becomes available.
-
-Disclaimer
-==========
-
-I am not a MicroTouch/3M employee, nor have I ever been.  3M does not support
-this driver!  If you want touch drivers only supported within X, please go to:
-
-http://www.3m.com/3MTouchSystems/
-
-Thanks
-======
-
-A huge thank you to 3M Touch Systems for the EXII-5010UC controllers for
-testing!
diff --git a/Documentation/usb/ohci.rst b/Documentation/usb/ohci.rst
new file mode 100644
index 000000000000..bb3c49719e6b
--- /dev/null
+++ b/Documentation/usb/ohci.rst
@@ -0,0 +1,35 @@
+====
+OHCI
+====
+
+23-Aug-2002
+
+The "ohci-hcd" driver is a USB Host Controller Driver (HCD) that is derived
+from the "usb-ohci" driver from the 2.4 kernel series.  The "usb-ohci" code
+was written primarily by Roman Weissgaerber <weissg@vienna.at> but with
+contributions from many others (read its copyright/licencing header).
+
+It supports the "Open Host Controller Interface" (OHCI), which standardizes
+hardware register protocols used to talk to USB 1.1 host controllers.  As
+compared to the earlier "Universal Host Controller Interface" (UHCI) from
+Intel, it pushes more intelligence into the hardware.  USB 1.1 controllers
+from vendors other than Intel and VIA generally use OHCI.
+
+Changes since the 2.4 kernel include
+
+	- improved robustness; bugfixes; and less overhead
+	- supports the updated and simplified usbcore APIs
+	- interrupt transfers can be larger, and can be queued
+	- less code, by using the upper level "hcd" framework
+	- supports some non-PCI implementations of OHCI
+	- ... more
+
+The "ohci-hcd" driver handles all USB 1.1 transfer types.  Transfers of all
+types can be queued.  That was also true in "usb-ohci", except for interrupt
+transfers.  Previously, using periods of one frame would risk data loss due
+to overhead in IRQ processing.  When interrupt transfers are queued, those
+risks can be minimized by making sure the hardware always has transfers to
+work on while the OS is getting around to the relevant IRQ processing.
+
+- David Brownell
+  <dbrownell@users.sourceforge.net>
diff --git a/Documentation/usb/ohci.txt b/Documentation/usb/ohci.txt
deleted file mode 100644
index bb3c49719e6b..000000000000
--- a/Documentation/usb/ohci.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-====
-OHCI
-====
-
-23-Aug-2002
-
-The "ohci-hcd" driver is a USB Host Controller Driver (HCD) that is derived
-from the "usb-ohci" driver from the 2.4 kernel series.  The "usb-ohci" code
-was written primarily by Roman Weissgaerber <weissg@vienna.at> but with
-contributions from many others (read its copyright/licencing header).
-
-It supports the "Open Host Controller Interface" (OHCI), which standardizes
-hardware register protocols used to talk to USB 1.1 host controllers.  As
-compared to the earlier "Universal Host Controller Interface" (UHCI) from
-Intel, it pushes more intelligence into the hardware.  USB 1.1 controllers
-from vendors other than Intel and VIA generally use OHCI.
-
-Changes since the 2.4 kernel include
-
-	- improved robustness; bugfixes; and less overhead
-	- supports the updated and simplified usbcore APIs
-	- interrupt transfers can be larger, and can be queued
-	- less code, by using the upper level "hcd" framework
-	- supports some non-PCI implementations of OHCI
-	- ... more
-
-The "ohci-hcd" driver handles all USB 1.1 transfer types.  Transfers of all
-types can be queued.  That was also true in "usb-ohci", except for interrupt
-transfers.  Previously, using periods of one frame would risk data loss due
-to overhead in IRQ processing.  When interrupt transfers are queued, those
-risks can be minimized by making sure the hardware always has transfers to
-work on while the OS is getting around to the relevant IRQ processing.
-
-- David Brownell
-  <dbrownell@users.sourceforge.net>
diff --git a/Documentation/usb/rio.rst b/Documentation/usb/rio.rst
new file mode 100644
index 000000000000..ea73475471db
--- /dev/null
+++ b/Documentation/usb/rio.rst
@@ -0,0 +1,109 @@
+﻿============
+Diamonds Rio
+============
+
+Copyright (C) 1999, 2000 Bruce Tenison
+
+Portions Copyright (C) 1999, 2000 David Nelson
+
+Thanks to David Nelson for guidance and the usage of the scanner.txt
+and scanner.c files to model our driver and this informative file.
+
+Mar. 2, 2000
+
+Changes
+=======
+
+- Initial Revision
+
+
+Overview
+========
+
+This README will address issues regarding how to configure the kernel
+to access a RIO 500 mp3 player.
+Before I explain how to use this to access the Rio500 please be warned:
+
+.. warning::
+
+   Please note that this software is still under development.  The authors
+   are in no way responsible for any damage that may occur, no matter how
+   inconsequential.
+
+It seems that the Rio has a problem when sending .mp3 with low batteries.
+I suggest when the batteries are low and you want to transfer stuff that you
+replace it with a fresh one. In my case, what happened is I lost two 16kb
+blocks (they are no longer usable to store information to it). But I don't
+know if that's normal or not; it could simply be a problem with the flash
+memory.
+
+In an extreme case, I left my Rio playing overnight and the batteries wore
+down to nothing and appear to have corrupted the flash memory. My RIO
+needed to be replaced as a result.  Diamond tech support is aware of the
+problem.  Do NOT allow your batteries to wear down to nothing before
+changing them.  It appears RIO 500 firmware does not handle low battery
+power well at all.
+
+On systems with OHCI controllers, the kernel OHCI code appears to have
+power on problems with some chipsets.  If you are having problems
+connecting to your RIO 500, try turning it on first and then plugging it
+into the USB cable.
+
+Contact Information
+-------------------
+
+   The main page for the project is hosted at sourceforge.net in the following
+   URL: <http://rio500.sourceforge.net>. You can also go to the project's
+   sourceforge home page at: <http://sourceforge.net/projects/rio500/>.
+   There is also a mailing list: rio500-users@lists.sourceforge.net
+
+Authors
+-------
+
+Most of the code was written by Cesar Miquel <miquel@df.uba.ar>. Keith
+Clayton <kclayton@jps.net> is incharge of the PPC port and making sure
+things work there. Bruce Tenison <btenison@dibbs.net> is adding support
+for .fon files and also does testing. The program will mostly sure be
+re-written and Pete Ikusz along with the rest will re-design it. I would
+also like to thank Tri Nguyen <tmn_3022000@hotmail.com> who provided use
+with some important information regarding the communication with the Rio.
+
+Additional Information and userspace tools
+
+	http://rio500.sourceforge.net/
+
+
+Requirements
+============
+
+A host with a USB port running a Linux kernel with RIO 500 support enabled.
+
+The driver is a module called rio500, which should be automatically loaded
+as you plug in your device. If that fails you can manually load it with
+
+  modprobe rio500
+
+Udev should automatically create a device node as soon as plug in your device.
+If that fails, you can manually add a device for the USB rio500::
+
+  mknod /dev/usb/rio500 c 180 64
+
+In that case, set appropriate permissions for /dev/usb/rio500 (don't forget
+about group and world permissions).  Both read and write permissions are
+required for proper operation.
+
+That's it.  The Rio500 Utils at: http://rio500.sourceforge.net should
+be able to access the rio500.
+
+Limits
+======
+
+You can use only a single rio500 device at a time with your computer.
+
+Bugs
+====
+
+If you encounter any problems feel free to drop me an email.
+
+Bruce Tenison
+btenison@dibbs.net
diff --git a/Documentation/usb/rio.txt b/Documentation/usb/rio.txt
deleted file mode 100644
index ea73475471db..000000000000
--- a/Documentation/usb/rio.txt
+++ /dev/null
@@ -1,109 +0,0 @@
-﻿============
-Diamonds Rio
-============
-
-Copyright (C) 1999, 2000 Bruce Tenison
-
-Portions Copyright (C) 1999, 2000 David Nelson
-
-Thanks to David Nelson for guidance and the usage of the scanner.txt
-and scanner.c files to model our driver and this informative file.
-
-Mar. 2, 2000
-
-Changes
-=======
-
-- Initial Revision
-
-
-Overview
-========
-
-This README will address issues regarding how to configure the kernel
-to access a RIO 500 mp3 player.
-Before I explain how to use this to access the Rio500 please be warned:
-
-.. warning::
-
-   Please note that this software is still under development.  The authors
-   are in no way responsible for any damage that may occur, no matter how
-   inconsequential.
-
-It seems that the Rio has a problem when sending .mp3 with low batteries.
-I suggest when the batteries are low and you want to transfer stuff that you
-replace it with a fresh one. In my case, what happened is I lost two 16kb
-blocks (they are no longer usable to store information to it). But I don't
-know if that's normal or not; it could simply be a problem with the flash
-memory.
-
-In an extreme case, I left my Rio playing overnight and the batteries wore
-down to nothing and appear to have corrupted the flash memory. My RIO
-needed to be replaced as a result.  Diamond tech support is aware of the
-problem.  Do NOT allow your batteries to wear down to nothing before
-changing them.  It appears RIO 500 firmware does not handle low battery
-power well at all.
-
-On systems with OHCI controllers, the kernel OHCI code appears to have
-power on problems with some chipsets.  If you are having problems
-connecting to your RIO 500, try turning it on first and then plugging it
-into the USB cable.
-
-Contact Information
--------------------
-
-   The main page for the project is hosted at sourceforge.net in the following
-   URL: <http://rio500.sourceforge.net>. You can also go to the project's
-   sourceforge home page at: <http://sourceforge.net/projects/rio500/>.
-   There is also a mailing list: rio500-users@lists.sourceforge.net
-
-Authors
--------
-
-Most of the code was written by Cesar Miquel <miquel@df.uba.ar>. Keith
-Clayton <kclayton@jps.net> is incharge of the PPC port and making sure
-things work there. Bruce Tenison <btenison@dibbs.net> is adding support
-for .fon files and also does testing. The program will mostly sure be
-re-written and Pete Ikusz along with the rest will re-design it. I would
-also like to thank Tri Nguyen <tmn_3022000@hotmail.com> who provided use
-with some important information regarding the communication with the Rio.
-
-Additional Information and userspace tools
-
-	http://rio500.sourceforge.net/
-
-
-Requirements
-============
-
-A host with a USB port running a Linux kernel with RIO 500 support enabled.
-
-The driver is a module called rio500, which should be automatically loaded
-as you plug in your device. If that fails you can manually load it with
-
-  modprobe rio500
-
-Udev should automatically create a device node as soon as plug in your device.
-If that fails, you can manually add a device for the USB rio500::
-
-  mknod /dev/usb/rio500 c 180 64
-
-In that case, set appropriate permissions for /dev/usb/rio500 (don't forget
-about group and world permissions).  Both read and write permissions are
-required for proper operation.
-
-That's it.  The Rio500 Utils at: http://rio500.sourceforge.net should
-be able to access the rio500.
-
-Limits
-======
-
-You can use only a single rio500 device at a time with your computer.
-
-Bugs
-====
-
-If you encounter any problems feel free to drop me an email.
-
-Bruce Tenison
-btenison@dibbs.net
diff --git a/Documentation/usb/text_files.rst b/Documentation/usb/text_files.rst
new file mode 100644
index 000000000000..6a8d3fcf64b6
--- /dev/null
+++ b/Documentation/usb/text_files.rst
@@ -0,0 +1,29 @@
+Linux CDC ACM inf
+-----------------
+
+.. include:: linux-cdc-acm.inf
+    :literal:
+
+Linux inf
+---------
+
+.. include:: linux.inf
+    :literal:
+
+USB devfs drop permissions source
+---------------------------------
+
+.. literalinclude:: usbdevfs-drop-permissions.c
+    :language: c
+
+WUSB command line script to manipulate auth credentials
+-------------------------------------------------------
+
+.. literalinclude:: wusb-cbaf
+   :language: shell
+
+Credits
+-------
+
+.. include:: CREDITS
+    :literal:
diff --git a/Documentation/usb/usb-help.rst b/Documentation/usb/usb-help.rst
new file mode 100644
index 000000000000..dc23ecd4d802
--- /dev/null
+++ b/Documentation/usb/usb-help.rst
@@ -0,0 +1,17 @@
+==============
+USB references
+==============
+
+2008-Mar-7
+
+For USB help other than the readme files that are located in
+`Documentation/usb/*`, see the following:
+
+- Linux-USB project:  http://www.linux-usb.org
+  mirrors at          http://usb.in.tum.de/linux-usb/
+  and                 http://it.linux-usb.org
+- Linux USB Guide:    http://linux-usb.sourceforge.net
+- Linux-USB device overview (working devices and drivers):
+  http://www.qbik.ch/usb/devices/
+
+The Linux-USB mailing list is at linux-usb@vger.kernel.org
diff --git a/Documentation/usb/usb-help.txt b/Documentation/usb/usb-help.txt
deleted file mode 100644
index dc23ecd4d802..000000000000
--- a/Documentation/usb/usb-help.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-==============
-USB references
-==============
-
-2008-Mar-7
-
-For USB help other than the readme files that are located in
-`Documentation/usb/*`, see the following:
-
-- Linux-USB project:  http://www.linux-usb.org
-  mirrors at          http://usb.in.tum.de/linux-usb/
-  and                 http://it.linux-usb.org
-- Linux USB Guide:    http://linux-usb.sourceforge.net
-- Linux-USB device overview (working devices and drivers):
-  http://www.qbik.ch/usb/devices/
-
-The Linux-USB mailing list is at linux-usb@vger.kernel.org
diff --git a/Documentation/usb/usb-serial.rst b/Documentation/usb/usb-serial.rst
new file mode 100644
index 000000000000..8fa7dbd3da9a
--- /dev/null
+++ b/Documentation/usb/usb-serial.rst
@@ -0,0 +1,537 @@
+﻿==========
+USB serial
+==========
+
+Introduction
+============
+
+  The USB serial driver currently supports a number of different USB to
+  serial converter products, as well as some devices that use a serial
+  interface from userspace to talk to the device.
+
+  See the individual product section below for specific information about
+  the different devices.
+
+
+Configuration
+=============
+
+  Currently the driver can handle up to 256 different serial interfaces at
+  one time.
+
+    The major number that the driver uses is 188 so to use the driver,
+    create the following nodes::
+
+	mknod /dev/ttyUSB0 c 188 0
+	mknod /dev/ttyUSB1 c 188 1
+	mknod /dev/ttyUSB2 c 188 2
+	mknod /dev/ttyUSB3 c 188 3
+		.
+		.
+		.
+	mknod /dev/ttyUSB254 c 188 254
+	mknod /dev/ttyUSB255 c 188 255
+
+  When the device is connected and recognized by the driver, the driver
+  will print to the system log, which node(s) the device has been bound
+  to.
+
+
+Specific Devices Supported
+==========================
+
+
+ConnectTech WhiteHEAT 4 port converter
+--------------------------------------
+
+  ConnectTech has been very forthcoming with information about their
+  device, including providing a unit to test with.
+
+  The driver is officially supported by Connect Tech Inc.
+  http://www.connecttech.com
+
+  For any questions or problems with this driver, please contact
+  Connect Tech's Support Department at support@connecttech.com
+
+
+HandSpring Visor, Palm USB, and Clié USB driver
+-----------------------------------------------
+
+  This driver works with all HandSpring USB, Palm USB, and Sony Clié USB
+  devices.
+
+  Only when the device tries to connect to the host, will the device show
+  up to the host as a valid USB device. When this happens, the device is
+  properly enumerated, assigned a port, and then communication _should_ be
+  possible. The driver cleans up properly when the device is removed, or
+  the connection is canceled on the device.
+
+  NOTE:
+    This means that in order to talk to the device, the sync button must be
+    pressed BEFORE trying to get any program to communicate to the device.
+    This goes against the current documentation for pilot-xfer and other
+    packages, but is the only way that it will work due to the hardware
+    in the device.
+
+  When the device is connected, try talking to it on the second port
+  (this is usually /dev/ttyUSB1 if you do not have any other usb-serial
+  devices in the system.) The system log should tell you which port is
+  the port to use for the HotSync transfer. The "Generic" port can be used
+  for other device communication, such as a PPP link.
+
+  For some Sony Clié devices, /dev/ttyUSB0 must be used to talk to the
+  device.  This is true for all OS version 3.5 devices, and most devices
+  that have had a flash upgrade to a newer version of the OS.  See the
+  kernel system log for information on which is the correct port to use.
+
+  If after pressing the sync button, nothing shows up in the system log,
+  try resetting the device, first a hot reset, and then a cold reset if
+  necessary.  Some devices need this before they can talk to the USB port
+  properly.
+
+  Devices that are not compiled into the kernel can be specified with module
+  parameters.  e.g. modprobe visor vendor=0x54c product=0x66
+
+  There is a webpage and mailing lists for this portion of the driver at:
+  http://sourceforge.net/projects/usbvisor/
+
+  For any questions or problems with this driver, please contact Greg
+  Kroah-Hartman at greg@kroah.com
+
+
+PocketPC PDA Driver
+-------------------
+
+  This driver can be used to connect to Compaq iPAQ, HP Jornada, Casio EM500
+  and other PDAs running Windows CE 3.0 or PocketPC 2002 using a USB
+  cable/cradle.
+  Most devices supported by ActiveSync are supported out of the box.
+  For others, please use module parameters to specify the product and vendor
+  id. e.g. modprobe ipaq vendor=0x3f0 product=0x1125
+
+  The driver presents a serial interface (usually on /dev/ttyUSB0) over
+  which one may run ppp and establish a TCP/IP link to the PDA. Once this
+  is done, you can transfer files, backup, download email etc. The most
+  significant advantage of using USB is speed - I can get 73 to 113
+  kbytes/sec for download/upload to my iPAQ.
+
+  This driver is only one of a set of components required to utilize
+  the USB connection. Please visit http://synce.sourceforge.net which
+  contains the necessary packages and a simple step-by-step howto.
+
+  Once connected, you can use Win CE programs like ftpView, Pocket Outlook
+  from the PDA and xcerdisp, synce utilities from the Linux side.
+
+  To use Pocket IE, follow the instructions given at
+  http://www.tekguru.co.uk/EM500/usbtonet.htm to achieve the same thing
+  on Win98. Omit the proxy server part; Linux is quite capable of forwarding
+  packets unlike Win98. Another modification is required at least for the
+  iPAQ - disable autosync by going to the Start/Settings/Connections menu
+  and unchecking the "Automatically synchronize ..." box. Go to
+  Start/Programs/Connections, connect the cable and select "usbdial" (or
+  whatever you named your new USB connection). You should finally wind
+  up with a "Connected to usbdial" window with status shown as connected.
+  Now start up PIE and browse away.
+
+  If it doesn't work for some reason, load both the usbserial and ipaq module
+  with the module parameter "debug" set to 1 and examine the system log.
+  You can also try soft-resetting your PDA before attempting a connection.
+
+  Other functionality may be possible depending on your PDA. According to
+  Wes Cilldhaire <billybobjoehenrybob@hotmail.com>, with the Toshiba E570,
+  ...if you boot into the bootloader (hold down the power when hitting the
+  reset button, continuing to hold onto the power until the bootloader screen
+  is displayed), then put it in the cradle with the ipaq driver loaded, open
+  a terminal on /dev/ttyUSB0, it gives you a "USB Reflash" terminal, which can
+  be used to flash the ROM, as well as the microP code..  so much for needing
+  Toshiba's $350 serial cable for flashing!! :D
+  NOTE: This has NOT been tested. Use at your own risk.
+
+  For any questions or problems with the driver, please contact Ganesh
+  Varadarajan <ganesh@veritas.com>
+
+
+Keyspan PDA Serial Adapter
+--------------------------
+
+  Single port DB-9 serial adapter, pushed as a PDA adapter for iMacs (mostly
+  sold in Macintosh catalogs, comes in a translucent white/green dongle).
+  Fairly simple device. Firmware is homebrew.
+  This driver also works for the Xircom/Entrega single port serial adapter.
+
+  Current status:
+
+   Things that work:
+     - basic input/output (tested with 'cu')
+     - blocking write when serial line can't keep up
+     - changing baud rates (up to 115200)
+     - getting/setting modem control pins (TIOCM{GET,SET,BIS,BIC})
+     - sending break (although duration looks suspect)
+
+   Things that don't:
+     - device strings (as logged by kernel) have trailing binary garbage
+     - device ID isn't right, might collide with other Keyspan products
+     - changing baud rates ought to flush tx/rx to avoid mangled half characters
+
+   Big Things on the todo list:
+     - parity, 7 vs 8 bits per char, 1 or 2 stop bits
+     - HW flow control
+     - not all of the standard USB descriptors are handled:
+       Get_Status, Set_Feature, O_NONBLOCK, select()
+
+  For any questions or problems with this driver, please contact Brian
+  Warner at warner@lothar.com
+
+
+Keyspan USA-series Serial Adapters
+----------------------------------
+
+  Single, Dual and Quad port adapters - driver uses Keyspan supplied
+  firmware and is being developed with their support.
+
+  Current status:
+
+    The USA-18X, USA-28X, USA-19, USA-19W and USA-49W are supported and
+    have been pretty thoroughly tested at various baud rates with 8-N-1
+    character settings.  Other character lengths and parity setups are
+    presently untested.
+
+    The USA-28 isn't yet supported though doing so should be pretty
+    straightforward.  Contact the maintainer if you require this
+    functionality.
+
+  More information is available at:
+
+        http://www.carnationsoftware.com/carnation/Keyspan.html
+
+  For any questions or problems with this driver, please contact Hugh
+  Blemings at hugh@misc.nu
+
+
+FTDI Single Port Serial Driver
+------------------------------
+
+  This is a single port DB-25 serial adapter.
+
+  Devices supported include:
+
+                - TripNav TN-200 USB GPS
+                - Navis Engineering Bureau CH-4711 USB GPS
+
+  For any questions or problems with this driver, please contact Bill Ryder.
+
+
+ZyXEL omni.net lcd plus ISDN TA
+-------------------------------
+
+  This is an ISDN TA. Please report both successes and troubles to
+  azummo@towertech.it
+
+
+Cypress M8 CY4601 Family Serial Driver
+--------------------------------------
+
+  This driver was in most part developed by Neil "koyama" Whelchel.  It
+  has been improved since that previous form to support dynamic serial
+  line settings and improved line handling.  The driver is for the most
+  part stable and has been tested on an smp machine. (dual p2)
+
+    Chipsets supported under CY4601 family:
+
+		CY7C63723, CY7C63742, CY7C63743, CY7C64013
+
+    Devices supported:
+
+		- DeLorme's USB Earthmate GPS (SiRF Star II lp arch)
+		- Cypress HID->COM RS232 adapter
+
+		Note:
+			Cypress Semiconductor claims no affiliation with the
+			hid->com device.
+
+     Most devices using chipsets under the CY4601 family should
+     work with the driver.  As long as they stay true to the CY4601
+     usbserial specification.
+
+    Technical notes:
+
+        The Earthmate starts out at 4800 8N1 by default... the driver will
+	upon start init to this setting.  usbserial core provides the rest
+	of the termios settings, along with some custom termios so that the
+	output is in proper format and parsable.
+
+	The device can be put into sirf mode by issuing NMEA command::
+
+		$PSRF100,<protocol>,<baud>,<databits>,<stopbits>,<parity>*CHECKSUM
+		$PSRF100,0,9600,8,1,0*0C
+
+		It should then be sufficient to change the port termios to match this
+		to begin communicating.
+
+	As far as I can tell it supports pretty much every sirf command as
+	documented online available with firmware 2.31, with some unknown
+	message ids.
+
+	The hid->com adapter can run at a maximum baud of 115200bps.  Please note
+	that the device has trouble or is incapable of raising line voltage properly.
+	It will be fine with null modem links, as long as you do not try to link two
+	together without hacking the adapter to set the line high.
+
+	The driver is smp safe.  Performance with the driver is rather low when using
+	it for transferring files.  This is being worked on, but I would be willing to
+	accept patches.  An urb queue or packet buffer would likely fit the bill here.
+
+	If you have any questions, problems, patches, feature requests, etc. you can
+	contact me here via email:
+
+					dignome@gmail.com
+
+		(your problems/patches can alternately be submitted to usb-devel)
+
+
+Digi AccelePort Driver
+----------------------
+
+  This driver supports the Digi AccelePort USB 2 and 4 devices, 2 port
+  (plus a parallel port) and 4 port USB serial converters.  The driver
+  does NOT yet support the Digi AccelePort USB 8.
+
+  This driver works under SMP with the usb-uhci driver.  It does not
+  work under SMP with the uhci driver.
+
+  The driver is generally working, though we still have a few more ioctls
+  to implement and final testing and debugging to do.  The parallel port
+  on the USB 2 is supported as a serial to parallel converter; in other
+  words, it appears as another USB serial port on Linux, even though
+  physically it is really a parallel port.  The Digi Acceleport USB 8
+  is not yet supported.
+
+  Please contact Peter Berger (pberger@brimson.com) or Al Borchers
+  (alborchers@steinerpoint.com) for questions or problems with this
+  driver.
+
+
+Belkin USB Serial Adapter F5U103
+--------------------------------
+
+  Single port DB-9/PS-2 serial adapter from Belkin with firmware by eTEK Labs.
+  The Peracom single port serial adapter also works with this driver, as
+  well as the GoHubs adapter.
+
+  Current status:
+
+    The following have been tested and work:
+
+      - Baud rate    300-230400
+      - Data bits    5-8
+      - Stop bits    1-2
+      - Parity       N,E,O,M,S
+      - Handshake    None, Software (XON/XOFF), Hardware (CTSRTS,CTSDTR) [1]_
+      - Break        Set and clear
+      - Line control Input/Output query and control [2]_
+
+  .. [1]
+         Hardware input flow control is only enabled for firmware
+         levels above 2.06.  Read source code comments describing Belkin
+         firmware errata.  Hardware output flow control is working for all
+         firmware versions.
+
+  .. [2]
+         Queries of inputs (CTS,DSR,CD,RI) show the last
+         reported state.  Queries of outputs (DTR,RTS) show the last
+         requested state and may not reflect current state as set by
+         automatic hardware flow control.
+
+  TO DO List:
+    - Add true modem control line query capability.  Currently tracks the
+      states reported by the interrupt and the states requested.
+    - Add error reporting back to application for UART error conditions.
+    - Add support for flush ioctls.
+    - Add everything else that is missing :)
+
+  For any questions or problems with this driver, please contact William
+  Greathouse at wgreathouse@smva.com
+
+
+Empeg empeg-car Mark I/II Driver
+--------------------------------
+
+  This is an experimental driver to provide connectivity support for the
+  client synchronization tools for an Empeg empeg-car mp3 player.
+
+  Tips:
+    * Don't forget to create the device nodes for ttyUSB{0,1,2,...}
+    * modprobe empeg (modprobe is your friend)
+    * emptool --usb /dev/ttyUSB0 (or whatever you named your device node)
+
+  For any questions or problems with this driver, please contact Gary
+  Brubaker at xavyer@ix.netcom.com
+
+
+MCT USB Single Port Serial Adapter U232
+---------------------------------------
+
+  This driver is for the MCT USB-RS232 Converter (25 pin, Model No.
+  U232-P25) from Magic Control Technology Corp. (there is also a 9 pin
+  Model No. U232-P9). More information about this device can be found at
+  the manufacturer's web-site: http://www.mct.com.tw.
+
+  The driver is generally working, though it still needs some more testing.
+  It is derived from the Belkin USB Serial Adapter F5U103 driver and its
+  TODO list is valid for this driver as well.
+
+  This driver has also been found to work for other products, which have
+  the same Vendor ID but different Product IDs. Sitecom's U232-P25 serial
+  converter uses Product ID 0x230 and Vendor ID 0x711 and works with this
+  driver. Also, D-Link's DU-H3SP USB BAY also works with this driver.
+
+  For any questions or problems with this driver, please contact Wolfgang
+  Grandegger at wolfgang@ces.ch
+
+
+Inside Out Networks Edgeport Driver
+-----------------------------------
+
+  This driver supports all devices made by Inside Out Networks, specifically
+  the following models:
+
+       - Edgeport/4
+       - Rapidport/4
+       - Edgeport/4t
+       - Edgeport/2
+       - Edgeport/4i
+       - Edgeport/2i
+       - Edgeport/421
+       - Edgeport/21
+       - Edgeport/8
+       - Edgeport/8 Dual
+       - Edgeport/2D8
+       - Edgeport/4D8
+       - Edgeport/8i
+       - Edgeport/2 DIN
+       - Edgeport/4 DIN
+       - Edgeport/16 Dual
+
+  For any questions or problems with this driver, please contact Greg
+  Kroah-Hartman at greg@kroah.com
+
+
+REINER SCT cyberJack pinpad/e-com USB chipcard reader
+-----------------------------------------------------
+
+  Interface to ISO 7816 compatible contactbased chipcards, e.g. GSM SIMs.
+
+  Current status:
+
+    This is the kernel part of the driver for this USB card reader.
+    There is also a user part for a CT-API driver available. A site
+    for downloading is TBA. For now, you can request it from the
+    maintainer (linux-usb@sii.li).
+
+  For any questions or problems with this driver, please contact
+  linux-usb@sii.li
+
+
+Prolific PL2303 Driver
+----------------------
+
+  This driver supports any device that has the PL2303 chip from Prolific
+  in it.  This includes a number of single port USB to serial converters,
+  more than 70% of USB GPS devices (in 2010), and some USB UPSes. Devices
+  from Aten (the UC-232) and IO-Data work with this driver, as does
+  the DCU-11 mobile-phone cable.
+
+  For any questions or problems with this driver, please contact Greg
+  Kroah-Hartman at greg@kroah.com
+
+
+KL5KUSB105 chipset / PalmConnect USB single-port adapter
+--------------------------------------------------------
+
+Current status:
+
+  The driver was put together by looking at the usb bus transactions
+  done by Palm's driver under Windows, so a lot of functionality is
+  still missing.  Notably, serial ioctls are sometimes faked or not yet
+  implemented.  Support for finding out about DSR and CTS line status is
+  however implemented (though not nicely), so your favorite autopilot(1)
+  and pilot-manager -daemon calls will work.  Baud rates up to 115200
+  are supported, but handshaking (software or hardware) is not, which is
+  why it is wise to cut down on the rate used is wise for large
+  transfers until this is settled.
+
+  See http://www.uuhaus.de/linux/palmconnect.html for up-to-date
+  information on this driver.
+
+Winchiphead CH341 Driver
+------------------------
+
+  This driver is for the Winchiphead CH341 USB-RS232 Converter. This chip
+  also implements an IEEE 1284 parallel port, I2C and SPI, but that is not
+  supported by the driver. The protocol was analyzed from the behaviour
+  of the Windows driver, no datasheet is available at present.
+
+  The manufacturer's website: http://www.winchiphead.com/.
+
+  For any questions or problems with this driver, please contact
+  frank@kingswood-consulting.co.uk.
+
+Moschip MCS7720, MCS7715 driver
+-------------------------------
+
+  These chips are present in devices sold by various manufacturers, such as Syba
+  and Cables Unlimited.  There may be others.  The 7720 provides two serial
+  ports, and the 7715 provides one serial and one standard PC parallel port.
+  Support for the 7715's parallel port is enabled by a separate option, which
+  will not appear unless parallel port support is first enabled at the top-level
+  of the Device Drivers config menu.  Currently only compatibility mode is
+  supported on the parallel port (no ECP/EPP).
+
+  TODO:
+    - Implement ECP/EPP modes for the parallel port.
+    - Baud rates higher than 115200 are currently broken.
+    - Devices with a single serial port based on the Moschip MCS7703 may work
+      with this driver with a simple addition to the usb_device_id table.  I
+      don't have one of these devices, so I can't say for sure.
+
+Generic Serial driver
+---------------------
+
+  If your device is not one of the above listed devices, compatible with
+  the above models, you can try out the "generic" interface. This
+  interface does not provide any type of control messages sent to the
+  device, and does not support any kind of device flow control. All that
+  is required of your device is that it has at least one bulk in endpoint,
+  or one bulk out endpoint.
+
+  To enable the generic driver to recognize your device, provide::
+
+	echo <vid> <pid> >/sys/bus/usb-serial/drivers/generic/new_id
+
+  where the <vid> and <pid> is replaced with the hex representation of your
+  device's vendor id and product id.
+  If the driver is compiled as a module you can also provide one id when
+  loading the module::
+
+	insmod usbserial vendor=0x#### product=0x####
+
+  This driver has been successfully used to connect to the NetChip USB
+  development board, providing a way to develop USB firmware without
+  having to write a custom driver.
+
+  For any questions or problems with this driver, please contact Greg
+  Kroah-Hartman at greg@kroah.com
+
+
+Contact
+=======
+
+  If anyone has any problems using these drivers, with any of the above
+  specified products, please contact the specific driver's author listed
+  above, or join the Linux-USB mailing list (information on joining the
+  mailing list, as well as a link to its searchable archive is at
+  http://www.linux-usb.org/ )
+
+
+Greg Kroah-Hartman
+greg@kroah.com
diff --git a/Documentation/usb/usb-serial.txt b/Documentation/usb/usb-serial.txt
deleted file mode 100644
index 8fa7dbd3da9a..000000000000
--- a/Documentation/usb/usb-serial.txt
+++ /dev/null
@@ -1,537 +0,0 @@
-﻿==========
-USB serial
-==========
-
-Introduction
-============
-
-  The USB serial driver currently supports a number of different USB to
-  serial converter products, as well as some devices that use a serial
-  interface from userspace to talk to the device.
-
-  See the individual product section below for specific information about
-  the different devices.
-
-
-Configuration
-=============
-
-  Currently the driver can handle up to 256 different serial interfaces at
-  one time.
-
-    The major number that the driver uses is 188 so to use the driver,
-    create the following nodes::
-
-	mknod /dev/ttyUSB0 c 188 0
-	mknod /dev/ttyUSB1 c 188 1
-	mknod /dev/ttyUSB2 c 188 2
-	mknod /dev/ttyUSB3 c 188 3
-		.
-		.
-		.
-	mknod /dev/ttyUSB254 c 188 254
-	mknod /dev/ttyUSB255 c 188 255
-
-  When the device is connected and recognized by the driver, the driver
-  will print to the system log, which node(s) the device has been bound
-  to.
-
-
-Specific Devices Supported
-==========================
-
-
-ConnectTech WhiteHEAT 4 port converter
---------------------------------------
-
-  ConnectTech has been very forthcoming with information about their
-  device, including providing a unit to test with.
-
-  The driver is officially supported by Connect Tech Inc.
-  http://www.connecttech.com
-
-  For any questions or problems with this driver, please contact
-  Connect Tech's Support Department at support@connecttech.com
-
-
-HandSpring Visor, Palm USB, and Clié USB driver
------------------------------------------------
-
-  This driver works with all HandSpring USB, Palm USB, and Sony Clié USB
-  devices.
-
-  Only when the device tries to connect to the host, will the device show
-  up to the host as a valid USB device. When this happens, the device is
-  properly enumerated, assigned a port, and then communication _should_ be
-  possible. The driver cleans up properly when the device is removed, or
-  the connection is canceled on the device.
-
-  NOTE:
-    This means that in order to talk to the device, the sync button must be
-    pressed BEFORE trying to get any program to communicate to the device.
-    This goes against the current documentation for pilot-xfer and other
-    packages, but is the only way that it will work due to the hardware
-    in the device.
-
-  When the device is connected, try talking to it on the second port
-  (this is usually /dev/ttyUSB1 if you do not have any other usb-serial
-  devices in the system.) The system log should tell you which port is
-  the port to use for the HotSync transfer. The "Generic" port can be used
-  for other device communication, such as a PPP link.
-
-  For some Sony Clié devices, /dev/ttyUSB0 must be used to talk to the
-  device.  This is true for all OS version 3.5 devices, and most devices
-  that have had a flash upgrade to a newer version of the OS.  See the
-  kernel system log for information on which is the correct port to use.
-
-  If after pressing the sync button, nothing shows up in the system log,
-  try resetting the device, first a hot reset, and then a cold reset if
-  necessary.  Some devices need this before they can talk to the USB port
-  properly.
-
-  Devices that are not compiled into the kernel can be specified with module
-  parameters.  e.g. modprobe visor vendor=0x54c product=0x66
-
-  There is a webpage and mailing lists for this portion of the driver at:
-  http://sourceforge.net/projects/usbvisor/
-
-  For any questions or problems with this driver, please contact Greg
-  Kroah-Hartman at greg@kroah.com
-
-
-PocketPC PDA Driver
--------------------
-
-  This driver can be used to connect to Compaq iPAQ, HP Jornada, Casio EM500
-  and other PDAs running Windows CE 3.0 or PocketPC 2002 using a USB
-  cable/cradle.
-  Most devices supported by ActiveSync are supported out of the box.
-  For others, please use module parameters to specify the product and vendor
-  id. e.g. modprobe ipaq vendor=0x3f0 product=0x1125
-
-  The driver presents a serial interface (usually on /dev/ttyUSB0) over
-  which one may run ppp and establish a TCP/IP link to the PDA. Once this
-  is done, you can transfer files, backup, download email etc. The most
-  significant advantage of using USB is speed - I can get 73 to 113
-  kbytes/sec for download/upload to my iPAQ.
-
-  This driver is only one of a set of components required to utilize
-  the USB connection. Please visit http://synce.sourceforge.net which
-  contains the necessary packages and a simple step-by-step howto.
-
-  Once connected, you can use Win CE programs like ftpView, Pocket Outlook
-  from the PDA and xcerdisp, synce utilities from the Linux side.
-
-  To use Pocket IE, follow the instructions given at
-  http://www.tekguru.co.uk/EM500/usbtonet.htm to achieve the same thing
-  on Win98. Omit the proxy server part; Linux is quite capable of forwarding
-  packets unlike Win98. Another modification is required at least for the
-  iPAQ - disable autosync by going to the Start/Settings/Connections menu
-  and unchecking the "Automatically synchronize ..." box. Go to
-  Start/Programs/Connections, connect the cable and select "usbdial" (or
-  whatever you named your new USB connection). You should finally wind
-  up with a "Connected to usbdial" window with status shown as connected.
-  Now start up PIE and browse away.
-
-  If it doesn't work for some reason, load both the usbserial and ipaq module
-  with the module parameter "debug" set to 1 and examine the system log.
-  You can also try soft-resetting your PDA before attempting a connection.
-
-  Other functionality may be possible depending on your PDA. According to
-  Wes Cilldhaire <billybobjoehenrybob@hotmail.com>, with the Toshiba E570,
-  ...if you boot into the bootloader (hold down the power when hitting the
-  reset button, continuing to hold onto the power until the bootloader screen
-  is displayed), then put it in the cradle with the ipaq driver loaded, open
-  a terminal on /dev/ttyUSB0, it gives you a "USB Reflash" terminal, which can
-  be used to flash the ROM, as well as the microP code..  so much for needing
-  Toshiba's $350 serial cable for flashing!! :D
-  NOTE: This has NOT been tested. Use at your own risk.
-
-  For any questions or problems with the driver, please contact Ganesh
-  Varadarajan <ganesh@veritas.com>
-
-
-Keyspan PDA Serial Adapter
---------------------------
-
-  Single port DB-9 serial adapter, pushed as a PDA adapter for iMacs (mostly
-  sold in Macintosh catalogs, comes in a translucent white/green dongle).
-  Fairly simple device. Firmware is homebrew.
-  This driver also works for the Xircom/Entrega single port serial adapter.
-
-  Current status:
-
-   Things that work:
-     - basic input/output (tested with 'cu')
-     - blocking write when serial line can't keep up
-     - changing baud rates (up to 115200)
-     - getting/setting modem control pins (TIOCM{GET,SET,BIS,BIC})
-     - sending break (although duration looks suspect)
-
-   Things that don't:
-     - device strings (as logged by kernel) have trailing binary garbage
-     - device ID isn't right, might collide with other Keyspan products
-     - changing baud rates ought to flush tx/rx to avoid mangled half characters
-
-   Big Things on the todo list:
-     - parity, 7 vs 8 bits per char, 1 or 2 stop bits
-     - HW flow control
-     - not all of the standard USB descriptors are handled:
-       Get_Status, Set_Feature, O_NONBLOCK, select()
-
-  For any questions or problems with this driver, please contact Brian
-  Warner at warner@lothar.com
-
-
-Keyspan USA-series Serial Adapters
-----------------------------------
-
-  Single, Dual and Quad port adapters - driver uses Keyspan supplied
-  firmware and is being developed with their support.
-
-  Current status:
-
-    The USA-18X, USA-28X, USA-19, USA-19W and USA-49W are supported and
-    have been pretty thoroughly tested at various baud rates with 8-N-1
-    character settings.  Other character lengths and parity setups are
-    presently untested.
-
-    The USA-28 isn't yet supported though doing so should be pretty
-    straightforward.  Contact the maintainer if you require this
-    functionality.
-
-  More information is available at:
-
-        http://www.carnationsoftware.com/carnation/Keyspan.html
-
-  For any questions or problems with this driver, please contact Hugh
-  Blemings at hugh@misc.nu
-
-
-FTDI Single Port Serial Driver
-------------------------------
-
-  This is a single port DB-25 serial adapter.
-
-  Devices supported include:
-
-                - TripNav TN-200 USB GPS
-                - Navis Engineering Bureau CH-4711 USB GPS
-
-  For any questions or problems with this driver, please contact Bill Ryder.
-
-
-ZyXEL omni.net lcd plus ISDN TA
--------------------------------
-
-  This is an ISDN TA. Please report both successes and troubles to
-  azummo@towertech.it
-
-
-Cypress M8 CY4601 Family Serial Driver
---------------------------------------
-
-  This driver was in most part developed by Neil "koyama" Whelchel.  It
-  has been improved since that previous form to support dynamic serial
-  line settings and improved line handling.  The driver is for the most
-  part stable and has been tested on an smp machine. (dual p2)
-
-    Chipsets supported under CY4601 family:
-
-		CY7C63723, CY7C63742, CY7C63743, CY7C64013
-
-    Devices supported:
-
-		- DeLorme's USB Earthmate GPS (SiRF Star II lp arch)
-		- Cypress HID->COM RS232 adapter
-
-		Note:
-			Cypress Semiconductor claims no affiliation with the
-			hid->com device.
-
-     Most devices using chipsets under the CY4601 family should
-     work with the driver.  As long as they stay true to the CY4601
-     usbserial specification.
-
-    Technical notes:
-
-        The Earthmate starts out at 4800 8N1 by default... the driver will
-	upon start init to this setting.  usbserial core provides the rest
-	of the termios settings, along with some custom termios so that the
-	output is in proper format and parsable.
-
-	The device can be put into sirf mode by issuing NMEA command::
-
-		$PSRF100,<protocol>,<baud>,<databits>,<stopbits>,<parity>*CHECKSUM
-		$PSRF100,0,9600,8,1,0*0C
-
-		It should then be sufficient to change the port termios to match this
-		to begin communicating.
-
-	As far as I can tell it supports pretty much every sirf command as
-	documented online available with firmware 2.31, with some unknown
-	message ids.
-
-	The hid->com adapter can run at a maximum baud of 115200bps.  Please note
-	that the device has trouble or is incapable of raising line voltage properly.
-	It will be fine with null modem links, as long as you do not try to link two
-	together without hacking the adapter to set the line high.
-
-	The driver is smp safe.  Performance with the driver is rather low when using
-	it for transferring files.  This is being worked on, but I would be willing to
-	accept patches.  An urb queue or packet buffer would likely fit the bill here.
-
-	If you have any questions, problems, patches, feature requests, etc. you can
-	contact me here via email:
-
-					dignome@gmail.com
-
-		(your problems/patches can alternately be submitted to usb-devel)
-
-
-Digi AccelePort Driver
-----------------------
-
-  This driver supports the Digi AccelePort USB 2 and 4 devices, 2 port
-  (plus a parallel port) and 4 port USB serial converters.  The driver
-  does NOT yet support the Digi AccelePort USB 8.
-
-  This driver works under SMP with the usb-uhci driver.  It does not
-  work under SMP with the uhci driver.
-
-  The driver is generally working, though we still have a few more ioctls
-  to implement and final testing and debugging to do.  The parallel port
-  on the USB 2 is supported as a serial to parallel converter; in other
-  words, it appears as another USB serial port on Linux, even though
-  physically it is really a parallel port.  The Digi Acceleport USB 8
-  is not yet supported.
-
-  Please contact Peter Berger (pberger@brimson.com) or Al Borchers
-  (alborchers@steinerpoint.com) for questions or problems with this
-  driver.
-
-
-Belkin USB Serial Adapter F5U103
---------------------------------
-
-  Single port DB-9/PS-2 serial adapter from Belkin with firmware by eTEK Labs.
-  The Peracom single port serial adapter also works with this driver, as
-  well as the GoHubs adapter.
-
-  Current status:
-
-    The following have been tested and work:
-
-      - Baud rate    300-230400
-      - Data bits    5-8
-      - Stop bits    1-2
-      - Parity       N,E,O,M,S
-      - Handshake    None, Software (XON/XOFF), Hardware (CTSRTS,CTSDTR) [1]_
-      - Break        Set and clear
-      - Line control Input/Output query and control [2]_
-
-  .. [1]
-         Hardware input flow control is only enabled for firmware
-         levels above 2.06.  Read source code comments describing Belkin
-         firmware errata.  Hardware output flow control is working for all
-         firmware versions.
-
-  .. [2]
-         Queries of inputs (CTS,DSR,CD,RI) show the last
-         reported state.  Queries of outputs (DTR,RTS) show the last
-         requested state and may not reflect current state as set by
-         automatic hardware flow control.
-
-  TO DO List:
-    - Add true modem control line query capability.  Currently tracks the
-      states reported by the interrupt and the states requested.
-    - Add error reporting back to application for UART error conditions.
-    - Add support for flush ioctls.
-    - Add everything else that is missing :)
-
-  For any questions or problems with this driver, please contact William
-  Greathouse at wgreathouse@smva.com
-
-
-Empeg empeg-car Mark I/II Driver
---------------------------------
-
-  This is an experimental driver to provide connectivity support for the
-  client synchronization tools for an Empeg empeg-car mp3 player.
-
-  Tips:
-    * Don't forget to create the device nodes for ttyUSB{0,1,2,...}
-    * modprobe empeg (modprobe is your friend)
-    * emptool --usb /dev/ttyUSB0 (or whatever you named your device node)
-
-  For any questions or problems with this driver, please contact Gary
-  Brubaker at xavyer@ix.netcom.com
-
-
-MCT USB Single Port Serial Adapter U232
----------------------------------------
-
-  This driver is for the MCT USB-RS232 Converter (25 pin, Model No.
-  U232-P25) from Magic Control Technology Corp. (there is also a 9 pin
-  Model No. U232-P9). More information about this device can be found at
-  the manufacturer's web-site: http://www.mct.com.tw.
-
-  The driver is generally working, though it still needs some more testing.
-  It is derived from the Belkin USB Serial Adapter F5U103 driver and its
-  TODO list is valid for this driver as well.
-
-  This driver has also been found to work for other products, which have
-  the same Vendor ID but different Product IDs. Sitecom's U232-P25 serial
-  converter uses Product ID 0x230 and Vendor ID 0x711 and works with this
-  driver. Also, D-Link's DU-H3SP USB BAY also works with this driver.
-
-  For any questions or problems with this driver, please contact Wolfgang
-  Grandegger at wolfgang@ces.ch
-
-
-Inside Out Networks Edgeport Driver
------------------------------------
-
-  This driver supports all devices made by Inside Out Networks, specifically
-  the following models:
-
-       - Edgeport/4
-       - Rapidport/4
-       - Edgeport/4t
-       - Edgeport/2
-       - Edgeport/4i
-       - Edgeport/2i
-       - Edgeport/421
-       - Edgeport/21
-       - Edgeport/8
-       - Edgeport/8 Dual
-       - Edgeport/2D8
-       - Edgeport/4D8
-       - Edgeport/8i
-       - Edgeport/2 DIN
-       - Edgeport/4 DIN
-       - Edgeport/16 Dual
-
-  For any questions or problems with this driver, please contact Greg
-  Kroah-Hartman at greg@kroah.com
-
-
-REINER SCT cyberJack pinpad/e-com USB chipcard reader
------------------------------------------------------
-
-  Interface to ISO 7816 compatible contactbased chipcards, e.g. GSM SIMs.
-
-  Current status:
-
-    This is the kernel part of the driver for this USB card reader.
-    There is also a user part for a CT-API driver available. A site
-    for downloading is TBA. For now, you can request it from the
-    maintainer (linux-usb@sii.li).
-
-  For any questions or problems with this driver, please contact
-  linux-usb@sii.li
-
-
-Prolific PL2303 Driver
-----------------------
-
-  This driver supports any device that has the PL2303 chip from Prolific
-  in it.  This includes a number of single port USB to serial converters,
-  more than 70% of USB GPS devices (in 2010), and some USB UPSes. Devices
-  from Aten (the UC-232) and IO-Data work with this driver, as does
-  the DCU-11 mobile-phone cable.
-
-  For any questions or problems with this driver, please contact Greg
-  Kroah-Hartman at greg@kroah.com
-
-
-KL5KUSB105 chipset / PalmConnect USB single-port adapter
---------------------------------------------------------
-
-Current status:
-
-  The driver was put together by looking at the usb bus transactions
-  done by Palm's driver under Windows, so a lot of functionality is
-  still missing.  Notably, serial ioctls are sometimes faked or not yet
-  implemented.  Support for finding out about DSR and CTS line status is
-  however implemented (though not nicely), so your favorite autopilot(1)
-  and pilot-manager -daemon calls will work.  Baud rates up to 115200
-  are supported, but handshaking (software or hardware) is not, which is
-  why it is wise to cut down on the rate used is wise for large
-  transfers until this is settled.
-
-  See http://www.uuhaus.de/linux/palmconnect.html for up-to-date
-  information on this driver.
-
-Winchiphead CH341 Driver
-------------------------
-
-  This driver is for the Winchiphead CH341 USB-RS232 Converter. This chip
-  also implements an IEEE 1284 parallel port, I2C and SPI, but that is not
-  supported by the driver. The protocol was analyzed from the behaviour
-  of the Windows driver, no datasheet is available at present.
-
-  The manufacturer's website: http://www.winchiphead.com/.
-
-  For any questions or problems with this driver, please contact
-  frank@kingswood-consulting.co.uk.
-
-Moschip MCS7720, MCS7715 driver
--------------------------------
-
-  These chips are present in devices sold by various manufacturers, such as Syba
-  and Cables Unlimited.  There may be others.  The 7720 provides two serial
-  ports, and the 7715 provides one serial and one standard PC parallel port.
-  Support for the 7715's parallel port is enabled by a separate option, which
-  will not appear unless parallel port support is first enabled at the top-level
-  of the Device Drivers config menu.  Currently only compatibility mode is
-  supported on the parallel port (no ECP/EPP).
-
-  TODO:
-    - Implement ECP/EPP modes for the parallel port.
-    - Baud rates higher than 115200 are currently broken.
-    - Devices with a single serial port based on the Moschip MCS7703 may work
-      with this driver with a simple addition to the usb_device_id table.  I
-      don't have one of these devices, so I can't say for sure.
-
-Generic Serial driver
----------------------
-
-  If your device is not one of the above listed devices, compatible with
-  the above models, you can try out the "generic" interface. This
-  interface does not provide any type of control messages sent to the
-  device, and does not support any kind of device flow control. All that
-  is required of your device is that it has at least one bulk in endpoint,
-  or one bulk out endpoint.
-
-  To enable the generic driver to recognize your device, provide::
-
-	echo <vid> <pid> >/sys/bus/usb-serial/drivers/generic/new_id
-
-  where the <vid> and <pid> is replaced with the hex representation of your
-  device's vendor id and product id.
-  If the driver is compiled as a module you can also provide one id when
-  loading the module::
-
-	insmod usbserial vendor=0x#### product=0x####
-
-  This driver has been successfully used to connect to the NetChip USB
-  development board, providing a way to develop USB firmware without
-  having to write a custom driver.
-
-  For any questions or problems with this driver, please contact Greg
-  Kroah-Hartman at greg@kroah.com
-
-
-Contact
-=======
-
-  If anyone has any problems using these drivers, with any of the above
-  specified products, please contact the specific driver's author listed
-  above, or join the Linux-USB mailing list (information on joining the
-  mailing list, as well as a link to its searchable archive is at
-  http://www.linux-usb.org/ )
-
-
-Greg Kroah-Hartman
-greg@kroah.com
diff --git a/Documentation/usb/usbip_protocol.rst b/Documentation/usb/usbip_protocol.rst
new file mode 100644
index 000000000000..988c832166cd
--- /dev/null
+++ b/Documentation/usb/usbip_protocol.rst
@@ -0,0 +1,411 @@
+===============
+USB/IP protocol
+===============
+
+PRELIMINARY DRAFT, MAY CONTAIN MISTAKES!
+28 Jun 2011
+
+The USB/IP protocol follows a server/client architecture. The server exports the
+USB devices and the clients imports them. The device driver for the exported
+USB device runs on the client machine.
+
+The client may ask for the list of the exported USB devices. To get the list the
+client opens a TCP/IP connection towards the server, and sends an OP_REQ_DEVLIST
+packet on top of the TCP/IP connection (so the actual OP_REQ_DEVLIST may be sent
+in one or more pieces at the low level transport layer). The server sends back
+the OP_REP_DEVLIST packet which lists the exported USB devices. Finally the
+TCP/IP connection is closed.
+
+::
+
+ virtual host controller                                 usb host
+      "client"                                           "server"
+  (imports USB devices)                             (exports USB devices)
+          |                                                 |
+          |                  OP_REQ_DEVLIST                 |
+          | ----------------------------------------------> |
+          |                                                 |
+          |                  OP_REP_DEVLIST                 |
+          | <---------------------------------------------- |
+          |                                                 |
+
+Once the client knows the list of exported USB devices it may decide to use one
+of them. First the client opens a TCP/IP connection towards the server and
+sends an OP_REQ_IMPORT packet. The server replies with OP_REP_IMPORT. If the
+import was successful the TCP/IP connection remains open and will be used
+to transfer the URB traffic between the client and the server. The client may
+send two types of packets: the USBIP_CMD_SUBMIT to submit an URB, and
+USBIP_CMD_UNLINK to unlink a previously submitted URB. The answers of the
+server may be USBIP_RET_SUBMIT and USBIP_RET_UNLINK respectively.
+
+::
+
+ virtual host controller                                 usb host
+      "client"                                           "server"
+  (imports USB devices)                             (exports USB devices)
+          |                                                 |
+          |                  OP_REQ_IMPORT                  |
+          | ----------------------------------------------> |
+          |                                                 |
+          |                  OP_REP_IMPORT                  |
+          | <---------------------------------------------- |
+          |                                                 |
+          |                                                 |
+          |            USBIP_CMD_SUBMIT(seqnum = n)         |
+          | ----------------------------------------------> |
+          |                                                 |
+          |            USBIP_RET_SUBMIT(seqnum = n)         |
+          | <---------------------------------------------- |
+          |                        .                        |
+          |                        :                        |
+          |                                                 |
+          |            USBIP_CMD_SUBMIT(seqnum = m)         |
+          | ----------------------------------------------> |
+          |                                                 |
+          |            USBIP_CMD_SUBMIT(seqnum = m+1)       |
+          | ----------------------------------------------> |
+          |                                                 |
+          |            USBIP_CMD_SUBMIT(seqnum = m+2)       |
+          | ----------------------------------------------> |
+          |                                                 |
+          |            USBIP_RET_SUBMIT(seqnum = m)         |
+          | <---------------------------------------------- |
+          |                                                 |
+          |            USBIP_CMD_SUBMIT(seqnum = m+3)       |
+          | ----------------------------------------------> |
+          |                                                 |
+          |            USBIP_RET_SUBMIT(seqnum = m+1)       |
+          | <---------------------------------------------- |
+          |                                                 |
+          |            USBIP_CMD_SUBMIT(seqnum = m+4)       |
+          | ----------------------------------------------> |
+          |                                                 |
+          |            USBIP_RET_SUBMIT(seqnum = m+2)       |
+          | <---------------------------------------------- |
+          |                        .                        |
+          |                        :                        |
+          |                                                 |
+          |               USBIP_CMD_UNLINK                  |
+          | ----------------------------------------------> |
+          |                                                 |
+          |               USBIP_RET_UNLINK                  |
+          | <---------------------------------------------- |
+          |                                                 |
+
+The fields are in network (big endian) byte order meaning that the most significant
+byte (MSB) is stored at the lowest address.
+
+
+OP_REQ_DEVLIST:
+	Retrieve the list of exported USB devices.
+
++-----------+--------+------------+---------------------------------------------------+
+| Offset    | Length | Value      | Description                                       |
++===========+========+============+===================================================+
+| 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0 |
++-----------+--------+------------+---------------------------------------------------+
+| 2         | 2      | 0x8005     | Command code: Retrieve the list of exported USB   |
+|           |        |            | devices.                                          |
++-----------+--------+------------+---------------------------------------------------+
+| 4         | 4      | 0x00000000 | Status: unused, shall be set to 0                 |
++-----------+--------+------------+---------------------------------------------------+
+
+OP_REP_DEVLIST:
+	Reply with the list of exported USB devices.
+
++-----------+--------+------------+---------------------------------------------------+
+| Offset    | Length | Value      | Description                                       |
++===========+========+============+===================================================+
+| 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0.|
++-----------+--------+------------+---------------------------------------------------+
+| 2         | 2      | 0x0005     | Reply code: The list of exported USB devices.     |
++-----------+--------+------------+---------------------------------------------------+
+| 4         | 4      | 0x00000000 | Status: 0 for OK                                  |
++-----------+--------+------------+---------------------------------------------------+
+| 8         | 4      | n          | Number of exported devices: 0 means no exported   |
+|           |        |            | devices.                                          |
++-----------+--------+------------+---------------------------------------------------+
+| 0x0C      |        |            | From now on the exported n devices are described, |
+|           |        |            | if any. If no devices are exported the message    |
+|           |        |            | ends with the previous "number of exported        |
+|           |        |            | devices" field.                                   |
++-----------+--------+------------+---------------------------------------------------+
+|           | 256    |            | path: Path of the device on the host exporting the|
+|           |        |            | USB device, string closed with zero byte, e.g.    |
+|           |        |            | "/sys/devices/pci0000:00/0000:00:1d.1/usb3/3-2"   |
+|           |        |            | The unused bytes shall be filled with zero        |
+|           |        |            | bytes.                                            |
++-----------+--------+------------+---------------------------------------------------+
+| 0x10C     | 32     |            | busid: Bus ID of the exported device, string      |
+|           |        |            | closed with zero byte, e.g. "3-2". The unused     |
+|           |        |            | bytes shall be filled with zero bytes.            |
++-----------+--------+------------+---------------------------------------------------+
+| 0x12C     | 4      |            | busnum                                            |
++-----------+--------+------------+---------------------------------------------------+
+| 0x130     | 4      |            | devnum                                            |
++-----------+--------+------------+---------------------------------------------------+
+| 0x134     | 4      |            | speed                                             |
++-----------+--------+------------+---------------------------------------------------+
+| 0x138     | 2      |            | idVendor                                          |
++-----------+--------+------------+---------------------------------------------------+
+| 0x13A     | 2      |            | idProduct                                         |
++-----------+--------+------------+---------------------------------------------------+
+| 0x13C     | 2      |            | bcdDevice                                         |
++-----------+--------+------------+---------------------------------------------------+
+| 0x13E     | 1      |            | bDeviceClass                                      |
++-----------+--------+------------+---------------------------------------------------+
+| 0x13F     | 1      |            | bDeviceSubClass                                   |
++-----------+--------+------------+---------------------------------------------------+
+| 0x140     | 1      |            | bDeviceProtocol                                   |
++-----------+--------+------------+---------------------------------------------------+
+| 0x141     | 1      |            | bConfigurationValue                               |
++-----------+--------+------------+---------------------------------------------------+
+| 0x142     | 1      |            | bNumConfigurations                                |
++-----------+--------+------------+---------------------------------------------------+
+| 0x143     | 1      |            | bNumInterfaces                                    |
++-----------+--------+------------+---------------------------------------------------+
+| 0x144     |        | m_0        | From now on each interface is described, all      |
+|           |        |            | together bNumInterfaces times, with the           |
+|           |        |            | the following 4 fields:                           |
++-----------+--------+------------+---------------------------------------------------+
+|           | 1      |            | bInterfaceClass                                   |
++-----------+--------+------------+---------------------------------------------------+
+| 0x145     | 1      |            | bInterfaceSubClass                                |
++-----------+--------+------------+---------------------------------------------------+
+| 0x146     | 1      |            | bInterfaceProtocol                                |
++-----------+--------+------------+---------------------------------------------------+
+| 0x147     | 1      |            | padding byte for alignment, shall be set to zero  |
++-----------+--------+------------+---------------------------------------------------+
+| 0xC +     |        |            | The second exported USB device starts at i=1      |
+| i*0x138 + |        |            | with the busid field.                             |
+| m_(i-1)*4 |        |            |                                                   |
++-----------+--------+------------+---------------------------------------------------+
+
+OP_REQ_IMPORT:
+	Request to import (attach) a remote USB device.
+
++-----------+--------+------------+---------------------------------------------------+
+| Offset    | Length | Value      | Description                                       |
++===========+========+============+===================================================+
+| 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0 |
++-----------+--------+------------+---------------------------------------------------+
+| 2         | 2      | 0x8003     | Command code: import a remote USB device.         |
++-----------+--------+------------+---------------------------------------------------+
+| 4         | 4      | 0x00000000 | Status: unused, shall be set to 0                 |
++-----------+--------+------------+---------------------------------------------------+
+| 8         | 32     |            | busid: the busid of the exported device on the    |
+|           |        |            | remote host. The possible values are taken        |
+|           |        |            | from the message field OP_REP_DEVLIST.busid.      |
+|           |        |            | A string closed with zero, the unused bytes       |
+|           |        |            | shall be filled with zeros.                       |
++-----------+--------+------------+---------------------------------------------------+
+
+OP_REP_IMPORT:
+	Reply to import (attach) a remote USB device.
+
++-----------+--------+------------+---------------------------------------------------+
+| Offset    | Length | Value      | Description                                       |
++===========+========+============+===================================================+
+| 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0 |
++-----------+--------+------------+---------------------------------------------------+
+| 2         | 2      | 0x0003     | Reply code: Reply to import.                      |
++-----------+--------+------------+---------------------------------------------------+
+| 4         | 4      | 0x00000000 | Status:                                           |
+|           |        |            |                                                   |
+|           |        |            |   - 0 for OK                                      |
+|           |        |            |   - 1 for error                                   |
++-----------+--------+------------+---------------------------------------------------+
+| 8         |        |            | From now on comes the details of the imported     |
+|           |        |            | device, if the previous status field was OK (0),  |
+|           |        |            | otherwise the reply ends with the status field.   |
++-----------+--------+------------+---------------------------------------------------+
+|           | 256    |            | path: Path of the device on the host exporting the|
+|           |        |            | USB device, string closed with zero byte, e.g.    |
+|           |        |            | "/sys/devices/pci0000:00/0000:00:1d.1/usb3/3-2"   |
+|           |        |            | The unused bytes shall be filled with zero        |
+|           |        |            | bytes.                                            |
++-----------+--------+------------+---------------------------------------------------+
+| 0x108     | 32     |            | busid: Bus ID of the exported device, string      |
+|           |        |            | closed with zero byte, e.g. "3-2". The unused     |
+|           |        |            | bytes shall be filled with zero bytes.            |
++-----------+--------+------------+---------------------------------------------------+
+| 0x128     | 4      |            | busnum                                            |
++-----------+--------+------------+---------------------------------------------------+
+| 0x12C     | 4      |            | devnum                                            |
++-----------+--------+------------+---------------------------------------------------+
+| 0x130     | 4      |            | speed                                             |
++-----------+--------+------------+---------------------------------------------------+
+| 0x134     | 2      |            | idVendor                                          |
++-----------+--------+------------+---------------------------------------------------+
+| 0x136     | 2      |            | idProduct                                         |
++-----------+--------+------------+---------------------------------------------------+
+| 0x138     | 2      |            | bcdDevice                                         |
++-----------+--------+------------+---------------------------------------------------+
+| 0x139     | 1      |            | bDeviceClass                                      |
++-----------+--------+------------+---------------------------------------------------+
+| 0x13A     | 1      |            | bDeviceSubClass                                   |
++-----------+--------+------------+---------------------------------------------------+
+| 0x13B     | 1      |            | bDeviceProtocol                                   |
++-----------+--------+------------+---------------------------------------------------+
+| 0x13C     | 1      |            | bConfigurationValue                               |
++-----------+--------+------------+---------------------------------------------------+
+| 0x13D     | 1      |            | bNumConfigurations                                |
++-----------+--------+------------+---------------------------------------------------+
+| 0x13E     | 1      |            | bNumInterfaces                                    |
++-----------+--------+------------+---------------------------------------------------+
+
+USBIP_CMD_SUBMIT:
+	Submit an URB
+
++-----------+--------+------------+---------------------------------------------------+
+| Offset    | Length | Value      | Description                                       |
++===========+========+============+===================================================+
+| 0         | 4      | 0x00000001 | command: Submit an URB                            |
++-----------+--------+------------+---------------------------------------------------+
+| 4         | 4      |            | seqnum: the sequence number of the URB to submit  |
++-----------+--------+------------+---------------------------------------------------+
+| 8         | 4      |            | devid                                             |
++-----------+--------+------------+---------------------------------------------------+
+| 0xC       | 4      |            | direction:                                        |
+|           |        |            |                                                   |
+|           |        |            |    - 0: USBIP_DIR_OUT                             |
+|           |        |            |    - 1: USBIP_DIR_IN                              |
++-----------+--------+------------+---------------------------------------------------+
+| 0x10      | 4      |            | ep: endpoint number, possible values are: 0...15  |
++-----------+--------+------------+---------------------------------------------------+
+| 0x14      | 4      |            | transfer_flags: possible values depend on the     |
+|           |        |            | URB transfer type, see below                      |
++-----------+--------+------------+---------------------------------------------------+
+| 0x18      | 4      |            | transfer_buffer_length                            |
++-----------+--------+------------+---------------------------------------------------+
+| 0x1C      | 4      |            | start_frame: specify the selected frame to        |
+|           |        |            | transmit an ISO frame, ignored if URB_ISO_ASAP    |
+|           |        |            | is specified at transfer_flags                    |
++-----------+--------+------------+---------------------------------------------------+
+| 0x20      | 4      |            | number_of_packets: number of ISO packets          |
++-----------+--------+------------+---------------------------------------------------+
+| 0x24      | 4      |            | interval: maximum time for the request on the     |
+|           |        |            | server-side host controller                       |
++-----------+--------+------------+---------------------------------------------------+
+| 0x28      | 8      |            | setup: data bytes for USB setup, filled with      |
+|           |        |            | zeros if not used                                 |
++-----------+--------+------------+---------------------------------------------------+
+| 0x30      |        |            | URB data. For ISO transfers the padding between   |
+|           |        |            | each ISO packets is not transmitted.              |
++-----------+--------+------------+---------------------------------------------------+
+
+
+ +-------------------------+------------+---------+-----------+----------+-------------+
+ | Allowed transfer_flags  | value      | control | interrupt | bulk     | isochronous |
+ +=========================+============+=========+===========+==========+=============+
+ | URB_SHORT_NOT_OK        | 0x00000001 | only in | only in   | only in  | no          |
+ +-------------------------+------------+---------+-----------+----------+-------------+
+ | URB_ISO_ASAP            | 0x00000002 | no      | no        | no       | yes         |
+ +-------------------------+------------+---------+-----------+----------+-------------+
+ | URB_NO_TRANSFER_DMA_MAP | 0x00000004 | yes     | yes       | yes      | yes         |
+ +-------------------------+------------+---------+-----------+----------+-------------+
+ | URB_ZERO_PACKET         | 0x00000040 | no      | no        | only out | no          |
+ +-------------------------+------------+---------+-----------+----------+-------------+
+ | URB_NO_INTERRUPT        | 0x00000080 | yes     | yes       | yes      | yes         |
+ +-------------------------+------------+---------+-----------+----------+-------------+
+ | URB_FREE_BUFFER         | 0x00000100 | yes     | yes       | yes      | yes         |
+ +-------------------------+------------+---------+-----------+----------+-------------+
+ | URB_DIR_MASK            | 0x00000200 | yes     | yes       | yes      | yes         |
+ +-------------------------+------------+---------+-----------+----------+-------------+
+
+
+USBIP_RET_SUBMIT:
+	Reply for submitting an URB
+
++-----------+--------+------------+---------------------------------------------------+
+| Offset    | Length | Value      | Description                                       |
++===========+========+============+===================================================+
+| 0         | 4      | 0x00000003 | command                                           |
++-----------+--------+------------+---------------------------------------------------+
+| 4         | 4      |            | seqnum: URB sequence number                       |
++-----------+--------+------------+---------------------------------------------------+
+| 8         | 4      |            | devid                                             |
++-----------+--------+------------+---------------------------------------------------+
+| 0xC       | 4      |            | direction:                                        |
+|           |        |            |                                                   |
+|           |        |            |    - 0: USBIP_DIR_OUT                             |
+|           |        |            |    - 1: USBIP_DIR_IN                              |
++-----------+--------+------------+---------------------------------------------------+
+| 0x10      | 4      |            | ep: endpoint number                               |
++-----------+--------+------------+---------------------------------------------------+
+| 0x14      | 4      |            | status: zero for successful URB transaction,      |
+|           |        |            | otherwise some kind of error happened.            |
++-----------+--------+------------+---------------------------------------------------+
+| 0x18      | 4      | n          | actual_length: number of URB data bytes           |
++-----------+--------+------------+---------------------------------------------------+
+| 0x1C      | 4      |            | start_frame: for an ISO frame the actually        |
+|           |        |            | selected frame for transmit.                      |
++-----------+--------+------------+---------------------------------------------------+
+| 0x20      | 4      |            | number_of_packets                                 |
++-----------+--------+------------+---------------------------------------------------+
+| 0x24      | 4      |            | error_count                                       |
++-----------+--------+------------+---------------------------------------------------+
+| 0x28      | 8      |            | setup: data bytes for USB setup, filled with      |
+|           |        |            | zeros if not used                                 |
++-----------+--------+------------+---------------------------------------------------+
+| 0x30      | n      |            | URB data bytes. For ISO transfers the padding     |
+|           |        |            | between each ISO packets is not transmitted.      |
++-----------+--------+------------+---------------------------------------------------+
+
+USBIP_CMD_UNLINK:
+	Unlink an URB
+
++-----------+--------+------------+---------------------------------------------------+
+| Offset    | Length | Value      | Description                                       |
++===========+========+============+===================================================+
+| 0         | 4      | 0x00000002 | command: URB unlink command                       |
++-----------+--------+------------+---------------------------------------------------+
+| 4         | 4      |            | seqnum: URB sequence number to unlink:            |
+|           |        |            |                                                   |
+|           |        |            | FIXME:                                            |
+|           |        |            |    is this so?                                    |
++-----------+--------+------------+---------------------------------------------------+
+| 8         | 4      |            | devid                                             |
++-----------+--------+------------+---------------------------------------------------+
+| 0xC       | 4      |            | direction:                                        |
+|           |        |            |                                                   |
+|           |        |            |    - 0: USBIP_DIR_OUT                             |
+|           |        |            |    - 1: USBIP_DIR_IN                              |
++-----------+--------+------------+---------------------------------------------------+
+| 0x10      | 4      |            | ep: endpoint number: zero                         |
++-----------+--------+------------+---------------------------------------------------+
+| 0x14      | 4      |            | seqnum: the URB sequence number given previously  |
+|           |        |            | at USBIP_CMD_SUBMIT.seqnum field                  |
++-----------+--------+------------+---------------------------------------------------+
+| 0x30      | n      |            | URB data bytes. For ISO transfers the padding     |
+|           |        |            | between each ISO packets is not transmitted.      |
++-----------+--------+------------+---------------------------------------------------+
+
+USBIP_RET_UNLINK:
+	Reply for URB unlink
+
++-----------+--------+------------+---------------------------------------------------+
+| Offset    | Length | Value      | Description                                       |
++===========+========+============+===================================================+
+| 0         | 4      | 0x00000004 | command: reply for the URB unlink command         |
++-----------+--------+------------+---------------------------------------------------+
+| 4         | 4      |            | seqnum: the unlinked URB sequence number          |
++-----------+--------+------------+---------------------------------------------------+
+| 8         | 4      |            | devid                                             |
++-----------+--------+------------+---------------------------------------------------+
+| 0xC       | 4      |            | direction:                                        |
+|           |        |            |                                                   |
+|           |        |            |    - 0: USBIP_DIR_OUT                             |
+|           |        |            |    - 1: USBIP_DIR_IN                              |
++-----------+--------+------------+---------------------------------------------------+
+| 0x10      | 4      |            | ep: endpoint number                               |
++-----------+--------+------------+---------------------------------------------------+
+| 0x14      | 4      |            | status: This is the value contained in the        |
+|           |        |            | urb->status in the URB completition handler.      |
+|           |        |            |                                                   |
+|           |        |            | FIXME:                                            |
+|           |        |            |      a better explanation needed.                 |
++-----------+--------+------------+---------------------------------------------------+
+| 0x30      | n      |            | URB data bytes. For ISO transfers the padding     |
+|           |        |            | between each ISO packets is not transmitted.      |
++-----------+--------+------------+---------------------------------------------------+
diff --git a/Documentation/usb/usbip_protocol.txt b/Documentation/usb/usbip_protocol.txt
deleted file mode 100644
index 988c832166cd..000000000000
--- a/Documentation/usb/usbip_protocol.txt
+++ /dev/null
@@ -1,411 +0,0 @@
-===============
-USB/IP protocol
-===============
-
-PRELIMINARY DRAFT, MAY CONTAIN MISTAKES!
-28 Jun 2011
-
-The USB/IP protocol follows a server/client architecture. The server exports the
-USB devices and the clients imports them. The device driver for the exported
-USB device runs on the client machine.
-
-The client may ask for the list of the exported USB devices. To get the list the
-client opens a TCP/IP connection towards the server, and sends an OP_REQ_DEVLIST
-packet on top of the TCP/IP connection (so the actual OP_REQ_DEVLIST may be sent
-in one or more pieces at the low level transport layer). The server sends back
-the OP_REP_DEVLIST packet which lists the exported USB devices. Finally the
-TCP/IP connection is closed.
-
-::
-
- virtual host controller                                 usb host
-      "client"                                           "server"
-  (imports USB devices)                             (exports USB devices)
-          |                                                 |
-          |                  OP_REQ_DEVLIST                 |
-          | ----------------------------------------------> |
-          |                                                 |
-          |                  OP_REP_DEVLIST                 |
-          | <---------------------------------------------- |
-          |                                                 |
-
-Once the client knows the list of exported USB devices it may decide to use one
-of them. First the client opens a TCP/IP connection towards the server and
-sends an OP_REQ_IMPORT packet. The server replies with OP_REP_IMPORT. If the
-import was successful the TCP/IP connection remains open and will be used
-to transfer the URB traffic between the client and the server. The client may
-send two types of packets: the USBIP_CMD_SUBMIT to submit an URB, and
-USBIP_CMD_UNLINK to unlink a previously submitted URB. The answers of the
-server may be USBIP_RET_SUBMIT and USBIP_RET_UNLINK respectively.
-
-::
-
- virtual host controller                                 usb host
-      "client"                                           "server"
-  (imports USB devices)                             (exports USB devices)
-          |                                                 |
-          |                  OP_REQ_IMPORT                  |
-          | ----------------------------------------------> |
-          |                                                 |
-          |                  OP_REP_IMPORT                  |
-          | <---------------------------------------------- |
-          |                                                 |
-          |                                                 |
-          |            USBIP_CMD_SUBMIT(seqnum = n)         |
-          | ----------------------------------------------> |
-          |                                                 |
-          |            USBIP_RET_SUBMIT(seqnum = n)         |
-          | <---------------------------------------------- |
-          |                        .                        |
-          |                        :                        |
-          |                                                 |
-          |            USBIP_CMD_SUBMIT(seqnum = m)         |
-          | ----------------------------------------------> |
-          |                                                 |
-          |            USBIP_CMD_SUBMIT(seqnum = m+1)       |
-          | ----------------------------------------------> |
-          |                                                 |
-          |            USBIP_CMD_SUBMIT(seqnum = m+2)       |
-          | ----------------------------------------------> |
-          |                                                 |
-          |            USBIP_RET_SUBMIT(seqnum = m)         |
-          | <---------------------------------------------- |
-          |                                                 |
-          |            USBIP_CMD_SUBMIT(seqnum = m+3)       |
-          | ----------------------------------------------> |
-          |                                                 |
-          |            USBIP_RET_SUBMIT(seqnum = m+1)       |
-          | <---------------------------------------------- |
-          |                                                 |
-          |            USBIP_CMD_SUBMIT(seqnum = m+4)       |
-          | ----------------------------------------------> |
-          |                                                 |
-          |            USBIP_RET_SUBMIT(seqnum = m+2)       |
-          | <---------------------------------------------- |
-          |                        .                        |
-          |                        :                        |
-          |                                                 |
-          |               USBIP_CMD_UNLINK                  |
-          | ----------------------------------------------> |
-          |                                                 |
-          |               USBIP_RET_UNLINK                  |
-          | <---------------------------------------------- |
-          |                                                 |
-
-The fields are in network (big endian) byte order meaning that the most significant
-byte (MSB) is stored at the lowest address.
-
-
-OP_REQ_DEVLIST:
-	Retrieve the list of exported USB devices.
-
-+-----------+--------+------------+---------------------------------------------------+
-| Offset    | Length | Value      | Description                                       |
-+===========+========+============+===================================================+
-| 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0 |
-+-----------+--------+------------+---------------------------------------------------+
-| 2         | 2      | 0x8005     | Command code: Retrieve the list of exported USB   |
-|           |        |            | devices.                                          |
-+-----------+--------+------------+---------------------------------------------------+
-| 4         | 4      | 0x00000000 | Status: unused, shall be set to 0                 |
-+-----------+--------+------------+---------------------------------------------------+
-
-OP_REP_DEVLIST:
-	Reply with the list of exported USB devices.
-
-+-----------+--------+------------+---------------------------------------------------+
-| Offset    | Length | Value      | Description                                       |
-+===========+========+============+===================================================+
-| 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0.|
-+-----------+--------+------------+---------------------------------------------------+
-| 2         | 2      | 0x0005     | Reply code: The list of exported USB devices.     |
-+-----------+--------+------------+---------------------------------------------------+
-| 4         | 4      | 0x00000000 | Status: 0 for OK                                  |
-+-----------+--------+------------+---------------------------------------------------+
-| 8         | 4      | n          | Number of exported devices: 0 means no exported   |
-|           |        |            | devices.                                          |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x0C      |        |            | From now on the exported n devices are described, |
-|           |        |            | if any. If no devices are exported the message    |
-|           |        |            | ends with the previous "number of exported        |
-|           |        |            | devices" field.                                   |
-+-----------+--------+------------+---------------------------------------------------+
-|           | 256    |            | path: Path of the device on the host exporting the|
-|           |        |            | USB device, string closed with zero byte, e.g.    |
-|           |        |            | "/sys/devices/pci0000:00/0000:00:1d.1/usb3/3-2"   |
-|           |        |            | The unused bytes shall be filled with zero        |
-|           |        |            | bytes.                                            |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x10C     | 32     |            | busid: Bus ID of the exported device, string      |
-|           |        |            | closed with zero byte, e.g. "3-2". The unused     |
-|           |        |            | bytes shall be filled with zero bytes.            |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x12C     | 4      |            | busnum                                            |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x130     | 4      |            | devnum                                            |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x134     | 4      |            | speed                                             |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x138     | 2      |            | idVendor                                          |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x13A     | 2      |            | idProduct                                         |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x13C     | 2      |            | bcdDevice                                         |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x13E     | 1      |            | bDeviceClass                                      |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x13F     | 1      |            | bDeviceSubClass                                   |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x140     | 1      |            | bDeviceProtocol                                   |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x141     | 1      |            | bConfigurationValue                               |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x142     | 1      |            | bNumConfigurations                                |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x143     | 1      |            | bNumInterfaces                                    |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x144     |        | m_0        | From now on each interface is described, all      |
-|           |        |            | together bNumInterfaces times, with the           |
-|           |        |            | the following 4 fields:                           |
-+-----------+--------+------------+---------------------------------------------------+
-|           | 1      |            | bInterfaceClass                                   |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x145     | 1      |            | bInterfaceSubClass                                |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x146     | 1      |            | bInterfaceProtocol                                |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x147     | 1      |            | padding byte for alignment, shall be set to zero  |
-+-----------+--------+------------+---------------------------------------------------+
-| 0xC +     |        |            | The second exported USB device starts at i=1      |
-| i*0x138 + |        |            | with the busid field.                             |
-| m_(i-1)*4 |        |            |                                                   |
-+-----------+--------+------------+---------------------------------------------------+
-
-OP_REQ_IMPORT:
-	Request to import (attach) a remote USB device.
-
-+-----------+--------+------------+---------------------------------------------------+
-| Offset    | Length | Value      | Description                                       |
-+===========+========+============+===================================================+
-| 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0 |
-+-----------+--------+------------+---------------------------------------------------+
-| 2         | 2      | 0x8003     | Command code: import a remote USB device.         |
-+-----------+--------+------------+---------------------------------------------------+
-| 4         | 4      | 0x00000000 | Status: unused, shall be set to 0                 |
-+-----------+--------+------------+---------------------------------------------------+
-| 8         | 32     |            | busid: the busid of the exported device on the    |
-|           |        |            | remote host. The possible values are taken        |
-|           |        |            | from the message field OP_REP_DEVLIST.busid.      |
-|           |        |            | A string closed with zero, the unused bytes       |
-|           |        |            | shall be filled with zeros.                       |
-+-----------+--------+------------+---------------------------------------------------+
-
-OP_REP_IMPORT:
-	Reply to import (attach) a remote USB device.
-
-+-----------+--------+------------+---------------------------------------------------+
-| Offset    | Length | Value      | Description                                       |
-+===========+========+============+===================================================+
-| 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0 |
-+-----------+--------+------------+---------------------------------------------------+
-| 2         | 2      | 0x0003     | Reply code: Reply to import.                      |
-+-----------+--------+------------+---------------------------------------------------+
-| 4         | 4      | 0x00000000 | Status:                                           |
-|           |        |            |                                                   |
-|           |        |            |   - 0 for OK                                      |
-|           |        |            |   - 1 for error                                   |
-+-----------+--------+------------+---------------------------------------------------+
-| 8         |        |            | From now on comes the details of the imported     |
-|           |        |            | device, if the previous status field was OK (0),  |
-|           |        |            | otherwise the reply ends with the status field.   |
-+-----------+--------+------------+---------------------------------------------------+
-|           | 256    |            | path: Path of the device on the host exporting the|
-|           |        |            | USB device, string closed with zero byte, e.g.    |
-|           |        |            | "/sys/devices/pci0000:00/0000:00:1d.1/usb3/3-2"   |
-|           |        |            | The unused bytes shall be filled with zero        |
-|           |        |            | bytes.                                            |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x108     | 32     |            | busid: Bus ID of the exported device, string      |
-|           |        |            | closed with zero byte, e.g. "3-2". The unused     |
-|           |        |            | bytes shall be filled with zero bytes.            |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x128     | 4      |            | busnum                                            |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x12C     | 4      |            | devnum                                            |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x130     | 4      |            | speed                                             |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x134     | 2      |            | idVendor                                          |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x136     | 2      |            | idProduct                                         |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x138     | 2      |            | bcdDevice                                         |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x139     | 1      |            | bDeviceClass                                      |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x13A     | 1      |            | bDeviceSubClass                                   |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x13B     | 1      |            | bDeviceProtocol                                   |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x13C     | 1      |            | bConfigurationValue                               |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x13D     | 1      |            | bNumConfigurations                                |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x13E     | 1      |            | bNumInterfaces                                    |
-+-----------+--------+------------+---------------------------------------------------+
-
-USBIP_CMD_SUBMIT:
-	Submit an URB
-
-+-----------+--------+------------+---------------------------------------------------+
-| Offset    | Length | Value      | Description                                       |
-+===========+========+============+===================================================+
-| 0         | 4      | 0x00000001 | command: Submit an URB                            |
-+-----------+--------+------------+---------------------------------------------------+
-| 4         | 4      |            | seqnum: the sequence number of the URB to submit  |
-+-----------+--------+------------+---------------------------------------------------+
-| 8         | 4      |            | devid                                             |
-+-----------+--------+------------+---------------------------------------------------+
-| 0xC       | 4      |            | direction:                                        |
-|           |        |            |                                                   |
-|           |        |            |    - 0: USBIP_DIR_OUT                             |
-|           |        |            |    - 1: USBIP_DIR_IN                              |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x10      | 4      |            | ep: endpoint number, possible values are: 0...15  |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x14      | 4      |            | transfer_flags: possible values depend on the     |
-|           |        |            | URB transfer type, see below                      |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x18      | 4      |            | transfer_buffer_length                            |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x1C      | 4      |            | start_frame: specify the selected frame to        |
-|           |        |            | transmit an ISO frame, ignored if URB_ISO_ASAP    |
-|           |        |            | is specified at transfer_flags                    |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x20      | 4      |            | number_of_packets: number of ISO packets          |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x24      | 4      |            | interval: maximum time for the request on the     |
-|           |        |            | server-side host controller                       |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x28      | 8      |            | setup: data bytes for USB setup, filled with      |
-|           |        |            | zeros if not used                                 |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x30      |        |            | URB data. For ISO transfers the padding between   |
-|           |        |            | each ISO packets is not transmitted.              |
-+-----------+--------+------------+---------------------------------------------------+
-
-
- +-------------------------+------------+---------+-----------+----------+-------------+
- | Allowed transfer_flags  | value      | control | interrupt | bulk     | isochronous |
- +=========================+============+=========+===========+==========+=============+
- | URB_SHORT_NOT_OK        | 0x00000001 | only in | only in   | only in  | no          |
- +-------------------------+------------+---------+-----------+----------+-------------+
- | URB_ISO_ASAP            | 0x00000002 | no      | no        | no       | yes         |
- +-------------------------+------------+---------+-----------+----------+-------------+
- | URB_NO_TRANSFER_DMA_MAP | 0x00000004 | yes     | yes       | yes      | yes         |
- +-------------------------+------------+---------+-----------+----------+-------------+
- | URB_ZERO_PACKET         | 0x00000040 | no      | no        | only out | no          |
- +-------------------------+------------+---------+-----------+----------+-------------+
- | URB_NO_INTERRUPT        | 0x00000080 | yes     | yes       | yes      | yes         |
- +-------------------------+------------+---------+-----------+----------+-------------+
- | URB_FREE_BUFFER         | 0x00000100 | yes     | yes       | yes      | yes         |
- +-------------------------+------------+---------+-----------+----------+-------------+
- | URB_DIR_MASK            | 0x00000200 | yes     | yes       | yes      | yes         |
- +-------------------------+------------+---------+-----------+----------+-------------+
-
-
-USBIP_RET_SUBMIT:
-	Reply for submitting an URB
-
-+-----------+--------+------------+---------------------------------------------------+
-| Offset    | Length | Value      | Description                                       |
-+===========+========+============+===================================================+
-| 0         | 4      | 0x00000003 | command                                           |
-+-----------+--------+------------+---------------------------------------------------+
-| 4         | 4      |            | seqnum: URB sequence number                       |
-+-----------+--------+------------+---------------------------------------------------+
-| 8         | 4      |            | devid                                             |
-+-----------+--------+------------+---------------------------------------------------+
-| 0xC       | 4      |            | direction:                                        |
-|           |        |            |                                                   |
-|           |        |            |    - 0: USBIP_DIR_OUT                             |
-|           |        |            |    - 1: USBIP_DIR_IN                              |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x10      | 4      |            | ep: endpoint number                               |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x14      | 4      |            | status: zero for successful URB transaction,      |
-|           |        |            | otherwise some kind of error happened.            |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x18      | 4      | n          | actual_length: number of URB data bytes           |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x1C      | 4      |            | start_frame: for an ISO frame the actually        |
-|           |        |            | selected frame for transmit.                      |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x20      | 4      |            | number_of_packets                                 |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x24      | 4      |            | error_count                                       |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x28      | 8      |            | setup: data bytes for USB setup, filled with      |
-|           |        |            | zeros if not used                                 |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x30      | n      |            | URB data bytes. For ISO transfers the padding     |
-|           |        |            | between each ISO packets is not transmitted.      |
-+-----------+--------+------------+---------------------------------------------------+
-
-USBIP_CMD_UNLINK:
-	Unlink an URB
-
-+-----------+--------+------------+---------------------------------------------------+
-| Offset    | Length | Value      | Description                                       |
-+===========+========+============+===================================================+
-| 0         | 4      | 0x00000002 | command: URB unlink command                       |
-+-----------+--------+------------+---------------------------------------------------+
-| 4         | 4      |            | seqnum: URB sequence number to unlink:            |
-|           |        |            |                                                   |
-|           |        |            | FIXME:                                            |
-|           |        |            |    is this so?                                    |
-+-----------+--------+------------+---------------------------------------------------+
-| 8         | 4      |            | devid                                             |
-+-----------+--------+------------+---------------------------------------------------+
-| 0xC       | 4      |            | direction:                                        |
-|           |        |            |                                                   |
-|           |        |            |    - 0: USBIP_DIR_OUT                             |
-|           |        |            |    - 1: USBIP_DIR_IN                              |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x10      | 4      |            | ep: endpoint number: zero                         |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x14      | 4      |            | seqnum: the URB sequence number given previously  |
-|           |        |            | at USBIP_CMD_SUBMIT.seqnum field                  |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x30      | n      |            | URB data bytes. For ISO transfers the padding     |
-|           |        |            | between each ISO packets is not transmitted.      |
-+-----------+--------+------------+---------------------------------------------------+
-
-USBIP_RET_UNLINK:
-	Reply for URB unlink
-
-+-----------+--------+------------+---------------------------------------------------+
-| Offset    | Length | Value      | Description                                       |
-+===========+========+============+===================================================+
-| 0         | 4      | 0x00000004 | command: reply for the URB unlink command         |
-+-----------+--------+------------+---------------------------------------------------+
-| 4         | 4      |            | seqnum: the unlinked URB sequence number          |
-+-----------+--------+------------+---------------------------------------------------+
-| 8         | 4      |            | devid                                             |
-+-----------+--------+------------+---------------------------------------------------+
-| 0xC       | 4      |            | direction:                                        |
-|           |        |            |                                                   |
-|           |        |            |    - 0: USBIP_DIR_OUT                             |
-|           |        |            |    - 1: USBIP_DIR_IN                              |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x10      | 4      |            | ep: endpoint number                               |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x14      | 4      |            | status: This is the value contained in the        |
-|           |        |            | urb->status in the URB completition handler.      |
-|           |        |            |                                                   |
-|           |        |            | FIXME:                                            |
-|           |        |            |      a better explanation needed.                 |
-+-----------+--------+------------+---------------------------------------------------+
-| 0x30      | n      |            | URB data bytes. For ISO transfers the padding     |
-|           |        |            | between each ISO packets is not transmitted.      |
-+-----------+--------+------------+---------------------------------------------------+
diff --git a/Documentation/usb/usbmon.rst b/Documentation/usb/usbmon.rst
new file mode 100644
index 000000000000..b0bd51080799
--- /dev/null
+++ b/Documentation/usb/usbmon.rst
@@ -0,0 +1,375 @@
+======
+usbmon
+======
+
+Introduction
+============
+
+The name "usbmon" in lowercase refers to a facility in kernel which is
+used to collect traces of I/O on the USB bus. This function is analogous
+to a packet socket used by network monitoring tools such as tcpdump(1)
+or Ethereal. Similarly, it is expected that a tool such as usbdump or
+USBMon (with uppercase letters) is used to examine raw traces produced
+by usbmon.
+
+The usbmon reports requests made by peripheral-specific drivers to Host
+Controller Drivers (HCD). So, if HCD is buggy, the traces reported by
+usbmon may not correspond to bus transactions precisely. This is the same
+situation as with tcpdump.
+
+Two APIs are currently implemented: "text" and "binary". The binary API
+is available through a character device in /dev namespace and is an ABI.
+The text API is deprecated since 2.6.35, but available for convenience.
+
+How to use usbmon to collect raw text traces
+============================================
+
+Unlike the packet socket, usbmon has an interface which provides traces
+in a text format. This is used for two purposes. First, it serves as a
+common trace exchange format for tools while more sophisticated formats
+are finalized. Second, humans can read it in case tools are not available.
+
+To collect a raw text trace, execute following steps.
+
+1. Prepare
+----------
+
+Mount debugfs (it has to be enabled in your kernel configuration), and
+load the usbmon module (if built as module). The second step is skipped
+if usbmon is built into the kernel::
+
+	# mount -t debugfs none_debugs /sys/kernel/debug
+	# modprobe usbmon
+	#
+
+Verify that bus sockets are present:
+
+	# ls /sys/kernel/debug/usb/usbmon
+	0s  0u  1s  1t  1u  2s  2t  2u  3s  3t  3u  4s  4t  4u
+	#
+
+Now you can choose to either use the socket '0u' (to capture packets on all
+buses), and skip to step #3, or find the bus used by your device with step #2.
+This allows to filter away annoying devices that talk continuously.
+
+2. Find which bus connects to the desired device
+------------------------------------------------
+
+Run "cat /sys/kernel/debug/usb/devices", and find the T-line which corresponds
+to the device. Usually you do it by looking for the vendor string. If you have
+many similar devices, unplug one and compare the two
+/sys/kernel/debug/usb/devices outputs. The T-line will have a bus number.
+
+Example::
+
+  T:  Bus=03 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#=  2 Spd=12  MxCh= 0
+  D:  Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs=  1
+  P:  Vendor=0557 ProdID=2004 Rev= 1.00
+  S:  Manufacturer=ATEN
+  S:  Product=UC100KM V2.00
+
+"Bus=03" means it's bus 3. Alternatively, you can look at the output from
+"lsusb" and get the bus number from the appropriate line. Example:
+
+Bus 003 Device 002: ID 0557:2004 ATEN UC100KM V2.00
+
+3. Start 'cat'
+--------------
+
+::
+
+	# cat /sys/kernel/debug/usb/usbmon/3u > /tmp/1.mon.out
+
+to listen on a single bus, otherwise, to listen on all buses, type::
+
+	# cat /sys/kernel/debug/usb/usbmon/0u > /tmp/1.mon.out
+
+This process will read until it is killed. Naturally, the output can be
+redirected to a desirable location. This is preferred, because it is going
+to be quite long.
+
+4. Perform the desired operation on the USB bus
+-----------------------------------------------
+
+This is where you do something that creates the traffic: plug in a flash key,
+copy files, control a webcam, etc.
+
+5. Kill cat
+-----------
+
+Usually it's done with a keyboard interrupt (Control-C).
+
+At this point the output file (/tmp/1.mon.out in this example) can be saved,
+sent by e-mail, or inspected with a text editor. In the last case make sure
+that the file size is not excessive for your favourite editor.
+
+Raw text data format
+====================
+
+Two formats are supported currently: the original, or '1t' format, and
+the '1u' format. The '1t' format is deprecated in kernel 2.6.21. The '1u'
+format adds a few fields, such as ISO frame descriptors, interval, etc.
+It produces slightly longer lines, but otherwise is a perfect superset
+of '1t' format.
+
+If it is desired to recognize one from the other in a program, look at the
+"address" word (see below), where '1u' format adds a bus number. If 2 colons
+are present, it's the '1t' format, otherwise '1u'.
+
+Any text format data consists of a stream of events, such as URB submission,
+URB callback, submission error. Every event is a text line, which consists
+of whitespace separated words. The number or position of words may depend
+on the event type, but there is a set of words, common for all types.
+
+Here is the list of words, from left to right:
+
+- URB Tag. This is used to identify URBs, and is normally an in-kernel address
+  of the URB structure in hexadecimal, but can be a sequence number or any
+  other unique string, within reason.
+
+- Timestamp in microseconds, a decimal number. The timestamp's resolution
+  depends on available clock, and so it can be much worse than a microsecond
+  (if the implementation uses jiffies, for example).
+
+- Event Type. This type refers to the format of the event, not URB type.
+  Available types are: S - submission, C - callback, E - submission error.
+
+- "Address" word (formerly a "pipe"). It consists of four fields, separated by
+  colons: URB type and direction, Bus number, Device address, Endpoint number.
+  Type and direction are encoded with two bytes in the following manner:
+
+    == ==   =============================
+    Ci Co   Control input and output
+    Zi Zo   Isochronous input and output
+    Ii Io   Interrupt input and output
+    Bi Bo   Bulk input and output
+    == ==   =============================
+
+  Bus number, Device address, and Endpoint are decimal numbers, but they may
+  have leading zeros, for the sake of human readers.
+
+- URB Status word. This is either a letter, or several numbers separated
+  by colons: URB status, interval, start frame, and error count. Unlike the
+  "address" word, all fields save the status are optional. Interval is printed
+  only for interrupt and isochronous URBs. Start frame is printed only for
+  isochronous URBs. Error count is printed only for isochronous callback
+  events.
+
+  The status field is a decimal number, sometimes negative, which represents
+  a "status" field of the URB. This field makes no sense for submissions, but
+  is present anyway to help scripts with parsing. When an error occurs, the
+  field contains the error code.
+
+  In case of a submission of a Control packet, this field contains a Setup Tag
+  instead of an group of numbers. It is easy to tell whether the Setup Tag is
+  present because it is never a number. Thus if scripts find a set of numbers
+  in this word, they proceed to read Data Length (except for isochronous URBs).
+  If they find something else, like a letter, they read the setup packet before
+  reading the Data Length or isochronous descriptors.
+
+- Setup packet, if present, consists of 5 words: one of each for bmRequestType,
+  bRequest, wValue, wIndex, wLength, as specified by the USB Specification 2.0.
+  These words are safe to decode if Setup Tag was 's'. Otherwise, the setup
+  packet was present, but not captured, and the fields contain filler.
+
+- Number of isochronous frame descriptors and descriptors themselves.
+  If an Isochronous transfer event has a set of descriptors, a total number
+  of them in an URB is printed first, then a word per descriptor, up to a
+  total of 5. The word consists of 3 colon-separated decimal numbers for
+  status, offset, and length respectively. For submissions, initial length
+  is reported. For callbacks, actual length is reported.
+
+- Data Length. For submissions, this is the requested length. For callbacks,
+  this is the actual length.
+
+- Data tag. The usbmon may not always capture data, even if length is nonzero.
+  The data words are present only if this tag is '='.
+
+- Data words follow, in big endian hexadecimal format. Notice that they are
+  not machine words, but really just a byte stream split into words to make
+  it easier to read. Thus, the last word may contain from one to four bytes.
+  The length of collected data is limited and can be less than the data length
+  reported in the Data Length word. In the case of an Isochronous input (Zi)
+  completion where the received data is sparse in the buffer, the length of
+  the collected data can be greater than the Data Length value (because Data
+  Length counts only the bytes that were received whereas the Data words
+  contain the entire transfer buffer).
+
+Examples:
+
+An input control transfer to get a port status::
+
+  d5ea89a0 3575914555 S Ci:1:001:0 s a3 00 0000 0003 0004 4 <
+  d5ea89a0 3575914560 C Ci:1:001:0 0 4 = 01050000
+
+An output bulk transfer to send a SCSI command 0x28 (READ_10) in a 31-byte
+Bulk wrapper to a storage device at address 5::
+
+  dd65f0e8 4128379752 S Bo:1:005:2 -115 31 = 55534243 ad000000 00800000 80010a28 20000000 20000040 00000000 000000
+  dd65f0e8 4128379808 C Bo:1:005:2 0 31 >
+
+Raw binary format and API
+=========================
+
+The overall architecture of the API is about the same as the one above,
+only the events are delivered in binary format. Each event is sent in
+the following structure (its name is made up, so that we can refer to it)::
+
+  struct usbmon_packet {
+	u64 id;			/*  0: URB ID - from submission to callback */
+	unsigned char type;	/*  8: Same as text; extensible. */
+	unsigned char xfer_type; /*    ISO (0), Intr, Control, Bulk (3) */
+	unsigned char epnum;	/*     Endpoint number and transfer direction */
+	unsigned char devnum;	/*     Device address */
+	u16 busnum;		/* 12: Bus number */
+	char flag_setup;	/* 14: Same as text */
+	char flag_data;		/* 15: Same as text; Binary zero is OK. */
+	s64 ts_sec;		/* 16: gettimeofday */
+	s32 ts_usec;		/* 24: gettimeofday */
+	int status;		/* 28: */
+	unsigned int length;	/* 32: Length of data (submitted or actual) */
+	unsigned int len_cap;	/* 36: Delivered length */
+	union {			/* 40: */
+		unsigned char setup[SETUP_LEN];	/* Only for Control S-type */
+		struct iso_rec {		/* Only for ISO */
+			int error_count;
+			int numdesc;
+		} iso;
+	} s;
+	int interval;		/* 48: Only for Interrupt and ISO */
+	int start_frame;	/* 52: For ISO */
+	unsigned int xfer_flags; /* 56: copy of URB's transfer_flags */
+	unsigned int ndesc;	/* 60: Actual number of ISO descriptors */
+  };				/* 64 total length */
+
+These events can be received from a character device by reading with read(2),
+with an ioctl(2), or by accessing the buffer with mmap. However, read(2)
+only returns first 48 bytes for compatibility reasons.
+
+The character device is usually called /dev/usbmonN, where N is the USB bus
+number. Number zero (/dev/usbmon0) is special and means "all buses".
+Note that specific naming policy is set by your Linux distribution.
+
+If you create /dev/usbmon0 by hand, make sure that it is owned by root
+and has mode 0600. Otherwise, unprivileged users will be able to snoop
+keyboard traffic.
+
+The following ioctl calls are available, with MON_IOC_MAGIC 0x92:
+
+ MON_IOCQ_URB_LEN, defined as _IO(MON_IOC_MAGIC, 1)
+
+This call returns the length of data in the next event. Note that majority of
+events contain no data, so if this call returns zero, it does not mean that
+no events are available.
+
+ MON_IOCG_STATS, defined as _IOR(MON_IOC_MAGIC, 3, struct mon_bin_stats)
+
+The argument is a pointer to the following structure::
+
+  struct mon_bin_stats {
+	u32 queued;
+	u32 dropped;
+  };
+
+The member "queued" refers to the number of events currently queued in the
+buffer (and not to the number of events processed since the last reset).
+
+The member "dropped" is the number of events lost since the last call
+to MON_IOCG_STATS.
+
+ MON_IOCT_RING_SIZE, defined as _IO(MON_IOC_MAGIC, 4)
+
+This call sets the buffer size. The argument is the size in bytes.
+The size may be rounded down to the next chunk (or page). If the requested
+size is out of [unspecified] bounds for this kernel, the call fails with
+-EINVAL.
+
+ MON_IOCQ_RING_SIZE, defined as _IO(MON_IOC_MAGIC, 5)
+
+This call returns the current size of the buffer in bytes.
+
+ MON_IOCX_GET, defined as _IOW(MON_IOC_MAGIC, 6, struct mon_get_arg)
+ MON_IOCX_GETX, defined as _IOW(MON_IOC_MAGIC, 10, struct mon_get_arg)
+
+These calls wait for events to arrive if none were in the kernel buffer,
+then return the first event. The argument is a pointer to the following
+structure::
+
+  struct mon_get_arg {
+	struct usbmon_packet *hdr;
+	void *data;
+	size_t alloc;		/* Length of data (can be zero) */
+  };
+
+Before the call, hdr, data, and alloc should be filled. Upon return, the area
+pointed by hdr contains the next event structure, and the data buffer contains
+the data, if any. The event is removed from the kernel buffer.
+
+The MON_IOCX_GET copies 48 bytes to hdr area, MON_IOCX_GETX copies 64 bytes.
+
+ MON_IOCX_MFETCH, defined as _IOWR(MON_IOC_MAGIC, 7, struct mon_mfetch_arg)
+
+This ioctl is primarily used when the application accesses the buffer
+with mmap(2). Its argument is a pointer to the following structure::
+
+  struct mon_mfetch_arg {
+	uint32_t *offvec;	/* Vector of events fetched */
+	uint32_t nfetch;	/* Number of events to fetch (out: fetched) */
+	uint32_t nflush;	/* Number of events to flush */
+  };
+
+The ioctl operates in 3 stages.
+
+First, it removes and discards up to nflush events from the kernel buffer.
+The actual number of events discarded is returned in nflush.
+
+Second, it waits for an event to be present in the buffer, unless the pseudo-
+device is open with O_NONBLOCK.
+
+Third, it extracts up to nfetch offsets into the mmap buffer, and stores
+them into the offvec. The actual number of event offsets is stored into
+the nfetch.
+
+ MON_IOCH_MFLUSH, defined as _IO(MON_IOC_MAGIC, 8)
+
+This call removes a number of events from the kernel buffer. Its argument
+is the number of events to remove. If the buffer contains fewer events
+than requested, all events present are removed, and no error is reported.
+This works when no events are available too.
+
+ FIONBIO
+
+The ioctl FIONBIO may be implemented in the future, if there's a need.
+
+In addition to ioctl(2) and read(2), the special file of binary API can
+be polled with select(2) and poll(2). But lseek(2) does not work.
+
+* Memory-mapped access of the kernel buffer for the binary API
+
+The basic idea is simple:
+
+To prepare, map the buffer by getting the current size, then using mmap(2).
+Then, execute a loop similar to the one written in pseudo-code below::
+
+   struct mon_mfetch_arg fetch;
+   struct usbmon_packet *hdr;
+   int nflush = 0;
+   for (;;) {
+      fetch.offvec = vec; // Has N 32-bit words
+      fetch.nfetch = N;   // Or less than N
+      fetch.nflush = nflush;
+      ioctl(fd, MON_IOCX_MFETCH, &fetch);   // Process errors, too
+      nflush = fetch.nfetch;       // This many packets to flush when done
+      for (i = 0; i < nflush; i++) {
+         hdr = (struct ubsmon_packet *) &mmap_area[vec[i]];
+         if (hdr->type == '@')     // Filler packet
+            continue;
+         caddr_t data = &mmap_area[vec[i]] + 64;
+         process_packet(hdr, data);
+      }
+   }
+
+Thus, the main idea is to execute only one ioctl per N events.
+
+Although the buffer is circular, the returned headers and data do not cross
+the end of the buffer, so the above pseudo-code does not need any gathering.
diff --git a/Documentation/usb/usbmon.txt b/Documentation/usb/usbmon.txt
deleted file mode 100644
index b0bd51080799..000000000000
--- a/Documentation/usb/usbmon.txt
+++ /dev/null
@@ -1,375 +0,0 @@
-======
-usbmon
-======
-
-Introduction
-============
-
-The name "usbmon" in lowercase refers to a facility in kernel which is
-used to collect traces of I/O on the USB bus. This function is analogous
-to a packet socket used by network monitoring tools such as tcpdump(1)
-or Ethereal. Similarly, it is expected that a tool such as usbdump or
-USBMon (with uppercase letters) is used to examine raw traces produced
-by usbmon.
-
-The usbmon reports requests made by peripheral-specific drivers to Host
-Controller Drivers (HCD). So, if HCD is buggy, the traces reported by
-usbmon may not correspond to bus transactions precisely. This is the same
-situation as with tcpdump.
-
-Two APIs are currently implemented: "text" and "binary". The binary API
-is available through a character device in /dev namespace and is an ABI.
-The text API is deprecated since 2.6.35, but available for convenience.
-
-How to use usbmon to collect raw text traces
-============================================
-
-Unlike the packet socket, usbmon has an interface which provides traces
-in a text format. This is used for two purposes. First, it serves as a
-common trace exchange format for tools while more sophisticated formats
-are finalized. Second, humans can read it in case tools are not available.
-
-To collect a raw text trace, execute following steps.
-
-1. Prepare
-----------
-
-Mount debugfs (it has to be enabled in your kernel configuration), and
-load the usbmon module (if built as module). The second step is skipped
-if usbmon is built into the kernel::
-
-	# mount -t debugfs none_debugs /sys/kernel/debug
-	# modprobe usbmon
-	#
-
-Verify that bus sockets are present:
-
-	# ls /sys/kernel/debug/usb/usbmon
-	0s  0u  1s  1t  1u  2s  2t  2u  3s  3t  3u  4s  4t  4u
-	#
-
-Now you can choose to either use the socket '0u' (to capture packets on all
-buses), and skip to step #3, or find the bus used by your device with step #2.
-This allows to filter away annoying devices that talk continuously.
-
-2. Find which bus connects to the desired device
-------------------------------------------------
-
-Run "cat /sys/kernel/debug/usb/devices", and find the T-line which corresponds
-to the device. Usually you do it by looking for the vendor string. If you have
-many similar devices, unplug one and compare the two
-/sys/kernel/debug/usb/devices outputs. The T-line will have a bus number.
-
-Example::
-
-  T:  Bus=03 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#=  2 Spd=12  MxCh= 0
-  D:  Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs=  1
-  P:  Vendor=0557 ProdID=2004 Rev= 1.00
-  S:  Manufacturer=ATEN
-  S:  Product=UC100KM V2.00
-
-"Bus=03" means it's bus 3. Alternatively, you can look at the output from
-"lsusb" and get the bus number from the appropriate line. Example:
-
-Bus 003 Device 002: ID 0557:2004 ATEN UC100KM V2.00
-
-3. Start 'cat'
---------------
-
-::
-
-	# cat /sys/kernel/debug/usb/usbmon/3u > /tmp/1.mon.out
-
-to listen on a single bus, otherwise, to listen on all buses, type::
-
-	# cat /sys/kernel/debug/usb/usbmon/0u > /tmp/1.mon.out
-
-This process will read until it is killed. Naturally, the output can be
-redirected to a desirable location. This is preferred, because it is going
-to be quite long.
-
-4. Perform the desired operation on the USB bus
------------------------------------------------
-
-This is where you do something that creates the traffic: plug in a flash key,
-copy files, control a webcam, etc.
-
-5. Kill cat
------------
-
-Usually it's done with a keyboard interrupt (Control-C).
-
-At this point the output file (/tmp/1.mon.out in this example) can be saved,
-sent by e-mail, or inspected with a text editor. In the last case make sure
-that the file size is not excessive for your favourite editor.
-
-Raw text data format
-====================
-
-Two formats are supported currently: the original, or '1t' format, and
-the '1u' format. The '1t' format is deprecated in kernel 2.6.21. The '1u'
-format adds a few fields, such as ISO frame descriptors, interval, etc.
-It produces slightly longer lines, but otherwise is a perfect superset
-of '1t' format.
-
-If it is desired to recognize one from the other in a program, look at the
-"address" word (see below), where '1u' format adds a bus number. If 2 colons
-are present, it's the '1t' format, otherwise '1u'.
-
-Any text format data consists of a stream of events, such as URB submission,
-URB callback, submission error. Every event is a text line, which consists
-of whitespace separated words. The number or position of words may depend
-on the event type, but there is a set of words, common for all types.
-
-Here is the list of words, from left to right:
-
-- URB Tag. This is used to identify URBs, and is normally an in-kernel address
-  of the URB structure in hexadecimal, but can be a sequence number or any
-  other unique string, within reason.
-
-- Timestamp in microseconds, a decimal number. The timestamp's resolution
-  depends on available clock, and so it can be much worse than a microsecond
-  (if the implementation uses jiffies, for example).
-
-- Event Type. This type refers to the format of the event, not URB type.
-  Available types are: S - submission, C - callback, E - submission error.
-
-- "Address" word (formerly a "pipe"). It consists of four fields, separated by
-  colons: URB type and direction, Bus number, Device address, Endpoint number.
-  Type and direction are encoded with two bytes in the following manner:
-
-    == ==   =============================
-    Ci Co   Control input and output
-    Zi Zo   Isochronous input and output
-    Ii Io   Interrupt input and output
-    Bi Bo   Bulk input and output
-    == ==   =============================
-
-  Bus number, Device address, and Endpoint are decimal numbers, but they may
-  have leading zeros, for the sake of human readers.
-
-- URB Status word. This is either a letter, or several numbers separated
-  by colons: URB status, interval, start frame, and error count. Unlike the
-  "address" word, all fields save the status are optional. Interval is printed
-  only for interrupt and isochronous URBs. Start frame is printed only for
-  isochronous URBs. Error count is printed only for isochronous callback
-  events.
-
-  The status field is a decimal number, sometimes negative, which represents
-  a "status" field of the URB. This field makes no sense for submissions, but
-  is present anyway to help scripts with parsing. When an error occurs, the
-  field contains the error code.
-
-  In case of a submission of a Control packet, this field contains a Setup Tag
-  instead of an group of numbers. It is easy to tell whether the Setup Tag is
-  present because it is never a number. Thus if scripts find a set of numbers
-  in this word, they proceed to read Data Length (except for isochronous URBs).
-  If they find something else, like a letter, they read the setup packet before
-  reading the Data Length or isochronous descriptors.
-
-- Setup packet, if present, consists of 5 words: one of each for bmRequestType,
-  bRequest, wValue, wIndex, wLength, as specified by the USB Specification 2.0.
-  These words are safe to decode if Setup Tag was 's'. Otherwise, the setup
-  packet was present, but not captured, and the fields contain filler.
-
-- Number of isochronous frame descriptors and descriptors themselves.
-  If an Isochronous transfer event has a set of descriptors, a total number
-  of them in an URB is printed first, then a word per descriptor, up to a
-  total of 5. The word consists of 3 colon-separated decimal numbers for
-  status, offset, and length respectively. For submissions, initial length
-  is reported. For callbacks, actual length is reported.
-
-- Data Length. For submissions, this is the requested length. For callbacks,
-  this is the actual length.
-
-- Data tag. The usbmon may not always capture data, even if length is nonzero.
-  The data words are present only if this tag is '='.
-
-- Data words follow, in big endian hexadecimal format. Notice that they are
-  not machine words, but really just a byte stream split into words to make
-  it easier to read. Thus, the last word may contain from one to four bytes.
-  The length of collected data is limited and can be less than the data length
-  reported in the Data Length word. In the case of an Isochronous input (Zi)
-  completion where the received data is sparse in the buffer, the length of
-  the collected data can be greater than the Data Length value (because Data
-  Length counts only the bytes that were received whereas the Data words
-  contain the entire transfer buffer).
-
-Examples:
-
-An input control transfer to get a port status::
-
-  d5ea89a0 3575914555 S Ci:1:001:0 s a3 00 0000 0003 0004 4 <
-  d5ea89a0 3575914560 C Ci:1:001:0 0 4 = 01050000
-
-An output bulk transfer to send a SCSI command 0x28 (READ_10) in a 31-byte
-Bulk wrapper to a storage device at address 5::
-
-  dd65f0e8 4128379752 S Bo:1:005:2 -115 31 = 55534243 ad000000 00800000 80010a28 20000000 20000040 00000000 000000
-  dd65f0e8 4128379808 C Bo:1:005:2 0 31 >
-
-Raw binary format and API
-=========================
-
-The overall architecture of the API is about the same as the one above,
-only the events are delivered in binary format. Each event is sent in
-the following structure (its name is made up, so that we can refer to it)::
-
-  struct usbmon_packet {
-	u64 id;			/*  0: URB ID - from submission to callback */
-	unsigned char type;	/*  8: Same as text; extensible. */
-	unsigned char xfer_type; /*    ISO (0), Intr, Control, Bulk (3) */
-	unsigned char epnum;	/*     Endpoint number and transfer direction */
-	unsigned char devnum;	/*     Device address */
-	u16 busnum;		/* 12: Bus number */
-	char flag_setup;	/* 14: Same as text */
-	char flag_data;		/* 15: Same as text; Binary zero is OK. */
-	s64 ts_sec;		/* 16: gettimeofday */
-	s32 ts_usec;		/* 24: gettimeofday */
-	int status;		/* 28: */
-	unsigned int length;	/* 32: Length of data (submitted or actual) */
-	unsigned int len_cap;	/* 36: Delivered length */
-	union {			/* 40: */
-		unsigned char setup[SETUP_LEN];	/* Only for Control S-type */
-		struct iso_rec {		/* Only for ISO */
-			int error_count;
-			int numdesc;
-		} iso;
-	} s;
-	int interval;		/* 48: Only for Interrupt and ISO */
-	int start_frame;	/* 52: For ISO */
-	unsigned int xfer_flags; /* 56: copy of URB's transfer_flags */
-	unsigned int ndesc;	/* 60: Actual number of ISO descriptors */
-  };				/* 64 total length */
-
-These events can be received from a character device by reading with read(2),
-with an ioctl(2), or by accessing the buffer with mmap. However, read(2)
-only returns first 48 bytes for compatibility reasons.
-
-The character device is usually called /dev/usbmonN, where N is the USB bus
-number. Number zero (/dev/usbmon0) is special and means "all buses".
-Note that specific naming policy is set by your Linux distribution.
-
-If you create /dev/usbmon0 by hand, make sure that it is owned by root
-and has mode 0600. Otherwise, unprivileged users will be able to snoop
-keyboard traffic.
-
-The following ioctl calls are available, with MON_IOC_MAGIC 0x92:
-
- MON_IOCQ_URB_LEN, defined as _IO(MON_IOC_MAGIC, 1)
-
-This call returns the length of data in the next event. Note that majority of
-events contain no data, so if this call returns zero, it does not mean that
-no events are available.
-
- MON_IOCG_STATS, defined as _IOR(MON_IOC_MAGIC, 3, struct mon_bin_stats)
-
-The argument is a pointer to the following structure::
-
-  struct mon_bin_stats {
-	u32 queued;
-	u32 dropped;
-  };
-
-The member "queued" refers to the number of events currently queued in the
-buffer (and not to the number of events processed since the last reset).
-
-The member "dropped" is the number of events lost since the last call
-to MON_IOCG_STATS.
-
- MON_IOCT_RING_SIZE, defined as _IO(MON_IOC_MAGIC, 4)
-
-This call sets the buffer size. The argument is the size in bytes.
-The size may be rounded down to the next chunk (or page). If the requested
-size is out of [unspecified] bounds for this kernel, the call fails with
--EINVAL.
-
- MON_IOCQ_RING_SIZE, defined as _IO(MON_IOC_MAGIC, 5)
-
-This call returns the current size of the buffer in bytes.
-
- MON_IOCX_GET, defined as _IOW(MON_IOC_MAGIC, 6, struct mon_get_arg)
- MON_IOCX_GETX, defined as _IOW(MON_IOC_MAGIC, 10, struct mon_get_arg)
-
-These calls wait for events to arrive if none were in the kernel buffer,
-then return the first event. The argument is a pointer to the following
-structure::
-
-  struct mon_get_arg {
-	struct usbmon_packet *hdr;
-	void *data;
-	size_t alloc;		/* Length of data (can be zero) */
-  };
-
-Before the call, hdr, data, and alloc should be filled. Upon return, the area
-pointed by hdr contains the next event structure, and the data buffer contains
-the data, if any. The event is removed from the kernel buffer.
-
-The MON_IOCX_GET copies 48 bytes to hdr area, MON_IOCX_GETX copies 64 bytes.
-
- MON_IOCX_MFETCH, defined as _IOWR(MON_IOC_MAGIC, 7, struct mon_mfetch_arg)
-
-This ioctl is primarily used when the application accesses the buffer
-with mmap(2). Its argument is a pointer to the following structure::
-
-  struct mon_mfetch_arg {
-	uint32_t *offvec;	/* Vector of events fetched */
-	uint32_t nfetch;	/* Number of events to fetch (out: fetched) */
-	uint32_t nflush;	/* Number of events to flush */
-  };
-
-The ioctl operates in 3 stages.
-
-First, it removes and discards up to nflush events from the kernel buffer.
-The actual number of events discarded is returned in nflush.
-
-Second, it waits for an event to be present in the buffer, unless the pseudo-
-device is open with O_NONBLOCK.
-
-Third, it extracts up to nfetch offsets into the mmap buffer, and stores
-them into the offvec. The actual number of event offsets is stored into
-the nfetch.
-
- MON_IOCH_MFLUSH, defined as _IO(MON_IOC_MAGIC, 8)
-
-This call removes a number of events from the kernel buffer. Its argument
-is the number of events to remove. If the buffer contains fewer events
-than requested, all events present are removed, and no error is reported.
-This works when no events are available too.
-
- FIONBIO
-
-The ioctl FIONBIO may be implemented in the future, if there's a need.
-
-In addition to ioctl(2) and read(2), the special file of binary API can
-be polled with select(2) and poll(2). But lseek(2) does not work.
-
-* Memory-mapped access of the kernel buffer for the binary API
-
-The basic idea is simple:
-
-To prepare, map the buffer by getting the current size, then using mmap(2).
-Then, execute a loop similar to the one written in pseudo-code below::
-
-   struct mon_mfetch_arg fetch;
-   struct usbmon_packet *hdr;
-   int nflush = 0;
-   for (;;) {
-      fetch.offvec = vec; // Has N 32-bit words
-      fetch.nfetch = N;   // Or less than N
-      fetch.nflush = nflush;
-      ioctl(fd, MON_IOCX_MFETCH, &fetch);   // Process errors, too
-      nflush = fetch.nfetch;       // This many packets to flush when done
-      for (i = 0; i < nflush; i++) {
-         hdr = (struct ubsmon_packet *) &mmap_area[vec[i]];
-         if (hdr->type == '@')     // Filler packet
-            continue;
-         caddr_t data = &mmap_area[vec[i]] + 64;
-         process_packet(hdr, data);
-      }
-   }
-
-Thus, the main idea is to execute only one ioctl per N events.
-
-Although the buffer is circular, the returned headers and data do not cross
-the end of the buffer, so the above pseudo-code does not need any gathering.
diff --git a/Documentation/usb/wusb-design-overview.rst b/Documentation/usb/wusb-design-overview.rst
new file mode 100644
index 000000000000..dc5e21609bb5
--- /dev/null
+++ b/Documentation/usb/wusb-design-overview.rst
@@ -0,0 +1,457 @@
+================================
+Linux UWB + Wireless USB + WiNET
+================================
+
+   Copyright (C) 2005-2006 Intel Corporation
+
+   Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License version
+   2 as published by the Free Software Foundation.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.
+
+
+Please visit http://bughost.org/thewiki/Design-overview.txt-1.8 for
+updated content.
+
+    * Design-overview.txt-1.8
+
+This code implements a Ultra Wide Band stack for Linux, as well as
+drivers for the USB based UWB radio controllers defined in the
+Wireless USB 1.0 specification (including Wireless USB host controller
+and an Intel WiNET controller).
+
+.. Contents
+   1. Introduction
+         1. HWA: Host Wire adapters, your Wireless USB dongle
+
+         2. DWA: Device Wired Adaptor, a Wireless USB hub for wired
+            devices
+         3. WHCI: Wireless Host Controller Interface, the PCI WUSB host
+            adapter
+   2. The UWB stack
+         1. Devices and hosts: the basic structure
+
+         2. Host Controller life cycle
+
+         3. On the air: beacons and enumerating the radio neighborhood
+
+         4. Device lists
+         5. Bandwidth allocation
+
+   3. Wireless USB Host Controller drivers
+
+   4. Glossary
+
+
+Introduction
+============
+
+UWB is a wide-band communication protocol that is to serve also as the
+low-level protocol for others (much like TCP sits on IP). Currently
+these others are Wireless USB and TCP/IP, but seems Bluetooth and
+Firewire/1394 are coming along.
+
+UWB uses a band from roughly 3 to 10 GHz, transmitting at a max of
+~-41dB (or 0.074 uW/MHz--geography specific data is still being
+negotiated w/ regulators, so watch for changes). That band is divided in
+a bunch of ~1.5 GHz wide channels (or band groups) composed of three
+subbands/subchannels (528 MHz each). Each channel is independent of each
+other, so you could consider them different "busses". Initially this
+driver considers them all a single one.
+
+Radio time is divided in 65536 us long /superframes/, each one divided
+in 256 256us long /MASs/ (Media Allocation Slots), which are the basic
+time/media allocation units for transferring data. At the beginning of
+each superframe there is a Beacon Period (BP), where every device
+transmit its beacon on a single MAS. The length of the BP depends on how
+many devices are present and the length of their beacons.
+
+Devices have a MAC (fixed, 48 bit address) and a device (changeable, 16
+bit address) and send periodic beacons to advertise themselves and pass
+info on what they are and do. They advertise their capabilities and a
+bunch of other stuff.
+
+The different logical parts of this driver are:
+
+    *
+
+      *UWB*: the Ultra-Wide-Band stack -- manages the radio and
+      associated spectrum to allow for devices sharing it. Allows to
+      control bandwidth assignment, beaconing, scanning, etc
+
+    *
+
+      *WUSB*: the layer that sits on top of UWB to provide Wireless USB.
+      The Wireless USB spec defines means to control a UWB radio and to
+      do the actual WUSB.
+
+
+HWA: Host Wire adapters, your Wireless USB dongle
+-------------------------------------------------
+
+WUSB also defines a device called a Host Wire Adaptor (HWA), which in
+mere terms is a USB dongle that enables your PC to have UWB and Wireless
+USB. The Wireless USB Host Controller in a HWA looks to the host like a
+[Wireless] USB controller connected via USB (!)
+
+The HWA itself is broken in two or three main interfaces:
+
+    *
+
+      *RC*: Radio control -- this implements an interface to the
+      Ultra-Wide-Band radio controller. The driver for this implements a
+      USB-based UWB Radio Controller to the UWB stack.
+
+    *
+
+      *HC*: the wireless USB host controller. It looks like a USB host
+      whose root port is the radio and the WUSB devices connect to it.
+      To the system it looks like a separate USB host. The driver (will)
+      implement a USB host controller (similar to UHCI, OHCI or EHCI)
+      for which the root hub is the radio...To reiterate: it is a USB
+      controller that is connected via USB instead of PCI.
+
+    *
+
+      *WINET*: some HW provide a WiNET interface (IP over UWB). This
+      package provides a driver for it (it looks like a network
+      interface, winetX). The driver detects when there is a link up for
+      their type and kick into gear.
+
+
+DWA: Device Wired Adaptor, a Wireless USB hub for wired devices
+---------------------------------------------------------------
+
+These are the complement to HWAs. They are a USB host for connecting
+wired devices, but it is connected to your PC connected via Wireless
+USB. To the system it looks like yet another USB host. To the untrained
+eye, it looks like a hub that connects upstream wirelessly.
+
+We still offer no support for this; however, it should share a lot of
+code with the HWA-RC driver; there is a bunch of factorization work that
+has been done to support that in upcoming releases.
+
+
+WHCI: Wireless Host Controller Interface, the PCI WUSB host adapter
+-------------------------------------------------------------------
+
+This is your usual PCI device that implements WHCI. Similar in concept
+to EHCI, it allows your wireless USB devices (including DWAs) to connect
+to your host via a PCI interface. As in the case of the HWA, it has a
+Radio Control interface and the WUSB Host Controller interface per se.
+
+There is still no driver support for this, but will be in upcoming
+releases.
+
+
+The UWB stack
+=============
+
+The main mission of the UWB stack is to keep a tally of which devices
+are in radio proximity to allow drivers to connect to them. As well, it
+provides an API for controlling the local radio controllers (RCs from
+now on), such as to start/stop beaconing, scan, allocate bandwidth, etc.
+
+
+Devices and hosts: the basic structure
+--------------------------------------
+
+The main building block here is the UWB device (struct uwb_dev). For
+each device that pops up in radio presence (ie: the UWB host receives a
+beacon from it) you get a struct uwb_dev that will show up in
+/sys/bus/uwb/devices.
+
+For each RC that is detected, a new struct uwb_rc and struct uwb_dev are
+created. An entry is also created in /sys/class/uwb_rc for each RC.
+
+Each RC driver is implemented by a separate driver that plugs into the
+interface that the UWB stack provides through a struct uwb_rc_ops. The
+spec creators have been nice enough to make the message format the same
+for HWA and WHCI RCs, so the driver is really a very thin transport that
+moves the requests from the UWB API to the device [/uwb_rc_ops->cmd()/]
+and sends the replies and notifications back to the API
+[/uwb_rc_neh_grok()/]. Notifications are handled to the UWB daemon, that
+is chartered, among other things, to keep the tab of how the UWB radio
+neighborhood looks, creating and destroying devices as they show up or
+disappear.
+
+Command execution is very simple: a command block is sent and a event
+block or reply is expected back. For sending/receiving command/events, a
+handle called /neh/ (Notification/Event Handle) is opened with
+/uwb_rc_neh_open()/.
+
+The HWA-RC (USB dongle) driver (drivers/uwb/hwa-rc.c) does this job for
+the USB connected HWA. Eventually, drivers/whci-rc.c will do the same
+for the PCI connected WHCI controller.
+
+
+Host Controller life cycle
+--------------------------
+
+So let's say we connect a dongle to the system: it is detected and
+firmware uploaded if needed [for Intel's i1480
+/drivers/uwb/ptc/usb.c:ptc_usb_probe()/] and then it is reenumerated.
+Now we have a real HWA device connected and
+/drivers/uwb/hwa-rc.c:hwarc_probe()/ picks it up, that will set up the
+Wire-Adaptor environment and then suck it into the UWB stack's vision of
+the world [/drivers/uwb/lc-rc.c:uwb_rc_add()/].
+
+    *
+
+      [*] The stack should put a new RC to scan for devices
+      [/uwb_rc_scan()/] so it finds what's available around and tries to
+      connect to them, but this is policy stuff and should be driven
+      from user space. As of now, the operator is expected to do it
+      manually; see the release notes for documentation on the procedure.
+
+When a dongle is disconnected, /drivers/uwb/hwa-rc.c:hwarc_disconnect()/
+takes time of tearing everything down safely (or not...).
+
+
+On the air: beacons and enumerating the radio neighborhood
+----------------------------------------------------------
+
+So assuming we have devices and we have agreed for a channel to connect
+on (let's say 9), we put the new RC to beacon:
+
+    *
+
+            $ echo 9 0 > /sys/class/uwb_rc/uwb0/beacon
+
+Now it is visible. If there were other devices in the same radio channel
+and beacon group (that's what the zero is for), the dongle's radio
+control interface will send beacon notifications on its
+notification/event endpoint (NEEP). The beacon notifications are part of
+the event stream that is funneled into the API with
+/drivers/uwb/neh.c:uwb_rc_neh_grok()/ and delivered to the UWBD, the UWB
+daemon through a notification list.
+
+UWBD wakes up and scans the event list; finds a beacon and adds it to
+the BEACON CACHE (/uwb_beca/). If he receives a number of beacons from
+the same device, he considers it to be 'onair' and creates a new device
+[/drivers/uwb/lc-dev.c:uwbd_dev_onair()/]. Similarly, when no beacons
+are received in some time, the device is considered gone and wiped out
+[uwbd calls periodically /uwb/beacon.c:uwb_beca_purge()/ that will purge
+the beacon cache of dead devices].
+
+
+Device lists
+------------
+
+All UWB devices are kept in the list of the struct bus_type uwb_bus_type.
+
+
+Bandwidth allocation
+--------------------
+
+The UWB stack maintains a local copy of DRP availability through
+processing of incoming *DRP Availability Change* notifications. This
+local copy is currently used to present the current bandwidth
+availability to the user through the sysfs file
+/sys/class/uwb_rc/uwbx/bw_avail. In the future the bandwidth
+availability information will be used by the bandwidth reservation
+routines.
+
+The bandwidth reservation routines are in progress and are thus not
+present in the current release. When completed they will enable a user
+to initiate DRP reservation requests through interaction with sysfs. DRP
+reservation requests from remote UWB devices will also be handled. The
+bandwidth management done by the UWB stack will include callbacks to the
+higher layers will enable the higher layers to use the reservations upon
+completion. [Note: The bandwidth reservation work is in progress and
+subject to change.]
+
+
+Wireless USB Host Controller drivers
+====================================
+
+*WARNING* This section needs a lot of work!
+
+As explained above, there are three different types of HCs in the WUSB
+world: HWA-HC, DWA-HC and WHCI-HC.
+
+HWA-HC and DWA-HC share that they are Wire-Adapters (USB or WUSB
+connected controllers), and their transfer management system is almost
+identical. So is their notification delivery system.
+
+HWA-HC and WHCI-HC share that they are both WUSB host controllers, so
+they have to deal with WUSB device life cycle and maintenance, wireless
+root-hub
+
+HWA exposes a Host Controller interface (HWA-HC 0xe0/02/02). This has
+three endpoints (Notifications, Data Transfer In and Data Transfer
+Out--known as NEP, DTI and DTO in the code).
+
+We reserve UWB bandwidth for our Wireless USB Cluster, create a Cluster
+ID and tell the HC to use all that. Then we start it. This means the HC
+starts sending MMCs.
+
+    *
+
+      The MMCs are blocks of data defined somewhere in the WUSB1.0 spec
+      that define a stream in the UWB channel time allocated for sending
+      WUSB IEs (host to device commands/notifications) and Device
+      Notifications (device initiated to host). Each host defines a
+      unique Wireless USB cluster through MMCs. Devices can connect to a
+      single cluster at the time. The IEs are Information Elements, and
+      among them are the bandwidth allocations that tell each device
+      when can they transmit or receive.
+
+Now it all depends on external stimuli.
+
+New device connection
+---------------------
+
+A new device pops up, it scans the radio looking for MMCs that give out
+the existence of Wireless USB channels. Once one (or more) are found,
+selects which one to connect to. Sends a /DN_Connect/ (device
+notification connect) during the DNTS (Device Notification Time
+Slot--announced in the MMCs
+
+HC picks the /DN_Connect/ out (nep module sends to notif.c for delivery
+into /devconnect/). This process starts the authentication process for
+the device. First we allocate a /fake port/ and assign an
+unauthenticated address (128 to 255--what we really do is
+0x80 | fake_port_idx). We fiddle with the fake port status and /hub_wq/
+sees a new connection, so he moves on to enable the fake port with a reset.
+
+So now we are in the reset path -- we know we have a non-yet enumerated
+device with an unauthorized address; we ask user space to authenticate
+(FIXME: not yet done, similar to bluetooth pairing), then we do the key
+exchange (FIXME: not yet done) and issue a /set address 0/ to bring the
+device to the default state. Device is authenticated.
+
+From here, the USB stack takes control through the usb_hcd ops. hub_wq
+has seen the port status changes, as we have been toggling them. It will
+start enumerating and doing transfers through usb_hcd->urb_enqueue() to
+read descriptors and move our data.
+
+Device life cycle and keep alives
+---------------------------------
+
+Every time there is a successful transfer to/from a device, we update a
+per-device activity timestamp. If not, every now and then we check and
+if the activity timestamp gets old, we ping the device by sending it a
+Keep Alive IE; it responds with a /DN_Alive/ pong during the DNTS (this
+arrives to us as a notification through
+devconnect.c:wusb_handle_dn_alive(). If a device times out, we
+disconnect it from the system (cleaning up internal information and
+toggling the bits in the fake hub port, which kicks hub_wq into removing
+the rest of the stuff).
+
+This is done through devconnect:__wusb_check_devs(), which will scan the
+device list looking for whom needs refreshing.
+
+If the device wants to disconnect, it will either die (ugly) or send a
+/DN_Disconnect/ that will prompt a disconnection from the system.
+
+Sending and receiving data
+--------------------------
+
+Data is sent and received through /Remote Pipes/ (rpipes). An rpipe is
+/aimed/ at an endpoint in a WUSB device. This is the same for HWAs and
+DWAs.
+
+Each HC has a number of rpipes and buffers that can be assigned to them;
+when doing a data transfer (xfer), first the rpipe has to be aimed and
+prepared (buffers assigned), then we can start queueing requests for
+data in or out.
+
+Data buffers have to be segmented out before sending--so we send first a
+header (segment request) and then if there is any data, a data buffer
+immediately after to the DTI interface (yep, even the request). If our
+buffer is bigger than the max segment size, then we just do multiple
+requests.
+
+[This sucks, because doing USB scatter gatter in Linux is resource
+intensive, if any...not that the current approach is not. It just has to
+be cleaned up a lot :)].
+
+If reading, we don't send data buffers, just the segment headers saying
+we want to read segments.
+
+When the xfer is executed, we receive a notification that says data is
+ready in the DTI endpoint (handled through
+xfer.c:wa_handle_notif_xfer()). In there we read from the DTI endpoint a
+descriptor that gives us the status of the transfer, its identification
+(given when we issued it) and the segment number. If it was a data read,
+we issue another URB to read into the destination buffer the chunk of
+data coming out of the remote endpoint. Done, wait for the next guy. The
+callbacks for the URBs issued from here are the ones that will declare
+the xfer complete at some point and call its callback.
+
+Seems simple, but the implementation is not trivial.
+
+    *
+
+      *WARNING* Old!!
+
+The main xfer descriptor, wa_xfer (equivalent to a URB) contains an
+array of segments, tallys on segments and buffers and callback
+information. Buried in there is a lot of URBs for executing the segments
+and buffer transfers.
+
+For OUT xfers, there is an array of segments, one URB for each, another
+one of buffer URB. When submitting, we submit URBs for segment request
+1, buffer 1, segment 2, buffer 2...etc. Then we wait on the DTI for xfer
+result data; when all the segments are complete, we call the callback to
+finalize the transfer.
+
+For IN xfers, we only issue URBs for the segments we want to read and
+then wait for the xfer result data.
+
+URB mapping into xfers
+^^^^^^^^^^^^^^^^^^^^^^
+
+This is done by hwahc_op_urb_[en|de]queue(). In enqueue() we aim an
+rpipe to the endpoint where we have to transmit, create a transfer
+context (wa_xfer) and submit it. When the xfer is done, our callback is
+called and we assign the status bits and release the xfer resources.
+
+In dequeue() we are basically cancelling/aborting the transfer. We issue
+a xfer abort request to the HC, cancel all the URBs we had submitted
+and not yet done and when all that is done, the xfer callback will be
+called--this will call the URB callback.
+
+
+Glossary
+========
+
+*DWA* -- Device Wire Adapter
+
+USB host, wired for downstream devices, upstream connects wirelessly
+with Wireless USB.
+
+*EVENT* -- Response to a command on the NEEP
+
+*HWA* -- Host Wire Adapter / USB dongle for UWB and Wireless USB
+
+*NEH* -- Notification/Event Handle
+
+Handle/file descriptor for receiving notifications or events. The WA
+code requires you to get one of this to listen for notifications or
+events on the NEEP.
+
+*NEEP* -- Notification/Event EndPoint
+
+Stuff related to the management of the first endpoint of a HWA USB
+dongle that is used to deliver an stream of events and notifications to
+the host.
+
+*NOTIFICATION* -- Message coming in the NEEP as response to something.
+
+*RC* -- Radio Control
+
+Design-overview.txt-1.8 (last edited 2006-11-04 12:22:24 by
+InakyPerezGonzalez)
-- 
cgit 


From a67e13e1ee2d62871149e5ec629cf766d4127ff9 Mon Sep 17 00:00:00 2001
From: Jayant Shekhar <jshekhar@codeaurora.org>
Date: Tue, 18 Jun 2019 13:24:11 -0700
Subject: dt-bindings: msm/disp: Introduce interconnect bindings for MDSS on
 SDM845

Add interconnect properties such as interconnect provider specifier
, the edge source and destination ports which are required by the
interconnect API to configure interconnect path for MDSS.

Changes in v2:
	- None

Changes in v3:
	- Remove common property definitions (Rob Herring)

Changes in v4:
	- Use port macros and change port string names (Georgi Djakov)

Changes in v5-v7:
	- None

Signed-off-by: Sravanthi Kollukuduru <skolluku@codeaurora.org>
Signed-off-by: Jayant Shekhar <jshekhar@codeaurora.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 Documentation/devicetree/bindings/display/msm/dpu.txt | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/display/msm/dpu.txt b/Documentation/devicetree/bindings/display/msm/dpu.txt
index ad2e8830324e..a61dd40f3792 100644
--- a/Documentation/devicetree/bindings/display/msm/dpu.txt
+++ b/Documentation/devicetree/bindings/display/msm/dpu.txt
@@ -28,6 +28,11 @@ Required properties:
 - #address-cells: number of address cells for the MDSS children. Should be 1.
 - #size-cells: Should be 1.
 - ranges: parent bus address space is the same as the child bus address space.
+- interconnects : interconnect path specifier for MDSS according to
+  Documentation/devicetree/bindings/interconnect/interconnect.txt. Should be
+  2 paths corresponding to 2 AXI ports.
+- interconnect-names : MDSS will have 2 port names to differentiate between the
+  2 interconnect paths defined with interconnect specifier.
 
 Optional properties:
 - assigned-clocks: list of clock specifiers for clocks needing rate assignment
@@ -86,6 +91,11 @@ Example:
 		interrupt-controller;
 		#interrupt-cells = <1>;
 
+		interconnects = <&rsc_hlos MASTER_MDP0 &rsc_hlos SLAVE_EBI1>,
+				<&rsc_hlos MASTER_MDP1 &rsc_hlos SLAVE_EBI1>;
+
+		interconnect-names = "mdp0-mem", "mdp1-mem";
+
 		iommus = <&apps_iommu 0>;
 
 		#address-cells = <2>;
-- 
cgit 


From 14ccb66b3f585b2bc21e7256c96090abed5a512c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 6 Jun 2019 12:29:01 +0200
Subject: block: remove the bi_phys_segments field in struct bio

We only need the number of segments in the blk-mq submission path.
Remove the field from struct bio, and return it from a variant of
blk_queue_split instead of that it can passed as an argument to
those functions that need the value.

This also means we stop recounting segments except for cloning
and partial segments.

To keep the number of arguments in this how path down remove
pointless struct request_queue arguments from any of the functions
that had it and grew a nr_segs argument.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/biodoc.txt | 1 -
 1 file changed, 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index ac18b488cb5e..31c177663ed5 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -436,7 +436,6 @@ struct bio {
        struct bvec_iter	bi_iter;	/* current index into bio_vec array */
 
        unsigned int	bi_size;     /* total size in bytes */
-       unsigned short 	bi_phys_segments; /* segments after physaddr coalesce*/
        unsigned short	bi_hw_segments; /* segments after DMA remapping */
        unsigned int	bi_max;	     /* max bio_vecs we can hold
                                         used as index into pool */
-- 
cgit 


From 8060c47ba853f147c46bf1e6f6d93d1726fcb57a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 6 Jun 2019 12:26:24 +0200
Subject: block: rename CONFIG_DEBUG_BLK_CGROUP to CONFIG_BFQ_CGROUP_DEBUG

This option is entirely bfq specific, give it an appropinquate name.

Also make it depend on CONFIG_BFQ_GROUP_IOSCHED in Kconfig, as all
the functionality already does so anyway.

Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/bfq-iosched.txt          | 12 ++++++------
 Documentation/cgroup-v1/blkio-controller.txt | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 1a0f2ac02eb6..f02163fabf80 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -38,13 +38,13 @@ stack). To give an idea of the limits with BFQ, on slow or average
 CPUs, here are, first, the limits of BFQ for three different CPUs, on,
 respectively, an average laptop, an old desktop, and a cheap embedded
 system, in case full hierarchical support is enabled (i.e.,
-CONFIG_BFQ_GROUP_IOSCHED is set), but CONFIG_DEBUG_BLK_CGROUP is not
+CONFIG_BFQ_GROUP_IOSCHED is set), but CONFIG_BFQ_CGROUP_DEBUG is not
 set (Section 4-2):
 - Intel i7-4850HQ: 400 KIOPS
 - AMD A8-3850: 250 KIOPS
 - ARM CortexTM-A53 Octa-core: 80 KIOPS
 
-If CONFIG_DEBUG_BLK_CGROUP is set (and of course full hierarchical
+If CONFIG_BFQ_CGROUP_DEBUG is set (and of course full hierarchical
 support is enabled), then the sustainable throughput with BFQ
 decreases, because all blkio.bfq* statistics are created and updated
 (Section 4-2). For BFQ, this leads to the following maximum
@@ -537,19 +537,19 @@ or io.bfq.weight.
 
 As for cgroups-v1 (blkio controller), the exact set of stat files
 created, and kept up-to-date by bfq, depends on whether
-CONFIG_DEBUG_BLK_CGROUP is set. If it is set, then bfq creates all
+CONFIG_BFQ_CGROUP_DEBUG is set. If it is set, then bfq creates all
 the stat files documented in
 Documentation/cgroup-v1/blkio-controller.txt. If, instead,
-CONFIG_DEBUG_BLK_CGROUP is not set, then bfq creates only the files
+CONFIG_BFQ_CGROUP_DEBUG is not set, then bfq creates only the files
 blkio.bfq.io_service_bytes
 blkio.bfq.io_service_bytes_recursive
 blkio.bfq.io_serviced
 blkio.bfq.io_serviced_recursive
 
-The value of CONFIG_DEBUG_BLK_CGROUP greatly influences the maximum
+The value of CONFIG_BFQ_CGROUP_DEBUG greatly influences the maximum
 throughput sustainable with bfq, because updating the blkio.bfq.*
 stats is rather costly, especially for some of the stats enabled by
-CONFIG_DEBUG_BLK_CGROUP.
+CONFIG_BFQ_CGROUP_DEBUG.
 
 Parameters to set
 -----------------
diff --git a/Documentation/cgroup-v1/blkio-controller.txt b/Documentation/cgroup-v1/blkio-controller.txt
index d1a1b7bdd03a..78ec4500f220 100644
--- a/Documentation/cgroup-v1/blkio-controller.txt
+++ b/Documentation/cgroup-v1/blkio-controller.txt
@@ -77,7 +77,7 @@ Various user visible config options
 CONFIG_BLK_CGROUP
 	- Block IO controller.
 
-CONFIG_DEBUG_BLK_CGROUP
+CONFIG_BFQ_CGROUP_DEBUG
 	- Debug help. Right now some additional stats file show up in cgroup
 	  if this option is enabled.
 
@@ -193,13 +193,13 @@ Proportional weight policy files
 	  write, sync or async.
 
 - blkio.avg_queue_size
-	- Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
+	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
 	  The average queue size for this cgroup over the entire time of this
 	  cgroup's existence. Queue size samples are taken each time one of the
 	  queues of this cgroup gets a timeslice.
 
 - blkio.group_wait_time
-	- Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
+	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
 	  This is the amount of time the cgroup had to wait since it became busy
 	  (i.e., went from 0 to 1 request queued) to get a timeslice for one of
 	  its queues. This is different from the io_wait_time which is the
@@ -210,7 +210,7 @@ Proportional weight policy files
 	  got a timeslice and will not include the current delta.
 
 - blkio.empty_time
-	- Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
+	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
 	  This is the amount of time a cgroup spends without any pending
 	  requests when not being served, i.e., it does not include any time
 	  spent idling for one of the queues of the cgroup. This is in
@@ -219,7 +219,7 @@ Proportional weight policy files
 	  time it had a pending request and will not include the current delta.
 
 - blkio.idle_time
-	- Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
+	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
 	  This is the amount of time spent by the IO scheduler idling for a
 	  given cgroup in anticipation of a better request than the existing ones
 	  from other queues/cgroups. This is in nanoseconds. If this is read
@@ -228,7 +228,7 @@ Proportional weight policy files
 	  the current delta.
 
 - blkio.dequeue
-	- Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. This
+	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y. This
 	  gives the statistics about how many a times a group was dequeued
 	  from service tree of the device. First two fields specify the major
 	  and minor number of the device and third field specifies the number
-- 
cgit 


From 5992b044989daf281a2ab78b2e841cd6bd51d93a Mon Sep 17 00:00:00 2001
From: Manikanta Maddireddy <mmaddireddy@nvidia.com>
Date: Tue, 18 Jun 2019 23:32:01 +0530
Subject: dt-bindings: pci: tegra: Document PCIe DPD pinctrl optional prop

Document PCIe DPD pinctrl optional property to put PEX clk & BIAS pads
in low power mode.

Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Thierry Reding <treding@nvidia.com>
---
 Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt b/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
index 145a4f04194f..7939bca47861 100644
--- a/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
@@ -65,6 +65,14 @@ Required properties:
   - afi
   - pcie_x
 
+Optional properties:
+- pinctrl-names: A list of pinctrl state names. Must contain the following
+  entries:
+  - "default": active state, puts PCIe I/O out of deep power down state
+  - "idle": puts PCIe I/O into deep power down state
+- pinctrl-0: phandle for the default/active state of pin configurations.
+- pinctrl-1: phandle for the idle state of pin configurations.
+
 Required properties on Tegra124 and later (deprecated):
 - phys: Must contain an entry for each entry in phy-names.
 - phy-names: Must include the following entries:
-- 
cgit 


From 0fc8b82f31c4a7bea4c487d380a10d1271bf8d4d Mon Sep 17 00:00:00 2001
From: Manikanta Maddireddy <mmaddireddy@nvidia.com>
Date: Tue, 18 Jun 2019 23:32:04 +0530
Subject: PCI: Add DT binding for "reset-gpios" property

Add DT binding for "reset-gpios" property which supports GPIO based PERST#
signal.

Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Thierry Reding <treding@nvidia.com>
---
 Documentation/devicetree/bindings/pci/pci.txt | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pci/pci.txt b/Documentation/devicetree/bindings/pci/pci.txt
index 92c01db610df..2a5d91024059 100644
--- a/Documentation/devicetree/bindings/pci/pci.txt
+++ b/Documentation/devicetree/bindings/pci/pci.txt
@@ -24,6 +24,9 @@ driver implementation may support the following properties:
    unsupported link speed, for instance, trying to do training for
    unsupported link speed, etc.  Must be '4' for gen4, '3' for gen3, '2'
    for gen2, and '1' for gen1. Any other values are invalid.
+- reset-gpios:
+   If present this property specifies PERST# GPIO. Host drivers can parse the
+   GPIO and apply fundamental reset to endpoints.
 
 PCI-PCI Bridge properties
 -------------------------
-- 
cgit 


From 0ad6be30baa3a3bc69349327d62ec4c188db3364 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 19 Jun 2019 07:39:43 +0200
Subject: docs: fb: Add TER16x32 to the available font names

The new font is available since recently.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/fb/fbcon.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/fb/fbcon.rst b/Documentation/fb/fbcon.rst
index cfb9f7c38f18..1da65b9000de 100644
--- a/Documentation/fb/fbcon.rst
+++ b/Documentation/fb/fbcon.rst
@@ -82,7 +82,7 @@ C. Boot options
 
 	Select the initial font to use. The value 'name' can be any of the
 	compiled-in fonts: 10x18, 6x10, 7x14, Acorn8x8, MINI4x6,
-	PEARL8x8, ProFont6x11, SUN12x22, SUN8x16, VGA8x16, VGA8x8.
+	PEARL8x8, ProFont6x11, SUN12x22, SUN8x16, TER16x32, VGA8x16, VGA8x8.
 
 	Note, not all drivers can handle font with widths not divisible by 8,
 	such as vga16fb.
-- 
cgit 


From 7e6294cdc0b055225f45e66003b33175e06154aa Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 15:51:18 -0300
Subject: docs: trace: add a missing blank line

Sphinx expects a blank line after a literal block markup.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/trace/kprobetrace.rst | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index baa3c42ba2f4..7d2b0178d3f3 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -191,6 +191,7 @@ events, you need to enable it.
 
 Use the following command to start tracing in an interval.
 ::
+
     # echo 1 > tracing_on
     Open something...
     # echo 0 > tracing_on
-- 
cgit 


From 220ee02a0b38726a90430e94714c87550dc3d476 Mon Sep 17 00:00:00 2001
From: Stephen Kitt <steve@sk2.org>
Date: Thu, 13 Jun 2019 18:25:48 +0200
Subject: docs: stop suggesting strlcpy

Since strlcpy is deprecated, the documentation shouldn't suggest using
it. This patch fixes the examples to use strscpy instead. It also uses
sizeof instead of underlying constants as far as possible, to simplify
future changes to the corresponding data structures.

Signed-off-by: Stephen Kitt <steve@sk2.org>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Federico Vaga <federico.vaga@vaga.pv.it>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/hid/hid-transport.txt                         | 6 +++---
 Documentation/i2c/instantiating-devices                     | 2 +-
 Documentation/i2c/upgrading-clients                         | 4 ++--
 Documentation/kernel-hacking/locking.rst                    | 6 +++---
 Documentation/translations/it_IT/kernel-hacking/locking.rst | 6 +++---
 5 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/hid/hid-transport.txt b/Documentation/hid/hid-transport.txt
index 3dcba9fd4a3a..4f41d67f1b4b 100644
--- a/Documentation/hid/hid-transport.txt
+++ b/Documentation/hid/hid-transport.txt
@@ -194,9 +194,9 @@ with HID core:
 		goto err_<...>;
 	}
 
-	strlcpy(hid->name, <device-name-src>, 127);
-	strlcpy(hid->phys, <device-phys-src>, 63);
-	strlcpy(hid->uniq, <device-uniq-src>, 63);
+	strscpy(hid->name, <device-name-src>, sizeof(hid->name));
+	strscpy(hid->phys, <device-phys-src>, sizeof(hid->phys));
+	strscpy(hid->uniq, <device-uniq-src>, sizeof(hid->uniq));
 
 	hid->ll_driver = &custom_ll_driver;
 	hid->bus = <device-bus>;
diff --git a/Documentation/i2c/instantiating-devices b/Documentation/i2c/instantiating-devices
index 5a3e2f331e8c..345e9ea8281a 100644
--- a/Documentation/i2c/instantiating-devices
+++ b/Documentation/i2c/instantiating-devices
@@ -137,7 +137,7 @@ static int usb_hcd_nxp_probe(struct platform_device *pdev)
 	(...)
 	i2c_adap = i2c_get_adapter(2);
 	memset(&i2c_info, 0, sizeof(struct i2c_board_info));
-	strlcpy(i2c_info.type, "isp1301_nxp", I2C_NAME_SIZE);
+	strscpy(i2c_info.type, "isp1301_nxp", sizeof(i2c_info.type));
 	isp1301_i2c_client = i2c_new_probed_device(i2c_adap, &i2c_info,
 						   normal_i2c, NULL);
 	i2c_put_adapter(i2c_adap);
diff --git a/Documentation/i2c/upgrading-clients b/Documentation/i2c/upgrading-clients
index ccba3ffd6e80..96392cc5b5c7 100644
--- a/Documentation/i2c/upgrading-clients
+++ b/Documentation/i2c/upgrading-clients
@@ -43,7 +43,7 @@ static int example_attach(struct i2c_adapter *adap, int addr, int kind)
 	example->client.adapter = adap;
 
 	i2c_set_clientdata(&state->i2c_client, state);
-	strlcpy(client->i2c_client.name, "example", I2C_NAME_SIZE);
+	strscpy(client->i2c_client.name, "example", sizeof(client->i2c_client.name));
 
 	ret = i2c_attach_client(&state->i2c_client);
 	if (ret < 0) {
@@ -138,7 +138,7 @@ can be removed:
 -	example->client.flags   = 0;
 -	example->client.adapter = adap;
 -
--	strlcpy(client->i2c_client.name, "example", I2C_NAME_SIZE);
+-	strscpy(client->i2c_client.name, "example", sizeof(client->i2c_client.name));
 
 The i2c_set_clientdata is now:
 
diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst
index 519673df0e82..dc698ea456e0 100644
--- a/Documentation/kernel-hacking/locking.rst
+++ b/Documentation/kernel-hacking/locking.rst
@@ -451,7 +451,7 @@ to protect the cache and all the objects within it. Here's the code::
             if ((obj = kmalloc(sizeof(*obj), GFP_KERNEL)) == NULL)
                     return -ENOMEM;
 
-            strlcpy(obj->name, name, sizeof(obj->name));
+            strscpy(obj->name, name, sizeof(obj->name));
             obj->id = id;
             obj->popularity = 0;
 
@@ -660,7 +660,7 @@ Here is the code::
      }
 
     @@ -63,6 +94,7 @@
-             strlcpy(obj->name, name, sizeof(obj->name));
+             strscpy(obj->name, name, sizeof(obj->name));
              obj->id = id;
              obj->popularity = 0;
     +        obj->refcnt = 1; /* The cache holds a reference */
@@ -774,7 +774,7 @@ the lock is no longer used to protect the reference count itself.
      }
 
     @@ -94,7 +76,7 @@
-             strlcpy(obj->name, name, sizeof(obj->name));
+             strscpy(obj->name, name, sizeof(obj->name));
              obj->id = id;
              obj->popularity = 0;
     -        obj->refcnt = 1; /* The cache holds a reference */
diff --git a/Documentation/translations/it_IT/kernel-hacking/locking.rst b/Documentation/translations/it_IT/kernel-hacking/locking.rst
index 0ef31666663b..5fd8a1abd2be 100644
--- a/Documentation/translations/it_IT/kernel-hacking/locking.rst
+++ b/Documentation/translations/it_IT/kernel-hacking/locking.rst
@@ -468,7 +468,7 @@ e tutti gli oggetti che contiene. Ecco il codice::
             if ((obj = kmalloc(sizeof(*obj), GFP_KERNEL)) == NULL)
                     return -ENOMEM;
 
-            strlcpy(obj->name, name, sizeof(obj->name));
+            strscpy(obj->name, name, sizeof(obj->name));
             obj->id = id;
             obj->popularity = 0;
 
@@ -678,7 +678,7 @@ Ecco il codice::
      }
 
     @@ -63,6 +94,7 @@
-             strlcpy(obj->name, name, sizeof(obj->name));
+             strscpy(obj->name, name, sizeof(obj->name));
              obj->id = id;
              obj->popularity = 0;
     +        obj->refcnt = 1; /* The cache holds a reference */
@@ -792,7 +792,7 @@ contatore stesso.
      }
 
     @@ -94,7 +76,7 @@
-             strlcpy(obj->name, name, sizeof(obj->name));
+             strscpy(obj->name, name, sizeof(obj->name));
              obj->id = id;
              obj->popularity = 0;
     -        obj->refcnt = 1; /* The cache holds a reference */
-- 
cgit 


From 22aac857394c6105803afcb9801e4e4771eb755d Mon Sep 17 00:00:00 2001
From: Valentin Schneider <valentin.schneider@arm.com>
Date: Tue, 18 Jun 2019 15:56:05 +0100
Subject: docs/vm: hwpoison.rst: Fix quote formatting

The asterisks prepended to the quoted text currently get translated to
bullet points, which gets increasingly confusing the smaller your
screen is (when viewing the sphinx output, that is).

Convert the whole quote to a literal block.

Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/vm/hwpoison.rst | 52 +++++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/vm/hwpoison.rst b/Documentation/vm/hwpoison.rst
index 09bd24a92784..a5c884293dac 100644
--- a/Documentation/vm/hwpoison.rst
+++ b/Documentation/vm/hwpoison.rst
@@ -13,32 +13,32 @@ kill the processes associated with it and avoid using it in the future.
 
 This patchkit implements the necessary infrastructure in the VM.
 
-To quote the overview comment:
-
- * High level machine check handler. Handles pages reported by the
- * hardware as being corrupted usually due to a 2bit ECC memory or cache
- * failure.
- *
- * This focusses on pages detected as corrupted in the background.
- * When the current CPU tries to consume corruption the currently
- * running process can just be killed directly instead. This implies
- * that if the error cannot be handled for some reason it's safe to
- * just ignore it because no corruption has been consumed yet. Instead
- * when that happens another machine check will happen.
- *
- * Handles page cache pages in various states. The tricky part
- * here is that we can access any page asynchronous to other VM
- * users, because memory failures could happen anytime and anywhere,
- * possibly violating some of their assumptions. This is why this code
- * has to be extremely careful. Generally it tries to use normal locking
- * rules, as in get the standard locks, even if that means the
- * error handling takes potentially a long time.
- *
- * Some of the operations here are somewhat inefficient and have non
- * linear algorithmic complexity, because the data structures have not
- * been optimized for this case. This is in particular the case
- * for the mapping from a vma to a process. Since this case is expected
- * to be rare we hope we can get away with this.
+To quote the overview comment::
+
+	High level machine check handler. Handles pages reported by the
+	hardware as being corrupted usually due to a 2bit ECC memory or cache
+	failure.
+
+	This focusses on pages detected as corrupted in the background.
+	When the current CPU tries to consume corruption the currently
+	running process can just be killed directly instead. This implies
+	that if the error cannot be handled for some reason it's safe to
+	just ignore it because no corruption has been consumed yet. Instead
+	when that happens another machine check will happen.
+
+	Handles page cache pages in various states. The tricky part
+	here is that we can access any page asynchronous to other VM
+	users, because memory failures could happen anytime and anywhere,
+	possibly violating some of their assumptions. This is why this code
+	has to be extremely careful. Generally it tries to use normal locking
+	rules, as in get the standard locks, even if that means the
+	error handling takes potentially a long time.
+
+	Some of the operations here are somewhat inefficient and have non
+	linear algorithmic complexity, because the data structures have not
+	been optimized for this case. This is in particular the case
+	for the mapping from a vma to a process. Since this case is expected
+	to be rare we hope we can get away with this.
 
 The code consists of a the high level handler in mm/memory-failure.c,
 a new page poison bit and various checks in the VM to handle poisoned
-- 
cgit 


From eb8ed28f024fe428a08ee6b7c4de6a31ff6610ad Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 7 Jun 2019 16:14:06 +0100
Subject: Documentation: x86: Contiguous cbm isn't all X86

Since commit 4d05bf71f157 ("x86/resctrl: Introduce AMD QOS feature")
resctrl has supported non-contiguous cache bit masks. The interface
for this is currently try-it-and-see.

Update the documentation to say Intel CPUs have this requirement,
instead of X86.

Cc: Babu Moger <Babu.Moger@amd.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/x86/resctrl_ui.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/x86/resctrl_ui.rst b/Documentation/x86/resctrl_ui.rst
index 225cfd4daaee..066f94e53418 100644
--- a/Documentation/x86/resctrl_ui.rst
+++ b/Documentation/x86/resctrl_ui.rst
@@ -342,7 +342,7 @@ For cache resources we describe the portion of the cache that is available
 for allocation using a bitmask. The maximum value of the mask is defined
 by each cpu model (and may be different for different cache levels). It
 is found using CPUID, but is also provided in the "info" directory of
-the resctrl file system in "info/{resource}/cbm_mask". X86 hardware
+the resctrl file system in "info/{resource}/cbm_mask". Intel hardware
 requires that these masks have all the '1' bits in a contiguous block. So
 0x3, 0x6 and 0xC are legal 4-bit masks with two bits set, but 0x5, 0x9
 and 0xA are not.  On a system with a 20-bit mask each bit represents 5%
-- 
cgit 


From 7c7a49958286fee9a2b18baffc9c626304a00843 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 7 Jun 2019 16:14:07 +0100
Subject: Documentation: x86: Remove cdpl2 unspported statement and fix
 capitalisation

"L2 cache does not support code and data prioritization". This isn't
true, elsewhere the document says it can be enabled with the cdpl2
mount option.

While we're here, these sample strings have lower-case code/data,
which isn't how the kernel exports them.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/x86/resctrl_ui.rst | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/x86/resctrl_ui.rst b/Documentation/x86/resctrl_ui.rst
index 066f94e53418..638cd987937d 100644
--- a/Documentation/x86/resctrl_ui.rst
+++ b/Documentation/x86/resctrl_ui.rst
@@ -418,16 +418,22 @@ L3 schemata file details (CDP enabled via mount option to resctrl)
 When CDP is enabled L3 control is split into two separate resources
 so you can specify independent masks for code and data like this::
 
-	L3data:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
-	L3code:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
+	L3DATA:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
+	L3CODE:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
 
 L2 schemata file details
 ------------------------
-L2 cache does not support code and data prioritization, so the
-schemata format is always::
+CDP is supported at L2 using the 'cdpl2' mount option. The schemata
+format is either::
 
 	L2:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
 
+or
+
+	L2DATA:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
+	L2CODE:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
+
+
 Memory bandwidth Allocation (default mode)
 ------------------------------------------
 
-- 
cgit 


From b5453a8ec310f3ee2f7689958d63d9b8ffcbcd3e Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 7 Jun 2019 16:14:08 +0100
Subject: Documentation: x86: Clarify MBA takes MB as referring to mba_sc

"If the MBA is specified in MB then user can enter the max b/w in MB"
is a tautology. How can the user know if the schemata takes a percentage
or a MB/s value?

This is referring to whether the software controller is interpreting
the schemata's value. Make this clear.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/x86/resctrl_ui.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/x86/resctrl_ui.rst b/Documentation/x86/resctrl_ui.rst
index 638cd987937d..866b66aa289b 100644
--- a/Documentation/x86/resctrl_ui.rst
+++ b/Documentation/x86/resctrl_ui.rst
@@ -677,8 +677,8 @@ allocations can overlap or not. The allocations specifies the maximum
 b/w that the group may be able to use and the system admin can configure
 the b/w accordingly.
 
-If the MBA is specified in MB(megabytes) then user can enter the max b/w in MB
-rather than the percentage values.
+If resctrl is using the software controller (mba_sc) then user can enter the
+max b/w in MB rather than the percentage values.
 ::
 
   # echo "L3:0=3;1=c\nMB:0=1024;1=500" > /sys/fs/resctrl/p0/schemata
-- 
cgit 


From 57794aab8884087debb22fc214d8ca81999ffb0e Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 7 Jun 2019 16:14:09 +0100
Subject: Documentation: x86: fix some typos

These are all obvious typos.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/x86/resctrl_ui.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/x86/resctrl_ui.rst b/Documentation/x86/resctrl_ui.rst
index 866b66aa289b..5368cedfb530 100644
--- a/Documentation/x86/resctrl_ui.rst
+++ b/Documentation/x86/resctrl_ui.rst
@@ -40,7 +40,7 @@ mount options are:
 	Enable the MBA Software Controller(mba_sc) to specify MBA
 	bandwidth in MBps
 
-L2 and L3 CDP are controlled seperately.
+L2 and L3 CDP are controlled separately.
 
 RDT features are orthogonal. A particular system may support only
 monitoring, only control, or both monitoring and control.  Cache
@@ -118,7 +118,7 @@ related to allocation:
 			      Corresponding region is pseudo-locked. No
 			      sharing allowed.
 
-Memory bandwitdh(MB) subdirectory contains the following files
+Memory bandwidth(MB) subdirectory contains the following files
 with respect to allocation:
 
 "min_bandwidth":
@@ -209,7 +209,7 @@ All groups contain the following files:
 	CPUs to/from this group. As with the tasks file a hierarchy is
 	maintained where MON groups may only include CPUs owned by the
 	parent CTRL_MON group.
-	When the resouce group is in pseudo-locked mode this file will
+	When the resource group is in pseudo-locked mode this file will
 	only be readable, reflecting the CPUs associated with the
 	pseudo-locked region.
 
@@ -380,7 +380,7 @@ where L2 external  is 10GBps (hence aggregate L2 external bandwidth is
 240GBps) and L3 external bandwidth is 100GBps. Now a workload with '20
 threads, having 50% bandwidth, each consuming 5GBps' consumes the max L3
 bandwidth of 100GBps although the percentage value specified is only 50%
-<< 100%. Hence increasing the bandwidth percentage will not yeild any
+<< 100%. Hence increasing the bandwidth percentage will not yield any
 more bandwidth. This is because although the L2 external bandwidth still
 has capacity, the L3 external bandwidth is fully used. Also note that
 this would be dependent on number of cores the benchmark is run on.
@@ -398,7 +398,7 @@ In order to mitigate this and make the interface more user friendly,
 resctrl added support for specifying the bandwidth in MBps as well.  The
 kernel underneath would use a software feedback mechanism or a "Software
 Controller(mba_sc)" which reads the actual bandwidth using MBM counters
-and adjust the memowy bandwidth percentages to ensure::
+and adjust the memory bandwidth percentages to ensure::
 
 	"actual bandwidth < user specified bandwidth".
 
-- 
cgit 


From 0f48a2441613d39db50fb12cc739455ef7f1921f Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 17 Jun 2019 16:34:00 +0200
Subject: doc-rst: Add missing newline at end of file

"git diff" says:

    \ No newline at end of file

after modifying the file.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/docutils.conf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/docutils.conf b/Documentation/docutils.conf
index 2830772264c8..f1a180b97dec 100644
--- a/Documentation/docutils.conf
+++ b/Documentation/docutils.conf
@@ -4,4 +4,4 @@
 # http://docutils.sourceforge.net/docs/user/config.html
 
 [general]
-halt_level: severe
\ No newline at end of file
+halt_level: severe
-- 
cgit 


From 2269f0c15191af317d64115176a01bf303532af3 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Thu, 16 May 2019 14:08:48 +0200
Subject: dt-bindings: pwm: Convert Allwinner PWM to a schema

The Allwinner SoCs have a PWM controller supported in Linux, with a
matching Device Tree binding.

Now that we have the DT validation in place, let's convert the device tree
bindings for that controller over to a YAML schemas.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
---
 .../bindings/pwm/allwinner,sun4i-a10-pwm.yaml      | 57 ++++++++++++++++++++++
 .../devicetree/bindings/pwm/pwm-sun4i.txt          | 24 ---------
 2 files changed, 57 insertions(+), 24 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/pwm/allwinner,sun4i-a10-pwm.yaml
 delete mode 100644 Documentation/devicetree/bindings/pwm/pwm-sun4i.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pwm/allwinner,sun4i-a10-pwm.yaml b/Documentation/devicetree/bindings/pwm/allwinner,sun4i-a10-pwm.yaml
new file mode 100644
index 000000000000..0ac52f83a58c
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/allwinner,sun4i-a10-pwm.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/allwinner,sun4i-a10-pwm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 PWM Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  "#pwm-cells":
+    const: 3
+
+  compatible:
+    oneOf:
+      - const: allwinner,sun4i-a10-pwm
+      - const: allwinner,sun5i-a10s-pwm
+      - const: allwinner,sun5i-a13-pwm
+      - const: allwinner,sun7i-a20-pwm
+      - const: allwinner,sun8i-h3-pwm
+      - items:
+          - const: allwinner,sun8i-a83t-pwm
+          - const: allwinner,sun8i-h3-pwm
+      - items:
+          - const: allwinner,sun50i-a64-pwm
+          - const: allwinner,sun5i-a13-pwm
+      - items:
+          - const: allwinner,sun50i-h5-pwm
+          - const: allwinner,sun5i-a13-pwm
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+required:
+  - "#pwm-cells"
+  - compatible
+  - reg
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    pwm: pwm@1c20e00 {
+        compatible = "allwinner,sun7i-a20-pwm";
+        reg = <0x01c20e00 0xc>;
+        clocks = <&osc24M>;
+        #pwm-cells = <3>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt b/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt
deleted file mode 100644
index 2a1affbff45e..000000000000
--- a/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Allwinner sun4i and sun7i SoC PWM controller
-
-Required properties:
-  - compatible: should be one of:
-    - "allwinner,sun4i-a10-pwm"
-    - "allwinner,sun5i-a10s-pwm"
-    - "allwinner,sun5i-a13-pwm"
-    - "allwinner,sun7i-a20-pwm"
-    - "allwinner,sun8i-h3-pwm"
-    - "allwinner,sun50i-a64-pwm", "allwinner,sun5i-a13-pwm"
-    - "allwinner,sun50i-h5-pwm", "allwinner,sun5i-a13-pwm"
-  - reg: physical base address and length of the controller's registers
-  - #pwm-cells: should be 3. See pwm.txt in this directory for a description of
-    the cells format.
-  - clocks: From common clock binding, handle to the parent clock.
-
-Example:
-
-	pwm: pwm@1c20e00 {
-		compatible = "allwinner,sun7i-a20-pwm";
-		reg = <0x01c20e00 0xc>;
-		clocks = <&osc24M>;
-		#pwm-cells = <3>;
-	};
-- 
cgit 


From 4a96895f74c9633b51427fd080ab70fa62b65bc4 Mon Sep 17 00:00:00 2001
From: Yegor Yefremov <yegorslists@googlemail.com>
Date: Thu, 20 Jun 2019 08:24:20 +0200
Subject: tty/serial/8250: use mctrl_gpio helpers

This patch permits the usage for GPIOs to control
the CTS/RTS/DTR/DSR/DCD/RI signals.

Changed by Stefan:
Only call mctrl_gpio_init(), if the device has no ACPI companion device
to not break existing ACPI based systems. Also only use the mctrl_gpio_
functions when "gpios" is available.

Use MSR / MCR <-> TIOCM wrapper functions.

Signed-off-by: Yegor Yefremov <yegorslists@googlemail.com>
Signed-off-by: Stefan Roese <sr@denx.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Yegor Yefremov <yegorslists@googlemail.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Giulio Benetti <giulio.benetti@micronovasrl.com>
Cc: Yegor Yefremov <yegorslists@googlemail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/serial/8250.txt | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/serial/8250.txt b/Documentation/devicetree/bindings/serial/8250.txt
index 3cba12f855b7..20d351f268ef 100644
--- a/Documentation/devicetree/bindings/serial/8250.txt
+++ b/Documentation/devicetree/bindings/serial/8250.txt
@@ -53,6 +53,9 @@ Optional properties:
   programmable TX FIFO thresholds.
 - resets : phandle + reset specifier pairs
 - overrun-throttle-ms : how long to pause uart rx when input overrun is encountered.
+- {rts,cts,dtr,dsr,rng,dcd}-gpios: specify a GPIO for RTS/CTS/DTR/DSR/RI/DCD
+  line respectively. It will use specified GPIO instead of the peripheral
+  function pin for the UART feature. If unsure, don't specify this property.
 
 Note:
 * fsl,ns16550:
@@ -74,3 +77,19 @@ Example:
 		interrupts = <10>;
 		reg-shift = <2>;
 	};
+
+Example for OMAP UART using GPIO-based modem control signals:
+
+	uart4: serial@49042000 {
+		compatible = "ti,omap3-uart";
+		reg = <0x49042000 0x400>;
+		interrupts = <80>;
+		ti,hwmods = "uart4";
+		clock-frequency = <48000000>;
+		cts-gpios = <&gpio3 5 GPIO_ACTIVE_LOW>;
+		rts-gpios = <&gpio3 6 GPIO_ACTIVE_LOW>;
+		dtr-gpios = <&gpio1 12 GPIO_ACTIVE_LOW>;
+		dsr-gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;
+		dcd-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>;
+		rng-gpios = <&gpio1 15 GPIO_ACTIVE_LOW>;
+	};
-- 
cgit 


From 7e31d8215fd8cb1c13b47e23f1136545010e00de Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Sun, 9 Jun 2019 23:17:02 +0900
Subject: Documentation: nvme: add an example for nvme fault injection

This adds an example of how to inject errors into admin commands.

Suggested-by: Thomas Tai <thomas.tai@oracle.com>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Minwoo Im <minwoo.im@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 .../fault-injection/nvme-fault-injection.txt       | 56 ++++++++++++++++++++++
 1 file changed, 56 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/fault-injection/nvme-fault-injection.txt b/Documentation/fault-injection/nvme-fault-injection.txt
index 8fbf3bf60b62..efcb339a3add 100644
--- a/Documentation/fault-injection/nvme-fault-injection.txt
+++ b/Documentation/fault-injection/nvme-fault-injection.txt
@@ -114,3 +114,59 @@ R13: ffff88011a3c9680 R14: 0000000000000000 R15: 0000000000000000
   cpu_startup_entry+0x6f/0x80
   start_secondary+0x187/0x1e0
   secondary_startup_64+0xa5/0xb0
+
+Example 3: Inject an error into the 10th admin command
+------------------------------------------------------
+
+echo 100 > /sys/kernel/debug/nvme0/fault_inject/probability
+echo 10 > /sys/kernel/debug/nvme0/fault_inject/space
+echo 1 > /sys/kernel/debug/nvme0/fault_inject/times
+nvme reset /dev/nvme0
+
+Expected Result:
+
+After NVMe controller reset, the reinitialization may or may not succeed.
+It depends on which admin command is actually forced to fail.
+
+Message from dmesg:
+
+nvme nvme0: resetting controller
+FAULT_INJECTION: forcing a failure.
+name fault_inject, interval 1, probability 100, space 1, times 1
+CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.2.0-rc2+ #2
+Hardware name: MSI MS-7A45/B150M MORTAR ARCTIC (MS-7A45), BIOS 1.50 04/25/2017
+Call Trace:
+ <IRQ>
+ dump_stack+0x63/0x85
+ should_fail+0x14a/0x170
+ nvme_should_fail+0x38/0x80 [nvme_core]
+ nvme_irq+0x129/0x280 [nvme]
+ ? blk_mq_end_request+0xb3/0x120
+ __handle_irq_event_percpu+0x84/0x1a0
+ handle_irq_event_percpu+0x32/0x80
+ handle_irq_event+0x3b/0x60
+ handle_edge_irq+0x7f/0x1a0
+ handle_irq+0x20/0x30
+ do_IRQ+0x4e/0xe0
+ common_interrupt+0xf/0xf
+ </IRQ>
+RIP: 0010:cpuidle_enter_state+0xc5/0x460
+Code: ff e8 8f 5f 86 ff 80 7d c7 00 74 17 9c 58 0f 1f 44 00 00 f6 c4 02 0f 85 69 03 00 00 31 ff e8 62 aa 8c ff fb 66 0f 1f 44 00 00 <45> 85 ed 0f 88 37 03 00 00 4c 8b 45 d0 4c 2b 45 b8 48 ba cf f7 53
+RSP: 0018:ffffffff88c03dd0 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffdc
+RAX: ffff9dac25a2ac80 RBX: ffffffff88d53760 RCX: 000000000000001f
+RDX: 0000000000000000 RSI: 000000002d958403 RDI: 0000000000000000
+RBP: ffffffff88c03e18 R08: fffffff75e35ffb7 R09: 00000a49a56c0b48
+R10: ffffffff88c03da0 R11: 0000000000001b0c R12: ffff9dac25a34d00
+R13: 0000000000000006 R14: 0000000000000006 R15: ffffffff88d53760
+ cpuidle_enter+0x2e/0x40
+ call_cpuidle+0x23/0x40
+ do_idle+0x201/0x280
+ cpu_startup_entry+0x1d/0x20
+ rest_init+0xaa/0xb0
+ arch_call_rest_init+0xe/0x1b
+ start_kernel+0x51c/0x53b
+ x86_64_start_reservations+0x24/0x26
+ x86_64_start_kernel+0x74/0x77
+ secondary_startup_64+0xa4/0xb0
+nvme nvme0: Could not set queue count (16385)
+nvme nvme0: IO queues not created
-- 
cgit 


From 69bc586518e0902493c42b77652fa712fae3480f Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das@bp.renesas.com>
Date: Fri, 7 Jun 2019 08:03:36 +0100
Subject: dt-bindings: PCI: rcar: Add device tree support for r8a774a1

Add PCIe support for the RZ/G2M (a.k.a. R8A774A1).

Signed-off-by: Biju Das <biju.das@bp.renesas.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Acked-by: Simon Horman <horms+renesas@verge.net.au>
---
 Documentation/devicetree/bindings/pci/rcar-pci.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pci/rcar-pci.txt b/Documentation/devicetree/bindings/pci/rcar-pci.txt
index 6904882a0e94..45bba9f88a51 100644
--- a/Documentation/devicetree/bindings/pci/rcar-pci.txt
+++ b/Documentation/devicetree/bindings/pci/rcar-pci.txt
@@ -3,6 +3,7 @@
 Required properties:
 compatible: "renesas,pcie-r8a7743" for the R8A7743 SoC;
 	    "renesas,pcie-r8a7744" for the R8A7744 SoC;
+	    "renesas,pcie-r8a774a1" for the R8A774A1 SoC;
 	    "renesas,pcie-r8a774c0" for the R8A774C0 SoC;
 	    "renesas,pcie-r8a7779" for the R8A7779 SoC;
 	    "renesas,pcie-r8a7790" for the R8A7790 SoC;
-- 
cgit 


From 51cc0da52e16ef487e3b9f73ad0ba8243684e73d Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Thu, 20 Jun 2019 13:46:36 +0200
Subject: dt-bindings: phy-pxa-usb: add bindings

This is the PHY chip for USB OTG on PXA platforms.

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 Documentation/devicetree/bindings/phy/phy-pxa-usb.txt | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/phy/phy-pxa-usb.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/phy/phy-pxa-usb.txt b/Documentation/devicetree/bindings/phy/phy-pxa-usb.txt
new file mode 100644
index 000000000000..93fc09c12954
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-pxa-usb.txt
@@ -0,0 +1,18 @@
+Marvell PXA USB PHY
+-------------------
+
+Required properties:
+- compatible: one of: "marvell,mmp2-usb-phy", "marvell,pxa910-usb-phy",
+	"marvell,pxa168-usb-phy",
+- #phy-cells: must be 0
+
+Example:
+	usb-phy: usbphy@d4207000 {
+		compatible = "marvell,mmp2-usb-phy";
+		reg = <0xd4207000 0x40>;
+		#phy-cells = <0>;
+		status = "okay";
+	};
+
+This document explains the device tree binding. For general
+information about PHY subsystem refer to Documentation/phy.txt
-- 
cgit 


From 00fcc69d6e52643fc5ecd04ff7e2d560ed3e2ec0 Mon Sep 17 00:00:00 2001
From: Guido Günther <agx@sigxcpu.org>
Date: Thu, 20 Jun 2019 21:42:36 +0200
Subject: dt-bindings: phy: Add documentation for mixel dphy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for the MIXEL DPHY IP as found on NXP's i.MX8MQ SoCs.

Signed-off-by: Guido Günther <agx@sigxcpu.org>
Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 .../devicetree/bindings/phy/mixel,mipi-dsi-phy.txt | 29 ++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/phy/mixel,mipi-dsi-phy.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/phy/mixel,mipi-dsi-phy.txt b/Documentation/devicetree/bindings/phy/mixel,mipi-dsi-phy.txt
new file mode 100644
index 000000000000..9b23407233c0
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/mixel,mipi-dsi-phy.txt
@@ -0,0 +1,29 @@
+Mixel DSI PHY for i.MX8
+
+The Mixel MIPI-DSI PHY IP block is e.g. found on i.MX8 platforms (along the
+MIPI-DSI IP from Northwest Logic). It represents the physical layer for the
+electrical signals for DSI.
+
+Required properties:
+- compatible: Must be:
+  - "fsl,imx8mq-mipi-dphy"
+- clocks: Must contain an entry for each entry in clock-names.
+- clock-names: Must contain the following entries:
+  - "phy_ref": phandle and specifier referring to the DPHY ref clock
+- reg: the register range of the PHY controller
+- #phy-cells: number of cells in PHY, as defined in
+  Documentation/devicetree/bindings/phy/phy-bindings.txt
+  this must be <0>
+
+Optional properties:
+- power-domains: phandle to power domain
+
+Example:
+	dphy: dphy@30a0030 {
+		compatible = "fsl,imx8mq-mipi-dphy";
+		clocks = <&clk IMX8MQ_CLK_DSI_PHY_REF>;
+		clock-names = "phy_ref";
+		reg = <0x30a00300 0x100>;
+		power-domains = <&pd_mipi0>;
+		#phy-cells = <0>;
+        };
-- 
cgit 


From 235e6e0a8d5fd1d0285a8d13fbaa3b92112b877a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 21 Jun 2019 15:28:35 +0200
Subject: remove Documentation/index.rst.rej

It got added accidentally by a previous patch that I had to "hand
apply".

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/index.rst.rej | 10 ----------
 1 file changed, 10 deletions(-)
 delete mode 100644 Documentation/index.rst.rej

(limited to 'Documentation')

diff --git a/Documentation/index.rst.rej b/Documentation/index.rst.rej
deleted file mode 100644
index b1af6eab22f3..000000000000
--- a/Documentation/index.rst.rej
+++ /dev/null
@@ -1,10 +0,0 @@
---- Documentation/index.rst
-+++ Documentation/index.rst
-@@ -103,6 +103,7 @@ needed).
-    vm/index
-    bpf/index
-    PCI/index
-+   usb/index
-    misc-devices/index
- 
- Architecture-specific documentation
-- 
cgit 


From 4489f161b739f01ab60a58784f6ef7de9d7a1352 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 17:53:27 -0300
Subject: docs: driver-model: convert docs to ReST and rename to *.rst

Convert the various documents at the driver-model, preparing
them to be part of the driver-api book.

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> # ice
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/gpio/driver.rst       |   2 +-
 Documentation/driver-model/binding.rst         |  98 ++++++
 Documentation/driver-model/binding.txt         |  98 ------
 Documentation/driver-model/bus.rst             | 146 ++++++++
 Documentation/driver-model/bus.txt             | 143 --------
 Documentation/driver-model/class.rst           | 149 ++++++++
 Documentation/driver-model/class.txt           | 147 --------
 Documentation/driver-model/design-patterns.rst | 116 +++++++
 Documentation/driver-model/design-patterns.txt | 116 -------
 Documentation/driver-model/device.rst          | 109 ++++++
 Documentation/driver-model/device.txt          | 106 ------
 Documentation/driver-model/devres.rst          | 414 +++++++++++++++++++++++
 Documentation/driver-model/devres.txt          | 412 -----------------------
 Documentation/driver-model/driver.rst          | 223 ++++++++++++
 Documentation/driver-model/driver.txt          | 215 ------------
 Documentation/driver-model/index.rst           |  26 ++
 Documentation/driver-model/overview.rst        | 124 +++++++
 Documentation/driver-model/overview.txt        | 123 -------
 Documentation/driver-model/platform.rst        | 246 ++++++++++++++
 Documentation/driver-model/platform.txt        | 244 --------------
 Documentation/driver-model/porting.rst         | 448 +++++++++++++++++++++++++
 Documentation/driver-model/porting.txt         | 447 ------------------------
 Documentation/eisa.txt                         |   4 +-
 Documentation/hwmon/submitting-patches.rst     |   2 +-
 24 files changed, 2103 insertions(+), 2055 deletions(-)
 create mode 100644 Documentation/driver-model/binding.rst
 delete mode 100644 Documentation/driver-model/binding.txt
 create mode 100644 Documentation/driver-model/bus.rst
 delete mode 100644 Documentation/driver-model/bus.txt
 create mode 100644 Documentation/driver-model/class.rst
 delete mode 100644 Documentation/driver-model/class.txt
 create mode 100644 Documentation/driver-model/design-patterns.rst
 delete mode 100644 Documentation/driver-model/design-patterns.txt
 create mode 100644 Documentation/driver-model/device.rst
 delete mode 100644 Documentation/driver-model/device.txt
 create mode 100644 Documentation/driver-model/devres.rst
 delete mode 100644 Documentation/driver-model/devres.txt
 create mode 100644 Documentation/driver-model/driver.rst
 delete mode 100644 Documentation/driver-model/driver.txt
 create mode 100644 Documentation/driver-model/index.rst
 create mode 100644 Documentation/driver-model/overview.rst
 delete mode 100644 Documentation/driver-model/overview.txt
 create mode 100644 Documentation/driver-model/platform.rst
 delete mode 100644 Documentation/driver-model/platform.txt
 create mode 100644 Documentation/driver-model/porting.rst
 delete mode 100644 Documentation/driver-model/porting.txt

(limited to 'Documentation')

diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst
index 1ce7fcd0f989..f931597fe7be 100644
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@@ -399,7 +399,7 @@ symbol:
   will pass the struct gpio_chip* for the chip to all IRQ callbacks, so the
   callbacks need to embed the gpio_chip in its state container and obtain a
   pointer to the container using container_of().
-  (See Documentation/driver-model/design-patterns.txt)
+  (See Documentation/driver-model/design-patterns.rst)
 
 - gpiochip_irqchip_add_nested(): adds a nested cascaded irqchip to a gpiochip,
   as discussed above regarding different types of cascaded irqchips. The
diff --git a/Documentation/driver-model/binding.rst b/Documentation/driver-model/binding.rst
new file mode 100644
index 000000000000..7ea1d7a41e1d
--- /dev/null
+++ b/Documentation/driver-model/binding.rst
@@ -0,0 +1,98 @@
+==============
+Driver Binding
+==============
+
+Driver binding is the process of associating a device with a device
+driver that can control it. Bus drivers have typically handled this
+because there have been bus-specific structures to represent the
+devices and the drivers. With generic device and device driver
+structures, most of the binding can take place using common code.
+
+
+Bus
+~~~
+
+The bus type structure contains a list of all devices that are on that bus
+type in the system. When device_register is called for a device, it is
+inserted into the end of this list. The bus object also contains a
+list of all drivers of that bus type. When driver_register is called
+for a driver, it is inserted at the end of this list. These are the
+two events which trigger driver binding.
+
+
+device_register
+~~~~~~~~~~~~~~~
+
+When a new device is added, the bus's list of drivers is iterated over
+to find one that supports it. In order to determine that, the device
+ID of the device must match one of the device IDs that the driver
+supports. The format and semantics for comparing IDs is bus-specific.
+Instead of trying to derive a complex state machine and matching
+algorithm, it is up to the bus driver to provide a callback to compare
+a device against the IDs of a driver. The bus returns 1 if a match was
+found; 0 otherwise.
+
+int match(struct device * dev, struct device_driver * drv);
+
+If a match is found, the device's driver field is set to the driver
+and the driver's probe callback is called. This gives the driver a
+chance to verify that it really does support the hardware, and that
+it's in a working state.
+
+Device Class
+~~~~~~~~~~~~
+
+Upon the successful completion of probe, the device is registered with
+the class to which it belongs. Device drivers belong to one and only one
+class, and that is set in the driver's devclass field.
+devclass_add_device is called to enumerate the device within the class
+and actually register it with the class, which happens with the
+class's register_dev callback.
+
+
+Driver
+~~~~~~
+
+When a driver is attached to a device, the device is inserted into the
+driver's list of devices.
+
+
+sysfs
+~~~~~
+
+A symlink is created in the bus's 'devices' directory that points to
+the device's directory in the physical hierarchy.
+
+A symlink is created in the driver's 'devices' directory that points
+to the device's directory in the physical hierarchy.
+
+A directory for the device is created in the class's directory. A
+symlink is created in that directory that points to the device's
+physical location in the sysfs tree.
+
+A symlink can be created (though this isn't done yet) in the device's
+physical directory to either its class directory, or the class's
+top-level directory. One can also be created to point to its driver's
+directory also.
+
+
+driver_register
+~~~~~~~~~~~~~~~
+
+The process is almost identical for when a new driver is added.
+The bus's list of devices is iterated over to find a match. Devices
+that already have a driver are skipped. All the devices are iterated
+over, to bind as many devices as possible to the driver.
+
+
+Removal
+~~~~~~~
+
+When a device is removed, the reference count for it will eventually
+go to 0. When it does, the remove callback of the driver is called. It
+is removed from the driver's list of devices and the reference count
+of the driver is decremented. All symlinks between the two are removed.
+
+When a driver is removed, the list of devices that it supports is
+iterated over, and the driver's remove callback is called for each
+one. The device is removed from that list and the symlinks removed.
diff --git a/Documentation/driver-model/binding.txt b/Documentation/driver-model/binding.txt
deleted file mode 100644
index abfc8e290d53..000000000000
--- a/Documentation/driver-model/binding.txt
+++ /dev/null
@@ -1,98 +0,0 @@
-
-Driver Binding
-
-Driver binding is the process of associating a device with a device
-driver that can control it. Bus drivers have typically handled this
-because there have been bus-specific structures to represent the
-devices and the drivers. With generic device and device driver
-structures, most of the binding can take place using common code.
-
-
-Bus
-~~~
-
-The bus type structure contains a list of all devices that are on that bus
-type in the system. When device_register is called for a device, it is
-inserted into the end of this list. The bus object also contains a
-list of all drivers of that bus type. When driver_register is called
-for a driver, it is inserted at the end of this list. These are the
-two events which trigger driver binding.
-
-
-device_register
-~~~~~~~~~~~~~~~
-
-When a new device is added, the bus's list of drivers is iterated over
-to find one that supports it. In order to determine that, the device
-ID of the device must match one of the device IDs that the driver
-supports. The format and semantics for comparing IDs is bus-specific. 
-Instead of trying to derive a complex state machine and matching
-algorithm, it is up to the bus driver to provide a callback to compare
-a device against the IDs of a driver. The bus returns 1 if a match was
-found; 0 otherwise.
-
-int match(struct device * dev, struct device_driver * drv);
-
-If a match is found, the device's driver field is set to the driver
-and the driver's probe callback is called. This gives the driver a
-chance to verify that it really does support the hardware, and that
-it's in a working state. 
-
-Device Class
-~~~~~~~~~~~~
-
-Upon the successful completion of probe, the device is registered with
-the class to which it belongs. Device drivers belong to one and only one
-class, and that is set in the driver's devclass field. 
-devclass_add_device is called to enumerate the device within the class
-and actually register it with the class, which happens with the
-class's register_dev callback.
-
-
-Driver
-~~~~~~
-
-When a driver is attached to a device, the device is inserted into the
-driver's list of devices. 
-
-
-sysfs
-~~~~~
-
-A symlink is created in the bus's 'devices' directory that points to
-the device's directory in the physical hierarchy.
-
-A symlink is created in the driver's 'devices' directory that points
-to the device's directory in the physical hierarchy.
-
-A directory for the device is created in the class's directory. A
-symlink is created in that directory that points to the device's
-physical location in the sysfs tree. 
-
-A symlink can be created (though this isn't done yet) in the device's
-physical directory to either its class directory, or the class's
-top-level directory. One can also be created to point to its driver's
-directory also. 
-
-
-driver_register
-~~~~~~~~~~~~~~~
-
-The process is almost identical for when a new driver is added. 
-The bus's list of devices is iterated over to find a match. Devices
-that already have a driver are skipped. All the devices are iterated
-over, to bind as many devices as possible to the driver.
-
-
-Removal
-~~~~~~~
-
-When a device is removed, the reference count for it will eventually
-go to 0. When it does, the remove callback of the driver is called. It
-is removed from the driver's list of devices and the reference count
-of the driver is decremented. All symlinks between the two are removed.
-
-When a driver is removed, the list of devices that it supports is
-iterated over, and the driver's remove callback is called for each
-one. The device is removed from that list and the symlinks removed. 
-
diff --git a/Documentation/driver-model/bus.rst b/Documentation/driver-model/bus.rst
new file mode 100644
index 000000000000..016b15a6e8ea
--- /dev/null
+++ b/Documentation/driver-model/bus.rst
@@ -0,0 +1,146 @@
+=========
+Bus Types
+=========
+
+Definition
+~~~~~~~~~~
+See the kerneldoc for the struct bus_type.
+
+int bus_register(struct bus_type * bus);
+
+
+Declaration
+~~~~~~~~~~~
+
+Each bus type in the kernel (PCI, USB, etc) should declare one static
+object of this type. They must initialize the name field, and may
+optionally initialize the match callback::
+
+   struct bus_type pci_bus_type = {
+          .name	= "pci",
+          .match	= pci_bus_match,
+   };
+
+The structure should be exported to drivers in a header file:
+
+extern struct bus_type pci_bus_type;
+
+
+Registration
+~~~~~~~~~~~~
+
+When a bus driver is initialized, it calls bus_register. This
+initializes the rest of the fields in the bus object and inserts it
+into a global list of bus types. Once the bus object is registered,
+the fields in it are usable by the bus driver.
+
+
+Callbacks
+~~~~~~~~~
+
+match(): Attaching Drivers to Devices
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The format of device ID structures and the semantics for comparing
+them are inherently bus-specific. Drivers typically declare an array
+of device IDs of devices they support that reside in a bus-specific
+driver structure.
+
+The purpose of the match callback is to give the bus an opportunity to
+determine if a particular driver supports a particular device by
+comparing the device IDs the driver supports with the device ID of a
+particular device, without sacrificing bus-specific functionality or
+type-safety.
+
+When a driver is registered with the bus, the bus's list of devices is
+iterated over, and the match callback is called for each device that
+does not have a driver associated with it.
+
+
+
+Device and Driver Lists
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The lists of devices and drivers are intended to replace the local
+lists that many buses keep. They are lists of struct devices and
+struct device_drivers, respectively. Bus drivers are free to use the
+lists as they please, but conversion to the bus-specific type may be
+necessary.
+
+The LDM core provides helper functions for iterating over each list::
+
+  int bus_for_each_dev(struct bus_type * bus, struct device * start,
+		       void * data,
+		       int (*fn)(struct device *, void *));
+
+  int bus_for_each_drv(struct bus_type * bus, struct device_driver * start,
+		       void * data, int (*fn)(struct device_driver *, void *));
+
+These helpers iterate over the respective list, and call the callback
+for each device or driver in the list. All list accesses are
+synchronized by taking the bus's lock (read currently). The reference
+count on each object in the list is incremented before the callback is
+called; it is decremented after the next object has been obtained. The
+lock is not held when calling the callback.
+
+
+sysfs
+~~~~~~~~
+There is a top-level directory named 'bus'.
+
+Each bus gets a directory in the bus directory, along with two default
+directories::
+
+	/sys/bus/pci/
+	|-- devices
+	`-- drivers
+
+Drivers registered with the bus get a directory in the bus's drivers
+directory::
+
+	/sys/bus/pci/
+	|-- devices
+	`-- drivers
+	    |-- Intel ICH
+	    |-- Intel ICH Joystick
+	    |-- agpgart
+	    `-- e100
+
+Each device that is discovered on a bus of that type gets a symlink in
+the bus's devices directory to the device's directory in the physical
+hierarchy::
+
+	/sys/bus/pci/
+	|-- devices
+	|   |-- 00:00.0 -> ../../../root/pci0/00:00.0
+	|   |-- 00:01.0 -> ../../../root/pci0/00:01.0
+	|   `-- 00:02.0 -> ../../../root/pci0/00:02.0
+	`-- drivers
+
+
+Exporting Attributes
+~~~~~~~~~~~~~~~~~~~~
+
+::
+
+  struct bus_attribute {
+	struct attribute	attr;
+	ssize_t (*show)(struct bus_type *, char * buf);
+	ssize_t (*store)(struct bus_type *, const char * buf, size_t count);
+  };
+
+Bus drivers can export attributes using the BUS_ATTR_RW macro that works
+similarly to the DEVICE_ATTR_RW macro for devices. For example, a
+definition like this::
+
+	static BUS_ATTR_RW(debug);
+
+is equivalent to declaring::
+
+	static bus_attribute bus_attr_debug;
+
+This can then be used to add and remove the attribute from the bus's
+sysfs directory using::
+
+	int bus_create_file(struct bus_type *, struct bus_attribute *);
+	void bus_remove_file(struct bus_type *, struct bus_attribute *);
diff --git a/Documentation/driver-model/bus.txt b/Documentation/driver-model/bus.txt
deleted file mode 100644
index c247b488a567..000000000000
--- a/Documentation/driver-model/bus.txt
+++ /dev/null
@@ -1,143 +0,0 @@
-
-Bus Types 
-
-Definition
-~~~~~~~~~~
-See the kerneldoc for the struct bus_type.
-
-int bus_register(struct bus_type * bus);
-
-
-Declaration
-~~~~~~~~~~~
-
-Each bus type in the kernel (PCI, USB, etc) should declare one static
-object of this type. They must initialize the name field, and may
-optionally initialize the match callback.
-
-struct bus_type pci_bus_type = {
-       .name	= "pci",
-       .match	= pci_bus_match,
-};
-
-The structure should be exported to drivers in a header file:
-
-extern struct bus_type pci_bus_type;
-
-
-Registration
-~~~~~~~~~~~~
-
-When a bus driver is initialized, it calls bus_register. This
-initializes the rest of the fields in the bus object and inserts it
-into a global list of bus types. Once the bus object is registered, 
-the fields in it are usable by the bus driver. 
-
-
-Callbacks
-~~~~~~~~~
-
-match(): Attaching Drivers to Devices
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The format of device ID structures and the semantics for comparing
-them are inherently bus-specific. Drivers typically declare an array
-of device IDs of devices they support that reside in a bus-specific
-driver structure. 
-
-The purpose of the match callback is to give the bus an opportunity to
-determine if a particular driver supports a particular device by
-comparing the device IDs the driver supports with the device ID of a
-particular device, without sacrificing bus-specific functionality or
-type-safety. 
-
-When a driver is registered with the bus, the bus's list of devices is
-iterated over, and the match callback is called for each device that
-does not have a driver associated with it. 
-
-
-
-Device and Driver Lists
-~~~~~~~~~~~~~~~~~~~~~~~
-
-The lists of devices and drivers are intended to replace the local
-lists that many buses keep. They are lists of struct devices and
-struct device_drivers, respectively. Bus drivers are free to use the
-lists as they please, but conversion to the bus-specific type may be
-necessary. 
-
-The LDM core provides helper functions for iterating over each list.
-
-int bus_for_each_dev(struct bus_type * bus, struct device * start, void * data,
-		     int (*fn)(struct device *, void *));
-
-int bus_for_each_drv(struct bus_type * bus, struct device_driver * start, 
-		     void * data, int (*fn)(struct device_driver *, void *));
-
-These helpers iterate over the respective list, and call the callback
-for each device or driver in the list. All list accesses are
-synchronized by taking the bus's lock (read currently). The reference
-count on each object in the list is incremented before the callback is
-called; it is decremented after the next object has been obtained. The
-lock is not held when calling the callback. 
-
-
-sysfs
-~~~~~~~~
-There is a top-level directory named 'bus'.
-
-Each bus gets a directory in the bus directory, along with two default
-directories:
-
-	/sys/bus/pci/
-	|-- devices
-	`-- drivers
-
-Drivers registered with the bus get a directory in the bus's drivers
-directory:
-
-	/sys/bus/pci/
-	|-- devices
-	`-- drivers
-	    |-- Intel ICH
-	    |-- Intel ICH Joystick
-	    |-- agpgart
-	    `-- e100
-
-Each device that is discovered on a bus of that type gets a symlink in
-the bus's devices directory to the device's directory in the physical
-hierarchy:
-
-	/sys/bus/pci/
-	|-- devices
-	|   |-- 00:00.0 -> ../../../root/pci0/00:00.0
-	|   |-- 00:01.0 -> ../../../root/pci0/00:01.0
-	|   `-- 00:02.0 -> ../../../root/pci0/00:02.0
-	`-- drivers
-
-
-Exporting Attributes
-~~~~~~~~~~~~~~~~~~~~
-struct bus_attribute {
-	struct attribute	attr;
-	ssize_t (*show)(struct bus_type *, char * buf);
-	ssize_t (*store)(struct bus_type *, const char * buf, size_t count);
-};
-
-Bus drivers can export attributes using the BUS_ATTR_RW macro that works
-similarly to the DEVICE_ATTR_RW macro for devices. For example, a
-definition like this:
-
-static BUS_ATTR_RW(debug);
-
-is equivalent to declaring:
-
-static bus_attribute bus_attr_debug;
-
-This can then be used to add and remove the attribute from the bus's
-sysfs directory using:
-
-int bus_create_file(struct bus_type *, struct bus_attribute *);
-void bus_remove_file(struct bus_type *, struct bus_attribute *);
-
-
diff --git a/Documentation/driver-model/class.rst b/Documentation/driver-model/class.rst
new file mode 100644
index 000000000000..fff55b80e86a
--- /dev/null
+++ b/Documentation/driver-model/class.rst
@@ -0,0 +1,149 @@
+==============
+Device Classes
+==============
+
+Introduction
+~~~~~~~~~~~~
+A device class describes a type of device, like an audio or network
+device. The following device classes have been identified:
+
+<Insert List of Device Classes Here>
+
+
+Each device class defines a set of semantics and a programming interface
+that devices of that class adhere to. Device drivers are the
+implementation of that programming interface for a particular device on
+a particular bus.
+
+Device classes are agnostic with respect to what bus a device resides
+on.
+
+
+Programming Interface
+~~~~~~~~~~~~~~~~~~~~~
+The device class structure looks like::
+
+
+  typedef int (*devclass_add)(struct device *);
+  typedef void (*devclass_remove)(struct device *);
+
+See the kerneldoc for the struct class.
+
+A typical device class definition would look like::
+
+  struct device_class input_devclass = {
+        .name		= "input",
+        .add_device	= input_add_device,
+	.remove_device	= input_remove_device,
+  };
+
+Each device class structure should be exported in a header file so it
+can be used by drivers, extensions and interfaces.
+
+Device classes are registered and unregistered with the core using::
+
+  int devclass_register(struct device_class * cls);
+  void devclass_unregister(struct device_class * cls);
+
+
+Devices
+~~~~~~~
+As devices are bound to drivers, they are added to the device class
+that the driver belongs to. Before the driver model core, this would
+typically happen during the driver's probe() callback, once the device
+has been initialized. It now happens after the probe() callback
+finishes from the core.
+
+The device is enumerated in the class. Each time a device is added to
+the class, the class's devnum field is incremented and assigned to the
+device. The field is never decremented, so if the device is removed
+from the class and re-added, it will receive a different enumerated
+value.
+
+The class is allowed to create a class-specific structure for the
+device and store it in the device's class_data pointer.
+
+There is no list of devices in the device class. Each driver has a
+list of devices that it supports. The device class has a list of
+drivers of that particular class. To access all of the devices in the
+class, iterate over the device lists of each driver in the class.
+
+
+Device Drivers
+~~~~~~~~~~~~~~
+Device drivers are added to device classes when they are registered
+with the core. A driver specifies the class it belongs to by setting
+the struct device_driver::devclass field.
+
+
+sysfs directory structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+There is a top-level sysfs directory named 'class'.
+
+Each class gets a directory in the class directory, along with two
+default subdirectories::
+
+        class/
+        `-- input
+            |-- devices
+            `-- drivers
+
+
+Drivers registered with the class get a symlink in the drivers/ directory
+that points to the driver's directory (under its bus directory)::
+
+   class/
+   `-- input
+       |-- devices
+       `-- drivers
+           `-- usb:usb_mouse -> ../../../bus/drivers/usb_mouse/
+
+
+Each device gets a symlink in the devices/ directory that points to the
+device's directory in the physical hierarchy::
+
+   class/
+   `-- input
+       |-- devices
+       |   `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/
+       `-- drivers
+
+
+Exporting Attributes
+~~~~~~~~~~~~~~~~~~~~
+
+::
+
+  struct devclass_attribute {
+        struct attribute        attr;
+        ssize_t (*show)(struct device_class *, char * buf, size_t count, loff_t off);
+        ssize_t (*store)(struct device_class *, const char * buf, size_t count, loff_t off);
+  };
+
+Class drivers can export attributes using the DEVCLASS_ATTR macro that works
+similarly to the DEVICE_ATTR macro for devices. For example, a definition
+like this::
+
+  static DEVCLASS_ATTR(debug,0644,show_debug,store_debug);
+
+is equivalent to declaring::
+
+  static devclass_attribute devclass_attr_debug;
+
+The bus driver can add and remove the attribute from the class's
+sysfs directory using::
+
+  int devclass_create_file(struct device_class *, struct devclass_attribute *);
+  void devclass_remove_file(struct device_class *, struct devclass_attribute *);
+
+In the example above, the file will be named 'debug' in placed in the
+class's directory in sysfs.
+
+
+Interfaces
+~~~~~~~~~~
+There may exist multiple mechanisms for accessing the same device of a
+particular class type. Device interfaces describe these mechanisms.
+
+When a device is added to a device class, the core attempts to add it
+to every interface that is registered with the device class.
diff --git a/Documentation/driver-model/class.txt b/Documentation/driver-model/class.txt
deleted file mode 100644
index 1fefc480a80b..000000000000
--- a/Documentation/driver-model/class.txt
+++ /dev/null
@@ -1,147 +0,0 @@
-
-Device Classes
-
-
-Introduction
-~~~~~~~~~~~~
-A device class describes a type of device, like an audio or network
-device. The following device classes have been identified:
-
-<Insert List of Device Classes Here>
-
-
-Each device class defines a set of semantics and a programming interface
-that devices of that class adhere to. Device drivers are the
-implementation of that programming interface for a particular device on
-a particular bus. 
-
-Device classes are agnostic with respect to what bus a device resides
-on. 
-
-
-Programming Interface
-~~~~~~~~~~~~~~~~~~~~~
-The device class structure looks like: 
-
-
-typedef int (*devclass_add)(struct device *);
-typedef void (*devclass_remove)(struct device *);
-
-See the kerneldoc for the struct class.
-
-A typical device class definition would look like: 
-
-struct device_class input_devclass = {
-        .name		= "input",
-        .add_device	= input_add_device,
-	.remove_device	= input_remove_device,
-};
-
-Each device class structure should be exported in a header file so it
-can be used by drivers, extensions and interfaces.
-
-Device classes are registered and unregistered with the core using: 
-
-int devclass_register(struct device_class * cls);
-void devclass_unregister(struct device_class * cls);
-
-
-Devices
-~~~~~~~
-As devices are bound to drivers, they are added to the device class
-that the driver belongs to. Before the driver model core, this would
-typically happen during the driver's probe() callback, once the device
-has been initialized. It now happens after the probe() callback
-finishes from the core. 
-
-The device is enumerated in the class. Each time a device is added to
-the class, the class's devnum field is incremented and assigned to the
-device. The field is never decremented, so if the device is removed
-from the class and re-added, it will receive a different enumerated
-value. 
-
-The class is allowed to create a class-specific structure for the
-device and store it in the device's class_data pointer. 
-
-There is no list of devices in the device class. Each driver has a
-list of devices that it supports. The device class has a list of
-drivers of that particular class. To access all of the devices in the
-class, iterate over the device lists of each driver in the class.
-
-
-Device Drivers
-~~~~~~~~~~~~~~
-Device drivers are added to device classes when they are registered
-with the core. A driver specifies the class it belongs to by setting
-the struct device_driver::devclass field. 
-
-
-sysfs directory structure
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-There is a top-level sysfs directory named 'class'. 
-
-Each class gets a directory in the class directory, along with two
-default subdirectories:
-
-        class/
-        `-- input
-            |-- devices
-            `-- drivers
-
-
-Drivers registered with the class get a symlink in the drivers/ directory 
-that points to the driver's directory (under its bus directory):
-
-   class/
-   `-- input
-       |-- devices
-       `-- drivers
-           `-- usb:usb_mouse -> ../../../bus/drivers/usb_mouse/
-
-
-Each device gets a symlink in the devices/ directory that points to the 
-device's directory in the physical hierarchy:
-
-   class/
-   `-- input
-       |-- devices
-       |   `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/
-       `-- drivers
-
-
-Exporting Attributes
-~~~~~~~~~~~~~~~~~~~~
-struct devclass_attribute {
-        struct attribute        attr;
-        ssize_t (*show)(struct device_class *, char * buf, size_t count, loff_t off);
-        ssize_t (*store)(struct device_class *, const char * buf, size_t count, loff_t off);
-};
-
-Class drivers can export attributes using the DEVCLASS_ATTR macro that works
-similarly to the DEVICE_ATTR macro for devices. For example, a definition 
-like this:
-
-static DEVCLASS_ATTR(debug,0644,show_debug,store_debug);
-
-is equivalent to declaring:
-
-static devclass_attribute devclass_attr_debug;
-
-The bus driver can add and remove the attribute from the class's
-sysfs directory using:
-
-int devclass_create_file(struct device_class *, struct devclass_attribute *);
-void devclass_remove_file(struct device_class *, struct devclass_attribute *);
-
-In the example above, the file will be named 'debug' in placed in the
-class's directory in sysfs. 
-
-
-Interfaces
-~~~~~~~~~~
-There may exist multiple mechanisms for accessing the same device of a
-particular class type. Device interfaces describe these mechanisms. 
-
-When a device is added to a device class, the core attempts to add it
-to every interface that is registered with the device class.
-
diff --git a/Documentation/driver-model/design-patterns.rst b/Documentation/driver-model/design-patterns.rst
new file mode 100644
index 000000000000..41eb8f41f7dd
--- /dev/null
+++ b/Documentation/driver-model/design-patterns.rst
@@ -0,0 +1,116 @@
+=============================
+Device Driver Design Patterns
+=============================
+
+This document describes a few common design patterns found in device drivers.
+It is likely that subsystem maintainers will ask driver developers to
+conform to these design patterns.
+
+1. State Container
+2. container_of()
+
+
+1. State Container
+~~~~~~~~~~~~~~~~~~
+
+While the kernel contains a few device drivers that assume that they will
+only be probed() once on a certain system (singletons), it is custom to assume
+that the device the driver binds to will appear in several instances. This
+means that the probe() function and all callbacks need to be reentrant.
+
+The most common way to achieve this is to use the state container design
+pattern. It usually has this form::
+
+  struct foo {
+      spinlock_t lock; /* Example member */
+      (...)
+  };
+
+  static int foo_probe(...)
+  {
+      struct foo *foo;
+
+      foo = devm_kzalloc(dev, sizeof(*foo), GFP_KERNEL);
+      if (!foo)
+          return -ENOMEM;
+      spin_lock_init(&foo->lock);
+      (...)
+  }
+
+This will create an instance of struct foo in memory every time probe() is
+called. This is our state container for this instance of the device driver.
+Of course it is then necessary to always pass this instance of the
+state around to all functions that need access to the state and its members.
+
+For example, if the driver is registering an interrupt handler, you would
+pass around a pointer to struct foo like this::
+
+  static irqreturn_t foo_handler(int irq, void *arg)
+  {
+      struct foo *foo = arg;
+      (...)
+  }
+
+  static int foo_probe(...)
+  {
+      struct foo *foo;
+
+      (...)
+      ret = request_irq(irq, foo_handler, 0, "foo", foo);
+  }
+
+This way you always get a pointer back to the correct instance of foo in
+your interrupt handler.
+
+
+2. container_of()
+~~~~~~~~~~~~~~~~~
+
+Continuing on the above example we add an offloaded work::
+
+  struct foo {
+      spinlock_t lock;
+      struct workqueue_struct *wq;
+      struct work_struct offload;
+      (...)
+  };
+
+  static void foo_work(struct work_struct *work)
+  {
+      struct foo *foo = container_of(work, struct foo, offload);
+
+      (...)
+  }
+
+  static irqreturn_t foo_handler(int irq, void *arg)
+  {
+      struct foo *foo = arg;
+
+      queue_work(foo->wq, &foo->offload);
+      (...)
+  }
+
+  static int foo_probe(...)
+  {
+      struct foo *foo;
+
+      foo->wq = create_singlethread_workqueue("foo-wq");
+      INIT_WORK(&foo->offload, foo_work);
+      (...)
+  }
+
+The design pattern is the same for an hrtimer or something similar that will
+return a single argument which is a pointer to a struct member in the
+callback.
+
+container_of() is a macro defined in <linux/kernel.h>
+
+What container_of() does is to obtain a pointer to the containing struct from
+a pointer to a member by a simple subtraction using the offsetof() macro from
+standard C, which allows something similar to object oriented behaviours.
+Notice that the contained member must not be a pointer, but an actual member
+for this to work.
+
+We can see here that we avoid having global pointers to our struct foo *
+instance this way, while still keeping the number of parameters passed to the
+work function to a single pointer.
diff --git a/Documentation/driver-model/design-patterns.txt b/Documentation/driver-model/design-patterns.txt
deleted file mode 100644
index ba7b2df64904..000000000000
--- a/Documentation/driver-model/design-patterns.txt
+++ /dev/null
@@ -1,116 +0,0 @@
-
-Device Driver Design Patterns
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-This document describes a few common design patterns found in device drivers.
-It is likely that subsystem maintainers will ask driver developers to
-conform to these design patterns.
-
-1. State Container
-2. container_of()
-
-
-1. State Container
-~~~~~~~~~~~~~~~~~~
-
-While the kernel contains a few device drivers that assume that they will
-only be probed() once on a certain system (singletons), it is custom to assume
-that the device the driver binds to will appear in several instances. This
-means that the probe() function and all callbacks need to be reentrant.
-
-The most common way to achieve this is to use the state container design
-pattern. It usually has this form:
-
-struct foo {
-    spinlock_t lock; /* Example member */
-    (...)
-};
-
-static int foo_probe(...)
-{
-    struct foo *foo;
-
-    foo = devm_kzalloc(dev, sizeof(*foo), GFP_KERNEL);
-    if (!foo)
-        return -ENOMEM;
-    spin_lock_init(&foo->lock);
-    (...)
-}
-
-This will create an instance of struct foo in memory every time probe() is
-called. This is our state container for this instance of the device driver.
-Of course it is then necessary to always pass this instance of the
-state around to all functions that need access to the state and its members.
-
-For example, if the driver is registering an interrupt handler, you would
-pass around a pointer to struct foo like this:
-
-static irqreturn_t foo_handler(int irq, void *arg)
-{
-    struct foo *foo = arg;
-    (...)
-}
-
-static int foo_probe(...)
-{
-    struct foo *foo;
-
-    (...)
-    ret = request_irq(irq, foo_handler, 0, "foo", foo);
-}
-
-This way you always get a pointer back to the correct instance of foo in
-your interrupt handler.
-
-
-2. container_of()
-~~~~~~~~~~~~~~~~~
-
-Continuing on the above example we add an offloaded work:
-
-struct foo {
-    spinlock_t lock;
-    struct workqueue_struct *wq;
-    struct work_struct offload;
-    (...)
-};
-
-static void foo_work(struct work_struct *work)
-{
-    struct foo *foo = container_of(work, struct foo, offload);
-
-    (...)
-}
-
-static irqreturn_t foo_handler(int irq, void *arg)
-{
-    struct foo *foo = arg;
-
-    queue_work(foo->wq, &foo->offload);
-    (...)
-}
-
-static int foo_probe(...)
-{
-    struct foo *foo;
-
-    foo->wq = create_singlethread_workqueue("foo-wq");
-    INIT_WORK(&foo->offload, foo_work);
-    (...)
-}
-
-The design pattern is the same for an hrtimer or something similar that will
-return a single argument which is a pointer to a struct member in the
-callback.
-
-container_of() is a macro defined in <linux/kernel.h>
-
-What container_of() does is to obtain a pointer to the containing struct from
-a pointer to a member by a simple subtraction using the offsetof() macro from
-standard C, which allows something similar to object oriented behaviours.
-Notice that the contained member must not be a pointer, but an actual member
-for this to work.
-
-We can see here that we avoid having global pointers to our struct foo *
-instance this way, while still keeping the number of parameters passed to the
-work function to a single pointer.
diff --git a/Documentation/driver-model/device.rst b/Documentation/driver-model/device.rst
new file mode 100644
index 000000000000..2b868d49d349
--- /dev/null
+++ b/Documentation/driver-model/device.rst
@@ -0,0 +1,109 @@
+==========================
+The Basic Device Structure
+==========================
+
+See the kerneldoc for the struct device.
+
+
+Programming Interface
+~~~~~~~~~~~~~~~~~~~~~
+The bus driver that discovers the device uses this to register the
+device with the core::
+
+  int device_register(struct device * dev);
+
+The bus should initialize the following fields:
+
+    - parent
+    - name
+    - bus_id
+    - bus
+
+A device is removed from the core when its reference count goes to
+0. The reference count can be adjusted using::
+
+  struct device * get_device(struct device * dev);
+  void put_device(struct device * dev);
+
+get_device() will return a pointer to the struct device passed to it
+if the reference is not already 0 (if it's in the process of being
+removed already).
+
+A driver can access the lock in the device structure using::
+
+  void lock_device(struct device * dev);
+  void unlock_device(struct device * dev);
+
+
+Attributes
+~~~~~~~~~~
+
+::
+
+  struct device_attribute {
+	struct attribute	attr;
+	ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count);
+  };
+
+Attributes of devices can be exported by a device driver through sysfs.
+
+Please see Documentation/filesystems/sysfs.txt for more information
+on how sysfs works.
+
+As explained in Documentation/kobject.txt, device attributes must be
+created before the KOBJ_ADD uevent is generated. The only way to realize
+that is by defining an attribute group.
+
+Attributes are declared using a macro called DEVICE_ATTR::
+
+  #define DEVICE_ATTR(name,mode,show,store)
+
+Example:::
+
+  static DEVICE_ATTR(type, 0444, show_type, NULL);
+  static DEVICE_ATTR(power, 0644, show_power, store_power);
+
+This declares two structures of type struct device_attribute with respective
+names 'dev_attr_type' and 'dev_attr_power'. These two attributes can be
+organized as follows into a group::
+
+  static struct attribute *dev_attrs[] = {
+	&dev_attr_type.attr,
+	&dev_attr_power.attr,
+	NULL,
+  };
+
+  static struct attribute_group dev_attr_group = {
+	.attrs = dev_attrs,
+  };
+
+  static const struct attribute_group *dev_attr_groups[] = {
+	&dev_attr_group,
+	NULL,
+  };
+
+This array of groups can then be associated with a device by setting the
+group pointer in struct device before device_register() is invoked::
+
+        dev->groups = dev_attr_groups;
+        device_register(dev);
+
+The device_register() function will use the 'groups' pointer to create the
+device attributes and the device_unregister() function will use this pointer
+to remove the device attributes.
+
+Word of warning:  While the kernel allows device_create_file() and
+device_remove_file() to be called on a device at any time, userspace has
+strict expectations on when attributes get created.  When a new device is
+registered in the kernel, a uevent is generated to notify userspace (like
+udev) that a new device is available.  If attributes are added after the
+device is registered, then userspace won't get notified and userspace will
+not know about the new attributes.
+
+This is important for device driver that need to publish additional
+attributes for a device at driver probe time.  If the device driver simply
+calls device_create_file() on the device structure passed to it, then
+userspace will never be notified of the new attributes.
diff --git a/Documentation/driver-model/device.txt b/Documentation/driver-model/device.txt
deleted file mode 100644
index 2403eb856187..000000000000
--- a/Documentation/driver-model/device.txt
+++ /dev/null
@@ -1,106 +0,0 @@
-
-The Basic Device Structure
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-See the kerneldoc for the struct device.
-
-
-Programming Interface
-~~~~~~~~~~~~~~~~~~~~~
-The bus driver that discovers the device uses this to register the
-device with the core:
-
-int device_register(struct device * dev);
-
-The bus should initialize the following fields:
-
-    - parent
-    - name
-    - bus_id
-    - bus
-
-A device is removed from the core when its reference count goes to
-0. The reference count can be adjusted using:
-
-struct device * get_device(struct device * dev);
-void put_device(struct device * dev);
-
-get_device() will return a pointer to the struct device passed to it
-if the reference is not already 0 (if it's in the process of being
-removed already).
-
-A driver can access the lock in the device structure using: 
-
-void lock_device(struct device * dev);
-void unlock_device(struct device * dev);
-
-
-Attributes
-~~~~~~~~~~
-struct device_attribute {
-	struct attribute	attr;
-	ssize_t (*show)(struct device *dev, struct device_attribute *attr,
-			char *buf);
-	ssize_t (*store)(struct device *dev, struct device_attribute *attr,
-			 const char *buf, size_t count);
-};
-
-Attributes of devices can be exported by a device driver through sysfs.
-
-Please see Documentation/filesystems/sysfs.txt for more information
-on how sysfs works.
-
-As explained in Documentation/kobject.txt, device attributes must be
-created before the KOBJ_ADD uevent is generated. The only way to realize
-that is by defining an attribute group.
-
-Attributes are declared using a macro called DEVICE_ATTR:
-
-#define DEVICE_ATTR(name,mode,show,store)
-
-Example:
-
-static DEVICE_ATTR(type, 0444, show_type, NULL);
-static DEVICE_ATTR(power, 0644, show_power, store_power);
-
-This declares two structures of type struct device_attribute with respective
-names 'dev_attr_type' and 'dev_attr_power'. These two attributes can be
-organized as follows into a group:
-
-static struct attribute *dev_attrs[] = {
-	&dev_attr_type.attr,
-	&dev_attr_power.attr,
-	NULL,
-};
-
-static struct attribute_group dev_attr_group = {
-	.attrs = dev_attrs,
-};
-
-static const struct attribute_group *dev_attr_groups[] = {
-	&dev_attr_group,
-	NULL,
-};
-
-This array of groups can then be associated with a device by setting the
-group pointer in struct device before device_register() is invoked:
-
-      dev->groups = dev_attr_groups;
-      device_register(dev);
-
-The device_register() function will use the 'groups' pointer to create the
-device attributes and the device_unregister() function will use this pointer
-to remove the device attributes.
-
-Word of warning:  While the kernel allows device_create_file() and
-device_remove_file() to be called on a device at any time, userspace has
-strict expectations on when attributes get created.  When a new device is
-registered in the kernel, a uevent is generated to notify userspace (like
-udev) that a new device is available.  If attributes are added after the
-device is registered, then userspace won't get notified and userspace will
-not know about the new attributes.
-
-This is important for device driver that need to publish additional
-attributes for a device at driver probe time.  If the device driver simply
-calls device_create_file() on the device structure passed to it, then
-userspace will never be notified of the new attributes.
diff --git a/Documentation/driver-model/devres.rst b/Documentation/driver-model/devres.rst
new file mode 100644
index 000000000000..4ac99122b5f1
--- /dev/null
+++ b/Documentation/driver-model/devres.rst
@@ -0,0 +1,414 @@
+================================
+Devres - Managed Device Resource
+================================
+
+Tejun Heo	<teheo@suse.de>
+
+First draft	10 January 2007
+
+.. contents
+
+   1. Intro			: Huh? Devres?
+   2. Devres			: Devres in a nutshell
+   3. Devres Group		: Group devres'es and release them together
+   4. Details			: Life time rules, calling context, ...
+   5. Overhead			: How much do we have to pay for this?
+   6. List of managed interfaces: Currently implemented managed interfaces
+
+
+1. Intro
+--------
+
+devres came up while trying to convert libata to use iomap.  Each
+iomapped address should be kept and unmapped on driver detach.  For
+example, a plain SFF ATA controller (that is, good old PCI IDE) in
+native mode makes use of 5 PCI BARs and all of them should be
+maintained.
+
+As with many other device drivers, libata low level drivers have
+sufficient bugs in ->remove and ->probe failure path.  Well, yes,
+that's probably because libata low level driver developers are lazy
+bunch, but aren't all low level driver developers?  After spending a
+day fiddling with braindamaged hardware with no document or
+braindamaged document, if it's finally working, well, it's working.
+
+For one reason or another, low level drivers don't receive as much
+attention or testing as core code, and bugs on driver detach or
+initialization failure don't happen often enough to be noticeable.
+Init failure path is worse because it's much less travelled while
+needs to handle multiple entry points.
+
+So, many low level drivers end up leaking resources on driver detach
+and having half broken failure path implementation in ->probe() which
+would leak resources or even cause oops when failure occurs.  iomap
+adds more to this mix.  So do msi and msix.
+
+
+2. Devres
+---------
+
+devres is basically linked list of arbitrarily sized memory areas
+associated with a struct device.  Each devres entry is associated with
+a release function.  A devres can be released in several ways.  No
+matter what, all devres entries are released on driver detach.  On
+release, the associated release function is invoked and then the
+devres entry is freed.
+
+Managed interface is created for resources commonly used by device
+drivers using devres.  For example, coherent DMA memory is acquired
+using dma_alloc_coherent().  The managed version is called
+dmam_alloc_coherent().  It is identical to dma_alloc_coherent() except
+for the DMA memory allocated using it is managed and will be
+automatically released on driver detach.  Implementation looks like
+the following::
+
+  struct dma_devres {
+	size_t		size;
+	void		*vaddr;
+	dma_addr_t	dma_handle;
+  };
+
+  static void dmam_coherent_release(struct device *dev, void *res)
+  {
+	struct dma_devres *this = res;
+
+	dma_free_coherent(dev, this->size, this->vaddr, this->dma_handle);
+  }
+
+  dmam_alloc_coherent(dev, size, dma_handle, gfp)
+  {
+	struct dma_devres *dr;
+	void *vaddr;
+
+	dr = devres_alloc(dmam_coherent_release, sizeof(*dr), gfp);
+	...
+
+	/* alloc DMA memory as usual */
+	vaddr = dma_alloc_coherent(...);
+	...
+
+	/* record size, vaddr, dma_handle in dr */
+	dr->vaddr = vaddr;
+	...
+
+	devres_add(dev, dr);
+
+	return vaddr;
+  }
+
+If a driver uses dmam_alloc_coherent(), the area is guaranteed to be
+freed whether initialization fails half-way or the device gets
+detached.  If most resources are acquired using managed interface, a
+driver can have much simpler init and exit code.  Init path basically
+looks like the following::
+
+  my_init_one()
+  {
+	struct mydev *d;
+
+	d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->ring = dmam_alloc_coherent(...);
+	if (!d->ring)
+		return -ENOMEM;
+
+	if (check something)
+		return -EINVAL;
+	...
+
+	return register_to_upper_layer(d);
+  }
+
+And exit path::
+
+  my_remove_one()
+  {
+	unregister_from_upper_layer(d);
+	shutdown_my_hardware();
+  }
+
+As shown above, low level drivers can be simplified a lot by using
+devres.  Complexity is shifted from less maintained low level drivers
+to better maintained higher layer.  Also, as init failure path is
+shared with exit path, both can get more testing.
+
+Note though that when converting current calls or assignments to
+managed devm_* versions it is up to you to check if internal operations
+like allocating memory, have failed. Managed resources pertains to the
+freeing of these resources *only* - all other checks needed are still
+on you. In some cases this may mean introducing checks that were not
+necessary before moving to the managed devm_* calls.
+
+
+3. Devres group
+---------------
+
+Devres entries can be grouped using devres group.  When a group is
+released, all contained normal devres entries and properly nested
+groups are released.  One usage is to rollback series of acquired
+resources on failure.  For example::
+
+  if (!devres_open_group(dev, NULL, GFP_KERNEL))
+	return -ENOMEM;
+
+  acquire A;
+  if (failed)
+	goto err;
+
+  acquire B;
+  if (failed)
+	goto err;
+  ...
+
+  devres_remove_group(dev, NULL);
+  return 0;
+
+ err:
+  devres_release_group(dev, NULL);
+  return err_code;
+
+As resource acquisition failure usually means probe failure, constructs
+like above are usually useful in midlayer driver (e.g. libata core
+layer) where interface function shouldn't have side effect on failure.
+For LLDs, just returning error code suffices in most cases.
+
+Each group is identified by `void *id`.  It can either be explicitly
+specified by @id argument to devres_open_group() or automatically
+created by passing NULL as @id as in the above example.  In both
+cases, devres_open_group() returns the group's id.  The returned id
+can be passed to other devres functions to select the target group.
+If NULL is given to those functions, the latest open group is
+selected.
+
+For example, you can do something like the following::
+
+  int my_midlayer_create_something()
+  {
+	if (!devres_open_group(dev, my_midlayer_create_something, GFP_KERNEL))
+		return -ENOMEM;
+
+	...
+
+	devres_close_group(dev, my_midlayer_create_something);
+	return 0;
+  }
+
+  void my_midlayer_destroy_something()
+  {
+	devres_release_group(dev, my_midlayer_create_something);
+  }
+
+
+4. Details
+----------
+
+Lifetime of a devres entry begins on devres allocation and finishes
+when it is released or destroyed (removed and freed) - no reference
+counting.
+
+devres core guarantees atomicity to all basic devres operations and
+has support for single-instance devres types (atomic
+lookup-and-add-if-not-found).  Other than that, synchronizing
+concurrent accesses to allocated devres data is caller's
+responsibility.  This is usually non-issue because bus ops and
+resource allocations already do the job.
+
+For an example of single-instance devres type, read pcim_iomap_table()
+in lib/devres.c.
+
+All devres interface functions can be called without context if the
+right gfp mask is given.
+
+
+5. Overhead
+-----------
+
+Each devres bookkeeping info is allocated together with requested data
+area.  With debug option turned off, bookkeeping info occupies 16
+bytes on 32bit machines and 24 bytes on 64bit (three pointers rounded
+up to ull alignment).  If singly linked list is used, it can be
+reduced to two pointers (8 bytes on 32bit, 16 bytes on 64bit).
+
+Each devres group occupies 8 pointers.  It can be reduced to 6 if
+singly linked list is used.
+
+Memory space overhead on ahci controller with two ports is between 300
+and 400 bytes on 32bit machine after naive conversion (we can
+certainly invest a bit more effort into libata core layer).
+
+
+6. List of managed interfaces
+-----------------------------
+
+CLOCK
+  devm_clk_get()
+  devm_clk_get_optional()
+  devm_clk_put()
+  devm_clk_hw_register()
+  devm_of_clk_add_hw_provider()
+  devm_clk_hw_register_clkdev()
+
+DMA
+  dmaenginem_async_device_register()
+  dmam_alloc_coherent()
+  dmam_alloc_attrs()
+  dmam_free_coherent()
+  dmam_pool_create()
+  dmam_pool_destroy()
+
+DRM
+  devm_drm_dev_init()
+
+GPIO
+  devm_gpiod_get()
+  devm_gpiod_get_index()
+  devm_gpiod_get_index_optional()
+  devm_gpiod_get_optional()
+  devm_gpiod_put()
+  devm_gpiod_unhinge()
+  devm_gpiochip_add_data()
+  devm_gpio_request()
+  devm_gpio_request_one()
+  devm_gpio_free()
+
+I2C
+  devm_i2c_new_dummy_device()
+
+IIO
+  devm_iio_device_alloc()
+  devm_iio_device_free()
+  devm_iio_device_register()
+  devm_iio_device_unregister()
+  devm_iio_kfifo_allocate()
+  devm_iio_kfifo_free()
+  devm_iio_triggered_buffer_setup()
+  devm_iio_triggered_buffer_cleanup()
+  devm_iio_trigger_alloc()
+  devm_iio_trigger_free()
+  devm_iio_trigger_register()
+  devm_iio_trigger_unregister()
+  devm_iio_channel_get()
+  devm_iio_channel_release()
+  devm_iio_channel_get_all()
+  devm_iio_channel_release_all()
+
+INPUT
+  devm_input_allocate_device()
+
+IO region
+  devm_release_mem_region()
+  devm_release_region()
+  devm_release_resource()
+  devm_request_mem_region()
+  devm_request_region()
+  devm_request_resource()
+
+IOMAP
+  devm_ioport_map()
+  devm_ioport_unmap()
+  devm_ioremap()
+  devm_ioremap_nocache()
+  devm_ioremap_wc()
+  devm_ioremap_resource() : checks resource, requests memory region, ioremaps
+  devm_iounmap()
+  pcim_iomap()
+  pcim_iomap_regions()	: do request_region() and iomap() on multiple BARs
+  pcim_iomap_table()	: array of mapped addresses indexed by BAR
+  pcim_iounmap()
+
+IRQ
+  devm_free_irq()
+  devm_request_any_context_irq()
+  devm_request_irq()
+  devm_request_threaded_irq()
+  devm_irq_alloc_descs()
+  devm_irq_alloc_desc()
+  devm_irq_alloc_desc_at()
+  devm_irq_alloc_desc_from()
+  devm_irq_alloc_descs_from()
+  devm_irq_alloc_generic_chip()
+  devm_irq_setup_generic_chip()
+  devm_irq_sim_init()
+
+LED
+  devm_led_classdev_register()
+  devm_led_classdev_unregister()
+
+MDIO
+  devm_mdiobus_alloc()
+  devm_mdiobus_alloc_size()
+  devm_mdiobus_free()
+
+MEM
+  devm_free_pages()
+  devm_get_free_pages()
+  devm_kasprintf()
+  devm_kcalloc()
+  devm_kfree()
+  devm_kmalloc()
+  devm_kmalloc_array()
+  devm_kmemdup()
+  devm_kstrdup()
+  devm_kvasprintf()
+  devm_kzalloc()
+
+MFD
+  devm_mfd_add_devices()
+
+MUX
+  devm_mux_chip_alloc()
+  devm_mux_chip_register()
+  devm_mux_control_get()
+
+PER-CPU MEM
+  devm_alloc_percpu()
+  devm_free_percpu()
+
+PCI
+  devm_pci_alloc_host_bridge()  : managed PCI host bridge allocation
+  devm_pci_remap_cfgspace()	: ioremap PCI configuration space
+  devm_pci_remap_cfg_resource()	: ioremap PCI configuration space resource
+  pcim_enable_device()		: after success, all PCI ops become managed
+  pcim_pin_device()		: keep PCI device enabled after release
+
+PHY
+  devm_usb_get_phy()
+  devm_usb_put_phy()
+
+PINCTRL
+  devm_pinctrl_get()
+  devm_pinctrl_put()
+  devm_pinctrl_register()
+  devm_pinctrl_unregister()
+
+POWER
+  devm_reboot_mode_register()
+  devm_reboot_mode_unregister()
+
+PWM
+  devm_pwm_get()
+  devm_pwm_put()
+
+REGULATOR
+  devm_regulator_bulk_get()
+  devm_regulator_get()
+  devm_regulator_put()
+  devm_regulator_register()
+
+RESET
+  devm_reset_control_get()
+  devm_reset_controller_register()
+
+SERDEV
+  devm_serdev_device_open()
+
+SLAVE DMA ENGINE
+  devm_acpi_dma_controller_register()
+
+SPI
+  devm_spi_register_master()
+
+WATCHDOG
+  devm_watchdog_register_device()
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
deleted file mode 100644
index 69c7fa7f616c..000000000000
--- a/Documentation/driver-model/devres.txt
+++ /dev/null
@@ -1,412 +0,0 @@
-Devres - Managed Device Resource
-================================
-
-Tejun Heo	<teheo@suse.de>
-
-First draft	10 January 2007
-
-
-1. Intro			: Huh? Devres?
-2. Devres			: Devres in a nutshell
-3. Devres Group			: Group devres'es and release them together
-4. Details			: Life time rules, calling context, ...
-5. Overhead			: How much do we have to pay for this?
-6. List of managed interfaces	: Currently implemented managed interfaces
-
-
-  1. Intro
-  --------
-
-devres came up while trying to convert libata to use iomap.  Each
-iomapped address should be kept and unmapped on driver detach.  For
-example, a plain SFF ATA controller (that is, good old PCI IDE) in
-native mode makes use of 5 PCI BARs and all of them should be
-maintained.
-
-As with many other device drivers, libata low level drivers have
-sufficient bugs in ->remove and ->probe failure path.  Well, yes,
-that's probably because libata low level driver developers are lazy
-bunch, but aren't all low level driver developers?  After spending a
-day fiddling with braindamaged hardware with no document or
-braindamaged document, if it's finally working, well, it's working.
-
-For one reason or another, low level drivers don't receive as much
-attention or testing as core code, and bugs on driver detach or
-initialization failure don't happen often enough to be noticeable.
-Init failure path is worse because it's much less travelled while
-needs to handle multiple entry points.
-
-So, many low level drivers end up leaking resources on driver detach
-and having half broken failure path implementation in ->probe() which
-would leak resources or even cause oops when failure occurs.  iomap
-adds more to this mix.  So do msi and msix.
-
-
-  2. Devres
-  ---------
-
-devres is basically linked list of arbitrarily sized memory areas
-associated with a struct device.  Each devres entry is associated with
-a release function.  A devres can be released in several ways.  No
-matter what, all devres entries are released on driver detach.  On
-release, the associated release function is invoked and then the
-devres entry is freed.
-
-Managed interface is created for resources commonly used by device
-drivers using devres.  For example, coherent DMA memory is acquired
-using dma_alloc_coherent().  The managed version is called
-dmam_alloc_coherent().  It is identical to dma_alloc_coherent() except
-for the DMA memory allocated using it is managed and will be
-automatically released on driver detach.  Implementation looks like
-the following.
-
-  struct dma_devres {
-	size_t		size;
-	void		*vaddr;
-	dma_addr_t	dma_handle;
-  };
-
-  static void dmam_coherent_release(struct device *dev, void *res)
-  {
-	struct dma_devres *this = res;
-
-	dma_free_coherent(dev, this->size, this->vaddr, this->dma_handle);
-  }
-
-  dmam_alloc_coherent(dev, size, dma_handle, gfp)
-  {
-	struct dma_devres *dr;
-	void *vaddr;
-
-	dr = devres_alloc(dmam_coherent_release, sizeof(*dr), gfp);
-	...
-
-	/* alloc DMA memory as usual */
-	vaddr = dma_alloc_coherent(...);
-	...
-
-	/* record size, vaddr, dma_handle in dr */
-	dr->vaddr = vaddr;
-	...
-
-	devres_add(dev, dr);
-
-	return vaddr;
-  }
-
-If a driver uses dmam_alloc_coherent(), the area is guaranteed to be
-freed whether initialization fails half-way or the device gets
-detached.  If most resources are acquired using managed interface, a
-driver can have much simpler init and exit code.  Init path basically
-looks like the following.
-
-  my_init_one()
-  {
-	struct mydev *d;
-
-	d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
-	if (!d)
-		return -ENOMEM;
-
-	d->ring = dmam_alloc_coherent(...);
-	if (!d->ring)
-		return -ENOMEM;
-
-	if (check something)
-		return -EINVAL;
-	...
-
-	return register_to_upper_layer(d);
-  }
-
-And exit path,
-
-  my_remove_one()
-  {
-	unregister_from_upper_layer(d);
-	shutdown_my_hardware();
-  }
-
-As shown above, low level drivers can be simplified a lot by using
-devres.  Complexity is shifted from less maintained low level drivers
-to better maintained higher layer.  Also, as init failure path is
-shared with exit path, both can get more testing.
-
-Note though that when converting current calls or assignments to
-managed devm_* versions it is up to you to check if internal operations
-like allocating memory, have failed. Managed resources pertains to the
-freeing of these resources *only* - all other checks needed are still
-on you. In some cases this may mean introducing checks that were not
-necessary before moving to the managed devm_* calls.
-
-
-  3. Devres group
-  ---------------
-
-Devres entries can be grouped using devres group.  When a group is
-released, all contained normal devres entries and properly nested
-groups are released.  One usage is to rollback series of acquired
-resources on failure.  For example,
-
-  if (!devres_open_group(dev, NULL, GFP_KERNEL))
-	return -ENOMEM;
-
-  acquire A;
-  if (failed)
-	goto err;
-
-  acquire B;
-  if (failed)
-	goto err;
-  ...
-
-  devres_remove_group(dev, NULL);
-  return 0;
-
- err:
-  devres_release_group(dev, NULL);
-  return err_code;
-
-As resource acquisition failure usually means probe failure, constructs
-like above are usually useful in midlayer driver (e.g. libata core
-layer) where interface function shouldn't have side effect on failure.
-For LLDs, just returning error code suffices in most cases.
-
-Each group is identified by void *id.  It can either be explicitly
-specified by @id argument to devres_open_group() or automatically
-created by passing NULL as @id as in the above example.  In both
-cases, devres_open_group() returns the group's id.  The returned id
-can be passed to other devres functions to select the target group.
-If NULL is given to those functions, the latest open group is
-selected.
-
-For example, you can do something like the following.
-
-  int my_midlayer_create_something()
-  {
-	if (!devres_open_group(dev, my_midlayer_create_something, GFP_KERNEL))
-		return -ENOMEM;
-
-	...
-
-	devres_close_group(dev, my_midlayer_create_something);
-	return 0;
-  }
-
-  void my_midlayer_destroy_something()
-  {
-	devres_release_group(dev, my_midlayer_create_something);
-  }
-
-
-  4. Details
-  ----------
-
-Lifetime of a devres entry begins on devres allocation and finishes
-when it is released or destroyed (removed and freed) - no reference
-counting.
-
-devres core guarantees atomicity to all basic devres operations and
-has support for single-instance devres types (atomic
-lookup-and-add-if-not-found).  Other than that, synchronizing
-concurrent accesses to allocated devres data is caller's
-responsibility.  This is usually non-issue because bus ops and
-resource allocations already do the job.
-
-For an example of single-instance devres type, read pcim_iomap_table()
-in lib/devres.c.
-
-All devres interface functions can be called without context if the
-right gfp mask is given.
-
-
-  5. Overhead
-  -----------
-
-Each devres bookkeeping info is allocated together with requested data
-area.  With debug option turned off, bookkeeping info occupies 16
-bytes on 32bit machines and 24 bytes on 64bit (three pointers rounded
-up to ull alignment).  If singly linked list is used, it can be
-reduced to two pointers (8 bytes on 32bit, 16 bytes on 64bit).
-
-Each devres group occupies 8 pointers.  It can be reduced to 6 if
-singly linked list is used.
-
-Memory space overhead on ahci controller with two ports is between 300
-and 400 bytes on 32bit machine after naive conversion (we can
-certainly invest a bit more effort into libata core layer).
-
-
-  6. List of managed interfaces
-  -----------------------------
-
-CLOCK
-  devm_clk_get()
-  devm_clk_get_optional()
-  devm_clk_put()
-  devm_clk_hw_register()
-  devm_of_clk_add_hw_provider()
-  devm_clk_hw_register_clkdev()
-
-DMA
-  dmaenginem_async_device_register()
-  dmam_alloc_coherent()
-  dmam_alloc_attrs()
-  dmam_free_coherent()
-  dmam_pool_create()
-  dmam_pool_destroy()
-
-DRM
-  devm_drm_dev_init()
-
-GPIO
-  devm_gpiod_get()
-  devm_gpiod_get_index()
-  devm_gpiod_get_index_optional()
-  devm_gpiod_get_optional()
-  devm_gpiod_put()
-  devm_gpiod_unhinge()
-  devm_gpiochip_add_data()
-  devm_gpio_request()
-  devm_gpio_request_one()
-  devm_gpio_free()
-
-I2C
-  devm_i2c_new_dummy_device()
-
-IIO
-  devm_iio_device_alloc()
-  devm_iio_device_free()
-  devm_iio_device_register()
-  devm_iio_device_unregister()
-  devm_iio_kfifo_allocate()
-  devm_iio_kfifo_free()
-  devm_iio_triggered_buffer_setup()
-  devm_iio_triggered_buffer_cleanup()
-  devm_iio_trigger_alloc()
-  devm_iio_trigger_free()
-  devm_iio_trigger_register()
-  devm_iio_trigger_unregister()
-  devm_iio_channel_get()
-  devm_iio_channel_release()
-  devm_iio_channel_get_all()
-  devm_iio_channel_release_all()
-
-INPUT
-  devm_input_allocate_device()
-
-IO region
-  devm_release_mem_region()
-  devm_release_region()
-  devm_release_resource()
-  devm_request_mem_region()
-  devm_request_region()
-  devm_request_resource()
-
-IOMAP
-  devm_ioport_map()
-  devm_ioport_unmap()
-  devm_ioremap()
-  devm_ioremap_nocache()
-  devm_ioremap_wc()
-  devm_ioremap_resource() : checks resource, requests memory region, ioremaps
-  devm_iounmap()
-  pcim_iomap()
-  pcim_iomap_regions()	: do request_region() and iomap() on multiple BARs
-  pcim_iomap_table()	: array of mapped addresses indexed by BAR
-  pcim_iounmap()
-
-IRQ
-  devm_free_irq()
-  devm_request_any_context_irq()
-  devm_request_irq()
-  devm_request_threaded_irq()
-  devm_irq_alloc_descs()
-  devm_irq_alloc_desc()
-  devm_irq_alloc_desc_at()
-  devm_irq_alloc_desc_from()
-  devm_irq_alloc_descs_from()
-  devm_irq_alloc_generic_chip()
-  devm_irq_setup_generic_chip()
-  devm_irq_sim_init()
-
-LED
-  devm_led_classdev_register()
-  devm_led_classdev_unregister()
-
-MDIO
-  devm_mdiobus_alloc()
-  devm_mdiobus_alloc_size()
-  devm_mdiobus_free()
-
-MEM
-  devm_free_pages()
-  devm_get_free_pages()
-  devm_kasprintf()
-  devm_kcalloc()
-  devm_kfree()
-  devm_kmalloc()
-  devm_kmalloc_array()
-  devm_kmemdup()
-  devm_kstrdup()
-  devm_kvasprintf()
-  devm_kzalloc()
-
-MFD
-  devm_mfd_add_devices()
-
-MUX
-  devm_mux_chip_alloc()
-  devm_mux_chip_register()
-  devm_mux_control_get()
-
-PER-CPU MEM
-  devm_alloc_percpu()
-  devm_free_percpu()
-
-PCI
-  devm_pci_alloc_host_bridge()  : managed PCI host bridge allocation
-  devm_pci_remap_cfgspace()	: ioremap PCI configuration space
-  devm_pci_remap_cfg_resource()	: ioremap PCI configuration space resource
-  pcim_enable_device()		: after success, all PCI ops become managed
-  pcim_pin_device()		: keep PCI device enabled after release
-
-PHY
-  devm_usb_get_phy()
-  devm_usb_put_phy()
-
-PINCTRL
-  devm_pinctrl_get()
-  devm_pinctrl_put()
-  devm_pinctrl_register()
-  devm_pinctrl_unregister()
-
-POWER
-  devm_reboot_mode_register()
-  devm_reboot_mode_unregister()
-
-PWM
-  devm_pwm_get()
-  devm_pwm_put()
-
-REGULATOR
-  devm_regulator_bulk_get()
-  devm_regulator_get()
-  devm_regulator_put()
-  devm_regulator_register()
-
-RESET
-  devm_reset_control_get()
-  devm_reset_controller_register()
-
-SERDEV
-  devm_serdev_device_open()
-
-SLAVE DMA ENGINE
-  devm_acpi_dma_controller_register()
-
-SPI
-  devm_spi_register_master()
-
-WATCHDOG
-  devm_watchdog_register_device()
diff --git a/Documentation/driver-model/driver.rst b/Documentation/driver-model/driver.rst
new file mode 100644
index 000000000000..11d281506a04
--- /dev/null
+++ b/Documentation/driver-model/driver.rst
@@ -0,0 +1,223 @@
+==============
+Device Drivers
+==============
+
+See the kerneldoc for the struct device_driver.
+
+
+Allocation
+~~~~~~~~~~
+
+Device drivers are statically allocated structures. Though there may
+be multiple devices in a system that a driver supports, struct
+device_driver represents the driver as a whole (not a particular
+device instance).
+
+Initialization
+~~~~~~~~~~~~~~
+
+The driver must initialize at least the name and bus fields. It should
+also initialize the devclass field (when it arrives), so it may obtain
+the proper linkage internally. It should also initialize as many of
+the callbacks as possible, though each is optional.
+
+Declaration
+~~~~~~~~~~~
+
+As stated above, struct device_driver objects are statically
+allocated. Below is an example declaration of the eepro100
+driver. This declaration is hypothetical only; it relies on the driver
+being converted completely to the new model::
+
+  static struct device_driver eepro100_driver = {
+         .name		= "eepro100",
+         .bus		= &pci_bus_type,
+
+         .probe		= eepro100_probe,
+         .remove		= eepro100_remove,
+         .suspend		= eepro100_suspend,
+         .resume		= eepro100_resume,
+  };
+
+Most drivers will not be able to be converted completely to the new
+model because the bus they belong to has a bus-specific structure with
+bus-specific fields that cannot be generalized.
+
+The most common example of this are device ID structures. A driver
+typically defines an array of device IDs that it supports. The format
+of these structures and the semantics for comparing device IDs are
+completely bus-specific. Defining them as bus-specific entities would
+sacrifice type-safety, so we keep bus-specific structures around.
+
+Bus-specific drivers should include a generic struct device_driver in
+the definition of the bus-specific driver. Like this::
+
+  struct pci_driver {
+         const struct pci_device_id *id_table;
+         struct device_driver	  driver;
+  };
+
+A definition that included bus-specific fields would look like
+(using the eepro100 driver again)::
+
+  static struct pci_driver eepro100_driver = {
+         .id_table       = eepro100_pci_tbl,
+         .driver	       = {
+		.name		= "eepro100",
+		.bus		= &pci_bus_type,
+		.probe		= eepro100_probe,
+		.remove		= eepro100_remove,
+		.suspend	= eepro100_suspend,
+		.resume		= eepro100_resume,
+         },
+  };
+
+Some may find the syntax of embedded struct initialization awkward or
+even a bit ugly. So far, it's the best way we've found to do what we want...
+
+Registration
+~~~~~~~~~~~~
+
+::
+
+  int driver_register(struct device_driver *drv);
+
+The driver registers the structure on startup. For drivers that have
+no bus-specific fields (i.e. don't have a bus-specific driver
+structure), they would use driver_register and pass a pointer to their
+struct device_driver object.
+
+Most drivers, however, will have a bus-specific structure and will
+need to register with the bus using something like pci_driver_register.
+
+It is important that drivers register their driver structure as early as
+possible. Registration with the core initializes several fields in the
+struct device_driver object, including the reference count and the
+lock. These fields are assumed to be valid at all times and may be
+used by the device model core or the bus driver.
+
+
+Transition Bus Drivers
+~~~~~~~~~~~~~~~~~~~~~~
+
+By defining wrapper functions, the transition to the new model can be
+made easier. Drivers can ignore the generic structure altogether and
+let the bus wrapper fill in the fields. For the callbacks, the bus can
+define generic callbacks that forward the call to the bus-specific
+callbacks of the drivers.
+
+This solution is intended to be only temporary. In order to get class
+information in the driver, the drivers must be modified anyway. Since
+converting drivers to the new model should reduce some infrastructural
+complexity and code size, it is recommended that they are converted as
+class information is added.
+
+Access
+~~~~~~
+
+Once the object has been registered, it may access the common fields of
+the object, like the lock and the list of devices::
+
+  int driver_for_each_dev(struct device_driver *drv, void *data,
+			  int (*callback)(struct device *dev, void *data));
+
+The devices field is a list of all the devices that have been bound to
+the driver. The LDM core provides a helper function to operate on all
+the devices a driver controls. This helper locks the driver on each
+node access, and does proper reference counting on each device as it
+accesses it.
+
+
+sysfs
+~~~~~
+
+When a driver is registered, a sysfs directory is created in its
+bus's directory. In this directory, the driver can export an interface
+to userspace to control operation of the driver on a global basis;
+e.g. toggling debugging output in the driver.
+
+A future feature of this directory will be a 'devices' directory. This
+directory will contain symlinks to the directories of devices it
+supports.
+
+
+
+Callbacks
+~~~~~~~~~
+
+::
+
+	int	(*probe)	(struct device *dev);
+
+The probe() entry is called in task context, with the bus's rwsem locked
+and the driver partially bound to the device.  Drivers commonly use
+container_of() to convert "dev" to a bus-specific type, both in probe()
+and other routines.  That type often provides device resource data, such
+as pci_dev.resource[] or platform_device.resources, which is used in
+addition to dev->platform_data to initialize the driver.
+
+This callback holds the driver-specific logic to bind the driver to a
+given device.  That includes verifying that the device is present, that
+it's a version the driver can handle, that driver data structures can
+be allocated and initialized, and that any hardware can be initialized.
+Drivers often store a pointer to their state with dev_set_drvdata().
+When the driver has successfully bound itself to that device, then probe()
+returns zero and the driver model code will finish its part of binding
+the driver to that device.
+
+A driver's probe() may return a negative errno value to indicate that
+the driver did not bind to this device, in which case it should have
+released all resources it allocated::
+
+	int 	(*remove)	(struct device *dev);
+
+remove is called to unbind a driver from a device. This may be
+called if a device is physically removed from the system, if the
+driver module is being unloaded, during a reboot sequence, or
+in other cases.
+
+It is up to the driver to determine if the device is present or
+not. It should free any resources allocated specifically for the
+device; i.e. anything in the device's driver_data field.
+
+If the device is still present, it should quiesce the device and place
+it into a supported low-power state::
+
+	int	(*suspend)	(struct device *dev, pm_message_t state);
+
+suspend is called to put the device in a low power state::
+
+	int	(*resume)	(struct device *dev);
+
+Resume is used to bring a device back from a low power state.
+
+
+Attributes
+~~~~~~~~~~
+
+::
+
+  struct driver_attribute {
+          struct attribute        attr;
+          ssize_t (*show)(struct device_driver *driver, char *buf);
+          ssize_t (*store)(struct device_driver *, const char *buf, size_t count);
+  };
+
+Device drivers can export attributes via their sysfs directories.
+Drivers can declare attributes using a DRIVER_ATTR_RW and DRIVER_ATTR_RO
+macro that works identically to the DEVICE_ATTR_RW and DEVICE_ATTR_RO
+macros.
+
+Example::
+
+	DRIVER_ATTR_RW(debug);
+
+This is equivalent to declaring::
+
+	struct driver_attribute driver_attr_debug;
+
+This can then be used to add and remove the attribute from the
+driver's directory using::
+
+  int driver_create_file(struct device_driver *, const struct driver_attribute *);
+  void driver_remove_file(struct device_driver *, const struct driver_attribute *);
diff --git a/Documentation/driver-model/driver.txt b/Documentation/driver-model/driver.txt
deleted file mode 100644
index d661e6f7e6a0..000000000000
--- a/Documentation/driver-model/driver.txt
+++ /dev/null
@@ -1,215 +0,0 @@
-
-Device Drivers
-
-See the kerneldoc for the struct device_driver.
-
-
-Allocation
-~~~~~~~~~~
-
-Device drivers are statically allocated structures. Though there may
-be multiple devices in a system that a driver supports, struct
-device_driver represents the driver as a whole (not a particular
-device instance).
-
-Initialization
-~~~~~~~~~~~~~~
-
-The driver must initialize at least the name and bus fields. It should
-also initialize the devclass field (when it arrives), so it may obtain
-the proper linkage internally. It should also initialize as many of
-the callbacks as possible, though each is optional.
-
-Declaration
-~~~~~~~~~~~
-
-As stated above, struct device_driver objects are statically
-allocated. Below is an example declaration of the eepro100
-driver. This declaration is hypothetical only; it relies on the driver
-being converted completely to the new model. 
-
-static struct device_driver eepro100_driver = {
-       .name		= "eepro100",
-       .bus		= &pci_bus_type,
-       
-       .probe		= eepro100_probe,
-       .remove		= eepro100_remove,
-       .suspend		= eepro100_suspend,
-       .resume		= eepro100_resume,
-};
-
-Most drivers will not be able to be converted completely to the new
-model because the bus they belong to has a bus-specific structure with
-bus-specific fields that cannot be generalized. 
-
-The most common example of this are device ID structures. A driver
-typically defines an array of device IDs that it supports. The format
-of these structures and the semantics for comparing device IDs are
-completely bus-specific. Defining them as bus-specific entities would
-sacrifice type-safety, so we keep bus-specific structures around. 
-
-Bus-specific drivers should include a generic struct device_driver in
-the definition of the bus-specific driver. Like this:
-
-struct pci_driver {
-       const struct pci_device_id *id_table;
-       struct device_driver	  driver;
-};
-
-A definition that included bus-specific fields would look like
-(using the eepro100 driver again):
-
-static struct pci_driver eepro100_driver = {
-       .id_table       = eepro100_pci_tbl,
-       .driver	       = {
-		.name		= "eepro100",
-		.bus		= &pci_bus_type,
-		.probe		= eepro100_probe,
-		.remove		= eepro100_remove,
-		.suspend	= eepro100_suspend,
-		.resume		= eepro100_resume,
-       },
-};
-
-Some may find the syntax of embedded struct initialization awkward or
-even a bit ugly. So far, it's the best way we've found to do what we want...
-
-Registration
-~~~~~~~~~~~~
-
-int driver_register(struct device_driver * drv);
-
-The driver registers the structure on startup. For drivers that have
-no bus-specific fields (i.e. don't have a bus-specific driver
-structure), they would use driver_register and pass a pointer to their
-struct device_driver object. 
-
-Most drivers, however, will have a bus-specific structure and will
-need to register with the bus using something like pci_driver_register.
-
-It is important that drivers register their driver structure as early as
-possible. Registration with the core initializes several fields in the
-struct device_driver object, including the reference count and the
-lock. These fields are assumed to be valid at all times and may be
-used by the device model core or the bus driver.
-
-
-Transition Bus Drivers
-~~~~~~~~~~~~~~~~~~~~~~
-
-By defining wrapper functions, the transition to the new model can be
-made easier. Drivers can ignore the generic structure altogether and
-let the bus wrapper fill in the fields. For the callbacks, the bus can
-define generic callbacks that forward the call to the bus-specific
-callbacks of the drivers. 
-
-This solution is intended to be only temporary. In order to get class
-information in the driver, the drivers must be modified anyway. Since
-converting drivers to the new model should reduce some infrastructural
-complexity and code size, it is recommended that they are converted as
-class information is added.
-
-Access
-~~~~~~
-
-Once the object has been registered, it may access the common fields of
-the object, like the lock and the list of devices. 
-
-int driver_for_each_dev(struct device_driver * drv, void * data, 
-		        int (*callback)(struct device * dev, void * data));
-
-The devices field is a list of all the devices that have been bound to
-the driver. The LDM core provides a helper function to operate on all
-the devices a driver controls. This helper locks the driver on each
-node access, and does proper reference counting on each device as it
-accesses it. 
-
-
-sysfs
-~~~~~
-
-When a driver is registered, a sysfs directory is created in its
-bus's directory. In this directory, the driver can export an interface
-to userspace to control operation of the driver on a global basis;
-e.g. toggling debugging output in the driver.
-
-A future feature of this directory will be a 'devices' directory. This
-directory will contain symlinks to the directories of devices it
-supports.
-
-
-
-Callbacks
-~~~~~~~~~
-
-	int	(*probe)	(struct device * dev);
-
-The probe() entry is called in task context, with the bus's rwsem locked
-and the driver partially bound to the device.  Drivers commonly use
-container_of() to convert "dev" to a bus-specific type, both in probe()
-and other routines.  That type often provides device resource data, such
-as pci_dev.resource[] or platform_device.resources, which is used in
-addition to dev->platform_data to initialize the driver.
-
-This callback holds the driver-specific logic to bind the driver to a
-given device.  That includes verifying that the device is present, that
-it's a version the driver can handle, that driver data structures can
-be allocated and initialized, and that any hardware can be initialized.
-Drivers often store a pointer to their state with dev_set_drvdata().
-When the driver has successfully bound itself to that device, then probe()
-returns zero and the driver model code will finish its part of binding
-the driver to that device.
-
-A driver's probe() may return a negative errno value to indicate that
-the driver did not bind to this device, in which case it should have
-released all resources it allocated.
-
-	int 	(*remove)	(struct device * dev);
-
-remove is called to unbind a driver from a device. This may be
-called if a device is physically removed from the system, if the
-driver module is being unloaded, during a reboot sequence, or
-in other cases.
-
-It is up to the driver to determine if the device is present or
-not. It should free any resources allocated specifically for the
-device; i.e. anything in the device's driver_data field. 
-
-If the device is still present, it should quiesce the device and place
-it into a supported low-power state.
-
-	int	(*suspend)	(struct device * dev, pm_message_t state);
-
-suspend is called to put the device in a low power state.
-
-	int	(*resume)	(struct device * dev);
-
-Resume is used to bring a device back from a low power state.
-
-
-Attributes
-~~~~~~~~~~
-struct driver_attribute {
-        struct attribute        attr;
-        ssize_t (*show)(struct device_driver *driver, char *buf);
-        ssize_t (*store)(struct device_driver *, const char * buf, size_t count);
-};
-
-Device drivers can export attributes via their sysfs directories. 
-Drivers can declare attributes using a DRIVER_ATTR_RW and DRIVER_ATTR_RO
-macro that works identically to the DEVICE_ATTR_RW and DEVICE_ATTR_RO
-macros.
-
-Example:
-
-DRIVER_ATTR_RW(debug);
-
-This is equivalent to declaring:
-
-struct driver_attribute driver_attr_debug;
-
-This can then be used to add and remove the attribute from the
-driver's directory using:
-
-int driver_create_file(struct device_driver *, const struct driver_attribute *);
-void driver_remove_file(struct device_driver *, const struct driver_attribute *);
diff --git a/Documentation/driver-model/index.rst b/Documentation/driver-model/index.rst
new file mode 100644
index 000000000000..9f85d579ce56
--- /dev/null
+++ b/Documentation/driver-model/index.rst
@@ -0,0 +1,26 @@
+:orphan:
+
+============
+Driver Model
+============
+
+.. toctree::
+   :maxdepth: 1
+
+   binding
+   bus
+   class
+   design-patterns
+   device
+   devres
+   driver
+   overview
+   platform
+   porting
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/driver-model/overview.rst b/Documentation/driver-model/overview.rst
new file mode 100644
index 000000000000..d4d1e9b40e0c
--- /dev/null
+++ b/Documentation/driver-model/overview.rst
@@ -0,0 +1,124 @@
+=============================
+The Linux Kernel Device Model
+=============================
+
+Patrick Mochel	<mochel@digitalimplant.org>
+
+Drafted 26 August 2002
+Updated 31 January 2006
+
+
+Overview
+~~~~~~~~
+
+The Linux Kernel Driver Model is a unification of all the disparate driver
+models that were previously used in the kernel. It is intended to augment the
+bus-specific drivers for bridges and devices by consolidating a set of data
+and operations into globally accessible data structures.
+
+Traditional driver models implemented some sort of tree-like structure
+(sometimes just a list) for the devices they control. There wasn't any
+uniformity across the different bus types.
+
+The current driver model provides a common, uniform data model for describing
+a bus and the devices that can appear under the bus. The unified bus
+model includes a set of common attributes which all busses carry, and a set
+of common callbacks, such as device discovery during bus probing, bus
+shutdown, bus power management, etc.
+
+The common device and bridge interface reflects the goals of the modern
+computer: namely the ability to do seamless device "plug and play", power
+management, and hot plug. In particular, the model dictated by Intel and
+Microsoft (namely ACPI) ensures that almost every device on almost any bus
+on an x86-compatible system can work within this paradigm.  Of course,
+not every bus is able to support all such operations, although most
+buses support most of those operations.
+
+
+Downstream Access
+~~~~~~~~~~~~~~~~~
+
+Common data fields have been moved out of individual bus layers into a common
+data structure. These fields must still be accessed by the bus layers,
+and sometimes by the device-specific drivers.
+
+Other bus layers are encouraged to do what has been done for the PCI layer.
+struct pci_dev now looks like this::
+
+  struct pci_dev {
+	...
+
+	struct device dev;     /* Generic device interface */
+	...
+  };
+
+Note first that the struct device dev within the struct pci_dev is
+statically allocated. This means only one allocation on device discovery.
+
+Note also that that struct device dev is not necessarily defined at the
+front of the pci_dev structure.  This is to make people think about what
+they're doing when switching between the bus driver and the global driver,
+and to discourage meaningless and incorrect casts between the two.
+
+The PCI bus layer freely accesses the fields of struct device. It knows about
+the structure of struct pci_dev, and it should know the structure of struct
+device. Individual PCI device drivers that have been converted to the current
+driver model generally do not and should not touch the fields of struct device,
+unless there is a compelling reason to do so.
+
+The above abstraction prevents unnecessary pain during transitional phases.
+If it were not done this way, then when a field was renamed or removed, every
+downstream driver would break.  On the other hand, if only the bus layer
+(and not the device layer) accesses the struct device, it is only the bus
+layer that needs to change.
+
+
+User Interface
+~~~~~~~~~~~~~~
+
+By virtue of having a complete hierarchical view of all the devices in the
+system, exporting a complete hierarchical view to userspace becomes relatively
+easy. This has been accomplished by implementing a special purpose virtual
+file system named sysfs.
+
+Almost all mainstream Linux distros mount this filesystem automatically; you
+can see some variation of the following in the output of the "mount" command::
+
+  $ mount
+  ...
+  none on /sys type sysfs (rw,noexec,nosuid,nodev)
+  ...
+  $
+
+The auto-mounting of sysfs is typically accomplished by an entry similar to
+the following in the /etc/fstab file::
+
+  none     	/sys	sysfs    defaults	  	0 0
+
+or something similar in the /lib/init/fstab file on Debian-based systems::
+
+  none            /sys    sysfs    nodev,noexec,nosuid    0 0
+
+If sysfs is not automatically mounted, you can always do it manually with::
+
+	# mount -t sysfs sysfs /sys
+
+Whenever a device is inserted into the tree, a directory is created for it.
+This directory may be populated at each layer of discovery - the global layer,
+the bus layer, or the device layer.
+
+The global layer currently creates two files - 'name' and 'power'. The
+former only reports the name of the device. The latter reports the
+current power state of the device. It will also be used to set the current
+power state.
+
+The bus layer may also create files for the devices it finds while probing the
+bus. For example, the PCI layer currently creates 'irq' and 'resource' files
+for each PCI device.
+
+A device-specific driver may also export files in its directory to expose
+device-specific data or tunable interfaces.
+
+More information about the sysfs directory layout can be found in
+the other documents in this directory and in the file
+Documentation/filesystems/sysfs.txt.
diff --git a/Documentation/driver-model/overview.txt b/Documentation/driver-model/overview.txt
deleted file mode 100644
index 6a8f9a8075d8..000000000000
--- a/Documentation/driver-model/overview.txt
+++ /dev/null
@@ -1,123 +0,0 @@
-The Linux Kernel Device Model
-
-Patrick Mochel	<mochel@digitalimplant.org>
-
-Drafted 26 August 2002
-Updated 31 January 2006
-
-
-Overview
-~~~~~~~~
-
-The Linux Kernel Driver Model is a unification of all the disparate driver
-models that were previously used in the kernel. It is intended to augment the
-bus-specific drivers for bridges and devices by consolidating a set of data
-and operations into globally accessible data structures.
-
-Traditional driver models implemented some sort of tree-like structure
-(sometimes just a list) for the devices they control. There wasn't any
-uniformity across the different bus types.
-
-The current driver model provides a common, uniform data model for describing
-a bus and the devices that can appear under the bus. The unified bus
-model includes a set of common attributes which all busses carry, and a set
-of common callbacks, such as device discovery during bus probing, bus
-shutdown, bus power management, etc.
-
-The common device and bridge interface reflects the goals of the modern
-computer: namely the ability to do seamless device "plug and play", power
-management, and hot plug. In particular, the model dictated by Intel and
-Microsoft (namely ACPI) ensures that almost every device on almost any bus
-on an x86-compatible system can work within this paradigm.  Of course,
-not every bus is able to support all such operations, although most
-buses support most of those operations.
-
-
-Downstream Access
-~~~~~~~~~~~~~~~~~
-
-Common data fields have been moved out of individual bus layers into a common
-data structure. These fields must still be accessed by the bus layers,
-and sometimes by the device-specific drivers.
-
-Other bus layers are encouraged to do what has been done for the PCI layer.
-struct pci_dev now looks like this:
-
-struct pci_dev {
-	...
-
-	struct device dev;     /* Generic device interface */
-	...
-};
-
-Note first that the struct device dev within the struct pci_dev is
-statically allocated. This means only one allocation on device discovery.
-
-Note also that that struct device dev is not necessarily defined at the
-front of the pci_dev structure.  This is to make people think about what
-they're doing when switching between the bus driver and the global driver,
-and to discourage meaningless and incorrect casts between the two.
-
-The PCI bus layer freely accesses the fields of struct device. It knows about
-the structure of struct pci_dev, and it should know the structure of struct
-device. Individual PCI device drivers that have been converted to the current
-driver model generally do not and should not touch the fields of struct device,
-unless there is a compelling reason to do so.
-
-The above abstraction prevents unnecessary pain during transitional phases.
-If it were not done this way, then when a field was renamed or removed, every
-downstream driver would break.  On the other hand, if only the bus layer
-(and not the device layer) accesses the struct device, it is only the bus
-layer that needs to change.
-
-
-User Interface
-~~~~~~~~~~~~~~
-
-By virtue of having a complete hierarchical view of all the devices in the
-system, exporting a complete hierarchical view to userspace becomes relatively
-easy. This has been accomplished by implementing a special purpose virtual
-file system named sysfs.
-
-Almost all mainstream Linux distros mount this filesystem automatically; you
-can see some variation of the following in the output of the "mount" command:
-
-$ mount
-...
-none on /sys type sysfs (rw,noexec,nosuid,nodev)
-...
-$
-
-The auto-mounting of sysfs is typically accomplished by an entry similar to
-the following in the /etc/fstab file:
-
-none     	/sys	sysfs    defaults	  	0 0
-
-or something similar in the /lib/init/fstab file on Debian-based systems:
-
-none            /sys    sysfs    nodev,noexec,nosuid    0 0
-
-If sysfs is not automatically mounted, you can always do it manually with:
-
-# mount -t sysfs sysfs /sys
-
-Whenever a device is inserted into the tree, a directory is created for it.
-This directory may be populated at each layer of discovery - the global layer,
-the bus layer, or the device layer.
-
-The global layer currently creates two files - 'name' and 'power'. The
-former only reports the name of the device. The latter reports the
-current power state of the device. It will also be used to set the current
-power state. 
-
-The bus layer may also create files for the devices it finds while probing the
-bus. For example, the PCI layer currently creates 'irq' and 'resource' files
-for each PCI device.
-
-A device-specific driver may also export files in its directory to expose
-device-specific data or tunable interfaces.
-
-More information about the sysfs directory layout can be found in
-the other documents in this directory and in the file 
-Documentation/filesystems/sysfs.txt.
-
diff --git a/Documentation/driver-model/platform.rst b/Documentation/driver-model/platform.rst
new file mode 100644
index 000000000000..334dd4071ae4
--- /dev/null
+++ b/Documentation/driver-model/platform.rst
@@ -0,0 +1,246 @@
+============================
+Platform Devices and Drivers
+============================
+
+See <linux/platform_device.h> for the driver model interface to the
+platform bus:  platform_device, and platform_driver.  This pseudo-bus
+is used to connect devices on busses with minimal infrastructure,
+like those used to integrate peripherals on many system-on-chip
+processors, or some "legacy" PC interconnects; as opposed to large
+formally specified ones like PCI or USB.
+
+
+Platform devices
+~~~~~~~~~~~~~~~~
+Platform devices are devices that typically appear as autonomous
+entities in the system. This includes legacy port-based devices and
+host bridges to peripheral buses, and most controllers integrated
+into system-on-chip platforms.  What they usually have in common
+is direct addressing from a CPU bus.  Rarely, a platform_device will
+be connected through a segment of some other kind of bus; but its
+registers will still be directly addressable.
+
+Platform devices are given a name, used in driver binding, and a
+list of resources such as addresses and IRQs::
+
+  struct platform_device {
+	const char	*name;
+	u32		id;
+	struct device	dev;
+	u32		num_resources;
+	struct resource	*resource;
+  };
+
+
+Platform drivers
+~~~~~~~~~~~~~~~~
+Platform drivers follow the standard driver model convention, where
+discovery/enumeration is handled outside the drivers, and drivers
+provide probe() and remove() methods.  They support power management
+and shutdown notifications using the standard conventions::
+
+  struct platform_driver {
+	int (*probe)(struct platform_device *);
+	int (*remove)(struct platform_device *);
+	void (*shutdown)(struct platform_device *);
+	int (*suspend)(struct platform_device *, pm_message_t state);
+	int (*suspend_late)(struct platform_device *, pm_message_t state);
+	int (*resume_early)(struct platform_device *);
+	int (*resume)(struct platform_device *);
+	struct device_driver driver;
+  };
+
+Note that probe() should in general verify that the specified device hardware
+actually exists; sometimes platform setup code can't be sure.  The probing
+can use device resources, including clocks, and device platform_data.
+
+Platform drivers register themselves the normal way::
+
+	int platform_driver_register(struct platform_driver *drv);
+
+Or, in common situations where the device is known not to be hot-pluggable,
+the probe() routine can live in an init section to reduce the driver's
+runtime memory footprint::
+
+	int platform_driver_probe(struct platform_driver *drv,
+			  int (*probe)(struct platform_device *))
+
+Kernel modules can be composed of several platform drivers. The platform core
+provides helpers to register and unregister an array of drivers::
+
+	int __platform_register_drivers(struct platform_driver * const *drivers,
+				      unsigned int count, struct module *owner);
+	void platform_unregister_drivers(struct platform_driver * const *drivers,
+					 unsigned int count);
+
+If one of the drivers fails to register, all drivers registered up to that
+point will be unregistered in reverse order. Note that there is a convenience
+macro that passes THIS_MODULE as owner parameter::
+
+	#define platform_register_drivers(drivers, count)
+
+
+Device Enumeration
+~~~~~~~~~~~~~~~~~~
+As a rule, platform specific (and often board-specific) setup code will
+register platform devices::
+
+	int platform_device_register(struct platform_device *pdev);
+
+	int platform_add_devices(struct platform_device **pdevs, int ndev);
+
+The general rule is to register only those devices that actually exist,
+but in some cases extra devices might be registered.  For example, a kernel
+might be configured to work with an external network adapter that might not
+be populated on all boards, or likewise to work with an integrated controller
+that some boards might not hook up to any peripherals.
+
+In some cases, boot firmware will export tables describing the devices
+that are populated on a given board.   Without such tables, often the
+only way for system setup code to set up the correct devices is to build
+a kernel for a specific target board.  Such board-specific kernels are
+common with embedded and custom systems development.
+
+In many cases, the memory and IRQ resources associated with the platform
+device are not enough to let the device's driver work.  Board setup code
+will often provide additional information using the device's platform_data
+field to hold additional information.
+
+Embedded systems frequently need one or more clocks for platform devices,
+which are normally kept off until they're actively needed (to save power).
+System setup also associates those clocks with the device, so that that
+calls to clk_get(&pdev->dev, clock_name) return them as needed.
+
+
+Legacy Drivers:  Device Probing
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Some drivers are not fully converted to the driver model, because they take
+on a non-driver role:  the driver registers its platform device, rather than
+leaving that for system infrastructure.  Such drivers can't be hotplugged
+or coldplugged, since those mechanisms require device creation to be in a
+different system component than the driver.
+
+The only "good" reason for this is to handle older system designs which, like
+original IBM PCs, rely on error-prone "probe-the-hardware" models for hardware
+configuration.  Newer systems have largely abandoned that model, in favor of
+bus-level support for dynamic configuration (PCI, USB), or device tables
+provided by the boot firmware (e.g. PNPACPI on x86).  There are too many
+conflicting options about what might be where, and even educated guesses by
+an operating system will be wrong often enough to make trouble.
+
+This style of driver is discouraged.  If you're updating such a driver,
+please try to move the device enumeration to a more appropriate location,
+outside the driver.  This will usually be cleanup, since such drivers
+tend to already have "normal" modes, such as ones using device nodes that
+were created by PNP or by platform device setup.
+
+None the less, there are some APIs to support such legacy drivers.  Avoid
+using these calls except with such hotplug-deficient drivers::
+
+	struct platform_device *platform_device_alloc(
+			const char *name, int id);
+
+You can use platform_device_alloc() to dynamically allocate a device, which
+you will then initialize with resources and platform_device_register().
+A better solution is usually::
+
+	struct platform_device *platform_device_register_simple(
+			const char *name, int id,
+			struct resource *res, unsigned int nres);
+
+You can use platform_device_register_simple() as a one-step call to allocate
+and register a device.
+
+
+Device Naming and Driver Binding
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The platform_device.dev.bus_id is the canonical name for the devices.
+It's built from two components:
+
+    * platform_device.name ... which is also used to for driver matching.
+
+    * platform_device.id ... the device instance number, or else "-1"
+      to indicate there's only one.
+
+These are concatenated, so name/id "serial"/0 indicates bus_id "serial.0", and
+"serial/3" indicates bus_id "serial.3"; both would use the platform_driver
+named "serial".  While "my_rtc"/-1 would be bus_id "my_rtc" (no instance id)
+and use the platform_driver called "my_rtc".
+
+Driver binding is performed automatically by the driver core, invoking
+driver probe() after finding a match between device and driver.  If the
+probe() succeeds, the driver and device are bound as usual.  There are
+three different ways to find such a match:
+
+    - Whenever a device is registered, the drivers for that bus are
+      checked for matches.  Platform devices should be registered very
+      early during system boot.
+
+    - When a driver is registered using platform_driver_register(), all
+      unbound devices on that bus are checked for matches.  Drivers
+      usually register later during booting, or by module loading.
+
+    - Registering a driver using platform_driver_probe() works just like
+      using platform_driver_register(), except that the driver won't
+      be probed later if another device registers.  (Which is OK, since
+      this interface is only for use with non-hotpluggable devices.)
+
+
+Early Platform Devices and Drivers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The early platform interfaces provide platform data to platform device
+drivers early on during the system boot. The code is built on top of the
+early_param() command line parsing and can be executed very early on.
+
+Example: "earlyprintk" class early serial console in 6 steps
+
+1. Registering early platform device data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The architecture code registers platform device data using the function
+early_platform_add_devices(). In the case of early serial console this
+should be hardware configuration for the serial port. Devices registered
+at this point will later on be matched against early platform drivers.
+
+2. Parsing kernel command line
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The architecture code calls parse_early_param() to parse the kernel
+command line. This will execute all matching early_param() callbacks.
+User specified early platform devices will be registered at this point.
+For the early serial console case the user can specify port on the
+kernel command line as "earlyprintk=serial.0" where "earlyprintk" is
+the class string, "serial" is the name of the platform driver and
+0 is the platform device id. If the id is -1 then the dot and the
+id can be omitted.
+
+3. Installing early platform drivers belonging to a certain class
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The architecture code may optionally force registration of all early
+platform drivers belonging to a certain class using the function
+early_platform_driver_register_all(). User specified devices from
+step 2 have priority over these. This step is omitted by the serial
+driver example since the early serial driver code should be disabled
+unless the user has specified port on the kernel command line.
+
+4. Early platform driver registration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Compiled-in platform drivers making use of early_platform_init() are
+automatically registered during step 2 or 3. The serial driver example
+should use early_platform_init("earlyprintk", &platform_driver).
+
+5. Probing of early platform drivers belonging to a certain class
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The architecture code calls early_platform_driver_probe() to match
+registered early platform devices associated with a certain class with
+registered early platform drivers. Matched devices will get probed().
+This step can be executed at any point during the early boot. As soon
+as possible may be good for the serial port case.
+
+6. Inside the early platform driver probe()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The driver code needs to take special care during early boot, especially
+when it comes to memory allocation and interrupt registration. The code
+in the probe() function can use is_early_platform_device() to check if
+it is called at early platform device or at the regular platform device
+time. The early serial driver performs register_console() at this point.
+
+For further information, see <linux/platform_device.h>.
diff --git a/Documentation/driver-model/platform.txt b/Documentation/driver-model/platform.txt
deleted file mode 100644
index 9d9e47dfc013..000000000000
--- a/Documentation/driver-model/platform.txt
+++ /dev/null
@@ -1,244 +0,0 @@
-Platform Devices and Drivers
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-See <linux/platform_device.h> for the driver model interface to the
-platform bus:  platform_device, and platform_driver.  This pseudo-bus
-is used to connect devices on busses with minimal infrastructure,
-like those used to integrate peripherals on many system-on-chip
-processors, or some "legacy" PC interconnects; as opposed to large
-formally specified ones like PCI or USB.
-
-
-Platform devices
-~~~~~~~~~~~~~~~~
-Platform devices are devices that typically appear as autonomous
-entities in the system. This includes legacy port-based devices and
-host bridges to peripheral buses, and most controllers integrated
-into system-on-chip platforms.  What they usually have in common
-is direct addressing from a CPU bus.  Rarely, a platform_device will
-be connected through a segment of some other kind of bus; but its
-registers will still be directly addressable.
-
-Platform devices are given a name, used in driver binding, and a
-list of resources such as addresses and IRQs.
-
-struct platform_device {
-	const char	*name;
-	u32		id;
-	struct device	dev;
-	u32		num_resources;
-	struct resource	*resource;
-};
-
-
-Platform drivers
-~~~~~~~~~~~~~~~~
-Platform drivers follow the standard driver model convention, where
-discovery/enumeration is handled outside the drivers, and drivers
-provide probe() and remove() methods.  They support power management
-and shutdown notifications using the standard conventions.
-
-struct platform_driver {
-	int (*probe)(struct platform_device *);
-	int (*remove)(struct platform_device *);
-	void (*shutdown)(struct platform_device *);
-	int (*suspend)(struct platform_device *, pm_message_t state);
-	int (*suspend_late)(struct platform_device *, pm_message_t state);
-	int (*resume_early)(struct platform_device *);
-	int (*resume)(struct platform_device *);
-	struct device_driver driver;
-};
-
-Note that probe() should in general verify that the specified device hardware
-actually exists; sometimes platform setup code can't be sure.  The probing
-can use device resources, including clocks, and device platform_data.
-
-Platform drivers register themselves the normal way:
-
-	int platform_driver_register(struct platform_driver *drv);
-
-Or, in common situations where the device is known not to be hot-pluggable,
-the probe() routine can live in an init section to reduce the driver's
-runtime memory footprint:
-
-	int platform_driver_probe(struct platform_driver *drv,
-			  int (*probe)(struct platform_device *))
-
-Kernel modules can be composed of several platform drivers. The platform core
-provides helpers to register and unregister an array of drivers:
-
-	int __platform_register_drivers(struct platform_driver * const *drivers,
-				      unsigned int count, struct module *owner);
-	void platform_unregister_drivers(struct platform_driver * const *drivers,
-					 unsigned int count);
-
-If one of the drivers fails to register, all drivers registered up to that
-point will be unregistered in reverse order. Note that there is a convenience
-macro that passes THIS_MODULE as owner parameter:
-
-	#define platform_register_drivers(drivers, count)
-
-
-Device Enumeration
-~~~~~~~~~~~~~~~~~~
-As a rule, platform specific (and often board-specific) setup code will
-register platform devices:
-
-	int platform_device_register(struct platform_device *pdev);
-
-	int platform_add_devices(struct platform_device **pdevs, int ndev);
-
-The general rule is to register only those devices that actually exist,
-but in some cases extra devices might be registered.  For example, a kernel
-might be configured to work with an external network adapter that might not
-be populated on all boards, or likewise to work with an integrated controller
-that some boards might not hook up to any peripherals.
-
-In some cases, boot firmware will export tables describing the devices
-that are populated on a given board.   Without such tables, often the
-only way for system setup code to set up the correct devices is to build
-a kernel for a specific target board.  Such board-specific kernels are
-common with embedded and custom systems development.
-
-In many cases, the memory and IRQ resources associated with the platform
-device are not enough to let the device's driver work.  Board setup code
-will often provide additional information using the device's platform_data
-field to hold additional information.
-
-Embedded systems frequently need one or more clocks for platform devices,
-which are normally kept off until they're actively needed (to save power).
-System setup also associates those clocks with the device, so that that
-calls to clk_get(&pdev->dev, clock_name) return them as needed.
-
-
-Legacy Drivers:  Device Probing
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Some drivers are not fully converted to the driver model, because they take
-on a non-driver role:  the driver registers its platform device, rather than
-leaving that for system infrastructure.  Such drivers can't be hotplugged
-or coldplugged, since those mechanisms require device creation to be in a
-different system component than the driver.
-
-The only "good" reason for this is to handle older system designs which, like
-original IBM PCs, rely on error-prone "probe-the-hardware" models for hardware
-configuration.  Newer systems have largely abandoned that model, in favor of
-bus-level support for dynamic configuration (PCI, USB), or device tables
-provided by the boot firmware (e.g. PNPACPI on x86).  There are too many
-conflicting options about what might be where, and even educated guesses by
-an operating system will be wrong often enough to make trouble.
-
-This style of driver is discouraged.  If you're updating such a driver,
-please try to move the device enumeration to a more appropriate location,
-outside the driver.  This will usually be cleanup, since such drivers
-tend to already have "normal" modes, such as ones using device nodes that
-were created by PNP or by platform device setup.
-
-None the less, there are some APIs to support such legacy drivers.  Avoid
-using these calls except with such hotplug-deficient drivers.
-
-	struct platform_device *platform_device_alloc(
-			const char *name, int id);
-
-You can use platform_device_alloc() to dynamically allocate a device, which
-you will then initialize with resources and platform_device_register().
-A better solution is usually:
-
-	struct platform_device *platform_device_register_simple(
-			const char *name, int id,
-			struct resource *res, unsigned int nres);
-
-You can use platform_device_register_simple() as a one-step call to allocate
-and register a device.
-
-
-Device Naming and Driver Binding
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The platform_device.dev.bus_id is the canonical name for the devices.
-It's built from two components:
-
-    * platform_device.name ... which is also used to for driver matching.
-
-    * platform_device.id ... the device instance number, or else "-1"
-      to indicate there's only one.
-
-These are concatenated, so name/id "serial"/0 indicates bus_id "serial.0", and
-"serial/3" indicates bus_id "serial.3"; both would use the platform_driver
-named "serial".  While "my_rtc"/-1 would be bus_id "my_rtc" (no instance id)
-and use the platform_driver called "my_rtc".
-
-Driver binding is performed automatically by the driver core, invoking
-driver probe() after finding a match between device and driver.  If the
-probe() succeeds, the driver and device are bound as usual.  There are
-three different ways to find such a match:
-
-    - Whenever a device is registered, the drivers for that bus are
-      checked for matches.  Platform devices should be registered very
-      early during system boot.
-
-    - When a driver is registered using platform_driver_register(), all
-      unbound devices on that bus are checked for matches.  Drivers
-      usually register later during booting, or by module loading.
-
-    - Registering a driver using platform_driver_probe() works just like
-      using platform_driver_register(), except that the driver won't
-      be probed later if another device registers.  (Which is OK, since
-      this interface is only for use with non-hotpluggable devices.)
-
-
-Early Platform Devices and Drivers
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The early platform interfaces provide platform data to platform device
-drivers early on during the system boot. The code is built on top of the
-early_param() command line parsing and can be executed very early on.
-
-Example: "earlyprintk" class early serial console in 6 steps
-
-1. Registering early platform device data
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The architecture code registers platform device data using the function
-early_platform_add_devices(). In the case of early serial console this
-should be hardware configuration for the serial port. Devices registered
-at this point will later on be matched against early platform drivers.
-
-2. Parsing kernel command line
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The architecture code calls parse_early_param() to parse the kernel
-command line. This will execute all matching early_param() callbacks.
-User specified early platform devices will be registered at this point.
-For the early serial console case the user can specify port on the
-kernel command line as "earlyprintk=serial.0" where "earlyprintk" is
-the class string, "serial" is the name of the platform driver and
-0 is the platform device id. If the id is -1 then the dot and the
-id can be omitted.
-
-3. Installing early platform drivers belonging to a certain class
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The architecture code may optionally force registration of all early
-platform drivers belonging to a certain class using the function
-early_platform_driver_register_all(). User specified devices from
-step 2 have priority over these. This step is omitted by the serial
-driver example since the early serial driver code should be disabled
-unless the user has specified port on the kernel command line.
-
-4. Early platform driver registration
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Compiled-in platform drivers making use of early_platform_init() are
-automatically registered during step 2 or 3. The serial driver example
-should use early_platform_init("earlyprintk", &platform_driver).
-
-5. Probing of early platform drivers belonging to a certain class
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The architecture code calls early_platform_driver_probe() to match
-registered early platform devices associated with a certain class with
-registered early platform drivers. Matched devices will get probed().
-This step can be executed at any point during the early boot. As soon
-as possible may be good for the serial port case.
-
-6. Inside the early platform driver probe()
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The driver code needs to take special care during early boot, especially
-when it comes to memory allocation and interrupt registration. The code
-in the probe() function can use is_early_platform_device() to check if
-it is called at early platform device or at the regular platform device
-time. The early serial driver performs register_console() at this point.
-
-For further information, see <linux/platform_device.h>.
diff --git a/Documentation/driver-model/porting.rst b/Documentation/driver-model/porting.rst
new file mode 100644
index 000000000000..ae4bf843c1d6
--- /dev/null
+++ b/Documentation/driver-model/porting.rst
@@ -0,0 +1,448 @@
+=======================================
+Porting Drivers to the New Driver Model
+=======================================
+
+Patrick Mochel
+
+7 January 2003
+
+
+Overview
+
+Please refer to `Documentation/driver-model/*.rst` for definitions of
+various driver types and concepts.
+
+Most of the work of porting devices drivers to the new model happens
+at the bus driver layer. This was intentional, to minimize the
+negative effect on kernel drivers, and to allow a gradual transition
+of bus drivers.
+
+In a nutshell, the driver model consists of a set of objects that can
+be embedded in larger, bus-specific objects. Fields in these generic
+objects can replace fields in the bus-specific objects.
+
+The generic objects must be registered with the driver model core. By
+doing so, they will exported via the sysfs filesystem. sysfs can be
+mounted by doing::
+
+	# mount -t sysfs sysfs /sys
+
+
+
+The Process
+
+Step 0: Read include/linux/device.h for object and function definitions.
+
+Step 1: Registering the bus driver.
+
+
+- Define a struct bus_type for the bus driver::
+
+    struct bus_type pci_bus_type = {
+          .name           = "pci",
+    };
+
+
+- Register the bus type.
+
+  This should be done in the initialization function for the bus type,
+  which is usually the module_init(), or equivalent, function::
+
+    static int __init pci_driver_init(void)
+    {
+            return bus_register(&pci_bus_type);
+    }
+
+    subsys_initcall(pci_driver_init);
+
+
+  The bus type may be unregistered (if the bus driver may be compiled
+  as a module) by doing::
+
+     bus_unregister(&pci_bus_type);
+
+
+- Export the bus type for others to use.
+
+  Other code may wish to reference the bus type, so declare it in a
+  shared header file and export the symbol.
+
+From include/linux/pci.h::
+
+  extern struct bus_type pci_bus_type;
+
+
+From file the above code appears in::
+
+  EXPORT_SYMBOL(pci_bus_type);
+
+
+
+- This will cause the bus to show up in /sys/bus/pci/ with two
+  subdirectories: 'devices' and 'drivers'::
+
+    # tree -d /sys/bus/pci/
+    /sys/bus/pci/
+    |-- devices
+    `-- drivers
+
+
+
+Step 2: Registering Devices.
+
+struct device represents a single device. It mainly contains metadata
+describing the relationship the device has to other entities.
+
+
+- Embed a struct device in the bus-specific device type::
+
+
+    struct pci_dev {
+           ...
+           struct  device  dev;            /* Generic device interface */
+           ...
+    };
+
+  It is recommended that the generic device not be the first item in
+  the struct to discourage programmers from doing mindless casts
+  between the object types. Instead macros, or inline functions,
+  should be created to convert from the generic object type::
+
+
+    #define to_pci_dev(n) container_of(n, struct pci_dev, dev)
+
+    or
+
+    static inline struct pci_dev * to_pci_dev(struct kobject * kobj)
+    {
+	return container_of(n, struct pci_dev, dev);
+    }
+
+  This allows the compiler to verify type-safety of the operations
+  that are performed (which is Good).
+
+
+- Initialize the device on registration.
+
+  When devices are discovered or registered with the bus type, the
+  bus driver should initialize the generic device. The most important
+  things to initialize are the bus_id, parent, and bus fields.
+
+  The bus_id is an ASCII string that contains the device's address on
+  the bus. The format of this string is bus-specific. This is
+  necessary for representing devices in sysfs.
+
+  parent is the physical parent of the device. It is important that
+  the bus driver sets this field correctly.
+
+  The driver model maintains an ordered list of devices that it uses
+  for power management. This list must be in order to guarantee that
+  devices are shutdown before their physical parents, and vice versa.
+  The order of this list is determined by the parent of registered
+  devices.
+
+  Also, the location of the device's sysfs directory depends on a
+  device's parent. sysfs exports a directory structure that mirrors
+  the device hierarchy. Accurately setting the parent guarantees that
+  sysfs will accurately represent the hierarchy.
+
+  The device's bus field is a pointer to the bus type the device
+  belongs to. This should be set to the bus_type that was declared
+  and initialized before.
+
+  Optionally, the bus driver may set the device's name and release
+  fields.
+
+  The name field is an ASCII string describing the device, like
+
+     "ATI Technologies Inc Radeon QD"
+
+  The release field is a callback that the driver model core calls
+  when the device has been removed, and all references to it have
+  been released. More on this in a moment.
+
+
+- Register the device.
+
+  Once the generic device has been initialized, it can be registered
+  with the driver model core by doing::
+
+       device_register(&dev->dev);
+
+  It can later be unregistered by doing::
+
+       device_unregister(&dev->dev);
+
+  This should happen on buses that support hotpluggable devices.
+  If a bus driver unregisters a device, it should not immediately free
+  it. It should instead wait for the driver model core to call the
+  device's release method, then free the bus-specific object.
+  (There may be other code that is currently referencing the device
+  structure, and it would be rude to free the device while that is
+  happening).
+
+
+  When the device is registered, a directory in sysfs is created.
+  The PCI tree in sysfs looks like::
+
+    /sys/devices/pci0/
+    |-- 00:00.0
+    |-- 00:01.0
+    |   `-- 01:00.0
+    |-- 00:02.0
+    |   `-- 02:1f.0
+    |       `-- 03:00.0
+    |-- 00:1e.0
+    |   `-- 04:04.0
+    |-- 00:1f.0
+    |-- 00:1f.1
+    |   |-- ide0
+    |   |   |-- 0.0
+    |   |   `-- 0.1
+    |   `-- ide1
+    |       `-- 1.0
+    |-- 00:1f.2
+    |-- 00:1f.3
+    `-- 00:1f.5
+
+  Also, symlinks are created in the bus's 'devices' directory
+  that point to the device's directory in the physical hierarchy::
+
+    /sys/bus/pci/devices/
+    |-- 00:00.0 -> ../../../devices/pci0/00:00.0
+    |-- 00:01.0 -> ../../../devices/pci0/00:01.0
+    |-- 00:02.0 -> ../../../devices/pci0/00:02.0
+    |-- 00:1e.0 -> ../../../devices/pci0/00:1e.0
+    |-- 00:1f.0 -> ../../../devices/pci0/00:1f.0
+    |-- 00:1f.1 -> ../../../devices/pci0/00:1f.1
+    |-- 00:1f.2 -> ../../../devices/pci0/00:1f.2
+    |-- 00:1f.3 -> ../../../devices/pci0/00:1f.3
+    |-- 00:1f.5 -> ../../../devices/pci0/00:1f.5
+    |-- 01:00.0 -> ../../../devices/pci0/00:01.0/01:00.0
+    |-- 02:1f.0 -> ../../../devices/pci0/00:02.0/02:1f.0
+    |-- 03:00.0 -> ../../../devices/pci0/00:02.0/02:1f.0/03:00.0
+    `-- 04:04.0 -> ../../../devices/pci0/00:1e.0/04:04.0
+
+
+
+Step 3: Registering Drivers.
+
+struct device_driver is a simple driver structure that contains a set
+of operations that the driver model core may call.
+
+
+- Embed a struct device_driver in the bus-specific driver.
+
+  Just like with devices, do something like::
+
+    struct pci_driver {
+           ...
+           struct device_driver    driver;
+    };
+
+
+- Initialize the generic driver structure.
+
+  When the driver registers with the bus (e.g. doing pci_register_driver()),
+  initialize the necessary fields of the driver: the name and bus
+  fields.
+
+
+- Register the driver.
+
+  After the generic driver has been initialized, call::
+
+	driver_register(&drv->driver);
+
+  to register the driver with the core.
+
+  When the driver is unregistered from the bus, unregister it from the
+  core by doing::
+
+        driver_unregister(&drv->driver);
+
+  Note that this will block until all references to the driver have
+  gone away. Normally, there will not be any.
+
+
+- Sysfs representation.
+
+  Drivers are exported via sysfs in their bus's 'driver's directory.
+  For example::
+
+    /sys/bus/pci/drivers/
+    |-- 3c59x
+    |-- Ensoniq AudioPCI
+    |-- agpgart-amdk7
+    |-- e100
+    `-- serial
+
+
+Step 4: Define Generic Methods for Drivers.
+
+struct device_driver defines a set of operations that the driver model
+core calls. Most of these operations are probably similar to
+operations the bus already defines for drivers, but taking different
+parameters.
+
+It would be difficult and tedious to force every driver on a bus to
+simultaneously convert their drivers to generic format. Instead, the
+bus driver should define single instances of the generic methods that
+forward call to the bus-specific drivers. For instance::
+
+
+  static int pci_device_remove(struct device * dev)
+  {
+          struct pci_dev * pci_dev = to_pci_dev(dev);
+          struct pci_driver * drv = pci_dev->driver;
+
+          if (drv) {
+                  if (drv->remove)
+                          drv->remove(pci_dev);
+                  pci_dev->driver = NULL;
+          }
+          return 0;
+  }
+
+
+The generic driver should be initialized with these methods before it
+is registered::
+
+        /* initialize common driver fields */
+        drv->driver.name = drv->name;
+        drv->driver.bus = &pci_bus_type;
+        drv->driver.probe = pci_device_probe;
+        drv->driver.resume = pci_device_resume;
+        drv->driver.suspend = pci_device_suspend;
+        drv->driver.remove = pci_device_remove;
+
+        /* register with core */
+        driver_register(&drv->driver);
+
+
+Ideally, the bus should only initialize the fields if they are not
+already set. This allows the drivers to implement their own generic
+methods.
+
+
+Step 5: Support generic driver binding.
+
+The model assumes that a device or driver can be dynamically
+registered with the bus at any time. When registration happens,
+devices must be bound to a driver, or drivers must be bound to all
+devices that it supports.
+
+A driver typically contains a list of device IDs that it supports. The
+bus driver compares these IDs to the IDs of devices registered with it.
+The format of the device IDs, and the semantics for comparing them are
+bus-specific, so the generic model does attempt to generalize them.
+
+Instead, a bus may supply a method in struct bus_type that does the
+comparison::
+
+  int (*match)(struct device * dev, struct device_driver * drv);
+
+match should return positive value if the driver supports the device,
+and zero otherwise. It may also return error code (for example
+-EPROBE_DEFER) if determining that given driver supports the device is
+not possible.
+
+When a device is registered, the bus's list of drivers is iterated
+over. bus->match() is called for each one until a match is found.
+
+When a driver is registered, the bus's list of devices is iterated
+over. bus->match() is called for each device that is not already
+claimed by a driver.
+
+When a device is successfully bound to a driver, device->driver is
+set, the device is added to a per-driver list of devices, and a
+symlink is created in the driver's sysfs directory that points to the
+device's physical directory::
+
+  /sys/bus/pci/drivers/
+  |-- 3c59x
+  |   `-- 00:0b.0 -> ../../../../devices/pci0/00:0b.0
+  |-- Ensoniq AudioPCI
+  |-- agpgart-amdk7
+  |   `-- 00:00.0 -> ../../../../devices/pci0/00:00.0
+  |-- e100
+  |   `-- 00:0c.0 -> ../../../../devices/pci0/00:0c.0
+  `-- serial
+
+
+This driver binding should replace the existing driver binding
+mechanism the bus currently uses.
+
+
+Step 6: Supply a hotplug callback.
+
+Whenever a device is registered with the driver model core, the
+userspace program /sbin/hotplug is called to notify userspace.
+Users can define actions to perform when a device is inserted or
+removed.
+
+The driver model core passes several arguments to userspace via
+environment variables, including
+
+- ACTION: set to 'add' or 'remove'
+- DEVPATH: set to the device's physical path in sysfs.
+
+A bus driver may also supply additional parameters for userspace to
+consume. To do this, a bus must implement the 'hotplug' method in
+struct bus_type::
+
+     int (*hotplug) (struct device *dev, char **envp,
+                     int num_envp, char *buffer, int buffer_size);
+
+This is called immediately before /sbin/hotplug is executed.
+
+
+Step 7: Cleaning up the bus driver.
+
+The generic bus, device, and driver structures provide several fields
+that can replace those defined privately to the bus driver.
+
+- Device list.
+
+struct bus_type contains a list of all devices registered with the bus
+type. This includes all devices on all instances of that bus type.
+An internal list that the bus uses may be removed, in favor of using
+this one.
+
+The core provides an iterator to access these devices::
+
+  int bus_for_each_dev(struct bus_type * bus, struct device * start,
+                       void * data, int (*fn)(struct device *, void *));
+
+
+- Driver list.
+
+struct bus_type also contains a list of all drivers registered with
+it. An internal list of drivers that the bus driver maintains may
+be removed in favor of using the generic one.
+
+The drivers may be iterated over, like devices::
+
+  int bus_for_each_drv(struct bus_type * bus, struct device_driver * start,
+                       void * data, int (*fn)(struct device_driver *, void *));
+
+
+Please see drivers/base/bus.c for more information.
+
+
+- rwsem
+
+struct bus_type contains an rwsem that protects all core accesses to
+the device and driver lists. This can be used by the bus driver
+internally, and should be used when accessing the device or driver
+lists the bus maintains.
+
+
+- Device and driver fields.
+
+Some of the fields in struct device and struct device_driver duplicate
+fields in the bus-specific representations of these objects. Feel free
+to remove the bus-specific ones and favor the generic ones. Note
+though, that this will likely mean fixing up all the drivers that
+reference the bus-specific fields (though those should all be 1-line
+changes).
diff --git a/Documentation/driver-model/porting.txt b/Documentation/driver-model/porting.txt
deleted file mode 100644
index 453053f1661f..000000000000
--- a/Documentation/driver-model/porting.txt
+++ /dev/null
@@ -1,447 +0,0 @@
-
-Porting Drivers to the New Driver Model
-
-Patrick Mochel
-
-7 January 2003
-
-
-Overview
-
-Please refer to Documentation/driver-model/*.txt for definitions of
-various driver types and concepts. 
-
-Most of the work of porting devices drivers to the new model happens
-at the bus driver layer. This was intentional, to minimize the
-negative effect on kernel drivers, and to allow a gradual transition
-of bus drivers.
-
-In a nutshell, the driver model consists of a set of objects that can
-be embedded in larger, bus-specific objects. Fields in these generic
-objects can replace fields in the bus-specific objects. 
-
-The generic objects must be registered with the driver model core. By
-doing so, they will exported via the sysfs filesystem. sysfs can be
-mounted by doing 
-
-	# mount -t sysfs sysfs /sys
-
-
-
-The Process
-
-Step 0: Read include/linux/device.h for object and function definitions. 
-
-Step 1: Registering the bus driver. 
-
-
-- Define a struct bus_type for the bus driver.
-
-struct bus_type pci_bus_type = {
-        .name           = "pci",
-};
-
-
-- Register the bus type.
-  This should be done in the initialization function for the bus type,
-  which is usually the module_init(), or equivalent, function. 
-
-static int __init pci_driver_init(void)
-{
-        return bus_register(&pci_bus_type);
-}
-
-subsys_initcall(pci_driver_init);
-
-
-  The bus type may be unregistered (if the bus driver may be compiled
-  as a module) by doing:
-
-     bus_unregister(&pci_bus_type);
-
-
-- Export the bus type for others to use. 
-
-  Other code may wish to reference the bus type, so declare it in a 
-  shared header file and export the symbol.
-
-From include/linux/pci.h:
-
-extern struct bus_type pci_bus_type;
-
-
-From file the above code appears in:
-
-EXPORT_SYMBOL(pci_bus_type);
-
-
-
-- This will cause the bus to show up in /sys/bus/pci/ with two
-  subdirectories: 'devices' and 'drivers'.
-
-# tree -d /sys/bus/pci/
-/sys/bus/pci/
-|-- devices
-`-- drivers
-
-
-
-Step 2: Registering Devices. 
-
-struct device represents a single device. It mainly contains metadata
-describing the relationship the device has to other entities. 
-
-
-- Embed a struct device in the bus-specific device type. 
-
-
-struct pci_dev {
-       ...
-       struct  device  dev;            /* Generic device interface */
-       ...
-};
-
-  It is recommended that the generic device not be the first item in 
-  the struct to discourage programmers from doing mindless casts
-  between the object types. Instead macros, or inline functions,
-  should be created to convert from the generic object type.
-
-
-#define to_pci_dev(n) container_of(n, struct pci_dev, dev)
-
-or 
-
-static inline struct pci_dev * to_pci_dev(struct kobject * kobj)
-{
-	return container_of(n, struct pci_dev, dev);
-}
-
-  This allows the compiler to verify type-safety of the operations 
-  that are performed (which is Good).
-
-
-- Initialize the device on registration.
-
-  When devices are discovered or registered with the bus type, the 
-  bus driver should initialize the generic device. The most important
-  things to initialize are the bus_id, parent, and bus fields.
-
-  The bus_id is an ASCII string that contains the device's address on
-  the bus. The format of this string is bus-specific. This is
-  necessary for representing devices in sysfs. 
-
-  parent is the physical parent of the device. It is important that
-  the bus driver sets this field correctly. 
-
-  The driver model maintains an ordered list of devices that it uses
-  for power management. This list must be in order to guarantee that
-  devices are shutdown before their physical parents, and vice versa.
-  The order of this list is determined by the parent of registered
-  devices.
-
-  Also, the location of the device's sysfs directory depends on a
-  device's parent. sysfs exports a directory structure that mirrors 
-  the device hierarchy. Accurately setting the parent guarantees that
-  sysfs will accurately represent the hierarchy.
-
-  The device's bus field is a pointer to the bus type the device
-  belongs to. This should be set to the bus_type that was declared
-  and initialized before. 
-
-  Optionally, the bus driver may set the device's name and release
-  fields.
-
-  The name field is an ASCII string describing the device, like
-
-     "ATI Technologies Inc Radeon QD"
-
-  The release field is a callback that the driver model core calls 
-  when the device has been removed, and all references to it have 
-  been released. More on this in a moment.
-
-
-- Register the device. 
-
-  Once the generic device has been initialized, it can be registered
-  with the driver model core by doing:
-
-       device_register(&dev->dev);
-
-  It can later be unregistered by doing: 
-
-       device_unregister(&dev->dev);
-
-  This should happen on buses that support hotpluggable devices. 
-  If a bus driver unregisters a device, it should not immediately free
-  it. It should instead wait for the driver model core to call the 
-  device's release method, then free the bus-specific object. 
-  (There may be other code that is currently referencing the device
-  structure, and it would be rude to free the device while that is 
-  happening).
-
-
-  When the device is registered, a directory in sysfs is created. 
-  The PCI tree in sysfs looks like: 
-
-/sys/devices/pci0/
-|-- 00:00.0
-|-- 00:01.0
-|   `-- 01:00.0
-|-- 00:02.0
-|   `-- 02:1f.0
-|       `-- 03:00.0
-|-- 00:1e.0
-|   `-- 04:04.0
-|-- 00:1f.0
-|-- 00:1f.1
-|   |-- ide0
-|   |   |-- 0.0
-|   |   `-- 0.1
-|   `-- ide1
-|       `-- 1.0
-|-- 00:1f.2
-|-- 00:1f.3
-`-- 00:1f.5
-
-  Also, symlinks are created in the bus's 'devices' directory
-  that point to the device's directory in the physical hierarchy. 
-
-/sys/bus/pci/devices/
-|-- 00:00.0 -> ../../../devices/pci0/00:00.0
-|-- 00:01.0 -> ../../../devices/pci0/00:01.0
-|-- 00:02.0 -> ../../../devices/pci0/00:02.0
-|-- 00:1e.0 -> ../../../devices/pci0/00:1e.0
-|-- 00:1f.0 -> ../../../devices/pci0/00:1f.0
-|-- 00:1f.1 -> ../../../devices/pci0/00:1f.1
-|-- 00:1f.2 -> ../../../devices/pci0/00:1f.2
-|-- 00:1f.3 -> ../../../devices/pci0/00:1f.3
-|-- 00:1f.5 -> ../../../devices/pci0/00:1f.5
-|-- 01:00.0 -> ../../../devices/pci0/00:01.0/01:00.0
-|-- 02:1f.0 -> ../../../devices/pci0/00:02.0/02:1f.0
-|-- 03:00.0 -> ../../../devices/pci0/00:02.0/02:1f.0/03:00.0
-`-- 04:04.0 -> ../../../devices/pci0/00:1e.0/04:04.0
-
-
-
-Step 3: Registering Drivers.
-
-struct device_driver is a simple driver structure that contains a set
-of operations that the driver model core may call. 
-
-
-- Embed a struct device_driver in the bus-specific driver. 
-
-  Just like with devices, do something like:
-
-struct pci_driver {
-       ...
-       struct device_driver    driver;
-};
-
-
-- Initialize the generic driver structure. 
-
-  When the driver registers with the bus (e.g. doing pci_register_driver()),
-  initialize the necessary fields of the driver: the name and bus
-  fields. 
-
-
-- Register the driver.
-
-  After the generic driver has been initialized, call
-
-	driver_register(&drv->driver);
-
-  to register the driver with the core.
-
-  When the driver is unregistered from the bus, unregister it from the
-  core by doing:
-
-        driver_unregister(&drv->driver);
-
-  Note that this will block until all references to the driver have
-  gone away. Normally, there will not be any.
-
-
-- Sysfs representation.
-
-  Drivers are exported via sysfs in their bus's 'driver's directory. 
-  For example:
-
-/sys/bus/pci/drivers/
-|-- 3c59x
-|-- Ensoniq AudioPCI
-|-- agpgart-amdk7
-|-- e100
-`-- serial
-
-
-Step 4: Define Generic Methods for Drivers.
-
-struct device_driver defines a set of operations that the driver model
-core calls. Most of these operations are probably similar to
-operations the bus already defines for drivers, but taking different
-parameters. 
-
-It would be difficult and tedious to force every driver on a bus to
-simultaneously convert their drivers to generic format. Instead, the
-bus driver should define single instances of the generic methods that
-forward call to the bus-specific drivers. For instance: 
-
-
-static int pci_device_remove(struct device * dev)
-{
-        struct pci_dev * pci_dev = to_pci_dev(dev);
-        struct pci_driver * drv = pci_dev->driver;
-
-        if (drv) {
-                if (drv->remove)
-                        drv->remove(pci_dev);
-                pci_dev->driver = NULL;
-        }
-        return 0;
-}
-
-
-The generic driver should be initialized with these methods before it
-is registered. 
-
-        /* initialize common driver fields */
-        drv->driver.name = drv->name;
-        drv->driver.bus = &pci_bus_type;
-        drv->driver.probe = pci_device_probe;
-        drv->driver.resume = pci_device_resume;
-        drv->driver.suspend = pci_device_suspend;
-        drv->driver.remove = pci_device_remove;
-
-        /* register with core */
-        driver_register(&drv->driver);
-
-
-Ideally, the bus should only initialize the fields if they are not
-already set. This allows the drivers to implement their own generic
-methods. 
-
-
-Step 5: Support generic driver binding. 
-
-The model assumes that a device or driver can be dynamically
-registered with the bus at any time. When registration happens,
-devices must be bound to a driver, or drivers must be bound to all
-devices that it supports. 
-
-A driver typically contains a list of device IDs that it supports. The
-bus driver compares these IDs to the IDs of devices registered with it. 
-The format of the device IDs, and the semantics for comparing them are
-bus-specific, so the generic model does attempt to generalize them. 
-
-Instead, a bus may supply a method in struct bus_type that does the
-comparison: 
-
-  int (*match)(struct device * dev, struct device_driver * drv);
-
-match should return positive value if the driver supports the device,
-and zero otherwise. It may also return error code (for example
--EPROBE_DEFER) if determining that given driver supports the device is
-not possible.
-
-When a device is registered, the bus's list of drivers is iterated
-over. bus->match() is called for each one until a match is found. 
-
-When a driver is registered, the bus's list of devices is iterated
-over. bus->match() is called for each device that is not already
-claimed by a driver. 
-
-When a device is successfully bound to a driver, device->driver is
-set, the device is added to a per-driver list of devices, and a
-symlink is created in the driver's sysfs directory that points to the
-device's physical directory:
-
-/sys/bus/pci/drivers/
-|-- 3c59x
-|   `-- 00:0b.0 -> ../../../../devices/pci0/00:0b.0
-|-- Ensoniq AudioPCI
-|-- agpgart-amdk7
-|   `-- 00:00.0 -> ../../../../devices/pci0/00:00.0
-|-- e100
-|   `-- 00:0c.0 -> ../../../../devices/pci0/00:0c.0
-`-- serial
-
-
-This driver binding should replace the existing driver binding
-mechanism the bus currently uses. 
-
-
-Step 6: Supply a hotplug callback.
-
-Whenever a device is registered with the driver model core, the
-userspace program /sbin/hotplug is called to notify userspace. 
-Users can define actions to perform when a device is inserted or
-removed. 
-
-The driver model core passes several arguments to userspace via
-environment variables, including
-
-- ACTION: set to 'add' or 'remove'
-- DEVPATH: set to the device's physical path in sysfs. 
-
-A bus driver may also supply additional parameters for userspace to
-consume. To do this, a bus must implement the 'hotplug' method in
-struct bus_type:
-
-     int (*hotplug) (struct device *dev, char **envp, 
-                     int num_envp, char *buffer, int buffer_size);
-
-This is called immediately before /sbin/hotplug is executed. 
-
-
-Step 7: Cleaning up the bus driver.
-
-The generic bus, device, and driver structures provide several fields
-that can replace those defined privately to the bus driver. 
-
-- Device list.
-
-struct bus_type contains a list of all devices registered with the bus
-type. This includes all devices on all instances of that bus type.
-An internal list that the bus uses may be removed, in favor of using
-this one.
-
-The core provides an iterator to access these devices. 
-
-int bus_for_each_dev(struct bus_type * bus, struct device * start, 
-                     void * data, int (*fn)(struct device *, void *));
-
-
-- Driver list.
-
-struct bus_type also contains a list of all drivers registered with
-it. An internal list of drivers that the bus driver maintains may 
-be removed in favor of using the generic one. 
-
-The drivers may be iterated over, like devices: 
-
-int bus_for_each_drv(struct bus_type * bus, struct device_driver * start,
-                     void * data, int (*fn)(struct device_driver *, void *));
-
-
-Please see drivers/base/bus.c for more information.
-
-
-- rwsem 
-
-struct bus_type contains an rwsem that protects all core accesses to
-the device and driver lists. This can be used by the bus driver
-internally, and should be used when accessing the device or driver
-lists the bus maintains. 
-
-
-- Device and driver fields. 
-
-Some of the fields in struct device and struct device_driver duplicate
-fields in the bus-specific representations of these objects. Feel free
-to remove the bus-specific ones and favor the generic ones. Note
-though, that this will likely mean fixing up all the drivers that
-reference the bus-specific fields (though those should all be 1-line
-changes).
-
diff --git a/Documentation/eisa.txt b/Documentation/eisa.txt
index 2806e5544e43..f388545a85a7 100644
--- a/Documentation/eisa.txt
+++ b/Documentation/eisa.txt
@@ -103,7 +103,7 @@ id_table	an array of NULL terminated EISA id strings,
 		(driver_data).
 
 driver		a generic driver, such as described in
-		Documentation/driver-model/driver.txt. Only .name,
+		Documentation/driver-model/driver.rst. Only .name,
 		.probe and .remove members are mandatory.
 =============== ====================================================
 
@@ -152,7 +152,7 @@ state    set of flags indicating the state of the device. Current
 	 flags are EISA_CONFIG_ENABLED and EISA_CONFIG_FORCED.
 res	 set of four 256 bytes I/O regions allocated to this device
 dma_mask DMA mask set from the parent device.
-dev	 generic device (see Documentation/driver-model/device.txt)
+dev	 generic device (see Documentation/driver-model/device.rst)
 ======== ============================================================
 
 You can get the 'struct eisa_device' from 'struct device' using the
diff --git a/Documentation/hwmon/submitting-patches.rst b/Documentation/hwmon/submitting-patches.rst
index f9796b9d9db6..d5b05d3e54ba 100644
--- a/Documentation/hwmon/submitting-patches.rst
+++ b/Documentation/hwmon/submitting-patches.rst
@@ -89,7 +89,7 @@ increase the chances of your change being accepted.
   console. Excessive logging can seriously affect system performance.
 
 * Use devres functions whenever possible to allocate resources. For rationale
-  and supported functions, please see Documentation/driver-model/devres.txt.
+  and supported functions, please see Documentation/driver-model/devres.rst.
   If a function is not supported by devres, consider using devm_add_action().
 
 * If the driver has a detect function, make sure it is silent. Debug messages
-- 
cgit 


From b85d7502188689f2adb5600d777f96ce79c9ae24 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Wed, 12 Jun 2019 13:24:49 +0530
Subject: dt-bindings: arm: stm32: Convert STM32 SoC bindings to DT schema

This commit converts STM32 SoC bindings to DT schema using jsonschema.

Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Alexandre Torgue <alexandre.torgue@st.com>
---
 .../devicetree/bindings/arm/stm32/stm32.txt        | 10 --------
 .../devicetree/bindings/arm/stm32/stm32.yaml       | 29 ++++++++++++++++++++++
 2 files changed, 29 insertions(+), 10 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/stm32/stm32.txt
 create mode 100644 Documentation/devicetree/bindings/arm/stm32/stm32.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/stm32/stm32.txt b/Documentation/devicetree/bindings/arm/stm32/stm32.txt
deleted file mode 100644
index 6808ed9ddfd5..000000000000
--- a/Documentation/devicetree/bindings/arm/stm32/stm32.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-STMicroelectronics STM32 Platforms Device Tree Bindings
-
-Each device tree must specify which STM32 SoC it uses,
-using one of the following compatible strings:
-
-  st,stm32f429
-  st,stm32f469
-  st,stm32f746
-  st,stm32h743
-  st,stm32mp157
diff --git a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
new file mode 100644
index 000000000000..f53dc0f2d7b3
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/stm32/stm32.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 Platforms Device Tree Bindings
+
+maintainers:
+  - Alexandre Torgue <alexandre.torgue@st.com>
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - const: st,stm32f429
+
+      - items:
+          - const: st,stm32f469
+
+      - items:
+          - const: st,stm32f746
+
+      - items:
+          - const: st,stm32h743
+
+      - items:
+          - const: st,stm32mp157
+...
-- 
cgit 


From 861ca2d32be767a2109b27a39ac71838881a9010 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Wed, 12 Jun 2019 13:24:50 +0530
Subject: dt-bindings: arm: stm32: Document Avenger96 devicetree binding

This commit documents Avenger96 devicetree binding based on
STM32MP157 SoC.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Alexandre Torgue <alexandre.torgue@st.com>
---
 Documentation/devicetree/bindings/arm/stm32/stm32.yaml | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
index f53dc0f2d7b3..4d194f1eb03a 100644
--- a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
+++ b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
@@ -25,5 +25,7 @@ properties:
           - const: st,stm32h743
 
       - items:
+          - enum:
+              - arrow,stm32mp157a-avenger96 # Avenger96
           - const: st,stm32mp157
 ...
-- 
cgit 


From 4056e79faa2dc8352634b594991b4b210d724e6f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 21 Jun 2019 06:54:25 -0300
Subject: ABI: sysfs-driver-mlxreg-io: fix the what fields

The author of this file should be given an award for creativity:
the What: fields on this file technically fulfills the description
at README. Yet, the way it is, it can't be parsed on a script,
and if someone would try to do something like:

	grep hwmon*/jtag_enable

It wouldn't find anything.

Fix the What fields in a way that it can be parseable by a
script and other search tools.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/stable/sysfs-driver-mlxreg-io | 45 +++++++++++--------------
 1 file changed, 19 insertions(+), 26 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/stable/sysfs-driver-mlxreg-io b/Documentation/ABI/stable/sysfs-driver-mlxreg-io
index 156319fc5b80..3544968f43cc 100644
--- a/Documentation/ABI/stable/sysfs-driver-mlxreg-io
+++ b/Documentation/ABI/stable/sysfs-driver-mlxreg-io
@@ -1,5 +1,4 @@
-What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
-							asic_health
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/asic_health
 
 Date:		June 2018
 KernelVersion:	4.19
@@ -9,9 +8,8 @@ Description:	This file shows ASIC health status. The possible values are:
 
 		The files are read only.
 
-What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
-							cpld1_version
-							cpld2_version
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld1_version
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld2_version
 Date:		June 2018
 KernelVersion:	4.19
 Contact:	Vadim Pasternak <vadimpmellanox.com>
@@ -20,8 +18,7 @@ Description:	These files show with which CPLD versions have been burned
 
 		The files are read only.
 
-What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
-							fan_dir
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/fan_dir
 
 Date:		December 2018
 KernelVersion:	5.0
@@ -32,8 +29,7 @@ Description:	This file shows the system fans direction:
 
 		The files are read only.
 
-What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
-							jtag_enable
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/jtag_enable
 
 Date:		November 2018
 KernelVersion:	5.0
@@ -43,8 +39,7 @@ Description:	These files show with which CPLD versions have been burned
 
 		The files are read only.
 
-What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
-							jtag_enable
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/jtag_enable
 
 Date:		November 2018
 KernelVersion:	5.0
@@ -87,16 +82,15 @@ Description:	These files allow asserting system power cycling, switching
 
 		The files are write only.
 
-What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
-							reset_aux_pwr_or_ref
-							reset_asic_thermal
-							reset_hotswap_or_halt
-							reset_hotswap_or_wd
-							reset_fw_reset
-							reset_long_pb
-							reset_main_pwr_fail
-							reset_short_pb
-							reset_sw_reset
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_aux_pwr_or_ref
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_asic_thermal
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_hotswap_or_halt
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_hotswap_or_wd
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_fw_reset
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_long_pb
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_main_pwr_fail
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_short_pb
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_sw_reset
 Date:		June 2018
 KernelVersion:	4.19
 Contact:	Vadim Pasternak <vadimpmellanox.com>
@@ -110,11 +104,10 @@ Description:	These files show the system reset cause, as following: power
 
 		The files are read only.
 
-What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
-						reset_comex_pwr_fail
-						reset_from_comex
-						reset_system
-						reset_voltmon_upgrade_fail
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_comex_pwr_fail
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_from_comex
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_system
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_voltmon_upgrade_fail
 
 Date:		November 2018
 KernelVersion:	5.0
-- 
cgit 


From 13d93380fca826e49c42e8d83fc8a077cabbe6b8 Mon Sep 17 00:00:00 2001
From: Tomasz Figa <tfiga@chromium.org>
Date: Wed, 12 Jun 2019 05:36:48 -0400
Subject: media: Clarify the meaning of file descriptors in VIDIOC_DQBUF

When the application calls VIDIOC_DQBUF with the DMABUF memory type, the
v4l2_buffer structure (or v4l2_plane structures) are filled with DMA-buf
file descriptors. However, the current documentation does not explain
whether those are new file descriptors referring to the same DMA-bufs or
just the same integers as passed to VIDIOC_QBUF back in time. Clarify
the documentation that it's the latter.

Signed-off-by: Tomasz Figa <tfiga@chromium.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/vidioc-qbuf.rst | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/media/uapi/v4l/vidioc-qbuf.rst b/Documentation/media/uapi/v4l/vidioc-qbuf.rst
index dbf7b445a27b..407302d80684 100644
--- a/Documentation/media/uapi/v4l/vidioc-qbuf.rst
+++ b/Documentation/media/uapi/v4l/vidioc-qbuf.rst
@@ -139,6 +139,14 @@ may continue as normal, but should be aware that data in the dequeued
 buffer might be corrupted. When using the multi-planar API, the planes
 array must be passed in as well.
 
+If the application sets the ``memory`` field to ``V4L2_MEMORY_DMABUF`` to
+dequeue a :ref:`DMABUF <dmabuf>` buffer, the driver fills the ``m.fd`` field
+with a file descriptor numerically the same as the one given to ``VIDIOC_QBUF``
+when the buffer was enqueued. No new file descriptor is created at dequeue time
+and the value is only for the application convenience. When the multi-planar
+API is used the ``m.fd`` fields of the passed array of struct
+:c:type:`v4l2_plane` are filled instead.
+
 By default ``VIDIOC_DQBUF`` blocks when no buffer is in the outgoing
 queue. When the ``O_NONBLOCK`` flag was given to the
 :ref:`open() <func-open>` function, ``VIDIOC_DQBUF`` returns
-- 
cgit 


From 7239682847b5a7d2633f39ddea1c0c11a69d5fd3 Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca@lucaceresoli.net>
Date: Thu, 13 Jun 2019 10:18:19 -0400
Subject: media: docs: v4l2-controls: remove repeated ioctl names

Mentioning :ref:`VIDIOC_QUERYCTRL` renders all the three related ioctls.
Explicitly adding VIDIOC_QUERY_EXT_CTRL and VIDIOC_QUERYMENU will make
them render twice, so remove them

Signed-off-by: Luca Ceresoli <luca@lucaceresoli.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/extended-controls.rst | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/media/uapi/v4l/extended-controls.rst b/Documentation/media/uapi/v4l/extended-controls.rst
index 24274b398e63..1323afcf2b7d 100644
--- a/Documentation/media/uapi/v4l/extended-controls.rst
+++ b/Documentation/media/uapi/v4l/extended-controls.rst
@@ -96,9 +96,7 @@ if this control consists of more than one element.
    #. It is important to realize that due to the flexibility of controls it is
       necessary to check whether the control you want to set actually is
       supported in the driver and what the valid range of values is. So use
-      the :ref:`VIDIOC_QUERYCTRL` (or :ref:`VIDIOC_QUERY_EXT_CTRL
-      <VIDIOC_QUERYCTRL>`) and :ref:`VIDIOC_QUERYMENU <VIDIOC_QUERYCTRL>`
-      ioctls to check this.
+      :ref:`VIDIOC_QUERYCTRL` to check this.
 
    #. It is possible that some of the menu indices in a control of
       type ``V4L2_CTRL_TYPE_MENU`` may not be supported (``VIDIOC_QUERYMENU``
-- 
cgit 


From e050f55207c547f78cd65758f148dd1b4404f133 Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca@lucaceresoli.net>
Date: Thu, 13 Jun 2019 10:18:20 -0400
Subject: media: docs: v4l2-controls: fix indentation

Fix indentation in example C code.

Signed-off-by: Luca Ceresoli <luca@lucaceresoli.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/extended-controls.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/media/uapi/v4l/extended-controls.rst b/Documentation/media/uapi/v4l/extended-controls.rst
index 1323afcf2b7d..0e9787072a41 100644
--- a/Documentation/media/uapi/v4l/extended-controls.rst
+++ b/Documentation/media/uapi/v4l/extended-controls.rst
@@ -142,7 +142,7 @@ control class is found:
     while (0 == ioctl(fd, VIDIOC_QUERYCTRL, &qctrl)) {
 	if (V4L2_CTRL_ID2CLASS(qctrl.id) != V4L2_CTRL_CLASS_MPEG)
 	    break;
-	    /* ... */
+	/* ... */
 	qctrl.id |= V4L2_CTRL_FLAG_NEXT_CTRL;
     }
 
-- 
cgit 


From 9e75efb0f2c56c7285f28bba877da14f428a730c Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca@lucaceresoli.net>
Date: Thu, 13 Jun 2019 10:18:21 -0400
Subject: media: docs: v4l2-controls: add links to structs

This section lacks links to struct definitions. Add one where each struct
is introduced.

Signed-off-by: Luca Ceresoli <luca@lucaceresoli.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/kapi/v4l2-controls.rst | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/media/kapi/v4l2-controls.rst b/Documentation/media/kapi/v4l2-controls.rst
index 64ab99abf0b6..0c3f486727ed 100644
--- a/Documentation/media/kapi/v4l2-controls.rst
+++ b/Documentation/media/kapi/v4l2-controls.rst
@@ -26,8 +26,9 @@ The control framework was created in order to implement all the rules of the
 V4L2 specification with respect to controls in a central place. And to make
 life as easy as possible for the driver developer.
 
-Note that the control framework relies on the presence of a struct v4l2_device
-for V4L2 drivers and struct v4l2_subdev for sub-device drivers.
+Note that the control framework relies on the presence of a struct
+:c:type:`v4l2_device` for V4L2 drivers and struct :c:type:`v4l2_subdev` for
+sub-device drivers.
 
 
 Objects in the framework
@@ -35,12 +36,13 @@ Objects in the framework
 
 There are two main objects:
 
-The v4l2_ctrl object describes the control properties and keeps track of the
-control's value (both the current value and the proposed new value).
+The :c:type:`v4l2_ctrl` object describes the control properties and keeps
+track of the control's value (both the current value and the proposed new
+value).
 
-v4l2_ctrl_handler is the object that keeps track of controls. It maintains a
-list of v4l2_ctrl objects that it owns and another list of references to
-controls, possibly to controls owned by other handlers.
+:c:type:`v4l2_ctrl_handler` is the object that keeps track of controls. It
+maintains a list of v4l2_ctrl objects that it owns and another list of
+references to controls, possibly to controls owned by other handlers.
 
 
 Basic usage for V4L2 and sub-device drivers
@@ -242,7 +244,7 @@ initializes the hardware to the default control values. It is recommended
 that you do this as this ensures that both the internal data structures and
 the hardware are in sync.
 
-4) Finally: implement the v4l2_ctrl_ops
+4) Finally: implement the :c:type:`v4l2_ctrl_ops`
 
 .. code-block:: none
 
-- 
cgit 


From e36160b84253d7125d0fa4109a3d69f0c55fc5d0 Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca@lucaceresoli.net>
Date: Thu, 13 Jun 2019 10:18:22 -0400
Subject: media: docs: v4l2-controls: rearrange control initialization sequence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The code snippet showing how to add controls to the driver’s top-level
struct is present twice, but only the second time it is split in the V4L2
and subdev cases. Consolidate everything at the beginning.

Also remove the "Where foo->bar is of type struct baz" sentences, this
obvious from the code.

Signed-off-by: Luca Ceresoli <luca@lucaceresoli.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/kapi/v4l2-controls.rst | 40 +++++++++++++-----------------
 1 file changed, 17 insertions(+), 23 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/media/kapi/v4l2-controls.rst b/Documentation/media/kapi/v4l2-controls.rst
index 0c3f486727ed..5281c9b1fd66 100644
--- a/Documentation/media/kapi/v4l2-controls.rst
+++ b/Documentation/media/kapi/v4l2-controls.rst
@@ -52,15 +52,29 @@ Basic usage for V4L2 and sub-device drivers
 
 1.1) Add the handler to your driver's top-level struct:
 
+For V4L2 drivers:
+
 .. code-block:: none
 
 	struct foo_dev {
+		...
+		struct v4l2_device v4l2_dev;
 		...
 		struct v4l2_ctrl_handler ctrl_handler;
 		...
 	};
 
-	struct foo_dev *foo;
+For sub-device drivers:
+
+.. code-block:: none
+
+	struct foo_dev {
+		...
+		struct v4l2_subdev sd;
+		...
+		struct v4l2_ctrl_handler ctrl_handler;
+		...
+	};
 
 1.2) Initialize the handler:
 
@@ -74,43 +88,23 @@ information. It is a hint only.
 
 1.3) Hook the control handler into the driver:
 
-1.3.1) For V4L2 drivers do this:
+For V4L2 drivers:
 
 .. code-block:: none
 
-	struct foo_dev {
-		...
-		struct v4l2_device v4l2_dev;
-		...
-		struct v4l2_ctrl_handler ctrl_handler;
-		...
-	};
-
 	foo->v4l2_dev.ctrl_handler = &foo->ctrl_handler;
 
-Where foo->v4l2_dev is of type struct v4l2_device.
-
 Finally, remove all control functions from your v4l2_ioctl_ops (if any):
 vidioc_queryctrl, vidioc_query_ext_ctrl, vidioc_querymenu, vidioc_g_ctrl,
 vidioc_s_ctrl, vidioc_g_ext_ctrls, vidioc_try_ext_ctrls and vidioc_s_ext_ctrls.
 Those are now no longer needed.
 
-1.3.2) For sub-device drivers do this:
+For sub-device drivers:
 
 .. code-block:: none
 
-	struct foo_dev {
-		...
-		struct v4l2_subdev sd;
-		...
-		struct v4l2_ctrl_handler ctrl_handler;
-		...
-	};
-
 	foo->sd.ctrl_handler = &foo->ctrl_handler;
 
-Where foo->sd is of type struct v4l2_subdev.
-
 1.4) Clean up the handler at the end:
 
 .. code-block:: none
-- 
cgit 


From 6b623dbfbd4f940de45b66ec07582aed0462f6da Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca@lucaceresoli.net>
Date: Thu, 13 Jun 2019 10:18:23 -0400
Subject: media: docs: v4l2-controls: add links to functions

This section lacks links to functions. Add one to simplify reading.

Signed-off-by: Luca Ceresoli <luca@lucaceresoli.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/kapi/v4l2-controls.rst | 67 ++++++++++++++++--------------
 1 file changed, 35 insertions(+), 32 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/media/kapi/v4l2-controls.rst b/Documentation/media/kapi/v4l2-controls.rst
index 5281c9b1fd66..41c0fd4009e9 100644
--- a/Documentation/media/kapi/v4l2-controls.rst
+++ b/Documentation/media/kapi/v4l2-controls.rst
@@ -114,7 +114,7 @@ For sub-device drivers:
 
 2) Add controls:
 
-You add non-menu controls by calling v4l2_ctrl_new_std:
+You add non-menu controls by calling :c:func:`v4l2_ctrl_new_std`:
 
 .. code-block:: none
 
@@ -122,7 +122,8 @@ You add non-menu controls by calling v4l2_ctrl_new_std:
 			const struct v4l2_ctrl_ops *ops,
 			u32 id, s32 min, s32 max, u32 step, s32 def);
 
-Menu and integer menu controls are added by calling v4l2_ctrl_new_std_menu:
+Menu and integer menu controls are added by calling
+:c:func:`v4l2_ctrl_new_std_menu`:
 
 .. code-block:: none
 
@@ -131,7 +132,7 @@ Menu and integer menu controls are added by calling v4l2_ctrl_new_std_menu:
 			u32 id, s32 max, s32 skip_mask, s32 def);
 
 Menu controls with a driver specific menu are added by calling
-v4l2_ctrl_new_std_menu_items:
+:c:func:`v4l2_ctrl_new_std_menu_items`:
 
 .. code-block:: none
 
@@ -141,7 +142,7 @@ v4l2_ctrl_new_std_menu_items:
                        s32 skip_mask, s32 def, const char * const *qmenu);
 
 Integer menu controls with a driver specific menu can be added by calling
-v4l2_ctrl_new_int_menu:
+:c:func:`v4l2_ctrl_new_int_menu`:
 
 .. code-block:: none
 
@@ -149,7 +150,8 @@ v4l2_ctrl_new_int_menu:
 			const struct v4l2_ctrl_ops *ops,
 			u32 id, s32 max, s32 def, const s64 *qmenu_int);
 
-These functions are typically called right after the v4l2_ctrl_handler_init:
+These functions are typically called right after the
+:c:func:`v4l2_ctrl_handler_init`:
 
 .. code-block:: none
 
@@ -188,33 +190,34 @@ These functions are typically called right after the v4l2_ctrl_handler_init:
 		return err;
 	}
 
-The v4l2_ctrl_new_std function returns the v4l2_ctrl pointer to the new
-control, but if you do not need to access the pointer outside the control ops,
-then there is no need to store it.
-
-The v4l2_ctrl_new_std function will fill in most fields based on the control
-ID except for the min, max, step and default values. These are passed in the
-last four arguments. These values are driver specific while control attributes
-like type, name, flags are all global. The control's current value will be set
-to the default value.
-
-The v4l2_ctrl_new_std_menu function is very similar but it is used for menu
-controls. There is no min argument since that is always 0 for menu controls,
-and instead of a step there is a skip_mask argument: if bit X is 1, then menu
-item X is skipped.
-
-The v4l2_ctrl_new_int_menu function creates a new standard integer menu
-control with driver-specific items in the menu. It differs from
-v4l2_ctrl_new_std_menu in that it doesn't have the mask argument and takes
-as the last argument an array of signed 64-bit integers that form an exact
-menu item list.
-
-The v4l2_ctrl_new_std_menu_items function is very similar to
-v4l2_ctrl_new_std_menu but takes an extra parameter qmenu, which is the driver
-specific menu for an otherwise standard menu control. A good example for this
-control is the test pattern control for capture/display/sensors devices that
-have the capability to generate test patterns. These test patterns are hardware
-specific, so the contents of the menu will vary from device to device.
+The :c:func:`v4l2_ctrl_new_std` function returns the v4l2_ctrl pointer to
+the new control, but if you do not need to access the pointer outside the
+control ops, then there is no need to store it.
+
+The :c:func:`v4l2_ctrl_new_std` function will fill in most fields based on
+the control ID except for the min, max, step and default values. These are
+passed in the last four arguments. These values are driver specific while
+control attributes like type, name, flags are all global. The control's
+current value will be set to the default value.
+
+The :c:func:`v4l2_ctrl_new_std_menu` function is very similar but it is
+used for menu controls. There is no min argument since that is always 0 for
+menu controls, and instead of a step there is a skip_mask argument: if bit
+X is 1, then menu item X is skipped.
+
+The :c:func:`v4l2_ctrl_new_int_menu` function creates a new standard
+integer menu control with driver-specific items in the menu. It differs
+from v4l2_ctrl_new_std_menu in that it doesn't have the mask argument and
+takes as the last argument an array of signed 64-bit integers that form an
+exact menu item list.
+
+The :c:func:`v4l2_ctrl_new_std_menu_items` function is very similar to
+v4l2_ctrl_new_std_menu but takes an extra parameter qmenu, which is the
+driver specific menu for an otherwise standard menu control. A good example
+for this control is the test pattern control for capture/display/sensors
+devices that have the capability to generate test patterns. These test
+patterns are hardware specific, so the contents of the menu will vary from
+device to device.
 
 Note that if something fails, the function will return NULL or an error and
 set ctrl_handler->error to the error code. If ctrl_handler->error was already
-- 
cgit 


From 270c0024a368806a84a7e15d6024cbd232752c04 Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca@lucaceresoli.net>
Date: Thu, 13 Jun 2019 10:18:24 -0400
Subject: media: docs: v4l2-controls: convert code blocks to C

All these code blocks contain C code, enable C formatting for a nicer
reading.

Signed-off-by: Luca Ceresoli <luca@lucaceresoli.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/kapi/v4l2-controls.rst | 74 +++++++++++++++---------------
 1 file changed, 37 insertions(+), 37 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/media/kapi/v4l2-controls.rst b/Documentation/media/kapi/v4l2-controls.rst
index 41c0fd4009e9..45541e05a0e7 100644
--- a/Documentation/media/kapi/v4l2-controls.rst
+++ b/Documentation/media/kapi/v4l2-controls.rst
@@ -54,7 +54,7 @@ Basic usage for V4L2 and sub-device drivers
 
 For V4L2 drivers:
 
-.. code-block:: none
+.. code-block:: c
 
 	struct foo_dev {
 		...
@@ -66,7 +66,7 @@ For V4L2 drivers:
 
 For sub-device drivers:
 
-.. code-block:: none
+.. code-block:: c
 
 	struct foo_dev {
 		...
@@ -78,7 +78,7 @@ For sub-device drivers:
 
 1.2) Initialize the handler:
 
-.. code-block:: none
+.. code-block:: c
 
 	v4l2_ctrl_handler_init(&foo->ctrl_handler, nr_of_controls);
 
@@ -90,7 +90,7 @@ information. It is a hint only.
 
 For V4L2 drivers:
 
-.. code-block:: none
+.. code-block:: c
 
 	foo->v4l2_dev.ctrl_handler = &foo->ctrl_handler;
 
@@ -101,13 +101,13 @@ Those are now no longer needed.
 
 For sub-device drivers:
 
-.. code-block:: none
+.. code-block:: c
 
 	foo->sd.ctrl_handler = &foo->ctrl_handler;
 
 1.4) Clean up the handler at the end:
 
-.. code-block:: none
+.. code-block:: c
 
 	v4l2_ctrl_handler_free(&foo->ctrl_handler);
 
@@ -116,7 +116,7 @@ For sub-device drivers:
 
 You add non-menu controls by calling :c:func:`v4l2_ctrl_new_std`:
 
-.. code-block:: none
+.. code-block:: c
 
 	struct v4l2_ctrl *v4l2_ctrl_new_std(struct v4l2_ctrl_handler *hdl,
 			const struct v4l2_ctrl_ops *ops,
@@ -125,7 +125,7 @@ You add non-menu controls by calling :c:func:`v4l2_ctrl_new_std`:
 Menu and integer menu controls are added by calling
 :c:func:`v4l2_ctrl_new_std_menu`:
 
-.. code-block:: none
+.. code-block:: c
 
 	struct v4l2_ctrl *v4l2_ctrl_new_std_menu(struct v4l2_ctrl_handler *hdl,
 			const struct v4l2_ctrl_ops *ops,
@@ -134,7 +134,7 @@ Menu and integer menu controls are added by calling
 Menu controls with a driver specific menu are added by calling
 :c:func:`v4l2_ctrl_new_std_menu_items`:
 
-.. code-block:: none
+.. code-block:: c
 
        struct v4l2_ctrl *v4l2_ctrl_new_std_menu_items(
                        struct v4l2_ctrl_handler *hdl,
@@ -144,7 +144,7 @@ Menu controls with a driver specific menu are added by calling
 Integer menu controls with a driver specific menu can be added by calling
 :c:func:`v4l2_ctrl_new_int_menu`:
 
-.. code-block:: none
+.. code-block:: c
 
 	struct v4l2_ctrl *v4l2_ctrl_new_int_menu(struct v4l2_ctrl_handler *hdl,
 			const struct v4l2_ctrl_ops *ops,
@@ -153,7 +153,7 @@ Integer menu controls with a driver specific menu can be added by calling
 These functions are typically called right after the
 :c:func:`v4l2_ctrl_handler_init`:
 
-.. code-block:: none
+.. code-block:: c
 
 	static const s64 exp_bias_qmenu[] = {
 	       -2, -1, 0, 1, 2
@@ -232,7 +232,7 @@ a bit faster that way.
 
 3) Optionally force initial control setup:
 
-.. code-block:: none
+.. code-block:: c
 
 	v4l2_ctrl_handler_setup(&foo->ctrl_handler);
 
@@ -243,7 +243,7 @@ the hardware are in sync.
 
 4) Finally: implement the :c:type:`v4l2_ctrl_ops`
 
-.. code-block:: none
+.. code-block:: c
 
 	static const struct v4l2_ctrl_ops foo_ctrl_ops = {
 		.s_ctrl = foo_s_ctrl,
@@ -251,7 +251,7 @@ the hardware are in sync.
 
 Usually all you need is s_ctrl:
 
-.. code-block:: none
+.. code-block:: c
 
 	static int foo_s_ctrl(struct v4l2_ctrl *ctrl)
 	{
@@ -304,7 +304,7 @@ Accessing Control Values
 The following union is used inside the control framework to access control
 values:
 
-.. code-block:: none
+.. code-block:: c
 
 	union v4l2_ctrl_ptr {
 		s32 *p_s32;
@@ -316,7 +316,7 @@ values:
 The v4l2_ctrl struct contains these fields that can be used to access both
 current and new values:
 
-.. code-block:: none
+.. code-block:: c
 
 	s32 val;
 	struct {
@@ -329,7 +329,7 @@ current and new values:
 
 If the control has a simple s32 type type, then:
 
-.. code-block:: none
+.. code-block:: c
 
 	&ctrl->val == ctrl->p_new.p_s32
 	&ctrl->cur.val == ctrl->p_cur.p_s32
@@ -353,7 +353,7 @@ exception is for controls that return a volatile register such as a signal
 strength read-out that changes continuously. In that case you will need to
 implement g_volatile_ctrl like this:
 
-.. code-block:: none
+.. code-block:: c
 
 	static int foo_g_volatile_ctrl(struct v4l2_ctrl *ctrl)
 	{
@@ -371,7 +371,7 @@ changes.
 
 To mark a control as volatile you have to set V4L2_CTRL_FLAG_VOLATILE:
 
-.. code-block:: none
+.. code-block:: c
 
 	ctrl = v4l2_ctrl_new_std(&sd->ctrl_handler, ...);
 	if (ctrl)
@@ -392,7 +392,7 @@ not to introduce deadlocks.
 Outside of the control ops you have to go through to helper functions to get
 or set a single control value safely in your driver:
 
-.. code-block:: none
+.. code-block:: c
 
 	s32 v4l2_ctrl_g_ctrl(struct v4l2_ctrl *ctrl);
 	int v4l2_ctrl_s_ctrl(struct v4l2_ctrl *ctrl, s32 val);
@@ -403,7 +403,7 @@ will result in a deadlock since these helpers lock the handler as well.
 
 You can also take the handler lock yourself:
 
-.. code-block:: none
+.. code-block:: c
 
 	mutex_lock(&state->ctrl_handler.lock);
 	pr_info("String value is '%s'\n", ctrl1->p_cur.p_char);
@@ -416,7 +416,7 @@ Menu Controls
 
 The v4l2_ctrl struct contains this union:
 
-.. code-block:: none
+.. code-block:: c
 
 	union {
 		u32 step;
@@ -444,7 +444,7 @@ Custom Controls
 
 Driver specific controls can be created using v4l2_ctrl_new_custom():
 
-.. code-block:: none
+.. code-block:: c
 
 	static const struct v4l2_ctrl_config ctrl_filter = {
 		.ops = &ctrl_custom_ops,
@@ -498,7 +498,7 @@ By default all controls are independent from the others. But in more
 complex scenarios you can get dependencies from one control to another.
 In that case you need to 'cluster' them:
 
-.. code-block:: none
+.. code-block:: c
 
 	struct foo {
 		struct v4l2_ctrl_handler ctrl_handler;
@@ -522,7 +522,7 @@ composite control. Similar to how a 'struct' works in C.
 So when s_ctrl is called with V4L2_CID_AUDIO_VOLUME as argument, you should set
 all two controls belonging to the audio_cluster:
 
-.. code-block:: none
+.. code-block:: c
 
 	static int foo_s_ctrl(struct v4l2_ctrl *ctrl)
 	{
@@ -544,7 +544,7 @@ all two controls belonging to the audio_cluster:
 
 In the example above the following are equivalent for the VOLUME case:
 
-.. code-block:: none
+.. code-block:: c
 
 	ctrl == ctrl->cluster[AUDIO_CL_VOLUME] == state->audio_cluster[AUDIO_CL_VOLUME]
 	ctrl->cluster[AUDIO_CL_MUTE] == state->audio_cluster[AUDIO_CL_MUTE]
@@ -552,7 +552,7 @@ In the example above the following are equivalent for the VOLUME case:
 In practice using cluster arrays like this becomes very tiresome. So instead
 the following equivalent method is used:
 
-.. code-block:: none
+.. code-block:: c
 
 	struct {
 		/* audio cluster */
@@ -564,7 +564,7 @@ The anonymous struct is used to clearly 'cluster' these two control pointers,
 but it serves no other purpose. The effect is the same as creating an
 array with two control pointers. So you can just do:
 
-.. code-block:: none
+.. code-block:: c
 
 	state->volume = v4l2_ctrl_new_std(&state->ctrl_handler, ...);
 	state->mute = v4l2_ctrl_new_std(&state->ctrl_handler, ...);
@@ -620,7 +620,7 @@ changing that control affects the control flags of the manual controls.
 In order to simplify this a special variation of v4l2_ctrl_cluster was
 introduced:
 
-.. code-block:: none
+.. code-block:: c
 
 	void v4l2_ctrl_auto_cluster(unsigned ncontrols, struct v4l2_ctrl **controls,
 				    u8 manual_val, bool set_volatile);
@@ -675,7 +675,7 @@ of another handler (e.g. for a video device node), then you should first add
 the controls to the first handler, add the other controls to the second
 handler and finally add the first handler to the second. For example:
 
-.. code-block:: none
+.. code-block:: c
 
 	v4l2_ctrl_new_std(&radio_ctrl_handler, &radio_ops, V4L2_CID_AUDIO_VOLUME, ...);
 	v4l2_ctrl_new_std(&radio_ctrl_handler, &radio_ops, V4L2_CID_AUDIO_MUTE, ...);
@@ -689,7 +689,7 @@ all controls.
 
 Or you can add specific controls to a handler:
 
-.. code-block:: none
+.. code-block:: c
 
 	volume = v4l2_ctrl_new_std(&video_ctrl_handler, &ops, V4L2_CID_AUDIO_VOLUME, ...);
 	v4l2_ctrl_new_std(&video_ctrl_handler, &ops, V4L2_CID_BRIGHTNESS, ...);
@@ -698,7 +698,7 @@ Or you can add specific controls to a handler:
 What you should not do is make two identical controls for two handlers.
 For example:
 
-.. code-block:: none
+.. code-block:: c
 
 	v4l2_ctrl_new_std(&radio_ctrl_handler, &radio_ops, V4L2_CID_AUDIO_MUTE, ...);
 	v4l2_ctrl_new_std(&video_ctrl_handler, &video_ops, V4L2_CID_AUDIO_MUTE, ...);
@@ -719,7 +719,7 @@ not own. For example, if you have to find a volume control from a subdev.
 
 You can do that by calling v4l2_ctrl_find:
 
-.. code-block:: none
+.. code-block:: c
 
 	struct v4l2_ctrl *volume;
 
@@ -728,7 +728,7 @@ You can do that by calling v4l2_ctrl_find:
 Since v4l2_ctrl_find will lock the handler you have to be careful where you
 use it. For example, this is not a good idea:
 
-.. code-block:: none
+.. code-block:: c
 
 	struct v4l2_ctrl_handler ctrl_handler;
 
@@ -737,7 +737,7 @@ use it. For example, this is not a good idea:
 
 ...and in video_ops.s_ctrl:
 
-.. code-block:: none
+.. code-block:: c
 
 	case V4L2_CID_BRIGHTNESS:
 		contrast = v4l2_find_ctrl(&ctrl_handler, V4L2_CID_CONTRAST);
@@ -759,7 +759,7 @@ not when it is used in consumer-level hardware. In that case you want to keep
 those low-level controls local to the subdev. You can do this by simply
 setting the 'is_private' flag of the control to 1:
 
-.. code-block:: none
+.. code-block:: c
 
 	static const struct v4l2_ctrl_config ctrl_private = {
 		.ops = &ctrl_custom_ops,
@@ -796,7 +796,7 @@ Sometimes the platform or bridge driver needs to be notified when a control
 from a sub-device driver changes. You can set a notify callback by calling
 this function:
 
-.. code-block:: none
+.. code-block:: c
 
 	void v4l2_ctrl_notify(struct v4l2_ctrl *ctrl,
 		void (*notify)(struct v4l2_ctrl *ctrl, void *priv), void *priv);
-- 
cgit 


From 025b941132352c6093cbb3a1f6760426e739dc79 Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca@lucaceresoli.net>
Date: Thu, 13 Jun 2019 10:18:25 -0400
Subject: media: docs: v4l2-controls: document file to include

The tutorial in this section is almost complete, add the one missing bit.

Signed-off-by: Luca Ceresoli <luca@lucaceresoli.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/kapi/v4l2-controls.rst | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/media/kapi/v4l2-controls.rst b/Documentation/media/kapi/v4l2-controls.rst
index 45541e05a0e7..407617b1d0ce 100644
--- a/Documentation/media/kapi/v4l2-controls.rst
+++ b/Documentation/media/kapi/v4l2-controls.rst
@@ -50,6 +50,10 @@ Basic usage for V4L2 and sub-device drivers
 
 1) Prepare the driver:
 
+.. code-block:: c
+
+	#include <media/v4l2-ctrls.h>
+
 1.1) Add the handler to your driver's top-level struct:
 
 For V4L2 drivers:
-- 
cgit 


From 40208924b988888f786850fae42fb5ca880fab80 Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca@lucaceresoli.net>
Date: Thu, 13 Jun 2019 10:18:26 -0400
Subject: media: docs: v4l2-controls: remove outdated paragraph
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This paragraph was added by commit a42b57f5aacf ("V4L/DVB: Documentation:
add v4l2-controls.txt documenting the new controls API") back in 2010, when
the controls API has been improved. Nowadays it is a bit anachronistic, so
remove it.

The same information is stated in up-to-date wording a few paragraphs later

> You’re done! And this is sufficient for most of the drivers we have. No
> need to do any validation of control values, or implement QUERYCTRL,
> QUERY_EXT_CTRL and QUERYMENU. And G/S_CTRL as well as G/TRY/S_EXT_CTRLS
> are automatically supported.

Signed-off-by: Luca Ceresoli <luca@lucaceresoli.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/kapi/v4l2-controls.rst | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/media/kapi/v4l2-controls.rst b/Documentation/media/kapi/v4l2-controls.rst
index 407617b1d0ce..ebe2a55908be 100644
--- a/Documentation/media/kapi/v4l2-controls.rst
+++ b/Documentation/media/kapi/v4l2-controls.rst
@@ -98,11 +98,6 @@ For V4L2 drivers:
 
 	foo->v4l2_dev.ctrl_handler = &foo->ctrl_handler;
 
-Finally, remove all control functions from your v4l2_ioctl_ops (if any):
-vidioc_queryctrl, vidioc_query_ext_ctrl, vidioc_querymenu, vidioc_g_ctrl,
-vidioc_s_ctrl, vidioc_g_ext_ctrls, vidioc_try_ext_ctrls and vidioc_s_ext_ctrls.
-Those are now no longer needed.
-
 For sub-device drivers:
 
 .. code-block:: c
-- 
cgit 


From 8c937ab966fdf639e517594b48f06b1f07309072 Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca@lucaceresoli.net>
Date: Fri, 14 Jun 2019 11:14:29 -0400
Subject: media: docs: v4l2-controls: fix sentence rendered in a nonsense way

This sentence renders as:

> Since such compound controls need to expose more information about
> themselves than is possible with ioctls VIDIOC_QUERYCTRL,
> VIDIOC_QUERY_EXT_CTRL and VIDIOC_QUERYMENU the VIDIOC_QUERY_EXT_CTRL
  ^^^^^^^^^^^^^^^^^^^^^                          ^^^^^^^^^^^^^^^^^^^^^
> ioctl was added.

This does not make sense. Fix by providing an explicit link text. This
results in:

> Since such compound controls need to expose more information about
> themselves than is possible with VIDIOC_QUERYCTRL the
> VIDIOC_QUERY_EXT_CTRL ioctl was added.

Signed-off-by: Luca Ceresoli <luca@lucaceresoli.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/extended-controls.rst | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/media/uapi/v4l/extended-controls.rst b/Documentation/media/uapi/v4l/extended-controls.rst
index 0e9787072a41..655362483730 100644
--- a/Documentation/media/uapi/v4l/extended-controls.rst
+++ b/Documentation/media/uapi/v4l/extended-controls.rst
@@ -85,11 +85,10 @@ be able to see such compound controls. In other words, these controls
 with compound types should only be used programmatically.
 
 Since such compound controls need to expose more information about
-themselves than is possible with
-:ref:`VIDIOC_QUERYCTRL` the
-:ref:`VIDIOC_QUERY_EXT_CTRL <VIDIOC_QUERYCTRL>` ioctl was added. In
-particular, this ioctl gives the dimensions of the N-dimensional array
-if this control consists of more than one element.
+themselves than is possible with :ref:`VIDIOC_QUERYCTRL <VIDIOC_QUERYCTRL>`
+the :ref:`VIDIOC_QUERY_EXT_CTRL <VIDIOC_QUERYCTRL>` ioctl was added. In
+particular, this ioctl gives the dimensions of the N-dimensional array if
+this control consists of more than one element.
 
 .. note::
 
-- 
cgit 


From 03f4175bd01f94b99688b087c82706a11efbde5c Mon Sep 17 00:00:00 2001
From: André Almeida <andrealmeid@collabora.com>
Date: Sat, 15 Jun 2019 22:09:59 -0400
Subject: media: docs: create vimc documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Create vimc documentation file to explain its basic features, its
topology, how to configure it and to document vimc's subdevices.

Signed-off-by: André Almeida <andrealmeid@collabora.com>
Suggested-by: Helen Koike <helen.koike@collabora.com>
[hverkuil-cisco@xs4all.nl: Fix typo: The ``v4l2-utils`` -> The ``v4l-utils`` package]
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/v4l-drivers/index.rst |  1 +
 Documentation/media/v4l-drivers/vimc.dot  | 22 +++++++
 Documentation/media/v4l-drivers/vimc.rst  | 98 +++++++++++++++++++++++++++++++
 3 files changed, 121 insertions(+)
 create mode 100644 Documentation/media/v4l-drivers/vimc.dot
 create mode 100644 Documentation/media/v4l-drivers/vimc.rst

(limited to 'Documentation')

diff --git a/Documentation/media/v4l-drivers/index.rst b/Documentation/media/v4l-drivers/index.rst
index 33a055907258..c4c78a28654c 100644
--- a/Documentation/media/v4l-drivers/index.rst
+++ b/Documentation/media/v4l-drivers/index.rst
@@ -64,5 +64,6 @@ For more details see the file COPYING in the source distribution of Linux.
 	si476x
 	soc-camera
 	uvcvideo
+	vimc
 	vivid
 	zr364xx
diff --git a/Documentation/media/v4l-drivers/vimc.dot b/Documentation/media/v4l-drivers/vimc.dot
new file mode 100644
index 000000000000..57863a13fa39
--- /dev/null
+++ b/Documentation/media/v4l-drivers/vimc.dot
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+
+digraph board {
+	rankdir=TB
+	n00000001 [label="{{} | Sensor A\n/dev/v4l-subdev0 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+	n00000001:port0 -> n00000005:port0 [style=bold]
+	n00000001:port0 -> n0000000b [style=bold]
+	n00000003 [label="{{} | Sensor B\n/dev/v4l-subdev1 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+	n00000003:port0 -> n00000008:port0 [style=bold]
+	n00000003:port0 -> n0000000f [style=bold]
+	n00000005 [label="{{<port0> 0} | Debayer A\n/dev/v4l-subdev2 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+	n00000005:port1 -> n00000017:port0
+	n00000008 [label="{{<port0> 0} | Debayer B\n/dev/v4l-subdev3 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+	n00000008:port1 -> n00000017:port0 [style=dashed]
+	n0000000b [label="Raw Capture 0\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
+	n0000000f [label="Raw Capture 1\n/dev/video1", shape=box, style=filled, fillcolor=yellow]
+	n00000013 [label="RGB/YUV Input\n/dev/video2", shape=box, style=filled, fillcolor=yellow]
+	n00000013 -> n00000017:port0 [style=dashed]
+	n00000017 [label="{{<port0> 0} | Scaler\n/dev/v4l-subdev4 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+	n00000017:port1 -> n0000001a [style=bold]
+	n0000001a [label="RGB/YUV Capture\n/dev/video3", shape=box, style=filled, fillcolor=yellow]
+}
diff --git a/Documentation/media/v4l-drivers/vimc.rst b/Documentation/media/v4l-drivers/vimc.rst
new file mode 100644
index 000000000000..4628b12d417f
--- /dev/null
+++ b/Documentation/media/v4l-drivers/vimc.rst
@@ -0,0 +1,98 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+The Virtual Media Controller Driver (vimc)
+==========================================
+
+The vimc driver emulates complex video hardware using the V4L2 API and the Media
+API. It has a capture device and three subdevices: sensor, debayer and scaler.
+
+Topology
+--------
+
+The topology is hardcoded, although you could modify it in vimc-core and
+recompile the driver to achieve your own topology. This is the default topology:
+
+.. _vimc_topology_graph:
+
+.. kernel-figure:: vimc.dot
+    :alt:   vimc.dot
+    :align: center
+
+    Media pipeline graph on vimc
+
+Configuring the topology
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Each subdevice will come with its default configuration (pixelformat, height,
+width, ...). One needs to configure the topology in order to match the
+configuration on each linked subdevice to stream frames through the pipeline.
+If the configuration doesn't match, the stream will fail. The ``v4l-utils``
+package is a bundle of user-space applications, that comes with ``media-ctl`` and
+``v4l2-ctl`` that can be used to configure the vimc configuration. This sequence
+of commands fits for the default topology:
+
+.. code-block:: bash
+
+        media-ctl -d platform:vimc -V '"Sensor A":0[fmt:SBGGR8_1X8/640x480]'
+        media-ctl -d platform:vimc -V '"Debayer A":0[fmt:SBGGR8_1X8/640x480]'
+        media-ctl -d platform:vimc -V '"Sensor B":0[fmt:SBGGR8_1X8/640x480]'
+        media-ctl -d platform:vimc -V '"Debayer B":0[fmt:SBGGR8_1X8/640x480]'
+        v4l2-ctl -z platform:vimc -d "RGB/YUV Capture" -v width=1920,height=1440
+        v4l2-ctl -z platform:vimc -d "Raw Capture 0" -v pixelformat=BA81
+        v4l2-ctl -z platform:vimc -d "Raw Capture 1" -v pixelformat=BA81
+
+Subdevices
+----------
+
+Subdevices define the behavior of an entity in the topology. Depending on the
+subdevice, the entity can have multiple pads of type source or sink.
+
+vimc-sensor:
+	Generates images in several formats using video test pattern generator.
+	Exposes:
+
+	* 1 Pad source
+
+vimc-debayer:
+	Transforms images in bayer format into a non-bayer format.
+	Exposes:
+
+	* 1 Pad sink
+	* 1 Pad source
+
+vimc-scaler:
+	Scale up the image by a factor of 3. E.g.: a 640x480 image becomes a
+        1920x1440 image. (this value can be configured, see at
+        `Module options`_).
+	Exposes:
+
+	* 1 Pad sink
+	* 1 Pad source
+
+vimc-capture:
+	Exposes node /dev/videoX to allow userspace to capture the stream.
+	Exposes:
+
+	* 1 Pad sink
+	* 1 Pad source
+
+Module options
+---------------
+
+Vimc has a few module parameters to configure the driver. You should pass
+those arguments to each subdevice, not to the vimc module. For example::
+
+        vimc_subdevice.param=value
+
+* ``vimc_scaler.sca_mult=<unsigned int>``
+
+        Image size multiplier factor to be used to multiply both width and
+        height, so the image size will be ``sca_mult^2`` bigger than the
+        original one. Currently, only supports scaling up (the default value
+        is 3).
+
+* ``vimc_debayer.deb_mean_win_size=<unsigned int>``
+
+        Window size to calculate the mean. Note: the window size needs to be an
+        odd number, as the main pixel stays in the center of the window,
+        otherwise the next odd number is considered (the default value is 3).
-- 
cgit 


From ad266c031662e9e05304be70cc92fc3e833a7e78 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Sat, 4 May 2019 10:40:25 -0400
Subject: media: dt-bindings: imx7-csi: Document a single CSI clock

As per the i.MX7D Reference Manual only the MCLK is used for
the CSI block, so only document this single clock.

Signed-off-by: Fabio Estevam <festevam@gmail.com>
Tested-by: Rui Miguel Silva <rmfrfs@gmail.com>
Reviewed-by: Rui Miguel Silva <rmfrfs@gmail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/devicetree/bindings/media/imx7-csi.txt | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/media/imx7-csi.txt b/Documentation/devicetree/bindings/media/imx7-csi.txt
index 3c07bc676bc3..443aef07356e 100644
--- a/Documentation/devicetree/bindings/media/imx7-csi.txt
+++ b/Documentation/devicetree/bindings/media/imx7-csi.txt
@@ -14,8 +14,7 @@ Required properties:
 - interrupts    : should contain CSI interrupt;
 - clocks        : list of clock specifiers, see
         Documentation/devicetree/bindings/clock/clock-bindings.txt for details;
-- clock-names   : must contain "axi", "mclk" and "dcic" entries, matching
-                 entries in the clock property;
+- clock-names   : must contain "mclk";
 
 The device node shall contain one 'port' child node with one child 'endpoint'
 node, according to the bindings defined in:
@@ -32,10 +31,8 @@ example:
                         compatible = "fsl,imx7-csi";
                         reg = <0x30710000 0x10000>;
                         interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
-                        clocks = <&clks IMX7D_CLK_DUMMY>,
-                                        <&clks IMX7D_CSI_MCLK_ROOT_CLK>,
-                                        <&clks IMX7D_CLK_DUMMY>;
-                        clock-names = "axi", "mclk", "dcic";
+                        clocks = <&clks IMX7D_CSI_MCLK_ROOT_CLK>;
+                        clock-names = "mclk";
 
                         port {
                                 csi_from_csi_mux: endpoint {
-- 
cgit 


From 315cd67c945351f8a569500f8ab16b7fa94026e8 Mon Sep 17 00:00:00 2001
From: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Date: Tue, 18 Jun 2019 17:06:50 +0000
Subject: i2c: i801: Add Block Write-Block Read Process Call support

Add SMBUS 2.0 Block Write-Block Read Process Call command support.

Signed-off-by: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/i2c/busses/i2c-i801 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801
index ee9984f35868..cafb7e05f951 100644
--- a/Documentation/i2c/busses/i2c-i801
+++ b/Documentation/i2c/busses/i2c-i801
@@ -88,7 +88,7 @@ SMBus controller.
 Process Call Support
 --------------------
 
-Not supported.
+Block process call is supported on the 82801EB (ICH5) and later chips.
 
 
 I2C Block Read Support
-- 
cgit 


From 9be1485accd4a0375170b93f90894e7728029078 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Thu, 20 Jun 2019 13:51:26 +0300
Subject: i2c: i801: Add support for Intel Elkhart Lake

Add PCI ID for Intel Elkhart Lake PCH.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/i2c/busses/i2c-i801 | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801
index cafb7e05f951..d247edcb0f99 100644
--- a/Documentation/i2c/busses/i2c-i801
+++ b/Documentation/i2c/busses/i2c-i801
@@ -37,6 +37,7 @@ Supported adapters:
   * Intel Cedar Fork (PCH)
   * Intel Ice Lake (PCH)
   * Intel Comet Lake (PCH)
+  * Intel Elkhart Lake (PCH)
    Datasheets: Publicly available at the Intel website
 
 On Intel Patsburg and later chipsets, both the normal host SMBus controller
-- 
cgit 


From 4196ad7cc99fce466b50be5df94ca8a2f0d1cf42 Mon Sep 17 00:00:00 2001
From: Johan Korsnes <johan.korsnes@gmail.com>
Date: Tue, 18 Jun 2019 03:37:26 -0400
Subject: media: vivid.rst: describe display present control

Signed-off-by: Johan Korsnes <johan.korsnes@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/v4l-drivers/vivid.rst | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/media/v4l-drivers/vivid.rst b/Documentation/media/v4l-drivers/vivid.rst
index edb6f33e029c..7082fec4075d 100644
--- a/Documentation/media/v4l-drivers/vivid.rst
+++ b/Documentation/media/v4l-drivers/vivid.rst
@@ -941,6 +941,11 @@ Digital Video Controls
 	affects the reported colorspace since DVI_D outputs will always use
 	sRGB.
 
+- Display Present:
+
+	sets the presence of a "display" on the HDMI output. This affects
+	the tx_edid_present, tx_hotplug and tx_rxsense controls.
+
 
 FM Radio Receiver Controls
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
-- 
cgit 


From 05af0765707fc136433ab380d5d627afe3b8dda9 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Thu, 20 Jun 2019 22:01:47 +0200
Subject: i2c: mux: demux-pinctrl: use proper email address for ABI requests

Use my commercial address, not my community one.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
[Removed a misplaced newline. /Peter]
Signed-off-by: Peter Rosin <peda@axentia.se>
---
 Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl b/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl
index 3c3514815cd5..c394b808be19 100644
--- a/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl
+++ b/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl
@@ -1,7 +1,7 @@
 What:		/sys/devices/platform/<i2c-demux-name>/available_masters
 Date:		January 2016
 KernelVersion:	4.6
-Contact:	Wolfram Sang <wsa@the-dreams.de>
+Contact:	Wolfram Sang <wsa+renesas@sang-engineering.com>
 Description:
 		Reading the file will give you a list of masters which can be
 		selected for a demultiplexed bus. The format is
@@ -12,7 +12,7 @@ Description:
 What:		/sys/devices/platform/<i2c-demux-name>/current_master
 Date:		January 2016
 KernelVersion:	4.6
-Contact:	Wolfram Sang <wsa@the-dreams.de>
+Contact:	Wolfram Sang <wsa+renesas@sang-engineering.com>
 Description:
 		This file selects/shows the active I2C master for a demultiplexed
 		bus. It uses the <index> value from the file 'available_masters'.
-- 
cgit 


From f24cd7df6f329c9d03932d5824a49d0f69f2ceda Mon Sep 17 00:00:00 2001
From: Tomasz Duszynski <tduszyns@gmail.com>
Date: Thu, 20 Jun 2019 21:50:10 +0200
Subject: dt-bindings: iio: chemical: sps30: convert bindings to yaml

Convert existing device tree bindings to yaml.

Signed-off-by: Tomasz Duszynski <tduszyns@gmail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../bindings/iio/chemical/sensirion,sps30.txt      | 12 -------
 .../bindings/iio/chemical/sensirion,sps30.yaml     | 39 ++++++++++++++++++++++
 2 files changed, 39 insertions(+), 12 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/iio/chemical/sensirion,sps30.txt
 create mode 100644 Documentation/devicetree/bindings/iio/chemical/sensirion,sps30.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/chemical/sensirion,sps30.txt b/Documentation/devicetree/bindings/iio/chemical/sensirion,sps30.txt
deleted file mode 100644
index 6eee2709b5b6..000000000000
--- a/Documentation/devicetree/bindings/iio/chemical/sensirion,sps30.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-* Sensirion SPS30 particulate matter sensor
-
-Required properties:
-- compatible: must be "sensirion,sps30"
-- reg: the I2C address of the sensor
-
-Example:
-
-sps30@69 {
-	compatible = "sensirion,sps30";
-	reg = <0x69>;
-};
diff --git a/Documentation/devicetree/bindings/iio/chemical/sensirion,sps30.yaml b/Documentation/devicetree/bindings/iio/chemical/sensirion,sps30.yaml
new file mode 100644
index 000000000000..50a50a0d7070
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/chemical/sensirion,sps30.yaml
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/chemical/sensirion,sps30.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sensirion SPS30 particulate matter sensor
+
+maintainers:
+  - Tomasz Duszynski <tduszyns@gmail.com>
+
+description: |
+  Air pollution sensor capable of measuring mass concentration of dust
+  particles.
+
+properties:
+  compatible:
+    enum:
+      - sensirion,sps30
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      air-pollution-sensor@69 {
+        compatible = "sensirion,sps30";
+        reg = <0x69>;
+      };
+    };
+
+...
-- 
cgit 


From 597382cbd3c1192f0bb0d19c48d1dcc8e1147bdd Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@st.com>
Date: Wed, 19 Jun 2019 14:29:54 +0200
Subject: dt-bindings: iio: adc: stm32: add missing vdda supply

Add missing vdda-supply, analog power supply, to STM32 ADC. It's required
to properly supply the ADC.

Fixes: 841fcea454fe ("Documentation: dt-bindings: Document STM32 ADC DT
bindings")

Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt
index 8346bcb04ad7..93a0bd2efc05 100644
--- a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt
+++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt
@@ -38,6 +38,7 @@ Required properties:
     It's required on stm32h7.
 - clock-names: Must be "adc" and/or "bus" depending on part used.
 - interrupt-controller: Identifies the controller node as interrupt-parent
+- vdda-supply: Phandle to the vdda input analog voltage.
 - vref-supply: Phandle to the vref input analog reference voltage.
 - #interrupt-cells = <1>;
 - #address-cells = <1>;
-- 
cgit 


From b64ed19b93c368be0fb6acf05377e8e3a694c92b Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 8 May 2019 03:02:18 -0700
Subject: x86/cpu: Add 'unsafe_fsgsbase' to enable CR4.FSGSBASE

This is temporary.  It will allow the next few patches to be tested
incrementally.

Setting unsafe_fsgsbase is a root hole.  Don't do it.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: Ravi Shankar <ravi.v.shankar@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Link: https://lkml.kernel.org/r/1557309753-24073-4-git-send-email-chang.seok.bae@intel.com
---
 Documentation/admin-guide/kernel-parameters.txt | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..b0fa5273b0fc 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2857,6 +2857,9 @@
 	no5lvl		[X86-64] Disable 5-level paging mode. Forces
 			kernel to use 4-level paging instead.
 
+	unsafe_fsgsbase	[X86] Allow FSGSBASE instructions.  This will be
+			replaced with a nofsgsbase flag.
+
 	no_console_suspend
 			[HW] Never suspend the console
 			Disable suspending of consoles during suspend and
-- 
cgit 


From 1fb12b35e5ffe379d109b22cb3069830d0136d9a Mon Sep 17 00:00:00 2001
From: "Chang S. Bae" <chang.seok.bae@intel.com>
Date: Wed, 8 May 2019 03:02:19 -0700
Subject: kbuild: Raise the minimum required binutils version to 2.21

It helps to use some new instructions directly in assembly code.

Suggested-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ravi Shankar <ravi.v.shankar@intel.com>
Cc: Linux Torvalds <torvalds@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Link: https://lkml.kernel.org/r/1557309753-24073-5-git-send-email-chang.seok.bae@intel.com
---
 Documentation/process/changes.rst | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 18735dc460a0..0a18075c485e 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -31,7 +31,7 @@ you probably needn't concern yourself with isdn4k-utils.
 ====================== ===============  ========================================
 GNU C                  4.6              gcc --version
 GNU make               3.81             make --version
-binutils               2.20             ld -v
+binutils               2.21             ld -v
 flex                   2.5.35           flex --version
 bison                  2.0              bison --version
 util-linux             2.10o            fdformat --version
@@ -77,9 +77,7 @@ You will need GNU make 3.81 or later to build the kernel.
 Binutils
 --------
 
-The build system has, as of 4.13, switched to using thin archives (`ar T`)
-rather than incremental linking (`ld -r`) for built-in.a intermediate steps.
-This requires binutils 2.20 or newer.
+Binutils 2.21 or newer is needed to build the kernel.
 
 pkg-config
 ----------
-- 
cgit 


From 5bf0cab60ee2c730ec91ae0aabc3146bcfed138b Mon Sep 17 00:00:00 2001
From: "Chang S. Bae" <chang.seok.bae@intel.com>
Date: Wed, 8 May 2019 03:02:28 -0700
Subject: x86/entry/64: Document GSBASE handling in the paranoid path

On a FSGSBASE system, the way to handle GSBASE in the paranoid path is
different from the existing SWAPGS-based entry/exit path handling. Document
the reason and what has to be done for FSGSBASE enabled systems.

[ tglx: Massaged doc and changelog ]

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ravi Shankar <ravi.v.shankar@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Link: https://lkml.kernel.org/r/1557309753-24073-14-git-send-email-chang.seok.bae@intel.com
---
 Documentation/x86/entry_64.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/x86/entry_64.rst b/Documentation/x86/entry_64.rst
index a48b3f6ebbe8..b87c1d816aea 100644
--- a/Documentation/x86/entry_64.rst
+++ b/Documentation/x86/entry_64.rst
@@ -108,3 +108,12 @@ We try to only use IST entries and the paranoid entry code for vectors
 that absolutely need the more expensive check for the GS base - and we
 generate all 'normal' entry points with the regular (faster) paranoid=0
 variant.
+
+On a FSGSBASE system, however, user space can set GS without kernel
+interaction. It means the value of GS base itself does not imply anything,
+whether a kernel value or a user space value. So, there is no longer a safe
+way to check whether the exception is entering from user mode or kernel
+mode in the paranoid entry code path. So the GSBASE value needs to be read
+out, saved and the kernel GSBASE value written. On exit the saved GSBASE
+value needs to be restored unconditionally. The non paranoid entry/exit
+code still uses SWAPGS unconditionally as the state is known.
-- 
cgit 


From 2032f1f96ee0da600633c6c627b9c0a2e0f8b8a6 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 8 May 2019 03:02:31 -0700
Subject: x86/cpu: Enable FSGSBASE on 64bit by default and add a chicken bit

Now that FSGSBASE is fully supported, remove unsafe_fsgsbase, enable
FSGSBASE by default, and add nofsgsbase to disable it.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Cc: Ravi Shankar <ravi.v.shankar@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Link: https://lkml.kernel.org/r/1557309753-24073-17-git-send-email-chang.seok.bae@intel.com
---
 Documentation/admin-guide/kernel-parameters.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b0fa5273b0fc..35bc3c3574c6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2857,8 +2857,7 @@
 	no5lvl		[X86-64] Disable 5-level paging mode. Forces
 			kernel to use 4-level paging instead.
 
-	unsafe_fsgsbase	[X86] Allow FSGSBASE instructions.  This will be
-			replaced with a nofsgsbase flag.
+	nofsgsbase	[X86] Disables FSGSBASE instructions.
 
 	no_console_suspend
 			[HW] Never suspend the console
-- 
cgit 


From 2c7b5ac5d5a93c4b0557293d06c6677f765081a6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 13 Jun 2019 22:04:24 +0300
Subject: Documentation/x86/64: Add documentation for GS/FS addressing mode

Explain how the GS/FS based addressing can be utilized in user space
applications along with the differences between the generic prctl() based
GS/FS base control and the FSGSBASE version available on newer CPUs.

Originally-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "Bae, Chang Seok" <chang.seok.bae@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>,
Cc: H . Peter Anvin <hpa@zytor.com>
Cc: "Shankar, Ravi V" <ravi.v.shankar@intel.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1906132246310.1791@nanos.tec.linutronix.de
---
 Documentation/x86/x86_64/fsgs.rst  | 199 +++++++++++++++++++++++++++++++++++++
 Documentation/x86/x86_64/index.rst |   1 +
 2 files changed, 200 insertions(+)
 create mode 100644 Documentation/x86/x86_64/fsgs.rst

(limited to 'Documentation')

diff --git a/Documentation/x86/x86_64/fsgs.rst b/Documentation/x86/x86_64/fsgs.rst
new file mode 100644
index 000000000000..380c0b5ccca2
--- /dev/null
+++ b/Documentation/x86/x86_64/fsgs.rst
@@ -0,0 +1,199 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Using FS and GS segments in user space applications
+===================================================
+
+The x86 architecture supports segmentation. Instructions which access
+memory can use segment register based addressing mode. The following
+notation is used to address a byte within a segment:
+
+  Segment-register:Byte-address
+
+The segment base address is added to the Byte-address to compute the
+resulting virtual address which is accessed. This allows to access multiple
+instances of data with the identical Byte-address, i.e. the same code. The
+selection of a particular instance is purely based on the base-address in
+the segment register.
+
+In 32-bit mode the CPU provides 6 segments, which also support segment
+limits. The limits can be used to enforce address space protections.
+
+In 64-bit mode the CS/SS/DS/ES segments are ignored and the base address is
+always 0 to provide a full 64bit address space. The FS and GS segments are
+still functional in 64-bit mode.
+
+Common FS and GS usage
+------------------------------
+
+The FS segment is commonly used to address Thread Local Storage (TLS). FS
+is usually managed by runtime code or a threading library. Variables
+declared with the '__thread' storage class specifier are instantiated per
+thread and the compiler emits the FS: address prefix for accesses to these
+variables. Each thread has its own FS base address so common code can be
+used without complex address offset calculations to access the per thread
+instances. Applications should not use FS for other purposes when they use
+runtimes or threading libraries which manage the per thread FS.
+
+The GS segment has no common use and can be used freely by
+applications. GCC and Clang support GS based addressing via address space
+identifiers.
+
+Reading and writing the FS/GS base address
+------------------------------------------
+
+There exist two mechanisms to read and write the FS/FS base address:
+
+ - the arch_prctl() system call
+
+ - the FSGSBASE instruction family
+
+Accessing FS/GS base with arch_prctl()
+--------------------------------------
+
+ The arch_prctl(2) based mechanism is available on all 64bit CPUs and all
+ kernel versions.
+
+ Reading the base:
+
+   arch_prctl(ARCH_GET_FS, &fsbase);
+   arch_prctl(ARCH_GET_GS, &gsbase);
+
+ Writing the base:
+
+   arch_prctl(ARCH_SET_FS, fsbase);
+   arch_prctl(ARCH_SET_GS, gsbase);
+
+ The ARCH_SET_GS prctl may be disabled depending on kernel configuration
+ and security settings.
+
+Accessing FS/GS base with the FSGSBASE instructions
+---------------------------------------------------
+
+ With the Ivy Bridge CPU generation Intel introduced a new set of
+ instructions to access the FS and GS base registers directly from user
+ space. These instructions are also supported on AMD Family 17H CPUs. The
+ following instructions are available:
+
+  =============== ===========================
+  RDFSBASE %reg   Read the FS base register
+  RDGSBASE %reg   Read the GS base register
+  WRFSBASE %reg   Write the FS base register
+  WRGSBASE %reg   Write the GS base register
+  =============== ===========================
+
+ The instructions avoid the overhead of the arch_prctl() syscall and allow
+ more flexible usage of the FS/GS addressing modes in user space
+ applications. This does not prevent conflicts between threading libraries
+ and runtimes which utilize FS and applications which want to use it for
+ their own purpose.
+
+FSGSBASE instructions enablement
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ The instructions are enumerated in CPUID leaf 7, bit 0 of EBX. If
+ available /proc/cpuinfo shows 'fsgsbase' in the flag entry of the CPUs.
+
+ The availability of the instructions does not enable them
+ automatically. The kernel has to enable them explicitly in CR4. The
+ reason for this is that older kernels make assumptions about the values in
+ the GS register and enforce them when GS base is set via
+ arch_prctl(). Allowing user space to write arbitrary values to GS base
+ would violate these assumptions and cause malfunction.
+
+ On kernels which do not enable FSGSBASE the execution of the FSGSBASE
+ instructions will fault with a #UD exception.
+
+ The kernel provides reliable information about the enabled state in the
+ ELF AUX vector. If the HWCAP2_FSGSBASE bit is set in the AUX vector, the
+ kernel has FSGSBASE instructions enabled and applications can use them.
+ The following code example shows how this detection works::
+
+   #include <sys/auxv.h>
+   #include <elf.h>
+
+   /* Will be eventually in asm/hwcap.h */
+   #ifndef HWCAP2_FSGSBASE
+   #define HWCAP2_FSGSBASE        (1 << 1)
+   #endif
+
+   ....
+
+   unsigned val = getauxval(AT_HWCAP2);
+
+   if (val & HWCAP2_FSGSBASE)
+        printf("FSGSBASE enabled\n");
+
+FSGSBASE instructions compiler support
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+GCC version 4.6.4 and newer provide instrinsics for the FSGSBASE
+instructions. Clang supports them as well.
+
+  =================== ===========================
+  _readfsbase_u64()   Read the FS base register
+  _readfsbase_u64()   Read the GS base register
+  _writefsbase_u64()  Write the FS base register
+  _writegsbase_u64()  Write the GS base register
+  =================== ===========================
+
+To utilize these instrinsics <immintrin.h> must be included in the source
+code and the compiler option -mfsgsbase has to be added.
+
+Compiler support for FS/GS based addressing
+-------------------------------------------
+
+GCC version 6 and newer provide support for FS/GS based addressing via
+Named Address Spaces. GCC implements the following address space
+identifiers for x86:
+
+  ========= ====================================
+  __seg_fs  Variable is addressed relative to FS
+  __seg_gs  Variable is addressed relative to GS
+  ========= ====================================
+
+The preprocessor symbols __SEG_FS and __SEG_GS are defined when these
+address spaces are supported. Code which implements fallback modes should
+check whether these symbols are defined. Usage example::
+
+  #ifdef __SEG_GS
+
+  long data0 = 0;
+  long data1 = 1;
+
+  long __seg_gs *ptr;
+
+  /* Check whether FSGSBASE is enabled by the kernel (HWCAP2_FSGSBASE) */
+  ....
+
+  /* Set GS to point to data0 */
+  _writegsbase_u64(&data0);
+
+  /* Access offset 0 of GS */
+  ptr = 0;
+  printf("data0 = %ld\n", *ptr);
+
+  /* Set GS to point to data1 */
+  _writegsbase_u64(&data1);
+  /* ptr still addresses offset 0! */
+  printf("data1 = %ld\n", *ptr);
+
+
+Clang does not provide the GCC address space identifiers, but it provides
+address spaces via an attribute based mechanism in Clang 5 and newer
+versions:
+
+ ==================================== =====================================
+  __attribute__((address_space(256))  Variable is addressed relative to GS
+  __attribute__((address_space(257))  Variable is addressed relative to FS
+ ==================================== =====================================
+
+FS/GS based addressing with inline assembly
+-------------------------------------------
+
+In case the compiler does not support address spaces, inline assembly can
+be used for FS/GS based addressing mode::
+
+	mov %fs:offset, %reg
+	mov %gs:offset, %reg
+
+	mov %reg, %fs:offset
+	mov %reg, %gs:offset
diff --git a/Documentation/x86/x86_64/index.rst b/Documentation/x86/x86_64/index.rst
index d6eaaa5a35fc..a56070fc8e77 100644
--- a/Documentation/x86/x86_64/index.rst
+++ b/Documentation/x86/x86_64/index.rst
@@ -14,3 +14,4 @@ x86_64 Support
    fake-numa-for-cpusets
    cpu-hotplug-spec
    machinecheck
+   fsgs
-- 
cgit 


From 1c349f4fd36e7f957217402f3a7f87031dc8a0b7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 17:53:40 -0300
Subject: docs: iio: convert to ReST

Rename the iio documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/iio/ep93xx_adc.rst   |  40 +++++++++++++++
 Documentation/iio/ep93xx_adc.txt   |  29 -----------
 Documentation/iio/iio_configfs.rst | 101 +++++++++++++++++++++++++++++++++++++
 Documentation/iio/iio_configfs.txt |  93 ----------------------------------
 Documentation/iio/index.rst        |  12 +++++
 5 files changed, 153 insertions(+), 122 deletions(-)
 create mode 100644 Documentation/iio/ep93xx_adc.rst
 delete mode 100644 Documentation/iio/ep93xx_adc.txt
 create mode 100644 Documentation/iio/iio_configfs.rst
 delete mode 100644 Documentation/iio/iio_configfs.txt
 create mode 100644 Documentation/iio/index.rst

(limited to 'Documentation')

diff --git a/Documentation/iio/ep93xx_adc.rst b/Documentation/iio/ep93xx_adc.rst
new file mode 100644
index 000000000000..4fd8dea3f6b8
--- /dev/null
+++ b/Documentation/iio/ep93xx_adc.rst
@@ -0,0 +1,40 @@
+==============================
+Cirrus Logic EP93xx ADC driver
+==============================
+
+1. Overview
+===========
+
+The driver is intended to work on both low-end (EP9301, EP9302) devices with
+5-channel ADC and high-end (EP9307, EP9312, EP9315) devices with 10-channel
+touchscreen/ADC module.
+
+2. Channel numbering
+====================
+
+Numbering scheme for channels 0..4 is defined in EP9301 and EP9302 datasheets.
+EP9307, EP9312 and EP9312 have 3 channels more (total 8), but the numbering is
+not defined. So the last three are numbered randomly, let's say.
+
+Assuming ep93xx_adc is IIO device0, you'd find the following entries under
+/sys/bus/iio/devices/iio:device0/:
+
+  +-----------------+---------------+
+  | sysfs entry     | ball/pin name |
+  +=================+===============+
+  | in_voltage0_raw | YM            |
+  +-----------------+---------------+
+  | in_voltage1_raw | SXP           |
+  +-----------------+---------------+
+  | in_voltage2_raw | SXM           |
+  +-----------------+---------------+
+  | in_voltage3_raw | SYP           |
+  +-----------------+---------------+
+  | in_voltage4_raw | SYM           |
+  +-----------------+---------------+
+  | in_voltage5_raw | XP            |
+  +-----------------+---------------+
+  | in_voltage6_raw | XM            |
+  +-----------------+---------------+
+  | in_voltage7_raw | YP            |
+  +-----------------+---------------+
diff --git a/Documentation/iio/ep93xx_adc.txt b/Documentation/iio/ep93xx_adc.txt
deleted file mode 100644
index 23053e7817bd..000000000000
--- a/Documentation/iio/ep93xx_adc.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-Cirrus Logic EP93xx ADC driver.
-
-1. Overview
-
-The driver is intended to work on both low-end (EP9301, EP9302) devices with
-5-channel ADC and high-end (EP9307, EP9312, EP9315) devices with 10-channel
-touchscreen/ADC module.
-
-2. Channel numbering
-
-Numbering scheme for channels 0..4 is defined in EP9301 and EP9302 datasheets.
-EP9307, EP9312 and EP9312 have 3 channels more (total 8), but the numbering is
-not defined. So the last three are numbered randomly, let's say.
-
-Assuming ep93xx_adc is IIO device0, you'd find the following entries under
-/sys/bus/iio/devices/iio:device0/:
-
-  +-----------------+---------------+
-  | sysfs entry     | ball/pin name |
-  +-----------------+---------------+
-  | in_voltage0_raw | YM            |
-  | in_voltage1_raw | SXP           |
-  | in_voltage2_raw | SXM           |
-  | in_voltage3_raw | SYP           |
-  | in_voltage4_raw | SYM           |
-  | in_voltage5_raw | XP            |
-  | in_voltage6_raw | XM            |
-  | in_voltage7_raw | YP            |
-  +-----------------+---------------+
diff --git a/Documentation/iio/iio_configfs.rst b/Documentation/iio/iio_configfs.rst
new file mode 100644
index 000000000000..ecbfdb3afef7
--- /dev/null
+++ b/Documentation/iio/iio_configfs.rst
@@ -0,0 +1,101 @@
+===============================
+Industrial IIO configfs support
+===============================
+
+1. Overview
+===========
+
+Configfs is a filesystem-based manager of kernel objects. IIO uses some
+objects that could be easily configured using configfs (e.g.: devices,
+triggers).
+
+See Documentation/filesystems/configfs/configfs.txt for more information
+about how configfs works.
+
+2. Usage
+========
+
+In order to use configfs support in IIO we need to select it at compile
+time via CONFIG_IIO_CONFIGFS config option.
+
+Then, mount the configfs filesystem (usually under /config directory)::
+
+  $ mkdir /config
+  $ mount -t configfs none /config
+
+At this point, all default IIO groups will be created and can be accessed
+under /config/iio. Next chapters will describe available IIO configuration
+objects.
+
+3. Software triggers
+====================
+
+One of the IIO default configfs groups is the "triggers" group. It is
+automagically accessible when the configfs is mounted and can be found
+under /config/iio/triggers.
+
+IIO software triggers implementation offers support for creating multiple
+trigger types. A new trigger type is usually implemented as a separate
+kernel module following the interface in include/linux/iio/sw_trigger.h::
+
+  /*
+   * drivers/iio/trigger/iio-trig-sample.c
+   * sample kernel module implementing a new trigger type
+   */
+  #include <linux/iio/sw_trigger.h>
+
+
+  static struct iio_sw_trigger *iio_trig_sample_probe(const char *name)
+  {
+	/*
+	 * This allocates and registers an IIO trigger plus other
+	 * trigger type specific initialization.
+	 */
+  }
+
+  static int iio_trig_hrtimer_remove(struct iio_sw_trigger *swt)
+  {
+	/*
+	 * This undoes the actions in iio_trig_sample_probe
+	 */
+  }
+
+  static const struct iio_sw_trigger_ops iio_trig_sample_ops = {
+	.probe		= iio_trig_sample_probe,
+	.remove		= iio_trig_sample_remove,
+  };
+
+  static struct iio_sw_trigger_type iio_trig_sample = {
+	.name = "trig-sample",
+	.owner = THIS_MODULE,
+	.ops = &iio_trig_sample_ops,
+  };
+
+module_iio_sw_trigger_driver(iio_trig_sample);
+
+Each trigger type has its own directory under /config/iio/triggers. Loading
+iio-trig-sample module will create 'trig-sample' trigger type directory
+/config/iio/triggers/trig-sample.
+
+We support the following interrupt sources (trigger types):
+
+	* hrtimer, uses high resolution timers as interrupt source
+
+3.1 Hrtimer triggers creation and destruction
+---------------------------------------------
+
+Loading iio-trig-hrtimer module will register hrtimer trigger types allowing
+users to create hrtimer triggers under /config/iio/triggers/hrtimer.
+
+e.g::
+
+  $ mkdir /config/iio/triggers/hrtimer/instance1
+  $ rmdir /config/iio/triggers/hrtimer/instance1
+
+Each trigger can have one or more attributes specific to the trigger type.
+
+3.2 "hrtimer" trigger types attributes
+--------------------------------------
+
+"hrtimer" trigger type doesn't have any configurable attribute from /config dir.
+It does introduce the sampling_frequency attribute to trigger directory.
diff --git a/Documentation/iio/iio_configfs.txt b/Documentation/iio/iio_configfs.txt
deleted file mode 100644
index 4e5f101837a8..000000000000
--- a/Documentation/iio/iio_configfs.txt
+++ /dev/null
@@ -1,93 +0,0 @@
-Industrial IIO configfs support
-
-1. Overview
-
-Configfs is a filesystem-based manager of kernel objects. IIO uses some
-objects that could be easily configured using configfs (e.g.: devices,
-triggers).
-
-See Documentation/filesystems/configfs/configfs.txt for more information
-about how configfs works.
-
-2. Usage
-
-In order to use configfs support in IIO we need to select it at compile
-time via CONFIG_IIO_CONFIGFS config option.
-
-Then, mount the configfs filesystem (usually under /config directory):
-
-$ mkdir /config
-$ mount -t configfs none /config
-
-At this point, all default IIO groups will be created and can be accessed
-under /config/iio. Next chapters will describe available IIO configuration
-objects.
-
-3. Software triggers
-
-One of the IIO default configfs groups is the "triggers" group. It is
-automagically accessible when the configfs is mounted and can be found
-under /config/iio/triggers.
-
-IIO software triggers implementation offers support for creating multiple
-trigger types. A new trigger type is usually implemented as a separate
-kernel module following the interface in include/linux/iio/sw_trigger.h:
-
-/*
- * drivers/iio/trigger/iio-trig-sample.c
- * sample kernel module implementing a new trigger type
- */
-#include <linux/iio/sw_trigger.h>
-
-
-static struct iio_sw_trigger *iio_trig_sample_probe(const char *name)
-{
-	/*
-	 * This allocates and registers an IIO trigger plus other
-	 * trigger type specific initialization.
-	 */
-}
-
-static int iio_trig_hrtimer_remove(struct iio_sw_trigger *swt)
-{
-	/*
-	 * This undoes the actions in iio_trig_sample_probe
-	 */
-}
-
-static const struct iio_sw_trigger_ops iio_trig_sample_ops = {
-	.probe		= iio_trig_sample_probe,
-	.remove		= iio_trig_sample_remove,
-};
-
-static struct iio_sw_trigger_type iio_trig_sample = {
-	.name = "trig-sample",
-	.owner = THIS_MODULE,
-	.ops = &iio_trig_sample_ops,
-};
-
-module_iio_sw_trigger_driver(iio_trig_sample);
-
-Each trigger type has its own directory under /config/iio/triggers. Loading
-iio-trig-sample module will create 'trig-sample' trigger type directory
-/config/iio/triggers/trig-sample.
-
-We support the following interrupt sources (trigger types):
-	* hrtimer, uses high resolution timers as interrupt source
-
-3.1 Hrtimer triggers creation and destruction
-
-Loading iio-trig-hrtimer module will register hrtimer trigger types allowing
-users to create hrtimer triggers under /config/iio/triggers/hrtimer.
-
-e.g:
-
-$ mkdir /config/iio/triggers/hrtimer/instance1
-$ rmdir /config/iio/triggers/hrtimer/instance1
-
-Each trigger can have one or more attributes specific to the trigger type.
-
-3.2 "hrtimer" trigger types attributes
-
-"hrtimer" trigger type doesn't have any configurable attribute from /config dir.
-It does introduce the sampling_frequency attribute to trigger directory.
diff --git a/Documentation/iio/index.rst b/Documentation/iio/index.rst
new file mode 100644
index 000000000000..0593dca89a94
--- /dev/null
+++ b/Documentation/iio/index.rst
@@ -0,0 +1,12 @@
+:orphan:
+
+==============
+Industrial I/O
+==============
+
+.. toctree::
+   :maxdepth: 1
+
+   iio_configfs
+
+   ep93xx_adc
-- 
cgit 


From 9285ec4c8b61d4930a575081abeba2cd4f449a74 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 21 Jun 2019 22:32:48 +0200
Subject: timekeeping: Use proper clock specifier names in functions

This makes boot uniformly boottime and tai uniformly clocktai, to
address the remaining oversights.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lkml.kernel.org/r/20190621203249.3909-2-Jason@zx2c4.com
---
 Documentation/core-api/timekeeping.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst
index 93cbeb9daec0..4d92b1ac8024 100644
--- a/Documentation/core-api/timekeeping.rst
+++ b/Documentation/core-api/timekeeping.rst
@@ -65,7 +65,7 @@ different format depending on what is required by the user:
 .. c:function:: u64 ktime_get_ns( void )
 		u64 ktime_get_boottime_ns( void )
 		u64 ktime_get_real_ns( void )
-		u64 ktime_get_tai_ns( void )
+		u64 ktime_get_clocktai_ns( void )
 		u64 ktime_get_raw_ns( void )
 
 	Same as the plain ktime_get functions, but returning a u64 number
-- 
cgit 


From 4c54294d01e605a9f992361b924c5d8b12822a6d Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 21 Jun 2019 22:32:49 +0200
Subject: timekeeping: Add missing _ns functions for coarse accessors

This further unifies the accessors for the fast and coarse functions, so
that the same types of functions are available for each. There was also
a bit of confusion with the documentation, which prior advertised a
function that has never existed. Finally, the vanilla ktime_get_coarse()
was omitted from the API originally, so this fills this oversight.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lkml.kernel.org/r/20190621203249.3909-3-Jason@zx2c4.com
---
 Documentation/core-api/timekeeping.rst | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst
index 4d92b1ac8024..15fc58e85ef9 100644
--- a/Documentation/core-api/timekeeping.rst
+++ b/Documentation/core-api/timekeeping.rst
@@ -99,16 +99,20 @@ Coarse and fast_ns access
 
 Some additional variants exist for more specialized cases:
 
-.. c:function:: ktime_t ktime_get_coarse_boottime( void )
+.. c:function:: ktime_t ktime_get_coarse( void )
+		ktime_t ktime_get_coarse_boottime( void )
 		ktime_t ktime_get_coarse_real( void )
 		ktime_t ktime_get_coarse_clocktai( void )
-		ktime_t ktime_get_coarse_raw( void )
+
+.. c:function:: u64 ktime_get_coarse_ns( void )
+		u64 ktime_get_coarse_boot_ns( void )
+		u64 ktime_get_coarse_real_ns( void )
+		u64 ktime_get_coarse_clocktai_ns( void )
 
 .. c:function:: void ktime_get_coarse_ts64( struct timespec64 * )
 		void ktime_get_coarse_boottime_ts64( struct timespec64 * )
 		void ktime_get_coarse_real_ts64( struct timespec64 * )
 		void ktime_get_coarse_clocktai_ts64( struct timespec64 * )
-		void ktime_get_coarse_raw_ts64( struct timespec64 * )
 
 	These are quicker than the non-coarse versions, but less accurate,
 	corresponding to CLOCK_MONONOTNIC_COARSE and CLOCK_REALTIME_COARSE
-- 
cgit 


From 8c25c0cb5bb4e63170bb7760179ec294a3827694 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Fri, 21 Jun 2019 15:59:09 +0100
Subject: doc: phy: document some PHY_INTERFACE_MODE_xxx settings

There seems to be some confusion surrounding three PHY interface modes,
specifically 1000BASE-X, 2500BASE-X and SGMII.  Add some documentation
to phylib detailing precisely what these interface modes refer to.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/phy.rst | 45 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst
index 0dd90d7df5ec..a689966bc4be 100644
--- a/Documentation/networking/phy.rst
+++ b/Documentation/networking/phy.rst
@@ -202,7 +202,8 @@ the PHY/controller, of which the PHY needs to be aware.
 
 *interface* is a u32 which specifies the connection type used
 between the controller and the PHY.  Examples are GMII, MII,
-RGMII, and SGMII.  For a full list, see include/linux/phy.h
+RGMII, and SGMII.  See "PHY interface mode" below.  For a full
+list, see include/linux/phy.h
 
 Now just make sure that phydev->supported and phydev->advertising have any
 values pruned from them which don't make sense for your controller (a 10/100
@@ -225,6 +226,48 @@ When you want to disconnect from the network (even if just briefly), you call
 phy_stop(phydev). This function also stops the phylib state machine and
 disables PHY interrupts.
 
+PHY interface modes
+===================
+
+The PHY interface mode supplied in the phy_connect() family of functions
+defines the initial operating mode of the PHY interface.  This is not
+guaranteed to remain constant; there are PHYs which dynamically change
+their interface mode without software interaction depending on the
+negotiation results.
+
+Some of the interface modes are described below:
+
+``PHY_INTERFACE_MODE_1000BASEX``
+    This defines the 1000BASE-X single-lane serdes link as defined by the
+    802.3 standard section 36.  The link operates at a fixed bit rate of
+    1.25Gbaud using a 10B/8B encoding scheme, resulting in an underlying
+    data rate of 1Gbps.  Embedded in the data stream is a 16-bit control
+    word which is used to negotiate the duplex and pause modes with the
+    remote end.  This does not include "up-clocked" variants such as 2.5Gbps
+    speeds (see below.)
+
+``PHY_INTERFACE_MODE_2500BASEX``
+    This defines a variant of 1000BASE-X which is clocked 2.5 times faster,
+    than the 802.3 standard giving a fixed bit rate of 3.125Gbaud.
+
+``PHY_INTERFACE_MODE_SGMII``
+    This is used for Cisco SGMII, which is a modification of 1000BASE-X
+    as defined by the 802.3 standard.  The SGMII link consists of a single
+    serdes lane running at a fixed bit rate of 1.25Gbaud with 10B/8B
+    encoding.  The underlying data rate is 1Gbps, with the slower speeds of
+    100Mbps and 10Mbps being achieved through replication of each data symbol.
+    The 802.3 control word is re-purposed to send the negotiated speed and
+    duplex information from to the MAC, and for the MAC to acknowledge
+    receipt.  This does not include "up-clocked" variants such as 2.5Gbps
+    speeds.
+
+    Note: mismatched SGMII vs 1000BASE-X configuration on a link can
+    successfully pass data in some circumstances, but the 16-bit control
+    word will not be correctly interpreted, which may cause mismatches in
+    duplex, pause or other settings.  This is dependent on the MAC and/or
+    PHY behaviour.
+
+
 Pause frames / flow control
 ===========================
 
-- 
cgit 


From 203dffacf592317e54480704f569a09f8b7ca380 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Wed, 19 Jun 2019 18:33:58 -0700
Subject: Documentation/ABI: Document umwait control sysfs interfaces

Since two new sysfs interface files are created for umwait control, add
an ABI document entry for the files:

   /sys/devices/system/cpu/umwait_control/enable_c02
   /sys/devices/system/cpu/umwait_control/max_time

[ tglx: Made the write value instructions readable ]

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ashok Raj <ashok.raj@intel.com>
Cc: "Borislav Petkov" <bp@alien8.de>
Cc: "H Peter Anvin" <hpa@zytor.com>
Cc: "Andy Lutomirski" <luto@kernel.org>
Cc: "Peter Zijlstra" <peterz@infradead.org>
Cc: "Tony Luck" <tony.luck@intel.com>
Cc: "Ravi V Shankar" <ravi.v.shankar@intel.com>
Link: https://lkml.kernel.org/r/1560994438-235698-6-git-send-email-fenghua.yu@intel.com
---
 Documentation/ABI/testing/sysfs-devices-system-cpu | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 1528239f69b2..923fe2001472 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -538,3 +538,26 @@ Description:	Intel Energy and Performance Bias Hint (EPB)
 
 		This attribute is present for all online CPUs supporting the
 		Intel EPB feature.
+
+What:		/sys/devices/system/cpu/umwait_control
+		/sys/devices/system/cpu/umwait_control/enable_c02
+		/sys/devices/system/cpu/umwait_control/max_time
+Date:		May 2019
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:	Umwait control
+
+		enable_c02: Read/write interface to control umwait C0.2 state
+			Read returns C0.2 state status:
+				0: C0.2 is disabled
+				1: C0.2 is enabled
+
+			Write 'y' or '1'  or 'on' to enable C0.2 state.
+			Write 'n' or '0'  or 'off' to disable C0.2 state.
+
+			The interface is case insensitive.
+
+		max_time: Read/write interface to control umwait maximum time
+			  in TSC-quanta that the CPU can reside in either C0.1
+			  or C0.2 state. The time is an unsigned 32-bit number.
+			  Note that a value of zero means there is no limit.
+			  Low order two bits must be zero.
-- 
cgit 


From 37ab356417950bcefce49aa18a2c9d68dfcaab4b Mon Sep 17 00:00:00 2001
From: Vijay Khemka <vijaykhemka@fb.com>
Date: Thu, 30 May 2019 16:11:57 -0700
Subject: hwmon: (pmbus) Document Infineon PXE1610 driver

Added documentation for Infineon PXE1610 driver

Signed-off-by: Vijay Khemka <vijaykhemka@fb.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/pxe1610 | 90 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 Documentation/hwmon/pxe1610

(limited to 'Documentation')

diff --git a/Documentation/hwmon/pxe1610 b/Documentation/hwmon/pxe1610
new file mode 100644
index 000000000000..211cedeefb44
--- /dev/null
+++ b/Documentation/hwmon/pxe1610
@@ -0,0 +1,90 @@
+Kernel driver pxe1610
+=====================
+
+Supported chips:
+  * Infineon PXE1610
+    Prefix: 'pxe1610'
+    Addresses scanned: -
+    Datasheet: Datasheet is not publicly available.
+
+  * Infineon PXE1110
+    Prefix: 'pxe1110'
+    Addresses scanned: -
+    Datasheet: Datasheet is not publicly available.
+
+  * Infineon PXM1310
+    Prefix: 'pxm1310'
+    Addresses scanned: -
+    Datasheet: Datasheet is not publicly available.
+
+Author: Vijay Khemka <vijaykhemka@fb.com>
+
+
+Description
+-----------
+
+PXE1610/PXE1110 are Multi-rail/Multiphase Digital Controllers
+and compliant to
+	-- Intel VR13 DC-DC converter specifications.
+	-- Intel SVID protocol.
+Used for Vcore power regulation for Intel VR13 based microprocessors
+	-- Servers, Workstations, and High-end desktops
+
+PXM1310 is a Multi-rail Controller and it is compliant to
+	-- Intel VR13 DC-DC converter specifications.
+	-- Intel SVID protocol.
+Used for DDR3/DDR4 Memory power regulation for Intel VR13 and
+IMVP8 based systems
+
+
+Usage Notes
+-----------
+
+This driver does not probe for PMBus devices. You will have
+to instantiate devices explicitly.
+
+Example: the following commands will load the driver for an PXE1610
+at address 0x70 on I2C bus #4:
+
+# modprobe pxe1610
+# echo pxe1610 0x70 > /sys/bus/i2c/devices/i2c-4/new_device
+
+It can also be instantiated by declaring in device tree
+
+
+Sysfs attributes
+----------------
+
+curr1_label		"iin"
+curr1_input		Measured input current
+curr1_alarm		Current high alarm
+
+curr[2-4]_label		"iout[1-3]"
+curr[2-4]_input		Measured output current
+curr[2-4]_crit		Critical maximum current
+curr[2-4]_crit_alarm	Current critical high alarm
+
+in1_label		"vin"
+in1_input		Measured input voltage
+in1_crit		Critical maximum input voltage
+in1_crit_alarm		Input voltage critical high alarm
+
+in[2-4]_label		"vout[1-3]"
+in[2-4]_input		Measured output voltage
+in[2-4]_lcrit		Critical minimum output voltage
+in[2-4]_lcrit_alarm	Output voltage critical low alarm
+in[2-4]_crit		Critical maximum output voltage
+in[2-4]_crit_alarm	Output voltage critical high alarm
+
+power1_label		"pin"
+power1_input		Measured input power
+power1_alarm		Input power high alarm
+
+power[2-4]_label	"pout[1-3]"
+power[2-4]_input	Measured output power
+
+temp[1-3]_input		Measured temperature
+temp[1-3]_crit		Critical high temperature
+temp[1-3]_crit_alarm	Chip temperature critical high alarm
+temp[1-3]_max		Maximum temperature
+temp[1-3]_max_alarm	Chip temperature high alarm
-- 
cgit 


From b0935123a18360d19f1dcc779ea33841cdc304cc Mon Sep 17 00:00:00 2001
From: Prakhar Srivastava <prsriva02@gmail.com>
Date: Sun, 23 Jun 2019 23:23:29 -0700
Subject: IMA: Define a new hook to measure the kexec boot command line
 arguments

Currently during soft reboot(kexec_file_load) boot command line
arguments are not measured. Define hooks needed to measure kexec
command line arguments during soft reboot(kexec_file_load).

- A new ima hook ima_kexec_cmdline is defined to be called by the
kexec code.
- A new function process_buffer_measurement is defined to measure
the buffer hash into the IMA measurement list.
- A new func policy KEXEC_CMDLINE is defined to control the
 measurement.

Signed-off-by: Prakhar Srivastava <prsriva02@gmail.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 Documentation/ABI/testing/ima_policy | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index b383c1763610..fc376a323908 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -28,6 +28,7 @@ Description:
 		base: 	func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK]
 				[FIRMWARE_CHECK]
 				[KEXEC_KERNEL_CHECK] [KEXEC_INITRAMFS_CHECK]
+				[KEXEC_CMDLINE]
 			mask:= [[^]MAY_READ] [[^]MAY_WRITE] [[^]MAY_APPEND]
 			       [[^]MAY_EXEC]
 			fsmagic:= hex value
-- 
cgit 


From 8b388cee66350f2dd8e77afb9eab798ed5c796e9 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Tue, 18 Jun 2019 21:07:29 +0200
Subject: dt-bindings: gpu: mali: Add Samsung compatibles for Midgard and
 Utgard

Add vendor compatibles for specific implementation of Mali Utgard
(Exynos3250, Exynos4-family) and Midgard (Exynos5433, Exynos7).

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
---
 Documentation/devicetree/bindings/gpu/arm,mali-midgard.txt | 1 +
 Documentation/devicetree/bindings/gpu/arm,mali-utgard.txt  | 1 +
 2 files changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.txt b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.txt
index 1b1a74129141..c000d224b4e0 100644
--- a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.txt
+++ b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.txt
@@ -16,6 +16,7 @@ Required properties:
     + "arm,mali-t880"
   * which must be preceded by one of the following vendor specifics:
     + "amlogic,meson-gxm-mali"
+    + "samsung,exynos5433-mali"
     + "rockchip,rk3288-mali"
     + "rockchip,rk3399-mali"
 
diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-utgard.txt b/Documentation/devicetree/bindings/gpu/arm,mali-utgard.txt
index ae63f09fda7d..b352a6851a06 100644
--- a/Documentation/devicetree/bindings/gpu/arm,mali-utgard.txt
+++ b/Documentation/devicetree/bindings/gpu/arm,mali-utgard.txt
@@ -17,6 +17,7 @@ Required properties:
       + amlogic,meson8b-mali
       + amlogic,meson-gxbb-mali
       + amlogic,meson-gxl-mali
+      + samsung,exynos4210-mali
       + rockchip,rk3036-mali
       + rockchip,rk3066-mali
       + rockchip,rk3188-mali
-- 
cgit 


From 9faf929608f52b907d8d41a455f3bee852f3dc85 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 3 Jun 2019 22:23:17 +0000
Subject: dt-bindings: power: reset: qcom: Add qcom,pm8998-pon compatibility
 line

Update bindings to support for qcom,pm8998-pon which uses gen2 pon

Cc: Andy Gross <agross@kernel.org>
Cc: David Brown <david.brown@linaro.org>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: Amit Pundir <amit.pundir@linaro.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: linux-arm-msm@vger.kernel.org
Cc: devicetree@vger.kernel.org
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 Documentation/devicetree/bindings/power/reset/qcom,pon.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/power/reset/qcom,pon.txt b/Documentation/devicetree/bindings/power/reset/qcom,pon.txt
index 5705f575862d..0c0dc3a1e693 100644
--- a/Documentation/devicetree/bindings/power/reset/qcom,pon.txt
+++ b/Documentation/devicetree/bindings/power/reset/qcom,pon.txt
@@ -9,6 +9,7 @@ Required Properties:
 -compatible: Must be one of:
 	"qcom,pm8916-pon"
 	"qcom,pms405-pon"
+	"qcom,pm8998-pon"
 
 -reg: Specifies the physical address of the pon register
 
-- 
cgit 


From 0bed61371f7d537ecb6e1833acf59a74ae183b37 Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@regit.org>
Date: Fri, 21 Jun 2019 22:13:10 +0200
Subject: xsk: sample kernel code is now in libbpf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix documentation that mention xdpsock_kern.c which has been
replaced by code embedded in libbpf.

Signed-off-by: Eric Leblond <eric@regit.org>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/networking/af_xdp.rst | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
index 50bccbf68308..eeedc2e826aa 100644
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -220,7 +220,21 @@ Usage
 In order to use AF_XDP sockets there are two parts needed. The
 user-space application and the XDP program. For a complete setup and
 usage example, please refer to the sample application. The user-space
-side is xdpsock_user.c and the XDP side xdpsock_kern.c.
+side is xdpsock_user.c and the XDP side is part of libbpf.
+
+The XDP code sample included in tools/lib/bpf/xsk.c is the following::
+
+   SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
+   {
+       int index = ctx->rx_queue_index;
+
+       // A set entry here means that the correspnding queue_id
+       // has an active AF_XDP socket bound to it.
+       if (bpf_map_lookup_elem(&xsks_map, &index))
+           return bpf_redirect_map(&xsks_map, index, 0);
+
+       return XDP_PASS;
+   }
 
 Naive ring dequeue and enqueue could look like this::
 
-- 
cgit 


From d48e0cd8fcaf314175a15d3076d7a1e71bd4e628 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 24 Jun 2019 11:15:39 +0200
Subject: timekeeping: Boot should be boottime for coarse ns accessor

Somewhere in all the patchsets before, this cleanup got lost.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Link: https://lkml.kernel.org/r/20190624091539.13512-1-Jason@zx2c4.com
---
 Documentation/core-api/timekeeping.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst
index 15fc58e85ef9..20ee447a50f3 100644
--- a/Documentation/core-api/timekeeping.rst
+++ b/Documentation/core-api/timekeeping.rst
@@ -105,7 +105,7 @@ Some additional variants exist for more specialized cases:
 		ktime_t ktime_get_coarse_clocktai( void )
 
 .. c:function:: u64 ktime_get_coarse_ns( void )
-		u64 ktime_get_coarse_boot_ns( void )
+		u64 ktime_get_coarse_boottime_ns( void )
 		u64 ktime_get_coarse_real_ns( void )
 		u64 ktime_get_coarse_clocktai_ns( void )
 
-- 
cgit 


From 124ecd6658e7ec2f1f14cfa36be76ac0f88cc33e Mon Sep 17 00:00:00 2001
From: Anson Huang <Anson.Huang@nxp.com>
Date: Tue, 11 Jun 2019 20:25:33 +0800
Subject: dt-bindings: imx: Add pinctrl binding doc for i.MX8MN

Add binding doc for i.MX8MN pinctrl driver.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Acked-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../bindings/pinctrl/fsl,imx8mn-pinctrl.txt        | 39 ++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/pinctrl/fsl,imx8mn-pinctrl.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx8mn-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mn-pinctrl.txt
new file mode 100644
index 000000000000..330716c971b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mn-pinctrl.txt
@@ -0,0 +1,39 @@
+* Freescale IMX8MN IOMUX Controller
+
+Please refer to fsl,imx-pinctrl.txt and pinctrl-bindings.txt in this directory
+for common binding part and usage.
+
+Required properties:
+- compatible: "fsl,imx8mn-iomuxc"
+- reg: should contain the base physical address and size of the iomuxc
+  registers.
+
+Required properties in sub-nodes:
+- fsl,pins: each entry consists of 6 integers and represents the mux and config
+  setting for one pin.  The first 5 integers <mux_reg conf_reg input_reg mux_val
+  input_val> are specified using a PIN_FUNC_ID macro, which can be found in
+  <arch/arm64/boot/dts/freescale/imx8mn-pinfunc.h>. The last integer CONFIG is
+  the pad setting value like pull-up on this pin. Please refer to i.MX8M Nano
+  Reference Manual for detailed CONFIG settings.
+
+Examples:
+
+&uart1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart1>;
+};
+
+iomuxc: pinctrl@30330000 {
+        compatible = "fsl,imx8mn-iomuxc";
+        reg = <0x0 0x30330000 0x0 0x10000>;
+
+        pinctrl_uart1: uart1grp {
+                fsl,pins = <
+			MX8MN_IOMUXC_UART1_RXD_UART1_DCE_RX	0x140
+			MX8MN_IOMUXC_UART1_TXD_UART1_DCE_TX	0x140
+			MX8MN_IOMUXC_UART3_RXD_UART1_DCE_CTS_B	0x140
+			MX8MN_IOMUXC_UART3_TXD_UART1_DCE_RTS_B	0x140
+			MX8MN_IOMUXC_SD1_DATA4_GPIO2_IO6	0x19
+                >;
+        };
+};
-- 
cgit 


From 7b34b0032bae3c6dac2ddd87fbb356489acda517 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Tue, 18 Jun 2019 09:54:55 +1200
Subject: dt-bindings: pinctrl: mvebu: Document bindings for 98DX1135

The 98DX1135 is similar to the 98DX4122 except the MPP options differ.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../bindings/pinctrl/marvell,kirkwood-pinctrl.txt  | 44 +++++++++++++++++++++-
 1 file changed, 42 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,kirkwood-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,kirkwood-pinctrl.txt
index 6c0ea155b708..2932f171ee85 100644
--- a/Documentation/devicetree/bindings/pinctrl/marvell,kirkwood-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/marvell,kirkwood-pinctrl.txt
@@ -6,8 +6,8 @@ part and usage.
 Required properties:
 - compatible: "marvell,88f6180-pinctrl",
               "marvell,88f6190-pinctrl", "marvell,88f6192-pinctrl",
-              "marvell,88f6281-pinctrl", "marvell,88f6282-pinctrl"
-              "marvell,98dx4122-pinctrl"
+              "marvell,88f6281-pinctrl", "marvell,88f6282-pinctrl",
+              "marvell,98dx4122-pinctrl", "marvell,98dx1135-pinctrl"
 - reg: register specifier of MPP registers
 
 This driver supports all kirkwood variants, i.e. 88f6180, 88f619x, and 88f628x.
@@ -317,3 +317,43 @@ mpp44         44       gpio
 mpp45         45       gpio
 mpp49         49       gpio
 
+* Marvell Poncat2 98dx1135
+
+name          pins     functions
+================================================================================
+
+mpp0          0        gpio, nand(io2), spi(cs)
+mpp1          1        gpo, nand(io3), spi(mosi)
+mpp2          2        gpo, nand(io4), spi(sck)
+mpp3          3        gpo, nand(io5), spi(miso)
+mpp4          4        gpio, nand(io6), uart0(rxd)
+mpp5          5        gpo, nand(io7), uart0(txd)
+mpp6          6        sysrst(out)
+mpp7          7        gpo, spi(cs)
+mpp8          8        gpio, twsi0(sda), uart1(rts)
+mpp9          9        gpio, twsi(sck), uart1(cts)
+mpp10         10       gpo, uart0(txd)
+mpp11         11       gpio, uart0(rxd)
+mpp13         13       gpio, uart1(txd)
+mpp14         14       gpio, uart1(rxd)
+mpp15         15       gpio, uart0(rts)
+mpp16         16       gpio, uart0(cts)
+mpp17         17       gpio, nand(cle)
+mpp18         18       gpo, nand(io0)
+mpp19         19       gpo, nand(io1)
+mpp20         20       gpio
+mpp21         21       gpio
+mpp22         22       gpio
+mpp23         23       gpio
+mpp24         24       gpio
+mpp25         25       gpio
+mpp26         26       gpio
+mpp27         27       gpio
+mpp28         28       gpio, nand(ren)
+mpp29         29       gpio, nand(wen)
+mpp30         30       gpio
+mpp31         31       gpio
+mpp32         32       gpio
+mpp33         33       gpio
+mpp34         34       gpio, nand(ale)
+mpp35         35       gpio, nand(cen)
-- 
cgit 


From daa78cc3408e7145d99462d51c5f4c6b0cb2af9c Mon Sep 17 00:00:00 2001
From: Yash Shah <yash.shah@sifive.com>
Date: Tue, 11 Jun 2019 11:14:43 +0530
Subject: pwm: sifive: Add DT documentation for SiFive PWM Controller

DT documentation for PWM controller added.

Signed-off-by: Wesley W. Terpstra <wesley@sifive.com>
[Atish: Compatible string update]
Signed-off-by: Atish Patra <atish.patra@wdc.com>
Signed-off-by: Yash Shah <yash.shah@sifive.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 .../devicetree/bindings/pwm/pwm-sifive.txt         | 33 ++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/pwm/pwm-sifive.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pwm/pwm-sifive.txt b/Documentation/devicetree/bindings/pwm/pwm-sifive.txt
new file mode 100644
index 000000000000..36447e3c9378
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-sifive.txt
@@ -0,0 +1,33 @@
+SiFive PWM controller
+
+Unlike most other PWM controllers, the SiFive PWM controller currently only
+supports one period for all channels in the PWM. All PWMs need to run at
+the same period. The period also has significant restrictions on the values
+it can achieve, which the driver rounds to the nearest achievable period.
+PWM RTL that corresponds to the IP block version numbers can be found
+here:
+
+https://github.com/sifive/sifive-blocks/tree/master/src/main/scala/devices/pwm
+
+Required properties:
+- compatible: Should be "sifive,<chip>-pwm" and "sifive,pwm<version>".
+  Supported compatible strings are: "sifive,fu540-c000-pwm" for the SiFive
+  PWM v0 as integrated onto the SiFive FU540 chip, and "sifive,pwm0" for the
+  SiFive PWM v0 IP block with no chip integration tweaks.
+  Please refer to sifive-blocks-ip-versioning.txt for details.
+- reg: physical base address and length of the controller's registers
+- clocks: Should contain a clock identifier for the PWM's parent clock.
+- #pwm-cells: Should be 3. See pwm.txt in this directory
+  for a description of the cell format.
+- interrupts: one interrupt per PWM channel
+
+Examples:
+
+pwm:  pwm@10020000 {
+	compatible = "sifive,fu540-c000-pwm", "sifive,pwm0";
+	reg = <0x0 0x10020000 0x0 0x1000>;
+	clocks = <&tlclk>;
+	interrupt-parent = <&plic>;
+	interrupts = <42 43 44 45>;
+	#pwm-cells = <3>;
+};
-- 
cgit 


From f6c9b59769db7ec21230502d889d4bbceac56b67 Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@st.com>
Date: Thu, 18 Apr 2019 11:37:45 +0200
Subject: dt-bindings: pwm: stm32-lp: Document pin control sleep state

Add documentation for pin control sleep state on STM32 LPTimer PWM.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt b/Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt
index bd23302e84be..6521bc44a74e 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt
@@ -11,8 +11,10 @@ Required parameters:
 			bindings defined in pwm.txt.
 
 Optional properties:
-- pinctrl-names: 	Set to "default".
-- pinctrl-0: 		Phandle pointing to pin configuration node for PWM.
+- pinctrl-names: 	Set to "default". An additional "sleep" state can be
+			defined to set pins in sleep state when in low power.
+- pinctrl-n: 		Phandle(s) pointing to pin configuration node for PWM,
+			respectively for "default" and "sleep" states.
 
 Example:
 	timer@40002400 {
@@ -21,7 +23,8 @@ Example:
 		pwm {
 			compatible = "st,stm32-pwm-lp";
 			#pwm-cells = <3>;
-			pinctrl-names = "default";
+			pinctrl-names = "default", "sleep";
 			pinctrl-0 = <&lppwm1_pins>;
+			pinctrl-1 = <&lppwm1_sleep_pins>;
 		};
 	};
-- 
cgit 


From 69252ec16596ae679bbfc29dfe64682c17ea4dc0 Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@st.com>
Date: Wed, 19 Jun 2019 11:52:01 +0200
Subject: dt-bindings: pwm: stm32: Add #pwm-cells

STM32 Timers support generic 3 cells PWM bindings to encode PWM number,
period and polarity as defined in pwm.txt.

Fixes: cd9a99c2f8e8 ("dt-bindings: pwm: Add STM32 bindings")
Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/devicetree/bindings/pwm/pwm-stm32.txt | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pwm/pwm-stm32.txt b/Documentation/devicetree/bindings/pwm/pwm-stm32.txt
index 3e6d55018d7a..a8690bfa5e1f 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-stm32.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-stm32.txt
@@ -8,6 +8,8 @@ Required parameters:
 - pinctrl-names: 	Set to "default".
 - pinctrl-0: 		List of phandles pointing to pin configuration nodes for PWM module.
 			For Pinctrl properties see ../pinctrl/pinctrl-bindings.txt
+- #pwm-cells:		Should be set to 3. This PWM chip uses the default 3 cells
+			bindings defined in pwm.txt.
 
 Optional parameters:
 - st,breakinput:	One or two <index level filter> to describe break input configurations.
@@ -28,6 +30,7 @@ Example:
 
 		pwm {
 			compatible = "st,stm32-pwm";
+			#pwm-cells = <3>;
 			pinctrl-0	= <&pwm1_pins>;
 			pinctrl-names	= "default";
 			st,breakinput = <0 1 5>;
-- 
cgit 


From 925488e8df4f396ad96ff008a84f5b14d8b73347 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Fri, 7 Jun 2019 17:44:05 +0200
Subject: dt-bindings: pwm: jz47xx: Remove unused compatible strings

Right now none of the Ingenic-based boards probe this driver from
devicetree. This driver defined three compatible strings for the exact
same behaviour. Before these strings are used, we can remove two of
them.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt b/Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt
index 7d9d3f90641b..493bec80d59b 100644
--- a/Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt
@@ -2,10 +2,7 @@ Ingenic JZ47xx PWM Controller
 =============================
 
 Required properties:
-- compatible: One of:
-  * "ingenic,jz4740-pwm"
-  * "ingenic,jz4770-pwm"
-  * "ingenic,jz4780-pwm"
+- compatible: Should be "ingenic,jz4740-pwm"
 - #pwm-cells: Should be 3. See pwm.txt in this directory for a description
   of the cells format.
 - clocks : phandle to the external clock.
-- 
cgit 


From 1201937491822b61641c1878ebcd16a93aed4540 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 18 Jun 2019 19:10:54 +0100
Subject: arm64: Expose ARMv8.5 CondM capability to userspace

ARMv8.5 adds new instructions XAFLAG and AXFLAG to translate the
representation of the results of floating point comparisons between the
native ARM format and an alternative format used by some software. Add
a hwcap allowing userspace to determine if they are present, since we
referred to earlier CondM extensions as FLAGM call these extensions
FLAGM2.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Documentation/arm64/elf_hwcaps.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt
index b73a2519ecf2..ee8dbfe652b6 100644
--- a/Documentation/arm64/elf_hwcaps.txt
+++ b/Documentation/arm64/elf_hwcaps.txt
@@ -207,6 +207,10 @@ HWCAP_FLAGM
 
     Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
 
+HWCAP2_FLAGM2
+
+    Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
+
 HWCAP_SSBS
 
     Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010.
-- 
cgit 


From 97162a1ee8a1735fc7a7159fe08de966d88354ce Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sat, 8 Jun 2019 23:27:03 -0300
Subject: docs: infiniband: convert docs to ReST and rename to *.rst

The InfiniBand docs are plain text with no markups.  So, all we needed to
do were to add the title markups and some markup sequences in order to
properly parse tables, lists and literal blocks.

At its new index.rst, let's add a :orphan: while this is not linked to the
main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 Documentation/infiniband/core_locking.rst | 118 +++++++++++++++++++++
 Documentation/infiniband/core_locking.txt | 112 --------------------
 Documentation/infiniband/index.rst        |  23 +++++
 Documentation/infiniband/ipoib.rst        | 115 +++++++++++++++++++++
 Documentation/infiniband/ipoib.txt        | 105 -------------------
 Documentation/infiniband/opa_vnic.rst     | 159 ++++++++++++++++++++++++++++
 Documentation/infiniband/opa_vnic.txt     | 153 ---------------------------
 Documentation/infiniband/sysfs.rst        |   6 ++
 Documentation/infiniband/sysfs.txt        |   4 -
 Documentation/infiniband/tag_matching.rst |  69 +++++++++++++
 Documentation/infiniband/tag_matching.txt |  64 ------------
 Documentation/infiniband/user_mad.rst     | 166 ++++++++++++++++++++++++++++++
 Documentation/infiniband/user_mad.txt     | 153 ---------------------------
 Documentation/infiniband/user_verbs.rst   |  75 ++++++++++++++
 Documentation/infiniband/user_verbs.txt   |  69 -------------
 15 files changed, 731 insertions(+), 660 deletions(-)
 create mode 100644 Documentation/infiniband/core_locking.rst
 delete mode 100644 Documentation/infiniband/core_locking.txt
 create mode 100644 Documentation/infiniband/index.rst
 create mode 100644 Documentation/infiniband/ipoib.rst
 delete mode 100644 Documentation/infiniband/ipoib.txt
 create mode 100644 Documentation/infiniband/opa_vnic.rst
 delete mode 100644 Documentation/infiniband/opa_vnic.txt
 create mode 100644 Documentation/infiniband/sysfs.rst
 delete mode 100644 Documentation/infiniband/sysfs.txt
 create mode 100644 Documentation/infiniband/tag_matching.rst
 delete mode 100644 Documentation/infiniband/tag_matching.txt
 create mode 100644 Documentation/infiniband/user_mad.rst
 delete mode 100644 Documentation/infiniband/user_mad.txt
 create mode 100644 Documentation/infiniband/user_verbs.rst
 delete mode 100644 Documentation/infiniband/user_verbs.txt

(limited to 'Documentation')

diff --git a/Documentation/infiniband/core_locking.rst b/Documentation/infiniband/core_locking.rst
new file mode 100644
index 000000000000..f34669beb4fe
--- /dev/null
+++ b/Documentation/infiniband/core_locking.rst
@@ -0,0 +1,118 @@
+===========================
+InfiniBand Midlayer Locking
+===========================
+
+  This guide is an attempt to make explicit the locking assumptions
+  made by the InfiniBand midlayer.  It describes the requirements on
+  both low-level drivers that sit below the midlayer and upper level
+  protocols that use the midlayer.
+
+Sleeping and interrupt context
+==============================
+
+  With the following exceptions, a low-level driver implementation of
+  all of the methods in struct ib_device may sleep.  The exceptions
+  are any methods from the list:
+
+    - create_ah
+    - modify_ah
+    - query_ah
+    - destroy_ah
+    - post_send
+    - post_recv
+    - poll_cq
+    - req_notify_cq
+    - map_phys_fmr
+
+  which may not sleep and must be callable from any context.
+
+  The corresponding functions exported to upper level protocol
+  consumers:
+
+    - ib_create_ah
+    - ib_modify_ah
+    - ib_query_ah
+    - ib_destroy_ah
+    - ib_post_send
+    - ib_post_recv
+    - ib_req_notify_cq
+    - ib_map_phys_fmr
+
+  are therefore safe to call from any context.
+
+  In addition, the function
+
+    - ib_dispatch_event
+
+  used by low-level drivers to dispatch asynchronous events through
+  the midlayer is also safe to call from any context.
+
+Reentrancy
+----------
+
+  All of the methods in struct ib_device exported by a low-level
+  driver must be fully reentrant.  The low-level driver is required to
+  perform all synchronization necessary to maintain consistency, even
+  if multiple function calls using the same object are run
+  simultaneously.
+
+  The IB midlayer does not perform any serialization of function calls.
+
+  Because low-level drivers are reentrant, upper level protocol
+  consumers are not required to perform any serialization.  However,
+  some serialization may be required to get sensible results.  For
+  example, a consumer may safely call ib_poll_cq() on multiple CPUs
+  simultaneously.  However, the ordering of the work completion
+  information between different calls of ib_poll_cq() is not defined.
+
+Callbacks
+---------
+
+  A low-level driver must not perform a callback directly from the
+  same callchain as an ib_device method call.  For example, it is not
+  allowed for a low-level driver to call a consumer's completion event
+  handler directly from its post_send method.  Instead, the low-level
+  driver should defer this callback by, for example, scheduling a
+  tasklet to perform the callback.
+
+  The low-level driver is responsible for ensuring that multiple
+  completion event handlers for the same CQ are not called
+  simultaneously.  The driver must guarantee that only one CQ event
+  handler for a given CQ is running at a time.  In other words, the
+  following situation is not allowed::
+
+          CPU1                                    CPU2
+
+    low-level driver ->
+      consumer CQ event callback:
+        /* ... */
+        ib_req_notify_cq(cq, ...);
+                                          low-level driver ->
+        /* ... */                           consumer CQ event callback:
+                                              /* ... */
+        return from CQ event handler
+
+  The context in which completion event and asynchronous event
+  callbacks run is not defined.  Depending on the low-level driver, it
+  may be process context, softirq context, or interrupt context.
+  Upper level protocol consumers may not sleep in a callback.
+
+Hot-plug
+--------
+
+  A low-level driver announces that a device is ready for use by
+  consumers when it calls ib_register_device(), all initialization
+  must be complete before this call.  The device must remain usable
+  until the driver's call to ib_unregister_device() has returned.
+
+  A low-level driver must call ib_register_device() and
+  ib_unregister_device() from process context.  It must not hold any
+  semaphores that could cause deadlock if a consumer calls back into
+  the driver across these calls.
+
+  An upper level protocol consumer may begin using an IB device as
+  soon as the add method of its struct ib_client is called for that
+  device.  A consumer must finish all cleanup and free all resources
+  relating to a device before returning from the remove method.
+
+  A consumer is permitted to sleep in its add and remove methods.
diff --git a/Documentation/infiniband/core_locking.txt b/Documentation/infiniband/core_locking.txt
deleted file mode 100644
index 4b1f36b6ada0..000000000000
--- a/Documentation/infiniband/core_locking.txt
+++ /dev/null
@@ -1,112 +0,0 @@
-INFINIBAND MIDLAYER LOCKING
-
-  This guide is an attempt to make explicit the locking assumptions
-  made by the InfiniBand midlayer.  It describes the requirements on
-  both low-level drivers that sit below the midlayer and upper level
-  protocols that use the midlayer.
-
-Sleeping and interrupt context
-
-  With the following exceptions, a low-level driver implementation of
-  all of the methods in struct ib_device may sleep.  The exceptions
-  are any methods from the list:
-
-    create_ah
-    modify_ah
-    query_ah
-    destroy_ah
-    post_send
-    post_recv
-    poll_cq
-    req_notify_cq
-    map_phys_fmr
-
-  which may not sleep and must be callable from any context.
-
-  The corresponding functions exported to upper level protocol
-  consumers:
-
-    ib_create_ah
-    ib_modify_ah
-    ib_query_ah
-    ib_destroy_ah
-    ib_post_send
-    ib_post_recv
-    ib_req_notify_cq
-    ib_map_phys_fmr
-
-  are therefore safe to call from any context.
-
-  In addition, the function
-
-    ib_dispatch_event
-
-  used by low-level drivers to dispatch asynchronous events through
-  the midlayer is also safe to call from any context.
-
-Reentrancy
-
-  All of the methods in struct ib_device exported by a low-level
-  driver must be fully reentrant.  The low-level driver is required to
-  perform all synchronization necessary to maintain consistency, even
-  if multiple function calls using the same object are run
-  simultaneously.
-
-  The IB midlayer does not perform any serialization of function calls.
-
-  Because low-level drivers are reentrant, upper level protocol
-  consumers are not required to perform any serialization.  However,
-  some serialization may be required to get sensible results.  For
-  example, a consumer may safely call ib_poll_cq() on multiple CPUs
-  simultaneously.  However, the ordering of the work completion
-  information between different calls of ib_poll_cq() is not defined.
-
-Callbacks
-
-  A low-level driver must not perform a callback directly from the
-  same callchain as an ib_device method call.  For example, it is not
-  allowed for a low-level driver to call a consumer's completion event
-  handler directly from its post_send method.  Instead, the low-level
-  driver should defer this callback by, for example, scheduling a
-  tasklet to perform the callback.
-
-  The low-level driver is responsible for ensuring that multiple
-  completion event handlers for the same CQ are not called
-  simultaneously.  The driver must guarantee that only one CQ event
-  handler for a given CQ is running at a time.  In other words, the
-  following situation is not allowed:
-
-        CPU1                                    CPU2
-
-  low-level driver ->
-    consumer CQ event callback:
-      /* ... */
-      ib_req_notify_cq(cq, ...);
-                                        low-level driver ->
-      /* ... */                           consumer CQ event callback:
-                                            /* ... */
-      return from CQ event handler
-
-  The context in which completion event and asynchronous event
-  callbacks run is not defined.  Depending on the low-level driver, it
-  may be process context, softirq context, or interrupt context.
-  Upper level protocol consumers may not sleep in a callback.
-
-Hot-plug
-
-  A low-level driver announces that a device is ready for use by
-  consumers when it calls ib_register_device(), all initialization
-  must be complete before this call.  The device must remain usable
-  until the driver's call to ib_unregister_device() has returned.
-
-  A low-level driver must call ib_register_device() and
-  ib_unregister_device() from process context.  It must not hold any
-  semaphores that could cause deadlock if a consumer calls back into
-  the driver across these calls.
-
-  An upper level protocol consumer may begin using an IB device as
-  soon as the add method of its struct ib_client is called for that
-  device.  A consumer must finish all cleanup and free all resources
-  relating to a device before returning from the remove method.
-
-  A consumer is permitted to sleep in its add and remove methods.
diff --git a/Documentation/infiniband/index.rst b/Documentation/infiniband/index.rst
new file mode 100644
index 000000000000..22eea64de722
--- /dev/null
+++ b/Documentation/infiniband/index.rst
@@ -0,0 +1,23 @@
+:orphan:
+
+==========
+InfiniBand
+==========
+
+.. toctree::
+   :maxdepth: 1
+
+   core_locking
+   ipoib
+   opa_vnic
+   sysfs
+   tag_matching
+   user_mad
+   user_verbs
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/infiniband/ipoib.rst b/Documentation/infiniband/ipoib.rst
new file mode 100644
index 000000000000..0dd36154c0c9
--- /dev/null
+++ b/Documentation/infiniband/ipoib.rst
@@ -0,0 +1,115 @@
+==================
+IP over InfiniBand
+==================
+
+  The ib_ipoib driver is an implementation of the IP over InfiniBand
+  protocol as specified by RFC 4391 and 4392, issued by the IETF ipoib
+  working group.  It is a "native" implementation in the sense of
+  setting the interface type to ARPHRD_INFINIBAND and the hardware
+  address length to 20 (earlier proprietary implementations
+  masqueraded to the kernel as ethernet interfaces).
+
+Partitions and P_Keys
+=====================
+
+  When the IPoIB driver is loaded, it creates one interface for each
+  port using the P_Key at index 0.  To create an interface with a
+  different P_Key, write the desired P_Key into the main interface's
+  /sys/class/net/<intf name>/create_child file.  For example::
+
+    echo 0x8001 > /sys/class/net/ib0/create_child
+
+  This will create an interface named ib0.8001 with P_Key 0x8001.  To
+  remove a subinterface, use the "delete_child" file::
+
+    echo 0x8001 > /sys/class/net/ib0/delete_child
+
+  The P_Key for any interface is given by the "pkey" file, and the
+  main interface for a subinterface is in "parent."
+
+  Child interface create/delete can also be done using IPoIB's
+  rtnl_link_ops, where children created using either way behave the same.
+
+Datagram vs Connected modes
+===========================
+
+  The IPoIB driver supports two modes of operation: datagram and
+  connected.  The mode is set and read through an interface's
+  /sys/class/net/<intf name>/mode file.
+
+  In datagram mode, the IB UD (Unreliable Datagram) transport is used
+  and so the interface MTU has is equal to the IB L2 MTU minus the
+  IPoIB encapsulation header (4 bytes).  For example, in a typical IB
+  fabric with a 2K MTU, the IPoIB MTU will be 2048 - 4 = 2044 bytes.
+
+  In connected mode, the IB RC (Reliable Connected) transport is used.
+  Connected mode takes advantage of the connected nature of the IB
+  transport and allows an MTU up to the maximal IP packet size of 64K,
+  which reduces the number of IP packets needed for handling large UDP
+  datagrams, TCP segments, etc and increases the performance for large
+  messages.
+
+  In connected mode, the interface's UD QP is still used for multicast
+  and communication with peers that don't support connected mode. In
+  this case, RX emulation of ICMP PMTU packets is used to cause the
+  networking stack to use the smaller UD MTU for these neighbours.
+
+Stateless offloads
+==================
+
+  If the IB HW supports IPoIB stateless offloads, IPoIB advertises
+  TCP/IP checksum and/or Large Send (LSO) offloading capability to the
+  network stack.
+
+  Large Receive (LRO) offloading is also implemented and may be turned
+  on/off using ethtool calls.  Currently LRO is supported only for
+  checksum offload capable devices.
+
+  Stateless offloads are supported only in datagram mode.
+
+Interrupt moderation
+====================
+
+  If the underlying IB device supports CQ event moderation, one can
+  use ethtool to set interrupt mitigation parameters and thus reduce
+  the overhead incurred by handling interrupts.  The main code path of
+  IPoIB doesn't use events for TX completion signaling so only RX
+  moderation is supported.
+
+Debugging Information
+=====================
+
+  By compiling the IPoIB driver with CONFIG_INFINIBAND_IPOIB_DEBUG set
+  to 'y', tracing messages are compiled into the driver.  They are
+  turned on by setting the module parameters debug_level and
+  mcast_debug_level to 1.  These parameters can be controlled at
+  runtime through files in /sys/module/ib_ipoib/.
+
+  CONFIG_INFINIBAND_IPOIB_DEBUG also enables files in the debugfs
+  virtual filesystem.  By mounting this filesystem, for example with::
+
+    mount -t debugfs none /sys/kernel/debug
+
+  it is possible to get statistics about multicast groups from the
+  files /sys/kernel/debug/ipoib/ib0_mcg and so on.
+
+  The performance impact of this option is negligible, so it
+  is safe to enable this option with debug_level set to 0 for normal
+  operation.
+
+  CONFIG_INFINIBAND_IPOIB_DEBUG_DATA enables even more debug output in
+  the data path when data_debug_level is set to 1.  However, even with
+  the output disabled, enabling this configuration option will affect
+  performance, because it adds tests to the fast path.
+
+References
+==========
+
+  Transmission of IP over InfiniBand (IPoIB) (RFC 4391)
+    http://ietf.org/rfc/rfc4391.txt
+
+  IP over InfiniBand (IPoIB) Architecture (RFC 4392)
+    http://ietf.org/rfc/rfc4392.txt
+
+  IP over InfiniBand: Connected Mode (RFC 4755)
+    http://ietf.org/rfc/rfc4755.txt
diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
deleted file mode 100644
index 47c1dd9818f2..000000000000
--- a/Documentation/infiniband/ipoib.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-IP OVER INFINIBAND
-
-  The ib_ipoib driver is an implementation of the IP over InfiniBand
-  protocol as specified by RFC 4391 and 4392, issued by the IETF ipoib
-  working group.  It is a "native" implementation in the sense of
-  setting the interface type to ARPHRD_INFINIBAND and the hardware
-  address length to 20 (earlier proprietary implementations
-  masqueraded to the kernel as ethernet interfaces).
-
-Partitions and P_Keys
-
-  When the IPoIB driver is loaded, it creates one interface for each
-  port using the P_Key at index 0.  To create an interface with a
-  different P_Key, write the desired P_Key into the main interface's
-  /sys/class/net/<intf name>/create_child file.  For example:
-
-    echo 0x8001 > /sys/class/net/ib0/create_child
-
-  This will create an interface named ib0.8001 with P_Key 0x8001.  To
-  remove a subinterface, use the "delete_child" file:
-
-    echo 0x8001 > /sys/class/net/ib0/delete_child
-
-  The P_Key for any interface is given by the "pkey" file, and the
-  main interface for a subinterface is in "parent."
-
-  Child interface create/delete can also be done using IPoIB's
-  rtnl_link_ops, where children created using either way behave the same.
-
-Datagram vs Connected modes
-
-  The IPoIB driver supports two modes of operation: datagram and
-  connected.  The mode is set and read through an interface's
-  /sys/class/net/<intf name>/mode file.
-
-  In datagram mode, the IB UD (Unreliable Datagram) transport is used
-  and so the interface MTU has is equal to the IB L2 MTU minus the
-  IPoIB encapsulation header (4 bytes).  For example, in a typical IB
-  fabric with a 2K MTU, the IPoIB MTU will be 2048 - 4 = 2044 bytes.
-
-  In connected mode, the IB RC (Reliable Connected) transport is used.
-  Connected mode takes advantage of the connected nature of the IB
-  transport and allows an MTU up to the maximal IP packet size of 64K,
-  which reduces the number of IP packets needed for handling large UDP
-  datagrams, TCP segments, etc and increases the performance for large
-  messages.
-
-  In connected mode, the interface's UD QP is still used for multicast
-  and communication with peers that don't support connected mode. In
-  this case, RX emulation of ICMP PMTU packets is used to cause the
-  networking stack to use the smaller UD MTU for these neighbours.
-
-Stateless offloads
-
-  If the IB HW supports IPoIB stateless offloads, IPoIB advertises
-  TCP/IP checksum and/or Large Send (LSO) offloading capability to the
-  network stack.
-
-  Large Receive (LRO) offloading is also implemented and may be turned
-  on/off using ethtool calls.  Currently LRO is supported only for
-  checksum offload capable devices.
-
-  Stateless offloads are supported only in datagram mode.  
-
-Interrupt moderation
-
-  If the underlying IB device supports CQ event moderation, one can
-  use ethtool to set interrupt mitigation parameters and thus reduce
-  the overhead incurred by handling interrupts.  The main code path of
-  IPoIB doesn't use events for TX completion signaling so only RX
-  moderation is supported.
-
-Debugging Information
-
-  By compiling the IPoIB driver with CONFIG_INFINIBAND_IPOIB_DEBUG set
-  to 'y', tracing messages are compiled into the driver.  They are
-  turned on by setting the module parameters debug_level and
-  mcast_debug_level to 1.  These parameters can be controlled at
-  runtime through files in /sys/module/ib_ipoib/.
-
-  CONFIG_INFINIBAND_IPOIB_DEBUG also enables files in the debugfs
-  virtual filesystem.  By mounting this filesystem, for example with
-
-    mount -t debugfs none /sys/kernel/debug
-
-  it is possible to get statistics about multicast groups from the
-  files /sys/kernel/debug/ipoib/ib0_mcg and so on.
-
-  The performance impact of this option is negligible, so it
-  is safe to enable this option with debug_level set to 0 for normal
-  operation.
-
-  CONFIG_INFINIBAND_IPOIB_DEBUG_DATA enables even more debug output in
-  the data path when data_debug_level is set to 1.  However, even with
-  the output disabled, enabling this configuration option will affect
-  performance, because it adds tests to the fast path.
-
-References
-
-  Transmission of IP over InfiniBand (IPoIB) (RFC 4391)
-    http://ietf.org/rfc/rfc4391.txt 
-  IP over InfiniBand (IPoIB) Architecture (RFC 4392)
-    http://ietf.org/rfc/rfc4392.txt 
-  IP over InfiniBand: Connected Mode (RFC 4755)
-    http://ietf.org/rfc/rfc4755.txt
diff --git a/Documentation/infiniband/opa_vnic.rst b/Documentation/infiniband/opa_vnic.rst
new file mode 100644
index 000000000000..2f888d9ffec0
--- /dev/null
+++ b/Documentation/infiniband/opa_vnic.rst
@@ -0,0 +1,159 @@
+=================================================================
+Intel Omni-Path (OPA) Virtual Network Interface Controller (VNIC)
+=================================================================
+
+Intel Omni-Path (OPA) Virtual Network Interface Controller (VNIC) feature
+supports Ethernet functionality over Omni-Path fabric by encapsulating
+the Ethernet packets between HFI nodes.
+
+Architecture
+=============
+The patterns of exchanges of Omni-Path encapsulated Ethernet packets
+involves one or more virtual Ethernet switches overlaid on the Omni-Path
+fabric topology. A subset of HFI nodes on the Omni-Path fabric are
+permitted to exchange encapsulated Ethernet packets across a particular
+virtual Ethernet switch. The virtual Ethernet switches are logical
+abstractions achieved by configuring the HFI nodes on the fabric for
+header generation and processing. In the simplest configuration all HFI
+nodes across the fabric exchange encapsulated Ethernet packets over a
+single virtual Ethernet switch. A virtual Ethernet switch, is effectively
+an independent Ethernet network. The configuration is performed by an
+Ethernet Manager (EM) which is part of the trusted Fabric Manager (FM)
+application. HFI nodes can have multiple VNICs each connected to a
+different virtual Ethernet switch. The below diagram presents a case
+of two virtual Ethernet switches with two HFI nodes::
+
+                               +-------------------+
+                               |      Subnet/      |
+                               |     Ethernet      |
+                               |      Manager      |
+                               +-------------------+
+                                  /          /
+                                /           /
+                              /            /
+                            /             /
+  +-----------------------------+  +------------------------------+
+  |  Virtual Ethernet Switch    |  |  Virtual Ethernet Switch     |
+  |  +---------+    +---------+ |  | +---------+    +---------+   |
+  |  | VPORT   |    |  VPORT  | |  | |  VPORT  |    |  VPORT  |   |
+  +--+---------+----+---------+-+  +-+---------+----+---------+---+
+           |                 \        /                 |
+           |                   \    /                   |
+           |                     \/                     |
+           |                    /  \                    |
+           |                  /      \                  |
+       +-----------+------------+  +-----------+------------+
+       |   VNIC    |    VNIC    |  |    VNIC   |    VNIC    |
+       +-----------+------------+  +-----------+------------+
+       |          HFI           |  |          HFI           |
+       +------------------------+  +------------------------+
+
+
+The Omni-Path encapsulated Ethernet packet format is as described below.
+
+==================== ================================
+Bits                 Field
+==================== ================================
+Quad Word 0:
+0-19                 SLID (lower 20 bits)
+20-30                Length (in Quad Words)
+31                   BECN bit
+32-51                DLID (lower 20 bits)
+52-56                SC (Service Class)
+57-59                RC (Routing Control)
+60                   FECN bit
+61-62                L2 (=10, 16B format)
+63                   LT (=1, Link Transfer Head Flit)
+
+Quad Word 1:
+0-7                  L4 type (=0x78 ETHERNET)
+8-11                 SLID[23:20]
+12-15                DLID[23:20]
+16-31                PKEY
+32-47                Entropy
+48-63                Reserved
+
+Quad Word 2:
+0-15                 Reserved
+16-31                L4 header
+32-63                Ethernet Packet
+
+Quad Words 3 to N-1:
+0-63                 Ethernet packet (pad extended)
+
+Quad Word N (last):
+0-23                 Ethernet packet (pad extended)
+24-55                ICRC
+56-61                Tail
+62-63                LT (=01, Link Transfer Tail Flit)
+==================== ================================
+
+Ethernet packet is padded on the transmit side to ensure that the VNIC OPA
+packet is quad word aligned. The 'Tail' field contains the number of bytes
+padded. On the receive side the 'Tail' field is read and the padding is
+removed (along with ICRC, Tail and OPA header) before passing packet up
+the network stack.
+
+The L4 header field contains the virtual Ethernet switch id the VNIC port
+belongs to. On the receive side, this field is used to de-multiplex the
+received VNIC packets to different VNIC ports.
+
+Driver Design
+==============
+Intel OPA VNIC software design is presented in the below diagram.
+OPA VNIC functionality has a HW dependent component and a HW
+independent component.
+
+The support has been added for IB device to allocate and free the RDMA
+netdev devices. The RDMA netdev supports interfacing with the network
+stack thus creating standard network interfaces. OPA_VNIC is an RDMA
+netdev device type.
+
+The HW dependent VNIC functionality is part of the HFI1 driver. It
+implements the verbs to allocate and free the OPA_VNIC RDMA netdev.
+It involves HW resource allocation/management for VNIC functionality.
+It interfaces with the network stack and implements the required
+net_device_ops functions. It expects Omni-Path encapsulated Ethernet
+packets in the transmit path and provides HW access to them. It strips
+the Omni-Path header from the received packets before passing them up
+the network stack. It also implements the RDMA netdev control operations.
+
+The OPA VNIC module implements the HW independent VNIC functionality.
+It consists of two parts. The VNIC Ethernet Management Agent (VEMA)
+registers itself with IB core as an IB client and interfaces with the
+IB MAD stack. It exchanges the management information with the Ethernet
+Manager (EM) and the VNIC netdev. The VNIC netdev part allocates and frees
+the OPA_VNIC RDMA netdev devices. It overrides the net_device_ops functions
+set by HW dependent VNIC driver where required to accommodate any control
+operation. It also handles the encapsulation of Ethernet packets with an
+Omni-Path header in the transmit path. For each VNIC interface, the
+information required for encapsulation is configured by the EM via VEMA MAD
+interface. It also passes any control information to the HW dependent driver
+by invoking the RDMA netdev control operations::
+
+        +-------------------+ +----------------------+
+        |                   | |       Linux          |
+        |     IB MAD        | |      Network         |
+        |                   | |       Stack          |
+        +-------------------+ +----------------------+
+                 |               |          |
+                 |               |          |
+        +----------------------------+      |
+        |                            |      |
+        |      OPA VNIC Module       |      |
+        |  (OPA VNIC RDMA Netdev     |      |
+        |     & EMA functions)       |      |
+        |                            |      |
+        +----------------------------+      |
+                    |                       |
+                    |                       |
+           +------------------+             |
+           |     IB core      |             |
+           +------------------+             |
+                    |                       |
+                    |                       |
+        +--------------------------------------------+
+        |                                            |
+        |      HFI1 Driver with VNIC support         |
+        |                                            |
+        +--------------------------------------------+
diff --git a/Documentation/infiniband/opa_vnic.txt b/Documentation/infiniband/opa_vnic.txt
deleted file mode 100644
index 282e17be798a..000000000000
--- a/Documentation/infiniband/opa_vnic.txt
+++ /dev/null
@@ -1,153 +0,0 @@
-Intel Omni-Path (OPA) Virtual Network Interface Controller (VNIC) feature
-supports Ethernet functionality over Omni-Path fabric by encapsulating
-the Ethernet packets between HFI nodes.
-
-Architecture
-=============
-The patterns of exchanges of Omni-Path encapsulated Ethernet packets
-involves one or more virtual Ethernet switches overlaid on the Omni-Path
-fabric topology. A subset of HFI nodes on the Omni-Path fabric are
-permitted to exchange encapsulated Ethernet packets across a particular
-virtual Ethernet switch. The virtual Ethernet switches are logical
-abstractions achieved by configuring the HFI nodes on the fabric for
-header generation and processing. In the simplest configuration all HFI
-nodes across the fabric exchange encapsulated Ethernet packets over a
-single virtual Ethernet switch. A virtual Ethernet switch, is effectively
-an independent Ethernet network. The configuration is performed by an
-Ethernet Manager (EM) which is part of the trusted Fabric Manager (FM)
-application. HFI nodes can have multiple VNICs each connected to a
-different virtual Ethernet switch. The below diagram presents a case
-of two virtual Ethernet switches with two HFI nodes.
-
-                             +-------------------+
-                             |      Subnet/      |
-                             |     Ethernet      |
-                             |      Manager      |
-                             +-------------------+
-                                /          /
-                              /           /
-                            /            /
-                          /             /
-+-----------------------------+  +------------------------------+
-|  Virtual Ethernet Switch    |  |  Virtual Ethernet Switch     |
-|  +---------+    +---------+ |  | +---------+    +---------+   |
-|  | VPORT   |    |  VPORT  | |  | |  VPORT  |    |  VPORT  |   |
-+--+---------+----+---------+-+  +-+---------+----+---------+---+
-         |                 \        /                 |
-         |                   \    /                   |
-         |                     \/                     |
-         |                    /  \                    |
-         |                  /      \                  |
-     +-----------+------------+  +-----------+------------+
-     |   VNIC    |    VNIC    |  |    VNIC   |    VNIC    |
-     +-----------+------------+  +-----------+------------+
-     |          HFI           |  |          HFI           |
-     +------------------------+  +------------------------+
-
-
-The Omni-Path encapsulated Ethernet packet format is as described below.
-
-Bits          Field
-------------------------------------
-Quad Word 0:
-0-19      SLID (lower 20 bits)
-20-30     Length (in Quad Words)
-31        BECN bit
-32-51     DLID (lower 20 bits)
-52-56     SC (Service Class)
-57-59     RC (Routing Control)
-60        FECN bit
-61-62     L2 (=10, 16B format)
-63        LT (=1, Link Transfer Head Flit)
-
-Quad Word 1:
-0-7       L4 type (=0x78 ETHERNET)
-8-11      SLID[23:20]
-12-15     DLID[23:20]
-16-31     PKEY
-32-47     Entropy
-48-63     Reserved
-
-Quad Word 2:
-0-15      Reserved
-16-31     L4 header
-32-63     Ethernet Packet
-
-Quad Words 3 to N-1:
-0-63      Ethernet packet (pad extended)
-
-Quad Word N (last):
-0-23      Ethernet packet (pad extended)
-24-55     ICRC
-56-61     Tail
-62-63     LT (=01, Link Transfer Tail Flit)
-
-Ethernet packet is padded on the transmit side to ensure that the VNIC OPA
-packet is quad word aligned. The 'Tail' field contains the number of bytes
-padded. On the receive side the 'Tail' field is read and the padding is
-removed (along with ICRC, Tail and OPA header) before passing packet up
-the network stack.
-
-The L4 header field contains the virtual Ethernet switch id the VNIC port
-belongs to. On the receive side, this field is used to de-multiplex the
-received VNIC packets to different VNIC ports.
-
-Driver Design
-==============
-Intel OPA VNIC software design is presented in the below diagram.
-OPA VNIC functionality has a HW dependent component and a HW
-independent component.
-
-The support has been added for IB device to allocate and free the RDMA
-netdev devices. The RDMA netdev supports interfacing with the network
-stack thus creating standard network interfaces. OPA_VNIC is an RDMA
-netdev device type.
-
-The HW dependent VNIC functionality is part of the HFI1 driver. It
-implements the verbs to allocate and free the OPA_VNIC RDMA netdev.
-It involves HW resource allocation/management for VNIC functionality.
-It interfaces with the network stack and implements the required
-net_device_ops functions. It expects Omni-Path encapsulated Ethernet
-packets in the transmit path and provides HW access to them. It strips
-the Omni-Path header from the received packets before passing them up
-the network stack. It also implements the RDMA netdev control operations.
-
-The OPA VNIC module implements the HW independent VNIC functionality.
-It consists of two parts. The VNIC Ethernet Management Agent (VEMA)
-registers itself with IB core as an IB client and interfaces with the
-IB MAD stack. It exchanges the management information with the Ethernet
-Manager (EM) and the VNIC netdev. The VNIC netdev part allocates and frees
-the OPA_VNIC RDMA netdev devices. It overrides the net_device_ops functions
-set by HW dependent VNIC driver where required to accommodate any control
-operation. It also handles the encapsulation of Ethernet packets with an
-Omni-Path header in the transmit path. For each VNIC interface, the
-information required for encapsulation is configured by the EM via VEMA MAD
-interface. It also passes any control information to the HW dependent driver
-by invoking the RDMA netdev control operations.
-
-        +-------------------+ +----------------------+
-        |                   | |       Linux          |
-        |     IB MAD        | |      Network         |
-        |                   | |       Stack          |
-        +-------------------+ +----------------------+
-                 |               |          |
-                 |               |          |
-        +----------------------------+      |
-        |                            |      |
-        |      OPA VNIC Module       |      |
-        |  (OPA VNIC RDMA Netdev     |      |
-        |     & EMA functions)       |      |
-        |                            |      |
-        +----------------------------+      |
-                    |                       |
-                    |                       |
-           +------------------+             |
-           |     IB core      |             |
-           +------------------+             |
-                    |                       |
-                    |                       |
-        +--------------------------------------------+
-        |                                            |
-        |      HFI1 Driver with VNIC support         |
-        |                                            |
-        +--------------------------------------------+
diff --git a/Documentation/infiniband/sysfs.rst b/Documentation/infiniband/sysfs.rst
new file mode 100644
index 000000000000..f0abd6fa48f4
--- /dev/null
+++ b/Documentation/infiniband/sysfs.rst
@@ -0,0 +1,6 @@
+===========
+Sysfs files
+===========
+
+The sysfs interface has moved to
+Documentation/ABI/stable/sysfs-class-infiniband.
diff --git a/Documentation/infiniband/sysfs.txt b/Documentation/infiniband/sysfs.txt
deleted file mode 100644
index 9fab5062f84b..000000000000
--- a/Documentation/infiniband/sysfs.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-SYSFS FILES
-
-The sysfs interface has moved to
-Documentation/ABI/stable/sysfs-class-infiniband.
diff --git a/Documentation/infiniband/tag_matching.rst b/Documentation/infiniband/tag_matching.rst
new file mode 100644
index 000000000000..ef56ea585f92
--- /dev/null
+++ b/Documentation/infiniband/tag_matching.rst
@@ -0,0 +1,69 @@
+==================
+Tag matching logic
+==================
+
+The MPI standard defines a set of rules, known as tag-matching, for matching
+source send operations to destination receives.  The following parameters must
+match the following source and destination parameters:
+
+*	Communicator
+*	User tag - wild card may be specified by the receiver
+*	Source rank – wild car may be specified by the receiver
+*	Destination rank – wild
+
+The ordering rules require that when more than one pair of send and receive
+message envelopes may match, the pair that includes the earliest posted-send
+and the earliest posted-receive is the pair that must be used to satisfy the
+matching operation. However, this doesn’t imply that tags are consumed in
+the order they are created, e.g., a later generated tag may be consumed, if
+earlier tags can’t be used to satisfy the matching rules.
+
+When a message is sent from the sender to the receiver, the communication
+library may attempt to process the operation either after or before the
+corresponding matching receive is posted.  If a matching receive is posted,
+this is an expected message, otherwise it is called an unexpected message.
+Implementations frequently use different matching schemes for these two
+different matching instances.
+
+To keep MPI library memory footprint down, MPI implementations typically use
+two different protocols for this purpose:
+
+1.	The Eager protocol- the complete message is sent when the send is
+processed by the sender. A completion send is received in the send_cq
+notifying that the buffer can be reused.
+
+2.	The Rendezvous Protocol - the sender sends the tag-matching header,
+and perhaps a portion of data when first notifying the receiver. When the
+corresponding buffer is posted, the responder will use the information from
+the header to initiate an RDMA READ operation directly to the matching buffer.
+A fin message needs to be received in order for the buffer to be reused.
+
+Tag matching implementation
+===========================
+
+There are two types of matching objects used, the posted receive list and the
+unexpected message list. The application posts receive buffers through calls
+to the MPI receive routines in the posted receive list and posts send messages
+using the MPI send routines. The head of the posted receive list may be
+maintained by the hardware, with the software expected to shadow this list.
+
+When send is initiated and arrives at the receive side, if there is no
+pre-posted receive for this arriving message, it is passed to the software and
+placed in the unexpected message list. Otherwise the match is processed,
+including rendezvous processing, if appropriate, delivering the data to the
+specified receive buffer. This allows overlapping receive-side MPI tag
+matching with computation.
+
+When a receive-message is posted, the communication library will first check
+the software unexpected message list for a matching receive. If a match is
+found, data is delivered to the user buffer, using a software controlled
+protocol. The UCX implementation uses either an eager or rendezvous protocol,
+depending on data size. If no match is found, the entire pre-posted receive
+list is maintained by the hardware, and there is space to add one more
+pre-posted receive to this list, this receive is passed to the hardware.
+Software is expected to shadow this list, to help with processing MPI cancel
+operations. In addition, because hardware and software are not expected to be
+tightly synchronized with respect to the tag-matching operation, this shadow
+list is used to detect the case that a pre-posted receive is passed to the
+hardware, as the matching unexpected message is being passed from the hardware
+to the software.
diff --git a/Documentation/infiniband/tag_matching.txt b/Documentation/infiniband/tag_matching.txt
deleted file mode 100644
index d2a3bf819226..000000000000
--- a/Documentation/infiniband/tag_matching.txt
+++ /dev/null
@@ -1,64 +0,0 @@
-Tag matching logic
-
-The MPI standard defines a set of rules, known as tag-matching, for matching
-source send operations to destination receives.  The following parameters must
-match the following source and destination parameters:
-*	Communicator
-*	User tag - wild card may be specified by the receiver
-*	Source rank – wild car may be specified by the receiver
-*	Destination rank – wild
-The ordering rules require that when more than one pair of send and receive
-message envelopes may match, the pair that includes the earliest posted-send
-and the earliest posted-receive is the pair that must be used to satisfy the
-matching operation. However, this doesn’t imply that tags are consumed in
-the order they are created, e.g., a later generated tag may be consumed, if
-earlier tags can’t be used to satisfy the matching rules.
-
-When a message is sent from the sender to the receiver, the communication
-library may attempt to process the operation either after or before the
-corresponding matching receive is posted.  If a matching receive is posted,
-this is an expected message, otherwise it is called an unexpected message.
-Implementations frequently use different matching schemes for these two
-different matching instances.
-
-To keep MPI library memory footprint down, MPI implementations typically use
-two different protocols for this purpose:
-
-1.	The Eager protocol- the complete message is sent when the send is
-processed by the sender. A completion send is received in the send_cq
-notifying that the buffer can be reused.
-
-2.	The Rendezvous Protocol - the sender sends the tag-matching header,
-and perhaps a portion of data when first notifying the receiver. When the
-corresponding buffer is posted, the responder will use the information from
-the header to initiate an RDMA READ operation directly to the matching buffer.
-A fin message needs to be received in order for the buffer to be reused.
-
-Tag matching implementation
-
-There are two types of matching objects used, the posted receive list and the
-unexpected message list. The application posts receive buffers through calls
-to the MPI receive routines in the posted receive list and posts send messages
-using the MPI send routines. The head of the posted receive list may be
-maintained by the hardware, with the software expected to shadow this list.
-
-When send is initiated and arrives at the receive side, if there is no
-pre-posted receive for this arriving message, it is passed to the software and
-placed in the unexpected message list. Otherwise the match is processed,
-including rendezvous processing, if appropriate, delivering the data to the
-specified receive buffer. This allows overlapping receive-side MPI tag
-matching with computation.
-
-When a receive-message is posted, the communication library will first check
-the software unexpected message list for a matching receive. If a match is
-found, data is delivered to the user buffer, using a software controlled
-protocol. The UCX implementation uses either an eager or rendezvous protocol,
-depending on data size. If no match is found, the entire pre-posted receive
-list is maintained by the hardware, and there is space to add one more
-pre-posted receive to this list, this receive is passed to the hardware.
-Software is expected to shadow this list, to help with processing MPI cancel
-operations. In addition, because hardware and software are not expected to be
-tightly synchronized with respect to the tag-matching operation, this shadow
-list is used to detect the case that a pre-posted receive is passed to the
-hardware, as the matching unexpected message is being passed from the hardware
-to the software.
diff --git a/Documentation/infiniband/user_mad.rst b/Documentation/infiniband/user_mad.rst
new file mode 100644
index 000000000000..d88abfc0e370
--- /dev/null
+++ b/Documentation/infiniband/user_mad.rst
@@ -0,0 +1,166 @@
+====================
+Userspace MAD access
+====================
+
+Device files
+============
+
+  Each port of each InfiniBand device has a "umad" device and an
+  "issm" device attached.  For example, a two-port HCA will have two
+  umad devices and two issm devices, while a switch will have one
+  device of each type (for switch port 0).
+
+Creating MAD agents
+===================
+
+  A MAD agent can be created by filling in a struct ib_user_mad_reg_req
+  and then calling the IB_USER_MAD_REGISTER_AGENT ioctl on a file
+  descriptor for the appropriate device file.  If the registration
+  request succeeds, a 32-bit id will be returned in the structure.
+  For example::
+
+	struct ib_user_mad_reg_req req = { /* ... */ };
+	ret = ioctl(fd, IB_USER_MAD_REGISTER_AGENT, (char *) &req);
+        if (!ret)
+		my_agent = req.id;
+	else
+		perror("agent register");
+
+  Agents can be unregistered with the IB_USER_MAD_UNREGISTER_AGENT
+  ioctl.  Also, all agents registered through a file descriptor will
+  be unregistered when the descriptor is closed.
+
+  2014
+       a new registration ioctl is now provided which allows additional
+       fields to be provided during registration.
+       Users of this registration call are implicitly setting the use of
+       pkey_index (see below).
+
+Receiving MADs
+==============
+
+  MADs are received using read().  The receive side now supports
+  RMPP. The buffer passed to read() must be at least one
+  struct ib_user_mad + 256 bytes. For example:
+
+  If the buffer passed is not large enough to hold the received
+  MAD (RMPP), the errno is set to ENOSPC and the length of the
+  buffer needed is set in mad.length.
+
+  Example for normal MAD (non RMPP) reads::
+
+	struct ib_user_mad *mad;
+	mad = malloc(sizeof *mad + 256);
+	ret = read(fd, mad, sizeof *mad + 256);
+	if (ret != sizeof mad + 256) {
+		perror("read");
+		free(mad);
+	}
+
+  Example for RMPP reads::
+
+	struct ib_user_mad *mad;
+	mad = malloc(sizeof *mad + 256);
+	ret = read(fd, mad, sizeof *mad + 256);
+	if (ret == -ENOSPC)) {
+		length = mad.length;
+		free(mad);
+		mad = malloc(sizeof *mad + length);
+		ret = read(fd, mad, sizeof *mad + length);
+	}
+	if (ret < 0) {
+		perror("read");
+		free(mad);
+	}
+
+  In addition to the actual MAD contents, the other struct ib_user_mad
+  fields will be filled in with information on the received MAD.  For
+  example, the remote LID will be in mad.lid.
+
+  If a send times out, a receive will be generated with mad.status set
+  to ETIMEDOUT.  Otherwise when a MAD has been successfully received,
+  mad.status will be 0.
+
+  poll()/select() may be used to wait until a MAD can be read.
+
+Sending MADs
+============
+
+  MADs are sent using write().  The agent ID for sending should be
+  filled into the id field of the MAD, the destination LID should be
+  filled into the lid field, and so on.  The send side does support
+  RMPP so arbitrary length MAD can be sent. For example::
+
+	struct ib_user_mad *mad;
+
+	mad = malloc(sizeof *mad + mad_length);
+
+	/* fill in mad->data */
+
+	mad->hdr.id  = my_agent;	/* req.id from agent registration */
+	mad->hdr.lid = my_dest;		/* in network byte order... */
+	/* etc. */
+
+	ret = write(fd, &mad, sizeof *mad + mad_length);
+	if (ret != sizeof *mad + mad_length)
+		perror("write");
+
+Transaction IDs
+===============
+
+  Users of the umad devices can use the lower 32 bits of the
+  transaction ID field (that is, the least significant half of the
+  field in network byte order) in MADs being sent to match
+  request/response pairs.  The upper 32 bits are reserved for use by
+  the kernel and will be overwritten before a MAD is sent.
+
+P_Key Index Handling
+====================
+
+  The old ib_umad interface did not allow setting the P_Key index for
+  MADs that are sent and did not provide a way for obtaining the P_Key
+  index of received MADs.  A new layout for struct ib_user_mad_hdr
+  with a pkey_index member has been defined; however, to preserve binary
+  compatibility with older applications, this new layout will not be used
+  unless one of IB_USER_MAD_ENABLE_PKEY or IB_USER_MAD_REGISTER_AGENT2 ioctl's
+  are called before a file descriptor is used for anything else.
+
+  In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
+  to 6, the new layout of struct ib_user_mad_hdr will be used by
+  default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed.
+
+Setting IsSM Capability Bit
+===========================
+
+  To set the IsSM capability bit for a port, simply open the
+  corresponding issm device file.  If the IsSM bit is already set,
+  then the open call will block until the bit is cleared (or return
+  immediately with errno set to EAGAIN if the O_NONBLOCK flag is
+  passed to open()).  The IsSM bit will be cleared when the issm file
+  is closed.  No read, write or other operations can be performed on
+  the issm file.
+
+/dev files
+==========
+
+  To create the appropriate character device files automatically with
+  udev, a rule like::
+
+    KERNEL=="umad*", NAME="infiniband/%k"
+    KERNEL=="issm*", NAME="infiniband/%k"
+
+  can be used.  This will create device nodes named::
+
+    /dev/infiniband/umad0
+    /dev/infiniband/issm0
+
+  for the first port, and so on.  The InfiniBand device and port
+  associated with these devices can be determined from the files::
+
+    /sys/class/infiniband_mad/umad0/ibdev
+    /sys/class/infiniband_mad/umad0/port
+
+  and::
+
+    /sys/class/infiniband_mad/issm0/ibdev
+    /sys/class/infiniband_mad/issm0/port
diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt
deleted file mode 100644
index 7aca13a54a3a..000000000000
--- a/Documentation/infiniband/user_mad.txt
+++ /dev/null
@@ -1,153 +0,0 @@
-USERSPACE MAD ACCESS
-
-Device files
-
-  Each port of each InfiniBand device has a "umad" device and an
-  "issm" device attached.  For example, a two-port HCA will have two
-  umad devices and two issm devices, while a switch will have one
-  device of each type (for switch port 0).
-
-Creating MAD agents
-
-  A MAD agent can be created by filling in a struct ib_user_mad_reg_req
-  and then calling the IB_USER_MAD_REGISTER_AGENT ioctl on a file
-  descriptor for the appropriate device file.  If the registration
-  request succeeds, a 32-bit id will be returned in the structure.
-  For example:
-
-	struct ib_user_mad_reg_req req = { /* ... */ };
-	ret = ioctl(fd, IB_USER_MAD_REGISTER_AGENT, (char *) &req);
-        if (!ret)
-		my_agent = req.id;
-	else
-		perror("agent register");
-
-  Agents can be unregistered with the IB_USER_MAD_UNREGISTER_AGENT
-  ioctl.  Also, all agents registered through a file descriptor will
-  be unregistered when the descriptor is closed.
-
-  2014 -- a new registration ioctl is now provided which allows additional
-       fields to be provided during registration.
-       Users of this registration call are implicitly setting the use of
-       pkey_index (see below).
-
-Receiving MADs
-
-  MADs are received using read().  The receive side now supports
-  RMPP. The buffer passed to read() must be at least one
-  struct ib_user_mad + 256 bytes. For example:
-
-  If the buffer passed is not large enough to hold the received
-  MAD (RMPP), the errno is set to ENOSPC and the length of the
-  buffer needed is set in mad.length.
-
-  Example for normal MAD (non RMPP) reads:
-	struct ib_user_mad *mad;
-	mad = malloc(sizeof *mad + 256);
-	ret = read(fd, mad, sizeof *mad + 256);
-	if (ret != sizeof mad + 256) {
-		perror("read");
-		free(mad);
-	}
-
-  Example for RMPP reads:
-	struct ib_user_mad *mad;
-	mad = malloc(sizeof *mad + 256);
-	ret = read(fd, mad, sizeof *mad + 256);
-	if (ret == -ENOSPC)) {
-		length = mad.length;
-		free(mad);
-		mad = malloc(sizeof *mad + length);
-		ret = read(fd, mad, sizeof *mad + length);
-	}
-	if (ret < 0) {
-		perror("read");
-		free(mad);
-	}
-
-  In addition to the actual MAD contents, the other struct ib_user_mad
-  fields will be filled in with information on the received MAD.  For
-  example, the remote LID will be in mad.lid.
-
-  If a send times out, a receive will be generated with mad.status set
-  to ETIMEDOUT.  Otherwise when a MAD has been successfully received,
-  mad.status will be 0.
-
-  poll()/select() may be used to wait until a MAD can be read.
-
-Sending MADs
-
-  MADs are sent using write().  The agent ID for sending should be
-  filled into the id field of the MAD, the destination LID should be
-  filled into the lid field, and so on.  The send side does support
-  RMPP so arbitrary length MAD can be sent. For example:
-
-	struct ib_user_mad *mad;
-
-	mad = malloc(sizeof *mad + mad_length);
-
-	/* fill in mad->data */
-
-	mad->hdr.id  = my_agent;	/* req.id from agent registration */
-	mad->hdr.lid = my_dest;		/* in network byte order... */
-	/* etc. */
-
-	ret = write(fd, &mad, sizeof *mad + mad_length);
-	if (ret != sizeof *mad + mad_length)
-		perror("write");
-
-Transaction IDs
-
-  Users of the umad devices can use the lower 32 bits of the
-  transaction ID field (that is, the least significant half of the
-  field in network byte order) in MADs being sent to match
-  request/response pairs.  The upper 32 bits are reserved for use by
-  the kernel and will be overwritten before a MAD is sent.
-
-P_Key Index Handling
-
-  The old ib_umad interface did not allow setting the P_Key index for
-  MADs that are sent and did not provide a way for obtaining the P_Key
-  index of received MADs.  A new layout for struct ib_user_mad_hdr
-  with a pkey_index member has been defined; however, to preserve binary
-  compatibility with older applications, this new layout will not be used
-  unless one of IB_USER_MAD_ENABLE_PKEY or IB_USER_MAD_REGISTER_AGENT2 ioctl's
-  are called before a file descriptor is used for anything else.
-
-  In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
-  to 6, the new layout of struct ib_user_mad_hdr will be used by
-  default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed.
-
-Setting IsSM Capability Bit
-
-  To set the IsSM capability bit for a port, simply open the
-  corresponding issm device file.  If the IsSM bit is already set,
-  then the open call will block until the bit is cleared (or return
-  immediately with errno set to EAGAIN if the O_NONBLOCK flag is
-  passed to open()).  The IsSM bit will be cleared when the issm file
-  is closed.  No read, write or other operations can be performed on
-  the issm file.
-
-/dev files
-
-  To create the appropriate character device files automatically with
-  udev, a rule like
-
-    KERNEL=="umad*", NAME="infiniband/%k"
-    KERNEL=="issm*", NAME="infiniband/%k"
-
-  can be used.  This will create device nodes named
-
-    /dev/infiniband/umad0
-    /dev/infiniband/issm0
-
-  for the first port, and so on.  The InfiniBand device and port
-  associated with these devices can be determined from the files
-
-    /sys/class/infiniband_mad/umad0/ibdev
-    /sys/class/infiniband_mad/umad0/port
-
-  and
-
-    /sys/class/infiniband_mad/issm0/ibdev
-    /sys/class/infiniband_mad/issm0/port
diff --git a/Documentation/infiniband/user_verbs.rst b/Documentation/infiniband/user_verbs.rst
new file mode 100644
index 000000000000..8ddc4b1cfef2
--- /dev/null
+++ b/Documentation/infiniband/user_verbs.rst
@@ -0,0 +1,75 @@
+======================
+Userspace verbs access
+======================
+
+  The ib_uverbs module, built by enabling CONFIG_INFINIBAND_USER_VERBS,
+  enables direct userspace access to IB hardware via "verbs," as
+  described in chapter 11 of the InfiniBand Architecture Specification.
+
+  To use the verbs, the libibverbs library, available from
+  https://github.com/linux-rdma/rdma-core, is required. libibverbs contains a
+  device-independent API for using the ib_uverbs interface.
+  libibverbs also requires appropriate device-dependent kernel and
+  userspace driver for your InfiniBand hardware.  For example, to use
+  a Mellanox HCA, you will need the ib_mthca kernel module and the
+  libmthca userspace driver be installed.
+
+User-kernel communication
+=========================
+
+  Userspace communicates with the kernel for slow path, resource
+  management operations via the /dev/infiniband/uverbsN character
+  devices.  Fast path operations are typically performed by writing
+  directly to hardware registers mmap()ed into userspace, with no
+  system call or context switch into the kernel.
+
+  Commands are sent to the kernel via write()s on these device files.
+  The ABI is defined in drivers/infiniband/include/ib_user_verbs.h.
+  The structs for commands that require a response from the kernel
+  contain a 64-bit field used to pass a pointer to an output buffer.
+  Status is returned to userspace as the return value of the write()
+  system call.
+
+Resource management
+===================
+
+  Since creation and destruction of all IB resources is done by
+  commands passed through a file descriptor, the kernel can keep track
+  of which resources are attached to a given userspace context.  The
+  ib_uverbs module maintains idr tables that are used to translate
+  between kernel pointers and opaque userspace handles, so that kernel
+  pointers are never exposed to userspace and userspace cannot trick
+  the kernel into following a bogus pointer.
+
+  This also allows the kernel to clean up when a process exits and
+  prevent one process from touching another process's resources.
+
+Memory pinning
+==============
+
+  Direct userspace I/O requires that memory regions that are potential
+  I/O targets be kept resident at the same physical address.  The
+  ib_uverbs module manages pinning and unpinning memory regions via
+  get_user_pages() and put_page() calls.  It also accounts for the
+  amount of memory pinned in the process's pinned_vm, and checks that
+  unprivileged processes do not exceed their RLIMIT_MEMLOCK limit.
+
+  Pages that are pinned multiple times are counted each time they are
+  pinned, so the value of pinned_vm may be an overestimate of the
+  number of pages pinned by a process.
+
+/dev files
+==========
+
+  To create the appropriate character device files automatically with
+  udev, a rule like::
+
+    KERNEL=="uverbs*", NAME="infiniband/%k"
+
+  can be used.  This will create device nodes named::
+
+    /dev/infiniband/uverbs0
+
+  and so on.  Since the InfiniBand userspace verbs should be safe for
+  use by non-privileged processes, it may be useful to add an
+  appropriate MODE or GROUP to the udev rule.
diff --git a/Documentation/infiniband/user_verbs.txt b/Documentation/infiniband/user_verbs.txt
deleted file mode 100644
index 47ebf2f80b2b..000000000000
--- a/Documentation/infiniband/user_verbs.txt
+++ /dev/null
@@ -1,69 +0,0 @@
-USERSPACE VERBS ACCESS
-
-  The ib_uverbs module, built by enabling CONFIG_INFINIBAND_USER_VERBS,
-  enables direct userspace access to IB hardware via "verbs," as
-  described in chapter 11 of the InfiniBand Architecture Specification.
-
-  To use the verbs, the libibverbs library, available from
-  https://github.com/linux-rdma/rdma-core, is required. libibverbs contains a
-  device-independent API for using the ib_uverbs interface.
-  libibverbs also requires appropriate device-dependent kernel and
-  userspace driver for your InfiniBand hardware.  For example, to use
-  a Mellanox HCA, you will need the ib_mthca kernel module and the
-  libmthca userspace driver be installed.
-
-User-kernel communication
-
-  Userspace communicates with the kernel for slow path, resource
-  management operations via the /dev/infiniband/uverbsN character
-  devices.  Fast path operations are typically performed by writing
-  directly to hardware registers mmap()ed into userspace, with no
-  system call or context switch into the kernel.
-
-  Commands are sent to the kernel via write()s on these device files.
-  The ABI is defined in drivers/infiniband/include/ib_user_verbs.h.
-  The structs for commands that require a response from the kernel
-  contain a 64-bit field used to pass a pointer to an output buffer.
-  Status is returned to userspace as the return value of the write()
-  system call.
-
-Resource management
-
-  Since creation and destruction of all IB resources is done by
-  commands passed through a file descriptor, the kernel can keep track
-  of which resources are attached to a given userspace context.  The
-  ib_uverbs module maintains idr tables that are used to translate
-  between kernel pointers and opaque userspace handles, so that kernel
-  pointers are never exposed to userspace and userspace cannot trick
-  the kernel into following a bogus pointer.
-
-  This also allows the kernel to clean up when a process exits and
-  prevent one process from touching another process's resources.
-
-Memory pinning
-
-  Direct userspace I/O requires that memory regions that are potential
-  I/O targets be kept resident at the same physical address.  The
-  ib_uverbs module manages pinning and unpinning memory regions via
-  get_user_pages() and put_page() calls.  It also accounts for the
-  amount of memory pinned in the process's pinned_vm, and checks that
-  unprivileged processes do not exceed their RLIMIT_MEMLOCK limit.
-
-  Pages that are pinned multiple times are counted each time they are
-  pinned, so the value of pinned_vm may be an overestimate of the
-  number of pages pinned by a process.
-
-/dev files
-
-  To create the appropriate character device files automatically with
-  udev, a rule like
-
-    KERNEL=="uverbs*", NAME="infiniband/%k"
-
-  can be used.  This will create device nodes named
-
-    /dev/infiniband/uverbs0
-
-  and so on.  Since the InfiniBand userspace verbs should be safe for
-  use by non-privileged processes, it may be useful to add an
-  appropriate MODE or GROUP to the udev rule.
-- 
cgit 


From ca9503fc9e9812aa6258e55d44edb03eb30fc46f Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 18 Jun 2019 19:10:55 +0100
Subject: arm64: Expose FRINT capabilities to userspace

ARMv8.5 introduces the FRINT series of instructions for rounding floating
point numbers to integers. Provide a capability to userspace in order to
allow applications to determine if the system supports these instructions.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Documentation/arm64/elf_hwcaps.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt
index ee8dbfe652b6..5ae2ef2c12f3 100644
--- a/Documentation/arm64/elf_hwcaps.txt
+++ b/Documentation/arm64/elf_hwcaps.txt
@@ -227,6 +227,10 @@ HWCAP_PACG
     ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
     Documentation/arm64/pointer-authentication.txt.
 
+HWCAP2_FRINT
+
+    Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.
+
 
 4. Unused AT_HWCAP bits
 -----------------------
-- 
cgit 


From 7117a44bc0eb5d73dc8aa8df92134f736c2099ac Mon Sep 17 00:00:00 2001
From: Bai Ping <ping.bai@nxp.com>
Date: Wed, 5 Jun 2019 06:40:52 +0000
Subject: clocksource/drivers/sysctr: Add nxp system counter timer driver
 support

The system counter (sys_ctr) is a programmable system counter
which provides a shared time base to the Cortex A15, A7, A53 etc cores.
It is intended for use in applications where the counter is always
powered on and supports multiple, unrelated clocks. The sys_ctr hardware
supports:
 - 56-bit counter width (roll-over time greater than 40 years)
 - compare frame(64-bit compare value) contains programmable interrupt
   generation when compare value <= counter value.

[dlezcano] Fixed over 80 chars length warning

Signed-off-by: Bai Ping <ping.bai@nxp.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 .../devicetree/bindings/timer/nxp,sysctr-timer.txt | 25 ++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/timer/nxp,sysctr-timer.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.txt b/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.txt
new file mode 100644
index 000000000000..d57659996d62
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.txt
@@ -0,0 +1,25 @@
+NXP System Counter Module(sys_ctr)
+
+The system counter(sys_ctr) is a programmable system counter which provides
+a shared time base to Cortex A15, A7, A53, A73, etc. it is intended for use in
+applications where the counter is always powered and support multiple,
+unrelated clocks. The compare frame inside can be used for timer purpose.
+
+Required properties:
+
+- compatible :      should be "nxp,sysctr-timer"
+- reg :             Specifies the base physical address and size of the comapre
+                    frame and the counter control, read & compare.
+- interrupts :      should be the first compare frames' interrupt
+- clocks : 	    Specifies the counter clock.
+- clock-names: 	    Specifies the clock's name of this module
+
+Example:
+
+	system_counter: timer@306a0000 {
+		compatible = "nxp,sysctr-timer";
+		reg = <0x306a0000 0x20000>;/* system-counter-rd & compare */
+		clocks = <&clk_8m>;
+		clock-names = "per";
+		interrupts = <GIC_SPI 47 IRQ_TYPE_LEVEL_HIGH>;
+	};
-- 
cgit 


From 179175d389c7aad4c7b7276f772d04cfc92864b1 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@kernel.org>
Date: Tue, 25 Jun 2019 14:20:56 -0700
Subject: clk: Document some devm_clk_bulk*() APIs

Add some new clk devm APIs that we've added over time to the devres
documentation.

Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 Documentation/driver-model/devres.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 69c7fa7f616c..38205662d417 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -244,6 +244,10 @@ CLOCK
   devm_clk_get()
   devm_clk_get_optional()
   devm_clk_put()
+  devm_clk_bulk_get()
+  devm_clk_bulk_get_all()
+  devm_clk_bulk_get_optional()
+  devm_get_clk_from_childl()
   devm_clk_hw_register()
   devm_of_clk_add_hw_provider()
   devm_clk_hw_register_clkdev()
-- 
cgit 


From 9a042e718fc0faf77db35a8106c8ded948971219 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Tue, 18 Jun 2019 09:54:56 +1200
Subject: dt-bindings: clock: mvebu: Add compatible string for 98dx1135 core
 clock

Add compatible string for the core clock on the 98dx1135 switch with
integrated CPU.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 Documentation/devicetree/bindings/clock/mvebu-core-clock.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt
index 796c260c183d..d8f5c490f893 100644
--- a/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt
+++ b/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt
@@ -59,6 +59,7 @@ Required properties:
 	"marvell,dove-core-clock" - for Dove SoC core clocks
 	"marvell,kirkwood-core-clock" - for Kirkwood SoC (except mv88f6180)
 	"marvell,mv88f6180-core-clock" - for Kirkwood MV88f6180 SoC
+	"marvell,mv98dx1135-core-clock" - for Kirkwood 98dx1135 SoC
 	"marvell,mv88f5181-core-clock" - for Orion MV88F5181 SoC
 	"marvell,mv88f5182-core-clock" - for Orion MV88F5182 SoC
 	"marvell,mv88f5281-core-clock" - for Orion MV88F5281 SoC
-- 
cgit 


From cba155e50a117465b64e197e197aedc18770c4cd Mon Sep 17 00:00:00 2001
From: Han Nandor <nandor.han@vaisala.com>
Date: Wed, 15 May 2019 10:47:15 +0000
Subject: dt-bindings: power: reset: add document for NVMEM based reboot-mode

Add the device tree bindings document for the NVMEM based reboot-mode
driver.

Signed-off-by: Nandor Han <nandor.han@vaisala.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 .../bindings/power/reset/nvmem-reboot-mode.txt     | 26 ++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.txt b/Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.txt
new file mode 100644
index 000000000000..752d6126d5da
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.txt
@@ -0,0 +1,26 @@
+NVMEM reboot mode driver
+
+This driver gets reboot mode magic value from reboot-mode driver
+and stores it in a NVMEM cell named "reboot-mode". Then the bootloader
+can read it and take different action according to the magic
+value stored.
+
+Required properties:
+- compatible: should be "nvmem-reboot-mode".
+- nvmem-cells: A phandle to the reboot mode provided by a nvmem device.
+- nvmem-cell-names: Should be "reboot-mode".
+
+The rest of the properties should follow the generic reboot-mode description
+found in reboot-mode.txt
+
+Example:
+	reboot-mode {
+		compatible = "nvmem-reboot-mode";
+		nvmem-cells = <&reboot_mode>;
+		nvmem-cell-names = "reboot-mode";
+
+		mode-normal     = <0xAAAA5501>;
+		mode-bootloader = <0xBBBB5500>;
+		mode-recovery   = <0xCCCC5502>;
+		mode-test       = <0xDDDD5503>;
+	};
-- 
cgit 


From 321a7cea973b4f63bb7e3baaea8428dfb8b424a9 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Fri, 31 May 2019 18:54:58 +0900
Subject: pwm: Add power management descriptions

This patch adds power management descriptions that consumers should
implement it.

Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/pwm.txt | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/pwm.txt b/Documentation/pwm.txt
index 8fbf0aa3ba2d..ab62f1bb0366 100644
--- a/Documentation/pwm.txt
+++ b/Documentation/pwm.txt
@@ -65,6 +65,10 @@ period). struct pwm_args contains 2 fields (period and polarity) and should
 be used to set the initial PWM config (usually done in the probe function
 of the PWM user). PWM arguments are retrieved with pwm_get_args().
 
+All consumers should really be reconfiguring the PWM upon resume as
+appropriate. This is the only way to ensure that everything is resumed in
+the proper order.
+
 Using PWMs with the sysfs interface
 -----------------------------------
 
@@ -141,6 +145,9 @@ The implementation of ->get_state() (a method used to retrieve initial PWM
 state) is also encouraged for the same reason: letting the PWM user know
 about the current PWM state would allow him to avoid glitches.
 
+Drivers should not implement any power management. In other words,
+consumers should implement it as described in the "Using PWMs" section.
+
 Locking
 -------
 
-- 
cgit 


From 71731e1b2da17f5d2a9ca654c66c393d89cba9e6 Mon Sep 17 00:00:00 2001
From: Stefan Roese <sr@denx.de>
Date: Mon, 17 Jun 2019 10:31:16 +0200
Subject: dt-bindings: i2c: i2c-mt7621: Add bindings for MediaTek MT7621/28/88
 I2C

Add bindings for the I2C controller that can be found in the MediaTek
MT7621/7628/7688 SoCs.

Signed-off-by: Stefan Roese <sr@denx.de>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 .../devicetree/bindings/i2c/i2c-mt7621.txt         | 25 ++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/i2c/i2c-mt7621.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/i2c/i2c-mt7621.txt b/Documentation/devicetree/bindings/i2c/i2c-mt7621.txt
new file mode 100644
index 000000000000..bc36f0eb94cd
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-mt7621.txt
@@ -0,0 +1,25 @@
+MediaTek MT7621/MT7628 I2C master controller
+
+Required properties:
+
+- compatible: Should be one of the following:
+  - "mediatek,mt7621-i2c": for MT7621/MT7628/MT7688 platforms
+- #address-cells: should be 1.
+- #size-cells: should be 0.
+- reg: Address and length of the register set for the device
+- resets: phandle to the reset controller asserting this device in
+          reset
+  See ../reset/reset.txt for details.
+
+Optional properties :
+
+Example:
+
+i2c: i2c@900 {
+	compatible = "mediatek,mt7621-i2c";
+	reg = <0x900 0x100>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+	resets = <&rstctrl 16>;
+	reset-names = "i2c";
+};
-- 
cgit 


From bbddb0fc3bbf07dd0b2785c8581c97a2fd58a174 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Tue, 11 Jun 2019 11:03:08 +0200
Subject: dt-bindings: i2c: sun6i-p2wi: Add YAML schemas

Switch the DT binding to a YAML schema to enable the DT validation.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 .../bindings/i2c/allwinner,sun6i-a31-p2wi.yaml     | 65 ++++++++++++++++++++++
 .../devicetree/bindings/i2c/i2c-sun6i-p2wi.txt     | 41 --------------
 2 files changed, 65 insertions(+), 41 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml
 delete mode 100644 Documentation/devicetree/bindings/i2c/i2c-sun6i-p2wi.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml b/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml
new file mode 100644
index 000000000000..1804abe24f14
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/i2c/allwinner,sun6i-a31-p2wi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A31 P2WI (Push/Pull 2 Wires Interface) Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+allOf:
+  - $ref: /schemas/i2c/i2c-controller.yaml#
+
+properties:
+  compatible:
+    const: allwinner,sun6i-a31-p2wi
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  clock-frequency:
+    minimum: 1
+    maximum: 6000000
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - resets
+
+# FIXME: We should set it, but it would report all the generic
+# properties as additional properties.
+# additionalProperties: false
+
+examples:
+  - |
+    i2c@1f03400 {
+        compatible = "allwinner,sun6i-a31-p2wi";
+        reg = <0x01f03400 0x400>;
+        interrupts = <0 39 4>;
+        clocks = <&apb0_gates 3>;
+        clock-frequency = <6000000>;
+        resets = <&apb0_rst 3>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        axp221: pmic@68 {
+            compatible = "x-powers,axp221";
+            reg = <0x68>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/i2c/i2c-sun6i-p2wi.txt b/Documentation/devicetree/bindings/i2c/i2c-sun6i-p2wi.txt
deleted file mode 100644
index 49df0053347a..000000000000
--- a/Documentation/devicetree/bindings/i2c/i2c-sun6i-p2wi.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-
-* Allwinner P2WI (Push/Pull 2 Wire Interface) controller
-
-Required properties :
-
- - reg             : Offset and length of the register set for the device.
- - compatible      : Should one of the following:
-                     - "allwinner,sun6i-a31-p2wi"
- - interrupts      : The interrupt line connected to the P2WI peripheral.
- - clocks          : The gate clk connected to the P2WI peripheral.
- - resets          : The reset line connected to the P2WI peripheral.
-
-Optional properties :
-
- - clock-frequency : Desired P2WI bus clock frequency in Hz. If not set the
-default frequency is 100kHz
-
-A P2WI may contain one child node encoding a P2WI slave device.
-
-Slave device properties:
-  Required properties:
-   - reg           : the I2C slave address used during the initialization
-                     process to switch from I2C to P2WI mode
-
-Example:
-
-	p2wi@1f03400 {
-		compatible = "allwinner,sun6i-a31-p2wi";
-		reg = <0x01f03400 0x400>;
-		interrupts = <0 39 4>;
-		clocks = <&apb0_gates 3>;
-		clock-frequency = <6000000>;
-		resets = <&apb0_rst 3>;
-
-		axp221: pmic@68 {
-			compatible = "x-powers,axp221";
-			reg = <0x68>;
-
-			/* ... */
-		};
-	};
-- 
cgit 


From f8bbde72ef44417e41d4731b05264dcbfb9f962a Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Tue, 11 Jun 2019 11:03:09 +0200
Subject: dt-bindings: i2c: mv64xxx: Add YAML schemas

Switch the DT binding to a YAML schema to enable the DT validation.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Gregory CLEMENT <gregory.clement@bootlin.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 .../devicetree/bindings/i2c/i2c-mv64xxx.txt        |  64 -----------
 .../bindings/i2c/marvell,mv64xxx-i2c.yaml          | 124 +++++++++++++++++++++
 2 files changed, 124 insertions(+), 64 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt
 create mode 100644 Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt b/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt
deleted file mode 100644
index 0ffe65a316ae..000000000000
--- a/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt
+++ /dev/null
@@ -1,64 +0,0 @@
-
-* Marvell MV64XXX I2C controller
-
-Required properties :
-
- - reg             : Offset and length of the register set for the device
- - compatible      : Should be either:
-                     - "allwinner,sun4i-a10-i2c"
-                     - "allwinner,sun6i-a31-i2c"
-                     - "marvell,mv64xxx-i2c"
-                     - "marvell,mv78230-i2c"
-                     - "marvell,mv78230-a0-i2c"
-                       * Note: Only use "marvell,mv78230-a0-i2c" for a
-                         very rare, initial version of the SoC which
-                         had broken offload support.  Linux
-                         auto-detects this and sets it appropriately.
- - interrupts      : The interrupt number
-
-Optional properties :
-
- - clock-frequency : Desired I2C bus clock frequency in Hz. If not set the
-default frequency is 100kHz
-
- - resets          : phandle to the parent reset controller. Mandatory
-                     whenever you're using the "allwinner,sun6i-a31-i2c"
-                     compatible.
-
- - clocks:	   : pointers to the reference clocks for this device, the
-		     first one is the one used for the clock on the i2c bus,
-		     the second one is the clock used to acces the registers
-		     of the controller
-
- - clock-names	   : names of used clocks, mandatory if the second clock is
-		     used, the name must be "core", and "reg" (the latter is
-		     only for Armada 7K/8K).
-
-Examples:
-
-	i2c@11000 {
-		compatible = "marvell,mv64xxx-i2c";
-		reg = <0x11000 0x20>;
-		interrupts = <29>;
-		clock-frequency = <100000>;
-	};
-
-For the Armada XP:
-
-	i2c@11000 {
-		compatible = "marvell,mv78230-i2c", "marvell,mv64xxx-i2c";
-		reg = <0x11000 0x100>;
-		interrupts = <29>;
-		clock-frequency = <100000>;
-	};
-
-For the Armada 7040:
-
-	i2c@701000 {
-		compatible = "marvell,mv78230-i2c";
-		reg = <0x701000 0x20>;
-		interrupts = <29>;
-		clock-frequency = <100000>;
-		clock-names = "core", "reg";
-		clocks = <&core_clock>, <&reg_clock>;
-	};
diff --git a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml
new file mode 100644
index 000000000000..9a5654ef5670
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/i2c/marvell,mv64xxx-i2c.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell MV64XXX I2C Controller Device Tree Bindings
+
+maintainers:
+  - Gregory CLEMENT <gregory.clement@bootlin.com>
+
+properties:
+  compatible:
+    oneOf:
+      - const: allwinner,sun4i-a10-i2c
+      - items:
+          - const: allwinner,sun7i-a20-i2c
+          - const: allwinner,sun4i-a10-i2c
+      - const: allwinner,sun6i-a31-i2c
+      - items:
+          - const: allwinner,sun8i-a23-i2c
+          - const: allwinner,sun6i-a31-i2c
+      - items:
+          - const: allwinner,sun8i-a83t-i2c
+          - const: allwinner,sun6i-a31-i2c
+      - items:
+          - const: allwinner,sun50i-a64-i2c
+          - const: allwinner,sun6i-a31-i2c
+
+      - const: marvell,mv64xxx-i2c
+      - const: marvell,mv78230-i2c
+      - const: marvell,mv78230-a0-i2c
+
+    description:
+      Only use "marvell,mv78230-a0-i2c" for a very rare, initial
+      version of the SoC which had broken offload support. Linux
+      auto-detects this and sets it appropriately.
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 1
+    maxItems: 2
+    items:
+      - description: Reference clock for the I2C bus
+      - description: Bus clock (Only for Armada 7K/8K)
+
+  clock-names:
+    minItems: 1
+    maxItems: 2
+    items:
+      - const: core
+      - const: reg
+    description:
+      Mandatory if two clocks are used (only for Armada 7k and 8k).
+
+  resets:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+allOf:
+  - $ref: /schemas/i2c/i2c-controller.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun4i-a10-i2c
+              - allwinner,sun6i-a31-i2c
+
+    then:
+      required:
+        - clocks
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,sun6i-a31-i2c
+
+    then:
+      required:
+        - resets
+
+# FIXME: We should set it, but it would report all the generic
+# properties as additional properties.
+# additionalProperties: false
+
+examples:
+  - |
+    i2c@11000 {
+        compatible = "marvell,mv64xxx-i2c";
+        reg = <0x11000 0x20>;
+        interrupts = <29>;
+        clock-frequency = <100000>;
+    };
+
+  - |
+    i2c@11000 {
+        compatible = "marvell,mv78230-i2c", "marvell,mv64xxx-i2c";
+        reg = <0x11000 0x100>;
+        interrupts = <29>;
+        clock-frequency = <100000>;
+    };
+
+  - |
+    i2c@701000 {
+        compatible = "marvell,mv78230-i2c";
+        reg = <0x701000 0x20>;
+        interrupts = <29>;
+        clock-frequency = <100000>;
+        clock-names = "core", "reg";
+        clocks = <&core_clock>, <&reg_clock>;
+    };
+
+...
-- 
cgit 


From 4601db7ecf078523cd49b4d8953969739ed36bf4 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 5 Jun 2019 14:15:04 +0530
Subject: dt-bindings: i2c: omap: Add new compatible for J721E SoCs

J721E SoCs have same I2C IP as OMAP SoCs. Add new compatible to
handle J721E SoCs.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/devicetree/bindings/i2c/i2c-omap.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/i2c/i2c-omap.txt b/Documentation/devicetree/bindings/i2c/i2c-omap.txt
index 4b90ba9f31b7..a44573d7c118 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-omap.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-omap.txt
@@ -7,6 +7,7 @@ Required properties :
 	"ti,omap3-i2c" for OMAP3 SoCs
 	"ti,omap4-i2c" for OMAP4+ SoCs
 	"ti,am654-i2c", "ti,omap4-i2c" for AM654 SoCs
+	"ti,j721e-i2c", "ti,omap4-i2c" for J721E SoCs
 - ti,hwmods : Must be "i2c<n>", n being the instance number (1-based)
 - #address-cells = <1>;
 - #size-cells = <0>;
-- 
cgit 


From d74b0d31dddeac2b44c715588d53d9a1e5b1158e Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Thu, 25 Apr 2019 07:55:07 -0600
Subject: Docs: An initial automarkup extension for sphinx

Rather than fill our text files with :c:func:`function()` syntax, just do
the markup via a hook into the sphinx build process.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/conf.py              |  3 +-
 Documentation/sphinx/automarkup.py | 93 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/sphinx/automarkup.py

(limited to 'Documentation')

diff --git a/Documentation/conf.py b/Documentation/conf.py
index 7ace3f8852bd..a502baecbb85 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -34,7 +34,8 @@ needs_sphinx = '1.3'
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include', 'cdomain', 'kfigure', 'sphinx.ext.ifconfig']
+extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include', 'cdomain',
+              'kfigure', 'sphinx.ext.ifconfig', 'automarkup']
 
 # The name of the math extension changed on Sphinx 1.4
 if (major == 1 and minor > 3) or (major > 1):
diff --git a/Documentation/sphinx/automarkup.py b/Documentation/sphinx/automarkup.py
new file mode 100644
index 000000000000..b300cf129869
--- /dev/null
+++ b/Documentation/sphinx/automarkup.py
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2019 Jonathan Corbet <corbet@lwn.net>
+#
+# Apply kernel-specific tweaks after the initial document processing
+# has been done.
+#
+from docutils import nodes
+from sphinx import addnodes
+import re
+
+#
+# Regex nastiness.  Of course.
+# Try to identify "function()" that's not already marked up some
+# other way.  Sphinx doesn't like a lot of stuff right after a
+# :c:func: block (i.e. ":c:func:`mmap()`s" flakes out), so the last
+# bit tries to restrict matches to things that won't create trouble.
+#
+RE_function = re.compile(r'([\w_][\w\d_]+\(\))')
+
+#
+# Many places in the docs refer to common system calls.  It is
+# pointless to try to cross-reference them and, as has been known
+# to happen, somebody defining a function by these names can lead
+# to the creation of incorrect and confusing cross references.  So
+# just don't even try with these names.
+#
+Skipfuncs = [ 'open', 'close', 'read', 'write', 'fcntl', 'mmap'
+              'select', 'poll', 'fork', 'execve', 'clone', 'ioctl']
+
+#
+# Find all occurrences of function() and try to replace them with
+# appropriate cross references.
+#
+def markup_funcs(docname, app, node):
+    cdom = app.env.domains['c']
+    t = node.astext()
+    done = 0
+    repl = [ ]
+    for m in RE_function.finditer(t):
+        #
+        # Include any text prior to function() as a normal text node.
+        #
+        if m.start() > done:
+            repl.append(nodes.Text(t[done:m.start()]))
+        #
+        # Go through the dance of getting an xref out of the C domain
+        #
+        target = m.group(1)[:-2]
+        target_text = nodes.Text(target + '()')
+        xref = None
+        if target not in Skipfuncs:
+            lit_text = nodes.literal(classes=['xref', 'c', 'c-func'])
+            lit_text += target_text
+            pxref = addnodes.pending_xref('', refdomain = 'c',
+                                          reftype = 'function',
+                                          reftarget = target, modname = None,
+                                          classname = None)
+            xref = cdom.resolve_xref(app.env, docname, app.builder,
+                                     'function', target, pxref, lit_text)
+        #
+        # Toss the xref into the list if we got it; otherwise just put
+        # the function text.
+        #
+        if xref:
+            repl.append(xref)
+        else:
+            repl.append(target_text)
+        done = m.end()
+    if done < len(t):
+        repl.append(nodes.Text(t[done:]))
+    return repl
+
+def auto_markup(app, doctree, name):
+    #
+    # This loop could eventually be improved on.  Someday maybe we
+    # want a proper tree traversal with a lot of awareness of which
+    # kinds of nodes to prune.  But this works well for now.
+    #
+    # The nodes.literal test catches ``literal text``, its purpose is to
+    # avoid adding cross-references to functions that have been explicitly
+    # marked with cc:func:.
+    #
+    for para in doctree.traverse(nodes.paragraph):
+        for node in para.traverse(nodes.Text):
+            if not isinstance(node.parent, nodes.literal):
+                node.parent.replace(node, markup_funcs(name, app, node))
+
+def setup(app):
+    app.connect('doctree-resolved', auto_markup)
+    return {
+        'parallel_read_safe': True,
+        'parallel_write_safe': True,
+        }
-- 
cgit 


From 9c79df7f0312e883f17a7b1c2352d1511362354c Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Thu, 25 Apr 2019 13:48:13 -0600
Subject: docs: remove :c:func: annotations from xarray.rst

Now that the build system automatically marks up function references, we
don't have to clutter the source files, so take it out.

[Some paragraphs could now benefit from refilling, but that was left out to
avoid obscuring the real changes.]

Acked-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/core-api/xarray.rst | 270 +++++++++++++++++++-------------------
 1 file changed, 135 insertions(+), 135 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index ef6f9f98f595..fcedc5349ace 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -30,27 +30,27 @@ it called marks.  Each mark may be set or cleared independently of
 the others.  You can iterate over entries which are marked.
 
 Normal pointers may be stored in the XArray directly.  They must be 4-byte
-aligned, which is true for any pointer returned from :c:func:`kmalloc` and
-:c:func:`alloc_page`.  It isn't true for arbitrary user-space pointers,
+aligned, which is true for any pointer returned from kmalloc() and
+alloc_page().  It isn't true for arbitrary user-space pointers,
 nor for function pointers.  You can store pointers to statically allocated
 objects, as long as those objects have an alignment of at least 4.
 
 You can also store integers between 0 and ``LONG_MAX`` in the XArray.
-You must first convert it into an entry using :c:func:`xa_mk_value`.
+You must first convert it into an entry using xa_mk_value().
 When you retrieve an entry from the XArray, you can check whether it is
-a value entry by calling :c:func:`xa_is_value`, and convert it back to
-an integer by calling :c:func:`xa_to_value`.
+a value entry by calling xa_is_value(), and convert it back to
+an integer by calling xa_to_value().
 
 Some users want to store tagged pointers instead of using the marks
-described above.  They can call :c:func:`xa_tag_pointer` to create an
-entry with a tag, :c:func:`xa_untag_pointer` to turn a tagged entry
-back into an untagged pointer and :c:func:`xa_pointer_tag` to retrieve
+described above.  They can call xa_tag_pointer() to create an
+entry with a tag, xa_untag_pointer() to turn a tagged entry
+back into an untagged pointer and xa_pointer_tag() to retrieve
 the tag of an entry.  Tagged pointers use the same bits that are used
 to distinguish value entries from normal pointers, so each user must
 decide whether they want to store value entries or tagged pointers in
 any particular XArray.
 
-The XArray does not support storing :c:func:`IS_ERR` pointers as some
+The XArray does not support storing IS_ERR() pointers as some
 conflict with value entries or internal entries.
 
 An unusual feature of the XArray is the ability to create entries which
@@ -64,89 +64,89 @@ entry will cause the XArray to forget about the range.
 Normal API
 ==========
 
-Start by initialising an XArray, either with :c:func:`DEFINE_XARRAY`
-for statically allocated XArrays or :c:func:`xa_init` for dynamically
+Start by initialising an XArray, either with DEFINE_XARRAY()
+for statically allocated XArrays or xa_init() for dynamically
 allocated ones.  A freshly-initialised XArray contains a ``NULL``
 pointer at every index.
 
-You can then set entries using :c:func:`xa_store` and get entries
-using :c:func:`xa_load`.  xa_store will overwrite any entry with the
+You can then set entries using xa_store() and get entries
+using xa_load().  xa_store will overwrite any entry with the
 new entry and return the previous entry stored at that index.  You can
-use :c:func:`xa_erase` instead of calling :c:func:`xa_store` with a
+use xa_erase() instead of calling xa_store() with a
 ``NULL`` entry.  There is no difference between an entry that has never
 been stored to, one that has been erased and one that has most recently
 had ``NULL`` stored to it.
 
 You can conditionally replace an entry at an index by using
-:c:func:`xa_cmpxchg`.  Like :c:func:`cmpxchg`, it will only succeed if
+xa_cmpxchg().  Like cmpxchg(), it will only succeed if
 the entry at that index has the 'old' value.  It also returns the entry
 which was at that index; if it returns the same entry which was passed as
-'old', then :c:func:`xa_cmpxchg` succeeded.
+'old', then xa_cmpxchg() succeeded.
 
 If you want to only store a new entry to an index if the current entry
-at that index is ``NULL``, you can use :c:func:`xa_insert` which
+at that index is ``NULL``, you can use xa_insert() which
 returns ``-EBUSY`` if the entry is not empty.
 
 You can enquire whether a mark is set on an entry by using
-:c:func:`xa_get_mark`.  If the entry is not ``NULL``, you can set a mark
-on it by using :c:func:`xa_set_mark` and remove the mark from an entry by
-calling :c:func:`xa_clear_mark`.  You can ask whether any entry in the
-XArray has a particular mark set by calling :c:func:`xa_marked`.
+xa_get_mark().  If the entry is not ``NULL``, you can set a mark
+on it by using xa_set_mark() and remove the mark from an entry by
+calling xa_clear_mark().  You can ask whether any entry in the
+XArray has a particular mark set by calling xa_marked().
 
 You can copy entries out of the XArray into a plain array by calling
-:c:func:`xa_extract`.  Or you can iterate over the present entries in
-the XArray by calling :c:func:`xa_for_each`.  You may prefer to use
-:c:func:`xa_find` or :c:func:`xa_find_after` to move to the next present
+xa_extract().  Or you can iterate over the present entries in
+the XArray by calling xa_for_each().  You may prefer to use
+xa_find() or xa_find_after() to move to the next present
 entry in the XArray.
 
-Calling :c:func:`xa_store_range` stores the same entry in a range
+Calling xa_store_range() stores the same entry in a range
 of indices.  If you do this, some of the other operations will behave
 in a slightly odd way.  For example, marking the entry at one index
 may result in the entry being marked at some, but not all of the other
 indices.  Storing into one index may result in the entry retrieved by
 some, but not all of the other indices changing.
 
-Sometimes you need to ensure that a subsequent call to :c:func:`xa_store`
-will not need to allocate memory.  The :c:func:`xa_reserve` function
+Sometimes you need to ensure that a subsequent call to xa_store()
+will not need to allocate memory.  The xa_reserve() function
 will store a reserved entry at the indicated index.  Users of the
 normal API will see this entry as containing ``NULL``.  If you do
-not need to use the reserved entry, you can call :c:func:`xa_release`
+not need to use the reserved entry, you can call xa_release()
 to remove the unused entry.  If another user has stored to the entry
-in the meantime, :c:func:`xa_release` will do nothing; if instead you
-want the entry to become ``NULL``, you should use :c:func:`xa_erase`.
-Using :c:func:`xa_insert` on a reserved entry will fail.
+in the meantime, xa_release() will do nothing; if instead you
+want the entry to become ``NULL``, you should use xa_erase().
+Using xa_insert() on a reserved entry will fail.
 
-If all entries in the array are ``NULL``, the :c:func:`xa_empty` function
+If all entries in the array are ``NULL``, the xa_empty() function
 will return ``true``.
 
 Finally, you can remove all entries from an XArray by calling
-:c:func:`xa_destroy`.  If the XArray entries are pointers, you may wish
+xa_destroy().  If the XArray entries are pointers, you may wish
 to free the entries first.  You can do this by iterating over all present
-entries in the XArray using the :c:func:`xa_for_each` iterator.
+entries in the XArray using the xa_for_each() iterator.
 
 Allocating XArrays
 ------------------
 
-If you use :c:func:`DEFINE_XARRAY_ALLOC` to define the XArray, or
-initialise it by passing ``XA_FLAGS_ALLOC`` to :c:func:`xa_init_flags`,
+If you use DEFINE_XARRAY_ALLOC() to define the XArray, or
+initialise it by passing ``XA_FLAGS_ALLOC`` to xa_init_flags(),
 the XArray changes to track whether entries are in use or not.
 
-You can call :c:func:`xa_alloc` to store the entry at an unused index
+You can call xa_alloc() to store the entry at an unused index
 in the XArray.  If you need to modify the array from interrupt context,
-you can use :c:func:`xa_alloc_bh` or :c:func:`xa_alloc_irq` to disable
+you can use xa_alloc_bh() or xa_alloc_irq() to disable
 interrupts while allocating the ID.
 
-Using :c:func:`xa_store`, :c:func:`xa_cmpxchg` or :c:func:`xa_insert` will
+Using xa_store(), xa_cmpxchg() or xa_insert() will
 also mark the entry as being allocated.  Unlike a normal XArray, storing
-``NULL`` will mark the entry as being in use, like :c:func:`xa_reserve`.
-To free an entry, use :c:func:`xa_erase` (or :c:func:`xa_release` if
+``NULL`` will mark the entry as being in use, like xa_reserve().
+To free an entry, use xa_erase() (or xa_release() if
 you only want to free the entry if it's ``NULL``).
 
 By default, the lowest free entry is allocated starting from 0.  If you
 want to allocate entries starting at 1, it is more efficient to use
-:c:func:`DEFINE_XARRAY_ALLOC1` or ``XA_FLAGS_ALLOC1``.  If you want to
+DEFINE_XARRAY_ALLOC1() or ``XA_FLAGS_ALLOC1``.  If you want to
 allocate IDs up to a maximum, then wrap back around to the lowest free
-ID, you can use :c:func:`xa_alloc_cyclic`.
+ID, you can use xa_alloc_cyclic().
 
 You cannot use ``XA_MARK_0`` with an allocating XArray as this mark
 is used to track whether an entry is free or not.  The other marks are
@@ -155,17 +155,17 @@ available for your use.
 Memory allocation
 -----------------
 
-The :c:func:`xa_store`, :c:func:`xa_cmpxchg`, :c:func:`xa_alloc`,
-:c:func:`xa_reserve` and :c:func:`xa_insert` functions take a gfp_t
+The xa_store(), xa_cmpxchg(), xa_alloc(),
+xa_reserve() and xa_insert() functions take a gfp_t
 parameter in case the XArray needs to allocate memory to store this entry.
 If the entry is being deleted, no memory allocation needs to be performed,
 and the GFP flags specified will be ignored.
 
 It is possible for no memory to be allocatable, particularly if you pass
 a restrictive set of GFP flags.  In that case, the functions return a
-special value which can be turned into an errno using :c:func:`xa_err`.
+special value which can be turned into an errno using xa_err().
 If you don't need to know exactly which error occurred, using
-:c:func:`xa_is_err` is slightly more efficient.
+xa_is_err() is slightly more efficient.
 
 Locking
 -------
@@ -174,54 +174,54 @@ When using the Normal API, you do not have to worry about locking.
 The XArray uses RCU and an internal spinlock to synchronise access:
 
 No lock needed:
- * :c:func:`xa_empty`
- * :c:func:`xa_marked`
+ * xa_empty()
+ * xa_marked()
 
 Takes RCU read lock:
- * :c:func:`xa_load`
- * :c:func:`xa_for_each`
- * :c:func:`xa_find`
- * :c:func:`xa_find_after`
- * :c:func:`xa_extract`
- * :c:func:`xa_get_mark`
+ * xa_load()
+ * xa_for_each()
+ * xa_find()
+ * xa_find_after()
+ * xa_extract()
+ * xa_get_mark()
 
 Takes xa_lock internally:
- * :c:func:`xa_store`
- * :c:func:`xa_store_bh`
- * :c:func:`xa_store_irq`
- * :c:func:`xa_insert`
- * :c:func:`xa_insert_bh`
- * :c:func:`xa_insert_irq`
- * :c:func:`xa_erase`
- * :c:func:`xa_erase_bh`
- * :c:func:`xa_erase_irq`
- * :c:func:`xa_cmpxchg`
- * :c:func:`xa_cmpxchg_bh`
- * :c:func:`xa_cmpxchg_irq`
- * :c:func:`xa_store_range`
- * :c:func:`xa_alloc`
- * :c:func:`xa_alloc_bh`
- * :c:func:`xa_alloc_irq`
- * :c:func:`xa_reserve`
- * :c:func:`xa_reserve_bh`
- * :c:func:`xa_reserve_irq`
- * :c:func:`xa_destroy`
- * :c:func:`xa_set_mark`
- * :c:func:`xa_clear_mark`
+ * xa_store()
+ * xa_store_bh()
+ * xa_store_irq()
+ * xa_insert()
+ * xa_insert_bh()
+ * xa_insert_irq()
+ * xa_erase()
+ * xa_erase_bh()
+ * xa_erase_irq()
+ * xa_cmpxchg()
+ * xa_cmpxchg_bh()
+ * xa_cmpxchg_irq()
+ * xa_store_range()
+ * xa_alloc()
+ * xa_alloc_bh()
+ * xa_alloc_irq()
+ * xa_reserve()
+ * xa_reserve_bh()
+ * xa_reserve_irq()
+ * xa_destroy()
+ * xa_set_mark()
+ * xa_clear_mark()
 
 Assumes xa_lock held on entry:
- * :c:func:`__xa_store`
- * :c:func:`__xa_insert`
- * :c:func:`__xa_erase`
- * :c:func:`__xa_cmpxchg`
- * :c:func:`__xa_alloc`
- * :c:func:`__xa_set_mark`
- * :c:func:`__xa_clear_mark`
+ * __xa_store()
+ * __xa_insert()
+ * __xa_erase()
+ * __xa_cmpxchg()
+ * __xa_alloc()
+ * __xa_set_mark()
+ * __xa_clear_mark()
 
 If you want to take advantage of the lock to protect the data structures
-that you are storing in the XArray, you can call :c:func:`xa_lock`
-before calling :c:func:`xa_load`, then take a reference count on the
-object you have found before calling :c:func:`xa_unlock`.  This will
+that you are storing in the XArray, you can call xa_lock()
+before calling xa_load(), then take a reference count on the
+object you have found before calling xa_unlock().  This will
 prevent stores from removing the object from the array between looking
 up the object and incrementing the refcount.  You can also use RCU to
 avoid dereferencing freed memory, but an explanation of that is beyond
@@ -261,7 +261,7 @@ context and then erase them in softirq context, you can do that this way::
     }
 
 If you are going to modify the XArray from interrupt or softirq context,
-you need to initialise the array using :c:func:`xa_init_flags`, passing
+you need to initialise the array using xa_init_flags(), passing
 ``XA_FLAGS_LOCK_IRQ`` or ``XA_FLAGS_LOCK_BH``.
 
 The above example also shows a common pattern of wanting to extend the
@@ -269,20 +269,20 @@ coverage of the xa_lock on the store side to protect some statistics
 associated with the array.
 
 Sharing the XArray with interrupt context is also possible, either
-using :c:func:`xa_lock_irqsave` in both the interrupt handler and process
-context, or :c:func:`xa_lock_irq` in process context and :c:func:`xa_lock`
+using xa_lock_irqsave() in both the interrupt handler and process
+context, or xa_lock_irq() in process context and xa_lock()
 in the interrupt handler.  Some of the more common patterns have helper
-functions such as :c:func:`xa_store_bh`, :c:func:`xa_store_irq`,
-:c:func:`xa_erase_bh`, :c:func:`xa_erase_irq`, :c:func:`xa_cmpxchg_bh`
-and :c:func:`xa_cmpxchg_irq`.
+functions such as xa_store_bh(), xa_store_irq(),
+xa_erase_bh(), xa_erase_irq(), xa_cmpxchg_bh()
+and xa_cmpxchg_irq().
 
 Sometimes you need to protect access to the XArray with a mutex because
 that lock sits above another mutex in the locking hierarchy.  That does
-not entitle you to use functions like :c:func:`__xa_erase` without taking
+not entitle you to use functions like __xa_erase() without taking
 the xa_lock; the xa_lock is used for lockdep validation and will be used
 for other purposes in the future.
 
-The :c:func:`__xa_set_mark` and :c:func:`__xa_clear_mark` functions are also
+The __xa_set_mark() and __xa_clear_mark() functions are also
 available for situations where you look up an entry and want to atomically
 set or clear a mark.  It may be more efficient to use the advanced API
 in this case, as it will save you from walking the tree twice.
@@ -300,27 +300,27 @@ indeed the normal API is implemented in terms of the advanced API.  The
 advanced API is only available to modules with a GPL-compatible license.
 
 The advanced API is based around the xa_state.  This is an opaque data
-structure which you declare on the stack using the :c:func:`XA_STATE`
+structure which you declare on the stack using the XA_STATE()
 macro.  This macro initialises the xa_state ready to start walking
 around the XArray.  It is used as a cursor to maintain the position
 in the XArray and let you compose various operations together without
 having to restart from the top every time.
 
 The xa_state is also used to store errors.  You can call
-:c:func:`xas_error` to retrieve the error.  All operations check whether
+xas_error() to retrieve the error.  All operations check whether
 the xa_state is in an error state before proceeding, so there's no need
 for you to check for an error after each call; you can make multiple
 calls in succession and only check at a convenient point.  The only
 errors currently generated by the XArray code itself are ``ENOMEM`` and
 ``EINVAL``, but it supports arbitrary errors in case you want to call
-:c:func:`xas_set_err` yourself.
+xas_set_err() yourself.
 
-If the xa_state is holding an ``ENOMEM`` error, calling :c:func:`xas_nomem`
+If the xa_state is holding an ``ENOMEM`` error, calling xas_nomem()
 will attempt to allocate more memory using the specified gfp flags and
 cache it in the xa_state for the next attempt.  The idea is that you take
 the xa_lock, attempt the operation and drop the lock.  The operation
 attempts to allocate memory while holding the lock, but it is more
-likely to fail.  Once you have dropped the lock, :c:func:`xas_nomem`
+likely to fail.  Once you have dropped the lock, xas_nomem()
 can try harder to allocate more memory.  It will return ``true`` if it
 is worth retrying the operation (i.e. that there was a memory error *and*
 more memory was allocated).  If it has previously allocated memory, and
@@ -333,7 +333,7 @@ Internal Entries
 The XArray reserves some entries for its own purposes.  These are never
 exposed through the normal API, but when using the advanced API, it's
 possible to see them.  Usually the best way to handle them is to pass them
-to :c:func:`xas_retry`, and retry the operation if it returns ``true``.
+to xas_retry(), and retry the operation if it returns ``true``.
 
 .. flat-table::
    :widths: 1 1 6
@@ -343,89 +343,89 @@ to :c:func:`xas_retry`, and retry the operation if it returns ``true``.
      - Usage
 
    * - Node
-     - :c:func:`xa_is_node`
+     - xa_is_node()
      - An XArray node.  May be visible when using a multi-index xa_state.
 
    * - Sibling
-     - :c:func:`xa_is_sibling`
+     - xa_is_sibling()
      - A non-canonical entry for a multi-index entry.  The value indicates
        which slot in this node has the canonical entry.
 
    * - Retry
-     - :c:func:`xa_is_retry`
+     - xa_is_retry()
      - This entry is currently being modified by a thread which has the
        xa_lock.  The node containing this entry may be freed at the end
        of this RCU period.  You should restart the lookup from the head
        of the array.
 
    * - Zero
-     - :c:func:`xa_is_zero`
+     - xa_is_zero()
      - Zero entries appear as ``NULL`` through the Normal API, but occupy
        an entry in the XArray which can be used to reserve the index for
        future use.  This is used by allocating XArrays for allocated entries
        which are ``NULL``.
 
 Other internal entries may be added in the future.  As far as possible, they
-will be handled by :c:func:`xas_retry`.
+will be handled by xas_retry().
 
 Additional functionality
 ------------------------
 
-The :c:func:`xas_create_range` function allocates all the necessary memory
+The xas_create_range() function allocates all the necessary memory
 to store every entry in a range.  It will set ENOMEM in the xa_state if
 it cannot allocate memory.
 
-You can use :c:func:`xas_init_marks` to reset the marks on an entry
+You can use xas_init_marks() to reset the marks on an entry
 to their default state.  This is usually all marks clear, unless the
 XArray is marked with ``XA_FLAGS_TRACK_FREE``, in which case mark 0 is set
 and all other marks are clear.  Replacing one entry with another using
-:c:func:`xas_store` will not reset the marks on that entry; if you want
+xas_store() will not reset the marks on that entry; if you want
 the marks reset, you should do that explicitly.
 
-The :c:func:`xas_load` will walk the xa_state as close to the entry
+The xas_load() will walk the xa_state as close to the entry
 as it can.  If you know the xa_state has already been walked to the
 entry and need to check that the entry hasn't changed, you can use
-:c:func:`xas_reload` to save a function call.
+xas_reload() to save a function call.
 
 If you need to move to a different index in the XArray, call
-:c:func:`xas_set`.  This resets the cursor to the top of the tree, which
+xas_set().  This resets the cursor to the top of the tree, which
 will generally make the next operation walk the cursor to the desired
 spot in the tree.  If you want to move to the next or previous index,
-call :c:func:`xas_next` or :c:func:`xas_prev`.  Setting the index does
+call xas_next() or xas_prev().  Setting the index does
 not walk the cursor around the array so does not require a lock to be
 held, while moving to the next or previous index does.
 
-You can search for the next present entry using :c:func:`xas_find`.  This
-is the equivalent of both :c:func:`xa_find` and :c:func:`xa_find_after`;
+You can search for the next present entry using xas_find().  This
+is the equivalent of both xa_find() and xa_find_after();
 if the cursor has been walked to an entry, then it will find the next
 entry after the one currently referenced.  If not, it will return the
-entry at the index of the xa_state.  Using :c:func:`xas_next_entry` to
-move to the next present entry instead of :c:func:`xas_find` will save
+entry at the index of the xa_state.  Using xas_next_entry() to
+move to the next present entry instead of xas_find() will save
 a function call in the majority of cases at the expense of emitting more
 inline code.
 
-The :c:func:`xas_find_marked` function is similar.  If the xa_state has
+The xas_find_marked() function is similar.  If the xa_state has
 not been walked, it will return the entry at the index of the xa_state,
 if it is marked.  Otherwise, it will return the first marked entry after
-the entry referenced by the xa_state.  The :c:func:`xas_next_marked`
-function is the equivalent of :c:func:`xas_next_entry`.
+the entry referenced by the xa_state.  The xas_next_marked()
+function is the equivalent of xas_next_entry().
 
-When iterating over a range of the XArray using :c:func:`xas_for_each`
-or :c:func:`xas_for_each_marked`, it may be necessary to temporarily stop
-the iteration.  The :c:func:`xas_pause` function exists for this purpose.
+When iterating over a range of the XArray using xas_for_each()
+or xas_for_each_marked(), it may be necessary to temporarily stop
+the iteration.  The xas_pause() function exists for this purpose.
 After you have done the necessary work and wish to resume, the xa_state
 is in an appropriate state to continue the iteration after the entry
 you last processed.  If you have interrupts disabled while iterating,
 then it is good manners to pause the iteration and reenable interrupts
 every ``XA_CHECK_SCHED`` entries.
 
-The :c:func:`xas_get_mark`, :c:func:`xas_set_mark` and
-:c:func:`xas_clear_mark` functions require the xa_state cursor to have
+The xas_get_mark(), xas_set_mark() and
+xas_clear_mark() functions require the xa_state cursor to have
 been moved to the appropriate location in the xarray; they will do
-nothing if you have called :c:func:`xas_pause` or :c:func:`xas_set`
+nothing if you have called xas_pause() or xas_set()
 immediately before.
 
-You can call :c:func:`xas_set_update` to have a callback function
+You can call xas_set_update() to have a callback function
 called each time the XArray updates a node.  This is used by the page
 cache workingset code to maintain its list of nodes which contain only
 shadow entries.
@@ -443,25 +443,25 @@ eg indices 64-127 may be tied together, but 2-6 may not be.  This may
 save substantial quantities of memory; for example tying 512 entries
 together will save over 4kB.
 
-You can create a multi-index entry by using :c:func:`XA_STATE_ORDER`
-or :c:func:`xas_set_order` followed by a call to :c:func:`xas_store`.
-Calling :c:func:`xas_load` with a multi-index xa_state will walk the
+You can create a multi-index entry by using XA_STATE_ORDER()
+or xas_set_order() followed by a call to xas_store().
+Calling xas_load() with a multi-index xa_state will walk the
 xa_state to the right location in the tree, but the return value is not
 meaningful, potentially being an internal entry or ``NULL`` even when there
-is an entry stored within the range.  Calling :c:func:`xas_find_conflict`
+is an entry stored within the range.  Calling xas_find_conflict()
 will return the first entry within the range or ``NULL`` if there are no
-entries in the range.  The :c:func:`xas_for_each_conflict` iterator will
+entries in the range.  The xas_for_each_conflict() iterator will
 iterate over every entry which overlaps the specified range.
 
-If :c:func:`xas_load` encounters a multi-index entry, the xa_index
+If xas_load() encounters a multi-index entry, the xa_index
 in the xa_state will not be changed.  When iterating over an XArray
-or calling :c:func:`xas_find`, if the initial index is in the middle
+or calling xas_find(), if the initial index is in the middle
 of a multi-index entry, it will not be altered.  Subsequent calls
 or iterations will move the index to the first index in the range.
 Each entry will only be returned once, no matter how many indices it
 occupies.
 
-Using :c:func:`xas_next` or :c:func:`xas_prev` with a multi-index xa_state
+Using xas_next() or xas_prev() with a multi-index xa_state
 is not supported.  Using either of these functions on a multi-index entry
 will reveal sibling entries; these should be skipped over by the caller.
 
-- 
cgit 


From d9d7c0c497b8e2ffd9fe26cc96a49ed2d69d8b75 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Wed, 26 Jun 2019 11:20:21 -0600
Subject: docs: Note that :c:func: should no longer be used

Now that we can mark up function() automatically, there is no reason to use
:c:func: and every reason to avoid it.  Adjust the documentation to reflect
that fact.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/doc-guide/sphinx.rst | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst
index e60a56640c63..f71ddd592aaa 100644
--- a/Documentation/doc-guide/sphinx.rst
+++ b/Documentation/doc-guide/sphinx.rst
@@ -241,11 +241,14 @@ The C domain of the kernel-doc has some additional features. E.g. you can
 
 The func-name (e.g. ioctl) remains in the output but the ref-name changed from
 ``ioctl`` to ``VIDIOC_LOG_STATUS``. The index entry for this function is also
-changed to ``VIDIOC_LOG_STATUS`` and the function can now referenced by:
-
-.. code-block:: rst
-
-     :c:func:`VIDIOC_LOG_STATUS`
+changed to ``VIDIOC_LOG_STATUS``.
+
+Please note that there is no need to use ``c:func:`` to generate cross
+references to function documentation.  Due to some Sphinx extension magic,
+the documentation build system will automatically turn a reference to
+``function()`` into a cross reference if an index entry for the given
+function name exists.  If you see ``c:func:`` use in a kernel document,
+please feel free to remove it.
 
 
 list tables
-- 
cgit 


From 3cdb0157884344c7b40998f31ce43c4ded593cdd Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul.walmsley@sifive.com>
Date: Wed, 26 Jun 2019 08:19:29 -0700
Subject: dt-bindings: riscv: resolve 'make dt_binding_check' warnings

Rob pointed out that one of the examples in the RISC-V 'cpus' YAML
schema results in warnings from 'make dt_binding_check'.  Fix these.

While here, make the whitespace in the second example consistent
with the first example.

Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Rob Herring <robh@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org> # for fixing the dtc warnings
---
 Documentation/devicetree/bindings/riscv/cpus.yaml | 26 ++++++++++++-----------
 1 file changed, 14 insertions(+), 12 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml
index 27f02ec4bb45..f97a4ecd7b91 100644
--- a/Documentation/devicetree/bindings/riscv/cpus.yaml
+++ b/Documentation/devicetree/bindings/riscv/cpus.yaml
@@ -152,17 +152,19 @@ examples:
   - |
     // Example 2: Spike ISA Simulator with 1 Hart
     cpus {
-            cpu@0 {
-                    device_type = "cpu";
-                    reg = <0>;
-                    compatible = "riscv";
-                    riscv,isa = "rv64imafdc";
-                    mmu-type = "riscv,sv48";
-                    interrupt-controller {
-                            #interrupt-cells = <1>;
-                            interrupt-controller;
-                            compatible = "riscv,cpu-intc";
-                    };
-            };
+        #address-cells = <1>;
+        #size-cells = <0>;
+        cpu@0 {
+                device_type = "cpu";
+                reg = <0>;
+                compatible = "riscv";
+                riscv,isa = "rv64imafdc";
+                mmu-type = "riscv,sv48";
+                interrupt-controller {
+                        #interrupt-cells = <1>;
+                        interrupt-controller;
+                        compatible = "riscv,cpu-intc";
+                };
+        };
     };
 ...
-- 
cgit 


From 163ede97a9a29604c3a8afbf22ae0599f5148621 Mon Sep 17 00:00:00 2001
From: Puranjay Mohan <puranjay12@gmail.com>
Date: Fri, 21 Jun 2019 00:08:27 +0530
Subject: Documentation: platform: Delete x86-laptop-drivers.txt

The list of laptops supported by drivers in PDx86 subsystem is quite
big and growing. x86-laptop-drivers.txt contains details of very few
laptop models. Remove it because it does not  serve any purpose.

Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/platform/x86-laptop-drivers.txt | 18 ------------------
 1 file changed, 18 deletions(-)
 delete mode 100644 Documentation/platform/x86-laptop-drivers.txt

(limited to 'Documentation')

diff --git a/Documentation/platform/x86-laptop-drivers.txt b/Documentation/platform/x86-laptop-drivers.txt
deleted file mode 100644
index 01facd2590bb..000000000000
--- a/Documentation/platform/x86-laptop-drivers.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-compal-laptop
-=============
-List of supported hardware:
-
-by Compal:
-	Compal FL90/IFL90
-	Compal FL91/IFL91
-	Compal FL92/JFL92
-	Compal FT00/IFT00
-
-by Dell:
-	Dell Vostro 1200
-	Dell Mini 9 (Inspiron 910)
-	Dell Mini 10 (Inspiron 1010)
-	Dell Mini 10v (Inspiron 1011)
-	Dell Mini 1012 (Inspiron 1012)
-	Dell Inspiron 11z (Inspiron 1110)
-	Dell Mini 12 (Inspiron 1210)
-- 
cgit 


From 6e88559470f581741bcd0f2794f9054814ac9740 Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Thu, 20 Jun 2019 16:10:50 -0700
Subject: Documentation: Add section about CPU vulnerabilities for Spectre

Add documentation for Spectre vulnerability and the mitigation mechanisms:

- Explain the problem and risks
- Document the mitigation mechanisms
- Document the command line controls
- Document the sysfs files

Co-developed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Co-developed-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/hw-vuln/index.rst   |   1 +
 Documentation/admin-guide/hw-vuln/spectre.rst | 697 ++++++++++++++++++++++++++
 Documentation/userspace-api/spec_ctrl.rst     |   2 +
 3 files changed, 700 insertions(+)
 create mode 100644 Documentation/admin-guide/hw-vuln/spectre.rst

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index ffc064c1ec68..49311f3da6f2 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -9,5 +9,6 @@ are configurable at compile, boot or run time.
 .. toctree::
    :maxdepth: 1
 
+   spectre
    l1tf
    mds
diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
new file mode 100644
index 000000000000..25f3b2532198
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/spectre.rst
@@ -0,0 +1,697 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Spectre Side Channels
+=====================
+
+Spectre is a class of side channel attacks that exploit branch prediction
+and speculative execution on modern CPUs to read memory, possibly
+bypassing access controls. Speculative execution side channel exploits
+do not modify memory but attempt to infer privileged data in the memory.
+
+This document covers Spectre variant 1 and Spectre variant 2.
+
+Affected processors
+-------------------
+
+Speculative execution side channel methods affect a wide range of modern
+high performance processors, since most modern high speed processors
+use branch prediction and speculative execution.
+
+The following CPUs are vulnerable:
+
+    - Intel Core, Atom, Pentium, and Xeon processors
+
+    - AMD Phenom, EPYC, and Zen processors
+
+    - IBM POWER and zSeries processors
+
+    - Higher end ARM processors
+
+    - Apple CPUs
+
+    - Higher end MIPS CPUs
+
+    - Likely most other high performance CPUs. Contact your CPU vendor for details.
+
+Whether a processor is affected or not can be read out from the Spectre
+vulnerability files in sysfs. See :ref:`spectre_sys_info`.
+
+Related CVEs
+------------
+
+The following CVE entries describe Spectre variants:
+
+   =============   =======================  =================
+   CVE-2017-5753   Bounds check bypass      Spectre variant 1
+   CVE-2017-5715   Branch target injection  Spectre variant 2
+   =============   =======================  =================
+
+Problem
+-------
+
+CPUs use speculative operations to improve performance. That may leave
+traces of memory accesses or computations in the processor's caches,
+buffers, and branch predictors. Malicious software may be able to
+influence the speculative execution paths, and then use the side effects
+of the speculative execution in the CPUs' caches and buffers to infer
+privileged data touched during the speculative execution.
+
+Spectre variant 1 attacks take advantage of speculative execution of
+conditional branches, while Spectre variant 2 attacks use speculative
+execution of indirect branches to leak privileged memory.
+See :ref:`[1] <spec_ref1>` :ref:`[5] <spec_ref5>` :ref:`[7] <spec_ref7>`
+:ref:`[10] <spec_ref10>` :ref:`[11] <spec_ref11>`.
+
+Spectre variant 1 (Bounds Check Bypass)
+---------------------------------------
+
+The bounds check bypass attack :ref:`[2] <spec_ref2>` takes advantage
+of speculative execution that bypasses conditional branch instructions
+used for memory access bounds check (e.g. checking if the index of an
+array results in memory access within a valid range). This results in
+memory accesses to invalid memory (with out-of-bound index) that are
+done speculatively before validation checks resolve. Such speculative
+memory accesses can leave side effects, creating side channels which
+leak information to the attacker.
+
+There are some extensions of Spectre variant 1 attacks for reading data
+over the network, see :ref:`[12] <spec_ref12>`. However such attacks
+are difficult, low bandwidth, fragile, and are considered low risk.
+
+Spectre variant 2 (Branch Target Injection)
+-------------------------------------------
+
+The branch target injection attack takes advantage of speculative
+execution of indirect branches :ref:`[3] <spec_ref3>`.  The indirect
+branch predictors inside the processor used to guess the target of
+indirect branches can be influenced by an attacker, causing gadget code
+to be speculatively executed, thus exposing sensitive data touched by
+the victim. The side effects left in the CPU's caches during speculative
+execution can be measured to infer data values.
+
+.. _poison_btb:
+
+In Spectre variant 2 attacks, the attacker can steer speculative indirect
+branches in the victim to gadget code by poisoning the branch target
+buffer of a CPU used for predicting indirect branch addresses. Such
+poisoning could be done by indirect branching into existing code,
+with the address offset of the indirect branch under the attacker's
+control. Since the branch prediction on impacted hardware does not
+fully disambiguate branch address and uses the offset for prediction,
+this could cause privileged code's indirect branch to jump to a gadget
+code with the same offset.
+
+The most useful gadgets take an attacker-controlled input parameter (such
+as a register value) so that the memory read can be controlled. Gadgets
+without input parameters might be possible, but the attacker would have
+very little control over what memory can be read, reducing the risk of
+the attack revealing useful data.
+
+One other variant 2 attack vector is for the attacker to poison the
+return stack buffer (RSB) :ref:`[13] <spec_ref13>` to cause speculative
+subroutine return instruction execution to go to a gadget.  An attacker's
+imbalanced subroutine call instructions might "poison" entries in the
+return stack buffer which are later consumed by a victim's subroutine
+return instructions.  This attack can be mitigated by flushing the return
+stack buffer on context switch, or virtual machine (VM) exit.
+
+On systems with simultaneous multi-threading (SMT), attacks are possible
+from the sibling thread, as level 1 cache and branch target buffer
+(BTB) may be shared between hardware threads in a CPU core.  A malicious
+program running on the sibling thread may influence its peer's BTB to
+steer its indirect branch speculations to gadget code, and measure the
+speculative execution's side effects left in level 1 cache to infer the
+victim's data.
+
+Attack scenarios
+----------------
+
+The following list of attack scenarios have been anticipated, but may
+not cover all possible attack vectors.
+
+1. A user process attacking the kernel
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   The attacker passes a parameter to the kernel via a register or
+   via a known address in memory during a syscall. Such parameter may
+   be used later by the kernel as an index to an array or to derive
+   a pointer for a Spectre variant 1 attack.  The index or pointer
+   is invalid, but bound checks are bypassed in the code branch taken
+   for speculative execution. This could cause privileged memory to be
+   accessed and leaked.
+
+   For kernel code that has been identified where data pointers could
+   potentially be influenced for Spectre attacks, new "nospec" accessor
+   macros are used to prevent speculative loading of data.
+
+   Spectre variant 2 attacker can :ref:`poison <poison_btb>` the branch
+   target buffer (BTB) before issuing syscall to launch an attack.
+   After entering the kernel, the kernel could use the poisoned branch
+   target buffer on indirect jump and jump to gadget code in speculative
+   execution.
+
+   If an attacker tries to control the memory addresses leaked during
+   speculative execution, he would also need to pass a parameter to the
+   gadget, either through a register or a known address in memory. After
+   the gadget has executed, he can measure the side effect.
+
+   The kernel can protect itself against consuming poisoned branch
+   target buffer entries by using return trampolines (also known as
+   "retpoline") :ref:`[3] <spec_ref3>` :ref:`[9] <spec_ref9>` for all
+   indirect branches. Return trampolines trap speculative execution paths
+   to prevent jumping to gadget code during speculative execution.
+   x86 CPUs with Enhanced Indirect Branch Restricted Speculation
+   (Enhanced IBRS) available in hardware should use the feature to
+   mitigate Spectre variant 2 instead of retpoline. Enhanced IBRS is
+   more efficient than retpoline.
+
+   There may be gadget code in firmware which could be exploited with
+   Spectre variant 2 attack by a rogue user process. To mitigate such
+   attacks on x86, Indirect Branch Restricted Speculation (IBRS) feature
+   is turned on before the kernel invokes any firmware code.
+
+2. A user process attacking another user process
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   A malicious user process can try to attack another user process,
+   either via a context switch on the same hardware thread, or from the
+   sibling hyperthread sharing a physical processor core on simultaneous
+   multi-threading (SMT) system.
+
+   Spectre variant 1 attacks generally require passing parameters
+   between the processes, which needs a data passing relationship, such
+   as remote procedure calls (RPC).  Those parameters are used in gadget
+   code to derive invalid data pointers accessing privileged memory in
+   the attacked process.
+
+   Spectre variant 2 attacks can be launched from a rogue process by
+   :ref:`poisoning <poison_btb>` the branch target buffer.  This can
+   influence the indirect branch targets for a victim process that either
+   runs later on the same hardware thread, or running concurrently on
+   a sibling hardware thread sharing the same physical core.
+
+   A user process can protect itself against Spectre variant 2 attacks
+   by using the prctl() syscall to disable indirect branch speculation
+   for itself.  An administrator can also cordon off an unsafe process
+   from polluting the branch target buffer by disabling the process's
+   indirect branch speculation. This comes with a performance cost
+   from not using indirect branch speculation and clearing the branch
+   target buffer.  When SMT is enabled on x86, for a process that has
+   indirect branch speculation disabled, Single Threaded Indirect Branch
+   Predictors (STIBP) :ref:`[4] <spec_ref4>` are turned on to prevent the
+   sibling thread from controlling branch target buffer.  In addition,
+   the Indirect Branch Prediction Barrier (IBPB) is issued to clear the
+   branch target buffer when context switching to and from such process.
+
+   On x86, the return stack buffer is stuffed on context switch.
+   This prevents the branch target buffer from being used for branch
+   prediction when the return stack buffer underflows while switching to
+   a deeper call stack. Any poisoned entries in the return stack buffer
+   left by the previous process will also be cleared.
+
+   User programs should use address space randomization to make attacks
+   more difficult (Set /proc/sys/kernel/randomize_va_space = 1 or 2).
+
+3. A virtualized guest attacking the host
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   The attack mechanism is similar to how user processes attack the
+   kernel.  The kernel is entered via hyper-calls or other virtualization
+   exit paths.
+
+   For Spectre variant 1 attacks, rogue guests can pass parameters
+   (e.g. in registers) via hyper-calls to derive invalid pointers to
+   speculate into privileged memory after entering the kernel.  For places
+   where such kernel code has been identified, nospec accessor macros
+   are used to stop speculative memory access.
+
+   For Spectre variant 2 attacks, rogue guests can :ref:`poison
+   <poison_btb>` the branch target buffer or return stack buffer, causing
+   the kernel to jump to gadget code in the speculative execution paths.
+
+   To mitigate variant 2, the host kernel can use return trampolines
+   for indirect branches to bypass the poisoned branch target buffer,
+   and flushing the return stack buffer on VM exit.  This prevents rogue
+   guests from affecting indirect branching in the host kernel.
+
+   To protect host processes from rogue guests, host processes can have
+   indirect branch speculation disabled via prctl().  The branch target
+   buffer is cleared before context switching to such processes.
+
+4. A virtualized guest attacking other guest
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   A rogue guest may attack another guest to get data accessible by the
+   other guest.
+
+   Spectre variant 1 attacks are possible if parameters can be passed
+   between guests.  This may be done via mechanisms such as shared memory
+   or message passing.  Such parameters could be used to derive data
+   pointers to privileged data in guest.  The privileged data could be
+   accessed by gadget code in the victim's speculation paths.
+
+   Spectre variant 2 attacks can be launched from a rogue guest by
+   :ref:`poisoning <poison_btb>` the branch target buffer or the return
+   stack buffer. Such poisoned entries could be used to influence
+   speculation execution paths in the victim guest.
+
+   Linux kernel mitigates attacks to other guests running in the same
+   CPU hardware thread by flushing the return stack buffer on VM exit,
+   and clearing the branch target buffer before switching to a new guest.
+
+   If SMT is used, Spectre variant 2 attacks from an untrusted guest
+   in the sibling hyperthread can be mitigated by the administrator,
+   by turning off the unsafe guest's indirect branch speculation via
+   prctl().  A guest can also protect itself by turning on microcode
+   based mitigations (such as IBPB or STIBP on x86) within the guest.
+
+.. _spectre_sys_info:
+
+Spectre system information
+--------------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current
+mitigation status of the system for Spectre: whether the system is
+vulnerable, and which mitigations are active.
+
+The sysfs file showing Spectre variant 1 mitigation status is:
+
+   /sys/devices/system/cpu/vulnerabilities/spectre_v1
+
+The possible values in this file are:
+
+  =======================================  =================================
+  'Mitigation: __user pointer sanitation'  Protection in kernel on a case by
+                                           case base with explicit pointer
+                                           sanitation.
+  =======================================  =================================
+
+However, the protections are put in place on a case by case basis,
+and there is no guarantee that all possible attack vectors for Spectre
+variant 1 are covered.
+
+The spectre_v2 kernel file reports if the kernel has been compiled with
+retpoline mitigation or if the CPU has hardware mitigation, and if the
+CPU has support for additional process-specific mitigation.
+
+This file also reports CPU features enabled by microcode to mitigate
+attack between user processes:
+
+1. Indirect Branch Prediction Barrier (IBPB) to add additional
+   isolation between processes of different users.
+2. Single Thread Indirect Branch Predictors (STIBP) to add additional
+   isolation between CPU threads running on the same core.
+
+These CPU features may impact performance when used and can be enabled
+per process on a case-by-case base.
+
+The sysfs file showing Spectre variant 2 mitigation status is:
+
+   /sys/devices/system/cpu/vulnerabilities/spectre_v2
+
+The possible values in this file are:
+
+  - Kernel status:
+
+  ====================================  =================================
+  'Not affected'                        The processor is not vulnerable
+  'Vulnerable'                          Vulnerable, no mitigation
+  'Mitigation: Full generic retpoline'  Software-focused mitigation
+  'Mitigation: Full AMD retpoline'      AMD-specific software mitigation
+  'Mitigation: Enhanced IBRS'           Hardware-focused mitigation
+  ====================================  =================================
+
+  - Firmware status: Show if Indirect Branch Restricted Speculation (IBRS) is
+    used to protect against Spectre variant 2 attacks when calling firmware (x86 only).
+
+  ========== =============================================================
+  'IBRS_FW'  Protection against user program attacks when calling firmware
+  ========== =============================================================
+
+  - Indirect branch prediction barrier (IBPB) status for protection between
+    processes of different users. This feature can be controlled through
+    prctl() per process, or through kernel command line options. This is
+    an x86 only feature. For more details see below.
+
+  ===================   ========================================================
+  'IBPB: disabled'      IBPB unused
+  'IBPB: always-on'     Use IBPB on all tasks
+  'IBPB: conditional'   Use IBPB on SECCOMP or indirect branch restricted tasks
+  ===================   ========================================================
+
+  - Single threaded indirect branch prediction (STIBP) status for protection
+    between different hyper threads. This feature can be controlled through
+    prctl per process, or through kernel command line options. This is x86
+    only feature. For more details see below.
+
+  ====================  ========================================================
+  'STIBP: disabled'     STIBP unused
+  'STIBP: forced'       Use STIBP on all tasks
+  'STIBP: conditional'  Use STIBP on SECCOMP or indirect branch restricted tasks
+  ====================  ========================================================
+
+  - Return stack buffer (RSB) protection status:
+
+  =============   ===========================================
+  'RSB filling'   Protection of RSB on context switch enabled
+  =============   ===========================================
+
+Full mitigation might require a microcode update from the CPU
+vendor. When the necessary microcode is not available, the kernel will
+report vulnerability.
+
+Turning on mitigation for Spectre variant 1 and Spectre variant 2
+-----------------------------------------------------------------
+
+1. Kernel mitigation
+^^^^^^^^^^^^^^^^^^^^
+
+   For the Spectre variant 1, vulnerable kernel code (as determined
+   by code audit or scanning tools) is annotated on a case by case
+   basis to use nospec accessor macros for bounds clipping :ref:`[2]
+   <spec_ref2>` to avoid any usable disclosure gadgets. However, it may
+   not cover all attack vectors for Spectre variant 1.
+
+   For Spectre variant 2 mitigation, the compiler turns indirect calls or
+   jumps in the kernel into equivalent return trampolines (retpolines)
+   :ref:`[3] <spec_ref3>` :ref:`[9] <spec_ref9>` to go to the target
+   addresses.  Speculative execution paths under retpolines are trapped
+   in an infinite loop to prevent any speculative execution jumping to
+   a gadget.
+
+   To turn on retpoline mitigation on a vulnerable CPU, the kernel
+   needs to be compiled with a gcc compiler that supports the
+   -mindirect-branch=thunk-extern -mindirect-branch-register options.
+   If the kernel is compiled with a Clang compiler, the compiler needs
+   to support -mretpoline-external-thunk option.  The kernel config
+   CONFIG_RETPOLINE needs to be turned on, and the CPU needs to run with
+   the latest updated microcode.
+
+   On Intel Skylake-era systems the mitigation covers most, but not all,
+   cases. See :ref:`[3] <spec_ref3>` for more details.
+
+   On CPUs with hardware mitigation for Spectre variant 2 (e.g. Enhanced
+   IBRS on x86), retpoline is automatically disabled at run time.
+
+   The retpoline mitigation is turned on by default on vulnerable
+   CPUs. It can be forced on or off by the administrator
+   via the kernel command line and sysfs control files. See
+   :ref:`spectre_mitigation_control_command_line`.
+
+   On x86, indirect branch restricted speculation is turned on by default
+   before invoking any firmware code to prevent Spectre variant 2 exploits
+   using the firmware.
+
+   Using kernel address space randomization (CONFIG_RANDOMIZE_SLAB=y
+   and CONFIG_SLAB_FREELIST_RANDOM=y in the kernel configuration) makes
+   attacks on the kernel generally more difficult.
+
+2. User program mitigation
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   User programs can mitigate Spectre variant 1 using LFENCE or "bounds
+   clipping". For more details see :ref:`[2] <spec_ref2>`.
+
+   For Spectre variant 2 mitigation, individual user programs
+   can be compiled with return trampolines for indirect branches.
+   This protects them from consuming poisoned entries in the branch
+   target buffer left by malicious software.  Alternatively, the
+   programs can disable their indirect branch speculation via prctl()
+   (See :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`).
+   On x86, this will turn on STIBP to guard against attacks from the
+   sibling thread when the user program is running, and use IBPB to
+   flush the branch target buffer when switching to/from the program.
+
+   Restricting indirect branch speculation on a user program will
+   also prevent the program from launching a variant 2 attack
+   on x86.  All sand-boxed SECCOMP programs have indirect branch
+   speculation restricted by default.  Administrators can change
+   that behavior via the kernel command line and sysfs control files.
+   See :ref:`spectre_mitigation_control_command_line`.
+
+   Programs that disable their indirect branch speculation will have
+   more overhead and run slower.
+
+   User programs should use address space randomization
+   (/proc/sys/kernel/randomize_va_space = 1 or 2) to make attacks more
+   difficult.
+
+3. VM mitigation
+^^^^^^^^^^^^^^^^
+
+   Within the kernel, Spectre variant 1 attacks from rogue guests are
+   mitigated on a case by case basis in VM exit paths. Vulnerable code
+   uses nospec accessor macros for "bounds clipping", to avoid any
+   usable disclosure gadgets.  However, this may not cover all variant
+   1 attack vectors.
+
+   For Spectre variant 2 attacks from rogue guests to the kernel, the
+   Linux kernel uses retpoline or Enhanced IBRS to prevent consumption of
+   poisoned entries in branch target buffer left by rogue guests.  It also
+   flushes the return stack buffer on every VM exit to prevent a return
+   stack buffer underflow so poisoned branch target buffer could be used,
+   or attacker guests leaving poisoned entries in the return stack buffer.
+
+   To mitigate guest-to-guest attacks in the same CPU hardware thread,
+   the branch target buffer is sanitized by flushing before switching
+   to a new guest on a CPU.
+
+   The above mitigations are turned on by default on vulnerable CPUs.
+
+   To mitigate guest-to-guest attacks from sibling thread when SMT is
+   in use, an untrusted guest running in the sibling thread can have
+   its indirect branch speculation disabled by administrator via prctl().
+
+   The kernel also allows guests to use any microcode based mitigation
+   they choose to use (such as IBPB or STIBP on x86) to protect themselves.
+
+.. _spectre_mitigation_control_command_line:
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+Spectre variant 2 mitigation can be disabled or force enabled at the
+kernel command line.
+
+	nospectre_v2
+
+		[X86] Disable all mitigations for the Spectre variant 2
+		(indirect branch prediction) vulnerability. System may
+		allow data leaks with this option, which is equivalent
+		to spectre_v2=off.
+
+
+        spectre_v2=
+
+		[X86] Control mitigation of Spectre variant 2
+		(indirect branch speculation) vulnerability.
+		The default operation protects the kernel from
+		user space attacks.
+
+		on
+			unconditionally enable, implies
+			spectre_v2_user=on
+		off
+			unconditionally disable, implies
+		        spectre_v2_user=off
+		auto
+			kernel detects whether your CPU model is
+		        vulnerable
+
+		Selecting 'on' will, and 'auto' may, choose a
+		mitigation method at run time according to the
+		CPU, the available microcode, the setting of the
+		CONFIG_RETPOLINE configuration option, and the
+		compiler with which the kernel was built.
+
+		Selecting 'on' will also enable the mitigation
+		against user space to user space task attacks.
+
+		Selecting 'off' will disable both the kernel and
+		the user space protections.
+
+		Specific mitigations can also be selected manually:
+
+		retpoline
+					replace indirect branches
+		retpoline,generic
+					google's original retpoline
+		retpoline,amd
+					AMD-specific minimal thunk
+
+		Not specifying this option is equivalent to
+		spectre_v2=auto.
+
+For user space mitigation:
+
+        spectre_v2_user=
+
+		[X86] Control mitigation of Spectre variant 2
+		(indirect branch speculation) vulnerability between
+		user space tasks
+
+		on
+			Unconditionally enable mitigations. Is
+			enforced by spectre_v2=on
+
+		off
+			Unconditionally disable mitigations. Is
+			enforced by spectre_v2=off
+
+		prctl
+			Indirect branch speculation is enabled,
+			but mitigation can be enabled via prctl
+			per thread. The mitigation control state
+			is inherited on fork.
+
+		prctl,ibpb
+			Like "prctl" above, but only STIBP is
+			controlled per thread. IBPB is issued
+			always when switching between different user
+			space processes.
+
+		seccomp
+			Same as "prctl" above, but all seccomp
+			threads will enable the mitigation unless
+			they explicitly opt out.
+
+		seccomp,ibpb
+			Like "seccomp" above, but only STIBP is
+			controlled per thread. IBPB is issued
+			always when switching between different
+			user space processes.
+
+		auto
+			Kernel selects the mitigation depending on
+			the available CPU features and vulnerability.
+
+		Default mitigation:
+		If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl"
+
+		Not specifying this option is equivalent to
+		spectre_v2_user=auto.
+
+		In general the kernel by default selects
+		reasonable mitigations for the current CPU. To
+		disable Spectre variant 2 mitigations, boot with
+		spectre_v2=off. Spectre variant 1 mitigations
+		cannot be disabled.
+
+Mitigation selection guide
+--------------------------
+
+1. Trusted userspace
+^^^^^^^^^^^^^^^^^^^^
+
+   If all userspace applications are from trusted sources and do not
+   execute externally supplied untrusted code, then the mitigations can
+   be disabled.
+
+2. Protect sensitive programs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   For security-sensitive programs that have secrets (e.g. crypto
+   keys), protection against Spectre variant 2 can be put in place by
+   disabling indirect branch speculation when the program is running
+   (See :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`).
+
+3. Sandbox untrusted programs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   Untrusted programs that could be a source of attacks can be cordoned
+   off by disabling their indirect branch speculation when they are run
+   (See :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`).
+   This prevents untrusted programs from polluting the branch target
+   buffer.  All programs running in SECCOMP sandboxes have indirect
+   branch speculation restricted by default. This behavior can be
+   changed via the kernel command line and sysfs control files. See
+   :ref:`spectre_mitigation_control_command_line`.
+
+3. High security mode
+^^^^^^^^^^^^^^^^^^^^^
+
+   All Spectre variant 2 mitigations can be forced on
+   at boot time for all programs (See the "on" option in
+   :ref:`spectre_mitigation_control_command_line`).  This will add
+   overhead as indirect branch speculations for all programs will be
+   restricted.
+
+   On x86, branch target buffer will be flushed with IBPB when switching
+   to a new program. STIBP is left on all the time to protect programs
+   against variant 2 attacks originating from programs running on
+   sibling threads.
+
+   Alternatively, STIBP can be used only when running programs
+   whose indirect branch speculation is explicitly disabled,
+   while IBPB is still used all the time when switching to a new
+   program to clear the branch target buffer (See "ibpb" option in
+   :ref:`spectre_mitigation_control_command_line`).  This "ibpb" option
+   has less performance cost than the "on" option, which leaves STIBP
+   on all the time.
+
+References on Spectre
+---------------------
+
+Intel white papers:
+
+.. _spec_ref1:
+
+[1] `Intel analysis of speculative execution side channels <https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/Intel-Analysis-of-Speculative-Execution-Side-Channels.pdf>`_.
+
+.. _spec_ref2:
+
+[2] `Bounds check bypass <https://software.intel.com/security-software-guidance/software-guidance/bounds-check-bypass>`_.
+
+.. _spec_ref3:
+
+[3] `Deep dive: Retpoline: A branch target injection mitigation <https://software.intel.com/security-software-guidance/insights/deep-dive-retpoline-branch-target-injection-mitigation>`_.
+
+.. _spec_ref4:
+
+[4] `Deep Dive: Single Thread Indirect Branch Predictors <https://software.intel.com/security-software-guidance/insights/deep-dive-single-thread-indirect-branch-predictors>`_.
+
+AMD white papers:
+
+.. _spec_ref5:
+
+[5] `AMD64 technology indirect branch control extension <https://developer.amd.com/wp-content/resources/Architecture_Guidelines_Update_Indirect_Branch_Control.pdf>`_.
+
+.. _spec_ref6:
+
+[6] `Software techniques for managing speculation on AMD processors <https://developer.amd.com/wp-content/resources/90343-B_SoftwareTechniquesforManagingSpeculation_WP_7-18Update_FNL.pdf>`_.
+
+ARM white papers:
+
+.. _spec_ref7:
+
+[7] `Cache speculation side-channels <https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability/download-the-whitepaper>`_.
+
+.. _spec_ref8:
+
+[8] `Cache speculation issues update <https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability/latest-updates/cache-speculation-issues-update>`_.
+
+Google white paper:
+
+.. _spec_ref9:
+
+[9] `Retpoline: a software construct for preventing branch-target-injection <https://support.google.com/faqs/answer/7625886>`_.
+
+MIPS white paper:
+
+.. _spec_ref10:
+
+[10] `MIPS: response on speculative execution and side channel vulnerabilities <https://www.mips.com/blog/mips-response-on-speculative-execution-and-side-channel-vulnerabilities/>`_.
+
+Academic papers:
+
+.. _spec_ref11:
+
+[11] `Spectre Attacks: Exploiting Speculative Execution <https://spectreattack.com/spectre.pdf>`_.
+
+.. _spec_ref12:
+
+[12] `NetSpectre: Read Arbitrary Memory over Network <https://arxiv.org/abs/1807.10535>`_.
+
+.. _spec_ref13:
+
+[13] `Spectre Returns! Speculation Attacks using the Return Stack Buffer <https://www.usenix.org/system/files/conference/woot18/woot18-paper-koruyeh.pdf>`_.
diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
index 1129c7550a48..7ddd8f667459 100644
--- a/Documentation/userspace-api/spec_ctrl.rst
+++ b/Documentation/userspace-api/spec_ctrl.rst
@@ -49,6 +49,8 @@ If PR_SPEC_PRCTL is set, then the per-task control of the mitigation is
 available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation
 misfeature will fail.
 
+.. _set_spec_ctrl:
+
 PR_SET_SPECULATION_CTRL
 -----------------------
 
-- 
cgit 


From b36d5cf75342e27bd1272971b69a3bbb42eae03b Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Tue, 21 May 2019 10:11:29 +0000
Subject: dt-bindings: clk: at91: add bindings for SAM9X60's slow clock
 controller

Add bindings for SAM9X60's slow clock controller.

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 Documentation/devicetree/bindings/clock/at91-clock.txt | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/clock/at91-clock.txt b/Documentation/devicetree/bindings/clock/at91-clock.txt
index b520280e33ff..13f45db3b66d 100644
--- a/Documentation/devicetree/bindings/clock/at91-clock.txt
+++ b/Documentation/devicetree/bindings/clock/at91-clock.txt
@@ -9,10 +9,11 @@ Slow Clock controller:
 Required properties:
 - compatible : shall be one of the following:
 	"atmel,at91sam9x5-sckc",
-	"atmel,sama5d3-sckc" or
-	"atmel,sama5d4-sckc":
+	"atmel,sama5d3-sckc",
+	"atmel,sama5d4-sckc" or
+	"microchip,sam9x60-sckc":
 		at91 SCKC (Slow Clock Controller)
-- #clock-cells : shall be 0.
+- #clock-cells : shall be 1 for "microchip,sam9x60-sckc" otherwise shall be 0.
 - clocks : shall be the input parent clock phandle for the clock.
 
 Optional properties:
-- 
cgit 


From 3b8c4a08a471d56ecaaca939c972fdf5b8255629 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 19 Jun 2019 16:10:16 +0100
Subject: keys: Kill off request_key_async{,_with_auxdata}

Kill off request_key_async{,_with_auxdata}() as they're not currently used.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/security/keys/core.rst        | 32 -----------------------------
 Documentation/security/keys/request-key.rst | 23 ++-------------------
 2 files changed, 2 insertions(+), 53 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index 003f1452a5b7..a0e245f9576f 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -1115,38 +1115,6 @@ payload contents" for more information.
     is a blob of length callout_len, if given (the length may be 0).
 
 
- *  A key can be requested asynchronously by calling one of::
-
-	struct key *request_key_async(const struct key_type *type,
-				      const char *description,
-				      const void *callout_info,
-				      size_t callout_len);
-
-    or::
-
-	struct key *request_key_async_with_auxdata(const struct key_type *type,
-						   const char *description,
-						   const char *callout_info,
-					     	   size_t callout_len,
-					     	   void *aux);
-
-    which are asynchronous equivalents of request_key() and
-    request_key_with_auxdata() respectively.
-
-    These two functions return with the key potentially still under
-    construction.  To wait for construction completion, the following should be
-    called::
-
-	int wait_for_key_construction(struct key *key, bool intr);
-
-    The function will wait for the key to finish being constructed and then
-    invokes key_validate() to return an appropriate value to indicate the state
-    of the key (0 indicates the key is usable).
-
-    If intr is true, then the wait can be interrupted by a signal, in which
-    case error ERESTARTSYS will be returned.
-
-
  *  To search for a key under RCU conditions, call::
 
 	struct key *request_key_rcu(const struct key_type *type,
diff --git a/Documentation/security/keys/request-key.rst b/Documentation/security/keys/request-key.rst
index 45049abdf290..5a210baa583a 100644
--- a/Documentation/security/keys/request-key.rst
+++ b/Documentation/security/keys/request-key.rst
@@ -21,21 +21,6 @@ or::
 					     size_t callout_len,
 					     void *aux);
 
-or::
-
-	struct key *request_key_async(const struct key_type *type,
-				      const char *description,
-				      const char *callout_info,
-				      size_t callout_len);
-
-or::
-
-	struct key *request_key_async_with_auxdata(const struct key_type *type,
-						   const char *description,
-						   const char *callout_info,
-					     	   size_t callout_len,
-						   void *aux);
-
 or::
 
 	struct key *request_key_rcu(const struct key_type *type,
@@ -53,15 +38,11 @@ does not need to link the key to a keyring to prevent it from being immediately
 destroyed.  The kernel interface returns a pointer directly to the key, and
 it's up to the caller to destroy the key.
 
-The request_key*_with_auxdata() calls are like the in-kernel request_key*()
-calls, except that they permit auxiliary data to be passed to the upcaller (the
+The request_key_with_auxdata() calls is like the in-kernel request_key() call,
+except that they permit auxiliary data to be passed to the upcaller (the
 default is NULL).  This is only useful for those key types that define their
 own upcall mechanism rather than using /sbin/request-key.
 
-The two async in-kernel calls may return keys that are still in the process of
-being constructed.  The two non-async ones will wait for construction to
-complete first.
-
 The request_key_rcu() call is like the in-kernel request_key() call, except
 that it doesn't check for keys that are under construction and doesn't attempt
 to construct missing keys.
-- 
cgit 


From dcf49dbc8077e278ddd1bc7298abc781496e8a08 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 26 Jun 2019 21:02:32 +0100
Subject: keys: Add a 'recurse' flag for keyring searches

Add a 'recurse' flag for keyring searches so that the flag can be omitted
and recursion disabled, thereby allowing just the nominated keyring to be
searched and none of the children.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/security/keys/core.rst | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index a0e245f9576f..ae930ae9d590 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -1162,11 +1162,13 @@ payload contents" for more information.
 
 	key_ref_t keyring_search(key_ref_t keyring_ref,
 				 const struct key_type *type,
-				 const char *description)
+				 const char *description,
+				 bool recurse)
 
-    This searches the keyring tree specified for a matching key. Error ENOKEY
-    is returned upon failure (use IS_ERR/PTR_ERR to determine). If successful,
-    the returned key will need to be released.
+    This searches the specified keyring only (recurse == false) or keyring tree
+    (recurse == true) specified for a matching key. Error ENOKEY is returned
+    upon failure (use IS_ERR/PTR_ERR to determine). If successful, the returned
+    key will need to be released.
 
     The possession attribute from the keyring reference is used to control
     access through the permissions mask and is propagated to the returned key
-- 
cgit 


From 26ae15e62d3c85cc6f221fa1830fc9350cb2152f Mon Sep 17 00:00:00 2001
From: Mircea Caprioru <mircea.caprioru@analog.com>
Date: Wed, 26 Jun 2019 20:31:05 +0100
Subject: Convert AD7124 bindings documentation to YAML format.

Signed-off-by: Mircea Caprioru <mircea.caprioru@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/adc/adi,ad7124.txt     |  75 -----------
 .../devicetree/bindings/iio/adc/adi,ad7124.yaml    | 144 +++++++++++++++++++++
 2 files changed, 144 insertions(+), 75 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/iio/adc/adi,ad7124.txt
 create mode 100644 Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7124.txt b/Documentation/devicetree/bindings/iio/adc/adi,ad7124.txt
deleted file mode 100644
index 416273dce569..000000000000
--- a/Documentation/devicetree/bindings/iio/adc/adi,ad7124.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-Analog Devices AD7124 ADC device driver
-
-Required properties for the AD7124:
-	- compatible: Must be one of "adi,ad7124-4" or "adi,ad7124-8"
-	- reg: SPI chip select number for the device
-	- spi-max-frequency: Max SPI frequency to use
-		see: Documentation/devicetree/bindings/spi/spi-bus.txt
-	- clocks: phandle to the master clock (mclk)
-		see: Documentation/devicetree/bindings/clock/clock-bindings.txt
-	- clock-names: Must be "mclk".
-	- interrupts: IRQ line for the ADC
-		see: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
-
-	  Required properties:
-		* #address-cells: Must be 1.
-		* #size-cells: Must be 0.
-
-	  Subnode(s) represent the external channels which are connected to the ADC.
-	  Each subnode represents one channel and has the following properties:
-		Required properties:
-			* reg: The channel number. It can have up to 4 channels on ad7124-4
-			  and 8 channels on ad7124-8, numbered from 0 to 15.
-			* diff-channels: see: Documentation/devicetree/bindings/iio/adc/adc.txt
-
-		Optional properties:
-			* bipolar: see: Documentation/devicetree/bindings/iio/adc/adc.txt
-			* adi,reference-select: Select the reference source to use when
-			  converting on the the specific channel. Valid values are:
-			  0: REFIN1(+)/REFIN1(−).
-			  1: REFIN2(+)/REFIN2(−).
-			  3: AVDD
-			  If this field is left empty, internal reference is selected.
-
-Optional properties:
-	- refin1-supply: refin1 supply can be used as reference for conversion.
-	- refin2-supply: refin2 supply can be used as reference for conversion.
-	- avdd-supply: avdd supply can be used as reference for conversion.
-
-Example:
-	adc@0 {
-		compatible = "adi,ad7124-4";
-		reg = <0>;
-		spi-max-frequency = <5000000>;
-		interrupts = <25 2>;
-		interrupt-parent = <&gpio>;
-		refin1-supply = <&adc_vref>;
-		clocks = <&ad7124_mclk>;
-		clock-names = "mclk";
-
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		channel@0 {
-			reg = <0>;
-			diff-channels = <0 1>;
-			adi,reference-select = <0>;
-		};
-
-		channel@1 {
-			reg = <1>;
-			bipolar;
-			diff-channels = <2 3>;
-			adi,reference-select = <0>;
-		};
-
-		channel@2 {
-			reg = <2>;
-			diff-channels = <4 5>;
-		};
-
-		channel@3 {
-			reg = <3>;
-			diff-channels = <6 7>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml
new file mode 100644
index 000000000000..1b3d84d08609
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 Analog Devices Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bindings/iio/adc/adi,ad7124.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices AD7124 ADC device driver
+
+maintainers:
+  - Stefan Popa <stefan.popa@analog.com>
+
+description: |
+  Bindings for the Analog Devices AD7124 ADC device. Datasheet can be
+  found here:
+    https://www.analog.com/media/en/technical-documentation/data-sheets/AD7124-8.pdf
+
+properties:
+  compatible:
+    enum:
+      - adi,ad7124-4
+      - adi,ad7124-8
+
+  reg:
+    description: SPI chip select number for the device
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+    description: phandle to the master clock (mclk)
+
+  clock-names:
+    items:
+      - const: mclk
+
+  interrupts:
+    description: IRQ line for the ADC
+    maxItems: 1
+
+  '#address-cells':
+    const: 1
+
+  '#size-cells':
+    const: 0
+
+  refin1-supply:
+    description: refin1 supply can be used as reference for conversion.
+    maxItems: 1
+
+  refin2-supply:
+    description: refin2 supply can be used as reference for conversion.
+    maxItems: 1
+
+  avdd-supply:
+    description: avdd supply can be used as reference for conversion.
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - interrupts
+
+patternProperties:
+  "^channel@([0-9]|1[0-5])$":
+    type: object
+    description: |
+      Represents the external channels which are connected to the ADC.
+      See Documentation/devicetree/bindings/iio/adc/adc.txt.
+
+    properties:
+      reg:
+        description: |
+          The channel number. It can have up to 8 channels on ad7124-4
+          and 16 channels on ad7124-8, numbered from 0 to 15.
+        items:
+         minimum: 0
+         maximum: 15
+
+      adi,reference-select:
+        description: |
+          Select the reference source to use when converting on
+          the specific channel. Valid values are:
+          0: REFIN1(+)/REFIN1(−).
+          1: REFIN2(+)/REFIN2(−).
+          3: AVDD
+          If this field is left empty, internal reference is selected.
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/uint32
+          - enum: [0, 1, 3]
+
+      diff-channels:
+        description: see Documentation/devicetree/bindings/iio/adc/adc.txt
+        items:
+          minimum: 0
+          maximum: 15
+
+      bipolar:
+        description: see Documentation/devicetree/bindings/iio/adc/adc.txt
+        type: boolean
+
+    required:
+      - reg
+      - diff-channels
+
+examples:
+  - |
+    adc@0 {
+      compatible = "adi,ad7124-4";
+      reg = <0>;
+      spi-max-frequency = <5000000>;
+      interrupts = <25 2>;
+      interrupt-parent = <&gpio>;
+      refin1-supply = <&adc_vref>;
+      clocks = <&ad7124_mclk>;
+      clock-names = "mclk";
+
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      channel@0 {
+        reg = <0>;
+        diff-channels = <0 1>;
+        adi,reference-select = <0>;
+      };
+
+      channel@1 {
+        reg = <1>;
+        bipolar;
+        diff-channels = <2 3>;
+        adi,reference-select = <0>;
+      };
+
+      channel@2 {
+        reg = <2>;
+        diff-channels = <4 5>;
+      };
+
+      channel@3 {
+        reg = <3>;
+        diff-channels = <6 7>;
+      };
+    };
-- 
cgit 


From c444e956a26115e605b4a345a39da294576f8d69 Mon Sep 17 00:00:00 2001
From: Mircea Caprioru <mircea.caprioru@analog.com>
Date: Tue, 25 Jun 2019 11:11:28 +0300
Subject: dt-bindings: iio: adc: Add buffered input property

This patch adds the buffered mode device tree property for positive and
negative inputs. Each option can be enabled independently.

In buffered mode, the input channel feeds into a high impedance input stage
of the buffer amplifier. Therefore, the input can tolerate significant
source impedances and is tailored for direct connection to external
resistive type sensors such as strain gages or RTDs.

Signed-off-by: Mircea Caprioru <mircea.caprioru@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml
index 1b3d84d08609..cf494a08b837 100644
--- a/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml
@@ -100,6 +100,14 @@ patternProperties:
         description: see Documentation/devicetree/bindings/iio/adc/adc.txt
         type: boolean
 
+      adi,buffered-positive:
+        description: Enable buffered mode for positive input.
+        type: boolean
+
+      adi,buffered-negative:
+        description: Enable buffered mode for negative input.
+        type: boolean
+
     required:
       - reg
       - diff-channels
@@ -123,6 +131,7 @@ examples:
         reg = <0>;
         diff-channels = <0 1>;
         adi,reference-select = <0>;
+        adi,buffered-positive;
       };
 
       channel@1 {
@@ -130,6 +139,8 @@ examples:
         bipolar;
         diff-channels = <2 3>;
         adi,reference-select = <0>;
+        adi,buffered-positive;
+        adi,buffered-negative;
       };
 
       channel@2 {
-- 
cgit 


From 84ed6482c6d889dcfa9c6b6e8b17c53748ab4f43 Mon Sep 17 00:00:00 2001
From: Stefan Popa <stefan.popa@analog.com>
Date: Mon, 24 Jun 2019 18:12:42 +0300
Subject: dt-bindings: iio: frequency: Add ADF4372 PLL documentation

Document support for ADF4372 SPI Wideband Synthesizer.

Signed-off-by: Stefan Popa <stefan.popa@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/frequency/adf4371.yaml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/frequency/adf4371.yaml b/Documentation/devicetree/bindings/iio/frequency/adf4371.yaml
index d7adf07421cf..060900156ae5 100644
--- a/Documentation/devicetree/bindings/iio/frequency/adf4371.yaml
+++ b/Documentation/devicetree/bindings/iio/frequency/adf4371.yaml
@@ -4,19 +4,21 @@
 $id: http://devicetree.org/schemas/iio/frequency/adf4371.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Analog Devices ADF4371 Wideband Synthesizer
+title: Analog Devices ADF4371/ADF4372 Wideband Synthesizers
 
 maintainers:
   - Popa Stefan <stefan.popa@analog.com>
 
 description: |
-  Analog Devices ADF4371 SPI Wideband Synthesizer
+  Analog Devices ADF4371/ADF4372 SPI Wideband Synthesizers
   https://www.analog.com/media/en/technical-documentation/data-sheets/adf4371.pdf
+  https://www.analog.com/media/en/technical-documentation/data-sheets/adf4372.pdf
 
 properties:
   compatible:
     enum:
       - adi,adf4371
+      - adi,adf4372
 
   reg:
     maxItems: 1
-- 
cgit 


From def914a4c3899b6b3705c8ea67d29972f5652a14 Mon Sep 17 00:00:00 2001
From: Stefan Popa <stefan.popa@analog.com>
Date: Mon, 24 Jun 2019 18:13:56 +0300
Subject: iio: frequency: adf4371: Add support for output stage mute

Another feature of the ADF4371/ADF4372 is that the supply current to the
RF8P and RF8N output stage can shut down until the ADF4371 achieves lock
as measured by the digital lock detect circuitry. The mute to lock
detect bit (MUTE_LD) in REG25 enables this function.

Signed-off-by: Stefan Popa <stefan.popa@analog.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/frequency/adf4371.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/frequency/adf4371.yaml b/Documentation/devicetree/bindings/iio/frequency/adf4371.yaml
index 060900156ae5..7ec3ec94356b 100644
--- a/Documentation/devicetree/bindings/iio/frequency/adf4371.yaml
+++ b/Documentation/devicetree/bindings/iio/frequency/adf4371.yaml
@@ -33,6 +33,13 @@ properties:
       Must be "clkin"
     maxItems: 1
 
+  adi,mute-till-lock-en:
+    type: boolean
+    description:
+      If this property is present, then the supply current to RF8P and RF8N
+      output stage will shut down until the ADF4371/ADF4372 achieves lock as
+      measured by the digital lock detect circuitry.
+
 required:
   - compatible
   - reg
-- 
cgit 


From cca5e0b8a430c888c5de1b5d36b87c085354f2c8 Mon Sep 17 00:00:00 2001
From: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
Date: Wed, 26 Jun 2019 11:49:42 -0600
Subject: Documentation: PGP: update for newer HW devices

Newer devices like Yubikey 5 and Nitrokey Pro 2 have added support for
NISTP's implementation of ECC cryptography, so update the guide
accordingly and add a note on when to use nistp256 and when to use
ed25519 for generating S keys.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/process/maintainer-pgp-guide.rst | 31 ++++++++++++++------------
 1 file changed, 17 insertions(+), 14 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/process/maintainer-pgp-guide.rst b/Documentation/process/maintainer-pgp-guide.rst
index 4bab7464ff8c..17db11b7ed48 100644
--- a/Documentation/process/maintainer-pgp-guide.rst
+++ b/Documentation/process/maintainer-pgp-guide.rst
@@ -238,7 +238,10 @@ your new subkey::
     work.
 
     If for some reason you prefer to stay with RSA subkeys, just replace
-    "ed25519" with "rsa2048" in the above command.
+    "ed25519" with "rsa2048" in the above command. Additionally, if you
+    plan to use a hardware device that does not support ED25519 ECC
+    keys, like Nitrokey Pro or a Yubikey, then you should use
+    "nistp256" instead or "ed25519."
 
 
 Back up your master key for disaster recovery
@@ -432,23 +435,23 @@ Available smartcard devices
 
 Unless all your laptops and workstations have smartcard readers, the
 easiest is to get a specialized USB device that implements smartcard
-functionality.  There are several options available:
+functionality. There are several options available:
 
 - `Nitrokey Start`_: Open hardware and Free Software, based on FSI
-  Japan's `Gnuk`_. Offers support for ECC keys, but fewest security
-  features (such as resistance to tampering or some side-channel
-  attacks).
-- `Nitrokey Pro`_: Similar to the Nitrokey Start, but more
-  tamper-resistant and offers more security features, but no ECC
-  support.
-- `Yubikey 4`_: proprietary hardware and software, but cheaper than
+  Japan's `Gnuk`_. One of the few available commercial devices that
+  support ED25519 ECC keys, but offer fewest security features (such as
+  resistance to tampering or some side-channel attacks).
+- `Nitrokey Pro 2`_: Similar to the Nitrokey Start, but more
+  tamper-resistant and offers more security features. Pro 2 supports ECC
+  cryptography (NISTP).
+- `Yubikey 5`_: proprietary hardware and software, but cheaper than
   Nitrokey Pro and comes available in the USB-C form that is more useful
   with newer laptops. Offers additional security features such as FIDO
-  U2F, but no ECC.
+  U2F, among others, and now finally supports ECC keys (NISTP).
 
 `LWN has a good review`_ of some of the above models, as well as several
-others. If you want to use ECC keys, your best bet among commercially
-available devices is the Nitrokey Start.
+others. Your choice will depend on cost, shipping availability in your
+geographical region, and open/proprietary hardware considerations.
 
 .. note::
 
@@ -457,8 +460,8 @@ available devices is the Nitrokey Start.
     Foundation.
 
 .. _`Nitrokey Start`: https://shop.nitrokey.com/shop/product/nitrokey-start-6
-.. _`Nitrokey Pro`: https://shop.nitrokey.com/shop/product/nitrokey-pro-3
-.. _`Yubikey 4`: https://www.yubico.com/product/yubikey-4-series/
+.. _`Nitrokey Pro 2`: https://shop.nitrokey.com/shop/product/nitrokey-pro-2-3
+.. _`Yubikey 5`: https://www.yubico.com/products/yubikey-5-overview/
 .. _Gnuk: http://www.fsij.org/doc-gnuk/
 .. _`LWN has a good review`: https://lwn.net/Articles/736231/
 .. _`qualify for a free Nitrokey Start`: https://www.kernel.org/nitrokey-digital-tokens-for-kernel-developers.html
-- 
cgit 


From b4f4174ae982dfc855c56e91776920e0166da1bf Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sat, 22 Jun 2019 14:47:46 -0300
Subject: docs: zh_CN: submitting-drivers.rst: Remove a duplicated
 Documentation/

Somehow, this file ended with Documentation/ twice.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/translations/zh_CN/process/submitting-drivers.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/translations/zh_CN/process/submitting-drivers.rst b/Documentation/translations/zh_CN/process/submitting-drivers.rst
index 72c6cd935821..72f4f45c98de 100644
--- a/Documentation/translations/zh_CN/process/submitting-drivers.rst
+++ b/Documentation/translations/zh_CN/process/submitting-drivers.rst
@@ -22,7 +22,7 @@
 兴趣的是显卡驱动程序，你也许应该访问 XFree86 项目(http://www.xfree86.org/)
 和／或 X.org 项目 (http://x.org)。
 
-另请参阅 Documentation/Documentation/translations/zh_CN/process/submitting-patches.rst 文档。
+另请参阅 Documentation/translations/zh_CN/process/submitting-patches.rst 文档。
 
 
 分配设备号
-- 
cgit 


From 7c116d22ad23809767c5ec06affa19b9bb163d97 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 26 Jun 2019 10:35:11 -0300
Subject: docs: filesystems: Remove uneeded .rst extension on toctables

There's no need to use a .rst on Sphinx toc tables. As most of
the Documentation don't use, remove the remaing occurrences.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/ext4/index.rst | 8 ++++----
 Documentation/filesystems/index.rst      | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/ext4/index.rst b/Documentation/filesystems/ext4/index.rst
index 3be3e54d480d..705d813d558f 100644
--- a/Documentation/filesystems/ext4/index.rst
+++ b/Documentation/filesystems/ext4/index.rst
@@ -8,7 +8,7 @@ ext4 Data Structures and Algorithms
    :maxdepth: 6
    :numbered:
 
-   about.rst
-   overview.rst
-   globals.rst
-   dynamic.rst
+   about
+   overview
+   globals
+   dynamic
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 35644840a690..1651173f1118 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -17,7 +17,7 @@ algorithms work.
    :maxdepth: 2
 
    vfs
-   path-lookup.rst
+   path-lookup
    api-summary
    splice
 
@@ -41,4 +41,4 @@ Documentation for individual filesystem types can be found here.
 .. toctree::
    :maxdepth: 2
 
-   binderfs.rst
+   binderfs
-- 
cgit 


From 3222bcf5f1155dde0ba6c596253439dece3f8262 Mon Sep 17 00:00:00 2001
From: Vivek Unune <npcomplete13@gmail.com>
Date: Fri, 21 Jun 2019 16:53:08 -0400
Subject: arm64: dts: rockchip: Add support for Hugsun X99 TV Box

Add devicetree support for Hugsun X99 TV Box based on RK3399 SoC

Tested with LibreElec running kernel v5.1.2.
Following peripherals tested and work:

Peripheral works:
- UART2 debug
- eMMC
- USB 3.0 port
- USB 2.0 port
- sdio, sd-card
- HDMI
- Ethernet
- WiFi/BT

Not tested:
- Type-C port
- OPTICAL
- IR

Signed-off-by: Vivek Unune <npcomplete13@gmail.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 Documentation/devicetree/bindings/arm/rockchip.yaml    | 5 +++++
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/rockchip.yaml b/Documentation/devicetree/bindings/arm/rockchip.yaml
index eef822ce2ad4..34865042f4e4 100644
--- a/Documentation/devicetree/bindings/arm/rockchip.yaml
+++ b/Documentation/devicetree/bindings/arm/rockchip.yaml
@@ -316,6 +316,11 @@ properties:
           - const: haoyu,marsboard-rk3066
           - const: rockchip,rk3066a
 
+      - description: Hugsun X99 TV Box
+        items:
+          - const: hugsun,x99
+          - const: rockchip,rk3399
+
       - description: Khadas Edge series boards
         items:
           - enum:
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 33a65a45e319..ab2520c67b16 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -375,6 +375,8 @@ patternProperties:
     description: Hewlett Packard
   "^holtek,.*":
     description: Holtek Semiconductor, Inc.
+  "^hugsun,.*":
+    description: Shenzhen Hugsun Technology Co. Ltd.
   "^hwacom,.*":
     description: HwaCom Systems Inc.
   "^i2se,.*":
-- 
cgit 


From 8811249f0cfdd6552152173f881fddc7f20f427e Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Thu, 11 Apr 2019 12:28:02 +0800
Subject: vfs: update d_make_root() description

Clearify d_make_root() usage, error handling and cleanup
requirements.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/porting | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 3bd1148d8bb6..f6714f7e6bb5 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -428,8 +428,19 @@ release it yourself.
 --
 [mandatory]
 	d_alloc_root() is gone, along with a lot of bugs caused by code
-misusing it.  Replacement: d_make_root(inode).  The difference is,
-d_make_root() drops the reference to inode if dentry allocation fails.  
+misusing it.  Replacement: d_make_root(inode).  On success d_make_root(inode)
+allocates and returns a new dentry instantiated with the passed in inode.
+On failure NULL is returned and the passed in inode is dropped so the reference
+to inode is consumed in all cases and failure handling need not do any cleanup
+for the inode.  If d_make_root(inode) is passed a NULL inode it returns NULL
+and also requires no further error handling. Typical usage is:
+
+	inode = foofs_new_inode(....);
+	s->s_root = d_make_inode(inode);
+	if (!s->s_root)
+		/* Nothing needed for the inode cleanup */
+		return -ENOMEM;
+	...
 
 --
 [mandatory]
-- 
cgit 


From 4e1c94b047715e12942536d8457e9aa692e8b5fc Mon Sep 17 00:00:00 2001
From: Arthur Simchaev <Arthur.Simchaev@sandisk.com>
Date: Tue, 25 Jun 2019 15:36:00 +0300
Subject: scsi: ufs: Documentation: Announce ufs-tool v1.0

The ufs-tool stable release v1.0 is available at:

	https://github.com/westerndigitalcorporation/ufs-tool

Feedback and bug reports, as always, are welcomed.

Signed-off-by: Arthur Simchaev <Arthur.Simchaev@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 Documentation/scsi/ufs.txt | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/scsi/ufs.txt b/Documentation/scsi/ufs.txt
index 1769f71c4c20..81842ec3e116 100644
--- a/Documentation/scsi/ufs.txt
+++ b/Documentation/scsi/ufs.txt
@@ -158,6 +158,13 @@ send SG_IO with the applicable sg_io_v4:
 If you wish to read or write a descriptor, use the appropriate xferp of
 sg_io_v4.
 
+The userspace tool that interacts with the ufs-bsg endpoint and uses its
+upiu-based protocol is available at:
+
+	https://github.com/westerndigitalcorporation/ufs-tool
+
+For more detailed information about the tool and its supported
+features, please see the tool's README.
 
 UFS Specifications can be found at,
 UFS - http://www.jedec.org/sites/default/files/docs/JESD220.pdf
-- 
cgit 


From 478bb8f32e26c2497c1c594f547ecbfec9c55b98 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Mon, 3 Jun 2019 10:26:12 +0300
Subject: dt-bindings: mfd: Document first ROHM BD70528 bindings

Document bindings for regulators (3 bucks, 3 LDOs and 2 LED
drivers) and 4 GPIO pins which can be configured for I/O or
as interrupt sources withe configurable trigger levels.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 .../devicetree/bindings/mfd/rohm,bd70528-pmic.txt  | 102 +++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mfd/rohm,bd70528-pmic.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mfd/rohm,bd70528-pmic.txt b/Documentation/devicetree/bindings/mfd/rohm,bd70528-pmic.txt
new file mode 100644
index 000000000000..c3c02ce73cde
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/rohm,bd70528-pmic.txt
@@ -0,0 +1,102 @@
+* ROHM BD70528 Power Management Integrated Circuit bindings
+
+BD70528MWV is an ultra-low quiescent current general purpose, single-chip,
+power management IC for battery-powered portable devices. The IC
+integrates 3 ultra-low current consumption buck converters, 3 LDOs and 2
+LED Drivers. Also included are 4 GPIOs, a real-time clock (RTC), a 32kHz
+clock gate, high-accuracy VREF for use with an external ADC, flexible
+dual-input power path, 10 bit SAR ADC for battery temperature monitor and
+1S battery charger with scalable charge currents.
+
+Required properties:
+ - compatible		: Should be "rohm,bd70528"
+ - reg			: I2C slave address.
+ - interrupts		: The interrupt line the device is connected to.
+ - interrupt-controller	: To indicate BD70528 acts as an interrupt controller.
+ - #interrupt-cells	: Should be 2. Usage is compliant to the 2 cells
+			  variant of ../interrupt-controller/interrupts.txt
+ - gpio-controller	: To indicate BD70528 acts as a GPIO controller.
+ - #gpio-cells		: Should be 2. The first cell is the pin number and
+			  the second cell is used to specify flags. See
+			  ../gpio/gpio.txt for more information.
+ - #clock-cells		: Should be 0.
+ - regulators:		: List of child nodes that specify the regulators.
+			  Please see ../regulator/rohm,bd70528-regulator.txt
+
+Optional properties:
+ - clock-output-names	: Should contain name for output clock.
+
+Example:
+/* External oscillator */
+osc: oscillator {
+	compatible = "fixed-clock";
+	#clock-cells = <1>;
+	clock-frequency  = <32768>;
+	clock-output-names = "osc";
+};
+
+pmic: pmic@4b {
+	compatible = "rohm,bd70528";
+	reg = <0x4b>;
+	interrupt-parent = <&gpio1>;
+	interrupts = <29 GPIO_ACTIVE_LOW>;
+	clocks = <&osc 0>;
+	#clock-cells = <0>;
+	clock-output-names = "bd70528-32k-out";
+	#gpio-cells = <2>;
+	gpio-controller;
+	interrupt-controller;
+	#interrupt-cells = <2>;
+
+	regulators {
+		buck1: BUCK1 {
+			regulator-name = "buck1";
+			regulator-min-microvolt = <1200000>;
+			regulator-max-microvolt = <3400000>;
+			regulator-boot-on;
+			regulator-ramp-delay = <125>;
+		};
+		buck2: BUCK2 {
+			regulator-name = "buck2";
+			regulator-min-microvolt = <1200000>;
+			regulator-max-microvolt = <3300000>;
+			regulator-boot-on;
+			regulator-ramp-delay = <125>;
+		};
+		buck3: BUCK3 {
+			regulator-name = "buck3";
+			regulator-min-microvolt = <800000>;
+			regulator-max-microvolt = <1800000>;
+			regulator-boot-on;
+			regulator-ramp-delay = <250>;
+		};
+		ldo1: LDO1 {
+			regulator-name = "ldo1";
+			regulator-min-microvolt = <1650000>;
+			regulator-max-microvolt = <3300000>;
+			regulator-boot-on;
+		};
+		ldo2: LDO2 {
+			regulator-name = "ldo2";
+			regulator-min-microvolt = <1650000>;
+			regulator-max-microvolt = <3300000>;
+			regulator-boot-on;
+		};
+
+		ldo3: LDO3 {
+			regulator-name = "ldo3";
+			regulator-min-microvolt = <1650000>;
+			regulator-max-microvolt = <3300000>;
+		};
+		led_ldo1: LED_LDO1 {
+			regulator-name = "led_ldo1";
+			regulator-min-microvolt = <200000>;
+			regulator-max-microvolt = <300000>;
+		};
+		led_ldo2: LED_LDO2 {
+			regulator-name = "led_ldo2";
+			regulator-min-microvolt = <200000>;
+			regulator-max-microvolt = <300000>;
+		};
+	};
+};
-- 
cgit 


From 1910ea428f28e5731a2cea32b86e71953829f15d Mon Sep 17 00:00:00 2001
From: Bastien Nocera <hadess@hadess.net>
Date: Mon, 24 Jun 2019 11:42:14 -0400
Subject: media: rc: Prefer KEY_NUMERIC_* for number buttons on remotes

Prefer KEY_NUMERIC_* for number buttons on remotes. Now all the remotes
use KEY_NUMERIC_[0-9] for the number buttons rather than keys that
could be affected by modifiers (Caps-Lock, or Num-Lock) or regional
keymaps.

Created using:
sed -i  's/KEY_\([0-9]\) /KEY_NUMERIC_\1 /' *.c
sed -i  's/KEY_\([0-9]\)}/KEY_NUMERIC_\1}/' *.c
sed -i  's/``KEY_\([0-9]\)/``KEY_NUMERIC_\1/' Documentation/media/uapi/rc/rc-tables.rst

Signed-off-by: Bastien Nocera <hadess@hadess.net>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/rc/rc-tables.rst | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/media/uapi/rc/rc-tables.rst b/Documentation/media/uapi/rc/rc-tables.rst
index 177ac44fa0fa..468d8aa3849b 100644
--- a/Documentation/media/uapi/rc/rc-tables.rst
+++ b/Documentation/media/uapi/rc/rc-tables.rst
@@ -54,7 +54,7 @@ the remote via /dev/input/event devices.
 
     -  .. row 3
 
-       -  ``KEY_0``
+       -  ``KEY_NUMERIC_0``
 
        -  Keyboard digit 0
 
@@ -62,7 +62,7 @@ the remote via /dev/input/event devices.
 
     -  .. row 4
 
-       -  ``KEY_1``
+       -  ``KEY_NUMERIC_1``
 
        -  Keyboard digit 1
 
@@ -70,7 +70,7 @@ the remote via /dev/input/event devices.
 
     -  .. row 5
 
-       -  ``KEY_2``
+       -  ``KEY_NUMERIC_2``
 
        -  Keyboard digit 2
 
@@ -78,7 +78,7 @@ the remote via /dev/input/event devices.
 
     -  .. row 6
 
-       -  ``KEY_3``
+       -  ``KEY_NUMERIC_3``
 
        -  Keyboard digit 3
 
@@ -86,7 +86,7 @@ the remote via /dev/input/event devices.
 
     -  .. row 7
 
-       -  ``KEY_4``
+       -  ``KEY_NUMERIC_4``
 
        -  Keyboard digit 4
 
@@ -94,7 +94,7 @@ the remote via /dev/input/event devices.
 
     -  .. row 8
 
-       -  ``KEY_5``
+       -  ``KEY_NUMERIC_5``
 
        -  Keyboard digit 5
 
@@ -102,7 +102,7 @@ the remote via /dev/input/event devices.
 
     -  .. row 9
 
-       -  ``KEY_6``
+       -  ``KEY_NUMERIC_6``
 
        -  Keyboard digit 6
 
@@ -110,7 +110,7 @@ the remote via /dev/input/event devices.
 
     -  .. row 10
 
-       -  ``KEY_7``
+       -  ``KEY_NUMERIC_7``
 
        -  Keyboard digit 7
 
@@ -118,7 +118,7 @@ the remote via /dev/input/event devices.
 
     -  .. row 11
 
-       -  ``KEY_8``
+       -  ``KEY_NUMERIC_8``
 
        -  Keyboard digit 8
 
@@ -126,7 +126,7 @@ the remote via /dev/input/event devices.
 
     -  .. row 12
 
-       -  ``KEY_9``
+       -  ``KEY_NUMERIC_9``
 
        -  Keyboard digit 9
 
-- 
cgit 


From 46fb4749788159412622c5bb9d087a217cc00152 Mon Sep 17 00:00:00 2001
From: Bastien Nocera <hadess@hadess.net>
Date: Tue, 25 Jun 2019 08:03:48 -0400
Subject: media: doc-rst: Fix typos

Not sure how codespell thinks "sroweam" is a real word.

Signed-off-by: Bastien Nocera <hadess@hadess.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/rc/rc-tables.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/media/uapi/rc/rc-tables.rst b/Documentation/media/uapi/rc/rc-tables.rst
index 468d8aa3849b..20d7c686922b 100644
--- a/Documentation/media/uapi/rc/rc-tables.rst
+++ b/Documentation/media/uapi/rc/rc-tables.rst
@@ -196,7 +196,7 @@ the remote via /dev/input/event devices.
 
        -  ``KEY_PAUSE``
 
-       -  Pause sroweam
+       -  Pause stream
 
        -  PAUSE / FREEZE
 
@@ -220,7 +220,7 @@ the remote via /dev/input/event devices.
 
        -  ``KEY_STOP``
 
-       -  Stop sroweam
+       -  Stop stream
 
        -  STOP
 
@@ -228,7 +228,7 @@ the remote via /dev/input/event devices.
 
        -  ``KEY_RECORD``
 
-       -  Start/stop recording sroweam
+       -  Start/stop recording stream
 
        -  CAPTURE / REC / RECORD/PAUSE
 
@@ -577,7 +577,7 @@ the remote via /dev/input/event devices.
 
        -  ``KEY_CLEAR``
 
-       -  Stop sroweam and return to default input video/audio
+       -  Stop stream and return to default input video/audio
 
        -  CLEAR / RESET / BOSS KEY
 
@@ -593,7 +593,7 @@ the remote via /dev/input/event devices.
 
        -  ``KEY_FAVORITES``
 
-       -  Open the favorites sroweam window
+       -  Open the favorites stream window
 
        -  TV WALL / Favorites
 
-- 
cgit 


From 6bbe6f5732faeabb4bb583726ec2d7f9739532bd Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 18:05:28 -0300
Subject: docs: thermal: convert to ReST

Rename the thermal documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 Documentation/thermal/cpu-cooling-api.rst          | 107 +++
 Documentation/thermal/cpu-cooling-api.txt          |  92 ---
 Documentation/thermal/exynos_thermal               |  77 --
 Documentation/thermal/exynos_thermal.rst           |  90 +++
 Documentation/thermal/exynos_thermal_emulation     |  53 --
 Documentation/thermal/exynos_thermal_emulation.rst |  61 ++
 Documentation/thermal/index.rst                    |  18 +
 Documentation/thermal/intel_powerclamp.rst         | 320 +++++++++
 Documentation/thermal/intel_powerclamp.txt         | 317 --------
 Documentation/thermal/nouveau_thermal              |  82 ---
 Documentation/thermal/nouveau_thermal.rst          |  96 +++
 Documentation/thermal/power_allocator.rst          | 271 +++++++
 Documentation/thermal/power_allocator.txt          | 247 -------
 Documentation/thermal/sysfs-api.rst                | 798 +++++++++++++++++++++
 Documentation/thermal/sysfs-api.txt                | 636 ----------------
 Documentation/thermal/x86_pkg_temperature_thermal  |  47 --
 .../thermal/x86_pkg_temperature_thermal.rst        |  55 ++
 17 files changed, 1816 insertions(+), 1551 deletions(-)
 create mode 100644 Documentation/thermal/cpu-cooling-api.rst
 delete mode 100644 Documentation/thermal/cpu-cooling-api.txt
 delete mode 100644 Documentation/thermal/exynos_thermal
 create mode 100644 Documentation/thermal/exynos_thermal.rst
 delete mode 100644 Documentation/thermal/exynos_thermal_emulation
 create mode 100644 Documentation/thermal/exynos_thermal_emulation.rst
 create mode 100644 Documentation/thermal/index.rst
 create mode 100644 Documentation/thermal/intel_powerclamp.rst
 delete mode 100644 Documentation/thermal/intel_powerclamp.txt
 delete mode 100644 Documentation/thermal/nouveau_thermal
 create mode 100644 Documentation/thermal/nouveau_thermal.rst
 create mode 100644 Documentation/thermal/power_allocator.rst
 delete mode 100644 Documentation/thermal/power_allocator.txt
 create mode 100644 Documentation/thermal/sysfs-api.rst
 delete mode 100644 Documentation/thermal/sysfs-api.txt
 delete mode 100644 Documentation/thermal/x86_pkg_temperature_thermal
 create mode 100644 Documentation/thermal/x86_pkg_temperature_thermal.rst

(limited to 'Documentation')

diff --git a/Documentation/thermal/cpu-cooling-api.rst b/Documentation/thermal/cpu-cooling-api.rst
new file mode 100644
index 000000000000..645d914c45a6
--- /dev/null
+++ b/Documentation/thermal/cpu-cooling-api.rst
@@ -0,0 +1,107 @@
+=======================
+CPU cooling APIs How To
+=======================
+
+Written by Amit Daniel Kachhap <amit.kachhap@linaro.org>
+
+Updated: 6 Jan 2015
+
+Copyright (c)  2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
+
+0. Introduction
+===============
+
+The generic cpu cooling(freq clipping) provides registration/unregistration APIs
+to the caller. The binding of the cooling devices to the trip point is left for
+the user. The registration APIs returns the cooling device pointer.
+
+1. cpu cooling APIs
+===================
+
+1.1 cpufreq registration/unregistration APIs
+--------------------------------------------
+
+    ::
+
+	struct thermal_cooling_device
+	*cpufreq_cooling_register(struct cpumask *clip_cpus)
+
+    This interface function registers the cpufreq cooling device with the name
+    "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
+    cooling devices.
+
+   clip_cpus:
+	cpumask of cpus where the frequency constraints will happen.
+
+    ::
+
+	struct thermal_cooling_device
+	*of_cpufreq_cooling_register(struct cpufreq_policy *policy)
+
+    This interface function registers the cpufreq cooling device with
+    the name "thermal-cpufreq-%x" linking it with a device tree node, in
+    order to bind it via the thermal DT code. This api can support multiple
+    instances of cpufreq cooling devices.
+
+    policy:
+	CPUFreq policy.
+
+
+    ::
+
+	void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
+
+    This interface function unregisters the "thermal-cpufreq-%x" cooling device.
+
+    cdev: Cooling device pointer which has to be unregistered.
+
+2. Power models
+===============
+
+The power API registration functions provide a simple power model for
+CPUs.  The current power is calculated as dynamic power (static power isn't
+supported currently).  This power model requires that the operating-points of
+the CPUs are registered using the kernel's opp library and the
+`cpufreq_frequency_table` is assigned to the `struct device` of the
+cpu.  If you are using CONFIG_CPUFREQ_DT then the
+`cpufreq_frequency_table` should already be assigned to the cpu
+device.
+
+The dynamic power consumption of a processor depends on many factors.
+For a given processor implementation the primary factors are:
+
+- The time the processor spends running, consuming dynamic power, as
+  compared to the time in idle states where dynamic consumption is
+  negligible.  Herein we refer to this as 'utilisation'.
+- The voltage and frequency levels as a result of DVFS.  The DVFS
+  level is a dominant factor governing power consumption.
+- In running time the 'execution' behaviour (instruction types, memory
+  access patterns and so forth) causes, in most cases, a second order
+  variation.  In pathological cases this variation can be significant,
+  but typically it is of a much lesser impact than the factors above.
+
+A high level dynamic power consumption model may then be represented as::
+
+	Pdyn = f(run) * Voltage^2 * Frequency * Utilisation
+
+f(run) here represents the described execution behaviour and its
+result has a units of Watts/Hz/Volt^2 (this often expressed in
+mW/MHz/uVolt^2)
+
+The detailed behaviour for f(run) could be modelled on-line.  However,
+in practice, such an on-line model has dependencies on a number of
+implementation specific processor support and characterisation
+factors.  Therefore, in initial implementation that contribution is
+represented as a constant coefficient.  This is a simplification
+consistent with the relative contribution to overall power variation.
+
+In this simplified representation our model becomes::
+
+	Pdyn = Capacitance * Voltage^2 * Frequency * Utilisation
+
+Where `capacitance` is a constant that represents an indicative
+running time dynamic power coefficient in fundamental units of
+mW/MHz/uVolt^2.  Typical values for mobile CPUs might lie in range
+from 100 to 500.  For reference, the approximate values for the SoC in
+ARM's Juno Development Platform are 530 for the Cortex-A57 cluster and
+140 for the Cortex-A53 cluster.
diff --git a/Documentation/thermal/cpu-cooling-api.txt b/Documentation/thermal/cpu-cooling-api.txt
deleted file mode 100644
index 7df567eaea1a..000000000000
--- a/Documentation/thermal/cpu-cooling-api.txt
+++ /dev/null
@@ -1,92 +0,0 @@
-CPU cooling APIs How To
-===================================
-
-Written by Amit Daniel Kachhap <amit.kachhap@linaro.org>
-
-Updated: 6 Jan 2015
-
-Copyright (c)  2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
-
-0. Introduction
-
-The generic cpu cooling(freq clipping) provides registration/unregistration APIs
-to the caller. The binding of the cooling devices to the trip point is left for
-the user. The registration APIs returns the cooling device pointer.
-
-1. cpu cooling APIs
-
-1.1 cpufreq registration/unregistration APIs
-1.1.1 struct thermal_cooling_device *cpufreq_cooling_register(
-	struct cpumask *clip_cpus)
-
-    This interface function registers the cpufreq cooling device with the name
-    "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
-    cooling devices.
-
-   clip_cpus: cpumask of cpus where the frequency constraints will happen.
-
-1.1.2 struct thermal_cooling_device *of_cpufreq_cooling_register(
-					struct cpufreq_policy *policy)
-
-    This interface function registers the cpufreq cooling device with
-    the name "thermal-cpufreq-%x" linking it with a device tree node, in
-    order to bind it via the thermal DT code. This api can support multiple
-    instances of cpufreq cooling devices.
-
-    policy: CPUFreq policy.
-
-1.1.3 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
-
-    This interface function unregisters the "thermal-cpufreq-%x" cooling device.
-
-    cdev: Cooling device pointer which has to be unregistered.
-
-2. Power models
-
-The power API registration functions provide a simple power model for
-CPUs.  The current power is calculated as dynamic power (static power isn't
-supported currently).  This power model requires that the operating-points of
-the CPUs are registered using the kernel's opp library and the
-`cpufreq_frequency_table` is assigned to the `struct device` of the
-cpu.  If you are using CONFIG_CPUFREQ_DT then the
-`cpufreq_frequency_table` should already be assigned to the cpu
-device.
-
-The dynamic power consumption of a processor depends on many factors.
-For a given processor implementation the primary factors are:
-
-- The time the processor spends running, consuming dynamic power, as
-  compared to the time in idle states where dynamic consumption is
-  negligible.  Herein we refer to this as 'utilisation'.
-- The voltage and frequency levels as a result of DVFS.  The DVFS
-  level is a dominant factor governing power consumption.
-- In running time the 'execution' behaviour (instruction types, memory
-  access patterns and so forth) causes, in most cases, a second order
-  variation.  In pathological cases this variation can be significant,
-  but typically it is of a much lesser impact than the factors above.
-
-A high level dynamic power consumption model may then be represented as:
-
-Pdyn = f(run) * Voltage^2 * Frequency * Utilisation
-
-f(run) here represents the described execution behaviour and its
-result has a units of Watts/Hz/Volt^2 (this often expressed in
-mW/MHz/uVolt^2)
-
-The detailed behaviour for f(run) could be modelled on-line.  However,
-in practice, such an on-line model has dependencies on a number of
-implementation specific processor support and characterisation
-factors.  Therefore, in initial implementation that contribution is
-represented as a constant coefficient.  This is a simplification
-consistent with the relative contribution to overall power variation.
-
-In this simplified representation our model becomes:
-
-Pdyn = Capacitance * Voltage^2 * Frequency * Utilisation
-
-Where `capacitance` is a constant that represents an indicative
-running time dynamic power coefficient in fundamental units of
-mW/MHz/uVolt^2.  Typical values for mobile CPUs might lie in range
-from 100 to 500.  For reference, the approximate values for the SoC in
-ARM's Juno Development Platform are 530 for the Cortex-A57 cluster and
-140 for the Cortex-A53 cluster.
diff --git a/Documentation/thermal/exynos_thermal b/Documentation/thermal/exynos_thermal
deleted file mode 100644
index 9010c4416967..000000000000
--- a/Documentation/thermal/exynos_thermal
+++ /dev/null
@@ -1,77 +0,0 @@
-Kernel driver exynos_tmu
-=================
-
-Supported chips:
-* ARM SAMSUNG EXYNOS4, EXYNOS5 series of SoC
-  Datasheet: Not publicly available
-
-Authors: Donggeun Kim <dg77.kim@samsung.com>
-Authors: Amit Daniel <amit.daniel@samsung.com>
-
-TMU controller Description:
----------------------------
-
-This driver allows to read temperature inside SAMSUNG EXYNOS4/5 series of SoC.
-
-The chip only exposes the measured 8-bit temperature code value
-through a register.
-Temperature can be taken from the temperature code.
-There are three equations converting from temperature to temperature code.
-
-The three equations are:
-  1. Two point trimming
-	Tc = (T - 25) * (TI2 - TI1) / (85 - 25) + TI1
-
-  2. One point trimming
-	Tc = T + TI1 - 25
-
-  3. No trimming
-	Tc = T + 50
-
-  Tc: Temperature code, T: Temperature,
-  TI1: Trimming info for 25 degree Celsius (stored at TRIMINFO register)
-       Temperature code measured at 25 degree Celsius which is unchanged
-  TI2: Trimming info for 85 degree Celsius (stored at TRIMINFO register)
-       Temperature code measured at 85 degree Celsius which is unchanged
-
-TMU(Thermal Management Unit) in EXYNOS4/5 generates interrupt
-when temperature exceeds pre-defined levels.
-The maximum number of configurable threshold is five.
-The threshold levels are defined as follows:
-  Level_0: current temperature > trigger_level_0 + threshold
-  Level_1: current temperature > trigger_level_1 + threshold
-  Level_2: current temperature > trigger_level_2 + threshold
-  Level_3: current temperature > trigger_level_3 + threshold
-
-  The threshold and each trigger_level are set
-  through the corresponding registers.
-
-When an interrupt occurs, this driver notify kernel thermal framework
-with the function exynos_report_trigger.
-Although an interrupt condition for level_0 can be set,
-it can be used to synchronize the cooling action.
-
-TMU driver description:
------------------------
-
-The exynos thermal driver is structured as,
-
-					Kernel Core thermal framework
-				(thermal_core.c, step_wise.c, cpu_cooling.c)
-								^
-								|
-								|
-TMU configuration data -------> TMU Driver  <------> Exynos Core thermal wrapper
-(exynos_tmu_data.c)	      (exynos_tmu.c)	   (exynos_thermal_common.c)
-(exynos_tmu_data.h)	      (exynos_tmu.h)	   (exynos_thermal_common.h)
-
-a) TMU configuration data: This consist of TMU register offsets/bitfields
-		described through structure exynos_tmu_registers. Also several
-		other platform data (struct exynos_tmu_platform_data) members
-		are used to configure the TMU.
-b) TMU driver: This component initialises the TMU controller and sets different
-		thresholds. It invokes core thermal implementation with the call
-		exynos_report_trigger.
-c) Exynos Core thermal wrapper: This provides 3 wrapper function to use the
-		Kernel core thermal framework. They are exynos_unregister_thermal,
-		exynos_register_thermal and exynos_report_trigger.
diff --git a/Documentation/thermal/exynos_thermal.rst b/Documentation/thermal/exynos_thermal.rst
new file mode 100644
index 000000000000..5bd556566c70
--- /dev/null
+++ b/Documentation/thermal/exynos_thermal.rst
@@ -0,0 +1,90 @@
+========================
+Kernel driver exynos_tmu
+========================
+
+Supported chips:
+
+* ARM SAMSUNG EXYNOS4, EXYNOS5 series of SoC
+
+  Datasheet: Not publicly available
+
+Authors: Donggeun Kim <dg77.kim@samsung.com>
+Authors: Amit Daniel <amit.daniel@samsung.com>
+
+TMU controller Description:
+---------------------------
+
+This driver allows to read temperature inside SAMSUNG EXYNOS4/5 series of SoC.
+
+The chip only exposes the measured 8-bit temperature code value
+through a register.
+Temperature can be taken from the temperature code.
+There are three equations converting from temperature to temperature code.
+
+The three equations are:
+  1. Two point trimming::
+
+	Tc = (T - 25) * (TI2 - TI1) / (85 - 25) + TI1
+
+  2. One point trimming::
+
+	Tc = T + TI1 - 25
+
+  3. No trimming::
+
+	Tc = T + 50
+
+  Tc:
+       Temperature code, T: Temperature,
+  TI1:
+       Trimming info for 25 degree Celsius (stored at TRIMINFO register)
+       Temperature code measured at 25 degree Celsius which is unchanged
+  TI2:
+       Trimming info for 85 degree Celsius (stored at TRIMINFO register)
+       Temperature code measured at 85 degree Celsius which is unchanged
+
+TMU(Thermal Management Unit) in EXYNOS4/5 generates interrupt
+when temperature exceeds pre-defined levels.
+The maximum number of configurable threshold is five.
+The threshold levels are defined as follows::
+
+  Level_0: current temperature > trigger_level_0 + threshold
+  Level_1: current temperature > trigger_level_1 + threshold
+  Level_2: current temperature > trigger_level_2 + threshold
+  Level_3: current temperature > trigger_level_3 + threshold
+
+The threshold and each trigger_level are set
+through the corresponding registers.
+
+When an interrupt occurs, this driver notify kernel thermal framework
+with the function exynos_report_trigger.
+Although an interrupt condition for level_0 can be set,
+it can be used to synchronize the cooling action.
+
+TMU driver description:
+-----------------------
+
+The exynos thermal driver is structured as::
+
+					Kernel Core thermal framework
+				(thermal_core.c, step_wise.c, cpu_cooling.c)
+								^
+								|
+								|
+  TMU configuration data -----> TMU Driver  <----> Exynos Core thermal wrapper
+  (exynos_tmu_data.c)	      (exynos_tmu.c)	   (exynos_thermal_common.c)
+  (exynos_tmu_data.h)	      (exynos_tmu.h)	   (exynos_thermal_common.h)
+
+a) TMU configuration data:
+		This consist of TMU register offsets/bitfields
+		described through structure exynos_tmu_registers. Also several
+		other platform data (struct exynos_tmu_platform_data) members
+		are used to configure the TMU.
+b) TMU driver:
+		This component initialises the TMU controller and sets different
+		thresholds. It invokes core thermal implementation with the call
+		exynos_report_trigger.
+c) Exynos Core thermal wrapper:
+		This provides 3 wrapper function to use the
+		Kernel core thermal framework. They are exynos_unregister_thermal,
+		exynos_register_thermal and exynos_report_trigger.
diff --git a/Documentation/thermal/exynos_thermal_emulation b/Documentation/thermal/exynos_thermal_emulation
deleted file mode 100644
index b15efec6ca28..000000000000
--- a/Documentation/thermal/exynos_thermal_emulation
+++ /dev/null
@@ -1,53 +0,0 @@
-EXYNOS EMULATION MODE
-========================
-
-Copyright (C) 2012 Samsung Electronics
-
-Written by Jonghwa Lee <jonghwa3.lee@samsung.com>
-
-Description
------------
-
-Exynos 4x12 (4212, 4412) and 5 series provide emulation mode for thermal management unit.
-Thermal emulation mode supports software debug for TMU's operation. User can set temperature
-manually with software code and TMU will read current temperature from user value not from
-sensor's value.
-
-Enabling CONFIG_THERMAL_EMULATION option will make this support available.
-When it's enabled, sysfs node will be created as
-/sys/devices/virtual/thermal/thermal_zone'zone id'/emul_temp.
-
-The sysfs node, 'emul_node', will contain value 0 for the initial state. When you input any
-temperature you want to update to sysfs node, it automatically enable emulation mode and
-current temperature will be changed into it.
-(Exynos also supports user changeable delay time which would be used to delay of
- changing temperature. However, this node only uses same delay of real sensing time, 938us.)
-
-Exynos emulation mode requires synchronous of value changing and enabling. It means when you
-want to update the any value of delay or next temperature, then you have to enable emulation
-mode at the same time. (Or you have to keep the mode enabling.) If you don't, it fails to
-change the value to updated one and just use last succeessful value repeatedly. That's why
-this node gives users the right to change termerpature only. Just one interface makes it more
-simply to use.
-
-Disabling emulation mode only requires writing value 0 to sysfs node.
-
-
-TEMP	120 |
-	    |
-	100 |
-	    |
-	 80 |
-	    |		     	 	 +-----------
-	 60 |      		     	 |	    |
-	    |	           +-------------|          |
-	 40 |              |         	 |          |
-	    |		   |	     	 |          |
-	 20 |		   |	     	 |          +----------
-	    |	 	   |	     	 |          |          |
-	  0 |______________|_____________|__________|__________|_________
-		   A	    	 A	    A	   	       A     TIME
-		   |<----->|	 |<----->|  |<----->|	       |
-		   | 938us |  	 |	 |  |       |          |
-emulation    :  0  50	   |  	 70      |  20      |          0
-current temp :   sensor   50		 70         20	      sensor
diff --git a/Documentation/thermal/exynos_thermal_emulation.rst b/Documentation/thermal/exynos_thermal_emulation.rst
new file mode 100644
index 000000000000..c21d10838bc5
--- /dev/null
+++ b/Documentation/thermal/exynos_thermal_emulation.rst
@@ -0,0 +1,61 @@
+=====================
+Exynos Emulation Mode
+=====================
+
+Copyright (C) 2012 Samsung Electronics
+
+Written by Jonghwa Lee <jonghwa3.lee@samsung.com>
+
+Description
+-----------
+
+Exynos 4x12 (4212, 4412) and 5 series provide emulation mode for thermal
+management unit. Thermal emulation mode supports software debug for
+TMU's operation. User can set temperature manually with software code
+and TMU will read current temperature from user value not from sensor's
+value.
+
+Enabling CONFIG_THERMAL_EMULATION option will make this support
+available. When it's enabled, sysfs node will be created as
+/sys/devices/virtual/thermal/thermal_zone'zone id'/emul_temp.
+
+The sysfs node, 'emul_node', will contain value 0 for the initial state.
+When you input any temperature you want to update to sysfs node, it
+automatically enable emulation mode and current temperature will be
+changed into it.
+
+(Exynos also supports user changeable delay time which would be used to
+delay of changing temperature. However, this node only uses same delay
+of real sensing time, 938us.)
+
+Exynos emulation mode requires synchronous of value changing and
+enabling. It means when you want to update the any value of delay or
+next temperature, then you have to enable emulation mode at the same
+time. (Or you have to keep the mode enabling.) If you don't, it fails to
+change the value to updated one and just use last succeessful value
+repeatedly. That's why this node gives users the right to change
+termerpature only. Just one interface makes it more simply to use.
+
+Disabling emulation mode only requires writing value 0 to sysfs node.
+
+::
+
+
+  TEMP	120 |
+	    |
+	100 |
+	    |
+	 80 |
+	    |				 +-----------
+	 60 |      			 |	    |
+	    |		   +-------------|          |
+	 40 |              |         	 |          |
+	    |		   |		 |          |
+	 20 |		   |		 |          +----------
+	    |		   |		 |          |          |
+	  0 |______________|_____________|__________|__________|_________
+		   A		 A	    A		       A     TIME
+		   |<----->|	 |<----->|  |<----->|	       |
+		   | 938us |  	 |	 |  |       |          |
+  emulation   : 0  50	   |  	 70      |  20      |          0
+  current temp:   sensor   50		 70         20	      sensor
diff --git a/Documentation/thermal/index.rst b/Documentation/thermal/index.rst
new file mode 100644
index 000000000000..8c1c00146cad
--- /dev/null
+++ b/Documentation/thermal/index.rst
@@ -0,0 +1,18 @@
+:orphan:
+
+=======
+Thermal
+=======
+
+.. toctree::
+   :maxdepth: 1
+
+   cpu-cooling-api
+   sysfs-api
+   power_allocator
+
+   exynos_thermal
+   exynos_thermal_emulation
+   intel_powerclamp
+   nouveau_thermal
+   x86_pkg_temperature_thermal
diff --git a/Documentation/thermal/intel_powerclamp.rst b/Documentation/thermal/intel_powerclamp.rst
new file mode 100644
index 000000000000..3f6dfb0b3ea6
--- /dev/null
+++ b/Documentation/thermal/intel_powerclamp.rst
@@ -0,0 +1,320 @@
+=======================
+Intel Powerclamp Driver
+=======================
+
+By:
+  - Arjan van de Ven <arjan@linux.intel.com>
+  - Jacob Pan <jacob.jun.pan@linux.intel.com>
+
+.. Contents:
+
+	(*) Introduction
+	    - Goals and Objectives
+
+	(*) Theory of Operation
+	    - Idle Injection
+	    - Calibration
+
+	(*) Performance Analysis
+	    - Effectiveness and Limitations
+	    - Power vs Performance
+	    - Scalability
+	    - Calibration
+	    - Comparison with Alternative Techniques
+
+	(*) Usage and Interfaces
+	    - Generic Thermal Layer (sysfs)
+	    - Kernel APIs (TBD)
+
+INTRODUCTION
+============
+
+Consider the situation where a system’s power consumption must be
+reduced at runtime, due to power budget, thermal constraint, or noise
+level, and where active cooling is not preferred. Software managed
+passive power reduction must be performed to prevent the hardware
+actions that are designed for catastrophic scenarios.
+
+Currently, P-states, T-states (clock modulation), and CPU offlining
+are used for CPU throttling.
+
+On Intel CPUs, C-states provide effective power reduction, but so far
+they’re only used opportunistically, based on workload. With the
+development of intel_powerclamp driver, the method of synchronizing
+idle injection across all online CPU threads was introduced. The goal
+is to achieve forced and controllable C-state residency.
+
+Test/Analysis has been made in the areas of power, performance,
+scalability, and user experience. In many cases, clear advantage is
+shown over taking the CPU offline or modulating the CPU clock.
+
+
+THEORY OF OPERATION
+===================
+
+Idle Injection
+--------------
+
+On modern Intel processors (Nehalem or later), package level C-state
+residency is available in MSRs, thus also available to the kernel.
+
+These MSRs are::
+
+      #define MSR_PKG_C2_RESIDENCY      0x60D
+      #define MSR_PKG_C3_RESIDENCY      0x3F8
+      #define MSR_PKG_C6_RESIDENCY      0x3F9
+      #define MSR_PKG_C7_RESIDENCY      0x3FA
+
+If the kernel can also inject idle time to the system, then a
+closed-loop control system can be established that manages package
+level C-state. The intel_powerclamp driver is conceived as such a
+control system, where the target set point is a user-selected idle
+ratio (based on power reduction), and the error is the difference
+between the actual package level C-state residency ratio and the target idle
+ratio.
+
+Injection is controlled by high priority kernel threads, spawned for
+each online CPU.
+
+These kernel threads, with SCHED_FIFO class, are created to perform
+clamping actions of controlled duty ratio and duration. Each per-CPU
+thread synchronizes its idle time and duration, based on the rounding
+of jiffies, so accumulated errors can be prevented to avoid a jittery
+effect. Threads are also bound to the CPU such that they cannot be
+migrated, unless the CPU is taken offline. In this case, threads
+belong to the offlined CPUs will be terminated immediately.
+
+Running as SCHED_FIFO and relatively high priority, also allows such
+scheme to work for both preemptable and non-preemptable kernels.
+Alignment of idle time around jiffies ensures scalability for HZ
+values. This effect can be better visualized using a Perf timechart.
+The following diagram shows the behavior of kernel thread
+kidle_inject/cpu. During idle injection, it runs monitor/mwait idle
+for a given "duration", then relinquishes the CPU to other tasks,
+until the next time interval.
+
+The NOHZ schedule tick is disabled during idle time, but interrupts
+are not masked. Tests show that the extra wakeups from scheduler tick
+have a dramatic impact on the effectiveness of the powerclamp driver
+on large scale systems (Westmere system with 80 processors).
+
+::
+
+  CPU0
+		    ____________          ____________
+  kidle_inject/0   |   sleep    |  mwait |  sleep     |
+	  _________|            |________|            |_______
+				 duration
+  CPU1
+		    ____________          ____________
+  kidle_inject/1   |   sleep    |  mwait |  sleep     |
+	  _________|            |________|            |_______
+				^
+				|
+				|
+				roundup(jiffies, interval)
+
+Only one CPU is allowed to collect statistics and update global
+control parameters. This CPU is referred to as the controlling CPU in
+this document. The controlling CPU is elected at runtime, with a
+policy that favors BSP, taking into account the possibility of a CPU
+hot-plug.
+
+In terms of dynamics of the idle control system, package level idle
+time is considered largely as a non-causal system where its behavior
+cannot be based on the past or current input. Therefore, the
+intel_powerclamp driver attempts to enforce the desired idle time
+instantly as given input (target idle ratio). After injection,
+powerclamp monitors the actual idle for a given time window and adjust
+the next injection accordingly to avoid over/under correction.
+
+When used in a causal control system, such as a temperature control,
+it is up to the user of this driver to implement algorithms where
+past samples and outputs are included in the feedback. For example, a
+PID-based thermal controller can use the powerclamp driver to
+maintain a desired target temperature, based on integral and
+derivative gains of the past samples.
+
+
+
+Calibration
+-----------
+During scalability testing, it is observed that synchronized actions
+among CPUs become challenging as the number of cores grows. This is
+also true for the ability of a system to enter package level C-states.
+
+To make sure the intel_powerclamp driver scales well, online
+calibration is implemented. The goals for doing such a calibration
+are:
+
+a) determine the effective range of idle injection ratio
+b) determine the amount of compensation needed at each target ratio
+
+Compensation to each target ratio consists of two parts:
+
+	a) steady state error compensation
+	This is to offset the error occurring when the system can
+	enter idle without extra wakeups (such as external interrupts).
+
+	b) dynamic error compensation
+	When an excessive amount of wakeups occurs during idle, an
+	additional idle ratio can be added to quiet interrupts, by
+	slowing down CPU activities.
+
+A debugfs file is provided for the user to examine compensation
+progress and results, such as on a Westmere system::
+
+  [jacob@nex01 ~]$ cat
+  /sys/kernel/debug/intel_powerclamp/powerclamp_calib
+  controlling cpu: 0
+  pct confidence steady dynamic (compensation)
+  0       0       0       0
+  1       1       0       0
+  2       1       1       0
+  3       3       1       0
+  4       3       1       0
+  5       3       1       0
+  6       3       1       0
+  7       3       1       0
+  8       3       1       0
+  ...
+  30      3       2       0
+  31      3       2       0
+  32      3       1       0
+  33      3       2       0
+  34      3       1       0
+  35      3       2       0
+  36      3       1       0
+  37      3       2       0
+  38      3       1       0
+  39      3       2       0
+  40      3       3       0
+  41      3       1       0
+  42      3       2       0
+  43      3       1       0
+  44      3       1       0
+  45      3       2       0
+  46      3       3       0
+  47      3       0       0
+  48      3       2       0
+  49      3       3       0
+
+Calibration occurs during runtime. No offline method is available.
+Steady state compensation is used only when confidence levels of all
+adjacent ratios have reached satisfactory level. A confidence level
+is accumulated based on clean data collected at runtime. Data
+collected during a period without extra interrupts is considered
+clean.
+
+To compensate for excessive amounts of wakeup during idle, additional
+idle time is injected when such a condition is detected. Currently,
+we have a simple algorithm to double the injection ratio. A possible
+enhancement might be to throttle the offending IRQ, such as delaying
+EOI for level triggered interrupts. But it is a challenge to be
+non-intrusive to the scheduler or the IRQ core code.
+
+
+CPU Online/Offline
+------------------
+Per-CPU kernel threads are started/stopped upon receiving
+notifications of CPU hotplug activities. The intel_powerclamp driver
+keeps track of clamping kernel threads, even after they are migrated
+to other CPUs, after a CPU offline event.
+
+
+Performance Analysis
+====================
+This section describes the general performance data collected on
+multiple systems, including Westmere (80P) and Ivy Bridge (4P, 8P).
+
+Effectiveness and Limitations
+-----------------------------
+The maximum range that idle injection is allowed is capped at 50
+percent. As mentioned earlier, since interrupts are allowed during
+forced idle time, excessive interrupts could result in less
+effectiveness. The extreme case would be doing a ping -f to generated
+flooded network interrupts without much CPU acknowledgement. In this
+case, little can be done from the idle injection threads. In most
+normal cases, such as scp a large file, applications can be throttled
+by the powerclamp driver, since slowing down the CPU also slows down
+network protocol processing, which in turn reduces interrupts.
+
+When control parameters change at runtime by the controlling CPU, it
+may take an additional period for the rest of the CPUs to catch up
+with the changes. During this time, idle injection is out of sync,
+thus not able to enter package C- states at the expected ratio. But
+this effect is minor, in that in most cases change to the target
+ratio is updated much less frequently than the idle injection
+frequency.
+
+Scalability
+-----------
+Tests also show a minor, but measurable, difference between the 4P/8P
+Ivy Bridge system and the 80P Westmere server under 50% idle ratio.
+More compensation is needed on Westmere for the same amount of
+target idle ratio. The compensation also increases as the idle ratio
+gets larger. The above reason constitutes the need for the
+calibration code.
+
+On the IVB 8P system, compared to an offline CPU, powerclamp can
+achieve up to 40% better performance per watt. (measured by a spin
+counter summed over per CPU counting threads spawned for all running
+CPUs).
+
+Usage and Interfaces
+====================
+The powerclamp driver is registered to the generic thermal layer as a
+cooling device. Currently, it’s not bound to any thermal zones::
+
+  jacob@chromoly:/sys/class/thermal/cooling_device14$ grep . *
+  cur_state:0
+  max_state:50
+  type:intel_powerclamp
+
+cur_state allows user to set the desired idle percentage. Writing 0 to
+cur_state will stop idle injection. Writing a value between 1 and
+max_state will start the idle injection. Reading cur_state returns the
+actual and current idle percentage. This may not be the same value
+set by the user in that current idle percentage depends on workload
+and includes natural idle. When idle injection is disabled, reading
+cur_state returns value -1 instead of 0 which is to avoid confusing
+100% busy state with the disabled state.
+
+Example usage:
+- To inject 25% idle time::
+
+	$ sudo sh -c "echo 25 > /sys/class/thermal/cooling_device80/cur_state
+
+If the system is not busy and has more than 25% idle time already,
+then the powerclamp driver will not start idle injection. Using Top
+will not show idle injection kernel threads.
+
+If the system is busy (spin test below) and has less than 25% natural
+idle time, powerclamp kernel threads will do idle injection. Forced
+idle time is accounted as normal idle in that common code path is
+taken as the idle task.
+
+In this example, 24.1% idle is shown. This helps the system admin or
+user determine the cause of slowdown, when a powerclamp driver is in action::
+
+
+  Tasks: 197 total,   1 running, 196 sleeping,   0 stopped,   0 zombie
+  Cpu(s): 71.2%us,  4.7%sy,  0.0%ni, 24.1%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
+  Mem:   3943228k total,  1689632k used,  2253596k free,    74960k buffers
+  Swap:  4087804k total,        0k used,  4087804k free,   945336k cached
+
+    PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
+   3352 jacob     20   0  262m  644  428 S  286  0.0   0:17.16 spin
+   3341 root     -51   0     0    0    0 D   25  0.0   0:01.62 kidle_inject/0
+   3344 root     -51   0     0    0    0 D   25  0.0   0:01.60 kidle_inject/3
+   3342 root     -51   0     0    0    0 D   25  0.0   0:01.61 kidle_inject/1
+   3343 root     -51   0     0    0    0 D   25  0.0   0:01.60 kidle_inject/2
+   2935 jacob     20   0  696m 125m  35m S    5  3.3   0:31.11 firefox
+   1546 root      20   0  158m  20m 6640 S    3  0.5   0:26.97 Xorg
+   2100 jacob     20   0 1223m  88m  30m S    3  2.3   0:23.68 compiz
+
+Tests have shown that by using the powerclamp driver as a cooling
+device, a PID based userspace thermal controller can manage to
+control CPU temperature effectively, when no other thermal influence
+is added. For example, a UltraBook user can compile the kernel under
+certain temperature (below most active trip points).
diff --git a/Documentation/thermal/intel_powerclamp.txt b/Documentation/thermal/intel_powerclamp.txt
deleted file mode 100644
index b5df21168fbc..000000000000
--- a/Documentation/thermal/intel_powerclamp.txt
+++ /dev/null
@@ -1,317 +0,0 @@
-			 =======================
-			 INTEL POWERCLAMP DRIVER
-			 =======================
-By: Arjan van de Ven <arjan@linux.intel.com>
-    Jacob Pan <jacob.jun.pan@linux.intel.com>
-
-Contents:
-	(*) Introduction
-	    - Goals and Objectives
-
-	(*) Theory of Operation
-	    - Idle Injection
-	    - Calibration
-
-	(*) Performance Analysis
-	    - Effectiveness and Limitations
-	    - Power vs Performance
-	    - Scalability
-	    - Calibration
-	    - Comparison with Alternative Techniques
-
-	(*) Usage and Interfaces
-	    - Generic Thermal Layer (sysfs)
-	    - Kernel APIs (TBD)
-
-============
-INTRODUCTION
-============
-
-Consider the situation where a system’s power consumption must be
-reduced at runtime, due to power budget, thermal constraint, or noise
-level, and where active cooling is not preferred. Software managed
-passive power reduction must be performed to prevent the hardware
-actions that are designed for catastrophic scenarios.
-
-Currently, P-states, T-states (clock modulation), and CPU offlining
-are used for CPU throttling.
-
-On Intel CPUs, C-states provide effective power reduction, but so far
-they’re only used opportunistically, based on workload. With the
-development of intel_powerclamp driver, the method of synchronizing
-idle injection across all online CPU threads was introduced. The goal
-is to achieve forced and controllable C-state residency.
-
-Test/Analysis has been made in the areas of power, performance,
-scalability, and user experience. In many cases, clear advantage is
-shown over taking the CPU offline or modulating the CPU clock.
-
-
-===================
-THEORY OF OPERATION
-===================
-
-Idle Injection
---------------
-
-On modern Intel processors (Nehalem or later), package level C-state
-residency is available in MSRs, thus also available to the kernel.
-
-These MSRs are:
-      #define MSR_PKG_C2_RESIDENCY	0x60D
-      #define MSR_PKG_C3_RESIDENCY	0x3F8
-      #define MSR_PKG_C6_RESIDENCY	0x3F9
-      #define MSR_PKG_C7_RESIDENCY	0x3FA
-
-If the kernel can also inject idle time to the system, then a
-closed-loop control system can be established that manages package
-level C-state. The intel_powerclamp driver is conceived as such a
-control system, where the target set point is a user-selected idle
-ratio (based on power reduction), and the error is the difference
-between the actual package level C-state residency ratio and the target idle
-ratio.
-
-Injection is controlled by high priority kernel threads, spawned for
-each online CPU.
-
-These kernel threads, with SCHED_FIFO class, are created to perform
-clamping actions of controlled duty ratio and duration. Each per-CPU
-thread synchronizes its idle time and duration, based on the rounding
-of jiffies, so accumulated errors can be prevented to avoid a jittery
-effect. Threads are also bound to the CPU such that they cannot be
-migrated, unless the CPU is taken offline. In this case, threads
-belong to the offlined CPUs will be terminated immediately.
-
-Running as SCHED_FIFO and relatively high priority, also allows such
-scheme to work for both preemptable and non-preemptable kernels.
-Alignment of idle time around jiffies ensures scalability for HZ
-values. This effect can be better visualized using a Perf timechart.
-The following diagram shows the behavior of kernel thread
-kidle_inject/cpu. During idle injection, it runs monitor/mwait idle
-for a given "duration", then relinquishes the CPU to other tasks,
-until the next time interval.
-
-The NOHZ schedule tick is disabled during idle time, but interrupts
-are not masked. Tests show that the extra wakeups from scheduler tick
-have a dramatic impact on the effectiveness of the powerclamp driver
-on large scale systems (Westmere system with 80 processors).
-
-CPU0
-		  ____________          ____________
-kidle_inject/0   |   sleep    |  mwait |  sleep     |
-	_________|            |________|            |_______
-			       duration
-CPU1
-		  ____________          ____________
-kidle_inject/1   |   sleep    |  mwait |  sleep     |
-	_________|            |________|            |_______
-			      ^
-			      |
-			      |
-			      roundup(jiffies, interval)
-
-Only one CPU is allowed to collect statistics and update global
-control parameters. This CPU is referred to as the controlling CPU in
-this document. The controlling CPU is elected at runtime, with a
-policy that favors BSP, taking into account the possibility of a CPU
-hot-plug.
-
-In terms of dynamics of the idle control system, package level idle
-time is considered largely as a non-causal system where its behavior
-cannot be based on the past or current input. Therefore, the
-intel_powerclamp driver attempts to enforce the desired idle time
-instantly as given input (target idle ratio). After injection,
-powerclamp monitors the actual idle for a given time window and adjust
-the next injection accordingly to avoid over/under correction.
-
-When used in a causal control system, such as a temperature control,
-it is up to the user of this driver to implement algorithms where
-past samples and outputs are included in the feedback. For example, a
-PID-based thermal controller can use the powerclamp driver to
-maintain a desired target temperature, based on integral and
-derivative gains of the past samples.
-
-
-
-Calibration
------------
-During scalability testing, it is observed that synchronized actions
-among CPUs become challenging as the number of cores grows. This is
-also true for the ability of a system to enter package level C-states.
-
-To make sure the intel_powerclamp driver scales well, online
-calibration is implemented. The goals for doing such a calibration
-are:
-
-a) determine the effective range of idle injection ratio
-b) determine the amount of compensation needed at each target ratio
-
-Compensation to each target ratio consists of two parts:
-
-        a) steady state error compensation
-	This is to offset the error occurring when the system can
-	enter idle without extra wakeups (such as external interrupts).
-
-	b) dynamic error compensation
-	When an excessive amount of wakeups occurs during idle, an
-	additional idle ratio can be added to quiet interrupts, by
-	slowing down CPU activities.
-
-A debugfs file is provided for the user to examine compensation
-progress and results, such as on a Westmere system.
-[jacob@nex01 ~]$ cat
-/sys/kernel/debug/intel_powerclamp/powerclamp_calib
-controlling cpu: 0
-pct confidence steady dynamic (compensation)
-0	0	0	0
-1	1	0	0
-2	1	1	0
-3	3	1	0
-4	3	1	0
-5	3	1	0
-6	3	1	0
-7	3	1	0
-8	3	1	0
-...
-30	3	2	0
-31	3	2	0
-32	3	1	0
-33	3	2	0
-34	3	1	0
-35	3	2	0
-36	3	1	0
-37	3	2	0
-38	3	1	0
-39	3	2	0
-40	3	3	0
-41	3	1	0
-42	3	2	0
-43	3	1	0
-44	3	1	0
-45	3	2	0
-46	3	3	0
-47	3	0	0
-48	3	2	0
-49	3	3	0
-
-Calibration occurs during runtime. No offline method is available.
-Steady state compensation is used only when confidence levels of all
-adjacent ratios have reached satisfactory level. A confidence level
-is accumulated based on clean data collected at runtime. Data
-collected during a period without extra interrupts is considered
-clean.
-
-To compensate for excessive amounts of wakeup during idle, additional
-idle time is injected when such a condition is detected. Currently,
-we have a simple algorithm to double the injection ratio. A possible
-enhancement might be to throttle the offending IRQ, such as delaying
-EOI for level triggered interrupts. But it is a challenge to be
-non-intrusive to the scheduler or the IRQ core code.
-
-
-CPU Online/Offline
-------------------
-Per-CPU kernel threads are started/stopped upon receiving
-notifications of CPU hotplug activities. The intel_powerclamp driver
-keeps track of clamping kernel threads, even after they are migrated
-to other CPUs, after a CPU offline event.
-
-
-=====================
-Performance Analysis
-=====================
-This section describes the general performance data collected on
-multiple systems, including Westmere (80P) and Ivy Bridge (4P, 8P).
-
-Effectiveness and Limitations
------------------------------
-The maximum range that idle injection is allowed is capped at 50
-percent. As mentioned earlier, since interrupts are allowed during
-forced idle time, excessive interrupts could result in less
-effectiveness. The extreme case would be doing a ping -f to generated
-flooded network interrupts without much CPU acknowledgement. In this
-case, little can be done from the idle injection threads. In most
-normal cases, such as scp a large file, applications can be throttled
-by the powerclamp driver, since slowing down the CPU also slows down
-network protocol processing, which in turn reduces interrupts.
-
-When control parameters change at runtime by the controlling CPU, it
-may take an additional period for the rest of the CPUs to catch up
-with the changes. During this time, idle injection is out of sync,
-thus not able to enter package C- states at the expected ratio. But
-this effect is minor, in that in most cases change to the target
-ratio is updated much less frequently than the idle injection
-frequency.
-
-Scalability
------------
-Tests also show a minor, but measurable, difference between the 4P/8P
-Ivy Bridge system and the 80P Westmere server under 50% idle ratio.
-More compensation is needed on Westmere for the same amount of
-target idle ratio. The compensation also increases as the idle ratio
-gets larger. The above reason constitutes the need for the
-calibration code.
-
-On the IVB 8P system, compared to an offline CPU, powerclamp can
-achieve up to 40% better performance per watt. (measured by a spin
-counter summed over per CPU counting threads spawned for all running
-CPUs).
-
-====================
-Usage and Interfaces
-====================
-The powerclamp driver is registered to the generic thermal layer as a
-cooling device. Currently, it’s not bound to any thermal zones.
-
-jacob@chromoly:/sys/class/thermal/cooling_device14$ grep . *
-cur_state:0
-max_state:50
-type:intel_powerclamp
-
-cur_state allows user to set the desired idle percentage. Writing 0 to
-cur_state will stop idle injection. Writing a value between 1 and
-max_state will start the idle injection. Reading cur_state returns the
-actual and current idle percentage. This may not be the same value
-set by the user in that current idle percentage depends on workload
-and includes natural idle. When idle injection is disabled, reading
-cur_state returns value -1 instead of 0 which is to avoid confusing
-100% busy state with the disabled state.
-
-Example usage:
-- To inject 25% idle time
-$ sudo sh -c "echo 25 > /sys/class/thermal/cooling_device80/cur_state
-"
-
-If the system is not busy and has more than 25% idle time already,
-then the powerclamp driver will not start idle injection. Using Top
-will not show idle injection kernel threads.
-
-If the system is busy (spin test below) and has less than 25% natural
-idle time, powerclamp kernel threads will do idle injection. Forced
-idle time is accounted as normal idle in that common code path is
-taken as the idle task.
-
-In this example, 24.1% idle is shown. This helps the system admin or
-user determine the cause of slowdown, when a powerclamp driver is in action.
-
-
-Tasks: 197 total,   1 running, 196 sleeping,   0 stopped,   0 zombie
-Cpu(s): 71.2%us,  4.7%sy,  0.0%ni, 24.1%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
-Mem:   3943228k total,  1689632k used,  2253596k free,    74960k buffers
-Swap:  4087804k total,        0k used,  4087804k free,   945336k cached
-
-  PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
- 3352 jacob     20   0  262m  644  428 S  286  0.0   0:17.16 spin
- 3341 root     -51   0     0    0    0 D   25  0.0   0:01.62 kidle_inject/0
- 3344 root     -51   0     0    0    0 D   25  0.0   0:01.60 kidle_inject/3
- 3342 root     -51   0     0    0    0 D   25  0.0   0:01.61 kidle_inject/1
- 3343 root     -51   0     0    0    0 D   25  0.0   0:01.60 kidle_inject/2
- 2935 jacob     20   0  696m 125m  35m S    5  3.3   0:31.11 firefox
- 1546 root      20   0  158m  20m 6640 S    3  0.5   0:26.97 Xorg
- 2100 jacob     20   0 1223m  88m  30m S    3  2.3   0:23.68 compiz
-
-Tests have shown that by using the powerclamp driver as a cooling
-device, a PID based userspace thermal controller can manage to
-control CPU temperature effectively, when no other thermal influence
-is added. For example, a UltraBook user can compile the kernel under
-certain temperature (below most active trip points).
diff --git a/Documentation/thermal/nouveau_thermal b/Documentation/thermal/nouveau_thermal
deleted file mode 100644
index 6e17a11efcb0..000000000000
--- a/Documentation/thermal/nouveau_thermal
+++ /dev/null
@@ -1,82 +0,0 @@
-Kernel driver nouveau
-===================
-
-Supported chips:
-* NV43+
-
-Authors: Martin Peres (mupuf) <martin.peres@free.fr>
-
-Description
----------
-
-This driver allows to read the GPU core temperature, drive the GPU fan and
-set temperature alarms.
-
-Currently, due to the absence of in-kernel API to access HWMON drivers, Nouveau
-cannot access any of the i2c external monitoring chips it may find. If you
-have one of those, temperature and/or fan management through Nouveau's HWMON
-interface is likely not to work. This document may then not cover your situation
-entirely.
-
-Temperature management
---------------------
-
-Temperature is exposed under as a read-only HWMON attribute temp1_input.
-
-In order to protect the GPU from overheating, Nouveau supports 4 configurable
-temperature thresholds:
-
- * Fan_boost: Fan speed is set to 100% when reaching this temperature;
- * Downclock: The GPU will be downclocked to reduce its power dissipation;
- * Critical: The GPU is put on hold to further lower power dissipation;
- * Shutdown: Shut the computer down to protect your GPU.
-
-WARNING: Some of these thresholds may not be used by Nouveau depending
-on your chipset.
-
-The default value for these thresholds comes from the GPU's vbios. These
-thresholds can be configured thanks to the following HWMON attributes:
-
- * Fan_boost: temp1_auto_point1_temp and temp1_auto_point1_temp_hyst;
- * Downclock: temp1_max and temp1_max_hyst;
- * Critical: temp1_crit and temp1_crit_hyst;
- * Shutdown: temp1_emergency and temp1_emergency_hyst.
-
-NOTE: Remember that the values are stored as milli degrees Celsius. Don't forget
-to multiply!
-
-Fan management
-------------
-
-Not all cards have a drivable fan. If you do, then the following HWMON
-attributes should be available:
-
- * pwm1_enable: Current fan management mode (NONE, MANUAL or AUTO);
- * pwm1: Current PWM value (power percentage);
- * pwm1_min: The minimum PWM speed allowed;
- * pwm1_max: The maximum PWM speed allowed (bypassed when hitting Fan_boost);
-
-You may also have the following attribute:
-
- * fan1_input: Speed in RPM of your fan.
-
-Your fan can be driven in different modes:
-
- * 0: The fan is left untouched;
- * 1: The fan can be driven in manual (use pwm1 to change the speed);
- * 2; The fan is driven automatically depending on the temperature.
-
-NOTE: Be sure to use the manual mode if you want to drive the fan speed manually
-
-NOTE2: When operating in manual mode outside the vbios-defined
-[PWM_min, PWM_max] range, the reported fan speed (RPM) may not be accurate
-depending on your hardware.
-
-Bug reports
----------
-
-Thermal management on Nouveau is new and may not work on all cards. If you have
-inquiries, please ping mupuf on IRC (#nouveau, freenode).
-
-Bug reports should be filled on Freedesktop's bug tracker. Please follow
-http://nouveau.freedesktop.org/wiki/Bugs
diff --git a/Documentation/thermal/nouveau_thermal.rst b/Documentation/thermal/nouveau_thermal.rst
new file mode 100644
index 000000000000..37255fd6735d
--- /dev/null
+++ b/Documentation/thermal/nouveau_thermal.rst
@@ -0,0 +1,96 @@
+=====================
+Kernel driver nouveau
+=====================
+
+Supported chips:
+
+* NV43+
+
+Authors: Martin Peres (mupuf) <martin.peres@free.fr>
+
+Description
+-----------
+
+This driver allows to read the GPU core temperature, drive the GPU fan and
+set temperature alarms.
+
+Currently, due to the absence of in-kernel API to access HWMON drivers, Nouveau
+cannot access any of the i2c external monitoring chips it may find. If you
+have one of those, temperature and/or fan management through Nouveau's HWMON
+interface is likely not to work. This document may then not cover your situation
+entirely.
+
+Temperature management
+----------------------
+
+Temperature is exposed under as a read-only HWMON attribute temp1_input.
+
+In order to protect the GPU from overheating, Nouveau supports 4 configurable
+temperature thresholds:
+
+ * Fan_boost:
+	Fan speed is set to 100% when reaching this temperature;
+ * Downclock:
+	The GPU will be downclocked to reduce its power dissipation;
+ * Critical:
+	The GPU is put on hold to further lower power dissipation;
+ * Shutdown:
+	Shut the computer down to protect your GPU.
+
+WARNING:
+	Some of these thresholds may not be used by Nouveau depending
+	on your chipset.
+
+The default value for these thresholds comes from the GPU's vbios. These
+thresholds can be configured thanks to the following HWMON attributes:
+
+ * Fan_boost: temp1_auto_point1_temp and temp1_auto_point1_temp_hyst;
+ * Downclock: temp1_max and temp1_max_hyst;
+ * Critical: temp1_crit and temp1_crit_hyst;
+ * Shutdown: temp1_emergency and temp1_emergency_hyst.
+
+NOTE: Remember that the values are stored as milli degrees Celsius. Don't forget
+to multiply!
+
+Fan management
+--------------
+
+Not all cards have a drivable fan. If you do, then the following HWMON
+attributes should be available:
+
+ * pwm1_enable:
+	Current fan management mode (NONE, MANUAL or AUTO);
+ * pwm1:
+	Current PWM value (power percentage);
+ * pwm1_min:
+	The minimum PWM speed allowed;
+ * pwm1_max:
+	The maximum PWM speed allowed (bypassed when hitting Fan_boost);
+
+You may also have the following attribute:
+
+ * fan1_input:
+	Speed in RPM of your fan.
+
+Your fan can be driven in different modes:
+
+ * 0: The fan is left untouched;
+ * 1: The fan can be driven in manual (use pwm1 to change the speed);
+ * 2; The fan is driven automatically depending on the temperature.
+
+NOTE:
+  Be sure to use the manual mode if you want to drive the fan speed manually
+
+NOTE2:
+  When operating in manual mode outside the vbios-defined
+  [PWM_min, PWM_max] range, the reported fan speed (RPM) may not be accurate
+  depending on your hardware.
+
+Bug reports
+-----------
+
+Thermal management on Nouveau is new and may not work on all cards. If you have
+inquiries, please ping mupuf on IRC (#nouveau, freenode).
+
+Bug reports should be filled on Freedesktop's bug tracker. Please follow
+http://nouveau.freedesktop.org/wiki/Bugs
diff --git a/Documentation/thermal/power_allocator.rst b/Documentation/thermal/power_allocator.rst
new file mode 100644
index 000000000000..67b6a3297238
--- /dev/null
+++ b/Documentation/thermal/power_allocator.rst
@@ -0,0 +1,271 @@
+=================================
+Power allocator governor tunables
+=================================
+
+Trip points
+-----------
+
+The governor works optimally with the following two passive trip points:
+
+1.  "switch on" trip point: temperature above which the governor
+    control loop starts operating.  This is the first passive trip
+    point of the thermal zone.
+
+2.  "desired temperature" trip point: it should be higher than the
+    "switch on" trip point.  This the target temperature the governor
+    is controlling for.  This is the last passive trip point of the
+    thermal zone.
+
+PID Controller
+--------------
+
+The power allocator governor implements a
+Proportional-Integral-Derivative controller (PID controller) with
+temperature as the control input and power as the controlled output:
+
+    P_max = k_p * e + k_i * err_integral + k_d * diff_err + sustainable_power
+
+where
+   -  e = desired_temperature - current_temperature
+   -  err_integral is the sum of previous errors
+   -  diff_err = e - previous_error
+
+It is similar to the one depicted below::
+
+				      k_d
+				       |
+  current_temp                         |
+       |                               v
+       |              +----------+   +---+
+       |       +----->| diff_err |-->| X |------+
+       |       |      +----------+   +---+      |
+       |       |                                |      tdp        actor
+       |       |                      k_i       |       |  get_requested_power()
+       |       |                       |        |       |        |     |
+       |       |                       |        |       |        |     | ...
+       v       |                       v        v       v        v     v
+     +---+     |      +-------+      +---+    +---+   +---+   +----------+
+     | S |-----+----->| sum e |----->| X |--->| S |-->| S |-->|power     |
+     +---+     |      +-------+      +---+    +---+   +---+   |allocation|
+       ^       |                                ^             +----------+
+       |       |                                |                |     |
+       |       |        +---+                   |                |     |
+       |       +------->| X |-------------------+                v     v
+       |                +---+                               granted performance
+  desired_temperature     ^
+			  |
+			  |
+		      k_po/k_pu
+
+Sustainable power
+-----------------
+
+An estimate of the sustainable dissipatable power (in mW) should be
+provided while registering the thermal zone.  This estimates the
+sustained power that can be dissipated at the desired control
+temperature.  This is the maximum sustained power for allocation at
+the desired maximum temperature.  The actual sustained power can vary
+for a number of reasons.  The closed loop controller will take care of
+variations such as environmental conditions, and some factors related
+to the speed-grade of the silicon.  `sustainable_power` is therefore
+simply an estimate, and may be tuned to affect the aggressiveness of
+the thermal ramp. For reference, the sustainable power of a 4" phone
+is typically 2000mW, while on a 10" tablet is around 4500mW (may vary
+depending on screen size).
+
+If you are using device tree, do add it as a property of the
+thermal-zone.  For example::
+
+	thermal-zones {
+		soc_thermal {
+			polling-delay = <1000>;
+			polling-delay-passive = <100>;
+			sustainable-power = <2500>;
+			...
+
+Instead, if the thermal zone is registered from the platform code, pass a
+`thermal_zone_params` that has a `sustainable_power`.  If no
+`thermal_zone_params` were being passed, then something like below
+will suffice::
+
+	static const struct thermal_zone_params tz_params = {
+		.sustainable_power = 3500,
+	};
+
+and then pass `tz_params` as the 5th parameter to
+`thermal_zone_device_register()`
+
+k_po and k_pu
+-------------
+
+The implementation of the PID controller in the power allocator
+thermal governor allows the configuration of two proportional term
+constants: `k_po` and `k_pu`.  `k_po` is the proportional term
+constant during temperature overshoot periods (current temperature is
+above "desired temperature" trip point).  Conversely, `k_pu` is the
+proportional term constant during temperature undershoot periods
+(current temperature below "desired temperature" trip point).
+
+These controls are intended as the primary mechanism for configuring
+the permitted thermal "ramp" of the system.  For instance, a lower
+`k_pu` value will provide a slower ramp, at the cost of capping
+available capacity at a low temperature.  On the other hand, a high
+value of `k_pu` will result in the governor granting very high power
+while temperature is low, and may lead to temperature overshooting.
+
+The default value for `k_pu` is::
+
+    2 * sustainable_power / (desired_temperature - switch_on_temp)
+
+This means that at `switch_on_temp` the output of the controller's
+proportional term will be 2 * `sustainable_power`.  The default value
+for `k_po` is::
+
+    sustainable_power / (desired_temperature - switch_on_temp)
+
+Focusing on the proportional and feed forward values of the PID
+controller equation we have::
+
+    P_max = k_p * e + sustainable_power
+
+The proportional term is proportional to the difference between the
+desired temperature and the current one.  When the current temperature
+is the desired one, then the proportional component is zero and
+`P_max` = `sustainable_power`.  That is, the system should operate in
+thermal equilibrium under constant load.  `sustainable_power` is only
+an estimate, which is the reason for closed-loop control such as this.
+
+Expanding `k_pu` we get::
+
+    P_max = 2 * sustainable_power * (T_set - T) / (T_set - T_on) +
+	sustainable_power
+
+where:
+
+    - T_set is the desired temperature
+    - T is the current temperature
+    - T_on is the switch on temperature
+
+When the current temperature is the switch_on temperature, the above
+formula becomes::
+
+    P_max = 2 * sustainable_power * (T_set - T_on) / (T_set - T_on) +
+	sustainable_power = 2 * sustainable_power + sustainable_power =
+	3 * sustainable_power
+
+Therefore, the proportional term alone linearly decreases power from
+3 * `sustainable_power` to `sustainable_power` as the temperature
+rises from the switch on temperature to the desired temperature.
+
+k_i and integral_cutoff
+-----------------------
+
+`k_i` configures the PID loop's integral term constant.  This term
+allows the PID controller to compensate for long term drift and for
+the quantized nature of the output control: cooling devices can't set
+the exact power that the governor requests.  When the temperature
+error is below `integral_cutoff`, errors are accumulated in the
+integral term.  This term is then multiplied by `k_i` and the result
+added to the output of the controller.  Typically `k_i` is set low (1
+or 2) and `integral_cutoff` is 0.
+
+k_d
+---
+
+`k_d` configures the PID loop's derivative term constant.  It's
+recommended to leave it as the default: 0.
+
+Cooling device power API
+========================
+
+Cooling devices controlled by this governor must supply the additional
+"power" API in their `cooling_device_ops`.  It consists on three ops:
+
+1. ::
+
+    int get_requested_power(struct thermal_cooling_device *cdev,
+			    struct thermal_zone_device *tz, u32 *power);
+
+
+@cdev:
+	The `struct thermal_cooling_device` pointer
+@tz:
+	thermal zone in which we are currently operating
+@power:
+	pointer in which to store the calculated power
+
+`get_requested_power()` calculates the power requested by the device
+in milliwatts and stores it in @power .  It should return 0 on
+success, -E* on failure.  This is currently used by the power
+allocator governor to calculate how much power to give to each cooling
+device.
+
+2. ::
+
+	int state2power(struct thermal_cooling_device *cdev, struct
+			thermal_zone_device *tz, unsigned long state,
+			u32 *power);
+
+@cdev:
+	The `struct thermal_cooling_device` pointer
+@tz:
+	thermal zone in which we are currently operating
+@state:
+	A cooling device state
+@power:
+	pointer in which to store the equivalent power
+
+Convert cooling device state @state into power consumption in
+milliwatts and store it in @power.  It should return 0 on success, -E*
+on failure.  This is currently used by thermal core to calculate the
+maximum power that an actor can consume.
+
+3. ::
+
+	int power2state(struct thermal_cooling_device *cdev, u32 power,
+			unsigned long *state);
+
+@cdev:
+	The `struct thermal_cooling_device` pointer
+@power:
+	power in milliwatts
+@state:
+	pointer in which to store the resulting state
+
+Calculate a cooling device state that would make the device consume at
+most @power mW and store it in @state.  It should return 0 on success,
+-E* on failure.  This is currently used by the thermal core to convert
+a given power set by the power allocator governor to a state that the
+cooling device can set.  It is a function because this conversion may
+depend on external factors that may change so this function should the
+best conversion given "current circumstances".
+
+Cooling device weights
+----------------------
+
+Weights are a mechanism to bias the allocation among cooling
+devices.  They express the relative power efficiency of different
+cooling devices.  Higher weight can be used to express higher power
+efficiency.  Weighting is relative such that if each cooling device
+has a weight of one they are considered equal.  This is particularly
+useful in heterogeneous systems where two cooling devices may perform
+the same kind of compute, but with different efficiency.  For example,
+a system with two different types of processors.
+
+If the thermal zone is registered using
+`thermal_zone_device_register()` (i.e., platform code), then weights
+are passed as part of the thermal zone's `thermal_bind_parameters`.
+If the platform is registered using device tree, then they are passed
+as the `contribution` property of each map in the `cooling-maps` node.
+
+Limitations of the power allocator governor
+===========================================
+
+The power allocator governor's PID controller works best if there is a
+periodic tick.  If you have a driver that calls
+`thermal_zone_device_update()` (or anything that ends up calling the
+governor's `throttle()` function) repetitively, the governor response
+won't be very good.  Note that this is not particular to this
+governor, step-wise will also misbehave if you call its throttle()
+faster than the normal thermal framework tick (due to interrupts for
+example) as it will overreact.
diff --git a/Documentation/thermal/power_allocator.txt b/Documentation/thermal/power_allocator.txt
deleted file mode 100644
index 9fb0ff06dca9..000000000000
--- a/Documentation/thermal/power_allocator.txt
+++ /dev/null
@@ -1,247 +0,0 @@
-Power allocator governor tunables
-=================================
-
-Trip points
------------
-
-The governor works optimally with the following two passive trip points:
-
-1.  "switch on" trip point: temperature above which the governor
-    control loop starts operating.  This is the first passive trip
-    point of the thermal zone.
-
-2.  "desired temperature" trip point: it should be higher than the
-    "switch on" trip point.  This the target temperature the governor
-    is controlling for.  This is the last passive trip point of the
-    thermal zone.
-
-PID Controller
---------------
-
-The power allocator governor implements a
-Proportional-Integral-Derivative controller (PID controller) with
-temperature as the control input and power as the controlled output:
-
-    P_max = k_p * e + k_i * err_integral + k_d * diff_err + sustainable_power
-
-where
-    e = desired_temperature - current_temperature
-    err_integral is the sum of previous errors
-    diff_err = e - previous_error
-
-It is similar to the one depicted below:
-
-                                      k_d
-                                       |
-current_temp                           |
-     |                                 v
-     |                +----------+   +---+
-     |         +----->| diff_err |-->| X |------+
-     |         |      +----------+   +---+      |
-     |         |                                |      tdp        actor
-     |         |                      k_i       |       |  get_requested_power()
-     |         |                       |        |       |        |     |
-     |         |                       |        |       |        |     | ...
-     v         |                       v        v       v        v     v
-   +---+       |      +-------+      +---+    +---+   +---+   +----------+
-   | S |-------+----->| sum e |----->| X |--->| S |-->| S |-->|power     |
-   +---+       |      +-------+      +---+    +---+   +---+   |allocation|
-     ^         |                                ^             +----------+
-     |         |                                |                |     |
-     |         |        +---+                   |                |     |
-     |         +------->| X |-------------------+                v     v
-     |                  +---+                               granted performance
-desired_temperature       ^
-                          |
-                          |
-                      k_po/k_pu
-
-Sustainable power
------------------
-
-An estimate of the sustainable dissipatable power (in mW) should be
-provided while registering the thermal zone.  This estimates the
-sustained power that can be dissipated at the desired control
-temperature.  This is the maximum sustained power for allocation at
-the desired maximum temperature.  The actual sustained power can vary
-for a number of reasons.  The closed loop controller will take care of
-variations such as environmental conditions, and some factors related
-to the speed-grade of the silicon.  `sustainable_power` is therefore
-simply an estimate, and may be tuned to affect the aggressiveness of
-the thermal ramp. For reference, the sustainable power of a 4" phone
-is typically 2000mW, while on a 10" tablet is around 4500mW (may vary
-depending on screen size).
-
-If you are using device tree, do add it as a property of the
-thermal-zone.  For example:
-
-	thermal-zones {
-		soc_thermal {
-			polling-delay = <1000>;
-			polling-delay-passive = <100>;
-			sustainable-power = <2500>;
-			...
-
-Instead, if the thermal zone is registered from the platform code, pass a
-`thermal_zone_params` that has a `sustainable_power`.  If no
-`thermal_zone_params` were being passed, then something like below
-will suffice:
-
-	static const struct thermal_zone_params tz_params = {
-		.sustainable_power = 3500,
-	};
-
-and then pass `tz_params` as the 5th parameter to
-`thermal_zone_device_register()`
-
-k_po and k_pu
--------------
-
-The implementation of the PID controller in the power allocator
-thermal governor allows the configuration of two proportional term
-constants: `k_po` and `k_pu`.  `k_po` is the proportional term
-constant during temperature overshoot periods (current temperature is
-above "desired temperature" trip point).  Conversely, `k_pu` is the
-proportional term constant during temperature undershoot periods
-(current temperature below "desired temperature" trip point).
-
-These controls are intended as the primary mechanism for configuring
-the permitted thermal "ramp" of the system.  For instance, a lower
-`k_pu` value will provide a slower ramp, at the cost of capping
-available capacity at a low temperature.  On the other hand, a high
-value of `k_pu` will result in the governor granting very high power
-while temperature is low, and may lead to temperature overshooting.
-
-The default value for `k_pu` is:
-
-    2 * sustainable_power / (desired_temperature - switch_on_temp)
-
-This means that at `switch_on_temp` the output of the controller's
-proportional term will be 2 * `sustainable_power`.  The default value
-for `k_po` is:
-
-    sustainable_power / (desired_temperature - switch_on_temp)
-
-Focusing on the proportional and feed forward values of the PID
-controller equation we have:
-
-    P_max = k_p * e + sustainable_power
-
-The proportional term is proportional to the difference between the
-desired temperature and the current one.  When the current temperature
-is the desired one, then the proportional component is zero and
-`P_max` = `sustainable_power`.  That is, the system should operate in
-thermal equilibrium under constant load.  `sustainable_power` is only
-an estimate, which is the reason for closed-loop control such as this.
-
-Expanding `k_pu` we get:
-    P_max = 2 * sustainable_power * (T_set - T) / (T_set - T_on) +
-        sustainable_power
-
-where
-    T_set is the desired temperature
-    T is the current temperature
-    T_on is the switch on temperature
-
-When the current temperature is the switch_on temperature, the above
-formula becomes:
-
-    P_max = 2 * sustainable_power * (T_set - T_on) / (T_set - T_on) +
-        sustainable_power = 2 * sustainable_power + sustainable_power =
-        3 * sustainable_power
-
-Therefore, the proportional term alone linearly decreases power from
-3 * `sustainable_power` to `sustainable_power` as the temperature
-rises from the switch on temperature to the desired temperature.
-
-k_i and integral_cutoff
------------------------
-
-`k_i` configures the PID loop's integral term constant.  This term
-allows the PID controller to compensate for long term drift and for
-the quantized nature of the output control: cooling devices can't set
-the exact power that the governor requests.  When the temperature
-error is below `integral_cutoff`, errors are accumulated in the
-integral term.  This term is then multiplied by `k_i` and the result
-added to the output of the controller.  Typically `k_i` is set low (1
-or 2) and `integral_cutoff` is 0.
-
-k_d
----
-
-`k_d` configures the PID loop's derivative term constant.  It's
-recommended to leave it as the default: 0.
-
-Cooling device power API
-========================
-
-Cooling devices controlled by this governor must supply the additional
-"power" API in their `cooling_device_ops`.  It consists on three ops:
-
-1. int get_requested_power(struct thermal_cooling_device *cdev,
-	struct thermal_zone_device *tz, u32 *power);
-@cdev: The `struct thermal_cooling_device` pointer
-@tz: thermal zone in which we are currently operating
-@power: pointer in which to store the calculated power
-
-`get_requested_power()` calculates the power requested by the device
-in milliwatts and stores it in @power .  It should return 0 on
-success, -E* on failure.  This is currently used by the power
-allocator governor to calculate how much power to give to each cooling
-device.
-
-2. int state2power(struct thermal_cooling_device *cdev, struct
-        thermal_zone_device *tz, unsigned long state, u32 *power);
-@cdev: The `struct thermal_cooling_device` pointer
-@tz: thermal zone in which we are currently operating
-@state: A cooling device state
-@power: pointer in which to store the equivalent power
-
-Convert cooling device state @state into power consumption in
-milliwatts and store it in @power.  It should return 0 on success, -E*
-on failure.  This is currently used by thermal core to calculate the
-maximum power that an actor can consume.
-
-3. int power2state(struct thermal_cooling_device *cdev, u32 power,
-	unsigned long *state);
-@cdev: The `struct thermal_cooling_device` pointer
-@power: power in milliwatts
-@state: pointer in which to store the resulting state
-
-Calculate a cooling device state that would make the device consume at
-most @power mW and store it in @state.  It should return 0 on success,
--E* on failure.  This is currently used by the thermal core to convert
-a given power set by the power allocator governor to a state that the
-cooling device can set.  It is a function because this conversion may
-depend on external factors that may change so this function should the
-best conversion given "current circumstances".
-
-Cooling device weights
-----------------------
-
-Weights are a mechanism to bias the allocation among cooling
-devices.  They express the relative power efficiency of different
-cooling devices.  Higher weight can be used to express higher power
-efficiency.  Weighting is relative such that if each cooling device
-has a weight of one they are considered equal.  This is particularly
-useful in heterogeneous systems where two cooling devices may perform
-the same kind of compute, but with different efficiency.  For example,
-a system with two different types of processors.
-
-If the thermal zone is registered using
-`thermal_zone_device_register()` (i.e., platform code), then weights
-are passed as part of the thermal zone's `thermal_bind_parameters`.
-If the platform is registered using device tree, then they are passed
-as the `contribution` property of each map in the `cooling-maps` node.
-
-Limitations of the power allocator governor
-===========================================
-
-The power allocator governor's PID controller works best if there is a
-periodic tick.  If you have a driver that calls
-`thermal_zone_device_update()` (or anything that ends up calling the
-governor's `throttle()` function) repetitively, the governor response
-won't be very good.  Note that this is not particular to this
-governor, step-wise will also misbehave if you call its throttle()
-faster than the normal thermal framework tick (due to interrupts for
-example) as it will overreact.
diff --git a/Documentation/thermal/sysfs-api.rst b/Documentation/thermal/sysfs-api.rst
new file mode 100644
index 000000000000..e4930761d3e5
--- /dev/null
+++ b/Documentation/thermal/sysfs-api.rst
@@ -0,0 +1,798 @@
+===================================
+Generic Thermal Sysfs driver How To
+===================================
+
+Written by Sujith Thomas <sujith.thomas@intel.com>, Zhang Rui <rui.zhang@intel.com>
+
+Updated: 2 January 2008
+
+Copyright (c)  2008 Intel Corporation
+
+
+0. Introduction
+===============
+
+The generic thermal sysfs provides a set of interfaces for thermal zone
+devices (sensors) and thermal cooling devices (fan, processor...) to register
+with the thermal management solution and to be a part of it.
+
+This how-to focuses on enabling new thermal zone and cooling devices to
+participate in thermal management.
+This solution is platform independent and any type of thermal zone devices
+and cooling devices should be able to make use of the infrastructure.
+
+The main task of the thermal sysfs driver is to expose thermal zone attributes
+as well as cooling device attributes to the user space.
+An intelligent thermal management application can make decisions based on
+inputs from thermal zone attributes (the current temperature and trip point
+temperature) and throttle appropriate devices.
+
+- `[0-*]`	denotes any positive number starting from 0
+- `[1-*]`	denotes any positive number starting from 1
+
+1. thermal sysfs driver interface functions
+===========================================
+
+1.1 thermal zone device interface
+---------------------------------
+
+    ::
+
+	struct thermal_zone_device
+	*thermal_zone_device_register(char *type,
+				      int trips, int mask, void *devdata,
+				      struct thermal_zone_device_ops *ops,
+				      const struct thermal_zone_params *tzp,
+				      int passive_delay, int polling_delay))
+
+    This interface function adds a new thermal zone device (sensor) to
+    /sys/class/thermal folder as `thermal_zone[0-*]`. It tries to bind all the
+    thermal cooling devices registered at the same time.
+
+    type:
+	the thermal zone type.
+    trips:
+	the total number of trip points this thermal zone supports.
+    mask:
+	Bit string: If 'n'th bit is set, then trip point 'n' is writeable.
+    devdata:
+	device private data
+    ops:
+	thermal zone device call-backs.
+
+	.bind:
+		bind the thermal zone device with a thermal cooling device.
+	.unbind:
+		unbind the thermal zone device with a thermal cooling device.
+	.get_temp:
+		get the current temperature of the thermal zone.
+	.set_trips:
+		    set the trip points window. Whenever the current temperature
+		    is updated, the trip points immediately below and above the
+		    current temperature are found.
+	.get_mode:
+		   get the current mode (enabled/disabled) of the thermal zone.
+
+			- "enabled" means the kernel thermal management is
+			  enabled.
+			- "disabled" will prevent kernel thermal driver action
+			  upon trip points so that user applications can take
+			  charge of thermal management.
+	.set_mode:
+		set the mode (enabled/disabled) of the thermal zone.
+	.get_trip_type:
+		get the type of certain trip point.
+	.get_trip_temp:
+			get the temperature above which the certain trip point
+			will be fired.
+	.set_emul_temp:
+			set the emulation temperature which helps in debugging
+			different threshold temperature points.
+    tzp:
+	thermal zone platform parameters.
+    passive_delay:
+	number of milliseconds to wait between polls when
+	performing passive cooling.
+    polling_delay:
+	number of milliseconds to wait between polls when checking
+	whether trip points have been crossed (0 for interrupt driven systems).
+
+    ::
+
+	void thermal_zone_device_unregister(struct thermal_zone_device *tz)
+
+    This interface function removes the thermal zone device.
+    It deletes the corresponding entry from /sys/class/thermal folder and
+    unbinds all the thermal cooling devices it uses.
+
+	::
+
+	   struct thermal_zone_device
+	   *thermal_zone_of_sensor_register(struct device *dev, int sensor_id,
+				void *data,
+				const struct thermal_zone_of_device_ops *ops)
+
+	This interface adds a new sensor to a DT thermal zone.
+	This function will search the list of thermal zones described in
+	device tree and look for the zone that refer to the sensor device
+	pointed by dev->of_node as temperature providers. For the zone
+	pointing to the sensor node, the sensor will be added to the DT
+	thermal zone device.
+
+	The parameters for this interface are:
+
+	dev:
+			Device node of sensor containing valid node pointer in
+			dev->of_node.
+	sensor_id:
+			a sensor identifier, in case the sensor IP has more
+			than one sensors
+	data:
+			a private pointer (owned by the caller) that will be
+			passed back, when a temperature reading is needed.
+	ops:
+			`struct thermal_zone_of_device_ops *`.
+
+			==============  =======================================
+			get_temp	a pointer to a function that reads the
+					sensor temperature. This is mandatory
+					callback provided by sensor driver.
+			set_trips	a pointer to a function that sets a
+					temperature window. When this window is
+					left the driver must inform the thermal
+					core via thermal_zone_device_update.
+			get_trend 	a pointer to a function that reads the
+					sensor temperature trend.
+			set_emul_temp	a pointer to a function that sets
+					sensor emulated temperature.
+			==============  =======================================
+
+	The thermal zone temperature is provided by the get_temp() function
+	pointer of thermal_zone_of_device_ops. When called, it will
+	have the private pointer @data back.
+
+	It returns error pointer if fails otherwise valid thermal zone device
+	handle. Caller should check the return handle with IS_ERR() for finding
+	whether success or not.
+
+	::
+
+	    void thermal_zone_of_sensor_unregister(struct device *dev,
+						   struct thermal_zone_device *tzd)
+
+	This interface unregisters a sensor from a DT thermal zone which was
+	successfully added by interface thermal_zone_of_sensor_register().
+	This function removes the sensor callbacks and private data from the
+	thermal zone device registered with thermal_zone_of_sensor_register()
+	interface. It will also silent the zone by remove the .get_temp() and
+	get_trend() thermal zone device callbacks.
+
+	::
+
+	  struct thermal_zone_device
+	  *devm_thermal_zone_of_sensor_register(struct device *dev,
+				int sensor_id,
+				void *data,
+				const struct thermal_zone_of_device_ops *ops)
+
+	This interface is resource managed version of
+	thermal_zone_of_sensor_register().
+
+	All details of thermal_zone_of_sensor_register() described in
+	section 1.1.3 is applicable here.
+
+	The benefit of using this interface to register sensor is that it
+	is not require to explicitly call thermal_zone_of_sensor_unregister()
+	in error path or during driver unbinding as this is done by driver
+	resource manager.
+
+	::
+
+		void devm_thermal_zone_of_sensor_unregister(struct device *dev,
+						struct thermal_zone_device *tzd)
+
+	This interface is resource managed version of
+	thermal_zone_of_sensor_unregister().
+	All details of thermal_zone_of_sensor_unregister() described in
+	section 1.1.4 is applicable here.
+	Normally this function will not need to be called and the resource
+	management code will ensure that the resource is freed.
+
+	::
+
+		int thermal_zone_get_slope(struct thermal_zone_device *tz)
+
+	This interface is used to read the slope attribute value
+	for the thermal zone device, which might be useful for platform
+	drivers for temperature calculations.
+
+	::
+
+		int thermal_zone_get_offset(struct thermal_zone_device *tz)
+
+	This interface is used to read the offset attribute value
+	for the thermal zone device, which might be useful for platform
+	drivers for temperature calculations.
+
+1.2 thermal cooling device interface
+------------------------------------
+
+
+    ::
+
+	struct thermal_cooling_device
+	*thermal_cooling_device_register(char *name,
+			void *devdata, struct thermal_cooling_device_ops *)
+
+    This interface function adds a new thermal cooling device (fan/processor/...)
+    to /sys/class/thermal/ folder as `cooling_device[0-*]`. It tries to bind itself
+    to all the thermal zone devices registered at the same time.
+
+    name:
+	the cooling device name.
+    devdata:
+	device private data.
+    ops:
+	thermal cooling devices call-backs.
+
+	.get_max_state:
+		get the Maximum throttle state of the cooling device.
+	.get_cur_state:
+		get the Currently requested throttle state of the
+		cooling device.
+	.set_cur_state:
+		set the Current throttle state of the cooling device.
+
+    ::
+
+	void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
+
+    This interface function removes the thermal cooling device.
+    It deletes the corresponding entry from /sys/class/thermal folder and
+    unbinds itself from all the thermal zone devices using it.
+
+1.3 interface for binding a thermal zone device with a thermal cooling device
+-----------------------------------------------------------------------------
+
+    ::
+
+	int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
+		int trip, struct thermal_cooling_device *cdev,
+		unsigned long upper, unsigned long lower, unsigned int weight);
+
+    This interface function binds a thermal cooling device to a particular trip
+    point of a thermal zone device.
+
+    This function is usually called in the thermal zone device .bind callback.
+
+    tz:
+	  the thermal zone device
+    cdev:
+	  thermal cooling device
+    trip:
+	  indicates which trip point in this thermal zone the cooling device
+	  is associated with.
+    upper:
+	  the Maximum cooling state for this trip point.
+	  THERMAL_NO_LIMIT means no upper limit,
+	  and the cooling device can be in max_state.
+    lower:
+	  the Minimum cooling state can be used for this trip point.
+	  THERMAL_NO_LIMIT means no lower limit,
+	  and the cooling device can be in cooling state 0.
+    weight:
+	  the influence of this cooling device in this thermal
+	  zone.  See 1.4.1 below for more information.
+
+    ::
+
+	int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz,
+				int trip, struct thermal_cooling_device *cdev);
+
+    This interface function unbinds a thermal cooling device from a particular
+    trip point of a thermal zone device. This function is usually called in
+    the thermal zone device .unbind callback.
+
+    tz:
+	the thermal zone device
+    cdev:
+	thermal cooling device
+    trip:
+	indicates which trip point in this thermal zone the cooling device
+	is associated with.
+
+1.4 Thermal Zone Parameters
+---------------------------
+
+    ::
+
+	struct thermal_bind_params
+
+    This structure defines the following parameters that are used to bind
+    a zone with a cooling device for a particular trip point.
+
+    .cdev:
+	     The cooling device pointer
+    .weight:
+	     The 'influence' of a particular cooling device on this
+	     zone. This is relative to the rest of the cooling
+	     devices. For example, if all cooling devices have a
+	     weight of 1, then they all contribute the same. You can
+	     use percentages if you want, but it's not mandatory. A
+	     weight of 0 means that this cooling device doesn't
+	     contribute to the cooling of this zone unless all cooling
+	     devices have a weight of 0. If all weights are 0, then
+	     they all contribute the same.
+    .trip_mask:
+	       This is a bit mask that gives the binding relation between
+	       this thermal zone and cdev, for a particular trip point.
+	       If nth bit is set, then the cdev and thermal zone are bound
+	       for trip point n.
+    .binding_limits:
+		     This is an array of cooling state limits. Must have
+		     exactly 2 * thermal_zone.number_of_trip_points. It is an
+		     array consisting of tuples <lower-state upper-state> of
+		     state limits. Each trip will be associated with one state
+		     limit tuple when binding. A NULL pointer means
+		     <THERMAL_NO_LIMITS THERMAL_NO_LIMITS> on all trips.
+		     These limits are used when binding a cdev to a trip point.
+    .match:
+	    This call back returns success(0) if the 'tz and cdev' need to
+	    be bound, as per platform data.
+
+    ::
+
+	struct thermal_zone_params
+
+    This structure defines the platform level parameters for a thermal zone.
+    This data, for each thermal zone should come from the platform layer.
+    This is an optional feature where some platforms can choose not to
+    provide this data.
+
+    .governor_name:
+	       Name of the thermal governor used for this zone
+    .no_hwmon:
+	       a boolean to indicate if the thermal to hwmon sysfs interface
+	       is required. when no_hwmon == false, a hwmon sysfs interface
+	       will be created. when no_hwmon == true, nothing will be done.
+	       In case the thermal_zone_params is NULL, the hwmon interface
+	       will be created (for backward compatibility).
+    .num_tbps:
+	       Number of thermal_bind_params entries for this zone
+    .tbp:
+	       thermal_bind_params entries
+
+2. sysfs attributes structure
+=============================
+
+==	================
+RO	read only value
+WO	write only value
+RW	read/write value
+==	================
+
+Thermal sysfs attributes will be represented under /sys/class/thermal.
+Hwmon sysfs I/F extension is also available under /sys/class/hwmon
+if hwmon is compiled in or built as a module.
+
+Thermal zone device sys I/F, created once it's registered::
+
+  /sys/class/thermal/thermal_zone[0-*]:
+    |---type:			Type of the thermal zone
+    |---temp:			Current temperature
+    |---mode:			Working mode of the thermal zone
+    |---policy:			Thermal governor used for this zone
+    |---available_policies:	Available thermal governors for this zone
+    |---trip_point_[0-*]_temp:	Trip point temperature
+    |---trip_point_[0-*]_type:	Trip point type
+    |---trip_point_[0-*]_hyst:	Hysteresis value for this trip point
+    |---emul_temp:		Emulated temperature set node
+    |---sustainable_power:      Sustainable dissipatable power
+    |---k_po:                   Proportional term during temperature overshoot
+    |---k_pu:                   Proportional term during temperature undershoot
+    |---k_i:                    PID's integral term in the power allocator gov
+    |---k_d:                    PID's derivative term in the power allocator
+    |---integral_cutoff:        Offset above which errors are accumulated
+    |---slope:                  Slope constant applied as linear extrapolation
+    |---offset:                 Offset constant applied as linear extrapolation
+
+Thermal cooling device sys I/F, created once it's registered::
+
+  /sys/class/thermal/cooling_device[0-*]:
+    |---type:			Type of the cooling device(processor/fan/...)
+    |---max_state:		Maximum cooling state of the cooling device
+    |---cur_state:		Current cooling state of the cooling device
+    |---stats:			Directory containing cooling device's statistics
+    |---stats/reset:		Writing any value resets the statistics
+    |---stats/time_in_state_ms:	Time (msec) spent in various cooling states
+    |---stats/total_trans:	Total number of times cooling state is changed
+    |---stats/trans_table:	Cooing state transition table
+
+
+Then next two dynamic attributes are created/removed in pairs. They represent
+the relationship between a thermal zone and its associated cooling device.
+They are created/removed for each successful execution of
+thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device.
+
+::
+
+  /sys/class/thermal/thermal_zone[0-*]:
+    |---cdev[0-*]:		[0-*]th cooling device in current thermal zone
+    |---cdev[0-*]_trip_point:	Trip point that cdev[0-*] is associated with
+    |---cdev[0-*]_weight:       Influence of the cooling device in
+				this thermal zone
+
+Besides the thermal zone device sysfs I/F and cooling device sysfs I/F,
+the generic thermal driver also creates a hwmon sysfs I/F for each _type_
+of thermal zone device. E.g. the generic thermal driver registers one hwmon
+class device and build the associated hwmon sysfs I/F for all the registered
+ACPI thermal zones.
+
+::
+
+  /sys/class/hwmon/hwmon[0-*]:
+    |---name:			The type of the thermal zone devices
+    |---temp[1-*]_input:	The current temperature of thermal zone [1-*]
+    |---temp[1-*]_critical:	The critical trip point of thermal zone [1-*]
+
+Please read Documentation/hwmon/sysfs-interface.rst for additional information.
+
+Thermal zone attributes
+-----------------------
+
+type
+	Strings which represent the thermal zone type.
+	This is given by thermal zone driver as part of registration.
+	E.g: "acpitz" indicates it's an ACPI thermal device.
+	In order to keep it consistent with hwmon sys attribute; this should
+	be a short, lowercase string, not containing spaces nor dashes.
+	RO, Required
+
+temp
+	Current temperature as reported by thermal zone (sensor).
+	Unit: millidegree Celsius
+	RO, Required
+
+mode
+	One of the predefined values in [enabled, disabled].
+	This file gives information about the algorithm that is currently
+	managing the thermal zone. It can be either default kernel based
+	algorithm or user space application.
+
+	enabled
+			  enable Kernel Thermal management.
+	disabled
+			  Preventing kernel thermal zone driver actions upon
+			  trip points so that user application can take full
+			  charge of the thermal management.
+
+	RW, Optional
+
+policy
+	One of the various thermal governors used for a particular zone.
+
+	RW, Required
+
+available_policies
+	Available thermal governors which can be used for a particular zone.
+
+	RO, Required
+
+`trip_point_[0-*]_temp`
+	The temperature above which trip point will be fired.
+
+	Unit: millidegree Celsius
+
+	RO, Optional
+
+`trip_point_[0-*]_type`
+	Strings which indicate the type of the trip point.
+
+	E.g. it can be one of critical, hot, passive, `active[0-*]` for ACPI
+	thermal zone.
+
+	RO, Optional
+
+`trip_point_[0-*]_hyst`
+	The hysteresis value for a trip point, represented as an integer
+	Unit: Celsius
+	RW, Optional
+
+`cdev[0-*]`
+	Sysfs link to the thermal cooling device node where the sys I/F
+	for cooling device throttling control represents.
+
+	RO, Optional
+
+`cdev[0-*]_trip_point`
+	The trip point in this thermal zone which `cdev[0-*]` is associated
+	with; -1 means the cooling device is not associated with any trip
+	point.
+
+	RO, Optional
+
+`cdev[0-*]_weight`
+	The influence of `cdev[0-*]` in this thermal zone. This value
+	is relative to the rest of cooling devices in the thermal
+	zone. For example, if a cooling device has a weight double
+	than that of other, it's twice as effective in cooling the
+	thermal zone.
+
+	RW, Optional
+
+passive
+	Attribute is only present for zones in which the passive cooling
+	policy is not supported by native thermal driver. Default is zero
+	and can be set to a temperature (in millidegrees) to enable a
+	passive trip point for the zone. Activation is done by polling with
+	an interval of 1 second.
+
+	Unit: millidegrees Celsius
+
+	Valid values: 0 (disabled) or greater than 1000
+
+	RW, Optional
+
+emul_temp
+	Interface to set the emulated temperature method in thermal zone
+	(sensor). After setting this temperature, the thermal zone may pass
+	this temperature to platform emulation function if registered or
+	cache it locally. This is useful in debugging different temperature
+	threshold and its associated cooling action. This is write only node
+	and writing 0 on this node should disable emulation.
+	Unit: millidegree Celsius
+
+	WO, Optional
+
+	  WARNING:
+	    Be careful while enabling this option on production systems,
+	    because userland can easily disable the thermal policy by simply
+	    flooding this sysfs node with low temperature values.
+
+sustainable_power
+	An estimate of the sustained power that can be dissipated by
+	the thermal zone. Used by the power allocator governor. For
+	more information see Documentation/thermal/power_allocator.rst
+
+	Unit: milliwatts
+
+	RW, Optional
+
+k_po
+	The proportional term of the power allocator governor's PID
+	controller during temperature overshoot. Temperature overshoot
+	is when the current temperature is above the "desired
+	temperature" trip point. For more information see
+	Documentation/thermal/power_allocator.rst
+
+	RW, Optional
+
+k_pu
+	The proportional term of the power allocator governor's PID
+	controller during temperature undershoot. Temperature undershoot
+	is when the current temperature is below the "desired
+	temperature" trip point. For more information see
+	Documentation/thermal/power_allocator.rst
+
+	RW, Optional
+
+k_i
+	The integral term of the power allocator governor's PID
+	controller. This term allows the PID controller to compensate
+	for long term drift. For more information see
+	Documentation/thermal/power_allocator.rst
+
+	RW, Optional
+
+k_d
+	The derivative term of the power allocator governor's PID
+	controller. For more information see
+	Documentation/thermal/power_allocator.rst
+
+	RW, Optional
+
+integral_cutoff
+	Temperature offset from the desired temperature trip point
+	above which the integral term of the power allocator
+	governor's PID controller starts accumulating errors. For
+	example, if integral_cutoff is 0, then the integral term only
+	accumulates error when temperature is above the desired
+	temperature trip point. For more information see
+	Documentation/thermal/power_allocator.rst
+
+	Unit: millidegree Celsius
+
+	RW, Optional
+
+slope
+	The slope constant used in a linear extrapolation model
+	to determine a hotspot temperature based off the sensor's
+	raw readings. It is up to the device driver to determine
+	the usage of these values.
+
+	RW, Optional
+
+offset
+	The offset constant used in a linear extrapolation model
+	to determine a hotspot temperature based off the sensor's
+	raw readings. It is up to the device driver to determine
+	the usage of these values.
+
+	RW, Optional
+
+Cooling device attributes
+-------------------------
+
+type
+	String which represents the type of device, e.g:
+
+	- for generic ACPI: should be "Fan", "Processor" or "LCD"
+	- for memory controller device on intel_menlow platform:
+	  should be "Memory controller".
+
+	RO, Required
+
+max_state
+	The maximum permissible cooling state of this cooling device.
+
+	RO, Required
+
+cur_state
+	The current cooling state of this cooling device.
+	The value can any integer numbers between 0 and max_state:
+
+	- cur_state == 0 means no cooling
+	- cur_state == max_state means the maximum cooling.
+
+	RW, Required
+
+stats/reset
+	Writing any value resets the cooling device's statistics.
+	WO, Required
+
+stats/time_in_state_ms:
+	The amount of time spent by the cooling device in various cooling
+	states. The output will have "<state> <time>" pair in each line, which
+	will mean this cooling device spent <time> msec of time at <state>.
+	Output will have one line for each of the supported states.  usertime
+	units here is 10mS (similar to other time exported in /proc).
+	RO, Required
+
+
+stats/total_trans:
+	A single positive value showing the total number of times the state of a
+	cooling device is changed.
+
+	RO, Required
+
+stats/trans_table:
+	This gives fine grained information about all the cooling state
+	transitions. The cat output here is a two dimensional matrix, where an
+	entry <i,j> (row i, column j) represents the number of transitions from
+	State_i to State_j. If the transition table is bigger than PAGE_SIZE,
+	reading this will return an -EFBIG error.
+	RO, Required
+
+3. A simple implementation
+==========================
+
+ACPI thermal zone may support multiple trip points like critical, hot,
+passive, active. If an ACPI thermal zone supports critical, passive,
+active[0] and active[1] at the same time, it may register itself as a
+thermal_zone_device (thermal_zone1) with 4 trip points in all.
+It has one processor and one fan, which are both registered as
+thermal_cooling_device. Both are considered to have the same
+effectiveness in cooling the thermal zone.
+
+If the processor is listed in _PSL method, and the fan is listed in _AL0
+method, the sys I/F structure will be built like this::
+
+ /sys/class/thermal:
+  |thermal_zone1:
+    |---type:			acpitz
+    |---temp:			37000
+    |---mode:			enabled
+    |---policy:			step_wise
+    |---available_policies:	step_wise fair_share
+    |---trip_point_0_temp:	100000
+    |---trip_point_0_type:	critical
+    |---trip_point_1_temp:	80000
+    |---trip_point_1_type:	passive
+    |---trip_point_2_temp:	70000
+    |---trip_point_2_type:	active0
+    |---trip_point_3_temp:	60000
+    |---trip_point_3_type:	active1
+    |---cdev0:			--->/sys/class/thermal/cooling_device0
+    |---cdev0_trip_point:	1	/* cdev0 can be used for passive */
+    |---cdev0_weight:           1024
+    |---cdev1:			--->/sys/class/thermal/cooling_device3
+    |---cdev1_trip_point:	2	/* cdev1 can be used for active[0]*/
+    |---cdev1_weight:           1024
+
+  |cooling_device0:
+    |---type:			Processor
+    |---max_state:		8
+    |---cur_state:		0
+
+  |cooling_device3:
+    |---type:			Fan
+    |---max_state:		2
+    |---cur_state:		0
+
+ /sys/class/hwmon:
+  |hwmon0:
+    |---name:			acpitz
+    |---temp1_input:		37000
+    |---temp1_crit:		100000
+
+4. Event Notification
+=====================
+
+The framework includes a simple notification mechanism, in the form of a
+netlink event. Netlink socket initialization is done during the _init_
+of the framework. Drivers which intend to use the notification mechanism
+just need to call thermal_generate_netlink_event() with two arguments viz
+(originator, event). The originator is a pointer to struct thermal_zone_device
+from where the event has been originated. An integer which represents the
+thermal zone device will be used in the message to identify the zone. The
+event will be one of:{THERMAL_AUX0, THERMAL_AUX1, THERMAL_CRITICAL,
+THERMAL_DEV_FAULT}. Notification can be sent when the current temperature
+crosses any of the configured thresholds.
+
+5. Export Symbol APIs
+=====================
+
+5.1. get_tz_trend
+-----------------
+
+This function returns the trend of a thermal zone, i.e the rate of change
+of temperature of the thermal zone. Ideally, the thermal sensor drivers
+are supposed to implement the callback. If they don't, the thermal
+framework calculated the trend by comparing the previous and the current
+temperature values.
+
+5.2. get_thermal_instance
+-------------------------
+
+This function returns the thermal_instance corresponding to a given
+{thermal_zone, cooling_device, trip_point} combination. Returns NULL
+if such an instance does not exist.
+
+5.3. thermal_notify_framework
+-----------------------------
+
+This function handles the trip events from sensor drivers. It starts
+throttling the cooling devices according to the policy configured.
+For CRITICAL and HOT trip points, this notifies the respective drivers,
+and does actual throttling for other trip points i.e ACTIVE and PASSIVE.
+The throttling policy is based on the configured platform data; if no
+platform data is provided, this uses the step_wise throttling policy.
+
+5.4. thermal_cdev_update
+------------------------
+
+This function serves as an arbitrator to set the state of a cooling
+device. It sets the cooling device to the deepest cooling state if
+possible.
+
+6. thermal_emergency_poweroff
+=============================
+
+On an event of critical trip temperature crossing. Thermal framework
+allows the system to shutdown gracefully by calling orderly_poweroff().
+In the event of a failure of orderly_poweroff() to shut down the system
+we are in danger of keeping the system alive at undesirably high
+temperatures. To mitigate this high risk scenario we program a work
+queue to fire after a pre-determined number of seconds to start
+an emergency shutdown of the device using the kernel_power_off()
+function. In case kernel_power_off() fails then finally
+emergency_restart() is called in the worst case.
+
+The delay should be carefully profiled so as to give adequate time for
+orderly_poweroff(). In case of failure of an orderly_poweroff() the
+emergency poweroff kicks in after the delay has elapsed and shuts down
+the system.
+
+If set to 0 emergency poweroff will not be supported. So a carefully
+profiled non-zero positive value is a must for emergerncy poweroff to be
+triggered.
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
deleted file mode 100644
index c3fa500df92c..000000000000
--- a/Documentation/thermal/sysfs-api.txt
+++ /dev/null
@@ -1,636 +0,0 @@
-Generic Thermal Sysfs driver How To
-===================================
-
-Written by Sujith Thomas <sujith.thomas@intel.com>, Zhang Rui <rui.zhang@intel.com>
-
-Updated: 2 January 2008
-
-Copyright (c)  2008 Intel Corporation
-
-
-0. Introduction
-
-The generic thermal sysfs provides a set of interfaces for thermal zone
-devices (sensors) and thermal cooling devices (fan, processor...) to register
-with the thermal management solution and to be a part of it.
-
-This how-to focuses on enabling new thermal zone and cooling devices to
-participate in thermal management.
-This solution is platform independent and any type of thermal zone devices
-and cooling devices should be able to make use of the infrastructure.
-
-The main task of the thermal sysfs driver is to expose thermal zone attributes
-as well as cooling device attributes to the user space.
-An intelligent thermal management application can make decisions based on
-inputs from thermal zone attributes (the current temperature and trip point
-temperature) and throttle appropriate devices.
-
-[0-*]	denotes any positive number starting from 0
-[1-*]	denotes any positive number starting from 1
-
-1. thermal sysfs driver interface functions
-
-1.1 thermal zone device interface
-1.1.1 struct thermal_zone_device *thermal_zone_device_register(char *type,
-		int trips, int mask, void *devdata,
-		struct thermal_zone_device_ops *ops,
-		const struct thermal_zone_params *tzp,
-		int passive_delay, int polling_delay))
-
-    This interface function adds a new thermal zone device (sensor) to
-    /sys/class/thermal folder as thermal_zone[0-*]. It tries to bind all the
-    thermal cooling devices registered at the same time.
-
-    type: the thermal zone type.
-    trips: the total number of trip points this thermal zone supports.
-    mask: Bit string: If 'n'th bit is set, then trip point 'n' is writeable.
-    devdata: device private data
-    ops: thermal zone device call-backs.
-	.bind: bind the thermal zone device with a thermal cooling device.
-	.unbind: unbind the thermal zone device with a thermal cooling device.
-	.get_temp: get the current temperature of the thermal zone.
-	.set_trips: set the trip points window. Whenever the current temperature
-		    is updated, the trip points immediately below and above the
-		    current temperature are found.
-	.get_mode: get the current mode (enabled/disabled) of the thermal zone.
-	    - "enabled" means the kernel thermal management is enabled.
-	    - "disabled" will prevent kernel thermal driver action upon trip points
-	      so that user applications can take charge of thermal management.
-	.set_mode: set the mode (enabled/disabled) of the thermal zone.
-	.get_trip_type: get the type of certain trip point.
-	.get_trip_temp: get the temperature above which the certain trip point
-			will be fired.
-	.set_emul_temp: set the emulation temperature which helps in debugging
-			different threshold temperature points.
-    tzp: thermal zone platform parameters.
-    passive_delay: number of milliseconds to wait between polls when
-	performing passive cooling.
-    polling_delay: number of milliseconds to wait between polls when checking
-	whether trip points have been crossed (0 for interrupt driven systems).
-
-
-1.1.2 void thermal_zone_device_unregister(struct thermal_zone_device *tz)
-
-    This interface function removes the thermal zone device.
-    It deletes the corresponding entry from /sys/class/thermal folder and
-    unbinds all the thermal cooling devices it uses.
-
-1.1.3 struct thermal_zone_device *thermal_zone_of_sensor_register(
-		struct device *dev, int sensor_id, void *data,
-		const struct thermal_zone_of_device_ops *ops)
-
-	This interface adds a new sensor to a DT thermal zone.
-	This function will search the list of thermal zones described in
-	device tree and look for the zone that refer to the sensor device
-	pointed by dev->of_node as temperature providers. For the zone
-	pointing to the sensor node, the sensor will be added to the DT
-	thermal zone device.
-
-	The parameters for this interface are:
-	dev:		Device node of sensor containing valid node pointer in
-			dev->of_node.
-	sensor_id:	a sensor identifier, in case the sensor IP has more
-			than one sensors
-	data:		a private pointer (owned by the caller) that will be
-			passed back, when a temperature reading is needed.
-	ops:		struct thermal_zone_of_device_ops *.
-
-			get_temp:	a pointer to a function that reads the
-					sensor temperature. This is mandatory
-					callback provided by sensor driver.
-			set_trips:      a pointer to a function that sets a
-					temperature window. When this window is
-					left the driver must inform the thermal
-					core via thermal_zone_device_update.
-			get_trend: 	a pointer to a function that reads the
-					sensor temperature trend.
-			set_emul_temp:	a pointer to a function that sets
-					sensor emulated temperature.
-	The thermal zone temperature is provided by the get_temp() function
-	pointer of thermal_zone_of_device_ops. When called, it will
-	have the private pointer @data back.
-
-	It returns error pointer if fails otherwise valid thermal zone device
-	handle. Caller should check the return handle with IS_ERR() for finding
-	whether success or not.
-
-1.1.4 void thermal_zone_of_sensor_unregister(struct device *dev,
-		struct thermal_zone_device *tzd)
-
-	This interface unregisters a sensor from a DT thermal zone which was
-	successfully added by interface thermal_zone_of_sensor_register().
-	This function removes the sensor callbacks and private data from the
-	thermal zone device registered with thermal_zone_of_sensor_register()
-	interface. It will also silent the zone by remove the .get_temp() and
-	get_trend() thermal zone device callbacks.
-
-1.1.5 struct thermal_zone_device *devm_thermal_zone_of_sensor_register(
-		struct device *dev, int sensor_id,
-		void *data, const struct thermal_zone_of_device_ops *ops)
-
-	This interface is resource managed version of
-	thermal_zone_of_sensor_register().
-	All details of thermal_zone_of_sensor_register() described in
-	section 1.1.3 is applicable here.
-	The benefit of using this interface to register sensor is that it
-	is not require to explicitly call thermal_zone_of_sensor_unregister()
-	in error path or during driver unbinding as this is done by driver
-	resource manager.
-
-1.1.6 void devm_thermal_zone_of_sensor_unregister(struct device *dev,
-		struct thermal_zone_device *tzd)
-
-	This interface is resource managed version of
-	thermal_zone_of_sensor_unregister().
-	All details of thermal_zone_of_sensor_unregister() described in
-	section 1.1.4 is applicable here.
-	Normally this function will not need to be called and the resource
-	management code will ensure that the resource is freed.
-
-1.1.7 int thermal_zone_get_slope(struct thermal_zone_device *tz)
-
-	This interface is used to read the slope attribute value
-	for the thermal zone device, which might be useful for platform
-	drivers for temperature calculations.
-
-1.1.8 int thermal_zone_get_offset(struct thermal_zone_device *tz)
-
-	This interface is used to read the offset attribute value
-	for the thermal zone device, which might be useful for platform
-	drivers for temperature calculations.
-
-1.2 thermal cooling device interface
-1.2.1 struct thermal_cooling_device *thermal_cooling_device_register(char *name,
-		void *devdata, struct thermal_cooling_device_ops *)
-
-    This interface function adds a new thermal cooling device (fan/processor/...)
-    to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
-    to all the thermal zone devices registered at the same time.
-    name: the cooling device name.
-    devdata: device private data.
-    ops: thermal cooling devices call-backs.
-	.get_max_state: get the Maximum throttle state of the cooling device.
-	.get_cur_state: get the Currently requested throttle state of the cooling device.
-	.set_cur_state: set the Current throttle state of the cooling device.
-
-1.2.2 void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
-
-    This interface function removes the thermal cooling device.
-    It deletes the corresponding entry from /sys/class/thermal folder and
-    unbinds itself from all the thermal zone devices using it.
-
-1.3 interface for binding a thermal zone device with a thermal cooling device
-1.3.1 int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
-	int trip, struct thermal_cooling_device *cdev,
-	unsigned long upper, unsigned long lower, unsigned int weight);
-
-    This interface function binds a thermal cooling device to a particular trip
-    point of a thermal zone device.
-    This function is usually called in the thermal zone device .bind callback.
-    tz: the thermal zone device
-    cdev: thermal cooling device
-    trip: indicates which trip point in this thermal zone the cooling device
-          is associated with.
-    upper:the Maximum cooling state for this trip point.
-          THERMAL_NO_LIMIT means no upper limit,
-	  and the cooling device can be in max_state.
-    lower:the Minimum cooling state can be used for this trip point.
-          THERMAL_NO_LIMIT means no lower limit,
-	  and the cooling device can be in cooling state 0.
-    weight: the influence of this cooling device in this thermal
-            zone.  See 1.4.1 below for more information.
-
-1.3.2 int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz,
-		int trip, struct thermal_cooling_device *cdev);
-
-    This interface function unbinds a thermal cooling device from a particular
-    trip point of a thermal zone device. This function is usually called in
-    the thermal zone device .unbind callback.
-    tz: the thermal zone device
-    cdev: thermal cooling device
-    trip: indicates which trip point in this thermal zone the cooling device
-          is associated with.
-
-1.4 Thermal Zone Parameters
-1.4.1 struct thermal_bind_params
-    This structure defines the following parameters that are used to bind
-    a zone with a cooling device for a particular trip point.
-    .cdev: The cooling device pointer
-    .weight: The 'influence' of a particular cooling device on this
-             zone. This is relative to the rest of the cooling
-             devices. For example, if all cooling devices have a
-             weight of 1, then they all contribute the same. You can
-             use percentages if you want, but it's not mandatory. A
-             weight of 0 means that this cooling device doesn't
-             contribute to the cooling of this zone unless all cooling
-             devices have a weight of 0. If all weights are 0, then
-             they all contribute the same.
-    .trip_mask:This is a bit mask that gives the binding relation between
-               this thermal zone and cdev, for a particular trip point.
-               If nth bit is set, then the cdev and thermal zone are bound
-               for trip point n.
-    .binding_limits: This is an array of cooling state limits. Must have
-                     exactly 2 * thermal_zone.number_of_trip_points. It is an
-                     array consisting of tuples <lower-state upper-state> of
-                     state limits. Each trip will be associated with one state
-                     limit tuple when binding. A NULL pointer means
-                     <THERMAL_NO_LIMITS THERMAL_NO_LIMITS> on all trips.
-                     These limits are used when binding a cdev to a trip point.
-    .match: This call back returns success(0) if the 'tz and cdev' need to
-	    be bound, as per platform data.
-1.4.2 struct thermal_zone_params
-    This structure defines the platform level parameters for a thermal zone.
-    This data, for each thermal zone should come from the platform layer.
-    This is an optional feature where some platforms can choose not to
-    provide this data.
-    .governor_name: Name of the thermal governor used for this zone
-    .no_hwmon: a boolean to indicate if the thermal to hwmon sysfs interface
-               is required. when no_hwmon == false, a hwmon sysfs interface
-               will be created. when no_hwmon == true, nothing will be done.
-               In case the thermal_zone_params is NULL, the hwmon interface
-               will be created (for backward compatibility).
-    .num_tbps: Number of thermal_bind_params entries for this zone
-    .tbp: thermal_bind_params entries
-
-2. sysfs attributes structure
-
-RO	read only value
-WO	write only value
-RW	read/write value
-
-Thermal sysfs attributes will be represented under /sys/class/thermal.
-Hwmon sysfs I/F extension is also available under /sys/class/hwmon
-if hwmon is compiled in or built as a module.
-
-Thermal zone device sys I/F, created once it's registered:
-/sys/class/thermal/thermal_zone[0-*]:
-    |---type:			Type of the thermal zone
-    |---temp:			Current temperature
-    |---mode:			Working mode of the thermal zone
-    |---policy:			Thermal governor used for this zone
-    |---available_policies:	Available thermal governors for this zone
-    |---trip_point_[0-*]_temp:	Trip point temperature
-    |---trip_point_[0-*]_type:	Trip point type
-    |---trip_point_[0-*]_hyst:	Hysteresis value for this trip point
-    |---emul_temp:		Emulated temperature set node
-    |---sustainable_power:      Sustainable dissipatable power
-    |---k_po:                   Proportional term during temperature overshoot
-    |---k_pu:                   Proportional term during temperature undershoot
-    |---k_i:                    PID's integral term in the power allocator gov
-    |---k_d:                    PID's derivative term in the power allocator
-    |---integral_cutoff:        Offset above which errors are accumulated
-    |---slope:                  Slope constant applied as linear extrapolation
-    |---offset:                 Offset constant applied as linear extrapolation
-
-Thermal cooling device sys I/F, created once it's registered:
-/sys/class/thermal/cooling_device[0-*]:
-    |---type:			Type of the cooling device(processor/fan/...)
-    |---max_state:		Maximum cooling state of the cooling device
-    |---cur_state:		Current cooling state of the cooling device
-    |---stats:			Directory containing cooling device's statistics
-    |---stats/reset:		Writing any value resets the statistics
-    |---stats/time_in_state_ms:	Time (msec) spent in various cooling states
-    |---stats/total_trans:	Total number of times cooling state is changed
-    |---stats/trans_table:	Cooing state transition table
-
-
-Then next two dynamic attributes are created/removed in pairs. They represent
-the relationship between a thermal zone and its associated cooling device.
-They are created/removed for each successful execution of
-thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device.
-
-/sys/class/thermal/thermal_zone[0-*]:
-    |---cdev[0-*]:		[0-*]th cooling device in current thermal zone
-    |---cdev[0-*]_trip_point:	Trip point that cdev[0-*] is associated with
-    |---cdev[0-*]_weight:       Influence of the cooling device in
-                                this thermal zone
-
-Besides the thermal zone device sysfs I/F and cooling device sysfs I/F,
-the generic thermal driver also creates a hwmon sysfs I/F for each _type_
-of thermal zone device. E.g. the generic thermal driver registers one hwmon
-class device and build the associated hwmon sysfs I/F for all the registered
-ACPI thermal zones.
-
-/sys/class/hwmon/hwmon[0-*]:
-    |---name:			The type of the thermal zone devices
-    |---temp[1-*]_input:	The current temperature of thermal zone [1-*]
-    |---temp[1-*]_critical:	The critical trip point of thermal zone [1-*]
-
-Please read Documentation/hwmon/sysfs-interface.rst for additional information.
-
-***************************
-* Thermal zone attributes *
-***************************
-
-type
-	Strings which represent the thermal zone type.
-	This is given by thermal zone driver as part of registration.
-	E.g: "acpitz" indicates it's an ACPI thermal device.
-	In order to keep it consistent with hwmon sys attribute; this should
-	be a short, lowercase string, not containing spaces nor dashes.
-	RO, Required
-
-temp
-	Current temperature as reported by thermal zone (sensor).
-	Unit: millidegree Celsius
-	RO, Required
-
-mode
-	One of the predefined values in [enabled, disabled].
-	This file gives information about the algorithm that is currently
-	managing the thermal zone. It can be either default kernel based
-	algorithm or user space application.
-	enabled		= enable Kernel Thermal management.
-	disabled	= Preventing kernel thermal zone driver actions upon
-			  trip points so that user application can take full
-			  charge of the thermal management.
-	RW, Optional
-
-policy
-	One of the various thermal governors used for a particular zone.
-	RW, Required
-
-available_policies
-	Available thermal governors which can be used for a particular zone.
-	RO, Required
-
-trip_point_[0-*]_temp
-	The temperature above which trip point will be fired.
-	Unit: millidegree Celsius
-	RO, Optional
-
-trip_point_[0-*]_type
-	Strings which indicate the type of the trip point.
-	E.g. it can be one of critical, hot, passive, active[0-*] for ACPI
-	thermal zone.
-	RO, Optional
-
-trip_point_[0-*]_hyst
-	The hysteresis value for a trip point, represented as an integer
-	Unit: Celsius
-	RW, Optional
-
-cdev[0-*]
-	Sysfs link to the thermal cooling device node where the sys I/F
-	for cooling device throttling control represents.
-	RO, Optional
-
-cdev[0-*]_trip_point
-	The trip point in this thermal zone which cdev[0-*] is associated
-	with; -1 means the cooling device is not associated with any trip
-	point.
-	RO, Optional
-
-cdev[0-*]_weight
-        The influence of cdev[0-*] in this thermal zone. This value
-        is relative to the rest of cooling devices in the thermal
-        zone. For example, if a cooling device has a weight double
-        than that of other, it's twice as effective in cooling the
-        thermal zone.
-        RW, Optional
-
-passive
-	Attribute is only present for zones in which the passive cooling
-	policy is not supported by native thermal driver. Default is zero
-	and can be set to a temperature (in millidegrees) to enable a
-	passive trip point for the zone. Activation is done by polling with
-	an interval of 1 second.
-	Unit: millidegrees Celsius
-	Valid values: 0 (disabled) or greater than 1000
-	RW, Optional
-
-emul_temp
-	Interface to set the emulated temperature method in thermal zone
-	(sensor). After setting this temperature, the thermal zone may pass
-	this temperature to platform emulation function if registered or
-	cache it locally. This is useful in debugging different temperature
-	threshold and its associated cooling action. This is write only node
-	and writing 0 on this node should disable emulation.
-	Unit: millidegree Celsius
-	WO, Optional
-
-	  WARNING: Be careful while enabling this option on production systems,
-	  because userland can easily disable the thermal policy by simply
-	  flooding this sysfs node with low temperature values.
-
-sustainable_power
-	An estimate of the sustained power that can be dissipated by
-	the thermal zone. Used by the power allocator governor. For
-	more information see Documentation/thermal/power_allocator.txt
-	Unit: milliwatts
-	RW, Optional
-
-k_po
-	The proportional term of the power allocator governor's PID
-	controller during temperature overshoot. Temperature overshoot
-	is when the current temperature is above the "desired
-	temperature" trip point. For more information see
-	Documentation/thermal/power_allocator.txt
-	RW, Optional
-
-k_pu
-	The proportional term of the power allocator governor's PID
-	controller during temperature undershoot. Temperature undershoot
-	is when the current temperature is below the "desired
-	temperature" trip point. For more information see
-	Documentation/thermal/power_allocator.txt
-	RW, Optional
-
-k_i
-	The integral term of the power allocator governor's PID
-	controller. This term allows the PID controller to compensate
-	for long term drift. For more information see
-	Documentation/thermal/power_allocator.txt
-	RW, Optional
-
-k_d
-	The derivative term of the power allocator governor's PID
-	controller. For more information see
-	Documentation/thermal/power_allocator.txt
-	RW, Optional
-
-integral_cutoff
-	Temperature offset from the desired temperature trip point
-	above which the integral term of the power allocator
-	governor's PID controller starts accumulating errors. For
-	example, if integral_cutoff is 0, then the integral term only
-	accumulates error when temperature is above the desired
-	temperature trip point. For more information see
-	Documentation/thermal/power_allocator.txt
-	Unit: millidegree Celsius
-	RW, Optional
-
-slope
-	The slope constant used in a linear extrapolation model
-	to determine a hotspot temperature based off the sensor's
-	raw readings. It is up to the device driver to determine
-	the usage of these values.
-	RW, Optional
-
-offset
-	The offset constant used in a linear extrapolation model
-	to determine a hotspot temperature based off the sensor's
-	raw readings. It is up to the device driver to determine
-	the usage of these values.
-	RW, Optional
-
-*****************************
-* Cooling device attributes *
-*****************************
-
-type
-	String which represents the type of device, e.g:
-	- for generic ACPI: should be "Fan", "Processor" or "LCD"
-	- for memory controller device on intel_menlow platform:
-	  should be "Memory controller".
-	RO, Required
-
-max_state
-	The maximum permissible cooling state of this cooling device.
-	RO, Required
-
-cur_state
-	The current cooling state of this cooling device.
-	The value can any integer numbers between 0 and max_state:
-	- cur_state == 0 means no cooling
-	- cur_state == max_state means the maximum cooling.
-	RW, Required
-
-stats/reset
-	Writing any value resets the cooling device's statistics.
-	WO, Required
-
-stats/time_in_state_ms:
-	The amount of time spent by the cooling device in various cooling
-	states. The output will have "<state> <time>" pair in each line, which
-	will mean this cooling device spent <time> msec of time at <state>.
-	Output will have one line for each of the supported states.  usertime
-	units here is 10mS (similar to other time exported in /proc).
-	RO, Required
-
-stats/total_trans:
-	A single positive value showing the total number of times the state of a
-	cooling device is changed.
-	RO, Required
-
-stats/trans_table:
-	This gives fine grained information about all the cooling state
-	transitions. The cat output here is a two dimensional matrix, where an
-	entry <i,j> (row i, column j) represents the number of transitions from
-	State_i to State_j. If the transition table is bigger than PAGE_SIZE,
-	reading this will return an -EFBIG error.
-	RO, Required
-
-3. A simple implementation
-
-ACPI thermal zone may support multiple trip points like critical, hot,
-passive, active. If an ACPI thermal zone supports critical, passive,
-active[0] and active[1] at the same time, it may register itself as a
-thermal_zone_device (thermal_zone1) with 4 trip points in all.
-It has one processor and one fan, which are both registered as
-thermal_cooling_device. Both are considered to have the same
-effectiveness in cooling the thermal zone.
-
-If the processor is listed in _PSL method, and the fan is listed in _AL0
-method, the sys I/F structure will be built like this:
-
-/sys/class/thermal:
-
-|thermal_zone1:
-    |---type:			acpitz
-    |---temp:			37000
-    |---mode:			enabled
-    |---policy:			step_wise
-    |---available_policies:	step_wise fair_share
-    |---trip_point_0_temp:	100000
-    |---trip_point_0_type:	critical
-    |---trip_point_1_temp:	80000
-    |---trip_point_1_type:	passive
-    |---trip_point_2_temp:	70000
-    |---trip_point_2_type:	active0
-    |---trip_point_3_temp:	60000
-    |---trip_point_3_type:	active1
-    |---cdev0:			--->/sys/class/thermal/cooling_device0
-    |---cdev0_trip_point:	1	/* cdev0 can be used for passive */
-    |---cdev0_weight:           1024
-    |---cdev1:			--->/sys/class/thermal/cooling_device3
-    |---cdev1_trip_point:	2	/* cdev1 can be used for active[0]*/
-    |---cdev1_weight:           1024
-
-|cooling_device0:
-    |---type:			Processor
-    |---max_state:		8
-    |---cur_state:		0
-
-|cooling_device3:
-    |---type:			Fan
-    |---max_state:		2
-    |---cur_state:		0
-
-/sys/class/hwmon:
-
-|hwmon0:
-    |---name:			acpitz
-    |---temp1_input:		37000
-    |---temp1_crit:		100000
-
-4. Event Notification
-
-The framework includes a simple notification mechanism, in the form of a
-netlink event. Netlink socket initialization is done during the _init_
-of the framework. Drivers which intend to use the notification mechanism
-just need to call thermal_generate_netlink_event() with two arguments viz
-(originator, event). The originator is a pointer to struct thermal_zone_device
-from where the event has been originated. An integer which represents the
-thermal zone device will be used in the message to identify the zone. The
-event will be one of:{THERMAL_AUX0, THERMAL_AUX1, THERMAL_CRITICAL,
-THERMAL_DEV_FAULT}. Notification can be sent when the current temperature
-crosses any of the configured thresholds.
-
-5. Export Symbol APIs:
-
-5.1: get_tz_trend:
-This function returns the trend of a thermal zone, i.e the rate of change
-of temperature of the thermal zone. Ideally, the thermal sensor drivers
-are supposed to implement the callback. If they don't, the thermal
-framework calculated the trend by comparing the previous and the current
-temperature values.
-
-5.2:get_thermal_instance:
-This function returns the thermal_instance corresponding to a given
-{thermal_zone, cooling_device, trip_point} combination. Returns NULL
-if such an instance does not exist.
-
-5.3:thermal_notify_framework:
-This function handles the trip events from sensor drivers. It starts
-throttling the cooling devices according to the policy configured.
-For CRITICAL and HOT trip points, this notifies the respective drivers,
-and does actual throttling for other trip points i.e ACTIVE and PASSIVE.
-The throttling policy is based on the configured platform data; if no
-platform data is provided, this uses the step_wise throttling policy.
-
-5.4:thermal_cdev_update:
-This function serves as an arbitrator to set the state of a cooling
-device. It sets the cooling device to the deepest cooling state if
-possible.
-
-6. thermal_emergency_poweroff:
-
-On an event of critical trip temperature crossing. Thermal framework
-allows the system to shutdown gracefully by calling orderly_poweroff().
-In the event of a failure of orderly_poweroff() to shut down the system
-we are in danger of keeping the system alive at undesirably high
-temperatures. To mitigate this high risk scenario we program a work
-queue to fire after a pre-determined number of seconds to start
-an emergency shutdown of the device using the kernel_power_off()
-function. In case kernel_power_off() fails then finally
-emergency_restart() is called in the worst case.
-
-The delay should be carefully profiled so as to give adequate time for
-orderly_poweroff(). In case of failure of an orderly_poweroff() the
-emergency poweroff kicks in after the delay has elapsed and shuts down
-the system.
-
-If set to 0 emergency poweroff will not be supported. So a carefully
-profiled non-zero positive value is a must for emergerncy poweroff to be
-triggered.
diff --git a/Documentation/thermal/x86_pkg_temperature_thermal b/Documentation/thermal/x86_pkg_temperature_thermal
deleted file mode 100644
index 17a3a4c0a0ca..000000000000
--- a/Documentation/thermal/x86_pkg_temperature_thermal
+++ /dev/null
@@ -1,47 +0,0 @@
-Kernel driver: x86_pkg_temp_thermal
-===================
-
-Supported chips:
-* x86: with package level thermal management
-(Verify using: CPUID.06H:EAX[bit 6] =1)
-
-Authors: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
-
-Reference
----
-Intel® 64 and IA-32 Architectures Software Developer’s Manual (Jan, 2013):
-Chapter 14.6: PACKAGE LEVEL THERMAL MANAGEMENT
-
-Description
----------
-
-This driver register CPU digital temperature package level sensor as a thermal
-zone with maximum two user mode configurable trip points. Number of trip points
-depends on the capability of the package. Once the trip point is violated,
-user mode can receive notification via thermal notification mechanism and can
-take any action to control temperature.
-
-
-Threshold management
---------------------
-Each package will register as a thermal zone under /sys/class/thermal.
-Example:
-/sys/class/thermal/thermal_zone1
-
-This contains two trip points:
-- trip_point_0_temp
-- trip_point_1_temp
-
-User can set any temperature between 0 to TJ-Max temperature. Temperature units
-are in milli-degree Celsius. Refer to "Documentation/thermal/sysfs-api.txt" for
-thermal sys-fs details.
-
-Any value other than 0 in these trip points, can trigger thermal notifications.
-Setting 0, stops sending thermal notifications.
-
-Thermal notifications: To get kobject-uevent notifications, set the thermal zone
-policy to "user_space". For example: echo -n "user_space" > policy
-
-
-
-
diff --git a/Documentation/thermal/x86_pkg_temperature_thermal.rst b/Documentation/thermal/x86_pkg_temperature_thermal.rst
new file mode 100644
index 000000000000..f134dbd3f5a9
--- /dev/null
+++ b/Documentation/thermal/x86_pkg_temperature_thermal.rst
@@ -0,0 +1,55 @@
+===================================
+Kernel driver: x86_pkg_temp_thermal
+===================================
+
+Supported chips:
+
+* x86: with package level thermal management
+
+(Verify using: CPUID.06H:EAX[bit 6] =1)
+
+Authors: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+Reference
+---------
+
+Intel® 64 and IA-32 Architectures Software Developer’s Manual (Jan, 2013):
+Chapter 14.6: PACKAGE LEVEL THERMAL MANAGEMENT
+
+Description
+-----------
+
+This driver register CPU digital temperature package level sensor as a thermal
+zone with maximum two user mode configurable trip points. Number of trip points
+depends on the capability of the package. Once the trip point is violated,
+user mode can receive notification via thermal notification mechanism and can
+take any action to control temperature.
+
+
+Threshold management
+--------------------
+Each package will register as a thermal zone under /sys/class/thermal.
+
+Example::
+
+	/sys/class/thermal/thermal_zone1
+
+This contains two trip points:
+
+- trip_point_0_temp
+- trip_point_1_temp
+
+User can set any temperature between 0 to TJ-Max temperature. Temperature units
+are in milli-degree Celsius. Refer to "Documentation/thermal/sysfs-api.rst" for
+thermal sys-fs details.
+
+Any value other than 0 in these trip points, can trigger thermal notifications.
+Setting 0, stops sending thermal notifications.
+
+Thermal notifications:
+To get kobject-uevent notifications, set the thermal zone
+policy to "user_space".
+
+For example::
+
+	echo -n "user_space" > policy
-- 
cgit 


From 696d7349f70ece84e0b485291a28781f0ce11245 Mon Sep 17 00:00:00 2001
From: Tony Xie <tony.xie@rock-chips.com>
Date: Fri, 21 Jun 2019 06:32:56 -0400
Subject: dt-bindings: mfd: rk808: Add binding information for RK809 and RK817.

Add device tree bindings documentation for Rockchip's RK809 & RK817 PMIC.

Signed-off-by: Tony Xie <tony.xie@rock-chips.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/devicetree/bindings/mfd/rk808.txt | 44 +++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mfd/rk808.txt b/Documentation/devicetree/bindings/mfd/rk808.txt
index 1683ec3245bc..04df07f6f793 100644
--- a/Documentation/devicetree/bindings/mfd/rk808.txt
+++ b/Documentation/devicetree/bindings/mfd/rk808.txt
@@ -3,11 +3,15 @@ RK8XX Power Management Integrated Circuit
 The rk8xx family current members:
 rk805
 rk808
+rk809
+rk817
 rk818
 
 Required properties:
 - compatible: "rockchip,rk805"
 - compatible: "rockchip,rk808"
+- compatible: "rockchip,rk809"
+- compatible: "rockchip,rk817"
 - compatible: "rockchip,rk818"
 - reg: I2C slave address
 - interrupts: the interrupt outputs of the controller.
@@ -45,6 +49,23 @@ Optional RK808 properties:
   the gpio controller. If DVS GPIOs aren't present, voltage changes will happen
   very quickly with no slow ramp time.
 
+Optional shared RK809 and RK817 properties:
+- vcc1-supply:  The input supply for DCDC_REG1
+- vcc2-supply:  The input supply for DCDC_REG2
+- vcc3-supply:  The input supply for DCDC_REG3
+- vcc4-supply:  The input supply for DCDC_REG4
+- vcc5-supply:  The input supply for LDO_REG1, LDO_REG2, LDO_REG3
+- vcc6-supply:  The input supply for LDO_REG4, LDO_REG5, LDO_REG6
+- vcc7-supply:  The input supply for LDO_REG7, LDO_REG8, LDO_REG9
+
+Optional RK809 properties:
+- vcc8-supply:  The input supply for SWITCH_REG1
+- vcc9-supply:  The input supply for DCDC_REG5, SWITCH_REG2
+
+Optional RK817 properties:
+- vcc8-supply:  The input supply for BOOST
+- vcc9-supply:  The input supply for OTG_SWITCH
+
 Optional RK818 properties:
 - vcc1-supply:  The input supply for DCDC_REG1
 - vcc2-supply:  The input supply for DCDC_REG2
@@ -86,6 +107,21 @@ number as described in RK808 datasheet.
 	- SWITCH_REGn
 		- valid values for n are 1 to 2
 
+Following regulators of the RK809 and RK817 PMIC blocks are supported. Note that
+the 'n' in regulator name, as in DCDC_REGn or LDOn, represents the DCDC or LDO
+number as described in RK809 and RK817 datasheets.
+
+	- DCDC_REGn
+		- valid values for n are 1 to 5 for RK809.
+		- valid values for n are 1 to 4 for RK817.
+	- LDO_REGn
+		- valid values for n are 1 to 9 for RK809.
+		- valid values for n are 1 to 9 for RK817.
+	- SWITCH_REGn
+		- valid values for n are 1 to 2 for RK809.
+	- BOOST for RK817
+	- OTG_SWITCH for RK817
+
 Following regulators of the RK818 PMIC block are supported. Note that
 the 'n' in regulator name, as in DCDC_REGn or LDOn, represents the DCDC or LDO
 number as described in RK818 datasheet.
@@ -98,6 +134,14 @@ number as described in RK818 datasheet.
 	- HDMI_SWITCH
 	- OTG_SWITCH
 
+It is necessary to configure three pins for both the RK809 and RK817, the three
+pins are "gpio_ts" "gpio_gt" "gpio_slp".
+	The gpio_gt and gpio_ts pins support the gpio function.
+	The gpio_slp pin is for controlling the pmic states, as below:
+		- reset
+		- power down
+		- sleep
+
 Standard regulator bindings are used inside regulator subnodes. Check
   Documentation/devicetree/bindings/regulator/regulator.txt
 for more details
-- 
cgit 


From 63d3cd297bc045536e4c3eaddc2cf6aa4a8cf0df Mon Sep 17 00:00:00 2001
From: Dinh Nguyen <dinguyen@kernel.org>
Date: Thu, 13 Jun 2019 06:31:37 -0500
Subject: dt-bindings: cadence-quadspi: add options reset property

The QSPI module can have an optional reset signals that will hold the
module in a reset state.

Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
---
 Documentation/devicetree/bindings/mtd/cadence-quadspi.txt | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mtd/cadence-quadspi.txt b/Documentation/devicetree/bindings/mtd/cadence-quadspi.txt
index 4345c3a6f530..945be7d5b236 100644
--- a/Documentation/devicetree/bindings/mtd/cadence-quadspi.txt
+++ b/Documentation/devicetree/bindings/mtd/cadence-quadspi.txt
@@ -35,6 +35,9 @@ custom properties:
 		  (qspi_n_ss_out).
 - cdns,tslch-ns : Delay in nanoseconds between setting qspi_n_ss_out low
                   and first bit transfer.
+- resets	: Must contain an entry for each entry in reset-names.
+		  See ../reset/reset.txt for details.
+- reset-names	: Must include either "qspi" and/or "qspi-ocp".
 
 Example:
 
@@ -50,6 +53,8 @@ Example:
 		cdns,fifo-depth = <128>;
 		cdns,fifo-width = <4>;
 		cdns,trigger-address = <0x00000000>;
+		resets = <&rst QSPI_RESET>, <&rst QSPI_OCP_RESET>;
+		reset-names = "qspi", "qspi-ocp";
 
 		flash0: n25q00@0 {
 			...
-- 
cgit 


From a9f0969cd7b3b3653a10ef4a5d62075aa4a5a27f Mon Sep 17 00:00:00 2001
From: Jiunn Chang <c0d1n61at3@gmail.com>
Date: Wed, 26 Jun 2019 15:07:01 -0500
Subject: Documentation: RCU: Convert RCU basic concepts to reST

RCU basic concepts reST markup.

Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Jiunn Chang <c0d1n61at3@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/RCU/rcu.txt | 119 ++++++++++++++++++++++++----------------------
 1 file changed, 61 insertions(+), 58 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt
index c818cf65c5a9..8dfb437dacc3 100644
--- a/Documentation/RCU/rcu.txt
+++ b/Documentation/RCU/rcu.txt
@@ -1,5 +1,7 @@
-RCU Concepts
+.. _rcu_doc:
 
+RCU Concepts
+============
 
 The basic idea behind RCU (read-copy update) is to split destructive
 operations into two parts, one that prevents anyone from seeing the data
@@ -8,82 +10,83 @@ A "grace period" must elapse between the two parts, and this grace period
 must be long enough that any readers accessing the item being deleted have
 since dropped their references.  For example, an RCU-protected deletion
 from a linked list would first remove the item from the list, wait for
-a grace period to elapse, then free the element.  See the listRCU.txt
-file for more information on using RCU with linked lists.
-
+a grace period to elapse, then free the element.  See the
+Documentation/RCU/listRCU.rst file for more information on using RCU with
+linked lists.
 
 Frequently Asked Questions
+--------------------------
 
-o	Why would anyone want to use RCU?
+- Why would anyone want to use RCU?
 
-	The advantage of RCU's two-part approach is that RCU readers need
-	not acquire any locks, perform any atomic instructions, write to
-	shared memory, or (on CPUs other than Alpha) execute any memory
-	barriers.  The fact that these operations are quite expensive
-	on modern CPUs is what gives RCU its performance advantages
-	in read-mostly situations.  The fact that RCU readers need not
-	acquire locks can also greatly simplify deadlock-avoidance code.
+  The advantage of RCU's two-part approach is that RCU readers need
+  not acquire any locks, perform any atomic instructions, write to
+  shared memory, or (on CPUs other than Alpha) execute any memory
+  barriers.  The fact that these operations are quite expensive
+  on modern CPUs is what gives RCU its performance advantages
+  in read-mostly situations.  The fact that RCU readers need not
+  acquire locks can also greatly simplify deadlock-avoidance code.
 
-o	How can the updater tell when a grace period has completed
-	if the RCU readers give no indication when they are done?
+- How can the updater tell when a grace period has completed
+  if the RCU readers give no indication when they are done?
 
-	Just as with spinlocks, RCU readers are not permitted to
-	block, switch to user-mode execution, or enter the idle loop.
-	Therefore, as soon as a CPU is seen passing through any of these
-	three states, we know that that CPU has exited any previous RCU
-	read-side critical sections.  So, if we remove an item from a
-	linked list, and then wait until all CPUs have switched context,
-	executed in user mode, or executed in the idle loop, we can
-	safely free up that item.
+  Just as with spinlocks, RCU readers are not permitted to
+  block, switch to user-mode execution, or enter the idle loop.
+  Therefore, as soon as a CPU is seen passing through any of these
+  three states, we know that that CPU has exited any previous RCU
+  read-side critical sections.  So, if we remove an item from a
+  linked list, and then wait until all CPUs have switched context,
+  executed in user mode, or executed in the idle loop, we can
+  safely free up that item.
 
-	Preemptible variants of RCU (CONFIG_PREEMPT_RCU) get the
-	same effect, but require that the readers manipulate CPU-local
-	counters.  These counters allow limited types of blocking within
-	RCU read-side critical sections.  SRCU also uses CPU-local
-	counters, and permits general blocking within RCU read-side
-	critical sections.  These variants of RCU detect grace periods
-	by sampling these counters.
+  Preemptible variants of RCU (CONFIG_PREEMPT_RCU) get the
+  same effect, but require that the readers manipulate CPU-local
+  counters.  These counters allow limited types of blocking within
+  RCU read-side critical sections.  SRCU also uses CPU-local
+  counters, and permits general blocking within RCU read-side
+  critical sections.  These variants of RCU detect grace periods
+  by sampling these counters.
 
-o	If I am running on a uniprocessor kernel, which can only do one
-	thing at a time, why should I wait for a grace period?
+- If I am running on a uniprocessor kernel, which can only do one
+  thing at a time, why should I wait for a grace period?
 
-	See the UP.txt file in this directory.
+  See the Documentation/RCU/UP.rst file for more information.
 
-o	How can I see where RCU is currently used in the Linux kernel?
+- How can I see where RCU is currently used in the Linux kernel?
 
-	Search for "rcu_read_lock", "rcu_read_unlock", "call_rcu",
-	"rcu_read_lock_bh", "rcu_read_unlock_bh", "srcu_read_lock",
-	"srcu_read_unlock", "synchronize_rcu", "synchronize_net",
-	"synchronize_srcu", and the other RCU primitives.  Or grab one
-	of the cscope databases from:
+  Search for "rcu_read_lock", "rcu_read_unlock", "call_rcu",
+  "rcu_read_lock_bh", "rcu_read_unlock_bh", "srcu_read_lock",
+  "srcu_read_unlock", "synchronize_rcu", "synchronize_net",
+  "synchronize_srcu", and the other RCU primitives.  Or grab one
+  of the cscope databases from:
 
-	http://www.rdrop.com/users/paulmck/RCU/linuxusage/rculocktab.html
+  (http://www.rdrop.com/users/paulmck/RCU/linuxusage/rculocktab.html).
 
-o	What guidelines should I follow when writing code that uses RCU?
+- What guidelines should I follow when writing code that uses RCU?
 
-	See the checklist.txt file in this directory.
+  See the checklist.txt file in this directory.
 
-o	Why the name "RCU"?
+- Why the name "RCU"?
 
-	"RCU" stands for "read-copy update".  The file listRCU.txt has
-	more information on where this name came from, search for
-	"read-copy update" to find it.
+  "RCU" stands for "read-copy update".  The file Documentation/RCU/listRCU.rst
+  has more information on where this name came from, search for
+  "read-copy update" to find it.
 
-o	I hear that RCU is patented?  What is with that?
+- I hear that RCU is patented?  What is with that?
 
-	Yes, it is.  There are several known patents related to RCU,
-	search for the string "Patent" in RTFP.txt to find them.
-	Of these, one was allowed to lapse by the assignee, and the
-	others have been contributed to the Linux kernel under GPL.
-	There are now also LGPL implementations of user-level RCU
-	available (http://liburcu.org/).
+  Yes, it is.  There are several known patents related to RCU,
+  search for the string "Patent" in RTFP.txt to find them.
+  Of these, one was allowed to lapse by the assignee, and the
+  others have been contributed to the Linux kernel under GPL.
+  There are now also LGPL implementations of user-level RCU
+  available (http://liburcu.org/).
 
-o	I hear that RCU needs work in order to support realtime kernels?
+- I hear that RCU needs work in order to support realtime kernels?
 
-	Realtime-friendly RCU can be enabled via the CONFIG_PREEMPT_RCU
-	kernel configuration parameter.
+  Realtime-friendly RCU can be enabled via the CONFIG_PREEMPT_RCU
+  kernel configuration parameter.
 
-o	Where can I find more information on RCU?
+- Where can I find more information on RCU?
 
-	See the RTFP.txt file in this directory.
-	Or point your browser at http://www.rdrop.com/users/paulmck/RCU/.
+  See the RTFP.txt file in this directory.
+  Or point your browser at (http://www.rdrop.com/users/paulmck/RCU/).
-- 
cgit 


From 9422dc24df62ada59ead9e4a499f036b4ce670fc Mon Sep 17 00:00:00 2001
From: Jiunn Chang <c0d1n61at3@gmail.com>
Date: Wed, 26 Jun 2019 15:07:02 -0500
Subject: Documentation: RCU: Convert RCU linked list to reST

RCU linked list reST markup.

Signed-off-by: Jiunn Chang <c0d1n61at3@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/RCU/listRCU.txt | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/RCU/listRCU.txt b/Documentation/RCU/listRCU.txt
index adb5a3782846..7956ff33042b 100644
--- a/Documentation/RCU/listRCU.txt
+++ b/Documentation/RCU/listRCU.txt
@@ -1,5 +1,7 @@
-Using RCU to Protect Read-Mostly Linked Lists
+.. _list_rcu_doc:
 
+Using RCU to Protect Read-Mostly Linked Lists
+=============================================
 
 One of the best applications of RCU is to protect read-mostly linked lists
 ("struct list_head" in list.h).  One big advantage of this approach
@@ -7,8 +9,8 @@ is that all of the required memory barriers are included for you in
 the list macros.  This document describes several applications of RCU,
 with the best fits first.
 
-
 Example 1: Read-Side Action Taken Outside of Lock, No In-Place Updates
+----------------------------------------------------------------------
 
 The best applications are cases where, if reader-writer locking were
 used, the read-side lock would be dropped before taking any action
@@ -24,7 +26,7 @@ added or deleted, rather than being modified in place.
 
 A straightforward example of this use of RCU may be found in the
 system-call auditing support.  For example, a reader-writer locked
-implementation of audit_filter_task() might be as follows:
+implementation of audit_filter_task() might be as follows::
 
 	static enum audit_state audit_filter_task(struct task_struct *tsk)
 	{
@@ -48,7 +50,7 @@ the corresponding value is returned.  By the time that this value is acted
 on, the list may well have been modified.  This makes sense, since if
 you are turning auditing off, it is OK to audit a few extra system calls.
 
-This means that RCU can be easily applied to the read side, as follows:
+This means that RCU can be easily applied to the read side, as follows::
 
 	static enum audit_state audit_filter_task(struct task_struct *tsk)
 	{
@@ -73,7 +75,7 @@ become list_for_each_entry_rcu().  The _rcu() list-traversal primitives
 insert the read-side memory barriers that are required on DEC Alpha CPUs.
 
 The changes to the update side are also straightforward.  A reader-writer
-lock might be used as follows for deletion and insertion:
+lock might be used as follows for deletion and insertion::
 
 	static inline int audit_del_rule(struct audit_rule *rule,
 					 struct list_head *list)
@@ -106,7 +108,7 @@ lock might be used as follows for deletion and insertion:
 		return 0;
 	}
 
-Following are the RCU equivalents for these two functions:
+Following are the RCU equivalents for these two functions::
 
 	static inline int audit_del_rule(struct audit_rule *rule,
 					 struct list_head *list)
@@ -154,13 +156,13 @@ otherwise cause concurrent readers to fail spectacularly.
 So, when readers can tolerate stale data and when entries are either added
 or deleted, without in-place modification, it is very easy to use RCU!
 
-
 Example 2: Handling In-Place Updates
+------------------------------------
 
 The system-call auditing code does not update auditing rules in place.
 However, if it did, reader-writer-locked code to do so might look as
 follows (presumably, the field_count is only permitted to decrease,
-otherwise, the added fields would need to be filled in):
+otherwise, the added fields would need to be filled in)::
 
 	static inline int audit_upd_rule(struct audit_rule *rule,
 					 struct list_head *list,
@@ -187,7 +189,7 @@ otherwise, the added fields would need to be filled in):
 The RCU version creates a copy, updates the copy, then replaces the old
 entry with the newly updated entry.  This sequence of actions, allowing
 concurrent reads while doing a copy to perform an update, is what gives
-RCU ("read-copy update") its name.  The RCU code is as follows:
+RCU ("read-copy update") its name.  The RCU code is as follows::
 
 	static inline int audit_upd_rule(struct audit_rule *rule,
 					 struct list_head *list,
@@ -216,8 +218,8 @@ RCU ("read-copy update") its name.  The RCU code is as follows:
 Again, this assumes that the caller holds audit_netlink_sem.  Normally,
 the reader-writer lock would become a spinlock in this sort of code.
 
-
 Example 3: Eliminating Stale Data
+---------------------------------
 
 The auditing examples above tolerate stale data, as do most algorithms
 that are tracking external state.  Because there is a delay from the
@@ -231,13 +233,16 @@ per-entry spinlock, and, if the "deleted" flag is set, pretends that the
 entry does not exist.  For this to be helpful, the search function must
 return holding the per-entry spinlock, as ipc_lock() does in fact do.
 
-Quick Quiz:  Why does the search function need to return holding the
-	per-entry lock for this deleted-flag technique to be helpful?
+Quick Quiz:
+	Why does the search function need to return holding the per-entry lock for
+	this deleted-flag technique to be helpful?
+
+:ref:`Answer to Quick Quiz <answer_quick_quiz_list>`
 
 If the system-call audit module were to ever need to reject stale data,
 one way to accomplish this would be to add a "deleted" flag and a "lock"
 spinlock to the audit_entry structure, and modify audit_filter_task()
-as follows:
+as follows::
 
 	static enum audit_state audit_filter_task(struct task_struct *tsk)
 	{
@@ -268,7 +273,7 @@ audit_upd_rule() would need additional memory barriers to ensure
 that the list_add_rcu() was really executed before the list_del_rcu().
 
 The audit_del_rule() function would need to set the "deleted"
-flag under the spinlock as follows:
+flag under the spinlock as follows::
 
 	static inline int audit_del_rule(struct audit_rule *rule,
 					 struct list_head *list)
@@ -290,8 +295,8 @@ flag under the spinlock as follows:
 		return -EFAULT;		/* No matching rule */
 	}
 
-
 Summary
+-------
 
 Read-mostly list-based data structures that can tolerate stale data are
 the most amenable to use of RCU.  The simplest case is where entries are
@@ -302,8 +307,9 @@ If stale data cannot be tolerated, then a "deleted" flag may be used
 in conjunction with a per-entry spinlock in order to allow the search
 function to reject newly deleted data.
 
+.. _answer_quick_quiz_list:
 
-Answer to Quick Quiz
+Answer to Quick Quiz:
 	Why does the search function need to return holding the per-entry
 	lock for this deleted-flag technique to be helpful?
 
-- 
cgit 


From 2a5b0c841a9932e3562c9b0dfddeb54de255a595 Mon Sep 17 00:00:00 2001
From: Jiunn Chang <c0d1n61at3@gmail.com>
Date: Wed, 26 Jun 2019 15:07:03 -0500
Subject: Documentation: RCU: Convert RCU UP systems to reST

RCU UP systems reST markup.

Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Jiunn Chang <c0d1n61at3@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/RCU/UP.txt | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/RCU/UP.txt b/Documentation/RCU/UP.txt
index 53bde717017b..67715a47ae89 100644
--- a/Documentation/RCU/UP.txt
+++ b/Documentation/RCU/UP.txt
@@ -1,17 +1,19 @@
-RCU on Uniprocessor Systems
+.. _up_doc:
 
+RCU on Uniprocessor Systems
+===========================
 
 A common misconception is that, on UP systems, the call_rcu() primitive
 may immediately invoke its function.  The basis of this misconception
 is that since there is only one CPU, it should not be necessary to
 wait for anything else to get done, since there are no other CPUs for
-anything else to be happening on.  Although this approach will -sort- -of-
+anything else to be happening on.  Although this approach will *sort of*
 work a surprising amount of the time, it is a very bad idea in general.
 This document presents three examples that demonstrate exactly how bad
 an idea this is.
 
-
 Example 1: softirq Suicide
+--------------------------
 
 Suppose that an RCU-based algorithm scans a linked list containing
 elements A, B, and C in process context, and can delete elements from
@@ -28,8 +30,8 @@ your kernel.
 This same problem can occur if call_rcu() is invoked from a hardware
 interrupt handler.
 
-
 Example 2: Function-Call Fatality
+---------------------------------
 
 Of course, one could avert the suicide described in the preceding example
 by having call_rcu() directly invoke its arguments only if it was called
@@ -46,11 +48,13 @@ its arguments would cause it to fail to make the fundamental guarantee
 underlying RCU, namely that call_rcu() defers invoking its arguments until
 all RCU read-side critical sections currently executing have completed.
 
-Quick Quiz #1: why is it -not- legal to invoke synchronize_rcu() in
-	this case?
+Quick Quiz #1:
+	Why is it *not* legal to invoke synchronize_rcu() in this case?
 
+:ref:`Answers to Quick Quiz <answer_quick_quiz_up>`
 
 Example 3: Death by Deadlock
+----------------------------
 
 Suppose that call_rcu() is invoked while holding a lock, and that the
 callback function must acquire this same lock.  In this case, if
@@ -76,25 +80,30 @@ there are cases where this can be quite ugly:
 If call_rcu() directly invokes the callback, painful locking restrictions
 or API changes would be required.
 
-Quick Quiz #2: What locking restriction must RCU callbacks respect?
+Quick Quiz #2:
+	What locking restriction must RCU callbacks respect?
 
+:ref:`Answers to Quick Quiz <answer_quick_quiz_up>`
 
 Summary
+-------
 
 Permitting call_rcu() to immediately invoke its arguments breaks RCU,
 even on a UP system.  So do not do it!  Even on a UP system, the RCU
-infrastructure -must- respect grace periods, and -must- invoke callbacks
+infrastructure *must* respect grace periods, and *must* invoke callbacks
 from a known environment in which no locks are held.
 
-Note that it -is- safe for synchronize_rcu() to return immediately on
-UP systems, including !PREEMPT SMP builds running on UP systems.
+Note that it *is* safe for synchronize_rcu() to return immediately on
+UP systems, including PREEMPT SMP builds running on UP systems.
 
-Quick Quiz #3: Why can't synchronize_rcu() return immediately on
-	UP systems running preemptable RCU?
+Quick Quiz #3:
+	Why can't synchronize_rcu() return immediately on UP systems running
+	preemptable RCU?
 
+.. _answer_quick_quiz_up:
 
 Answer to Quick Quiz #1:
-	Why is it -not- legal to invoke synchronize_rcu() in this case?
+	Why is it *not* legal to invoke synchronize_rcu() in this case?
 
 	Because the calling function is scanning an RCU-protected linked
 	list, and is therefore within an RCU read-side critical section.
@@ -119,7 +128,7 @@ Answer to Quick Quiz #2:
 
 	This restriction might seem gratuitous, since very few RCU
 	callbacks acquire locks directly.  However, a great many RCU
-	callbacks do acquire locks -indirectly-, for example, via
+	callbacks do acquire locks *indirectly*, for example, via
 	the kfree() primitive.
 
 Answer to Quick Quiz #3:
-- 
cgit 


From f93a3e4e8705eb3ea17dcd68819b60875c834bad Mon Sep 17 00:00:00 2001
From: Jiunn Chang <c0d1n61at3@gmail.com>
Date: Wed, 26 Jun 2019 15:07:04 -0500
Subject: Documentation: RCU: Rename txt files to rst

Rename the following files to reST:
  - rcu.txt
  - listRCU.txt
  - UP.txt

Signed-off-by: Jiunn Chang <c0d1n61at3@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/RCU/UP.rst      | 142 +++++++++++++++++++
 Documentation/RCU/UP.txt      | 142 -------------------
 Documentation/RCU/listRCU.rst | 321 ++++++++++++++++++++++++++++++++++++++++++
 Documentation/RCU/listRCU.txt | 321 ------------------------------------------
 Documentation/RCU/rcu.rst     |  92 ++++++++++++
 Documentation/RCU/rcu.txt     |  92 ------------
 6 files changed, 555 insertions(+), 555 deletions(-)
 create mode 100644 Documentation/RCU/UP.rst
 delete mode 100644 Documentation/RCU/UP.txt
 create mode 100644 Documentation/RCU/listRCU.rst
 delete mode 100644 Documentation/RCU/listRCU.txt
 create mode 100644 Documentation/RCU/rcu.rst
 delete mode 100644 Documentation/RCU/rcu.txt

(limited to 'Documentation')

diff --git a/Documentation/RCU/UP.rst b/Documentation/RCU/UP.rst
new file mode 100644
index 000000000000..67715a47ae89
--- /dev/null
+++ b/Documentation/RCU/UP.rst
@@ -0,0 +1,142 @@
+.. _up_doc:
+
+RCU on Uniprocessor Systems
+===========================
+
+A common misconception is that, on UP systems, the call_rcu() primitive
+may immediately invoke its function.  The basis of this misconception
+is that since there is only one CPU, it should not be necessary to
+wait for anything else to get done, since there are no other CPUs for
+anything else to be happening on.  Although this approach will *sort of*
+work a surprising amount of the time, it is a very bad idea in general.
+This document presents three examples that demonstrate exactly how bad
+an idea this is.
+
+Example 1: softirq Suicide
+--------------------------
+
+Suppose that an RCU-based algorithm scans a linked list containing
+elements A, B, and C in process context, and can delete elements from
+this same list in softirq context.  Suppose that the process-context scan
+is referencing element B when it is interrupted by softirq processing,
+which deletes element B, and then invokes call_rcu() to free element B
+after a grace period.
+
+Now, if call_rcu() were to directly invoke its arguments, then upon return
+from softirq, the list scan would find itself referencing a newly freed
+element B.  This situation can greatly decrease the life expectancy of
+your kernel.
+
+This same problem can occur if call_rcu() is invoked from a hardware
+interrupt handler.
+
+Example 2: Function-Call Fatality
+---------------------------------
+
+Of course, one could avert the suicide described in the preceding example
+by having call_rcu() directly invoke its arguments only if it was called
+from process context.  However, this can fail in a similar manner.
+
+Suppose that an RCU-based algorithm again scans a linked list containing
+elements A, B, and C in process contexts, but that it invokes a function
+on each element as it is scanned.  Suppose further that this function
+deletes element B from the list, then passes it to call_rcu() for deferred
+freeing.  This may be a bit unconventional, but it is perfectly legal
+RCU usage, since call_rcu() must wait for a grace period to elapse.
+Therefore, in this case, allowing call_rcu() to immediately invoke
+its arguments would cause it to fail to make the fundamental guarantee
+underlying RCU, namely that call_rcu() defers invoking its arguments until
+all RCU read-side critical sections currently executing have completed.
+
+Quick Quiz #1:
+	Why is it *not* legal to invoke synchronize_rcu() in this case?
+
+:ref:`Answers to Quick Quiz <answer_quick_quiz_up>`
+
+Example 3: Death by Deadlock
+----------------------------
+
+Suppose that call_rcu() is invoked while holding a lock, and that the
+callback function must acquire this same lock.  In this case, if
+call_rcu() were to directly invoke the callback, the result would
+be self-deadlock.
+
+In some cases, it would possible to restructure to code so that
+the call_rcu() is delayed until after the lock is released.  However,
+there are cases where this can be quite ugly:
+
+1.	If a number of items need to be passed to call_rcu() within
+	the same critical section, then the code would need to create
+	a list of them, then traverse the list once the lock was
+	released.
+
+2.	In some cases, the lock will be held across some kernel API,
+	so that delaying the call_rcu() until the lock is released
+	requires that the data item be passed up via a common API.
+	It is far better to guarantee that callbacks are invoked
+	with no locks held than to have to modify such APIs to allow
+	arbitrary data items to be passed back up through them.
+
+If call_rcu() directly invokes the callback, painful locking restrictions
+or API changes would be required.
+
+Quick Quiz #2:
+	What locking restriction must RCU callbacks respect?
+
+:ref:`Answers to Quick Quiz <answer_quick_quiz_up>`
+
+Summary
+-------
+
+Permitting call_rcu() to immediately invoke its arguments breaks RCU,
+even on a UP system.  So do not do it!  Even on a UP system, the RCU
+infrastructure *must* respect grace periods, and *must* invoke callbacks
+from a known environment in which no locks are held.
+
+Note that it *is* safe for synchronize_rcu() to return immediately on
+UP systems, including PREEMPT SMP builds running on UP systems.
+
+Quick Quiz #3:
+	Why can't synchronize_rcu() return immediately on UP systems running
+	preemptable RCU?
+
+.. _answer_quick_quiz_up:
+
+Answer to Quick Quiz #1:
+	Why is it *not* legal to invoke synchronize_rcu() in this case?
+
+	Because the calling function is scanning an RCU-protected linked
+	list, and is therefore within an RCU read-side critical section.
+	Therefore, the called function has been invoked within an RCU
+	read-side critical section, and is not permitted to block.
+
+Answer to Quick Quiz #2:
+	What locking restriction must RCU callbacks respect?
+
+	Any lock that is acquired within an RCU callback must be
+	acquired elsewhere using an _irq variant of the spinlock
+	primitive.  For example, if "mylock" is acquired by an
+	RCU callback, then a process-context acquisition of this
+	lock must use something like spin_lock_irqsave() to
+	acquire the lock.
+
+	If the process-context code were to simply use spin_lock(),
+	then, since RCU callbacks can be invoked from softirq context,
+	the callback might be called from a softirq that interrupted
+	the process-context critical section.  This would result in
+	self-deadlock.
+
+	This restriction might seem gratuitous, since very few RCU
+	callbacks acquire locks directly.  However, a great many RCU
+	callbacks do acquire locks *indirectly*, for example, via
+	the kfree() primitive.
+
+Answer to Quick Quiz #3:
+	Why can't synchronize_rcu() return immediately on UP systems
+	running preemptable RCU?
+
+	Because some other task might have been preempted in the middle
+	of an RCU read-side critical section.  If synchronize_rcu()
+	simply immediately returned, it would prematurely signal the
+	end of the grace period, which would come as a nasty shock to
+	that other thread when it started running again.
diff --git a/Documentation/RCU/UP.txt b/Documentation/RCU/UP.txt
deleted file mode 100644
index 67715a47ae89..000000000000
--- a/Documentation/RCU/UP.txt
+++ /dev/null
@@ -1,142 +0,0 @@
-.. _up_doc:
-
-RCU on Uniprocessor Systems
-===========================
-
-A common misconception is that, on UP systems, the call_rcu() primitive
-may immediately invoke its function.  The basis of this misconception
-is that since there is only one CPU, it should not be necessary to
-wait for anything else to get done, since there are no other CPUs for
-anything else to be happening on.  Although this approach will *sort of*
-work a surprising amount of the time, it is a very bad idea in general.
-This document presents three examples that demonstrate exactly how bad
-an idea this is.
-
-Example 1: softirq Suicide
---------------------------
-
-Suppose that an RCU-based algorithm scans a linked list containing
-elements A, B, and C in process context, and can delete elements from
-this same list in softirq context.  Suppose that the process-context scan
-is referencing element B when it is interrupted by softirq processing,
-which deletes element B, and then invokes call_rcu() to free element B
-after a grace period.
-
-Now, if call_rcu() were to directly invoke its arguments, then upon return
-from softirq, the list scan would find itself referencing a newly freed
-element B.  This situation can greatly decrease the life expectancy of
-your kernel.
-
-This same problem can occur if call_rcu() is invoked from a hardware
-interrupt handler.
-
-Example 2: Function-Call Fatality
----------------------------------
-
-Of course, one could avert the suicide described in the preceding example
-by having call_rcu() directly invoke its arguments only if it was called
-from process context.  However, this can fail in a similar manner.
-
-Suppose that an RCU-based algorithm again scans a linked list containing
-elements A, B, and C in process contexts, but that it invokes a function
-on each element as it is scanned.  Suppose further that this function
-deletes element B from the list, then passes it to call_rcu() for deferred
-freeing.  This may be a bit unconventional, but it is perfectly legal
-RCU usage, since call_rcu() must wait for a grace period to elapse.
-Therefore, in this case, allowing call_rcu() to immediately invoke
-its arguments would cause it to fail to make the fundamental guarantee
-underlying RCU, namely that call_rcu() defers invoking its arguments until
-all RCU read-side critical sections currently executing have completed.
-
-Quick Quiz #1:
-	Why is it *not* legal to invoke synchronize_rcu() in this case?
-
-:ref:`Answers to Quick Quiz <answer_quick_quiz_up>`
-
-Example 3: Death by Deadlock
-----------------------------
-
-Suppose that call_rcu() is invoked while holding a lock, and that the
-callback function must acquire this same lock.  In this case, if
-call_rcu() were to directly invoke the callback, the result would
-be self-deadlock.
-
-In some cases, it would possible to restructure to code so that
-the call_rcu() is delayed until after the lock is released.  However,
-there are cases where this can be quite ugly:
-
-1.	If a number of items need to be passed to call_rcu() within
-	the same critical section, then the code would need to create
-	a list of them, then traverse the list once the lock was
-	released.
-
-2.	In some cases, the lock will be held across some kernel API,
-	so that delaying the call_rcu() until the lock is released
-	requires that the data item be passed up via a common API.
-	It is far better to guarantee that callbacks are invoked
-	with no locks held than to have to modify such APIs to allow
-	arbitrary data items to be passed back up through them.
-
-If call_rcu() directly invokes the callback, painful locking restrictions
-or API changes would be required.
-
-Quick Quiz #2:
-	What locking restriction must RCU callbacks respect?
-
-:ref:`Answers to Quick Quiz <answer_quick_quiz_up>`
-
-Summary
--------
-
-Permitting call_rcu() to immediately invoke its arguments breaks RCU,
-even on a UP system.  So do not do it!  Even on a UP system, the RCU
-infrastructure *must* respect grace periods, and *must* invoke callbacks
-from a known environment in which no locks are held.
-
-Note that it *is* safe for synchronize_rcu() to return immediately on
-UP systems, including PREEMPT SMP builds running on UP systems.
-
-Quick Quiz #3:
-	Why can't synchronize_rcu() return immediately on UP systems running
-	preemptable RCU?
-
-.. _answer_quick_quiz_up:
-
-Answer to Quick Quiz #1:
-	Why is it *not* legal to invoke synchronize_rcu() in this case?
-
-	Because the calling function is scanning an RCU-protected linked
-	list, and is therefore within an RCU read-side critical section.
-	Therefore, the called function has been invoked within an RCU
-	read-side critical section, and is not permitted to block.
-
-Answer to Quick Quiz #2:
-	What locking restriction must RCU callbacks respect?
-
-	Any lock that is acquired within an RCU callback must be
-	acquired elsewhere using an _irq variant of the spinlock
-	primitive.  For example, if "mylock" is acquired by an
-	RCU callback, then a process-context acquisition of this
-	lock must use something like spin_lock_irqsave() to
-	acquire the lock.
-
-	If the process-context code were to simply use spin_lock(),
-	then, since RCU callbacks can be invoked from softirq context,
-	the callback might be called from a softirq that interrupted
-	the process-context critical section.  This would result in
-	self-deadlock.
-
-	This restriction might seem gratuitous, since very few RCU
-	callbacks acquire locks directly.  However, a great many RCU
-	callbacks do acquire locks *indirectly*, for example, via
-	the kfree() primitive.
-
-Answer to Quick Quiz #3:
-	Why can't synchronize_rcu() return immediately on UP systems
-	running preemptable RCU?
-
-	Because some other task might have been preempted in the middle
-	of an RCU read-side critical section.  If synchronize_rcu()
-	simply immediately returned, it would prematurely signal the
-	end of the grace period, which would come as a nasty shock to
-	that other thread when it started running again.
diff --git a/Documentation/RCU/listRCU.rst b/Documentation/RCU/listRCU.rst
new file mode 100644
index 000000000000..7956ff33042b
--- /dev/null
+++ b/Documentation/RCU/listRCU.rst
@@ -0,0 +1,321 @@
+.. _list_rcu_doc:
+
+Using RCU to Protect Read-Mostly Linked Lists
+=============================================
+
+One of the best applications of RCU is to protect read-mostly linked lists
+("struct list_head" in list.h).  One big advantage of this approach
+is that all of the required memory barriers are included for you in
+the list macros.  This document describes several applications of RCU,
+with the best fits first.
+
+Example 1: Read-Side Action Taken Outside of Lock, No In-Place Updates
+----------------------------------------------------------------------
+
+The best applications are cases where, if reader-writer locking were
+used, the read-side lock would be dropped before taking any action
+based on the results of the search.  The most celebrated example is
+the routing table.  Because the routing table is tracking the state of
+equipment outside of the computer, it will at times contain stale data.
+Therefore, once the route has been computed, there is no need to hold
+the routing table static during transmission of the packet.  After all,
+you can hold the routing table static all you want, but that won't keep
+the external Internet from changing, and it is the state of the external
+Internet that really matters.  In addition, routing entries are typically
+added or deleted, rather than being modified in place.
+
+A straightforward example of this use of RCU may be found in the
+system-call auditing support.  For example, a reader-writer locked
+implementation of audit_filter_task() might be as follows::
+
+	static enum audit_state audit_filter_task(struct task_struct *tsk)
+	{
+		struct audit_entry *e;
+		enum audit_state   state;
+
+		read_lock(&auditsc_lock);
+		/* Note: audit_netlink_sem held by caller. */
+		list_for_each_entry(e, &audit_tsklist, list) {
+			if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+				read_unlock(&auditsc_lock);
+				return state;
+			}
+		}
+		read_unlock(&auditsc_lock);
+		return AUDIT_BUILD_CONTEXT;
+	}
+
+Here the list is searched under the lock, but the lock is dropped before
+the corresponding value is returned.  By the time that this value is acted
+on, the list may well have been modified.  This makes sense, since if
+you are turning auditing off, it is OK to audit a few extra system calls.
+
+This means that RCU can be easily applied to the read side, as follows::
+
+	static enum audit_state audit_filter_task(struct task_struct *tsk)
+	{
+		struct audit_entry *e;
+		enum audit_state   state;
+
+		rcu_read_lock();
+		/* Note: audit_netlink_sem held by caller. */
+		list_for_each_entry_rcu(e, &audit_tsklist, list) {
+			if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+				rcu_read_unlock();
+				return state;
+			}
+		}
+		rcu_read_unlock();
+		return AUDIT_BUILD_CONTEXT;
+	}
+
+The read_lock() and read_unlock() calls have become rcu_read_lock()
+and rcu_read_unlock(), respectively, and the list_for_each_entry() has
+become list_for_each_entry_rcu().  The _rcu() list-traversal primitives
+insert the read-side memory barriers that are required on DEC Alpha CPUs.
+
+The changes to the update side are also straightforward.  A reader-writer
+lock might be used as follows for deletion and insertion::
+
+	static inline int audit_del_rule(struct audit_rule *rule,
+					 struct list_head *list)
+	{
+		struct audit_entry  *e;
+
+		write_lock(&auditsc_lock);
+		list_for_each_entry(e, list, list) {
+			if (!audit_compare_rule(rule, &e->rule)) {
+				list_del(&e->list);
+				write_unlock(&auditsc_lock);
+				return 0;
+			}
+		}
+		write_unlock(&auditsc_lock);
+		return -EFAULT;		/* No matching rule */
+	}
+
+	static inline int audit_add_rule(struct audit_entry *entry,
+					 struct list_head *list)
+	{
+		write_lock(&auditsc_lock);
+		if (entry->rule.flags & AUDIT_PREPEND) {
+			entry->rule.flags &= ~AUDIT_PREPEND;
+			list_add(&entry->list, list);
+		} else {
+			list_add_tail(&entry->list, list);
+		}
+		write_unlock(&auditsc_lock);
+		return 0;
+	}
+
+Following are the RCU equivalents for these two functions::
+
+	static inline int audit_del_rule(struct audit_rule *rule,
+					 struct list_head *list)
+	{
+		struct audit_entry  *e;
+
+		/* Do not use the _rcu iterator here, since this is the only
+		 * deletion routine. */
+		list_for_each_entry(e, list, list) {
+			if (!audit_compare_rule(rule, &e->rule)) {
+				list_del_rcu(&e->list);
+				call_rcu(&e->rcu, audit_free_rule);
+				return 0;
+			}
+		}
+		return -EFAULT;		/* No matching rule */
+	}
+
+	static inline int audit_add_rule(struct audit_entry *entry,
+					 struct list_head *list)
+	{
+		if (entry->rule.flags & AUDIT_PREPEND) {
+			entry->rule.flags &= ~AUDIT_PREPEND;
+			list_add_rcu(&entry->list, list);
+		} else {
+			list_add_tail_rcu(&entry->list, list);
+		}
+		return 0;
+	}
+
+Normally, the write_lock() and write_unlock() would be replaced by
+a spin_lock() and a spin_unlock(), but in this case, all callers hold
+audit_netlink_sem, so no additional locking is required.  The auditsc_lock
+can therefore be eliminated, since use of RCU eliminates the need for
+writers to exclude readers.  Normally, the write_lock() calls would
+be converted into spin_lock() calls.
+
+The list_del(), list_add(), and list_add_tail() primitives have been
+replaced by list_del_rcu(), list_add_rcu(), and list_add_tail_rcu().
+The _rcu() list-manipulation primitives add memory barriers that are
+needed on weakly ordered CPUs (most of them!).  The list_del_rcu()
+primitive omits the pointer poisoning debug-assist code that would
+otherwise cause concurrent readers to fail spectacularly.
+
+So, when readers can tolerate stale data and when entries are either added
+or deleted, without in-place modification, it is very easy to use RCU!
+
+Example 2: Handling In-Place Updates
+------------------------------------
+
+The system-call auditing code does not update auditing rules in place.
+However, if it did, reader-writer-locked code to do so might look as
+follows (presumably, the field_count is only permitted to decrease,
+otherwise, the added fields would need to be filled in)::
+
+	static inline int audit_upd_rule(struct audit_rule *rule,
+					 struct list_head *list,
+					 __u32 newaction,
+					 __u32 newfield_count)
+	{
+		struct audit_entry  *e;
+		struct audit_newentry *ne;
+
+		write_lock(&auditsc_lock);
+		/* Note: audit_netlink_sem held by caller. */
+		list_for_each_entry(e, list, list) {
+			if (!audit_compare_rule(rule, &e->rule)) {
+				e->rule.action = newaction;
+				e->rule.file_count = newfield_count;
+				write_unlock(&auditsc_lock);
+				return 0;
+			}
+		}
+		write_unlock(&auditsc_lock);
+		return -EFAULT;		/* No matching rule */
+	}
+
+The RCU version creates a copy, updates the copy, then replaces the old
+entry with the newly updated entry.  This sequence of actions, allowing
+concurrent reads while doing a copy to perform an update, is what gives
+RCU ("read-copy update") its name.  The RCU code is as follows::
+
+	static inline int audit_upd_rule(struct audit_rule *rule,
+					 struct list_head *list,
+					 __u32 newaction,
+					 __u32 newfield_count)
+	{
+		struct audit_entry  *e;
+		struct audit_newentry *ne;
+
+		list_for_each_entry(e, list, list) {
+			if (!audit_compare_rule(rule, &e->rule)) {
+				ne = kmalloc(sizeof(*entry), GFP_ATOMIC);
+				if (ne == NULL)
+					return -ENOMEM;
+				audit_copy_rule(&ne->rule, &e->rule);
+				ne->rule.action = newaction;
+				ne->rule.file_count = newfield_count;
+				list_replace_rcu(&e->list, &ne->list);
+				call_rcu(&e->rcu, audit_free_rule);
+				return 0;
+			}
+		}
+		return -EFAULT;		/* No matching rule */
+	}
+
+Again, this assumes that the caller holds audit_netlink_sem.  Normally,
+the reader-writer lock would become a spinlock in this sort of code.
+
+Example 3: Eliminating Stale Data
+---------------------------------
+
+The auditing examples above tolerate stale data, as do most algorithms
+that are tracking external state.  Because there is a delay from the
+time the external state changes before Linux becomes aware of the change,
+additional RCU-induced staleness is normally not a problem.
+
+However, there are many examples where stale data cannot be tolerated.
+One example in the Linux kernel is the System V IPC (see the ipc_lock()
+function in ipc/util.c).  This code checks a "deleted" flag under a
+per-entry spinlock, and, if the "deleted" flag is set, pretends that the
+entry does not exist.  For this to be helpful, the search function must
+return holding the per-entry spinlock, as ipc_lock() does in fact do.
+
+Quick Quiz:
+	Why does the search function need to return holding the per-entry lock for
+	this deleted-flag technique to be helpful?
+
+:ref:`Answer to Quick Quiz <answer_quick_quiz_list>`
+
+If the system-call audit module were to ever need to reject stale data,
+one way to accomplish this would be to add a "deleted" flag and a "lock"
+spinlock to the audit_entry structure, and modify audit_filter_task()
+as follows::
+
+	static enum audit_state audit_filter_task(struct task_struct *tsk)
+	{
+		struct audit_entry *e;
+		enum audit_state   state;
+
+		rcu_read_lock();
+		list_for_each_entry_rcu(e, &audit_tsklist, list) {
+			if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+				spin_lock(&e->lock);
+				if (e->deleted) {
+					spin_unlock(&e->lock);
+					rcu_read_unlock();
+					return AUDIT_BUILD_CONTEXT;
+				}
+				rcu_read_unlock();
+				return state;
+			}
+		}
+		rcu_read_unlock();
+		return AUDIT_BUILD_CONTEXT;
+	}
+
+Note that this example assumes that entries are only added and deleted.
+Additional mechanism is required to deal correctly with the
+update-in-place performed by audit_upd_rule().  For one thing,
+audit_upd_rule() would need additional memory barriers to ensure
+that the list_add_rcu() was really executed before the list_del_rcu().
+
+The audit_del_rule() function would need to set the "deleted"
+flag under the spinlock as follows::
+
+	static inline int audit_del_rule(struct audit_rule *rule,
+					 struct list_head *list)
+	{
+		struct audit_entry  *e;
+
+		/* Do not need to use the _rcu iterator here, since this
+		 * is the only deletion routine. */
+		list_for_each_entry(e, list, list) {
+			if (!audit_compare_rule(rule, &e->rule)) {
+				spin_lock(&e->lock);
+				list_del_rcu(&e->list);
+				e->deleted = 1;
+				spin_unlock(&e->lock);
+				call_rcu(&e->rcu, audit_free_rule);
+				return 0;
+			}
+		}
+		return -EFAULT;		/* No matching rule */
+	}
+
+Summary
+-------
+
+Read-mostly list-based data structures that can tolerate stale data are
+the most amenable to use of RCU.  The simplest case is where entries are
+either added or deleted from the data structure (or atomically modified
+in place), but non-atomic in-place modifications can be handled by making
+a copy, updating the copy, then replacing the original with the copy.
+If stale data cannot be tolerated, then a "deleted" flag may be used
+in conjunction with a per-entry spinlock in order to allow the search
+function to reject newly deleted data.
+
+.. _answer_quick_quiz_list:
+
+Answer to Quick Quiz:
+	Why does the search function need to return holding the per-entry
+	lock for this deleted-flag technique to be helpful?
+
+	If the search function drops the per-entry lock before returning,
+	then the caller will be processing stale data in any case.  If it
+	is really OK to be processing stale data, then you don't need a
+	"deleted" flag.  If processing stale data really is a problem,
+	then you need to hold the per-entry lock across all of the code
+	that uses the value that was returned.
diff --git a/Documentation/RCU/listRCU.txt b/Documentation/RCU/listRCU.txt
deleted file mode 100644
index 7956ff33042b..000000000000
--- a/Documentation/RCU/listRCU.txt
+++ /dev/null
@@ -1,321 +0,0 @@
-.. _list_rcu_doc:
-
-Using RCU to Protect Read-Mostly Linked Lists
-=============================================
-
-One of the best applications of RCU is to protect read-mostly linked lists
-("struct list_head" in list.h).  One big advantage of this approach
-is that all of the required memory barriers are included for you in
-the list macros.  This document describes several applications of RCU,
-with the best fits first.
-
-Example 1: Read-Side Action Taken Outside of Lock, No In-Place Updates
-----------------------------------------------------------------------
-
-The best applications are cases where, if reader-writer locking were
-used, the read-side lock would be dropped before taking any action
-based on the results of the search.  The most celebrated example is
-the routing table.  Because the routing table is tracking the state of
-equipment outside of the computer, it will at times contain stale data.
-Therefore, once the route has been computed, there is no need to hold
-the routing table static during transmission of the packet.  After all,
-you can hold the routing table static all you want, but that won't keep
-the external Internet from changing, and it is the state of the external
-Internet that really matters.  In addition, routing entries are typically
-added or deleted, rather than being modified in place.
-
-A straightforward example of this use of RCU may be found in the
-system-call auditing support.  For example, a reader-writer locked
-implementation of audit_filter_task() might be as follows::
-
-	static enum audit_state audit_filter_task(struct task_struct *tsk)
-	{
-		struct audit_entry *e;
-		enum audit_state   state;
-
-		read_lock(&auditsc_lock);
-		/* Note: audit_netlink_sem held by caller. */
-		list_for_each_entry(e, &audit_tsklist, list) {
-			if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
-				read_unlock(&auditsc_lock);
-				return state;
-			}
-		}
-		read_unlock(&auditsc_lock);
-		return AUDIT_BUILD_CONTEXT;
-	}
-
-Here the list is searched under the lock, but the lock is dropped before
-the corresponding value is returned.  By the time that this value is acted
-on, the list may well have been modified.  This makes sense, since if
-you are turning auditing off, it is OK to audit a few extra system calls.
-
-This means that RCU can be easily applied to the read side, as follows::
-
-	static enum audit_state audit_filter_task(struct task_struct *tsk)
-	{
-		struct audit_entry *e;
-		enum audit_state   state;
-
-		rcu_read_lock();
-		/* Note: audit_netlink_sem held by caller. */
-		list_for_each_entry_rcu(e, &audit_tsklist, list) {
-			if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
-				rcu_read_unlock();
-				return state;
-			}
-		}
-		rcu_read_unlock();
-		return AUDIT_BUILD_CONTEXT;
-	}
-
-The read_lock() and read_unlock() calls have become rcu_read_lock()
-and rcu_read_unlock(), respectively, and the list_for_each_entry() has
-become list_for_each_entry_rcu().  The _rcu() list-traversal primitives
-insert the read-side memory barriers that are required on DEC Alpha CPUs.
-
-The changes to the update side are also straightforward.  A reader-writer
-lock might be used as follows for deletion and insertion::
-
-	static inline int audit_del_rule(struct audit_rule *rule,
-					 struct list_head *list)
-	{
-		struct audit_entry  *e;
-
-		write_lock(&auditsc_lock);
-		list_for_each_entry(e, list, list) {
-			if (!audit_compare_rule(rule, &e->rule)) {
-				list_del(&e->list);
-				write_unlock(&auditsc_lock);
-				return 0;
-			}
-		}
-		write_unlock(&auditsc_lock);
-		return -EFAULT;		/* No matching rule */
-	}
-
-	static inline int audit_add_rule(struct audit_entry *entry,
-					 struct list_head *list)
-	{
-		write_lock(&auditsc_lock);
-		if (entry->rule.flags & AUDIT_PREPEND) {
-			entry->rule.flags &= ~AUDIT_PREPEND;
-			list_add(&entry->list, list);
-		} else {
-			list_add_tail(&entry->list, list);
-		}
-		write_unlock(&auditsc_lock);
-		return 0;
-	}
-
-Following are the RCU equivalents for these two functions::
-
-	static inline int audit_del_rule(struct audit_rule *rule,
-					 struct list_head *list)
-	{
-		struct audit_entry  *e;
-
-		/* Do not use the _rcu iterator here, since this is the only
-		 * deletion routine. */
-		list_for_each_entry(e, list, list) {
-			if (!audit_compare_rule(rule, &e->rule)) {
-				list_del_rcu(&e->list);
-				call_rcu(&e->rcu, audit_free_rule);
-				return 0;
-			}
-		}
-		return -EFAULT;		/* No matching rule */
-	}
-
-	static inline int audit_add_rule(struct audit_entry *entry,
-					 struct list_head *list)
-	{
-		if (entry->rule.flags & AUDIT_PREPEND) {
-			entry->rule.flags &= ~AUDIT_PREPEND;
-			list_add_rcu(&entry->list, list);
-		} else {
-			list_add_tail_rcu(&entry->list, list);
-		}
-		return 0;
-	}
-
-Normally, the write_lock() and write_unlock() would be replaced by
-a spin_lock() and a spin_unlock(), but in this case, all callers hold
-audit_netlink_sem, so no additional locking is required.  The auditsc_lock
-can therefore be eliminated, since use of RCU eliminates the need for
-writers to exclude readers.  Normally, the write_lock() calls would
-be converted into spin_lock() calls.
-
-The list_del(), list_add(), and list_add_tail() primitives have been
-replaced by list_del_rcu(), list_add_rcu(), and list_add_tail_rcu().
-The _rcu() list-manipulation primitives add memory barriers that are
-needed on weakly ordered CPUs (most of them!).  The list_del_rcu()
-primitive omits the pointer poisoning debug-assist code that would
-otherwise cause concurrent readers to fail spectacularly.
-
-So, when readers can tolerate stale data and when entries are either added
-or deleted, without in-place modification, it is very easy to use RCU!
-
-Example 2: Handling In-Place Updates
-------------------------------------
-
-The system-call auditing code does not update auditing rules in place.
-However, if it did, reader-writer-locked code to do so might look as
-follows (presumably, the field_count is only permitted to decrease,
-otherwise, the added fields would need to be filled in)::
-
-	static inline int audit_upd_rule(struct audit_rule *rule,
-					 struct list_head *list,
-					 __u32 newaction,
-					 __u32 newfield_count)
-	{
-		struct audit_entry  *e;
-		struct audit_newentry *ne;
-
-		write_lock(&auditsc_lock);
-		/* Note: audit_netlink_sem held by caller. */
-		list_for_each_entry(e, list, list) {
-			if (!audit_compare_rule(rule, &e->rule)) {
-				e->rule.action = newaction;
-				e->rule.file_count = newfield_count;
-				write_unlock(&auditsc_lock);
-				return 0;
-			}
-		}
-		write_unlock(&auditsc_lock);
-		return -EFAULT;		/* No matching rule */
-	}
-
-The RCU version creates a copy, updates the copy, then replaces the old
-entry with the newly updated entry.  This sequence of actions, allowing
-concurrent reads while doing a copy to perform an update, is what gives
-RCU ("read-copy update") its name.  The RCU code is as follows::
-
-	static inline int audit_upd_rule(struct audit_rule *rule,
-					 struct list_head *list,
-					 __u32 newaction,
-					 __u32 newfield_count)
-	{
-		struct audit_entry  *e;
-		struct audit_newentry *ne;
-
-		list_for_each_entry(e, list, list) {
-			if (!audit_compare_rule(rule, &e->rule)) {
-				ne = kmalloc(sizeof(*entry), GFP_ATOMIC);
-				if (ne == NULL)
-					return -ENOMEM;
-				audit_copy_rule(&ne->rule, &e->rule);
-				ne->rule.action = newaction;
-				ne->rule.file_count = newfield_count;
-				list_replace_rcu(&e->list, &ne->list);
-				call_rcu(&e->rcu, audit_free_rule);
-				return 0;
-			}
-		}
-		return -EFAULT;		/* No matching rule */
-	}
-
-Again, this assumes that the caller holds audit_netlink_sem.  Normally,
-the reader-writer lock would become a spinlock in this sort of code.
-
-Example 3: Eliminating Stale Data
----------------------------------
-
-The auditing examples above tolerate stale data, as do most algorithms
-that are tracking external state.  Because there is a delay from the
-time the external state changes before Linux becomes aware of the change,
-additional RCU-induced staleness is normally not a problem.
-
-However, there are many examples where stale data cannot be tolerated.
-One example in the Linux kernel is the System V IPC (see the ipc_lock()
-function in ipc/util.c).  This code checks a "deleted" flag under a
-per-entry spinlock, and, if the "deleted" flag is set, pretends that the
-entry does not exist.  For this to be helpful, the search function must
-return holding the per-entry spinlock, as ipc_lock() does in fact do.
-
-Quick Quiz:
-	Why does the search function need to return holding the per-entry lock for
-	this deleted-flag technique to be helpful?
-
-:ref:`Answer to Quick Quiz <answer_quick_quiz_list>`
-
-If the system-call audit module were to ever need to reject stale data,
-one way to accomplish this would be to add a "deleted" flag and a "lock"
-spinlock to the audit_entry structure, and modify audit_filter_task()
-as follows::
-
-	static enum audit_state audit_filter_task(struct task_struct *tsk)
-	{
-		struct audit_entry *e;
-		enum audit_state   state;
-
-		rcu_read_lock();
-		list_for_each_entry_rcu(e, &audit_tsklist, list) {
-			if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
-				spin_lock(&e->lock);
-				if (e->deleted) {
-					spin_unlock(&e->lock);
-					rcu_read_unlock();
-					return AUDIT_BUILD_CONTEXT;
-				}
-				rcu_read_unlock();
-				return state;
-			}
-		}
-		rcu_read_unlock();
-		return AUDIT_BUILD_CONTEXT;
-	}
-
-Note that this example assumes that entries are only added and deleted.
-Additional mechanism is required to deal correctly with the
-update-in-place performed by audit_upd_rule().  For one thing,
-audit_upd_rule() would need additional memory barriers to ensure
-that the list_add_rcu() was really executed before the list_del_rcu().
-
-The audit_del_rule() function would need to set the "deleted"
-flag under the spinlock as follows::
-
-	static inline int audit_del_rule(struct audit_rule *rule,
-					 struct list_head *list)
-	{
-		struct audit_entry  *e;
-
-		/* Do not need to use the _rcu iterator here, since this
-		 * is the only deletion routine. */
-		list_for_each_entry(e, list, list) {
-			if (!audit_compare_rule(rule, &e->rule)) {
-				spin_lock(&e->lock);
-				list_del_rcu(&e->list);
-				e->deleted = 1;
-				spin_unlock(&e->lock);
-				call_rcu(&e->rcu, audit_free_rule);
-				return 0;
-			}
-		}
-		return -EFAULT;		/* No matching rule */
-	}
-
-Summary
--------
-
-Read-mostly list-based data structures that can tolerate stale data are
-the most amenable to use of RCU.  The simplest case is where entries are
-either added or deleted from the data structure (or atomically modified
-in place), but non-atomic in-place modifications can be handled by making
-a copy, updating the copy, then replacing the original with the copy.
-If stale data cannot be tolerated, then a "deleted" flag may be used
-in conjunction with a per-entry spinlock in order to allow the search
-function to reject newly deleted data.
-
-.. _answer_quick_quiz_list:
-
-Answer to Quick Quiz:
-	Why does the search function need to return holding the per-entry
-	lock for this deleted-flag technique to be helpful?
-
-	If the search function drops the per-entry lock before returning,
-	then the caller will be processing stale data in any case.  If it
-	is really OK to be processing stale data, then you don't need a
-	"deleted" flag.  If processing stale data really is a problem,
-	then you need to hold the per-entry lock across all of the code
-	that uses the value that was returned.
diff --git a/Documentation/RCU/rcu.rst b/Documentation/RCU/rcu.rst
new file mode 100644
index 000000000000..8dfb437dacc3
--- /dev/null
+++ b/Documentation/RCU/rcu.rst
@@ -0,0 +1,92 @@
+.. _rcu_doc:
+
+RCU Concepts
+============
+
+The basic idea behind RCU (read-copy update) is to split destructive
+operations into two parts, one that prevents anyone from seeing the data
+item being destroyed, and one that actually carries out the destruction.
+A "grace period" must elapse between the two parts, and this grace period
+must be long enough that any readers accessing the item being deleted have
+since dropped their references.  For example, an RCU-protected deletion
+from a linked list would first remove the item from the list, wait for
+a grace period to elapse, then free the element.  See the
+Documentation/RCU/listRCU.rst file for more information on using RCU with
+linked lists.
+
+Frequently Asked Questions
+--------------------------
+
+- Why would anyone want to use RCU?
+
+  The advantage of RCU's two-part approach is that RCU readers need
+  not acquire any locks, perform any atomic instructions, write to
+  shared memory, or (on CPUs other than Alpha) execute any memory
+  barriers.  The fact that these operations are quite expensive
+  on modern CPUs is what gives RCU its performance advantages
+  in read-mostly situations.  The fact that RCU readers need not
+  acquire locks can also greatly simplify deadlock-avoidance code.
+
+- How can the updater tell when a grace period has completed
+  if the RCU readers give no indication when they are done?
+
+  Just as with spinlocks, RCU readers are not permitted to
+  block, switch to user-mode execution, or enter the idle loop.
+  Therefore, as soon as a CPU is seen passing through any of these
+  three states, we know that that CPU has exited any previous RCU
+  read-side critical sections.  So, if we remove an item from a
+  linked list, and then wait until all CPUs have switched context,
+  executed in user mode, or executed in the idle loop, we can
+  safely free up that item.
+
+  Preemptible variants of RCU (CONFIG_PREEMPT_RCU) get the
+  same effect, but require that the readers manipulate CPU-local
+  counters.  These counters allow limited types of blocking within
+  RCU read-side critical sections.  SRCU also uses CPU-local
+  counters, and permits general blocking within RCU read-side
+  critical sections.  These variants of RCU detect grace periods
+  by sampling these counters.
+
+- If I am running on a uniprocessor kernel, which can only do one
+  thing at a time, why should I wait for a grace period?
+
+  See the Documentation/RCU/UP.rst file for more information.
+
+- How can I see where RCU is currently used in the Linux kernel?
+
+  Search for "rcu_read_lock", "rcu_read_unlock", "call_rcu",
+  "rcu_read_lock_bh", "rcu_read_unlock_bh", "srcu_read_lock",
+  "srcu_read_unlock", "synchronize_rcu", "synchronize_net",
+  "synchronize_srcu", and the other RCU primitives.  Or grab one
+  of the cscope databases from:
+
+  (http://www.rdrop.com/users/paulmck/RCU/linuxusage/rculocktab.html).
+
+- What guidelines should I follow when writing code that uses RCU?
+
+  See the checklist.txt file in this directory.
+
+- Why the name "RCU"?
+
+  "RCU" stands for "read-copy update".  The file Documentation/RCU/listRCU.rst
+  has more information on where this name came from, search for
+  "read-copy update" to find it.
+
+- I hear that RCU is patented?  What is with that?
+
+  Yes, it is.  There are several known patents related to RCU,
+  search for the string "Patent" in RTFP.txt to find them.
+  Of these, one was allowed to lapse by the assignee, and the
+  others have been contributed to the Linux kernel under GPL.
+  There are now also LGPL implementations of user-level RCU
+  available (http://liburcu.org/).
+
+- I hear that RCU needs work in order to support realtime kernels?
+
+  Realtime-friendly RCU can be enabled via the CONFIG_PREEMPT_RCU
+  kernel configuration parameter.
+
+- Where can I find more information on RCU?
+
+  See the RTFP.txt file in this directory.
+  Or point your browser at (http://www.rdrop.com/users/paulmck/RCU/).
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt
deleted file mode 100644
index 8dfb437dacc3..000000000000
--- a/Documentation/RCU/rcu.txt
+++ /dev/null
@@ -1,92 +0,0 @@
-.. _rcu_doc:
-
-RCU Concepts
-============
-
-The basic idea behind RCU (read-copy update) is to split destructive
-operations into two parts, one that prevents anyone from seeing the data
-item being destroyed, and one that actually carries out the destruction.
-A "grace period" must elapse between the two parts, and this grace period
-must be long enough that any readers accessing the item being deleted have
-since dropped their references.  For example, an RCU-protected deletion
-from a linked list would first remove the item from the list, wait for
-a grace period to elapse, then free the element.  See the
-Documentation/RCU/listRCU.rst file for more information on using RCU with
-linked lists.
-
-Frequently Asked Questions
---------------------------
-
-- Why would anyone want to use RCU?
-
-  The advantage of RCU's two-part approach is that RCU readers need
-  not acquire any locks, perform any atomic instructions, write to
-  shared memory, or (on CPUs other than Alpha) execute any memory
-  barriers.  The fact that these operations are quite expensive
-  on modern CPUs is what gives RCU its performance advantages
-  in read-mostly situations.  The fact that RCU readers need not
-  acquire locks can also greatly simplify deadlock-avoidance code.
-
-- How can the updater tell when a grace period has completed
-  if the RCU readers give no indication when they are done?
-
-  Just as with spinlocks, RCU readers are not permitted to
-  block, switch to user-mode execution, or enter the idle loop.
-  Therefore, as soon as a CPU is seen passing through any of these
-  three states, we know that that CPU has exited any previous RCU
-  read-side critical sections.  So, if we remove an item from a
-  linked list, and then wait until all CPUs have switched context,
-  executed in user mode, or executed in the idle loop, we can
-  safely free up that item.
-
-  Preemptible variants of RCU (CONFIG_PREEMPT_RCU) get the
-  same effect, but require that the readers manipulate CPU-local
-  counters.  These counters allow limited types of blocking within
-  RCU read-side critical sections.  SRCU also uses CPU-local
-  counters, and permits general blocking within RCU read-side
-  critical sections.  These variants of RCU detect grace periods
-  by sampling these counters.
-
-- If I am running on a uniprocessor kernel, which can only do one
-  thing at a time, why should I wait for a grace period?
-
-  See the Documentation/RCU/UP.rst file for more information.
-
-- How can I see where RCU is currently used in the Linux kernel?
-
-  Search for "rcu_read_lock", "rcu_read_unlock", "call_rcu",
-  "rcu_read_lock_bh", "rcu_read_unlock_bh", "srcu_read_lock",
-  "srcu_read_unlock", "synchronize_rcu", "synchronize_net",
-  "synchronize_srcu", and the other RCU primitives.  Or grab one
-  of the cscope databases from:
-
-  (http://www.rdrop.com/users/paulmck/RCU/linuxusage/rculocktab.html).
-
-- What guidelines should I follow when writing code that uses RCU?
-
-  See the checklist.txt file in this directory.
-
-- Why the name "RCU"?
-
-  "RCU" stands for "read-copy update".  The file Documentation/RCU/listRCU.rst
-  has more information on where this name came from, search for
-  "read-copy update" to find it.
-
-- I hear that RCU is patented?  What is with that?
-
-  Yes, it is.  There are several known patents related to RCU,
-  search for the string "Patent" in RTFP.txt to find them.
-  Of these, one was allowed to lapse by the assignee, and the
-  others have been contributed to the Linux kernel under GPL.
-  There are now also LGPL implementations of user-level RCU
-  available (http://liburcu.org/).
-
-- I hear that RCU needs work in order to support realtime kernels?
-
-  Realtime-friendly RCU can be enabled via the CONFIG_PREEMPT_RCU
-  kernel configuration parameter.
-
-- Where can I find more information on RCU?
-
-  See the RTFP.txt file in this directory.
-  Or point your browser at (http://www.rdrop.com/users/paulmck/RCU/).
-- 
cgit 


From c0e679b4a180f7b9f2cee41c2781bb6af29f7755 Mon Sep 17 00:00:00 2001
From: Jiunn Chang <c0d1n61at3@gmail.com>
Date: Wed, 26 Jun 2019 15:07:05 -0500
Subject: Documentation: RCU: Add TOC tree hooks

Add TOC tree hooks for:
  - rcu
  - listRCU
  - UP

Signed-off-by: Jiunn Chang <c0d1n61at3@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/RCU/index.rst | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 Documentation/RCU/index.rst

(limited to 'Documentation')

diff --git a/Documentation/RCU/index.rst b/Documentation/RCU/index.rst
new file mode 100644
index 000000000000..340a9725676c
--- /dev/null
+++ b/Documentation/RCU/index.rst
@@ -0,0 +1,19 @@
+.. _rcu_concepts:
+
+============
+RCU concepts
+============
+
+.. toctree::
+   :maxdepth: 1
+
+   rcu
+   listRCU
+   UP
+
+.. only:: subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
-- 
cgit 


From 772626ecd2cd5b930fa03b4787ddf51ccf819229 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Thu, 27 Jun 2019 08:35:26 -0600
Subject: Add the RCU docs to the core-api manual

We should really move the RCU directory there as well, but that can wait
for another day.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/core-api/index.rst | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 2466a4c51031..322ac954b390 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -35,6 +35,7 @@ Core utilities
    boot-time-mm
    memory-hotplug
    protection-keys
+   ../RCU/index
 
 
 Interfaces for kernel debugging
-- 
cgit 


From b061e4e8f0f43ba9a8ec1bde36ceeb2368b1928f Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 26 Apr 2018 12:22:38 -0500
Subject: dt-bindings: arm: Convert Alpine board/soc bindings to json-schema

Convert Alpine SoC bindings to DT schema format using json-schema.

Cc: Tsahee Zidenberg <tsahee@annapurnalabs.com>
Cc: Antoine Tenart <antoine.tenart@free-electrons.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: devicetree@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/al,alpine.txt | 16 ----------------
 .../devicetree/bindings/arm/al,alpine.yaml          | 21 +++++++++++++++++++++
 2 files changed, 21 insertions(+), 16 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/al,alpine.txt
 create mode 100644 Documentation/devicetree/bindings/arm/al,alpine.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/al,alpine.txt b/Documentation/devicetree/bindings/arm/al,alpine.txt
deleted file mode 100644
index d00debe2e86f..000000000000
--- a/Documentation/devicetree/bindings/arm/al,alpine.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-Annapurna Labs Alpine Platform Device Tree Bindings
----------------------------------------------------------------
-
-Boards in the Alpine family shall have the following properties:
-
-* Required root node properties:
-compatible: must contain "al,alpine"
-
-* Example:
-
-/ {
-	model = "Annapurna Labs Alpine Dev Board";
-	compatible = "al,alpine";
-
-	...
-}
diff --git a/Documentation/devicetree/bindings/arm/al,alpine.yaml b/Documentation/devicetree/bindings/arm/al,alpine.yaml
new file mode 100644
index 000000000000..a70dff277e05
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/al,alpine.yaml
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/al,alpine.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Annapurna Labs Alpine Platform Device Tree Bindings
+
+maintainers:
+  - Tsahee Zidenberg <tsahee@annapurnalabs.com>
+  - Antoine Tenart <antoine.tenart@bootlin.com>
+
+properties:
+  compatible:
+    items:
+      - const: al,alpine
+  model:
+    items:
+      - const: "Annapurna Labs Alpine Dev Board"
+
+...
-- 
cgit 


From e5dad30bc7e7d0ddeb4c14d1785df7899eee619a Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 18 Jun 2019 10:07:18 -0600
Subject: dt-bindings: arm: qcom: Add missing schema for MSM8974

The SoC/board bindings for MSM8974 are undocumented. Add the missing
bindings to the schema.

Cc: Andy Gross <agross@kernel.org>
Cc: David Brown <david.brown@linaro.org>
Cc: linux-arm-msm@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/qcom.yaml | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/qcom.yaml b/Documentation/devicetree/bindings/arm/qcom.yaml
index f6316ab66385..04867577306a 100644
--- a/Documentation/devicetree/bindings/arm/qcom.yaml
+++ b/Documentation/devicetree/bindings/arm/qcom.yaml
@@ -101,6 +101,15 @@ properties:
               - qcom,msm8960-cdp
           - const: qcom,msm8960
 
+      - items:
+          - enum:
+              - fairphone,fp2
+              - lge,hammerhead
+              - sony,xperia-amami
+              - sony,xperia-castor
+              - sony,xperia-honami
+          - const: qcom,msm8974
+
       - items:
           - const: qcom,msm8916-mtp/1
           - const: qcom,msm8916-mtp
-- 
cgit 


From 5732bea634dcb7e5ba974df0fdbfdfe092aae3b2 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 18 Jun 2019 10:10:10 -0600
Subject: dt-bindings: arm: qcom: Add missing schema for IPQ4019 boards

The board bindings for IPQ4019 are undocumented. Add the missing bindings
to the schema.

Cc: Andy Gross <agross@kernel.org>
Cc: David Brown <david.brown@linaro.org>
Cc: linux-arm-msm@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/qcom.yaml | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/qcom.yaml b/Documentation/devicetree/bindings/arm/qcom.yaml
index 04867577306a..54ef6b6b9189 100644
--- a/Documentation/devicetree/bindings/arm/qcom.yaml
+++ b/Documentation/devicetree/bindings/arm/qcom.yaml
@@ -119,6 +119,11 @@ properties:
           - const: qcom,msm8996-mtp
 
       - items:
+          - enum:
+              - qcom,ipq4019-ap-dk04.1-c3
+              - qcom,ipq4019-ap-dk07.1-c1
+              - qcom,ipq4019-ap-dk07.1-c2
+              - qcom,ipq4019-dk04.1-c1
           - const: qcom,ipq4019
 
       - items:
-- 
cgit 


From 2c73aea7da209f79bbe163644871bcbadeab4ec4 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 18 Jun 2019 13:37:14 -0600
Subject: dt-bindings: arm: Convert Axxia board/soc bindings to json-schema

Convert Axxia SoC bindings to DT schema format using json-schema.

Cc: Anders Berg <anders.berg@lsi.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/axxia.txt  | 12 ------------
 Documentation/devicetree/bindings/arm/axxia.yaml | 19 +++++++++++++++++++
 2 files changed, 19 insertions(+), 12 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/axxia.txt
 create mode 100644 Documentation/devicetree/bindings/arm/axxia.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/axxia.txt b/Documentation/devicetree/bindings/arm/axxia.txt
deleted file mode 100644
index 7b4ef9c07696..000000000000
--- a/Documentation/devicetree/bindings/arm/axxia.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-Axxia AXM55xx device tree bindings
-
-Boards using the AXM55xx SoC need to have the following properties:
-
-Required root node property:
-
-  - compatible = "lsi,axm5516"
-
-Boards:
-
-  LSI AXM5516 Validation board (Amarillo)
-	compatible = "lsi,axm5516-amarillo", "lsi,axm5516"
diff --git a/Documentation/devicetree/bindings/arm/axxia.yaml b/Documentation/devicetree/bindings/arm/axxia.yaml
new file mode 100644
index 000000000000..98780a569f22
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/axxia.yaml
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/axxia.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Axxia AXM55xx device tree bindings
+
+maintainers:
+  - Anders Berg <anders.berg@lsi.com>
+
+properties:
+  compatible:
+    description: LSI AXM5516 Validation board (Amarillo)
+    items:
+      - const: lsi,axm5516-amarillo
+      - const: lsi,axm5516
+
+...
-- 
cgit 


From 7eefa9047ea5f909761c6cec800be3be8d3f372f Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 18 Jun 2019 13:45:19 -0600
Subject: dt-bindings: arm: Convert MOXA ART board/soc bindings to json-schema

Convert MOXA ART SoC bindings to DT schema format using json-schema.

Cc: Jonas Jensen <jonas.jensen@gmail.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/moxart.txt  | 12 ------------
 Documentation/devicetree/bindings/arm/moxart.yaml | 19 +++++++++++++++++++
 2 files changed, 19 insertions(+), 12 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/moxart.txt
 create mode 100644 Documentation/devicetree/bindings/arm/moxart.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/moxart.txt b/Documentation/devicetree/bindings/arm/moxart.txt
deleted file mode 100644
index 11087edb0658..000000000000
--- a/Documentation/devicetree/bindings/arm/moxart.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-MOXA ART device tree bindings
-
-Boards with the MOXA ART SoC shall have the following properties:
-
-Required root node property:
-
-compatible = "moxa,moxart";
-
-Boards:
-
-- UC-7112-LX: embedded computer
-  compatible = "moxa,moxart-uc-7112-lx", "moxa,moxart"
diff --git a/Documentation/devicetree/bindings/arm/moxart.yaml b/Documentation/devicetree/bindings/arm/moxart.yaml
new file mode 100644
index 000000000000..c068df59fad2
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/moxart.yaml
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/moxart.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MOXA ART device tree bindings
+
+maintainers:
+  - Jonas Jensen <jonas.jensen@gmail.com>
+
+properties:
+  compatible:
+    description: UC-7112-LX embedded computer
+    items:
+      - const: moxa,moxart-uc-7112-lx
+      - const: moxa,moxart
+
+...
-- 
cgit 


From 26fd07e2e78fae8162d6411ee24a0373912c3193 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 18 Jun 2019 13:57:13 -0600
Subject: dt-bindings: arm: Convert NXP LPC32xx board/soc bindings to
 json-schema

Convert NXP LPC32xx SoC bindings to DT schema format using json-schema.

Cc: Roland Stigge <stigge@antcom.de>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/arm/nxp/lpc32xx.txt        |  8 -------
 .../devicetree/bindings/arm/nxp/lpc32xx.yaml       | 25 ++++++++++++++++++++++
 2 files changed, 25 insertions(+), 8 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/nxp/lpc32xx.txt
 create mode 100644 Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/nxp/lpc32xx.txt b/Documentation/devicetree/bindings/arm/nxp/lpc32xx.txt
deleted file mode 100644
index 56ec8ddc4a3b..000000000000
--- a/Documentation/devicetree/bindings/arm/nxp/lpc32xx.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-NXP LPC32xx Platforms Device Tree Bindings
-------------------------------------------
-
-Boards with the NXP LPC32xx SoC shall have the following properties:
-
-Required root node property:
-
-compatible: must be "nxp,lpc3220", "nxp,lpc3230", "nxp,lpc3240" or "nxp,lpc3250"
diff --git a/Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml b/Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml
new file mode 100644
index 000000000000..07f39d3eee7e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/nxp/lpc32xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP LPC32xx Platforms Device Tree Bindings
+
+maintainers:
+  - Roland Stigge <stigge@antcom.de>
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - nxp,lpc3220
+          - nxp,lpc3230
+          - nxp,lpc3240
+      - items:
+        - enum:
+            - ea,ea3250
+            - phytec,phy3250
+        - const: nxp,lpc3250
+
+...
-- 
cgit 


From c875a3f847274d4def25d48439f11f6cfd445e80 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 18 Jun 2019 15:14:12 -0600
Subject: dt-bindings: arm: Convert Conexant Digicolor board/soc bindings to
 json-schema

Convert Conexant Digicolor SoC bindings to DT schema format using json-schema.

Acked-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/digicolor.txt  |  6 ------
 Documentation/devicetree/bindings/arm/digicolor.yaml | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 6 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/digicolor.txt
 create mode 100644 Documentation/devicetree/bindings/arm/digicolor.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/digicolor.txt b/Documentation/devicetree/bindings/arm/digicolor.txt
deleted file mode 100644
index 658553f40b23..000000000000
--- a/Documentation/devicetree/bindings/arm/digicolor.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Conexant Digicolor Platforms Device Tree Bindings
-
-Each device tree must specify which Conexant Digicolor SoC it uses.
-Must be the following compatible string:
-
-  cnxt,cx92755
diff --git a/Documentation/devicetree/bindings/arm/digicolor.yaml b/Documentation/devicetree/bindings/arm/digicolor.yaml
new file mode 100644
index 000000000000..d9c80b827e9b
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/digicolor.yaml
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/digicolor.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Conexant Digicolor Platforms Device Tree Bindings
+
+maintainers:
+  - Baruch Siach <baruch@tkos.co.il>
+
+properties:
+  compatible:
+    const: cnxt,cx92755
+
+...
-- 
cgit 


From adbd9b4dee70c36eaa30ce93ffcd968533044efc Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 20 Jun 2019 11:15:05 -0700
Subject: fscrypt: remove selection of CONFIG_CRYPTO_SHA256

fscrypt only uses SHA-256 for AES-128-CBC-ESSIV, which isn't the default
and is only recommended on platforms that have hardware accelerated
AES-CBC but not AES-XTS.  There's no link-time dependency, since SHA-256
is requested via the crypto API on first use.

To reduce bloat, we should limit FS_ENCRYPTION to selecting the default
algorithms only.  SHA-256 by itself isn't that much bloat, but it's
being discussed to move ESSIV into a crypto API template, which would
incidentally bring in other things like "authenc" support, which would
all end up being built-in since FS_ENCRYPTION is now a bool.

For Adiantum encryption we already just document that users who want to
use it have to enable CONFIG_CRYPTO_ADIANTUM themselves.  So, let's do
the same for AES-128-CBC-ESSIV and CONFIG_CRYPTO_SHA256.

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 Documentation/filesystems/fscrypt.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 08c23b60e016..87d4e266ffc8 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -191,7 +191,9 @@ Currently, the following pairs of encryption modes are supported:
 If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
 
 AES-128-CBC was added only for low-powered embedded devices with
-crypto accelerators such as CAAM or CESA that do not support XTS.
+crypto accelerators such as CAAM or CESA that do not support XTS.  To
+use AES-128-CBC, CONFIG_CRYPTO_SHA256 (or another SHA-256
+implementation) must be enabled so that ESSIV can be used.
 
 Adiantum is a (primarily) stream cipher-based mode that is fast even
 on CPUs without dedicated crypto instructions.  It's also a true
-- 
cgit 


From 0564336329f0b03a78221ddf51e52af3665e5720 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 20 Jun 2019 11:16:58 -0700
Subject: fscrypt: document testing with xfstests

Document how to test ext4, f2fs, and ubifs encryption with xfstests.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 Documentation/filesystems/fscrypt.rst | 39 +++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 87d4e266ffc8..82efa41b0e6c 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -649,3 +649,42 @@ Note that the precise way that filenames are presented to userspace
 without the key is subject to change in the future.  It is only meant
 as a way to temporarily present valid filenames so that commands like
 ``rm -r`` work as expected on encrypted directories.
+
+Tests
+=====
+
+To test fscrypt, use xfstests, which is Linux's de facto standard
+filesystem test suite.  First, run all the tests in the "encrypt"
+group on the relevant filesystem(s).  For example, to test ext4 and
+f2fs encryption using `kvm-xfstests
+<https://github.com/tytso/xfstests-bld/blob/master/Documentation/kvm-quickstart.md>`_::
+
+    kvm-xfstests -c ext4,f2fs -g encrypt
+
+UBIFS encryption can also be tested this way, but it should be done in
+a separate command, and it takes some time for kvm-xfstests to set up
+emulated UBI volumes::
+
+    kvm-xfstests -c ubifs -g encrypt
+
+No tests should fail.  However, tests that use non-default encryption
+modes (e.g. generic/549 and generic/550) will be skipped if the needed
+algorithms were not built into the kernel's crypto API.  Also, tests
+that access the raw block device (e.g. generic/399, generic/548,
+generic/549, generic/550) will be skipped on UBIFS.
+
+Besides running the "encrypt" group tests, for ext4 and f2fs it's also
+possible to run most xfstests with the "test_dummy_encryption" mount
+option.  This option causes all new files to be automatically
+encrypted with a dummy key, without having to make any API calls.
+This tests the encrypted I/O paths more thoroughly.  To do this with
+kvm-xfstests, use the "encrypt" filesystem configuration::
+
+    kvm-xfstests -c ext4/encrypt,f2fs/encrypt -g auto
+
+Because this runs many more tests than "-g encrypt" does, it takes
+much longer to run; so also consider using `gce-xfstests
+<https://github.com/tytso/xfstests-bld/blob/master/Documentation/gce-xfstests.md>`_
+instead of kvm-xfstests::
+
+    gce-xfstests -c ext4/encrypt,f2fs/encrypt -g auto
-- 
cgit 


From 5025ef8b7ad8f92699faa70f14fffaf309bb4276 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 17 Jun 2019 13:29:33 -0600
Subject: dt-bindings: arm: Convert PSCI binding to json-schema

Convert the PSCI binding to use DT schema format.

Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/arm64/booting.txt                    |   2 +-
 Documentation/devicetree/bindings/arm/arm-boards   |   2 +-
 .../devicetree/bindings/arm/idle-states.txt        |   2 +-
 Documentation/devicetree/bindings/arm/psci.txt     | 111 --------------
 Documentation/devicetree/bindings/arm/psci.yaml    | 163 +++++++++++++++++++++
 Documentation/translations/zh_CN/arm64/booting.txt |   2 +-
 6 files changed, 167 insertions(+), 115 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/psci.txt
 create mode 100644 Documentation/devicetree/bindings/arm/psci.yaml

(limited to 'Documentation')

diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
index fbab7e21d116..1a87dee739db 100644
--- a/Documentation/arm64/booting.txt
+++ b/Documentation/arm64/booting.txt
@@ -257,7 +257,7 @@ following manner:
   processors") to bring CPUs into the kernel.
 
   The device tree should contain a 'psci' node, as described in
-  Documentation/devicetree/bindings/arm/psci.txt.
+  Documentation/devicetree/bindings/arm/psci.yaml.
 
 - Secondary CPU general-purpose register settings
   x0 = 0 (reserved for future use)
diff --git a/Documentation/devicetree/bindings/arm/arm-boards b/Documentation/devicetree/bindings/arm/arm-boards
index abff8d834a6a..6758ece324b1 100644
--- a/Documentation/devicetree/bindings/arm/arm-boards
+++ b/Documentation/devicetree/bindings/arm/arm-boards
@@ -197,7 +197,7 @@ Required nodes:
 The description for the board must include:
    - a "psci" node describing the boot method used for the secondary CPUs.
      A detailed description of the bindings used for "psci" nodes is present
-     in the psci.txt file.
+     in the psci.yaml file.
    - a "cpus" node describing the available cores and their associated
      "enable-method"s. For more details see cpus.txt file.
 
diff --git a/Documentation/devicetree/bindings/arm/idle-states.txt b/Documentation/devicetree/bindings/arm/idle-states.txt
index 3bdbe675b9e6..326f29b270ad 100644
--- a/Documentation/devicetree/bindings/arm/idle-states.txt
+++ b/Documentation/devicetree/bindings/arm/idle-states.txt
@@ -691,7 +691,7 @@ cpus {
     Documentation/devicetree/bindings/arm/cpus.yaml
 
 [2] ARM Linux Kernel documentation - PSCI bindings
-    Documentation/devicetree/bindings/arm/psci.txt
+    Documentation/devicetree/bindings/arm/psci.yaml
 
 [3] ARM Server Base System Architecture (SBSA)
     http://infocenter.arm.com/help/index.jsp
diff --git a/Documentation/devicetree/bindings/arm/psci.txt b/Documentation/devicetree/bindings/arm/psci.txt
deleted file mode 100644
index a2c4f1d52492..000000000000
--- a/Documentation/devicetree/bindings/arm/psci.txt
+++ /dev/null
@@ -1,111 +0,0 @@
-* Power State Coordination Interface (PSCI)
-
-Firmware implementing the PSCI functions described in ARM document number
-ARM DEN 0022A ("Power State Coordination Interface System Software on ARM
-processors") can be used by Linux to initiate various CPU-centric power
-operations.
-
-Issue A of the specification describes functions for CPU suspend, hotplug
-and migration of secure software.
-
-Functions are invoked by trapping to the privilege level of the PSCI
-firmware (specified as part of the binding below) and passing arguments
-in a manner similar to that specified by AAPCS:
-
-	 r0		=> 32-bit Function ID / return value
-	{r1 - r3}	=> Parameters
-
-Note that the immediate field of the trapping instruction must be set
-to #0.
-
-
-Main node required properties:
-
- - compatible    : should contain at least one of:
-
-     * "arm,psci"     : For implementations complying to PSCI versions prior
-			to 0.2.
-			For these cases function IDs must be provided.
-
-     * "arm,psci-0.2" : For implementations complying to PSCI 0.2.
-			Function IDs are not required and should be ignored by
-			an OS with PSCI 0.2 support, but are permitted to be
-			present for compatibility with existing software when
-			"arm,psci" is later in the compatible list.
-
-     * "arm,psci-1.0" : For implementations complying to PSCI 1.0.
-			PSCI 1.0 is backward compatible with PSCI 0.2 with
-			minor specification updates, as defined in the PSCI
-			specification[2].
-
- - method        : The method of calling the PSCI firmware. Permitted
-                   values are:
-
-                   "smc" : SMC #0, with the register assignments specified
-		           in this binding.
-
-                   "hvc" : HVC #0, with the register assignments specified
-		           in this binding.
-
-Main node optional properties:
-
- - cpu_suspend   : Function ID for CPU_SUSPEND operation
-
- - cpu_off       : Function ID for CPU_OFF operation
-
- - cpu_on        : Function ID for CPU_ON operation
-
- - migrate       : Function ID for MIGRATE operation
-
-Device tree nodes that require usage of PSCI CPU_SUSPEND function (ie idle
-state nodes, as per bindings in [1]) must specify the following properties:
-
-- arm,psci-suspend-param
-		Usage: Required for state nodes[1] if the corresponding
-                       idle-states node entry-method property is set
-                       to "psci".
-		Value type: <u32>
-		Definition: power_state parameter to pass to the PSCI
-			    suspend call.
-
-Example:
-
-Case 1: PSCI v0.1 only.
-
-	psci {
-		compatible	= "arm,psci";
-		method		= "smc";
-		cpu_suspend	= <0x95c10000>;
-		cpu_off		= <0x95c10001>;
-		cpu_on		= <0x95c10002>;
-		migrate		= <0x95c10003>;
-	};
-
-Case 2: PSCI v0.2 only
-
-	psci {
-		compatible	= "arm,psci-0.2";
-		method		= "smc";
-	};
-
-Case 3: PSCI v0.2 and PSCI v0.1.
-
-	A DTB may provide IDs for use by kernels without PSCI 0.2 support,
-	enabling firmware and hypervisors to support existing and new kernels.
-	These IDs will be ignored by kernels with PSCI 0.2 support, which will
-	use the standard PSCI 0.2 IDs exclusively.
-
-	psci {
-		compatible = "arm,psci-0.2", "arm,psci";
-		method = "hvc";
-
-		cpu_on = < arbitrary value >;
-		cpu_off = < arbitrary value >;
-
-		...
-	};
-
-[1] Kernel documentation - ARM idle states bindings
-    Documentation/devicetree/bindings/arm/idle-states.txt
-[2] Power State Coordination Interface (PSCI) specification
-    http://infocenter.arm.com/help/topic/com.arm.doc.den0022c/DEN0022C_Power_State_Coordination_Interface.pdf
diff --git a/Documentation/devicetree/bindings/arm/psci.yaml b/Documentation/devicetree/bindings/arm/psci.yaml
new file mode 100644
index 000000000000..7abdf58b335e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/psci.yaml
@@ -0,0 +1,163 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/psci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Power State Coordination Interface (PSCI)
+
+maintainers:
+  - Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+
+description: |+
+  Firmware implementing the PSCI functions described in ARM document number
+  ARM DEN 0022A ("Power State Coordination Interface System Software on ARM
+  processors") can be used by Linux to initiate various CPU-centric power
+  operations.
+
+  Issue A of the specification describes functions for CPU suspend, hotplug
+  and migration of secure software.
+
+  Functions are invoked by trapping to the privilege level of the PSCI
+  firmware (specified as part of the binding below) and passing arguments
+  in a manner similar to that specified by AAPCS:
+
+     r0       => 32-bit Function ID / return value
+    {r1 - r3}	=> Parameters
+
+  Note that the immediate field of the trapping instruction must be set
+  to #0.
+
+  [2] Power State Coordination Interface (PSCI) specification
+    http://infocenter.arm.com/help/topic/com.arm.doc.den0022c/DEN0022C_Power_State_Coordination_Interface.pdf
+
+properties:
+  compatible:
+    oneOf:
+      - description:
+          For implementations complying to PSCI versions prior to 0.2.
+        const: arm,psci
+
+      - description:
+          For implementations complying to PSCI 0.2.
+        const: arm,psci-0.2
+
+      - description:
+          For implementations complying to PSCI 0.2.
+          Function IDs are not required and should be ignored by an OS with
+          PSCI 0.2 support, but are permitted to be present for compatibility
+          with existing software when "arm,psci" is later in the compatible
+          list.
+        items:
+          - const: arm,psci-0.2
+          - const: arm,psci
+
+      - description:
+          For implementations complying to PSCI 1.0.
+        const: arm,psci-1.0
+
+      - description:
+          For implementations complying to PSCI 1.0.
+          PSCI 1.0 is backward compatible with PSCI 0.2 with minor
+          specification updates, as defined in the PSCI specification[2].
+        items:
+          - const: arm,psci-1.0
+          - const: arm,psci-0.2
+
+  method:
+    description: The method of calling the PSCI firmware.
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/string-array
+      - enum:
+          # SMC #0, with the register assignments specified in this binding.
+          - smc
+          # HVC #0, with the register assignments specified in this binding.
+          - hvc
+
+  cpu_suspend:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Function ID for CPU_SUSPEND operation
+
+  cpu_off:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Function ID for CPU_OFF operation
+
+  cpu_on:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Function ID for CPU_ON operation
+
+  migrate:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Function ID for MIGRATE operation
+
+  arm,psci-suspend-param:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      power_state parameter to pass to the PSCI suspend call.
+
+      Device tree nodes that require usage of PSCI CPU_SUSPEND function (ie
+      idle state nodes with entry-method property is set to "psci", as per
+      bindings in [1]) must specify this property.
+
+      [1] Kernel documentation - ARM idle states bindings
+        Documentation/devicetree/bindings/arm/idle-states.txt
+
+
+required:
+  - compatible
+  - method
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: arm,psci
+    then:
+      required:
+        - cpu_off
+        - cpu_on
+
+examples:
+  - |+
+
+    // Case 1: PSCI v0.1 only.
+
+    psci {
+      compatible      = "arm,psci";
+      method          = "smc";
+      cpu_suspend     = <0x95c10000>;
+      cpu_off         = <0x95c10001>;
+      cpu_on          = <0x95c10002>;
+      migrate         = <0x95c10003>;
+    };
+
+  - |+
+
+    // Case 2: PSCI v0.2 only
+
+    psci {
+      compatible      = "arm,psci-0.2";
+      method          = "smc";
+    };
+
+
+  - |+
+
+    // Case 3: PSCI v0.2 and PSCI v0.1.
+
+    /*
+     * A DTB may provide IDs for use by kernels without PSCI 0.2 support,
+     * enabling firmware and hypervisors to support existing and new kernels.
+     * These IDs will be ignored by kernels with PSCI 0.2 support, which will
+     * use the standard PSCI 0.2 IDs exclusively.
+     */
+
+    psci {
+      compatible = "arm,psci-0.2", "arm,psci";
+      method = "hvc";
+
+      cpu_on = <0x95c10002>;
+      cpu_off = <0x95c10001>;
+    };
+...
diff --git a/Documentation/translations/zh_CN/arm64/booting.txt b/Documentation/translations/zh_CN/arm64/booting.txt
index c1dd968c5ee9..c84f2accb173 100644
--- a/Documentation/translations/zh_CN/arm64/booting.txt
+++ b/Documentation/translations/zh_CN/arm64/booting.txt
@@ -236,7 +236,7 @@ AArch64 内核当前没有提供自解压代码，因此如果使用了压缩内
   *译者注: ARM DEN 0022A 已更新到 ARM DEN 0022C。
 
   设备树必须包含一个 ‘psci’ 节点，请参考以下文档：
-  Documentation/devicetree/bindings/arm/psci.txt
+  Documentation/devicetree/bindings/arm/psci.yaml
 
 
 - 辅助 CPU 通用寄存器设置
-- 
cgit 


From 89ebf2b8501c41b16b093fbf8b617fb5630445d9 Mon Sep 17 00:00:00 2001
From: Vignesh Raghavendra <vigneshr@ti.com>
Date: Tue, 25 Jun 2019 13:27:43 +0530
Subject: dt-bindings: mtd: Add binding documentation for HyperFlash

Add DT binding documentation for HyperFlash devices.

Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 .../devicetree/bindings/mtd/cypress,hyperflash.txt          | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mtd/cypress,hyperflash.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mtd/cypress,hyperflash.txt b/Documentation/devicetree/bindings/mtd/cypress,hyperflash.txt
new file mode 100644
index 000000000000..ad42f4db32f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/cypress,hyperflash.txt
@@ -0,0 +1,13 @@
+Bindings for HyperFlash NOR flash chips compliant with Cypress HyperBus
+specification and supports Cypress CFI specification 1.5 command set.
+
+Required properties:
+- compatible : "cypress,hyperflash", "cfi-flash" for HyperFlash NOR chips
+- reg : Address of flash's memory map
+
+Example:
+
+	flash@0 {
+		compatible = "cypress,hyperflash", "cfi-flash";
+		reg = <0x0 0x4000000>;
+	};
-- 
cgit 


From d7865933af9ed38f39cf085e1def44b637109999 Mon Sep 17 00:00:00 2001
From: Vignesh Raghavendra <vigneshr@ti.com>
Date: Tue, 25 Jun 2019 13:27:45 +0530
Subject: dt-bindings: mtd: Add bindings for TI's AM654 HyperBus memory
 controller

Add binding documentation for TI's HyperBus memory controller present on
AM654 SoC.

Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 .../devicetree/bindings/mtd/ti,am654-hbmc.txt      | 51 ++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mtd/ti,am654-hbmc.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mtd/ti,am654-hbmc.txt b/Documentation/devicetree/bindings/mtd/ti,am654-hbmc.txt
new file mode 100644
index 000000000000..faa81c2e5da6
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/ti,am654-hbmc.txt
@@ -0,0 +1,51 @@
+Bindings for HyperBus Memory Controller (HBMC) on TI's K3 family of SoCs
+
+Required properties:
+- compatible : "ti,am654-hbmc" for AM654 SoC
+- reg : Two entries:
+	First entry pointed to the register space of HBMC controller
+	Second entry pointing to the memory map region dedicated for
+	MMIO access to attached flash devices
+- ranges : Address translation from offset within CS to allocated MMIO
+	   space in SoC
+
+Optional properties:
+- mux-controls : phandle to the multiplexer that controls selection of
+		 HBMC vs OSPI inside Flash SubSystem (FSS). Default is OSPI,
+		 if property is absent.
+		 See Documentation/devicetree/bindings/mux/reg-mux.txt
+		 for mmio-mux binding details
+
+Example:
+
+	system-controller@47000000 {
+		compatible = "syscon", "simple-mfd";
+		reg = <0x0 0x47000000 0x0 0x100>;
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		hbmc_mux: multiplexer {
+			compatible = "mmio-mux";
+			#mux-control-cells = <1>;
+			mux-reg-masks = <0x4 0x2>; /* 0: reg 0x4, bit 1 */
+		};
+	};
+
+	hbmc: hyperbus@47034000 {
+		compatible = "ti,am654-hbmc";
+		reg = <0x0 0x47034000 0x0 0x100>,
+			<0x5 0x00000000 0x1 0x0000000>;
+		power-domains = <&k3_pds 55>;
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0x0 0x0 0x5 0x00000000 0x4000000>, /* CS0 - 64MB */
+			 <0x1 0x0 0x5 0x04000000 0x4000000>; /* CS1 - 64MB */
+		mux-controls = <&hbmc_mux 0>;
+
+		/* Slave flash node */
+		flash@0,0 {
+			compatible = "cypress,hyperflash", "cfi-flash";
+			reg = <0x0 0x0 0x4000000>;
+		};
+	};
-- 
cgit 


From 5a5d7a4dd4373d2bea000eaf290b3d73710174f1 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <Igor.Russkikh@aquantia.com>
Date: Wed, 26 Jun 2019 12:35:35 +0000
Subject: net: aquantia: add documentation for the atlantic driver

Document contains configuration options description,
details and examples of driver various settings.

Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../device_drivers/aquantia/atlantic.txt           | 439 +++++++++++++++++++++
 1 file changed, 439 insertions(+)
 create mode 100644 Documentation/networking/device_drivers/aquantia/atlantic.txt

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/aquantia/atlantic.txt b/Documentation/networking/device_drivers/aquantia/atlantic.txt
new file mode 100644
index 000000000000..d235cbaeccc6
--- /dev/null
+++ b/Documentation/networking/device_drivers/aquantia/atlantic.txt
@@ -0,0 +1,439 @@
+aQuantia AQtion Driver for the aQuantia Multi-Gigabit PCI Express Family of
+Ethernet Adapters
+=============================================================================
+
+Contents
+========
+
+- Identifying Your Adapter
+- Configuration
+- Supported ethtool options
+- Command Line Parameters
+- Config file parameters
+- Support
+- License
+
+Identifying Your Adapter
+========================
+
+The driver in this release is compatible with AQC-100, AQC-107, AQC-108 based ethernet adapters.
+
+
+SFP+ Devices (for AQC-100 based adapters)
+----------------------------------
+
+This release tested with passive Direct Attach Cables (DAC) and SFP+/LC Optical Transceiver.
+
+Configuration
+=========================
+  Viewing Link Messages
+  ---------------------
+  Link messages will not be displayed to the console if the distribution is
+  restricting system messages. In order to see network driver link messages on
+  your console, set dmesg to eight by entering the following:
+
+       dmesg -n 8
+
+  NOTE: This setting is not saved across reboots.
+
+  Jumbo Frames
+  ------------
+  The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
+  enabled by changing the MTU to a value larger than the default of 1500.
+  The maximum value for the MTU is 16000.  Use the `ip` command to
+  increase the MTU size.  For example:
+
+        ip link set mtu 16000 dev enp1s0
+
+  ethtool
+  -------
+  The driver utilizes the ethtool interface for driver configuration and
+  diagnostics, as well as displaying statistical information. The latest
+  ethtool version is required for this functionality.
+
+  NAPI
+  ----
+  NAPI (Rx polling mode) is supported in the atlantic driver.
+
+Supported ethtool options
+============================
+ Viewing adapter settings
+ ---------------------
+ ethtool <ethX>
+
+ Output example:
+
+  Settings for enp1s0:
+    Supported ports: [ TP ]
+    Supported link modes:   100baseT/Full
+                            1000baseT/Full
+                            10000baseT/Full
+                            2500baseT/Full
+                            5000baseT/Full
+    Supported pause frame use: Symmetric
+    Supports auto-negotiation: Yes
+    Supported FEC modes: Not reported
+    Advertised link modes:  100baseT/Full
+                            1000baseT/Full
+                            10000baseT/Full
+                            2500baseT/Full
+                            5000baseT/Full
+    Advertised pause frame use: Symmetric
+    Advertised auto-negotiation: Yes
+    Advertised FEC modes: Not reported
+    Speed: 10000Mb/s
+    Duplex: Full
+    Port: Twisted Pair
+    PHYAD: 0
+    Transceiver: internal
+    Auto-negotiation: on
+    MDI-X: Unknown
+    Supports Wake-on: g
+    Wake-on: d
+    Link detected: yes
+
+ ---
+ Note: AQrate speeds (2.5/5 Gb/s) will be displayed only with linux kernels > 4.10.
+    But you can still use these speeds:
+	ethtool -s eth0 autoneg off speed 2500
+
+ Viewing adapter information
+ ---------------------
+ ethtool -i <ethX>
+
+ Output example:
+
+  driver: atlantic
+  version: 5.2.0-050200rc5-generic-kern
+  firmware-version: 3.1.78
+  expansion-rom-version:
+  bus-info: 0000:01:00.0
+  supports-statistics: yes
+  supports-test: no
+  supports-eeprom-access: no
+  supports-register-dump: yes
+  supports-priv-flags: no
+
+
+ Viewing Ethernet adapter statistics:
+ ---------------------
+ ethtool -S <ethX>
+
+ Output example:
+ NIC statistics:
+     InPackets: 13238607
+     InUCast: 13293852
+     InMCast: 52
+     InBCast: 3
+     InErrors: 0
+     OutPackets: 23703019
+     OutUCast: 23704941
+     OutMCast: 67
+     OutBCast: 11
+     InUCastOctects: 213182760
+     OutUCastOctects: 22698443
+     InMCastOctects: 6600
+     OutMCastOctects: 8776
+     InBCastOctects: 192
+     OutBCastOctects: 704
+     InOctects: 2131839552
+     OutOctects: 226938073
+     InPacketsDma: 95532300
+     OutPacketsDma: 59503397
+     InOctetsDma: 1137102462
+     OutOctetsDma: 2394339518
+     InDroppedDma: 0
+     Queue[0] InPackets: 23567131
+     Queue[0] OutPackets: 20070028
+     Queue[0] InJumboPackets: 0
+     Queue[0] InLroPackets: 0
+     Queue[0] InErrors: 0
+     Queue[1] InPackets: 45428967
+     Queue[1] OutPackets: 11306178
+     Queue[1] InJumboPackets: 0
+     Queue[1] InLroPackets: 0
+     Queue[1] InErrors: 0
+     Queue[2] InPackets: 3187011
+     Queue[2] OutPackets: 13080381
+     Queue[2] InJumboPackets: 0
+     Queue[2] InLroPackets: 0
+     Queue[2] InErrors: 0
+     Queue[3] InPackets: 23349136
+     Queue[3] OutPackets: 15046810
+     Queue[3] InJumboPackets: 0
+     Queue[3] InLroPackets: 0
+     Queue[3] InErrors: 0
+
+ Interrupt coalescing support
+ ---------------------------------
+ ITR mode, TX/RX coalescing timings could be viewed with:
+
+ ethtool -c <ethX>
+
+ and changed with:
+
+ ethtool -C <ethX> tx-usecs <usecs> rx-usecs <usecs>
+
+ To disable coalescing:
+
+ ethtool -C <ethX> tx-usecs 0 rx-usecs 0 tx-max-frames 1 tx-max-frames 1
+
+ Wake on LAN support
+ ---------------------------------
+
+ WOL support by magic packet:
+
+ ethtool -s <ethX> wol g
+
+ To disable WOL:
+
+ ethtool -s <ethX> wol d
+
+ Set and check the driver message level
+ ---------------------------------
+
+ Set message level
+
+ ethtool -s <ethX> msglvl <level>
+
+ Level values:
+
+ 0x0001 - general driver status.
+ 0x0002 - hardware probing.
+ 0x0004 - link state.
+ 0x0008 - periodic status check.
+ 0x0010 - interface being brought down.
+ 0x0020 - interface being brought up.
+ 0x0040 - receive error.
+ 0x0080 - transmit error.
+ 0x0200 - interrupt handling.
+ 0x0400 - transmit completion.
+ 0x0800 - receive completion.
+ 0x1000 - packet contents.
+ 0x2000 - hardware status.
+ 0x4000 - Wake-on-LAN status.
+
+ By default, the level of debugging messages is set 0x0001(general driver status).
+
+ Check message level
+
+ ethtool <ethX> | grep "Current message level"
+
+ If you want to disable the output of messages
+
+ ethtool -s <ethX> msglvl 0
+
+ RX flow rules (ntuple filters)
+ ---------------------------------
+ There are separate rules supported, that applies in that order:
+ 1. 16 VLAN ID rules
+ 2. 16 L2 EtherType rules
+ 3. 8 L3/L4 5-Tuple rules
+
+
+ The driver utilizes the ethtool interface for configuring ntuple filters,
+ via "ethtool -N <device> <filter>".
+
+ To enable or disable the RX flow rules:
+
+ ethtool -K ethX ntuple <on|off>
+
+ When disabling ntuple filters, all the user programed filters are
+ flushed from the driver cache and hardware. All needed filters must
+ be re-added when ntuple is re-enabled.
+
+ Because of the fixed order of the rules, the location of filters is also fixed:
+ - Locations 0 - 15 for VLAN ID filters
+ - Locations 16 - 31 for L2 EtherType filters
+ - Locations 32 - 39 for L3/L4 5-tuple filters (locations 32, 36 for IPv6)
+
+ The L3/L4 5-tuple (protocol, source and destination IP address, source and
+ destination TCP/UDP/SCTP port) is compared against 8 filters. For IPv4, up to
+ 8 source and destination addresses can be matched. For IPv6, up to 2 pairs of
+ addresses can be supported. Source and destination ports are only compared for
+ TCP/UDP/SCTP packets.
+
+ To add a filter that directs packet to queue 5, use <-N|-U|--config-nfc|--config-ntuple> switch:
+
+ ethtool -N <ethX> flow-type udp4 src-ip 10.0.0.1 dst-ip 10.0.0.2 src-port 2000 dst-port 2001 action 5 <loc 32>
+
+ - action is the queue number.
+ - loc is the rule number.
+
+ For "flow-type ip4|udp4|tcp4|sctp4|ip6|udp6|tcp6|sctp6" you must set the loc
+ number within 32 - 39.
+ For "flow-type ip4|udp4|tcp4|sctp4|ip6|udp6|tcp6|sctp6" you can set 8 rules
+ for traffic IPv4 or you can set 2 rules for traffic IPv6. Loc number traffic
+ IPv6 is 32 and 36.
+ At the moment you can not use IPv4 and IPv6 filters at the same time.
+
+ Example filter for IPv6 filter traffic:
+
+ sudo ethtool -N <ethX> flow-type tcp6 src-ip 2001:db8:0:f101::1 dst-ip 2001:db8:0:f101::2 action 1 loc 32
+ sudo ethtool -N <ethX> flow-type ip6 src-ip 2001:db8:0:f101::2 dst-ip 2001:db8:0:f101::5 action -1 loc 36
+
+ Example filter for IPv4 filter traffic:
+
+ sudo ethtool -N <ethX> flow-type udp4 src-ip 10.0.0.4 dst-ip 10.0.0.7 src-port 2000 dst-port 2001 loc 32
+ sudo ethtool -N <ethX> flow-type tcp4 src-ip 10.0.0.3 dst-ip 10.0.0.9 src-port 2000 dst-port 2001 loc 33
+ sudo ethtool -N <ethX> flow-type ip4 src-ip 10.0.0.6 dst-ip 10.0.0.4 loc 34
+
+ If you set action -1, then all traffic corresponding to the filter will be discarded.
+ The maximum value action is 31.
+
+
+ The VLAN filter (VLAN id) is compared against 16 filters.
+ VLAN id must be accompanied by mask 0xF000. That is to distinguish VLAN filter
+ from L2 Ethertype filter with UserPriority since both User Priority and VLAN ID
+ are passed in the same 'vlan' parameter.
+
+ To add a filter that directs packets from VLAN 2001 to queue 5:
+ ethtool -N <ethX> flow-type ip4 vlan 2001 m 0xF000 action 1 loc 0
+
+
+ L2 EtherType filters allows filter packet by EtherType field or both EtherType
+ and User Priority (PCP) field of 802.1Q.
+ UserPriority (vlan) parameter must be accompanied by mask 0x1FFF. That is to
+ distinguish VLAN filter from L2 Ethertype filter with UserPriority since both
+ User Priority and VLAN ID are passed in the same 'vlan' parameter.
+
+ To add a filter that directs IP4 packess of priority 3 to queue 3:
+ ethtool -N <ethX> flow-type ether proto 0x800 vlan 0x600 m 0x1FFF action 3 loc 16
+
+
+ To see the list of filters currently present:
+
+ ethtool <-u|-n|--show-nfc|--show-ntuple> <ethX>
+
+ Rules may be deleted from the table itself. This is done using:
+
+ sudo ethtool <-N|-U|--config-nfc|--config-ntuple> <ethX> delete <loc>
+
+ - loc is the rule number to be deleted.
+
+ Rx filters is an interface to load the filter table that funnels all flow
+ into queue 0 unless an alternative queue is specified using "action". In that
+ case, any flow that matches the filter criteria will be directed to the
+ appropriate queue. RX filters is supported on all kernels 2.6.30 and later.
+
+ RSS for UDP
+ ---------------------------------
+ Currently, NIC does not support RSS for fragmented IP packets, which leads to
+ incorrect working of RSS for fragmented UDP traffic. To disable RSS for UDP the
+ RX Flow L3/L4 rule may be used.
+
+ Example:
+ ethtool -N eth0 flow-type udp4 action 0 loc 32
+
+Command Line Parameters
+=======================
+The following command line parameters are available on atlantic driver:
+
+aq_itr -Interrupt throttling mode
+----------------------------------------
+Accepted values: 0, 1, 0xFFFF
+Default value: 0xFFFF
+0      - Disable interrupt throttling.
+1      - Enable interrupt throttling and use specified tx and rx rates.
+0xFFFF - Auto throttling mode. Driver will choose the best RX and TX
+         interrupt throtting settings based on link speed.
+
+aq_itr_tx - TX interrupt throttle rate
+----------------------------------------
+Accepted values: 0 - 0x1FF
+Default value: 0
+TX side throttling in microseconds. Adapter will setup maximum interrupt delay
+to this value. Minimum interrupt delay will be a half of this value
+
+aq_itr_rx - RX interrupt throttle rate
+----------------------------------------
+Accepted values: 0 - 0x1FF
+Default value: 0
+RX side throttling in microseconds. Adapter will setup maximum interrupt delay
+to this value. Minimum interrupt delay will be a half of this value
+
+Note: ITR settings could be changed in runtime by ethtool -c means (see below)
+
+Config file parameters
+=======================
+For some fine tuning and performance optimizations,
+some parameters can be changed in the {source_dir}/aq_cfg.h file.
+
+AQ_CFG_RX_PAGEORDER
+----------------------------------------
+Default value: 0
+RX page order override. Thats a power of 2 number of RX pages allocated for
+each descriptor. Received descriptor size is still limited by AQ_CFG_RX_FRAME_MAX.
+Increasing pageorder makes page reuse better (actual on iommu enabled systems).
+
+AQ_CFG_RX_REFILL_THRES
+----------------------------------------
+Default value: 32
+RX refill threshold. RX path will not refill freed descriptors until the
+specified number of free descriptors is observed. Larger values may help
+better page reuse but may lead to packet drops as well.
+
+AQ_CFG_VECS_DEF
+------------------------------------------------------------
+Number of queues
+Valid Range: 0 - 8 (up to AQ_CFG_VECS_MAX)
+Default value: 8
+Notice this value will be capped by the number of cores available on the system.
+
+AQ_CFG_IS_RSS_DEF
+------------------------------------------------------------
+Enable/disable Receive Side Scaling
+
+This feature allows the adapter to distribute receive processing
+across multiple CPU-cores and to prevent from overloading a single CPU core.
+
+Valid values
+0 - disabled
+1 - enabled
+
+Default value: 1
+
+AQ_CFG_NUM_RSS_QUEUES_DEF
+------------------------------------------------------------
+Number of queues for Receive Side Scaling
+Valid Range: 0 - 8 (up to AQ_CFG_VECS_DEF)
+
+Default value: AQ_CFG_VECS_DEF
+
+AQ_CFG_IS_LRO_DEF
+------------------------------------------------------------
+Enable/disable Large Receive Offload
+
+This offload enables the adapter to coalesce multiple TCP segments and indicate
+them as a single coalesced unit to the OS networking subsystem.
+The system consumes less energy but it also introduces more latency in packets processing.
+
+Valid values
+0 - disabled
+1 - enabled
+
+Default value: 1
+
+AQ_CFG_TX_CLEAN_BUDGET
+----------------------------------------
+Maximum descriptors to cleanup on TX at once.
+Default value: 256
+
+After the aq_cfg.h file changed the driver must be rebuilt to take effect.
+
+Support
+=======
+
+If an issue is identified with the released source code on the supported
+kernel with a supported adapter, email the specific information related
+to the issue to support@aquantia.com
+
+License
+=======
+
+aQuantia Corporation Network Driver
+Copyright(c) 2014 - 2019 aQuantia Corporation.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms and conditions of the GNU General Public License,
+version 2, as published by the Free Software Foundation.
-- 
cgit 


From edd3e620ab15e66b0d5477dc3bd4732a2b3baddb Mon Sep 17 00:00:00 2001
From: Kamal Dasu <kdasu.kdev@gmail.com>
Date: Tue, 21 May 2019 10:44:21 -0400
Subject: dt-bindings: mtd: brcmnand: Make nand-ecc-strength and
 nand-ecc-step-size optional

nand-ecc-strength and nand-ecc-step-size can be made optional as
brcmnand driver can support using raw NAND layer detected values.

Signed-off-by: Kamal Dasu <kdasu.kdev@gmail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt b/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt
index 0b7c3738b66c..0d952afa635d 100644
--- a/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt
+++ b/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt
@@ -101,10 +101,10 @@ Required properties:
                               number (e.g., 0, 1, 2, etc.)
 - #address-cells            : see partition.txt
 - #size-cells               : see partition.txt
-- nand-ecc-strength         : see nand-controller.yaml
-- nand-ecc-step-size        : must be 512 or 1024. See nand-controller.yaml
 
 Optional properties:
+- nand-ecc-strength         : see nand-controller.yaml
+- nand-ecc-step-size        : must be 512 or 1024. See nand-controller.yaml
 - nand-on-flash-bbt         : boolean, to enable the on-flash BBT for this
                               chip-select. See nand-controller.yaml
 - brcm,nand-oob-sector-size : integer, to denote the spare area sector size
-- 
cgit 


From 3054354ffe096f748410107e6e2c00165e7a17e5 Mon Sep 17 00:00:00 2001
From: Kamal Dasu <kdasu.kdev@gmail.com>
Date: Tue, 4 Jun 2019 10:36:31 -0400
Subject: dt-bindings: mtd: brcmnand: Add brcmnand, brcmnand-v7.3 support

Added brcm,brcmnand-v7.3 as possible compatible string to support
brcmnand controller v7.3.

Signed-off-by: Kamal Dasu <kdasu.kdev@gmail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt b/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt
index 0d952afa635d..82156dc8f304 100644
--- a/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt
+++ b/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt
@@ -28,6 +28,7 @@ Required properties:
                          brcm,brcmnand-v7.0
                          brcm,brcmnand-v7.1
                          brcm,brcmnand-v7.2
+                         brcm,brcmnand-v7.3
                          brcm,brcmnand
 - reg              : the register start and length for NAND register region.
                      (optional) Flash DMA register range (if present)
-- 
cgit 


From e7dd8a894830ca2d7b96d35dd5405993b0cfe32c Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@gmail.com>
Date: Tue, 25 Jun 2019 10:41:50 +0200
Subject: dt-bindings: net: dsa: qca8k: document reset-gpios property

This patch documents the qca8k's reset-gpios property that
can be used if the QCA8337N ends up in a bad state during
reset.

Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/dsa/qca8k.txt | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt
index 93a7469e70d4..ccbc6d89325d 100644
--- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt
+++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt
@@ -9,6 +9,10 @@ Required properties:
 - #size-cells: must be 0
 - #address-cells: must be 1
 
+Optional properties:
+
+- reset-gpios: GPIO to be used to reset the whole device
+
 Subnodes:
 
 The integrated switch subnode should be specified according to the binding
@@ -66,6 +70,7 @@ for the external mdio-bus configuration:
 			#address-cells = <1>;
 			#size-cells = <0>;
 
+			reset-gpios = <&gpio 42 GPIO_ACTIVE_LOW>;
 			reg = <0x10>;
 
 			ports {
@@ -123,6 +128,7 @@ for the internal master mdio-bus configuration:
 			#address-cells = <1>;
 			#size-cells = <0>;
 
+			reset-gpios = <&gpio 42 GPIO_ACTIVE_LOW>;
 			reg = <0x10>;
 
 			ports {
-- 
cgit 


From dc1d9dac5ca2f3c3cf024f1e17857033ba1e080e Mon Sep 17 00:00:00 2001
From: Jonas Gorski <jonas.gorski@gmail.com>
Date: Thu, 2 May 2019 14:26:55 +0200
Subject: devicetree: document the BCM63XX gated clock bindings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add binding documentation for the gated clock controller found on MIPS
based BCM63XX SoCs.

Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 .../bindings/clock/brcm,bcm63xx-clocks.txt         | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/brcm,bcm63xx-clocks.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm63xx-clocks.txt b/Documentation/devicetree/bindings/clock/brcm,bcm63xx-clocks.txt
new file mode 100644
index 000000000000..3041657e2f96
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/brcm,bcm63xx-clocks.txt
@@ -0,0 +1,22 @@
+Gated Clock Controller Bindings for MIPS based BCM63XX SoCs
+
+Required properties:
+- compatible: must be one of:
+	 "brcm,bcm3368-clocks"
+	 "brcm,bcm6328-clocks"
+	 "brcm,bcm6358-clocks"
+	 "brcm,bcm6362-clocks"
+	 "brcm,bcm6368-clocks"
+	 "brcm,bcm63268-clocks"
+
+- reg: Address and length of the register set
+- #clock-cells: must be <1>
+
+
+Example:
+
+clkctl: clock-controller@10000004 {
+	compatible = "brcm,bcm6328-clocks";
+	reg = <0x10000004 0x4>;
+	#clock-cells = <1>;
+};
-- 
cgit 


From d743ea67cb98f9cd727e7ff07a0da647b9544444 Mon Sep 17 00:00:00 2001
From: Mike Looijmans <mike.looijmans@topic.nl>
Date: Fri, 17 May 2019 15:20:20 +0200
Subject: dt-bindings: clock: Add silabs,si5341

Adds the devicetree bindings for the Si5341 and Si5340 chips from
Silicon Labs. These are multiple-input multiple-output clock
synthesizers.

Signed-off-by: Mike Looijmans <mike.looijmans@topic.nl>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 .../devicetree/bindings/clock/silabs,si5341.txt    | 162 +++++++++++++++++++++
 1 file changed, 162 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/silabs,si5341.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/clock/silabs,si5341.txt b/Documentation/devicetree/bindings/clock/silabs,si5341.txt
new file mode 100644
index 000000000000..a70c333e4cd4
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/silabs,si5341.txt
@@ -0,0 +1,162 @@
+Binding for Silicon Labs Si5341 and Si5340 programmable i2c clock generator.
+
+Reference
+[1] Si5341 Data Sheet
+    https://www.silabs.com/documents/public/data-sheets/Si5341-40-D-DataSheet.pdf
+[2] Si5341 Reference Manual
+    https://www.silabs.com/documents/public/reference-manuals/Si5341-40-D-RM.pdf
+
+The Si5341 and Si5340 are programmable i2c clock generators with up to 10 output
+clocks. The chip contains a PLL that sources 5 (or 4) multisynth clocks, which
+in turn can be directed to any of the 10 (or 4) outputs through a divider.
+The internal structure of the clock generators can be found in [2].
+
+The driver can be used in "as is" mode, reading the current settings from the
+chip at boot, in case you have a (pre-)programmed device. If the PLL is not
+configured when the driver probes, it assumes the driver must fully initialize
+it.
+
+The device type, speed grade and revision are determined runtime by probing.
+
+The driver currently only supports XTAL input mode, and does not support any
+fancy input configurations. They can still be programmed into the chip and
+the driver will leave them "as is".
+
+==I2C device node==
+
+Required properties:
+- compatible: shall be one of the following:
+	"silabs,si5340" - Si5340 A/B/C/D
+	"silabs,si5341" - Si5341 A/B/C/D
+- reg: i2c device address, usually 0x74
+- #clock-cells: from common clock binding; shall be set to 2.
+	The first value is "0" for outputs, "1" for synthesizers.
+	The second value is the output or synthesizer index.
+- clocks: from common clock binding; list of parent clock  handles,
+	corresponding to inputs. Use a fixed clock for the "xtal" input.
+	At least one must be present.
+- clock-names: One of: "xtal", "in0", "in1", "in2"
+- vdd-supply: Regulator node for VDD
+
+Optional properties:
+- vdda-supply: Regulator node for VDDA
+- vdds-supply: Regulator node for VDDS
+- silabs,pll-m-num, silabs,pll-m-den: Numerator and denominator for PLL
+  feedback divider. Must be such that the PLL output is in the valid range. For
+  example, to create 14GHz from a 48MHz xtal, use m-num=14000 and m-den=48. Only
+  the fraction matters, using 3500 and 12 will deliver the exact same result.
+  If these are not specified, and the PLL is not yet programmed when the driver
+  probes, the PLL will be set to 14GHz.
+- silabs,reprogram: When present, the driver will always assume the device must
+  be initialized, and always performs the soft-reset routine. Since this will
+  temporarily stop all output clocks, don't do this if the chip is generating
+  the CPU clock for example.
+- interrupts: Interrupt for INTRb pin.
+- #address-cells: shall be set to 1.
+- #size-cells: shall be set to 0.
+
+
+== Child nodes: Outputs ==
+
+The child nodes list the output clocks.
+
+Each of the clock outputs can be overwritten individually by using a child node.
+If a child node for a clock output is not set, the configuration remains
+unchanged.
+
+Required child node properties:
+- reg: number of clock output.
+
+Optional child node properties:
+- vdd-supply: Regulator node for VDD for this output. The driver selects default
+	values for common-mode and amplitude based on the voltage.
+- silabs,format: Output format, one of:
+	1 = differential (defaults to LVDS levels)
+	2 = low-power (defaults to HCSL levels)
+	4 = LVCMOS
+- silabs,common-mode: Manually override output common mode, see [2] for values
+- silabs,amplitude: Manually override output amplitude, see [2] for values
+- silabs,synth-master: boolean. If present, this output is allowed to change the
+	multisynth frequency dynamically.
+- silabs,silabs,disable-high: boolean. If set, the clock output is driven HIGH
+	when disabled, otherwise it's driven LOW.
+
+==Example==
+
+/* 48MHz reference crystal */
+ref48: ref48M {
+	compatible = "fixed-clock";
+	#clock-cells = <0>;
+	clock-frequency = <48000000>;
+};
+
+i2c-master-node {
+	/* Programmable clock (for logic) */
+	si5341: clock-generator@74 {
+		reg = <0x74>;
+		compatible = "silabs,si5341";
+		#clock-cells = <2>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		clocks = <&ref48>;
+		clock-names = "xtal";
+
+		silabs,pll-m-num = <14000>; /* PLL at 14.0 GHz */
+		silabs,pll-m-den = <48>;
+		silabs,reprogram; /* Chips are not programmed, always reset */
+
+		out@0 {
+			reg = <0>;
+			silabs,format = <1>; /* LVDS 3v3 */
+			silabs,common-mode = <3>;
+			silabs,amplitude = <3>;
+			silabs,synth-master;
+		};
+
+		/*
+		 * Output 6 configuration:
+		 *  LVDS 1v8
+		 */
+		out@6 {
+			reg = <6>;
+			silabs,format = <1>; /* LVDS 1v8 */
+			silabs,common-mode = <13>;
+			silabs,amplitude = <3>;
+		};
+
+		/*
+		 * Output 8 configuration:
+		 *  HCSL 3v3
+		 */
+		out@8 {
+			reg = <8>;
+			silabs,format = <2>;
+			silabs,common-mode = <11>;
+			silabs,amplitude = <3>;
+		};
+	};
+};
+
+some-video-node {
+	/* Standard clock bindings */
+	clock-names = "pixel";
+	clocks = <&si5341 0 7>; /* Output 7 */
+
+	/* Set output 7 to use syntesizer 3 as its parent */
+	assigned-clocks = <&si5341 0 7>, <&si5341 1 3>;
+	assigned-clock-parents = <&si5341 1 3>;
+	/* Set output 7 to 148.5 MHz using a synth frequency of 594 MHz */
+	assigned-clock-rates = <148500000>, <594000000>;
+};
+
+some-audio-node {
+	clock-names = "i2s-clk";
+	clocks = <&si5341 0 0>;
+	/*
+	 * since output 0 is a synth-master, the synth will be automatically set
+	 * to an appropriate frequency when the audio driver requests another
+	 * frequency. We give control over synth 2 to this output here.
+	 */
+	assigned-clocks = <&si5341 0 0>;
+	assigned-clock-parents = <&si5341 1 2>;
+};
-- 
cgit 


From f9d3fb22ab27aaee8748480fe3fa8cc17875ee30 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Thu, 27 Jun 2019 09:55:17 +0100
Subject: clk: lochnagar: Update DT binding doc to include the primary SPDIF
 MCLK

This clock was missed when the binding was initially merged but is
supported by the driver, so add it to the binding document.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 Documentation/devicetree/bindings/clock/cirrus,lochnagar.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/clock/cirrus,lochnagar.txt b/Documentation/devicetree/bindings/clock/cirrus,lochnagar.txt
index b8d8ef3bdc5f..52a064c789ee 100644
--- a/Documentation/devicetree/bindings/clock/cirrus,lochnagar.txt
+++ b/Documentation/devicetree/bindings/clock/cirrus,lochnagar.txt
@@ -40,6 +40,7 @@ Optional properties:
        input audio clocks from host system.
      - ln-psia1-mclk, ln-psia2-mclk : Optional input audio clocks from
        external connector.
+     - ln-spdif-mclk : Optional input audio clock from SPDIF.
      - ln-spdif-clkout : Optional input audio clock from SPDIF.
      - ln-adat-mclk : Optional input audio clock from ADAT.
      - ln-pmic-32k : On board fixed clock.
-- 
cgit 


From a58946c158a040068e7c94dc1d58bbd273258068 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 26 Jun 2019 21:02:33 +0100
Subject: keys: Pass the network namespace into request_key mechanism

Create a request_key_net() function and use it to pass the network
namespace domain tag into DNS revolver keys and rxrpc/AFS keys so that keys
for different domains can coexist in the same keyring.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: netdev@vger.kernel.org
cc: linux-nfs@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: linux-afs@lists.infradead.org
---
 Documentation/security/keys/core.rst        | 28 ++++++++++++++++++++++------
 Documentation/security/keys/request-key.rst | 29 +++++++++++++++++++++--------
 2 files changed, 43 insertions(+), 14 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index ae930ae9d590..0e74f372e58c 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -1102,26 +1102,42 @@ payload contents" for more information.
     See also Documentation/security/keys/request-key.rst.
 
 
+ *  To search for a key in a specific domain, call:
+
+	struct key *request_key_tag(const struct key_type *type,
+				    const char *description,
+				    struct key_tag *domain_tag,
+				    const char *callout_info);
+
+    This is identical to request_key(), except that a domain tag may be
+    specifies that causes search algorithm to only match keys matching that
+    tag.  The domain_tag may be NULL, specifying a global domain that is
+    separate from any nominated domain.
+
+
  *  To search for a key, passing auxiliary data to the upcaller, call::
 
 	struct key *request_key_with_auxdata(const struct key_type *type,
 					     const char *description,
+					     struct key_tag *domain_tag,
 					     const void *callout_info,
 					     size_t callout_len,
 					     void *aux);
 
-    This is identical to request_key(), except that the auxiliary data is
-    passed to the key_type->request_key() op if it exists, and the callout_info
-    is a blob of length callout_len, if given (the length may be 0).
+    This is identical to request_key_tag(), except that the auxiliary data is
+    passed to the key_type->request_key() op if it exists, and the
+    callout_info is a blob of length callout_len, if given (the length may be
+    0).
 
 
  *  To search for a key under RCU conditions, call::
 
 	struct key *request_key_rcu(const struct key_type *type,
-				    const char *description);
+				    const char *description,
+				    struct key_tag *domain_tag);
 
-    which is similar to request_key() except that it does not check for keys
-    that are under construction and it will not call out to userspace to
+    which is similar to request_key_tag() except that it does not check for
+    keys that are under construction and it will not call out to userspace to
     construct a key if it can't find a match.
 
 
diff --git a/Documentation/security/keys/request-key.rst b/Documentation/security/keys/request-key.rst
index 5a210baa583a..35f2296b704a 100644
--- a/Documentation/security/keys/request-key.rst
+++ b/Documentation/security/keys/request-key.rst
@@ -13,10 +13,18 @@ The process starts by either the kernel requesting a service by calling
 				const char *description,
 				const char *callout_info);
 
+or::
+
+	struct key *request_key_tag(const struct key_type *type,
+				    const char *description,
+				    const struct key_tag *domain_tag,
+				    const char *callout_info);
+
 or::
 
 	struct key *request_key_with_auxdata(const struct key_type *type,
 					     const char *description,
+					     const struct key_tag *domain_tag,
 					     const char *callout_info,
 					     size_t callout_len,
 					     void *aux);
@@ -24,7 +32,8 @@ or::
 or::
 
 	struct key *request_key_rcu(const struct key_type *type,
-				    const char *description);
+				    const char *description,
+				    const struct key_tag *domain_tag);
 
 Or by userspace invoking the request_key system call::
 
@@ -38,14 +47,18 @@ does not need to link the key to a keyring to prevent it from being immediately
 destroyed.  The kernel interface returns a pointer directly to the key, and
 it's up to the caller to destroy the key.
 
-The request_key_with_auxdata() calls is like the in-kernel request_key() call,
-except that they permit auxiliary data to be passed to the upcaller (the
-default is NULL).  This is only useful for those key types that define their
-own upcall mechanism rather than using /sbin/request-key.
+The request_key_tag() call is like the in-kernel request_key(), except that it
+also takes a domain tag that allows keys to be separated by namespace and
+killed off as a group.
+
+The request_key_with_auxdata() calls is like the request_key_tag() call, except
+that they permit auxiliary data to be passed to the upcaller (the default is
+NULL).  This is only useful for those key types that define their own upcall
+mechanism rather than using /sbin/request-key.
 
-The request_key_rcu() call is like the in-kernel request_key() call, except
-that it doesn't check for keys that are under construction and doesn't attempt
-to construct missing keys.
+The request_key_rcu() call is like the request_key_tag() call, except that it
+doesn't check for keys that are under construction and doesn't attempt to
+construct missing keys.
 
 The userspace interface links the key to a keyring associated with the process
 to prevent the key from going away, and returns the serial number of the key to
-- 
cgit 


From 2e12256b9a76584fa3a6da19210509d4775aee36 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 27 Jun 2019 23:03:07 +0100
Subject: keys: Replace uid/gid/perm permissions checking with an ACL

Replace the uid/gid/perm permissions checking on a key with an ACL to allow
the SETATTR and SEARCH permissions to be split.  This will also allow a
greater range of subjects to represented.

============
WHY DO THIS?
============

The problem is that SETATTR and SEARCH cover a slew of actions, not all of
which should be grouped together.

For SETATTR, this includes actions that are about controlling access to a
key:

 (1) Changing a key's ownership.

 (2) Changing a key's security information.

 (3) Setting a keyring's restriction.

And actions that are about managing a key's lifetime:

 (4) Setting an expiry time.

 (5) Revoking a key.

and (proposed) managing a key as part of a cache:

 (6) Invalidating a key.

Managing a key's lifetime doesn't really have anything to do with
controlling access to that key.

Expiry time is awkward since it's more about the lifetime of the content
and so, in some ways goes better with WRITE permission.  It can, however,
be set unconditionally by a process with an appropriate authorisation token
for instantiating a key, and can also be set by the key type driver when a
key is instantiated, so lumping it with the access-controlling actions is
probably okay.

As for SEARCH permission, that currently covers:

 (1) Finding keys in a keyring tree during a search.

 (2) Permitting keyrings to be joined.

 (3) Invalidation.

But these don't really belong together either, since these actions really
need to be controlled separately.

Finally, there are number of special cases to do with granting the
administrator special rights to invalidate or clear keys that I would like
to handle with the ACL rather than key flags and special checks.


===============
WHAT IS CHANGED
===============

The SETATTR permission is split to create two new permissions:

 (1) SET_SECURITY - which allows the key's owner, group and ACL to be
     changed and a restriction to be placed on a keyring.

 (2) REVOKE - which allows a key to be revoked.

The SEARCH permission is split to create:

 (1) SEARCH - which allows a keyring to be search and a key to be found.

 (2) JOIN - which allows a keyring to be joined as a session keyring.

 (3) INVAL - which allows a key to be invalidated.

The WRITE permission is also split to create:

 (1) WRITE - which allows a key's content to be altered and links to be
     added, removed and replaced in a keyring.

 (2) CLEAR - which allows a keyring to be cleared completely.  This is
     split out to make it possible to give just this to an administrator.

 (3) REVOKE - see above.


Keys acquire ACLs which consist of a series of ACEs, and all that apply are
unioned together.  An ACE specifies a subject, such as:

 (*) Possessor - permitted to anyone who 'possesses' a key
 (*) Owner - permitted to the key owner
 (*) Group - permitted to the key group
 (*) Everyone - permitted to everyone

Note that 'Other' has been replaced with 'Everyone' on the assumption that
you wouldn't grant a permit to 'Other' that you wouldn't also grant to
everyone else.

Further subjects may be made available by later patches.

The ACE also specifies a permissions mask.  The set of permissions is now:

	VIEW		Can view the key metadata
	READ		Can read the key content
	WRITE		Can update/modify the key content
	SEARCH		Can find the key by searching/requesting
	LINK		Can make a link to the key
	SET_SECURITY	Can change owner, ACL, expiry
	INVAL		Can invalidate
	REVOKE		Can revoke
	JOIN		Can join this keyring
	CLEAR		Can clear this keyring


The KEYCTL_SETPERM function is then deprecated.

The KEYCTL_SET_TIMEOUT function then is permitted if SET_SECURITY is set,
or if the caller has a valid instantiation auth token.

The KEYCTL_INVALIDATE function then requires INVAL.

The KEYCTL_REVOKE function then requires REVOKE.

The KEYCTL_JOIN_SESSION_KEYRING function then requires JOIN to join an
existing keyring.

The JOIN permission is enabled by default for session keyrings and manually
created keyrings only.


======================
BACKWARD COMPATIBILITY
======================

To maintain backward compatibility, KEYCTL_SETPERM will translate the
permissions mask it is given into a new ACL for a key - unless
KEYCTL_SET_ACL has been called on that key, in which case an error will be
returned.

It will convert possessor, owner, group and other permissions into separate
ACEs, if each portion of the mask is non-zero.

SETATTR permission turns on all of INVAL, REVOKE and SET_SECURITY.  WRITE
permission turns on WRITE, REVOKE and, if a keyring, CLEAR.  JOIN is turned
on if a keyring is being altered.

The KEYCTL_DESCRIBE function translates the ACL back into a permissions
mask to return depending on possessor, owner, group and everyone ACEs.

It will make the following mappings:

 (1) INVAL, JOIN -> SEARCH

 (2) SET_SECURITY -> SETATTR

 (3) REVOKE -> WRITE if SETATTR isn't already set

 (4) CLEAR -> WRITE

Note that the value subsequently returned by KEYCTL_DESCRIBE may not match
the value set with KEYCTL_SETATTR.


=======
TESTING
=======

This passes the keyutils testsuite for all but a couple of tests:

 (1) tests/keyctl/dh_compute/badargs: The first wrong-key-type test now
     returns EOPNOTSUPP rather than ENOKEY as READ permission isn't removed
     if the type doesn't have ->read().  You still can't actually read the
     key.

 (2) tests/keyctl/permitting/valid: The view-other-permissions test doesn't
     work as Other has been replaced with Everyone in the ACL.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/security/keys/core.rst        | 128 ++++++++++++++++++++++------
 Documentation/security/keys/request-key.rst |   9 +-
 2 files changed, 106 insertions(+), 31 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index 0e74f372e58c..1b3c907980ad 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -57,9 +57,9 @@ Each key has a number of attributes:
      type provides an operation to perform a match between the description on a
      key and a criterion string.
 
-  *  Each key has an owner user ID, a group ID and a permissions mask. These
-     are used to control what a process may do to a key from userspace, and
-     whether a kernel service will be able to find the key.
+  *  Each key has an owner user ID, a group ID and an ACL.  These are used to
+     control what a process may do to a key from userspace, and whether a
+     kernel service will be able to find the key.
 
   *  Each key can be set to expire at a specific time by the key type's
      instantiation function. Keys can also be immortal.
@@ -198,43 +198,110 @@ The key service provides a number of features besides keys:
 Key Access Permissions
 ======================
 
-Keys have an owner user ID, a group access ID, and a permissions mask. The mask
-has up to eight bits each for possessor, user, group and other access. Only
-six of each set of eight bits are defined. These permissions granted are:
+Keys have an owner user ID, a group ID and an ACL.  The ACL is made up of a
+sequence of ACEs that each contain three elements:
 
-  *  View
+  * The type of subject.
+  * The subject.
 
-     This permits a key or keyring's attributes to be viewed - including key
-     type and description.
+    These two together indicate the subject to whom the permits are granted.
+    The type can be one of:
 
-  *  Read
+     * ``KEY_ACE_SUBJ_STANDARD``
 
-     This permits a key's payload to be viewed or a keyring's list of linked
-     keys.
+       The subject is a standard 'macro' type.  The subject can be one of:
+
+        * ``KEY_ACE_EVERYONE``
+
+	  The permits are granted to everyone.  It replaces the old 'other'
+	  type on the assumption that you wouldn't grant a permission to other
+	  that you you wouldn't grant to everyone else.
+
+	* ``KEY_ACE_OWNER``
+
+	  The permits are granted to the owner of the key (key->uid).
+
+	* ``KEY_ACE_GROUP``
+
+	  The permits are granted to the key's group (key->gid).
+
+	* ``KEY_ACE_POSSESSOR``
+
+	  The permits are granted to anyone who possesses the key.
+
+  * The set of permits granted to the subject.  These include:
+
+     * ``KEY_ACE_VIEW``
+
+       This permits a key or keyring's attributes to be viewed - including the
+       key type and description.
+
+     * ``KEY_ACE_READ``
+
+       This permits a key's payload to be viewed or a keyring's list of linked
+       keys.
 
-  *  Write
+     * ``KEY_ACE_WRITE``
 
-     This permits a key's payload to be instantiated or updated, or it allows a
-     link to be added to or removed from a keyring.
+       This permits a key's payload to be instantiated or updated, or it allows
+       a link to be added to or removed from a keyring.
 
-  *  Search
+     * ``KEY_ACE_SEARCH``
 
-     This permits keyrings to be searched and keys to be found. Searches can
-     only recurse into nested keyrings that have search permission set.
+       This permits keyrings to be searched and keys to be found. Searches can
+       only recurse into nested keyrings that have search permission set.
 
-  *  Link
+     * ``KEY_ACE_LINK``
 
-     This permits a key or keyring to be linked to. To create a link from a
-     keyring to a key, a process must have Write permission on the keyring and
-     Link permission on the key.
+       This permits a key or keyring to be linked to. To create a link from a
+       keyring to a key, a process must have Write permission on the keyring
+       and Link permission on the key.
 
-  *  Set Attribute
+     * ``KEY_ACE_SET_SECURITY``
 
-     This permits a key's UID, GID and permissions mask to be changed.
+       This permits a key's UID, GID and permissions mask to be changed.
+
+     * ``KEY_ACE_INVAL``
+
+       This permits a key to be invalidated with KEYCTL_INVALIDATE.
+
+     * ``KEY_ACE_REVOKE``
+
+       This permits a key to be revoked with KEYCTL_REVOKE.
+
+     * ``KEY_ACE_JOIN``
+
+       This permits a keyring to be joined as a session by
+       KEYCTL_JOIN_SESSION_KEYRING or KEYCTL_SESSION_TO_PARENT.
+
+     * ``KEY_ACE_CLEAR``
+
+       This permits a keyring to be cleared.
 
 For changing the ownership, group ID or permissions mask, being the owner of
 the key or having the sysadmin capability is sufficient.
 
+The legacy KEYCTL_SETPERM and KEYCTL_DESCRIBE functions can only see/generate
+View, Read, Write, Search, Link and SetAttr permits, and do this for each of
+possessor, user, group and other permission sets as a 32-bit flag mask.  These
+will be approximated/inferred:
+
+	SETPERM Permit	Implied ACE Permit
+	===============	=======================
+	Search		Inval, Join
+	Write		Revoke, Clear
+	Setattr		Set Security, Revoke
+
+	ACE Permit	Described as
+	===============	=======================
+	Inval		Search
+	Join		Search
+	Revoke		Write (unless Setattr)
+	Clear		write
+	Set Security	Setattr
+
+'Other' will be approximated as/inferred from the 'Everyone' subject.
+
 
 SELinux Support
 ===============
@@ -1084,7 +1151,8 @@ payload contents" for more information.
 
 	struct key *request_key(const struct key_type *type,
 				const char *description,
-				const char *callout_info);
+				const char *callout_info,
+				struct key_acl *acl);
 
     This is used to request a key or keyring with a description that matches
     the description specified according to the key type's match_preparse()
@@ -1099,6 +1167,8 @@ payload contents" for more information.
     If successful, the key will have been attached to the default keyring for
     implicitly obtained request-key keys, as set by KEYCTL_SET_REQKEY_KEYRING.
 
+    If a key is created, it will be given the specified ACL.
+
     See also Documentation/security/keys/request-key.rst.
 
 
@@ -1107,7 +1177,8 @@ payload contents" for more information.
 	struct key *request_key_tag(const struct key_type *type,
 				    const char *description,
 				    struct key_tag *domain_tag,
-				    const char *callout_info);
+				    const char *callout_info,
+				    struct key_acl *acl);
 
     This is identical to request_key(), except that a domain tag may be
     specifies that causes search algorithm to only match keys matching that
@@ -1122,7 +1193,8 @@ payload contents" for more information.
 					     struct key_tag *domain_tag,
 					     const void *callout_info,
 					     size_t callout_len,
-					     void *aux);
+					     void *aux,
+					     struct key_acl *acl);
 
     This is identical to request_key_tag(), except that the auxiliary data is
     passed to the key_type->request_key() op if it exists, and the
@@ -1195,7 +1267,7 @@ payload contents" for more information.
 
 	struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
 				  const struct cred *cred,
-				  key_perm_t perm,
+				  struct key_acl *acl,
 				  struct key_restriction *restrict_link,
 				  unsigned long flags,
 				  struct key *dest);
diff --git a/Documentation/security/keys/request-key.rst b/Documentation/security/keys/request-key.rst
index 35f2296b704a..f356fd06c8d5 100644
--- a/Documentation/security/keys/request-key.rst
+++ b/Documentation/security/keys/request-key.rst
@@ -11,14 +11,16 @@ The process starts by either the kernel requesting a service by calling
 
 	struct key *request_key(const struct key_type *type,
 				const char *description,
-				const char *callout_info);
+				const char *callout_info,
+				struct key_acl *acl);
 
 or::
 
 	struct key *request_key_tag(const struct key_type *type,
 				    const char *description,
 				    const struct key_tag *domain_tag,
-				    const char *callout_info);
+				    const char *callout_info,
+				    struct key_acl *acl);
 
 or::
 
@@ -27,7 +29,8 @@ or::
 					     const struct key_tag *domain_tag,
 					     const char *callout_info,
 					     size_t callout_len,
-					     void *aux);
+					     void *aux,
+					     struct key_acl *acl);
 
 or::
 
-- 
cgit 


From d974ffcfb7447db5f29a4b662a3eaf99a4e1109e Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 26 Jun 2019 21:45:02 -0700
Subject: Documentation/admin: Remove the vsyscall=native documentation

The vsyscall=native feature is gone -- remove the docs.

Fixes: 076ca272a14c ("x86/vsyscall/64: Drop "native" vsyscalls")
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: stable@vger.kernel.org
Cc: Borislav Petkov <bp@alien8.de>
Cc: Kernel Hardening <kernel-hardening@lists.openwall.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/r/d77c7105eb4c57c1a95a95b6a5b8ba194a18e764.1561610354.git.luto@kernel.org
---
 Documentation/admin-guide/kernel-parameters.txt | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..0082d1e56999 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5102,12 +5102,6 @@
 			emulate     [default] Vsyscalls turn into traps and are
 			            emulated reasonably safely.
 
-			native      Vsyscalls are native syscall instructions.
-			            This is a little bit faster than trapping
-			            and makes a few dynamic recompilers work
-			            better than they would in emulation mode.
-			            It also makes exploits much easier to write.
-
 			none        Vsyscalls don't work at all.  This makes
 			            them quite hard to use for exploits but
 			            might break your system.
-- 
cgit 


From bd49e16e3339f052fae05fb3e955c5db0c9c6445 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 26 Jun 2019 21:45:03 -0700
Subject: x86/vsyscall: Add a new vsyscall=xonly mode

With vsyscall emulation on, a readable vsyscall page is still exposed that
contains syscall instructions that validly implement the vsyscalls.

This is required because certain dynamic binary instrumentation tools
attempt to read the call targets of call instructions in the instrumented
code.  If the instrumented code uses vsyscalls, then the vsyscall page needs
to contain readable code.

Unfortunately, leaving readable memory at a deterministic address can be
used to help various ASLR bypasses, so some hardening value can be gained
by disallowing vsyscall reads.

Given how rarely the vsyscall page needs to be readable, add a mechanism to
make the vsyscall page be execute only.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Kernel Hardening <kernel-hardening@lists.openwall.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/r/d17655777c21bc09a7af1bbcf74e6f2b69a51152.1561610354.git.luto@kernel.org
---
 Documentation/admin-guide/kernel-parameters.txt | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0082d1e56999..be8c3a680afa 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5100,7 +5100,12 @@
 			targets for exploits that can control RIP.
 
 			emulate     [default] Vsyscalls turn into traps and are
-			            emulated reasonably safely.
+			            emulated reasonably safely.  The vsyscall
+				    page is readable.
+
+			xonly       Vsyscalls turn into traps and are
+			            emulated reasonably safely.  The vsyscall
+				    page is not readable.
 
 			none        Vsyscalls don't work at all.  This makes
 			            them quite hard to use for exploits but
-- 
cgit 


From 0c51b3697a446e904e9e9fb70bbd7d08606a329a Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 27 Jun 2019 13:38:54 -0700
Subject: bpf: add sockopt documentation

Provide user documentation about sockopt prog type and cgroup hooks.

v9:
* add details about setsockopt context and inheritance

v7:
* add description for retval=0 and optlen=-1

v6:
* describe cgroup chaining, add example

v2:
* use return code 2 for kernel bypass

Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/index.rst               |  1 +
 Documentation/bpf/prog_cgroup_sockopt.rst | 93 +++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+)
 create mode 100644 Documentation/bpf/prog_cgroup_sockopt.rst

(limited to 'Documentation')

diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index d3fe4cac0c90..801a6ed3f2e5 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -42,6 +42,7 @@ Program types
 .. toctree::
    :maxdepth: 1
 
+   prog_cgroup_sockopt
    prog_cgroup_sysctl
    prog_flow_dissector
 
diff --git a/Documentation/bpf/prog_cgroup_sockopt.rst b/Documentation/bpf/prog_cgroup_sockopt.rst
new file mode 100644
index 000000000000..c47d974629ae
--- /dev/null
+++ b/Documentation/bpf/prog_cgroup_sockopt.rst
@@ -0,0 +1,93 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+BPF_PROG_TYPE_CGROUP_SOCKOPT
+============================
+
+``BPF_PROG_TYPE_CGROUP_SOCKOPT`` program type can be attached to two
+cgroup hooks:
+
+* ``BPF_CGROUP_GETSOCKOPT`` - called every time process executes ``getsockopt``
+  system call.
+* ``BPF_CGROUP_SETSOCKOPT`` - called every time process executes ``setsockopt``
+  system call.
+
+The context (``struct bpf_sockopt``) has associated socket (``sk``) and
+all input arguments: ``level``, ``optname``, ``optval`` and ``optlen``.
+
+BPF_CGROUP_SETSOCKOPT
+=====================
+
+``BPF_CGROUP_SETSOCKOPT`` is triggered *before* the kernel handling of
+sockopt and it has writable context: it can modify the supplied arguments
+before passing them down to the kernel. This hook has access to the cgroup
+and socket local storage.
+
+If BPF program sets ``optlen`` to -1, the control will be returned
+back to the userspace after all other BPF programs in the cgroup
+chain finish (i.e. kernel ``setsockopt`` handling will *not* be executed).
+
+Note, that ``optlen`` can not be increased beyond the user-supplied
+value. It can only be decreased or set to -1. Any other value will
+trigger ``EFAULT``.
+
+Return Type
+-----------
+
+* ``0`` - reject the syscall, ``EPERM`` will be returned to the userspace.
+* ``1`` - success, continue with next BPF program in the cgroup chain.
+
+BPF_CGROUP_GETSOCKOPT
+=====================
+
+``BPF_CGROUP_GETSOCKOPT`` is triggered *after* the kernel handing of
+sockopt. The BPF hook can observe ``optval``, ``optlen`` and ``retval``
+if it's interested in whatever kernel has returned. BPF hook can override
+the values above, adjust ``optlen`` and reset ``retval`` to 0. If ``optlen``
+has been increased above initial ``getsockopt`` value (i.e. userspace
+buffer is too small), ``EFAULT`` is returned.
+
+This hook has access to the cgroup and socket local storage.
+
+Note, that the only acceptable value to set to ``retval`` is 0 and the
+original value that the kernel returned. Any other value will trigger
+``EFAULT``.
+
+Return Type
+-----------
+
+* ``0`` - reject the syscall, ``EPERM`` will be returned to the userspace.
+* ``1`` - success: copy ``optval`` and ``optlen`` to userspace, return
+  ``retval`` from the syscall (note that this can be overwritten by
+  the BPF program from the parent cgroup).
+
+Cgroup Inheritance
+==================
+
+Suppose, there is the following cgroup hierarchy where each cgroup
+has ``BPF_CGROUP_GETSOCKOPT`` attached at each level with
+``BPF_F_ALLOW_MULTI`` flag::
+
+  A (root, parent)
+   \
+    B (child)
+
+When the application calls ``getsockopt`` syscall from the cgroup B,
+the programs are executed from the bottom up: B, A. First program
+(B) sees the result of kernel's ``getsockopt``. It can optionally
+adjust ``optval``, ``optlen`` and reset ``retval`` to 0. After that
+control will be passed to the second (A) program which will see the
+same context as B including any potential modifications.
+
+Same for ``BPF_CGROUP_SETSOCKOPT``: if the program is attached to
+A and B, the trigger order is B, then A. If B does any changes
+to the input arguments (``level``, ``optname``, ``optval``, ``optlen``),
+then the next program in the chain (A) will see those changes,
+*not* the original input ``setsockopt`` arguments. The potentially
+modified values will be then passed down to the kernel.
+
+Example
+=======
+
+See ``tools/testing/selftests/bpf/progs/sockopt_sk.c`` for an example
+of BPF program that handles socket options.
-- 
cgit 


From cae86eac98cd90934f94072028c362550338e084 Mon Sep 17 00:00:00 2001
From: Ludovic Barre <ludovic.barre@st.com>
Date: Thu, 27 Jun 2019 09:43:58 +0200
Subject: dt-bindings: spi: stm32-qspi: add dma properties

This patch adds description of dma properties (optional).

Signed-off-by: Ludovic Barre <ludovic.barre@st.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-stm32-qspi.txt | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/spi/spi-stm32-qspi.txt b/Documentation/devicetree/bindings/spi/spi-stm32-qspi.txt
index adeeb63e84b9..bfc038b9478d 100644
--- a/Documentation/devicetree/bindings/spi/spi-stm32-qspi.txt
+++ b/Documentation/devicetree/bindings/spi/spi-stm32-qspi.txt
@@ -19,8 +19,11 @@ Required properties:
 - reg: chip-Select number (QSPI controller may connect 2 flashes)
 - spi-max-frequency: max frequency of spi bus
 
-Optional property:
+Optional properties:
 - spi-rx-bus-width: see ./spi-bus.txt for the description
+- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
+Documentation/devicetree/bindings/dma/dma.txt.
+- dma-names: DMA request names should include "tx" and "rx" if present.
 
 Example:
 
-- 
cgit 


From 2b9def8c0d92769eef3ef70d0a7c3646ab185d39 Mon Sep 17 00:00:00 2001
From: Derek Fang <derek.fang@realtek.com>
Date: Fri, 28 Jun 2019 20:51:43 +0800
Subject: ASoC: rt1308: Add RT1308 amplifier driver

This is the initial amplifier driver for rt1308.

Signed-off-by: Derek Fang <derek.fang@realtek.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/sound/rt1308.txt | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100755 Documentation/devicetree/bindings/sound/rt1308.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/sound/rt1308.txt b/Documentation/devicetree/bindings/sound/rt1308.txt
new file mode 100755
index 000000000000..2d46084afce4
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/rt1308.txt
@@ -0,0 +1,17 @@
+RT1308 audio Amplifier
+
+This device supports I2C only.
+
+Required properties:
+
+- compatible : "realtek,rt1308".
+
+- reg : The I2C address of the device.
+
+
+Example:
+
+rt1308: rt1308@10 {
+	compatible = "realtek,rt1308";
+	reg = <0x10>;
+};
-- 
cgit 


From 9159ba14285c5432063a0ad83e50afb95674d9b1 Mon Sep 17 00:00:00 2001
From: Sheriff Esseson <sheriffesseson@gmail.com>
Date: Fri, 28 Jun 2019 07:20:01 +0100
Subject: Doc : doc-guide : Fix a typo

fix the disjunction by replacing "of" with "or".

Signed-off-by: Sheriff Esseson <sheriffesseson@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/doc-guide/kernel-doc.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/doc-guide/kernel-doc.rst b/Documentation/doc-guide/kernel-doc.rst
index f96059767c8c..192c36af39e2 100644
--- a/Documentation/doc-guide/kernel-doc.rst
+++ b/Documentation/doc-guide/kernel-doc.rst
@@ -359,7 +359,7 @@ Domain`_ references.
   ``monospaced font``.
 
   Useful if you need to use special characters that would otherwise have some
-  meaning either by kernel-doc script of by reStructuredText.
+  meaning either by kernel-doc script or by reStructuredText.
 
   This is particularly useful if you need to use things like ``%ph`` inside
   a function description.
-- 
cgit 


From 62ee81b5681daa781f5e800346ae8654b3e5a864 Mon Sep 17 00:00:00 2001
From: Stephen Kitt <steve@sk2.org>
Date: Thu, 27 Jun 2019 15:59:38 +0200
Subject: docs: format kernel-parameters -- as code

The current ReStructuredText formatting results in "--", used to
indicate the end of the kernel command-line parameters, appearing as
an en-dash instead of two hyphens; this patch formats them as code,
"``--``", as done elsewhere in the documentation.

Signed-off-by: Stephen Kitt <steve@sk2.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/kernel-parameters.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index 8d3273e32eb1..5d29ba5ad88c 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -9,11 +9,11 @@ and sorted into English Dictionary order (defined as ignoring all
 punctuation and sorting digits before letters in a case insensitive
 manner), and with descriptions where known.
 
-The kernel parses parameters from the kernel command line up to "--";
+The kernel parses parameters from the kernel command line up to "``--``";
 if it doesn't recognize a parameter and it doesn't contain a '.', the
 parameter gets passed to init: parameters with '=' go into init's
 environment, others are passed as command line arguments to init.
-Everything after "--" is passed as an argument to init.
+Everything after "``--``" is passed as an argument to init.
 
 Module parameters can be specified in two ways: via the kernel command
 line with a module name prefix, or via modprobe, e.g.::
-- 
cgit 


From acb6258acc4fbb76449eec6d0c7ca25254671e31 Mon Sep 17 00:00:00 2001
From: Jiunn Chang <c0d1n61at3@gmail.com>
Date: Thu, 27 Jun 2019 16:01:47 -0500
Subject: doc: RCU callback locks need only _bh, not necessarily _irq

The UP.rst file calls for locks acquired within RCU callback functions
to use _irq variants (spin_lock_irqsave() or similar), which does work,
but can be overkill.  This commit therefore instead calls for _bh variants
(spin_lock_bh() or similar), while noting that _irq does work.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
Signed-off-by: Jiunn Chang <c0d1n61at3@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/RCU/UP.rst | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/RCU/UP.rst b/Documentation/RCU/UP.rst
index 67715a47ae89..e26dda27430c 100644
--- a/Documentation/RCU/UP.rst
+++ b/Documentation/RCU/UP.rst
@@ -113,12 +113,13 @@ Answer to Quick Quiz #1:
 Answer to Quick Quiz #2:
 	What locking restriction must RCU callbacks respect?
 
-	Any lock that is acquired within an RCU callback must be
-	acquired elsewhere using an _irq variant of the spinlock
-	primitive.  For example, if "mylock" is acquired by an
-	RCU callback, then a process-context acquisition of this
-	lock must use something like spin_lock_irqsave() to
-	acquire the lock.
+	Any lock that is acquired within an RCU callback must be acquired
+	elsewhere using an _bh variant of the spinlock primitive.
+	For example, if "mylock" is acquired by an RCU callback, then
+	a process-context acquisition of this lock must use something
+	like spin_lock_bh() to acquire the lock.  Please note that
+	it is also OK to use _irq variants of spinlocks, for example,
+	spin_lock_irqsave().
 
 	If the process-context code were to simply use spin_lock(),
 	then, since RCU callbacks can be invoked from softirq context,
-- 
cgit 


From a4496d52b3430cb3c4c16d03cdd5f4ee97ad1241 Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Tue, 7 May 2019 11:52:47 +0200
Subject: power: supply: add input power and voltage limit properties

For thermal management strategy you might be interested on limit the
input power for a power supply. We already have current limit but
basically what we probably want is to limit power. So, introduce the
input_power_limit property.

Although the common use case is limit the input power, in some
specific cases it is the voltage that is problematic (i.e some regulators
have different efficiencies at higher voltage resulting in more heat).
So introduce also the input_voltage_limit property.

This happens in one Chromebook and is used on the Pixel C's thermal
management strategy to effectively limit the input power to 5V 3A when
the screen is on. When the screen is on, the display, the CPU, and the GPU
all contribute more heat to the system than while the screen is off, and
we made a tradeoff to throttle the charger in order to give more of the
thermal budget to those other components.

So there's nothing fundamentally broken about the hardware that would
cause the Pixel C to malfunction if we were charging at 9V or 12V instead
of 5V when the screen is on, i.e. if userspace doesn't change this.

What would happen is that you wouldn't meet Google's skin temperature
targets on the system if the charger was allowed to run at 9V or 12V with
the screen on.

For folks hacking on Pixel Cs (which is now outside of Google's official
support window for Android) and customizing their own kernel and userspace
this would be acceptable, but we wanted to expose this feature in the
power supply properties because the feature does exist in the Emedded
Controller firmware of the Pixel C and all of Google's Chromebooks with
USB-C made since 2015 in case someone running an up to date kernel wanted
to limit the charging power for thermal or other reasons.

This patch exposes a new property, similar to input current limit, to
re-configure the maximum voltage from the external supply at runtime
based on system-level knowledge or user input.

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Acked-by: Adam Thomson <Adam.Thomson.Opensource@diasemi.com>
Reviewed-by: Benson Leung <bleung@chromium.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 Documentation/ABI/testing/sysfs-class-power | 32 +++++++++++++++++++++++++++++
 Documentation/power/power_supply_class.txt  |  4 ++++
 2 files changed, 36 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power
index b77e30b9014e..27edc06e2495 100644
--- a/Documentation/ABI/testing/sysfs-class-power
+++ b/Documentation/ABI/testing/sysfs-class-power
@@ -376,10 +376,42 @@ Description:
 		supply. Normally this is configured based on the type of
 		connection made (e.g. A configured SDP should output a maximum
 		of 500mA so the input current limit is set to the same value).
+		Use preferably input_power_limit, and for problems that can be
+		solved using power limit use input_current_limit.
 
 		Access: Read, Write
 		Valid values: Represented in microamps
 
+What:		/sys/class/power_supply/<supply_name>/input_voltage_limit
+Date:		May 2019
+Contact:	linux-pm@vger.kernel.org
+Description:
+		This entry configures the incoming VBUS voltage limit currently
+		set in the supply. Normally this is configured based on
+		system-level knowledge or user input (e.g. This is part of the
+		Pixel C's thermal management strategy to effectively limit the
+		input power to 5V when the screen is on to meet Google's skin
+		temperature targets). Note that this feature should not be
+		used for safety critical things.
+		Use preferably input_power_limit, and for problems that can be
+		solved using power limit use input_voltage_limit.
+
+		Access: Read, Write
+		Valid values: Represented in microvolts
+
+What:		/sys/class/power_supply/<supply_name>/input_power_limit
+Date:		May 2019
+Contact:	linux-pm@vger.kernel.org
+Description:
+		This entry configures the incoming power limit currently set
+		in the supply. Normally this is configured based on
+		system-level knowledge or user input. Use preferably this
+		feature to limit the incoming power and use current/voltage
+		limit only for problems that can be solved using power limit.
+
+		Access: Read, Write
+		Valid values: Represented in microwatts
+
 What:		/sys/class/power_supply/<supply_name>/online,
 Date:		May 2007
 Contact:	linux-pm@vger.kernel.org
diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt
index 300d37896e51..1e3c705111db 100644
--- a/Documentation/power/power_supply_class.txt
+++ b/Documentation/power/power_supply_class.txt
@@ -137,6 +137,10 @@ power supply object.
 
 INPUT_CURRENT_LIMIT - input current limit programmed by charger. Indicates
 the current drawn from a charging source.
+INPUT_VOLTAGE_LIMIT - input voltage limit programmed by charger. Indicates
+the voltage limit from a charging source.
+INPUT_POWER_LIMIT - input power limit programmed by charger. Indicates
+the power limit from a charging source.
 
 CHARGE_CONTROL_LIMIT - current charge control limit setting
 CHARGE_CONTROL_LIMIT_MAX - maximum charge control limit setting
-- 
cgit 


From 8dab91970a8c01ffc7816bf8a4c4cd587b481f34 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 28 Jun 2019 09:20:20 -0300
Subject: docs: leds: convert to ReST

Rename the leds documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 Documentation/laptops/thinkpad-acpi.txt  |   4 +-
 Documentation/leds/index.rst             |  25 ++++
 Documentation/leds/leds-blinkm.rst       |  84 ++++++++++++
 Documentation/leds/leds-blinkm.txt       |  80 -----------
 Documentation/leds/leds-class-flash.rst  |  90 +++++++++++++
 Documentation/leds/leds-class-flash.txt  |  73 ----------
 Documentation/leds/leds-class.rst        | 125 +++++++++++++++++
 Documentation/leds/leds-class.txt        | 122 -----------------
 Documentation/leds/leds-lm3556.rst       | 137 +++++++++++++++++++
 Documentation/leds/leds-lm3556.txt       |  85 ------------
 Documentation/leds/leds-lp3944.rst       |  59 ++++++++
 Documentation/leds/leds-lp3944.txt       |  50 -------
 Documentation/leds/leds-lp5521.rst       | 115 ++++++++++++++++
 Documentation/leds/leds-lp5521.txt       | 101 --------------
 Documentation/leds/leds-lp5523.rst       | 147 ++++++++++++++++++++
 Documentation/leds/leds-lp5523.txt       | 130 ------------------
 Documentation/leds/leds-lp5562.rst       | 137 +++++++++++++++++++
 Documentation/leds/leds-lp5562.txt       | 120 -----------------
 Documentation/leds/leds-lp55xx.rst       | 224 +++++++++++++++++++++++++++++++
 Documentation/leds/leds-lp55xx.txt       | 194 --------------------------
 Documentation/leds/leds-mlxcpld.rst      | 118 ++++++++++++++++
 Documentation/leds/leds-mlxcpld.txt      | 110 ---------------
 Documentation/leds/ledtrig-oneshot.rst   |  44 ++++++
 Documentation/leds/ledtrig-oneshot.txt   |  43 ------
 Documentation/leds/ledtrig-transient.rst | 167 +++++++++++++++++++++++
 Documentation/leds/ledtrig-transient.txt | 152 ---------------------
 Documentation/leds/ledtrig-usbport.rst   |  46 +++++++
 Documentation/leds/ledtrig-usbport.txt   |  41 ------
 Documentation/leds/uleds.rst             |  37 +++++
 Documentation/leds/uleds.txt             |  36 -----
 30 files changed, 1557 insertions(+), 1339 deletions(-)
 create mode 100644 Documentation/leds/index.rst
 create mode 100644 Documentation/leds/leds-blinkm.rst
 delete mode 100644 Documentation/leds/leds-blinkm.txt
 create mode 100644 Documentation/leds/leds-class-flash.rst
 delete mode 100644 Documentation/leds/leds-class-flash.txt
 create mode 100644 Documentation/leds/leds-class.rst
 delete mode 100644 Documentation/leds/leds-class.txt
 create mode 100644 Documentation/leds/leds-lm3556.rst
 delete mode 100644 Documentation/leds/leds-lm3556.txt
 create mode 100644 Documentation/leds/leds-lp3944.rst
 delete mode 100644 Documentation/leds/leds-lp3944.txt
 create mode 100644 Documentation/leds/leds-lp5521.rst
 delete mode 100644 Documentation/leds/leds-lp5521.txt
 create mode 100644 Documentation/leds/leds-lp5523.rst
 delete mode 100644 Documentation/leds/leds-lp5523.txt
 create mode 100644 Documentation/leds/leds-lp5562.rst
 delete mode 100644 Documentation/leds/leds-lp5562.txt
 create mode 100644 Documentation/leds/leds-lp55xx.rst
 delete mode 100644 Documentation/leds/leds-lp55xx.txt
 create mode 100644 Documentation/leds/leds-mlxcpld.rst
 delete mode 100644 Documentation/leds/leds-mlxcpld.txt
 create mode 100644 Documentation/leds/ledtrig-oneshot.rst
 delete mode 100644 Documentation/leds/ledtrig-oneshot.txt
 create mode 100644 Documentation/leds/ledtrig-transient.rst
 delete mode 100644 Documentation/leds/ledtrig-transient.txt
 create mode 100644 Documentation/leds/ledtrig-usbport.rst
 delete mode 100644 Documentation/leds/ledtrig-usbport.txt
 create mode 100644 Documentation/leds/uleds.rst
 delete mode 100644 Documentation/leds/uleds.txt

(limited to 'Documentation')

diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index 6cced88de6da..75ef063622d2 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -679,7 +679,7 @@ status as "unknown". The available commands are:
 sysfs notes:
 
 The ThinkLight sysfs interface is documented by the LED class
-documentation, in Documentation/leds/leds-class.txt.  The ThinkLight LED name
+documentation, in Documentation/leds/leds-class.rst.  The ThinkLight LED name
 is "tpacpi::thinklight".
 
 Due to limitations in the sysfs LED class, if the status of the ThinkLight
@@ -779,7 +779,7 @@ All of the above can be turned on and off and can be made to blink.
 sysfs notes:
 
 The ThinkPad LED sysfs interface is described in detail by the LED class
-documentation, in Documentation/leds/leds-class.txt.
+documentation, in Documentation/leds/leds-class.rst.
 
 The LEDs are named (in LED ID order, from 0 to 12):
 "tpacpi::power", "tpacpi:orange:batt", "tpacpi:green:batt",
diff --git a/Documentation/leds/index.rst b/Documentation/leds/index.rst
new file mode 100644
index 000000000000..9885f7c1b75d
--- /dev/null
+++ b/Documentation/leds/index.rst
@@ -0,0 +1,25 @@
+:orphan:
+
+====
+LEDs
+====
+
+.. toctree::
+   :maxdepth: 1
+
+   leds-class
+   leds-class-flash
+   ledtrig-oneshot
+   ledtrig-transient
+   ledtrig-usbport
+
+   uleds
+
+   leds-blinkm
+   leds-lm3556
+   leds-lp3944
+   leds-lp5521
+   leds-lp5523
+   leds-lp5562
+   leds-lp55xx
+   leds-mlxcpld
diff --git a/Documentation/leds/leds-blinkm.rst b/Documentation/leds/leds-blinkm.rst
new file mode 100644
index 000000000000..c74b5bc877b1
--- /dev/null
+++ b/Documentation/leds/leds-blinkm.rst
@@ -0,0 +1,84 @@
+==================
+Leds BlinkM driver
+==================
+
+The leds-blinkm driver supports the devices of the BlinkM family.
+
+They are RGB-LED modules driven by a (AT)tiny microcontroller and
+communicate through I2C. The default address of these modules is
+0x09 but this can be changed through a command. By this you could
+dasy-chain up to 127 BlinkMs on an I2C bus.
+
+The device accepts RGB and HSB color values through separate commands.
+Also you can store blinking sequences as "scripts" in
+the controller and run them. Also fading is an option.
+
+The interface this driver provides is 2-fold:
+
+a) LED class interface for use with triggers
+############################################
+
+The registration follows the scheme::
+
+  blinkm-<i2c-bus-nr>-<i2c-device-nr>-<color>
+
+  $ ls -h /sys/class/leds/blinkm-6-*
+  /sys/class/leds/blinkm-6-9-blue:
+  brightness  device  max_brightness  power  subsystem  trigger  uevent
+
+  /sys/class/leds/blinkm-6-9-green:
+  brightness  device  max_brightness  power  subsystem  trigger  uevent
+
+  /sys/class/leds/blinkm-6-9-red:
+  brightness  device  max_brightness  power  subsystem  trigger  uevent
+
+(same is /sys/bus/i2c/devices/6-0009/leds)
+
+We can control the colors separated into red, green and blue and
+assign triggers on each color.
+
+E.g.::
+
+  $ cat blinkm-6-9-blue/brightness
+  05
+
+  $ echo 200 > blinkm-6-9-blue/brightness
+  $
+
+  $ modprobe ledtrig-heartbeat
+  $ echo heartbeat > blinkm-6-9-green/trigger
+  $
+
+
+b) Sysfs group to control rgb, fade, hsb, scripts ...
+#####################################################
+
+This extended interface is available as folder blinkm
+in the sysfs folder of the I2C device.
+E.g. below /sys/bus/i2c/devices/6-0009/blinkm
+
+  $ ls -h /sys/bus/i2c/devices/6-0009/blinkm/
+  blue  green  red  test
+
+Currently supported is just setting red, green, blue
+and a test sequence.
+
+E.g.::
+
+  $ cat *
+  00
+  00
+  00
+  #Write into test to start test sequence!#
+
+  $ echo 1 > test
+  $
+
+  $ echo 255 > red
+  $
+
+
+
+as of 6/2012
+
+dl9pf <at> gmx <dot> de
diff --git a/Documentation/leds/leds-blinkm.txt b/Documentation/leds/leds-blinkm.txt
deleted file mode 100644
index 9dd92f4cf4e1..000000000000
--- a/Documentation/leds/leds-blinkm.txt
+++ /dev/null
@@ -1,80 +0,0 @@
-The leds-blinkm driver supports the devices of the BlinkM family.
-
-They are RGB-LED modules driven by a (AT)tiny microcontroller and
-communicate through I2C. The default address of these modules is
-0x09 but this can be changed through a command. By this you could
-dasy-chain up to 127 BlinkMs on an I2C bus.
-
-The device accepts RGB and HSB color values through separate commands.
-Also you can store blinking sequences as "scripts" in
-the controller and run them. Also fading is an option.
-
-The interface this driver provides is 2-fold:
-
-a) LED class interface for use with triggers
-############################################
-
-The registration follows the scheme:
-blinkm-<i2c-bus-nr>-<i2c-device-nr>-<color>
-
-$ ls -h /sys/class/leds/blinkm-6-*
-/sys/class/leds/blinkm-6-9-blue:
-brightness  device  max_brightness  power  subsystem  trigger  uevent
-
-/sys/class/leds/blinkm-6-9-green:
-brightness  device  max_brightness  power  subsystem  trigger  uevent
-
-/sys/class/leds/blinkm-6-9-red:
-brightness  device  max_brightness  power  subsystem  trigger  uevent
-
-(same is /sys/bus/i2c/devices/6-0009/leds)
-
-We can control the colors separated into red, green and blue and
-assign triggers on each color.
-
-E.g.:
-
-$ cat blinkm-6-9-blue/brightness
-05
-
-$ echo 200 > blinkm-6-9-blue/brightness
-$
-
-$ modprobe ledtrig-heartbeat
-$ echo heartbeat > blinkm-6-9-green/trigger
-$
-
-
-b) Sysfs group to control rgb, fade, hsb, scripts ...
-#####################################################
-
-This extended interface is available as folder blinkm
-in the sysfs folder of the I2C device.
-E.g. below /sys/bus/i2c/devices/6-0009/blinkm
-
-$ ls -h /sys/bus/i2c/devices/6-0009/blinkm/
-blue  green  red  test
-
-Currently supported is just setting red, green, blue
-and a test sequence.
-
-E.g.:
-
-$ cat *
-00
-00
-00
-#Write into test to start test sequence!#
-
-$ echo 1 > test
-$
-
-$ echo 255 > red
-$
-
-
-
-as of 6/2012
-
-dl9pf <at> gmx <dot> de
-
diff --git a/Documentation/leds/leds-class-flash.rst b/Documentation/leds/leds-class-flash.rst
new file mode 100644
index 000000000000..6ec12c5a1a0e
--- /dev/null
+++ b/Documentation/leds/leds-class-flash.rst
@@ -0,0 +1,90 @@
+==============================
+Flash LED handling under Linux
+==============================
+
+Some LED devices provide two modes - torch and flash. In the LED subsystem
+those modes are supported by LED class (see Documentation/leds/leds-class.rst)
+and LED Flash class respectively. The torch mode related features are enabled
+by default and the flash ones only if a driver declares it by setting
+LED_DEV_CAP_FLASH flag.
+
+In order to enable the support for flash LEDs CONFIG_LEDS_CLASS_FLASH symbol
+must be defined in the kernel config. A LED Flash class driver must be
+registered in the LED subsystem with led_classdev_flash_register function.
+
+Following sysfs attributes are exposed for controlling flash LED devices:
+(see Documentation/ABI/testing/sysfs-class-led-flash)
+
+	- flash_brightness
+	- max_flash_brightness
+	- flash_timeout
+	- max_flash_timeout
+	- flash_strobe
+	- flash_fault
+
+
+V4L2 flash wrapper for flash LEDs
+=================================
+
+A LED subsystem driver can be controlled also from the level of VideoForLinux2
+subsystem. In order to enable this CONFIG_V4L2_FLASH_LED_CLASS symbol has to
+be defined in the kernel config.
+
+The driver must call the v4l2_flash_init function to get registered in the
+V4L2 subsystem. The function takes six arguments:
+
+- dev:
+	flash device, e.g. an I2C device
+- of_node:
+	of_node of the LED, may be NULL if the same as device's
+- fled_cdev:
+	LED flash class device to wrap
+- iled_cdev:
+	LED flash class device representing indicator LED associated with
+	fled_cdev, may be NULL
+- ops:
+	V4L2 specific ops
+
+	* external_strobe_set
+		defines the source of the flash LED strobe -
+		V4L2_CID_FLASH_STROBE control or external source, typically
+		a sensor, which makes it possible to synchronise the flash
+		strobe start with exposure start,
+	* intensity_to_led_brightness and led_brightness_to_intensity
+		perform
+		enum led_brightness <-> V4L2 intensity conversion in a device
+		specific manner - they can be used for devices with non-linear
+		LED current scale.
+- config:
+	configuration for V4L2 Flash sub-device
+
+	* dev_name
+		the name of the media entity, unique in the system,
+	* flash_faults
+		bitmask of flash faults that the LED flash class
+		device can report; corresponding LED_FAULT* bit definitions are
+		available in <linux/led-class-flash.h>,
+	* torch_intensity
+		constraints for the LED in TORCH mode
+		in microamperes,
+	* indicator_intensity
+		constraints for the indicator LED
+		in microamperes,
+	* has_external_strobe
+		determines whether the flash strobe source
+		can be switched to external,
+
+On remove the v4l2_flash_release function has to be called, which takes one
+argument - struct v4l2_flash pointer returned previously by v4l2_flash_init.
+This function can be safely called with NULL or error pointer argument.
+
+Please refer to drivers/leds/leds-max77693.c for an exemplary usage of the
+v4l2 flash wrapper.
+
+Once the V4L2 sub-device is registered by the driver which created the Media
+controller device, the sub-device node acts just as a node of a native V4L2
+flash API device would. The calls are simply routed to the LED flash API.
+
+Opening the V4L2 flash sub-device makes the LED subsystem sysfs interface
+unavailable. The interface is re-enabled after the V4L2 flash sub-device
+is closed.
diff --git a/Documentation/leds/leds-class-flash.txt b/Documentation/leds/leds-class-flash.txt
deleted file mode 100644
index 8da3c6f4b60b..000000000000
--- a/Documentation/leds/leds-class-flash.txt
+++ /dev/null
@@ -1,73 +0,0 @@
-
-Flash LED handling under Linux
-==============================
-
-Some LED devices provide two modes - torch and flash. In the LED subsystem
-those modes are supported by LED class (see Documentation/leds/leds-class.txt)
-and LED Flash class respectively. The torch mode related features are enabled
-by default and the flash ones only if a driver declares it by setting
-LED_DEV_CAP_FLASH flag.
-
-In order to enable the support for flash LEDs CONFIG_LEDS_CLASS_FLASH symbol
-must be defined in the kernel config. A LED Flash class driver must be
-registered in the LED subsystem with led_classdev_flash_register function.
-
-Following sysfs attributes are exposed for controlling flash LED devices:
-(see Documentation/ABI/testing/sysfs-class-led-flash)
-	- flash_brightness
-	- max_flash_brightness
-	- flash_timeout
-	- max_flash_timeout
-	- flash_strobe
-	- flash_fault
-
-
-V4L2 flash wrapper for flash LEDs
-=================================
-
-A LED subsystem driver can be controlled also from the level of VideoForLinux2
-subsystem. In order to enable this CONFIG_V4L2_FLASH_LED_CLASS symbol has to
-be defined in the kernel config.
-
-The driver must call the v4l2_flash_init function to get registered in the
-V4L2 subsystem. The function takes six arguments:
-- dev       : flash device, e.g. an I2C device
-- of_node   : of_node of the LED, may be NULL if the same as device's
-- fled_cdev : LED flash class device to wrap
-- iled_cdev : LED flash class device representing indicator LED associated with
-	      fled_cdev, may be NULL
-- ops : V4L2 specific ops
-	* external_strobe_set - defines the source of the flash LED strobe -
-		V4L2_CID_FLASH_STROBE control or external source, typically
-		a sensor, which makes it possible to synchronise the flash
-		strobe start with exposure start,
-	* intensity_to_led_brightness and led_brightness_to_intensity - perform
-		enum led_brightness <-> V4L2 intensity conversion in a device
-		specific manner - they can be used for devices with non-linear
-		LED current scale.
-- config : configuration for V4L2 Flash sub-device
-	* dev_name - the name of the media entity, unique in the system,
-	* flash_faults - bitmask of flash faults that the LED flash class
-		device can report; corresponding LED_FAULT* bit definitions are
-		available in <linux/led-class-flash.h>,
-	* torch_intensity - constraints for the LED in TORCH mode
-		in microamperes,
-	* indicator_intensity - constraints for the indicator LED
-		in microamperes,
-	* has_external_strobe - determines whether the flash strobe source
-		can be switched to external,
-
-On remove the v4l2_flash_release function has to be called, which takes one
-argument - struct v4l2_flash pointer returned previously by v4l2_flash_init.
-This function can be safely called with NULL or error pointer argument.
-
-Please refer to drivers/leds/leds-max77693.c for an exemplary usage of the
-v4l2 flash wrapper.
-
-Once the V4L2 sub-device is registered by the driver which created the Media
-controller device, the sub-device node acts just as a node of a native V4L2
-flash API device would. The calls are simply routed to the LED flash API.
-
-Opening the V4L2 flash sub-device makes the LED subsystem sysfs interface
-unavailable. The interface is re-enabled after the V4L2 flash sub-device
-is closed.
diff --git a/Documentation/leds/leds-class.rst b/Documentation/leds/leds-class.rst
new file mode 100644
index 000000000000..df0120a1ee3c
--- /dev/null
+++ b/Documentation/leds/leds-class.rst
@@ -0,0 +1,125 @@
+========================
+LED handling under Linux
+========================
+
+In its simplest form, the LED class just allows control of LEDs from
+userspace. LEDs appear in /sys/class/leds/. The maximum brightness of the
+LED is defined in max_brightness file. The brightness file will set the brightness
+of the LED (taking a value 0-max_brightness). Most LEDs don't have hardware
+brightness support so will just be turned on for non-zero brightness settings.
+
+The class also introduces the optional concept of an LED trigger. A trigger
+is a kernel based source of led events. Triggers can either be simple or
+complex. A simple trigger isn't configurable and is designed to slot into
+existing subsystems with minimal additional code. Examples are the disk-activity,
+nand-disk and sharpsl-charge triggers. With led triggers disabled, the code
+optimises away.
+
+Complex triggers while available to all LEDs have LED specific
+parameters and work on a per LED basis. The timer trigger is an example.
+The timer trigger will periodically change the LED brightness between
+LED_OFF and the current brightness setting. The "on" and "off" time can
+be specified via /sys/class/leds/<device>/delay_{on,off} in milliseconds.
+You can change the brightness value of a LED independently of the timer
+trigger. However, if you set the brightness value to LED_OFF it will
+also disable the timer trigger.
+
+You can change triggers in a similar manner to the way an IO scheduler
+is chosen (via /sys/class/leds/<device>/trigger). Trigger specific
+parameters can appear in /sys/class/leds/<device> once a given trigger is
+selected.
+
+
+Design Philosophy
+=================
+
+The underlying design philosophy is simplicity. LEDs are simple devices
+and the aim is to keep a small amount of code giving as much functionality
+as possible.  Please keep this in mind when suggesting enhancements.
+
+
+LED Device Naming
+=================
+
+Is currently of the form:
+
+	"devicename:colour:function"
+
+There have been calls for LED properties such as colour to be exported as
+individual led class attributes. As a solution which doesn't incur as much
+overhead, I suggest these become part of the device name. The naming scheme
+above leaves scope for further attributes should they be needed. If sections
+of the name don't apply, just leave that section blank.
+
+
+Brightness setting API
+======================
+
+LED subsystem core exposes following API for setting brightness:
+
+    - led_set_brightness:
+		it is guaranteed not to sleep, passing LED_OFF stops
+		blinking,
+
+    - led_set_brightness_sync:
+		for use cases when immediate effect is desired -
+		it can block the caller for the time required for accessing
+		device registers and can sleep, passing LED_OFF stops hardware
+		blinking, returns -EBUSY if software blink fallback is enabled.
+
+
+LED registration API
+====================
+
+A driver wanting to register a LED classdev for use by other drivers /
+userspace needs to allocate and fill a led_classdev struct and then call
+`[devm_]led_classdev_register`. If the non devm version is used the driver
+must call led_classdev_unregister from its remove function before
+free-ing the led_classdev struct.
+
+If the driver can detect hardware initiated brightness changes and thus
+wants to have a brightness_hw_changed attribute then the LED_BRIGHT_HW_CHANGED
+flag must be set in flags before registering. Calling
+led_classdev_notify_brightness_hw_changed on a classdev not registered with
+the LED_BRIGHT_HW_CHANGED flag is a bug and will trigger a WARN_ON.
+
+Hardware accelerated blink of LEDs
+==================================
+
+Some LEDs can be programmed to blink without any CPU interaction. To
+support this feature, a LED driver can optionally implement the
+blink_set() function (see <linux/leds.h>). To set an LED to blinking,
+however, it is better to use the API function led_blink_set(), as it
+will check and implement software fallback if necessary.
+
+To turn off blinking, use the API function led_brightness_set()
+with brightness value LED_OFF, which should stop any software
+timers that may have been required for blinking.
+
+The blink_set() function should choose a user friendly blinking value
+if it is called with `*delay_on==0` && `*delay_off==0` parameters. In this
+case the driver should give back the chosen value through delay_on and
+delay_off parameters to the leds subsystem.
+
+Setting the brightness to zero with brightness_set() callback function
+should completely turn off the LED and cancel the previously programmed
+hardware blinking function, if any.
+
+
+Known Issues
+============
+
+The LED Trigger core cannot be a module as the simple trigger functions
+would cause nightmare dependency issues. I see this as a minor issue
+compared to the benefits the simple trigger functionality brings. The
+rest of the LED subsystem can be modular.
+
+
+Future Development
+==================
+
+At the moment, a trigger can't be created specifically for a single LED.
+There are a number of cases where a trigger might only be mappable to a
+particular LED (ACPI?). The addition of triggers provided by the LED driver
+should cover this option and be possible to add without breaking the
+current interface.
diff --git a/Documentation/leds/leds-class.txt b/Documentation/leds/leds-class.txt
deleted file mode 100644
index 8b39cc6b03ee..000000000000
--- a/Documentation/leds/leds-class.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-
-LED handling under Linux
-========================
-
-In its simplest form, the LED class just allows control of LEDs from
-userspace. LEDs appear in /sys/class/leds/. The maximum brightness of the
-LED is defined in max_brightness file. The brightness file will set the brightness
-of the LED (taking a value 0-max_brightness). Most LEDs don't have hardware
-brightness support so will just be turned on for non-zero brightness settings.
-
-The class also introduces the optional concept of an LED trigger. A trigger
-is a kernel based source of led events. Triggers can either be simple or
-complex. A simple trigger isn't configurable and is designed to slot into
-existing subsystems with minimal additional code. Examples are the disk-activity,
-nand-disk and sharpsl-charge triggers. With led triggers disabled, the code
-optimises away.
-
-Complex triggers while available to all LEDs have LED specific
-parameters and work on a per LED basis. The timer trigger is an example.
-The timer trigger will periodically change the LED brightness between
-LED_OFF and the current brightness setting. The "on" and "off" time can
-be specified via /sys/class/leds/<device>/delay_{on,off} in milliseconds.
-You can change the brightness value of a LED independently of the timer
-trigger. However, if you set the brightness value to LED_OFF it will
-also disable the timer trigger.
-
-You can change triggers in a similar manner to the way an IO scheduler
-is chosen (via /sys/class/leds/<device>/trigger). Trigger specific
-parameters can appear in /sys/class/leds/<device> once a given trigger is
-selected.
-
-
-Design Philosophy
-=================
-
-The underlying design philosophy is simplicity. LEDs are simple devices
-and the aim is to keep a small amount of code giving as much functionality
-as possible.  Please keep this in mind when suggesting enhancements.
-
-
-LED Device Naming
-=================
-
-Is currently of the form:
-
-"devicename:colour:function"
-
-There have been calls for LED properties such as colour to be exported as
-individual led class attributes. As a solution which doesn't incur as much
-overhead, I suggest these become part of the device name. The naming scheme
-above leaves scope for further attributes should they be needed. If sections
-of the name don't apply, just leave that section blank.
-
-
-Brightness setting API
-======================
-
-LED subsystem core exposes following API for setting brightness:
-
-    - led_set_brightness : it is guaranteed not to sleep, passing LED_OFF stops
-		blinking,
-    - led_set_brightness_sync : for use cases when immediate effect is desired -
-		it can block the caller for the time required for accessing
-		device registers and can sleep, passing LED_OFF stops hardware
-		blinking, returns -EBUSY if software blink fallback is enabled.
-
-
-LED registration API
-====================
-
-A driver wanting to register a LED classdev for use by other drivers /
-userspace needs to allocate and fill a led_classdev struct and then call
-[devm_]led_classdev_register. If the non devm version is used the driver
-must call led_classdev_unregister from its remove function before
-free-ing the led_classdev struct.
-
-If the driver can detect hardware initiated brightness changes and thus
-wants to have a brightness_hw_changed attribute then the LED_BRIGHT_HW_CHANGED
-flag must be set in flags before registering. Calling
-led_classdev_notify_brightness_hw_changed on a classdev not registered with
-the LED_BRIGHT_HW_CHANGED flag is a bug and will trigger a WARN_ON.
-
-Hardware accelerated blink of LEDs
-==================================
-
-Some LEDs can be programmed to blink without any CPU interaction. To
-support this feature, a LED driver can optionally implement the
-blink_set() function (see <linux/leds.h>). To set an LED to blinking,
-however, it is better to use the API function led_blink_set(), as it
-will check and implement software fallback if necessary.
-
-To turn off blinking, use the API function led_brightness_set()
-with brightness value LED_OFF, which should stop any software
-timers that may have been required for blinking.
-
-The blink_set() function should choose a user friendly blinking value
-if it is called with *delay_on==0 && *delay_off==0 parameters. In this
-case the driver should give back the chosen value through delay_on and
-delay_off parameters to the leds subsystem.
-
-Setting the brightness to zero with brightness_set() callback function
-should completely turn off the LED and cancel the previously programmed
-hardware blinking function, if any.
-
-
-Known Issues
-============
-
-The LED Trigger core cannot be a module as the simple trigger functions
-would cause nightmare dependency issues. I see this as a minor issue
-compared to the benefits the simple trigger functionality brings. The
-rest of the LED subsystem can be modular.
-
-
-Future Development
-==================
-
-At the moment, a trigger can't be created specifically for a single LED.
-There are a number of cases where a trigger might only be mappable to a
-particular LED (ACPI?). The addition of triggers provided by the LED driver
-should cover this option and be possible to add without breaking the
-current interface.
diff --git a/Documentation/leds/leds-lm3556.rst b/Documentation/leds/leds-lm3556.rst
new file mode 100644
index 000000000000..1ef17d7d800e
--- /dev/null
+++ b/Documentation/leds/leds-lm3556.rst
@@ -0,0 +1,137 @@
+========================
+Kernel driver for lm3556
+========================
+
+* Texas Instrument:
+  1.5 A Synchronous Boost LED Flash Driver w/ High-Side Current Source
+* Datasheet: http://www.national.com/ds/LM/LM3556.pdf
+
+Authors:
+      - Daniel Jeong
+
+	Contact:Daniel Jeong(daniel.jeong-at-ti.com, gshark.jeong-at-gmail.com)
+
+Description
+-----------
+There are 3 functions in LM3556, Flash, Torch and Indicator.
+
+Flash Mode
+^^^^^^^^^^
+
+In Flash Mode, the LED current source(LED) provides 16 target current levels
+from 93.75 mA to 1500 mA.The Flash currents are adjusted via the CURRENT
+CONTROL REGISTER(0x09).Flash mode is activated by the ENABLE REGISTER(0x0A),
+or by pulling the STROBE pin HIGH.
+
+LM3556 Flash can be controlled through sys/class/leds/flash/brightness file
+
+* if STROBE pin is enabled, below example control brightness only, and
+  ON / OFF will be controlled by STROBE pin.
+
+Flash Example:
+
+OFF::
+
+	#echo 0 > sys/class/leds/flash/brightness
+
+93.75 mA::
+
+	#echo 1 > sys/class/leds/flash/brightness
+
+...
+
+1500  mA::
+
+	#echo 16 > sys/class/leds/flash/brightness
+
+Torch Mode
+^^^^^^^^^^
+
+In Torch Mode, the current source(LED) is programmed via the CURRENT CONTROL
+REGISTER(0x09).Torch Mode is activated by the ENABLE REGISTER(0x0A) or by the
+hardware TORCH input.
+
+LM3556 torch can be controlled through sys/class/leds/torch/brightness file.
+* if TORCH pin is enabled, below example control brightness only,
+and ON / OFF will be controlled by TORCH pin.
+
+Torch Example:
+
+OFF::
+
+	#echo 0 > sys/class/leds/torch/brightness
+
+46.88 mA::
+
+	#echo 1 > sys/class/leds/torch/brightness
+
+...
+
+375 mA::
+
+	#echo 8 > sys/class/leds/torch/brightness
+
+Indicator Mode
+^^^^^^^^^^^^^^
+
+Indicator pattern can be set through sys/class/leds/indicator/pattern file,
+and 4 patterns are pre-defined in indicator_pattern array.
+
+According to N-lank, Pulse time and N Period values, different pattern wiill
+be generated.If you want new patterns for your own device, change
+indicator_pattern array with your own values and INDIC_PATTERN_SIZE.
+
+Please refer datasheet for more detail about N-Blank, Pulse time and N Period.
+
+Indicator pattern example:
+
+pattern 0::
+
+	#echo 0 > sys/class/leds/indicator/pattern
+
+...
+
+pattern 3::
+
+	#echo 3 > sys/class/leds/indicator/pattern
+
+Indicator brightness can be controlled through
+sys/class/leds/indicator/brightness file.
+
+Example:
+
+OFF::
+
+	#echo 0 > sys/class/leds/indicator/brightness
+
+5.86 mA::
+
+	#echo 1 > sys/class/leds/indicator/brightness
+
+...
+
+46.875mA::
+
+	#echo 8 > sys/class/leds/indicator/brightness
+
+Notes
+-----
+Driver expects it is registered using the i2c_board_info mechanism.
+To register the chip at address 0x63 on specific adapter, set the platform data
+according to include/linux/platform_data/leds-lm3556.h, set the i2c board info
+
+Example::
+
+	static struct i2c_board_info board_i2c_ch4[] __initdata = {
+		{
+			 I2C_BOARD_INFO(LM3556_NAME, 0x63),
+			 .platform_data = &lm3556_pdata,
+		 },
+	};
+
+and register it in the platform init function
+
+Example::
+
+	board_register_i2c_bus(4, 400,
+				board_i2c_ch4, ARRAY_SIZE(board_i2c_ch4));
diff --git a/Documentation/leds/leds-lm3556.txt b/Documentation/leds/leds-lm3556.txt
deleted file mode 100644
index 62278e871b50..000000000000
--- a/Documentation/leds/leds-lm3556.txt
+++ /dev/null
@@ -1,85 +0,0 @@
-Kernel driver for lm3556
-========================
-
-*Texas Instrument:
- 1.5 A Synchronous Boost LED Flash Driver w/ High-Side Current Source
-* Datasheet: http://www.national.com/ds/LM/LM3556.pdf
-
-Authors:
-	Daniel Jeong
-	Contact:Daniel Jeong(daniel.jeong-at-ti.com, gshark.jeong-at-gmail.com)
-
-Description
------------
-There are 3 functions in LM3556, Flash, Torch and Indicator.
-
-FLASH MODE
-In Flash Mode, the LED current source(LED) provides 16 target current levels
-from 93.75 mA to 1500 mA.The Flash currents are adjusted via the CURRENT
-CONTROL REGISTER(0x09).Flash mode is activated by the ENABLE REGISTER(0x0A),
-or by pulling the STROBE pin HIGH.
-LM3556 Flash can be controlled through sys/class/leds/flash/brightness file
-* if STROBE pin is enabled, below example control brightness only, and
-ON / OFF will be controlled by STROBE pin.
-
-Flash Example:
-OFF     : #echo 0 > sys/class/leds/flash/brightness
-93.75 mA: #echo 1 > sys/class/leds/flash/brightness
-... .....
-1500  mA: #echo 16 > sys/class/leds/flash/brightness
-
-TORCH MODE
-In Torch Mode, the current source(LED) is programmed via the CURRENT CONTROL
-REGISTER(0x09).Torch Mode is activated by the ENABLE REGISTER(0x0A) or by the
-hardware TORCH input.
-LM3556 torch can be controlled through sys/class/leds/torch/brightness file.
-* if TORCH pin is enabled, below example control brightness only,
-and ON / OFF will be controlled by TORCH pin.
-
-Torch Example:
-OFF     : #echo 0 > sys/class/leds/torch/brightness
-46.88 mA: #echo 1 > sys/class/leds/torch/brightness
-... .....
-375 mA  : #echo 8 > sys/class/leds/torch/brightness
-
-INDICATOR MODE
-Indicator pattern can be set through sys/class/leds/indicator/pattern file,
-and 4 patterns are pre-defined in indicator_pattern array.
-According to N-lank, Pulse time and N Period values, different pattern wiill
-be generated.If you want new patterns for your own device, change
-indicator_pattern array with your own values and INDIC_PATTERN_SIZE.
-Please refer datasheet for more detail about N-Blank, Pulse time and N Period.
-
-Indicator pattern example:
-pattern 0: #echo 0 > sys/class/leds/indicator/pattern
-....
-pattern 3: #echo 3 > sys/class/leds/indicator/pattern
-
-Indicator brightness can be controlled through
-sys/class/leds/indicator/brightness file.
-
-Example:
-OFF      : #echo 0 > sys/class/leds/indicator/brightness
-5.86 mA  : #echo 1 > sys/class/leds/indicator/brightness
-........
-46.875mA : #echo 8 > sys/class/leds/indicator/brightness
-
-Notes
------
-Driver expects it is registered using the i2c_board_info mechanism.
-To register the chip at address 0x63 on specific adapter, set the platform data
-according to include/linux/platform_data/leds-lm3556.h, set the i2c board info
-
-Example:
-	static struct i2c_board_info board_i2c_ch4[] __initdata = {
-		{
-			 I2C_BOARD_INFO(LM3556_NAME, 0x63),
-			 .platform_data = &lm3556_pdata,
-		 },
-	};
-
-and register it in the platform init function
-
-Example:
-	board_register_i2c_bus(4, 400,
-				board_i2c_ch4, ARRAY_SIZE(board_i2c_ch4));
diff --git a/Documentation/leds/leds-lp3944.rst b/Documentation/leds/leds-lp3944.rst
new file mode 100644
index 000000000000..c2f87dc1a3a9
--- /dev/null
+++ b/Documentation/leds/leds-lp3944.rst
@@ -0,0 +1,59 @@
+====================
+Kernel driver lp3944
+====================
+
+  * National Semiconductor LP3944 Fun-light Chip
+
+    Prefix: 'lp3944'
+
+    Addresses scanned: None (see the Notes section below)
+
+    Datasheet:
+
+	Publicly available at the National Semiconductor website
+	http://www.national.com/pf/LP/LP3944.html
+
+Authors:
+	Antonio Ospite <ospite@studenti.unina.it>
+
+
+Description
+-----------
+The LP3944 is a helper chip that can drive up to 8 leds, with two programmable
+DIM modes; it could even be used as a gpio expander but this driver assumes it
+is used as a led controller.
+
+The DIM modes are used to set _blink_ patterns for leds, the pattern is
+specified supplying two parameters:
+
+  - period:
+	from 0s to 1.6s
+  - duty cycle:
+	percentage of the period the led is on, from 0 to 100
+
+Setting a led in DIM0 or DIM1 mode makes it blink according to the pattern.
+See the datasheet for details.
+
+LP3944 can be found on Motorola A910 smartphone, where it drives the rgb
+leds, the camera flash light and the lcds power.
+
+
+Notes
+-----
+The chip is used mainly in embedded contexts, so this driver expects it is
+registered using the i2c_board_info mechanism.
+
+To register the chip at address 0x60 on adapter 0, set the platform data
+according to include/linux/leds-lp3944.h, set the i2c board info::
+
+	static struct i2c_board_info a910_i2c_board_info[] __initdata = {
+		{
+			I2C_BOARD_INFO("lp3944", 0x60),
+			.platform_data = &a910_lp3944_leds,
+		},
+	};
+
+and register it in the platform init function::
+
+	i2c_register_board_info(0, a910_i2c_board_info,
+			ARRAY_SIZE(a910_i2c_board_info));
diff --git a/Documentation/leds/leds-lp3944.txt b/Documentation/leds/leds-lp3944.txt
deleted file mode 100644
index e88ac3b60c08..000000000000
--- a/Documentation/leds/leds-lp3944.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-Kernel driver lp3944
-====================
-
-  * National Semiconductor LP3944 Fun-light Chip
-    Prefix: 'lp3944'
-    Addresses scanned: None (see the Notes section below)
-    Datasheet: Publicly available at the National Semiconductor website
-               http://www.national.com/pf/LP/LP3944.html
-
-Authors:
-        Antonio Ospite <ospite@studenti.unina.it>
-
-
-Description
------------
-The LP3944 is a helper chip that can drive up to 8 leds, with two programmable
-DIM modes; it could even be used as a gpio expander but this driver assumes it
-is used as a led controller.
-
-The DIM modes are used to set _blink_ patterns for leds, the pattern is
-specified supplying two parameters:
-  - period: from 0s to 1.6s
-  - duty cycle: percentage of the period the led is on, from 0 to 100
-
-Setting a led in DIM0 or DIM1 mode makes it blink according to the pattern.
-See the datasheet for details.
-
-LP3944 can be found on Motorola A910 smartphone, where it drives the rgb
-leds, the camera flash light and the lcds power.
-
-
-Notes
------
-The chip is used mainly in embedded contexts, so this driver expects it is
-registered using the i2c_board_info mechanism.
-
-To register the chip at address 0x60 on adapter 0, set the platform data
-according to include/linux/leds-lp3944.h, set the i2c board info:
-
-	static struct i2c_board_info a910_i2c_board_info[] __initdata = {
-		{
-			I2C_BOARD_INFO("lp3944", 0x60),
-			.platform_data = &a910_lp3944_leds,
-		},
-	};
-
-and register it in the platform init function
-
-	i2c_register_board_info(0, a910_i2c_board_info,
-			ARRAY_SIZE(a910_i2c_board_info));
diff --git a/Documentation/leds/leds-lp5521.rst b/Documentation/leds/leds-lp5521.rst
new file mode 100644
index 000000000000..0432615b083d
--- /dev/null
+++ b/Documentation/leds/leds-lp5521.rst
@@ -0,0 +1,115 @@
+========================
+Kernel driver for lp5521
+========================
+
+* National Semiconductor LP5521 led driver chip
+* Datasheet: http://www.national.com/pf/LP/LP5521.html
+
+Authors: Mathias Nyman, Yuri Zaporozhets, Samu Onkalo
+
+Contact: Samu Onkalo (samu.p.onkalo-at-nokia.com)
+
+Description
+-----------
+
+LP5521 can drive up to 3 channels. Leds can be controlled directly via
+the led class control interface. Channels have generic names:
+lp5521:channelx, where x is 0 .. 2
+
+All three channels can be also controlled using the engine micro programs.
+More details of the instructions can be found from the public data sheet.
+
+LP5521 has the internal program memory for running various LED patterns.
+There are two ways to run LED patterns.
+
+1) Legacy interface - enginex_mode and enginex_load
+   Control interface for the engines:
+
+   x is 1 .. 3
+
+   enginex_mode:
+	disabled, load, run
+   enginex_load:
+	store program (visible only in engine load mode)
+
+  Example (start to blink the channel 2 led)::
+
+	cd   /sys/class/leds/lp5521:channel2/device
+	echo "load" > engine3_mode
+	echo "037f4d0003ff6000" > engine3_load
+	echo "run" > engine3_mode
+
+  To stop the engine::
+
+	echo "disabled" > engine3_mode
+
+2) Firmware interface - LP55xx common interface
+
+For the details, please refer to 'firmware' section in leds-lp55xx.txt
+
+sysfs contains a selftest entry.
+
+The test communicates with the chip and checks that
+the clock mode is automatically set to the requested one.
+
+Each channel has its own led current settings.
+
+- /sys/class/leds/lp5521:channel0/led_current - RW
+- /sys/class/leds/lp5521:channel0/max_current - RO
+
+Format: 10x mA i.e 10 means 1.0 mA
+
+example platform data::
+
+  static struct lp55xx_led_config lp5521_led_config[] = {
+	  {
+		.name = "red",
+		  .chan_nr        = 0,
+		  .led_current    = 50,
+		.max_current    = 130,
+	  }, {
+		.name = "green",
+		  .chan_nr        = 1,
+		  .led_current    = 0,
+		.max_current    = 130,
+	  }, {
+		.name = "blue",
+		  .chan_nr        = 2,
+		  .led_current    = 0,
+		.max_current    = 130,
+	  }
+  };
+
+  static int lp5521_setup(void)
+  {
+	/* setup HW resources */
+  }
+
+  static void lp5521_release(void)
+  {
+	/* Release HW resources */
+  }
+
+  static void lp5521_enable(bool state)
+  {
+	/* Control of chip enable signal */
+  }
+
+  static struct lp55xx_platform_data lp5521_platform_data = {
+	  .led_config     = lp5521_led_config,
+	  .num_channels   = ARRAY_SIZE(lp5521_led_config),
+	  .clock_mode     = LP55XX_CLOCK_EXT,
+	  .setup_resources   = lp5521_setup,
+	  .release_resources = lp5521_release,
+	  .enable            = lp5521_enable,
+  };
+
+Note:
+  chan_nr can have values between 0 and 2.
+  The name of each channel can be configurable.
+  If the name field is not defined, the default name will be set to 'xxxx:channelN'
+  (XXXX : pdata->label or i2c client name, N : channel number)
+
+
+If the current is set to 0 in the platform data, that channel is
+disabled and it is not visible in the sysfs.
diff --git a/Documentation/leds/leds-lp5521.txt b/Documentation/leds/leds-lp5521.txt
deleted file mode 100644
index d08d8c179f85..000000000000
--- a/Documentation/leds/leds-lp5521.txt
+++ /dev/null
@@ -1,101 +0,0 @@
-Kernel driver for lp5521
-========================
-
-* National Semiconductor LP5521 led driver chip
-* Datasheet: http://www.national.com/pf/LP/LP5521.html
-
-Authors: Mathias Nyman, Yuri Zaporozhets, Samu Onkalo
-Contact: Samu Onkalo (samu.p.onkalo-at-nokia.com)
-
-Description
------------
-
-LP5521 can drive up to 3 channels. Leds can be controlled directly via
-the led class control interface. Channels have generic names:
-lp5521:channelx, where x is 0 .. 2
-
-All three channels can be also controlled using the engine micro programs.
-More details of the instructions can be found from the public data sheet.
-
-LP5521 has the internal program memory for running various LED patterns.
-There are two ways to run LED patterns.
-
-1) Legacy interface - enginex_mode and enginex_load
-  Control interface for the engines:
-  x is 1 .. 3
-  enginex_mode : disabled, load, run
-  enginex_load : store program (visible only in engine load mode)
-
-  Example (start to blink the channel 2 led):
-  cd   /sys/class/leds/lp5521:channel2/device
-  echo "load" > engine3_mode
-  echo "037f4d0003ff6000" > engine3_load
-  echo "run" > engine3_mode
-
-  To stop the engine:
-  echo "disabled" > engine3_mode
-
-2) Firmware interface - LP55xx common interface
-  For the details, please refer to 'firmware' section in leds-lp55xx.txt
-
-sysfs contains a selftest entry.
-The test communicates with the chip and checks that
-the clock mode is automatically set to the requested one.
-
-Each channel has its own led current settings.
-/sys/class/leds/lp5521:channel0/led_current - RW
-/sys/class/leds/lp5521:channel0/max_current - RO
-Format: 10x mA i.e 10 means 1.0 mA
-
-example platform data:
-
-Note: chan_nr can have values between 0 and 2.
-The name of each channel can be configurable.
-If the name field is not defined, the default name will be set to 'xxxx:channelN'
-(XXXX : pdata->label or i2c client name, N : channel number)
-
-static struct lp55xx_led_config lp5521_led_config[] = {
-        {
-		.name = "red",
-                .chan_nr        = 0,
-                .led_current    = 50,
-		.max_current    = 130,
-        }, {
-		.name = "green",
-                .chan_nr        = 1,
-                .led_current    = 0,
-		.max_current    = 130,
-        }, {
-		.name = "blue",
-                .chan_nr        = 2,
-                .led_current    = 0,
-		.max_current    = 130,
-        }
-};
-
-static int lp5521_setup(void)
-{
-	/* setup HW resources */
-}
-
-static void lp5521_release(void)
-{
-	/* Release HW resources */
-}
-
-static void lp5521_enable(bool state)
-{
-	/* Control of chip enable signal */
-}
-
-static struct lp55xx_platform_data lp5521_platform_data = {
-        .led_config     = lp5521_led_config,
-        .num_channels   = ARRAY_SIZE(lp5521_led_config),
-        .clock_mode     = LP55XX_CLOCK_EXT,
-        .setup_resources   = lp5521_setup,
-        .release_resources = lp5521_release,
-        .enable            = lp5521_enable,
-};
-
-If the current is set to 0 in the platform data, that channel is
-disabled and it is not visible in the sysfs.
diff --git a/Documentation/leds/leds-lp5523.rst b/Documentation/leds/leds-lp5523.rst
new file mode 100644
index 000000000000..7d7362a1dd57
--- /dev/null
+++ b/Documentation/leds/leds-lp5523.rst
@@ -0,0 +1,147 @@
+========================
+Kernel driver for lp5523
+========================
+
+* National Semiconductor LP5523 led driver chip
+* Datasheet: http://www.national.com/pf/LP/LP5523.html
+
+Authors: Mathias Nyman, Yuri Zaporozhets, Samu Onkalo
+Contact: Samu Onkalo (samu.p.onkalo-at-nokia.com)
+
+Description
+-----------
+LP5523 can drive up to 9 channels. Leds can be controlled directly via
+the led class control interface.
+The name of each channel is configurable in the platform data - name and label.
+There are three options to make the channel name.
+
+a) Define the 'name' in the platform data
+
+To make specific channel name, then use 'name' platform data.
+
+- /sys/class/leds/R1               (name: 'R1')
+- /sys/class/leds/B1               (name: 'B1')
+
+b) Use the 'label' with no 'name' field
+
+For one device name with channel number, then use 'label'.
+- /sys/class/leds/RGB:channelN     (label: 'RGB', N: 0 ~ 8)
+
+c) Default
+
+If both fields are NULL, 'lp5523' is used by default.
+- /sys/class/leds/lp5523:channelN  (N: 0 ~ 8)
+
+LP5523 has the internal program memory for running various LED patterns.
+There are two ways to run LED patterns.
+
+1) Legacy interface - enginex_mode, enginex_load and enginex_leds
+
+  Control interface for the engines:
+
+  x is 1 .. 3
+
+  enginex_mode:
+	disabled, load, run
+  enginex_load:
+	microcode load
+  enginex_leds:
+	led mux control
+
+  ::
+
+	cd /sys/class/leds/lp5523:channel2/device
+	echo "load" > engine3_mode
+	echo "9d80400004ff05ff437f0000" > engine3_load
+	echo "111111111" > engine3_leds
+	echo "run" > engine3_mode
+
+  To stop the engine::
+
+	echo "disabled" > engine3_mode
+
+2) Firmware interface - LP55xx common interface
+
+For the details, please refer to 'firmware' section in leds-lp55xx.txt
+
+LP5523 has three master faders. If a channel is mapped to one of
+the master faders, its output is dimmed based on the value of the master
+fader.
+
+For example::
+
+  echo "123000123" > master_fader_leds
+
+creates the following channel-fader mappings::
+
+  channel 0,6 to master_fader1
+  channel 1,7 to master_fader2
+  channel 2,8 to master_fader3
+
+Then, to have 25% of the original output on channel 0,6::
+
+  echo 64 > master_fader1
+
+To have 0% of the original output (i.e. no output) channel 1,7::
+
+  echo 0 > master_fader2
+
+To have 100% of the original output (i.e. no dimming) on channel 2,8::
+
+  echo 255 > master_fader3
+
+To clear all master fader controls::
+
+  echo "000000000" > master_fader_leds
+
+Selftest uses always the current from the platform data.
+
+Each channel contains led current settings.
+- /sys/class/leds/lp5523:channel2/led_current - RW
+- /sys/class/leds/lp5523:channel2/max_current - RO
+
+Format: 10x mA i.e 10 means 1.0 mA
+
+Example platform data::
+
+	static struct lp55xx_led_config lp5523_led_config[] = {
+		{
+			.name		= "D1",
+			.chan_nr        = 0,
+			.led_current    = 50,
+			.max_current    = 130,
+		},
+	...
+		{
+			.chan_nr        = 8,
+			.led_current    = 50,
+			.max_current    = 130,
+		}
+	};
+
+	static int lp5523_setup(void)
+	{
+		/* Setup HW resources */
+	}
+
+	static void lp5523_release(void)
+	{
+		/* Release HW resources */
+	}
+
+	static void lp5523_enable(bool state)
+	{
+		/* Control chip enable signal */
+	}
+
+	static struct lp55xx_platform_data lp5523_platform_data = {
+		.led_config     = lp5523_led_config,
+		.num_channels   = ARRAY_SIZE(lp5523_led_config),
+		.clock_mode     = LP55XX_CLOCK_EXT,
+		.setup_resources   = lp5523_setup,
+		.release_resources = lp5523_release,
+		.enable            = lp5523_enable,
+	};
+
+Note
+  chan_nr can have values between 0 and 8.
diff --git a/Documentation/leds/leds-lp5523.txt b/Documentation/leds/leds-lp5523.txt
deleted file mode 100644
index 0961a060fc4d..000000000000
--- a/Documentation/leds/leds-lp5523.txt
+++ /dev/null
@@ -1,130 +0,0 @@
-Kernel driver for lp5523
-========================
-
-* National Semiconductor LP5523 led driver chip
-* Datasheet: http://www.national.com/pf/LP/LP5523.html
-
-Authors: Mathias Nyman, Yuri Zaporozhets, Samu Onkalo
-Contact: Samu Onkalo (samu.p.onkalo-at-nokia.com)
-
-Description
------------
-LP5523 can drive up to 9 channels. Leds can be controlled directly via
-the led class control interface.
-The name of each channel is configurable in the platform data - name and label.
-There are three options to make the channel name.
-
-a) Define the 'name' in the platform data
-To make specific channel name, then use 'name' platform data.
-/sys/class/leds/R1               (name: 'R1')
-/sys/class/leds/B1               (name: 'B1')
-
-b) Use the 'label' with no 'name' field
-For one device name with channel number, then use 'label'.
-/sys/class/leds/RGB:channelN     (label: 'RGB', N: 0 ~ 8)
-
-c) Default
-If both fields are NULL, 'lp5523' is used by default.
-/sys/class/leds/lp5523:channelN  (N: 0 ~ 8)
-
-LP5523 has the internal program memory for running various LED patterns.
-There are two ways to run LED patterns.
-
-1) Legacy interface - enginex_mode, enginex_load and enginex_leds
-  Control interface for the engines:
-  x is 1 .. 3
-  enginex_mode : disabled, load, run
-  enginex_load : microcode load
-  enginex_leds : led mux control
-
-  cd /sys/class/leds/lp5523:channel2/device
-  echo "load" > engine3_mode
-  echo "9d80400004ff05ff437f0000" > engine3_load
-  echo "111111111" > engine3_leds
-  echo "run" > engine3_mode
-
-  To stop the engine:
-  echo "disabled" > engine3_mode
-
-2) Firmware interface - LP55xx common interface
-  For the details, please refer to 'firmware' section in leds-lp55xx.txt
-
-LP5523 has three master faders. If a channel is mapped to one of
-the master faders, its output is dimmed based on the value of the master
-fader.
-
-For example,
-
-  echo "123000123" > master_fader_leds
-
-creates the following channel-fader mappings:
-
-  channel 0,6 to master_fader1
-  channel 1,7 to master_fader2
-  channel 2,8 to master_fader3
-
-Then, to have 25% of the original output on channel 0,6:
-
-  echo 64 > master_fader1
-
-To have 0% of the original output (i.e. no output) channel 1,7:
-
-  echo 0 > master_fader2
-
-To have 100% of the original output (i.e. no dimming) on channel 2,8:
-
-  echo 255 > master_fader3
-
-To clear all master fader controls:
-
-  echo "000000000" > master_fader_leds
-
-Selftest uses always the current from the platform data.
-
-Each channel contains led current settings.
-/sys/class/leds/lp5523:channel2/led_current - RW
-/sys/class/leds/lp5523:channel2/max_current - RO
-Format: 10x mA i.e 10 means 1.0 mA
-
-Example platform data:
-
-Note - chan_nr can have values between 0 and 8.
-
-static struct lp55xx_led_config lp5523_led_config[] = {
-        {
-		.name		= "D1",
-                .chan_nr        = 0,
-                .led_current    = 50,
-		.max_current    = 130,
-        },
-...
-        {
-                .chan_nr        = 8,
-                .led_current    = 50,
-		.max_current    = 130,
-        }
-};
-
-static int lp5523_setup(void)
-{
-	/* Setup HW resources */
-}
-
-static void lp5523_release(void)
-{
-	/* Release HW resources */
-}
-
-static void lp5523_enable(bool state)
-{
-	/* Control chip enable signal */
-}
-
-static struct lp55xx_platform_data lp5523_platform_data = {
-        .led_config     = lp5523_led_config,
-        .num_channels   = ARRAY_SIZE(lp5523_led_config),
-        .clock_mode     = LP55XX_CLOCK_EXT,
-        .setup_resources   = lp5523_setup,
-        .release_resources = lp5523_release,
-        .enable            = lp5523_enable,
-};
diff --git a/Documentation/leds/leds-lp5562.rst b/Documentation/leds/leds-lp5562.rst
new file mode 100644
index 000000000000..79bbb2487ff6
--- /dev/null
+++ b/Documentation/leds/leds-lp5562.rst
@@ -0,0 +1,137 @@
+========================
+Kernel driver for lp5562
+========================
+
+* TI LP5562 LED Driver
+
+Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+
+Description
+===========
+
+  LP5562 can drive up to 4 channels. R/G/B and White.
+  LEDs can be controlled directly via the led class control interface.
+
+  All four channels can be also controlled using the engine micro programs.
+  LP5562 has the internal program memory for running various LED patterns.
+  For the details, please refer to 'firmware' section in leds-lp55xx.txt
+
+Device attribute
+================
+
+engine_mux
+  3 Engines are allocated in LP5562, but the number of channel is 4.
+  Therefore each channel should be mapped to the engine number.
+
+  Value: RGB or W
+
+  This attribute is used for programming LED data with the firmware interface.
+  Unlike the LP5521/LP5523/55231, LP5562 has unique feature for the engine mux,
+  so additional sysfs is required
+
+  LED Map
+
+  ===== === ===============================
+  Red   ... Engine 1 (fixed)
+  Green ... Engine 2 (fixed)
+  Blue  ... Engine 3 (fixed)
+  White ... Engine 1 or 2 or 3 (selective)
+  ===== === ===============================
+
+How to load the program data using engine_mux
+=============================================
+
+  Before loading the LP5562 program data, engine_mux should be written between
+  the engine selection and loading the firmware.
+  Engine mux has two different mode, RGB and W.
+  RGB is used for loading RGB program data, W is used for W program data.
+
+  For example, run blinking green channel pattern::
+
+    echo 2 > /sys/bus/i2c/devices/xxxx/select_engine     # 2 is for green channel
+    echo "RGB" > /sys/bus/i2c/devices/xxxx/engine_mux    # engine mux for RGB
+    echo 1 > /sys/class/firmware/lp5562/loading
+    echo "4000600040FF6000" > /sys/class/firmware/lp5562/data
+    echo 0 > /sys/class/firmware/lp5562/loading
+    echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+  To run a blinking white pattern::
+
+    echo 1 or 2 or 3 > /sys/bus/i2c/devices/xxxx/select_engine
+    echo "W" > /sys/bus/i2c/devices/xxxx/engine_mux
+    echo 1 > /sys/class/firmware/lp5562/loading
+    echo "4000600040FF6000" > /sys/class/firmware/lp5562/data
+    echo 0 > /sys/class/firmware/lp5562/loading
+    echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+How to load the predefined patterns
+===================================
+
+  Please refer to 'leds-lp55xx.txt"
+
+Setting Current of Each Channel
+===============================
+
+  Like LP5521 and LP5523/55231, LP5562 provides LED current settings.
+  The 'led_current' and 'max_current' are used.
+
+Example of Platform data
+========================
+
+::
+
+	static struct lp55xx_led_config lp5562_led_config[] = {
+		{
+			.name 		= "R",
+			.chan_nr	= 0,
+			.led_current	= 20,
+			.max_current	= 40,
+		},
+		{
+			.name 		= "G",
+			.chan_nr	= 1,
+			.led_current	= 20,
+			.max_current	= 40,
+		},
+		{
+			.name 		= "B",
+			.chan_nr	= 2,
+			.led_current	= 20,
+			.max_current	= 40,
+		},
+		{
+			.name 		= "W",
+			.chan_nr	= 3,
+			.led_current	= 20,
+			.max_current	= 40,
+		},
+	};
+
+	static int lp5562_setup(void)
+	{
+		/* setup HW resources */
+	}
+
+	static void lp5562_release(void)
+	{
+		/* Release HW resources */
+	}
+
+	static void lp5562_enable(bool state)
+	{
+		/* Control of chip enable signal */
+	}
+
+	static struct lp55xx_platform_data lp5562_platform_data = {
+		.led_config     = lp5562_led_config,
+		.num_channels   = ARRAY_SIZE(lp5562_led_config),
+		.setup_resources   = lp5562_setup,
+		.release_resources = lp5562_release,
+		.enable            = lp5562_enable,
+	};
+
+To configure the platform specific data, lp55xx_platform_data structure is used
+
+
+If the current is set to 0 in the platform data, that channel is
+disabled and it is not visible in the sysfs.
diff --git a/Documentation/leds/leds-lp5562.txt b/Documentation/leds/leds-lp5562.txt
deleted file mode 100644
index 5a823ff6b393..000000000000
--- a/Documentation/leds/leds-lp5562.txt
+++ /dev/null
@@ -1,120 +0,0 @@
-Kernel driver for LP5562
-========================
-
-* TI LP5562 LED Driver
-
-Author: Milo(Woogyom) Kim <milo.kim@ti.com>
-
-Description
-
-  LP5562 can drive up to 4 channels. R/G/B and White.
-  LEDs can be controlled directly via the led class control interface.
-
-  All four channels can be also controlled using the engine micro programs.
-  LP5562 has the internal program memory for running various LED patterns.
-  For the details, please refer to 'firmware' section in leds-lp55xx.txt
-
-Device attribute: engine_mux
-
-  3 Engines are allocated in LP5562, but the number of channel is 4.
-  Therefore each channel should be mapped to the engine number.
-  Value : RGB or W
-
-  This attribute is used for programming LED data with the firmware interface.
-  Unlike the LP5521/LP5523/55231, LP5562 has unique feature for the engine mux,
-  so additional sysfs is required.
-
-  LED Map
-  Red   ... Engine 1 (fixed)
-  Green ... Engine 2 (fixed)
-  Blue  ... Engine 3 (fixed)
-  White ... Engine 1 or 2 or 3 (selective)
-
-How to load the program data using engine_mux
-
-  Before loading the LP5562 program data, engine_mux should be written between
-  the engine selection and loading the firmware.
-  Engine mux has two different mode, RGB and W.
-  RGB is used for loading RGB program data, W is used for W program data.
-
-  For example, run blinking green channel pattern,
-  echo 2 > /sys/bus/i2c/devices/xxxx/select_engine     # 2 is for green channel
-  echo "RGB" > /sys/bus/i2c/devices/xxxx/engine_mux    # engine mux for RGB
-  echo 1 > /sys/class/firmware/lp5562/loading
-  echo "4000600040FF6000" > /sys/class/firmware/lp5562/data
-  echo 0 > /sys/class/firmware/lp5562/loading
-  echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
-
-  To run a blinking white pattern,
-  echo 1 or 2 or 3 > /sys/bus/i2c/devices/xxxx/select_engine
-  echo "W" > /sys/bus/i2c/devices/xxxx/engine_mux
-  echo 1 > /sys/class/firmware/lp5562/loading
-  echo "4000600040FF6000" > /sys/class/firmware/lp5562/data
-  echo 0 > /sys/class/firmware/lp5562/loading
-  echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
-
-How to load the predefined patterns
-
-  Please refer to 'leds-lp55xx.txt"
-
-Setting Current of Each Channel
-
-  Like LP5521 and LP5523/55231, LP5562 provides LED current settings.
-  The 'led_current' and 'max_current' are used.
-
-(Example of Platform data)
-
-To configure the platform specific data, lp55xx_platform_data structure is used.
-
-static struct lp55xx_led_config lp5562_led_config[] = {
-	{
-		.name 		= "R",
-		.chan_nr	= 0,
-		.led_current	= 20,
-		.max_current	= 40,
-	},
-	{
-		.name 		= "G",
-		.chan_nr	= 1,
-		.led_current	= 20,
-		.max_current	= 40,
-	},
-	{
-		.name 		= "B",
-		.chan_nr	= 2,
-		.led_current	= 20,
-		.max_current	= 40,
-	},
-	{
-		.name 		= "W",
-		.chan_nr	= 3,
-		.led_current	= 20,
-		.max_current	= 40,
-	},
-};
-
-static int lp5562_setup(void)
-{
-	/* setup HW resources */
-}
-
-static void lp5562_release(void)
-{
-	/* Release HW resources */
-}
-
-static void lp5562_enable(bool state)
-{
-	/* Control of chip enable signal */
-}
-
-static struct lp55xx_platform_data lp5562_platform_data = {
-        .led_config     = lp5562_led_config,
-        .num_channels   = ARRAY_SIZE(lp5562_led_config),
-        .setup_resources   = lp5562_setup,
-        .release_resources = lp5562_release,
-        .enable            = lp5562_enable,
-};
-
-If the current is set to 0 in the platform data, that channel is
-disabled and it is not visible in the sysfs.
diff --git a/Documentation/leds/leds-lp55xx.rst b/Documentation/leds/leds-lp55xx.rst
new file mode 100644
index 000000000000..632e41cec0b5
--- /dev/null
+++ b/Documentation/leds/leds-lp55xx.rst
@@ -0,0 +1,224 @@
+=================================================
+LP5521/LP5523/LP55231/LP5562/LP8501 Common Driver
+=================================================
+
+Authors: Milo(Woogyom) Kim <milo.kim@ti.com>
+
+Description
+-----------
+LP5521, LP5523/55231, LP5562 and LP8501 have common features as below.
+
+  Register access via the I2C
+  Device initialization/deinitialization
+  Create LED class devices for multiple output channels
+  Device attributes for user-space interface
+  Program memory for running LED patterns
+
+The LP55xx common driver provides these features using exported functions.
+
+  lp55xx_init_device() / lp55xx_deinit_device()
+  lp55xx_register_leds() / lp55xx_unregister_leds()
+  lp55xx_regsister_sysfs() / lp55xx_unregister_sysfs()
+
+( Driver Structure Data )
+
+In lp55xx common driver, two different data structure is used.
+
+* lp55xx_led
+    control multi output LED channels such as led current, channel index.
+* lp55xx_chip
+    general chip control such like the I2C and platform data.
+
+For example, LP5521 has maximum 3 LED channels.
+LP5523/55231 has 9 output channels::
+
+  lp55xx_chip for LP5521 ... lp55xx_led #1
+			     lp55xx_led #2
+			     lp55xx_led #3
+
+  lp55xx_chip for LP5523 ... lp55xx_led #1
+			     lp55xx_led #2
+				   .
+				   .
+			     lp55xx_led #9
+
+( Chip Dependent Code )
+
+To support device specific configurations, special structure
+'lpxx_device_config' is used.
+
+  - Maximum number of channels
+  - Reset command, chip enable command
+  - Chip specific initialization
+  - Brightness control register access
+  - Setting LED output current
+  - Program memory address access for running patterns
+  - Additional device specific attributes
+
+( Firmware Interface )
+
+LP55xx family devices have the internal program memory for running
+various LED patterns.
+
+This pattern data is saved as a file in the user-land or
+hex byte string is written into the memory through the I2C.
+
+LP55xx common driver supports the firmware interface.
+
+LP55xx chips have three program engines.
+
+To load and run the pattern, the programming sequence is following.
+
+  (1) Select an engine number (1/2/3)
+  (2) Mode change to load
+  (3) Write pattern data into selected area
+  (4) Mode change to run
+
+The LP55xx common driver provides simple interfaces as below.
+
+select_engine:
+	Select which engine is used for running program
+run_engine:
+	Start program which is loaded via the firmware interface
+firmware:
+	Load program data
+
+In case of LP5523, one more command is required, 'enginex_leds'.
+It is used for selecting LED output(s) at each engine number.
+In more details, please refer to 'leds-lp5523.txt'.
+
+For example, run blinking pattern in engine #1 of LP5521::
+
+	echo 1 > /sys/bus/i2c/devices/xxxx/select_engine
+	echo 1 > /sys/class/firmware/lp5521/loading
+	echo "4000600040FF6000" > /sys/class/firmware/lp5521/data
+	echo 0 > /sys/class/firmware/lp5521/loading
+	echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+For example, run blinking pattern in engine #3 of LP55231
+
+Two LEDs are configured as pattern output channels::
+
+	echo 3 > /sys/bus/i2c/devices/xxxx/select_engine
+	echo 1 > /sys/class/firmware/lp55231/loading
+	echo "9d0740ff7e0040007e00a0010000" > /sys/class/firmware/lp55231/data
+	echo 0 > /sys/class/firmware/lp55231/loading
+	echo "000001100" > /sys/bus/i2c/devices/xxxx/engine3_leds
+	echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+To start blinking patterns in engine #2 and #3 simultaneously::
+
+	for idx in 2 3
+	do
+	echo $idx > /sys/class/leds/red/device/select_engine
+	sleep 0.1
+	echo 1 > /sys/class/firmware/lp5521/loading
+	echo "4000600040FF6000" > /sys/class/firmware/lp5521/data
+	echo 0 > /sys/class/firmware/lp5521/loading
+	done
+	echo 1 > /sys/class/leds/red/device/run_engine
+
+Here is another example for LP5523.
+
+Full LED strings are selected by 'engine2_leds'::
+
+	echo 2 > /sys/bus/i2c/devices/xxxx/select_engine
+	echo 1 > /sys/class/firmware/lp5523/loading
+	echo "9d80400004ff05ff437f0000" > /sys/class/firmware/lp5523/data
+	echo 0 > /sys/class/firmware/lp5523/loading
+	echo "111111111" > /sys/bus/i2c/devices/xxxx/engine2_leds
+	echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+As soon as 'loading' is set to 0, registered callback is called.
+Inside the callback, the selected engine is loaded and memory is updated.
+To run programmed pattern, 'run_engine' attribute should be enabled.
+
+The pattern sequence of LP8501 is similar to LP5523.
+
+However pattern data is specific.
+
+Ex 1) Engine 1 is used::
+
+	echo 1 > /sys/bus/i2c/devices/xxxx/select_engine
+	echo 1 > /sys/class/firmware/lp8501/loading
+	echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
+	echo 0 > /sys/class/firmware/lp8501/loading
+	echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+Ex 2) Engine 2 and 3 are used at the same time::
+
+	echo 2 > /sys/bus/i2c/devices/xxxx/select_engine
+	sleep 1
+	echo 1 > /sys/class/firmware/lp8501/loading
+	echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
+	echo 0 > /sys/class/firmware/lp8501/loading
+	sleep 1
+	echo 3 > /sys/bus/i2c/devices/xxxx/select_engine
+	sleep 1
+	echo 1 > /sys/class/firmware/lp8501/loading
+	echo "9d0340ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
+	echo 0 > /sys/class/firmware/lp8501/loading
+	sleep 1
+	echo 1 > /sys/class/leds/d1/device/run_engine
+
+( 'run_engine' and 'firmware_cb' )
+
+The sequence of running the program data is common.
+
+But each device has own specific register addresses for commands.
+
+To support this, 'run_engine' and 'firmware_cb' are configurable in each driver.
+
+run_engine:
+	Control the selected engine
+firmware_cb:
+	The callback function after loading the firmware is done.
+
+	Chip specific commands for loading and updating program memory.
+
+( Predefined pattern data )
+
+Without the firmware interface, LP55xx driver provides another method for
+loading a LED pattern. That is 'predefined' pattern.
+
+A predefined pattern is defined in the platform data and load it(or them)
+via the sysfs if needed.
+
+To use the predefined pattern concept, 'patterns' and 'num_patterns' should be
+configured.
+
+Example of predefined pattern data::
+
+  /* mode_1: blinking data */
+  static const u8 mode_1[] = {
+		0x40, 0x00, 0x60, 0x00, 0x40, 0xFF, 0x60, 0x00,
+		};
+
+  /* mode_2: always on */
+  static const u8 mode_2[] = { 0x40, 0xFF, };
+
+  struct lp55xx_predef_pattern board_led_patterns[] = {
+	{
+		.r = mode_1,
+		.size_r = ARRAY_SIZE(mode_1),
+	},
+	{
+		.b = mode_2,
+		.size_b = ARRAY_SIZE(mode_2),
+	},
+  }
+
+  struct lp55xx_platform_data lp5562_pdata = {
+  ...
+	.patterns      = board_led_patterns,
+	.num_patterns  = ARRAY_SIZE(board_led_patterns),
+  };
+
+Then, mode_1 and mode_2 can be run via through the sysfs::
+
+  echo 1 > /sys/bus/i2c/devices/xxxx/led_pattern    # red blinking LED pattern
+  echo 2 > /sys/bus/i2c/devices/xxxx/led_pattern    # blue LED always on
+
+To stop running pattern::
+
+  echo 0 > /sys/bus/i2c/devices/xxxx/led_pattern
diff --git a/Documentation/leds/leds-lp55xx.txt b/Documentation/leds/leds-lp55xx.txt
deleted file mode 100644
index e23fa91ea722..000000000000
--- a/Documentation/leds/leds-lp55xx.txt
+++ /dev/null
@@ -1,194 +0,0 @@
-LP5521/LP5523/LP55231/LP5562/LP8501 Common Driver
-=================================================
-
-Authors: Milo(Woogyom) Kim <milo.kim@ti.com>
-
-Description
------------
-LP5521, LP5523/55231, LP5562 and LP8501 have common features as below.
-
-  Register access via the I2C
-  Device initialization/deinitialization
-  Create LED class devices for multiple output channels
-  Device attributes for user-space interface
-  Program memory for running LED patterns
-
-The LP55xx common driver provides these features using exported functions.
-  lp55xx_init_device() / lp55xx_deinit_device()
-  lp55xx_register_leds() / lp55xx_unregister_leds()
-  lp55xx_regsister_sysfs() / lp55xx_unregister_sysfs()
-
-( Driver Structure Data )
-
-In lp55xx common driver, two different data structure is used.
-
-o lp55xx_led
-  control multi output LED channels such as led current, channel index.
-o lp55xx_chip
-  general chip control such like the I2C and platform data.
-
-For example, LP5521 has maximum 3 LED channels.
-LP5523/55231 has 9 output channels.
-
-lp55xx_chip for LP5521 ... lp55xx_led #1
-                           lp55xx_led #2
-                           lp55xx_led #3
-
-lp55xx_chip for LP5523 ... lp55xx_led #1
-                           lp55xx_led #2
-                                 .
-                                 .
-                           lp55xx_led #9
-
-( Chip Dependent Code )
-
-To support device specific configurations, special structure
-'lpxx_device_config' is used.
-
-  Maximum number of channels
-  Reset command, chip enable command
-  Chip specific initialization
-  Brightness control register access
-  Setting LED output current
-  Program memory address access for running patterns
-  Additional device specific attributes
-
-( Firmware Interface )
-
-LP55xx family devices have the internal program memory for running
-various LED patterns.
-This pattern data is saved as a file in the user-land or
-hex byte string is written into the memory through the I2C.
-LP55xx common driver supports the firmware interface.
-
-LP55xx chips have three program engines.
-To load and run the pattern, the programming sequence is following.
-  (1) Select an engine number (1/2/3)
-  (2) Mode change to load
-  (3) Write pattern data into selected area
-  (4) Mode change to run
-
-The LP55xx common driver provides simple interfaces as below.
-select_engine : Select which engine is used for running program
-run_engine    : Start program which is loaded via the firmware interface
-firmware      : Load program data
-
-In case of LP5523, one more command is required, 'enginex_leds'.
-It is used for selecting LED output(s) at each engine number.
-In more details, please refer to 'leds-lp5523.txt'.
-
-For example, run blinking pattern in engine #1 of LP5521
-echo 1 > /sys/bus/i2c/devices/xxxx/select_engine
-echo 1 > /sys/class/firmware/lp5521/loading
-echo "4000600040FF6000" > /sys/class/firmware/lp5521/data
-echo 0 > /sys/class/firmware/lp5521/loading
-echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
-
-For example, run blinking pattern in engine #3 of LP55231
-Two LEDs are configured as pattern output channels.
-echo 3 > /sys/bus/i2c/devices/xxxx/select_engine
-echo 1 > /sys/class/firmware/lp55231/loading
-echo "9d0740ff7e0040007e00a0010000" > /sys/class/firmware/lp55231/data
-echo 0 > /sys/class/firmware/lp55231/loading
-echo "000001100" > /sys/bus/i2c/devices/xxxx/engine3_leds
-echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
-
-To start blinking patterns in engine #2 and #3 simultaneously,
-for idx in 2 3
-do
-  echo $idx > /sys/class/leds/red/device/select_engine
-  sleep 0.1
-  echo 1 > /sys/class/firmware/lp5521/loading
-  echo "4000600040FF6000" > /sys/class/firmware/lp5521/data
-  echo 0 > /sys/class/firmware/lp5521/loading
-done
-echo 1 > /sys/class/leds/red/device/run_engine
-
-Here is another example for LP5523.
-Full LED strings are selected by 'engine2_leds'.
-echo 2 > /sys/bus/i2c/devices/xxxx/select_engine
-echo 1 > /sys/class/firmware/lp5523/loading
-echo "9d80400004ff05ff437f0000" > /sys/class/firmware/lp5523/data
-echo 0 > /sys/class/firmware/lp5523/loading
-echo "111111111" > /sys/bus/i2c/devices/xxxx/engine2_leds
-echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
-
-As soon as 'loading' is set to 0, registered callback is called.
-Inside the callback, the selected engine is loaded and memory is updated.
-To run programmed pattern, 'run_engine' attribute should be enabled.
-
-The pattern sequence of LP8501 is similar to LP5523.
-However pattern data is specific.
-Ex 1) Engine 1 is used
-echo 1 > /sys/bus/i2c/devices/xxxx/select_engine
-echo 1 > /sys/class/firmware/lp8501/loading
-echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
-echo 0 > /sys/class/firmware/lp8501/loading
-echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
-
-Ex 2) Engine 2 and 3 are used at the same time
-echo 2 > /sys/bus/i2c/devices/xxxx/select_engine
-sleep 1
-echo 1 > /sys/class/firmware/lp8501/loading
-echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
-echo 0 > /sys/class/firmware/lp8501/loading
-sleep 1
-echo 3 > /sys/bus/i2c/devices/xxxx/select_engine
-sleep 1
-echo 1 > /sys/class/firmware/lp8501/loading
-echo "9d0340ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
-echo 0 > /sys/class/firmware/lp8501/loading
-sleep 1
-echo 1 > /sys/class/leds/d1/device/run_engine
-
-( 'run_engine' and 'firmware_cb' )
-The sequence of running the program data is common.
-But each device has own specific register addresses for commands.
-To support this, 'run_engine' and 'firmware_cb' are configurable in each driver.
-run_engine  : Control the selected engine
-firmware_cb : The callback function after loading the firmware is done.
-              Chip specific commands for loading and updating program memory.
-
-( Predefined pattern data )
-
-Without the firmware interface, LP55xx driver provides another method for
-loading a LED pattern. That is 'predefined' pattern.
-A predefined pattern is defined in the platform data and load it(or them)
-via the sysfs if needed.
-To use the predefined pattern concept, 'patterns' and 'num_patterns' should be
-configured.
-
-  Example of predefined pattern data:
-
-  /* mode_1: blinking data */
-  static const u8 mode_1[] = {
-		0x40, 0x00, 0x60, 0x00, 0x40, 0xFF, 0x60, 0x00,
-		};
-
-  /* mode_2: always on */
-  static const u8 mode_2[] = { 0x40, 0xFF, };
-
-  struct lp55xx_predef_pattern board_led_patterns[] = {
-	{
-		.r = mode_1,
-		.size_r = ARRAY_SIZE(mode_1),
-	},
-	{
-		.b = mode_2,
-		.size_b = ARRAY_SIZE(mode_2),
-	},
-  }
-
-  struct lp55xx_platform_data lp5562_pdata = {
-  ...
-	.patterns      = board_led_patterns,
-	.num_patterns  = ARRAY_SIZE(board_led_patterns),
-  };
-
-Then, mode_1 and mode_2 can be run via through the sysfs.
-
-  echo 1 > /sys/bus/i2c/devices/xxxx/led_pattern    # red blinking LED pattern
-  echo 2 > /sys/bus/i2c/devices/xxxx/led_pattern    # blue LED always on
-
-To stop running pattern,
-  echo 0 > /sys/bus/i2c/devices/xxxx/led_pattern
diff --git a/Documentation/leds/leds-mlxcpld.rst b/Documentation/leds/leds-mlxcpld.rst
new file mode 100644
index 000000000000..528582429e0b
--- /dev/null
+++ b/Documentation/leds/leds-mlxcpld.rst
@@ -0,0 +1,118 @@
+=======================================
+Kernel driver for Mellanox systems LEDs
+=======================================
+
+Provide system LED support for the nex Mellanox systems:
+"msx6710", "msx6720", "msb7700", "msn2700", "msx1410",
+"msn2410", "msb7800", "msn2740", "msn2100".
+
+Description
+-----------
+Driver provides the following LEDs for the systems "msx6710", "msx6720",
+"msb7700", "msn2700", "msx1410", "msn2410", "msb7800", "msn2740":
+
+  - mlxcpld:fan1:green
+  - mlxcpld:fan1:red
+  - mlxcpld:fan2:green
+  - mlxcpld:fan2:red
+  - mlxcpld:fan3:green
+  - mlxcpld:fan3:red
+  - mlxcpld:fan4:green
+  - mlxcpld:fan4:red
+  - mlxcpld:psu:green
+  - mlxcpld:psu:red
+  - mlxcpld:status:green
+  - mlxcpld:status:red
+
+ "status"
+  - CPLD reg offset: 0x20
+  - Bits [3:0]
+
+ "psu"
+  - CPLD reg offset: 0x20
+  - Bits [7:4]
+
+ "fan1"
+  - CPLD reg offset: 0x21
+  - Bits [3:0]
+
+ "fan2"
+  - CPLD reg offset: 0x21
+  - Bits [7:4]
+
+ "fan3"
+  - CPLD reg offset: 0x22
+  - Bits [3:0]
+
+ "fan4"
+  - CPLD reg offset: 0x22
+  - Bits [7:4]
+
+ Color mask for all the above LEDs:
+
+  [bit3,bit2,bit1,bit0] or
+  [bit7,bit6,bit5,bit4]:
+
+	- [0,0,0,0] = LED OFF
+	- [0,1,0,1] = Red static ON
+	- [1,1,0,1] = Green static ON
+	- [0,1,1,0] = Red blink 3Hz
+	- [1,1,1,0] = Green blink 3Hz
+	- [0,1,1,1] = Red blink 6Hz
+	- [1,1,1,1] = Green blink 6Hz
+
+Driver provides the following LEDs for the system "msn2100":
+
+  - mlxcpld:fan:green
+  - mlxcpld:fan:red
+  - mlxcpld:psu1:green
+  - mlxcpld:psu1:red
+  - mlxcpld:psu2:green
+  - mlxcpld:psu2:red
+  - mlxcpld:status:green
+  - mlxcpld:status:red
+  - mlxcpld:uid:blue
+
+ "status"
+  - CPLD reg offset: 0x20
+  - Bits [3:0]
+
+ "fan"
+  - CPLD reg offset: 0x21
+  - Bits [3:0]
+
+ "psu1"
+  - CPLD reg offset: 0x23
+  - Bits [3:0]
+
+ "psu2"
+  - CPLD reg offset: 0x23
+  - Bits [7:4]
+
+ "uid"
+  - CPLD reg offset: 0x24
+  - Bits [3:0]
+
+ Color mask for all the above LEDs, excepted uid:
+
+  [bit3,bit2,bit1,bit0] or
+  [bit7,bit6,bit5,bit4]:
+
+	- [0,0,0,0] = LED OFF
+	- [0,1,0,1] = Red static ON
+	- [1,1,0,1] = Green static ON
+	- [0,1,1,0] = Red blink 3Hz
+	- [1,1,1,0] = Green blink 3Hz
+	- [0,1,1,1] = Red blink 6Hz
+	- [1,1,1,1] = Green blink 6Hz
+
+ Color mask for uid LED:
+  [bit3,bit2,bit1,bit0]:
+
+	- [0,0,0,0] = LED OFF
+	- [1,1,0,1] = Blue static ON
+	- [1,1,1,0] = Blue blink 3Hz
+	- [1,1,1,1] = Blue blink 6Hz
+
+Driver supports HW blinking at 3Hz and 6Hz frequency (50% duty cycle).
+For 3Hz duty cylce is about 167 msec, for 6Hz is about 83 msec.
diff --git a/Documentation/leds/leds-mlxcpld.txt b/Documentation/leds/leds-mlxcpld.txt
deleted file mode 100644
index a0e8fd457117..000000000000
--- a/Documentation/leds/leds-mlxcpld.txt
+++ /dev/null
@@ -1,110 +0,0 @@
-Kernel driver for Mellanox systems LEDs
-=======================================
-
-Provide system LED support for the nex Mellanox systems:
-"msx6710", "msx6720", "msb7700", "msn2700", "msx1410",
-"msn2410", "msb7800", "msn2740", "msn2100".
-
-Description
------------
-Driver provides the following LEDs for the systems "msx6710", "msx6720",
-"msb7700", "msn2700", "msx1410", "msn2410", "msb7800", "msn2740":
-  mlxcpld:fan1:green
-  mlxcpld:fan1:red
-  mlxcpld:fan2:green
-  mlxcpld:fan2:red
-  mlxcpld:fan3:green
-  mlxcpld:fan3:red
-  mlxcpld:fan4:green
-  mlxcpld:fan4:red
-  mlxcpld:psu:green
-  mlxcpld:psu:red
-  mlxcpld:status:green
-  mlxcpld:status:red
-
- "status"
-  CPLD reg offset: 0x20
-  Bits [3:0]
-
- "psu"
-  CPLD reg offset: 0x20
-  Bits [7:4]
-
- "fan1"
-  CPLD reg offset: 0x21
-  Bits [3:0]
-
- "fan2"
-  CPLD reg offset: 0x21
-  Bits [7:4]
-
- "fan3"
-  CPLD reg offset: 0x22
-  Bits [3:0]
-
- "fan4"
-  CPLD reg offset: 0x22
-  Bits [7:4]
-
- Color mask for all the above LEDs:
-  [bit3,bit2,bit1,bit0] or
-  [bit7,bit6,bit5,bit4]:
-	[0,0,0,0] = LED OFF
-	[0,1,0,1] = Red static ON
-	[1,1,0,1] = Green static ON
-	[0,1,1,0] = Red blink 3Hz
-	[1,1,1,0] = Green blink 3Hz
-	[0,1,1,1] = Red blink 6Hz
-	[1,1,1,1] = Green blink 6Hz
-
-Driver provides the following LEDs for the system "msn2100":
-  mlxcpld:fan:green
-  mlxcpld:fan:red
-  mlxcpld:psu1:green
-  mlxcpld:psu1:red
-  mlxcpld:psu2:green
-  mlxcpld:psu2:red
-  mlxcpld:status:green
-  mlxcpld:status:red
-  mlxcpld:uid:blue
-
- "status"
-  CPLD reg offset: 0x20
-  Bits [3:0]
-
- "fan"
-  CPLD reg offset: 0x21
-  Bits [3:0]
-
- "psu1"
-  CPLD reg offset: 0x23
-  Bits [3:0]
-
- "psu2"
-  CPLD reg offset: 0x23
-  Bits [7:4]
-
- "uid"
-  CPLD reg offset: 0x24
-  Bits [3:0]
-
- Color mask for all the above LEDs, excepted uid:
-  [bit3,bit2,bit1,bit0] or
-  [bit7,bit6,bit5,bit4]:
-	[0,0,0,0] = LED OFF
-	[0,1,0,1] = Red static ON
-	[1,1,0,1] = Green static ON
-	[0,1,1,0] = Red blink 3Hz
-	[1,1,1,0] = Green blink 3Hz
-	[0,1,1,1] = Red blink 6Hz
-	[1,1,1,1] = Green blink 6Hz
-
- Color mask for uid LED:
-  [bit3,bit2,bit1,bit0]:
-	[0,0,0,0] = LED OFF
-	[1,1,0,1] = Blue static ON
-	[1,1,1,0] = Blue blink 3Hz
-	[1,1,1,1] = Blue blink 6Hz
-
-Driver supports HW blinking at 3Hz and 6Hz frequency (50% duty cycle).
-For 3Hz duty cylce is about 167 msec, for 6Hz is about 83 msec.
diff --git a/Documentation/leds/ledtrig-oneshot.rst b/Documentation/leds/ledtrig-oneshot.rst
new file mode 100644
index 000000000000..69fa3ea1d554
--- /dev/null
+++ b/Documentation/leds/ledtrig-oneshot.rst
@@ -0,0 +1,44 @@
+====================
+One-shot LED Trigger
+====================
+
+This is a LED trigger useful for signaling the user of an event where there are
+no clear trap points to put standard led-on and led-off settings.  Using this
+trigger, the application needs only to signal the trigger when an event has
+happened, than the trigger turns the LED on and than keeps it off for a
+specified amount of time.
+
+This trigger is meant to be usable both for sporadic and dense events.  In the
+first case, the trigger produces a clear single controlled blink for each
+event, while in the latter it keeps blinking at constant rate, as to signal
+that the events are arriving continuously.
+
+A one-shot LED only stays in a constant state when there are no events.  An
+additional "invert" property specifies if the LED has to stay off (normal) or
+on (inverted) when not rearmed.
+
+The trigger can be activated from user space on led class devices as shown
+below::
+
+  echo oneshot > trigger
+
+This adds sysfs attributes to the LED that are documented in:
+Documentation/ABI/testing/sysfs-class-led-trigger-oneshot
+
+Example use-case: network devices, initialization::
+
+  echo oneshot > trigger # set trigger for this led
+  echo 33 > delay_on     # blink at 1 / (33 + 33) Hz on continuous traffic
+  echo 33 > delay_off
+
+interface goes up::
+
+  echo 1 > invert # set led as normally-on, turn the led on
+
+packet received/transmitted::
+
+  echo 1 > shot # led starts blinking, ignored if already blinking
+
+interface goes down::
+
+  echo 0 > invert # set led as normally-off, turn the led off
diff --git a/Documentation/leds/ledtrig-oneshot.txt b/Documentation/leds/ledtrig-oneshot.txt
deleted file mode 100644
index fe57474a12e2..000000000000
--- a/Documentation/leds/ledtrig-oneshot.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-One-shot LED Trigger
-====================
-
-This is a LED trigger useful for signaling the user of an event where there are
-no clear trap points to put standard led-on and led-off settings.  Using this
-trigger, the application needs only to signal the trigger when an event has
-happened, than the trigger turns the LED on and than keeps it off for a
-specified amount of time.
-
-This trigger is meant to be usable both for sporadic and dense events.  In the
-first case, the trigger produces a clear single controlled blink for each
-event, while in the latter it keeps blinking at constant rate, as to signal
-that the events are arriving continuously.
-
-A one-shot LED only stays in a constant state when there are no events.  An
-additional "invert" property specifies if the LED has to stay off (normal) or
-on (inverted) when not rearmed.
-
-The trigger can be activated from user space on led class devices as shown
-below:
-
-  echo oneshot > trigger
-
-This adds sysfs attributes to the LED that are documented in:
-Documentation/ABI/testing/sysfs-class-led-trigger-oneshot
-
-Example use-case: network devices, initialization:
-
-  echo oneshot > trigger # set trigger for this led
-  echo 33 > delay_on     # blink at 1 / (33 + 33) Hz on continuous traffic
-  echo 33 > delay_off
-
-interface goes up:
-
-  echo 1 > invert # set led as normally-on, turn the led on
-
-packet received/transmitted:
-
-  echo 1 > shot # led starts blinking, ignored if already blinking
-
-interface goes down
-
-  echo 0 > invert # set led as normally-off, turn the led off
diff --git a/Documentation/leds/ledtrig-transient.rst b/Documentation/leds/ledtrig-transient.rst
new file mode 100644
index 000000000000..d921dc830cd0
--- /dev/null
+++ b/Documentation/leds/ledtrig-transient.rst
@@ -0,0 +1,167 @@
+=====================
+LED Transient Trigger
+=====================
+
+The leds timer trigger does not currently have an interface to activate
+a one shot timer. The current support allows for setting two timers, one for
+specifying how long a state to be on, and the second for how long the state
+to be off. The delay_on value specifies the time period an LED should stay
+in on state, followed by a delay_off value that specifies how long the LED
+should stay in off state. The on and off cycle repeats until the trigger
+gets deactivated. There is no provision for one time activation to implement
+features that require an on or off state to be held just once and then stay in
+the original state forever.
+
+Without one shot timer interface, user space can still use timer trigger to
+set a timer to hold a state, however when user space application crashes or
+goes away without deactivating the timer, the hardware will be left in that
+state permanently.
+
+As a specific example of this use-case, let's look at vibrate feature on
+phones. Vibrate function on phones is implemented using PWM pins on SoC or
+PMIC. There is a need to activate one shot timer to control the vibrate
+feature, to prevent user space crashes leaving the phone in vibrate mode
+permanently causing the battery to drain.
+
+Transient trigger addresses the need for one shot timer activation. The
+transient trigger can be enabled and disabled just like the other leds
+triggers.
+
+When an led class device driver registers itself, it can specify all leds
+triggers it supports and a default trigger. During registration, activation
+routine for the default trigger gets called. During registration of an led
+class device, the LED state does not change.
+
+When the driver unregisters, deactivation routine for the currently active
+trigger will be called, and LED state is changed to LED_OFF.
+
+Driver suspend changes the LED state to LED_OFF and resume doesn't change
+the state. Please note that there is no explicit interaction between the
+suspend and resume actions and the currently enabled trigger. LED state
+changes are suspended while the driver is in suspend state. Any timers
+that are active at the time driver gets suspended, continue to run, without
+being able to actually change the LED state. Once driver is resumed, triggers
+start functioning again.
+
+LED state changes are controlled using brightness which is a common led
+class device property. When brightness is set to 0 from user space via
+echo 0 > brightness, it will result in deactivating the current trigger.
+
+Transient trigger uses standard register and unregister interfaces. During
+trigger registration, for each led class device that specifies this trigger
+as its default trigger, trigger activation routine will get called. During
+registration, the LED state does not change, unless there is another trigger
+active, in which case LED state changes to LED_OFF.
+
+During trigger unregistration, LED state gets changed to LED_OFF.
+
+Transient trigger activation routine doesn't change the LED state. It
+creates its properties and does its initialization. Transient trigger
+deactivation routine, will cancel any timer that is active before it cleans
+up and removes the properties it created. It will restore the LED state to
+non-transient state. When driver gets suspended, irrespective of the transient
+state, the LED state changes to LED_OFF.
+
+Transient trigger can be enabled and disabled from user space on led class
+devices, that support this trigger as shown below::
+
+	echo transient > trigger
+	echo none > trigger
+
+NOTE:
+	Add a new property trigger state to control the state.
+
+This trigger exports three properties, activate, state, and duration. When
+transient trigger is activated these properties are set to default values.
+
+- duration allows setting timer value in msecs. The initial value is 0.
+- activate allows activating and deactivating the timer specified by
+  duration as needed. The initial and default value is 0.  This will allow
+  duration to be set after trigger activation.
+- state allows user to specify a transient state to be held for the specified
+  duration.
+
+	activate
+	      - one shot timer activate mechanism.
+		1 when activated, 0 when deactivated.
+		default value is zero when transient trigger is enabled,
+		to allow duration to be set.
+
+		activate state indicates a timer with a value of specified
+		duration running.
+		deactivated state indicates that there is no active timer
+		running.
+
+	duration
+	      - one shot timer value. When activate is set, duration value
+		is used to start a timer that runs once. This value doesn't
+		get changed by the trigger unless user does a set via
+		echo new_value > duration
+
+	state
+	      - transient state to be held. It has two values 0 or 1. 0 maps
+		to LED_OFF and 1 maps to LED_FULL. The specified state is
+		held for the duration of the one shot timer and then the
+		state gets changed to the non-transient state which is the
+		inverse of transient state.
+		If state = LED_FULL, when the timer runs out the state will
+		go back to LED_OFF.
+		If state = LED_OFF, when the timer runs out the state will
+		go back to LED_FULL.
+		Please note that current LED state is not checked prior to
+		changing the state to the specified state.
+		Driver could map these values to inverted depending on the
+		default states it defines for the LED in its brightness_set()
+		interface which is called from the led brightness_set()
+		interfaces to control the LED state.
+
+When timer expires activate goes back to deactivated state, duration is left
+at the set value to be used when activate is set at a future time. This will
+allow user app to set the time once and activate it to run it once for the
+specified value as needed. When timer expires, state is restored to the
+non-transient state which is the inverse of the transient state:
+
+	=================   ===============================================
+	echo 1 > activate   starts timer = duration when duration is not 0.
+	echo 0 > activate   cancels currently running timer.
+	echo n > duration   stores timer value to be used upon next
+			    activate. Currently active timer if
+			    any, continues to run for the specified time.
+	echo 0 > duration   stores timer value to be used upon next
+			    activate. Currently active timer if any,
+			    continues to run for the specified time.
+	echo 1 > state      stores desired transient state LED_FULL to be
+			    held for the specified duration.
+	echo 0 > state      stores desired transient state LED_OFF to be
+			    held for the specified duration.
+	=================   ===============================================
+
+What is not supported
+=====================
+
+- Timer activation is one shot and extending and/or shortening the timer
+  is not supported.
+
+Examples
+========
+
+use-case 1::
+
+	echo transient > trigger
+	echo n > duration
+	echo 1 > state
+
+repeat the following step as needed::
+
+	echo 1 > activate - start timer = duration to run once
+	echo 1 > activate - start timer = duration to run once
+	echo none > trigger
+
+This trigger is intended to be used for for the following example use cases:
+
+ - Control of vibrate (phones, tablets etc.) hardware by user space app.
+ - Use of LED by user space app as activity indicator.
+ - Use of LED by user space app as a kind of watchdog indicator -- as
+   long as the app is alive, it can keep the LED illuminated, if it dies
+   the LED will be extinguished automatically.
+ - Use by any user space app that needs a transient GPIO output.
diff --git a/Documentation/leds/ledtrig-transient.txt b/Documentation/leds/ledtrig-transient.txt
deleted file mode 100644
index 3bd38b487df1..000000000000
--- a/Documentation/leds/ledtrig-transient.txt
+++ /dev/null
@@ -1,152 +0,0 @@
-LED Transient Trigger
-=====================
-
-The leds timer trigger does not currently have an interface to activate
-a one shot timer. The current support allows for setting two timers, one for
-specifying how long a state to be on, and the second for how long the state
-to be off. The delay_on value specifies the time period an LED should stay
-in on state, followed by a delay_off value that specifies how long the LED
-should stay in off state. The on and off cycle repeats until the trigger
-gets deactivated. There is no provision for one time activation to implement
-features that require an on or off state to be held just once and then stay in
-the original state forever.
-
-Without one shot timer interface, user space can still use timer trigger to
-set a timer to hold a state, however when user space application crashes or
-goes away without deactivating the timer, the hardware will be left in that
-state permanently.
-
-As a specific example of this use-case, let's look at vibrate feature on
-phones. Vibrate function on phones is implemented using PWM pins on SoC or
-PMIC. There is a need to activate one shot timer to control the vibrate
-feature, to prevent user space crashes leaving the phone in vibrate mode
-permanently causing the battery to drain.
-
-Transient trigger addresses the need for one shot timer activation. The
-transient trigger can be enabled and disabled just like the other leds
-triggers.
-
-When an led class device driver registers itself, it can specify all leds
-triggers it supports and a default trigger. During registration, activation
-routine for the default trigger gets called. During registration of an led
-class device, the LED state does not change.
-
-When the driver unregisters, deactivation routine for the currently active
-trigger will be called, and LED state is changed to LED_OFF.
-
-Driver suspend changes the LED state to LED_OFF and resume doesn't change
-the state. Please note that there is no explicit interaction between the
-suspend and resume actions and the currently enabled trigger. LED state
-changes are suspended while the driver is in suspend state. Any timers
-that are active at the time driver gets suspended, continue to run, without
-being able to actually change the LED state. Once driver is resumed, triggers
-start functioning again.
-
-LED state changes are controlled using brightness which is a common led
-class device property. When brightness is set to 0 from user space via
-echo 0 > brightness, it will result in deactivating the current trigger.
-
-Transient trigger uses standard register and unregister interfaces. During
-trigger registration, for each led class device that specifies this trigger
-as its default trigger, trigger activation routine will get called. During
-registration, the LED state does not change, unless there is another trigger
-active, in which case LED state changes to LED_OFF.
-
-During trigger unregistration, LED state gets changed to LED_OFF.
-
-Transient trigger activation routine doesn't change the LED state. It
-creates its properties and does its initialization. Transient trigger
-deactivation routine, will cancel any timer that is active before it cleans
-up and removes the properties it created. It will restore the LED state to
-non-transient state. When driver gets suspended, irrespective of the transient
-state, the LED state changes to LED_OFF.
-
-Transient trigger can be enabled and disabled from user space on led class
-devices, that support this trigger as shown below:
-
-echo transient > trigger
-echo none > trigger
-
-NOTE: Add a new property trigger state to control the state.
-
-This trigger exports three properties, activate, state, and duration. When
-transient trigger is activated these properties are set to default values.
-
-- duration allows setting timer value in msecs. The initial value is 0.
-- activate allows activating and deactivating the timer specified by
-  duration as needed. The initial and default value is 0.  This will allow
-  duration to be set after trigger activation.
-- state allows user to specify a transient state to be held for the specified
-  duration.
-
-	activate - one shot timer activate mechanism.
-		1 when activated, 0 when deactivated.
-		default value is zero when transient trigger is enabled,
-		to allow duration to be set.
-
-		activate state indicates a timer with a value of specified
-		duration running.
-		deactivated state indicates that there is no active timer
-		running.
-
-	duration - one shot timer value. When activate is set, duration value
-		is used to start a timer that runs once. This value doesn't
-		get changed by the trigger unless user does a set via
-		echo new_value > duration
-
-	state - transient state to be held. It has two values 0 or 1. 0 maps
-		to LED_OFF and 1 maps to LED_FULL. The specified state is
-		held for the duration of the one shot timer and then the
-		state gets changed to the non-transient state which is the
-		inverse of transient state.
-		If state = LED_FULL, when the timer runs out the state will
-		go back to LED_OFF.
-		If state = LED_OFF, when the timer runs out the state will
-		go back to LED_FULL.
-		Please note that current LED state is not checked prior to
-		changing the state to the specified state.
-		Driver could map these values to inverted depending on the
-		default states it defines for the LED in its brightness_set()
-		interface which is called from the led brightness_set()
-		interfaces to control the LED state.
-
-When timer expires activate goes back to deactivated state, duration is left
-at the set value to be used when activate is set at a future time. This will
-allow user app to set the time once and activate it to run it once for the
-specified value as needed. When timer expires, state is restored to the
-non-transient state which is the inverse of the transient state.
-
-	echo 1 > activate - starts timer = duration when duration is not 0.
-	echo 0 > activate - cancels currently running timer.
-	echo n > duration - stores timer value to be used upon next
-                            activate. Currently active timer if
-                            any, continues to run for the specified time.
-	echo 0 > duration - stores timer value to be used upon next
-                            activate. Currently active timer if any,
-                            continues to run for the specified time.
-	echo 1 > state    - stores desired transient state LED_FULL to be
-			    held for the specified duration.
-	echo 0 > state    - stores desired transient state LED_OFF to be
-			    held for the specified duration.
-
-What is not supported:
-======================
-- Timer activation is one shot and extending and/or shortening the timer
-  is not supported.
-
-Example use-case 1:
-	echo transient > trigger
-	echo n > duration
-	echo 1 > state
-repeat the following step as needed:
-	echo 1 > activate - start timer = duration to run once
-	echo 1 > activate - start timer = duration to run once
-	echo none > trigger
-
-This trigger is intended to be used for for the following example use cases:
- - Control of vibrate (phones, tablets etc.) hardware by user space app.
- - Use of LED by user space app as activity indicator.
- - Use of LED by user space app as a kind of watchdog indicator -- as
-       long as the app is alive, it can keep the LED illuminated, if it dies
-       the LED will be extinguished automatically.
- - Use by any user space app that needs a transient GPIO output.
diff --git a/Documentation/leds/ledtrig-usbport.rst b/Documentation/leds/ledtrig-usbport.rst
new file mode 100644
index 000000000000..37c2505bfd57
--- /dev/null
+++ b/Documentation/leds/ledtrig-usbport.rst
@@ -0,0 +1,46 @@
+====================
+USB port LED trigger
+====================
+
+This LED trigger can be used for signalling to the user a presence of USB device
+in a given port. It simply turns on LED when device appears and turns it off
+when it disappears.
+
+It requires selecting USB ports that should be observed. All available ones are
+listed as separated entries in a "ports" subdirectory. Selecting is handled by
+echoing "1" to a chosen port.
+
+Please note that this trigger allows selecting multiple USB ports for a single
+LED.
+
+This can be useful in two cases:
+
+1) Device with single USB LED and few physical ports
+====================================================
+
+In such a case LED will be turned on as long as there is at least one connected
+USB device.
+
+2) Device with a physical port handled by few controllers
+=========================================================
+
+Some devices may have one controller per PHY standard. E.g. USB 3.0 physical
+port may be handled by ohci-platform, ehci-platform and xhci-hcd. If there is
+only one LED user will most likely want to assign ports from all 3 hubs.
+
+
+This trigger can be activated from user space on led class devices as shown
+below::
+
+  echo usbport > trigger
+
+This adds sysfs attributes to the LED that are documented in:
+Documentation/ABI/testing/sysfs-class-led-trigger-usbport
+
+Example use-case::
+
+  echo usbport > trigger
+  echo 1 > ports/usb1-port1
+  echo 1 > ports/usb2-port1
+  cat ports/usb1-port1
+  echo 0 > ports/usb1-port1
diff --git a/Documentation/leds/ledtrig-usbport.txt b/Documentation/leds/ledtrig-usbport.txt
deleted file mode 100644
index 69f54bfb4789..000000000000
--- a/Documentation/leds/ledtrig-usbport.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-USB port LED trigger
-====================
-
-This LED trigger can be used for signalling to the user a presence of USB device
-in a given port. It simply turns on LED when device appears and turns it off
-when it disappears.
-
-It requires selecting USB ports that should be observed. All available ones are
-listed as separated entries in a "ports" subdirectory. Selecting is handled by
-echoing "1" to a chosen port.
-
-Please note that this trigger allows selecting multiple USB ports for a single
-LED. This can be useful in two cases:
-
-1) Device with single USB LED and few physical ports
-
-In such a case LED will be turned on as long as there is at least one connected
-USB device.
-
-2) Device with a physical port handled by few controllers
-
-Some devices may have one controller per PHY standard. E.g. USB 3.0 physical
-port may be handled by ohci-platform, ehci-platform and xhci-hcd. If there is
-only one LED user will most likely want to assign ports from all 3 hubs.
-
-
-This trigger can be activated from user space on led class devices as shown
-below:
-
-  echo usbport > trigger
-
-This adds sysfs attributes to the LED that are documented in:
-Documentation/ABI/testing/sysfs-class-led-trigger-usbport
-
-Example use-case:
-
-  echo usbport > trigger
-  echo 1 > ports/usb1-port1
-  echo 1 > ports/usb2-port1
-  cat ports/usb1-port1
-  echo 0 > ports/usb1-port1
diff --git a/Documentation/leds/uleds.rst b/Documentation/leds/uleds.rst
new file mode 100644
index 000000000000..83221098009c
--- /dev/null
+++ b/Documentation/leds/uleds.rst
@@ -0,0 +1,37 @@
+==============
+Userspace LEDs
+==============
+
+The uleds driver supports userspace LEDs. This can be useful for testing
+triggers and can also be used to implement virtual LEDs.
+
+
+Usage
+=====
+
+When the driver is loaded, a character device is created at /dev/uleds. To
+create a new LED class device, open /dev/uleds and write a uleds_user_dev
+structure to it (found in kernel public header file linux/uleds.h)::
+
+    #define LED_MAX_NAME_SIZE 64
+
+    struct uleds_user_dev {
+	char name[LED_MAX_NAME_SIZE];
+    };
+
+A new LED class device will be created with the name given. The name can be
+any valid sysfs device node name, but consider using the LED class naming
+convention of "devicename:color:function".
+
+The current brightness is found by reading a single byte from the character
+device. Values are unsigned: 0 to 255. Reading will block until the brightness
+changes. The device node can also be polled to notify when the brightness value
+changes.
+
+The LED class device will be removed when the open file handle to /dev/uleds
+is closed.
+
+Multiple LED class devices are created by opening additional file handles to
+/dev/uleds.
+
+See tools/leds/uledmon.c for an example userspace program.
diff --git a/Documentation/leds/uleds.txt b/Documentation/leds/uleds.txt
deleted file mode 100644
index 13e375a580f9..000000000000
--- a/Documentation/leds/uleds.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Userspace LEDs
-==============
-
-The uleds driver supports userspace LEDs. This can be useful for testing
-triggers and can also be used to implement virtual LEDs.
-
-
-Usage
-=====
-
-When the driver is loaded, a character device is created at /dev/uleds. To
-create a new LED class device, open /dev/uleds and write a uleds_user_dev
-structure to it (found in kernel public header file linux/uleds.h).
-
-    #define LED_MAX_NAME_SIZE 64
-
-    struct uleds_user_dev {
-        char name[LED_MAX_NAME_SIZE];
-    };
-
-A new LED class device will be created with the name given. The name can be
-any valid sysfs device node name, but consider using the LED class naming
-convention of "devicename:color:function".
-
-The current brightness is found by reading a single byte from the character
-device. Values are unsigned: 0 to 255. Reading will block until the brightness
-changes. The device node can also be polled to notify when the brightness value
-changes.
-
-The LED class device will be removed when the open file handle to /dev/uleds
-is closed.
-
-Multiple LED class devices are created by opening additional file handles to
-/dev/uleds.
-
-See tools/leds/uledmon.c for an example userspace program.
-- 
cgit 


From 2605085fba22792f3d4a6b856c7c5a05492d1fde Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 28 Jun 2019 09:12:26 -0300
Subject: dt: leds-lm36274.txt: fix a broken reference to ti-lmu.txt

There's a typo there:
	ti_lmu.txt -> ti-lmu.txt

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Dan Murphy <dmurphy@ti.com>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 Documentation/devicetree/bindings/leds/leds-lm36274.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/leds/leds-lm36274.txt b/Documentation/devicetree/bindings/leds/leds-lm36274.txt
index 456a589c65f0..39c230d59a4d 100644
--- a/Documentation/devicetree/bindings/leds/leds-lm36274.txt
+++ b/Documentation/devicetree/bindings/leds/leds-lm36274.txt
@@ -6,7 +6,7 @@ up to 29V total output voltage. The 11-bit LED current is programmable via
 the I2C bus and/or controlled via a logic level PWM input from 60 uA to 30 mA.
 
 Parent device properties are documented in
-Documentation/devicetree/bindings/mfd/ti_lmu.txt
+Documentation/devicetree/bindings/mfd/ti-lmu.txt
 
 Regulator properties are documented in
 Documentation/devicetree/bindings/regulator/lm363x-regulator.txt
-- 
cgit 


From 9d3de3c58347623b5e71d554628a1571cd3142ad Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Thu, 27 Jun 2019 17:31:43 +0200
Subject: dt-bindings: net: Add YAML schemas for the generic Ethernet options

The Ethernet controllers have a good number of generic options that can be
needed in a device tree. Add a YAML schemas for those.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../bindings/net/ethernet-controller.yaml          | 206 +++++++++++++++++++++
 Documentation/devicetree/bindings/net/ethernet.txt |  68 +------
 .../devicetree/bindings/net/fixed-link.txt         |  55 +-----
 3 files changed, 208 insertions(+), 121 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/ethernet-controller.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
new file mode 100644
index 000000000000..0e7c31794ae6
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -0,0 +1,206 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/ethernet-controller.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ethernet Controller Generic Binding
+
+maintainers:
+  - David S. Miller <davem@davemloft.net>
+
+properties:
+  $nodename:
+    pattern: "^ethernet(@.*)?$"
+
+  local-mac-address:
+    allOf:
+      - $ref: /schemas/types.yaml#definitions/uint8-array
+      - items:
+          - minItems: 6
+            maxItems: 6
+    description:
+      Specifies the MAC address that was assigned to the network device.
+
+  mac-address:
+    allOf:
+      - $ref: /schemas/types.yaml#definitions/uint8-array
+      - items:
+          - minItems: 6
+            maxItems: 6
+    description:
+      Specifies the MAC address that was last used by the boot
+      program; should be used in cases where the MAC address assigned
+      to the device by the boot program is different from the
+      local-mac-address property.
+
+  max-frame-size:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description:
+      Maximum transfer unit (IEEE defined MTU), rather than the
+      maximum frame size (there\'s contradiction in the Devicetree
+      Specification).
+
+  max-speed:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description:
+      Specifies maximum speed in Mbit/s supported by the device.
+
+  nvmem-cells:
+    maxItems: 1
+    description:
+      Reference to an nvmem node for the MAC address
+
+  nvmem-cells-names:
+    const: mac-address
+
+  phy-connection-type:
+    description:
+      Operation mode of the PHY interface
+    enum:
+      # There is not a standard bus between the MAC and the PHY,
+      # something proprietary is being used to embed the PHY in the
+      # MAC.
+      - internal
+      - mii
+      - gmii
+      - sgmii
+      - qsgmii
+      - tbi
+      - rev-mii
+      - rmii
+
+      # RX and TX delays are added by the MAC when required
+      - rgmii
+
+      # RGMII with internal RX and TX delays provided by the PHY,
+      # the MAC should not add the RX or TX delays in this case
+      - rgmii-id
+
+      # RGMII with internal RX delay provided by the PHY, the MAC
+      # should not add an RX delay in this case
+      - rgmii-rxid
+
+      # RGMII with internal TX delay provided by the PHY, the MAC
+      # should not add an TX delay in this case
+      - rgmii-txid
+      - rtbi
+      - smii
+      - xgmii
+      - trgmii
+      - 1000base-x
+      - 2500base-x
+      - rxaui
+      - xaui
+
+      # 10GBASE-KR, XFI, SFI
+      - 10gbase-kr
+      - usxgmii
+
+  phy-mode:
+    $ref: "#/properties/phy-connection-type"
+
+  phy-handle:
+    $ref: /schemas/types.yaml#definitions/phandle
+    description:
+      Specifies a reference to a node representing a PHY device.
+
+  phy:
+    $ref: "#/properties/phy-handle"
+    deprecated: true
+
+  phy-device:
+    $ref: "#/properties/phy-handle"
+    deprecated: true
+
+  rx-fifo-depth:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description:
+      The size of the controller\'s receive fifo in bytes. This is used
+      for components that can have configurable receive fifo sizes,
+      and is useful for determining certain configuration settings
+      such as flow control thresholds.
+
+  tx-fifo-depth:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description:
+      The size of the controller\'s transmit fifo in bytes. This
+      is used for components that can have configurable fifo sizes.
+
+  managed:
+    allOf:
+      - $ref: /schemas/types.yaml#definitions/string
+      - default: auto
+        enum:
+          - auto
+          - in-band-status
+    description:
+      Specifies the PHY management type. If auto is set and fixed-link
+      is not specified, it uses MDIO for management.
+
+  fixed-link:
+    allOf:
+      - if:
+          type: array
+        then:
+          deprecated: true
+          minItems: 1
+          maxItems: 1
+          items:
+            items:
+              - minimum: 0
+                maximum: 31
+                description:
+                  Emulated PHY ID, choose any but unique to the all
+                  specified fixed-links
+
+              - enum: [0, 1]
+                description:
+                  Duplex configuration. 0 for half duplex or 1 for
+                  full duplex
+
+              - enum: [10, 100, 1000]
+                description:
+                  Link speed in Mbits/sec.
+
+              - enum: [0, 1]
+                description:
+                  Pause configuration. 0 for no pause, 1 for pause
+
+              - enum: [0, 1]
+                description:
+                  Asymmetric pause configuration. 0 for no asymmetric
+                  pause, 1 for asymmetric pause
+
+
+      - if:
+          type: object
+        then:
+          properties:
+            speed:
+              allOf:
+                - $ref: /schemas/types.yaml#definitions/uint32
+                - enum: [10, 100, 1000]
+              description:
+                Link speed.
+
+            full-duplex:
+              $ref: /schemas/types.yaml#definitions/flag
+              description:
+                Indicates that full-duplex is used. When absent, half
+                duplex is assumed.
+
+            asym-pause:
+              $ref: /schemas/types.yaml#definitions/flag
+              description:
+                Indicates that asym_pause should be enabled.
+
+            link-gpios:
+              maxItems: 1
+              description:
+                GPIO to determine if the link is up
+
+          required:
+            - speed
+
+...
diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt
index e88c3641d613..5df413d01be2 100644
--- a/Documentation/devicetree/bindings/net/ethernet.txt
+++ b/Documentation/devicetree/bindings/net/ethernet.txt
@@ -1,67 +1 @@
-The following properties are common to the Ethernet controllers:
-
-NOTE: All 'phy*' properties documented below are Ethernet specific. For the
-generic PHY 'phys' property, see
-Documentation/devicetree/bindings/phy/phy-bindings.txt.
-
-- mac-address: array of 6 bytes, specifies the MAC address that was last used by
-  the boot program; should be used in cases where the MAC address assigned to
-  the device by the boot program is different from the "local-mac-address"
-  property;
-- local-mac-address: array of 6 bytes, specifies the MAC address that was
-  assigned to the network device;
-- nvmem-cells: phandle, reference to an nvmem node for the MAC address
-- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used
-- max-speed: number, specifies maximum speed in Mbit/s supported by the device;
-- max-frame-size: number, maximum transfer unit (IEEE defined MTU), rather than
-  the maximum frame size (there's contradiction in the Devicetree
-  Specification).
-- phy-mode: string, operation mode of the PHY interface. This is now a de-facto
-  standard property; supported values are:
-  * "internal" (Internal means there is not a standard bus between the MAC and
-     the PHY, something proprietary is being used to embed the PHY in the MAC.)
-  * "mii"
-  * "gmii"
-  * "sgmii"
-  * "qsgmii"
-  * "tbi"
-  * "rev-mii"
-  * "rmii"
-  * "rgmii" (RX and TX delays are added by the MAC when required)
-  * "rgmii-id" (RGMII with internal RX and TX delays provided by the PHY, the
-     MAC should not add the RX or TX delays in this case)
-  * "rgmii-rxid" (RGMII with internal RX delay provided by the PHY, the MAC
-     should not add an RX delay in this case)
-  * "rgmii-txid" (RGMII with internal TX delay provided by the PHY, the MAC
-     should not add an TX delay in this case)
-  * "rtbi"
-  * "smii"
-  * "xgmii"
-  * "trgmii"
-  * "1000base-x",
-  * "2500base-x",
-  * "rxaui"
-  * "xaui"
-  * "10gbase-kr" (10GBASE-KR, XFI, SFI)
-- phy-connection-type: the same as "phy-mode" property but described in the
-  Devicetree Specification;
-- phy-handle: phandle, specifies a reference to a node representing a PHY
-  device; this property is described in the Devicetree Specification and so
-  preferred;
-- phy: the same as "phy-handle" property, not recommended for new bindings.
-- phy-device: the same as "phy-handle" property, not recommended for new
-  bindings.
-- rx-fifo-depth: the size of the controller's receive fifo in bytes. This
-  is used for components that can have configurable receive fifo sizes,
-  and is useful for determining certain configuration settings such as
-  flow control thresholds.
-- tx-fifo-depth: the size of the controller's transmit fifo in bytes. This
-  is used for components that can have configurable fifo sizes.
-- managed: string, specifies the PHY management type. Supported values are:
-  "auto", "in-band-status". "auto" is the default, it usess MDIO for
-  management if fixed-link is not specified.
-
-Child nodes of the Ethernet controller are typically the individual PHY devices
-connected via the MDIO bus (sometimes the MDIO bus controller is separate).
-They are described in the phy.txt file in this same directory.
-For non-MDIO PHY management see fixed-link.txt.
+This file has moved to ethernet-controller.yaml.
diff --git a/Documentation/devicetree/bindings/net/fixed-link.txt b/Documentation/devicetree/bindings/net/fixed-link.txt
index ec5d889fe3d8..5df413d01be2 100644
--- a/Documentation/devicetree/bindings/net/fixed-link.txt
+++ b/Documentation/devicetree/bindings/net/fixed-link.txt
@@ -1,54 +1 @@
-Fixed link Device Tree binding
-------------------------------
-
-Some Ethernet MACs have a "fixed link", and are not connected to a
-normal MDIO-managed PHY device. For those situations, a Device Tree
-binding allows to describe a "fixed link".
-
-Such a fixed link situation is described by creating a 'fixed-link'
-sub-node of the Ethernet MAC device node, with the following
-properties:
-
-* 'speed' (integer, mandatory), to indicate the link speed. Accepted
-  values are 10, 100 and 1000
-* 'full-duplex' (boolean, optional), to indicate that full duplex is
-  used. When absent, half duplex is assumed.
-* 'pause' (boolean, optional), to indicate that pause should be
-  enabled.
-* 'asym-pause' (boolean, optional), to indicate that asym_pause should
-  be enabled.
-* 'link-gpios' ('gpio-list', optional), to indicate if a gpio can be read
-  to determine if the link is up.
-
-Old, deprecated 'fixed-link' binding:
-
-* A 'fixed-link' property in the Ethernet MAC node, with 5 cells, of the
-  form <a b c d e> with the following accepted values:
-  - a: emulated PHY ID, choose any but but unique to the all specified
-    fixed-links, from 0 to 31
-  - b: duplex configuration: 0 for half duplex, 1 for full duplex
-  - c: link speed in Mbits/sec, accepted values are: 10, 100 and 1000
-  - d: pause configuration: 0 for no pause, 1 for pause
-  - e: asymmetric pause configuration: 0 for no asymmetric pause, 1 for
-    asymmetric pause
-
-Examples:
-
-ethernet@0 {
-	...
-	fixed-link {
-	      speed = <1000>;
-	      full-duplex;
-	};
-	...
-};
-
-ethernet@1 {
-	...
-	fixed-link {
-	      speed = <1000>;
-	      pause;
-	      link-gpios = <&gpio0 12 GPIO_ACTIVE_HIGH>;
-	};
-	...
-};
+This file has moved to ethernet-controller.yaml.
-- 
cgit 


From d8704342c10990c961600b6293eed5e38ad6bc9e Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Thu, 27 Jun 2019 17:31:44 +0200
Subject: dt-bindings: net: Add a YAML schemas for the generic PHY options

The networking PHYs have a number of available device tree properties that
can be used in their device tree node. Add a YAML schemas for those.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
[robh: drop maxItems from reg]
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/net/ethernet-phy.yaml      | 178 +++++++++++++++++++++
 Documentation/devicetree/bindings/net/phy.txt      |  80 +--------
 2 files changed, 179 insertions(+), 79 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/ethernet-phy.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
new file mode 100644
index 000000000000..b8d87135e11f
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
@@ -0,0 +1,178 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/ethernet-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ethernet PHY Generic Binding
+
+maintainers:
+  - Andrew Lunn <andrew@lunn.ch>
+  - Florian Fainelli <f.fainelli@gmail.com>
+  - Heiner Kallweit <hkallweit1@gmail.com>
+
+# The dt-schema tools will generate a select statement first by using
+# the compatible, and second by using the node name if any. In our
+# case, the node name is the one we want to match on, while the
+# compatible is optional.
+select:
+  properties:
+    $nodename:
+      pattern: "^ethernet-phy(@[a-f0-9]+)?$"
+
+  required:
+    - $nodename
+
+properties:
+  $nodename:
+    pattern: "^ethernet-phy(@[a-f0-9]+)?$"
+
+  compatible:
+    oneOf:
+      - const: ethernet-phy-ieee802.3-c22
+        description: PHYs that implement IEEE802.3 clause 22
+      - const: ethernet-phy-ieee802.3-c45
+        description: PHYs that implement IEEE802.3 clause 45
+      - pattern: "^ethernet-phy-id[a-f0-9]{4}\\.[a-f0-9]{4}$"
+        description:
+          If the PHY reports an incorrect ID (or none at all) then the
+          compatible list may contain an entry with the correct PHY ID
+          in the above form.
+          The first group of digits is the 16 bit Phy Identifier 1
+          register, this is the chip vendor OUI bits 3:18. The
+          second group of digits is the Phy Identifier 2 register,
+          this is the chip vendor OUI bits 19:24, followed by 10
+          bits of a vendor specific ID.
+      - items:
+          - pattern: "^ethernet-phy-id[a-f0-9]{4}\\.[a-f0-9]{4}$"
+          - const: ethernet-phy-ieee802.3-c45
+
+  reg:
+    minimum: 0
+    maximum: 31
+    description:
+      The ID number for the PHY.
+
+  interrupts:
+    maxItems: 1
+
+  max-speed:
+    enum:
+      - 10
+      - 100
+      - 1000
+      - 2500
+      - 5000
+      - 10000
+      - 20000
+      - 25000
+      - 40000
+      - 50000
+      - 56000
+      - 100000
+      - 200000
+    description:
+      Maximum PHY supported speed in Mbits / seconds.
+
+  broken-turn-around:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      If set, indicates the PHY device does not correctly release
+      the turn around line low at the end of a MDIO transaction.
+
+  enet-phy-lane-swap:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      If set, indicates the PHY will swap the TX/RX lanes to
+      compensate for the board being designed with the lanes
+      swapped.
+
+  eee-broken-100tx:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      Mark the corresponding energy efficient ethernet mode as
+      broken and request the ethernet to stop advertising it.
+
+  eee-broken-1000t:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      Mark the corresponding energy efficient ethernet mode as
+      broken and request the ethernet to stop advertising it.
+
+  eee-broken-10gt:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      Mark the corresponding energy efficient ethernet mode as
+      broken and request the ethernet to stop advertising it.
+
+  eee-broken-1000kx:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      Mark the corresponding energy efficient ethernet mode as
+      broken and request the ethernet to stop advertising it.
+
+  eee-broken-10gkx4:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      Mark the corresponding energy efficient ethernet mode as
+      broken and request the ethernet to stop advertising it.
+
+  eee-broken-10gkr:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      Mark the corresponding energy efficient ethernet mode as
+      broken and request the ethernet to stop advertising it.
+
+  phy-is-integrated:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      If set, indicates that the PHY is integrated into the same
+      physical package as the Ethernet MAC. If needed, muxers
+      should be configured to ensure the integrated PHY is
+      used. The absence of this property indicates the muxers
+      should be configured so that the external PHY is used.
+
+  resets:
+    maxItems: 1
+
+  reset-names:
+    const: phy
+
+  reset-gpios:
+    maxItems: 1
+    description:
+      The GPIO phandle and specifier for the PHY reset signal.
+
+  reset-assert-us:
+    description:
+      Delay after the reset was asserted in microseconds. If this
+      property is missing the delay will be skipped.
+
+  reset-deassert-us:
+    description:
+      Delay after the reset was deasserted in microseconds. If
+      this property is missing the delay will be skipped.
+
+required:
+  - reg
+  - interrupts
+
+examples:
+  - |
+    ethernet {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        ethernet-phy@0 {
+            compatible = "ethernet-phy-id0141.0e90", "ethernet-phy-ieee802.3-c45";
+            interrupt-parent = <&PIC>;
+            interrupts = <35 1>;
+            reg = <0>;
+
+            resets = <&rst 8>;
+            reset-names = "phy";
+            reset-gpios = <&gpio1 4 1>;
+            reset-assert-us = <1000>;
+            reset-deassert-us = <2000>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/net/phy.txt b/Documentation/devicetree/bindings/net/phy.txt
index 9b9e5b1765dd..2399ee60caed 100644
--- a/Documentation/devicetree/bindings/net/phy.txt
+++ b/Documentation/devicetree/bindings/net/phy.txt
@@ -1,79 +1 @@
-PHY nodes
-
-Required properties:
-
- - interrupts : interrupt specifier for the sole interrupt.
- - reg : The ID number for the phy, usually a small integer
-
-Optional Properties:
-
-- compatible: Compatible list, may contain
-  "ethernet-phy-ieee802.3-c22" or "ethernet-phy-ieee802.3-c45" for
-  PHYs that implement IEEE802.3 clause 22 or IEEE802.3 clause 45
-  specifications. If neither of these are specified, the default is to
-  assume clause 22.
-
-  If the PHY reports an incorrect ID (or none at all) then the
-  "compatible" list may contain an entry with the correct PHY ID in the
-  form: "ethernet-phy-idAAAA.BBBB" where
-     AAAA - The value of the 16 bit Phy Identifier 1 register as
-            4 hex digits. This is the chip vendor OUI bits 3:18
-     BBBB - The value of the 16 bit Phy Identifier 2 register as
-            4 hex digits. This is the chip vendor OUI bits 19:24,
-            followed by 10 bits of a vendor specific ID.
-
-  The compatible list should not contain other values than those
-  listed here.
-
-- max-speed: Maximum PHY supported speed (10, 100, 1000...)
-
-- broken-turn-around: If set, indicates the PHY device does not correctly
-  release the turn around line low at the end of a MDIO transaction.
-
-- enet-phy-lane-swap: If set, indicates the PHY will swap the TX/RX lanes to
-  compensate for the board being designed with the lanes swapped.
-
-- enet-phy-lane-no-swap: If set, indicates that PHY will disable swap of the
-  TX/RX lanes. This property allows the PHY to work correcly after e.g. wrong
-  bootstrap configuration caused by issues in PCB layout design.
-
-- eee-broken-100tx:
-- eee-broken-1000t:
-- eee-broken-10gt:
-- eee-broken-1000kx:
-- eee-broken-10gkx4:
-- eee-broken-10gkr:
-  Mark the corresponding energy efficient ethernet mode as broken and
-  request the ethernet to stop advertising it.
-
-- phy-is-integrated: If set, indicates that the PHY is integrated into the same
-  physical package as the Ethernet MAC. If needed, muxers should be configured
-  to ensure the integrated PHY is used. The absence of this property indicates
-  the muxers should be configured so that the external PHY is used.
-
-- resets: The reset-controller phandle and specifier for the PHY reset signal.
-
-- reset-names: Must be "phy" for the PHY reset signal.
-
-- reset-gpios: The GPIO phandle and specifier for the PHY reset signal.
-
-- reset-assert-us: Delay after the reset was asserted in microseconds.
-  If this property is missing the delay will be skipped.
-
-- reset-deassert-us: Delay after the reset was deasserted in microseconds.
-  If this property is missing the delay will be skipped.
-
-Example:
-
-ethernet-phy@0 {
-	compatible = "ethernet-phy-id0141.0e90", "ethernet-phy-ieee802.3-c22";
-	interrupt-parent = <&PIC>;
-	interrupts = <35 IRQ_TYPE_EDGE_RISING>;
-	reg = <0>;
-
-	resets = <&rst 8>;
-	reset-names = "phy";
-	reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
-	reset-assert-us = <1000>;
-	reset-deassert-us = <2000>;
-};
+This file has moved to ethernet-phy.yaml.
-- 
cgit 


From 62d77ff7ecbff1377d5a167f18156bc69cdbaf25 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Thu, 27 Jun 2019 17:31:45 +0200
Subject: dt-bindings: net: Add a YAML schemas for the generic MDIO options

The MDIO buses have a number of available device tree properties that can
be used in their device tree node. Add a YAML schemas for those.

Suggested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/net/mdio.txt  | 38 +-----------------
 Documentation/devicetree/bindings/net/mdio.yaml | 51 +++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 37 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/mdio.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/mdio.txt b/Documentation/devicetree/bindings/net/mdio.txt
index e3e1603f256c..cf8a0105488e 100644
--- a/Documentation/devicetree/bindings/net/mdio.txt
+++ b/Documentation/devicetree/bindings/net/mdio.txt
@@ -1,37 +1 @@
-Common MDIO bus properties.
-
-These are generic properties that can apply to any MDIO bus.
-
-Optional properties:
-- reset-gpios: One GPIO that control the RESET lines of all PHYs on that MDIO
-  bus.
-- reset-delay-us: RESET pulse width in microseconds.
-
-A list of child nodes, one per device on the bus is expected. These
-should follow the generic phy.txt, or a device specific binding document.
-
-The 'reset-delay-us' indicates the RESET signal pulse width in microseconds and
-applies to all PHY devices. It must therefore be appropriately determined based
-on all PHY requirements (maximum value of all per-PHY RESET pulse widths).
-
-Example :
-This example shows these optional properties, plus other properties
-required for the TI Davinci MDIO driver.
-
-	davinci_mdio: ethernet@5c030000 {
-		compatible = "ti,davinci_mdio";
-		reg = <0x5c030000 0x1000>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		reset-gpios = <&gpio2 5 GPIO_ACTIVE_LOW>;
-		reset-delay-us = <2>;
-
-		ethphy0: ethernet-phy@1 {
-			reg = <1>;
-		};
-
-		ethphy1: ethernet-phy@3 {
-			reg = <3>;
-		};
-	};
+This file has moved to mdio.yaml.
diff --git a/Documentation/devicetree/bindings/net/mdio.yaml b/Documentation/devicetree/bindings/net/mdio.yaml
new file mode 100644
index 000000000000..b8fa8251c4bc
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/mdio.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/mdio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MDIO Bus Generic Binding
+
+maintainers:
+  - Andrew Lunn <andrew@lunn.ch>
+  - Florian Fainelli <f.fainelli@gmail.com>
+  - Heiner Kallweit <hkallweit1@gmail.com>
+
+description:
+  These are generic properties that can apply to any MDIO bus. Any
+  MDIO bus must have a list of child nodes, one per device on the
+  bus. These should follow the generic ethernet-phy.yaml document, or
+  a device specific binding document.
+
+properties:
+  reset-gpios:
+    maxItems: 1
+    description:
+      The phandle and specifier for the GPIO that controls the RESET
+      lines of all PHYs on that MDIO bus.
+
+  reset-delay-us:
+    description:
+      RESET pulse width in microseconds. It applies to all PHY devices
+      and must therefore be appropriately determined based on all PHY
+      requirements (maximum value of all per-PHY RESET pulse widths).
+
+examples:
+  - |
+    davinci_mdio: mdio@5c030000 {
+        compatible = "ti,davinci_mdio";
+        reg = <0x5c030000 0x1000>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        reset-gpios = <&gpio2 5 1>;
+        reset-delay-us = <2>;
+
+        ethphy0: ethernet-phy@1 {
+            reg = <1>;
+        };
+
+        ethphy1: ethernet-phy@3 {
+            reg = <3>;
+        };
+    };
-- 
cgit 


From 4abe7a3a096b47fb3f645fd40dcd42f6ac566b31 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Thu, 27 Jun 2019 17:31:47 +0200
Subject: dt-bindings: net: phy: The interrupt property is not mandatory

Unlike what was initially claimed in the PHY binding, the interrupt
property of a PHY can be omitted, and the OS will turn to polling instead.

Document that.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/net/ethernet-phy.yaml | 1 -
 1 file changed, 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
index b8d87135e11f..f70f18ff821f 100644
--- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
@@ -155,7 +155,6 @@ properties:
 
 required:
   - reg
-  - interrupts
 
 examples:
   - |
-- 
cgit 


From 8c5b094476256f641b3fd790cdb86c9bdb2c9fec Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Thu, 27 Jun 2019 17:31:48 +0200
Subject: dt-bindings: net: sun4i-emac: Convert the binding to a schemas

Switch our Allwinner A10 EMAC controller binding to a YAML schema to enable
the DT validation.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../bindings/net/allwinner,sun4i-a10-emac.yaml     | 56 ++++++++++++++++++++++
 .../bindings/net/allwinner,sun4i-emac.txt          | 19 --------
 2 files changed, 56 insertions(+), 19 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
 delete mode 100644 Documentation/devicetree/bindings/net/allwinner,sun4i-emac.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
new file mode 100644
index 000000000000..792196bf4abd
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/allwinner,sun4i-a10-emac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 EMAC Ethernet Controller Device Tree Bindings
+
+allOf:
+  - $ref: "ethernet-controller.yaml#"
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  compatible:
+    const: allwinner,sun4i-a10-emac
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  allwinner,sram:
+    description: Phandle to the device SRAM
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - phy-handle
+  - allwinner,sram
+
+examples:
+  - |
+    emac: ethernet@1c0b000 {
+        compatible = "allwinner,sun4i-a10-emac";
+        reg = <0x01c0b000 0x1000>;
+        interrupts = <55>;
+        clocks = <&ahb_gates 17>;
+        phy-handle = <&phy0>;
+        allwinner,sram = <&emac_sram 1>;
+    };
+
+# FIXME: We should set it, but it would report all the generic
+# properties as additional properties.
+# additionalProperties: false
+
+...
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-emac.txt b/Documentation/devicetree/bindings/net/allwinner,sun4i-emac.txt
deleted file mode 100644
index e98118aef5f6..000000000000
--- a/Documentation/devicetree/bindings/net/allwinner,sun4i-emac.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-* Allwinner EMAC ethernet controller
-
-Required properties:
-- compatible: should be "allwinner,sun4i-a10-emac" (Deprecated:
-              "allwinner,sun4i-emac")
-- reg: address and length of the register set for the device.
-- interrupts: interrupt for the device
-- phy: see ethernet.txt file in the same directory.
-- clocks: A phandle to the reference clock for this device
-
-Example:
-
-emac: ethernet@1c0b000 {
-       compatible = "allwinner,sun4i-a10-emac";
-       reg = <0x01c0b000 0x1000>;
-       interrupts = <55>;
-       clocks = <&ahb_gates 17>;
-       phy = <&phy0>;
-};
-- 
cgit 


From 7a47b908156c94c2bd3ab8dc412d1983d92cca40 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Thu, 27 Jun 2019 17:31:49 +0200
Subject: dt-bindings: net: sun4i-mdio: Convert the binding to a schemas

Switch our Allwinner A10 MDIO controller binding to a YAML schema to enable
the DT validation.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../bindings/net/allwinner,sun4i-a10-mdio.yaml     | 70 ++++++++++++++++++++++
 .../bindings/net/allwinner,sun4i-mdio.txt          | 27 ---------
 2 files changed, 70 insertions(+), 27 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml
 delete mode 100644 Documentation/devicetree/bindings/net/allwinner,sun4i-mdio.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml
new file mode 100644
index 000000000000..df24d9d969f7
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/allwinner,sun4i-a10-mdio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 MDIO Controller Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+allOf:
+  - $ref: "mdio.yaml#"
+
+# Select every compatible, including the deprecated ones. This way, we
+# will be able to report a warning when we have that compatible, since
+# we will validate the node thanks to the select, but won't report it
+# as a valid value in the compatible property description
+select:
+  properties:
+    compatible:
+      enum:
+        - allwinner,sun4i-a10-mdio
+
+        # Deprecated
+        - allwinner,sun4i-mdio
+
+  required:
+    - compatible
+
+properties:
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun4i-a10-mdio
+
+  reg:
+    maxItems: 1
+
+  phy-supply:
+    description: PHY regulator
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    mdio@1c0b080 {
+        compatible = "allwinner,sun4i-a10-mdio";
+        reg = <0x01c0b080 0x14>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+        phy-supply = <&reg_emac_3v3>;
+
+        phy0: ethernet-phy@0 {
+            reg = <0>;
+        };
+    };
+
+# FIXME: We should set it, but it would report all the generic
+# properties as additional properties.
+# additionalProperties: false
+
+...
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-mdio.txt b/Documentation/devicetree/bindings/net/allwinner,sun4i-mdio.txt
deleted file mode 100644
index ab5b8613b0ef..000000000000
--- a/Documentation/devicetree/bindings/net/allwinner,sun4i-mdio.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-* Allwinner A10 MDIO Ethernet Controller interface
-
-Required properties:
-- compatible: should be "allwinner,sun4i-a10-mdio"
-              (Deprecated: "allwinner,sun4i-mdio").
-- reg: address and length of the register set for the device.
-
-Optional properties:
-- phy-supply: phandle to a regulator if the PHY needs one
-
-Example at the SoC level:
-mdio@1c0b080 {
-	compatible = "allwinner,sun4i-a10-mdio";
-	reg = <0x01c0b080 0x14>;
-	#address-cells = <1>;
-	#size-cells = <0>;
-};
-
-And at the board level:
-
-mdio@1c0b080 {
-	phy-supply = <&reg_emac_3v3>;
-
-	phy0: ethernet-phy@0 {
-		reg = <0>;
-	};
-};
-- 
cgit 


From 7db3545aef5fae02a9ccb4b1e37e72900d7101bb Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Thu, 27 Jun 2019 17:31:50 +0200
Subject: dt-bindings: net: stmmac: Convert the binding to a schemas

Switch the STMMAC / Synopsys DesignWare MAC controller binding to a YAML
schema to enable the DT validation.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/net/snps,dwmac.yaml        | 390 +++++++++++++++++++++
 Documentation/devicetree/bindings/net/stmmac.txt   | 179 +---------
 2 files changed, 391 insertions(+), 178 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/snps,dwmac.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
new file mode 100644
index 000000000000..f2a5bae7fcfc
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -0,0 +1,390 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/snps,dwmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys DesignWare MAC Device Tree Bindings
+
+maintainers:
+  - Alexandre Torgue <alexandre.torgue@st.com>
+  - Giuseppe Cavallaro <peppe.cavallaro@st.com>
+  - Jose Abreu <joabreu@synopsys.com>
+
+# Select every compatible, including the deprecated ones. This way, we
+# will be able to report a warning when we have that compatible, since
+# we will validate the node thanks to the select, but won't report it
+# as a valid value in the compatible property description
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - snps,dwmac
+          - snps,dwmac-3.50a
+          - snps,dwmac-3.610
+          - snps,dwmac-3.70a
+          - snps,dwmac-3.710
+          - snps,dwmac-4.00
+          - snps,dwmac-4.10a
+          - snps,dwxgmac
+          - snps,dwxgmac-2.10
+
+          # Deprecated
+          - st,spear600-gmac
+
+  required:
+    - compatible
+
+properties:
+
+  # We need to include all the compatibles from schemas that will
+  # include that schemas, otherwise compatible won't validate for
+  # those.
+  compatible:
+    contains:
+      enum:
+        - snps,dwmac
+        - snps,dwmac-3.50a
+        - snps,dwmac-3.610
+        - snps,dwmac-3.70a
+        - snps,dwmac-3.710
+        - snps,dwmac-4.00
+        - snps,dwmac-4.10a
+        - snps,dwxgmac
+        - snps,dwxgmac-2.10
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 1
+    maxItems: 3
+    items:
+      - description: Combined signal for various interrupt events
+      - description: The interrupt to manage the remote wake-up packet detection
+      - description: The interrupt that occurs when Rx exits the LPI state
+
+  interrupt-names:
+    minItems: 1
+    maxItems: 3
+    items:
+      - const: macirq
+      - const: eth_wake_irq
+      - const: eth_lpi
+
+  clocks:
+    minItems: 1
+    maxItems: 3
+    items:
+      - description: GMAC main clock
+      - description: Peripheral registers interface clock
+      - description:
+          PTP reference clock. This clock is used for programming the
+          Timestamp Addend Register. If not passed then the system
+          clock will be used and this is fine on some platforms.
+
+  clock-names:
+    additionalItems: true
+    contains:
+      enum:
+        - stmmaceth
+        - pclk
+        - ptp_ref
+
+  resets:
+    maxItems: 1
+    description:
+      MAC Reset signal.
+
+  reset-names:
+    const: stmmaceth
+
+  snps,axi-config:
+    $ref: /schemas/types.yaml#definitions/phandle
+    description:
+      AXI BUS Mode parameters. Phandle to a node that can contain the
+      following properties
+        * snps,lpi_en, enable Low Power Interface
+        * snps,xit_frm, unlock on WoL
+        * snps,wr_osr_lmt, max write outstanding req. limit
+        * snps,rd_osr_lmt, max read outstanding req. limit
+        * snps,kbbe, do not cross 1KiB boundary.
+        * snps,blen, this is a vector of supported burst length.
+        * snps,fb, fixed-burst
+        * snps,mb, mixed-burst
+        * snps,rb, rebuild INCRx Burst
+
+  snps,mtl-rx-config:
+    $ref: /schemas/types.yaml#definitions/phandle
+    description:
+      Multiple RX Queues parameters. Phandle to a node that can
+      contain the following properties
+        * snps,rx-queues-to-use, number of RX queues to be used in the
+          driver
+        * Choose one of these RX scheduling algorithms
+          * snps,rx-sched-sp, Strict priority
+          * snps,rx-sched-wsp, Weighted Strict priority
+        * For each RX queue
+          * Choose one of these modes
+            * snps,dcb-algorithm, Queue to be enabled as DCB
+            * snps,avb-algorithm, Queue to be enabled as AVB
+          * snps,map-to-dma-channel, Channel to map
+          * Specifiy specific packet routing
+            * snps,route-avcp, AV Untagged Control packets
+            * snps,route-ptp, PTP Packets
+            * snps,route-dcbcp, DCB Control Packets
+            * snps,route-up, Untagged Packets
+            * snps,route-multi-broad, Multicast & Broadcast Packets
+          * snps,priority, RX queue priority (Range 0x0 to 0xF)
+
+  snps,mtl-tx-config:
+    $ref: /schemas/types.yaml#definitions/phandle
+    description:
+      Multiple TX Queues parameters. Phandle to a node that can
+      contain the following properties
+        * snps,tx-queues-to-use, number of TX queues to be used in the
+          driver
+        * Choose one of these TX scheduling algorithms
+          * snps,tx-sched-wrr, Weighted Round Robin
+          * snps,tx-sched-wfq, Weighted Fair Queuing
+          * snps,tx-sched-dwrr, Deficit Weighted Round Robin
+          * snps,tx-sched-sp, Strict priority
+        * For each TX queue
+          * snps,weight, TX queue weight (if using a DCB weight
+            algorithm)
+          * Choose one of these modes
+            * snps,dcb-algorithm, TX queue will be working in DCB
+            * snps,avb-algorithm, TX queue will be working in AVB
+              [Attention] Queue 0 is reserved for legacy traffic
+                          and so no AVB is available in this queue.
+          * Configure Credit Base Shaper (if AVB Mode selected)
+            * snps,send_slope, enable Low Power Interface
+            * snps,idle_slope, unlock on WoL
+            * snps,high_credit, max write outstanding req. limit
+            * snps,low_credit, max read outstanding req. limit
+          * snps,priority, TX queue priority (Range 0x0 to 0xF)
+
+  snps,reset-gpio:
+    maxItems: 1
+    description:
+      PHY Reset GPIO
+
+  snps,reset-active-low:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      Indicates that the PHY Reset is active low
+
+  snps,reset-delays-us:
+    allOf:
+      - $ref: /schemas/types.yaml#definitions/uint32-array
+      - minItems: 3
+        maxItems: 3
+    description:
+      Triplet of delays. The 1st cell is reset pre-delay in micro
+      seconds. The 2nd cell is reset pulse in micro seconds. The 3rd
+      cell is reset post-delay in micro seconds.
+
+  snps,aal:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      Use Address-Aligned Beats
+
+  snps,fixed-burst:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      Program the DMA to use the fixed burst mode
+
+  snps,mixed-burst:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      Program the DMA to use the mixed burst mode
+
+  snps,force_thresh_dma_mode:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      Force DMA to use the threshold mode for both tx and rx
+
+  snps,force_sf_dma_mode:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      Force DMA to use the Store and Forward mode for both tx and
+      rx. This flag is ignored if force_thresh_dma_mode is set.
+
+  snps,en-tx-lpi-clockgating:
+    $ref: /schemas/types.yaml#definitions/flag
+    description:
+      Enable gating of the MAC TX clock during TX low-power mode
+
+  snps,multicast-filter-bins:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description:
+      Number of multicast filter hash bins supported by this device
+      instance
+
+  snps,perfect-filter-entries:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description:
+      Number of perfect filter entries supported by this device
+      instance
+
+  snps,ps-speed:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description:
+      Port selection speed that can be passed to the core when PCS
+      is supported. For example, this is used in case of SGMII and
+      MAC2MAC connection.
+
+  mdio:
+    type: object
+    description:
+      Creates and registers an MDIO bus.
+
+    properties:
+      compatible:
+        const: snps,dwmac-mdio
+
+    required:
+      - compatible
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-names
+  - phy-connection-type
+
+dependencies:
+  snps,reset-active-low: ["snps,reset-gpio"]
+  snps,reset-delay-us: ["snps,reset-gpio"]
+
+allOf:
+  - $ref: "ethernet-controller.yaml#"
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - snps,dwxgmac
+              - snps,dwxgmac-2.10
+              - st,spear600-gmac
+
+    then:
+      properties:
+        snps,pbl:
+          allOf:
+            - $ref: /schemas/types.yaml#definitions/uint32
+            - enum: [2, 4, 8]
+          description:
+            Programmable Burst Length (tx and rx)
+
+        snps,txpbl:
+          allOf:
+            - $ref: /schemas/types.yaml#definitions/uint32
+            - enum: [2, 4, 8]
+          description:
+            Tx Programmable Burst Length. If set, DMA tx will use this
+            value rather than snps,pbl.
+
+        snps,rxpbl:
+          allOf:
+            - $ref: /schemas/types.yaml#definitions/uint32
+            - enum: [2, 4, 8]
+          description:
+            Rx Programmable Burst Length. If set, DMA rx will use this
+            value rather than snps,pbl.
+
+        snps,no-pbl-x8:
+          $ref: /schemas/types.yaml#definitions/flag
+          description:
+            Don\'t multiply the pbl/txpbl/rxpbl values by 8. For core
+            rev < 3.50, don\'t multiply the values by 4.
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - snps,dwmac-4.00
+              - snps,dwmac-4.10a
+              - snps,dwxgmac
+              - snps,dwxgmac-2.10
+              - st,spear600-gmac
+
+    then:
+        snps,tso:
+          $ref: /schemas/types.yaml#definitions/flag
+          description:
+            Enables the TSO feature otherwise it will be managed by
+            MAC HW capability register.
+
+examples:
+  - |
+    stmmac_axi_setup: stmmac-axi-config {
+        snps,wr_osr_lmt = <0xf>;
+        snps,rd_osr_lmt = <0xf>;
+        snps,blen = <256 128 64 32 0 0 0>;
+    };
+
+    mtl_rx_setup: rx-queues-config {
+        snps,rx-queues-to-use = <1>;
+        snps,rx-sched-sp;
+        queue0 {
+            snps,dcb-algorithm;
+            snps,map-to-dma-channel = <0x0>;
+            snps,priority = <0x0>;
+        };
+    };
+
+    mtl_tx_setup: tx-queues-config {
+        snps,tx-queues-to-use = <2>;
+        snps,tx-sched-wrr;
+        queue0 {
+            snps,weight = <0x10>;
+            snps,dcb-algorithm;
+            snps,priority = <0x0>;
+        };
+
+        queue1 {
+            snps,avb-algorithm;
+            snps,send_slope = <0x1000>;
+            snps,idle_slope = <0x1000>;
+            snps,high_credit = <0x3E800>;
+            snps,low_credit = <0xFFC18000>;
+            snps,priority = <0x1>;
+        };
+    };
+
+    gmac0: ethernet@e0800000 {
+        compatible = "snps,dwxgmac-2.10", "snps,dwxgmac";
+        reg = <0xe0800000 0x8000>;
+        interrupt-parent = <&vic1>;
+        interrupts = <24 23 22>;
+        interrupt-names = "macirq", "eth_wake_irq", "eth_lpi";
+        mac-address = [000000000000]; /* Filled in by U-Boot */
+        max-frame-size = <3800>;
+        phy-connection-type = "gmii";
+        snps,multicast-filter-bins = <256>;
+        snps,perfect-filter-entries = <128>;
+        rx-fifo-depth = <16384>;
+        tx-fifo-depth = <16384>;
+        clocks = <&clock>;
+        clock-names = "stmmaceth";
+        snps,axi-config = <&stmmac_axi_setup>;
+        snps,mtl-rx-config = <&mtl_rx_setup>;
+        snps,mtl-tx-config = <&mtl_tx_setup>;
+        mdio0 {
+            #address-cells = <1>;
+            #size-cells = <0>;
+            compatible = "snps,dwmac-mdio";
+            phy1: ethernet-phy@0 {
+                reg = <0>;
+            };
+        };
+    };
+
+# FIXME: We should set it, but it would report all the generic
+# properties as additional properties.
+# additionalProperties: false
+
+...
diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt
index cb694062afff..7d48782767cb 100644
--- a/Documentation/devicetree/bindings/net/stmmac.txt
+++ b/Documentation/devicetree/bindings/net/stmmac.txt
@@ -1,178 +1 @@
-* STMicroelectronics 10/100/1000/2500/10000 Ethernet (GMAC/XGMAC)
-
-Required properties:
-- compatible: Should be "snps,dwmac-<ip_version>", "snps,dwmac" or
-	"snps,dwxgmac-<ip_version>", "snps,dwxgmac".
-	For backwards compatibility: "st,spear600-gmac" is also supported.
-- reg: Address and length of the register set for the device
-- interrupts: Should contain the STMMAC interrupts
-- interrupt-names: Should contain a list of interrupt names corresponding to
-	the interrupts in the interrupts property, if available.
-	Valid interrupt names are:
-  - "macirq" (combined signal for various interrupt events)
-  - "eth_wake_irq" (the interrupt to manage the remote wake-up packet detection)
-  - "eth_lpi" (the interrupt that occurs when Rx exits the LPI state)
-- phy-mode: See ethernet.txt file in the same directory.
-- snps,reset-gpio 	gpio number for phy reset.
-- snps,reset-active-low boolean flag to indicate if phy reset is active low.
-- snps,reset-delays-us  is triplet of delays
-	The 1st cell is reset pre-delay in micro seconds.
-	The 2nd cell is reset pulse in micro seconds.
-	The 3rd cell is reset post-delay in micro seconds.
-
-Optional properties:
-- resets: Should contain a phandle to the STMMAC reset signal, if any
-- reset-names: Should contain the reset signal name "stmmaceth", if a
-	reset phandle is given
-- max-frame-size: See ethernet.txt file in the same directory
-- clocks: If present, the first clock should be the GMAC main clock and
-  the second clock should be peripheral's register interface clock. Further
-  clocks may be specified in derived bindings.
-- clock-names: One name for each entry in the clocks property, the
-  first one should be "stmmaceth" and the second one should be "pclk".
-- ptp_ref: this is the PTP reference clock; in case of the PTP is available
-  this clock is used for programming the Timestamp Addend Register. If not
-  passed then the system clock will be used and this is fine on some
-  platforms.
-- tx-fifo-depth: See ethernet.txt file in the same directory
-- rx-fifo-depth: See ethernet.txt file in the same directory
-- snps,pbl		Programmable Burst Length (tx and rx)
-- snps,txpbl		Tx Programmable Burst Length. Only for GMAC and newer.
-			If set, DMA tx will use this value rather than snps,pbl.
-- snps,rxpbl		Rx Programmable Burst Length. Only for GMAC and newer.
-			If set, DMA rx will use this value rather than snps,pbl.
-- snps,no-pbl-x8	Don't multiply the pbl/txpbl/rxpbl values by 8.
-			For core rev < 3.50, don't multiply the values by 4.
-- snps,aal		Address-Aligned Beats
-- snps,fixed-burst	Program the DMA to use the fixed burst mode
-- snps,mixed-burst	Program the DMA to use the mixed burst mode
-- snps,force_thresh_dma_mode	Force DMA to use the threshold mode for
-				both tx and rx
-- snps,force_sf_dma_mode	Force DMA to use the Store and Forward
-				mode for both tx and rx. This flag is
-				ignored if force_thresh_dma_mode is set.
-- snps,en-tx-lpi-clockgating	Enable gating of the MAC TX clock during
-				TX low-power mode
-- snps,multicast-filter-bins:	Number of multicast filter hash bins
-				supported by this device instance
-- snps,perfect-filter-entries:	Number of perfect filter entries supported
-				by this device instance
-- snps,ps-speed: port selection speed that can be passed to the core when
-		 PCS is supported. For example, this is used in case of SGMII
-		 and MAC2MAC connection.
-- snps,tso: this enables the TSO feature otherwise it will be managed by
-		 MAC HW capability register. Only for GMAC4 and newer.
-- AXI BUS Mode parameters: below the list of all the parameters to program the
-			   AXI register inside the DMA module:
-	- snps,lpi_en: enable Low Power Interface
-	- snps,xit_frm: unlock on WoL
-	- snps,wr_osr_lmt: max write outstanding req. limit
-	- snps,rd_osr_lmt: max read outstanding req. limit
-	- snps,kbbe: do not cross 1KiB boundary.
-	- snps,blen: this is a vector of supported burst length.
-	- snps,fb: fixed-burst
-	- snps,mb: mixed-burst
-	- snps,rb: rebuild INCRx Burst
-- mdio: with compatible = "snps,dwmac-mdio", create and register mdio bus.
-- Multiple RX Queues parameters: below the list of all the parameters to
-				 configure the multiple RX queues:
-	- snps,rx-queues-to-use: number of RX queues to be used in the driver
-	- Choose one of these RX scheduling algorithms:
-		- snps,rx-sched-sp: Strict priority
-		- snps,rx-sched-wsp: Weighted Strict priority
-	- For each RX queue
-		- Choose one of these modes:
-			- snps,dcb-algorithm: Queue to be enabled as DCB
-			- snps,avb-algorithm: Queue to be enabled as AVB
-		- snps,map-to-dma-channel: Channel to map
-		- Specifiy specific packet routing:
-			- snps,route-avcp: AV Untagged Control packets
-			- snps,route-ptp: PTP Packets
-			- snps,route-dcbcp: DCB Control Packets
-			- snps,route-up: Untagged Packets
-			- snps,route-multi-broad: Multicast & Broadcast Packets
-		- snps,priority: RX queue priority (Range: 0x0 to 0xF)
-- Multiple TX Queues parameters: below the list of all the parameters to
-				 configure the multiple TX queues:
-	- snps,tx-queues-to-use: number of TX queues to be used in the driver
-	- Choose one of these TX scheduling algorithms:
-		- snps,tx-sched-wrr: Weighted Round Robin
-		- snps,tx-sched-wfq: Weighted Fair Queuing
-		- snps,tx-sched-dwrr: Deficit Weighted Round Robin
-		- snps,tx-sched-sp: Strict priority
-	- For each TX queue
-		- snps,weight: TX queue weight (if using a DCB weight algorithm)
-		- Choose one of these modes:
-			- snps,dcb-algorithm: TX queue will be working in DCB
-			- snps,avb-algorithm: TX queue will be working in AVB
-			  [Attention] Queue 0 is reserved for legacy traffic
-			  and so no AVB is available in this queue.
-		- Configure Credit Base Shaper (if AVB Mode selected):
-			- snps,send_slope: enable Low Power Interface
-			- snps,idle_slope: unlock on WoL
-			- snps,high_credit: max write outstanding req. limit
-			- snps,low_credit: max read outstanding req. limit
-		- snps,priority: TX queue priority (Range: 0x0 to 0xF)
-Examples:
-
-	stmmac_axi_setup: stmmac-axi-config {
-		snps,wr_osr_lmt = <0xf>;
-		snps,rd_osr_lmt = <0xf>;
-		snps,blen = <256 128 64 32 0 0 0>;
-	};
-
-	mtl_rx_setup: rx-queues-config {
-		snps,rx-queues-to-use = <1>;
-		snps,rx-sched-sp;
-		queue0 {
-			snps,dcb-algorithm;
-			snps,map-to-dma-channel = <0x0>;
-			snps,priority = <0x0>;
-		};
-	};
-
-	mtl_tx_setup: tx-queues-config {
-		snps,tx-queues-to-use = <2>;
-		snps,tx-sched-wrr;
-		queue0 {
-			snps,weight = <0x10>;
-			snps,dcb-algorithm;
-			snps,priority = <0x0>;
-		};
-
-		queue1 {
-			snps,avb-algorithm;
-			snps,send_slope = <0x1000>;
-			snps,idle_slope = <0x1000>;
-			snps,high_credit = <0x3E800>;
-			snps,low_credit = <0xFFC18000>;
-			snps,priority = <0x1>;
-		};
-	};
-
-	gmac0: ethernet@e0800000 {
-		compatible = "st,spear600-gmac";
-		reg = <0xe0800000 0x8000>;
-		interrupt-parent = <&vic1>;
-		interrupts = <24 23 22>;
-		interrupt-names = "macirq", "eth_wake_irq", "eth_lpi";
-		mac-address = [000000000000]; /* Filled in by U-Boot */
-		max-frame-size = <3800>;
-		phy-mode = "gmii";
-		snps,multicast-filter-bins = <256>;
-		snps,perfect-filter-entries = <128>;
-		rx-fifo-depth = <16384>;
-		tx-fifo-depth = <16384>;
-		clocks = <&clock>;
-		clock-names = "stmmaceth";
-		snps,axi-config = <&stmmac_axi_setup>;
-		mdio0 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "snps,dwmac-mdio";
-			phy1: ethernet-phy@0 {
-			};
-		};
-		snps,mtl-rx-config = <&mtl_rx_setup>;
-		snps,mtl-tx-config = <&mtl_tx_setup>;
-	};
+This file has moved to snps,dwmac.yaml.
-- 
cgit 


From 4b859450faa02aa48c0baf915b3faa090f2efb8e Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Thu, 27 Jun 2019 17:31:51 +0200
Subject: dt-bindings: net: sun7i-gmac: Convert the binding to a schemas

Switch our Allwinner A20 GMAC controller binding to a YAML schema to enable
the DT validation. Since that controller is based on a Synopsys IP, let's
add the validation to that schemas with a bunch of conditionals.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../bindings/net/allwinner,sun7i-a20-gmac.txt      | 27 ---------
 .../bindings/net/allwinner,sun7i-a20-gmac.yaml     | 65 ++++++++++++++++++++++
 .../devicetree/bindings/net/snps,dwmac.yaml        |  3 +
 3 files changed, 68 insertions(+), 27 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.txt
 create mode 100644 Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.txt b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.txt
deleted file mode 100644
index 8b3f953656e3..000000000000
--- a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-* Allwinner GMAC ethernet controller
-
-This device is a platform glue layer for stmmac.
-Please see stmmac.txt for the other unchanged properties.
-
-Required properties:
- - compatible:  Should be "allwinner,sun7i-a20-gmac"
- - clocks: Should contain the GMAC main clock, and tx clock
-   The tx clock type should be "allwinner,sun7i-a20-gmac-clk"
- - clock-names: Should contain the clock names "stmmaceth",
-   and "allwinner_gmac_tx"
-
-Optional properties:
-- phy-supply: phandle to a regulator if the PHY needs one
-
-Examples:
-
-	gmac: ethernet@1c50000 {
-		compatible = "allwinner,sun7i-a20-gmac";
-		reg = <0x01c50000 0x10000>,
-		      <0x01c20164 0x4>;
-		interrupts = <0 85 1>;
-		interrupt-names = "macirq";
-		clocks = <&ahb_gates 49>, <&gmac_tx>;
-		clock-names = "stmmaceth", "allwinner_gmac_tx";
-		phy-mode = "mii";
-	};
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
new file mode 100644
index 000000000000..4b45798f0e68
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/allwinner,sun7i-a20-gmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A20 GMAC Device Tree Bindings
+
+allOf:
+  - $ref: "snps,dwmac.yaml#"
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  compatible:
+    const: allwinner,sun7i-a20-gmac
+
+  interrupts:
+    maxItems: 1
+
+  interrupt-names:
+    const: macirq
+
+  clocks:
+    items:
+      - description: GMAC main clock
+      - description: TX clock
+
+  clock-names:
+    items:
+      - const: stmmaceth
+      - const: allwinner_gmac_tx
+
+  phy-supply:
+    description:
+      PHY regulator
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-names
+  - clocks
+  - clock-names
+  - phy-connection-type
+
+examples:
+  - |
+    gmac: ethernet@1c50000 {
+        compatible = "allwinner,sun7i-a20-gmac";
+        reg = <0x01c50000 0x10000>;
+        interrupts = <0 85 1>;
+        interrupt-names = "macirq";
+        clocks = <&ahb_gates 49>, <&gmac_tx>;
+        clock-names = "stmmaceth", "allwinner_gmac_tx";
+        phy-connection-type = "mii";
+    };
+
+# FIXME: We should set it, but it would report all the generic
+# properties as additional properties.
+# additionalProperties: false
+
+...
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index f2a5bae7fcfc..49e1346516a7 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -44,6 +44,7 @@ properties:
   compatible:
     contains:
       enum:
+        - allwinner,sun7i-a20-gmac
         - snps,dwmac
         - snps,dwmac-3.50a
         - snps,dwmac-3.610
@@ -265,6 +266,7 @@ allOf:
         compatible:
           contains:
             enum:
+              - allwinner,sun7i-a20-gmac
               - snps,dwxgmac
               - snps,dwxgmac-2.10
               - st,spear600-gmac
@@ -305,6 +307,7 @@ allOf:
         compatible:
           contains:
             enum:
+              - allwinner,sun7i-a20-gmac
               - snps,dwmac-4.00
               - snps,dwmac-4.10a
               - snps,dwxgmac
-- 
cgit 


From 0569929d97997eea7126de180380dfcf2b45c9d1 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Thu, 27 Jun 2019 17:31:52 +0200
Subject: dt-bindings: net: sun8i-emac: Convert the binding to a schemas

Switch our Allwinner H3 EMAC controller binding to a YAML schema to enable
the DT validation. Since that controller is based on a Synopsys IP, let's
add the validation to that schemas with a bunch of conditionals.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../bindings/net/allwinner,sun8i-a83t-emac.yaml    | 321 +++++++++++++++++++++
 .../devicetree/bindings/net/dwmac-sun8i.txt        | 201 -------------
 .../devicetree/bindings/net/snps,dwmac.yaml        |  15 +
 3 files changed, 336 insertions(+), 201 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
 delete mode 100644 Documentation/devicetree/bindings/net/dwmac-sun8i.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
new file mode 100644
index 000000000000..6f68c7f5fc34
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
@@ -0,0 +1,321 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/allwinner,sun8i-a83t-gmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A83t EMAC Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  compatible:
+    oneOf:
+      - const: allwinner,sun8i-a83t-emac
+      - const: allwinner,sun8i-h3-emac
+      - const: allwinner,sun8i-r40-emac
+      - const: allwinner,sun8i-v3s-emac
+      - const: allwinner,sun50i-a64-emac
+      - items:
+        - const: allwinner,sun50i-h6-emac
+        - const: allwinner,sun50i-a64-emac
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  interrupt-names:
+    const: macirq
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    const: stmmaceth
+
+  syscon:
+    $ref: /schemas/types.yaml#definitions/phandle
+    description:
+      Phandle to the device containing the EMAC or GMAC clock
+      register
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-names
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+  - phy-connection-type
+  - phy-handle
+  - syscon
+
+allOf:
+  - $ref: "snps,dwmac.yaml#"
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun8i-a83t-emac
+              - allwinner,sun8i-h3-emac
+              - allwinner,sun8i-v3s-emac
+              - allwinner,sun50i-a64-emac
+
+    then:
+      properties:
+        allwinner,tx-delay-ps:
+          default: 0
+          minimum: 0
+          maximum: 700
+          multipleOf: 100
+          description:
+            External RGMII PHY TX clock delay chain value in ps.
+
+        allwinner,rx-delay-ps:
+          default: 0
+          minimum: 0
+          maximum: 3100
+          multipleOf: 100
+          description:
+            External RGMII PHY TX clock delay chain value in ps.
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun8i-r40-emac
+
+    then:
+      properties:
+        allwinner,rx-delay-ps:
+          default: 0
+          minimum: 0
+          maximum: 700
+          multipleOf: 100
+          description:
+            External RGMII PHY TX clock delay chain value in ps.
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun8i-h3-emac
+              - allwinner,sun8i-v3s-emac
+
+    then:
+      properties:
+        allwinner,leds-active-low:
+          $ref: /schemas/types.yaml#definitions/flag
+          description:
+            EPHY LEDs are active low.
+
+        mdio-mux:
+          type: object
+
+          properties:
+            compatible:
+              const: allwinner,sun8i-h3-mdio-mux
+
+            mdio-parent-bus:
+              $ref: /schemas/types.yaml#definitions/phandle
+              description:
+                Phandle to EMAC MDIO.
+
+            mdio@1:
+              type: object
+              description: Internal MDIO Bus
+
+              properties:
+                "#address-cells":
+                  const: 1
+
+                "#size-cells":
+                  const: 0
+
+                compatible:
+                  const: allwinner,sun8i-h3-mdio-internal
+
+                reg:
+                  const: 1
+
+              patternProperties:
+                "^ethernet-phy@[0-9a-f]$":
+                  type: object
+                  description:
+                    Integrated PHY node
+
+                  properties:
+                    clocks:
+                      maxItems: 1
+
+                    resets:
+                      maxItems: 1
+
+                  required:
+                    - clocks
+                    - resets
+
+
+            mdio@2:
+              type: object
+              description: External MDIO Bus (H3 only)
+
+              properties:
+                "#address-cells":
+                  const: 1
+
+                "#size-cells":
+                  const: 0
+
+                reg:
+                  const: 2
+
+          required:
+            - compatible
+            - mdio-parent-bus
+            - mdio@1
+
+examples:
+  - |
+    ethernet@1c0b000 {
+        compatible = "allwinner,sun8i-h3-emac";
+        syscon = <&syscon>;
+        reg = <0x01c0b000 0x104>;
+        interrupts = <0 82 1>;
+        interrupt-names = "macirq";
+        resets = <&ccu 12>;
+        reset-names = "stmmaceth";
+        clocks = <&ccu 27>;
+        clock-names = "stmmaceth";
+
+        phy-handle = <&int_mii_phy>;
+        phy-connection-type = "mii";
+        allwinner,leds-active-low;
+
+        mdio1: mdio {
+            #address-cells = <1>;
+            #size-cells = <0>;
+            compatible = "snps,dwmac-mdio";
+        };
+
+        mdio-mux {
+            compatible = "allwinner,sun8i-h3-mdio-mux";
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            mdio-parent-bus = <&mdio1>;
+
+            int_mii_phy: mdio@1 {
+                compatible = "allwinner,sun8i-h3-mdio-internal";
+                reg = <1>;
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                ethernet-phy@1 {
+                    reg = <1>;
+                    clocks = <&ccu 67>;
+                    resets = <&ccu 39>;
+                    phy-is-integrated;
+                };
+            };
+
+            mdio@2 {
+                reg = <2>;
+                #address-cells = <1>;
+                #size-cells = <0>;
+            };
+        };
+    };
+
+  - |
+    ethernet@1c0b000 {
+        compatible = "allwinner,sun8i-h3-emac";
+        syscon = <&syscon>;
+        reg = <0x01c0b000 0x104>;
+        interrupts = <0 82 1>;
+        interrupt-names = "macirq";
+        resets = <&ccu 12>;
+        reset-names = "stmmaceth";
+        clocks = <&ccu 27>;
+        clock-names = "stmmaceth";
+
+        phy-handle = <&ext_rgmii_phy>;
+        phy-connection-type = "rgmii";
+        allwinner,leds-active-low;
+
+        mdio2: mdio {
+            #address-cells = <1>;
+            #size-cells = <0>;
+            compatible = "snps,dwmac-mdio";
+        };
+
+        mdio-mux {
+            compatible = "allwinner,sun8i-h3-mdio-mux";
+            #address-cells = <1>;
+            #size-cells = <0>;
+            mdio-parent-bus = <&mdio2>;
+
+            mdio@1 {
+                compatible = "allwinner,sun8i-h3-mdio-internal";
+                reg = <1>;
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                ethernet-phy@1 {
+                    reg = <1>;
+                    clocks = <&ccu 67>;
+                    resets = <&ccu 39>;
+                };
+            };
+
+            mdio@2 {
+                reg = <2>;
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                ext_rgmii_phy: ethernet-phy@1 {
+                    reg = <1>;
+                };
+            };
+        };
+    };
+
+  - |
+    ethernet@1c0b000 {
+        compatible = "allwinner,sun8i-a83t-emac";
+        syscon = <&syscon>;
+        reg = <0x01c0b000 0x104>;
+        interrupts = <0 82 1>;
+        interrupt-names = "macirq";
+        resets = <&ccu 13>;
+        reset-names = "stmmaceth";
+        clocks = <&ccu 27>;
+        clock-names = "stmmaceth";
+        phy-handle = <&ext_rgmii_phy1>;
+        phy-connection-type = "rgmii";
+
+        mdio {
+            compatible = "snps,dwmac-mdio";
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            ext_rgmii_phy1: ethernet-phy@1 {
+                reg = <1>;
+            };
+        };
+    };
+
+# FIXME: We should set it, but it would report all the generic
+# properties as additional properties.
+# additionalProperties: false
+
+...
diff --git a/Documentation/devicetree/bindings/net/dwmac-sun8i.txt b/Documentation/devicetree/bindings/net/dwmac-sun8i.txt
deleted file mode 100644
index 54c66d0611cb..000000000000
--- a/Documentation/devicetree/bindings/net/dwmac-sun8i.txt
+++ /dev/null
@@ -1,201 +0,0 @@
-* Allwinner sun8i GMAC ethernet controller
-
-This device is a platform glue layer for stmmac.
-Please see stmmac.txt for the other unchanged properties.
-
-Required properties:
-- compatible: must be one of the following string:
-		"allwinner,sun8i-a83t-emac"
-		"allwinner,sun8i-h3-emac"
-		"allwinner,sun8i-r40-gmac"
-		"allwinner,sun8i-v3s-emac"
-		"allwinner,sun50i-a64-emac"
-		"allwinner,sun50i-h6-emac", "allwinner-sun50i-a64-emac"
-- reg: address and length of the register for the device.
-- interrupts: interrupt for the device
-- interrupt-names: must be "macirq"
-- clocks: A phandle to the reference clock for this device
-- clock-names: must be "stmmaceth"
-- resets: A phandle to the reset control for this device
-- reset-names: must be "stmmaceth"
-- phy-mode: See ethernet.txt
-- phy-handle: See ethernet.txt
-- syscon: A phandle to the device containing the EMAC or GMAC clock register
-
-Optional properties:
-- allwinner,tx-delay-ps: TX clock delay chain value in ps.
-			 Range is 0-700. Default is 0.
-			 Unavailable for allwinner,sun8i-r40-gmac
-- allwinner,rx-delay-ps: RX clock delay chain value in ps.
-			 Range is 0-3100. Default is 0.
-			 Range is 0-700 for allwinner,sun8i-r40-gmac
-Both delay properties need to be a multiple of 100. They control the
-clock delay for external RGMII PHY. They do not apply to the internal
-PHY or external non-RGMII PHYs.
-
-Optional properties for the following compatibles:
-  - "allwinner,sun8i-h3-emac",
-  - "allwinner,sun8i-v3s-emac":
-- allwinner,leds-active-low: EPHY LEDs are active low
-
-Required child node of emac:
-- mdio bus node: should be named mdio with compatible "snps,dwmac-mdio"
-
-Required properties of the mdio node:
-- #address-cells: shall be 1
-- #size-cells: shall be 0
-
-The device node referenced by "phy" or "phy-handle" must be a child node
-of the mdio node. See phy.txt for the generic PHY bindings.
-
-The following compatibles require that the emac node have a mdio-mux child
-node called "mdio-mux":
-  - "allwinner,sun8i-h3-emac"
-  - "allwinner,sun8i-v3s-emac":
-Required properties for the mdio-mux node:
-  - compatible = "allwinner,sun8i-h3-mdio-mux"
-  - mdio-parent-bus: a phandle to EMAC mdio
-  - one child mdio for the integrated mdio with the compatible
-    "allwinner,sun8i-h3-mdio-internal"
-  - one child mdio for the external mdio if present (V3s have none)
-Required properties for the mdio-mux children node:
-  - reg: 1 for internal MDIO bus, 2 for external MDIO bus
-
-The following compatibles require a PHY node representing the integrated
-PHY, under the integrated MDIO bus node if an mdio-mux node is used:
-  - "allwinner,sun8i-h3-emac",
-  - "allwinner,sun8i-v3s-emac":
-
-Additional information regarding generic multiplexer properties can be found
-at Documentation/devicetree/bindings/net/mdio-mux.txt
-
-Required properties of the integrated phy node:
-- clocks: a phandle to the reference clock for the EPHY
-- resets: a phandle to the reset control for the EPHY
-- Must be a child of the integrated mdio
-
-Example with integrated PHY:
-emac: ethernet@1c0b000 {
-	compatible = "allwinner,sun8i-h3-emac";
-	syscon = <&syscon>;
-	reg = <0x01c0b000 0x104>;
-	interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
-	interrupt-names = "macirq";
-	resets = <&ccu RST_BUS_EMAC>;
-	reset-names = "stmmaceth";
-	clocks = <&ccu CLK_BUS_EMAC>;
-	clock-names = "stmmaceth";
-
-	phy-handle = <&int_mii_phy>;
-	phy-mode = "mii";
-	allwinner,leds-active-low;
-
-	mdio: mdio {
-		#address-cells = <1>;
-		#size-cells = <0>;
-		compatible = "snps,dwmac-mdio";
-	};
-
-	mdio-mux {
-		compatible = "mdio-mux", "allwinner,sun8i-h3-mdio-mux";
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		mdio-parent-bus = <&mdio>;
-
-		int_mdio: mdio@1 {
-			compatible = "allwinner,sun8i-h3-mdio-internal";
-			reg = <1>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			int_mii_phy: ethernet-phy@1 {
-				reg = <1>;
-				clocks = <&ccu CLK_BUS_EPHY>;
-				resets = <&ccu RST_BUS_EPHY>;
-				phy-is-integrated;
-			};
-		};
-		ext_mdio: mdio@2 {
-			reg = <2>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-		};
-	};
-};
-
-Example with external PHY:
-emac: ethernet@1c0b000 {
-	compatible = "allwinner,sun8i-h3-emac";
-	syscon = <&syscon>;
-	reg = <0x01c0b000 0x104>;
-	interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
-	interrupt-names = "macirq";
-	resets = <&ccu RST_BUS_EMAC>;
-	reset-names = "stmmaceth";
-	clocks = <&ccu CLK_BUS_EMAC>;
-	clock-names = "stmmaceth";
-
-	phy-handle = <&ext_rgmii_phy>;
-	phy-mode = "rgmii";
-	allwinner,leds-active-low;
-
-	mdio: mdio {
-		#address-cells = <1>;
-		#size-cells = <0>;
-		compatible = "snps,dwmac-mdio";
-	};
-
-	mdio-mux {
-		compatible = "allwinner,sun8i-h3-mdio-mux";
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		mdio-parent-bus = <&mdio>;
-
-		int_mdio: mdio@1 {
-			compatible = "allwinner,sun8i-h3-mdio-internal";
-			reg = <1>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			int_mii_phy: ethernet-phy@1 {
-				reg = <1>;
-				clocks = <&ccu CLK_BUS_EPHY>;
-				resets = <&ccu RST_BUS_EPHY>;
-			};
-		};
-		ext_mdio: mdio@2 {
-			reg = <2>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			ext_rgmii_phy: ethernet-phy@1 {
-				reg = <1>;
-			};
-		}:
-	};
-};
-
-Example with SoC without integrated PHY
-
-emac: ethernet@1c0b000 {
-	compatible = "allwinner,sun8i-a83t-emac";
-	syscon = <&syscon>;
-	reg = <0x01c0b000 0x104>;
-	interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
-	interrupt-names = "macirq";
-	resets = <&ccu RST_BUS_EMAC>;
-	reset-names = "stmmaceth";
-	clocks = <&ccu CLK_BUS_EMAC>;
-	clock-names = "stmmaceth";
-
-	phy-handle = <&ext_rgmii_phy>;
-	phy-mode = "rgmii";
-
-	mdio: mdio {
-		compatible = "snps,dwmac-mdio";
-		#address-cells = <1>;
-		#size-cells = <0>;
-		ext_rgmii_phy: ethernet-phy@1 {
-			reg = <1>;
-		};
-	};
-};
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 49e1346516a7..07d1c347723d 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -45,6 +45,11 @@ properties:
     contains:
       enum:
         - allwinner,sun7i-a20-gmac
+        - allwinner,sun8i-a83t-emac
+        - allwinner,sun8i-h3-emac
+        - allwinner,sun8i-r40-emac
+        - allwinner,sun8i-v3s-emac
+        - allwinner,sun50i-a64-emac
         - snps,dwmac
         - snps,dwmac-3.50a
         - snps,dwmac-3.610
@@ -267,6 +272,11 @@ allOf:
           contains:
             enum:
               - allwinner,sun7i-a20-gmac
+              - allwinner,sun8i-a83t-emac
+              - allwinner,sun8i-h3-emac
+              - allwinner,sun8i-r40-emac
+              - allwinner,sun8i-v3s-emac
+              - allwinner,sun50i-a64-emac
               - snps,dwxgmac
               - snps,dwxgmac-2.10
               - st,spear600-gmac
@@ -308,6 +318,11 @@ allOf:
           contains:
             enum:
               - allwinner,sun7i-a20-gmac
+              - allwinner,sun8i-a83t-emac
+              - allwinner,sun8i-h3-emac
+              - allwinner,sun8i-r40-emac
+              - allwinner,sun8i-v3s-emac
+              - allwinner,sun50i-a64-emac
               - snps,dwmac-4.00
               - snps,dwmac-4.10a
               - snps,dwxgmac
-- 
cgit 


From f80b1dfc5680f3044b07b6dee8c198dde8aa313f Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Thu, 27 Jun 2019 17:31:53 +0200
Subject: dt-bindings: net: dwmac: Deprecate the PHY reset properties

Even though the DWMAC driver uses some driver specific properties, the PHY
core has a bunch of generic properties and can deal with them nicely.

Let's deprecate our specific properties.

Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/net/snps,dwmac.yaml | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 07d1c347723d..658726c620be 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -172,16 +172,19 @@ properties:
           * snps,priority, TX queue priority (Range 0x0 to 0xF)
 
   snps,reset-gpio:
+    deprecated: true
     maxItems: 1
     description:
       PHY Reset GPIO
 
   snps,reset-active-low:
+    deprecated: true
     $ref: /schemas/types.yaml#definitions/flag
     description:
       Indicates that the PHY Reset is active low
 
   snps,reset-delays-us:
+    deprecated: true
     allOf:
       - $ref: /schemas/types.yaml#definitions/uint32-array
       - minItems: 3
-- 
cgit 


From ef4db28c1f45cda6989bc8a8e45294894786d947 Mon Sep 17 00:00:00 2001
From: Brian Masney <masneyb@onstation.org>
Date: Mon, 20 May 2019 04:58:46 -0400
Subject: dt-bindings: backlight: lm3630a: correct schema validation

The '#address-cells' and '#size-cells' properties were not defined in
the lm3630a bindings and would cause the following error when
attempting to validate the examples against the schema:

Documentation/devicetree/bindings/leds/backlight/lm3630a-backlight.example.dt.yaml:
'#address-cells', '#size-cells' do not match any of the regexes:
'^led@[01]$', 'pinctrl-[0-9]+'

Correct this by adding those two properties.

While we're here, move the ti,linear-mapping-mode property to the
led@[01] child nodes to correct the following validation error:

Documentation/devicetree/bindings/leds/backlight/lm3630a-backlight.example.dt.yaml:
led@0: 'ti,linear-mapping-mode' does not match any of the regexes:
'pinctrl-[0-9]+'

Fixes: 32fcb75c66a0 ("dt-bindings: backlight: Add lm3630a bindings")
Signed-off-by: Brian Masney <masneyb@onstation.org>
Reported-by: Rob Herring <robh+dt@kernel.org>
Acked-by: Daniel Thompson <daniel.thompson@linaro.org>
Acked-by: Dan Murphy <dmurphy@ti.com>
[robh: also drop maxItems from child reg]
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../bindings/leds/backlight/lm3630a-backlight.yaml  | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/leds/backlight/lm3630a-backlight.yaml b/Documentation/devicetree/bindings/leds/backlight/lm3630a-backlight.yaml
index 4d61fe0a98a4..dc129d9a329e 100644
--- a/Documentation/devicetree/bindings/leds/backlight/lm3630a-backlight.yaml
+++ b/Documentation/devicetree/bindings/leds/backlight/lm3630a-backlight.yaml
@@ -23,16 +23,17 @@ properties:
   reg:
     maxItems: 1
 
-  ti,linear-mapping-mode:
-    description: |
-      Enable linear mapping mode. If disabled, then it will use exponential
-      mapping mode in which the ramp up/down appears to have a more uniform
-      transition to the human eye.
-    type: boolean
+  '#address-cells':
+    const: 1
+
+  '#size-cells':
+    const: 0
 
 required:
   - compatible
   - reg
+  - '#address-cells'
+  - '#size-cells'
 
 patternProperties:
   "^led@[01]$":
@@ -48,7 +49,6 @@ patternProperties:
           in this property. The two current sinks can be controlled
           independently with both banks, or bank A can be configured to control
           both sinks with the led-sources property.
-        maxItems: 1
         minimum: 0
         maximum: 1
 
@@ -73,6 +73,13 @@ patternProperties:
         minimum: 0
         maximum: 255
 
+      ti,linear-mapping-mode:
+        description: |
+          Enable linear mapping mode. If disabled, then it will use exponential
+          mapping mode in which the ramp up/down appears to have a more uniform
+          transition to the human eye.
+        type: boolean
+
     required:
       - reg
 
-- 
cgit 


From 9ea6b821cef405a915bb5904627005fca8f01462 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 26 Jun 2019 16:27:21 -0600
Subject: dt-bindings: arm: Limit cpus schema to only check Arm 'cpu' nodes

Matching on the 'cpus' node was a bad choice because the schema is
incorrectly applied to non-Arm cpus nodes. As we now have a common cpus
schema which checks the general structure, it is also redundant to do so
in the Arm CPU schema.

The downside is one could conceivably mix different architecture's cpu
nodes or have typos in the compatible string. The latter problem pretty
much exists for every schema.

Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/cpus.yaml | 487 +++++++++++-------------
 1 file changed, 224 insertions(+), 263 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml
index 591bbd012d63..aa40b074b864 100644
--- a/Documentation/devicetree/bindings/arm/cpus.yaml
+++ b/Documentation/devicetree/bindings/arm/cpus.yaml
@@ -39,281 +39,242 @@ description: |+
   described below.
 
 properties:
-  $nodename:
-    const: cpus
-    description: Container of cpu nodes
-
-  '#address-cells':
-    enum: [1, 2]
+  reg:
+    maxItems: 1
     description: |
-      Definition depends on ARM architecture version and configuration:
+      Usage and definition depend on ARM architecture version and
+      configuration:
 
       On uniprocessor ARM architectures previous to v7
-        value must be 1, to enable a simple enumeration
-        scheme for processors that do not have a HW CPU
-        identification register.
-      On 32-bit ARM 11 MPcore, ARM v7 or later systems
-        value must be 1, that corresponds to CPUID/MPIDR
-        registers sizes.
-      On ARM v8 64-bit systems value should be set to 2,
-        that corresponds to the MPIDR_EL1 register size.
-        If MPIDR_EL1[63:32] value is equal to 0 on all CPUs
-        in the system, #address-cells can be set to 1, since
-        MPIDR_EL1[63:32] bits are not used for CPUs
-        identification.
-
-  '#size-cells':
-    const: 0
-
-patternProperties:
-  '^cpu@[0-9a-f]+$':
-    type: object
-    properties:
-      device_type:
-        const: cpu
-
-      reg:
-        maxItems: 1
-        description: |
-          Usage and definition depend on ARM architecture version and
-          configuration:
-
-          On uniprocessor ARM architectures previous to v7
-          this property is required and must be set to 0.
-
-          On ARM 11 MPcore based systems this property is
-            required and matches the CPUID[11:0] register bits.
-
-            Bits [11:0] in the reg cell must be set to
-            bits [11:0] in CPU ID register.
-
-            All other bits in the reg cell must be set to 0.
-
-          On 32-bit ARM v7 or later systems this property is
-            required and matches the CPU MPIDR[23:0] register
-            bits.
-
-            Bits [23:0] in the reg cell must be set to
-            bits [23:0] in MPIDR.
-
-            All other bits in the reg cell must be set to 0.
-
-          On ARM v8 64-bit systems this property is required
-            and matches the MPIDR_EL1 register affinity bits.
+      this property is required and must be set to 0.
+
+      On ARM 11 MPcore based systems this property is
+        required and matches the CPUID[11:0] register bits.
+
+        Bits [11:0] in the reg cell must be set to
+        bits [11:0] in CPU ID register.
+
+        All other bits in the reg cell must be set to 0.
+
+      On 32-bit ARM v7 or later systems this property is
+        required and matches the CPU MPIDR[23:0] register
+        bits.
+
+        Bits [23:0] in the reg cell must be set to
+        bits [23:0] in MPIDR.
+
+        All other bits in the reg cell must be set to 0.
+
+      On ARM v8 64-bit systems this property is required
+        and matches the MPIDR_EL1 register affinity bits.
+
+        * If cpus node's #address-cells property is set to 2
+
+          The first reg cell bits [7:0] must be set to
+          bits [39:32] of MPIDR_EL1.
+
+          The second reg cell bits [23:0] must be set to
+          bits [23:0] of MPIDR_EL1.
+
+        * If cpus node's #address-cells property is set to 1
+
+          The reg cell bits [23:0] must be set to bits [23:0]
+          of MPIDR_EL1.
+
+      All other bits in the reg cells must be set to 0.
+
+  compatible:
+    enum:
+      - arm,arm710t
+      - arm,arm720t
+      - arm,arm740t
+      - arm,arm7ej-s
+      - arm,arm7tdmi
+      - arm,arm7tdmi-s
+      - arm,arm9es
+      - arm,arm9ej-s
+      - arm,arm920t
+      - arm,arm922t
+      - arm,arm925
+      - arm,arm926e-s
+      - arm,arm926ej-s
+      - arm,arm940t
+      - arm,arm946e-s
+      - arm,arm966e-s
+      - arm,arm968e-s
+      - arm,arm9tdmi
+      - arm,arm1020e
+      - arm,arm1020t
+      - arm,arm1022e
+      - arm,arm1026ej-s
+      - arm,arm1136j-s
+      - arm,arm1136jf-s
+      - arm,arm1156t2-s
+      - arm,arm1156t2f-s
+      - arm,arm1176jzf
+      - arm,arm1176jz-s
+      - arm,arm1176jzf-s
+      - arm,arm11mpcore
+      - arm,armv8 # Only for s/w models
+      - arm,cortex-a5
+      - arm,cortex-a7
+      - arm,cortex-a8
+      - arm,cortex-a9
+      - arm,cortex-a12
+      - arm,cortex-a15
+      - arm,cortex-a17
+      - arm,cortex-a53
+      - arm,cortex-a57
+      - arm,cortex-a72
+      - arm,cortex-a73
+      - arm,cortex-m0
+      - arm,cortex-m0+
+      - arm,cortex-m1
+      - arm,cortex-m3
+      - arm,cortex-m4
+      - arm,cortex-r4
+      - arm,cortex-r5
+      - arm,cortex-r7
+      - brcm,brahma-b15
+      - brcm,brahma-b53
+      - brcm,vulcan
+      - cavium,thunder
+      - cavium,thunder2
+      - faraday,fa526
+      - intel,sa110
+      - intel,sa1100
+      - marvell,feroceon
+      - marvell,mohawk
+      - marvell,pj4a
+      - marvell,pj4b
+      - marvell,sheeva-v5
+      - marvell,sheeva-v7
+      - nvidia,tegra132-denver
+      - nvidia,tegra186-denver
+      - nvidia,tegra194-carmel
+      - qcom,krait
+      - qcom,kryo
+      - qcom,kryo385
+      - qcom,scorpion
+
+  enable-method:
+    allOf:
+      - $ref: '/schemas/types.yaml#/definitions/string'
+      - oneOf:
+          # On ARM v8 64-bit this property is required
+          - enum:
+              - psci
+              - spin-table
+          # On ARM 32-bit systems this property is optional
+          - enum:
+              - actions,s500-smp
+              - allwinner,sun6i-a31
+              - allwinner,sun8i-a23
+              - allwinner,sun9i-a80-smp
+              - allwinner,sun8i-a83t-smp
+              - amlogic,meson8-smp
+              - amlogic,meson8b-smp
+              - arm,realview-smp
+              - brcm,bcm11351-cpu-method
+              - brcm,bcm23550
+              - brcm,bcm2836-smp
+              - brcm,bcm63138
+              - brcm,bcm-nsp-smp
+              - brcm,brahma-b15
+              - marvell,armada-375-smp
+              - marvell,armada-380-smp
+              - marvell,armada-390-smp
+              - marvell,armada-xp-smp
+              - marvell,98dx3236-smp
+              - mediatek,mt6589-smp
+              - mediatek,mt81xx-tz-smp
+              - qcom,gcc-msm8660
+              - qcom,kpss-acc-v1
+              - qcom,kpss-acc-v2
+              - renesas,apmu
+              - renesas,r9a06g032-smp
+              - rockchip,rk3036-smp
+              - rockchip,rk3066-smp
+              - socionext,milbeaut-m10v-smp
+              - ste,dbx500-smp
+
+  cpu-release-addr:
+    $ref: '/schemas/types.yaml#/definitions/uint64'
+
+    description:
+      Required for systems that have an "enable-method"
+        property value of "spin-table".
+      On ARM v8 64-bit systems must be a two cell
+        property identifying a 64-bit zero-initialised
+        memory location.
+
+  cpu-idle-states:
+    $ref: '/schemas/types.yaml#/definitions/phandle-array'
+    description: |
+      List of phandles to idle state nodes supported
+      by this cpu (see ./idle-states.txt).
+
+  capacity-dmips-mhz:
+    $ref: '/schemas/types.yaml#/definitions/uint32'
+    description:
+      u32 value representing CPU capacity (see ./cpu-capacity.txt) in
+      DMIPS/MHz, relative to highest capacity-dmips-mhz
+      in the system.
+
+  dynamic-power-coefficient:
+    $ref: '/schemas/types.yaml#/definitions/uint32'
+    description:
+      A u32 value that represents the running time dynamic
+      power coefficient in units of uW/MHz/V^2. The
+      coefficient can either be calculated from power
+      measurements or derived by analysis.
+
+      The dynamic power consumption of the CPU  is
+      proportional to the square of the Voltage (V) and
+      the clock frequency (f). The coefficient is used to
+      calculate the dynamic power as below -
+
+      Pdyn = dynamic-power-coefficient * V^2 * f
+
+      where voltage is in V, frequency is in MHz.
+
+  qcom,saw:
+    $ref: '/schemas/types.yaml#/definitions/phandle'
+    description: |
+      Specifies the SAW* node associated with this CPU.
 
-            * If cpus node's #address-cells property is set to 2
+      Required for systems that have an "enable-method" property
+      value of "qcom,kpss-acc-v1" or "qcom,kpss-acc-v2"
 
-              The first reg cell bits [7:0] must be set to
-              bits [39:32] of MPIDR_EL1.
+      * arm/msm/qcom,saw2.txt
 
-              The second reg cell bits [23:0] must be set to
-              bits [23:0] of MPIDR_EL1.
+  qcom,acc:
+    $ref: '/schemas/types.yaml#/definitions/phandle'
+    description: |
+      Specifies the ACC* node associated with this CPU.
 
-            * If cpus node's #address-cells property is set to 1
+      Required for systems that have an "enable-method" property
+      value of "qcom,kpss-acc-v1" or "qcom,kpss-acc-v2"
 
-              The reg cell bits [23:0] must be set to bits [23:0]
-              of MPIDR_EL1.
+      * arm/msm/qcom,kpss-acc.txt
 
-          All other bits in the reg cells must be set to 0.
+  rockchip,pmu:
+    $ref: '/schemas/types.yaml#/definitions/phandle'
+    description: |
+      Specifies the syscon node controlling the cpu core power domains.
 
-      compatible:
-        items:
-          - enum:
-              - arm,arm710t
-              - arm,arm720t
-              - arm,arm740t
-              - arm,arm7ej-s
-              - arm,arm7tdmi
-              - arm,arm7tdmi-s
-              - arm,arm9es
-              - arm,arm9ej-s
-              - arm,arm920t
-              - arm,arm922t
-              - arm,arm925
-              - arm,arm926e-s
-              - arm,arm926ej-s
-              - arm,arm940t
-              - arm,arm946e-s
-              - arm,arm966e-s
-              - arm,arm968e-s
-              - arm,arm9tdmi
-              - arm,arm1020e
-              - arm,arm1020t
-              - arm,arm1022e
-              - arm,arm1026ej-s
-              - arm,arm1136j-s
-              - arm,arm1136jf-s
-              - arm,arm1156t2-s
-              - arm,arm1156t2f-s
-              - arm,arm1176jzf
-              - arm,arm1176jz-s
-              - arm,arm1176jzf-s
-              - arm,arm11mpcore
-              - arm,armv8 # Only for s/w models
-              - arm,cortex-a5
-              - arm,cortex-a7
-              - arm,cortex-a8
-              - arm,cortex-a9
-              - arm,cortex-a12
-              - arm,cortex-a15
-              - arm,cortex-a17
-              - arm,cortex-a53
-              - arm,cortex-a57
-              - arm,cortex-a72
-              - arm,cortex-a73
-              - arm,cortex-m0
-              - arm,cortex-m0+
-              - arm,cortex-m1
-              - arm,cortex-m3
-              - arm,cortex-m4
-              - arm,cortex-r4
-              - arm,cortex-r5
-              - arm,cortex-r7
-              - brcm,brahma-b15
-              - brcm,brahma-b53
-              - brcm,vulcan
-              - cavium,thunder
-              - cavium,thunder2
-              - faraday,fa526
-              - intel,sa110
-              - intel,sa1100
-              - marvell,feroceon
-              - marvell,mohawk
-              - marvell,pj4a
-              - marvell,pj4b
-              - marvell,sheeva-v5
-              - marvell,sheeva-v7
-              - nvidia,tegra132-denver
-              - nvidia,tegra186-denver
-              - nvidia,tegra194-carmel
-              - qcom,krait
-              - qcom,kryo
-              - qcom,kryo385
-              - qcom,scorpion
-
-      enable-method:
-        allOf:
-          - $ref: '/schemas/types.yaml#/definitions/string'
-          - oneOf:
-            # On ARM v8 64-bit this property is required
-            - enum:
-                - psci
-                - spin-table
-            # On ARM 32-bit systems this property is optional
-            - enum:
-                - actions,s500-smp
-                - allwinner,sun6i-a31
-                - allwinner,sun8i-a23
-                - allwinner,sun9i-a80-smp
-                - allwinner,sun8i-a83t-smp
-                - amlogic,meson8-smp
-                - amlogic,meson8b-smp
-                - arm,realview-smp
-                - brcm,bcm11351-cpu-method
-                - brcm,bcm23550
-                - brcm,bcm2836-smp
-                - brcm,bcm63138
-                - brcm,bcm-nsp-smp
-                - brcm,brahma-b15
-                - marvell,armada-375-smp
-                - marvell,armada-380-smp
-                - marvell,armada-390-smp
-                - marvell,armada-xp-smp
-                - marvell,98dx3236-smp
-                - mediatek,mt6589-smp
-                - mediatek,mt81xx-tz-smp
-                - qcom,gcc-msm8660
-                - qcom,kpss-acc-v1
-                - qcom,kpss-acc-v2
-                - renesas,apmu
-                - renesas,r9a06g032-smp
-                - rockchip,rk3036-smp
-                - rockchip,rk3066-smp
-                - socionext,milbeaut-m10v-smp
-                - ste,dbx500-smp
-
-      cpu-release-addr:
-        $ref: '/schemas/types.yaml#/definitions/uint64'
-
-        description:
-          Required for systems that have an "enable-method"
-            property value of "spin-table".
-          On ARM v8 64-bit systems must be a two cell
-            property identifying a 64-bit zero-initialised
-            memory location.
-
-      cpu-idle-states:
-        $ref: '/schemas/types.yaml#/definitions/phandle-array'
-        description: |
-          List of phandles to idle state nodes supported
-          by this cpu (see ./idle-states.txt).
-
-      capacity-dmips-mhz:
-        $ref: '/schemas/types.yaml#/definitions/uint32'
-        description:
-          u32 value representing CPU capacity (see ./cpu-capacity.txt) in
-          DMIPS/MHz, relative to highest capacity-dmips-mhz
-          in the system.
-
-      dynamic-power-coefficient:
-        $ref: '/schemas/types.yaml#/definitions/uint32'
-        description:
-          A u32 value that represents the running time dynamic
-          power coefficient in units of uW/MHz/V^2. The
-          coefficient can either be calculated from power
-          measurements or derived by analysis.
-
-          The dynamic power consumption of the CPU  is
-          proportional to the square of the Voltage (V) and
-          the clock frequency (f). The coefficient is used to
-          calculate the dynamic power as below -
-
-          Pdyn = dynamic-power-coefficient * V^2 * f
-
-          where voltage is in V, frequency is in MHz.
-
-      qcom,saw:
-        $ref: '/schemas/types.yaml#/definitions/phandle'
-        description: |
-          Specifies the SAW* node associated with this CPU.
-
-          Required for systems that have an "enable-method" property
-          value of "qcom,kpss-acc-v1" or "qcom,kpss-acc-v2"
-
-          * arm/msm/qcom,saw2.txt
-
-      qcom,acc:
-        $ref: '/schemas/types.yaml#/definitions/phandle'
-        description: |
-          Specifies the ACC* node associated with this CPU.
-
-          Required for systems that have an "enable-method" property
-          value of "qcom,kpss-acc-v1" or "qcom,kpss-acc-v2"
-
-          * arm/msm/qcom,kpss-acc.txt
-
-      rockchip,pmu:
-        $ref: '/schemas/types.yaml#/definitions/phandle'
-        description: |
-          Specifies the syscon node controlling the cpu core power domains.
-
-          Optional for systems that have an "enable-method"
-          property value of "rockchip,rk3066-smp"
-          While optional, it is the preferred way to get access to
-          the cpu-core power-domains.
-
-    required:
-      - device_type
-      - reg
-      - compatible
-
-    dependencies:
-      cpu-release-addr: [enable-method]
-      rockchip,pmu: [enable-method]
+      Optional for systems that have an "enable-method"
+      property value of "rockchip,rk3066-smp"
+      While optional, it is the preferred way to get access to
+      the cpu-core power-domains.
 
 required:
-  - '#address-cells'
-  - '#size-cells'
+  - device_type
+  - reg
+  - compatible
+
+dependencies:
+  rockchip,pmu: [enable-method]
 
 examples:
   - |
-- 
cgit 


From e6ed6467117e7d7cabafb95c392b4e0b91bedd2e Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 28 Jun 2019 13:29:54 -0600
Subject: dt-bindings: mtd: sunxi-nand: Drop 'maxItems' from child 'reg'
 property

Mixing 'maxItems' and scalar properties doesn't make much sense, so drop
'maxItems' as a single item is implied.

Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Cc: Maxime Ripard <maxime.ripard@bootlin.com>
Cc: Chen-Yu Tsai <wens@csie.org>
Cc: linux-mtd@lists.infradead.org
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml | 1 -
 1 file changed, 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml b/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
index fbd4da3684fc..e5a411518be1 100644
--- a/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
@@ -57,7 +57,6 @@ patternProperties:
   "^nand@[a-f0-9]+$":
     properties:
       reg:
-        maxItems: 1
         minimum: 0
         maximum: 7
 
-- 
cgit 


From dbd329f1e44ed48de8a1c19da5eb1218f5e1b4a5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 28 Jun 2019 19:27:29 -0700
Subject: xfs: add struct xfs_mount pointer to struct xfs_buf

We need to derive the mount pointer from a buffer in a lot of place.
Add a direct pointer to short cut the pointer chasing.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 Documentation/filesystems/xfs-self-describing-metadata.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/xfs-self-describing-metadata.txt b/Documentation/filesystems/xfs-self-describing-metadata.txt
index 68604e67a495..8db0121d0980 100644
--- a/Documentation/filesystems/xfs-self-describing-metadata.txt
+++ b/Documentation/filesystems/xfs-self-describing-metadata.txt
@@ -222,7 +222,7 @@ static void
 xfs_foo_read_verify(
 	struct xfs_buf	*bp)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
 
         if ((xfs_sb_version_hascrc(&mp->m_sb) &&
              !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
@@ -245,7 +245,7 @@ static bool
 xfs_foo_verify(
 	struct xfs_buf		*bp)
 {
-        struct xfs_mount	*mp = bp->b_target->bt_mount;
+        struct xfs_mount	*mp = bp->b_mount;
         struct xfs_ondisk_hdr	*hdr = bp->b_addr;
 
         if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC))
@@ -272,7 +272,7 @@ static bool
 xfs_foo_verify(
 	struct xfs_buf		*bp)
 {
-        struct xfs_mount	*mp = bp->b_target->bt_mount;
+        struct xfs_mount	*mp = bp->b_mount;
         struct xfs_ondisk_hdr	*hdr = bp->b_addr;
 
         if (hdr->magic == cpu_to_be32(XFS_FOO_CRC_MAGIC)) {
@@ -297,7 +297,7 @@ static void
 xfs_foo_write_verify(
 	struct xfs_buf	*bp)
 {
-	struct xfs_mount	*mp = bp->b_target->bt_mount;
+	struct xfs_mount	*mp = bp->b_mount;
 	struct xfs_buf_log_item	*bip = bp->b_fspriv;
 
 	if (!xfs_foo_verify(bp)) {
-- 
cgit 


From 102a3375e6671e6ca85dbde245a5071f8c09b760 Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@st.com>
Date: Tue, 4 Jun 2019 15:34:13 +0200
Subject: dt-bindings: i2c-stm32: document optional dmas

Add missing documentation for "dmas" and "dma-names" properties that can be
used on i2c-stm32.

Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
Reviewed-by: Pierre-Yves MORDRET <pierre-yves.mordret@st.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/devicetree/bindings/i2c/i2c-stm32.txt | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/i2c/i2c-stm32.txt b/Documentation/devicetree/bindings/i2c/i2c-stm32.txt
index f334738f7a35..ce3df2fff6c8 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-stm32.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-stm32.txt
@@ -21,6 +21,8 @@ Optional properties:
   100000 and 400000.
   For STM32F7, STM32H7 and STM32MP1 SoCs, Standard-mode, Fast-mode and Fast-mode
   Plus are supported, possible values are 100000, 400000 and 1000000.
+- dmas: List of phandles to rx and tx DMA channels. Refer to stm32-dma.txt.
+- dma-names: List of dma names. Valid names are: "rx" and "tx".
 - i2c-scl-rising-time-ns: I2C SCL Rising time for the board (default: 25)
   For STM32F7, STM32H7 and STM32MP1 only.
 - i2c-scl-falling-time-ns: I2C SCL Falling time for the board (default: 10)
-- 
cgit 


From 152c7776b9442f2f094da7d81e5a8f345dedb397 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 28 Jun 2019 13:07:42 -0700
Subject: block, documentation: Fix wbt_lat_usec documentation

Fix the spelling of the wbt_lat_usec sysfs attribute.

Fixes: 87760e5eef35 ("block: hook up writeback throttling") # v4.10.
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/queue-sysfs.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index 83b457e24bba..3eaf86806621 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -185,8 +185,8 @@ This is the number of bytes the device can write in a single write-same
 command.  A value of '0' means write-same is not supported by this
 device.
 
-wb_lat_usec (RW)
-----------------
+wbt_lat_usec (RW)
+-----------------
 If the device is registered for writeback throttling, then this file shows
 the target minimum read latency. If this latency is exceeded in a given
 window of time (see wb_window_usec), then the writeback throttling will start
-- 
cgit 


From 6728ac3396265184abe93f18b32aca329981e5ce Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 28 Jun 2019 13:07:43 -0700
Subject: block, documentation: Sort queue sysfs attribute names alphabetically

Commit f9824952ee1c ("block: update sysfs documentation") # v5.0 broke the
alphabetical order of the sysfs attribute names. List queue sysfs attribute
names alphabetically.

Cc: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/queue-sysfs.txt | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index 3eaf86806621..f6da2efe2105 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -14,6 +14,15 @@ add_random (RW)
 This file allows to turn off the disk entropy contribution. Default
 value of this file is '1'(on).
 
+chunk_sectors (RO)
+------------------
+This has different meaning depending on the type of the block device.
+For a RAID device (dm-raid), chunk_sectors indicates the size in 512B sectors
+of the RAID volume stripe segment. For a zoned block device, either host-aware
+or host-managed, chunk_sectors indicates the size in 512B sectors of the zones
+of the device, with the eventual exception of the last zone of the device which
+may be smaller.
+
 dax (RO)
 --------
 This file indicates whether the device supports Direct Access (DAX),
@@ -132,6 +141,12 @@ per-block-cgroup request pool.  IOW, if there are N block cgroups,
 each request queue may have up to N request pools, each independently
 regulated by nr_requests.
 
+nr_zones (RO)
+-------------
+For zoned block devices (zoned attribute indicating "host-managed" or
+"host-aware"), this indicates the total number of zones of the device.
+This is always 0 for regular block devices.
+
 optimal_io_size (RO)
 --------------------
 This is the optimal IO size reported by the device.
@@ -213,19 +228,4 @@ devices are described in the ZBC (Zoned Block Commands) and ZAC
 do not support zone commands, they will be treated as regular block devices
 and zoned will report "none".
 
-nr_zones (RO)
--------------
-For zoned block devices (zoned attribute indicating "host-managed" or
-"host-aware"), this indicates the total number of zones of the device.
-This is always 0 for regular block devices.
-
-chunk_sectors (RO)
-------------------
-This has different meaning depending on the type of the block device.
-For a RAID device (dm-raid), chunk_sectors indicates the size in 512B sectors
-of the RAID volume stripe segment. For a zoned block device, either host-aware
-or host-managed, chunk_sectors indicates the size in 512B sectors of the zones
-of the device, with the eventual exception of the last zone of the device which
-may be smaller.
-
 Jens Axboe <jens.axboe@oracle.com>, February 2009
-- 
cgit 


From 0c766e78bda6d4edf40779fc0cd48d0867a04d84 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 28 Jun 2019 13:07:44 -0700
Subject: block, documentation: Explain the word 'segments'

Several block layer users who are not kernel developers do not know that
the word 'segment' refers to an element in a DMA scatter/gather list. Make
the block layer documentation easier to understand by stating explicitly
what the word 'segment' stands for.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/queue-sysfs.txt | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index f6da2efe2105..1515dcf3dec4 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -98,8 +98,9 @@ This is the maximum number of kilobytes supported in a single data transfer.
 
 max_integrity_segments (RO)
 ---------------------------
-When read, this file shows the max limit of integrity segments as
-set by block layer which a hardware controller can handle.
+Maximum number of elements in a DMA scatter/gather list with integrity
+data that will be submitted by the block layer core to the associated
+block driver.
 
 max_sectors_kb (RW)
 -------------------
@@ -109,11 +110,12 @@ size allowed by the hardware.
 
 max_segments (RO)
 -----------------
-Maximum number of segments of the device.
+Maximum number of elements in a DMA scatter/gather list that is submitted
+to the associated block driver.
 
 max_segment_size (RO)
 ---------------------
-Maximum segment size of the device.
+Maximum size in bytes of a single element in a DMA scatter/gather list.
 
 minimum_io_size (RO)
 --------------------
-- 
cgit 


From fbbe7c86b483878da4a2ec7b899e0814195942af Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 28 Jun 2019 13:07:45 -0700
Subject: block, documentation: Document discard_zeroes_data, fua,
 max_discard_segments and write_zeroes_max_bytes

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/queue-sysfs.txt | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index 1515dcf3dec4..b40b5b7cebd9 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -52,6 +52,16 @@ large discards are issued, setting this value lower will make Linux issue
 smaller discards and potentially help reduce latencies induced by large
 discard operations.
 
+discard_zeroes_data (RO)
+------------------------
+Obsolete. Always zero.
+
+fua (RO)
+--------
+Whether or not the block driver supports the FUA flag for write requests.
+FUA stands for Force Unit Access. If the FUA flag is set that means that
+write requests must bypass the volatile cache of the storage device.
+
 hw_sector_size (RO)
 -------------------
 This is the hardware sector size of the device, in bytes.
@@ -92,6 +102,10 @@ logical_block_size (RO)
 -----------------------
 This is the logical block size of the device, in bytes.
 
+max_discard_segments (RO)
+-------------------------
+The maximum number of DMA scatter/gather entries in a discard request.
+
 max_hw_sectors_kb (RO)
 ----------------------
 This is the maximum number of kilobytes supported in a single data transfer.
@@ -218,6 +232,12 @@ blk-throttle makes decision based on the samplings. Lower time means cgroups
 have more smooth throughput, but higher CPU overhead. This exists only when
 CONFIG_BLK_DEV_THROTTLING_LOW is enabled.
 
+write_zeroes_max_bytes (RO)
+---------------------------
+For block drivers that support REQ_OP_WRITE_ZEROES, the maximum number of
+bytes that can be zeroed at once. The value 0 means that REQ_OP_WRITE_ZEROES
+is not supported.
+
 zoned (RO)
 ----------
 This indicates if the device is a zoned block device and the zone model of the
-- 
cgit 


From 7f40c260df86b9d145a4ddb13879787c266c5232 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Thu, 30 May 2019 21:13:19 -0500
Subject: dt-bindings: hwlock: Update OMAP binding for TI K3 SoCs

The TI K3 AM65x and J721E family of SoCs have a HwSpinlock IP that
is similar to the existing HwSpinlock IP present in OMAP architecture
based SoCs with minor differences. Update the existing OMAP HwSpinlock
binding for this IP on TI K3 AM65x and J721E SoCs. The same compatible
from AM65x SoCs is reused for J721E SoCs.

Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 .../devicetree/bindings/hwlock/omap-hwspinlock.txt | 25 +++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/hwlock/omap-hwspinlock.txt b/Documentation/devicetree/bindings/hwlock/omap-hwspinlock.txt
index 2c9804f4f4ac..8d365f89694c 100644
--- a/Documentation/devicetree/bindings/hwlock/omap-hwspinlock.txt
+++ b/Documentation/devicetree/bindings/hwlock/omap-hwspinlock.txt
@@ -1,12 +1,16 @@
-OMAP4+ HwSpinlock Driver
-========================
+TI HwSpinlock for OMAP and K3 based SoCs
+=========================================
 
 Required properties:
-- compatible:		Should be "ti,omap4-hwspinlock" for
-			    OMAP44xx, OMAP54xx, AM33xx, AM43xx, DRA7xx SoCs
+- compatible:		Should be one of the following,
+			  "ti,omap4-hwspinlock" for
+				OMAP44xx, OMAP54xx, AM33xx, AM43xx, DRA7xx SoCs
+			  "ti,am654-hwspinlock" for
+				K3 AM65x and J721E SoCs
 - reg:			Contains the hwspinlock module register address space
 			(base address and length)
 - ti,hwmods:		Name of the hwmod associated with the hwspinlock device
+			(for OMAP architecture based SoCs only)
 - #hwlock-cells:	Should be 1. The OMAP hwspinlock users will use a
 			0-indexed relative hwlock number as the argument
 			specifier value for requesting a specific hwspinlock
@@ -17,10 +21,21 @@ Please look at the generic hwlock binding for usage information for consumers,
 
 Example:
 
-/* OMAP4 */
+1. OMAP4 SoCs
 hwspinlock: spinlock@4a0f6000 {
 	compatible = "ti,omap4-hwspinlock";
 	reg = <0x4a0f6000 0x1000>;
 	ti,hwmods = "spinlock";
 	#hwlock-cells = <1>;
 };
+
+2. AM65x SoCs and J721E SoCs
+&cbass_main {
+	cbass_main_navss: interconnect0 {
+		hwspinlock: spinlock@30e00000 {
+			compatible = "ti,am654-hwspinlock";
+			reg = <0x00 0x30e00000 0x00 0x1000>;
+			#hwlock-cells = <1>;
+		};
+	};
+};
-- 
cgit 


From b1a17513a2d60f9e933016bed04d0eeb8651a915 Mon Sep 17 00:00:00 2001
From: Clement Leger <cleger@kalray.eu>
Date: Mon, 17 Jun 2019 14:57:30 +0200
Subject: remoteproc: add vendor resources handling

In order to allow rproc backend to handle vendor resources such as in
OpenAMP, add a handle_rsc hook. This hook allow the rproc backends to
handle vendor resources as they like. The hook will be called only for
vendor resources and should return RSC_HANDLED on successful resource
handling, RSC_IGNORED if resource was ignored, or a negative value on
error.

Signed-off-by: Clement Leger <cleger@kalray.eu>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 Documentation/remoteproc.txt | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/remoteproc.txt b/Documentation/remoteproc.txt
index 77fb03acdbb4..03c3d2e568b0 100644
--- a/Documentation/remoteproc.txt
+++ b/Documentation/remoteproc.txt
@@ -314,6 +314,8 @@ Here are the various resource types that are currently supported::
    * @RSC_VDEV:       declare support for a virtio device, and serve as its
    *		    virtio header.
    * @RSC_LAST:       just keep this one at the end
+   * @RSC_VENDOR_START:	start of the vendor specific resource types range
+   * @RSC_VENDOR_END:	end of the vendor specific resource types range
    *
    * Please note that these values are used as indices to the rproc_handle_rsc
    * lookup table, so please keep them sane. Moreover, @RSC_LAST is used to
@@ -321,11 +323,13 @@ Here are the various resource types that are currently supported::
    * please update it as needed.
    */
   enum fw_resource_type {
-	RSC_CARVEOUT	= 0,
-	RSC_DEVMEM	= 1,
-	RSC_TRACE	= 2,
-	RSC_VDEV	= 3,
-	RSC_LAST	= 4,
+	RSC_CARVEOUT		= 0,
+	RSC_DEVMEM		= 1,
+	RSC_TRACE		= 2,
+	RSC_VDEV		= 3,
+	RSC_LAST		= 4,
+	RSC_VENDOR_START	= 128,
+	RSC_VENDOR_END		= 512,
   };
 
 For more details regarding a specific resource type, please see its
-- 
cgit 


From f83c0510de8eba0bd6f71b0d4897ab3c2e7a24c0 Mon Sep 17 00:00:00 2001
From: Fabien Dessenne <fabien.dessenne@st.com>
Date: Tue, 14 May 2019 10:26:56 +0200
Subject: dt-bindings: stm32: add bindings for ML-AHB interconnect

Document the ML-AHB interconnect for stm32 SoCs.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Fabien Dessenne <fabien.dessenne@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 .../devicetree/bindings/arm/stm32/mlahb.txt        | 37 ++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/arm/stm32/mlahb.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/stm32/mlahb.txt b/Documentation/devicetree/bindings/arm/stm32/mlahb.txt
new file mode 100644
index 000000000000..25307aa1eb9b
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/stm32/mlahb.txt
@@ -0,0 +1,37 @@
+ML-AHB interconnect bindings
+
+These bindings describe the STM32 SoCs ML-AHB interconnect bus which connects
+a Cortex-M subsystem with dedicated memories.
+The MCU SRAM and RETRAM memory parts can be accessed through different addresses
+(see "RAM aliases" in [1]) using different buses (see [2]) : balancing the
+Cortex-M firmware accesses among those ports allows to tune the system
+performance.
+
+[1]: https://www.st.com/resource/en/reference_manual/dm00327659.pdf
+[2]: https://wiki.st.com/stm32mpu/wiki/STM32MP15_RAM_mapping
+
+Required properties:
+- compatible: should be "simple-bus"
+- dma-ranges: describes memory addresses translation between the local CPU and
+	   the remote Cortex-M processor. Each memory region, is declared with
+	   3 parameters:
+		 - param 1: device base address (Cortex-M processor address)
+		 - param 2: physical base address (local CPU address)
+		 - param 3: size of the memory region.
+
+The Cortex-M remote processor accessed via the mlahb interconnect is described
+by a child node.
+
+Example:
+mlahb {
+	compatible = "simple-bus";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	dma-ranges = <0x00000000 0x38000000 0x10000>,
+		     <0x10000000 0x10000000 0x60000>,
+		     <0x30000000 0x30000000 0x60000>;
+
+	m4_rproc: m4@10000000 {
+		...
+	};
+};
-- 
cgit 


From 6f576b439017aa6faa8b82e908212691c1817e6e Mon Sep 17 00:00:00 2001
From: Fabien Dessenne <fabien.dessenne@st.com>
Date: Tue, 14 May 2019 10:26:57 +0200
Subject: dt-bindings: remoteproc: add bindings for stm32 remote processor
 driver

Add the device tree bindings document for the stm32 remoteproc devices.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Fabien Dessenne <fabien.dessenne@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 .../devicetree/bindings/remoteproc/stm32-rproc.txt | 63 ++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/remoteproc/stm32-rproc.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/remoteproc/stm32-rproc.txt b/Documentation/devicetree/bindings/remoteproc/stm32-rproc.txt
new file mode 100644
index 000000000000..5fa915a4b736
--- /dev/null
+++ b/Documentation/devicetree/bindings/remoteproc/stm32-rproc.txt
@@ -0,0 +1,63 @@
+STMicroelectronics STM32 Remoteproc
+-----------------------------------
+This document defines the binding for the remoteproc component that loads and
+boots firmwares on the ST32MP family chipset.
+
+Required properties:
+- compatible:	Must be "st,stm32mp1-m4"
+- reg:		Address ranges of the RETRAM and MCU SRAM memories used by the
+		remote processor.
+- resets:	Reference to a reset controller asserting the remote processor.
+- st,syscfg-holdboot: Reference to the system configuration which holds the
+		remote processor reset hold boot
+	1st cell: phandle of syscon block
+	2nd cell: register offset containing the hold boot setting
+	3rd cell: register bitmask for the hold boot field
+- st,syscfg-tz: Reference to the system configuration which holds the RCC trust
+		zone mode
+	1st cell: phandle to syscon block
+	2nd cell: register offset containing the RCC trust zone mode setting
+	3rd cell: register bitmask for the RCC trust zone mode bit
+
+Optional properties:
+- interrupts:	Should contain the watchdog interrupt
+- mboxes:	This property is required only if the rpmsg/virtio functionality
+		is used. List of phandle and mailbox channel specifiers:
+		- a channel (a) used to communicate through virtqueues with the
+		  remote proc.
+		  Bi-directional channel:
+		      - from local to remote = send message
+		      - from remote to local = send message ack
+		- a channel (b) working the opposite direction of channel (a)
+		- a channel (c) used by the local proc to notify the remote proc
+		  that it is about to be shut down.
+		  Unidirectional channel:
+		      - from local to remote, where ACK from the remote means
+		        that it is ready for shutdown
+- mbox-names:	This property is required if the mboxes property is used.
+		- must be "vq0" for channel (a)
+		- must be "vq1" for channel (b)
+		- must be "shutdown" for channel (c)
+- memory-region: List of phandles to the reserved memory regions associated with
+		the remoteproc device. This is variable and describes the
+		memories shared with the remote processor (eg: remoteproc
+		firmware and carveouts, rpmsg vrings, ...).
+		(see ../reserved-memory/reserved-memory.txt)
+- st,syscfg-pdds: Reference to the system configuration which holds the remote
+		processor deep sleep setting
+	1st cell: phandle to syscon block
+	2nd cell: register offset containing the deep sleep setting
+	3rd cell: register bitmask for the deep sleep bit
+- st,auto-boot:	If defined, when remoteproc is probed, it loads the default
+		firmware and starts the remote processor.
+
+Example:
+	m4_rproc: m4@10000000 {
+		compatible = "st,stm32mp1-m4";
+		reg = <0x10000000 0x40000>,
+		      <0x30000000 0x40000>,
+		      <0x38000000 0x10000>;
+		resets = <&rcc MCU_R>;
+		st,syscfg-holdboot = <&rcc 0x10C 0x1>;
+		st,syscfg-tz = <&rcc 0x000 0x1>;
+	};
-- 
cgit 


From bce6f5221374ba451a337d0a3773e6eb99dad3e8 Mon Sep 17 00:00:00 2001
From: Fabien Dessenne <fabien.dessenne@st.com>
Date: Thu, 7 Mar 2019 16:58:22 +0100
Subject: hwspinlock: document the hwspinlock 'raw' API

Document the hwspin_lock_timeout_raw(), hwspin_trylock_raw() and
hwspin_unlock_raw() API.

Signed-off-by: Fabien Dessenne <fabien.dessenne@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 Documentation/hwspinlock.txt | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/hwspinlock.txt b/Documentation/hwspinlock.txt
index ed640a278185..c3403f9ae27a 100644
--- a/Documentation/hwspinlock.txt
+++ b/Documentation/hwspinlock.txt
@@ -134,6 +134,23 @@ notably -ETIMEDOUT if the hwspinlock is still busy after timeout msecs).
 
 The function will never sleep.
 
+::
+
+  int hwspin_lock_timeout_raw(struct hwspinlock *hwlock, unsigned int timeout);
+
+Lock a previously-assigned hwspinlock with a timeout limit (specified in
+msecs). If the hwspinlock is already taken, the function will busy loop
+waiting for it to be released, but give up when the timeout elapses.
+
+Caution: User must protect the routine of getting hardware lock with mutex
+or spinlock to avoid dead-lock, that will let user can do some time-consuming
+or sleepable operations under the hardware lock.
+
+Returns 0 when successful and an appropriate error code otherwise (most
+notably -ETIMEDOUT if the hwspinlock is still busy after timeout msecs).
+
+The function will never sleep.
+
 ::
 
   int hwspin_trylock(struct hwspinlock *hwlock);
@@ -184,6 +201,21 @@ Returns 0 on success and an appropriate error code otherwise (most
 notably -EBUSY if the hwspinlock was already taken).
 The function will never sleep.
 
+::
+
+  int hwspin_trylock_raw(struct hwspinlock *hwlock);
+
+Attempt to lock a previously-assigned hwspinlock, but immediately fail if
+it is already taken.
+
+Caution: User must protect the routine of getting hardware lock with mutex
+or spinlock to avoid dead-lock, that will let user can do some time-consuming
+or sleepable operations under the hardware lock.
+
+Returns 0 on success and an appropriate error code otherwise (most
+notably -EBUSY if the hwspinlock was already taken).
+The function will never sleep.
+
 ::
 
   void hwspin_unlock(struct hwspinlock *hwlock);
@@ -220,6 +252,16 @@ Upon a successful return from this function, preemption is reenabled,
 and the state of the local interrupts is restored to the state saved at
 the given flags. This function will never sleep.
 
+::
+
+  void hwspin_unlock_raw(struct hwspinlock *hwlock);
+
+Unlock a previously-locked hwspinlock.
+
+The caller should **never** unlock an hwspinlock which is already unlocked.
+Doing so is considered a bug (there is no protection against this).
+This function will never sleep.
+
 ::
 
   int hwspin_lock_get_id(struct hwspinlock *hwlock);
-- 
cgit 


From 360aa640a59f269b784848c0b2d6d462952750d9 Mon Sep 17 00:00:00 2001
From: Fabien Dessenne <fabien.dessenne@st.com>
Date: Thu, 7 Mar 2019 16:58:23 +0100
Subject: hwspinlock: add the 'in_atomic' API

Add the 'in_atomic' mode which can be called from an atomic context.
This mode relies on the existing 'raw' mode (no lock, no preemption/irq
disabling) with the difference that the timeout is not based on jiffies
(jiffies won't increase when irq are disabled) but handled with
busy-waiting udelay() calls.

Signed-off-by: Fabien Dessenne <fabien.dessenne@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 Documentation/hwspinlock.txt | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/hwspinlock.txt b/Documentation/hwspinlock.txt
index c3403f9ae27a..6f03713b7003 100644
--- a/Documentation/hwspinlock.txt
+++ b/Documentation/hwspinlock.txt
@@ -151,6 +151,22 @@ notably -ETIMEDOUT if the hwspinlock is still busy after timeout msecs).
 
 The function will never sleep.
 
+::
+
+  int hwspin_lock_timeout_in_atomic(struct hwspinlock *hwlock, unsigned int to);
+
+Lock a previously-assigned hwspinlock with a timeout limit (specified in
+msecs). If the hwspinlock is already taken, the function will busy loop
+waiting for it to be released, but give up when the timeout elapses.
+
+This function shall be called only from an atomic context and the timeout
+value shall not exceed a few msecs.
+
+Returns 0 when successful and an appropriate error code otherwise (most
+notably -ETIMEDOUT if the hwspinlock is still busy after timeout msecs).
+
+The function will never sleep.
+
 ::
 
   int hwspin_trylock(struct hwspinlock *hwlock);
@@ -216,6 +232,19 @@ Returns 0 on success and an appropriate error code otherwise (most
 notably -EBUSY if the hwspinlock was already taken).
 The function will never sleep.
 
+::
+
+  int hwspin_trylock_in_atomic(struct hwspinlock *hwlock);
+
+Attempt to lock a previously-assigned hwspinlock, but immediately fail if
+it is already taken.
+
+This function shall be called only from an atomic context.
+
+Returns 0 on success and an appropriate error code otherwise (most
+notably -EBUSY if the hwspinlock was already taken).
+The function will never sleep.
+
 ::
 
   void hwspin_unlock(struct hwspinlock *hwlock);
@@ -262,6 +291,16 @@ The caller should **never** unlock an hwspinlock which is already unlocked.
 Doing so is considered a bug (there is no protection against this).
 This function will never sleep.
 
+::
+
+  void hwspin_unlock_in_atomic(struct hwspinlock *hwlock);
+
+Unlock a previously-locked hwspinlock.
+
+The caller should **never** unlock an hwspinlock which is already unlocked.
+Doing so is considered a bug (there is no protection against this).
+This function will never sleep.
+
 ::
 
   int hwspin_lock_get_id(struct hwspinlock *hwlock);
-- 
cgit 


From 7282a93f4df586cac84a81c37f38cccec2e1d8bb Mon Sep 17 00:00:00 2001
From: Stephen Kitt <steve@sk2.org>
Date: Fri, 28 Jun 2019 20:38:41 +0200
Subject: Disable Sphinx SmartyPants in HTML output

The handling of dashes in particular results in confusing
documentation in a number of instances, since "--" becomes an
en-dash. This disables SmartyPants wholesale, losing smart quotes
along with smart dashes.

With Sphinx 1.6 we could fine-tune the conversion, using the new
smartquotes and smartquotes_action settings.

Signed-off-by: Stephen Kitt <steve@sk2.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/conf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/conf.py b/Documentation/conf.py
index a502baecbb85..3b2397bcb565 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -201,7 +201,7 @@ html_context = {
 
 # If true, SmartyPants will be used to convert quotes and dashes to
 # typographically correct entities.
-#html_use_smartypants = True
+html_use_smartypants = False
 
 # Custom sidebar templates, maps document names to template names.
 #html_sidebars = {}
-- 
cgit 


From 86b4da8c0e7fcb6c217c604efcd9438ad55dd055 Mon Sep 17 00:00:00 2001
From: Prakhar Srivastava <prsriva02@gmail.com>
Date: Sun, 23 Jun 2019 23:23:30 -0700
Subject: IMA: Define a new template field buf

A buffer(kexec boot command line arguments) measured into IMA
measuremnt list cannot be appraised, without already being
aware of the buffer contents. Since hashes are non-reversible,
raw buffer is needed for validation or regenerating hash for
appraisal/attestation.

Add support to store/read the buffer contents in HEX.
The kexec cmdline hash is stored in the "d-ng" field of the
template data.  It can be verified using
sudo cat /sys/kernel/security/integrity/ima/ascii_runtime_measurements |
  grep  kexec-cmdline | cut -d' ' -f 6 | xxd -r -p | sha256sum

- Add two new fields to ima_event_data to hold the buf and
buf_len
- Add a new template field 'buf' to be used to store/read
the buffer data.
- Updated process_buffer_meaurement to add the buffer to
ima_event_data. process_buffer_measurement added in
"Define a new IMA hook to measure the boot command line
 arguments"
- Add a new template policy name ima-buf to represent
'd-ng|n-ng|buf'

Signed-off-by: Prakhar Srivastava <prsriva02@gmail.com>
Reviewed-by: Roberto Sassu <roberto.sassu@huawei.com>
Reviewed-by: James Morris <jamorris@linux.microsoft.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 Documentation/security/IMA-templates.rst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/security/IMA-templates.rst b/Documentation/security/IMA-templates.rst
index 2cd0e273cc9a..3d1cca287aa4 100644
--- a/Documentation/security/IMA-templates.rst
+++ b/Documentation/security/IMA-templates.rst
@@ -69,15 +69,16 @@ descriptors by adding their identifier to the format string
    algorithm (field format: [<hash algo>:]digest, where the digest
    prefix is shown only if the hash algorithm is not SHA1 or MD5);
  - 'n-ng': the name of the event, without size limitations;
- - 'sig': the file signature.
+ - 'sig': the file signature;
+ - 'buf': the buffer data that was used to generate the hash without size limitations;
 
 
 Below, there is the list of defined template descriptors:
 
  - "ima": its format is ``d|n``;
  - "ima-ng" (default): its format is ``d-ng|n-ng``;
- - "ima-sig": its format is ``d-ng|n-ng|sig``.
-
+ - "ima-sig": its format is ``d-ng|n-ng|sig``;
+ - "ima-buf": its format is ``d-ng|n-ng|buf``;
 
 
 Use
-- 
cgit 


From ac499fba98c3c65078fd84fa0a62cd6f6d5837ed Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sat, 29 Jun 2019 07:36:46 -0300
Subject: docs: ipmb: place it at driver-api and convert to ReST

No new doc should be added at the main Documentation/ directory.

Instead, new docs should be added as ReST files, within the
Kernel documentation body.

Fixes: 51bd6f291583 ("Add support for IPMB driver")
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Message-Id: <d23c36ca65fe6ad56af1723bf70f7a7f4154c410.1561804596.git.mchehab+samsung@kernel.org>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 Documentation/IPMB.txt             | 103 ------------------------------------
 Documentation/driver-api/index.rst |   1 +
 Documentation/driver-api/ipmb.rst  | 105 +++++++++++++++++++++++++++++++++++++
 3 files changed, 106 insertions(+), 103 deletions(-)
 delete mode 100644 Documentation/IPMB.txt
 create mode 100644 Documentation/driver-api/ipmb.rst

(limited to 'Documentation')

diff --git a/Documentation/IPMB.txt b/Documentation/IPMB.txt
deleted file mode 100644
index a6ed8b68bd0f..000000000000
--- a/Documentation/IPMB.txt
+++ /dev/null
@@ -1,103 +0,0 @@
-==============================
-IPMB Driver for a Satellite MC
-==============================
-
-The Intelligent Platform Management Bus or IPMB, is an
-I2C bus that provides a standardized interconnection between
-different boards within a chassis. This interconnection is
-between the baseboard management (BMC) and chassis electronics.
-IPMB is also associated with the messaging protocol through the
-IPMB bus.
-
-The devices using the IPMB are usually management
-controllers that perform management functions such as servicing
-the front panel interface, monitoring the baseboard,
-hot-swapping disk drivers in the system chassis, etc...
-
-When an IPMB is implemented in the system, the BMC serves as
-a controller to give system software access to the IPMB. The BMC
-sends IPMI requests to a device (usually a Satellite Management
-Controller or Satellite MC) via IPMB and the device
-sends a response back to the BMC.
-
-For more information on IPMB and the format of an IPMB message,
-refer to the IPMB and IPMI specifications.
-
-IPMB driver for Satellite MC
-----------------------------
-
-ipmb-dev-int - This is the driver needed on a Satellite MC to
-receive IPMB messages from a BMC and send a response back.
-This driver works with the I2C driver and a userspace
-program such as OpenIPMI:
-
-1) It is an I2C slave backend driver. So, it defines a callback
-function to set the Satellite MC as an I2C slave.
-This callback function handles the received IPMI requests.
-
-2) It defines the read and write functions to enable a user
-space program (such as OpenIPMI) to communicate with the kernel.
-
-
-Load the IPMB driver
---------------------
-
-The driver needs to be loaded at boot time or manually first.
-First, make sure you have the following in your config file:
-CONFIG_IPMB_DEVICE_INTERFACE=y
-
-1) If you want the driver to be loaded at boot time:
-
-a) Add this entry to your ACPI table, under the appropriate SMBus:
-
-Device (SMB0) // Example SMBus host controller
-{
-  Name (_HID, "<Vendor-Specific HID>") // Vendor-Specific HID
-  Name (_UID, 0) // Unique ID of particular host controller
-  :
-  :
-    Device (IPMB)
-    {
-      Name (_HID, "IPMB0001") // IPMB device interface
-      Name (_UID, 0) // Unique device identifier
-    }
-}
-
-b) Example for device tree:
-
-&i2c2 {
-         status = "okay";
-
-         ipmb@10 {
-                 compatible = "ipmb-dev";
-                 reg = <0x10>;
-         };
-};
-
-2) Manually from Linux:
-modprobe ipmb-dev-int
-
-
-Instantiate the device
-----------------------
-
-After loading the driver, you can instantiate the device as
-described in 'Documentation/i2c/instantiating-devices'.
-If you have multiple BMCs, each connected to your Satellite MC via
-a different I2C bus, you can instantiate a device for each of
-those BMCs.
-The name of the instantiated device contains the I2C bus number
-associated with it as follows:
-
-BMC1 ------ IPMB/I2C bus 1 ---------|   /dev/ipmb-1
-				Satellite MC
-BMC1 ------ IPMB/I2C bus 2 ---------|   /dev/ipmb-2
-
-For instance, you can instantiate the ipmb-dev-int device from
-user space at the 7 bit address 0x10 on bus 2:
-
-  # echo ipmb-dev 0x1010 > /sys/bus/i2c/devices/i2c-2/new_device
-
-This will create the device file /dev/ipmb-2, which can be accessed
-by the user space program. The device needs to be instantiated
-before running the user space program.
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index d26308af6036..b189bd3013ff 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -34,6 +34,7 @@ available subsections can be seen below.
    pci/index
    spi
    i2c
+   ipmb
    i3c/index
    hsi
    edac
diff --git a/Documentation/driver-api/ipmb.rst b/Documentation/driver-api/ipmb.rst
new file mode 100644
index 000000000000..7e2265144157
--- /dev/null
+++ b/Documentation/driver-api/ipmb.rst
@@ -0,0 +1,105 @@
+==============================
+IPMB Driver for a Satellite MC
+==============================
+
+The Intelligent Platform Management Bus or IPMB, is an
+I2C bus that provides a standardized interconnection between
+different boards within a chassis. This interconnection is
+between the baseboard management (BMC) and chassis electronics.
+IPMB is also associated with the messaging protocol through the
+IPMB bus.
+
+The devices using the IPMB are usually management
+controllers that perform management functions such as servicing
+the front panel interface, monitoring the baseboard,
+hot-swapping disk drivers in the system chassis, etc...
+
+When an IPMB is implemented in the system, the BMC serves as
+a controller to give system software access to the IPMB. The BMC
+sends IPMI requests to a device (usually a Satellite Management
+Controller or Satellite MC) via IPMB and the device
+sends a response back to the BMC.
+
+For more information on IPMB and the format of an IPMB message,
+refer to the IPMB and IPMI specifications.
+
+IPMB driver for Satellite MC
+----------------------------
+
+ipmb-dev-int - This is the driver needed on a Satellite MC to
+receive IPMB messages from a BMC and send a response back.
+This driver works with the I2C driver and a userspace
+program such as OpenIPMI:
+
+1) It is an I2C slave backend driver. So, it defines a callback
+   function to set the Satellite MC as an I2C slave.
+   This callback function handles the received IPMI requests.
+
+2) It defines the read and write functions to enable a user
+   space program (such as OpenIPMI) to communicate with the kernel.
+
+
+Load the IPMB driver
+--------------------
+
+The driver needs to be loaded at boot time or manually first.
+First, make sure you have the following in your config file:
+CONFIG_IPMB_DEVICE_INTERFACE=y
+
+1) If you want the driver to be loaded at boot time:
+
+a) Add this entry to your ACPI table, under the appropriate SMBus::
+
+     Device (SMB0) // Example SMBus host controller
+     {
+     Name (_HID, "<Vendor-Specific HID>") // Vendor-Specific HID
+     Name (_UID, 0) // Unique ID of particular host controller
+     :
+     :
+       Device (IPMB)
+       {
+         Name (_HID, "IPMB0001") // IPMB device interface
+         Name (_UID, 0) // Unique device identifier
+       }
+     }
+
+b) Example for device tree::
+
+     &i2c2 {
+            status = "okay";
+
+            ipmb@10 {
+                    compatible = "ipmb-dev";
+                    reg = <0x10>;
+            };
+     };
+
+2) Manually from Linux::
+
+     modprobe ipmb-dev-int
+
+
+Instantiate the device
+----------------------
+
+After loading the driver, you can instantiate the device as
+described in 'Documentation/i2c/instantiating-devices'.
+If you have multiple BMCs, each connected to your Satellite MC via
+a different I2C bus, you can instantiate a device for each of
+those BMCs.
+
+The name of the instantiated device contains the I2C bus number
+associated with it as follows::
+
+  BMC1 ------ IPMB/I2C bus 1 ---------|   /dev/ipmb-1
+				Satellite MC
+  BMC1 ------ IPMB/I2C bus 2 ---------|   /dev/ipmb-2
+
+For instance, you can instantiate the ipmb-dev-int device from
+user space at the 7 bit address 0x10 on bus 2::
+
+  # echo ipmb-dev 0x1010 > /sys/bus/i2c/devices/i2c-2/new_device
+
+This will create the device file /dev/ipmb-2, which can be accessed
+by the user space program. The device needs to be instantiated
+before running the user space program.
-- 
cgit 


From ba45cff610a4223ce3ad98a700a820e3d60e3f7c Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Mon, 13 May 2019 15:39:10 +1000
Subject: powerpc: Document xive=off option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 243e25112d06 ("powerpc/xive: Native exploitation of the XIVE
interrupt controller") added an option to turn off Linux native XIVE
usage via the xive=off kernel command line option.

This documents this option.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Acked-by: Stewart Smith <stewart@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a4c3538857e9..7b15abf7db21 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5272,6 +5272,15 @@
 			Format:
 			<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
 
+	xive=		[PPC]
+			By default on POWER9 and above, the kernel will
+			natively use the XIVE interrupt controller. This option
+			allows the fallback firmware mode to be used:
+
+			off       Fallback to firmware control of XIVE interrupt
+				  controller on both pseries and powernv
+				  platforms. Only useful on POWER9 and above.
+
 	xhci-hcd.quirks		[USB,KNL]
 			A hex value specifying bitmask with supplemental xhci
 			host controller quirks. Meaning of each bit can be
-- 
cgit 


From 8c3166e17cf10161d2871dfb1d017287c7b79ff1 Mon Sep 17 00:00:00 2001
From: Evan Green <evgreen@chromium.org>
Date: Thu, 27 Jun 2019 13:44:45 -0700
Subject: mfd / platform: cros_ec_debugfs: Expose resume result via debugfs

For ECs that support it, the EC returns the number of slp_s0
transitions and whether or not there was a timeout in the resume
response. Expose the last resume result to usermode via debugfs so
that usermode can detect and report S0ix timeouts.

Signed-off-by: Evan Green <evgreen@chromium.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
---
 Documentation/ABI/testing/debugfs-cros-ec | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/debugfs-cros-ec b/Documentation/ABI/testing/debugfs-cros-ec
index 573a82d23c89..1fe0add99a2a 100644
--- a/Documentation/ABI/testing/debugfs-cros-ec
+++ b/Documentation/ABI/testing/debugfs-cros-ec
@@ -32,3 +32,25 @@ Description:
 		is used for synchronizing the AP host time with the EC
 		log. An error is returned if the command is not supported
 		by the EC or there is a communication problem.
+
+What:		/sys/kernel/debug/<cros-ec-device>/last_resume_result
+Date:		June 2019
+KernelVersion:	5.3
+Description:
+		Some ECs have a feature where they will track transitions to
+		the (Intel) processor's SLP_S0 line, in order to detect cases
+		where a system failed to go into S0ix. When the system resumes,
+		an EC with this feature will return a summary of SLP_S0
+		transitions that occurred. The last_resume_result file returns
+		the most recent response from the AP's resume message to the EC.
+
+		The bottom 31 bits contain a count of the number of SLP_S0
+		transitions that occurred since the suspend message was
+		received. Bit 31 is set if the EC attempted to wake the
+		system due to a timeout when watching for SLP_S0 transitions.
+		Callers can use this to detect a wake from the EC due to
+		S0ix timeouts. The result will be zero if no suspend
+		transitions have been attempted, or the EC does not support
+		this feature.
+
+		Output will be in the format: "0x%08x\n".
-- 
cgit 


From 06deb86a748a1667d906af996775603f2bc34d00 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Mon, 1 Jul 2019 13:59:45 +0000
Subject: habanalabs: Add debugfs node for engines status

Command submissions sent to the device are composed of command buffers
which are targeted to different device engines, like DMA and compute
entities. When a command submission gets stuck, knowing in which engine
the stuck is, is crucial for debugging.
This patch adds a debugfs node that exports this information, by
displaying the engines' various registers that assemble their idle/busy
status.
The information retrieval is based on the is_device_idle ASIC function.
The printout in this function, of the first detected busy engine, is
removed because it becomes redundant in the presence of the more
elaborated info of the new debugfs node.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 Documentation/ABI/testing/debugfs-driver-habanalabs | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index 18191c2becab..f0ac14b70ecb 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -51,6 +51,13 @@ Description:    Enables the root user to set the device to specific state.
                 Valid values are "disable", "enable", "suspend", "resume".
                 User can read this property to see the valid values
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/engines
+Date:           Jul 2019
+KernelVersion:  5.3
+Contact:        oded.gabbay@gmail.com
+Description:    Displays the status registers values of the device engines and
+                their derived idle status
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_addr
 Date:           Jan 2019
 KernelVersion:  5.1
-- 
cgit 


From b108ad53bb65c19ae8e2d9c7fc0d052b3d9c153b Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Fri, 24 May 2019 08:06:25 -0300
Subject: dt-bindings: reset: imx7: Fix the spelling of 'indices'

The correct spelling is 'indices', so fix it accordingly.

Signed-off-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 Documentation/devicetree/bindings/reset/fsl,imx7-src.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt b/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt
index 2ecf33815d18..13e095182db4 100644
--- a/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt
+++ b/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt
@@ -45,6 +45,6 @@ Example:
         };
 
 
-For list of all valid reset indicies see
+For list of all valid reset indices see
 <dt-bindings/reset/imx7-reset.h> for i.MX7 and
 <dt-bindings/reset/imx8mq-reset.h> for i.MX8MQ
-- 
cgit 


From dca895b65d634f9e6506d5385ed58a8b9abd4900 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 1 Jul 2019 16:45:28 +0800
Subject: Documentation/networking: fix default_ttl typo in mpls-sysctl

default_ttl should be integer instead of bool

Reported-by: Ying Xu <yinxu@redhat.com>
Fixes: a59166e47086 ("mpls: allow TTL propagation from IP packets to be configured")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/mpls-sysctl.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt
index 2f24a1912a48..025cc9b96992 100644
--- a/Documentation/networking/mpls-sysctl.txt
+++ b/Documentation/networking/mpls-sysctl.txt
@@ -30,7 +30,7 @@ ip_ttl_propagate - BOOL
 	0 - disabled / RFC 3443 [Short] Pipe Model
 	1 - enabled / RFC 3443 Uniform Model (default)
 
-default_ttl - BOOL
+default_ttl - INTEGER
 	Default TTL value to use for MPLS packets where it cannot be
 	propagated from an IP header, either because one isn't present
 	or ip_ttl_propagate has been disabled.
-- 
cgit 


From a346abe051bd2bd0d5d0140b2da9ec95639acad7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 1 Jul 2019 06:39:36 -0700
Subject: ipv6: icmp: allow flowlabel reflection in echo replies

Extend flowlabel_reflect bitmask to allow conditional
reflection of incoming flowlabels in echo replies.

Note this has precedence against auto flowlabels.

Add flowlabel_reflect enum to replace hard coded
values.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index e0d8a96e2c67..f0e6d1f53485 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1452,7 +1452,7 @@ flowlabel_reflect - INTEGER
 	environments. See RFC 7690 and:
 	https://tools.ietf.org/html/draft-wang-6man-flow-label-reflection-01
 
-	This is a mask of two bits.
+	This is a bitmask.
 	1: enabled for established flows
 
 	Note that this prevents automatic flowlabel changes, as done
@@ -1463,6 +1463,8 @@ flowlabel_reflect - INTEGER
 	If set, a RST packet sent in response to a SYN packet on a closed
 	port will reflect the incoming flow label.
 
+	4: enabled for ICMPv6 echo reply messages.
+
 	Default: 0
 
 fib_multipath_hash_policy - INTEGER
-- 
cgit 


From 893ce44df56580fb878ca5af9c4a5fd87567da50 Mon Sep 17 00:00:00 2001
From: Catherine Sullivan <csully@google.com>
Date: Mon, 1 Jul 2019 15:57:52 -0700
Subject: gve: Add basic driver framework for Compute Engine Virtual NIC

Add a driver framework for the Compute Engine Virtual NIC that will be
available in the future.

At this point the only functionality is loading the driver.

Signed-off-by: Catherine Sullivan <csully@google.com>
Signed-off-by: Sagi Shahar <sagis@google.com>
Signed-off-by: Jon Olson <jonolson@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Luigi Rizzo <lrizzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/google/gve.rst       | 82 ++++++++++++++++++++++
 Documentation/networking/device_drivers/index.rst  |  1 +
 2 files changed, 83 insertions(+)
 create mode 100644 Documentation/networking/device_drivers/google/gve.rst

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/google/gve.rst b/Documentation/networking/device_drivers/google/gve.rst
new file mode 100644
index 000000000000..7397c82f4c8f
--- /dev/null
+++ b/Documentation/networking/device_drivers/google/gve.rst
@@ -0,0 +1,82 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+==============================================================
+Linux kernel driver for Compute Engine Virtual Ethernet (gve):
+==============================================================
+
+Supported Hardware
+===================
+The GVE driver binds to a single PCI device id used by the virtual
+Ethernet device found in some Compute Engine VMs.
+
++--------------+----------+---------+
+|Field         | Value    | Comments|
++==============+==========+=========+
+|Vendor ID     | `0x1AE0` | Google  |
++--------------+----------+---------+
+|Device ID     | `0x0042` |         |
++--------------+----------+---------+
+|Sub-vendor ID | `0x1AE0` | Google  |
++--------------+----------+---------+
+|Sub-device ID | `0x0058` |         |
++--------------+----------+---------+
+|Revision ID   | `0x0`    |         |
++--------------+----------+---------+
+|Device Class  | `0x200`  | Ethernet|
++--------------+----------+---------+
+
+PCI Bars
+========
+The gVNIC PCI device exposes three 32-bit memory BARS:
+- Bar0 - Device configuration and status registers.
+- Bar1 - MSI-X vector table
+- Bar2 - IRQ, RX and TX doorbells
+
+Device Interactions
+===================
+The driver interacts with the device in the following ways:
+ - Registers
+    - A block of MMIO registers
+    - See gve_register.h for more detail
+ - Admin Queue
+    - See description below
+ - Interrupts
+    - See supported interrupts below
+
+Registers
+---------
+All registers are MMIO and big endian.
+
+The registers are used for initializing and configuring the device as well as
+querying device status in response to management interrupts.
+
+Admin Queue (AQ)
+----------------
+The Admin Queue is a PAGE_SIZE memory block, treated as an array of AQ
+commands, used by the driver to issue commands to the device and set up
+resources.The driver and the device maintain a count of how many commands
+have been submitted and executed. To issue AQ commands, the driver must do
+the following (with proper locking):
+
+1)  Copy new commands into next available slots in the AQ array
+2)  Increment its counter by he number of new commands
+3)  Write the counter into the GVE_ADMIN_QUEUE_DOORBELL register
+4)  Poll the ADMIN_QUEUE_EVENT_COUNTER register until it equals
+    the value written to the doorbell, or until a timeout.
+
+The device will update the status field in each AQ command reported as
+executed through the ADMIN_QUEUE_EVENT_COUNTER register.
+
+Interrupts
+----------
+The following interrupts are supported by the driver:
+
+Management Interrupt
+~~~~~~~~~~~~~~~~~~~~
+The management interrupt is used by the device to tell the driver to
+look at the GVE_DEVICE_STATUS register.
+
+Notification Block Interrupts
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The notification block interrupts are used to tell the driver to poll
+the queues associated with that interrupt.
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 24598d5f8ffa..2b7fefe72351 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -21,6 +21,7 @@ Contents:
    intel/i40e
    intel/iavf
    intel/ice
+   google/gve
    mellanox/mlx5
 
 .. only::  subproject
-- 
cgit 


From f5cedc84a30d2d3d0e0a7f3eb53fbd66d9bf5517 Mon Sep 17 00:00:00 2001
From: Catherine Sullivan <csully@google.com>
Date: Mon, 1 Jul 2019 15:57:53 -0700
Subject: gve: Add transmit and receive support

Add support for passing traffic.

Signed-off-by: Catherine Sullivan <csully@google.com>
Signed-off-by: Sagi Shahar <sagis@google.com>
Signed-off-by: Jon Olson <jonolson@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Luigi Rizzo <lrizzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/google/gve.rst       | 30 ++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/google/gve.rst b/Documentation/networking/device_drivers/google/gve.rst
index 7397c82f4c8f..df8974fb3270 100644
--- a/Documentation/networking/device_drivers/google/gve.rst
+++ b/Documentation/networking/device_drivers/google/gve.rst
@@ -42,6 +42,8 @@ The driver interacts with the device in the following ways:
     - See description below
  - Interrupts
     - See supported interrupts below
+ - Transmit and Receive Queues
+    - See description below
 
 Registers
 ---------
@@ -80,3 +82,31 @@ Notification Block Interrupts
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The notification block interrupts are used to tell the driver to poll
 the queues associated with that interrupt.
+
+The handler for these irqs schedule the napi for that block to run
+and poll the queues.
+
+Traffic Queues
+--------------
+gVNIC's queues are composed of a descriptor ring and a buffer and are
+assigned to a notification block.
+
+The descriptor rings are power-of-two-sized ring buffers consisting of
+fixed-size descriptors. They advance their head pointer using a __be32
+doorbell located in Bar2. The tail pointers are advanced by consuming
+descriptors in-order and updating a __be32 counter. Both the doorbell
+and the counter overflow to zero.
+
+Each queue's buffers must be registered in advance with the device as a
+queue page list, and packet data can only be put in those pages.
+
+Transmit
+~~~~~~~~
+gve maps the buffers for transmit rings into a FIFO and copies the packets
+into the FIFO before sending them to the NIC.
+
+Receive
+~~~~~~~
+The buffers for receive rings are put into a data ring that is the same
+length as the descriptor ring and the head and tail pointers advance over
+the rings together.
-- 
cgit 


From 9e5f7d26a4c087f34e540a5a6f853f8d93d51448 Mon Sep 17 00:00:00 2001
From: Catherine Sullivan <csully@google.com>
Date: Mon, 1 Jul 2019 15:57:54 -0700
Subject: gve: Add workqueue and reset support

Add support for the workqueue to handle management interrupts and
support for resets.

Signed-off-by: Catherine Sullivan <csully@google.com>
Signed-off-by: Sagi Shahar <sagis@google.com>
Signed-off-by: Jon Olson <jonolson@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Luigi Rizzo <lrizzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/google/gve.rst | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/google/gve.rst b/Documentation/networking/device_drivers/google/gve.rst
index df8974fb3270..793693cef6e3 100644
--- a/Documentation/networking/device_drivers/google/gve.rst
+++ b/Documentation/networking/device_drivers/google/gve.rst
@@ -40,6 +40,8 @@ The driver interacts with the device in the following ways:
     - See gve_register.h for more detail
  - Admin Queue
     - See description below
+ - Reset
+    - At any time the device can be reset
  - Interrupts
     - See supported interrupts below
  - Transmit and Receive Queues
@@ -69,6 +71,12 @@ the following (with proper locking):
 The device will update the status field in each AQ command reported as
 executed through the ADMIN_QUEUE_EVENT_COUNTER register.
 
+Device Resets
+-------------
+A device reset is triggered by writing 0x0 to the AQ PFN register.
+This causes the device to release all resources allocated by the
+driver, including the AQ itself.
+
 Interrupts
 ----------
 The following interrupts are supported by the driver:
@@ -78,6 +86,9 @@ Management Interrupt
 The management interrupt is used by the device to tell the driver to
 look at the GVE_DEVICE_STATUS register.
 
+The handler for the management irq simply queues the service task in
+the workqueue to check the register and acks the irq.
+
 Notification Block Interrupts
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The notification block interrupts are used to tell the driver to poll
-- 
cgit 


From cca4786174656673f89684347e0028c88df57031 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 28 Jun 2019 09:19:59 -0300
Subject: docs: hid: convert to ReST

Rename the HID documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

While here, fix the sysfs example from hid-sensor.txt, that
has a lot of "?" instead of the proper UTF-8 characters that
are produced by the tree command.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 Documentation/hid/hid-alps.rst      | 180 +++++++++++++
 Documentation/hid/hid-alps.txt      | 139 -----------
 Documentation/hid/hid-sensor.rst    | 242 ++++++++++++++++++
 Documentation/hid/hid-sensor.txt    | 224 -----------------
 Documentation/hid/hid-transport.rst | 359 ++++++++++++++++++++++++++
 Documentation/hid/hid-transport.txt | 317 -----------------------
 Documentation/hid/hiddev.rst        | 251 +++++++++++++++++++
 Documentation/hid/hiddev.txt        | 205 ---------------
 Documentation/hid/hidraw.rst        | 138 ++++++++++
 Documentation/hid/hidraw.txt        | 119 ---------
 Documentation/hid/index.rst         |  18 ++
 Documentation/hid/intel-ish-hid.rst | 485 ++++++++++++++++++++++++++++++++++++
 Documentation/hid/intel-ish-hid.txt | 454 ---------------------------------
 Documentation/hid/uhid.rst          | 193 ++++++++++++++
 Documentation/hid/uhid.txt          | 187 --------------
 Documentation/input/input.rst       |   2 +-
 16 files changed, 1867 insertions(+), 1646 deletions(-)
 create mode 100644 Documentation/hid/hid-alps.rst
 delete mode 100644 Documentation/hid/hid-alps.txt
 create mode 100644 Documentation/hid/hid-sensor.rst
 delete mode 100644 Documentation/hid/hid-sensor.txt
 create mode 100644 Documentation/hid/hid-transport.rst
 delete mode 100644 Documentation/hid/hid-transport.txt
 create mode 100644 Documentation/hid/hiddev.rst
 delete mode 100644 Documentation/hid/hiddev.txt
 create mode 100644 Documentation/hid/hidraw.rst
 delete mode 100644 Documentation/hid/hidraw.txt
 create mode 100644 Documentation/hid/index.rst
 create mode 100644 Documentation/hid/intel-ish-hid.rst
 delete mode 100644 Documentation/hid/intel-ish-hid.txt
 create mode 100644 Documentation/hid/uhid.rst
 delete mode 100644 Documentation/hid/uhid.txt

(limited to 'Documentation')

diff --git a/Documentation/hid/hid-alps.rst b/Documentation/hid/hid-alps.rst
new file mode 100644
index 000000000000..e2f4c4c11e3f
--- /dev/null
+++ b/Documentation/hid/hid-alps.rst
@@ -0,0 +1,180 @@
+==========================
+ALPS HID Touchpad Protocol
+==========================
+
+Introduction
+------------
+Currently ALPS HID driver supports U1 Touchpad device.
+
+U1 device basic information.
+
+==========	======
+Vender ID	0x044E
+Product ID	0x120B
+Version ID	0x0121
+==========	======
+
+
+HID Descriptor
+--------------
+
+=======	====================	=====	=======================================
+Byte	Field			Value	Notes
+=======	====================	=====	=======================================
+0	wHIDDescLength		001E	Length of HID Descriptor : 30 bytes
+2	bcdVersion		0100	Compliant with Version 1.00
+4	wReportDescLength	00B2	Report Descriptor is 178 Bytes (0x00B2)
+6	wReportDescRegister	0002	Identifier to read Report Descriptor
+8	wInputRegister		0003	Identifier to read Input Report
+10	wMaxInputLength		0053	Input Report is 80 Bytes + 2
+12	wOutputRegister		0000	Identifier to read Output Report
+14	wMaxOutputLength	0000	No Output Reports
+16	wCommandRegister	0005	Identifier for Command Register
+18	wDataRegister		0006	Identifier for Data Register
+20	wVendorID		044E	Vendor ID 0x044E
+22	wProductID		120B	Product ID 0x120B
+24	wVersionID		0121	Version 01.21
+26	RESERVED		0000	RESERVED
+=======	====================	=====	=======================================
+
+
+Report ID
+---------
+
+==========	=================  =========================================
+ReportID-1	(Input Reports)	   (HIDUsage-Mouse) for TP&SP
+ReportID-2	(Input Reports)	   (HIDUsage-keyboard) for TP
+ReportID-3	(Input Reports)	   (Vendor Usage: Max 10 finger data) for TP
+ReportID-4	(Input Reports)	   (Vendor Usage: ON bit data) for GP
+ReportID-5	(Feature Reports)  Feature Reports
+ReportID-6	(Input Reports)	   (Vendor Usage: StickPointer data) for SP
+ReportID-7	(Feature Reports)  Flash update (Bootloader)
+==========	=================  =========================================
+
+
+Data pattern
+------------
+
+=====	==========	=====	=================
+Case1	ReportID_1	TP/SP	Relative/Relative
+Case2	ReportID_3	TP	Absolute
+	ReportID_6	SP	Absolute
+=====	==========	=====	=================
+
+
+Command Read/Write
+------------------
+To read/write to RAM, need to send a commands to the device.
+
+The command format is as below.
+
+DataByte(SET_REPORT)
+
+=====	======================
+Byte1	Command Byte
+Byte2	Address - Byte 0 (LSB)
+Byte3	Address - Byte 1
+Byte4	Address - Byte 2
+Byte5	Address - Byte 3 (MSB)
+Byte6	Value Byte
+Byte7	Checksum
+=====	======================
+
+Command Byte is read=0xD1/write=0xD2 .
+
+Address is read/write RAM address.
+
+Value Byte is writing data when you send the write commands.
+
+When you read RAM, there is no meaning.
+
+DataByte(GET_REPORT)
+
+=====	======================
+Byte1	Response Byte
+Byte2	Address - Byte 0 (LSB)
+Byte3	Address - Byte 1
+Byte4	Address - Byte 2
+Byte5	Address - Byte 3 (MSB)
+Byte6	Value Byte
+Byte7	Checksum
+=====	======================
+
+Read value is stored in Value Byte.
+
+
+Packet Format
+Touchpad data byte
+------------------
+
+
+======= ======= ======= ======= ======= ======= ======= ======= =====
+-	b7	b6	b5	b4	b3	b2	b1	b0
+======= ======= ======= ======= ======= ======= ======= ======= =====
+1	0	0	SW6	SW5	SW4	SW3	SW2	SW1
+2	0	0	0	Fcv	Fn3	Fn2	Fn1	Fn0
+3	Xa0_7	Xa0_6	Xa0_5	Xa0_4	Xa0_3	Xa0_2	Xa0_1	Xa0_0
+4	Xa0_15	Xa0_14	Xa0_13	Xa0_12	Xa0_11	Xa0_10	Xa0_9	Xa0_8
+5	Ya0_7	Ya0_6	Ya0_5	Ya0_4	Ya0_3	Ya0_2	Ya0_1	Ya0_0
+6	Ya0_15	Ya0_14	Ya0_13	Ya0_12	Ya0_11	Ya0_10	Ya0_9	Ya0_8
+7	LFB0	Zs0_6	Zs0_5	Zs0_4	Zs0_3	Zs0_2	Zs0_1	Zs0_0
+
+8	Xa1_7	Xa1_6	Xa1_5	Xa1_4	Xa1_3	Xa1_2	Xa1_1	Xa1_0
+9	Xa1_15	Xa1_14	Xa1_13	Xa1_12	Xa1_11	Xa1_10	Xa1_9	Xa1_8
+10	Ya1_7	Ya1_6	Ya1_5	Ya1_4	Ya1_3	Ya1_2	Ya1_1	Ya1_0
+11	Ya1_15	Ya1_14	Ya1_13	Ya1_12	Ya1_11	Ya1_10	Ya1_9	Ya1_8
+12	LFB1	Zs1_6	Zs1_5	Zs1_4	Zs1_3	Zs1_2	Zs1_1	Zs1_0
+
+13	Xa2_7	Xa2_6	Xa2_5	Xa2_4	Xa2_3	Xa2_2	Xa2_1	Xa2_0
+14	Xa2_15	Xa2_14	Xa2_13	Xa2_12	Xa2_11	Xa2_10	Xa2_9	Xa2_8
+15	Ya2_7	Ya2_6	Ya2_5	Ya2_4	Ya2_3	Ya2_2	Ya2_1	Ya2_0
+16	Ya2_15	Ya2_14	Ya2_13	Ya2_12	Ya2_11	Ya2_10	Ya2_9	Ya2_8
+17	LFB2	Zs2_6	Zs2_5	Zs2_4	Zs2_3	Zs2_2	Zs2_1	Zs2_0
+
+18	Xa3_7	Xa3_6	Xa3_5	Xa3_4	Xa3_3	Xa3_2	Xa3_1	Xa3_0
+19	Xa3_15	Xa3_14	Xa3_13	Xa3_12	Xa3_11	Xa3_10	Xa3_9	Xa3_8
+20	Ya3_7	Ya3_6	Ya3_5	Ya3_4	Ya3_3	Ya3_2	Ya3_1	Ya3_0
+21	Ya3_15	Ya3_14	Ya3_13	Ya3_12	Ya3_11	Ya3_10	Ya3_9	Ya3_8
+22	LFB3	Zs3_6	Zs3_5	Zs3_4	Zs3_3	Zs3_2	Zs3_1	Zs3_0
+
+23	Xa4_7	Xa4_6	Xa4_5	Xa4_4	Xa4_3	Xa4_2	Xa4_1	Xa4_0
+24	Xa4_15	Xa4_14	Xa4_13	Xa4_12	Xa4_11	Xa4_10	Xa4_9	Xa4_8
+25	Ya4_7	Ya4_6	Ya4_5	Ya4_4	Ya4_3	Ya4_2	Ya4_1	Ya4_0
+26	Ya4_15	Ya4_14	Ya4_13	Ya4_12	Ya4_11	Ya4_10	Ya4_9	Ya4_8
+27	LFB4	Zs4_6	Zs4_5	Zs4_4	Zs4_3	Zs4_2	Zs4_1	Zs4_0
+======= ======= ======= ======= ======= ======= ======= ======= =====
+
+
+SW1-SW6:
+	SW ON/OFF status
+Xan_15-0(16bit):
+	X Absolute data of the "n"th finger
+Yan_15-0(16bit):
+	Y Absolute data of the "n"th finger
+Zsn_6-0(7bit):
+	Operation area of the "n"th finger
+
+
+StickPointer data byte
+----------------------
+
+======= ======= ======= ======= ======= ======= ======= ======= =====
+-	b7	b6	b5	b4	b3	b2	b1	b0
+======= ======= ======= ======= ======= ======= ======= ======= =====
+Byte1	1	1	1	0	1	SW3	SW2	SW1
+Byte2	X7	X6	X5	X4	X3	X2	X1	X0
+Byte3	X15	X14	X13	X12	X11	X10	X9	X8
+Byte4	Y7	Y6	Y5	Y4	Y3	Y2	Y1	Y0
+Byte5	Y15	Y14	Y13	Y12	Y11	Y10	Y9	Y8
+Byte6	Z7	Z6	Z5	Z4	Z3	Z2	Z1	Z0
+Byte7	T&P	Z14	Z13	Z12	Z11	Z10	Z9	Z8
+======= ======= ======= ======= ======= ======= ======= ======= =====
+
+SW1-SW3:
+	SW ON/OFF status
+Xn_15-0(16bit):
+	X Absolute data
+Yn_15-0(16bit):
+	Y Absolute data
+Zn_14-0(15bit):
+	Z
diff --git a/Documentation/hid/hid-alps.txt b/Documentation/hid/hid-alps.txt
deleted file mode 100644
index 6b02a2447c77..000000000000
--- a/Documentation/hid/hid-alps.txt
+++ /dev/null
@@ -1,139 +0,0 @@
-ALPS HID Touchpad Protocol
-----------------------
-
-Introduction
-------------
-Currently ALPS HID driver supports U1 Touchpad device.
-
-U1 devuce basic information.
-Vender ID	0x044E
-Product ID	0x120B
-Version ID	0x0121
-
-
-HID Descriptor
-------------
-Byte	Field			Value	Notes
-0	wHIDDescLength		001E	Length of HID Descriptor : 30 bytes
-2	bcdVersion		0100	Compliant with Version 1.00
-4	wReportDescLength	00B2	Report Descriptor is 178 Bytes (0x00B2)
-6	wReportDescRegister	0002	Identifier to read Report Descriptor
-8	wInputRegister		0003	Identifier to read Input Report
-10	wMaxInputLength		0053	Input Report is 80 Bytes + 2
-12	wOutputRegister		0000	Identifier to read Output Report
-14	wMaxOutputLength	0000	No Output Reports
-16	wCommandRegister	0005	Identifier for Command Register
-18	wDataRegister		0006	Identifier for Data Register
-20	wVendorID		044E	Vendor ID 0x044E
-22	wProductID		120B	Product ID 0x120B
-24	wVersionID		0121	Version 01.21
-26	RESERVED		0000	RESERVED
-
-
-Report ID
-------------
-ReportID-1	(Input Reports)	(HIDUsage-Mouse) for TP&SP
-ReportID-2	(Input Reports)	(HIDUsage-keyboard) for TP
-ReportID-3	(Input Reports)	(Vendor Usage: Max 10 finger data) for TP
-ReportID-4	(Input Reports)	(Vendor Usage: ON bit data) for GP
-ReportID-5	(Feature Reports)	Feature Reports
-ReportID-6	(Input Reports)	(Vendor Usage: StickPointer data) for SP
-ReportID-7	(Feature Reports)	Flash update (Bootloader)
-
-
-Data pattern
-------------
-Case1	ReportID_1	TP/SP	Relative/Relative
-Case2	ReportID_3	TP	Absolute
-	ReportID_6	SP	Absolute
-
-
-Command Read/Write
-------------------
-To read/write to RAM, need to send a commands to the device.
-The command format is as below.
-
-DataByte(SET_REPORT)
-Byte1	Command Byte
-Byte2	Address - Byte 0 (LSB)
-Byte3	Address - Byte 1
-Byte4	Address - Byte 2
-Byte5	Address - Byte 3 (MSB)
-Byte6	Value Byte
-Byte7	Checksum
-
-Command Byte is read=0xD1/write=0xD2 .
-Address is read/write RAM address.
-Value Byte is writing data when you send the write commands.
-When you read RAM, there is no meaning.
-
-DataByte(GET_REPORT)
-Byte1	Response Byte
-Byte2	Address - Byte 0 (LSB)
-Byte3	Address - Byte 1
-Byte4	Address - Byte 2
-Byte5	Address - Byte 3 (MSB)
-Byte6	Value Byte
-Byte7	Checksum
-
-Read value is stored in Value Byte.
-
-
-Packet Format
-Touchpad data byte
-------------------
-	b7	b6	b5	b4	b3	b2	b1	b0
-1	0	0	SW6	SW5	SW4	SW3	SW2	SW1
-2	0	0	0	Fcv	Fn3	Fn2	Fn1	Fn0
-3	Xa0_7	Xa0_6	Xa0_5	Xa0_4	Xa0_3	Xa0_2	Xa0_1	Xa0_0
-4	Xa0_15	Xa0_14	Xa0_13	Xa0_12	Xa0_11	Xa0_10	Xa0_9	Xa0_8
-5	Ya0_7	Ya0_6	Ya0_5	Ya0_4	Ya0_3	Ya0_2	Ya0_1	Ya0_0
-6	Ya0_15	Ya0_14	Ya0_13	Ya0_12	Ya0_11	Ya0_10	Ya0_9	Ya0_8
-7	LFB0	Zs0_6	Zs0_5	Zs0_4	Zs0_3	Zs0_2	Zs0_1	Zs0_0
-
-8	Xa1_7	Xa1_6	Xa1_5	Xa1_4	Xa1_3	Xa1_2	Xa1_1	Xa1_0
-9	Xa1_15	Xa1_14	Xa1_13	Xa1_12	Xa1_11	Xa1_10	Xa1_9	Xa1_8
-10	Ya1_7	Ya1_6	Ya1_5	Ya1_4	Ya1_3	Ya1_2	Ya1_1	Ya1_0
-11	Ya1_15	Ya1_14	Ya1_13	Ya1_12	Ya1_11	Ya1_10	Ya1_9	Ya1_8
-12	LFB1	Zs1_6	Zs1_5	Zs1_4	Zs1_3	Zs1_2	Zs1_1	Zs1_0
-
-13	Xa2_7	Xa2_6	Xa2_5	Xa2_4	Xa2_3	Xa2_2	Xa2_1	Xa2_0
-14	Xa2_15	Xa2_14	Xa2_13	Xa2_12	Xa2_11	Xa2_10	Xa2_9	Xa2_8
-15	Ya2_7	Ya2_6	Ya2_5	Ya2_4	Ya2_3	Ya2_2	Ya2_1	Ya2_0
-16	Ya2_15	Ya2_14	Ya2_13	Ya2_12	Ya2_11	Ya2_10	Ya2_9	Ya2_8
-17	LFB2	Zs2_6	Zs2_5	Zs2_4	Zs2_3	Zs2_2	Zs2_1	Zs2_0
-
-18	Xa3_7	Xa3_6	Xa3_5	Xa3_4	Xa3_3	Xa3_2	Xa3_1	Xa3_0
-19	Xa3_15	Xa3_14	Xa3_13	Xa3_12	Xa3_11	Xa3_10	Xa3_9	Xa3_8
-20	Ya3_7	Ya3_6	Ya3_5	Ya3_4	Ya3_3	Ya3_2	Ya3_1	Ya3_0
-21	Ya3_15	Ya3_14	Ya3_13	Ya3_12	Ya3_11	Ya3_10	Ya3_9	Ya3_8
-22	LFB3	Zs3_6	Zs3_5	Zs3_4	Zs3_3	Zs3_2	Zs3_1	Zs3_0
-
-23	Xa4_7	Xa4_6	Xa4_5	Xa4_4	Xa4_3	Xa4_2	Xa4_1	Xa4_0
-24	Xa4_15	Xa4_14	Xa4_13	Xa4_12	Xa4_11	Xa4_10	Xa4_9	Xa4_8
-25	Ya4_7	Ya4_6	Ya4_5	Ya4_4	Ya4_3	Ya4_2	Ya4_1	Ya4_0
-26	Ya4_15	Ya4_14	Ya4_13	Ya4_12	Ya4_11	Ya4_10	Ya4_9	Ya4_8
-27	LFB4	Zs4_6	Zs4_5	Zs4_4	Zs4_3	Zs4_2	Zs4_1	Zs4_0
-
-
-SW1-SW6:	SW ON/OFF status
-Xan_15-0(16bit):X Absolute data of the "n"th finger
-Yan_15-0(16bit):Y Absolute data of the "n"th finger
-Zsn_6-0(7bit):	Operation area of the "n"th finger
-
-
-StickPointer data byte
-------------------
-	b7	b6	b5	b4	b3	b2	b1	b0
-Byte1	1	1	1	0	1	SW3	SW2	SW1
-Byte2	X7	X6	X5	X4	X3	X2	X1	X0
-Byte3	X15	X14	X13	X12	X11	X10	X9	X8
-Byte4	Y7	Y6	Y5	Y4	Y3	Y2	Y1	Y0
-Byte5	Y15	Y14	Y13	Y12	Y11	Y10	Y9	Y8
-Byte6	Z7	Z6	Z5	Z4	Z3	Z2	Z1	Z0
-Byte7	T&P	Z14	Z13	Z12	Z11	Z10	Z9	Z8
-
-SW1-SW3:	SW ON/OFF status
-Xn_15-0(16bit):X Absolute data
-Yn_15-0(16bit):Y Absolute data
-Zn_14-0(15bit):Z
diff --git a/Documentation/hid/hid-sensor.rst b/Documentation/hid/hid-sensor.rst
new file mode 100644
index 000000000000..758972e34971
--- /dev/null
+++ b/Documentation/hid/hid-sensor.rst
@@ -0,0 +1,242 @@
+=====================
+HID Sensors Framework
+=====================
+HID sensor framework provides necessary interfaces to implement sensor drivers,
+which are connected to a sensor hub. The sensor hub is a HID device and it provides
+a report descriptor conforming to HID 1.12 sensor usage tables.
+
+Description from the HID 1.12 "HID Sensor Usages" specification:
+"Standardization of HID usages for sensors would allow (but not require) sensor
+hardware vendors to provide a consistent Plug And Play interface at the USB boundary,
+thereby enabling some operating systems to incorporate common device drivers that
+could be reused between vendors, alleviating any need for the vendors to provide
+the drivers themselves."
+
+This specification describes many usage IDs, which describe the type of sensor
+and also the individual data fields. Each sensor can have variable number of
+data fields. The length and order is specified in the report descriptor. For
+example a part of report descriptor can look like::
+
+     INPUT(1)[INPUT]
+   ..
+      Field(2)
+        Physical(0020.0073)
+        Usage(1)
+          0020.045f
+        Logical Minimum(-32767)
+        Logical Maximum(32767)
+        Report Size(8)
+        Report Count(1)
+        Report Offset(16)
+        Flags(Variable Absolute)
+  ..
+  ..
+
+The report is indicating "sensor page (0x20)" contains an accelerometer-3D (0x73).
+This accelerometer-3D has some fields. Here for example field 2 is motion intensity
+(0x045f) with a logical minimum value of -32767 and logical maximum of 32767. The
+order of fields and length of each field is important as the input event raw
+data will use this format.
+
+
+Implementation
+==============
+
+This specification defines many different types of sensors with different sets of
+data fields. It is difficult to have a common input event to user space applications,
+for different sensors. For example an accelerometer can send X,Y and Z data, whereas
+an ambient light sensor can send illumination data.
+So the implementation has two parts:
+
+- Core hid driver
+- Individual sensor processing part (sensor drivers)
+
+Core driver
+-----------
+The core driver registers (hid-sensor-hub) registers as a HID driver. It parses
+report descriptors and identifies all the sensors present. It adds an MFD device
+with name HID-SENSOR-xxxx (where xxxx is usage id from the specification).
+
+For example:
+
+HID-SENSOR-200073 is registered for an Accelerometer 3D driver.
+
+So if any driver with this name is inserted, then the probe routine for that
+function will be called. So an accelerometer processing driver can register
+with this name and will be probed if there is an accelerometer-3D detected.
+
+The core driver provides a set of APIs which can be used by the processing
+drivers to register and get events for that usage id. Also it provides parsing
+functions, which get and set each input/feature/output report.
+
+Individual sensor processing part (sensor drivers)
+--------------------------------------------------
+
+The processing driver will use an interface provided by the core driver to parse
+the report and get the indexes of the fields and also can get events. This driver
+can use IIO interface to use the standard ABI defined for a type of sensor.
+
+
+Core driver Interface
+=====================
+
+Callback structure::
+
+  Each processing driver can use this structure to set some callbacks.
+	int (*suspend)(..): Callback when HID suspend is received
+	int (*resume)(..): Callback when HID resume is received
+	int (*capture_sample)(..): Capture a sample for one of its data fields
+	int (*send_event)(..): One complete event is received which can have
+                               multiple data fields.
+
+Registration functions::
+
+  int sensor_hub_register_callback(struct hid_sensor_hub_device *hsdev,
+			u32 usage_id,
+			struct hid_sensor_hub_callbacks *usage_callback):
+
+Registers callbacks for an usage id. The callback functions are not allowed
+to sleep::
+
+
+  int sensor_hub_remove_callback(struct hid_sensor_hub_device *hsdev,
+			u32 usage_id):
+
+Removes callbacks for an usage id.
+
+
+Parsing function::
+
+  int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev,
+			u8 type,
+			u32 usage_id, u32 attr_usage_id,
+			struct hid_sensor_hub_attribute_info *info);
+
+A processing driver can look for some field of interest and check if it exists
+in a report descriptor. If it exists it will store necessary information
+so that fields can be set or get individually.
+These indexes avoid searching every time and getting field index to get or set.
+
+
+Set Feature report::
+
+  int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
+			u32 field_index, s32 value);
+
+This interface is used to set a value for a field in feature report. For example
+if there is a field report_interval, which is parsed by a call to
+sensor_hub_input_get_attribute_info before, then it can directly set that
+individual field::
+
+
+  int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
+			u32 field_index, s32 *value);
+
+This interface is used to get a value for a field in input report. For example
+if there is a field report_interval, which is parsed by a call to
+sensor_hub_input_get_attribute_info before, then it can directly get that
+individual field value::
+
+
+  int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
+			u32 usage_id,
+			u32 attr_usage_id, u32 report_id);
+
+This is used to get a particular field value through input reports. For example
+accelerometer wants to poll X axis value, then it can call this function with
+the usage id of X axis. HID sensors can provide events, so this is not necessary
+to poll for any field. If there is some new sample, the core driver will call
+registered callback function to process the sample.
+
+
+----------
+
+HID Custom and generic Sensors
+------------------------------
+
+
+HID Sensor specification defines two special sensor usage types. Since they
+don't represent a standard sensor, it is not possible to define using Linux IIO
+type interfaces.
+The purpose of these sensors is to extend the functionality or provide a
+way to obfuscate the data being communicated by a sensor. Without knowing the
+mapping between the data and its encapsulated form, it is difficult for
+an application/driver to determine what data is being communicated by the sensor.
+This allows some differentiating use cases, where vendor can provide applications.
+Some common use cases are debug other sensors or to provide some events like
+keyboard attached/detached or lid open/close.
+
+To allow application to utilize these sensors, here they are exported uses sysfs
+attribute groups, attributes and misc device interface.
+
+An example of this representation on sysfs::
+
+  /sys/devices/pci0000:00/INT33C2:00/i2c-0/i2c-INT33D1:00/0018:8086:09FA.0001/HID-SENSOR-2000e1.6.auto$ tree -R
+  .
+  │   ├──  enable_sensor
+  │   │   ├── feature-0-200316
+  │   │   │   ├── feature-0-200316-maximum
+  │   │   │   ├── feature-0-200316-minimum
+  │   │   │   ├── feature-0-200316-name
+  │   │   │   ├── feature-0-200316-size
+  │   │   │   ├── feature-0-200316-unit-expo
+  │   │   │   ├── feature-0-200316-units
+  │   │   │   ├── feature-0-200316-value
+  │   │   ├── feature-1-200201
+  │   │   │   ├── feature-1-200201-maximum
+  │   │   │   ├── feature-1-200201-minimum
+  │   │   │   ├── feature-1-200201-name
+  │   │   │   ├── feature-1-200201-size
+  │   │   │   ├── feature-1-200201-unit-expo
+  │   │   │   ├── feature-1-200201-units
+  │   │   │   ├── feature-1-200201-value
+  │   │   ├── input-0-200201
+  │   │   │   ├── input-0-200201-maximum
+  │   │   │   ├── input-0-200201-minimum
+  │   │   │   ├── input-0-200201-name
+  │   │   │   ├── input-0-200201-size
+  │   │   │   ├── input-0-200201-unit-expo
+  │   │   │   ├── input-0-200201-units
+  │   │   │   ├── input-0-200201-value
+  │   │   ├── input-1-200202
+  │   │   │   ├── input-1-200202-maximum
+  │   │   │   ├── input-1-200202-minimum
+  │   │   │   ├── input-1-200202-name
+  │   │   │   ├── input-1-200202-size
+  │   │   │   ├── input-1-200202-unit-expo
+  │   │   │   ├── input-1-200202-units
+  │   │   │   ├── input-1-200202-value
+
+Here there is a custom sensors with four fields, two feature and two inputs.
+Each field is represented by a set of attributes. All fields except the "value"
+are read only. The value field is a RW field.
+
+Example::
+
+  /sys/bus/platform/devices/HID-SENSOR-2000e1.6.auto/feature-0-200316$ grep -r . *
+  feature-0-200316-maximum:6
+  feature-0-200316-minimum:0
+  feature-0-200316-name:property-reporting-state
+  feature-0-200316-size:1
+  feature-0-200316-unit-expo:0
+  feature-0-200316-units:25
+  feature-0-200316-value:1
+
+How to enable such sensor?
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+By default sensor can be power gated. To enable sysfs attribute "enable" can be
+used::
+
+	$ echo 1 > enable_sensor
+
+Once enabled and powered on, sensor can report value using HID reports.
+These reports are pushed using misc device interface in a FIFO order::
+
+	/dev$ tree | grep HID-SENSOR-2000e1.6.auto
+	│   │   │   ├── 10:53 -> ../HID-SENSOR-2000e1.6.auto
+	│   ├──  HID-SENSOR-2000e1.6.auto
+
+Each reports can be of variable length preceded by a header. This header
+consist of a 32 bit usage id, 64 bit time stamp and 32 bit length field of raw
+data.
diff --git a/Documentation/hid/hid-sensor.txt b/Documentation/hid/hid-sensor.txt
deleted file mode 100644
index b287752a31cd..000000000000
--- a/Documentation/hid/hid-sensor.txt
+++ /dev/null
@@ -1,224 +0,0 @@
-
-HID Sensors Framework
-======================
-HID sensor framework provides necessary interfaces to implement sensor drivers,
-which are connected to a sensor hub. The sensor hub is a HID device and it provides
-a report descriptor conforming to HID 1.12 sensor usage tables.
-
-Description from the HID 1.12 "HID Sensor Usages" specification:
-"Standardization of HID usages for sensors would allow (but not require) sensor
-hardware vendors to provide a consistent Plug And Play interface at the USB boundary,
-thereby enabling some operating systems to incorporate common device drivers that
-could be reused between vendors, alleviating any need for the vendors to provide
-the drivers themselves."
-
-This specification describes many usage IDs, which describe the type of sensor
-and also the individual data fields. Each sensor can have variable number of
-data fields. The length and order is specified in the report descriptor. For
-example a part of report descriptor can look like:
-
-   INPUT(1)[INPUT]
- ..
-    Field(2)
-      Physical(0020.0073)
-      Usage(1)
-        0020.045f
-      Logical Minimum(-32767)
-      Logical Maximum(32767)
-      Report Size(8)
-      Report Count(1)
-      Report Offset(16)
-      Flags(Variable Absolute)
-..
-..
-
-The report is indicating "sensor page (0x20)" contains an accelerometer-3D (0x73).
-This accelerometer-3D has some fields. Here for example field 2 is motion intensity
-(0x045f) with a logical minimum value of -32767 and logical maximum of 32767. The
-order of fields and length of each field is important as the input event raw
-data will use this format.
-
-
-Implementation
-=================
-
-This specification defines many different types of sensors with different sets of
-data fields. It is difficult to have a common input event to user space applications,
-for different sensors. For example an accelerometer can send X,Y and Z data, whereas
-an ambient light sensor can send illumination data.
-So the implementation has two parts:
-- Core hid driver
-- Individual sensor processing part (sensor drivers)
-
-Core driver
------------
-The core driver registers (hid-sensor-hub) registers as a HID driver. It parses
-report descriptors and identifies all the sensors present. It adds an MFD device
-with name HID-SENSOR-xxxx (where xxxx is usage id from the specification).
-For example
-HID-SENSOR-200073 is registered for an Accelerometer 3D driver.
-So if any driver with this name is inserted, then the probe routine for that
-function will be called. So an accelerometer processing driver can register
-with this name and will be probed if there is an accelerometer-3D detected.
-
-The core driver provides a set of APIs which can be used by the processing
-drivers to register and get events for that usage id. Also it provides parsing
-functions, which get and set each input/feature/output report.
-
-Individual sensor processing part (sensor drivers)
------------
-The processing driver will use an interface provided by the core driver to parse
-the report and get the indexes of the fields and also can get events. This driver
-can use IIO interface to use the standard ABI defined for a type of sensor.
-
-
-Core driver Interface
-=====================
-
-Callback structure:
-Each processing driver can use this structure to set some callbacks.
-	int (*suspend)(..): Callback when HID suspend is received
-	int (*resume)(..): Callback when HID resume is received
-	int (*capture_sample)(..): Capture a sample for one of its data fields
-	int (*send_event)(..): One complete event is received which can have
-                               multiple data fields.
-
-Registration functions:
-int sensor_hub_register_callback(struct hid_sensor_hub_device *hsdev,
-			u32 usage_id,
-			struct hid_sensor_hub_callbacks *usage_callback):
-
-Registers callbacks for an usage id. The callback functions are not allowed
-to sleep.
-
-
-int sensor_hub_remove_callback(struct hid_sensor_hub_device *hsdev,
-			u32 usage_id):
-
-Removes callbacks for an usage id.
-
-
-Parsing function:
-int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev,
-			u8 type,
-			u32 usage_id, u32 attr_usage_id,
-			struct hid_sensor_hub_attribute_info *info);
-
-A processing driver can look for some field of interest and check if it exists
-in a report descriptor. If it exists it will store necessary information
-so that fields can be set or get individually.
-These indexes avoid searching every time and getting field index to get or set.
-
-
-Set Feature report
-int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
-			u32 field_index, s32 value);
-
-This interface is used to set a value for a field in feature report. For example
-if there is a field report_interval, which is parsed by a call to
-sensor_hub_input_get_attribute_info before, then it can directly set that individual
-field.
-
-
-int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
-			u32 field_index, s32 *value);
-
-This interface is used to get a value for a field in input report. For example
-if there is a field report_interval, which is parsed by a call to
-sensor_hub_input_get_attribute_info before, then it can directly get that individual
-field value.
-
-
-int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
-			u32 usage_id,
-			u32 attr_usage_id, u32 report_id);
-
-This is used to get a particular field value through input reports. For example
-accelerometer wants to poll X axis value, then it can call this function with
-the usage id of X axis. HID sensors can provide events, so this is not necessary
-to poll for any field. If there is some new sample, the core driver will call
-registered callback function to process the sample.
-
-
-----------
-
-HID Custom and generic Sensors
-
-HID Sensor specification defines two special sensor usage types. Since they
-don't represent a standard sensor, it is not possible to define using Linux IIO
-type interfaces.
-The purpose of these sensors is to extend the functionality or provide a
-way to obfuscate the data being communicated by a sensor. Without knowing the
-mapping between the data and its encapsulated form, it is difficult for
-an application/driver to determine what data is being communicated by the sensor.
-This allows some differentiating use cases, where vendor can provide applications.
-Some common use cases are debug other sensors or to provide some events like
-keyboard attached/detached or lid open/close.
-
-To allow application to utilize these sensors, here they are exported uses sysfs
-attribute groups, attributes and misc device interface.
-
-An example of this representation on sysfs:
-/sys/devices/pci0000:00/INT33C2:00/i2c-0/i2c-INT33D1:00/0018:8086:09FA.0001/HID-SENSOR-2000e1.6.auto$ tree -R
-.
-????????? enable_sensor
-????????? feature-0-200316
-??????? ????????? feature-0-200316-maximum
-??????? ????????? feature-0-200316-minimum
-??????? ????????? feature-0-200316-name
-??????? ????????? feature-0-200316-size
-??????? ????????? feature-0-200316-unit-expo
-??????? ????????? feature-0-200316-units
-??????? ????????? feature-0-200316-value
-????????? feature-1-200201
-??????? ????????? feature-1-200201-maximum
-??????? ????????? feature-1-200201-minimum
-??????? ????????? feature-1-200201-name
-??????? ????????? feature-1-200201-size
-??????? ????????? feature-1-200201-unit-expo
-??????? ????????? feature-1-200201-units
-??????? ????????? feature-1-200201-value
-????????? input-0-200201
-??????? ????????? input-0-200201-maximum
-??????? ????????? input-0-200201-minimum
-??????? ????????? input-0-200201-name
-??????? ????????? input-0-200201-size
-??????? ????????? input-0-200201-unit-expo
-??????? ????????? input-0-200201-units
-??????? ????????? input-0-200201-value
-????????? input-1-200202
-??????? ????????? input-1-200202-maximum
-??????? ????????? input-1-200202-minimum
-??????? ????????? input-1-200202-name
-??????? ????????? input-1-200202-size
-??????? ????????? input-1-200202-unit-expo
-??????? ????????? input-1-200202-units
-??????? ????????? input-1-200202-value
-
-Here there is a custom sensors with four fields, two feature and two inputs.
-Each field is represented by a set of attributes. All fields except the "value"
-are read only. The value field is a RW field.
-Example
-/sys/bus/platform/devices/HID-SENSOR-2000e1.6.auto/feature-0-200316$ grep -r . *
-feature-0-200316-maximum:6
-feature-0-200316-minimum:0
-feature-0-200316-name:property-reporting-state
-feature-0-200316-size:1
-feature-0-200316-unit-expo:0
-feature-0-200316-units:25
-feature-0-200316-value:1
-
-How to enable such sensor?
-By default sensor can be power gated. To enable sysfs attribute "enable" can be
-used.
-$ echo 1 > enable_sensor
-
-Once enabled and powered on, sensor can report value using HID reports.
-These reports are pushed using misc device interface in a FIFO order.
-/dev$ tree | grep HID-SENSOR-2000e1.6.auto
-??????? ????????? 10:53 -> ../HID-SENSOR-2000e1.6.auto
-????????? HID-SENSOR-2000e1.6.auto
-
-Each reports can be of variable length preceded by a header. This header
-consist of a 32 bit usage id, 64 bit time stamp and 32 bit length field of raw
-data.
diff --git a/Documentation/hid/hid-transport.rst b/Documentation/hid/hid-transport.rst
new file mode 100644
index 000000000000..6f3aaa86ce7b
--- /dev/null
+++ b/Documentation/hid/hid-transport.rst
@@ -0,0 +1,359 @@
+=========================
+HID I/O Transport Drivers
+=========================
+
+The HID subsystem is independent of the underlying transport driver. Initially,
+only USB was supported, but other specifications adopted the HID design and
+provided new transport drivers. The kernel includes at least support for USB,
+Bluetooth, I2C and user-space I/O drivers.
+
+1) HID Bus
+==========
+
+The HID subsystem is designed as a bus. Any I/O subsystem may provide HID
+devices and register them with the HID bus. HID core then loads generic device
+drivers on top of it. The transport drivers are responsible of raw data
+transport and device setup/management. HID core is responsible of
+report-parsing, report interpretation and the user-space API. Device specifics
+and quirks are handled by all layers depending on the quirk.
+
+::
+
+ +-----------+  +-----------+            +-----------+  +-----------+
+ | Device #1 |  | Device #i |            | Device #j |  | Device #k |
+ +-----------+  +-----------+            +-----------+  +-----------+
+          \\      //                              \\      //
+        +------------+                          +------------+
+        | I/O Driver |                          | I/O Driver |
+        +------------+                          +------------+
+              ||                                      ||
+     +------------------+                    +------------------+
+     | Transport Driver |                    | Transport Driver |
+     +------------------+                    +------------------+
+                       \___                ___/
+                           \              /
+                          +----------------+
+                          |    HID Core    |
+                          +----------------+
+                           /  |        |  \
+                          /   |        |   \
+             ____________/    |        |    \_________________
+            /                 |        |                      \
+           /                  |        |                       \
+ +----------------+  +-----------+  +------------------+  +------------------+
+ | Generic Driver |  | MT Driver |  | Custom Driver #1 |  | Custom Driver #2 |
+ +----------------+  +-----------+  +------------------+  +------------------+
+
+Example Drivers:
+
+  - I/O: USB, I2C, Bluetooth-l2cap
+  - Transport: USB-HID, I2C-HID, BT-HIDP
+
+Everything below "HID Core" is simplified in this graph as it is only of
+interest to HID device drivers. Transport drivers do not need to know the
+specifics.
+
+1.1) Device Setup
+-----------------
+
+I/O drivers normally provide hotplug detection or device enumeration APIs to the
+transport drivers. Transport drivers use this to find any suitable HID device.
+They allocate HID device objects and register them with HID core. Transport
+drivers are not required to register themselves with HID core. HID core is never
+aware of which transport drivers are available and is not interested in it. It
+is only interested in devices.
+
+Transport drivers attach a constant "struct hid_ll_driver" object with each
+device. Once a device is registered with HID core, the callbacks provided via
+this struct are used by HID core to communicate with the device.
+
+Transport drivers are responsible of detecting device failures and unplugging.
+HID core will operate a device as long as it is registered regardless of any
+device failures. Once transport drivers detect unplug or failure events, they
+must unregister the device from HID core and HID core will stop using the
+provided callbacks.
+
+1.2) Transport Driver Requirements
+----------------------------------
+
+The terms "asynchronous" and "synchronous" in this document describe the
+transmission behavior regarding acknowledgements. An asynchronous channel must
+not perform any synchronous operations like waiting for acknowledgements or
+verifications. Generally, HID calls operating on asynchronous channels must be
+running in atomic-context just fine.
+On the other hand, synchronous channels can be implemented by the transport
+driver in whatever way they like. They might just be the same as asynchronous
+channels, but they can also provide acknowledgement reports, automatic
+retransmission on failure, etc. in a blocking manner. If such functionality is
+required on asynchronous channels, a transport-driver must implement that via
+its own worker threads.
+
+HID core requires transport drivers to follow a given design. A Transport
+driver must provide two bi-directional I/O channels to each HID device. These
+channels must not necessarily be bi-directional in the hardware itself. A
+transport driver might just provide 4 uni-directional channels. Or it might
+multiplex all four on a single physical channel. However, in this document we
+will describe them as two bi-directional channels as they have several
+properties in common.
+
+ - Interrupt Channel (intr): The intr channel is used for asynchronous data
+   reports. No management commands or data acknowledgements are sent on this
+   channel. Any unrequested incoming or outgoing data report must be sent on
+   this channel and is never acknowledged by the remote side. Devices usually
+   send their input events on this channel. Outgoing events are normally
+   not send via intr, except if high throughput is required.
+ - Control Channel (ctrl): The ctrl channel is used for synchronous requests and
+   device management. Unrequested data input events must not be sent on this
+   channel and are normally ignored. Instead, devices only send management
+   events or answers to host requests on this channel.
+   The control-channel is used for direct blocking queries to the device
+   independent of any events on the intr-channel.
+   Outgoing reports are usually sent on the ctrl channel via synchronous
+   SET_REPORT requests.
+
+Communication between devices and HID core is mostly done via HID reports. A
+report can be of one of three types:
+
+ - INPUT Report: Input reports provide data from device to host. This
+   data may include button events, axis events, battery status or more. This
+   data is generated by the device and sent to the host with or without
+   requiring explicit requests. Devices can choose to send data continuously or
+   only on change.
+ - OUTPUT Report: Output reports change device states. They are sent from host
+   to device and may include LED requests, rumble requests or more. Output
+   reports are never sent from device to host, but a host can retrieve their
+   current state.
+   Hosts may choose to send output reports either continuously or only on
+   change.
+ - FEATURE Report: Feature reports are used for specific static device features
+   and never reported spontaneously. A host can read and/or write them to access
+   data like battery-state or device-settings.
+   Feature reports are never sent without requests. A host must explicitly set
+   or retrieve a feature report. This also means, feature reports are never sent
+   on the intr channel as this channel is asynchronous.
+
+INPUT and OUTPUT reports can be sent as pure data reports on the intr channel.
+For INPUT reports this is the usual operational mode. But for OUTPUT reports,
+this is rarely done as OUTPUT reports are normally quite scarce. But devices are
+free to make excessive use of asynchronous OUTPUT reports (for instance, custom
+HID audio speakers make great use of it).
+
+Plain reports must not be sent on the ctrl channel, though. Instead, the ctrl
+channel provides synchronous GET/SET_REPORT requests. Plain reports are only
+allowed on the intr channel and are the only means of data there.
+
+ - GET_REPORT: A GET_REPORT request has a report ID as payload and is sent
+   from host to device. The device must answer with a data report for the
+   requested report ID on the ctrl channel as a synchronous acknowledgement.
+   Only one GET_REPORT request can be pending for each device. This restriction
+   is enforced by HID core as several transport drivers don't allow multiple
+   simultaneous GET_REPORT requests.
+   Note that data reports which are sent as answer to a GET_REPORT request are
+   not handled as generic device events. That is, if a device does not operate
+   in continuous data reporting mode, an answer to GET_REPORT does not replace
+   the raw data report on the intr channel on state change.
+   GET_REPORT is only used by custom HID device drivers to query device state.
+   Normally, HID core caches any device state so this request is not necessary
+   on devices that follow the HID specs except during device initialization to
+   retrieve the current state.
+   GET_REPORT requests can be sent for any of the 3 report types and shall
+   return the current report state of the device. However, OUTPUT reports as
+   payload may be blocked by the underlying transport driver if the
+   specification does not allow them.
+ - SET_REPORT: A SET_REPORT request has a report ID plus data as payload. It is
+   sent from host to device and a device must update it's current report state
+   according to the given data. Any of the 3 report types can be used. However,
+   INPUT reports as payload might be blocked by the underlying transport driver
+   if the specification does not allow them.
+   A device must answer with a synchronous acknowledgement. However, HID core
+   does not require transport drivers to forward this acknowledgement to HID
+   core.
+   Same as for GET_REPORT, only one SET_REPORT can be pending at a time. This
+   restriction is enforced by HID core as some transport drivers do not support
+   multiple synchronous SET_REPORT requests.
+
+Other ctrl-channel requests are supported by USB-HID but are not available
+(or deprecated) in most other transport level specifications:
+
+ - GET/SET_IDLE: Only used by USB-HID and I2C-HID.
+ - GET/SET_PROTOCOL: Not used by HID core.
+ - RESET: Used by I2C-HID, not hooked up in HID core.
+ - SET_POWER: Used by I2C-HID, not hooked up in HID core.
+
+2) HID API
+==========
+
+2.1) Initialization
+-------------------
+
+Transport drivers normally use the following procedure to register a new device
+with HID core::
+
+	struct hid_device *hid;
+	int ret;
+
+	hid = hid_allocate_device();
+	if (IS_ERR(hid)) {
+		ret = PTR_ERR(hid);
+		goto err_<...>;
+	}
+
+	strlcpy(hid->name, <device-name-src>, 127);
+	strlcpy(hid->phys, <device-phys-src>, 63);
+	strlcpy(hid->uniq, <device-uniq-src>, 63);
+
+	hid->ll_driver = &custom_ll_driver;
+	hid->bus = <device-bus>;
+	hid->vendor = <device-vendor>;
+	hid->product = <device-product>;
+	hid->version = <device-version>;
+	hid->country = <device-country>;
+	hid->dev.parent = <pointer-to-parent-device>;
+	hid->driver_data = <transport-driver-data-field>;
+
+	ret = hid_add_device(hid);
+	if (ret)
+		goto err_<...>;
+
+Once hid_add_device() is entered, HID core might use the callbacks provided in
+"custom_ll_driver". Note that fields like "country" can be ignored by underlying
+transport-drivers if not supported.
+
+To unregister a device, use::
+
+	hid_destroy_device(hid);
+
+Once hid_destroy_device() returns, HID core will no longer make use of any
+driver callbacks.
+
+2.2) hid_ll_driver operations
+-----------------------------
+
+The available HID callbacks are:
+
+   ::
+
+      int (*start) (struct hid_device *hdev)
+
+   Called from HID device drivers once they want to use the device. Transport
+   drivers can choose to setup their device in this callback. However, normally
+   devices are already set up before transport drivers register them to HID core
+   so this is mostly only used by USB-HID.
+
+   ::
+
+      void (*stop) (struct hid_device *hdev)
+
+   Called from HID device drivers once they are done with a device. Transport
+   drivers can free any buffers and deinitialize the device. But note that
+   ->start() might be called again if another HID device driver is loaded on the
+   device.
+
+   Transport drivers are free to ignore it and deinitialize devices after they
+   destroyed them via hid_destroy_device().
+
+   ::
+
+      int (*open) (struct hid_device *hdev)
+
+   Called from HID device drivers once they are interested in data reports.
+   Usually, while user-space didn't open any input API/etc., device drivers are
+   not interested in device data and transport drivers can put devices asleep.
+   However, once ->open() is called, transport drivers must be ready for I/O.
+   ->open() calls are nested for each client that opens the HID device.
+
+   ::
+
+      void (*close) (struct hid_device *hdev)
+
+   Called from HID device drivers after ->open() was called but they are no
+   longer interested in device reports. (Usually if user-space closed any input
+   devices of the driver).
+
+   Transport drivers can put devices asleep and terminate any I/O of all
+   ->open() calls have been followed by a ->close() call. However, ->start() may
+   be called again if the device driver is interested in input reports again.
+
+   ::
+
+      int (*parse) (struct hid_device *hdev)
+
+   Called once during device setup after ->start() has been called. Transport
+   drivers must read the HID report-descriptor from the device and tell HID core
+   about it via hid_parse_report().
+
+   ::
+
+      int (*power) (struct hid_device *hdev, int level)
+
+   Called by HID core to give PM hints to transport drivers. Usually this is
+   analogical to the ->open() and ->close() hints and redundant.
+
+   ::
+
+      void (*request) (struct hid_device *hdev, struct hid_report *report,
+		       int reqtype)
+
+   Send an HID request on the ctrl channel. "report" contains the report that
+   should be sent and "reqtype" the request type. Request-type can be
+   HID_REQ_SET_REPORT or HID_REQ_GET_REPORT.
+
+   This callback is optional. If not provided, HID core will assemble a raw
+   report following the HID specs and send it via the ->raw_request() callback.
+   The transport driver is free to implement this asynchronously.
+
+   ::
+
+      int (*wait) (struct hid_device *hdev)
+
+   Used by HID core before calling ->request() again. A transport driver can use
+   it to wait for any pending requests to complete if only one request is
+   allowed at a time.
+
+   ::
+
+      int (*raw_request) (struct hid_device *hdev, unsigned char reportnum,
+                          __u8 *buf, size_t count, unsigned char rtype,
+                          int reqtype)
+
+   Same as ->request() but provides the report as raw buffer. This request shall
+   be synchronous. A transport driver must not use ->wait() to complete such
+   requests. This request is mandatory and hid core will reject the device if
+   it is missing.
+
+   ::
+
+      int (*output_report) (struct hid_device *hdev, __u8 *buf, size_t len)
+
+   Send raw output report via intr channel. Used by some HID device drivers
+   which require high throughput for outgoing requests on the intr channel. This
+   must not cause SET_REPORT calls! This must be implemented as asynchronous
+   output report on the intr channel!
+
+   ::
+
+      int (*idle) (struct hid_device *hdev, int report, int idle, int reqtype)
+
+   Perform SET/GET_IDLE request. Only used by USB-HID, do not implement!
+
+2.3) Data Path
+--------------
+
+Transport drivers are responsible of reading data from I/O devices. They must
+handle any I/O-related state-tracking themselves. HID core does not implement
+protocol handshakes or other management commands which can be required by the
+given HID transport specification.
+
+Every raw data packet read from a device must be fed into HID core via
+hid_input_report(). You must specify the channel-type (intr or ctrl) and report
+type (input/output/feature). Under normal conditions, only input reports are
+provided via this API.
+
+Responses to GET_REPORT requests via ->request() must also be provided via this
+API. Responses to ->raw_request() are synchronous and must be intercepted by the
+transport driver and not passed to hid_input_report().
+Acknowledgements to SET_REPORT requests are not of interest to HID core.
+
+----------------------------------------------------
+
+Written 2013, David Herrmann <dh.herrmann@gmail.com>
diff --git a/Documentation/hid/hid-transport.txt b/Documentation/hid/hid-transport.txt
deleted file mode 100644
index 3dcba9fd4a3a..000000000000
--- a/Documentation/hid/hid-transport.txt
+++ /dev/null
@@ -1,317 +0,0 @@
-                          HID I/O Transport Drivers
-                         ===========================
-
-The HID subsystem is independent of the underlying transport driver. Initially,
-only USB was supported, but other specifications adopted the HID design and
-provided new transport drivers. The kernel includes at least support for USB,
-Bluetooth, I2C and user-space I/O drivers.
-
-1) HID Bus
-==========
-
-The HID subsystem is designed as a bus. Any I/O subsystem may provide HID
-devices and register them with the HID bus. HID core then loads generic device
-drivers on top of it. The transport drivers are responsible of raw data
-transport and device setup/management. HID core is responsible of
-report-parsing, report interpretation and the user-space API. Device specifics
-and quirks are handled by all layers depending on the quirk.
-
- +-----------+  +-----------+            +-----------+  +-----------+
- | Device #1 |  | Device #i |            | Device #j |  | Device #k |
- +-----------+  +-----------+            +-----------+  +-----------+
-          \\      //                              \\      //
-        +------------+                          +------------+
-        | I/O Driver |                          | I/O Driver |
-        +------------+                          +------------+
-              ||                                      ||
-     +------------------+                    +------------------+
-     | Transport Driver |                    | Transport Driver |
-     +------------------+                    +------------------+
-                       \___                ___/
-                           \              /
-                          +----------------+
-                          |    HID Core    |
-                          +----------------+
-                           /  |        |  \
-                          /   |        |   \
-             ____________/    |        |    \_________________
-            /                 |        |                      \
-           /                  |        |                       \
- +----------------+  +-----------+  +------------------+  +------------------+
- | Generic Driver |  | MT Driver |  | Custom Driver #1 |  | Custom Driver #2 |
- +----------------+  +-----------+  +------------------+  +------------------+
-
-Example Drivers:
-  I/O: USB, I2C, Bluetooth-l2cap
-  Transport: USB-HID, I2C-HID, BT-HIDP
-
-Everything below "HID Core" is simplified in this graph as it is only of
-interest to HID device drivers. Transport drivers do not need to know the
-specifics.
-
-1.1) Device Setup
------------------
-
-I/O drivers normally provide hotplug detection or device enumeration APIs to the
-transport drivers. Transport drivers use this to find any suitable HID device.
-They allocate HID device objects and register them with HID core. Transport
-drivers are not required to register themselves with HID core. HID core is never
-aware of which transport drivers are available and is not interested in it. It
-is only interested in devices.
-
-Transport drivers attach a constant "struct hid_ll_driver" object with each
-device. Once a device is registered with HID core, the callbacks provided via
-this struct are used by HID core to communicate with the device.
-
-Transport drivers are responsible of detecting device failures and unplugging.
-HID core will operate a device as long as it is registered regardless of any
-device failures. Once transport drivers detect unplug or failure events, they
-must unregister the device from HID core and HID core will stop using the
-provided callbacks.
-
-1.2) Transport Driver Requirements
-----------------------------------
-
-The terms "asynchronous" and "synchronous" in this document describe the
-transmission behavior regarding acknowledgements. An asynchronous channel must
-not perform any synchronous operations like waiting for acknowledgements or
-verifications. Generally, HID calls operating on asynchronous channels must be
-running in atomic-context just fine.
-On the other hand, synchronous channels can be implemented by the transport
-driver in whatever way they like. They might just be the same as asynchronous
-channels, but they can also provide acknowledgement reports, automatic
-retransmission on failure, etc. in a blocking manner. If such functionality is
-required on asynchronous channels, a transport-driver must implement that via
-its own worker threads.
-
-HID core requires transport drivers to follow a given design. A Transport
-driver must provide two bi-directional I/O channels to each HID device. These
-channels must not necessarily be bi-directional in the hardware itself. A
-transport driver might just provide 4 uni-directional channels. Or it might
-multiplex all four on a single physical channel. However, in this document we
-will describe them as two bi-directional channels as they have several
-properties in common.
-
- - Interrupt Channel (intr): The intr channel is used for asynchronous data
-   reports. No management commands or data acknowledgements are sent on this
-   channel. Any unrequested incoming or outgoing data report must be sent on
-   this channel and is never acknowledged by the remote side. Devices usually
-   send their input events on this channel. Outgoing events are normally
-   not send via intr, except if high throughput is required.
- - Control Channel (ctrl): The ctrl channel is used for synchronous requests and
-   device management. Unrequested data input events must not be sent on this
-   channel and are normally ignored. Instead, devices only send management
-   events or answers to host requests on this channel.
-   The control-channel is used for direct blocking queries to the device
-   independent of any events on the intr-channel.
-   Outgoing reports are usually sent on the ctrl channel via synchronous
-   SET_REPORT requests.
-
-Communication between devices and HID core is mostly done via HID reports. A
-report can be of one of three types:
-
- - INPUT Report: Input reports provide data from device to host. This
-   data may include button events, axis events, battery status or more. This
-   data is generated by the device and sent to the host with or without
-   requiring explicit requests. Devices can choose to send data continuously or
-   only on change.
- - OUTPUT Report: Output reports change device states. They are sent from host
-   to device and may include LED requests, rumble requests or more. Output
-   reports are never sent from device to host, but a host can retrieve their
-   current state.
-   Hosts may choose to send output reports either continuously or only on
-   change.
- - FEATURE Report: Feature reports are used for specific static device features
-   and never reported spontaneously. A host can read and/or write them to access
-   data like battery-state or device-settings.
-   Feature reports are never sent without requests. A host must explicitly set
-   or retrieve a feature report. This also means, feature reports are never sent
-   on the intr channel as this channel is asynchronous.
-
-INPUT and OUTPUT reports can be sent as pure data reports on the intr channel.
-For INPUT reports this is the usual operational mode. But for OUTPUT reports,
-this is rarely done as OUTPUT reports are normally quite scarce. But devices are
-free to make excessive use of asynchronous OUTPUT reports (for instance, custom
-HID audio speakers make great use of it).
-
-Plain reports must not be sent on the ctrl channel, though. Instead, the ctrl
-channel provides synchronous GET/SET_REPORT requests. Plain reports are only
-allowed on the intr channel and are the only means of data there.
-
- - GET_REPORT: A GET_REPORT request has a report ID as payload and is sent
-   from host to device. The device must answer with a data report for the
-   requested report ID on the ctrl channel as a synchronous acknowledgement.
-   Only one GET_REPORT request can be pending for each device. This restriction
-   is enforced by HID core as several transport drivers don't allow multiple
-   simultaneous GET_REPORT requests.
-   Note that data reports which are sent as answer to a GET_REPORT request are
-   not handled as generic device events. That is, if a device does not operate
-   in continuous data reporting mode, an answer to GET_REPORT does not replace
-   the raw data report on the intr channel on state change.
-   GET_REPORT is only used by custom HID device drivers to query device state.
-   Normally, HID core caches any device state so this request is not necessary
-   on devices that follow the HID specs except during device initialization to
-   retrieve the current state.
-   GET_REPORT requests can be sent for any of the 3 report types and shall
-   return the current report state of the device. However, OUTPUT reports as
-   payload may be blocked by the underlying transport driver if the
-   specification does not allow them.
- - SET_REPORT: A SET_REPORT request has a report ID plus data as payload. It is
-   sent from host to device and a device must update it's current report state
-   according to the given data. Any of the 3 report types can be used. However,
-   INPUT reports as payload might be blocked by the underlying transport driver
-   if the specification does not allow them.
-   A device must answer with a synchronous acknowledgement. However, HID core
-   does not require transport drivers to forward this acknowledgement to HID
-   core.
-   Same as for GET_REPORT, only one SET_REPORT can be pending at a time. This
-   restriction is enforced by HID core as some transport drivers do not support
-   multiple synchronous SET_REPORT requests.
-
-Other ctrl-channel requests are supported by USB-HID but are not available
-(or deprecated) in most other transport level specifications:
-
- - GET/SET_IDLE: Only used by USB-HID and I2C-HID.
- - GET/SET_PROTOCOL: Not used by HID core.
- - RESET: Used by I2C-HID, not hooked up in HID core.
- - SET_POWER: Used by I2C-HID, not hooked up in HID core.
-
-2) HID API
-==========
-
-2.1) Initialization
--------------------
-
-Transport drivers normally use the following procedure to register a new device
-with HID core:
-
-	struct hid_device *hid;
-	int ret;
-
-	hid = hid_allocate_device();
-	if (IS_ERR(hid)) {
-		ret = PTR_ERR(hid);
-		goto err_<...>;
-	}
-
-	strlcpy(hid->name, <device-name-src>, 127);
-	strlcpy(hid->phys, <device-phys-src>, 63);
-	strlcpy(hid->uniq, <device-uniq-src>, 63);
-
-	hid->ll_driver = &custom_ll_driver;
-	hid->bus = <device-bus>;
-	hid->vendor = <device-vendor>;
-	hid->product = <device-product>;
-	hid->version = <device-version>;
-	hid->country = <device-country>;
-	hid->dev.parent = <pointer-to-parent-device>;
-	hid->driver_data = <transport-driver-data-field>;
-
-	ret = hid_add_device(hid);
-	if (ret)
-		goto err_<...>;
-
-Once hid_add_device() is entered, HID core might use the callbacks provided in
-"custom_ll_driver". Note that fields like "country" can be ignored by underlying
-transport-drivers if not supported.
-
-To unregister a device, use:
-
-	hid_destroy_device(hid);
-
-Once hid_destroy_device() returns, HID core will no longer make use of any
-driver callbacks.
-
-2.2) hid_ll_driver operations
------------------------------
-
-The available HID callbacks are:
- - int (*start) (struct hid_device *hdev)
-   Called from HID device drivers once they want to use the device. Transport
-   drivers can choose to setup their device in this callback. However, normally
-   devices are already set up before transport drivers register them to HID core
-   so this is mostly only used by USB-HID.
-
- - void (*stop) (struct hid_device *hdev)
-   Called from HID device drivers once they are done with a device. Transport
-   drivers can free any buffers and deinitialize the device. But note that
-   ->start() might be called again if another HID device driver is loaded on the
-   device.
-   Transport drivers are free to ignore it and deinitialize devices after they
-   destroyed them via hid_destroy_device().
-
- - int (*open) (struct hid_device *hdev)
-   Called from HID device drivers once they are interested in data reports.
-   Usually, while user-space didn't open any input API/etc., device drivers are
-   not interested in device data and transport drivers can put devices asleep.
-   However, once ->open() is called, transport drivers must be ready for I/O.
-   ->open() calls are nested for each client that opens the HID device.
-
- - void (*close) (struct hid_device *hdev)
-   Called from HID device drivers after ->open() was called but they are no
-   longer interested in device reports. (Usually if user-space closed any input
-   devices of the driver).
-   Transport drivers can put devices asleep and terminate any I/O of all
-   ->open() calls have been followed by a ->close() call. However, ->start() may
-   be called again if the device driver is interested in input reports again.
-
- - int (*parse) (struct hid_device *hdev)
-   Called once during device setup after ->start() has been called. Transport
-   drivers must read the HID report-descriptor from the device and tell HID core
-   about it via hid_parse_report().
-
- - int (*power) (struct hid_device *hdev, int level)
-   Called by HID core to give PM hints to transport drivers. Usually this is
-   analogical to the ->open() and ->close() hints and redundant.
-
- - void (*request) (struct hid_device *hdev, struct hid_report *report,
-                    int reqtype)
-   Send an HID request on the ctrl channel. "report" contains the report that
-   should be sent and "reqtype" the request type. Request-type can be
-   HID_REQ_SET_REPORT or HID_REQ_GET_REPORT.
-   This callback is optional. If not provided, HID core will assemble a raw
-   report following the HID specs and send it via the ->raw_request() callback.
-   The transport driver is free to implement this asynchronously.
-
- - int (*wait) (struct hid_device *hdev)
-   Used by HID core before calling ->request() again. A transport driver can use
-   it to wait for any pending requests to complete if only one request is
-   allowed at a time.
-
- - int (*raw_request) (struct hid_device *hdev, unsigned char reportnum,
-                       __u8 *buf, size_t count, unsigned char rtype,
-                       int reqtype)
-   Same as ->request() but provides the report as raw buffer. This request shall
-   be synchronous. A transport driver must not use ->wait() to complete such
-   requests. This request is mandatory and hid core will reject the device if
-   it is missing.
-
- - int (*output_report) (struct hid_device *hdev, __u8 *buf, size_t len)
-   Send raw output report via intr channel. Used by some HID device drivers
-   which require high throughput for outgoing requests on the intr channel. This
-   must not cause SET_REPORT calls! This must be implemented as asynchronous
-   output report on the intr channel!
-
- - int (*idle) (struct hid_device *hdev, int report, int idle, int reqtype)
-   Perform SET/GET_IDLE request. Only used by USB-HID, do not implement!
-
-2.3) Data Path
---------------
-
-Transport drivers are responsible of reading data from I/O devices. They must
-handle any I/O-related state-tracking themselves. HID core does not implement
-protocol handshakes or other management commands which can be required by the
-given HID transport specification.
-
-Every raw data packet read from a device must be fed into HID core via
-hid_input_report(). You must specify the channel-type (intr or ctrl) and report
-type (input/output/feature). Under normal conditions, only input reports are
-provided via this API.
-
-Responses to GET_REPORT requests via ->request() must also be provided via this
-API. Responses to ->raw_request() are synchronous and must be intercepted by the
-transport driver and not passed to hid_input_report().
-Acknowledgements to SET_REPORT requests are not of interest to HID core.
-
-----------------------------------------------------
-Written 2013, David Herrmann <dh.herrmann@gmail.com>
diff --git a/Documentation/hid/hiddev.rst b/Documentation/hid/hiddev.rst
new file mode 100644
index 000000000000..209e6ba4e019
--- /dev/null
+++ b/Documentation/hid/hiddev.rst
@@ -0,0 +1,251 @@
+================================================
+Care and feeding of your Human Interface Devices
+================================================
+
+Introduction
+============
+
+In addition to the normal input type HID devices, USB also uses the
+human interface device protocols for things that are not really human
+interfaces, but have similar sorts of communication needs. The two big
+examples for this are power devices (especially uninterruptable power
+supplies) and monitor control on higher end monitors.
+
+To support these disparate requirements, the Linux USB system provides
+HID events to two separate interfaces:
+* the input subsystem, which converts HID events into normal input
+device interfaces (such as keyboard, mouse and joystick) and a
+normalised event interface - see Documentation/input/input.rst
+* the hiddev interface, which provides fairly raw HID events
+
+The data flow for a HID event produced by a device is something like
+the following::
+
+ usb.c ---> hid-core.c  ----> hid-input.c ----> [keyboard/mouse/joystick/event]
+                         |
+                         |
+                          --> hiddev.c ----> POWER / MONITOR CONTROL
+
+In addition, other subsystems (apart from USB) can potentially feed
+events into the input subsystem, but these have no effect on the hid
+device interface.
+
+Using the HID Device Interface
+==============================
+
+The hiddev interface is a char interface using the normal USB major,
+with the minor numbers starting at 96 and finishing at 111. Therefore,
+you need the following commands::
+
+	mknod /dev/usb/hiddev0 c 180 96
+	mknod /dev/usb/hiddev1 c 180 97
+	mknod /dev/usb/hiddev2 c 180 98
+	mknod /dev/usb/hiddev3 c 180 99
+	mknod /dev/usb/hiddev4 c 180 100
+	mknod /dev/usb/hiddev5 c 180 101
+	mknod /dev/usb/hiddev6 c 180 102
+	mknod /dev/usb/hiddev7 c 180 103
+	mknod /dev/usb/hiddev8 c 180 104
+	mknod /dev/usb/hiddev9 c 180 105
+	mknod /dev/usb/hiddev10 c 180 106
+	mknod /dev/usb/hiddev11 c 180 107
+	mknod /dev/usb/hiddev12 c 180 108
+	mknod /dev/usb/hiddev13 c 180 109
+	mknod /dev/usb/hiddev14 c 180 110
+	mknod /dev/usb/hiddev15 c 180 111
+
+So you point your hiddev compliant user-space program at the correct
+interface for your device, and it all just works.
+
+Assuming that you have a hiddev compliant user-space program, of
+course. If you need to write one, read on.
+
+
+The HIDDEV API
+==============
+
+This description should be read in conjunction with the HID
+specification, freely available from http://www.usb.org, and
+conveniently linked of http://www.linux-usb.org.
+
+The hiddev API uses a read() interface, and a set of ioctl() calls.
+
+HID devices exchange data with the host computer using data
+bundles called "reports".  Each report is divided into "fields",
+each of which can have one or more "usages".  In the hid-core,
+each one of these usages has a single signed 32 bit value.
+
+read():
+-------
+
+This is the event interface.  When the HID device's state changes,
+it performs an interrupt transfer containing a report which contains
+the changed value.  The hid-core.c module parses the report, and
+returns to hiddev.c the individual usages that have changed within
+the report.  In its basic mode, the hiddev will make these individual
+usage changes available to the reader using a struct hiddev_event::
+
+       struct hiddev_event {
+           unsigned hid;
+           signed int value;
+       };
+
+containing the HID usage identifier for the status that changed, and
+the value that it was changed to. Note that the structure is defined
+within <linux/hiddev.h>, along with some other useful #defines and
+structures.  The HID usage identifier is a composite of the HID usage
+page shifted to the 16 high order bits ORed with the usage code.  The
+behavior of the read() function can be modified using the HIDIOCSFLAG
+ioctl() described below.
+
+
+ioctl():
+--------
+
+This is the control interface. There are a number of controls:
+
+HIDIOCGVERSION
+  - int (read)
+
+ Gets the version code out of the hiddev driver.
+
+HIDIOCAPPLICATION
+  - (none)
+
+This ioctl call returns the HID application usage associated with the
+hid device. The third argument to ioctl() specifies which application
+index to get. This is useful when the device has more than one
+application collection. If the index is invalid (greater or equal to
+the number of application collections this device has) the ioctl
+returns -1. You can find out beforehand how many application
+collections the device has from the num_applications field from the
+hiddev_devinfo structure.
+
+HIDIOCGCOLLECTIONINFO
+  - struct hiddev_collection_info (read/write)
+
+This returns a superset of the information above, providing not only
+application collections, but all the collections the device has.  It
+also returns the level the collection lives in the hierarchy.
+The user passes in a hiddev_collection_info struct with the index
+field set to the index that should be returned.  The ioctl fills in
+the other fields.  If the index is larger than the last collection
+index, the ioctl returns -1 and sets errno to -EINVAL.
+
+HIDIOCGDEVINFO
+  - struct hiddev_devinfo (read)
+
+Gets a hiddev_devinfo structure which describes the device.
+
+HIDIOCGSTRING
+  - struct hiddev_string_descriptor (read/write)
+
+Gets a string descriptor from the device. The caller must fill in the
+"index" field to indicate which descriptor should be returned.
+
+HIDIOCINITREPORT
+  - (none)
+
+Instructs the kernel to retrieve all input and feature report values
+from the device. At this point, all the usage structures will contain
+current values for the device, and will maintain it as the device
+changes.  Note that the use of this ioctl is unnecessary in general,
+since later kernels automatically initialize the reports from the
+device at attach time.
+
+HIDIOCGNAME
+  - string (variable length)
+
+Gets the device name
+
+HIDIOCGREPORT
+  - struct hiddev_report_info (write)
+
+Instructs the kernel to get a feature or input report from the device,
+in order to selectively update the usage structures (in contrast to
+INITREPORT).
+
+HIDIOCSREPORT
+  - struct hiddev_report_info (write)
+
+Instructs the kernel to send a report to the device. This report can
+be filled in by the user through HIDIOCSUSAGE calls (below) to fill in
+individual usage values in the report before sending the report in full
+to the device.
+
+HIDIOCGREPORTINFO
+  - struct hiddev_report_info (read/write)
+
+Fills in a hiddev_report_info structure for the user. The report is
+looked up by type (input, output or feature) and id, so these fields
+must be filled in by the user. The ID can be absolute -- the actual
+report id as reported by the device -- or relative --
+HID_REPORT_ID_FIRST for the first report, and (HID_REPORT_ID_NEXT |
+report_id) for the next report after report_id. Without a-priori
+information about report ids, the right way to use this ioctl is to
+use the relative IDs above to enumerate the valid IDs. The ioctl
+returns non-zero when there is no more next ID. The real report ID is
+filled into the returned hiddev_report_info structure.
+
+HIDIOCGFIELDINFO
+  - struct hiddev_field_info (read/write)
+
+Returns the field information associated with a report in a
+hiddev_field_info structure. The user must fill in report_id and
+report_type in this structure, as above. The field_index should also
+be filled in, which should be a number from 0 and maxfield-1, as
+returned from a previous HIDIOCGREPORTINFO call.
+
+HIDIOCGUCODE
+  - struct hiddev_usage_ref (read/write)
+
+Returns the usage_code in a hiddev_usage_ref structure, given that
+given its report type, report id, field index, and index within the
+field have already been filled into the structure.
+
+HIDIOCGUSAGE
+  - struct hiddev_usage_ref (read/write)
+
+Returns the value of a usage in a hiddev_usage_ref structure. The
+usage to be retrieved can be specified as above, or the user can
+choose to fill in the report_type field and specify the report_id as
+HID_REPORT_ID_UNKNOWN. In this case, the hiddev_usage_ref will be
+filled in with the report and field information associated with this
+usage if it is found.
+
+HIDIOCSUSAGE
+  - struct hiddev_usage_ref (write)
+
+Sets the value of a usage in an output report.  The user fills in
+the hiddev_usage_ref structure as above, but additionally fills in
+the value field.
+
+HIDIOGCOLLECTIONINDEX
+  - struct hiddev_usage_ref (write)
+
+Returns the collection index associated with this usage.  This
+indicates where in the collection hierarchy this usage sits.
+
+HIDIOCGFLAG
+  - int (read)
+HIDIOCSFLAG
+  - int (write)
+
+These operations respectively inspect and replace the mode flags
+that influence the read() call above.  The flags are as follows:
+
+    HIDDEV_FLAG_UREF
+      - read() calls will now return
+        struct hiddev_usage_ref instead of struct hiddev_event.
+        This is a larger structure, but in situations where the
+        device has more than one usage in its reports with the
+        same usage code, this mode serves to resolve such
+        ambiguity.
+
+    HIDDEV_FLAG_REPORT
+      - This flag can only be used in conjunction
+        with HIDDEV_FLAG_UREF.  With this flag set, when the device
+        sends a report, a struct hiddev_usage_ref will be returned
+        to read() filled in with the report_type and report_id, but
+        with field_index set to FIELD_INDEX_NONE.  This serves as
+        additional notification when the device has sent a report.
diff --git a/Documentation/hid/hiddev.txt b/Documentation/hid/hiddev.txt
deleted file mode 100644
index 638448707aa2..000000000000
--- a/Documentation/hid/hiddev.txt
+++ /dev/null
@@ -1,205 +0,0 @@
-Care and feeding of your Human Interface Devices
-
-INTRODUCTION
-
-In addition to the normal input type HID devices, USB also uses the
-human interface device protocols for things that are not really human
-interfaces, but have similar sorts of communication needs. The two big
-examples for this are power devices (especially uninterruptable power
-supplies) and monitor control on higher end monitors.
-
-To support these disparate requirements, the Linux USB system provides
-HID events to two separate interfaces:
-* the input subsystem, which converts HID events into normal input
-device interfaces (such as keyboard, mouse and joystick) and a
-normalised event interface - see Documentation/input/input.rst
-* the hiddev interface, which provides fairly raw HID events
-
-The data flow for a HID event produced by a device is something like
-the following :
-
- usb.c ---> hid-core.c  ----> hid-input.c ----> [keyboard/mouse/joystick/event]
-                         |
-                         |
-                          --> hiddev.c ----> POWER / MONITOR CONTROL 
-
-In addition, other subsystems (apart from USB) can potentially feed
-events into the input subsystem, but these have no effect on the hid
-device interface.
-
-USING THE HID DEVICE INTERFACE
-
-The hiddev interface is a char interface using the normal USB major,
-with the minor numbers starting at 96 and finishing at 111. Therefore,
-you need the following commands:
-mknod /dev/usb/hiddev0 c 180 96
-mknod /dev/usb/hiddev1 c 180 97
-mknod /dev/usb/hiddev2 c 180 98
-mknod /dev/usb/hiddev3 c 180 99
-mknod /dev/usb/hiddev4 c 180 100
-mknod /dev/usb/hiddev5 c 180 101
-mknod /dev/usb/hiddev6 c 180 102
-mknod /dev/usb/hiddev7 c 180 103
-mknod /dev/usb/hiddev8 c 180 104
-mknod /dev/usb/hiddev9 c 180 105
-mknod /dev/usb/hiddev10 c 180 106
-mknod /dev/usb/hiddev11 c 180 107
-mknod /dev/usb/hiddev12 c 180 108
-mknod /dev/usb/hiddev13 c 180 109
-mknod /dev/usb/hiddev14 c 180 110
-mknod /dev/usb/hiddev15 c 180 111
-
-So you point your hiddev compliant user-space program at the correct
-interface for your device, and it all just works.
-
-Assuming that you have a hiddev compliant user-space program, of
-course. If you need to write one, read on.
-
-
-THE HIDDEV API
-This description should be read in conjunction with the HID
-specification, freely available from http://www.usb.org, and
-conveniently linked of http://www.linux-usb.org.
-
-The hiddev API uses a read() interface, and a set of ioctl() calls.
-
-HID devices exchange data with the host computer using data
-bundles called "reports".  Each report is divided into "fields",
-each of which can have one or more "usages".  In the hid-core,
-each one of these usages has a single signed 32 bit value.
-
-read():
-This is the event interface.  When the HID device's state changes,
-it performs an interrupt transfer containing a report which contains
-the changed value.  The hid-core.c module parses the report, and
-returns to hiddev.c the individual usages that have changed within
-the report.  In its basic mode, the hiddev will make these individual
-usage changes available to the reader using a struct hiddev_event:
-
-       struct hiddev_event {
-           unsigned hid;
-           signed int value;
-       };
-
-containing the HID usage identifier for the status that changed, and
-the value that it was changed to. Note that the structure is defined
-within <linux/hiddev.h>, along with some other useful #defines and
-structures.  The HID usage identifier is a composite of the HID usage
-page shifted to the 16 high order bits ORed with the usage code.  The
-behavior of the read() function can be modified using the HIDIOCSFLAG
-ioctl() described below.
-
-
-ioctl(): 
-This is the control interface. There are a number of controls: 
-
-HIDIOCGVERSION - int (read)
-Gets the version code out of the hiddev driver.
-
-HIDIOCAPPLICATION - (none)
-This ioctl call returns the HID application usage associated with the
-hid device. The third argument to ioctl() specifies which application
-index to get. This is useful when the device has more than one
-application collection. If the index is invalid (greater or equal to
-the number of application collections this device has) the ioctl
-returns -1. You can find out beforehand how many application
-collections the device has from the num_applications field from the
-hiddev_devinfo structure. 
-
-HIDIOCGCOLLECTIONINFO - struct hiddev_collection_info (read/write)
-This returns a superset of the information above, providing not only
-application collections, but all the collections the device has.  It
-also returns the level the collection lives in the hierarchy.
-The user passes in a hiddev_collection_info struct with the index 
-field set to the index that should be returned.  The ioctl fills in 
-the other fields.  If the index is larger than the last collection 
-index, the ioctl returns -1 and sets errno to -EINVAL.
-
-HIDIOCGDEVINFO - struct hiddev_devinfo (read)
-Gets a hiddev_devinfo structure which describes the device.
-
-HIDIOCGSTRING - struct hiddev_string_descriptor (read/write)
-Gets a string descriptor from the device. The caller must fill in the
-"index" field to indicate which descriptor should be returned.
-
-HIDIOCINITREPORT - (none)
-Instructs the kernel to retrieve all input and feature report values
-from the device. At this point, all the usage structures will contain
-current values for the device, and will maintain it as the device
-changes.  Note that the use of this ioctl is unnecessary in general,
-since later kernels automatically initialize the reports from the
-device at attach time.
-
-HIDIOCGNAME - string (variable length)
-Gets the device name
-
-HIDIOCGREPORT - struct hiddev_report_info (write)
-Instructs the kernel to get a feature or input report from the device,
-in order to selectively update the usage structures (in contrast to
-INITREPORT).
-
-HIDIOCSREPORT - struct hiddev_report_info (write)
-Instructs the kernel to send a report to the device. This report can
-be filled in by the user through HIDIOCSUSAGE calls (below) to fill in
-individual usage values in the report before sending the report in full
-to the device. 
-
-HIDIOCGREPORTINFO - struct hiddev_report_info (read/write)
-Fills in a hiddev_report_info structure for the user. The report is
-looked up by type (input, output or feature) and id, so these fields
-must be filled in by the user. The ID can be absolute -- the actual
-report id as reported by the device -- or relative --
-HID_REPORT_ID_FIRST for the first report, and (HID_REPORT_ID_NEXT |
-report_id) for the next report after report_id. Without a-priori
-information about report ids, the right way to use this ioctl is to
-use the relative IDs above to enumerate the valid IDs. The ioctl
-returns non-zero when there is no more next ID. The real report ID is
-filled into the returned hiddev_report_info structure. 
-
-HIDIOCGFIELDINFO - struct hiddev_field_info (read/write)
-Returns the field information associated with a report in a
-hiddev_field_info structure. The user must fill in report_id and
-report_type in this structure, as above. The field_index should also
-be filled in, which should be a number from 0 and maxfield-1, as
-returned from a previous HIDIOCGREPORTINFO call. 
-
-HIDIOCGUCODE - struct hiddev_usage_ref (read/write)
-Returns the usage_code in a hiddev_usage_ref structure, given that
-given its report type, report id, field index, and index within the
-field have already been filled into the structure.
-
-HIDIOCGUSAGE - struct hiddev_usage_ref (read/write)
-Returns the value of a usage in a hiddev_usage_ref structure. The
-usage to be retrieved can be specified as above, or the user can
-choose to fill in the report_type field and specify the report_id as
-HID_REPORT_ID_UNKNOWN. In this case, the hiddev_usage_ref will be
-filled in with the report and field information associated with this
-usage if it is found. 
-
-HIDIOCSUSAGE - struct hiddev_usage_ref (write)
-Sets the value of a usage in an output report.  The user fills in
-the hiddev_usage_ref structure as above, but additionally fills in
-the value field.
-
-HIDIOGCOLLECTIONINDEX - struct hiddev_usage_ref (write)
-Returns the collection index associated with this usage.  This
-indicates where in the collection hierarchy this usage sits.
-
-HIDIOCGFLAG - int (read)
-HIDIOCSFLAG - int (write)
-These operations respectively inspect and replace the mode flags
-that influence the read() call above.  The flags are as follows:
-
-    HIDDEV_FLAG_UREF - read() calls will now return 
-        struct hiddev_usage_ref instead of struct hiddev_event.
-        This is a larger structure, but in situations where the
-        device has more than one usage in its reports with the
-        same usage code, this mode serves to resolve such
-        ambiguity.
-
-    HIDDEV_FLAG_REPORT - This flag can only be used in conjunction
-        with HIDDEV_FLAG_UREF.  With this flag set, when the device
-        sends a report, a struct hiddev_usage_ref will be returned
-        to read() filled in with the report_type and report_id, but 
-        with field_index set to FIELD_INDEX_NONE.  This serves as
-        additional notification when the device has sent a report.
diff --git a/Documentation/hid/hidraw.rst b/Documentation/hid/hidraw.rst
new file mode 100644
index 000000000000..4a4a0ba1f362
--- /dev/null
+++ b/Documentation/hid/hidraw.rst
@@ -0,0 +1,138 @@
+================================================================
+HIDRAW - Raw Access to USB and Bluetooth Human Interface Devices
+================================================================
+
+The hidraw driver provides a raw interface to USB and Bluetooth Human
+Interface Devices (HIDs).  It differs from hiddev in that reports sent and
+received are not parsed by the HID parser, but are sent to and received from
+the device unmodified.
+
+Hidraw should be used if the userspace application knows exactly how to
+communicate with the hardware device, and is able to construct the HID
+reports manually.  This is often the case when making userspace drivers for
+custom HID devices.
+
+Hidraw is also useful for communicating with non-conformant HID devices
+which send and receive data in a way that is inconsistent with their report
+descriptors.  Because hiddev parses reports which are sent and received
+through it, checking them against the device's report descriptor, such
+communication with these non-conformant devices is impossible using hiddev.
+Hidraw is the only alternative, short of writing a custom kernel driver, for
+these non-conformant devices.
+
+A benefit of hidraw is that its use by userspace applications is independent
+of the underlying hardware type.  Currently, Hidraw is implemented for USB
+and Bluetooth.  In the future, as new hardware bus types are developed which
+use the HID specification, hidraw will be expanded to add support for these
+new bus types.
+
+Hidraw uses a dynamic major number, meaning that udev should be relied on to
+create hidraw device nodes.  Udev will typically create the device nodes
+directly under /dev (eg: /dev/hidraw0).  As this location is distribution-
+and udev rule-dependent, applications should use libudev to locate hidraw
+devices attached to the system.  There is a tutorial on libudev with a
+working example at:
+
+	http://www.signal11.us/oss/udev/
+
+The HIDRAW API
+---------------
+
+read()
+-------
+read() will read a queued report received from the HID device. On USB
+devices, the reports read using read() are the reports sent from the device
+on the INTERRUPT IN endpoint.  By default, read() will block until there is
+a report available to be read.  read() can be made non-blocking, by passing
+the O_NONBLOCK flag to open(), or by setting the O_NONBLOCK flag using
+fcntl().
+
+On a device which uses numbered reports, the first byte of the returned data
+will be the report number; the report data follows, beginning in the second
+byte.  For devices which do not use numbered reports, the report data
+will begin at the first byte.
+
+write()
+-------
+The write() function will write a report to the device. For USB devices, if
+the device has an INTERRUPT OUT endpoint, the report will be sent on that
+endpoint. If it does not, the report will be sent over the control endpoint,
+using a SET_REPORT transfer.
+
+The first byte of the buffer passed to write() should be set to the report
+number.  If the device does not use numbered reports, the first byte should
+be set to 0. The report data itself should begin at the second byte.
+
+ioctl()
+-------
+Hidraw supports the following ioctls:
+
+HIDIOCGRDESCSIZE:
+	Get Report Descriptor Size
+
+This ioctl will get the size of the device's report descriptor.
+
+HIDIOCGRDESC:
+	Get Report Descriptor
+
+This ioctl returns the device's report descriptor using a
+hidraw_report_descriptor struct.  Make sure to set the size field of the
+hidraw_report_descriptor struct to the size returned from HIDIOCGRDESCSIZE.
+
+HIDIOCGRAWINFO:
+	Get Raw Info
+
+This ioctl will return a hidraw_devinfo struct containing the bus type, the
+vendor ID (VID), and product ID (PID) of the device. The bus type can be one
+of::
+
+	- BUS_USB
+	- BUS_HIL
+	- BUS_BLUETOOTH
+	- BUS_VIRTUAL
+
+which are defined in uapi/linux/input.h.
+
+HIDIOCGRAWNAME(len):
+	Get Raw Name
+
+This ioctl returns a string containing the vendor and product strings of
+the device.  The returned string is Unicode, UTF-8 encoded.
+
+HIDIOCGRAWPHYS(len):
+	Get Physical Address
+
+This ioctl returns a string representing the physical address of the device.
+For USB devices, the string contains the physical path to the device (the
+USB controller, hubs, ports, etc).  For Bluetooth devices, the string
+contains the hardware (MAC) address of the device.
+
+HIDIOCSFEATURE(len):
+	Send a Feature Report
+
+This ioctl will send a feature report to the device.  Per the HID
+specification, feature reports are always sent using the control endpoint.
+Set the first byte of the supplied buffer to the report number.  For devices
+which do not use numbered reports, set the first byte to 0. The report data
+begins in the second byte. Make sure to set len accordingly, to one more
+than the length of the report (to account for the report number).
+
+HIDIOCGFEATURE(len):
+	Get a Feature Report
+
+This ioctl will request a feature report from the device using the control
+endpoint.  The first byte of the supplied buffer should be set to the report
+number of the requested report.  For devices which do not use numbered
+reports, set the first byte to 0.  The report will be returned starting at
+the first byte of the buffer (ie: the report number is not returned).
+
+Example
+-------
+In samples/, find hid-example.c, which shows examples of read(), write(),
+and all the ioctls for hidraw.  The code may be used by anyone for any
+purpose, and can serve as a starting point for developing applications using
+hidraw.
+
+Document by:
+
+	Alan Ott <alan@signal11.us>, Signal 11 Software
diff --git a/Documentation/hid/hidraw.txt b/Documentation/hid/hidraw.txt
deleted file mode 100644
index c8436e354f44..000000000000
--- a/Documentation/hid/hidraw.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-      HIDRAW - Raw Access to USB and Bluetooth Human Interface Devices
-     ==================================================================
-
-The hidraw driver provides a raw interface to USB and Bluetooth Human
-Interface Devices (HIDs).  It differs from hiddev in that reports sent and
-received are not parsed by the HID parser, but are sent to and received from
-the device unmodified.
-
-Hidraw should be used if the userspace application knows exactly how to
-communicate with the hardware device, and is able to construct the HID
-reports manually.  This is often the case when making userspace drivers for
-custom HID devices.
-
-Hidraw is also useful for communicating with non-conformant HID devices
-which send and receive data in a way that is inconsistent with their report
-descriptors.  Because hiddev parses reports which are sent and received
-through it, checking them against the device's report descriptor, such
-communication with these non-conformant devices is impossible using hiddev.
-Hidraw is the only alternative, short of writing a custom kernel driver, for
-these non-conformant devices.
-
-A benefit of hidraw is that its use by userspace applications is independent
-of the underlying hardware type.  Currently, Hidraw is implemented for USB
-and Bluetooth.  In the future, as new hardware bus types are developed which
-use the HID specification, hidraw will be expanded to add support for these
-new bus types.
-
-Hidraw uses a dynamic major number, meaning that udev should be relied on to
-create hidraw device nodes.  Udev will typically create the device nodes
-directly under /dev (eg: /dev/hidraw0).  As this location is distribution-
-and udev rule-dependent, applications should use libudev to locate hidraw
-devices attached to the system.  There is a tutorial on libudev with a
-working example at:
-	http://www.signal11.us/oss/udev/
-
-The HIDRAW API
----------------
-
-read()
--------
-read() will read a queued report received from the HID device. On USB
-devices, the reports read using read() are the reports sent from the device
-on the INTERRUPT IN endpoint.  By default, read() will block until there is
-a report available to be read.  read() can be made non-blocking, by passing
-the O_NONBLOCK flag to open(), or by setting the O_NONBLOCK flag using
-fcntl().
-
-On a device which uses numbered reports, the first byte of the returned data
-will be the report number; the report data follows, beginning in the second
-byte.  For devices which do not use numbered reports, the report data
-will begin at the first byte.
-
-write()
---------
-The write() function will write a report to the device. For USB devices, if
-the device has an INTERRUPT OUT endpoint, the report will be sent on that
-endpoint. If it does not, the report will be sent over the control endpoint,
-using a SET_REPORT transfer.
-
-The first byte of the buffer passed to write() should be set to the report
-number.  If the device does not use numbered reports, the first byte should
-be set to 0. The report data itself should begin at the second byte.
-
-ioctl()
---------
-Hidraw supports the following ioctls:
-
-HIDIOCGRDESCSIZE: Get Report Descriptor Size
-This ioctl will get the size of the device's report descriptor.
-
-HIDIOCGRDESC: Get Report Descriptor
-This ioctl returns the device's report descriptor using a
-hidraw_report_descriptor struct.  Make sure to set the size field of the
-hidraw_report_descriptor struct to the size returned from HIDIOCGRDESCSIZE.
-
-HIDIOCGRAWINFO: Get Raw Info
-This ioctl will return a hidraw_devinfo struct containing the bus type, the
-vendor ID (VID), and product ID (PID) of the device. The bus type can be one
-of:
-	BUS_USB
-	BUS_HIL
-	BUS_BLUETOOTH
-	BUS_VIRTUAL
-which are defined in uapi/linux/input.h.
-
-HIDIOCGRAWNAME(len): Get Raw Name
-This ioctl returns a string containing the vendor and product strings of
-the device.  The returned string is Unicode, UTF-8 encoded.
-
-HIDIOCGRAWPHYS(len): Get Physical Address
-This ioctl returns a string representing the physical address of the device.
-For USB devices, the string contains the physical path to the device (the
-USB controller, hubs, ports, etc).  For Bluetooth devices, the string
-contains the hardware (MAC) address of the device.
-
-HIDIOCSFEATURE(len): Send a Feature Report
-This ioctl will send a feature report to the device.  Per the HID
-specification, feature reports are always sent using the control endpoint.
-Set the first byte of the supplied buffer to the report number.  For devices
-which do not use numbered reports, set the first byte to 0. The report data
-begins in the second byte. Make sure to set len accordingly, to one more
-than the length of the report (to account for the report number).
-
-HIDIOCGFEATURE(len): Get a Feature Report
-This ioctl will request a feature report from the device using the control
-endpoint.  The first byte of the supplied buffer should be set to the report
-number of the requested report.  For devices which do not use numbered
-reports, set the first byte to 0.  The report will be returned starting at
-the first byte of the buffer (ie: the report number is not returned).
-
-Example
----------
-In samples/, find hid-example.c, which shows examples of read(), write(),
-and all the ioctls for hidraw.  The code may be used by anyone for any
-purpose, and can serve as a starting point for developing applications using
-hidraw.
-
-Document by:
-	Alan Ott <alan@signal11.us>, Signal 11 Software
diff --git a/Documentation/hid/index.rst b/Documentation/hid/index.rst
new file mode 100644
index 000000000000..af4324902622
--- /dev/null
+++ b/Documentation/hid/index.rst
@@ -0,0 +1,18 @@
+:orphan:
+
+=============================
+Human Interface Devices (HID)
+=============================
+
+.. toctree::
+   :maxdepth: 1
+
+   hiddev
+   hidraw
+   hid-sensor
+   hid-transport
+
+   uhid
+
+   hid-alps
+   intel-ish-hid
diff --git a/Documentation/hid/intel-ish-hid.rst b/Documentation/hid/intel-ish-hid.rst
new file mode 100644
index 000000000000..cccbf4be17d7
--- /dev/null
+++ b/Documentation/hid/intel-ish-hid.rst
@@ -0,0 +1,485 @@
+=================================
+Intel Integrated Sensor Hub (ISH)
+=================================
+
+A sensor hub enables the ability to offload sensor polling and algorithm
+processing to a dedicated low power co-processor. This allows the core
+processor to go into low power modes more often, resulting in the increased
+battery life.
+
+There are many vendors providing external sensor hubs confirming to HID
+Sensor usage tables, and used in several tablets, 2 in 1 convertible laptops
+and embedded products. Linux had this support since Linux 3.9.
+
+Intel® introduced integrated sensor hubs as a part of the SoC starting from
+Cherry Trail and now supported on multiple generations of CPU packages. There
+are many commercial devices already shipped with Integrated Sensor Hubs (ISH).
+These ISH also comply to HID sensor specification, but the  difference is the
+transport protocol used for communication. The current external sensor hubs
+mainly use HID over i2C or USB. But ISH doesn't use either i2c or USB.
+
+1. Overview
+===========
+
+Using a analogy with a usbhid implementation, the ISH follows a similar model
+for a very high speed communication::
+
+	-----------------		----------------------
+	|    USB HID	|	-->	|    ISH HID	     |
+	-----------------		----------------------
+	-----------------		----------------------
+	|  USB protocol	|	-->	|    ISH Transport   |
+	-----------------		----------------------
+	-----------------		----------------------
+	|  EHCI/XHCI	|	-->	|    ISH IPC	     |
+	-----------------		----------------------
+	      PCI				 PCI
+	-----------------		----------------------
+        |Host controller|	-->	|    ISH processor   |
+	-----------------		----------------------
+	     USB Link
+	-----------------		----------------------
+	| USB End points|	-->	|    ISH Clients     |
+	-----------------		----------------------
+
+Like USB protocol provides a method for device enumeration, link management
+and user data encapsulation, the ISH also provides similar services. But it is
+very light weight tailored to manage and communicate with ISH client
+applications implemented in the firmware.
+
+The ISH allows multiple sensor management applications executing in the
+firmware. Like USB endpoints the messaging can be to/from a client. As part of
+enumeration process, these clients are identified. These clients can be simple
+HID sensor applications, sensor calibration application or senor firmware
+update application.
+
+The implementation model is similar, like USB bus, ISH transport is also
+implemented as a bus. Each client application executing in the ISH processor
+is registered as a device on this bus. The driver, which binds each device
+(ISH HID driver) identifies the device type and registers with the hid core.
+
+2. ISH Implementation: Block Diagram
+====================================
+
+::
+
+	 ---------------------------
+	|  User Space Applications  |
+	 ---------------------------
+
+  ----------------IIO ABI----------------
+	 --------------------------
+	|  IIO Sensor Drivers	  |
+	 --------------------------
+	 --------------------------
+	|	 IIO core	  |
+	 --------------------------
+	 --------------------------
+	|   HID Sensor Hub MFD	  |
+	 --------------------------
+	 --------------------------
+	|       HID Core	  |
+	 --------------------------
+	 --------------------------
+	|   HID over ISH Client   |
+	 --------------------------
+	 --------------------------
+	|   ISH Transport (ISHTP) |
+	 --------------------------
+	 --------------------------
+	|      IPC Drivers	  |
+	 --------------------------
+  OS
+  ---------------- PCI -----------------
+  Hardware + Firmware
+	 ----------------------------
+	| ISH Hardware/Firmware(FW) |
+	 ----------------------------
+
+3. High level processing in above blocks
+========================================
+
+3.1 Hardware Interface
+----------------------
+
+The ISH is exposed as "Non-VGA unclassified PCI device" to the host. The PCI
+product and vendor IDs are changed from different generations of processors. So
+the source code which enumerate drivers needs to update from generation to
+generation.
+
+3.2 Inter Processor Communication (IPC) driver
+----------------------------------------------
+
+Location: drivers/hid/intel-ish-hid/ipc
+
+The IPC message used memory mapped I/O. The registers are defined in
+hw-ish-regs.h.
+
+3.2.1 IPC/FW message types
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+There are two types of messages, one for management of link and other messages
+are to and from transport layers.
+
+TX and RX of Transport messages
+...............................
+
+A set of memory mapped register offers support of multi byte messages TX and
+RX (E.g.IPC_REG_ISH2HOST_MSG, IPC_REG_HOST2ISH_MSG). The IPC layer maintains
+internal queues to sequence messages and send them in order to the FW.
+Optionally the caller can register handler to get notification of completion.
+A door bell mechanism is used in messaging to trigger processing in host and
+client firmware side. When ISH interrupt handler is called, the ISH2HOST
+doorbell register is used by host drivers to determine that the interrupt
+is for ISH.
+
+Each side has 32 32-bit message registers and a 32-bit doorbell. Doorbell
+register has the following format:
+Bits 0..6: fragment length (7 bits are used)
+Bits 10..13: encapsulated protocol
+Bits 16..19: management command (for IPC management protocol)
+Bit 31: doorbell trigger (signal H/W interrupt to the other side)
+Other bits are reserved, should be 0.
+
+3.2.2 Transport layer interface
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To abstract HW level IPC communication, a set of callbacks are registered.
+The transport layer uses them to send and receive messages.
+Refer to  struct ishtp_hw_ops for callbacks.
+
+3.3 ISH Transport layer
+-----------------------
+
+Location: drivers/hid/intel-ish-hid/ishtp/
+
+3.3.1 A Generic Transport Layer
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The transport layer is a bi-directional protocol, which defines:
+- Set of commands to start, stop, connect, disconnect and flow control
+(ishtp/hbm.h) for details
+- A flow control mechanism to avoid buffer overflows
+
+This protocol resembles bus messages described in the following document:
+http://www.intel.com/content/dam/www/public/us/en/documents/technical-\
+specifications/dcmi-hi-1-0-spec.pdf "Chapter 7: Bus Message Layer"
+
+3.3.2 Connection and Flow Control Mechanism
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Each FW client and a protocol is identified by an UUID. In order to communicate
+to a FW client, a connection must be established using connect request and
+response bus messages. If successful, a pair (host_client_id and fw_client_id)
+will identify the connection.
+
+Once connection is established, peers send each other flow control bus messages
+independently. Every peer may send a message only if it has received a
+flow-control credit before. Once it sent a message, it may not send another one
+before receiving the next flow control credit.
+Either side can send disconnect request bus message to end communication. Also
+the link will be dropped if major FW reset occurs.
+
+3.3.3 Peer to Peer data transfer
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Peer to Peer data transfer can happen with or without using DMA. Depending on
+the sensor bandwidth requirement DMA can be enabled by using module parameter
+ishtp_use_dma under intel_ishtp.
+
+Each side (host and FW) manages its DMA transfer memory independently. When an
+ISHTP client from either host or FW side wants to send something, it decides
+whether to send over IPC or over DMA; for each transfer the decision is
+independent. The sending side sends DMA_XFER message when the message is in
+the respective host buffer (TX when host client sends, RX when FW client
+sends). The recipient of DMA message responds with DMA_XFER_ACK, indicating
+the sender that the memory region for that message may be reused.
+
+DMA initialization is started with host sending DMA_ALLOC_NOTIFY bus message
+(that includes RX buffer) and FW responds with DMA_ALLOC_NOTIFY_ACK.
+Additionally to DMA address communication, this sequence checks capabilities:
+if thw host doesn't support DMA, then it won't send DMA allocation, so FW can't
+send DMA; if FW doesn't support DMA then it won't respond with
+DMA_ALLOC_NOTIFY_ACK, in which case host will not use DMA transfers.
+Here ISH acts as busmaster DMA controller. Hence when host sends DMA_XFER,
+it's request to do host->ISH DMA transfer; when FW sends DMA_XFER, it means
+that it already did DMA and the message resides at host. Thus, DMA_XFER
+and DMA_XFER_ACK act as ownership indicators.
+
+At initial state all outgoing memory belongs to the sender (TX to host, RX to
+FW), DMA_XFER transfers ownership on the region that contains ISHTP message to
+the receiving side, DMA_XFER_ACK returns ownership to the sender. A sender
+needs not wait for previous DMA_XFER to be ack'ed, and may send another message
+as long as remaining continuous memory in its ownership is enough.
+In principle, multiple DMA_XFER and DMA_XFER_ACK messages may be sent at once
+(up to IPC MTU), thus allowing for interrupt throttling.
+Currently, ISH FW decides to send over DMA if ISHTP message is more than 3 IPC
+fragments and via IPC otherwise.
+
+3.3.4 Ring Buffers
+^^^^^^^^^^^^^^^^^^
+
+When a client initiate a connection, a ring or RX and TX buffers are allocated.
+The size of ring can be specified by the client. HID client set 16 and 32 for
+TX and RX buffers respectively. On send request from client, the data to be
+sent is copied to one of the send ring buffer and scheduled to be sent using
+bus message protocol. These buffers are required because the FW may have not
+have processed the last message and may not have enough flow control credits
+to send. Same thing holds true on receive side and flow control is required.
+
+3.3.5 Host Enumeration
+^^^^^^^^^^^^^^^^^^^^^^
+
+The host enumeration bus command allow discovery of clients present in the FW.
+There can be multiple sensor clients and clients for calibration function.
+
+To ease in implantation and allow independent driver handle each client
+this transport layer takes advantage of Linux Bus driver model. Each
+client is registered as device on the the transport bus (ishtp bus).
+
+Enumeration sequence of messages:
+
+- Host sends HOST_START_REQ_CMD, indicating that host ISHTP layer is up.
+- FW responds with HOST_START_RES_CMD
+- Host sends HOST_ENUM_REQ_CMD (enumerate FW clients)
+- FW responds with HOST_ENUM_RES_CMD that includes bitmap of available FW
+  client IDs
+- For each FW ID found in that bitmap host sends
+  HOST_CLIENT_PROPERTIES_REQ_CMD
+- FW responds with HOST_CLIENT_PROPERTIES_RES_CMD. Properties include UUID,
+  max ISHTP message size, etc.
+- Once host received properties for that last discovered client, it considers
+  ISHTP device fully functional (and allocates DMA buffers)
+
+3.4 HID over ISH Client
+-----------------------
+
+Location: drivers/hid/intel-ish-hid
+
+The ISHTP client driver is responsible for:
+
+- enumerate HID devices under FW ISH client
+- Get Report descriptor
+- Register with HID core as a LL driver
+- Process Get/Set feature request
+- Get input reports
+
+3.5 HID Sensor Hub MFD and IIO sensor drivers
+---------------------------------------------
+
+The functionality in these drivers is the same as an external sensor hub.
+Refer to
+Documentation/hid/hid-sensor.rst for HID sensor
+Documentation/ABI/testing/sysfs-bus-iio for IIO ABIs to user space
+
+3.6 End to End HID transport Sequence Diagram
+---------------------------------------------
+
+::
+
+  HID-ISH-CLN                    ISHTP                    IPC                             HW
+          |                        |                       |                               |
+          |                        |                       |-----WAKE UP------------------>|
+          |                        |                       |                               |
+          |                        |                       |-----HOST READY--------------->|
+          |                        |                       |                               |
+          |                        |                       |<----MNG_RESET_NOTIFY_ACK----- |
+          |                        |                       |                               |
+          |                        |<----ISHTP_START------ |                               |
+          |                        |                       |                               |
+          |                        |<-----------------HOST_START_RES_CMD-------------------|
+          |                        |                       |                               |
+          |                        |------------------QUERY_SUBSCRIBER-------------------->|
+          |                        |                       |                               |
+          |                        |------------------HOST_ENUM_REQ_CMD------------------->|
+          |                        |                       |                               |
+          |                        |<-----------------HOST_ENUM_RES_CMD--------------------|
+          |                        |                       |                               |
+          |                        |------------------HOST_CLIENT_PROPERTIES_REQ_CMD------>|
+          |                        |                       |                               |
+          |                        |<-----------------HOST_CLIENT_PROPERTIES_RES_CMD-------|
+          |       Create new device on in ishtp bus        |                               |
+          |                        |                       |                               |
+          |                        |------------------HOST_CLIENT_PROPERTIES_REQ_CMD------>|
+          |                        |                       |                               |
+          |                        |<-----------------HOST_CLIENT_PROPERTIES_RES_CMD-------|
+          |       Create new device on in ishtp bus        |                               |
+          |                        |                       |                               |
+          |                        |--Repeat HOST_CLIENT_PROPERTIES_REQ_CMD-till last one--|
+          |                        |                       |                               |
+       probed()
+          |----ishtp_cl_connect--->|----------------- CLIENT_CONNECT_REQ_CMD-------------->|
+          |                        |                       |                               |
+          |                        |<----------------CLIENT_CONNECT_RES_CMD----------------|
+          |                        |                       |                               |
+          |register event callback |                       |                               |
+          |                        |                       |                               |
+          |ishtp_cl_send(
+          HOSTIF_DM_ENUM_DEVICES)  |----------fill ishtp_msg_hdr struct write to HW-----  >|
+          |                        |                       |                               |
+          |                        |                       |<-----IRQ(IPC_PROTOCOL_ISHTP---|
+          |                        |                       |                               |
+          |<--ENUM_DEVICE RSP------|                       |                               |
+          |                        |                       |                               |
+  for each enumerated device
+          |ishtp_cl_send(
+          HOSTIF_GET_HID_DESCRIPTOR|----------fill ishtp_msg_hdr struct write to HW-----  >|
+          |                        |                       |                               |
+          ...Response
+          |                        |                       |                               |
+  for each enumerated device
+          |ishtp_cl_send(
+       HOSTIF_GET_REPORT_DESCRIPTOR|--------------fill ishtp_msg_hdr struct write to HW-- >|
+          |                        |                       |                               |
+          |                        |                       |                               |
+   hid_allocate_device
+          |                        |                       |                               |
+   hid_add_device                  |                       |                               |
+          |                        |                       |                               |
+
+
+3.7 ISH Debugging
+-----------------
+
+To debug ISH, event tracing mechanism is used. To enable debug logs
+echo 1 > /sys/kernel/debug/tracing/events/intel_ish/enable
+cat sys/kernel/debug/tracing/trace
+
+3.8 ISH IIO sysfs Example on Lenovo thinkpad Yoga 260
+-----------------------------------------------------
+
+::
+
+  root@otcpl-ThinkPad-Yoga-260:~# tree -l /sys/bus/iio/devices/
+  /sys/bus/iio/devices/
+  ├── iio:device0 -> ../../../devices/0044:8086:22D8.0001/HID-SENSOR-200073.9.auto/iio:device0
+  │   ├── buffer
+  │   │   ├── enable
+  │   │   ├── length
+  │   │   └── watermark
+  ...
+  │   ├── in_accel_hysteresis
+  │   ├── in_accel_offset
+  │   ├── in_accel_sampling_frequency
+  │   ├── in_accel_scale
+  │   ├── in_accel_x_raw
+  │   ├── in_accel_y_raw
+  │   ├── in_accel_z_raw
+  │   ├── name
+  │   ├── scan_elements
+  │   │   ├── in_accel_x_en
+  │   │   ├── in_accel_x_index
+  │   │   ├── in_accel_x_type
+  │   │   ├── in_accel_y_en
+  │   │   ├── in_accel_y_index
+  │   │   ├── in_accel_y_type
+  │   │   ├── in_accel_z_en
+  │   │   ├── in_accel_z_index
+  │   │   └── in_accel_z_type
+  ...
+  │   │   ├── devices
+  │   │   │   │   ├── buffer
+  │   │   │   │   │   ├── enable
+  │   │   │   │   │   ├── length
+  │   │   │   │   │   └── watermark
+  │   │   │   │   ├── dev
+  │   │   │   │   ├── in_intensity_both_raw
+  │   │   │   │   ├── in_intensity_hysteresis
+  │   │   │   │   ├── in_intensity_offset
+  │   │   │   │   ├── in_intensity_sampling_frequency
+  │   │   │   │   ├── in_intensity_scale
+  │   │   │   │   ├── name
+  │   │   │   │   ├── scan_elements
+  │   │   │   │   │   ├── in_intensity_both_en
+  │   │   │   │   │   ├── in_intensity_both_index
+  │   │   │   │   │   └── in_intensity_both_type
+  │   │   │   │   ├── trigger
+  │   │   │   │   │   └── current_trigger
+  ...
+  │   │   │   │   ├── buffer
+  │   │   │   │   │   ├── enable
+  │   │   │   │   │   ├── length
+  │   │   │   │   │   └── watermark
+  │   │   │   │   ├── dev
+  │   │   │   │   ├── in_magn_hysteresis
+  │   │   │   │   ├── in_magn_offset
+  │   │   │   │   ├── in_magn_sampling_frequency
+  │   │   │   │   ├── in_magn_scale
+  │   │   │   │   ├── in_magn_x_raw
+  │   │   │   │   ├── in_magn_y_raw
+  │   │   │   │   ├── in_magn_z_raw
+  │   │   │   │   ├── in_rot_from_north_magnetic_tilt_comp_raw
+  │   │   │   │   ├── in_rot_hysteresis
+  │   │   │   │   ├── in_rot_offset
+  │   │   │   │   ├── in_rot_sampling_frequency
+  │   │   │   │   ├── in_rot_scale
+  │   │   │   │   ├── name
+  ...
+  │   │   │   │   ├── scan_elements
+  │   │   │   │   │   ├── in_magn_x_en
+  │   │   │   │   │   ├── in_magn_x_index
+  │   │   │   │   │   ├── in_magn_x_type
+  │   │   │   │   │   ├── in_magn_y_en
+  │   │   │   │   │   ├── in_magn_y_index
+  │   │   │   │   │   ├── in_magn_y_type
+  │   │   │   │   │   ├── in_magn_z_en
+  │   │   │   │   │   ├── in_magn_z_index
+  │   │   │   │   │   ├── in_magn_z_type
+  │   │   │   │   │   ├── in_rot_from_north_magnetic_tilt_comp_en
+  │   │   │   │   │   ├── in_rot_from_north_magnetic_tilt_comp_index
+  │   │   │   │   │   └── in_rot_from_north_magnetic_tilt_comp_type
+  │   │   │   │   ├── trigger
+  │   │   │   │   │   └── current_trigger
+  ...
+  │   │   │   │   ├── buffer
+  │   │   │   │   │   ├── enable
+  │   │   │   │   │   ├── length
+  │   │   │   │   │   └── watermark
+  │   │   │   │   ├── dev
+  │   │   │   │   ├── in_anglvel_hysteresis
+  │   │   │   │   ├── in_anglvel_offset
+  │   │   │   │   ├── in_anglvel_sampling_frequency
+  │   │   │   │   ├── in_anglvel_scale
+  │   │   │   │   ├── in_anglvel_x_raw
+  │   │   │   │   ├── in_anglvel_y_raw
+  │   │   │   │   ├── in_anglvel_z_raw
+  │   │   │   │   ├── name
+  │   │   │   │   ├── scan_elements
+  │   │   │   │   │   ├── in_anglvel_x_en
+  │   │   │   │   │   ├── in_anglvel_x_index
+  │   │   │   │   │   ├── in_anglvel_x_type
+  │   │   │   │   │   ├── in_anglvel_y_en
+  │   │   │   │   │   ├── in_anglvel_y_index
+  │   │   │   │   │   ├── in_anglvel_y_type
+  │   │   │   │   │   ├── in_anglvel_z_en
+  │   │   │   │   │   ├── in_anglvel_z_index
+  │   │   │   │   │   └── in_anglvel_z_type
+  │   │   │   │   ├── trigger
+  │   │   │   │   │   └── current_trigger
+  ...
+  │   │   │   │   ├── buffer
+  │   │   │   │   │   ├── enable
+  │   │   │   │   │   ├── length
+  │   │   │   │   │   └── watermark
+  │   │   │   │   ├── dev
+  │   │   │   │   ├── in_anglvel_hysteresis
+  │   │   │   │   ├── in_anglvel_offset
+  │   │   │   │   ├── in_anglvel_sampling_frequency
+  │   │   │   │   ├── in_anglvel_scale
+  │   │   │   │   ├── in_anglvel_x_raw
+  │   │   │   │   ├── in_anglvel_y_raw
+  │   │   │   │   ├── in_anglvel_z_raw
+  │   │   │   │   ├── name
+  │   │   │   │   ├── scan_elements
+  │   │   │   │   │   ├── in_anglvel_x_en
+  │   │   │   │   │   ├── in_anglvel_x_index
+  │   │   │   │   │   ├── in_anglvel_x_type
+  │   │   │   │   │   ├── in_anglvel_y_en
+  │   │   │   │   │   ├── in_anglvel_y_index
+  │   │   │   │   │   ├── in_anglvel_y_type
+  │   │   │   │   │   ├── in_anglvel_z_en
+  │   │   │   │   │   ├── in_anglvel_z_index
+  │   │   │   │   │   └── in_anglvel_z_type
+  │   │   │   │   ├── trigger
+  │   │   │   │   │   └── current_trigger
+  ...
diff --git a/Documentation/hid/intel-ish-hid.txt b/Documentation/hid/intel-ish-hid.txt
deleted file mode 100644
index d48b21c71ddd..000000000000
--- a/Documentation/hid/intel-ish-hid.txt
+++ /dev/null
@@ -1,454 +0,0 @@
-Intel Integrated Sensor Hub (ISH)
-===============================
-
-A sensor hub enables the ability to offload sensor polling and algorithm
-processing to a dedicated low power co-processor. This allows the core
-processor to go into low power modes more often, resulting in the increased
-battery life.
-
-There are many vendors providing external sensor hubs confirming to HID
-Sensor usage tables, and used in several tablets, 2 in 1 convertible laptops
-and embedded products. Linux had this support since Linux 3.9.
-
-Intel® introduced integrated sensor hubs as a part of the SoC starting from
-Cherry Trail and now supported on multiple generations of CPU packages. There
-are many commercial devices already shipped with Integrated Sensor Hubs (ISH).
-These ISH also comply to HID sensor specification, but the  difference is the
-transport protocol used for communication. The current external sensor hubs
-mainly use HID over i2C or USB. But ISH doesn't use either i2c or USB.
-
-1. Overview
-
-Using a analogy with a usbhid implementation, the ISH follows a similar model
-for a very high speed communication:
-
-	-----------------		----------------------
-	|    USB HID	|	-->	|    ISH HID	     |
-	-----------------		----------------------
-	-----------------		----------------------
-	|  USB protocol	|	-->	|    ISH Transport   |
-	-----------------		----------------------
-	-----------------		----------------------
-	|  EHCI/XHCI	|	-->	|    ISH IPC	     |
-	-----------------		----------------------
-	      PCI				 PCI
-	-----------------		----------------------
-        |Host controller|	-->	|    ISH processor   |
-	-----------------		----------------------
-	     USB Link
-	-----------------		----------------------
-	| USB End points|	-->	|    ISH Clients     |
-	-----------------		----------------------
-
-Like USB protocol provides a method for device enumeration, link management
-and user data encapsulation, the ISH also provides similar services. But it is
-very light weight tailored to manage and communicate with ISH client
-applications implemented in the firmware.
-
-The ISH allows multiple sensor management applications executing in the
-firmware. Like USB endpoints the messaging can be to/from a client. As part of
-enumeration process, these clients are identified. These clients can be simple
-HID sensor applications, sensor calibration application or senor firmware
-update application.
-
-The implementation model is similar, like USB bus, ISH transport is also
-implemented as a bus. Each client application executing in the ISH processor
-is registered as a device on this bus. The driver, which binds each device
-(ISH HID driver) identifies the device type and registers with the hid core.
-
-2. ISH Implementation: Block Diagram
-
-	 ---------------------------
-	|  User Space Applications  |
-	 ---------------------------
-
-----------------IIO ABI----------------
-	 --------------------------
-	|  IIO Sensor Drivers	  |
-	 --------------------------
-	 --------------------------
-	|	 IIO core	  |
-	 --------------------------
-	 --------------------------
-	|   HID Sensor Hub MFD	  |
-	 --------------------------
-	 --------------------------
-	|       HID Core	  |
-	 --------------------------
-	 --------------------------
-	|   HID over ISH Client   |
-	 --------------------------
-	 --------------------------
-	|   ISH Transport (ISHTP) |
-	 --------------------------
-	 --------------------------
-	|      IPC Drivers	  |
-	 --------------------------
-OS
-----------------   PCI -----------------
-Hardware + Firmware
-	 ----------------------------
-	| ISH Hardware/Firmware(FW) |
-	 ----------------------------
-
-3. High level processing in above blocks
-
-3.1 Hardware Interface
-
-The ISH is exposed as "Non-VGA unclassified PCI device" to the host. The PCI
-product and vendor IDs are changed from different generations of processors. So
-the source code which enumerate drivers needs to update from generation to
-generation.
-
-3.2 Inter Processor Communication (IPC) driver
-Location: drivers/hid/intel-ish-hid/ipc
-
-The IPC message used memory mapped I/O. The registers are defined in
-hw-ish-regs.h.
-
-3.2.1 IPC/FW message types
-
-There are two types of messages, one for management of link and other messages
-are to and from transport layers.
-
-TX and RX of Transport messages
-
-A set of memory mapped register offers support of multi byte messages TX and
-RX (E.g.IPC_REG_ISH2HOST_MSG, IPC_REG_HOST2ISH_MSG). The IPC layer maintains
-internal queues to sequence messages and send them in order to the FW.
-Optionally the caller can register handler to get notification of completion.
-A door bell mechanism is used in messaging to trigger processing in host and
-client firmware side. When ISH interrupt handler is called, the ISH2HOST
-doorbell register is used by host drivers to determine that the interrupt
-is for ISH.
-
-Each side has 32 32-bit message registers and a 32-bit doorbell. Doorbell
-register has the following format:
-Bits 0..6: fragment length (7 bits are used)
-Bits 10..13: encapsulated protocol
-Bits 16..19: management command (for IPC management protocol)
-Bit 31: doorbell trigger (signal H/W interrupt to the other side)
-Other bits are reserved, should be 0.
-
-3.2.2 Transport layer interface
-
-To abstract HW level IPC communication, a set of callbacks are registered.
-The transport layer uses them to send and receive messages.
-Refer to  struct ishtp_hw_ops for callbacks.
-
-3.3 ISH Transport layer
-Location: drivers/hid/intel-ish-hid/ishtp/
-
-3.3.1 A Generic Transport Layer
-
-The transport layer is a bi-directional protocol, which defines:
-- Set of commands to start, stop, connect, disconnect and flow control
-(ishtp/hbm.h) for details
-- A flow control mechanism to avoid buffer overflows
-
-This protocol resembles bus messages described in the following document:
-http://www.intel.com/content/dam/www/public/us/en/documents/technical-\
-specifications/dcmi-hi-1-0-spec.pdf "Chapter 7: Bus Message Layer"
-
-3.3.2 Connection and Flow Control Mechanism
-
-Each FW client and a protocol is identified by an UUID. In order to communicate
-to a FW client, a connection must be established using connect request and
-response bus messages. If successful, a pair (host_client_id and fw_client_id)
-will identify the connection.
-
-Once connection is established, peers send each other flow control bus messages
-independently. Every peer may send a message only if it has received a
-flow-control credit before. Once it sent a message, it may not send another one
-before receiving the next flow control credit.
-Either side can send disconnect request bus message to end communication. Also
-the link will be dropped if major FW reset occurs.
-
-3.3.3 Peer to Peer data transfer
-
-Peer to Peer data transfer can happen with or without using DMA. Depending on
-the sensor bandwidth requirement DMA can be enabled by using module parameter
-ishtp_use_dma under intel_ishtp.
-
-Each side (host and FW) manages its DMA transfer memory independently. When an
-ISHTP client from either host or FW side wants to send something, it decides
-whether to send over IPC or over DMA; for each transfer the decision is
-independent. The sending side sends DMA_XFER message when the message is in
-the respective host buffer (TX when host client sends, RX when FW client
-sends). The recipient of DMA message responds with DMA_XFER_ACK, indicating
-the sender that the memory region for that message may be reused.
-
-DMA initialization is started with host sending DMA_ALLOC_NOTIFY bus message
-(that includes RX buffer) and FW responds with DMA_ALLOC_NOTIFY_ACK.
-Additionally to DMA address communication, this sequence checks capabilities:
-if thw host doesn't support DMA, then it won't send DMA allocation, so FW can't
-send DMA; if FW doesn't support DMA then it won't respond with
-DMA_ALLOC_NOTIFY_ACK, in which case host will not use DMA transfers.
-Here ISH acts as busmaster DMA controller. Hence when host sends DMA_XFER,
-it's request to do host->ISH DMA transfer; when FW sends DMA_XFER, it means
-that it already did DMA and the message resides at host. Thus, DMA_XFER
-and DMA_XFER_ACK act as ownership indicators.
-
-At initial state all outgoing memory belongs to the sender (TX to host, RX to
-FW), DMA_XFER transfers ownership on the region that contains ISHTP message to
-the receiving side, DMA_XFER_ACK returns ownership to the sender. A sender
-needs not wait for previous DMA_XFER to be ack'ed, and may send another message
-as long as remaining continuous memory in its ownership is enough.
-In principle, multiple DMA_XFER and DMA_XFER_ACK messages may be sent at once
-(up to IPC MTU), thus allowing for interrupt throttling.
-Currently, ISH FW decides to send over DMA if ISHTP message is more than 3 IPC
-fragments and via IPC otherwise.
-
-3.3.4 Ring Buffers
-
-When a client initiate a connection, a ring or RX and TX buffers are allocated.
-The size of ring can be specified by the client. HID client set 16 and 32 for
-TX and RX buffers respectively. On send request from client, the data to be
-sent is copied to one of the send ring buffer and scheduled to be sent using
-bus message protocol. These buffers are required because the FW may have not
-have processed the last message and may not have enough flow control credits
-to send. Same thing holds true on receive side and flow control is required.
-
-3.3.5 Host Enumeration
-
-The host enumeration bus command allow discovery of clients present in the FW.
-There can be multiple sensor clients and clients for calibration function.
-
-To ease in implantation and allow independent driver handle each client
-this transport layer takes advantage of Linux Bus driver model. Each
-client is registered as device on the the transport bus (ishtp bus).
-
-Enumeration sequence of messages:
-- Host sends HOST_START_REQ_CMD, indicating that host ISHTP layer is up.
-- FW responds with HOST_START_RES_CMD
-- Host sends HOST_ENUM_REQ_CMD (enumerate FW clients)
-- FW responds with HOST_ENUM_RES_CMD that includes bitmap of available FW
-client IDs
-- For each FW ID found in that bitmap host sends
-HOST_CLIENT_PROPERTIES_REQ_CMD
-- FW responds with HOST_CLIENT_PROPERTIES_RES_CMD. Properties include UUID,
-max ISHTP message size, etc.
-- Once host received properties for that last discovered client, it considers
-ISHTP device fully functional (and allocates DMA buffers)
-
-3.4 HID over ISH Client
-Location: drivers/hid/intel-ish-hid
-
-The ISHTP client driver is responsible for:
-- enumerate HID devices under FW ISH client
-- Get Report descriptor
-- Register with HID core as a LL driver
-- Process Get/Set feature request
-- Get input reports
-
-3.5 HID Sensor Hub MFD and IIO sensor drivers
-
-The functionality in these drivers is the same as an external sensor hub.
-Refer to
-Documentation/hid/hid-sensor.txt for HID sensor
-Documentation/ABI/testing/sysfs-bus-iio for IIO ABIs to user space
-
-3.6 End to End HID transport Sequence Diagram
-
-HID-ISH-CLN			ISHTP			IPC				HW
-	|			|			|				|
-	|			|			|-----WAKE UP------------------>|
-	|			|			|				|
-	|			|			|-----HOST READY--------------->|
-	|			|			|				|
-	|			|			|<----MNG_RESET_NOTIFY_ACK----- |
-	|			|			|				|
-	|			|<----ISHTP_START------ |				|
-	|			|			|				|
-	|			|<-----------------HOST_START_RES_CMD-------------------|
-	|			|			|				|
-	|			|------------------QUERY_SUBSCRIBER-------------------->|
-	|			|			|				|
-	|			|------------------HOST_ENUM_REQ_CMD------------------->|
-	|			|			|				|
-	|			|<-----------------HOST_ENUM_RES_CMD--------------------|
-	|			|			|				|
-	|			|------------------HOST_CLIENT_PROPERTIES_REQ_CMD------>|
-	|			|			|				|
-	|			|<-----------------HOST_CLIENT_PROPERTIES_RES_CMD-------|
-	|	Create new device on in ishtp bus	|				|
-	|			|			|				|
-	|			|------------------HOST_CLIENT_PROPERTIES_REQ_CMD------>|
-	|			|			|				|
-	|			|<-----------------HOST_CLIENT_PROPERTIES_RES_CMD-------|
-	|	Create new device on in ishtp bus	|				|
-	|			|			|				|
-	|			|--Repeat HOST_CLIENT_PROPERTIES_REQ_CMD-till last one--|
-	|			|			|				|
-     probed()
-	|----ishtp_cl_connect-->|----------------- CLIENT_CONNECT_REQ_CMD-------------->|
-	|			|			|				|
-	|			|<----------------CLIENT_CONNECT_RES_CMD----------------|
-	|			|			|				|
-	|register event callback|			|				|
-	|			|			|				|
-	|ishtp_cl_send(
-	HOSTIF_DM_ENUM_DEVICES) |----------fill ishtp_msg_hdr struct write to HW-----  >|
-	|			|			|				|
-	|			|			|<-----IRQ(IPC_PROTOCOL_ISHTP---|
-	|			|			|				|
-	|<--ENUM_DEVICE RSP-----|			|				|
-	|			|			|				|
-for each enumerated device
-	|ishtp_cl_send(
-	HOSTIF_GET_HID_DESCRIPTOR |----------fill ishtp_msg_hdr struct write to HW---  >|
-	|			|			|				|
-	...Response
-	|			|			|				|
-for each enumerated device
-	|ishtp_cl_send(
-	HOSTIF_GET_REPORT_DESCRIPTOR |----------fill ishtp_msg_hdr struct write to HW- >|
-	|			|			|				|
-	|			|			|				|
- hid_allocate_device
-	|			|			|				|
- hid_add_device			|			|				|
-	|			|			|				|
-
-
-3.7 ISH Debugging
-
-To debug ISH, event tracing mechanism is used. To enable debug logs
-echo 1 > /sys/kernel/debug/tracing/events/intel_ish/enable
-cat sys/kernel/debug/tracing/trace
-
-3.8 ISH IIO sysfs Example on Lenovo thinkpad Yoga 260
-
-root@otcpl-ThinkPad-Yoga-260:~# tree -l /sys/bus/iio/devices/
-/sys/bus/iio/devices/
-├── iio:device0 -> ../../../devices/0044:8086:22D8.0001/HID-SENSOR-200073.9.auto/iio:device0
-│   ├── buffer
-│   │   ├── enable
-│   │   ├── length
-│   │   └── watermark
-...
-│   ├── in_accel_hysteresis
-│   ├── in_accel_offset
-│   ├── in_accel_sampling_frequency
-│   ├── in_accel_scale
-│   ├── in_accel_x_raw
-│   ├── in_accel_y_raw
-│   ├── in_accel_z_raw
-│   ├── name
-│   ├── scan_elements
-│   │   ├── in_accel_x_en
-│   │   ├── in_accel_x_index
-│   │   ├── in_accel_x_type
-│   │   ├── in_accel_y_en
-│   │   ├── in_accel_y_index
-│   │   ├── in_accel_y_type
-│   │   ├── in_accel_z_en
-│   │   ├── in_accel_z_index
-│   │   └── in_accel_z_type
-...
-│   │   ├── devices
-│   │   │   │   ├── buffer
-│   │   │   │   │   ├── enable
-│   │   │   │   │   ├── length
-│   │   │   │   │   └── watermark
-│   │   │   │   ├── dev
-│   │   │   │   ├── in_intensity_both_raw
-│   │   │   │   ├── in_intensity_hysteresis
-│   │   │   │   ├── in_intensity_offset
-│   │   │   │   ├── in_intensity_sampling_frequency
-│   │   │   │   ├── in_intensity_scale
-│   │   │   │   ├── name
-│   │   │   │   ├── scan_elements
-│   │   │   │   │   ├── in_intensity_both_en
-│   │   │   │   │   ├── in_intensity_both_index
-│   │   │   │   │   └── in_intensity_both_type
-│   │   │   │   ├── trigger
-│   │   │   │   │   └── current_trigger
-...
-│   │   │   │   ├── buffer
-│   │   │   │   │   ├── enable
-│   │   │   │   │   ├── length
-│   │   │   │   │   └── watermark
-│   │   │   │   ├── dev
-│   │   │   │   ├── in_magn_hysteresis
-│   │   │   │   ├── in_magn_offset
-│   │   │   │   ├── in_magn_sampling_frequency
-│   │   │   │   ├── in_magn_scale
-│   │   │   │   ├── in_magn_x_raw
-│   │   │   │   ├── in_magn_y_raw
-│   │   │   │   ├── in_magn_z_raw
-│   │   │   │   ├── in_rot_from_north_magnetic_tilt_comp_raw
-│   │   │   │   ├── in_rot_hysteresis
-│   │   │   │   ├── in_rot_offset
-│   │   │   │   ├── in_rot_sampling_frequency
-│   │   │   │   ├── in_rot_scale
-│   │   │   │   ├── name
-...
-│   │   │   │   ├── scan_elements
-│   │   │   │   │   ├── in_magn_x_en
-│   │   │   │   │   ├── in_magn_x_index
-│   │   │   │   │   ├── in_magn_x_type
-│   │   │   │   │   ├── in_magn_y_en
-│   │   │   │   │   ├── in_magn_y_index
-│   │   │   │   │   ├── in_magn_y_type
-│   │   │   │   │   ├── in_magn_z_en
-│   │   │   │   │   ├── in_magn_z_index
-│   │   │   │   │   ├── in_magn_z_type
-│   │   │   │   │   ├── in_rot_from_north_magnetic_tilt_comp_en
-│   │   │   │   │   ├── in_rot_from_north_magnetic_tilt_comp_index
-│   │   │   │   │   └── in_rot_from_north_magnetic_tilt_comp_type
-│   │   │   │   ├── trigger
-│   │   │   │   │   └── current_trigger
-...
-│   │   │   │   ├── buffer
-│   │   │   │   │   ├── enable
-│   │   │   │   │   ├── length
-│   │   │   │   │   └── watermark
-│   │   │   │   ├── dev
-│   │   │   │   ├── in_anglvel_hysteresis
-│   │   │   │   ├── in_anglvel_offset
-│   │   │   │   ├── in_anglvel_sampling_frequency
-│   │   │   │   ├── in_anglvel_scale
-│   │   │   │   ├── in_anglvel_x_raw
-│   │   │   │   ├── in_anglvel_y_raw
-│   │   │   │   ├── in_anglvel_z_raw
-│   │   │   │   ├── name
-│   │   │   │   ├── scan_elements
-│   │   │   │   │   ├── in_anglvel_x_en
-│   │   │   │   │   ├── in_anglvel_x_index
-│   │   │   │   │   ├── in_anglvel_x_type
-│   │   │   │   │   ├── in_anglvel_y_en
-│   │   │   │   │   ├── in_anglvel_y_index
-│   │   │   │   │   ├── in_anglvel_y_type
-│   │   │   │   │   ├── in_anglvel_z_en
-│   │   │   │   │   ├── in_anglvel_z_index
-│   │   │   │   │   └── in_anglvel_z_type
-│   │   │   │   ├── trigger
-│   │   │   │   │   └── current_trigger
-...
-│   │   │   │   ├── buffer
-│   │   │   │   │   ├── enable
-│   │   │   │   │   ├── length
-│   │   │   │   │   └── watermark
-│   │   │   │   ├── dev
-│   │   │   │   ├── in_anglvel_hysteresis
-│   │   │   │   ├── in_anglvel_offset
-│   │   │   │   ├── in_anglvel_sampling_frequency
-│   │   │   │   ├── in_anglvel_scale
-│   │   │   │   ├── in_anglvel_x_raw
-│   │   │   │   ├── in_anglvel_y_raw
-│   │   │   │   ├── in_anglvel_z_raw
-│   │   │   │   ├── name
-│   │   │   │   ├── scan_elements
-│   │   │   │   │   ├── in_anglvel_x_en
-│   │   │   │   │   ├── in_anglvel_x_index
-│   │   │   │   │   ├── in_anglvel_x_type
-│   │   │   │   │   ├── in_anglvel_y_en
-│   │   │   │   │   ├── in_anglvel_y_index
-│   │   │   │   │   ├── in_anglvel_y_type
-│   │   │   │   │   ├── in_anglvel_z_en
-│   │   │   │   │   ├── in_anglvel_z_index
-│   │   │   │   │   └── in_anglvel_z_type
-│   │   │   │   ├── trigger
-│   │   │   │   │   └── current_trigger
-...
diff --git a/Documentation/hid/uhid.rst b/Documentation/hid/uhid.rst
new file mode 100644
index 000000000000..b18cb96c885f
--- /dev/null
+++ b/Documentation/hid/uhid.rst
@@ -0,0 +1,193 @@
+======================================================
+UHID - User-space I/O driver support for HID subsystem
+======================================================
+
+UHID allows user-space to implement HID transport drivers. Please see
+hid-transport.txt for an introduction into HID transport drivers. This document
+relies heavily on the definitions declared there.
+
+With UHID, a user-space transport driver can create kernel hid-devices for each
+device connected to the user-space controlled bus. The UHID API defines the I/O
+events provided from the kernel to user-space and vice versa.
+
+There is an example user-space application in ./samples/uhid/uhid-example.c
+
+The UHID API
+------------
+
+UHID is accessed through a character misc-device. The minor-number is allocated
+dynamically so you need to rely on udev (or similar) to create the device node.
+This is /dev/uhid by default.
+
+If a new device is detected by your HID I/O Driver and you want to register this
+device with the HID subsystem, then you need to open /dev/uhid once for each
+device you want to register. All further communication is done by read()'ing or
+write()'ing "struct uhid_event" objects. Non-blocking operations are supported
+by setting O_NONBLOCK::
+
+  struct uhid_event {
+        __u32 type;
+        union {
+                struct uhid_create2_req create2;
+                struct uhid_output_req output;
+                struct uhid_input2_req input2;
+                ...
+        } u;
+  };
+
+The "type" field contains the ID of the event. Depending on the ID different
+payloads are sent. You must not split a single event across multiple read()'s or
+multiple write()'s. A single event must always be sent as a whole. Furthermore,
+only a single event can be sent per read() or write(). Pending data is ignored.
+If you want to handle multiple events in a single syscall, then use vectored
+I/O with readv()/writev().
+The "type" field defines the payload. For each type, there is a
+payload-structure available in the union "u" (except for empty payloads). This
+payload contains management and/or device data.
+
+The first thing you should do is sending an UHID_CREATE2 event. This will
+register the device. UHID will respond with an UHID_START event. You can now
+start sending data to and reading data from UHID. However, unless UHID sends the
+UHID_OPEN event, the internally attached HID Device Driver has no user attached.
+That is, you might put your device asleep unless you receive the UHID_OPEN
+event. If you receive the UHID_OPEN event, you should start I/O. If the last
+user closes the HID device, you will receive an UHID_CLOSE event. This may be
+followed by an UHID_OPEN event again and so on. There is no need to perform
+reference-counting in user-space. That is, you will never receive multiple
+UHID_OPEN events without an UHID_CLOSE event. The HID subsystem performs
+ref-counting for you.
+You may decide to ignore UHID_OPEN/UHID_CLOSE, though. I/O is allowed even
+though the device may have no users.
+
+If you want to send data on the interrupt channel to the HID subsystem, you send
+an HID_INPUT2 event with your raw data payload. If the kernel wants to send data
+on the interrupt channel to the device, you will read an UHID_OUTPUT event.
+Data requests on the control channel are currently limited to GET_REPORT and
+SET_REPORT (no other data reports on the control channel are defined so far).
+Those requests are always synchronous. That means, the kernel sends
+UHID_GET_REPORT and UHID_SET_REPORT events and requires you to forward them to
+the device on the control channel. Once the device responds, you must forward
+the response via UHID_GET_REPORT_REPLY and UHID_SET_REPORT_REPLY to the kernel.
+The kernel blocks internal driver-execution during such round-trips (times out
+after a hard-coded period).
+
+If your device disconnects, you should send an UHID_DESTROY event. This will
+unregister the device. You can now send UHID_CREATE2 again to register a new
+device.
+If you close() the fd, the device is automatically unregistered and destroyed
+internally.
+
+write()
+-------
+write() allows you to modify the state of the device and feed input data into
+the kernel. The kernel will parse the event immediately and if the event ID is
+not supported, it will return -EOPNOTSUPP. If the payload is invalid, then
+-EINVAL is returned, otherwise, the amount of data that was read is returned and
+the request was handled successfully. O_NONBLOCK does not affect write() as
+writes are always handled immediately in a non-blocking fashion. Future requests
+might make use of O_NONBLOCK, though.
+
+UHID_CREATE2:
+  This creates the internal HID device. No I/O is possible until you send this
+  event to the kernel. The payload is of type struct uhid_create2_req and
+  contains information about your device. You can start I/O now.
+
+UHID_DESTROY:
+  This destroys the internal HID device. No further I/O will be accepted. There
+  may still be pending messages that you can receive with read() but no further
+  UHID_INPUT events can be sent to the kernel.
+  You can create a new device by sending UHID_CREATE2 again. There is no need to
+  reopen the character device.
+
+UHID_INPUT2:
+  You must send UHID_CREATE2 before sending input to the kernel! This event
+  contains a data-payload. This is the raw data that you read from your device
+  on the interrupt channel. The kernel will parse the HID reports.
+
+UHID_GET_REPORT_REPLY:
+  If you receive a UHID_GET_REPORT request you must answer with this request.
+  You  must copy the "id" field from the request into the answer. Set the "err"
+  field to 0 if no error occurred or to EIO if an I/O error occurred.
+  If "err" is 0 then you should fill the buffer of the answer with the results
+  of the GET_REPORT request and set "size" correspondingly.
+
+UHID_SET_REPORT_REPLY:
+  This is the SET_REPORT equivalent of UHID_GET_REPORT_REPLY. Unlike GET_REPORT,
+  SET_REPORT never returns a data buffer, therefore, it's sufficient to set the
+  "id" and "err" fields correctly.
+
+read()
+------
+read() will return a queued output report. No reaction is required to any of
+them but you should handle them according to your needs.
+
+UHID_START:
+  This is sent when the HID device is started. Consider this as an answer to
+  UHID_CREATE2. This is always the first event that is sent. Note that this
+  event might not be available immediately after write(UHID_CREATE2) returns.
+  Device drivers might required delayed setups.
+  This event contains a payload of type uhid_start_req. The "dev_flags" field
+  describes special behaviors of a device. The following flags are defined:
+
+      - UHID_DEV_NUMBERED_FEATURE_REPORTS
+      - UHID_DEV_NUMBERED_OUTPUT_REPORTS
+      - UHID_DEV_NUMBERED_INPUT_REPORTS
+
+          Each of these flags defines whether a given report-type uses numbered
+          reports. If numbered reports are used for a type, all messages from
+          the kernel already have the report-number as prefix. Otherwise, no
+          prefix is added by the kernel.
+          For messages sent by user-space to the kernel, you must adjust the
+          prefixes according to these flags.
+
+UHID_STOP:
+  This is sent when the HID device is stopped. Consider this as an answer to
+  UHID_DESTROY.
+
+  If you didn't destroy your device via UHID_DESTROY, but the kernel sends an
+  UHID_STOP event, this should usually be ignored. It means that the kernel
+  reloaded/changed the device driver loaded on your HID device (or some other
+  maintenance actions happened).
+
+  You can usually ignored any UHID_STOP events safely.
+
+UHID_OPEN:
+  This is sent when the HID device is opened. That is, the data that the HID
+  device provides is read by some other process. You may ignore this event but
+  it is useful for power-management. As long as you haven't received this event
+  there is actually no other process that reads your data so there is no need to
+  send UHID_INPUT2 events to the kernel.
+
+UHID_CLOSE:
+  This is sent when there are no more processes which read the HID data. It is
+  the counterpart of UHID_OPEN and you may as well ignore this event.
+
+UHID_OUTPUT:
+  This is sent if the HID device driver wants to send raw data to the I/O
+  device on the interrupt channel. You should read the payload and forward it to
+  the device. The payload is of type "struct uhid_output_req".
+  This may be received even though you haven't received UHID_OPEN, yet.
+
+UHID_GET_REPORT:
+  This event is sent if the kernel driver wants to perform a GET_REPORT request
+  on the control channeld as described in the HID specs. The report-type and
+  report-number are available in the payload.
+  The kernel serializes GET_REPORT requests so there will never be two in
+  parallel. However, if you fail to respond with a UHID_GET_REPORT_REPLY, the
+  request might silently time out.
+  Once you read a GET_REPORT request, you shall forward it to the hid device and
+  remember the "id" field in the payload. Once your hid device responds to the
+  GET_REPORT (or if it fails), you must send a UHID_GET_REPORT_REPLY to the
+  kernel with the exact same "id" as in the request. If the request already
+  timed out, the kernel will ignore the response silently. The "id" field is
+  never re-used, so conflicts cannot happen.
+
+UHID_SET_REPORT:
+  This is the SET_REPORT equivalent of UHID_GET_REPORT. On receipt, you shall
+  send a SET_REPORT request to your hid device. Once it replies, you must tell
+  the kernel about it via UHID_SET_REPORT_REPLY.
+  The same restrictions as for UHID_GET_REPORT apply.
+
+----------------------------------------------------
+
+Written 2012, David Herrmann <dh.herrmann@gmail.com>
diff --git a/Documentation/hid/uhid.txt b/Documentation/hid/uhid.txt
deleted file mode 100644
index 958fff945304..000000000000
--- a/Documentation/hid/uhid.txt
+++ /dev/null
@@ -1,187 +0,0 @@
-      UHID - User-space I/O driver support for HID subsystem
-     ========================================================
-
-UHID allows user-space to implement HID transport drivers. Please see
-hid-transport.txt for an introduction into HID transport drivers. This document
-relies heavily on the definitions declared there.
-
-With UHID, a user-space transport driver can create kernel hid-devices for each
-device connected to the user-space controlled bus. The UHID API defines the I/O
-events provided from the kernel to user-space and vice versa.
-
-There is an example user-space application in ./samples/uhid/uhid-example.c
-
-The UHID API
-------------
-
-UHID is accessed through a character misc-device. The minor-number is allocated
-dynamically so you need to rely on udev (or similar) to create the device node.
-This is /dev/uhid by default.
-
-If a new device is detected by your HID I/O Driver and you want to register this
-device with the HID subsystem, then you need to open /dev/uhid once for each
-device you want to register. All further communication is done by read()'ing or
-write()'ing "struct uhid_event" objects. Non-blocking operations are supported
-by setting O_NONBLOCK.
-
-struct uhid_event {
-        __u32 type;
-        union {
-                struct uhid_create2_req create2;
-                struct uhid_output_req output;
-                struct uhid_input2_req input2;
-                ...
-        } u;
-};
-
-The "type" field contains the ID of the event. Depending on the ID different
-payloads are sent. You must not split a single event across multiple read()'s or
-multiple write()'s. A single event must always be sent as a whole. Furthermore,
-only a single event can be sent per read() or write(). Pending data is ignored.
-If you want to handle multiple events in a single syscall, then use vectored
-I/O with readv()/writev().
-The "type" field defines the payload. For each type, there is a
-payload-structure available in the union "u" (except for empty payloads). This
-payload contains management and/or device data.
-
-The first thing you should do is sending an UHID_CREATE2 event. This will
-register the device. UHID will respond with an UHID_START event. You can now
-start sending data to and reading data from UHID. However, unless UHID sends the
-UHID_OPEN event, the internally attached HID Device Driver has no user attached.
-That is, you might put your device asleep unless you receive the UHID_OPEN
-event. If you receive the UHID_OPEN event, you should start I/O. If the last
-user closes the HID device, you will receive an UHID_CLOSE event. This may be
-followed by an UHID_OPEN event again and so on. There is no need to perform
-reference-counting in user-space. That is, you will never receive multiple
-UHID_OPEN events without an UHID_CLOSE event. The HID subsystem performs
-ref-counting for you.
-You may decide to ignore UHID_OPEN/UHID_CLOSE, though. I/O is allowed even
-though the device may have no users.
-
-If you want to send data on the interrupt channel to the HID subsystem, you send
-an HID_INPUT2 event with your raw data payload. If the kernel wants to send data
-on the interrupt channel to the device, you will read an UHID_OUTPUT event.
-Data requests on the control channel are currently limited to GET_REPORT and
-SET_REPORT (no other data reports on the control channel are defined so far).
-Those requests are always synchronous. That means, the kernel sends
-UHID_GET_REPORT and UHID_SET_REPORT events and requires you to forward them to
-the device on the control channel. Once the device responds, you must forward
-the response via UHID_GET_REPORT_REPLY and UHID_SET_REPORT_REPLY to the kernel.
-The kernel blocks internal driver-execution during such round-trips (times out
-after a hard-coded period).
-
-If your device disconnects, you should send an UHID_DESTROY event. This will
-unregister the device. You can now send UHID_CREATE2 again to register a new
-device.
-If you close() the fd, the device is automatically unregistered and destroyed
-internally.
-
-write()
--------
-write() allows you to modify the state of the device and feed input data into
-the kernel. The kernel will parse the event immediately and if the event ID is
-not supported, it will return -EOPNOTSUPP. If the payload is invalid, then
--EINVAL is returned, otherwise, the amount of data that was read is returned and
-the request was handled successfully. O_NONBLOCK does not affect write() as
-writes are always handled immediately in a non-blocking fashion. Future requests
-might make use of O_NONBLOCK, though.
-
-  UHID_CREATE2:
-  This creates the internal HID device. No I/O is possible until you send this
-  event to the kernel. The payload is of type struct uhid_create2_req and
-  contains information about your device. You can start I/O now.
-
-  UHID_DESTROY:
-  This destroys the internal HID device. No further I/O will be accepted. There
-  may still be pending messages that you can receive with read() but no further
-  UHID_INPUT events can be sent to the kernel.
-  You can create a new device by sending UHID_CREATE2 again. There is no need to
-  reopen the character device.
-
-  UHID_INPUT2:
-  You must send UHID_CREATE2 before sending input to the kernel! This event
-  contains a data-payload. This is the raw data that you read from your device
-  on the interrupt channel. The kernel will parse the HID reports.
-
-  UHID_GET_REPORT_REPLY:
-  If you receive a UHID_GET_REPORT request you must answer with this request.
-  You  must copy the "id" field from the request into the answer. Set the "err"
-  field to 0 if no error occurred or to EIO if an I/O error occurred.
-  If "err" is 0 then you should fill the buffer of the answer with the results
-  of the GET_REPORT request and set "size" correspondingly.
-
-  UHID_SET_REPORT_REPLY:
-  This is the SET_REPORT equivalent of UHID_GET_REPORT_REPLY. Unlike GET_REPORT,
-  SET_REPORT never returns a data buffer, therefore, it's sufficient to set the
-  "id" and "err" fields correctly.
-
-read()
-------
-read() will return a queued output report. No reaction is required to any of
-them but you should handle them according to your needs.
-
-  UHID_START:
-  This is sent when the HID device is started. Consider this as an answer to
-  UHID_CREATE2. This is always the first event that is sent. Note that this
-  event might not be available immediately after write(UHID_CREATE2) returns.
-  Device drivers might required delayed setups.
-  This event contains a payload of type uhid_start_req. The "dev_flags" field
-  describes special behaviors of a device. The following flags are defined:
-      UHID_DEV_NUMBERED_FEATURE_REPORTS:
-      UHID_DEV_NUMBERED_OUTPUT_REPORTS:
-      UHID_DEV_NUMBERED_INPUT_REPORTS:
-          Each of these flags defines whether a given report-type uses numbered
-          reports. If numbered reports are used for a type, all messages from
-          the kernel already have the report-number as prefix. Otherwise, no
-          prefix is added by the kernel.
-          For messages sent by user-space to the kernel, you must adjust the
-          prefixes according to these flags.
-
-  UHID_STOP:
-  This is sent when the HID device is stopped. Consider this as an answer to
-  UHID_DESTROY.
-  If you didn't destroy your device via UHID_DESTROY, but the kernel sends an
-  UHID_STOP event, this should usually be ignored. It means that the kernel
-  reloaded/changed the device driver loaded on your HID device (or some other
-  maintenance actions happened).
-  You can usually ignored any UHID_STOP events safely.
-
-  UHID_OPEN:
-  This is sent when the HID device is opened. That is, the data that the HID
-  device provides is read by some other process. You may ignore this event but
-  it is useful for power-management. As long as you haven't received this event
-  there is actually no other process that reads your data so there is no need to
-  send UHID_INPUT2 events to the kernel.
-
-  UHID_CLOSE:
-  This is sent when there are no more processes which read the HID data. It is
-  the counterpart of UHID_OPEN and you may as well ignore this event.
-
-  UHID_OUTPUT:
-  This is sent if the HID device driver wants to send raw data to the I/O
-  device on the interrupt channel. You should read the payload and forward it to
-  the device. The payload is of type "struct uhid_output_req".
-  This may be received even though you haven't received UHID_OPEN, yet.
-
-  UHID_GET_REPORT:
-  This event is sent if the kernel driver wants to perform a GET_REPORT request
-  on the control channeld as described in the HID specs. The report-type and
-  report-number are available in the payload.
-  The kernel serializes GET_REPORT requests so there will never be two in
-  parallel. However, if you fail to respond with a UHID_GET_REPORT_REPLY, the
-  request might silently time out.
-  Once you read a GET_REPORT request, you shall forward it to the hid device and
-  remember the "id" field in the payload. Once your hid device responds to the
-  GET_REPORT (or if it fails), you must send a UHID_GET_REPORT_REPLY to the
-  kernel with the exact same "id" as in the request. If the request already
-  timed out, the kernel will ignore the response silently. The "id" field is
-  never re-used, so conflicts cannot happen.
-
-  UHID_SET_REPORT:
-  This is the SET_REPORT equivalent of UHID_GET_REPORT. On receipt, you shall
-  send a SET_REPORT request to your hid device. Once it replies, you must tell
-  the kernel about it via UHID_SET_REPORT_REPLY.
-  The same restrictions as for UHID_GET_REPORT apply.
-
-----------------------------------------------------
-Written 2012, David Herrmann <dh.herrmann@gmail.com>
diff --git a/Documentation/input/input.rst b/Documentation/input/input.rst
index 47f86a4bf16c..0eb61e67a7b7 100644
--- a/Documentation/input/input.rst
+++ b/Documentation/input/input.rst
@@ -188,7 +188,7 @@ LCDs and many other purposes.
 
 The monitor and speaker controls should be easy to add to the hid/input
 interface, but for the UPSs and LCDs it doesn't make much sense. For this,
-the hiddev interface was designed. See Documentation/hid/hiddev.txt
+the hiddev interface was designed. See Documentation/hid/hiddev.rst
 for more information about it.
 
 The usage of the usbhid module is very simple, it takes no parameters,
-- 
cgit 


From 907bf9d65f2da9b7ea1b5aa0c1b8d5950f29356d Mon Sep 17 00:00:00 2001
From: Leonard Crestez <leonard.crestez@nxp.com>
Date: Tue, 21 May 2019 20:41:13 +0000
Subject: dt-bindings: mfd: Document short/long press duration for BD718X7

These values can be overwritten at probe time.

Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
Acked-By: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.txt b/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.txt
index d5f68ac78d15..3649c1191432 100644
--- a/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.txt
+++ b/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.txt
@@ -38,6 +38,14 @@ target state is set to READY by default. If SNVS state is used the boot
 crucial regulators must have the regulator-always-on and regulator-boot-on
 properties set in regulator node.
 
+- rohm,short-press-ms	: Short press duration in milliseconds
+- rohm,long-press-ms	: Long press duration in milliseconds
+
+Configure the "short press" and "long press" timers for the power button.
+Values are rounded to what hardware supports (500ms multiple for short and
+1000ms multiple for long). If these properties are not present the existing
+configuration (from bootloader or OTP) is not touched.
+
 Example:
 
 	/* external oscillator node */
-- 
cgit 


From 8f759058e9f9616980aaf838a45cc545cbe6e447 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Thu, 30 May 2019 15:39:51 +0100
Subject: mfd: madera: Update DT bindings to add additional CODECs

This adds the cs47l15, cs42l92, cs47l92 and cs47l93 to the list of
compatible strings and updates some properties that need to note
the new CODECs.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/devicetree/bindings/mfd/madera.txt | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mfd/madera.txt b/Documentation/devicetree/bindings/mfd/madera.txt
index db3266088386..cad0f2800502 100644
--- a/Documentation/devicetree/bindings/mfd/madera.txt
+++ b/Documentation/devicetree/bindings/mfd/madera.txt
@@ -11,10 +11,14 @@ bindings/sound/madera.txt
 Required properties:
 
   - compatible : One of the following chip-specific strings:
+        "cirrus,cs47l15"
         "cirrus,cs47l35"
         "cirrus,cs47l85"
         "cirrus,cs47l90"
         "cirrus,cs47l91"
+        "cirrus,cs42l92"
+        "cirrus,cs47l92"
+        "cirrus,cs47l93"
         "cirrus,wm1840"
 
   - reg : I2C slave address when connected using I2C, chip select number when
@@ -22,7 +26,7 @@ Required properties:
 
   - DCVDD-supply : Power supply for the device as defined in
     bindings/regulator/regulator.txt
-    Mandatory on CS47L35, CS47L90, CS47L91
+    Mandatory on CS47L15, CS47L35, CS47L90, CS47L91, CS42L92, CS47L92, CS47L93
     Optional on CS47L85, WM1840
 
   - AVDD-supply, DBVDD1-supply, DBVDD2-supply, CPVDD1-supply, CPVDD2-supply :
@@ -35,7 +39,7 @@ Required properties:
     (CS47L85, WM1840)
 
   - SPKVDD-supply : Power supply for the device
-    (CS47L35)
+    (CS47L15, CS47L35)
 
   - interrupt-controller : Indicates that this device is an interrupt controller
 
-- 
cgit 


From c77cd342f9fd64a9f6ed3eb270fd527826ef92d3 Mon Sep 17 00:00:00 2001
From: Pi-Hsun Shih <pihsun@chromium.org>
Date: Mon, 3 Jun 2019 11:45:10 +0800
Subject: dt-bindings: Add binding for cros-ec-rpmsg

Add a DT binding documentation for ChromeOS EC driver over rpmsg.

Signed-off-by: Pi-Hsun Shih <pihsun@chromium.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/devicetree/bindings/mfd/cros-ec.txt | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mfd/cros-ec.txt b/Documentation/devicetree/bindings/mfd/cros-ec.txt
index 6245c9b1a68b..4860eabd0f72 100644
--- a/Documentation/devicetree/bindings/mfd/cros-ec.txt
+++ b/Documentation/devicetree/bindings/mfd/cros-ec.txt
@@ -3,7 +3,7 @@ ChromeOS Embedded Controller
 Google's ChromeOS EC is a Cortex-M device which talks to the AP and
 implements various function such as keyboard and battery charging.
 
-The EC can be connect through various means (I2C, SPI, LPC) and the
+The EC can be connect through various means (I2C, SPI, LPC, RPMSG) and the
 compatible string used depends on the interface. Each connection method has
 its own driver which connects to the top level interface-agnostic EC driver.
 Other Linux driver (such as cros-ec-keyb for the matrix keyboard) connect to
@@ -17,6 +17,9 @@ Required properties (SPI):
 - compatible: "google,cros-ec-spi"
 - reg: SPI chip select
 
+Required properties (RPMSG):
+- compatible: "google,cros-ec-rpmsg"
+
 Optional properties (SPI):
 - google,cros-ec-spi-pre-delay: Some implementations of the EC need a little
   time to wake up from sleep before they can receive SPI transfers at a high
-- 
cgit 


From 84bdde17c5d60dea93c1db60d87059c8ca772360 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 13 Jun 2019 15:12:38 +0300
Subject: dt-bindings: mfd: Add link to ROHM BD71847 Datasheet

ROHM BD71847 PMIC datasheet was published.
Add datasheet link for BD71847 as well.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.txt | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.txt b/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.txt
index 3649c1191432..f22d74c7a8db 100644
--- a/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.txt
+++ b/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.txt
@@ -8,6 +8,8 @@ and 6 LDOs.
 
 Datasheet for BD71837 is available at:
 https://www.rohm.com/datasheet/BD71837MWV/bd71837mwv-e
+Datasheet for BD71847 is available at:
+https://www.rohm.com/datasheet/BD71847AMWV/bd71847amwv-e
 
 Required properties:
  - compatible		: Should be "rohm,bd71837" for bd71837
-- 
cgit 


From d6d02bc6e80453ca004e101eae28d741bde54426 Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@st.com>
Date: Mon, 1 Jul 2019 10:14:22 +0200
Subject: dt-bindings: regulator: add support for the stm32-booster

Document the 3.3V booster regulator embedded in stm32h7 and stm32mp1
devices, that can be used to supply ADC analog input switches.
It's controlled by using system configuration registers (SYSCFG).
Introduce two compatibles as the booster regulator is controlled by:
- a unique register/bit in STM32H7
- a set/clear register pair in STM32MP1

Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/regulator/st,stm32-booster.txt | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/st,stm32-booster.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/regulator/st,stm32-booster.txt b/Documentation/devicetree/bindings/regulator/st,stm32-booster.txt
new file mode 100644
index 000000000000..479ad4c8758e
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/st,stm32-booster.txt
@@ -0,0 +1,18 @@
+STM32 BOOSTER - Booster for ADC analog input switches
+
+Some STM32 devices embed a 3.3V booster supplied by Vdda, that can be used
+to supply ADC analog input switches.
+
+Required properties:
+- compatible: Should be one of:
+  "st,stm32h7-booster"
+  "st,stm32mp1-booster"
+- st,syscfg: Phandle to system configuration controller.
+- vdda-supply: Phandle to the vdda input analog voltage.
+
+Example:
+	booster: regulator-booster {
+		compatible = "st,stm32mp1-booster";
+		st,syscfg = <&syscfg>;
+		vdda-supply = <&vdda>;
+	};
-- 
cgit 


From ebc3d179150347f3b6d97d8f249378bb2218f95e Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cohuck@redhat.com>
Date: Thu, 13 Jun 2019 13:08:15 +0200
Subject: s390/cio: introduce driver_override on the css bus

Sometimes, we want to control which of the matching drivers
binds to a subchannel device (e.g. for subchannels we want to
handle via vfio-ccw).

For pci devices, a mechanism to do so has been introduced in
782a985d7af2 ("PCI: Introduce new device binding path using
pci_dev.driver_override"). It makes sense to introduce the
driver_override attribute for subchannel devices as well, so
that we can easily extend the 'driverctl' tool (which makes
use of the driver_override attribute for pci).

Note that unlike pci we still require a driver override to
match the subchannel type; matching more than one subchannel
type is probably not useful anyway.

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Halil Pasic <pasic@linux.ibm.com>
Reviewed-by: Sebastian Ott <sebott@linux.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 Documentation/ABI/testing/sysfs-bus-css | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-bus-css b/Documentation/ABI/testing/sysfs-bus-css
index 2979c40c10e9..966f8504bd7b 100644
--- a/Documentation/ABI/testing/sysfs-bus-css
+++ b/Documentation/ABI/testing/sysfs-bus-css
@@ -33,3 +33,26 @@ Description:	Contains the PIM/PAM/POM values, as reported by the
 		in sync with the values current in the channel subsystem).
 		Note: This is an I/O-subchannel specific attribute.
 Users:		s390-tools, HAL
+
+What:		/sys/bus/css/devices/.../driver_override
+Date:		June 2019
+Contact:	Cornelia Huck <cohuck@redhat.com>
+		linux-s390@vger.kernel.org
+Description:	This file allows the driver for a device to be specified. When
+		specified, only a driver with a name matching the value written
+		to driver_override will have an opportunity to bind to the
+		device. The override is specified by writing a string to the
+		driver_override file (echo vfio-ccw > driver_override) and
+		may be cleared with an empty string (echo > driver_override).
+		This returns the device to standard matching rules binding.
+		Writing to driver_override does not automatically unbind the
+		device from its current driver or make any attempt to
+		automatically load the specified driver.  If no driver with a
+		matching name is currently loaded in the kernel, the device
+		will not bind to any driver.  This also allows devices to
+		opt-out of driver binding using a driver_override name such as
+		"none".  Only a single driver may be specified in the override,
+		there is no support for parsing delimiters.
+		Note that unlike the mechanism of the same name for pci, this
+		file does not allow to override basic matching rules. I.e.,
+		the driver must still match the subchannel type of the device.
-- 
cgit 


From cec5b01f8f1c6064037f3181e7690e3765ed995b Mon Sep 17 00:00:00 2001
From: Mac Chiang <mac.chiang@intel.com>
Date: Wed, 19 Jun 2019 18:18:33 +0800
Subject: ASoC: max98357a: avoid speaker pop when playback startup

Loud speaker pop happens during playback even when in slience
playback. Specify Max98357a amp delay times to make sure
clocks are always earlier than sdmode on.

Signed-off-by: Mac Chiang <mac.chiang@intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/sound/max98357a.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/sound/max98357a.txt b/Documentation/devicetree/bindings/sound/max98357a.txt
index 28645a2ff885..4bce14ce806f 100644
--- a/Documentation/devicetree/bindings/sound/max98357a.txt
+++ b/Documentation/devicetree/bindings/sound/max98357a.txt
@@ -9,6 +9,10 @@ Optional properties:
 - sdmode-gpios : GPIO specifier for the chip's SD_MODE pin.
         If this option is not specified then driver does not manage
         the pin state (e.g. chip is always on).
+- sdmode-delay : specify delay time for SD_MODE pin.
+        If this option is specified, which means it's required i2s clocks
+        ready before SD_MODE is unmuted in order to avoid the speaker pop noise.
+        It's observed that 5ms is sufficient.
 
 Example:
 
-- 
cgit 


From 2ee5bfc1efc81179c73abcd33098dd2c86019146 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 26 Jun 2019 15:38:42 -0700
Subject: platform/x86: ISST: Update ioctl-number.txt for Intel Speed Select
 interface

Reserve ioctl numbers for intel Speed Select Technology interface
drivers.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 Documentation/ioctl/ioctl-number.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index c9558146ac58..ab0b3f686454 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -348,3 +348,4 @@ Code  Seq#(hex)	Include File		Comments
 0xF6	all	LTTng			Linux Trace Toolkit Next Generation
 					<mailto:mathieu.desnoyers@efficios.com>
 0xFD	all	linux/dm-ioctl.h
+0xFE	all	linux/isst_if.h
-- 
cgit 


From f85f6e7bc9682a6d8b342c010cd6aa58521fdeec Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Tue, 11 Jun 2019 20:23:48 +0800
Subject: KVM: X86: Yield to IPI target if necessary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When sending a call-function IPI-many to vCPUs, yield if any of
the IPI target vCPUs was preempted, we just select the first
preempted target vCPU which we found since the state of target
vCPUs can change underneath and to avoid race conditions.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/hypercalls.txt | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index da24c138c8d1..da210651f714 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -141,3 +141,14 @@ a0 corresponds to the APIC ID in the third argument (a2), bit 1
 corresponds to the APIC ID a2+1, and so on.
 
 Returns the number of CPUs to which the IPIs were delivered successfully.
+
+7. KVM_HC_SCHED_YIELD
+------------------------
+Architecture: x86
+Status: active
+Purpose: Hypercall used to yield if the IPI target vCPU is preempted
+
+a0: destination APIC ID
+
+Usage example: When sending a call-function IPI-many to vCPUs, yield if
+any of the IPI target vCPUs was preempted.
-- 
cgit 


From 32b72ecc83b651fb8633ac4bd44957c54367699d Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Tue, 11 Jun 2019 20:23:50 +0800
Subject: KVM: X86: Expose PV_SCHED_YIELD CPUID feature bit to guest
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Expose PV_SCHED_YIELD feature bit to guest, the guest can check this
feature bit before using paravirtualized sched yield.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/cpuid.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index 97ca1940a0dc..979a77ba5377 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -66,6 +66,10 @@ KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
                                    ||       || before using paravirtualized
                                    ||       || send IPIs.
 ------------------------------------------------------------------------------
+KVM_FEATURE_PV_SCHED_YIELD         ||    13 || guest checks this feature bit
+                                   ||       || before using paravirtualized
+                                   ||       || sched yield.
+------------------------------------------------------------------------------
 KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
                                    ||       || per-cpu warps are expected in
                                    ||       || kvmclock.
-- 
cgit 


From 9824c83f92bc8351dfb5c387436cc2816616fb4a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 2 Jul 2019 18:57:29 +0200
Subject: Documentation: kvm: document CPUID bit for MSR_KVM_POLL_CONTROL

Cc: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/cpuid.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index 979a77ba5377..2bdac528e4a2 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -66,6 +66,10 @@ KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
                                    ||       || before using paravirtualized
                                    ||       || send IPIs.
 ------------------------------------------------------------------------------
+KVM_FEATURE_PV_POLL_CONTROL        ||    12 || host-side polling on HLT can
+                                   ||       || be disabled by writing
+                                   ||       || to msr 0x4b564d05.
+------------------------------------------------------------------------------
 KVM_FEATURE_PV_SCHED_YIELD         ||    13 || guest checks this feature bit
                                    ||       || before using paravirtualized
                                    ||       || sched yield.
-- 
cgit 


From eee3ae41b153e55e25d6cf7bd5b5098ba0afe705 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 26 Jun 2019 14:27:20 +0200
Subject: mm: remove hmm_devmem_add

There isn't really much value add in the hmm_devmem_add wrapper and
more, as using devm_memremap_pages directly now is just as simple.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 Documentation/vm/hmm.rst | 26 --------------------------
 1 file changed, 26 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index 7cdf7282e022..50e1380950a9 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -329,32 +329,6 @@ directly using struct page for device memory which left most kernel code paths
 unaware of the difference. We only need to make sure that no one ever tries to
 map those pages from the CPU side.
 
-HMM provides a set of helpers to register and hotplug device memory as a new
-region needing a struct page. This is offered through a very simple API::
-
- struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
-                                   struct device *device,
-                                   unsigned long size);
- void hmm_devmem_remove(struct hmm_devmem *devmem);
-
-The hmm_devmem_ops is where most of the important things are::
-
- struct hmm_devmem_ops {
-     void (*free)(struct hmm_devmem *devmem, struct page *page);
-     int (*fault)(struct hmm_devmem *devmem,
-                  struct vm_area_struct *vma,
-                  unsigned long addr,
-                  struct page *page,
-                  unsigned flags,
-                  pmd_t *pmdp);
- };
-
-The first callback (free()) happens when the last reference on a device page is
-dropped. This means the device page is now free and no longer used by anyone.
-The second callback happens whenever the CPU tries to access a device page
-which it cannot do. This second callback must trigger a migration back to
-system memory.
-
 
 Migration to and from device memory
 ===================================
-- 
cgit 


From cad3836f9eb292647c9e3e5c6c778e6fa6e6bf15 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 26 Jun 2019 18:23:05 -0700
Subject: f2fs: allocate blocks for pinned file

This patch allows fallocate to allocate physical blocks for pinned file.

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/filesystems/f2fs.txt | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index bebd1be3ba49..496fa28b2492 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -804,3 +804,28 @@ WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
 WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
 WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
 WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
+
+Fallocate(2) Policy
+-------------------
+
+The default policy follows the below posix rule.
+
+Allocating disk space
+    The default operation (i.e., mode is zero) of fallocate() allocates
+    the disk space within the range specified by offset and len.  The
+    file size (as reported by stat(2)) will be changed if offset+len is
+    greater than the file size.  Any subregion within the range specified
+    by offset and len that did not contain data before the call will be
+    initialized to zero.  This default behavior closely resembles the
+    behavior of the posix_fallocate(3) library function, and is intended
+    as a method of optimally implementing that function.
+
+However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) in prior to
+fallocate(fd, DEFAULT_MODE), it allocates on-disk blocks addressess having
+zero or random data, which is useful to the below scenario where:
+ 1. create(fd)
+ 2. ioctl(fd, F2FS_IOC_SET_PIN_FILE)
+ 3. fallocate(fd, 0, 0, size)
+ 4. address = fibmap(fd, offset)
+ 5. open(blkdev)
+ 6. write(blkdev, address)
-- 
cgit 


From e8a8b40cc8922b38f91c6562d525f21f312f235c Mon Sep 17 00:00:00 2001
From: Pawel Laszczak <pawell@cadence.com>
Date: Tue, 2 Jul 2019 14:37:57 +0100
Subject: dt-bindings: add binding for USBSS-DRD controller.

This patch aim at documenting USB related dt-bindings for the
Cadence USBSS-DRD controller.

Signed-off-by: Pawel Laszczak <pawell@cadence.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 .../devicetree/bindings/usb/cdns-usb3.txt          | 45 ++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/usb/cdns-usb3.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/usb/cdns-usb3.txt b/Documentation/devicetree/bindings/usb/cdns-usb3.txt
new file mode 100644
index 000000000000..b7dc606d37b5
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/cdns-usb3.txt
@@ -0,0 +1,45 @@
+Binding for the Cadence USBSS-DRD controller
+
+Required properties:
+ - reg: Physical base address and size of the controller's register areas.
+	 Controller has 3 different regions:
+	 - HOST registers area
+	 - DEVICE registers area
+	 - OTG/DRD registers area
+ - reg-names - register memory area names:
+	"xhci" - for HOST registers space
+	"dev" - for DEVICE registers space
+	"otg" - for OTG/DRD registers space
+ - compatible: Should contain: "cdns,usb3"
+ - interrupts: Interrupts used by cdns3 controller:
+	"host" - interrupt used by XHCI driver.
+	"peripheral" - interrupt used by device driver
+	"otg" - interrupt used by DRD/OTG  part of driver
+
+Optional properties:
+ - maximum-speed : valid arguments are "super-speed", "high-speed" and
+                   "full-speed"; refer to usb/generic.txt
+ - dr_mode: Should be one of "host", "peripheral" or "otg".
+ - phys: reference to the USB PHY
+ - phy-names: from the *Generic PHY* bindings;
+	Supported names are:
+	- cdns3,usb2-phy
+	- cdns3,usb3-phy
+
+ - cdns,on-chip-buff-size : size of memory intended as internal memory for endpoints
+	buffers expressed in KB
+
+Example:
+	usb@f3000000 {
+		compatible = "cdns,usb3";
+		interrupts = <GIC_USB_IRQ 7 IRQ_TYPE_LEVEL_HIGH>,
+				<GIC_USB_IRQ  7 IRQ_TYPE_LEVEL_HIGH>,
+				<GIC_USB_IRQ  8 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "host", "peripheral", "otg";
+		reg = <0xf3000000 0x10000>,	/* memory area for HOST registers */
+			<0xf3010000 0x10000>,	/* memory area for DEVICE registers */
+			<0xf3020000 0x10000>;	/* memory area for OTG/DRD registers */
+		reg-names = "xhci", "dev", "otg";
+		phys = <&usb2_phy>, <&usb3_phy>;
+		phy-names = "cdns3,usb2-phy", "cnds3,usb3-phy";
+	};
-- 
cgit 


From 8cf8bde284b7a25229cbf119cbf8be8149132a5c Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@nxp.com>
Date: Mon, 24 Jun 2019 10:02:51 +0800
Subject: doc: dt-binding: mxs-usb-phy: add compatible for 7ulp

Add compatible for 7ulp USB PHY.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Peter Chen <peter.chen@nxp.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 Documentation/devicetree/bindings/phy/mxs-usb-phy.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/phy/mxs-usb-phy.txt b/Documentation/devicetree/bindings/phy/mxs-usb-phy.txt
index 6ac98b3b5f57..c9f5c0caf8a9 100644
--- a/Documentation/devicetree/bindings/phy/mxs-usb-phy.txt
+++ b/Documentation/devicetree/bindings/phy/mxs-usb-phy.txt
@@ -7,6 +7,7 @@ Required properties:
 	* "fsl,imx6sl-usbphy" for imx6sl
 	* "fsl,vf610-usbphy" for Vybrid vf610
 	* "fsl,imx6sx-usbphy" for imx6sx
+	* "fsl,imx7ulp-usbphy" for imx7ulp
   "fsl,imx23-usbphy" is still a fallback for other strings
 - reg: Should contain registers location and length
 - interrupts: Should contain phy interrupt
@@ -23,7 +24,7 @@ Optional properties:
   the 17.78mA TX reference current. Default: 100
 
 Example:
-usbphy1: usbphy@20c9000 {
+usbphy1: usb-phy@20c9000 {
 	compatible = "fsl,imx6q-usbphy", "fsl,imx23-usbphy";
 	reg = <0x020c9000 0x1000>;
 	interrupts = <0 44 0x04>;
-- 
cgit 


From a7c5c6f6bc295d6c158db4ef9d1ca6770032669d Mon Sep 17 00:00:00 2001
From: Robin Gong <yibin.gong@nxp.com>
Date: Tue, 25 Jun 2019 17:43:22 +0800
Subject: dt-bindings: dma: fsl-edma: add new i.mx7ulp-edma

More channel interrupts, one more clock, and only one
dmamux on i.mx7ulp-edma.

Signed-off-by: Robin Gong <yibin.gong@nxp.com>
Tested-by: Angelo Dureghello <angelo@sysam.it>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/devicetree/bindings/dma/fsl-edma.txt | 44 +++++++++++++++++++---
 1 file changed, 39 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/dma/fsl-edma.txt b/Documentation/devicetree/bindings/dma/fsl-edma.txt
index 97e213e07660..29dd3ccb1235 100644
--- a/Documentation/devicetree/bindings/dma/fsl-edma.txt
+++ b/Documentation/devicetree/bindings/dma/fsl-edma.txt
@@ -9,15 +9,16 @@ group, DMAMUX0 or DMAMUX1, but not both.
 Required properties:
 - compatible :
 	- "fsl,vf610-edma" for eDMA used similar to that on Vybrid vf610 SoC
+	- "fsl,imx7ulp-edma" for eDMA2 used similar to that on i.mx7ulp
 - reg : Specifies base physical address(s) and size of the eDMA registers.
 	The 1st region is eDMA control register's address and size.
 	The 2nd and the 3rd regions are programmable channel multiplexing
 	control register's address and size.
 - interrupts : A list of interrupt-specifiers, one for each entry in
-	interrupt-names.
-- interrupt-names : Should contain:
-	"edma-tx" - the transmission interrupt
-	"edma-err" - the error interrupt
+	interrupt-names on vf610 similar SoC. But for i.mx7ulp per channel
+	per transmission interrupt, total 16 channel interrupt and 1
+	error interrupt(located in the last), no interrupt-names list on
+	i.mx7ulp for clean on dts.
 - #dma-cells : Must be <2>.
 	The 1st cell specifies the DMAMUX(0 for DMAMUX0 and 1 for DMAMUX1).
 	Specific request source can only be multiplexed by specific channels
@@ -28,6 +29,7 @@ Required properties:
 - clock-names : A list of channel group clock names. Should contain:
 	"dmamux0" - clock name of mux0 group
 	"dmamux1" - clock name of mux1 group
+	Note: No dmamux0 on i.mx7ulp, but another 'dma' clk added on i.mx7ulp.
 - clocks : A list of phandle and clock-specifier pairs, one for each entry in
 	clock-names.
 
@@ -35,6 +37,10 @@ Optional properties:
 - big-endian: If present registers and hardware scatter/gather descriptors
 	of the eDMA are implemented in big endian mode, otherwise in little
 	mode.
+- interrupt-names : Should contain the below on vf610 similar SoC but not used
+	on i.mx7ulp similar SoC:
+	"edma-tx" - the transmission interrupt
+	"edma-err" - the error interrupt
 
 
 Examples:
@@ -52,8 +58,36 @@ edma0: dma-controller@40018000 {
 	clock-names = "dmamux0", "dmamux1";
 	clocks = <&clks VF610_CLK_DMAMUX0>,
 		<&clks VF610_CLK_DMAMUX1>;
-};
+}; /* vf610 */
 
+edma1: dma-controller@40080000 {
+	#dma-cells = <2>;
+	compatible = "fsl,imx7ulp-edma";
+	reg = <0x40080000 0x2000>,
+		<0x40210000 0x1000>;
+	dma-channels = <32>;
+	interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>,
+		     /* last is eDMA2-ERR interrupt */
+		     <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
+	clock-names = "dma", "dmamux0";
+	clocks = <&pcc2 IMX7ULP_CLK_DMA1>,
+		 <&pcc2 IMX7ULP_CLK_DMA_MUX1>;
+}; /* i.mx7ulp */
 
 * DMA clients
 DMA client drivers that uses the DMA function must use the format described
-- 
cgit 


From 74b5150cde45ab29ee332a3d1e770c00431f7993 Mon Sep 17 00:00:00 2001
From: Talel Shenhar <talel@amazon.com>
Date: Mon, 10 Jun 2019 11:34:42 +0300
Subject: dt-bindings: interrupt-controller: Add Amazon's Annapurna Labs FIC

Document Amazon's Annapurna Labs Fabric Interrupt Controller SoC binding.

Signed-off-by: Talel Shenhar <talel@amazon.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 .../interrupt-controller/amazon,al-fic.txt         | 29 ++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt b/Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt
new file mode 100644
index 000000000000..4e82fd575cec
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt
@@ -0,0 +1,29 @@
+Amazon's Annapurna Labs Fabric Interrupt Controller
+
+Required properties:
+
+- compatible: should be "amazon,al-fic"
+- reg: physical base address and size of the registers
+- interrupt-controller: identifies the node as an interrupt controller
+- #interrupt-cells: must be 2.
+  First cell defines the index of the interrupt within the controller.
+  Second cell is used to specify the trigger type and must be one of the
+  following:
+    - bits[3:0] trigger type and level flags
+	1 = low-to-high edge triggered
+	4 = active high level-sensitive
+- interrupt-parent: specifies the parent interrupt controller.
+- interrupts: describes which input line in the interrupt parent, this
+  fic's output is connected to. This field property depends on the parent's
+  binding
+
+Example:
+
+amazon_fic: interrupt-controller@0xfd8a8500 {
+	compatible = "amazon,al-fic";
+	interrupt-controller;
+	#interrupt-cells = <2>;
+	reg = <0x0 0xfd8a8500 0x0 0x1000>;
+	interrupt-parent = <&gic>;
+	interrupts = <GIC_SPI 0x0 IRQ_TYPE_LEVEL_HIGH>;
+};
-- 
cgit 


From 34ae69998b665073bf27085e60b52329cf169e0d Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Fri, 28 Jun 2019 12:08:31 +0930
Subject: dt-bindings: pinctrl: aspeed: Split bindings document in two

Have one for each of the AST2400 and AST2500. The only thing that was
common was the fact that both support ASPEED BMC SoCs.

Cc: Johnny Huang <johnny_huang@aspeedtech.com>
Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Acked-by: Joel Stanley <joel@jms.id.au>
Link: https://lore.kernel.org/r/20190628023838.15426-2-andrew@aj.id.au
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../bindings/pinctrl/aspeed,ast2400-pinctrl.txt    |  80 ++++++++++
 .../bindings/pinctrl/aspeed,ast2500-pinctrl.txt    | 119 ++++++++++++++
 .../devicetree/bindings/pinctrl/pinctrl-aspeed.txt | 172 ---------------------
 3 files changed, 199 insertions(+), 172 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.txt
 create mode 100644 Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.txt
 delete mode 100644 Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.txt
new file mode 100644
index 000000000000..67e0325ccf2e
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.txt
@@ -0,0 +1,80 @@
+=============================
+Aspeed AST2400 Pin Controller
+=============================
+
+Required properties for the AST2400:
+- compatible : 			Should be one of the following:
+				"aspeed,ast2400-pinctrl"
+				"aspeed,g4-pinctrl"
+
+The pin controller node should be the child of a syscon node with the required
+property:
+
+- compatible : 		Should be one of the following:
+			"aspeed,ast2400-scu", "syscon", "simple-mfd"
+			"aspeed,g4-scu", "syscon", "simple-mfd"
+
+Refer to the the bindings described in
+Documentation/devicetree/bindings/mfd/syscon.txt
+
+Subnode Format
+==============
+
+The required properties of pinmux child nodes are:
+- function: the mux function to select
+- groups  : the list of groups to select with this function
+
+Required properties of pinconf child nodes are:
+- groups: A list of groups to select (either this or "pins" must be
+          specified)
+- pins  : A list of ball names as strings, eg "D14" (either this or "groups"
+          must be specified)
+
+Optional properties of pinconf child nodes are:
+- bias-disable  : disable any pin bias
+- bias-pull-down: pull down the pin
+- drive-strength: sink or source at most X mA
+
+Definitions are as specified in
+Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt, with any
+further limitations as described above.
+
+For pinmux, each mux function has only one associated pin group. Each group is
+named by its function. The following values for the function and groups
+properties are supported:
+
+ACPI ADC0 ADC1 ADC10 ADC11 ADC12 ADC13 ADC14 ADC15 ADC2 ADC3 ADC4 ADC5 ADC6
+ADC7 ADC8 ADC9 BMCINT DDCCLK DDCDAT EXTRST FLACK FLBUSY FLWP GPID GPID0 GPID2
+GPID4 GPID6 GPIE0 GPIE2 GPIE4 GPIE6 I2C10 I2C11 I2C12 I2C13 I2C14 I2C3 I2C4
+I2C5 I2C6 I2C7 I2C8 I2C9 LPCPD LPCPME LPCRST LPCSMI MAC1LINK MAC2LINK MDIO1
+MDIO2 NCTS1 NCTS2 NCTS3 NCTS4 NDCD1 NDCD2 NDCD3 NDCD4 NDSR1 NDSR2 NDSR3 NDSR4
+NDTR1 NDTR2 NDTR3 NDTR4 NDTS4 NRI1 NRI2 NRI3 NRI4 NRTS1 NRTS2 NRTS3 OSCCLK PWM0
+PWM1 PWM2 PWM3 PWM4 PWM5 PWM6 PWM7 RGMII1 RGMII2 RMII1 RMII2 ROM16 ROM8 ROMCS1
+ROMCS2 ROMCS3 ROMCS4 RXD1 RXD2 RXD3 RXD4 SALT1 SALT2 SALT3 SALT4 SD1 SD2 SGPMCK
+SGPMI SGPMLD SGPMO SGPSCK SGPSI0 SGPSI1 SGPSLD SIOONCTRL SIOPBI SIOPBO SIOPWREQ
+SIOPWRGD SIOS3 SIOS5 SIOSCI SPI1 SPI1DEBUG SPI1PASSTHRU SPICS1 TIMER3 TIMER4
+TIMER5 TIMER6 TIMER7 TIMER8 TXD1 TXD2 TXD3 TXD4 UART6 USB11D1 USB11H2 USB2D1
+USB2H1 USBCKI VGABIOS_ROM VGAHS VGAVS VPI18 VPI24 VPI30 VPO12 VPO24 WDTRST1
+WDTRST2
+
+Example
+=======
+
+syscon: scu@1e6e2000 {
+	compatible = "aspeed,ast2400-scu", "syscon", "simple-mfd";
+	reg = <0x1e6e2000 0x1a8>;
+
+	pinctrl: pinctrl {
+		compatible = "aspeed,g4-pinctrl";
+
+		pinctrl_i2c3_default: i2c3_default {
+			function = "I2C3";
+			groups = "I2C3";
+		};
+
+		pinctrl_gpioh0_unbiased_default: gpioh0 {
+			pins = "A8";
+			bias-disable;
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.txt
new file mode 100644
index 000000000000..ca7025dc1901
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.txt
@@ -0,0 +1,119 @@
+=============================
+Aspeed AST2500 Pin Controller
+=============================
+
+Required properties for g5:
+- compatible : 			Should be one of the following:
+				"aspeed,ast2500-pinctrl"
+				"aspeed,g5-pinctrl"
+
+- aspeed,external-nodes:	A cell of phandles to external controller nodes:
+				0: compatible with "aspeed,ast2500-gfx", "syscon"
+				1: compatible with "aspeed,ast2500-lhc", "syscon"
+
+The pin controller node should be the child of a syscon node with the required
+property:
+
+- compatible : 		Should be one of the following:
+			"aspeed,ast2500-scu", "syscon", "simple-mfd"
+			"aspeed,g5-scu", "syscon", "simple-mfd"
+
+Refer to the the bindings described in
+Documentation/devicetree/bindings/mfd/syscon.txt
+
+Subnode Format
+==============
+
+The required properties of pinmux child nodes are:
+- function: the mux function to select
+- groups  : the list of groups to select with this function
+
+Required properties of pinconf child nodes are:
+- groups: A list of groups to select (either this or "pins" must be
+          specified)
+- pins  : A list of ball names as strings, eg "D14" (either this or "groups"
+          must be specified)
+
+Optional properties of pinconf child nodes are:
+- bias-disable  : disable any pin bias
+- bias-pull-down: pull down the pin
+- drive-strength: sink or source at most X mA
+
+Definitions are as specified in
+Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt, with any
+further limitations as described above.
+
+For pinmux, each mux function has only one associated pin group. Each group is
+named by its function. The following values for the function and groups
+properties are supported:
+
+ACPI ADC0 ADC1 ADC10 ADC11 ADC12 ADC13 ADC14 ADC15 ADC2 ADC3 ADC4 ADC5 ADC6
+ADC7 ADC8 ADC9 BMCINT DDCCLK DDCDAT ESPI FWSPICS1 FWSPICS2 GPID0 GPID2 GPID4
+GPID6 GPIE0 GPIE2 GPIE4 GPIE6 I2C10 I2C11 I2C12 I2C13 I2C14 I2C3 I2C4 I2C5 I2C6
+I2C7 I2C8 I2C9 LAD0 LAD1 LAD2 LAD3 LCLK LFRAME LPCHC LPCPD LPCPLUS LPCPME
+LPCRST LPCSMI LSIRQ MAC1LINK MAC2LINK MDIO1 MDIO2 NCTS1 NCTS2 NCTS3 NCTS4 NDCD1
+NDCD2 NDCD3 NDCD4 NDSR1 NDSR2 NDSR3 NDSR4 NDTR1 NDTR2 NDTR3 NDTR4 NRI1 NRI2
+NRI3 NRI4 NRTS1 NRTS2 NRTS3 NRTS4 OSCCLK PEWAKE PNOR PWM0 PWM1 PWM2 PWM3 PWM4
+PWM5 PWM6 PWM7 RGMII1 RGMII2 RMII1 RMII2 RXD1 RXD2 RXD3 RXD4 SALT1 SALT10
+SALT11 SALT12 SALT13 SALT14 SALT2 SALT3 SALT4 SALT5 SALT6 SALT7 SALT8 SALT9
+SCL1 SCL2 SD1 SD2 SDA1 SDA2 SGPM SGPS1 SGPS2 SIOONCTRL SIOPBI SIOPBO SIOPWREQ
+SIOPWRGD SIOS3 SIOS5 SIOSCI SPI1 SPI1CS1 SPI1DEBUG SPI1PASSTHRU SPI2CK SPI2CS0
+SPI2CS1 SPI2MISO SPI2MOSI TIMER3 TIMER4 TIMER5 TIMER6 TIMER7 TIMER8 TXD1 TXD2
+TXD3 TXD4 UART6 USB11BHID USB2AD USB2AH USB2BD USB2BH USBCKI VGABIOSROM VGAHS
+VGAVS VPI24 VPO WDTRST1 WDTRST2
+
+Example
+=======
+
+ahb {
+	apb {
+		syscon: scu@1e6e2000 {
+			compatible = "aspeed,ast2500-scu", "syscon", "simple-mfd";
+			reg = <0x1e6e2000 0x1a8>;
+
+			pinctrl: pinctrl {
+				compatible = "aspeed,g5-pinctrl";
+				aspeed,external-nodes = <&gfx &lhc>;
+
+				pinctrl_i2c3_default: i2c3_default {
+					function = "I2C3";
+					groups = "I2C3";
+				};
+
+				pinctrl_gpioh0_unbiased_default: gpioh0 {
+					pins = "A18";
+					bias-disable;
+				};
+			};
+		};
+
+		gfx: display@1e6e6000 {
+			compatible = "aspeed,ast2500-gfx", "syscon";
+			reg = <0x1e6e6000 0x1000>;
+		};
+	};
+
+	lpc: lpc@1e789000 {
+		compatible = "aspeed,ast2500-lpc", "simple-mfd";
+		reg = <0x1e789000 0x1000>;
+
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0x0 0x1e789000 0x1000>;
+
+		lpc_host: lpc-host@80 {
+			compatible = "aspeed,ast2500-lpc-host", "simple-mfd", "syscon";
+			reg = <0x80 0x1e0>;
+			reg-io-width = <4>;
+
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0x0 0x80 0x1e0>;
+
+			lhc: lhc@20 {
+			       compatible = "aspeed,ast2500-lhc";
+			       reg = <0x20 0x24 0x48 0x8>;
+			};
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt
deleted file mode 100644
index 8f1c5c4d62f9..000000000000
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-======================
-Aspeed Pin Controllers
-======================
-
-The Aspeed SoCs vary in functionality inside a generation but have a common mux
-device register layout.
-
-Required properties for g4:
-- compatible : 			Should be one of the following:
-				"aspeed,ast2400-pinctrl"
-				"aspeed,g4-pinctrl"
-
-Required properties for g5:
-- compatible : 			Should be one of the following:
-				"aspeed,ast2500-pinctrl"
-				"aspeed,g5-pinctrl"
-
-- aspeed,external-nodes:	A cell of phandles to external controller nodes:
-				0: compatible with "aspeed,ast2500-gfx", "syscon"
-				1: compatible with "aspeed,ast2500-lhc", "syscon"
-
-The pin controller node should be the child of a syscon node with the required
-property:
-
-- compatible : 		Should be one of the following:
-			"aspeed,ast2400-scu", "syscon", "simple-mfd"
-			"aspeed,g4-scu", "syscon", "simple-mfd"
-			"aspeed,ast2500-scu", "syscon", "simple-mfd"
-			"aspeed,g5-scu", "syscon", "simple-mfd"
-
-Refer to the the bindings described in
-Documentation/devicetree/bindings/mfd/syscon.txt
-
-Subnode Format
-==============
-
-The required properties of pinmux child nodes are:
-- function: the mux function to select
-- groups  : the list of groups to select with this function
-
-Required properties of pinconf child nodes are:
-- groups: A list of groups to select (either this or "pins" must be
-          specified)
-- pins  : A list of ball names as strings, eg "D14" (either this or "groups"
-          must be specified)
-
-Optional properties of pinconf child nodes are:
-- bias-disable  : disable any pin bias
-- bias-pull-down: pull down the pin
-- drive-strength: sink or source at most X mA
-
-Definitions are as specified in
-Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt, with any
-further limitations as described above.
-
-For pinmux, each mux function has only one associated pin group. Each group is
-named by its function. The following values for the function and groups
-properties are supported:
-
-aspeed,ast2400-pinctrl, aspeed,g4-pinctrl:
-
-ACPI ADC0 ADC1 ADC10 ADC11 ADC12 ADC13 ADC14 ADC15 ADC2 ADC3 ADC4 ADC5 ADC6
-ADC7 ADC8 ADC9 BMCINT DDCCLK DDCDAT EXTRST FLACK FLBUSY FLWP GPID GPID0 GPID2
-GPID4 GPID6 GPIE0 GPIE2 GPIE4 GPIE6 I2C10 I2C11 I2C12 I2C13 I2C14 I2C3 I2C4
-I2C5 I2C6 I2C7 I2C8 I2C9 LPCPD LPCPME LPCRST LPCSMI MAC1LINK MAC2LINK MDIO1
-MDIO2 NCTS1 NCTS2 NCTS3 NCTS4 NDCD1 NDCD2 NDCD3 NDCD4 NDSR1 NDSR2 NDSR3 NDSR4
-NDTR1 NDTR2 NDTR3 NDTR4 NDTS4 NRI1 NRI2 NRI3 NRI4 NRTS1 NRTS2 NRTS3 OSCCLK PWM0
-PWM1 PWM2 PWM3 PWM4 PWM5 PWM6 PWM7 RGMII1 RGMII2 RMII1 RMII2 ROM16 ROM8 ROMCS1
-ROMCS2 ROMCS3 ROMCS4 RXD1 RXD2 RXD3 RXD4 SALT1 SALT2 SALT3 SALT4 SD1 SD2 SGPMCK
-SGPMI SGPMLD SGPMO SGPSCK SGPSI0 SGPSI1 SGPSLD SIOONCTRL SIOPBI SIOPBO SIOPWREQ
-SIOPWRGD SIOS3 SIOS5 SIOSCI SPI1 SPI1DEBUG SPI1PASSTHRU SPICS1 TIMER3 TIMER4
-TIMER5 TIMER6 TIMER7 TIMER8 TXD1 TXD2 TXD3 TXD4 UART6 USB11D1 USB11H2 USB2D1
-USB2H1 USBCKI VGABIOS_ROM VGAHS VGAVS VPI18 VPI24 VPI30 VPO12 VPO24 WDTRST1
-WDTRST2
-
-aspeed,ast2500-pinctrl, aspeed,g5-pinctrl:
-
-ACPI ADC0 ADC1 ADC10 ADC11 ADC12 ADC13 ADC14 ADC15 ADC2 ADC3 ADC4 ADC5 ADC6
-ADC7 ADC8 ADC9 BMCINT DDCCLK DDCDAT ESPI FWSPICS1 FWSPICS2 GPID0 GPID2 GPID4
-GPID6 GPIE0 GPIE2 GPIE4 GPIE6 I2C10 I2C11 I2C12 I2C13 I2C14 I2C3 I2C4 I2C5 I2C6
-I2C7 I2C8 I2C9 LAD0 LAD1 LAD2 LAD3 LCLK LFRAME LPCHC LPCPD LPCPLUS LPCPME
-LPCRST LPCSMI LSIRQ MAC1LINK MAC2LINK MDIO1 MDIO2 NCTS1 NCTS2 NCTS3 NCTS4 NDCD1
-NDCD2 NDCD3 NDCD4 NDSR1 NDSR2 NDSR3 NDSR4 NDTR1 NDTR2 NDTR3 NDTR4 NRI1 NRI2
-NRI3 NRI4 NRTS1 NRTS2 NRTS3 NRTS4 OSCCLK PEWAKE PNOR PWM0 PWM1 PWM2 PWM3 PWM4
-PWM5 PWM6 PWM7 RGMII1 RGMII2 RMII1 RMII2 RXD1 RXD2 RXD3 RXD4 SALT1 SALT10
-SALT11 SALT12 SALT13 SALT14 SALT2 SALT3 SALT4 SALT5 SALT6 SALT7 SALT8 SALT9
-SCL1 SCL2 SD1 SD2 SDA1 SDA2 SGPM SGPS1 SGPS2 SIOONCTRL SIOPBI SIOPBO SIOPWREQ
-SIOPWRGD SIOS3 SIOS5 SIOSCI SPI1 SPI1CS1 SPI1DEBUG SPI1PASSTHRU SPI2CK SPI2CS0
-SPI2CS1 SPI2MISO SPI2MOSI TIMER3 TIMER4 TIMER5 TIMER6 TIMER7 TIMER8 TXD1 TXD2
-TXD3 TXD4 UART6 USB11BHID USB2AD USB2AH USB2BD USB2BH USBCKI VGABIOSROM VGAHS
-VGAVS VPI24 VPO WDTRST1 WDTRST2
-
-Examples
-========
-
-g4 Example
-----------
-
-syscon: scu@1e6e2000 {
-	compatible = "aspeed,ast2400-scu", "syscon", "simple-mfd";
-	reg = <0x1e6e2000 0x1a8>;
-
-	pinctrl: pinctrl {
-		compatible = "aspeed,g4-pinctrl";
-
-		pinctrl_i2c3_default: i2c3_default {
-			function = "I2C3";
-			groups = "I2C3";
-		};
-
-		pinctrl_gpioh0_unbiased_default: gpioh0 {
-			pins = "A8";
-			bias-disable;
-		};
-	};
-};
-
-g5 Example
-----------
-
-ahb {
-	apb {
-		syscon: scu@1e6e2000 {
-			compatible = "aspeed,ast2500-scu", "syscon", "simple-mfd";
-			reg = <0x1e6e2000 0x1a8>;
-
-			pinctrl: pinctrl {
-				compatible = "aspeed,g5-pinctrl";
-				aspeed,external-nodes = <&gfx &lhc>;
-
-				pinctrl_i2c3_default: i2c3_default {
-					function = "I2C3";
-					groups = "I2C3";
-				};
-
-				pinctrl_gpioh0_unbiased_default: gpioh0 {
-					pins = "A18";
-					bias-disable;
-				};
-			};
-		};
-
-		gfx: display@1e6e6000 {
-			compatible = "aspeed,ast2500-gfx", "syscon";
-			reg = <0x1e6e6000 0x1000>;
-		};
-	};
-
-	lpc: lpc@1e789000 {
-		compatible = "aspeed,ast2500-lpc", "simple-mfd";
-		reg = <0x1e789000 0x1000>;
-
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges = <0x0 0x1e789000 0x1000>;
-
-		lpc_host: lpc-host@80 {
-			compatible = "aspeed,ast2500-lpc-host", "simple-mfd", "syscon";
-			reg = <0x80 0x1e0>;
-			reg-io-width = <4>;
-
-			#address-cells = <1>;
-			#size-cells = <1>;
-			ranges = <0x0 0x80 0x1e0>;
-
-			lhc: lhc@20 {
-			       compatible = "aspeed,ast2500-lhc";
-			       reg = <0x20 0x24 0x48 0x8>;
-			};
-		};
-	};
-};
-- 
cgit 


From 07457937bb5cef8ce797b29d2b0da1bb25509f54 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Fri, 28 Jun 2019 12:08:32 +0930
Subject: dt-bindings: pinctrl: aspeed: Convert AST2400 bindings to json-schema

Convert ASPEED pinctrl bindings to DT schema format using json-schema

Cc: Johnny Huang <johnny_huang@aspeedtech.com>
Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Link: https://lore.kernel.org/r/20190628023838.15426-3-andrew@aj.id.au
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../bindings/pinctrl/aspeed,ast2400-pinctrl.txt    | 80 ---------------------
 .../bindings/pinctrl/aspeed,ast2400-pinctrl.yaml   | 81 ++++++++++++++++++++++
 2 files changed, 81 insertions(+), 80 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.txt
 create mode 100644 Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.txt
deleted file mode 100644
index 67e0325ccf2e..000000000000
--- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.txt
+++ /dev/null
@@ -1,80 +0,0 @@
-=============================
-Aspeed AST2400 Pin Controller
-=============================
-
-Required properties for the AST2400:
-- compatible : 			Should be one of the following:
-				"aspeed,ast2400-pinctrl"
-				"aspeed,g4-pinctrl"
-
-The pin controller node should be the child of a syscon node with the required
-property:
-
-- compatible : 		Should be one of the following:
-			"aspeed,ast2400-scu", "syscon", "simple-mfd"
-			"aspeed,g4-scu", "syscon", "simple-mfd"
-
-Refer to the the bindings described in
-Documentation/devicetree/bindings/mfd/syscon.txt
-
-Subnode Format
-==============
-
-The required properties of pinmux child nodes are:
-- function: the mux function to select
-- groups  : the list of groups to select with this function
-
-Required properties of pinconf child nodes are:
-- groups: A list of groups to select (either this or "pins" must be
-          specified)
-- pins  : A list of ball names as strings, eg "D14" (either this or "groups"
-          must be specified)
-
-Optional properties of pinconf child nodes are:
-- bias-disable  : disable any pin bias
-- bias-pull-down: pull down the pin
-- drive-strength: sink or source at most X mA
-
-Definitions are as specified in
-Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt, with any
-further limitations as described above.
-
-For pinmux, each mux function has only one associated pin group. Each group is
-named by its function. The following values for the function and groups
-properties are supported:
-
-ACPI ADC0 ADC1 ADC10 ADC11 ADC12 ADC13 ADC14 ADC15 ADC2 ADC3 ADC4 ADC5 ADC6
-ADC7 ADC8 ADC9 BMCINT DDCCLK DDCDAT EXTRST FLACK FLBUSY FLWP GPID GPID0 GPID2
-GPID4 GPID6 GPIE0 GPIE2 GPIE4 GPIE6 I2C10 I2C11 I2C12 I2C13 I2C14 I2C3 I2C4
-I2C5 I2C6 I2C7 I2C8 I2C9 LPCPD LPCPME LPCRST LPCSMI MAC1LINK MAC2LINK MDIO1
-MDIO2 NCTS1 NCTS2 NCTS3 NCTS4 NDCD1 NDCD2 NDCD3 NDCD4 NDSR1 NDSR2 NDSR3 NDSR4
-NDTR1 NDTR2 NDTR3 NDTR4 NDTS4 NRI1 NRI2 NRI3 NRI4 NRTS1 NRTS2 NRTS3 OSCCLK PWM0
-PWM1 PWM2 PWM3 PWM4 PWM5 PWM6 PWM7 RGMII1 RGMII2 RMII1 RMII2 ROM16 ROM8 ROMCS1
-ROMCS2 ROMCS3 ROMCS4 RXD1 RXD2 RXD3 RXD4 SALT1 SALT2 SALT3 SALT4 SD1 SD2 SGPMCK
-SGPMI SGPMLD SGPMO SGPSCK SGPSI0 SGPSI1 SGPSLD SIOONCTRL SIOPBI SIOPBO SIOPWREQ
-SIOPWRGD SIOS3 SIOS5 SIOSCI SPI1 SPI1DEBUG SPI1PASSTHRU SPICS1 TIMER3 TIMER4
-TIMER5 TIMER6 TIMER7 TIMER8 TXD1 TXD2 TXD3 TXD4 UART6 USB11D1 USB11H2 USB2D1
-USB2H1 USBCKI VGABIOS_ROM VGAHS VGAVS VPI18 VPI24 VPI30 VPO12 VPO24 WDTRST1
-WDTRST2
-
-Example
-=======
-
-syscon: scu@1e6e2000 {
-	compatible = "aspeed,ast2400-scu", "syscon", "simple-mfd";
-	reg = <0x1e6e2000 0x1a8>;
-
-	pinctrl: pinctrl {
-		compatible = "aspeed,g4-pinctrl";
-
-		pinctrl_i2c3_default: i2c3_default {
-			function = "I2C3";
-			groups = "I2C3";
-		};
-
-		pinctrl_gpioh0_unbiased_default: gpioh0 {
-			pins = "A8";
-			bias-disable;
-		};
-	};
-};
diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml
new file mode 100644
index 000000000000..61a110a7db8a
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/aspeed,ast2400-pinctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ASPEED AST2400 Pin Controller
+
+maintainers:
+  - Andrew Jeffery <andrew@aj.id.au>
+
+description: |+
+  The pin controller node should be the child of a syscon node with the
+  required property:
+
+  - compatible:     Should be one of the following:
+                    "aspeed,ast2400-scu", "syscon", "simple-mfd"
+                    "aspeed,g4-scu", "syscon", "simple-mfd"
+
+  Refer to the the bindings described in
+  Documentation/devicetree/bindings/mfd/syscon.txt
+
+properties:
+  compatible:
+    enum: [ aspeed,ast2400-pinctrl, aspeed,g4-pinctrl ]
+
+patternProperties:
+  '^.*$':
+    if:
+      type: object
+    then:
+      patternProperties:
+        "^function|groups$":
+          allOf:
+            - $ref: "/schemas/types.yaml#/definitions/string"
+            - enum: [ "ACPI", "ADC0", "ADC1", "ADC10", "ADC11", "ADC12", "ADC13",
+              "ADC14", "ADC15", "ADC2", "ADC3", "ADC4", "ADC5", "ADC6", "ADC7",
+              "ADC8", "ADC9", "BMCINT", "DDCCLK", "DDCDAT", "EXTRST", "FLACK",
+              "FLBUSY", "FLWP", "GPID", "GPID0", "GPID2", "GPID4", "GPID6",
+              "GPIE0", "GPIE2", "GPIE4", "GPIE6", "I2C10", "I2C11", "I2C12",
+              "I2C13", "I2C14", "I2C3", "I2C4", "I2C5", "I2C6", "I2C7", "I2C8",
+              "I2C9", "LPCPD", "LPCPME", "LPCRST", "LPCSMI", "MAC1LINK",
+              "MAC2LINK", "MDIO1", "MDIO2", "NCTS1", "NCTS2", "NCTS3", "NCTS4",
+              "NDCD1", "NDCD2", "NDCD3", "NDCD4", "NDSR1", "NDSR2", "NDSR3",
+              "NDSR4", "NDTR1", "NDTR2", "NDTR3", "NDTR4", "NDTS4", "NRI1",
+              "NRI2", "NRI3", "NRI4", "NRTS1", "NRTS2", "NRTS3", "OSCCLK",
+              "PWM0", "PWM1", "PWM2", "PWM3", "PWM4", "PWM5", "PWM6", "PWM7",
+              "RGMII1", "RGMII2", "RMII1", "RMII2", "ROM16", "ROM8", "ROMCS1",
+              "ROMCS2", "ROMCS3", "ROMCS4", "RXD1", "RXD2", "RXD3", "RXD4",
+              "SALT1", "SALT2", "SALT3", "SALT4", "SD1", "SD2", "SGPMCK",
+              "SGPMI", "SGPMLD", "SGPMO", "SGPSCK", "SGPSI0", "SGPSI1", "SGPSLD",
+              "SIOONCTRL", "SIOPBI", "SIOPBO", "SIOPWREQ", "SIOPWRGD", "SIOS3",
+              "SIOS5", "SIOSCI", "SPI1", "SPI1DEBUG", "SPI1PASSTHRU", "SPICS1",
+              "TIMER3", "TIMER4", "TIMER5", "TIMER6", "TIMER7", "TIMER8", "TXD1",
+              "TXD2", "TXD3", "TXD4", "UART6", "USB11D1", "USB11H2", "USB2D1",
+              "USB2H1", "USBCKI", "VGABIOS_ROM", "VGAHS", "VGAVS", "VPI18",
+              "VPI24", "VPI30", "VPO12", "VPO24", "WDTRST1", "WDTRST2" ]
+
+required:
+  - compatible
+
+examples:
+  - |
+    syscon: scu@1e6e2000 {
+        compatible = "aspeed,ast2400-scu", "syscon", "simple-mfd";
+        reg = <0x1e6e2000 0x1a8>;
+
+        pinctrl: pinctrl {
+            compatible = "aspeed,g4-pinctrl";
+
+            pinctrl_i2c3_default: i2c3_default {
+                function = "I2C3";
+                groups = "I2C3";
+            };
+
+            pinctrl_gpioh0_unbiased_default: gpioh0 {
+                pins = "A8";
+                bias-disable;
+            };
+        };
+    };
-- 
cgit 


From 0a617de167306231bdbf894aa2b93f4b2cd2a781 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Fri, 28 Jun 2019 12:08:33 +0930
Subject: dt-bindings: pinctrl: aspeed: Convert AST2500 bindings to json-schema

Convert ASPEED pinctrl bindings to DT schema format using json-schema.

Cc: Johnny Huang <johnny_huang@aspeedtech.com>
Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Link: https://lore.kernel.org/r/20190628023838.15426-4-andrew@aj.id.au
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../bindings/pinctrl/aspeed,ast2500-pinctrl.txt    | 119 ------------------
 .../bindings/pinctrl/aspeed,ast2500-pinctrl.yaml   | 134 +++++++++++++++++++++
 2 files changed, 134 insertions(+), 119 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.txt
 create mode 100644 Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.txt
deleted file mode 100644
index ca7025dc1901..000000000000
--- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-=============================
-Aspeed AST2500 Pin Controller
-=============================
-
-Required properties for g5:
-- compatible : 			Should be one of the following:
-				"aspeed,ast2500-pinctrl"
-				"aspeed,g5-pinctrl"
-
-- aspeed,external-nodes:	A cell of phandles to external controller nodes:
-				0: compatible with "aspeed,ast2500-gfx", "syscon"
-				1: compatible with "aspeed,ast2500-lhc", "syscon"
-
-The pin controller node should be the child of a syscon node with the required
-property:
-
-- compatible : 		Should be one of the following:
-			"aspeed,ast2500-scu", "syscon", "simple-mfd"
-			"aspeed,g5-scu", "syscon", "simple-mfd"
-
-Refer to the the bindings described in
-Documentation/devicetree/bindings/mfd/syscon.txt
-
-Subnode Format
-==============
-
-The required properties of pinmux child nodes are:
-- function: the mux function to select
-- groups  : the list of groups to select with this function
-
-Required properties of pinconf child nodes are:
-- groups: A list of groups to select (either this or "pins" must be
-          specified)
-- pins  : A list of ball names as strings, eg "D14" (either this or "groups"
-          must be specified)
-
-Optional properties of pinconf child nodes are:
-- bias-disable  : disable any pin bias
-- bias-pull-down: pull down the pin
-- drive-strength: sink or source at most X mA
-
-Definitions are as specified in
-Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt, with any
-further limitations as described above.
-
-For pinmux, each mux function has only one associated pin group. Each group is
-named by its function. The following values for the function and groups
-properties are supported:
-
-ACPI ADC0 ADC1 ADC10 ADC11 ADC12 ADC13 ADC14 ADC15 ADC2 ADC3 ADC4 ADC5 ADC6
-ADC7 ADC8 ADC9 BMCINT DDCCLK DDCDAT ESPI FWSPICS1 FWSPICS2 GPID0 GPID2 GPID4
-GPID6 GPIE0 GPIE2 GPIE4 GPIE6 I2C10 I2C11 I2C12 I2C13 I2C14 I2C3 I2C4 I2C5 I2C6
-I2C7 I2C8 I2C9 LAD0 LAD1 LAD2 LAD3 LCLK LFRAME LPCHC LPCPD LPCPLUS LPCPME
-LPCRST LPCSMI LSIRQ MAC1LINK MAC2LINK MDIO1 MDIO2 NCTS1 NCTS2 NCTS3 NCTS4 NDCD1
-NDCD2 NDCD3 NDCD4 NDSR1 NDSR2 NDSR3 NDSR4 NDTR1 NDTR2 NDTR3 NDTR4 NRI1 NRI2
-NRI3 NRI4 NRTS1 NRTS2 NRTS3 NRTS4 OSCCLK PEWAKE PNOR PWM0 PWM1 PWM2 PWM3 PWM4
-PWM5 PWM6 PWM7 RGMII1 RGMII2 RMII1 RMII2 RXD1 RXD2 RXD3 RXD4 SALT1 SALT10
-SALT11 SALT12 SALT13 SALT14 SALT2 SALT3 SALT4 SALT5 SALT6 SALT7 SALT8 SALT9
-SCL1 SCL2 SD1 SD2 SDA1 SDA2 SGPM SGPS1 SGPS2 SIOONCTRL SIOPBI SIOPBO SIOPWREQ
-SIOPWRGD SIOS3 SIOS5 SIOSCI SPI1 SPI1CS1 SPI1DEBUG SPI1PASSTHRU SPI2CK SPI2CS0
-SPI2CS1 SPI2MISO SPI2MOSI TIMER3 TIMER4 TIMER5 TIMER6 TIMER7 TIMER8 TXD1 TXD2
-TXD3 TXD4 UART6 USB11BHID USB2AD USB2AH USB2BD USB2BH USBCKI VGABIOSROM VGAHS
-VGAVS VPI24 VPO WDTRST1 WDTRST2
-
-Example
-=======
-
-ahb {
-	apb {
-		syscon: scu@1e6e2000 {
-			compatible = "aspeed,ast2500-scu", "syscon", "simple-mfd";
-			reg = <0x1e6e2000 0x1a8>;
-
-			pinctrl: pinctrl {
-				compatible = "aspeed,g5-pinctrl";
-				aspeed,external-nodes = <&gfx &lhc>;
-
-				pinctrl_i2c3_default: i2c3_default {
-					function = "I2C3";
-					groups = "I2C3";
-				};
-
-				pinctrl_gpioh0_unbiased_default: gpioh0 {
-					pins = "A18";
-					bias-disable;
-				};
-			};
-		};
-
-		gfx: display@1e6e6000 {
-			compatible = "aspeed,ast2500-gfx", "syscon";
-			reg = <0x1e6e6000 0x1000>;
-		};
-	};
-
-	lpc: lpc@1e789000 {
-		compatible = "aspeed,ast2500-lpc", "simple-mfd";
-		reg = <0x1e789000 0x1000>;
-
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges = <0x0 0x1e789000 0x1000>;
-
-		lpc_host: lpc-host@80 {
-			compatible = "aspeed,ast2500-lpc-host", "simple-mfd", "syscon";
-			reg = <0x80 0x1e0>;
-			reg-io-width = <4>;
-
-			#address-cells = <1>;
-			#size-cells = <1>;
-			ranges = <0x0 0x80 0x1e0>;
-
-			lhc: lhc@20 {
-			       compatible = "aspeed,ast2500-lhc";
-			       reg = <0x20 0x24 0x48 0x8>;
-			};
-		};
-	};
-};
diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml
new file mode 100644
index 000000000000..cf561bd55128
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml
@@ -0,0 +1,134 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/aspeed,ast2500-pinctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ASPEED AST2500 Pin Controller
+
+maintainers:
+  - Andrew Jeffery <andrew@aj.id.au>
+
+description: |+
+  The pin controller node should be the child of a syscon node with the
+  required property:
+
+  - compatible: 	Should be one of the following:
+  			"aspeed,ast2500-scu", "syscon", "simple-mfd"
+  			"aspeed,g5-scu", "syscon", "simple-mfd"
+
+  Refer to the the bindings described in
+  Documentation/devicetree/bindings/mfd/syscon.txt
+
+properties:
+  compatible:
+    enum: [ aspeed,ast2500-pinctrl, aspeed,g5-pinctrl ]
+  aspeed,external-nodes:
+    minItems: 2
+    maxItems: 2
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/phandle-array
+    description: |
+      A cell of phandles to external controller nodes:
+      0: compatible with "aspeed,ast2500-gfx", "syscon"
+      1: compatible with "aspeed,ast2500-lhc", "syscon"
+
+patternProperties:
+  '^.*$':
+    if:
+      type: object
+    then:
+      patternProperties:
+        "^function|groups$":
+          allOf:
+            - $ref: "/schemas/types.yaml#/definitions/string"
+            - enum: [ "ACPI", "ADC0", "ADC1", "ADC10", "ADC11", "ADC12", "ADC13",
+              "ADC14", "ADC15", "ADC2", "ADC3", "ADC4", "ADC5", "ADC6", "ADC7",
+              "ADC8", "ADC9", "BMCINT", "DDCCLK", "DDCDAT", "ESPI", "FWSPICS1",
+              "FWSPICS2", "GPID0", "GPID2", "GPID4", "GPID6", "GPIE0", "GPIE2",
+              "GPIE4", "GPIE6", "I2C10", "I2C11", "I2C12", "I2C13", "I2C14",
+              "I2C3", "I2C4", "I2C5", "I2C6", "I2C7", "I2C8", "I2C9", "LAD0",
+              "LAD1", "LAD2", "LAD3", "LCLK", "LFRAME", "LPCHC", "LPCPD",
+              "LPCPLUS", "LPCPME", "LPCRST", "LPCSMI", "LSIRQ", "MAC1LINK",
+              "MAC2LINK", "MDIO1", "MDIO2", "NCTS1", "NCTS2", "NCTS3", "NCTS4",
+              "NDCD1", "NDCD2", "NDCD3", "NDCD4", "NDSR1", "NDSR2", "NDSR3",
+              "NDSR4", "NDTR1", "NDTR2", "NDTR3", "NDTR4", "NRI1", "NRI2",
+              "NRI3", "NRI4", "NRTS1", "NRTS2", "NRTS3", "NRTS4", "OSCCLK",
+              "PEWAKE", "PNOR", "PWM0", "PWM1", "PWM2", "PWM3", "PWM4", "PWM5",
+              "PWM6", "PWM7", "RGMII1", "RGMII2", "RMII1", "RMII2", "RXD1",
+              "RXD2", "RXD3", "RXD4", "SALT1", "SALT10", "SALT11", "SALT12",
+              "SALT13", "SALT14", "SALT2", "SALT3", "SALT4", "SALT5", "SALT6",
+              "SALT7", "SALT8", "SALT9", "SCL1", "SCL2", "SD1", "SD2", "SDA1",
+              "SDA2", "SGPS1", "SGPS2", "SIOONCTRL", "SIOPBI", "SIOPBO",
+              "SIOPWREQ", "SIOPWRGD", "SIOS3", "SIOS5", "SIOSCI", "SPI1",
+              "SPI1CS1", "SPI1DEBUG", "SPI1PASSTHRU", "SPI2CK", "SPI2CS0",
+              "SPI2CS1", "SPI2MISO", "SPI2MOSI", "TIMER3", "TIMER4", "TIMER5",
+              "TIMER6", "TIMER7", "TIMER8", "TXD1", "TXD2", "TXD3", "TXD4",
+              "UART6", "USB11BHID", "USB2AD", "USB2AH", "USB2BD", "USB2BH",
+              "USBCKI", "VGABIOSROM", "VGAHS", "VGAVS", "VPI24", "VPO",
+              "WDTRST1", "WDTRST2", ]
+
+required:
+  - compatible
+  - aspeed,external-nodes
+
+examples:
+  - |
+    compatible = "simple-bus";
+    ranges;
+
+    apb {
+        compatible = "simple-bus";
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges;
+
+        syscon: scu@1e6e2000 {
+            compatible = "aspeed,ast2500-scu", "syscon", "simple-mfd";
+            reg = <0x1e6e2000 0x1a8>;
+
+            pinctrl: pinctrl {
+                compatible = "aspeed,g5-pinctrl";
+                aspeed,external-nodes = <&gfx &lhc>;
+
+                pinctrl_i2c3_default: i2c3_default {
+                    function = "I2C3";
+                    groups = "I2C3";
+                };
+
+                pinctrl_gpioh0_unbiased_default: gpioh0 {
+                    pins = "A18";
+                    bias-disable;
+                };
+            };
+        };
+
+        gfx: display@1e6e6000 {
+            compatible = "aspeed,ast2500-gfx", "syscon";
+            reg = <0x1e6e6000 0x1000>;
+        };
+    };
+
+    lpc: lpc@1e789000 {
+        compatible = "aspeed,ast2500-lpc", "simple-mfd";
+        reg = <0x1e789000 0x1000>;
+
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges = <0x0 0x1e789000 0x1000>;
+
+        lpc_host: lpc-host@80 {
+            compatible = "aspeed,ast2500-lpc-host", "simple-mfd", "syscon";
+            reg = <0x80 0x1e0>;
+            reg-io-width = <4>;
+
+            #address-cells = <1>;
+            #size-cells = <1>;
+            ranges = <0x0 0x80 0x1e0>;
+
+            lhc: lhc@20 {
+                   compatible = "aspeed,ast2500-lhc";
+                   reg = <0x20 0x24 0x48 0x8>;
+            };
+        };
+    };
-- 
cgit 


From 5d8cbf7176a427cd904eaa6efdaf6960d717b0de Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 28 Jun 2019 09:20:37 -0300
Subject: docs: extcon: convert it to ReST and move to ACPI dir

The intel-int3496.txt file is a documentation for an ACPI driver.

There's no reason to keep it on a separate directory.

So, instead of keeping it on some random location, move it
to a sub-directory inside the ACPI documentation dir,
renaming it to .rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/extcon/intel-int3496.txt             | 27 ------------------
 .../firmware-guide/acpi/extcon-intel-int3496.rst   | 33 ++++++++++++++++++++++
 Documentation/firmware-guide/acpi/index.rst        |  1 +
 3 files changed, 34 insertions(+), 27 deletions(-)
 delete mode 100644 Documentation/extcon/intel-int3496.txt
 create mode 100644 Documentation/firmware-guide/acpi/extcon-intel-int3496.rst

(limited to 'Documentation')

diff --git a/Documentation/extcon/intel-int3496.txt b/Documentation/extcon/intel-int3496.txt
deleted file mode 100644
index 8155dbc7fad3..000000000000
--- a/Documentation/extcon/intel-int3496.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-Intel INT3496 ACPI device extcon driver documentation
------------------------------------------------------
-
-The Intel INT3496 ACPI device extcon driver is a driver for ACPI
-devices with an acpi-id of INT3496, such as found for example on
-Intel Baytrail and Cherrytrail tablets.
-
-This ACPI device describes how the OS can read the id-pin of the devices'
-USB-otg port, as well as how it optionally can enable Vbus output on the
-otg port and how it can optionally control the muxing of the data pins
-between an USB host and an USB peripheral controller.
-
-The ACPI devices exposes this functionality by returning an array with up
-to 3 gpio descriptors from its ACPI _CRS (Current Resource Settings) call:
-
-Index 0: The input gpio for the id-pin, this is always present and valid
-Index 1: The output gpio for enabling Vbus output from the device to the otg
-         port, write 1 to enable the Vbus output (this gpio descriptor may
-         be absent or invalid)
-Index 2: The output gpio for muxing of the data pins between the USB host and
-         the USB peripheral controller, write 1 to mux to the peripheral
-         controller
-
-There is a mapping between indices and GPIO connection IDs as follows
-	id	index 0
-	vbus	index 1
-	mux	index 2
diff --git a/Documentation/firmware-guide/acpi/extcon-intel-int3496.rst b/Documentation/firmware-guide/acpi/extcon-intel-int3496.rst
new file mode 100644
index 000000000000..5137ca834b54
--- /dev/null
+++ b/Documentation/firmware-guide/acpi/extcon-intel-int3496.rst
@@ -0,0 +1,33 @@
+=====================================================
+Intel INT3496 ACPI device extcon driver documentation
+=====================================================
+
+The Intel INT3496 ACPI device extcon driver is a driver for ACPI
+devices with an acpi-id of INT3496, such as found for example on
+Intel Baytrail and Cherrytrail tablets.
+
+This ACPI device describes how the OS can read the id-pin of the devices'
+USB-otg port, as well as how it optionally can enable Vbus output on the
+otg port and how it can optionally control the muxing of the data pins
+between an USB host and an USB peripheral controller.
+
+The ACPI devices exposes this functionality by returning an array with up
+to 3 gpio descriptors from its ACPI _CRS (Current Resource Settings) call:
+
+=======  =====================================================================
+Index 0  The input gpio for the id-pin, this is always present and valid
+Index 1  The output gpio for enabling Vbus output from the device to the otg
+         port, write 1 to enable the Vbus output (this gpio descriptor may
+         be absent or invalid)
+Index 2  The output gpio for muxing of the data pins between the USB host and
+         the USB peripheral controller, write 1 to mux to the peripheral
+         controller
+=======  =====================================================================
+
+There is a mapping between indices and GPIO connection IDs as follows
+
+	======= =======
+	id	index 0
+	vbus	index 1
+	mux	index 2
+	======= =======
diff --git a/Documentation/firmware-guide/acpi/index.rst b/Documentation/firmware-guide/acpi/index.rst
index ae609eec4679..90c90d42d9ad 100644
--- a/Documentation/firmware-guide/acpi/index.rst
+++ b/Documentation/firmware-guide/acpi/index.rst
@@ -24,3 +24,4 @@ ACPI Support
    acpi-lid
    lpit
    video_extension
+   extcon-intel-int3496
-- 
cgit 


From 5004efbb3611f0672af6c92823d17100603bcccd Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 1 Jul 2019 15:58:27 +0200
Subject: Documentation: ABI: power: Add missing newline at end of file

"git diff" says:

    \ No newline at end of file

after modifying the files.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/ABI/testing/sysfs-power | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index 18b7dc929234..3c5130355011 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -300,4 +300,4 @@ Description:
 		attempt.
 
 		Using this sysfs file will override any values that were
-		set using the kernel command line for disk offset.
\ No newline at end of file
+		set using the kernel command line for disk offset.
-- 
cgit 


From 094380ea2bf9f0fa7d63e67bf500b8c77e8d1910 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Wed, 3 Jul 2019 14:07:48 +0200
Subject: ASoC: meson: axg-tdm-formatter: add reset to the bindings
 documentation

Add an optional reset property to the tdm formatter bindings. The
dedicated reset line is present on some SoC families, such as the g12a.

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Link: https://lore.kernel.org/r/20190703120749.32341-2-jbrunet@baylibre.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/sound/amlogic,axg-tdm-formatters.txt        | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/sound/amlogic,axg-tdm-formatters.txt b/Documentation/devicetree/bindings/sound/amlogic,axg-tdm-formatters.txt
index 3b94a715a0b9..8835a43edfbb 100644
--- a/Documentation/devicetree/bindings/sound/amlogic,axg-tdm-formatters.txt
+++ b/Documentation/devicetree/bindings/sound/amlogic,axg-tdm-formatters.txt
@@ -15,11 +15,15 @@ Required properties:
   * "lrclk"    : sample clock
   * "lrclk_sel": sample clock input multiplexer
 
-Example of TDMOUT_A on the A113 SoC:
+Optional property:
+- resets: phandle to the dedicated reset line of the tdm formatter.
+
+Example of TDMOUT_A on the S905X2 SoC:
 
 tdmout_a: audio-controller@500 {
 	compatible = "amlogic,axg-tdmout";
 	reg = <0x0 0x500 0x0 0x40>;
+	resets = <&clkc_audio AUD_RESET_TDMOUT_A>;
 	clocks = <&clkc_audio AUD_CLKID_TDMOUT_A>,
 		 <&clkc_audio AUD_CLKID_TDMOUT_A_SCLK>,
 		 <&clkc_audio AUD_CLKID_TDMOUT_A_SCLK_SEL>,
-- 
cgit 


From ba07e3aacc08de2e6f6cd199c3618fbbc2c45f0c Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@mellanox.com>
Date: Sun, 23 Jun 2019 12:16:29 +0000
Subject: Documentation/ABI: Add new attribute for mlxreg-io sysfs interfaces

Add documentation for the new attributes for exposing reset causes for
the next type of resets: caused by different watchdog, BIOS reload,
ASIC reset request.

Signed-off-by: Vadim Pasternak <vadimp@mellanox.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 Documentation/ABI/stable/sysfs-driver-mlxreg-io | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/ABI/stable/sysfs-driver-mlxreg-io b/Documentation/ABI/stable/sysfs-driver-mlxreg-io
index 156319fc5b80..28ca00cc8d6b 100644
--- a/Documentation/ABI/stable/sysfs-driver-mlxreg-io
+++ b/Documentation/ABI/stable/sysfs-driver-mlxreg-io
@@ -127,3 +127,23 @@ Description:	These files show the system reset cause, as following: ComEx
 		the last reset cause.
 
 		The files are read only.
+
+Date:		June 2019
+KernelVersion:	5.3
+Contact:	Vadim Pasternak <vadimpmellanox.com>
+Description:	These files show the system reset cause, as following:
+		COMEX thermal shutdown; wathchdog power off or reset was derived
+		by one of the next components: COMEX, switch board or by Small Form
+		Factor mezzanine, reset requested from ASIC, reset cuased by BIOS
+		reload. Value 1 in file means this is reset cause, 0 - otherwise.
+		Only one of the above causes could be 1 at the same time, representing
+		only last reset cause.
+
+		The files are read only.
+
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_comex_thermal
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_comex_wd
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_from_asic
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_reload_bios
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_sff_wd
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_swb_wd
-- 
cgit 


From 5a35316d97914d56d6be8b3748b2437785e74790 Mon Sep 17 00:00:00 2001
From: "Hook, Gary" <Gary.Hook@amd.com>
Date: Tue, 25 Jun 2019 23:43:43 +0000
Subject: crypto: doc - Add parameter documentation

Fill in missing parameter descriptions for the compression algorithm,
then pick them up to document for the compression_alg structure.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/crypto/api-skcipher.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/crypto/api-skcipher.rst b/Documentation/crypto/api-skcipher.rst
index 4eec4a93f7e3..20ba08dddf2e 100644
--- a/Documentation/crypto/api-skcipher.rst
+++ b/Documentation/crypto/api-skcipher.rst
@@ -5,7 +5,7 @@ Block Cipher Algorithm Definitions
    :doc: Block Cipher Algorithm Definitions
 
 .. kernel-doc:: include/linux/crypto.h
-   :functions: crypto_alg ablkcipher_alg blkcipher_alg cipher_alg
+   :functions: crypto_alg ablkcipher_alg blkcipher_alg cipher_alg compress_alg
 
 Symmetric Key Cipher API
 ------------------------
-- 
cgit 


From ae400be94b84206e8344e8f10d896aeb3ea2f505 Mon Sep 17 00:00:00 2001
From: "Hook, Gary" <Gary.Hook@amd.com>
Date: Tue, 25 Jun 2019 23:43:50 +0000
Subject: crypto: doc - Fix formatting of new crypto engine content

Tidy up the formatting/grammar in crypto_engine.rst. Use bulleted lists
where appropriate.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/crypto/crypto_engine.rst | 111 ++++++++++++++++++++++-----------
 1 file changed, 73 insertions(+), 38 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/crypto/crypto_engine.rst b/Documentation/crypto/crypto_engine.rst
index 1d56221dfe35..236c674d6897 100644
--- a/Documentation/crypto/crypto_engine.rst
+++ b/Documentation/crypto/crypto_engine.rst
@@ -1,50 +1,85 @@
-=============
-CRYPTO ENGINE
+.. SPDX-License-Identifier: GPL-2.0
+Crypto Engine
 =============
 
 Overview
 --------
-The crypto engine API (CE), is a crypto queue manager.
+The crypto engine (CE) API is a crypto queue manager.
 
 Requirement
 -----------
-You have to put at start of your tfm_ctx the struct crypto_engine_ctx::
+You must put, at the start of your transform context your_tfm_ctx, the structure
+crypto_engine:
+
+::
 
-  struct your_tfm_ctx {
-        struct crypto_engine_ctx enginectx;
-        ...
-  };
+	struct your_tfm_ctx {
+		struct crypto_engine engine;
+		...
+	};
 
-Why: Since CE manage only crypto_async_request, it cannot know the underlying
-request_type and so have access only on the TFM.
-So using container_of for accessing __ctx is impossible.
-Furthermore, the crypto engine cannot know the "struct your_tfm_ctx",
-so it must assume that crypto_engine_ctx is at start of it.
+The crypto engine only manages asynchronous requests in the form of
+crypto_async_request. It cannot know the underlying request type and thus only
+has access to the transform structure. It is not possible to access the context
+using container_of. In addition, the engine knows nothing about your
+structure "``struct your_tfm_ctx``". The engine assumes (requires) the placement
+of the known member ``struct crypto_engine`` at the beginning.
 
 Order of operations
 -------------------
-You have to obtain a struct crypto_engine via crypto_engine_alloc_init().
-And start it via crypto_engine_start().
-
-Before transferring any request, you have to fill the enginectx.
-- prepare_request: (taking a function pointer) If you need to do some processing before doing the request
-- unprepare_request: (taking a function pointer) Undoing what's done in prepare_request
-- do_one_request: (taking a function pointer) Do encryption for current request
-
-Note: that those three functions get the crypto_async_request associated with the received request.
-So your need to get the original request via container_of(areq, struct yourrequesttype_request, base);
-
-When your driver receive a crypto_request, you have to transfer it to
-the cryptoengine via one of:
-- crypto_transfer_ablkcipher_request_to_engine()
-- crypto_transfer_aead_request_to_engine()
-- crypto_transfer_akcipher_request_to_engine()
-- crypto_transfer_hash_request_to_engine()
-- crypto_transfer_skcipher_request_to_engine()
-
-At the end of the request process, a call to one of the following function is needed:
-- crypto_finalize_ablkcipher_request
-- crypto_finalize_aead_request
-- crypto_finalize_akcipher_request
-- crypto_finalize_hash_request
-- crypto_finalize_skcipher_request
+You are required to obtain a struct crypto_engine via ``crypto_engine_alloc_init()``.
+Start it via ``crypto_engine_start()``. When finished with your work, shut down the
+engine using ``crypto_engine_stop()`` and destroy the engine with
+``crypto_engine_exit()``.
+
+Before transferring any request, you have to fill the context enginectx by
+providing functions for the following:
+
+* ``prepare_crypt_hardware``: Called once before any prepare functions are
+  called.
+
+* ``unprepare_crypt_hardware``: Called once after all unprepare functions have
+  been called.
+
+* ``prepare_cipher_request``/``prepare_hash_request``: Called before each
+  corresponding request is performed. If some processing or other preparatory
+  work is required, do it here.
+
+* ``unprepare_cipher_request``/``unprepare_hash_request``: Called after each
+  request is handled. Clean up / undo what was done in the prepare function.
+
+* ``cipher_one_request``/``hash_one_request``: Handle the current request by
+  performing the operation.
+
+Note that these functions access the crypto_async_request structure
+associated with the received request. You are able to retrieve the original
+request by using:
+
+::
+
+	container_of(areq, struct yourrequesttype_request, base);
+
+When your driver receives a crypto_request, you must to transfer it to
+the crypto engine via one of:
+
+* crypto_transfer_ablkcipher_request_to_engine()
+
+* crypto_transfer_aead_request_to_engine()
+
+* crypto_transfer_akcipher_request_to_engine()
+
+* crypto_transfer_hash_request_to_engine()
+
+* crypto_transfer_skcipher_request_to_engine()
+
+At the end of the request process, a call to one of the following functions is needed:
+
+* crypto_finalize_ablkcipher_request()
+
+* crypto_finalize_aead_request()
+
+* crypto_finalize_akcipher_request()
+
+* crypto_finalize_hash_request()
+
+* crypto_finalize_skcipher_request()
-- 
cgit 


From 049331f277fef1c3f2527c2c9afa1d285e9a1247 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 3 Jul 2019 14:19:36 +0200
Subject: x86/fsgsbase: Revert FSGSBASE support

The FSGSBASE series turned out to have serious bugs and there is still an
open issue which is not fully understood yet.

The confidence in those changes has become close to zero especially as the
test cases which have been shipped with that series were obviously never
run before sending the final series out to LKML.

  ./fsgsbase_64 >/dev/null
  Segmentation fault

As the merge window is close, the only sane decision is to revert FSGSBASE
support. The revert is necessary as this branch has been merged into
perf/core already and rebasing all of that a few days before the merge
window is not the most brilliant idea.

I could definitely slap myself for not noticing the test case fail when
merging that series, but TBH my expectations weren't that low back
then. Won't happen again.

Revert the following commits:
539bca535dec ("x86/entry/64: Fix and clean up paranoid_exit")
2c7b5ac5d5a9 ("Documentation/x86/64: Add documentation for GS/FS addressing mode")
f987c955c745 ("x86/elf: Enumerate kernel FSGSBASE capability in AT_HWCAP2")
2032f1f96ee0 ("x86/cpu: Enable FSGSBASE on 64bit by default and add a chicken bit")
5bf0cab60ee2 ("x86/entry/64: Document GSBASE handling in the paranoid path")
708078f65721 ("x86/entry/64: Handle FSGSBASE enabled paranoid entry/exit")
79e1932fa3ce ("x86/entry/64: Introduce the FIND_PERCPU_BASE macro")
1d07316b1363 ("x86/entry/64: Switch CR3 before SWAPGS in paranoid entry")
f60a83df4593 ("x86/process/64: Use FSGSBASE instructions on thread copy and ptrace")
1ab5f3f7fe3d ("x86/process/64: Use FSBSBASE in switch_to() if available")
a86b4625138d ("x86/fsgsbase/64: Enable FSGSBASE instructions in helper functions")
8b71340d702e ("x86/fsgsbase/64: Add intrinsics for FSGSBASE instructions")
b64ed19b93c3 ("x86/cpu: Add 'unsafe_fsgsbase' to enable CR4.FSGSBASE")

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Chang S. Bae <chang.seok.bae@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ravi Shankar <ravi.v.shankar@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
---
 Documentation/admin-guide/kernel-parameters.txt |   2 -
 Documentation/x86/entry_64.rst                  |   9 --
 Documentation/x86/x86_64/fsgs.rst               | 199 ------------------------
 Documentation/x86/x86_64/index.rst              |   1 -
 4 files changed, 211 deletions(-)
 delete mode 100644 Documentation/x86/x86_64/fsgs.rst

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 35bc3c3574c6..138f6664b2e2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2857,8 +2857,6 @@
 	no5lvl		[X86-64] Disable 5-level paging mode. Forces
 			kernel to use 4-level paging instead.
 
-	nofsgsbase	[X86] Disables FSGSBASE instructions.
-
 	no_console_suspend
 			[HW] Never suspend the console
 			Disable suspending of consoles during suspend and
diff --git a/Documentation/x86/entry_64.rst b/Documentation/x86/entry_64.rst
index b87c1d816aea..a48b3f6ebbe8 100644
--- a/Documentation/x86/entry_64.rst
+++ b/Documentation/x86/entry_64.rst
@@ -108,12 +108,3 @@ We try to only use IST entries and the paranoid entry code for vectors
 that absolutely need the more expensive check for the GS base - and we
 generate all 'normal' entry points with the regular (faster) paranoid=0
 variant.
-
-On a FSGSBASE system, however, user space can set GS without kernel
-interaction. It means the value of GS base itself does not imply anything,
-whether a kernel value or a user space value. So, there is no longer a safe
-way to check whether the exception is entering from user mode or kernel
-mode in the paranoid entry code path. So the GSBASE value needs to be read
-out, saved and the kernel GSBASE value written. On exit the saved GSBASE
-value needs to be restored unconditionally. The non paranoid entry/exit
-code still uses SWAPGS unconditionally as the state is known.
diff --git a/Documentation/x86/x86_64/fsgs.rst b/Documentation/x86/x86_64/fsgs.rst
deleted file mode 100644
index 380c0b5ccca2..000000000000
--- a/Documentation/x86/x86_64/fsgs.rst
+++ /dev/null
@@ -1,199 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-Using FS and GS segments in user space applications
-===================================================
-
-The x86 architecture supports segmentation. Instructions which access
-memory can use segment register based addressing mode. The following
-notation is used to address a byte within a segment:
-
-  Segment-register:Byte-address
-
-The segment base address is added to the Byte-address to compute the
-resulting virtual address which is accessed. This allows to access multiple
-instances of data with the identical Byte-address, i.e. the same code. The
-selection of a particular instance is purely based on the base-address in
-the segment register.
-
-In 32-bit mode the CPU provides 6 segments, which also support segment
-limits. The limits can be used to enforce address space protections.
-
-In 64-bit mode the CS/SS/DS/ES segments are ignored and the base address is
-always 0 to provide a full 64bit address space. The FS and GS segments are
-still functional in 64-bit mode.
-
-Common FS and GS usage
-------------------------------
-
-The FS segment is commonly used to address Thread Local Storage (TLS). FS
-is usually managed by runtime code or a threading library. Variables
-declared with the '__thread' storage class specifier are instantiated per
-thread and the compiler emits the FS: address prefix for accesses to these
-variables. Each thread has its own FS base address so common code can be
-used without complex address offset calculations to access the per thread
-instances. Applications should not use FS for other purposes when they use
-runtimes or threading libraries which manage the per thread FS.
-
-The GS segment has no common use and can be used freely by
-applications. GCC and Clang support GS based addressing via address space
-identifiers.
-
-Reading and writing the FS/GS base address
-------------------------------------------
-
-There exist two mechanisms to read and write the FS/FS base address:
-
- - the arch_prctl() system call
-
- - the FSGSBASE instruction family
-
-Accessing FS/GS base with arch_prctl()
---------------------------------------
-
- The arch_prctl(2) based mechanism is available on all 64bit CPUs and all
- kernel versions.
-
- Reading the base:
-
-   arch_prctl(ARCH_GET_FS, &fsbase);
-   arch_prctl(ARCH_GET_GS, &gsbase);
-
- Writing the base:
-
-   arch_prctl(ARCH_SET_FS, fsbase);
-   arch_prctl(ARCH_SET_GS, gsbase);
-
- The ARCH_SET_GS prctl may be disabled depending on kernel configuration
- and security settings.
-
-Accessing FS/GS base with the FSGSBASE instructions
----------------------------------------------------
-
- With the Ivy Bridge CPU generation Intel introduced a new set of
- instructions to access the FS and GS base registers directly from user
- space. These instructions are also supported on AMD Family 17H CPUs. The
- following instructions are available:
-
-  =============== ===========================
-  RDFSBASE %reg   Read the FS base register
-  RDGSBASE %reg   Read the GS base register
-  WRFSBASE %reg   Write the FS base register
-  WRGSBASE %reg   Write the GS base register
-  =============== ===========================
-
- The instructions avoid the overhead of the arch_prctl() syscall and allow
- more flexible usage of the FS/GS addressing modes in user space
- applications. This does not prevent conflicts between threading libraries
- and runtimes which utilize FS and applications which want to use it for
- their own purpose.
-
-FSGSBASE instructions enablement
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- The instructions are enumerated in CPUID leaf 7, bit 0 of EBX. If
- available /proc/cpuinfo shows 'fsgsbase' in the flag entry of the CPUs.
-
- The availability of the instructions does not enable them
- automatically. The kernel has to enable them explicitly in CR4. The
- reason for this is that older kernels make assumptions about the values in
- the GS register and enforce them when GS base is set via
- arch_prctl(). Allowing user space to write arbitrary values to GS base
- would violate these assumptions and cause malfunction.
-
- On kernels which do not enable FSGSBASE the execution of the FSGSBASE
- instructions will fault with a #UD exception.
-
- The kernel provides reliable information about the enabled state in the
- ELF AUX vector. If the HWCAP2_FSGSBASE bit is set in the AUX vector, the
- kernel has FSGSBASE instructions enabled and applications can use them.
- The following code example shows how this detection works::
-
-   #include <sys/auxv.h>
-   #include <elf.h>
-
-   /* Will be eventually in asm/hwcap.h */
-   #ifndef HWCAP2_FSGSBASE
-   #define HWCAP2_FSGSBASE        (1 << 1)
-   #endif
-
-   ....
-
-   unsigned val = getauxval(AT_HWCAP2);
-
-   if (val & HWCAP2_FSGSBASE)
-        printf("FSGSBASE enabled\n");
-
-FSGSBASE instructions compiler support
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-GCC version 4.6.4 and newer provide instrinsics for the FSGSBASE
-instructions. Clang supports them as well.
-
-  =================== ===========================
-  _readfsbase_u64()   Read the FS base register
-  _readfsbase_u64()   Read the GS base register
-  _writefsbase_u64()  Write the FS base register
-  _writegsbase_u64()  Write the GS base register
-  =================== ===========================
-
-To utilize these instrinsics <immintrin.h> must be included in the source
-code and the compiler option -mfsgsbase has to be added.
-
-Compiler support for FS/GS based addressing
--------------------------------------------
-
-GCC version 6 and newer provide support for FS/GS based addressing via
-Named Address Spaces. GCC implements the following address space
-identifiers for x86:
-
-  ========= ====================================
-  __seg_fs  Variable is addressed relative to FS
-  __seg_gs  Variable is addressed relative to GS
-  ========= ====================================
-
-The preprocessor symbols __SEG_FS and __SEG_GS are defined when these
-address spaces are supported. Code which implements fallback modes should
-check whether these symbols are defined. Usage example::
-
-  #ifdef __SEG_GS
-
-  long data0 = 0;
-  long data1 = 1;
-
-  long __seg_gs *ptr;
-
-  /* Check whether FSGSBASE is enabled by the kernel (HWCAP2_FSGSBASE) */
-  ....
-
-  /* Set GS to point to data0 */
-  _writegsbase_u64(&data0);
-
-  /* Access offset 0 of GS */
-  ptr = 0;
-  printf("data0 = %ld\n", *ptr);
-
-  /* Set GS to point to data1 */
-  _writegsbase_u64(&data1);
-  /* ptr still addresses offset 0! */
-  printf("data1 = %ld\n", *ptr);
-
-
-Clang does not provide the GCC address space identifiers, but it provides
-address spaces via an attribute based mechanism in Clang 5 and newer
-versions:
-
- ==================================== =====================================
-  __attribute__((address_space(256))  Variable is addressed relative to GS
-  __attribute__((address_space(257))  Variable is addressed relative to FS
- ==================================== =====================================
-
-FS/GS based addressing with inline assembly
--------------------------------------------
-
-In case the compiler does not support address spaces, inline assembly can
-be used for FS/GS based addressing mode::
-
-	mov %fs:offset, %reg
-	mov %gs:offset, %reg
-
-	mov %reg, %fs:offset
-	mov %reg, %gs:offset
diff --git a/Documentation/x86/x86_64/index.rst b/Documentation/x86/x86_64/index.rst
index a56070fc8e77..d6eaaa5a35fc 100644
--- a/Documentation/x86/x86_64/index.rst
+++ b/Documentation/x86/x86_64/index.rst
@@ -14,4 +14,3 @@ x86_64 Support
    fake-numa-for-cpusets
    cpu-hotplug-spec
    machinecheck
-   fsgs
-- 
cgit 


From ecd6bf67da3126e8ec731c2dd8cb6c2f17d9563a Mon Sep 17 00:00:00 2001
From: Mark Greer <mgreer@animalcreek.com>
Date: Wed, 26 Jun 2019 09:05:53 -0700
Subject: serial: mpsc: Remove obsolete MPSC driver

Support for the Marvell MV64x60 line of bridge chips that contained
MPSC controllers has been removed and there are no other components
that have that controller so remove its driver.

Signed-off-by: Mark Greer <mgreer@animalcreek.com>
Link: https://lore.kernel.org/r/20190626160553.28518-1-mgreer@animalcreek.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/admin-guide/devices.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/devices.txt b/Documentation/admin-guide/devices.txt
index 1649117e6087..e56e00655153 100644
--- a/Documentation/admin-guide/devices.txt
+++ b/Documentation/admin-guide/devices.txt
@@ -2693,8 +2693,8 @@
 		 41 = /dev/ttySMX0		Motorola i.MX - port 0
 		 42 = /dev/ttySMX1		Motorola i.MX - port 1
 		 43 = /dev/ttySMX2		Motorola i.MX - port 2
-		 44 = /dev/ttyMM0		Marvell MPSC - port 0
-		 45 = /dev/ttyMM1		Marvell MPSC - port 1
+		 44 = /dev/ttyMM0		Marvell MPSC - port 0 (obsolete unused)
+		 45 = /dev/ttyMM1		Marvell MPSC - port 1 (obsolete unused)
 		 46 = /dev/ttyCPM0		PPC CPM (SCC or SMC) - port 0
 		    ...
 		 47 = /dev/ttyCPM5		PPC CPM (SCC or SMC) - port 5
-- 
cgit 


From 4c2c04c27df9d2e48153e0bf8a59aec97101fb8c Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Wed, 3 Jul 2019 11:55:07 +0200
Subject: dt-bindings: net: mdio: Add a nodename pattern

The node name of an MDIO controller should be MDIO. Let's add a rule for
this.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/net/mdio.yaml | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/mdio.yaml b/Documentation/devicetree/bindings/net/mdio.yaml
index b8fa8251c4bc..12069d0b25c6 100644
--- a/Documentation/devicetree/bindings/net/mdio.yaml
+++ b/Documentation/devicetree/bindings/net/mdio.yaml
@@ -18,6 +18,9 @@ description:
   a device specific binding document.
 
 properties:
+  $nodename:
+    pattern: "^mdio(@.*)?"
+
   reset-gpios:
     maxItems: 1
     description:
-- 
cgit 


From 960ebc8ac65e9eb9605f5cbee9fc189fed7c052f Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Wed, 3 Jul 2019 11:55:08 +0200
Subject: dt-bindings: net: mdio: Add address and size cells

MDIO controllers should have child nodes with a singe value in their reg
properties being the ID of the PHY in the bus.

Let's add the proper constraints on #address-cells and #size-cells.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/net/mdio.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/mdio.yaml b/Documentation/devicetree/bindings/net/mdio.yaml
index 12069d0b25c6..24d67074d494 100644
--- a/Documentation/devicetree/bindings/net/mdio.yaml
+++ b/Documentation/devicetree/bindings/net/mdio.yaml
@@ -21,6 +21,12 @@ properties:
   $nodename:
     pattern: "^mdio(@.*)?"
 
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
   reset-gpios:
     maxItems: 1
     description:
-- 
cgit 


From 5b19b6c31c21d33fde74f3bf3c2a3ae3d010f114 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Wed, 3 Jul 2019 11:55:09 +0200
Subject: dt-bindings: net: mdio: Add child nodes

The child nodes of a mdio bus are supposed to be ethernet PHYs, with a reg
property. Make sure that's validated as well.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/net/mdio.yaml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/mdio.yaml b/Documentation/devicetree/bindings/net/mdio.yaml
index 24d67074d494..5d08d2ffd4eb 100644
--- a/Documentation/devicetree/bindings/net/mdio.yaml
+++ b/Documentation/devicetree/bindings/net/mdio.yaml
@@ -39,6 +39,20 @@ properties:
       and must therefore be appropriately determined based on all PHY
       requirements (maximum value of all per-PHY RESET pulse widths).
 
+patternProperties:
+  "^ethernet-phy@[0-9a-f]+$":
+    type: object
+
+    properties:
+      reg:
+        minimum: 0
+        maximum: 31
+        description:
+          The ID number for the PHY.
+
+    required:
+      - reg
+
 examples:
   - |
     davinci_mdio: mdio@5c030000 {
-- 
cgit 


From f4913aeed73fd917782043db36b362c8f5686128 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Wed, 3 Jul 2019 11:55:10 +0200
Subject: dt-bindings: display: Fix simple-framebuffer example

The simple-framebuffer binding has a compatible that isn't one of the valid
options. Since an Allwinner pipeline is being described, let's add the
matching compatible.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/display/simple-framebuffer.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/display/simple-framebuffer.yaml b/Documentation/devicetree/bindings/display/simple-framebuffer.yaml
index b052d76cf8b6..c8d73ef010b5 100644
--- a/Documentation/devicetree/bindings/display/simple-framebuffer.yaml
+++ b/Documentation/devicetree/bindings/display/simple-framebuffer.yaml
@@ -139,7 +139,7 @@ examples:
       #size-cells = <1>;
       stdout-path = "display0";
       framebuffer0: framebuffer@1d385000 {
-        compatible = "simple-framebuffer";
+        compatible = "allwinner,simple-framebuffer", "simple-framebuffer";
         reg = <0x1d385000 3840000>;
         width = <1600>;
         height = <1200>;
-- 
cgit 


From ed8e3f51b6b7c0565840683328ce6e4b830cea9f Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Wed, 3 Jul 2019 11:55:11 +0200
Subject: dt-bindings: simple-framebuffer: Add requirement for pipelines

Both the allwinner and amlogic compatibles require that either the
allwinner,pipeline or the amlogic,pipeline property is set. This was
dropped during the conversion since we didn't have conditionals back then,
but we can express this properly now.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../bindings/display/simple-framebuffer.yaml       | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/display/simple-framebuffer.yaml b/Documentation/devicetree/bindings/display/simple-framebuffer.yaml
index c8d73ef010b5..678776b6012a 100644
--- a/Documentation/devicetree/bindings/display/simple-framebuffer.yaml
+++ b/Documentation/devicetree/bindings/display/simple-framebuffer.yaml
@@ -126,6 +126,28 @@ required:
   # but usually they will be filled by the bootloader.
   - compatible
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: allwinner,simple-framebuffer
+
+    then:
+      required:
+        - allwinner,pipeline
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: amlogic,simple-framebuffer
+
+    then:
+      required:
+        - amlogic,pipeline
+
+
 additionalProperties: false
 
 examples:
@@ -140,6 +162,7 @@ examples:
       stdout-path = "display0";
       framebuffer0: framebuffer@1d385000 {
         compatible = "allwinner,simple-framebuffer", "simple-framebuffer";
+        allwinner,pipeline = "de_be0-lcd0";
         reg = <0x1d385000 3840000>;
         width = <1600>;
         height = <1200>;
-- 
cgit 


From 67d0da99163f99eb39a931e79b128cbf0647dfdb Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Wed, 3 Jul 2019 11:55:12 +0200
Subject: dt-bindings: net: Use phy-mode instead of phy-connection-type

When adding support for the DWMAC (and derivatives) bindings,
phy-connection-type was required, even though the previous binding required
the equivalent phy-mode.

Let's fix this by using phy-mode as we should have.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml         | 4 ++--
 .../devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml        | 8 ++++----
 Documentation/devicetree/bindings/net/snps,dwmac.yaml             | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
index 4b45798f0e68..06b1cc8bea14 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
@@ -44,7 +44,7 @@ required:
   - interrupt-names
   - clocks
   - clock-names
-  - phy-connection-type
+  - phy-mode
 
 examples:
   - |
@@ -55,7 +55,7 @@ examples:
         interrupt-names = "macirq";
         clocks = <&ahb_gates 49>, <&gmac_tx>;
         clock-names = "stmmaceth", "allwinner_gmac_tx";
-        phy-connection-type = "mii";
+        phy-mode = "mii";
     };
 
 # FIXME: We should set it, but it would report all the generic
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
index 6f68c7f5fc34..d4084c149768 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
@@ -52,8 +52,8 @@ required:
   - clock-names
   - resets
   - reset-names
-  - phy-connection-type
   - phy-handle
+  - phy-mode
   - syscon
 
 allOf:
@@ -198,7 +198,7 @@ examples:
         clock-names = "stmmaceth";
 
         phy-handle = <&int_mii_phy>;
-        phy-connection-type = "mii";
+        phy-mode = "mii";
         allwinner,leds-active-low;
 
         mdio1: mdio {
@@ -249,7 +249,7 @@ examples:
         clock-names = "stmmaceth";
 
         phy-handle = <&ext_rgmii_phy>;
-        phy-connection-type = "rgmii";
+        phy-mode = "rgmii";
         allwinner,leds-active-low;
 
         mdio2: mdio {
@@ -301,7 +301,7 @@ examples:
         clocks = <&ccu 27>;
         clock-names = "stmmaceth";
         phy-handle = <&ext_rgmii_phy1>;
-        phy-connection-type = "rgmii";
+        phy-mode = "rgmii";
 
         mdio {
             compatible = "snps,dwmac-mdio";
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 658726c620be..76fea2be66ac 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -261,7 +261,7 @@ required:
   - reg
   - interrupts
   - interrupt-names
-  - phy-connection-type
+  - phy-mode
 
 dependencies:
   snps,reset-active-low: ["snps,reset-gpio"]
@@ -384,7 +384,7 @@ examples:
         interrupt-names = "macirq", "eth_wake_irq", "eth_lpi";
         mac-address = [000000000000]; /* Filled in by U-Boot */
         max-frame-size = <3800>;
-        phy-connection-type = "gmii";
+        phy-mode = "gmii";
         snps,multicast-filter-bins = <256>;
         snps,perfect-filter-entries = <128>;
         rx-fifo-depth = <16384>;
-- 
cgit 


From 5ff88144f5880f7fafcb6a8e913eadca4cb233c8 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Wed, 3 Jul 2019 11:55:13 +0200
Subject: dt-bindings: usb: ehci: Fix example warnings

The example of the EHCI binding generates a bunch of warnings now that the
examples are validated too.

Most notably, phy-names isn't used at all, and the node name should be USB.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/usb/generic-ehci.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/usb/generic-ehci.yaml b/Documentation/devicetree/bindings/usb/generic-ehci.yaml
index d3b4f6415920..059f6ef1ad4a 100644
--- a/Documentation/devicetree/bindings/usb/generic-ehci.yaml
+++ b/Documentation/devicetree/bindings/usb/generic-ehci.yaml
@@ -74,7 +74,7 @@ additionalProperties: false
 
 examples:
   - |
-    ehci@e0000300 {
+    usb@e0000300 {
         compatible = "ibm,usb-ehci-440epx", "generic-ehci";
         interrupt-parent = <&UIC0>;
         interrupts = <0x1a 4>;
@@ -89,7 +89,6 @@ examples:
         interrupts = <39>;
         clocks = <&ahb_gates 1>;
         phys = <&usbphy 1>;
-        phy-names = "usb";
     };
 
 ...
-- 
cgit 


From f85d93385e9fe6886a751f647f6812a89bf6bee3 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Thu, 23 May 2019 10:45:48 -0400
Subject: locks: Cleanup lm_compare_owner and lm_owner_key

After the update to use nlm_lockowners for the NLM server, there are no
more users of lm_compare_owner and lm_owner_key.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 Documentation/filesystems/Locking | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index dac435575384..204dd3ea36bb 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -361,8 +361,6 @@ so fl_release_private called on a lease should not block.
 
 ----------------------- lock_manager_operations ---------------------------
 prototypes:
-	int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
-	unsigned long (*lm_owner_key)(struct file_lock *);
 	void (*lm_notify)(struct file_lock *);  /* unblock callback */
 	int (*lm_grant)(struct file_lock *, struct file_lock *, int);
 	void (*lm_break)(struct file_lock *); /* break_lease callback */
@@ -371,23 +369,11 @@ prototypes:
 locking rules:
 
 			inode->i_lock	blocked_lock_lock	may block
-lm_compare_owner:	yes[1]		maybe			no
-lm_owner_key		yes[1]		yes			no
 lm_notify:		yes		yes			no
 lm_grant:		no		no			no
 lm_break:		yes		no			no
 lm_change		yes		no			no
 
-[1]:	->lm_compare_owner and ->lm_owner_key are generally called with
-*an* inode->i_lock held. It may not be the i_lock of the inode
-associated with either file_lock argument! This is the case with deadlock
-detection, since the code has to chase down the owners of locks that may
-be entirely unrelated to the one on which the lock is being acquired.
-For deadlock detection however, the blocked_lock_lock is also held. The
-fact that these locks are held ensures that the file_locks do not
-disappear out from under you while doing the comparison or generating an
-owner key.
-
 --------------------------- buffer_head -----------------------------------
 prototypes:
 	void (*b_end_io)(struct buffer_head *bh, int uptodate);
-- 
cgit 


From 01f14c52591dd9028b93d0641136a34b388b773d Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 1 Jul 2019 16:10:05 +0200
Subject: Documentation: gpio: Fix reference to gpiod_get_array()

The function is called gpiod_get_array(), not gpiod_array_get().

Fixes: 77588c14ac868cae ("gpiolib: Pass array info to get/set array functions")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20190701141005.24631-1-geert+renesas@glider.be
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/driver-api/gpio/consumer.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst
index 23d68c321c5c..9559aa3cbcef 100644
--- a/Documentation/driver-api/gpio/consumer.rst
+++ b/Documentation/driver-api/gpio/consumer.rst
@@ -364,7 +364,7 @@ accessed sequentially.
 The functions take three arguments:
 	* array_size	- the number of array elements
 	* desc_array	- an array of GPIO descriptors
-	* array_info	- optional information obtained from gpiod_array_get()
+	* array_info	- optional information obtained from gpiod_get_array()
 	* value_bitmap	- a bitmap to store the GPIOs' values (get) or
 			  a bitmap of values to assign to the GPIOs (set)
 
-- 
cgit 


From 80327437e377bd4d3f696ccec94b78895e6359f9 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vkoul@kernel.org>
Date: Tue, 2 Jul 2019 16:20:43 +0530
Subject: dt-bindings: pinctrl: qcom: Document missing gpio nodes

The bindings for msm8998-pinctrl was missing gpio-ranges and
gpio-reserved-ranges, so document them as well

Signed-off-by: Vinod Koul <vkoul@kernel.org>
Link: https://lore.kernel.org/r/20190702105045.27646-2-vkoul@kernel.org
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../devicetree/bindings/pinctrl/qcom,msm8998-pinctrl.txt       | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8998-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8998-pinctrl.txt
index 00174f08ba1d..cdec1eeb2799 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,msm8998-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8998-pinctrl.txt
@@ -40,6 +40,14 @@ MSM8998 platform.
 	Definition: must be 2. Specifying the pin number and flags, as defined
 		    in <dt-bindings/gpio/gpio.h>
 
+- gpio-ranges:
+	Usage: required
+	Definition:  see ../gpio/gpio.txt
+
+- gpio-reserved-ranges:
+	Usage: optional
+	Definition: see ../gpio/gpio.txt
+
 Please refer to ../gpio/gpio.txt and ../interrupt-controller/interrupts.txt for
 a general description of GPIO and interrupt bindings.
 
@@ -175,6 +183,8 @@ Example:
 		interrupts = <0 208 0>;
 		gpio-controller;
 		#gpio-cells = <2>;
+		gpio-ranges = <&tlmm 0 0 175>;
+		gpio-reserved-ranges = <0 4>, <81 4>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
 
-- 
cgit 


From 7f1fee22a2bb4073ddd2f4b06b6710a561ef3815 Mon Sep 17 00:00:00 2001
From: Prasad Sodagudi <psodagud@codeaurora.org>
Date: Tue, 2 Jul 2019 16:20:44 +0530
Subject: dt-bindings: pinctrl: qcom: Add SM8150 pinctrl binding

Add the binding for the TLMM pinctrl block found in the SM8150 platform.

Signed-off-by: Prasad Sodagudi <psodagud@codeaurora.org>
Signed-off-by: Isaac J. Manjarres <isaacm@codeaurora.org>
[vkoul: add missing nodes of gpio range and reserved
	rewrote function names and order them]
Signed-off-by: Vinod Koul <vkoul@kernel.org>
Link: https://lore.kernel.org/r/20190702105045.27646-3-vkoul@kernel.org
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../bindings/pinctrl/qcom,sm8150-pinctrl.txt       | 190 +++++++++++++++++++++
 1 file changed, 190 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/pinctrl/qcom,sm8150-pinctrl.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm8150-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,sm8150-pinctrl.txt
new file mode 100644
index 000000000000..fa37733e5102
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm8150-pinctrl.txt
@@ -0,0 +1,190 @@
+Qualcomm SM8150 TLMM block
+
+This binding describes the Top Level Mode Multiplexer block found in the
+QCS404 platform.
+
+- compatible:
+	Usage: required
+	Value type: <string>
+	Definition: must be "qcom,sm8150-pinctrl"
+
+- reg:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: the base address and size of the north, south, west
+		    and east TLMM tiles.
+
+- reg-names:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Defintiion: names for the cells of reg, must contain "north", "south"
+		    "west" and "east".
+
+- interrupts:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: should specify the TLMM summary IRQ.
+
+- interrupt-controller:
+	Usage: required
+	Value type: <none>
+	Definition: identifies this node as an interrupt controller
+
+- #interrupt-cells:
+	Usage: required
+	Value type: <u32>
+	Definition: must be 2. Specifying the pin number and flags, as defined
+		    in <dt-bindings/interrupt-controller/irq.h>
+
+- gpio-controller:
+	Usage: required
+	Value type: <none>
+	Definition: identifies this node as a gpio controller
+
+- #gpio-cells:
+	Usage: required
+	Value type: <u32>
+	Definition: must be 2. Specifying the pin number and flags, as defined
+		    in <dt-bindings/gpio/gpio.h>
+
+- gpio-ranges:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition:  see ../gpio/gpio.txt
+
+- gpio-reserved-ranges:
+	Usage: optional
+	Value type: <prop-encoded-array>
+	Definition: see ../gpio/gpio.txt
+
+Please refer to ../gpio/gpio.txt and ../interrupt-controller/interrupts.txt for
+a general description of GPIO and interrupt bindings.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+The pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as pull-up, drive strength, etc.
+
+
+PIN CONFIGURATION NODES:
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function.
+
+
+The following generic properties as defined in pinctrl-bindings.txt are valid
+to specify in a pin configuration subnode:
+
+- pins:
+	Usage: required
+	Value type: <string-array>
+	Definition: List of gpio pins affected by the properties specified in
+		    this subnode.
+
+		    Valid pins are:
+		      gpio0-gpio149
+		        Supports mux, bias and drive-strength
+
+		      sdc1_clk, sdc1_cmd, sdc1_data sdc2_clk, sdc2_cmd,
+		      sdc2_data sdc1_rclk
+		        Supports bias and drive-strength
+
+		      ufs_reset
+		        Supports bias and drive-strength
+
+- function:
+	Usage: required
+	Value type: <string>
+	Definition: Specify the alternative function to be configured for the
+		    specified pins. Functions are only valid for gpio pins.
+		    Valid values are:
+
+		    adsp_ext, agera_pll, aoss_cti, ddr_pxi2, atest_char,
+		    atest_char0, atest_char1, atest_char2, atest_char3,
+		    audio_ref, atest_usb1, atest_usb2, atest_usb10,
+		    atest_usb11, atest_usb12, atest_usb13, atest_usb20,
+		    atest_usb21, atest_usb22, atest_usb2, atest_usb23,
+		    btfm_slimbus, cam_mclk, cci_async, cci_i2c, cci_timer0,
+		    cci_timer1, cci_timer2, cci_timer3, cci_timer4,
+		    cri_trng, cri_trng0, cri_trng1, dbg_out, ddr_bist,
+		    ddr_pxi0, ddr_pxi1, ddr_pxi3, edp_hot, edp_lcd,
+		    emac_phy, emac_pps, gcc_gp1, gcc_gp2, gcc_gp3, gpio,
+		    hs1_mi2s, hs2_mi2s, hs3_mi2s, jitter_bist,
+		    lpass_slimbus, mdp_vsync, mdp_vsync0, mdp_vsync1,
+		    mdp_vsync2, mdp_vsync3, mss_lte, m_voc, nav_pps,
+		    pa_indicator, pci_e0, phase_flag, pll_bypassnl,
+		    pll_bist, pci_e1, pll_reset, pri_mi2s, pri_mi2s_ws,
+		    prng_rosc, qdss, qdss_cti, qlink_request, qlink_enable,
+		    qspi0, qspi1, qspi2, qspi3, qspi_clk, qspi_cs, qua_mi2s,
+		    qup0, qup1, qup2, qup3, qup4, qup5, qup6, qup7, qup8,
+		    qup9, qup10, qup11, qup12, qup13, qup14, qup15, qup16,
+		    qup17, qup18, qup19, qup_l4, qup_l5, qup_l6, rgmii,
+		    sdc4, sd_write, sec_mi2s, spkr_i2s, sp_cmu, ter_mi2s,
+		    tgu_ch0, tgu_ch1, tgu_ch2, tgu_ch3, tsense_pwm1,
+		    tsense_pwm2, tsif1, tsif2, uim1, uim2, uim_batt,
+		    usb2phy_ac, usb_phy, vfr_1, vsense_trigger, wlan1_adc0,
+		    wlan1_adc1, wlan2_adc0, wlan2_adc1, wmss_reset
+
+- bias-disable:
+	Usage: optional
+	Value type: <none>
+	Definition: The specified pins should be configued as no pull.
+
+- bias-pull-down:
+	Usage: optional
+	Value type: <none>
+	Definition: The specified pins should be configued as pull down.
+
+- bias-pull-up:
+	Usage: optional
+	Value type: <none>
+	Definition: The specified pins should be configued as pull up.
+
+- output-high:
+	Usage: optional
+	Value type: <none>
+	Definition: The specified pins are configured in output mode, driven
+		    high.
+		    Not valid for sdc pins.
+
+- output-low:
+	Usage: optional
+	Value type: <none>
+	Definition: The specified pins are configured in output mode, driven
+		    low.
+		    Not valid for sdc pins.
+
+- drive-strength:
+	Usage: optional
+	Value type: <u32>
+	Definition: Selects the drive strength for the specified pins, in mA.
+		    Valid values are: 2, 4, 6, 8, 10, 12, 14 and 16
+
+Example:
+
+	tlmm: pinctrl@3000000 {
+		compatible = "qcom,sm8150-pinctrl";
+		reg = <0x03100000 0x300000>,
+		      <0x03500000 0x300000>,
+		      <0x03900000 0x300000>,
+		      <0x03D00000 0x300000>;
+		reg-names = "west", "east", "north", "south";
+		interrupts = <GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>;
+		gpio-controller;
+		#gpio-cells = <2>;
+		gpio-ranges = <&tlmm 0 0 175>;
+		gpio-reserved-ranges = <0 4>, <126 4>;
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
-- 
cgit 


From 0b07ee944701dabcddc294d903b5e8e21c2c5d95 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 4 Jul 2019 13:06:17 +0530
Subject: PM / QOS: Pass request type to dev_pm_qos_{add|remove}_notifier()

In order to use the same set of routines to register notifiers for
different request types, update the existing
dev_pm_qos_{add|remove}_notifier() routines with an additional
parameter: request-type.

For now, it only supports resume-latency request type but will be
extended to frequency limit (min/max) constraints later on.

Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/pm_qos_interface.txt | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt
index 19c5f7b1a7ba..ec7d662d1707 100644
--- a/Documentation/power/pm_qos_interface.txt
+++ b/Documentation/power/pm_qos_interface.txt
@@ -164,12 +164,14 @@ directory.
 Notification mechanisms:
 The per-device PM QoS framework has a per-device notification tree.
 
-int dev_pm_qos_add_notifier(device, notifier):
-Adds a notification callback function for the device.
+int dev_pm_qos_add_notifier(device, notifier, type):
+Adds a notification callback function for the device for a particular request
+type.
+
 The callback is called when the aggregated value of the device constraints list
-is changed (for resume latency device PM QoS only).
+is changed.
 
-int dev_pm_qos_remove_notifier(device, notifier):
+int dev_pm_qos_remove_notifier(device, notifier, type):
 Removes the notification callback function for the device.
 
 
-- 
cgit 


From 2a79ea5ec53973c8711b54d33ace5c77659dc8f8 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 4 Jul 2019 13:06:19 +0530
Subject: PM / QOS: Pass request type to dev_pm_qos_read_value()

In order to allow dev_pm_qos_read_value() to read values for different
QoS requests, pass request type as a parameter to these routines.

For now, it only supports resume-latency request type but will be
extended to frequency limit (min/max) constraints later on.

Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/pm_qos_interface.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt
index ec7d662d1707..cfcb1df39799 100644
--- a/Documentation/power/pm_qos_interface.txt
+++ b/Documentation/power/pm_qos_interface.txt
@@ -123,7 +123,7 @@ Will remove the element.  After removal it will update the aggregate target and
 call the notification trees if the target was changed as a result of removing
 the request.
 
-s32 dev_pm_qos_read_value(device):
+s32 dev_pm_qos_read_value(device, type):
 Returns the aggregated value for a given device's constraints list.
 
 enum pm_qos_flags_status dev_pm_qos_flags(device, mask)
-- 
cgit 


From ea3bfeec6bf77033608b81a5e9a7cd6d686ab054 Mon Sep 17 00:00:00 2001
From: Qii Wang <qii.wang@mediatek.com>
Date: Mon, 1 Jul 2019 14:42:02 +0800
Subject: dt-bindings: i3c: cdns: Use correct cells for I2C device

I2C device reg should be "reg = <0x52 0x0 0x10>;"

Fixes: e29d0d9c90c9 ("dt-bindings: i3c: Document Cadence I3C master bindings")
Signed-off-by: Qii Wang <qii.wang@mediatek.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 Documentation/devicetree/bindings/i3c/cdns,i3c-master.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/i3c/cdns,i3c-master.txt b/Documentation/devicetree/bindings/i3c/cdns,i3c-master.txt
index 69da2115abdc..1cf6182f888c 100644
--- a/Documentation/devicetree/bindings/i3c/cdns,i3c-master.txt
+++ b/Documentation/devicetree/bindings/i3c/cdns,i3c-master.txt
@@ -38,6 +38,6 @@ Example:
 
 		nunchuk: nunchuk@52 {
 			compatible = "nintendo,nunchuk";
-			reg = <0x52 0x80000010 0>;
+			reg = <0x52 0x0 0x10>;
 		};
 	};
-- 
cgit 


From 1141301c20706acd444637d7c5c6b201ad878c6e Mon Sep 17 00:00:00 2001
From: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
Date: Thu, 4 Jul 2019 15:23:04 +0530
Subject: dt-bindings: coresight: Change CPU phandle to required property

Do not assume the affinity to CPU0 if cpu phandle is omitted.
Update the DT binding rules to reflect the same by changing it
to a required property.

Signed-off-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Tested-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Link: https://lore.kernel.org/r/0f7f4105d5ffea6ca4313271f3b3fee69da2106a.1562229018.git.saiprakash.ranjan@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt | 4 ++--
 Documentation/devicetree/bindings/arm/coresight.txt           | 8 +++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt b/Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt
index 298291211ea4..f1de3247c1b7 100644
--- a/Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt
+++ b/Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt
@@ -26,8 +26,8 @@ Required properties:
 		processor core is clocked by the internal CPU clock, so it
 		is enabled with CPU clock by default.
 
-- cpu : the CPU phandle the debug module is affined to. When omitted
-	the module is considered to belong to CPU0.
+- cpu : the CPU phandle the debug module is affined to. Do not assume it
+        to default to CPU0 if omitted.
 
 Optional properties:
 
diff --git a/Documentation/devicetree/bindings/arm/coresight.txt b/Documentation/devicetree/bindings/arm/coresight.txt
index 8a88ddebc1a2..fcc3bacfd8bc 100644
--- a/Documentation/devicetree/bindings/arm/coresight.txt
+++ b/Documentation/devicetree/bindings/arm/coresight.txt
@@ -59,6 +59,11 @@ its hardware characteristcs.
 
 	* port or ports: see "Graph bindings for Coresight" below.
 
+* Additional required property for Embedded Trace Macrocell (version 3.x and
+  version 4.x):
+	* cpu: the cpu phandle this ETM/PTM is affined to. Do not
+	  assume it to default to CPU0 if omitted.
+
 * Additional required properties for System Trace Macrocells (STM):
 	* reg: along with the physical base address and length of the register
 	  set as described above, another entry is required to describe the
@@ -87,9 +92,6 @@ its hardware characteristcs.
 	* arm,cp14: must be present if the system accesses ETM/PTM management
 	  registers via co-processor 14.
 
-	* cpu: the cpu phandle this ETM/PTM is affined to. When omitted the
-	  source is considered to belong to CPU0.
-
 * Optional property for TMC:
 
 	* arm,buffer-size: size of contiguous buffer space for TMC ETR
-- 
cgit 


From 65d71f0095f611eb090bb9c1cca086a8967936e5 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 4 Jul 2019 13:02:18 +0200
Subject: Revert "dt-bindings: add binding for USBSS-DRD controller."

This reverts commit e8a8b40cc8922b38f91c6562d525f21f312f235c.

It's broken.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Felipe Balbi <balbi@kernel.org>
Cc: Pawel Laszczak <pawell@cadence.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../devicetree/bindings/usb/cdns-usb3.txt          | 45 ----------------------
 1 file changed, 45 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/usb/cdns-usb3.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/usb/cdns-usb3.txt b/Documentation/devicetree/bindings/usb/cdns-usb3.txt
deleted file mode 100644
index b7dc606d37b5..000000000000
--- a/Documentation/devicetree/bindings/usb/cdns-usb3.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-Binding for the Cadence USBSS-DRD controller
-
-Required properties:
- - reg: Physical base address and size of the controller's register areas.
-	 Controller has 3 different regions:
-	 - HOST registers area
-	 - DEVICE registers area
-	 - OTG/DRD registers area
- - reg-names - register memory area names:
-	"xhci" - for HOST registers space
-	"dev" - for DEVICE registers space
-	"otg" - for OTG/DRD registers space
- - compatible: Should contain: "cdns,usb3"
- - interrupts: Interrupts used by cdns3 controller:
-	"host" - interrupt used by XHCI driver.
-	"peripheral" - interrupt used by device driver
-	"otg" - interrupt used by DRD/OTG  part of driver
-
-Optional properties:
- - maximum-speed : valid arguments are "super-speed", "high-speed" and
-                   "full-speed"; refer to usb/generic.txt
- - dr_mode: Should be one of "host", "peripheral" or "otg".
- - phys: reference to the USB PHY
- - phy-names: from the *Generic PHY* bindings;
-	Supported names are:
-	- cdns3,usb2-phy
-	- cdns3,usb3-phy
-
- - cdns,on-chip-buff-size : size of memory intended as internal memory for endpoints
-	buffers expressed in KB
-
-Example:
-	usb@f3000000 {
-		compatible = "cdns,usb3";
-		interrupts = <GIC_USB_IRQ 7 IRQ_TYPE_LEVEL_HIGH>,
-				<GIC_USB_IRQ  7 IRQ_TYPE_LEVEL_HIGH>,
-				<GIC_USB_IRQ  8 IRQ_TYPE_LEVEL_HIGH>;
-		interrupt-names = "host", "peripheral", "otg";
-		reg = <0xf3000000 0x10000>,	/* memory area for HOST registers */
-			<0xf3010000 0x10000>,	/* memory area for DEVICE registers */
-			<0xf3020000 0x10000>;	/* memory area for OTG/DRD registers */
-		reg-names = "xhci", "dev", "otg";
-		phys = <&usb2_phy>, <&usb3_phy>;
-		phy-names = "cdns3,usb2-phy", "cnds3,usb3-phy";
-	};
-- 
cgit 


From 2438ac954bf506d4d7ad398f733103d7c9adbbfd Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Wed, 3 Jul 2019 22:34:02 +0530
Subject: powerpc/pseries: Add documentation for vcpudispatch_stats

Add a document describing the fields provided by
/proc/powerpc/vcpudispatch_stats.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 Documentation/powerpc/vcpudispatch_stats.txt | 68 ++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 Documentation/powerpc/vcpudispatch_stats.txt

(limited to 'Documentation')

diff --git a/Documentation/powerpc/vcpudispatch_stats.txt b/Documentation/powerpc/vcpudispatch_stats.txt
new file mode 100644
index 000000000000..e21476bfd78c
--- /dev/null
+++ b/Documentation/powerpc/vcpudispatch_stats.txt
@@ -0,0 +1,68 @@
+VCPU Dispatch Statistics:
+=========================
+
+For Shared Processor LPARs, the POWER Hypervisor maintains a relatively
+static mapping of the LPAR processors (vcpus) to physical processor
+chips (representing the "home" node) and tries to always dispatch vcpus
+on their associated physical processor chip. However, under certain
+scenarios, vcpus may be dispatched on a different processor chip (away
+from its home node).
+
+/proc/powerpc/vcpudispatch_stats can be used to obtain statistics
+related to the vcpu dispatch behavior. Writing '1' to this file enables
+collecting the statistics, while writing '0' disables the statistics.
+By default, the DTLB log for each vcpu is processed 50 times a second so
+as not to miss any entries. This processing frequency can be changed
+through /proc/powerpc/vcpudispatch_stats_freq.
+
+The statistics themselves are available by reading the procfs file
+/proc/powerpc/vcpudispatch_stats. Each line in the output corresponds to
+a vcpu as represented by the first field, followed by 8 numbers.
+
+The first number corresponds to:
+1. total vcpu dispatches since the beginning of statistics collection
+
+The next 4 numbers represent vcpu dispatch dispersions:
+2. number of times this vcpu was dispatched on the same processor as last
+   time
+3. number of times this vcpu was dispatched on a different processor core
+   as last time, but within the same chip
+4. number of times this vcpu was dispatched on a different chip
+5. number of times this vcpu was dispatches on a different socket/drawer
+(next numa boundary)
+
+The final 3 numbers represent statistics in relation to the home node of
+the vcpu:
+6. number of times this vcpu was dispatched in its home node (chip)
+7. number of times this vcpu was dispatched in a different node
+8. number of times this vcpu was dispatched in a node further away (numa
+distance)
+
+An example output:
+    $ sudo cat /proc/powerpc/vcpudispatch_stats
+    cpu0 6839 4126 2683 30 0 6821 18 0
+    cpu1 2515 1274 1229 12 0 2509 6 0
+    cpu2 2317 1198 1109 10 0 2312 5 0
+    cpu3 2259 1165 1088 6 0 2256 3 0
+    cpu4 2205 1143 1056 6 0 2202 3 0
+    cpu5 2165 1121 1038 6 0 2162 3 0
+    cpu6 2183 1127 1050 6 0 2180 3 0
+    cpu7 2193 1133 1052 8 0 2187 6 0
+    cpu8 2165 1115 1032 18 0 2156 9 0
+    cpu9 2301 1252 1033 16 0 2293 8 0
+    cpu10 2197 1138 1041 18 0 2187 10 0
+    cpu11 2273 1185 1062 26 0 2260 13 0
+    cpu12 2186 1125 1043 18 0 2177 9 0
+    cpu13 2161 1115 1030 16 0 2153 8 0
+    cpu14 2206 1153 1033 20 0 2196 10 0
+    cpu15 2163 1115 1032 16 0 2155 8 0
+
+In the output above, for vcpu0, there have been 6839 dispatches since
+statistics were enabled. 4126 of those dispatches were on the same
+physical cpu as the last time. 2683 were on a different core, but within
+the same chip, while 30 dispatches were on a different chip compared to
+its last dispatch.
+
+Also, out of the total of 6839 dispatches, we see that there have been
+6821 dispatches on the vcpu's home node, while 18 dispatches were
+outside its home node, on a neighbouring chip.
-- 
cgit 


From 8338d93788950e63d12bd1d5eb09e239550e80e9 Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayag@mellanox.com>
Date: Tue, 2 Jul 2019 23:55:11 +0000
Subject: net/mlx5: Added devlink info callback

The callback is invoked using 'devlink dev info <pci>' command and returns
the running and pending firmware version of the HCA and the name of the
kernel driver.

If there is a pending firmware version (a new version is burned but the
HCA still runs with the previous) it is returned as the stored
firmware version. Otherwise, the running version is returned for this
field.

Output example:
$ devlink dev info pci/0000:00:06.0
pci/0000:00:06.0:
  driver mlx5_core
  versions:
      fixed:
        fw.psid MT_0000000009
      running:
        fw.version 16.26.0100
      stored:
        fw.version 16.26.0100

Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../networking/device_drivers/mellanox/mlx5.rst       | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/mellanox/mlx5.rst b/Documentation/networking/device_drivers/mellanox/mlx5.rst
index 4eeef2df912f..214325897732 100644
--- a/Documentation/networking/device_drivers/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/mellanox/mlx5.rst
@@ -10,6 +10,7 @@ Contents
 ========
 
 - `Enabling the driver and kconfig options`_
+- `Devlink info`_
 - `Devlink health reporters`_
 
 Enabling the driver and kconfig options
@@ -101,6 +102,24 @@ Enabling the driver and kconfig options
 - CONFIG_VXLAN: When chosen, mlx5 vxaln support will be enabled.
 - CONFIG_MLXFW: When chosen, mlx5 firmware flashing support will be enabled (via devlink and ethtool).
 
+Devlink info
+============
+
+The devlink info reports the running and stored firmware versions on device.
+It also prints the device PSID which represents the HCA board type ID.
+
+User command example::
+
+   $ devlink dev info pci/0000:00:06.0
+      pci/0000:00:06.0:
+      driver mlx5_core
+      versions:
+         fixed:
+            fw.psid MT_0000000009
+         running:
+            fw.version 16.26.0100
+         stored:
+            fw.version 16.26.0100
 
 Devlink health reporters
 ========================
-- 
cgit 


From 3f57fe28f84d636ca6238f33afe8b88d27c34d1b Mon Sep 17 00:00:00 2001
From: Nick Crews <ncrews@chromium.org>
Date: Wed, 8 May 2019 14:38:25 -0600
Subject: power_supply: wilco_ec: Add charging config driver

Add a driver to control the charging algorithm used on Wilco
devices. See Documentation/ABI/testing/sysfs-class-power-wilco
for the userspace interface and other info.

Signed-off-by: Nick Crews <ncrews@chromium.org>
Reviewed-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 Documentation/ABI/testing/sysfs-class-power-wilco | 30 +++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-class-power-wilco

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-class-power-wilco b/Documentation/ABI/testing/sysfs-class-power-wilco
new file mode 100644
index 000000000000..da1d6ffe5e3c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-power-wilco
@@ -0,0 +1,30 @@
+What:		/sys/class/power_supply/wilco-charger/charge_type
+Date:		April 2019
+KernelVersion:	5.2
+Description:
+		What charging algorithm to use:
+
+		Standard: Fully charges battery at a standard rate.
+		Adaptive: Battery settings adaptively optimized based on
+			typical battery usage pattern.
+		Fast: Battery charges over a shorter period.
+		Trickle: Extends battery lifespan, intended for users who
+			primarily use their Chromebook while connected to AC.
+		Custom: A low and high threshold percentage is specified.
+			Charging begins when level drops below
+			charge_control_start_threshold, and ceases when
+			level is above charge_control_end_threshold.
+
+What:		/sys/class/power_supply/wilco-charger/charge_control_start_threshold
+Date:		April 2019
+KernelVersion:	5.2
+Description:
+		Used when charge_type="Custom", as described above. Measured in
+		percentages. The valid range is [50, 95].
+
+What:		/sys/class/power_supply/wilco-charger/charge_control_end_threshold
+Date:		April 2019
+KernelVersion:	5.2
+Description:
+		Used when charge_type="Custom", as described above. Measured in
+		percentages. The valid range is [55, 100].
-- 
cgit 


From 9dcb98a29b6e81394fa33ca984f3aaad4d0d1393 Mon Sep 17 00:00:00 2001
From: "Hook, Gary" <Gary.Hook@amd.com>
Date: Mon, 24 Jun 2019 18:35:01 +0000
Subject: Documentation: dmaengine: clean up description of dmatest usage

Fix the formatting of the multi-channel test usage example. Call out
the note about parameter ordering and add detail on the settings of
parameters for the new version of dmatest.

Fixes: f80f9988a26d7 ("dmaengine: Documentation: Add documentation for multi chan testing")
Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/driver-api/dmaengine/dmatest.rst | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/dmaengine/dmatest.rst b/Documentation/driver-api/dmaengine/dmatest.rst
index e78d070bb468..ee268d445d38 100644
--- a/Documentation/driver-api/dmaengine/dmatest.rst
+++ b/Documentation/driver-api/dmaengine/dmatest.rst
@@ -44,7 +44,8 @@ Example of usage::
 
     dmatest.timeout=2000 dmatest.iterations=1 dmatest.channel=dma0chan0 dmatest.run=1
 
-Example of multi-channel test usage:
+Example of multi-channel test usage (new in the 5.0 kernel)::
+
     % modprobe dmatest
     % echo 2000 > /sys/module/dmatest/parameters/timeout
     % echo 1 > /sys/module/dmatest/parameters/iterations
@@ -53,15 +54,18 @@ Example of multi-channel test usage:
     % echo dma0chan2 > /sys/module/dmatest/parameters/channel
     % echo 1 > /sys/module/dmatest/parameters/run
 
-Note: the channel parameter should always be the last parameter set prior to
-running the test (setting run=1), this is because upon setting the channel
-parameter, that specific channel is requested using the dmaengine and a thread
-is created with the existing parameters. This thread is set as pending
-and will be executed once run is set to 1. Any parameters set after the thread
-is created are not applied.
+.. note::
+  For all tests, starting in the 5.0 kernel, either single- or multi-channel,
+  the channel parameter(s) must be set after all other parameters. It is at
+  that time that the existing parameter values are acquired for use by the
+  thread(s). All other parameters are shared. Therefore, if changes are made
+  to any of the other parameters, and an additional channel specified, the
+  (shared) parameters used for all threads will use the new values.
+  After the channels are specified, each thread is set as pending. All threads
+  begin execution when the run parameter is set to 1.
 
 .. hint::
-  available channel list could be extracted by running the following command::
+  A list of available channels can be found by running the following command::
 
     % ls -1 /sys/class/dma/
 
@@ -204,6 +208,7 @@ Releasing Channels
 Channels can be freed by setting run to 0.
 
 Example::
+
     % echo dma0chan1 > /sys/module/dmatest/parameters/channel
     dmatest: Added 1 threads using dma0chan1
     % cat /sys/class/dma/dma0chan1/in_use
-- 
cgit 


From f11977be1af94c6d42350fe85fa073d24ba75fac Mon Sep 17 00:00:00 2001
From: Steffen Maier <maier@linux.ibm.com>
Date: Wed, 3 Jul 2019 12:19:47 +0200
Subject: docs: s390: restore important non-kdoc parts of s390dbf.rst

Complements previous ("s390: include/asm/debug.h add kerneldoc markups")
which seemed to have dropped important non-kdoc parts such as
user space interface (level, size, flush)
as well as views and caution regarding strings in the sprintf view.

Signed-off-by: Steffen Maier <maier@linux.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <1562149189-1417-2-git-send-email-maier@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 Documentation/s390/s390dbf.rst | 339 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 339 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/s390/s390dbf.rst b/Documentation/s390/s390dbf.rst
index d2595b548879..01d66251643d 100644
--- a/Documentation/s390/s390dbf.rst
+++ b/Documentation/s390/s390dbf.rst
@@ -112,6 +112,345 @@ Kernel Interfaces:
 Predefined views:
 -----------------
 
+extern struct debug_view debug_hex_ascii_view;
+
+extern struct debug_view debug_raw_view;
+
+extern struct debug_view debug_sprintf_view;
+
+Examples
+--------
+
+::
+
+  /*
+   * hex_ascii- + raw-view Example
+   */
+
+  #include <linux/init.h>
+  #include <asm/debug.h>
+
+  static debug_info_t* debug_info;
+
+  static int init(void)
+  {
+      /* register 4 debug areas with one page each and 4 byte data field */
+
+      debug_info = debug_register ("test", 1, 4, 4 );
+      debug_register_view(debug_info,&debug_hex_ascii_view);
+      debug_register_view(debug_info,&debug_raw_view);
+
+      debug_text_event(debug_info, 4 , "one ");
+      debug_int_exception(debug_info, 4, 4711);
+      debug_event(debug_info, 3, &debug_info, 4);
+
+      return 0;
+  }
+
+  static void cleanup(void)
+  {
+      debug_unregister (debug_info);
+  }
+
+  module_init(init);
+  module_exit(cleanup);
+
+::
+
+  /*
+   * sprintf-view Example
+   */
+
+  #include <linux/init.h>
+  #include <asm/debug.h>
+
+  static debug_info_t* debug_info;
+
+  static int init(void)
+  {
+      /* register 4 debug areas with one page each and data field for */
+      /* format string pointer + 2 varargs (= 3 * sizeof(long))       */
+
+      debug_info = debug_register ("test", 1, 4, sizeof(long) * 3);
+      debug_register_view(debug_info,&debug_sprintf_view);
+
+      debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__);
+      debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info);
+
+      return 0;
+  }
+
+  static void cleanup(void)
+  {
+      debug_unregister (debug_info);
+  }
+
+  module_init(init);
+  module_exit(cleanup);
+
+Debugfs Interface
+-----------------
+Views to the debug logs can be investigated through reading the corresponding
+debugfs-files:
+
+Example::
+
+  > ls /sys/kernel/debug/s390dbf/dasd
+  flush  hex_ascii  level pages raw
+  > cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
+  00 00974733272:680099 2 - 02 0006ad7e  07 ea 4a 90 | ....
+  00 00974733272:682210 2 - 02 0006ade6  46 52 45 45 | FREE
+  00 00974733272:682213 2 - 02 0006adf6  07 ea 4a 90 | ....
+  00 00974733272:682281 1 * 02 0006ab08  41 4c 4c 43 | EXCP
+  01 00974733272:682284 2 - 02 0006ab16  45 43 4b 44 | ECKD
+  01 00974733272:682287 2 - 02 0006ab28  00 00 00 04 | ....
+  01 00974733272:682289 2 - 02 0006ab3e  00 00 00 20 | ...
+  01 00974733272:682297 2 - 02 0006ad7e  07 ea 4a 90 | ....
+  01 00974733272:684384 2 - 00 0006ade6  46 52 45 45 | FREE
+  01 00974733272:684388 2 - 00 0006adf6  07 ea 4a 90 | ....
+
+See section about predefined views for explanation of the above output!
+
+Changing the debug level
+------------------------
+
+Example::
+
+
+  > cat /sys/kernel/debug/s390dbf/dasd/level
+  3
+  > echo "5" > /sys/kernel/debug/s390dbf/dasd/level
+  > cat /sys/kernel/debug/s390dbf/dasd/level
+  5
+
+Flushing debug areas
+--------------------
+Debug areas can be flushed with piping the number of the desired
+area (0...n) to the debugfs file "flush". When using "-" all debug areas
+are flushed.
+
+Examples:
+
+1. Flush debug area 0::
+
+     > echo "0" > /sys/kernel/debug/s390dbf/dasd/flush
+
+2. Flush all debug areas::
+
+     > echo "-" > /sys/kernel/debug/s390dbf/dasd/flush
+
+Changing the size of debug areas
+------------------------------------
+It is possible the change the size of debug areas through piping
+the number of pages to the debugfs file "pages". The resize request will
+also flush the debug areas.
+
+Example:
+
+Define 4 pages for the debug areas of debug feature "dasd"::
+
+  > echo "4" > /sys/kernel/debug/s390dbf/dasd/pages
+
+Stooping the debug feature
+--------------------------
+Example:
+
+1. Check if stopping is allowed::
+
+     > cat /proc/sys/s390dbf/debug_stoppable
+
+2. Stop debug feature::
+
+     > echo 0 > /proc/sys/s390dbf/debug_active
+
+lcrash Interface
+----------------
+It is planned that the dump analysis tool lcrash gets an additional command
+'s390dbf' to display all the debug logs. With this tool it will be possible
+to investigate the debug logs on a live system and with a memory dump after
+a system crash.
+
+Investigating raw memory
+------------------------
+One last possibility to investigate the debug logs at a live
+system and after a system crash is to look at the raw memory
+under VM or at the Service Element.
+It is possible to find the anker of the debug-logs through
+the 'debug_area_first' symbol in the System map. Then one has
+to follow the correct pointers of the data-structures defined
+in debug.h and find the debug-areas in memory.
+Normally modules which use the debug feature will also have
+a global variable with the pointer to the debug-logs. Following
+this pointer it will also be possible to find the debug logs in
+memory.
+
+For this method it is recommended to use '16 * x + 4' byte (x = 0..n)
+for the length of the data field in debug_register() in
+order to see the debug entries well formatted.
+
+
+Predefined Views
+----------------
+
+There are three predefined views: hex_ascii, raw and sprintf.
+The hex_ascii view shows the data field in hex and ascii representation
+(e.g. '45 43 4b 44 | ECKD').
+The raw view returns a bytestream as the debug areas are stored in memory.
+
+The sprintf view formats the debug entries in the same way as the sprintf
+function would do. The sprintf event/exception functions write to the
+debug entry a pointer to the format string (size = sizeof(long))
+and for each vararg a long value. So e.g. for a debug entry with a format
+string plus two varargs one would need to allocate a (3 * sizeof(long))
+byte data area in the debug_register() function.
+
+IMPORTANT:
+  Using "%s" in sprintf event functions is dangerous. You can only
+  use "%s" in the sprintf event functions, if the memory for the passed string
+  is available as long as the debug feature exists. The reason behind this is
+  that due to performance considerations only a pointer to the string is stored
+  in  the debug feature. If you log a string that is freed afterwards, you will
+  get an OOPS when inspecting the debug feature, because then the debug feature
+  will access the already freed memory.
+
+NOTE:
+  If using the sprintf view do NOT use other event/exception functions
+  than the sprintf-event and -exception functions.
+
+The format of the hex_ascii and sprintf view is as follows:
+
+- Number of area
+- Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated
+  Universal Time (UTC), January 1, 1970)
+- level of debug entry
+- Exception flag (* = Exception)
+- Cpu-Number of calling task
+- Return Address to caller
+- data field
+
+The format of the raw view is:
+
+- Header as described in debug.h
+- datafield
+
+A typical line of the hex_ascii view will look like the following (first line
+is only for explanation and will not be displayed when 'cating' the view):
+
+area  time           level exception cpu caller    data (hex + ascii)
+--------------------------------------------------------------------------
+00    00964419409:440690 1 -         00  88023fe
+
+
+Defining views
+--------------
+
+Views are specified with the 'debug_view' structure. There are defined
+callback functions which are used for reading and writing the debugfs files::
+
+  struct debug_view {
+	char name[DEBUG_MAX_PROCF_LEN];
+	debug_prolog_proc_t* prolog_proc;
+	debug_header_proc_t* header_proc;
+	debug_format_proc_t* format_proc;
+	debug_input_proc_t*  input_proc;
+	void*                private_data;
+  };
+
+where::
+
+  typedef int (debug_header_proc_t) (debug_info_t* id,
+				     struct debug_view* view,
+				     int area,
+				     debug_entry_t* entry,
+				     char* out_buf);
+
+  typedef int (debug_format_proc_t) (debug_info_t* id,
+				     struct debug_view* view, char* out_buf,
+				     const char* in_buf);
+  typedef int (debug_prolog_proc_t) (debug_info_t* id,
+				     struct debug_view* view,
+				     char* out_buf);
+  typedef int (debug_input_proc_t) (debug_info_t* id,
+				    struct debug_view* view,
+				    struct file* file, const char* user_buf,
+				    size_t in_buf_size, loff_t* offset);
+
+
+The "private_data" member can be used as pointer to view specific data.
+It is not used by the debug feature itself.
+
+The output when reading a debugfs file is structured like this::
+
+  "prolog_proc output"
+
+  "header_proc output 1"  "format_proc output 1"
+  "header_proc output 2"  "format_proc output 2"
+  "header_proc output 3"  "format_proc output 3"
+  ...
+
+When a view is read from the debugfs, the Debug Feature calls the
+'prolog_proc' once for writing the prolog.
+Then 'header_proc' and 'format_proc' are called for each
+existing debug entry.
+
+The input_proc can be used to implement functionality when it is written to
+the view (e.g. like with 'echo "0" > /sys/kernel/debug/s390dbf/dasd/level).
+
+For header_proc there can be used the default function
+debug_dflt_header_fn() which is defined in debug.h.
+and which produces the same header output as the predefined views.
+E.g::
+
+  00 00964419409:440761 2 - 00 88023ec
+
+In order to see how to use the callback functions check the implementation
+of the default views!
+
+Example::
+
+  #include <asm/debug.h>
+
+  #define UNKNOWNSTR "data: %08x"
+
+  const char* messages[] =
+  {"This error...........\n",
+   "That error...........\n",
+   "Problem..............\n",
+   "Something went wrong.\n",
+   "Everything ok........\n",
+   NULL
+  };
+
+  static int debug_test_format_fn(
+     debug_info_t * id, struct debug_view *view,
+     char *out_buf, const char *in_buf
+  )
+  {
+    int i, rc = 0;
+
+    if(id->buf_size >= 4) {
+       int msg_nr = *((int*)in_buf);
+       if(msg_nr < sizeof(messages)/sizeof(char*) - 1)
+	  rc += sprintf(out_buf, "%s", messages[msg_nr]);
+       else
+	  rc += sprintf(out_buf, UNKNOWNSTR, msg_nr);
+    }
+   out:
+     return rc;
+  }
+
+  struct debug_view debug_test_view = {
+    "myview",                 /* name of view */
+    NULL,                     /* no prolog */
+    &debug_dflt_header_fn,    /* default header for each entry */
+    &debug_test_format_fn,    /* our own format function */
+    NULL,                     /* no input function */
+    NULL                      /* no private data */
+  };
+
+test:
+=====
+
 ::
 
   debug_info_t *debug_info;
-- 
cgit 


From 0328e519a726ff6e4abacba838eb00415171c34b Mon Sep 17 00:00:00 2001
From: Steffen Maier <maier@linux.ibm.com>
Date: Wed, 3 Jul 2019 12:19:48 +0200
Subject: docs: s390: unify and update s390dbf kdocs at debug.c

For non-static-inlines, debug.c already had non-compliant function
header docs. So move the pure prototype kdocs of
("s390: include/asm/debug.h add kerneldoc markups")
from debug.h to debug.c and merge them with the old function docs.
Also, I had the impression that kdoc typically is at the implementation
in the compile unit rather than at the prototype in the header file.

While at it, update the short kdoc description to distinguish the
different functions. And a few more consistency cleanups.

Added a new kdoc for debug_set_critical() since debug.h comments it
as part of the API.

Signed-off-by: Steffen Maier <maier@linux.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <1562149189-1417-3-git-send-email-maier@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 Documentation/s390/s390dbf.rst | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/s390/s390dbf.rst b/Documentation/s390/s390dbf.rst
index 01d66251643d..be42892b159e 100644
--- a/Documentation/s390/s390dbf.rst
+++ b/Documentation/s390/s390dbf.rst
@@ -107,6 +107,7 @@ will stay deactivated.
 Kernel Interfaces:
 ------------------
 
+.. kernel-doc:: arch/s390/kernel/debug.c
 .. kernel-doc:: arch/s390/include/asm/debug.h
 
 Predefined views:
-- 
cgit 


From 499723d12063aab97dfe6b41c822e9c1c74eff3e Mon Sep 17 00:00:00 2001
From: Steffen Maier <maier@linux.ibm.com>
Date: Wed, 3 Jul 2019 12:19:49 +0200
Subject: docs: s390: s390dbf: typos and formatting, update crash command

Signed-off-by: Steffen Maier <maier@linux.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <1562149189-1417-4-git-send-email-maier@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 Documentation/s390/s390dbf.rst | 122 +++++++++++++++++++++++------------------
 1 file changed, 68 insertions(+), 54 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/s390/s390dbf.rst b/Documentation/s390/s390dbf.rst
index be42892b159e..cdb36842b898 100644
--- a/Documentation/s390/s390dbf.rst
+++ b/Documentation/s390/s390dbf.rst
@@ -23,7 +23,8 @@ The debug feature may also very useful for kernel and driver development.
 Design:
 -------
 Kernel components (e.g. device drivers) can register themselves at the debug
-feature with the function call debug_register(). This function initializes a
+feature with the function call :c:func:`debug_register()`.
+This function initializes a
 debug log for the caller. For each debug log exists a number of debug areas
 where exactly one is active at one time.  Each debug area consists of contiguous
 pages in memory. In the debug areas there are stored debug entries (log records)
@@ -44,8 +45,9 @@ The debug areas themselves are also ordered in form of a ring buffer.
 When an exception is thrown in the last debug area, the following debug
 entries are then written again in the very first area.
 
-There are three versions for the event- and exception-calls: One for
-logging raw data, one for text and one for numbers.
+There are four versions for the event- and exception-calls: One for
+logging raw data, one for text, one for numbers (unsigned int and long),
+and one for sprintf-like formatted strings.
 
 Each debug entry contains the following data:
 
@@ -56,29 +58,29 @@ Each debug entry contains the following data:
 - Flag, if entry is an exception or not
 
 The debug logs can be inspected in a live system through entries in
-the debugfs-filesystem. Under the toplevel directory "s390dbf" there is
+the debugfs-filesystem. Under the toplevel directory "``s390dbf``" there is
 a directory for each registered component, which is named like the
 corresponding component. The debugfs normally should be mounted to
-/sys/kernel/debug therefore the debug feature can be accessed under
-/sys/kernel/debug/s390dbf.
+``/sys/kernel/debug`` therefore the debug feature can be accessed under
+``/sys/kernel/debug/s390dbf``.
 
 The content of the directories are files which represent different views
 to the debug log. Each component can decide which views should be
-used through registering them with the function debug_register_view().
+used through registering them with the function :c:func:`debug_register_view()`.
 Predefined views for hex/ascii, sprintf and raw binary data are provided.
 It is also possible to define other views. The content of
 a view can be inspected simply by reading the corresponding debugfs file.
 
 All debug logs have an actual debug level (range from 0 to 6).
-The default level is 3. Event and Exception functions have a 'level'
+The default level is 3. Event and Exception functions have a :c:data:`level`
 parameter. Only debug entries with a level that is lower or equal
 than the actual level are written to the log. This means, when
 writing events, high priority log entries should have a low level
 value whereas low priority entries should have a high one.
 The actual debug level can be changed with the help of the debugfs-filesystem
-through writing a number string "x" to the 'level' debugfs file which is
+through writing a number string "x" to the ``level`` debugfs file which is
 provided for every debug log. Debugging can be switched off completely
-by using "-" on the 'level' debugfs file.
+by using "-" on the ``level`` debugfs file.
 
 Example::
 
@@ -86,21 +88,21 @@ Example::
 
 It is also possible to deactivate the debug feature globally for every
 debug log. You can change the behavior using  2 sysctl parameters in
-/proc/sys/s390dbf:
+``/proc/sys/s390dbf``:
 
 There are currently 2 possible triggers, which stop the debug feature
-globally. The first possibility is to use the "debug_active" sysctl. If
-set to 1 the debug feature is running. If "debug_active" is set to 0 the
+globally. The first possibility is to use the ``debug_active`` sysctl. If
+set to 1 the debug feature is running. If ``debug_active`` is set to 0 the
 debug feature is turned off.
 
 The second trigger which stops the debug feature is a kernel oops.
 That prevents the debug feature from overwriting debug information that
 happened before the oops. After an oops you can reactivate the debug feature
-by piping 1 to /proc/sys/s390dbf/debug_active. Nevertheless, its not
+by piping 1 to ``/proc/sys/s390dbf/debug_active``. Nevertheless, it's not
 suggested to use an oopsed kernel in a production environment.
 
 If you want to disallow the deactivation of the debug feature, you can use
-the "debug_stoppable" sysctl. If you set "debug_stoppable" to 0 the debug
+the ``debug_stoppable`` sysctl. If you set ``debug_stoppable`` to 0 the debug
 feature cannot be stopped. If the debug feature is already stopped, it
 will stay deactivated.
 
@@ -113,16 +115,18 @@ Kernel Interfaces:
 Predefined views:
 -----------------
 
-extern struct debug_view debug_hex_ascii_view;
+.. code-block:: c
 
-extern struct debug_view debug_raw_view;
+  extern struct debug_view debug_hex_ascii_view;
 
-extern struct debug_view debug_sprintf_view;
+  extern struct debug_view debug_raw_view;
+
+  extern struct debug_view debug_sprintf_view;
 
 Examples
 --------
 
-::
+.. code-block:: c
 
   /*
    * hex_ascii- + raw-view Example
@@ -131,15 +135,15 @@ Examples
   #include <linux/init.h>
   #include <asm/debug.h>
 
-  static debug_info_t* debug_info;
+  static debug_info_t *debug_info;
 
   static int init(void)
   {
       /* register 4 debug areas with one page each and 4 byte data field */
 
-      debug_info = debug_register ("test", 1, 4, 4 );
-      debug_register_view(debug_info,&debug_hex_ascii_view);
-      debug_register_view(debug_info,&debug_raw_view);
+      debug_info = debug_register("test", 1, 4, 4 );
+      debug_register_view(debug_info, &debug_hex_ascii_view);
+      debug_register_view(debug_info, &debug_raw_view);
 
       debug_text_event(debug_info, 4 , "one ");
       debug_int_exception(debug_info, 4, 4711);
@@ -150,13 +154,13 @@ Examples
 
   static void cleanup(void)
   {
-      debug_unregister (debug_info);
+      debug_unregister(debug_info);
   }
 
   module_init(init);
   module_exit(cleanup);
 
-::
+.. code-block:: c
 
   /*
    * sprintf-view Example
@@ -165,15 +169,15 @@ Examples
   #include <linux/init.h>
   #include <asm/debug.h>
 
-  static debug_info_t* debug_info;
+  static debug_info_t *debug_info;
 
   static int init(void)
   {
       /* register 4 debug areas with one page each and data field for */
       /* format string pointer + 2 varargs (= 3 * sizeof(long))       */
 
-      debug_info = debug_register ("test", 1, 4, sizeof(long) * 3);
-      debug_register_view(debug_info,&debug_sprintf_view);
+      debug_info = debug_register("test", 1, 4, sizeof(long) * 3);
+      debug_register_view(debug_info, &debug_sprintf_view);
 
       debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__);
       debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info);
@@ -183,7 +187,7 @@ Examples
 
   static void cleanup(void)
   {
-      debug_unregister (debug_info);
+      debug_unregister(debug_info);
   }
 
   module_init(init);
@@ -252,7 +256,7 @@ Define 4 pages for the debug areas of debug feature "dasd"::
 
   > echo "4" > /sys/kernel/debug/s390dbf/dasd/pages
 
-Stooping the debug feature
+Stopping the debug feature
 --------------------------
 Example:
 
@@ -264,10 +268,11 @@ Example:
 
      > echo 0 > /proc/sys/s390dbf/debug_active
 
-lcrash Interface
+crash Interface
 ----------------
-It is planned that the dump analysis tool lcrash gets an additional command
-'s390dbf' to display all the debug logs. With this tool it will be possible
+The ``crash`` tool since v5.1.0 has a built-in command
+``s390dbf`` to display all the debug logs or export them to the file system.
+With this tool it is possible
 to investigate the debug logs on a live system and with a memory dump after
 a system crash.
 
@@ -276,8 +281,8 @@ Investigating raw memory
 One last possibility to investigate the debug logs at a live
 system and after a system crash is to look at the raw memory
 under VM or at the Service Element.
-It is possible to find the anker of the debug-logs through
-the 'debug_area_first' symbol in the System map. Then one has
+It is possible to find the anchor of the debug-logs through
+the ``debug_area_first`` symbol in the System map. Then one has
 to follow the correct pointers of the data-structures defined
 in debug.h and find the debug-areas in memory.
 Normally modules which use the debug feature will also have
@@ -286,7 +291,7 @@ this pointer it will also be possible to find the debug logs in
 memory.
 
 For this method it is recommended to use '16 * x + 4' byte (x = 0..n)
-for the length of the data field in debug_register() in
+for the length of the data field in :c:func:`debug_register()` in
 order to see the debug entries well formatted.
 
 
@@ -295,7 +300,7 @@ Predefined Views
 
 There are three predefined views: hex_ascii, raw and sprintf.
 The hex_ascii view shows the data field in hex and ascii representation
-(e.g. '45 43 4b 44 | ECKD').
+(e.g. ``45 43 4b 44 | ECKD``).
 The raw view returns a bytestream as the debug areas are stored in memory.
 
 The sprintf view formats the debug entries in the same way as the sprintf
@@ -335,18 +340,20 @@ The format of the raw view is:
 - datafield
 
 A typical line of the hex_ascii view will look like the following (first line
-is only for explanation and will not be displayed when 'cating' the view):
+is only for explanation and will not be displayed when 'cating' the view)::
 
-area  time           level exception cpu caller    data (hex + ascii)
---------------------------------------------------------------------------
-00    00964419409:440690 1 -         00  88023fe
+  area  time           level exception cpu caller    data (hex + ascii)
+  --------------------------------------------------------------------------
+  00    00964419409:440690 1 -         00  88023fe
 
 
 Defining views
 --------------
 
 Views are specified with the 'debug_view' structure. There are defined
-callback functions which are used for reading and writing the debugfs files::
+callback functions which are used for reading and writing the debugfs files:
+
+.. code-block:: c
 
   struct debug_view {
 	char name[DEBUG_MAX_PROCF_LEN];
@@ -357,7 +364,9 @@ callback functions which are used for reading and writing the debugfs files::
 	void*                private_data;
   };
 
-where::
+where:
+
+.. code-block:: c
 
   typedef int (debug_header_proc_t) (debug_info_t* id,
 				     struct debug_view* view,
@@ -395,10 +404,10 @@ Then 'header_proc' and 'format_proc' are called for each
 existing debug entry.
 
 The input_proc can be used to implement functionality when it is written to
-the view (e.g. like with 'echo "0" > /sys/kernel/debug/s390dbf/dasd/level).
+the view (e.g. like with ``echo "0" > /sys/kernel/debug/s390dbf/dasd/level``).
 
 For header_proc there can be used the default function
-debug_dflt_header_fn() which is defined in debug.h.
+:c:func:`debug_dflt_header_fn()` which is defined in debug.h.
 and which produces the same header output as the predefined views.
 E.g::
 
@@ -407,7 +416,9 @@ E.g::
 In order to see how to use the callback functions check the implementation
 of the default views!
 
-Example::
+Example:
+
+.. code-block:: c
 
   #include <asm/debug.h>
 
@@ -423,21 +434,20 @@ Example::
   };
 
   static int debug_test_format_fn(
-     debug_info_t * id, struct debug_view *view,
+     debug_info_t *id, struct debug_view *view,
      char *out_buf, const char *in_buf
   )
   {
     int i, rc = 0;
 
-    if(id->buf_size >= 4) {
+    if (id->buf_size >= 4) {
        int msg_nr = *((int*)in_buf);
-       if(msg_nr < sizeof(messages)/sizeof(char*) - 1)
+       if (msg_nr < sizeof(messages) / sizeof(char*) - 1)
 	  rc += sprintf(out_buf, "%s", messages[msg_nr]);
        else
 	  rc += sprintf(out_buf, UNKNOWNSTR, msg_nr);
     }
-   out:
-     return rc;
+    return rc;
   }
 
   struct debug_view debug_test_view = {
@@ -452,13 +462,17 @@ Example::
 test:
 =====
 
-::
+.. code-block:: c
 
   debug_info_t *debug_info;
+  int i;
   ...
-  debug_info = debug_register ("test", 0, 4, 4 ));
+  debug_info = debug_register("test", 0, 4, 4);
   debug_register_view(debug_info, &debug_test_view);
-  for(i = 0; i < 10; i ++) debug_int_event(debug_info, 1, i);
+  for (i = 0; i < 10; i ++)
+    debug_int_event(debug_info, 1, i);
+
+::
 
   > cat /sys/kernel/debug/s390dbf/test/myview
   00 00964419734:611402 1 - 00 88042ca   This error...........
-- 
cgit 


From 3276cc2489641f7f37e9558f5fe9d6ae17a25528 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Tue, 18 Jun 2019 16:17:38 +0100
Subject: arm64: Update silicon-errata.txt for Neoverse-N1 #1349291

Neoverse-N1 affected by #1349291 may report an Uncontained RAS Error
as Unrecoverable. The kernel's architecture code already considers
Unrecoverable errors as fatal as without kernel-first support no
further error-handling is possible.

Now that KVM attributes SError to the host/guest more precisely
the host's architecture code will always handle host errors that
become pending during world-switch.
Errors misclassified by this errata that affected the guest will be
re-injected to the guest as an implementation-defined SError, which can
be uncontained.

Until kernel-first support is implemented, no workaround is needed
for this issue.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 Documentation/arm64/silicon-errata.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index 2735462d5958..51d506a1f8dc 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -63,6 +63,7 @@ stable kernels.
 | ARM            | Cortex-A76      | #1286807        | ARM64_ERRATUM_1286807       |
 | ARM            | Cortex-A76      | #1463225        | ARM64_ERRATUM_1463225       |
 | ARM            | Neoverse-N1     | #1188873,1418040| ARM64_ERRATUM_1418040       |
+| ARM            | Neoverse-N1     | #1349291        | N/A                         |
 | ARM            | MMU-500         | #841119,826419  | N/A                         |
 |                |                 |                 |                             |
 | Cavium         | ThunderX ITS    | #22375,24313    | CAVIUM_ERRATUM_22375        |
-- 
cgit 


From 49caebe9b3e2a83161f4374ac347eb14e11c3b54 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Fri, 3 May 2019 15:27:50 +0100
Subject: KVM: doc: Add API documentation on the KVM_REG_ARM_WORKAROUNDS
 register

Add documentation for the newly defined firmware registers to save and
restore any vulnerability mitigation status.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 Documentation/virtual/kvm/arm/psci.txt | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/virtual/kvm/arm/psci.txt b/Documentation/virtual/kvm/arm/psci.txt
index aafdab887b04..559586fc9d37 100644
--- a/Documentation/virtual/kvm/arm/psci.txt
+++ b/Documentation/virtual/kvm/arm/psci.txt
@@ -28,3 +28,34 @@ The following register is defined:
   - Allows any PSCI version implemented by KVM and compatible with
     v0.2 to be set with SET_ONE_REG
   - Affects the whole VM (even if the register view is per-vcpu)
+
+* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+  Holds the state of the firmware support to mitigate CVE-2017-5715, as
+  offered by KVM to the guest via a HVC call. The workaround is described
+  under SMCCC_ARCH_WORKAROUND_1 in [1].
+  Accepted values are:
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: KVM does not offer
+      firmware support for the workaround. The mitigation status for the
+      guest is unknown.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: The workaround HVC call is
+      available to the guest and required for the mitigation.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: The workaround HVC call
+      is available to the guest, but it is not needed on this VCPU.
+
+* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+  Holds the state of the firmware support to mitigate CVE-2018-3639, as
+  offered by KVM to the guest via a HVC call. The workaround is described
+  under SMCCC_ARCH_WORKAROUND_2 in [1].
+  Accepted values are:
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: A workaround is not
+      available. KVM does not offer firmware support for the workaround.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: The workaround state is
+      unknown. KVM does not offer firmware support for the workaround.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: The workaround is available,
+      and can be disabled by a vCPU. If
+      KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
+      this vCPU.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: The workaround is
+      always active on this vCPU or it is not needed.
+
+[1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
-- 
cgit 


From 051d769f0a36b4642897d909cef980f944ae20ab Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Mon, 1 Jul 2019 16:15:34 +0300
Subject: i2c: i801: Add support for Intel Tiger Lake

Add SMBUS PCI ID for Intel Tiger Lake -LP.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/i2c/busses/i2c-i801 | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801
index d247edcb0f99..04b5de80ce4e 100644
--- a/Documentation/i2c/busses/i2c-i801
+++ b/Documentation/i2c/busses/i2c-i801
@@ -38,6 +38,7 @@ Supported adapters:
   * Intel Ice Lake (PCH)
   * Intel Comet Lake (PCH)
   * Intel Elkhart Lake (PCH)
+  * Intel Tiger Lake (PCH)
    Datasheets: Publicly available at the Intel website
 
 On Intel Patsburg and later chipsets, both the normal host SMBus controller
-- 
cgit 


From ed6182a83924c2385679afd5a3b9cd3d41e2ef44 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Thu, 4 Jul 2019 11:34:02 +0200
Subject: i2c: i801: Documentation update

The i2c-i801 driver documentation needs some dusting:
* Mention disable_features flag 0x20.
* The i2c_ec driver has been removed from the kernel long ago. Driver
  i2c-scmi serves the same purpose for more recent hardware.
* Replace obsolete /proc paths with equivalent /sys paths.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Reviewed-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/i2c/busses/i2c-i801 | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801
index 04b5de80ce4e..f426c13c63a9 100644
--- a/Documentation/i2c/busses/i2c-i801
+++ b/Documentation/i2c/busses/i2c-i801
@@ -60,6 +60,7 @@ question doesn't work as intended for whatever reason. Bit values:
  0x02  disable the block buffer
  0x08  disable the I2C block read functionality
  0x10  don't use interrupts
+ 0x20  disable SMBus Host Notify
 
 
 Description
@@ -120,16 +121,15 @@ BIOS to enable it, it means it has been hidden by the BIOS code. Asus is
 well known for first doing this on their P4B motherboard, and many other
 boards after that. Some vendor machines are affected as well.
 
-The first thing to try is the "i2c_ec" ACPI driver. It could be that the
+The first thing to try is the "i2c-scmi" ACPI driver. It could be that the
 SMBus was hidden on purpose because it'll be driven by ACPI. If the
-i2c_ec driver works for you, just forget about the i2c-i801 driver and
-don't try to unhide the ICH SMBus. Even if i2c_ec doesn't work, you
+i2c-scmi driver works for you, just forget about the i2c-i801 driver and
+don't try to unhide the ICH SMBus. Even if i2c-scmi doesn't work, you
 better make sure that the SMBus isn't used by the ACPI code. Try loading
-the "fan" and "thermal" drivers, and check in /proc/acpi/fan and
-/proc/acpi/thermal_zone. If you find anything there, it's likely that
-the ACPI is accessing the SMBus and it's safer not to unhide it. Only
-once you are certain that ACPI isn't using the SMBus, you can attempt
-to unhide it.
+the "fan" and "thermal" drivers, and check in /sys/class/thermal. If you
+find a thermal zone with type "acpitz", it's likely that the ACPI is
+accessing the SMBus and it's safer not to unhide it. Only once you are
+certain that ACPI isn't using the SMBus, you can attempt to unhide it.
 
 In order to unhide the SMBus, we need to change the value of a PCI
 register before the kernel enumerates the PCI devices. This is done in
-- 
cgit 


From 166f28c3eed269190d310ec611476b78101c309b Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Wed, 3 Jul 2019 11:53:37 +0200
Subject: dt-bindings: i2c: mv64xxx: Fix the example compatible

One example has a compatible that isn't a valid combination according to
the binding, and now that the examples are validated as well, this
generates a warning.

Let's fix this.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Acked-by: Gregory CLEMENT <gregory.clement@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml
index 9a5654ef5670..001f2b7abad0 100644
--- a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml
@@ -105,7 +105,7 @@ examples:
 
   - |
     i2c@11000 {
-        compatible = "marvell,mv78230-i2c", "marvell,mv64xxx-i2c";
+        compatible = "marvell,mv78230-i2c";
         reg = <0x11000 0x100>;
         interrupts = <29>;
         clock-frequency = <100000>;
-- 
cgit 


From 0038617132dea5c0516b6208a6d005ec9293ab94 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Wed, 3 Jul 2019 11:53:38 +0200
Subject: dt-bindings: i2c: sun6i-p2wi: Fix the binding example

Even though the binding claims that the frequency can go up to 6MHz, the
common i2c binding sets a limit at 3MHz, which then triggers a warning.

Since the only SoC that uses that bus uses a frequency of 100kHz, and that
this bus hasn't been found in an SoC for something like 5 years, let's just
fix the example to have a frequency within the acceptable range for i2c.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Acked-by: Gregory CLEMENT <gregory.clement@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml b/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml
index 1804abe24f14..f9d526b7da01 100644
--- a/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml
+++ b/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml
@@ -51,7 +51,7 @@ examples:
         reg = <0x01f03400 0x400>;
         interrupts = <0 39 4>;
         clocks = <&apb0_gates 3>;
-        clock-frequency = <6000000>;
+        clock-frequency = <100000>;
         resets = <&apb0_rst 3>;
         #address-cells = <1>;
         #size-cells = <0>;
-- 
cgit 


From be70e5e774a616ad43e63c0b42c2f6e2709dbf3b Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 14 Jun 2019 09:23:51 +0200
Subject: Bluetooth: hci_mrvl: Add serdev support

This adds serdev support to the Marvell hci uart driver. Only basic
serdev support, none of the fancier features like regulator or enable
GPIO support is added for now.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 .../devicetree/bindings/net/marvell-bluetooth.txt  | 25 ++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/marvell-bluetooth.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/marvell-bluetooth.txt b/Documentation/devicetree/bindings/net/marvell-bluetooth.txt
new file mode 100644
index 000000000000..0e2842296032
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/marvell-bluetooth.txt
@@ -0,0 +1,25 @@
+Marvell Bluetooth Chips
+-----------------------
+
+This documents the binding structure and common properties for serial
+attached Marvell Bluetooth devices. The following chips are included in
+this binding:
+
+* Marvell 88W8897 Bluetooth devices
+
+Required properties:
+ - compatible: should be:
+    "mrvl,88w8897"
+
+Optional properties:
+None so far
+
+Example:
+
+&serial0 {
+	compatible = "ns16550a";
+	...
+	bluetooth {
+		compatible = "mrvl,88w8897";
+	};
+};
-- 
cgit 


From 956f664635ecca59d027d57c6eb5a5cf4b711c9b Mon Sep 17 00:00:00 2001
From: Rocky Liao <rjliao@codeaurora.org>
Date: Thu, 6 Jun 2019 17:40:55 +0800
Subject: dt-bindings: net: bluetooth: Add device property firmware-name for
 QCA6174

This patch adds an optional device property "firmware-name" to allow the
driver to load customized nvm firmware file based on this property.

Signed-off-by: Rocky Liao <rjliao@codeaurora.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
index 7ef6118abd3d..68b67d9db63a 100644
--- a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
+++ b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
@@ -17,6 +17,7 @@ Optional properties for compatible string qcom,qca6174-bt:
 
  - enable-gpios: gpio specifier used to enable chip
  - clocks: clock provided to the controller (SUSCLK_32KHZ)
+ - firmware-name: specify the name of nvm firmware to load
 
 Required properties for compatible string qcom,wcn399x-bt:
 
@@ -28,6 +29,7 @@ Required properties for compatible string qcom,wcn399x-bt:
 Optional properties for compatible string qcom,wcn399x-bt:
 
  - max-speed: see Documentation/devicetree/bindings/serial/slave-device.txt
+ - firmware-name: specify the name of nvm firmware to load
 
 Examples:
 
@@ -40,6 +42,7 @@ serial@7570000 {
 
 		enable-gpios = <&pm8994_gpios 19 GPIO_ACTIVE_HIGH>;
 		clocks = <&divclk4>;
+		firmware-name = "nvm_00440302.bin";
 	};
 };
 
@@ -52,5 +55,6 @@ serial@898000 {
 		vddrf-supply = <&vreg_l17a_1p3>;
 		vddch0-supply = <&vreg_l25a_3p3>;
 		max-speed = <3200000>;
+		firmware-name = "crnv21.bin";
 	};
 };
-- 
cgit 


From 1c576f385a440cd8790516d6227235b4161c6668 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Sun, 2 Jun 2019 09:04:14 +0800
Subject: dt-bindings: net: bluetooth: add boot-gpios property to UART-based
 device

Not every platform has the pinctrl device integrates the GPIO the function
such as MT7621 whose pinctrl and GPIO are separate hardware so adding an
additional boot-gpios property for such platform allows them to bring up
the device.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 .../devicetree/bindings/net/mediatek-bluetooth.txt          | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/mediatek-bluetooth.txt b/Documentation/devicetree/bindings/net/mediatek-bluetooth.txt
index 41a7dcc80f5b..14f23b354a6d 100644
--- a/Documentation/devicetree/bindings/net/mediatek-bluetooth.txt
+++ b/Documentation/devicetree/bindings/net/mediatek-bluetooth.txt
@@ -50,11 +50,24 @@ Required properties:
 		  "mediatek,mt7663u-bluetooth": for MT7663U device
 		  "mediatek,mt7668u-bluetooth": for MT7668U device
 - vcc-supply:	Main voltage regulator
+
+If the pin controller on the platform can support both pinmux and GPIO
+control such as the most of MediaTek platform. Please use below properties.
+
 - pinctrl-names: Should be "default", "runtime"
 - pinctrl-0: Should contain UART RXD low when the device is powered up to
 	     enter proper bootstrap mode.
 - pinctrl-1: Should contain UART mode pin ctrl
 
+Else, the pin controller on the platform only can support pinmux control and
+the GPIO control still has to rely on the dedicated GPIO controller such as
+a legacy MediaTek SoC, MT7621. Please use the below properties.
+
+- boot-gpios:	GPIO same to the pin as UART RXD and used to keep LOW when
+		the device is powered up to enter proper bootstrap mode when
+- pinctrl-names: Should be "default"
+- pinctrl-0: Should contain UART mode pin ctrl
+
 Optional properties:
 
 - reset-gpios:	GPIO used to reset the device whose initial state keeps low,
-- 
cgit 


From 14e3ed84d77e78fbf386006d3e5a0c20150681f5 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Sun, 2 Jun 2019 09:04:15 +0800
Subject: dt-bindings: net: bluetooth: add clock property to UART-based device

Some board requires explicitily control external osscilator via GPIO.
So, add a clock property for an external oscillator for the device.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 Documentation/devicetree/bindings/net/mediatek-bluetooth.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/mediatek-bluetooth.txt b/Documentation/devicetree/bindings/net/mediatek-bluetooth.txt
index 14f23b354a6d..112011c51d5e 100644
--- a/Documentation/devicetree/bindings/net/mediatek-bluetooth.txt
+++ b/Documentation/devicetree/bindings/net/mediatek-bluetooth.txt
@@ -73,6 +73,10 @@ Optional properties:
 - reset-gpios:	GPIO used to reset the device whose initial state keeps low,
 		if the GPIO is missing, then board-level design should be
 		guaranteed.
+- clocks:	Should be the clock specifiers corresponding to the entry in
+		clock-names property. If the clock is missing, then board-level
+		design should be guaranteed.
+- clock-names:	Should contain "osc" entry for the external oscillator.
 - current-speed:  Current baud rate of the device whose defaults to 921600
 
 Example:
-- 
cgit 


From 86116f4d649bec311bbf3ee50a1eb582e3fcfd9c Mon Sep 17 00:00:00 2001
From: Pawel Dembicki <paweldembicki@gmail.com>
Date: Fri, 5 Jul 2019 00:29:04 +0200
Subject: net: dsa: Change DT bindings for Vitesse VSC73xx switches

This commit introduce how to use vsc73xx platform driver.

Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/dsa/vitesse,vsc73xx.txt           | 58 ++++++++++++++++++++--
 1 file changed, 54 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.txt b/Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.txt
index ed4710c40641..bbf4a13f6d75 100644
--- a/Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.txt
+++ b/Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.txt
@@ -2,8 +2,8 @@ Vitesse VSC73xx Switches
 ========================
 
 This defines device tree bindings for the Vitesse VSC73xx switch chips.
-The Vitesse company has been acquired by Microsemi and Microsemi in turn
-acquired by Microchip but retains this vendor branding.
+The Vitesse company has been acquired by Microsemi and Microsemi has
+been acquired Microchip but retains this vendor branding.
 
 The currently supported switch chips are:
 Vitesse VSC7385 SparX-G5 5+1-port Integrated Gigabit Ethernet Switch
@@ -11,8 +11,14 @@ Vitesse VSC7388 SparX-G8 8-port Integrated Gigabit Ethernet Switch
 Vitesse VSC7395 SparX-G5e 5+1-port Integrated Gigabit Ethernet Switch
 Vitesse VSC7398 SparX-G8e 8-port Integrated Gigabit Ethernet Switch
 
-The device tree node is an SPI device so it must reside inside a SPI bus
-device tree node, see spi/spi-bus.txt
+This switch could have two different management interface.
+
+If SPI interface is used, the device tree node is an SPI device so it must
+reside inside a SPI bus device tree node, see spi/spi-bus.txt
+
+When the chip is connected to a parallel memory bus and work in memory-mapped
+I/O mode, a platform device is used to represent the vsc73xx. In this case it
+must reside inside a platform bus device tree node.
 
 Required properties:
 
@@ -38,6 +44,7 @@ and subnodes of DSA switches.
 
 Examples:
 
+SPI:
 switch@0 {
 	compatible = "vitesse,vsc7395";
 	reg = <0>;
@@ -79,3 +86,46 @@ switch@0 {
 		};
 	};
 };
+
+Platform:
+switch@2,0 {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "vitesse,vsc7385";
+	reg = <0x2 0x0 0x20000>;
+	reset-gpios = <&gpio0 12 GPIO_ACTIVE_LOW>;
+
+	ports {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		port@0 {
+			reg = <0>;
+			label = "lan1";
+		};
+		port@1 {
+			reg = <1>;
+			label = "lan2";
+		};
+		port@2 {
+			reg = <2>;
+			label = "lan3";
+		};
+		port@3 {
+			reg = <3>;
+			label = "lan4";
+		};
+		vsc: port@6 {
+			reg = <6>;
+			label = "cpu";
+			ethernet = <&enet0>;
+			phy-mode = "rgmii";
+			fixed-link {
+				speed = <1000>;
+				full-duplex;
+				pause;
+			};
+		};
+	};
+
+};
-- 
cgit 


From 93bad0f5d15f3acd6d2ab0aee4a81ce1fcc6300d Mon Sep 17 00:00:00 2001
From: Hou Zhiqiang <Zhiqiang.Hou@nxp.com>
Date: Fri, 5 Jul 2019 17:56:40 +0800
Subject: dt-bindings: PCI: mobiveil: Change gpio_slave and apb_csr to optional

Change the "gpio_slave" and "apb_csr" to optional, the "gpio_slave"
is not used in current code, and "apb_csr" is not used by some platforms.

Signed-off-by: Hou Zhiqiang <Zhiqiang.Hou@nxp.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Subrahmanya Lingappa <l.subrahmanya@mobiveil.co.in>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Minghuan Lian <Minghuan.Lian@nxp.com>
Reviewed-by: Subrahmanya Lingappa <l.subrahmanya@mobiveil.co.in>
---
 Documentation/devicetree/bindings/pci/mobiveil-pcie.txt | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pci/mobiveil-pcie.txt b/Documentation/devicetree/bindings/pci/mobiveil-pcie.txt
index a618d4787dd7..64156993e052 100644
--- a/Documentation/devicetree/bindings/pci/mobiveil-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/mobiveil-pcie.txt
@@ -10,8 +10,10 @@ Required properties:
 	interrupt source. The value must be 1.
 - compatible: Should contain "mbvl,gpex40-pcie"
 - reg: Should contain PCIe registers location and length
+	Mandatory:
 	"config_axi_slave": PCIe controller registers
 	"csr_axi_slave"	  : Bridge config registers
+	Optional:
 	"gpio_slave"	  : GPIO registers to control slot power
 	"apb_csr"	  : MSI registers
 
-- 
cgit 


From f213fcf078c19d2b1b0876eb935cab20e311743e Mon Sep 17 00:00:00 2001
From: Jerry Hoemann <jerry.hoemann@hpe.com>
Date: Fri, 17 May 2019 14:59:42 -0600
Subject: watchdog/hpwdt: Update documentation

Update documentation to explain new module parameter kdumptimeout.

Signed-off-by: Jerry Hoemann <jerry.hoemann@hpe.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 Documentation/watchdog/hpwdt.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/watchdog/hpwdt.txt b/Documentation/watchdog/hpwdt.txt
index 55df692c5595..35da1413637e 100644
--- a/Documentation/watchdog/hpwdt.txt
+++ b/Documentation/watchdog/hpwdt.txt
@@ -33,6 +33,10 @@ Last reviewed: 08/20/2018
                Default value is set when compiling the kernel. If it is set
                to "Y", then there is no way of disabling the watchdog once
                it has been started.
+ kdumptimeout  Minimum timeout in seconds to apply upon receipt of an NMI
+               before calling panic. (-1) disables the watchdog.  When value
+               is > 0, the timer is reprogrammed with the greater of
+               value or current timeout value.
 
  NOTE: More information about watchdog drivers in general, including the ioctl
        interface to /dev/watchdog can be found in
-- 
cgit 


From dfc01e2c83776f4bcadf4ec0a5cd8407904cd9b3 Mon Sep 17 00:00:00 2001
From: Clément Péron <peron.clem@gmail.com>
Date: Thu, 23 May 2019 17:10:47 +0200
Subject: dt-bindings: watchdog: add Allwinner H6 watchdog
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allwinner H6 has a similar watchdog as the A64 which is already
a compatible of the A31.

This commit add the H6 compatible.

Signed-off-by: Clément Péron <peron.clem@gmail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt b/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt
index 46055254e8dd..e65198d82a2b 100644
--- a/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt
@@ -6,6 +6,7 @@ Required properties:
 	"allwinner,sun4i-a10-wdt"
 	"allwinner,sun6i-a31-wdt"
 	"allwinner,sun50i-a64-wdt","allwinner,sun6i-a31-wdt"
+	"allwinner,sun50i-h6-wdt","allwinner,sun6i-a31-wdt"
 	"allwinner,suniv-f1c100s-wdt", "allwinner,sun4i-a10-wdt"
 - reg : Specifies base physical address and size of the registers.
 
-- 
cgit 


From a3a400da206bd0cf426571633da51547d44f4f42 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 28 Jun 2019 09:30:28 -0300
Subject: docs: infiniband: add it to the driver-api bookset

While this contains some uAPI stuff, it was intended to be read by a
kernel doc. So, let's not move it to a different dir, but, instead, just
add it to the driver-api bookset.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 Documentation/index.rst            | 1 +
 Documentation/infiniband/index.rst | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/index.rst b/Documentation/index.rst
index a7566ef62411..869616b57aa8 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -90,6 +90,7 @@ needed).
 
    driver-api/index
    core-api/index
+   infiniband/index
    media/index
    networking/index
    input/index
diff --git a/Documentation/infiniband/index.rst b/Documentation/infiniband/index.rst
index 22eea64de722..9cd7615438b9 100644
--- a/Documentation/infiniband/index.rst
+++ b/Documentation/infiniband/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ==========
 InfiniBand
-- 
cgit 


From 8c21ead3ea5d896946fb5d85b52b0be757e9bef4 Mon Sep 17 00:00:00 2001
From: Anson Huang <Anson.Huang@nxp.com>
Date: Mon, 27 May 2019 15:03:15 +0800
Subject: dt-bindings: watchdog: move i.MX system controller watchdog binding
 to SCU

i.MX system controller watchdog depends on SCU driver to support
interrupt function, so it needs to be subnode of SCU node in DT,
binding doc should be moved to fsl,scu.txt as well.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 .../devicetree/bindings/arm/freescale/fsl,scu.txt  | 15 ++++++++++++++
 .../bindings/watchdog/fsl-imx-sc-wdt.txt           | 24 ----------------------
 2 files changed, 15 insertions(+), 24 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/watchdog/fsl-imx-sc-wdt.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
index 5d7dbabbb784..1b56557df521 100644
--- a/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
@@ -133,6 +133,16 @@ RTC bindings based on SCU Message Protocol
 Required properties:
 - compatible: should be "fsl,imx8qxp-sc-rtc";
 
+Watchdog bindings based on SCU Message Protocol
+------------------------------------------------------------
+
+Required properties:
+- compatible: should be:
+              "fsl,imx8qxp-sc-wdt"
+              followed by "fsl,imx-sc-wdt";
+Optional properties:
+- timeout-sec: contains the watchdog timeout in seconds.
+
 Example (imx8qxp):
 -------------
 aliases {
@@ -185,6 +195,11 @@ firmware {
 		rtc: rtc {
 			compatible = "fsl,imx8qxp-sc-rtc";
 		};
+
+		watchdog {
+			compatible = "fsl,imx8qxp-sc-wdt", "fsl,imx-sc-wdt";
+			timeout-sec = <60>;
+		};
 	};
 };
 
diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx-sc-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx-sc-wdt.txt
deleted file mode 100644
index 02b87e92ae68..000000000000
--- a/Documentation/devicetree/bindings/watchdog/fsl-imx-sc-wdt.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-* Freescale i.MX System Controller Watchdog
-
-i.MX system controller watchdog is for i.MX SoCs with system controller inside,
-the watchdog is managed by system controller, users can ONLY communicate with
-system controller from secure mode for watchdog operations, so Linux i.MX system
-controller watchdog driver will call ARM SMC API and trap into ARM-Trusted-Firmware
-for watchdog operations, ARM-Trusted-Firmware is running at secure EL3 mode and
-it will request system controller to execute the watchdog operation passed from
-Linux kernel.
-
-Required properties:
-- compatible:	Should be :
-		"fsl,imx8qxp-sc-wdt"
-		followed by "fsl,imx-sc-wdt";
-
-Optional properties:
-- timeout-sec : Contains the watchdog timeout in seconds.
-
-Examples:
-
-watchdog {
-	compatible = "fsl,imx8qxp-sc-wdt", "fsl,imx-sc-wdt";
-	timeout-sec = <60>;
-};
-- 
cgit 


From 4d1c6a0ec2d98e51f950127bf9299531caac53e1 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Date: Wed, 5 Jun 2019 14:06:41 +0000
Subject: watchdog: introduce watchdog.open_timeout commandline parameter

The watchdog framework takes care of feeding a hardware watchdog until
userspace opens /dev/watchdogN. If that never happens for some reason
(buggy init script, corrupt root filesystem or whatnot) but the kernel
itself is fine, the machine stays up indefinitely. This patch allows
setting an upper limit for how long the kernel will take care of the
watchdog, thus ensuring that the watchdog will eventually reset the
machine.

A value of 0 (the default) means infinite timeout, preserving the
current behaviour.

This is particularly useful for embedded devices where some fallback
logic is implemented in the bootloader (e.g., use a different root
partition, boot from network, ...).

There is already handle_boot_enabled serving a similar purpose. However,
such a binary choice is unsuitable if the hardware watchdog cannot be
programmed by the bootloader to provide a timeout long enough for
userspace to get up and running. Many of the embedded devices we see use
external (gpio-triggered) watchdogs with a fixed timeout of the order of
1-2 seconds.

The open timeout only applies for the first open from
userspace. Should userspace need to close the watchdog device, with
the intention of re-opening it shortly, the application can emulate
the open timeout feature by combining the nowayout feature with an
appropriate WDIOC_SETTIMEOUT immediately prior to closing the device.

Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 Documentation/watchdog/watchdog-parameters.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt
index 0b88e333f9e1..32d3606caa65 100644
--- a/Documentation/watchdog/watchdog-parameters.txt
+++ b/Documentation/watchdog/watchdog-parameters.txt
@@ -8,6 +8,14 @@ See Documentation/admin-guide/kernel-parameters.rst for information on
 providing kernel parameters for builtin drivers versus loadable
 modules.
 
+The watchdog core parameter watchdog.open_timeout is the maximum time,
+in seconds, for which the watchdog framework will take care of pinging
+a running hardware watchdog until userspace opens the corresponding
+/dev/watchdogN device. A value of 0 (the default) means an infinite
+timeout. Setting this to a non-zero value can be useful to ensure that
+either userspace comes up properly, or the board gets reset and allows
+fallback logic in the bootloader to try something else.
+
 
 -------------------------------------------------
 acquirewdt:
-- 
cgit 


From 487e4e08221debb1ccf9cb2c249fac379b74cbb2 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Date: Wed, 5 Jun 2019 14:06:43 +0000
Subject: watchdog: introduce CONFIG_WATCHDOG_OPEN_TIMEOUT

This allows setting a default value for the watchdog.open_timeout
commandline parameter via Kconfig.

Some BSPs allow remote updating of the kernel image and root file
system, but updating the bootloader requires physical access. Hence, if
one has a firmware update that requires relaxing the
watchdog.open_timeout a little, the value used must be baked into the
kernel image itself and cannot come from the u-boot environment via the
kernel command line.

Being able to set the initial value in .config doesn't change the fact
that the value on the command line, if present, takes precedence, and is
of course immensely useful for development purposes while one has
console acccess, as well as usable in the cases where one can make a
permanent update of the kernel command line.

Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 Documentation/watchdog/watchdog-parameters.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt
index 32d3606caa65..ec919dc895ff 100644
--- a/Documentation/watchdog/watchdog-parameters.txt
+++ b/Documentation/watchdog/watchdog-parameters.txt
@@ -11,10 +11,10 @@ modules.
 The watchdog core parameter watchdog.open_timeout is the maximum time,
 in seconds, for which the watchdog framework will take care of pinging
 a running hardware watchdog until userspace opens the corresponding
-/dev/watchdogN device. A value of 0 (the default) means an infinite
-timeout. Setting this to a non-zero value can be useful to ensure that
-either userspace comes up properly, or the board gets reset and allows
-fallback logic in the bootloader to try something else.
+/dev/watchdogN device. A value of 0 means an infinite timeout. Setting
+this to a non-zero value can be useful to ensure that either userspace
+comes up properly, or the board gets reset and allows fallback logic
+in the bootloader to try something else.
 
 
 -------------------------------------------------
-- 
cgit 


From 74665686f0e27496ea8aa47221282707f57dc178 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 12 Jun 2019 14:53:01 -0300
Subject: docs: watchdog: convert docs to ReST and rename to *.rst

Convert those documents and prepare them to be part of the kernel
API book, as most of the stuff there are related to the
Kernel interfaces.

Still, in the future, it would make sense to split the docs,
as some of the stuff is clearly focused on sysadmin tasks.

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 Documentation/admin-guide/kernel-parameters.txt    |   2 +-
 Documentation/kernel-per-CPU-kthreads.txt          |   2 +-
 .../watchdog/convert_drivers_to_kernel_api.rst     | 219 ++++++
 .../watchdog/convert_drivers_to_kernel_api.txt     | 218 ------
 Documentation/watchdog/hpwdt.rst                   |  77 +++
 Documentation/watchdog/hpwdt.txt                   |  70 --
 Documentation/watchdog/index.rst                   |  25 +
 Documentation/watchdog/mlx-wdt.rst                 |  56 ++
 Documentation/watchdog/mlx-wdt.txt                 |  52 --
 Documentation/watchdog/pcwd-watchdog.rst           |  71 ++
 Documentation/watchdog/pcwd-watchdog.txt           |  66 --
 Documentation/watchdog/watchdog-api.rst            | 271 ++++++++
 Documentation/watchdog/watchdog-api.txt            | 237 -------
 Documentation/watchdog/watchdog-kernel-api.rst     | 338 ++++++++++
 Documentation/watchdog/watchdog-kernel-api.txt     | 305 ---------
 Documentation/watchdog/watchdog-parameters.rst     | 745 +++++++++++++++++++++
 Documentation/watchdog/watchdog-parameters.txt     | 418 ------------
 Documentation/watchdog/watchdog-pm.rst             |  22 +
 Documentation/watchdog/watchdog-pm.txt             |  19 -
 Documentation/watchdog/wdt.rst                     |  63 ++
 Documentation/watchdog/wdt.txt                     |  50 --
 21 files changed, 1889 insertions(+), 1437 deletions(-)
 create mode 100644 Documentation/watchdog/convert_drivers_to_kernel_api.rst
 delete mode 100644 Documentation/watchdog/convert_drivers_to_kernel_api.txt
 create mode 100644 Documentation/watchdog/hpwdt.rst
 delete mode 100644 Documentation/watchdog/hpwdt.txt
 create mode 100644 Documentation/watchdog/index.rst
 create mode 100644 Documentation/watchdog/mlx-wdt.rst
 delete mode 100644 Documentation/watchdog/mlx-wdt.txt
 create mode 100644 Documentation/watchdog/pcwd-watchdog.rst
 delete mode 100644 Documentation/watchdog/pcwd-watchdog.txt
 create mode 100644 Documentation/watchdog/watchdog-api.rst
 delete mode 100644 Documentation/watchdog/watchdog-api.txt
 create mode 100644 Documentation/watchdog/watchdog-kernel-api.rst
 delete mode 100644 Documentation/watchdog/watchdog-kernel-api.txt
 create mode 100644 Documentation/watchdog/watchdog-parameters.rst
 delete mode 100644 Documentation/watchdog/watchdog-parameters.txt
 create mode 100644 Documentation/watchdog/watchdog-pm.rst
 delete mode 100644 Documentation/watchdog/watchdog-pm.txt
 create mode 100644 Documentation/watchdog/wdt.rst
 delete mode 100644 Documentation/watchdog/wdt.txt

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..da4c05072cfb 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5162,7 +5162,7 @@
 			Default: 3 = cyan.
 
 	watchdog timers	[HW,WDT] For information on watchdog timers,
-			see Documentation/watchdog/watchdog-parameters.txt
+			see Documentation/watchdog/watchdog-parameters.rst
 			or other driver-specific files in the
 			Documentation/watchdog/ directory.
 
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
index 23b0c8b20cd1..5623b9916411 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -348,7 +348,7 @@ To reduce its OS jitter, do at least one of the following:
 2.	Boot with "nosoftlockup=0", which will also prevent these kthreads
 	from being created.  Other related watchdog and softlockup boot
 	parameters may be found in Documentation/admin-guide/kernel-parameters.rst
-	and Documentation/watchdog/watchdog-parameters.txt.
+	and Documentation/watchdog/watchdog-parameters.rst.
 3.	Echo a zero to /proc/sys/kernel/watchdog to disable the
 	watchdog timer.
 4.	Echo a large number of /proc/sys/kernel/watchdog_thresh in
diff --git a/Documentation/watchdog/convert_drivers_to_kernel_api.rst b/Documentation/watchdog/convert_drivers_to_kernel_api.rst
new file mode 100644
index 000000000000..dd934cc08e40
--- /dev/null
+++ b/Documentation/watchdog/convert_drivers_to_kernel_api.rst
@@ -0,0 +1,219 @@
+=========================================================
+Converting old watchdog drivers to the watchdog framework
+=========================================================
+
+by Wolfram Sang <w.sang@pengutronix.de>
+
+Before the watchdog framework came into the kernel, every driver had to
+implement the API on its own. Now, as the framework factored out the common
+components, those drivers can be lightened making it a user of the framework.
+This document shall guide you for this task. The necessary steps are described
+as well as things to look out for.
+
+
+Remove the file_operations struct
+---------------------------------
+
+Old drivers define their own file_operations for actions like open(), write(),
+etc... These are now handled by the framework and just call the driver when
+needed. So, in general, the 'file_operations' struct and assorted functions can
+go. Only very few driver-specific details have to be moved to other functions.
+Here is a overview of the functions and probably needed actions:
+
+- open: Everything dealing with resource management (file-open checks, magic
+  close preparations) can simply go. Device specific stuff needs to go to the
+  driver specific start-function. Note that for some drivers, the start-function
+  also serves as the ping-function. If that is the case and you need start/stop
+  to be balanced (clocks!), you are better off refactoring a separate start-function.
+
+- close: Same hints as for open apply.
+
+- write: Can simply go, all defined behaviour is taken care of by the framework,
+  i.e. ping on write and magic char ('V') handling.
+
+- ioctl: While the driver is allowed to have extensions to the IOCTL interface,
+  the most common ones are handled by the framework, supported by some assistance
+  from the driver:
+
+	WDIOC_GETSUPPORT:
+		Returns the mandatory watchdog_info struct from the driver
+
+	WDIOC_GETSTATUS:
+		Needs the status-callback defined, otherwise returns 0
+
+	WDIOC_GETBOOTSTATUS:
+		Needs the bootstatus member properly set. Make sure it is 0 if you
+		don't have further support!
+
+	WDIOC_SETOPTIONS:
+		No preparations needed
+
+	WDIOC_KEEPALIVE:
+		If wanted, options in watchdog_info need to have WDIOF_KEEPALIVEPING
+		set
+
+	WDIOC_SETTIMEOUT:
+		Options in watchdog_info need to have WDIOF_SETTIMEOUT set
+		and a set_timeout-callback has to be defined. The core will also
+		do limit-checking, if min_timeout and max_timeout in the watchdog
+		device are set. All is optional.
+
+	WDIOC_GETTIMEOUT:
+		No preparations needed
+
+	WDIOC_GETTIMELEFT:
+		It needs get_timeleft() callback to be defined. Otherwise it
+		will return EOPNOTSUPP
+
+  Other IOCTLs can be served using the ioctl-callback. Note that this is mainly
+  intended for porting old drivers; new drivers should not invent private IOCTLs.
+  Private IOCTLs are processed first. When the callback returns with
+  -ENOIOCTLCMD, the IOCTLs of the framework will be tried, too. Any other error
+  is directly given to the user.
+
+Example conversion::
+
+  -static const struct file_operations s3c2410wdt_fops = {
+  -       .owner          = THIS_MODULE,
+  -       .llseek         = no_llseek,
+  -       .write          = s3c2410wdt_write,
+  -       .unlocked_ioctl = s3c2410wdt_ioctl,
+  -       .open           = s3c2410wdt_open,
+  -       .release        = s3c2410wdt_release,
+  -};
+
+Check the functions for device-specific stuff and keep it for later
+refactoring. The rest can go.
+
+
+Remove the miscdevice
+---------------------
+
+Since the file_operations are gone now, you can also remove the 'struct
+miscdevice'. The framework will create it on watchdog_dev_register() called by
+watchdog_register_device()::
+
+  -static struct miscdevice s3c2410wdt_miscdev = {
+  -       .minor          = WATCHDOG_MINOR,
+  -       .name           = "watchdog",
+  -       .fops           = &s3c2410wdt_fops,
+  -};
+
+
+Remove obsolete includes and defines
+------------------------------------
+
+Because of the simplifications, a few defines are probably unused now. Remove
+them. Includes can be removed, too. For example::
+
+  - #include <linux/fs.h>
+  - #include <linux/miscdevice.h> (if MODULE_ALIAS_MISCDEV is not used)
+  - #include <linux/uaccess.h> (if no custom IOCTLs are used)
+
+
+Add the watchdog operations
+---------------------------
+
+All possible callbacks are defined in 'struct watchdog_ops'. You can find it
+explained in 'watchdog-kernel-api.txt' in this directory. start(), stop() and
+owner must be set, the rest are optional. You will easily find corresponding
+functions in the old driver. Note that you will now get a pointer to the
+watchdog_device as a parameter to these functions, so you probably have to
+change the function header. Other changes are most likely not needed, because
+here simply happens the direct hardware access. If you have device-specific
+code left from the above steps, it should be refactored into these callbacks.
+
+Here is a simple example::
+
+  +static struct watchdog_ops s3c2410wdt_ops = {
+  +       .owner = THIS_MODULE,
+  +       .start = s3c2410wdt_start,
+  +       .stop = s3c2410wdt_stop,
+  +       .ping = s3c2410wdt_keepalive,
+  +       .set_timeout = s3c2410wdt_set_heartbeat,
+  +};
+
+A typical function-header change looks like::
+
+  -static void s3c2410wdt_keepalive(void)
+  +static int s3c2410wdt_keepalive(struct watchdog_device *wdd)
+   {
+  ...
+  +
+  +       return 0;
+   }
+
+  ...
+
+  -       s3c2410wdt_keepalive();
+  +       s3c2410wdt_keepalive(&s3c2410_wdd);
+
+
+Add the watchdog device
+-----------------------
+
+Now we need to create a 'struct watchdog_device' and populate it with the
+necessary information for the framework. The struct is also explained in detail
+in 'watchdog-kernel-api.txt' in this directory. We pass it the mandatory
+watchdog_info struct and the newly created watchdog_ops. Often, old drivers
+have their own record-keeping for things like bootstatus and timeout using
+static variables. Those have to be converted to use the members in
+watchdog_device. Note that the timeout values are unsigned int. Some drivers
+use signed int, so this has to be converted, too.
+
+Here is a simple example for a watchdog device::
+
+  +static struct watchdog_device s3c2410_wdd = {
+  +       .info = &s3c2410_wdt_ident,
+  +       .ops = &s3c2410wdt_ops,
+  +};
+
+
+Handle the 'nowayout' feature
+-----------------------------
+
+A few drivers use nowayout statically, i.e. there is no module parameter for it
+and only CONFIG_WATCHDOG_NOWAYOUT determines if the feature is going to be
+used. This needs to be converted by initializing the status variable of the
+watchdog_device like this::
+
+        .status = WATCHDOG_NOWAYOUT_INIT_STATUS,
+
+Most drivers, however, also allow runtime configuration of nowayout, usually
+by adding a module parameter. The conversion for this would be something like::
+
+	watchdog_set_nowayout(&s3c2410_wdd, nowayout);
+
+The module parameter itself needs to stay, everything else related to nowayout
+can go, though. This will likely be some code in open(), close() or write().
+
+
+Register the watchdog device
+----------------------------
+
+Replace misc_register(&miscdev) with watchdog_register_device(&watchdog_dev).
+Make sure the return value gets checked and the error message, if present,
+still fits. Also convert the unregister case::
+
+  -       ret = misc_register(&s3c2410wdt_miscdev);
+  +       ret = watchdog_register_device(&s3c2410_wdd);
+
+  ...
+
+  -       misc_deregister(&s3c2410wdt_miscdev);
+  +       watchdog_unregister_device(&s3c2410_wdd);
+
+
+Update the Kconfig-entry
+------------------------
+
+The entry for the driver now needs to select WATCHDOG_CORE:
+
+  +       select WATCHDOG_CORE
+
+
+Create a patch and send it to upstream
+--------------------------------------
+
+Make sure you understood Documentation/process/submitting-patches.rst and send your patch to
+linux-watchdog@vger.kernel.org. We are looking forward to it :)
diff --git a/Documentation/watchdog/convert_drivers_to_kernel_api.txt b/Documentation/watchdog/convert_drivers_to_kernel_api.txt
deleted file mode 100644
index 9fffb2958d13..000000000000
--- a/Documentation/watchdog/convert_drivers_to_kernel_api.txt
+++ /dev/null
@@ -1,218 +0,0 @@
-Converting old watchdog drivers to the watchdog framework
-by Wolfram Sang <w.sang@pengutronix.de>
-=========================================================
-
-Before the watchdog framework came into the kernel, every driver had to
-implement the API on its own. Now, as the framework factored out the common
-components, those drivers can be lightened making it a user of the framework.
-This document shall guide you for this task. The necessary steps are described
-as well as things to look out for.
-
-
-Remove the file_operations struct
----------------------------------
-
-Old drivers define their own file_operations for actions like open(), write(),
-etc... These are now handled by the framework and just call the driver when
-needed. So, in general, the 'file_operations' struct and assorted functions can
-go. Only very few driver-specific details have to be moved to other functions.
-Here is a overview of the functions and probably needed actions:
-
-- open: Everything dealing with resource management (file-open checks, magic
-  close preparations) can simply go. Device specific stuff needs to go to the
-  driver specific start-function. Note that for some drivers, the start-function
-  also serves as the ping-function. If that is the case and you need start/stop
-  to be balanced (clocks!), you are better off refactoring a separate start-function.
-
-- close: Same hints as for open apply.
-
-- write: Can simply go, all defined behaviour is taken care of by the framework,
-  i.e. ping on write and magic char ('V') handling.
-
-- ioctl: While the driver is allowed to have extensions to the IOCTL interface,
-  the most common ones are handled by the framework, supported by some assistance
-  from the driver:
-
-	WDIOC_GETSUPPORT:
-		Returns the mandatory watchdog_info struct from the driver
-
-	WDIOC_GETSTATUS:
-		Needs the status-callback defined, otherwise returns 0
-
-	WDIOC_GETBOOTSTATUS:
-		Needs the bootstatus member properly set. Make sure it is 0 if you
-		don't have further support!
-
-	WDIOC_SETOPTIONS:
-		No preparations needed
-
-	WDIOC_KEEPALIVE:
-		If wanted, options in watchdog_info need to have WDIOF_KEEPALIVEPING
-		set
-
-	WDIOC_SETTIMEOUT:
-		Options in watchdog_info need to have WDIOF_SETTIMEOUT set
-		and a set_timeout-callback has to be defined. The core will also
-		do limit-checking, if min_timeout and max_timeout in the watchdog
-		device are set. All is optional.
-
-	WDIOC_GETTIMEOUT:
-		No preparations needed
-
-	WDIOC_GETTIMELEFT:
-		It needs get_timeleft() callback to be defined. Otherwise it
-		will return EOPNOTSUPP
-
-  Other IOCTLs can be served using the ioctl-callback. Note that this is mainly
-  intended for porting old drivers; new drivers should not invent private IOCTLs.
-  Private IOCTLs are processed first. When the callback returns with
-  -ENOIOCTLCMD, the IOCTLs of the framework will be tried, too. Any other error
-  is directly given to the user.
-
-Example conversion:
-
--static const struct file_operations s3c2410wdt_fops = {
--       .owner          = THIS_MODULE,
--       .llseek         = no_llseek,
--       .write          = s3c2410wdt_write,
--       .unlocked_ioctl = s3c2410wdt_ioctl,
--       .open           = s3c2410wdt_open,
--       .release        = s3c2410wdt_release,
--};
-
-Check the functions for device-specific stuff and keep it for later
-refactoring. The rest can go.
-
-
-Remove the miscdevice
----------------------
-
-Since the file_operations are gone now, you can also remove the 'struct
-miscdevice'. The framework will create it on watchdog_dev_register() called by
-watchdog_register_device().
-
--static struct miscdevice s3c2410wdt_miscdev = {
--       .minor          = WATCHDOG_MINOR,
--       .name           = "watchdog",
--       .fops           = &s3c2410wdt_fops,
--};
-
-
-Remove obsolete includes and defines
-------------------------------------
-
-Because of the simplifications, a few defines are probably unused now. Remove
-them. Includes can be removed, too. For example:
-
-- #include <linux/fs.h>
-- #include <linux/miscdevice.h> (if MODULE_ALIAS_MISCDEV is not used)
-- #include <linux/uaccess.h> (if no custom IOCTLs are used)
-
-
-Add the watchdog operations
----------------------------
-
-All possible callbacks are defined in 'struct watchdog_ops'. You can find it
-explained in 'watchdog-kernel-api.txt' in this directory. start(), stop() and
-owner must be set, the rest are optional. You will easily find corresponding
-functions in the old driver. Note that you will now get a pointer to the
-watchdog_device as a parameter to these functions, so you probably have to
-change the function header. Other changes are most likely not needed, because
-here simply happens the direct hardware access. If you have device-specific
-code left from the above steps, it should be refactored into these callbacks.
-
-Here is a simple example:
-
-+static struct watchdog_ops s3c2410wdt_ops = {
-+       .owner = THIS_MODULE,
-+       .start = s3c2410wdt_start,
-+       .stop = s3c2410wdt_stop,
-+       .ping = s3c2410wdt_keepalive,
-+       .set_timeout = s3c2410wdt_set_heartbeat,
-+};
-
-A typical function-header change looks like:
-
--static void s3c2410wdt_keepalive(void)
-+static int s3c2410wdt_keepalive(struct watchdog_device *wdd)
- {
-...
-+
-+       return 0;
- }
-
-...
-
--       s3c2410wdt_keepalive();
-+       s3c2410wdt_keepalive(&s3c2410_wdd);
-
-
-Add the watchdog device
------------------------
-
-Now we need to create a 'struct watchdog_device' and populate it with the
-necessary information for the framework. The struct is also explained in detail
-in 'watchdog-kernel-api.txt' in this directory. We pass it the mandatory
-watchdog_info struct and the newly created watchdog_ops. Often, old drivers
-have their own record-keeping for things like bootstatus and timeout using
-static variables. Those have to be converted to use the members in
-watchdog_device. Note that the timeout values are unsigned int. Some drivers
-use signed int, so this has to be converted, too.
-
-Here is a simple example for a watchdog device:
-
-+static struct watchdog_device s3c2410_wdd = {
-+       .info = &s3c2410_wdt_ident,
-+       .ops = &s3c2410wdt_ops,
-+};
-
-
-Handle the 'nowayout' feature
------------------------------
-
-A few drivers use nowayout statically, i.e. there is no module parameter for it
-and only CONFIG_WATCHDOG_NOWAYOUT determines if the feature is going to be
-used. This needs to be converted by initializing the status variable of the
-watchdog_device like this:
-
-        .status = WATCHDOG_NOWAYOUT_INIT_STATUS,
-
-Most drivers, however, also allow runtime configuration of nowayout, usually
-by adding a module parameter. The conversion for this would be something like:
-
-	watchdog_set_nowayout(&s3c2410_wdd, nowayout);
-
-The module parameter itself needs to stay, everything else related to nowayout
-can go, though. This will likely be some code in open(), close() or write().
-
-
-Register the watchdog device
-----------------------------
-
-Replace misc_register(&miscdev) with watchdog_register_device(&watchdog_dev).
-Make sure the return value gets checked and the error message, if present,
-still fits. Also convert the unregister case.
-
--       ret = misc_register(&s3c2410wdt_miscdev);
-+       ret = watchdog_register_device(&s3c2410_wdd);
-
-...
-
--       misc_deregister(&s3c2410wdt_miscdev);
-+       watchdog_unregister_device(&s3c2410_wdd);
-
-
-Update the Kconfig-entry
-------------------------
-
-The entry for the driver now needs to select WATCHDOG_CORE:
-
-+       select WATCHDOG_CORE
-
-
-Create a patch and send it to upstream
---------------------------------------
-
-Make sure you understood Documentation/process/submitting-patches.rst and send your patch to
-linux-watchdog@vger.kernel.org. We are looking forward to it :)
-
diff --git a/Documentation/watchdog/hpwdt.rst b/Documentation/watchdog/hpwdt.rst
new file mode 100644
index 000000000000..a1a12bc58577
--- /dev/null
+++ b/Documentation/watchdog/hpwdt.rst
@@ -0,0 +1,77 @@
+===========================
+HPE iLO NMI Watchdog Driver
+===========================
+
+for iLO based ProLiant Servers
+==============================
+
+Last reviewed: 08/20/2018
+
+
+ The HPE iLO NMI Watchdog driver is a kernel module that provides basic
+ watchdog functionality and handler for the iLO "Generate NMI to System"
+ virtual button.
+
+ All references to iLO in this document imply it also works on iLO2 and all
+ subsequent generations.
+
+ Watchdog functionality is enabled like any other common watchdog driver. That
+ is, an application needs to be started that kicks off the watchdog timer. A
+ basic application exists in tools/testing/selftests/watchdog/ named
+ watchdog-test.c. Simply compile the C file and kick it off. If the system
+ gets into a bad state and hangs, the HPE ProLiant iLO timer register will
+ not be updated in a timely fashion and a hardware system reset (also known as
+ an Automatic Server Recovery (ASR)) event will occur.
+
+ The hpwdt driver also has the following module parameters:
+
+ ============  ================================================================
+ soft_margin   allows the user to set the watchdog timer value.
+               Default value is 30 seconds.
+ timeout       an alias of soft_margin.
+ pretimeout    allows the user to set the watchdog pretimeout value.
+               This is the number of seconds before timeout when an
+               NMI is delivered to the system. Setting the value to
+               zero disables the pretimeout NMI.
+               Default value is 9 seconds.
+ nowayout      basic watchdog parameter that does not allow the timer to
+               be restarted or an impending ASR to be escaped.
+               Default value is set when compiling the kernel. If it is set
+               to "Y", then there is no way of disabling the watchdog once
+               it has been started.
+ kdumptimeout  Minimum timeout in seconds to apply upon receipt of an NMI
+               before calling panic. (-1) disables the watchdog.  When value
+               is > 0, the timer is reprogrammed with the greater of
+               value or current timeout value.
+ ============  ================================================================
+
+ NOTE:
+       More information about watchdog drivers in general, including the ioctl
+       interface to /dev/watchdog can be found in
+       Documentation/watchdog/watchdog-api.rst and Documentation/IPMI.txt.
+
+ Due to limitations in the iLO hardware, the NMI pretimeout if enabled,
+ can only be set to 9 seconds.  Attempts to set pretimeout to other
+ non-zero values will be rounded, possibly to zero.  Users should verify
+ the pretimeout value after attempting to set pretimeout or timeout.
+
+ Upon receipt of an NMI from the iLO, the hpwdt driver will initiate a
+ panic. This is to allow for a crash dump to be collected.  It is incumbent
+ upon the user to have properly configured the system for kdump.
+
+ The default Linux kernel behavior upon panic is to print a kernel tombstone
+ and loop forever.  This is generally not what a watchdog user wants.
+
+ For those wishing to learn more please see:
+	Documentation/kdump/kdump.txt
+	Documentation/admin-guide/kernel-parameters.txt (panic=)
+	Your Linux Distribution specific documentation.
+
+ If the hpwdt does not receive the NMI associated with an expiring timer,
+ the iLO will proceed to reset the system at timeout if the timer hasn't
+ been updated.
+
+--
+
+ The HPE iLO NMI Watchdog Driver and documentation were originally developed
+ by Tom Mingarelli.
diff --git a/Documentation/watchdog/hpwdt.txt b/Documentation/watchdog/hpwdt.txt
deleted file mode 100644
index 35da1413637e..000000000000
--- a/Documentation/watchdog/hpwdt.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-Last reviewed: 08/20/2018
-
-                     HPE iLO NMI Watchdog Driver
-                   for iLO based ProLiant Servers
-
- The HPE iLO NMI Watchdog driver is a kernel module that provides basic
- watchdog functionality and handler for the iLO "Generate NMI to System"
- virtual button.
-
- All references to iLO in this document imply it also works on iLO2 and all
- subsequent generations.
-
- Watchdog functionality is enabled like any other common watchdog driver. That
- is, an application needs to be started that kicks off the watchdog timer. A
- basic application exists in tools/testing/selftests/watchdog/ named
- watchdog-test.c. Simply compile the C file and kick it off. If the system
- gets into a bad state and hangs, the HPE ProLiant iLO timer register will
- not be updated in a timely fashion and a hardware system reset (also known as
- an Automatic Server Recovery (ASR)) event will occur.
-
- The hpwdt driver also has the following module parameters:
-
- soft_margin - allows the user to set the watchdog timer value.
-               Default value is 30 seconds.
- timeout     - an alias of soft_margin.
- pretimeout  - allows the user to set the watchdog pretimeout value.
-               This is the number of seconds before timeout when an
-               NMI is delivered to the system. Setting the value to
-               zero disables the pretimeout NMI.
-               Default value is 9 seconds.
- nowayout    - basic watchdog parameter that does not allow the timer to
-               be restarted or an impending ASR to be escaped.
-               Default value is set when compiling the kernel. If it is set
-               to "Y", then there is no way of disabling the watchdog once
-               it has been started.
- kdumptimeout  Minimum timeout in seconds to apply upon receipt of an NMI
-               before calling panic. (-1) disables the watchdog.  When value
-               is > 0, the timer is reprogrammed with the greater of
-               value or current timeout value.
-
- NOTE: More information about watchdog drivers in general, including the ioctl
-       interface to /dev/watchdog can be found in
-       Documentation/watchdog/watchdog-api.txt and Documentation/IPMI.txt.
-
- Due to limitations in the iLO hardware, the NMI pretimeout if enabled,
- can only be set to 9 seconds.  Attempts to set pretimeout to other
- non-zero values will be rounded, possibly to zero.  Users should verify
- the pretimeout value after attempting to set pretimeout or timeout.
-
- Upon receipt of an NMI from the iLO, the hpwdt driver will initiate a
- panic. This is to allow for a crash dump to be collected.  It is incumbent
- upon the user to have properly configured the system for kdump.
-
- The default Linux kernel behavior upon panic is to print a kernel tombstone
- and loop forever.  This is generally not what a watchdog user wants.
-
- For those wishing to learn more please see:
-	Documentation/kdump/kdump.txt
-	Documentation/admin-guide/kernel-parameters.txt (panic=)
-	Your Linux Distribution specific documentation.
-
- If the hpwdt does not receive the NMI associated with an expiring timer,
- the iLO will proceed to reset the system at timeout if the timer hasn't
- been updated.
-
---
-
- The HPE iLO NMI Watchdog Driver and documentation were originally developed
- by Tom Mingarelli.
-
diff --git a/Documentation/watchdog/index.rst b/Documentation/watchdog/index.rst
new file mode 100644
index 000000000000..33a0de631e84
--- /dev/null
+++ b/Documentation/watchdog/index.rst
@@ -0,0 +1,25 @@
+:orphan:
+
+======================
+Linux Watchdog Support
+======================
+
+.. toctree::
+    :maxdepth: 1
+
+    hpwdt
+    mlx-wdt
+    pcwd-watchdog
+    watchdog-api
+    watchdog-kernel-api
+    watchdog-parameters
+    watchdog-pm
+    wdt
+    convert_drivers_to_kernel_api
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/watchdog/mlx-wdt.rst b/Documentation/watchdog/mlx-wdt.rst
new file mode 100644
index 000000000000..bf5bafac47f0
--- /dev/null
+++ b/Documentation/watchdog/mlx-wdt.rst
@@ -0,0 +1,56 @@
+=========================
+Mellanox watchdog drivers
+=========================
+
+for x86 based system switches
+=============================
+
+This driver provides watchdog functionality for various Mellanox
+Ethernet and Infiniband switch systems.
+
+Mellanox watchdog device is implemented in a programmable logic device.
+
+There are 2 types of HW watchdog implementations.
+
+Type 1:
+  Actual HW timeout can be defined as a power of 2 msec.
+  e.g. timeout 20 sec will be rounded up to 32768 msec.
+  The maximum timeout period is 32 sec (32768 msec.),
+  Get time-left isn't supported
+
+Type 2:
+  Actual HW timeout is defined in sec. and it's the same as
+  a user-defined timeout.
+  Maximum timeout is 255 sec.
+  Get time-left is supported.
+
+Type 1 HW watchdog implementation exist in old systems and
+all new systems have type 2 HW watchdog.
+Two types of HW implementation have also different register map.
+
+Mellanox system can have 2 watchdogs: main and auxiliary.
+Main and auxiliary watchdog devices can be enabled together
+on the same system.
+There are several actions that can be defined in the watchdog:
+system reset, start fans on full speed and increase register counter.
+The last 2 actions are performed without a system reset.
+Actions without reset are provided for auxiliary watchdog device,
+which is optional.
+Watchdog can be started during a probe, in this case it will be
+pinged by watchdog core before watchdog device will be opened by
+user space application.
+Watchdog can be initialised in nowayout way, i.e. oncse started
+it can't be stopped.
+
+This mlx-wdt driver supports both HW watchdog implementations.
+
+Watchdog driver is probed from the common mlx_platform driver.
+Mlx_platform driver provides an appropriate set of registers for
+Mellanox watchdog device, identity name (mlx-wdt-main or mlx-wdt-aux),
+initial timeout, performed action in expiration and configuration flags.
+watchdog configuration flags: nowayout and start_at_boot, hw watchdog
+version - type1 or type2.
+The driver checks during initialization if the previous system reset
+was done by the watchdog. If yes, it makes a notification about this event.
+
+Access to HW registers is performed through a generic regmap interface.
diff --git a/Documentation/watchdog/mlx-wdt.txt b/Documentation/watchdog/mlx-wdt.txt
deleted file mode 100644
index 66eeb78505c3..000000000000
--- a/Documentation/watchdog/mlx-wdt.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-		Mellanox watchdog drivers
-		for x86 based system switches
-
-This driver provides watchdog functionality for various Mellanox
-Ethernet and Infiniband switch systems.
-
-Mellanox watchdog device is implemented in a programmable logic device.
-
-There are 2 types of HW watchdog implementations.
-
-Type 1:
-Actual HW timeout can be defined as a power of 2 msec.
-e.g. timeout 20 sec will be rounded up to 32768 msec.
-The maximum timeout period is 32 sec (32768 msec.),
-Get time-left isn't supported
-
-Type 2:
-Actual HW timeout is defined in sec. and it's the same as
-a user-defined timeout.
-Maximum timeout is 255 sec.
-Get time-left is supported.
-
-Type 1 HW watchdog implementation exist in old systems and
-all new systems have type 2 HW watchdog.
-Two types of HW implementation have also different register map.
-
-Mellanox system can have 2 watchdogs: main and auxiliary.
-Main and auxiliary watchdog devices can be enabled together
-on the same system.
-There are several actions that can be defined in the watchdog:
-system reset, start fans on full speed and increase register counter.
-The last 2 actions are performed without a system reset.
-Actions without reset are provided for auxiliary watchdog device,
-which is optional.
-Watchdog can be started during a probe, in this case it will be
-pinged by watchdog core before watchdog device will be opened by
-user space application.
-Watchdog can be initialised in nowayout way, i.e. oncse started
-it can't be stopped.
-
-This mlx-wdt driver supports both HW watchdog implementations.
-
-Watchdog driver is probed from the common mlx_platform driver.
-Mlx_platform driver provides an appropriate set of registers for
-Mellanox watchdog device, identity name (mlx-wdt-main or mlx-wdt-aux),
-initial timeout, performed action in expiration and configuration flags.
-watchdog configuration flags: nowayout and start_at_boot, hw watchdog
-version - type1 or type2.
-The driver checks during initialization if the previous system reset
-was done by the watchdog. If yes, it makes a notification about this event.
-
-Access to HW registers is performed through a generic regmap interface.
diff --git a/Documentation/watchdog/pcwd-watchdog.rst b/Documentation/watchdog/pcwd-watchdog.rst
new file mode 100644
index 000000000000..405e2a370082
--- /dev/null
+++ b/Documentation/watchdog/pcwd-watchdog.rst
@@ -0,0 +1,71 @@
+===================================
+Berkshire Products PC Watchdog Card
+===================================
+
+Last reviewed: 10/05/2007
+
+Support for ISA Cards  Revision A and C
+=======================================
+
+Documentation and Driver by Ken Hollis <kenji@bitgate.com>
+
+ The PC Watchdog is a card that offers the same type of functionality that
+ the WDT card does, only it doesn't require an IRQ to run.  Furthermore,
+ the Revision C card allows you to monitor any IO Port to automatically
+ trigger the card into being reset.  This way you can make the card
+ monitor hard drive status, or anything else you need.
+
+ The Watchdog Driver has one basic role: to talk to the card and send
+ signals to it so it doesn't reset your computer ... at least during
+ normal operation.
+
+ The Watchdog Driver will automatically find your watchdog card, and will
+ attach a running driver for use with that card.  After the watchdog
+ drivers have initialized, you can then talk to the card using a PC
+ Watchdog program.
+
+ I suggest putting a "watchdog -d" before the beginning of an fsck, and
+ a "watchdog -e -t 1" immediately after the end of an fsck.  (Remember
+ to run the program with an "&" to run it in the background!)
+
+ If you want to write a program to be compatible with the PC Watchdog
+ driver, simply use of modify the watchdog test program:
+ tools/testing/selftests/watchdog/watchdog-test.c
+
+
+ Other IOCTL functions include:
+
+	WDIOC_GETSUPPORT
+		This returns the support of the card itself.  This
+		returns in structure "PCWDS" which returns:
+
+			options = WDIOS_TEMPPANIC
+				  (This card supports temperature)
+			firmware_version = xxxx
+				  (Firmware version of the card)
+
+	WDIOC_GETSTATUS
+		This returns the status of the card, with the bits of
+		WDIOF_* bitwise-anded into the value.  (The comments
+		are in linux/pcwd.h)
+
+	WDIOC_GETBOOTSTATUS
+		This returns the status of the card that was reported
+		at bootup.
+
+	WDIOC_GETTEMP
+		This returns the temperature of the card.  (You can also
+		read /dev/watchdog, which gives a temperature update
+		every second.)
+
+	WDIOC_SETOPTIONS
+		This lets you set the options of the card.  You can either
+		enable or disable the card this way.
+
+	WDIOC_KEEPALIVE
+		This pings the card to tell it not to reset your computer.
+
+ And that's all she wrote!
+
+ -- Ken Hollis
+    (kenji@bitgate.com)
diff --git a/Documentation/watchdog/pcwd-watchdog.txt b/Documentation/watchdog/pcwd-watchdog.txt
deleted file mode 100644
index b8e60a441a43..000000000000
--- a/Documentation/watchdog/pcwd-watchdog.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-Last reviewed: 10/05/2007
-
-                     Berkshire Products PC Watchdog Card
-                   Support for ISA Cards  Revision A and C
-           Documentation and Driver by Ken Hollis <kenji@bitgate.com>
-
- The PC Watchdog is a card that offers the same type of functionality that
- the WDT card does, only it doesn't require an IRQ to run.  Furthermore,
- the Revision C card allows you to monitor any IO Port to automatically
- trigger the card into being reset.  This way you can make the card
- monitor hard drive status, or anything else you need.
-
- The Watchdog Driver has one basic role: to talk to the card and send
- signals to it so it doesn't reset your computer ... at least during
- normal operation.
-
- The Watchdog Driver will automatically find your watchdog card, and will
- attach a running driver for use with that card.  After the watchdog
- drivers have initialized, you can then talk to the card using a PC
- Watchdog program.
-
- I suggest putting a "watchdog -d" before the beginning of an fsck, and
- a "watchdog -e -t 1" immediately after the end of an fsck.  (Remember
- to run the program with an "&" to run it in the background!)
-
- If you want to write a program to be compatible with the PC Watchdog
- driver, simply use of modify the watchdog test program:
- tools/testing/selftests/watchdog/watchdog-test.c
-
-
- Other IOCTL functions include:
-
-	WDIOC_GETSUPPORT
-		This returns the support of the card itself.  This
-		returns in structure "PCWDS" which returns:
-			options = WDIOS_TEMPPANIC
-				  (This card supports temperature)
-			firmware_version = xxxx
-				  (Firmware version of the card)
-
-	WDIOC_GETSTATUS
-		This returns the status of the card, with the bits of
-		WDIOF_* bitwise-anded into the value.  (The comments
-		are in linux/pcwd.h)
-
-	WDIOC_GETBOOTSTATUS
-		This returns the status of the card that was reported
-		at bootup.
-
-	WDIOC_GETTEMP
-		This returns the temperature of the card.  (You can also
-		read /dev/watchdog, which gives a temperature update
-		every second.)
-
-	WDIOC_SETOPTIONS
-		This lets you set the options of the card.  You can either
-		enable or disable the card this way.
-
-	WDIOC_KEEPALIVE
-		This pings the card to tell it not to reset your computer.
-
- And that's all she wrote!
-
- -- Ken Hollis
-    (kenji@bitgate.com)
-
diff --git a/Documentation/watchdog/watchdog-api.rst b/Documentation/watchdog/watchdog-api.rst
new file mode 100644
index 000000000000..c6c1e9fa9f73
--- /dev/null
+++ b/Documentation/watchdog/watchdog-api.rst
@@ -0,0 +1,271 @@
+=============================
+The Linux Watchdog driver API
+=============================
+
+Last reviewed: 10/05/2007
+
+
+
+Copyright 2002 Christer Weingel <wingel@nano-system.com>
+
+Some parts of this document are copied verbatim from the sbc60xxwdt
+driver which is (c) Copyright 2000 Jakob Oestergaard <jakob@ostenfeld.dk>
+
+This document describes the state of the Linux 2.4.18 kernel.
+
+Introduction
+============
+
+A Watchdog Timer (WDT) is a hardware circuit that can reset the
+computer system in case of a software fault.  You probably knew that
+already.
+
+Usually a userspace daemon will notify the kernel watchdog driver via the
+/dev/watchdog special device file that userspace is still alive, at
+regular intervals.  When such a notification occurs, the driver will
+usually tell the hardware watchdog that everything is in order, and
+that the watchdog should wait for yet another little while to reset
+the system.  If userspace fails (RAM error, kernel bug, whatever), the
+notifications cease to occur, and the hardware watchdog will reset the
+system (causing a reboot) after the timeout occurs.
+
+The Linux watchdog API is a rather ad-hoc construction and different
+drivers implement different, and sometimes incompatible, parts of it.
+This file is an attempt to document the existing usage and allow
+future driver writers to use it as a reference.
+
+The simplest API
+================
+
+All drivers support the basic mode of operation, where the watchdog
+activates as soon as /dev/watchdog is opened and will reboot unless
+the watchdog is pinged within a certain time, this time is called the
+timeout or margin.  The simplest way to ping the watchdog is to write
+some data to the device.  So a very simple watchdog daemon would look
+like this source file:  see samples/watchdog/watchdog-simple.c
+
+A more advanced driver could for example check that a HTTP server is
+still responding before doing the write call to ping the watchdog.
+
+When the device is closed, the watchdog is disabled, unless the "Magic
+Close" feature is supported (see below).  This is not always such a
+good idea, since if there is a bug in the watchdog daemon and it
+crashes the system will not reboot.  Because of this, some of the
+drivers support the configuration option "Disable watchdog shutdown on
+close", CONFIG_WATCHDOG_NOWAYOUT.  If it is set to Y when compiling
+the kernel, there is no way of disabling the watchdog once it has been
+started.  So, if the watchdog daemon crashes, the system will reboot
+after the timeout has passed. Watchdog devices also usually support
+the nowayout module parameter so that this option can be controlled at
+runtime.
+
+Magic Close feature
+===================
+
+If a driver supports "Magic Close", the driver will not disable the
+watchdog unless a specific magic character 'V' has been sent to
+/dev/watchdog just before closing the file.  If the userspace daemon
+closes the file without sending this special character, the driver
+will assume that the daemon (and userspace in general) died, and will
+stop pinging the watchdog without disabling it first.  This will then
+cause a reboot if the watchdog is not re-opened in sufficient time.
+
+The ioctl API
+=============
+
+All conforming drivers also support an ioctl API.
+
+Pinging the watchdog using an ioctl:
+
+All drivers that have an ioctl interface support at least one ioctl,
+KEEPALIVE.  This ioctl does exactly the same thing as a write to the
+watchdog device, so the main loop in the above program could be
+replaced with::
+
+	while (1) {
+		ioctl(fd, WDIOC_KEEPALIVE, 0);
+		sleep(10);
+	}
+
+the argument to the ioctl is ignored.
+
+Setting and getting the timeout
+===============================
+
+For some drivers it is possible to modify the watchdog timeout on the
+fly with the SETTIMEOUT ioctl, those drivers have the WDIOF_SETTIMEOUT
+flag set in their option field.  The argument is an integer
+representing the timeout in seconds.  The driver returns the real
+timeout used in the same variable, and this timeout might differ from
+the requested one due to limitation of the hardware::
+
+    int timeout = 45;
+    ioctl(fd, WDIOC_SETTIMEOUT, &timeout);
+    printf("The timeout was set to %d seconds\n", timeout);
+
+This example might actually print "The timeout was set to 60 seconds"
+if the device has a granularity of minutes for its timeout.
+
+Starting with the Linux 2.4.18 kernel, it is possible to query the
+current timeout using the GETTIMEOUT ioctl::
+
+    ioctl(fd, WDIOC_GETTIMEOUT, &timeout);
+    printf("The timeout was is %d seconds\n", timeout);
+
+Pretimeouts
+===========
+
+Some watchdog timers can be set to have a trigger go off before the
+actual time they will reset the system.  This can be done with an NMI,
+interrupt, or other mechanism.  This allows Linux to record useful
+information (like panic information and kernel coredumps) before it
+resets::
+
+    pretimeout = 10;
+    ioctl(fd, WDIOC_SETPRETIMEOUT, &pretimeout);
+
+Note that the pretimeout is the number of seconds before the time
+when the timeout will go off.  It is not the number of seconds until
+the pretimeout.  So, for instance, if you set the timeout to 60 seconds
+and the pretimeout to 10 seconds, the pretimeout will go off in 50
+seconds.  Setting a pretimeout to zero disables it.
+
+There is also a get function for getting the pretimeout::
+
+    ioctl(fd, WDIOC_GETPRETIMEOUT, &timeout);
+    printf("The pretimeout was is %d seconds\n", timeout);
+
+Not all watchdog drivers will support a pretimeout.
+
+Get the number of seconds before reboot
+=======================================
+
+Some watchdog drivers have the ability to report the remaining time
+before the system will reboot. The WDIOC_GETTIMELEFT is the ioctl
+that returns the number of seconds before reboot::
+
+    ioctl(fd, WDIOC_GETTIMELEFT, &timeleft);
+    printf("The timeout was is %d seconds\n", timeleft);
+
+Environmental monitoring
+========================
+
+All watchdog drivers are required return more information about the system,
+some do temperature, fan and power level monitoring, some can tell you
+the reason for the last reboot of the system.  The GETSUPPORT ioctl is
+available to ask what the device can do::
+
+	struct watchdog_info ident;
+	ioctl(fd, WDIOC_GETSUPPORT, &ident);
+
+the fields returned in the ident struct are:
+
+	================	=============================================
+        identity		a string identifying the watchdog driver
+	firmware_version	the firmware version of the card if available
+	options			a flags describing what the device supports
+	================	=============================================
+
+the options field can have the following bits set, and describes what
+kind of information that the GET_STATUS and GET_BOOT_STATUS ioctls can
+return.   [FIXME -- Is this correct?]
+
+	================	=========================
+	WDIOF_OVERHEAT		Reset due to CPU overheat
+	================	=========================
+
+The machine was last rebooted by the watchdog because the thermal limit was
+exceeded:
+
+	==============		==========
+	WDIOF_FANFAULT		Fan failed
+	==============		==========
+
+A system fan monitored by the watchdog card has failed
+
+	=============		================
+	WDIOF_EXTERN1		External relay 1
+	=============		================
+
+External monitoring relay/source 1 was triggered. Controllers intended for
+real world applications include external monitoring pins that will trigger
+a reset.
+
+	=============		================
+	WDIOF_EXTERN2		External relay 2
+	=============		================
+
+External monitoring relay/source 2 was triggered
+
+	================	=====================
+	WDIOF_POWERUNDER	Power bad/power fault
+	================	=====================
+
+The machine is showing an undervoltage status
+
+	===============		=============================
+	WDIOF_CARDRESET		Card previously reset the CPU
+	===============		=============================
+
+The last reboot was caused by the watchdog card
+
+	================	=====================
+	WDIOF_POWEROVER		Power over voltage
+	================	=====================
+
+The machine is showing an overvoltage status. Note that if one level is
+under and one over both bits will be set - this may seem odd but makes
+sense.
+
+	===================	=====================
+	WDIOF_KEEPALIVEPING	Keep alive ping reply
+	===================	=====================
+
+The watchdog saw a keepalive ping since it was last queried.
+
+	================	=======================
+	WDIOF_SETTIMEOUT	Can set/get the timeout
+	================	=======================
+
+The watchdog can do pretimeouts.
+
+	================	================================
+	WDIOF_PRETIMEOUT	Pretimeout (in seconds), get/set
+	================	================================
+
+
+For those drivers that return any bits set in the option field, the
+GETSTATUS and GETBOOTSTATUS ioctls can be used to ask for the current
+status, and the status at the last reboot, respectively::
+
+    int flags;
+    ioctl(fd, WDIOC_GETSTATUS, &flags);
+
+    or
+
+    ioctl(fd, WDIOC_GETBOOTSTATUS, &flags);
+
+Note that not all devices support these two calls, and some only
+support the GETBOOTSTATUS call.
+
+Some drivers can measure the temperature using the GETTEMP ioctl.  The
+returned value is the temperature in degrees fahrenheit::
+
+    int temperature;
+    ioctl(fd, WDIOC_GETTEMP, &temperature);
+
+Finally the SETOPTIONS ioctl can be used to control some aspects of
+the cards operation::
+
+    int options = 0;
+    ioctl(fd, WDIOC_SETOPTIONS, &options);
+
+The following options are available:
+
+	=================	================================
+	WDIOS_DISABLECARD	Turn off the watchdog timer
+	WDIOS_ENABLECARD	Turn on the watchdog timer
+	WDIOS_TEMPPANIC		Kernel panic on temperature trip
+	=================	================================
+
+[FIXME -- better explanations]
diff --git a/Documentation/watchdog/watchdog-api.txt b/Documentation/watchdog/watchdog-api.txt
deleted file mode 100644
index 0e62ba33b7fb..000000000000
--- a/Documentation/watchdog/watchdog-api.txt
+++ /dev/null
@@ -1,237 +0,0 @@
-Last reviewed: 10/05/2007
-
-
-The Linux Watchdog driver API.
-
-Copyright 2002 Christer Weingel <wingel@nano-system.com>
-
-Some parts of this document are copied verbatim from the sbc60xxwdt
-driver which is (c) Copyright 2000 Jakob Oestergaard <jakob@ostenfeld.dk>
-
-This document describes the state of the Linux 2.4.18 kernel.
-
-Introduction:
-
-A Watchdog Timer (WDT) is a hardware circuit that can reset the
-computer system in case of a software fault.  You probably knew that
-already.
-
-Usually a userspace daemon will notify the kernel watchdog driver via the
-/dev/watchdog special device file that userspace is still alive, at
-regular intervals.  When such a notification occurs, the driver will
-usually tell the hardware watchdog that everything is in order, and
-that the watchdog should wait for yet another little while to reset
-the system.  If userspace fails (RAM error, kernel bug, whatever), the
-notifications cease to occur, and the hardware watchdog will reset the
-system (causing a reboot) after the timeout occurs.
-
-The Linux watchdog API is a rather ad-hoc construction and different
-drivers implement different, and sometimes incompatible, parts of it.
-This file is an attempt to document the existing usage and allow
-future driver writers to use it as a reference.
-
-The simplest API:
-
-All drivers support the basic mode of operation, where the watchdog
-activates as soon as /dev/watchdog is opened and will reboot unless
-the watchdog is pinged within a certain time, this time is called the
-timeout or margin.  The simplest way to ping the watchdog is to write
-some data to the device.  So a very simple watchdog daemon would look
-like this source file:  see samples/watchdog/watchdog-simple.c
-
-A more advanced driver could for example check that a HTTP server is
-still responding before doing the write call to ping the watchdog.
-
-When the device is closed, the watchdog is disabled, unless the "Magic
-Close" feature is supported (see below).  This is not always such a
-good idea, since if there is a bug in the watchdog daemon and it
-crashes the system will not reboot.  Because of this, some of the
-drivers support the configuration option "Disable watchdog shutdown on
-close", CONFIG_WATCHDOG_NOWAYOUT.  If it is set to Y when compiling
-the kernel, there is no way of disabling the watchdog once it has been
-started.  So, if the watchdog daemon crashes, the system will reboot
-after the timeout has passed. Watchdog devices also usually support
-the nowayout module parameter so that this option can be controlled at
-runtime.
-
-Magic Close feature:
-
-If a driver supports "Magic Close", the driver will not disable the
-watchdog unless a specific magic character 'V' has been sent to
-/dev/watchdog just before closing the file.  If the userspace daemon
-closes the file without sending this special character, the driver
-will assume that the daemon (and userspace in general) died, and will
-stop pinging the watchdog without disabling it first.  This will then
-cause a reboot if the watchdog is not re-opened in sufficient time.
-
-The ioctl API:
-
-All conforming drivers also support an ioctl API.
-
-Pinging the watchdog using an ioctl:
-
-All drivers that have an ioctl interface support at least one ioctl,
-KEEPALIVE.  This ioctl does exactly the same thing as a write to the
-watchdog device, so the main loop in the above program could be
-replaced with:
-
-	while (1) {
-		ioctl(fd, WDIOC_KEEPALIVE, 0);
-		sleep(10);
-	}
-
-the argument to the ioctl is ignored.
-
-Setting and getting the timeout:
-
-For some drivers it is possible to modify the watchdog timeout on the
-fly with the SETTIMEOUT ioctl, those drivers have the WDIOF_SETTIMEOUT
-flag set in their option field.  The argument is an integer
-representing the timeout in seconds.  The driver returns the real
-timeout used in the same variable, and this timeout might differ from
-the requested one due to limitation of the hardware.
-
-    int timeout = 45;
-    ioctl(fd, WDIOC_SETTIMEOUT, &timeout);
-    printf("The timeout was set to %d seconds\n", timeout);
-
-This example might actually print "The timeout was set to 60 seconds"
-if the device has a granularity of minutes for its timeout.
-
-Starting with the Linux 2.4.18 kernel, it is possible to query the
-current timeout using the GETTIMEOUT ioctl.
-
-    ioctl(fd, WDIOC_GETTIMEOUT, &timeout);
-    printf("The timeout was is %d seconds\n", timeout);
-
-Pretimeouts:
-
-Some watchdog timers can be set to have a trigger go off before the
-actual time they will reset the system.  This can be done with an NMI,
-interrupt, or other mechanism.  This allows Linux to record useful
-information (like panic information and kernel coredumps) before it
-resets.
-
-    pretimeout = 10;
-    ioctl(fd, WDIOC_SETPRETIMEOUT, &pretimeout);
-
-Note that the pretimeout is the number of seconds before the time
-when the timeout will go off.  It is not the number of seconds until
-the pretimeout.  So, for instance, if you set the timeout to 60 seconds
-and the pretimeout to 10 seconds, the pretimeout will go off in 50
-seconds.  Setting a pretimeout to zero disables it.
-
-There is also a get function for getting the pretimeout:
-
-    ioctl(fd, WDIOC_GETPRETIMEOUT, &timeout);
-    printf("The pretimeout was is %d seconds\n", timeout);
-
-Not all watchdog drivers will support a pretimeout.
-
-Get the number of seconds before reboot:
-
-Some watchdog drivers have the ability to report the remaining time
-before the system will reboot. The WDIOC_GETTIMELEFT is the ioctl
-that returns the number of seconds before reboot.
-
-    ioctl(fd, WDIOC_GETTIMELEFT, &timeleft);
-    printf("The timeout was is %d seconds\n", timeleft);
-
-Environmental monitoring:
-
-All watchdog drivers are required return more information about the system,
-some do temperature, fan and power level monitoring, some can tell you
-the reason for the last reboot of the system.  The GETSUPPORT ioctl is
-available to ask what the device can do:
-
-	struct watchdog_info ident;
-	ioctl(fd, WDIOC_GETSUPPORT, &ident);
-
-the fields returned in the ident struct are:
-
-        identity		a string identifying the watchdog driver
-	firmware_version	the firmware version of the card if available
-	options			a flags describing what the device supports
-
-the options field can have the following bits set, and describes what
-kind of information that the GET_STATUS and GET_BOOT_STATUS ioctls can
-return.   [FIXME -- Is this correct?]
-
-	WDIOF_OVERHEAT		Reset due to CPU overheat
-
-The machine was last rebooted by the watchdog because the thermal limit was
-exceeded
-
-	WDIOF_FANFAULT		Fan failed
-
-A system fan monitored by the watchdog card has failed
-
-	WDIOF_EXTERN1		External relay 1
-
-External monitoring relay/source 1 was triggered. Controllers intended for
-real world applications include external monitoring pins that will trigger
-a reset.
-
-	WDIOF_EXTERN2		External relay 2
-
-External monitoring relay/source 2 was triggered
-
-	WDIOF_POWERUNDER	Power bad/power fault
-
-The machine is showing an undervoltage status
-
-	WDIOF_CARDRESET		Card previously reset the CPU
-
-The last reboot was caused by the watchdog card
-
-	WDIOF_POWEROVER		Power over voltage
-
-The machine is showing an overvoltage status. Note that if one level is
-under and one over both bits will be set - this may seem odd but makes
-sense.
-
-	WDIOF_KEEPALIVEPING	Keep alive ping reply
-
-The watchdog saw a keepalive ping since it was last queried.
-
-	WDIOF_SETTIMEOUT	Can set/get the timeout
-
-The watchdog can do pretimeouts.
-
-	WDIOF_PRETIMEOUT	Pretimeout (in seconds), get/set
-
-
-For those drivers that return any bits set in the option field, the
-GETSTATUS and GETBOOTSTATUS ioctls can be used to ask for the current
-status, and the status at the last reboot, respectively.  
-
-    int flags;
-    ioctl(fd, WDIOC_GETSTATUS, &flags);
-
-    or
-
-    ioctl(fd, WDIOC_GETBOOTSTATUS, &flags);
-
-Note that not all devices support these two calls, and some only
-support the GETBOOTSTATUS call.
-
-Some drivers can measure the temperature using the GETTEMP ioctl.  The
-returned value is the temperature in degrees fahrenheit.
-
-    int temperature;
-    ioctl(fd, WDIOC_GETTEMP, &temperature);
-
-Finally the SETOPTIONS ioctl can be used to control some aspects of
-the cards operation.
-
-    int options = 0;
-    ioctl(fd, WDIOC_SETOPTIONS, &options);
-
-The following options are available:
-
-	WDIOS_DISABLECARD	Turn off the watchdog timer
-	WDIOS_ENABLECARD	Turn on the watchdog timer
-	WDIOS_TEMPPANIC		Kernel panic on temperature trip
-
-[FIXME -- better explanations]
-
diff --git a/Documentation/watchdog/watchdog-kernel-api.rst b/Documentation/watchdog/watchdog-kernel-api.rst
new file mode 100644
index 000000000000..864edbe932c1
--- /dev/null
+++ b/Documentation/watchdog/watchdog-kernel-api.rst
@@ -0,0 +1,338 @@
+===============================================
+The Linux WatchDog Timer Driver Core kernel API
+===============================================
+
+Last reviewed: 12-Feb-2013
+
+Wim Van Sebroeck <wim@iguana.be>
+
+Introduction
+------------
+This document does not describe what a WatchDog Timer (WDT) Driver or Device is.
+It also does not describe the API which can be used by user space to communicate
+with a WatchDog Timer. If you want to know this then please read the following
+file: Documentation/watchdog/watchdog-api.rst .
+
+So what does this document describe? It describes the API that can be used by
+WatchDog Timer Drivers that want to use the WatchDog Timer Driver Core
+Framework. This framework provides all interfacing towards user space so that
+the same code does not have to be reproduced each time. This also means that
+a watchdog timer driver then only needs to provide the different routines
+(operations) that control the watchdog timer (WDT).
+
+The API
+-------
+Each watchdog timer driver that wants to use the WatchDog Timer Driver Core
+must #include <linux/watchdog.h> (you would have to do this anyway when
+writing a watchdog device driver). This include file contains following
+register/unregister routines::
+
+	extern int watchdog_register_device(struct watchdog_device *);
+	extern void watchdog_unregister_device(struct watchdog_device *);
+
+The watchdog_register_device routine registers a watchdog timer device.
+The parameter of this routine is a pointer to a watchdog_device structure.
+This routine returns zero on success and a negative errno code for failure.
+
+The watchdog_unregister_device routine deregisters a registered watchdog timer
+device. The parameter of this routine is the pointer to the registered
+watchdog_device structure.
+
+The watchdog subsystem includes an registration deferral mechanism,
+which allows you to register an watchdog as early as you wish during
+the boot process.
+
+The watchdog device structure looks like this::
+
+  struct watchdog_device {
+	int id;
+	struct device *parent;
+	const struct attribute_group **groups;
+	const struct watchdog_info *info;
+	const struct watchdog_ops *ops;
+	const struct watchdog_governor *gov;
+	unsigned int bootstatus;
+	unsigned int timeout;
+	unsigned int pretimeout;
+	unsigned int min_timeout;
+	unsigned int max_timeout;
+	unsigned int min_hw_heartbeat_ms;
+	unsigned int max_hw_heartbeat_ms;
+	struct notifier_block reboot_nb;
+	struct notifier_block restart_nb;
+	void *driver_data;
+	struct watchdog_core_data *wd_data;
+	unsigned long status;
+	struct list_head deferred;
+  };
+
+It contains following fields:
+
+* id: set by watchdog_register_device, id 0 is special. It has both a
+  /dev/watchdog0 cdev (dynamic major, minor 0) as well as the old
+  /dev/watchdog miscdev. The id is set automatically when calling
+  watchdog_register_device.
+* parent: set this to the parent device (or NULL) before calling
+  watchdog_register_device.
+* groups: List of sysfs attribute groups to create when creating the watchdog
+  device.
+* info: a pointer to a watchdog_info structure. This structure gives some
+  additional information about the watchdog timer itself. (Like it's unique name)
+* ops: a pointer to the list of watchdog operations that the watchdog supports.
+* gov: a pointer to the assigned watchdog device pretimeout governor or NULL.
+* timeout: the watchdog timer's timeout value (in seconds).
+  This is the time after which the system will reboot if user space does
+  not send a heartbeat request if WDOG_ACTIVE is set.
+* pretimeout: the watchdog timer's pretimeout value (in seconds).
+* min_timeout: the watchdog timer's minimum timeout value (in seconds).
+  If set, the minimum configurable value for 'timeout'.
+* max_timeout: the watchdog timer's maximum timeout value (in seconds),
+  as seen from userspace. If set, the maximum configurable value for
+  'timeout'. Not used if max_hw_heartbeat_ms is non-zero.
+* min_hw_heartbeat_ms: Hardware limit for minimum time between heartbeats,
+  in milli-seconds. This value is normally 0; it should only be provided
+  if the hardware can not tolerate lower intervals between heartbeats.
+* max_hw_heartbeat_ms: Maximum hardware heartbeat, in milli-seconds.
+  If set, the infrastructure will send heartbeats to the watchdog driver
+  if 'timeout' is larger than max_hw_heartbeat_ms, unless WDOG_ACTIVE
+  is set and userspace failed to send a heartbeat for at least 'timeout'
+  seconds. max_hw_heartbeat_ms must be set if a driver does not implement
+  the stop function.
+* reboot_nb: notifier block that is registered for reboot notifications, for
+  internal use only. If the driver calls watchdog_stop_on_reboot, watchdog core
+  will stop the watchdog on such notifications.
+* restart_nb: notifier block that is registered for machine restart, for
+  internal use only. If a watchdog is capable of restarting the machine, it
+  should define ops->restart. Priority can be changed through
+  watchdog_set_restart_priority.
+* bootstatus: status of the device after booting (reported with watchdog
+  WDIOF_* status bits).
+* driver_data: a pointer to the drivers private data of a watchdog device.
+  This data should only be accessed via the watchdog_set_drvdata and
+  watchdog_get_drvdata routines.
+* wd_data: a pointer to watchdog core internal data.
+* status: this field contains a number of status bits that give extra
+  information about the status of the device (Like: is the watchdog timer
+  running/active, or is the nowayout bit set).
+* deferred: entry in wtd_deferred_reg_list which is used to
+  register early initialized watchdogs.
+
+The list of watchdog operations is defined as::
+
+  struct watchdog_ops {
+	struct module *owner;
+	/* mandatory operations */
+	int (*start)(struct watchdog_device *);
+	int (*stop)(struct watchdog_device *);
+	/* optional operations */
+	int (*ping)(struct watchdog_device *);
+	unsigned int (*status)(struct watchdog_device *);
+	int (*set_timeout)(struct watchdog_device *, unsigned int);
+	int (*set_pretimeout)(struct watchdog_device *, unsigned int);
+	unsigned int (*get_timeleft)(struct watchdog_device *);
+	int (*restart)(struct watchdog_device *);
+	long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long);
+  };
+
+It is important that you first define the module owner of the watchdog timer
+driver's operations. This module owner will be used to lock the module when
+the watchdog is active. (This to avoid a system crash when you unload the
+module and /dev/watchdog is still open).
+
+Some operations are mandatory and some are optional. The mandatory operations
+are:
+
+* start: this is a pointer to the routine that starts the watchdog timer
+  device.
+  The routine needs a pointer to the watchdog timer device structure as a
+  parameter. It returns zero on success or a negative errno code for failure.
+
+Not all watchdog timer hardware supports the same functionality. That's why
+all other routines/operations are optional. They only need to be provided if
+they are supported. These optional routines/operations are:
+
+* stop: with this routine the watchdog timer device is being stopped.
+
+  The routine needs a pointer to the watchdog timer device structure as a
+  parameter. It returns zero on success or a negative errno code for failure.
+  Some watchdog timer hardware can only be started and not be stopped. A
+  driver supporting such hardware does not have to implement the stop routine.
+
+  If a driver has no stop function, the watchdog core will set WDOG_HW_RUNNING
+  and start calling the driver's keepalive pings function after the watchdog
+  device is closed.
+
+  If a watchdog driver does not implement the stop function, it must set
+  max_hw_heartbeat_ms.
+* ping: this is the routine that sends a keepalive ping to the watchdog timer
+  hardware.
+
+  The routine needs a pointer to the watchdog timer device structure as a
+  parameter. It returns zero on success or a negative errno code for failure.
+
+  Most hardware that does not support this as a separate function uses the
+  start function to restart the watchdog timer hardware. And that's also what
+  the watchdog timer driver core does: to send a keepalive ping to the watchdog
+  timer hardware it will either use the ping operation (when available) or the
+  start operation (when the ping operation is not available).
+
+  (Note: the WDIOC_KEEPALIVE ioctl call will only be active when the
+  WDIOF_KEEPALIVEPING bit has been set in the option field on the watchdog's
+  info structure).
+* status: this routine checks the status of the watchdog timer device. The
+  status of the device is reported with watchdog WDIOF_* status flags/bits.
+
+  WDIOF_MAGICCLOSE and WDIOF_KEEPALIVEPING are reported by the watchdog core;
+  it is not necessary to report those bits from the driver. Also, if no status
+  function is provided by the driver, the watchdog core reports the status bits
+  provided in the bootstatus variable of struct watchdog_device.
+
+* set_timeout: this routine checks and changes the timeout of the watchdog
+  timer device. It returns 0 on success, -EINVAL for "parameter out of range"
+  and -EIO for "could not write value to the watchdog". On success this
+  routine should set the timeout value of the watchdog_device to the
+  achieved timeout value (which may be different from the requested one
+  because the watchdog does not necessarily have a 1 second resolution).
+
+  Drivers implementing max_hw_heartbeat_ms set the hardware watchdog heartbeat
+  to the minimum of timeout and max_hw_heartbeat_ms. Those drivers set the
+  timeout value of the watchdog_device either to the requested timeout value
+  (if it is larger than max_hw_heartbeat_ms), or to the achieved timeout value.
+  (Note: the WDIOF_SETTIMEOUT needs to be set in the options field of the
+  watchdog's info structure).
+
+  If the watchdog driver does not have to perform any action but setting the
+  watchdog_device.timeout, this callback can be omitted.
+
+  If set_timeout is not provided but, WDIOF_SETTIMEOUT is set, the watchdog
+  infrastructure updates the timeout value of the watchdog_device internally
+  to the requested value.
+
+  If the pretimeout feature is used (WDIOF_PRETIMEOUT), then set_timeout must
+  also take care of checking if pretimeout is still valid and set up the timer
+  accordingly. This can't be done in the core without races, so it is the
+  duty of the driver.
+* set_pretimeout: this routine checks and changes the pretimeout value of
+  the watchdog. It is optional because not all watchdogs support pretimeout
+  notification. The timeout value is not an absolute time, but the number of
+  seconds before the actual timeout would happen. It returns 0 on success,
+  -EINVAL for "parameter out of range" and -EIO for "could not write value to
+  the watchdog". A value of 0 disables pretimeout notification.
+
+  (Note: the WDIOF_PRETIMEOUT needs to be set in the options field of the
+  watchdog's info structure).
+
+  If the watchdog driver does not have to perform any action but setting the
+  watchdog_device.pretimeout, this callback can be omitted. That means if
+  set_pretimeout is not provided but WDIOF_PRETIMEOUT is set, the watchdog
+  infrastructure updates the pretimeout value of the watchdog_device internally
+  to the requested value.
+
+* get_timeleft: this routines returns the time that's left before a reset.
+* restart: this routine restarts the machine. It returns 0 on success or a
+  negative errno code for failure.
+* ioctl: if this routine is present then it will be called first before we do
+  our own internal ioctl call handling. This routine should return -ENOIOCTLCMD
+  if a command is not supported. The parameters that are passed to the ioctl
+  call are: watchdog_device, cmd and arg.
+
+The status bits should (preferably) be set with the set_bit and clear_bit alike
+bit-operations. The status bits that are defined are:
+
+* WDOG_ACTIVE: this status bit indicates whether or not a watchdog timer device
+  is active or not from user perspective. User space is expected to send
+  heartbeat requests to the driver while this flag is set.
+* WDOG_NO_WAY_OUT: this bit stores the nowayout setting for the watchdog.
+  If this bit is set then the watchdog timer will not be able to stop.
+* WDOG_HW_RUNNING: Set by the watchdog driver if the hardware watchdog is
+  running. The bit must be set if the watchdog timer hardware can not be
+  stopped. The bit may also be set if the watchdog timer is running after
+  booting, before the watchdog device is opened. If set, the watchdog
+  infrastructure will send keepalives to the watchdog hardware while
+  WDOG_ACTIVE is not set.
+  Note: when you register the watchdog timer device with this bit set,
+  then opening /dev/watchdog will skip the start operation but send a keepalive
+  request instead.
+
+  To set the WDOG_NO_WAY_OUT status bit (before registering your watchdog
+  timer device) you can either:
+
+  * set it statically in your watchdog_device struct with
+
+	.status = WATCHDOG_NOWAYOUT_INIT_STATUS,
+
+    (this will set the value the same as CONFIG_WATCHDOG_NOWAYOUT) or
+  * use the following helper function::
+
+	static inline void watchdog_set_nowayout(struct watchdog_device *wdd,
+						 int nowayout)
+
+Note:
+   The WatchDog Timer Driver Core supports the magic close feature and
+   the nowayout feature. To use the magic close feature you must set the
+   WDIOF_MAGICCLOSE bit in the options field of the watchdog's info structure.
+
+The nowayout feature will overrule the magic close feature.
+
+To get or set driver specific data the following two helper functions should be
+used::
+
+  static inline void watchdog_set_drvdata(struct watchdog_device *wdd,
+					  void *data)
+  static inline void *watchdog_get_drvdata(struct watchdog_device *wdd)
+
+The watchdog_set_drvdata function allows you to add driver specific data. The
+arguments of this function are the watchdog device where you want to add the
+driver specific data to and a pointer to the data itself.
+
+The watchdog_get_drvdata function allows you to retrieve driver specific data.
+The argument of this function is the watchdog device where you want to retrieve
+data from. The function returns the pointer to the driver specific data.
+
+To initialize the timeout field, the following function can be used::
+
+  extern int watchdog_init_timeout(struct watchdog_device *wdd,
+                                   unsigned int timeout_parm,
+                                   struct device *dev);
+
+The watchdog_init_timeout function allows you to initialize the timeout field
+using the module timeout parameter or by retrieving the timeout-sec property from
+the device tree (if the module timeout parameter is invalid). Best practice is
+to set the default timeout value as timeout value in the watchdog_device and
+then use this function to set the user "preferred" timeout value.
+This routine returns zero on success and a negative errno code for failure.
+
+To disable the watchdog on reboot, the user must call the following helper::
+
+  static inline void watchdog_stop_on_reboot(struct watchdog_device *wdd);
+
+To disable the watchdog when unregistering the watchdog, the user must call
+the following helper. Note that this will only stop the watchdog if the
+nowayout flag is not set.
+
+::
+
+  static inline void watchdog_stop_on_unregister(struct watchdog_device *wdd);
+
+To change the priority of the restart handler the following helper should be
+used::
+
+  void watchdog_set_restart_priority(struct watchdog_device *wdd, int priority);
+
+User should follow the following guidelines for setting the priority:
+
+* 0: should be called in last resort, has limited restart capabilities
+* 128: default restart handler, use if no other handler is expected to be
+  available, and/or if restart is sufficient to restart the entire system
+* 255: highest priority, will preempt all other restart handlers
+
+To raise a pretimeout notification, the following function should be used::
+
+  void watchdog_notify_pretimeout(struct watchdog_device *wdd)
+
+The function can be called in the interrupt context. If watchdog pretimeout
+governor framework (kbuild CONFIG_WATCHDOG_PRETIMEOUT_GOV symbol) is enabled,
+an action is taken by a preconfigured pretimeout governor preassigned to
+the watchdog device. If watchdog pretimeout governor framework is not
+enabled, watchdog_notify_pretimeout() prints a notification message to
+the kernel log buffer.
diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt
deleted file mode 100644
index 3a91ef5af044..000000000000
--- a/Documentation/watchdog/watchdog-kernel-api.txt
+++ /dev/null
@@ -1,305 +0,0 @@
-The Linux WatchDog Timer Driver Core kernel API.
-===============================================
-Last reviewed: 12-Feb-2013
-
-Wim Van Sebroeck <wim@iguana.be>
-
-Introduction
-------------
-This document does not describe what a WatchDog Timer (WDT) Driver or Device is.
-It also does not describe the API which can be used by user space to communicate
-with a WatchDog Timer. If you want to know this then please read the following
-file: Documentation/watchdog/watchdog-api.txt .
-
-So what does this document describe? It describes the API that can be used by
-WatchDog Timer Drivers that want to use the WatchDog Timer Driver Core
-Framework. This framework provides all interfacing towards user space so that
-the same code does not have to be reproduced each time. This also means that
-a watchdog timer driver then only needs to provide the different routines
-(operations) that control the watchdog timer (WDT).
-
-The API
--------
-Each watchdog timer driver that wants to use the WatchDog Timer Driver Core
-must #include <linux/watchdog.h> (you would have to do this anyway when
-writing a watchdog device driver). This include file contains following
-register/unregister routines:
-
-extern int watchdog_register_device(struct watchdog_device *);
-extern void watchdog_unregister_device(struct watchdog_device *);
-
-The watchdog_register_device routine registers a watchdog timer device.
-The parameter of this routine is a pointer to a watchdog_device structure.
-This routine returns zero on success and a negative errno code for failure.
-
-The watchdog_unregister_device routine deregisters a registered watchdog timer
-device. The parameter of this routine is the pointer to the registered
-watchdog_device structure.
-
-The watchdog subsystem includes an registration deferral mechanism,
-which allows you to register an watchdog as early as you wish during
-the boot process.
-
-The watchdog device structure looks like this:
-
-struct watchdog_device {
-	int id;
-	struct device *parent;
-	const struct attribute_group **groups;
-	const struct watchdog_info *info;
-	const struct watchdog_ops *ops;
-	const struct watchdog_governor *gov;
-	unsigned int bootstatus;
-	unsigned int timeout;
-	unsigned int pretimeout;
-	unsigned int min_timeout;
-	unsigned int max_timeout;
-	unsigned int min_hw_heartbeat_ms;
-	unsigned int max_hw_heartbeat_ms;
-	struct notifier_block reboot_nb;
-	struct notifier_block restart_nb;
-	void *driver_data;
-	struct watchdog_core_data *wd_data;
-	unsigned long status;
-	struct list_head deferred;
-};
-
-It contains following fields:
-* id: set by watchdog_register_device, id 0 is special. It has both a
-  /dev/watchdog0 cdev (dynamic major, minor 0) as well as the old
-  /dev/watchdog miscdev. The id is set automatically when calling
-  watchdog_register_device.
-* parent: set this to the parent device (or NULL) before calling
-  watchdog_register_device.
-* groups: List of sysfs attribute groups to create when creating the watchdog
-  device.
-* info: a pointer to a watchdog_info structure. This structure gives some
-  additional information about the watchdog timer itself. (Like it's unique name)
-* ops: a pointer to the list of watchdog operations that the watchdog supports.
-* gov: a pointer to the assigned watchdog device pretimeout governor or NULL.
-* timeout: the watchdog timer's timeout value (in seconds).
-  This is the time after which the system will reboot if user space does
-  not send a heartbeat request if WDOG_ACTIVE is set.
-* pretimeout: the watchdog timer's pretimeout value (in seconds).
-* min_timeout: the watchdog timer's minimum timeout value (in seconds).
-  If set, the minimum configurable value for 'timeout'.
-* max_timeout: the watchdog timer's maximum timeout value (in seconds),
-  as seen from userspace. If set, the maximum configurable value for
-  'timeout'. Not used if max_hw_heartbeat_ms is non-zero.
-* min_hw_heartbeat_ms: Hardware limit for minimum time between heartbeats,
-  in milli-seconds. This value is normally 0; it should only be provided
-  if the hardware can not tolerate lower intervals between heartbeats.
-* max_hw_heartbeat_ms: Maximum hardware heartbeat, in milli-seconds.
-  If set, the infrastructure will send heartbeats to the watchdog driver
-  if 'timeout' is larger than max_hw_heartbeat_ms, unless WDOG_ACTIVE
-  is set and userspace failed to send a heartbeat for at least 'timeout'
-  seconds. max_hw_heartbeat_ms must be set if a driver does not implement
-  the stop function.
-* reboot_nb: notifier block that is registered for reboot notifications, for
-  internal use only. If the driver calls watchdog_stop_on_reboot, watchdog core
-  will stop the watchdog on such notifications.
-* restart_nb: notifier block that is registered for machine restart, for
-  internal use only. If a watchdog is capable of restarting the machine, it
-  should define ops->restart. Priority can be changed through
-  watchdog_set_restart_priority.
-* bootstatus: status of the device after booting (reported with watchdog
-  WDIOF_* status bits).
-* driver_data: a pointer to the drivers private data of a watchdog device.
-  This data should only be accessed via the watchdog_set_drvdata and
-  watchdog_get_drvdata routines.
-* wd_data: a pointer to watchdog core internal data.
-* status: this field contains a number of status bits that give extra
-  information about the status of the device (Like: is the watchdog timer
-  running/active, or is the nowayout bit set).
-* deferred: entry in wtd_deferred_reg_list which is used to
-  register early initialized watchdogs.
-
-The list of watchdog operations is defined as:
-
-struct watchdog_ops {
-	struct module *owner;
-	/* mandatory operations */
-	int (*start)(struct watchdog_device *);
-	int (*stop)(struct watchdog_device *);
-	/* optional operations */
-	int (*ping)(struct watchdog_device *);
-	unsigned int (*status)(struct watchdog_device *);
-	int (*set_timeout)(struct watchdog_device *, unsigned int);
-	int (*set_pretimeout)(struct watchdog_device *, unsigned int);
-	unsigned int (*get_timeleft)(struct watchdog_device *);
-	int (*restart)(struct watchdog_device *);
-	long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long);
-};
-
-It is important that you first define the module owner of the watchdog timer
-driver's operations. This module owner will be used to lock the module when
-the watchdog is active. (This to avoid a system crash when you unload the
-module and /dev/watchdog is still open).
-
-Some operations are mandatory and some are optional. The mandatory operations
-are:
-* start: this is a pointer to the routine that starts the watchdog timer
-  device.
-  The routine needs a pointer to the watchdog timer device structure as a
-  parameter. It returns zero on success or a negative errno code for failure.
-
-Not all watchdog timer hardware supports the same functionality. That's why
-all other routines/operations are optional. They only need to be provided if
-they are supported. These optional routines/operations are:
-* stop: with this routine the watchdog timer device is being stopped.
-  The routine needs a pointer to the watchdog timer device structure as a
-  parameter. It returns zero on success or a negative errno code for failure.
-  Some watchdog timer hardware can only be started and not be stopped. A
-  driver supporting such hardware does not have to implement the stop routine.
-  If a driver has no stop function, the watchdog core will set WDOG_HW_RUNNING
-  and start calling the driver's keepalive pings function after the watchdog
-  device is closed.
-  If a watchdog driver does not implement the stop function, it must set
-  max_hw_heartbeat_ms.
-* ping: this is the routine that sends a keepalive ping to the watchdog timer
-  hardware.
-  The routine needs a pointer to the watchdog timer device structure as a
-  parameter. It returns zero on success or a negative errno code for failure.
-  Most hardware that does not support this as a separate function uses the
-  start function to restart the watchdog timer hardware. And that's also what
-  the watchdog timer driver core does: to send a keepalive ping to the watchdog
-  timer hardware it will either use the ping operation (when available) or the
-  start operation (when the ping operation is not available).
-  (Note: the WDIOC_KEEPALIVE ioctl call will only be active when the
-  WDIOF_KEEPALIVEPING bit has been set in the option field on the watchdog's
-  info structure).
-* status: this routine checks the status of the watchdog timer device. The
-  status of the device is reported with watchdog WDIOF_* status flags/bits.
-  WDIOF_MAGICCLOSE and WDIOF_KEEPALIVEPING are reported by the watchdog core;
-  it is not necessary to report those bits from the driver. Also, if no status
-  function is provided by the driver, the watchdog core reports the status bits
-  provided in the bootstatus variable of struct watchdog_device.
-* set_timeout: this routine checks and changes the timeout of the watchdog
-  timer device. It returns 0 on success, -EINVAL for "parameter out of range"
-  and -EIO for "could not write value to the watchdog". On success this
-  routine should set the timeout value of the watchdog_device to the
-  achieved timeout value (which may be different from the requested one
-  because the watchdog does not necessarily have a 1 second resolution).
-  Drivers implementing max_hw_heartbeat_ms set the hardware watchdog heartbeat
-  to the minimum of timeout and max_hw_heartbeat_ms. Those drivers set the
-  timeout value of the watchdog_device either to the requested timeout value
-  (if it is larger than max_hw_heartbeat_ms), or to the achieved timeout value.
-  (Note: the WDIOF_SETTIMEOUT needs to be set in the options field of the
-  watchdog's info structure).
-  If the watchdog driver does not have to perform any action but setting the
-  watchdog_device.timeout, this callback can be omitted.
-  If set_timeout is not provided but, WDIOF_SETTIMEOUT is set, the watchdog
-  infrastructure updates the timeout value of the watchdog_device internally
-  to the requested value.
-  If the pretimeout feature is used (WDIOF_PRETIMEOUT), then set_timeout must
-  also take care of checking if pretimeout is still valid and set up the timer
-  accordingly. This can't be done in the core without races, so it is the
-  duty of the driver.
-* set_pretimeout: this routine checks and changes the pretimeout value of
-  the watchdog. It is optional because not all watchdogs support pretimeout
-  notification. The timeout value is not an absolute time, but the number of
-  seconds before the actual timeout would happen. It returns 0 on success,
-  -EINVAL for "parameter out of range" and -EIO for "could not write value to
-  the watchdog". A value of 0 disables pretimeout notification.
-  (Note: the WDIOF_PRETIMEOUT needs to be set in the options field of the
-  watchdog's info structure).
-  If the watchdog driver does not have to perform any action but setting the
-  watchdog_device.pretimeout, this callback can be omitted. That means if
-  set_pretimeout is not provided but WDIOF_PRETIMEOUT is set, the watchdog
-  infrastructure updates the pretimeout value of the watchdog_device internally
-  to the requested value.
-* get_timeleft: this routines returns the time that's left before a reset.
-* restart: this routine restarts the machine. It returns 0 on success or a
-  negative errno code for failure.
-* ioctl: if this routine is present then it will be called first before we do
-  our own internal ioctl call handling. This routine should return -ENOIOCTLCMD
-  if a command is not supported. The parameters that are passed to the ioctl
-  call are: watchdog_device, cmd and arg.
-
-The status bits should (preferably) be set with the set_bit and clear_bit alike
-bit-operations. The status bits that are defined are:
-* WDOG_ACTIVE: this status bit indicates whether or not a watchdog timer device
-  is active or not from user perspective. User space is expected to send
-  heartbeat requests to the driver while this flag is set.
-* WDOG_NO_WAY_OUT: this bit stores the nowayout setting for the watchdog.
-  If this bit is set then the watchdog timer will not be able to stop.
-* WDOG_HW_RUNNING: Set by the watchdog driver if the hardware watchdog is
-  running. The bit must be set if the watchdog timer hardware can not be
-  stopped. The bit may also be set if the watchdog timer is running after
-  booting, before the watchdog device is opened. If set, the watchdog
-  infrastructure will send keepalives to the watchdog hardware while
-  WDOG_ACTIVE is not set.
-  Note: when you register the watchdog timer device with this bit set,
-  then opening /dev/watchdog will skip the start operation but send a keepalive
-  request instead.
-
-  To set the WDOG_NO_WAY_OUT status bit (before registering your watchdog
-  timer device) you can either:
-  * set it statically in your watchdog_device struct with
-	.status = WATCHDOG_NOWAYOUT_INIT_STATUS,
-    (this will set the value the same as CONFIG_WATCHDOG_NOWAYOUT) or
-  * use the following helper function:
-  static inline void watchdog_set_nowayout(struct watchdog_device *wdd, int nowayout)
-
-Note: The WatchDog Timer Driver Core supports the magic close feature and
-the nowayout feature. To use the magic close feature you must set the
-WDIOF_MAGICCLOSE bit in the options field of the watchdog's info structure.
-The nowayout feature will overrule the magic close feature.
-
-To get or set driver specific data the following two helper functions should be
-used:
-
-static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data)
-static inline void *watchdog_get_drvdata(struct watchdog_device *wdd)
-
-The watchdog_set_drvdata function allows you to add driver specific data. The
-arguments of this function are the watchdog device where you want to add the
-driver specific data to and a pointer to the data itself.
-
-The watchdog_get_drvdata function allows you to retrieve driver specific data.
-The argument of this function is the watchdog device where you want to retrieve
-data from. The function returns the pointer to the driver specific data.
-
-To initialize the timeout field, the following function can be used:
-
-extern int watchdog_init_timeout(struct watchdog_device *wdd,
-                                  unsigned int timeout_parm, struct device *dev);
-
-The watchdog_init_timeout function allows you to initialize the timeout field
-using the module timeout parameter or by retrieving the timeout-sec property from
-the device tree (if the module timeout parameter is invalid). Best practice is
-to set the default timeout value as timeout value in the watchdog_device and
-then use this function to set the user "preferred" timeout value.
-This routine returns zero on success and a negative errno code for failure.
-
-To disable the watchdog on reboot, the user must call the following helper:
-
-static inline void watchdog_stop_on_reboot(struct watchdog_device *wdd);
-
-To disable the watchdog when unregistering the watchdog, the user must call
-the following helper. Note that this will only stop the watchdog if the
-nowayout flag is not set.
-
-static inline void watchdog_stop_on_unregister(struct watchdog_device *wdd);
-
-To change the priority of the restart handler the following helper should be
-used:
-
-void watchdog_set_restart_priority(struct watchdog_device *wdd, int priority);
-
-User should follow the following guidelines for setting the priority:
-* 0: should be called in last resort, has limited restart capabilities
-* 128: default restart handler, use if no other handler is expected to be
-  available, and/or if restart is sufficient to restart the entire system
-* 255: highest priority, will preempt all other restart handlers
-
-To raise a pretimeout notification, the following function should be used:
-
-void watchdog_notify_pretimeout(struct watchdog_device *wdd)
-
-The function can be called in the interrupt context. If watchdog pretimeout
-governor framework (kbuild CONFIG_WATCHDOG_PRETIMEOUT_GOV symbol) is enabled,
-an action is taken by a preconfigured pretimeout governor preassigned to
-the watchdog device. If watchdog pretimeout governor framework is not
-enabled, watchdog_notify_pretimeout() prints a notification message to
-the kernel log buffer.
diff --git a/Documentation/watchdog/watchdog-parameters.rst b/Documentation/watchdog/watchdog-parameters.rst
new file mode 100644
index 000000000000..01023382ea73
--- /dev/null
+++ b/Documentation/watchdog/watchdog-parameters.rst
@@ -0,0 +1,745 @@
+==========================
+WatchDog Module Parameters
+==========================
+
+This file provides information on the module parameters of many of
+the Linux watchdog drivers.  Watchdog driver parameter specs should
+be listed here unless the driver has its own driver-specific information
+file.
+
+See Documentation/admin-guide/kernel-parameters.rst for information on
+providing kernel parameters for builtin drivers versus loadable
+modules.
+
+-------------------------------------------------
+watchdog core:
+    open_timeout:
+	Maximum time, in seconds, for which the watchdog framework will take
+	care of pinging a running hardware watchdog until userspace opens the
+	corresponding /dev/watchdogN device. A value of 0 means an infinite
+	timeout. Setting this to a non-zero value can be useful to ensure that
+	either userspace comes up properly, or the board gets reset and allows
+	fallback logic in the bootloader to try something else.
+
+-------------------------------------------------
+acquirewdt:
+    wdt_stop:
+	Acquire WDT 'stop' io port (default 0x43)
+    wdt_start:
+	Acquire WDT 'start' io port (default 0x443)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+advantechwdt:
+    wdt_stop:
+	Advantech WDT 'stop' io port (default 0x443)
+    wdt_start:
+	Advantech WDT 'start' io port (default 0x443)
+    timeout:
+	Watchdog timeout in seconds. 1<= timeout <=63, default=60.
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+alim1535_wdt:
+    timeout:
+	Watchdog timeout in seconds. (0 < timeout < 18000, default=60
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+alim7101_wdt:
+    timeout:
+	Watchdog timeout in seconds. (1<=timeout<=3600, default=30
+    use_gpio:
+	Use the gpio watchdog (required by old cobalt boards).
+	default=0/off/no
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+ar7_wdt:
+    margin:
+	Watchdog margin in seconds (default=60)
+    nowayout:
+	Disable watchdog shutdown on close
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+armada_37xx_wdt:
+    timeout:
+	Watchdog timeout in seconds. (default=120)
+    nowayout:
+	Disable watchdog shutdown on close
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+at91rm9200_wdt:
+    wdt_time:
+	Watchdog time in seconds. (default=5)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+at91sam9_wdt:
+    heartbeat:
+	Watchdog heartbeats in seconds. (default = 15)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+bcm47xx_wdt:
+    wdt_time:
+	Watchdog time in seconds. (default=30)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+coh901327_wdt:
+    margin:
+	Watchdog margin in seconds (default 60s)
+
+-------------------------------------------------
+
+cpu5wdt:
+    port:
+	base address of watchdog card, default is 0x91
+    verbose:
+	be verbose, default is 0 (no)
+    ticks:
+	count down ticks, default is 10000
+
+-------------------------------------------------
+
+cpwd:
+    wd0_timeout:
+	Default watchdog0 timeout in 1/10secs
+    wd1_timeout:
+	Default watchdog1 timeout in 1/10secs
+    wd2_timeout:
+	Default watchdog2 timeout in 1/10secs
+
+-------------------------------------------------
+
+da9052wdt:
+    timeout:
+	Watchdog timeout in seconds. 2<= timeout <=131, default=2.048s
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+davinci_wdt:
+    heartbeat:
+	Watchdog heartbeat period in seconds from 1 to 600, default 60
+
+-------------------------------------------------
+
+ebc-c384_wdt:
+    timeout:
+	Watchdog timeout in seconds. (1<=timeout<=15300, default=60)
+    nowayout:
+	Watchdog cannot be stopped once started
+
+-------------------------------------------------
+
+ep93xx_wdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+    timeout:
+	Watchdog timeout in seconds. (1<=timeout<=3600, default=TBD)
+
+-------------------------------------------------
+
+eurotechwdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+    io:
+	Eurotech WDT io port (default=0x3f0)
+    irq:
+	Eurotech WDT irq (default=10)
+    ev:
+	Eurotech WDT event type (default is `int`)
+
+-------------------------------------------------
+
+gef_wdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+geodewdt:
+    timeout:
+	Watchdog timeout in seconds. 1<= timeout <=131, default=60.
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+i6300esb:
+    heartbeat:
+	Watchdog heartbeat in seconds. (1<heartbeat<2046, default=30)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+iTCO_wdt:
+    heartbeat:
+	Watchdog heartbeat in seconds.
+	(2<heartbeat<39 (TCO v1) or 613 (TCO v2), default=30)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+iTCO_vendor_support:
+    vendorsupport:
+	iTCO vendor specific support mode, default=0 (none),
+	1=SuperMicro Pent3, 2=SuperMicro Pent4+, 911=Broken SMI BIOS
+
+-------------------------------------------------
+
+ib700wdt:
+    timeout:
+	Watchdog timeout in seconds. 0<= timeout <=30, default=30.
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+ibmasr:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+imx2_wdt:
+    timeout:
+	Watchdog timeout in seconds (default 60 s)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+indydog:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+iop_wdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+it8712f_wdt:
+    margin:
+	Watchdog margin in seconds (default 60)
+    nowayout:
+	Disable watchdog shutdown on close
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+it87_wdt:
+    nogameport:
+	Forbid the activation of game port, default=0
+    nocir:
+	Forbid the use of CIR (workaround for some buggy setups); set to 1 if
+system resets despite watchdog daemon running, default=0
+    exclusive:
+	Watchdog exclusive device open, default=1
+    timeout:
+	Watchdog timeout in seconds, default=60
+    testmode:
+	Watchdog test mode (1 = no reboot), default=0
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+ixp4xx_wdt:
+    heartbeat:
+	Watchdog heartbeat in seconds (default 60s)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+ks8695_wdt:
+    wdt_time:
+	Watchdog time in seconds. (default=5)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+machzwd:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+    action:
+	after watchdog resets, generate:
+	0 = RESET(*)  1 = SMI  2 = NMI  3 = SCI
+
+-------------------------------------------------
+
+max63xx_wdt:
+    heartbeat:
+	Watchdog heartbeat period in seconds from 1 to 60, default 60
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+    nodelay:
+	Force selection of a timeout setting without initial delay
+	(max6373/74 only, default=0)
+
+-------------------------------------------------
+
+mixcomwd:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+mpc8xxx_wdt:
+    timeout:
+	Watchdog timeout in ticks. (0<timeout<65536, default=65535)
+    reset:
+	Watchdog Interrupt/Reset Mode. 0 = interrupt, 1 = reset
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+mv64x60_wdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+ni903x_wdt:
+    timeout:
+	Initial watchdog timeout in seconds (0<timeout<516, default=60)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+nic7018_wdt:
+    timeout:
+	Initial watchdog timeout in seconds (0<timeout<464, default=80)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+nuc900_wdt:
+    heartbeat:
+	Watchdog heartbeats in seconds.
+	(default = 15)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+omap_wdt:
+    timer_margin:
+	initial watchdog timeout (in seconds)
+    early_enable:
+	Watchdog is started on module insertion (default=0
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+orion_wdt:
+    heartbeat:
+	Initial watchdog heartbeat in seconds
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+pc87413_wdt:
+    io:
+	pc87413 WDT I/O port (default: io).
+    timeout:
+	Watchdog timeout in minutes (default=timeout).
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+pika_wdt:
+    heartbeat:
+	Watchdog heartbeats in seconds. (default = 15)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+pnx4008_wdt:
+    heartbeat:
+	Watchdog heartbeat period in seconds from 1 to 60, default 19
+    nowayout:
+	Set to 1 to keep watchdog running after device release
+
+-------------------------------------------------
+
+pnx833x_wdt:
+    timeout:
+	Watchdog timeout in Mhz. (68Mhz clock), default=2040000000 (30 seconds)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+    start_enabled:
+	Watchdog is started on module insertion (default=1)
+
+-------------------------------------------------
+
+rc32434_wdt:
+    timeout:
+	Watchdog timeout value, in seconds (default=20)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+riowd:
+    riowd_timeout:
+	Watchdog timeout in minutes (default=1)
+
+-------------------------------------------------
+
+s3c2410_wdt:
+    tmr_margin:
+	Watchdog tmr_margin in seconds. (default=15)
+    tmr_atboot:
+	Watchdog is started at boot time if set to 1, default=0
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+    soft_noboot:
+	Watchdog action, set to 1 to ignore reboots, 0 to reboot
+    debug:
+	Watchdog debug, set to >1 for debug, (default 0)
+
+-------------------------------------------------
+
+sa1100_wdt:
+    margin:
+	Watchdog margin in seconds (default 60s)
+
+-------------------------------------------------
+
+sb_wdog:
+    timeout:
+	Watchdog timeout in microseconds (max/default 8388607 or 8.3ish secs)
+
+-------------------------------------------------
+
+sbc60xxwdt:
+    wdt_stop:
+	SBC60xx WDT 'stop' io port (default 0x45)
+    wdt_start:
+	SBC60xx WDT 'start' io port (default 0x443)
+    timeout:
+	Watchdog timeout in seconds. (1<=timeout<=3600, default=30)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+sbc7240_wdt:
+    timeout:
+	Watchdog timeout in seconds. (1<=timeout<=255, default=30)
+    nowayout:
+	Disable watchdog when closing device file
+
+-------------------------------------------------
+
+sbc8360:
+    timeout:
+	Index into timeout table (0-63) (default=27 (60s))
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+sbc_epx_c3:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+sbc_fitpc2_wdt:
+    margin:
+	Watchdog margin in seconds (default 60s)
+    nowayout:
+	Watchdog cannot be stopped once started
+
+-------------------------------------------------
+
+sbsa_gwdt:
+    timeout:
+	Watchdog timeout in seconds. (default 10s)
+    action:
+	Watchdog action at the first stage timeout,
+	set to 0 to ignore, 1 to panic. (default=0)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+sc1200wdt:
+    isapnp:
+	When set to 0 driver ISA PnP support will be disabled (default=1)
+    io:
+	io port
+    timeout:
+	range is 0-255 minutes, default is 1
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+sc520_wdt:
+    timeout:
+	Watchdog timeout in seconds. (1 <= timeout <= 3600, default=30)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+sch311x_wdt:
+    force_id:
+	Override the detected device ID
+    therm_trip:
+	Should a ThermTrip trigger the reset generator
+    timeout:
+	Watchdog timeout in seconds. 1<= timeout <=15300, default=60
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+scx200_wdt:
+    margin:
+	Watchdog margin in seconds
+    nowayout:
+	Disable watchdog shutdown on close
+
+-------------------------------------------------
+
+shwdt:
+    clock_division_ratio:
+	Clock division ratio. Valid ranges are from 0x5 (1.31ms)
+	to 0x7 (5.25ms). (default=7)
+    heartbeat:
+	Watchdog heartbeat in seconds. (1 <= heartbeat <= 3600, default=30
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+smsc37b787_wdt:
+    timeout:
+	range is 1-255 units, default is 60
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+softdog:
+    soft_margin:
+	Watchdog soft_margin in seconds.
+	(0 < soft_margin < 65536, default=60)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+    soft_noboot:
+	Softdog action, set to 1 to ignore reboots, 0 to reboot
+	(default=0)
+
+-------------------------------------------------
+
+stmp3xxx_wdt:
+    heartbeat:
+	Watchdog heartbeat period in seconds from 1 to 4194304, default 19
+
+-------------------------------------------------
+
+tegra_wdt:
+    heartbeat:
+	Watchdog heartbeats in seconds. (default = 120)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+ts72xx_wdt:
+    timeout:
+	Watchdog timeout in seconds. (1 <= timeout <= 8, default=8)
+    nowayout:
+	Disable watchdog shutdown on close
+
+-------------------------------------------------
+
+twl4030_wdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+txx9wdt:
+    timeout:
+	Watchdog timeout in seconds. (0<timeout<N, default=60)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+uniphier_wdt:
+    timeout:
+	Watchdog timeout in power of two seconds.
+	(1 <= timeout <= 128, default=64)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+w83627hf_wdt:
+    wdt_io:
+	w83627hf/thf WDT io port (default 0x2E)
+    timeout:
+	Watchdog timeout in seconds. 1 <= timeout <= 255, default=60.
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+w83877f_wdt:
+    timeout:
+	Watchdog timeout in seconds. (1<=timeout<=3600, default=30)
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+w83977f_wdt:
+    timeout:
+	Watchdog timeout in seconds (15..7635), default=45)
+    testmode:
+	Watchdog testmode (1 = no reboot), default=0
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+wafer5823wdt:
+    timeout:
+	Watchdog timeout in seconds. 1 <= timeout <= 255, default=60.
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+wdt285:
+    soft_margin:
+	Watchdog timeout in seconds (default=60)
+
+-------------------------------------------------
+
+wdt977:
+    timeout:
+	Watchdog timeout in seconds (60..15300, default=60)
+    testmode:
+	Watchdog testmode (1 = no reboot), default=0
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+wm831x_wdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+wm8350_wdt:
+    nowayout:
+	Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
+sun4v_wdt:
+    timeout_ms:
+	Watchdog timeout in milliseconds 1..180000, default=60000)
+    nowayout:
+	Watchdog cannot be stopped once started
diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt
deleted file mode 100644
index ec919dc895ff..000000000000
--- a/Documentation/watchdog/watchdog-parameters.txt
+++ /dev/null
@@ -1,418 +0,0 @@
-This file provides information on the module parameters of many of
-the Linux watchdog drivers.  Watchdog driver parameter specs should
-be listed here unless the driver has its own driver-specific information
-file.
-
-
-See Documentation/admin-guide/kernel-parameters.rst for information on
-providing kernel parameters for builtin drivers versus loadable
-modules.
-
-The watchdog core parameter watchdog.open_timeout is the maximum time,
-in seconds, for which the watchdog framework will take care of pinging
-a running hardware watchdog until userspace opens the corresponding
-/dev/watchdogN device. A value of 0 means an infinite timeout. Setting
-this to a non-zero value can be useful to ensure that either userspace
-comes up properly, or the board gets reset and allows fallback logic
-in the bootloader to try something else.
-
-
--------------------------------------------------
-acquirewdt:
-wdt_stop: Acquire WDT 'stop' io port (default 0x43)
-wdt_start: Acquire WDT 'start' io port (default 0x443)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-advantechwdt:
-wdt_stop: Advantech WDT 'stop' io port (default 0x443)
-wdt_start: Advantech WDT 'start' io port (default 0x443)
-timeout: Watchdog timeout in seconds. 1<= timeout <=63, default=60.
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-alim1535_wdt:
-timeout: Watchdog timeout in seconds. (0 < timeout < 18000, default=60
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-alim7101_wdt:
-timeout: Watchdog timeout in seconds. (1<=timeout<=3600, default=30
-use_gpio: Use the gpio watchdog (required by old cobalt boards).
-	default=0/off/no
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-ar7_wdt:
-margin: Watchdog margin in seconds (default=60)
-nowayout: Disable watchdog shutdown on close
-	(default=kernel config parameter)
--------------------------------------------------
-armada_37xx_wdt:
-timeout: Watchdog timeout in seconds. (default=120)
-nowayout: Disable watchdog shutdown on close
-	(default=kernel config parameter)
--------------------------------------------------
-at91rm9200_wdt:
-wdt_time: Watchdog time in seconds. (default=5)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-at91sam9_wdt:
-heartbeat: Watchdog heartbeats in seconds. (default = 15)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-bcm47xx_wdt:
-wdt_time: Watchdog time in seconds. (default=30)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-coh901327_wdt:
-margin: Watchdog margin in seconds (default 60s)
--------------------------------------------------
-cpu5wdt:
-port: base address of watchdog card, default is 0x91
-verbose: be verbose, default is 0 (no)
-ticks: count down ticks, default is 10000
--------------------------------------------------
-cpwd:
-wd0_timeout: Default watchdog0 timeout in 1/10secs
-wd1_timeout: Default watchdog1 timeout in 1/10secs
-wd2_timeout: Default watchdog2 timeout in 1/10secs
--------------------------------------------------
-da9052wdt:
-timeout: Watchdog timeout in seconds. 2<= timeout <=131, default=2.048s
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-davinci_wdt:
-heartbeat: Watchdog heartbeat period in seconds from 1 to 600, default 60
--------------------------------------------------
-ebc-c384_wdt:
-timeout: Watchdog timeout in seconds. (1<=timeout<=15300, default=60)
-nowayout: Watchdog cannot be stopped once started
--------------------------------------------------
-ep93xx_wdt:
-nowayout: Watchdog cannot be stopped once started
-timeout: Watchdog timeout in seconds. (1<=timeout<=3600, default=TBD)
--------------------------------------------------
-eurotechwdt:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
-io: Eurotech WDT io port (default=0x3f0)
-irq: Eurotech WDT irq (default=10)
-ev: Eurotech WDT event type (default is `int')
--------------------------------------------------
-gef_wdt:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-geodewdt:
-timeout: Watchdog timeout in seconds. 1<= timeout <=131, default=60.
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-i6300esb:
-heartbeat: Watchdog heartbeat in seconds. (1<heartbeat<2046, default=30)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-iTCO_wdt:
-heartbeat: Watchdog heartbeat in seconds.
-	(2<heartbeat<39 (TCO v1) or 613 (TCO v2), default=30)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-iTCO_vendor_support:
-vendorsupport: iTCO vendor specific support mode, default=0 (none),
-	1=SuperMicro Pent3, 2=SuperMicro Pent4+, 911=Broken SMI BIOS
--------------------------------------------------
-ib700wdt:
-timeout: Watchdog timeout in seconds. 0<= timeout <=30, default=30.
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-ibmasr:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-imx2_wdt:
-timeout: Watchdog timeout in seconds (default 60 s)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-indydog:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-iop_wdt:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-it8712f_wdt:
-margin: Watchdog margin in seconds (default 60)
-nowayout: Disable watchdog shutdown on close
-	(default=kernel config parameter)
--------------------------------------------------
-it87_wdt:
-nogameport: Forbid the activation of game port, default=0
-nocir: Forbid the use of CIR (workaround for some buggy setups); set to 1 if
-system resets despite watchdog daemon running, default=0
-exclusive: Watchdog exclusive device open, default=1
-timeout: Watchdog timeout in seconds, default=60
-testmode: Watchdog test mode (1 = no reboot), default=0
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-ixp4xx_wdt:
-heartbeat: Watchdog heartbeat in seconds (default 60s)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-ks8695_wdt:
-wdt_time: Watchdog time in seconds. (default=5)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-machzwd:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
-action: after watchdog resets, generate:
-	0 = RESET(*)  1 = SMI  2 = NMI  3 = SCI
--------------------------------------------------
-max63xx_wdt:
-heartbeat: Watchdog heartbeat period in seconds from 1 to 60, default 60
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
-nodelay: Force selection of a timeout setting without initial delay
-	(max6373/74 only, default=0)
--------------------------------------------------
-mixcomwd:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-mpc8xxx_wdt:
-timeout: Watchdog timeout in ticks. (0<timeout<65536, default=65535)
-reset: Watchdog Interrupt/Reset Mode. 0 = interrupt, 1 = reset
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-mv64x60_wdt:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-ni903x_wdt:
-timeout: Initial watchdog timeout in seconds (0<timeout<516, default=60)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-nic7018_wdt:
-timeout: Initial watchdog timeout in seconds (0<timeout<464, default=80)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-nuc900_wdt:
-heartbeat: Watchdog heartbeats in seconds.
-	(default = 15)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-omap_wdt:
-timer_margin: initial watchdog timeout (in seconds)
-early_enable: Watchdog is started on module insertion (default=0
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-orion_wdt:
-heartbeat: Initial watchdog heartbeat in seconds
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-pc87413_wdt:
-io: pc87413 WDT I/O port (default: io).
-timeout: Watchdog timeout in minutes (default=timeout).
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-pika_wdt:
-heartbeat: Watchdog heartbeats in seconds. (default = 15)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-pnx4008_wdt:
-heartbeat: Watchdog heartbeat period in seconds from 1 to 60, default 19
-nowayout: Set to 1 to keep watchdog running after device release
--------------------------------------------------
-pnx833x_wdt:
-timeout: Watchdog timeout in Mhz. (68Mhz clock), default=2040000000 (30 seconds)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
-start_enabled: Watchdog is started on module insertion (default=1)
--------------------------------------------------
-rc32434_wdt:
-timeout: Watchdog timeout value, in seconds (default=20)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-riowd:
-riowd_timeout: Watchdog timeout in minutes (default=1)
--------------------------------------------------
-s3c2410_wdt:
-tmr_margin: Watchdog tmr_margin in seconds. (default=15)
-tmr_atboot: Watchdog is started at boot time if set to 1, default=0
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
-soft_noboot: Watchdog action, set to 1 to ignore reboots, 0 to reboot
-debug: Watchdog debug, set to >1 for debug, (default 0)
--------------------------------------------------
-sa1100_wdt:
-margin: Watchdog margin in seconds (default 60s)
--------------------------------------------------
-sb_wdog:
-timeout: Watchdog timeout in microseconds (max/default 8388607 or 8.3ish secs)
--------------------------------------------------
-sbc60xxwdt:
-wdt_stop: SBC60xx WDT 'stop' io port (default 0x45)
-wdt_start: SBC60xx WDT 'start' io port (default 0x443)
-timeout: Watchdog timeout in seconds. (1<=timeout<=3600, default=30)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-sbc7240_wdt:
-timeout: Watchdog timeout in seconds. (1<=timeout<=255, default=30)
-nowayout: Disable watchdog when closing device file
--------------------------------------------------
-sbc8360:
-timeout: Index into timeout table (0-63) (default=27 (60s))
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-sbc_epx_c3:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-sbc_fitpc2_wdt:
-margin: Watchdog margin in seconds (default 60s)
-nowayout: Watchdog cannot be stopped once started
--------------------------------------------------
-sbsa_gwdt:
-timeout: Watchdog timeout in seconds. (default 10s)
-action: Watchdog action at the first stage timeout,
-	set to 0 to ignore, 1 to panic. (default=0)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-sc1200wdt:
-isapnp: When set to 0 driver ISA PnP support will be disabled (default=1)
-io: io port
-timeout: range is 0-255 minutes, default is 1
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-sc520_wdt:
-timeout: Watchdog timeout in seconds. (1 <= timeout <= 3600, default=30)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-sch311x_wdt:
-force_id: Override the detected device ID
-therm_trip: Should a ThermTrip trigger the reset generator
-timeout: Watchdog timeout in seconds. 1<= timeout <=15300, default=60
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-scx200_wdt:
-margin: Watchdog margin in seconds
-nowayout: Disable watchdog shutdown on close
--------------------------------------------------
-shwdt:
-clock_division_ratio: Clock division ratio. Valid ranges are from 0x5 (1.31ms)
-	to 0x7 (5.25ms). (default=7)
-heartbeat: Watchdog heartbeat in seconds. (1 <= heartbeat <= 3600, default=30
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-smsc37b787_wdt:
-timeout: range is 1-255 units, default is 60
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-softdog:
-soft_margin: Watchdog soft_margin in seconds.
-	(0 < soft_margin < 65536, default=60)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
-soft_noboot: Softdog action, set to 1 to ignore reboots, 0 to reboot
-	(default=0)
--------------------------------------------------
-stmp3xxx_wdt:
-heartbeat: Watchdog heartbeat period in seconds from 1 to 4194304, default 19
--------------------------------------------------
-tegra_wdt:
-heartbeat: Watchdog heartbeats in seconds. (default = 120)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-ts72xx_wdt:
-timeout: Watchdog timeout in seconds. (1 <= timeout <= 8, default=8)
-nowayout: Disable watchdog shutdown on close
--------------------------------------------------
-twl4030_wdt:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-txx9wdt:
-timeout: Watchdog timeout in seconds. (0<timeout<N, default=60)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-uniphier_wdt:
-timeout: Watchdog timeout in power of two seconds.
-	(1 <= timeout <= 128, default=64)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-w83627hf_wdt:
-wdt_io: w83627hf/thf WDT io port (default 0x2E)
-timeout: Watchdog timeout in seconds. 1 <= timeout <= 255, default=60.
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-w83877f_wdt:
-timeout: Watchdog timeout in seconds. (1<=timeout<=3600, default=30)
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-w83977f_wdt:
-timeout: Watchdog timeout in seconds (15..7635), default=45)
-testmode: Watchdog testmode (1 = no reboot), default=0
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-wafer5823wdt:
-timeout: Watchdog timeout in seconds. 1 <= timeout <= 255, default=60.
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-wdt285:
-soft_margin: Watchdog timeout in seconds (default=60)
--------------------------------------------------
-wdt977:
-timeout: Watchdog timeout in seconds (60..15300, default=60)
-testmode: Watchdog testmode (1 = no reboot), default=0
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-wm831x_wdt:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-wm8350_wdt:
-nowayout: Watchdog cannot be stopped once started
-	(default=kernel config parameter)
--------------------------------------------------
-sun4v_wdt:
-timeout_ms: Watchdog timeout in milliseconds 1..180000, default=60000)
-nowayout: Watchdog cannot be stopped once started
--------------------------------------------------
diff --git a/Documentation/watchdog/watchdog-pm.rst b/Documentation/watchdog/watchdog-pm.rst
new file mode 100644
index 000000000000..646e1f28f31f
--- /dev/null
+++ b/Documentation/watchdog/watchdog-pm.rst
@@ -0,0 +1,22 @@
+===============================================
+The Linux WatchDog Timer Power Management Guide
+===============================================
+
+Last reviewed: 17-Dec-2018
+
+Wolfram Sang <wsa+renesas@sang-engineering.com>
+
+Introduction
+------------
+This document states rules about watchdog devices and their power management
+handling to ensure a uniform behaviour for Linux systems.
+
+
+Ping on resume
+--------------
+On resume, a watchdog timer shall be reset to its selected value to give
+userspace enough time to resume. [1] [2]
+
+[1] https://patchwork.kernel.org/patch/10252209/
+
+[2] https://patchwork.kernel.org/patch/10711625/
diff --git a/Documentation/watchdog/watchdog-pm.txt b/Documentation/watchdog/watchdog-pm.txt
deleted file mode 100644
index 7a4dd46e0d24..000000000000
--- a/Documentation/watchdog/watchdog-pm.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-The Linux WatchDog Timer Power Management Guide
-===============================================
-Last reviewed: 17-Dec-2018
-
-Wolfram Sang <wsa+renesas@sang-engineering.com>
-
-Introduction
-------------
-This document states rules about watchdog devices and their power management
-handling to ensure a uniform behaviour for Linux systems.
-
-
-Ping on resume
---------------
-On resume, a watchdog timer shall be reset to its selected value to give
-userspace enough time to resume. [1] [2]
-
-[1] https://patchwork.kernel.org/patch/10252209/
-[2] https://patchwork.kernel.org/patch/10711625/
diff --git a/Documentation/watchdog/wdt.rst b/Documentation/watchdog/wdt.rst
new file mode 100644
index 000000000000..d97b0361535b
--- /dev/null
+++ b/Documentation/watchdog/wdt.rst
@@ -0,0 +1,63 @@
+============================================================
+WDT Watchdog Timer Interfaces For The Linux Operating System
+============================================================
+
+Last Reviewed: 10/05/2007
+
+Alan Cox <alan@lxorguk.ukuu.org.uk>
+
+	- ICS	WDT501-P
+	- ICS	WDT501-P (no fan tachometer)
+	- ICS	WDT500-P
+
+All the interfaces provide /dev/watchdog, which when open must be written
+to within a timeout or the machine will reboot. Each write delays the reboot
+time another timeout. In the case of the software watchdog the ability to
+reboot will depend on the state of the machines and interrupts. The hardware
+boards physically pull the machine down off their own onboard timers and
+will reboot from almost anything.
+
+A second temperature monitoring interface is available on the WDT501P cards.
+This provides /dev/temperature. This is the machine internal temperature in
+degrees Fahrenheit. Each read returns a single byte giving the temperature.
+
+The third interface logs kernel messages on additional alert events.
+
+The ICS ISA-bus wdt card cannot be safely probed for. Instead you need to
+pass IO address and IRQ boot parameters.  E.g.::
+
+	wdt.io=0x240 wdt.irq=11
+
+Other "wdt" driver parameters are:
+
+	===========	======================================================
+	heartbeat	Watchdog heartbeat in seconds (default 60)
+	nowayout	Watchdog cannot be stopped once started (kernel
+			build parameter)
+	tachometer	WDT501-P Fan Tachometer support (0=disable, default=0)
+	type		WDT501-P Card type (500 or 501, default=500)
+	===========	======================================================
+
+Features
+--------
+
+================   =======	   =======
+		   WDT501P	   WDT500P
+================   =======	   =======
+Reboot Timer	   X               X
+External Reboot	   X	           X
+I/O Port Monitor   o		   o
+Temperature	   X		   o
+Fan Speed          X		   o
+Power Under	   X               o
+Power Over         X               o
+Overheat           X               o
+================   =======	   =======
+
+The external event interfaces on the WDT boards are not currently supported.
+Minor numbers are however allocated for it.
+
+
+Example Watchdog Driver:
+
+	see samples/watchdog/watchdog-simple.c
diff --git a/Documentation/watchdog/wdt.txt b/Documentation/watchdog/wdt.txt
deleted file mode 100644
index ed2f0b860869..000000000000
--- a/Documentation/watchdog/wdt.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-Last Reviewed: 10/05/2007
-
-	WDT Watchdog Timer Interfaces For The Linux Operating System
-		Alan Cox <alan@lxorguk.ukuu.org.uk>
-
-	ICS	WDT501-P
-	ICS	WDT501-P (no fan tachometer)
-	ICS	WDT500-P
-
-All the interfaces provide /dev/watchdog, which when open must be written
-to within a timeout or the machine will reboot. Each write delays the reboot
-time another timeout. In the case of the software watchdog the ability to
-reboot will depend on the state of the machines and interrupts. The hardware
-boards physically pull the machine down off their own onboard timers and
-will reboot from almost anything.
-
-A second temperature monitoring interface is available on the WDT501P cards.
-This provides /dev/temperature. This is the machine internal temperature in
-degrees Fahrenheit. Each read returns a single byte giving the temperature.
-
-The third interface logs kernel messages on additional alert events.
-
-The ICS ISA-bus wdt card cannot be safely probed for. Instead you need to
-pass IO address and IRQ boot parameters.  E.g.:
-	wdt.io=0x240 wdt.irq=11
-
-Other "wdt" driver parameters are:
-	heartbeat	Watchdog heartbeat in seconds (default 60)
-	nowayout	Watchdog cannot be stopped once started (kernel
-				build parameter)
-	tachometer	WDT501-P Fan Tachometer support (0=disable, default=0)
-	type		WDT501-P Card type (500 or 501, default=500)
-
-Features
---------
-		WDT501P		WDT500P
-Reboot Timer	   X               X
-External Reboot	   X	           X
-I/O Port Monitor   o		   o
-Temperature	   X		   o
-Fan Speed          X		   o
-Power Under	   X               o
-Power Over         X               o
-Overheat           X               o
-
-The external event interfaces on the WDT boards are not currently supported.
-Minor numbers are however allocated for it.
-
-
-Example Watchdog Driver:  see samples/watchdog/watchdog-simple.c
-- 
cgit 


From 3bd41e595b09ecce472432bbdaf7544d2a94a417 Mon Sep 17 00:00:00 2001
From: Jerry Hoemann <jerry.hoemann@hpe.com>
Date: Wed, 12 Jun 2019 18:02:37 -0600
Subject: docs: watchdog: Fix build error.

make htmldocs fails due to missing blank line following header.

Signed-off-by: Jerry Hoemann <jerry.hoemann@hpe.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 Documentation/watchdog/watchdog-parameters.rst | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/watchdog/watchdog-parameters.rst b/Documentation/watchdog/watchdog-parameters.rst
index 01023382ea73..a3985cc5aeda 100644
--- a/Documentation/watchdog/watchdog-parameters.rst
+++ b/Documentation/watchdog/watchdog-parameters.rst
@@ -12,6 +12,7 @@ providing kernel parameters for builtin drivers versus loadable
 modules.
 
 -------------------------------------------------
+
 watchdog core:
     open_timeout:
 	Maximum time, in seconds, for which the watchdog framework will take
@@ -22,6 +23,7 @@ watchdog core:
 	fallback logic in the bootloader to try something else.
 
 -------------------------------------------------
+
 acquirewdt:
     wdt_stop:
 	Acquire WDT 'stop' io port (default 0x43)
-- 
cgit 


From 3265c3d8f70f0cb98dc7a7a3865200a7b19f654b Mon Sep 17 00:00:00 2001
From: Simon Horman <horms+renesas@verge.net.au>
Date: Mon, 17 Jun 2019 11:09:53 +0200
Subject: dt-bindings: watchdog: Rename bindings documentation file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For consistency with the naming of (most) other documentation files for DT
bindings for Renesas IP blocks rename the Renesas WDT documentation file
from renesas-wdt.txt to renesas,wdt.txt.

Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Reviewed-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 .../devicetree/bindings/watchdog/renesas,wdt.txt   | 48 ++++++++++++++++++++++
 .../devicetree/bindings/watchdog/renesas-wdt.txt   | 48 ----------------------
 2 files changed, 48 insertions(+), 48 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/watchdog/renesas,wdt.txt
 delete mode 100644 Documentation/devicetree/bindings/watchdog/renesas-wdt.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/watchdog/renesas,wdt.txt b/Documentation/devicetree/bindings/watchdog/renesas,wdt.txt
new file mode 100644
index 000000000000..9f365c1a3399
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/renesas,wdt.txt
@@ -0,0 +1,48 @@
+Renesas Watchdog Timer (WDT) Controller
+
+Required properties:
+ - compatible : Must be "renesas,<soctype>-wdt", followed by a generic
+		fallback compatible string when compatible with the generic
+		version.
+	       Examples with soctypes are:
+		 - "renesas,r8a7743-wdt" (RZ/G1M)
+		 - "renesas,r8a7744-wdt" (RZ/G1N)
+		 - "renesas,r8a7745-wdt" (RZ/G1E)
+		 - "renesas,r8a77470-wdt" (RZ/G1C)
+		 - "renesas,r8a774a1-wdt" (RZ/G2M)
+		 - "renesas,r8a774c0-wdt" (RZ/G2E)
+	         - "renesas,r8a7790-wdt" (R-Car H2)
+	         - "renesas,r8a7791-wdt" (R-Car M2-W)
+	         - "renesas,r8a7792-wdt" (R-Car V2H)
+	         - "renesas,r8a7793-wdt" (R-Car M2-N)
+	         - "renesas,r8a7794-wdt" (R-Car E2)
+	         - "renesas,r8a7795-wdt" (R-Car H3)
+	         - "renesas,r8a7796-wdt" (R-Car M3-W)
+		 - "renesas,r8a77965-wdt" (R-Car M3-N)
+	         - "renesas,r8a77970-wdt" (R-Car V3M)
+	         - "renesas,r8a77990-wdt" (R-Car E3)
+	         - "renesas,r8a77995-wdt" (R-Car D3)
+	         - "renesas,r7s72100-wdt" (RZ/A1)
+	         - "renesas,r7s9210-wdt"  (RZ/A2)
+		The generic compatible string must be:
+		 - "renesas,rza-wdt" for RZ/A
+		 - "renesas,rcar-gen2-wdt" for R-Car Gen2 and RZ/G1
+		 - "renesas,rcar-gen3-wdt" for R-Car Gen3 and RZ/G2
+
+- reg : Should contain WDT registers location and length
+- clocks : the clock feeding the watchdog timer.
+
+Optional properties:
+- timeout-sec : Contains the watchdog timeout in seconds
+- power-domains : the power domain the WDT belongs to
+- interrupts: Some WDTs have an interrupt when used in interval timer mode
+
+Examples:
+
+	wdt0: watchdog@e6020000 {
+		compatible = "renesas,r8a7795-wdt", "renesas,rcar-gen3-wdt";
+		reg = <0 0xe6020000 0 0x0c>;
+		clocks = <&cpg CPG_MOD 402>;
+		power-domains = <&cpg>;
+		timeout-sec = <60>;
+	};
diff --git a/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt b/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt
deleted file mode 100644
index 9f365c1a3399..000000000000
--- a/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-Renesas Watchdog Timer (WDT) Controller
-
-Required properties:
- - compatible : Must be "renesas,<soctype>-wdt", followed by a generic
-		fallback compatible string when compatible with the generic
-		version.
-	       Examples with soctypes are:
-		 - "renesas,r8a7743-wdt" (RZ/G1M)
-		 - "renesas,r8a7744-wdt" (RZ/G1N)
-		 - "renesas,r8a7745-wdt" (RZ/G1E)
-		 - "renesas,r8a77470-wdt" (RZ/G1C)
-		 - "renesas,r8a774a1-wdt" (RZ/G2M)
-		 - "renesas,r8a774c0-wdt" (RZ/G2E)
-	         - "renesas,r8a7790-wdt" (R-Car H2)
-	         - "renesas,r8a7791-wdt" (R-Car M2-W)
-	         - "renesas,r8a7792-wdt" (R-Car V2H)
-	         - "renesas,r8a7793-wdt" (R-Car M2-N)
-	         - "renesas,r8a7794-wdt" (R-Car E2)
-	         - "renesas,r8a7795-wdt" (R-Car H3)
-	         - "renesas,r8a7796-wdt" (R-Car M3-W)
-		 - "renesas,r8a77965-wdt" (R-Car M3-N)
-	         - "renesas,r8a77970-wdt" (R-Car V3M)
-	         - "renesas,r8a77990-wdt" (R-Car E3)
-	         - "renesas,r8a77995-wdt" (R-Car D3)
-	         - "renesas,r7s72100-wdt" (RZ/A1)
-	         - "renesas,r7s9210-wdt"  (RZ/A2)
-		The generic compatible string must be:
-		 - "renesas,rza-wdt" for RZ/A
-		 - "renesas,rcar-gen2-wdt" for R-Car Gen2 and RZ/G1
-		 - "renesas,rcar-gen3-wdt" for R-Car Gen3 and RZ/G2
-
-- reg : Should contain WDT registers location and length
-- clocks : the clock feeding the watchdog timer.
-
-Optional properties:
-- timeout-sec : Contains the watchdog timeout in seconds
-- power-domains : the power domain the WDT belongs to
-- interrupts: Some WDTs have an interrupt when used in interval timer mode
-
-Examples:
-
-	wdt0: watchdog@e6020000 {
-		compatible = "renesas,r8a7795-wdt", "renesas,rcar-gen3-wdt";
-		reg = <0 0xe6020000 0 0x0c>;
-		clocks = <&cpg CPG_MOD 402>;
-		power-domains = <&cpg>;
-		timeout-sec = <60>;
-	};
-- 
cgit 


From 66f2a122c68d8f13e5db978b6b7571aaf0e53a19 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 2 Jul 2019 13:54:38 -0400
Subject: docs: Move binderfs to admin-guide

The documentation is more appropriate for the administrator than for
the internal kernel API section it is currently in.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Christian Brauner <christian@brauner.io>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/binderfs.rst | 68 ++++++++++++++++++++++++++++++++++
 Documentation/admin-guide/index.rst    |  1 +
 Documentation/filesystems/binderfs.rst | 68 ----------------------------------
 Documentation/filesystems/index.rst    | 10 -----
 4 files changed, 69 insertions(+), 78 deletions(-)
 create mode 100644 Documentation/admin-guide/binderfs.rst
 delete mode 100644 Documentation/filesystems/binderfs.rst

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/binderfs.rst b/Documentation/admin-guide/binderfs.rst
new file mode 100644
index 000000000000..c009671f8434
--- /dev/null
+++ b/Documentation/admin-guide/binderfs.rst
@@ -0,0 +1,68 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+The Android binderfs Filesystem
+===============================
+
+Android binderfs is a filesystem for the Android binder IPC mechanism.  It
+allows to dynamically add and remove binder devices at runtime.  Binder devices
+located in a new binderfs instance are independent of binder devices located in
+other binderfs instances.  Mounting a new binderfs instance makes it possible
+to get a set of private binder devices.
+
+Mounting binderfs
+-----------------
+
+Android binderfs can be mounted with::
+
+  mkdir /dev/binderfs
+  mount -t binder binder /dev/binderfs
+
+at which point a new instance of binderfs will show up at ``/dev/binderfs``.
+In a fresh instance of binderfs no binder devices will be present.  There will
+only be a ``binder-control`` device which serves as the request handler for
+binderfs. Mounting another binderfs instance at a different location will
+create a new and separate instance from all other binderfs mounts.  This is
+identical to the behavior of e.g. ``devpts`` and ``tmpfs``. The Android
+binderfs filesystem can be mounted in user namespaces.
+
+Options
+-------
+max
+  binderfs instances can be mounted with a limit on the number of binder
+  devices that can be allocated. The ``max=<count>`` mount option serves as
+  a per-instance limit. If ``max=<count>`` is set then only ``<count>`` number
+  of binder devices can be allocated in this binderfs instance.
+
+Allocating binder Devices
+-------------------------
+
+.. _ioctl: http://man7.org/linux/man-pages/man2/ioctl.2.html
+
+To allocate a new binder device in a binderfs instance a request needs to be
+sent through the ``binder-control`` device node.  A request is sent in the form
+of an `ioctl() <ioctl_>`_.
+
+What a program needs to do is to open the ``binder-control`` device node and
+send a ``BINDER_CTL_ADD`` request to the kernel.  Users of binderfs need to
+tell the kernel which name the new binder device should get.  By default a name
+can only contain up to ``BINDERFS_MAX_NAME`` chars including the terminating
+zero byte.
+
+Once the request is made via an `ioctl() <ioctl_>`_ passing a ``struct
+binder_device`` with the name to the kernel it will allocate a new binder
+device and return the major and minor number of the new device in the struct
+(This is necessary because binderfs allocates a major device number
+dynamically.).  After the `ioctl() <ioctl_>`_ returns there will be a new
+binder device located under /dev/binderfs with the chosen name.
+
+Deleting binder Devices
+-----------------------
+
+.. _unlink: http://man7.org/linux/man-pages/man2/unlink.2.html
+.. _rm: http://man7.org/linux/man-pages/man1/rm.1.html
+
+Binderfs binder devices can be deleted via `unlink() <unlink_>`_.  This means
+that the `rm() <rm_>`_ tool can be used to delete them. Note that the
+``binder-control`` device cannot be deleted since this would make the binderfs
+instance unuseable.  The ``binder-control`` device will be deleted when the
+binderfs instance is unmounted and all references to it have been dropped.
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 8001917ee012..24fbe0568eff 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -70,6 +70,7 @@ configure specific aspects of kernel behavior to your liking.
    ras
    bcache
    ext4
+   binderfs
    pm/index
    thunderbolt
    LSM/index
diff --git a/Documentation/filesystems/binderfs.rst b/Documentation/filesystems/binderfs.rst
deleted file mode 100644
index c009671f8434..000000000000
--- a/Documentation/filesystems/binderfs.rst
+++ /dev/null
@@ -1,68 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-The Android binderfs Filesystem
-===============================
-
-Android binderfs is a filesystem for the Android binder IPC mechanism.  It
-allows to dynamically add and remove binder devices at runtime.  Binder devices
-located in a new binderfs instance are independent of binder devices located in
-other binderfs instances.  Mounting a new binderfs instance makes it possible
-to get a set of private binder devices.
-
-Mounting binderfs
------------------
-
-Android binderfs can be mounted with::
-
-  mkdir /dev/binderfs
-  mount -t binder binder /dev/binderfs
-
-at which point a new instance of binderfs will show up at ``/dev/binderfs``.
-In a fresh instance of binderfs no binder devices will be present.  There will
-only be a ``binder-control`` device which serves as the request handler for
-binderfs. Mounting another binderfs instance at a different location will
-create a new and separate instance from all other binderfs mounts.  This is
-identical to the behavior of e.g. ``devpts`` and ``tmpfs``. The Android
-binderfs filesystem can be mounted in user namespaces.
-
-Options
--------
-max
-  binderfs instances can be mounted with a limit on the number of binder
-  devices that can be allocated. The ``max=<count>`` mount option serves as
-  a per-instance limit. If ``max=<count>`` is set then only ``<count>`` number
-  of binder devices can be allocated in this binderfs instance.
-
-Allocating binder Devices
--------------------------
-
-.. _ioctl: http://man7.org/linux/man-pages/man2/ioctl.2.html
-
-To allocate a new binder device in a binderfs instance a request needs to be
-sent through the ``binder-control`` device node.  A request is sent in the form
-of an `ioctl() <ioctl_>`_.
-
-What a program needs to do is to open the ``binder-control`` device node and
-send a ``BINDER_CTL_ADD`` request to the kernel.  Users of binderfs need to
-tell the kernel which name the new binder device should get.  By default a name
-can only contain up to ``BINDERFS_MAX_NAME`` chars including the terminating
-zero byte.
-
-Once the request is made via an `ioctl() <ioctl_>`_ passing a ``struct
-binder_device`` with the name to the kernel it will allocate a new binder
-device and return the major and minor number of the new device in the struct
-(This is necessary because binderfs allocates a major device number
-dynamically.).  After the `ioctl() <ioctl_>`_ returns there will be a new
-binder device located under /dev/binderfs with the chosen name.
-
-Deleting binder Devices
------------------------
-
-.. _unlink: http://man7.org/linux/man-pages/man2/unlink.2.html
-.. _rm: http://man7.org/linux/man-pages/man1/rm.1.html
-
-Binderfs binder devices can be deleted via `unlink() <unlink_>`_.  This means
-that the `rm() <rm_>`_ tool can be used to delete them. Note that the
-``binder-control`` device cannot be deleted since this would make the binderfs
-instance unuseable.  The ``binder-control`` device will be deleted when the
-binderfs instance is unmounted and all references to it have been dropped.
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 1651173f1118..2de2fe2ab078 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -32,13 +32,3 @@ filesystem implementations.
 
    journalling
    fscrypt
-
-Filesystem-specific documentation
-=================================
-
-Documentation for individual filesystem types can be found here.
-
-.. toctree::
-   :maxdepth: 2
-
-   binderfs
-- 
cgit 


From 454f96f2b738374da4b0a703b1e2e7aed82c4486 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sat, 6 Jul 2019 13:28:42 -0300
Subject: docs: automarkup.py: ignore exceptions when seeking for xrefs

When using the automarkup extension with:
	make pdfdocs

without passing an specific book, the code will raise an exception:

	  File "/devel/v4l/docs/Documentation/sphinx/automarkup.py", line 86, in auto_markup
	    node.parent.replace(node, markup_funcs(name, app, node))
	  File "/devel/v4l/docs/Documentation/sphinx/automarkup.py", line 59, in markup_funcs
	    'function', target, pxref, lit_text)
	  File "/devel/v4l/docs/sphinx_2.0/lib/python3.7/site-packages/sphinx/domains/c.py", line 308, in resolve_xref
	    contnode, target)
	  File "/devel/v4l/docs/sphinx_2.0/lib/python3.7/site-packages/sphinx/util/nodes.py", line 450, in make_refnode
	    '#' + targetid)
	  File "/devel/v4l/docs/sphinx_2.0/lib/python3.7/site-packages/sphinx/builders/latex/__init__.py", line 159, in get_relative_uri
	    return self.get_target_uri(to, typ)
	  File "/devel/v4l/docs/sphinx_2.0/lib/python3.7/site-packages/sphinx/builders/latex/__init__.py", line 152, in get_target_uri
	    raise NoUri
	sphinx.environment.NoUri

This happens because not all references will belong to a single
PDF/LaTeX document.

Better to just ignore those than breaking Sphinx build.

Fixes: d74b0d31ddde ("Docs: An initial automarkup extension for sphinx")
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
[jc: Narrowed the "except" and tweaked the comment]
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/sphinx/automarkup.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/sphinx/automarkup.py b/Documentation/sphinx/automarkup.py
index b300cf129869..77e89c1956d7 100644
--- a/Documentation/sphinx/automarkup.py
+++ b/Documentation/sphinx/automarkup.py
@@ -6,6 +6,7 @@
 #
 from docutils import nodes
 from sphinx import addnodes
+from sphinx.environment import NoUri
 import re
 
 #
@@ -55,8 +56,15 @@ def markup_funcs(docname, app, node):
                                           reftype = 'function',
                                           reftarget = target, modname = None,
                                           classname = None)
-            xref = cdom.resolve_xref(app.env, docname, app.builder,
-                                     'function', target, pxref, lit_text)
+            #
+            # XXX The Latex builder will throw NoUri exceptions here,
+            # work around that by ignoring them.
+            #
+            try:
+                xref = cdom.resolve_xref(app.env, docname, app.builder,
+                                         'function', target, pxref, lit_text)
+            except NoUri:
+                xref = None
         #
         # Toss the xref into the list if we got it; otherwise just put
         # the function text.
-- 
cgit 


From 58dd7a8d9d02ee07d7aee450c5515cfe3ef8db21 Mon Sep 17 00:00:00 2001
From: Benedikt Spranger <b.spranger@linutronix.de>
Date: Fri, 5 Jul 2019 11:57:18 +0200
Subject: Documentation: net: dsa: Describe DSA switch configuration

Document DSA tagged and VLAN based switch configuration by showcases.

Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/configuration.rst | 292 +++++++++++++++++++++++++
 Documentation/networking/dsa/index.rst         |   1 +
 2 files changed, 293 insertions(+)
 create mode 100644 Documentation/networking/dsa/configuration.rst

(limited to 'Documentation')

diff --git a/Documentation/networking/dsa/configuration.rst b/Documentation/networking/dsa/configuration.rst
new file mode 100644
index 000000000000..af029b3ca2ab
--- /dev/null
+++ b/Documentation/networking/dsa/configuration.rst
@@ -0,0 +1,292 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================================
+DSA switch configuration from userspace
+=======================================
+
+The DSA switch configuration is not integrated into the main userspace
+network configuration suites by now and has to be performed manualy.
+
+.. _dsa-config-showcases:
+
+Configuration showcases
+-----------------------
+
+To configure a DSA switch a couple of commands need to be executed. In this
+documentation some common configuration scenarios are handled as showcases:
+
+*single port*
+  Every switch port acts as a different configurable Ethernet port
+
+*bridge*
+  Every switch port is part of one configurable Ethernet bridge
+
+*gateway*
+  Every switch port except one upstream port is part of a configurable
+  Ethernet bridge.
+  The upstream port acts as different configurable Ethernet port.
+
+All configurations are performed with tools from iproute2, which is available
+at https://www.kernel.org/pub/linux/utils/net/iproute2/
+
+Through DSA every port of a switch is handled like a normal linux Ethernet
+interface. The CPU port is the switch port connected to an Ethernet MAC chip.
+The corresponding linux Ethernet interface is called the master interface.
+All other corresponding linux interfaces are called slave interfaces.
+
+The slave interfaces depend on the master interface. They can only brought up,
+when the master interface is up.
+
+In this documentation the following Ethernet interfaces are used:
+
+*eth0*
+  the master interface
+
+*lan1*
+  a slave interface
+
+*lan2*
+  another slave interface
+
+*lan3*
+  a third slave interface
+
+*wan*
+  A slave interface dedicated for upstream traffic
+
+Further Ethernet interfaces can be configured similar.
+The configured IPs and networks are:
+
+*single port*
+  * lan1: 192.0.2.1/30 (192.0.2.0 - 192.0.2.3)
+  * lan2: 192.0.2.5/30 (192.0.2.4 - 192.0.2.7)
+  * lan3: 192.0.2.9/30 (192.0.2.8 - 192.0.2.11)
+
+*bridge*
+  * br0: 192.0.2.129/25 (192.0.2.128 - 192.0.2.255)
+
+*gateway*
+  * br0: 192.0.2.129/25 (192.0.2.128 - 192.0.2.255)
+  * wan: 192.0.2.1/30 (192.0.2.0 - 192.0.2.3)
+
+.. _dsa-tagged-configuration:
+
+Configuration with tagging support
+----------------------------------
+
+The tagging based configuration is desired and supported by the majority of
+DSA switches. These switches are capable to tag incoming and outgoing traffic
+without using a VLAN based configuration.
+
+single port
+~~~~~~~~~~~
+
+.. code-block:: sh
+
+  # configure each interface
+  ip addr add 192.0.2.1/30 dev lan1
+  ip addr add 192.0.2.5/30 dev lan2
+  ip addr add 192.0.2.9/30 dev lan3
+
+  # The master interface needs to be brought up before the slave ports.
+  ip link set eth0 up
+
+  # bring up the slave interfaces
+  ip link set lan1 up
+  ip link set lan2 up
+  ip link set lan3 up
+
+bridge
+~~~~~~
+
+.. code-block:: sh
+
+  # The master interface needs to be brought up before the slave ports.
+  ip link set eth0 up
+
+  # bring up the slave interfaces
+  ip link set lan1 up
+  ip link set lan2 up
+  ip link set lan3 up
+
+  # create bridge
+  ip link add name br0 type bridge
+
+  # add ports to bridge
+  ip link set dev lan1 master br0
+  ip link set dev lan2 master br0
+  ip link set dev lan3 master br0
+
+  # configure the bridge
+  ip addr add 192.0.2.129/25 dev br0
+
+  # bring up the bridge
+  ip link set dev br0 up
+
+gateway
+~~~~~~~
+
+.. code-block:: sh
+
+  # The master interface needs to be brought up before the slave ports.
+  ip link set eth0 up
+
+  # bring up the slave interfaces
+  ip link set wan up
+  ip link set lan1 up
+  ip link set lan2 up
+
+  # configure the upstream port
+  ip addr add 192.0.2.1/30 dev wan
+
+  # create bridge
+  ip link add name br0 type bridge
+
+  # add ports to bridge
+  ip link set dev lan1 master br0
+  ip link set dev lan2 master br0
+
+  # configure the bridge
+  ip addr add 192.0.2.129/25 dev br0
+
+  # bring up the bridge
+  ip link set dev br0 up
+
+.. _dsa-vlan-configuration:
+
+Configuration without tagging support
+-------------------------------------
+
+A minority of switches are not capable to use a taging protocol
+(DSA_TAG_PROTO_NONE). These switches can be configured by a VLAN based
+configuration.
+
+single port
+~~~~~~~~~~~
+The configuration can only be set up via VLAN tagging and bridge setup.
+
+.. code-block:: sh
+
+  # tag traffic on CPU port
+  ip link add link eth0 name eth0.1 type vlan id 1
+  ip link add link eth0 name eth0.2 type vlan id 2
+  ip link add link eth0 name eth0.3 type vlan id 3
+
+  # The master interface needs to be brought up before the slave ports.
+  ip link set eth0 up
+  ip link set eth0.1 up
+  ip link set eth0.2 up
+  ip link set eth0.3 up
+
+  # bring up the slave interfaces
+  ip link set lan1 up
+  ip link set lan1 up
+  ip link set lan3 up
+
+  # create bridge
+  ip link add name br0 type bridge
+
+  # activate VLAN filtering
+  ip link set dev br0 type bridge vlan_filtering 1
+
+  # add ports to bridges
+  ip link set dev lan1 master br0
+  ip link set dev lan2 master br0
+  ip link set dev lan3 master br0
+
+  # tag traffic on ports
+  bridge vlan add dev lan1 vid 1 pvid untagged
+  bridge vlan add dev lan2 vid 2 pvid untagged
+  bridge vlan add dev lan3 vid 3 pvid untagged
+
+  # configure the VLANs
+  ip addr add 192.0.2.1/30 dev eth0.1
+  ip addr add 192.0.2.5/30 dev eth0.2
+  ip addr add 192.0.2.9/30 dev eth0.3
+
+  # bring up the bridge devices
+  ip link set br0 up
+
+
+bridge
+~~~~~~
+
+.. code-block:: sh
+
+  # tag traffic on CPU port
+  ip link add link eth0 name eth0.1 type vlan id 1
+
+  # The master interface needs to be brought up before the slave ports.
+  ip link set eth0 up
+  ip link set eth0.1 up
+
+  # bring up the slave interfaces
+  ip link set lan1 up
+  ip link set lan2 up
+  ip link set lan3 up
+
+  # create bridge
+  ip link add name br0 type bridge
+
+  # activate VLAN filtering
+  ip link set dev br0 type bridge vlan_filtering 1
+
+  # add ports to bridge
+  ip link set dev lan1 master br0
+  ip link set dev lan2 master br0
+  ip link set dev lan3 master br0
+  ip link set eth0.1 master br0
+
+  # tag traffic on ports
+  bridge vlan add dev lan1 vid 1 pvid untagged
+  bridge vlan add dev lan2 vid 1 pvid untagged
+  bridge vlan add dev lan3 vid 1 pvid untagged
+
+  # configure the bridge
+  ip addr add 192.0.2.129/25 dev br0
+
+  # bring up the bridge
+  ip link set dev br0 up
+
+gateway
+~~~~~~~
+
+.. code-block:: sh
+
+  # tag traffic on CPU port
+  ip link add link eth0 name eth0.1 type vlan id 1
+  ip link add link eth0 name eth0.2 type vlan id 2
+
+  # The master interface needs to be brought up before the slave ports.
+  ip link set eth0 up
+  ip link set eth0.1 up
+  ip link set eth0.2 up
+
+  # bring up the slave interfaces
+  ip link set wan up
+  ip link set lan1 up
+  ip link set lan2 up
+
+  # create bridge
+  ip link add name br0 type bridge
+
+  # activate VLAN filtering
+  ip link set dev br0 type bridge vlan_filtering 1
+
+  # add ports to bridges
+  ip link set dev wan master br0
+  ip link set eth0.1 master br0
+  ip link set dev lan1 master br0
+  ip link set dev lan2 master br0
+
+  # tag traffic on ports
+  bridge vlan add dev lan1 vid 1 pvid untagged
+  bridge vlan add dev lan2 vid 1 pvid untagged
+  bridge vlan add dev wan vid 2 pvid untagged
+
+  # configure the VLANs
+  ip addr add 192.0.2.1/30 dev eth0.2
+  ip addr add 192.0.2.129/25 dev br0
+
+  # bring up the bridge devices
+  ip link set br0 up
diff --git a/Documentation/networking/dsa/index.rst b/Documentation/networking/dsa/index.rst
index 0e5b7a9be406..c279cfbf9083 100644
--- a/Documentation/networking/dsa/index.rst
+++ b/Documentation/networking/dsa/index.rst
@@ -9,3 +9,4 @@ Distributed Switch Architecture
    bcm_sf2
    lan9303
    sja1105
+   configuration
-- 
cgit 


From ff2d339375d0162e322d96cfacba0ac8467a15c9 Mon Sep 17 00:00:00 2001
From: Benedikt Spranger <b.spranger@linutronix.de>
Date: Fri, 5 Jul 2019 11:57:19 +0200
Subject: Documentation: net: dsa: b53: Describe b53 configuration

Document the different needs of documentation for the b53 driver.

Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/b53.rst   | 183 +++++++++++++++++++++++++++++++++
 Documentation/networking/dsa/index.rst |   1 +
 2 files changed, 184 insertions(+)
 create mode 100644 Documentation/networking/dsa/b53.rst

(limited to 'Documentation')

diff --git a/Documentation/networking/dsa/b53.rst b/Documentation/networking/dsa/b53.rst
new file mode 100644
index 000000000000..b41637cdb82b
--- /dev/null
+++ b/Documentation/networking/dsa/b53.rst
@@ -0,0 +1,183 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================================
+Broadcom RoboSwitch Ethernet switch driver
+==========================================
+
+The Broadcom RoboSwitch Ethernet switch family is used in quite a range of
+xDSL router, cable modems and other multimedia devices.
+
+The actual implementation supports the devices BCM5325E, BCM5365, BCM539x,
+BCM53115 and BCM53125 as well as BCM63XX.
+
+Implementation details
+======================
+
+The driver is located in ``drivers/net/dsa/b53/`` and is implemented as a
+DSA driver; see ``Documentation/networking/dsa/dsa.rst`` for details on the
+subsystem and what it provides.
+
+The switch is, if possible, configured to enable a Broadcom specific 4-bytes
+switch tag which gets inserted by the switch for every packet forwarded to the
+CPU interface, conversely, the CPU network interface should insert a similar
+tag for packets entering the CPU port. The tag format is described in
+``net/dsa/tag_brcm.c``.
+
+The configuration of the device depends on whether or not tagging is
+supported.
+
+The interface names and example network configuration are used according the
+configuration described in the :ref:`dsa-config-showcases`.
+
+Configuration with tagging support
+----------------------------------
+
+The tagging based configuration is desired. It is not specific to the b53
+DSA driver and will work like all DSA drivers which supports tagging.
+
+See :ref:`dsa-tagged-configuration`.
+
+Configuration without tagging support
+-------------------------------------
+
+Older models (5325, 5365) support a different tag format that is not supported
+yet. 539x and 531x5 require managed mode and some special handling, which is
+also not yet supported. The tagging support is disabled in these cases and the
+switch need a different configuration.
+
+The configuration slightly differ from the :ref:`dsa-vlan-configuration`.
+
+The b53 tags the CPU port in all VLANs, since otherwise any PVID untagged
+VLAN programming would basically change the CPU port's default PVID and make
+it untagged, undesirable.
+
+In difference to the configuration described in :ref:`dsa-vlan-configuration`
+the default VLAN 1 has to be removed from the slave interface configuration in
+single port and gateway configuration, while there is no need to add an extra
+VLAN configuration in the bridge showcase.
+
+single port
+~~~~~~~~~~~
+The configuration can only be set up via VLAN tagging and bridge setup.
+By default packages are tagged with vid 1:
+
+.. code-block:: sh
+
+  # tag traffic on CPU port
+  ip link add link eth0 name eth0.1 type vlan id 1
+  ip link add link eth0 name eth0.2 type vlan id 2
+  ip link add link eth0 name eth0.3 type vlan id 3
+
+  # The master interface needs to be brought up before the slave ports.
+  ip link set eth0 up
+  ip link set eth0.1 up
+  ip link set eth0.2 up
+  ip link set eth0.3 up
+
+  # bring up the slave interfaces
+  ip link set wan up
+  ip link set lan1 up
+  ip link set lan2 up
+
+  # create bridge
+  ip link add name br0 type bridge
+
+  # activate VLAN filtering
+  ip link set dev br0 type bridge vlan_filtering 1
+
+  # add ports to bridges
+  ip link set dev wan master br0
+  ip link set dev lan1 master br0
+  ip link set dev lan2 master br0
+
+  # tag traffic on ports
+  bridge vlan add dev lan1 vid 2 pvid untagged
+  bridge vlan del dev lan1 vid 1
+  bridge vlan add dev lan2 vid 3 pvid untagged
+  bridge vlan del dev lan2 vid 1
+
+  # configure the VLANs
+  ip addr add 192.0.2.1/30 dev eth0.1
+  ip addr add 192.0.2.5/30 dev eth0.2
+  ip addr add 192.0.2.9/30 dev eth0.3
+
+  # bring up the bridge devices
+  ip link set br0 up
+
+
+bridge
+~~~~~~
+
+.. code-block:: sh
+
+  # tag traffic on CPU port
+  ip link add link eth0 name eth0.1 type vlan id 1
+
+  # The master interface needs to be brought up before the slave ports.
+  ip link set eth0 up
+  ip link set eth0.1 up
+
+  # bring up the slave interfaces
+  ip link set wan up
+  ip link set lan1 up
+  ip link set lan2 up
+
+  # create bridge
+  ip link add name br0 type bridge
+
+  # activate VLAN filtering
+  ip link set dev br0 type bridge vlan_filtering 1
+
+  # add ports to bridge
+  ip link set dev wan master br0
+  ip link set dev lan1 master br0
+  ip link set dev lan2 master br0
+  ip link set eth0.1 master br0
+
+  # configure the bridge
+  ip addr add 192.0.2.129/25 dev br0
+
+  # bring up the bridge
+  ip link set dev br0 up
+
+gateway
+~~~~~~~
+
+.. code-block:: sh
+
+  # tag traffic on CPU port
+  ip link add link eth0 name eth0.1 type vlan id 1
+  ip link add link eth0 name eth0.2 type vlan id 2
+
+  # The master interface needs to be brought up before the slave ports.
+  ip link set eth0 up
+  ip link set eth0.1 up
+  ip link set eth0.2 up
+
+  # bring up the slave interfaces
+  ip link set wan up
+  ip link set lan1 up
+  ip link set lan2 up
+
+  # create bridge
+  ip link add name br0 type bridge
+
+  # activate VLAN filtering
+  ip link set dev br0 type bridge vlan_filtering 1
+
+  # add ports to bridges
+  ip link set dev wan master br0
+  ip link set eth0.1 master br0
+  ip link set dev lan1 master br0
+  ip link set dev lan2 master br0
+
+  # tag traffic on ports
+  bridge vlan add dev wan vid 2 pvid untagged
+  bridge vlan del dev wan vid 1
+
+  # configure the VLANs
+  ip addr add 192.0.2.1/30 dev eth0.2
+  ip addr add 192.0.2.129/25 dev br0
+
+  # bring up the bridge devices
+  ip link set br0 up
diff --git a/Documentation/networking/dsa/index.rst b/Documentation/networking/dsa/index.rst
index c279cfbf9083..ee631e2d646f 100644
--- a/Documentation/networking/dsa/index.rst
+++ b/Documentation/networking/dsa/index.rst
@@ -6,6 +6,7 @@ Distributed Switch Architecture
    :maxdepth: 1
 
    dsa
+   b53
    bcm_sf2
    lan9303
    sja1105
-- 
cgit 


From d8f74f0975d8360ab15312697585837c989c406b Mon Sep 17 00:00:00 2001
From: Stephen Suryaputra <ssuryaextr@gmail.com>
Date: Sat, 6 Jul 2019 10:55:18 -0400
Subject: ipv6: Support multipath hashing on inner IP pkts

Make the same support as commit 363887a2cdfe ("ipv4: Support multipath
hashing on inner IP pkts for GRE tunnel") for outer IPv6. The hashing
considers both IPv4 and IPv6 pkts when they are tunneled by IPv6 GRE.

Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index f0e6d1f53485..48c79e78817b 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1473,6 +1473,7 @@ fib_multipath_hash_policy - INTEGER
 	Possible values:
 	0 - Layer 3 (source and destination addresses plus flow label)
 	1 - Layer 4 (standard 5-tuple)
+	2 - Layer 3 or inner Layer 3 if present
 
 anycast_src_echo_reply - BOOLEAN
 	Controls the use of anycast addresses as source addresses for ICMPv6
-- 
cgit 


From c93a0368aaa2962e6c89da20f79b8789b42e3387 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 1 Jul 2019 09:58:41 +0900
Subject: kbuild: do not create wrappers for header-test-y

header-test-y does not work with headers in sub-directories.

For example, you may want to write a Makefile, like this:

include/linux/Kbuild:

  header-test-y += mtd/nand.h

This entry will create a wrapper include/linux/mtd/nand.hdrtest.c
with the following content:

  #include "mtd/nand.h"

To make this work, we need to add $(srctree)/include/linux to the
header search path. It would be tedious to add ccflags-y.

Instead, we could change the *.hdrtest.c rule to wrap:

  #include "nand.h"

This works for in-tree build since #include "..." searches in the
relative path from the header with this directive. For O=... build,
we need to add $(srctree)/include/linux/mtd to the header search path,
which will be even more tedious.

After all, I thought it would be handier to compile headers directly
without creating wrappers.

I added a new build rule to compile %.h into %.h.s

The target is %.h.s instead of %.h.o because it is slightly faster.
Also, as for GCC, an empty assembly is smaller than an empty object.

I wrote the build rule:

  $(CC) $(c_flags) -S -o $@ -x c /dev/null -include $<

instead of:

  $(CC) $(c_flags) -S -o $@ -x c $<

Both work fine with GCC, but the latter is bad for Clang.

This comes down to the difference in the -Wunused-function policy.
GCC does not warn about unused 'static inline' functions at all.
Clang does not warn about the ones in included headers, but does
about the ones in the source. So, we should handle headers as
headers, not as source files.

In fact, this has been hidden since commit abb2ea7dfd82 ("compiler,
clang: suppress warning for unused static inline functions"), but we
should not rely on that.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Tested-by: Jani Nikula <jani.nikula@intel.com>
---
 Documentation/dontdiff             | 1 -
 Documentation/kbuild/makefiles.txt | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 554dfe4883d2..5eba889ea84d 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -19,7 +19,6 @@
 *.grep
 *.grp
 *.gz
-*.hdrtest.c
 *.html
 *.i
 *.jpeg
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index ca4b24ec0399..5080fec34609 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -1023,8 +1023,7 @@ When kbuild executes, the following steps are followed (roughly):
 	header-test-y specifies headers (*.h) in the current directory that
 	should be compile tested to ensure they are self-contained,
 	i.e. compilable as standalone units. If CONFIG_HEADER_TEST is enabled,
-	this autogenerates dummy sources to include the headers, and builds them
-	as part of extra-y.
+	this builds them as part of extra-y.
 
 --- 6.7 Commands useful for building a boot image
 
-- 
cgit 


From 1e21cbfada87f697a2a7c450542a7d28925abee6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 1 Jul 2019 09:58:42 +0900
Subject: kbuild: support header-test-pattern-y

In my view, most of headers can be self-contained. So, it would be
tedious to add every header to header-test-y explicitly. We usually
end up with "all headers with some exceptions".

There are two types in exceptions:

[1] headers that are never compiled as standalone units

  For examples, include/linux/compiler-gcc.h is not intended for
  direct inclusion. We should always exclude such ones.

[2] headers that are conditionally compiled as standalone units

  Some headers can be compiled only for particular architectures.
  For example, include/linux/arm-cci.h can be compiled only for
  arm/arm64 because it requires <asm/arm-cci.h> to exist.
  Clang can compile include/soc/nps/mtm.h only for arc because
  it contains an arch-specific register in inline assembler.

So, you can write Makefile like this:

  header-test-                += linux/compiler-gcc.h
  header-test-$(CONFIG_ARM)   += linux/arm-cci.h
  header-test-$(CONFIG_ARM64) += linux/arm-cci.h
  header-test-$(CONFIG_ARC)   += soc/nps/mtm.h

The new syntax header-test-pattern-y will be useful to specify
"the rest".

The typical usage is like this:

  header-test-pattern-y += */*.h

This will add all the headers in sub-directories to the test coverage,
excluding $(header-test-). In this regards, header-test-pattern-y
behaves like a weaker variant of header-test-y.

Caveat:
The patterns in header-test-pattern-y are prefixed with $(srctree)/$(src)/
but not $(objtree)/$(obj)/. Stale generated headers are often left over
when you traverse the git history without cleaning. Wildcard patterns for
$(objtree) may match to stale headers, which could fail to compile.
One pitfall is $(srctree)/$(src)/ and $(objtree)/$(obj)/ point to the
same directory for in-tree building. So, header-test-pattern-y should
be used with care since it can potentially match to stale headers.

Caveat2:
You could use wildcard for header-test-. For example,

  header-test- += asm-generic/%

... will exclude headers in asm-generic directory. Unfortunately, the
wildcard character is '%' instead of '*' here because this is evaluated
by $(filter-out ...) whereas header-test-pattern-y is evaluated by
$(wildcard ...). This is a kludge, but seems useful in some places...

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Tested-by: Jani Nikula <jani.nikula@intel.com>
---
 Documentation/kbuild/makefiles.txt | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index 5080fec34609..b817e6cefb77 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -1025,6 +1025,16 @@ When kbuild executes, the following steps are followed (roughly):
 	i.e. compilable as standalone units. If CONFIG_HEADER_TEST is enabled,
 	this builds them as part of extra-y.
 
+    header-test-pattern-y
+
+	This works as a weaker version of header-test-y, and accepts wildcard
+	patterns. The typical usage is:
+
+		  header-test-pattern-y += *.h
+
+	This specifies all the files that matches to '*.h' in the current
+	directory, but the files in 'header-test-' are excluded.
+
 --- 6.7 Commands useful for building a boot image
 
 	Kbuild provides a few macros that are useful when building a
-- 
cgit 


From f21ce913a82faa512b583640226d4ab39b38735e Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 18 Jun 2019 14:06:49 -0600
Subject: dt-bindings: arm: Convert RDA Micro board/soc bindings to json-schema
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert RDA Micro SoC bindings to DT schema format using json-schema.

Cc: "Andreas Färber" <afaerber@suse.de>
Acked-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/rda.txt  | 17 -----------------
 Documentation/devicetree/bindings/arm/rda.yaml | 20 ++++++++++++++++++++
 2 files changed, 20 insertions(+), 17 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/rda.txt
 create mode 100644 Documentation/devicetree/bindings/arm/rda.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/rda.txt b/Documentation/devicetree/bindings/arm/rda.txt
deleted file mode 100644
index 43c80762c428..000000000000
--- a/Documentation/devicetree/bindings/arm/rda.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-RDA Micro platforms device tree bindings
-----------------------------------------
-
-RDA8810PL SoC
-=============
-
-Required root node properties:
-
- - compatible :  must contain "rda,8810pl"
-
-
-Boards:
-
-Root node property compatible must contain, depending on board:
-
- - Orange Pi 2G-IoT: "xunlong,orangepi-2g-iot"
- - Orange Pi i96: "xunlong,orangepi-i96"
diff --git a/Documentation/devicetree/bindings/arm/rda.yaml b/Documentation/devicetree/bindings/arm/rda.yaml
new file mode 100644
index 000000000000..51cec2b63b04
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/rda.yaml
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/rda.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: RDA Micro platforms device tree bindings
+
+maintainers:
+  - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - xunlong,orangepi-2g-iot     # Orange Pi 2G-IoT
+          - xunlong,orangepi-i96        # Orange Pi i96
+      - const: rda,8810pl
+
+...
-- 
cgit 


From 80785f5a22e9073e2ded5958feb7f220e066d17b Mon Sep 17 00:00:00 2001
From: Josua Mayer <josua@solid-run.com>
Date: Tue, 9 Jul 2019 15:00:58 +0200
Subject: dt-bindings: allow up to four clocks for orion-mdio

Armada 8040 needs four clocks to be enabled for MDIO accesses to work.
Update the binding to allow the extra clock to be specified.

Cc: stable@vger.kernel.org
Fixes: 6d6a331f44a1 ("dt-bindings: allow up to three clocks for orion-mdio")
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Josua Mayer <josua@solid-run.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/marvell-orion-mdio.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/marvell-orion-mdio.txt b/Documentation/devicetree/bindings/net/marvell-orion-mdio.txt
index 42cd81090a2c..3f3cfc1d8d4d 100644
--- a/Documentation/devicetree/bindings/net/marvell-orion-mdio.txt
+++ b/Documentation/devicetree/bindings/net/marvell-orion-mdio.txt
@@ -16,7 +16,7 @@ Required properties:
 
 Optional properties:
 - interrupts: interrupt line number for the SMI error/done interrupt
-- clocks: phandle for up to three required clocks for the MDIO instance
+- clocks: phandle for up to four required clocks for the MDIO instance
 
 The child nodes of the MDIO driver are the individual PHY devices
 connected to this MDIO bus. They must have a "reg" property given the
-- 
cgit 


From 1cadfc5877f15a14a77f4161f3ce9f436a39116d Mon Sep 17 00:00:00 2001
From: Claire Chang <tientzu@chromium.org>
Date: Wed, 19 Jun 2019 16:41:10 +0800
Subject: dt-bindings: serial: add documentation for Rx in-band wakeup support

To support Rx in-band wakeup, one must create an interrupt specifier with
edge sensitivity on Rx pin and an addtional pinctrl to reconfigure Rx pin
to normal GPIO in sleep state. Driver will switch to sleep mode pinctrl and
enable irq wake before suspend and restore to default settings when
resuming.

Signed-off-by: Claire Chang <tientzu@chromium.org>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/serial/mtk-uart.txt | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/serial/mtk-uart.txt b/Documentation/devicetree/bindings/serial/mtk-uart.txt
index c6b5262eb352..6fdffb735fb9 100644
--- a/Documentation/devicetree/bindings/serial/mtk-uart.txt
+++ b/Documentation/devicetree/bindings/serial/mtk-uart.txt
@@ -23,7 +23,12 @@ Required properties:
 
 - reg: The base address of the UART register bank.
 
-- interrupts: A single interrupt specifier.
+- interrupts:
+  index 0: an interrupt specifier for the UART controller itself
+  index 1: optional, an interrupt specifier with edge sensitivity on Rx pin to
+           support Rx in-band wake up. If one would like to use this feature,
+           one must create an addtional pinctrl to reconfigure Rx pin to normal
+           GPIO before suspend.
 
 - clocks : Must contain an entry for each entry in clock-names.
   See ../clocks/clock-bindings.txt for details.
@@ -39,7 +44,11 @@ Example:
 	uart0: serial@11006000 {
 		compatible = "mediatek,mt6589-uart", "mediatek,mt6577-uart";
 		reg = <0x11006000 0x400>;
-		interrupts = <GIC_SPI 51 IRQ_TYPE_LEVEL_LOW>;
+		interrupts = <GIC_SPI 51 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 52 IRQ_TYPE_EDGE_FALLING>;
 		clocks = <&uart_clk>, <&bus_clk>;
 		clock-names = "baud", "bus";
+		pinctrl-names = "default", "sleep";
+		pinctrl-0 = <&uart_pin>;
+		pinctrl-1 = <&uart_pin_sleep>;
 	};
-- 
cgit 


From bf96244074d9d0dc1b9ab0c9a6d95ae7b6ad628d Mon Sep 17 00:00:00 2001
From: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
Date: Tue, 9 Jul 2019 11:31:07 +0800
Subject: net: hisilicon: dt-bindings: Add an field of port-handle

In general, group is the same as the port, but some
boards specify a special group for better load
balancing of each processing unit.

Signed-off-by: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt b/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt
index d1df8a00e1f3..464c0dafc617 100644
--- a/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt
+++ b/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt
@@ -10,6 +10,7 @@ Required properties:
 	phandle, specifies a reference to the syscon ppe node
 	port, port number connected to the controller
 	channel, recv channel start from channel * number (RX_DESC_NUM)
+	group, field in the pkg desc, in general, it is the same as the port.
 - phy-mode: see ethernet.txt [1].
 
 Optional properties:
@@ -66,7 +67,7 @@ Example:
 		reg = <0x28b0000 0x10000>;
 		interrupts = <0 413 4>;
 		phy-mode = "mii";
-		port-handle = <&ppe 31 0>;
+		port-handle = <&ppe 31 0 31>;
 	};
 
 	ge0: ethernet@2800000 {
@@ -74,7 +75,7 @@ Example:
 		reg = <0x2800000 0x10000>;
 		interrupts = <0 402 4>;
 		phy-mode = "sgmii";
-		port-handle = <&ppe 0 1>;
+		port-handle = <&ppe 0 1 0>;
 		phy-handle = <&phy0>;
 	};
 
@@ -83,6 +84,6 @@ Example:
 		reg = <0x2880000 0x10000>;
 		interrupts = <0 410 4>;
 		phy-mode = "sgmii";
-		port-handle = <&ppe 8 2>;
+		port-handle = <&ppe 8 2 8>;
 		phy-handle = <&phy1>;
 	};
-- 
cgit 


From f15d6358425af7976e146a07a47b75e4fe8ccd41 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Sat, 22 Jun 2019 11:45:57 +0800
Subject: dt-bindings: 83xx-512x-pci: Drop cell-index property

28eac2b74cc7 ("powerpc/fsl: Remove cell-index from PCI nodes"),
and for now it is still not used, drop it from doc.

Cc: Kumar Gala <galak@kernel.crashing.org>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/pci/83xx-512x-pci.txt | 1 -
 1 file changed, 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt b/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt
index b9165b72473c..3abeecf4983f 100644
--- a/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt
+++ b/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt
@@ -9,7 +9,6 @@ Freescale 83xx and 512x SOCs include the same PCI bridge core.
 
 Example (MPC8313ERDB)
 	pci0: pci@e0008500 {
-		cell-index = <1>;
 		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
 		interrupt-map = <
 				/* IDSEL 0x0E -mini PCI */
-- 
cgit 


From 09d9ea4014d1c7715c072403a9e2d4e97f6084ec Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.io>
Date: Sun, 23 Jun 2019 12:37:57 +0800
Subject: dt-bindings: vendor-prefixes: add SoChip

Shenzhen SoChip Technology Co., Ltd. is a hardware vendor that produces
EVBs with Allwinner chips. There's also a SoC named S3 that is developed
by Allwinner (based on Allwinner V3/V3s) but branded SoChip.

Add the vendor prefix for SoChip.

Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 52e84257363b..7015974a7904 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -851,6 +851,8 @@ patternProperties:
     description: Standard Microsystems Corporation
   "^snps,.*":
     description: Synopsys, Inc.
+  "^sochip,.*":
+    description: Shenzhen SoChip Technology Co., Ltd.
   "^socionext,.*":
     description: Socionext Inc.
   "^solidrun,.*":
-- 
cgit 


From f59d261180f3b66367962f1974090815ce710056 Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.io>
Date: Sun, 23 Jun 2019 12:37:59 +0800
Subject: dt-bindings: vendor-prefixes: add Sipeed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Shenzhen Sipeed Technology Co., Ltd. is a company focused on development
kits, which also contains rebranded Lichee Pi series.

Add its vendor prefix binding.

Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 7015974a7904..e779ed16de2b 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -839,6 +839,8 @@ patternProperties:
     description: Sinlinx Electronics Technology Co., LTD
   "^sinovoip,.*":
     description: SinoVoip Co., Ltd
+  "^sipeed,.*":
+    description: Shenzhen Sipeed Technology Co., Ltd.
   "^sirf,.*":
     description: SiRF Technology, Inc.
   "^sis,.*":
-- 
cgit 


From f97ee8f5d9cdd03933517a6336836b8e4415e668 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Fri, 21 Jun 2019 14:12:32 +0800
Subject: dt-bindings: mmc: sprd: Add pinctrl support

When changing SD card voltage signal for Spreadtrum SD host controller,
it also need to switch related pin's state. Thus add pinctrl properties'
description in documentation.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/sdhci-sprd.txt | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt b/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt
index e675397fa428..eb7eb1b529f0 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt
@@ -19,6 +19,9 @@ Required properties:
 Optional properties:
 - assigned-clocks: the same with "sdio" clock
 - assigned-clock-parents: the default parent of "sdio" clock
+- pinctrl-names: should be "default", "state_uhs"
+- pinctrl-0: should contain default/high speed pin control
+- pinctrl-1: should contain uhs mode pin control
 
 PHY DLL delays are used to delay the data valid window, and align the window
 to sampling clock. PHY DLL delays can be configured by following properties,
@@ -50,6 +53,10 @@ sdio0: sdio@20600000 {
 	assigned-clocks = <&ap_clk CLK_EMMC_2X>;
 	assigned-clock-parents = <&rpll CLK_RPLL_390M>;
 
+	pinctrl-names = "default", "state_uhs";
+	pinctrl-0 = <&sd0_pins_default>;
+	pinctrl-1 = <&sd0_pins_uhs>;
+
 	sprd,phy-delay-sd-uhs-sdr104 = <0x3f 0x7f 0x2e 0x2e>;
 	bus-width = <8>;
 	non-removable;
-- 
cgit 


From 1cdca16c043abd4803c3979e5faefaa623703e66 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 24 Jun 2019 16:03:45 +0900
Subject: dt-binding: mmc: rename tmio_mmc.txt to renesas,sdhi.txt

As commit b6147490e6aa ("mmc: tmio: split core functionality, DMA and
MFD glue") said, these MMC controllers use the IP from Panasonic.

TMIO (Toshiba Mobile IO) MMC was the first upstreamed user of this IP.
The common driver code was split and expanded as 'tmio-mmc-core', then
it became historical misnomer since 'tmio' is not the name of this IP.

In the discussion [1], we decide to keep this name as-is at least in
Linux driver level because renaming everything is a big churn.

However, DT should not be oriented to a particular project even though
it is mainly developed in Linux communities.

This is the misfortune only in Linux. Let's stop exporting it to other
projects, where there is no good reason to call this hardware "TMIO".
Rename the file to renesas,sdhi.txt. In fact, all the information in
this file is specific to the Renesas platform.

This commit also removes the first paragraph entirely. The DT-binding
should describe the hardware. It is strange to talk about Linux driver
internals such as how the drivers are probed, how platform data are
handed off, etc.

[1] https://www.spinics.net/lists/linux-mmc/msg46952.html

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 .../devicetree/bindings/mmc/renesas,sdhi.txt       | 111 +++++++++++++++++++
 Documentation/devicetree/bindings/mmc/tmio_mmc.txt | 120 ---------------------
 2 files changed, 111 insertions(+), 120 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/mmc/renesas,sdhi.txt
 delete mode 100644 Documentation/devicetree/bindings/mmc/tmio_mmc.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mmc/renesas,sdhi.txt b/Documentation/devicetree/bindings/mmc/renesas,sdhi.txt
new file mode 100644
index 000000000000..dd08d038a65c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/renesas,sdhi.txt
@@ -0,0 +1,111 @@
+* Renesas SDHI SD/MMC controller
+
+Required properties:
+- compatible: should contain one or more of the following:
+		"renesas,sdhi-sh73a0" - SDHI IP on SH73A0 SoC
+		"renesas,sdhi-r7s72100" - SDHI IP on R7S72100 SoC
+		"renesas,sdhi-r7s9210" - SDHI IP on R7S9210 SoC
+		"renesas,sdhi-r8a73a4" - SDHI IP on R8A73A4 SoC
+		"renesas,sdhi-r8a7740" - SDHI IP on R8A7740 SoC
+		"renesas,sdhi-r8a7743" - SDHI IP on R8A7743 SoC
+		"renesas,sdhi-r8a7744" - SDHI IP on R8A7744 SoC
+		"renesas,sdhi-r8a7745" - SDHI IP on R8A7745 SoC
+		"renesas,sdhi-r8a774a1" - SDHI IP on R8A774A1 SoC
+		"renesas,sdhi-r8a774c0" - SDHI IP on R8A774C0 SoC
+		"renesas,sdhi-r8a77470" - SDHI IP on R8A77470 SoC
+		"renesas,sdhi-mmc-r8a77470" - SDHI/MMC IP on R8A77470 SoC
+		"renesas,sdhi-r8a7778" - SDHI IP on R8A7778 SoC
+		"renesas,sdhi-r8a7779" - SDHI IP on R8A7779 SoC
+		"renesas,sdhi-r8a7790" - SDHI IP on R8A7790 SoC
+		"renesas,sdhi-r8a7791" - SDHI IP on R8A7791 SoC
+		"renesas,sdhi-r8a7792" - SDHI IP on R8A7792 SoC
+		"renesas,sdhi-r8a7793" - SDHI IP on R8A7793 SoC
+		"renesas,sdhi-r8a7794" - SDHI IP on R8A7794 SoC
+		"renesas,sdhi-r8a7795" - SDHI IP on R8A7795 SoC
+		"renesas,sdhi-r8a7796" - SDHI IP on R8A7796 SoC
+		"renesas,sdhi-r8a77965" - SDHI IP on R8A77965 SoC
+		"renesas,sdhi-r8a77970" - SDHI IP on R8A77970 SoC
+		"renesas,sdhi-r8a77980" - SDHI IP on R8A77980 SoC
+		"renesas,sdhi-r8a77990" - SDHI IP on R8A77990 SoC
+		"renesas,sdhi-r8a77995" - SDHI IP on R8A77995 SoC
+		"renesas,sdhi-shmobile" - a generic sh-mobile SDHI controller
+		"renesas,rcar-gen1-sdhi" - a generic R-Car Gen1 SDHI controller
+		"renesas,rcar-gen2-sdhi" - a generic R-Car Gen2 and RZ/G1 SDHI
+					   (not SDHI/MMC) controller
+		"renesas,rcar-gen3-sdhi" - a generic R-Car Gen3 or RZ/G2
+					   SDHI controller
+
+
+		When compatible with the generic version, nodes must list
+		the SoC-specific version corresponding to the platform
+		first followed by the generic version.
+
+- clocks: Most controllers only have 1 clock source per channel. However, on
+	  some variations of this controller, the internal card detection
+	  logic that exists in this controller is sectioned off to be run by a
+	  separate second clock source to allow the main core clock to be turned
+	  off to save power.
+	  If 2 clocks are specified by the hardware, you must name them as
+	  "core" and "cd". If the controller only has 1 clock, naming is not
+	  required.
+	  Devices which have more than 1 clock are listed below:
+	  2: R7S72100, R7S9210
+
+Optional properties:
+- pinctrl-names: should be "default", "state_uhs"
+- pinctrl-0: should contain default/high speed pin ctrl
+- pinctrl-1: should contain uhs mode pin ctrl
+
+Example: R8A7790 (R-Car H2) SDHI controller nodes
+
+	sdhi0: sd@ee100000 {
+		compatible = "renesas,sdhi-r8a7790", "renesas,rcar-gen2-sdhi";
+		reg = <0 0xee100000 0 0x328>;
+		interrupts = <GIC_SPI 165 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cpg CPG_MOD 314>;
+		dmas = <&dmac0 0xcd>, <&dmac0 0xce>,
+		       <&dmac1 0xcd>, <&dmac1 0xce>;
+		dma-names = "tx", "rx", "tx", "rx";
+		max-frequency = <195000000>;
+		power-domains = <&sysc R8A7790_PD_ALWAYS_ON>;
+		resets = <&cpg 314>;
+	};
+
+	sdhi1: sd@ee120000 {
+		compatible = "renesas,sdhi-r8a7790", "renesas,rcar-gen2-sdhi";
+		reg = <0 0xee120000 0 0x328>;
+		interrupts = <GIC_SPI 166 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cpg CPG_MOD 313>;
+		dmas = <&dmac0 0xc9>, <&dmac0 0xca>,
+		       <&dmac1 0xc9>, <&dmac1 0xca>;
+		dma-names = "tx", "rx", "tx", "rx";
+		max-frequency = <195000000>;
+		power-domains = <&sysc R8A7790_PD_ALWAYS_ON>;
+		resets = <&cpg 313>;
+	};
+
+	sdhi2: sd@ee140000 {
+		compatible = "renesas,sdhi-r8a7790", "renesas,rcar-gen2-sdhi";
+		reg = <0 0xee140000 0 0x100>;
+		interrupts = <GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cpg CPG_MOD 312>;
+		dmas = <&dmac0 0xc1>, <&dmac0 0xc2>,
+		       <&dmac1 0xc1>, <&dmac1 0xc2>;
+		dma-names = "tx", "rx", "tx", "rx";
+		max-frequency = <97500000>;
+		power-domains = <&sysc R8A7790_PD_ALWAYS_ON>;
+		resets = <&cpg 312>;
+	};
+
+	sdhi3: sd@ee160000 {
+		compatible = "renesas,sdhi-r8a7790", "renesas,rcar-gen2-sdhi";
+		reg = <0 0xee160000 0 0x100>;
+		interrupts = <GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cpg CPG_MOD 311>;
+		dmas = <&dmac0 0xd3>, <&dmac0 0xd4>,
+		       <&dmac1 0xd3>, <&dmac1 0xd4>;
+		dma-names = "tx", "rx", "tx", "rx";
+		max-frequency = <97500000>;
+		power-domains = <&sysc R8A7790_PD_ALWAYS_ON>;
+		resets = <&cpg 311>;
+	};
diff --git a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
deleted file mode 100644
index 2b4f17ca9087..000000000000
--- a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
+++ /dev/null
@@ -1,120 +0,0 @@
-* Toshiba Mobile IO SD/MMC controller
-
-The tmio-mmc driver doesn't probe its devices actively, instead its binding to
-devices is managed by either MFD drivers or by the sh_mobile_sdhi platform
-driver. Those drivers supply the tmio-mmc driver with platform data, that either
-describe hardware capabilities, known to them, or are obtained by them from
-their own platform data or from their DT information. In the latter case all
-compulsory and any optional properties, common to all SD/MMC drivers, as
-described in mmc.txt, can be used. Additionally the following tmio_mmc-specific
-optional bindings can be used.
-
-Required properties:
-- compatible: should contain one or more of the following:
-		"renesas,sdhi-sh73a0" - SDHI IP on SH73A0 SoC
-		"renesas,sdhi-r7s72100" - SDHI IP on R7S72100 SoC
-		"renesas,sdhi-r7s9210" - SDHI IP on R7S9210 SoC
-		"renesas,sdhi-r8a73a4" - SDHI IP on R8A73A4 SoC
-		"renesas,sdhi-r8a7740" - SDHI IP on R8A7740 SoC
-		"renesas,sdhi-r8a7743" - SDHI IP on R8A7743 SoC
-		"renesas,sdhi-r8a7744" - SDHI IP on R8A7744 SoC
-		"renesas,sdhi-r8a7745" - SDHI IP on R8A7745 SoC
-		"renesas,sdhi-r8a774a1" - SDHI IP on R8A774A1 SoC
-		"renesas,sdhi-r8a774c0" - SDHI IP on R8A774C0 SoC
-		"renesas,sdhi-r8a77470" - SDHI IP on R8A77470 SoC
-		"renesas,sdhi-mmc-r8a77470" - SDHI/MMC IP on R8A77470 SoC
-		"renesas,sdhi-r8a7778" - SDHI IP on R8A7778 SoC
-		"renesas,sdhi-r8a7779" - SDHI IP on R8A7779 SoC
-		"renesas,sdhi-r8a7790" - SDHI IP on R8A7790 SoC
-		"renesas,sdhi-r8a7791" - SDHI IP on R8A7791 SoC
-		"renesas,sdhi-r8a7792" - SDHI IP on R8A7792 SoC
-		"renesas,sdhi-r8a7793" - SDHI IP on R8A7793 SoC
-		"renesas,sdhi-r8a7794" - SDHI IP on R8A7794 SoC
-		"renesas,sdhi-r8a7795" - SDHI IP on R8A7795 SoC
-		"renesas,sdhi-r8a7796" - SDHI IP on R8A7796 SoC
-		"renesas,sdhi-r8a77965" - SDHI IP on R8A77965 SoC
-		"renesas,sdhi-r8a77970" - SDHI IP on R8A77970 SoC
-		"renesas,sdhi-r8a77980" - SDHI IP on R8A77980 SoC
-		"renesas,sdhi-r8a77990" - SDHI IP on R8A77990 SoC
-		"renesas,sdhi-r8a77995" - SDHI IP on R8A77995 SoC
-		"renesas,sdhi-shmobile" - a generic sh-mobile SDHI controller
-		"renesas,rcar-gen1-sdhi" - a generic R-Car Gen1 SDHI controller
-		"renesas,rcar-gen2-sdhi" - a generic R-Car Gen2 and RZ/G1 SDHI
-					   (not SDHI/MMC) controller
-		"renesas,rcar-gen3-sdhi" - a generic R-Car Gen3 or RZ/G2
-					   SDHI controller
-
-
-		When compatible with the generic version, nodes must list
-		the SoC-specific version corresponding to the platform
-		first followed by the generic version.
-
-- clocks: Most controllers only have 1 clock source per channel. However, on
-	  some variations of this controller, the internal card detection
-	  logic that exists in this controller is sectioned off to be run by a
-	  separate second clock source to allow the main core clock to be turned
-	  off to save power.
-	  If 2 clocks are specified by the hardware, you must name them as
-	  "core" and "cd". If the controller only has 1 clock, naming is not
-	  required.
-	  Devices which have more than 1 clock are listed below:
-	  2: R7S72100, R7S9210
-
-Optional properties:
-- pinctrl-names: should be "default", "state_uhs"
-- pinctrl-0: should contain default/high speed pin ctrl
-- pinctrl-1: should contain uhs mode pin ctrl
-
-Example: R8A7790 (R-Car H2) SDHI controller nodes
-
-	sdhi0: sd@ee100000 {
-		compatible = "renesas,sdhi-r8a7790", "renesas,rcar-gen2-sdhi";
-		reg = <0 0xee100000 0 0x328>;
-		interrupts = <GIC_SPI 165 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&cpg CPG_MOD 314>;
-		dmas = <&dmac0 0xcd>, <&dmac0 0xce>,
-		       <&dmac1 0xcd>, <&dmac1 0xce>;
-		dma-names = "tx", "rx", "tx", "rx";
-		max-frequency = <195000000>;
-		power-domains = <&sysc R8A7790_PD_ALWAYS_ON>;
-		resets = <&cpg 314>;
-	};
-
-	sdhi1: sd@ee120000 {
-		compatible = "renesas,sdhi-r8a7790", "renesas,rcar-gen2-sdhi";
-		reg = <0 0xee120000 0 0x328>;
-		interrupts = <GIC_SPI 166 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&cpg CPG_MOD 313>;
-		dmas = <&dmac0 0xc9>, <&dmac0 0xca>,
-		       <&dmac1 0xc9>, <&dmac1 0xca>;
-		dma-names = "tx", "rx", "tx", "rx";
-		max-frequency = <195000000>;
-		power-domains = <&sysc R8A7790_PD_ALWAYS_ON>;
-		resets = <&cpg 313>;
-	};
-
-	sdhi2: sd@ee140000 {
-		compatible = "renesas,sdhi-r8a7790", "renesas,rcar-gen2-sdhi";
-		reg = <0 0xee140000 0 0x100>;
-		interrupts = <GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&cpg CPG_MOD 312>;
-		dmas = <&dmac0 0xc1>, <&dmac0 0xc2>,
-		       <&dmac1 0xc1>, <&dmac1 0xc2>;
-		dma-names = "tx", "rx", "tx", "rx";
-		max-frequency = <97500000>;
-		power-domains = <&sysc R8A7790_PD_ALWAYS_ON>;
-		resets = <&cpg 312>;
-	};
-
-	sdhi3: sd@ee160000 {
-		compatible = "renesas,sdhi-r8a7790", "renesas,rcar-gen2-sdhi";
-		reg = <0 0xee160000 0 0x100>;
-		interrupts = <GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&cpg CPG_MOD 311>;
-		dmas = <&dmac0 0xd3>, <&dmac0 0xd4>,
-		       <&dmac1 0xd3>, <&dmac1 0xd4>;
-		dma-names = "tx", "rx", "tx", "rx";
-		max-frequency = <97500000>;
-		power-domains = <&sysc R8A7790_PD_ALWAYS_ON>;
-		resets = <&cpg 311>;
-	};
-- 
cgit 


From 34e51a5e1a6e939ed7d99c38173821ab86d577f4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 27 Jun 2019 13:39:49 -0700
Subject: blkcg, writeback: Rename wbc_account_io() to
 wbc_account_cgroup_owner()

wbc_account_io() does a very specific job - try to see which cgroup is
actually dirtying an inode and transfer its ownership to the majority
dirtier if needed.  The name is too generic and confusing.  Let's
rename it to something more specific.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/admin-guide/cgroup-v2.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index a5c845338d6d..6223f485f7e1 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -2114,7 +2114,7 @@ following two functions.
 	a queue (device) has been associated with the bio and
 	before submission.
 
-  wbc_account_io(@wbc, @page, @bytes)
+  wbc_account_cgroup_owner(@wbc, @page, @bytes)
 	Should be called for each data segment being written out.
 	While this function doesn't care exactly when it's called
 	during the writeback session, it's the easiest and most
-- 
cgit 


From 95fd3f87bfbe8edaa5e955e0f858a0a573c09ab6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sat, 6 Jul 2019 12:07:13 +0900
Subject: kbuild: add a flag to force absolute path for srctree

In old days, Kbuild always used an absolute path for $(srctree).

Since commit 890676c65d69 ("kbuild: Use relative path when building in
the source tree"), $(srctree) is '.' when O= was not passed from the
command line.

Yet, using absolute paths is useful in some cases even without O=, for
instance, to create a cscope file with absolute path tags.

'O=.' was known to work as a workaround to force Kbuild to use absolute
paths even when you are building in the source tree.

Since commit 25b146c5b8ce ("kbuild: allow Kbuild to start from any
directory"), Kbuild is too clever to be tricked. Even if you pass 'O=.'
Kbuild notices you are building in the source tree, then use '.' for
$(srctree).

So, 'make O=. cscope' is no help to create absolute path tags.

We cannot force one or the other according to commit e93bc1a0cab3
("Revert "kbuild: specify absolute paths for cscope""). Both of
relative path and absolute path have pros and cons.

This commit adds a new flag KBUILD_ABS_SRCTREE to allow users to
choose the absolute path for $(srctree).

'make KBUILD_ABS_SRCTREE=1 cscope' will work as a replacement of
'make O=. cscope'.

Reported-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Documentation/kbuild/kbuild.txt | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt
index 9c230ea71963..dcfeb32e3360 100644
--- a/Documentation/kbuild/kbuild.txt
+++ b/Documentation/kbuild/kbuild.txt
@@ -183,6 +183,15 @@ The output directory is often set using "O=..." on the commandline.
 
 The value can be overridden in which case the default value is ignored.
 
+KBUILD_ABS_SRCTREE
+--------------------------------------------------
+Kbuild uses a relative path to point to the tree when possible. For instance,
+when building in the source tree, the source tree path is '.'
+
+Setting this flag requests Kbuild to use absolute path to the source tree.
+There are some useful cases to do so, like when generating tag files with
+absolute path entries etc.
+
 KBUILD_SIGN_PIN
 --------------------------------------------------
 This variable allows a passphrase or PIN to be passed to the sign-file
-- 
cgit 


From 36847a005489cfb74dc6388952da73346f867dca Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Thu, 11 Jul 2019 00:56:08 +0900
Subject: block: Remove unused definitions

The ELV_MQUEUE_XXX definitions in include/linux/elevator.h are unused
since the removal of elevator_may_queue_fn in kernel 5.0. Remove these
definitions and also remove the documentation of elevator_may_queue_fn
in Documentiation/block/biodoc.txt.

Acked-by: Marcos Paulo de Souza <marcos.souza.org@gmail.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/biodoc.txt | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 31c177663ed5..5a4a799fe61b 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -843,11 +843,6 @@ elevator_latter_req_fn		These return the request before or after the
 
 elevator_completed_req_fn	called when a request is completed.
 
-elevator_may_queue_fn		returns true if the scheduler wants to allow the
-				current context to queue a new request even if
-				it is over the queue limit. This must be used
-				very carefully!!
-
 elevator_set_req_fn
 elevator_put_req_fn		Must be used to allocate and free any elevator
 				specific storage for a request.
-- 
cgit 


From 028db3e290f15ac509084c0fc3b9d021f668f877 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 10 Jul 2019 18:43:43 -0700
Subject: Revert "Merge tag 'keys-acl-20190703' of
 git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs"

This reverts merge 0f75ef6a9cff49ff612f7ce0578bced9d0b38325 (and thus
effectively commits

   7a1ade847596 ("keys: Provide KEYCTL_GRANT_PERMISSION")
   2e12256b9a76 ("keys: Replace uid/gid/perm permissions checking with an ACL")

that the merge brought in).

It turns out that it breaks booting with an encrypted volume, and Eric
biggers reports that it also breaks the fscrypt tests [1] and loading of
in-kernel X.509 certificates [2].

The root cause of all the breakage is likely the same, but David Howells
is off email so rather than try to work it out it's getting reverted in
order to not impact the rest of the merge window.

 [1] https://lore.kernel.org/lkml/20190710011559.GA7973@sol.localdomain/
 [2] https://lore.kernel.org/lkml/20190710013225.GB7973@sol.localdomain/

Link: https://lore.kernel.org/lkml/CAHk-=wjxoeMJfeBahnWH=9zShKp2bsVy527vo3_y8HfOdhwAAw@mail.gmail.com/
Reported-by: Eric Biggers <ebiggers@kernel.org>
Cc: David Howells <dhowells@redhat.com>
Cc: James Morris <jmorris@namei.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/security/keys/core.rst        | 128 ++++++----------------------
 Documentation/security/keys/request-key.rst |   9 +-
 2 files changed, 31 insertions(+), 106 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index bc561ca95c86..d6d8b0b756b6 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -57,9 +57,9 @@ Each key has a number of attributes:
      type provides an operation to perform a match between the description on a
      key and a criterion string.
 
-  *  Each key has an owner user ID, a group ID and an ACL.  These are used to
-     control what a process may do to a key from userspace, and whether a
-     kernel service will be able to find the key.
+  *  Each key has an owner user ID, a group ID and a permissions mask. These
+     are used to control what a process may do to a key from userspace, and
+     whether a kernel service will be able to find the key.
 
   *  Each key can be set to expire at a specific time by the key type's
      instantiation function. Keys can also be immortal.
@@ -198,110 +198,43 @@ The key service provides a number of features besides keys:
 Key Access Permissions
 ======================
 
-Keys have an owner user ID, a group ID and an ACL.  The ACL is made up of a
-sequence of ACEs that each contain three elements:
+Keys have an owner user ID, a group access ID, and a permissions mask. The mask
+has up to eight bits each for possessor, user, group and other access. Only
+six of each set of eight bits are defined. These permissions granted are:
 
-  * The type of subject.
-  * The subject.
+  *  View
 
-    These two together indicate the subject to whom the permits are granted.
-    The type can be one of:
+     This permits a key or keyring's attributes to be viewed - including key
+     type and description.
 
-     * ``KEY_ACE_SUBJ_STANDARD``
+  *  Read
 
-       The subject is a standard 'macro' type.  The subject can be one of:
-
-        * ``KEY_ACE_EVERYONE``
-
-	  The permits are granted to everyone.  It replaces the old 'other'
-	  type on the assumption that you wouldn't grant a permission to other
-	  that you you wouldn't grant to everyone else.
-
-	* ``KEY_ACE_OWNER``
-
-	  The permits are granted to the owner of the key (key->uid).
-
-	* ``KEY_ACE_GROUP``
-
-	  The permits are granted to the key's group (key->gid).
-
-	* ``KEY_ACE_POSSESSOR``
-
-	  The permits are granted to anyone who possesses the key.
-
-  * The set of permits granted to the subject.  These include:
-
-     * ``KEY_ACE_VIEW``
-
-       This permits a key or keyring's attributes to be viewed - including the
-       key type and description.
-
-     * ``KEY_ACE_READ``
-
-       This permits a key's payload to be viewed or a keyring's list of linked
-       keys.
-
-     * ``KEY_ACE_WRITE``
-
-       This permits a key's payload to be instantiated or updated, or it allows
-       a link to be added to or removed from a keyring.
-
-     * ``KEY_ACE_SEARCH``
-
-       This permits keyrings to be searched and keys to be found. Searches can
-       only recurse into nested keyrings that have search permission set.
-
-     * ``KEY_ACE_LINK``
-
-       This permits a key or keyring to be linked to. To create a link from a
-       keyring to a key, a process must have Write permission on the keyring
-       and Link permission on the key.
-
-     * ``KEY_ACE_SET_SECURITY``
-
-       This permits a key's UID, GID and permissions mask to be changed.
+     This permits a key's payload to be viewed or a keyring's list of linked
+     keys.
 
-     * ``KEY_ACE_INVAL``
+  *  Write
 
-       This permits a key to be invalidated with KEYCTL_INVALIDATE.
+     This permits a key's payload to be instantiated or updated, or it allows a
+     link to be added to or removed from a keyring.
 
-     * ``KEY_ACE_REVOKE``
+  *  Search
 
-       This permits a key to be revoked with KEYCTL_REVOKE.
+     This permits keyrings to be searched and keys to be found. Searches can
+     only recurse into nested keyrings that have search permission set.
 
-     * ``KEY_ACE_JOIN``
+  *  Link
 
-       This permits a keyring to be joined as a session by
-       KEYCTL_JOIN_SESSION_KEYRING or KEYCTL_SESSION_TO_PARENT.
+     This permits a key or keyring to be linked to. To create a link from a
+     keyring to a key, a process must have Write permission on the keyring and
+     Link permission on the key.
 
-     * ``KEY_ACE_CLEAR``
+  *  Set Attribute
 
-       This permits a keyring to be cleared.
+     This permits a key's UID, GID and permissions mask to be changed.
 
 For changing the ownership, group ID or permissions mask, being the owner of
 the key or having the sysadmin capability is sufficient.
 
-The legacy KEYCTL_SETPERM and KEYCTL_DESCRIBE functions can only see/generate
-View, Read, Write, Search, Link and SetAttr permits, and do this for each of
-possessor, user, group and other permission sets as a 32-bit flag mask.  These
-will be approximated/inferred:
-
-	SETPERM Permit	Implied ACE Permit
-	===============	=======================
-	Search		Inval, Join
-	Write		Revoke, Clear
-	Setattr		Set Security, Revoke
-
-	ACE Permit	Described as
-	===============	=======================
-	Inval		Search
-	Join		Search
-	Revoke		Write (unless Setattr)
-	Clear		write
-	Set Security	Setattr
-
-'Other' will be approximated as/inferred from the 'Everyone' subject.
-
 
 SELinux Support
 ===============
@@ -1151,8 +1084,7 @@ payload contents" for more information.
 
 	struct key *request_key(const struct key_type *type,
 				const char *description,
-				const char *callout_info,
-				struct key_acl *acl);
+				const char *callout_info);
 
     This is used to request a key or keyring with a description that matches
     the description specified according to the key type's match_preparse()
@@ -1167,8 +1099,6 @@ payload contents" for more information.
     If successful, the key will have been attached to the default keyring for
     implicitly obtained request-key keys, as set by KEYCTL_SET_REQKEY_KEYRING.
 
-    If a key is created, it will be given the specified ACL.
-
     See also Documentation/security/keys/request-key.rst.
 
 
@@ -1177,8 +1107,7 @@ payload contents" for more information.
 	struct key *request_key_tag(const struct key_type *type,
 				    const char *description,
 				    struct key_tag *domain_tag,
-				    const char *callout_info,
-				    struct key_acl *acl);
+				    const char *callout_info);
 
     This is identical to request_key(), except that a domain tag may be
     specifies that causes search algorithm to only match keys matching that
@@ -1193,8 +1122,7 @@ payload contents" for more information.
 					     struct key_tag *domain_tag,
 					     const void *callout_info,
 					     size_t callout_len,
-					     void *aux,
-					     struct key_acl *acl);
+					     void *aux);
 
     This is identical to request_key_tag(), except that the auxiliary data is
     passed to the key_type->request_key() op if it exists, and the
@@ -1267,7 +1195,7 @@ payload contents" for more information.
 
 	struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
 				  const struct cred *cred,
-				  struct key_acl *acl,
+				  key_perm_t perm,
 				  struct key_restriction *restrict_link,
 				  unsigned long flags,
 				  struct key *dest);
diff --git a/Documentation/security/keys/request-key.rst b/Documentation/security/keys/request-key.rst
index f356fd06c8d5..35f2296b704a 100644
--- a/Documentation/security/keys/request-key.rst
+++ b/Documentation/security/keys/request-key.rst
@@ -11,16 +11,14 @@ The process starts by either the kernel requesting a service by calling
 
 	struct key *request_key(const struct key_type *type,
 				const char *description,
-				const char *callout_info,
-				struct key_acl *acl);
+				const char *callout_info);
 
 or::
 
 	struct key *request_key_tag(const struct key_type *type,
 				    const char *description,
 				    const struct key_tag *domain_tag,
-				    const char *callout_info,
-				    struct key_acl *acl);
+				    const char *callout_info);
 
 or::
 
@@ -29,8 +27,7 @@ or::
 					     const struct key_tag *domain_tag,
 					     const char *callout_info,
 					     size_t callout_len,
-					     void *aux,
-					     struct key_acl *acl);
+					     void *aux);
 
 or::
 
-- 
cgit 


From 8c665292ec1263c627df60d10619e6f96deea788 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Tue, 4 Jun 2019 12:01:45 -0500
Subject: dt-bindings: mailbox: omap: Update bindings for TI K3 SoCs

The TI K3 AM65x and J721E family of SoCs have a new Mailbox IP that
is based on the existing Mailbox IP present in OMAP architecture based
SoCs. Update the existing OMAP Mailbox bindings for this new IP present
on TI K3 AM65x and J721E SoCs. The same compatible from AM65x SoCs is
reused for J721E SoCs.

Signed-off-by: Suman Anna <s-anna@ti.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 .../devicetree/bindings/mailbox/omap-mailbox.txt   | 59 ++++++++++++++++++----
 1 file changed, 50 insertions(+), 9 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/mailbox/omap-mailbox.txt b/Documentation/devicetree/bindings/mailbox/omap-mailbox.txt
index 0ef372656a3e..35c3f56b7f7b 100644
--- a/Documentation/devicetree/bindings/mailbox/omap-mailbox.txt
+++ b/Documentation/devicetree/bindings/mailbox/omap-mailbox.txt
@@ -1,4 +1,4 @@
-OMAP2+ Mailbox Driver
+OMAP2+ and K3 Mailbox
 =====================
 
 The OMAP mailbox hardware facilitates communication between different processors
@@ -7,7 +7,7 @@ various processor subsystems and is connected on an interconnect bus. The
 communication is achieved through a set of registers for message storage and
 interrupt configuration registers.
 
-Each mailbox IP block has a certain number of h/w fifo queues and output
+Each mailbox IP block/cluster has a certain number of h/w fifo queues and output
 interrupt lines. An output interrupt line is routed to an interrupt controller
 within a processor subsystem, and there can be more than one line going to a
 specific processor's interrupt controller. The interrupt line connections are
@@ -23,12 +23,16 @@ All the current OMAP SoCs except for the newest DRA7xx SoC has a single IP
 instance. DRA7xx has multiple instances with different number of h/w fifo queues
 and interrupt lines between different instances. The interrupt lines can also be
 routed to different processor sub-systems on DRA7xx as they are routed through
-the Crossbar, a kind of interrupt router/multiplexer.
+the Crossbar, a kind of interrupt router/multiplexer. The K3 AM65x and J721E
+SoCs has each of these instances form a cluster and combine multiple clusters
+into a single IP block present within the Main NavSS. The interrupt lines from
+all these clusters are multiplexed and routed to different processor subsystems
+over a limited number of common interrupt output lines of an Interrupt Router.
 
 Mailbox Device Node:
 ====================
-A Mailbox device node is used to represent a Mailbox IP instance within a SoC.
-The sub-mailboxes are represented as child nodes of this parent node.
+A Mailbox device node is used to represent a Mailbox IP instance/cluster within
+a SoC. The sub-mailboxes are represented as child nodes of this parent node.
 
 Required properties:
 --------------------
@@ -37,12 +41,12 @@ Required properties:
 			    "ti,omap3-mailbox" for OMAP3430, OMAP3630 SoCs
 			    "ti,omap4-mailbox" for OMAP44xx, OMAP54xx, AM33xx,
 						   AM43xx and DRA7xx SoCs
+			    "ti,am654-mailbox" for K3 AM65x and J721E SoCs
 - reg:			Contains the mailbox register address range (base
 			address and length)
 - interrupts:		Contains the interrupt information for the mailbox
 			device. The format is dependent on which interrupt
-			controller the OMAP device uses
-- ti,hwmods:		Name of the hwmod associated with the mailbox
+			controller the Mailbox device uses
 - #mbox-cells:		Common mailbox binding property to identify the number
 			of cells required for the mailbox specifier. Should be
 			1
@@ -50,6 +54,23 @@ Required properties:
 			device can interrupt
 - ti,mbox-num-fifos:	Number of h/w fifo queues within the mailbox IP block
 
+SoC-specific Required properties:
+---------------------------------
+The following are mandatory properties for the OMAP architecture based SoCs
+only:
+- ti,hwmods:		Name of the hwmod associated with the mailbox. This
+			should be defined in the mailbox node only if the node
+			is not defined as a child node of a corresponding sysc
+			interconnect node.
+
+The following are mandatory properties for the K3 AM65x and J721E SoCs only:
+- interrupt-parent:	Should contain a phandle to the TI-SCI interrupt
+			controller node that is used to dynamically program
+			the interrupt routes between the IP and the main GIC
+			controllers. See the following binding for additional
+			details,
+			Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt
+
 Child Nodes:
 ============
 A child node is used for representing the actual sub-mailbox device that is
@@ -98,7 +119,7 @@ to be used by the client user.
 Example:
 --------
 
-/* OMAP4 */
+1. /* OMAP4 */
 mailbox: mailbox@4a0f4000 {
 	compatible = "ti,omap4-mailbox";
 	reg = <0x4a0f4000 0x200>;
@@ -123,7 +144,7 @@ dsp {
 	...
 };
 
-/* AM33xx */
+2. /* AM33xx */
 mailbox: mailbox@480c8000 {
 	compatible = "ti,omap4-mailbox";
 	reg = <0x480C8000 0x200>;
@@ -137,3 +158,23 @@ mailbox: mailbox@480c8000 {
 		ti,mbox-rx = <0 0 3>;
 	};
 };
+
+3. /* AM65x */
+&cbass_main {
+	cbass_main_navss: interconnect0 {
+		mailbox0_cluster0: mailbox@31f80000 {
+			compatible = "ti,am654-mailbox";
+			reg = <0x00 0x31f80000 0x00 0x200>;
+			#mbox-cells = <1>;
+			ti,mbox-num-users = <4>;
+			ti,mbox-num-fifos = <16>;
+			interrupt-parent = <&intr_main_navss>;
+			interrupts = <164 0>;
+
+			mbox_mcu_r5fss0_core0: mbox-mcu-r5fss0-core0 {
+				ti,mbox-tx = <1 0 0>;
+				ti,mbox-rx = <0 0 0>;
+			};
+		};
+	};
+};
-- 
cgit 


From 66bb8a065f5aedd4551d8d3fbce582972f65c2e1 Mon Sep 17 00:00:00 2001
From: Eric Hankland <ehankland@google.com>
Date: Wed, 10 Jul 2019 18:25:15 -0700
Subject: KVM: x86: PMU Event Filter

Some events can provide a guest with information about other guests or the
host (e.g. L3 cache stats); providing the capability to restrict access
to a "safe" set of events would limit the potential for the PMU to be used
in any side channel attacks. This change introduces a new VM ioctl that
sets an event filter. If the guest attempts to program a counter for
any blacklisted or non-whitelisted event, the kernel counter won't be
created, so any RDPMC/RDMSR will show 0 instances of that event.

Signed-off-by: Eric Hankland <ehankland@google.com>
[Lots of changes. All remaining bugs are probably mine. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 91fd86fcc49f..38b0d4451a24 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4065,6 +4065,32 @@ KVM_ARM_VCPU_FINALIZE call.
 See KVM_ARM_VCPU_INIT for details of vcpu features that require finalization
 using this ioctl.
 
+4.120 KVM_SET_PMU_EVENT_FILTER
+
+Capability: KVM_CAP_PMU_EVENT_FILTER
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pmu_event_filter (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_pmu_event_filter {
+       __u32 action;
+       __u32 nevents;
+       __u64 events[0];
+};
+
+This ioctl restricts the set of PMU events that the guest can program.
+The argument holds a list of events which will be allowed or denied.
+The eventsel+umask of each event the guest attempts to program is compared
+against the events field to determine whether the guest should have access.
+This only affects general purpose counters; fixed purpose counters can
+be disabled by changing the perfmon CPUID leaf.
+
+Valid values for 'action':
+#define KVM_PMU_EVENT_ALLOW 0
+#define KVM_PMU_EVENT_DENY 1
+
+
 5. The kvm_run structure
 ------------------------
 
-- 
cgit 


From 8373bc4f1b7072a020860896b404bff0a54c7583 Mon Sep 17 00:00:00 2001
From: Luke Nowakowski-Krijger <lnowakow@eng.ucsd.edu>
Date: Wed, 10 Jul 2019 08:30:52 -0700
Subject: Documentation: virtual: Convert paravirt_ops.txt to .rst

Convert paravirt_opts.txt to .rst format to be able to be parsed by
sphinx.

Made some minor spacing and formatting corrections to make defintions
much more clear and easy to read. Added default kernel license to the
document.

Signed-off-by: Luke Nowakowski-Krijger <lnowakow@eng.ucsd.edu>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/paravirt_ops.rst | 35 ++++++++++++++++++++++++++++++++++
 Documentation/virtual/paravirt_ops.txt | 32 -------------------------------
 2 files changed, 35 insertions(+), 32 deletions(-)
 create mode 100644 Documentation/virtual/paravirt_ops.rst
 delete mode 100644 Documentation/virtual/paravirt_ops.txt

(limited to 'Documentation')

diff --git a/Documentation/virtual/paravirt_ops.rst b/Documentation/virtual/paravirt_ops.rst
new file mode 100644
index 000000000000..6b789d27cead
--- /dev/null
+++ b/Documentation/virtual/paravirt_ops.rst
@@ -0,0 +1,35 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Paravirt_ops
+============
+
+Linux provides support for different hypervisor virtualization technologies.
+Historically different binary kernels would be required in order to support
+different hypervisors, this restriction was removed with pv_ops.
+Linux pv_ops is a virtualization API which enables support for different
+hypervisors. It allows each hypervisor to override critical operations and
+allows a single kernel binary to run on all supported execution environments
+including native machine -- without any hypervisors.
+
+pv_ops provides a set of function pointers which represent operations
+corresponding to low level critical instructions and high level
+functionalities in various areas. pv-ops allows for optimizations at run
+time by enabling binary patching of the low-ops critical operations
+at boot time.
+
+pv_ops operations are classified into three categories:
+
+- simple indirect call
+   These operations correspond to high level functionality where it is
+   known that the overhead of indirect call isn't very important.
+
+- indirect call which allows optimization with binary patch
+   Usually these operations correspond to low level critical instructions. They
+   are called frequently and are performance critical. The overhead is
+   very important.
+
+- a set of macros for hand written assembly code
+   Hand written assembly codes (.S files) also need paravirtualization
+   because they include sensitive instructions or some of code paths in
+   them are very performance critical.
diff --git a/Documentation/virtual/paravirt_ops.txt b/Documentation/virtual/paravirt_ops.txt
deleted file mode 100644
index d4881c00e339..000000000000
--- a/Documentation/virtual/paravirt_ops.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Paravirt_ops
-============
-
-Linux provides support for different hypervisor virtualization technologies.
-Historically different binary kernels would be required in order to support
-different hypervisors, this restriction was removed with pv_ops.
-Linux pv_ops is a virtualization API which enables support for different
-hypervisors. It allows each hypervisor to override critical operations and
-allows a single kernel binary to run on all supported execution environments
-including native machine -- without any hypervisors.
-
-pv_ops provides a set of function pointers which represent operations
-corresponding to low level critical instructions and high level
-functionalities in various areas. pv-ops allows for optimizations at run
-time by enabling binary patching of the low-ops critical operations
-at boot time.
-
-pv_ops operations are classified into three categories:
-
-- simple indirect call
-  These operations correspond to high level functionality where it is
-  known that the overhead of indirect call isn't very important.
-
-- indirect call which allows optimization with binary patch
-  Usually these operations correspond to low level critical instructions. They
-  are called frequently and are performance critical. The overhead is
-  very important.
-
-- a set of macros for hand written assembly code
-  Hand written assembly codes (.S files) also need paravirtualization
-  because they include sensitive instructions or some of code paths in
-  them are very performance critical.
-- 
cgit 


From e287d6de62f745e223d2f7d9b621c892d4b2b45a Mon Sep 17 00:00:00 2001
From: Luke Nowakowski-Krijger <lnowakow@eng.ucsd.edu>
Date: Wed, 10 Jul 2019 08:30:53 -0700
Subject: Documentation: kvm: Convert cpuid.txt to .rst

Convert cpuid.txt to .rst format to be parsable by sphinx.

Change format and spacing to make function definitions and return values
much more clear. Also added a table that is parsable by sphinx and makes
the information much more clean. Updated Author email to their new
active email address. Added license identifier with the consent of the
author.

Signed-off-by: Luke Nowakowski-Krijger <lnowakow@eng.ucsd.edu>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/cpuid.rst | 107 ++++++++++++++++++++++++++++++++++++
 Documentation/virtual/kvm/cpuid.txt |  91 ------------------------------
 2 files changed, 107 insertions(+), 91 deletions(-)
 create mode 100644 Documentation/virtual/kvm/cpuid.rst
 delete mode 100644 Documentation/virtual/kvm/cpuid.txt

(limited to 'Documentation')

diff --git a/Documentation/virtual/kvm/cpuid.rst b/Documentation/virtual/kvm/cpuid.rst
new file mode 100644
index 000000000000..01b081f6e7ea
--- /dev/null
+++ b/Documentation/virtual/kvm/cpuid.rst
@@ -0,0 +1,107 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+KVM CPUID bits
+==============
+
+:Author: Glauber Costa <glommer@gmail.com>
+
+A guest running on a kvm host, can check some of its features using
+cpuid. This is not always guaranteed to work, since userspace can
+mask-out some, or even all KVM-related cpuid features before launching
+a guest.
+
+KVM cpuid functions are:
+
+function: KVM_CPUID_SIGNATURE (0x40000000)
+
+returns::
+
+   eax = 0x40000001
+   ebx = 0x4b4d564b
+   ecx = 0x564b4d56
+   edx = 0x4d
+
+Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM".
+The value in eax corresponds to the maximum cpuid function present in this leaf,
+and will be updated if more functions are added in the future.
+Note also that old hosts set eax value to 0x0. This should
+be interpreted as if the value was 0x40000001.
+This function queries the presence of KVM cpuid leafs.
+
+function: define KVM_CPUID_FEATURES (0x40000001)
+
+returns::
+
+          ebx, ecx
+          eax = an OR'ed group of (1 << flag)
+
+where ``flag`` is defined as below:
+
+================================= =========== ================================
+flag                              value       meaning
+================================= =========== ================================
+KVM_FEATURE_CLOCKSOURCE           0           kvmclock available at msrs
+                                              0x11 and 0x12
+
+KVM_FEATURE_NOP_IO_DELAY          1           not necessary to perform delays
+                                              on PIO operations
+
+KVM_FEATURE_MMU_OP                2           deprecated
+
+KVM_FEATURE_CLOCKSOURCE2          3           kvmclock available at msrs
+
+                                              0x4b564d00 and 0x4b564d01
+KVM_FEATURE_ASYNC_PF              4           async pf can be enabled by
+                                              writing to msr 0x4b564d02
+
+KVM_FEATURE_STEAL_TIME            5           steal time can be enabled by
+                                              writing to msr 0x4b564d03
+
+KVM_FEATURE_PV_EOI                6           paravirtualized end of interrupt
+                                              handler can be enabled by
+                                              writing to msr 0x4b564d04
+
+KVM_FEATURE_PV_UNHAULT            7           guest checks this feature bit
+                                              before enabling paravirtualized
+                                              spinlock support
+
+KVM_FEATURE_PV_TLB_FLUSH          9           guest checks this feature bit
+                                              before enabling paravirtualized
+                                              tlb flush
+
+KVM_FEATURE_ASYNC_PF_VMEXIT       10          paravirtualized async PF VM EXIT
+                                              can be enabled by setting bit 2
+                                              when writing to msr 0x4b564d02
+
+KVM_FEATURE_PV_SEND_IPI           11          guest checks this feature bit
+                                              before enabling paravirtualized
+                                              sebd IPIs
+
+KVM_FEATURE_PV_POLL_CONTROL       12          host-side polling on HLT can
+                                              be disabled by writing
+                                              to msr 0x4b564d05.
+
+KVM_FEATURE_PV_SCHED_YIELD        13          guest checks this feature bit
+                                              before using paravirtualized
+                                              sched yield.
+
+KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24          host will warn if no guest-side
+                                              per-cpu warps are expeced in
+                                              kvmclock
+================================= =========== ================================
+
+::
+
+      edx = an OR'ed group of (1 << flag)
+
+Where ``flag`` here is defined as below:
+
+================== ============ =================================
+flag               value        meaning
+================== ============ =================================
+KVM_HINTS_REALTIME 0            guest checks this feature bit to
+                                determine that vCPUs are never
+                                preempted for an unlimited time
+                                allowing optimizations
+================== ============ =================================
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
deleted file mode 100644
index 2bdac528e4a2..000000000000
--- a/Documentation/virtual/kvm/cpuid.txt
+++ /dev/null
@@ -1,91 +0,0 @@
-KVM CPUID bits
-Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
-=====================================================
-
-A guest running on a kvm host, can check some of its features using
-cpuid. This is not always guaranteed to work, since userspace can
-mask-out some, or even all KVM-related cpuid features before launching
-a guest.
-
-KVM cpuid functions are:
-
-function: KVM_CPUID_SIGNATURE (0x40000000)
-returns : eax = 0x40000001,
-          ebx = 0x4b4d564b,
-          ecx = 0x564b4d56,
-          edx = 0x4d.
-Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM".
-The value in eax corresponds to the maximum cpuid function present in this leaf,
-and will be updated if more functions are added in the future.
-Note also that old hosts set eax value to 0x0. This should
-be interpreted as if the value was 0x40000001.
-This function queries the presence of KVM cpuid leafs.
-
-
-function: define KVM_CPUID_FEATURES (0x40000001)
-returns : ebx, ecx
-          eax = an OR'ed group of (1 << flag), where each flags is:
-
-
-flag                               || value || meaning
-=============================================================================
-KVM_FEATURE_CLOCKSOURCE            ||     0 || kvmclock available at msrs
-                                   ||       || 0x11 and 0x12.
-------------------------------------------------------------------------------
-KVM_FEATURE_NOP_IO_DELAY           ||     1 || not necessary to perform delays
-                                   ||       || on PIO operations.
-------------------------------------------------------------------------------
-KVM_FEATURE_MMU_OP                 ||     2 || deprecated.
-------------------------------------------------------------------------------
-KVM_FEATURE_CLOCKSOURCE2           ||     3 || kvmclock available at msrs
-                                   ||       || 0x4b564d00 and 0x4b564d01
-------------------------------------------------------------------------------
-KVM_FEATURE_ASYNC_PF               ||     4 || async pf can be enabled by
-                                   ||       || writing to msr 0x4b564d02
-------------------------------------------------------------------------------
-KVM_FEATURE_STEAL_TIME             ||     5 || steal time can be enabled by
-                                   ||       || writing to msr 0x4b564d03.
-------------------------------------------------------------------------------
-KVM_FEATURE_PV_EOI                 ||     6 || paravirtualized end of interrupt
-                                   ||       || handler can be enabled by writing
-                                   ||       || to msr 0x4b564d04.
-------------------------------------------------------------------------------
-KVM_FEATURE_PV_UNHALT              ||     7 || guest checks this feature bit
-                                   ||       || before enabling paravirtualized
-                                   ||       || spinlock support.
-------------------------------------------------------------------------------
-KVM_FEATURE_PV_TLB_FLUSH           ||     9 || guest checks this feature bit
-                                   ||       || before enabling paravirtualized
-                                   ||       || tlb flush.
-------------------------------------------------------------------------------
-KVM_FEATURE_ASYNC_PF_VMEXIT        ||    10 || paravirtualized async PF VM exit
-                                   ||       || can be enabled by setting bit 2
-                                   ||       || when writing to msr 0x4b564d02
-------------------------------------------------------------------------------
-KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
-                                   ||       || before using paravirtualized
-                                   ||       || send IPIs.
-------------------------------------------------------------------------------
-KVM_FEATURE_PV_POLL_CONTROL        ||    12 || host-side polling on HLT can
-                                   ||       || be disabled by writing
-                                   ||       || to msr 0x4b564d05.
-------------------------------------------------------------------------------
-KVM_FEATURE_PV_SCHED_YIELD         ||    13 || guest checks this feature bit
-                                   ||       || before using paravirtualized
-                                   ||       || sched yield.
-------------------------------------------------------------------------------
-KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
-                                   ||       || per-cpu warps are expected in
-                                   ||       || kvmclock.
-------------------------------------------------------------------------------
-
-          edx = an OR'ed group of (1 << flag), where each flags is:
-
-
-flag                               || value || meaning
-==================================================================================
-KVM_HINTS_REALTIME                 ||     0 || guest checks this feature bit to
-                                   ||       || determine that vCPUs are never
-                                   ||       || preempted for an unlimited time,
-                                   ||       || allowing optimizations
-----------------------------------------------------------------------------------
-- 
cgit 


From 429bb83af8bcea0115eb34fd7ed94a35166d8384 Mon Sep 17 00:00:00 2001
From: Luke Nowakowski-Krijger <lnowakow@eng.ucsd.edu>
Date: Wed, 10 Jul 2019 08:30:54 -0700
Subject: Documentation: virtual: Add toctree hooks

Added toctree hooks for indexing. Hooks added only for newly added
files.

The hook for the top of the tree will be added in a later patch series
when a few more substantial changes have been added.

Signed-off-by: Luke Nowakowski-Krijger <lnowakow@eng.ucsd.edu>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/index.rst     | 18 ++++++++++++++++++
 Documentation/virtual/kvm/index.rst | 11 +++++++++++
 2 files changed, 29 insertions(+)
 create mode 100644 Documentation/virtual/index.rst
 create mode 100644 Documentation/virtual/kvm/index.rst

(limited to 'Documentation')

diff --git a/Documentation/virtual/index.rst b/Documentation/virtual/index.rst
new file mode 100644
index 000000000000..062ffb527043
--- /dev/null
+++ b/Documentation/virtual/index.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+Linux Virtualization Support
+============================
+
+.. toctree::
+   :maxdepth: 2
+
+   kvm/index
+   paravirt_ops
+
+.. only:: html and subproject
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/virtual/kvm/index.rst b/Documentation/virtual/kvm/index.rst
new file mode 100644
index 000000000000..0b206a06f5be
--- /dev/null
+++ b/Documentation/virtual/kvm/index.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===
+KVM
+===
+
+.. toctree::
+   :maxdepth: 2
+
+   amd-memory-encryption
+   cpuid
-- 
cgit 


From 0f327f2aaad6a87356cbccfa390d4d3b64d0d3b6 Mon Sep 17 00:00:00 2001
From: Atish Patra <atish.patra@wdc.com>
Date: Thu, 6 Jun 2019 16:08:00 -0700
Subject: RISC-V: Add an Image header that boot loader can parse.

Currently, the last stage boot loaders such as U-Boot can accept only
uImage which is an unnecessary additional step in automating boot
process.

Add an image header that boot loader understands and boot Linux from
flat Image directly.

This header is based on ARM64 boot image header and provides an
opportunity to combine both ARM64 & RISC-V image headers in future.

Also make sure that PE/COFF header can co-exist in the same image so
that EFI stub can be supported for RISC-V in future. EFI specification
needs PE/COFF image header in the beginning of the kernel image in order
to load it as an EFI application. In order to support EFI stub, code0
should be replaced with "MZ" magic string and res4(at offset 0x3c)
should point to the rest of the PE/COFF header (which will be added
during EFI support).

Tested on both QEMU and HiFive Unleashed using OpenSBI + U-Boot + Linux.

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Reviewed-by: Karsten Merker <merker@debian.org>
Tested-by: Karsten Merker <merker@debian.org> (QEMU+OpenSBI+U-Boot)
Tested-by: Kevin Hilman <khilman@baylibre.com> (OpenSBI + U-Boot + Linux)
[paul.walmsley@sifive.com: fixed whitespace in boot-image-header.txt;
 converted structure comment to kernel-doc format and added some detail]
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
---
 Documentation/riscv/boot-image-header.txt | 50 +++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 Documentation/riscv/boot-image-header.txt

(limited to 'Documentation')

diff --git a/Documentation/riscv/boot-image-header.txt b/Documentation/riscv/boot-image-header.txt
new file mode 100644
index 000000000000..1b73fea23b39
--- /dev/null
+++ b/Documentation/riscv/boot-image-header.txt
@@ -0,0 +1,50 @@
+				Boot image header in RISC-V Linux
+			=============================================
+
+Author: Atish Patra <atish.patra@wdc.com>
+Date  : 20 May 2019
+
+This document only describes the boot image header details for RISC-V Linux.
+The complete booting guide will be available at Documentation/riscv/booting.txt.
+
+The following 64-byte header is present in decompressed Linux kernel image.
+
+	u32 code0;		  /* Executable code */
+	u32 code1; 		  /* Executable code */
+	u64 text_offset;	  /* Image load offset, little endian */
+	u64 image_size;		  /* Effective Image size, little endian */
+	u64 flags;		  /* kernel flags, little endian */
+	u32 version;		  /* Version of this header */
+	u32 res1  = 0;		  /* Reserved */
+	u64 res2  = 0;    	  /* Reserved */
+	u64 magic = 0x5643534952; /* Magic number, little endian, "RISCV" */
+	u32 res3;		  /* Reserved for additional RISC-V specific header */
+	u32 res4;		  /* Reserved for PE COFF offset */
+
+This header format is compliant with PE/COFF header and largely inspired from
+ARM64 header. Thus, both ARM64 & RISC-V header can be combined into one common
+header in future.
+
+Notes:
+- This header can also be reused to support EFI stub for RISC-V in future. EFI
+  specification needs PE/COFF image header in the beginning of the kernel image
+  in order to load it as an EFI application. In order to support EFI stub,
+  code0 should be replaced with "MZ" magic string and res5(at offset 0x3c) should
+  point to the rest of the PE/COFF header.
+
+- version field indicate header version number.
+	Bits 0:15  - Minor version
+	Bits 16:31 - Major version
+
+  This preserves compatibility across newer and older version of the header.
+  The current version is defined as 0.1.
+
+- res3 is reserved for offset to any other additional fields. This makes the
+  header extendible in future. One example would be to accommodate ISA
+  extension for RISC-V in future. For current version, it is set to be zero.
+
+- In current header, the flag field has only one field.
+	Bit 0: Kernel endianness. 1 if BE, 0 if LE.
+
+- Image size is mandatory for boot loader to load kernel image. Booting will
+  fail otherwise.
-- 
cgit 


From 2e6023850e177dbaca21498ada04c5a5ac93f812 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Wed, 19 Jun 2019 17:05:54 -0400
Subject: dm snapshot: add optional discard support features

discard_zeroes_cow - a discard issued to the snapshot device that maps
to entire chunks to will zero the corresponding exception(s) in the
snapshot's exception store.

discard_passdown_origin - a discard to the snapshot device is passed down
to the snapshot-origin's underlying device.  This doesn't cause copy-out
to the snapshot exception store because the snapshot-origin target is
bypassed.

The discard_passdown_origin feature depends on the discard_zeroes_cow
feature being enabled.

When these 2 features are enabled they allow a temporarily read-only
device that has completely exhausted its free space to recover space.
To do so dm-snapshot provides temporary buffer to accommodate writes
that the temporarily read-only device cannot handle yet.  Once the upper
layer frees space (e.g. fstrim to XFS) the discards issued to the
dm-snapshot target will be issued to underlying read-only device whose
free space was exhausted.  In addition those discards will also cause
zeroes to be written to the snapshot exception store if corresponding
exceptions exist.  If the underlying origin device provides
deduplication for zero blocks then if/when the snapshot is merged backed
to the origin those blocks will become unused.  Once the origin has
gained adequate space, merging the snapshot back to the thinly
provisioned device will permit continued use of that device without the
temporary space provided by the snapshot.

Requested-by: John Dorminy <jdorminy@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 Documentation/device-mapper/snapshot.txt | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/device-mapper/snapshot.txt b/Documentation/device-mapper/snapshot.txt
index b8bbb516f989..1810833f6dc6 100644
--- a/Documentation/device-mapper/snapshot.txt
+++ b/Documentation/device-mapper/snapshot.txt
@@ -31,6 +31,7 @@ its visible content unchanged, at least until the <COW device> fills up.
 
 
 *) snapshot <origin> <COW device> <persistent?> <chunksize>
+   [<# feature args> [<arg>]*]
 
 A snapshot of the <origin> block device is created. Changed chunks of
 <chunksize> sectors will be stored on the <COW device>.  Writes will
@@ -53,8 +54,23 @@ When loading or unloading the snapshot target, the corresponding
 snapshot-origin or snapshot-merge target must be suspended. A failure to
 suspend the origin target could result in data corruption.
 
+Optional features:
+
+   discard_zeroes_cow - a discard issued to the snapshot device that
+   maps to entire chunks to will zero the corresponding exception(s) in
+   the snapshot's exception store.
+
+   discard_passdown_origin - a discard to the snapshot device is passed
+   down to the snapshot-origin's underlying device.  This doesn't cause
+   copy-out to the snapshot exception store because the snapshot-origin
+   target is bypassed.
+
+   The discard_passdown_origin feature depends on the discard_zeroes_cow
+   feature being enabled.
+
 
 * snapshot-merge <origin> <COW device> <persistent> <chunksize>
+  [<# feature args> [<arg>]*]
 
 takes the same table arguments as the snapshot target except it only
 works with persistent snapshots.  This target assumes the role of the
-- 
cgit 


From b7c3613e685076bad6e5540501d84cc31bb30377 Mon Sep 17 00:00:00 2001
From: André Almeida <andrealmeid@collabora.com>
Date: Thu, 11 Jul 2019 20:53:46 -0700
Subject: docs: kmemleak: add more documentation details
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wikipedia now has a main article to "tracing garbage collector" topic.
Change the URL and use the reStructuredText syntax for hyperlinks and add
more details about the use of the tool.  Add a section about how to use
the kmemleak-test module to test the memory leak scanning.

Link: http://lkml.kernel.org/r/20190612155231.19448-2-andrealmeid@collabora.com
Signed-off-by: André Almeida <andrealmeid@collabora.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/dev-tools/kmemleak.rst | 48 +++++++++++++++++++++++++++++++++---
 1 file changed, 44 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/dev-tools/kmemleak.rst b/Documentation/dev-tools/kmemleak.rst
index e6f51260ff32..3621cd5e1eef 100644
--- a/Documentation/dev-tools/kmemleak.rst
+++ b/Documentation/dev-tools/kmemleak.rst
@@ -2,8 +2,8 @@ Kernel Memory Leak Detector
 ===========================
 
 Kmemleak provides a way of detecting possible kernel memory leaks in a
-way similar to a tracing garbage collector
-(https://en.wikipedia.org/wiki/Garbage_collection_%28computer_science%29#Tracing_garbage_collectors),
+way similar to a `tracing garbage collector
+<https://en.wikipedia.org/wiki/Tracing_garbage_collection>`_,
 with the difference that the orphan objects are not freed but only
 reported via /sys/kernel/debug/kmemleak. A similar method is used by the
 Valgrind tool (``memcheck --leak-check``) to detect the memory leaks in
@@ -15,10 +15,13 @@ Usage
 
 CONFIG_DEBUG_KMEMLEAK in "Kernel hacking" has to be enabled. A kernel
 thread scans the memory every 10 minutes (by default) and prints the
-number of new unreferenced objects found. To display the details of all
-the possible memory leaks::
+number of new unreferenced objects found. If the ``debugfs`` isn't already
+mounted, mount with::
 
   # mount -t debugfs nodev /sys/kernel/debug/
+
+To display the details of all the possible scanned memory leaks::
+
   # cat /sys/kernel/debug/kmemleak
 
 To trigger an intermediate memory scan::
@@ -72,6 +75,9 @@ If CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF are enabled, the kmemleak is
 disabled by default. Passing ``kmemleak=on`` on the kernel command
 line enables the function. 
 
+If you are getting errors like "Error while writing to stdout" or "write_loop:
+Invalid argument", make sure kmemleak is properly enabled.
+
 Basic Algorithm
 ---------------
 
@@ -218,3 +224,37 @@ the pointer is calculated by other methods than the usual container_of
 macro or the pointer is stored in a location not scanned by kmemleak.
 
 Page allocations and ioremap are not tracked.
+
+Testing with kmemleak-test
+--------------------------
+
+To check if you have all set up to use kmemleak, you can use the kmemleak-test
+module, a module that deliberately leaks memory. Set CONFIG_DEBUG_KMEMLEAK_TEST
+as module (it can't be used as bult-in) and boot the kernel with kmemleak
+enabled. Load the module and perform a scan with::
+
+        # modprobe kmemleak-test
+        # echo scan > /sys/kernel/debug/kmemleak
+
+Note that the you may not get results instantly or on the first scanning. When
+kmemleak gets results, it'll log ``kmemleak: <count of leaks> new suspected
+memory leaks``. Then read the file to see then::
+
+        # cat /sys/kernel/debug/kmemleak
+        unreferenced object 0xffff89862ca702e8 (size 32):
+          comm "modprobe", pid 2088, jiffies 4294680594 (age 375.486s)
+          hex dump (first 32 bytes):
+            6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
+            6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5  kkkkkkkkkkkkkkk.
+          backtrace:
+            [<00000000e0a73ec7>] 0xffffffffc01d2036
+            [<000000000c5d2a46>] do_one_initcall+0x41/0x1df
+            [<0000000046db7e0a>] do_init_module+0x55/0x200
+            [<00000000542b9814>] load_module+0x203c/0x2480
+            [<00000000c2850256>] __do_sys_finit_module+0xba/0xe0
+            [<000000006564e7ef>] do_syscall_64+0x43/0x110
+            [<000000007c873fa6>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+        ...
+
+Removing the module with ``rmmod kmemleak_test`` should also trigger some
+kmemleak results.
-- 
cgit 


From 751ad98d5f881df91ba47e013b82422912381e8e Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Thu, 11 Jul 2019 20:54:00 -0700
Subject: asm-generic, x86: add bitops instrumentation for KASAN

This adds a new header to asm-generic to allow optionally instrumenting
architecture-specific asm implementations of bitops.

This change includes the required change for x86 as reference and
changes the kernel API doc to point to bitops-instrumented.h instead.
Rationale: the functions in x86's bitops.h are no longer the kernel API
functions, but instead the arch_ prefixed functions, which are then
instrumented via bitops-instrumented.h.

Other architectures can similarly add support for asm implementations of
bitops.

The documentation text was derived from x86 and existing bitops
asm-generic versions: 1) references to x86 have been removed; 2) as a
result, some of the text had to be reworded for clarity and consistency.

Tested using lib/test_kasan with bitops tests (pre-requisite patch).
Bugzilla ref: https://bugzilla.kernel.org/show_bug.cgi?id=198439

Link: http://lkml.kernel.org/r/20190613125950.197667-4-elver@google.com
Signed-off-by: Marco Elver <elver@google.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/core-api/kernel-api.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 824f24ccf401..08af5caf036d 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -54,7 +54,7 @@ The Linux kernel provides more basic utility functions.
 Bit Operations
 --------------
 
-.. kernel-doc:: arch/x86/include/asm/bitops.h
+.. kernel-doc:: include/asm-generic/bitops-instrumented.h
    :internal:
 
 Bitmap Operations
-- 
cgit 


From 3972f6bb1c6ae1d32dcf2e4ff635d24b77f26dcb Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 11 Jul 2019 20:55:13 -0700
Subject: mm, debug_pagealloc: use a page type instead of page_ext flag

When debug_pagealloc is enabled, we currently allocate the page_ext
array to mark guard pages with the PAGE_EXT_DEBUG_GUARD flag.  Now that
we have the page_type field in struct page, we can use that instead, as
guard pages are neither PageSlab nor mapped to userspace.  This reduces
memory overhead when debug_pagealloc is enabled and there are no other
features requiring the page_ext array.

Link: http://lkml.kernel.org/r/20190603143451.27353-4-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1c433daef6b..aa4e7e7b87c2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -805,12 +805,10 @@
 			tracking down these problems.
 
 	debug_pagealloc=
-			[KNL] When CONFIG_DEBUG_PAGEALLOC is set, this
-			parameter enables the feature at boot time. In
-			default, it is disabled. We can avoid allocating huge
-			chunk of memory for debug pagealloc if we don't enable
-			it at boot time and the system will work mostly same
-			with the kernel built without CONFIG_DEBUG_PAGEALLOC.
+			[KNL] When CONFIG_DEBUG_PAGEALLOC is set, this parameter
+			enables the feature at boot time. By default, it is
+			disabled and the system will work mostly the same as a
+			kernel built without CONFIG_DEBUG_PAGEALLOC.
 			on: enable the feature
 
 	debugpat	[X86] Enable PAT debugging
-- 
cgit 


From 1e577f970f66a53d429cbee37b36177c9712f488 Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Thu, 11 Jul 2019 20:55:55 -0700
Subject: mm, memcg: introduce memory.events.local

The memory controller in cgroup v2 exposes memory.events file for each
memcg which shows the number of times events like low, high, max, oom
and oom_kill have happened for the whole tree rooted at that memcg.
Users can also poll or register notification to monitor the changes in
that file.  Any event at any level of the tree rooted at memcg will
notify all the listeners along the path till root_mem_cgroup.  There are
existing users which depend on this behavior.

However there are users which are only interested in the events
happening at a specific level of the memcg tree and not in the events in
the underlying tree rooted at that memcg.  One such use-case is a
centralized resource monitor which can dynamically adjust the limits of
the jobs running on a system.  The jobs can create their sub-hierarchy
for their own sub-tasks.  The centralized monitor is only interested in
the events at the top level memcgs of the jobs as it can then act and
adjust the limits of the jobs.  Using the current memory.events for such
centralized monitor is very inconvenient.  The monitor will keep
receiving events which it is not interested and to find if the received
event is interesting, it has to read memory.event files of the next
level and compare it with the top level one.  So, let's introduce
memory.events.local to the memcg which shows and notify for the events
at the memcg level.

Now, does memory.stat and memory.pressure need their local versions.  IMHO
no due to the no internal process contraint of the cgroup v2.  The
memory.stat file of the top level memcg of a job shows the stats and
vmevents of the whole tree.  The local stats or vmevents of the top level
memcg will only change if there is a process running in that memcg but v2
does not allow that.  Similarly for memory.pressure there will not be any
process in the internal nodes and thus no chance of local pressure.

Link: http://lkml.kernel.org/r/20190527174643.209172-1-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Roman Gushchin <guro@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Chris Down <chris@chrisdown.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/cgroup-v2.rst | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index a5c845338d6d..a9548de56ac9 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1146,6 +1146,11 @@ PAGE_SIZE multiple when read back.
 	otherwise, a value change in this file generates a file
 	modified event.
 
+	Note that all fields in this file are hierarchical and the
+	file modified event can be generated due to an event down the
+	hierarchy. For for the local events at the cgroup level see
+	memory.events.local.
+
 	  low
 		The number of times the cgroup is reclaimed due to
 		high memory pressure even though its usage is under
@@ -1185,6 +1190,11 @@ PAGE_SIZE multiple when read back.
 		The number of processes belonging to this cgroup
 		killed by any kind of OOM killer.
 
+  memory.events.local
+	Similar to memory.events but the fields in the file are local
+	to the cgroup i.e. not hierarchical. The file modified event
+	generated on this file reflects only the local events.
+
   memory.stat
 	A read-only flat-keyed file which exists on non-root cgroups.
 
-- 
cgit 


From 6471384af2a6530696fc0203bafe4de41a23c9ef Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Thu, 11 Jul 2019 20:59:19 -0700
Subject: mm: security: introduce init_on_alloc=1 and init_on_free=1 boot
 options

Patch series "add init_on_alloc/init_on_free boot options", v10.

Provide init_on_alloc and init_on_free boot options.

These are aimed at preventing possible information leaks and making the
control-flow bugs that depend on uninitialized values more deterministic.

Enabling either of the options guarantees that the memory returned by the
page allocator and SL[AU]B is initialized with zeroes.  SLOB allocator
isn't supported at the moment, as its emulation of kmem caches complicates
handling of SLAB_TYPESAFE_BY_RCU caches correctly.

Enabling init_on_free also guarantees that pages and heap objects are
initialized right after they're freed, so it won't be possible to access
stale data by using a dangling pointer.

As suggested by Michal Hocko, right now we don't let the heap users to
disable initialization for certain allocations.  There's not enough
evidence that doing so can speed up real-life cases, and introducing ways
to opt-out may result in things going out of control.

This patch (of 2):

The new options are needed to prevent possible information leaks and make
control-flow bugs that depend on uninitialized values more deterministic.

This is expected to be on-by-default on Android and Chrome OS.  And it
gives the opportunity for anyone else to use it under distros too via the
boot args.  (The init_on_free feature is regularly requested by folks
where memory forensics is included in their threat models.)

init_on_alloc=1 makes the kernel initialize newly allocated pages and heap
objects with zeroes.  Initialization is done at allocation time at the
places where checks for __GFP_ZERO are performed.

init_on_free=1 makes the kernel initialize freed pages and heap objects
with zeroes upon their deletion.  This helps to ensure sensitive data
doesn't leak via use-after-free accesses.

Both init_on_alloc=1 and init_on_free=1 guarantee that the allocator
returns zeroed memory.  The two exceptions are slab caches with
constructors and SLAB_TYPESAFE_BY_RCU flag.  Those are never
zero-initialized to preserve their semantics.

Both init_on_alloc and init_on_free default to zero, but those defaults
can be overridden with CONFIG_INIT_ON_ALLOC_DEFAULT_ON and
CONFIG_INIT_ON_FREE_DEFAULT_ON.

If either SLUB poisoning or page poisoning is enabled, those options take
precedence over init_on_alloc and init_on_free: initialization is only
applied to unpoisoned allocations.

Slowdown for the new features compared to init_on_free=0, init_on_alloc=0:

hackbench, init_on_free=1:  +7.62% sys time (st.err 0.74%)
hackbench, init_on_alloc=1: +7.75% sys time (st.err 2.14%)

Linux build with -j12, init_on_free=1:  +8.38% wall time (st.err 0.39%)
Linux build with -j12, init_on_free=1:  +24.42% sys time (st.err 0.52%)
Linux build with -j12, init_on_alloc=1: -0.13% wall time (st.err 0.42%)
Linux build with -j12, init_on_alloc=1: +0.57% sys time (st.err 0.40%)

The slowdown for init_on_free=0, init_on_alloc=0 compared to the baseline
is within the standard error.

The new features are also going to pave the way for hardware memory
tagging (e.g.  arm64's MTE), which will require both on_alloc and on_free
hooks to set the tags for heap objects.  With MTE, tagging will have the
same cost as memory initialization.

Although init_on_free is rather costly, there are paranoid use-cases where
in-memory data lifetime is desired to be minimized.  There are various
arguments for/against the realism of the associated threat models, but
given that we'll need the infrastructure for MTE anyway, and there are
people who want wipe-on-free behavior no matter what the performance cost,
it seems reasonable to include it in this series.

[glider@google.com: v8]
  Link: http://lkml.kernel.org/r/20190626121943.131390-2-glider@google.com
[glider@google.com: v9]
  Link: http://lkml.kernel.org/r/20190627130316.254309-2-glider@google.com
[glider@google.com: v10]
  Link: http://lkml.kernel.org/r/20190628093131.199499-2-glider@google.com
Link: http://lkml.kernel.org/r/20190617151050.92663-2-glider@google.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Michal Hocko <mhocko@suse.cz>		[page and dmapool parts
Acked-by: James Morris <jamorris@linux.microsoft.com>]
Cc: Christoph Lameter <cl@linux.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Kostya Serebryany <kcc@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Sandeep Patil <sspatil@android.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Jann Horn <jannh@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Marco Elver <elver@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index aa4e7e7b87c2..099c5a4be95b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1668,6 +1668,15 @@
 
 	initrd=		[BOOT] Specify the location of the initial ramdisk
 
+	init_on_alloc=	[MM] Fill newly allocated pages and heap objects with
+			zeroes.
+			Format: 0 | 1
+			Default set by CONFIG_INIT_ON_ALLOC_DEFAULT_ON.
+
+	init_on_free=	[MM] Fill freed pages and heap objects with zeroes.
+			Format: 0 | 1
+			Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON.
+
 	init_pkru=	[x86] Specify the default memory protection keys rights
 			register contents for all processes.  0x55555554 by
 			default (disallow access to all but pkey 0).  Can
-- 
cgit 


From ee2ad71b0756e995fa4f6d922463e9bccd71b198 Mon Sep 17 00:00:00 2001
From: Luigi Semenzato <semenzato@chromium.org>
Date: Thu, 11 Jul 2019 21:00:10 -0700
Subject: mm: smaps: split PSS into components

Report separate components (anon, file, and shmem) for PSS in
smaps_rollup.

This helps understand and tune the memory manager behavior in consumer
devices, particularly mobile devices.  Many of them (e.g.  chromebooks and
Android-based devices) use zram for anon memory, and perform disk reads
for discarded file pages.  The difference in latency is large (e.g.
reading a single page from SSD is 30 times slower than decompressing a
zram page on one popular device), thus it is useful to know how much of
the PSS is anon vs.  file.

All the information is already present in /proc/pid/smaps, but much more
expensive to obtain because of the large size of that procfs entry.

This patch also removes a small code duplication in smaps_account, which
would have gotten worse otherwise.

Also updated Documentation/filesystems/proc.txt (the smaps section was a
bit stale, and I added a smaps_rollup section) and
Documentation/ABI/testing/procfs-smaps_rollup.

[semenzato@chromium.org: v5]
  Link: http://lkml.kernel.org/r/20190626234333.44608-1-semenzato@chromium.org
Link: http://lkml.kernel.org/r/20190626180429.174569-1-semenzato@chromium.org
Signed-off-by: Luigi Semenzato <semenzato@chromium.org>
Acked-by: Yu Zhao <yuzhao@chromium.org>
Cc: Sonny Rao <sonnyrao@chromium.org>
Cc: Yu Zhao <yuzhao@chromium.org>
Cc: Brian Geffon <bgeffon@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/ABI/testing/procfs-smaps_rollup | 14 +++++++--
 Documentation/filesystems/proc.txt            | 41 ++++++++++++++++++++-------
 2 files changed, 43 insertions(+), 12 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/procfs-smaps_rollup b/Documentation/ABI/testing/procfs-smaps_rollup
index 0a54ed0d63c9..274df44d8b1b 100644
--- a/Documentation/ABI/testing/procfs-smaps_rollup
+++ b/Documentation/ABI/testing/procfs-smaps_rollup
@@ -3,18 +3,28 @@ Date:		August 2017
 Contact:	Daniel Colascione <dancol@google.com>
 Description:
 		This file provides pre-summed memory information for a
-		process.  The format is identical to /proc/pid/smaps,
+		process.  The format is almost identical to /proc/pid/smaps,
 		except instead of an entry for each VMA in a process,
 		smaps_rollup has a single entry (tagged "[rollup]")
 		for which each field is the sum of the corresponding
 		fields from all the maps in /proc/pid/smaps.
-		For more details, see the procfs man page.
+		Additionally, the fields Pss_Anon, Pss_File and Pss_Shmem
+		are not present in /proc/pid/smaps.  These fields represent
+		the sum of the Pss field of each type (anon, file, shmem).
+		For more details, see Documentation/filesystems/proc.txt
+		and the procfs man page.
 
 		Typical output looks like this:
 
 		00100000-ff709000 ---p 00000000 00:00 0		 [rollup]
+		Size:               1192 kB
+		KernelPageSize:        4 kB
+		MMUPageSize:           4 kB
 		Rss:		     884 kB
 		Pss:		     385 kB
+		Pss_Anon:	     301 kB
+		Pss_File:	      80 kB
+		Pss_Shmem:	       4 kB
 		Shared_Clean:	     696 kB
 		Shared_Dirty:	       0 kB
 		Private_Clean:	     120 kB
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index a226061fa109..d750b6926899 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -154,9 +154,11 @@ Table 1-1: Process specific entries in /proc
 		symbol the task is blocked in - or "0" if not blocked.
  pagemap	Page table
  stack		Report full stack trace, enable via CONFIG_STACKTRACE
- smaps		an extension based on maps, showing the memory consumption of
+ smaps		An extension based on maps, showing the memory consumption of
 		each mapping and flags associated with it
- numa_maps	an extension based on maps, showing the memory locality and
+ smaps_rollup	Accumulated smaps stats for all mappings of the process.  This
+		can be derived from smaps, but is faster and more convenient
+ numa_maps	An extension based on maps, showing the memory locality and
 		binding policy as well as mem usage (in pages) of each mapping.
 ..............................................................................
 
@@ -366,7 +368,7 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
   exit_code     the thread's exit_code in the form reported by the waitpid system call
 ..............................................................................
 
-The /proc/PID/maps file containing the currently mapped memory regions and
+The /proc/PID/maps file contains the currently mapped memory regions and
 their access permissions.
 
 The format is:
@@ -417,11 +419,14 @@ is not associated with a file:
  or if empty, the mapping is anonymous.
 
 The /proc/PID/smaps is an extension based on maps, showing the memory
-consumption for each of the process's mappings. For each of mappings there
-is a series of lines such as the following:
+consumption for each of the process's mappings. For each mapping (aka Virtual
+Memory Area, or VMA) there is a series of lines such as the following:
 
 08048000-080bc000 r-xp 00000000 03:02 13130      /bin/bash
+
 Size:               1084 kB
+KernelPageSize:        4 kB
+MMUPageSize:           4 kB
 Rss:                 892 kB
 Pss:                 374 kB
 Shared_Clean:        892 kB
@@ -443,11 +448,14 @@ Locked:                0 kB
 THPeligible:           0
 VmFlags: rd ex mr mw me dw
 
-the first of these lines shows the same information as is displayed for the
-mapping in /proc/PID/maps.  The remaining lines show the size of the mapping
-(size), the amount of the mapping that is currently resident in RAM (RSS), the
-process' proportional share of this mapping (PSS), the number of clean and
-dirty private pages in the mapping.
+The first of these lines shows the same information as is displayed for the
+mapping in /proc/PID/maps.  Following lines show the size of the mapping
+(size); the size of each page allocated when backing a VMA (KernelPageSize),
+which is usually the same as the size in the page table entries; the page size
+used by the MMU when backing a VMA (in most cases, the same as KernelPageSize);
+the amount of the mapping that is currently resident in RAM (RSS); the
+process' proportional share of this mapping (PSS); and the number of clean and
+dirty shared and private pages in the mapping.
 
 The "proportional set size" (PSS) of a process is the count of pages it has
 in memory, where each page is divided by the number of processes sharing it.
@@ -532,6 +540,19 @@ guarantees:
 2) If there is something at a given vaddr during the entirety of the
    life of the smaps/maps walk, there will be some output for it.
 
+The /proc/PID/smaps_rollup file includes the same fields as /proc/PID/smaps,
+but their values are the sums of the corresponding values for all mappings of
+the process.  Additionally, it contains these fields:
+
+Pss_Anon
+Pss_File
+Pss_Shmem
+
+They represent the proportional shares of anonymous, file, and shmem pages, as
+described for smaps above.  These fields are omitted in smaps since each
+mapping identifies the type (anon, file, or shmem) of all pages it contains.
+Thus all information in smaps_rollup can be derived from smaps, but at a
+significantly higher cost.
 
 The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG
 bits on both physical and virtual pages associated with a process, and the
-- 
cgit 


From 0307d589c4d6f84349afb7a53717baa2392f6a38 Mon Sep 17 00:00:00 2001
From: Vincent Bernat <vincent@bernat.ch>
Date: Sat, 13 Jul 2019 16:35:27 +0200
Subject: bonding: add documentation for peer_notif_delay

Ability to tweak the interval between peer notifications has been
added in 07a4ddec3ce9 ("bonding: add an option to specify a delay
between peer notifications") but the documentation was not updated.

Signed-off-by: Vincent Bernat <vincent@bernat.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/bonding.txt | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index d3e5dd26db12..e3abfbd32f71 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -706,9 +706,9 @@ num_unsol_na
 	unsolicited IPv6 Neighbor Advertisements) to be issued after a
 	failover event.  As soon as the link is up on the new slave
 	(possibly immediately) a peer notification is sent on the
-	bonding device and each VLAN sub-device.  This is repeated at
-	each link monitor interval (arp_interval or miimon, whichever
-	is active) if the number is greater than 1.
+	bonding device and each VLAN sub-device. This is repeated at
+	the rate specified by peer_notif_delay if the number is
+	greater than 1.
 
 	The valid range is 0 - 255; the default value is 1.  These options
 	affect only the active-backup mode.  These options were added for
@@ -727,6 +727,16 @@ packets_per_slave
 	The valid range is 0 - 65535; the default value is 1. This option
 	has effect only in balance-rr mode.
 
+peer_notif_delay
+
+        Specify the delay, in milliseconds, between each peer
+        notification (gratuitous ARP and unsolicited IPv6 Neighbor
+        Advertisement) when they are issued after a failover event.
+        This delay should be a multiple of the link monitor interval
+        (arp_interval or miimon, whichever is active). The default
+        value is 0 which means to match the value of the link monitor
+        interval.
+
 primary
 
 	A string (eth0, eth2, etc) specifying which slave is the
-- 
cgit 


From 387b14684f94483cbbb72843db406ec9a8d0d6d2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 10 Apr 2019 08:32:41 -0300
Subject: docs: locking: convert docs to ReST and rename to *.rst

Convert the locking documents to ReST and add them to the
kernel development book where it belongs.

Most of the stuff here is just to make Sphinx to properly
parse the text file, as they're already in good shape,
not requiring massive changes in order to be parsed.

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Federico Vaga <federico.vaga@vaga.pv.it>
---
 Documentation/kernel-hacking/locking.rst           |   2 +-
 Documentation/locking/index.rst                    |  24 +
 Documentation/locking/lockdep-design.rst           | 394 ++++++++++++++
 Documentation/locking/lockdep-design.txt           | 389 --------------
 Documentation/locking/lockstat.rst                 | 204 ++++++++
 Documentation/locking/lockstat.txt                 | 183 -------
 Documentation/locking/locktorture.rst              | 170 ++++++
 Documentation/locking/locktorture.txt              | 145 ------
 Documentation/locking/mutex-design.rst             | 152 ++++++
 Documentation/locking/mutex-design.txt             | 142 -----
 Documentation/locking/rt-mutex-design.rst          | 574 +++++++++++++++++++++
 Documentation/locking/rt-mutex-design.txt          | 559 --------------------
 Documentation/locking/rt-mutex.rst                 |  77 +++
 Documentation/locking/rt-mutex.txt                 |  73 ---
 Documentation/locking/spinlocks.rst                | 177 +++++++
 Documentation/locking/spinlocks.txt                | 167 ------
 Documentation/locking/ww-mutex-design.rst          | 393 ++++++++++++++
 Documentation/locking/ww-mutex-design.txt          | 383 --------------
 Documentation/pi-futex.txt                         |   2 +-
 .../translations/it_IT/kernel-hacking/locking.rst  |   2 +-
 20 files changed, 2168 insertions(+), 2044 deletions(-)
 create mode 100644 Documentation/locking/index.rst
 create mode 100644 Documentation/locking/lockdep-design.rst
 delete mode 100644 Documentation/locking/lockdep-design.txt
 create mode 100644 Documentation/locking/lockstat.rst
 delete mode 100644 Documentation/locking/lockstat.txt
 create mode 100644 Documentation/locking/locktorture.rst
 delete mode 100644 Documentation/locking/locktorture.txt
 create mode 100644 Documentation/locking/mutex-design.rst
 delete mode 100644 Documentation/locking/mutex-design.txt
 create mode 100644 Documentation/locking/rt-mutex-design.rst
 delete mode 100644 Documentation/locking/rt-mutex-design.txt
 create mode 100644 Documentation/locking/rt-mutex.rst
 delete mode 100644 Documentation/locking/rt-mutex.txt
 create mode 100644 Documentation/locking/spinlocks.rst
 delete mode 100644 Documentation/locking/spinlocks.txt
 create mode 100644 Documentation/locking/ww-mutex-design.rst
 delete mode 100644 Documentation/locking/ww-mutex-design.txt

(limited to 'Documentation')

diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst
index dc698ea456e0..a8518ac0d31d 100644
--- a/Documentation/kernel-hacking/locking.rst
+++ b/Documentation/kernel-hacking/locking.rst
@@ -1364,7 +1364,7 @@ Futex API reference
 Further reading
 ===============
 
--  ``Documentation/locking/spinlocks.txt``: Linus Torvalds' spinlocking
+-  ``Documentation/locking/spinlocks.rst``: Linus Torvalds' spinlocking
    tutorial in the kernel sources.
 
 -  Unix Systems for Modern Architectures: Symmetric Multiprocessing and
diff --git a/Documentation/locking/index.rst b/Documentation/locking/index.rst
new file mode 100644
index 000000000000..ef5da7fe9aac
--- /dev/null
+++ b/Documentation/locking/index.rst
@@ -0,0 +1,24 @@
+:orphan:
+
+=======
+locking
+=======
+
+.. toctree::
+    :maxdepth: 1
+
+    lockdep-design
+    lockstat
+    locktorture
+    mutex-design
+    rt-mutex-design
+    rt-mutex
+    spinlocks
+    ww-mutex-design
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/locking/lockdep-design.rst b/Documentation/locking/lockdep-design.rst
new file mode 100644
index 000000000000..23fcbc4d3fc0
--- /dev/null
+++ b/Documentation/locking/lockdep-design.rst
@@ -0,0 +1,394 @@
+Runtime locking correctness validator
+=====================================
+
+started by Ingo Molnar <mingo@redhat.com>
+
+additions by Arjan van de Ven <arjan@linux.intel.com>
+
+Lock-class
+----------
+
+The basic object the validator operates upon is a 'class' of locks.
+
+A class of locks is a group of locks that are logically the same with
+respect to locking rules, even if the locks may have multiple (possibly
+tens of thousands of) instantiations. For example a lock in the inode
+struct is one class, while each inode has its own instantiation of that
+lock class.
+
+The validator tracks the 'usage state' of lock-classes, and it tracks
+the dependencies between different lock-classes. Lock usage indicates
+how a lock is used with regard to its IRQ contexts, while lock
+dependency can be understood as lock order, where L1 -> L2 suggests that
+a task is attempting to acquire L2 while holding L1. From lockdep's
+perspective, the two locks (L1 and L2) are not necessarily related; that
+dependency just means the order ever happened. The validator maintains a
+continuing effort to prove lock usages and dependencies are correct or
+the validator will shoot a splat if incorrect.
+
+A lock-class's behavior is constructed by its instances collectively:
+when the first instance of a lock-class is used after bootup the class
+gets registered, then all (subsequent) instances will be mapped to the
+class and hence their usages and dependecies will contribute to those of
+the class. A lock-class does not go away when a lock instance does, but
+it can be removed if the memory space of the lock class (static or
+dynamic) is reclaimed, this happens for example when a module is
+unloaded or a workqueue is destroyed.
+
+State
+-----
+
+The validator tracks lock-class usage history and divides the usage into
+(4 usages * n STATEs + 1) categories:
+
+where the 4 usages can be:
+- 'ever held in STATE context'
+- 'ever held as readlock in STATE context'
+- 'ever held with STATE enabled'
+- 'ever held as readlock with STATE enabled'
+
+where the n STATEs are coded in kernel/locking/lockdep_states.h and as of
+now they include:
+- hardirq
+- softirq
+
+where the last 1 category is:
+- 'ever used'                                       [ == !unused        ]
+
+When locking rules are violated, these usage bits are presented in the
+locking error messages, inside curlies, with a total of 2 * n STATEs bits.
+A contrived example::
+
+   modprobe/2287 is trying to acquire lock:
+    (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
+
+   but task is already holding lock:
+    (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
+
+
+For a given lock, the bit positions from left to right indicate the usage
+of the lock and readlock (if exists), for each of the n STATEs listed
+above respectively, and the character displayed at each bit position
+indicates:
+
+   ===  ===================================================
+   '.'  acquired while irqs disabled and not in irq context
+   '-'  acquired in irq context
+   '+'  acquired with irqs enabled
+   '?'  acquired in irq context with irqs enabled.
+   ===  ===================================================
+
+The bits are illustrated with an example::
+
+    (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
+                         ||||
+                         ||| \-> softirq disabled and not in softirq context
+                         || \--> acquired in softirq context
+                         | \---> hardirq disabled and not in hardirq context
+                          \----> acquired in hardirq context
+
+
+For a given STATE, whether the lock is ever acquired in that STATE
+context and whether that STATE is enabled yields four possible cases as
+shown in the table below. The bit character is able to indicate which
+exact case is for the lock as of the reporting time.
+
+  +--------------+-------------+--------------+
+  |              | irq enabled | irq disabled |
+  +--------------+-------------+--------------+
+  | ever in irq  |      ?      |       -      |
+  +--------------+-------------+--------------+
+  | never in irq |      +      |       .      |
+  +--------------+-------------+--------------+
+
+The character '-' suggests irq is disabled because if otherwise the
+charactor '?' would have been shown instead. Similar deduction can be
+applied for '+' too.
+
+Unused locks (e.g., mutexes) cannot be part of the cause of an error.
+
+
+Single-lock state rules:
+------------------------
+
+A lock is irq-safe means it was ever used in an irq context, while a lock
+is irq-unsafe means it was ever acquired with irq enabled.
+
+A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The
+following states must be exclusive: only one of them is allowed to be set
+for any lock-class based on its usage::
+
+ <hardirq-safe> or <hardirq-unsafe>
+ <softirq-safe> or <softirq-unsafe>
+
+This is because if a lock can be used in irq context (irq-safe) then it
+cannot be ever acquired with irq enabled (irq-unsafe). Otherwise, a
+deadlock may happen. For example, in the scenario that after this lock
+was acquired but before released, if the context is interrupted this
+lock will be attempted to acquire twice, which creates a deadlock,
+referred to as lock recursion deadlock.
+
+The validator detects and reports lock usage that violates these
+single-lock state rules.
+
+Multi-lock dependency rules:
+----------------------------
+
+The same lock-class must not be acquired twice, because this could lead
+to lock recursion deadlocks.
+
+Furthermore, two locks can not be taken in inverse order::
+
+ <L1> -> <L2>
+ <L2> -> <L1>
+
+because this could lead to a deadlock - referred to as lock inversion
+deadlock - as attempts to acquire the two locks form a circle which
+could lead to the two contexts waiting for each other permanently. The
+validator will find such dependency circle in arbitrary complexity,
+i.e., there can be any other locking sequence between the acquire-lock
+operations; the validator will still find whether these locks can be
+acquired in a circular fashion.
+
+Furthermore, the following usage based lock dependencies are not allowed
+between any two lock-classes::
+
+   <hardirq-safe>   ->  <hardirq-unsafe>
+   <softirq-safe>   ->  <softirq-unsafe>
+
+The first rule comes from the fact that a hardirq-safe lock could be
+taken by a hardirq context, interrupting a hardirq-unsafe lock - and
+thus could result in a lock inversion deadlock. Likewise, a softirq-safe
+lock could be taken by an softirq context, interrupting a softirq-unsafe
+lock.
+
+The above rules are enforced for any locking sequence that occurs in the
+kernel: when acquiring a new lock, the validator checks whether there is
+any rule violation between the new lock and any of the held locks.
+
+When a lock-class changes its state, the following aspects of the above
+dependency rules are enforced:
+
+- if a new hardirq-safe lock is discovered, we check whether it
+  took any hardirq-unsafe lock in the past.
+
+- if a new softirq-safe lock is discovered, we check whether it took
+  any softirq-unsafe lock in the past.
+
+- if a new hardirq-unsafe lock is discovered, we check whether any
+  hardirq-safe lock took it in the past.
+
+- if a new softirq-unsafe lock is discovered, we check whether any
+  softirq-safe lock took it in the past.
+
+(Again, we do these checks too on the basis that an interrupt context
+could interrupt _any_ of the irq-unsafe or hardirq-unsafe locks, which
+could lead to a lock inversion deadlock - even if that lock scenario did
+not trigger in practice yet.)
+
+Exception: Nested data dependencies leading to nested locking
+-------------------------------------------------------------
+
+There are a few cases where the Linux kernel acquires more than one
+instance of the same lock-class. Such cases typically happen when there
+is some sort of hierarchy within objects of the same type. In these
+cases there is an inherent "natural" ordering between the two objects
+(defined by the properties of the hierarchy), and the kernel grabs the
+locks in this fixed order on each of the objects.
+
+An example of such an object hierarchy that results in "nested locking"
+is that of a "whole disk" block-dev object and a "partition" block-dev
+object; the partition is "part of" the whole device and as long as one
+always takes the whole disk lock as a higher lock than the partition
+lock, the lock ordering is fully correct. The validator does not
+automatically detect this natural ordering, as the locking rule behind
+the ordering is not static.
+
+In order to teach the validator about this correct usage model, new
+versions of the various locking primitives were added that allow you to
+specify a "nesting level". An example call, for the block device mutex,
+looks like this::
+
+  enum bdev_bd_mutex_lock_class
+  {
+       BD_MUTEX_NORMAL,
+       BD_MUTEX_WHOLE,
+       BD_MUTEX_PARTITION
+  };
+
+mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION);
+
+In this case the locking is done on a bdev object that is known to be a
+partition.
+
+The validator treats a lock that is taken in such a nested fashion as a
+separate (sub)class for the purposes of validation.
+
+Note: When changing code to use the _nested() primitives, be careful and
+check really thoroughly that the hierarchy is correctly mapped; otherwise
+you can get false positives or false negatives.
+
+Annotations
+-----------
+
+Two constructs can be used to annotate and check where and if certain locks
+must be held: lockdep_assert_held*(&lock) and lockdep_*pin_lock(&lock).
+
+As the name suggests, lockdep_assert_held* family of macros assert that a
+particular lock is held at a certain time (and generate a WARN() otherwise).
+This annotation is largely used all over the kernel, e.g. kernel/sched/
+core.c::
+
+  void update_rq_clock(struct rq *rq)
+  {
+	s64 delta;
+
+	lockdep_assert_held(&rq->lock);
+	[...]
+  }
+
+where holding rq->lock is required to safely update a rq's clock.
+
+The other family of macros is lockdep_*pin_lock(), which is admittedly only
+used for rq->lock ATM. Despite their limited adoption these annotations
+generate a WARN() if the lock of interest is "accidentally" unlocked. This turns
+out to be especially helpful to debug code with callbacks, where an upper
+layer assumes a lock remains taken, but a lower layer thinks it can maybe drop
+and reacquire the lock ("unwittingly" introducing races). lockdep_pin_lock()
+returns a 'struct pin_cookie' that is then used by lockdep_unpin_lock() to check
+that nobody tampered with the lock, e.g. kernel/sched/sched.h::
+
+  static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
+  {
+	rf->cookie = lockdep_pin_lock(&rq->lock);
+	[...]
+  }
+
+  static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
+  {
+	[...]
+	lockdep_unpin_lock(&rq->lock, rf->cookie);
+  }
+
+While comments about locking requirements might provide useful information,
+the runtime checks performed by annotations are invaluable when debugging
+locking problems and they carry the same level of details when inspecting
+code.  Always prefer annotations when in doubt!
+
+Proof of 100% correctness:
+--------------------------
+
+The validator achieves perfect, mathematical 'closure' (proof of locking
+correctness) in the sense that for every simple, standalone single-task
+locking sequence that occurred at least once during the lifetime of the
+kernel, the validator proves it with a 100% certainty that no
+combination and timing of these locking sequences can cause any class of
+lock related deadlock. [1]_
+
+I.e. complex multi-CPU and multi-task locking scenarios do not have to
+occur in practice to prove a deadlock: only the simple 'component'
+locking chains have to occur at least once (anytime, in any
+task/context) for the validator to be able to prove correctness. (For
+example, complex deadlocks that would normally need more than 3 CPUs and
+a very unlikely constellation of tasks, irq-contexts and timings to
+occur, can be detected on a plain, lightly loaded single-CPU system as
+well!)
+
+This radically decreases the complexity of locking related QA of the
+kernel: what has to be done during QA is to trigger as many "simple"
+single-task locking dependencies in the kernel as possible, at least
+once, to prove locking correctness - instead of having to trigger every
+possible combination of locking interaction between CPUs, combined with
+every possible hardirq and softirq nesting scenario (which is impossible
+to do in practice).
+
+.. [1]
+
+    assuming that the validator itself is 100% correct, and no other
+    part of the system corrupts the state of the validator in any way.
+    We also assume that all NMI/SMM paths [which could interrupt
+    even hardirq-disabled codepaths] are correct and do not interfere
+    with the validator. We also assume that the 64-bit 'chain hash'
+    value is unique for every lock-chain in the system. Also, lock
+    recursion must not be higher than 20.
+
+Performance:
+------------
+
+The above rules require **massive** amounts of runtime checking. If we did
+that for every lock taken and for every irqs-enable event, it would
+render the system practically unusably slow. The complexity of checking
+is O(N^2), so even with just a few hundred lock-classes we'd have to do
+tens of thousands of checks for every event.
+
+This problem is solved by checking any given 'locking scenario' (unique
+sequence of locks taken after each other) only once. A simple stack of
+held locks is maintained, and a lightweight 64-bit hash value is
+calculated, which hash is unique for every lock chain. The hash value,
+when the chain is validated for the first time, is then put into a hash
+table, which hash-table can be checked in a lockfree manner. If the
+locking chain occurs again later on, the hash table tells us that we
+don't have to validate the chain again.
+
+Troubleshooting:
+----------------
+
+The validator tracks a maximum of MAX_LOCKDEP_KEYS number of lock classes.
+Exceeding this number will trigger the following lockdep warning:
+
+	(DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+
+By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical
+desktop systems have less than 1,000 lock classes, so this warning
+normally results from lock-class leakage or failure to properly
+initialize locks.  These two problems are illustrated below:
+
+1.	Repeated module loading and unloading while running the validator
+	will result in lock-class leakage.  The issue here is that each
+	load of the module will create a new set of lock classes for
+	that module's locks, but module unloading does not remove old
+	classes (see below discussion of reuse of lock classes for why).
+	Therefore, if that module is loaded and unloaded repeatedly,
+	the number of lock classes will eventually reach the maximum.
+
+2.	Using structures such as arrays that have large numbers of
+	locks that are not explicitly initialized.  For example,
+	a hash table with 8192 buckets where each bucket has its own
+	spinlock_t will consume 8192 lock classes -unless- each spinlock
+	is explicitly initialized at runtime, for example, using the
+	run-time spin_lock_init() as opposed to compile-time initializers
+	such as __SPIN_LOCK_UNLOCKED().  Failure to properly initialize
+	the per-bucket spinlocks would guarantee lock-class overflow.
+	In contrast, a loop that called spin_lock_init() on each lock
+	would place all 8192 locks into a single lock class.
+
+	The moral of this story is that you should always explicitly
+	initialize your locks.
+
+One might argue that the validator should be modified to allow
+lock classes to be reused.  However, if you are tempted to make this
+argument, first review the code and think through the changes that would
+be required, keeping in mind that the lock classes to be removed are
+likely to be linked into the lock-dependency graph.  This turns out to
+be harder to do than to say.
+
+Of course, if you do run out of lock classes, the next thing to do is
+to find the offending lock classes.  First, the following command gives
+you the number of lock classes currently in use along with the maximum::
+
+	grep "lock-classes" /proc/lockdep_stats
+
+This command produces the following output on a modest system::
+
+	lock-classes:                          748 [max: 8191]
+
+If the number allocated (748 above) increases continually over time,
+then there is likely a leak.  The following command can be used to
+identify the leaking lock classes::
+
+	grep "BD" /proc/lockdep
+
+Run the command and save the output, then compare against the output from
+a later run of this command to identify the leakers.  This same output
+can also help you find situations where runtime lock initialization has
+been omitted.
diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.txt
deleted file mode 100644
index f189d130e543..000000000000
--- a/Documentation/locking/lockdep-design.txt
+++ /dev/null
@@ -1,389 +0,0 @@
-Runtime locking correctness validator
-=====================================
-
-started by Ingo Molnar <mingo@redhat.com>
-additions by Arjan van de Ven <arjan@linux.intel.com>
-
-Lock-class
-----------
-
-The basic object the validator operates upon is a 'class' of locks.
-
-A class of locks is a group of locks that are logically the same with
-respect to locking rules, even if the locks may have multiple (possibly
-tens of thousands of) instantiations. For example a lock in the inode
-struct is one class, while each inode has its own instantiation of that
-lock class.
-
-The validator tracks the 'usage state' of lock-classes, and it tracks
-the dependencies between different lock-classes. Lock usage indicates
-how a lock is used with regard to its IRQ contexts, while lock
-dependency can be understood as lock order, where L1 -> L2 suggests that
-a task is attempting to acquire L2 while holding L1. From lockdep's
-perspective, the two locks (L1 and L2) are not necessarily related; that
-dependency just means the order ever happened. The validator maintains a
-continuing effort to prove lock usages and dependencies are correct or
-the validator will shoot a splat if incorrect.
-
-A lock-class's behavior is constructed by its instances collectively:
-when the first instance of a lock-class is used after bootup the class
-gets registered, then all (subsequent) instances will be mapped to the
-class and hence their usages and dependecies will contribute to those of
-the class. A lock-class does not go away when a lock instance does, but
-it can be removed if the memory space of the lock class (static or
-dynamic) is reclaimed, this happens for example when a module is
-unloaded or a workqueue is destroyed.
-
-State
------
-
-The validator tracks lock-class usage history and divides the usage into
-(4 usages * n STATEs + 1) categories:
-
-where the 4 usages can be:
-- 'ever held in STATE context'
-- 'ever held as readlock in STATE context'
-- 'ever held with STATE enabled'
-- 'ever held as readlock with STATE enabled'
-
-where the n STATEs are coded in kernel/locking/lockdep_states.h and as of
-now they include:
-- hardirq
-- softirq
-
-where the last 1 category is:
-- 'ever used'                                       [ == !unused        ]
-
-When locking rules are violated, these usage bits are presented in the
-locking error messages, inside curlies, with a total of 2 * n STATEs bits.
-A contrived example:
-
-   modprobe/2287 is trying to acquire lock:
-    (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
-
-   but task is already holding lock:
-    (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
-
-
-For a given lock, the bit positions from left to right indicate the usage
-of the lock and readlock (if exists), for each of the n STATEs listed
-above respectively, and the character displayed at each bit position
-indicates:
-
-   '.'  acquired while irqs disabled and not in irq context
-   '-'  acquired in irq context
-   '+'  acquired with irqs enabled
-   '?'  acquired in irq context with irqs enabled.
-
-The bits are illustrated with an example:
-
-    (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
-                         ||||
-                         ||| \-> softirq disabled and not in softirq context
-                         || \--> acquired in softirq context
-                         | \---> hardirq disabled and not in hardirq context
-                          \----> acquired in hardirq context
-
-
-For a given STATE, whether the lock is ever acquired in that STATE
-context and whether that STATE is enabled yields four possible cases as
-shown in the table below. The bit character is able to indicate which
-exact case is for the lock as of the reporting time.
-
-   -------------------------------------------
-  |              | irq enabled | irq disabled |
-  |-------------------------------------------|
-  | ever in irq  |      ?      |       -      |
-  |-------------------------------------------|
-  | never in irq |      +      |       .      |
-   -------------------------------------------
-
-The character '-' suggests irq is disabled because if otherwise the
-charactor '?' would have been shown instead. Similar deduction can be
-applied for '+' too.
-
-Unused locks (e.g., mutexes) cannot be part of the cause of an error.
-
-
-Single-lock state rules:
-------------------------
-
-A lock is irq-safe means it was ever used in an irq context, while a lock
-is irq-unsafe means it was ever acquired with irq enabled.
-
-A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The
-following states must be exclusive: only one of them is allowed to be set
-for any lock-class based on its usage:
-
- <hardirq-safe> or <hardirq-unsafe>
- <softirq-safe> or <softirq-unsafe>
-
-This is because if a lock can be used in irq context (irq-safe) then it
-cannot be ever acquired with irq enabled (irq-unsafe). Otherwise, a
-deadlock may happen. For example, in the scenario that after this lock
-was acquired but before released, if the context is interrupted this
-lock will be attempted to acquire twice, which creates a deadlock,
-referred to as lock recursion deadlock.
-
-The validator detects and reports lock usage that violates these
-single-lock state rules.
-
-Multi-lock dependency rules:
-----------------------------
-
-The same lock-class must not be acquired twice, because this could lead
-to lock recursion deadlocks.
-
-Furthermore, two locks can not be taken in inverse order:
-
- <L1> -> <L2>
- <L2> -> <L1>
-
-because this could lead to a deadlock - referred to as lock inversion
-deadlock - as attempts to acquire the two locks form a circle which
-could lead to the two contexts waiting for each other permanently. The
-validator will find such dependency circle in arbitrary complexity,
-i.e., there can be any other locking sequence between the acquire-lock
-operations; the validator will still find whether these locks can be
-acquired in a circular fashion.
-
-Furthermore, the following usage based lock dependencies are not allowed
-between any two lock-classes:
-
-   <hardirq-safe>   ->  <hardirq-unsafe>
-   <softirq-safe>   ->  <softirq-unsafe>
-
-The first rule comes from the fact that a hardirq-safe lock could be
-taken by a hardirq context, interrupting a hardirq-unsafe lock - and
-thus could result in a lock inversion deadlock. Likewise, a softirq-safe
-lock could be taken by an softirq context, interrupting a softirq-unsafe
-lock.
-
-The above rules are enforced for any locking sequence that occurs in the
-kernel: when acquiring a new lock, the validator checks whether there is
-any rule violation between the new lock and any of the held locks.
-
-When a lock-class changes its state, the following aspects of the above
-dependency rules are enforced:
-
-- if a new hardirq-safe lock is discovered, we check whether it
-  took any hardirq-unsafe lock in the past.
-
-- if a new softirq-safe lock is discovered, we check whether it took
-  any softirq-unsafe lock in the past.
-
-- if a new hardirq-unsafe lock is discovered, we check whether any
-  hardirq-safe lock took it in the past.
-
-- if a new softirq-unsafe lock is discovered, we check whether any
-  softirq-safe lock took it in the past.
-
-(Again, we do these checks too on the basis that an interrupt context
-could interrupt _any_ of the irq-unsafe or hardirq-unsafe locks, which
-could lead to a lock inversion deadlock - even if that lock scenario did
-not trigger in practice yet.)
-
-Exception: Nested data dependencies leading to nested locking
--------------------------------------------------------------
-
-There are a few cases where the Linux kernel acquires more than one
-instance of the same lock-class. Such cases typically happen when there
-is some sort of hierarchy within objects of the same type. In these
-cases there is an inherent "natural" ordering between the two objects
-(defined by the properties of the hierarchy), and the kernel grabs the
-locks in this fixed order on each of the objects.
-
-An example of such an object hierarchy that results in "nested locking"
-is that of a "whole disk" block-dev object and a "partition" block-dev
-object; the partition is "part of" the whole device and as long as one
-always takes the whole disk lock as a higher lock than the partition
-lock, the lock ordering is fully correct. The validator does not
-automatically detect this natural ordering, as the locking rule behind
-the ordering is not static.
-
-In order to teach the validator about this correct usage model, new
-versions of the various locking primitives were added that allow you to
-specify a "nesting level". An example call, for the block device mutex,
-looks like this:
-
-enum bdev_bd_mutex_lock_class
-{
-       BD_MUTEX_NORMAL,
-       BD_MUTEX_WHOLE,
-       BD_MUTEX_PARTITION
-};
-
- mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION);
-
-In this case the locking is done on a bdev object that is known to be a
-partition.
-
-The validator treats a lock that is taken in such a nested fashion as a
-separate (sub)class for the purposes of validation.
-
-Note: When changing code to use the _nested() primitives, be careful and
-check really thoroughly that the hierarchy is correctly mapped; otherwise
-you can get false positives or false negatives.
-
-Annotations
------------
-
-Two constructs can be used to annotate and check where and if certain locks
-must be held: lockdep_assert_held*(&lock) and lockdep_*pin_lock(&lock).
-
-As the name suggests, lockdep_assert_held* family of macros assert that a
-particular lock is held at a certain time (and generate a WARN() otherwise).
-This annotation is largely used all over the kernel, e.g. kernel/sched/
-core.c
-
-  void update_rq_clock(struct rq *rq)
-  {
-	s64 delta;
-
-	lockdep_assert_held(&rq->lock);
-	[...]
-  }
-
-where holding rq->lock is required to safely update a rq's clock.
-
-The other family of macros is lockdep_*pin_lock(), which is admittedly only
-used for rq->lock ATM. Despite their limited adoption these annotations
-generate a WARN() if the lock of interest is "accidentally" unlocked. This turns
-out to be especially helpful to debug code with callbacks, where an upper
-layer assumes a lock remains taken, but a lower layer thinks it can maybe drop
-and reacquire the lock ("unwittingly" introducing races). lockdep_pin_lock()
-returns a 'struct pin_cookie' that is then used by lockdep_unpin_lock() to check
-that nobody tampered with the lock, e.g. kernel/sched/sched.h
-
-  static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
-  {
-	rf->cookie = lockdep_pin_lock(&rq->lock);
-	[...]
-  }
-
-  static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
-  {
-	[...]
-	lockdep_unpin_lock(&rq->lock, rf->cookie);
-  }
-
-While comments about locking requirements might provide useful information,
-the runtime checks performed by annotations are invaluable when debugging
-locking problems and they carry the same level of details when inspecting
-code.  Always prefer annotations when in doubt!
-
-Proof of 100% correctness:
---------------------------
-
-The validator achieves perfect, mathematical 'closure' (proof of locking
-correctness) in the sense that for every simple, standalone single-task
-locking sequence that occurred at least once during the lifetime of the
-kernel, the validator proves it with a 100% certainty that no
-combination and timing of these locking sequences can cause any class of
-lock related deadlock. [*]
-
-I.e. complex multi-CPU and multi-task locking scenarios do not have to
-occur in practice to prove a deadlock: only the simple 'component'
-locking chains have to occur at least once (anytime, in any
-task/context) for the validator to be able to prove correctness. (For
-example, complex deadlocks that would normally need more than 3 CPUs and
-a very unlikely constellation of tasks, irq-contexts and timings to
-occur, can be detected on a plain, lightly loaded single-CPU system as
-well!)
-
-This radically decreases the complexity of locking related QA of the
-kernel: what has to be done during QA is to trigger as many "simple"
-single-task locking dependencies in the kernel as possible, at least
-once, to prove locking correctness - instead of having to trigger every
-possible combination of locking interaction between CPUs, combined with
-every possible hardirq and softirq nesting scenario (which is impossible
-to do in practice).
-
-[*] assuming that the validator itself is 100% correct, and no other
-    part of the system corrupts the state of the validator in any way.
-    We also assume that all NMI/SMM paths [which could interrupt
-    even hardirq-disabled codepaths] are correct and do not interfere
-    with the validator. We also assume that the 64-bit 'chain hash'
-    value is unique for every lock-chain in the system. Also, lock
-    recursion must not be higher than 20.
-
-Performance:
-------------
-
-The above rules require _massive_ amounts of runtime checking. If we did
-that for every lock taken and for every irqs-enable event, it would
-render the system practically unusably slow. The complexity of checking
-is O(N^2), so even with just a few hundred lock-classes we'd have to do
-tens of thousands of checks for every event.
-
-This problem is solved by checking any given 'locking scenario' (unique
-sequence of locks taken after each other) only once. A simple stack of
-held locks is maintained, and a lightweight 64-bit hash value is
-calculated, which hash is unique for every lock chain. The hash value,
-when the chain is validated for the first time, is then put into a hash
-table, which hash-table can be checked in a lockfree manner. If the
-locking chain occurs again later on, the hash table tells us that we
-don't have to validate the chain again.
-
-Troubleshooting:
-----------------
-
-The validator tracks a maximum of MAX_LOCKDEP_KEYS number of lock classes.
-Exceeding this number will trigger the following lockdep warning:
-
-	(DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
-
-By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical
-desktop systems have less than 1,000 lock classes, so this warning
-normally results from lock-class leakage or failure to properly
-initialize locks.  These two problems are illustrated below:
-
-1.	Repeated module loading and unloading while running the validator
-	will result in lock-class leakage.  The issue here is that each
-	load of the module will create a new set of lock classes for
-	that module's locks, but module unloading does not remove old
-	classes (see below discussion of reuse of lock classes for why).
-	Therefore, if that module is loaded and unloaded repeatedly,
-	the number of lock classes will eventually reach the maximum.
-
-2.	Using structures such as arrays that have large numbers of
-	locks that are not explicitly initialized.  For example,
-	a hash table with 8192 buckets where each bucket has its own
-	spinlock_t will consume 8192 lock classes -unless- each spinlock
-	is explicitly initialized at runtime, for example, using the
-	run-time spin_lock_init() as opposed to compile-time initializers
-	such as __SPIN_LOCK_UNLOCKED().  Failure to properly initialize
-	the per-bucket spinlocks would guarantee lock-class overflow.
-	In contrast, a loop that called spin_lock_init() on each lock
-	would place all 8192 locks into a single lock class.
-
-	The moral of this story is that you should always explicitly
-	initialize your locks.
-
-One might argue that the validator should be modified to allow
-lock classes to be reused.  However, if you are tempted to make this
-argument, first review the code and think through the changes that would
-be required, keeping in mind that the lock classes to be removed are
-likely to be linked into the lock-dependency graph.  This turns out to
-be harder to do than to say.
-
-Of course, if you do run out of lock classes, the next thing to do is
-to find the offending lock classes.  First, the following command gives
-you the number of lock classes currently in use along with the maximum:
-
-	grep "lock-classes" /proc/lockdep_stats
-
-This command produces the following output on a modest system:
-
-	 lock-classes:                          748 [max: 8191]
-
-If the number allocated (748 above) increases continually over time,
-then there is likely a leak.  The following command can be used to
-identify the leaking lock classes:
-
-	grep "BD" /proc/lockdep
-
-Run the command and save the output, then compare against the output from
-a later run of this command to identify the leakers.  This same output
-can also help you find situations where runtime lock initialization has
-been omitted.
diff --git a/Documentation/locking/lockstat.rst b/Documentation/locking/lockstat.rst
new file mode 100644
index 000000000000..536eab8dbd99
--- /dev/null
+++ b/Documentation/locking/lockstat.rst
@@ -0,0 +1,204 @@
+===============
+Lock Statistics
+===============
+
+What
+====
+
+As the name suggests, it provides statistics on locks.
+
+
+Why
+===
+
+Because things like lock contention can severely impact performance.
+
+How
+===
+
+Lockdep already has hooks in the lock functions and maps lock instances to
+lock classes. We build on that (see Documentation/locking/lockdep-design.rst).
+The graph below shows the relation between the lock functions and the various
+hooks therein::
+
+        __acquire
+            |
+           lock _____
+            |        \
+            |    __contended
+            |         |
+            |       <wait>
+            | _______/
+            |/
+            |
+       __acquired
+            |
+            .
+          <hold>
+            .
+            |
+       __release
+            |
+         unlock
+
+  lock, unlock	- the regular lock functions
+  __*		- the hooks
+  <> 		- states
+
+With these hooks we provide the following statistics:
+
+ con-bounces
+	- number of lock contention that involved x-cpu data
+ contentions
+	- number of lock acquisitions that had to wait
+ wait time
+     min
+	- shortest (non-0) time we ever had to wait for a lock
+     max
+	- longest time we ever had to wait for a lock
+     total
+	- total time we spend waiting on this lock
+     avg
+	- average time spent waiting on this lock
+ acq-bounces
+	- number of lock acquisitions that involved x-cpu data
+ acquisitions
+	- number of times we took the lock
+ hold time
+     min
+	- shortest (non-0) time we ever held the lock
+     max
+	- longest time we ever held the lock
+     total
+	- total time this lock was held
+     avg
+	- average time this lock was held
+
+These numbers are gathered per lock class, per read/write state (when
+applicable).
+
+It also tracks 4 contention points per class. A contention point is a call site
+that had to wait on lock acquisition.
+
+Configuration
+-------------
+
+Lock statistics are enabled via CONFIG_LOCK_STAT.
+
+Usage
+-----
+
+Enable collection of statistics::
+
+	# echo 1 >/proc/sys/kernel/lock_stat
+
+Disable collection of statistics::
+
+	# echo 0 >/proc/sys/kernel/lock_stat
+
+Look at the current lock statistics::
+
+  ( line numbers not part of actual output, done for clarity in the explanation
+    below )
+
+  # less /proc/lock_stat
+
+  01 lock_stat version 0.4
+  02-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+  03                              class name    con-bounces    contentions   waittime-min   waittime-max waittime-total   waittime-avg    acq-bounces   acquisitions   holdtime-min   holdtime-max holdtime-total   holdtime-avg
+  04-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+  05
+  06                         &mm->mmap_sem-W:            46             84           0.26         939.10       16371.53         194.90          47291        2922365           0.16     2220301.69 17464026916.32        5975.99
+  07                         &mm->mmap_sem-R:            37            100           1.31      299502.61      325629.52        3256.30         212344       34316685           0.10        7744.91    95016910.20           2.77
+  08                         ---------------
+  09                           &mm->mmap_sem              1          [<ffffffff811502a7>] khugepaged_scan_mm_slot+0x57/0x280
+  10                           &mm->mmap_sem             96          [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
+  11                           &mm->mmap_sem             34          [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
+  12                           &mm->mmap_sem             17          [<ffffffff81127e71>] vm_munmap+0x41/0x80
+  13                         ---------------
+  14                           &mm->mmap_sem              1          [<ffffffff81046fda>] dup_mmap+0x2a/0x3f0
+  15                           &mm->mmap_sem             60          [<ffffffff81129e29>] SyS_mprotect+0xe9/0x250
+  16                           &mm->mmap_sem             41          [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
+  17                           &mm->mmap_sem             68          [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
+  18
+  19.............................................................................................................................................................................................................................
+  20
+  21                         unix_table_lock:           110            112           0.21          49.24         163.91           1.46          21094          66312           0.12         624.42       31589.81           0.48
+  22                         ---------------
+  23                         unix_table_lock             45          [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
+  24                         unix_table_lock             47          [<ffffffff8150b111>] unix_release_sock+0x31/0x250
+  25                         unix_table_lock             15          [<ffffffff8150ca37>] unix_find_other+0x117/0x230
+  26                         unix_table_lock              5          [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
+  27                         ---------------
+  28                         unix_table_lock             39          [<ffffffff8150b111>] unix_release_sock+0x31/0x250
+  29                         unix_table_lock             49          [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
+  30                         unix_table_lock             20          [<ffffffff8150ca37>] unix_find_other+0x117/0x230
+  31                         unix_table_lock              4          [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
+
+
+This excerpt shows the first two lock class statistics. Line 01 shows the
+output version - each time the format changes this will be updated. Line 02-04
+show the header with column descriptions. Lines 05-18 and 20-31 show the actual
+statistics. These statistics come in two parts; the actual stats separated by a
+short separator (line 08, 13) from the contention points.
+
+Lines 09-12 show the first 4 recorded contention points (the code
+which tries to get the lock) and lines 14-17 show the first 4 recorded
+contended points (the lock holder). It is possible that the max
+con-bounces point is missing in the statistics.
+
+The first lock (05-18) is a read/write lock, and shows two lines above the
+short separator. The contention points don't match the column descriptors,
+they have two: contentions and [<IP>] symbol. The second set of contention
+points are the points we're contending with.
+
+The integer part of the time values is in us.
+
+Dealing with nested locks, subclasses may appear::
+
+  32...........................................................................................................................................................................................................................
+  33
+  34                               &rq->lock:       13128          13128           0.43         190.53      103881.26           7.91          97454        3453404           0.00         401.11    13224683.11           3.82
+  35                               ---------
+  36                               &rq->lock          645          [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+  37                               &rq->lock          297          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+  38                               &rq->lock          360          [<ffffffff8103c4c5>] select_task_rq_fair+0x1f0/0x74a
+  39                               &rq->lock          428          [<ffffffff81045f98>] scheduler_tick+0x46/0x1fb
+  40                               ---------
+  41                               &rq->lock           77          [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+  42                               &rq->lock          174          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+  43                               &rq->lock         4715          [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+  44                               &rq->lock          893          [<ffffffff81340524>] schedule+0x157/0x7b8
+  45
+  46...........................................................................................................................................................................................................................
+  47
+  48                             &rq->lock/1:        1526          11488           0.33         388.73      136294.31          11.86          21461          38404           0.00          37.93      109388.53           2.84
+  49                             -----------
+  50                             &rq->lock/1        11526          [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+  51                             -----------
+  52                             &rq->lock/1         5645          [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+  53                             &rq->lock/1         1224          [<ffffffff81340524>] schedule+0x157/0x7b8
+  54                             &rq->lock/1         4336          [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+  55                             &rq->lock/1          181          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+
+Line 48 shows statistics for the second subclass (/1) of &rq->lock class
+(subclass starts from 0), since in this case, as line 50 suggests,
+double_rq_lock actually acquires a nested lock of two spinlocks.
+
+View the top contending locks::
+
+  # grep : /proc/lock_stat | head
+			clockevents_lock:       2926159        2947636           0.15       46882.81  1784540466.34         605.41        3381345        3879161           0.00        2260.97    53178395.68          13.71
+		     tick_broadcast_lock:        346460         346717           0.18        2257.43    39364622.71         113.54        3642919        4242696           0.00        2263.79    49173646.60          11.59
+		  &mapping->i_mmap_mutex:        203896         203899           3.36      645530.05 31767507988.39      155800.21        3361776        8893984           0.17        2254.15    14110121.02           1.59
+			       &rq->lock:        135014         136909           0.18         606.09      842160.68           6.15        1540728       10436146           0.00         728.72    17606683.41           1.69
+	       &(&zone->lru_lock)->rlock:         93000          94934           0.16          59.18      188253.78           1.98        1199912        3809894           0.15         391.40     3559518.81           0.93
+			 tasklist_lock-W:         40667          41130           0.23        1189.42      428980.51          10.43         270278         510106           0.16         653.51     3939674.91           7.72
+			 tasklist_lock-R:         21298          21305           0.20        1310.05      215511.12          10.12         186204         241258           0.14        1162.33     1179779.23           4.89
+			      rcu_node_1:         47656          49022           0.16         635.41      193616.41           3.95         844888        1865423           0.00         764.26     1656226.96           0.89
+       &(&dentry->d_lockref.lock)->rlock:         39791          40179           0.15        1302.08       88851.96           2.21        2790851       12527025           0.10        1910.75     3379714.27           0.27
+			      rcu_node_0:         29203          30064           0.16         786.55     1555573.00          51.74          88963         244254           0.00         398.87      428872.51           1.76
+
+Clear the statistics::
+
+  # echo 0 > /proc/lock_stat
diff --git a/Documentation/locking/lockstat.txt b/Documentation/locking/lockstat.txt
deleted file mode 100644
index fdbeb0c45ef3..000000000000
--- a/Documentation/locking/lockstat.txt
+++ /dev/null
@@ -1,183 +0,0 @@
-
-LOCK STATISTICS
-
-- WHAT
-
-As the name suggests, it provides statistics on locks.
-
-- WHY
-
-Because things like lock contention can severely impact performance.
-
-- HOW
-
-Lockdep already has hooks in the lock functions and maps lock instances to
-lock classes. We build on that (see Documentation/locking/lockdep-design.txt).
-The graph below shows the relation between the lock functions and the various
-hooks therein.
-
-        __acquire
-            |
-           lock _____
-            |        \
-            |    __contended
-            |         |
-            |       <wait>
-            | _______/
-            |/
-            |
-       __acquired
-            |
-            .
-          <hold>
-            .
-            |
-       __release
-            |
-         unlock
-
-lock, unlock	- the regular lock functions
-__*		- the hooks
-<> 		- states
-
-With these hooks we provide the following statistics:
-
- con-bounces       - number of lock contention that involved x-cpu data
- contentions       - number of lock acquisitions that had to wait
- wait time min     - shortest (non-0) time we ever had to wait for a lock
-           max     - longest time we ever had to wait for a lock
-	   total   - total time we spend waiting on this lock
-	   avg     - average time spent waiting on this lock
- acq-bounces       - number of lock acquisitions that involved x-cpu data
- acquisitions      - number of times we took the lock
- hold time min     - shortest (non-0) time we ever held the lock
-	   max     - longest time we ever held the lock
-	   total   - total time this lock was held
-	   avg     - average time this lock was held
-
-These numbers are gathered per lock class, per read/write state (when
-applicable).
-
-It also tracks 4 contention points per class. A contention point is a call site
-that had to wait on lock acquisition.
-
- - CONFIGURATION
-
-Lock statistics are enabled via CONFIG_LOCK_STAT.
-
- - USAGE
-
-Enable collection of statistics:
-
-# echo 1 >/proc/sys/kernel/lock_stat
-
-Disable collection of statistics:
-
-# echo 0 >/proc/sys/kernel/lock_stat
-
-Look at the current lock statistics:
-
-( line numbers not part of actual output, done for clarity in the explanation
-  below )
-
-# less /proc/lock_stat
-
-01 lock_stat version 0.4
-02-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-03                              class name    con-bounces    contentions   waittime-min   waittime-max waittime-total   waittime-avg    acq-bounces   acquisitions   holdtime-min   holdtime-max holdtime-total   holdtime-avg
-04-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-05
-06                         &mm->mmap_sem-W:            46             84           0.26         939.10       16371.53         194.90          47291        2922365           0.16     2220301.69 17464026916.32        5975.99
-07                         &mm->mmap_sem-R:            37            100           1.31      299502.61      325629.52        3256.30         212344       34316685           0.10        7744.91    95016910.20           2.77
-08                         ---------------
-09                           &mm->mmap_sem              1          [<ffffffff811502a7>] khugepaged_scan_mm_slot+0x57/0x280
-10                           &mm->mmap_sem             96          [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
-11                           &mm->mmap_sem             34          [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
-12                           &mm->mmap_sem             17          [<ffffffff81127e71>] vm_munmap+0x41/0x80
-13                         ---------------
-14                           &mm->mmap_sem              1          [<ffffffff81046fda>] dup_mmap+0x2a/0x3f0
-15                           &mm->mmap_sem             60          [<ffffffff81129e29>] SyS_mprotect+0xe9/0x250
-16                           &mm->mmap_sem             41          [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
-17                           &mm->mmap_sem             68          [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
-18
-19.............................................................................................................................................................................................................................
-20
-21                         unix_table_lock:           110            112           0.21          49.24         163.91           1.46          21094          66312           0.12         624.42       31589.81           0.48
-22                         ---------------
-23                         unix_table_lock             45          [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
-24                         unix_table_lock             47          [<ffffffff8150b111>] unix_release_sock+0x31/0x250
-25                         unix_table_lock             15          [<ffffffff8150ca37>] unix_find_other+0x117/0x230
-26                         unix_table_lock              5          [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
-27                         ---------------
-28                         unix_table_lock             39          [<ffffffff8150b111>] unix_release_sock+0x31/0x250
-29                         unix_table_lock             49          [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
-30                         unix_table_lock             20          [<ffffffff8150ca37>] unix_find_other+0x117/0x230
-31                         unix_table_lock              4          [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
-
-
-This excerpt shows the first two lock class statistics. Line 01 shows the
-output version - each time the format changes this will be updated. Line 02-04
-show the header with column descriptions. Lines 05-18 and 20-31 show the actual
-statistics. These statistics come in two parts; the actual stats separated by a
-short separator (line 08, 13) from the contention points.
-
-Lines 09-12 show the first 4 recorded contention points (the code
-which tries to get the lock) and lines 14-17 show the first 4 recorded
-contended points (the lock holder). It is possible that the max
-con-bounces point is missing in the statistics.
-
-The first lock (05-18) is a read/write lock, and shows two lines above the
-short separator. The contention points don't match the column descriptors,
-they have two: contentions and [<IP>] symbol. The second set of contention
-points are the points we're contending with.
-
-The integer part of the time values is in us.
-
-Dealing with nested locks, subclasses may appear:
-
-32...........................................................................................................................................................................................................................
-33
-34                               &rq->lock:       13128          13128           0.43         190.53      103881.26           7.91          97454        3453404           0.00         401.11    13224683.11           3.82
-35                               ---------
-36                               &rq->lock          645          [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
-37                               &rq->lock          297          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
-38                               &rq->lock          360          [<ffffffff8103c4c5>] select_task_rq_fair+0x1f0/0x74a
-39                               &rq->lock          428          [<ffffffff81045f98>] scheduler_tick+0x46/0x1fb
-40                               ---------
-41                               &rq->lock           77          [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
-42                               &rq->lock          174          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
-43                               &rq->lock         4715          [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
-44                               &rq->lock          893          [<ffffffff81340524>] schedule+0x157/0x7b8
-45
-46...........................................................................................................................................................................................................................
-47
-48                             &rq->lock/1:        1526          11488           0.33         388.73      136294.31          11.86          21461          38404           0.00          37.93      109388.53           2.84
-49                             -----------
-50                             &rq->lock/1        11526          [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
-51                             -----------
-52                             &rq->lock/1         5645          [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
-53                             &rq->lock/1         1224          [<ffffffff81340524>] schedule+0x157/0x7b8
-54                             &rq->lock/1         4336          [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
-55                             &rq->lock/1          181          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
-
-Line 48 shows statistics for the second subclass (/1) of &rq->lock class
-(subclass starts from 0), since in this case, as line 50 suggests,
-double_rq_lock actually acquires a nested lock of two spinlocks.
-
-View the top contending locks:
-
-# grep : /proc/lock_stat | head
-			clockevents_lock:       2926159        2947636           0.15       46882.81  1784540466.34         605.41        3381345        3879161           0.00        2260.97    53178395.68          13.71
-		     tick_broadcast_lock:        346460         346717           0.18        2257.43    39364622.71         113.54        3642919        4242696           0.00        2263.79    49173646.60          11.59
-		  &mapping->i_mmap_mutex:        203896         203899           3.36      645530.05 31767507988.39      155800.21        3361776        8893984           0.17        2254.15    14110121.02           1.59
-			       &rq->lock:        135014         136909           0.18         606.09      842160.68           6.15        1540728       10436146           0.00         728.72    17606683.41           1.69
-	       &(&zone->lru_lock)->rlock:         93000          94934           0.16          59.18      188253.78           1.98        1199912        3809894           0.15         391.40     3559518.81           0.93
-			 tasklist_lock-W:         40667          41130           0.23        1189.42      428980.51          10.43         270278         510106           0.16         653.51     3939674.91           7.72
-			 tasklist_lock-R:         21298          21305           0.20        1310.05      215511.12          10.12         186204         241258           0.14        1162.33     1179779.23           4.89
-			      rcu_node_1:         47656          49022           0.16         635.41      193616.41           3.95         844888        1865423           0.00         764.26     1656226.96           0.89
-       &(&dentry->d_lockref.lock)->rlock:         39791          40179           0.15        1302.08       88851.96           2.21        2790851       12527025           0.10        1910.75     3379714.27           0.27
-			      rcu_node_0:         29203          30064           0.16         786.55     1555573.00          51.74          88963         244254           0.00         398.87      428872.51           1.76
-
-Clear the statistics:
-
-# echo 0 > /proc/lock_stat
diff --git a/Documentation/locking/locktorture.rst b/Documentation/locking/locktorture.rst
new file mode 100644
index 000000000000..e79eeeca3ac6
--- /dev/null
+++ b/Documentation/locking/locktorture.rst
@@ -0,0 +1,170 @@
+==================================
+Kernel Lock Torture Test Operation
+==================================
+
+CONFIG_LOCK_TORTURE_TEST
+========================
+
+The CONFIG LOCK_TORTURE_TEST config option provides a kernel module
+that runs torture tests on core kernel locking primitives. The kernel
+module, 'locktorture', may be built after the fact on the running
+kernel to be tested, if desired. The tests periodically output status
+messages via printk(), which can be examined via the dmesg (perhaps
+grepping for "torture").  The test is started when the module is loaded,
+and stops when the module is unloaded. This program is based on how RCU
+is tortured, via rcutorture.
+
+This torture test consists of creating a number of kernel threads which
+acquire the lock and hold it for specific amount of time, thus simulating
+different critical region behaviors. The amount of contention on the lock
+can be simulated by either enlarging this critical region hold time and/or
+creating more kthreads.
+
+
+Module Parameters
+=================
+
+This module has the following parameters:
+
+
+Locktorture-specific
+--------------------
+
+nwriters_stress
+		  Number of kernel threads that will stress exclusive lock
+		  ownership (writers). The default value is twice the number
+		  of online CPUs.
+
+nreaders_stress
+		  Number of kernel threads that will stress shared lock
+		  ownership (readers). The default is the same amount of writer
+		  locks. If the user did not specify nwriters_stress, then
+		  both readers and writers be the amount of online CPUs.
+
+torture_type
+		  Type of lock to torture. By default, only spinlocks will
+		  be tortured. This module can torture the following locks,
+		  with string values as follows:
+
+		     - "lock_busted":
+				Simulates a buggy lock implementation.
+
+		     - "spin_lock":
+				spin_lock() and spin_unlock() pairs.
+
+		     - "spin_lock_irq":
+				spin_lock_irq() and spin_unlock_irq() pairs.
+
+		     - "rw_lock":
+				read/write lock() and unlock() rwlock pairs.
+
+		     - "rw_lock_irq":
+				read/write lock_irq() and unlock_irq()
+				rwlock pairs.
+
+		     - "mutex_lock":
+				mutex_lock() and mutex_unlock() pairs.
+
+		     - "rtmutex_lock":
+				rtmutex_lock() and rtmutex_unlock() pairs.
+				Kernel must have CONFIG_RT_MUTEX=y.
+
+		     - "rwsem_lock":
+				read/write down() and up() semaphore pairs.
+
+
+Torture-framework (RCU + locking)
+---------------------------------
+
+shutdown_secs
+		  The number of seconds to run the test before terminating
+		  the test and powering off the system.  The default is
+		  zero, which disables test termination and system shutdown.
+		  This capability is useful for automated testing.
+
+onoff_interval
+		  The number of seconds between each attempt to execute a
+		  randomly selected CPU-hotplug operation.  Defaults
+		  to zero, which disables CPU hotplugging.  In
+		  CONFIG_HOTPLUG_CPU=n kernels, locktorture will silently
+		  refuse to do any CPU-hotplug operations regardless of
+		  what value is specified for onoff_interval.
+
+onoff_holdoff
+		  The number of seconds to wait until starting CPU-hotplug
+		  operations.  This would normally only be used when
+		  locktorture was built into the kernel and started
+		  automatically at boot time, in which case it is useful
+		  in order to avoid confusing boot-time code with CPUs
+		  coming and going. This parameter is only useful if
+		  CONFIG_HOTPLUG_CPU is enabled.
+
+stat_interval
+		  Number of seconds between statistics-related printk()s.
+		  By default, locktorture will report stats every 60 seconds.
+		  Setting the interval to zero causes the statistics to
+		  be printed -only- when the module is unloaded, and this
+		  is the default.
+
+stutter
+		  The length of time to run the test before pausing for this
+		  same period of time.  Defaults to "stutter=5", so as
+		  to run and pause for (roughly) five-second intervals.
+		  Specifying "stutter=0" causes the test to run continuously
+		  without pausing, which is the old default behavior.
+
+shuffle_interval
+		  The number of seconds to keep the test threads affinitied
+		  to a particular subset of the CPUs, defaults to 3 seconds.
+		  Used in conjunction with test_no_idle_hz.
+
+verbose
+		  Enable verbose debugging printing, via printk(). Enabled
+		  by default. This extra information is mostly related to
+		  high-level errors and reports from the main 'torture'
+		  framework.
+
+
+Statistics
+==========
+
+Statistics are printed in the following format::
+
+  spin_lock-torture: Writes:  Total: 93746064  Max/Min: 0/0   Fail: 0
+     (A)		    (B)		   (C)		  (D)	       (E)
+
+  (A): Lock type that is being tortured -- torture_type parameter.
+
+  (B): Number of writer lock acquisitions. If dealing with a read/write
+       primitive a second "Reads" statistics line is printed.
+
+  (C): Number of times the lock was acquired.
+
+  (D): Min and max number of times threads failed to acquire the lock.
+
+  (E): true/false values if there were errors acquiring the lock. This should
+       -only- be positive if there is a bug in the locking primitive's
+       implementation. Otherwise a lock should never fail (i.e., spin_lock()).
+       Of course, the same applies for (C), above. A dummy example of this is
+       the "lock_busted" type.
+
+Usage
+=====
+
+The following script may be used to torture locks::
+
+	#!/bin/sh
+
+	modprobe locktorture
+	sleep 3600
+	rmmod locktorture
+	dmesg | grep torture:
+
+The output can be manually inspected for the error flag of "!!!".
+One could of course create a more elaborate script that automatically
+checked for such errors.  The "rmmod" command forces a "SUCCESS",
+"FAILURE", or "RCU_HOTPLUG" indication to be printk()ed.  The first
+two are self-explanatory, while the last indicates that while there
+were no locking failures, CPU-hotplug problems were detected.
+
+Also see: Documentation/RCU/torture.txt
diff --git a/Documentation/locking/locktorture.txt b/Documentation/locking/locktorture.txt
deleted file mode 100644
index 6a8df4cd19bf..000000000000
--- a/Documentation/locking/locktorture.txt
+++ /dev/null
@@ -1,145 +0,0 @@
-Kernel Lock Torture Test Operation
-
-CONFIG_LOCK_TORTURE_TEST
-
-The CONFIG LOCK_TORTURE_TEST config option provides a kernel module
-that runs torture tests on core kernel locking primitives. The kernel
-module, 'locktorture', may be built after the fact on the running
-kernel to be tested, if desired. The tests periodically output status
-messages via printk(), which can be examined via the dmesg (perhaps
-grepping for "torture").  The test is started when the module is loaded,
-and stops when the module is unloaded. This program is based on how RCU
-is tortured, via rcutorture.
-
-This torture test consists of creating a number of kernel threads which
-acquire the lock and hold it for specific amount of time, thus simulating
-different critical region behaviors. The amount of contention on the lock
-can be simulated by either enlarging this critical region hold time and/or
-creating more kthreads.
-
-
-MODULE PARAMETERS
-
-This module has the following parameters:
-
-
-	    ** Locktorture-specific **
-
-nwriters_stress   Number of kernel threads that will stress exclusive lock
-		  ownership (writers). The default value is twice the number
-		  of online CPUs.
-
-nreaders_stress   Number of kernel threads that will stress shared lock
-		  ownership (readers). The default is the same amount of writer
-		  locks. If the user did not specify nwriters_stress, then
-		  both readers and writers be the amount of online CPUs.
-
-torture_type	  Type of lock to torture. By default, only spinlocks will
-		  be tortured. This module can torture the following locks,
-		  with string values as follows:
-
-		     o "lock_busted": Simulates a buggy lock implementation.
-
-		     o "spin_lock": spin_lock() and spin_unlock() pairs.
-
-		     o "spin_lock_irq": spin_lock_irq() and spin_unlock_irq()
-					pairs.
-
-		     o "rw_lock": read/write lock() and unlock() rwlock pairs.
-
-		     o "rw_lock_irq": read/write lock_irq() and unlock_irq()
-				      rwlock pairs.
-
-		     o "mutex_lock": mutex_lock() and mutex_unlock() pairs.
-
-		     o "rtmutex_lock": rtmutex_lock() and rtmutex_unlock()
-				       pairs. Kernel must have CONFIG_RT_MUTEX=y.
-
-		     o "rwsem_lock": read/write down() and up() semaphore pairs.
-
-
-	    ** Torture-framework (RCU + locking) **
-
-shutdown_secs	  The number of seconds to run the test before terminating
-		  the test and powering off the system.  The default is
-		  zero, which disables test termination and system shutdown.
-		  This capability is useful for automated testing.
-
-onoff_interval	  The number of seconds between each attempt to execute a
-		  randomly selected CPU-hotplug operation.  Defaults
-		  to zero, which disables CPU hotplugging.  In
-		  CONFIG_HOTPLUG_CPU=n kernels, locktorture will silently
-		  refuse to do any CPU-hotplug operations regardless of
-		  what value is specified for onoff_interval.
-
-onoff_holdoff	  The number of seconds to wait until starting CPU-hotplug
-		  operations.  This would normally only be used when
-		  locktorture was built into the kernel and started
-		  automatically at boot time, in which case it is useful
-		  in order to avoid confusing boot-time code with CPUs
-		  coming and going. This parameter is only useful if
-		  CONFIG_HOTPLUG_CPU is enabled.
-
-stat_interval	  Number of seconds between statistics-related printk()s.
-		  By default, locktorture will report stats every 60 seconds.
-		  Setting the interval to zero causes the statistics to
-		  be printed -only- when the module is unloaded, and this
-		  is the default.
-
-stutter		  The length of time to run the test before pausing for this
-		  same period of time.  Defaults to "stutter=5", so as
-		  to run and pause for (roughly) five-second intervals.
-		  Specifying "stutter=0" causes the test to run continuously
-		  without pausing, which is the old default behavior.
-
-shuffle_interval  The number of seconds to keep the test threads affinitied
-		  to a particular subset of the CPUs, defaults to 3 seconds.
-		  Used in conjunction with test_no_idle_hz.
-
-verbose		  Enable verbose debugging printing, via printk(). Enabled
-		  by default. This extra information is mostly related to
-		  high-level errors and reports from the main 'torture'
-		  framework.
-
-
-STATISTICS
-
-Statistics are printed in the following format:
-
-spin_lock-torture: Writes:  Total: 93746064  Max/Min: 0/0   Fail: 0
-   (A)		    (B)		   (C)		  (D)	       (E)
-
-(A): Lock type that is being tortured -- torture_type parameter.
-
-(B): Number of writer lock acquisitions. If dealing with a read/write primitive
-     a second "Reads" statistics line is printed.
-
-(C): Number of times the lock was acquired.
-
-(D): Min and max number of times threads failed to acquire the lock.
-
-(E): true/false values if there were errors acquiring the lock. This should
-     -only- be positive if there is a bug in the locking primitive's
-     implementation. Otherwise a lock should never fail (i.e., spin_lock()).
-     Of course, the same applies for (C), above. A dummy example of this is
-     the "lock_busted" type.
-
-USAGE
-
-The following script may be used to torture locks:
-
-	#!/bin/sh
-
-	modprobe locktorture
-	sleep 3600
-	rmmod locktorture
-	dmesg | grep torture:
-
-The output can be manually inspected for the error flag of "!!!".
-One could of course create a more elaborate script that automatically
-checked for such errors.  The "rmmod" command forces a "SUCCESS",
-"FAILURE", or "RCU_HOTPLUG" indication to be printk()ed.  The first
-two are self-explanatory, while the last indicates that while there
-were no locking failures, CPU-hotplug problems were detected.
-
-Also see: Documentation/RCU/torture.txt
diff --git a/Documentation/locking/mutex-design.rst b/Documentation/locking/mutex-design.rst
new file mode 100644
index 000000000000..4d8236b81fa5
--- /dev/null
+++ b/Documentation/locking/mutex-design.rst
@@ -0,0 +1,152 @@
+=======================
+Generic Mutex Subsystem
+=======================
+
+started by Ingo Molnar <mingo@redhat.com>
+
+updated by Davidlohr Bueso <davidlohr@hp.com>
+
+What are mutexes?
+-----------------
+
+In the Linux kernel, mutexes refer to a particular locking primitive
+that enforces serialization on shared memory systems, and not only to
+the generic term referring to 'mutual exclusion' found in academia
+or similar theoretical text books. Mutexes are sleeping locks which
+behave similarly to binary semaphores, and were introduced in 2006[1]
+as an alternative to these. This new data structure provided a number
+of advantages, including simpler interfaces, and at that time smaller
+code (see Disadvantages).
+
+[1] http://lwn.net/Articles/164802/
+
+Implementation
+--------------
+
+Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
+and implemented in kernel/locking/mutex.c. These locks use an atomic variable
+(->owner) to keep track of the lock state during its lifetime.  Field owner
+actually contains `struct task_struct *` to the current lock owner and it is
+therefore NULL if not currently owned. Since task_struct pointers are aligned
+at at least L1_CACHE_BYTES, low bits (3) are used to store extra state (e.g.,
+if waiter list is non-empty).  In its most basic form it also includes a
+wait-queue and a spinlock that serializes access to it. Furthermore,
+CONFIG_MUTEX_SPIN_ON_OWNER=y systems use a spinner MCS lock (->osq), described
+below in (ii).
+
+When acquiring a mutex, there are three possible paths that can be
+taken, depending on the state of the lock:
+
+(i) fastpath: tries to atomically acquire the lock by cmpxchg()ing the owner with
+    the current task. This only works in the uncontended case (cmpxchg() checks
+    against 0UL, so all 3 state bits above have to be 0). If the lock is
+    contended it goes to the next possible path.
+
+(ii) midpath: aka optimistic spinning, tries to spin for acquisition
+     while the lock owner is running and there are no other tasks ready
+     to run that have higher priority (need_resched). The rationale is
+     that if the lock owner is running, it is likely to release the lock
+     soon. The mutex spinners are queued up using MCS lock so that only
+     one spinner can compete for the mutex.
+
+     The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock
+     with the desirable properties of being fair and with each cpu trying
+     to acquire the lock spinning on a local variable. It avoids expensive
+     cacheline bouncing that common test-and-set spinlock implementations
+     incur. An MCS-like lock is specially tailored for optimistic spinning
+     for sleeping lock implementation. An important feature of the customized
+     MCS lock is that it has the extra property that spinners are able to exit
+     the MCS spinlock queue when they need to reschedule. This further helps
+     avoid situations where MCS spinners that need to reschedule would continue
+     waiting to spin on mutex owner, only to go directly to slowpath upon
+     obtaining the MCS lock.
+
+
+(iii) slowpath: last resort, if the lock is still unable to be acquired,
+      the task is added to the wait-queue and sleeps until woken up by the
+      unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE.
+
+While formally kernel mutexes are sleepable locks, it is path (ii) that
+makes them more practically a hybrid type. By simply not interrupting a
+task and busy-waiting for a few cycles instead of immediately sleeping,
+the performance of this lock has been seen to significantly improve a
+number of workloads. Note that this technique is also used for rw-semaphores.
+
+Semantics
+---------
+
+The mutex subsystem checks and enforces the following rules:
+
+    - Only one task can hold the mutex at a time.
+    - Only the owner can unlock the mutex.
+    - Multiple unlocks are not permitted.
+    - Recursive locking/unlocking is not permitted.
+    - A mutex must only be initialized via the API (see below).
+    - A task may not exit with a mutex held.
+    - Memory areas where held locks reside must not be freed.
+    - Held mutexes must not be reinitialized.
+    - Mutexes may not be used in hardware or software interrupt
+      contexts such as tasklets and timers.
+
+These semantics are fully enforced when CONFIG DEBUG_MUTEXES is enabled.
+In addition, the mutex debugging code also implements a number of other
+features that make lock debugging easier and faster:
+
+    - Uses symbolic names of mutexes, whenever they are printed
+      in debug output.
+    - Point-of-acquire tracking, symbolic lookup of function names,
+      list of all locks held in the system, printout of them.
+    - Owner tracking.
+    - Detects self-recursing locks and prints out all relevant info.
+    - Detects multi-task circular deadlocks and prints out all affected
+      locks and tasks (and only those tasks).
+
+
+Interfaces
+----------
+Statically define the mutex::
+
+   DEFINE_MUTEX(name);
+
+Dynamically initialize the mutex::
+
+   mutex_init(mutex);
+
+Acquire the mutex, uninterruptible::
+
+   void mutex_lock(struct mutex *lock);
+   void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
+   int  mutex_trylock(struct mutex *lock);
+
+Acquire the mutex, interruptible::
+
+   int mutex_lock_interruptible_nested(struct mutex *lock,
+				       unsigned int subclass);
+   int mutex_lock_interruptible(struct mutex *lock);
+
+Acquire the mutex, interruptible, if dec to 0::
+
+   int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
+
+Unlock the mutex::
+
+   void mutex_unlock(struct mutex *lock);
+
+Test if the mutex is taken::
+
+   int mutex_is_locked(struct mutex *lock);
+
+Disadvantages
+-------------
+
+Unlike its original design and purpose, 'struct mutex' is among the largest
+locks in the kernel. E.g: on x86-64 it is 32 bytes, where 'struct semaphore'
+is 24 bytes and rw_semaphore is 40 bytes. Larger structure sizes mean more CPU
+cache and memory footprint.
+
+When to use mutexes
+-------------------
+
+Unless the strict semantics of mutexes are unsuitable and/or the critical
+region prevents the lock from being shared, always prefer them to any other
+locking primitive.
diff --git a/Documentation/locking/mutex-design.txt b/Documentation/locking/mutex-design.txt
deleted file mode 100644
index 818aca19612f..000000000000
--- a/Documentation/locking/mutex-design.txt
+++ /dev/null
@@ -1,142 +0,0 @@
-Generic Mutex Subsystem
-
-started by Ingo Molnar <mingo@redhat.com>
-updated by Davidlohr Bueso <davidlohr@hp.com>
-
-What are mutexes?
------------------
-
-In the Linux kernel, mutexes refer to a particular locking primitive
-that enforces serialization on shared memory systems, and not only to
-the generic term referring to 'mutual exclusion' found in academia
-or similar theoretical text books. Mutexes are sleeping locks which
-behave similarly to binary semaphores, and were introduced in 2006[1]
-as an alternative to these. This new data structure provided a number
-of advantages, including simpler interfaces, and at that time smaller
-code (see Disadvantages).
-
-[1] http://lwn.net/Articles/164802/
-
-Implementation
---------------
-
-Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
-and implemented in kernel/locking/mutex.c. These locks use an atomic variable
-(->owner) to keep track of the lock state during its lifetime.  Field owner
-actually contains 'struct task_struct *' to the current lock owner and it is
-therefore NULL if not currently owned. Since task_struct pointers are aligned
-at at least L1_CACHE_BYTES, low bits (3) are used to store extra state (e.g.,
-if waiter list is non-empty).  In its most basic form it also includes a
-wait-queue and a spinlock that serializes access to it. Furthermore,
-CONFIG_MUTEX_SPIN_ON_OWNER=y systems use a spinner MCS lock (->osq), described
-below in (ii).
-
-When acquiring a mutex, there are three possible paths that can be
-taken, depending on the state of the lock:
-
-(i) fastpath: tries to atomically acquire the lock by cmpxchg()ing the owner with
-    the current task. This only works in the uncontended case (cmpxchg() checks
-    against 0UL, so all 3 state bits above have to be 0). If the lock is
-    contended it goes to the next possible path.
-
-(ii) midpath: aka optimistic spinning, tries to spin for acquisition
-     while the lock owner is running and there are no other tasks ready
-     to run that have higher priority (need_resched). The rationale is
-     that if the lock owner is running, it is likely to release the lock
-     soon. The mutex spinners are queued up using MCS lock so that only
-     one spinner can compete for the mutex.
-
-     The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock
-     with the desirable properties of being fair and with each cpu trying
-     to acquire the lock spinning on a local variable. It avoids expensive
-     cacheline bouncing that common test-and-set spinlock implementations
-     incur. An MCS-like lock is specially tailored for optimistic spinning
-     for sleeping lock implementation. An important feature of the customized
-     MCS lock is that it has the extra property that spinners are able to exit
-     the MCS spinlock queue when they need to reschedule. This further helps
-     avoid situations where MCS spinners that need to reschedule would continue
-     waiting to spin on mutex owner, only to go directly to slowpath upon
-     obtaining the MCS lock.
-
-
-(iii) slowpath: last resort, if the lock is still unable to be acquired,
-      the task is added to the wait-queue and sleeps until woken up by the
-      unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE.
-
-While formally kernel mutexes are sleepable locks, it is path (ii) that
-makes them more practically a hybrid type. By simply not interrupting a
-task and busy-waiting for a few cycles instead of immediately sleeping,
-the performance of this lock has been seen to significantly improve a
-number of workloads. Note that this technique is also used for rw-semaphores.
-
-Semantics
----------
-
-The mutex subsystem checks and enforces the following rules:
-
-    - Only one task can hold the mutex at a time.
-    - Only the owner can unlock the mutex.
-    - Multiple unlocks are not permitted.
-    - Recursive locking/unlocking is not permitted.
-    - A mutex must only be initialized via the API (see below).
-    - A task may not exit with a mutex held.
-    - Memory areas where held locks reside must not be freed.
-    - Held mutexes must not be reinitialized.
-    - Mutexes may not be used in hardware or software interrupt
-      contexts such as tasklets and timers.
-
-These semantics are fully enforced when CONFIG DEBUG_MUTEXES is enabled.
-In addition, the mutex debugging code also implements a number of other
-features that make lock debugging easier and faster:
-
-    - Uses symbolic names of mutexes, whenever they are printed
-      in debug output.
-    - Point-of-acquire tracking, symbolic lookup of function names,
-      list of all locks held in the system, printout of them.
-    - Owner tracking.
-    - Detects self-recursing locks and prints out all relevant info.
-    - Detects multi-task circular deadlocks and prints out all affected
-      locks and tasks (and only those tasks).
-
-
-Interfaces
-----------
-Statically define the mutex:
-   DEFINE_MUTEX(name);
-
-Dynamically initialize the mutex:
-   mutex_init(mutex);
-
-Acquire the mutex, uninterruptible:
-   void mutex_lock(struct mutex *lock);
-   void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
-   int  mutex_trylock(struct mutex *lock);
-
-Acquire the mutex, interruptible:
-   int mutex_lock_interruptible_nested(struct mutex *lock,
-				       unsigned int subclass);
-   int mutex_lock_interruptible(struct mutex *lock);
-
-Acquire the mutex, interruptible, if dec to 0:
-   int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
-
-Unlock the mutex:
-   void mutex_unlock(struct mutex *lock);
-
-Test if the mutex is taken:
-   int mutex_is_locked(struct mutex *lock);
-
-Disadvantages
--------------
-
-Unlike its original design and purpose, 'struct mutex' is among the largest
-locks in the kernel. E.g: on x86-64 it is 32 bytes, where 'struct semaphore'
-is 24 bytes and rw_semaphore is 40 bytes. Larger structure sizes mean more CPU
-cache and memory footprint.
-
-When to use mutexes
--------------------
-
-Unless the strict semantics of mutexes are unsuitable and/or the critical
-region prevents the lock from being shared, always prefer them to any other
-locking primitive.
diff --git a/Documentation/locking/rt-mutex-design.rst b/Documentation/locking/rt-mutex-design.rst
new file mode 100644
index 000000000000..59c2a64efb21
--- /dev/null
+++ b/Documentation/locking/rt-mutex-design.rst
@@ -0,0 +1,574 @@
+==============================
+RT-mutex implementation design
+==============================
+
+Copyright (c) 2006 Steven Rostedt
+
+Licensed under the GNU Free Documentation License, Version 1.2
+
+
+This document tries to describe the design of the rtmutex.c implementation.
+It doesn't describe the reasons why rtmutex.c exists. For that please see
+Documentation/locking/rt-mutex.rst.  Although this document does explain problems
+that happen without this code, but that is in the concept to understand
+what the code actually is doing.
+
+The goal of this document is to help others understand the priority
+inheritance (PI) algorithm that is used, as well as reasons for the
+decisions that were made to implement PI in the manner that was done.
+
+
+Unbounded Priority Inversion
+----------------------------
+
+Priority inversion is when a lower priority process executes while a higher
+priority process wants to run.  This happens for several reasons, and
+most of the time it can't be helped.  Anytime a high priority process wants
+to use a resource that a lower priority process has (a mutex for example),
+the high priority process must wait until the lower priority process is done
+with the resource.  This is a priority inversion.  What we want to prevent
+is something called unbounded priority inversion.  That is when the high
+priority process is prevented from running by a lower priority process for
+an undetermined amount of time.
+
+The classic example of unbounded priority inversion is where you have three
+processes, let's call them processes A, B, and C, where A is the highest
+priority process, C is the lowest, and B is in between. A tries to grab a lock
+that C owns and must wait and lets C run to release the lock. But in the
+meantime, B executes, and since B is of a higher priority than C, it preempts C,
+but by doing so, it is in fact preempting A which is a higher priority process.
+Now there's no way of knowing how long A will be sleeping waiting for C
+to release the lock, because for all we know, B is a CPU hog and will
+never give C a chance to release the lock.  This is called unbounded priority
+inversion.
+
+Here's a little ASCII art to show the problem::
+
+     grab lock L1 (owned by C)
+       |
+  A ---+
+          C preempted by B
+            |
+  C    +----+
+
+  B         +-------->
+                  B now keeps A from running.
+
+
+Priority Inheritance (PI)
+-------------------------
+
+There are several ways to solve this issue, but other ways are out of scope
+for this document.  Here we only discuss PI.
+
+PI is where a process inherits the priority of another process if the other
+process blocks on a lock owned by the current process.  To make this easier
+to understand, let's use the previous example, with processes A, B, and C again.
+
+This time, when A blocks on the lock owned by C, C would inherit the priority
+of A.  So now if B becomes runnable, it would not preempt C, since C now has
+the high priority of A.  As soon as C releases the lock, it loses its
+inherited priority, and A then can continue with the resource that C had.
+
+Terminology
+-----------
+
+Here I explain some terminology that is used in this document to help describe
+the design that is used to implement PI.
+
+PI chain
+         - The PI chain is an ordered series of locks and processes that cause
+           processes to inherit priorities from a previous process that is
+           blocked on one of its locks.  This is described in more detail
+           later in this document.
+
+mutex
+         - In this document, to differentiate from locks that implement
+           PI and spin locks that are used in the PI code, from now on
+           the PI locks will be called a mutex.
+
+lock
+         - In this document from now on, I will use the term lock when
+           referring to spin locks that are used to protect parts of the PI
+           algorithm.  These locks disable preemption for UP (when
+           CONFIG_PREEMPT is enabled) and on SMP prevents multiple CPUs from
+           entering critical sections simultaneously.
+
+spin lock
+         - Same as lock above.
+
+waiter
+         - A waiter is a struct that is stored on the stack of a blocked
+           process.  Since the scope of the waiter is within the code for
+           a process being blocked on the mutex, it is fine to allocate
+           the waiter on the process's stack (local variable).  This
+           structure holds a pointer to the task, as well as the mutex that
+           the task is blocked on.  It also has rbtree node structures to
+           place the task in the waiters rbtree of a mutex as well as the
+           pi_waiters rbtree of a mutex owner task (described below).
+
+           waiter is sometimes used in reference to the task that is waiting
+           on a mutex. This is the same as waiter->task.
+
+waiters
+         - A list of processes that are blocked on a mutex.
+
+top waiter
+         - The highest priority process waiting on a specific mutex.
+
+top pi waiter
+              - The highest priority process waiting on one of the mutexes
+                that a specific process owns.
+
+Note:
+       task and process are used interchangeably in this document, mostly to
+       differentiate between two processes that are being described together.
+
+
+PI chain
+--------
+
+The PI chain is a list of processes and mutexes that may cause priority
+inheritance to take place.  Multiple chains may converge, but a chain
+would never diverge, since a process can't be blocked on more than one
+mutex at a time.
+
+Example::
+
+   Process:  A, B, C, D, E
+   Mutexes:  L1, L2, L3, L4
+
+   A owns: L1
+           B blocked on L1
+           B owns L2
+                  C blocked on L2
+                  C owns L3
+                         D blocked on L3
+                         D owns L4
+                                E blocked on L4
+
+The chain would be::
+
+   E->L4->D->L3->C->L2->B->L1->A
+
+To show where two chains merge, we could add another process F and
+another mutex L5 where B owns L5 and F is blocked on mutex L5.
+
+The chain for F would be::
+
+   F->L5->B->L1->A
+
+Since a process may own more than one mutex, but never be blocked on more than
+one, the chains merge.
+
+Here we show both chains::
+
+   E->L4->D->L3->C->L2-+
+                       |
+                       +->B->L1->A
+                       |
+                 F->L5-+
+
+For PI to work, the processes at the right end of these chains (or we may
+also call it the Top of the chain) must be equal to or higher in priority
+than the processes to the left or below in the chain.
+
+Also since a mutex may have more than one process blocked on it, we can
+have multiple chains merge at mutexes.  If we add another process G that is
+blocked on mutex L2::
+
+  G->L2->B->L1->A
+
+And once again, to show how this can grow I will show the merging chains
+again::
+
+   E->L4->D->L3->C-+
+                   +->L2-+
+                   |     |
+                 G-+     +->B->L1->A
+                         |
+                   F->L5-+
+
+If process G has the highest priority in the chain, then all the tasks up
+the chain (A and B in this example), must have their priorities increased
+to that of G.
+
+Mutex Waiters Tree
+------------------
+
+Every mutex keeps track of all the waiters that are blocked on itself. The
+mutex has a rbtree to store these waiters by priority.  This tree is protected
+by a spin lock that is located in the struct of the mutex. This lock is called
+wait_lock.
+
+
+Task PI Tree
+------------
+
+To keep track of the PI chains, each process has its own PI rbtree.  This is
+a tree of all top waiters of the mutexes that are owned by the process.
+Note that this tree only holds the top waiters and not all waiters that are
+blocked on mutexes owned by the process.
+
+The top of the task's PI tree is always the highest priority task that
+is waiting on a mutex that is owned by the task.  So if the task has
+inherited a priority, it will always be the priority of the task that is
+at the top of this tree.
+
+This tree is stored in the task structure of a process as a rbtree called
+pi_waiters.  It is protected by a spin lock also in the task structure,
+called pi_lock.  This lock may also be taken in interrupt context, so when
+locking the pi_lock, interrupts must be disabled.
+
+
+Depth of the PI Chain
+---------------------
+
+The maximum depth of the PI chain is not dynamic, and could actually be
+defined.  But is very complex to figure it out, since it depends on all
+the nesting of mutexes.  Let's look at the example where we have 3 mutexes,
+L1, L2, and L3, and four separate functions func1, func2, func3 and func4.
+The following shows a locking order of L1->L2->L3, but may not actually
+be directly nested that way::
+
+  void func1(void)
+  {
+	mutex_lock(L1);
+
+	/* do anything */
+
+	mutex_unlock(L1);
+  }
+
+  void func2(void)
+  {
+	mutex_lock(L1);
+	mutex_lock(L2);
+
+	/* do something */
+
+	mutex_unlock(L2);
+	mutex_unlock(L1);
+  }
+
+  void func3(void)
+  {
+	mutex_lock(L2);
+	mutex_lock(L3);
+
+	/* do something else */
+
+	mutex_unlock(L3);
+	mutex_unlock(L2);
+  }
+
+  void func4(void)
+  {
+	mutex_lock(L3);
+
+	/* do something again */
+
+	mutex_unlock(L3);
+  }
+
+Now we add 4 processes that run each of these functions separately.
+Processes A, B, C, and D which run functions func1, func2, func3 and func4
+respectively, and such that D runs first and A last.  With D being preempted
+in func4 in the "do something again" area, we have a locking that follows::
+
+  D owns L3
+         C blocked on L3
+         C owns L2
+                B blocked on L2
+                B owns L1
+                       A blocked on L1
+
+  And thus we have the chain A->L1->B->L2->C->L3->D.
+
+This gives us a PI depth of 4 (four processes), but looking at any of the
+functions individually, it seems as though they only have at most a locking
+depth of two.  So, although the locking depth is defined at compile time,
+it still is very difficult to find the possibilities of that depth.
+
+Now since mutexes can be defined by user-land applications, we don't want a DOS
+type of application that nests large amounts of mutexes to create a large
+PI chain, and have the code holding spin locks while looking at a large
+amount of data.  So to prevent this, the implementation not only implements
+a maximum lock depth, but also only holds at most two different locks at a
+time, as it walks the PI chain.  More about this below.
+
+
+Mutex owner and flags
+---------------------
+
+The mutex structure contains a pointer to the owner of the mutex.  If the
+mutex is not owned, this owner is set to NULL.  Since all architectures
+have the task structure on at least a two byte alignment (and if this is
+not true, the rtmutex.c code will be broken!), this allows for the least
+significant bit to be used as a flag.  Bit 0 is used as the "Has Waiters"
+flag. It's set whenever there are waiters on a mutex.
+
+See Documentation/locking/rt-mutex.rst for further details.
+
+cmpxchg Tricks
+--------------
+
+Some architectures implement an atomic cmpxchg (Compare and Exchange).  This
+is used (when applicable) to keep the fast path of grabbing and releasing
+mutexes short.
+
+cmpxchg is basically the following function performed atomically::
+
+  unsigned long _cmpxchg(unsigned long *A, unsigned long *B, unsigned long *C)
+  {
+	unsigned long T = *A;
+	if (*A == *B) {
+		*A = *C;
+	}
+	return T;
+  }
+  #define cmpxchg(a,b,c) _cmpxchg(&a,&b,&c)
+
+This is really nice to have, since it allows you to only update a variable
+if the variable is what you expect it to be.  You know if it succeeded if
+the return value (the old value of A) is equal to B.
+
+The macro rt_mutex_cmpxchg is used to try to lock and unlock mutexes. If
+the architecture does not support CMPXCHG, then this macro is simply set
+to fail every time.  But if CMPXCHG is supported, then this will
+help out extremely to keep the fast path short.
+
+The use of rt_mutex_cmpxchg with the flags in the owner field help optimize
+the system for architectures that support it.  This will also be explained
+later in this document.
+
+
+Priority adjustments
+--------------------
+
+The implementation of the PI code in rtmutex.c has several places that a
+process must adjust its priority.  With the help of the pi_waiters of a
+process this is rather easy to know what needs to be adjusted.
+
+The functions implementing the task adjustments are rt_mutex_adjust_prio
+and rt_mutex_setprio. rt_mutex_setprio is only used in rt_mutex_adjust_prio.
+
+rt_mutex_adjust_prio examines the priority of the task, and the highest
+priority process that is waiting any of mutexes owned by the task. Since
+the pi_waiters of a task holds an order by priority of all the top waiters
+of all the mutexes that the task owns, we simply need to compare the top
+pi waiter to its own normal/deadline priority and take the higher one.
+Then rt_mutex_setprio is called to adjust the priority of the task to the
+new priority. Note that rt_mutex_setprio is defined in kernel/sched/core.c
+to implement the actual change in priority.
+
+Note:
+	For the "prio" field in task_struct, the lower the number, the
+	higher the priority. A "prio" of 5 is of higher priority than a
+	"prio" of 10.
+
+It is interesting to note that rt_mutex_adjust_prio can either increase
+or decrease the priority of the task.  In the case that a higher priority
+process has just blocked on a mutex owned by the task, rt_mutex_adjust_prio
+would increase/boost the task's priority.  But if a higher priority task
+were for some reason to leave the mutex (timeout or signal), this same function
+would decrease/unboost the priority of the task.  That is because the pi_waiters
+always contains the highest priority task that is waiting on a mutex owned
+by the task, so we only need to compare the priority of that top pi waiter
+to the normal priority of the given task.
+
+
+High level overview of the PI chain walk
+----------------------------------------
+
+The PI chain walk is implemented by the function rt_mutex_adjust_prio_chain.
+
+The implementation has gone through several iterations, and has ended up
+with what we believe is the best.  It walks the PI chain by only grabbing
+at most two locks at a time, and is very efficient.
+
+The rt_mutex_adjust_prio_chain can be used either to boost or lower process
+priorities.
+
+rt_mutex_adjust_prio_chain is called with a task to be checked for PI
+(de)boosting (the owner of a mutex that a process is blocking on), a flag to
+check for deadlocking, the mutex that the task owns, a pointer to a waiter
+that is the process's waiter struct that is blocked on the mutex (although this
+parameter may be NULL for deboosting), a pointer to the mutex on which the task
+is blocked, and a top_task as the top waiter of the mutex.
+
+For this explanation, I will not mention deadlock detection. This explanation
+will try to stay at a high level.
+
+When this function is called, there are no locks held.  That also means
+that the state of the owner and lock can change when entered into this function.
+
+Before this function is called, the task has already had rt_mutex_adjust_prio
+performed on it.  This means that the task is set to the priority that it
+should be at, but the rbtree nodes of the task's waiter have not been updated
+with the new priorities, and this task may not be in the proper locations
+in the pi_waiters and waiters trees that the task is blocked on. This function
+solves all that.
+
+The main operation of this function is summarized by Thomas Gleixner in
+rtmutex.c. See the 'Chain walk basics and protection scope' comment for further
+details.
+
+Taking of a mutex (The walk through)
+------------------------------------
+
+OK, now let's take a look at the detailed walk through of what happens when
+taking a mutex.
+
+The first thing that is tried is the fast taking of the mutex.  This is
+done when we have CMPXCHG enabled (otherwise the fast taking automatically
+fails).  Only when the owner field of the mutex is NULL can the lock be
+taken with the CMPXCHG and nothing else needs to be done.
+
+If there is contention on the lock, we go about the slow path
+(rt_mutex_slowlock).
+
+The slow path function is where the task's waiter structure is created on
+the stack.  This is because the waiter structure is only needed for the
+scope of this function.  The waiter structure holds the nodes to store
+the task on the waiters tree of the mutex, and if need be, the pi_waiters
+tree of the owner.
+
+The wait_lock of the mutex is taken since the slow path of unlocking the
+mutex also takes this lock.
+
+We then call try_to_take_rt_mutex.  This is where the architecture that
+does not implement CMPXCHG would always grab the lock (if there's no
+contention).
+
+try_to_take_rt_mutex is used every time the task tries to grab a mutex in the
+slow path.  The first thing that is done here is an atomic setting of
+the "Has Waiters" flag of the mutex's owner field. By setting this flag
+now, the current owner of the mutex being contended for can't release the mutex
+without going into the slow unlock path, and it would then need to grab the
+wait_lock, which this code currently holds. So setting the "Has Waiters" flag
+forces the current owner to synchronize with this code.
+
+The lock is taken if the following are true:
+
+   1) The lock has no owner
+   2) The current task is the highest priority against all other
+      waiters of the lock
+
+If the task succeeds to acquire the lock, then the task is set as the
+owner of the lock, and if the lock still has waiters, the top_waiter
+(highest priority task waiting on the lock) is added to this task's
+pi_waiters tree.
+
+If the lock is not taken by try_to_take_rt_mutex(), then the
+task_blocks_on_rt_mutex() function is called. This will add the task to
+the lock's waiter tree and propagate the pi chain of the lock as well
+as the lock's owner's pi_waiters tree. This is described in the next
+section.
+
+Task blocks on mutex
+--------------------
+
+The accounting of a mutex and process is done with the waiter structure of
+the process.  The "task" field is set to the process, and the "lock" field
+to the mutex.  The rbtree node of waiter are initialized to the processes
+current priority.
+
+Since the wait_lock was taken at the entry of the slow lock, we can safely
+add the waiter to the task waiter tree.  If the current process is the
+highest priority process currently waiting on this mutex, then we remove the
+previous top waiter process (if it exists) from the pi_waiters of the owner,
+and add the current process to that tree.  Since the pi_waiter of the owner
+has changed, we call rt_mutex_adjust_prio on the owner to see if the owner
+should adjust its priority accordingly.
+
+If the owner is also blocked on a lock, and had its pi_waiters changed
+(or deadlock checking is on), we unlock the wait_lock of the mutex and go ahead
+and run rt_mutex_adjust_prio_chain on the owner, as described earlier.
+
+Now all locks are released, and if the current process is still blocked on a
+mutex (waiter "task" field is not NULL), then we go to sleep (call schedule).
+
+Waking up in the loop
+---------------------
+
+The task can then wake up for a couple of reasons:
+  1) The previous lock owner released the lock, and the task now is top_waiter
+  2) we received a signal or timeout
+
+In both cases, the task will try again to acquire the lock. If it
+does, then it will take itself off the waiters tree and set itself back
+to the TASK_RUNNING state.
+
+In first case, if the lock was acquired by another task before this task
+could get the lock, then it will go back to sleep and wait to be woken again.
+
+The second case is only applicable for tasks that are grabbing a mutex
+that can wake up before getting the lock, either due to a signal or
+a timeout (i.e. rt_mutex_timed_futex_lock()). When woken, it will try to
+take the lock again, if it succeeds, then the task will return with the
+lock held, otherwise it will return with -EINTR if the task was woken
+by a signal, or -ETIMEDOUT if it timed out.
+
+
+Unlocking the Mutex
+-------------------
+
+The unlocking of a mutex also has a fast path for those architectures with
+CMPXCHG.  Since the taking of a mutex on contention always sets the
+"Has Waiters" flag of the mutex's owner, we use this to know if we need to
+take the slow path when unlocking the mutex.  If the mutex doesn't have any
+waiters, the owner field of the mutex would equal the current process and
+the mutex can be unlocked by just replacing the owner field with NULL.
+
+If the owner field has the "Has Waiters" bit set (or CMPXCHG is not available),
+the slow unlock path is taken.
+
+The first thing done in the slow unlock path is to take the wait_lock of the
+mutex.  This synchronizes the locking and unlocking of the mutex.
+
+A check is made to see if the mutex has waiters or not.  On architectures that
+do not have CMPXCHG, this is the location that the owner of the mutex will
+determine if a waiter needs to be awoken or not.  On architectures that
+do have CMPXCHG, that check is done in the fast path, but it is still needed
+in the slow path too.  If a waiter of a mutex woke up because of a signal
+or timeout between the time the owner failed the fast path CMPXCHG check and
+the grabbing of the wait_lock, the mutex may not have any waiters, thus the
+owner still needs to make this check. If there are no waiters then the mutex
+owner field is set to NULL, the wait_lock is released and nothing more is
+needed.
+
+If there are waiters, then we need to wake one up.
+
+On the wake up code, the pi_lock of the current owner is taken.  The top
+waiter of the lock is found and removed from the waiters tree of the mutex
+as well as the pi_waiters tree of the current owner. The "Has Waiters" bit is
+marked to prevent lower priority tasks from stealing the lock.
+
+Finally we unlock the pi_lock of the pending owner and wake it up.
+
+
+Contact
+-------
+
+For updates on this document, please email Steven Rostedt <rostedt@goodmis.org>
+
+
+Credits
+-------
+
+Author:  Steven Rostedt <rostedt@goodmis.org>
+
+Updated: Alex Shi <alex.shi@linaro.org>	- 7/6/2017
+
+Original Reviewers:
+		     Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and
+		     Randy Dunlap
+
+Update (7/6/2017) Reviewers: Steven Rostedt and Sebastian Siewior
+
+Updates
+-------
+
+This document was originally written for 2.6.17-rc3-mm1
+was updated on 4.12
diff --git a/Documentation/locking/rt-mutex-design.txt b/Documentation/locking/rt-mutex-design.txt
deleted file mode 100644
index 3d7b865539cc..000000000000
--- a/Documentation/locking/rt-mutex-design.txt
+++ /dev/null
@@ -1,559 +0,0 @@
-#
-# Copyright (c) 2006 Steven Rostedt
-# Licensed under the GNU Free Documentation License, Version 1.2
-#
-
-RT-mutex implementation design
-------------------------------
-
-This document tries to describe the design of the rtmutex.c implementation.
-It doesn't describe the reasons why rtmutex.c exists. For that please see
-Documentation/locking/rt-mutex.txt.  Although this document does explain problems
-that happen without this code, but that is in the concept to understand
-what the code actually is doing.
-
-The goal of this document is to help others understand the priority
-inheritance (PI) algorithm that is used, as well as reasons for the
-decisions that were made to implement PI in the manner that was done.
-
-
-Unbounded Priority Inversion
-----------------------------
-
-Priority inversion is when a lower priority process executes while a higher
-priority process wants to run.  This happens for several reasons, and
-most of the time it can't be helped.  Anytime a high priority process wants
-to use a resource that a lower priority process has (a mutex for example),
-the high priority process must wait until the lower priority process is done
-with the resource.  This is a priority inversion.  What we want to prevent
-is something called unbounded priority inversion.  That is when the high
-priority process is prevented from running by a lower priority process for
-an undetermined amount of time.
-
-The classic example of unbounded priority inversion is where you have three
-processes, let's call them processes A, B, and C, where A is the highest
-priority process, C is the lowest, and B is in between. A tries to grab a lock
-that C owns and must wait and lets C run to release the lock. But in the
-meantime, B executes, and since B is of a higher priority than C, it preempts C,
-but by doing so, it is in fact preempting A which is a higher priority process.
-Now there's no way of knowing how long A will be sleeping waiting for C
-to release the lock, because for all we know, B is a CPU hog and will
-never give C a chance to release the lock.  This is called unbounded priority
-inversion.
-
-Here's a little ASCII art to show the problem.
-
-   grab lock L1 (owned by C)
-     |
-A ---+
-        C preempted by B
-          |
-C    +----+
-
-B         +-------->
-                B now keeps A from running.
-
-
-Priority Inheritance (PI)
--------------------------
-
-There are several ways to solve this issue, but other ways are out of scope
-for this document.  Here we only discuss PI.
-
-PI is where a process inherits the priority of another process if the other
-process blocks on a lock owned by the current process.  To make this easier
-to understand, let's use the previous example, with processes A, B, and C again.
-
-This time, when A blocks on the lock owned by C, C would inherit the priority
-of A.  So now if B becomes runnable, it would not preempt C, since C now has
-the high priority of A.  As soon as C releases the lock, it loses its
-inherited priority, and A then can continue with the resource that C had.
-
-Terminology
------------
-
-Here I explain some terminology that is used in this document to help describe
-the design that is used to implement PI.
-
-PI chain - The PI chain is an ordered series of locks and processes that cause
-           processes to inherit priorities from a previous process that is
-           blocked on one of its locks.  This is described in more detail
-           later in this document.
-
-mutex    - In this document, to differentiate from locks that implement
-           PI and spin locks that are used in the PI code, from now on
-           the PI locks will be called a mutex.
-
-lock     - In this document from now on, I will use the term lock when
-           referring to spin locks that are used to protect parts of the PI
-           algorithm.  These locks disable preemption for UP (when
-           CONFIG_PREEMPT is enabled) and on SMP prevents multiple CPUs from
-           entering critical sections simultaneously.
-
-spin lock - Same as lock above.
-
-waiter   - A waiter is a struct that is stored on the stack of a blocked
-           process.  Since the scope of the waiter is within the code for
-           a process being blocked on the mutex, it is fine to allocate
-           the waiter on the process's stack (local variable).  This
-           structure holds a pointer to the task, as well as the mutex that
-           the task is blocked on.  It also has rbtree node structures to
-           place the task in the waiters rbtree of a mutex as well as the
-           pi_waiters rbtree of a mutex owner task (described below).
-
-           waiter is sometimes used in reference to the task that is waiting
-           on a mutex. This is the same as waiter->task.
-
-waiters  - A list of processes that are blocked on a mutex.
-
-top waiter - The highest priority process waiting on a specific mutex.
-
-top pi waiter - The highest priority process waiting on one of the mutexes
-                that a specific process owns.
-
-Note:  task and process are used interchangeably in this document, mostly to
-       differentiate between two processes that are being described together.
-
-
-PI chain
---------
-
-The PI chain is a list of processes and mutexes that may cause priority
-inheritance to take place.  Multiple chains may converge, but a chain
-would never diverge, since a process can't be blocked on more than one
-mutex at a time.
-
-Example:
-
-   Process:  A, B, C, D, E
-   Mutexes:  L1, L2, L3, L4
-
-   A owns: L1
-           B blocked on L1
-           B owns L2
-                  C blocked on L2
-                  C owns L3
-                         D blocked on L3
-                         D owns L4
-                                E blocked on L4
-
-The chain would be:
-
-   E->L4->D->L3->C->L2->B->L1->A
-
-To show where two chains merge, we could add another process F and
-another mutex L5 where B owns L5 and F is blocked on mutex L5.
-
-The chain for F would be:
-
-   F->L5->B->L1->A
-
-Since a process may own more than one mutex, but never be blocked on more than
-one, the chains merge.
-
-Here we show both chains:
-
-   E->L4->D->L3->C->L2-+
-                       |
-                       +->B->L1->A
-                       |
-                 F->L5-+
-
-For PI to work, the processes at the right end of these chains (or we may
-also call it the Top of the chain) must be equal to or higher in priority
-than the processes to the left or below in the chain.
-
-Also since a mutex may have more than one process blocked on it, we can
-have multiple chains merge at mutexes.  If we add another process G that is
-blocked on mutex L2:
-
-  G->L2->B->L1->A
-
-And once again, to show how this can grow I will show the merging chains
-again.
-
-   E->L4->D->L3->C-+
-                   +->L2-+
-                   |     |
-                 G-+     +->B->L1->A
-                         |
-                   F->L5-+
-
-If process G has the highest priority in the chain, then all the tasks up
-the chain (A and B in this example), must have their priorities increased
-to that of G.
-
-Mutex Waiters Tree
------------------
-
-Every mutex keeps track of all the waiters that are blocked on itself. The
-mutex has a rbtree to store these waiters by priority.  This tree is protected
-by a spin lock that is located in the struct of the mutex. This lock is called
-wait_lock.
-
-
-Task PI Tree
-------------
-
-To keep track of the PI chains, each process has its own PI rbtree.  This is
-a tree of all top waiters of the mutexes that are owned by the process.
-Note that this tree only holds the top waiters and not all waiters that are
-blocked on mutexes owned by the process.
-
-The top of the task's PI tree is always the highest priority task that
-is waiting on a mutex that is owned by the task.  So if the task has
-inherited a priority, it will always be the priority of the task that is
-at the top of this tree.
-
-This tree is stored in the task structure of a process as a rbtree called
-pi_waiters.  It is protected by a spin lock also in the task structure,
-called pi_lock.  This lock may also be taken in interrupt context, so when
-locking the pi_lock, interrupts must be disabled.
-
-
-Depth of the PI Chain
----------------------
-
-The maximum depth of the PI chain is not dynamic, and could actually be
-defined.  But is very complex to figure it out, since it depends on all
-the nesting of mutexes.  Let's look at the example where we have 3 mutexes,
-L1, L2, and L3, and four separate functions func1, func2, func3 and func4.
-The following shows a locking order of L1->L2->L3, but may not actually
-be directly nested that way.
-
-void func1(void)
-{
-	mutex_lock(L1);
-
-	/* do anything */
-
-	mutex_unlock(L1);
-}
-
-void func2(void)
-{
-	mutex_lock(L1);
-	mutex_lock(L2);
-
-	/* do something */
-
-	mutex_unlock(L2);
-	mutex_unlock(L1);
-}
-
-void func3(void)
-{
-	mutex_lock(L2);
-	mutex_lock(L3);
-
-	/* do something else */
-
-	mutex_unlock(L3);
-	mutex_unlock(L2);
-}
-
-void func4(void)
-{
-	mutex_lock(L3);
-
-	/* do something again */
-
-	mutex_unlock(L3);
-}
-
-Now we add 4 processes that run each of these functions separately.
-Processes A, B, C, and D which run functions func1, func2, func3 and func4
-respectively, and such that D runs first and A last.  With D being preempted
-in func4 in the "do something again" area, we have a locking that follows:
-
-D owns L3
-       C blocked on L3
-       C owns L2
-              B blocked on L2
-              B owns L1
-                     A blocked on L1
-
-And thus we have the chain A->L1->B->L2->C->L3->D.
-
-This gives us a PI depth of 4 (four processes), but looking at any of the
-functions individually, it seems as though they only have at most a locking
-depth of two.  So, although the locking depth is defined at compile time,
-it still is very difficult to find the possibilities of that depth.
-
-Now since mutexes can be defined by user-land applications, we don't want a DOS
-type of application that nests large amounts of mutexes to create a large
-PI chain, and have the code holding spin locks while looking at a large
-amount of data.  So to prevent this, the implementation not only implements
-a maximum lock depth, but also only holds at most two different locks at a
-time, as it walks the PI chain.  More about this below.
-
-
-Mutex owner and flags
----------------------
-
-The mutex structure contains a pointer to the owner of the mutex.  If the
-mutex is not owned, this owner is set to NULL.  Since all architectures
-have the task structure on at least a two byte alignment (and if this is
-not true, the rtmutex.c code will be broken!), this allows for the least
-significant bit to be used as a flag.  Bit 0 is used as the "Has Waiters"
-flag. It's set whenever there are waiters on a mutex.
-
-See Documentation/locking/rt-mutex.txt for further details.
-
-cmpxchg Tricks
---------------
-
-Some architectures implement an atomic cmpxchg (Compare and Exchange).  This
-is used (when applicable) to keep the fast path of grabbing and releasing
-mutexes short.
-
-cmpxchg is basically the following function performed atomically:
-
-unsigned long _cmpxchg(unsigned long *A, unsigned long *B, unsigned long *C)
-{
-	unsigned long T = *A;
-	if (*A == *B) {
-		*A = *C;
-	}
-	return T;
-}
-#define cmpxchg(a,b,c) _cmpxchg(&a,&b,&c)
-
-This is really nice to have, since it allows you to only update a variable
-if the variable is what you expect it to be.  You know if it succeeded if
-the return value (the old value of A) is equal to B.
-
-The macro rt_mutex_cmpxchg is used to try to lock and unlock mutexes. If
-the architecture does not support CMPXCHG, then this macro is simply set
-to fail every time.  But if CMPXCHG is supported, then this will
-help out extremely to keep the fast path short.
-
-The use of rt_mutex_cmpxchg with the flags in the owner field help optimize
-the system for architectures that support it.  This will also be explained
-later in this document.
-
-
-Priority adjustments
---------------------
-
-The implementation of the PI code in rtmutex.c has several places that a
-process must adjust its priority.  With the help of the pi_waiters of a
-process this is rather easy to know what needs to be adjusted.
-
-The functions implementing the task adjustments are rt_mutex_adjust_prio
-and rt_mutex_setprio. rt_mutex_setprio is only used in rt_mutex_adjust_prio.
-
-rt_mutex_adjust_prio examines the priority of the task, and the highest
-priority process that is waiting any of mutexes owned by the task. Since
-the pi_waiters of a task holds an order by priority of all the top waiters
-of all the mutexes that the task owns, we simply need to compare the top
-pi waiter to its own normal/deadline priority and take the higher one.
-Then rt_mutex_setprio is called to adjust the priority of the task to the
-new priority. Note that rt_mutex_setprio is defined in kernel/sched/core.c
-to implement the actual change in priority.
-
-(Note:  For the "prio" field in task_struct, the lower the number, the
-	higher the priority. A "prio" of 5 is of higher priority than a
-	"prio" of 10.)
-
-It is interesting to note that rt_mutex_adjust_prio can either increase
-or decrease the priority of the task.  In the case that a higher priority
-process has just blocked on a mutex owned by the task, rt_mutex_adjust_prio
-would increase/boost the task's priority.  But if a higher priority task
-were for some reason to leave the mutex (timeout or signal), this same function
-would decrease/unboost the priority of the task.  That is because the pi_waiters
-always contains the highest priority task that is waiting on a mutex owned
-by the task, so we only need to compare the priority of that top pi waiter
-to the normal priority of the given task.
-
-
-High level overview of the PI chain walk
-----------------------------------------
-
-The PI chain walk is implemented by the function rt_mutex_adjust_prio_chain.
-
-The implementation has gone through several iterations, and has ended up
-with what we believe is the best.  It walks the PI chain by only grabbing
-at most two locks at a time, and is very efficient.
-
-The rt_mutex_adjust_prio_chain can be used either to boost or lower process
-priorities.
-
-rt_mutex_adjust_prio_chain is called with a task to be checked for PI
-(de)boosting (the owner of a mutex that a process is blocking on), a flag to
-check for deadlocking, the mutex that the task owns, a pointer to a waiter
-that is the process's waiter struct that is blocked on the mutex (although this
-parameter may be NULL for deboosting), a pointer to the mutex on which the task
-is blocked, and a top_task as the top waiter of the mutex.
-
-For this explanation, I will not mention deadlock detection. This explanation
-will try to stay at a high level.
-
-When this function is called, there are no locks held.  That also means
-that the state of the owner and lock can change when entered into this function.
-
-Before this function is called, the task has already had rt_mutex_adjust_prio
-performed on it.  This means that the task is set to the priority that it
-should be at, but the rbtree nodes of the task's waiter have not been updated
-with the new priorities, and this task may not be in the proper locations
-in the pi_waiters and waiters trees that the task is blocked on. This function
-solves all that.
-
-The main operation of this function is summarized by Thomas Gleixner in
-rtmutex.c. See the 'Chain walk basics and protection scope' comment for further
-details.
-
-Taking of a mutex (The walk through)
-------------------------------------
-
-OK, now let's take a look at the detailed walk through of what happens when
-taking a mutex.
-
-The first thing that is tried is the fast taking of the mutex.  This is
-done when we have CMPXCHG enabled (otherwise the fast taking automatically
-fails).  Only when the owner field of the mutex is NULL can the lock be
-taken with the CMPXCHG and nothing else needs to be done.
-
-If there is contention on the lock, we go about the slow path
-(rt_mutex_slowlock).
-
-The slow path function is where the task's waiter structure is created on
-the stack.  This is because the waiter structure is only needed for the
-scope of this function.  The waiter structure holds the nodes to store
-the task on the waiters tree of the mutex, and if need be, the pi_waiters
-tree of the owner.
-
-The wait_lock of the mutex is taken since the slow path of unlocking the
-mutex also takes this lock.
-
-We then call try_to_take_rt_mutex.  This is where the architecture that
-does not implement CMPXCHG would always grab the lock (if there's no
-contention).
-
-try_to_take_rt_mutex is used every time the task tries to grab a mutex in the
-slow path.  The first thing that is done here is an atomic setting of
-the "Has Waiters" flag of the mutex's owner field. By setting this flag
-now, the current owner of the mutex being contended for can't release the mutex
-without going into the slow unlock path, and it would then need to grab the
-wait_lock, which this code currently holds. So setting the "Has Waiters" flag
-forces the current owner to synchronize with this code.
-
-The lock is taken if the following are true:
-   1) The lock has no owner
-   2) The current task is the highest priority against all other
-      waiters of the lock
-
-If the task succeeds to acquire the lock, then the task is set as the
-owner of the lock, and if the lock still has waiters, the top_waiter
-(highest priority task waiting on the lock) is added to this task's
-pi_waiters tree.
-
-If the lock is not taken by try_to_take_rt_mutex(), then the
-task_blocks_on_rt_mutex() function is called. This will add the task to
-the lock's waiter tree and propagate the pi chain of the lock as well
-as the lock's owner's pi_waiters tree. This is described in the next
-section.
-
-Task blocks on mutex
---------------------
-
-The accounting of a mutex and process is done with the waiter structure of
-the process.  The "task" field is set to the process, and the "lock" field
-to the mutex.  The rbtree node of waiter are initialized to the processes
-current priority.
-
-Since the wait_lock was taken at the entry of the slow lock, we can safely
-add the waiter to the task waiter tree.  If the current process is the
-highest priority process currently waiting on this mutex, then we remove the
-previous top waiter process (if it exists) from the pi_waiters of the owner,
-and add the current process to that tree.  Since the pi_waiter of the owner
-has changed, we call rt_mutex_adjust_prio on the owner to see if the owner
-should adjust its priority accordingly.
-
-If the owner is also blocked on a lock, and had its pi_waiters changed
-(or deadlock checking is on), we unlock the wait_lock of the mutex and go ahead
-and run rt_mutex_adjust_prio_chain on the owner, as described earlier.
-
-Now all locks are released, and if the current process is still blocked on a
-mutex (waiter "task" field is not NULL), then we go to sleep (call schedule).
-
-Waking up in the loop
----------------------
-
-The task can then wake up for a couple of reasons:
-  1) The previous lock owner released the lock, and the task now is top_waiter
-  2) we received a signal or timeout
-
-In both cases, the task will try again to acquire the lock. If it
-does, then it will take itself off the waiters tree and set itself back
-to the TASK_RUNNING state.
-
-In first case, if the lock was acquired by another task before this task
-could get the lock, then it will go back to sleep and wait to be woken again.
-
-The second case is only applicable for tasks that are grabbing a mutex
-that can wake up before getting the lock, either due to a signal or
-a timeout (i.e. rt_mutex_timed_futex_lock()). When woken, it will try to
-take the lock again, if it succeeds, then the task will return with the
-lock held, otherwise it will return with -EINTR if the task was woken
-by a signal, or -ETIMEDOUT if it timed out.
-
-
-Unlocking the Mutex
--------------------
-
-The unlocking of a mutex also has a fast path for those architectures with
-CMPXCHG.  Since the taking of a mutex on contention always sets the
-"Has Waiters" flag of the mutex's owner, we use this to know if we need to
-take the slow path when unlocking the mutex.  If the mutex doesn't have any
-waiters, the owner field of the mutex would equal the current process and
-the mutex can be unlocked by just replacing the owner field with NULL.
-
-If the owner field has the "Has Waiters" bit set (or CMPXCHG is not available),
-the slow unlock path is taken.
-
-The first thing done in the slow unlock path is to take the wait_lock of the
-mutex.  This synchronizes the locking and unlocking of the mutex.
-
-A check is made to see if the mutex has waiters or not.  On architectures that
-do not have CMPXCHG, this is the location that the owner of the mutex will
-determine if a waiter needs to be awoken or not.  On architectures that
-do have CMPXCHG, that check is done in the fast path, but it is still needed
-in the slow path too.  If a waiter of a mutex woke up because of a signal
-or timeout between the time the owner failed the fast path CMPXCHG check and
-the grabbing of the wait_lock, the mutex may not have any waiters, thus the
-owner still needs to make this check. If there are no waiters then the mutex
-owner field is set to NULL, the wait_lock is released and nothing more is
-needed.
-
-If there are waiters, then we need to wake one up.
-
-On the wake up code, the pi_lock of the current owner is taken.  The top
-waiter of the lock is found and removed from the waiters tree of the mutex
-as well as the pi_waiters tree of the current owner. The "Has Waiters" bit is
-marked to prevent lower priority tasks from stealing the lock.
-
-Finally we unlock the pi_lock of the pending owner and wake it up.
-
-
-Contact
--------
-
-For updates on this document, please email Steven Rostedt <rostedt@goodmis.org>
-
-
-Credits
--------
-
-Author:  Steven Rostedt <rostedt@goodmis.org>
-Updated: Alex Shi <alex.shi@linaro.org>	- 7/6/2017
-
-Original Reviewers:  Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and
-		     Randy Dunlap
-Update (7/6/2017) Reviewers: Steven Rostedt and Sebastian Siewior
-
-Updates
--------
-
-This document was originally written for 2.6.17-rc3-mm1
-was updated on 4.12
diff --git a/Documentation/locking/rt-mutex.rst b/Documentation/locking/rt-mutex.rst
new file mode 100644
index 000000000000..c365dc302081
--- /dev/null
+++ b/Documentation/locking/rt-mutex.rst
@@ -0,0 +1,77 @@
+==================================
+RT-mutex subsystem with PI support
+==================================
+
+RT-mutexes with priority inheritance are used to support PI-futexes,
+which enable pthread_mutex_t priority inheritance attributes
+(PTHREAD_PRIO_INHERIT). [See Documentation/pi-futex.txt for more details
+about PI-futexes.]
+
+This technology was developed in the -rt tree and streamlined for
+pthread_mutex support.
+
+Basic principles:
+-----------------
+
+RT-mutexes extend the semantics of simple mutexes by the priority
+inheritance protocol.
+
+A low priority owner of a rt-mutex inherits the priority of a higher
+priority waiter until the rt-mutex is released. If the temporarily
+boosted owner blocks on a rt-mutex itself it propagates the priority
+boosting to the owner of the other rt_mutex it gets blocked on. The
+priority boosting is immediately removed once the rt_mutex has been
+unlocked.
+
+This approach allows us to shorten the block of high-prio tasks on
+mutexes which protect shared resources. Priority inheritance is not a
+magic bullet for poorly designed applications, but it allows
+well-designed applications to use userspace locks in critical parts of
+an high priority thread, without losing determinism.
+
+The enqueueing of the waiters into the rtmutex waiter tree is done in
+priority order. For same priorities FIFO order is chosen. For each
+rtmutex, only the top priority waiter is enqueued into the owner's
+priority waiters tree. This tree too queues in priority order. Whenever
+the top priority waiter of a task changes (for example it timed out or
+got a signal), the priority of the owner task is readjusted. The
+priority enqueueing is handled by "pi_waiters".
+
+RT-mutexes are optimized for fastpath operations and have no internal
+locking overhead when locking an uncontended mutex or unlocking a mutex
+without waiters. The optimized fastpath operations require cmpxchg
+support. [If that is not available then the rt-mutex internal spinlock
+is used]
+
+The state of the rt-mutex is tracked via the owner field of the rt-mutex
+structure:
+
+lock->owner holds the task_struct pointer of the owner. Bit 0 is used to
+keep track of the "lock has waiters" state:
+
+ ============ ======= ================================================
+ owner        bit0    Notes
+ ============ ======= ================================================
+ NULL         0       lock is free (fast acquire possible)
+ NULL         1       lock is free and has waiters and the top waiter
+		      is going to take the lock [1]_
+ taskpointer  0       lock is held (fast release possible)
+ taskpointer  1       lock is held and has waiters [2]_
+ ============ ======= ================================================
+
+The fast atomic compare exchange based acquire and release is only
+possible when bit 0 of lock->owner is 0.
+
+.. [1] It also can be a transitional state when grabbing the lock
+       with ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
+       we need to set the bit0 before looking at the lock, and the owner may
+       be NULL in this small time, hence this can be a transitional state.
+
+.. [2] There is a small time when bit 0 is set but there are no
+       waiters. This can happen when grabbing the lock in the slow path.
+       To prevent a cmpxchg of the owner releasing the lock, we need to
+       set this bit before looking at the lock.
+
+BTW, there is still technically a "Pending Owner", it's just not called
+that anymore. The pending owner happens to be the top_waiter of a lock
+that has no owner and has been woken up to grab the lock.
diff --git a/Documentation/locking/rt-mutex.txt b/Documentation/locking/rt-mutex.txt
deleted file mode 100644
index 35793e003041..000000000000
--- a/Documentation/locking/rt-mutex.txt
+++ /dev/null
@@ -1,73 +0,0 @@
-RT-mutex subsystem with PI support
-----------------------------------
-
-RT-mutexes with priority inheritance are used to support PI-futexes,
-which enable pthread_mutex_t priority inheritance attributes
-(PTHREAD_PRIO_INHERIT). [See Documentation/pi-futex.txt for more details
-about PI-futexes.]
-
-This technology was developed in the -rt tree and streamlined for
-pthread_mutex support.
-
-Basic principles:
------------------
-
-RT-mutexes extend the semantics of simple mutexes by the priority
-inheritance protocol.
-
-A low priority owner of a rt-mutex inherits the priority of a higher
-priority waiter until the rt-mutex is released. If the temporarily
-boosted owner blocks on a rt-mutex itself it propagates the priority
-boosting to the owner of the other rt_mutex it gets blocked on. The
-priority boosting is immediately removed once the rt_mutex has been
-unlocked.
-
-This approach allows us to shorten the block of high-prio tasks on
-mutexes which protect shared resources. Priority inheritance is not a
-magic bullet for poorly designed applications, but it allows
-well-designed applications to use userspace locks in critical parts of
-an high priority thread, without losing determinism.
-
-The enqueueing of the waiters into the rtmutex waiter tree is done in
-priority order. For same priorities FIFO order is chosen. For each
-rtmutex, only the top priority waiter is enqueued into the owner's
-priority waiters tree. This tree too queues in priority order. Whenever
-the top priority waiter of a task changes (for example it timed out or
-got a signal), the priority of the owner task is readjusted. The
-priority enqueueing is handled by "pi_waiters".
-
-RT-mutexes are optimized for fastpath operations and have no internal
-locking overhead when locking an uncontended mutex or unlocking a mutex
-without waiters. The optimized fastpath operations require cmpxchg
-support. [If that is not available then the rt-mutex internal spinlock
-is used]
-
-The state of the rt-mutex is tracked via the owner field of the rt-mutex
-structure:
-
-lock->owner holds the task_struct pointer of the owner. Bit 0 is used to
-keep track of the "lock has waiters" state.
-
- owner        bit0
- NULL         0       lock is free (fast acquire possible)
- NULL         1       lock is free and has waiters and the top waiter
-			is going to take the lock*
- taskpointer  0       lock is held (fast release possible)
- taskpointer  1       lock is held and has waiters**
-
-The fast atomic compare exchange based acquire and release is only
-possible when bit 0 of lock->owner is 0.
-
-(*) It also can be a transitional state when grabbing the lock
-with ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
-we need to set the bit0 before looking at the lock, and the owner may be
-NULL in this small time, hence this can be a transitional state.
-
-(**) There is a small time when bit 0 is set but there are no
-waiters. This can happen when grabbing the lock in the slow path.
-To prevent a cmpxchg of the owner releasing the lock, we need to
-set this bit before looking at the lock.
-
-BTW, there is still technically a "Pending Owner", it's just not called
-that anymore. The pending owner happens to be the top_waiter of a lock
-that has no owner and has been woken up to grab the lock.
diff --git a/Documentation/locking/spinlocks.rst b/Documentation/locking/spinlocks.rst
new file mode 100644
index 000000000000..098107fb7d86
--- /dev/null
+++ b/Documentation/locking/spinlocks.rst
@@ -0,0 +1,177 @@
+===============
+Locking lessons
+===============
+
+Lesson 1: Spin locks
+====================
+
+The most basic primitive for locking is spinlock::
+
+  static DEFINE_SPINLOCK(xxx_lock);
+
+	unsigned long flags;
+
+	spin_lock_irqsave(&xxx_lock, flags);
+	... critical section here ..
+	spin_unlock_irqrestore(&xxx_lock, flags);
+
+The above is always safe. It will disable interrupts _locally_, but the
+spinlock itself will guarantee the global lock, so it will guarantee that
+there is only one thread-of-control within the region(s) protected by that
+lock. This works well even under UP also, so the code does _not_ need to
+worry about UP vs SMP issues: the spinlocks work correctly under both.
+
+   NOTE! Implications of spin_locks for memory are further described in:
+
+     Documentation/memory-barriers.txt
+
+       (5) LOCK operations.
+
+       (6) UNLOCK operations.
+
+The above is usually pretty simple (you usually need and want only one
+spinlock for most things - using more than one spinlock can make things a
+lot more complex and even slower and is usually worth it only for
+sequences that you **know** need to be split up: avoid it at all cost if you
+aren't sure).
+
+This is really the only really hard part about spinlocks: once you start
+using spinlocks they tend to expand to areas you might not have noticed
+before, because you have to make sure the spinlocks correctly protect the
+shared data structures **everywhere** they are used. The spinlocks are most
+easily added to places that are completely independent of other code (for
+example, internal driver data structures that nobody else ever touches).
+
+   NOTE! The spin-lock is safe only when you **also** use the lock itself
+   to do locking across CPU's, which implies that EVERYTHING that
+   touches a shared variable has to agree about the spinlock they want
+   to use.
+
+----
+
+Lesson 2: reader-writer spinlocks.
+==================================
+
+If your data accesses have a very natural pattern where you usually tend
+to mostly read from the shared variables, the reader-writer locks
+(rw_lock) versions of the spinlocks are sometimes useful. They allow multiple
+readers to be in the same critical region at once, but if somebody wants
+to change the variables it has to get an exclusive write lock.
+
+   NOTE! reader-writer locks require more atomic memory operations than
+   simple spinlocks.  Unless the reader critical section is long, you
+   are better off just using spinlocks.
+
+The routines look the same as above::
+
+   rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
+
+	unsigned long flags;
+
+	read_lock_irqsave(&xxx_lock, flags);
+	.. critical section that only reads the info ...
+	read_unlock_irqrestore(&xxx_lock, flags);
+
+	write_lock_irqsave(&xxx_lock, flags);
+	.. read and write exclusive access to the info ...
+	write_unlock_irqrestore(&xxx_lock, flags);
+
+The above kind of lock may be useful for complex data structures like
+linked lists, especially searching for entries without changing the list
+itself.  The read lock allows many concurrent readers.  Anything that
+**changes** the list will have to get the write lock.
+
+   NOTE! RCU is better for list traversal, but requires careful
+   attention to design detail (see Documentation/RCU/listRCU.txt).
+
+Also, you cannot "upgrade" a read-lock to a write-lock, so if you at _any_
+time need to do any changes (even if you don't do it every time), you have
+to get the write-lock at the very beginning.
+
+   NOTE! We are working hard to remove reader-writer spinlocks in most
+   cases, so please don't add a new one without consensus.  (Instead, see
+   Documentation/RCU/rcu.txt for complete information.)
+
+----
+
+Lesson 3: spinlocks revisited.
+==============================
+
+The single spin-lock primitives above are by no means the only ones. They
+are the most safe ones, and the ones that work under all circumstances,
+but partly **because** they are safe they are also fairly slow. They are slower
+than they'd need to be, because they do have to disable interrupts
+(which is just a single instruction on a x86, but it's an expensive one -
+and on other architectures it can be worse).
+
+If you have a case where you have to protect a data structure across
+several CPU's and you want to use spinlocks you can potentially use
+cheaper versions of the spinlocks. IFF you know that the spinlocks are
+never used in interrupt handlers, you can use the non-irq versions::
+
+	spin_lock(&lock);
+	...
+	spin_unlock(&lock);
+
+(and the equivalent read-write versions too, of course). The spinlock will
+guarantee the same kind of exclusive access, and it will be much faster.
+This is useful if you know that the data in question is only ever
+manipulated from a "process context", ie no interrupts involved.
+
+The reasons you mustn't use these versions if you have interrupts that
+play with the spinlock is that you can get deadlocks::
+
+	spin_lock(&lock);
+	...
+		<- interrupt comes in:
+			spin_lock(&lock);
+
+where an interrupt tries to lock an already locked variable. This is ok if
+the other interrupt happens on another CPU, but it is _not_ ok if the
+interrupt happens on the same CPU that already holds the lock, because the
+lock will obviously never be released (because the interrupt is waiting
+for the lock, and the lock-holder is interrupted by the interrupt and will
+not continue until the interrupt has been processed).
+
+(This is also the reason why the irq-versions of the spinlocks only need
+to disable the _local_ interrupts - it's ok to use spinlocks in interrupts
+on other CPU's, because an interrupt on another CPU doesn't interrupt the
+CPU that holds the lock, so the lock-holder can continue and eventually
+releases the lock).
+
+Note that you can be clever with read-write locks and interrupts. For
+example, if you know that the interrupt only ever gets a read-lock, then
+you can use a non-irq version of read locks everywhere - because they
+don't block on each other (and thus there is no dead-lock wrt interrupts.
+But when you do the write-lock, you have to use the irq-safe version.
+
+For an example of being clever with rw-locks, see the "waitqueue_lock"
+handling in kernel/sched/core.c - nothing ever _changes_ a wait-queue from
+within an interrupt, they only read the queue in order to know whom to
+wake up. So read-locks are safe (which is good: they are very common
+indeed), while write-locks need to protect themselves against interrupts.
+
+		Linus
+
+----
+
+Reference information:
+======================
+
+For dynamic initialization, use spin_lock_init() or rwlock_init() as
+appropriate::
+
+   spinlock_t xxx_lock;
+   rwlock_t xxx_rw_lock;
+
+   static int __init xxx_init(void)
+   {
+	spin_lock_init(&xxx_lock);
+	rwlock_init(&xxx_rw_lock);
+	...
+   }
+
+   module_init(xxx_init);
+
+For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
+__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
diff --git a/Documentation/locking/spinlocks.txt b/Documentation/locking/spinlocks.txt
deleted file mode 100644
index ff35e40bdf5b..000000000000
--- a/Documentation/locking/spinlocks.txt
+++ /dev/null
@@ -1,167 +0,0 @@
-Lesson 1: Spin locks
-
-The most basic primitive for locking is spinlock.
-
-static DEFINE_SPINLOCK(xxx_lock);
-
-	unsigned long flags;
-
-	spin_lock_irqsave(&xxx_lock, flags);
-	... critical section here ..
-	spin_unlock_irqrestore(&xxx_lock, flags);
-
-The above is always safe. It will disable interrupts _locally_, but the
-spinlock itself will guarantee the global lock, so it will guarantee that
-there is only one thread-of-control within the region(s) protected by that
-lock. This works well even under UP also, so the code does _not_ need to
-worry about UP vs SMP issues: the spinlocks work correctly under both.
-
-   NOTE! Implications of spin_locks for memory are further described in:
-
-     Documentation/memory-barriers.txt
-       (5) LOCK operations.
-       (6) UNLOCK operations.
-
-The above is usually pretty simple (you usually need and want only one
-spinlock for most things - using more than one spinlock can make things a
-lot more complex and even slower and is usually worth it only for
-sequences that you _know_ need to be split up: avoid it at all cost if you
-aren't sure).
-
-This is really the only really hard part about spinlocks: once you start
-using spinlocks they tend to expand to areas you might not have noticed
-before, because you have to make sure the spinlocks correctly protect the
-shared data structures _everywhere_ they are used. The spinlocks are most
-easily added to places that are completely independent of other code (for
-example, internal driver data structures that nobody else ever touches).
-
-   NOTE! The spin-lock is safe only when you _also_ use the lock itself
-   to do locking across CPU's, which implies that EVERYTHING that
-   touches a shared variable has to agree about the spinlock they want
-   to use.
-
-----
-
-Lesson 2: reader-writer spinlocks.
-
-If your data accesses have a very natural pattern where you usually tend
-to mostly read from the shared variables, the reader-writer locks
-(rw_lock) versions of the spinlocks are sometimes useful. They allow multiple
-readers to be in the same critical region at once, but if somebody wants
-to change the variables it has to get an exclusive write lock.
-
-   NOTE! reader-writer locks require more atomic memory operations than
-   simple spinlocks.  Unless the reader critical section is long, you
-   are better off just using spinlocks.
-
-The routines look the same as above:
-
-   rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
-
-	unsigned long flags;
-
-	read_lock_irqsave(&xxx_lock, flags);
-	.. critical section that only reads the info ...
-	read_unlock_irqrestore(&xxx_lock, flags);
-
-	write_lock_irqsave(&xxx_lock, flags);
-	.. read and write exclusive access to the info ...
-	write_unlock_irqrestore(&xxx_lock, flags);
-
-The above kind of lock may be useful for complex data structures like
-linked lists, especially searching for entries without changing the list
-itself.  The read lock allows many concurrent readers.  Anything that
-_changes_ the list will have to get the write lock.
-
-   NOTE! RCU is better for list traversal, but requires careful
-   attention to design detail (see Documentation/RCU/listRCU.txt).
-
-Also, you cannot "upgrade" a read-lock to a write-lock, so if you at _any_
-time need to do any changes (even if you don't do it every time), you have
-to get the write-lock at the very beginning.
-
-   NOTE! We are working hard to remove reader-writer spinlocks in most
-   cases, so please don't add a new one without consensus.  (Instead, see
-   Documentation/RCU/rcu.txt for complete information.)
-
-----
-
-Lesson 3: spinlocks revisited.
-
-The single spin-lock primitives above are by no means the only ones. They
-are the most safe ones, and the ones that work under all circumstances,
-but partly _because_ they are safe they are also fairly slow. They are slower
-than they'd need to be, because they do have to disable interrupts
-(which is just a single instruction on a x86, but it's an expensive one -
-and on other architectures it can be worse).
-
-If you have a case where you have to protect a data structure across
-several CPU's and you want to use spinlocks you can potentially use
-cheaper versions of the spinlocks. IFF you know that the spinlocks are
-never used in interrupt handlers, you can use the non-irq versions:
-
-	spin_lock(&lock);
-	...
-	spin_unlock(&lock);
-
-(and the equivalent read-write versions too, of course). The spinlock will
-guarantee the same kind of exclusive access, and it will be much faster.
-This is useful if you know that the data in question is only ever
-manipulated from a "process context", ie no interrupts involved.
-
-The reasons you mustn't use these versions if you have interrupts that
-play with the spinlock is that you can get deadlocks:
-
-	spin_lock(&lock);
-	...
-		<- interrupt comes in:
-			spin_lock(&lock);
-
-where an interrupt tries to lock an already locked variable. This is ok if
-the other interrupt happens on another CPU, but it is _not_ ok if the
-interrupt happens on the same CPU that already holds the lock, because the
-lock will obviously never be released (because the interrupt is waiting
-for the lock, and the lock-holder is interrupted by the interrupt and will
-not continue until the interrupt has been processed).
-
-(This is also the reason why the irq-versions of the spinlocks only need
-to disable the _local_ interrupts - it's ok to use spinlocks in interrupts
-on other CPU's, because an interrupt on another CPU doesn't interrupt the
-CPU that holds the lock, so the lock-holder can continue and eventually
-releases the lock).
-
-Note that you can be clever with read-write locks and interrupts. For
-example, if you know that the interrupt only ever gets a read-lock, then
-you can use a non-irq version of read locks everywhere - because they
-don't block on each other (and thus there is no dead-lock wrt interrupts.
-But when you do the write-lock, you have to use the irq-safe version.
-
-For an example of being clever with rw-locks, see the "waitqueue_lock"
-handling in kernel/sched/core.c - nothing ever _changes_ a wait-queue from
-within an interrupt, they only read the queue in order to know whom to
-wake up. So read-locks are safe (which is good: they are very common
-indeed), while write-locks need to protect themselves against interrupts.
-
-		Linus
-
-----
-
-Reference information:
-
-For dynamic initialization, use spin_lock_init() or rwlock_init() as
-appropriate:
-
-   spinlock_t xxx_lock;
-   rwlock_t xxx_rw_lock;
-
-   static int __init xxx_init(void)
-   {
-	spin_lock_init(&xxx_lock);
-	rwlock_init(&xxx_rw_lock);
-	...
-   }
-
-   module_init(xxx_init);
-
-For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
-__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
diff --git a/Documentation/locking/ww-mutex-design.rst b/Documentation/locking/ww-mutex-design.rst
new file mode 100644
index 000000000000..1846c199da23
--- /dev/null
+++ b/Documentation/locking/ww-mutex-design.rst
@@ -0,0 +1,393 @@
+======================================
+Wound/Wait Deadlock-Proof Mutex Design
+======================================
+
+Please read mutex-design.txt first, as it applies to wait/wound mutexes too.
+
+Motivation for WW-Mutexes
+-------------------------
+
+GPU's do operations that commonly involve many buffers.  Those buffers
+can be shared across contexts/processes, exist in different memory
+domains (for example VRAM vs system memory), and so on.  And with
+PRIME / dmabuf, they can even be shared across devices.  So there are
+a handful of situations where the driver needs to wait for buffers to
+become ready.  If you think about this in terms of waiting on a buffer
+mutex for it to become available, this presents a problem because
+there is no way to guarantee that buffers appear in a execbuf/batch in
+the same order in all contexts.  That is directly under control of
+userspace, and a result of the sequence of GL calls that an application
+makes.	Which results in the potential for deadlock.  The problem gets
+more complex when you consider that the kernel may need to migrate the
+buffer(s) into VRAM before the GPU operates on the buffer(s), which
+may in turn require evicting some other buffers (and you don't want to
+evict other buffers which are already queued up to the GPU), but for a
+simplified understanding of the problem you can ignore this.
+
+The algorithm that the TTM graphics subsystem came up with for dealing with
+this problem is quite simple.  For each group of buffers (execbuf) that need
+to be locked, the caller would be assigned a unique reservation id/ticket,
+from a global counter.  In case of deadlock while locking all the buffers
+associated with a execbuf, the one with the lowest reservation ticket (i.e.
+the oldest task) wins, and the one with the higher reservation id (i.e. the
+younger task) unlocks all of the buffers that it has already locked, and then
+tries again.
+
+In the RDBMS literature, a reservation ticket is associated with a transaction.
+and the deadlock handling approach is called Wait-Die. The name is based on
+the actions of a locking thread when it encounters an already locked mutex.
+If the transaction holding the lock is younger, the locking transaction waits.
+If the transaction holding the lock is older, the locking transaction backs off
+and dies. Hence Wait-Die.
+There is also another algorithm called Wound-Wait:
+If the transaction holding the lock is younger, the locking transaction
+wounds the transaction holding the lock, requesting it to die.
+If the transaction holding the lock is older, it waits for the other
+transaction. Hence Wound-Wait.
+The two algorithms are both fair in that a transaction will eventually succeed.
+However, the Wound-Wait algorithm is typically stated to generate fewer backoffs
+compared to Wait-Die, but is, on the other hand, associated with more work than
+Wait-Die when recovering from a backoff. Wound-Wait is also a preemptive
+algorithm in that transactions are wounded by other transactions, and that
+requires a reliable way to pick up up the wounded condition and preempt the
+running transaction. Note that this is not the same as process preemption. A
+Wound-Wait transaction is considered preempted when it dies (returning
+-EDEADLK) following a wound.
+
+Concepts
+--------
+
+Compared to normal mutexes two additional concepts/objects show up in the lock
+interface for w/w mutexes:
+
+Acquire context: To ensure eventual forward progress it is important the a task
+trying to acquire locks doesn't grab a new reservation id, but keeps the one it
+acquired when starting the lock acquisition. This ticket is stored in the
+acquire context. Furthermore the acquire context keeps track of debugging state
+to catch w/w mutex interface abuse. An acquire context is representing a
+transaction.
+
+W/w class: In contrast to normal mutexes the lock class needs to be explicit for
+w/w mutexes, since it is required to initialize the acquire context. The lock
+class also specifies what algorithm to use, Wound-Wait or Wait-Die.
+
+Furthermore there are three different class of w/w lock acquire functions:
+
+* Normal lock acquisition with a context, using ww_mutex_lock.
+
+* Slowpath lock acquisition on the contending lock, used by the task that just
+  killed its transaction after having dropped all already acquired locks.
+  These functions have the _slow postfix.
+
+  From a simple semantics point-of-view the _slow functions are not strictly
+  required, since simply calling the normal ww_mutex_lock functions on the
+  contending lock (after having dropped all other already acquired locks) will
+  work correctly. After all if no other ww mutex has been acquired yet there's
+  no deadlock potential and hence the ww_mutex_lock call will block and not
+  prematurely return -EDEADLK. The advantage of the _slow functions is in
+  interface safety:
+
+  - ww_mutex_lock has a __must_check int return type, whereas ww_mutex_lock_slow
+    has a void return type. Note that since ww mutex code needs loops/retries
+    anyway the __must_check doesn't result in spurious warnings, even though the
+    very first lock operation can never fail.
+  - When full debugging is enabled ww_mutex_lock_slow checks that all acquired
+    ww mutex have been released (preventing deadlocks) and makes sure that we
+    block on the contending lock (preventing spinning through the -EDEADLK
+    slowpath until the contended lock can be acquired).
+
+* Functions to only acquire a single w/w mutex, which results in the exact same
+  semantics as a normal mutex. This is done by calling ww_mutex_lock with a NULL
+  context.
+
+  Again this is not strictly required. But often you only want to acquire a
+  single lock in which case it's pointless to set up an acquire context (and so
+  better to avoid grabbing a deadlock avoidance ticket).
+
+Of course, all the usual variants for handling wake-ups due to signals are also
+provided.
+
+Usage
+-----
+
+The algorithm (Wait-Die vs Wound-Wait) is chosen by using either
+DEFINE_WW_CLASS() (Wound-Wait) or DEFINE_WD_CLASS() (Wait-Die)
+As a rough rule of thumb, use Wound-Wait iff you
+expect the number of simultaneous competing transactions to be typically small,
+and you want to reduce the number of rollbacks.
+
+Three different ways to acquire locks within the same w/w class. Common
+definitions for methods #1 and #2::
+
+  static DEFINE_WW_CLASS(ww_class);
+
+  struct obj {
+	struct ww_mutex lock;
+	/* obj data */
+  };
+
+  struct obj_entry {
+	struct list_head head;
+	struct obj *obj;
+  };
+
+Method 1, using a list in execbuf->buffers that's not allowed to be reordered.
+This is useful if a list of required objects is already tracked somewhere.
+Furthermore the lock helper can use propagate the -EALREADY return code back to
+the caller as a signal that an object is twice on the list. This is useful if
+the list is constructed from userspace input and the ABI requires userspace to
+not have duplicate entries (e.g. for a gpu commandbuffer submission ioctl)::
+
+  int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+  {
+	struct obj *res_obj = NULL;
+	struct obj_entry *contended_entry = NULL;
+	struct obj_entry *entry;
+
+	ww_acquire_init(ctx, &ww_class);
+
+  retry:
+	list_for_each_entry (entry, list, head) {
+		if (entry->obj == res_obj) {
+			res_obj = NULL;
+			continue;
+		}
+		ret = ww_mutex_lock(&entry->obj->lock, ctx);
+		if (ret < 0) {
+			contended_entry = entry;
+			goto err;
+		}
+	}
+
+	ww_acquire_done(ctx);
+	return 0;
+
+  err:
+	list_for_each_entry_continue_reverse (entry, list, head)
+		ww_mutex_unlock(&entry->obj->lock);
+
+	if (res_obj)
+		ww_mutex_unlock(&res_obj->lock);
+
+	if (ret == -EDEADLK) {
+		/* we lost out in a seqno race, lock and retry.. */
+		ww_mutex_lock_slow(&contended_entry->obj->lock, ctx);
+		res_obj = contended_entry->obj;
+		goto retry;
+	}
+	ww_acquire_fini(ctx);
+
+	return ret;
+  }
+
+Method 2, using a list in execbuf->buffers that can be reordered. Same semantics
+of duplicate entry detection using -EALREADY as method 1 above. But the
+list-reordering allows for a bit more idiomatic code::
+
+  int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+  {
+	struct obj_entry *entry, *entry2;
+
+	ww_acquire_init(ctx, &ww_class);
+
+	list_for_each_entry (entry, list, head) {
+		ret = ww_mutex_lock(&entry->obj->lock, ctx);
+		if (ret < 0) {
+			entry2 = entry;
+
+			list_for_each_entry_continue_reverse (entry2, list, head)
+				ww_mutex_unlock(&entry2->obj->lock);
+
+			if (ret != -EDEADLK) {
+				ww_acquire_fini(ctx);
+				return ret;
+			}
+
+			/* we lost out in a seqno race, lock and retry.. */
+			ww_mutex_lock_slow(&entry->obj->lock, ctx);
+
+			/*
+			 * Move buf to head of the list, this will point
+			 * buf->next to the first unlocked entry,
+			 * restarting the for loop.
+			 */
+			list_del(&entry->head);
+			list_add(&entry->head, list);
+		}
+	}
+
+	ww_acquire_done(ctx);
+	return 0;
+  }
+
+Unlocking works the same way for both methods #1 and #2::
+
+  void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+  {
+	struct obj_entry *entry;
+
+	list_for_each_entry (entry, list, head)
+		ww_mutex_unlock(&entry->obj->lock);
+
+	ww_acquire_fini(ctx);
+  }
+
+Method 3 is useful if the list of objects is constructed ad-hoc and not upfront,
+e.g. when adjusting edges in a graph where each node has its own ww_mutex lock,
+and edges can only be changed when holding the locks of all involved nodes. w/w
+mutexes are a natural fit for such a case for two reasons:
+
+- They can handle lock-acquisition in any order which allows us to start walking
+  a graph from a starting point and then iteratively discovering new edges and
+  locking down the nodes those edges connect to.
+- Due to the -EALREADY return code signalling that a given objects is already
+  held there's no need for additional book-keeping to break cycles in the graph
+  or keep track off which looks are already held (when using more than one node
+  as a starting point).
+
+Note that this approach differs in two important ways from the above methods:
+
+- Since the list of objects is dynamically constructed (and might very well be
+  different when retrying due to hitting the -EDEADLK die condition) there's
+  no need to keep any object on a persistent list when it's not locked. We can
+  therefore move the list_head into the object itself.
+- On the other hand the dynamic object list construction also means that the -EALREADY return
+  code can't be propagated.
+
+Note also that methods #1 and #2 and method #3 can be combined, e.g. to first lock a
+list of starting nodes (passed in from userspace) using one of the above
+methods. And then lock any additional objects affected by the operations using
+method #3 below. The backoff/retry procedure will be a bit more involved, since
+when the dynamic locking step hits -EDEADLK we also need to unlock all the
+objects acquired with the fixed list. But the w/w mutex debug checks will catch
+any interface misuse for these cases.
+
+Also, method 3 can't fail the lock acquisition step since it doesn't return
+-EALREADY. Of course this would be different when using the _interruptible
+variants, but that's outside of the scope of these examples here::
+
+  struct obj {
+	struct ww_mutex ww_mutex;
+	struct list_head locked_list;
+  };
+
+  static DEFINE_WW_CLASS(ww_class);
+
+  void __unlock_objs(struct list_head *list)
+  {
+	struct obj *entry, *temp;
+
+	list_for_each_entry_safe (entry, temp, list, locked_list) {
+		/* need to do that before unlocking, since only the current lock holder is
+		allowed to use object */
+		list_del(&entry->locked_list);
+		ww_mutex_unlock(entry->ww_mutex)
+	}
+  }
+
+  void lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+  {
+	struct obj *obj;
+
+	ww_acquire_init(ctx, &ww_class);
+
+  retry:
+	/* re-init loop start state */
+	loop {
+		/* magic code which walks over a graph and decides which objects
+		 * to lock */
+
+		ret = ww_mutex_lock(obj->ww_mutex, ctx);
+		if (ret == -EALREADY) {
+			/* we have that one already, get to the next object */
+			continue;
+		}
+		if (ret == -EDEADLK) {
+			__unlock_objs(list);
+
+			ww_mutex_lock_slow(obj, ctx);
+			list_add(&entry->locked_list, list);
+			goto retry;
+		}
+
+		/* locked a new object, add it to the list */
+		list_add_tail(&entry->locked_list, list);
+	}
+
+	ww_acquire_done(ctx);
+	return 0;
+  }
+
+  void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+  {
+	__unlock_objs(list);
+	ww_acquire_fini(ctx);
+  }
+
+Method 4: Only lock one single objects. In that case deadlock detection and
+prevention is obviously overkill, since with grabbing just one lock you can't
+produce a deadlock within just one class. To simplify this case the w/w mutex
+api can be used with a NULL context.
+
+Implementation Details
+----------------------
+
+Design:
+^^^^^^^
+
+  ww_mutex currently encapsulates a struct mutex, this means no extra overhead for
+  normal mutex locks, which are far more common. As such there is only a small
+  increase in code size if wait/wound mutexes are not used.
+
+  We maintain the following invariants for the wait list:
+
+  (1) Waiters with an acquire context are sorted by stamp order; waiters
+      without an acquire context are interspersed in FIFO order.
+  (2) For Wait-Die, among waiters with contexts, only the first one can have
+      other locks acquired already (ctx->acquired > 0). Note that this waiter
+      may come after other waiters without contexts in the list.
+
+  The Wound-Wait preemption is implemented with a lazy-preemption scheme:
+  The wounded status of the transaction is checked only when there is
+  contention for a new lock and hence a true chance of deadlock. In that
+  situation, if the transaction is wounded, it backs off, clears the
+  wounded status and retries. A great benefit of implementing preemption in
+  this way is that the wounded transaction can identify a contending lock to
+  wait for before restarting the transaction. Just blindly restarting the
+  transaction would likely make the transaction end up in a situation where
+  it would have to back off again.
+
+  In general, not much contention is expected. The locks are typically used to
+  serialize access to resources for devices, and optimization focus should
+  therefore be directed towards the uncontended cases.
+
+Lockdep:
+^^^^^^^^
+
+  Special care has been taken to warn for as many cases of api abuse
+  as possible. Some common api abuses will be caught with
+  CONFIG_DEBUG_MUTEXES, but CONFIG_PROVE_LOCKING is recommended.
+
+  Some of the errors which will be warned about:
+   - Forgetting to call ww_acquire_fini or ww_acquire_init.
+   - Attempting to lock more mutexes after ww_acquire_done.
+   - Attempting to lock the wrong mutex after -EDEADLK and
+     unlocking all mutexes.
+   - Attempting to lock the right mutex after -EDEADLK,
+     before unlocking all mutexes.
+
+   - Calling ww_mutex_lock_slow before -EDEADLK was returned.
+
+   - Unlocking mutexes with the wrong unlock function.
+   - Calling one of the ww_acquire_* twice on the same context.
+   - Using a different ww_class for the mutex than for the ww_acquire_ctx.
+   - Normal lockdep errors that can result in deadlocks.
+
+  Some of the lockdep errors that can result in deadlocks:
+   - Calling ww_acquire_init to initialize a second ww_acquire_ctx before
+     having called ww_acquire_fini on the first.
+   - 'normal' deadlocks that can occur.
+
+FIXME:
+  Update this section once we have the TASK_DEADLOCK task state flag magic
+  implemented.
diff --git a/Documentation/locking/ww-mutex-design.txt b/Documentation/locking/ww-mutex-design.txt
deleted file mode 100644
index f0ed7c30e695..000000000000
--- a/Documentation/locking/ww-mutex-design.txt
+++ /dev/null
@@ -1,383 +0,0 @@
-Wound/Wait Deadlock-Proof Mutex Design
-======================================
-
-Please read mutex-design.txt first, as it applies to wait/wound mutexes too.
-
-Motivation for WW-Mutexes
--------------------------
-
-GPU's do operations that commonly involve many buffers.  Those buffers
-can be shared across contexts/processes, exist in different memory
-domains (for example VRAM vs system memory), and so on.  And with
-PRIME / dmabuf, they can even be shared across devices.  So there are
-a handful of situations where the driver needs to wait for buffers to
-become ready.  If you think about this in terms of waiting on a buffer
-mutex for it to become available, this presents a problem because
-there is no way to guarantee that buffers appear in a execbuf/batch in
-the same order in all contexts.  That is directly under control of
-userspace, and a result of the sequence of GL calls that an application
-makes.	Which results in the potential for deadlock.  The problem gets
-more complex when you consider that the kernel may need to migrate the
-buffer(s) into VRAM before the GPU operates on the buffer(s), which
-may in turn require evicting some other buffers (and you don't want to
-evict other buffers which are already queued up to the GPU), but for a
-simplified understanding of the problem you can ignore this.
-
-The algorithm that the TTM graphics subsystem came up with for dealing with
-this problem is quite simple.  For each group of buffers (execbuf) that need
-to be locked, the caller would be assigned a unique reservation id/ticket,
-from a global counter.  In case of deadlock while locking all the buffers
-associated with a execbuf, the one with the lowest reservation ticket (i.e.
-the oldest task) wins, and the one with the higher reservation id (i.e. the
-younger task) unlocks all of the buffers that it has already locked, and then
-tries again.
-
-In the RDBMS literature, a reservation ticket is associated with a transaction.
-and the deadlock handling approach is called Wait-Die. The name is based on
-the actions of a locking thread when it encounters an already locked mutex.
-If the transaction holding the lock is younger, the locking transaction waits.
-If the transaction holding the lock is older, the locking transaction backs off
-and dies. Hence Wait-Die.
-There is also another algorithm called Wound-Wait:
-If the transaction holding the lock is younger, the locking transaction
-wounds the transaction holding the lock, requesting it to die.
-If the transaction holding the lock is older, it waits for the other
-transaction. Hence Wound-Wait.
-The two algorithms are both fair in that a transaction will eventually succeed.
-However, the Wound-Wait algorithm is typically stated to generate fewer backoffs
-compared to Wait-Die, but is, on the other hand, associated with more work than
-Wait-Die when recovering from a backoff. Wound-Wait is also a preemptive
-algorithm in that transactions are wounded by other transactions, and that
-requires a reliable way to pick up up the wounded condition and preempt the
-running transaction. Note that this is not the same as process preemption. A
-Wound-Wait transaction is considered preempted when it dies (returning
--EDEADLK) following a wound.
-
-Concepts
---------
-
-Compared to normal mutexes two additional concepts/objects show up in the lock
-interface for w/w mutexes:
-
-Acquire context: To ensure eventual forward progress it is important the a task
-trying to acquire locks doesn't grab a new reservation id, but keeps the one it
-acquired when starting the lock acquisition. This ticket is stored in the
-acquire context. Furthermore the acquire context keeps track of debugging state
-to catch w/w mutex interface abuse. An acquire context is representing a
-transaction.
-
-W/w class: In contrast to normal mutexes the lock class needs to be explicit for
-w/w mutexes, since it is required to initialize the acquire context. The lock
-class also specifies what algorithm to use, Wound-Wait or Wait-Die.
-
-Furthermore there are three different class of w/w lock acquire functions:
-
-* Normal lock acquisition with a context, using ww_mutex_lock.
-
-* Slowpath lock acquisition on the contending lock, used by the task that just
-  killed its transaction after having dropped all already acquired locks.
-  These functions have the _slow postfix.
-
-  From a simple semantics point-of-view the _slow functions are not strictly
-  required, since simply calling the normal ww_mutex_lock functions on the
-  contending lock (after having dropped all other already acquired locks) will
-  work correctly. After all if no other ww mutex has been acquired yet there's
-  no deadlock potential and hence the ww_mutex_lock call will block and not
-  prematurely return -EDEADLK. The advantage of the _slow functions is in
-  interface safety:
-  - ww_mutex_lock has a __must_check int return type, whereas ww_mutex_lock_slow
-    has a void return type. Note that since ww mutex code needs loops/retries
-    anyway the __must_check doesn't result in spurious warnings, even though the
-    very first lock operation can never fail.
-  - When full debugging is enabled ww_mutex_lock_slow checks that all acquired
-    ww mutex have been released (preventing deadlocks) and makes sure that we
-    block on the contending lock (preventing spinning through the -EDEADLK
-    slowpath until the contended lock can be acquired).
-
-* Functions to only acquire a single w/w mutex, which results in the exact same
-  semantics as a normal mutex. This is done by calling ww_mutex_lock with a NULL
-  context.
-
-  Again this is not strictly required. But often you only want to acquire a
-  single lock in which case it's pointless to set up an acquire context (and so
-  better to avoid grabbing a deadlock avoidance ticket).
-
-Of course, all the usual variants for handling wake-ups due to signals are also
-provided.
-
-Usage
------
-
-The algorithm (Wait-Die vs Wound-Wait) is chosen by using either
-DEFINE_WW_CLASS() (Wound-Wait) or DEFINE_WD_CLASS() (Wait-Die)
-As a rough rule of thumb, use Wound-Wait iff you
-expect the number of simultaneous competing transactions to be typically small,
-and you want to reduce the number of rollbacks.
-
-Three different ways to acquire locks within the same w/w class. Common
-definitions for methods #1 and #2:
-
-static DEFINE_WW_CLASS(ww_class);
-
-struct obj {
-	struct ww_mutex lock;
-	/* obj data */
-};
-
-struct obj_entry {
-	struct list_head head;
-	struct obj *obj;
-};
-
-Method 1, using a list in execbuf->buffers that's not allowed to be reordered.
-This is useful if a list of required objects is already tracked somewhere.
-Furthermore the lock helper can use propagate the -EALREADY return code back to
-the caller as a signal that an object is twice on the list. This is useful if
-the list is constructed from userspace input and the ABI requires userspace to
-not have duplicate entries (e.g. for a gpu commandbuffer submission ioctl).
-
-int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj *res_obj = NULL;
-	struct obj_entry *contended_entry = NULL;
-	struct obj_entry *entry;
-
-	ww_acquire_init(ctx, &ww_class);
-
-retry:
-	list_for_each_entry (entry, list, head) {
-		if (entry->obj == res_obj) {
-			res_obj = NULL;
-			continue;
-		}
-		ret = ww_mutex_lock(&entry->obj->lock, ctx);
-		if (ret < 0) {
-			contended_entry = entry;
-			goto err;
-		}
-	}
-
-	ww_acquire_done(ctx);
-	return 0;
-
-err:
-	list_for_each_entry_continue_reverse (entry, list, head)
-		ww_mutex_unlock(&entry->obj->lock);
-
-	if (res_obj)
-		ww_mutex_unlock(&res_obj->lock);
-
-	if (ret == -EDEADLK) {
-		/* we lost out in a seqno race, lock and retry.. */
-		ww_mutex_lock_slow(&contended_entry->obj->lock, ctx);
-		res_obj = contended_entry->obj;
-		goto retry;
-	}
-	ww_acquire_fini(ctx);
-
-	return ret;
-}
-
-Method 2, using a list in execbuf->buffers that can be reordered. Same semantics
-of duplicate entry detection using -EALREADY as method 1 above. But the
-list-reordering allows for a bit more idiomatic code.
-
-int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj_entry *entry, *entry2;
-
-	ww_acquire_init(ctx, &ww_class);
-
-	list_for_each_entry (entry, list, head) {
-		ret = ww_mutex_lock(&entry->obj->lock, ctx);
-		if (ret < 0) {
-			entry2 = entry;
-
-			list_for_each_entry_continue_reverse (entry2, list, head)
-				ww_mutex_unlock(&entry2->obj->lock);
-
-			if (ret != -EDEADLK) {
-				ww_acquire_fini(ctx);
-				return ret;
-			}
-
-			/* we lost out in a seqno race, lock and retry.. */
-			ww_mutex_lock_slow(&entry->obj->lock, ctx);
-
-			/*
-			 * Move buf to head of the list, this will point
-			 * buf->next to the first unlocked entry,
-			 * restarting the for loop.
-			 */
-			list_del(&entry->head);
-			list_add(&entry->head, list);
-		}
-	}
-
-	ww_acquire_done(ctx);
-	return 0;
-}
-
-Unlocking works the same way for both methods #1 and #2:
-
-void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj_entry *entry;
-
-	list_for_each_entry (entry, list, head)
-		ww_mutex_unlock(&entry->obj->lock);
-
-	ww_acquire_fini(ctx);
-}
-
-Method 3 is useful if the list of objects is constructed ad-hoc and not upfront,
-e.g. when adjusting edges in a graph where each node has its own ww_mutex lock,
-and edges can only be changed when holding the locks of all involved nodes. w/w
-mutexes are a natural fit for such a case for two reasons:
-- They can handle lock-acquisition in any order which allows us to start walking
-  a graph from a starting point and then iteratively discovering new edges and
-  locking down the nodes those edges connect to.
-- Due to the -EALREADY return code signalling that a given objects is already
-  held there's no need for additional book-keeping to break cycles in the graph
-  or keep track off which looks are already held (when using more than one node
-  as a starting point).
-
-Note that this approach differs in two important ways from the above methods:
-- Since the list of objects is dynamically constructed (and might very well be
-  different when retrying due to hitting the -EDEADLK die condition) there's
-  no need to keep any object on a persistent list when it's not locked. We can
-  therefore move the list_head into the object itself.
-- On the other hand the dynamic object list construction also means that the -EALREADY return
-  code can't be propagated.
-
-Note also that methods #1 and #2 and method #3 can be combined, e.g. to first lock a
-list of starting nodes (passed in from userspace) using one of the above
-methods. And then lock any additional objects affected by the operations using
-method #3 below. The backoff/retry procedure will be a bit more involved, since
-when the dynamic locking step hits -EDEADLK we also need to unlock all the
-objects acquired with the fixed list. But the w/w mutex debug checks will catch
-any interface misuse for these cases.
-
-Also, method 3 can't fail the lock acquisition step since it doesn't return
--EALREADY. Of course this would be different when using the _interruptible
-variants, but that's outside of the scope of these examples here.
-
-struct obj {
-	struct ww_mutex ww_mutex;
-	struct list_head locked_list;
-};
-
-static DEFINE_WW_CLASS(ww_class);
-
-void __unlock_objs(struct list_head *list)
-{
-	struct obj *entry, *temp;
-
-	list_for_each_entry_safe (entry, temp, list, locked_list) {
-		/* need to do that before unlocking, since only the current lock holder is
-		allowed to use object */
-		list_del(&entry->locked_list);
-		ww_mutex_unlock(entry->ww_mutex)
-	}
-}
-
-void lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj *obj;
-
-	ww_acquire_init(ctx, &ww_class);
-
-retry:
-	/* re-init loop start state */
-	loop {
-		/* magic code which walks over a graph and decides which objects
-		 * to lock */
-
-		ret = ww_mutex_lock(obj->ww_mutex, ctx);
-		if (ret == -EALREADY) {
-			/* we have that one already, get to the next object */
-			continue;
-		}
-		if (ret == -EDEADLK) {
-			__unlock_objs(list);
-
-			ww_mutex_lock_slow(obj, ctx);
-			list_add(&entry->locked_list, list);
-			goto retry;
-		}
-
-		/* locked a new object, add it to the list */
-		list_add_tail(&entry->locked_list, list);
-	}
-
-	ww_acquire_done(ctx);
-	return 0;
-}
-
-void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	__unlock_objs(list);
-	ww_acquire_fini(ctx);
-}
-
-Method 4: Only lock one single objects. In that case deadlock detection and
-prevention is obviously overkill, since with grabbing just one lock you can't
-produce a deadlock within just one class. To simplify this case the w/w mutex
-api can be used with a NULL context.
-
-Implementation Details
-----------------------
-
-Design:
-  ww_mutex currently encapsulates a struct mutex, this means no extra overhead for
-  normal mutex locks, which are far more common. As such there is only a small
-  increase in code size if wait/wound mutexes are not used.
-
-  We maintain the following invariants for the wait list:
-  (1) Waiters with an acquire context are sorted by stamp order; waiters
-      without an acquire context are interspersed in FIFO order.
-  (2) For Wait-Die, among waiters with contexts, only the first one can have
-      other locks acquired already (ctx->acquired > 0). Note that this waiter
-      may come after other waiters without contexts in the list.
-
-  The Wound-Wait preemption is implemented with a lazy-preemption scheme:
-  The wounded status of the transaction is checked only when there is
-  contention for a new lock and hence a true chance of deadlock. In that
-  situation, if the transaction is wounded, it backs off, clears the
-  wounded status and retries. A great benefit of implementing preemption in
-  this way is that the wounded transaction can identify a contending lock to
-  wait for before restarting the transaction. Just blindly restarting the
-  transaction would likely make the transaction end up in a situation where
-  it would have to back off again.
-
-  In general, not much contention is expected. The locks are typically used to
-  serialize access to resources for devices, and optimization focus should
-  therefore be directed towards the uncontended cases.
-
-Lockdep:
-  Special care has been taken to warn for as many cases of api abuse
-  as possible. Some common api abuses will be caught with
-  CONFIG_DEBUG_MUTEXES, but CONFIG_PROVE_LOCKING is recommended.
-
-  Some of the errors which will be warned about:
-   - Forgetting to call ww_acquire_fini or ww_acquire_init.
-   - Attempting to lock more mutexes after ww_acquire_done.
-   - Attempting to lock the wrong mutex after -EDEADLK and
-     unlocking all mutexes.
-   - Attempting to lock the right mutex after -EDEADLK,
-     before unlocking all mutexes.
-
-   - Calling ww_mutex_lock_slow before -EDEADLK was returned.
-
-   - Unlocking mutexes with the wrong unlock function.
-   - Calling one of the ww_acquire_* twice on the same context.
-   - Using a different ww_class for the mutex than for the ww_acquire_ctx.
-   - Normal lockdep errors that can result in deadlocks.
-
-  Some of the lockdep errors that can result in deadlocks:
-   - Calling ww_acquire_init to initialize a second ww_acquire_ctx before
-     having called ww_acquire_fini on the first.
-   - 'normal' deadlocks that can occur.
-
-FIXME: Update this section once we have the TASK_DEADLOCK task state flag magic
-implemented.
diff --git a/Documentation/pi-futex.txt b/Documentation/pi-futex.txt
index b154f6c0c36e..c33ba2befbf8 100644
--- a/Documentation/pi-futex.txt
+++ b/Documentation/pi-futex.txt
@@ -119,4 +119,4 @@ properties of futexes, and all four combinations are possible: futex,
 robust-futex, PI-futex, robust+PI-futex.
 
 More details about priority inheritance can be found in
-Documentation/locking/rt-mutex.txt.
+Documentation/locking/rt-mutex.rst.
diff --git a/Documentation/translations/it_IT/kernel-hacking/locking.rst b/Documentation/translations/it_IT/kernel-hacking/locking.rst
index 5fd8a1abd2be..b9a6be4b8499 100644
--- a/Documentation/translations/it_IT/kernel-hacking/locking.rst
+++ b/Documentation/translations/it_IT/kernel-hacking/locking.rst
@@ -1404,7 +1404,7 @@ Riferimento per l'API dei Futex
 Approfondimenti
 ===============
 
--  ``Documentation/locking/spinlocks.txt``: la guida di Linus Torvalds agli
+-  ``Documentation/locking/spinlocks.rst``: la guida di Linus Torvalds agli
    spinlock del kernel.
 
 -  Unix Systems for Modern Architectures: Symmetric Multiprocessing and
-- 
cgit 


From 720594f691e5c8fb0624f3653b20b24ba8e57742 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sat, 13 Apr 2019 22:54:53 -0300
Subject: docs: connector: convert to ReST and rename to connector.rst

As it has some function definitions, move them to connector.h.

The remaining conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/connector/connector.rst | 156 +++++++++++++++++++++++++++
 Documentation/connector/connector.txt | 196 ----------------------------------
 2 files changed, 156 insertions(+), 196 deletions(-)
 create mode 100644 Documentation/connector/connector.rst
 delete mode 100644 Documentation/connector/connector.txt

(limited to 'Documentation')

diff --git a/Documentation/connector/connector.rst b/Documentation/connector/connector.rst
new file mode 100644
index 000000000000..24e26dc22dbf
--- /dev/null
+++ b/Documentation/connector/connector.rst
@@ -0,0 +1,156 @@
+:orphan:
+
+================
+Kernel Connector
+================
+
+Kernel connector - new netlink based userspace <-> kernel space easy
+to use communication module.
+
+The Connector driver makes it easy to connect various agents using a
+netlink based network.  One must register a callback and an identifier.
+When the driver receives a special netlink message with the appropriate
+identifier, the appropriate callback will be called.
+
+From the userspace point of view it's quite straightforward:
+
+	- socket();
+	- bind();
+	- send();
+	- recv();
+
+But if kernelspace wants to use the full power of such connections, the
+driver writer must create special sockets, must know about struct sk_buff
+handling, etc...  The Connector driver allows any kernelspace agents to use
+netlink based networking for inter-process communication in a significantly
+easier way::
+
+  int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
+  void cn_netlink_send_multi(struct cn_msg *msg, u16 len, u32 portid, u32 __group, int gfp_mask);
+  void cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group, int gfp_mask);
+
+  struct cb_id
+  {
+	__u32			idx;
+	__u32			val;
+  };
+
+idx and val are unique identifiers which must be registered in the
+connector.h header for in-kernel usage.  `void (*callback) (void *)` is a
+callback function which will be called when a message with above idx.val
+is received by the connector core.  The argument for that function must
+be dereferenced to `struct cn_msg *`::
+
+  struct cn_msg
+  {
+	struct cb_id		id;
+
+	__u32			seq;
+	__u32			ack;
+
+	__u32			len;	/* Length of the following data */
+	__u8			data[0];
+  };
+
+Connector interfaces
+====================
+
+ .. kernel-doc:: include/linux/connector.h
+
+ Note:
+   When registering new callback user, connector core assigns
+   netlink group to the user which is equal to its id.idx.
+
+Protocol description
+====================
+
+The current framework offers a transport layer with fixed headers.  The
+recommended protocol which uses such a header is as following:
+
+msg->seq and msg->ack are used to determine message genealogy.  When
+someone sends a message, they use a locally unique sequence and random
+acknowledge number.  The sequence number may be copied into
+nlmsghdr->nlmsg_seq too.
+
+The sequence number is incremented with each message sent.
+
+If you expect a reply to the message, then the sequence number in the
+received message MUST be the same as in the original message, and the
+acknowledge number MUST be the same + 1.
+
+If we receive a message and its sequence number is not equal to one we
+are expecting, then it is a new message.  If we receive a message and
+its sequence number is the same as one we are expecting, but its
+acknowledge is not equal to the sequence number in the original
+message + 1, then it is a new message.
+
+Obviously, the protocol header contains the above id.
+
+The connector allows event notification in the following form: kernel
+driver or userspace process can ask connector to notify it when
+selected ids will be turned on or off (registered or unregistered its
+callback).  It is done by sending a special command to the connector
+driver (it also registers itself with id={-1, -1}).
+
+As example of this usage can be found in the cn_test.c module which
+uses the connector to request notification and to send messages.
+
+Reliability
+===========
+
+Netlink itself is not a reliable protocol.  That means that messages can
+be lost due to memory pressure or process' receiving queue overflowed,
+so caller is warned that it must be prepared.  That is why the struct
+cn_msg [main connector's message header] contains u32 seq and u32 ack
+fields.
+
+Userspace usage
+===============
+
+2.6.14 has a new netlink socket implementation, which by default does not
+allow people to send data to netlink groups other than 1.
+So, if you wish to use a netlink socket (for example using connector)
+with a different group number, the userspace application must subscribe to
+that group first.  It can be achieved by the following pseudocode::
+
+  s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+
+  l_local.nl_family = AF_NETLINK;
+  l_local.nl_groups = 12345;
+  l_local.nl_pid = 0;
+
+  if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) {
+	perror("bind");
+	close(s);
+	return -1;
+  }
+
+  {
+	int on = l_local.nl_groups;
+	setsockopt(s, 270, 1, &on, sizeof(on));
+  }
+
+Where 270 above is SOL_NETLINK, and 1 is a NETLINK_ADD_MEMBERSHIP socket
+option.  To drop a multicast subscription, one should call the above socket
+option with the NETLINK_DROP_MEMBERSHIP parameter which is defined as 0.
+
+2.6.14 netlink code only allows to select a group which is less or equal to
+the maximum group number, which is used at netlink_kernel_create() time.
+In case of connector it is CN_NETLINK_USERS + 0xf, so if you want to use
+group number 12345, you must increment CN_NETLINK_USERS to that number.
+Additional 0xf numbers are allocated to be used by non-in-kernel users.
+
+Due to this limitation, group 0xffffffff does not work now, so one can
+not use add/remove connector's group notifications, but as far as I know,
+only cn_test.c test module used it.
+
+Some work in netlink area is still being done, so things can be changed in
+2.6.15 timeframe, if it will happen, documentation will be updated for that
+kernel.
+
+Code samples
+============
+
+Sample code for a connector test module and user space can be found
+in samples/connector/. To build this code, enable CONFIG_CONNECTOR
+and CONFIG_SAMPLES.
diff --git a/Documentation/connector/connector.txt b/Documentation/connector/connector.txt
deleted file mode 100644
index ab7ca897fab7..000000000000
--- a/Documentation/connector/connector.txt
+++ /dev/null
@@ -1,196 +0,0 @@
-/*****************************************/
-Kernel Connector.
-/*****************************************/
-
-Kernel connector - new netlink based userspace <-> kernel space easy
-to use communication module.
-
-The Connector driver makes it easy to connect various agents using a
-netlink based network.  One must register a callback and an identifier.
-When the driver receives a special netlink message with the appropriate
-identifier, the appropriate callback will be called.
-
-From the userspace point of view it's quite straightforward:
-
-	socket();
-	bind();
-	send();
-	recv();
-
-But if kernelspace wants to use the full power of such connections, the
-driver writer must create special sockets, must know about struct sk_buff
-handling, etc...  The Connector driver allows any kernelspace agents to use
-netlink based networking for inter-process communication in a significantly
-easier way:
-
-int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
-void cn_netlink_send_multi(struct cn_msg *msg, u16 len, u32 portid, u32 __group, int gfp_mask);
-void cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group, int gfp_mask);
-
-struct cb_id
-{
-	__u32			idx;
-	__u32			val;
-};
-
-idx and val are unique identifiers which must be registered in the
-connector.h header for in-kernel usage.  void (*callback) (void *) is a
-callback function which will be called when a message with above idx.val
-is received by the connector core.  The argument for that function must
-be dereferenced to struct cn_msg *.
-
-struct cn_msg
-{
-	struct cb_id		id;
-
-	__u32			seq;
-	__u32			ack;
-
-	__u32			len;		/* Length of the following data */
-	__u8			data[0];
-};
-
-/*****************************************/
-Connector interfaces.
-/*****************************************/
-
-int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
-
- Registers new callback with connector core.
-
- struct cb_id *id		- unique connector's user identifier.
-				  It must be registered in connector.h for legal in-kernel users.
- char *name			- connector's callback symbolic name.
- void (*callback) (struct cn..)	- connector's callback.
-				  cn_msg and the sender's credentials
-
-
-void cn_del_callback(struct cb_id *id);
-
- Unregisters new callback with connector core.
-
- struct cb_id *id		- unique connector's user identifier.
-
-
-int cn_netlink_send_multi(struct cn_msg *msg, u16 len, u32 portid, u32 __groups, int gfp_mask);
-int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __groups, int gfp_mask);
-
- Sends message to the specified groups.  It can be safely called from
- softirq context, but may silently fail under strong memory pressure.
- If there are no listeners for given group -ESRCH can be returned.
-
- struct cn_msg *		- message header(with attached data).
- u16 len			- for *_multi multiple cn_msg messages can be sent
- u32 port			- destination port.
- 				  If non-zero the message will be sent to the
-				  given port, which should be set to the
-				  original sender.
- u32 __group			- destination group.
-				  If port and __group is zero, then appropriate group will
-				  be searched through all registered connector users,
-				  and message will be delivered to the group which was
-				  created for user with the same ID as in msg.
-				  If __group is not zero, then message will be delivered
-				  to the specified group.
- int gfp_mask			- GFP mask.
-
- Note: When registering new callback user, connector core assigns
- netlink group to the user which is equal to its id.idx.
-
-/*****************************************/
-Protocol description.
-/*****************************************/
-
-The current framework offers a transport layer with fixed headers.  The
-recommended protocol which uses such a header is as following:
-
-msg->seq and msg->ack are used to determine message genealogy.  When
-someone sends a message, they use a locally unique sequence and random
-acknowledge number.  The sequence number may be copied into
-nlmsghdr->nlmsg_seq too.
-
-The sequence number is incremented with each message sent.
-
-If you expect a reply to the message, then the sequence number in the
-received message MUST be the same as in the original message, and the
-acknowledge number MUST be the same + 1.
-
-If we receive a message and its sequence number is not equal to one we
-are expecting, then it is a new message.  If we receive a message and
-its sequence number is the same as one we are expecting, but its
-acknowledge is not equal to the sequence number in the original
-message + 1, then it is a new message.
-
-Obviously, the protocol header contains the above id.
-
-The connector allows event notification in the following form: kernel
-driver or userspace process can ask connector to notify it when
-selected ids will be turned on or off (registered or unregistered its
-callback).  It is done by sending a special command to the connector
-driver (it also registers itself with id={-1, -1}).
-
-As example of this usage can be found in the cn_test.c module which
-uses the connector to request notification and to send messages.
-
-/*****************************************/
-Reliability.
-/*****************************************/
-
-Netlink itself is not a reliable protocol.  That means that messages can
-be lost due to memory pressure or process' receiving queue overflowed,
-so caller is warned that it must be prepared.  That is why the struct
-cn_msg [main connector's message header] contains u32 seq and u32 ack
-fields.
-
-/*****************************************/
-Userspace usage.
-/*****************************************/
-
-2.6.14 has a new netlink socket implementation, which by default does not
-allow people to send data to netlink groups other than 1.
-So, if you wish to use a netlink socket (for example using connector)
-with a different group number, the userspace application must subscribe to
-that group first.  It can be achieved by the following pseudocode:
-
-s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
-
-l_local.nl_family = AF_NETLINK;
-l_local.nl_groups = 12345;
-l_local.nl_pid = 0;
-
-if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) {
-	perror("bind");
-	close(s);
-	return -1;
-}
-
-{
-	int on = l_local.nl_groups;
-	setsockopt(s, 270, 1, &on, sizeof(on));
-}
-
-Where 270 above is SOL_NETLINK, and 1 is a NETLINK_ADD_MEMBERSHIP socket
-option.  To drop a multicast subscription, one should call the above socket
-option with the NETLINK_DROP_MEMBERSHIP parameter which is defined as 0.
-
-2.6.14 netlink code only allows to select a group which is less or equal to
-the maximum group number, which is used at netlink_kernel_create() time.
-In case of connector it is CN_NETLINK_USERS + 0xf, so if you want to use
-group number 12345, you must increment CN_NETLINK_USERS to that number.
-Additional 0xf numbers are allocated to be used by non-in-kernel users.
-
-Due to this limitation, group 0xffffffff does not work now, so one can
-not use add/remove connector's group notifications, but as far as I know, 
-only cn_test.c test module used it.
-
-Some work in netlink area is still being done, so things can be changed in
-2.6.15 timeframe, if it will happen, documentation will be updated for that
-kernel.
-
-/*****************************************/
-Code samples
-/*****************************************/
-
-Sample code for a connector test module and user space can be found
-in samples/connector/. To build this code, enable CONFIG_CONNECTOR
-and CONFIG_SAMPLES.
-- 
cgit 


From 065504d5b45bc780b8da221162145a4c9ec67ffc Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sun, 14 Apr 2019 07:59:32 -0300
Subject: docs: lcd-panel-cgram.txt: convert docs to ReST and rename to *.rst

This small text file describes the usage of parallel port LCD
displays from userspace PoV. So, a good candidate for the
admin guide.

While this is not part of the admin-guide book, mark it as
:orphan:, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/auxdisplay/lcd-panel-cgram.rst | 29 ++++++++++++++++++++++++++++
 Documentation/auxdisplay/lcd-panel-cgram.txt | 24 -----------------------
 2 files changed, 29 insertions(+), 24 deletions(-)
 create mode 100644 Documentation/auxdisplay/lcd-panel-cgram.rst
 delete mode 100644 Documentation/auxdisplay/lcd-panel-cgram.txt

(limited to 'Documentation')

diff --git a/Documentation/auxdisplay/lcd-panel-cgram.rst b/Documentation/auxdisplay/lcd-panel-cgram.rst
new file mode 100644
index 000000000000..dfef50286018
--- /dev/null
+++ b/Documentation/auxdisplay/lcd-panel-cgram.rst
@@ -0,0 +1,29 @@
+:orphan:
+
+======================================
+Parallel port LCD/Keypad Panel support
+======================================
+
+Some LCDs allow you to define up to 8 characters, mapped to ASCII
+characters 0 to 7. The escape code to define a new character is
+'\e[LG' followed by one digit from 0 to 7, representing the character
+number, and up to 8 couples of hex digits terminated by a semi-colon
+(';'). Each couple of digits represents a line, with 1-bits for each
+illuminated pixel with LSB on the right. Lines are numbered from the
+top of the character to the bottom. On a 5x7 matrix, only the 5 lower
+bits of the 7 first bytes are used for each character. If the string
+is incomplete, only complete lines will be redefined. Here are some
+examples::
+
+  printf "\e[LG0010101050D1F0C04;"  => 0 = [enter]
+  printf "\e[LG1040E1F0000000000;"  => 1 = [up]
+  printf "\e[LG2000000001F0E0400;"  => 2 = [down]
+  printf "\e[LG3040E1F001F0E0400;"  => 3 = [up-down]
+  printf "\e[LG40002060E1E0E0602;"  => 4 = [left]
+  printf "\e[LG500080C0E0F0E0C08;"  => 5 = [right]
+  printf "\e[LG60016051516141400;"  => 6 = "IP"
+
+  printf "\e[LG00103071F1F070301;"  => big speaker
+  printf "\e[LG00002061E1E060200;"  => small speaker
+
+Willy
diff --git a/Documentation/auxdisplay/lcd-panel-cgram.txt b/Documentation/auxdisplay/lcd-panel-cgram.txt
deleted file mode 100644
index 7f82c905763d..000000000000
--- a/Documentation/auxdisplay/lcd-panel-cgram.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Some LCDs allow you to define up to 8 characters, mapped to ASCII
-characters 0 to 7. The escape code to define a new character is
-'\e[LG' followed by one digit from 0 to 7, representing the character
-number, and up to 8 couples of hex digits terminated by a semi-colon
-(';'). Each couple of digits represents a line, with 1-bits for each
-illuminated pixel with LSB on the right. Lines are numbered from the
-top of the character to the bottom. On a 5x7 matrix, only the 5 lower
-bits of the 7 first bytes are used for each character. If the string
-is incomplete, only complete lines will be redefined. Here are some
-examples :
-
-  printf "\e[LG0010101050D1F0C04;"  => 0 = [enter]
-  printf "\e[LG1040E1F0000000000;"  => 1 = [up]
-  printf "\e[LG2000000001F0E0400;"  => 2 = [down]
-  printf "\e[LG3040E1F001F0E0400;"  => 3 = [up-down]
-  printf "\e[LG40002060E1E0E0602;"  => 4 = [left]
-  printf "\e[LG500080C0E0F0E0C08;"  => 5 = [right]
-  printf "\e[LG60016051516141400;"  => 6 = "IP"
-
-  printf "\e[LG00103071F1F070301;"  => big speaker
-  printf "\e[LG00002061E1E060200;"  => small speaker
-
-Willy
-
-- 
cgit 


From 6f2846cc2ebae4a8c875389e3aedb0cda3c4f462 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sun, 14 Apr 2019 08:03:23 -0300
Subject: docs: lp855x-driver.txt: convert to ReST and move to kernel-api

This small file seems to be an attempt to start documenting
backlight drivers.

It contains descriptions of the controls for the driver
with could sound as an somewhat user-faced description, but
it's main focus is to describe, instead, the data that should
be passed via platform data and some driver-specific stuff.

While this is not part of the driver-api book, mark it as
:orphan:, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/backlight/lp855x-driver.rst | 83 +++++++++++++++++++++++++++++++
 Documentation/backlight/lp855x-driver.txt | 66 ------------------------
 2 files changed, 83 insertions(+), 66 deletions(-)
 create mode 100644 Documentation/backlight/lp855x-driver.rst
 delete mode 100644 Documentation/backlight/lp855x-driver.txt

(limited to 'Documentation')

diff --git a/Documentation/backlight/lp855x-driver.rst b/Documentation/backlight/lp855x-driver.rst
new file mode 100644
index 000000000000..62b7ed847a77
--- /dev/null
+++ b/Documentation/backlight/lp855x-driver.rst
@@ -0,0 +1,83 @@
+:orphan:
+
+====================
+Kernel driver lp855x
+====================
+
+Backlight driver for LP855x ICs
+
+Supported chips:
+
+	Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and
+	LP8557
+
+Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+
+Description
+-----------
+
+* Brightness control
+
+  Brightness can be controlled by the pwm input or the i2c command.
+  The lp855x driver supports both cases.
+
+* Device attributes
+
+  1) bl_ctl_mode
+
+  Backlight control mode.
+
+  Value: pwm based or register based
+
+  2) chip_id
+
+  The lp855x chip id.
+
+  Value: lp8550/lp8551/lp8552/lp8553/lp8555/lp8556/lp8557
+
+Platform data for lp855x
+------------------------
+
+For supporting platform specific data, the lp855x platform data can be used.
+
+* name:
+	Backlight driver name. If it is not defined, default name is set.
+* device_control:
+	Value of DEVICE CONTROL register.
+* initial_brightness:
+	Initial value of backlight brightness.
+* period_ns:
+	Platform specific PWM period value. unit is nano.
+	Only valid when brightness is pwm input mode.
+* size_program:
+	Total size of lp855x_rom_data.
+* rom_data:
+	List of new eeprom/eprom registers.
+
+Examples
+========
+
+1) lp8552 platform data: i2c register mode with new eeprom data::
+
+    #define EEPROM_A5_ADDR	0xA5
+    #define EEPROM_A5_VAL	0x4f	/* EN_VSYNC=0 */
+
+    static struct lp855x_rom_data lp8552_eeprom_arr[] = {
+	{EEPROM_A5_ADDR, EEPROM_A5_VAL},
+    };
+
+    static struct lp855x_platform_data lp8552_pdata = {
+	.name = "lcd-bl",
+	.device_control = I2C_CONFIG(LP8552),
+	.initial_brightness = INITIAL_BRT,
+	.size_program = ARRAY_SIZE(lp8552_eeprom_arr),
+	.rom_data = lp8552_eeprom_arr,
+    };
+
+2) lp8556 platform data: pwm input mode with default rom data::
+
+    static struct lp855x_platform_data lp8556_pdata = {
+	.device_control = PWM_CONFIG(LP8556),
+	.initial_brightness = INITIAL_BRT,
+	.period_ns = 1000000,
+    };
diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt
deleted file mode 100644
index 01bce243d3d7..000000000000
--- a/Documentation/backlight/lp855x-driver.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-Kernel driver lp855x
-====================
-
-Backlight driver for LP855x ICs
-
-Supported chips:
-	Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and
-	LP8557
-
-Author: Milo(Woogyom) Kim <milo.kim@ti.com>
-
-Description
------------
-
-* Brightness control
-
-Brightness can be controlled by the pwm input or the i2c command.
-The lp855x driver supports both cases.
-
-* Device attributes
-
-1) bl_ctl_mode
-Backlight control mode.
-Value : pwm based or register based
-
-2) chip_id
-The lp855x chip id.
-Value : lp8550/lp8551/lp8552/lp8553/lp8555/lp8556/lp8557
-
-Platform data for lp855x
-------------------------
-
-For supporting platform specific data, the lp855x platform data can be used.
-
-* name : Backlight driver name. If it is not defined, default name is set.
-* device_control : Value of DEVICE CONTROL register.
-* initial_brightness : Initial value of backlight brightness.
-* period_ns : Platform specific PWM period value. unit is nano.
-	     Only valid when brightness is pwm input mode.
-* size_program : Total size of lp855x_rom_data.
-* rom_data : List of new eeprom/eprom registers.
-
-example 1) lp8552 platform data : i2c register mode with new eeprom data
-
-#define EEPROM_A5_ADDR	0xA5
-#define EEPROM_A5_VAL	0x4f	/* EN_VSYNC=0 */
-
-static struct lp855x_rom_data lp8552_eeprom_arr[] = {
-	{EEPROM_A5_ADDR, EEPROM_A5_VAL},
-};
-
-static struct lp855x_platform_data lp8552_pdata = {
-	.name = "lcd-bl",
-	.device_control = I2C_CONFIG(LP8552),
-	.initial_brightness = INITIAL_BRT,
-	.size_program = ARRAY_SIZE(lp8552_eeprom_arr),
-	.rom_data = lp8552_eeprom_arr,
-};
-
-example 2) lp8556 platform data : pwm input mode with default rom data
-
-static struct lp855x_platform_data lp8556_pdata = {
-	.device_control = PWM_CONFIG(LP8556),
-	.initial_brightness = INITIAL_BRT,
-	.period_ns = 1000000,
-};
-- 
cgit 


From 23e02422877b7fac868d8610a4265003da4ac0f4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sun, 14 Apr 2019 08:27:15 -0300
Subject: docs: m68k: convert docs to ReST and rename to *.rst

Convert the m68k kernel-options.txt file to ReST.

The conversion is trivial, as the document is already on a format
close enough to ReST. Just some small adjustments were needed in
order to make it both good for being parsed while keeping it on
a good txt shape.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/admin-guide/kernel-parameters.rst |   2 +-
 Documentation/m68k/index.rst                    |  17 +
 Documentation/m68k/kernel-options.rst           | 911 ++++++++++++++++++++++++
 Documentation/m68k/kernel-options.txt           | 884 -----------------------
 4 files changed, 929 insertions(+), 885 deletions(-)
 create mode 100644 Documentation/m68k/index.rst
 create mode 100644 Documentation/m68k/kernel-options.rst
 delete mode 100644 Documentation/m68k/kernel-options.txt

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index 5d29ba5ad88c..d05d531b4ec9 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -118,7 +118,7 @@ parameter is applicable::
 	LOOP	Loopback device support is enabled.
 	M68k	M68k architecture is enabled.
 			These options have more detailed description inside of
-			Documentation/m68k/kernel-options.txt.
+			Documentation/m68k/kernel-options.rst.
 	MDA	MDA console support is enabled.
 	MIPS	MIPS architecture is enabled.
 	MOUSE	Appropriate mouse support is enabled.
diff --git a/Documentation/m68k/index.rst b/Documentation/m68k/index.rst
new file mode 100644
index 000000000000..f3273ec075c3
--- /dev/null
+++ b/Documentation/m68k/index.rst
@@ -0,0 +1,17 @@
+:orphan:
+
+=================
+m68k Architecture
+=================
+
+.. toctree::
+   :maxdepth: 2
+
+   kernel-options
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/m68k/kernel-options.rst b/Documentation/m68k/kernel-options.rst
new file mode 100644
index 000000000000..cabd9419740d
--- /dev/null
+++ b/Documentation/m68k/kernel-options.rst
@@ -0,0 +1,911 @@
+===================================
+Command Line Options for Linux/m68k
+===================================
+
+Last Update: 2 May 1999
+
+Linux/m68k version: 2.2.6
+
+Author: Roman.Hodek@informatik.uni-erlangen.de (Roman Hodek)
+
+Update: jds@kom.auc.dk (Jes Sorensen) and faq@linux-m68k.org (Chris Lawrence)
+
+0) Introduction
+===============
+
+Often I've been asked which command line options the Linux/m68k
+kernel understands, or how the exact syntax for the ... option is, or
+... about the option ... . I hope, this document supplies all the
+answers...
+
+Note that some options might be outdated, their descriptions being
+incomplete or missing. Please update the information and send in the
+patches.
+
+
+1) Overview of the Kernel's Option Processing
+=============================================
+
+The kernel knows three kinds of options on its command line:
+
+  1) kernel options
+  2) environment settings
+  3) arguments for init
+
+To which of these classes an argument belongs is determined as
+follows: If the option is known to the kernel itself, i.e. if the name
+(the part before the '=') or, in some cases, the whole argument string
+is known to the kernel, it belongs to class 1. Otherwise, if the
+argument contains an '=', it is of class 2, and the definition is put
+into init's environment. All other arguments are passed to init as
+command line options.
+
+This document describes the valid kernel options for Linux/m68k in
+the version mentioned at the start of this file. Later revisions may
+add new such options, and some may be missing in older versions.
+
+In general, the value (the part after the '=') of an option is a
+list of values separated by commas. The interpretation of these values
+is up to the driver that "owns" the option. This association of
+options with drivers is also the reason that some are further
+subdivided.
+
+
+2) General Kernel Options
+=========================
+
+2.1) root=
+----------
+
+:Syntax: root=/dev/<device>
+:or:     root=<hex_number>
+
+This tells the kernel which device it should mount as the root
+filesystem. The device must be a block device with a valid filesystem
+on it.
+
+The first syntax gives the device by name. These names are converted
+into a major/minor number internally in the kernel in an unusual way.
+Normally, this "conversion" is done by the device files in /dev, but
+this isn't possible here, because the root filesystem (with /dev)
+isn't mounted yet... So the kernel parses the name itself, with some
+hardcoded name to number mappings. The name must always be a
+combination of two or three letters, followed by a decimal number.
+Valid names are::
+
+  /dev/ram: -> 0x0100 (initial ramdisk)
+  /dev/hda: -> 0x0300 (first IDE disk)
+  /dev/hdb: -> 0x0340 (second IDE disk)
+  /dev/sda: -> 0x0800 (first SCSI disk)
+  /dev/sdb: -> 0x0810 (second SCSI disk)
+  /dev/sdc: -> 0x0820 (third SCSI disk)
+  /dev/sdd: -> 0x0830 (forth SCSI disk)
+  /dev/sde: -> 0x0840 (fifth SCSI disk)
+  /dev/fd : -> 0x0200 (floppy disk)
+
+The name must be followed by a decimal number, that stands for the
+partition number. Internally, the value of the number is just
+added to the device number mentioned in the table above. The
+exceptions are /dev/ram and /dev/fd, where /dev/ram refers to an
+initial ramdisk loaded by your bootstrap program (please consult the
+instructions for your bootstrap program to find out how to load an
+initial ramdisk). As of kernel version 2.0.18 you must specify
+/dev/ram as the root device if you want to boot from an initial
+ramdisk. For the floppy devices, /dev/fd, the number stands for the
+floppy drive number (there are no partitions on floppy disks). I.e.,
+/dev/fd0 stands for the first drive, /dev/fd1 for the second, and so
+on. Since the number is just added, you can also force the disk format
+by adding a number greater than 3. If you look into your /dev
+directory, use can see the /dev/fd0D720 has major 2 and minor 16. You
+can specify this device for the root FS by writing "root=/dev/fd16" on
+the kernel command line.
+
+[Strange and maybe uninteresting stuff ON]
+
+This unusual translation of device names has some strange
+consequences: If, for example, you have a symbolic link from /dev/fd
+to /dev/fd0D720 as an abbreviation for floppy driver #0 in DD format,
+you cannot use this name for specifying the root device, because the
+kernel cannot see this symlink before mounting the root FS and it
+isn't in the table above. If you use it, the root device will not be
+set at all, without an error message. Another example: You cannot use a
+partition on e.g. the sixth SCSI disk as the root filesystem, if you
+want to specify it by name. This is, because only the devices up to
+/dev/sde are in the table above, but not /dev/sdf. Although, you can
+use the sixth SCSI disk for the root FS, but you have to specify the
+device by number... (see below). Or, even more strange, you can use the
+fact that there is no range checking of the partition number, and your
+knowledge that each disk uses 16 minors, and write "root=/dev/sde17"
+(for /dev/sdf1).
+
+[Strange and maybe uninteresting stuff OFF]
+
+If the device containing your root partition isn't in the table
+above, you can also specify it by major and minor numbers. These are
+written in hex, with no prefix and no separator between. E.g., if you
+have a CD with contents appropriate as a root filesystem in the first
+SCSI CD-ROM drive, you boot from it by "root=0b00". Here, hex "0b" =
+decimal 11 is the major of SCSI CD-ROMs, and the minor 0 stands for
+the first of these. You can find out all valid major numbers by
+looking into include/linux/major.h.
+
+In addition to major and minor numbers, if the device containing your
+root partition uses a partition table format with unique partition
+identifiers, then you may use them.  For instance,
+"root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF".  It is also
+possible to reference another partition on the same device using a
+known partition UUID as the starting point.  For example,
+if partition 5 of the device has the UUID of
+00112233-4455-6677-8899-AABBCCDDEEFF then partition 3 may be found as
+follows:
+
+  PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2
+
+Authoritative information can be found in
+"Documentation/admin-guide/kernel-parameters.rst".
+
+
+2.2) ro, rw
+-----------
+
+:Syntax: ro
+:or:     rw
+
+These two options tell the kernel whether it should mount the root
+filesystem read-only or read-write. The default is read-only, except
+for ramdisks, which default to read-write.
+
+
+2.3) debug
+----------
+
+:Syntax: debug
+
+This raises the kernel log level to 10 (the default is 7). This is the
+same level as set by the "dmesg" command, just that the maximum level
+selectable by dmesg is 8.
+
+
+2.4) debug=
+-----------
+
+:Syntax: debug=<device>
+
+This option causes certain kernel messages be printed to the selected
+debugging device. This can aid debugging the kernel, since the
+messages can be captured and analyzed on some other machine. Which
+devices are possible depends on the machine type. There are no checks
+for the validity of the device name. If the device isn't implemented,
+nothing happens.
+
+Messages logged this way are in general stack dumps after kernel
+memory faults or bad kernel traps, and kernel panics. To be exact: all
+messages of level 0 (panic messages) and all messages printed while
+the log level is 8 or more (their level doesn't matter). Before stack
+dumps, the kernel sets the log level to 10 automatically. A level of
+at least 8 can also be set by the "debug" command line option (see
+2.3) and at run time with "dmesg -n 8".
+
+Devices possible for Amiga:
+
+ - "ser":
+	  built-in serial port; parameters: 9600bps, 8N1
+ - "mem":
+	  Save the messages to a reserved area in chip mem. After
+          rebooting, they can be read under AmigaOS with the tool
+          'dmesg'.
+
+Devices possible for Atari:
+
+ - "ser1":
+	   ST-MFP serial port ("Modem1"); parameters: 9600bps, 8N1
+ - "ser2":
+	   SCC channel B serial port ("Modem2"); parameters: 9600bps, 8N1
+ - "ser" :
+	   default serial port
+           This is "ser2" for a Falcon, and "ser1" for any other machine
+ - "midi":
+	   The MIDI port; parameters: 31250bps, 8N1
+ - "par" :
+	   parallel port
+
+           The printing routine for this implements a timeout for the
+           case there's no printer connected (else the kernel would
+           lock up). The timeout is not exact, but usually a few
+           seconds.
+
+
+2.6) ramdisk_size=
+------------------
+
+:Syntax: ramdisk_size=<size>
+
+This option instructs the kernel to set up a ramdisk of the given
+size in KBytes. Do not use this option if the ramdisk contents are
+passed by bootstrap! In this case, the size is selected automatically
+and should not be overwritten.
+
+The only application is for root filesystems on floppy disks, that
+should be loaded into memory. To do that, select the corresponding
+size of the disk as ramdisk size, and set the root device to the disk
+drive (with "root=").
+
+
+2.7) swap=
+
+  I can't find any sign of this option in 2.2.6.
+
+2.8) buff=
+-----------
+
+  I can't find any sign of this option in 2.2.6.
+
+
+3) General Device Options (Amiga and Atari)
+===========================================
+
+3.1) ether=
+-----------
+
+:Syntax: ether=[<irq>[,<base_addr>[,<mem_start>[,<mem_end>]]]],<dev-name>
+
+<dev-name> is the name of a net driver, as specified in
+drivers/net/Space.c in the Linux source. Most prominent are eth0, ...
+eth3, sl0, ... sl3, ppp0, ..., ppp3, dummy, and lo.
+
+The non-ethernet drivers (sl, ppp, dummy, lo) obviously ignore the
+settings by this options. Also, the existing ethernet drivers for
+Linux/m68k (ariadne, a2065, hydra) don't use them because Zorro boards
+are really Plug-'n-Play, so the "ether=" option is useless altogether
+for Linux/m68k.
+
+
+3.2) hd=
+--------
+
+:Syntax: hd=<cylinders>,<heads>,<sectors>
+
+This option sets the disk geometry of an IDE disk. The first hd=
+option is for the first IDE disk, the second for the second one.
+(I.e., you can give this option twice.) In most cases, you won't have
+to use this option, since the kernel can obtain the geometry data
+itself. It exists just for the case that this fails for one of your
+disks.
+
+
+3.3) max_scsi_luns=
+-------------------
+
+:Syntax: max_scsi_luns=<n>
+
+Sets the maximum number of LUNs (logical units) of SCSI devices to
+be scanned. Valid values for <n> are between 1 and 8. Default is 8 if
+"Probe all LUNs on each SCSI device" was selected during the kernel
+configuration, else 1.
+
+
+3.4) st=
+--------
+
+:Syntax: st=<buffer_size>,[<write_thres>,[<max_buffers>]]
+
+Sets several parameters of the SCSI tape driver. <buffer_size> is
+the number of 512-byte buffers reserved for tape operations for each
+device. <write_thres> sets the number of blocks which must be filled
+to start an actual write operation to the tape. Maximum value is the
+total number of buffers. <max_buffer> limits the total number of
+buffers allocated for all tape devices.
+
+
+3.5) dmasound=
+--------------
+
+:Syntax: dmasound=[<buffers>,<buffer-size>[,<catch-radius>]]
+
+This option controls some configurations of the Linux/m68k DMA sound
+driver (Amiga and Atari): <buffers> is the number of buffers you want
+to use (minimum 4, default 4), <buffer-size> is the size of each
+buffer in kilobytes (minimum 4, default 32) and <catch-radius> says
+how much percent of error will be tolerated when setting a frequency
+(maximum 10, default 0). For example with 3% you can play 8000Hz
+AU-Files on the Falcon with its hardware frequency of 8195Hz and thus
+don't need to expand the sound.
+
+
+
+4) Options for Atari Only
+=========================
+
+4.1) video=
+-----------
+
+:Syntax: video=<fbname>:<sub-options...>
+
+The <fbname> parameter specifies the name of the frame buffer,
+eg. most atari users will want to specify `atafb` here. The
+<sub-options> is a comma-separated list of the sub-options listed
+below.
+
+NB:
+    Please notice that this option was renamed from `atavideo` to
+    `video` during the development of the 1.3.x kernels, thus you
+    might need to update your boot-scripts if upgrading to 2.x from
+    an 1.2.x kernel.
+
+NBB:
+    The behavior of video= was changed in 2.1.57 so the recommended
+    option is to specify the name of the frame buffer.
+
+4.1.1) Video Mode
+-----------------
+
+This sub-option may be any of the predefined video modes, as listed
+in atari/atafb.c in the Linux/m68k source tree. The kernel will
+activate the given video mode at boot time and make it the default
+mode, if the hardware allows. Currently defined names are:
+
+ - stlow           : 320x200x4
+ - stmid, default5 : 640x200x2
+ - sthigh, default4: 640x400x1
+ - ttlow           : 320x480x8, TT only
+ - ttmid, default1 : 640x480x4, TT only
+ - tthigh, default2: 1280x960x1, TT only
+ - vga2            : 640x480x1, Falcon only
+ - vga4            : 640x480x2, Falcon only
+ - vga16, default3 : 640x480x4, Falcon only
+ - vga256          : 640x480x8, Falcon only
+ - falh2           : 896x608x1, Falcon only
+ - falh16          : 896x608x4, Falcon only
+
+If no video mode is given on the command line, the kernel tries the
+modes names "default<n>" in turn, until one is possible with the
+hardware in use.
+
+A video mode setting doesn't make sense, if the external driver is
+activated by a "external:" sub-option.
+
+4.1.2) inverse
+--------------
+
+Invert the display. This affects both, text (consoles) and graphics
+(X) display. Usually, the background is chosen to be black. With this
+option, you can make the background white.
+
+4.1.3) font
+-----------
+
+:Syntax: font:<fontname>
+
+Specify the font to use in text modes. Currently you can choose only
+between `VGA8x8`, `VGA8x16` and `PEARL8x8`. `VGA8x8` is default, if the
+vertical size of the display is less than 400 pixel rows. Otherwise, the
+`VGA8x16` font is the default.
+
+4.1.4) `hwscroll_`
+------------------
+
+:Syntax: `hwscroll_<n>`
+
+The number of additional lines of video memory to reserve for
+speeding up the scrolling ("hardware scrolling"). Hardware scrolling
+is possible only if the kernel can set the video base address in steps
+fine enough. This is true for STE, MegaSTE, TT, and Falcon. It is not
+possible with plain STs and graphics cards (The former because the
+base address must be on a 256 byte boundary there, the latter because
+the kernel doesn't know how to set the base address at all.)
+
+By default, <n> is set to the number of visible text lines on the
+display. Thus, the amount of video memory is doubled, compared to no
+hardware scrolling. You can turn off the hardware scrolling altogether
+by setting <n> to 0.
+
+4.1.5) internal:
+----------------
+
+:Syntax: internal:<xres>;<yres>[;<xres_max>;<yres_max>;<offset>]
+
+This option specifies the capabilities of some extended internal video
+hardware, like e.g. OverScan. <xres> and <yres> give the (extended)
+dimensions of the screen.
+
+If your OverScan needs a black border, you have to write the last
+three arguments of the "internal:". <xres_max> is the maximum line
+length the hardware allows, <yres_max> the maximum number of lines.
+<offset> is the offset of the visible part of the screen memory to its
+physical start, in bytes.
+
+Often, extended interval video hardware has to be activated somehow.
+For this, see the "sw_*" options below.
+
+4.1.6) external:
+----------------
+
+:Syntax:
+  external:<xres>;<yres>;<depth>;<org>;<scrmem>[;<scrlen>[;<vgabase>
+  [;<colw>[;<coltype>[;<xres_virtual>]]]]]
+
+.. I had to break this line...
+
+This is probably the most complicated parameter... It specifies that
+you have some external video hardware (a graphics board), and how to
+use it under Linux/m68k. The kernel cannot know more about the hardware
+than you tell it here! The kernel also is unable to set or change any
+video modes, since it doesn't know about any board internal. So, you
+have to switch to that video mode before you start Linux, and cannot
+switch to another mode once Linux has started.
+
+The first 3 parameters of this sub-option should be obvious: <xres>,
+<yres> and <depth> give the dimensions of the screen and the number of
+planes (depth). The depth is the logarithm to base 2 of the number
+of colors possible. (Or, the other way round: The number of colors is
+2^depth).
+
+You have to tell the kernel furthermore how the video memory is
+organized. This is done by a letter as <org> parameter:
+
+ 'n':
+      "normal planes", i.e. one whole plane after another
+ 'i':
+      "interleaved planes", i.e. 16 bit of the first plane, than 16 bit
+      of the next, and so on... This mode is used only with the
+      built-in Atari video modes, I think there is no card that
+      supports this mode.
+ 'p':
+      "packed pixels", i.e. <depth> consecutive bits stand for all
+      planes of one pixel; this is the most common mode for 8 planes
+      (256 colors) on graphic cards
+ 't':
+      "true color" (more or less packed pixels, but without a color
+      lookup table); usually depth is 24
+
+For monochrome modes (i.e., <depth> is 1), the <org> letter has a
+different meaning:
+
+ 'n':
+      normal colors, i.e. 0=white, 1=black
+ 'i':
+      inverted colors, i.e. 0=black, 1=white
+
+The next important information about the video hardware is the base
+address of the video memory. That is given in the <scrmem> parameter,
+as a hexadecimal number with a "0x" prefix. You have to find out this
+address in the documentation of your hardware.
+
+The next parameter, <scrlen>, tells the kernel about the size of the
+video memory. If it's missing, the size is calculated from <xres>,
+<yres>, and <depth>. For now, it is not useful to write a value here.
+It would be used only for hardware scrolling (which isn't possible
+with the external driver, because the kernel cannot set the video base
+address), or for virtual resolutions under X (which the X server
+doesn't support yet). So, it's currently best to leave this field
+empty, either by ending the "external:" after the video address or by
+writing two consecutive semicolons, if you want to give a <vgabase>
+(it is allowed to leave this parameter empty).
+
+The <vgabase> parameter is optional. If it is not given, the kernel
+cannot read or write any color registers of the video hardware, and
+thus you have to set appropriate colors before you start Linux. But if
+your card is somehow VGA compatible, you can tell the kernel the base
+address of the VGA register set, so it can change the color lookup
+table. You have to look up this address in your board's documentation.
+To avoid misunderstandings: <vgabase> is the _base_ address, i.e. a 4k
+aligned address. For read/writing the color registers, the kernel
+uses the addresses vgabase+0x3c7...vgabase+0x3c9. The <vgabase>
+parameter is written in hexadecimal with a "0x" prefix, just as
+<scrmem>.
+
+<colw> is meaningful only if <vgabase> is specified. It tells the
+kernel how wide each of the color register is, i.e. the number of bits
+per single color (red/green/blue). Default is 6, another quite usual
+value is 8.
+
+Also <coltype> is used together with <vgabase>. It tells the kernel
+about the color register model of your gfx board. Currently, the types
+"vga" (which is also the default) and "mv300" (SANG MV300) are
+implemented.
+
+Parameter <xres_virtual> is required for ProMST or ET4000 cards where
+the physical linelength differs from the visible length. With ProMST,
+xres_virtual must be set to 2048. For ET4000, xres_virtual depends on the
+initialisation of the video-card.
+If you're missing a corresponding yres_virtual: the external part is legacy,
+therefore we don't support hardware-dependent functions like hardware-scroll,
+panning or blanking.
+
+4.1.7) eclock:
+--------------
+
+The external pixel clock attached to the Falcon VIDEL shifter. This
+currently works only with the ScreenWonder!
+
+4.1.8) monitorcap:
+-------------------
+
+:Syntax: monitorcap:<vmin>;<vmax>;<hmin>;<hmax>
+
+This describes the capabilities of a multisync monitor. Don't use it
+with a fixed-frequency monitor! For now, only the Falcon frame buffer
+uses the settings of "monitorcap:".
+
+<vmin> and <vmax> are the minimum and maximum, resp., vertical frequencies
+your monitor can work with, in Hz. <hmin> and <hmax> are the same for
+the horizontal frequency, in kHz.
+
+  The defaults are 58;62;31;32 (VGA compatible).
+
+  The defaults for TV/SC1224/SC1435 cover both PAL and NTSC standards.
+
+4.1.9) keep
+------------
+
+If this option is given, the framebuffer device doesn't do any video
+mode calculations and settings on its own. The only Atari fb device
+that does this currently is the Falcon.
+
+What you reach with this: Settings for unknown video extensions
+aren't overridden by the driver, so you can still use the mode found
+when booting, when the driver doesn't know to set this mode itself.
+But this also means, that you can't switch video modes anymore...
+
+An example where you may want to use "keep" is the ScreenBlaster for
+the Falcon.
+
+
+4.2) atamouse=
+--------------
+
+:Syntax: atamouse=<x-threshold>,[<y-threshold>]
+
+With this option, you can set the mouse movement reporting threshold.
+This is the number of pixels of mouse movement that have to accumulate
+before the IKBD sends a new mouse packet to the kernel. Higher values
+reduce the mouse interrupt load and thus reduce the chance of keyboard
+overruns. Lower values give a slightly faster mouse responses and
+slightly better mouse tracking.
+
+You can set the threshold in x and y separately, but usually this is
+of little practical use. If there's just one number in the option, it
+is used for both dimensions. The default value is 2 for both
+thresholds.
+
+
+4.3) ataflop=
+-------------
+
+:Syntax: ataflop=<drive type>[,<trackbuffering>[,<steprateA>[,<steprateB>]]]
+
+   The drive type may be 0, 1, or 2, for DD, HD, and ED, resp. This
+   setting affects how many buffers are reserved and which formats are
+   probed (see also below). The default is 1 (HD). Only one drive type
+   can be selected. If you have two disk drives, select the "better"
+   type.
+
+   The second parameter <trackbuffer> tells the kernel whether to use
+   track buffering (1) or not (0). The default is machine-dependent:
+   no for the Medusa and yes for all others.
+
+   With the two following parameters, you can change the default
+   steprate used for drive A and B, resp.
+
+
+4.4) atascsi=
+-------------
+
+:Syntax: atascsi=<can_queue>[,<cmd_per_lun>[,<scat-gat>[,<host-id>[,<tagged>]]]]
+
+This option sets some parameters for the Atari native SCSI driver.
+Generally, any number of arguments can be omitted from the end. And
+for each of the numbers, a negative value means "use default". The
+defaults depend on whether TT-style or Falcon-style SCSI is used.
+Below, defaults are noted as n/m, where the first value refers to
+TT-SCSI and the latter to Falcon-SCSI. If an illegal value is given
+for one parameter, an error message is printed and that one setting is
+ignored (others aren't affected).
+
+  <can_queue>:
+    This is the maximum number of SCSI commands queued internally to the
+    Atari SCSI driver. A value of 1 effectively turns off the driver
+    internal multitasking (if it causes problems). Legal values are >=
+    1. <can_queue> can be as high as you like, but values greater than
+    <cmd_per_lun> times the number of SCSI targets (LUNs) you have
+    don't make sense. Default: 16/8.
+
+  <cmd_per_lun>:
+    Maximum number of SCSI commands issued to the driver for one
+    logical unit (LUN, usually one SCSI target). Legal values start
+    from 1. If tagged queuing (see below) is not used, values greater
+    than 2 don't make sense, but waste memory. Otherwise, the maximum
+    is the number of command tags available to the driver (currently
+    32). Default: 8/1. (Note: Values > 1 seem to cause problems on a
+    Falcon, cause not yet known.)
+
+    The <cmd_per_lun> value at a great part determines the amount of
+    memory SCSI reserves for itself. The formula is rather
+    complicated, but I can give you some hints:
+
+      no scatter-gather:
+	cmd_per_lun * 232 bytes
+      full scatter-gather:
+	cmd_per_lun * approx. 17 Kbytes
+
+  <scat-gat>:
+    Size of the scatter-gather table, i.e. the number of requests
+    consecutive on the disk that can be merged into one SCSI command.
+    Legal values are between 0 and 255. Default: 255/0. Note: This
+    value is forced to 0 on a Falcon, since scatter-gather isn't
+    possible with the ST-DMA. Not using scatter-gather hurts
+    performance significantly.
+
+  <host-id>:
+    The SCSI ID to be used by the initiator (your Atari). This is
+    usually 7, the highest possible ID. Every ID on the SCSI bus must
+    be unique. Default: determined at run time: If the NV-RAM checksum
+    is valid, and bit 7 in byte 30 of the NV-RAM is set, the lower 3
+    bits of this byte are used as the host ID. (This method is defined
+    by Atari and also used by some TOS HD drivers.) If the above
+    isn't given, the default ID is 7. (both, TT and Falcon).
+
+  <tagged>:
+    0 means turn off tagged queuing support, all other values > 0 mean
+    use tagged queuing for targets that support it. Default: currently
+    off, but this may change when tagged queuing handling has been
+    proved to be reliable.
+
+    Tagged queuing means that more than one command can be issued to
+    one LUN, and the SCSI device itself orders the requests so they
+    can be performed in optimal order. Not all SCSI devices support
+    tagged queuing (:-().
+
+4.5 switches=
+-------------
+
+:Syntax: switches=<list of switches>
+
+With this option you can switch some hardware lines that are often
+used to enable/disable certain hardware extensions. Examples are
+OverScan, overclocking, ...
+
+The <list of switches> is a comma-separated list of the following
+items:
+
+  ikbd:
+	set RTS of the keyboard ACIA high
+  midi:
+	set RTS of the MIDI ACIA high
+  snd6:
+	set bit 6 of the PSG port A
+  snd7:
+	set bit 6 of the PSG port A
+
+It doesn't make sense to mention a switch more than once (no
+difference to only once), but you can give as many switches as you
+want to enable different features. The switch lines are set as early
+as possible during kernel initialization (even before determining the
+present hardware.)
+
+All of the items can also be prefixed with `ov_`, i.e. `ov_ikbd`,
+`ov_midi`, ... These options are meant for switching on an OverScan
+video extension. The difference to the bare option is that the
+switch-on is done after video initialization, and somehow synchronized
+to the HBLANK. A speciality is that ov_ikbd and ov_midi are switched
+off before rebooting, so that OverScan is disabled and TOS boots
+correctly.
+
+If you give an option both, with and without the `ov_` prefix, the
+earlier initialization (`ov_`-less) takes precedence. But the
+switching-off on reset still happens in this case.
+
+5) Options for Amiga Only:
+==========================
+
+5.1) video=
+-----------
+
+:Syntax: video=<fbname>:<sub-options...>
+
+The <fbname> parameter specifies the name of the frame buffer, valid
+options are `amifb`, `cyber`, 'virge', `retz3` and `clgen`, provided
+that the respective frame buffer devices have been compiled into the
+kernel (or compiled as loadable modules). The behavior of the <fbname>
+option was changed in 2.1.57 so it is now recommended to specify this
+option.
+
+The <sub-options> is a comma-separated list of the sub-options listed
+below. This option is organized similar to the Atari version of the
+"video"-option (4.1), but knows fewer sub-options.
+
+5.1.1) video mode
+-----------------
+
+Again, similar to the video mode for the Atari (see 4.1.1). Predefined
+modes depend on the used frame buffer device.
+
+OCS, ECS and AGA machines all use the color frame buffer. The following
+predefined video modes are available:
+
+NTSC modes:
+ - ntsc            : 640x200, 15 kHz, 60 Hz
+ - ntsc-lace       : 640x400, 15 kHz, 60 Hz interlaced
+
+PAL modes:
+ - pal             : 640x256, 15 kHz, 50 Hz
+ - pal-lace        : 640x512, 15 kHz, 50 Hz interlaced
+
+ECS modes:
+ - multiscan       : 640x480, 29 kHz, 57 Hz
+ - multiscan-lace  : 640x960, 29 kHz, 57 Hz interlaced
+ - euro36          : 640x200, 15 kHz, 72 Hz
+ - euro36-lace     : 640x400, 15 kHz, 72 Hz interlaced
+ - euro72          : 640x400, 29 kHz, 68 Hz
+ - euro72-lace     : 640x800, 29 kHz, 68 Hz interlaced
+ - super72         : 800x300, 23 kHz, 70 Hz
+ - super72-lace    : 800x600, 23 kHz, 70 Hz interlaced
+ - dblntsc-ff      : 640x400, 27 kHz, 57 Hz
+ - dblntsc-lace    : 640x800, 27 kHz, 57 Hz interlaced
+ - dblpal-ff       : 640x512, 27 kHz, 47 Hz
+ - dblpal-lace     : 640x1024, 27 kHz, 47 Hz interlaced
+ - dblntsc         : 640x200, 27 kHz, 57 Hz doublescan
+ - dblpal          : 640x256, 27 kHz, 47 Hz doublescan
+
+VGA modes:
+ - vga             : 640x480, 31 kHz, 60 Hz
+ - vga70           : 640x400, 31 kHz, 70 Hz
+
+Please notice that the ECS and VGA modes require either an ECS or AGA
+chipset, and that these modes are limited to 2-bit color for the ECS
+chipset and 8-bit color for the AGA chipset.
+
+5.1.2) depth
+------------
+
+:Syntax: depth:<nr. of bit-planes>
+
+Specify the number of bit-planes for the selected video-mode.
+
+5.1.3) inverse
+--------------
+
+Use inverted display (black on white). Functionally the same as the
+"inverse" sub-option for the Atari.
+
+5.1.4) font
+-----------
+
+:Syntax: font:<fontname>
+
+Specify the font to use in text modes. Functionally the same as the
+"font" sub-option for the Atari, except that `PEARL8x8` is used instead
+of `VGA8x8` if the vertical size of the display is less than 400 pixel
+rows.
+
+5.1.5) monitorcap:
+-------------------
+
+:Syntax: monitorcap:<vmin>;<vmax>;<hmin>;<hmax>
+
+This describes the capabilities of a multisync monitor. For now, only
+the color frame buffer uses the settings of "monitorcap:".
+
+<vmin> and <vmax> are the minimum and maximum, resp., vertical frequencies
+your monitor can work with, in Hz. <hmin> and <hmax> are the same for
+the horizontal frequency, in kHz.
+
+The defaults are 50;90;15;38 (Generic Amiga multisync monitor).
+
+
+5.2) fd_def_df0=
+----------------
+
+:Syntax: fd_def_df0=<value>
+
+Sets the df0 value for "silent" floppy drives. The value should be in
+hexadecimal with "0x" prefix.
+
+
+5.3) wd33c93=
+-------------
+
+:Syntax: wd33c93=<sub-options...>
+
+These options affect the A590/A2091, A3000 and GVP Series II SCSI
+controllers.
+
+The <sub-options> is a comma-separated list of the sub-options listed
+below.
+
+5.3.1) nosync
+-------------
+
+:Syntax: nosync:bitmask
+
+bitmask is a byte where the 1st 7 bits correspond with the 7
+possible SCSI devices. Set a bit to prevent sync negotiation on that
+device. To maintain backwards compatibility, a command-line such as
+"wd33c93=255" will be automatically translated to
+"wd33c93=nosync:0xff". The default is to disable sync negotiation for
+all devices, eg. nosync:0xff.
+
+5.3.2) period
+-------------
+
+:Syntax: period:ns
+
+`ns` is the minimum # of nanoseconds in a SCSI data transfer
+period. Default is 500; acceptable values are 250 - 1000.
+
+5.3.3) disconnect
+-----------------
+
+:Syntax: disconnect:x
+
+Specify x = 0 to never allow disconnects, 2 to always allow them.
+x = 1 does 'adaptive' disconnects, which is the default and generally
+the best choice.
+
+5.3.4) debug
+------------
+
+:Syntax: debug:x
+
+If `DEBUGGING_ON` is defined, x is a bit mask that causes various
+types of debug output to printed - see the DB_xxx defines in
+wd33c93.h.
+
+5.3.5) clock
+------------
+
+:Syntax: clock:x
+
+x = clock input in MHz for WD33c93 chip. Normal values would be from
+8 through 20. The default value depends on your hostadapter(s),
+default for the A3000 internal controller is 14, for the A2091 it's 8
+and for the GVP hostadapters it's either 8 or 14, depending on the
+hostadapter and the SCSI-clock jumper present on some GVP
+hostadapters.
+
+5.3.6) next
+-----------
+
+No argument. Used to separate blocks of keywords when there's more
+than one wd33c93-based host adapter in the system.
+
+5.3.7) nodma
+------------
+
+:Syntax: nodma:x
+
+If x is 1 (or if the option is just written as "nodma"), the WD33c93
+controller will not use DMA (= direct memory access) to access the
+Amiga's memory.  This is useful for some systems (like A3000's and
+A4000's with the A3640 accelerator, revision 3.0) that have problems
+using DMA to chip memory.  The default is 0, i.e. to use DMA if
+possible.
+
+
+5.4) gvp11=
+-----------
+
+:Syntax: gvp11=<addr-mask>
+
+The earlier versions of the GVP driver did not handle DMA
+address-mask settings correctly which made it necessary for some
+people to use this option, in order to get their GVP controller
+running under Linux. These problems have hopefully been solved and the
+use of this option is now highly unrecommended!
+
+Incorrect use can lead to unpredictable behavior, so please only use
+this option if you *know* what you are doing and have a reason to do
+so. In any case if you experience problems and need to use this
+option, please inform us about it by mailing to the Linux/68k kernel
+mailing list.
+
+The address mask set by this option specifies which addresses are
+valid for DMA with the GVP Series II SCSI controller. An address is
+valid, if no bits are set except the bits that are set in the mask,
+too.
+
+Some versions of the GVP can only DMA into a 24 bit address range,
+some can address a 25 bit address range while others can use the whole
+32 bit address range for DMA. The correct setting depends on your
+controller and should be autodetected by the driver. An example is the
+24 bit region which is specified by a mask of 0x00fffffe.
diff --git a/Documentation/m68k/kernel-options.txt b/Documentation/m68k/kernel-options.txt
deleted file mode 100644
index 79d21246c75a..000000000000
--- a/Documentation/m68k/kernel-options.txt
+++ /dev/null
@@ -1,884 +0,0 @@
-
-
-				  Command Line Options for Linux/m68k
-				  ===================================
-
-Last Update: 2 May 1999
-Linux/m68k version: 2.2.6
-Author: Roman.Hodek@informatik.uni-erlangen.de (Roman Hodek)
-Update: jds@kom.auc.dk (Jes Sorensen) and faq@linux-m68k.org (Chris Lawrence)
-
-0) Introduction
-===============
-
-  Often I've been asked which command line options the Linux/m68k
-kernel understands, or how the exact syntax for the ... option is, or
-... about the option ... . I hope, this document supplies all the
-answers...
-
-  Note that some options might be outdated, their descriptions being
-incomplete or missing. Please update the information and send in the
-patches.
-
-
-1) Overview of the Kernel's Option Processing
-=============================================
-
-The kernel knows three kinds of options on its command line:
-
-  1) kernel options
-  2) environment settings
-  3) arguments for init
-
-To which of these classes an argument belongs is determined as
-follows: If the option is known to the kernel itself, i.e. if the name
-(the part before the '=') or, in some cases, the whole argument string
-is known to the kernel, it belongs to class 1. Otherwise, if the
-argument contains an '=', it is of class 2, and the definition is put
-into init's environment. All other arguments are passed to init as
-command line options.
-
-  This document describes the valid kernel options for Linux/m68k in
-the version mentioned at the start of this file. Later revisions may
-add new such options, and some may be missing in older versions.
-
-  In general, the value (the part after the '=') of an option is a
-list of values separated by commas. The interpretation of these values
-is up to the driver that "owns" the option. This association of
-options with drivers is also the reason that some are further
-subdivided.
-
-
-2) General Kernel Options
-=========================
-
-2.1) root=
-----------
-
-Syntax: root=/dev/<device>
-    or: root=<hex_number>
-
-This tells the kernel which device it should mount as the root
-filesystem. The device must be a block device with a valid filesystem
-on it.
-
-  The first syntax gives the device by name. These names are converted
-into a major/minor number internally in the kernel in an unusual way.
-Normally, this "conversion" is done by the device files in /dev, but
-this isn't possible here, because the root filesystem (with /dev)
-isn't mounted yet... So the kernel parses the name itself, with some
-hardcoded name to number mappings. The name must always be a
-combination of two or three letters, followed by a decimal number.
-Valid names are:
-
-  /dev/ram: -> 0x0100 (initial ramdisk)
-  /dev/hda: -> 0x0300 (first IDE disk)
-  /dev/hdb: -> 0x0340 (second IDE disk)
-  /dev/sda: -> 0x0800 (first SCSI disk)
-  /dev/sdb: -> 0x0810 (second SCSI disk)
-  /dev/sdc: -> 0x0820 (third SCSI disk)
-  /dev/sdd: -> 0x0830 (forth SCSI disk)
-  /dev/sde: -> 0x0840 (fifth SCSI disk)
-  /dev/fd : -> 0x0200 (floppy disk)
-
-  The name must be followed by a decimal number, that stands for the
-partition number. Internally, the value of the number is just
-added to the device number mentioned in the table above. The
-exceptions are /dev/ram and /dev/fd, where /dev/ram refers to an
-initial ramdisk loaded by your bootstrap program (please consult the
-instructions for your bootstrap program to find out how to load an
-initial ramdisk). As of kernel version 2.0.18 you must specify
-/dev/ram as the root device if you want to boot from an initial
-ramdisk. For the floppy devices, /dev/fd, the number stands for the
-floppy drive number (there are no partitions on floppy disks). I.e.,
-/dev/fd0 stands for the first drive, /dev/fd1 for the second, and so
-on. Since the number is just added, you can also force the disk format
-by adding a number greater than 3. If you look into your /dev
-directory, use can see the /dev/fd0D720 has major 2 and minor 16. You
-can specify this device for the root FS by writing "root=/dev/fd16" on
-the kernel command line.
-
-[Strange and maybe uninteresting stuff ON]
-
-  This unusual translation of device names has some strange
-consequences: If, for example, you have a symbolic link from /dev/fd
-to /dev/fd0D720 as an abbreviation for floppy driver #0 in DD format,
-you cannot use this name for specifying the root device, because the
-kernel cannot see this symlink before mounting the root FS and it
-isn't in the table above. If you use it, the root device will not be 
-set at all, without an error message. Another example: You cannot use a
-partition on e.g. the sixth SCSI disk as the root filesystem, if you
-want to specify it by name. This is, because only the devices up to
-/dev/sde are in the table above, but not /dev/sdf. Although, you can
-use the sixth SCSI disk for the root FS, but you have to specify the
-device by number... (see below). Or, even more strange, you can use the
-fact that there is no range checking of the partition number, and your
-knowledge that each disk uses 16 minors, and write "root=/dev/sde17"
-(for /dev/sdf1).
-
-[Strange and maybe uninteresting stuff OFF]
-
-  If the device containing your root partition isn't in the table
-above, you can also specify it by major and minor numbers. These are
-written in hex, with no prefix and no separator between. E.g., if you
-have a CD with contents appropriate as a root filesystem in the first
-SCSI CD-ROM drive, you boot from it by "root=0b00". Here, hex "0b" =
-decimal 11 is the major of SCSI CD-ROMs, and the minor 0 stands for
-the first of these. You can find out all valid major numbers by
-looking into include/linux/major.h.
-
-In addition to major and minor numbers, if the device containing your
-root partition uses a partition table format with unique partition
-identifiers, then you may use them.  For instance,
-"root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF".  It is also
-possible to reference another partition on the same device using a
-known partition UUID as the starting point.  For example,
-if partition 5 of the device has the UUID of
-00112233-4455-6677-8899-AABBCCDDEEFF then partition 3 may be found as
-follows:
-  PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2
-
-Authoritative information can be found in
-"Documentation/admin-guide/kernel-parameters.rst".
-
-
-2.2) ro, rw
------------
-
-Syntax: ro
-    or: rw
-
-These two options tell the kernel whether it should mount the root
-filesystem read-only or read-write. The default is read-only, except
-for ramdisks, which default to read-write.
-
-
-2.3) debug
-----------
-
-Syntax: debug
-
-This raises the kernel log level to 10 (the default is 7). This is the
-same level as set by the "dmesg" command, just that the maximum level
-selectable by dmesg is 8.
-
-
-2.4) debug=
------------
-
-Syntax: debug=<device>
-
-This option causes certain kernel messages be printed to the selected
-debugging device. This can aid debugging the kernel, since the
-messages can be captured and analyzed on some other machine. Which
-devices are possible depends on the machine type. There are no checks
-for the validity of the device name. If the device isn't implemented,
-nothing happens.
-
-  Messages logged this way are in general stack dumps after kernel
-memory faults or bad kernel traps, and kernel panics. To be exact: all
-messages of level 0 (panic messages) and all messages printed while
-the log level is 8 or more (their level doesn't matter). Before stack
-dumps, the kernel sets the log level to 10 automatically. A level of
-at least 8 can also be set by the "debug" command line option (see
-2.3) and at run time with "dmesg -n 8".
-
-Devices possible for Amiga:
-
- - "ser": built-in serial port; parameters: 9600bps, 8N1
- - "mem": Save the messages to a reserved area in chip mem. After
-          rebooting, they can be read under AmigaOS with the tool
-          'dmesg'.
-
-Devices possible for Atari:
-
- - "ser1": ST-MFP serial port ("Modem1"); parameters: 9600bps, 8N1
- - "ser2": SCC channel B serial port ("Modem2"); parameters: 9600bps, 8N1
- - "ser" : default serial port
-           This is "ser2" for a Falcon, and "ser1" for any other machine
- - "midi": The MIDI port; parameters: 31250bps, 8N1
- - "par" : parallel port
-           The printing routine for this implements a timeout for the
-           case there's no printer connected (else the kernel would
-           lock up). The timeout is not exact, but usually a few
-           seconds.
-
-
-2.6) ramdisk_size=
--------------
-
-Syntax: ramdisk_size=<size>
-
-  This option instructs the kernel to set up a ramdisk of the given
-size in KBytes. Do not use this option if the ramdisk contents are
-passed by bootstrap! In this case, the size is selected automatically
-and should not be overwritten.
-
-  The only application is for root filesystems on floppy disks, that
-should be loaded into memory. To do that, select the corresponding
-size of the disk as ramdisk size, and set the root device to the disk
-drive (with "root=").
-
-
-2.7) swap=
-2.8) buff=
------------
-
-  I can't find any sign of these options in 2.2.6.
-
-
-3) General Device Options (Amiga and Atari)
-===========================================
-
-3.1) ether=
------------
-
-Syntax: ether=[<irq>[,<base_addr>[,<mem_start>[,<mem_end>]]]],<dev-name>
-
-  <dev-name> is the name of a net driver, as specified in
-drivers/net/Space.c in the Linux source. Most prominent are eth0, ...
-eth3, sl0, ... sl3, ppp0, ..., ppp3, dummy, and lo.
-
-  The non-ethernet drivers (sl, ppp, dummy, lo) obviously ignore the
-settings by this options. Also, the existing ethernet drivers for
-Linux/m68k (ariadne, a2065, hydra) don't use them because Zorro boards
-are really Plug-'n-Play, so the "ether=" option is useless altogether
-for Linux/m68k.
-
-
-3.2) hd=
---------
-
-Syntax: hd=<cylinders>,<heads>,<sectors>
-
-  This option sets the disk geometry of an IDE disk. The first hd=
-option is for the first IDE disk, the second for the second one.
-(I.e., you can give this option twice.) In most cases, you won't have
-to use this option, since the kernel can obtain the geometry data
-itself. It exists just for the case that this fails for one of your
-disks.
-
-
-3.3) max_scsi_luns=
--------------------
-
-Syntax: max_scsi_luns=<n>
-
-  Sets the maximum number of LUNs (logical units) of SCSI devices to
-be scanned. Valid values for <n> are between 1 and 8. Default is 8 if
-"Probe all LUNs on each SCSI device" was selected during the kernel
-configuration, else 1.
-
-
-3.4) st=
---------
-
-Syntax: st=<buffer_size>,[<write_thres>,[<max_buffers>]]
-
-  Sets several parameters of the SCSI tape driver. <buffer_size> is
-the number of 512-byte buffers reserved for tape operations for each
-device. <write_thres> sets the number of blocks which must be filled
-to start an actual write operation to the tape. Maximum value is the
-total number of buffers. <max_buffer> limits the total number of
-buffers allocated for all tape devices.
-
-
-3.5) dmasound=
---------------
-
-Syntax: dmasound=[<buffers>,<buffer-size>[,<catch-radius>]]
-
-  This option controls some configurations of the Linux/m68k DMA sound
-driver (Amiga and Atari): <buffers> is the number of buffers you want
-to use (minimum 4, default 4), <buffer-size> is the size of each
-buffer in kilobytes (minimum 4, default 32) and <catch-radius> says
-how much percent of error will be tolerated when setting a frequency
-(maximum 10, default 0). For example with 3% you can play 8000Hz
-AU-Files on the Falcon with its hardware frequency of 8195Hz and thus
-don't need to expand the sound.
-
-
-
-4) Options for Atari Only
-=========================
-
-4.1) video=
------------
-
-Syntax: video=<fbname>:<sub-options...>
-
-The <fbname> parameter specifies the name of the frame buffer,
-eg. most atari users will want to specify `atafb' here. The
-<sub-options> is a comma-separated list of the sub-options listed
-below.
-
-NB: Please notice that this option was renamed from `atavideo' to
-    `video' during the development of the 1.3.x kernels, thus you
-    might need to update your boot-scripts if upgrading to 2.x from
-    an 1.2.x kernel.
-
-NBB: The behavior of video= was changed in 2.1.57 so the recommended
-option is to specify the name of the frame buffer.
-
-4.1.1) Video Mode
------------------
-
-This sub-option may be any of the predefined video modes, as listed
-in atari/atafb.c in the Linux/m68k source tree. The kernel will
-activate the given video mode at boot time and make it the default
-mode, if the hardware allows. Currently defined names are:
-
- - stlow           : 320x200x4
- - stmid, default5 : 640x200x2
- - sthigh, default4: 640x400x1
- - ttlow           : 320x480x8, TT only
- - ttmid, default1 : 640x480x4, TT only
- - tthigh, default2: 1280x960x1, TT only
- - vga2            : 640x480x1, Falcon only
- - vga4            : 640x480x2, Falcon only
- - vga16, default3 : 640x480x4, Falcon only
- - vga256          : 640x480x8, Falcon only
- - falh2           : 896x608x1, Falcon only
- - falh16          : 896x608x4, Falcon only
-
-  If no video mode is given on the command line, the kernel tries the
-modes names "default<n>" in turn, until one is possible with the
-hardware in use.
-
-  A video mode setting doesn't make sense, if the external driver is
-activated by a "external:" sub-option.
-
-4.1.2) inverse
---------------
-
-Invert the display. This affects both, text (consoles) and graphics
-(X) display. Usually, the background is chosen to be black. With this
-option, you can make the background white.
-
-4.1.3) font
------------
-
-Syntax: font:<fontname>
-
-Specify the font to use in text modes. Currently you can choose only
-between `VGA8x8', `VGA8x16' and `PEARL8x8'. `VGA8x8' is default, if the
-vertical size of the display is less than 400 pixel rows. Otherwise, the
-`VGA8x16' font is the default.
-
-4.1.4) hwscroll_
-----------------
-
-Syntax: hwscroll_<n>
-
-The number of additional lines of video memory to reserve for
-speeding up the scrolling ("hardware scrolling"). Hardware scrolling
-is possible only if the kernel can set the video base address in steps
-fine enough. This is true for STE, MegaSTE, TT, and Falcon. It is not
-possible with plain STs and graphics cards (The former because the
-base address must be on a 256 byte boundary there, the latter because
-the kernel doesn't know how to set the base address at all.)
-
-  By default, <n> is set to the number of visible text lines on the
-display. Thus, the amount of video memory is doubled, compared to no
-hardware scrolling. You can turn off the hardware scrolling altogether
-by setting <n> to 0.
-
-4.1.5) internal:
-----------------
-
-Syntax: internal:<xres>;<yres>[;<xres_max>;<yres_max>;<offset>]
-
-This option specifies the capabilities of some extended internal video
-hardware, like e.g. OverScan. <xres> and <yres> give the (extended)
-dimensions of the screen.
-
-  If your OverScan needs a black border, you have to write the last
-three arguments of the "internal:". <xres_max> is the maximum line
-length the hardware allows, <yres_max> the maximum number of lines.
-<offset> is the offset of the visible part of the screen memory to its
-physical start, in bytes.
-
-  Often, extended interval video hardware has to be activated somehow.
-For this, see the "sw_*" options below.
-
-4.1.6) external:
-----------------
-
-Syntax:
-  external:<xres>;<yres>;<depth>;<org>;<scrmem>[;<scrlen>[;<vgabase>\
-           [;<colw>[;<coltype>[;<xres_virtual>]]]]]
-
-[I had to break this line...]
-
-  This is probably the most complicated parameter... It specifies that
-you have some external video hardware (a graphics board), and how to
-use it under Linux/m68k. The kernel cannot know more about the hardware
-than you tell it here! The kernel also is unable to set or change any
-video modes, since it doesn't know about any board internal. So, you
-have to switch to that video mode before you start Linux, and cannot
-switch to another mode once Linux has started.
-
-  The first 3 parameters of this sub-option should be obvious: <xres>,
-<yres> and <depth> give the dimensions of the screen and the number of
-planes (depth). The depth is the logarithm to base 2 of the number
-of colors possible. (Or, the other way round: The number of colors is
-2^depth).
-
-  You have to tell the kernel furthermore how the video memory is
-organized. This is done by a letter as <org> parameter:
-
- 'n': "normal planes", i.e. one whole plane after another
- 'i': "interleaved planes", i.e. 16 bit of the first plane, than 16 bit
-      of the next, and so on... This mode is used only with the
-	  built-in Atari video modes, I think there is no card that
-	  supports this mode.
- 'p': "packed pixels", i.e. <depth> consecutive bits stand for all
-	  planes of one pixel; this is the most common mode for 8 planes
-	  (256 colors) on graphic cards
- 't': "true color" (more or less packed pixels, but without a color
-	  lookup table); usually depth is 24
-
-For monochrome modes (i.e., <depth> is 1), the <org> letter has a
-different meaning:
-
- 'n': normal colors, i.e. 0=white, 1=black
- 'i': inverted colors, i.e. 0=black, 1=white
-
-  The next important information about the video hardware is the base
-address of the video memory. That is given in the <scrmem> parameter,
-as a hexadecimal number with a "0x" prefix. You have to find out this
-address in the documentation of your hardware.
-
-  The next parameter, <scrlen>, tells the kernel about the size of the
-video memory. If it's missing, the size is calculated from <xres>,
-<yres>, and <depth>. For now, it is not useful to write a value here.
-It would be used only for hardware scrolling (which isn't possible
-with the external driver, because the kernel cannot set the video base
-address), or for virtual resolutions under X (which the X server
-doesn't support yet). So, it's currently best to leave this field
-empty, either by ending the "external:" after the video address or by
-writing two consecutive semicolons, if you want to give a <vgabase>
-(it is allowed to leave this parameter empty).
-
-  The <vgabase> parameter is optional. If it is not given, the kernel
-cannot read or write any color registers of the video hardware, and
-thus you have to set appropriate colors before you start Linux. But if
-your card is somehow VGA compatible, you can tell the kernel the base
-address of the VGA register set, so it can change the color lookup
-table. You have to look up this address in your board's documentation.
-To avoid misunderstandings: <vgabase> is the _base_ address, i.e. a 4k
-aligned address. For read/writing the color registers, the kernel
-uses the addresses vgabase+0x3c7...vgabase+0x3c9. The <vgabase>
-parameter is written in hexadecimal with a "0x" prefix, just as
-<scrmem>.
-
-  <colw> is meaningful only if <vgabase> is specified. It tells the
-kernel how wide each of the color register is, i.e. the number of bits
-per single color (red/green/blue). Default is 6, another quite usual
-value is 8.
-
-  Also <coltype> is used together with <vgabase>. It tells the kernel
-about the color register model of your gfx board. Currently, the types
-"vga" (which is also the default) and "mv300" (SANG MV300) are
-implemented.
-
-  Parameter <xres_virtual> is required for ProMST or ET4000 cards where
-the physical linelength differs from the visible length. With ProMST, 
-xres_virtual must be set to 2048. For ET4000, xres_virtual depends on the
-initialisation of the video-card.
-If you're missing a corresponding yres_virtual: the external part is legacy,
-therefore we don't support hardware-dependent functions like hardware-scroll,
-panning or blanking.
-
-4.1.7) eclock:
---------------
-
-The external pixel clock attached to the Falcon VIDEL shifter. This
-currently works only with the ScreenWonder!
-
-4.1.8) monitorcap:
--------------------
-
-Syntax: monitorcap:<vmin>;<vmax>;<hmin>;<hmax>
-
-This describes the capabilities of a multisync monitor. Don't use it
-with a fixed-frequency monitor! For now, only the Falcon frame buffer
-uses the settings of "monitorcap:".
-
-  <vmin> and <vmax> are the minimum and maximum, resp., vertical frequencies
-your monitor can work with, in Hz. <hmin> and <hmax> are the same for
-the horizontal frequency, in kHz.
-
-  The defaults are 58;62;31;32 (VGA compatible).
-
-  The defaults for TV/SC1224/SC1435 cover both PAL and NTSC standards.
-
-4.1.9) keep
-------------
-
-If this option is given, the framebuffer device doesn't do any video
-mode calculations and settings on its own. The only Atari fb device
-that does this currently is the Falcon.
-
-  What you reach with this: Settings for unknown video extensions
-aren't overridden by the driver, so you can still use the mode found
-when booting, when the driver doesn't know to set this mode itself.
-But this also means, that you can't switch video modes anymore...
-
-  An example where you may want to use "keep" is the ScreenBlaster for
-the Falcon.
-
-
-4.2) atamouse=
---------------
-
-Syntax: atamouse=<x-threshold>,[<y-threshold>]
-
-  With this option, you can set the mouse movement reporting threshold.
-This is the number of pixels of mouse movement that have to accumulate
-before the IKBD sends a new mouse packet to the kernel. Higher values
-reduce the mouse interrupt load and thus reduce the chance of keyboard
-overruns. Lower values give a slightly faster mouse responses and
-slightly better mouse tracking.
-
-  You can set the threshold in x and y separately, but usually this is
-of little practical use. If there's just one number in the option, it
-is used for both dimensions. The default value is 2 for both
-thresholds.
-
-
-4.3) ataflop=
--------------
-
-Syntax: ataflop=<drive type>[,<trackbuffering>[,<steprateA>[,<steprateB>]]]
-
-   The drive type may be 0, 1, or 2, for DD, HD, and ED, resp. This
-   setting affects how many buffers are reserved and which formats are
-   probed (see also below). The default is 1 (HD). Only one drive type
-   can be selected. If you have two disk drives, select the "better"
-   type.
-
-   The second parameter <trackbuffer> tells the kernel whether to use
-   track buffering (1) or not (0). The default is machine-dependent:
-   no for the Medusa and yes for all others.
-
-   With the two following parameters, you can change the default
-   steprate used for drive A and B, resp. 
-
-
-4.4) atascsi=
--------------
-
-Syntax: atascsi=<can_queue>[,<cmd_per_lun>[,<scat-gat>[,<host-id>[,<tagged>]]]]
-
-  This option sets some parameters for the Atari native SCSI driver.
-Generally, any number of arguments can be omitted from the end. And
-for each of the numbers, a negative value means "use default". The
-defaults depend on whether TT-style or Falcon-style SCSI is used.
-Below, defaults are noted as n/m, where the first value refers to
-TT-SCSI and the latter to Falcon-SCSI. If an illegal value is given
-for one parameter, an error message is printed and that one setting is
-ignored (others aren't affected).
-
-  <can_queue>:
-    This is the maximum number of SCSI commands queued internally to the
-    Atari SCSI driver. A value of 1 effectively turns off the driver
-    internal multitasking (if it causes problems). Legal values are >=
-    1. <can_queue> can be as high as you like, but values greater than
-    <cmd_per_lun> times the number of SCSI targets (LUNs) you have
-    don't make sense. Default: 16/8.
-
-  <cmd_per_lun>:
-    Maximum number of SCSI commands issued to the driver for one
-    logical unit (LUN, usually one SCSI target). Legal values start
-    from 1. If tagged queuing (see below) is not used, values greater
-    than 2 don't make sense, but waste memory. Otherwise, the maximum
-    is the number of command tags available to the driver (currently
-    32). Default: 8/1. (Note: Values > 1 seem to cause problems on a
-    Falcon, cause not yet known.)
-
-      The <cmd_per_lun> value at a great part determines the amount of
-    memory SCSI reserves for itself. The formula is rather
-    complicated, but I can give you some hints:
-      no scatter-gather  : cmd_per_lun * 232 bytes
-      full scatter-gather: cmd_per_lun * approx. 17 Kbytes
-
-  <scat-gat>:
-    Size of the scatter-gather table, i.e. the number of requests
-    consecutive on the disk that can be merged into one SCSI command.
-    Legal values are between 0 and 255. Default: 255/0. Note: This
-    value is forced to 0 on a Falcon, since scatter-gather isn't
-    possible with the ST-DMA. Not using scatter-gather hurts
-    performance significantly.
-
-  <host-id>:
-    The SCSI ID to be used by the initiator (your Atari). This is
-    usually 7, the highest possible ID. Every ID on the SCSI bus must
-    be unique. Default: determined at run time: If the NV-RAM checksum
-    is valid, and bit 7 in byte 30 of the NV-RAM is set, the lower 3
-    bits of this byte are used as the host ID. (This method is defined
-    by Atari and also used by some TOS HD drivers.) If the above
-    isn't given, the default ID is 7. (both, TT and Falcon).
-
-  <tagged>:
-    0 means turn off tagged queuing support, all other values > 0 mean
-    use tagged queuing for targets that support it. Default: currently
-    off, but this may change when tagged queuing handling has been
-    proved to be reliable.
-
-    Tagged queuing means that more than one command can be issued to
-    one LUN, and the SCSI device itself orders the requests so they
-    can be performed in optimal order. Not all SCSI devices support
-    tagged queuing (:-().
-
-4.5 switches=
--------------
-
-Syntax: switches=<list of switches>
-
-  With this option you can switch some hardware lines that are often
-used to enable/disable certain hardware extensions. Examples are
-OverScan, overclocking, ...
-
-  The <list of switches> is a comma-separated list of the following
-items:
-
-  ikbd: set RTS of the keyboard ACIA high
-  midi: set RTS of the MIDI ACIA high
-  snd6: set bit 6 of the PSG port A
-  snd7: set bit 6 of the PSG port A
-
-It doesn't make sense to mention a switch more than once (no
-difference to only once), but you can give as many switches as you
-want to enable different features. The switch lines are set as early
-as possible during kernel initialization (even before determining the
-present hardware.)
-
-  All of the items can also be prefixed with "ov_", i.e. "ov_ikbd",
-"ov_midi", ... These options are meant for switching on an OverScan
-video extension. The difference to the bare option is that the
-switch-on is done after video initialization, and somehow synchronized
-to the HBLANK. A speciality is that ov_ikbd and ov_midi are switched
-off before rebooting, so that OverScan is disabled and TOS boots
-correctly.
-
-  If you give an option both, with and without the "ov_" prefix, the
-earlier initialization ("ov_"-less) takes precedence. But the
-switching-off on reset still happens in this case.
-
-5) Options for Amiga Only:
-==========================
-
-5.1) video=
------------
-
-Syntax: video=<fbname>:<sub-options...>
-
-The <fbname> parameter specifies the name of the frame buffer, valid
-options are `amifb', `cyber', 'virge', `retz3' and `clgen', provided
-that the respective frame buffer devices have been compiled into the
-kernel (or compiled as loadable modules). The behavior of the <fbname>
-option was changed in 2.1.57 so it is now recommended to specify this
-option.
-
-The <sub-options> is a comma-separated list of the sub-options listed
-below. This option is organized similar to the Atari version of the
-"video"-option (4.1), but knows fewer sub-options.
-
-5.1.1) video mode
------------------
-
-Again, similar to the video mode for the Atari (see 4.1.1). Predefined
-modes depend on the used frame buffer device.
-
-OCS, ECS and AGA machines all use the color frame buffer. The following
-predefined video modes are available:
-
-NTSC modes:
- - ntsc            : 640x200, 15 kHz, 60 Hz
- - ntsc-lace       : 640x400, 15 kHz, 60 Hz interlaced
-PAL modes:
- - pal             : 640x256, 15 kHz, 50 Hz
- - pal-lace        : 640x512, 15 kHz, 50 Hz interlaced
-ECS modes:
- - multiscan       : 640x480, 29 kHz, 57 Hz
- - multiscan-lace  : 640x960, 29 kHz, 57 Hz interlaced
- - euro36          : 640x200, 15 kHz, 72 Hz
- - euro36-lace     : 640x400, 15 kHz, 72 Hz interlaced
- - euro72          : 640x400, 29 kHz, 68 Hz
- - euro72-lace     : 640x800, 29 kHz, 68 Hz interlaced
- - super72         : 800x300, 23 kHz, 70 Hz
- - super72-lace    : 800x600, 23 kHz, 70 Hz interlaced
- - dblntsc-ff      : 640x400, 27 kHz, 57 Hz
- - dblntsc-lace    : 640x800, 27 kHz, 57 Hz interlaced
- - dblpal-ff       : 640x512, 27 kHz, 47 Hz
- - dblpal-lace     : 640x1024, 27 kHz, 47 Hz interlaced
- - dblntsc         : 640x200, 27 kHz, 57 Hz doublescan
- - dblpal          : 640x256, 27 kHz, 47 Hz doublescan
-VGA modes:
- - vga             : 640x480, 31 kHz, 60 Hz
- - vga70           : 640x400, 31 kHz, 70 Hz
-
-Please notice that the ECS and VGA modes require either an ECS or AGA
-chipset, and that these modes are limited to 2-bit color for the ECS
-chipset and 8-bit color for the AGA chipset.
-
-5.1.2) depth
-------------
-
-Syntax: depth:<nr. of bit-planes>
-
-Specify the number of bit-planes for the selected video-mode.
-
-5.1.3) inverse
---------------
-
-Use inverted display (black on white). Functionally the same as the
-"inverse" sub-option for the Atari.
-
-5.1.4) font
------------
-
-Syntax: font:<fontname>
-
-Specify the font to use in text modes. Functionally the same as the
-"font" sub-option for the Atari, except that `PEARL8x8' is used instead
-of `VGA8x8' if the vertical size of the display is less than 400 pixel
-rows.
-
-5.1.5) monitorcap:
--------------------
-
-Syntax: monitorcap:<vmin>;<vmax>;<hmin>;<hmax>
-
-This describes the capabilities of a multisync monitor. For now, only
-the color frame buffer uses the settings of "monitorcap:".
-
-  <vmin> and <vmax> are the minimum and maximum, resp., vertical frequencies
-your monitor can work with, in Hz. <hmin> and <hmax> are the same for
-the horizontal frequency, in kHz.
-
-  The defaults are 50;90;15;38 (Generic Amiga multisync monitor).
-
-
-5.2) fd_def_df0=
-----------------
-
-Syntax: fd_def_df0=<value>
-
-Sets the df0 value for "silent" floppy drives. The value should be in
-hexadecimal with "0x" prefix.
-
-
-5.3) wd33c93=
--------------
-
-Syntax: wd33c93=<sub-options...>
-
-These options affect the A590/A2091, A3000 and GVP Series II SCSI
-controllers.
-
-The <sub-options> is a comma-separated list of the sub-options listed
-below.
-
-5.3.1) nosync
--------------
-
-Syntax: nosync:bitmask
-
-  bitmask is a byte where the 1st 7 bits correspond with the 7
-possible SCSI devices. Set a bit to prevent sync negotiation on that
-device. To maintain backwards compatibility, a command-line such as
-"wd33c93=255" will be automatically translated to
-"wd33c93=nosync:0xff". The default is to disable sync negotiation for
-all devices, eg. nosync:0xff.
-
-5.3.2) period
--------------
-
-Syntax: period:ns
-
-  `ns' is the minimum # of nanoseconds in a SCSI data transfer
-period. Default is 500; acceptable values are 250 - 1000.
-
-5.3.3) disconnect
------------------
-
-Syntax: disconnect:x
-
-  Specify x = 0 to never allow disconnects, 2 to always allow them.
-x = 1 does 'adaptive' disconnects, which is the default and generally
-the best choice.
-
-5.3.4) debug
-------------
-
-Syntax: debug:x
-
-  If `DEBUGGING_ON' is defined, x is a bit mask that causes various
-types of debug output to printed - see the DB_xxx defines in
-wd33c93.h.
-
-5.3.5) clock
-------------
-
-Syntax: clock:x
-
-  x = clock input in MHz for WD33c93 chip. Normal values would be from
-8 through 20. The default value depends on your hostadapter(s),
-default for the A3000 internal controller is 14, for the A2091 it's 8
-and for the GVP hostadapters it's either 8 or 14, depending on the
-hostadapter and the SCSI-clock jumper present on some GVP
-hostadapters.
-
-5.3.6) next
------------
-
-  No argument. Used to separate blocks of keywords when there's more
-than one wd33c93-based host adapter in the system.
-
-5.3.7) nodma
-------------
-
-Syntax: nodma:x
-
-  If x is 1 (or if the option is just written as "nodma"), the WD33c93
-controller will not use DMA (= direct memory access) to access the
-Amiga's memory.  This is useful for some systems (like A3000's and
-A4000's with the A3640 accelerator, revision 3.0) that have problems
-using DMA to chip memory.  The default is 0, i.e. to use DMA if
-possible.
-
-
-5.4) gvp11=
------------
-
-Syntax: gvp11=<addr-mask>
-
-  The earlier versions of the GVP driver did not handle DMA
-address-mask settings correctly which made it necessary for some
-people to use this option, in order to get their GVP controller
-running under Linux. These problems have hopefully been solved and the
-use of this option is now highly unrecommended!
-
-  Incorrect use can lead to unpredictable behavior, so please only use
-this option if you *know* what you are doing and have a reason to do
-so. In any case if you experience problems and need to use this
-option, please inform us about it by mailing to the Linux/68k kernel
-mailing list.
-
-  The address mask set by this option specifies which addresses are
-valid for DMA with the GVP Series II SCSI controller. An address is
-valid, if no bits are set except the bits that are set in the mask,
-too.
-
-  Some versions of the GVP can only DMA into a 24 bit address range,
-some can address a 25 bit address range while others can use the whole
-32 bit address range for DMA. The correct setting depends on your
-controller and should be autodetected by the driver. An example is the
-24 bit region which is specified by a mask of 0x00fffffe.
-
-
-/* Local Variables: */
-/* mode: text       */
-/* End:             */
-- 
cgit 


From 01c0aa794305ae08eb977d0719e43577e93f9ef5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sun, 14 Apr 2019 08:37:58 -0300
Subject: docs: cma/debugfs.txt: convert docs to ReST and rename to *.rst

The debugfs interface for CMA should be there together with other
mm-related documents.

Convert this small file to ReST and move it to its rightful place.

The conversion is actually quite simple: just add a title for the
document. In order to make it to look better for the audience,
also mark the "echo" command as a literal block.

While this is not part of any book, mark it as :orphan:,
in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/cma/debugfs.rst | 27 +++++++++++++++++++++++++++
 Documentation/cma/debugfs.txt | 21 ---------------------
 2 files changed, 27 insertions(+), 21 deletions(-)
 create mode 100644 Documentation/cma/debugfs.rst
 delete mode 100644 Documentation/cma/debugfs.txt

(limited to 'Documentation')

diff --git a/Documentation/cma/debugfs.rst b/Documentation/cma/debugfs.rst
new file mode 100644
index 000000000000..518fe401b5ee
--- /dev/null
+++ b/Documentation/cma/debugfs.rst
@@ -0,0 +1,27 @@
+:orphan:
+
+=====================
+CMA Debugfs Interface
+=====================
+
+The CMA debugfs interface is useful to retrieve basic information out of the
+different CMA areas and to test allocation/release in each of the areas.
+
+Each CMA zone represents a directory under <debugfs>/cma/, indexed by the
+kernel's CMA index. So the first CMA zone would be:
+
+	<debugfs>/cma/cma-0
+
+The structure of the files created under that directory is as follows:
+
+ - [RO] base_pfn: The base PFN (Page Frame Number) of the zone.
+ - [RO] count: Amount of memory in the CMA area.
+ - [RO] order_per_bit: Order of pages represented by one bit.
+ - [RO] bitmap: The bitmap of page states in the zone.
+ - [WO] alloc: Allocate N pages from that CMA area. For example::
+
+	echo 5 > <debugfs>/cma/cma-2/alloc
+
+would try to allocate 5 pages from the cma-2 area.
+
+ - [WO] free: Free N pages from that CMA area, similar to the above.
diff --git a/Documentation/cma/debugfs.txt b/Documentation/cma/debugfs.txt
deleted file mode 100644
index 6cef20a8cedc..000000000000
--- a/Documentation/cma/debugfs.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-The CMA debugfs interface is useful to retrieve basic information out of the
-different CMA areas and to test allocation/release in each of the areas.
-
-Each CMA zone represents a directory under <debugfs>/cma/, indexed by the
-kernel's CMA index. So the first CMA zone would be:
-
-	<debugfs>/cma/cma-0
-
-The structure of the files created under that directory is as follows:
-
- - [RO] base_pfn: The base PFN (Page Frame Number) of the zone.
- - [RO] count: Amount of memory in the CMA area.
- - [RO] order_per_bit: Order of pages represented by one bit.
- - [RO] bitmap: The bitmap of page states in the zone.
- - [WO] alloc: Allocate N pages from that CMA area. For example:
-
-	echo 5 > <debugfs>/cma/cma-2/alloc
-
-would try to allocate 5 pages from the cma-2 area.
-
- - [WO] free: Free N pages from that CMA area, similar to the above.
-- 
cgit 


From 8db8acee4b326bfd5bc9a164a7f9ef844ec0fd2e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sun, 14 Apr 2019 08:44:17 -0300
Subject: docs: console.txt: convert docs to ReST and rename to *.rst

Convert this small file to ReST in preparation for adding it to
the driver-api book.

While this is not part of the driver-api book, mark it as
:orphan:, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 Documentation/console/console.rst | 152 ++++++++++++++++++++++++++++++++++++++
 Documentation/console/console.txt | 145 ------------------------------------
 Documentation/fb/fbcon.rst        |   4 +-
 3 files changed, 154 insertions(+), 147 deletions(-)
 create mode 100644 Documentation/console/console.rst
 delete mode 100644 Documentation/console/console.txt

(limited to 'Documentation')

diff --git a/Documentation/console/console.rst b/Documentation/console/console.rst
new file mode 100644
index 000000000000..b374141b027e
--- /dev/null
+++ b/Documentation/console/console.rst
@@ -0,0 +1,152 @@
+:orphan:
+
+===============
+Console Drivers
+===============
+
+The Linux kernel has 2 general types of console drivers.  The first type is
+assigned by the kernel to all the virtual consoles during the boot process.
+This type will be called 'system driver', and only one system driver is allowed
+to exist. The system driver is persistent and it can never be unloaded, though
+it may become inactive.
+
+The second type has to be explicitly loaded and unloaded. This will be called
+'modular driver' by this document. Multiple modular drivers can coexist at
+any time with each driver sharing the console with other drivers including
+the system driver. However, modular drivers cannot take over the console
+that is currently occupied by another modular driver. (Exception: Drivers that
+call do_take_over_console() will succeed in the takeover regardless of the type
+of driver occupying the consoles.) They can only take over the console that is
+occupied by the system driver. In the same token, if the modular driver is
+released by the console, the system driver will take over.
+
+Modular drivers, from the programmer's point of view, have to call::
+
+	 do_take_over_console() - load and bind driver to console layer
+	 give_up_console() - unload driver; it will only work if driver
+			     is fully unbound
+
+In newer kernels, the following are also available::
+
+	 do_register_con_driver()
+	 do_unregister_con_driver()
+
+If sysfs is enabled, the contents of /sys/class/vtconsole can be
+examined. This shows the console backends currently registered by the
+system which are named vtcon<n> where <n> is an integer from 0 to 15.
+Thus::
+
+       ls /sys/class/vtconsole
+       .  ..  vtcon0  vtcon1
+
+Each directory in /sys/class/vtconsole has 3 files::
+
+     ls /sys/class/vtconsole/vtcon0
+     .  ..  bind  name  uevent
+
+What do these files signify?
+
+     1. bind - this is a read/write file. It shows the status of the driver if
+        read, or acts to bind or unbind the driver to the virtual consoles
+        when written to. The possible values are:
+
+	0
+	  - means the driver is not bound and if echo'ed, commands the driver
+	    to unbind
+
+        1
+	  - means the driver is bound and if echo'ed, commands the driver to
+	    bind
+
+     2. name - read-only file. Shows the name of the driver in this format::
+
+	  cat /sys/class/vtconsole/vtcon0/name
+	  (S) VGA+
+
+	      '(S)' stands for a (S)ystem driver, i.e., it cannot be directly
+	      commanded to bind or unbind
+
+	      'VGA+' is the name of the driver
+
+	  cat /sys/class/vtconsole/vtcon1/name
+	  (M) frame buffer device
+
+	      In this case, '(M)' stands for a (M)odular driver, one that can be
+	      directly commanded to bind or unbind.
+
+     3. uevent - ignore this file
+
+When unbinding, the modular driver is detached first, and then the system
+driver takes over the consoles vacated by the driver. Binding, on the other
+hand, will bind the driver to the consoles that are currently occupied by a
+system driver.
+
+NOTE1:
+  Binding and unbinding must be selected in Kconfig. It's under::
+
+    Device Drivers ->
+	Character devices ->
+		Support for binding and unbinding console drivers
+
+NOTE2:
+  If any of the virtual consoles are in KD_GRAPHICS mode, then binding or
+  unbinding will not succeed. An example of an application that sets the
+  console to KD_GRAPHICS is X.
+
+How useful is this feature? This is very useful for console driver
+developers. By unbinding the driver from the console layer, one can unload the
+driver, make changes, recompile, reload and rebind the driver without any need
+for rebooting the kernel. For regular users who may want to switch from
+framebuffer console to VGA console and vice versa, this feature also makes
+this possible. (NOTE NOTE NOTE: Please read fbcon.txt under Documentation/fb
+for more details.)
+
+Notes for developers
+====================
+
+do_take_over_console() is now broken up into::
+
+     do_register_con_driver()
+     do_bind_con_driver() - private function
+
+give_up_console() is a wrapper to do_unregister_con_driver(), and a driver must
+be fully unbound for this call to succeed. con_is_bound() will check if the
+driver is bound or not.
+
+Guidelines for console driver writers
+=====================================
+
+In order for binding to and unbinding from the console to properly work,
+console drivers must follow these guidelines:
+
+1. All drivers, except system drivers, must call either do_register_con_driver()
+   or do_take_over_console(). do_register_con_driver() will just add the driver
+   to the console's internal list. It won't take over the
+   console. do_take_over_console(), as it name implies, will also take over (or
+   bind to) the console.
+
+2. All resources allocated during con->con_init() must be released in
+   con->con_deinit().
+
+3. All resources allocated in con->con_startup() must be released when the
+   driver, which was previously bound, becomes unbound.  The console layer
+   does not have a complementary call to con->con_startup() so it's up to the
+   driver to check when it's legal to release these resources. Calling
+   con_is_bound() in con->con_deinit() will help.  If the call returned
+   false(), then it's safe to release the resources.  This balance has to be
+   ensured because con->con_startup() can be called again when a request to
+   rebind the driver to the console arrives.
+
+4. Upon exit of the driver, ensure that the driver is totally unbound. If the
+   condition is satisfied, then the driver must call do_unregister_con_driver()
+   or give_up_console().
+
+5. do_unregister_con_driver() can also be called on conditions which make it
+   impossible for the driver to service console requests.  This can happen
+   with the framebuffer console that suddenly lost all of its drivers.
+
+The current crop of console drivers should still work correctly, but binding
+and unbinding them may cause problems. With minimal fixes, these drivers can
+be made to work correctly.
+
+Antonino Daplas <adaplas@pol.net>
diff --git a/Documentation/console/console.txt b/Documentation/console/console.txt
deleted file mode 100644
index d73c2ab4beda..000000000000
--- a/Documentation/console/console.txt
+++ /dev/null
@@ -1,145 +0,0 @@
-Console Drivers
-===============
-
-The Linux kernel has 2 general types of console drivers.  The first type is
-assigned by the kernel to all the virtual consoles during the boot process.
-This type will be called 'system driver', and only one system driver is allowed
-to exist. The system driver is persistent and it can never be unloaded, though
-it may become inactive.
-
-The second type has to be explicitly loaded and unloaded. This will be called
-'modular driver' by this document. Multiple modular drivers can coexist at
-any time with each driver sharing the console with other drivers including
-the system driver. However, modular drivers cannot take over the console
-that is currently occupied by another modular driver. (Exception: Drivers that
-call do_take_over_console() will succeed in the takeover regardless of the type
-of driver occupying the consoles.) They can only take over the console that is
-occupied by the system driver. In the same token, if the modular driver is
-released by the console, the system driver will take over.
-
-Modular drivers, from the programmer's point of view, have to call:
-
-	 do_take_over_console() - load and bind driver to console layer
-	 give_up_console() - unload driver; it will only work if driver
-			     is fully unbound
-
-In newer kernels, the following are also available:
-
-	 do_register_con_driver()
-	 do_unregister_con_driver()
-
-If sysfs is enabled, the contents of /sys/class/vtconsole can be
-examined. This shows the console backends currently registered by the
-system which are named vtcon<n> where <n> is an integer from 0 to 15. Thus:
-
-       ls /sys/class/vtconsole
-       .  ..  vtcon0  vtcon1
-
-Each directory in /sys/class/vtconsole has 3 files:
-
-     ls /sys/class/vtconsole/vtcon0
-     .  ..  bind  name  uevent
-
-What do these files signify?
-
-     1. bind - this is a read/write file. It shows the status of the driver if
-        read, or acts to bind or unbind the driver to the virtual consoles
-        when written to. The possible values are:
-
-	0 - means the driver is not bound and if echo'ed, commands the driver
-	    to unbind
-
-        1 - means the driver is bound and if echo'ed, commands the driver to
-	    bind
-
-     2. name - read-only file. Shows the name of the driver in this format:
-
-	cat /sys/class/vtconsole/vtcon0/name
-	(S) VGA+
-
-	    '(S)' stands for a (S)ystem driver, i.e., it cannot be directly
-	    commanded to bind or unbind
-
-	    'VGA+' is the name of the driver
-
-	cat /sys/class/vtconsole/vtcon1/name
-	(M) frame buffer device
-
-	    In this case, '(M)' stands for a (M)odular driver, one that can be
-	    directly commanded to bind or unbind.
-
-     3. uevent - ignore this file
-
-When unbinding, the modular driver is detached first, and then the system
-driver takes over the consoles vacated by the driver. Binding, on the other
-hand, will bind the driver to the consoles that are currently occupied by a
-system driver.
-
-NOTE1: Binding and unbinding must be selected in Kconfig. It's under:
-
-Device Drivers -> Character devices -> Support for binding and unbinding
-console drivers
-
-NOTE2: If any of the virtual consoles are in KD_GRAPHICS mode, then binding or
-unbinding will not succeed. An example of an application that sets the console
-to KD_GRAPHICS is X.
-
-How useful is this feature? This is very useful for console driver
-developers. By unbinding the driver from the console layer, one can unload the
-driver, make changes, recompile, reload and rebind the driver without any need
-for rebooting the kernel. For regular users who may want to switch from
-framebuffer console to VGA console and vice versa, this feature also makes
-this possible. (NOTE NOTE NOTE: Please read fbcon.txt under Documentation/fb
-for more details.)
-
-Notes for developers:
-=====================
-
-do_take_over_console() is now broken up into:
-
-     do_register_con_driver()
-     do_bind_con_driver() - private function
-
-give_up_console() is a wrapper to do_unregister_con_driver(), and a driver must
-be fully unbound for this call to succeed. con_is_bound() will check if the
-driver is bound or not.
-
-Guidelines for console driver writers:
-=====================================
-
-In order for binding to and unbinding from the console to properly work,
-console drivers must follow these guidelines:
-
-1. All drivers, except system drivers, must call either do_register_con_driver()
-   or do_take_over_console(). do_register_con_driver() will just add the driver
-   to the console's internal list. It won't take over the
-   console. do_take_over_console(), as it name implies, will also take over (or
-   bind to) the console.
-
-2. All resources allocated during con->con_init() must be released in
-   con->con_deinit().
-
-3. All resources allocated in con->con_startup() must be released when the
-   driver, which was previously bound, becomes unbound.  The console layer
-   does not have a complementary call to con->con_startup() so it's up to the
-   driver to check when it's legal to release these resources. Calling
-   con_is_bound() in con->con_deinit() will help.  If the call returned
-   false(), then it's safe to release the resources.  This balance has to be
-   ensured because con->con_startup() can be called again when a request to
-   rebind the driver to the console arrives.
-
-4. Upon exit of the driver, ensure that the driver is totally unbound. If the
-   condition is satisfied, then the driver must call do_unregister_con_driver()
-   or give_up_console().
-
-5. do_unregister_con_driver() can also be called on conditions which make it
-   impossible for the driver to service console requests.  This can happen
-   with the framebuffer console that suddenly lost all of its drivers.
-
-The current crop of console drivers should still work correctly, but binding
-and unbinding them may cause problems. With minimal fixes, these drivers can
-be made to work correctly.
-
-==========================
-Antonino Daplas <adaplas@pol.net>
-
diff --git a/Documentation/fb/fbcon.rst b/Documentation/fb/fbcon.rst
index 1da65b9000de..26bc5cdaabab 100644
--- a/Documentation/fb/fbcon.rst
+++ b/Documentation/fb/fbcon.rst
@@ -187,7 +187,7 @@ the hardware. Thus, in a VGA console::
 Assuming the VGA driver can be unloaded, one must first unbind the VGA driver
 from the console layer before unloading the driver.  The VGA driver cannot be
 unloaded if it is still bound to the console layer. (See
-Documentation/console/console.txt for more information).
+Documentation/console/console.rst for more information).
 
 This is more complicated in the case of the framebuffer console (fbcon),
 because fbcon is an intermediate layer between the console and the drivers::
@@ -204,7 +204,7 @@ fbcon. Thus, there is no need to explicitly unbind the fbdev drivers from
 fbcon.
 
 So, how do we unbind fbcon from the console? Part of the answer is in
-Documentation/console/console.txt. To summarize:
+Documentation/console/console.rst. To summarize:
 
 Echo a value to the bind file that represents the framebuffer console
 driver. So assuming vtcon1 represents fbcon, then::
-- 
cgit 


From 93d2c159673325624ef3f2d14ededfcdf76f948b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sun, 14 Apr 2019 08:50:59 -0300
Subject: docs: pti_intel_mid.txt: convert it to pti_intel_mid.rst

Convert this small file to ReST format and rename it.

Most of the conversion were related to adjusting whitespaces
in order for each section to be properly parsed.

While this is not part of any book, mark it as :orphan:, in order
to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/pti/pti_intel_mid.rst | 106 ++++++++++++++++++++++++++++++++++++
 Documentation/pti/pti_intel_mid.txt |  99 ---------------------------------
 2 files changed, 106 insertions(+), 99 deletions(-)
 create mode 100644 Documentation/pti/pti_intel_mid.rst
 delete mode 100644 Documentation/pti/pti_intel_mid.txt

(limited to 'Documentation')

diff --git a/Documentation/pti/pti_intel_mid.rst b/Documentation/pti/pti_intel_mid.rst
new file mode 100644
index 000000000000..ea05725174cb
--- /dev/null
+++ b/Documentation/pti/pti_intel_mid.rst
@@ -0,0 +1,106 @@
+:orphan:
+
+=============
+Intel MID PTI
+=============
+
+The Intel MID PTI project is HW implemented in Intel Atom
+system-on-a-chip designs based on the Parallel Trace
+Interface for MIPI P1149.7 cJTAG standard.  The kernel solution
+for this platform involves the following files::
+
+	./include/linux/pti.h
+	./drivers/.../n_tracesink.h
+	./drivers/.../n_tracerouter.c
+	./drivers/.../n_tracesink.c
+	./drivers/.../pti.c
+
+pti.c is the driver that enables various debugging features
+popular on platforms from certain mobile manufacturers.
+n_tracerouter.c and n_tracesink.c allow extra system information to
+be collected and routed to the pti driver, such as trace
+debugging data from a modem.  Although n_tracerouter
+and n_tracesink are a part of the complete PTI solution,
+these two line disciplines can work separately from
+pti.c and route any data stream from one /dev/tty node
+to another /dev/tty node via kernel-space.  This provides
+a stable, reliable connection that will not break unless
+the user-space application shuts down (plus avoids
+kernel->user->kernel context switch overheads of routing
+data).
+
+An example debugging usage for this driver system:
+
+  * Hook /dev/ttyPTI0 to syslogd.  Opening this port will also start
+    a console device to further capture debugging messages to PTI.
+  * Hook /dev/ttyPTI1 to modem debugging data to write to PTI HW.
+    This is where n_tracerouter and n_tracesink are used.
+  * Hook /dev/pti to a user-level debugging application for writing
+    to PTI HW.
+  * `Use mipi_` Kernel Driver API in other device drivers for
+    debugging to PTI by first requesting a PTI write address via
+    mipi_request_masterchannel(1).
+
+Below is example pseudo-code on how a 'privileged' application
+can hook up n_tracerouter and n_tracesink to any tty on
+a system.  'Privileged' means the application has enough
+privileges to successfully manipulate the ldisc drivers
+but is not just blindly executing as 'root'. Keep in mind
+the use of ioctl(,TIOCSETD,) is not specific to the n_tracerouter
+and n_tracesink line discpline drivers but is a generic
+operation for a program to use a line discpline driver
+on a tty port other than the default n_tty::
+
+  /////////// To hook up n_tracerouter and n_tracesink /////////
+
+  // Note that n_tracerouter depends on n_tracesink.
+  #include <errno.h>
+  #define ONE_TTY "/dev/ttyOne"
+  #define TWO_TTY "/dev/ttyTwo"
+
+  // needed global to hand onto ldisc connection
+  static int g_fd_source = -1;
+  static int g_fd_sink  = -1;
+
+  // these two vars used to grab LDISC values from loaded ldisc drivers
+  // in OS.  Look at /proc/tty/ldiscs to get the right numbers from
+  // the ldiscs loaded in the system.
+  int source_ldisc_num, sink_ldisc_num = -1;
+  int retval;
+
+  g_fd_source = open(ONE_TTY, O_RDWR); // must be R/W
+  g_fd_sink   = open(TWO_TTY, O_RDWR); // must be R/W
+
+  if (g_fd_source <= 0) || (g_fd_sink <= 0) {
+     // doubt you'll want to use these exact error lines of code
+     printf("Error on open(). errno: %d\n",errno);
+     return errno;
+  }
+
+  retval = ioctl(g_fd_sink, TIOCSETD, &sink_ldisc_num);
+  if (retval < 0) {
+     printf("Error on ioctl().  errno: %d\n", errno);
+     return errno;
+  }
+
+  retval = ioctl(g_fd_source, TIOCSETD, &source_ldisc_num);
+  if (retval < 0) {
+     printf("Error on ioctl().  errno: %d\n", errno);
+     return errno;
+  }
+
+  /////////// To disconnect n_tracerouter and n_tracesink ////////
+
+  // First make sure data through the ldiscs has stopped.
+
+  // Second, disconnect ldiscs.  This provides a
+  // little cleaner shutdown on tty stack.
+  sink_ldisc_num = 0;
+  source_ldisc_num = 0;
+  ioctl(g_fd_uart, TIOCSETD, &sink_ldisc_num);
+  ioctl(g_fd_gadget, TIOCSETD, &source_ldisc_num);
+
+  // Three, program closes connection, and cleanup:
+  close(g_fd_uart);
+  close(g_fd_gadget);
+  g_fd_uart = g_fd_gadget = NULL;
diff --git a/Documentation/pti/pti_intel_mid.txt b/Documentation/pti/pti_intel_mid.txt
deleted file mode 100644
index e7a5b6d1f7a9..000000000000
--- a/Documentation/pti/pti_intel_mid.txt
+++ /dev/null
@@ -1,99 +0,0 @@
-The Intel MID PTI project is HW implemented in Intel Atom
-system-on-a-chip designs based on the Parallel Trace
-Interface for MIPI P1149.7 cJTAG standard.  The kernel solution
-for this platform involves the following files:
-
-./include/linux/pti.h
-./drivers/.../n_tracesink.h
-./drivers/.../n_tracerouter.c
-./drivers/.../n_tracesink.c
-./drivers/.../pti.c
-
-pti.c is the driver that enables various debugging features
-popular on platforms from certain mobile manufacturers.
-n_tracerouter.c and n_tracesink.c allow extra system information to
-be collected and routed to the pti driver, such as trace
-debugging data from a modem.  Although n_tracerouter
-and n_tracesink are a part of the complete PTI solution,
-these two line disciplines can work separately from
-pti.c and route any data stream from one /dev/tty node
-to another /dev/tty node via kernel-space.  This provides
-a stable, reliable connection that will not break unless
-the user-space application shuts down (plus avoids
-kernel->user->kernel context switch overheads of routing
-data).
-
-An example debugging usage for this driver system:
-   *Hook /dev/ttyPTI0 to syslogd.  Opening this port will also start
-    a console device to further capture debugging messages to PTI.
-   *Hook /dev/ttyPTI1 to modem debugging data to write to PTI HW.
-    This is where n_tracerouter and n_tracesink are used.
-   *Hook /dev/pti to a user-level debugging application for writing
-    to PTI HW.
-   *Use mipi_* Kernel Driver API in other device drivers for
-    debugging to PTI by first requesting a PTI write address via
-    mipi_request_masterchannel(1).
-
-Below is example pseudo-code on how a 'privileged' application
-can hook up n_tracerouter and n_tracesink to any tty on
-a system.  'Privileged' means the application has enough
-privileges to successfully manipulate the ldisc drivers
-but is not just blindly executing as 'root'. Keep in mind
-the use of ioctl(,TIOCSETD,) is not specific to the n_tracerouter
-and n_tracesink line discpline drivers but is a generic
-operation for a program to use a line discpline driver
-on a tty port other than the default n_tty.
-
-/////////// To hook up n_tracerouter and n_tracesink /////////
-
-// Note that n_tracerouter depends on n_tracesink.
-#include <errno.h>
-#define ONE_TTY "/dev/ttyOne"
-#define TWO_TTY "/dev/ttyTwo"
-
-// needed global to hand onto ldisc connection
-static int g_fd_source = -1;
-static int g_fd_sink  = -1;
-
-// these two vars used to grab LDISC values from loaded ldisc drivers
-// in OS.  Look at /proc/tty/ldiscs to get the right numbers from
-// the ldiscs loaded in the system.
-int source_ldisc_num, sink_ldisc_num = -1;
-int retval;
-
-g_fd_source = open(ONE_TTY, O_RDWR); // must be R/W
-g_fd_sink   = open(TWO_TTY, O_RDWR); // must be R/W
-
-if (g_fd_source <= 0) || (g_fd_sink <= 0) {
-   // doubt you'll want to use these exact error lines of code
-   printf("Error on open(). errno: %d\n",errno);
-   return errno;
-}
-
-retval = ioctl(g_fd_sink, TIOCSETD, &sink_ldisc_num);
-if (retval < 0) {
-   printf("Error on ioctl().  errno: %d\n", errno);
-   return errno;
-}
-
-retval = ioctl(g_fd_source, TIOCSETD, &source_ldisc_num);
-if (retval < 0) {
-   printf("Error on ioctl().  errno: %d\n", errno);
-   return errno;
-}
-
-/////////// To disconnect n_tracerouter and n_tracesink ////////
-
-// First make sure data through the ldiscs has stopped.
-
-// Second, disconnect ldiscs.  This provides a
-// little cleaner shutdown on tty stack.
-sink_ldisc_num = 0;
-source_ldisc_num = 0;
-ioctl(g_fd_uart, TIOCSETD, &sink_ldisc_num);
-ioctl(g_fd_gadget, TIOCSETD, &source_ldisc_num);
-
-// Three, program closes connection, and cleanup:
-close(g_fd_uart);
-close(g_fd_gadget);
-g_fd_uart = g_fd_gadget = NULL;
-- 
cgit 


From 0d07cf5e53a21e35289adc3ab99b6804ff0c3833 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sun, 14 Apr 2019 08:58:05 -0300
Subject: docs: early-userspace: convert docs to ReST and rename to *.rst

The two files there describes a Kernel API feature, used to
support early userspace stuff. Prepare for moving them to
the kernel API book by converting to ReST format.

The conversion itself was quite trivial: just add/mark a few
titles as such, add a literal block markup, add a table markup
and a few blank lines, in order to make Sphinx to properly parse it.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/early-userspace/README               | 151 --------------------
 Documentation/early-userspace/buffer-format.rst    | 119 ++++++++++++++++
 Documentation/early-userspace/buffer-format.txt    | 112 ---------------
 .../early-userspace/early_userspace_support.rst    | 154 +++++++++++++++++++++
 Documentation/early-userspace/index.rst            |  18 +++
 Documentation/filesystems/nfs/nfsroot.txt          |   2 +-
 .../filesystems/ramfs-rootfs-initramfs.txt         |   4 +-
 7 files changed, 294 insertions(+), 266 deletions(-)
 delete mode 100644 Documentation/early-userspace/README
 create mode 100644 Documentation/early-userspace/buffer-format.rst
 delete mode 100644 Documentation/early-userspace/buffer-format.txt
 create mode 100644 Documentation/early-userspace/early_userspace_support.rst
 create mode 100644 Documentation/early-userspace/index.rst

(limited to 'Documentation')

diff --git a/Documentation/early-userspace/README b/Documentation/early-userspace/README
deleted file mode 100644
index 955d667dc87e..000000000000
--- a/Documentation/early-userspace/README
+++ /dev/null
@@ -1,151 +0,0 @@
-Early userspace support
-=======================
-
-Last update: 2004-12-20 tlh
-
-
-"Early userspace" is a set of libraries and programs that provide
-various pieces of functionality that are important enough to be
-available while a Linux kernel is coming up, but that don't need to be
-run inside the kernel itself.
-
-It consists of several major infrastructure components:
-
-- gen_init_cpio, a program that builds a cpio-format archive
-  containing a root filesystem image.  This archive is compressed, and
-  the compressed image is linked into the kernel image.
-- initramfs, a chunk of code that unpacks the compressed cpio image
-  midway through the kernel boot process.
-- klibc, a userspace C library, currently packaged separately, that is
-  optimized for correctness and small size.
-
-The cpio file format used by initramfs is the "newc" (aka "cpio -H newc")
-format, and is documented in the file "buffer-format.txt".  There are
-two ways to add an early userspace image: specify an existing cpio
-archive to be used as the image or have the kernel build process build
-the image from specifications.
-
-CPIO ARCHIVE method
-
-You can create a cpio archive that contains the early userspace image.
-Your cpio archive should be specified in CONFIG_INITRAMFS_SOURCE and it
-will be used directly.  Only a single cpio file may be specified in
-CONFIG_INITRAMFS_SOURCE and directory and file names are not allowed in
-combination with a cpio archive.
-
-IMAGE BUILDING method
-
-The kernel build process can also build an early userspace image from
-source parts rather than supplying a cpio archive.  This method provides
-a way to create images with root-owned files even though the image was
-built by an unprivileged user.
-
-The image is specified as one or more sources in
-CONFIG_INITRAMFS_SOURCE.  Sources can be either directories or files -
-cpio archives are *not* allowed when building from sources.
-
-A source directory will have it and all of its contents packaged.  The
-specified directory name will be mapped to '/'.  When packaging a
-directory, limited user and group ID translation can be performed.
-INITRAMFS_ROOT_UID can be set to a user ID that needs to be mapped to
-user root (0).  INITRAMFS_ROOT_GID can be set to a group ID that needs
-to be mapped to group root (0).
-
-A source file must be directives in the format required by the
-usr/gen_init_cpio utility (run 'usr/gen_init_cpio -h' to get the
-file format).  The directives in the file will be passed directly to
-usr/gen_init_cpio.
-
-When a combination of directories and files are specified then the
-initramfs image will be an aggregate of all of them.  In this way a user
-can create a 'root-image' directory and install all files into it.
-Because device-special files cannot be created by a unprivileged user,
-special files can be listed in a 'root-files' file.  Both 'root-image'
-and 'root-files' can be listed in CONFIG_INITRAMFS_SOURCE and a complete
-early userspace image can be built by an unprivileged user.
-
-As a technical note, when directories and files are specified, the
-entire CONFIG_INITRAMFS_SOURCE is passed to
-usr/gen_initramfs_list.sh.  This means that CONFIG_INITRAMFS_SOURCE
-can really be interpreted as any legal argument to
-gen_initramfs_list.sh.  If a directory is specified as an argument then
-the contents are scanned, uid/gid translation is performed, and
-usr/gen_init_cpio file directives are output.  If a directory is
-specified as an argument to usr/gen_initramfs_list.sh then the
-contents of the file are simply copied to the output.  All of the output
-directives from directory scanning and file contents copying are
-processed by usr/gen_init_cpio.
-
-See also 'usr/gen_initramfs_list.sh -h'.
-
-Where's this all leading?
-=========================
-
-The klibc distribution contains some of the necessary software to make
-early userspace useful.  The klibc distribution is currently
-maintained separately from the kernel.
-
-You can obtain somewhat infrequent snapshots of klibc from
-https://www.kernel.org/pub/linux/libs/klibc/
-
-For active users, you are better off using the klibc git
-repository, at http://git.kernel.org/?p=libs/klibc/klibc.git
-
-The standalone klibc distribution currently provides three components,
-in addition to the klibc library:
-
-- ipconfig, a program that configures network interfaces.  It can
-  configure them statically, or use DHCP to obtain information
-  dynamically (aka "IP autoconfiguration").
-- nfsmount, a program that can mount an NFS filesystem.
-- kinit, the "glue" that uses ipconfig and nfsmount to replace the old
-  support for IP autoconfig, mount a filesystem over NFS, and continue
-  system boot using that filesystem as root.
-
-kinit is built as a single statically linked binary to save space.
-
-Eventually, several more chunks of kernel functionality will hopefully
-move to early userspace:
-
-- Almost all of init/do_mounts* (the beginning of this is already in
-  place)
-- ACPI table parsing
-- Insert unwieldy subsystem that doesn't really need to be in kernel
-  space here
-
-If kinit doesn't meet your current needs and you've got bytes to burn,
-the klibc distribution includes a small Bourne-compatible shell (ash)
-and a number of other utilities, so you can replace kinit and build
-custom initramfs images that meet your needs exactly.
-
-For questions and help, you can sign up for the early userspace
-mailing list at http://www.zytor.com/mailman/listinfo/klibc
-
-How does it work?
-=================
-
-The kernel has currently 3 ways to mount the root filesystem:
-
-a) all required device and filesystem drivers compiled into the kernel, no
-   initrd.  init/main.c:init() will call prepare_namespace() to mount the
-   final root filesystem, based on the root= option and optional init= to run
-   some other init binary than listed at the end of init/main.c:init().
-
-b) some device and filesystem drivers built as modules and stored in an
-   initrd.  The initrd must contain a binary '/linuxrc' which is supposed to
-   load these driver modules.  It is also possible to mount the final root
-   filesystem via linuxrc and use the pivot_root syscall.  The initrd is
-   mounted and executed via prepare_namespace().
-
-c) using initramfs.  The call to prepare_namespace() must be skipped.
-   This means that a binary must do all the work.  Said binary can be stored
-   into initramfs either via modifying usr/gen_init_cpio.c or via the new
-   initrd format, an cpio archive.  It must be called "/init".  This binary
-   is responsible to do all the things prepare_namespace() would do.
-
-   To maintain backwards compatibility, the /init binary will only run if it
-   comes via an initramfs cpio archive.  If this is not the case,
-   init/main.c:init() will run prepare_namespace() to mount the final root
-   and exec one of the predefined init binaries.
-
-Bryan O'Sullivan <bos@serpentine.com>
diff --git a/Documentation/early-userspace/buffer-format.rst b/Documentation/early-userspace/buffer-format.rst
new file mode 100644
index 000000000000..7f74e301fdf3
--- /dev/null
+++ b/Documentation/early-userspace/buffer-format.rst
@@ -0,0 +1,119 @@
+=======================
+initramfs buffer format
+=======================
+
+Al Viro, H. Peter Anvin
+
+Last revision: 2002-01-13
+
+Starting with kernel 2.5.x, the old "initial ramdisk" protocol is
+getting {replaced/complemented} with the new "initial ramfs"
+(initramfs) protocol.  The initramfs contents is passed using the same
+memory buffer protocol used by the initrd protocol, but the contents
+is different.  The initramfs buffer contains an archive which is
+expanded into a ramfs filesystem; this document details the format of
+the initramfs buffer format.
+
+The initramfs buffer format is based around the "newc" or "crc" CPIO
+formats, and can be created with the cpio(1) utility.  The cpio
+archive can be compressed using gzip(1).  One valid version of an
+initramfs buffer is thus a single .cpio.gz file.
+
+The full format of the initramfs buffer is defined by the following
+grammar, where::
+
+	*	is used to indicate "0 or more occurrences of"
+	(|)	indicates alternatives
+	+	indicates concatenation
+	GZIP()	indicates the gzip(1) of the operand
+	ALGN(n)	means padding with null bytes to an n-byte boundary
+
+	initramfs  := ("\0" | cpio_archive | cpio_gzip_archive)*
+
+	cpio_gzip_archive := GZIP(cpio_archive)
+
+	cpio_archive := cpio_file* + (<nothing> | cpio_trailer)
+
+	cpio_file := ALGN(4) + cpio_header + filename + "\0" + ALGN(4) + data
+
+	cpio_trailer := ALGN(4) + cpio_header + "TRAILER!!!\0" + ALGN(4)
+
+
+In human terms, the initramfs buffer contains a collection of
+compressed and/or uncompressed cpio archives (in the "newc" or "crc"
+formats); arbitrary amounts zero bytes (for padding) can be added
+between members.
+
+The cpio "TRAILER!!!" entry (cpio end-of-archive) is optional, but is
+not ignored; see "handling of hard links" below.
+
+The structure of the cpio_header is as follows (all fields contain
+hexadecimal ASCII numbers fully padded with '0' on the left to the
+full width of the field, for example, the integer 4780 is represented
+by the ASCII string "000012ac"):
+
+============= ================== ==============================================
+Field name    Field size	 Meaning
+============= ================== ==============================================
+c_magic	      6 bytes		 The string "070701" or "070702"
+c_ino	      8 bytes		 File inode number
+c_mode	      8 bytes		 File mode and permissions
+c_uid	      8 bytes		 File uid
+c_gid	      8 bytes		 File gid
+c_nlink	      8 bytes		 Number of links
+c_mtime	      8 bytes		 Modification time
+c_filesize    8 bytes		 Size of data field
+c_maj	      8 bytes		 Major part of file device number
+c_min	      8 bytes		 Minor part of file device number
+c_rmaj	      8 bytes		 Major part of device node reference
+c_rmin	      8 bytes		 Minor part of device node reference
+c_namesize    8 bytes		 Length of filename, including final \0
+c_chksum      8 bytes		 Checksum of data field if c_magic is 070702;
+				 otherwise zero
+============= ================== ==============================================
+
+The c_mode field matches the contents of st_mode returned by stat(2)
+on Linux, and encodes the file type and file permissions.
+
+The c_filesize should be zero for any file which is not a regular file
+or symlink.
+
+The c_chksum field contains a simple 32-bit unsigned sum of all the
+bytes in the data field.  cpio(1) refers to this as "crc", which is
+clearly incorrect (a cyclic redundancy check is a different and
+significantly stronger integrity check), however, this is the
+algorithm used.
+
+If the filename is "TRAILER!!!" this is actually an end-of-archive
+marker; the c_filesize for an end-of-archive marker must be zero.
+
+
+Handling of hard links
+======================
+
+When a nondirectory with c_nlink > 1 is seen, the (c_maj,c_min,c_ino)
+tuple is looked up in a tuple buffer.  If not found, it is entered in
+the tuple buffer and the entry is created as usual; if found, a hard
+link rather than a second copy of the file is created.  It is not
+necessary (but permitted) to include a second copy of the file
+contents; if the file contents is not included, the c_filesize field
+should be set to zero to indicate no data section follows.  If data is
+present, the previous instance of the file is overwritten; this allows
+the data-carrying instance of a file to occur anywhere in the sequence
+(GNU cpio is reported to attach the data to the last instance of a
+file only.)
+
+c_filesize must not be zero for a symlink.
+
+When a "TRAILER!!!" end-of-archive marker is seen, the tuple buffer is
+reset.  This permits archives which are generated independently to be
+concatenated.
+
+To combine file data from different sources (without having to
+regenerate the (c_maj,c_min,c_ino) fields), therefore, either one of
+the following techniques can be used:
+
+a) Separate the different file data sources with a "TRAILER!!!"
+   end-of-archive marker, or
+
+b) Make sure c_nlink == 1 for all nondirectory entries.
diff --git a/Documentation/early-userspace/buffer-format.txt b/Documentation/early-userspace/buffer-format.txt
deleted file mode 100644
index e1fd7f9dad16..000000000000
--- a/Documentation/early-userspace/buffer-format.txt
+++ /dev/null
@@ -1,112 +0,0 @@
-		       initramfs buffer format
-		       -----------------------
-
-		       Al Viro, H. Peter Anvin
-		      Last revision: 2002-01-13
-
-Starting with kernel 2.5.x, the old "initial ramdisk" protocol is
-getting {replaced/complemented} with the new "initial ramfs"
-(initramfs) protocol.  The initramfs contents is passed using the same
-memory buffer protocol used by the initrd protocol, but the contents
-is different.  The initramfs buffer contains an archive which is
-expanded into a ramfs filesystem; this document details the format of
-the initramfs buffer format.
-
-The initramfs buffer format is based around the "newc" or "crc" CPIO
-formats, and can be created with the cpio(1) utility.  The cpio
-archive can be compressed using gzip(1).  One valid version of an
-initramfs buffer is thus a single .cpio.gz file.
-
-The full format of the initramfs buffer is defined by the following
-grammar, where:
-	*	is used to indicate "0 or more occurrences of"
-	(|)	indicates alternatives
-	+	indicates concatenation
-	GZIP()	indicates the gzip(1) of the operand
-	ALGN(n)	means padding with null bytes to an n-byte boundary
-
-	initramfs  := ("\0" | cpio_archive | cpio_gzip_archive)*
-
-	cpio_gzip_archive := GZIP(cpio_archive)
-
-	cpio_archive := cpio_file* + (<nothing> | cpio_trailer)
-
-	cpio_file := ALGN(4) + cpio_header + filename + "\0" + ALGN(4) + data
-
-	cpio_trailer := ALGN(4) + cpio_header + "TRAILER!!!\0" + ALGN(4)
-
-
-In human terms, the initramfs buffer contains a collection of
-compressed and/or uncompressed cpio archives (in the "newc" or "crc"
-formats); arbitrary amounts zero bytes (for padding) can be added
-between members.
-
-The cpio "TRAILER!!!" entry (cpio end-of-archive) is optional, but is
-not ignored; see "handling of hard links" below.
-
-The structure of the cpio_header is as follows (all fields contain
-hexadecimal ASCII numbers fully padded with '0' on the left to the
-full width of the field, for example, the integer 4780 is represented
-by the ASCII string "000012ac"):
-
-Field name    Field size	 Meaning
-c_magic	      6 bytes		 The string "070701" or "070702"
-c_ino	      8 bytes		 File inode number
-c_mode	      8 bytes		 File mode and permissions
-c_uid	      8 bytes		 File uid
-c_gid	      8 bytes		 File gid
-c_nlink	      8 bytes		 Number of links
-c_mtime	      8 bytes		 Modification time
-c_filesize    8 bytes		 Size of data field
-c_maj	      8 bytes		 Major part of file device number
-c_min	      8 bytes		 Minor part of file device number
-c_rmaj	      8 bytes		 Major part of device node reference
-c_rmin	      8 bytes		 Minor part of device node reference
-c_namesize    8 bytes		 Length of filename, including final \0
-c_chksum      8 bytes		 Checksum of data field if c_magic is 070702;
-				 otherwise zero
-
-The c_mode field matches the contents of st_mode returned by stat(2)
-on Linux, and encodes the file type and file permissions.
-
-The c_filesize should be zero for any file which is not a regular file
-or symlink.
-
-The c_chksum field contains a simple 32-bit unsigned sum of all the
-bytes in the data field.  cpio(1) refers to this as "crc", which is
-clearly incorrect (a cyclic redundancy check is a different and
-significantly stronger integrity check), however, this is the
-algorithm used.
-
-If the filename is "TRAILER!!!" this is actually an end-of-archive
-marker; the c_filesize for an end-of-archive marker must be zero.
-
-
-*** Handling of hard links
-
-When a nondirectory with c_nlink > 1 is seen, the (c_maj,c_min,c_ino)
-tuple is looked up in a tuple buffer.  If not found, it is entered in
-the tuple buffer and the entry is created as usual; if found, a hard
-link rather than a second copy of the file is created.  It is not
-necessary (but permitted) to include a second copy of the file
-contents; if the file contents is not included, the c_filesize field
-should be set to zero to indicate no data section follows.  If data is
-present, the previous instance of the file is overwritten; this allows
-the data-carrying instance of a file to occur anywhere in the sequence
-(GNU cpio is reported to attach the data to the last instance of a
-file only.)
-
-c_filesize must not be zero for a symlink.
-
-When a "TRAILER!!!" end-of-archive marker is seen, the tuple buffer is
-reset.  This permits archives which are generated independently to be
-concatenated.
-
-To combine file data from different sources (without having to
-regenerate the (c_maj,c_min,c_ino) fields), therefore, either one of
-the following techniques can be used:
-
-a) Separate the different file data sources with a "TRAILER!!!"
-   end-of-archive marker, or
-
-b) Make sure c_nlink == 1 for all nondirectory entries.
diff --git a/Documentation/early-userspace/early_userspace_support.rst b/Documentation/early-userspace/early_userspace_support.rst
new file mode 100644
index 000000000000..3deefb34046b
--- /dev/null
+++ b/Documentation/early-userspace/early_userspace_support.rst
@@ -0,0 +1,154 @@
+=======================
+Early userspace support
+=======================
+
+Last update: 2004-12-20 tlh
+
+
+"Early userspace" is a set of libraries and programs that provide
+various pieces of functionality that are important enough to be
+available while a Linux kernel is coming up, but that don't need to be
+run inside the kernel itself.
+
+It consists of several major infrastructure components:
+
+- gen_init_cpio, a program that builds a cpio-format archive
+  containing a root filesystem image.  This archive is compressed, and
+  the compressed image is linked into the kernel image.
+- initramfs, a chunk of code that unpacks the compressed cpio image
+  midway through the kernel boot process.
+- klibc, a userspace C library, currently packaged separately, that is
+  optimized for correctness and small size.
+
+The cpio file format used by initramfs is the "newc" (aka "cpio -H newc")
+format, and is documented in the file "buffer-format.txt".  There are
+two ways to add an early userspace image: specify an existing cpio
+archive to be used as the image or have the kernel build process build
+the image from specifications.
+
+CPIO ARCHIVE method
+-------------------
+
+You can create a cpio archive that contains the early userspace image.
+Your cpio archive should be specified in CONFIG_INITRAMFS_SOURCE and it
+will be used directly.  Only a single cpio file may be specified in
+CONFIG_INITRAMFS_SOURCE and directory and file names are not allowed in
+combination with a cpio archive.
+
+IMAGE BUILDING method
+---------------------
+
+The kernel build process can also build an early userspace image from
+source parts rather than supplying a cpio archive.  This method provides
+a way to create images with root-owned files even though the image was
+built by an unprivileged user.
+
+The image is specified as one or more sources in
+CONFIG_INITRAMFS_SOURCE.  Sources can be either directories or files -
+cpio archives are *not* allowed when building from sources.
+
+A source directory will have it and all of its contents packaged.  The
+specified directory name will be mapped to '/'.  When packaging a
+directory, limited user and group ID translation can be performed.
+INITRAMFS_ROOT_UID can be set to a user ID that needs to be mapped to
+user root (0).  INITRAMFS_ROOT_GID can be set to a group ID that needs
+to be mapped to group root (0).
+
+A source file must be directives in the format required by the
+usr/gen_init_cpio utility (run 'usr/gen_init_cpio -h' to get the
+file format).  The directives in the file will be passed directly to
+usr/gen_init_cpio.
+
+When a combination of directories and files are specified then the
+initramfs image will be an aggregate of all of them.  In this way a user
+can create a 'root-image' directory and install all files into it.
+Because device-special files cannot be created by a unprivileged user,
+special files can be listed in a 'root-files' file.  Both 'root-image'
+and 'root-files' can be listed in CONFIG_INITRAMFS_SOURCE and a complete
+early userspace image can be built by an unprivileged user.
+
+As a technical note, when directories and files are specified, the
+entire CONFIG_INITRAMFS_SOURCE is passed to
+usr/gen_initramfs_list.sh.  This means that CONFIG_INITRAMFS_SOURCE
+can really be interpreted as any legal argument to
+gen_initramfs_list.sh.  If a directory is specified as an argument then
+the contents are scanned, uid/gid translation is performed, and
+usr/gen_init_cpio file directives are output.  If a directory is
+specified as an argument to usr/gen_initramfs_list.sh then the
+contents of the file are simply copied to the output.  All of the output
+directives from directory scanning and file contents copying are
+processed by usr/gen_init_cpio.
+
+See also 'usr/gen_initramfs_list.sh -h'.
+
+Where's this all leading?
+=========================
+
+The klibc distribution contains some of the necessary software to make
+early userspace useful.  The klibc distribution is currently
+maintained separately from the kernel.
+
+You can obtain somewhat infrequent snapshots of klibc from
+https://www.kernel.org/pub/linux/libs/klibc/
+
+For active users, you are better off using the klibc git
+repository, at http://git.kernel.org/?p=libs/klibc/klibc.git
+
+The standalone klibc distribution currently provides three components,
+in addition to the klibc library:
+
+- ipconfig, a program that configures network interfaces.  It can
+  configure them statically, or use DHCP to obtain information
+  dynamically (aka "IP autoconfiguration").
+- nfsmount, a program that can mount an NFS filesystem.
+- kinit, the "glue" that uses ipconfig and nfsmount to replace the old
+  support for IP autoconfig, mount a filesystem over NFS, and continue
+  system boot using that filesystem as root.
+
+kinit is built as a single statically linked binary to save space.
+
+Eventually, several more chunks of kernel functionality will hopefully
+move to early userspace:
+
+- Almost all of init/do_mounts* (the beginning of this is already in
+  place)
+- ACPI table parsing
+- Insert unwieldy subsystem that doesn't really need to be in kernel
+  space here
+
+If kinit doesn't meet your current needs and you've got bytes to burn,
+the klibc distribution includes a small Bourne-compatible shell (ash)
+and a number of other utilities, so you can replace kinit and build
+custom initramfs images that meet your needs exactly.
+
+For questions and help, you can sign up for the early userspace
+mailing list at http://www.zytor.com/mailman/listinfo/klibc
+
+How does it work?
+=================
+
+The kernel has currently 3 ways to mount the root filesystem:
+
+a) all required device and filesystem drivers compiled into the kernel, no
+   initrd.  init/main.c:init() will call prepare_namespace() to mount the
+   final root filesystem, based on the root= option and optional init= to run
+   some other init binary than listed at the end of init/main.c:init().
+
+b) some device and filesystem drivers built as modules and stored in an
+   initrd.  The initrd must contain a binary '/linuxrc' which is supposed to
+   load these driver modules.  It is also possible to mount the final root
+   filesystem via linuxrc and use the pivot_root syscall.  The initrd is
+   mounted and executed via prepare_namespace().
+
+c) using initramfs.  The call to prepare_namespace() must be skipped.
+   This means that a binary must do all the work.  Said binary can be stored
+   into initramfs either via modifying usr/gen_init_cpio.c or via the new
+   initrd format, an cpio archive.  It must be called "/init".  This binary
+   is responsible to do all the things prepare_namespace() would do.
+
+   To maintain backwards compatibility, the /init binary will only run if it
+   comes via an initramfs cpio archive.  If this is not the case,
+   init/main.c:init() will run prepare_namespace() to mount the final root
+   and exec one of the predefined init binaries.
+
+Bryan O'Sullivan <bos@serpentine.com>
diff --git a/Documentation/early-userspace/index.rst b/Documentation/early-userspace/index.rst
new file mode 100644
index 000000000000..2b8eb6132058
--- /dev/null
+++ b/Documentation/early-userspace/index.rst
@@ -0,0 +1,18 @@
+:orphan:
+
+===============
+Early Userspace
+===============
+
+.. toctree::
+    :maxdepth: 1
+
+    early_userspace_support
+    buffer-format
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt
index d2963123eb1c..4862d3d77e27 100644
--- a/Documentation/filesystems/nfs/nfsroot.txt
+++ b/Documentation/filesystems/nfs/nfsroot.txt
@@ -239,7 +239,7 @@ rdinit=<executable file>
   A description of the process of mounting the root file system can be
   found in:
 
-    Documentation/early-userspace/README
+    Documentation/early-userspace/early_userspace_support.rst
 
 
diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.txt b/Documentation/filesystems/ramfs-rootfs-initramfs.txt
index 79637d227e85..fa985909dbca 100644
--- a/Documentation/filesystems/ramfs-rootfs-initramfs.txt
+++ b/Documentation/filesystems/ramfs-rootfs-initramfs.txt
@@ -105,7 +105,7 @@ All this differs from the old initrd in several ways:
   - The old initrd file was a gzipped filesystem image (in some file format,
     such as ext2, that needed a driver built into the kernel), while the new
     initramfs archive is a gzipped cpio archive (like tar only simpler,
-    see cpio(1) and Documentation/early-userspace/buffer-format.txt).  The
+    see cpio(1) and Documentation/early-userspace/buffer-format.rst).  The
     kernel's cpio extraction code is not only extremely small, it's also
     __init text and data that can be discarded during the boot process.
 
@@ -159,7 +159,7 @@ One advantage of the configuration file is that root access is not required to
 set permissions or create device nodes in the new archive.  (Note that those
 two example "file" entries expect to find files named "init.sh" and "busybox" in
 a directory called "initramfs", under the linux-2.6.* directory.  See
-Documentation/early-userspace/README for more details.)
+Documentation/early-userspace/early_userspace_support.rst for more details.)
 
 The kernel does not depend on external cpio tools.  If you specify a
 directory instead of a configuration file, the kernel's build infrastructure
-- 
cgit 


From dc7a12bdfccd94c31f79e294f16f7549bd411b49 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sun, 14 Apr 2019 15:51:10 -0300
Subject: docs: arm: convert docs to ReST and rename to *.rst

Converts ARM the text files to ReST, preparing them to be an
architecture book.

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - fix tables markups;
  - add some lists markups;
  - mark literal blocks;
  - adjust title markups.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Reviewed-by Corentin Labbe <clabbe.montjoie@gmail.com> # For sun4i-ss
---
 Documentation/arm/Booting                          | 218 ---------
 Documentation/arm/IXP4xx                           | 172 -------
 Documentation/arm/Interrupts                       | 167 -------
 Documentation/arm/Marvell/README                   | 395 ---------------
 Documentation/arm/Microchip/README                 | 169 -------
 Documentation/arm/Netwinder                        |  78 ---
 Documentation/arm/OMAP/DSS                         | 362 --------------
 Documentation/arm/OMAP/README                      |  11 -
 Documentation/arm/OMAP/omap_pm                     | 154 ------
 Documentation/arm/Porting                          | 135 ------
 Documentation/arm/README                           | 204 --------
 Documentation/arm/SA1100/ADSBitsy                  |  43 --
 Documentation/arm/SA1100/Assabet                   | 300 ------------
 Documentation/arm/SA1100/Brutus                    |  66 ---
 Documentation/arm/SA1100/CERF                      |  29 --
 Documentation/arm/SA1100/FreeBird                  |  21 -
 Documentation/arm/SA1100/GraphicsClient            |  98 ----
 Documentation/arm/SA1100/GraphicsMaster            |  53 --
 Documentation/arm/SA1100/HUW_WEBPANEL              |  17 -
 Documentation/arm/SA1100/Itsy                      |  39 --
 Documentation/arm/SA1100/LART                      |  14 -
 Documentation/arm/SA1100/PLEB                      |  11 -
 Documentation/arm/SA1100/Pangolin                  |  23 -
 Documentation/arm/SA1100/Tifon                     |   7 -
 Documentation/arm/SA1100/Yopy                      |   2 -
 Documentation/arm/SA1100/empeg                     |   2 -
 Documentation/arm/SA1100/nanoEngine                |  11 -
 Documentation/arm/SA1100/serial_UART               |  47 --
 Documentation/arm/SH-Mobile/.gitignore             |   1 -
 Documentation/arm/SPEAr/overview.txt               |  63 ---
 Documentation/arm/Samsung-S3C24XX/CPUfreq.txt      |  75 ---
 Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt    |  58 ---
 Documentation/arm/Samsung-S3C24XX/GPIO.txt         | 171 -------
 Documentation/arm/Samsung-S3C24XX/H1940.txt        |  40 --
 Documentation/arm/Samsung-S3C24XX/NAND.txt         |  30 --
 Documentation/arm/Samsung-S3C24XX/Overview.txt     | 318 ------------
 Documentation/arm/Samsung-S3C24XX/S3C2412.txt      | 120 -----
 Documentation/arm/Samsung-S3C24XX/S3C2413.txt      |  21 -
 Documentation/arm/Samsung-S3C24XX/SMDK2440.txt     |  56 ---
 Documentation/arm/Samsung-S3C24XX/Suspend.txt      | 137 ------
 Documentation/arm/Samsung-S3C24XX/USB-Host.txt     |  93 ----
 Documentation/arm/Samsung/Bootloader-interface.txt |  68 ---
 Documentation/arm/Samsung/GPIO.txt                 |  40 --
 Documentation/arm/Samsung/Overview.txt             |  86 ----
 .../arm/Samsung/clksrc-change-registers.awk        | 166 -------
 Documentation/arm/Setup                            | 129 -----
 Documentation/arm/VFP/release-notes.txt            |  55 ---
 Documentation/arm/arm.rst                          | 214 +++++++++
 Documentation/arm/booting.rst                      | 237 +++++++++
 Documentation/arm/cluster-pm-race-avoidance.rst    | 533 +++++++++++++++++++++
 Documentation/arm/cluster-pm-race-avoidance.txt    | 498 -------------------
 Documentation/arm/firmware.rst                     |  72 +++
 Documentation/arm/firmware.txt                     |  70 ---
 Documentation/arm/index.rst                        |  80 ++++
 Documentation/arm/interrupts.rst                   | 169 +++++++
 Documentation/arm/ixp4xx.rst                       | 173 +++++++
 Documentation/arm/kernel_mode_neon.rst             | 124 +++++
 Documentation/arm/kernel_mode_neon.txt             | 121 -----
 Documentation/arm/kernel_user_helpers.rst          | 268 +++++++++++
 Documentation/arm/kernel_user_helpers.txt          | 267 -----------
 Documentation/arm/keystone/Overview.txt            |  55 ---
 Documentation/arm/keystone/knav-qmss.rst           |  60 +++
 Documentation/arm/keystone/knav-qmss.txt           |  56 ---
 Documentation/arm/keystone/overview.rst            |  74 +++
 Documentation/arm/marvel.rst                       | 488 +++++++++++++++++++
 Documentation/arm/mem_alignment                    |  58 ---
 Documentation/arm/mem_alignment.rst                |  63 +++
 Documentation/arm/memory.rst                       |  93 ++++
 Documentation/arm/memory.txt                       |  88 ----
 Documentation/arm/microchip.rst                    | 204 ++++++++
 Documentation/arm/netwinder.rst                    |  85 ++++
 Documentation/arm/nwfpe/NOTES                      |  29 --
 Documentation/arm/nwfpe/README                     |  70 ---
 Documentation/arm/nwfpe/README.FPE                 | 156 ------
 Documentation/arm/nwfpe/TODO                       |  67 ---
 Documentation/arm/nwfpe/index.rst                  |  11 +
 Documentation/arm/nwfpe/netwinder-fpe.rst          | 162 +++++++
 Documentation/arm/nwfpe/notes.rst                  |  32 ++
 Documentation/arm/nwfpe/nwfpe.rst                  |  74 +++
 Documentation/arm/nwfpe/todo.rst                   |  72 +++
 Documentation/arm/omap/dss.rst                     | 372 ++++++++++++++
 Documentation/arm/omap/index.rst                   |  10 +
 Documentation/arm/omap/omap.rst                    |  18 +
 Documentation/arm/omap/omap_pm.rst                 | 165 +++++++
 Documentation/arm/porting.rst                      | 137 ++++++
 Documentation/arm/pxa/mfp.rst                      | 288 +++++++++++
 Documentation/arm/pxa/mfp.txt                      | 286 -----------
 Documentation/arm/sa1100/adsbitsy.rst              |  51 ++
 Documentation/arm/sa1100/assabet.rst               | 301 ++++++++++++
 Documentation/arm/sa1100/brutus.rst                |  69 +++
 Documentation/arm/sa1100/cerf.rst                  |  35 ++
 Documentation/arm/sa1100/freebird.rst              |  25 +
 Documentation/arm/sa1100/graphicsclient.rst        | 102 ++++
 Documentation/arm/sa1100/graphicsmaster.rst        |  60 +++
 Documentation/arm/sa1100/huw_webpanel.rst          |  21 +
 Documentation/arm/sa1100/index.rst                 |  23 +
 Documentation/arm/sa1100/itsy.rst                  |  47 ++
 Documentation/arm/sa1100/lart.rst                  |  15 +
 Documentation/arm/sa1100/nanoengine.rst            |  11 +
 Documentation/arm/sa1100/pangolin.rst              |  29 ++
 Documentation/arm/sa1100/pleb.rst                  |  13 +
 Documentation/arm/sa1100/serial_uart.rst           |  51 ++
 Documentation/arm/sa1100/tifon.rst                 |   7 +
 Documentation/arm/sa1100/yopy.rst                  |   5 +
 Documentation/arm/samsung-s3c24xx/cpufreq.rst      |  76 +++
 Documentation/arm/samsung-s3c24xx/eb2410itx.rst    |  59 +++
 Documentation/arm/samsung-s3c24xx/gpio.rst         | 172 +++++++
 Documentation/arm/samsung-s3c24xx/h1940.rst        |  41 ++
 Documentation/arm/samsung-s3c24xx/index.rst        |  18 +
 Documentation/arm/samsung-s3c24xx/nand.rst         |  30 ++
 Documentation/arm/samsung-s3c24xx/overview.rst     | 319 ++++++++++++
 Documentation/arm/samsung-s3c24xx/s3c2412.rst      | 121 +++++
 Documentation/arm/samsung-s3c24xx/s3c2413.rst      |  22 +
 Documentation/arm/samsung-s3c24xx/smdk2440.rst     |  57 +++
 Documentation/arm/samsung-s3c24xx/suspend.rst      | 137 ++++++
 Documentation/arm/samsung-s3c24xx/usb-host.rst     |  91 ++++
 Documentation/arm/samsung/bootloader-interface.rst |  81 ++++
 .../arm/samsung/clksrc-change-registers.awk        | 166 +++++++
 Documentation/arm/samsung/gpio.rst                 |  41 ++
 Documentation/arm/samsung/index.rst                |  10 +
 Documentation/arm/samsung/overview.rst             |  89 ++++
 Documentation/arm/setup.rst                        | 108 +++++
 Documentation/arm/sh-mobile/.gitignore             |   1 +
 Documentation/arm/spear/overview.rst               |  65 +++
 Documentation/arm/sti/overview.rst                 |  36 ++
 Documentation/arm/sti/overview.txt                 |  33 --
 Documentation/arm/sti/stih407-overview.rst         |  19 +
 Documentation/arm/sti/stih407-overview.txt         |  18 -
 Documentation/arm/sti/stih415-overview.rst         |  14 +
 Documentation/arm/sti/stih415-overview.txt         |  12 -
 Documentation/arm/sti/stih416-overview.rst         |  13 +
 Documentation/arm/sti/stih416-overview.txt         |  12 -
 Documentation/arm/sti/stih418-overview.rst         |  21 +
 Documentation/arm/sti/stih418-overview.txt         |  20 -
 Documentation/arm/stm32/overview.rst               |   2 -
 Documentation/arm/stm32/stm32f429-overview.rst     |   7 +-
 Documentation/arm/stm32/stm32f746-overview.rst     |   7 +-
 Documentation/arm/stm32/stm32f769-overview.rst     |   7 +-
 Documentation/arm/stm32/stm32h743-overview.rst     |   7 +-
 Documentation/arm/stm32/stm32mp157-overview.rst    |   3 +-
 Documentation/arm/sunxi.rst                        | 150 ++++++
 Documentation/arm/sunxi/README                     | 102 ----
 Documentation/arm/sunxi/clocks.rst                 |  57 +++
 Documentation/arm/sunxi/clocks.txt                 |  56 ---
 Documentation/arm/swp_emulation                    |  27 --
 Documentation/arm/swp_emulation.rst                |  27 ++
 Documentation/arm/tcm.rst                          | 161 +++++++
 Documentation/arm/tcm.txt                          | 155 ------
 Documentation/arm/uefi.rst                         |  67 +++
 Documentation/arm/uefi.txt                         |  60 ---
 Documentation/arm/vfp/release-notes.rst            |  57 +++
 Documentation/arm/vlocks.rst                       | 212 ++++++++
 Documentation/arm/vlocks.txt                       | 211 --------
 Documentation/devicetree/bindings/arm/xen.txt      |   2 +-
 Documentation/devicetree/booting-without-of.txt    |   4 +-
 Documentation/index.rst                            |   1 +
 Documentation/translations/zh_CN/arm/Booting       |   4 +-
 .../translations/zh_CN/arm/kernel_user_helpers.txt |   4 +-
 158 files changed, 7698 insertions(+), 7133 deletions(-)
 delete mode 100644 Documentation/arm/Booting
 delete mode 100644 Documentation/arm/IXP4xx
 delete mode 100644 Documentation/arm/Interrupts
 delete mode 100644 Documentation/arm/Marvell/README
 delete mode 100644 Documentation/arm/Microchip/README
 delete mode 100644 Documentation/arm/Netwinder
 delete mode 100644 Documentation/arm/OMAP/DSS
 delete mode 100644 Documentation/arm/OMAP/README
 delete mode 100644 Documentation/arm/OMAP/omap_pm
 delete mode 100644 Documentation/arm/Porting
 delete mode 100644 Documentation/arm/README
 delete mode 100644 Documentation/arm/SA1100/ADSBitsy
 delete mode 100644 Documentation/arm/SA1100/Assabet
 delete mode 100644 Documentation/arm/SA1100/Brutus
 delete mode 100644 Documentation/arm/SA1100/CERF
 delete mode 100644 Documentation/arm/SA1100/FreeBird
 delete mode 100644 Documentation/arm/SA1100/GraphicsClient
 delete mode 100644 Documentation/arm/SA1100/GraphicsMaster
 delete mode 100644 Documentation/arm/SA1100/HUW_WEBPANEL
 delete mode 100644 Documentation/arm/SA1100/Itsy
 delete mode 100644 Documentation/arm/SA1100/LART
 delete mode 100644 Documentation/arm/SA1100/PLEB
 delete mode 100644 Documentation/arm/SA1100/Pangolin
 delete mode 100644 Documentation/arm/SA1100/Tifon
 delete mode 100644 Documentation/arm/SA1100/Yopy
 delete mode 100644 Documentation/arm/SA1100/empeg
 delete mode 100644 Documentation/arm/SA1100/nanoEngine
 delete mode 100644 Documentation/arm/SA1100/serial_UART
 delete mode 100644 Documentation/arm/SH-Mobile/.gitignore
 delete mode 100644 Documentation/arm/SPEAr/overview.txt
 delete mode 100644 Documentation/arm/Samsung-S3C24XX/CPUfreq.txt
 delete mode 100644 Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt
 delete mode 100644 Documentation/arm/Samsung-S3C24XX/GPIO.txt
 delete mode 100644 Documentation/arm/Samsung-S3C24XX/H1940.txt
 delete mode 100644 Documentation/arm/Samsung-S3C24XX/NAND.txt
 delete mode 100644 Documentation/arm/Samsung-S3C24XX/Overview.txt
 delete mode 100644 Documentation/arm/Samsung-S3C24XX/S3C2412.txt
 delete mode 100644 Documentation/arm/Samsung-S3C24XX/S3C2413.txt
 delete mode 100644 Documentation/arm/Samsung-S3C24XX/SMDK2440.txt
 delete mode 100644 Documentation/arm/Samsung-S3C24XX/Suspend.txt
 delete mode 100644 Documentation/arm/Samsung-S3C24XX/USB-Host.txt
 delete mode 100644 Documentation/arm/Samsung/Bootloader-interface.txt
 delete mode 100644 Documentation/arm/Samsung/GPIO.txt
 delete mode 100644 Documentation/arm/Samsung/Overview.txt
 delete mode 100755 Documentation/arm/Samsung/clksrc-change-registers.awk
 delete mode 100644 Documentation/arm/Setup
 delete mode 100644 Documentation/arm/VFP/release-notes.txt
 create mode 100644 Documentation/arm/arm.rst
 create mode 100644 Documentation/arm/booting.rst
 create mode 100644 Documentation/arm/cluster-pm-race-avoidance.rst
 delete mode 100644 Documentation/arm/cluster-pm-race-avoidance.txt
 create mode 100644 Documentation/arm/firmware.rst
 delete mode 100644 Documentation/arm/firmware.txt
 create mode 100644 Documentation/arm/index.rst
 create mode 100644 Documentation/arm/interrupts.rst
 create mode 100644 Documentation/arm/ixp4xx.rst
 create mode 100644 Documentation/arm/kernel_mode_neon.rst
 delete mode 100644 Documentation/arm/kernel_mode_neon.txt
 create mode 100644 Documentation/arm/kernel_user_helpers.rst
 delete mode 100644 Documentation/arm/kernel_user_helpers.txt
 delete mode 100644 Documentation/arm/keystone/Overview.txt
 create mode 100644 Documentation/arm/keystone/knav-qmss.rst
 delete mode 100644 Documentation/arm/keystone/knav-qmss.txt
 create mode 100644 Documentation/arm/keystone/overview.rst
 create mode 100644 Documentation/arm/marvel.rst
 delete mode 100644 Documentation/arm/mem_alignment
 create mode 100644 Documentation/arm/mem_alignment.rst
 create mode 100644 Documentation/arm/memory.rst
 delete mode 100644 Documentation/arm/memory.txt
 create mode 100644 Documentation/arm/microchip.rst
 create mode 100644 Documentation/arm/netwinder.rst
 delete mode 100644 Documentation/arm/nwfpe/NOTES
 delete mode 100644 Documentation/arm/nwfpe/README
 delete mode 100644 Documentation/arm/nwfpe/README.FPE
 delete mode 100644 Documentation/arm/nwfpe/TODO
 create mode 100644 Documentation/arm/nwfpe/index.rst
 create mode 100644 Documentation/arm/nwfpe/netwinder-fpe.rst
 create mode 100644 Documentation/arm/nwfpe/notes.rst
 create mode 100644 Documentation/arm/nwfpe/nwfpe.rst
 create mode 100644 Documentation/arm/nwfpe/todo.rst
 create mode 100644 Documentation/arm/omap/dss.rst
 create mode 100644 Documentation/arm/omap/index.rst
 create mode 100644 Documentation/arm/omap/omap.rst
 create mode 100644 Documentation/arm/omap/omap_pm.rst
 create mode 100644 Documentation/arm/porting.rst
 create mode 100644 Documentation/arm/pxa/mfp.rst
 delete mode 100644 Documentation/arm/pxa/mfp.txt
 create mode 100644 Documentation/arm/sa1100/adsbitsy.rst
 create mode 100644 Documentation/arm/sa1100/assabet.rst
 create mode 100644 Documentation/arm/sa1100/brutus.rst
 create mode 100644 Documentation/arm/sa1100/cerf.rst
 create mode 100644 Documentation/arm/sa1100/freebird.rst
 create mode 100644 Documentation/arm/sa1100/graphicsclient.rst
 create mode 100644 Documentation/arm/sa1100/graphicsmaster.rst
 create mode 100644 Documentation/arm/sa1100/huw_webpanel.rst
 create mode 100644 Documentation/arm/sa1100/index.rst
 create mode 100644 Documentation/arm/sa1100/itsy.rst
 create mode 100644 Documentation/arm/sa1100/lart.rst
 create mode 100644 Documentation/arm/sa1100/nanoengine.rst
 create mode 100644 Documentation/arm/sa1100/pangolin.rst
 create mode 100644 Documentation/arm/sa1100/pleb.rst
 create mode 100644 Documentation/arm/sa1100/serial_uart.rst
 create mode 100644 Documentation/arm/sa1100/tifon.rst
 create mode 100644 Documentation/arm/sa1100/yopy.rst
 create mode 100644 Documentation/arm/samsung-s3c24xx/cpufreq.rst
 create mode 100644 Documentation/arm/samsung-s3c24xx/eb2410itx.rst
 create mode 100644 Documentation/arm/samsung-s3c24xx/gpio.rst
 create mode 100644 Documentation/arm/samsung-s3c24xx/h1940.rst
 create mode 100644 Documentation/arm/samsung-s3c24xx/index.rst
 create mode 100644 Documentation/arm/samsung-s3c24xx/nand.rst
 create mode 100644 Documentation/arm/samsung-s3c24xx/overview.rst
 create mode 100644 Documentation/arm/samsung-s3c24xx/s3c2412.rst
 create mode 100644 Documentation/arm/samsung-s3c24xx/s3c2413.rst
 create mode 100644 Documentation/arm/samsung-s3c24xx/smdk2440.rst
 create mode 100644 Documentation/arm/samsung-s3c24xx/suspend.rst
 create mode 100644 Documentation/arm/samsung-s3c24xx/usb-host.rst
 create mode 100644 Documentation/arm/samsung/bootloader-interface.rst
 create mode 100755 Documentation/arm/samsung/clksrc-change-registers.awk
 create mode 100644 Documentation/arm/samsung/gpio.rst
 create mode 100644 Documentation/arm/samsung/index.rst
 create mode 100644 Documentation/arm/samsung/overview.rst
 create mode 100644 Documentation/arm/setup.rst
 create mode 100644 Documentation/arm/sh-mobile/.gitignore
 create mode 100644 Documentation/arm/spear/overview.rst
 create mode 100644 Documentation/arm/sti/overview.rst
 delete mode 100644 Documentation/arm/sti/overview.txt
 create mode 100644 Documentation/arm/sti/stih407-overview.rst
 delete mode 100644 Documentation/arm/sti/stih407-overview.txt
 create mode 100644 Documentation/arm/sti/stih415-overview.rst
 delete mode 100644 Documentation/arm/sti/stih415-overview.txt
 create mode 100644 Documentation/arm/sti/stih416-overview.rst
 delete mode 100644 Documentation/arm/sti/stih416-overview.txt
 create mode 100644 Documentation/arm/sti/stih418-overview.rst
 delete mode 100644 Documentation/arm/sti/stih418-overview.txt
 create mode 100644 Documentation/arm/sunxi.rst
 delete mode 100644 Documentation/arm/sunxi/README
 create mode 100644 Documentation/arm/sunxi/clocks.rst
 delete mode 100644 Documentation/arm/sunxi/clocks.txt
 delete mode 100644 Documentation/arm/swp_emulation
 create mode 100644 Documentation/arm/swp_emulation.rst
 create mode 100644 Documentation/arm/tcm.rst
 delete mode 100644 Documentation/arm/tcm.txt
 create mode 100644 Documentation/arm/uefi.rst
 delete mode 100644 Documentation/arm/uefi.txt
 create mode 100644 Documentation/arm/vfp/release-notes.rst
 create mode 100644 Documentation/arm/vlocks.rst
 delete mode 100644 Documentation/arm/vlocks.txt

(limited to 'Documentation')

diff --git a/Documentation/arm/Booting b/Documentation/arm/Booting
deleted file mode 100644
index f1f965ce93d6..000000000000
--- a/Documentation/arm/Booting
+++ /dev/null
@@ -1,218 +0,0 @@
-			Booting ARM Linux
-			=================
-
-Author:	Russell King
-Date  : 18 May 2002
-
-The following documentation is relevant to 2.4.18-rmk6 and beyond.
-
-In order to boot ARM Linux, you require a boot loader, which is a small
-program that runs before the main kernel.  The boot loader is expected
-to initialise various devices, and eventually call the Linux kernel,
-passing information to the kernel.
-
-Essentially, the boot loader should provide (as a minimum) the
-following:
-
-1. Setup and initialise the RAM.
-2. Initialise one serial port.
-3. Detect the machine type.
-4. Setup the kernel tagged list.
-5. Load initramfs.
-6. Call the kernel image.
-
-
-1. Setup and initialise RAM
----------------------------
-
-Existing boot loaders:		MANDATORY
-New boot loaders:		MANDATORY
-
-The boot loader is expected to find and initialise all RAM that the
-kernel will use for volatile data storage in the system.  It performs
-this in a machine dependent manner.  (It may use internal algorithms
-to automatically locate and size all RAM, or it may use knowledge of
-the RAM in the machine, or any other method the boot loader designer
-sees fit.)
-
-
-2. Initialise one serial port
------------------------------
-
-Existing boot loaders:		OPTIONAL, RECOMMENDED
-New boot loaders:		OPTIONAL, RECOMMENDED
-
-The boot loader should initialise and enable one serial port on the
-target.  This allows the kernel serial driver to automatically detect
-which serial port it should use for the kernel console (generally
-used for debugging purposes, or communication with the target.)
-
-As an alternative, the boot loader can pass the relevant 'console='
-option to the kernel via the tagged lists specifying the port, and
-serial format options as described in
-
-       Documentation/admin-guide/kernel-parameters.rst.
-
-
-3. Detect the machine type
---------------------------
-
-Existing boot loaders:		OPTIONAL
-New boot loaders:		MANDATORY except for DT-only platforms
-
-The boot loader should detect the machine type its running on by some
-method.  Whether this is a hard coded value or some algorithm that
-looks at the connected hardware is beyond the scope of this document.
-The boot loader must ultimately be able to provide a MACH_TYPE_xxx
-value to the kernel. (see linux/arch/arm/tools/mach-types).  This
-should be passed to the kernel in register r1.
-
-For DT-only platforms, the machine type will be determined by device
-tree.  set the machine type to all ones (~0).  This is not strictly
-necessary, but assures that it will not match any existing types.
-
-4. Setup boot data
-------------------
-
-Existing boot loaders:		OPTIONAL, HIGHLY RECOMMENDED
-New boot loaders:		MANDATORY
-
-The boot loader must provide either a tagged list or a dtb image for
-passing configuration data to the kernel.  The physical address of the
-boot data is passed to the kernel in register r2.
-
-4a. Setup the kernel tagged list
---------------------------------
-
-The boot loader must create and initialise the kernel tagged list.
-A valid tagged list starts with ATAG_CORE and ends with ATAG_NONE.
-The ATAG_CORE tag may or may not be empty.  An empty ATAG_CORE tag
-has the size field set to '2' (0x00000002).  The ATAG_NONE must set
-the size field to zero.
-
-Any number of tags can be placed in the list.  It is undefined
-whether a repeated tag appends to the information carried by the
-previous tag, or whether it replaces the information in its
-entirety; some tags behave as the former, others the latter.
-
-The boot loader must pass at a minimum the size and location of
-the system memory, and root filesystem location.  Therefore, the
-minimum tagged list should look:
-
-	+-----------+
-base ->	| ATAG_CORE |  |
-	+-----------+  |
-	| ATAG_MEM  |  | increasing address
-	+-----------+  |
-	| ATAG_NONE |  |
-	+-----------+  v
-
-The tagged list should be stored in system RAM.
-
-The tagged list must be placed in a region of memory where neither
-the kernel decompressor nor initrd 'bootp' program will overwrite
-it.  The recommended placement is in the first 16KiB of RAM.
-
-4b. Setup the device tree
--------------------------
-
-The boot loader must load a device tree image (dtb) into system ram
-at a 64bit aligned address and initialize it with the boot data.  The
-dtb format is documented in Documentation/devicetree/booting-without-of.txt.
-The kernel will look for the dtb magic value of 0xd00dfeed at the dtb
-physical address to determine if a dtb has been passed instead of a
-tagged list.
-
-The boot loader must pass at a minimum the size and location of the
-system memory, and the root filesystem location.  The dtb must be
-placed in a region of memory where the kernel decompressor will not
-overwrite it, while remaining within the region which will be covered
-by the kernel's low-memory mapping.
-
-A safe location is just above the 128MiB boundary from start of RAM.
-
-5. Load initramfs.
-------------------
-
-Existing boot loaders:		OPTIONAL
-New boot loaders:		OPTIONAL
-
-If an initramfs is in use then, as with the dtb, it must be placed in
-a region of memory where the kernel decompressor will not overwrite it
-while also with the region which will be covered by the kernel's
-low-memory mapping.
-
-A safe location is just above the device tree blob which itself will
-be loaded just above the 128MiB boundary from the start of RAM as
-recommended above.
-
-6. Calling the kernel image
----------------------------
-
-Existing boot loaders:		MANDATORY
-New boot loaders:		MANDATORY
-
-There are two options for calling the kernel zImage.  If the zImage
-is stored in flash, and is linked correctly to be run from flash,
-then it is legal for the boot loader to call the zImage in flash
-directly.
-
-The zImage may also be placed in system RAM and called there.  The
-kernel should be placed in the first 128MiB of RAM.  It is recommended
-that it is loaded above 32MiB in order to avoid the need to relocate
-prior to decompression, which will make the boot process slightly
-faster.
-
-When booting a raw (non-zImage) kernel the constraints are tighter.
-In this case the kernel must be loaded at an offset into system equal
-to TEXT_OFFSET - PAGE_OFFSET.
-
-In any case, the following conditions must be met:
-
-- Quiesce all DMA capable devices so that memory does not get
-  corrupted by bogus network packets or disk data. This will save
-  you many hours of debug.
-
-- CPU register settings
-  r0 = 0,
-  r1 = machine type number discovered in (3) above.
-  r2 = physical address of tagged list in system RAM, or
-       physical address of device tree block (dtb) in system RAM
-
-- CPU mode
-  All forms of interrupts must be disabled (IRQs and FIQs)
-
-  For CPUs which do not include the ARM virtualization extensions, the
-  CPU must be in SVC mode.  (A special exception exists for Angel)
-
-  CPUs which include support for the virtualization extensions can be
-  entered in HYP mode in order to enable the kernel to make full use of
-  these extensions.  This is the recommended boot method for such CPUs,
-  unless the virtualisations are already in use by a pre-installed
-  hypervisor.
-
-  If the kernel is not entered in HYP mode for any reason, it must be
-  entered in SVC mode.
-
-- Caches, MMUs
-  The MMU must be off.
-  Instruction cache may be on or off.
-  Data cache must be off.
-
-  If the kernel is entered in HYP mode, the above requirements apply to
-  the HYP mode configuration in addition to the ordinary PL1 (privileged
-  kernel modes) configuration.  In addition, all traps into the
-  hypervisor must be disabled, and PL1 access must be granted for all
-  peripherals and CPU resources for which this is architecturally
-  possible.  Except for entering in HYP mode, the system configuration
-  should be such that a kernel which does not include support for the
-  virtualization extensions can boot correctly without extra help.
-
-- The boot loader is expected to call the kernel image by jumping
-  directly to the first instruction of the kernel image.
-
-  On CPUs supporting the ARM instruction set, the entry must be
-  made in ARM state, even for a Thumb-2 kernel.
-
-  On CPUs supporting only the Thumb instruction set such as
-  Cortex-M class CPUs, the entry must be made in Thumb state.
diff --git a/Documentation/arm/IXP4xx b/Documentation/arm/IXP4xx
deleted file mode 100644
index e48b74de6ac0..000000000000
--- a/Documentation/arm/IXP4xx
+++ /dev/null
@@ -1,172 +0,0 @@
-
--------------------------------------------------------------------------
-Release Notes for Linux on Intel's IXP4xx Network Processor
-
-Maintained by Deepak Saxena <dsaxena@plexity.net>
--------------------------------------------------------------------------
-
-1. Overview
-
-Intel's IXP4xx network processor is a highly integrated SOC that
-is targeted for network applications, though it has become popular 
-in industrial control and other areas due to low cost and power
-consumption. The IXP4xx family currently consists of several processors
-that support different network offload functions such as encryption,
-routing, firewalling, etc. The IXP46x family is an updated version which
-supports faster speeds, new memory and flash configurations, and more
-integration such as an on-chip I2C controller.
-
-For more information on the various versions of the CPU, see:
-
-   http://developer.intel.com/design/network/products/npfamily/ixp4xx.htm
-
-Intel also made the IXCP1100 CPU for sometime which is an IXP4xx 
-stripped of much of the network intelligence.
-
-2. Linux Support
-
-Linux currently supports the following features on the IXP4xx chips:
-
-- Dual serial ports
-- PCI interface
-- Flash access (MTD/JFFS)
-- I2C through GPIO on IXP42x
-- GPIO for input/output/interrupts 
-  See arch/arm/mach-ixp4xx/include/mach/platform.h for access functions.
-- Timers (watchdog, OS)
-
-The following components of the chips are not supported by Linux and
-require the use of Intel's proprietary CSR software:
-
-- USB device interface
-- Network interfaces (HSS, Utopia, NPEs, etc)
-- Network offload functionality
-
-If you need to use any of the above, you need to download Intel's
-software from:
-
-   http://developer.intel.com/design/network/products/npfamily/ixp425.htm    
-
-DO NOT POST QUESTIONS TO THE LINUX MAILING LISTS REGARDING THE PROPRIETARY
-SOFTWARE.
-
-There are several websites that provide directions/pointers on using
-Intel's software:
-
-   http://sourceforge.net/projects/ixp4xx-osdg/
-   Open Source Developer's Guide for using uClinux and the Intel libraries 
-
-http://gatewaymaker.sourceforge.net/ 
-   Simple one page summary of building a gateway using an IXP425 and Linux
-
-http://ixp425.sourceforge.net/
-   ATM device driver for IXP425 that relies on Intel's libraries
-
-3. Known Issues/Limitations
-
-3a. Limited inbound PCI window
-
-The IXP4xx family allows for up to 256MB of memory but the PCI interface
-can only expose 64MB of that memory to the PCI bus. This means that if
-you are running with > 64MB, all PCI buffers outside of the accessible
-range will be bounced using the routines in arch/arm/common/dmabounce.c.
-   
-3b. Limited outbound PCI window
-
-IXP4xx provides two methods of accessing PCI memory space:
-
-1) A direct mapped window from 0x48000000 to 0x4bffffff (64MB).
-   To access PCI via this space, we simply ioremap() the BAR
-   into the kernel and we can use the standard read[bwl]/write[bwl]
-   macros. This is the preffered method due to speed but it
-   limits the system to just 64MB of PCI memory. This can be 
-   problamatic if using video cards and other memory-heavy devices.
-          
-2) If > 64MB of memory space is required, the IXP4xx can be 
-   configured to use indirect registers to access PCI This allows 
-   for up to 128MB (0x48000000 to 0x4fffffff) of memory on the bus. 
-   The disadvantage of this is that every PCI access requires 
-   three local register accesses plus a spinlock, but in some 
-   cases the performance hit is acceptable. In addition, you cannot 
-   mmap() PCI devices in this case due to the indirect nature
-   of the PCI window.
-
-By default, the direct method is used for performance reasons. If
-you need more PCI memory, enable the IXP4XX_INDIRECT_PCI config option.
-
-3c. GPIO as Interrupts
-
-Currently the code only handles level-sensitive GPIO interrupts 
-
-4. Supported platforms
-
-ADI Engineering Coyote Gateway Reference Platform
-http://www.adiengineering.com/productsCoyote.html
-
-   The ADI Coyote platform is reference design for those building 
-   small residential/office gateways. One NPE is connected to a 10/100
-   interface, one to 4-port 10/100 switch, and the third to and ADSL
-   interface. In addition, it also supports to POTs interfaces connected
-   via SLICs. Note that those are not supported by Linux ATM. Finally,
-   the platform has two mini-PCI slots used for 802.11[bga] cards.
-   Finally, there is an IDE port hanging off the expansion bus.
-
-Gateworks Avila Network Platform
-http://www.gateworks.com/support/overview.php
-
-   The Avila platform is basically and IXDP425 with the 4 PCI slots
-   replaced with mini-PCI slots and a CF IDE interface hanging off
-   the expansion bus.
-
-Intel IXDP425 Development Platform
-http://www.intel.com/design/network/products/npfamily/ixdpg425.htm  
-
-   This is Intel's standard reference platform for the IXDP425 and is 
-   also known as the Richfield board. It contains 4 PCI slots, 16MB
-   of flash, two 10/100 ports and one ADSL port.
-
-Intel IXDP465 Development Platform
-http://www.intel.com/design/network/products/npfamily/ixdp465.htm
-
-   This is basically an IXDP425 with an IXP465 and 32M of flash instead
-   of just 16.
-
-Intel IXDPG425 Development Platform
-
-   This is basically and ADI Coyote board with a NEC EHCI controller
-   added. One issue with this board is that the mini-PCI slots only
-   have the 3.3v line connected, so you can't use a PCI to mini-PCI
-   adapter with an E100 card. So to NFS root you need to use either
-   the CSR or a WiFi card and a ramdisk that BOOTPs and then does
-   a pivot_root to NFS.
-
-Motorola PrPMC1100 Processor Mezanine Card
-http://www.fountainsys.com
-
-   The PrPMC1100 is based on the IXCP1100 and is meant to plug into
-   and IXP2400/2800 system to act as the system controller. It simply
-   contains a CPU and 16MB of flash on the board and needs to be
-   plugged into a carrier board to function. Currently Linux only
-   supports the Motorola PrPMC carrier board for this platform.
-
-5. TODO LIST
-
-- Add support for Coyote IDE
-- Add support for edge-based GPIO interrupts
-- Add support for CF IDE on expansion bus
-
-6. Thanks
-
-The IXP4xx work has been funded by Intel Corp. and MontaVista Software, Inc.
-
-The following people have contributed patches/comments/etc:
-
-Lennerty Buytenhek
-Lutz Jaenicke
-Justin Mayfield
-Robert E. Ranslam
-[I know I've forgotten others, please email me to be added] 
-
--------------------------------------------------------------------------
-
-Last Update: 01/04/2005
diff --git a/Documentation/arm/Interrupts b/Documentation/arm/Interrupts
deleted file mode 100644
index f09ab1b90ef1..000000000000
--- a/Documentation/arm/Interrupts
+++ /dev/null
@@ -1,167 +0,0 @@
-2.5.2-rmk5
-----------
-
-This is the first kernel that contains a major shake up of some of the
-major architecture-specific subsystems.
-
-Firstly, it contains some pretty major changes to the way we handle the
-MMU TLB.  Each MMU TLB variant is now handled completely separately -
-we have TLB v3, TLB v4 (without write buffer), TLB v4 (with write buffer),
-and finally TLB v4 (with write buffer, with I TLB invalidate entry).
-There is more assembly code inside each of these functions, mainly to
-allow more flexible TLB handling for the future.
-
-Secondly, the IRQ subsystem.
-
-The 2.5 kernels will be having major changes to the way IRQs are handled.
-Unfortunately, this means that machine types that touch the irq_desc[]
-array (basically all machine types) will break, and this means every
-machine type that we currently have.
-
-Lets take an example.  On the Assabet with Neponset, we have:
-
-                  GPIO25                 IRR:2
-        SA1100 ------------> Neponset -----------> SA1111
-                                         IIR:1
-                                      -----------> USAR
-                                         IIR:0
-                                      -----------> SMC9196
-
-The way stuff currently works, all SA1111 interrupts are mutually
-exclusive of each other - if you're processing one interrupt from the
-SA1111 and another comes in, you have to wait for that interrupt to
-finish processing before you can service the new interrupt.  Eg, an
-IDE PIO-based interrupt on the SA1111 excludes all other SA1111 and
-SMC9196 interrupts until it has finished transferring its multi-sector
-data, which can be a long time.  Note also that since we loop in the
-SA1111 IRQ handler, SA1111 IRQs can hold off SMC9196 IRQs indefinitely.
-
-
-The new approach brings several new ideas...
-
-We introduce the concept of a "parent" and a "child".  For example,
-to the Neponset handler, the "parent" is GPIO25, and the "children"d
-are SA1111, SMC9196 and USAR.
-
-We also bring the idea of an IRQ "chip" (mainly to reduce the size of
-the irqdesc array).  This doesn't have to be a real "IC"; indeed the
-SA11x0 IRQs are handled by two separate "chip" structures, one for
-GPIO0-10, and another for all the rest.  It is just a container for
-the various operations (maybe this'll change to a better name).
-This structure has the following operations:
-
-struct irqchip {
-        /*
-         * Acknowledge the IRQ.
-         * If this is a level-based IRQ, then it is expected to mask the IRQ
-         * as well.
-         */
-        void (*ack)(unsigned int irq);
-        /*
-         * Mask the IRQ in hardware.
-         */
-        void (*mask)(unsigned int irq);
-        /*
-         * Unmask the IRQ in hardware.
-         */
-        void (*unmask)(unsigned int irq);
-        /*
-         * Re-run the IRQ
-         */
-        void (*rerun)(unsigned int irq);
-        /*
-         * Set the type of the IRQ.
-         */
-        int (*type)(unsigned int irq, unsigned int, type);
-};
-
-ack    - required.  May be the same function as mask for IRQs
-         handled by do_level_IRQ.
-mask   - required.
-unmask - required.
-rerun  - optional.  Not required if you're using do_level_IRQ for all
-         IRQs that use this 'irqchip'.  Generally expected to re-trigger
-         the hardware IRQ if possible.  If not, may call the handler
-	 directly.
-type   - optional.  If you don't support changing the type of an IRQ,
-         it should be null so people can detect if they are unable to
-         set the IRQ type.
-
-For each IRQ, we keep the following information:
-
-        - "disable" depth (number of disable_irq()s without enable_irq()s)
-        - flags indicating what we can do with this IRQ (valid, probe,
-          noautounmask) as before
-        - status of the IRQ (probing, enable, etc)
-        - chip
-        - per-IRQ handler
-        - irqaction structure list
-
-The handler can be one of the 3 standard handlers - "level", "edge" and
-"simple", or your own specific handler if you need to do something special.
-
-The "level" handler is what we currently have - its pretty simple.
-"edge" knows about the brokenness of such IRQ implementations - that you
-need to leave the hardware IRQ enabled while processing it, and queueing
-further IRQ events should the IRQ happen again while processing.  The
-"simple" handler is very basic, and does not perform any hardware
-manipulation, nor state tracking.  This is useful for things like the
-SMC9196 and USAR above.
-
-So, what's changed?
-
-1. Machine implementations must not write to the irqdesc array.
-
-2. New functions to manipulate the irqdesc array.  The first 4 are expected
-   to be useful only to machine specific code.  The last is recommended to
-   only be used by machine specific code, but may be used in drivers if
-   absolutely necessary.
-
-        set_irq_chip(irq,chip)
-
-                Set the mask/unmask methods for handling this IRQ
-
-        set_irq_handler(irq,handler)
-
-                Set the handler for this IRQ (level, edge, simple)
-
-        set_irq_chained_handler(irq,handler)
-
-                Set a "chained" handler for this IRQ - automatically
-                enables this IRQ (eg, Neponset and SA1111 handlers).
-
-        set_irq_flags(irq,flags)
-
-                Set the valid/probe/noautoenable flags.
-
-        set_irq_type(irq,type)
-
-                Set active the IRQ edge(s)/level.  This replaces the
-                SA1111 INTPOL manipulation, and the set_GPIO_IRQ_edge()
-                function.  Type should be one of IRQ_TYPE_xxx defined in
-		<linux/irq.h>
-
-3. set_GPIO_IRQ_edge() is obsolete, and should be replaced by set_irq_type.
-
-4. Direct access to SA1111 INTPOL is deprecated.  Use set_irq_type instead.
-
-5. A handler is expected to perform any necessary acknowledgement of the
-   parent IRQ via the correct chip specific function.  For instance, if
-   the SA1111 is directly connected to a SA1110 GPIO, then you should
-   acknowledge the SA1110 IRQ each time you re-read the SA1111 IRQ status.
-
-6. For any child which doesn't have its own IRQ enable/disable controls
-   (eg, SMC9196), the handler must mask or acknowledge the parent IRQ
-   while the child handler is called, and the child handler should be the
-   "simple" handler (not "edge" nor "level").  After the handler completes,
-   the parent IRQ should be unmasked, and the status of all children must
-   be re-checked for pending events.  (see the Neponset IRQ handler for
-   details).
-
-7. fixup_irq() is gone, as is arch/arm/mach-*/include/mach/irq.h
-
-Please note that this will not solve all problems - some of them are
-hardware based.  Mixing level-based and edge-based IRQs on the same
-parent signal (eg neponset) is one such area where a software based
-solution can't provide the full answer to low IRQ latency.
-
diff --git a/Documentation/arm/Marvell/README b/Documentation/arm/Marvell/README
deleted file mode 100644
index 56ada27c53be..000000000000
--- a/Documentation/arm/Marvell/README
+++ /dev/null
@@ -1,395 +0,0 @@
-ARM Marvell SoCs
-================
-
-This document lists all the ARM Marvell SoCs that are currently
-supported in mainline by the Linux kernel. As the Marvell families of
-SoCs are large and complex, it is hard to understand where the support
-for a particular SoC is available in the Linux kernel. This document
-tries to help in understanding where those SoCs are supported, and to
-match them with their corresponding public datasheet, when available.
-
-Orion family
-------------
-
-  Flavors:
-        88F5082
-        88F5181
-        88F5181L
-        88F5182
-               Datasheet               : http://www.embeddedarm.com/documentation/third-party/MV88F5182-datasheet.pdf
-               Programmer's User Guide : http://www.embeddedarm.com/documentation/third-party/MV88F5182-opensource-manual.pdf
-               User Manual             : http://www.embeddedarm.com/documentation/third-party/MV88F5182-usermanual.pdf
-        88F5281
-               Datasheet               : http://www.ocmodshop.com/images/reviews/networking/qnap_ts409u/marvel_88f5281_data_sheet.pdf
-        88F6183
-  Core: Feroceon 88fr331 (88f51xx) or 88fr531-vd (88f52xx) ARMv5 compatible
-  Linux kernel mach directory: arch/arm/mach-orion5x
-  Linux kernel plat directory: arch/arm/plat-orion
-
-Kirkwood family
----------------
-
-  Flavors:
-        88F6282 a.k.a Armada 300
-                Product Brief  : http://www.marvell.com/embedded-processors/armada-300/assets/armada_310.pdf
-        88F6283 a.k.a Armada 310
-                Product Brief  : http://www.marvell.com/embedded-processors/armada-300/assets/armada_310.pdf
-        88F6190
-                Product Brief  : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6190-003_WEB.pdf
-                Hardware Spec  : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F619x_OpenSource.pdf
-                Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
-        88F6192
-                Product Brief  : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6192-003_ver1.pdf
-                Hardware Spec  : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F619x_OpenSource.pdf
-                Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
-        88F6182
-        88F6180
-                Product Brief  : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6180-003_ver1.pdf
-                Hardware Spec  : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6180_OpenSource.pdf
-                Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
-        88F6281
-                Product Brief  : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6281-004_ver1.pdf
-                Hardware Spec  : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6281_OpenSource.pdf
-                Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
-  Homepage: http://www.marvell.com/embedded-processors/kirkwood/
-  Core: Feroceon 88fr131 ARMv5 compatible
-  Linux kernel mach directory: arch/arm/mach-mvebu
-  Linux kernel plat directory: none
-
-Discovery family
-----------------
-
-  Flavors:
-        MV78100
-                Product Brief  : http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV78100-003_WEB.pdf
-                Hardware Spec  : http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV78100_OpenSource.pdf
-                Functional Spec: http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
-        MV78200
-                Product Brief  : http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV78200-002_WEB.pdf
-                Hardware Spec  : http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV78200_OpenSource.pdf
-                Functional Spec: http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
-        MV76100
-                Not supported by the Linux kernel.
-
-  Core: Feroceon 88fr571-vd ARMv5 compatible
-
-  Linux kernel mach directory: arch/arm/mach-mv78xx0
-  Linux kernel plat directory: arch/arm/plat-orion
-
-EBU Armada family
------------------
-
-  Armada 370 Flavors:
-        88F6710
-        88F6707
-        88F6W11
-    Product Brief:   http://www.marvell.com/embedded-processors/armada-300/assets/Marvell_ARMADA_370_SoC.pdf
-    Hardware Spec:   http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-datasheet.pdf
-    Functional Spec: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-FunctionalSpec-datasheet.pdf
-    Core: Sheeva ARMv7 compatible PJ4B
-
-  Armada 375 Flavors:
-	88F6720
-    Product Brief: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA_375_SoC-01_product_brief.pdf
-    Core: ARM Cortex-A9
-
-  Armada 38x Flavors:
-	88F6810	Armada 380
-	88F6820 Armada 385
-	88F6828 Armada 388
-    Product infos:   http://www.marvell.com/embedded-processors/armada-38x/
-    Functional Spec: https://marvellcorp.wufoo.com/forms/marvell-armada-38x-functional-specifications/
-    Core: ARM Cortex-A9
-
-  Armada 39x Flavors:
-	88F6920 Armada 390
-	88F6928 Armada 398
-    Product infos: http://www.marvell.com/embedded-processors/armada-39x/
-    Core: ARM Cortex-A9
-
-  Armada XP Flavors:
-        MV78230
-        MV78260
-        MV78460
-    NOTE: not to be confused with the non-SMP 78xx0 SoCs
-    Product Brief: http://www.marvell.com/embedded-processors/armada-xp/assets/Marvell-ArmadaXP-SoC-product%20brief.pdf
-    Functional Spec: http://www.marvell.com/embedded-processors/armada-xp/assets/ARMADA-XP-Functional-SpecDatasheet.pdf
-    Hardware Specs:
-      http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78230_OS.PDF
-      http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78260_OS.PDF
-      http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78460_OS.PDF
-    Core: Sheeva ARMv7 compatible Dual-core or Quad-core PJ4B-MP
-
-  Linux kernel mach directory: arch/arm/mach-mvebu
-  Linux kernel plat directory: none
-
-EBU Armada family ARMv8
------------------------
-
-  Armada 3710/3720 Flavors:
-	88F3710
-	88F3720
-	Core: ARM Cortex A53 (ARMv8)
-
-	Homepage: http://www.marvell.com/embedded-processors/armada-3700/
-	Product Brief: http://www.marvell.com/embedded-processors/assets/PB-88F3700-FNL.pdf
-	Device tree files: arch/arm64/boot/dts/marvell/armada-37*
-
-  Armada 7K Flavors:
-	88F7020 (AP806 Dual + one CP110)
-	88F7040 (AP806 Quad + one CP110)
-	Core: ARM Cortex A72
-
-	Homepage: http://www.marvell.com/embedded-processors/armada-70xx/
-	Product Brief: http://www.marvell.com/embedded-processors/assets/Armada7020PB-Jan2016.pdf
-		       http://www.marvell.com/embedded-processors/assets/Armada7040PB-Jan2016.pdf
-	Device tree files: arch/arm64/boot/dts/marvell/armada-70*
-
-  Armada 8K Flavors:
-	88F8020 (AP806 Dual + two CP110)
-	88F8040 (AP806 Quad + two CP110)
-	Core: ARM Cortex A72
-
-	Homepage: http://www.marvell.com/embedded-processors/armada-80xx/
-	Product Brief: http://www.marvell.com/embedded-processors/assets/Armada8020PB-Jan2016.pdf
-		       http://www.marvell.com/embedded-processors/assets/Armada8040PB-Jan2016.pdf
-	Device tree files: arch/arm64/boot/dts/marvell/armada-80*
-
-Avanta family
--------------
-
-  Flavors:
-       88F6510
-       88F6530P
-       88F6550
-       88F6560
-  Homepage     : http://www.marvell.com/broadband/
-  Product Brief: http://www.marvell.com/broadband/assets/Marvell_Avanta_88F6510_305_060-001_product_brief.pdf
-  No public datasheet available.
-
-  Core: ARMv5 compatible
-
-  Linux kernel mach directory: no code in mainline yet, planned for the future
-  Linux kernel plat directory: no code in mainline yet, planned for the future
-
-Storage family
---------------
-
-  Armada SP:
-	88RC1580
-    Product infos: http://www.marvell.com/storage/armada-sp/
-    Core: Sheeva ARMv7 comatible Quad-core PJ4C
-    (not supported in upstream Linux kernel)
-
-Dove family (application processor)
------------------------------------
-
-  Flavors:
-        88AP510 a.k.a Armada 510
-                Product Brief   : http://www.marvell.com/application-processors/armada-500/assets/Marvell_Armada510_SoC.pdf
-                Hardware Spec   : http://www.marvell.com/application-processors/armada-500/assets/Armada-510-Hardware-Spec.pdf
-                Functional Spec : http://www.marvell.com/application-processors/armada-500/assets/Armada-510-Functional-Spec.pdf
-  Homepage: http://www.marvell.com/application-processors/armada-500/
-  Core: ARMv7 compatible
-
-  Directory: arch/arm/mach-mvebu (DT enabled platforms)
-             arch/arm/mach-dove (non-DT enabled platforms)
-
-PXA 2xx/3xx/93x/95x family
---------------------------
-
-  Flavors:
-        PXA21x, PXA25x, PXA26x
-             Application processor only
-             Core: ARMv5 XScale1 core
-        PXA270, PXA271, PXA272
-             Product Brief         : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_pb.pdf
-             Design guide          : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_design_guide.pdf
-             Developers manual     : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_dev_man.pdf
-             Specification         : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_emts.pdf
-             Specification update  : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_spec_update.pdf
-             Application processor only
-             Core: ARMv5 XScale2 core
-        PXA300, PXA310, PXA320
-             PXA 300 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA300_PB_R4.pdf
-             PXA 310 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA310_PB_R4.pdf
-             PXA 320 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA320_PB_R4.pdf
-             Design guide          : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Design_Guide.pdf
-             Developers manual     : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Developers_Manual.zip
-             Specifications        : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_EMTS.pdf
-             Specification Update  : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Spec_Update.zip
-             Reference Manual      : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_TavorP_BootROM_Ref_Manual.pdf
-             Application processor only
-             Core: ARMv5 XScale3 core
-        PXA930, PXA935
-             Application processor with Communication processor
-             Core: ARMv5 XScale3 core
-        PXA955
-             Application processor with Communication processor
-             Core: ARMv7 compatible Sheeva PJ4 core
-
-   Comments:
-
-    * This line of SoCs originates from the XScale family developed by
-      Intel and acquired by Marvell in ~2006. The PXA21x, PXA25x,
-      PXA26x, PXA27x, PXA3xx and PXA93x were developed by Intel, while
-      the later PXA95x were developed by Marvell.
-
-    * Due to their XScale origin, these SoCs have virtually nothing in
-      common with the other (Kirkwood, Dove, etc.) families of Marvell
-      SoCs, except with the MMP/MMP2 family of SoCs.
-
-   Linux kernel mach directory: arch/arm/mach-pxa
-   Linux kernel plat directory: arch/arm/plat-pxa
-
-MMP/MMP2/MMP3 family (communication processor)
------------------------------------------
-
-   Flavors:
-        PXA168, a.k.a Armada 168
-             Homepage             : http://www.marvell.com/application-processors/armada-100/armada-168.jsp
-             Product brief        : http://www.marvell.com/application-processors/armada-100/assets/pxa_168_pb.pdf
-             Hardware manual      : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_datasheet.pdf
-             Software manual      : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_software_manual.pdf
-             Specification update : http://www.marvell.com/application-processors/armada-100/assets/ARMADA16x_Spec_update.pdf
-             Boot ROM manual      : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_ref_manual.pdf
-             App node package     : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_app_note_package.pdf
-             Application processor only
-             Core: ARMv5 compatible Marvell PJ1 88sv331 (Mohawk)
-        PXA910/PXA920
-             Homepage             : http://www.marvell.com/communication-processors/pxa910/
-             Product Brief        : http://www.marvell.com/communication-processors/pxa910/assets/Marvell_PXA910_Platform-001_PB_final.pdf
-             Application processor with Communication processor
-             Core: ARMv5 compatible Marvell PJ1 88sv331 (Mohawk)
-        PXA688, a.k.a. MMP2, a.k.a Armada 610
-             Product Brief        : http://www.marvell.com/application-processors/armada-600/assets/armada610_pb.pdf
-             Application processor only
-             Core: ARMv7 compatible Sheeva PJ4 88sv581x core
-	PXA2128, a.k.a. MMP3 (OLPC XO4, Linux support not upstream)
-	     Product Brief	  : http://www.marvell.com/application-processors/armada/pxa2128/assets/Marvell-ARMADA-PXA2128-SoC-PB.pdf
-	     Application processor only
-	     Core: Dual-core ARMv7 compatible Sheeva PJ4C core
-	PXA960/PXA968/PXA978 (Linux support not upstream)
-	     Application processor with Communication Processor
-	     Core: ARMv7 compatible Sheeva PJ4 core
-	PXA986/PXA988 (Linux support not upstream)
-	     Application processor with Communication Processor
-	     Core: Dual-core ARMv7 compatible Sheeva PJ4B-MP core
-	PXA1088/PXA1920 (Linux support not upstream)
-	     Application processor with Communication Processor
-	     Core: quad-core ARMv7 Cortex-A7
-	PXA1908/PXA1928/PXA1936
-	     Application processor with Communication Processor
-	     Core: multi-core ARMv8 Cortex-A53
-
-   Comments:
-
-    * This line of SoCs originates from the XScale family developed by
-      Intel and acquired by Marvell in ~2006. All the processors of
-      this MMP/MMP2 family were developed by Marvell.
-
-    * Due to their XScale origin, these SoCs have virtually nothing in
-      common with the other (Kirkwood, Dove, etc.) families of Marvell
-      SoCs, except with the PXA family of SoCs listed above.
-
-   Linux kernel mach directory: arch/arm/mach-mmp
-   Linux kernel plat directory: arch/arm/plat-pxa
-
-Berlin family (Multimedia Solutions)
--------------------------------------
-
-  Flavors:
-	88DE3010, Armada 1000 (no Linux support)
-		Core:		Marvell PJ1 (ARMv5TE), Dual-core
-		Product Brief:	http://www.marvell.com.cn/digital-entertainment/assets/armada_1000_pb.pdf
-	88DE3005, Armada 1500 Mini
-		Design name:	BG2CD
-		Core:		ARM Cortex-A9, PL310 L2CC
-	88DE3006, Armada 1500 Mini Plus
-		Design name:	BG2CDP
-		Core:		Dual Core ARM Cortex-A7
-	88DE3100, Armada 1500
-		Design name:	BG2
-		Core:		Marvell PJ4B-MP (ARMv7), Tauros3 L2CC
-	88DE3114, Armada 1500 Pro
-		Design name:	BG2Q
-		Core:		Quad Core ARM Cortex-A9, PL310 L2CC
-	88DE3214, Armada 1500 Pro 4K
-		Design name:	BG3
-		Core:		ARM Cortex-A15, CA15 integrated L2CC
-	88DE3218, ARMADA 1500 Ultra
-		Core:		ARM Cortex-A53
-
-  Homepage: https://www.synaptics.com/products/multimedia-solutions
-  Directory: arch/arm/mach-berlin
-
-  Comments:
-
-   * This line of SoCs is based on Marvell Sheeva or ARM Cortex CPUs
-     with Synopsys DesignWare (IRQ, GPIO, Timers, ...) and PXA IP (SDHCI, USB, ETH, ...).
-
-   * The Berlin family was acquired by Synaptics from Marvell in 2017.
-
-CPU Cores
----------
-
-The XScale cores were designed by Intel, and shipped by Marvell in the older
-PXA processors. Feroceon is a Marvell designed core that developed in-house,
-and that evolved into Sheeva. The XScale and Feroceon cores were phased out
-over time and replaced with Sheeva cores in later products, which subsequently
-got replaced with licensed ARM Cortex-A cores.
-
-  XScale 1
-	CPUID 0x69052xxx
-	ARMv5, iWMMXt
-  XScale 2
-	CPUID 0x69054xxx
-	ARMv5, iWMMXt
-  XScale 3
-	CPUID 0x69056xxx or 0x69056xxx
-	ARMv5, iWMMXt
-  Feroceon-1850 88fr331 "Mohawk"
-	CPUID 0x5615331x or 0x41xx926x
-	ARMv5TE, single issue
-  Feroceon-2850 88fr531-vd "Jolteon"
-	CPUID 0x5605531x or 0x41xx926x
-	ARMv5TE, VFP, dual-issue
-  Feroceon 88fr571-vd "Jolteon"
-	CPUID 0x5615571x
-	ARMv5TE, VFP, dual-issue
-  Feroceon 88fr131 "Mohawk-D"
-	CPUID 0x5625131x
-	ARMv5TE, single-issue in-order
-  Sheeva PJ1 88sv331 "Mohawk"
-	CPUID 0x561584xx
-	ARMv5, single-issue iWMMXt v2
-  Sheeva PJ4 88sv581x "Flareon"
-	CPUID 0x560f581x
-	ARMv7, idivt, optional iWMMXt v2
-  Sheeva PJ4B 88sv581x
-	CPUID 0x561f581x
-	ARMv7, idivt, optional iWMMXt v2
-  Sheeva PJ4B-MP / PJ4C
-	CPUID 0x562f584x
-	ARMv7, idivt/idiva, LPAE, optional iWMMXt v2 and/or NEON
-
-Long-term plans
----------------
-
- * Unify the mach-dove/, mach-mv78xx0/, mach-orion5x/ into the
-   mach-mvebu/ to support all SoCs from the Marvell EBU (Engineering
-   Business Unit) in a single mach-<foo> directory. The plat-orion/
-   would therefore disappear.
-
- * Unify the mach-mmp/ and mach-pxa/ into the same mach-pxa
-   directory. The plat-pxa/ would therefore disappear.
-
-Credits
--------
-
- Maen Suleiman <maen@marvell.com>
- Lior Amsalem <alior@marvell.com>
- Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
- Andrew Lunn <andrew@lunn.ch>
- Nicolas Pitre <nico@fluxnic.net>
- Eric Miao <eric.y.miao@gmail.com>
diff --git a/Documentation/arm/Microchip/README b/Documentation/arm/Microchip/README
deleted file mode 100644
index a366f37d38f1..000000000000
--- a/Documentation/arm/Microchip/README
+++ /dev/null
@@ -1,169 +0,0 @@
-ARM Microchip SoCs (aka AT91)
-=============================
-
-
-Introduction
-------------
-This document gives useful information about the ARM Microchip SoCs that are
-currently supported in Linux Mainline (you know, the one on kernel.org).
-
-It is important to note that the Microchip (previously Atmel) ARM-based MPU
-product line is historically named "AT91" or "at91" throughout the Linux kernel
-development process even if this product prefix has completely disappeared from
-the official Microchip product name. Anyway, files, directories, git trees,
-git branches/tags and email subject always contain this "at91" sub-string.
-
-
-AT91 SoCs
----------
-Documentation and detailed datasheet for each product are available on
-the Microchip website: http://www.microchip.com.
-
-  Flavors:
-    * ARM 920 based SoC
-      - at91rm9200
-        + Datasheet
-          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-1768-32-bit-ARM920T-Embedded-Microprocessor-AT91RM9200_Datasheet.pdf
-
-    * ARM 926 based SoCs
-      - at91sam9260
-        + Datasheet
-          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6221-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9260_Datasheet.pdf
-
-      - at91sam9xe
-        + Datasheet
-          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6254-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9XE_Datasheet.pdf
-
-      - at91sam9261
-        + Datasheet
-          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6062-ARM926EJ-S-Microprocessor-SAM9261_Datasheet.pdf
-
-      - at91sam9263
-        + Datasheet
-          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6249-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9263_Datasheet.pdf
-
-      - at91sam9rl
-        + Datasheet
-          http://ww1.microchip.com/downloads/en/DeviceDoc/doc6289.pdf
-
-      - at91sam9g20
-        + Datasheet
-          http://ww1.microchip.com/downloads/en/DeviceDoc/DS60001516A.pdf
-
-      - at91sam9g45 family
-        - at91sam9g45
-        - at91sam9g46
-        - at91sam9m10
-        - at91sam9m11 (device superset)
-        + Datasheet
-          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6437-32-bit-ARM926-Embedded-Microprocessor-SAM9M11_Datasheet.pdf
-
-      - at91sam9x5 family (aka "The 5 series")
-        - at91sam9g15
-        - at91sam9g25
-        - at91sam9g35
-        - at91sam9x25
-        - at91sam9x35
-        + Datasheet (can be considered as covering the whole family)
-          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-11055-32-bit-ARM926EJ-S-Microcontroller-SAM9X35_Datasheet.pdf
-
-      - at91sam9n12
-        + Datasheet
-          http://ww1.microchip.com/downloads/en/DeviceDoc/DS60001517A.pdf
-
-    * ARM Cortex-A5 based SoCs
-      - sama5d3 family
-        - sama5d31
-        - sama5d33
-        - sama5d34
-        - sama5d35
-        - sama5d36 (device superset)
-        + Datasheet
-          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-11121-32-bit-Cortex-A5-Microcontroller-SAMA5D3_Datasheet.pdf
-
-    * ARM Cortex-A5 + NEON based SoCs
-      - sama5d4 family
-        - sama5d41
-        - sama5d42
-        - sama5d43
-        - sama5d44 (device superset)
-        + Datasheet
-          http://ww1.microchip.com/downloads/en/DeviceDoc/60001525A.pdf
-
-      - sama5d2 family
-        - sama5d21
-        - sama5d22
-        - sama5d23
-        - sama5d24
-        - sama5d26
-        - sama5d27 (device superset)
-        - sama5d28 (device superset + environmental monitors)
-        + Datasheet
-          http://ww1.microchip.com/downloads/en/DeviceDoc/DS60001476B.pdf
-
-    * ARM Cortex-M7 MCUs
-      - sams70 family
-        - sams70j19
-        - sams70j20
-        - sams70j21
-        - sams70n19
-        - sams70n20
-        - sams70n21
-        - sams70q19
-        - sams70q20
-        - sams70q21
-
-      - samv70 family
-        - samv70j19
-        - samv70j20
-        - samv70n19
-        - samv70n20
-        - samv70q19
-        - samv70q20
-
-      - samv71 family
-        - samv71j19
-        - samv71j20
-        - samv71j21
-        - samv71n19
-        - samv71n20
-        - samv71n21
-        - samv71q19
-        - samv71q20
-        - samv71q21
-
-        + Datasheet
-          http://ww1.microchip.com/downloads/en/DeviceDoc/60001527A.pdf
-
-
-Linux kernel information
-------------------------
-Linux kernel mach directory: arch/arm/mach-at91
-MAINTAINERS entry is: "ARM/Microchip (AT91) SoC support"
-
-
-Device Tree for AT91 SoCs and boards
-------------------------------------
-All AT91 SoCs are converted to Device Tree. Since Linux 3.19, these products
-must use this method to boot the Linux kernel.
-
-Work In Progress statement:
-Device Tree files and Device Tree bindings that apply to AT91 SoCs and boards are
-considered as "Unstable". To be completely clear, any at91 binding can change at
-any time. So, be sure to use a Device Tree Binary and a Kernel Image generated from
-the same source tree.
-Please refer to the Documentation/devicetree/bindings/ABI.txt file for a
-definition of a "Stable" binding/ABI.
-This statement will be removed by AT91 MAINTAINERS when appropriate.
-
-Naming conventions and best practice:
-- SoCs Device Tree Source Include files are named after the official name of
-  the product (at91sam9g20.dtsi or sama5d33.dtsi for instance).
-- Device Tree Source Include files (.dtsi) are used to collect common nodes that can be
-  shared across SoCs or boards (sama5d3.dtsi or at91sam9x5cm.dtsi for instance).
-  When collecting nodes for a particular peripheral or topic, the identifier have to
-  be placed at the end of the file name, separated with a "_" (at91sam9x5_can.dtsi
-  or sama5d3_gmac.dtsi for example).
-- board Device Tree Source files (.dts) are prefixed by the string "at91-" so
-  that they can be identified easily. Note that some files are historical exceptions
-  to this rule (sama5d3[13456]ek.dts, usb_a9g20.dts or animeo_ip.dts for example).
diff --git a/Documentation/arm/Netwinder b/Documentation/arm/Netwinder
deleted file mode 100644
index f1b457fbd3de..000000000000
--- a/Documentation/arm/Netwinder
+++ /dev/null
@@ -1,78 +0,0 @@
-NetWinder specific documentation
-================================
-
-The NetWinder is a small low-power computer, primarily designed
-to run Linux.  It is based around the StrongARM RISC processor,
-DC21285 PCI bridge, with PC-type hardware glued around it.
-
-Port usage
-==========
-
-Min    - Max	Description
----------------------------
-0x0000 - 0x000f	DMA1
-0x0020 - 0x0021	PIC1
-0x0060 - 0x006f	Keyboard
-0x0070 - 0x007f	RTC
-0x0080 - 0x0087	DMA1
-0x0088 - 0x008f	DMA2
-0x00a0 - 0x00a3	PIC2
-0x00c0 - 0x00df	DMA2
-0x0180 - 0x0187	IRDA
-0x01f0 - 0x01f6	ide0
-0x0201		Game port
-0x0203		RWA010 configuration read
-0x0220 - ?	SoundBlaster
-0x0250 - ?	WaveArtist
-0x0279		RWA010 configuration index
-0x02f8 - 0x02ff	Serial ttyS1
-0x0300 - 0x031f	Ether10
-0x0338		GPIO1
-0x033a		GPIO2
-0x0370 - 0x0371	W83977F configuration registers
-0x0388 - ?	AdLib
-0x03c0 - 0x03df	VGA
-0x03f6		ide0
-0x03f8 - 0x03ff	Serial ttyS0
-0x0400 - 0x0408	DC21143
-0x0480 - 0x0487	DMA1
-0x0488 - 0x048f	DMA2
-0x0a79		RWA010 configuration write
-0xe800 - 0xe80f	ide0/ide1 BM DMA
-
-
-Interrupt usage
-===============
-
-IRQ	type	Description
----------------------------
- 0	ISA	100Hz timer
- 1	ISA	Keyboard
- 2	ISA	cascade
- 3	ISA	Serial ttyS1
- 4	ISA	Serial ttyS0
- 5	ISA	PS/2 mouse
- 6	ISA	IRDA
- 7	ISA	Printer
- 8	ISA	RTC alarm
- 9	ISA
-10	ISA	GP10 (Orange reset button)
-11	ISA
-12	ISA	WaveArtist
-13	ISA
-14	ISA	hda1
-15	ISA
-
-DMA usage
-=========
-
-DMA	type	Description
----------------------------
- 0	ISA	IRDA
- 1	ISA
- 2	ISA	cascade
- 3	ISA	WaveArtist
- 4	ISA
- 5	ISA
- 6	ISA
- 7	ISA	WaveArtist
diff --git a/Documentation/arm/OMAP/DSS b/Documentation/arm/OMAP/DSS
deleted file mode 100644
index 4484e021290e..000000000000
--- a/Documentation/arm/OMAP/DSS
+++ /dev/null
@@ -1,362 +0,0 @@
-OMAP2/3 Display Subsystem
--------------------------
-
-This is an almost total rewrite of the OMAP FB driver in drivers/video/omap
-(let's call it DSS1). The main differences between DSS1 and DSS2 are DSI,
-TV-out and multiple display support, but there are lots of small improvements
-also.
-
-The DSS2 driver (omapdss module) is in arch/arm/plat-omap/dss/, and the FB,
-panel and controller drivers are in drivers/video/omap2/. DSS1 and DSS2 live
-currently side by side, you can choose which one to use.
-
-Features
---------
-
-Working and tested features include:
-
-- MIPI DPI (parallel) output
-- MIPI DSI output in command mode
-- MIPI DBI (RFBI) output
-- SDI output
-- TV output
-- All pieces can be compiled as a module or inside kernel
-- Use DISPC to update any of the outputs
-- Use CPU to update RFBI or DSI output
-- OMAP DISPC planes
-- RGB16, RGB24 packed, RGB24 unpacked
-- YUV2, UYVY
-- Scaling
-- Adjusting DSS FCK to find a good pixel clock
-- Use DSI DPLL to create DSS FCK
-
-Tested boards include:
-- OMAP3 SDP board
-- Beagle board
-- N810
-
-omapdss driver
---------------
-
-The DSS driver does not itself have any support for Linux framebuffer, V4L or
-such like the current ones, but it has an internal kernel API that upper level
-drivers can use.
-
-The DSS driver models OMAP's overlays, overlay managers and displays in a
-flexible way to enable non-common multi-display configuration. In addition to
-modelling the hardware overlays, omapdss supports virtual overlays and overlay
-managers. These can be used when updating a display with CPU or system DMA.
-
-omapdss driver support for audio
---------------------------------
-There exist several display technologies and standards that support audio as
-well. Hence, it is relevant to update the DSS device driver to provide an audio
-interface that may be used by an audio driver or any other driver interested in
-the functionality.
-
-The audio_enable function is intended to prepare the relevant
-IP for playback (e.g., enabling an audio FIFO, taking in/out of reset
-some IP, enabling companion chips, etc). It is intended to be called before
-audio_start. The audio_disable function performs the reverse operation and is
-intended to be called after audio_stop.
-
-While a given DSS device driver may support audio, it is possible that for
-certain configurations audio is not supported (e.g., an HDMI display using a
-VESA video timing). The audio_supported function is intended to query whether
-the current configuration of the display supports audio.
-
-The audio_config function is intended to configure all the relevant audio
-parameters of the display. In order to make the function independent of any
-specific DSS device driver, a struct omap_dss_audio is defined. Its purpose
-is to contain all the required parameters for audio configuration. At the
-moment, such structure contains pointers to IEC-60958 channel status word
-and CEA-861 audio infoframe structures. This should be enough to support
-HDMI and DisplayPort, as both are based on CEA-861 and IEC-60958.
-
-The audio_enable/disable, audio_config and audio_supported functions could be
-implemented as functions that may sleep. Hence, they should not be called
-while holding a spinlock or a readlock.
-
-The audio_start/audio_stop function is intended to effectively start/stop audio
-playback after the configuration has taken place. These functions are designed
-to be used in an atomic context. Hence, audio_start should return quickly and be
-called only after all the needed resources for audio playback (audio FIFOs,
-DMA channels, companion chips, etc) have been enabled to begin data transfers.
-audio_stop is designed to only stop the audio transfers. The resources used
-for playback are released using audio_disable.
-
-The enum omap_dss_audio_state may be used to help the implementations of
-the interface to keep track of the audio state. The initial state is _DISABLED;
-then, the state transitions to _CONFIGURED, and then, when it is ready to
-play audio, to _ENABLED. The state _PLAYING is used when the audio is being
-rendered.
-
-
-Panel and controller drivers
-----------------------------
-
-The drivers implement panel or controller specific functionality and are not
-usually visible to users except through omapfb driver.  They register
-themselves to the DSS driver.
-
-omapfb driver
--------------
-
-The omapfb driver implements arbitrary number of standard linux framebuffers.
-These framebuffers can be routed flexibly to any overlays, thus allowing very
-dynamic display architecture.
-
-The driver exports some omapfb specific ioctls, which are compatible with the
-ioctls in the old driver.
-
-The rest of the non standard features are exported via sysfs. Whether the final
-implementation will use sysfs, or ioctls, is still open.
-
-V4L2 drivers
-------------
-
-V4L2 is being implemented in TI.
-
-From omapdss point of view the V4L2 drivers should be similar to framebuffer
-driver.
-
-Architecture
---------------------
-
-Some clarification what the different components do:
-
-    - Framebuffer is a memory area inside OMAP's SRAM/SDRAM that contains the
-      pixel data for the image. Framebuffer has width and height and color
-      depth.
-    - Overlay defines where the pixels are read from and where they go on the
-      screen. The overlay may be smaller than framebuffer, thus displaying only
-      part of the framebuffer. The position of the overlay may be changed if
-      the overlay is smaller than the display.
-    - Overlay manager combines the overlays in to one image and feeds them to
-      display.
-    - Display is the actual physical display device.
-
-A framebuffer can be connected to multiple overlays to show the same pixel data
-on all of the overlays. Note that in this case the overlay input sizes must be
-the same, but, in case of video overlays, the output size can be different. Any
-framebuffer can be connected to any overlay.
-
-An overlay can be connected to one overlay manager. Also DISPC overlays can be
-connected only to DISPC overlay managers, and virtual overlays can be only
-connected to virtual overlays.
-
-An overlay manager can be connected to one display. There are certain
-restrictions which kinds of displays an overlay manager can be connected:
-
-    - DISPC TV overlay manager can be only connected to TV display.
-    - Virtual overlay managers can only be connected to DBI or DSI displays.
-    - DISPC LCD overlay manager can be connected to all displays, except TV
-      display.
-
-Sysfs
------
-The sysfs interface is mainly used for testing. I don't think sysfs
-interface is the best for this in the final version, but I don't quite know
-what would be the best interfaces for these things.
-
-The sysfs interface is divided to two parts: DSS and FB.
-
-/sys/class/graphics/fb? directory:
-mirror		0=off, 1=on
-rotate		Rotation 0-3 for 0, 90, 180, 270 degrees
-rotate_type	0 = DMA rotation, 1 = VRFB rotation
-overlays	List of overlay numbers to which framebuffer pixels go
-phys_addr	Physical address of the framebuffer
-virt_addr	Virtual address of the framebuffer
-size		Size of the framebuffer
-
-/sys/devices/platform/omapdss/overlay? directory:
-enabled		0=off, 1=on
-input_size	width,height (ie. the framebuffer size)
-manager		Destination overlay manager name
-name
-output_size	width,height
-position	x,y
-screen_width	width
-global_alpha   	global alpha 0-255 0=transparent 255=opaque
-
-/sys/devices/platform/omapdss/manager? directory:
-display				Destination display
-name
-alpha_blending_enabled		0=off, 1=on
-trans_key_enabled		0=off, 1=on
-trans_key_type			gfx-destination, video-source
-trans_key_value			transparency color key (RGB24)
-default_color			default background color (RGB24)
-
-/sys/devices/platform/omapdss/display? directory:
-ctrl_name	Controller name
-mirror		0=off, 1=on
-update_mode	0=off, 1=auto, 2=manual
-enabled		0=off, 1=on
-name
-rotate		Rotation 0-3 for 0, 90, 180, 270 degrees
-timings		Display timings (pixclock,xres/hfp/hbp/hsw,yres/vfp/vbp/vsw)
-		When writing, two special timings are accepted for tv-out:
-		"pal" and "ntsc"
-panel_name
-tear_elim	Tearing elimination 0=off, 1=on
-output_type	Output type (video encoder only): "composite" or "svideo"
-
-There are also some debugfs files at <debugfs>/omapdss/ which show information
-about clocks and registers.
-
-Examples
---------
-
-The following definitions have been made for the examples below:
-
-ovl0=/sys/devices/platform/omapdss/overlay0
-ovl1=/sys/devices/platform/omapdss/overlay1
-ovl2=/sys/devices/platform/omapdss/overlay2
-
-mgr0=/sys/devices/platform/omapdss/manager0
-mgr1=/sys/devices/platform/omapdss/manager1
-
-lcd=/sys/devices/platform/omapdss/display0
-dvi=/sys/devices/platform/omapdss/display1
-tv=/sys/devices/platform/omapdss/display2
-
-fb0=/sys/class/graphics/fb0
-fb1=/sys/class/graphics/fb1
-fb2=/sys/class/graphics/fb2
-
-Default setup on OMAP3 SDP
---------------------------
-
-Here's the default setup on OMAP3 SDP board. All planes go to LCD. DVI
-and TV-out are not in use. The columns from left to right are:
-framebuffers, overlays, overlay managers, displays. Framebuffers are
-handled by omapfb, and the rest by the DSS.
-
-FB0 --- GFX  -\            DVI
-FB1 --- VID1 --+- LCD ---- LCD
-FB2 --- VID2 -/   TV ----- TV
-
-Example: Switch from LCD to DVI
-----------------------
-
-w=`cat $dvi/timings | cut -d "," -f 2 | cut -d "/" -f 1`
-h=`cat $dvi/timings | cut -d "," -f 3 | cut -d "/" -f 1`
-
-echo "0" > $lcd/enabled
-echo "" > $mgr0/display
-fbset -fb /dev/fb0 -xres $w -yres $h -vxres $w -vyres $h
-# at this point you have to switch the dvi/lcd dip-switch from the omap board
-echo "dvi" > $mgr0/display
-echo "1" > $dvi/enabled
-
-After this the configuration looks like:
-
-FB0 --- GFX  -\         -- DVI
-FB1 --- VID1 --+- LCD -/   LCD
-FB2 --- VID2 -/   TV ----- TV
-
-Example: Clone GFX overlay to LCD and TV
--------------------------------
-
-w=`cat $tv/timings | cut -d "," -f 2 | cut -d "/" -f 1`
-h=`cat $tv/timings | cut -d "," -f 3 | cut -d "/" -f 1`
-
-echo "0" > $ovl0/enabled
-echo "0" > $ovl1/enabled
-
-echo "" > $fb1/overlays
-echo "0,1" > $fb0/overlays
-
-echo "$w,$h" > $ovl1/output_size
-echo "tv" > $ovl1/manager
-
-echo "1" > $ovl0/enabled
-echo "1" > $ovl1/enabled
-
-echo "1" > $tv/enabled
-
-After this the configuration looks like (only relevant parts shown):
-
-FB0 +-- GFX  ---- LCD ---- LCD
-     \- VID1 ---- TV  ---- TV
-
-Misc notes
-----------
-
-OMAP FB allocates the framebuffer memory using the standard dma allocator. You
-can enable Contiguous Memory Allocator (CONFIG_CMA) to improve the dma
-allocator, and if CMA is enabled, you use "cma=" kernel parameter to increase
-the global memory area for CMA.
-
-Using DSI DPLL to generate pixel clock it is possible produce the pixel clock
-of 86.5MHz (max possible), and with that you get 1280x1024@57 output from DVI.
-
-Rotation and mirroring currently only supports RGB565 and RGB8888 modes. VRFB
-does not support mirroring.
-
-VRFB rotation requires much more memory than non-rotated framebuffer, so you
-probably need to increase your vram setting before using VRFB rotation. Also,
-many applications may not work with VRFB if they do not pay attention to all
-framebuffer parameters.
-
-Kernel boot arguments
----------------------
-
-omapfb.mode=<display>:<mode>[,...]
-	- Default video mode for specified displays. For example,
-	  "dvi:800x400MR-24@60".  See drivers/video/modedb.c.
-	  There are also two special modes: "pal" and "ntsc" that
-	  can be used to tv out.
-
-omapfb.vram=<fbnum>:<size>[@<physaddr>][,...]
-	- VRAM allocated for a framebuffer. Normally omapfb allocates vram
-	  depending on the display size. With this you can manually allocate
-	  more or define the physical address of each framebuffer. For example,
-	  "1:4M" to allocate 4M for fb1.
-
-omapfb.debug=<y|n>
-	- Enable debug printing. You have to have OMAPFB debug support enabled
-	  in kernel config.
-
-omapfb.test=<y|n>
-	- Draw test pattern to framebuffer whenever framebuffer settings change.
-	  You need to have OMAPFB debug support enabled in kernel config.
-
-omapfb.vrfb=<y|n>
-	- Use VRFB rotation for all framebuffers.
-
-omapfb.rotate=<angle>
-	- Default rotation applied to all framebuffers.
-	  0 - 0 degree rotation
-	  1 - 90 degree rotation
-	  2 - 180 degree rotation
-	  3 - 270 degree rotation
-
-omapfb.mirror=<y|n>
-	- Default mirror for all framebuffers. Only works with DMA rotation.
-
-omapdss.def_disp=<display>
-	- Name of default display, to which all overlays will be connected.
-	  Common examples are "lcd" or "tv".
-
-omapdss.debug=<y|n>
-	- Enable debug printing. You have to have DSS debug support enabled in
-	  kernel config.
-
-TODO
-----
-
-DSS locking
-
-Error checking
-- Lots of checks are missing or implemented just as BUG()
-
-System DMA update for DSI
-- Can be used for RGB16 and RGB24P modes. Probably not for RGB24U (how
-  to skip the empty byte?)
-
-OMAP1 support
-- Not sure if needed
-
diff --git a/Documentation/arm/OMAP/README b/Documentation/arm/OMAP/README
deleted file mode 100644
index 90c6c57d61e8..000000000000
--- a/Documentation/arm/OMAP/README
+++ /dev/null
@@ -1,11 +0,0 @@
-This file contains documentation for running mainline
-kernel on omaps.
-
-KERNEL		NEW DEPENDENCIES
-v4.3+		Update is needed for custom .config files to make sure
-		CONFIG_REGULATOR_PBIAS is enabled for MMC1 to work
-		properly.
-
-v4.18+		Update is needed for custom .config files to make sure
-		CONFIG_MMC_SDHCI_OMAP is enabled for all MMC instances
-		to work in DRA7 and K2G based boards.
diff --git a/Documentation/arm/OMAP/omap_pm b/Documentation/arm/OMAP/omap_pm
deleted file mode 100644
index 4ae915a9f899..000000000000
--- a/Documentation/arm/OMAP/omap_pm
+++ /dev/null
@@ -1,154 +0,0 @@
-
-The OMAP PM interface
-=====================
-
-This document describes the temporary OMAP PM interface.  Driver
-authors use these functions to communicate minimum latency or
-throughput constraints to the kernel power management code.
-Over time, the intention is to merge features from the OMAP PM
-interface into the Linux PM QoS code.
-
-Drivers need to express PM parameters which:
-
-- support the range of power management parameters present in the TI SRF;
-
-- separate the drivers from the underlying PM parameter
-  implementation, whether it is the TI SRF or Linux PM QoS or Linux
-  latency framework or something else;
-
-- specify PM parameters in terms of fundamental units, such as
-  latency and throughput, rather than units which are specific to OMAP
-  or to particular OMAP variants;
-
-- allow drivers which are shared with other architectures (e.g.,
-  DaVinci) to add these constraints in a way which won't affect non-OMAP
-  systems,
-
-- can be implemented immediately with minimal disruption of other
-  architectures.
-
-
-This document proposes the OMAP PM interface, including the following
-five power management functions for driver code:
-
-1. Set the maximum MPU wakeup latency:
-   (*pdata->set_max_mpu_wakeup_lat)(struct device *dev, unsigned long t)
-
-2. Set the maximum device wakeup latency:
-   (*pdata->set_max_dev_wakeup_lat)(struct device *dev, unsigned long t)
-
-3. Set the maximum system DMA transfer start latency (CORE pwrdm):
-   (*pdata->set_max_sdma_lat)(struct device *dev, long t)
-
-4. Set the minimum bus throughput needed by a device:
-   (*pdata->set_min_bus_tput)(struct device *dev, u8 agent_id, unsigned long r)
-
-5. Return the number of times the device has lost context
-   (*pdata->get_dev_context_loss_count)(struct device *dev)
-
-
-Further documentation for all OMAP PM interface functions can be
-found in arch/arm/plat-omap/include/mach/omap-pm.h.
-
-
-The OMAP PM layer is intended to be temporary
----------------------------------------------
-
-The intention is that eventually the Linux PM QoS layer should support
-the range of power management features present in OMAP3.  As this
-happens, existing drivers using the OMAP PM interface can be modified
-to use the Linux PM QoS code; and the OMAP PM interface can disappear.
-
-
-Driver usage of the OMAP PM functions
--------------------------------------
-
-As the 'pdata' in the above examples indicates, these functions are
-exposed to drivers through function pointers in driver .platform_data
-structures.  The function pointers are initialized by the board-*.c
-files to point to the corresponding OMAP PM functions:
-.set_max_dev_wakeup_lat will point to
-omap_pm_set_max_dev_wakeup_lat(), etc.  Other architectures which do
-not support these functions should leave these function pointers set
-to NULL.  Drivers should use the following idiom:
-
-        if (pdata->set_max_dev_wakeup_lat)
-            (*pdata->set_max_dev_wakeup_lat)(dev, t);
-
-The most common usage of these functions will probably be to specify
-the maximum time from when an interrupt occurs, to when the device
-becomes accessible.  To accomplish this, driver writers should use the
-set_max_mpu_wakeup_lat() function to constrain the MPU wakeup
-latency, and the set_max_dev_wakeup_lat() function to constrain the
-device wakeup latency (from clk_enable() to accessibility).  For
-example,
-
-        /* Limit MPU wakeup latency */
-        if (pdata->set_max_mpu_wakeup_lat)
-            (*pdata->set_max_mpu_wakeup_lat)(dev, tc);
-
-        /* Limit device powerdomain wakeup latency */
-        if (pdata->set_max_dev_wakeup_lat)
-            (*pdata->set_max_dev_wakeup_lat)(dev, td);
-
-        /* total wakeup latency in this example: (tc + td) */
-
-The PM parameters can be overwritten by calling the function again
-with the new value.  The settings can be removed by calling the
-function with a t argument of -1 (except in the case of
-set_max_bus_tput(), which should be called with an r argument of 0).
-
-The fifth function above, omap_pm_get_dev_context_loss_count(),
-is intended as an optimization to allow drivers to determine whether the
-device has lost its internal context.  If context has been lost, the
-driver must restore its internal context before proceeding.
-
-
-Other specialized interface functions
--------------------------------------
-
-The five functions listed above are intended to be usable by any
-device driver.  DSPBridge and CPUFreq have a few special requirements.
-DSPBridge expresses target DSP performance levels in terms of OPP IDs.
-CPUFreq expresses target MPU performance levels in terms of MPU
-frequency.  The OMAP PM interface contains functions for these
-specialized cases to convert that input information (OPPs/MPU
-frequency) into the form that the underlying power management
-implementation needs:
-
-6. (*pdata->dsp_get_opp_table)(void)
-
-7. (*pdata->dsp_set_min_opp)(u8 opp_id)
-
-8. (*pdata->dsp_get_opp)(void)
-
-9. (*pdata->cpu_get_freq_table)(void)
-
-10. (*pdata->cpu_set_freq)(unsigned long f)
-
-11. (*pdata->cpu_get_freq)(void)
-
-Customizing OPP for platform
-============================
-Defining CONFIG_PM should enable OPP layer for the silicon
-and the registration of OPP table should take place automatically.
-However, in special cases, the default OPP table may need to be
-tweaked, for e.g.:
- * enable default OPPs which are disabled by default, but which
-   could be enabled on a platform
- * Disable an unsupported OPP on the platform
- * Define and add a custom opp table entry
-in these cases, the board file needs to do additional steps as follows:
-arch/arm/mach-omapx/board-xyz.c
-	#include "pm.h"
-	....
-	static void __init omap_xyz_init_irq(void)
-	{
-		....
-		/* Initialize the default table */
-		omapx_opp_init();
-		/* Do customization to the defaults */
-		....
-	}
-NOTE: omapx_opp_init will be omap3_opp_init or as required
-based on the omap family.
diff --git a/Documentation/arm/Porting b/Documentation/arm/Porting
deleted file mode 100644
index a492233931b9..000000000000
--- a/Documentation/arm/Porting
+++ /dev/null
@@ -1,135 +0,0 @@
-Taken from list archive at http://lists.arm.linux.org.uk/pipermail/linux-arm-kernel/2001-July/004064.html
-
-Initial definitions
--------------------
-
-The following symbol definitions rely on you knowing the translation that
-__virt_to_phys() does for your machine.  This macro converts the passed
-virtual address to a physical address.  Normally, it is simply:
-
-		phys = virt - PAGE_OFFSET + PHYS_OFFSET
-
-
-Decompressor Symbols
---------------------
-
-ZTEXTADDR
-	Start address of decompressor.  There's no point in talking about
-	virtual or physical addresses here, since the MMU will be off at
-	the time when you call the decompressor code.  You normally call
-	the kernel at this address to start it booting.  This doesn't have
-	to be located in RAM, it can be in flash or other read-only or
-	read-write addressable medium.
-
-ZBSSADDR
-	Start address of zero-initialised work area for the decompressor.
-	This must be pointing at RAM.  The decompressor will zero initialise
-	this for you.  Again, the MMU will be off.
-
-ZRELADDR
-	This is the address where the decompressed kernel will be written,
-	and eventually executed.  The following constraint must be valid:
-
-		__virt_to_phys(TEXTADDR) == ZRELADDR
-
-	The initial part of the kernel is carefully coded to be position
-	independent.
-
-INITRD_PHYS
-	Physical address to place the initial RAM disk.  Only relevant if
-	you are using the bootpImage stuff (which only works on the old
-	struct param_struct).
-
-INITRD_VIRT
-	Virtual address of the initial RAM disk.  The following  constraint
-	must be valid:
-
-		__virt_to_phys(INITRD_VIRT) == INITRD_PHYS
-
-PARAMS_PHYS
-	Physical address of the struct param_struct or tag list, giving the
-	kernel various parameters about its execution environment.
-
-
-Kernel Symbols
---------------
-
-PHYS_OFFSET
-	Physical start address of the first bank of RAM.
-
-PAGE_OFFSET
-	Virtual start address of the first bank of RAM.  During the kernel
-	boot phase, virtual address PAGE_OFFSET will be mapped to physical
-	address PHYS_OFFSET, along with any other mappings you supply.
-	This should be the same value as TASK_SIZE.
-
-TASK_SIZE
-	The maximum size of a user process in bytes.  Since user space
-	always starts at zero, this is the maximum address that a user
-	process can access+1.  The user space stack grows down from this
-	address.
-
-	Any virtual address below TASK_SIZE is deemed to be user process
-	area, and therefore managed dynamically on a process by process
-	basis by the kernel.  I'll call this the user segment.
-
-	Anything above TASK_SIZE is common to all processes.  I'll call
-	this the kernel segment.
-
-	(In other words, you can't put IO mappings below TASK_SIZE, and
-	hence PAGE_OFFSET).
-
-TEXTADDR
-	Virtual start address of kernel, normally PAGE_OFFSET + 0x8000.
-	This is where the kernel image ends up.  With the latest kernels,
-	it must be located at 32768 bytes into a 128MB region.  Previous
-	kernels placed a restriction of 256MB here.
-
-DATAADDR
-	Virtual address for the kernel data segment.  Must not be defined
-	when using the decompressor.
-
-VMALLOC_START
-VMALLOC_END
-	Virtual addresses bounding the vmalloc() area.  There must not be
-	any static mappings in this area; vmalloc will overwrite them.
-	The addresses must also be in the kernel segment (see above).
-	Normally, the vmalloc() area starts VMALLOC_OFFSET bytes above the
-	last virtual RAM address (found using variable high_memory).
-
-VMALLOC_OFFSET
-	Offset normally incorporated into VMALLOC_START to provide a hole
-	between virtual RAM and the vmalloc area.  We do this to allow
-	out of bounds memory accesses (eg, something writing off the end
-	of the mapped memory map) to be caught.  Normally set to 8MB.
-
-Architecture Specific Macros
-----------------------------
-
-BOOT_MEM(pram,pio,vio)
-	`pram' specifies the physical start address of RAM.  Must always
-	be present, and should be the same as PHYS_OFFSET.
-
-	`pio' is the physical address of an 8MB region containing IO for
-	use with the debugging macros in arch/arm/kernel/debug-armv.S.
-
-	`vio' is the virtual address of the 8MB debugging region.
-
-	It is expected that the debugging region will be re-initialised
-	by the architecture specific code later in the code (via the
-	MAPIO function).
-
-BOOT_PARAMS
-	Same as, and see PARAMS_PHYS.
-
-FIXUP(func)
-	Machine specific fixups, run before memory subsystems have been
-	initialised.
-
-MAPIO(func)
-	Machine specific function to map IO areas (including the debug
-	region above).
-
-INITIRQ(func)
-	Machine specific function to initialise interrupts.
-
diff --git a/Documentation/arm/README b/Documentation/arm/README
deleted file mode 100644
index 9d1e5b2c92e6..000000000000
--- a/Documentation/arm/README
+++ /dev/null
@@ -1,204 +0,0 @@
-			   ARM Linux 2.6
-			   =============
-
-    Please check <ftp://ftp.arm.linux.org.uk/pub/armlinux> for
-    updates.
-
-Compilation of kernel
----------------------
-
-  In order to compile ARM Linux, you will need a compiler capable of
-  generating ARM ELF code with GNU extensions.  GCC 3.3 is known to be
-  a good compiler.  Fortunately, you needn't guess.  The kernel will report
-  an error if your compiler is a recognized offender.
-
-  To build ARM Linux natively, you shouldn't have to alter the ARCH = line
-  in the top level Makefile.  However, if you don't have the ARM Linux ELF
-  tools installed as default, then you should change the CROSS_COMPILE
-  line as detailed below.
-
-  If you wish to cross-compile, then alter the following lines in the top
-  level make file:
-
-    ARCH = <whatever>
-	with
-    ARCH = arm
-
-	and
-
-    CROSS_COMPILE=
-	to
-    CROSS_COMPILE=<your-path-to-your-compiler-without-gcc>
-	eg.
-    CROSS_COMPILE=arm-linux-
-
-  Do a 'make config', followed by 'make Image' to build the kernel 
-  (arch/arm/boot/Image).  A compressed image can be built by doing a 
-  'make zImage' instead of 'make Image'.
-
-
-Bug reports etc
----------------
-
-  Please send patches to the patch system.  For more information, see
-  http://www.arm.linux.org.uk/developer/patches/info.php Always include some
-  explanation as to what the patch does and why it is needed.
-
-  Bug reports should be sent to linux-arm-kernel@lists.arm.linux.org.uk,
-  or submitted through the web form at
-  http://www.arm.linux.org.uk/developer/ 
-
-  When sending bug reports, please ensure that they contain all relevant
-  information, eg. the kernel messages that were printed before/during
-  the problem, what you were doing, etc.
-
-
-Include files
--------------
-
-  Several new include directories have been created under include/asm-arm,
-  which are there to reduce the clutter in the top-level directory.  These
-  directories, and their purpose is listed below:
-
-   arch-*	machine/platform specific header files
-   hardware	driver-internal ARM specific data structures/definitions
-   mach		descriptions of generic ARM to specific machine interfaces
-   proc-*	processor dependent header files (currently only two
-		categories)
-
-
-Machine/Platform support
-------------------------
-
-  The ARM tree contains support for a lot of different machine types.  To
-  continue supporting these differences, it has become necessary to split
-  machine-specific parts by directory.  For this, the machine category is
-  used to select which directories and files get included (we will use
-  $(MACHINE) to refer to the category)
-
-  To this end, we now have arch/arm/mach-$(MACHINE) directories which are
-  designed to house the non-driver files for a particular machine (eg, PCI,
-  memory management, architecture definitions etc).  For all future
-  machines, there should be a corresponding arch/arm/mach-$(MACHINE)/include/mach
-  directory.
-
-
-Modules
--------
-
-  Although modularisation is supported (and required for the FP emulator),
-  each module on an ARM2/ARM250/ARM3 machine when is loaded will take
-  memory up to the next 32k boundary due to the size of the pages.
-  Therefore, is modularisation on these machines really worth it?
-
-  However, ARM6 and up machines allow modules to take multiples of 4k, and
-  as such Acorn RiscPCs and other architectures using these processors can
-  make good use of modularisation.
-
-
-ADFS Image files
-----------------
-
-  You can access image files on your ADFS partitions by mounting the ADFS
-  partition, and then using the loopback device driver.  You must have
-  losetup installed.
-
-  Please note that the PCEmulator DOS partitions have a partition table at
-  the start, and as such, you will have to give '-o offset' to losetup.
-
-
-Request to developers
----------------------
-
-  When writing device drivers which include a separate assembler file, please
-  include it in with the C file, and not the arch/arm/lib directory.  This
-  allows the driver to be compiled as a loadable module without requiring
-  half the code to be compiled into the kernel image.
-
-  In general, try to avoid using assembler unless it is really necessary.  It
-  makes drivers far less easy to port to other hardware.
-
-
-ST506 hard drives
------------------
-
-  The ST506 hard drive controllers seem to be working fine (if a little
-  slowly).  At the moment they will only work off the controllers on an
-  A4x0's motherboard, but for it to work off a Podule just requires
-  someone with a podule to add the addresses for the IRQ mask and the
-  HDC base to the source.
-
-  As of 31/3/96 it works with two drives (you should get the ADFS
-  *configure harddrive set to 2). I've got an internal 20MB and a great
-  big external 5.25" FH 64MB drive (who could ever want more :-) ).
-
-  I've just got 240K/s off it (a dd with bs=128k); thats about half of what
-  RiscOS gets; but it's a heck of a lot better than the 50K/s I was getting
-  last week :-)
-
-  Known bug: Drive data errors can cause a hang; including cases where
-  the controller has fixed the error using ECC. (Possibly ONLY
-  in that case...hmm).
-
-
-1772 Floppy
------------
-  This also seems to work OK, but hasn't been stressed much lately.  It
-  hasn't got any code for disc change detection in there at the moment which
-  could be a bit of a problem!  Suggestions on the correct way to do this
-  are welcome.
-
-
-CONFIG_MACH_ and CONFIG_ARCH_
------------------------------
-  A change was made in 2003 to the macro names for new machines.
-  Historically, CONFIG_ARCH_ was used for the bonafide architecture,
-  e.g. SA1100, as well as implementations of the architecture,
-  e.g. Assabet.  It was decided to change the implementation macros
-  to read CONFIG_MACH_ for clarity.  Moreover, a retroactive fixup has
-  not been made because it would complicate patching.
-
-  Previous registrations may be found online.
-
-    <http://www.arm.linux.org.uk/developer/machines/>
-
-Kernel entry (head.S)
---------------------------
-  The initial entry into the kernel is via head.S, which uses machine
-  independent code.  The machine is selected by the value of 'r1' on
-  entry, which must be kept unique.
-
-  Due to the large number of machines which the ARM port of Linux provides
-  for, we have a method to manage this which ensures that we don't end up
-  duplicating large amounts of code.
-
-  We group machine (or platform) support code into machine classes.  A
-  class typically based around one or more system on a chip devices, and
-  acts as a natural container around the actual implementations.  These
-  classes are given directories - arch/arm/mach-<class> and
-  arch/arm/mach-<class> - which contain the source files to/include/mach
-  support the machine class.  This directories also contain any machine
-  specific supporting code.
-
-  For example, the SA1100 class is based upon the SA1100 and SA1110 SoC
-  devices, and contains the code to support the way the on-board and off-
-  board devices are used, or the device is setup, and provides that
-  machine specific "personality."
-
-  For platforms that support device tree (DT), the machine selection is
-  controlled at runtime by passing the device tree blob to the kernel.  At
-  compile-time, support for the machine type must be selected.  This allows for
-  a single multiplatform kernel build to be used for several machine types.
-
-  For platforms that do not use device tree, this machine selection is
-  controlled by the machine type ID, which acts both as a run-time and a
-  compile-time code selection method.  You can register a new machine via the
-  web site at:
-
-    <http://www.arm.linux.org.uk/developer/machines/>
-
-  Note: Please do not register a machine type for DT-only platforms.  If your
-  platform is DT-only, you do not need a registered machine type.
-
----
-Russell King (15/03/2004)
diff --git a/Documentation/arm/SA1100/ADSBitsy b/Documentation/arm/SA1100/ADSBitsy
deleted file mode 100644
index f9f62e8c0719..000000000000
--- a/Documentation/arm/SA1100/ADSBitsy
+++ /dev/null
@@ -1,43 +0,0 @@
-ADS Bitsy Single Board Computer
-(It is different from Bitsy(iPAQ) of Compaq)
-
-For more details, contact Applied Data Systems or see
-http://www.applieddata.net/products.html
-
-The Linux support for this product has been provided by
-Woojung Huh <whuh@applieddata.net>
-
-Use 'make adsbitsy_config' before any 'make config'.
-This will set up defaults for ADS Bitsy support.
-
-The kernel zImage is linked to be loaded and executed at 0xc0400000.
-
-Linux can  be used with the ADS BootLoader that ships with the
-newer rev boards. See their documentation on how to load Linux.
-
-Supported peripherals:
-- SA1100 LCD frame buffer (8/16bpp...sort of)
-- SA1111 USB Master
-- SA1100 serial port
-- pcmcia, compact flash
-- touchscreen(ucb1200)
-- console on LCD screen
-- serial ports (ttyS[0-2])
-  - ttyS0 is default for serial console
-
-To do:
-- everything else!  :-)
-
-Notes:
-
-- The flash on board is divided into 3 partitions.
-  You should be careful to use flash on board.
-  Its partition is different from GraphicsClient Plus and GraphicsMaster
-
-- 16bpp mode requires a different cable than what ships with the board.
-  Contact ADS or look through the manual to wire your own. Currently,
-  if you compile with 16bit mode support and switch into a lower bpp
-  mode, the timing is off so the image is corrupted.  This will be
-  fixed soon.
-
-Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
diff --git a/Documentation/arm/SA1100/Assabet b/Documentation/arm/SA1100/Assabet
deleted file mode 100644
index e08a6739e72c..000000000000
--- a/Documentation/arm/SA1100/Assabet
+++ /dev/null
@@ -1,300 +0,0 @@
-The Intel Assabet (SA-1110 evaluation) board
-============================================
-
-Please see:
-http://developer.intel.com
-
-Also some notes from John G Dorsey <jd5q@andrew.cmu.edu>:
-http://www.cs.cmu.edu/~wearable/software/assabet.html
-
-
-Building the kernel
--------------------
-
-To build the kernel with current defaults:
-
-	make assabet_config
-	make oldconfig
-	make zImage
-
-The resulting kernel image should be available in linux/arch/arm/boot/zImage.
-
-
-Installing a bootloader
------------------------
-
-A couple of bootloaders able to boot Linux on Assabet are available:
-
-BLOB (http://www.lartmaker.nl/lartware/blob/)
-
-   BLOB is a bootloader used within the LART project.  Some contributed
-   patches were merged into BLOB to add support for Assabet.
-
-Compaq's Bootldr + John Dorsey's patch for Assabet support
-(http://www.handhelds.org/Compaq/bootldr.html)
-(http://www.wearablegroup.org/software/bootldr/)
-
-   Bootldr is the bootloader developed by Compaq for the iPAQ Pocket PC.
-   John Dorsey has produced add-on patches to add support for Assabet and
-   the JFFS filesystem.
-
-RedBoot (http://sources.redhat.com/redboot/)
-
-   RedBoot is a bootloader developed by Red Hat based on the eCos RTOS
-   hardware abstraction layer.  It supports Assabet amongst many other
-   hardware platforms.
-
-RedBoot is currently the recommended choice since it's the only one to have
-networking support, and is the most actively maintained.
-
-Brief examples on how to boot Linux with RedBoot are shown below.  But first
-you need to have RedBoot installed in your flash memory.  A known to work
-precompiled RedBoot binary is available from the following location:
-
-ftp://ftp.netwinder.org/users/n/nico/
-ftp://ftp.arm.linux.org.uk/pub/linux/arm/people/nico/
-ftp://ftp.handhelds.org/pub/linux/arm/sa-1100-patches/
-
-Look for redboot-assabet*.tgz.  Some installation infos are provided in
-redboot-assabet*.txt.
-
-
-Initial RedBoot configuration
------------------------------
-
-The commands used here are explained in The RedBoot User's Guide available
-on-line at http://sources.redhat.com/ecos/docs.html.
-Please refer to it for explanations.
-
-If you have a CF network card (my Assabet kit contained a CF+ LP-E from
-Socket Communications Inc.), you should strongly consider using it for TFTP
-file transfers.  You must insert it before RedBoot runs since it can't detect
-it dynamically.
-
-To initialize the flash directory:
-
-	fis init -f
-
-To initialize the non-volatile settings, like whether you want to use BOOTP or
-a static IP address, etc, use this command:
-
-	fconfig -i
-
-
-Writing a kernel image into flash
----------------------------------
-
-First, the kernel image must be loaded into RAM.  If you have the zImage file
-available on a TFTP server:
-
-	load zImage -r -b 0x100000
-
-If you rather want to use Y-Modem upload over the serial port:
-
-	load -m ymodem -r -b 0x100000
-
-To write it to flash:
-
-	fis create "Linux kernel" -b 0x100000 -l 0xc0000
-
-
-Booting the kernel
-------------------
-
-The kernel still requires a filesystem to boot.  A ramdisk image can be loaded
-as follows:
-
-	load ramdisk_image.gz -r -b 0x800000
-
-Again, Y-Modem upload can be used instead of TFTP by replacing the file name
-by '-y ymodem'.
-
-Now the kernel can be retrieved from flash like this:
-
-	fis load "Linux kernel"
-
-or loaded as described previously.  To boot the kernel:
-
-	exec -b 0x100000 -l 0xc0000
-
-The ramdisk image could be stored into flash as well, but there are better
-solutions for on-flash filesystems as mentioned below.
-
-
-Using JFFS2
------------
-
-Using JFFS2 (the Second Journalling Flash File System) is probably the most
-convenient way to store a writable filesystem into flash.  JFFS2 is used in
-conjunction with the MTD layer which is responsible for low-level flash
-management.  More information on the Linux MTD can be found on-line at:
-http://www.linux-mtd.infradead.org/.  A JFFS howto with some infos about
-creating JFFS/JFFS2 images is available from the same site.
-
-For instance, a sample JFFS2 image can be retrieved from the same FTP sites
-mentioned below for the precompiled RedBoot image.
-
-To load this file:
-
-	load sample_img.jffs2 -r -b 0x100000
-
-The result should look like:
-
-RedBoot> load sample_img.jffs2 -r -b 0x100000
-Raw file loaded 0x00100000-0x00377424
-
-Now we must know the size of the unallocated flash:
-
-	fis free
-
-Result:
-
-RedBoot> fis free
-  0x500E0000 .. 0x503C0000
-
-The values above may be different depending on the size of the filesystem and
-the type of flash.  See their usage below as an example and take care of
-substituting yours appropriately.
-
-We must determine some values:
-
-size of unallocated flash:	0x503c0000 - 0x500e0000 = 0x2e0000
-size of the filesystem image:	0x00377424 - 0x00100000 = 0x277424
-
-We want to fit the filesystem image of course, but we also want to give it all
-the remaining flash space as well.  To write it:
-
-	fis unlock -f 0x500E0000 -l 0x2e0000
-	fis erase -f 0x500E0000 -l 0x2e0000
-	fis write -b 0x100000 -l 0x277424 -f 0x500E0000
-	fis create "JFFS2" -n -f 0x500E0000 -l 0x2e0000
-
-Now the filesystem is associated to a MTD "partition" once Linux has discovered
-what they are in the boot process.  From Redboot, the 'fis list' command
-displays them:
-
-RedBoot> fis list
-Name              FLASH addr  Mem addr    Length      Entry point
-RedBoot           0x50000000  0x50000000  0x00020000  0x00000000
-RedBoot config    0x503C0000  0x503C0000  0x00020000  0x00000000
-FIS directory     0x503E0000  0x503E0000  0x00020000  0x00000000
-Linux kernel      0x50020000  0x00100000  0x000C0000  0x00000000
-JFFS2             0x500E0000  0x500E0000  0x002E0000  0x00000000
-
-However Linux should display something like:
-
-SA1100 flash: probing 32-bit flash bus
-SA1100 flash: Found 2 x16 devices at 0x0 in 32-bit mode
-Using RedBoot partition definition
-Creating 5 MTD partitions on "SA1100 flash":
-0x00000000-0x00020000 : "RedBoot"
-0x00020000-0x000e0000 : "Linux kernel"
-0x000e0000-0x003c0000 : "JFFS2"
-0x003c0000-0x003e0000 : "RedBoot config"
-0x003e0000-0x00400000 : "FIS directory"
-
-What's important here is the position of the partition we are interested in,
-which is the third one.  Within Linux, this correspond to /dev/mtdblock2.
-Therefore to boot Linux with the kernel and its root filesystem in flash, we
-need this RedBoot command:
-
-	fis load "Linux kernel"
-	exec -b 0x100000 -l 0xc0000 -c "root=/dev/mtdblock2"
-
-Of course other filesystems than JFFS might be used, like cramfs for example.
-You might want to boot with a root filesystem over NFS, etc.  It is also
-possible, and sometimes more convenient, to flash a filesystem directly from
-within Linux while booted from a ramdisk or NFS.  The Linux MTD repository has
-many tools to deal with flash memory as well, to erase it for example.  JFFS2
-can then be mounted directly on a freshly erased partition and files can be
-copied over directly.  Etc...
-
-
-RedBoot scripting
------------------
-
-All the commands above aren't so useful if they have to be typed in every
-time the Assabet is rebooted.  Therefore it's possible to automate the boot
-process using RedBoot's scripting capability.
-
-For example, I use this to boot Linux with both the kernel and the ramdisk
-images retrieved from a TFTP server on the network:
-
-RedBoot> fconfig
-Run script at boot: false true
-Boot script:
-Enter script, terminate with empty line
->> load zImage -r -b 0x100000
->> load ramdisk_ks.gz -r -b 0x800000
->> exec -b 0x100000 -l 0xc0000
->>
-Boot script timeout (1000ms resolution): 3
-Use BOOTP for network configuration: true
-GDB connection port: 9000
-Network debug at boot time: false
-Update RedBoot non-volatile configuration - are you sure (y/n)? y
-
-Then, rebooting the Assabet is just a matter of waiting for the login prompt.
-
-
-
-Nicolas Pitre
-nico@fluxnic.net
-June 12, 2001
-
-
-Status of peripherals in -rmk tree (updated 14/10/2001)
--------------------------------------------------------
-
-Assabet:
- Serial ports:
-  Radio:		TX, RX, CTS, DSR, DCD, RI
-   PM:			Not tested.
-  COM:			TX, RX, CTS, DSR, DCD, RTS, DTR, PM
-   PM:			Not tested.
-  I2C:			Implemented, not fully tested.
-  L3:			Fully tested, pass.
-   PM:			Not tested.
-
- Video:
-  LCD:			Fully tested.  PM
-			(LCD doesn't like being blanked with
-			 neponset connected)
-  Video out:		Not fully
-
- Audio:
-  UDA1341:
-   Playback:		Fully tested, pass.
-   Record:		Implemented, not tested.
-   PM:			Not tested.
-
-  UCB1200:
-   Audio play:		Implemented, not heavily tested.
-   Audio rec:		Implemented, not heavily tested.
-   Telco audio play:	Implemented, not heavily tested.
-   Telco audio rec:	Implemented, not heavily tested.
-   POTS control:	No
-   Touchscreen:		Yes
-   PM:			Not tested.
-
- Other:
-  PCMCIA:
-   LPE:			Fully tested, pass.
-  USB:			No
-  IRDA:
-   SIR:			Fully tested, pass.
-   FIR:			Fully tested, pass.
-   PM:			Not tested.
-
-Neponset:
- Serial ports:
-  COM1,2:	TX, RX, CTS, DSR, DCD, RTS, DTR
-   PM:			Not tested.
-  USB:			Implemented, not heavily tested.
-  PCMCIA:		Implemented, not heavily tested.
-   PM:			Not tested.
-  CF:			Implemented, not heavily tested.
-   PM:			Not tested.
-
-More stuff can be found in the -np (Nicolas Pitre's) tree.
-
diff --git a/Documentation/arm/SA1100/Brutus b/Documentation/arm/SA1100/Brutus
deleted file mode 100644
index 6a3aa95e9bfd..000000000000
--- a/Documentation/arm/SA1100/Brutus
+++ /dev/null
@@ -1,66 +0,0 @@
-Brutus is an evaluation platform for the SA1100 manufactured by Intel.  
-For more details, see:
-
-http://developer.intel.com
-
-To compile for Brutus, you must issue the following commands:
-
-	make brutus_config
-	make config
-	[accept all the defaults]
-	make zImage
-
-The resulting kernel will end up in linux/arch/arm/boot/zImage.  This file
-must be loaded at 0xc0008000 in Brutus's memory and execution started at
-0xc0008000 as well with the value of registers r0 = 0 and r1 = 16 upon
-entry.
-
-But prior to execute the kernel, a ramdisk image must also be loaded in
-memory.  Use memory address 0xd8000000 for this.  Note that the file 
-containing the (compressed) ramdisk image must not exceed 4 MB.
-
-Typically, you'll need angelboot to load the kernel.
-The following angelboot.opt file should be used:
-
------ begin angelboot.opt -----
-base 0xc0008000
-entry 0xc0008000
-r0 0x00000000
-r1 0x00000010
-device /dev/ttyS0
-options "9600 8N1"
-baud 115200
-otherfile ramdisk_img.gz
-otherbase 0xd8000000
------ end angelboot.opt -----
-
-Then load the kernel and ramdisk with:
-
-	angelboot -f angelboot.opt zImage
-
-The first Brutus serial port (assumed to be linked to /dev/ttyS0 on your
-host PC) is used by angel to load the kernel and ramdisk image. The serial
-console is provided through the second Brutus serial port. To access it,
-you may use minicom configured with /dev/ttyS1, 9600 baud, 8N1, no flow
-control.
-
-Currently supported:
-	- RS232 serial ports
-	- audio output
-	- LCD screen
-	- keyboard
-	
-The actual Brutus support may not be complete without extra patches. 
-If such patches exist, they should be found from 
-ftp.netwinder.org/users/n/nico.
-
-A full PCMCIA support is still missing, although it's possible to hack
-some drivers in order to drive already inserted cards at boot time with
-little modifications.
-
-Any contribution is welcome.
-
-Please send patches to nico@fluxnic.net
-
-Have Fun !
-
diff --git a/Documentation/arm/SA1100/CERF b/Documentation/arm/SA1100/CERF
deleted file mode 100644
index b3d845301ef1..000000000000
--- a/Documentation/arm/SA1100/CERF
+++ /dev/null
@@ -1,29 +0,0 @@
-*** The StrongARM version of the CerfBoard/Cube has been discontinued ***
-
-The Intrinsyc CerfBoard is a StrongARM 1110-based computer on a board
-that measures approximately 2" square. It includes an Ethernet
-controller, an RS232-compatible serial port, a USB function port, and
-one CompactFlash+ slot on the back. Pictures can be found at the
-Intrinsyc website, http://www.intrinsyc.com.
-
-This document describes the support in the Linux kernel for the
-Intrinsyc CerfBoard.
-
-Supported in this version:
-   - CompactFlash+ slot (select PCMCIA in General Setup and any options
-     that may be required)
-   - Onboard Crystal CS8900 Ethernet controller (Cerf CS8900A support in
-     Network Devices)
-   - Serial ports with a serial console (hardcoded to 38400 8N1)
-
-In order to get this kernel onto your Cerf, you need a server that runs
-both BOOTP and TFTP. Detailed instructions should have come with your
-evaluation kit on how to use the bootloader. This series of commands
-will suffice:
-
-   make ARCH=arm CROSS_COMPILE=arm-linux- cerfcube_defconfig
-   make ARCH=arm CROSS_COMPILE=arm-linux- zImage
-   make ARCH=arm CROSS_COMPILE=arm-linux- modules
-   cp arch/arm/boot/zImage <TFTP directory>
-
-support@intrinsyc.com
diff --git a/Documentation/arm/SA1100/FreeBird b/Documentation/arm/SA1100/FreeBird
deleted file mode 100644
index ab9193663b2b..000000000000
--- a/Documentation/arm/SA1100/FreeBird
+++ /dev/null
@@ -1,21 +0,0 @@
-Freebird-1.1 is produced by Legend(C), Inc.
-http://web.archive.org/web/*/http://www.legend.com.cn
-and software/linux maintained by Coventive(C), Inc.
-(http://www.coventive.com)
-
-Based on the Nicolas's strongarm kernel tree.
-
-===============================================================
-Maintainer:
-
-Chester Kuo <chester@coventive.com>
-	    <chester@linux.org.tw>
-
-Author :
-Tim wu <timwu@coventive.com>
-CIH <cih@coventive.com>
-Eric Peng <ericpeng@coventive.com>
-Jeff Lee <jeff_lee@coventive.com>
-Allen Cheng
-Tony Liu <tonyliu@coventive.com>
-
diff --git a/Documentation/arm/SA1100/GraphicsClient b/Documentation/arm/SA1100/GraphicsClient
deleted file mode 100644
index 867bb35943af..000000000000
--- a/Documentation/arm/SA1100/GraphicsClient
+++ /dev/null
@@ -1,98 +0,0 @@
-ADS GraphicsClient Plus Single Board Computer
-
-For more details, contact Applied Data Systems or see
-http://www.applieddata.net/products.html
-
-The original Linux support for this product has been provided by 
-Nicolas Pitre <nico@fluxnic.net>. Continued development work by
-Woojung Huh <whuh@applieddata.net>
-
-It's currently possible to mount a root filesystem via NFS providing a
-complete Linux environment.  Otherwise a ramdisk image may be used.  The
-board supports MTD/JFFS, so you could also mount something on there.
-
-Use 'make graphicsclient_config' before any 'make config'.  This will set up
-defaults for GraphicsClient Plus support.
-
-The kernel zImage is linked to be loaded and executed at 0xc0200000.  
-Also the following registers should have the specified values upon entry:
-
-	r0 = 0
-	r1 = 29	(this is the GraphicsClient architecture number)
-
-Linux can  be used with the ADS BootLoader that ships with the
-newer rev boards. See their documentation on how to load Linux.
-Angel is not available for the GraphicsClient Plus AFAIK.
-
-There is a  board known as just the GraphicsClient that ADS used to
-produce but has end of lifed. This code will not work on the older
-board with the ADS bootloader, but should still work with Angel,
-as outlined below.  In any case, if you're planning on deploying
-something en masse, you should probably get the newer board.
-
-If using Angel on the older boards, here is a typical angel.opt option file
-if the kernel is loaded through the Angel Debug Monitor:
-
------ begin angelboot.opt -----
-base 0xc0200000
-entry 0xc0200000
-r0 0x00000000
-r1 0x0000001d
-device /dev/ttyS1
-options "38400 8N1"
-baud 115200
-#otherfile ramdisk.gz
-#otherbase 0xc0800000
-exec minicom
------ end angelboot.opt -----
-
-Then the kernel (and ramdisk if otherfile/otherbase lines above are
-uncommented) would be loaded with:
-
-	angelboot -f angelboot.opt zImage
-
-Here it is assumed that the board is connected to ttyS1 on your PC
-and that minicom is preconfigured with /dev/ttyS1, 38400 baud, 8N1, no flow
-control by default.
-
-If any other bootloader is used, ensure it accomplish the same, especially
-for r0/r1 register values before jumping into the kernel.
-
-
-Supported peripherals:
-- SA1100 LCD frame buffer (8/16bpp...sort of)
-- on-board SMC 92C96 ethernet NIC
-- SA1100 serial port
-- flash memory access (MTD/JFFS)
-- pcmcia
-- touchscreen(ucb1200)
-- ps/2 keyboard
-- console on LCD screen
-- serial ports (ttyS[0-2])
-  - ttyS0 is default for serial console
-- Smart I/O (ADC, keypad, digital inputs, etc)
-  See http://www.eurotech-inc.com/linux-sbc.asp for IOCTL documentation
-  and example user space code. ps/2 keybd is multiplexed through this driver
-
-To do:
-- UCB1200 audio with new ucb_generic layer
-- everything else!  :-)
-
-Notes:
-
-- The flash on board is divided into 3 partitions.  mtd0 is where
-  the ADS boot ROM and zImage is stored.  It's been marked as
-  read-only to keep you from blasting over the bootloader. :)  mtd1 is
-  for the ramdisk.gz image.  mtd2 is user flash space and can be
-  utilized for either JFFS or if you're feeling crazy, running ext2
-  on top of it. If you're not using the ADS bootloader, you're
-  welcome to blast over the mtd1 partition also.
-
-- 16bpp mode requires a different cable than what ships with the board.
-  Contact ADS or look through the manual to wire your own. Currently,
-  if you compile with 16bit mode support and switch into a lower bpp
-  mode, the timing is off so the image is corrupted.  This will be
-  fixed soon.
-
-Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
-
diff --git a/Documentation/arm/SA1100/GraphicsMaster b/Documentation/arm/SA1100/GraphicsMaster
deleted file mode 100644
index 9145088a0ba2..000000000000
--- a/Documentation/arm/SA1100/GraphicsMaster
+++ /dev/null
@@ -1,53 +0,0 @@
-ADS GraphicsMaster Single Board Computer
-
-For more details, contact Applied Data Systems or see
-http://www.applieddata.net/products.html
-
-The original Linux support for this product has been provided by
-Nicolas Pitre <nico@fluxnic.net>. Continued development work by
-Woojung Huh <whuh@applieddata.net>
-
-Use 'make graphicsmaster_config' before any 'make config'.
-This will set up defaults for GraphicsMaster support.
-
-The kernel zImage is linked to be loaded and executed at 0xc0400000.
-
-Linux can  be used with the ADS BootLoader that ships with the
-newer rev boards. See their documentation on how to load Linux.
-
-Supported peripherals:
-- SA1100 LCD frame buffer (8/16bpp...sort of)
-- SA1111 USB Master
-- on-board SMC 92C96 ethernet NIC
-- SA1100 serial port
-- flash memory access (MTD/JFFS)
-- pcmcia, compact flash
-- touchscreen(ucb1200)
-- ps/2 keyboard
-- console on LCD screen
-- serial ports (ttyS[0-2])
-  - ttyS0 is default for serial console
-- Smart I/O (ADC, keypad, digital inputs, etc)
-  See http://www.eurotech-inc.com/linux-sbc.asp for IOCTL documentation
-  and example user space code. ps/2 keybd is multiplexed through this driver
-
-To do:
-- everything else!  :-)
-
-Notes:
-
-- The flash on board is divided into 3 partitions.  mtd0 is where
-  the zImage is stored.  It's been marked as read-only to keep you
-  from blasting over the bootloader. :)  mtd1 is
-  for the ramdisk.gz image.  mtd2 is user flash space and can be
-  utilized for either JFFS or if you're feeling crazy, running ext2
-  on top of it. If you're not using the ADS bootloader, you're
-  welcome to blast over the mtd1 partition also.
-
-- 16bpp mode requires a different cable than what ships with the board.
-  Contact ADS or look through the manual to wire your own. Currently,
-  if you compile with 16bit mode support and switch into a lower bpp
-  mode, the timing is off so the image is corrupted.  This will be
-  fixed soon.
-
-Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
diff --git a/Documentation/arm/SA1100/HUW_WEBPANEL b/Documentation/arm/SA1100/HUW_WEBPANEL
deleted file mode 100644
index fd56b48d4833..000000000000
--- a/Documentation/arm/SA1100/HUW_WEBPANEL
+++ /dev/null
@@ -1,17 +0,0 @@
-The HUW_WEBPANEL is a product of the german company Hoeft & Wessel AG
-
-If you want more information, please visit
-http://www.hoeft-wessel.de
-
-To build the kernel:
-	make huw_webpanel_config
-	make oldconfig
-	[accept all defaults]
-	make zImage
-
-Mostly of the work is done by:
-Roman Jordan         jor@hoeft-wessel.de
-Christoph Schulz    schu@hoeft-wessel.de
-
-2000/12/18/
-
diff --git a/Documentation/arm/SA1100/Itsy b/Documentation/arm/SA1100/Itsy
deleted file mode 100644
index 44b94997fa0d..000000000000
--- a/Documentation/arm/SA1100/Itsy
+++ /dev/null
@@ -1,39 +0,0 @@
-Itsy is a research project done by the Western Research Lab, and Systems
-Research Center in Palo Alto, CA. The Itsy project is one of several
-research projects at Compaq that are related to pocket computing.
-
-For more information, see:
-
-	http://www.hpl.hp.com/downloads/crl/itsy/
-
-Notes on initial 2.4 Itsy support (8/27/2000) :
-The port was done on an Itsy version 1.5 machine with a daughtercard with
-64 Meg of DRAM and 32 Meg of Flash. The initial work includes support for
-serial console (to see what you're doing).  No other devices have been
-enabled.
-
-To build, do a "make menuconfig" (or xmenuconfig) and select Itsy support.
-Disable Flash and LCD support. and then do a make zImage.
-Finally, you will need to cd to arch/arm/boot/tools and execute a make there
-to build the params-itsy program used to boot the kernel.
-
-In order to install the port of 2.4 to the itsy, You will need to set the
-configuration parameters in the monitor as follows:
-Arg 1:0x08340000, Arg2: 0xC0000000, Arg3:18 (0x12), Arg4:0
-Make sure the start-routine address is set to 0x00060000.
-
-Next, flash the params-itsy program to 0x00060000 ("p 1 0x00060000" in the
-flash menu)  Flash the kernel in arch/arm/boot/zImage into 0x08340000
-("p 1 0x00340000").  Finally flash an initial ramdisk into 0xC8000000
-("p 2 0x0")  We used ramdisk-2-30.gz from the 0.11 version directory on
-handhelds.org.
-
-The serial connection we established was at:
- 8-bit data, no parity, 1 stop bit(s), 115200.00 b/s. in the monitor, in the
-params-itsy program, and in the kernel itself.  This can be changed, but
-not easily. The monitor parameters are easily changed, the params program
-setup is assembly outl's, and the kernel is a configuration item specific to
-the itsy. (i.e. grep for CONFIG_SA1100_ITSY and you'll find where it is.)
-
-
-This should get you a properly booting 2.4 kernel on the itsy.
diff --git a/Documentation/arm/SA1100/LART b/Documentation/arm/SA1100/LART
deleted file mode 100644
index 6d412b685598..000000000000
--- a/Documentation/arm/SA1100/LART
+++ /dev/null
@@ -1,14 +0,0 @@
-Linux Advanced Radio Terminal (LART)
-------------------------------------
-
-The LART is a small (7.5 x 10cm) SA-1100 board, designed for embedded
-applications. It has 32 MB DRAM, 4MB Flash ROM, double RS232 and all
-other StrongARM-gadgets. Almost all SA signals are directly accessible
-through a number of connectors. The powersupply accepts voltages
-between 3.5V and 16V and is overdimensioned to support a range of
-daughterboards. A quad Ethernet / IDE / PS2 / sound daughterboard
-is under development, with plenty of others in different stages of
-planning.
-
-The hardware designs for this board have been released under an open license;
-see the LART page at http://www.lartmaker.nl/ for more information.
diff --git a/Documentation/arm/SA1100/PLEB b/Documentation/arm/SA1100/PLEB
deleted file mode 100644
index b9c8a631a351..000000000000
--- a/Documentation/arm/SA1100/PLEB
+++ /dev/null
@@ -1,11 +0,0 @@
-The PLEB project was started as a student initiative at the School of
-Computer Science and Engineering, University of New South Wales to make a
-pocket computer capable of running the Linux Kernel.
-
-PLEB support has yet to be fully integrated.
-
-For more information, see:
-
-	http://www.cse.unsw.edu.au
-
-
diff --git a/Documentation/arm/SA1100/Pangolin b/Documentation/arm/SA1100/Pangolin
deleted file mode 100644
index 077a6120e129..000000000000
--- a/Documentation/arm/SA1100/Pangolin
+++ /dev/null
@@ -1,23 +0,0 @@
-Pangolin is a StrongARM 1110-based evaluation platform produced
-by Dialogue Technology (http://www.dialogue.com.tw/).
-It has EISA slots for ease of configuration with SDRAM/Flash
-memory card, USB/Serial/Audio card, Compact Flash card,
-PCMCIA/IDE card and TFT-LCD card.
-
-To compile for Pangolin, you must issue the following commands:
-
-	make pangolin_config
-	make oldconfig
-	make zImage
-
-Supported peripherals:
-- SA1110 serial port (UART1/UART2/UART3)
-- flash memory access
-- compact flash driver
-- UDA1341 sound driver
-- SA1100 LCD controller for 800x600 16bpp TFT-LCD
-- MQ-200 driver for 800x600 16bpp TFT-LCD
-- Penmount(touch panel) driver
-- PCMCIA driver
-- SMC91C94 LAN driver
-- IDE driver (experimental)
diff --git a/Documentation/arm/SA1100/Tifon b/Documentation/arm/SA1100/Tifon
deleted file mode 100644
index dd1934d9c851..000000000000
--- a/Documentation/arm/SA1100/Tifon
+++ /dev/null
@@ -1,7 +0,0 @@
-Tifon
------
-
-More info has to come...
-
-Contact: Peter Danielsson <peter.danielsson@era-t.ericsson.se>
-
diff --git a/Documentation/arm/SA1100/Yopy b/Documentation/arm/SA1100/Yopy
deleted file mode 100644
index e14f16d836ac..000000000000
--- a/Documentation/arm/SA1100/Yopy
+++ /dev/null
@@ -1,2 +0,0 @@
-See http://www.yopydeveloper.org for more.
-
diff --git a/Documentation/arm/SA1100/empeg b/Documentation/arm/SA1100/empeg
deleted file mode 100644
index 4ece4849a42c..000000000000
--- a/Documentation/arm/SA1100/empeg
+++ /dev/null
@@ -1,2 +0,0 @@
-See ../empeg/README
-
diff --git a/Documentation/arm/SA1100/nanoEngine b/Documentation/arm/SA1100/nanoEngine
deleted file mode 100644
index 48a7934f95f6..000000000000
--- a/Documentation/arm/SA1100/nanoEngine
+++ /dev/null
@@ -1,11 +0,0 @@
-nanoEngine
-----------
-
-"nanoEngine" is a SA1110 based single board computer from 
-Bright Star Engineering Inc.  See www.brightstareng.com/arm
-for more info.
-(Ref: Stuart Adams <sja@brightstareng.com>)
-
-Also visit Larry Doolittle's "Linux for the nanoEngine" site:
-http://www.brightstareng.com/arm/nanoeng.htm
-
diff --git a/Documentation/arm/SA1100/serial_UART b/Documentation/arm/SA1100/serial_UART
deleted file mode 100644
index a63966f1d083..000000000000
--- a/Documentation/arm/SA1100/serial_UART
+++ /dev/null
@@ -1,47 +0,0 @@
-The SA1100 serial port had its major/minor numbers officially assigned:
-
-> Date: Sun, 24 Sep 2000 21:40:27 -0700
-> From: H. Peter Anvin <hpa@transmeta.com>
-> To: Nicolas Pitre <nico@CAM.ORG>
-> Cc: Device List Maintainer <device@lanana.org>
-> Subject: Re: device
-> 
-> Okay.  Note that device numbers 204 and 205 are used for "low density
-> serial devices", so you will have a range of minors on those majors (the
-> tty device layer handles this just fine, so you don't have to worry about
-> doing anything special.)
-> 
-> So your assignments are:
-> 
-> 204 char        Low-density serial ports
->                   5 = /dev/ttySA0               SA1100 builtin serial port 0
->                   6 = /dev/ttySA1               SA1100 builtin serial port 1
->                   7 = /dev/ttySA2               SA1100 builtin serial port 2
-> 
-> 205 char        Low-density serial ports (alternate device)
->                   5 = /dev/cusa0                Callout device for ttySA0
->                   6 = /dev/cusa1                Callout device for ttySA1
->                   7 = /dev/cusa2                Callout device for ttySA2
->
-
-You must create those inodes in /dev on the root filesystem used
-by your SA1100-based device:
-
-	mknod ttySA0 c 204 5
-	mknod ttySA1 c 204 6
-	mknod ttySA2 c 204 7
-	mknod cusa0 c 205 5
-	mknod cusa1 c 205 6
-	mknod cusa2 c 205 7
-
-In addition to the creation of the appropriate device nodes above, you
-must ensure your user space applications make use of the correct device
-name. The classic example is the content of the /etc/inittab file where
-you might have a getty process started on ttyS0.  In this case:
-
-- replace occurrences of ttyS0 with ttySA0, ttyS1 with ttySA1, etc.
-
-- don't forget to add 'ttySA0', 'console', or the appropriate tty name
-  in /etc/securetty for root to be allowed to login as well.
-
-
diff --git a/Documentation/arm/SH-Mobile/.gitignore b/Documentation/arm/SH-Mobile/.gitignore
deleted file mode 100644
index c928dbf3cc88..000000000000
--- a/Documentation/arm/SH-Mobile/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-vrl4
diff --git a/Documentation/arm/SPEAr/overview.txt b/Documentation/arm/SPEAr/overview.txt
deleted file mode 100644
index 1b049be6c84f..000000000000
--- a/Documentation/arm/SPEAr/overview.txt
+++ /dev/null
@@ -1,63 +0,0 @@
-			SPEAr ARM Linux Overview
-			==========================
-
-Introduction
-------------
-
-  SPEAr (Structured Processor Enhanced Architecture).
-  weblink : http://www.st.com/spear
-
-  The ST Microelectronics SPEAr range of ARM9/CortexA9 System-on-Chip CPUs are
-  supported by the 'spear' platform of ARM Linux. Currently SPEAr1310,
-  SPEAr1340, SPEAr300, SPEAr310, SPEAr320 and SPEAr600 SOCs are supported.
-
-  Hierarchy in SPEAr is as follows:
-
-  SPEAr (Platform)
-	- SPEAr3XX (3XX SOC series, based on ARM9)
-		- SPEAr300 (SOC)
-			- SPEAr300 Evaluation Board
-		- SPEAr310 (SOC)
-			- SPEAr310 Evaluation Board
-		- SPEAr320 (SOC)
-			- SPEAr320 Evaluation Board
-	- SPEAr6XX (6XX SOC series, based on ARM9)
-		- SPEAr600 (SOC)
-			- SPEAr600 Evaluation Board
-	- SPEAr13XX (13XX SOC series, based on ARM CORTEXA9)
-		- SPEAr1310 (SOC)
-			- SPEAr1310 Evaluation Board
-		- SPEAr1340 (SOC)
-			- SPEAr1340 Evaluation Board
-
-  Configuration
-  -------------
-
-  A generic configuration is provided for each machine, and can be used as the
-  default by
-	make spear13xx_defconfig
-	make spear3xx_defconfig
-	make spear6xx_defconfig
-
-  Layout
-  ------
-
-  The common files for multiple machine families (SPEAr3xx, SPEAr6xx and
-  SPEAr13xx) are located in the platform code contained in arch/arm/plat-spear
-  with headers in plat/.
-
-  Each machine series have a directory with name arch/arm/mach-spear followed by
-  series name. Like mach-spear3xx, mach-spear6xx and mach-spear13xx.
-
-  Common file for machines of spear3xx family is mach-spear3xx/spear3xx.c, for
-  spear6xx is mach-spear6xx/spear6xx.c and for spear13xx family is
-  mach-spear13xx/spear13xx.c. mach-spear* also contain soc/machine specific
-  files, like spear1310.c, spear1340.c spear300.c, spear310.c, spear320.c and
-  spear600.c.  mach-spear* doesn't contains board specific files as they fully
-  support Flattened Device Tree.
-
-
-  Document Author
-  ---------------
-
-  Viresh Kumar <vireshk@kernel.org>, (c) 2010-2012 ST Microelectronics
diff --git a/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt b/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt
deleted file mode 100644
index fa968aa99d67..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-		S3C24XX CPUfreq support
-		=======================
-
-Introduction
-------------
-
- The S3C24XX series support a number of power saving systems, such as
- the ability to change the core, memory and peripheral operating
- frequencies. The core control is exported via the CPUFreq driver
- which has a number of different manual or automatic controls over the
- rate the core is running at.
-
- There are two forms of the driver depending on the specific CPU and
- how the clocks are arranged. The first implementation used as single
- PLL to feed the ARM, memory and peripherals via a series of dividers
- and muxes and this is the implementation that is documented here. A
- newer version where there is a separate PLL and clock divider for the
- ARM core is available as a separate driver.
-
-
-Layout
-------
-
- The code core manages the CPU specific drivers, any data that they
- need to register and the interface to the generic drivers/cpufreq
- system. Each CPU registers a driver to control the PLL, clock dividers
- and anything else associated with it. Any board that wants to use this
- framework needs to supply at least basic details of what is required.
-
- The core registers with drivers/cpufreq at init time if all the data
- necessary has been supplied.
-
-
-CPU support
------------
-
- The support for each CPU depends on the facilities provided by the
- SoC and the driver as each device has different PLL and clock chains
- associated with it.
-
-
-Slow Mode
----------
-
- The SLOW mode where the PLL is turned off altogether and the
- system is fed by the external crystal input is currently not
- supported.
-
-
-sysfs
------
-
- The core code exports extra information via sysfs in the directory
- devices/system/cpu/cpu0/arch-freq.
-
-
-Board Support
--------------
-
- Each board that wants to use the cpufreq code must register some basic
- information with the core driver to provide information about what the
- board requires and any restrictions being placed on it.
-
- The board needs to supply information about whether it needs the IO bank
- timings changing, any maximum frequency limits and information about the
- SDRAM refresh rate.
-
-
-
-
-Document Author
----------------
-
-Ben Dooks, Copyright 2009 Simtec Electronics
-Licensed under GPLv2
diff --git a/Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt b/Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt
deleted file mode 100644
index b87292e05f2f..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-		Simtec Electronics EB2410ITX (BAST)
-		===================================
-
-	http://www.simtec.co.uk/products/EB2410ITX/
-
-Introduction
-------------
-
-  The EB2410ITX is a S3C2410 based development board with a variety of
-  peripherals and expansion connectors. This board is also known by
-  the shortened name of Bast.
-
-
-Configuration
--------------
-
-  To set the default configuration, use `make bast_defconfig` which
-  supports the commonly used features of this board.
-
-
-Support
--------
-
-  Official support information can be found on the Simtec Electronics
-  website, at the product page http://www.simtec.co.uk/products/EB2410ITX/
-
-  Useful links:
-
-    - Resources Page http://www.simtec.co.uk/products/EB2410ITX/resources.html
-
-    - Board FAQ at http://www.simtec.co.uk/products/EB2410ITX/faq.html
-
-    - Bootloader info http://www.simtec.co.uk/products/SWABLE/resources.html
-      and FAQ http://www.simtec.co.uk/products/SWABLE/faq.html
-
-
-MTD
----
-
-  The NAND and NOR support has been merged from the linux-mtd project.
-  Any problems, see http://www.linux-mtd.infradead.org/ for more
-  information or up-to-date versions of linux-mtd.
-
-
-IDE
----
-
-  Both onboard IDE ports are supported, however there is no support for
-  changing speed of devices, PIO Mode 4 capable drives should be used.
-
-
-Maintainers
------------
-
-  This board is maintained by Simtec Electronics.
-
-
-Copyright 2004 Ben Dooks, Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/GPIO.txt b/Documentation/arm/Samsung-S3C24XX/GPIO.txt
deleted file mode 100644
index e8f918b96123..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/GPIO.txt
+++ /dev/null
@@ -1,171 +0,0 @@
-			S3C24XX GPIO Control
-			====================
-
-Introduction
-------------
-
-  The s3c2410 kernel provides an interface to configure and
-  manipulate the state of the GPIO pins, and find out other
-  information about them.
-
-  There are a number of conditions attached to the configuration
-  of the s3c2410 GPIO system, please read the Samsung provided
-  data-sheet/users manual to find out the complete list.
-
-  See Documentation/arm/Samsung/GPIO.txt for the core implementation.
-
-
-GPIOLIB
--------
-
-  With the event of the GPIOLIB in drivers/gpio, support for some
-  of the GPIO functions such as reading and writing a pin will
-  be removed in favour of this common access method.
-
-  Once all the extant drivers have been converted, the functions
-  listed below will be removed (they may be marked as __deprecated
-  in the near future).
-
-  The following functions now either have a s3c_ specific variant
-  or are merged into gpiolib. See the definitions in
-  arch/arm/plat-samsung/include/plat/gpio-cfg.h:
-
-  s3c2410_gpio_setpin()		gpio_set_value() or gpio_direction_output()
-  s3c2410_gpio_getpin()		gpio_get_value() or gpio_direction_input()
-  s3c2410_gpio_getirq()		gpio_to_irq()
-  s3c2410_gpio_cfgpin()		s3c_gpio_cfgpin()
-  s3c2410_gpio_getcfg()		s3c_gpio_getcfg()
-  s3c2410_gpio_pullup()		s3c_gpio_setpull()
-
-
-GPIOLIB conversion
-------------------
-
-If you need to convert your board or driver to use gpiolib from the phased
-out s3c2410 API, then here are some notes on the process.
-
-1) If your board is exclusively using an GPIO, say to control peripheral
-   power, then it will require to claim the gpio with gpio_request() before
-   it can use it.
-
-   It is recommended to check the return value, with at least WARN_ON()
-   during initialisation.
-
-2) The s3c2410_gpio_cfgpin() can be directly replaced with s3c_gpio_cfgpin()
-   as they have the same arguments, and can either take the pin specific
-   values, or the more generic special-function-number arguments.
-
-3) s3c2410_gpio_pullup() changes have the problem that while the
-   s3c2410_gpio_pullup(x, 1) can be easily translated to the
-   s3c_gpio_setpull(x, S3C_GPIO_PULL_NONE), the s3c2410_gpio_pullup(x, 0)
-   are not so easy.
-
-   The s3c2410_gpio_pullup(x, 0) case enables the pull-up (or in the case
-   of some of the devices, a pull-down) and as such the new API distinguishes
-   between the UP and DOWN case. There is currently no 'just turn on' setting
-   which may be required if this becomes a problem.
-
-4) s3c2410_gpio_setpin() can be replaced by gpio_set_value(), the old call
-   does not implicitly configure the relevant gpio to output. The gpio
-   direction should be changed before using gpio_set_value().
-
-5) s3c2410_gpio_getpin() is replaceable by gpio_get_value() if the pin
-   has been set to input. It is currently unknown what the behaviour is
-   when using gpio_get_value() on an output pin (s3c2410_gpio_getpin
-   would return the value the pin is supposed to be outputting).
-
-6) s3c2410_gpio_getirq() should be directly replaceable with the
-   gpio_to_irq() call.
-
-The s3c2410_gpio and gpio_ calls have always operated on the same gpio
-numberspace, so there is no problem with converting the gpio numbering
-between the calls.
-
-
-Headers
--------
-
-  See arch/arm/mach-s3c24xx/include/mach/regs-gpio.h for the list
-  of GPIO pins, and the configuration values for them. This
-  is included by using #include <mach/regs-gpio.h>
-
-
-PIN Numbers
------------
-
-  Each pin has an unique number associated with it in regs-gpio.h,
-  e.g. S3C2410_GPA(0) or S3C2410_GPF(1). These defines are used to tell
-  the GPIO functions which pin is to be used.
-
-  With the conversion to gpiolib, there is no longer a direct conversion
-  from gpio pin number to register base address as in earlier kernels. This
-  is due to the number space required for newer SoCs where the later
-  GPIOs are not contiguous.
-
-
-Configuring a pin
------------------
-
-  The following function allows the configuration of a given pin to
-  be changed.
-
-    void s3c_gpio_cfgpin(unsigned int pin, unsigned int function);
-
-  e.g.:
-
-     s3c_gpio_cfgpin(S3C2410_GPA(0), S3C_GPIO_SFN(1));
-     s3c_gpio_cfgpin(S3C2410_GPE(8), S3C_GPIO_SFN(2));
-
-   which would turn GPA(0) into the lowest Address line A0, and set
-   GPE(8) to be connected to the SDIO/MMC controller's SDDAT1 line.
-
-
-Reading the current configuration
----------------------------------
-
-  The current configuration of a pin can be read by using standard
-  gpiolib function:
-
-  s3c_gpio_getcfg(unsigned int pin);
-
-  The return value will be from the same set of values which can be
-  passed to s3c_gpio_cfgpin().
-
-
-Configuring a pull-up resistor
-------------------------------
-
-  A large proportion of the GPIO pins on the S3C2410 can have weak
-  pull-up resistors enabled. This can be configured by the following
-  function:
-
-    void s3c_gpio_setpull(unsigned int pin, unsigned int to);
-
-  Where the to value is S3C_GPIO_PULL_NONE to set the pull-up off,
-  and S3C_GPIO_PULL_UP to enable the specified pull-up. Any other
-  values are currently undefined.
-
-
-Getting and setting the state of a PIN
---------------------------------------
-
-  These calls are now implemented by the relevant gpiolib calls, convert
-  your board or driver to use gpiolib.
-
-
-Getting the IRQ number associated with a PIN
---------------------------------------------
-
-  A standard gpiolib function can map the given pin number to an IRQ
-  number to pass to the IRQ system.
-
-   int gpio_to_irq(unsigned int pin);
-
-  Note, not all pins have an IRQ.
-
-
-Author
--------
-
-Ben Dooks, 03 October 2004
-Copyright 2004 Ben Dooks, Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/H1940.txt b/Documentation/arm/Samsung-S3C24XX/H1940.txt
deleted file mode 100644
index b738859b1fc0..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/H1940.txt
+++ /dev/null
@@ -1,40 +0,0 @@
-		HP IPAQ H1940
-		=============
-
-http://www.handhelds.org/projects/h1940.html
-
-Introduction
-------------
-
-  The HP H1940 is a S3C2410 based handheld device, with
-  bluetooth connectivity.
-
-
-Support
--------
-
-  A variety of information is available
-
-  handhelds.org project page:
-
-    http://www.handhelds.org/projects/h1940.html
-
-  handhelds.org wiki page:
-
-    http://handhelds.org/moin/moin.cgi/HpIpaqH1940
-
-  Herbert Pötzl pages:
-
-    http://vserver.13thfloor.at/H1940/
-
-
-Maintainers
------------
-
-  This project is being maintained and developed by a variety
-  of people, including Ben Dooks, Arnaud Patard, and Herbert Pötzl.
-
-  Thanks to the many others who have also provided support.
-
-
-(c) 2005 Ben Dooks
diff --git a/Documentation/arm/Samsung-S3C24XX/NAND.txt b/Documentation/arm/Samsung-S3C24XX/NAND.txt
deleted file mode 100644
index bc478a3409b8..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/NAND.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-			S3C24XX NAND Support
-			====================
-
-Introduction
-------------
-
-Small Page NAND
----------------
-
-The driver uses a 512 byte (1 page) ECC code for this setup. The
-ECC code is not directly compatible with the default kernel ECC
-code, so the driver enforces its own OOB layout and ECC parameters
-
-Large Page NAND
----------------
-
-The driver is capable of handling NAND flash with a 2KiB page
-size, with support for hardware ECC generation and correction.
-
-Unlike the 512byte page mode, the driver generates ECC data for
-each 256 byte block in an 2KiB page. This means that more than
-one error in a page can be rectified. It also means that the
-OOB layout remains the default kernel layout for these flashes.
-
-
-Document Author
----------------
-
-Ben Dooks, Copyright 2007 Simtec Electronics
-
diff --git a/Documentation/arm/Samsung-S3C24XX/Overview.txt b/Documentation/arm/Samsung-S3C24XX/Overview.txt
deleted file mode 100644
index 00d3c3141e21..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/Overview.txt
+++ /dev/null
@@ -1,318 +0,0 @@
-			S3C24XX ARM Linux Overview
-			==========================
-
-
-
-Introduction
-------------
-
-  The Samsung S3C24XX range of ARM9 System-on-Chip CPUs are supported
-  by the 's3c2410' architecture of ARM Linux. Currently the S3C2410,
-  S3C2412, S3C2413, S3C2416, S3C2440, S3C2442, S3C2443 and S3C2450 devices
-  are supported.
-
-  Support for the S3C2400 and S3C24A0 series was never completed and the
-  corresponding code has been removed after a while.  If someone wishes to
-  revive this effort, partial support can be retrieved from earlier Linux
-  versions.
-
-  The S3C2416 and S3C2450 devices are very similar and S3C2450 support is
-  included under the arch/arm/mach-s3c2416 directory. Note, while core
-  support for these SoCs is in, work on some of the extra peripherals
-  and extra interrupts is still ongoing.
-
-
-Configuration
--------------
-
-  A generic S3C2410 configuration is provided, and can be used as the
-  default by `make s3c2410_defconfig`. This configuration has support
-  for all the machines, and the commonly used features on them.
-
-  Certain machines may have their own default configurations as well,
-  please check the machine specific documentation.
-
-
-Layout
-------
-
-  The core support files are located in the platform code contained in
-  arch/arm/plat-s3c24xx with headers in include/asm-arm/plat-s3c24xx.
-  This directory should be kept to items shared between the platform
-  code (arch/arm/plat-s3c24xx) and the arch/arm/mach-s3c24* code.
-
-  Each cpu has a directory with the support files for it, and the
-  machines that carry the device. For example S3C2410 is contained
-  in arch/arm/mach-s3c2410 and S3C2440 in arch/arm/mach-s3c2440
-
-  Register, kernel and platform data definitions are held in the
-  arch/arm/mach-s3c2410 directory./include/mach
-
-arch/arm/plat-s3c24xx:
-
-  Files in here are either common to all the s3c24xx family,
-  or are common to only some of them with names to indicate this
-  status. The files that are not common to all are generally named
-  with the initial cpu they support in the series to ensure a short
-  name without any possibility of confusion with newer devices.
-
-  As an example, initially s3c244x would cover s3c2440 and s3c2442, but
-  with the s3c2443 which does not share many of the same drivers in
-  this directory, the name becomes invalid. We stick to s3c2440-<x>
-  to indicate a driver that is s3c2440 and s3c2442 compatible.
-
-  This does mean that to find the status of any given SoC, a number
-  of directories may need to be searched.
-
-
-Machines
---------
-
-  The currently supported machines are as follows:
-
-  Simtec Electronics EB2410ITX (BAST)
-
-    A general purpose development board, see EB2410ITX.txt for further
-    details
-
-  Simtec Electronics IM2440D20 (Osiris)
-
-    CPU Module from Simtec Electronics, with a S3C2440A CPU, nand flash
-    and a PCMCIA controller.
-
-  Samsung SMDK2410
-
-    Samsung's own development board, geared for PDA work.
-
-  Samsung/Aiji SMDK2412
-
-    The S3C2412 version of the SMDK2440.
-
-  Samsung/Aiji SMDK2413
-
-    The S3C2412 version of the SMDK2440.
-
-  Samsung/Meritech SMDK2440
-
-    The S3C2440 compatible version of the SMDK2440, which has the
-    option of an S3C2440 or S3C2442 CPU module.
-
-  Thorcom VR1000
-
-    Custom embedded board
-
-  HP IPAQ 1940
-
-    Handheld (IPAQ), available in several varieties
-
-  HP iPAQ rx3715
-
-    S3C2440 based IPAQ, with a number of variations depending on
-    features shipped.
-
-  Acer N30
-
-    A S3C2410 based PDA from Acer.  There is a Wiki page at
-    http://handhelds.org/moin/moin.cgi/AcerN30Documentation .
-
-  AML M5900
-
-    American Microsystems' M5900
-
-  Nex Vision Nexcoder
-  Nex Vision Otom
-
-    Two machines by Nex Vision
-
-
-Adding New Machines
--------------------
-
-  The architecture has been designed to support as many machines as can
-  be configured for it in one kernel build, and any future additions
-  should keep this in mind before altering items outside of their own
-  machine files.
-
-  Machine definitions should be kept in linux/arch/arm/mach-s3c2410,
-  and there are a number of examples that can be looked at.
-
-  Read the kernel patch submission policies as well as the
-  Documentation/arm directory before submitting patches. The
-  ARM kernel series is managed by Russell King, and has a patch system
-  located at http://www.arm.linux.org.uk/developer/patches/
-  as well as mailing lists that can be found from the same site.
-
-  As a courtesy, please notify <ben-linux@fluff.org> of any new
-  machines or other modifications.
-
-  Any large scale modifications, or new drivers should be discussed
-  on the ARM kernel mailing list (linux-arm-kernel) before being
-  attempted. See http://www.arm.linux.org.uk/mailinglists/ for the
-  mailing list information.
-
-
-I2C
----
-
-  The hardware I2C core in the CPU is supported in single master
-  mode, and can be configured via platform data.
-
-
-RTC
----
-
-  Support for the onboard RTC unit, including alarm function.
-
-  This has recently been upgraded to use the new RTC core,
-  and the module has been renamed to rtc-s3c to fit in with
-  the new rtc naming scheme.
-
-
-Watchdog
---------
-
-  The onchip watchdog is available via the standard watchdog
-  interface.
-
-
-NAND
-----
-
-  The current kernels now have support for the s3c2410 NAND
-  controller. If there are any problems the latest linux-mtd
-  code can be found from http://www.linux-mtd.infradead.org/
-
-  For more information see Documentation/arm/Samsung-S3C24XX/NAND.txt
-
-
-SD/MMC
-------
-
-  The SD/MMC hardware pre S3C2443 is supported in the current
-  kernel, the driver is drivers/mmc/host/s3cmci.c and supports
-  1 and 4 bit SD or MMC cards.
-
-  The SDIO behaviour of this driver has not been fully tested. There is no
-  current support for hardware SDIO interrupts.
-
-
-Serial
-------
-
-  The s3c2410 serial driver provides support for the internal
-  serial ports. These devices appear as /dev/ttySAC0 through 3.
-
-  To create device nodes for these, use the following commands
-
-    mknod ttySAC0 c 204 64
-    mknod ttySAC1 c 204 65
-    mknod ttySAC2 c 204 66
-
-
-GPIO
-----
-
-  The core contains support for manipulating the GPIO, see the
-  documentation in GPIO.txt in the same directory as this file.
-
-  Newer kernels carry GPIOLIB, and support is being moved towards
-  this with some of the older support in line to be removed.
-
-  As of v2.6.34, the move towards using gpiolib support is almost
-  complete, and very little of the old calls are left.
-
-  See Documentation/arm/Samsung-S3C24XX/GPIO.txt for the S3C24XX specific
-  support and Documentation/arm/Samsung/GPIO.txt for the core Samsung
-  implementation.
-
-
-Clock Management
-----------------
-
-  The core provides the interface defined in the header file
-  include/asm-arm/hardware/clock.h, to allow control over the
-  various clock units
-
-
-Suspend to RAM
---------------
-
-  For boards that provide support for suspend to RAM, the
-  system can be placed into low power suspend.
-
-  See Suspend.txt for more information.
-
-
-SPI
----
-
-  SPI drivers are available for both the in-built hardware
-  (although there is no DMA support yet) and a generic
-  GPIO based solution.
-
-
-LEDs
-----
-
-  There is support for GPIO based LEDs via a platform driver
-  in the LED subsystem.
-
-
-Platform Data
--------------
-
-  Whenever a device has platform specific data that is specified
-  on a per-machine basis, care should be taken to ensure the
-  following:
-
-    1) that default data is not left in the device to confuse the
-       driver if a machine does not set it at startup
-
-    2) the data should (if possible) be marked as __initdata,
-       to ensure that the data is thrown away if the machine is
-       not the one currently in use.
-
-       The best way of doing this is to make a function that
-       kmalloc()s an area of memory, and copies the __initdata
-       and then sets the relevant device's platform data. Making
-       the function `__init` takes care of ensuring it is discarded
-       with the rest of the initialisation code
-
-       static __init void s3c24xx_xxx_set_platdata(struct xxx_data *pd)
-       {
-           struct s3c2410_xxx_mach_info *npd;
-
-	   npd = kmalloc(sizeof(struct s3c2410_xxx_mach_info), GFP_KERNEL);
-	   if (npd) {
-	      memcpy(npd, pd, sizeof(struct s3c2410_xxx_mach_info));
-	      s3c_device_xxx.dev.platform_data = npd;
-	   } else {
-              printk(KERN_ERR "no memory for xxx platform data\n");
-	   }
-	}
-
-	Note, since the code is marked as __init, it should not be
-	exported outside arch/arm/mach-s3c2410/, or exported to
-	modules via EXPORT_SYMBOL() and related functions.
-
-
-Port Contributors
------------------
-
-  Ben Dooks (BJD)
-  Vincent Sanders
-  Herbert Potzl
-  Arnaud Patard (RTP)
-  Roc Wu
-  Klaus Fetscher
-  Dimitry Andric
-  Shannon Holland
-  Guillaume Gourat (NexVision)
-  Christer Weinigel (wingel) (Acer N30)
-  Lucas Correia Villa Real (S3C2400 port)
-
-
-Document Author
----------------
-
-Ben Dooks, Copyright 2004-2006 Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/S3C2412.txt b/Documentation/arm/Samsung-S3C24XX/S3C2412.txt
deleted file mode 100644
index dc1fd362d3c1..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/S3C2412.txt
+++ /dev/null
@@ -1,120 +0,0 @@
-		S3C2412 ARM Linux Overview
-		==========================
-
-Introduction
-------------
-
-  The S3C2412 is part of the S3C24XX range of ARM9 System-on-Chip CPUs
-  from Samsung. This part has an ARM926-EJS core, capable of running up
-  to 266MHz (see data-sheet for more information)
-
-
-Clock
------
-
-  The core clock code provides a set of clocks to the drivers, and allows
-  for source selection and a number of other features.
-
-
-Power
------
-
-  No support for suspend/resume to RAM in the current system.
-
-
-DMA
----
-
-  No current support for DMA.
-
-
-GPIO
-----
-
-  There is support for setting the GPIO to input/output/special function
-  and reading or writing to them.
-
-
-UART
-----
-
-  The UART hardware is similar to the S3C2440, and is supported by the
-  s3c2410 driver in the drivers/serial directory.
-
-
-NAND
-----
-
-  The NAND hardware is similar to the S3C2440, and is supported by the
-  s3c2410 driver in the drivers/mtd/nand/raw directory.
-
-
-USB Host
---------
-
-  The USB hardware is similar to the S3C2410, with extended clock source
-  control. The OHCI portion is supported by the ohci-s3c2410 driver, and
-  the clock control selection is supported by the core clock code.
-
-
-USB Device
-----------
-
-  No current support in the kernel
-
-
-IRQs
-----
-
-  All the standard, and external interrupt sources are supported. The
-  extra sub-sources are not yet supported.
-
-
-RTC
----
-
-  The RTC hardware is similar to the S3C2410, and is supported by the
-  s3c2410-rtc driver.
-
-
-Watchdog
---------
-
-  The watchdog hardware is the same as the S3C2410, and is supported by
-  the s3c2410_wdt driver.
-
-
-MMC/SD/SDIO
------------
-
-  No current support for the MMC/SD/SDIO block.
-
-IIC
----
-
-  The IIC hardware is the same as the S3C2410, and is supported by the
-  i2c-s3c24xx driver.
-
-
-IIS
----
-
-  No current support for the IIS interface.
-
-
-SPI
----
-
-  No current support for the SPI interfaces.
-
-
-ATA
----
-
-  No current support for the on-board ATA block.
-
-
-Document Author
----------------
-
-Ben Dooks, Copyright 2006 Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/S3C2413.txt b/Documentation/arm/Samsung-S3C24XX/S3C2413.txt
deleted file mode 100644
index 909bdc7dd7b5..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/S3C2413.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-		S3C2413 ARM Linux Overview
-		==========================
-
-Introduction
-------------
-
-  The S3C2413 is an extended version of the S3C2412, with an camera
-  interface and mobile DDR memory support. See the S3C2412 support
-  documentation for more information.
-
-
-Camera Interface
----------------
-
-  This block is currently not supported.
-
-
-Document Author
----------------
-
-Ben Dooks, Copyright 2006 Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/SMDK2440.txt b/Documentation/arm/Samsung-S3C24XX/SMDK2440.txt
deleted file mode 100644
index 429390bd4684..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/SMDK2440.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-		Samsung/Meritech SMDK2440
-		=========================
-
-Introduction
-------------
-
-  The SMDK2440 is a two part evaluation board for the Samsung S3C2440
-  processor. It includes support for LCD, SmartMedia, Audio, SD and
-  10MBit Ethernet, and expansion headers for various signals, including
-  the camera and unused GPIO.
-
-
-Configuration
--------------
-
-  To set the default configuration, use `make smdk2440_defconfig` which
-  will configure the common features of this board, or use
-  `make s3c2410_config` to include support for all s3c2410/s3c2440 machines
-
-
-Support
--------
-
-  Ben Dooks' SMDK2440 site at http://www.fluff.org/ben/smdk2440/ which
-  includes linux based USB download tools.
-
-  Some of the h1940 patches that can be found from the H1940 project
-  site at http://www.handhelds.org/projects/h1940.html can also be
-  applied to this board.
-
-
-Peripherals
------------
-
-  There is no current support for any of the extra peripherals on the
-  base-board itself.
-
-
-MTD
----
-
-  The NAND flash should be supported by the in kernel MTD NAND support,
-  NOR flash will be added later.
-
-
-Maintainers
------------
-
-  This board is being maintained by Ben Dooks, for more info, see
-  http://www.fluff.org/ben/smdk2440/
-
-  Many thanks to Dimitry Andric of TomTom for the loan of the SMDK2440,
-  and to Simtec Electronics for allowing me time to work on this.
-
-
-(c) 2004 Ben Dooks
diff --git a/Documentation/arm/Samsung-S3C24XX/Suspend.txt b/Documentation/arm/Samsung-S3C24XX/Suspend.txt
deleted file mode 100644
index cb4f0c0cdf9d..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/Suspend.txt
+++ /dev/null
@@ -1,137 +0,0 @@
-			S3C24XX Suspend Support
-			=======================
-
-
-Introduction
-------------
-
-  The S3C24XX supports a low-power suspend mode, where the SDRAM is kept
-  in Self-Refresh mode, and all but the essential peripheral blocks are
-  powered down. For more information on how this works, please look
-  at the relevant CPU datasheet from Samsung.
-
-
-Requirements
-------------
-
-  1) A bootloader that can support the necessary resume operation
-
-  2) Support for at least 1 source for resume
-
-  3) CONFIG_PM enabled in the kernel
-
-  4) Any peripherals that are going to be powered down at the same
-     time require suspend/resume support.
-
-
-Resuming
---------
-
-  The S3C2410 user manual defines the process of sending the CPU to
-  sleep and how it resumes. The default behaviour of the Linux code
-  is to set the GSTATUS3 register to the physical address of the
-  code to resume Linux operation.
-
-  GSTATUS4 is currently left alone by the sleep code, and is free to
-  use for any other purposes (for example, the EB2410ITX uses this to
-  save memory configuration in).
-
-
-Machine Support
----------------
-
-  The machine specific functions must call the s3c_pm_init() function
-  to say that its bootloader is capable of resuming. This can be as
-  simple as adding the following to the machine's definition:
-
-  INITMACHINE(s3c_pm_init)
-
-  A board can do its own setup before calling s3c_pm_init, if it
-  needs to setup anything else for power management support.
-
-  There is currently no support for over-riding the default method of
-  saving the resume address, if your board requires it, then contact
-  the maintainer and discuss what is required.
-
-  Note, the original method of adding an late_initcall() is wrong,
-  and will end up initialising all compiled machines' pm init!
-
-  The following is an example of code used for testing wakeup from
-  an falling edge on IRQ_EINT0:
-
-
-static irqreturn_t button_irq(int irq, void *pw)
-{
-	return IRQ_HANDLED;
-}
-
-statuc void __init machine_init(void)
-{
-	...
-
-	request_irq(IRQ_EINT0, button_irq, IRQF_TRIGGER_FALLING,
-		   "button-irq-eint0", NULL);
-
-	enable_irq_wake(IRQ_EINT0);
-
-	s3c_pm_init();
-}
-
-
-Debugging
----------
-
-  There are several important things to remember when using PM suspend:
-
-  1) The uart drivers will disable the clocks to the UART blocks when
-     suspending, which means that use of printascii() or similar direct
-     access to the UARTs will cause the debug to stop.
-
-  2) While the pm code itself will attempt to re-enable the UART clocks,
-     care should be taken that any external clock sources that the UARTs
-     rely on are still enabled at that point.
-
-  3) If any debugging is placed in the resume path, then it must have the
-     relevant clocks and peripherals setup before use (ie, bootloader).
-
-     For example, if you transmit a character from the UART, the baud
-     rate and uart controls must be setup beforehand.
-
-
-Configuration
--------------
-
-  The S3C2410 specific configuration in `System Type` defines various
-  aspects of how the S3C2410 suspend and resume support is configured
-
-  `S3C2410 PM Suspend debug`
-
-    This option prints messages to the serial console before and after
-    the actual suspend, giving detailed information on what is
-    happening
-
-
-  `S3C2410 PM Suspend Memory CRC`
-
-    Allows the entire memory to be checksummed before and after the
-    suspend to see if there has been any corruption of the contents.
-
-    Note, the time to calculate the CRC is dependent on the CPU speed
-    and the size of memory. For an 64Mbyte RAM area on an 200MHz
-    S3C2410, this can take approximately 4 seconds to complete.
-
-    This support requires the CRC32 function to be enabled.
-
-
-  `S3C2410 PM Suspend CRC Chunksize (KiB)`
-
-    Defines the size of memory each CRC chunk covers. A smaller value
-    will mean that the CRC data block will take more memory, but will
-    identify any faults with better precision
-
-
-Document Author
----------------
-
-Ben Dooks, Copyright 2004 Simtec Electronics
-
diff --git a/Documentation/arm/Samsung-S3C24XX/USB-Host.txt b/Documentation/arm/Samsung-S3C24XX/USB-Host.txt
deleted file mode 100644
index f82b1faefad5..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/USB-Host.txt
+++ /dev/null
@@ -1,93 +0,0 @@
-			S3C24XX USB Host support
-			========================
-
-
-
-Introduction
-------------
-
-  This document details the S3C2410/S3C2440 in-built OHCI USB host support.
-
-Configuration
--------------
-
-  Enable at least the following kernel options:
-
-  menuconfig:
-
-   Device Drivers  --->
-     USB support  --->
-       <*> Support for Host-side USB
-       <*>   OHCI HCD support
-
-
-  .config:
-    CONFIG_USB
-    CONFIG_USB_OHCI_HCD
-
-
-  Once these options are configured, the standard set of USB device
-  drivers can be configured and used.
-
-
-Board Support
--------------
-
-  The driver attaches to a platform device, which will need to be
-  added by the board specific support file in linux/arch/arm/mach-s3c2410,
-  such as mach-bast.c or mach-smdk2410.c
-
-  The platform device's platform_data field is only needed if the
-  board implements extra power control or over-current monitoring.
-
-  The OHCI driver does not ensure the state of the S3C2410's MISCCTRL
-  register, so if both ports are to be used for the host, then it is
-  the board support file's responsibility to ensure that the second
-  port is configured to be connected to the OHCI core.
-
-
-Platform Data
--------------
-
-  See arch/arm/mach-s3c2410/include/mach/usb-control.h for the
-  descriptions of the platform device data. An implementation
-  can be found in linux/arch/arm/mach-s3c2410/usb-simtec.c .
-
-  The `struct s3c2410_hcd_info` contains a pair of functions
-  that get called to enable over-current detection, and to
-  control the port power status.
-
-  The ports are numbered 0 and 1.
-
-  power_control:
-
-    Called to enable or disable the power on the port.
-
-  enable_oc:
-
-    Called to enable or disable the over-current monitoring.
-    This should claim or release the resources being used to
-    check the power condition on the port, such as an IRQ.
-
-  report_oc:
-
-    The OHCI driver fills this field in for the over-current code
-    to call when there is a change to the over-current state on
-    an port. The ports argument is a bitmask of 1 bit per port,
-    with bit X being 1 for an over-current on port X.
-
-    The function s3c2410_usb_report_oc() has been provided to
-    ensure this is called correctly.
-
-  port[x]:
-
-    This is struct describes each port, 0 or 1. The platform driver
-    should set the flags field of each port to S3C_HCDFLG_USED if
-    the port is enabled.
-
-
-
-Document Author
----------------
-
-Ben Dooks, Copyright 2005 Simtec Electronics
diff --git a/Documentation/arm/Samsung/Bootloader-interface.txt b/Documentation/arm/Samsung/Bootloader-interface.txt
deleted file mode 100644
index d17ed518a7ea..000000000000
--- a/Documentation/arm/Samsung/Bootloader-interface.txt
+++ /dev/null
@@ -1,68 +0,0 @@
-      Interface between kernel and boot loaders on Exynos boards
-      ==========================================================
-
-Author: Krzysztof Kozlowski
-Date  : 6 June 2015
-
-The document tries to describe currently used interface between Linux kernel
-and boot loaders on Samsung Exynos based boards. This is not a definition
-of interface but rather a description of existing state, a reference
-for information purpose only.
-
-In the document "boot loader" means any of following: U-boot, proprietary
-SBOOT or any other firmware for ARMv7 and ARMv8 initializing the board before
-executing kernel.
-
-
-1. Non-Secure mode
-
-Address:      sysram_ns_base_addr
-Offset        Value                                        Purpose
-=============================================================================
-0x08          exynos_cpu_resume_ns, mcpm_entry_point       System suspend
-0x0c          0x00000bad (Magic cookie)                    System suspend
-0x1c          exynos4_secondary_startup                    Secondary CPU boot
-0x1c + 4*cpu  exynos4_secondary_startup (Exynos4412)       Secondary CPU boot
-0x20          0xfcba0d10 (Magic cookie)                    AFTR
-0x24          exynos_cpu_resume_ns                         AFTR
-0x28 + 4*cpu  0x8 (Magic cookie, Exynos3250)               AFTR
-0x28          0x0 or last value during resume (Exynos542x) System suspend
-
-
-2. Secure mode
-
-Address:      sysram_base_addr
-Offset        Value                                        Purpose
-=============================================================================
-0x00          exynos4_secondary_startup                    Secondary CPU boot
-0x04          exynos4_secondary_startup (Exynos542x)       Secondary CPU boot
-4*cpu         exynos4_secondary_startup (Exynos4412)       Secondary CPU boot
-0x20          exynos_cpu_resume (Exynos4210 r1.0)          AFTR
-0x24          0xfcba0d10 (Magic cookie, Exynos4210 r1.0)   AFTR
-
-Address:      pmu_base_addr
-Offset        Value                                        Purpose
-=============================================================================
-0x0800        exynos_cpu_resume                            AFTR, suspend
-0x0800        mcpm_entry_point (Exynos542x with MCPM)      AFTR, suspend
-0x0804        0xfcba0d10 (Magic cookie)                    AFTR
-0x0804        0x00000bad (Magic cookie)                    System suspend
-0x0814        exynos4_secondary_startup (Exynos4210 r1.1)  Secondary CPU boot
-0x0818        0xfcba0d10 (Magic cookie, Exynos4210 r1.1)   AFTR
-0x081C        exynos_cpu_resume (Exynos4210 r1.1)          AFTR
-
-
-3. Other (regardless of secure/non-secure mode)
-
-Address:      pmu_base_addr
-Offset        Value                           Purpose
-=============================================================================
-0x0908        Non-zero                        Secondary CPU boot up indicator
-                                              on Exynos3250 and Exynos542x
-
-
-4. Glossary
-
-AFTR - ARM Off Top Running, a low power mode, Cortex cores and many other
-modules are power gated, except the TOP modules
-MCPM - Multi-Cluster Power Management
diff --git a/Documentation/arm/Samsung/GPIO.txt b/Documentation/arm/Samsung/GPIO.txt
deleted file mode 100644
index 795adfd88081..000000000000
--- a/Documentation/arm/Samsung/GPIO.txt
+++ /dev/null
@@ -1,40 +0,0 @@
-		Samsung GPIO implementation
-		===========================
-
-Introduction
-------------
-
-This outlines the Samsung GPIO implementation and the architecture
-specific calls provided alongside the drivers/gpio core.
-
-
-S3C24XX (Legacy)
-----------------
-
-See Documentation/arm/Samsung-S3C24XX/GPIO.txt for more information
-about these devices. Their implementation has been brought into line
-with the core samsung implementation described in this document.
-
-
-GPIOLIB integration
--------------------
-
-The gpio implementation uses gpiolib as much as possible, only providing
-specific calls for the items that require Samsung specific handling, such
-as pin special-function or pull resistor control.
-
-GPIO numbering is synchronised between the Samsung and gpiolib system.
-
-
-PIN configuration
------------------
-
-Pin configuration is specific to the Samsung architecture, with each SoC
-registering the necessary information for the core gpio configuration
-implementation to configure pins as necessary.
-
-The s3c_gpio_cfgpin() and s3c_gpio_setpull() provide the means for a
-driver or machine to change gpio configuration.
-
-See arch/arm/plat-samsung/include/plat/gpio-cfg.h for more information
-on these functions.
diff --git a/Documentation/arm/Samsung/Overview.txt b/Documentation/arm/Samsung/Overview.txt
deleted file mode 100644
index 8f7309bad460..000000000000
--- a/Documentation/arm/Samsung/Overview.txt
+++ /dev/null
@@ -1,86 +0,0 @@
-		Samsung ARM Linux Overview
-		==========================
-
-Introduction
-------------
-
-  The Samsung range of ARM SoCs spans many similar devices, from the initial
-  ARM9 through to the newest ARM cores. This document shows an overview of
-  the current kernel support, how to use it and where to find the code
-  that supports this.
-
-  The currently supported SoCs are:
-
-  - S3C24XX: See Documentation/arm/Samsung-S3C24XX/Overview.txt for full list
-  - S3C64XX: S3C6400 and S3C6410
-  - S5PC110 / S5PV210
-
-
-S3C24XX Systems
----------------
-
-  There is still documentation in Documnetation/arm/Samsung-S3C24XX/ which
-  deals with the architecture and drivers specific to these devices.
-
-  See Documentation/arm/Samsung-S3C24XX/Overview.txt for more information
-  on the implementation details and specific support.
-
-
-Configuration
--------------
-
-  A number of configurations are supplied, as there is no current way of
-  unifying all the SoCs into one kernel.
-
-  s5pc110_defconfig - S5PC110 specific default configuration
-  s5pv210_defconfig - S5PV210 specific default configuration
-
-
-Layout
-------
-
-  The directory layout is currently being restructured, and consists of
-  several platform directories and then the machine specific directories
-  of the CPUs being built for.
-
-  plat-samsung provides the base for all the implementations, and is the
-  last in the line of include directories that are processed for the build
-  specific information. It contains the base clock, GPIO and device definitions
-  to get the system running.
-
-  plat-s3c24xx is for s3c24xx specific builds, see the S3C24XX docs.
-
-  plat-s5p is for s5p specific builds, and contains common support for the
-  S5P specific systems. Not all S5Ps use all the features in this directory
-  due to differences in the hardware.
-
-
-Layout changes
---------------
-
-  The old plat-s3c and plat-s5pc1xx directories have been removed, with
-  support moved to either plat-samsung or plat-s5p as necessary. These moves
-  where to simplify the include and dependency issues involved with having
-  so many different platform directories.
-
-
-Port Contributors
------------------
-
-  Ben Dooks (BJD)
-  Vincent Sanders
-  Herbert Potzl
-  Arnaud Patard (RTP)
-  Roc Wu
-  Klaus Fetscher
-  Dimitry Andric
-  Shannon Holland
-  Guillaume Gourat (NexVision)
-  Christer Weinigel (wingel) (Acer N30)
-  Lucas Correia Villa Real (S3C2400 port)
-
-
-Document Author
----------------
-
-Copyright 2009-2010 Ben Dooks <ben-linux@fluff.org>
diff --git a/Documentation/arm/Samsung/clksrc-change-registers.awk b/Documentation/arm/Samsung/clksrc-change-registers.awk
deleted file mode 100755
index 7be1b8aa7cd9..000000000000
--- a/Documentation/arm/Samsung/clksrc-change-registers.awk
+++ /dev/null
@@ -1,166 +0,0 @@
-#!/usr/bin/awk -f
-#
-# Copyright 2010 Ben Dooks <ben-linux@fluff.org>
-#
-# Released under GPLv2
-
-# example usage
-# ./clksrc-change-registers.awk arch/arm/plat-s5pc1xx/include/plat/regs-clock.h < src > dst
-
-function extract_value(s)
-{
-    eqat = index(s, "=")
-    comat = index(s, ",")
-    return substr(s, eqat+2, (comat-eqat)-2)
-}
-
-function remove_brackets(b)
-{
-    return substr(b, 2, length(b)-2)
-}
-
-function splitdefine(l, p)
-{
-    r = split(l, tp)
-
-    p[0] = tp[2]
-    p[1] = remove_brackets(tp[3])
-}
-
-function find_length(f)
-{
-    if (0)
-	printf "find_length " f "\n" > "/dev/stderr"
-
-    if (f ~ /0x1/)
-	return 1
-    else if (f ~ /0x3/)
-	return 2
-    else if (f ~ /0x7/)
-	return 3
-    else if (f ~ /0xf/)
-	return 4
-
-    printf "unknown length " f "\n" > "/dev/stderr"
-    exit
-}
-
-function find_shift(s)
-{
-    id = index(s, "<")
-    if (id <= 0) {
-	printf "cannot find shift " s "\n" > "/dev/stderr"
-	exit
-    }
-
-    return substr(s, id+2)
-}
-
-
-BEGIN {
-    if (ARGC < 2) {
-	print "too few arguments" > "/dev/stderr"
-	exit
-    }
-
-# read the header file and find the mask values that we will need
-# to replace and create an associative array of values
-
-    while (getline line < ARGV[1] > 0) {
-	if (line ~ /\#define.*_MASK/ &&
-	    !(line ~ /USB_SIG_MASK/)) {
-	    splitdefine(line, fields)
-	    name = fields[0]
-	    if (0)
-		printf "MASK " line "\n" > "/dev/stderr"
-	    dmask[name,0] = find_length(fields[1])
-	    dmask[name,1] = find_shift(fields[1])
-	    if (0)
-		printf "=> '" name "' LENGTH=" dmask[name,0] " SHIFT=" dmask[name,1] "\n" > "/dev/stderr"
-	} else {
-	}
-    }
-
-    delete ARGV[1]
-}
-
-/clksrc_clk.*=.*{/ {
-    shift=""
-    mask=""
-    divshift=""
-    reg_div=""
-    reg_src=""
-    indent=1
-
-    print $0
-
-    for(; indent >= 1;) {
-	if ((getline line) <= 0) {
-	    printf "unexpected end of file" > "/dev/stderr"
-	    exit 1;
-	}
-
-	if (line ~ /\.shift/) {
-	    shift = extract_value(line)
-	} else if (line ~ /\.mask/) {
-	    mask = extract_value(line)
-	} else if (line ~ /\.reg_divider/) {
-	    reg_div = extract_value(line)
-	} else if (line ~ /\.reg_source/) {
-	    reg_src = extract_value(line)
-	} else if (line ~ /\.divider_shift/) {
-	    divshift = extract_value(line)
-	} else if (line ~ /{/) {
-		indent++
-		print line
-	    } else if (line ~ /}/) {
-	    indent--
-
-	    if (indent == 0) {
-		if (0) {
-		    printf "shift '" shift   "' ='" dmask[shift,0] "'\n" > "/dev/stderr"
-		    printf "mask  '" mask    "'\n" > "/dev/stderr"
-		    printf "dshft '" divshift "'\n" > "/dev/stderr"
-		    printf "rdiv  '" reg_div "'\n" > "/dev/stderr"
-		    printf "rsrc  '" reg_src "'\n" > "/dev/stderr"
-		}
-
-		generated = mask
-		sub(reg_src, reg_div, generated)
-
-		if (0) {
-		    printf "/* rsrc " reg_src " */\n"
-		    printf "/* rdiv " reg_div " */\n"
-		    printf "/* shift " shift " */\n"
-		    printf "/* mask " mask " */\n"
-		    printf "/* generated " generated " */\n"
-		}
-
-		if (reg_div != "") {
-		    printf "\t.reg_div = { "
-		    printf ".reg = " reg_div ", "
-		    printf ".shift = " dmask[generated,1] ", "
-		    printf ".size = " dmask[generated,0] ", "
-		    printf "},\n"
-		}
-
-		printf "\t.reg_src = { "
-		printf ".reg = " reg_src ", "
-		printf ".shift = " dmask[mask,1] ", "
-		printf ".size = " dmask[mask,0] ", "
-
-		printf "},\n"
-
-	    }
-
-	    print line
-	} else {
-	    print line
-	}
-
-	if (0)
-	    printf indent ":" line "\n" > "/dev/stderr"
-    }
-}
-
-// && ! /clksrc_clk.*=.*{/ { print $0 }
diff --git a/Documentation/arm/Setup b/Documentation/arm/Setup
deleted file mode 100644
index 0cb1e64bde80..000000000000
--- a/Documentation/arm/Setup
+++ /dev/null
@@ -1,129 +0,0 @@
-Kernel initialisation parameters on ARM Linux
----------------------------------------------
-
-The following document describes the kernel initialisation parameter
-structure, otherwise known as 'struct param_struct' which is used
-for most ARM Linux architectures.
-
-This structure is used to pass initialisation parameters from the
-kernel loader to the Linux kernel proper, and may be short lived
-through the kernel initialisation process.  As a general rule, it
-should not be referenced outside of arch/arm/kernel/setup.c:setup_arch().
-
-There are a lot of parameters listed in there, and they are described
-below:
-
- page_size
-
-   This parameter must be set to the page size of the machine, and
-   will be checked by the kernel.
-
- nr_pages
-
-   This is the total number of pages of memory in the system.  If
-   the memory is banked, then this should contain the total number
-   of pages in the system.
-
-   If the system contains separate VRAM, this value should not
-   include this information.
-
- ramdisk_size
-
-   This is now obsolete, and should not be used.
-
- flags
-
-   Various kernel flags, including:
-    bit 0 - 1 = mount root read only
-    bit 1 - unused
-    bit 2 - 0 = load ramdisk
-    bit 3 - 0 = prompt for ramdisk
-
- rootdev
-
-   major/minor number pair of device to mount as the root filesystem.
-
- video_num_cols
- video_num_rows
-
-   These two together describe the character size of the dummy console,
-   or VGA console character size.  They should not be used for any other
-   purpose.
-
-   It's generally a good idea to set these to be either standard VGA, or
-   the equivalent character size of your fbcon display.  This then allows
-   all the bootup messages to be displayed correctly.
-
- video_x
- video_y
-
-   This describes the character position of cursor on VGA console, and
-   is otherwise unused. (should not be used for other console types, and
-   should not be used for other purposes).
-
- memc_control_reg
-
-   MEMC chip control register for Acorn Archimedes and Acorn A5000
-   based machines.  May be used differently by different architectures.
-
- sounddefault
-
-   Default sound setting on Acorn machines.  May be used differently by
-   different architectures.
-
- adfsdrives
-
-   Number of ADFS/MFM disks.  May be used differently by different
-   architectures.
-
- bytes_per_char_h
- bytes_per_char_v
-
-   These are now obsolete, and should not be used.
-
- pages_in_bank[4]
-
-   Number of pages in each bank of the systems memory (used for RiscPC).
-   This is intended to be used on systems where the physical memory
-   is non-contiguous from the processors point of view.
-
- pages_in_vram
-
-   Number of pages in VRAM (used on Acorn RiscPC).  This value may also
-   be used by loaders if the size of the video RAM can't be obtained
-   from the hardware.
-
- initrd_start
- initrd_size
-
-   This describes the kernel virtual start address and size of the
-   initial ramdisk.
-
- rd_start
-
-   Start address in sectors of the ramdisk image on a floppy disk.
-
- system_rev
-
-   system revision number.
-
- system_serial_low
- system_serial_high
-
-   system 64-bit serial number
-
- mem_fclk_21285
-
-   The speed of the external oscillator to the 21285 (footbridge),
-   which control's the speed of the memory bus, timer & serial port.
-   Depending upon the speed of the cpu its value can be between
-   0-66 MHz. If no params are passed or a value of zero is passed,
-   then a value of 50 Mhz is the default on 21285 architectures.
-
- paths[8][128]
-
-   These are now obsolete, and should not be used.
-
- commandline
-
-   Kernel command line parameters.  Details can be found elsewhere.
diff --git a/Documentation/arm/VFP/release-notes.txt b/Documentation/arm/VFP/release-notes.txt
deleted file mode 100644
index 28a2795705ca..000000000000
--- a/Documentation/arm/VFP/release-notes.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-Release notes for Linux Kernel VFP support code
------------------------------------------------
-
-Date: 	20 May 2004
-Author:	Russell King
-
-This is the first release of the Linux Kernel VFP support code.  It
-provides support for the exceptions bounced from VFP hardware found
-on ARM926EJ-S.
-
-This release has been validated against the SoftFloat-2b library by
-John R. Hauser using the TestFloat-2a test suite.  Details of this
-library and test suite can be found at:
-
-   http://www.jhauser.us/arithmetic/SoftFloat.html
-
-The operations which have been tested with this package are:
-
- - fdiv
- - fsub
- - fadd
- - fmul
- - fcmp
- - fcmpe
- - fcvtd
- - fcvts
- - fsito
- - ftosi
- - fsqrt
-
-All the above pass softfloat tests with the following exceptions:
-
-- fadd/fsub shows some differences in the handling of +0 / -0 results
-  when input operands differ in signs.
-- the handling of underflow exceptions is slightly different.  If a
-  result underflows before rounding, but becomes a normalised number
-  after rounding, we do not signal an underflow exception.
-
-Other operations which have been tested by basic assembly-only tests
-are:
-
- - fcpy
- - fabs
- - fneg
- - ftoui
- - ftosiz
- - ftouiz
-
-The combination operations have not been tested:
-
- - fmac
- - fnmac
- - fmsc
- - fnmsc
- - fnmul
diff --git a/Documentation/arm/arm.rst b/Documentation/arm/arm.rst
new file mode 100644
index 000000000000..2edc509df92a
--- /dev/null
+++ b/Documentation/arm/arm.rst
@@ -0,0 +1,214 @@
+=======================
+ARM Linux 2.6 and upper
+=======================
+
+    Please check <ftp://ftp.arm.linux.org.uk/pub/armlinux> for
+    updates.
+
+Compilation of kernel
+---------------------
+
+  In order to compile ARM Linux, you will need a compiler capable of
+  generating ARM ELF code with GNU extensions.  GCC 3.3 is known to be
+  a good compiler.  Fortunately, you needn't guess.  The kernel will report
+  an error if your compiler is a recognized offender.
+
+  To build ARM Linux natively, you shouldn't have to alter the ARCH = line
+  in the top level Makefile.  However, if you don't have the ARM Linux ELF
+  tools installed as default, then you should change the CROSS_COMPILE
+  line as detailed below.
+
+  If you wish to cross-compile, then alter the following lines in the top
+  level make file::
+
+    ARCH = <whatever>
+
+  with::
+
+    ARCH = arm
+
+  and::
+
+    CROSS_COMPILE=
+
+  to::
+
+    CROSS_COMPILE=<your-path-to-your-compiler-without-gcc>
+
+  eg.::
+
+    CROSS_COMPILE=arm-linux-
+
+  Do a 'make config', followed by 'make Image' to build the kernel
+  (arch/arm/boot/Image).  A compressed image can be built by doing a
+  'make zImage' instead of 'make Image'.
+
+
+Bug reports etc
+---------------
+
+  Please send patches to the patch system.  For more information, see
+  http://www.arm.linux.org.uk/developer/patches/info.php Always include some
+  explanation as to what the patch does and why it is needed.
+
+  Bug reports should be sent to linux-arm-kernel@lists.arm.linux.org.uk,
+  or submitted through the web form at
+  http://www.arm.linux.org.uk/developer/
+
+  When sending bug reports, please ensure that they contain all relevant
+  information, eg. the kernel messages that were printed before/during
+  the problem, what you were doing, etc.
+
+
+Include files
+-------------
+
+  Several new include directories have been created under include/asm-arm,
+  which are there to reduce the clutter in the top-level directory.  These
+  directories, and their purpose is listed below:
+
+  ============= ==========================================================
+   `arch-*`	machine/platform specific header files
+   `hardware`	driver-internal ARM specific data structures/definitions
+   `mach`	descriptions of generic ARM to specific machine interfaces
+   `proc-*`	processor dependent header files (currently only two
+		categories)
+  ============= ==========================================================
+
+
+Machine/Platform support
+------------------------
+
+  The ARM tree contains support for a lot of different machine types.  To
+  continue supporting these differences, it has become necessary to split
+  machine-specific parts by directory.  For this, the machine category is
+  used to select which directories and files get included (we will use
+  $(MACHINE) to refer to the category)
+
+  To this end, we now have arch/arm/mach-$(MACHINE) directories which are
+  designed to house the non-driver files for a particular machine (eg, PCI,
+  memory management, architecture definitions etc).  For all future
+  machines, there should be a corresponding arch/arm/mach-$(MACHINE)/include/mach
+  directory.
+
+
+Modules
+-------
+
+  Although modularisation is supported (and required for the FP emulator),
+  each module on an ARM2/ARM250/ARM3 machine when is loaded will take
+  memory up to the next 32k boundary due to the size of the pages.
+  Therefore, is modularisation on these machines really worth it?
+
+  However, ARM6 and up machines allow modules to take multiples of 4k, and
+  as such Acorn RiscPCs and other architectures using these processors can
+  make good use of modularisation.
+
+
+ADFS Image files
+----------------
+
+  You can access image files on your ADFS partitions by mounting the ADFS
+  partition, and then using the loopback device driver.  You must have
+  losetup installed.
+
+  Please note that the PCEmulator DOS partitions have a partition table at
+  the start, and as such, you will have to give '-o offset' to losetup.
+
+
+Request to developers
+---------------------
+
+  When writing device drivers which include a separate assembler file, please
+  include it in with the C file, and not the arch/arm/lib directory.  This
+  allows the driver to be compiled as a loadable module without requiring
+  half the code to be compiled into the kernel image.
+
+  In general, try to avoid using assembler unless it is really necessary.  It
+  makes drivers far less easy to port to other hardware.
+
+
+ST506 hard drives
+-----------------
+
+  The ST506 hard drive controllers seem to be working fine (if a little
+  slowly).  At the moment they will only work off the controllers on an
+  A4x0's motherboard, but for it to work off a Podule just requires
+  someone with a podule to add the addresses for the IRQ mask and the
+  HDC base to the source.
+
+  As of 31/3/96 it works with two drives (you should get the ADFS
+  `*configure` harddrive set to 2). I've got an internal 20MB and a great
+  big external 5.25" FH 64MB drive (who could ever want more :-) ).
+
+  I've just got 240K/s off it (a dd with bs=128k); thats about half of what
+  RiscOS gets; but it's a heck of a lot better than the 50K/s I was getting
+  last week :-)
+
+  Known bug: Drive data errors can cause a hang; including cases where
+  the controller has fixed the error using ECC. (Possibly ONLY
+  in that case...hmm).
+
+
+1772 Floppy
+-----------
+  This also seems to work OK, but hasn't been stressed much lately.  It
+  hasn't got any code for disc change detection in there at the moment which
+  could be a bit of a problem!  Suggestions on the correct way to do this
+  are welcome.
+
+
+`CONFIG_MACH_` and `CONFIG_ARCH_`
+---------------------------------
+  A change was made in 2003 to the macro names for new machines.
+  Historically, `CONFIG_ARCH_` was used for the bonafide architecture,
+  e.g. SA1100, as well as implementations of the architecture,
+  e.g. Assabet.  It was decided to change the implementation macros
+  to read `CONFIG_MACH_` for clarity.  Moreover, a retroactive fixup has
+  not been made because it would complicate patching.
+
+  Previous registrations may be found online.
+
+    <http://www.arm.linux.org.uk/developer/machines/>
+
+Kernel entry (head.S)
+---------------------
+  The initial entry into the kernel is via head.S, which uses machine
+  independent code.  The machine is selected by the value of 'r1' on
+  entry, which must be kept unique.
+
+  Due to the large number of machines which the ARM port of Linux provides
+  for, we have a method to manage this which ensures that we don't end up
+  duplicating large amounts of code.
+
+  We group machine (or platform) support code into machine classes.  A
+  class typically based around one or more system on a chip devices, and
+  acts as a natural container around the actual implementations.  These
+  classes are given directories - arch/arm/mach-<class> and
+  arch/arm/mach-<class> - which contain the source files to/include/mach
+  support the machine class.  This directories also contain any machine
+  specific supporting code.
+
+  For example, the SA1100 class is based upon the SA1100 and SA1110 SoC
+  devices, and contains the code to support the way the on-board and off-
+  board devices are used, or the device is setup, and provides that
+  machine specific "personality."
+
+  For platforms that support device tree (DT), the machine selection is
+  controlled at runtime by passing the device tree blob to the kernel.  At
+  compile-time, support for the machine type must be selected.  This allows for
+  a single multiplatform kernel build to be used for several machine types.
+
+  For platforms that do not use device tree, this machine selection is
+  controlled by the machine type ID, which acts both as a run-time and a
+  compile-time code selection method.  You can register a new machine via the
+  web site at:
+
+    <http://www.arm.linux.org.uk/developer/machines/>
+
+  Note: Please do not register a machine type for DT-only platforms.  If your
+  platform is DT-only, you do not need a registered machine type.
+
+---
+
+Russell King (15/03/2004)
diff --git a/Documentation/arm/booting.rst b/Documentation/arm/booting.rst
new file mode 100644
index 000000000000..4babb6c6ae1e
--- /dev/null
+++ b/Documentation/arm/booting.rst
@@ -0,0 +1,237 @@
+=================
+Booting ARM Linux
+=================
+
+Author:	Russell King
+
+Date  : 18 May 2002
+
+The following documentation is relevant to 2.4.18-rmk6 and beyond.
+
+In order to boot ARM Linux, you require a boot loader, which is a small
+program that runs before the main kernel.  The boot loader is expected
+to initialise various devices, and eventually call the Linux kernel,
+passing information to the kernel.
+
+Essentially, the boot loader should provide (as a minimum) the
+following:
+
+1. Setup and initialise the RAM.
+2. Initialise one serial port.
+3. Detect the machine type.
+4. Setup the kernel tagged list.
+5. Load initramfs.
+6. Call the kernel image.
+
+
+1. Setup and initialise RAM
+---------------------------
+
+Existing boot loaders:
+	MANDATORY
+New boot loaders:
+	MANDATORY
+
+The boot loader is expected to find and initialise all RAM that the
+kernel will use for volatile data storage in the system.  It performs
+this in a machine dependent manner.  (It may use internal algorithms
+to automatically locate and size all RAM, or it may use knowledge of
+the RAM in the machine, or any other method the boot loader designer
+sees fit.)
+
+
+2. Initialise one serial port
+-----------------------------
+
+Existing boot loaders:
+	OPTIONAL, RECOMMENDED
+New boot loaders:
+	OPTIONAL, RECOMMENDED
+
+The boot loader should initialise and enable one serial port on the
+target.  This allows the kernel serial driver to automatically detect
+which serial port it should use for the kernel console (generally
+used for debugging purposes, or communication with the target.)
+
+As an alternative, the boot loader can pass the relevant 'console='
+option to the kernel via the tagged lists specifying the port, and
+serial format options as described in
+
+       Documentation/admin-guide/kernel-parameters.rst.
+
+
+3. Detect the machine type
+--------------------------
+
+Existing boot loaders:
+	OPTIONAL
+New boot loaders:
+	MANDATORY except for DT-only platforms
+
+The boot loader should detect the machine type its running on by some
+method.  Whether this is a hard coded value or some algorithm that
+looks at the connected hardware is beyond the scope of this document.
+The boot loader must ultimately be able to provide a MACH_TYPE_xxx
+value to the kernel. (see linux/arch/arm/tools/mach-types).  This
+should be passed to the kernel in register r1.
+
+For DT-only platforms, the machine type will be determined by device
+tree.  set the machine type to all ones (~0).  This is not strictly
+necessary, but assures that it will not match any existing types.
+
+4. Setup boot data
+------------------
+
+Existing boot loaders:
+	OPTIONAL, HIGHLY RECOMMENDED
+New boot loaders:
+	MANDATORY
+
+The boot loader must provide either a tagged list or a dtb image for
+passing configuration data to the kernel.  The physical address of the
+boot data is passed to the kernel in register r2.
+
+4a. Setup the kernel tagged list
+--------------------------------
+
+The boot loader must create and initialise the kernel tagged list.
+A valid tagged list starts with ATAG_CORE and ends with ATAG_NONE.
+The ATAG_CORE tag may or may not be empty.  An empty ATAG_CORE tag
+has the size field set to '2' (0x00000002).  The ATAG_NONE must set
+the size field to zero.
+
+Any number of tags can be placed in the list.  It is undefined
+whether a repeated tag appends to the information carried by the
+previous tag, or whether it replaces the information in its
+entirety; some tags behave as the former, others the latter.
+
+The boot loader must pass at a minimum the size and location of
+the system memory, and root filesystem location.  Therefore, the
+minimum tagged list should look::
+
+		+-----------+
+  base ->	| ATAG_CORE |  |
+		+-----------+  |
+		| ATAG_MEM  |  | increasing address
+		+-----------+  |
+		| ATAG_NONE |  |
+		+-----------+  v
+
+The tagged list should be stored in system RAM.
+
+The tagged list must be placed in a region of memory where neither
+the kernel decompressor nor initrd 'bootp' program will overwrite
+it.  The recommended placement is in the first 16KiB of RAM.
+
+4b. Setup the device tree
+-------------------------
+
+The boot loader must load a device tree image (dtb) into system ram
+at a 64bit aligned address and initialize it with the boot data.  The
+dtb format is documented in Documentation/devicetree/booting-without-of.txt.
+The kernel will look for the dtb magic value of 0xd00dfeed at the dtb
+physical address to determine if a dtb has been passed instead of a
+tagged list.
+
+The boot loader must pass at a minimum the size and location of the
+system memory, and the root filesystem location.  The dtb must be
+placed in a region of memory where the kernel decompressor will not
+overwrite it, while remaining within the region which will be covered
+by the kernel's low-memory mapping.
+
+A safe location is just above the 128MiB boundary from start of RAM.
+
+5. Load initramfs.
+------------------
+
+Existing boot loaders:
+	OPTIONAL
+New boot loaders:
+	OPTIONAL
+
+If an initramfs is in use then, as with the dtb, it must be placed in
+a region of memory where the kernel decompressor will not overwrite it
+while also with the region which will be covered by the kernel's
+low-memory mapping.
+
+A safe location is just above the device tree blob which itself will
+be loaded just above the 128MiB boundary from the start of RAM as
+recommended above.
+
+6. Calling the kernel image
+---------------------------
+
+Existing boot loaders:
+	MANDATORY
+New boot loaders:
+	MANDATORY
+
+There are two options for calling the kernel zImage.  If the zImage
+is stored in flash, and is linked correctly to be run from flash,
+then it is legal for the boot loader to call the zImage in flash
+directly.
+
+The zImage may also be placed in system RAM and called there.  The
+kernel should be placed in the first 128MiB of RAM.  It is recommended
+that it is loaded above 32MiB in order to avoid the need to relocate
+prior to decompression, which will make the boot process slightly
+faster.
+
+When booting a raw (non-zImage) kernel the constraints are tighter.
+In this case the kernel must be loaded at an offset into system equal
+to TEXT_OFFSET - PAGE_OFFSET.
+
+In any case, the following conditions must be met:
+
+- Quiesce all DMA capable devices so that memory does not get
+  corrupted by bogus network packets or disk data. This will save
+  you many hours of debug.
+
+- CPU register settings
+
+  - r0 = 0,
+  - r1 = machine type number discovered in (3) above.
+  - r2 = physical address of tagged list in system RAM, or
+    physical address of device tree block (dtb) in system RAM
+
+- CPU mode
+
+  All forms of interrupts must be disabled (IRQs and FIQs)
+
+  For CPUs which do not include the ARM virtualization extensions, the
+  CPU must be in SVC mode.  (A special exception exists for Angel)
+
+  CPUs which include support for the virtualization extensions can be
+  entered in HYP mode in order to enable the kernel to make full use of
+  these extensions.  This is the recommended boot method for such CPUs,
+  unless the virtualisations are already in use by a pre-installed
+  hypervisor.
+
+  If the kernel is not entered in HYP mode for any reason, it must be
+  entered in SVC mode.
+
+- Caches, MMUs
+
+  The MMU must be off.
+
+  Instruction cache may be on or off.
+
+  Data cache must be off.
+
+  If the kernel is entered in HYP mode, the above requirements apply to
+  the HYP mode configuration in addition to the ordinary PL1 (privileged
+  kernel modes) configuration.  In addition, all traps into the
+  hypervisor must be disabled, and PL1 access must be granted for all
+  peripherals and CPU resources for which this is architecturally
+  possible.  Except for entering in HYP mode, the system configuration
+  should be such that a kernel which does not include support for the
+  virtualization extensions can boot correctly without extra help.
+
+- The boot loader is expected to call the kernel image by jumping
+  directly to the first instruction of the kernel image.
+
+  On CPUs supporting the ARM instruction set, the entry must be
+  made in ARM state, even for a Thumb-2 kernel.
+
+  On CPUs supporting only the Thumb instruction set such as
+  Cortex-M class CPUs, the entry must be made in Thumb state.
diff --git a/Documentation/arm/cluster-pm-race-avoidance.rst b/Documentation/arm/cluster-pm-race-avoidance.rst
new file mode 100644
index 000000000000..aa58603d3f28
--- /dev/null
+++ b/Documentation/arm/cluster-pm-race-avoidance.rst
@@ -0,0 +1,533 @@
+=========================================================
+Cluster-wide Power-up/power-down race avoidance algorithm
+=========================================================
+
+This file documents the algorithm which is used to coordinate CPU and
+cluster setup and teardown operations and to manage hardware coherency
+controls safely.
+
+The section "Rationale" explains what the algorithm is for and why it is
+needed.  "Basic model" explains general concepts using a simplified view
+of the system.  The other sections explain the actual details of the
+algorithm in use.
+
+
+Rationale
+---------
+
+In a system containing multiple CPUs, it is desirable to have the
+ability to turn off individual CPUs when the system is idle, reducing
+power consumption and thermal dissipation.
+
+In a system containing multiple clusters of CPUs, it is also desirable
+to have the ability to turn off entire clusters.
+
+Turning entire clusters off and on is a risky business, because it
+involves performing potentially destructive operations affecting a group
+of independently running CPUs, while the OS continues to run.  This
+means that we need some coordination in order to ensure that critical
+cluster-level operations are only performed when it is truly safe to do
+so.
+
+Simple locking may not be sufficient to solve this problem, because
+mechanisms like Linux spinlocks may rely on coherency mechanisms which
+are not immediately enabled when a cluster powers up.  Since enabling or
+disabling those mechanisms may itself be a non-atomic operation (such as
+writing some hardware registers and invalidating large caches), other
+methods of coordination are required in order to guarantee safe
+power-down and power-up at the cluster level.
+
+The mechanism presented in this document describes a coherent memory
+based protocol for performing the needed coordination.  It aims to be as
+lightweight as possible, while providing the required safety properties.
+
+
+Basic model
+-----------
+
+Each cluster and CPU is assigned a state, as follows:
+
+	- DOWN
+	- COMING_UP
+	- UP
+	- GOING_DOWN
+
+::
+
+	    +---------> UP ----------+
+	    |                        v
+
+	COMING_UP                GOING_DOWN
+
+	    ^                        |
+	    +--------- DOWN <--------+
+
+
+DOWN:
+	The CPU or cluster is not coherent, and is either powered off or
+	suspended, or is ready to be powered off or suspended.
+
+COMING_UP:
+	The CPU or cluster has committed to moving to the UP state.
+	It may be part way through the process of initialisation and
+	enabling coherency.
+
+UP:
+	The CPU or cluster is active and coherent at the hardware
+	level.  A CPU in this state is not necessarily being used
+	actively by the kernel.
+
+GOING_DOWN:
+	The CPU or cluster has committed to moving to the DOWN
+	state.  It may be part way through the process of teardown and
+	coherency exit.
+
+
+Each CPU has one of these states assigned to it at any point in time.
+The CPU states are described in the "CPU state" section, below.
+
+Each cluster is also assigned a state, but it is necessary to split the
+state value into two parts (the "cluster" state and "inbound" state) and
+to introduce additional states in order to avoid races between different
+CPUs in the cluster simultaneously modifying the state.  The cluster-
+level states are described in the "Cluster state" section.
+
+To help distinguish the CPU states from cluster states in this
+discussion, the state names are given a `CPU_` prefix for the CPU states,
+and a `CLUSTER_` or `INBOUND_` prefix for the cluster states.
+
+
+CPU state
+---------
+
+In this algorithm, each individual core in a multi-core processor is
+referred to as a "CPU".  CPUs are assumed to be single-threaded:
+therefore, a CPU can only be doing one thing at a single point in time.
+
+This means that CPUs fit the basic model closely.
+
+The algorithm defines the following states for each CPU in the system:
+
+	- CPU_DOWN
+	- CPU_COMING_UP
+	- CPU_UP
+	- CPU_GOING_DOWN
+
+::
+
+	 cluster setup and
+	CPU setup complete          policy decision
+	      +-----------> CPU_UP ------------+
+	      |                                v
+
+	CPU_COMING_UP                   CPU_GOING_DOWN
+
+	      ^                                |
+	      +----------- CPU_DOWN <----------+
+	 policy decision           CPU teardown complete
+	or hardware event
+
+
+The definitions of the four states correspond closely to the states of
+the basic model.
+
+Transitions between states occur as follows.
+
+A trigger event (spontaneous) means that the CPU can transition to the
+next state as a result of making local progress only, with no
+requirement for any external event to happen.
+
+
+CPU_DOWN:
+	A CPU reaches the CPU_DOWN state when it is ready for
+	power-down.  On reaching this state, the CPU will typically
+	power itself down or suspend itself, via a WFI instruction or a
+	firmware call.
+
+	Next state:
+		CPU_COMING_UP
+	Conditions:
+		none
+
+	Trigger events:
+		a) an explicit hardware power-up operation, resulting
+		   from a policy decision on another CPU;
+
+		b) a hardware event, such as an interrupt.
+
+
+CPU_COMING_UP:
+	A CPU cannot start participating in hardware coherency until the
+	cluster is set up and coherent.  If the cluster is not ready,
+	then the CPU will wait in the CPU_COMING_UP state until the
+	cluster has been set up.
+
+	Next state:
+		CPU_UP
+	Conditions:
+		The CPU's parent cluster must be in CLUSTER_UP.
+	Trigger events:
+		Transition of the parent cluster to CLUSTER_UP.
+
+	Refer to the "Cluster state" section for a description of the
+	CLUSTER_UP state.
+
+
+CPU_UP:
+	When a CPU reaches the CPU_UP state, it is safe for the CPU to
+	start participating in local coherency.
+
+	This is done by jumping to the kernel's CPU resume code.
+
+	Note that the definition of this state is slightly different
+	from the basic model definition: CPU_UP does not mean that the
+	CPU is coherent yet, but it does mean that it is safe to resume
+	the kernel.  The kernel handles the rest of the resume
+	procedure, so the remaining steps are not visible as part of the
+	race avoidance algorithm.
+
+	The CPU remains in this state until an explicit policy decision
+	is made to shut down or suspend the CPU.
+
+	Next state:
+		CPU_GOING_DOWN
+	Conditions:
+		none
+	Trigger events:
+		explicit policy decision
+
+
+CPU_GOING_DOWN:
+	While in this state, the CPU exits coherency, including any
+	operations required to achieve this (such as cleaning data
+	caches).
+
+	Next state:
+		CPU_DOWN
+	Conditions:
+		local CPU teardown complete
+	Trigger events:
+		(spontaneous)
+
+
+Cluster state
+-------------
+
+A cluster is a group of connected CPUs with some common resources.
+Because a cluster contains multiple CPUs, it can be doing multiple
+things at the same time.  This has some implications.  In particular, a
+CPU can start up while another CPU is tearing the cluster down.
+
+In this discussion, the "outbound side" is the view of the cluster state
+as seen by a CPU tearing the cluster down.  The "inbound side" is the
+view of the cluster state as seen by a CPU setting the CPU up.
+
+In order to enable safe coordination in such situations, it is important
+that a CPU which is setting up the cluster can advertise its state
+independently of the CPU which is tearing down the cluster.  For this
+reason, the cluster state is split into two parts:
+
+	"cluster" state: The global state of the cluster; or the state
+	on the outbound side:
+
+		- CLUSTER_DOWN
+		- CLUSTER_UP
+		- CLUSTER_GOING_DOWN
+
+	"inbound" state: The state of the cluster on the inbound side.
+
+		- INBOUND_NOT_COMING_UP
+		- INBOUND_COMING_UP
+
+
+	The different pairings of these states results in six possible
+	states for the cluster as a whole::
+
+	                            CLUSTER_UP
+	          +==========> INBOUND_NOT_COMING_UP -------------+
+	          #                                               |
+	                                                          |
+	     CLUSTER_UP     <----+                                |
+	  INBOUND_COMING_UP      |                                v
+
+	          ^             CLUSTER_GOING_DOWN       CLUSTER_GOING_DOWN
+	          #              INBOUND_COMING_UP <=== INBOUND_NOT_COMING_UP
+
+	    CLUSTER_DOWN         |                                |
+	  INBOUND_COMING_UP <----+                                |
+	                                                          |
+	          ^                                               |
+	          +===========     CLUSTER_DOWN      <------------+
+	                       INBOUND_NOT_COMING_UP
+
+	Transitions -----> can only be made by the outbound CPU, and
+	only involve changes to the "cluster" state.
+
+	Transitions ===##> can only be made by the inbound CPU, and only
+	involve changes to the "inbound" state, except where there is no
+	further transition possible on the outbound side (i.e., the
+	outbound CPU has put the cluster into the CLUSTER_DOWN state).
+
+	The race avoidance algorithm does not provide a way to determine
+	which exact CPUs within the cluster play these roles.  This must
+	be decided in advance by some other means.  Refer to the section
+	"Last man and first man selection" for more explanation.
+
+
+	CLUSTER_DOWN/INBOUND_NOT_COMING_UP is the only state where the
+	cluster can actually be powered down.
+
+	The parallelism of the inbound and outbound CPUs is observed by
+	the existence of two different paths from CLUSTER_GOING_DOWN/
+	INBOUND_NOT_COMING_UP (corresponding to GOING_DOWN in the basic
+	model) to CLUSTER_DOWN/INBOUND_COMING_UP (corresponding to
+	COMING_UP in the basic model).  The second path avoids cluster
+	teardown completely.
+
+	CLUSTER_UP/INBOUND_COMING_UP is equivalent to UP in the basic
+	model.  The final transition to CLUSTER_UP/INBOUND_NOT_COMING_UP
+	is trivial and merely resets the state machine ready for the
+	next cycle.
+
+	Details of the allowable transitions follow.
+
+	The next state in each case is notated
+
+		<cluster state>/<inbound state> (<transitioner>)
+
+	where the <transitioner> is the side on which the transition
+	can occur; either the inbound or the outbound side.
+
+
+CLUSTER_DOWN/INBOUND_NOT_COMING_UP:
+	Next state:
+		CLUSTER_DOWN/INBOUND_COMING_UP (inbound)
+	Conditions:
+		none
+
+	Trigger events:
+		a) an explicit hardware power-up operation, resulting
+		   from a policy decision on another CPU;
+
+		b) a hardware event, such as an interrupt.
+
+
+CLUSTER_DOWN/INBOUND_COMING_UP:
+
+	In this state, an inbound CPU sets up the cluster, including
+	enabling of hardware coherency at the cluster level and any
+	other operations (such as cache invalidation) which are required
+	in order to achieve this.
+
+	The purpose of this state is to do sufficient cluster-level
+	setup to enable other CPUs in the cluster to enter coherency
+	safely.
+
+	Next state:
+		CLUSTER_UP/INBOUND_COMING_UP (inbound)
+	Conditions:
+		cluster-level setup and hardware coherency complete
+	Trigger events:
+		(spontaneous)
+
+
+CLUSTER_UP/INBOUND_COMING_UP:
+
+	Cluster-level setup is complete and hardware coherency is
+	enabled for the cluster.  Other CPUs in the cluster can safely
+	enter coherency.
+
+	This is a transient state, leading immediately to
+	CLUSTER_UP/INBOUND_NOT_COMING_UP.  All other CPUs on the cluster
+	should consider treat these two states as equivalent.
+
+	Next state:
+		CLUSTER_UP/INBOUND_NOT_COMING_UP (inbound)
+	Conditions:
+		none
+	Trigger events:
+		(spontaneous)
+
+
+CLUSTER_UP/INBOUND_NOT_COMING_UP:
+
+	Cluster-level setup is complete and hardware coherency is
+	enabled for the cluster.  Other CPUs in the cluster can safely
+	enter coherency.
+
+	The cluster will remain in this state until a policy decision is
+	made to power the cluster down.
+
+	Next state:
+		CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP (outbound)
+	Conditions:
+		none
+	Trigger events:
+		policy decision to power down the cluster
+
+
+CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP:
+
+	An outbound CPU is tearing the cluster down.  The selected CPU
+	must wait in this state until all CPUs in the cluster are in the
+	CPU_DOWN state.
+
+	When all CPUs are in the CPU_DOWN state, the cluster can be torn
+	down, for example by cleaning data caches and exiting
+	cluster-level coherency.
+
+	To avoid wasteful unnecessary teardown operations, the outbound
+	should check the inbound cluster state for asynchronous
+	transitions to INBOUND_COMING_UP.  Alternatively, individual
+	CPUs can be checked for entry into CPU_COMING_UP or CPU_UP.
+
+
+	Next states:
+
+	CLUSTER_DOWN/INBOUND_NOT_COMING_UP (outbound)
+		Conditions:
+			cluster torn down and ready to power off
+		Trigger events:
+			(spontaneous)
+
+	CLUSTER_GOING_DOWN/INBOUND_COMING_UP (inbound)
+		Conditions:
+			none
+
+		Trigger events:
+			a) an explicit hardware power-up operation,
+			   resulting from a policy decision on another
+			   CPU;
+
+			b) a hardware event, such as an interrupt.
+
+
+CLUSTER_GOING_DOWN/INBOUND_COMING_UP:
+
+	The cluster is (or was) being torn down, but another CPU has
+	come online in the meantime and is trying to set up the cluster
+	again.
+
+	If the outbound CPU observes this state, it has two choices:
+
+		a) back out of teardown, restoring the cluster to the
+		   CLUSTER_UP state;
+
+		b) finish tearing the cluster down and put the cluster
+		   in the CLUSTER_DOWN state; the inbound CPU will
+		   set up the cluster again from there.
+
+	Choice (a) permits the removal of some latency by avoiding
+	unnecessary teardown and setup operations in situations where
+	the cluster is not really going to be powered down.
+
+
+	Next states:
+
+	CLUSTER_UP/INBOUND_COMING_UP (outbound)
+		Conditions:
+				cluster-level setup and hardware
+				coherency complete
+
+		Trigger events:
+				(spontaneous)
+
+	CLUSTER_DOWN/INBOUND_COMING_UP (outbound)
+		Conditions:
+			cluster torn down and ready to power off
+
+		Trigger events:
+			(spontaneous)
+
+
+Last man and First man selection
+--------------------------------
+
+The CPU which performs cluster tear-down operations on the outbound side
+is commonly referred to as the "last man".
+
+The CPU which performs cluster setup on the inbound side is commonly
+referred to as the "first man".
+
+The race avoidance algorithm documented above does not provide a
+mechanism to choose which CPUs should play these roles.
+
+
+Last man:
+
+When shutting down the cluster, all the CPUs involved are initially
+executing Linux and hence coherent.  Therefore, ordinary spinlocks can
+be used to select a last man safely, before the CPUs become
+non-coherent.
+
+
+First man:
+
+Because CPUs may power up asynchronously in response to external wake-up
+events, a dynamic mechanism is needed to make sure that only one CPU
+attempts to play the first man role and do the cluster-level
+initialisation: any other CPUs must wait for this to complete before
+proceeding.
+
+Cluster-level initialisation may involve actions such as configuring
+coherency controls in the bus fabric.
+
+The current implementation in mcpm_head.S uses a separate mutual exclusion
+mechanism to do this arbitration.  This mechanism is documented in
+detail in vlocks.txt.
+
+
+Features and Limitations
+------------------------
+
+Implementation:
+
+	The current ARM-based implementation is split between
+	arch/arm/common/mcpm_head.S (low-level inbound CPU operations) and
+	arch/arm/common/mcpm_entry.c (everything else):
+
+	__mcpm_cpu_going_down() signals the transition of a CPU to the
+	CPU_GOING_DOWN state.
+
+	__mcpm_cpu_down() signals the transition of a CPU to the CPU_DOWN
+	state.
+
+	A CPU transitions to CPU_COMING_UP and then to CPU_UP via the
+	low-level power-up code in mcpm_head.S.  This could
+	involve CPU-specific setup code, but in the current
+	implementation it does not.
+
+	__mcpm_outbound_enter_critical() and __mcpm_outbound_leave_critical()
+	handle transitions from CLUSTER_UP to CLUSTER_GOING_DOWN
+	and from there to CLUSTER_DOWN or back to CLUSTER_UP (in
+	the case of an aborted cluster power-down).
+
+	These functions are more complex than the __mcpm_cpu_*()
+	functions due to the extra inter-CPU coordination which
+	is needed for safe transitions at the cluster level.
+
+	A cluster transitions from CLUSTER_DOWN back to CLUSTER_UP via
+	the low-level power-up code in mcpm_head.S.  This
+	typically involves platform-specific setup code,
+	provided by the platform-specific power_up_setup
+	function registered via mcpm_sync_init.
+
+Deep topologies:
+
+	As currently described and implemented, the algorithm does not
+	support CPU topologies involving more than two levels (i.e.,
+	clusters of clusters are not supported).  The algorithm could be
+	extended by replicating the cluster-level states for the
+	additional topological levels, and modifying the transition
+	rules for the intermediate (non-outermost) cluster levels.
+
+
+Colophon
+--------
+
+Originally created and documented by Dave Martin for Linaro Limited, in
+collaboration with Nicolas Pitre and Achin Gupta.
+
+Copyright (C) 2012-2013  Linaro Limited
+Distributed under the terms of Version 2 of the GNU General Public
+License, as defined in linux/COPYING.
diff --git a/Documentation/arm/cluster-pm-race-avoidance.txt b/Documentation/arm/cluster-pm-race-avoidance.txt
deleted file mode 100644
index 750b6fc24af9..000000000000
--- a/Documentation/arm/cluster-pm-race-avoidance.txt
+++ /dev/null
@@ -1,498 +0,0 @@
-Cluster-wide Power-up/power-down race avoidance algorithm
-=========================================================
-
-This file documents the algorithm which is used to coordinate CPU and
-cluster setup and teardown operations and to manage hardware coherency
-controls safely.
-
-The section "Rationale" explains what the algorithm is for and why it is
-needed.  "Basic model" explains general concepts using a simplified view
-of the system.  The other sections explain the actual details of the
-algorithm in use.
-
-
-Rationale
----------
-
-In a system containing multiple CPUs, it is desirable to have the
-ability to turn off individual CPUs when the system is idle, reducing
-power consumption and thermal dissipation.
-
-In a system containing multiple clusters of CPUs, it is also desirable
-to have the ability to turn off entire clusters.
-
-Turning entire clusters off and on is a risky business, because it
-involves performing potentially destructive operations affecting a group
-of independently running CPUs, while the OS continues to run.  This
-means that we need some coordination in order to ensure that critical
-cluster-level operations are only performed when it is truly safe to do
-so.
-
-Simple locking may not be sufficient to solve this problem, because
-mechanisms like Linux spinlocks may rely on coherency mechanisms which
-are not immediately enabled when a cluster powers up.  Since enabling or
-disabling those mechanisms may itself be a non-atomic operation (such as
-writing some hardware registers and invalidating large caches), other
-methods of coordination are required in order to guarantee safe
-power-down and power-up at the cluster level.
-
-The mechanism presented in this document describes a coherent memory
-based protocol for performing the needed coordination.  It aims to be as
-lightweight as possible, while providing the required safety properties.
-
-
-Basic model
------------
-
-Each cluster and CPU is assigned a state, as follows:
-
-	DOWN
-	COMING_UP
-	UP
-	GOING_DOWN
-
-	    +---------> UP ----------+
-	    |                        v
-
-	COMING_UP                GOING_DOWN
-
-	    ^                        |
-	    +--------- DOWN <--------+
-
-
-DOWN:	The CPU or cluster is not coherent, and is either powered off or
-	suspended, or is ready to be powered off or suspended.
-
-COMING_UP: The CPU or cluster has committed to moving to the UP state.
-	It may be part way through the process of initialisation and
-	enabling coherency.
-
-UP:	The CPU or cluster is active and coherent at the hardware
-	level.  A CPU in this state is not necessarily being used
-	actively by the kernel.
-
-GOING_DOWN: The CPU or cluster has committed to moving to the DOWN
-	state.  It may be part way through the process of teardown and
-	coherency exit.
-
-
-Each CPU has one of these states assigned to it at any point in time.
-The CPU states are described in the "CPU state" section, below.
-
-Each cluster is also assigned a state, but it is necessary to split the
-state value into two parts (the "cluster" state and "inbound" state) and
-to introduce additional states in order to avoid races between different
-CPUs in the cluster simultaneously modifying the state.  The cluster-
-level states are described in the "Cluster state" section.
-
-To help distinguish the CPU states from cluster states in this
-discussion, the state names are given a CPU_ prefix for the CPU states,
-and a CLUSTER_ or INBOUND_ prefix for the cluster states.
-
-
-CPU state
----------
-
-In this algorithm, each individual core in a multi-core processor is
-referred to as a "CPU".  CPUs are assumed to be single-threaded:
-therefore, a CPU can only be doing one thing at a single point in time.
-
-This means that CPUs fit the basic model closely.
-
-The algorithm defines the following states for each CPU in the system:
-
-	CPU_DOWN
-	CPU_COMING_UP
-	CPU_UP
-	CPU_GOING_DOWN
-
-	 cluster setup and
-	CPU setup complete          policy decision
-	      +-----------> CPU_UP ------------+
-	      |                                v
-
-	CPU_COMING_UP                   CPU_GOING_DOWN
-
-	      ^                                |
-	      +----------- CPU_DOWN <----------+
-	 policy decision           CPU teardown complete
-	or hardware event
-
-
-The definitions of the four states correspond closely to the states of
-the basic model.
-
-Transitions between states occur as follows.
-
-A trigger event (spontaneous) means that the CPU can transition to the
-next state as a result of making local progress only, with no
-requirement for any external event to happen.
-
-
-CPU_DOWN:
-
-	A CPU reaches the CPU_DOWN state when it is ready for
-	power-down.  On reaching this state, the CPU will typically
-	power itself down or suspend itself, via a WFI instruction or a
-	firmware call.
-
-	Next state:	CPU_COMING_UP
-	Conditions:	none
-
-	Trigger events:
-
-		a) an explicit hardware power-up operation, resulting
-		   from a policy decision on another CPU;
-
-		b) a hardware event, such as an interrupt.
-
-
-CPU_COMING_UP:
-
-	A CPU cannot start participating in hardware coherency until the
-	cluster is set up and coherent.  If the cluster is not ready,
-	then the CPU will wait in the CPU_COMING_UP state until the
-	cluster has been set up.
-
-	Next state:	CPU_UP
-	Conditions:	The CPU's parent cluster must be in CLUSTER_UP.
-	Trigger events:	Transition of the parent cluster to CLUSTER_UP.
-
-	Refer to the "Cluster state" section for a description of the
-	CLUSTER_UP state.
-
-
-CPU_UP:
-	When a CPU reaches the CPU_UP state, it is safe for the CPU to
-	start participating in local coherency.
-
-	This is done by jumping to the kernel's CPU resume code.
-
-	Note that the definition of this state is slightly different
-	from the basic model definition: CPU_UP does not mean that the
-	CPU is coherent yet, but it does mean that it is safe to resume
-	the kernel.  The kernel handles the rest of the resume
-	procedure, so the remaining steps are not visible as part of the
-	race avoidance algorithm.
-
-	The CPU remains in this state until an explicit policy decision
-	is made to shut down or suspend the CPU.
-
-	Next state:	CPU_GOING_DOWN
-	Conditions:	none
-	Trigger events:	explicit policy decision
-
-
-CPU_GOING_DOWN:
-
-	While in this state, the CPU exits coherency, including any
-	operations required to achieve this (such as cleaning data
-	caches).
-
-	Next state:	CPU_DOWN
-	Conditions:	local CPU teardown complete
-	Trigger events:	(spontaneous)
-
-
-Cluster state
--------------
-
-A cluster is a group of connected CPUs with some common resources.
-Because a cluster contains multiple CPUs, it can be doing multiple
-things at the same time.  This has some implications.  In particular, a
-CPU can start up while another CPU is tearing the cluster down.
-
-In this discussion, the "outbound side" is the view of the cluster state
-as seen by a CPU tearing the cluster down.  The "inbound side" is the
-view of the cluster state as seen by a CPU setting the CPU up.
-
-In order to enable safe coordination in such situations, it is important
-that a CPU which is setting up the cluster can advertise its state
-independently of the CPU which is tearing down the cluster.  For this
-reason, the cluster state is split into two parts:
-
-	"cluster" state: The global state of the cluster; or the state
-		on the outbound side:
-
-		CLUSTER_DOWN
-		CLUSTER_UP
-		CLUSTER_GOING_DOWN
-
-	"inbound" state: The state of the cluster on the inbound side.
-
-		INBOUND_NOT_COMING_UP
-		INBOUND_COMING_UP
-
-
-	The different pairings of these states results in six possible
-	states for the cluster as a whole:
-
-	                            CLUSTER_UP
-	          +==========> INBOUND_NOT_COMING_UP -------------+
-	          #                                               |
-	                                                          |
-	     CLUSTER_UP     <----+                                |
-	  INBOUND_COMING_UP      |                                v
-
-	          ^             CLUSTER_GOING_DOWN       CLUSTER_GOING_DOWN
-	          #              INBOUND_COMING_UP <=== INBOUND_NOT_COMING_UP
-
-	    CLUSTER_DOWN         |                                |
-	  INBOUND_COMING_UP <----+                                |
-	                                                          |
-	          ^                                               |
-	          +===========     CLUSTER_DOWN      <------------+
-	                       INBOUND_NOT_COMING_UP
-
-	Transitions -----> can only be made by the outbound CPU, and
-	only involve changes to the "cluster" state.
-
-	Transitions ===##> can only be made by the inbound CPU, and only
-	involve changes to the "inbound" state, except where there is no
-	further transition possible on the outbound side (i.e., the
-	outbound CPU has put the cluster into the CLUSTER_DOWN state).
-
-	The race avoidance algorithm does not provide a way to determine
-	which exact CPUs within the cluster play these roles.  This must
-	be decided in advance by some other means.  Refer to the section
-	"Last man and first man selection" for more explanation.
-
-
-	CLUSTER_DOWN/INBOUND_NOT_COMING_UP is the only state where the
-	cluster can actually be powered down.
-
-	The parallelism of the inbound and outbound CPUs is observed by
-	the existence of two different paths from CLUSTER_GOING_DOWN/
-	INBOUND_NOT_COMING_UP (corresponding to GOING_DOWN in the basic
-	model) to CLUSTER_DOWN/INBOUND_COMING_UP (corresponding to
-	COMING_UP in the basic model).  The second path avoids cluster
-	teardown completely.
-
-	CLUSTER_UP/INBOUND_COMING_UP is equivalent to UP in the basic
-	model.  The final transition to CLUSTER_UP/INBOUND_NOT_COMING_UP
-	is trivial and merely resets the state machine ready for the
-	next cycle.
-
-	Details of the allowable transitions follow.
-
-	The next state in each case is notated
-
-		<cluster state>/<inbound state> (<transitioner>)
-
-	where the <transitioner> is the side on which the transition
-	can occur; either the inbound or the outbound side.
-
-
-CLUSTER_DOWN/INBOUND_NOT_COMING_UP:
-
-	Next state:	CLUSTER_DOWN/INBOUND_COMING_UP (inbound)
-	Conditions:	none
-	Trigger events:
-
-		a) an explicit hardware power-up operation, resulting
-		   from a policy decision on another CPU;
-
-		b) a hardware event, such as an interrupt.
-
-
-CLUSTER_DOWN/INBOUND_COMING_UP:
-
-	In this state, an inbound CPU sets up the cluster, including
-	enabling of hardware coherency at the cluster level and any
-	other operations (such as cache invalidation) which are required
-	in order to achieve this.
-
-	The purpose of this state is to do sufficient cluster-level
-	setup to enable other CPUs in the cluster to enter coherency
-	safely.
-
-	Next state:	CLUSTER_UP/INBOUND_COMING_UP (inbound)
-	Conditions:	cluster-level setup and hardware coherency complete
-	Trigger events:	(spontaneous)
-
-
-CLUSTER_UP/INBOUND_COMING_UP:
-
-	Cluster-level setup is complete and hardware coherency is
-	enabled for the cluster.  Other CPUs in the cluster can safely
-	enter coherency.
-
-	This is a transient state, leading immediately to
-	CLUSTER_UP/INBOUND_NOT_COMING_UP.  All other CPUs on the cluster
-	should consider treat these two states as equivalent.
-
-	Next state:	CLUSTER_UP/INBOUND_NOT_COMING_UP (inbound)
-	Conditions:	none
-	Trigger events:	(spontaneous)
-
-
-CLUSTER_UP/INBOUND_NOT_COMING_UP:
-
-	Cluster-level setup is complete and hardware coherency is
-	enabled for the cluster.  Other CPUs in the cluster can safely
-	enter coherency.
-
-	The cluster will remain in this state until a policy decision is
-	made to power the cluster down.
-
-	Next state:	CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP (outbound)
-	Conditions:	none
-	Trigger events:	policy decision to power down the cluster
-
-
-CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP:
-
-	An outbound CPU is tearing the cluster down.  The selected CPU
-	must wait in this state until all CPUs in the cluster are in the
-	CPU_DOWN state.
-
-	When all CPUs are in the CPU_DOWN state, the cluster can be torn
-	down, for example by cleaning data caches and exiting
-	cluster-level coherency.
-
-	To avoid wasteful unnecessary teardown operations, the outbound
-	should check the inbound cluster state for asynchronous
-	transitions to INBOUND_COMING_UP.  Alternatively, individual
-	CPUs can be checked for entry into CPU_COMING_UP or CPU_UP.
-
-
-	Next states:
-
-	CLUSTER_DOWN/INBOUND_NOT_COMING_UP (outbound)
-		Conditions:	cluster torn down and ready to power off
-		Trigger events:	(spontaneous)
-
-	CLUSTER_GOING_DOWN/INBOUND_COMING_UP (inbound)
-		Conditions:	none
-		Trigger events:
-
-			a) an explicit hardware power-up operation,
-			   resulting from a policy decision on another
-			   CPU;
-
-			b) a hardware event, such as an interrupt.
-
-
-CLUSTER_GOING_DOWN/INBOUND_COMING_UP:
-
-	The cluster is (or was) being torn down, but another CPU has
-	come online in the meantime and is trying to set up the cluster
-	again.
-
-	If the outbound CPU observes this state, it has two choices:
-
-		a) back out of teardown, restoring the cluster to the
-		   CLUSTER_UP state;
-
-		b) finish tearing the cluster down and put the cluster
-		   in the CLUSTER_DOWN state; the inbound CPU will
-		   set up the cluster again from there.
-
-	Choice (a) permits the removal of some latency by avoiding
-	unnecessary teardown and setup operations in situations where
-	the cluster is not really going to be powered down.
-
-
-	Next states:
-
-	CLUSTER_UP/INBOUND_COMING_UP (outbound)
-		Conditions:	cluster-level setup and hardware
-				coherency complete
-		Trigger events:	(spontaneous)
-
-	CLUSTER_DOWN/INBOUND_COMING_UP (outbound)
-		Conditions:	cluster torn down and ready to power off
-		Trigger events:	(spontaneous)
-
-
-Last man and First man selection
---------------------------------
-
-The CPU which performs cluster tear-down operations on the outbound side
-is commonly referred to as the "last man".
-
-The CPU which performs cluster setup on the inbound side is commonly
-referred to as the "first man".
-
-The race avoidance algorithm documented above does not provide a
-mechanism to choose which CPUs should play these roles.
-
-
-Last man:
-
-When shutting down the cluster, all the CPUs involved are initially
-executing Linux and hence coherent.  Therefore, ordinary spinlocks can
-be used to select a last man safely, before the CPUs become
-non-coherent.
-
-
-First man:
-
-Because CPUs may power up asynchronously in response to external wake-up
-events, a dynamic mechanism is needed to make sure that only one CPU
-attempts to play the first man role and do the cluster-level
-initialisation: any other CPUs must wait for this to complete before
-proceeding.
-
-Cluster-level initialisation may involve actions such as configuring
-coherency controls in the bus fabric.
-
-The current implementation in mcpm_head.S uses a separate mutual exclusion
-mechanism to do this arbitration.  This mechanism is documented in
-detail in vlocks.txt.
-
-
-Features and Limitations
-------------------------
-
-Implementation:
-
-	The current ARM-based implementation is split between
-	arch/arm/common/mcpm_head.S (low-level inbound CPU operations) and
-	arch/arm/common/mcpm_entry.c (everything else):
-
-	__mcpm_cpu_going_down() signals the transition of a CPU to the
-		CPU_GOING_DOWN state.
-
-	__mcpm_cpu_down() signals the transition of a CPU to the CPU_DOWN
-		state.
-
-	A CPU transitions to CPU_COMING_UP and then to CPU_UP via the
-		low-level power-up code in mcpm_head.S.  This could
-		involve CPU-specific setup code, but in the current
-		implementation it does not.
-
-	__mcpm_outbound_enter_critical() and __mcpm_outbound_leave_critical()
-		handle transitions from CLUSTER_UP to CLUSTER_GOING_DOWN
-		and from there to CLUSTER_DOWN or back to CLUSTER_UP (in
-		the case of an aborted cluster power-down).
-
-		These functions are more complex than the __mcpm_cpu_*()
-		functions due to the extra inter-CPU coordination which
-		is needed for safe transitions at the cluster level.
-
-	A cluster transitions from CLUSTER_DOWN back to CLUSTER_UP via
-		the low-level power-up code in mcpm_head.S.  This
-		typically involves platform-specific setup code,
-		provided by the platform-specific power_up_setup
-		function registered via mcpm_sync_init.
-
-Deep topologies:
-
-	As currently described and implemented, the algorithm does not
-	support CPU topologies involving more than two levels (i.e.,
-	clusters of clusters are not supported).  The algorithm could be
-	extended by replicating the cluster-level states for the
-	additional topological levels, and modifying the transition
-	rules for the intermediate (non-outermost) cluster levels.
-
-
-Colophon
---------
-
-Originally created and documented by Dave Martin for Linaro Limited, in
-collaboration with Nicolas Pitre and Achin Gupta.
-
-Copyright (C) 2012-2013  Linaro Limited
-Distributed under the terms of Version 2 of the GNU General Public
-License, as defined in linux/COPYING.
diff --git a/Documentation/arm/firmware.rst b/Documentation/arm/firmware.rst
new file mode 100644
index 000000000000..efd844baec1d
--- /dev/null
+++ b/Documentation/arm/firmware.rst
@@ -0,0 +1,72 @@
+==========================================================================
+Interface for registering and calling firmware-specific operations for ARM
+==========================================================================
+
+Written by Tomasz Figa <t.figa@samsung.com>
+
+Some boards are running with secure firmware running in TrustZone secure
+world, which changes the way some things have to be initialized. This makes
+a need to provide an interface for such platforms to specify available firmware
+operations and call them when needed.
+
+Firmware operations can be specified by filling in a struct firmware_ops
+with appropriate callbacks and then registering it with register_firmware_ops()
+function::
+
+	void register_firmware_ops(const struct firmware_ops *ops)
+
+The ops pointer must be non-NULL. More information about struct firmware_ops
+and its members can be found in arch/arm/include/asm/firmware.h header.
+
+There is a default, empty set of operations provided, so there is no need to
+set anything if platform does not require firmware operations.
+
+To call a firmware operation, a helper macro is provided::
+
+	#define call_firmware_op(op, ...)				\
+		((firmware_ops->op) ? firmware_ops->op(__VA_ARGS__) : (-ENOSYS))
+
+the macro checks if the operation is provided and calls it or otherwise returns
+-ENOSYS to signal that given operation is not available (for example, to allow
+fallback to legacy operation).
+
+Example of registering firmware operations::
+
+	/* board file */
+
+	static int platformX_do_idle(void)
+	{
+		/* tell platformX firmware to enter idle */
+		return 0;
+	}
+
+	static int platformX_cpu_boot(int i)
+	{
+		/* tell platformX firmware to boot CPU i */
+		return 0;
+	}
+
+	static const struct firmware_ops platformX_firmware_ops = {
+		.do_idle        = exynos_do_idle,
+		.cpu_boot       = exynos_cpu_boot,
+		/* other operations not available on platformX */
+	};
+
+	/* init_early callback of machine descriptor */
+	static void __init board_init_early(void)
+	{
+		register_firmware_ops(&platformX_firmware_ops);
+	}
+
+Example of using a firmware operation::
+
+	/* some platform code, e.g. SMP initialization */
+
+	__raw_writel(__pa_symbol(exynos4_secondary_startup),
+		CPU1_BOOT_REG);
+
+	/* Call Exynos specific smc call */
+	if (call_firmware_op(cpu_boot, cpu) == -ENOSYS)
+		cpu_boot_legacy(...); /* Try legacy way */
+
+	gic_raise_softirq(cpumask_of(cpu), 1);
diff --git a/Documentation/arm/firmware.txt b/Documentation/arm/firmware.txt
deleted file mode 100644
index 7f175dbb427e..000000000000
--- a/Documentation/arm/firmware.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-Interface for registering and calling firmware-specific operations for ARM.
-----
-Written by Tomasz Figa <t.figa@samsung.com>
-
-Some boards are running with secure firmware running in TrustZone secure
-world, which changes the way some things have to be initialized. This makes
-a need to provide an interface for such platforms to specify available firmware
-operations and call them when needed.
-
-Firmware operations can be specified by filling in a struct firmware_ops
-with appropriate callbacks and then registering it with register_firmware_ops()
-function.
-
-	void register_firmware_ops(const struct firmware_ops *ops)
-
-The ops pointer must be non-NULL. More information about struct firmware_ops
-and its members can be found in arch/arm/include/asm/firmware.h header.
-
-There is a default, empty set of operations provided, so there is no need to
-set anything if platform does not require firmware operations.
-
-To call a firmware operation, a helper macro is provided
-
-	#define call_firmware_op(op, ...)				\
-		((firmware_ops->op) ? firmware_ops->op(__VA_ARGS__) : (-ENOSYS))
-
-the macro checks if the operation is provided and calls it or otherwise returns
--ENOSYS to signal that given operation is not available (for example, to allow
-fallback to legacy operation).
-
-Example of registering firmware operations:
-
-	/* board file */
-
-	static int platformX_do_idle(void)
-	{
-		/* tell platformX firmware to enter idle */
-		return 0;
-	}
-
-	static int platformX_cpu_boot(int i)
-	{
-		/* tell platformX firmware to boot CPU i */
-		return 0;
-	}
-
-	static const struct firmware_ops platformX_firmware_ops = {
-		.do_idle        = exynos_do_idle,
-		.cpu_boot       = exynos_cpu_boot,
-		/* other operations not available on platformX */
-	};
-
-	/* init_early callback of machine descriptor */
-	static void __init board_init_early(void)
-	{
-		register_firmware_ops(&platformX_firmware_ops);
-	}
-
-Example of using a firmware operation:
-
-	/* some platform code, e.g. SMP initialization */
-
-	__raw_writel(__pa_symbol(exynos4_secondary_startup),
-		CPU1_BOOT_REG);
-
-	/* Call Exynos specific smc call */
-	if (call_firmware_op(cpu_boot, cpu) == -ENOSYS)
-		cpu_boot_legacy(...); /* Try legacy way */
-
-	gic_raise_softirq(cpumask_of(cpu), 1);
diff --git a/Documentation/arm/index.rst b/Documentation/arm/index.rst
new file mode 100644
index 000000000000..bd316d1a1802
--- /dev/null
+++ b/Documentation/arm/index.rst
@@ -0,0 +1,80 @@
+﻿:orphan:
+
+================
+ARM Architecture
+================
+
+.. toctree::
+   :maxdepth: 1
+
+   arm
+   booting
+   cluster-pm-race-avoidance
+   firmware
+   interrupts
+   kernel_mode_neon
+   kernel_user_helpers
+   memory
+   mem_alignment
+   tcm
+   setup
+   swp_emulation
+   uefi
+   vlocks
+   porting
+
+SoC-specific documents
+======================
+
+.. toctree::
+   :maxdepth: 1
+
+   ixp4xx
+
+   marvel
+   microchip
+
+   netwinder
+   nwfpe/index
+
+   keystone/overview
+   keystone/knav-qmss
+
+   omap/index
+
+   pxa/mfp
+
+
+   sa1100/index
+
+   stm32/stm32f746-overview
+   stm32/overview
+   stm32/stm32h743-overview
+   stm32/stm32f769-overview
+   stm32/stm32f429-overview
+   stm32/stm32mp157-overview
+
+   sunxi
+
+   samsung/index
+   samsung-s3c24xx/index
+
+   sunxi/clocks
+
+   spear/overview
+
+   sti/stih416-overview
+   sti/stih407-overview
+   sti/stih418-overview
+   sti/overview
+   sti/stih415-overview
+
+   vfp/release-notes
+
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/arm/interrupts.rst b/Documentation/arm/interrupts.rst
new file mode 100644
index 000000000000..2ae70e0e9732
--- /dev/null
+++ b/Documentation/arm/interrupts.rst
@@ -0,0 +1,169 @@
+==========
+Interrupts
+==========
+
+2.5.2-rmk5:
+  This is the first kernel that contains a major shake up of some of the
+  major architecture-specific subsystems.
+
+Firstly, it contains some pretty major changes to the way we handle the
+MMU TLB.  Each MMU TLB variant is now handled completely separately -
+we have TLB v3, TLB v4 (without write buffer), TLB v4 (with write buffer),
+and finally TLB v4 (with write buffer, with I TLB invalidate entry).
+There is more assembly code inside each of these functions, mainly to
+allow more flexible TLB handling for the future.
+
+Secondly, the IRQ subsystem.
+
+The 2.5 kernels will be having major changes to the way IRQs are handled.
+Unfortunately, this means that machine types that touch the irq_desc[]
+array (basically all machine types) will break, and this means every
+machine type that we currently have.
+
+Lets take an example.  On the Assabet with Neponset, we have::
+
+                  GPIO25                 IRR:2
+        SA1100 ------------> Neponset -----------> SA1111
+                                         IIR:1
+                                      -----------> USAR
+                                         IIR:0
+                                      -----------> SMC9196
+
+The way stuff currently works, all SA1111 interrupts are mutually
+exclusive of each other - if you're processing one interrupt from the
+SA1111 and another comes in, you have to wait for that interrupt to
+finish processing before you can service the new interrupt.  Eg, an
+IDE PIO-based interrupt on the SA1111 excludes all other SA1111 and
+SMC9196 interrupts until it has finished transferring its multi-sector
+data, which can be a long time.  Note also that since we loop in the
+SA1111 IRQ handler, SA1111 IRQs can hold off SMC9196 IRQs indefinitely.
+
+
+The new approach brings several new ideas...
+
+We introduce the concept of a "parent" and a "child".  For example,
+to the Neponset handler, the "parent" is GPIO25, and the "children"d
+are SA1111, SMC9196 and USAR.
+
+We also bring the idea of an IRQ "chip" (mainly to reduce the size of
+the irqdesc array).  This doesn't have to be a real "IC"; indeed the
+SA11x0 IRQs are handled by two separate "chip" structures, one for
+GPIO0-10, and another for all the rest.  It is just a container for
+the various operations (maybe this'll change to a better name).
+This structure has the following operations::
+
+  struct irqchip {
+          /*
+           * Acknowledge the IRQ.
+           * If this is a level-based IRQ, then it is expected to mask the IRQ
+           * as well.
+           */
+          void (*ack)(unsigned int irq);
+          /*
+           * Mask the IRQ in hardware.
+           */
+          void (*mask)(unsigned int irq);
+          /*
+           * Unmask the IRQ in hardware.
+           */
+          void (*unmask)(unsigned int irq);
+          /*
+           * Re-run the IRQ
+           */
+          void (*rerun)(unsigned int irq);
+          /*
+           * Set the type of the IRQ.
+           */
+          int (*type)(unsigned int irq, unsigned int, type);
+  };
+
+ack
+       - required.  May be the same function as mask for IRQs
+         handled by do_level_IRQ.
+mask
+       - required.
+unmask
+       - required.
+rerun
+       - optional.  Not required if you're using do_level_IRQ for all
+         IRQs that use this 'irqchip'.  Generally expected to re-trigger
+         the hardware IRQ if possible.  If not, may call the handler
+	 directly.
+type
+       - optional.  If you don't support changing the type of an IRQ,
+         it should be null so people can detect if they are unable to
+         set the IRQ type.
+
+For each IRQ, we keep the following information:
+
+        - "disable" depth (number of disable_irq()s without enable_irq()s)
+        - flags indicating what we can do with this IRQ (valid, probe,
+          noautounmask) as before
+        - status of the IRQ (probing, enable, etc)
+        - chip
+        - per-IRQ handler
+        - irqaction structure list
+
+The handler can be one of the 3 standard handlers - "level", "edge" and
+"simple", or your own specific handler if you need to do something special.
+
+The "level" handler is what we currently have - its pretty simple.
+"edge" knows about the brokenness of such IRQ implementations - that you
+need to leave the hardware IRQ enabled while processing it, and queueing
+further IRQ events should the IRQ happen again while processing.  The
+"simple" handler is very basic, and does not perform any hardware
+manipulation, nor state tracking.  This is useful for things like the
+SMC9196 and USAR above.
+
+So, what's changed?
+===================
+
+1. Machine implementations must not write to the irqdesc array.
+
+2. New functions to manipulate the irqdesc array.  The first 4 are expected
+   to be useful only to machine specific code.  The last is recommended to
+   only be used by machine specific code, but may be used in drivers if
+   absolutely necessary.
+
+        set_irq_chip(irq,chip)
+                Set the mask/unmask methods for handling this IRQ
+
+        set_irq_handler(irq,handler)
+                Set the handler for this IRQ (level, edge, simple)
+
+        set_irq_chained_handler(irq,handler)
+                Set a "chained" handler for this IRQ - automatically
+                enables this IRQ (eg, Neponset and SA1111 handlers).
+
+        set_irq_flags(irq,flags)
+                Set the valid/probe/noautoenable flags.
+
+        set_irq_type(irq,type)
+                Set active the IRQ edge(s)/level.  This replaces the
+                SA1111 INTPOL manipulation, and the set_GPIO_IRQ_edge()
+                function.  Type should be one of IRQ_TYPE_xxx defined in
+		<linux/irq.h>
+
+3. set_GPIO_IRQ_edge() is obsolete, and should be replaced by set_irq_type.
+
+4. Direct access to SA1111 INTPOL is deprecated.  Use set_irq_type instead.
+
+5. A handler is expected to perform any necessary acknowledgement of the
+   parent IRQ via the correct chip specific function.  For instance, if
+   the SA1111 is directly connected to a SA1110 GPIO, then you should
+   acknowledge the SA1110 IRQ each time you re-read the SA1111 IRQ status.
+
+6. For any child which doesn't have its own IRQ enable/disable controls
+   (eg, SMC9196), the handler must mask or acknowledge the parent IRQ
+   while the child handler is called, and the child handler should be the
+   "simple" handler (not "edge" nor "level").  After the handler completes,
+   the parent IRQ should be unmasked, and the status of all children must
+   be re-checked for pending events.  (see the Neponset IRQ handler for
+   details).
+
+7. fixup_irq() is gone, as is `arch/arm/mach-*/include/mach/irq.h`
+
+Please note that this will not solve all problems - some of them are
+hardware based.  Mixing level-based and edge-based IRQs on the same
+parent signal (eg neponset) is one such area where a software based
+solution can't provide the full answer to low IRQ latency.
diff --git a/Documentation/arm/ixp4xx.rst b/Documentation/arm/ixp4xx.rst
new file mode 100644
index 000000000000..a57235616294
--- /dev/null
+++ b/Documentation/arm/ixp4xx.rst
@@ -0,0 +1,173 @@
+===========================================================
+Release Notes for Linux on Intel's IXP4xx Network Processor
+===========================================================
+
+Maintained by Deepak Saxena <dsaxena@plexity.net>
+-------------------------------------------------------------------------
+
+1. Overview
+
+Intel's IXP4xx network processor is a highly integrated SOC that
+is targeted for network applications, though it has become popular
+in industrial control and other areas due to low cost and power
+consumption. The IXP4xx family currently consists of several processors
+that support different network offload functions such as encryption,
+routing, firewalling, etc. The IXP46x family is an updated version which
+supports faster speeds, new memory and flash configurations, and more
+integration such as an on-chip I2C controller.
+
+For more information on the various versions of the CPU, see:
+
+   http://developer.intel.com/design/network/products/npfamily/ixp4xx.htm
+
+Intel also made the IXCP1100 CPU for sometime which is an IXP4xx
+stripped of much of the network intelligence.
+
+2. Linux Support
+
+Linux currently supports the following features on the IXP4xx chips:
+
+- Dual serial ports
+- PCI interface
+- Flash access (MTD/JFFS)
+- I2C through GPIO on IXP42x
+- GPIO for input/output/interrupts
+  See arch/arm/mach-ixp4xx/include/mach/platform.h for access functions.
+- Timers (watchdog, OS)
+
+The following components of the chips are not supported by Linux and
+require the use of Intel's proprietary CSR software:
+
+- USB device interface
+- Network interfaces (HSS, Utopia, NPEs, etc)
+- Network offload functionality
+
+If you need to use any of the above, you need to download Intel's
+software from:
+
+   http://developer.intel.com/design/network/products/npfamily/ixp425.htm
+
+DO NOT POST QUESTIONS TO THE LINUX MAILING LISTS REGARDING THE PROPRIETARY
+SOFTWARE.
+
+There are several websites that provide directions/pointers on using
+Intel's software:
+
+   - http://sourceforge.net/projects/ixp4xx-osdg/
+     Open Source Developer's Guide for using uClinux and the Intel libraries
+
+   - http://gatewaymaker.sourceforge.net/
+     Simple one page summary of building a gateway using an IXP425 and Linux
+
+   - http://ixp425.sourceforge.net/
+     ATM device driver for IXP425 that relies on Intel's libraries
+
+3. Known Issues/Limitations
+
+3a. Limited inbound PCI window
+
+The IXP4xx family allows for up to 256MB of memory but the PCI interface
+can only expose 64MB of that memory to the PCI bus. This means that if
+you are running with > 64MB, all PCI buffers outside of the accessible
+range will be bounced using the routines in arch/arm/common/dmabounce.c.
+
+3b. Limited outbound PCI window
+
+IXP4xx provides two methods of accessing PCI memory space:
+
+1) A direct mapped window from 0x48000000 to 0x4bffffff (64MB).
+   To access PCI via this space, we simply ioremap() the BAR
+   into the kernel and we can use the standard read[bwl]/write[bwl]
+   macros. This is the preffered method due to speed but it
+   limits the system to just 64MB of PCI memory. This can be
+   problamatic if using video cards and other memory-heavy devices.
+
+2) If > 64MB of memory space is required, the IXP4xx can be
+   configured to use indirect registers to access PCI This allows
+   for up to 128MB (0x48000000 to 0x4fffffff) of memory on the bus.
+   The disadvantage of this is that every PCI access requires
+   three local register accesses plus a spinlock, but in some
+   cases the performance hit is acceptable. In addition, you cannot
+   mmap() PCI devices in this case due to the indirect nature
+   of the PCI window.
+
+By default, the direct method is used for performance reasons. If
+you need more PCI memory, enable the IXP4XX_INDIRECT_PCI config option.
+
+3c. GPIO as Interrupts
+
+Currently the code only handles level-sensitive GPIO interrupts
+
+4. Supported platforms
+
+ADI Engineering Coyote Gateway Reference Platform
+http://www.adiengineering.com/productsCoyote.html
+
+   The ADI Coyote platform is reference design for those building
+   small residential/office gateways. One NPE is connected to a 10/100
+   interface, one to 4-port 10/100 switch, and the third to and ADSL
+   interface. In addition, it also supports to POTs interfaces connected
+   via SLICs. Note that those are not supported by Linux ATM. Finally,
+   the platform has two mini-PCI slots used for 802.11[bga] cards.
+   Finally, there is an IDE port hanging off the expansion bus.
+
+Gateworks Avila Network Platform
+http://www.gateworks.com/support/overview.php
+
+   The Avila platform is basically and IXDP425 with the 4 PCI slots
+   replaced with mini-PCI slots and a CF IDE interface hanging off
+   the expansion bus.
+
+Intel IXDP425 Development Platform
+http://www.intel.com/design/network/products/npfamily/ixdpg425.htm
+
+   This is Intel's standard reference platform for the IXDP425 and is
+   also known as the Richfield board. It contains 4 PCI slots, 16MB
+   of flash, two 10/100 ports and one ADSL port.
+
+Intel IXDP465 Development Platform
+http://www.intel.com/design/network/products/npfamily/ixdp465.htm
+
+   This is basically an IXDP425 with an IXP465 and 32M of flash instead
+   of just 16.
+
+Intel IXDPG425 Development Platform
+
+   This is basically and ADI Coyote board with a NEC EHCI controller
+   added. One issue with this board is that the mini-PCI slots only
+   have the 3.3v line connected, so you can't use a PCI to mini-PCI
+   adapter with an E100 card. So to NFS root you need to use either
+   the CSR or a WiFi card and a ramdisk that BOOTPs and then does
+   a pivot_root to NFS.
+
+Motorola PrPMC1100 Processor Mezanine Card
+http://www.fountainsys.com
+
+   The PrPMC1100 is based on the IXCP1100 and is meant to plug into
+   and IXP2400/2800 system to act as the system controller. It simply
+   contains a CPU and 16MB of flash on the board and needs to be
+   plugged into a carrier board to function. Currently Linux only
+   supports the Motorola PrPMC carrier board for this platform.
+
+5. TODO LIST
+
+- Add support for Coyote IDE
+- Add support for edge-based GPIO interrupts
+- Add support for CF IDE on expansion bus
+
+6. Thanks
+
+The IXP4xx work has been funded by Intel Corp. and MontaVista Software, Inc.
+
+The following people have contributed patches/comments/etc:
+
+- Lennerty Buytenhek
+- Lutz Jaenicke
+- Justin Mayfield
+- Robert E. Ranslam
+
+[I know I've forgotten others, please email me to be added]
+
+-------------------------------------------------------------------------
+
+Last Update: 01/04/2005
diff --git a/Documentation/arm/kernel_mode_neon.rst b/Documentation/arm/kernel_mode_neon.rst
new file mode 100644
index 000000000000..9bfb71a2a9b9
--- /dev/null
+++ b/Documentation/arm/kernel_mode_neon.rst
@@ -0,0 +1,124 @@
+================
+Kernel mode NEON
+================
+
+TL;DR summary
+-------------
+* Use only NEON instructions, or VFP instructions that don't rely on support
+  code
+* Isolate your NEON code in a separate compilation unit, and compile it with
+  '-march=armv7-a -mfpu=neon -mfloat-abi=softfp'
+* Put kernel_neon_begin() and kernel_neon_end() calls around the calls into your
+  NEON code
+* Don't sleep in your NEON code, and be aware that it will be executed with
+  preemption disabled
+
+
+Introduction
+------------
+It is possible to use NEON instructions (and in some cases, VFP instructions) in
+code that runs in kernel mode. However, for performance reasons, the NEON/VFP
+register file is not preserved and restored at every context switch or taken
+exception like the normal register file is, so some manual intervention is
+required. Furthermore, special care is required for code that may sleep [i.e.,
+may call schedule()], as NEON or VFP instructions will be executed in a
+non-preemptible section for reasons outlined below.
+
+
+Lazy preserve and restore
+-------------------------
+The NEON/VFP register file is managed using lazy preserve (on UP systems) and
+lazy restore (on both SMP and UP systems). This means that the register file is
+kept 'live', and is only preserved and restored when multiple tasks are
+contending for the NEON/VFP unit (or, in the SMP case, when a task migrates to
+another core). Lazy restore is implemented by disabling the NEON/VFP unit after
+every context switch, resulting in a trap when subsequently a NEON/VFP
+instruction is issued, allowing the kernel to step in and perform the restore if
+necessary.
+
+Any use of the NEON/VFP unit in kernel mode should not interfere with this, so
+it is required to do an 'eager' preserve of the NEON/VFP register file, and
+enable the NEON/VFP unit explicitly so no exceptions are generated on first
+subsequent use. This is handled by the function kernel_neon_begin(), which
+should be called before any kernel mode NEON or VFP instructions are issued.
+Likewise, the NEON/VFP unit should be disabled again after use to make sure user
+mode will hit the lazy restore trap upon next use. This is handled by the
+function kernel_neon_end().
+
+
+Interruptions in kernel mode
+----------------------------
+For reasons of performance and simplicity, it was decided that there shall be no
+preserve/restore mechanism for the kernel mode NEON/VFP register contents. This
+implies that interruptions of a kernel mode NEON section can only be allowed if
+they are guaranteed not to touch the NEON/VFP registers. For this reason, the
+following rules and restrictions apply in the kernel:
+* NEON/VFP code is not allowed in interrupt context;
+* NEON/VFP code is not allowed to sleep;
+* NEON/VFP code is executed with preemption disabled.
+
+If latency is a concern, it is possible to put back to back calls to
+kernel_neon_end() and kernel_neon_begin() in places in your code where none of
+the NEON registers are live. (Additional calls to kernel_neon_begin() should be
+reasonably cheap if no context switch occurred in the meantime)
+
+
+VFP and support code
+--------------------
+Earlier versions of VFP (prior to version 3) rely on software support for things
+like IEEE-754 compliant underflow handling etc. When the VFP unit needs such
+software assistance, it signals the kernel by raising an undefined instruction
+exception. The kernel responds by inspecting the VFP control registers and the
+current instruction and arguments, and emulates the instruction in software.
+
+Such software assistance is currently not implemented for VFP instructions
+executed in kernel mode. If such a condition is encountered, the kernel will
+fail and generate an OOPS.
+
+
+Separating NEON code from ordinary code
+---------------------------------------
+The compiler is not aware of the special significance of kernel_neon_begin() and
+kernel_neon_end(), i.e., that it is only allowed to issue NEON/VFP instructions
+between calls to these respective functions. Furthermore, GCC may generate NEON
+instructions of its own at -O3 level if -mfpu=neon is selected, and even if the
+kernel is currently compiled at -O2, future changes may result in NEON/VFP
+instructions appearing in unexpected places if no special care is taken.
+
+Therefore, the recommended and only supported way of using NEON/VFP in the
+kernel is by adhering to the following rules:
+
+* isolate the NEON code in a separate compilation unit and compile it with
+  '-march=armv7-a -mfpu=neon -mfloat-abi=softfp';
+* issue the calls to kernel_neon_begin(), kernel_neon_end() as well as the calls
+  into the unit containing the NEON code from a compilation unit which is *not*
+  built with the GCC flag '-mfpu=neon' set.
+
+As the kernel is compiled with '-msoft-float', the above will guarantee that
+both NEON and VFP instructions will only ever appear in designated compilation
+units at any optimization level.
+
+
+NEON assembler
+--------------
+NEON assembler is supported with no additional caveats as long as the rules
+above are followed.
+
+
+NEON code generated by GCC
+--------------------------
+The GCC option -ftree-vectorize (implied by -O3) tries to exploit implicit
+parallelism, and generates NEON code from ordinary C source code. This is fully
+supported as long as the rules above are followed.
+
+
+NEON intrinsics
+---------------
+NEON intrinsics are also supported. However, as code using NEON intrinsics
+relies on the GCC header <arm_neon.h>, (which #includes <stdint.h>), you should
+observe the following in addition to the rules above:
+
+* Compile the unit containing the NEON intrinsics with '-ffreestanding' so GCC
+  uses its builtin version of <stdint.h> (this is a C99 header which the kernel
+  does not supply);
+* Include <arm_neon.h> last, or at least after <linux/types.h>
diff --git a/Documentation/arm/kernel_mode_neon.txt b/Documentation/arm/kernel_mode_neon.txt
deleted file mode 100644
index b9e060c5b61e..000000000000
--- a/Documentation/arm/kernel_mode_neon.txt
+++ /dev/null
@@ -1,121 +0,0 @@
-Kernel mode NEON
-================
-
-TL;DR summary
--------------
-* Use only NEON instructions, or VFP instructions that don't rely on support
-  code
-* Isolate your NEON code in a separate compilation unit, and compile it with
-  '-march=armv7-a -mfpu=neon -mfloat-abi=softfp'
-* Put kernel_neon_begin() and kernel_neon_end() calls around the calls into your
-  NEON code
-* Don't sleep in your NEON code, and be aware that it will be executed with
-  preemption disabled
-
-
-Introduction
-------------
-It is possible to use NEON instructions (and in some cases, VFP instructions) in
-code that runs in kernel mode. However, for performance reasons, the NEON/VFP
-register file is not preserved and restored at every context switch or taken
-exception like the normal register file is, so some manual intervention is
-required. Furthermore, special care is required for code that may sleep [i.e.,
-may call schedule()], as NEON or VFP instructions will be executed in a
-non-preemptible section for reasons outlined below.
-
-
-Lazy preserve and restore
--------------------------
-The NEON/VFP register file is managed using lazy preserve (on UP systems) and
-lazy restore (on both SMP and UP systems). This means that the register file is
-kept 'live', and is only preserved and restored when multiple tasks are
-contending for the NEON/VFP unit (or, in the SMP case, when a task migrates to
-another core). Lazy restore is implemented by disabling the NEON/VFP unit after
-every context switch, resulting in a trap when subsequently a NEON/VFP
-instruction is issued, allowing the kernel to step in and perform the restore if
-necessary.
-
-Any use of the NEON/VFP unit in kernel mode should not interfere with this, so
-it is required to do an 'eager' preserve of the NEON/VFP register file, and
-enable the NEON/VFP unit explicitly so no exceptions are generated on first
-subsequent use. This is handled by the function kernel_neon_begin(), which
-should be called before any kernel mode NEON or VFP instructions are issued.
-Likewise, the NEON/VFP unit should be disabled again after use to make sure user
-mode will hit the lazy restore trap upon next use. This is handled by the
-function kernel_neon_end().
-
-
-Interruptions in kernel mode
-----------------------------
-For reasons of performance and simplicity, it was decided that there shall be no
-preserve/restore mechanism for the kernel mode NEON/VFP register contents. This
-implies that interruptions of a kernel mode NEON section can only be allowed if
-they are guaranteed not to touch the NEON/VFP registers. For this reason, the
-following rules and restrictions apply in the kernel:
-* NEON/VFP code is not allowed in interrupt context;
-* NEON/VFP code is not allowed to sleep;
-* NEON/VFP code is executed with preemption disabled.
-
-If latency is a concern, it is possible to put back to back calls to
-kernel_neon_end() and kernel_neon_begin() in places in your code where none of
-the NEON registers are live. (Additional calls to kernel_neon_begin() should be
-reasonably cheap if no context switch occurred in the meantime)
-
-
-VFP and support code
---------------------
-Earlier versions of VFP (prior to version 3) rely on software support for things
-like IEEE-754 compliant underflow handling etc. When the VFP unit needs such
-software assistance, it signals the kernel by raising an undefined instruction
-exception. The kernel responds by inspecting the VFP control registers and the
-current instruction and arguments, and emulates the instruction in software.
-
-Such software assistance is currently not implemented for VFP instructions
-executed in kernel mode. If such a condition is encountered, the kernel will
-fail and generate an OOPS.
-
-
-Separating NEON code from ordinary code
----------------------------------------
-The compiler is not aware of the special significance of kernel_neon_begin() and
-kernel_neon_end(), i.e., that it is only allowed to issue NEON/VFP instructions
-between calls to these respective functions. Furthermore, GCC may generate NEON
-instructions of its own at -O3 level if -mfpu=neon is selected, and even if the
-kernel is currently compiled at -O2, future changes may result in NEON/VFP
-instructions appearing in unexpected places if no special care is taken.
-
-Therefore, the recommended and only supported way of using NEON/VFP in the
-kernel is by adhering to the following rules:
-* isolate the NEON code in a separate compilation unit and compile it with
-  '-march=armv7-a -mfpu=neon -mfloat-abi=softfp';
-* issue the calls to kernel_neon_begin(), kernel_neon_end() as well as the calls
-  into the unit containing the NEON code from a compilation unit which is *not*
-  built with the GCC flag '-mfpu=neon' set.
-
-As the kernel is compiled with '-msoft-float', the above will guarantee that
-both NEON and VFP instructions will only ever appear in designated compilation
-units at any optimization level.
-
-
-NEON assembler
---------------
-NEON assembler is supported with no additional caveats as long as the rules
-above are followed.
-
-
-NEON code generated by GCC
---------------------------
-The GCC option -ftree-vectorize (implied by -O3) tries to exploit implicit
-parallelism, and generates NEON code from ordinary C source code. This is fully
-supported as long as the rules above are followed.
-
-
-NEON intrinsics
----------------
-NEON intrinsics are also supported. However, as code using NEON intrinsics
-relies on the GCC header <arm_neon.h>, (which #includes <stdint.h>), you should
-observe the following in addition to the rules above:
-* Compile the unit containing the NEON intrinsics with '-ffreestanding' so GCC
-  uses its builtin version of <stdint.h> (this is a C99 header which the kernel
-  does not supply);
-* Include <arm_neon.h> last, or at least after <linux/types.h>
diff --git a/Documentation/arm/kernel_user_helpers.rst b/Documentation/arm/kernel_user_helpers.rst
new file mode 100644
index 000000000000..eb6f3d916622
--- /dev/null
+++ b/Documentation/arm/kernel_user_helpers.rst
@@ -0,0 +1,268 @@
+============================
+Kernel-provided User Helpers
+============================
+
+These are segment of kernel provided user code reachable from user space
+at a fixed address in kernel memory.  This is used to provide user space
+with some operations which require kernel help because of unimplemented
+native feature and/or instructions in many ARM CPUs. The idea is for this
+code to be executed directly in user mode for best efficiency but which is
+too intimate with the kernel counter part to be left to user libraries.
+In fact this code might even differ from one CPU to another depending on
+the available instruction set, or whether it is a SMP systems. In other
+words, the kernel reserves the right to change this code as needed without
+warning. Only the entry points and their results as documented here are
+guaranteed to be stable.
+
+This is different from (but doesn't preclude) a full blown VDSO
+implementation, however a VDSO would prevent some assembly tricks with
+constants that allows for efficient branching to those code segments. And
+since those code segments only use a few cycles before returning to user
+code, the overhead of a VDSO indirect far call would add a measurable
+overhead to such minimalistic operations.
+
+User space is expected to bypass those helpers and implement those things
+inline (either in the code emitted directly by the compiler, or part of
+the implementation of a library call) when optimizing for a recent enough
+processor that has the necessary native support, but only if resulting
+binaries are already to be incompatible with earlier ARM processors due to
+usage of similar native instructions for other things.  In other words
+don't make binaries unable to run on earlier processors just for the sake
+of not using these kernel helpers if your compiled code is not going to
+use new instructions for other purpose.
+
+New helpers may be added over time, so an older kernel may be missing some
+helpers present in a newer kernel.  For this reason, programs must check
+the value of __kuser_helper_version (see below) before assuming that it is
+safe to call any particular helper.  This check should ideally be
+performed only once at process startup time, and execution aborted early
+if the required helpers are not provided by the kernel version that
+process is running on.
+
+kuser_helper_version
+--------------------
+
+Location:	0xffff0ffc
+
+Reference declaration::
+
+  extern int32_t __kuser_helper_version;
+
+Definition:
+
+  This field contains the number of helpers being implemented by the
+  running kernel.  User space may read this to determine the availability
+  of a particular helper.
+
+Usage example::
+
+  #define __kuser_helper_version (*(int32_t *)0xffff0ffc)
+
+  void check_kuser_version(void)
+  {
+	if (__kuser_helper_version < 2) {
+		fprintf(stderr, "can't do atomic operations, kernel too old\n");
+		abort();
+	}
+  }
+
+Notes:
+
+  User space may assume that the value of this field never changes
+  during the lifetime of any single process.  This means that this
+  field can be read once during the initialisation of a library or
+  startup phase of a program.
+
+kuser_get_tls
+-------------
+
+Location:	0xffff0fe0
+
+Reference prototype::
+
+  void * __kuser_get_tls(void);
+
+Input:
+
+  lr = return address
+
+Output:
+
+  r0 = TLS value
+
+Clobbered registers:
+
+  none
+
+Definition:
+
+  Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
+
+Usage example::
+
+  typedef void * (__kuser_get_tls_t)(void);
+  #define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
+
+  void foo()
+  {
+	void *tls = __kuser_get_tls();
+	printf("TLS = %p\n", tls);
+  }
+
+Notes:
+
+  - Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12).
+
+kuser_cmpxchg
+-------------
+
+Location:	0xffff0fc0
+
+Reference prototype::
+
+  int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);
+
+Input:
+
+  r0 = oldval
+  r1 = newval
+  r2 = ptr
+  lr = return address
+
+Output:
+
+  r0 = success code (zero or non-zero)
+  C flag = set if r0 == 0, clear if r0 != 0
+
+Clobbered registers:
+
+  r3, ip, flags
+
+Definition:
+
+  Atomically store newval in `*ptr` only if `*ptr` is equal to oldval.
+  Return zero if `*ptr` was changed or non-zero if no exchange happened.
+  The C flag is also set if `*ptr` was changed to allow for assembly
+  optimization in the calling code.
+
+Usage example::
+
+  typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
+  #define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
+
+  int atomic_add(volatile int *ptr, int val)
+  {
+	int old, new;
+
+	do {
+		old = *ptr;
+		new = old + val;
+	} while(__kuser_cmpxchg(old, new, ptr));
+
+	return new;
+  }
+
+Notes:
+
+  - This routine already includes memory barriers as needed.
+
+  - Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12).
+
+kuser_memory_barrier
+--------------------
+
+Location:	0xffff0fa0
+
+Reference prototype::
+
+  void __kuser_memory_barrier(void);
+
+Input:
+
+  lr = return address
+
+Output:
+
+  none
+
+Clobbered registers:
+
+  none
+
+Definition:
+
+  Apply any needed memory barrier to preserve consistency with data modified
+  manually and __kuser_cmpxchg usage.
+
+Usage example::
+
+  typedef void (__kuser_dmb_t)(void);
+  #define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
+
+Notes:
+
+  - Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15).
+
+kuser_cmpxchg64
+---------------
+
+Location:	0xffff0f60
+
+Reference prototype::
+
+  int __kuser_cmpxchg64(const int64_t *oldval,
+                        const int64_t *newval,
+                        volatile int64_t *ptr);
+
+Input:
+
+  r0 = pointer to oldval
+  r1 = pointer to newval
+  r2 = pointer to target value
+  lr = return address
+
+Output:
+
+  r0 = success code (zero or non-zero)
+  C flag = set if r0 == 0, clear if r0 != 0
+
+Clobbered registers:
+
+  r3, lr, flags
+
+Definition:
+
+  Atomically store the 64-bit value pointed by `*newval` in `*ptr` only if `*ptr`
+  is equal to the 64-bit value pointed by `*oldval`.  Return zero if `*ptr` was
+  changed or non-zero if no exchange happened.
+
+  The C flag is also set if `*ptr` was changed to allow for assembly
+  optimization in the calling code.
+
+Usage example::
+
+  typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
+                                    const int64_t *newval,
+                                    volatile int64_t *ptr);
+  #define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
+
+  int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
+  {
+	int64_t old, new;
+
+	do {
+		old = *ptr;
+		new = old + val;
+	} while(__kuser_cmpxchg64(&old, &new, ptr));
+
+	return new;
+  }
+
+Notes:
+
+  - This routine already includes memory barriers as needed.
+
+  - Due to the length of this sequence, this spans 2 conventional kuser
+    "slots", therefore 0xffff0f80 is not used as a valid entry point.
+
+  - Valid only if __kuser_helper_version >= 5 (from kernel version 3.1).
diff --git a/Documentation/arm/kernel_user_helpers.txt b/Documentation/arm/kernel_user_helpers.txt
deleted file mode 100644
index 5673594717cf..000000000000
--- a/Documentation/arm/kernel_user_helpers.txt
+++ /dev/null
@@ -1,267 +0,0 @@
-Kernel-provided User Helpers
-============================
-
-These are segment of kernel provided user code reachable from user space
-at a fixed address in kernel memory.  This is used to provide user space
-with some operations which require kernel help because of unimplemented
-native feature and/or instructions in many ARM CPUs. The idea is for this
-code to be executed directly in user mode for best efficiency but which is
-too intimate with the kernel counter part to be left to user libraries.
-In fact this code might even differ from one CPU to another depending on
-the available instruction set, or whether it is a SMP systems. In other
-words, the kernel reserves the right to change this code as needed without
-warning. Only the entry points and their results as documented here are
-guaranteed to be stable.
-
-This is different from (but doesn't preclude) a full blown VDSO
-implementation, however a VDSO would prevent some assembly tricks with
-constants that allows for efficient branching to those code segments. And
-since those code segments only use a few cycles before returning to user
-code, the overhead of a VDSO indirect far call would add a measurable
-overhead to such minimalistic operations.
-
-User space is expected to bypass those helpers and implement those things
-inline (either in the code emitted directly by the compiler, or part of
-the implementation of a library call) when optimizing for a recent enough
-processor that has the necessary native support, but only if resulting
-binaries are already to be incompatible with earlier ARM processors due to
-usage of similar native instructions for other things.  In other words
-don't make binaries unable to run on earlier processors just for the sake
-of not using these kernel helpers if your compiled code is not going to
-use new instructions for other purpose.
-
-New helpers may be added over time, so an older kernel may be missing some
-helpers present in a newer kernel.  For this reason, programs must check
-the value of __kuser_helper_version (see below) before assuming that it is
-safe to call any particular helper.  This check should ideally be
-performed only once at process startup time, and execution aborted early
-if the required helpers are not provided by the kernel version that
-process is running on.
-
-kuser_helper_version
---------------------
-
-Location:	0xffff0ffc
-
-Reference declaration:
-
-  extern int32_t __kuser_helper_version;
-
-Definition:
-
-  This field contains the number of helpers being implemented by the
-  running kernel.  User space may read this to determine the availability
-  of a particular helper.
-
-Usage example:
-
-#define __kuser_helper_version (*(int32_t *)0xffff0ffc)
-
-void check_kuser_version(void)
-{
-	if (__kuser_helper_version < 2) {
-		fprintf(stderr, "can't do atomic operations, kernel too old\n");
-		abort();
-	}
-}
-
-Notes:
-
-  User space may assume that the value of this field never changes
-  during the lifetime of any single process.  This means that this
-  field can be read once during the initialisation of a library or
-  startup phase of a program.
-
-kuser_get_tls
--------------
-
-Location:	0xffff0fe0
-
-Reference prototype:
-
-  void * __kuser_get_tls(void);
-
-Input:
-
-  lr = return address
-
-Output:
-
-  r0 = TLS value
-
-Clobbered registers:
-
-  none
-
-Definition:
-
-  Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
-
-Usage example:
-
-typedef void * (__kuser_get_tls_t)(void);
-#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
-
-void foo()
-{
-	void *tls = __kuser_get_tls();
-	printf("TLS = %p\n", tls);
-}
-
-Notes:
-
-  - Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12).
-
-kuser_cmpxchg
--------------
-
-Location:	0xffff0fc0
-
-Reference prototype:
-
-  int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);
-
-Input:
-
-  r0 = oldval
-  r1 = newval
-  r2 = ptr
-  lr = return address
-
-Output:
-
-  r0 = success code (zero or non-zero)
-  C flag = set if r0 == 0, clear if r0 != 0
-
-Clobbered registers:
-
-  r3, ip, flags
-
-Definition:
-
-  Atomically store newval in *ptr only if *ptr is equal to oldval.
-  Return zero if *ptr was changed or non-zero if no exchange happened.
-  The C flag is also set if *ptr was changed to allow for assembly
-  optimization in the calling code.
-
-Usage example:
-
-typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
-#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
-
-int atomic_add(volatile int *ptr, int val)
-{
-	int old, new;
-
-	do {
-		old = *ptr;
-		new = old + val;
-	} while(__kuser_cmpxchg(old, new, ptr));
-
-	return new;
-}
-
-Notes:
-
-  - This routine already includes memory barriers as needed.
-
-  - Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12).
-
-kuser_memory_barrier
---------------------
-
-Location:	0xffff0fa0
-
-Reference prototype:
-
-  void __kuser_memory_barrier(void);
-
-Input:
-
-  lr = return address
-
-Output:
-
-  none
-
-Clobbered registers:
-
-  none
-
-Definition:
-
-  Apply any needed memory barrier to preserve consistency with data modified
-  manually and __kuser_cmpxchg usage.
-
-Usage example:
-
-typedef void (__kuser_dmb_t)(void);
-#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
-
-Notes:
-
-  - Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15).
-
-kuser_cmpxchg64
----------------
-
-Location:	0xffff0f60
-
-Reference prototype:
-
-  int __kuser_cmpxchg64(const int64_t *oldval,
-                        const int64_t *newval,
-                        volatile int64_t *ptr);
-
-Input:
-
-  r0 = pointer to oldval
-  r1 = pointer to newval
-  r2 = pointer to target value
-  lr = return address
-
-Output:
-
-  r0 = success code (zero or non-zero)
-  C flag = set if r0 == 0, clear if r0 != 0
-
-Clobbered registers:
-
-  r3, lr, flags
-
-Definition:
-
-  Atomically store the 64-bit value pointed by *newval in *ptr only if *ptr
-  is equal to the 64-bit value pointed by *oldval.  Return zero if *ptr was
-  changed or non-zero if no exchange happened.
-
-  The C flag is also set if *ptr was changed to allow for assembly
-  optimization in the calling code.
-
-Usage example:
-
-typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
-                                  const int64_t *newval,
-                                  volatile int64_t *ptr);
-#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
-
-int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
-{
-	int64_t old, new;
-
-	do {
-		old = *ptr;
-		new = old + val;
-	} while(__kuser_cmpxchg64(&old, &new, ptr));
-
-	return new;
-}
-
-Notes:
-
-  - This routine already includes memory barriers as needed.
-
-  - Due to the length of this sequence, this spans 2 conventional kuser
-    "slots", therefore 0xffff0f80 is not used as a valid entry point.
-
-  - Valid only if __kuser_helper_version >= 5 (from kernel version 3.1).
diff --git a/Documentation/arm/keystone/Overview.txt b/Documentation/arm/keystone/Overview.txt
deleted file mode 100644
index 400c0c270d2e..000000000000
--- a/Documentation/arm/keystone/Overview.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-		TI Keystone Linux Overview
-		--------------------------
-
-Introduction
-------------
-Keystone range of SoCs are based on ARM Cortex-A15 MPCore Processors
-and c66x DSP cores. This document describes essential information required
-for users to run Linux on Keystone based EVMs from Texas Instruments.
-
-Following SoCs  & EVMs are currently supported:-
-
------------- K2HK SoC and EVM --------------------------------------------------
-
-a.k.a Keystone 2 Hawking/Kepler SoC
-TCI6636K2H & TCI6636K2K: See documentation at
-	http://www.ti.com/product/tci6638k2k
-	http://www.ti.com/product/tci6638k2h
-
-EVM:
-http://www.advantech.com/Support/TI-EVM/EVMK2HX_sd.aspx
-
------------- K2E SoC and EVM ---------------------------------------------------
-
-a.k.a Keystone 2 Edison SoC
-K2E  -  66AK2E05: See documentation at
-	http://www.ti.com/product/66AK2E05/technicaldocuments
-
-EVM:
-https://www.einfochips.com/index.php/partnerships/texas-instruments/k2e-evm.html
-
------------- K2L SoC and EVM ---------------------------------------------------
-
-a.k.a Keystone 2 Lamarr SoC
-K2L  -  TCI6630K2L: See documentation at
-	http://www.ti.com/product/TCI6630K2L/technicaldocuments
-EVM:
-https://www.einfochips.com/index.php/partnerships/texas-instruments/k2l-evm.html
-
-Configuration
--------------
-
-All of the K2 SoCs/EVMs share a common defconfig, keystone_defconfig and same
-image is used to boot on individual EVMs. The platform configuration is
-specified through DTS. Following are the DTS used:-
-	K2HK EVM : k2hk-evm.dts
-	K2E EVM  : k2e-evm.dts
-	K2L EVM  : k2l-evm.dts
-
-The device tree documentation for the keystone machines are located at
-        Documentation/devicetree/bindings/arm/keystone/keystone.txt
-
-Document Author
----------------
-Murali Karicheri <m-karicheri2@ti.com>
-Copyright 2015 Texas Instruments
diff --git a/Documentation/arm/keystone/knav-qmss.rst b/Documentation/arm/keystone/knav-qmss.rst
new file mode 100644
index 000000000000..7f7638d80b42
--- /dev/null
+++ b/Documentation/arm/keystone/knav-qmss.rst
@@ -0,0 +1,60 @@
+======================================================================
+Texas Instruments Keystone Navigator Queue Management SubSystem driver
+======================================================================
+
+Driver source code path
+  drivers/soc/ti/knav_qmss.c
+  drivers/soc/ti/knav_qmss_acc.c
+
+The QMSS (Queue Manager Sub System) found on Keystone SOCs is one of
+the main hardware sub system which forms the backbone of the Keystone
+multi-core Navigator. QMSS consist of queue managers, packed-data structure
+processors(PDSP), linking RAM, descriptor pools and infrastructure
+Packet DMA.
+The Queue Manager is a hardware module that is responsible for accelerating
+management of the packet queues. Packets are queued/de-queued by writing or
+reading descriptor address to a particular memory mapped location. The PDSPs
+perform QMSS related functions like accumulation, QoS, or event management.
+Linking RAM registers are used to link the descriptors which are stored in
+descriptor RAM. Descriptor RAM is configurable as internal or external memory.
+The QMSS driver manages the PDSP setups, linking RAM regions,
+queue pool management (allocation, push, pop and notify) and descriptor
+pool management.
+
+knav qmss driver provides a set of APIs to drivers to open/close qmss queues,
+allocate descriptor pools, map the descriptors, push/pop to queues etc. For
+details of the available APIs, please refers to include/linux/soc/ti/knav_qmss.h
+
+DT documentation is available at
+Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt
+
+Accumulator QMSS queues using PDSP firmware
+============================================
+The QMSS PDSP firmware support accumulator channel that can monitor a single
+queue or multiple contiguous queues. drivers/soc/ti/knav_qmss_acc.c is the
+driver that interface with the accumulator PDSP. This configures
+accumulator channels defined in DTS (example in DT documentation) to monitor
+1 or 32 queues per channel. More description on the firmware is available in
+CPPI/QMSS Low Level Driver document (docs/CPPI_QMSS_LLD_SDS.pdf) at
+
+	git://git.ti.com/keystone-rtos/qmss-lld.git
+
+k2_qmss_pdsp_acc48_k2_le_1_0_0_9.bin firmware supports upto 48 accumulator
+channels. This firmware is available under ti-keystone folder of
+firmware.git at
+
+   git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git
+
+To use copy the firmware image to lib/firmware folder of the initramfs or
+ubifs file system and provide a sym link to k2_qmss_pdsp_acc48_k2_le_1_0_0_9.bin
+in the file system and boot up the kernel. User would see
+
+ "firmware file ks2_qmss_pdsp_acc48.bin downloaded for PDSP"
+
+in the boot up log if loading of firmware to PDSP is successful.
+
+Use of accumulated queues requires the firmware image to be present in the
+file system. The driver doesn't acc queues to the supported queue range if
+PDSP is not running in the SoC. The API call fails if there is a queue open
+request to an acc queue and PDSP is not running. So make sure to copy firmware
+to file system before using these queue types.
diff --git a/Documentation/arm/keystone/knav-qmss.txt b/Documentation/arm/keystone/knav-qmss.txt
deleted file mode 100644
index fcdb9fd5f53a..000000000000
--- a/Documentation/arm/keystone/knav-qmss.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-* Texas Instruments Keystone Navigator Queue Management SubSystem driver
-
-Driver source code path
-  drivers/soc/ti/knav_qmss.c
-  drivers/soc/ti/knav_qmss_acc.c
-
-The QMSS (Queue Manager Sub System) found on Keystone SOCs is one of
-the main hardware sub system which forms the backbone of the Keystone
-multi-core Navigator. QMSS consist of queue managers, packed-data structure
-processors(PDSP), linking RAM, descriptor pools and infrastructure
-Packet DMA.
-The Queue Manager is a hardware module that is responsible for accelerating
-management of the packet queues. Packets are queued/de-queued by writing or
-reading descriptor address to a particular memory mapped location. The PDSPs
-perform QMSS related functions like accumulation, QoS, or event management.
-Linking RAM registers are used to link the descriptors which are stored in
-descriptor RAM. Descriptor RAM is configurable as internal or external memory.
-The QMSS driver manages the PDSP setups, linking RAM regions,
-queue pool management (allocation, push, pop and notify) and descriptor
-pool management.
-
-knav qmss driver provides a set of APIs to drivers to open/close qmss queues,
-allocate descriptor pools, map the descriptors, push/pop to queues etc. For
-details of the available APIs, please refers to include/linux/soc/ti/knav_qmss.h
-
-DT documentation is available at
-Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt
-
-Accumulator QMSS queues using PDSP firmware
-============================================
-The QMSS PDSP firmware support accumulator channel that can monitor a single
-queue or multiple contiguous queues. drivers/soc/ti/knav_qmss_acc.c is the
-driver that interface with the accumulator PDSP. This configures
-accumulator channels defined in DTS (example in DT documentation) to monitor
-1 or 32 queues per channel. More description on the firmware is available in
-CPPI/QMSS Low Level Driver document (docs/CPPI_QMSS_LLD_SDS.pdf) at
-	git://git.ti.com/keystone-rtos/qmss-lld.git
-
-k2_qmss_pdsp_acc48_k2_le_1_0_0_9.bin firmware supports upto 48 accumulator
-channels. This firmware is available under ti-keystone folder of
-firmware.git at
-   git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git
-
-To use copy the firmware image to lib/firmware folder of the initramfs or
-ubifs file system and provide a sym link to k2_qmss_pdsp_acc48_k2_le_1_0_0_9.bin
-in the file system and boot up the kernel. User would see
-
- "firmware file ks2_qmss_pdsp_acc48.bin downloaded for PDSP"
-
-in the boot up log if loading of firmware to PDSP is successful.
-
-Use of accumulated queues requires the firmware image to be present in the
-file system. The driver doesn't acc queues to the supported queue range if
-PDSP is not running in the SoC. The API call fails if there is a queue open
-request to an acc queue and PDSP is not running. So make sure to copy firmware
-to file system before using these queue types.
diff --git a/Documentation/arm/keystone/overview.rst b/Documentation/arm/keystone/overview.rst
new file mode 100644
index 000000000000..cd90298c493c
--- /dev/null
+++ b/Documentation/arm/keystone/overview.rst
@@ -0,0 +1,74 @@
+==========================
+TI Keystone Linux Overview
+==========================
+
+Introduction
+------------
+Keystone range of SoCs are based on ARM Cortex-A15 MPCore Processors
+and c66x DSP cores. This document describes essential information required
+for users to run Linux on Keystone based EVMs from Texas Instruments.
+
+Following SoCs  & EVMs are currently supported:-
+
+K2HK SoC and EVM
+=================
+
+a.k.a Keystone 2 Hawking/Kepler SoC
+TCI6636K2H & TCI6636K2K: See documentation at
+
+	http://www.ti.com/product/tci6638k2k
+	http://www.ti.com/product/tci6638k2h
+
+EVM:
+  http://www.advantech.com/Support/TI-EVM/EVMK2HX_sd.aspx
+
+K2E SoC and EVM
+===============
+
+a.k.a Keystone 2 Edison SoC
+
+K2E  -  66AK2E05:
+
+See documentation at
+
+	http://www.ti.com/product/66AK2E05/technicaldocuments
+
+EVM:
+   https://www.einfochips.com/index.php/partnerships/texas-instruments/k2e-evm.html
+
+K2L SoC and EVM
+===============
+
+a.k.a Keystone 2 Lamarr SoC
+
+K2L  -  TCI6630K2L:
+
+See documentation at
+	http://www.ti.com/product/TCI6630K2L/technicaldocuments
+
+EVM:
+  https://www.einfochips.com/index.php/partnerships/texas-instruments/k2l-evm.html
+
+Configuration
+-------------
+
+All of the K2 SoCs/EVMs share a common defconfig, keystone_defconfig and same
+image is used to boot on individual EVMs. The platform configuration is
+specified through DTS. Following are the DTS used:
+
+	K2HK EVM:
+		k2hk-evm.dts
+	K2E EVM:
+		k2e-evm.dts
+	K2L EVM:
+		k2l-evm.dts
+
+The device tree documentation for the keystone machines are located at
+
+        Documentation/devicetree/bindings/arm/keystone/keystone.txt
+
+Document Author
+---------------
+Murali Karicheri <m-karicheri2@ti.com>
+
+Copyright 2015 Texas Instruments
diff --git a/Documentation/arm/marvel.rst b/Documentation/arm/marvel.rst
new file mode 100644
index 000000000000..16ab2eb085b8
--- /dev/null
+++ b/Documentation/arm/marvel.rst
@@ -0,0 +1,488 @@
+================
+ARM Marvell SoCs
+================
+
+This document lists all the ARM Marvell SoCs that are currently
+supported in mainline by the Linux kernel. As the Marvell families of
+SoCs are large and complex, it is hard to understand where the support
+for a particular SoC is available in the Linux kernel. This document
+tries to help in understanding where those SoCs are supported, and to
+match them with their corresponding public datasheet, when available.
+
+Orion family
+------------
+
+  Flavors:
+        - 88F5082
+        - 88F5181
+        - 88F5181L
+        - 88F5182
+
+               - Datasheet: http://www.embeddedarm.com/documentation/third-party/MV88F5182-datasheet.pdf
+               - Programmer's User Guide: http://www.embeddedarm.com/documentation/third-party/MV88F5182-opensource-manual.pdf
+               - User Manual: http://www.embeddedarm.com/documentation/third-party/MV88F5182-usermanual.pdf
+        - 88F5281
+
+               - Datasheet: http://www.ocmodshop.com/images/reviews/networking/qnap_ts409u/marvel_88f5281_data_sheet.pdf
+        - 88F6183
+  Core:
+	Feroceon 88fr331 (88f51xx) or 88fr531-vd (88f52xx) ARMv5 compatible
+  Linux kernel mach directory:
+	arch/arm/mach-orion5x
+  Linux kernel plat directory:
+	arch/arm/plat-orion
+
+Kirkwood family
+---------------
+
+  Flavors:
+        - 88F6282 a.k.a Armada 300
+
+                - Product Brief  : http://www.marvell.com/embedded-processors/armada-300/assets/armada_310.pdf
+        - 88F6283 a.k.a Armada 310
+
+                - Product Brief  : http://www.marvell.com/embedded-processors/armada-300/assets/armada_310.pdf
+        - 88F6190
+
+                - Product Brief  : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6190-003_WEB.pdf
+                - Hardware Spec  : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F619x_OpenSource.pdf
+                - Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+        - 88F6192
+
+                - Product Brief  : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6192-003_ver1.pdf
+                - Hardware Spec  : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F619x_OpenSource.pdf
+                - Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+        - 88F6182
+        - 88F6180
+
+                - Product Brief  : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6180-003_ver1.pdf
+                - Hardware Spec  : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6180_OpenSource.pdf
+                - Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+        - 88F6281
+
+                - Product Brief  : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6281-004_ver1.pdf
+                - Hardware Spec  : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6281_OpenSource.pdf
+                - Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+  Homepage:
+	http://www.marvell.com/embedded-processors/kirkwood/
+  Core:
+	Feroceon 88fr131 ARMv5 compatible
+  Linux kernel mach directory:
+	arch/arm/mach-mvebu
+  Linux kernel plat directory:
+	none
+
+Discovery family
+----------------
+
+  Flavors:
+        - MV78100
+
+                - Product Brief  : http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV78100-003_WEB.pdf
+                - Hardware Spec  : http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV78100_OpenSource.pdf
+                - Functional Spec: http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
+        - MV78200
+
+                - Product Brief  : http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV78200-002_WEB.pdf
+                - Hardware Spec  : http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV78200_OpenSource.pdf
+                - Functional Spec: http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
+        - MV76100
+
+                Not supported by the Linux kernel.
+
+  Core:
+	Feroceon 88fr571-vd ARMv5 compatible
+
+  Linux kernel mach directory:
+	arch/arm/mach-mv78xx0
+  Linux kernel plat directory:
+	arch/arm/plat-orion
+
+EBU Armada family
+-----------------
+
+  Armada 370 Flavors:
+        - 88F6710
+        - 88F6707
+        - 88F6W11
+
+    - Product Brief:   http://www.marvell.com/embedded-processors/armada-300/assets/Marvell_ARMADA_370_SoC.pdf
+    - Hardware Spec:   http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-datasheet.pdf
+    - Functional Spec: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-FunctionalSpec-datasheet.pdf
+
+  Core:
+	Sheeva ARMv7 compatible PJ4B
+
+  Armada 375 Flavors:
+	- 88F6720
+
+    - Product Brief: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA_375_SoC-01_product_brief.pdf
+
+  Core:
+	ARM Cortex-A9
+
+  Armada 38x Flavors:
+	- 88F6810	Armada 380
+	- 88F6820 Armada 385
+	- 88F6828 Armada 388
+
+    - Product infos:   http://www.marvell.com/embedded-processors/armada-38x/
+    - Functional Spec: https://marvellcorp.wufoo.com/forms/marvell-armada-38x-functional-specifications/
+
+  Core:
+	ARM Cortex-A9
+
+  Armada 39x Flavors:
+	- 88F6920 Armada 390
+	- 88F6928 Armada 398
+
+    - Product infos: http://www.marvell.com/embedded-processors/armada-39x/
+
+  Core:
+	ARM Cortex-A9
+
+  Armada XP Flavors:
+        - MV78230
+        - MV78260
+        - MV78460
+
+    NOTE:
+	not to be confused with the non-SMP 78xx0 SoCs
+
+    Product Brief:
+	http://www.marvell.com/embedded-processors/armada-xp/assets/Marvell-ArmadaXP-SoC-product%20brief.pdf
+
+    Functional Spec:
+	http://www.marvell.com/embedded-processors/armada-xp/assets/ARMADA-XP-Functional-SpecDatasheet.pdf
+
+    - Hardware Specs:
+
+        - http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78230_OS.PDF
+        - http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78260_OS.PDF
+        - http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78460_OS.PDF
+
+  Core:
+	Sheeva ARMv7 compatible Dual-core or Quad-core PJ4B-MP
+
+  Linux kernel mach directory:
+	arch/arm/mach-mvebu
+  Linux kernel plat directory:
+	none
+
+EBU Armada family ARMv8
+-----------------------
+
+  Armada 3710/3720 Flavors:
+	- 88F3710
+	- 88F3720
+
+  Core:
+	ARM Cortex A53 (ARMv8)
+
+  Homepage:
+	http://www.marvell.com/embedded-processors/armada-3700/
+
+  Product Brief:
+	http://www.marvell.com/embedded-processors/assets/PB-88F3700-FNL.pdf
+
+  Device tree files:
+	arch/arm64/boot/dts/marvell/armada-37*
+
+  Armada 7K Flavors:
+	  - 88F7020 (AP806 Dual + one CP110)
+	  - 88F7040 (AP806 Quad + one CP110)
+
+  Core: ARM Cortex A72
+
+  Homepage:
+	http://www.marvell.com/embedded-processors/armada-70xx/
+
+  Product Brief:
+	  - http://www.marvell.com/embedded-processors/assets/Armada7020PB-Jan2016.pdf
+	  - http://www.marvell.com/embedded-processors/assets/Armada7040PB-Jan2016.pdf
+
+  Device tree files:
+	arch/arm64/boot/dts/marvell/armada-70*
+
+  Armada 8K Flavors:
+	- 88F8020 (AP806 Dual + two CP110)
+	- 88F8040 (AP806 Quad + two CP110)
+  Core:
+	ARM Cortex A72
+
+  Homepage:
+	http://www.marvell.com/embedded-processors/armada-80xx/
+
+  Product Brief:
+	  - http://www.marvell.com/embedded-processors/assets/Armada8020PB-Jan2016.pdf
+	  - http://www.marvell.com/embedded-processors/assets/Armada8040PB-Jan2016.pdf
+
+  Device tree files:
+	arch/arm64/boot/dts/marvell/armada-80*
+
+Avanta family
+-------------
+
+  Flavors:
+       - 88F6510
+       - 88F6530P
+       - 88F6550
+       - 88F6560
+
+  Homepage:
+	http://www.marvell.com/broadband/
+
+  Product Brief:
+	http://www.marvell.com/broadband/assets/Marvell_Avanta_88F6510_305_060-001_product_brief.pdf
+
+  No public datasheet available.
+
+  Core:
+	ARMv5 compatible
+
+  Linux kernel mach directory:
+	no code in mainline yet, planned for the future
+  Linux kernel plat directory:
+	no code in mainline yet, planned for the future
+
+Storage family
+--------------
+
+  Armada SP:
+	- 88RC1580
+
+  Product infos:
+	http://www.marvell.com/storage/armada-sp/
+
+  Core:
+	Sheeva ARMv7 comatible Quad-core PJ4C
+
+  (not supported in upstream Linux kernel)
+
+Dove family (application processor)
+-----------------------------------
+
+  Flavors:
+        - 88AP510 a.k.a Armada 510
+
+   Product Brief:
+	http://www.marvell.com/application-processors/armada-500/assets/Marvell_Armada510_SoC.pdf
+
+   Hardware Spec:
+	http://www.marvell.com/application-processors/armada-500/assets/Armada-510-Hardware-Spec.pdf
+
+  Functional Spec:
+	http://www.marvell.com/application-processors/armada-500/assets/Armada-510-Functional-Spec.pdf
+
+  Homepage:
+	http://www.marvell.com/application-processors/armada-500/
+
+  Core:
+	ARMv7 compatible
+
+  Directory:
+	- arch/arm/mach-mvebu (DT enabled platforms)
+        - arch/arm/mach-dove (non-DT enabled platforms)
+
+PXA 2xx/3xx/93x/95x family
+--------------------------
+
+  Flavors:
+        - PXA21x, PXA25x, PXA26x
+             - Application processor only
+             - Core: ARMv5 XScale1 core
+        - PXA270, PXA271, PXA272
+             - Product Brief         : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_pb.pdf
+             - Design guide          : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_design_guide.pdf
+             - Developers manual     : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_dev_man.pdf
+             - Specification         : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_emts.pdf
+             - Specification update  : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_spec_update.pdf
+             - Application processor only
+             - Core: ARMv5 XScale2 core
+        - PXA300, PXA310, PXA320
+             - PXA 300 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA300_PB_R4.pdf
+             - PXA 310 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA310_PB_R4.pdf
+             - PXA 320 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA320_PB_R4.pdf
+             - Design guide          : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Design_Guide.pdf
+             - Developers manual     : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Developers_Manual.zip
+             - Specifications        : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_EMTS.pdf
+             - Specification Update  : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Spec_Update.zip
+             - Reference Manual      : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_TavorP_BootROM_Ref_Manual.pdf
+             - Application processor only
+             - Core: ARMv5 XScale3 core
+        - PXA930, PXA935
+             - Application processor with Communication processor
+             - Core: ARMv5 XScale3 core
+        - PXA955
+             - Application processor with Communication processor
+             - Core: ARMv7 compatible Sheeva PJ4 core
+
+   Comments:
+
+    * This line of SoCs originates from the XScale family developed by
+      Intel and acquired by Marvell in ~2006. The PXA21x, PXA25x,
+      PXA26x, PXA27x, PXA3xx and PXA93x were developed by Intel, while
+      the later PXA95x were developed by Marvell.
+
+    * Due to their XScale origin, these SoCs have virtually nothing in
+      common with the other (Kirkwood, Dove, etc.) families of Marvell
+      SoCs, except with the MMP/MMP2 family of SoCs.
+
+   Linux kernel mach directory:
+	arch/arm/mach-pxa
+   Linux kernel plat directory:
+	arch/arm/plat-pxa
+
+MMP/MMP2/MMP3 family (communication processor)
+----------------------------------------------
+
+   Flavors:
+        - PXA168, a.k.a Armada 168
+             - Homepage             : http://www.marvell.com/application-processors/armada-100/armada-168.jsp
+             - Product brief        : http://www.marvell.com/application-processors/armada-100/assets/pxa_168_pb.pdf
+             - Hardware manual      : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_datasheet.pdf
+             - Software manual      : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_software_manual.pdf
+             - Specification update : http://www.marvell.com/application-processors/armada-100/assets/ARMADA16x_Spec_update.pdf
+             - Boot ROM manual      : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_ref_manual.pdf
+             - App node package     : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_app_note_package.pdf
+             - Application processor only
+             - Core: ARMv5 compatible Marvell PJ1 88sv331 (Mohawk)
+        - PXA910/PXA920
+             - Homepage             : http://www.marvell.com/communication-processors/pxa910/
+             - Product Brief        : http://www.marvell.com/communication-processors/pxa910/assets/Marvell_PXA910_Platform-001_PB_final.pdf
+             - Application processor with Communication processor
+             - Core: ARMv5 compatible Marvell PJ1 88sv331 (Mohawk)
+        - PXA688, a.k.a. MMP2, a.k.a Armada 610
+             - Product Brief        : http://www.marvell.com/application-processors/armada-600/assets/armada610_pb.pdf
+             - Application processor only
+             - Core: ARMv7 compatible Sheeva PJ4 88sv581x core
+	- PXA2128, a.k.a. MMP3 (OLPC XO4, Linux support not upstream)
+	     - Product Brief	  : http://www.marvell.com/application-processors/armada/pxa2128/assets/Marvell-ARMADA-PXA2128-SoC-PB.pdf
+	     - Application processor only
+	     - Core: Dual-core ARMv7 compatible Sheeva PJ4C core
+	- PXA960/PXA968/PXA978 (Linux support not upstream)
+	     - Application processor with Communication Processor
+	     - Core: ARMv7 compatible Sheeva PJ4 core
+	- PXA986/PXA988 (Linux support not upstream)
+	     - Application processor with Communication Processor
+	     - Core: Dual-core ARMv7 compatible Sheeva PJ4B-MP core
+	- PXA1088/PXA1920 (Linux support not upstream)
+	     - Application processor with Communication Processor
+	     - Core: quad-core ARMv7 Cortex-A7
+	- PXA1908/PXA1928/PXA1936
+	     - Application processor with Communication Processor
+	     - Core: multi-core ARMv8 Cortex-A53
+
+   Comments:
+
+    * This line of SoCs originates from the XScale family developed by
+      Intel and acquired by Marvell in ~2006. All the processors of
+      this MMP/MMP2 family were developed by Marvell.
+
+    * Due to their XScale origin, these SoCs have virtually nothing in
+      common with the other (Kirkwood, Dove, etc.) families of Marvell
+      SoCs, except with the PXA family of SoCs listed above.
+
+   Linux kernel mach directory:
+	arch/arm/mach-mmp
+   Linux kernel plat directory:
+	arch/arm/plat-pxa
+
+Berlin family (Multimedia Solutions)
+-------------------------------------
+
+  - Flavors:
+	- 88DE3010, Armada 1000 (no Linux support)
+		- Core:		Marvell PJ1 (ARMv5TE), Dual-core
+		- Product Brief:	http://www.marvell.com.cn/digital-entertainment/assets/armada_1000_pb.pdf
+	- 88DE3005, Armada 1500 Mini
+		- Design name:	BG2CD
+		- Core:		ARM Cortex-A9, PL310 L2CC
+	- 88DE3006, Armada 1500 Mini Plus
+		- Design name:	BG2CDP
+		- Core:		Dual Core ARM Cortex-A7
+	- 88DE3100, Armada 1500
+		- Design name:	BG2
+		- Core:		Marvell PJ4B-MP (ARMv7), Tauros3 L2CC
+	- 88DE3114, Armada 1500 Pro
+		- Design name:	BG2Q
+		- Core:		Quad Core ARM Cortex-A9, PL310 L2CC
+	- 88DE3214, Armada 1500 Pro 4K
+		- Design name:	BG3
+		- Core:		ARM Cortex-A15, CA15 integrated L2CC
+	- 88DE3218, ARMADA 1500 Ultra
+		- Core:		ARM Cortex-A53
+
+  Homepage: https://www.synaptics.com/products/multimedia-solutions
+  Directory: arch/arm/mach-berlin
+
+  Comments:
+
+   * This line of SoCs is based on Marvell Sheeva or ARM Cortex CPUs
+     with Synopsys DesignWare (IRQ, GPIO, Timers, ...) and PXA IP (SDHCI, USB, ETH, ...).
+
+   * The Berlin family was acquired by Synaptics from Marvell in 2017.
+
+CPU Cores
+---------
+
+The XScale cores were designed by Intel, and shipped by Marvell in the older
+PXA processors. Feroceon is a Marvell designed core that developed in-house,
+and that evolved into Sheeva. The XScale and Feroceon cores were phased out
+over time and replaced with Sheeva cores in later products, which subsequently
+got replaced with licensed ARM Cortex-A cores.
+
+  XScale 1
+	CPUID 0x69052xxx
+	ARMv5, iWMMXt
+  XScale 2
+	CPUID 0x69054xxx
+	ARMv5, iWMMXt
+  XScale 3
+	CPUID 0x69056xxx or 0x69056xxx
+	ARMv5, iWMMXt
+  Feroceon-1850 88fr331 "Mohawk"
+	CPUID 0x5615331x or 0x41xx926x
+	ARMv5TE, single issue
+  Feroceon-2850 88fr531-vd "Jolteon"
+	CPUID 0x5605531x or 0x41xx926x
+	ARMv5TE, VFP, dual-issue
+  Feroceon 88fr571-vd "Jolteon"
+	CPUID 0x5615571x
+	ARMv5TE, VFP, dual-issue
+  Feroceon 88fr131 "Mohawk-D"
+	CPUID 0x5625131x
+	ARMv5TE, single-issue in-order
+  Sheeva PJ1 88sv331 "Mohawk"
+	CPUID 0x561584xx
+	ARMv5, single-issue iWMMXt v2
+  Sheeva PJ4 88sv581x "Flareon"
+	CPUID 0x560f581x
+	ARMv7, idivt, optional iWMMXt v2
+  Sheeva PJ4B 88sv581x
+	CPUID 0x561f581x
+	ARMv7, idivt, optional iWMMXt v2
+  Sheeva PJ4B-MP / PJ4C
+	CPUID 0x562f584x
+	ARMv7, idivt/idiva, LPAE, optional iWMMXt v2 and/or NEON
+
+Long-term plans
+---------------
+
+ * Unify the mach-dove/, mach-mv78xx0/, mach-orion5x/ into the
+   mach-mvebu/ to support all SoCs from the Marvell EBU (Engineering
+   Business Unit) in a single mach-<foo> directory. The plat-orion/
+   would therefore disappear.
+
+ * Unify the mach-mmp/ and mach-pxa/ into the same mach-pxa
+   directory. The plat-pxa/ would therefore disappear.
+
+Credits
+-------
+
+- Maen Suleiman <maen@marvell.com>
+- Lior Amsalem <alior@marvell.com>
+- Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+- Andrew Lunn <andrew@lunn.ch>
+- Nicolas Pitre <nico@fluxnic.net>
+- Eric Miao <eric.y.miao@gmail.com>
diff --git a/Documentation/arm/mem_alignment b/Documentation/arm/mem_alignment
deleted file mode 100644
index e110e2781039..000000000000
--- a/Documentation/arm/mem_alignment
+++ /dev/null
@@ -1,58 +0,0 @@
-Too many problems popped up because of unnoticed misaligned memory access in
-kernel code lately.  Therefore the alignment fixup is now unconditionally
-configured in for SA11x0 based targets.  According to Alan Cox, this is a
-bad idea to configure it out, but Russell King has some good reasons for
-doing so on some f***ed up ARM architectures like the EBSA110.  However
-this is not the case on many design I'm aware of, like all SA11x0 based
-ones.
-
-Of course this is a bad idea to rely on the alignment trap to perform
-unaligned memory access in general.  If those access are predictable, you
-are better to use the macros provided by include/asm/unaligned.h.  The
-alignment trap can fixup misaligned access for the exception cases, but at
-a high performance cost.  It better be rare.
-
-Now for user space applications, it is possible to configure the alignment
-trap to SIGBUS any code performing unaligned access (good for debugging bad
-code), or even fixup the access by software like for kernel code.  The later
-mode isn't recommended for performance reasons (just think about the
-floating point emulation that works about the same way).  Fix your code
-instead!
-
-Please note that randomly changing the behaviour without good thought is
-real bad - it changes the behaviour of all unaligned instructions in user
-space, and might cause programs to fail unexpectedly.
-
-To change the alignment trap behavior, simply echo a number into
-/proc/cpu/alignment.  The number is made up from various bits:
-
-bit		behavior when set
----		-----------------
-
-0		A user process performing an unaligned memory access
-		will cause the kernel to print a message indicating
-		process name, pid, pc, instruction, address, and the
-		fault code.
-
-1		The kernel will attempt to fix up the user process
-		performing the unaligned access.  This is of course
-		slow (think about the floating point emulator) and
-		not recommended for production use.
-
-2		The kernel will send a SIGBUS signal to the user process
-		performing the unaligned access.
-
-Note that not all combinations are supported - only values 0 through 5.
-(6 and 7 don't make sense).
-
-For example, the following will turn on the warnings, but without
-fixing up or sending SIGBUS signals:
-
-	echo 1 > /proc/cpu/alignment
-
-You can also read the content of the same file to get statistical
-information on unaligned access occurrences plus the current mode of
-operation for user space code.
-
-
-Nicolas Pitre, Mar 13, 2001.  Modified Russell King, Nov 30, 2001.
diff --git a/Documentation/arm/mem_alignment.rst b/Documentation/arm/mem_alignment.rst
new file mode 100644
index 000000000000..aa22893b62bc
--- /dev/null
+++ b/Documentation/arm/mem_alignment.rst
@@ -0,0 +1,63 @@
+================
+Memory alignment
+================
+
+Too many problems popped up because of unnoticed misaligned memory access in
+kernel code lately.  Therefore the alignment fixup is now unconditionally
+configured in for SA11x0 based targets.  According to Alan Cox, this is a
+bad idea to configure it out, but Russell King has some good reasons for
+doing so on some f***ed up ARM architectures like the EBSA110.  However
+this is not the case on many design I'm aware of, like all SA11x0 based
+ones.
+
+Of course this is a bad idea to rely on the alignment trap to perform
+unaligned memory access in general.  If those access are predictable, you
+are better to use the macros provided by include/asm/unaligned.h.  The
+alignment trap can fixup misaligned access for the exception cases, but at
+a high performance cost.  It better be rare.
+
+Now for user space applications, it is possible to configure the alignment
+trap to SIGBUS any code performing unaligned access (good for debugging bad
+code), or even fixup the access by software like for kernel code.  The later
+mode isn't recommended for performance reasons (just think about the
+floating point emulation that works about the same way).  Fix your code
+instead!
+
+Please note that randomly changing the behaviour without good thought is
+real bad - it changes the behaviour of all unaligned instructions in user
+space, and might cause programs to fail unexpectedly.
+
+To change the alignment trap behavior, simply echo a number into
+/proc/cpu/alignment.  The number is made up from various bits:
+
+===		========================================================
+bit		behavior when set
+===		========================================================
+0		A user process performing an unaligned memory access
+		will cause the kernel to print a message indicating
+		process name, pid, pc, instruction, address, and the
+		fault code.
+
+1		The kernel will attempt to fix up the user process
+		performing the unaligned access.  This is of course
+		slow (think about the floating point emulator) and
+		not recommended for production use.
+
+2		The kernel will send a SIGBUS signal to the user process
+		performing the unaligned access.
+===		========================================================
+
+Note that not all combinations are supported - only values 0 through 5.
+(6 and 7 don't make sense).
+
+For example, the following will turn on the warnings, but without
+fixing up or sending SIGBUS signals::
+
+	echo 1 > /proc/cpu/alignment
+
+You can also read the content of the same file to get statistical
+information on unaligned access occurrences plus the current mode of
+operation for user space code.
+
+
+Nicolas Pitre, Mar 13, 2001.  Modified Russell King, Nov 30, 2001.
diff --git a/Documentation/arm/memory.rst b/Documentation/arm/memory.rst
new file mode 100644
index 000000000000..0521b4ce5c96
--- /dev/null
+++ b/Documentation/arm/memory.rst
@@ -0,0 +1,93 @@
+=================================
+Kernel Memory Layout on ARM Linux
+=================================
+
+		Russell King <rmk@arm.linux.org.uk>
+
+		     November 17, 2005 (2.6.15)
+
+This document describes the virtual memory layout which the Linux
+kernel uses for ARM processors.  It indicates which regions are
+free for platforms to use, and which are used by generic code.
+
+The ARM CPU is capable of addressing a maximum of 4GB virtual memory
+space, and this must be shared between user space processes, the
+kernel, and hardware devices.
+
+As the ARM architecture matures, it becomes necessary to reserve
+certain regions of VM space for use for new facilities; therefore
+this document may reserve more VM space over time.
+
+=============== =============== ===============================================
+Start		End		Use
+=============== =============== ===============================================
+ffff8000	ffffffff	copy_user_page / clear_user_page use.
+				For SA11xx and Xscale, this is used to
+				setup a minicache mapping.
+
+ffff4000	ffffffff	cache aliasing on ARMv6 and later CPUs.
+
+ffff1000	ffff7fff	Reserved.
+				Platforms must not use this address range.
+
+ffff0000	ffff0fff	CPU vector page.
+				The CPU vectors are mapped here if the
+				CPU supports vector relocation (control
+				register V bit.)
+
+fffe0000	fffeffff	XScale cache flush area.  This is used
+				in proc-xscale.S to flush the whole data
+				cache. (XScale does not have TCM.)
+
+fffe8000	fffeffff	DTCM mapping area for platforms with
+				DTCM mounted inside the CPU.
+
+fffe0000	fffe7fff	ITCM mapping area for platforms with
+				ITCM mounted inside the CPU.
+
+ffc00000	ffefffff	Fixmap mapping region.  Addresses provided
+				by fix_to_virt() will be located here.
+
+fee00000	feffffff	Mapping of PCI I/O space. This is a static
+				mapping within the vmalloc space.
+
+VMALLOC_START	VMALLOC_END-1	vmalloc() / ioremap() space.
+				Memory returned by vmalloc/ioremap will
+				be dynamically placed in this region.
+				Machine specific static mappings are also
+				located here through iotable_init().
+				VMALLOC_START is based upon the value
+				of the high_memory variable, and VMALLOC_END
+				is equal to 0xff800000.
+
+PAGE_OFFSET	high_memory-1	Kernel direct-mapped RAM region.
+				This maps the platforms RAM, and typically
+				maps all platform RAM in a 1:1 relationship.
+
+PKMAP_BASE	PAGE_OFFSET-1	Permanent kernel mappings
+				One way of mapping HIGHMEM pages into kernel
+				space.
+
+MODULES_VADDR	MODULES_END-1	Kernel module space
+				Kernel modules inserted via insmod are
+				placed here using dynamic mappings.
+
+00001000	TASK_SIZE-1	User space mappings
+				Per-thread mappings are placed here via
+				the mmap() system call.
+
+00000000	00000fff	CPU vector page / null pointer trap
+				CPUs which do not support vector remapping
+				place their vector page here.  NULL pointer
+				dereferences by both the kernel and user
+				space are also caught via this mapping.
+=============== =============== ===============================================
+
+Please note that mappings which collide with the above areas may result
+in a non-bootable kernel, or may cause the kernel to (eventually) panic
+at run time.
+
+Since future CPUs may impact the kernel mapping layout, user programs
+must not access any memory which is not mapped inside their 0x0001000
+to TASK_SIZE address range.  If they wish to access these areas, they
+must set up their own mappings using open() and mmap().
diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt
deleted file mode 100644
index 546a39048eb0..000000000000
--- a/Documentation/arm/memory.txt
+++ /dev/null
@@ -1,88 +0,0 @@
-		Kernel Memory Layout on ARM Linux
-
-		Russell King <rmk@arm.linux.org.uk>
-		     November 17, 2005 (2.6.15)
-
-This document describes the virtual memory layout which the Linux
-kernel uses for ARM processors.  It indicates which regions are
-free for platforms to use, and which are used by generic code.
-
-The ARM CPU is capable of addressing a maximum of 4GB virtual memory
-space, and this must be shared between user space processes, the
-kernel, and hardware devices.
-
-As the ARM architecture matures, it becomes necessary to reserve
-certain regions of VM space for use for new facilities; therefore
-this document may reserve more VM space over time.
-
-Start		End		Use
---------------------------------------------------------------------------
-ffff8000	ffffffff	copy_user_page / clear_user_page use.
-				For SA11xx and Xscale, this is used to
-				setup a minicache mapping.
-
-ffff4000	ffffffff	cache aliasing on ARMv6 and later CPUs.
-
-ffff1000	ffff7fff	Reserved.
-				Platforms must not use this address range.
-
-ffff0000	ffff0fff	CPU vector page.
-				The CPU vectors are mapped here if the
-				CPU supports vector relocation (control
-				register V bit.)
-
-fffe0000	fffeffff	XScale cache flush area.  This is used
-				in proc-xscale.S to flush the whole data
-				cache. (XScale does not have TCM.)
-
-fffe8000	fffeffff	DTCM mapping area for platforms with
-				DTCM mounted inside the CPU.
-
-fffe0000	fffe7fff	ITCM mapping area for platforms with
-				ITCM mounted inside the CPU.
-
-ffc00000	ffefffff	Fixmap mapping region.  Addresses provided
-				by fix_to_virt() will be located here.
-
-fee00000	feffffff	Mapping of PCI I/O space. This is a static
-				mapping within the vmalloc space.
-
-VMALLOC_START	VMALLOC_END-1	vmalloc() / ioremap() space.
-				Memory returned by vmalloc/ioremap will
-				be dynamically placed in this region.
-				Machine specific static mappings are also
-				located here through iotable_init().
-				VMALLOC_START is based upon the value
-				of the high_memory variable, and VMALLOC_END
-				is equal to 0xff800000.
-
-PAGE_OFFSET	high_memory-1	Kernel direct-mapped RAM region.
-				This maps the platforms RAM, and typically
-				maps all platform RAM in a 1:1 relationship.
-
-PKMAP_BASE	PAGE_OFFSET-1	Permanent kernel mappings
-				One way of mapping HIGHMEM pages into kernel
-				space.
-
-MODULES_VADDR	MODULES_END-1	Kernel module space
-				Kernel modules inserted via insmod are
-				placed here using dynamic mappings.
-
-00001000	TASK_SIZE-1	User space mappings
-				Per-thread mappings are placed here via
-				the mmap() system call.
-
-00000000	00000fff	CPU vector page / null pointer trap
-				CPUs which do not support vector remapping
-				place their vector page here.  NULL pointer
-				dereferences by both the kernel and user
-				space are also caught via this mapping.
-
-Please note that mappings which collide with the above areas may result
-in a non-bootable kernel, or may cause the kernel to (eventually) panic
-at run time.
-
-Since future CPUs may impact the kernel mapping layout, user programs
-must not access any memory which is not mapped inside their 0x0001000
-to TASK_SIZE address range.  If they wish to access these areas, they
-must set up their own mappings using open() and mmap().
diff --git a/Documentation/arm/microchip.rst b/Documentation/arm/microchip.rst
new file mode 100644
index 000000000000..c9a44c98e868
--- /dev/null
+++ b/Documentation/arm/microchip.rst
@@ -0,0 +1,204 @@
+=============================
+ARM Microchip SoCs (aka AT91)
+=============================
+
+
+Introduction
+------------
+This document gives useful information about the ARM Microchip SoCs that are
+currently supported in Linux Mainline (you know, the one on kernel.org).
+
+It is important to note that the Microchip (previously Atmel) ARM-based MPU
+product line is historically named "AT91" or "at91" throughout the Linux kernel
+development process even if this product prefix has completely disappeared from
+the official Microchip product name. Anyway, files, directories, git trees,
+git branches/tags and email subject always contain this "at91" sub-string.
+
+
+AT91 SoCs
+---------
+Documentation and detailed datasheet for each product are available on
+the Microchip website: http://www.microchip.com.
+
+  Flavors:
+    * ARM 920 based SoC
+      - at91rm9200
+
+          * Datasheet
+
+          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-1768-32-bit-ARM920T-Embedded-Microprocessor-AT91RM9200_Datasheet.pdf
+
+    * ARM 926 based SoCs
+      - at91sam9260
+
+          * Datasheet
+
+          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6221-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9260_Datasheet.pdf
+
+      - at91sam9xe
+
+          * Datasheet
+
+          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6254-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9XE_Datasheet.pdf
+
+      - at91sam9261
+
+          * Datasheet
+
+          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6062-ARM926EJ-S-Microprocessor-SAM9261_Datasheet.pdf
+
+      - at91sam9263
+
+          * Datasheet
+
+          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6249-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9263_Datasheet.pdf
+
+      - at91sam9rl
+
+          * Datasheet
+
+          http://ww1.microchip.com/downloads/en/DeviceDoc/doc6289.pdf
+
+      - at91sam9g20
+
+          * Datasheet
+
+          http://ww1.microchip.com/downloads/en/DeviceDoc/DS60001516A.pdf
+
+      - at91sam9g45 family
+        - at91sam9g45
+        - at91sam9g46
+        - at91sam9m10
+        - at91sam9m11 (device superset)
+
+          * Datasheet
+
+          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6437-32-bit-ARM926-Embedded-Microprocessor-SAM9M11_Datasheet.pdf
+
+      - at91sam9x5 family (aka "The 5 series")
+        - at91sam9g15
+        - at91sam9g25
+        - at91sam9g35
+        - at91sam9x25
+        - at91sam9x35
+
+          * Datasheet (can be considered as covering the whole family)
+
+          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-11055-32-bit-ARM926EJ-S-Microcontroller-SAM9X35_Datasheet.pdf
+
+      - at91sam9n12
+
+          * Datasheet
+
+          http://ww1.microchip.com/downloads/en/DeviceDoc/DS60001517A.pdf
+
+    * ARM Cortex-A5 based SoCs
+      - sama5d3 family
+
+        - sama5d31
+        - sama5d33
+        - sama5d34
+        - sama5d35
+        - sama5d36 (device superset)
+
+          * Datasheet
+
+          http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-11121-32-bit-Cortex-A5-Microcontroller-SAMA5D3_Datasheet.pdf
+
+    * ARM Cortex-A5 + NEON based SoCs
+      - sama5d4 family
+
+        - sama5d41
+        - sama5d42
+        - sama5d43
+        - sama5d44 (device superset)
+
+          * Datasheet
+
+          http://ww1.microchip.com/downloads/en/DeviceDoc/60001525A.pdf
+
+      - sama5d2 family
+
+        - sama5d21
+        - sama5d22
+        - sama5d23
+        - sama5d24
+        - sama5d26
+        - sama5d27 (device superset)
+        - sama5d28 (device superset + environmental monitors)
+
+          * Datasheet
+
+          http://ww1.microchip.com/downloads/en/DeviceDoc/DS60001476B.pdf
+
+    * ARM Cortex-M7 MCUs
+      - sams70 family
+
+        - sams70j19
+        - sams70j20
+        - sams70j21
+        - sams70n19
+        - sams70n20
+        - sams70n21
+        - sams70q19
+        - sams70q20
+        - sams70q21
+
+      - samv70 family
+
+        - samv70j19
+        - samv70j20
+        - samv70n19
+        - samv70n20
+        - samv70q19
+        - samv70q20
+
+      - samv71 family
+
+        - samv71j19
+        - samv71j20
+        - samv71j21
+        - samv71n19
+        - samv71n20
+        - samv71n21
+        - samv71q19
+        - samv71q20
+        - samv71q21
+
+          * Datasheet
+
+          http://ww1.microchip.com/downloads/en/DeviceDoc/60001527A.pdf
+
+
+Linux kernel information
+------------------------
+Linux kernel mach directory: arch/arm/mach-at91
+MAINTAINERS entry is: "ARM/Microchip (AT91) SoC support"
+
+
+Device Tree for AT91 SoCs and boards
+------------------------------------
+All AT91 SoCs are converted to Device Tree. Since Linux 3.19, these products
+must use this method to boot the Linux kernel.
+
+Work In Progress statement:
+Device Tree files and Device Tree bindings that apply to AT91 SoCs and boards are
+considered as "Unstable". To be completely clear, any at91 binding can change at
+any time. So, be sure to use a Device Tree Binary and a Kernel Image generated from
+the same source tree.
+Please refer to the Documentation/devicetree/bindings/ABI.txt file for a
+definition of a "Stable" binding/ABI.
+This statement will be removed by AT91 MAINTAINERS when appropriate.
+
+Naming conventions and best practice:
+
+- SoCs Device Tree Source Include files are named after the official name of
+  the product (at91sam9g20.dtsi or sama5d33.dtsi for instance).
+- Device Tree Source Include files (.dtsi) are used to collect common nodes that can be
+  shared across SoCs or boards (sama5d3.dtsi or at91sam9x5cm.dtsi for instance).
+  When collecting nodes for a particular peripheral or topic, the identifier have to
+  be placed at the end of the file name, separated with a "_" (at91sam9x5_can.dtsi
+  or sama5d3_gmac.dtsi for example).
+- board Device Tree Source files (.dts) are prefixed by the string "at91-" so
+  that they can be identified easily. Note that some files are historical exceptions
+  to this rule (sama5d3[13456]ek.dts, usb_a9g20.dts or animeo_ip.dts for example).
diff --git a/Documentation/arm/netwinder.rst b/Documentation/arm/netwinder.rst
new file mode 100644
index 000000000000..8eab66caa2ac
--- /dev/null
+++ b/Documentation/arm/netwinder.rst
@@ -0,0 +1,85 @@
+================================
+NetWinder specific documentation
+================================
+
+The NetWinder is a small low-power computer, primarily designed
+to run Linux.  It is based around the StrongARM RISC processor,
+DC21285 PCI bridge, with PC-type hardware glued around it.
+
+Port usage
+==========
+
+=======  ====== ===============================
+Min      Max	Description
+=======  ====== ===============================
+0x0000   0x000f	DMA1
+0x0020   0x0021	PIC1
+0x0060   0x006f	Keyboard
+0x0070   0x007f	RTC
+0x0080   0x0087	DMA1
+0x0088   0x008f	DMA2
+0x00a0   0x00a3	PIC2
+0x00c0   0x00df	DMA2
+0x0180   0x0187	IRDA
+0x01f0   0x01f6	ide0
+0x0201		Game port
+0x0203		RWA010 configuration read
+0x0220   ?	SoundBlaster
+0x0250   ?	WaveArtist
+0x0279		RWA010 configuration index
+0x02f8   0x02ff	Serial ttyS1
+0x0300   0x031f	Ether10
+0x0338		GPIO1
+0x033a		GPIO2
+0x0370   0x0371	W83977F configuration registers
+0x0388   ?	AdLib
+0x03c0   0x03df	VGA
+0x03f6		ide0
+0x03f8   0x03ff	Serial ttyS0
+0x0400   0x0408	DC21143
+0x0480   0x0487	DMA1
+0x0488   0x048f	DMA2
+0x0a79		RWA010 configuration write
+0xe800   0xe80f	ide0/ide1 BM DMA
+=======  ====== ===============================
+
+
+Interrupt usage
+===============
+
+======= ======= ========================
+IRQ	type	Description
+======= ======= ========================
+ 0	ISA	100Hz timer
+ 1	ISA	Keyboard
+ 2	ISA	cascade
+ 3	ISA	Serial ttyS1
+ 4	ISA	Serial ttyS0
+ 5	ISA	PS/2 mouse
+ 6	ISA	IRDA
+ 7	ISA	Printer
+ 8	ISA	RTC alarm
+ 9	ISA
+10	ISA	GP10 (Orange reset button)
+11	ISA
+12	ISA	WaveArtist
+13	ISA
+14	ISA	hda1
+15	ISA
+======= ======= ========================
+
+DMA usage
+=========
+
+======= ======= ===========
+DMA	type	Description
+======= ======= ===========
+ 0	ISA	IRDA
+ 1	ISA
+ 2	ISA	cascade
+ 3	ISA	WaveArtist
+ 4	ISA
+ 5	ISA
+ 6	ISA
+ 7	ISA	WaveArtist
+======= ======= ===========
diff --git a/Documentation/arm/nwfpe/NOTES b/Documentation/arm/nwfpe/NOTES
deleted file mode 100644
index 40577b5a49d3..000000000000
--- a/Documentation/arm/nwfpe/NOTES
+++ /dev/null
@@ -1,29 +0,0 @@
-There seems to be a problem with exp(double) and our emulator.  I haven't
-been able to track it down yet.  This does not occur with the emulator
-supplied by Russell King.
-
-I also found one oddity in the emulator.  I don't think it is serious but
-will point it out.  The ARM calling conventions require floating point
-registers f4-f7 to be preserved over a function call.  The compiler quite
-often uses an stfe instruction to save f4 on the stack upon entry to a
-function, and an ldfe instruction to restore it before returning.
-
-I was looking at some code, that calculated a double result, stored it in f4
-then made a function call. Upon return from the function call the number in
-f4 had been converted to an extended value in the emulator.
-
-This is a side effect of the stfe instruction.  The double in f4 had to be
-converted to extended, then stored.  If an lfm/sfm combination had been used,
-then no conversion would occur.  This has performance considerations.  The
-result from the function call and f4 were used in a multiplication.  If the
-emulator sees a multiply of a double and extended, it promotes the double to
-extended, then does the multiply in extended precision.
-
-This code will cause this problem:
-
-double x, y, z;
-z = log(x)/log(y);
-
-The result of log(x) (a double) will be calculated, returned in f0, then
-moved to f4 to preserve it over the log(y) call.  The division will be done
-in extended precision, due to the stfe instruction used to save f4 in log(y).
diff --git a/Documentation/arm/nwfpe/README b/Documentation/arm/nwfpe/README
deleted file mode 100644
index 771871de0c8b..000000000000
--- a/Documentation/arm/nwfpe/README
+++ /dev/null
@@ -1,70 +0,0 @@
-This directory contains the version 0.92 test release of the NetWinder 
-Floating Point Emulator.
-
-The majority of the code was written by me, Scott Bambrough It is
-written in C, with a small number of routines in inline assembler
-where required.  It was written quickly, with a goal of implementing a
-working version of all the floating point instructions the compiler
-emits as the first target.  I have attempted to be as optimal as
-possible, but there remains much room for improvement.
-
-I have attempted to make the emulator as portable as possible.  One of
-the problems is with leading underscores on kernel symbols.  Elf
-kernels have no leading underscores, a.out compiled kernels do.  I
-have attempted to use the C_SYMBOL_NAME macro wherever this may be
-important.
-
-Another choice I made was in the file structure.  I have attempted to
-contain all operating system specific code in one module (fpmodule.*).
-All the other files contain emulator specific code.  This should allow
-others to port the emulator to NetBSD for instance relatively easily.
-
-The floating point operations are based on SoftFloat Release 2, by
-John Hauser.  SoftFloat is a software implementation of floating-point
-that conforms to the IEC/IEEE Standard for Binary Floating-point
-Arithmetic.  As many as four formats are supported: single precision,
-double precision, extended double precision, and quadruple precision.
-All operations required by the standard are implemented, except for
-conversions to and from decimal.  We use only the single precision,
-double precision and extended double precision formats.  The port of
-SoftFloat to the ARM was done by Phil Blundell, based on an earlier
-port of SoftFloat version 1 by Neil Carson for NetBSD/arm32.
-
-The file README.FPE contains a description of what has been implemented
-so far in the emulator.  The file TODO contains a information on what 
-remains to be done, and other ideas for the emulator.
-
-Bug reports, comments, suggestions should be directed to me at
-<scottb@netwinder.org>.  General reports of "this program doesn't
-work correctly when your emulator is installed" are useful for
-determining that bugs still exist; but are virtually useless when
-attempting to isolate the problem.  Please report them, but don't
-expect quick action.  Bugs still exist.  The problem remains in isolating
-which instruction contains the bug.  Small programs illustrating a specific
-problem are a godsend.
-
-Legal Notices
--------------
-
-The NetWinder Floating Point Emulator is free software.  Everything Rebel.com
-has written is provided under the GNU GPL.  See the file COPYING for copying
-conditions.  Excluded from the above is the SoftFloat code.  John Hauser's 
-legal notice for SoftFloat is included below.
-
--------------------------------------------------------------------------------
-SoftFloat Legal Notice
-
-SoftFloat was written by John R. Hauser.  This work was made possible in
-part by the International Computer Science Institute, located at Suite 600,
-1947 Center Street, Berkeley, California 94704.  Funding was partially
-provided by the National Science Foundation under grant MIP-9311980.  The
-original version of this code was written as part of a project to build
-a fixed-point vector processor in collaboration with the University of
-California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek.
-
-THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
-has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
-TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
-PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
-AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
--------------------------------------------------------------------------------
diff --git a/Documentation/arm/nwfpe/README.FPE b/Documentation/arm/nwfpe/README.FPE
deleted file mode 100644
index 26f5d7bb9a41..000000000000
--- a/Documentation/arm/nwfpe/README.FPE
+++ /dev/null
@@ -1,156 +0,0 @@
-The following describes the current state of the NetWinder's floating point
-emulator.
-
-In the following nomenclature is used to describe the floating point
-instructions.  It follows the conventions in the ARM manual.
-
-<S|D|E> = <single|double|extended>, no default
-{P|M|Z} = {round to +infinity,round to -infinity,round to zero},
-          default = round to nearest
-
-Note: items enclosed in {} are optional.
-
-Floating Point Coprocessor Data Transfer Instructions (CPDT)
-------------------------------------------------------------
-
-LDF/STF - load and store floating
-
-<LDF|STF>{cond}<S|D|E> Fd, Rn
-<LDF|STF>{cond}<S|D|E> Fd, [Rn, #<expression>]{!}
-<LDF|STF>{cond}<S|D|E> Fd, [Rn], #<expression>
-
-These instructions are fully implemented.
-
-LFM/SFM - load and store multiple floating
-
-Form 1 syntax:
-<LFM|SFM>{cond}<S|D|E> Fd, <count>, [Rn]
-<LFM|SFM>{cond}<S|D|E> Fd, <count>, [Rn, #<expression>]{!}
-<LFM|SFM>{cond}<S|D|E> Fd, <count>, [Rn], #<expression>
-
-Form 2 syntax:
-<LFM|SFM>{cond}<FD,EA> Fd, <count>, [Rn]{!}
-
-These instructions are fully implemented.  They store/load three words
-for each floating point register into the memory location given in the 
-instruction.  The format in memory is unlikely to be compatible with
-other implementations, in particular the actual hardware.  Specific
-mention of this is made in the ARM manuals.  
-
-Floating Point Coprocessor Register Transfer Instructions (CPRT)
-----------------------------------------------------------------
-
-Conversions, read/write status/control register instructions
-
-FLT{cond}<S,D,E>{P,M,Z} Fn, Rd          Convert integer to floating point
-FIX{cond}{P,M,Z} Rd, Fn                 Convert floating point to integer
-WFS{cond} Rd                            Write floating point status register
-RFS{cond} Rd                            Read floating point status register
-WFC{cond} Rd                            Write floating point control register
-RFC{cond} Rd                            Read floating point control register
-
-FLT/FIX are fully implemented.
-
-RFS/WFS are fully implemented.
-
-RFC/WFC are fully implemented.  RFC/WFC are supervisor only instructions, and
-presently check the CPU mode, and do an invalid instruction trap if not called
-from supervisor mode.
-
-Compare instructions
-
-CMF{cond} Fn, Fm        Compare floating
-CMFE{cond} Fn, Fm       Compare floating with exception
-CNF{cond} Fn, Fm        Compare negated floating
-CNFE{cond} Fn, Fm       Compare negated floating with exception
-
-These are fully implemented.
-
-Floating Point Coprocessor Data Instructions (CPDT)
----------------------------------------------------
-
-Dyadic operations:
-
-ADF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - add
-SUF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - subtract
-RSF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse subtract
-MUF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - multiply
-DVF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - divide
-RDV{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse divide
-
-These are fully implemented.
-
-FML{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - fast multiply
-FDV{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - fast divide
-FRD{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - fast reverse divide
-
-These are fully implemented as well.  They use the same algorithm as the
-non-fast versions.  Hence, in this implementation their performance is
-equivalent to the MUF/DVF/RDV instructions.  This is acceptable according
-to the ARM manual.  The manual notes these are defined only for single
-operands, on the actual FPA11 hardware they do not work for double or
-extended precision operands.  The emulator currently does not check
-the requested permissions conditions, and performs the requested operation.
-
-RMF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - IEEE remainder
-
-This is fully implemented.
-
-Monadic operations:
-
-MVF{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - move
-MNF{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - move negated
-
-These are fully implemented.
-
-ABS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - absolute value
-SQT{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - square root
-RND{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - round
-
-These are fully implemented.
-
-URD{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - unnormalized round
-NRM{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - normalize
-
-These are implemented.  URD is implemented using the same code as the RND
-instruction.  Since URD cannot return a unnormalized number, NRM becomes
-a NOP.
-
-Library calls:
-
-POW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - power
-RPW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse power
-POL{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - polar angle (arctan2)
-
-LOG{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base 10
-LGN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base e 
-EXP{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - exponent
-SIN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - sine
-COS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - cosine
-TAN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - tangent
-ASN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arcsine
-ACS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arccosine
-ATN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arctangent
-
-These are not implemented.  They are not currently issued by the compiler,
-and are handled by routines in libc.  These are not implemented by the FPA11
-hardware, but are handled by the floating point support code.  They should 
-be implemented in future versions.
-
-Signalling:
-
-Signals are implemented.  However current ELF kernels produced by Rebel.com
-have a bug in them that prevents the module from generating a SIGFPE.  This
-is caused by a failure to alias fp_current to the kernel variable
-current_set[0] correctly.
-
-The kernel provided with this distribution (vmlinux-nwfpe-0.93) contains
-a fix for this problem and also incorporates the current version of the
-emulator directly.  It is possible to run with no floating point module
-loaded with this kernel.  It is provided as a demonstration of the 
-technology and for those who want to do floating point work that depends
-on signals.  It is not strictly necessary to use the module.
-
-A module (either the one provided by Russell King, or the one in this 
-distribution) can be loaded to replace the functionality of the emulator
-built into the kernel.
diff --git a/Documentation/arm/nwfpe/TODO b/Documentation/arm/nwfpe/TODO
deleted file mode 100644
index 8027061b60eb..000000000000
--- a/Documentation/arm/nwfpe/TODO
+++ /dev/null
@@ -1,67 +0,0 @@
-TODO LIST
----------
-
-POW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - power
-RPW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse power
-POL{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - polar angle (arctan2)
-
-LOG{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base 10
-LGN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base e 
-EXP{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - exponent
-SIN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - sine
-COS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - cosine
-TAN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - tangent
-ASN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arcsine
-ACS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arccosine
-ATN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arctangent
-
-These are not implemented.  They are not currently issued by the compiler,
-and are handled by routines in libc.  These are not implemented by the FPA11
-hardware, but are handled by the floating point support code.  They should 
-be implemented in future versions.
-
-There are a couple of ways to approach the implementation of these.  One
-method would be to use accurate table methods for these routines.  I have 
-a couple of papers by S. Gal from IBM's research labs in Haifa, Israel that
-seem to promise extreme accuracy (in the order of 99.8%) and reasonable speed.
-These methods are used in GLIBC for some of the transcendental functions.
-
-Another approach, which I know little about is CORDIC.  This stands for
-Coordinate Rotation Digital Computer, and is a method of computing 
-transcendental functions using mostly shifts and adds and a few
-multiplications and divisions.  The ARM excels at shifts and adds,
-so such a method could be promising, but requires more research to 
-determine if it is feasible.
-
-Rounding Methods
-
-The IEEE standard defines 4 rounding modes.  Round to nearest is the
-default, but rounding to + or - infinity or round to zero are also allowed.
-Many architectures allow the rounding mode to be specified by modifying bits
-in a control register.  Not so with the ARM FPA11 architecture.  To change
-the rounding mode one must specify it with each instruction.
-
-This has made porting some benchmarks difficult.  It is possible to
-introduce such a capability into the emulator.  The FPCR contains 
-bits describing the rounding mode.  The emulator could be altered to 
-examine a flag, which if set forced it to ignore the rounding mode in
-the instruction, and use the mode specified in the bits in the FPCR.
-
-This would require a method of getting/setting the flag, and the bits
-in the FPCR.  This requires a kernel call in ArmLinux, as WFC/RFC are
-supervisor only instructions.  If anyone has any ideas or comments I
-would like to hear them.
-
-[NOTE: pulled out from some docs on ARM floating point, specifically
- for the Acorn FPE, but not limited to it:
-
- The floating point control register (FPCR) may only be present in some
- implementations: it is there to control the hardware in an implementation-
- specific manner, for example to disable the floating point system.  The user
- mode of the ARM is not permitted to use this register (since the right is
- reserved to alter it between implementations) and the WFC and RFC
- instructions will trap if tried in user mode.
-
- Hence, the answer is yes, you could do this, but then you will run a high
- risk of becoming isolated if and when hardware FP emulation comes out
-		-- Russell].
diff --git a/Documentation/arm/nwfpe/index.rst b/Documentation/arm/nwfpe/index.rst
new file mode 100644
index 000000000000..21fa8ce192ae
--- /dev/null
+++ b/Documentation/arm/nwfpe/index.rst
@@ -0,0 +1,11 @@
+===================================
+NetWinder's floating point emulator
+===================================
+
+.. toctree::
+   :maxdepth: 1
+
+   nwfpe
+   netwinder-fpe
+   notes
+   todo
diff --git a/Documentation/arm/nwfpe/netwinder-fpe.rst b/Documentation/arm/nwfpe/netwinder-fpe.rst
new file mode 100644
index 000000000000..cbb320960fc4
--- /dev/null
+++ b/Documentation/arm/nwfpe/netwinder-fpe.rst
@@ -0,0 +1,162 @@
+=============
+Current State
+=============
+
+The following describes the current state of the NetWinder's floating point
+emulator.
+
+In the following nomenclature is used to describe the floating point
+instructions.  It follows the conventions in the ARM manual.
+
+::
+
+  <S|D|E> = <single|double|extended>, no default
+  {P|M|Z} = {round to +infinity,round to -infinity,round to zero},
+            default = round to nearest
+
+Note: items enclosed in {} are optional.
+
+Floating Point Coprocessor Data Transfer Instructions (CPDT)
+------------------------------------------------------------
+
+LDF/STF - load and store floating
+
+<LDF|STF>{cond}<S|D|E> Fd, Rn
+<LDF|STF>{cond}<S|D|E> Fd, [Rn, #<expression>]{!}
+<LDF|STF>{cond}<S|D|E> Fd, [Rn], #<expression>
+
+These instructions are fully implemented.
+
+LFM/SFM - load and store multiple floating
+
+Form 1 syntax:
+<LFM|SFM>{cond}<S|D|E> Fd, <count>, [Rn]
+<LFM|SFM>{cond}<S|D|E> Fd, <count>, [Rn, #<expression>]{!}
+<LFM|SFM>{cond}<S|D|E> Fd, <count>, [Rn], #<expression>
+
+Form 2 syntax:
+<LFM|SFM>{cond}<FD,EA> Fd, <count>, [Rn]{!}
+
+These instructions are fully implemented.  They store/load three words
+for each floating point register into the memory location given in the
+instruction.  The format in memory is unlikely to be compatible with
+other implementations, in particular the actual hardware.  Specific
+mention of this is made in the ARM manuals.
+
+Floating Point Coprocessor Register Transfer Instructions (CPRT)
+----------------------------------------------------------------
+
+Conversions, read/write status/control register instructions
+
+FLT{cond}<S,D,E>{P,M,Z} Fn, Rd          Convert integer to floating point
+FIX{cond}{P,M,Z} Rd, Fn                 Convert floating point to integer
+WFS{cond} Rd                            Write floating point status register
+RFS{cond} Rd                            Read floating point status register
+WFC{cond} Rd                            Write floating point control register
+RFC{cond} Rd                            Read floating point control register
+
+FLT/FIX are fully implemented.
+
+RFS/WFS are fully implemented.
+
+RFC/WFC are fully implemented.  RFC/WFC are supervisor only instructions, and
+presently check the CPU mode, and do an invalid instruction trap if not called
+from supervisor mode.
+
+Compare instructions
+
+CMF{cond} Fn, Fm        Compare floating
+CMFE{cond} Fn, Fm       Compare floating with exception
+CNF{cond} Fn, Fm        Compare negated floating
+CNFE{cond} Fn, Fm       Compare negated floating with exception
+
+These are fully implemented.
+
+Floating Point Coprocessor Data Instructions (CPDT)
+---------------------------------------------------
+
+Dyadic operations:
+
+ADF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - add
+SUF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - subtract
+RSF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse subtract
+MUF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - multiply
+DVF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - divide
+RDV{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse divide
+
+These are fully implemented.
+
+FML{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - fast multiply
+FDV{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - fast divide
+FRD{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - fast reverse divide
+
+These are fully implemented as well.  They use the same algorithm as the
+non-fast versions.  Hence, in this implementation their performance is
+equivalent to the MUF/DVF/RDV instructions.  This is acceptable according
+to the ARM manual.  The manual notes these are defined only for single
+operands, on the actual FPA11 hardware they do not work for double or
+extended precision operands.  The emulator currently does not check
+the requested permissions conditions, and performs the requested operation.
+
+RMF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - IEEE remainder
+
+This is fully implemented.
+
+Monadic operations:
+
+MVF{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - move
+MNF{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - move negated
+
+These are fully implemented.
+
+ABS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - absolute value
+SQT{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - square root
+RND{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - round
+
+These are fully implemented.
+
+URD{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - unnormalized round
+NRM{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - normalize
+
+These are implemented.  URD is implemented using the same code as the RND
+instruction.  Since URD cannot return a unnormalized number, NRM becomes
+a NOP.
+
+Library calls:
+
+POW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - power
+RPW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse power
+POL{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - polar angle (arctan2)
+
+LOG{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base 10
+LGN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base e
+EXP{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - exponent
+SIN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - sine
+COS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - cosine
+TAN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - tangent
+ASN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arcsine
+ACS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arccosine
+ATN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arctangent
+
+These are not implemented.  They are not currently issued by the compiler,
+and are handled by routines in libc.  These are not implemented by the FPA11
+hardware, but are handled by the floating point support code.  They should
+be implemented in future versions.
+
+Signalling:
+
+Signals are implemented.  However current ELF kernels produced by Rebel.com
+have a bug in them that prevents the module from generating a SIGFPE.  This
+is caused by a failure to alias fp_current to the kernel variable
+current_set[0] correctly.
+
+The kernel provided with this distribution (vmlinux-nwfpe-0.93) contains
+a fix for this problem and also incorporates the current version of the
+emulator directly.  It is possible to run with no floating point module
+loaded with this kernel.  It is provided as a demonstration of the
+technology and for those who want to do floating point work that depends
+on signals.  It is not strictly necessary to use the module.
+
+A module (either the one provided by Russell King, or the one in this
+distribution) can be loaded to replace the functionality of the emulator
+built into the kernel.
diff --git a/Documentation/arm/nwfpe/notes.rst b/Documentation/arm/nwfpe/notes.rst
new file mode 100644
index 000000000000..102e55af8439
--- /dev/null
+++ b/Documentation/arm/nwfpe/notes.rst
@@ -0,0 +1,32 @@
+Notes
+=====
+
+There seems to be a problem with exp(double) and our emulator.  I haven't
+been able to track it down yet.  This does not occur with the emulator
+supplied by Russell King.
+
+I also found one oddity in the emulator.  I don't think it is serious but
+will point it out.  The ARM calling conventions require floating point
+registers f4-f7 to be preserved over a function call.  The compiler quite
+often uses an stfe instruction to save f4 on the stack upon entry to a
+function, and an ldfe instruction to restore it before returning.
+
+I was looking at some code, that calculated a double result, stored it in f4
+then made a function call. Upon return from the function call the number in
+f4 had been converted to an extended value in the emulator.
+
+This is a side effect of the stfe instruction.  The double in f4 had to be
+converted to extended, then stored.  If an lfm/sfm combination had been used,
+then no conversion would occur.  This has performance considerations.  The
+result from the function call and f4 were used in a multiplication.  If the
+emulator sees a multiply of a double and extended, it promotes the double to
+extended, then does the multiply in extended precision.
+
+This code will cause this problem:
+
+double x, y, z;
+z = log(x)/log(y);
+
+The result of log(x) (a double) will be calculated, returned in f0, then
+moved to f4 to preserve it over the log(y) call.  The division will be done
+in extended precision, due to the stfe instruction used to save f4 in log(y).
diff --git a/Documentation/arm/nwfpe/nwfpe.rst b/Documentation/arm/nwfpe/nwfpe.rst
new file mode 100644
index 000000000000..35cd90dacbff
--- /dev/null
+++ b/Documentation/arm/nwfpe/nwfpe.rst
@@ -0,0 +1,74 @@
+Introduction
+============
+
+This directory contains the version 0.92 test release of the NetWinder
+Floating Point Emulator.
+
+The majority of the code was written by me, Scott Bambrough It is
+written in C, with a small number of routines in inline assembler
+where required.  It was written quickly, with a goal of implementing a
+working version of all the floating point instructions the compiler
+emits as the first target.  I have attempted to be as optimal as
+possible, but there remains much room for improvement.
+
+I have attempted to make the emulator as portable as possible.  One of
+the problems is with leading underscores on kernel symbols.  Elf
+kernels have no leading underscores, a.out compiled kernels do.  I
+have attempted to use the C_SYMBOL_NAME macro wherever this may be
+important.
+
+Another choice I made was in the file structure.  I have attempted to
+contain all operating system specific code in one module (fpmodule.*).
+All the other files contain emulator specific code.  This should allow
+others to port the emulator to NetBSD for instance relatively easily.
+
+The floating point operations are based on SoftFloat Release 2, by
+John Hauser.  SoftFloat is a software implementation of floating-point
+that conforms to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.  As many as four formats are supported: single precision,
+double precision, extended double precision, and quadruple precision.
+All operations required by the standard are implemented, except for
+conversions to and from decimal.  We use only the single precision,
+double precision and extended double precision formats.  The port of
+SoftFloat to the ARM was done by Phil Blundell, based on an earlier
+port of SoftFloat version 1 by Neil Carson for NetBSD/arm32.
+
+The file README.FPE contains a description of what has been implemented
+so far in the emulator.  The file TODO contains a information on what
+remains to be done, and other ideas for the emulator.
+
+Bug reports, comments, suggestions should be directed to me at
+<scottb@netwinder.org>.  General reports of "this program doesn't
+work correctly when your emulator is installed" are useful for
+determining that bugs still exist; but are virtually useless when
+attempting to isolate the problem.  Please report them, but don't
+expect quick action.  Bugs still exist.  The problem remains in isolating
+which instruction contains the bug.  Small programs illustrating a specific
+problem are a godsend.
+
+Legal Notices
+-------------
+
+The NetWinder Floating Point Emulator is free software.  Everything Rebel.com
+has written is provided under the GNU GPL.  See the file COPYING for copying
+conditions.  Excluded from the above is the SoftFloat code.  John Hauser's
+legal notice for SoftFloat is included below.
+
+-------------------------------------------------------------------------------
+
+SoftFloat Legal Notice
+
+SoftFloat was written by John R. Hauser.  This work was made possible in
+part by the International Computer Science Institute, located at Suite 600,
+1947 Center Street, Berkeley, California 94704.  Funding was partially
+provided by the National Science Foundation under grant MIP-9311980.  The
+original version of this code was written as part of a project to build
+a fixed-point vector processor in collaboration with the University of
+California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+-------------------------------------------------------------------------------
diff --git a/Documentation/arm/nwfpe/todo.rst b/Documentation/arm/nwfpe/todo.rst
new file mode 100644
index 000000000000..393f11b14540
--- /dev/null
+++ b/Documentation/arm/nwfpe/todo.rst
@@ -0,0 +1,72 @@
+TODO LIST
+=========
+
+::
+
+  POW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - power
+  RPW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse power
+  POL{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - polar angle (arctan2)
+
+  LOG{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base 10
+  LGN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base e
+  EXP{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - exponent
+  SIN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - sine
+  COS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - cosine
+  TAN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - tangent
+  ASN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arcsine
+  ACS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arccosine
+  ATN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arctangent
+
+These are not implemented.  They are not currently issued by the compiler,
+and are handled by routines in libc.  These are not implemented by the FPA11
+hardware, but are handled by the floating point support code.  They should
+be implemented in future versions.
+
+There are a couple of ways to approach the implementation of these.  One
+method would be to use accurate table methods for these routines.  I have
+a couple of papers by S. Gal from IBM's research labs in Haifa, Israel that
+seem to promise extreme accuracy (in the order of 99.8%) and reasonable speed.
+These methods are used in GLIBC for some of the transcendental functions.
+
+Another approach, which I know little about is CORDIC.  This stands for
+Coordinate Rotation Digital Computer, and is a method of computing
+transcendental functions using mostly shifts and adds and a few
+multiplications and divisions.  The ARM excels at shifts and adds,
+so such a method could be promising, but requires more research to
+determine if it is feasible.
+
+Rounding Methods
+----------------
+
+The IEEE standard defines 4 rounding modes.  Round to nearest is the
+default, but rounding to + or - infinity or round to zero are also allowed.
+Many architectures allow the rounding mode to be specified by modifying bits
+in a control register.  Not so with the ARM FPA11 architecture.  To change
+the rounding mode one must specify it with each instruction.
+
+This has made porting some benchmarks difficult.  It is possible to
+introduce such a capability into the emulator.  The FPCR contains
+bits describing the rounding mode.  The emulator could be altered to
+examine a flag, which if set forced it to ignore the rounding mode in
+the instruction, and use the mode specified in the bits in the FPCR.
+
+This would require a method of getting/setting the flag, and the bits
+in the FPCR.  This requires a kernel call in ArmLinux, as WFC/RFC are
+supervisor only instructions.  If anyone has any ideas or comments I
+would like to hear them.
+
+NOTE:
+ pulled out from some docs on ARM floating point, specifically
+ for the Acorn FPE, but not limited to it:
+
+ The floating point control register (FPCR) may only be present in some
+ implementations: it is there to control the hardware in an implementation-
+ specific manner, for example to disable the floating point system.  The user
+ mode of the ARM is not permitted to use this register (since the right is
+ reserved to alter it between implementations) and the WFC and RFC
+ instructions will trap if tried in user mode.
+
+ Hence, the answer is yes, you could do this, but then you will run a high
+ risk of becoming isolated if and when hardware FP emulation comes out
+
+		-- Russell.
diff --git a/Documentation/arm/omap/dss.rst b/Documentation/arm/omap/dss.rst
new file mode 100644
index 000000000000..a40c4d9c717a
--- /dev/null
+++ b/Documentation/arm/omap/dss.rst
@@ -0,0 +1,372 @@
+=========================
+OMAP2/3 Display Subsystem
+=========================
+
+This is an almost total rewrite of the OMAP FB driver in drivers/video/omap
+(let's call it DSS1). The main differences between DSS1 and DSS2 are DSI,
+TV-out and multiple display support, but there are lots of small improvements
+also.
+
+The DSS2 driver (omapdss module) is in arch/arm/plat-omap/dss/, and the FB,
+panel and controller drivers are in drivers/video/omap2/. DSS1 and DSS2 live
+currently side by side, you can choose which one to use.
+
+Features
+--------
+
+Working and tested features include:
+
+- MIPI DPI (parallel) output
+- MIPI DSI output in command mode
+- MIPI DBI (RFBI) output
+- SDI output
+- TV output
+- All pieces can be compiled as a module or inside kernel
+- Use DISPC to update any of the outputs
+- Use CPU to update RFBI or DSI output
+- OMAP DISPC planes
+- RGB16, RGB24 packed, RGB24 unpacked
+- YUV2, UYVY
+- Scaling
+- Adjusting DSS FCK to find a good pixel clock
+- Use DSI DPLL to create DSS FCK
+
+Tested boards include:
+- OMAP3 SDP board
+- Beagle board
+- N810
+
+omapdss driver
+--------------
+
+The DSS driver does not itself have any support for Linux framebuffer, V4L or
+such like the current ones, but it has an internal kernel API that upper level
+drivers can use.
+
+The DSS driver models OMAP's overlays, overlay managers and displays in a
+flexible way to enable non-common multi-display configuration. In addition to
+modelling the hardware overlays, omapdss supports virtual overlays and overlay
+managers. These can be used when updating a display with CPU or system DMA.
+
+omapdss driver support for audio
+--------------------------------
+There exist several display technologies and standards that support audio as
+well. Hence, it is relevant to update the DSS device driver to provide an audio
+interface that may be used by an audio driver or any other driver interested in
+the functionality.
+
+The audio_enable function is intended to prepare the relevant
+IP for playback (e.g., enabling an audio FIFO, taking in/out of reset
+some IP, enabling companion chips, etc). It is intended to be called before
+audio_start. The audio_disable function performs the reverse operation and is
+intended to be called after audio_stop.
+
+While a given DSS device driver may support audio, it is possible that for
+certain configurations audio is not supported (e.g., an HDMI display using a
+VESA video timing). The audio_supported function is intended to query whether
+the current configuration of the display supports audio.
+
+The audio_config function is intended to configure all the relevant audio
+parameters of the display. In order to make the function independent of any
+specific DSS device driver, a struct omap_dss_audio is defined. Its purpose
+is to contain all the required parameters for audio configuration. At the
+moment, such structure contains pointers to IEC-60958 channel status word
+and CEA-861 audio infoframe structures. This should be enough to support
+HDMI and DisplayPort, as both are based on CEA-861 and IEC-60958.
+
+The audio_enable/disable, audio_config and audio_supported functions could be
+implemented as functions that may sleep. Hence, they should not be called
+while holding a spinlock or a readlock.
+
+The audio_start/audio_stop function is intended to effectively start/stop audio
+playback after the configuration has taken place. These functions are designed
+to be used in an atomic context. Hence, audio_start should return quickly and be
+called only after all the needed resources for audio playback (audio FIFOs,
+DMA channels, companion chips, etc) have been enabled to begin data transfers.
+audio_stop is designed to only stop the audio transfers. The resources used
+for playback are released using audio_disable.
+
+The enum omap_dss_audio_state may be used to help the implementations of
+the interface to keep track of the audio state. The initial state is _DISABLED;
+then, the state transitions to _CONFIGURED, and then, when it is ready to
+play audio, to _ENABLED. The state _PLAYING is used when the audio is being
+rendered.
+
+
+Panel and controller drivers
+----------------------------
+
+The drivers implement panel or controller specific functionality and are not
+usually visible to users except through omapfb driver.  They register
+themselves to the DSS driver.
+
+omapfb driver
+-------------
+
+The omapfb driver implements arbitrary number of standard linux framebuffers.
+These framebuffers can be routed flexibly to any overlays, thus allowing very
+dynamic display architecture.
+
+The driver exports some omapfb specific ioctls, which are compatible with the
+ioctls in the old driver.
+
+The rest of the non standard features are exported via sysfs. Whether the final
+implementation will use sysfs, or ioctls, is still open.
+
+V4L2 drivers
+------------
+
+V4L2 is being implemented in TI.
+
+From omapdss point of view the V4L2 drivers should be similar to framebuffer
+driver.
+
+Architecture
+--------------------
+
+Some clarification what the different components do:
+
+    - Framebuffer is a memory area inside OMAP's SRAM/SDRAM that contains the
+      pixel data for the image. Framebuffer has width and height and color
+      depth.
+    - Overlay defines where the pixels are read from and where they go on the
+      screen. The overlay may be smaller than framebuffer, thus displaying only
+      part of the framebuffer. The position of the overlay may be changed if
+      the overlay is smaller than the display.
+    - Overlay manager combines the overlays in to one image and feeds them to
+      display.
+    - Display is the actual physical display device.
+
+A framebuffer can be connected to multiple overlays to show the same pixel data
+on all of the overlays. Note that in this case the overlay input sizes must be
+the same, but, in case of video overlays, the output size can be different. Any
+framebuffer can be connected to any overlay.
+
+An overlay can be connected to one overlay manager. Also DISPC overlays can be
+connected only to DISPC overlay managers, and virtual overlays can be only
+connected to virtual overlays.
+
+An overlay manager can be connected to one display. There are certain
+restrictions which kinds of displays an overlay manager can be connected:
+
+    - DISPC TV overlay manager can be only connected to TV display.
+    - Virtual overlay managers can only be connected to DBI or DSI displays.
+    - DISPC LCD overlay manager can be connected to all displays, except TV
+      display.
+
+Sysfs
+-----
+The sysfs interface is mainly used for testing. I don't think sysfs
+interface is the best for this in the final version, but I don't quite know
+what would be the best interfaces for these things.
+
+The sysfs interface is divided to two parts: DSS and FB.
+
+/sys/class/graphics/fb? directory:
+mirror		0=off, 1=on
+rotate		Rotation 0-3 for 0, 90, 180, 270 degrees
+rotate_type	0 = DMA rotation, 1 = VRFB rotation
+overlays	List of overlay numbers to which framebuffer pixels go
+phys_addr	Physical address of the framebuffer
+virt_addr	Virtual address of the framebuffer
+size		Size of the framebuffer
+
+/sys/devices/platform/omapdss/overlay? directory:
+enabled		0=off, 1=on
+input_size	width,height (ie. the framebuffer size)
+manager		Destination overlay manager name
+name
+output_size	width,height
+position	x,y
+screen_width	width
+global_alpha   	global alpha 0-255 0=transparent 255=opaque
+
+/sys/devices/platform/omapdss/manager? directory:
+display				Destination display
+name
+alpha_blending_enabled		0=off, 1=on
+trans_key_enabled		0=off, 1=on
+trans_key_type			gfx-destination, video-source
+trans_key_value			transparency color key (RGB24)
+default_color			default background color (RGB24)
+
+/sys/devices/platform/omapdss/display? directory:
+
+=============== =============================================================
+ctrl_name	Controller name
+mirror		0=off, 1=on
+update_mode	0=off, 1=auto, 2=manual
+enabled		0=off, 1=on
+name
+rotate		Rotation 0-3 for 0, 90, 180, 270 degrees
+timings		Display timings (pixclock,xres/hfp/hbp/hsw,yres/vfp/vbp/vsw)
+		When writing, two special timings are accepted for tv-out:
+		"pal" and "ntsc"
+panel_name
+tear_elim	Tearing elimination 0=off, 1=on
+output_type	Output type (video encoder only): "composite" or "svideo"
+=============== =============================================================
+
+There are also some debugfs files at <debugfs>/omapdss/ which show information
+about clocks and registers.
+
+Examples
+--------
+
+The following definitions have been made for the examples below::
+
+	ovl0=/sys/devices/platform/omapdss/overlay0
+	ovl1=/sys/devices/platform/omapdss/overlay1
+	ovl2=/sys/devices/platform/omapdss/overlay2
+
+	mgr0=/sys/devices/platform/omapdss/manager0
+	mgr1=/sys/devices/platform/omapdss/manager1
+
+	lcd=/sys/devices/platform/omapdss/display0
+	dvi=/sys/devices/platform/omapdss/display1
+	tv=/sys/devices/platform/omapdss/display2
+
+	fb0=/sys/class/graphics/fb0
+	fb1=/sys/class/graphics/fb1
+	fb2=/sys/class/graphics/fb2
+
+Default setup on OMAP3 SDP
+--------------------------
+
+Here's the default setup on OMAP3 SDP board. All planes go to LCD. DVI
+and TV-out are not in use. The columns from left to right are:
+framebuffers, overlays, overlay managers, displays. Framebuffers are
+handled by omapfb, and the rest by the DSS::
+
+	FB0 --- GFX  -\            DVI
+	FB1 --- VID1 --+- LCD ---- LCD
+	FB2 --- VID2 -/   TV ----- TV
+
+Example: Switch from LCD to DVI
+-------------------------------
+
+::
+
+	w=`cat $dvi/timings | cut -d "," -f 2 | cut -d "/" -f 1`
+	h=`cat $dvi/timings | cut -d "," -f 3 | cut -d "/" -f 1`
+
+	echo "0" > $lcd/enabled
+	echo "" > $mgr0/display
+	fbset -fb /dev/fb0 -xres $w -yres $h -vxres $w -vyres $h
+	# at this point you have to switch the dvi/lcd dip-switch from the omap board
+	echo "dvi" > $mgr0/display
+	echo "1" > $dvi/enabled
+
+After this the configuration looks like:::
+
+	FB0 --- GFX  -\         -- DVI
+	FB1 --- VID1 --+- LCD -/   LCD
+	FB2 --- VID2 -/   TV ----- TV
+
+Example: Clone GFX overlay to LCD and TV
+----------------------------------------
+
+::
+
+	w=`cat $tv/timings | cut -d "," -f 2 | cut -d "/" -f 1`
+	h=`cat $tv/timings | cut -d "," -f 3 | cut -d "/" -f 1`
+
+	echo "0" > $ovl0/enabled
+	echo "0" > $ovl1/enabled
+
+	echo "" > $fb1/overlays
+	echo "0,1" > $fb0/overlays
+
+	echo "$w,$h" > $ovl1/output_size
+	echo "tv" > $ovl1/manager
+
+	echo "1" > $ovl0/enabled
+	echo "1" > $ovl1/enabled
+
+	echo "1" > $tv/enabled
+
+After this the configuration looks like (only relevant parts shown)::
+
+	FB0 +-- GFX  ---- LCD ---- LCD
+	\- VID1 ---- TV  ---- TV
+
+Misc notes
+----------
+
+OMAP FB allocates the framebuffer memory using the standard dma allocator. You
+can enable Contiguous Memory Allocator (CONFIG_CMA) to improve the dma
+allocator, and if CMA is enabled, you use "cma=" kernel parameter to increase
+the global memory area for CMA.
+
+Using DSI DPLL to generate pixel clock it is possible produce the pixel clock
+of 86.5MHz (max possible), and with that you get 1280x1024@57 output from DVI.
+
+Rotation and mirroring currently only supports RGB565 and RGB8888 modes. VRFB
+does not support mirroring.
+
+VRFB rotation requires much more memory than non-rotated framebuffer, so you
+probably need to increase your vram setting before using VRFB rotation. Also,
+many applications may not work with VRFB if they do not pay attention to all
+framebuffer parameters.
+
+Kernel boot arguments
+---------------------
+
+omapfb.mode=<display>:<mode>[,...]
+	- Default video mode for specified displays. For example,
+	  "dvi:800x400MR-24@60".  See drivers/video/modedb.c.
+	  There are also two special modes: "pal" and "ntsc" that
+	  can be used to tv out.
+
+omapfb.vram=<fbnum>:<size>[@<physaddr>][,...]
+	- VRAM allocated for a framebuffer. Normally omapfb allocates vram
+	  depending on the display size. With this you can manually allocate
+	  more or define the physical address of each framebuffer. For example,
+	  "1:4M" to allocate 4M for fb1.
+
+omapfb.debug=<y|n>
+	- Enable debug printing. You have to have OMAPFB debug support enabled
+	  in kernel config.
+
+omapfb.test=<y|n>
+	- Draw test pattern to framebuffer whenever framebuffer settings change.
+	  You need to have OMAPFB debug support enabled in kernel config.
+
+omapfb.vrfb=<y|n>
+	- Use VRFB rotation for all framebuffers.
+
+omapfb.rotate=<angle>
+	- Default rotation applied to all framebuffers.
+	  0 - 0 degree rotation
+	  1 - 90 degree rotation
+	  2 - 180 degree rotation
+	  3 - 270 degree rotation
+
+omapfb.mirror=<y|n>
+	- Default mirror for all framebuffers. Only works with DMA rotation.
+
+omapdss.def_disp=<display>
+	- Name of default display, to which all overlays will be connected.
+	  Common examples are "lcd" or "tv".
+
+omapdss.debug=<y|n>
+	- Enable debug printing. You have to have DSS debug support enabled in
+	  kernel config.
+
+TODO
+----
+
+DSS locking
+
+Error checking
+
+- Lots of checks are missing or implemented just as BUG()
+
+System DMA update for DSI
+
+- Can be used for RGB16 and RGB24P modes. Probably not for RGB24U (how
+  to skip the empty byte?)
+
+OMAP1 support
+
+- Not sure if needed
diff --git a/Documentation/arm/omap/index.rst b/Documentation/arm/omap/index.rst
new file mode 100644
index 000000000000..f1e9c11d9f9b
--- /dev/null
+++ b/Documentation/arm/omap/index.rst
@@ -0,0 +1,10 @@
+=======
+TI OMAP
+=======
+
+.. toctree::
+   :maxdepth: 1
+
+   omap
+   omap_pm
+   dss
diff --git a/Documentation/arm/omap/omap.rst b/Documentation/arm/omap/omap.rst
new file mode 100644
index 000000000000..f440c0f4613f
--- /dev/null
+++ b/Documentation/arm/omap/omap.rst
@@ -0,0 +1,18 @@
+============
+OMAP history
+============
+
+This file contains documentation for running mainline
+kernel on omaps.
+
+======		======================================================
+KERNEL		NEW DEPENDENCIES
+======		======================================================
+v4.3+		Update is needed for custom .config files to make sure
+		CONFIG_REGULATOR_PBIAS is enabled for MMC1 to work
+		properly.
+
+v4.18+		Update is needed for custom .config files to make sure
+		CONFIG_MMC_SDHCI_OMAP is enabled for all MMC instances
+		to work in DRA7 and K2G based boards.
+======		======================================================
diff --git a/Documentation/arm/omap/omap_pm.rst b/Documentation/arm/omap/omap_pm.rst
new file mode 100644
index 000000000000..a335e4c8ce2c
--- /dev/null
+++ b/Documentation/arm/omap/omap_pm.rst
@@ -0,0 +1,165 @@
+=====================
+The OMAP PM interface
+=====================
+
+This document describes the temporary OMAP PM interface.  Driver
+authors use these functions to communicate minimum latency or
+throughput constraints to the kernel power management code.
+Over time, the intention is to merge features from the OMAP PM
+interface into the Linux PM QoS code.
+
+Drivers need to express PM parameters which:
+
+- support the range of power management parameters present in the TI SRF;
+
+- separate the drivers from the underlying PM parameter
+  implementation, whether it is the TI SRF or Linux PM QoS or Linux
+  latency framework or something else;
+
+- specify PM parameters in terms of fundamental units, such as
+  latency and throughput, rather than units which are specific to OMAP
+  or to particular OMAP variants;
+
+- allow drivers which are shared with other architectures (e.g.,
+  DaVinci) to add these constraints in a way which won't affect non-OMAP
+  systems,
+
+- can be implemented immediately with minimal disruption of other
+  architectures.
+
+
+This document proposes the OMAP PM interface, including the following
+five power management functions for driver code:
+
+1. Set the maximum MPU wakeup latency::
+
+   (*pdata->set_max_mpu_wakeup_lat)(struct device *dev, unsigned long t)
+
+2. Set the maximum device wakeup latency::
+
+   (*pdata->set_max_dev_wakeup_lat)(struct device *dev, unsigned long t)
+
+3. Set the maximum system DMA transfer start latency (CORE pwrdm)::
+
+   (*pdata->set_max_sdma_lat)(struct device *dev, long t)
+
+4. Set the minimum bus throughput needed by a device::
+
+   (*pdata->set_min_bus_tput)(struct device *dev, u8 agent_id, unsigned long r)
+
+5. Return the number of times the device has lost context::
+
+   (*pdata->get_dev_context_loss_count)(struct device *dev)
+
+
+Further documentation for all OMAP PM interface functions can be
+found in arch/arm/plat-omap/include/mach/omap-pm.h.
+
+
+The OMAP PM layer is intended to be temporary
+---------------------------------------------
+
+The intention is that eventually the Linux PM QoS layer should support
+the range of power management features present in OMAP3.  As this
+happens, existing drivers using the OMAP PM interface can be modified
+to use the Linux PM QoS code; and the OMAP PM interface can disappear.
+
+
+Driver usage of the OMAP PM functions
+-------------------------------------
+
+As the 'pdata' in the above examples indicates, these functions are
+exposed to drivers through function pointers in driver .platform_data
+structures.  The function pointers are initialized by the `board-*.c`
+files to point to the corresponding OMAP PM functions:
+
+- set_max_dev_wakeup_lat will point to
+  omap_pm_set_max_dev_wakeup_lat(), etc.  Other architectures which do
+  not support these functions should leave these function pointers set
+  to NULL.  Drivers should use the following idiom::
+
+        if (pdata->set_max_dev_wakeup_lat)
+            (*pdata->set_max_dev_wakeup_lat)(dev, t);
+
+The most common usage of these functions will probably be to specify
+the maximum time from when an interrupt occurs, to when the device
+becomes accessible.  To accomplish this, driver writers should use the
+set_max_mpu_wakeup_lat() function to constrain the MPU wakeup
+latency, and the set_max_dev_wakeup_lat() function to constrain the
+device wakeup latency (from clk_enable() to accessibility).  For
+example::
+
+        /* Limit MPU wakeup latency */
+        if (pdata->set_max_mpu_wakeup_lat)
+            (*pdata->set_max_mpu_wakeup_lat)(dev, tc);
+
+        /* Limit device powerdomain wakeup latency */
+        if (pdata->set_max_dev_wakeup_lat)
+            (*pdata->set_max_dev_wakeup_lat)(dev, td);
+
+        /* total wakeup latency in this example: (tc + td) */
+
+The PM parameters can be overwritten by calling the function again
+with the new value.  The settings can be removed by calling the
+function with a t argument of -1 (except in the case of
+set_max_bus_tput(), which should be called with an r argument of 0).
+
+The fifth function above, omap_pm_get_dev_context_loss_count(),
+is intended as an optimization to allow drivers to determine whether the
+device has lost its internal context.  If context has been lost, the
+driver must restore its internal context before proceeding.
+
+
+Other specialized interface functions
+-------------------------------------
+
+The five functions listed above are intended to be usable by any
+device driver.  DSPBridge and CPUFreq have a few special requirements.
+DSPBridge expresses target DSP performance levels in terms of OPP IDs.
+CPUFreq expresses target MPU performance levels in terms of MPU
+frequency.  The OMAP PM interface contains functions for these
+specialized cases to convert that input information (OPPs/MPU
+frequency) into the form that the underlying power management
+implementation needs:
+
+6. `(*pdata->dsp_get_opp_table)(void)`
+
+7. `(*pdata->dsp_set_min_opp)(u8 opp_id)`
+
+8. `(*pdata->dsp_get_opp)(void)`
+
+9. `(*pdata->cpu_get_freq_table)(void)`
+
+10. `(*pdata->cpu_set_freq)(unsigned long f)`
+
+11. `(*pdata->cpu_get_freq)(void)`
+
+Customizing OPP for platform
+============================
+Defining CONFIG_PM should enable OPP layer for the silicon
+and the registration of OPP table should take place automatically.
+However, in special cases, the default OPP table may need to be
+tweaked, for e.g.:
+
+ * enable default OPPs which are disabled by default, but which
+   could be enabled on a platform
+ * Disable an unsupported OPP on the platform
+ * Define and add a custom opp table entry
+   in these cases, the board file needs to do additional steps as follows:
+
+arch/arm/mach-omapx/board-xyz.c::
+
+	#include "pm.h"
+	....
+	static void __init omap_xyz_init_irq(void)
+	{
+		....
+		/* Initialize the default table */
+		omapx_opp_init();
+		/* Do customization to the defaults */
+		....
+	}
+
+NOTE:
+  omapx_opp_init will be omap3_opp_init or as required
+  based on the omap family.
diff --git a/Documentation/arm/porting.rst b/Documentation/arm/porting.rst
new file mode 100644
index 000000000000..bd21958bdb2d
--- /dev/null
+++ b/Documentation/arm/porting.rst
@@ -0,0 +1,137 @@
+=======
+Porting
+=======
+
+Taken from list archive at http://lists.arm.linux.org.uk/pipermail/linux-arm-kernel/2001-July/004064.html
+
+Initial definitions
+-------------------
+
+The following symbol definitions rely on you knowing the translation that
+__virt_to_phys() does for your machine.  This macro converts the passed
+virtual address to a physical address.  Normally, it is simply:
+
+		phys = virt - PAGE_OFFSET + PHYS_OFFSET
+
+
+Decompressor Symbols
+--------------------
+
+ZTEXTADDR
+	Start address of decompressor.  There's no point in talking about
+	virtual or physical addresses here, since the MMU will be off at
+	the time when you call the decompressor code.  You normally call
+	the kernel at this address to start it booting.  This doesn't have
+	to be located in RAM, it can be in flash or other read-only or
+	read-write addressable medium.
+
+ZBSSADDR
+	Start address of zero-initialised work area for the decompressor.
+	This must be pointing at RAM.  The decompressor will zero initialise
+	this for you.  Again, the MMU will be off.
+
+ZRELADDR
+	This is the address where the decompressed kernel will be written,
+	and eventually executed.  The following constraint must be valid:
+
+		__virt_to_phys(TEXTADDR) == ZRELADDR
+
+	The initial part of the kernel is carefully coded to be position
+	independent.
+
+INITRD_PHYS
+	Physical address to place the initial RAM disk.  Only relevant if
+	you are using the bootpImage stuff (which only works on the old
+	struct param_struct).
+
+INITRD_VIRT
+	Virtual address of the initial RAM disk.  The following  constraint
+	must be valid:
+
+		__virt_to_phys(INITRD_VIRT) == INITRD_PHYS
+
+PARAMS_PHYS
+	Physical address of the struct param_struct or tag list, giving the
+	kernel various parameters about its execution environment.
+
+
+Kernel Symbols
+--------------
+
+PHYS_OFFSET
+	Physical start address of the first bank of RAM.
+
+PAGE_OFFSET
+	Virtual start address of the first bank of RAM.  During the kernel
+	boot phase, virtual address PAGE_OFFSET will be mapped to physical
+	address PHYS_OFFSET, along with any other mappings you supply.
+	This should be the same value as TASK_SIZE.
+
+TASK_SIZE
+	The maximum size of a user process in bytes.  Since user space
+	always starts at zero, this is the maximum address that a user
+	process can access+1.  The user space stack grows down from this
+	address.
+
+	Any virtual address below TASK_SIZE is deemed to be user process
+	area, and therefore managed dynamically on a process by process
+	basis by the kernel.  I'll call this the user segment.
+
+	Anything above TASK_SIZE is common to all processes.  I'll call
+	this the kernel segment.
+
+	(In other words, you can't put IO mappings below TASK_SIZE, and
+	hence PAGE_OFFSET).
+
+TEXTADDR
+	Virtual start address of kernel, normally PAGE_OFFSET + 0x8000.
+	This is where the kernel image ends up.  With the latest kernels,
+	it must be located at 32768 bytes into a 128MB region.  Previous
+	kernels placed a restriction of 256MB here.
+
+DATAADDR
+	Virtual address for the kernel data segment.  Must not be defined
+	when using the decompressor.
+
+VMALLOC_START / VMALLOC_END
+	Virtual addresses bounding the vmalloc() area.  There must not be
+	any static mappings in this area; vmalloc will overwrite them.
+	The addresses must also be in the kernel segment (see above).
+	Normally, the vmalloc() area starts VMALLOC_OFFSET bytes above the
+	last virtual RAM address (found using variable high_memory).
+
+VMALLOC_OFFSET
+	Offset normally incorporated into VMALLOC_START to provide a hole
+	between virtual RAM and the vmalloc area.  We do this to allow
+	out of bounds memory accesses (eg, something writing off the end
+	of the mapped memory map) to be caught.  Normally set to 8MB.
+
+Architecture Specific Macros
+----------------------------
+
+BOOT_MEM(pram,pio,vio)
+	`pram` specifies the physical start address of RAM.  Must always
+	be present, and should be the same as PHYS_OFFSET.
+
+	`pio` is the physical address of an 8MB region containing IO for
+	use with the debugging macros in arch/arm/kernel/debug-armv.S.
+
+	`vio` is the virtual address of the 8MB debugging region.
+
+	It is expected that the debugging region will be re-initialised
+	by the architecture specific code later in the code (via the
+	MAPIO function).
+
+BOOT_PARAMS
+	Same as, and see PARAMS_PHYS.
+
+FIXUP(func)
+	Machine specific fixups, run before memory subsystems have been
+	initialised.
+
+MAPIO(func)
+	Machine specific function to map IO areas (including the debug
+	region above).
+
+INITIRQ(func)
+	Machine specific function to initialise interrupts.
diff --git a/Documentation/arm/pxa/mfp.rst b/Documentation/arm/pxa/mfp.rst
new file mode 100644
index 000000000000..ac34e5d7ee44
--- /dev/null
+++ b/Documentation/arm/pxa/mfp.rst
@@ -0,0 +1,288 @@
+==============================================
+MFP Configuration for PXA2xx/PXA3xx Processors
+==============================================
+
+			Eric Miao <eric.miao@marvell.com>
+
+MFP stands for Multi-Function Pin, which is the pin-mux logic on PXA3xx and
+later PXA series processors.  This document describes the existing MFP API,
+and how board/platform driver authors could make use of it.
+
+Basic Concept
+=============
+
+Unlike the GPIO alternate function settings on PXA25x and PXA27x, a new MFP
+mechanism is introduced from PXA3xx to completely move the pin-mux functions
+out of the GPIO controller. In addition to pin-mux configurations, the MFP
+also controls the low power state, driving strength, pull-up/down and event
+detection of each pin.  Below is a diagram of internal connections between
+the MFP logic and the remaining SoC peripherals::
+
+ +--------+
+ |        |--(GPIO19)--+
+ |  GPIO  |            |
+ |        |--(GPIO...) |
+ +--------+            |
+                       |       +---------+
+ +--------+            +------>|         |
+ |  PWM2  |--(PWM_OUT)-------->|   MFP   |
+ +--------+            +------>|         |-------> to external PAD
+                       | +---->|         |
+ +--------+            | | +-->|         |
+ |  SSP2  |---(TXD)----+ | |   +---------+
+ +--------+              | |
+                         | |
+ +--------+              | |
+ | Keypad |--(MKOUT4)----+ |
+ +--------+                |
+                           |
+ +--------+                |
+ |  UART2 |---(TXD)--------+
+ +--------+
+
+NOTE: the external pad is named as MFP_PIN_GPIO19, it doesn't necessarily
+mean it's dedicated for GPIO19, only as a hint that internally this pin
+can be routed from GPIO19 of the GPIO controller.
+
+To better understand the change from PXA25x/PXA27x GPIO alternate function
+to this new MFP mechanism, here are several key points:
+
+  1. GPIO controller on PXA3xx is now a dedicated controller, same as other
+     internal controllers like PWM, SSP and UART, with 128 internal signals
+     which can be routed to external through one or more MFPs (e.g. GPIO<0>
+     can be routed through either MFP_PIN_GPIO0 as well as MFP_PIN_GPIO0_2,
+     see arch/arm/mach-pxa/mfp-pxa300.h)
+
+  2. Alternate function configuration is removed from this GPIO controller,
+     the remaining functions are pure GPIO-specific, i.e.
+
+       - GPIO signal level control
+       - GPIO direction control
+       - GPIO level change detection
+
+  3. Low power state for each pin is now controlled by MFP, this means the
+     PGSRx registers on PXA2xx are now useless on PXA3xx
+
+  4. Wakeup detection is now controlled by MFP, PWER does not control the
+     wakeup from GPIO(s) any more, depending on the sleeping state, ADxER
+     (as defined in pxa3xx-regs.h) controls the wakeup from MFP
+
+NOTE: with such a clear separation of MFP and GPIO, by GPIO<xx> we normally
+mean it is a GPIO signal, and by MFP<xxx> or pin xxx, we mean a physical
+pad (or ball).
+
+MFP API Usage
+=============
+
+For board code writers, here are some guidelines:
+
+1. include ONE of the following header files in your <board>.c:
+
+   - #include "mfp-pxa25x.h"
+   - #include "mfp-pxa27x.h"
+   - #include "mfp-pxa300.h"
+   - #include "mfp-pxa320.h"
+   - #include "mfp-pxa930.h"
+
+   NOTE: only one file in your <board>.c, depending on the processors used,
+   because pin configuration definitions may conflict in these file (i.e.
+   same name, different meaning and settings on different processors). E.g.
+   for zylonite platform, which support both PXA300/PXA310 and PXA320, two
+   separate files are introduced: zylonite_pxa300.c and zylonite_pxa320.c
+   (in addition to handle MFP configuration differences, they also handle
+   the other differences between the two combinations).
+
+   NOTE: PXA300 and PXA310 are almost identical in pin configurations (with
+   PXA310 supporting some additional ones), thus the difference is actually
+   covered in a single mfp-pxa300.h.
+
+2. prepare an array for the initial pin configurations, e.g.::
+
+     static unsigned long mainstone_pin_config[] __initdata = {
+	/* Chip Select */
+	GPIO15_nCS_1,
+
+	/* LCD - 16bpp Active TFT */
+	GPIOxx_TFT_LCD_16BPP,
+	GPIO16_PWM0_OUT,	/* Backlight */
+
+	/* MMC */
+	GPIO32_MMC_CLK,
+	GPIO112_MMC_CMD,
+	GPIO92_MMC_DAT_0,
+	GPIO109_MMC_DAT_1,
+	GPIO110_MMC_DAT_2,
+	GPIO111_MMC_DAT_3,
+
+	...
+
+	/* GPIO */
+	GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH,
+     };
+
+   a) once the pin configurations are passed to pxa{2xx,3xx}_mfp_config(),
+   and written to the actual registers, they are useless and may discard,
+   adding '__initdata' will help save some additional bytes here.
+
+   b) when there is only one possible pin configurations for a component,
+   some simplified definitions can be used, e.g. GPIOxx_TFT_LCD_16BPP on
+   PXA25x and PXA27x processors
+
+   c) if by board design, a pin can be configured to wake up the system
+   from low power state, it can be 'OR'ed with any of:
+
+      WAKEUP_ON_EDGE_BOTH
+      WAKEUP_ON_EDGE_RISE
+      WAKEUP_ON_EDGE_FALL
+      WAKEUP_ON_LEVEL_HIGH - specifically for enabling of keypad GPIOs,
+
+   to indicate that this pin has the capability of wake-up the system,
+   and on which edge(s). This, however, doesn't necessarily mean the
+   pin _will_ wakeup the system, it will only when set_irq_wake() is
+   invoked with the corresponding GPIO IRQ (GPIO_IRQ(xx) or gpio_to_irq())
+   and eventually calls gpio_set_wake() for the actual register setting.
+
+   d) although PXA3xx MFP supports edge detection on each pin, the
+   internal logic will only wakeup the system when those specific bits
+   in ADxER registers are set, which can be well mapped to the
+   corresponding peripheral, thus set_irq_wake() can be called with
+   the peripheral IRQ to enable the wakeup.
+
+
+MFP on PXA3xx
+=============
+
+Every external I/O pad on PXA3xx (excluding those for special purpose) has
+one MFP logic associated, and is controlled by one MFP register (MFPR).
+
+The MFPR has the following bit definitions (for PXA300/PXA310/PXA320)::
+
+ 31                        16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+  +-------------------------+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+  |         RESERVED        |PS|PU|PD|  DRIVE |SS|SD|SO|EC|EF|ER|--| AF_SEL |
+  +-------------------------+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+
+  Bit 3:   RESERVED
+  Bit 4:   EDGE_RISE_EN - enable detection of rising edge on this pin
+  Bit 5:   EDGE_FALL_EN - enable detection of falling edge on this pin
+  Bit 6:   EDGE_CLEAR   - disable edge detection on this pin
+  Bit 7:   SLEEP_OE_N   - enable outputs during low power modes
+  Bit 8:   SLEEP_DATA   - output data on the pin during low power modes
+  Bit 9:   SLEEP_SEL    - selection control for low power modes signals
+  Bit 13:  PULLDOWN_EN  - enable the internal pull-down resistor on this pin
+  Bit 14:  PULLUP_EN    - enable the internal pull-up resistor on this pin
+  Bit 15:  PULL_SEL     - pull state controlled by selected alternate function
+                          (0) or by PULL{UP,DOWN}_EN bits (1)
+
+  Bit 0 - 2: AF_SEL - alternate function selection, 8 possibilities, from 0-7
+  Bit 10-12: DRIVE  - drive strength and slew rate
+			0b000 - fast 1mA
+			0b001 - fast 2mA
+			0b002 - fast 3mA
+			0b003 - fast 4mA
+			0b004 - slow 6mA
+			0b005 - fast 6mA
+			0b006 - slow 10mA
+			0b007 - fast 10mA
+
+MFP Design for PXA2xx/PXA3xx
+============================
+
+Due to the difference of pin-mux handling between PXA2xx and PXA3xx, a unified
+MFP API is introduced to cover both series of processors.
+
+The basic idea of this design is to introduce definitions for all possible pin
+configurations, these definitions are processor and platform independent, and
+the actual API invoked to convert these definitions into register settings and
+make them effective there-after.
+
+Files Involved
+--------------
+
+  - arch/arm/mach-pxa/include/mach/mfp.h
+
+  for
+    1. Unified pin definitions - enum constants for all configurable pins
+    2. processor-neutral bit definitions for a possible MFP configuration
+
+  - arch/arm/mach-pxa/mfp-pxa3xx.h
+
+  for PXA3xx specific MFPR register bit definitions and PXA3xx common pin
+  configurations
+
+  - arch/arm/mach-pxa/mfp-pxa2xx.h
+
+  for PXA2xx specific definitions and PXA25x/PXA27x common pin configurations
+
+  - arch/arm/mach-pxa/mfp-pxa25x.h
+    arch/arm/mach-pxa/mfp-pxa27x.h
+    arch/arm/mach-pxa/mfp-pxa300.h
+    arch/arm/mach-pxa/mfp-pxa320.h
+    arch/arm/mach-pxa/mfp-pxa930.h
+
+  for processor specific definitions
+
+  - arch/arm/mach-pxa/mfp-pxa3xx.c
+  - arch/arm/mach-pxa/mfp-pxa2xx.c
+
+  for implementation of the pin configuration to take effect for the actual
+  processor.
+
+Pin Configuration
+-----------------
+
+  The following comments are copied from mfp.h (see the actual source code
+  for most updated info)::
+
+    /*
+     * a possible MFP configuration is represented by a 32-bit integer
+     *
+     * bit  0.. 9 - MFP Pin Number (1024 Pins Maximum)
+     * bit 10..12 - Alternate Function Selection
+     * bit 13..15 - Drive Strength
+     * bit 16..18 - Low Power Mode State
+     * bit 19..20 - Low Power Mode Edge Detection
+     * bit 21..22 - Run Mode Pull State
+     *
+     * to facilitate the definition, the following macros are provided
+     *
+     * MFP_CFG_DEFAULT - default MFP configuration value, with
+     * 		  alternate function = 0,
+     * 		  drive strength = fast 3mA (MFP_DS03X)
+     * 		  low power mode = default
+     * 		  edge detection = none
+     *
+     * MFP_CFG	- default MFPR value with alternate function
+     * MFP_CFG_DRV	- default MFPR value with alternate function and
+     * 		  pin drive strength
+     * MFP_CFG_LPM	- default MFPR value with alternate function and
+     * 		  low power mode
+     * MFP_CFG_X	- default MFPR value with alternate function,
+     * 		  pin drive strength and low power mode
+     */
+
+   Examples of pin configurations are::
+
+     #define GPIO94_SSP3_RXD		MFP_CFG_X(GPIO94, AF1, DS08X, FLOAT)
+
+   which reads GPIO94 can be configured as SSP3_RXD, with alternate function
+   selection of 1, driving strength of 0b101, and a float state in low power
+   modes.
+
+   NOTE: this is the default setting of this pin being configured as SSP3_RXD
+   which can be modified a bit in board code, though it is not recommended to
+   do so, simply because this default setting is usually carefully encoded,
+   and is supposed to work in most cases.
+
+Register Settings
+-----------------
+
+   Register settings on PXA3xx for a pin configuration is actually very
+   straight-forward, most bits can be converted directly into MFPR value
+   in a easier way. Two sets of MFPR values are calculated: the run-time
+   ones and the low power mode ones, to allow different settings.
+
+   The conversion from a generic pin configuration to the actual register
+   settings on PXA2xx is a bit complicated: many registers are involved,
+   including GAFRx, GPDRx, PGSRx, PWER, PKWR, PFER and PRER. Please see
+   mfp-pxa2xx.c for how the conversion is made.
diff --git a/Documentation/arm/pxa/mfp.txt b/Documentation/arm/pxa/mfp.txt
deleted file mode 100644
index 0b7cab978c02..000000000000
--- a/Documentation/arm/pxa/mfp.txt
+++ /dev/null
@@ -1,286 +0,0 @@
-                 MFP Configuration for PXA2xx/PXA3xx Processors
-
-			Eric Miao <eric.miao@marvell.com>
-
-MFP stands for Multi-Function Pin, which is the pin-mux logic on PXA3xx and
-later PXA series processors.  This document describes the existing MFP API,
-and how board/platform driver authors could make use of it.
-
- Basic Concept
-===============
-
-Unlike the GPIO alternate function settings on PXA25x and PXA27x, a new MFP
-mechanism is introduced from PXA3xx to completely move the pin-mux functions
-out of the GPIO controller. In addition to pin-mux configurations, the MFP
-also controls the low power state, driving strength, pull-up/down and event
-detection of each pin.  Below is a diagram of internal connections between
-the MFP logic and the remaining SoC peripherals:
-
- +--------+
- |        |--(GPIO19)--+
- |  GPIO  |            |
- |        |--(GPIO...) |
- +--------+            |
-                       |       +---------+
- +--------+            +------>|         |
- |  PWM2  |--(PWM_OUT)-------->|   MFP   |
- +--------+            +------>|         |-------> to external PAD
-                       | +---->|         |
- +--------+            | | +-->|         |
- |  SSP2  |---(TXD)----+ | |   +---------+
- +--------+              | |
-                         | |
- +--------+              | |
- | Keypad |--(MKOUT4)----+ |
- +--------+                |
-                           |
- +--------+                |
- |  UART2 |---(TXD)--------+
- +--------+
-
-NOTE: the external pad is named as MFP_PIN_GPIO19, it doesn't necessarily
-mean it's dedicated for GPIO19, only as a hint that internally this pin
-can be routed from GPIO19 of the GPIO controller.
-
-To better understand the change from PXA25x/PXA27x GPIO alternate function
-to this new MFP mechanism, here are several key points:
-
-  1. GPIO controller on PXA3xx is now a dedicated controller, same as other
-     internal controllers like PWM, SSP and UART, with 128 internal signals
-     which can be routed to external through one or more MFPs (e.g. GPIO<0>
-     can be routed through either MFP_PIN_GPIO0 as well as MFP_PIN_GPIO0_2,
-     see arch/arm/mach-pxa/mfp-pxa300.h)
-
-  2. Alternate function configuration is removed from this GPIO controller,
-     the remaining functions are pure GPIO-specific, i.e.
-
-       - GPIO signal level control
-       - GPIO direction control
-       - GPIO level change detection
-
-  3. Low power state for each pin is now controlled by MFP, this means the
-     PGSRx registers on PXA2xx are now useless on PXA3xx
-
-  4. Wakeup detection is now controlled by MFP, PWER does not control the
-     wakeup from GPIO(s) any more, depending on the sleeping state, ADxER
-     (as defined in pxa3xx-regs.h) controls the wakeup from MFP
-
-NOTE: with such a clear separation of MFP and GPIO, by GPIO<xx> we normally
-mean it is a GPIO signal, and by MFP<xxx> or pin xxx, we mean a physical
-pad (or ball).
-
- MFP API Usage
-===============
-
-For board code writers, here are some guidelines:
-
-1. include ONE of the following header files in your <board>.c:
-
-   - #include "mfp-pxa25x.h"
-   - #include "mfp-pxa27x.h"
-   - #include "mfp-pxa300.h"
-   - #include "mfp-pxa320.h"
-   - #include "mfp-pxa930.h"
-
-   NOTE: only one file in your <board>.c, depending on the processors used,
-   because pin configuration definitions may conflict in these file (i.e.
-   same name, different meaning and settings on different processors). E.g.
-   for zylonite platform, which support both PXA300/PXA310 and PXA320, two
-   separate files are introduced: zylonite_pxa300.c and zylonite_pxa320.c
-   (in addition to handle MFP configuration differences, they also handle
-   the other differences between the two combinations).
-
-   NOTE: PXA300 and PXA310 are almost identical in pin configurations (with
-   PXA310 supporting some additional ones), thus the difference is actually
-   covered in a single mfp-pxa300.h.
-
-2. prepare an array for the initial pin configurations, e.g.:
-
-   static unsigned long mainstone_pin_config[] __initdata = {
-	/* Chip Select */
-	GPIO15_nCS_1,
-
-	/* LCD - 16bpp Active TFT */
-	GPIOxx_TFT_LCD_16BPP,
-	GPIO16_PWM0_OUT,	/* Backlight */
-
-	/* MMC */
-	GPIO32_MMC_CLK,
-	GPIO112_MMC_CMD,
-	GPIO92_MMC_DAT_0,
-	GPIO109_MMC_DAT_1,
-	GPIO110_MMC_DAT_2,
-	GPIO111_MMC_DAT_3,
-
-	...
-
-	/* GPIO */
-	GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH,
-   };
-
-   a) once the pin configurations are passed to pxa{2xx,3xx}_mfp_config(),
-   and written to the actual registers, they are useless and may discard,
-   adding '__initdata' will help save some additional bytes here.
-
-   b) when there is only one possible pin configurations for a component,
-   some simplified definitions can be used, e.g. GPIOxx_TFT_LCD_16BPP on
-   PXA25x and PXA27x processors
-
-   c) if by board design, a pin can be configured to wake up the system
-   from low power state, it can be 'OR'ed with any of:
-
-      WAKEUP_ON_EDGE_BOTH
-      WAKEUP_ON_EDGE_RISE
-      WAKEUP_ON_EDGE_FALL
-      WAKEUP_ON_LEVEL_HIGH - specifically for enabling of keypad GPIOs,
-
-   to indicate that this pin has the capability of wake-up the system,
-   and on which edge(s). This, however, doesn't necessarily mean the
-   pin _will_ wakeup the system, it will only when set_irq_wake() is
-   invoked with the corresponding GPIO IRQ (GPIO_IRQ(xx) or gpio_to_irq())
-   and eventually calls gpio_set_wake() for the actual register setting.
-
-   d) although PXA3xx MFP supports edge detection on each pin, the
-   internal logic will only wakeup the system when those specific bits
-   in ADxER registers are set, which can be well mapped to the
-   corresponding peripheral, thus set_irq_wake() can be called with 
-   the peripheral IRQ to enable the wakeup.
-
-
- MFP on PXA3xx
-===============
-
-Every external I/O pad on PXA3xx (excluding those for special purpose) has
-one MFP logic associated, and is controlled by one MFP register (MFPR).
-
-The MFPR has the following bit definitions (for PXA300/PXA310/PXA320):
-
- 31                        16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-  +-------------------------+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
-  |         RESERVED        |PS|PU|PD|  DRIVE |SS|SD|SO|EC|EF|ER|--| AF_SEL |
-  +-------------------------+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
-
-  Bit 3:   RESERVED
-  Bit 4:   EDGE_RISE_EN - enable detection of rising edge on this pin
-  Bit 5:   EDGE_FALL_EN - enable detection of falling edge on this pin
-  Bit 6:   EDGE_CLEAR   - disable edge detection on this pin
-  Bit 7:   SLEEP_OE_N   - enable outputs during low power modes
-  Bit 8:   SLEEP_DATA   - output data on the pin during low power modes
-  Bit 9:   SLEEP_SEL    - selection control for low power modes signals
-  Bit 13:  PULLDOWN_EN  - enable the internal pull-down resistor on this pin
-  Bit 14:  PULLUP_EN    - enable the internal pull-up resistor on this pin
-  Bit 15:  PULL_SEL     - pull state controlled by selected alternate function
-                          (0) or by PULL{UP,DOWN}_EN bits (1)
-
-  Bit 0 - 2: AF_SEL - alternate function selection, 8 possibilities, from 0-7
-  Bit 10-12: DRIVE  - drive strength and slew rate
-			0b000 - fast 1mA
-			0b001 - fast 2mA
-			0b002 - fast 3mA
-			0b003 - fast 4mA
-			0b004 - slow 6mA
-			0b005 - fast 6mA
-			0b006 - slow 10mA
-			0b007 - fast 10mA
-
- MFP Design for PXA2xx/PXA3xx
-==============================
-
-Due to the difference of pin-mux handling between PXA2xx and PXA3xx, a unified
-MFP API is introduced to cover both series of processors.
-
-The basic idea of this design is to introduce definitions for all possible pin
-configurations, these definitions are processor and platform independent, and
-the actual API invoked to convert these definitions into register settings and
-make them effective there-after.
-
-  Files Involved
-  --------------
-
-  - arch/arm/mach-pxa/include/mach/mfp.h
-  
-  for
-    1. Unified pin definitions - enum constants for all configurable pins
-    2. processor-neutral bit definitions for a possible MFP configuration
-
-  - arch/arm/mach-pxa/mfp-pxa3xx.h
-
-  for PXA3xx specific MFPR register bit definitions and PXA3xx common pin
-  configurations
-
-  - arch/arm/mach-pxa/mfp-pxa2xx.h
-
-  for PXA2xx specific definitions and PXA25x/PXA27x common pin configurations
-
-  - arch/arm/mach-pxa/mfp-pxa25x.h
-    arch/arm/mach-pxa/mfp-pxa27x.h
-    arch/arm/mach-pxa/mfp-pxa300.h
-    arch/arm/mach-pxa/mfp-pxa320.h
-    arch/arm/mach-pxa/mfp-pxa930.h
-
-  for processor specific definitions
-
-  - arch/arm/mach-pxa/mfp-pxa3xx.c
-  - arch/arm/mach-pxa/mfp-pxa2xx.c
-
-  for implementation of the pin configuration to take effect for the actual
-  processor.
-
-  Pin Configuration
-  -----------------
-
-  The following comments are copied from mfp.h (see the actual source code
-  for most updated info)
-  
-  /*
-   * a possible MFP configuration is represented by a 32-bit integer
-   *
-   * bit  0.. 9 - MFP Pin Number (1024 Pins Maximum)
-   * bit 10..12 - Alternate Function Selection
-   * bit 13..15 - Drive Strength
-   * bit 16..18 - Low Power Mode State
-   * bit 19..20 - Low Power Mode Edge Detection
-   * bit 21..22 - Run Mode Pull State
-   *
-   * to facilitate the definition, the following macros are provided
-   *
-   * MFP_CFG_DEFAULT - default MFP configuration value, with
-   * 		  alternate function = 0,
-   * 		  drive strength = fast 3mA (MFP_DS03X)
-   * 		  low power mode = default
-   * 		  edge detection = none
-   *
-   * MFP_CFG	- default MFPR value with alternate function
-   * MFP_CFG_DRV	- default MFPR value with alternate function and
-   * 		  pin drive strength
-   * MFP_CFG_LPM	- default MFPR value with alternate function and
-   * 		  low power mode
-   * MFP_CFG_X	- default MFPR value with alternate function,
-   * 		  pin drive strength and low power mode
-   */
-
-   Examples of pin configurations are:
-
-   #define GPIO94_SSP3_RXD		MFP_CFG_X(GPIO94, AF1, DS08X, FLOAT)
-
-   which reads GPIO94 can be configured as SSP3_RXD, with alternate function
-   selection of 1, driving strength of 0b101, and a float state in low power
-   modes.
-
-   NOTE: this is the default setting of this pin being configured as SSP3_RXD
-   which can be modified a bit in board code, though it is not recommended to
-   do so, simply because this default setting is usually carefully encoded,
-   and is supposed to work in most cases.
-
-  Register Settings
-  -----------------
-
-   Register settings on PXA3xx for a pin configuration is actually very
-   straight-forward, most bits can be converted directly into MFPR value
-   in a easier way. Two sets of MFPR values are calculated: the run-time
-   ones and the low power mode ones, to allow different settings.
-
-   The conversion from a generic pin configuration to the actual register
-   settings on PXA2xx is a bit complicated: many registers are involved,
-   including GAFRx, GPDRx, PGSRx, PWER, PKWR, PFER and PRER. Please see
-   mfp-pxa2xx.c for how the conversion is made.
diff --git a/Documentation/arm/sa1100/adsbitsy.rst b/Documentation/arm/sa1100/adsbitsy.rst
new file mode 100644
index 000000000000..c179cb26b682
--- /dev/null
+++ b/Documentation/arm/sa1100/adsbitsy.rst
@@ -0,0 +1,51 @@
+===============================
+ADS Bitsy Single Board Computer
+===============================
+
+(It is different from Bitsy(iPAQ) of Compaq)
+
+For more details, contact Applied Data Systems or see
+http://www.applieddata.net/products.html
+
+The Linux support for this product has been provided by
+Woojung Huh <whuh@applieddata.net>
+
+Use 'make adsbitsy_config' before any 'make config'.
+This will set up defaults for ADS Bitsy support.
+
+The kernel zImage is linked to be loaded and executed at 0xc0400000.
+
+Linux can  be used with the ADS BootLoader that ships with the
+newer rev boards. See their documentation on how to load Linux.
+
+Supported peripherals
+=====================
+
+- SA1100 LCD frame buffer (8/16bpp...sort of)
+- SA1111 USB Master
+- SA1100 serial port
+- pcmcia, compact flash
+- touchscreen(ucb1200)
+- console on LCD screen
+- serial ports (ttyS[0-2])
+  - ttyS0 is default for serial console
+
+To do
+=====
+
+- everything else!  :-)
+
+Notes
+=====
+
+- The flash on board is divided into 3 partitions.
+  You should be careful to use flash on board.
+  Its partition is different from GraphicsClient Plus and GraphicsMaster
+
+- 16bpp mode requires a different cable than what ships with the board.
+  Contact ADS or look through the manual to wire your own. Currently,
+  if you compile with 16bit mode support and switch into a lower bpp
+  mode, the timing is off so the image is corrupted.  This will be
+  fixed soon.
+
+Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
diff --git a/Documentation/arm/sa1100/assabet.rst b/Documentation/arm/sa1100/assabet.rst
new file mode 100644
index 000000000000..3e704831c311
--- /dev/null
+++ b/Documentation/arm/sa1100/assabet.rst
@@ -0,0 +1,301 @@
+============================================
+The Intel Assabet (SA-1110 evaluation) board
+============================================
+
+Please see:
+http://developer.intel.com
+
+Also some notes from John G Dorsey <jd5q@andrew.cmu.edu>:
+http://www.cs.cmu.edu/~wearable/software/assabet.html
+
+
+Building the kernel
+-------------------
+
+To build the kernel with current defaults::
+
+	make assabet_config
+	make oldconfig
+	make zImage
+
+The resulting kernel image should be available in linux/arch/arm/boot/zImage.
+
+
+Installing a bootloader
+-----------------------
+
+A couple of bootloaders able to boot Linux on Assabet are available:
+
+BLOB (http://www.lartmaker.nl/lartware/blob/)
+
+   BLOB is a bootloader used within the LART project.  Some contributed
+   patches were merged into BLOB to add support for Assabet.
+
+Compaq's Bootldr + John Dorsey's patch for Assabet support
+(http://www.handhelds.org/Compaq/bootldr.html)
+(http://www.wearablegroup.org/software/bootldr/)
+
+   Bootldr is the bootloader developed by Compaq for the iPAQ Pocket PC.
+   John Dorsey has produced add-on patches to add support for Assabet and
+   the JFFS filesystem.
+
+RedBoot (http://sources.redhat.com/redboot/)
+
+   RedBoot is a bootloader developed by Red Hat based on the eCos RTOS
+   hardware abstraction layer.  It supports Assabet amongst many other
+   hardware platforms.
+
+RedBoot is currently the recommended choice since it's the only one to have
+networking support, and is the most actively maintained.
+
+Brief examples on how to boot Linux with RedBoot are shown below.  But first
+you need to have RedBoot installed in your flash memory.  A known to work
+precompiled RedBoot binary is available from the following location:
+
+- ftp://ftp.netwinder.org/users/n/nico/
+- ftp://ftp.arm.linux.org.uk/pub/linux/arm/people/nico/
+- ftp://ftp.handhelds.org/pub/linux/arm/sa-1100-patches/
+
+Look for redboot-assabet*.tgz.  Some installation infos are provided in
+redboot-assabet*.txt.
+
+
+Initial RedBoot configuration
+-----------------------------
+
+The commands used here are explained in The RedBoot User's Guide available
+on-line at http://sources.redhat.com/ecos/docs.html.
+Please refer to it for explanations.
+
+If you have a CF network card (my Assabet kit contained a CF+ LP-E from
+Socket Communications Inc.), you should strongly consider using it for TFTP
+file transfers.  You must insert it before RedBoot runs since it can't detect
+it dynamically.
+
+To initialize the flash directory::
+
+	fis init -f
+
+To initialize the non-volatile settings, like whether you want to use BOOTP or
+a static IP address, etc, use this command::
+
+	fconfig -i
+
+
+Writing a kernel image into flash
+---------------------------------
+
+First, the kernel image must be loaded into RAM.  If you have the zImage file
+available on a TFTP server::
+
+	load zImage -r -b 0x100000
+
+If you rather want to use Y-Modem upload over the serial port::
+
+	load -m ymodem -r -b 0x100000
+
+To write it to flash::
+
+	fis create "Linux kernel" -b 0x100000 -l 0xc0000
+
+
+Booting the kernel
+------------------
+
+The kernel still requires a filesystem to boot.  A ramdisk image can be loaded
+as follows::
+
+	load ramdisk_image.gz -r -b 0x800000
+
+Again, Y-Modem upload can be used instead of TFTP by replacing the file name
+by '-y ymodem'.
+
+Now the kernel can be retrieved from flash like this::
+
+	fis load "Linux kernel"
+
+or loaded as described previously.  To boot the kernel::
+
+	exec -b 0x100000 -l 0xc0000
+
+The ramdisk image could be stored into flash as well, but there are better
+solutions for on-flash filesystems as mentioned below.
+
+
+Using JFFS2
+-----------
+
+Using JFFS2 (the Second Journalling Flash File System) is probably the most
+convenient way to store a writable filesystem into flash.  JFFS2 is used in
+conjunction with the MTD layer which is responsible for low-level flash
+management.  More information on the Linux MTD can be found on-line at:
+http://www.linux-mtd.infradead.org/.  A JFFS howto with some infos about
+creating JFFS/JFFS2 images is available from the same site.
+
+For instance, a sample JFFS2 image can be retrieved from the same FTP sites
+mentioned below for the precompiled RedBoot image.
+
+To load this file::
+
+	load sample_img.jffs2 -r -b 0x100000
+
+The result should look like::
+
+	RedBoot> load sample_img.jffs2 -r -b 0x100000
+	Raw file loaded 0x00100000-0x00377424
+
+Now we must know the size of the unallocated flash::
+
+	fis free
+
+Result::
+
+	RedBoot> fis free
+	  0x500E0000 .. 0x503C0000
+
+The values above may be different depending on the size of the filesystem and
+the type of flash.  See their usage below as an example and take care of
+substituting yours appropriately.
+
+We must determine some values::
+
+	size of unallocated flash:	0x503c0000 - 0x500e0000 = 0x2e0000
+	size of the filesystem image:	0x00377424 - 0x00100000 = 0x277424
+
+We want to fit the filesystem image of course, but we also want to give it all
+the remaining flash space as well.  To write it::
+
+	fis unlock -f 0x500E0000 -l 0x2e0000
+	fis erase -f 0x500E0000 -l 0x2e0000
+	fis write -b 0x100000 -l 0x277424 -f 0x500E0000
+	fis create "JFFS2" -n -f 0x500E0000 -l 0x2e0000
+
+Now the filesystem is associated to a MTD "partition" once Linux has discovered
+what they are in the boot process.  From Redboot, the 'fis list' command
+displays them::
+
+	RedBoot> fis list
+	Name              FLASH addr  Mem addr    Length      Entry point
+	RedBoot           0x50000000  0x50000000  0x00020000  0x00000000
+	RedBoot config    0x503C0000  0x503C0000  0x00020000  0x00000000
+	FIS directory     0x503E0000  0x503E0000  0x00020000  0x00000000
+	Linux kernel      0x50020000  0x00100000  0x000C0000  0x00000000
+	JFFS2             0x500E0000  0x500E0000  0x002E0000  0x00000000
+
+However Linux should display something like::
+
+	SA1100 flash: probing 32-bit flash bus
+	SA1100 flash: Found 2 x16 devices at 0x0 in 32-bit mode
+	Using RedBoot partition definition
+	Creating 5 MTD partitions on "SA1100 flash":
+	0x00000000-0x00020000 : "RedBoot"
+	0x00020000-0x000e0000 : "Linux kernel"
+	0x000e0000-0x003c0000 : "JFFS2"
+	0x003c0000-0x003e0000 : "RedBoot config"
+	0x003e0000-0x00400000 : "FIS directory"
+
+What's important here is the position of the partition we are interested in,
+which is the third one.  Within Linux, this correspond to /dev/mtdblock2.
+Therefore to boot Linux with the kernel and its root filesystem in flash, we
+need this RedBoot command::
+
+	fis load "Linux kernel"
+	exec -b 0x100000 -l 0xc0000 -c "root=/dev/mtdblock2"
+
+Of course other filesystems than JFFS might be used, like cramfs for example.
+You might want to boot with a root filesystem over NFS, etc.  It is also
+possible, and sometimes more convenient, to flash a filesystem directly from
+within Linux while booted from a ramdisk or NFS.  The Linux MTD repository has
+many tools to deal with flash memory as well, to erase it for example.  JFFS2
+can then be mounted directly on a freshly erased partition and files can be
+copied over directly.  Etc...
+
+
+RedBoot scripting
+-----------------
+
+All the commands above aren't so useful if they have to be typed in every
+time the Assabet is rebooted.  Therefore it's possible to automate the boot
+process using RedBoot's scripting capability.
+
+For example, I use this to boot Linux with both the kernel and the ramdisk
+images retrieved from a TFTP server on the network::
+
+	RedBoot> fconfig
+	Run script at boot: false true
+	Boot script:
+	Enter script, terminate with empty line
+	>> load zImage -r -b 0x100000
+	>> load ramdisk_ks.gz -r -b 0x800000
+	>> exec -b 0x100000 -l 0xc0000
+	>>
+	Boot script timeout (1000ms resolution): 3
+	Use BOOTP for network configuration: true
+	GDB connection port: 9000
+	Network debug at boot time: false
+	Update RedBoot non-volatile configuration - are you sure (y/n)? y
+
+Then, rebooting the Assabet is just a matter of waiting for the login prompt.
+
+
+
+Nicolas Pitre
+nico@fluxnic.net
+
+June 12, 2001
+
+
+Status of peripherals in -rmk tree (updated 14/10/2001)
+-------------------------------------------------------
+
+Assabet:
+ Serial ports:
+  Radio:		TX, RX, CTS, DSR, DCD, RI
+   - PM:		Not tested.
+   - COM:		TX, RX, CTS, DSR, DCD, RTS, DTR, PM
+   - PM:		Not tested.
+   - I2C:		Implemented, not fully tested.
+   - L3:		Fully tested, pass.
+   - PM:		Not tested.
+
+ Video:
+  - LCD:		Fully tested.  PM
+
+   (LCD doesn't like being blanked with neponset connected)
+
+  - Video out:		Not fully
+
+ Audio:
+  UDA1341:
+  -  Playback:		Fully tested, pass.
+  -  Record:		Implemented, not tested.
+  -  PM:			Not tested.
+
+  UCB1200:
+  -  Audio play:	Implemented, not heavily tested.
+  -  Audio rec:		Implemented, not heavily tested.
+  -  Telco audio play:	Implemented, not heavily tested.
+  -  Telco audio rec:	Implemented, not heavily tested.
+  -  POTS control:	No
+  -  Touchscreen:	Yes
+  -  PM:		Not tested.
+
+ Other:
+  - PCMCIA:
+  - LPE:		Fully tested, pass.
+  - USB:		No
+  - IRDA:
+  - SIR:		Fully tested, pass.
+  - FIR:		Fully tested, pass.
+  - PM:			Not tested.
+
+Neponset:
+ Serial ports:
+  - COM1,2:		TX, RX, CTS, DSR, DCD, RTS, DTR
+  - PM:			Not tested.
+  - USB:		Implemented, not heavily tested.
+  - PCMCIA:		Implemented, not heavily tested.
+  - CF:			Implemented, not heavily tested.
+  - PM:			Not tested.
+
+More stuff can be found in the -np (Nicolas Pitre's) tree.
diff --git a/Documentation/arm/sa1100/brutus.rst b/Documentation/arm/sa1100/brutus.rst
new file mode 100644
index 000000000000..e1a23bee6d44
--- /dev/null
+++ b/Documentation/arm/sa1100/brutus.rst
@@ -0,0 +1,69 @@
+======
+Brutus
+======
+
+Brutus is an evaluation platform for the SA1100 manufactured by Intel.
+For more details, see:
+
+http://developer.intel.com
+
+To compile for Brutus, you must issue the following commands::
+
+	make brutus_config
+	make config
+	[accept all the defaults]
+	make zImage
+
+The resulting kernel will end up in linux/arch/arm/boot/zImage.  This file
+must be loaded at 0xc0008000 in Brutus's memory and execution started at
+0xc0008000 as well with the value of registers r0 = 0 and r1 = 16 upon
+entry.
+
+But prior to execute the kernel, a ramdisk image must also be loaded in
+memory.  Use memory address 0xd8000000 for this.  Note that the file
+containing the (compressed) ramdisk image must not exceed 4 MB.
+
+Typically, you'll need angelboot to load the kernel.
+The following angelboot.opt file should be used::
+
+	base 0xc0008000
+	entry 0xc0008000
+	r0 0x00000000
+	r1 0x00000010
+	device /dev/ttyS0
+	options "9600 8N1"
+	baud 115200
+	otherfile ramdisk_img.gz
+	otherbase 0xd8000000
+
+Then load the kernel and ramdisk with::
+
+	angelboot -f angelboot.opt zImage
+
+The first Brutus serial port (assumed to be linked to /dev/ttyS0 on your
+host PC) is used by angel to load the kernel and ramdisk image. The serial
+console is provided through the second Brutus serial port. To access it,
+you may use minicom configured with /dev/ttyS1, 9600 baud, 8N1, no flow
+control.
+
+Currently supported
+===================
+
+	- RS232 serial ports
+	- audio output
+	- LCD screen
+	- keyboard
+
+The actual Brutus support may not be complete without extra patches.
+If such patches exist, they should be found from
+ftp.netwinder.org/users/n/nico.
+
+A full PCMCIA support is still missing, although it's possible to hack
+some drivers in order to drive already inserted cards at boot time with
+little modifications.
+
+Any contribution is welcome.
+
+Please send patches to nico@fluxnic.net
+
+Have Fun !
diff --git a/Documentation/arm/sa1100/cerf.rst b/Documentation/arm/sa1100/cerf.rst
new file mode 100644
index 000000000000..7fa71b609bf9
--- /dev/null
+++ b/Documentation/arm/sa1100/cerf.rst
@@ -0,0 +1,35 @@
+==============
+CerfBoard/Cube
+==============
+
+*** The StrongARM version of the CerfBoard/Cube has been discontinued ***
+
+The Intrinsyc CerfBoard is a StrongARM 1110-based computer on a board
+that measures approximately 2" square. It includes an Ethernet
+controller, an RS232-compatible serial port, a USB function port, and
+one CompactFlash+ slot on the back. Pictures can be found at the
+Intrinsyc website, http://www.intrinsyc.com.
+
+This document describes the support in the Linux kernel for the
+Intrinsyc CerfBoard.
+
+Supported in this version
+=========================
+
+   - CompactFlash+ slot (select PCMCIA in General Setup and any options
+     that may be required)
+   - Onboard Crystal CS8900 Ethernet controller (Cerf CS8900A support in
+     Network Devices)
+   - Serial ports with a serial console (hardcoded to 38400 8N1)
+
+In order to get this kernel onto your Cerf, you need a server that runs
+both BOOTP and TFTP. Detailed instructions should have come with your
+evaluation kit on how to use the bootloader. This series of commands
+will suffice::
+
+   make ARCH=arm CROSS_COMPILE=arm-linux- cerfcube_defconfig
+   make ARCH=arm CROSS_COMPILE=arm-linux- zImage
+   make ARCH=arm CROSS_COMPILE=arm-linux- modules
+   cp arch/arm/boot/zImage <TFTP directory>
+
+support@intrinsyc.com
diff --git a/Documentation/arm/sa1100/freebird.rst b/Documentation/arm/sa1100/freebird.rst
new file mode 100644
index 000000000000..81043d0c6d64
--- /dev/null
+++ b/Documentation/arm/sa1100/freebird.rst
@@ -0,0 +1,25 @@
+========
+Freebird
+========
+
+Freebird-1.1 is produced by Legend(C), Inc.
+`http://web.archive.org/web/*/http://www.legend.com.cn`
+and software/linux maintained by Coventive(C), Inc.
+(http://www.coventive.com)
+
+Based on the Nicolas's strongarm kernel tree.
+
+Maintainer:
+
+Chester Kuo
+	- <chester@coventive.com>
+	- <chester@linux.org.tw>
+
+Author:
+
+- Tim wu <timwu@coventive.com>
+- CIH <cih@coventive.com>
+- Eric Peng <ericpeng@coventive.com>
+- Jeff Lee <jeff_lee@coventive.com>
+- Allen Cheng
+- Tony Liu <tonyliu@coventive.com>
diff --git a/Documentation/arm/sa1100/graphicsclient.rst b/Documentation/arm/sa1100/graphicsclient.rst
new file mode 100644
index 000000000000..a73d61c3ce91
--- /dev/null
+++ b/Documentation/arm/sa1100/graphicsclient.rst
@@ -0,0 +1,102 @@
+=============================================
+ADS GraphicsClient Plus Single Board Computer
+=============================================
+
+For more details, contact Applied Data Systems or see
+http://www.applieddata.net/products.html
+
+The original Linux support for this product has been provided by
+Nicolas Pitre <nico@fluxnic.net>. Continued development work by
+Woojung Huh <whuh@applieddata.net>
+
+It's currently possible to mount a root filesystem via NFS providing a
+complete Linux environment.  Otherwise a ramdisk image may be used.  The
+board supports MTD/JFFS, so you could also mount something on there.
+
+Use 'make graphicsclient_config' before any 'make config'.  This will set up
+defaults for GraphicsClient Plus support.
+
+The kernel zImage is linked to be loaded and executed at 0xc0200000.
+Also the following registers should have the specified values upon entry::
+
+	r0 = 0
+	r1 = 29	(this is the GraphicsClient architecture number)
+
+Linux can  be used with the ADS BootLoader that ships with the
+newer rev boards. See their documentation on how to load Linux.
+Angel is not available for the GraphicsClient Plus AFAIK.
+
+There is a  board known as just the GraphicsClient that ADS used to
+produce but has end of lifed. This code will not work on the older
+board with the ADS bootloader, but should still work with Angel,
+as outlined below.  In any case, if you're planning on deploying
+something en masse, you should probably get the newer board.
+
+If using Angel on the older boards, here is a typical angel.opt option file
+if the kernel is loaded through the Angel Debug Monitor::
+
+	base 0xc0200000
+	entry 0xc0200000
+	r0 0x00000000
+	r1 0x0000001d
+	device /dev/ttyS1
+	options "38400 8N1"
+	baud 115200
+	#otherfile ramdisk.gz
+	#otherbase 0xc0800000
+	exec minicom
+
+Then the kernel (and ramdisk if otherfile/otherbase lines above are
+uncommented) would be loaded with::
+
+	angelboot -f angelboot.opt zImage
+
+Here it is assumed that the board is connected to ttyS1 on your PC
+and that minicom is preconfigured with /dev/ttyS1, 38400 baud, 8N1, no flow
+control by default.
+
+If any other bootloader is used, ensure it accomplish the same, especially
+for r0/r1 register values before jumping into the kernel.
+
+
+Supported peripherals
+=====================
+
+- SA1100 LCD frame buffer (8/16bpp...sort of)
+- on-board SMC 92C96 ethernet NIC
+- SA1100 serial port
+- flash memory access (MTD/JFFS)
+- pcmcia
+- touchscreen(ucb1200)
+- ps/2 keyboard
+- console on LCD screen
+- serial ports (ttyS[0-2])
+  - ttyS0 is default for serial console
+- Smart I/O (ADC, keypad, digital inputs, etc)
+  See http://www.eurotech-inc.com/linux-sbc.asp for IOCTL documentation
+  and example user space code. ps/2 keybd is multiplexed through this driver
+
+To do
+=====
+
+- UCB1200 audio with new ucb_generic layer
+- everything else!  :-)
+
+Notes
+=====
+
+- The flash on board is divided into 3 partitions.  mtd0 is where
+  the ADS boot ROM and zImage is stored.  It's been marked as
+  read-only to keep you from blasting over the bootloader. :)  mtd1 is
+  for the ramdisk.gz image.  mtd2 is user flash space and can be
+  utilized for either JFFS or if you're feeling crazy, running ext2
+  on top of it. If you're not using the ADS bootloader, you're
+  welcome to blast over the mtd1 partition also.
+
+- 16bpp mode requires a different cable than what ships with the board.
+  Contact ADS or look through the manual to wire your own. Currently,
+  if you compile with 16bit mode support and switch into a lower bpp
+  mode, the timing is off so the image is corrupted.  This will be
+  fixed soon.
+
+Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
diff --git a/Documentation/arm/sa1100/graphicsmaster.rst b/Documentation/arm/sa1100/graphicsmaster.rst
new file mode 100644
index 000000000000..e39892514f0c
--- /dev/null
+++ b/Documentation/arm/sa1100/graphicsmaster.rst
@@ -0,0 +1,60 @@
+========================================
+ADS GraphicsMaster Single Board Computer
+========================================
+
+For more details, contact Applied Data Systems or see
+http://www.applieddata.net/products.html
+
+The original Linux support for this product has been provided by
+Nicolas Pitre <nico@fluxnic.net>. Continued development work by
+Woojung Huh <whuh@applieddata.net>
+
+Use 'make graphicsmaster_config' before any 'make config'.
+This will set up defaults for GraphicsMaster support.
+
+The kernel zImage is linked to be loaded and executed at 0xc0400000.
+
+Linux can  be used with the ADS BootLoader that ships with the
+newer rev boards. See their documentation on how to load Linux.
+
+Supported peripherals
+=====================
+
+- SA1100 LCD frame buffer (8/16bpp...sort of)
+- SA1111 USB Master
+- on-board SMC 92C96 ethernet NIC
+- SA1100 serial port
+- flash memory access (MTD/JFFS)
+- pcmcia, compact flash
+- touchscreen(ucb1200)
+- ps/2 keyboard
+- console on LCD screen
+- serial ports (ttyS[0-2])
+  - ttyS0 is default for serial console
+- Smart I/O (ADC, keypad, digital inputs, etc)
+  See http://www.eurotech-inc.com/linux-sbc.asp for IOCTL documentation
+  and example user space code. ps/2 keybd is multiplexed through this driver
+
+To do
+=====
+
+- everything else!  :-)
+
+Notes
+=====
+
+- The flash on board is divided into 3 partitions.  mtd0 is where
+  the zImage is stored.  It's been marked as read-only to keep you
+  from blasting over the bootloader. :)  mtd1 is
+  for the ramdisk.gz image.  mtd2 is user flash space and can be
+  utilized for either JFFS or if you're feeling crazy, running ext2
+  on top of it. If you're not using the ADS bootloader, you're
+  welcome to blast over the mtd1 partition also.
+
+- 16bpp mode requires a different cable than what ships with the board.
+  Contact ADS or look through the manual to wire your own. Currently,
+  if you compile with 16bit mode support and switch into a lower bpp
+  mode, the timing is off so the image is corrupted.  This will be
+  fixed soon.
+
+Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
diff --git a/Documentation/arm/sa1100/huw_webpanel.rst b/Documentation/arm/sa1100/huw_webpanel.rst
new file mode 100644
index 000000000000..1dc7ccb165f0
--- /dev/null
+++ b/Documentation/arm/sa1100/huw_webpanel.rst
@@ -0,0 +1,21 @@
+=======================
+Hoeft & Wessel Webpanel
+=======================
+
+The HUW_WEBPANEL is a product of the german company Hoeft & Wessel AG
+
+If you want more information, please visit
+http://www.hoeft-wessel.de
+
+To build the kernel::
+
+	make huw_webpanel_config
+	make oldconfig
+	[accept all defaults]
+	make zImage
+
+Mostly of the work is done by:
+Roman Jordan         jor@hoeft-wessel.de
+Christoph Schulz    schu@hoeft-wessel.de
+
+2000/12/18/
diff --git a/Documentation/arm/sa1100/index.rst b/Documentation/arm/sa1100/index.rst
new file mode 100644
index 000000000000..fb2385b3accf
--- /dev/null
+++ b/Documentation/arm/sa1100/index.rst
@@ -0,0 +1,23 @@
+====================
+Intel StrongARM 1100
+====================
+
+.. toctree::
+    :maxdepth: 1
+
+    adsbitsy
+    assabet
+    brutus
+    cerf
+    freebird
+    graphicsclient
+    graphicsmaster
+    huw_webpanel
+    itsy
+    lart
+    nanoengine
+    pangolin
+    pleb
+    serial_uart
+    tifon
+    yopy
diff --git a/Documentation/arm/sa1100/itsy.rst b/Documentation/arm/sa1100/itsy.rst
new file mode 100644
index 000000000000..f49896ba3ef1
--- /dev/null
+++ b/Documentation/arm/sa1100/itsy.rst
@@ -0,0 +1,47 @@
+====
+Itsy
+====
+
+Itsy is a research project done by the Western Research Lab, and Systems
+Research Center in Palo Alto, CA. The Itsy project is one of several
+research projects at Compaq that are related to pocket computing.
+
+For more information, see:
+
+	http://www.hpl.hp.com/downloads/crl/itsy/
+
+Notes on initial 2.4 Itsy support (8/27/2000) :
+
+The port was done on an Itsy version 1.5 machine with a daughtercard with
+64 Meg of DRAM and 32 Meg of Flash. The initial work includes support for
+serial console (to see what you're doing).  No other devices have been
+enabled.
+
+To build, do a "make menuconfig" (or xmenuconfig) and select Itsy support.
+Disable Flash and LCD support. and then do a make zImage.
+Finally, you will need to cd to arch/arm/boot/tools and execute a make there
+to build the params-itsy program used to boot the kernel.
+
+In order to install the port of 2.4 to the itsy, You will need to set the
+configuration parameters in the monitor as follows::
+
+	Arg 1:0x08340000, Arg2: 0xC0000000, Arg3:18 (0x12), Arg4:0
+
+Make sure the start-routine address is set to 0x00060000.
+
+Next, flash the params-itsy program to 0x00060000 ("p 1 0x00060000" in the
+flash menu)  Flash the kernel in arch/arm/boot/zImage into 0x08340000
+("p 1 0x00340000").  Finally flash an initial ramdisk into 0xC8000000
+("p 2 0x0")  We used ramdisk-2-30.gz from the 0.11 version directory on
+handhelds.org.
+
+The serial connection we established was at:
+
+8-bit data, no parity, 1 stop bit(s), 115200.00 b/s. in the monitor, in the
+params-itsy program, and in the kernel itself.  This can be changed, but
+not easily. The monitor parameters are easily changed, the params program
+setup is assembly outl's, and the kernel is a configuration item specific to
+the itsy. (i.e. grep for CONFIG_SA1100_ITSY and you'll find where it is.)
+
+
+This should get you a properly booting 2.4 kernel on the itsy.
diff --git a/Documentation/arm/sa1100/lart.rst b/Documentation/arm/sa1100/lart.rst
new file mode 100644
index 000000000000..94c0568d1095
--- /dev/null
+++ b/Documentation/arm/sa1100/lart.rst
@@ -0,0 +1,15 @@
+====================================
+Linux Advanced Radio Terminal (LART)
+====================================
+
+The LART is a small (7.5 x 10cm) SA-1100 board, designed for embedded
+applications. It has 32 MB DRAM, 4MB Flash ROM, double RS232 and all
+other StrongARM-gadgets. Almost all SA signals are directly accessible
+through a number of connectors. The powersupply accepts voltages
+between 3.5V and 16V and is overdimensioned to support a range of
+daughterboards. A quad Ethernet / IDE / PS2 / sound daughterboard
+is under development, with plenty of others in different stages of
+planning.
+
+The hardware designs for this board have been released under an open license;
+see the LART page at http://www.lartmaker.nl/ for more information.
diff --git a/Documentation/arm/sa1100/nanoengine.rst b/Documentation/arm/sa1100/nanoengine.rst
new file mode 100644
index 000000000000..47f1a14cf98a
--- /dev/null
+++ b/Documentation/arm/sa1100/nanoengine.rst
@@ -0,0 +1,11 @@
+==========
+nanoEngine
+==========
+
+"nanoEngine" is a SA1110 based single board computer from
+Bright Star Engineering Inc.  See www.brightstareng.com/arm
+for more info.
+(Ref: Stuart Adams <sja@brightstareng.com>)
+
+Also visit Larry Doolittle's "Linux for the nanoEngine" site:
+http://www.brightstareng.com/arm/nanoeng.htm
diff --git a/Documentation/arm/sa1100/pangolin.rst b/Documentation/arm/sa1100/pangolin.rst
new file mode 100644
index 000000000000..f0c5c1618553
--- /dev/null
+++ b/Documentation/arm/sa1100/pangolin.rst
@@ -0,0 +1,29 @@
+========
+Pangolin
+========
+
+Pangolin is a StrongARM 1110-based evaluation platform produced
+by Dialogue Technology (http://www.dialogue.com.tw/).
+It has EISA slots for ease of configuration with SDRAM/Flash
+memory card, USB/Serial/Audio card, Compact Flash card,
+PCMCIA/IDE card and TFT-LCD card.
+
+To compile for Pangolin, you must issue the following commands::
+
+	make pangolin_config
+	make oldconfig
+	make zImage
+
+Supported peripherals
+=====================
+
+- SA1110 serial port (UART1/UART2/UART3)
+- flash memory access
+- compact flash driver
+- UDA1341 sound driver
+- SA1100 LCD controller for 800x600 16bpp TFT-LCD
+- MQ-200 driver for 800x600 16bpp TFT-LCD
+- Penmount(touch panel) driver
+- PCMCIA driver
+- SMC91C94 LAN driver
+- IDE driver (experimental)
diff --git a/Documentation/arm/sa1100/pleb.rst b/Documentation/arm/sa1100/pleb.rst
new file mode 100644
index 000000000000..d5b732967aa3
--- /dev/null
+++ b/Documentation/arm/sa1100/pleb.rst
@@ -0,0 +1,13 @@
+====
+PLEB
+====
+
+The PLEB project was started as a student initiative at the School of
+Computer Science and Engineering, University of New South Wales to make a
+pocket computer capable of running the Linux Kernel.
+
+PLEB support has yet to be fully integrated.
+
+For more information, see:
+
+	http://www.cse.unsw.edu.au
diff --git a/Documentation/arm/sa1100/serial_uart.rst b/Documentation/arm/sa1100/serial_uart.rst
new file mode 100644
index 000000000000..ea983642b9be
--- /dev/null
+++ b/Documentation/arm/sa1100/serial_uart.rst
@@ -0,0 +1,51 @@
+==================
+SA1100 serial port
+==================
+
+The SA1100 serial port had its major/minor numbers officially assigned::
+
+  > Date: Sun, 24 Sep 2000 21:40:27 -0700
+  > From: H. Peter Anvin <hpa@transmeta.com>
+  > To: Nicolas Pitre <nico@CAM.ORG>
+  > Cc: Device List Maintainer <device@lanana.org>
+  > Subject: Re: device
+  >
+  > Okay.  Note that device numbers 204 and 205 are used for "low density
+  > serial devices", so you will have a range of minors on those majors (the
+  > tty device layer handles this just fine, so you don't have to worry about
+  > doing anything special.)
+  >
+  > So your assignments are:
+  >
+  > 204 char        Low-density serial ports
+  >                   5 = /dev/ttySA0               SA1100 builtin serial port 0
+  >                   6 = /dev/ttySA1               SA1100 builtin serial port 1
+  >                   7 = /dev/ttySA2               SA1100 builtin serial port 2
+  >
+  > 205 char        Low-density serial ports (alternate device)
+  >                   5 = /dev/cusa0                Callout device for ttySA0
+  >                   6 = /dev/cusa1                Callout device for ttySA1
+  >                   7 = /dev/cusa2                Callout device for ttySA2
+  >
+
+You must create those inodes in /dev on the root filesystem used
+by your SA1100-based device::
+
+	mknod ttySA0 c 204 5
+	mknod ttySA1 c 204 6
+	mknod ttySA2 c 204 7
+	mknod cusa0 c 205 5
+	mknod cusa1 c 205 6
+	mknod cusa2 c 205 7
+
+In addition to the creation of the appropriate device nodes above, you
+must ensure your user space applications make use of the correct device
+name. The classic example is the content of the /etc/inittab file where
+you might have a getty process started on ttyS0.
+
+In this case:
+
+- replace occurrences of ttyS0 with ttySA0, ttyS1 with ttySA1, etc.
+
+- don't forget to add 'ttySA0', 'console', or the appropriate tty name
+  in /etc/securetty for root to be allowed to login as well.
diff --git a/Documentation/arm/sa1100/tifon.rst b/Documentation/arm/sa1100/tifon.rst
new file mode 100644
index 000000000000..c26e910b9ea7
--- /dev/null
+++ b/Documentation/arm/sa1100/tifon.rst
@@ -0,0 +1,7 @@
+=====
+Tifon
+=====
+
+More info has to come...
+
+Contact: Peter Danielsson <peter.danielsson@era-t.ericsson.se>
diff --git a/Documentation/arm/sa1100/yopy.rst b/Documentation/arm/sa1100/yopy.rst
new file mode 100644
index 000000000000..5b35a5f61a44
--- /dev/null
+++ b/Documentation/arm/sa1100/yopy.rst
@@ -0,0 +1,5 @@
+====
+Yopy
+====
+
+See http://www.yopydeveloper.org for more.
diff --git a/Documentation/arm/samsung-s3c24xx/cpufreq.rst b/Documentation/arm/samsung-s3c24xx/cpufreq.rst
new file mode 100644
index 000000000000..2ddc26c03b1f
--- /dev/null
+++ b/Documentation/arm/samsung-s3c24xx/cpufreq.rst
@@ -0,0 +1,76 @@
+=======================
+S3C24XX CPUfreq support
+=======================
+
+Introduction
+------------
+
+ The S3C24XX series support a number of power saving systems, such as
+ the ability to change the core, memory and peripheral operating
+ frequencies. The core control is exported via the CPUFreq driver
+ which has a number of different manual or automatic controls over the
+ rate the core is running at.
+
+ There are two forms of the driver depending on the specific CPU and
+ how the clocks are arranged. The first implementation used as single
+ PLL to feed the ARM, memory and peripherals via a series of dividers
+ and muxes and this is the implementation that is documented here. A
+ newer version where there is a separate PLL and clock divider for the
+ ARM core is available as a separate driver.
+
+
+Layout
+------
+
+ The code core manages the CPU specific drivers, any data that they
+ need to register and the interface to the generic drivers/cpufreq
+ system. Each CPU registers a driver to control the PLL, clock dividers
+ and anything else associated with it. Any board that wants to use this
+ framework needs to supply at least basic details of what is required.
+
+ The core registers with drivers/cpufreq at init time if all the data
+ necessary has been supplied.
+
+
+CPU support
+-----------
+
+ The support for each CPU depends on the facilities provided by the
+ SoC and the driver as each device has different PLL and clock chains
+ associated with it.
+
+
+Slow Mode
+---------
+
+ The SLOW mode where the PLL is turned off altogether and the
+ system is fed by the external crystal input is currently not
+ supported.
+
+
+sysfs
+-----
+
+ The core code exports extra information via sysfs in the directory
+ devices/system/cpu/cpu0/arch-freq.
+
+
+Board Support
+-------------
+
+ Each board that wants to use the cpufreq code must register some basic
+ information with the core driver to provide information about what the
+ board requires and any restrictions being placed on it.
+
+ The board needs to supply information about whether it needs the IO bank
+ timings changing, any maximum frequency limits and information about the
+ SDRAM refresh rate.
+
+
+
+
+Document Author
+---------------
+
+Ben Dooks, Copyright 2009 Simtec Electronics
+Licensed under GPLv2
diff --git a/Documentation/arm/samsung-s3c24xx/eb2410itx.rst b/Documentation/arm/samsung-s3c24xx/eb2410itx.rst
new file mode 100644
index 000000000000..7863c93652f8
--- /dev/null
+++ b/Documentation/arm/samsung-s3c24xx/eb2410itx.rst
@@ -0,0 +1,59 @@
+===================================
+Simtec Electronics EB2410ITX (BAST)
+===================================
+
+	http://www.simtec.co.uk/products/EB2410ITX/
+
+Introduction
+------------
+
+  The EB2410ITX is a S3C2410 based development board with a variety of
+  peripherals and expansion connectors. This board is also known by
+  the shortened name of Bast.
+
+
+Configuration
+-------------
+
+  To set the default configuration, use `make bast_defconfig` which
+  supports the commonly used features of this board.
+
+
+Support
+-------
+
+  Official support information can be found on the Simtec Electronics
+  website, at the product page http://www.simtec.co.uk/products/EB2410ITX/
+
+  Useful links:
+
+    - Resources Page http://www.simtec.co.uk/products/EB2410ITX/resources.html
+
+    - Board FAQ at http://www.simtec.co.uk/products/EB2410ITX/faq.html
+
+    - Bootloader info http://www.simtec.co.uk/products/SWABLE/resources.html
+      and FAQ http://www.simtec.co.uk/products/SWABLE/faq.html
+
+
+MTD
+---
+
+  The NAND and NOR support has been merged from the linux-mtd project.
+  Any problems, see http://www.linux-mtd.infradead.org/ for more
+  information or up-to-date versions of linux-mtd.
+
+
+IDE
+---
+
+  Both onboard IDE ports are supported, however there is no support for
+  changing speed of devices, PIO Mode 4 capable drives should be used.
+
+
+Maintainers
+-----------
+
+  This board is maintained by Simtec Electronics.
+
+
+Copyright 2004 Ben Dooks, Simtec Electronics
diff --git a/Documentation/arm/samsung-s3c24xx/gpio.rst b/Documentation/arm/samsung-s3c24xx/gpio.rst
new file mode 100644
index 000000000000..f7c3d7d011a2
--- /dev/null
+++ b/Documentation/arm/samsung-s3c24xx/gpio.rst
@@ -0,0 +1,172 @@
+====================
+S3C24XX GPIO Control
+====================
+
+Introduction
+------------
+
+  The s3c2410 kernel provides an interface to configure and
+  manipulate the state of the GPIO pins, and find out other
+  information about them.
+
+  There are a number of conditions attached to the configuration
+  of the s3c2410 GPIO system, please read the Samsung provided
+  data-sheet/users manual to find out the complete list.
+
+  See Documentation/arm/samsung/gpio.rst for the core implementation.
+
+
+GPIOLIB
+-------
+
+  With the event of the GPIOLIB in drivers/gpio, support for some
+  of the GPIO functions such as reading and writing a pin will
+  be removed in favour of this common access method.
+
+  Once all the extant drivers have been converted, the functions
+  listed below will be removed (they may be marked as __deprecated
+  in the near future).
+
+  The following functions now either have a `s3c_` specific variant
+  or are merged into gpiolib. See the definitions in
+  arch/arm/plat-samsung/include/plat/gpio-cfg.h:
+
+  - s3c2410_gpio_setpin()	gpio_set_value() or gpio_direction_output()
+  - s3c2410_gpio_getpin()	gpio_get_value() or gpio_direction_input()
+  - s3c2410_gpio_getirq()	gpio_to_irq()
+  - s3c2410_gpio_cfgpin()	s3c_gpio_cfgpin()
+  - s3c2410_gpio_getcfg()	s3c_gpio_getcfg()
+  - s3c2410_gpio_pullup()	s3c_gpio_setpull()
+
+
+GPIOLIB conversion
+------------------
+
+If you need to convert your board or driver to use gpiolib from the phased
+out s3c2410 API, then here are some notes on the process.
+
+1) If your board is exclusively using an GPIO, say to control peripheral
+   power, then it will require to claim the gpio with gpio_request() before
+   it can use it.
+
+   It is recommended to check the return value, with at least WARN_ON()
+   during initialisation.
+
+2) The s3c2410_gpio_cfgpin() can be directly replaced with s3c_gpio_cfgpin()
+   as they have the same arguments, and can either take the pin specific
+   values, or the more generic special-function-number arguments.
+
+3) s3c2410_gpio_pullup() changes have the problem that while the
+   s3c2410_gpio_pullup(x, 1) can be easily translated to the
+   s3c_gpio_setpull(x, S3C_GPIO_PULL_NONE), the s3c2410_gpio_pullup(x, 0)
+   are not so easy.
+
+   The s3c2410_gpio_pullup(x, 0) case enables the pull-up (or in the case
+   of some of the devices, a pull-down) and as such the new API distinguishes
+   between the UP and DOWN case. There is currently no 'just turn on' setting
+   which may be required if this becomes a problem.
+
+4) s3c2410_gpio_setpin() can be replaced by gpio_set_value(), the old call
+   does not implicitly configure the relevant gpio to output. The gpio
+   direction should be changed before using gpio_set_value().
+
+5) s3c2410_gpio_getpin() is replaceable by gpio_get_value() if the pin
+   has been set to input. It is currently unknown what the behaviour is
+   when using gpio_get_value() on an output pin (s3c2410_gpio_getpin
+   would return the value the pin is supposed to be outputting).
+
+6) s3c2410_gpio_getirq() should be directly replaceable with the
+   gpio_to_irq() call.
+
+The s3c2410_gpio and `gpio_` calls have always operated on the same gpio
+numberspace, so there is no problem with converting the gpio numbering
+between the calls.
+
+
+Headers
+-------
+
+  See arch/arm/mach-s3c24xx/include/mach/regs-gpio.h for the list
+  of GPIO pins, and the configuration values for them. This
+  is included by using #include <mach/regs-gpio.h>
+
+
+PIN Numbers
+-----------
+
+  Each pin has an unique number associated with it in regs-gpio.h,
+  e.g. S3C2410_GPA(0) or S3C2410_GPF(1). These defines are used to tell
+  the GPIO functions which pin is to be used.
+
+  With the conversion to gpiolib, there is no longer a direct conversion
+  from gpio pin number to register base address as in earlier kernels. This
+  is due to the number space required for newer SoCs where the later
+  GPIOs are not contiguous.
+
+
+Configuring a pin
+-----------------
+
+  The following function allows the configuration of a given pin to
+  be changed.
+
+    void s3c_gpio_cfgpin(unsigned int pin, unsigned int function);
+
+  e.g.:
+
+     s3c_gpio_cfgpin(S3C2410_GPA(0), S3C_GPIO_SFN(1));
+     s3c_gpio_cfgpin(S3C2410_GPE(8), S3C_GPIO_SFN(2));
+
+   which would turn GPA(0) into the lowest Address line A0, and set
+   GPE(8) to be connected to the SDIO/MMC controller's SDDAT1 line.
+
+
+Reading the current configuration
+---------------------------------
+
+  The current configuration of a pin can be read by using standard
+  gpiolib function:
+
+  s3c_gpio_getcfg(unsigned int pin);
+
+  The return value will be from the same set of values which can be
+  passed to s3c_gpio_cfgpin().
+
+
+Configuring a pull-up resistor
+------------------------------
+
+  A large proportion of the GPIO pins on the S3C2410 can have weak
+  pull-up resistors enabled. This can be configured by the following
+  function:
+
+    void s3c_gpio_setpull(unsigned int pin, unsigned int to);
+
+  Where the to value is S3C_GPIO_PULL_NONE to set the pull-up off,
+  and S3C_GPIO_PULL_UP to enable the specified pull-up. Any other
+  values are currently undefined.
+
+
+Getting and setting the state of a PIN
+--------------------------------------
+
+  These calls are now implemented by the relevant gpiolib calls, convert
+  your board or driver to use gpiolib.
+
+
+Getting the IRQ number associated with a PIN
+--------------------------------------------
+
+  A standard gpiolib function can map the given pin number to an IRQ
+  number to pass to the IRQ system.
+
+   int gpio_to_irq(unsigned int pin);
+
+  Note, not all pins have an IRQ.
+
+
+Author
+-------
+
+Ben Dooks, 03 October 2004
+Copyright 2004 Ben Dooks, Simtec Electronics
diff --git a/Documentation/arm/samsung-s3c24xx/h1940.rst b/Documentation/arm/samsung-s3c24xx/h1940.rst
new file mode 100644
index 000000000000..62a562c178e3
--- /dev/null
+++ b/Documentation/arm/samsung-s3c24xx/h1940.rst
@@ -0,0 +1,41 @@
+=============
+HP IPAQ H1940
+=============
+
+http://www.handhelds.org/projects/h1940.html
+
+Introduction
+------------
+
+  The HP H1940 is a S3C2410 based handheld device, with
+  bluetooth connectivity.
+
+
+Support
+-------
+
+  A variety of information is available
+
+  handhelds.org project page:
+
+    http://www.handhelds.org/projects/h1940.html
+
+  handhelds.org wiki page:
+
+    http://handhelds.org/moin/moin.cgi/HpIpaqH1940
+
+  Herbert Pötzl pages:
+
+    http://vserver.13thfloor.at/H1940/
+
+
+Maintainers
+-----------
+
+  This project is being maintained and developed by a variety
+  of people, including Ben Dooks, Arnaud Patard, and Herbert Pötzl.
+
+  Thanks to the many others who have also provided support.
+
+
+(c) 2005 Ben Dooks
diff --git a/Documentation/arm/samsung-s3c24xx/index.rst b/Documentation/arm/samsung-s3c24xx/index.rst
new file mode 100644
index 000000000000..6c7b241cbf37
--- /dev/null
+++ b/Documentation/arm/samsung-s3c24xx/index.rst
@@ -0,0 +1,18 @@
+﻿==========================
+Samsung S3C24XX SoC Family
+==========================
+
+.. toctree::
+   :maxdepth: 1
+
+   h1940
+   gpio
+   cpufreq
+   suspend
+   usb-host
+   s3c2412
+   eb2410itx
+   nand
+   smdk2440
+   s3c2413
+   overview
diff --git a/Documentation/arm/samsung-s3c24xx/nand.rst b/Documentation/arm/samsung-s3c24xx/nand.rst
new file mode 100644
index 000000000000..938995694ee7
--- /dev/null
+++ b/Documentation/arm/samsung-s3c24xx/nand.rst
@@ -0,0 +1,30 @@
+====================
+S3C24XX NAND Support
+====================
+
+Introduction
+------------
+
+Small Page NAND
+---------------
+
+The driver uses a 512 byte (1 page) ECC code for this setup. The
+ECC code is not directly compatible with the default kernel ECC
+code, so the driver enforces its own OOB layout and ECC parameters
+
+Large Page NAND
+---------------
+
+The driver is capable of handling NAND flash with a 2KiB page
+size, with support for hardware ECC generation and correction.
+
+Unlike the 512byte page mode, the driver generates ECC data for
+each 256 byte block in an 2KiB page. This means that more than
+one error in a page can be rectified. It also means that the
+OOB layout remains the default kernel layout for these flashes.
+
+
+Document Author
+---------------
+
+Ben Dooks, Copyright 2007 Simtec Electronics
diff --git a/Documentation/arm/samsung-s3c24xx/overview.rst b/Documentation/arm/samsung-s3c24xx/overview.rst
new file mode 100644
index 000000000000..e9a1dc7276b5
--- /dev/null
+++ b/Documentation/arm/samsung-s3c24xx/overview.rst
@@ -0,0 +1,319 @@
+==========================
+S3C24XX ARM Linux Overview
+==========================
+
+
+
+Introduction
+------------
+
+  The Samsung S3C24XX range of ARM9 System-on-Chip CPUs are supported
+  by the 's3c2410' architecture of ARM Linux. Currently the S3C2410,
+  S3C2412, S3C2413, S3C2416, S3C2440, S3C2442, S3C2443 and S3C2450 devices
+  are supported.
+
+  Support for the S3C2400 and S3C24A0 series was never completed and the
+  corresponding code has been removed after a while.  If someone wishes to
+  revive this effort, partial support can be retrieved from earlier Linux
+  versions.
+
+  The S3C2416 and S3C2450 devices are very similar and S3C2450 support is
+  included under the arch/arm/mach-s3c2416 directory. Note, while core
+  support for these SoCs is in, work on some of the extra peripherals
+  and extra interrupts is still ongoing.
+
+
+Configuration
+-------------
+
+  A generic S3C2410 configuration is provided, and can be used as the
+  default by `make s3c2410_defconfig`. This configuration has support
+  for all the machines, and the commonly used features on them.
+
+  Certain machines may have their own default configurations as well,
+  please check the machine specific documentation.
+
+
+Layout
+------
+
+  The core support files are located in the platform code contained in
+  arch/arm/plat-s3c24xx with headers in include/asm-arm/plat-s3c24xx.
+  This directory should be kept to items shared between the platform
+  code (arch/arm/plat-s3c24xx) and the arch/arm/mach-s3c24* code.
+
+  Each cpu has a directory with the support files for it, and the
+  machines that carry the device. For example S3C2410 is contained
+  in arch/arm/mach-s3c2410 and S3C2440 in arch/arm/mach-s3c2440
+
+  Register, kernel and platform data definitions are held in the
+  arch/arm/mach-s3c2410 directory./include/mach
+
+arch/arm/plat-s3c24xx:
+
+  Files in here are either common to all the s3c24xx family,
+  or are common to only some of them with names to indicate this
+  status. The files that are not common to all are generally named
+  with the initial cpu they support in the series to ensure a short
+  name without any possibility of confusion with newer devices.
+
+  As an example, initially s3c244x would cover s3c2440 and s3c2442, but
+  with the s3c2443 which does not share many of the same drivers in
+  this directory, the name becomes invalid. We stick to s3c2440-<x>
+  to indicate a driver that is s3c2440 and s3c2442 compatible.
+
+  This does mean that to find the status of any given SoC, a number
+  of directories may need to be searched.
+
+
+Machines
+--------
+
+  The currently supported machines are as follows:
+
+  Simtec Electronics EB2410ITX (BAST)
+
+    A general purpose development board, see EB2410ITX.txt for further
+    details
+
+  Simtec Electronics IM2440D20 (Osiris)
+
+    CPU Module from Simtec Electronics, with a S3C2440A CPU, nand flash
+    and a PCMCIA controller.
+
+  Samsung SMDK2410
+
+    Samsung's own development board, geared for PDA work.
+
+  Samsung/Aiji SMDK2412
+
+    The S3C2412 version of the SMDK2440.
+
+  Samsung/Aiji SMDK2413
+
+    The S3C2412 version of the SMDK2440.
+
+  Samsung/Meritech SMDK2440
+
+    The S3C2440 compatible version of the SMDK2440, which has the
+    option of an S3C2440 or S3C2442 CPU module.
+
+  Thorcom VR1000
+
+    Custom embedded board
+
+  HP IPAQ 1940
+
+    Handheld (IPAQ), available in several varieties
+
+  HP iPAQ rx3715
+
+    S3C2440 based IPAQ, with a number of variations depending on
+    features shipped.
+
+  Acer N30
+
+    A S3C2410 based PDA from Acer.  There is a Wiki page at
+    http://handhelds.org/moin/moin.cgi/AcerN30Documentation .
+
+  AML M5900
+
+    American Microsystems' M5900
+
+  Nex Vision Nexcoder
+  Nex Vision Otom
+
+    Two machines by Nex Vision
+
+
+Adding New Machines
+-------------------
+
+  The architecture has been designed to support as many machines as can
+  be configured for it in one kernel build, and any future additions
+  should keep this in mind before altering items outside of their own
+  machine files.
+
+  Machine definitions should be kept in linux/arch/arm/mach-s3c2410,
+  and there are a number of examples that can be looked at.
+
+  Read the kernel patch submission policies as well as the
+  Documentation/arm directory before submitting patches. The
+  ARM kernel series is managed by Russell King, and has a patch system
+  located at http://www.arm.linux.org.uk/developer/patches/
+  as well as mailing lists that can be found from the same site.
+
+  As a courtesy, please notify <ben-linux@fluff.org> of any new
+  machines or other modifications.
+
+  Any large scale modifications, or new drivers should be discussed
+  on the ARM kernel mailing list (linux-arm-kernel) before being
+  attempted. See http://www.arm.linux.org.uk/mailinglists/ for the
+  mailing list information.
+
+
+I2C
+---
+
+  The hardware I2C core in the CPU is supported in single master
+  mode, and can be configured via platform data.
+
+
+RTC
+---
+
+  Support for the onboard RTC unit, including alarm function.
+
+  This has recently been upgraded to use the new RTC core,
+  and the module has been renamed to rtc-s3c to fit in with
+  the new rtc naming scheme.
+
+
+Watchdog
+--------
+
+  The onchip watchdog is available via the standard watchdog
+  interface.
+
+
+NAND
+----
+
+  The current kernels now have support for the s3c2410 NAND
+  controller. If there are any problems the latest linux-mtd
+  code can be found from http://www.linux-mtd.infradead.org/
+
+  For more information see Documentation/arm/samsung-s3c24xx/nand.rst
+
+
+SD/MMC
+------
+
+  The SD/MMC hardware pre S3C2443 is supported in the current
+  kernel, the driver is drivers/mmc/host/s3cmci.c and supports
+  1 and 4 bit SD or MMC cards.
+
+  The SDIO behaviour of this driver has not been fully tested. There is no
+  current support for hardware SDIO interrupts.
+
+
+Serial
+------
+
+  The s3c2410 serial driver provides support for the internal
+  serial ports. These devices appear as /dev/ttySAC0 through 3.
+
+  To create device nodes for these, use the following commands
+
+    mknod ttySAC0 c 204 64
+    mknod ttySAC1 c 204 65
+    mknod ttySAC2 c 204 66
+
+
+GPIO
+----
+
+  The core contains support for manipulating the GPIO, see the
+  documentation in GPIO.txt in the same directory as this file.
+
+  Newer kernels carry GPIOLIB, and support is being moved towards
+  this with some of the older support in line to be removed.
+
+  As of v2.6.34, the move towards using gpiolib support is almost
+  complete, and very little of the old calls are left.
+
+  See Documentation/arm/samsung-s3c24xx/gpio.rst for the S3C24XX specific
+  support and Documentation/arm/samsung/gpio.rst for the core Samsung
+  implementation.
+
+
+Clock Management
+----------------
+
+  The core provides the interface defined in the header file
+  include/asm-arm/hardware/clock.h, to allow control over the
+  various clock units
+
+
+Suspend to RAM
+--------------
+
+  For boards that provide support for suspend to RAM, the
+  system can be placed into low power suspend.
+
+  See Suspend.txt for more information.
+
+
+SPI
+---
+
+  SPI drivers are available for both the in-built hardware
+  (although there is no DMA support yet) and a generic
+  GPIO based solution.
+
+
+LEDs
+----
+
+  There is support for GPIO based LEDs via a platform driver
+  in the LED subsystem.
+
+
+Platform Data
+-------------
+
+  Whenever a device has platform specific data that is specified
+  on a per-machine basis, care should be taken to ensure the
+  following:
+
+    1) that default data is not left in the device to confuse the
+       driver if a machine does not set it at startup
+
+    2) the data should (if possible) be marked as __initdata,
+       to ensure that the data is thrown away if the machine is
+       not the one currently in use.
+
+       The best way of doing this is to make a function that
+       kmalloc()s an area of memory, and copies the __initdata
+       and then sets the relevant device's platform data. Making
+       the function `__init` takes care of ensuring it is discarded
+       with the rest of the initialisation code::
+
+         static __init void s3c24xx_xxx_set_platdata(struct xxx_data *pd)
+         {
+             struct s3c2410_xxx_mach_info *npd;
+
+	   npd = kmalloc(sizeof(struct s3c2410_xxx_mach_info), GFP_KERNEL);
+	   if (npd) {
+	      memcpy(npd, pd, sizeof(struct s3c2410_xxx_mach_info));
+	      s3c_device_xxx.dev.platform_data = npd;
+	   } else {
+                printk(KERN_ERR "no memory for xxx platform data\n");
+	   }
+	}
+
+	Note, since the code is marked as __init, it should not be
+	exported outside arch/arm/mach-s3c2410/, or exported to
+	modules via EXPORT_SYMBOL() and related functions.
+
+
+Port Contributors
+-----------------
+
+  Ben Dooks (BJD)
+  Vincent Sanders
+  Herbert Potzl
+  Arnaud Patard (RTP)
+  Roc Wu
+  Klaus Fetscher
+  Dimitry Andric
+  Shannon Holland
+  Guillaume Gourat (NexVision)
+  Christer Weinigel (wingel) (Acer N30)
+  Lucas Correia Villa Real (S3C2400 port)
+
+
+Document Author
+---------------
+
+Ben Dooks, Copyright 2004-2006 Simtec Electronics
diff --git a/Documentation/arm/samsung-s3c24xx/s3c2412.rst b/Documentation/arm/samsung-s3c24xx/s3c2412.rst
new file mode 100644
index 000000000000..68b985fc6bf4
--- /dev/null
+++ b/Documentation/arm/samsung-s3c24xx/s3c2412.rst
@@ -0,0 +1,121 @@
+==========================
+S3C2412 ARM Linux Overview
+==========================
+
+Introduction
+------------
+
+  The S3C2412 is part of the S3C24XX range of ARM9 System-on-Chip CPUs
+  from Samsung. This part has an ARM926-EJS core, capable of running up
+  to 266MHz (see data-sheet for more information)
+
+
+Clock
+-----
+
+  The core clock code provides a set of clocks to the drivers, and allows
+  for source selection and a number of other features.
+
+
+Power
+-----
+
+  No support for suspend/resume to RAM in the current system.
+
+
+DMA
+---
+
+  No current support for DMA.
+
+
+GPIO
+----
+
+  There is support for setting the GPIO to input/output/special function
+  and reading or writing to them.
+
+
+UART
+----
+
+  The UART hardware is similar to the S3C2440, and is supported by the
+  s3c2410 driver in the drivers/serial directory.
+
+
+NAND
+----
+
+  The NAND hardware is similar to the S3C2440, and is supported by the
+  s3c2410 driver in the drivers/mtd/nand/raw directory.
+
+
+USB Host
+--------
+
+  The USB hardware is similar to the S3C2410, with extended clock source
+  control. The OHCI portion is supported by the ohci-s3c2410 driver, and
+  the clock control selection is supported by the core clock code.
+
+
+USB Device
+----------
+
+  No current support in the kernel
+
+
+IRQs
+----
+
+  All the standard, and external interrupt sources are supported. The
+  extra sub-sources are not yet supported.
+
+
+RTC
+---
+
+  The RTC hardware is similar to the S3C2410, and is supported by the
+  s3c2410-rtc driver.
+
+
+Watchdog
+--------
+
+  The watchdog hardware is the same as the S3C2410, and is supported by
+  the s3c2410_wdt driver.
+
+
+MMC/SD/SDIO
+-----------
+
+  No current support for the MMC/SD/SDIO block.
+
+IIC
+---
+
+  The IIC hardware is the same as the S3C2410, and is supported by the
+  i2c-s3c24xx driver.
+
+
+IIS
+---
+
+  No current support for the IIS interface.
+
+
+SPI
+---
+
+  No current support for the SPI interfaces.
+
+
+ATA
+---
+
+  No current support for the on-board ATA block.
+
+
+Document Author
+---------------
+
+Ben Dooks, Copyright 2006 Simtec Electronics
diff --git a/Documentation/arm/samsung-s3c24xx/s3c2413.rst b/Documentation/arm/samsung-s3c24xx/s3c2413.rst
new file mode 100644
index 000000000000..1f51e207fc46
--- /dev/null
+++ b/Documentation/arm/samsung-s3c24xx/s3c2413.rst
@@ -0,0 +1,22 @@
+==========================
+S3C2413 ARM Linux Overview
+==========================
+
+Introduction
+------------
+
+  The S3C2413 is an extended version of the S3C2412, with an camera
+  interface and mobile DDR memory support. See the S3C2412 support
+  documentation for more information.
+
+
+Camera Interface
+----------------
+
+  This block is currently not supported.
+
+
+Document Author
+---------------
+
+Ben Dooks, Copyright 2006 Simtec Electronics
diff --git a/Documentation/arm/samsung-s3c24xx/smdk2440.rst b/Documentation/arm/samsung-s3c24xx/smdk2440.rst
new file mode 100644
index 000000000000..524fd0b4afaf
--- /dev/null
+++ b/Documentation/arm/samsung-s3c24xx/smdk2440.rst
@@ -0,0 +1,57 @@
+=========================
+Samsung/Meritech SMDK2440
+=========================
+
+Introduction
+------------
+
+  The SMDK2440 is a two part evaluation board for the Samsung S3C2440
+  processor. It includes support for LCD, SmartMedia, Audio, SD and
+  10MBit Ethernet, and expansion headers for various signals, including
+  the camera and unused GPIO.
+
+
+Configuration
+-------------
+
+  To set the default configuration, use `make smdk2440_defconfig` which
+  will configure the common features of this board, or use
+  `make s3c2410_config` to include support for all s3c2410/s3c2440 machines
+
+
+Support
+-------
+
+  Ben Dooks' SMDK2440 site at http://www.fluff.org/ben/smdk2440/ which
+  includes linux based USB download tools.
+
+  Some of the h1940 patches that can be found from the H1940 project
+  site at http://www.handhelds.org/projects/h1940.html can also be
+  applied to this board.
+
+
+Peripherals
+-----------
+
+  There is no current support for any of the extra peripherals on the
+  base-board itself.
+
+
+MTD
+---
+
+  The NAND flash should be supported by the in kernel MTD NAND support,
+  NOR flash will be added later.
+
+
+Maintainers
+-----------
+
+  This board is being maintained by Ben Dooks, for more info, see
+  http://www.fluff.org/ben/smdk2440/
+
+  Many thanks to Dimitry Andric of TomTom for the loan of the SMDK2440,
+  and to Simtec Electronics for allowing me time to work on this.
+
+
+(c) 2004 Ben Dooks
diff --git a/Documentation/arm/samsung-s3c24xx/suspend.rst b/Documentation/arm/samsung-s3c24xx/suspend.rst
new file mode 100644
index 000000000000..b4f3ae9fe76e
--- /dev/null
+++ b/Documentation/arm/samsung-s3c24xx/suspend.rst
@@ -0,0 +1,137 @@
+=======================
+S3C24XX Suspend Support
+=======================
+
+
+Introduction
+------------
+
+  The S3C24XX supports a low-power suspend mode, where the SDRAM is kept
+  in Self-Refresh mode, and all but the essential peripheral blocks are
+  powered down. For more information on how this works, please look
+  at the relevant CPU datasheet from Samsung.
+
+
+Requirements
+------------
+
+  1) A bootloader that can support the necessary resume operation
+
+  2) Support for at least 1 source for resume
+
+  3) CONFIG_PM enabled in the kernel
+
+  4) Any peripherals that are going to be powered down at the same
+     time require suspend/resume support.
+
+
+Resuming
+--------
+
+  The S3C2410 user manual defines the process of sending the CPU to
+  sleep and how it resumes. The default behaviour of the Linux code
+  is to set the GSTATUS3 register to the physical address of the
+  code to resume Linux operation.
+
+  GSTATUS4 is currently left alone by the sleep code, and is free to
+  use for any other purposes (for example, the EB2410ITX uses this to
+  save memory configuration in).
+
+
+Machine Support
+---------------
+
+  The machine specific functions must call the s3c_pm_init() function
+  to say that its bootloader is capable of resuming. This can be as
+  simple as adding the following to the machine's definition:
+
+  INITMACHINE(s3c_pm_init)
+
+  A board can do its own setup before calling s3c_pm_init, if it
+  needs to setup anything else for power management support.
+
+  There is currently no support for over-riding the default method of
+  saving the resume address, if your board requires it, then contact
+  the maintainer and discuss what is required.
+
+  Note, the original method of adding an late_initcall() is wrong,
+  and will end up initialising all compiled machines' pm init!
+
+  The following is an example of code used for testing wakeup from
+  an falling edge on IRQ_EINT0::
+
+
+    static irqreturn_t button_irq(int irq, void *pw)
+    {
+	return IRQ_HANDLED;
+    }
+
+    statuc void __init machine_init(void)
+    {
+	...
+
+	request_irq(IRQ_EINT0, button_irq, IRQF_TRIGGER_FALLING,
+		   "button-irq-eint0", NULL);
+
+	enable_irq_wake(IRQ_EINT0);
+
+	s3c_pm_init();
+    }
+
+
+Debugging
+---------
+
+  There are several important things to remember when using PM suspend:
+
+  1) The uart drivers will disable the clocks to the UART blocks when
+     suspending, which means that use of printascii() or similar direct
+     access to the UARTs will cause the debug to stop.
+
+  2) While the pm code itself will attempt to re-enable the UART clocks,
+     care should be taken that any external clock sources that the UARTs
+     rely on are still enabled at that point.
+
+  3) If any debugging is placed in the resume path, then it must have the
+     relevant clocks and peripherals setup before use (ie, bootloader).
+
+     For example, if you transmit a character from the UART, the baud
+     rate and uart controls must be setup beforehand.
+
+
+Configuration
+-------------
+
+  The S3C2410 specific configuration in `System Type` defines various
+  aspects of how the S3C2410 suspend and resume support is configured
+
+  `S3C2410 PM Suspend debug`
+
+    This option prints messages to the serial console before and after
+    the actual suspend, giving detailed information on what is
+    happening
+
+
+  `S3C2410 PM Suspend Memory CRC`
+
+    Allows the entire memory to be checksummed before and after the
+    suspend to see if there has been any corruption of the contents.
+
+    Note, the time to calculate the CRC is dependent on the CPU speed
+    and the size of memory. For an 64Mbyte RAM area on an 200MHz
+    S3C2410, this can take approximately 4 seconds to complete.
+
+    This support requires the CRC32 function to be enabled.
+
+
+  `S3C2410 PM Suspend CRC Chunksize (KiB)`
+
+    Defines the size of memory each CRC chunk covers. A smaller value
+    will mean that the CRC data block will take more memory, but will
+    identify any faults with better precision
+
+
+Document Author
+---------------
+
+Ben Dooks, Copyright 2004 Simtec Electronics
diff --git a/Documentation/arm/samsung-s3c24xx/usb-host.rst b/Documentation/arm/samsung-s3c24xx/usb-host.rst
new file mode 100644
index 000000000000..c84268bd1884
--- /dev/null
+++ b/Documentation/arm/samsung-s3c24xx/usb-host.rst
@@ -0,0 +1,91 @@
+========================
+S3C24XX USB Host support
+========================
+
+
+
+Introduction
+------------
+
+  This document details the S3C2410/S3C2440 in-built OHCI USB host support.
+
+Configuration
+-------------
+
+  Enable at least the following kernel options:
+
+  menuconfig::
+
+   Device Drivers  --->
+     USB support  --->
+       <*> Support for Host-side USB
+       <*>   OHCI HCD support
+
+
+  .config:
+
+    - CONFIG_USB
+    - CONFIG_USB_OHCI_HCD
+
+
+  Once these options are configured, the standard set of USB device
+  drivers can be configured and used.
+
+
+Board Support
+-------------
+
+  The driver attaches to a platform device, which will need to be
+  added by the board specific support file in linux/arch/arm/mach-s3c2410,
+  such as mach-bast.c or mach-smdk2410.c
+
+  The platform device's platform_data field is only needed if the
+  board implements extra power control or over-current monitoring.
+
+  The OHCI driver does not ensure the state of the S3C2410's MISCCTRL
+  register, so if both ports are to be used for the host, then it is
+  the board support file's responsibility to ensure that the second
+  port is configured to be connected to the OHCI core.
+
+
+Platform Data
+-------------
+
+  See arch/arm/mach-s3c2410/include/mach/usb-control.h for the
+  descriptions of the platform device data. An implementation
+  can be found in linux/arch/arm/mach-s3c2410/usb-simtec.c .
+
+  The `struct s3c2410_hcd_info` contains a pair of functions
+  that get called to enable over-current detection, and to
+  control the port power status.
+
+  The ports are numbered 0 and 1.
+
+  power_control:
+    Called to enable or disable the power on the port.
+
+  enable_oc:
+    Called to enable or disable the over-current monitoring.
+    This should claim or release the resources being used to
+    check the power condition on the port, such as an IRQ.
+
+  report_oc:
+    The OHCI driver fills this field in for the over-current code
+    to call when there is a change to the over-current state on
+    an port. The ports argument is a bitmask of 1 bit per port,
+    with bit X being 1 for an over-current on port X.
+
+    The function s3c2410_usb_report_oc() has been provided to
+    ensure this is called correctly.
+
+  port[x]:
+    This is struct describes each port, 0 or 1. The platform driver
+    should set the flags field of each port to S3C_HCDFLG_USED if
+    the port is enabled.
+
+
+
+Document Author
+---------------
+
+Ben Dooks, Copyright 2005 Simtec Electronics
diff --git a/Documentation/arm/samsung/bootloader-interface.rst b/Documentation/arm/samsung/bootloader-interface.rst
new file mode 100644
index 000000000000..a56f325dae78
--- /dev/null
+++ b/Documentation/arm/samsung/bootloader-interface.rst
@@ -0,0 +1,81 @@
+==========================================================
+Interface between kernel and boot loaders on Exynos boards
+==========================================================
+
+Author: Krzysztof Kozlowski
+
+Date  : 6 June 2015
+
+The document tries to describe currently used interface between Linux kernel
+and boot loaders on Samsung Exynos based boards. This is not a definition
+of interface but rather a description of existing state, a reference
+for information purpose only.
+
+In the document "boot loader" means any of following: U-boot, proprietary
+SBOOT or any other firmware for ARMv7 and ARMv8 initializing the board before
+executing kernel.
+
+
+1. Non-Secure mode
+
+Address:      sysram_ns_base_addr
+
+============= ============================================ ==================
+Offset        Value                                        Purpose
+============= ============================================ ==================
+0x08          exynos_cpu_resume_ns, mcpm_entry_point       System suspend
+0x0c          0x00000bad (Magic cookie)                    System suspend
+0x1c          exynos4_secondary_startup                    Secondary CPU boot
+0x1c + 4*cpu  exynos4_secondary_startup (Exynos4412)       Secondary CPU boot
+0x20          0xfcba0d10 (Magic cookie)                    AFTR
+0x24          exynos_cpu_resume_ns                         AFTR
+0x28 + 4*cpu  0x8 (Magic cookie, Exynos3250)               AFTR
+0x28          0x0 or last value during resume (Exynos542x) System suspend
+============= ============================================ ==================
+
+
+2. Secure mode
+
+Address:      sysram_base_addr
+
+============= ============================================ ==================
+Offset        Value                                        Purpose
+============= ============================================ ==================
+0x00          exynos4_secondary_startup                    Secondary CPU boot
+0x04          exynos4_secondary_startup (Exynos542x)       Secondary CPU boot
+4*cpu         exynos4_secondary_startup (Exynos4412)       Secondary CPU boot
+0x20          exynos_cpu_resume (Exynos4210 r1.0)          AFTR
+0x24          0xfcba0d10 (Magic cookie, Exynos4210 r1.0)   AFTR
+============= ============================================ ==================
+
+Address:      pmu_base_addr
+
+============= ============================================ ==================
+Offset        Value                                        Purpose
+============= ============================================ ==================
+0x0800        exynos_cpu_resume                            AFTR, suspend
+0x0800        mcpm_entry_point (Exynos542x with MCPM)      AFTR, suspend
+0x0804        0xfcba0d10 (Magic cookie)                    AFTR
+0x0804        0x00000bad (Magic cookie)                    System suspend
+0x0814        exynos4_secondary_startup (Exynos4210 r1.1)  Secondary CPU boot
+0x0818        0xfcba0d10 (Magic cookie, Exynos4210 r1.1)   AFTR
+0x081C        exynos_cpu_resume (Exynos4210 r1.1)          AFTR
+============= ============================================ ==================
+
+3. Other (regardless of secure/non-secure mode)
+
+Address:      pmu_base_addr
+
+============= =============================== ===============================
+Offset        Value                           Purpose
+============= =============================== ===============================
+0x0908        Non-zero                        Secondary CPU boot up indicator
+                                              on Exynos3250 and Exynos542x
+============= =============================== ===============================
+
+
+4. Glossary
+
+AFTR - ARM Off Top Running, a low power mode, Cortex cores and many other
+modules are power gated, except the TOP modules
+MCPM - Multi-Cluster Power Management
diff --git a/Documentation/arm/samsung/clksrc-change-registers.awk b/Documentation/arm/samsung/clksrc-change-registers.awk
new file mode 100755
index 000000000000..7be1b8aa7cd9
--- /dev/null
+++ b/Documentation/arm/samsung/clksrc-change-registers.awk
@@ -0,0 +1,166 @@
+#!/usr/bin/awk -f
+#
+# Copyright 2010 Ben Dooks <ben-linux@fluff.org>
+#
+# Released under GPLv2
+
+# example usage
+# ./clksrc-change-registers.awk arch/arm/plat-s5pc1xx/include/plat/regs-clock.h < src > dst
+
+function extract_value(s)
+{
+    eqat = index(s, "=")
+    comat = index(s, ",")
+    return substr(s, eqat+2, (comat-eqat)-2)
+}
+
+function remove_brackets(b)
+{
+    return substr(b, 2, length(b)-2)
+}
+
+function splitdefine(l, p)
+{
+    r = split(l, tp)
+
+    p[0] = tp[2]
+    p[1] = remove_brackets(tp[3])
+}
+
+function find_length(f)
+{
+    if (0)
+	printf "find_length " f "\n" > "/dev/stderr"
+
+    if (f ~ /0x1/)
+	return 1
+    else if (f ~ /0x3/)
+	return 2
+    else if (f ~ /0x7/)
+	return 3
+    else if (f ~ /0xf/)
+	return 4
+
+    printf "unknown length " f "\n" > "/dev/stderr"
+    exit
+}
+
+function find_shift(s)
+{
+    id = index(s, "<")
+    if (id <= 0) {
+	printf "cannot find shift " s "\n" > "/dev/stderr"
+	exit
+    }
+
+    return substr(s, id+2)
+}
+
+
+BEGIN {
+    if (ARGC < 2) {
+	print "too few arguments" > "/dev/stderr"
+	exit
+    }
+
+# read the header file and find the mask values that we will need
+# to replace and create an associative array of values
+
+    while (getline line < ARGV[1] > 0) {
+	if (line ~ /\#define.*_MASK/ &&
+	    !(line ~ /USB_SIG_MASK/)) {
+	    splitdefine(line, fields)
+	    name = fields[0]
+	    if (0)
+		printf "MASK " line "\n" > "/dev/stderr"
+	    dmask[name,0] = find_length(fields[1])
+	    dmask[name,1] = find_shift(fields[1])
+	    if (0)
+		printf "=> '" name "' LENGTH=" dmask[name,0] " SHIFT=" dmask[name,1] "\n" > "/dev/stderr"
+	} else {
+	}
+    }
+
+    delete ARGV[1]
+}
+
+/clksrc_clk.*=.*{/ {
+    shift=""
+    mask=""
+    divshift=""
+    reg_div=""
+    reg_src=""
+    indent=1
+
+    print $0
+
+    for(; indent >= 1;) {
+	if ((getline line) <= 0) {
+	    printf "unexpected end of file" > "/dev/stderr"
+	    exit 1;
+	}
+
+	if (line ~ /\.shift/) {
+	    shift = extract_value(line)
+	} else if (line ~ /\.mask/) {
+	    mask = extract_value(line)
+	} else if (line ~ /\.reg_divider/) {
+	    reg_div = extract_value(line)
+	} else if (line ~ /\.reg_source/) {
+	    reg_src = extract_value(line)
+	} else if (line ~ /\.divider_shift/) {
+	    divshift = extract_value(line)
+	} else if (line ~ /{/) {
+		indent++
+		print line
+	    } else if (line ~ /}/) {
+	    indent--
+
+	    if (indent == 0) {
+		if (0) {
+		    printf "shift '" shift   "' ='" dmask[shift,0] "'\n" > "/dev/stderr"
+		    printf "mask  '" mask    "'\n" > "/dev/stderr"
+		    printf "dshft '" divshift "'\n" > "/dev/stderr"
+		    printf "rdiv  '" reg_div "'\n" > "/dev/stderr"
+		    printf "rsrc  '" reg_src "'\n" > "/dev/stderr"
+		}
+
+		generated = mask
+		sub(reg_src, reg_div, generated)
+
+		if (0) {
+		    printf "/* rsrc " reg_src " */\n"
+		    printf "/* rdiv " reg_div " */\n"
+		    printf "/* shift " shift " */\n"
+		    printf "/* mask " mask " */\n"
+		    printf "/* generated " generated " */\n"
+		}
+
+		if (reg_div != "") {
+		    printf "\t.reg_div = { "
+		    printf ".reg = " reg_div ", "
+		    printf ".shift = " dmask[generated,1] ", "
+		    printf ".size = " dmask[generated,0] ", "
+		    printf "},\n"
+		}
+
+		printf "\t.reg_src = { "
+		printf ".reg = " reg_src ", "
+		printf ".shift = " dmask[mask,1] ", "
+		printf ".size = " dmask[mask,0] ", "
+
+		printf "},\n"
+
+	    }
+
+	    print line
+	} else {
+	    print line
+	}
+
+	if (0)
+	    printf indent ":" line "\n" > "/dev/stderr"
+    }
+}
+
+// && ! /clksrc_clk.*=.*{/ { print $0 }
diff --git a/Documentation/arm/samsung/gpio.rst b/Documentation/arm/samsung/gpio.rst
new file mode 100644
index 000000000000..5f7cadd7159e
--- /dev/null
+++ b/Documentation/arm/samsung/gpio.rst
@@ -0,0 +1,41 @@
+===========================
+Samsung GPIO implementation
+===========================
+
+Introduction
+------------
+
+This outlines the Samsung GPIO implementation and the architecture
+specific calls provided alongside the drivers/gpio core.
+
+
+S3C24XX (Legacy)
+----------------
+
+See Documentation/arm/samsung-s3c24xx/gpio.rst for more information
+about these devices. Their implementation has been brought into line
+with the core samsung implementation described in this document.
+
+
+GPIOLIB integration
+-------------------
+
+The gpio implementation uses gpiolib as much as possible, only providing
+specific calls for the items that require Samsung specific handling, such
+as pin special-function or pull resistor control.
+
+GPIO numbering is synchronised between the Samsung and gpiolib system.
+
+
+PIN configuration
+-----------------
+
+Pin configuration is specific to the Samsung architecture, with each SoC
+registering the necessary information for the core gpio configuration
+implementation to configure pins as necessary.
+
+The s3c_gpio_cfgpin() and s3c_gpio_setpull() provide the means for a
+driver or machine to change gpio configuration.
+
+See arch/arm/plat-samsung/include/plat/gpio-cfg.h for more information
+on these functions.
diff --git a/Documentation/arm/samsung/index.rst b/Documentation/arm/samsung/index.rst
new file mode 100644
index 000000000000..f54d95734362
--- /dev/null
+++ b/Documentation/arm/samsung/index.rst
@@ -0,0 +1,10 @@
+===========
+Samsung SoC
+===========
+
+.. toctree::
+   :maxdepth: 1
+
+   gpio
+   bootloader-interface
+   overview
diff --git a/Documentation/arm/samsung/overview.rst b/Documentation/arm/samsung/overview.rst
new file mode 100644
index 000000000000..e74307897416
--- /dev/null
+++ b/Documentation/arm/samsung/overview.rst
@@ -0,0 +1,89 @@
+==========================
+Samsung ARM Linux Overview
+==========================
+
+Introduction
+------------
+
+  The Samsung range of ARM SoCs spans many similar devices, from the initial
+  ARM9 through to the newest ARM cores. This document shows an overview of
+  the current kernel support, how to use it and where to find the code
+  that supports this.
+
+  The currently supported SoCs are:
+
+  - S3C24XX: See Documentation/arm/samsung-s3c24xx/overview.rst for full list
+  - S3C64XX: S3C6400 and S3C6410
+  - S5PC110 / S5PV210
+
+
+S3C24XX Systems
+---------------
+
+  There is still documentation in Documnetation/arm/Samsung-S3C24XX/ which
+  deals with the architecture and drivers specific to these devices.
+
+  See Documentation/arm/samsung-s3c24xx/overview.rst for more information
+  on the implementation details and specific support.
+
+
+Configuration
+-------------
+
+  A number of configurations are supplied, as there is no current way of
+  unifying all the SoCs into one kernel.
+
+  s5pc110_defconfig
+	- S5PC110 specific default configuration
+  s5pv210_defconfig
+	- S5PV210 specific default configuration
+
+
+Layout
+------
+
+  The directory layout is currently being restructured, and consists of
+  several platform directories and then the machine specific directories
+  of the CPUs being built for.
+
+  plat-samsung provides the base for all the implementations, and is the
+  last in the line of include directories that are processed for the build
+  specific information. It contains the base clock, GPIO and device definitions
+  to get the system running.
+
+  plat-s3c24xx is for s3c24xx specific builds, see the S3C24XX docs.
+
+  plat-s5p is for s5p specific builds, and contains common support for the
+  S5P specific systems. Not all S5Ps use all the features in this directory
+  due to differences in the hardware.
+
+
+Layout changes
+--------------
+
+  The old plat-s3c and plat-s5pc1xx directories have been removed, with
+  support moved to either plat-samsung or plat-s5p as necessary. These moves
+  where to simplify the include and dependency issues involved with having
+  so many different platform directories.
+
+
+Port Contributors
+-----------------
+
+  Ben Dooks (BJD)
+  Vincent Sanders
+  Herbert Potzl
+  Arnaud Patard (RTP)
+  Roc Wu
+  Klaus Fetscher
+  Dimitry Andric
+  Shannon Holland
+  Guillaume Gourat (NexVision)
+  Christer Weinigel (wingel) (Acer N30)
+  Lucas Correia Villa Real (S3C2400 port)
+
+
+Document Author
+---------------
+
+Copyright 2009-2010 Ben Dooks <ben-linux@fluff.org>
diff --git a/Documentation/arm/setup.rst b/Documentation/arm/setup.rst
new file mode 100644
index 000000000000..8e12ef3fb9a7
--- /dev/null
+++ b/Documentation/arm/setup.rst
@@ -0,0 +1,108 @@
+=============================================
+Kernel initialisation parameters on ARM Linux
+=============================================
+
+The following document describes the kernel initialisation parameter
+structure, otherwise known as 'struct param_struct' which is used
+for most ARM Linux architectures.
+
+This structure is used to pass initialisation parameters from the
+kernel loader to the Linux kernel proper, and may be short lived
+through the kernel initialisation process.  As a general rule, it
+should not be referenced outside of arch/arm/kernel/setup.c:setup_arch().
+
+There are a lot of parameters listed in there, and they are described
+below:
+
+ page_size
+   This parameter must be set to the page size of the machine, and
+   will be checked by the kernel.
+
+ nr_pages
+   This is the total number of pages of memory in the system.  If
+   the memory is banked, then this should contain the total number
+   of pages in the system.
+
+   If the system contains separate VRAM, this value should not
+   include this information.
+
+ ramdisk_size
+   This is now obsolete, and should not be used.
+
+ flags
+   Various kernel flags, including:
+
+    =====   ========================
+    bit 0   1 = mount root read only
+    bit 1   unused
+    bit 2   0 = load ramdisk
+    bit 3   0 = prompt for ramdisk
+    =====   ========================
+
+ rootdev
+   major/minor number pair of device to mount as the root filesystem.
+
+ video_num_cols / video_num_rows
+   These two together describe the character size of the dummy console,
+   or VGA console character size.  They should not be used for any other
+   purpose.
+
+   It's generally a good idea to set these to be either standard VGA, or
+   the equivalent character size of your fbcon display.  This then allows
+   all the bootup messages to be displayed correctly.
+
+ video_x / video_y
+   This describes the character position of cursor on VGA console, and
+   is otherwise unused. (should not be used for other console types, and
+   should not be used for other purposes).
+
+ memc_control_reg
+   MEMC chip control register for Acorn Archimedes and Acorn A5000
+   based machines.  May be used differently by different architectures.
+
+ sounddefault
+   Default sound setting on Acorn machines.  May be used differently by
+   different architectures.
+
+ adfsdrives
+   Number of ADFS/MFM disks.  May be used differently by different
+   architectures.
+
+ bytes_per_char_h / bytes_per_char_v
+   These are now obsolete, and should not be used.
+
+ pages_in_bank[4]
+   Number of pages in each bank of the systems memory (used for RiscPC).
+   This is intended to be used on systems where the physical memory
+   is non-contiguous from the processors point of view.
+
+ pages_in_vram
+   Number of pages in VRAM (used on Acorn RiscPC).  This value may also
+   be used by loaders if the size of the video RAM can't be obtained
+   from the hardware.
+
+ initrd_start / initrd_size
+   This describes the kernel virtual start address and size of the
+   initial ramdisk.
+
+ rd_start
+   Start address in sectors of the ramdisk image on a floppy disk.
+
+ system_rev
+   system revision number.
+
+ system_serial_low / system_serial_high
+   system 64-bit serial number
+
+ mem_fclk_21285
+   The speed of the external oscillator to the 21285 (footbridge),
+   which control's the speed of the memory bus, timer & serial port.
+   Depending upon the speed of the cpu its value can be between
+   0-66 MHz. If no params are passed or a value of zero is passed,
+   then a value of 50 Mhz is the default on 21285 architectures.
+
+ paths[8][128]
+   These are now obsolete, and should not be used.
+
+ commandline
+   Kernel command line parameters.  Details can be found elsewhere.
diff --git a/Documentation/arm/sh-mobile/.gitignore b/Documentation/arm/sh-mobile/.gitignore
new file mode 100644
index 000000000000..c928dbf3cc88
--- /dev/null
+++ b/Documentation/arm/sh-mobile/.gitignore
@@ -0,0 +1 @@
+vrl4
diff --git a/Documentation/arm/spear/overview.rst b/Documentation/arm/spear/overview.rst
new file mode 100644
index 000000000000..8a1a87aca427
--- /dev/null
+++ b/Documentation/arm/spear/overview.rst
@@ -0,0 +1,65 @@
+========================
+SPEAr ARM Linux Overview
+========================
+
+Introduction
+------------
+
+  SPEAr (Structured Processor Enhanced Architecture).
+  weblink : http://www.st.com/spear
+
+  The ST Microelectronics SPEAr range of ARM9/CortexA9 System-on-Chip CPUs are
+  supported by the 'spear' platform of ARM Linux. Currently SPEAr1310,
+  SPEAr1340, SPEAr300, SPEAr310, SPEAr320 and SPEAr600 SOCs are supported.
+
+  Hierarchy in SPEAr is as follows:
+
+  SPEAr (Platform)
+	- SPEAr3XX (3XX SOC series, based on ARM9)
+		- SPEAr300 (SOC)
+			- SPEAr300 Evaluation Board
+		- SPEAr310 (SOC)
+			- SPEAr310 Evaluation Board
+		- SPEAr320 (SOC)
+			- SPEAr320 Evaluation Board
+	- SPEAr6XX (6XX SOC series, based on ARM9)
+		- SPEAr600 (SOC)
+			- SPEAr600 Evaluation Board
+	- SPEAr13XX (13XX SOC series, based on ARM CORTEXA9)
+		- SPEAr1310 (SOC)
+			- SPEAr1310 Evaluation Board
+		- SPEAr1340 (SOC)
+			- SPEAr1340 Evaluation Board
+
+Configuration
+-------------
+
+  A generic configuration is provided for each machine, and can be used as the
+  default by::
+
+	make spear13xx_defconfig
+	make spear3xx_defconfig
+	make spear6xx_defconfig
+
+Layout
+------
+
+  The common files for multiple machine families (SPEAr3xx, SPEAr6xx and
+  SPEAr13xx) are located in the platform code contained in arch/arm/plat-spear
+  with headers in plat/.
+
+  Each machine series have a directory with name arch/arm/mach-spear followed by
+  series name. Like mach-spear3xx, mach-spear6xx and mach-spear13xx.
+
+  Common file for machines of spear3xx family is mach-spear3xx/spear3xx.c, for
+  spear6xx is mach-spear6xx/spear6xx.c and for spear13xx family is
+  mach-spear13xx/spear13xx.c. mach-spear* also contain soc/machine specific
+  files, like spear1310.c, spear1340.c spear300.c, spear310.c, spear320.c and
+  spear600.c.  mach-spear* doesn't contains board specific files as they fully
+  support Flattened Device Tree.
+
+
+Document Author
+---------------
+
+  Viresh Kumar <vireshk@kernel.org>, (c) 2010-2012 ST Microelectronics
diff --git a/Documentation/arm/sti/overview.rst b/Documentation/arm/sti/overview.rst
new file mode 100644
index 000000000000..70743617a74f
--- /dev/null
+++ b/Documentation/arm/sti/overview.rst
@@ -0,0 +1,36 @@
+======================
+STi ARM Linux Overview
+======================
+
+Introduction
+------------
+
+  The ST Microelectronics Multimedia and Application Processors range of
+  CortexA9 System-on-Chip are supported by the 'STi' platform of
+  ARM Linux. Currently STiH415, STiH416 SOCs are supported with both
+  B2000 and B2020 Reference boards.
+
+
+configuration
+-------------
+
+  A generic configuration is provided for both STiH415/416, and can be used as the
+  default by::
+
+	make stih41x_defconfig
+
+Layout
+------
+
+  All the files for multiple machine families (STiH415, STiH416, and STiG125)
+  are located in the platform code contained in arch/arm/mach-sti
+
+  There is a generic board board-dt.c in the mach folder which support
+  Flattened Device Tree, which means, It works with any compatible board with
+  Device Trees.
+
+
+Document Author
+---------------
+
+  Srinivas Kandagatla <srinivas.kandagatla@st.com>, (c) 2013 ST Microelectronics
diff --git a/Documentation/arm/sti/overview.txt b/Documentation/arm/sti/overview.txt
deleted file mode 100644
index 1a4e93d6027f..000000000000
--- a/Documentation/arm/sti/overview.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-			STi ARM Linux Overview
-			==========================
-
-Introduction
-------------
-
-  The ST Microelectronics Multimedia and Application Processors range of
-  CortexA9 System-on-Chip are supported by the 'STi' platform of
-  ARM Linux. Currently STiH415, STiH416 SOCs are supported with both
-  B2000 and B2020 Reference boards.
-
-
-  configuration
-  -------------
-
-  A generic configuration is provided for both STiH415/416, and can be used as the
-  default by
-	make stih41x_defconfig
-
-  Layout
-  ------
-  All the files for multiple machine families (STiH415, STiH416, and STiG125)
-  are located in the platform code contained in arch/arm/mach-sti
-
-  There is a generic board board-dt.c in the mach folder which support
-  Flattened Device Tree, which means, It works with any compatible board with
-  Device Trees.
-
-
-  Document Author
-  ---------------
-
-  Srinivas Kandagatla <srinivas.kandagatla@st.com>, (c) 2013 ST Microelectronics
diff --git a/Documentation/arm/sti/stih407-overview.rst b/Documentation/arm/sti/stih407-overview.rst
new file mode 100644
index 000000000000..027e75bc7b7c
--- /dev/null
+++ b/Documentation/arm/sti/stih407-overview.rst
@@ -0,0 +1,19 @@
+================
+STiH407 Overview
+================
+
+Introduction
+------------
+
+    The STiH407 is the new generation of SoC for Multi-HD, AVC set-top boxes
+    and server/connected client application for satellite, cable, terrestrial
+    and IP-STB markets.
+
+    Features
+    - ARM Cortex-A9 1.5 GHz dual core CPU (28nm)
+    - SATA2, USB 3.0, PCIe, Gbit Ethernet
+
+Document Author
+---------------
+
+  Maxime Coquelin <maxime.coquelin@st.com>, (c) 2014 ST Microelectronics
diff --git a/Documentation/arm/sti/stih407-overview.txt b/Documentation/arm/sti/stih407-overview.txt
deleted file mode 100644
index 3343f32f58bc..000000000000
--- a/Documentation/arm/sti/stih407-overview.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-			STiH407 Overview
-			================
-
-Introduction
-------------
-
-    The STiH407 is the new generation of SoC for Multi-HD, AVC set-top boxes
-    and server/connected client application for satellite, cable, terrestrial
-    and IP-STB markets.
-
-    Features
-    - ARM Cortex-A9 1.5 GHz dual core CPU (28nm)
-    - SATA2, USB 3.0, PCIe, Gbit Ethernet
-
-  Document Author
-  ---------------
-
-  Maxime Coquelin <maxime.coquelin@st.com>, (c) 2014 ST Microelectronics
diff --git a/Documentation/arm/sti/stih415-overview.rst b/Documentation/arm/sti/stih415-overview.rst
new file mode 100644
index 000000000000..b67452d610c4
--- /dev/null
+++ b/Documentation/arm/sti/stih415-overview.rst
@@ -0,0 +1,14 @@
+================
+STiH415 Overview
+================
+
+Introduction
+------------
+
+    The STiH415 is the next generation of HD, AVC set-top box processors
+    for satellite, cable, terrestrial and IP-STB markets.
+
+    Features:
+
+    - ARM Cortex-A9 1.0 GHz, dual-core CPU
+    - SATA2x2,USB 2.0x3, PCIe, Gbit Ethernet MACx2
diff --git a/Documentation/arm/sti/stih415-overview.txt b/Documentation/arm/sti/stih415-overview.txt
deleted file mode 100644
index 1383e33f265d..000000000000
--- a/Documentation/arm/sti/stih415-overview.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-			STiH415 Overview
-			================
-
-Introduction
-------------
-
-    The STiH415 is the next generation of HD, AVC set-top box processors
-    for satellite, cable, terrestrial and IP-STB markets.
-
-    Features
-    - ARM Cortex-A9 1.0 GHz, dual-core CPU
-    - SATA2x2,USB 2.0x3, PCIe, Gbit Ethernet MACx2
diff --git a/Documentation/arm/sti/stih416-overview.rst b/Documentation/arm/sti/stih416-overview.rst
new file mode 100644
index 000000000000..93f17d74d8db
--- /dev/null
+++ b/Documentation/arm/sti/stih416-overview.rst
@@ -0,0 +1,13 @@
+================
+STiH416 Overview
+================
+
+Introduction
+------------
+
+    The STiH416 is the next generation of HD, AVC set-top box processors
+    for satellite, cable, terrestrial and IP-STB markets.
+
+    Features
+    - ARM Cortex-A9 1.2 GHz dual core CPU
+    - SATA2x2,USB 2.0x3, PCIe, Gbit Ethernet MACx2
diff --git a/Documentation/arm/sti/stih416-overview.txt b/Documentation/arm/sti/stih416-overview.txt
deleted file mode 100644
index 558444c201c6..000000000000
--- a/Documentation/arm/sti/stih416-overview.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-			STiH416 Overview
-			================
-
-Introduction
-------------
-
-    The STiH416 is the next generation of HD, AVC set-top box processors
-    for satellite, cable, terrestrial and IP-STB markets.
-
-    Features
-    - ARM Cortex-A9 1.2 GHz dual core CPU
-    - SATA2x2,USB 2.0x3, PCIe, Gbit Ethernet MACx2
diff --git a/Documentation/arm/sti/stih418-overview.rst b/Documentation/arm/sti/stih418-overview.rst
new file mode 100644
index 000000000000..b563c1f4fe5a
--- /dev/null
+++ b/Documentation/arm/sti/stih418-overview.rst
@@ -0,0 +1,21 @@
+================
+STiH418 Overview
+================
+
+Introduction
+------------
+
+    The STiH418 is the new generation of SoC for UHDp60 set-top boxes
+    and server/connected client application for satellite, cable, terrestrial
+    and IP-STB markets.
+
+    Features
+    - ARM Cortex-A9 1.5 GHz quad core CPU (28nm)
+    - SATA2, USB 3.0, PCIe, Gbit Ethernet
+    - HEVC L5.1 Main 10
+    - VP9
+
+Document Author
+---------------
+
+  Maxime Coquelin <maxime.coquelin@st.com>, (c) 2015 ST Microelectronics
diff --git a/Documentation/arm/sti/stih418-overview.txt b/Documentation/arm/sti/stih418-overview.txt
deleted file mode 100644
index 1cd8fc80646d..000000000000
--- a/Documentation/arm/sti/stih418-overview.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-			STiH418 Overview
-			================
-
-Introduction
-------------
-
-    The STiH418 is the new generation of SoC for UHDp60 set-top boxes
-    and server/connected client application for satellite, cable, terrestrial
-    and IP-STB markets.
-
-    Features
-    - ARM Cortex-A9 1.5 GHz quad core CPU (28nm)
-    - SATA2, USB 3.0, PCIe, Gbit Ethernet
-    - HEVC L5.1 Main 10
-    - VP9
-
-  Document Author
-  ---------------
-
-  Maxime Coquelin <maxime.coquelin@st.com>, (c) 2015 ST Microelectronics
diff --git a/Documentation/arm/stm32/overview.rst b/Documentation/arm/stm32/overview.rst
index f7e734153860..85cfc8410798 100644
--- a/Documentation/arm/stm32/overview.rst
+++ b/Documentation/arm/stm32/overview.rst
@@ -1,5 +1,3 @@
-:orphan:
-
 ========================
 STM32 ARM Linux Overview
 ========================
diff --git a/Documentation/arm/stm32/stm32f429-overview.rst b/Documentation/arm/stm32/stm32f429-overview.rst
index 65bbb1c3b423..a7ebe8ea6697 100644
--- a/Documentation/arm/stm32/stm32f429-overview.rst
+++ b/Documentation/arm/stm32/stm32f429-overview.rst
@@ -1,5 +1,4 @@
-:orphan:
-
+==================
 STM32F429 Overview
 ==================
 
@@ -23,6 +22,4 @@ Datasheet and reference manual are publicly available on ST website (STM32F429_)
 
 .. _STM32F429: http://www.st.com/web/en/catalog/mmc/FM141/SC1169/SS1577/LN1806?ecmp=stm32f429-439_pron_pr-ces2014_nov2013
 
-:Authors:
-
-Maxime Coquelin <mcoquelin.stm32@gmail.com>
+:Authors: Maxime Coquelin <mcoquelin.stm32@gmail.com>
diff --git a/Documentation/arm/stm32/stm32f746-overview.rst b/Documentation/arm/stm32/stm32f746-overview.rst
index 42d593085015..78befddc7740 100644
--- a/Documentation/arm/stm32/stm32f746-overview.rst
+++ b/Documentation/arm/stm32/stm32f746-overview.rst
@@ -1,5 +1,4 @@
-:orphan:
-
+==================
 STM32F746 Overview
 ==================
 
@@ -30,6 +29,4 @@ Datasheet and reference manual are publicly available on ST website (STM32F746_)
 
 .. _STM32F746: http://www.st.com/content/st_com/en/products/microcontrollers/stm32-32-bit-arm-cortex-mcus/stm32f7-series/stm32f7x6/stm32f746ng.html
 
-:Authors:
-
-Alexandre Torgue <alexandre.torgue@st.com>
+:Authors: Alexandre Torgue <alexandre.torgue@st.com>
diff --git a/Documentation/arm/stm32/stm32f769-overview.rst b/Documentation/arm/stm32/stm32f769-overview.rst
index f6adac862b17..e482980ddf21 100644
--- a/Documentation/arm/stm32/stm32f769-overview.rst
+++ b/Documentation/arm/stm32/stm32f769-overview.rst
@@ -1,5 +1,4 @@
-:orphan:
-
+==================
 STM32F769 Overview
 ==================
 
@@ -32,6 +31,4 @@ Datasheet and reference manual are publicly available on ST website (STM32F769_)
 
 .. _STM32F769: http://www.st.com/content/st_com/en/products/microcontrollers/stm32-32-bit-arm-cortex-mcus/stm32-high-performance-mcus/stm32f7-series/stm32f7x9/stm32f769ni.html
 
-:Authors:
-
-Alexandre Torgue <alexandre.torgue@st.com>
+:Authors: Alexandre Torgue <alexandre.torgue@st.com>
diff --git a/Documentation/arm/stm32/stm32h743-overview.rst b/Documentation/arm/stm32/stm32h743-overview.rst
index c525835e7473..4e15f1a42730 100644
--- a/Documentation/arm/stm32/stm32h743-overview.rst
+++ b/Documentation/arm/stm32/stm32h743-overview.rst
@@ -1,5 +1,4 @@
-:orphan:
-
+==================
 STM32H743 Overview
 ==================
 
@@ -31,6 +30,4 @@ Datasheet and reference manual are publicly available on ST website (STM32H743_)
 
 .. _STM32H743: http://www.st.com/en/microcontrollers/stm32h7x3.html?querycriteria=productId=LN2033
 
-:Authors:
-
-Alexandre Torgue <alexandre.torgue@st.com>
+:Authors: Alexandre Torgue <alexandre.torgue@st.com>
diff --git a/Documentation/arm/stm32/stm32mp157-overview.rst b/Documentation/arm/stm32/stm32mp157-overview.rst
index 2c52cd020601..f62fdc8e7d8d 100644
--- a/Documentation/arm/stm32/stm32mp157-overview.rst
+++ b/Documentation/arm/stm32/stm32mp157-overview.rst
@@ -1,5 +1,4 @@
-:orphan:
-
+===================
 STM32MP157 Overview
 ===================
 
diff --git a/Documentation/arm/sunxi.rst b/Documentation/arm/sunxi.rst
new file mode 100644
index 000000000000..b037428aee98
--- /dev/null
+++ b/Documentation/arm/sunxi.rst
@@ -0,0 +1,150 @@
+==================
+ARM Allwinner SoCs
+==================
+
+This document lists all the ARM Allwinner SoCs that are currently
+supported in mainline by the Linux kernel. This document will also
+provide links to documentation and/or datasheet for these SoCs.
+
+SunXi family
+------------
+  Linux kernel mach directory: arch/arm/mach-sunxi
+
+  Flavors:
+
+    * ARM926 based SoCs
+      - Allwinner F20 (sun3i)
+
+        * Not Supported
+
+    * ARM Cortex-A8 based SoCs
+      - Allwinner A10 (sun4i)
+
+        * Datasheet
+
+	  http://dl.linux-sunxi.org/A10/A10%20Datasheet%20-%20v1.21%20%282012-04-06%29.pdf
+	* User Manual
+
+	  http://dl.linux-sunxi.org/A10/A10%20User%20Manual%20-%20v1.20%20%282012-04-09%2c%20DECRYPTED%29.pdf
+
+      - Allwinner A10s (sun5i)
+
+        * Datasheet
+
+          http://dl.linux-sunxi.org/A10s/A10s%20Datasheet%20-%20v1.20%20%282012-03-27%29.pdf
+
+      - Allwinner A13 / R8 (sun5i)
+
+        * Datasheet
+
+	  http://dl.linux-sunxi.org/A13/A13%20Datasheet%20-%20v1.12%20%282012-03-29%29.pdf
+        * User Manual
+
+          http://dl.linux-sunxi.org/A13/A13%20User%20Manual%20-%20v1.2%20%282013-01-08%29.pdf
+
+      - Next Thing Co GR8 (sun5i)
+
+    * Single ARM Cortex-A7 based SoCs
+      - Allwinner V3s (sun8i)
+
+        * Datasheet
+
+          http://linux-sunxi.org/File:Allwinner_V3s_Datasheet_V1.0.pdf
+
+    * Dual ARM Cortex-A7 based SoCs
+      - Allwinner A20 (sun7i)
+
+        * User Manual
+
+          http://dl.linux-sunxi.org/A20/A20%20User%20Manual%202013-03-22.pdf
+
+      - Allwinner A23 (sun8i)
+
+        * Datasheet
+
+          http://dl.linux-sunxi.org/A23/A23%20Datasheet%20V1.0%2020130830.pdf
+
+        * User Manual
+
+          http://dl.linux-sunxi.org/A23/A23%20User%20Manual%20V1.0%2020130830.pdf
+
+    * Quad ARM Cortex-A7 based SoCs
+      - Allwinner A31 (sun6i)
+
+        * Datasheet
+
+          http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20datasheet%20V1.3%2020131106.pdf
+
+        * User Manual
+
+          http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20user%20manual%20V1.1%2020130630.pdf
+
+      - Allwinner A31s (sun6i)
+
+        * Datasheet
+
+          http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20datasheet%20V1.3%2020131106.pdf
+
+        * User Manual
+
+          http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20User%20Manual%20%20V1.0%2020130322.pdf
+
+      - Allwinner A33 (sun8i)
+
+        * Datasheet
+
+          http://dl.linux-sunxi.org/A33/A33%20Datasheet%20release%201.1.pdf
+
+        * User Manual
+
+          http://dl.linux-sunxi.org/A33/A33%20user%20manual%20release%201.1.pdf
+
+      - Allwinner H2+ (sun8i)
+
+        * No document available now, but is known to be working properly with
+          H3 drivers and memory map.
+
+      - Allwinner H3 (sun8i)
+
+        * Datasheet
+
+          http://dl.linux-sunxi.org/H3/Allwinner_H3_Datasheet_V1.0.pdf
+
+      - Allwinner R40 (sun8i)
+
+        * Datasheet
+
+          https://github.com/tinalinux/docs/raw/r40-v1.y/R40_Datasheet_V1.0.pdf
+
+        * User Manual
+
+          https://github.com/tinalinux/docs/raw/r40-v1.y/Allwinner_R40_User_Manual_V1.0.pdf
+
+    * Quad ARM Cortex-A15, Quad ARM Cortex-A7 based SoCs
+      - Allwinner A80
+
+        * Datasheet
+
+	  http://dl.linux-sunxi.org/A80/A80_Datasheet_Revision_1.0_0404.pdf
+
+    * Octa ARM Cortex-A7 based SoCs
+      - Allwinner A83T
+
+        * Datasheet
+
+          https://github.com/allwinner-zh/documents/raw/master/A83T/A83T_Datasheet_v1.3_20150510.pdf
+
+        * User Manual
+
+          https://github.com/allwinner-zh/documents/raw/master/A83T/A83T_User_Manual_v1.5.1_20150513.pdf
+
+    * Quad ARM Cortex-A53 based SoCs
+      - Allwinner A64
+
+        * Datasheet
+
+          http://dl.linux-sunxi.org/A64/A64_Datasheet_V1.1.pdf
+
+        * User Manual
+
+          http://dl.linux-sunxi.org/A64/Allwinner%20A64%20User%20Manual%20v1.0.pdf
diff --git a/Documentation/arm/sunxi/README b/Documentation/arm/sunxi/README
deleted file mode 100644
index f8efc21998bf..000000000000
--- a/Documentation/arm/sunxi/README
+++ /dev/null
@@ -1,102 +0,0 @@
-ARM Allwinner SoCs
-==================
-
-This document lists all the ARM Allwinner SoCs that are currently
-supported in mainline by the Linux kernel. This document will also
-provide links to documentation and/or datasheet for these SoCs.
-
-SunXi family
-------------
-  Linux kernel mach directory: arch/arm/mach-sunxi
-
-  Flavors:
-    * ARM926 based SoCs
-      - Allwinner F20 (sun3i)
-        + Not Supported
-
-    * ARM Cortex-A8 based SoCs
-      - Allwinner A10 (sun4i)
-        + Datasheet
-	  http://dl.linux-sunxi.org/A10/A10%20Datasheet%20-%20v1.21%20%282012-04-06%29.pdf
-	+ User Manual
-	  http://dl.linux-sunxi.org/A10/A10%20User%20Manual%20-%20v1.20%20%282012-04-09%2c%20DECRYPTED%29.pdf
-
-      - Allwinner A10s (sun5i)
-        + Datasheet
-          http://dl.linux-sunxi.org/A10s/A10s%20Datasheet%20-%20v1.20%20%282012-03-27%29.pdf
-
-      - Allwinner A13 / R8 (sun5i)
-        + Datasheet
-	  http://dl.linux-sunxi.org/A13/A13%20Datasheet%20-%20v1.12%20%282012-03-29%29.pdf
-        + User Manual
-          http://dl.linux-sunxi.org/A13/A13%20User%20Manual%20-%20v1.2%20%282013-01-08%29.pdf
-
-      - Next Thing Co GR8 (sun5i)
-
-    * Single ARM Cortex-A7 based SoCs
-      - Allwinner V3s (sun8i)
-        + Datasheet
-          http://linux-sunxi.org/File:Allwinner_V3s_Datasheet_V1.0.pdf
-
-    * Dual ARM Cortex-A7 based SoCs
-      - Allwinner A20 (sun7i)
-        + User Manual
-          http://dl.linux-sunxi.org/A20/A20%20User%20Manual%202013-03-22.pdf
-
-      - Allwinner A23 (sun8i)
-        + Datasheet
-          http://dl.linux-sunxi.org/A23/A23%20Datasheet%20V1.0%2020130830.pdf
-        + User Manual
-          http://dl.linux-sunxi.org/A23/A23%20User%20Manual%20V1.0%2020130830.pdf
-
-    * Quad ARM Cortex-A7 based SoCs
-      - Allwinner A31 (sun6i)
-        + Datasheet
-          http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20datasheet%20V1.3%2020131106.pdf
-        + User Manual
-          http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20user%20manual%20V1.1%2020130630.pdf
-
-      - Allwinner A31s (sun6i)
-        + Datasheet
-          http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20datasheet%20V1.3%2020131106.pdf
-        + User Manual
-          http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20User%20Manual%20%20V1.0%2020130322.pdf
-
-      - Allwinner A33 (sun8i)
-        + Datasheet
-          http://dl.linux-sunxi.org/A33/A33%20Datasheet%20release%201.1.pdf
-        + User Manual
-          http://dl.linux-sunxi.org/A33/A33%20user%20manual%20release%201.1.pdf
-
-      - Allwinner H2+ (sun8i)
-        + No document available now, but is known to be working properly with
-          H3 drivers and memory map.
-
-      - Allwinner H3 (sun8i)
-        + Datasheet
-          http://dl.linux-sunxi.org/H3/Allwinner_H3_Datasheet_V1.0.pdf
-
-      - Allwinner R40 (sun8i)
-        + Datasheet
-          https://github.com/tinalinux/docs/raw/r40-v1.y/R40_Datasheet_V1.0.pdf
-        + User Manual
-          https://github.com/tinalinux/docs/raw/r40-v1.y/Allwinner_R40_User_Manual_V1.0.pdf
-
-    * Quad ARM Cortex-A15, Quad ARM Cortex-A7 based SoCs
-      - Allwinner A80
-        + Datasheet
-	  http://dl.linux-sunxi.org/A80/A80_Datasheet_Revision_1.0_0404.pdf
-
-    * Octa ARM Cortex-A7 based SoCs
-      - Allwinner A83T
-        + Datasheet
-          https://github.com/allwinner-zh/documents/raw/master/A83T/A83T_Datasheet_v1.3_20150510.pdf
-        + User Manual
-          https://github.com/allwinner-zh/documents/raw/master/A83T/A83T_User_Manual_v1.5.1_20150513.pdf
-
-    * Quad ARM Cortex-A53 based SoCs
-      - Allwinner A64
-        + Datasheet
-          http://dl.linux-sunxi.org/A64/A64_Datasheet_V1.1.pdf
-        + User Manual
-          http://dl.linux-sunxi.org/A64/Allwinner%20A64%20User%20Manual%20v1.0.pdf
diff --git a/Documentation/arm/sunxi/clocks.rst b/Documentation/arm/sunxi/clocks.rst
new file mode 100644
index 000000000000..23bd03f3e21f
--- /dev/null
+++ b/Documentation/arm/sunxi/clocks.rst
@@ -0,0 +1,57 @@
+=======================================================
+Frequently asked questions about the sunxi clock system
+=======================================================
+
+This document contains useful bits of information that people tend to ask
+about the sunxi clock system, as well as accompanying ASCII art when adequate.
+
+Q: Why is the main 24MHz oscillator gatable? Wouldn't that break the
+   system?
+
+A: The 24MHz oscillator allows gating to save power. Indeed, if gated
+   carelessly the system would stop functioning, but with the right
+   steps, one can gate it and keep the system running. Consider this
+   simplified suspend example:
+
+   While the system is operational, you would see something like::
+
+      24MHz         32kHz
+       |
+      PLL1
+       \
+        \_ CPU Mux
+             |
+           [CPU]
+
+   When you are about to suspend, you switch the CPU Mux to the 32kHz
+   oscillator::
+
+      24Mhz         32kHz
+       |              |
+      PLL1            |
+                     /
+           CPU Mux _/
+             |
+           [CPU]
+
+    Finally you can gate the main oscillator::
+
+                    32kHz
+                      |
+                      |
+                     /
+           CPU Mux _/
+             |
+           [CPU]
+
+Q: Were can I learn more about the sunxi clocks?
+
+A: The linux-sunxi wiki contains a page documenting the clock registers,
+   you can find it at
+
+        http://linux-sunxi.org/A10/CCM
+
+   The authoritative source for information at this time is the ccmu driver
+   released by Allwinner, you can find it at
+
+        https://github.com/linux-sunxi/linux-sunxi/tree/sunxi-3.0/arch/arm/mach-sun4i/clock/ccmu
diff --git a/Documentation/arm/sunxi/clocks.txt b/Documentation/arm/sunxi/clocks.txt
deleted file mode 100644
index e09a88aa3136..000000000000
--- a/Documentation/arm/sunxi/clocks.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-Frequently asked questions about the sunxi clock system
-=======================================================
-
-This document contains useful bits of information that people tend to ask
-about the sunxi clock system, as well as accompanying ASCII art when adequate.
-
-Q: Why is the main 24MHz oscillator gatable? Wouldn't that break the
-   system?
-
-A: The 24MHz oscillator allows gating to save power. Indeed, if gated
-   carelessly the system would stop functioning, but with the right
-   steps, one can gate it and keep the system running. Consider this
-   simplified suspend example:
-
-   While the system is operational, you would see something like
-
-      24MHz         32kHz
-       |
-      PLL1
-       \
-        \_ CPU Mux
-             |
-           [CPU]
-
-   When you are about to suspend, you switch the CPU Mux to the 32kHz
-   oscillator:
-
-      24Mhz         32kHz
-       |              |
-      PLL1            |
-                     /
-           CPU Mux _/
-             |
-           [CPU]
-
-    Finally you can gate the main oscillator
-
-                    32kHz
-                      |
-                      |
-                     /
-           CPU Mux _/
-             |
-           [CPU]
-
-Q: Were can I learn more about the sunxi clocks?
-
-A: The linux-sunxi wiki contains a page documenting the clock registers,
-   you can find it at
-
-        http://linux-sunxi.org/A10/CCM
-
-   The authoritative source for information at this time is the ccmu driver
-   released by Allwinner, you can find it at
-
-        https://github.com/linux-sunxi/linux-sunxi/tree/sunxi-3.0/arch/arm/mach-sun4i/clock/ccmu
diff --git a/Documentation/arm/swp_emulation b/Documentation/arm/swp_emulation
deleted file mode 100644
index af903d22fd93..000000000000
--- a/Documentation/arm/swp_emulation
+++ /dev/null
@@ -1,27 +0,0 @@
-Software emulation of deprecated SWP instruction (CONFIG_SWP_EMULATE)
----------------------------------------------------------------------
-
-ARMv6 architecture deprecates use of the SWP/SWPB instructions, and recommeds
-moving to the load-locked/store-conditional instructions LDREX and STREX.
-
-ARMv7 multiprocessing extensions introduce the ability to disable these
-instructions, triggering an undefined instruction exception when executed.
-Trapped instructions are emulated using an LDREX/STREX or LDREXB/STREXB
-sequence. If a memory access fault (an abort) occurs, a segmentation fault is
-signalled to the triggering process.
-
-/proc/cpu/swp_emulation holds some statistics/information, including the PID of
-the last process to trigger the emulation to be invocated. For example:
----
-Emulated SWP:		12
-Emulated SWPB:		0
-Aborted SWP{B}:		1
-Last process:		314
----
-
-NOTE: when accessing uncached shared regions, LDREX/STREX rely on an external
-transaction monitoring block called a global monitor to maintain update
-atomicity. If your system does not implement a global monitor, this option can
-cause programs that perform SWP operations to uncached memory to deadlock, as
-the STREX operation will always fail.
-
diff --git a/Documentation/arm/swp_emulation.rst b/Documentation/arm/swp_emulation.rst
new file mode 100644
index 000000000000..6a608a9c3715
--- /dev/null
+++ b/Documentation/arm/swp_emulation.rst
@@ -0,0 +1,27 @@
+Software emulation of deprecated SWP instruction (CONFIG_SWP_EMULATE)
+---------------------------------------------------------------------
+
+ARMv6 architecture deprecates use of the SWP/SWPB instructions, and recommeds
+moving to the load-locked/store-conditional instructions LDREX and STREX.
+
+ARMv7 multiprocessing extensions introduce the ability to disable these
+instructions, triggering an undefined instruction exception when executed.
+Trapped instructions are emulated using an LDREX/STREX or LDREXB/STREXB
+sequence. If a memory access fault (an abort) occurs, a segmentation fault is
+signalled to the triggering process.
+
+/proc/cpu/swp_emulation holds some statistics/information, including the PID of
+the last process to trigger the emulation to be invocated. For example::
+
+  Emulated SWP:		12
+  Emulated SWPB:		0
+  Aborted SWP{B}:		1
+  Last process:		314
+
+
+NOTE:
+  when accessing uncached shared regions, LDREX/STREX rely on an external
+  transaction monitoring block called a global monitor to maintain update
+  atomicity. If your system does not implement a global monitor, this option can
+  cause programs that perform SWP operations to uncached memory to deadlock, as
+  the STREX operation will always fail.
diff --git a/Documentation/arm/tcm.rst b/Documentation/arm/tcm.rst
new file mode 100644
index 000000000000..effd9c7bc968
--- /dev/null
+++ b/Documentation/arm/tcm.rst
@@ -0,0 +1,161 @@
+==================================================
+ARM TCM (Tightly-Coupled Memory) handling in Linux
+==================================================
+
+Written by Linus Walleij <linus.walleij@stericsson.com>
+
+Some ARM SoC:s have a so-called TCM (Tightly-Coupled Memory).
+This is usually just a few (4-64) KiB of RAM inside the ARM
+processor.
+
+Due to being embedded inside the CPU The TCM has a
+Harvard-architecture, so there is an ITCM (instruction TCM)
+and a DTCM (data TCM). The DTCM can not contain any
+instructions, but the ITCM can actually contain data.
+The size of DTCM or ITCM is minimum 4KiB so the typical
+minimum configuration is 4KiB ITCM and 4KiB DTCM.
+
+ARM CPU:s have special registers to read out status, physical
+location and size of TCM memories. arch/arm/include/asm/cputype.h
+defines a CPUID_TCM register that you can read out from the
+system control coprocessor. Documentation from ARM can be found
+at http://infocenter.arm.com, search for "TCM Status Register"
+to see documents for all CPUs. Reading this register you can
+determine if ITCM (bits 1-0) and/or DTCM (bit 17-16) is present
+in the machine.
+
+There is further a TCM region register (search for "TCM Region
+Registers" at the ARM site) that can report and modify the location
+size of TCM memories at runtime. This is used to read out and modify
+TCM location and size. Notice that this is not a MMU table: you
+actually move the physical location of the TCM around. At the
+place you put it, it will mask any underlying RAM from the
+CPU so it is usually wise not to overlap any physical RAM with
+the TCM.
+
+The TCM memory can then be remapped to another address again using
+the MMU, but notice that the TCM if often used in situations where
+the MMU is turned off. To avoid confusion the current Linux
+implementation will map the TCM 1 to 1 from physical to virtual
+memory in the location specified by the kernel. Currently Linux
+will map ITCM to 0xfffe0000 and on, and DTCM to 0xfffe8000 and
+on, supporting a maximum of 32KiB of ITCM and 32KiB of DTCM.
+
+Newer versions of the region registers also support dividing these
+TCMs in two separate banks, so for example an 8KiB ITCM is divided
+into two 4KiB banks with its own control registers. The idea is to
+be able to lock and hide one of the banks for use by the secure
+world (TrustZone).
+
+TCM is used for a few things:
+
+- FIQ and other interrupt handlers that need deterministic
+  timing and cannot wait for cache misses.
+
+- Idle loops where all external RAM is set to self-refresh
+  retention mode, so only on-chip RAM is accessible by
+  the CPU and then we hang inside ITCM waiting for an
+  interrupt.
+
+- Other operations which implies shutting off or reconfiguring
+  the external RAM controller.
+
+There is an interface for using TCM on the ARM architecture
+in <asm/tcm.h>. Using this interface it is possible to:
+
+- Define the physical address and size of ITCM and DTCM.
+
+- Tag functions to be compiled into ITCM.
+
+- Tag data and constants to be allocated to DTCM and ITCM.
+
+- Have the remaining TCM RAM added to a special
+  allocation pool with gen_pool_create() and gen_pool_add()
+  and provice tcm_alloc() and tcm_free() for this
+  memory. Such a heap is great for things like saving
+  device state when shutting off device power domains.
+
+A machine that has TCM memory shall select HAVE_TCM from
+arch/arm/Kconfig for itself. Code that needs to use TCM shall
+#include <asm/tcm.h>
+
+Functions to go into itcm can be tagged like this:
+int __tcmfunc foo(int bar);
+
+Since these are marked to become long_calls and you may want
+to have functions called locally inside the TCM without
+wasting space, there is also the __tcmlocalfunc prefix that
+will make the call relative.
+
+Variables to go into dtcm can be tagged like this::
+
+  int __tcmdata foo;
+
+Constants can be tagged like this::
+
+  int __tcmconst foo;
+
+To put assembler into TCM just use::
+
+  .section ".tcm.text" or .section ".tcm.data"
+
+respectively.
+
+Example code::
+
+  #include <asm/tcm.h>
+
+  /* Uninitialized data */
+  static u32 __tcmdata tcmvar;
+  /* Initialized data */
+  static u32 __tcmdata tcmassigned = 0x2BADBABEU;
+  /* Constant */
+  static const u32 __tcmconst tcmconst = 0xCAFEBABEU;
+
+  static void __tcmlocalfunc tcm_to_tcm(void)
+  {
+	int i;
+	for (i = 0; i < 100; i++)
+		tcmvar ++;
+  }
+
+  static void __tcmfunc hello_tcm(void)
+  {
+	/* Some abstract code that runs in ITCM */
+	int i;
+	for (i = 0; i < 100; i++) {
+		tcmvar ++;
+	}
+	tcm_to_tcm();
+  }
+
+  static void __init test_tcm(void)
+  {
+	u32 *tcmem;
+	int i;
+
+	hello_tcm();
+	printk("Hello TCM executed from ITCM RAM\n");
+
+	printk("TCM variable from testrun: %u @ %p\n", tcmvar, &tcmvar);
+	tcmvar = 0xDEADBEEFU;
+	printk("TCM variable: 0x%x @ %p\n", tcmvar, &tcmvar);
+
+	printk("TCM assigned variable: 0x%x @ %p\n", tcmassigned, &tcmassigned);
+
+	printk("TCM constant: 0x%x @ %p\n", tcmconst, &tcmconst);
+
+	/* Allocate some TCM memory from the pool */
+	tcmem = tcm_alloc(20);
+	if (tcmem) {
+		printk("TCM Allocated 20 bytes of TCM @ %p\n", tcmem);
+		tcmem[0] = 0xDEADBEEFU;
+		tcmem[1] = 0x2BADBABEU;
+		tcmem[2] = 0xCAFEBABEU;
+		tcmem[3] = 0xDEADBEEFU;
+		tcmem[4] = 0x2BADBABEU;
+		for (i = 0; i < 5; i++)
+			printk("TCM tcmem[%d] = %08x\n", i, tcmem[i]);
+		tcm_free(tcmem, 20);
+	}
+  }
diff --git a/Documentation/arm/tcm.txt b/Documentation/arm/tcm.txt
deleted file mode 100644
index 7c15871c1885..000000000000
--- a/Documentation/arm/tcm.txt
+++ /dev/null
@@ -1,155 +0,0 @@
-ARM TCM (Tightly-Coupled Memory) handling in Linux
-----
-Written by Linus Walleij <linus.walleij@stericsson.com>
-
-Some ARM SoC:s have a so-called TCM (Tightly-Coupled Memory).
-This is usually just a few (4-64) KiB of RAM inside the ARM
-processor.
-
-Due to being embedded inside the CPU The TCM has a
-Harvard-architecture, so there is an ITCM (instruction TCM)
-and a DTCM (data TCM). The DTCM can not contain any
-instructions, but the ITCM can actually contain data.
-The size of DTCM or ITCM is minimum 4KiB so the typical
-minimum configuration is 4KiB ITCM and 4KiB DTCM.
-
-ARM CPU:s have special registers to read out status, physical
-location and size of TCM memories. arch/arm/include/asm/cputype.h
-defines a CPUID_TCM register that you can read out from the
-system control coprocessor. Documentation from ARM can be found
-at http://infocenter.arm.com, search for "TCM Status Register"
-to see documents for all CPUs. Reading this register you can
-determine if ITCM (bits 1-0) and/or DTCM (bit 17-16) is present
-in the machine.
-
-There is further a TCM region register (search for "TCM Region
-Registers" at the ARM site) that can report and modify the location
-size of TCM memories at runtime. This is used to read out and modify
-TCM location and size. Notice that this is not a MMU table: you
-actually move the physical location of the TCM around. At the
-place you put it, it will mask any underlying RAM from the
-CPU so it is usually wise not to overlap any physical RAM with
-the TCM.
-
-The TCM memory can then be remapped to another address again using
-the MMU, but notice that the TCM if often used in situations where
-the MMU is turned off. To avoid confusion the current Linux
-implementation will map the TCM 1 to 1 from physical to virtual
-memory in the location specified by the kernel. Currently Linux
-will map ITCM to 0xfffe0000 and on, and DTCM to 0xfffe8000 and
-on, supporting a maximum of 32KiB of ITCM and 32KiB of DTCM.
-
-Newer versions of the region registers also support dividing these
-TCMs in two separate banks, so for example an 8KiB ITCM is divided
-into two 4KiB banks with its own control registers. The idea is to
-be able to lock and hide one of the banks for use by the secure
-world (TrustZone).
-
-TCM is used for a few things:
-
-- FIQ and other interrupt handlers that need deterministic
-  timing and cannot wait for cache misses.
-
-- Idle loops where all external RAM is set to self-refresh
-  retention mode, so only on-chip RAM is accessible by
-  the CPU and then we hang inside ITCM waiting for an
-  interrupt.
-
-- Other operations which implies shutting off or reconfiguring
-  the external RAM controller.
-
-There is an interface for using TCM on the ARM architecture
-in <asm/tcm.h>. Using this interface it is possible to:
-
-- Define the physical address and size of ITCM and DTCM.
-
-- Tag functions to be compiled into ITCM.
-
-- Tag data and constants to be allocated to DTCM and ITCM.
-
-- Have the remaining TCM RAM added to a special
-  allocation pool with gen_pool_create() and gen_pool_add()
-  and provice tcm_alloc() and tcm_free() for this
-  memory. Such a heap is great for things like saving
-  device state when shutting off device power domains.
-
-A machine that has TCM memory shall select HAVE_TCM from
-arch/arm/Kconfig for itself. Code that needs to use TCM shall
-#include <asm/tcm.h>
-
-Functions to go into itcm can be tagged like this:
-int __tcmfunc foo(int bar);
-
-Since these are marked to become long_calls and you may want
-to have functions called locally inside the TCM without
-wasting space, there is also the __tcmlocalfunc prefix that
-will make the call relative.
-
-Variables to go into dtcm can be tagged like this:
-int __tcmdata foo;
-
-Constants can be tagged like this:
-int __tcmconst foo;
-
-To put assembler into TCM just use
-.section ".tcm.text" or .section ".tcm.data"
-respectively.
-
-Example code:
-
-#include <asm/tcm.h>
-
-/* Uninitialized data */
-static u32 __tcmdata tcmvar;
-/* Initialized data */
-static u32 __tcmdata tcmassigned = 0x2BADBABEU;
-/* Constant */
-static const u32 __tcmconst tcmconst = 0xCAFEBABEU;
-
-static void __tcmlocalfunc tcm_to_tcm(void)
-{
-	int i;
-	for (i = 0; i < 100; i++)
-		tcmvar ++;
-}
-
-static void __tcmfunc hello_tcm(void)
-{
-	/* Some abstract code that runs in ITCM */
-	int i;
-	for (i = 0; i < 100; i++) {
-		tcmvar ++;
-	}
-	tcm_to_tcm();
-}
-
-static void __init test_tcm(void)
-{
-	u32 *tcmem;
-	int i;
-
-	hello_tcm();
-	printk("Hello TCM executed from ITCM RAM\n");
-
-	printk("TCM variable from testrun: %u @ %p\n", tcmvar, &tcmvar);
-	tcmvar = 0xDEADBEEFU;
-	printk("TCM variable: 0x%x @ %p\n", tcmvar, &tcmvar);
-
-	printk("TCM assigned variable: 0x%x @ %p\n", tcmassigned, &tcmassigned);
-
-	printk("TCM constant: 0x%x @ %p\n", tcmconst, &tcmconst);
-
-	/* Allocate some TCM memory from the pool */
-	tcmem = tcm_alloc(20);
-	if (tcmem) {
-		printk("TCM Allocated 20 bytes of TCM @ %p\n", tcmem);
-		tcmem[0] = 0xDEADBEEFU;
-		tcmem[1] = 0x2BADBABEU;
-		tcmem[2] = 0xCAFEBABEU;
-		tcmem[3] = 0xDEADBEEFU;
-		tcmem[4] = 0x2BADBABEU;
-		for (i = 0; i < 5; i++)
-			printk("TCM tcmem[%d] = %08x\n", i, tcmem[i]);
-		tcm_free(tcmem, 20);
-	}
-}
diff --git a/Documentation/arm/uefi.rst b/Documentation/arm/uefi.rst
new file mode 100644
index 000000000000..f868330df6be
--- /dev/null
+++ b/Documentation/arm/uefi.rst
@@ -0,0 +1,67 @@
+================================================
+The Unified Extensible Firmware Interface (UEFI)
+================================================
+
+UEFI, the Unified Extensible Firmware Interface, is a specification
+governing the behaviours of compatible firmware interfaces. It is
+maintained by the UEFI Forum - http://www.uefi.org/.
+
+UEFI is an evolution of its predecessor 'EFI', so the terms EFI and
+UEFI are used somewhat interchangeably in this document and associated
+source code. As a rule, anything new uses 'UEFI', whereas 'EFI' refers
+to legacy code or specifications.
+
+UEFI support in Linux
+=====================
+Booting on a platform with firmware compliant with the UEFI specification
+makes it possible for the kernel to support additional features:
+
+- UEFI Runtime Services
+- Retrieving various configuration information through the standardised
+  interface of UEFI configuration tables. (ACPI, SMBIOS, ...)
+
+For actually enabling [U]EFI support, enable:
+
+- CONFIG_EFI=y
+- CONFIG_EFI_VARS=y or m
+
+The implementation depends on receiving information about the UEFI environment
+in a Flattened Device Tree (FDT) - so is only available with CONFIG_OF.
+
+UEFI stub
+=========
+The "stub" is a feature that extends the Image/zImage into a valid UEFI
+PE/COFF executable, including a loader application that makes it possible to
+load the kernel directly from the UEFI shell, boot menu, or one of the
+lightweight bootloaders like Gummiboot or rEFInd.
+
+The kernel image built with stub support remains a valid kernel image for
+booting in non-UEFI environments.
+
+UEFI kernel support on ARM
+==========================
+UEFI kernel support on the ARM architectures (arm and arm64) is only available
+when boot is performed through the stub.
+
+When booting in UEFI mode, the stub deletes any memory nodes from a provided DT.
+Instead, the kernel reads the UEFI memory map.
+
+The stub populates the FDT /chosen node with (and the kernel scans for) the
+following parameters:
+
+==========================  ======   ===========================================
+Name                        Size     Description
+==========================  ======   ===========================================
+linux,uefi-system-table     64-bit   Physical address of the UEFI System Table.
+
+linux,uefi-mmap-start       64-bit   Physical address of the UEFI memory map,
+                                     populated by the UEFI GetMemoryMap() call.
+
+linux,uefi-mmap-size        32-bit   Size in bytes of the UEFI memory map
+                                     pointed to in previous entry.
+
+linux,uefi-mmap-desc-size   32-bit   Size in bytes of each entry in the UEFI
+                                     memory map.
+
+linux,uefi-mmap-desc-ver    32-bit   Version of the mmap descriptor format.
+==========================  ======   ===========================================
diff --git a/Documentation/arm/uefi.txt b/Documentation/arm/uefi.txt
deleted file mode 100644
index 6543a0adea8a..000000000000
--- a/Documentation/arm/uefi.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-UEFI, the Unified Extensible Firmware Interface, is a specification
-governing the behaviours of compatible firmware interfaces. It is
-maintained by the UEFI Forum - http://www.uefi.org/.
-
-UEFI is an evolution of its predecessor 'EFI', so the terms EFI and
-UEFI are used somewhat interchangeably in this document and associated
-source code. As a rule, anything new uses 'UEFI', whereas 'EFI' refers
-to legacy code or specifications.
-
-UEFI support in Linux
-=====================
-Booting on a platform with firmware compliant with the UEFI specification
-makes it possible for the kernel to support additional features:
-- UEFI Runtime Services
-- Retrieving various configuration information through the standardised
-  interface of UEFI configuration tables. (ACPI, SMBIOS, ...)
-
-For actually enabling [U]EFI support, enable:
-- CONFIG_EFI=y
-- CONFIG_EFI_VARS=y or m
-
-The implementation depends on receiving information about the UEFI environment
-in a Flattened Device Tree (FDT) - so is only available with CONFIG_OF.
-
-UEFI stub
-=========
-The "stub" is a feature that extends the Image/zImage into a valid UEFI
-PE/COFF executable, including a loader application that makes it possible to
-load the kernel directly from the UEFI shell, boot menu, or one of the
-lightweight bootloaders like Gummiboot or rEFInd.
-
-The kernel image built with stub support remains a valid kernel image for
-booting in non-UEFI environments.
-
-UEFI kernel support on ARM
-==========================
-UEFI kernel support on the ARM architectures (arm and arm64) is only available
-when boot is performed through the stub.
-
-When booting in UEFI mode, the stub deletes any memory nodes from a provided DT.
-Instead, the kernel reads the UEFI memory map.
-
-The stub populates the FDT /chosen node with (and the kernel scans for) the
-following parameters:
-________________________________________________________________________________
-Name                      | Size   | Description
-================================================================================
-linux,uefi-system-table   | 64-bit | Physical address of the UEFI System Table.
---------------------------------------------------------------------------------
-linux,uefi-mmap-start     | 64-bit | Physical address of the UEFI memory map,
-                          |        | populated by the UEFI GetMemoryMap() call.
---------------------------------------------------------------------------------
-linux,uefi-mmap-size      | 32-bit | Size in bytes of the UEFI memory map
-                          |        | pointed to in previous entry.
---------------------------------------------------------------------------------
-linux,uefi-mmap-desc-size | 32-bit | Size in bytes of each entry in the UEFI
-                          |        | memory map.
---------------------------------------------------------------------------------
-linux,uefi-mmap-desc-ver  | 32-bit | Version of the mmap descriptor format.
---------------------------------------------------------------------------------
diff --git a/Documentation/arm/vfp/release-notes.rst b/Documentation/arm/vfp/release-notes.rst
new file mode 100644
index 000000000000..c6b04937cee3
--- /dev/null
+++ b/Documentation/arm/vfp/release-notes.rst
@@ -0,0 +1,57 @@
+===============================================
+Release notes for Linux Kernel VFP support code
+===============================================
+
+Date: 	20 May 2004
+
+Author:	Russell King
+
+This is the first release of the Linux Kernel VFP support code.  It
+provides support for the exceptions bounced from VFP hardware found
+on ARM926EJ-S.
+
+This release has been validated against the SoftFloat-2b library by
+John R. Hauser using the TestFloat-2a test suite.  Details of this
+library and test suite can be found at:
+
+   http://www.jhauser.us/arithmetic/SoftFloat.html
+
+The operations which have been tested with this package are:
+
+ - fdiv
+ - fsub
+ - fadd
+ - fmul
+ - fcmp
+ - fcmpe
+ - fcvtd
+ - fcvts
+ - fsito
+ - ftosi
+ - fsqrt
+
+All the above pass softfloat tests with the following exceptions:
+
+- fadd/fsub shows some differences in the handling of +0 / -0 results
+  when input operands differ in signs.
+- the handling of underflow exceptions is slightly different.  If a
+  result underflows before rounding, but becomes a normalised number
+  after rounding, we do not signal an underflow exception.
+
+Other operations which have been tested by basic assembly-only tests
+are:
+
+ - fcpy
+ - fabs
+ - fneg
+ - ftoui
+ - ftosiz
+ - ftouiz
+
+The combination operations have not been tested:
+
+ - fmac
+ - fnmac
+ - fmsc
+ - fnmsc
+ - fnmul
diff --git a/Documentation/arm/vlocks.rst b/Documentation/arm/vlocks.rst
new file mode 100644
index 000000000000..a40a1742110b
--- /dev/null
+++ b/Documentation/arm/vlocks.rst
@@ -0,0 +1,212 @@
+======================================
+vlocks for Bare-Metal Mutual Exclusion
+======================================
+
+Voting Locks, or "vlocks" provide a simple low-level mutual exclusion
+mechanism, with reasonable but minimal requirements on the memory
+system.
+
+These are intended to be used to coordinate critical activity among CPUs
+which are otherwise non-coherent, in situations where the hardware
+provides no other mechanism to support this and ordinary spinlocks
+cannot be used.
+
+
+vlocks make use of the atomicity provided by the memory system for
+writes to a single memory location.  To arbitrate, every CPU "votes for
+itself", by storing a unique number to a common memory location.  The
+final value seen in that memory location when all the votes have been
+cast identifies the winner.
+
+In order to make sure that the election produces an unambiguous result
+in finite time, a CPU will only enter the election in the first place if
+no winner has been chosen and the election does not appear to have
+started yet.
+
+
+Algorithm
+---------
+
+The easiest way to explain the vlocks algorithm is with some pseudo-code::
+
+
+	int currently_voting[NR_CPUS] = { 0, };
+	int last_vote = -1; /* no votes yet */
+
+	bool vlock_trylock(int this_cpu)
+	{
+		/* signal our desire to vote */
+		currently_voting[this_cpu] = 1;
+		if (last_vote != -1) {
+			/* someone already volunteered himself */
+			currently_voting[this_cpu] = 0;
+			return false; /* not ourself */
+		}
+
+		/* let's suggest ourself */
+		last_vote = this_cpu;
+		currently_voting[this_cpu] = 0;
+
+		/* then wait until everyone else is done voting */
+		for_each_cpu(i) {
+			while (currently_voting[i] != 0)
+				/* wait */;
+		}
+
+		/* result */
+		if (last_vote == this_cpu)
+			return true; /* we won */
+		return false;
+	}
+
+	bool vlock_unlock(void)
+	{
+		last_vote = -1;
+	}
+
+
+The currently_voting[] array provides a way for the CPUs to determine
+whether an election is in progress, and plays a role analogous to the
+"entering" array in Lamport's bakery algorithm [1].
+
+However, once the election has started, the underlying memory system
+atomicity is used to pick the winner.  This avoids the need for a static
+priority rule to act as a tie-breaker, or any counters which could
+overflow.
+
+As long as the last_vote variable is globally visible to all CPUs, it
+will contain only one value that won't change once every CPU has cleared
+its currently_voting flag.
+
+
+Features and limitations
+------------------------
+
+ * vlocks are not intended to be fair.  In the contended case, it is the
+   _last_ CPU which attempts to get the lock which will be most likely
+   to win.
+
+   vlocks are therefore best suited to situations where it is necessary
+   to pick a unique winner, but it does not matter which CPU actually
+   wins.
+
+ * Like other similar mechanisms, vlocks will not scale well to a large
+   number of CPUs.
+
+   vlocks can be cascaded in a voting hierarchy to permit better scaling
+   if necessary, as in the following hypothetical example for 4096 CPUs::
+
+	/* first level: local election */
+	my_town = towns[(this_cpu >> 4) & 0xf];
+	I_won = vlock_trylock(my_town, this_cpu & 0xf);
+	if (I_won) {
+		/* we won the town election, let's go for the state */
+		my_state = states[(this_cpu >> 8) & 0xf];
+		I_won = vlock_lock(my_state, this_cpu & 0xf));
+		if (I_won) {
+			/* and so on */
+			I_won = vlock_lock(the_whole_country, this_cpu & 0xf];
+			if (I_won) {
+				/* ... */
+			}
+			vlock_unlock(the_whole_country);
+		}
+		vlock_unlock(my_state);
+	}
+	vlock_unlock(my_town);
+
+
+ARM implementation
+------------------
+
+The current ARM implementation [2] contains some optimisations beyond
+the basic algorithm:
+
+ * By packing the members of the currently_voting array close together,
+   we can read the whole array in one transaction (providing the number
+   of CPUs potentially contending the lock is small enough).  This
+   reduces the number of round-trips required to external memory.
+
+   In the ARM implementation, this means that we can use a single load
+   and comparison::
+
+	LDR	Rt, [Rn]
+	CMP	Rt, #0
+
+   ...in place of code equivalent to::
+
+	LDRB	Rt, [Rn]
+	CMP	Rt, #0
+	LDRBEQ	Rt, [Rn, #1]
+	CMPEQ	Rt, #0
+	LDRBEQ	Rt, [Rn, #2]
+	CMPEQ	Rt, #0
+	LDRBEQ	Rt, [Rn, #3]
+	CMPEQ	Rt, #0
+
+   This cuts down on the fast-path latency, as well as potentially
+   reducing bus contention in contended cases.
+
+   The optimisation relies on the fact that the ARM memory system
+   guarantees coherency between overlapping memory accesses of
+   different sizes, similarly to many other architectures.  Note that
+   we do not care which element of currently_voting appears in which
+   bits of Rt, so there is no need to worry about endianness in this
+   optimisation.
+
+   If there are too many CPUs to read the currently_voting array in
+   one transaction then multiple transations are still required.  The
+   implementation uses a simple loop of word-sized loads for this
+   case.  The number of transactions is still fewer than would be
+   required if bytes were loaded individually.
+
+
+   In principle, we could aggregate further by using LDRD or LDM, but
+   to keep the code simple this was not attempted in the initial
+   implementation.
+
+
+ * vlocks are currently only used to coordinate between CPUs which are
+   unable to enable their caches yet.  This means that the
+   implementation removes many of the barriers which would be required
+   when executing the algorithm in cached memory.
+
+   packing of the currently_voting array does not work with cached
+   memory unless all CPUs contending the lock are cache-coherent, due
+   to cache writebacks from one CPU clobbering values written by other
+   CPUs.  (Though if all the CPUs are cache-coherent, you should be
+   probably be using proper spinlocks instead anyway).
+
+
+ * The "no votes yet" value used for the last_vote variable is 0 (not
+   -1 as in the pseudocode).  This allows statically-allocated vlocks
+   to be implicitly initialised to an unlocked state simply by putting
+   them in .bss.
+
+   An offset is added to each CPU's ID for the purpose of setting this
+   variable, so that no CPU uses the value 0 for its ID.
+
+
+Colophon
+--------
+
+Originally created and documented by Dave Martin for Linaro Limited, for
+use in ARM-based big.LITTLE platforms, with review and input gratefully
+received from Nicolas Pitre and Achin Gupta.  Thanks to Nicolas for
+grabbing most of this text out of the relevant mail thread and writing
+up the pseudocode.
+
+Copyright (C) 2012-2013  Linaro Limited
+Distributed under the terms of Version 2 of the GNU General Public
+License, as defined in linux/COPYING.
+
+
+References
+----------
+
+[1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming
+    Problem", Communications of the ACM 17, 8 (August 1974), 453-455.
+
+    https://en.wikipedia.org/wiki/Lamport%27s_bakery_algorithm
+
+[2] linux/arch/arm/common/vlock.S, www.kernel.org.
diff --git a/Documentation/arm/vlocks.txt b/Documentation/arm/vlocks.txt
deleted file mode 100644
index 45731672c564..000000000000
--- a/Documentation/arm/vlocks.txt
+++ /dev/null
@@ -1,211 +0,0 @@
-vlocks for Bare-Metal Mutual Exclusion
-======================================
-
-Voting Locks, or "vlocks" provide a simple low-level mutual exclusion
-mechanism, with reasonable but minimal requirements on the memory
-system.
-
-These are intended to be used to coordinate critical activity among CPUs
-which are otherwise non-coherent, in situations where the hardware
-provides no other mechanism to support this and ordinary spinlocks
-cannot be used.
-
-
-vlocks make use of the atomicity provided by the memory system for
-writes to a single memory location.  To arbitrate, every CPU "votes for
-itself", by storing a unique number to a common memory location.  The
-final value seen in that memory location when all the votes have been
-cast identifies the winner.
-
-In order to make sure that the election produces an unambiguous result
-in finite time, a CPU will only enter the election in the first place if
-no winner has been chosen and the election does not appear to have
-started yet.
-
-
-Algorithm
----------
-
-The easiest way to explain the vlocks algorithm is with some pseudo-code:
-
-
-	int currently_voting[NR_CPUS] = { 0, };
-	int last_vote = -1; /* no votes yet */
-
-	bool vlock_trylock(int this_cpu)
-	{
-		/* signal our desire to vote */
-		currently_voting[this_cpu] = 1;
-		if (last_vote != -1) {
-			/* someone already volunteered himself */
-			currently_voting[this_cpu] = 0;
-			return false; /* not ourself */
-		}
-
-		/* let's suggest ourself */
-		last_vote = this_cpu;
-		currently_voting[this_cpu] = 0;
-
-		/* then wait until everyone else is done voting */
-		for_each_cpu(i) {
-			while (currently_voting[i] != 0)
-				/* wait */;
-		}
-
-		/* result */
-		if (last_vote == this_cpu)
-			return true; /* we won */
-		return false;
-	}
-
-	bool vlock_unlock(void)
-	{
-		last_vote = -1;
-	}
-
-
-The currently_voting[] array provides a way for the CPUs to determine
-whether an election is in progress, and plays a role analogous to the
-"entering" array in Lamport's bakery algorithm [1].
-
-However, once the election has started, the underlying memory system
-atomicity is used to pick the winner.  This avoids the need for a static
-priority rule to act as a tie-breaker, or any counters which could
-overflow.
-
-As long as the last_vote variable is globally visible to all CPUs, it
-will contain only one value that won't change once every CPU has cleared
-its currently_voting flag.
-
-
-Features and limitations
-------------------------
-
- * vlocks are not intended to be fair.  In the contended case, it is the
-   _last_ CPU which attempts to get the lock which will be most likely
-   to win.
-
-   vlocks are therefore best suited to situations where it is necessary
-   to pick a unique winner, but it does not matter which CPU actually
-   wins.
-
- * Like other similar mechanisms, vlocks will not scale well to a large
-   number of CPUs.
-
-   vlocks can be cascaded in a voting hierarchy to permit better scaling
-   if necessary, as in the following hypothetical example for 4096 CPUs:
-
-	/* first level: local election */
-	my_town = towns[(this_cpu >> 4) & 0xf];
-	I_won = vlock_trylock(my_town, this_cpu & 0xf);
-	if (I_won) {
-		/* we won the town election, let's go for the state */
-		my_state = states[(this_cpu >> 8) & 0xf];
-		I_won = vlock_lock(my_state, this_cpu & 0xf));
-		if (I_won) {
-			/* and so on */
-			I_won = vlock_lock(the_whole_country, this_cpu & 0xf];
-			if (I_won) {
-				/* ... */
-			}
-			vlock_unlock(the_whole_country);
-		}
-		vlock_unlock(my_state);
-	}
-	vlock_unlock(my_town);
-
-
-ARM implementation
-------------------
-
-The current ARM implementation [2] contains some optimisations beyond
-the basic algorithm:
-
- * By packing the members of the currently_voting array close together,
-   we can read the whole array in one transaction (providing the number
-   of CPUs potentially contending the lock is small enough).  This
-   reduces the number of round-trips required to external memory.
-
-   In the ARM implementation, this means that we can use a single load
-   and comparison:
-
-	LDR	Rt, [Rn]
-	CMP	Rt, #0
-
-   ...in place of code equivalent to:
-
-	LDRB	Rt, [Rn]
-	CMP	Rt, #0
-	LDRBEQ	Rt, [Rn, #1]
-	CMPEQ	Rt, #0
-	LDRBEQ	Rt, [Rn, #2]
-	CMPEQ	Rt, #0
-	LDRBEQ	Rt, [Rn, #3]
-	CMPEQ	Rt, #0
-
-   This cuts down on the fast-path latency, as well as potentially
-   reducing bus contention in contended cases.
-
-   The optimisation relies on the fact that the ARM memory system
-   guarantees coherency between overlapping memory accesses of
-   different sizes, similarly to many other architectures.  Note that
-   we do not care which element of currently_voting appears in which
-   bits of Rt, so there is no need to worry about endianness in this
-   optimisation.
-
-   If there are too many CPUs to read the currently_voting array in
-   one transaction then multiple transations are still required.  The
-   implementation uses a simple loop of word-sized loads for this
-   case.  The number of transactions is still fewer than would be
-   required if bytes were loaded individually.
-
-
-   In principle, we could aggregate further by using LDRD or LDM, but
-   to keep the code simple this was not attempted in the initial
-   implementation.
-
-
- * vlocks are currently only used to coordinate between CPUs which are
-   unable to enable their caches yet.  This means that the
-   implementation removes many of the barriers which would be required
-   when executing the algorithm in cached memory.
-
-   packing of the currently_voting array does not work with cached
-   memory unless all CPUs contending the lock are cache-coherent, due
-   to cache writebacks from one CPU clobbering values written by other
-   CPUs.  (Though if all the CPUs are cache-coherent, you should be
-   probably be using proper spinlocks instead anyway).
-
-
- * The "no votes yet" value used for the last_vote variable is 0 (not
-   -1 as in the pseudocode).  This allows statically-allocated vlocks
-   to be implicitly initialised to an unlocked state simply by putting
-   them in .bss.
-
-   An offset is added to each CPU's ID for the purpose of setting this
-   variable, so that no CPU uses the value 0 for its ID.
-
-
-Colophon
---------
-
-Originally created and documented by Dave Martin for Linaro Limited, for
-use in ARM-based big.LITTLE platforms, with review and input gratefully
-received from Nicolas Pitre and Achin Gupta.  Thanks to Nicolas for
-grabbing most of this text out of the relevant mail thread and writing
-up the pseudocode.
-
-Copyright (C) 2012-2013  Linaro Limited
-Distributed under the terms of Version 2 of the GNU General Public
-License, as defined in linux/COPYING.
-
-
-References
-----------
-
-[1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming
-    Problem", Communications of the ACM 17, 8 (August 1974), 453-455.
-
-    https://en.wikipedia.org/wiki/Lamport%27s_bakery_algorithm
-
-[2] linux/arch/arm/common/vlock.S, www.kernel.org.
diff --git a/Documentation/devicetree/bindings/arm/xen.txt b/Documentation/devicetree/bindings/arm/xen.txt
index c9b9321434ea..db5c56db30ec 100644
--- a/Documentation/devicetree/bindings/arm/xen.txt
+++ b/Documentation/devicetree/bindings/arm/xen.txt
@@ -54,7 +54,7 @@ hypervisor {
 };
 
 The format and meaning of the "xen,uefi-*" parameters are similar to those in
-Documentation/arm/uefi.txt, which are provided by the regular UEFI stub. However
+Documentation/arm/uefi.rst, which are provided by the regular UEFI stub. However
 they differ because they are provided by the Xen hypervisor, together with a set
 of UEFI runtime services implemented via hypercalls, see
 http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,platform.h.html.
diff --git a/Documentation/devicetree/booting-without-of.txt b/Documentation/devicetree/booting-without-of.txt
index 60f8640f2b2f..4660ccee35a3 100644
--- a/Documentation/devicetree/booting-without-of.txt
+++ b/Documentation/devicetree/booting-without-of.txt
@@ -160,7 +160,7 @@ it with special cases.
    of the kernel image. That entry point supports two calling
    conventions.  A summary of the interface is described here.  A full
    description of the boot requirements is documented in
-   Documentation/arm/Booting
+   Documentation/arm/booting.rst
 
         a) ATAGS interface.  Minimal information is passed from firmware
         to the kernel with a tagged list of predefined parameters.
@@ -174,7 +174,7 @@ it with special cases.
         b) Entry with a flattened device-tree block.  Firmware loads the
         physical address of the flattened device tree block (dtb) into r2,
         r1 is not used, but it is considered good practice to use a valid
-        machine number as described in Documentation/arm/Booting.
+        machine number as described in Documentation/arm/booting.rst.
 
                 r0 : 0
 
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 216dc0e1e6f2..c6934d90363c 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -1,3 +1,4 @@
+
 .. The Linux Kernel documentation master file, created by
    sphinx-quickstart on Fri Feb 12 13:51:46 2016.
    You can adapt this file completely to your liking, but it should at least
diff --git a/Documentation/translations/zh_CN/arm/Booting b/Documentation/translations/zh_CN/arm/Booting
index 1fe866f8218f..562e9a2957e6 100644
--- a/Documentation/translations/zh_CN/arm/Booting
+++ b/Documentation/translations/zh_CN/arm/Booting
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm/Booting
+Chinese translated version of Documentation/arm/booting.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -9,7 +9,7 @@ or if there is a problem with the translation.
 Maintainer: Russell King <linux@arm.linux.org.uk>
 Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
 ---------------------------------------------------------------------
-Documentation/arm/Booting 的中文翻译
+Documentation/arm/booting.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/arm/kernel_user_helpers.txt b/Documentation/translations/zh_CN/arm/kernel_user_helpers.txt
index cd7fc8f34cf9..99af4363984d 100644
--- a/Documentation/translations/zh_CN/arm/kernel_user_helpers.txt
+++ b/Documentation/translations/zh_CN/arm/kernel_user_helpers.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm/kernel_user_helpers.txt
+Chinese translated version of Documentation/arm/kernel_user_helpers.rst
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -10,7 +10,7 @@ Maintainer: Nicolas Pitre <nicolas.pitre@linaro.org>
 		Dave Martin <dave.martin@linaro.org>
 Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
 ---------------------------------------------------------------------
-Documentation/arm/kernel_user_helpers.txt 的中文翻译
+Documentation/arm/kernel_user_helpers.rst 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
-- 
cgit 


From 2bbbf827d339032dbeda62f0a5f20d2fde07b0f5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Mon, 15 Apr 2019 18:39:27 -0300
Subject: docs: memory-devices: convert ti-emif.txt to ReST

Prepare this file to be moved to a kernel book by converting
it to ReST format and renaming it to ti-emif.rst.

While this is not part of any book, mark it as :orphan:, in order
to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/memory-devices/ti-emif.rst | 64 ++++++++++++++++++++++++++++++++
 Documentation/memory-devices/ti-emif.txt | 57 ----------------------------
 2 files changed, 64 insertions(+), 57 deletions(-)
 create mode 100644 Documentation/memory-devices/ti-emif.rst
 delete mode 100644 Documentation/memory-devices/ti-emif.txt

(limited to 'Documentation')

diff --git a/Documentation/memory-devices/ti-emif.rst b/Documentation/memory-devices/ti-emif.rst
new file mode 100644
index 000000000000..c9242294e63c
--- /dev/null
+++ b/Documentation/memory-devices/ti-emif.rst
@@ -0,0 +1,64 @@
+:orphan:
+
+===============================
+TI EMIF SDRAM Controller Driver
+===============================
+
+Author
+======
+Aneesh V <aneesh@ti.com>
+
+Location
+========
+driver/memory/emif.c
+
+Supported SoCs:
+===============
+TI OMAP44xx
+TI OMAP54xx
+
+Menuconfig option:
+==================
+Device Drivers
+	Memory devices
+		Texas Instruments EMIF driver
+
+Description
+===========
+This driver is for the EMIF module available in Texas Instruments
+SoCs. EMIF is an SDRAM controller that, based on its revision,
+supports one or more of DDR2, DDR3, and LPDDR2 SDRAM protocols.
+This driver takes care of only LPDDR2 memories presently. The
+functions of the driver includes re-configuring AC timing
+parameters and other settings during frequency, voltage and
+temperature changes
+
+Platform Data (see include/linux/platform_data/emif_plat.h)
+===========================================================
+DDR device details and other board dependent and SoC dependent
+information can be passed through platform data (struct emif_platform_data)
+
+- DDR device details: 'struct ddr_device_info'
+- Device AC timings: 'struct lpddr2_timings' and 'struct lpddr2_min_tck'
+- Custom configurations: customizable policy options through
+  'struct emif_custom_configs'
+- IP revision
+- PHY type
+
+Interface to the external world
+===============================
+EMIF driver registers notifiers for voltage and frequency changes
+affecting EMIF and takes appropriate actions when these are invoked.
+
+- freq_pre_notify_handling()
+- freq_post_notify_handling()
+- volt_notify_handling()
+
+Debugfs
+=======
+The driver creates two debugfs entries per device.
+
+- regcache_dump : dump of register values calculated and saved for all
+  frequencies used so far.
+- mr4 : last polled value of MR4 register in the LPDDR2 device. MR4
+  indicates the current temperature level of the device.
diff --git a/Documentation/memory-devices/ti-emif.txt b/Documentation/memory-devices/ti-emif.txt
deleted file mode 100644
index f4ad9a7d0f4b..000000000000
--- a/Documentation/memory-devices/ti-emif.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-TI EMIF SDRAM Controller Driver:
-
-Author
-========
-Aneesh V <aneesh@ti.com>
-
-Location
-============
-driver/memory/emif.c
-
-Supported SoCs:
-===================
-TI OMAP44xx
-TI OMAP54xx
-
-Menuconfig option:
-==========================
-Device Drivers
-	Memory devices
-		Texas Instruments EMIF driver
-
-Description
-===========
-This driver is for the EMIF module available in Texas Instruments
-SoCs. EMIF is an SDRAM controller that, based on its revision,
-supports one or more of DDR2, DDR3, and LPDDR2 SDRAM protocols.
-This driver takes care of only LPDDR2 memories presently. The
-functions of the driver includes re-configuring AC timing
-parameters and other settings during frequency, voltage and
-temperature changes
-
-Platform Data (see include/linux/platform_data/emif_plat.h):
-=====================================================================
-DDR device details and other board dependent and SoC dependent
-information can be passed through platform data (struct emif_platform_data)
-- DDR device details: 'struct ddr_device_info'
-- Device AC timings: 'struct lpddr2_timings' and 'struct lpddr2_min_tck'
-- Custom configurations: customizable policy options through
-  'struct emif_custom_configs'
-- IP revision
-- PHY type
-
-Interface to the external world:
-================================
-EMIF driver registers notifiers for voltage and frequency changes
-affecting EMIF and takes appropriate actions when these are invoked.
-- freq_pre_notify_handling()
-- freq_post_notify_handling()
-- volt_notify_handling()
-
-Debugfs
-========
-The driver creates two debugfs entries per device.
-- regcache_dump : dump of register values calculated and saved for all
-  frequencies used so far.
-- mr4 : last polled value of MR4 register in the LPDDR2 device. MR4
-  indicates the current temperature level of the device.
-- 
cgit 


From 675aaf05d8982d3d304d4652d1555714be8b4af2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Mon, 15 Apr 2019 18:46:48 -0300
Subject: docs: xen-tpmfront.txt: convert it to .rst

In order to be able to add this file to the security book,
we need first to convert it to reST.

While this is not part of any book, mark it as :orphan:, in order
to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/security/tpm/xen-tpmfront.rst | 126 ++++++++++++++++++++++++++++
 Documentation/security/tpm/xen-tpmfront.txt | 113 -------------------------
 2 files changed, 126 insertions(+), 113 deletions(-)
 create mode 100644 Documentation/security/tpm/xen-tpmfront.rst
 delete mode 100644 Documentation/security/tpm/xen-tpmfront.txt

(limited to 'Documentation')

diff --git a/Documentation/security/tpm/xen-tpmfront.rst b/Documentation/security/tpm/xen-tpmfront.rst
new file mode 100644
index 000000000000..98a16ab87360
--- /dev/null
+++ b/Documentation/security/tpm/xen-tpmfront.rst
@@ -0,0 +1,126 @@
+:orphan:
+
+﻿=============================
+Virtual TPM interface for Xen
+=============================
+
+Authors: Matthew Fioravante (JHUAPL), Daniel De Graaf (NSA)
+
+This document describes the virtual Trusted Platform Module (vTPM) subsystem for
+Xen. The reader is assumed to have familiarity with building and installing Xen,
+Linux, and a basic understanding of the TPM and vTPM concepts.
+
+Introduction
+------------
+
+The goal of this work is to provide a TPM functionality to a virtual guest
+operating system (in Xen terms, a DomU).  This allows programs to interact with
+a TPM in a virtual system the same way they interact with a TPM on the physical
+system.  Each guest gets its own unique, emulated, software TPM.  However, each
+of the vTPM's secrets (Keys, NVRAM, etc) are managed by a vTPM Manager domain,
+which seals the secrets to the Physical TPM.  If the process of creating each of
+these domains (manager, vTPM, and guest) is trusted, the vTPM subsystem extends
+the chain of trust rooted in the hardware TPM to virtual machines in Xen. Each
+major component of vTPM is implemented as a separate domain, providing secure
+separation guaranteed by the hypervisor. The vTPM domains are implemented in
+mini-os to reduce memory and processor overhead.
+
+This mini-os vTPM subsystem was built on top of the previous vTPM work done by
+IBM and Intel corporation.
+
+
+Design Overview
+---------------
+
+The architecture of vTPM is described below::
+
+  +------------------+
+  |    Linux DomU    | ...
+  |       |  ^       |
+  |       v  |       |
+  |   xen-tpmfront   |
+  +------------------+
+          |  ^
+          v  |
+  +------------------+
+  | mini-os/tpmback  |
+  |       |  ^       |
+  |       v  |       |
+  |  vtpm-stubdom    | ...
+  |       |  ^       |
+  |       v  |       |
+  | mini-os/tpmfront |
+  +------------------+
+          |  ^
+          v  |
+  +------------------+
+  | mini-os/tpmback  |
+  |       |  ^       |
+  |       v  |       |
+  | vtpmmgr-stubdom  |
+  |       |  ^       |
+  |       v  |       |
+  | mini-os/tpm_tis  |
+  +------------------+
+          |  ^
+          v  |
+  +------------------+
+  |   Hardware TPM   |
+  +------------------+
+
+* Linux DomU:
+	       The Linux based guest that wants to use a vTPM. There may be
+	       more than one of these.
+
+* xen-tpmfront.ko:
+		    Linux kernel virtual TPM frontend driver. This driver
+                    provides vTPM access to a Linux-based DomU.
+
+* mini-os/tpmback:
+		    Mini-os TPM backend driver. The Linux frontend driver
+		    connects to this backend driver to facilitate communications
+		    between the Linux DomU and its vTPM. This driver is also
+		    used by vtpmmgr-stubdom to communicate with vtpm-stubdom.
+
+* vtpm-stubdom:
+		 A mini-os stub domain that implements a vTPM. There is a
+		 one to one mapping between running vtpm-stubdom instances and
+                 logical vtpms on the system. The vTPM Platform Configuration
+                 Registers (PCRs) are normally all initialized to zero.
+
+* mini-os/tpmfront:
+		     Mini-os TPM frontend driver. The vTPM mini-os domain
+		     vtpm-stubdom uses this driver to communicate with
+		     vtpmmgr-stubdom. This driver is also used in mini-os
+		     domains such as pv-grub that talk to the vTPM domain.
+
+* vtpmmgr-stubdom:
+		    A mini-os domain that implements the vTPM manager. There is
+		    only one vTPM manager and it should be running during the
+		    entire lifetime of the machine.  This domain regulates
+		    access to the physical TPM on the system and secures the
+		    persistent state of each vTPM.
+
+* mini-os/tpm_tis:
+		    Mini-os TPM version 1.2 TPM Interface Specification (TIS)
+                    driver. This driver used by vtpmmgr-stubdom to talk directly to
+                    the hardware TPM. Communication is facilitated by mapping
+                    hardware memory pages into vtpmmgr-stubdom.
+
+* Hardware TPM:
+		The physical TPM that is soldered onto the motherboard.
+
+
+Integration With Xen
+--------------------
+
+Support for the vTPM driver was added in Xen using the libxl toolstack in Xen
+4.3.  See the Xen documentation (docs/misc/vtpm.txt) for details on setting up
+the vTPM and vTPM Manager stub domains.  Once the stub domains are running, a
+vTPM device is set up in the same manner as a disk or network device in the
+domain's configuration file.
+
+In order to use features such as IMA that require a TPM to be loaded prior to
+the initrd, the xen-tpmfront driver must be compiled in to the kernel.  If not
+using such features, the driver can be compiled as a module and will be loaded
+as usual.
diff --git a/Documentation/security/tpm/xen-tpmfront.txt b/Documentation/security/tpm/xen-tpmfront.txt
deleted file mode 100644
index 69346de87ff3..000000000000
--- a/Documentation/security/tpm/xen-tpmfront.txt
+++ /dev/null
@@ -1,113 +0,0 @@
-Virtual TPM interface for Xen
-
-Authors: Matthew Fioravante (JHUAPL), Daniel De Graaf (NSA)
-
-This document describes the virtual Trusted Platform Module (vTPM) subsystem for
-Xen. The reader is assumed to have familiarity with building and installing Xen,
-Linux, and a basic understanding of the TPM and vTPM concepts.
-
-INTRODUCTION
-
-The goal of this work is to provide a TPM functionality to a virtual guest
-operating system (in Xen terms, a DomU).  This allows programs to interact with
-a TPM in a virtual system the same way they interact with a TPM on the physical
-system.  Each guest gets its own unique, emulated, software TPM.  However, each
-of the vTPM's secrets (Keys, NVRAM, etc) are managed by a vTPM Manager domain,
-which seals the secrets to the Physical TPM.  If the process of creating each of
-these domains (manager, vTPM, and guest) is trusted, the vTPM subsystem extends
-the chain of trust rooted in the hardware TPM to virtual machines in Xen. Each
-major component of vTPM is implemented as a separate domain, providing secure
-separation guaranteed by the hypervisor. The vTPM domains are implemented in
-mini-os to reduce memory and processor overhead.
-
-This mini-os vTPM subsystem was built on top of the previous vTPM work done by
-IBM and Intel corporation.
-
-
-DESIGN OVERVIEW
----------------
-
-The architecture of vTPM is described below:
-
-+------------------+
-|    Linux DomU    | ...
-|       |  ^       |
-|       v  |       |
-|   xen-tpmfront   |
-+------------------+
-        |  ^
-        v  |
-+------------------+
-| mini-os/tpmback  |
-|       |  ^       |
-|       v  |       |
-|  vtpm-stubdom    | ...
-|       |  ^       |
-|       v  |       |
-| mini-os/tpmfront |
-+------------------+
-        |  ^
-        v  |
-+------------------+
-| mini-os/tpmback  |
-|       |  ^       |
-|       v  |       |
-| vtpmmgr-stubdom  |
-|       |  ^       |
-|       v  |       |
-| mini-os/tpm_tis  |
-+------------------+
-        |  ^
-        v  |
-+------------------+
-|   Hardware TPM   |
-+------------------+
-
- * Linux DomU: The Linux based guest that wants to use a vTPM. There may be
-	       more than one of these.
-
- * xen-tpmfront.ko: Linux kernel virtual TPM frontend driver. This driver
-                    provides vTPM access to a Linux-based DomU.
-
- * mini-os/tpmback: Mini-os TPM backend driver. The Linux frontend driver
-		    connects to this backend driver to facilitate communications
-		    between the Linux DomU and its vTPM. This driver is also
-		    used by vtpmmgr-stubdom to communicate with vtpm-stubdom.
-
- * vtpm-stubdom: A mini-os stub domain that implements a vTPM. There is a
-		 one to one mapping between running vtpm-stubdom instances and
-                 logical vtpms on the system. The vTPM Platform Configuration
-                 Registers (PCRs) are normally all initialized to zero.
-
- * mini-os/tpmfront: Mini-os TPM frontend driver. The vTPM mini-os domain
-		     vtpm-stubdom uses this driver to communicate with
-		     vtpmmgr-stubdom. This driver is also used in mini-os
-		     domains such as pv-grub that talk to the vTPM domain.
-
- * vtpmmgr-stubdom: A mini-os domain that implements the vTPM manager. There is
-		    only one vTPM manager and it should be running during the
-		    entire lifetime of the machine.  This domain regulates
-		    access to the physical TPM on the system and secures the
-		    persistent state of each vTPM.
-
- * mini-os/tpm_tis: Mini-os TPM version 1.2 TPM Interface Specification (TIS)
-                    driver. This driver used by vtpmmgr-stubdom to talk directly to
-                    the hardware TPM. Communication is facilitated by mapping
-                    hardware memory pages into vtpmmgr-stubdom.
-
- * Hardware TPM: The physical TPM that is soldered onto the motherboard.
-
-
-INTEGRATION WITH XEN
---------------------
-
-Support for the vTPM driver was added in Xen using the libxl toolstack in Xen
-4.3.  See the Xen documentation (docs/misc/vtpm.txt) for details on setting up
-the vTPM and vTPM Manager stub domains.  Once the stub domains are running, a
-vTPM device is set up in the same manner as a disk or network device in the
-domain's configuration file.
-
-In order to use features such as IMA that require a TPM to be loaded prior to
-the initrd, the xen-tpmfront driver must be compiled in to the kernel.  If not
-using such features, the driver can be compiled as a module and will be loaded
-as usual.
-- 
cgit 


From 619ba4516771bdfb96658e7a5f57e6551232549a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 19 Apr 2019 18:49:49 -0300
Subject: docs: bus-devices: ti-gpmc.rst: convert it to ReST

In order to be able to add this file to a book, it needs
first to be converted to ReST and renamed.

While this is not part of any book, mark it as :orphan:, in order
to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/bus-devices/ti-gpmc.rst | 179 ++++++++++++++++++++++++++++++++++
 Documentation/bus-devices/ti-gpmc.txt | 122 -----------------------
 2 files changed, 179 insertions(+), 122 deletions(-)
 create mode 100644 Documentation/bus-devices/ti-gpmc.rst
 delete mode 100644 Documentation/bus-devices/ti-gpmc.txt

(limited to 'Documentation')

diff --git a/Documentation/bus-devices/ti-gpmc.rst b/Documentation/bus-devices/ti-gpmc.rst
new file mode 100644
index 000000000000..87c366e418be
--- /dev/null
+++ b/Documentation/bus-devices/ti-gpmc.rst
@@ -0,0 +1,179 @@
+:orphan:
+
+========================================
+GPMC (General Purpose Memory Controller)
+========================================
+
+GPMC is an unified memory controller dedicated to interfacing external
+memory devices like
+
+ * Asynchronous SRAM like memories and application specific integrated
+   circuit devices.
+ * Asynchronous, synchronous, and page mode burst NOR flash devices
+   NAND flash
+ * Pseudo-SRAM devices
+
+GPMC is found on Texas Instruments SoC's (OMAP based)
+IP details: http://www.ti.com/lit/pdf/spruh73 section 7.1
+
+
+GPMC generic timing calculation:
+================================
+
+GPMC has certain timings that has to be programmed for proper
+functioning of the peripheral, while peripheral has another set of
+timings. To have peripheral work with gpmc, peripheral timings has to
+be translated to the form gpmc can understand. The way it has to be
+translated depends on the connected peripheral. Also there is a
+dependency for certain gpmc timings on gpmc clock frequency. Hence a
+generic timing routine was developed to achieve above requirements.
+
+Generic routine provides a generic method to calculate gpmc timings
+from gpmc peripheral timings. struct gpmc_device_timings fields has to
+be updated with timings from the datasheet of the peripheral that is
+connected to gpmc. A few of the peripheral timings can be fed either
+in time or in cycles, provision to handle this scenario has been
+provided (refer struct gpmc_device_timings definition). It may so
+happen that timing as specified by peripheral datasheet is not present
+in timing structure, in this scenario, try to correlate peripheral
+timing to the one available. If that doesn't work, try to add a new
+field as required by peripheral, educate generic timing routine to
+handle it, make sure that it does not break any of the existing.
+Then there may be cases where peripheral datasheet doesn't mention
+certain fields of struct gpmc_device_timings, zero those entries.
+
+Generic timing routine has been verified to work properly on
+multiple onenand's and tusb6010 peripherals.
+
+A word of caution: generic timing routine has been developed based
+on understanding of gpmc timings, peripheral timings, available
+custom timing routines, a kind of reverse engineering without
+most of the datasheets & hardware (to be exact none of those supported
+in mainline having custom timing routine) and by simulation.
+
+gpmc timing dependency on peripheral timings:
+
+[<gpmc_timing>: <peripheral timing1>, <peripheral timing2> ...]
+
+1. common
+
+cs_on:
+	t_ceasu
+adv_on:
+	t_avdasu, t_ceavd
+
+2. sync common
+
+sync_clk:
+	clk
+page_burst_access:
+	t_bacc
+clk_activation:
+	t_ces, t_avds
+
+3. read async muxed
+
+adv_rd_off:
+	t_avdp_r
+oe_on:
+	t_oeasu, t_aavdh
+access:
+	t_iaa, t_oe, t_ce, t_aa
+rd_cycle:
+	t_rd_cycle, t_cez_r, t_oez
+
+4. read async non-muxed
+
+adv_rd_off:
+	t_avdp_r
+oe_on:
+	t_oeasu
+access:
+	t_iaa, t_oe, t_ce, t_aa
+rd_cycle:
+	t_rd_cycle, t_cez_r, t_oez
+
+5. read sync muxed
+
+adv_rd_off:
+	t_avdp_r, t_avdh
+oe_on:
+	t_oeasu, t_ach, cyc_aavdh_oe
+access:
+	t_iaa, cyc_iaa, cyc_oe
+rd_cycle:
+	t_cez_r, t_oez, t_ce_rdyz
+
+6. read sync non-muxed
+
+adv_rd_off:
+	t_avdp_r
+oe_on:
+	t_oeasu
+access:
+	t_iaa, cyc_iaa, cyc_oe
+rd_cycle:
+	t_cez_r, t_oez, t_ce_rdyz
+
+7. write async muxed
+
+adv_wr_off:
+	t_avdp_w
+we_on, wr_data_mux_bus:
+	t_weasu, t_aavdh, cyc_aavhd_we
+we_off:
+	t_wpl
+cs_wr_off:
+	t_wph
+wr_cycle:
+	t_cez_w, t_wr_cycle
+
+8. write async non-muxed
+
+adv_wr_off:
+	t_avdp_w
+we_on, wr_data_mux_bus:
+	t_weasu
+we_off:
+	t_wpl
+cs_wr_off:
+	t_wph
+wr_cycle:
+	t_cez_w, t_wr_cycle
+
+9. write sync muxed
+
+adv_wr_off:
+	t_avdp_w, t_avdh
+we_on, wr_data_mux_bus:
+	t_weasu, t_rdyo, t_aavdh, cyc_aavhd_we
+we_off:
+	t_wpl, cyc_wpl
+cs_wr_off:
+	t_wph
+wr_cycle:
+	t_cez_w, t_ce_rdyz
+
+10. write sync non-muxed
+
+adv_wr_off:
+	t_avdp_w
+we_on, wr_data_mux_bus:
+	t_weasu, t_rdyo
+we_off:
+	t_wpl, cyc_wpl
+cs_wr_off:
+	t_wph
+wr_cycle:
+	t_cez_w, t_ce_rdyz
+
+
+Note:
+  Many of gpmc timings are dependent on other gpmc timings (a few
+  gpmc timings purely dependent on other gpmc timings, a reason that
+  some of the gpmc timings are missing above), and it will result in
+  indirect dependency of peripheral timings to gpmc timings other than
+  mentioned above, refer timing routine for more details. To know what
+  these peripheral timings correspond to, please see explanations in
+  struct gpmc_device_timings definition. And for gpmc timings refer
+  IP details (link above).
diff --git a/Documentation/bus-devices/ti-gpmc.txt b/Documentation/bus-devices/ti-gpmc.txt
deleted file mode 100644
index cc9ce57e0a26..000000000000
--- a/Documentation/bus-devices/ti-gpmc.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-GPMC (General Purpose Memory Controller):
-=========================================
-
-GPMC is an unified memory controller dedicated to interfacing external
-memory devices like
- * Asynchronous SRAM like memories and application specific integrated
-   circuit devices.
- * Asynchronous, synchronous, and page mode burst NOR flash devices
-   NAND flash
- * Pseudo-SRAM devices
-
-GPMC is found on Texas Instruments SoC's (OMAP based)
-IP details: http://www.ti.com/lit/pdf/spruh73 section 7.1
-
-
-GPMC generic timing calculation:
-================================
-
-GPMC has certain timings that has to be programmed for proper
-functioning of the peripheral, while peripheral has another set of
-timings. To have peripheral work with gpmc, peripheral timings has to
-be translated to the form gpmc can understand. The way it has to be
-translated depends on the connected peripheral. Also there is a
-dependency for certain gpmc timings on gpmc clock frequency. Hence a
-generic timing routine was developed to achieve above requirements.
-
-Generic routine provides a generic method to calculate gpmc timings
-from gpmc peripheral timings. struct gpmc_device_timings fields has to
-be updated with timings from the datasheet of the peripheral that is
-connected to gpmc. A few of the peripheral timings can be fed either
-in time or in cycles, provision to handle this scenario has been
-provided (refer struct gpmc_device_timings definition). It may so
-happen that timing as specified by peripheral datasheet is not present
-in timing structure, in this scenario, try to correlate peripheral
-timing to the one available. If that doesn't work, try to add a new
-field as required by peripheral, educate generic timing routine to
-handle it, make sure that it does not break any of the existing.
-Then there may be cases where peripheral datasheet doesn't mention
-certain fields of struct gpmc_device_timings, zero those entries.
-
-Generic timing routine has been verified to work properly on
-multiple onenand's and tusb6010 peripherals.
-
-A word of caution: generic timing routine has been developed based
-on understanding of gpmc timings, peripheral timings, available
-custom timing routines, a kind of reverse engineering without
-most of the datasheets & hardware (to be exact none of those supported
-in mainline having custom timing routine) and by simulation.
-
-gpmc timing dependency on peripheral timings:
-[<gpmc_timing>: <peripheral timing1>, <peripheral timing2> ...]
-
-1. common
-cs_on: t_ceasu
-adv_on: t_avdasu, t_ceavd
-
-2. sync common
-sync_clk: clk
-page_burst_access: t_bacc
-clk_activation: t_ces, t_avds
-
-3. read async muxed
-adv_rd_off: t_avdp_r
-oe_on: t_oeasu, t_aavdh
-access: t_iaa, t_oe, t_ce, t_aa
-rd_cycle: t_rd_cycle, t_cez_r, t_oez
-
-4. read async non-muxed
-adv_rd_off: t_avdp_r
-oe_on: t_oeasu
-access: t_iaa, t_oe, t_ce, t_aa
-rd_cycle: t_rd_cycle, t_cez_r, t_oez
-
-5. read sync muxed
-adv_rd_off: t_avdp_r, t_avdh
-oe_on: t_oeasu, t_ach, cyc_aavdh_oe
-access: t_iaa, cyc_iaa, cyc_oe
-rd_cycle: t_cez_r, t_oez, t_ce_rdyz
-
-6. read sync non-muxed
-adv_rd_off: t_avdp_r
-oe_on: t_oeasu
-access: t_iaa, cyc_iaa, cyc_oe
-rd_cycle: t_cez_r, t_oez, t_ce_rdyz
-
-7. write async muxed
-adv_wr_off: t_avdp_w
-we_on, wr_data_mux_bus: t_weasu, t_aavdh, cyc_aavhd_we
-we_off: t_wpl
-cs_wr_off: t_wph
-wr_cycle: t_cez_w, t_wr_cycle
-
-8. write async non-muxed
-adv_wr_off: t_avdp_w
-we_on, wr_data_mux_bus: t_weasu
-we_off: t_wpl
-cs_wr_off: t_wph
-wr_cycle: t_cez_w, t_wr_cycle
-
-9. write sync muxed
-adv_wr_off: t_avdp_w, t_avdh
-we_on, wr_data_mux_bus: t_weasu, t_rdyo, t_aavdh, cyc_aavhd_we
-we_off: t_wpl, cyc_wpl
-cs_wr_off: t_wph
-wr_cycle: t_cez_w, t_ce_rdyz
-
-10. write sync non-muxed
-adv_wr_off: t_avdp_w
-we_on, wr_data_mux_bus: t_weasu, t_rdyo
-we_off: t_wpl, cyc_wpl
-cs_wr_off: t_wph
-wr_cycle: t_cez_w, t_ce_rdyz
-
-
-Note: Many of gpmc timings are dependent on other gpmc timings (a few
-gpmc timings purely dependent on other gpmc timings, a reason that
-some of the gpmc timings are missing above), and it will result in
-indirect dependency of peripheral timings to gpmc timings other than
-mentioned above, refer timing routine for more details. To know what
-these peripheral timings correspond to, please see explanations in
-struct gpmc_device_timings definition. And for gpmc timings refer
-IP details (link above).
-- 
cgit 


From a278295ccc2ddd1dc0ac8423a12ff6dd74f0d502 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Mon, 15 Apr 2019 19:25:27 -0300
Subject: docs: nvmem: convert docs to ReST and rename to *.rst

In order to be able to add it into a doc book, we need first
convert it to ReST.

The conversion is actually:
  - add blank lines and identation in order to identify paragraphs;
  - mark literal blocks;
  - adjust title markups.

While this is not part of any book, mark it as :orphan:, in order
to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/nvmem/nvmem.rst | 189 ++++++++++++++++++++++++++++++++++++++++++
 Documentation/nvmem/nvmem.txt | 183 ----------------------------------------
 2 files changed, 189 insertions(+), 183 deletions(-)
 create mode 100644 Documentation/nvmem/nvmem.rst
 delete mode 100644 Documentation/nvmem/nvmem.txt

(limited to 'Documentation')

diff --git a/Documentation/nvmem/nvmem.rst b/Documentation/nvmem/nvmem.rst
new file mode 100644
index 000000000000..3866b6e066d5
--- /dev/null
+++ b/Documentation/nvmem/nvmem.rst
@@ -0,0 +1,189 @@
+:orphan:
+
+===============
+NVMEM Subsystem
+===============
+
+ Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+This document explains the NVMEM Framework along with the APIs provided,
+and how to use it.
+
+1. Introduction
+===============
+*NVMEM* is the abbreviation for Non Volatile Memory layer. It is used to
+retrieve configuration of SOC or Device specific data from non volatile
+memories like eeprom, efuses and so on.
+
+Before this framework existed, NVMEM drivers like eeprom were stored in
+drivers/misc, where they all had to duplicate pretty much the same code to
+register a sysfs file, allow in-kernel users to access the content of the
+devices they were driving, etc.
+
+This was also a problem as far as other in-kernel users were involved, since
+the solutions used were pretty much different from one driver to another, there
+was a rather big abstraction leak.
+
+This framework aims at solve these problems. It also introduces DT
+representation for consumer devices to go get the data they require (MAC
+Addresses, SoC/Revision ID, part numbers, and so on) from the NVMEMs. This
+framework is based on regmap, so that most of the abstraction available in
+regmap can be reused, across multiple types of buses.
+
+NVMEM Providers
++++++++++++++++
+
+NVMEM provider refers to an entity that implements methods to initialize, read
+and write the non-volatile memory.
+
+2. Registering/Unregistering the NVMEM provider
+===============================================
+
+A NVMEM provider can register with NVMEM core by supplying relevant
+nvmem configuration to nvmem_register(), on success core would return a valid
+nvmem_device pointer.
+
+nvmem_unregister(nvmem) is used to unregister a previously registered provider.
+
+For example, a simple qfprom case::
+
+  static struct nvmem_config econfig = {
+	.name = "qfprom",
+	.owner = THIS_MODULE,
+  };
+
+  static int qfprom_probe(struct platform_device *pdev)
+  {
+	...
+	econfig.dev = &pdev->dev;
+	nvmem = nvmem_register(&econfig);
+	...
+  }
+
+It is mandatory that the NVMEM provider has a regmap associated with its
+struct device. Failure to do would return error code from nvmem_register().
+
+Users of board files can define and register nvmem cells using the
+nvmem_cell_table struct::
+
+  static struct nvmem_cell_info foo_nvmem_cells[] = {
+	{
+		.name		= "macaddr",
+		.offset		= 0x7f00,
+		.bytes		= ETH_ALEN,
+	}
+  };
+
+  static struct nvmem_cell_table foo_nvmem_cell_table = {
+	.nvmem_name		= "i2c-eeprom",
+	.cells			= foo_nvmem_cells,
+	.ncells			= ARRAY_SIZE(foo_nvmem_cells),
+  };
+
+  nvmem_add_cell_table(&foo_nvmem_cell_table);
+
+Additionally it is possible to create nvmem cell lookup entries and register
+them with the nvmem framework from machine code as shown in the example below::
+
+  static struct nvmem_cell_lookup foo_nvmem_lookup = {
+	.nvmem_name		= "i2c-eeprom",
+	.cell_name		= "macaddr",
+	.dev_id			= "foo_mac.0",
+	.con_id			= "mac-address",
+  };
+
+  nvmem_add_cell_lookups(&foo_nvmem_lookup, 1);
+
+NVMEM Consumers
++++++++++++++++
+
+NVMEM consumers are the entities which make use of the NVMEM provider to
+read from and to NVMEM.
+
+3. NVMEM cell based consumer APIs
+=================================
+
+NVMEM cells are the data entries/fields in the NVMEM.
+The NVMEM framework provides 3 APIs to read/write NVMEM cells::
+
+  struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *name);
+  struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, const char *name);
+
+  void nvmem_cell_put(struct nvmem_cell *cell);
+  void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell);
+
+  void *nvmem_cell_read(struct nvmem_cell *cell, ssize_t *len);
+  int nvmem_cell_write(struct nvmem_cell *cell, void *buf, ssize_t len);
+
+`*nvmem_cell_get()` apis will get a reference to nvmem cell for a given id,
+and nvmem_cell_read/write() can then read or write to the cell.
+Once the usage of the cell is finished the consumer should call
+`*nvmem_cell_put()` to free all the allocation memory for the cell.
+
+4. Direct NVMEM device based consumer APIs
+==========================================
+
+In some instances it is necessary to directly read/write the NVMEM.
+To facilitate such consumers NVMEM framework provides below apis::
+
+  struct nvmem_device *nvmem_device_get(struct device *dev, const char *name);
+  struct nvmem_device *devm_nvmem_device_get(struct device *dev,
+					   const char *name);
+  void nvmem_device_put(struct nvmem_device *nvmem);
+  int nvmem_device_read(struct nvmem_device *nvmem, unsigned int offset,
+		      size_t bytes, void *buf);
+  int nvmem_device_write(struct nvmem_device *nvmem, unsigned int offset,
+		       size_t bytes, void *buf);
+  int nvmem_device_cell_read(struct nvmem_device *nvmem,
+			   struct nvmem_cell_info *info, void *buf);
+  int nvmem_device_cell_write(struct nvmem_device *nvmem,
+			    struct nvmem_cell_info *info, void *buf);
+
+Before the consumers can read/write NVMEM directly, it should get hold
+of nvmem_controller from one of the `*nvmem_device_get()` api.
+
+The difference between these apis and cell based apis is that these apis always
+take nvmem_device as parameter.
+
+5. Releasing a reference to the NVMEM
+=====================================
+
+When a consumer no longer needs the NVMEM, it has to release the reference
+to the NVMEM it has obtained using the APIs mentioned in the above section.
+The NVMEM framework provides 2 APIs to release a reference to the NVMEM::
+
+  void nvmem_cell_put(struct nvmem_cell *cell);
+  void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell);
+  void nvmem_device_put(struct nvmem_device *nvmem);
+  void devm_nvmem_device_put(struct device *dev, struct nvmem_device *nvmem);
+
+Both these APIs are used to release a reference to the NVMEM and
+devm_nvmem_cell_put and devm_nvmem_device_put destroys the devres associated
+with this NVMEM.
+
+Userspace
++++++++++
+
+6. Userspace binary interface
+==============================
+
+Userspace can read/write the raw NVMEM file located at::
+
+	/sys/bus/nvmem/devices/*/nvmem
+
+ex::
+
+  hexdump /sys/bus/nvmem/devices/qfprom0/nvmem
+
+  0000000 0000 0000 0000 0000 0000 0000 0000 0000
+  *
+  00000a0 db10 2240 0000 e000 0c00 0c00 0000 0c00
+  0000000 0000 0000 0000 0000 0000 0000 0000 0000
+  ...
+  *
+  0001000
+
+7. DeviceTree Binding
+=====================
+
+See Documentation/devicetree/bindings/nvmem/nvmem.txt
diff --git a/Documentation/nvmem/nvmem.txt b/Documentation/nvmem/nvmem.txt
deleted file mode 100644
index fc2fe4b18655..000000000000
--- a/Documentation/nvmem/nvmem.txt
+++ /dev/null
@@ -1,183 +0,0 @@
-			    NVMEM SUBSYSTEM
-	  Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
-
-This document explains the NVMEM Framework along with the APIs provided,
-and how to use it.
-
-1. Introduction
-===============
-*NVMEM* is the abbreviation for Non Volatile Memory layer. It is used to
-retrieve configuration of SOC or Device specific data from non volatile
-memories like eeprom, efuses and so on.
-
-Before this framework existed, NVMEM drivers like eeprom were stored in
-drivers/misc, where they all had to duplicate pretty much the same code to
-register a sysfs file, allow in-kernel users to access the content of the
-devices they were driving, etc.
-
-This was also a problem as far as other in-kernel users were involved, since
-the solutions used were pretty much different from one driver to another, there
-was a rather big abstraction leak.
-
-This framework aims at solve these problems. It also introduces DT
-representation for consumer devices to go get the data they require (MAC
-Addresses, SoC/Revision ID, part numbers, and so on) from the NVMEMs. This
-framework is based on regmap, so that most of the abstraction available in
-regmap can be reused, across multiple types of buses.
-
-NVMEM Providers
-+++++++++++++++
-
-NVMEM provider refers to an entity that implements methods to initialize, read
-and write the non-volatile memory.
-
-2. Registering/Unregistering the NVMEM provider
-===============================================
-
-A NVMEM provider can register with NVMEM core by supplying relevant
-nvmem configuration to nvmem_register(), on success core would return a valid
-nvmem_device pointer.
-
-nvmem_unregister(nvmem) is used to unregister a previously registered provider.
-
-For example, a simple qfprom case:
-
-static struct nvmem_config econfig = {
-	.name = "qfprom",
-	.owner = THIS_MODULE,
-};
-
-static int qfprom_probe(struct platform_device *pdev)
-{
-	...
-	econfig.dev = &pdev->dev;
-	nvmem = nvmem_register(&econfig);
-	...
-}
-
-It is mandatory that the NVMEM provider has a regmap associated with its
-struct device. Failure to do would return error code from nvmem_register().
-
-Users of board files can define and register nvmem cells using the
-nvmem_cell_table struct:
-
-static struct nvmem_cell_info foo_nvmem_cells[] = {
-	{
-		.name		= "macaddr",
-		.offset		= 0x7f00,
-		.bytes		= ETH_ALEN,
-	}
-};
-
-static struct nvmem_cell_table foo_nvmem_cell_table = {
-	.nvmem_name		= "i2c-eeprom",
-	.cells			= foo_nvmem_cells,
-	.ncells			= ARRAY_SIZE(foo_nvmem_cells),
-};
-
-nvmem_add_cell_table(&foo_nvmem_cell_table);
-
-Additionally it is possible to create nvmem cell lookup entries and register
-them with the nvmem framework from machine code as shown in the example below:
-
-static struct nvmem_cell_lookup foo_nvmem_lookup = {
-	.nvmem_name		= "i2c-eeprom",
-	.cell_name		= "macaddr",
-	.dev_id			= "foo_mac.0",
-	.con_id			= "mac-address",
-};
-
-nvmem_add_cell_lookups(&foo_nvmem_lookup, 1);
-
-NVMEM Consumers
-+++++++++++++++
-
-NVMEM consumers are the entities which make use of the NVMEM provider to
-read from and to NVMEM.
-
-3. NVMEM cell based consumer APIs
-=================================
-
-NVMEM cells are the data entries/fields in the NVMEM.
-The NVMEM framework provides 3 APIs to read/write NVMEM cells.
-
-struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *name);
-struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, const char *name);
-
-void nvmem_cell_put(struct nvmem_cell *cell);
-void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell);
-
-void *nvmem_cell_read(struct nvmem_cell *cell, ssize_t *len);
-int nvmem_cell_write(struct nvmem_cell *cell, void *buf, ssize_t len);
-
-*nvmem_cell_get() apis will get a reference to nvmem cell for a given id,
-and nvmem_cell_read/write() can then read or write to the cell.
-Once the usage of the cell is finished the consumer should call *nvmem_cell_put()
-to free all the allocation memory for the cell.
-
-4. Direct NVMEM device based consumer APIs
-==========================================
-
-In some instances it is necessary to directly read/write the NVMEM.
-To facilitate such consumers NVMEM framework provides below apis.
-
-struct nvmem_device *nvmem_device_get(struct device *dev, const char *name);
-struct nvmem_device *devm_nvmem_device_get(struct device *dev,
-					   const char *name);
-void nvmem_device_put(struct nvmem_device *nvmem);
-int nvmem_device_read(struct nvmem_device *nvmem, unsigned int offset,
-		      size_t bytes, void *buf);
-int nvmem_device_write(struct nvmem_device *nvmem, unsigned int offset,
-		       size_t bytes, void *buf);
-int nvmem_device_cell_read(struct nvmem_device *nvmem,
-			   struct nvmem_cell_info *info, void *buf);
-int nvmem_device_cell_write(struct nvmem_device *nvmem,
-			    struct nvmem_cell_info *info, void *buf);
-
-Before the consumers can read/write NVMEM directly, it should get hold
-of nvmem_controller from one of the *nvmem_device_get() api.
-
-The difference between these apis and cell based apis is that these apis always
-take nvmem_device as parameter.
-
-5. Releasing a reference to the NVMEM
-=====================================
-
-When a consumer no longer needs the NVMEM, it has to release the reference
-to the NVMEM it has obtained using the APIs mentioned in the above section.
-The NVMEM framework provides 2 APIs to release a reference to the NVMEM.
-
-void nvmem_cell_put(struct nvmem_cell *cell);
-void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell);
-void nvmem_device_put(struct nvmem_device *nvmem);
-void devm_nvmem_device_put(struct device *dev, struct nvmem_device *nvmem);
-
-Both these APIs are used to release a reference to the NVMEM and
-devm_nvmem_cell_put and devm_nvmem_device_put destroys the devres associated
-with this NVMEM.
-
-Userspace
-+++++++++
-
-6. Userspace binary interface
-==============================
-
-Userspace can read/write the raw NVMEM file located at
-/sys/bus/nvmem/devices/*/nvmem
-
-ex:
-
-hexdump /sys/bus/nvmem/devices/qfprom0/nvmem
-
-0000000 0000 0000 0000 0000 0000 0000 0000 0000
-*
-00000a0 db10 2240 0000 e000 0c00 0c00 0000 0c00
-0000000 0000 0000 0000 0000 0000 0000 0000 0000
-...
-*
-0001000
-
-7. DeviceTree Binding
-=====================
-
-See Documentation/devicetree/bindings/nvmem/nvmem.txt
-- 
cgit 


From 1945a035540e2cef0362a2e7e828f8cf547e86b8 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Mon, 15 Apr 2019 19:27:55 -0300
Subject: docs: phy: convert samsung-usb2.txt to ReST format

In order to merge it into a Sphinx book, we need first to
convert to ReST.

While this is not part of any book, mark it as :orphan:, in order
to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/phy/samsung-usb2.rst | 139 +++++++++++++++++++++++++++++++++++++
 Documentation/phy/samsung-usb2.txt | 135 -----------------------------------
 2 files changed, 139 insertions(+), 135 deletions(-)
 create mode 100644 Documentation/phy/samsung-usb2.rst
 delete mode 100644 Documentation/phy/samsung-usb2.txt

(limited to 'Documentation')

diff --git a/Documentation/phy/samsung-usb2.rst b/Documentation/phy/samsung-usb2.rst
new file mode 100644
index 000000000000..98b5952fcb97
--- /dev/null
+++ b/Documentation/phy/samsung-usb2.rst
@@ -0,0 +1,139 @@
+:orphan:
+
+====================================
+Samsung USB 2.0 PHY adaptation layer
+====================================
+
+1. Description
+--------------
+
+The architecture of the USB 2.0 PHY module in Samsung SoCs is similar
+among many SoCs. In spite of the similarities it proved difficult to
+create a one driver that would fit all these PHY controllers. Often
+the differences were minor and were found in particular bits of the
+registers of the PHY. In some rare cases the order of register writes or
+the PHY powering up process had to be altered. This adaptation layer is
+a compromise between having separate drivers and having a single driver
+with added support for many special cases.
+
+2. Files description
+--------------------
+
+- phy-samsung-usb2.c
+   This is the main file of the adaptation layer. This file contains
+   the probe function and provides two callbacks to the Generic PHY
+   Framework. This two callbacks are used to power on and power off the
+   phy. They carry out the common work that has to be done on all version
+   of the PHY module. Depending on which SoC was chosen they execute SoC
+   specific callbacks. The specific SoC version is selected by choosing
+   the appropriate compatible string. In addition, this file contains
+   struct of_device_id definitions for particular SoCs.
+
+- phy-samsung-usb2.h
+   This is the include file. It declares the structures used by this
+   driver. In addition it should contain extern declarations for
+   structures that describe particular SoCs.
+
+3. Supporting SoCs
+------------------
+
+To support a new SoC a new file should be added to the drivers/phy
+directory. Each SoC's configuration is stored in an instance of the
+struct samsung_usb2_phy_config::
+
+  struct samsung_usb2_phy_config {
+	const struct samsung_usb2_common_phy *phys;
+	int (*rate_to_clk)(unsigned long, u32 *);
+	unsigned int num_phys;
+	bool has_mode_switch;
+  };
+
+The num_phys is the number of phys handled by the driver. `*phys` is an
+array that contains the configuration for each phy. The has_mode_switch
+property is a boolean flag that determines whether the SoC has USB host
+and device on a single pair of pins. If so, a special register has to
+be modified to change the internal routing of these pins between a USB
+device or host module.
+
+For example the configuration for Exynos 4210 is following::
+
+  const struct samsung_usb2_phy_config exynos4210_usb2_phy_config = {
+	.has_mode_switch        = 0,
+	.num_phys		= EXYNOS4210_NUM_PHYS,
+	.phys			= exynos4210_phys,
+	.rate_to_clk		= exynos4210_rate_to_clk,
+  }
+
+- `int (*rate_to_clk)(unsigned long, u32 *)`
+
+	The rate_to_clk callback is to convert the rate of the clock
+	used as the reference clock for the PHY module to the value
+	that should be written in the hardware register.
+
+The exynos4210_phys configuration array is as follows::
+
+  static const struct samsung_usb2_common_phy exynos4210_phys[] = {
+	{
+		.label		= "device",
+		.id		= EXYNOS4210_DEVICE,
+		.power_on	= exynos4210_power_on,
+		.power_off	= exynos4210_power_off,
+	},
+	{
+		.label		= "host",
+		.id		= EXYNOS4210_HOST,
+		.power_on	= exynos4210_power_on,
+		.power_off	= exynos4210_power_off,
+	},
+	{
+		.label		= "hsic0",
+		.id		= EXYNOS4210_HSIC0,
+		.power_on	= exynos4210_power_on,
+		.power_off	= exynos4210_power_off,
+	},
+	{
+		.label		= "hsic1",
+		.id		= EXYNOS4210_HSIC1,
+		.power_on	= exynos4210_power_on,
+		.power_off	= exynos4210_power_off,
+	},
+	{},
+  };
+
+- `int (*power_on)(struct samsung_usb2_phy_instance *);`
+  `int (*power_off)(struct samsung_usb2_phy_instance *);`
+
+	These two callbacks are used to power on and power off the phy
+	by modifying appropriate registers.
+
+Final change to the driver is adding appropriate compatible value to the
+phy-samsung-usb2.c file. In case of Exynos 4210 the following lines were
+added to the struct of_device_id samsung_usb2_phy_of_match[] array::
+
+  #ifdef CONFIG_PHY_EXYNOS4210_USB2
+	{
+		.compatible = "samsung,exynos4210-usb2-phy",
+		.data = &exynos4210_usb2_phy_config,
+	},
+  #endif
+
+To add further flexibility to the driver the Kconfig file enables to
+include support for selected SoCs in the compiled driver. The Kconfig
+entry for Exynos 4210 is following::
+
+  config PHY_EXYNOS4210_USB2
+	bool "Support for Exynos 4210"
+	depends on PHY_SAMSUNG_USB2
+	depends on CPU_EXYNOS4210
+	help
+	  Enable USB PHY support for Exynos 4210. This option requires that
+	  Samsung USB 2.0 PHY driver is enabled and means that support for this
+	  particular SoC is compiled in the driver. In case of Exynos 4210 four
+	  phys are available - device, host, HSCI0 and HSCI1.
+
+The newly created file that supports the new SoC has to be also added to the
+Makefile. In case of Exynos 4210 the added line is following::
+
+  obj-$(CONFIG_PHY_EXYNOS4210_USB2)       += phy-exynos4210-usb2.o
+
+After completing these steps the support for the new SoC should be ready.
diff --git a/Documentation/phy/samsung-usb2.txt b/Documentation/phy/samsung-usb2.txt
deleted file mode 100644
index ed12d437189d..000000000000
--- a/Documentation/phy/samsung-usb2.txt
+++ /dev/null
@@ -1,135 +0,0 @@
-.------------------------------------------------------------------------------+
-|			Samsung USB 2.0 PHY adaptation layer		       |
-+-----------------------------------------------------------------------------+'
-
-| 1. Description
-+----------------
-
-The architecture of the USB 2.0 PHY module in Samsung SoCs is similar
-among many SoCs. In spite of the similarities it proved difficult to
-create a one driver that would fit all these PHY controllers. Often
-the differences were minor and were found in particular bits of the
-registers of the PHY. In some rare cases the order of register writes or
-the PHY powering up process had to be altered. This adaptation layer is
-a compromise between having separate drivers and having a single driver
-with added support for many special cases.
-
-| 2. Files description
-+----------------------
-
-- phy-samsung-usb2.c
-   This is the main file of the adaptation layer. This file contains
-   the probe function and provides two callbacks to the Generic PHY
-   Framework. This two callbacks are used to power on and power off the
-   phy. They carry out the common work that has to be done on all version
-   of the PHY module. Depending on which SoC was chosen they execute SoC
-   specific callbacks. The specific SoC version is selected by choosing
-   the appropriate compatible string. In addition, this file contains
-   struct of_device_id definitions for particular SoCs.
-
-- phy-samsung-usb2.h
-   This is the include file. It declares the structures used by this
-   driver. In addition it should contain extern declarations for
-   structures that describe particular SoCs.
-
-| 3. Supporting SoCs
-+--------------------
-
-To support a new SoC a new file should be added to the drivers/phy
-directory. Each SoC's configuration is stored in an instance of the
-struct samsung_usb2_phy_config.
-
-struct samsung_usb2_phy_config {
-	const struct samsung_usb2_common_phy *phys;
-	int (*rate_to_clk)(unsigned long, u32 *);
-	unsigned int num_phys;
-	bool has_mode_switch;
-};
-
-The num_phys is the number of phys handled by the driver. *phys is an
-array that contains the configuration for each phy. The has_mode_switch
-property is a boolean flag that determines whether the SoC has USB host
-and device on a single pair of pins. If so, a special register has to
-be modified to change the internal routing of these pins between a USB
-device or host module.
-
-For example the configuration for Exynos 4210 is following:
-
-const struct samsung_usb2_phy_config exynos4210_usb2_phy_config = {
-	.has_mode_switch        = 0,
-	.num_phys		= EXYNOS4210_NUM_PHYS,
-	.phys			= exynos4210_phys,
-	.rate_to_clk		= exynos4210_rate_to_clk,
-}
-
-- int (*rate_to_clk)(unsigned long, u32 *)
-	The rate_to_clk callback is to convert the rate of the clock
-	used as the reference clock for the PHY module to the value
-	that should be written in the hardware register.
-
-The exynos4210_phys configuration array is as follows:
-
-static const struct samsung_usb2_common_phy exynos4210_phys[] = {
-	{
-		.label		= "device",
-		.id		= EXYNOS4210_DEVICE,
-		.power_on	= exynos4210_power_on,
-		.power_off	= exynos4210_power_off,
-	},
-	{
-		.label		= "host",
-		.id		= EXYNOS4210_HOST,
-		.power_on	= exynos4210_power_on,
-		.power_off	= exynos4210_power_off,
-	},
-	{
-		.label		= "hsic0",
-		.id		= EXYNOS4210_HSIC0,
-		.power_on	= exynos4210_power_on,
-		.power_off	= exynos4210_power_off,
-	},
-	{
-		.label		= "hsic1",
-		.id		= EXYNOS4210_HSIC1,
-		.power_on	= exynos4210_power_on,
-		.power_off	= exynos4210_power_off,
-	},
-	{},
-};
-
-- int (*power_on)(struct samsung_usb2_phy_instance *);
-- int (*power_off)(struct samsung_usb2_phy_instance *);
-	These two callbacks are used to power on and power off the phy
-	by modifying appropriate registers.
-
-Final change to the driver is adding appropriate compatible value to the
-phy-samsung-usb2.c file. In case of Exynos 4210 the following lines were
-added to the struct of_device_id samsung_usb2_phy_of_match[] array:
-
-#ifdef CONFIG_PHY_EXYNOS4210_USB2
-	{
-		.compatible = "samsung,exynos4210-usb2-phy",
-		.data = &exynos4210_usb2_phy_config,
-	},
-#endif
-
-To add further flexibility to the driver the Kconfig file enables to
-include support for selected SoCs in the compiled driver. The Kconfig
-entry for Exynos 4210 is following:
-
-config PHY_EXYNOS4210_USB2
-	bool "Support for Exynos 4210"
-	depends on PHY_SAMSUNG_USB2
-	depends on CPU_EXYNOS4210
-	help
-	  Enable USB PHY support for Exynos 4210. This option requires that
-	  Samsung USB 2.0 PHY driver is enabled and means that support for this
-	  particular SoC is compiled in the driver. In case of Exynos 4210 four
-	  phys are available - device, host, HSCI0 and HSCI1.
-
-The newly created file that supports the new SoC has to be also added to the
-Makefile. In case of Exynos 4210 the added line is following:
-
-obj-$(CONFIG_PHY_EXYNOS4210_USB2)       += phy-exynos4210-usb2.o
-
-After completing these steps the support for the new SoC should be ready.
-- 
cgit 


From eaf5211d8c00060a3b41a031a762c906d3603098 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Mon, 15 Apr 2019 23:00:35 -0300
Subject: docs: rbtree.txt: fix Sphinx build warnings

Ths file is already at ReST format. Yet, some recent changes
made it to produce a few warnings when building it with
Sphinx.

Those are trivially fixed by marking some literal blocks.

Fix them before adding it to the docs building system.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/rbtree.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt
index c42a21b99046..523d54b60087 100644
--- a/Documentation/rbtree.txt
+++ b/Documentation/rbtree.txt
@@ -204,21 +204,21 @@ potentially expensive tree iterations. This is done at negligible runtime
 overhead for maintanence; albeit larger memory footprint.
 
 Similar to the rb_root structure, cached rbtrees are initialized to be
-empty via:
+empty via::
 
   struct rb_root_cached mytree = RB_ROOT_CACHED;
 
 Cached rbtree is simply a regular rb_root with an extra pointer to cache the
 leftmost node. This allows rb_root_cached to exist wherever rb_root does,
 which permits augmented trees to be supported as well as only a few extra
-interfaces:
+interfaces::
 
   struct rb_node *rb_first_cached(struct rb_root_cached *tree);
   void rb_insert_color_cached(struct rb_node *, struct rb_root_cached *, bool);
   void rb_erase_cached(struct rb_node *node, struct rb_root_cached *);
 
 Both insert and erase calls have their respective counterpart of augmented
-trees:
+trees::
 
   void rb_insert_augmented_cached(struct rb_node *node, struct rb_root_cached *,
 				  bool, struct rb_augment_callbacks *);
-- 
cgit 


From a36d053863a1b6cd6e79a632af01be014517f9ac Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Mon, 27 May 2019 15:59:13 -0300
Subject: docs: DMA-API-HOWTO.txt: fix an unmarked code block

When building with Sphinx, it would produce this warning:

    docs/Documentation/DMA-API-HOWTO.rst:222: WARNING: Definition list ends without a blank line; unexpected unindent.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/DMA-API-HOWTO.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt
index cb712a02f59f..358d495456d1 100644
--- a/Documentation/DMA-API-HOWTO.txt
+++ b/Documentation/DMA-API-HOWTO.txt
@@ -212,7 +212,7 @@ The standard 64-bit addressing device would do something like this::
 
 If the device only supports 32-bit addressing for descriptors in the
 coherent allocations, but supports full 64-bits for streaming mappings
-it would look like this:
+it would look like this::
 
 	if (dma_set_mask(dev, DMA_BIT_MASK(64))) {
 		dev_warn(dev, "mydev: No suitable DMA available\n");
-- 
cgit 


From c3123552aad3ffd7a35e16d4402231225165e343 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 17 Apr 2019 05:46:08 -0300
Subject: docs: accounting: convert to ReST

Rename the accounting documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/accounting/cgroupstats.rst      |  31 ++++
 Documentation/accounting/cgroupstats.txt      |  27 ----
 Documentation/accounting/delay-accounting.rst | 126 ++++++++++++++++
 Documentation/accounting/delay-accounting.txt | 117 ---------------
 Documentation/accounting/index.rst            |  14 ++
 Documentation/accounting/psi.rst              | 182 +++++++++++++++++++++++
 Documentation/accounting/psi.txt              | 180 -----------------------
 Documentation/accounting/taskstats-struct.rst | 199 ++++++++++++++++++++++++++
 Documentation/accounting/taskstats-struct.txt | 180 -----------------------
 Documentation/accounting/taskstats.rst        | 180 +++++++++++++++++++++++
 Documentation/accounting/taskstats.txt        | 181 -----------------------
 Documentation/admin-guide/cgroup-v2.rst       |   6 +-
 12 files changed, 735 insertions(+), 688 deletions(-)
 create mode 100644 Documentation/accounting/cgroupstats.rst
 delete mode 100644 Documentation/accounting/cgroupstats.txt
 create mode 100644 Documentation/accounting/delay-accounting.rst
 delete mode 100644 Documentation/accounting/delay-accounting.txt
 create mode 100644 Documentation/accounting/index.rst
 create mode 100644 Documentation/accounting/psi.rst
 delete mode 100644 Documentation/accounting/psi.txt
 create mode 100644 Documentation/accounting/taskstats-struct.rst
 delete mode 100644 Documentation/accounting/taskstats-struct.txt
 create mode 100644 Documentation/accounting/taskstats.rst
 delete mode 100644 Documentation/accounting/taskstats.txt

(limited to 'Documentation')

diff --git a/Documentation/accounting/cgroupstats.rst b/Documentation/accounting/cgroupstats.rst
new file mode 100644
index 000000000000..b9afc48f4ea2
--- /dev/null
+++ b/Documentation/accounting/cgroupstats.rst
@@ -0,0 +1,31 @@
+==================
+Control Groupstats
+==================
+
+Control Groupstats is inspired by the discussion at
+http://lkml.org/lkml/2007/4/11/187 and implements per cgroup statistics as
+suggested by Andrew Morton in http://lkml.org/lkml/2007/4/11/263.
+
+Per cgroup statistics infrastructure re-uses code from the taskstats
+interface. A new set of cgroup operations are registered with commands
+and attributes specific to cgroups. It should be very easy to
+extend per cgroup statistics, by adding members to the cgroupstats
+structure.
+
+The current model for cgroupstats is a pull, a push model (to post
+statistics on interesting events), should be very easy to add. Currently
+user space requests for statistics by passing the cgroup path.
+Statistics about the state of all the tasks in the cgroup is returned to
+user space.
+
+NOTE: We currently rely on delay accounting for extracting information
+about tasks blocked on I/O. If CONFIG_TASK_DELAY_ACCT is disabled, this
+information will not be available.
+
+To extract cgroup statistics a utility very similar to getdelays.c
+has been developed, the sample output of the utility is shown below::
+
+  ~/balbir/cgroupstats # ./getdelays  -C "/sys/fs/cgroup/a"
+  sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0
+  ~/balbir/cgroupstats # ./getdelays  -C "/sys/fs/cgroup"
+  sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2
diff --git a/Documentation/accounting/cgroupstats.txt b/Documentation/accounting/cgroupstats.txt
deleted file mode 100644
index d16a9849e60e..000000000000
--- a/Documentation/accounting/cgroupstats.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-Control Groupstats is inspired by the discussion at
-http://lkml.org/lkml/2007/4/11/187 and implements per cgroup statistics as
-suggested by Andrew Morton in http://lkml.org/lkml/2007/4/11/263.
-
-Per cgroup statistics infrastructure re-uses code from the taskstats
-interface. A new set of cgroup operations are registered with commands
-and attributes specific to cgroups. It should be very easy to
-extend per cgroup statistics, by adding members to the cgroupstats
-structure.
-
-The current model for cgroupstats is a pull, a push model (to post
-statistics on interesting events), should be very easy to add. Currently
-user space requests for statistics by passing the cgroup path.
-Statistics about the state of all the tasks in the cgroup is returned to
-user space.
-
-NOTE: We currently rely on delay accounting for extracting information
-about tasks blocked on I/O. If CONFIG_TASK_DELAY_ACCT is disabled, this
-information will not be available.
-
-To extract cgroup statistics a utility very similar to getdelays.c
-has been developed, the sample output of the utility is shown below
-
-~/balbir/cgroupstats # ./getdelays  -C "/sys/fs/cgroup/a"
-sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0
-~/balbir/cgroupstats # ./getdelays  -C "/sys/fs/cgroup"
-sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2
diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst
new file mode 100644
index 000000000000..7cc7f5852da0
--- /dev/null
+++ b/Documentation/accounting/delay-accounting.rst
@@ -0,0 +1,126 @@
+================
+Delay accounting
+================
+
+Tasks encounter delays in execution when they wait
+for some kernel resource to become available e.g. a
+runnable task may wait for a free CPU to run on.
+
+The per-task delay accounting functionality measures
+the delays experienced by a task while
+
+a) waiting for a CPU (while being runnable)
+b) completion of synchronous block I/O initiated by the task
+c) swapping in pages
+d) memory reclaim
+
+and makes these statistics available to userspace through
+the taskstats interface.
+
+Such delays provide feedback for setting a task's cpu priority,
+io priority and rss limit values appropriately. Long delays for
+important tasks could be a trigger for raising its corresponding priority.
+
+The functionality, through its use of the taskstats interface, also provides
+delay statistics aggregated for all tasks (or threads) belonging to a
+thread group (corresponding to a traditional Unix process). This is a commonly
+needed aggregation that is more efficiently done by the kernel.
+
+Userspace utilities, particularly resource management applications, can also
+aggregate delay statistics into arbitrary groups. To enable this, delay
+statistics of a task are available both during its lifetime as well as on its
+exit, ensuring continuous and complete monitoring can be done.
+
+
+Interface
+---------
+
+Delay accounting uses the taskstats interface which is described
+in detail in a separate document in this directory. Taskstats returns a
+generic data structure to userspace corresponding to per-pid and per-tgid
+statistics. The delay accounting functionality populates specific fields of
+this structure. See
+
+     include/linux/taskstats.h
+
+for a description of the fields pertaining to delay accounting.
+It will generally be in the form of counters returning the cumulative
+delay seen for cpu, sync block I/O, swapin, memory reclaim etc.
+
+Taking the difference of two successive readings of a given
+counter (say cpu_delay_total) for a task will give the delay
+experienced by the task waiting for the corresponding resource
+in that interval.
+
+When a task exits, records containing the per-task statistics
+are sent to userspace without requiring a command. If it is the last exiting
+task of a thread group, the per-tgid statistics are also sent. More details
+are given in the taskstats interface description.
+
+The getdelays.c userspace utility in tools/accounting directory allows simple
+commands to be run and the corresponding delay statistics to be displayed. It
+also serves as an example of using the taskstats interface.
+
+Usage
+-----
+
+Compile the kernel with::
+
+	CONFIG_TASK_DELAY_ACCT=y
+	CONFIG_TASKSTATS=y
+
+Delay accounting is enabled by default at boot up.
+To disable, add::
+
+   nodelayacct
+
+to the kernel boot options. The rest of the instructions
+below assume this has not been done.
+
+After the system has booted up, use a utility
+similar to  getdelays.c to access the delays
+seen by a given task or a task group (tgid).
+The utility also allows a given command to be
+executed and the corresponding delays to be
+seen.
+
+General format of the getdelays command::
+
+	getdelays [-t tgid] [-p pid] [-c cmd...]
+
+
+Get delays, since system boot, for pid 10::
+
+	# ./getdelays -p 10
+	(output similar to next case)
+
+Get sum of delays, since system boot, for all pids with tgid 5::
+
+	# ./getdelays -t 5
+
+
+	CPU	count	real total	virtual total	delay total
+		7876	92005750	100000000	24001500
+	IO	count	delay total
+		0	0
+	SWAP	count	delay total
+		0	0
+	RECLAIM	count	delay total
+		0	0
+
+Get delays seen in executing a given simple command::
+
+  # ./getdelays -c ls /
+
+  bin   data1  data3  data5  dev  home  media  opt   root  srv        sys  usr
+  boot  data2  data4  data6  etc  lib   mnt    proc  sbin  subdomain  tmp  var
+
+
+  CPU	count	real total	virtual total	delay total
+	6	4000250		4000000		0
+  IO	count	delay total
+	0	0
+  SWAP	count	delay total
+	0	0
+  RECLAIM	count	delay total
+	0	0
diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.txt
deleted file mode 100644
index 042ea59b5853..000000000000
--- a/Documentation/accounting/delay-accounting.txt
+++ /dev/null
@@ -1,117 +0,0 @@
-Delay accounting
-----------------
-
-Tasks encounter delays in execution when they wait
-for some kernel resource to become available e.g. a
-runnable task may wait for a free CPU to run on.
-
-The per-task delay accounting functionality measures
-the delays experienced by a task while
-
-a) waiting for a CPU (while being runnable)
-b) completion of synchronous block I/O initiated by the task
-c) swapping in pages
-d) memory reclaim
-
-and makes these statistics available to userspace through
-the taskstats interface.
-
-Such delays provide feedback for setting a task's cpu priority,
-io priority and rss limit values appropriately. Long delays for
-important tasks could be a trigger for raising its corresponding priority.
-
-The functionality, through its use of the taskstats interface, also provides
-delay statistics aggregated for all tasks (or threads) belonging to a
-thread group (corresponding to a traditional Unix process). This is a commonly
-needed aggregation that is more efficiently done by the kernel.
-
-Userspace utilities, particularly resource management applications, can also
-aggregate delay statistics into arbitrary groups. To enable this, delay
-statistics of a task are available both during its lifetime as well as on its
-exit, ensuring continuous and complete monitoring can be done.
-
-
-Interface
----------
-
-Delay accounting uses the taskstats interface which is described
-in detail in a separate document in this directory. Taskstats returns a
-generic data structure to userspace corresponding to per-pid and per-tgid
-statistics. The delay accounting functionality populates specific fields of
-this structure. See
-     include/linux/taskstats.h
-for a description of the fields pertaining to delay accounting.
-It will generally be in the form of counters returning the cumulative
-delay seen for cpu, sync block I/O, swapin, memory reclaim etc.
-
-Taking the difference of two successive readings of a given
-counter (say cpu_delay_total) for a task will give the delay
-experienced by the task waiting for the corresponding resource
-in that interval.
-
-When a task exits, records containing the per-task statistics
-are sent to userspace without requiring a command. If it is the last exiting
-task of a thread group, the per-tgid statistics are also sent. More details
-are given in the taskstats interface description.
-
-The getdelays.c userspace utility in tools/accounting directory allows simple
-commands to be run and the corresponding delay statistics to be displayed. It
-also serves as an example of using the taskstats interface.
-
-Usage
------
-
-Compile the kernel with
-	CONFIG_TASK_DELAY_ACCT=y
-	CONFIG_TASKSTATS=y
-
-Delay accounting is enabled by default at boot up.
-To disable, add
-   nodelayacct
-to the kernel boot options. The rest of the instructions
-below assume this has not been done.
-
-After the system has booted up, use a utility
-similar to  getdelays.c to access the delays
-seen by a given task or a task group (tgid).
-The utility also allows a given command to be
-executed and the corresponding delays to be
-seen.
-
-General format of the getdelays command
-
-getdelays [-t tgid] [-p pid] [-c cmd...]
-
-
-Get delays, since system boot, for pid 10
-# ./getdelays -p 10
-(output similar to next case)
-
-Get sum of delays, since system boot, for all pids with tgid 5
-# ./getdelays -t 5
-
-
-CPU	count	real total	virtual total	delay total
-	7876	92005750	100000000	24001500
-IO	count	delay total
-	0	0
-SWAP	count	delay total
-	0	0
-RECLAIM	count	delay total
-	0	0
-
-Get delays seen in executing a given simple command
-# ./getdelays -c ls /
-
-bin   data1  data3  data5  dev  home  media  opt   root  srv        sys  usr
-boot  data2  data4  data6  etc  lib   mnt    proc  sbin  subdomain  tmp  var
-
-
-CPU	count	real total	virtual total	delay total
-	6	4000250		4000000		0
-IO	count	delay total
-	0	0
-SWAP	count	delay total
-	0	0
-RECLAIM	count	delay total
-	0	0
diff --git a/Documentation/accounting/index.rst b/Documentation/accounting/index.rst
new file mode 100644
index 000000000000..e1f6284b5ff3
--- /dev/null
+++ b/Documentation/accounting/index.rst
@@ -0,0 +1,14 @@
+:orphan:
+
+==========
+Accounting
+==========
+
+.. toctree::
+   :maxdepth: 1
+
+   cgroupstats
+   delay-accounting
+   psi
+   taskstats
+   taskstats-struct
diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst
new file mode 100644
index 000000000000..621111ce5740
--- /dev/null
+++ b/Documentation/accounting/psi.rst
@@ -0,0 +1,182 @@
+================================
+PSI - Pressure Stall Information
+================================
+
+:Date: April, 2018
+:Author: Johannes Weiner <hannes@cmpxchg.org>
+
+When CPU, memory or IO devices are contended, workloads experience
+latency spikes, throughput losses, and run the risk of OOM kills.
+
+Without an accurate measure of such contention, users are forced to
+either play it safe and under-utilize their hardware resources, or
+roll the dice and frequently suffer the disruptions resulting from
+excessive overcommit.
+
+The psi feature identifies and quantifies the disruptions caused by
+such resource crunches and the time impact it has on complex workloads
+or even entire systems.
+
+Having an accurate measure of productivity losses caused by resource
+scarcity aids users in sizing workloads to hardware--or provisioning
+hardware according to workload demand.
+
+As psi aggregates this information in realtime, systems can be managed
+dynamically using techniques such as load shedding, migrating jobs to
+other systems or data centers, or strategically pausing or killing low
+priority or restartable batch jobs.
+
+This allows maximizing hardware utilization without sacrificing
+workload health or risking major disruptions such as OOM kills.
+
+Pressure interface
+==================
+
+Pressure information for each resource is exported through the
+respective file in /proc/pressure/ -- cpu, memory, and io.
+
+The format for CPU is as such::
+
+	some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+
+and for memory and IO::
+
+	some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+	full avg10=0.00 avg60=0.00 avg300=0.00 total=0
+
+The "some" line indicates the share of time in which at least some
+tasks are stalled on a given resource.
+
+The "full" line indicates the share of time in which all non-idle
+tasks are stalled on a given resource simultaneously. In this state
+actual CPU cycles are going to waste, and a workload that spends
+extended time in this state is considered to be thrashing. This has
+severe impact on performance, and it's useful to distinguish this
+situation from a state where some tasks are stalled but the CPU is
+still doing productive work. As such, time spent in this subset of the
+stall state is tracked separately and exported in the "full" averages.
+
+The ratios (in %) are tracked as recent trends over ten, sixty, and
+three hundred second windows, which gives insight into short term events
+as well as medium and long term trends. The total absolute stall time
+(in us) is tracked and exported as well, to allow detection of latency
+spikes which wouldn't necessarily make a dent in the time averages,
+or to average trends over custom time frames.
+
+Monitoring for pressure thresholds
+==================================
+
+Users can register triggers and use poll() to be woken up when resource
+pressure exceeds certain thresholds.
+
+A trigger describes the maximum cumulative stall time over a specific
+time window, e.g. 100ms of total stall time within any 500ms window to
+generate a wakeup event.
+
+To register a trigger user has to open psi interface file under
+/proc/pressure/ representing the resource to be monitored and write the
+desired threshold and time window. The open file descriptor should be
+used to wait for trigger events using select(), poll() or epoll().
+The following format is used::
+
+	<some|full> <stall amount in us> <time window in us>
+
+For example writing "some 150000 1000000" into /proc/pressure/memory
+would add 150ms threshold for partial memory stall measured within
+1sec time window. Writing "full 50000 1000000" into /proc/pressure/io
+would add 50ms threshold for full io stall measured within 1sec time window.
+
+Triggers can be set on more than one psi metric and more than one trigger
+for the same psi metric can be specified. However for each trigger a separate
+file descriptor is required to be able to poll it separately from others,
+therefore for each trigger a separate open() syscall should be made even
+when opening the same psi interface file.
+
+Monitors activate only when system enters stall state for the monitored
+psi metric and deactivates upon exit from the stall state. While system is
+in the stall state psi signal growth is monitored at a rate of 10 times per
+tracking window.
+
+The kernel accepts window sizes ranging from 500ms to 10s, therefore min
+monitoring update interval is 50ms and max is 1s. Min limit is set to
+prevent overly frequent polling. Max limit is chosen as a high enough number
+after which monitors are most likely not needed and psi averages can be used
+instead.
+
+When activated, psi monitor stays active for at least the duration of one
+tracking window to avoid repeated activations/deactivations when system is
+bouncing in and out of the stall state.
+
+Notifications to the userspace are rate-limited to one per tracking window.
+
+The trigger will de-register when the file descriptor used to define the
+trigger  is closed.
+
+Userspace monitor usage example
+===============================
+
+::
+
+  #include <errno.h>
+  #include <fcntl.h>
+  #include <stdio.h>
+  #include <poll.h>
+  #include <string.h>
+  #include <unistd.h>
+
+  /*
+   * Monitor memory partial stall with 1s tracking window size
+   * and 150ms threshold.
+   */
+  int main() {
+	const char trig[] = "some 150000 1000000";
+	struct pollfd fds;
+	int n;
+
+	fds.fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
+	if (fds.fd < 0) {
+		printf("/proc/pressure/memory open error: %s\n",
+			strerror(errno));
+		return 1;
+	}
+	fds.events = POLLPRI;
+
+	if (write(fds.fd, trig, strlen(trig) + 1) < 0) {
+		printf("/proc/pressure/memory write error: %s\n",
+			strerror(errno));
+		return 1;
+	}
+
+	printf("waiting for events...\n");
+	while (1) {
+		n = poll(&fds, 1, -1);
+		if (n < 0) {
+			printf("poll error: %s\n", strerror(errno));
+			return 1;
+		}
+		if (fds.revents & POLLERR) {
+			printf("got POLLERR, event source is gone\n");
+			return 0;
+		}
+		if (fds.revents & POLLPRI) {
+			printf("event triggered!\n");
+		} else {
+			printf("unknown event received: 0x%x\n", fds.revents);
+			return 1;
+		}
+	}
+
+	return 0;
+  }
+
+Cgroup2 interface
+=================
+
+In a system with a CONFIG_CGROUP=y kernel and the cgroup2 filesystem
+mounted, pressure stall information is also tracked for tasks grouped
+into cgroups. Each subdirectory in the cgroupfs mountpoint contains
+cpu.pressure, memory.pressure, and io.pressure files; the format is
+the same as the /proc/pressure/ files.
+
+Per-cgroup psi monitors can be specified and used the same way as
+system-wide ones.
diff --git a/Documentation/accounting/psi.txt b/Documentation/accounting/psi.txt
deleted file mode 100644
index 5cbe5659e3b7..000000000000
--- a/Documentation/accounting/psi.txt
+++ /dev/null
@@ -1,180 +0,0 @@
-================================
-PSI - Pressure Stall Information
-================================
-
-:Date: April, 2018
-:Author: Johannes Weiner <hannes@cmpxchg.org>
-
-When CPU, memory or IO devices are contended, workloads experience
-latency spikes, throughput losses, and run the risk of OOM kills.
-
-Without an accurate measure of such contention, users are forced to
-either play it safe and under-utilize their hardware resources, or
-roll the dice and frequently suffer the disruptions resulting from
-excessive overcommit.
-
-The psi feature identifies and quantifies the disruptions caused by
-such resource crunches and the time impact it has on complex workloads
-or even entire systems.
-
-Having an accurate measure of productivity losses caused by resource
-scarcity aids users in sizing workloads to hardware--or provisioning
-hardware according to workload demand.
-
-As psi aggregates this information in realtime, systems can be managed
-dynamically using techniques such as load shedding, migrating jobs to
-other systems or data centers, or strategically pausing or killing low
-priority or restartable batch jobs.
-
-This allows maximizing hardware utilization without sacrificing
-workload health or risking major disruptions such as OOM kills.
-
-Pressure interface
-==================
-
-Pressure information for each resource is exported through the
-respective file in /proc/pressure/ -- cpu, memory, and io.
-
-The format for CPU is as such:
-
-some avg10=0.00 avg60=0.00 avg300=0.00 total=0
-
-and for memory and IO:
-
-some avg10=0.00 avg60=0.00 avg300=0.00 total=0
-full avg10=0.00 avg60=0.00 avg300=0.00 total=0
-
-The "some" line indicates the share of time in which at least some
-tasks are stalled on a given resource.
-
-The "full" line indicates the share of time in which all non-idle
-tasks are stalled on a given resource simultaneously. In this state
-actual CPU cycles are going to waste, and a workload that spends
-extended time in this state is considered to be thrashing. This has
-severe impact on performance, and it's useful to distinguish this
-situation from a state where some tasks are stalled but the CPU is
-still doing productive work. As such, time spent in this subset of the
-stall state is tracked separately and exported in the "full" averages.
-
-The ratios (in %) are tracked as recent trends over ten, sixty, and
-three hundred second windows, which gives insight into short term events
-as well as medium and long term trends. The total absolute stall time
-(in us) is tracked and exported as well, to allow detection of latency
-spikes which wouldn't necessarily make a dent in the time averages,
-or to average trends over custom time frames.
-
-Monitoring for pressure thresholds
-==================================
-
-Users can register triggers and use poll() to be woken up when resource
-pressure exceeds certain thresholds.
-
-A trigger describes the maximum cumulative stall time over a specific
-time window, e.g. 100ms of total stall time within any 500ms window to
-generate a wakeup event.
-
-To register a trigger user has to open psi interface file under
-/proc/pressure/ representing the resource to be monitored and write the
-desired threshold and time window. The open file descriptor should be
-used to wait for trigger events using select(), poll() or epoll().
-The following format is used:
-
-<some|full> <stall amount in us> <time window in us>
-
-For example writing "some 150000 1000000" into /proc/pressure/memory
-would add 150ms threshold for partial memory stall measured within
-1sec time window. Writing "full 50000 1000000" into /proc/pressure/io
-would add 50ms threshold for full io stall measured within 1sec time window.
-
-Triggers can be set on more than one psi metric and more than one trigger
-for the same psi metric can be specified. However for each trigger a separate
-file descriptor is required to be able to poll it separately from others,
-therefore for each trigger a separate open() syscall should be made even
-when opening the same psi interface file.
-
-Monitors activate only when system enters stall state for the monitored
-psi metric and deactivates upon exit from the stall state. While system is
-in the stall state psi signal growth is monitored at a rate of 10 times per
-tracking window.
-
-The kernel accepts window sizes ranging from 500ms to 10s, therefore min
-monitoring update interval is 50ms and max is 1s. Min limit is set to
-prevent overly frequent polling. Max limit is chosen as a high enough number
-after which monitors are most likely not needed and psi averages can be used
-instead.
-
-When activated, psi monitor stays active for at least the duration of one
-tracking window to avoid repeated activations/deactivations when system is
-bouncing in and out of the stall state.
-
-Notifications to the userspace are rate-limited to one per tracking window.
-
-The trigger will de-register when the file descriptor used to define the
-trigger  is closed.
-
-Userspace monitor usage example
-===============================
-
-#include <errno.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <poll.h>
-#include <string.h>
-#include <unistd.h>
-
-/*
- * Monitor memory partial stall with 1s tracking window size
- * and 150ms threshold.
- */
-int main() {
-	const char trig[] = "some 150000 1000000";
-	struct pollfd fds;
-	int n;
-
-	fds.fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
-	if (fds.fd < 0) {
-		printf("/proc/pressure/memory open error: %s\n",
-			strerror(errno));
-		return 1;
-	}
-	fds.events = POLLPRI;
-
-	if (write(fds.fd, trig, strlen(trig) + 1) < 0) {
-		printf("/proc/pressure/memory write error: %s\n",
-			strerror(errno));
-		return 1;
-	}
-
-	printf("waiting for events...\n");
-	while (1) {
-		n = poll(&fds, 1, -1);
-		if (n < 0) {
-			printf("poll error: %s\n", strerror(errno));
-			return 1;
-		}
-		if (fds.revents & POLLERR) {
-			printf("got POLLERR, event source is gone\n");
-			return 0;
-		}
-		if (fds.revents & POLLPRI) {
-			printf("event triggered!\n");
-		} else {
-			printf("unknown event received: 0x%x\n", fds.revents);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-Cgroup2 interface
-=================
-
-In a system with a CONFIG_CGROUP=y kernel and the cgroup2 filesystem
-mounted, pressure stall information is also tracked for tasks grouped
-into cgroups. Each subdirectory in the cgroupfs mountpoint contains
-cpu.pressure, memory.pressure, and io.pressure files; the format is
-the same as the /proc/pressure/ files.
-
-Per-cgroup psi monitors can be specified and used the same way as
-system-wide ones.
diff --git a/Documentation/accounting/taskstats-struct.rst b/Documentation/accounting/taskstats-struct.rst
new file mode 100644
index 000000000000..ca90fd489c9a
--- /dev/null
+++ b/Documentation/accounting/taskstats-struct.rst
@@ -0,0 +1,199 @@
+====================
+The struct taskstats
+====================
+
+This document contains an explanation of the struct taskstats fields.
+
+There are three different groups of fields in the struct taskstats:
+
+1) Common and basic accounting fields
+    If CONFIG_TASKSTATS is set, the taskstats interface is enabled and
+    the common fields and basic accounting fields are collected for
+    delivery at do_exit() of a task.
+2) Delay accounting fields
+    These fields are placed between::
+
+	/* Delay accounting fields start */
+
+    and::
+
+	/* Delay accounting fields end */
+
+    Their values are collected if CONFIG_TASK_DELAY_ACCT is set.
+3) Extended accounting fields
+    These fields are placed between::
+
+	/* Extended accounting fields start */
+
+    and::
+
+	/* Extended accounting fields end */
+
+    Their values are collected if CONFIG_TASK_XACCT is set.
+
+4) Per-task and per-thread context switch count statistics
+
+5) Time accounting for SMT machines
+
+6) Extended delay accounting fields for memory reclaim
+
+Future extension should add fields to the end of the taskstats struct, and
+should not change the relative position of each field within the struct.
+
+::
+
+  struct taskstats {
+
+1) Common and basic accounting fields::
+
+	/* The version number of this struct. This field is always set to
+	 * TAKSTATS_VERSION, which is defined in <linux/taskstats.h>.
+	 * Each time the struct is changed, the value should be incremented.
+	 */
+	__u16	version;
+
+	/* The exit code of a task. */
+	__u32	ac_exitcode;		/* Exit status */
+
+	/* The accounting flags of a task as defined in <linux/acct.h>
+	 * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG.
+	 */
+	__u8	ac_flag;		/* Record flags */
+
+	/* The value of task_nice() of a task. */
+	__u8	ac_nice;		/* task_nice */
+
+	/* The name of the command that started this task. */
+	char	ac_comm[TS_COMM_LEN];	/* Command name */
+
+	/* The scheduling discipline as set in task->policy field. */
+	__u8	ac_sched;		/* Scheduling discipline */
+
+	__u8	ac_pad[3];
+	__u32	ac_uid;			/* User ID */
+	__u32	ac_gid;			/* Group ID */
+	__u32	ac_pid;			/* Process ID */
+	__u32	ac_ppid;		/* Parent process ID */
+
+	/* The time when a task begins, in [secs] since 1970. */
+	__u32	ac_btime;		/* Begin time [sec since 1970] */
+
+	/* The elapsed time of a task, in [usec]. */
+	__u64	ac_etime;		/* Elapsed time [usec] */
+
+	/* The user CPU time of a task, in [usec]. */
+	__u64	ac_utime;		/* User CPU time [usec] */
+
+	/* The system CPU time of a task, in [usec]. */
+	__u64	ac_stime;		/* System CPU time [usec] */
+
+	/* The minor page fault count of a task, as set in task->min_flt. */
+	__u64	ac_minflt;		/* Minor Page Fault Count */
+
+	/* The major page fault count of a task, as set in task->maj_flt. */
+	__u64	ac_majflt;		/* Major Page Fault Count */
+
+
+2) Delay accounting fields::
+
+	/* Delay accounting fields start
+	 *
+	 * All values, until the comment "Delay accounting fields end" are
+	 * available only if delay accounting is enabled, even though the last
+	 * few fields are not delays
+	 *
+	 * xxx_count is the number of delay values recorded
+	 * xxx_delay_total is the corresponding cumulative delay in nanoseconds
+	 *
+	 * xxx_delay_total wraps around to zero on overflow
+	 * xxx_count incremented regardless of overflow
+	 */
+
+	/* Delay waiting for cpu, while runnable
+	 * count, delay_total NOT updated atomically
+	 */
+	__u64	cpu_count;
+	__u64	cpu_delay_total;
+
+	/* Following four fields atomically updated using task->delays->lock */
+
+	/* Delay waiting for synchronous block I/O to complete
+	 * does not account for delays in I/O submission
+	 */
+	__u64	blkio_count;
+	__u64	blkio_delay_total;
+
+	/* Delay waiting for page fault I/O (swap in only) */
+	__u64	swapin_count;
+	__u64	swapin_delay_total;
+
+	/* cpu "wall-clock" running time
+	 * On some architectures, value will adjust for cpu time stolen
+	 * from the kernel in involuntary waits due to virtualization.
+	 * Value is cumulative, in nanoseconds, without a corresponding count
+	 * and wraps around to zero silently on overflow
+	 */
+	__u64	cpu_run_real_total;
+
+	/* cpu "virtual" running time
+	 * Uses time intervals seen by the kernel i.e. no adjustment
+	 * for kernel's involuntary waits due to virtualization.
+	 * Value is cumulative, in nanoseconds, without a corresponding count
+	 * and wraps around to zero silently on overflow
+	 */
+	__u64	cpu_run_virtual_total;
+	/* Delay accounting fields end */
+	/* version 1 ends here */
+
+
+3) Extended accounting fields::
+
+	/* Extended accounting fields start */
+
+	/* Accumulated RSS usage in duration of a task, in MBytes-usecs.
+	 * The current rss usage is added to this counter every time
+	 * a tick is charged to a task's system time. So, at the end we
+	 * will have memory usage multiplied by system time. Thus an
+	 * average usage per system time unit can be calculated.
+	 */
+	__u64	coremem;		/* accumulated RSS usage in MB-usec */
+
+	/* Accumulated virtual memory usage in duration of a task.
+	 * Same as acct_rss_mem1 above except that we keep track of VM usage.
+	 */
+	__u64	virtmem;		/* accumulated VM usage in MB-usec */
+
+	/* High watermark of RSS usage in duration of a task, in KBytes. */
+	__u64	hiwater_rss;		/* High-watermark of RSS usage */
+
+	/* High watermark of VM  usage in duration of a task, in KBytes. */
+	__u64	hiwater_vm;		/* High-water virtual memory usage */
+
+	/* The following four fields are I/O statistics of a task. */
+	__u64	read_char;		/* bytes read */
+	__u64	write_char;		/* bytes written */
+	__u64	read_syscalls;		/* read syscalls */
+	__u64	write_syscalls;		/* write syscalls */
+
+	/* Extended accounting fields end */
+
+4) Per-task and per-thread statistics::
+
+	__u64	nvcsw;			/* Context voluntary switch counter */
+	__u64	nivcsw;			/* Context involuntary switch counter */
+
+5) Time accounting for SMT machines::
+
+	__u64	ac_utimescaled;		/* utime scaled on frequency etc */
+	__u64	ac_stimescaled;		/* stime scaled on frequency etc */
+	__u64	cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
+
+6) Extended delay accounting fields for memory reclaim::
+
+	/* Delay waiting for memory reclaim */
+	__u64	freepages_count;
+	__u64	freepages_delay_total;
+
+::
+
+  }
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
deleted file mode 100644
index e7512c061c15..000000000000
--- a/Documentation/accounting/taskstats-struct.txt
+++ /dev/null
@@ -1,180 +0,0 @@
-The struct taskstats
---------------------
-
-This document contains an explanation of the struct taskstats fields.
-
-There are three different groups of fields in the struct taskstats:
-
-1) Common and basic accounting fields
-    If CONFIG_TASKSTATS is set, the taskstats interface is enabled and
-    the common fields and basic accounting fields are collected for
-    delivery at do_exit() of a task.
-2) Delay accounting fields
-    These fields are placed between
-    /* Delay accounting fields start */
-    and
-    /* Delay accounting fields end */
-    Their values are collected if CONFIG_TASK_DELAY_ACCT is set.
-3) Extended accounting fields
-    These fields are placed between
-    /* Extended accounting fields start */
-    and
-    /* Extended accounting fields end */
-    Their values are collected if CONFIG_TASK_XACCT is set.
-
-4) Per-task and per-thread context switch count statistics
-
-5) Time accounting for SMT machines
-
-6) Extended delay accounting fields for memory reclaim
-
-Future extension should add fields to the end of the taskstats struct, and
-should not change the relative position of each field within the struct.
-
-
-struct taskstats {
-
-1) Common and basic accounting fields:
-	/* The version number of this struct. This field is always set to
-	 * TAKSTATS_VERSION, which is defined in <linux/taskstats.h>.
-	 * Each time the struct is changed, the value should be incremented.
-	 */
-	__u16	version;
-
-  	/* The exit code of a task. */
-	__u32	ac_exitcode;		/* Exit status */
-
-  	/* The accounting flags of a task as defined in <linux/acct.h>
-	 * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG.
-	 */
-	__u8	ac_flag;		/* Record flags */
-
-  	/* The value of task_nice() of a task. */
-	__u8	ac_nice;		/* task_nice */
-
-  	/* The name of the command that started this task. */
-	char	ac_comm[TS_COMM_LEN];	/* Command name */
-
-  	/* The scheduling discipline as set in task->policy field. */
-	__u8	ac_sched;		/* Scheduling discipline */
-
-	__u8	ac_pad[3];
-	__u32	ac_uid;			/* User ID */
-	__u32	ac_gid;			/* Group ID */
-	__u32	ac_pid;			/* Process ID */
-	__u32	ac_ppid;		/* Parent process ID */
-
-  	/* The time when a task begins, in [secs] since 1970. */
-	__u32	ac_btime;		/* Begin time [sec since 1970] */
-
-  	/* The elapsed time of a task, in [usec]. */
-	__u64	ac_etime;		/* Elapsed time [usec] */
-
-  	/* The user CPU time of a task, in [usec]. */
-	__u64	ac_utime;		/* User CPU time [usec] */
-
-  	/* The system CPU time of a task, in [usec]. */
-	__u64	ac_stime;		/* System CPU time [usec] */
-
-  	/* The minor page fault count of a task, as set in task->min_flt. */
-	__u64	ac_minflt;		/* Minor Page Fault Count */
-
-	/* The major page fault count of a task, as set in task->maj_flt. */
-	__u64	ac_majflt;		/* Major Page Fault Count */
-
-
-2) Delay accounting fields:
-	/* Delay accounting fields start
-	 *
-	 * All values, until the comment "Delay accounting fields end" are
-	 * available only if delay accounting is enabled, even though the last
-	 * few fields are not delays
-	 *
-	 * xxx_count is the number of delay values recorded
-	 * xxx_delay_total is the corresponding cumulative delay in nanoseconds
-	 *
-	 * xxx_delay_total wraps around to zero on overflow
-	 * xxx_count incremented regardless of overflow
-	 */
-
-	/* Delay waiting for cpu, while runnable
-	 * count, delay_total NOT updated atomically
-	 */
-	__u64	cpu_count;
-	__u64	cpu_delay_total;
-
-	/* Following four fields atomically updated using task->delays->lock */
-
-	/* Delay waiting for synchronous block I/O to complete
-	 * does not account for delays in I/O submission
-	 */
-	__u64	blkio_count;
-	__u64	blkio_delay_total;
-
-	/* Delay waiting for page fault I/O (swap in only) */
-	__u64	swapin_count;
-	__u64	swapin_delay_total;
-
-	/* cpu "wall-clock" running time
-	 * On some architectures, value will adjust for cpu time stolen
-	 * from the kernel in involuntary waits due to virtualization.
-	 * Value is cumulative, in nanoseconds, without a corresponding count
-	 * and wraps around to zero silently on overflow
-	 */
-	__u64	cpu_run_real_total;
-
-	/* cpu "virtual" running time
-	 * Uses time intervals seen by the kernel i.e. no adjustment
-	 * for kernel's involuntary waits due to virtualization.
-	 * Value is cumulative, in nanoseconds, without a corresponding count
-	 * and wraps around to zero silently on overflow
-	 */
-	__u64	cpu_run_virtual_total;
-	/* Delay accounting fields end */
-	/* version 1 ends here */
-
-
-3) Extended accounting fields
-	/* Extended accounting fields start */
-
-	/* Accumulated RSS usage in duration of a task, in MBytes-usecs.
-	 * The current rss usage is added to this counter every time
-	 * a tick is charged to a task's system time. So, at the end we
-	 * will have memory usage multiplied by system time. Thus an
-	 * average usage per system time unit can be calculated.
-	 */
-	__u64	coremem;		/* accumulated RSS usage in MB-usec */
-
-  	/* Accumulated virtual memory usage in duration of a task.
-	 * Same as acct_rss_mem1 above except that we keep track of VM usage.
-	 */
-	__u64	virtmem;		/* accumulated VM usage in MB-usec */
-
-  	/* High watermark of RSS usage in duration of a task, in KBytes. */
-	__u64	hiwater_rss;		/* High-watermark of RSS usage */
-
-  	/* High watermark of VM  usage in duration of a task, in KBytes. */
-	__u64	hiwater_vm;		/* High-water virtual memory usage */
-
-	/* The following four fields are I/O statistics of a task. */
-	__u64	read_char;		/* bytes read */
-	__u64	write_char;		/* bytes written */
-	__u64	read_syscalls;		/* read syscalls */
-	__u64	write_syscalls;		/* write syscalls */
-
-	/* Extended accounting fields end */
-
-4) Per-task and per-thread statistics
-	__u64	nvcsw;			/* Context voluntary switch counter */
-	__u64	nivcsw;			/* Context involuntary switch counter */
-
-5) Time accounting for SMT machines
-	__u64	ac_utimescaled;		/* utime scaled on frequency etc */
-	__u64	ac_stimescaled;		/* stime scaled on frequency etc */
-	__u64	cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
-
-6) Extended delay accounting fields for memory reclaim
-	/* Delay waiting for memory reclaim */
-	__u64	freepages_count;
-	__u64	freepages_delay_total;
-}
diff --git a/Documentation/accounting/taskstats.rst b/Documentation/accounting/taskstats.rst
new file mode 100644
index 000000000000..2a28b7f55c10
--- /dev/null
+++ b/Documentation/accounting/taskstats.rst
@@ -0,0 +1,180 @@
+=============================
+Per-task statistics interface
+=============================
+
+
+Taskstats is a netlink-based interface for sending per-task and
+per-process statistics from the kernel to userspace.
+
+Taskstats was designed for the following benefits:
+
+- efficiently provide statistics during lifetime of a task and on its exit
+- unified interface for multiple accounting subsystems
+- extensibility for use by future accounting patches
+
+Terminology
+-----------
+
+"pid", "tid" and "task" are used interchangeably and refer to the standard
+Linux task defined by struct task_struct.  per-pid stats are the same as
+per-task stats.
+
+"tgid", "process" and "thread group" are used interchangeably and refer to the
+tasks that share an mm_struct i.e. the traditional Unix process. Despite the
+use of tgid, there is no special treatment for the task that is thread group
+leader - a process is deemed alive as long as it has any task belonging to it.
+
+Usage
+-----
+
+To get statistics during a task's lifetime, userspace opens a unicast netlink
+socket (NETLINK_GENERIC family) and sends commands specifying a pid or a tgid.
+The response contains statistics for a task (if pid is specified) or the sum of
+statistics for all tasks of the process (if tgid is specified).
+
+To obtain statistics for tasks which are exiting, the userspace listener
+sends a register command and specifies a cpumask. Whenever a task exits on
+one of the cpus in the cpumask, its per-pid statistics are sent to the
+registered listener. Using cpumasks allows the data received by one listener
+to be limited and assists in flow control over the netlink interface and is
+explained in more detail below.
+
+If the exiting task is the last thread exiting its thread group,
+an additional record containing the per-tgid stats is also sent to userspace.
+The latter contains the sum of per-pid stats for all threads in the thread
+group, both past and present.
+
+getdelays.c is a simple utility demonstrating usage of the taskstats interface
+for reporting delay accounting statistics. Users can register cpumasks,
+send commands and process responses, listen for per-tid/tgid exit data,
+write the data received to a file and do basic flow control by increasing
+receive buffer sizes.
+
+Interface
+---------
+
+The user-kernel interface is encapsulated in include/linux/taskstats.h
+
+To avoid this documentation becoming obsolete as the interface evolves, only
+an outline of the current version is given. taskstats.h always overrides the
+description here.
+
+struct taskstats is the common accounting structure for both per-pid and
+per-tgid data. It is versioned and can be extended by each accounting subsystem
+that is added to the kernel. The fields and their semantics are defined in the
+taskstats.h file.
+
+The data exchanged between user and kernel space is a netlink message belonging
+to the NETLINK_GENERIC family and using the netlink attributes interface.
+The messages are in the format::
+
+    +----------+- - -+-------------+-------------------+
+    | nlmsghdr | Pad |  genlmsghdr | taskstats payload |
+    +----------+- - -+-------------+-------------------+
+
+
+The taskstats payload is one of the following three kinds:
+
+1. Commands: Sent from user to kernel. Commands to get data on
+a pid/tgid consist of one attribute, of type TASKSTATS_CMD_ATTR_PID/TGID,
+containing a u32 pid or tgid in the attribute payload. The pid/tgid denotes
+the task/process for which userspace wants statistics.
+
+Commands to register/deregister interest in exit data from a set of cpus
+consist of one attribute, of type
+TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK and contain a cpumask in the
+attribute payload. The cpumask is specified as an ascii string of
+comma-separated cpu ranges e.g. to listen to exit data from cpus 1,2,3,5,7,8
+the cpumask would be "1-3,5,7-8". If userspace forgets to deregister interest
+in cpus before closing the listening socket, the kernel cleans up its interest
+set over time. However, for the sake of efficiency, an explicit deregistration
+is advisable.
+
+2. Response for a command: sent from the kernel in response to a userspace
+command. The payload is a series of three attributes of type:
+
+a) TASKSTATS_TYPE_AGGR_PID/TGID : attribute containing no payload but indicates
+a pid/tgid will be followed by some stats.
+
+b) TASKSTATS_TYPE_PID/TGID: attribute whose payload is the pid/tgid whose stats
+are being returned.
+
+c) TASKSTATS_TYPE_STATS: attribute with a struct taskstats as payload. The
+same structure is used for both per-pid and per-tgid stats.
+
+3. New message sent by kernel whenever a task exits. The payload consists of a
+   series of attributes of the following type:
+
+a) TASKSTATS_TYPE_AGGR_PID: indicates next two attributes will be pid+stats
+b) TASKSTATS_TYPE_PID: contains exiting task's pid
+c) TASKSTATS_TYPE_STATS: contains the exiting task's per-pid stats
+d) TASKSTATS_TYPE_AGGR_TGID: indicates next two attributes will be tgid+stats
+e) TASKSTATS_TYPE_TGID: contains tgid of process to which task belongs
+f) TASKSTATS_TYPE_STATS: contains the per-tgid stats for exiting task's process
+
+
+per-tgid stats
+--------------
+
+Taskstats provides per-process stats, in addition to per-task stats, since
+resource management is often done at a process granularity and aggregating task
+stats in userspace alone is inefficient and potentially inaccurate (due to lack
+of atomicity).
+
+However, maintaining per-process, in addition to per-task stats, within the
+kernel has space and time overheads. To address this, the taskstats code
+accumulates each exiting task's statistics into a process-wide data structure.
+When the last task of a process exits, the process level data accumulated also
+gets sent to userspace (along with the per-task data).
+
+When a user queries to get per-tgid data, the sum of all other live threads in
+the group is added up and added to the accumulated total for previously exited
+threads of the same thread group.
+
+Extending taskstats
+-------------------
+
+There are two ways to extend the taskstats interface to export more
+per-task/process stats as patches to collect them get added to the kernel
+in future:
+
+1. Adding more fields to the end of the existing struct taskstats. Backward
+   compatibility is ensured by the version number within the
+   structure. Userspace will use only the fields of the struct that correspond
+   to the version its using.
+
+2. Defining separate statistic structs and using the netlink attributes
+   interface to return them. Since userspace processes each netlink attribute
+   independently, it can always ignore attributes whose type it does not
+   understand (because it is using an older version of the interface).
+
+
+Choosing between 1. and 2. is a matter of trading off flexibility and
+overhead. If only a few fields need to be added, then 1. is the preferable
+path since the kernel and userspace don't need to incur the overhead of
+processing new netlink attributes. But if the new fields expand the existing
+struct too much, requiring disparate userspace accounting utilities to
+unnecessarily receive large structures whose fields are of no interest, then
+extending the attributes structure would be worthwhile.
+
+Flow control for taskstats
+--------------------------
+
+When the rate of task exits becomes large, a listener may not be able to keep
+up with the kernel's rate of sending per-tid/tgid exit data leading to data
+loss. This possibility gets compounded when the taskstats structure gets
+extended and the number of cpus grows large.
+
+To avoid losing statistics, userspace should do one or more of the following:
+
+- increase the receive buffer sizes for the netlink sockets opened by
+  listeners to receive exit data.
+
+- create more listeners and reduce the number of cpus being listened to by
+  each listener. In the extreme case, there could be one listener for each cpu.
+  Users may also consider setting the cpu affinity of the listener to the subset
+  of cpus to which it listens, especially if they are listening to just one cpu.
+
+Despite these measures, if the userspace receives ENOBUFS error messages
+indicated overflow of receive buffers, it should take measures to handle the
+loss of data.
diff --git a/Documentation/accounting/taskstats.txt b/Documentation/accounting/taskstats.txt
deleted file mode 100644
index ff06b738bb88..000000000000
--- a/Documentation/accounting/taskstats.txt
+++ /dev/null
@@ -1,181 +0,0 @@
-Per-task statistics interface
------------------------------
-
-
-Taskstats is a netlink-based interface for sending per-task and
-per-process statistics from the kernel to userspace.
-
-Taskstats was designed for the following benefits:
-
-- efficiently provide statistics during lifetime of a task and on its exit
-- unified interface for multiple accounting subsystems
-- extensibility for use by future accounting patches
-
-Terminology
------------
-
-"pid", "tid" and "task" are used interchangeably and refer to the standard
-Linux task defined by struct task_struct.  per-pid stats are the same as
-per-task stats.
-
-"tgid", "process" and "thread group" are used interchangeably and refer to the
-tasks that share an mm_struct i.e. the traditional Unix process. Despite the
-use of tgid, there is no special treatment for the task that is thread group
-leader - a process is deemed alive as long as it has any task belonging to it.
-
-Usage
------
-
-To get statistics during a task's lifetime, userspace opens a unicast netlink
-socket (NETLINK_GENERIC family) and sends commands specifying a pid or a tgid.
-The response contains statistics for a task (if pid is specified) or the sum of
-statistics for all tasks of the process (if tgid is specified).
-
-To obtain statistics for tasks which are exiting, the userspace listener
-sends a register command and specifies a cpumask. Whenever a task exits on
-one of the cpus in the cpumask, its per-pid statistics are sent to the
-registered listener. Using cpumasks allows the data received by one listener
-to be limited and assists in flow control over the netlink interface and is
-explained in more detail below.
-
-If the exiting task is the last thread exiting its thread group,
-an additional record containing the per-tgid stats is also sent to userspace.
-The latter contains the sum of per-pid stats for all threads in the thread
-group, both past and present.
-
-getdelays.c is a simple utility demonstrating usage of the taskstats interface
-for reporting delay accounting statistics. Users can register cpumasks,
-send commands and process responses, listen for per-tid/tgid exit data,
-write the data received to a file and do basic flow control by increasing
-receive buffer sizes.
-
-Interface
----------
-
-The user-kernel interface is encapsulated in include/linux/taskstats.h
-
-To avoid this documentation becoming obsolete as the interface evolves, only
-an outline of the current version is given. taskstats.h always overrides the
-description here.
-
-struct taskstats is the common accounting structure for both per-pid and
-per-tgid data. It is versioned and can be extended by each accounting subsystem
-that is added to the kernel. The fields and their semantics are defined in the
-taskstats.h file.
-
-The data exchanged between user and kernel space is a netlink message belonging
-to the NETLINK_GENERIC family and using the netlink attributes interface.
-The messages are in the format
-
-    +----------+- - -+-------------+-------------------+
-    | nlmsghdr | Pad |  genlmsghdr | taskstats payload |
-    +----------+- - -+-------------+-------------------+
-
-
-The taskstats payload is one of the following three kinds:
-
-1. Commands: Sent from user to kernel. Commands to get data on
-a pid/tgid consist of one attribute, of type TASKSTATS_CMD_ATTR_PID/TGID,
-containing a u32 pid or tgid in the attribute payload. The pid/tgid denotes
-the task/process for which userspace wants statistics.
-
-Commands to register/deregister interest in exit data from a set of cpus
-consist of one attribute, of type
-TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK and contain a cpumask in the
-attribute payload. The cpumask is specified as an ascii string of
-comma-separated cpu ranges e.g. to listen to exit data from cpus 1,2,3,5,7,8
-the cpumask would be "1-3,5,7-8". If userspace forgets to deregister interest
-in cpus before closing the listening socket, the kernel cleans up its interest
-set over time. However, for the sake of efficiency, an explicit deregistration
-is advisable.
-
-2. Response for a command: sent from the kernel in response to a userspace
-command. The payload is a series of three attributes of type:
-
-a) TASKSTATS_TYPE_AGGR_PID/TGID : attribute containing no payload but indicates
-a pid/tgid will be followed by some stats.
-
-b) TASKSTATS_TYPE_PID/TGID: attribute whose payload is the pid/tgid whose stats
-are being returned.
-
-c) TASKSTATS_TYPE_STATS: attribute with a struct taskstats as payload. The
-same structure is used for both per-pid and per-tgid stats.
-
-3. New message sent by kernel whenever a task exits. The payload consists of a
-   series of attributes of the following type:
-
-a) TASKSTATS_TYPE_AGGR_PID: indicates next two attributes will be pid+stats
-b) TASKSTATS_TYPE_PID: contains exiting task's pid
-c) TASKSTATS_TYPE_STATS: contains the exiting task's per-pid stats
-d) TASKSTATS_TYPE_AGGR_TGID: indicates next two attributes will be tgid+stats
-e) TASKSTATS_TYPE_TGID: contains tgid of process to which task belongs
-f) TASKSTATS_TYPE_STATS: contains the per-tgid stats for exiting task's process
-
-
-per-tgid stats
---------------
-
-Taskstats provides per-process stats, in addition to per-task stats, since
-resource management is often done at a process granularity and aggregating task
-stats in userspace alone is inefficient and potentially inaccurate (due to lack
-of atomicity).
-
-However, maintaining per-process, in addition to per-task stats, within the
-kernel has space and time overheads. To address this, the taskstats code
-accumulates each exiting task's statistics into a process-wide data structure.
-When the last task of a process exits, the process level data accumulated also
-gets sent to userspace (along with the per-task data).
-
-When a user queries to get per-tgid data, the sum of all other live threads in
-the group is added up and added to the accumulated total for previously exited
-threads of the same thread group.
-
-Extending taskstats
--------------------
-
-There are two ways to extend the taskstats interface to export more
-per-task/process stats as patches to collect them get added to the kernel
-in future:
-
-1. Adding more fields to the end of the existing struct taskstats. Backward
-   compatibility is ensured by the version number within the
-   structure. Userspace will use only the fields of the struct that correspond
-   to the version its using.
-
-2. Defining separate statistic structs and using the netlink attributes
-   interface to return them. Since userspace processes each netlink attribute
-   independently, it can always ignore attributes whose type it does not
-   understand (because it is using an older version of the interface).
-
-
-Choosing between 1. and 2. is a matter of trading off flexibility and
-overhead. If only a few fields need to be added, then 1. is the preferable
-path since the kernel and userspace don't need to incur the overhead of
-processing new netlink attributes. But if the new fields expand the existing
-struct too much, requiring disparate userspace accounting utilities to
-unnecessarily receive large structures whose fields are of no interest, then
-extending the attributes structure would be worthwhile.
-
-Flow control for taskstats
---------------------------
-
-When the rate of task exits becomes large, a listener may not be able to keep
-up with the kernel's rate of sending per-tid/tgid exit data leading to data
-loss. This possibility gets compounded when the taskstats structure gets
-extended and the number of cpus grows large.
-
-To avoid losing statistics, userspace should do one or more of the following:
-
-- increase the receive buffer sizes for the netlink sockets opened by
-listeners to receive exit data.
-
-- create more listeners and reduce the number of cpus being listened to by
-each listener. In the extreme case, there could be one listener for each cpu.
-Users may also consider setting the cpu affinity of the listener to the subset
-of cpus to which it listens, especially if they are listening to just one cpu.
-
-Despite these measures, if the userspace receives ENOBUFS error messages
-indicated overflow of receive buffers, it should take measures to handle the
-loss of data.
-
-----
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index a9548de56ac9..080b18ce2a5d 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1014,7 +1014,7 @@ All time durations are in microseconds.
 	A read-only nested-key file which exists on non-root cgroups.
 
 	Shows pressure stall information for CPU. See
-	Documentation/accounting/psi.txt for details.
+	Documentation/accounting/psi.rst for details.
 
 
 Memory
@@ -1355,7 +1355,7 @@ PAGE_SIZE multiple when read back.
 	A read-only nested-key file which exists on non-root cgroups.
 
 	Shows pressure stall information for memory. See
-	Documentation/accounting/psi.txt for details.
+	Documentation/accounting/psi.rst for details.
 
 
 Usage Guidelines
@@ -1498,7 +1498,7 @@ IO Interface Files
 	A read-only nested-key file which exists on non-root cgroups.
 
 	Shows pressure stall information for IO. See
-	Documentation/accounting/psi.txt for details.
+	Documentation/accounting/psi.rst for details.
 
 
 Writeback
-- 
cgit 


From db9a0975a20c1f21c108b9d44545792d790593e4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 10:10:33 -0300
Subject: docs: ia64: convert to ReST

Rename the ia64 documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

There are two upper case file names. Rename them to
lower case, as we're working to avoid upper case file
names at Documentation.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/ia64/IRQ-redir.txt  |   69 ---
 Documentation/ia64/README         |   43 --
 Documentation/ia64/aliasing.rst   |  246 +++++++++
 Documentation/ia64/aliasing.txt   |  221 --------
 Documentation/ia64/efirtc.rst     |  144 +++++
 Documentation/ia64/efirtc.txt     |  128 -----
 Documentation/ia64/err_inject.rst | 1067 ++++++++++++++++++++++++++++++++++++
 Documentation/ia64/err_inject.txt | 1068 -------------------------------------
 Documentation/ia64/fsys.rst       |  303 +++++++++++
 Documentation/ia64/fsys.txt       |  286 ----------
 Documentation/ia64/ia64.rst       |   49 ++
 Documentation/ia64/index.rst      |   18 +
 Documentation/ia64/irq-redir.rst  |   80 +++
 Documentation/ia64/mca.rst        |  198 +++++++
 Documentation/ia64/mca.txt        |  194 -------
 Documentation/ia64/serial.rst     |  165 ++++++
 Documentation/ia64/serial.txt     |  151 ------
 Documentation/ia64/xen.rst        |  206 +++++++
 Documentation/ia64/xen.txt        |  183 -------
 19 files changed, 2476 insertions(+), 2343 deletions(-)
 delete mode 100644 Documentation/ia64/IRQ-redir.txt
 delete mode 100644 Documentation/ia64/README
 create mode 100644 Documentation/ia64/aliasing.rst
 delete mode 100644 Documentation/ia64/aliasing.txt
 create mode 100644 Documentation/ia64/efirtc.rst
 delete mode 100644 Documentation/ia64/efirtc.txt
 create mode 100644 Documentation/ia64/err_inject.rst
 delete mode 100644 Documentation/ia64/err_inject.txt
 create mode 100644 Documentation/ia64/fsys.rst
 delete mode 100644 Documentation/ia64/fsys.txt
 create mode 100644 Documentation/ia64/ia64.rst
 create mode 100644 Documentation/ia64/index.rst
 create mode 100644 Documentation/ia64/irq-redir.rst
 create mode 100644 Documentation/ia64/mca.rst
 delete mode 100644 Documentation/ia64/mca.txt
 create mode 100644 Documentation/ia64/serial.rst
 delete mode 100644 Documentation/ia64/serial.txt
 create mode 100644 Documentation/ia64/xen.rst
 delete mode 100644 Documentation/ia64/xen.txt

(limited to 'Documentation')

diff --git a/Documentation/ia64/IRQ-redir.txt b/Documentation/ia64/IRQ-redir.txt
deleted file mode 100644
index f7bd72261283..000000000000
--- a/Documentation/ia64/IRQ-redir.txt
+++ /dev/null
@@ -1,69 +0,0 @@
-IRQ affinity on IA64 platforms
-------------------------------
-                           07.01.2002, Erich Focht <efocht@ess.nec.de>
-
-
-By writing to /proc/irq/IRQ#/smp_affinity the interrupt routing can be
-controlled. The behavior on IA64 platforms is slightly different from
-that described in Documentation/IRQ-affinity.txt for i386 systems.
-
-Because of the usage of SAPIC mode and physical destination mode the
-IRQ target is one particular CPU and cannot be a mask of several
-CPUs. Only the first non-zero bit is taken into account.
-
-
-Usage examples:
-
-The target CPU has to be specified as a hexadecimal CPU mask. The
-first non-zero bit is the selected CPU. This format has been kept for
-compatibility reasons with i386.
-
-Set the delivery mode of interrupt 41 to fixed and route the
-interrupts to CPU #3 (logical CPU number) (2^3=0x08):
-     echo "8" >/proc/irq/41/smp_affinity
-
-Set the default route for IRQ number 41 to CPU 6 in lowest priority
-delivery mode (redirectable):
-     echo "r 40" >/proc/irq/41/smp_affinity
-
-The output of the command
-     cat /proc/irq/IRQ#/smp_affinity
-gives the target CPU mask for the specified interrupt vector. If the CPU
-mask is preceded by the character "r", the interrupt is redirectable
-(i.e. lowest priority mode routing is used), otherwise its route is
-fixed.
-
-
-
-Initialization and default behavior:
-
-If the platform features IRQ redirection (info provided by SAL) all
-IO-SAPIC interrupts are initialized with CPU#0 as their default target
-and the routing is the so called "lowest priority mode" (actually
-fixed SAPIC mode with hint). The XTP chipset registers are used as hints
-for the IRQ routing. Currently in Linux XTP registers can have three
-values:
-	- minimal for an idle task,
-	- normal if any other task runs,
-	- maximal if the CPU is going to be switched off.
-The IRQ is routed to the CPU with lowest XTP register value, the
-search begins at the default CPU. Therefore most of the interrupts
-will be handled by CPU #0.
-
-If the platform doesn't feature interrupt redirection IOSAPIC fixed
-routing is used. The target CPUs are distributed in a round robin
-manner. IRQs will be routed only to the selected target CPUs. Check
-with
-        cat /proc/interrupts
-
-
-
-Comments:
-
-On large (multi-node) systems it is recommended to route the IRQs to
-the node to which the corresponding device is connected.
-For systems like the NEC AzusA we get IRQ node-affinity for free. This
-is because usually the chipsets on each node redirect the interrupts
-only to their own CPUs (as they cannot see the XTP registers on the
-other nodes).
-
diff --git a/Documentation/ia64/README b/Documentation/ia64/README
deleted file mode 100644
index aa17f2154cba..000000000000
--- a/Documentation/ia64/README
+++ /dev/null
@@ -1,43 +0,0 @@
-        Linux kernel release 2.4.xx for the IA-64 Platform
-
-   These are the release notes for Linux version 2.4 for IA-64
-   platform.  This document provides information specific to IA-64
-   ONLY, to get additional information about the Linux kernel also
-   read the original Linux README provided with the kernel.
-
-INSTALLING the kernel:
-
- - IA-64 kernel installation is the same as the other platforms, see
-   original README for details.
-
-
-SOFTWARE REQUIREMENTS
-
-   Compiling and running this kernel requires an IA-64 compliant GCC
-   compiler.  And various software packages also compiled with an
-   IA-64 compliant GCC compiler.
-
-
-CONFIGURING the kernel:
-
-   Configuration is the same, see original README for details.
-
-
-COMPILING the kernel:
-
- - Compiling this kernel doesn't differ from other platform so read
-   the original README for details BUT make sure you have an IA-64
-   compliant GCC compiler.
-
-IA-64 SPECIFICS
-
- - General issues:
-
-    o Hardly any performance tuning has been done. Obvious targets
-      include the library routines (IP checksum, etc.). Less
-      obvious targets include making sure we don't flush the TLB
-      needlessly, etc.
-
-    o SMP locks cleanup/optimization
-
-    o IA32 support.  Currently experimental.  It mostly works.
diff --git a/Documentation/ia64/aliasing.rst b/Documentation/ia64/aliasing.rst
new file mode 100644
index 000000000000..a08b36aba015
--- /dev/null
+++ b/Documentation/ia64/aliasing.rst
@@ -0,0 +1,246 @@
+==================================
+Memory Attribute Aliasing on IA-64
+==================================
+
+Bjorn Helgaas <bjorn.helgaas@hp.com>
+
+May 4, 2006
+
+
+Memory Attributes
+=================
+
+    Itanium supports several attributes for virtual memory references.
+    The attribute is part of the virtual translation, i.e., it is
+    contained in the TLB entry.  The ones of most interest to the Linux
+    kernel are:
+
+	==		======================
+        WB		Write-back (cacheable)
+	UC		Uncacheable
+	WC		Write-coalescing
+	==		======================
+
+    System memory typically uses the WB attribute.  The UC attribute is
+    used for memory-mapped I/O devices.  The WC attribute is uncacheable
+    like UC is, but writes may be delayed and combined to increase
+    performance for things like frame buffers.
+
+    The Itanium architecture requires that we avoid accessing the same
+    page with both a cacheable mapping and an uncacheable mapping[1].
+
+    The design of the chipset determines which attributes are supported
+    on which regions of the address space.  For example, some chipsets
+    support either WB or UC access to main memory, while others support
+    only WB access.
+
+Memory Map
+==========
+
+    Platform firmware describes the physical memory map and the
+    supported attributes for each region.  At boot-time, the kernel uses
+    the EFI GetMemoryMap() interface.  ACPI can also describe memory
+    devices and the attributes they support, but Linux/ia64 currently
+    doesn't use this information.
+
+    The kernel uses the efi_memmap table returned from GetMemoryMap() to
+    learn the attributes supported by each region of physical address
+    space.  Unfortunately, this table does not completely describe the
+    address space because some machines omit some or all of the MMIO
+    regions from the map.
+
+    The kernel maintains another table, kern_memmap, which describes the
+    memory Linux is actually using and the attribute for each region.
+    This contains only system memory; it does not contain MMIO space.
+
+    The kern_memmap table typically contains only a subset of the system
+    memory described by the efi_memmap.  Linux/ia64 can't use all memory
+    in the system because of constraints imposed by the identity mapping
+    scheme.
+
+    The efi_memmap table is preserved unmodified because the original
+    boot-time information is required for kexec.
+
+Kernel Identify Mappings
+========================
+
+    Linux/ia64 identity mappings are done with large pages, currently
+    either 16MB or 64MB, referred to as "granules."  Cacheable mappings
+    are speculative[2], so the processor can read any location in the
+    page at any time, independent of the programmer's intentions.  This
+    means that to avoid attribute aliasing, Linux can create a cacheable
+    identity mapping only when the entire granule supports cacheable
+    access.
+
+    Therefore, kern_memmap contains only full granule-sized regions that
+    can referenced safely by an identity mapping.
+
+    Uncacheable mappings are not speculative, so the processor will
+    generate UC accesses only to locations explicitly referenced by
+    software.  This allows UC identity mappings to cover granules that
+    are only partially populated, or populated with a combination of UC
+    and WB regions.
+
+User Mappings
+=============
+
+    User mappings are typically done with 16K or 64K pages.  The smaller
+    page size allows more flexibility because only 16K or 64K has to be
+    homogeneous with respect to memory attributes.
+
+Potential Attribute Aliasing Cases
+==================================
+
+    There are several ways the kernel creates new mappings:
+
+mmap of /dev/mem
+----------------
+
+	This uses remap_pfn_range(), which creates user mappings.  These
+	mappings may be either WB or UC.  If the region being mapped
+	happens to be in kern_memmap, meaning that it may also be mapped
+	by a kernel identity mapping, the user mapping must use the same
+	attribute as the kernel mapping.
+
+	If the region is not in kern_memmap, the user mapping should use
+	an attribute reported as being supported in the EFI memory map.
+
+	Since the EFI memory map does not describe MMIO on some
+	machines, this should use an uncacheable mapping as a fallback.
+
+mmap of /sys/class/pci_bus/.../legacy_mem
+-----------------------------------------
+
+	This is very similar to mmap of /dev/mem, except that legacy_mem
+	only allows mmap of the one megabyte "legacy MMIO" area for a
+	specific PCI bus.  Typically this is the first megabyte of
+	physical address space, but it may be different on machines with
+	several VGA devices.
+
+	"X" uses this to access VGA frame buffers.  Using legacy_mem
+	rather than /dev/mem allows multiple instances of X to talk to
+	different VGA cards.
+
+	The /dev/mem mmap constraints apply.
+
+mmap of /proc/bus/pci/.../??.?
+------------------------------
+
+	This is an MMIO mmap of PCI functions, which additionally may or
+	may not be requested as using the WC attribute.
+
+	If WC is requested, and the region in kern_memmap is either WC
+	or UC, and the EFI memory map designates the region as WC, then
+	the WC mapping is allowed.
+
+	Otherwise, the user mapping must use the same attribute as the
+	kernel mapping.
+
+read/write of /dev/mem
+----------------------
+
+	This uses copy_from_user(), which implicitly uses a kernel
+	identity mapping.  This is obviously safe for things in
+	kern_memmap.
+
+	There may be corner cases of things that are not in kern_memmap,
+	but could be accessed this way.  For example, registers in MMIO
+	space are not in kern_memmap, but could be accessed with a UC
+	mapping.  This would not cause attribute aliasing.  But
+	registers typically can be accessed only with four-byte or
+	eight-byte accesses, and the copy_from_user() path doesn't allow
+	any control over the access size, so this would be dangerous.
+
+ioremap()
+---------
+
+	This returns a mapping for use inside the kernel.
+
+	If the region is in kern_memmap, we should use the attribute
+	specified there.
+
+	If the EFI memory map reports that the entire granule supports
+	WB, we should use that (granules that are partially reserved
+	or occupied by firmware do not appear in kern_memmap).
+
+	If the granule contains non-WB memory, but we can cover the
+	region safely with kernel page table mappings, we can use
+	ioremap_page_range() as most other architectures do.
+
+	Failing all of the above, we have to fall back to a UC mapping.
+
+Past Problem Cases
+==================
+
+mmap of various MMIO regions from /dev/mem by "X" on Intel platforms
+--------------------------------------------------------------------
+
+      The EFI memory map may not report these MMIO regions.
+
+      These must be allowed so that X will work.  This means that
+      when the EFI memory map is incomplete, every /dev/mem mmap must
+      succeed.  It may create either WB or UC user mappings, depending
+      on whether the region is in kern_memmap or the EFI memory map.
+
+mmap of 0x0-0x9FFFF /dev/mem by "hwinfo" on HP sx1000 with VGA enabled
+----------------------------------------------------------------------
+
+      The EFI memory map reports the following attributes:
+
+        =============== ======= ==================
+        0x00000-0x9FFFF WB only
+        0xA0000-0xBFFFF UC only (VGA frame buffer)
+        0xC0000-0xFFFFF WB only
+        =============== ======= ==================
+
+      This mmap is done with user pages, not kernel identity mappings,
+      so it is safe to use WB mappings.
+
+      The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000,
+      which uses a granule-sized UC mapping.  This granule will cover some
+      WB-only memory, but since UC is non-speculative, the processor will
+      never generate an uncacheable reference to the WB-only areas unless
+      the driver explicitly touches them.
+
+mmap of 0x0-0xFFFFF legacy_mem by "X"
+-------------------------------------
+
+      If the EFI memory map reports that the entire range supports the
+      same attributes, we can allow the mmap (and we will prefer WB if
+      supported, as is the case with HP sx[12]000 machines with VGA
+      disabled).
+
+      If EFI reports the range as partly WB and partly UC (as on sx[12]000
+      machines with VGA enabled), we must fail the mmap because there's no
+      safe attribute to use.
+
+      If EFI reports some of the range but not all (as on Intel firmware
+      that doesn't report the VGA frame buffer at all), we should fail the
+      mmap and force the user to map just the specific region of interest.
+
+mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled
+------------------------------------------------------------------------
+
+      The EFI memory map reports the following attributes::
+
+        0x00000-0xFFFFF WB only (no VGA MMIO hole)
+
+      This is a special case of the previous case, and the mmap should
+      fail for the same reason as above.
+
+read of /sys/devices/.../rom
+----------------------------
+
+      For VGA devices, this may cause an ioremap() of 0xC0000.  This
+      used to be done with a UC mapping, because the VGA frame buffer
+      at 0xA0000 prevents use of a WB granule.  The UC mapping causes
+      an MCA on HP sx[12]000 chipsets.
+
+      We should use WB page table mappings to avoid covering the VGA
+      frame buffer.
+
+Notes
+=====
+
+    [1] SDM rev 2.2, vol 2, sec 4.4.1.
+    [2] SDM rev 2.2, vol 2, sec 4.4.6.
diff --git a/Documentation/ia64/aliasing.txt b/Documentation/ia64/aliasing.txt
deleted file mode 100644
index 5a4dea6abebd..000000000000
--- a/Documentation/ia64/aliasing.txt
+++ /dev/null
@@ -1,221 +0,0 @@
-	         MEMORY ATTRIBUTE ALIASING ON IA-64
-
-			   Bjorn Helgaas
-		       <bjorn.helgaas@hp.com>
-			    May 4, 2006
-
-
-MEMORY ATTRIBUTES
-
-    Itanium supports several attributes for virtual memory references.
-    The attribute is part of the virtual translation, i.e., it is
-    contained in the TLB entry.  The ones of most interest to the Linux
-    kernel are:
-
-	WB		Write-back (cacheable)
-	UC		Uncacheable
-	WC		Write-coalescing
-
-    System memory typically uses the WB attribute.  The UC attribute is
-    used for memory-mapped I/O devices.  The WC attribute is uncacheable
-    like UC is, but writes may be delayed and combined to increase
-    performance for things like frame buffers.
-
-    The Itanium architecture requires that we avoid accessing the same
-    page with both a cacheable mapping and an uncacheable mapping[1].
-
-    The design of the chipset determines which attributes are supported
-    on which regions of the address space.  For example, some chipsets
-    support either WB or UC access to main memory, while others support
-    only WB access.
-
-MEMORY MAP
-
-    Platform firmware describes the physical memory map and the
-    supported attributes for each region.  At boot-time, the kernel uses
-    the EFI GetMemoryMap() interface.  ACPI can also describe memory
-    devices and the attributes they support, but Linux/ia64 currently
-    doesn't use this information.
-
-    The kernel uses the efi_memmap table returned from GetMemoryMap() to
-    learn the attributes supported by each region of physical address
-    space.  Unfortunately, this table does not completely describe the
-    address space because some machines omit some or all of the MMIO
-    regions from the map.
-
-    The kernel maintains another table, kern_memmap, which describes the
-    memory Linux is actually using and the attribute for each region.
-    This contains only system memory; it does not contain MMIO space.
-
-    The kern_memmap table typically contains only a subset of the system
-    memory described by the efi_memmap.  Linux/ia64 can't use all memory
-    in the system because of constraints imposed by the identity mapping
-    scheme.
-
-    The efi_memmap table is preserved unmodified because the original
-    boot-time information is required for kexec.
-
-KERNEL IDENTITY MAPPINGS
-
-    Linux/ia64 identity mappings are done with large pages, currently
-    either 16MB or 64MB, referred to as "granules."  Cacheable mappings
-    are speculative[2], so the processor can read any location in the
-    page at any time, independent of the programmer's intentions.  This
-    means that to avoid attribute aliasing, Linux can create a cacheable
-    identity mapping only when the entire granule supports cacheable
-    access.
-
-    Therefore, kern_memmap contains only full granule-sized regions that
-    can referenced safely by an identity mapping.
-
-    Uncacheable mappings are not speculative, so the processor will
-    generate UC accesses only to locations explicitly referenced by
-    software.  This allows UC identity mappings to cover granules that
-    are only partially populated, or populated with a combination of UC
-    and WB regions.
-
-USER MAPPINGS
-
-    User mappings are typically done with 16K or 64K pages.  The smaller
-    page size allows more flexibility because only 16K or 64K has to be
-    homogeneous with respect to memory attributes.
-
-POTENTIAL ATTRIBUTE ALIASING CASES
-
-    There are several ways the kernel creates new mappings:
-
-    mmap of /dev/mem
-
-	This uses remap_pfn_range(), which creates user mappings.  These
-	mappings may be either WB or UC.  If the region being mapped
-	happens to be in kern_memmap, meaning that it may also be mapped
-	by a kernel identity mapping, the user mapping must use the same
-	attribute as the kernel mapping.
-
-	If the region is not in kern_memmap, the user mapping should use
-	an attribute reported as being supported in the EFI memory map.
-
-	Since the EFI memory map does not describe MMIO on some
-	machines, this should use an uncacheable mapping as a fallback.
-
-    mmap of /sys/class/pci_bus/.../legacy_mem
-
-	This is very similar to mmap of /dev/mem, except that legacy_mem
-	only allows mmap of the one megabyte "legacy MMIO" area for a
-	specific PCI bus.  Typically this is the first megabyte of
-	physical address space, but it may be different on machines with
-	several VGA devices.
-
-	"X" uses this to access VGA frame buffers.  Using legacy_mem
-	rather than /dev/mem allows multiple instances of X to talk to
-	different VGA cards.
-
-	The /dev/mem mmap constraints apply.
-
-    mmap of /proc/bus/pci/.../??.?
-
-    	This is an MMIO mmap of PCI functions, which additionally may or
-	may not be requested as using the WC attribute.
-
-	If WC is requested, and the region in kern_memmap is either WC
-	or UC, and the EFI memory map designates the region as WC, then
-	the WC mapping is allowed.
-
-	Otherwise, the user mapping must use the same attribute as the
-	kernel mapping.
-
-    read/write of /dev/mem
-
-	This uses copy_from_user(), which implicitly uses a kernel
-	identity mapping.  This is obviously safe for things in
-	kern_memmap.
-
-	There may be corner cases of things that are not in kern_memmap,
-	but could be accessed this way.  For example, registers in MMIO
-	space are not in kern_memmap, but could be accessed with a UC
-	mapping.  This would not cause attribute aliasing.  But
-	registers typically can be accessed only with four-byte or
-	eight-byte accesses, and the copy_from_user() path doesn't allow
-	any control over the access size, so this would be dangerous.
-
-    ioremap()
-
-	This returns a mapping for use inside the kernel.
-
-	If the region is in kern_memmap, we should use the attribute
-	specified there.
-
-	If the EFI memory map reports that the entire granule supports
-	WB, we should use that (granules that are partially reserved
-	or occupied by firmware do not appear in kern_memmap).
-
-	If the granule contains non-WB memory, but we can cover the
-	region safely with kernel page table mappings, we can use
-	ioremap_page_range() as most other architectures do.
-
-	Failing all of the above, we have to fall back to a UC mapping.
-
-PAST PROBLEM CASES
-
-    mmap of various MMIO regions from /dev/mem by "X" on Intel platforms
-
-      The EFI memory map may not report these MMIO regions.
-
-      These must be allowed so that X will work.  This means that
-      when the EFI memory map is incomplete, every /dev/mem mmap must
-      succeed.  It may create either WB or UC user mappings, depending
-      on whether the region is in kern_memmap or the EFI memory map.
-
-    mmap of 0x0-0x9FFFF /dev/mem by "hwinfo" on HP sx1000 with VGA enabled
-
-      The EFI memory map reports the following attributes:
-        0x00000-0x9FFFF WB only
-        0xA0000-0xBFFFF UC only (VGA frame buffer)
-        0xC0000-0xFFFFF WB only
-
-      This mmap is done with user pages, not kernel identity mappings,
-      so it is safe to use WB mappings.
-
-      The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000,
-      which uses a granule-sized UC mapping.  This granule will cover some
-      WB-only memory, but since UC is non-speculative, the processor will
-      never generate an uncacheable reference to the WB-only areas unless
-      the driver explicitly touches them.
-
-    mmap of 0x0-0xFFFFF legacy_mem by "X"
-
-      If the EFI memory map reports that the entire range supports the
-      same attributes, we can allow the mmap (and we will prefer WB if
-      supported, as is the case with HP sx[12]000 machines with VGA
-      disabled).
-
-      If EFI reports the range as partly WB and partly UC (as on sx[12]000
-      machines with VGA enabled), we must fail the mmap because there's no
-      safe attribute to use.
-
-      If EFI reports some of the range but not all (as on Intel firmware
-      that doesn't report the VGA frame buffer at all), we should fail the
-      mmap and force the user to map just the specific region of interest.
-
-    mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled
-
-      The EFI memory map reports the following attributes:
-        0x00000-0xFFFFF WB only (no VGA MMIO hole)
-
-      This is a special case of the previous case, and the mmap should
-      fail for the same reason as above.
-
-    read of /sys/devices/.../rom
-
-      For VGA devices, this may cause an ioremap() of 0xC0000.  This
-      used to be done with a UC mapping, because the VGA frame buffer
-      at 0xA0000 prevents use of a WB granule.  The UC mapping causes
-      an MCA on HP sx[12]000 chipsets.
-
-      We should use WB page table mappings to avoid covering the VGA
-      frame buffer.
-
-NOTES
-
-    [1] SDM rev 2.2, vol 2, sec 4.4.1.
-    [2] SDM rev 2.2, vol 2, sec 4.4.6.
diff --git a/Documentation/ia64/efirtc.rst b/Documentation/ia64/efirtc.rst
new file mode 100644
index 000000000000..2f7ff5026308
--- /dev/null
+++ b/Documentation/ia64/efirtc.rst
@@ -0,0 +1,144 @@
+==========================
+EFI Real Time Clock driver
+==========================
+
+S. Eranian <eranian@hpl.hp.com>
+
+March 2000
+
+1. Introduction
+===============
+
+This document describes the efirtc.c driver has provided for
+the IA-64 platform.
+
+The purpose of this driver is to supply an API for kernel and user applications
+to get access to the Time Service offered by EFI version 0.92.
+
+EFI provides 4 calls one can make once the OS is booted: GetTime(),
+SetTime(), GetWakeupTime(), SetWakeupTime() which are all supported by this
+driver. We describe those calls as well the design of the driver in the
+following sections.
+
+2. Design Decisions
+===================
+
+The original ideas was to provide a very simple driver to get access to,
+at first, the time of day service. This is required in order to access, in a
+portable way, the CMOS clock. A program like /sbin/hwclock uses such a clock
+to initialize the system view of the time during boot.
+
+Because we wanted to minimize the impact on existing user-level apps using
+the CMOS clock, we decided to expose an API that was very similar to the one
+used today with the legacy RTC driver (driver/char/rtc.c). However, because
+EFI provides a simpler services, not all ioctl() are available. Also
+new ioctl()s have been introduced for things that EFI provides but not the
+legacy.
+
+EFI uses a slightly different way of representing the time, noticeably
+the reference date is different. Year is the using the full 4-digit format.
+The Epoch is January 1st 1998. For backward compatibility reasons we don't
+expose this new way of representing time. Instead we use something very
+similar to the struct tm, i.e. struct rtc_time, as used by hwclock.
+One of the reasons for doing it this way is to allow for EFI to still evolve
+without necessarily impacting any of the user applications. The decoupling
+enables flexibility and permits writing wrapper code is ncase things change.
+
+The driver exposes two interfaces, one via the device file and a set of
+ioctl()s. The other is read-only via the /proc filesystem.
+
+As of today we don't offer a /proc/sys interface.
+
+To allow for a uniform interface between the legacy RTC and EFI time service,
+we have created the include/linux/rtc.h header file to contain only the
+"public" API of the two drivers.  The specifics of the legacy RTC are still
+in include/linux/mc146818rtc.h.
+
+
+3. Time of day service
+======================
+
+The part of the driver gives access to the time of day service of EFI.
+Two ioctl()s, compatible with the legacy RTC calls:
+
+	Read the CMOS clock::
+
+		ioctl(d, RTC_RD_TIME, &rtc);
+
+	Write the CMOS clock::
+
+		ioctl(d, RTC_SET_TIME, &rtc);
+
+The rtc is a pointer to a data structure defined in rtc.h which is close
+to a struct tm::
+
+  struct rtc_time {
+          int tm_sec;
+          int tm_min;
+          int tm_hour;
+          int tm_mday;
+          int tm_mon;
+          int tm_year;
+          int tm_wday;
+          int tm_yday;
+          int tm_isdst;
+  };
+
+The driver takes care of converting back an forth between the EFI time and
+this format.
+
+Those two ioctl()s can be exercised with the hwclock command:
+
+For reading::
+
+	# /sbin/hwclock --show
+	Mon Mar  6 15:32:32 2000  -0.910248 seconds
+
+For setting::
+
+	# /sbin/hwclock --systohc
+
+Root privileges are required to be able to set the time of day.
+
+4. Wakeup Alarm service
+=======================
+
+EFI provides an API by which one can program when a machine should wakeup,
+i.e. reboot. This is very different from the alarm provided by the legacy
+RTC which is some kind of interval timer alarm. For this reason we don't use
+the same ioctl()s to get access to the service. Instead we have
+introduced 2 news ioctl()s to the interface of an RTC.
+
+We have added 2 new ioctl()s that are specific to the EFI driver:
+
+	Read the current state of the alarm::
+
+		ioctl(d, RTC_WKLAM_RD, &wkt)
+
+	Set the alarm or change its status::
+
+		ioctl(d, RTC_WKALM_SET, &wkt)
+
+The wkt structure encapsulates a struct rtc_time + 2 extra fields to get
+status information::
+
+  struct rtc_wkalrm {
+
+          unsigned char enabled; /* =1 if alarm is enabled */
+          unsigned char pending; /* =1 if alarm is pending  */
+
+          struct rtc_time time;
+  }
+
+As of today, none of the existing user-level apps supports this feature.
+However writing such a program should be hard by simply using those two
+ioctl().
+
+Root privileges are required to be able to set the alarm.
+
+5. References
+=============
+
+Checkout the following Web site for more information on EFI:
+
+http://developer.intel.com/technology/efi/
diff --git a/Documentation/ia64/efirtc.txt b/Documentation/ia64/efirtc.txt
deleted file mode 100644
index 057e6bebda8f..000000000000
--- a/Documentation/ia64/efirtc.txt
+++ /dev/null
@@ -1,128 +0,0 @@
-EFI Real Time Clock driver
--------------------------------
-S. Eranian <eranian@hpl.hp.com>
-March 2000
-
-I/ Introduction
-
-This document describes the efirtc.c driver has provided for
-the IA-64 platform. 
-
-The purpose of this driver is to supply an API for kernel and user applications
-to get access to the Time Service offered by EFI version 0.92.
-
-EFI provides 4 calls one can make once the OS is booted: GetTime(),
-SetTime(), GetWakeupTime(), SetWakeupTime() which are all supported by this
-driver. We describe those calls as well the design of the driver in the
-following sections.
-
-II/ Design Decisions
-
-The original ideas was to provide a very simple driver to get access to, 
-at first, the time of day service. This is required in order to access, in a 
-portable way, the CMOS clock. A program like /sbin/hwclock uses such a clock 
-to initialize the system view of the time during boot.
-
-Because we wanted to minimize the impact on existing user-level apps using
-the CMOS clock, we decided to expose an API that was very similar to the one
-used today with the legacy RTC driver (driver/char/rtc.c). However, because 
-EFI provides a simpler services, not all ioctl() are available. Also
-new ioctl()s have been introduced for things that EFI provides but not the 
-legacy.
-
-EFI uses a slightly different way of representing the time, noticeably
-the reference date is different. Year is the using the full 4-digit format.
-The Epoch is January 1st 1998. For backward compatibility reasons we don't
-expose this new way of representing time. Instead we use something very 
-similar to the struct tm, i.e. struct rtc_time, as used by hwclock.
-One of the reasons for doing it this way is to allow for EFI to still evolve
-without necessarily impacting any of the user applications. The decoupling
-enables flexibility and permits writing wrapper code is ncase things change.
-
-The driver exposes two interfaces, one via the device file and a set of
-ioctl()s. The other is read-only via the /proc filesystem. 
-
-As of today we don't offer a /proc/sys interface.
-
-To allow for a uniform interface between the legacy RTC and EFI time service,
-we have created the include/linux/rtc.h header file to contain only the 
-"public" API of the two drivers.  The specifics of the legacy RTC are still 
-in include/linux/mc146818rtc.h.
-
- 
-III/ Time of day service
-
-The part of the driver gives access to the time of day service of EFI.
-Two ioctl()s, compatible with the legacy RTC calls:
-
-	Read the CMOS clock: ioctl(d, RTC_RD_TIME, &rtc);
-
-	Write the CMOS clock: ioctl(d, RTC_SET_TIME, &rtc);
-
-The rtc is a pointer to a data structure defined in rtc.h which is close
-to a struct tm:
-
-struct rtc_time {
-        int tm_sec;
-        int tm_min;
-        int tm_hour;
-        int tm_mday;
-        int tm_mon;
-        int tm_year;
-        int tm_wday;
-        int tm_yday;
-        int tm_isdst;
-};
-
-The driver takes care of converting back an forth between the EFI time and
-this format.
-
-Those two ioctl()s can be exercised with the hwclock command:
-
-For reading:
-# /sbin/hwclock --show
-Mon Mar  6 15:32:32 2000  -0.910248 seconds
-
-For setting:
-# /sbin/hwclock --systohc
-
-Root privileges are required to be able to set the time of day.
-
-IV/ Wakeup Alarm service
-
-EFI provides an API by which one can program when a machine should wakeup,
-i.e. reboot. This is very different from the alarm provided by the legacy
-RTC which is some kind of interval timer alarm. For this reason we don't use
-the same ioctl()s to get access to the service. Instead we have
-introduced 2 news ioctl()s to the interface of an RTC. 
-
-We have added 2 new ioctl()s that are specific to the EFI driver:
-
-	Read the current state of the alarm
-	ioctl(d, RTC_WKLAM_RD, &wkt)
-
-	Set the alarm or change its status
-	ioctl(d, RTC_WKALM_SET, &wkt)
-
-The wkt structure encapsulates a struct rtc_time + 2 extra fields to get 
-status information:
-	
-struct rtc_wkalrm {
-
-        unsigned char enabled; /* =1 if alarm is enabled */
-        unsigned char pending; /* =1 if alarm is pending  */
-
-        struct rtc_time time;
-} 
-
-As of today, none of the existing user-level apps supports this feature.
-However writing such a program should be hard by simply using those two 
-ioctl(). 
-
-Root privileges are required to be able to set the alarm.
-
-V/ References.
-
-Checkout the following Web site for more information on EFI:
-
-http://developer.intel.com/technology/efi/
diff --git a/Documentation/ia64/err_inject.rst b/Documentation/ia64/err_inject.rst
new file mode 100644
index 000000000000..900f71e93a29
--- /dev/null
+++ b/Documentation/ia64/err_inject.rst
@@ -0,0 +1,1067 @@
+========================================
+IPF Machine Check (MC) error inject tool
+========================================
+
+IPF Machine Check (MC) error inject tool is used to inject MC
+errors from Linux. The tool is a test bed for IPF MC work flow including
+hardware correctable error handling, OS recoverable error handling, MC
+event logging, etc.
+
+The tool includes two parts: a kernel driver and a user application
+sample. The driver provides interface to PAL to inject error
+and query error injection capabilities. The driver code is in
+arch/ia64/kernel/err_inject.c. The application sample (shown below)
+provides a combination of various errors and calls the driver's interface
+(sysfs interface) to inject errors or query error injection capabilities.
+
+The tool can be used to test Intel IPF machine MC handling capabilities.
+It's especially useful for people who can not access hardware MC injection
+tool to inject error. It's also very useful to integrate with other
+software test suits to do stressful testing on IPF.
+
+Below is a sample application as part of the whole tool. The sample
+can be used as a working test tool. Or it can be expanded to include
+more features. It also can be a integrated into a library or other user
+application to have more thorough test.
+
+The sample application takes err.conf as error configuration input. GCC
+compiles the code. After you install err_inject driver, you can run
+this sample application to inject errors.
+
+Errata: Itanium 2 Processors Specification Update lists some errata against
+the pal_mc_error_inject PAL procedure. The following err.conf has been tested
+on latest Montecito PAL.
+
+err.conf::
+
+  #This is configuration file for err_inject_tool.
+  #The format of the each line is:
+  #cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer
+  #where
+  #	cpu: logical cpu number the error will be inject in.
+  #	loop: times the error will be injected.
+  #	interval: In second. every so often one error is injected.
+  #	err_type_info, err_struct_info: PAL parameters.
+  #
+  #Note: All values are hex w/o or w/ 0x prefix.
+
+
+  #On cpu2, inject only total 0x10 errors, interval 5 seconds
+  #corrected, data cache, hier-2, physical addr(assigned by tool code).
+  #working on Montecito latest PAL.
+  2, 10, 5, 4101, 95
+
+  #On cpu4, inject and consume total 0x10 errors, interval 5 seconds
+  #corrected, data cache, hier-2, physical addr(assigned by tool code).
+  #working on Montecito latest PAL.
+  4, 10, 5, 4109, 95
+
+  #On cpu15, inject and consume total 0x10 errors, interval 5 seconds
+  #recoverable, DTR0, hier-2.
+  #working on Montecito latest PAL.
+  0xf, 0x10, 5, 4249, 15
+
+The sample application source code:
+
+err_injection_tool.c::
+
+  /*
+   * This program is free software; you can redistribute it and/or modify
+   * it under the terms of the GNU General Public License as published by
+   * the Free Software Foundation; either version 2 of the License, or
+   * (at your option) any later version.
+   *
+   * This program is distributed in the hope that it will be useful, but
+   * WITHOUT ANY WARRANTY; without even the implied warranty of
+   * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+   * NON INFRINGEMENT.  See the GNU General Public License for more
+   * details.
+   *
+   * You should have received a copy of the GNU General Public License
+   * along with this program; if not, write to the Free Software
+   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+   *
+   * Copyright (C) 2006 Intel Co
+   *	Fenghua Yu <fenghua.yu@intel.com>
+   *
+   */
+  #include <sys/types.h>
+  #include <sys/stat.h>
+  #include <fcntl.h>
+  #include <stdio.h>
+  #include <sched.h>
+  #include <unistd.h>
+  #include <stdlib.h>
+  #include <stdarg.h>
+  #include <string.h>
+  #include <errno.h>
+  #include <time.h>
+  #include <sys/ipc.h>
+  #include <sys/sem.h>
+  #include <sys/wait.h>
+  #include <sys/mman.h>
+  #include <sys/shm.h>
+
+  #define MAX_FN_SIZE 		256
+  #define MAX_BUF_SIZE 		256
+  #define DATA_BUF_SIZE 		256
+  #define NR_CPUS 		512
+  #define MAX_TASK_NUM		2048
+  #define MIN_INTERVAL		5	// seconds
+  #define	ERR_DATA_BUFFER_SIZE 	3	// Three 8-byte.
+  #define PARA_FIELD_NUM		5
+  #define MASK_SIZE		(NR_CPUS/64)
+  #define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/"
+
+  int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask);
+
+  int verbose;
+  #define vbprintf if (verbose) printf
+
+  int log_info(int cpu, const char *fmt, ...)
+  {
+	FILE *log;
+	char fn[MAX_FN_SIZE];
+	char buf[MAX_BUF_SIZE];
+	va_list args;
+
+	sprintf(fn, "%d.log", cpu);
+	log=fopen(fn, "a+");
+	if (log==NULL) {
+		perror("Error open:");
+		return -1;
+	}
+
+	va_start(args, fmt);
+	vprintf(fmt, args);
+	memset(buf, 0, MAX_BUF_SIZE);
+	vsprintf(buf, fmt, args);
+	va_end(args);
+
+	fwrite(buf, sizeof(buf), 1, log);
+	fclose(log);
+
+	return 0;
+  }
+
+  typedef unsigned long u64;
+  typedef unsigned int  u32;
+
+  typedef union err_type_info_u {
+	struct {
+		u64	mode		: 3,	/* 0-2 */
+			err_inj		: 3,	/* 3-5 */
+			err_sev		: 2,	/* 6-7 */
+			err_struct	: 5,	/* 8-12 */
+			struct_hier	: 3,	/* 13-15 */
+			reserved	: 48;	/* 16-63 */
+	} err_type_info_u;
+	u64	err_type_info;
+  } err_type_info_t;
+
+  typedef union err_struct_info_u {
+	struct {
+		u64	siv		: 1,	/* 0	 */
+			c_t		: 2,	/* 1-2	 */
+			cl_p		: 3,	/* 3-5	 */
+			cl_id		: 3,	/* 6-8	 */
+			cl_dp		: 1,	/* 9	 */
+			reserved1	: 22,	/* 10-31 */
+			tiv		: 1,	/* 32	 */
+			trigger		: 4,	/* 33-36 */
+			trigger_pl 	: 3,	/* 37-39 */
+			reserved2 	: 24;	/* 40-63 */
+	} err_struct_info_cache;
+	struct {
+		u64	siv		: 1,	/* 0	 */
+			tt		: 2,	/* 1-2	 */
+			tc_tr		: 2,	/* 3-4	 */
+			tr_slot		: 8,	/* 5-12	 */
+			reserved1	: 19,	/* 13-31 */
+			tiv		: 1,	/* 32	 */
+			trigger		: 4,	/* 33-36 */
+			trigger_pl 	: 3,	/* 37-39 */
+			reserved2 	: 24;	/* 40-63 */
+	} err_struct_info_tlb;
+	struct {
+		u64	siv		: 1,	/* 0	 */
+			regfile_id	: 4,	/* 1-4	 */
+			reg_num		: 7,	/* 5-11	 */
+			reserved1	: 20,	/* 12-31 */
+			tiv		: 1,	/* 32	 */
+			trigger		: 4,	/* 33-36 */
+			trigger_pl 	: 3,	/* 37-39 */
+			reserved2 	: 24;	/* 40-63 */
+	} err_struct_info_register;
+	struct {
+		u64	reserved;
+	} err_struct_info_bus_processor_interconnect;
+	u64	err_struct_info;
+  } err_struct_info_t;
+
+  typedef union err_data_buffer_u {
+	struct {
+		u64	trigger_addr;		/* 0-63		*/
+		u64	inj_addr;		/* 64-127 	*/
+		u64	way		: 5,	/* 128-132	*/
+			index		: 20,	/* 133-152	*/
+					: 39;	/* 153-191	*/
+	} err_data_buffer_cache;
+	struct {
+		u64	trigger_addr;		/* 0-63		*/
+		u64	inj_addr;		/* 64-127 	*/
+		u64	way		: 5,	/* 128-132	*/
+			index		: 20,	/* 133-152	*/
+			reserved	: 39;	/* 153-191	*/
+	} err_data_buffer_tlb;
+	struct {
+		u64	trigger_addr;		/* 0-63		*/
+	} err_data_buffer_register;
+	struct {
+		u64	reserved;		/* 0-63		*/
+	} err_data_buffer_bus_processor_interconnect;
+	u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
+  } err_data_buffer_t;
+
+  typedef union capabilities_u {
+	struct {
+		u64	i		: 1,
+			d		: 1,
+			rv		: 1,
+			tag		: 1,
+			data		: 1,
+			mesi		: 1,
+			dp		: 1,
+			reserved1	: 3,
+			pa		: 1,
+			va		: 1,
+			wi		: 1,
+			reserved2	: 20,
+			trigger		: 1,
+			trigger_pl	: 1,
+			reserved3	: 30;
+	} capabilities_cache;
+	struct {
+		u64	d		: 1,
+			i		: 1,
+			rv		: 1,
+			tc		: 1,
+			tr		: 1,
+			reserved1	: 27,
+			trigger		: 1,
+			trigger_pl	: 1,
+			reserved2	: 30;
+	} capabilities_tlb;
+	struct {
+		u64	gr_b0		: 1,
+			gr_b1		: 1,
+			fr		: 1,
+			br		: 1,
+			pr		: 1,
+			ar		: 1,
+			cr		: 1,
+			rr		: 1,
+			pkr		: 1,
+			dbr		: 1,
+			ibr		: 1,
+			pmc		: 1,
+			pmd		: 1,
+			reserved1	: 3,
+			regnum		: 1,
+			reserved2	: 15,
+			trigger		: 1,
+			trigger_pl	: 1,
+			reserved3	: 30;
+	} capabilities_register;
+	struct {
+		u64	reserved;
+	} capabilities_bus_processor_interconnect;
+  } capabilities_t;
+
+  typedef struct resources_s {
+	u64	ibr0		: 1,
+		ibr2		: 1,
+		ibr4		: 1,
+		ibr6		: 1,
+		dbr0		: 1,
+		dbr2		: 1,
+		dbr4		: 1,
+		dbr6		: 1,
+		reserved	: 48;
+  } resources_t;
+
+
+  long get_page_size(void)
+  {
+	long page_size=sysconf(_SC_PAGESIZE);
+	return page_size;
+  }
+
+  #define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size())
+  #define SHM_SIZE (2*PAGE_SIZE*NR_CPUS)
+  #define SHM_VA 0x2000000100000000
+
+  int shmid;
+  void *shmaddr;
+
+  int create_shm(void)
+  {
+	key_t key;
+	char fn[MAX_FN_SIZE];
+
+	/* cpu0 is always existing */
+	sprintf(fn, PATH_FORMAT, 0);
+	if ((key = ftok(fn, 's')) == -1) {
+		perror("ftok");
+		return -1;
+	}
+
+	shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT);
+	if (shmid == -1) {
+		if (errno==EEXIST) {
+			shmid = shmget(key, SHM_SIZE, 0);
+			if (shmid == -1) {
+				perror("shmget");
+				return -1;
+			}
+		}
+		else {
+			perror("shmget");
+			return -1;
+		}
+	}
+	vbprintf("shmid=%d", shmid);
+
+	/* connect to the segment: */
+	shmaddr = shmat(shmid, (void *)SHM_VA, 0);
+	if (shmaddr == (void*)-1) {
+		perror("shmat");
+		return -1;
+	}
+
+	memset(shmaddr, 0, SHM_SIZE);
+	mlock(shmaddr, SHM_SIZE);
+
+	return 0;
+  }
+
+  int free_shm()
+  {
+	munlock(shmaddr, SHM_SIZE);
+          shmdt(shmaddr);
+	semctl(shmid, 0, IPC_RMID);
+
+	return 0;
+  }
+
+  #ifdef _SEM_SEMUN_UNDEFINED
+  union semun
+  {
+	int val;
+	struct semid_ds *buf;
+	unsigned short int *array;
+	struct seminfo *__buf;
+  };
+  #endif
+
+  u32 mode=1; /* 1: physical mode; 2: virtual mode. */
+  int one_lock=1;
+  key_t key[NR_CPUS];
+  int semid[NR_CPUS];
+
+  int create_sem(int cpu)
+  {
+	union semun arg;
+	char fn[MAX_FN_SIZE];
+	int sid;
+
+	sprintf(fn, PATH_FORMAT, cpu);
+	sprintf(fn, "%s/%s", fn, "err_type_info");
+	if ((key[cpu] = ftok(fn, 'e')) == -1) {
+		perror("ftok");
+		return -1;
+	}
+
+	if (semid[cpu]!=0)
+		return 0;
+
+	/* clear old semaphore */
+	if ((sid = semget(key[cpu], 1, 0)) != -1)
+		semctl(sid, 0, IPC_RMID);
+
+	/* get one semaphore */
+	if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) {
+		perror("semget");
+		printf("Please remove semaphore with key=0x%lx, then run the tool.\n",
+			(u64)key[cpu]);
+		return -1;
+	}
+
+	vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu,
+		(u64)key[cpu]);
+	/* initialize the semaphore to 1: */
+	arg.val = 1;
+	if (semctl(semid[cpu], 0, SETVAL, arg) == -1) {
+		perror("semctl");
+		return -1;
+	}
+
+	return 0;
+  }
+
+  static int lock(int cpu)
+  {
+	struct sembuf lock;
+
+	lock.sem_num = cpu;
+	lock.sem_op = 1;
+	semop(semid[cpu], &lock, 1);
+
+          return 0;
+  }
+
+  static int unlock(int cpu)
+  {
+	struct sembuf unlock;
+
+	unlock.sem_num = cpu;
+	unlock.sem_op = -1;
+	semop(semid[cpu], &unlock, 1);
+
+          return 0;
+  }
+
+  void free_sem(int cpu)
+  {
+	semctl(semid[cpu], 0, IPC_RMID);
+  }
+
+  int wr_multi(char *fn, unsigned long *data, int size)
+  {
+	int fd;
+	char buf[MAX_BUF_SIZE];
+	int ret;
+
+	if (size==1)
+		sprintf(buf, "%lx", *data);
+	else if (size==3)
+		sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]);
+	else {
+		fprintf(stderr,"write to file with wrong size!\n");
+		return -1;
+	}
+
+	fd=open(fn, O_RDWR);
+	if (!fd) {
+		perror("Error:");
+		return -1;
+	}
+	ret=write(fd, buf, sizeof(buf));
+	close(fd);
+	return ret;
+  }
+
+  int wr(char *fn, unsigned long data)
+  {
+	return wr_multi(fn, &data, 1);
+  }
+
+  int rd(char *fn, unsigned long *data)
+  {
+	int fd;
+	char buf[MAX_BUF_SIZE];
+
+	fd=open(fn, O_RDONLY);
+	if (fd<0) {
+		perror("Error:");
+		return -1;
+	}
+	read(fd, buf, MAX_BUF_SIZE);
+	*data=strtoul(buf, NULL, 16);
+	close(fd);
+	return 0;
+  }
+
+  int rd_status(char *path, int *status)
+  {
+	char fn[MAX_FN_SIZE];
+	sprintf(fn, "%s/status", path);
+	if (rd(fn, (u64*)status)<0) {
+		perror("status reading error.\n");
+		return -1;
+	}
+
+	return 0;
+  }
+
+  int rd_capabilities(char *path, u64 *capabilities)
+  {
+	char fn[MAX_FN_SIZE];
+	sprintf(fn, "%s/capabilities", path);
+	if (rd(fn, capabilities)<0) {
+		perror("capabilities reading error.\n");
+		return -1;
+	}
+
+	return 0;
+  }
+
+  int rd_all(char *path)
+  {
+	unsigned long err_type_info, err_struct_info, err_data_buffer;
+	int status;
+	unsigned long capabilities, resources;
+	char fn[MAX_FN_SIZE];
+
+	sprintf(fn, "%s/err_type_info", path);
+	if (rd(fn, &err_type_info)<0) {
+		perror("err_type_info reading error.\n");
+		return -1;
+	}
+	printf("err_type_info=%lx\n", err_type_info);
+
+	sprintf(fn, "%s/err_struct_info", path);
+	if (rd(fn, &err_struct_info)<0) {
+		perror("err_struct_info reading error.\n");
+		return -1;
+	}
+	printf("err_struct_info=%lx\n", err_struct_info);
+
+	sprintf(fn, "%s/err_data_buffer", path);
+	if (rd(fn, &err_data_buffer)<0) {
+		perror("err_data_buffer reading error.\n");
+		return -1;
+	}
+	printf("err_data_buffer=%lx\n", err_data_buffer);
+
+	sprintf(fn, "%s/status", path);
+	if (rd("status", (u64*)&status)<0) {
+		perror("status reading error.\n");
+		return -1;
+	}
+	printf("status=%d\n", status);
+
+	sprintf(fn, "%s/capabilities", path);
+	if (rd(fn,&capabilities)<0) {
+		perror("capabilities reading error.\n");
+		return -1;
+	}
+	printf("capabilities=%lx\n", capabilities);
+
+	sprintf(fn, "%s/resources", path);
+	if (rd(fn, &resources)<0) {
+		perror("resources reading error.\n");
+		return -1;
+	}
+	printf("resources=%lx\n", resources);
+
+	return 0;
+  }
+
+  int query_capabilities(char *path, err_type_info_t err_type_info,
+			u64 *capabilities)
+  {
+	char fn[MAX_FN_SIZE];
+	err_struct_info_t err_struct_info;
+	err_data_buffer_t err_data_buffer;
+
+	err_struct_info.err_struct_info=0;
+	memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8);
+
+	sprintf(fn, "%s/err_type_info", path);
+	wr(fn, err_type_info.err_type_info);
+	sprintf(fn, "%s/err_struct_info", path);
+	wr(fn, 0x0);
+	sprintf(fn, "%s/err_data_buffer", path);
+	wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
+
+	// Fire pal_mc_error_inject procedure.
+	sprintf(fn, "%s/call_start", path);
+	wr(fn, mode);
+
+	if (rd_capabilities(path, capabilities)<0)
+		return -1;
+
+	return 0;
+  }
+
+  int query_all_capabilities()
+  {
+	int status;
+	err_type_info_t err_type_info;
+	int err_sev, err_struct, struct_hier;
+	int cap=0;
+	u64 capabilities;
+	char path[MAX_FN_SIZE];
+
+	err_type_info.err_type_info=0;			// Initial
+	err_type_info.err_type_info_u.mode=0;		// Query mode;
+	err_type_info.err_type_info_u.err_inj=0;
+
+	printf("All capabilities implemented in pal_mc_error_inject:\n");
+	sprintf(path, PATH_FORMAT ,0);
+	for (err_sev=0;err_sev<3;err_sev++)
+		for (err_struct=0;err_struct<5;err_struct++)
+			for (struct_hier=0;struct_hier<5;struct_hier++)
+	{
+		status=-1;
+		capabilities=0;
+		err_type_info.err_type_info_u.err_sev=err_sev;
+		err_type_info.err_type_info_u.err_struct=err_struct;
+		err_type_info.err_type_info_u.struct_hier=struct_hier;
+
+		if (query_capabilities(path, err_type_info, &capabilities)<0)
+			continue;
+
+		if (rd_status(path, &status)<0)
+			continue;
+
+		if (status==0) {
+			cap=1;
+			printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ",
+				err_sev, err_struct, struct_hier);
+			printf("capabilities 0x%lx\n", capabilities);
+		}
+	}
+	if (!cap) {
+		printf("No capabilities supported.\n");
+		return 0;
+	}
+
+	return 0;
+  }
+
+  int err_inject(int cpu, char *path, err_type_info_t err_type_info,
+		err_struct_info_t err_struct_info,
+		err_data_buffer_t err_data_buffer)
+  {
+	int status;
+	char fn[MAX_FN_SIZE];
+
+	log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ",
+		err_type_info.err_type_info,
+		err_struct_info.err_struct_info);
+	log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n",
+		err_data_buffer.err_data_buffer[0],
+		err_data_buffer.err_data_buffer[1],
+		err_data_buffer.err_data_buffer[2]);
+	sprintf(fn, "%s/err_type_info", path);
+	wr(fn, err_type_info.err_type_info);
+	sprintf(fn, "%s/err_struct_info", path);
+	wr(fn, err_struct_info.err_struct_info);
+	sprintf(fn, "%s/err_data_buffer", path);
+	wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
+
+	// Fire pal_mc_error_inject procedure.
+	sprintf(fn, "%s/call_start", path);
+	wr(fn,mode);
+
+	if (rd_status(path, &status)<0) {
+		vbprintf("fail: read status\n");
+		return -100;
+	}
+
+	if (status!=0) {
+		log_info(cpu, "fail: status=%d\n", status);
+		return status;
+	}
+
+	return status;
+  }
+
+  static int construct_data_buf(char *path, err_type_info_t err_type_info,
+		err_struct_info_t err_struct_info,
+		err_data_buffer_t *err_data_buffer,
+		void *va1)
+  {
+	char fn[MAX_FN_SIZE];
+	u64 virt_addr=0, phys_addr=0;
+
+	vbprintf("va1=%lx\n", (u64)va1);
+	memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8);
+
+	switch (err_type_info.err_type_info_u.err_struct) {
+		case 1: // Cache
+			switch (err_struct_info.err_struct_info_cache.cl_id) {
+				case 1: //Virtual addr
+					err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1;
+					break;
+				case 2: //Phys addr
+					sprintf(fn, "%s/virtual_to_phys", path);
+					virt_addr=(u64)va1;
+					if (wr(fn,virt_addr)<0)
+						return -1;
+					rd(fn, &phys_addr);
+					err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr;
+					break;
+				default:
+					printf("Not supported cl_id\n");
+					break;
+			}
+			break;
+		case 2: //  TLB
+			break;
+		case 3: //  Register file
+			break;
+		case 4: //  Bus/system interconnect
+		default:
+			printf("Not supported err_struct\n");
+			break;
+	}
+
+	return 0;
+  }
+
+  typedef struct {
+	u64 cpu;
+	u64 loop;
+	u64 interval;
+	u64 err_type_info;
+	u64 err_struct_info;
+	u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
+  } parameters_t;
+
+  parameters_t line_para;
+  int para;
+
+  static int empty_data_buffer(u64 *err_data_buffer)
+  {
+	int empty=1;
+	int i;
+
+	for (i=0;i<ERR_DATA_BUFFER_SIZE; i++)
+	   if (err_data_buffer[i]!=-1)
+		empty=0;
+
+	return empty;
+  }
+
+  int err_inj()
+  {
+	err_type_info_t err_type_info;
+	err_struct_info_t err_struct_info;
+	err_data_buffer_t err_data_buffer;
+	int count;
+	FILE *fp;
+	unsigned long cpu, loop, interval, err_type_info_conf, err_struct_info_conf;
+	u64 err_data_buffer_conf[ERR_DATA_BUFFER_SIZE];
+	int num;
+	int i;
+	char path[MAX_FN_SIZE];
+	parameters_t parameters[MAX_TASK_NUM]={};
+	pid_t child_pid[MAX_TASK_NUM];
+	time_t current_time;
+	int status;
+
+	if (!para) {
+	    fp=fopen("err.conf", "r");
+	    if (fp==NULL) {
+		perror("Error open err.conf");
+		return -1;
+	    }
+
+	    num=0;
+	    while (!feof(fp)) {
+		char buf[256];
+		memset(buf,0,256);
+		fgets(buf, 256, fp);
+		count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
+				&cpu, &loop, &interval,&err_type_info_conf,
+				&err_struct_info_conf,
+				&err_data_buffer_conf[0],
+				&err_data_buffer_conf[1],
+				&err_data_buffer_conf[2]);
+		if (count!=PARA_FIELD_NUM+3) {
+			err_data_buffer_conf[0]=-1;
+			err_data_buffer_conf[1]=-1;
+			err_data_buffer_conf[2]=-1;
+			count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx\n",
+				&cpu, &loop, &interval,&err_type_info_conf,
+				&err_struct_info_conf);
+			if (count!=PARA_FIELD_NUM)
+				continue;
+		}
+
+		parameters[num].cpu=cpu;
+		parameters[num].loop=loop;
+		parameters[num].interval= interval>MIN_INTERVAL
+					  ?interval:MIN_INTERVAL;
+		parameters[num].err_type_info=err_type_info_conf;
+		parameters[num].err_struct_info=err_struct_info_conf;
+		memcpy(parameters[num++].err_data_buffer,
+			err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ;
+
+		if (num>=MAX_TASK_NUM)
+			break;
+	    }
+	}
+	else {
+		parameters[0].cpu=line_para.cpu;
+		parameters[0].loop=line_para.loop;
+		parameters[0].interval= line_para.interval>MIN_INTERVAL
+					  ?line_para.interval:MIN_INTERVAL;
+		parameters[0].err_type_info=line_para.err_type_info;
+		parameters[0].err_struct_info=line_para.err_struct_info;
+		memcpy(parameters[0].err_data_buffer,
+			line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ;
+
+		num=1;
+	}
+
+	/* Create semaphore: If one_lock, one semaphore for all processors.
+	   Otherwise, one semaphore for each processor. */
+	if (one_lock) {
+		if (create_sem(0)) {
+			printf("Can not create semaphore...exit\n");
+			free_sem(0);
+			return -1;
+		}
+	}
+	else {
+		for (i=0;i<num;i++) {
+		   if (create_sem(parameters[i].cpu)) {
+			printf("Can not create semaphore for cpu%d...exit\n",i);
+			free_sem(parameters[num].cpu);
+			return -1;
+		   }
+		}
+	}
+
+	/* Create a shm segment which will be used to inject/consume errors on.*/
+	if (create_shm()==-1) {
+		printf("Error to create shm...exit\n");
+		return -1;
+	}
+
+	for (i=0;i<num;i++) {
+		pid_t pid;
+
+		current_time=time(NULL);
+		log_info(parameters[i].cpu, "\nBegine at %s", ctime(&current_time));
+		log_info(parameters[i].cpu, "Configurations:\n");
+		log_info(parameters[i].cpu,"On cpu%ld: loop=%lx, interval=%lx(s)",
+			parameters[i].cpu,
+			parameters[i].loop,
+			parameters[i].interval);
+		log_info(parameters[i].cpu," err_type_info=%lx,err_struct_info=%lx\n",
+			parameters[i].err_type_info,
+			parameters[i].err_struct_info);
+
+		sprintf(path, PATH_FORMAT, (int)parameters[i].cpu);
+		err_type_info.err_type_info=parameters[i].err_type_info;
+		err_struct_info.err_struct_info=parameters[i].err_struct_info;
+		memcpy(err_data_buffer.err_data_buffer,
+			parameters[i].err_data_buffer,
+			ERR_DATA_BUFFER_SIZE*8);
+
+		pid=fork();
+		if (pid==0) {
+			unsigned long mask[MASK_SIZE];
+			int j, k;
+
+			void *va1, *va2;
+
+			/* Allocate two memory areas va1 and va2 in shm */
+			va1=shmaddr+parameters[i].cpu*PAGE_SIZE;
+			va2=shmaddr+parameters[i].cpu*PAGE_SIZE+PAGE_SIZE;
+
+			vbprintf("va1=%lx, va2=%lx\n", (u64)va1, (u64)va2);
+			memset(va1, 0x1, PAGE_SIZE);
+			memset(va2, 0x2, PAGE_SIZE);
+
+			if (empty_data_buffer(err_data_buffer.err_data_buffer))
+				/* If not specified yet, construct data buffer
+				 * with va1
+				 */
+				construct_data_buf(path, err_type_info,
+					err_struct_info, &err_data_buffer,va1);
+
+			for (j=0;j<MASK_SIZE;j++)
+				mask[j]=0;
+
+			cpu=parameters[i].cpu;
+			k = cpu%64;
+			j = cpu/64;
+			mask[j] = 1UL << k;
+
+			if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) {
+				perror("Error sched_setaffinity:");
+				return -1;
+			}
+
+			for (j=0; j<parameters[i].loop; j++) {
+				log_info(parameters[i].cpu,"Injection ");
+				log_info(parameters[i].cpu,"on cpu%ld: #%d/%ld ",
+
+					parameters[i].cpu,j+1, parameters[i].loop);
+
+				/* Hold the lock */
+				if (one_lock)
+					lock(0);
+				else
+				/* Hold lock on this cpu */
+					lock(parameters[i].cpu);
+
+				if ((status=err_inject(parameters[i].cpu,
+					   path, err_type_info,
+					   err_struct_info, err_data_buffer))
+					   ==0) {
+					/* consume the error for "inject only"*/
+					memcpy(va2, va1, PAGE_SIZE);
+					memcpy(va1, va2, PAGE_SIZE);
+					log_info(parameters[i].cpu,
+						"successful\n");
+				}
+				else {
+					log_info(parameters[i].cpu,"fail:");
+					log_info(parameters[i].cpu,
+						"status=%d\n", status);
+					unlock(parameters[i].cpu);
+					break;
+				}
+				if (one_lock)
+				/* Release the lock */
+					unlock(0);
+				/* Release lock on this cpu */
+				else
+					unlock(parameters[i].cpu);
+
+				if (j < parameters[i].loop-1)
+					sleep(parameters[i].interval);
+			}
+			current_time=time(NULL);
+			log_info(parameters[i].cpu, "Done at %s", ctime(&current_time));
+			return 0;
+		}
+		else if (pid<0) {
+			perror("Error fork:");
+			continue;
+		}
+		child_pid[i]=pid;
+	}
+	for (i=0;i<num;i++)
+		waitpid(child_pid[i], NULL, 0);
+
+	if (one_lock)
+		free_sem(0);
+	else
+		for (i=0;i<num;i++)
+			free_sem(parameters[i].cpu);
+
+	printf("All done.\n");
+
+	return 0;
+  }
+
+  void help()
+  {
+	printf("err_inject_tool:\n");
+	printf("\t-q: query all capabilities. default: off\n");
+	printf("\t-m: procedure mode. 1: physical 2: virtual. default: 1\n");
+	printf("\t-i: inject errors. default: off\n");
+	printf("\t-l: one lock per cpu. default: one lock for all\n");
+	printf("\t-e: error parameters:\n");
+	printf("\t\tcpu,loop,interval,err_type_info,err_struct_info[,err_data_buffer[0],err_data_buffer[1],err_data_buffer[2]]\n");
+	printf("\t\t   cpu: logical cpu number the error will be inject in.\n");
+	printf("\t\t   loop: times the error will be injected.\n");
+	printf("\t\t   interval: In second. every so often one error is injected.\n");
+	printf("\t\t   err_type_info, err_struct_info: PAL parameters.\n");
+	printf("\t\t   err_data_buffer: PAL parameter. Optional. If not present,\n");
+	printf("\t\t                    it's constructed by tool automatically. Be\n");
+	printf("\t\t                    careful to provide err_data_buffer and make\n");
+	printf("\t\t                    sure it's working with the environment.\n");
+	printf("\t    Note:no space between error parameters.\n");
+	printf("\t    default: Take error parameters from err.conf instead of command line.\n");
+	printf("\t-v: verbose. default: off\n");
+	printf("\t-h: help\n\n");
+	printf("The tool will take err.conf file as ");
+	printf("input to inject single or multiple errors ");
+	printf("on one or multiple cpus in parallel.\n");
+  }
+
+  int main(int argc, char **argv)
+  {
+	char c;
+	int do_err_inj=0;
+	int do_query_all=0;
+	int count;
+	u32 m;
+
+	/* Default one lock for all cpu's */
+	one_lock=1;
+	while ((c = getopt(argc, argv, "m:iqvhle:")) != EOF)
+		switch (c) {
+			case 'm':	/* Procedure mode. 1: phys 2: virt */
+				count=sscanf(optarg, "%x", &m);
+				if (count!=1 || (m!=1 && m!=2)) {
+					printf("Wrong mode number.\n");
+					help();
+					return -1;
+				}
+				mode=m;
+				break;
+			case 'i':	/* Inject errors */
+				do_err_inj=1;
+				break;
+			case 'q':	/* Query */
+				do_query_all=1;
+				break;
+			case 'v':	/* Verbose */
+				verbose=1;
+				break;
+			case 'l':	/* One lock per cpu */
+				one_lock=0;
+				break;
+			case 'e':	/* error arguments */
+				/* Take parameters:
+				 * #cpu, loop, interval, err_type_info, err_struct_info[, err_data_buffer]
+				 * err_data_buffer is optional. Recommend not to specify
+				 * err_data_buffer. Better to use tool to generate it.
+				 */
+				count=sscanf(optarg,
+					"%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
+					&line_para.cpu,
+					&line_para.loop,
+					&line_para.interval,
+					&line_para.err_type_info,
+					&line_para.err_struct_info,
+					&line_para.err_data_buffer[0],
+					&line_para.err_data_buffer[1],
+					&line_para.err_data_buffer[2]);
+				if (count!=PARA_FIELD_NUM+3) {
+				    line_para.err_data_buffer[0]=-1,
+				    line_para.err_data_buffer[1]=-1,
+				    line_para.err_data_buffer[2]=-1;
+				    count=sscanf(optarg, "%lx, %lx, %lx, %lx, %lx\n",
+						&line_para.cpu,
+						&line_para.loop,
+						&line_para.interval,
+						&line_para.err_type_info,
+						&line_para.err_struct_info);
+				    if (count!=PARA_FIELD_NUM) {
+					printf("Wrong error arguments.\n");
+					help();
+					return -1;
+				    }
+				}
+				para=1;
+				break;
+			continue;
+				break;
+			case 'h':
+				help();
+				return 0;
+			default:
+				break;
+		}
+
+	if (do_query_all)
+		query_all_capabilities();
+	if (do_err_inj)
+		err_inj();
+
+	if (!do_query_all &&  !do_err_inj)
+		help();
+
+	return 0;
+  }
diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.txt
deleted file mode 100644
index 9f651c181429..000000000000
--- a/Documentation/ia64/err_inject.txt
+++ /dev/null
@@ -1,1068 +0,0 @@
-
-IPF Machine Check (MC) error inject tool
-========================================
-
-IPF Machine Check (MC) error inject tool is used to inject MC
-errors from Linux. The tool is a test bed for IPF MC work flow including
-hardware correctable error handling, OS recoverable error handling, MC
-event logging, etc.
-
-The tool includes two parts: a kernel driver and a user application
-sample. The driver provides interface to PAL to inject error
-and query error injection capabilities. The driver code is in
-arch/ia64/kernel/err_inject.c. The application sample (shown below)
-provides a combination of various errors and calls the driver's interface
-(sysfs interface) to inject errors or query error injection capabilities.
-
-The tool can be used to test Intel IPF machine MC handling capabilities.
-It's especially useful for people who can not access hardware MC injection
-tool to inject error. It's also very useful to integrate with other
-software test suits to do stressful testing on IPF.
-
-Below is a sample application as part of the whole tool. The sample
-can be used as a working test tool. Or it can be expanded to include
-more features. It also can be a integrated into a library or other user
-application to have more thorough test.
-
-The sample application takes err.conf as error configuration input. GCC
-compiles the code. After you install err_inject driver, you can run
-this sample application to inject errors.
-
-Errata: Itanium 2 Processors Specification Update lists some errata against
-the pal_mc_error_inject PAL procedure. The following err.conf has been tested
-on latest Montecito PAL.
-
-err.conf:
-
-#This is configuration file for err_inject_tool.
-#The format of the each line is:
-#cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer
-#where
-#	cpu: logical cpu number the error will be inject in.
-#	loop: times the error will be injected.
-#	interval: In second. every so often one error is injected.
-#	err_type_info, err_struct_info: PAL parameters.
-#
-#Note: All values are hex w/o or w/ 0x prefix.
-
-
-#On cpu2, inject only total 0x10 errors, interval 5 seconds
-#corrected, data cache, hier-2, physical addr(assigned by tool code).
-#working on Montecito latest PAL.
-2, 10, 5, 4101, 95
-
-#On cpu4, inject and consume total 0x10 errors, interval 5 seconds
-#corrected, data cache, hier-2, physical addr(assigned by tool code).
-#working on Montecito latest PAL.
-4, 10, 5, 4109, 95
-
-#On cpu15, inject and consume total 0x10 errors, interval 5 seconds
-#recoverable, DTR0, hier-2.
-#working on Montecito latest PAL.
-0xf, 0x10, 5, 4249, 15
-
-The sample application source code:
-
-err_injection_tool.c:
-
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Copyright (C) 2006 Intel Co
- *	Fenghua Yu <fenghua.yu@intel.com>
- *
- */
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <sched.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include <errno.h>
-#include <time.h>
-#include <sys/ipc.h>
-#include <sys/sem.h>
-#include <sys/wait.h>
-#include <sys/mman.h>
-#include <sys/shm.h>
-
-#define MAX_FN_SIZE 		256
-#define MAX_BUF_SIZE 		256
-#define DATA_BUF_SIZE 		256
-#define NR_CPUS 		512
-#define MAX_TASK_NUM		2048
-#define MIN_INTERVAL		5	// seconds
-#define	ERR_DATA_BUFFER_SIZE 	3	// Three 8-byte.
-#define PARA_FIELD_NUM		5
-#define MASK_SIZE		(NR_CPUS/64)
-#define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/"
-
-int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask);
-
-int verbose;
-#define vbprintf if (verbose) printf
-
-int log_info(int cpu, const char *fmt, ...)
-{
-	FILE *log;
-	char fn[MAX_FN_SIZE];
-	char buf[MAX_BUF_SIZE];
-	va_list args;
-
-	sprintf(fn, "%d.log", cpu);
-	log=fopen(fn, "a+");
-	if (log==NULL) {
-		perror("Error open:");
-		return -1;
-	}
-
-	va_start(args, fmt);
-	vprintf(fmt, args);
-	memset(buf, 0, MAX_BUF_SIZE);
-	vsprintf(buf, fmt, args);
-	va_end(args);
-
-	fwrite(buf, sizeof(buf), 1, log);
-	fclose(log);
-
-	return 0;
-}
-
-typedef unsigned long u64;
-typedef unsigned int  u32;
-
-typedef union err_type_info_u {
-	struct {
-		u64	mode		: 3,	/* 0-2 */
-			err_inj		: 3,	/* 3-5 */
-			err_sev		: 2,	/* 6-7 */
-			err_struct	: 5,	/* 8-12 */
-			struct_hier	: 3,	/* 13-15 */
-			reserved	: 48;	/* 16-63 */
-	} err_type_info_u;
-	u64	err_type_info;
-} err_type_info_t;
-
-typedef union err_struct_info_u {
-	struct {
-		u64	siv		: 1,	/* 0	 */
-			c_t		: 2,	/* 1-2	 */
-			cl_p		: 3,	/* 3-5	 */
-			cl_id		: 3,	/* 6-8	 */
-			cl_dp		: 1,	/* 9	 */
-			reserved1	: 22,	/* 10-31 */
-			tiv		: 1,	/* 32	 */
-			trigger		: 4,	/* 33-36 */
-			trigger_pl 	: 3,	/* 37-39 */
-			reserved2 	: 24;	/* 40-63 */
-	} err_struct_info_cache;
-	struct {
-		u64	siv		: 1,	/* 0	 */
-			tt		: 2,	/* 1-2	 */
-			tc_tr		: 2,	/* 3-4	 */
-			tr_slot		: 8,	/* 5-12	 */
-			reserved1	: 19,	/* 13-31 */
-			tiv		: 1,	/* 32	 */
-			trigger		: 4,	/* 33-36 */
-			trigger_pl 	: 3,	/* 37-39 */
-			reserved2 	: 24;	/* 40-63 */
-	} err_struct_info_tlb;
-	struct {
-		u64	siv		: 1,	/* 0	 */
-			regfile_id	: 4,	/* 1-4	 */
-			reg_num		: 7,	/* 5-11	 */
-			reserved1	: 20,	/* 12-31 */
-			tiv		: 1,	/* 32	 */
-			trigger		: 4,	/* 33-36 */
-			trigger_pl 	: 3,	/* 37-39 */
-			reserved2 	: 24;	/* 40-63 */
-	} err_struct_info_register;
-	struct {
-		u64	reserved;
-	} err_struct_info_bus_processor_interconnect;
-	u64	err_struct_info;
-} err_struct_info_t;
-
-typedef union err_data_buffer_u {
-	struct {
-		u64	trigger_addr;		/* 0-63		*/
-		u64	inj_addr;		/* 64-127 	*/
-		u64	way		: 5,	/* 128-132	*/
-			index		: 20,	/* 133-152	*/
-					: 39;	/* 153-191	*/
-	} err_data_buffer_cache;
-	struct {
-		u64	trigger_addr;		/* 0-63		*/
-		u64	inj_addr;		/* 64-127 	*/
-		u64	way		: 5,	/* 128-132	*/
-			index		: 20,	/* 133-152	*/
-			reserved	: 39;	/* 153-191	*/
-	} err_data_buffer_tlb;
-	struct {
-		u64	trigger_addr;		/* 0-63		*/
-	} err_data_buffer_register;
-	struct {
-		u64	reserved;		/* 0-63		*/
-	} err_data_buffer_bus_processor_interconnect;
-	u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
-} err_data_buffer_t;
-
-typedef union capabilities_u {
-	struct {
-		u64	i		: 1,
-			d		: 1,
-			rv		: 1,
-			tag		: 1,
-			data		: 1,
-			mesi		: 1,
-			dp		: 1,
-			reserved1	: 3,
-			pa		: 1,
-			va		: 1,
-			wi		: 1,
-			reserved2	: 20,
-			trigger		: 1,
-			trigger_pl	: 1,
-			reserved3	: 30;
-	} capabilities_cache;
-	struct {
-		u64	d		: 1,
-			i		: 1,
-			rv		: 1,
-			tc		: 1,
-			tr		: 1,
-			reserved1	: 27,
-			trigger		: 1,
-			trigger_pl	: 1,
-			reserved2	: 30;
-	} capabilities_tlb;
-	struct {
-		u64	gr_b0		: 1,
-			gr_b1		: 1,
-			fr		: 1,
-			br		: 1,
-			pr		: 1,
-			ar		: 1,
-			cr		: 1,
-			rr		: 1,
-			pkr		: 1,
-			dbr		: 1,
-			ibr		: 1,
-			pmc		: 1,
-			pmd		: 1,
-			reserved1	: 3,
-			regnum		: 1,
-			reserved2	: 15,
-			trigger		: 1,
-			trigger_pl	: 1,
-			reserved3	: 30;
-	} capabilities_register;
-	struct {
-		u64	reserved;
-	} capabilities_bus_processor_interconnect;
-} capabilities_t;
-
-typedef struct resources_s {
-	u64	ibr0		: 1,
-		ibr2		: 1,
-		ibr4		: 1,
-		ibr6		: 1,
-		dbr0		: 1,
-		dbr2		: 1,
-		dbr4		: 1,
-		dbr6		: 1,
-		reserved	: 48;
-} resources_t;
-
-
-long get_page_size(void)
-{
-	long page_size=sysconf(_SC_PAGESIZE);
-	return page_size;
-}
-
-#define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size())
-#define SHM_SIZE (2*PAGE_SIZE*NR_CPUS)
-#define SHM_VA 0x2000000100000000
-
-int shmid;
-void *shmaddr;
-
-int create_shm(void)
-{
-	key_t key;
-	char fn[MAX_FN_SIZE];
-
-	/* cpu0 is always existing */
-	sprintf(fn, PATH_FORMAT, 0);
-	if ((key = ftok(fn, 's')) == -1) {
-		perror("ftok");
-		return -1;
-	}
-
-	shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT);
-	if (shmid == -1) {
-		if (errno==EEXIST) {
-			shmid = shmget(key, SHM_SIZE, 0);
-			if (shmid == -1) {
-				perror("shmget");
-				return -1;
-			}
-		}
-		else {
-			perror("shmget");
-			return -1;
-		}
-	}
-	vbprintf("shmid=%d", shmid);
-
-	/* connect to the segment: */
-	shmaddr = shmat(shmid, (void *)SHM_VA, 0);
-	if (shmaddr == (void*)-1) {
-		perror("shmat");
-		return -1;
-	}
-
-	memset(shmaddr, 0, SHM_SIZE);
-	mlock(shmaddr, SHM_SIZE);
-
-	return 0;
-}
-
-int free_shm()
-{
-	munlock(shmaddr, SHM_SIZE);
-        shmdt(shmaddr);
-	semctl(shmid, 0, IPC_RMID);
-
-	return 0;
-}
-
-#ifdef _SEM_SEMUN_UNDEFINED
-union semun
-{
-	int val;
-	struct semid_ds *buf;
-	unsigned short int *array;
-	struct seminfo *__buf;
-};
-#endif
-
-u32 mode=1; /* 1: physical mode; 2: virtual mode. */
-int one_lock=1;
-key_t key[NR_CPUS];
-int semid[NR_CPUS];
-
-int create_sem(int cpu)
-{
-	union semun arg;
-	char fn[MAX_FN_SIZE];
-	int sid;
-
-	sprintf(fn, PATH_FORMAT, cpu);
-	sprintf(fn, "%s/%s", fn, "err_type_info");
-	if ((key[cpu] = ftok(fn, 'e')) == -1) {
-		perror("ftok");
-		return -1;
-	}
-
-	if (semid[cpu]!=0)
-		return 0;
-
-	/* clear old semaphore */
-	if ((sid = semget(key[cpu], 1, 0)) != -1)
-		semctl(sid, 0, IPC_RMID);
-
-	/* get one semaphore */
-	if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) {
-		perror("semget");
-		printf("Please remove semaphore with key=0x%lx, then run the tool.\n",
-			(u64)key[cpu]);
-		return -1;
-	}
-
-	vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu,
-		(u64)key[cpu]);
-	/* initialize the semaphore to 1: */
-	arg.val = 1;
-	if (semctl(semid[cpu], 0, SETVAL, arg) == -1) {
-		perror("semctl");
-		return -1;
-	}
-
-	return 0;
-}
-
-static int lock(int cpu)
-{
-	struct sembuf lock;
-
-	lock.sem_num = cpu;
-	lock.sem_op = 1;
-	semop(semid[cpu], &lock, 1);
-
-        return 0;
-}
-
-static int unlock(int cpu)
-{
-	struct sembuf unlock;
-
-	unlock.sem_num = cpu;
-	unlock.sem_op = -1;
-	semop(semid[cpu], &unlock, 1);
-
-        return 0;
-}
-
-void free_sem(int cpu)
-{
-	semctl(semid[cpu], 0, IPC_RMID);
-}
-
-int wr_multi(char *fn, unsigned long *data, int size)
-{
-	int fd;
-	char buf[MAX_BUF_SIZE];
-	int ret;
-
-	if (size==1)
-		sprintf(buf, "%lx", *data);
-	else if (size==3)
-		sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]);
-	else {
-		fprintf(stderr,"write to file with wrong size!\n");
-		return -1;
-	}
-
-	fd=open(fn, O_RDWR);
-	if (!fd) {
-		perror("Error:");
-		return -1;
-	}
-	ret=write(fd, buf, sizeof(buf));
-	close(fd);
-	return ret;
-}
-
-int wr(char *fn, unsigned long data)
-{
-	return wr_multi(fn, &data, 1);
-}
-
-int rd(char *fn, unsigned long *data)
-{
-	int fd;
-	char buf[MAX_BUF_SIZE];
-
-	fd=open(fn, O_RDONLY);
-	if (fd<0) {
-		perror("Error:");
-		return -1;
-	}
-	read(fd, buf, MAX_BUF_SIZE);
-	*data=strtoul(buf, NULL, 16);
-	close(fd);
-	return 0;
-}
-
-int rd_status(char *path, int *status)
-{
-	char fn[MAX_FN_SIZE];
-	sprintf(fn, "%s/status", path);
-	if (rd(fn, (u64*)status)<0) {
-		perror("status reading error.\n");
-		return -1;
-	}
-
-	return 0;
-}
-
-int rd_capabilities(char *path, u64 *capabilities)
-{
-	char fn[MAX_FN_SIZE];
-	sprintf(fn, "%s/capabilities", path);
-	if (rd(fn, capabilities)<0) {
-		perror("capabilities reading error.\n");
-		return -1;
-	}
-
-	return 0;
-}
-
-int rd_all(char *path)
-{
-	unsigned long err_type_info, err_struct_info, err_data_buffer;
-	int status;
-	unsigned long capabilities, resources;
-	char fn[MAX_FN_SIZE];
-
-	sprintf(fn, "%s/err_type_info", path);
-	if (rd(fn, &err_type_info)<0) {
-		perror("err_type_info reading error.\n");
-		return -1;
-	}
-	printf("err_type_info=%lx\n", err_type_info);
-
-	sprintf(fn, "%s/err_struct_info", path);
-	if (rd(fn, &err_struct_info)<0) {
-		perror("err_struct_info reading error.\n");
-		return -1;
-	}
-	printf("err_struct_info=%lx\n", err_struct_info);
-
-	sprintf(fn, "%s/err_data_buffer", path);
-	if (rd(fn, &err_data_buffer)<0) {
-		perror("err_data_buffer reading error.\n");
-		return -1;
-	}
-	printf("err_data_buffer=%lx\n", err_data_buffer);
-
-	sprintf(fn, "%s/status", path);
-	if (rd("status", (u64*)&status)<0) {
-		perror("status reading error.\n");
-		return -1;
-	}
-	printf("status=%d\n", status);
-
-	sprintf(fn, "%s/capabilities", path);
-	if (rd(fn,&capabilities)<0) {
-		perror("capabilities reading error.\n");
-		return -1;
-	}
-	printf("capabilities=%lx\n", capabilities);
-
-	sprintf(fn, "%s/resources", path);
-	if (rd(fn, &resources)<0) {
-		perror("resources reading error.\n");
-		return -1;
-	}
-	printf("resources=%lx\n", resources);
-
-	return 0;
-}
-
-int query_capabilities(char *path, err_type_info_t err_type_info,
-			u64 *capabilities)
-{
-	char fn[MAX_FN_SIZE];
-	err_struct_info_t err_struct_info;
-	err_data_buffer_t err_data_buffer;
-
-	err_struct_info.err_struct_info=0;
-	memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8);
-
-	sprintf(fn, "%s/err_type_info", path);
-	wr(fn, err_type_info.err_type_info);
-	sprintf(fn, "%s/err_struct_info", path);
-	wr(fn, 0x0);
-	sprintf(fn, "%s/err_data_buffer", path);
-	wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
-
-	// Fire pal_mc_error_inject procedure.
-	sprintf(fn, "%s/call_start", path);
-	wr(fn, mode);
-
-	if (rd_capabilities(path, capabilities)<0)
-		return -1;
-
-	return 0;
-}
-
-int query_all_capabilities()
-{
-	int status;
-	err_type_info_t err_type_info;
-	int err_sev, err_struct, struct_hier;
-	int cap=0;
-	u64 capabilities;
-	char path[MAX_FN_SIZE];
-
-	err_type_info.err_type_info=0;			// Initial
-	err_type_info.err_type_info_u.mode=0;		// Query mode;
-	err_type_info.err_type_info_u.err_inj=0;
-
-	printf("All capabilities implemented in pal_mc_error_inject:\n");
-	sprintf(path, PATH_FORMAT ,0);
-	for (err_sev=0;err_sev<3;err_sev++)
-		for (err_struct=0;err_struct<5;err_struct++)
-			for (struct_hier=0;struct_hier<5;struct_hier++)
-	{
-		status=-1;
-		capabilities=0;
-		err_type_info.err_type_info_u.err_sev=err_sev;
-		err_type_info.err_type_info_u.err_struct=err_struct;
-		err_type_info.err_type_info_u.struct_hier=struct_hier;
-
-		if (query_capabilities(path, err_type_info, &capabilities)<0)
-			continue;
-
-		if (rd_status(path, &status)<0)
-			continue;
-
-		if (status==0) {
-			cap=1;
-			printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ",
-				err_sev, err_struct, struct_hier);
-			printf("capabilities 0x%lx\n", capabilities);
-		}
-	}
-	if (!cap) {
-		printf("No capabilities supported.\n");
-		return 0;
-	}
-
-	return 0;
-}
-
-int err_inject(int cpu, char *path, err_type_info_t err_type_info,
-		err_struct_info_t err_struct_info,
-		err_data_buffer_t err_data_buffer)
-{
-	int status;
-	char fn[MAX_FN_SIZE];
-
-	log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ",
-		err_type_info.err_type_info,
-		err_struct_info.err_struct_info);
-	log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n",
-		err_data_buffer.err_data_buffer[0],
-		err_data_buffer.err_data_buffer[1],
-		err_data_buffer.err_data_buffer[2]);
-	sprintf(fn, "%s/err_type_info", path);
-	wr(fn, err_type_info.err_type_info);
-	sprintf(fn, "%s/err_struct_info", path);
-	wr(fn, err_struct_info.err_struct_info);
-	sprintf(fn, "%s/err_data_buffer", path);
-	wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
-
-	// Fire pal_mc_error_inject procedure.
-	sprintf(fn, "%s/call_start", path);
-	wr(fn,mode);
-
-	if (rd_status(path, &status)<0) {
-		vbprintf("fail: read status\n");
-		return -100;
-	}
-
-	if (status!=0) {
-		log_info(cpu, "fail: status=%d\n", status);
-		return status;
-	}
-
-	return status;
-}
-
-static int construct_data_buf(char *path, err_type_info_t err_type_info,
-		err_struct_info_t err_struct_info,
-		err_data_buffer_t *err_data_buffer,
-		void *va1)
-{
-	char fn[MAX_FN_SIZE];
-	u64 virt_addr=0, phys_addr=0;
-
-	vbprintf("va1=%lx\n", (u64)va1);
-	memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8);
-
-	switch (err_type_info.err_type_info_u.err_struct) {
-		case 1: // Cache
-			switch (err_struct_info.err_struct_info_cache.cl_id) {
-				case 1: //Virtual addr
-					err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1;
-					break;
-				case 2: //Phys addr
-					sprintf(fn, "%s/virtual_to_phys", path);
-					virt_addr=(u64)va1;
-					if (wr(fn,virt_addr)<0)
-						return -1;
-					rd(fn, &phys_addr);
-					err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr;
-					break;
-				default:
-					printf("Not supported cl_id\n");
-					break;
-			}
-			break;
-		case 2: //  TLB
-			break;
-		case 3: //  Register file
-			break;
-		case 4: //  Bus/system interconnect
-		default:
-			printf("Not supported err_struct\n");
-			break;
-	}
-
-	return 0;
-}
-
-typedef struct {
-	u64 cpu;
-	u64 loop;
-	u64 interval;
-	u64 err_type_info;
-	u64 err_struct_info;
-	u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
-} parameters_t;
-
-parameters_t line_para;
-int para;
-
-static int empty_data_buffer(u64 *err_data_buffer)
-{
-	int empty=1;
-	int i;
-
-	for (i=0;i<ERR_DATA_BUFFER_SIZE; i++)
-	   if (err_data_buffer[i]!=-1)
-		empty=0;
-
-	return empty;
-}
-
-int err_inj()
-{
-	err_type_info_t err_type_info;
-	err_struct_info_t err_struct_info;
-	err_data_buffer_t err_data_buffer;
-	int count;
-	FILE *fp;
-	unsigned long cpu, loop, interval, err_type_info_conf, err_struct_info_conf;
-	u64 err_data_buffer_conf[ERR_DATA_BUFFER_SIZE];
-	int num;
-	int i;
-	char path[MAX_FN_SIZE];
-	parameters_t parameters[MAX_TASK_NUM]={};
-	pid_t child_pid[MAX_TASK_NUM];
-	time_t current_time;
-	int status;
-
-	if (!para) {
-	    fp=fopen("err.conf", "r");
-	    if (fp==NULL) {
-		perror("Error open err.conf");
-		return -1;
-	    }
-
-	    num=0;
-	    while (!feof(fp)) {
-		char buf[256];
-		memset(buf,0,256);
-		fgets(buf, 256, fp);
-		count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
-				&cpu, &loop, &interval,&err_type_info_conf,
-				&err_struct_info_conf,
-				&err_data_buffer_conf[0],
-				&err_data_buffer_conf[1],
-				&err_data_buffer_conf[2]);
-		if (count!=PARA_FIELD_NUM+3) {
-			err_data_buffer_conf[0]=-1;
-			err_data_buffer_conf[1]=-1;
-			err_data_buffer_conf[2]=-1;
-			count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx\n",
-				&cpu, &loop, &interval,&err_type_info_conf,
-				&err_struct_info_conf);
-			if (count!=PARA_FIELD_NUM)
-				continue;
-		}
-
-		parameters[num].cpu=cpu;
-		parameters[num].loop=loop;
-		parameters[num].interval= interval>MIN_INTERVAL
-					  ?interval:MIN_INTERVAL;
-		parameters[num].err_type_info=err_type_info_conf;
-		parameters[num].err_struct_info=err_struct_info_conf;
-		memcpy(parameters[num++].err_data_buffer,
-			err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ;
-
-		if (num>=MAX_TASK_NUM)
-			break;
-	    }
-	}
-	else {
-		parameters[0].cpu=line_para.cpu;
-		parameters[0].loop=line_para.loop;
-		parameters[0].interval= line_para.interval>MIN_INTERVAL
-					  ?line_para.interval:MIN_INTERVAL;
-		parameters[0].err_type_info=line_para.err_type_info;
-		parameters[0].err_struct_info=line_para.err_struct_info;
-		memcpy(parameters[0].err_data_buffer,
-			line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ;
-
-		num=1;
-	}
-
-	/* Create semaphore: If one_lock, one semaphore for all processors.
-	   Otherwise, one semaphore for each processor. */
-	if (one_lock) {
-		if (create_sem(0)) {
-			printf("Can not create semaphore...exit\n");
-			free_sem(0);
-			return -1;
-		}
-	}
-	else {
-		for (i=0;i<num;i++) {
-		   if (create_sem(parameters[i].cpu)) {
-			printf("Can not create semaphore for cpu%d...exit\n",i);
-			free_sem(parameters[num].cpu);
-			return -1;
-		   }
-		}
-	}
-
-	/* Create a shm segment which will be used to inject/consume errors on.*/
-	if (create_shm()==-1) {
-		printf("Error to create shm...exit\n");
-		return -1;
-	}
-
-	for (i=0;i<num;i++) {
-		pid_t pid;
-
-		current_time=time(NULL);
-		log_info(parameters[i].cpu, "\nBegine at %s", ctime(&current_time));
-		log_info(parameters[i].cpu, "Configurations:\n");
-		log_info(parameters[i].cpu,"On cpu%ld: loop=%lx, interval=%lx(s)",
-			parameters[i].cpu,
-			parameters[i].loop,
-			parameters[i].interval);
-		log_info(parameters[i].cpu," err_type_info=%lx,err_struct_info=%lx\n",
-			parameters[i].err_type_info,
-			parameters[i].err_struct_info);
-
-		sprintf(path, PATH_FORMAT, (int)parameters[i].cpu);
-		err_type_info.err_type_info=parameters[i].err_type_info;
-		err_struct_info.err_struct_info=parameters[i].err_struct_info;
-		memcpy(err_data_buffer.err_data_buffer,
-			parameters[i].err_data_buffer,
-			ERR_DATA_BUFFER_SIZE*8);
-
-		pid=fork();
-		if (pid==0) {
-			unsigned long mask[MASK_SIZE];
-			int j, k;
-
-			void *va1, *va2;
-
-			/* Allocate two memory areas va1 and va2 in shm */
-			va1=shmaddr+parameters[i].cpu*PAGE_SIZE;
-			va2=shmaddr+parameters[i].cpu*PAGE_SIZE+PAGE_SIZE;
-
-			vbprintf("va1=%lx, va2=%lx\n", (u64)va1, (u64)va2);
-			memset(va1, 0x1, PAGE_SIZE);
-			memset(va2, 0x2, PAGE_SIZE);
-
-			if (empty_data_buffer(err_data_buffer.err_data_buffer))
-				/* If not specified yet, construct data buffer
-				 * with va1
-				 */
-				construct_data_buf(path, err_type_info,
-					err_struct_info, &err_data_buffer,va1);
-
-			for (j=0;j<MASK_SIZE;j++)
-				mask[j]=0;
-
-			cpu=parameters[i].cpu;
-			k = cpu%64;
-			j = cpu/64;
-			mask[j] = 1UL << k;
-
-			if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) {
-				perror("Error sched_setaffinity:");
-				return -1;
-			}
-
-			for (j=0; j<parameters[i].loop; j++) {
-				log_info(parameters[i].cpu,"Injection ");
-				log_info(parameters[i].cpu,"on cpu%ld: #%d/%ld ",
-
-					parameters[i].cpu,j+1, parameters[i].loop);
-
-				/* Hold the lock */
-				if (one_lock)
-					lock(0);
-				else
-				/* Hold lock on this cpu */
-					lock(parameters[i].cpu);
-
-				if ((status=err_inject(parameters[i].cpu,
-					   path, err_type_info,
-					   err_struct_info, err_data_buffer))
-					   ==0) {
-					/* consume the error for "inject only"*/
-					memcpy(va2, va1, PAGE_SIZE);
-					memcpy(va1, va2, PAGE_SIZE);
-					log_info(parameters[i].cpu,
-						"successful\n");
-				}
-				else {
-					log_info(parameters[i].cpu,"fail:");
-					log_info(parameters[i].cpu,
-						"status=%d\n", status);
-					unlock(parameters[i].cpu);
-					break;
-				}
-				if (one_lock)
-				/* Release the lock */
-					unlock(0);
-				/* Release lock on this cpu */
-				else
-					unlock(parameters[i].cpu);
-
-				if (j < parameters[i].loop-1)
-					sleep(parameters[i].interval);
-			}
-			current_time=time(NULL);
-			log_info(parameters[i].cpu, "Done at %s", ctime(&current_time));
-			return 0;
-		}
-		else if (pid<0) {
-			perror("Error fork:");
-			continue;
-		}
-		child_pid[i]=pid;
-	}
-	for (i=0;i<num;i++)
-		waitpid(child_pid[i], NULL, 0);
-
-	if (one_lock)
-		free_sem(0);
-	else
-		for (i=0;i<num;i++)
-			free_sem(parameters[i].cpu);
-
-	printf("All done.\n");
-
-	return 0;
-}
-
-void help()
-{
-	printf("err_inject_tool:\n");
-	printf("\t-q: query all capabilities. default: off\n");
-	printf("\t-m: procedure mode. 1: physical 2: virtual. default: 1\n");
-	printf("\t-i: inject errors. default: off\n");
-	printf("\t-l: one lock per cpu. default: one lock for all\n");
-	printf("\t-e: error parameters:\n");
-	printf("\t\tcpu,loop,interval,err_type_info,err_struct_info[,err_data_buffer[0],err_data_buffer[1],err_data_buffer[2]]\n");
-	printf("\t\t   cpu: logical cpu number the error will be inject in.\n");
-	printf("\t\t   loop: times the error will be injected.\n");
-	printf("\t\t   interval: In second. every so often one error is injected.\n");
-	printf("\t\t   err_type_info, err_struct_info: PAL parameters.\n");
-	printf("\t\t   err_data_buffer: PAL parameter. Optional. If not present,\n");
-	printf("\t\t                    it's constructed by tool automatically. Be\n");
-	printf("\t\t                    careful to provide err_data_buffer and make\n");
-	printf("\t\t                    sure it's working with the environment.\n");
-	printf("\t    Note:no space between error parameters.\n");
-	printf("\t    default: Take error parameters from err.conf instead of command line.\n");
-	printf("\t-v: verbose. default: off\n");
-	printf("\t-h: help\n\n");
-	printf("The tool will take err.conf file as ");
-	printf("input to inject single or multiple errors ");
-	printf("on one or multiple cpus in parallel.\n");
-}
-
-int main(int argc, char **argv)
-{
-	char c;
-	int do_err_inj=0;
-	int do_query_all=0;
-	int count;
-	u32 m;
-
-	/* Default one lock for all cpu's */
-	one_lock=1;
-	while ((c = getopt(argc, argv, "m:iqvhle:")) != EOF)
-		switch (c) {
-			case 'm':	/* Procedure mode. 1: phys 2: virt */
-				count=sscanf(optarg, "%x", &m);
-				if (count!=1 || (m!=1 && m!=2)) {
-					printf("Wrong mode number.\n");
-					help();
-					return -1;
-				}
-				mode=m;
-				break;
-			case 'i':	/* Inject errors */
-				do_err_inj=1;
-				break;
-			case 'q':	/* Query */
-				do_query_all=1;
-				break;
-			case 'v':	/* Verbose */
-				verbose=1;
-				break;
-			case 'l':	/* One lock per cpu */
-				one_lock=0;
-				break;
-			case 'e':	/* error arguments */
-				/* Take parameters:
-				 * #cpu, loop, interval, err_type_info, err_struct_info[, err_data_buffer]
-				 * err_data_buffer is optional. Recommend not to specify
-				 * err_data_buffer. Better to use tool to generate it.
-				 */
-				count=sscanf(optarg,
-					"%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
-					&line_para.cpu,
-					&line_para.loop,
-					&line_para.interval,
-					&line_para.err_type_info,
-					&line_para.err_struct_info,
-					&line_para.err_data_buffer[0],
-					&line_para.err_data_buffer[1],
-					&line_para.err_data_buffer[2]);
-				if (count!=PARA_FIELD_NUM+3) {
-				    line_para.err_data_buffer[0]=-1,
-				    line_para.err_data_buffer[1]=-1,
-			 	    line_para.err_data_buffer[2]=-1;
-				    count=sscanf(optarg, "%lx, %lx, %lx, %lx, %lx\n",
-						&line_para.cpu,
-						&line_para.loop,
-						&line_para.interval,
-						&line_para.err_type_info,
-						&line_para.err_struct_info);
-				    if (count!=PARA_FIELD_NUM) {
-					printf("Wrong error arguments.\n");
-					help();
-					return -1;
-				    }
-				}
-				para=1;
-				break;
-			continue;
-				break;
-			case 'h':
-				help();
-				return 0;
-			default:
-				break;
-		}
-
-	if (do_query_all)
-		query_all_capabilities();
-	if (do_err_inj)
-		err_inj();
-
-	if (!do_query_all &&  !do_err_inj)
-		help();
-
-	return 0;
-}
-
diff --git a/Documentation/ia64/fsys.rst b/Documentation/ia64/fsys.rst
new file mode 100644
index 000000000000..a702d2cc94b6
--- /dev/null
+++ b/Documentation/ia64/fsys.rst
@@ -0,0 +1,303 @@
+===================================
+Light-weight System Calls for IA-64
+===================================
+
+		        Started: 13-Jan-2003
+
+		    Last update: 27-Sep-2003
+
+	              David Mosberger-Tang
+		      <davidm@hpl.hp.com>
+
+Using the "epc" instruction effectively introduces a new mode of
+execution to the ia64 linux kernel.  We call this mode the
+"fsys-mode".  To recap, the normal states of execution are:
+
+  - kernel mode:
+	Both the register stack and the memory stack have been
+	switched over to kernel memory.  The user-level state is saved
+	in a pt-regs structure at the top of the kernel memory stack.
+
+  - user mode:
+	Both the register stack and the kernel stack are in
+	user memory.  The user-level state is contained in the
+	CPU registers.
+
+  - bank 0 interruption-handling mode:
+	This is the non-interruptible state which all
+	interruption-handlers start execution in.  The user-level
+	state remains in the CPU registers and some kernel state may
+	be stored in bank 0 of registers r16-r31.
+
+In contrast, fsys-mode has the following special properties:
+
+  - execution is at privilege level 0 (most-privileged)
+
+  - CPU registers may contain a mixture of user-level and kernel-level
+    state (it is the responsibility of the kernel to ensure that no
+    security-sensitive kernel-level state is leaked back to
+    user-level)
+
+  - execution is interruptible and preemptible (an fsys-mode handler
+    can disable interrupts and avoid all other interruption-sources
+    to avoid preemption)
+
+  - neither the memory-stack nor the register-stack can be trusted while
+    in fsys-mode (they point to the user-level stacks, which may
+    be invalid, or completely bogus addresses)
+
+In summary, fsys-mode is much more similar to running in user-mode
+than it is to running in kernel-mode.  Of course, given that the
+privilege level is at level 0, this means that fsys-mode requires some
+care (see below).
+
+
+How to tell fsys-mode
+=====================
+
+Linux operates in fsys-mode when (a) the privilege level is 0 (most
+privileged) and (b) the stacks have NOT been switched to kernel memory
+yet.  For convenience, the header file <asm-ia64/ptrace.h> provides
+three macros::
+
+	user_mode(regs)
+	user_stack(task,regs)
+	fsys_mode(task,regs)
+
+The "regs" argument is a pointer to a pt_regs structure.  The "task"
+argument is a pointer to the task structure to which the "regs"
+pointer belongs to.  user_mode() returns TRUE if the CPU state pointed
+to by "regs" was executing in user mode (privilege level 3).
+user_stack() returns TRUE if the state pointed to by "regs" was
+executing on the user-level stack(s).  Finally, fsys_mode() returns
+TRUE if the CPU state pointed to by "regs" was executing in fsys-mode.
+The fsys_mode() macro is equivalent to the expression::
+
+	!user_mode(regs) && user_stack(task,regs)
+
+How to write an fsyscall handler
+================================
+
+The file arch/ia64/kernel/fsys.S contains a table of fsyscall-handlers
+(fsyscall_table).  This table contains one entry for each system call.
+By default, a system call is handled by fsys_fallback_syscall().  This
+routine takes care of entering (full) kernel mode and calling the
+normal Linux system call handler.  For performance-critical system
+calls, it is possible to write a hand-tuned fsyscall_handler.  For
+example, fsys.S contains fsys_getpid(), which is a hand-tuned version
+of the getpid() system call.
+
+The entry and exit-state of an fsyscall handler is as follows:
+
+Machine state on entry to fsyscall handler
+------------------------------------------
+
+  ========= ===============================================================
+  r10	    0
+  r11	    saved ar.pfs (a user-level value)
+  r15	    system call number
+  r16	    "current" task pointer (in normal kernel-mode, this is in r13)
+  r32-r39   system call arguments
+  b6	    return address (a user-level value)
+  ar.pfs    previous frame-state (a user-level value)
+  PSR.be    cleared to zero (i.e., little-endian byte order is in effect)
+  -         all other registers may contain values passed in from user-mode
+  ========= ===============================================================
+
+Required machine state on exit to fsyscall handler
+--------------------------------------------------
+
+  ========= ===========================================================
+  r11	    saved ar.pfs (as passed into the fsyscall handler)
+  r15	    system call number (as passed into the fsyscall handler)
+  r32-r39   system call arguments (as passed into the fsyscall handler)
+  b6	    return address (as passed into the fsyscall handler)
+  ar.pfs    previous frame-state (as passed into the fsyscall handler)
+  ========= ===========================================================
+
+Fsyscall handlers can execute with very little overhead, but with that
+speed comes a set of restrictions:
+
+ * Fsyscall-handlers MUST check for any pending work in the flags
+   member of the thread-info structure and if any of the
+   TIF_ALLWORK_MASK flags are set, the handler needs to fall back on
+   doing a full system call (by calling fsys_fallback_syscall).
+
+ * Fsyscall-handlers MUST preserve incoming arguments (r32-r39, r11,
+   r15, b6, and ar.pfs) because they will be needed in case of a
+   system call restart.  Of course, all "preserved" registers also
+   must be preserved, in accordance to the normal calling conventions.
+
+ * Fsyscall-handlers MUST check argument registers for containing a
+   NaT value before using them in any way that could trigger a
+   NaT-consumption fault.  If a system call argument is found to
+   contain a NaT value, an fsyscall-handler may return immediately
+   with r8=EINVAL, r10=-1.
+
+ * Fsyscall-handlers MUST NOT use the "alloc" instruction or perform
+   any other operation that would trigger mandatory RSE
+   (register-stack engine) traffic.
+
+ * Fsyscall-handlers MUST NOT write to any stacked registers because
+   it is not safe to assume that user-level called a handler with the
+   proper number of arguments.
+
+ * Fsyscall-handlers need to be careful when accessing per-CPU variables:
+   unless proper safe-guards are taken (e.g., interruptions are avoided),
+   execution may be pre-empted and resumed on another CPU at any given
+   time.
+
+ * Fsyscall-handlers must be careful not to leak sensitive kernel'
+   information back to user-level.  In particular, before returning to
+   user-level, care needs to be taken to clear any scratch registers
+   that could contain sensitive information (note that the current
+   task pointer is not considered sensitive: it's already exposed
+   through ar.k6).
+
+ * Fsyscall-handlers MUST NOT access user-memory without first
+   validating access-permission (this can be done typically via
+   probe.r.fault and/or probe.w.fault) and without guarding against
+   memory access exceptions (this can be done with the EX() macros
+   defined by asmmacro.h).
+
+The above restrictions may seem draconian, but remember that it's
+possible to trade off some of the restrictions by paying a slightly
+higher overhead.  For example, if an fsyscall-handler could benefit
+from the shadow register bank, it could temporarily disable PSR.i and
+PSR.ic, switch to bank 0 (bsw.0) and then use the shadow registers as
+needed.  In other words, following the above rules yields extremely
+fast system call execution (while fully preserving system call
+semantics), but there is also a lot of flexibility in handling more
+complicated cases.
+
+Signal handling
+===============
+
+The delivery of (asynchronous) signals must be delayed until fsys-mode
+is exited.  This is accomplished with the help of the lower-privilege
+transfer trap: arch/ia64/kernel/process.c:do_notify_resume_user()
+checks whether the interrupted task was in fsys-mode and, if so, sets
+PSR.lp and returns immediately.  When fsys-mode is exited via the
+"br.ret" instruction that lowers the privilege level, a trap will
+occur.  The trap handler clears PSR.lp again and returns immediately.
+The kernel exit path then checks for and delivers any pending signals.
+
+PSR Handling
+============
+
+The "epc" instruction doesn't change the contents of PSR at all.  This
+is in contrast to a regular interruption, which clears almost all
+bits.  Because of that, some care needs to be taken to ensure things
+work as expected.  The following discussion describes how each PSR bit
+is handled.
+
+======= =======================================================================
+PSR.be	Cleared when entering fsys-mode.  A srlz.d instruction is used
+	to ensure the CPU is in little-endian mode before the first
+	load/store instruction is executed.  PSR.be is normally NOT
+	restored upon return from an fsys-mode handler.  In other
+	words, user-level code must not rely on PSR.be being preserved
+	across a system call.
+PSR.up	Unchanged.
+PSR.ac	Unchanged.
+PSR.mfl Unchanged.  Note: fsys-mode handlers must not write-registers!
+PSR.mfh	Unchanged.  Note: fsys-mode handlers must not write-registers!
+PSR.ic	Unchanged.  Note: fsys-mode handlers can clear the bit, if needed.
+PSR.i	Unchanged.  Note: fsys-mode handlers can clear the bit, if needed.
+PSR.pk	Unchanged.
+PSR.dt	Unchanged.
+PSR.dfl	Unchanged.  Note: fsys-mode handlers must not write-registers!
+PSR.dfh	Unchanged.  Note: fsys-mode handlers must not write-registers!
+PSR.sp	Unchanged.
+PSR.pp	Unchanged.
+PSR.di	Unchanged.
+PSR.si	Unchanged.
+PSR.db	Unchanged.  The kernel prevents user-level from setting a hardware
+	breakpoint that triggers at any privilege level other than
+	3 (user-mode).
+PSR.lp	Unchanged.
+PSR.tb	Lazy redirect.  If a taken-branch trap occurs while in
+	fsys-mode, the trap-handler modifies the saved machine state
+	such that execution resumes in the gate page at
+	syscall_via_break(), with privilege level 3.  Note: the
+	taken branch would occur on the branch invoking the
+	fsyscall-handler, at which point, by definition, a syscall
+	restart is still safe.  If the system call number is invalid,
+	the fsys-mode handler will return directly to user-level.  This
+	return will trigger a taken-branch trap, but since the trap is
+	taken _after_ restoring the privilege level, the CPU has already
+	left fsys-mode, so no special treatment is needed.
+PSR.rt	Unchanged.
+PSR.cpl	Cleared to 0.
+PSR.is	Unchanged (guaranteed to be 0 on entry to the gate page).
+PSR.mc	Unchanged.
+PSR.it	Unchanged (guaranteed to be 1).
+PSR.id	Unchanged.  Note: the ia64 linux kernel never sets this bit.
+PSR.da	Unchanged.  Note: the ia64 linux kernel never sets this bit.
+PSR.dd	Unchanged.  Note: the ia64 linux kernel never sets this bit.
+PSR.ss	Lazy redirect.  If set, "epc" will cause a Single Step Trap to
+	be taken.  The trap handler then modifies the saved machine
+	state such that execution resumes in the gate page at
+	syscall_via_break(), with privilege level 3.
+PSR.ri	Unchanged.
+PSR.ed	Unchanged.  Note: This bit could only have an effect if an fsys-mode
+	handler performed a speculative load that gets NaTted.  If so, this
+	would be the normal & expected behavior, so no special treatment is
+	needed.
+PSR.bn	Unchanged.  Note: fsys-mode handlers may clear the bit, if needed.
+	Doing so requires clearing PSR.i and PSR.ic as well.
+PSR.ia	Unchanged.  Note: the ia64 linux kernel never sets this bit.
+======= =======================================================================
+
+Using fast system calls
+=======================
+
+To use fast system calls, userspace applications need simply call
+__kernel_syscall_via_epc().  For example
+
+-- example fgettimeofday() call --
+
+-- fgettimeofday.S --
+
+::
+
+  #include <asm/asmmacro.h>
+
+  GLOBAL_ENTRY(fgettimeofday)
+  .prologue
+  .save ar.pfs, r11
+  mov r11 = ar.pfs
+  .body
+
+  mov r2 = 0xa000000000020660;;  // gate address
+			       // found by inspection of System.map for the
+			       // __kernel_syscall_via_epc() function.  See
+			       // below for how to do this for real.
+
+  mov b7 = r2
+  mov r15 = 1087		       // gettimeofday syscall
+  ;;
+  br.call.sptk.many b6 = b7
+  ;;
+
+  .restore sp
+
+  mov ar.pfs = r11
+  br.ret.sptk.many rp;;	      // return to caller
+  END(fgettimeofday)
+
+-- end fgettimeofday.S --
+
+In reality, getting the gate address is accomplished by two extra
+values passed via the ELF auxiliary vector (include/asm-ia64/elf.h)
+
+ * AT_SYSINFO : is the address of __kernel_syscall_via_epc()
+ * AT_SYSINFO_EHDR : is the address of the kernel gate ELF DSO
+
+The ELF DSO is a pre-linked library that is mapped in by the kernel at
+the gate page.  It is a proper ELF shared object so, with a dynamic
+loader that recognises the library, you should be able to make calls to
+the exported functions within it as with any other shared library.
+AT_SYSINFO points into the kernel DSO at the
+__kernel_syscall_via_epc() function for historical reasons (it was
+used before the kernel DSO) and as a convenience.
diff --git a/Documentation/ia64/fsys.txt b/Documentation/ia64/fsys.txt
deleted file mode 100644
index 59dd689d9b86..000000000000
--- a/Documentation/ia64/fsys.txt
+++ /dev/null
@@ -1,286 +0,0 @@
--*-Mode: outline-*-
-
-		Light-weight System Calls for IA-64
-		-----------------------------------
-
-		        Started: 13-Jan-2003
-		    Last update: 27-Sep-2003
-
-	              David Mosberger-Tang
-		      <davidm@hpl.hp.com>
-
-Using the "epc" instruction effectively introduces a new mode of
-execution to the ia64 linux kernel.  We call this mode the
-"fsys-mode".  To recap, the normal states of execution are:
-
-  - kernel mode:
-	Both the register stack and the memory stack have been
-	switched over to kernel memory.  The user-level state is saved
-	in a pt-regs structure at the top of the kernel memory stack.
-
-  - user mode:
-	Both the register stack and the kernel stack are in
-	user memory.  The user-level state is contained in the
-	CPU registers.
-
-  - bank 0 interruption-handling mode:
-	This is the non-interruptible state which all
-	interruption-handlers start execution in.  The user-level
-	state remains in the CPU registers and some kernel state may
-	be stored in bank 0 of registers r16-r31.
-
-In contrast, fsys-mode has the following special properties:
-
-  - execution is at privilege level 0 (most-privileged)
-
-  - CPU registers may contain a mixture of user-level and kernel-level
-    state (it is the responsibility of the kernel to ensure that no
-    security-sensitive kernel-level state is leaked back to
-    user-level)
-
-  - execution is interruptible and preemptible (an fsys-mode handler
-    can disable interrupts and avoid all other interruption-sources
-    to avoid preemption)
-
-  - neither the memory-stack nor the register-stack can be trusted while
-    in fsys-mode (they point to the user-level stacks, which may
-    be invalid, or completely bogus addresses)
-
-In summary, fsys-mode is much more similar to running in user-mode
-than it is to running in kernel-mode.  Of course, given that the
-privilege level is at level 0, this means that fsys-mode requires some
-care (see below).
-
-
-* How to tell fsys-mode
-
-Linux operates in fsys-mode when (a) the privilege level is 0 (most
-privileged) and (b) the stacks have NOT been switched to kernel memory
-yet.  For convenience, the header file <asm-ia64/ptrace.h> provides
-three macros:
-
-	user_mode(regs)
-	user_stack(task,regs)
-	fsys_mode(task,regs)
-
-The "regs" argument is a pointer to a pt_regs structure.  The "task"
-argument is a pointer to the task structure to which the "regs"
-pointer belongs to.  user_mode() returns TRUE if the CPU state pointed
-to by "regs" was executing in user mode (privilege level 3).
-user_stack() returns TRUE if the state pointed to by "regs" was
-executing on the user-level stack(s).  Finally, fsys_mode() returns
-TRUE if the CPU state pointed to by "regs" was executing in fsys-mode.
-The fsys_mode() macro is equivalent to the expression:
-
-	!user_mode(regs) && user_stack(task,regs)
-
-* How to write an fsyscall handler
-
-The file arch/ia64/kernel/fsys.S contains a table of fsyscall-handlers
-(fsyscall_table).  This table contains one entry for each system call.
-By default, a system call is handled by fsys_fallback_syscall().  This
-routine takes care of entering (full) kernel mode and calling the
-normal Linux system call handler.  For performance-critical system
-calls, it is possible to write a hand-tuned fsyscall_handler.  For
-example, fsys.S contains fsys_getpid(), which is a hand-tuned version
-of the getpid() system call.
-
-The entry and exit-state of an fsyscall handler is as follows:
-
-** Machine state on entry to fsyscall handler:
-
- - r10	  = 0
- - r11	  = saved ar.pfs (a user-level value)
- - r15	  = system call number
- - r16	  = "current" task pointer (in normal kernel-mode, this is in r13)
- - r32-r39 = system call arguments
- - b6	  = return address (a user-level value)
- - ar.pfs = previous frame-state (a user-level value)
- - PSR.be = cleared to zero (i.e., little-endian byte order is in effect)
- - all other registers may contain values passed in from user-mode
-
-** Required machine state on exit to fsyscall handler:
-
- - r11	  = saved ar.pfs (as passed into the fsyscall handler)
- - r15	  = system call number (as passed into the fsyscall handler)
- - r32-r39 = system call arguments (as passed into the fsyscall handler)
- - b6	  = return address (as passed into the fsyscall handler)
- - ar.pfs = previous frame-state (as passed into the fsyscall handler)
-
-Fsyscall handlers can execute with very little overhead, but with that
-speed comes a set of restrictions:
-
- o Fsyscall-handlers MUST check for any pending work in the flags
-   member of the thread-info structure and if any of the
-   TIF_ALLWORK_MASK flags are set, the handler needs to fall back on
-   doing a full system call (by calling fsys_fallback_syscall).
-
- o Fsyscall-handlers MUST preserve incoming arguments (r32-r39, r11,
-   r15, b6, and ar.pfs) because they will be needed in case of a
-   system call restart.  Of course, all "preserved" registers also
-   must be preserved, in accordance to the normal calling conventions.
-
- o Fsyscall-handlers MUST check argument registers for containing a
-   NaT value before using them in any way that could trigger a
-   NaT-consumption fault.  If a system call argument is found to
-   contain a NaT value, an fsyscall-handler may return immediately
-   with r8=EINVAL, r10=-1.
-
- o Fsyscall-handlers MUST NOT use the "alloc" instruction or perform
-   any other operation that would trigger mandatory RSE
-   (register-stack engine) traffic.
-
- o Fsyscall-handlers MUST NOT write to any stacked registers because
-   it is not safe to assume that user-level called a handler with the
-   proper number of arguments.
-
- o Fsyscall-handlers need to be careful when accessing per-CPU variables:
-   unless proper safe-guards are taken (e.g., interruptions are avoided),
-   execution may be pre-empted and resumed on another CPU at any given
-   time.
-
- o Fsyscall-handlers must be careful not to leak sensitive kernel'
-   information back to user-level.  In particular, before returning to
-   user-level, care needs to be taken to clear any scratch registers
-   that could contain sensitive information (note that the current
-   task pointer is not considered sensitive: it's already exposed
-   through ar.k6).
-
- o Fsyscall-handlers MUST NOT access user-memory without first
-   validating access-permission (this can be done typically via
-   probe.r.fault and/or probe.w.fault) and without guarding against
-   memory access exceptions (this can be done with the EX() macros
-   defined by asmmacro.h).
-
-The above restrictions may seem draconian, but remember that it's
-possible to trade off some of the restrictions by paying a slightly
-higher overhead.  For example, if an fsyscall-handler could benefit
-from the shadow register bank, it could temporarily disable PSR.i and
-PSR.ic, switch to bank 0 (bsw.0) and then use the shadow registers as
-needed.  In other words, following the above rules yields extremely
-fast system call execution (while fully preserving system call
-semantics), but there is also a lot of flexibility in handling more
-complicated cases.
-
-* Signal handling
-
-The delivery of (asynchronous) signals must be delayed until fsys-mode
-is exited.  This is accomplished with the help of the lower-privilege
-transfer trap: arch/ia64/kernel/process.c:do_notify_resume_user()
-checks whether the interrupted task was in fsys-mode and, if so, sets
-PSR.lp and returns immediately.  When fsys-mode is exited via the
-"br.ret" instruction that lowers the privilege level, a trap will
-occur.  The trap handler clears PSR.lp again and returns immediately.
-The kernel exit path then checks for and delivers any pending signals.
-
-* PSR Handling
-
-The "epc" instruction doesn't change the contents of PSR at all.  This
-is in contrast to a regular interruption, which clears almost all
-bits.  Because of that, some care needs to be taken to ensure things
-work as expected.  The following discussion describes how each PSR bit
-is handled.
-
-PSR.be	Cleared when entering fsys-mode.  A srlz.d instruction is used
-	to ensure the CPU is in little-endian mode before the first
-	load/store instruction is executed.  PSR.be is normally NOT
-	restored upon return from an fsys-mode handler.  In other
-	words, user-level code must not rely on PSR.be being preserved
-	across a system call.
-PSR.up	Unchanged.
-PSR.ac	Unchanged.
-PSR.mfl Unchanged.  Note: fsys-mode handlers must not write-registers!
-PSR.mfh	Unchanged.  Note: fsys-mode handlers must not write-registers!
-PSR.ic	Unchanged.  Note: fsys-mode handlers can clear the bit, if needed.
-PSR.i	Unchanged.  Note: fsys-mode handlers can clear the bit, if needed.
-PSR.pk	Unchanged.
-PSR.dt	Unchanged.
-PSR.dfl	Unchanged.  Note: fsys-mode handlers must not write-registers!
-PSR.dfh	Unchanged.  Note: fsys-mode handlers must not write-registers!
-PSR.sp	Unchanged.
-PSR.pp	Unchanged.
-PSR.di	Unchanged.
-PSR.si	Unchanged.
-PSR.db	Unchanged.  The kernel prevents user-level from setting a hardware
-	breakpoint that triggers at any privilege level other than 3 (user-mode).
-PSR.lp	Unchanged.
-PSR.tb	Lazy redirect.  If a taken-branch trap occurs while in
-	fsys-mode, the trap-handler modifies the saved machine state
-	such that execution resumes in the gate page at
-	syscall_via_break(), with privilege level 3.  Note: the
-	taken branch would occur on the branch invoking the
-	fsyscall-handler, at which point, by definition, a syscall
-	restart is still safe.  If the system call number is invalid,
-	the fsys-mode handler will return directly to user-level.  This
-	return will trigger a taken-branch trap, but since the trap is
-	taken _after_ restoring the privilege level, the CPU has already
-	left fsys-mode, so no special treatment is needed.
-PSR.rt	Unchanged.
-PSR.cpl	Cleared to 0.
-PSR.is	Unchanged (guaranteed to be 0 on entry to the gate page).
-PSR.mc	Unchanged.
-PSR.it	Unchanged (guaranteed to be 1).
-PSR.id	Unchanged.  Note: the ia64 linux kernel never sets this bit.
-PSR.da	Unchanged.  Note: the ia64 linux kernel never sets this bit.
-PSR.dd	Unchanged.  Note: the ia64 linux kernel never sets this bit.
-PSR.ss	Lazy redirect.  If set, "epc" will cause a Single Step Trap to
-	be taken.  The trap handler then modifies the saved machine
-	state such that execution resumes in the gate page at
-	syscall_via_break(), with privilege level 3.
-PSR.ri	Unchanged.
-PSR.ed	Unchanged.  Note: This bit could only have an effect if an fsys-mode
-	handler performed a speculative load that gets NaTted.  If so, this
-	would be the normal & expected behavior, so no special treatment is
-	needed.
-PSR.bn	Unchanged.  Note: fsys-mode handlers may clear the bit, if needed.
-	Doing so requires clearing PSR.i and PSR.ic as well.
-PSR.ia	Unchanged.  Note: the ia64 linux kernel never sets this bit.
-
-* Using fast system calls
-
-To use fast system calls, userspace applications need simply call
-__kernel_syscall_via_epc().  For example
-
--- example fgettimeofday() call --
--- fgettimeofday.S --
-
-#include <asm/asmmacro.h>
-
-GLOBAL_ENTRY(fgettimeofday)
-.prologue
-.save ar.pfs, r11
-mov r11 = ar.pfs
-.body 
-
-mov r2 = 0xa000000000020660;;  // gate address 
-			       // found by inspection of System.map for the 
-			       // __kernel_syscall_via_epc() function.  See
-			       // below for how to do this for real.
-
-mov b7 = r2
-mov r15 = 1087		       // gettimeofday syscall
-;;
-br.call.sptk.many b6 = b7
-;;
-
-.restore sp
-
-mov ar.pfs = r11
-br.ret.sptk.many rp;;	      // return to caller
-END(fgettimeofday)
-
--- end fgettimeofday.S --
-
-In reality, getting the gate address is accomplished by two extra
-values passed via the ELF auxiliary vector (include/asm-ia64/elf.h)
-
- o AT_SYSINFO : is the address of __kernel_syscall_via_epc()
- o AT_SYSINFO_EHDR : is the address of the kernel gate ELF DSO
-
-The ELF DSO is a pre-linked library that is mapped in by the kernel at
-the gate page.  It is a proper ELF shared object so, with a dynamic
-loader that recognises the library, you should be able to make calls to
-the exported functions within it as with any other shared library.
-AT_SYSINFO points into the kernel DSO at the
-__kernel_syscall_via_epc() function for historical reasons (it was
-used before the kernel DSO) and as a convenience.
diff --git a/Documentation/ia64/ia64.rst b/Documentation/ia64/ia64.rst
new file mode 100644
index 000000000000..b725019a9492
--- /dev/null
+++ b/Documentation/ia64/ia64.rst
@@ -0,0 +1,49 @@
+===========================================
+Linux kernel release for the IA-64 Platform
+===========================================
+
+   These are the release notes for Linux since version 2.4 for IA-64
+   platform.  This document provides information specific to IA-64
+   ONLY, to get additional information about the Linux kernel also
+   read the original Linux README provided with the kernel.
+
+Installing the Kernel
+=====================
+
+ - IA-64 kernel installation is the same as the other platforms, see
+   original README for details.
+
+
+Software Requirements
+=====================
+
+   Compiling and running this kernel requires an IA-64 compliant GCC
+   compiler.  And various software packages also compiled with an
+   IA-64 compliant GCC compiler.
+
+
+Configuring the Kernel
+======================
+
+   Configuration is the same, see original README for details.
+
+
+Compiling the Kernel:
+
+ - Compiling this kernel doesn't differ from other platform so read
+   the original README for details BUT make sure you have an IA-64
+   compliant GCC compiler.
+
+IA-64 Specifics
+===============
+
+ - General issues:
+
+    * Hardly any performance tuning has been done. Obvious targets
+      include the library routines (IP checksum, etc.). Less
+      obvious targets include making sure we don't flush the TLB
+      needlessly, etc.
+
+    * SMP locks cleanup/optimization
+
+    * IA32 support.  Currently experimental.  It mostly works.
diff --git a/Documentation/ia64/index.rst b/Documentation/ia64/index.rst
new file mode 100644
index 000000000000..a3e3052ad6e2
--- /dev/null
+++ b/Documentation/ia64/index.rst
@@ -0,0 +1,18 @@
+:orphan:
+
+==================
+IA-64 Architecture
+==================
+
+.. toctree::
+   :maxdepth: 1
+
+   ia64
+   aliasing
+   efirtc
+   err_inject
+   fsys
+   irq-redir
+   mca
+   serial
+   xen
diff --git a/Documentation/ia64/irq-redir.rst b/Documentation/ia64/irq-redir.rst
new file mode 100644
index 000000000000..39bf94484a15
--- /dev/null
+++ b/Documentation/ia64/irq-redir.rst
@@ -0,0 +1,80 @@
+==============================
+IRQ affinity on IA64 platforms
+==============================
+
+07.01.2002, Erich Focht <efocht@ess.nec.de>
+
+
+By writing to /proc/irq/IRQ#/smp_affinity the interrupt routing can be
+controlled. The behavior on IA64 platforms is slightly different from
+that described in Documentation/IRQ-affinity.txt for i386 systems.
+
+Because of the usage of SAPIC mode and physical destination mode the
+IRQ target is one particular CPU and cannot be a mask of several
+CPUs. Only the first non-zero bit is taken into account.
+
+
+Usage examples
+==============
+
+The target CPU has to be specified as a hexadecimal CPU mask. The
+first non-zero bit is the selected CPU. This format has been kept for
+compatibility reasons with i386.
+
+Set the delivery mode of interrupt 41 to fixed and route the
+interrupts to CPU #3 (logical CPU number) (2^3=0x08)::
+
+     echo "8" >/proc/irq/41/smp_affinity
+
+Set the default route for IRQ number 41 to CPU 6 in lowest priority
+delivery mode (redirectable)::
+
+     echo "r 40" >/proc/irq/41/smp_affinity
+
+The output of the command::
+
+     cat /proc/irq/IRQ#/smp_affinity
+
+gives the target CPU mask for the specified interrupt vector. If the CPU
+mask is preceded by the character "r", the interrupt is redirectable
+(i.e. lowest priority mode routing is used), otherwise its route is
+fixed.
+
+
+
+Initialization and default behavior
+===================================
+
+If the platform features IRQ redirection (info provided by SAL) all
+IO-SAPIC interrupts are initialized with CPU#0 as their default target
+and the routing is the so called "lowest priority mode" (actually
+fixed SAPIC mode with hint). The XTP chipset registers are used as hints
+for the IRQ routing. Currently in Linux XTP registers can have three
+values:
+
+	- minimal for an idle task,
+	- normal if any other task runs,
+	- maximal if the CPU is going to be switched off.
+
+The IRQ is routed to the CPU with lowest XTP register value, the
+search begins at the default CPU. Therefore most of the interrupts
+will be handled by CPU #0.
+
+If the platform doesn't feature interrupt redirection IOSAPIC fixed
+routing is used. The target CPUs are distributed in a round robin
+manner. IRQs will be routed only to the selected target CPUs. Check
+with::
+
+        cat /proc/interrupts
+
+
+
+Comments
+========
+
+On large (multi-node) systems it is recommended to route the IRQs to
+the node to which the corresponding device is connected.
+For systems like the NEC AzusA we get IRQ node-affinity for free. This
+is because usually the chipsets on each node redirect the interrupts
+only to their own CPUs (as they cannot see the XTP registers on the
+other nodes).
diff --git a/Documentation/ia64/mca.rst b/Documentation/ia64/mca.rst
new file mode 100644
index 000000000000..08270bba44a4
--- /dev/null
+++ b/Documentation/ia64/mca.rst
@@ -0,0 +1,198 @@
+=============================================================
+An ad-hoc collection of notes on IA64 MCA and INIT processing
+=============================================================
+
+Feel free to update it with notes about any area that is not clear.
+
+---
+
+MCA/INIT are completely asynchronous.  They can occur at any time, when
+the OS is in any state.  Including when one of the cpus is already
+holding a spinlock.  Trying to get any lock from MCA/INIT state is
+asking for deadlock.  Also the state of structures that are protected
+by locks is indeterminate, including linked lists.
+
+---
+
+The complicated ia64 MCA process.  All of this is mandated by Intel's
+specification for ia64 SAL, error recovery and unwind, it is not as
+if we have a choice here.
+
+* MCA occurs on one cpu, usually due to a double bit memory error.
+  This is the monarch cpu.
+
+* SAL sends an MCA rendezvous interrupt (which is a normal interrupt)
+  to all the other cpus, the slaves.
+
+* Slave cpus that receive the MCA interrupt call down into SAL, they
+  end up spinning disabled while the MCA is being serviced.
+
+* If any slave cpu was already spinning disabled when the MCA occurred
+  then it cannot service the MCA interrupt.  SAL waits ~20 seconds then
+  sends an unmaskable INIT event to the slave cpus that have not
+  already rendezvoused.
+
+* Because MCA/INIT can be delivered at any time, including when the cpu
+  is down in PAL in physical mode, the registers at the time of the
+  event are _completely_ undefined.  In particular the MCA/INIT
+  handlers cannot rely on the thread pointer, PAL physical mode can
+  (and does) modify TP.  It is allowed to do that as long as it resets
+  TP on return.  However MCA/INIT events expose us to these PAL
+  internal TP changes.  Hence curr_task().
+
+* If an MCA/INIT event occurs while the kernel was running (not user
+  space) and the kernel has called PAL then the MCA/INIT handler cannot
+  assume that the kernel stack is in a fit state to be used.  Mainly
+  because PAL may or may not maintain the stack pointer internally.
+  Because the MCA/INIT handlers cannot trust the kernel stack, they
+  have to use their own, per-cpu stacks.  The MCA/INIT stacks are
+  preformatted with just enough task state to let the relevant handlers
+  do their job.
+
+* Unlike most other architectures, the ia64 struct task is embedded in
+  the kernel stack[1].  So switching to a new kernel stack means that
+  we switch to a new task as well.  Because various bits of the kernel
+  assume that current points into the struct task, switching to a new
+  stack also means a new value for current.
+
+* Once all slaves have rendezvoused and are spinning disabled, the
+  monarch is entered.  The monarch now tries to diagnose the problem
+  and decide if it can recover or not.
+
+* Part of the monarch's job is to look at the state of all the other
+  tasks.  The only way to do that on ia64 is to call the unwinder,
+  as mandated by Intel.
+
+* The starting point for the unwind depends on whether a task is
+  running or not.  That is, whether it is on a cpu or is blocked.  The
+  monarch has to determine whether or not a task is on a cpu before it
+  knows how to start unwinding it.  The tasks that received an MCA or
+  INIT event are no longer running, they have been converted to blocked
+  tasks.  But (and its a big but), the cpus that received the MCA
+  rendezvous interrupt are still running on their normal kernel stacks!
+
+* To distinguish between these two cases, the monarch must know which
+  tasks are on a cpu and which are not.  Hence each slave cpu that
+  switches to an MCA/INIT stack, registers its new stack using
+  set_curr_task(), so the monarch can tell that the _original_ task is
+  no longer running on that cpu.  That gives us a decent chance of
+  getting a valid backtrace of the _original_ task.
+
+* MCA/INIT can be nested, to a depth of 2 on any cpu.  In the case of a
+  nested error, we want diagnostics on the MCA/INIT handler that
+  failed, not on the task that was originally running.  Again this
+  requires set_curr_task() so the MCA/INIT handlers can register their
+  own stack as running on that cpu.  Then a recursive error gets a
+  trace of the failing handler's "task".
+
+[1]
+    My (Keith Owens) original design called for ia64 to separate its
+    struct task and the kernel stacks.  Then the MCA/INIT data would be
+    chained stacks like i386 interrupt stacks.  But that required
+    radical surgery on the rest of ia64, plus extra hard wired TLB
+    entries with its associated performance degradation.  David
+    Mosberger vetoed that approach.  Which meant that separate kernel
+    stacks meant separate "tasks" for the MCA/INIT handlers.
+
+---
+
+INIT is less complicated than MCA.  Pressing the nmi button or using
+the equivalent command on the management console sends INIT to all
+cpus.  SAL picks one of the cpus as the monarch and the rest are
+slaves.  All the OS INIT handlers are entered at approximately the same
+time.  The OS monarch prints the state of all tasks and returns, after
+which the slaves return and the system resumes.
+
+At least that is what is supposed to happen.  Alas there are broken
+versions of SAL out there.  Some drive all the cpus as monarchs.  Some
+drive them all as slaves.  Some drive one cpu as monarch, wait for that
+cpu to return from the OS then drive the rest as slaves.  Some versions
+of SAL cannot even cope with returning from the OS, they spin inside
+SAL on resume.  The OS INIT code has workarounds for some of these
+broken SAL symptoms, but some simply cannot be fixed from the OS side.
+
+---
+
+The scheduler hooks used by ia64 (curr_task, set_curr_task) are layer
+violations.  Unfortunately MCA/INIT start off as massive layer
+violations (can occur at _any_ time) and they build from there.
+
+At least ia64 makes an attempt at recovering from hardware errors, but
+it is a difficult problem because of the asynchronous nature of these
+errors.  When processing an unmaskable interrupt we sometimes need
+special code to cope with our inability to take any locks.
+
+---
+
+How is ia64 MCA/INIT different from x86 NMI?
+
+* x86 NMI typically gets delivered to one cpu.  MCA/INIT gets sent to
+  all cpus.
+
+* x86 NMI cannot be nested.  MCA/INIT can be nested, to a depth of 2
+  per cpu.
+
+* x86 has a separate struct task which points to one of multiple kernel
+  stacks.  ia64 has the struct task embedded in the single kernel
+  stack, so switching stack means switching task.
+
+* x86 does not call the BIOS so the NMI handler does not have to worry
+  about any registers having changed.  MCA/INIT can occur while the cpu
+  is in PAL in physical mode, with undefined registers and an undefined
+  kernel stack.
+
+* i386 backtrace is not very sensitive to whether a process is running
+  or not.  ia64 unwind is very, very sensitive to whether a process is
+  running or not.
+
+---
+
+What happens when MCA/INIT is delivered what a cpu is running user
+space code?
+
+The user mode registers are stored in the RSE area of the MCA/INIT on
+entry to the OS and are restored from there on return to SAL, so user
+mode registers are preserved across a recoverable MCA/INIT.  Since the
+OS has no idea what unwind data is available for the user space stack,
+MCA/INIT never tries to backtrace user space.  Which means that the OS
+does not bother making the user space process look like a blocked task,
+i.e. the OS does not copy pt_regs and switch_stack to the user space
+stack.  Also the OS has no idea how big the user space RSE and memory
+stacks are, which makes it too risky to copy the saved state to a user
+mode stack.
+
+---
+
+How do we get a backtrace on the tasks that were running when MCA/INIT
+was delivered?
+
+mca.c:::ia64_mca_modify_original_stack().  That identifies and
+verifies the original kernel stack, copies the dirty registers from
+the MCA/INIT stack's RSE to the original stack's RSE, copies the
+skeleton struct pt_regs and switch_stack to the original stack, fills
+in the skeleton structures from the PAL minstate area and updates the
+original stack's thread.ksp.  That makes the original stack look
+exactly like any other blocked task, i.e. it now appears to be
+sleeping.  To get a backtrace, just start with thread.ksp for the
+original task and unwind like any other sleeping task.
+
+---
+
+How do we identify the tasks that were running when MCA/INIT was
+delivered?
+
+If the previous task has been verified and converted to a blocked
+state, then sos->prev_task on the MCA/INIT stack is updated to point to
+the previous task.  You can look at that field in dumps or debuggers.
+To help distinguish between the handler and the original tasks,
+handlers have _TIF_MCA_INIT set in thread_info.flags.
+
+The sos data is always in the MCA/INIT handler stack, at offset
+MCA_SOS_OFFSET.  You can get that value from mca_asm.h or calculate it
+as KERNEL_STACK_SIZE - sizeof(struct pt_regs) - sizeof(struct
+ia64_sal_os_state), with 16 byte alignment for all structures.
+
+Also the comm field of the MCA/INIT task is modified to include the pid
+of the original task, for humans to use.  For example, a comm field of
+'MCA 12159' means that pid 12159 was running when the MCA was
+delivered.
diff --git a/Documentation/ia64/mca.txt b/Documentation/ia64/mca.txt
deleted file mode 100644
index f097c60cba1b..000000000000
--- a/Documentation/ia64/mca.txt
+++ /dev/null
@@ -1,194 +0,0 @@
-An ad-hoc collection of notes on IA64 MCA and INIT processing.  Feel
-free to update it with notes about any area that is not clear.
-
----
-
-MCA/INIT are completely asynchronous.  They can occur at any time, when
-the OS is in any state.  Including when one of the cpus is already
-holding a spinlock.  Trying to get any lock from MCA/INIT state is
-asking for deadlock.  Also the state of structures that are protected
-by locks is indeterminate, including linked lists.
-
----
-
-The complicated ia64 MCA process.  All of this is mandated by Intel's
-specification for ia64 SAL, error recovery and unwind, it is not as
-if we have a choice here.
-
-* MCA occurs on one cpu, usually due to a double bit memory error.
-  This is the monarch cpu.
-
-* SAL sends an MCA rendezvous interrupt (which is a normal interrupt)
-  to all the other cpus, the slaves.
-
-* Slave cpus that receive the MCA interrupt call down into SAL, they
-  end up spinning disabled while the MCA is being serviced.
-
-* If any slave cpu was already spinning disabled when the MCA occurred
-  then it cannot service the MCA interrupt.  SAL waits ~20 seconds then
-  sends an unmaskable INIT event to the slave cpus that have not
-  already rendezvoused.
-
-* Because MCA/INIT can be delivered at any time, including when the cpu
-  is down in PAL in physical mode, the registers at the time of the
-  event are _completely_ undefined.  In particular the MCA/INIT
-  handlers cannot rely on the thread pointer, PAL physical mode can
-  (and does) modify TP.  It is allowed to do that as long as it resets
-  TP on return.  However MCA/INIT events expose us to these PAL
-  internal TP changes.  Hence curr_task().
-
-* If an MCA/INIT event occurs while the kernel was running (not user
-  space) and the kernel has called PAL then the MCA/INIT handler cannot
-  assume that the kernel stack is in a fit state to be used.  Mainly
-  because PAL may or may not maintain the stack pointer internally.
-  Because the MCA/INIT handlers cannot trust the kernel stack, they
-  have to use their own, per-cpu stacks.  The MCA/INIT stacks are
-  preformatted with just enough task state to let the relevant handlers
-  do their job.
-
-* Unlike most other architectures, the ia64 struct task is embedded in
-  the kernel stack[1].  So switching to a new kernel stack means that
-  we switch to a new task as well.  Because various bits of the kernel
-  assume that current points into the struct task, switching to a new
-  stack also means a new value for current.
-
-* Once all slaves have rendezvoused and are spinning disabled, the
-  monarch is entered.  The monarch now tries to diagnose the problem
-  and decide if it can recover or not.
-
-* Part of the monarch's job is to look at the state of all the other
-  tasks.  The only way to do that on ia64 is to call the unwinder,
-  as mandated by Intel.
-
-* The starting point for the unwind depends on whether a task is
-  running or not.  That is, whether it is on a cpu or is blocked.  The
-  monarch has to determine whether or not a task is on a cpu before it
-  knows how to start unwinding it.  The tasks that received an MCA or
-  INIT event are no longer running, they have been converted to blocked
-  tasks.  But (and its a big but), the cpus that received the MCA
-  rendezvous interrupt are still running on their normal kernel stacks!
-
-* To distinguish between these two cases, the monarch must know which
-  tasks are on a cpu and which are not.  Hence each slave cpu that
-  switches to an MCA/INIT stack, registers its new stack using
-  set_curr_task(), so the monarch can tell that the _original_ task is
-  no longer running on that cpu.  That gives us a decent chance of
-  getting a valid backtrace of the _original_ task.
-
-* MCA/INIT can be nested, to a depth of 2 on any cpu.  In the case of a
-  nested error, we want diagnostics on the MCA/INIT handler that
-  failed, not on the task that was originally running.  Again this
-  requires set_curr_task() so the MCA/INIT handlers can register their
-  own stack as running on that cpu.  Then a recursive error gets a
-  trace of the failing handler's "task".
-
-[1] My (Keith Owens) original design called for ia64 to separate its
-    struct task and the kernel stacks.  Then the MCA/INIT data would be
-    chained stacks like i386 interrupt stacks.  But that required
-    radical surgery on the rest of ia64, plus extra hard wired TLB
-    entries with its associated performance degradation.  David
-    Mosberger vetoed that approach.  Which meant that separate kernel
-    stacks meant separate "tasks" for the MCA/INIT handlers.
-
----
-
-INIT is less complicated than MCA.  Pressing the nmi button or using
-the equivalent command on the management console sends INIT to all
-cpus.  SAL picks one of the cpus as the monarch and the rest are
-slaves.  All the OS INIT handlers are entered at approximately the same
-time.  The OS monarch prints the state of all tasks and returns, after
-which the slaves return and the system resumes.
-
-At least that is what is supposed to happen.  Alas there are broken
-versions of SAL out there.  Some drive all the cpus as monarchs.  Some
-drive them all as slaves.  Some drive one cpu as monarch, wait for that
-cpu to return from the OS then drive the rest as slaves.  Some versions
-of SAL cannot even cope with returning from the OS, they spin inside
-SAL on resume.  The OS INIT code has workarounds for some of these
-broken SAL symptoms, but some simply cannot be fixed from the OS side.
-
----
-
-The scheduler hooks used by ia64 (curr_task, set_curr_task) are layer
-violations.  Unfortunately MCA/INIT start off as massive layer
-violations (can occur at _any_ time) and they build from there.
-
-At least ia64 makes an attempt at recovering from hardware errors, but
-it is a difficult problem because of the asynchronous nature of these
-errors.  When processing an unmaskable interrupt we sometimes need
-special code to cope with our inability to take any locks.
-
----
-
-How is ia64 MCA/INIT different from x86 NMI?
-
-* x86 NMI typically gets delivered to one cpu.  MCA/INIT gets sent to
-  all cpus.
-
-* x86 NMI cannot be nested.  MCA/INIT can be nested, to a depth of 2
-  per cpu.
-
-* x86 has a separate struct task which points to one of multiple kernel
-  stacks.  ia64 has the struct task embedded in the single kernel
-  stack, so switching stack means switching task.
-
-* x86 does not call the BIOS so the NMI handler does not have to worry
-  about any registers having changed.  MCA/INIT can occur while the cpu
-  is in PAL in physical mode, with undefined registers and an undefined
-  kernel stack.
-
-* i386 backtrace is not very sensitive to whether a process is running
-  or not.  ia64 unwind is very, very sensitive to whether a process is
-  running or not.
-
----
-
-What happens when MCA/INIT is delivered what a cpu is running user
-space code?
-
-The user mode registers are stored in the RSE area of the MCA/INIT on
-entry to the OS and are restored from there on return to SAL, so user
-mode registers are preserved across a recoverable MCA/INIT.  Since the
-OS has no idea what unwind data is available for the user space stack,
-MCA/INIT never tries to backtrace user space.  Which means that the OS
-does not bother making the user space process look like a blocked task,
-i.e. the OS does not copy pt_regs and switch_stack to the user space
-stack.  Also the OS has no idea how big the user space RSE and memory
-stacks are, which makes it too risky to copy the saved state to a user
-mode stack.
-
----
-
-How do we get a backtrace on the tasks that were running when MCA/INIT
-was delivered?
-
-mca.c:::ia64_mca_modify_original_stack().  That identifies and
-verifies the original kernel stack, copies the dirty registers from
-the MCA/INIT stack's RSE to the original stack's RSE, copies the
-skeleton struct pt_regs and switch_stack to the original stack, fills
-in the skeleton structures from the PAL minstate area and updates the
-original stack's thread.ksp.  That makes the original stack look
-exactly like any other blocked task, i.e. it now appears to be
-sleeping.  To get a backtrace, just start with thread.ksp for the
-original task and unwind like any other sleeping task.
-
----
-
-How do we identify the tasks that were running when MCA/INIT was
-delivered?
-
-If the previous task has been verified and converted to a blocked
-state, then sos->prev_task on the MCA/INIT stack is updated to point to
-the previous task.  You can look at that field in dumps or debuggers.
-To help distinguish between the handler and the original tasks,
-handlers have _TIF_MCA_INIT set in thread_info.flags.
-
-The sos data is always in the MCA/INIT handler stack, at offset
-MCA_SOS_OFFSET.  You can get that value from mca_asm.h or calculate it
-as KERNEL_STACK_SIZE - sizeof(struct pt_regs) - sizeof(struct
-ia64_sal_os_state), with 16 byte alignment for all structures.
-
-Also the comm field of the MCA/INIT task is modified to include the pid
-of the original task, for humans to use.  For example, a comm field of
-'MCA 12159' means that pid 12159 was running when the MCA was
-delivered.
diff --git a/Documentation/ia64/serial.rst b/Documentation/ia64/serial.rst
new file mode 100644
index 000000000000..1de70c305a79
--- /dev/null
+++ b/Documentation/ia64/serial.rst
@@ -0,0 +1,165 @@
+==============
+Serial Devices
+==============
+
+Serial Device Naming
+====================
+
+    As of 2.6.10, serial devices on ia64 are named based on the
+    order of ACPI and PCI enumeration.  The first device in the
+    ACPI namespace (if any) becomes /dev/ttyS0, the second becomes
+    /dev/ttyS1, etc., and PCI devices are named sequentially
+    starting after the ACPI devices.
+
+    Prior to 2.6.10, there were confusing exceptions to this:
+
+	- Firmware on some machines (mostly from HP) provides an HCDP
+	  table[1] that tells the kernel about devices that can be used
+	  as a serial console.  If the user specified "console=ttyS0"
+	  or the EFI ConOut path contained only UART devices, the
+	  kernel registered the device described by the HCDP as
+	  /dev/ttyS0.
+
+	- If there was no HCDP, we assumed there were UARTs at the
+	  legacy COM port addresses (I/O ports 0x3f8 and 0x2f8), so
+	  the kernel registered those as /dev/ttyS0 and /dev/ttyS1.
+
+    Any additional ACPI or PCI devices were registered sequentially
+    after /dev/ttyS0 as they were discovered.
+
+    With an HCDP, device names changed depending on EFI configuration
+    and "console=" arguments.  Without an HCDP, device names didn't
+    change, but we registered devices that might not really exist.
+
+    For example, an HP rx1600 with a single built-in serial port
+    (described in the ACPI namespace) plus an MP[2] (a PCI device) has
+    these ports:
+
+      ==========  ==========     ============    ============   =======
+      Type        MMIO           pre-2.6.10      pre-2.6.10     2.6.10+
+		  address
+				 (EFI console    (EFI console
+                                 on builtin)     on MP port)
+      ==========  ==========     ============    ============   =======
+      builtin     0xff5e0000        ttyS0           ttyS1         ttyS0
+      MP UPS      0xf8031000        ttyS1           ttyS2         ttyS1
+      MP Console  0xf8030000        ttyS2           ttyS0         ttyS2
+      MP 2        0xf8030010        ttyS3           ttyS3         ttyS3
+      MP 3        0xf8030038        ttyS4           ttyS4         ttyS4
+      ==========  ==========     ============    ============   =======
+
+Console Selection
+=================
+
+    EFI knows what your console devices are, but it doesn't tell the
+    kernel quite enough to actually locate them.  The DIG64 HCDP
+    table[1] does tell the kernel where potential serial console
+    devices are, but not all firmware supplies it.  Also, EFI supports
+    multiple simultaneous consoles and doesn't tell the kernel which
+    should be the "primary" one.
+
+    So how do you tell Linux which console device to use?
+
+	- If your firmware supplies the HCDP, it is simplest to
+	  configure EFI with a single device (either a UART or a VGA
+	  card) as the console.  Then you don't need to tell Linux
+	  anything; the kernel will automatically use the EFI console.
+
+	  (This works only in 2.6.6 or later; prior to that you had
+	  to specify "console=ttyS0" to get a serial console.)
+
+	- Without an HCDP, Linux defaults to a VGA console unless you
+	  specify a "console=" argument.
+
+    NOTE: Don't assume that a serial console device will be /dev/ttyS0.
+    It might be ttyS1, ttyS2, etc.  Make sure you have the appropriate
+    entries in /etc/inittab (for getty) and /etc/securetty (to allow
+    root login).
+
+Early Serial Console
+====================
+
+    The kernel can't start using a serial console until it knows where
+    the device lives.  Normally this happens when the driver enumerates
+    all the serial devices, which can happen a minute or more after the
+    kernel starts booting.
+
+    2.6.10 and later kernels have an "early uart" driver that works
+    very early in the boot process.  The kernel will automatically use
+    this if the user supplies an argument like "console=uart,io,0x3f8",
+    or if the EFI console path contains only a UART device and the
+    firmware supplies an HCDP.
+
+Troubleshooting Serial Console Problems
+=======================================
+
+    No kernel output after elilo prints "Uncompressing Linux... done":
+
+	- You specified "console=ttyS0" but Linux changed the device
+	  to which ttyS0 refers.  Configure exactly one EFI console
+	  device[3] and remove the "console=" option.
+
+	- The EFI console path contains both a VGA device and a UART.
+	  EFI and elilo use both, but Linux defaults to VGA.  Remove
+	  the VGA device from the EFI console path[3].
+
+	- Multiple UARTs selected as EFI console devices.  EFI and
+	  elilo use all selected devices, but Linux uses only one.
+	  Make sure only one UART is selected in the EFI console
+	  path[3].
+
+	- You're connected to an HP MP port[2] but have a non-MP UART
+	  selected as EFI console device.  EFI uses the MP as a
+	  console device even when it isn't explicitly selected.
+	  Either move the console cable to the non-MP UART, or change
+	  the EFI console path[3] to the MP UART.
+
+    Long pause (60+ seconds) between "Uncompressing Linux... done" and
+    start of kernel output:
+
+	- No early console because you used "console=ttyS<n>".  Remove
+	  the "console=" option if your firmware supplies an HCDP.
+
+	- If you don't have an HCDP, the kernel doesn't know where
+	  your console lives until the driver discovers serial
+	  devices.  Use "console=uart,io,0x3f8" (or appropriate
+	  address for your machine).
+
+    Kernel and init script output works fine, but no "login:" prompt:
+
+	- Add getty entry to /etc/inittab for console tty.  Look for
+	  the "Adding console on ttyS<n>" message that tells you which
+	  device is the console.
+
+    "login:" prompt, but can't login as root:
+
+	- Add entry to /etc/securetty for console tty.
+
+    No ACPI serial devices found in 2.6.17 or later:
+
+	- Turn on CONFIG_PNP and CONFIG_PNPACPI.  Prior to 2.6.17, ACPI
+	  serial devices were discovered by 8250_acpi.  In 2.6.17,
+	  8250_acpi was replaced by the combination of 8250_pnp and
+	  CONFIG_PNPACPI.
+
+
+
+[1]
+    http://www.dig64.org/specifications/agreement
+    The table was originally defined as the "HCDP" for "Headless
+    Console/Debug Port."  The current version is the "PCDP" for
+    "Primary Console and Debug Port Devices."
+
+[2]
+    The HP MP (management processor) is a PCI device that provides
+    several UARTs.  One of the UARTs is often used as a console; the
+    EFI Boot Manager identifies it as "Acpi(HWP0002,700)/Pci(...)/Uart".
+    The external connection is usually a 25-pin connector, and a
+    special dongle converts that to three 9-pin connectors, one of
+    which is labelled "Console."
+
+[3]
+    EFI console devices are configured using the EFI Boot Manager
+    "Boot option maintenance" menu.  You may have to interrupt the
+    boot sequence to use this menu, and you will have to reset the
+    box after changing console configuration.
diff --git a/Documentation/ia64/serial.txt b/Documentation/ia64/serial.txt
deleted file mode 100644
index a63d2c54329b..000000000000
--- a/Documentation/ia64/serial.txt
+++ /dev/null
@@ -1,151 +0,0 @@
-SERIAL DEVICE NAMING
-
-    As of 2.6.10, serial devices on ia64 are named based on the
-    order of ACPI and PCI enumeration.  The first device in the
-    ACPI namespace (if any) becomes /dev/ttyS0, the second becomes
-    /dev/ttyS1, etc., and PCI devices are named sequentially
-    starting after the ACPI devices.
-
-    Prior to 2.6.10, there were confusing exceptions to this:
-
-	- Firmware on some machines (mostly from HP) provides an HCDP
-	  table[1] that tells the kernel about devices that can be used
-	  as a serial console.  If the user specified "console=ttyS0"
-	  or the EFI ConOut path contained only UART devices, the
-	  kernel registered the device described by the HCDP as
-	  /dev/ttyS0.
-
-	- If there was no HCDP, we assumed there were UARTs at the
-	  legacy COM port addresses (I/O ports 0x3f8 and 0x2f8), so
-	  the kernel registered those as /dev/ttyS0 and /dev/ttyS1.
-
-    Any additional ACPI or PCI devices were registered sequentially
-    after /dev/ttyS0 as they were discovered.
-
-    With an HCDP, device names changed depending on EFI configuration
-    and "console=" arguments.  Without an HCDP, device names didn't
-    change, but we registered devices that might not really exist.
-
-    For example, an HP rx1600 with a single built-in serial port
-    (described in the ACPI namespace) plus an MP[2] (a PCI device) has
-    these ports:
-
-                                  pre-2.6.10      pre-2.6.10
-                    MMIO         (EFI console    (EFI console
-                   address        on builtin)     on MP port)    2.6.10
-                  ==========      ==========      ==========     ======
-      builtin     0xff5e0000        ttyS0           ttyS1         ttyS0
-      MP UPS      0xf8031000        ttyS1           ttyS2         ttyS1
-      MP Console  0xf8030000        ttyS2           ttyS0         ttyS2
-      MP 2        0xf8030010        ttyS3           ttyS3         ttyS3
-      MP 3        0xf8030038        ttyS4           ttyS4         ttyS4
-
-CONSOLE SELECTION
-
-    EFI knows what your console devices are, but it doesn't tell the
-    kernel quite enough to actually locate them.  The DIG64 HCDP
-    table[1] does tell the kernel where potential serial console
-    devices are, but not all firmware supplies it.  Also, EFI supports
-    multiple simultaneous consoles and doesn't tell the kernel which
-    should be the "primary" one.
-
-    So how do you tell Linux which console device to use?
-
-	- If your firmware supplies the HCDP, it is simplest to
-	  configure EFI with a single device (either a UART or a VGA
-	  card) as the console.  Then you don't need to tell Linux
-	  anything; the kernel will automatically use the EFI console.
-
-	  (This works only in 2.6.6 or later; prior to that you had
-	  to specify "console=ttyS0" to get a serial console.)
-
-	- Without an HCDP, Linux defaults to a VGA console unless you
-	  specify a "console=" argument.
-
-    NOTE: Don't assume that a serial console device will be /dev/ttyS0.
-    It might be ttyS1, ttyS2, etc.  Make sure you have the appropriate
-    entries in /etc/inittab (for getty) and /etc/securetty (to allow
-    root login).
-
-EARLY SERIAL CONSOLE
-
-    The kernel can't start using a serial console until it knows where
-    the device lives.  Normally this happens when the driver enumerates
-    all the serial devices, which can happen a minute or more after the
-    kernel starts booting.
-
-    2.6.10 and later kernels have an "early uart" driver that works
-    very early in the boot process.  The kernel will automatically use
-    this if the user supplies an argument like "console=uart,io,0x3f8",
-    or if the EFI console path contains only a UART device and the
-    firmware supplies an HCDP.
-
-TROUBLESHOOTING SERIAL CONSOLE PROBLEMS
-
-    No kernel output after elilo prints "Uncompressing Linux... done":
-
-	- You specified "console=ttyS0" but Linux changed the device
-	  to which ttyS0 refers.  Configure exactly one EFI console
-	  device[3] and remove the "console=" option.
-
-	- The EFI console path contains both a VGA device and a UART.
-	  EFI and elilo use both, but Linux defaults to VGA.  Remove
-	  the VGA device from the EFI console path[3].
-
-	- Multiple UARTs selected as EFI console devices.  EFI and
-	  elilo use all selected devices, but Linux uses only one.
-	  Make sure only one UART is selected in the EFI console
-	  path[3].
-
-	- You're connected to an HP MP port[2] but have a non-MP UART
-	  selected as EFI console device.  EFI uses the MP as a
-	  console device even when it isn't explicitly selected.
-	  Either move the console cable to the non-MP UART, or change
-	  the EFI console path[3] to the MP UART.
-
-    Long pause (60+ seconds) between "Uncompressing Linux... done" and
-    start of kernel output:
-
-	- No early console because you used "console=ttyS<n>".  Remove
-	  the "console=" option if your firmware supplies an HCDP.
-
-	- If you don't have an HCDP, the kernel doesn't know where
-	  your console lives until the driver discovers serial
-	  devices.  Use "console=uart,io,0x3f8" (or appropriate
-	  address for your machine).
-
-    Kernel and init script output works fine, but no "login:" prompt:
-
-	- Add getty entry to /etc/inittab for console tty.  Look for
-	  the "Adding console on ttyS<n>" message that tells you which
-	  device is the console.
-
-    "login:" prompt, but can't login as root:
-
-	- Add entry to /etc/securetty for console tty.
-
-    No ACPI serial devices found in 2.6.17 or later:
-
-	- Turn on CONFIG_PNP and CONFIG_PNPACPI.  Prior to 2.6.17, ACPI
-	  serial devices were discovered by 8250_acpi.  In 2.6.17,
-	  8250_acpi was replaced by the combination of 8250_pnp and
-	  CONFIG_PNPACPI.
-
-
-
-[1] http://www.dig64.org/specifications/agreement 
-    The table was originally defined as the "HCDP" for "Headless
-    Console/Debug Port."  The current version is the "PCDP" for
-    "Primary Console and Debug Port Devices."
-
-[2] The HP MP (management processor) is a PCI device that provides
-    several UARTs.  One of the UARTs is often used as a console; the
-    EFI Boot Manager identifies it as "Acpi(HWP0002,700)/Pci(...)/Uart".
-    The external connection is usually a 25-pin connector, and a
-    special dongle converts that to three 9-pin connectors, one of
-    which is labelled "Console."
-
-[3] EFI console devices are configured using the EFI Boot Manager
-    "Boot option maintenance" menu.  You may have to interrupt the
-    boot sequence to use this menu, and you will have to reset the
-    box after changing console configuration.
diff --git a/Documentation/ia64/xen.rst b/Documentation/ia64/xen.rst
new file mode 100644
index 000000000000..831339c74441
--- /dev/null
+++ b/Documentation/ia64/xen.rst
@@ -0,0 +1,206 @@
+********************************************************
+Recipe for getting/building/running Xen/ia64 with pv_ops
+********************************************************
+This recipe describes how to get xen-ia64 source and build it,
+and run domU with pv_ops.
+
+Requirements
+============
+
+  - python
+  - mercurial
+    it (aka "hg") is an open-source source code
+    management software. See the below.
+    http://www.selenic.com/mercurial/wiki/
+  - git
+  - bridge-utils
+
+Getting and Building Xen and Dom0
+=================================
+
+  My environment is:
+
+    - Machine  : Tiger4
+    - Domain0 OS  : RHEL5
+    - DomainU OS  : RHEL5
+
+ 1. Download source::
+
+	# hg clone http://xenbits.xensource.com/ext/ia64/xen-unstable.hg
+	# cd xen-unstable.hg
+	# hg clone http://xenbits.xensource.com/ext/ia64/linux-2.6.18-xen.hg
+
+ 2. # make world
+
+ 3. # make install-tools
+
+ 4. copy kernels and xen::
+
+	# cp xen/xen.gz /boot/efi/efi/redhat/
+	# cp build-linux-2.6.18-xen_ia64/vmlinux.gz \
+	/boot/efi/efi/redhat/vmlinuz-2.6.18.8-xen
+
+ 5. make initrd for Dom0/DomU::
+
+	# make -C linux-2.6.18-xen.hg ARCH=ia64 modules_install \
+          O=$(pwd)/build-linux-2.6.18-xen_ia64
+	# mkinitrd -f /boot/efi/efi/redhat/initrd-2.6.18.8-xen.img \
+	  2.6.18.8-xen --builtin mptspi --builtin mptbase \
+	  --builtin mptscsih --builtin uhci-hcd --builtin ohci-hcd \
+	  --builtin ehci-hcd
+
+Making a disk image for guest OS
+================================
+
+ 1. make file::
+
+      # dd if=/dev/zero of=/root/rhel5.img bs=1M seek=4096 count=0
+      # mke2fs -F -j /root/rhel5.img
+      # mount -o loop /root/rhel5.img /mnt
+      # cp -ax /{dev,var,etc,usr,bin,sbin,lib} /mnt
+      # mkdir /mnt/{root,proc,sys,home,tmp}
+
+      Note: You may miss some device files. If so, please create them
+      with mknod. Or you can use tar instead of cp.
+
+ 2. modify DomU's fstab::
+
+      # vi /mnt/etc/fstab
+         /dev/xvda1  /            ext3    defaults        1 1
+         none        /dev/pts     devpts  gid=5,mode=620  0 0
+         none        /dev/shm     tmpfs   defaults        0 0
+         none        /proc        proc    defaults        0 0
+         none        /sys         sysfs   defaults        0 0
+
+ 3. modify inittab
+
+    set runlevel to 3 to avoid X trying to start::
+
+      # vi /mnt/etc/inittab
+         id:3:initdefault:
+
+    Start a getty on the hvc0 console::
+
+       X0:2345:respawn:/sbin/mingetty hvc0
+
+    tty1-6 mingetty can be commented out
+
+ 4. add hvc0 into /etc/securetty::
+
+      # vi /mnt/etc/securetty (add hvc0)
+
+ 5. umount::
+
+      # umount /mnt
+
+FYI, virt-manager can also make a disk image for guest OS.
+It's GUI tools and easy to make it.
+
+Boot Xen & Domain0
+==================
+
+ 1. replace elilo
+    elilo of RHEL5 can boot Xen and Dom0.
+    If you use old elilo (e.g RHEL4), please download from the below
+    http://elilo.sourceforge.net/cgi-bin/blosxom
+    and copy into /boot/efi/efi/redhat/::
+
+      # cp elilo-3.6-ia64.efi /boot/efi/efi/redhat/elilo.efi
+
+ 2. modify elilo.conf (like the below)::
+
+      # vi /boot/efi/efi/redhat/elilo.conf
+      prompt
+      timeout=20
+      default=xen
+      relocatable
+
+      image=vmlinuz-2.6.18.8-xen
+             label=xen
+             vmm=xen.gz
+             initrd=initrd-2.6.18.8-xen.img
+             read-only
+             append=" -- rhgb root=/dev/sda2"
+
+The append options before "--" are for xen hypervisor,
+the options after "--" are for dom0.
+
+FYI, your machine may need console options like
+"com1=19200,8n1 console=vga,com1". For example,
+append="com1=19200,8n1 console=vga,com1 -- rhgb console=tty0 \
+console=ttyS0 root=/dev/sda2"
+
+Getting and Building domU with pv_ops
+=====================================
+
+ 1. get pv_ops tree::
+
+      # git clone http://people.valinux.co.jp/~yamahata/xen-ia64/linux-2.6-xen-ia64.git/
+
+ 2. git branch (if necessary)::
+
+      # cd linux-2.6-xen-ia64/
+      # git checkout -b your_branch origin/xen-ia64-domu-minimal-2008may19
+
+   Note:
+     The current branch is xen-ia64-domu-minimal-2008may19.
+     But you would find the new branch. You can see with
+     "git branch -r" to get the branch lists.
+
+       http://people.valinux.co.jp/~yamahata/xen-ia64/for_eagl/linux-2.6-ia64-pv-ops.git/
+
+     is also available.
+
+     The tree is based on
+
+      git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6 test)
+
+ 3. copy .config for pv_ops of domU::
+
+      # cp arch/ia64/configs/xen_domu_wip_defconfig .config
+
+ 4. make kernel with pv_ops::
+
+      # make oldconfig
+      # make
+
+ 5. install the kernel and initrd::
+
+      # cp vmlinux.gz /boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU
+      # make modules_install
+      # mkinitrd -f /boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img \
+        2.6.26-rc3xen-ia64-08941-g1b12161 --builtin mptspi \
+        --builtin mptbase --builtin mptscsih --builtin uhci-hcd \
+        --builtin ohci-hcd --builtin ehci-hcd
+
+Boot DomainU with pv_ops
+========================
+
+ 1. make config of DomU::
+
+     # vi /etc/xen/rhel5
+       kernel = "/boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU"
+       ramdisk = "/boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img"
+       vcpus = 1
+       memory = 512
+       name = "rhel5"
+       disk = [ 'file:/root/rhel5.img,xvda1,w' ]
+       root = "/dev/xvda1 ro"
+       extra= "rhgb console=hvc0"
+
+ 2. After boot xen and dom0, start xend::
+
+	# /etc/init.d/xend start
+
+   ( In the debugging case, `# XEND_DEBUG=1 xend trace_start` )
+
+ 3. start domU::
+
+	# xm create -c rhel5
+
+Reference
+=========
+- Wiki of Xen/IA64 upstream merge
+  http://wiki.xensource.com/xenwiki/XenIA64/UpstreamMerge
+
+Written by Akio Takebe <takebe_akio@jp.fujitsu.com> on 28 May 2008
diff --git a/Documentation/ia64/xen.txt b/Documentation/ia64/xen.txt
deleted file mode 100644
index a12c74ce2773..000000000000
--- a/Documentation/ia64/xen.txt
+++ /dev/null
@@ -1,183 +0,0 @@
-       Recipe for getting/building/running Xen/ia64 with pv_ops
-       --------------------------------------------------------
-
-This recipe describes how to get xen-ia64 source and build it,
-and run domU with pv_ops.
-
-============
-Requirements
-============
-
-  - python
-  - mercurial
-    it (aka "hg") is an open-source source code
-    management software. See the below.
-    http://www.selenic.com/mercurial/wiki/
-  - git
-  - bridge-utils
-
-=================================
-Getting and Building Xen and Dom0
-=================================
-
-  My environment is;
-    Machine  : Tiger4
-    Domain0 OS  : RHEL5
-    DomainU OS  : RHEL5
-
- 1. Download source
-    # hg clone http://xenbits.xensource.com/ext/ia64/xen-unstable.hg
-    # cd xen-unstable.hg
-    # hg clone http://xenbits.xensource.com/ext/ia64/linux-2.6.18-xen.hg
-
- 2. # make world
-
- 3. # make install-tools
-
- 4. copy kernels and xen
-    # cp xen/xen.gz /boot/efi/efi/redhat/
-    # cp build-linux-2.6.18-xen_ia64/vmlinux.gz \
-      /boot/efi/efi/redhat/vmlinuz-2.6.18.8-xen
-
- 5. make initrd for Dom0/DomU
-    # make -C linux-2.6.18-xen.hg ARCH=ia64 modules_install \
-      O=$(pwd)/build-linux-2.6.18-xen_ia64
-    # mkinitrd -f /boot/efi/efi/redhat/initrd-2.6.18.8-xen.img \
-      2.6.18.8-xen --builtin mptspi --builtin mptbase \
-      --builtin mptscsih --builtin uhci-hcd --builtin ohci-hcd \
-      --builtin ehci-hcd
-
-================================
-Making a disk image for guest OS
-================================
-
- 1. make file
-    # dd if=/dev/zero of=/root/rhel5.img bs=1M seek=4096 count=0
-    # mke2fs -F -j /root/rhel5.img
-    # mount -o loop /root/rhel5.img /mnt
-    # cp -ax /{dev,var,etc,usr,bin,sbin,lib} /mnt
-    # mkdir /mnt/{root,proc,sys,home,tmp}
-
-    Note: You may miss some device files. If so, please create them
-    with mknod. Or you can use tar instead of cp.
-
- 2. modify DomU's fstab
-    # vi /mnt/etc/fstab
-       /dev/xvda1  /            ext3    defaults        1 1
-       none        /dev/pts     devpts  gid=5,mode=620  0 0
-       none        /dev/shm     tmpfs   defaults        0 0
-       none        /proc        proc    defaults        0 0
-       none        /sys         sysfs   defaults        0 0
-
- 3. modify inittab
-    set runlevel to 3 to avoid X trying to start
-    # vi /mnt/etc/inittab
-       id:3:initdefault:
-    Start a getty on the hvc0 console
-       X0:2345:respawn:/sbin/mingetty hvc0
-    tty1-6 mingetty can be commented out
-
- 4. add hvc0 into /etc/securetty
-    # vi /mnt/etc/securetty (add hvc0)
-
- 5. umount
-    # umount /mnt
-
-FYI, virt-manager can also make a disk image for guest OS.
-It's GUI tools and easy to make it.
-
-==================
-Boot Xen & Domain0
-==================
-
- 1. replace elilo
-    elilo of RHEL5 can boot Xen and Dom0.
-    If you use old elilo (e.g RHEL4), please download from the below
-    http://elilo.sourceforge.net/cgi-bin/blosxom
-    and copy into /boot/efi/efi/redhat/
-    # cp elilo-3.6-ia64.efi /boot/efi/efi/redhat/elilo.efi
-
- 2. modify elilo.conf (like the below)
-    # vi /boot/efi/efi/redhat/elilo.conf
-     prompt
-     timeout=20
-     default=xen
-     relocatable
-
-     image=vmlinuz-2.6.18.8-xen
-             label=xen
-             vmm=xen.gz
-             initrd=initrd-2.6.18.8-xen.img
-             read-only
-             append=" -- rhgb root=/dev/sda2"
-
-The append options before "--" are for xen hypervisor,
-the options after "--" are for dom0.
-
-FYI, your machine may need console options like
-"com1=19200,8n1 console=vga,com1". For example,
-append="com1=19200,8n1 console=vga,com1 -- rhgb console=tty0 \
-console=ttyS0 root=/dev/sda2"
-
-=====================================
-Getting and Building domU with pv_ops
-=====================================
-
- 1. get pv_ops tree
-    # git clone http://people.valinux.co.jp/~yamahata/xen-ia64/linux-2.6-xen-ia64.git/
-
- 2. git branch (if necessary)
-    # cd linux-2.6-xen-ia64/
-    # git checkout -b your_branch origin/xen-ia64-domu-minimal-2008may19
-    (Note: The current branch is xen-ia64-domu-minimal-2008may19.
-    But you would find the new branch. You can see with
-    "git branch -r" to get the branch lists.
-    http://people.valinux.co.jp/~yamahata/xen-ia64/for_eagl/linux-2.6-ia64-pv-ops.git/
-    is also available. The tree is based on
-    git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6 test)
-
-
- 3. copy .config for pv_ops of domU
-    # cp arch/ia64/configs/xen_domu_wip_defconfig .config
-
- 4. make kernel with pv_ops
-    # make oldconfig
-    # make
-
- 5. install the kernel and initrd
-    # cp vmlinux.gz /boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU
-    # make modules_install
-    # mkinitrd -f /boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img \
-      2.6.26-rc3xen-ia64-08941-g1b12161 --builtin mptspi \
-      --builtin mptbase --builtin mptscsih --builtin uhci-hcd \
-      --builtin ohci-hcd --builtin ehci-hcd
-
-========================
-Boot DomainU with pv_ops
-========================
-
- 1. make config of DomU
-   # vi /etc/xen/rhel5
-     kernel = "/boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU"
-     ramdisk = "/boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img"
-     vcpus = 1
-     memory = 512
-     name = "rhel5"
-     disk = [ 'file:/root/rhel5.img,xvda1,w' ]
-     root = "/dev/xvda1 ro"
-     extra= "rhgb console=hvc0"
-
- 2. After boot xen and dom0, start xend
-   # /etc/init.d/xend start
-   ( In the debugging case, # XEND_DEBUG=1 xend trace_start )
-
- 3. start domU
-   # xm create -c rhel5
-
-=========
-Reference
-=========
-- Wiki of Xen/IA64 upstream merge
-  http://wiki.xensource.com/xenwiki/XenIA64/UpstreamMerge
-
-Written by Akio Takebe <takebe_akio@jp.fujitsu.com> on 28 May 2008
-- 
cgit 


From b02f1651ff7758c4db0d759ab765d39986a79f5a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 11:12:57 -0300
Subject: docs: laptops: convert to ReST

Rename the laptops documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com>
---
 Documentation/ABI/testing/sysfs-block-device       |    2 +-
 .../ABI/testing/sysfs-platform-asus-laptop         |    2 +-
 Documentation/admin-guide/kernel-parameters.txt    |    2 +-
 Documentation/laptops/asus-laptop.rst              |  271 ++++
 Documentation/laptops/asus-laptop.txt              |  257 ----
 Documentation/laptops/disk-shock-protection.rst    |  151 ++
 Documentation/laptops/disk-shock-protection.txt    |  149 --
 Documentation/laptops/index.rst                    |   17 +
 Documentation/laptops/laptop-mode.rst              |  781 ++++++++++
 Documentation/laptops/laptop-mode.txt              |  782 ----------
 Documentation/laptops/sony-laptop.rst              |  174 +++
 Documentation/laptops/sony-laptop.txt              |  144 --
 Documentation/laptops/sonypi.rst                   |  160 ++
 Documentation/laptops/sonypi.txt                   |  152 --
 Documentation/laptops/thinkpad-acpi.rst            | 1562 ++++++++++++++++++++
 Documentation/laptops/thinkpad-acpi.txt            | 1487 -------------------
 Documentation/laptops/toshiba_haps.rst             |   87 ++
 Documentation/laptops/toshiba_haps.txt             |   76 -
 Documentation/sysctl/vm.txt                        |    4 +-
 19 files changed, 3208 insertions(+), 3052 deletions(-)
 create mode 100644 Documentation/laptops/asus-laptop.rst
 delete mode 100644 Documentation/laptops/asus-laptop.txt
 create mode 100644 Documentation/laptops/disk-shock-protection.rst
 delete mode 100644 Documentation/laptops/disk-shock-protection.txt
 create mode 100644 Documentation/laptops/index.rst
 create mode 100644 Documentation/laptops/laptop-mode.rst
 delete mode 100644 Documentation/laptops/laptop-mode.txt
 create mode 100644 Documentation/laptops/sony-laptop.rst
 delete mode 100644 Documentation/laptops/sony-laptop.txt
 create mode 100644 Documentation/laptops/sonypi.rst
 delete mode 100644 Documentation/laptops/sonypi.txt
 create mode 100644 Documentation/laptops/thinkpad-acpi.rst
 delete mode 100644 Documentation/laptops/thinkpad-acpi.txt
 create mode 100644 Documentation/laptops/toshiba_haps.rst
 delete mode 100644 Documentation/laptops/toshiba_haps.txt

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-block-device b/Documentation/ABI/testing/sysfs-block-device
index 82ef6eab042d..0d57bbb4fddc 100644
--- a/Documentation/ABI/testing/sysfs-block-device
+++ b/Documentation/ABI/testing/sysfs-block-device
@@ -45,7 +45,7 @@ Description:
 		- Values below -2 are rejected with -EINVAL
 
 		For more information, see
-		Documentation/laptops/disk-shock-protection.txt
+		Documentation/laptops/disk-shock-protection.rst
 
 
 What:		/sys/block/*/device/ncq_prio_enable
diff --git a/Documentation/ABI/testing/sysfs-platform-asus-laptop b/Documentation/ABI/testing/sysfs-platform-asus-laptop
index cd9d667c3da2..d67fa4bafa70 100644
--- a/Documentation/ABI/testing/sysfs-platform-asus-laptop
+++ b/Documentation/ABI/testing/sysfs-platform-asus-laptop
@@ -31,7 +31,7 @@ Description:
 		To control the LED display, use the following :
 		    echo 0x0T000DDD > /sys/devices/platform/asus_laptop/
 		where T control the 3 letters display, and DDD the 3 digits display.
-		The DDD table can be found in Documentation/laptops/asus-laptop.txt
+		The DDD table can be found in Documentation/laptops/asus-laptop.rst
 
 What:		/sys/devices/platform/asus_laptop/bluetooth
 Date:		January 2007
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index ed104a44e8b2..a342dd5c95a9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4347,7 +4347,7 @@
 			Format: <integer>
 
 	sonypi.*=	[HW] Sony Programmable I/O Control Device driver
-			See Documentation/laptops/sonypi.txt
+			See Documentation/laptops/sonypi.rst
 
 	spectre_v2=	[X86] Control mitigation of Spectre variant 2
 			(indirect branch speculation) vulnerability.
diff --git a/Documentation/laptops/asus-laptop.rst b/Documentation/laptops/asus-laptop.rst
new file mode 100644
index 000000000000..95176321a25a
--- /dev/null
+++ b/Documentation/laptops/asus-laptop.rst
@@ -0,0 +1,271 @@
+==================
+Asus Laptop Extras
+==================
+
+Version 0.1
+
+August 6, 2009
+
+Corentin Chary <corentincj@iksaif.net>
+http://acpi4asus.sf.net/
+
+ This driver provides support for extra features of ACPI-compatible ASUS laptops.
+ It may also support some MEDION, JVC or VICTOR laptops (such as MEDION 9675 or
+ VICTOR XP7210 for example). It makes all the extra buttons generate input
+ events (like keyboards).
+
+ On some models adds support for changing the display brightness and output,
+ switching the LCD backlight on and off, and most importantly, allows you to
+ blink those fancy LEDs intended for reporting mail and wireless status.
+
+This driver supersedes the old asus_acpi driver.
+
+Requirements
+------------
+
+  Kernel 2.6.X sources, configured for your computer, with ACPI support.
+  You also need CONFIG_INPUT and CONFIG_ACPI.
+
+Status
+------
+
+ The features currently supported are the following (see below for
+ detailed description):
+
+ - Fn key combinations
+ - Bluetooth enable and disable
+ - Wlan enable and disable
+ - GPS enable and disable
+ - Video output switching
+ - Ambient Light Sensor on and off
+ - LED control
+ - LED Display control
+ - LCD brightness control
+ - LCD on and off
+
+ A compatibility table by model and feature is maintained on the web
+ site, http://acpi4asus.sf.net/.
+
+Usage
+-----
+
+  Try "modprobe asus-laptop". Check your dmesg (simply type dmesg). You should
+  see some lines like this :
+
+      Asus Laptop Extras version 0.42
+        - L2D model detected.
+
+  If it is not the output you have on your laptop, send it (and the laptop's
+  DSDT) to me.
+
+  That's all, now, all the events generated by the hotkeys of your laptop
+  should be reported via netlink events. You can check with
+  "acpi_genl monitor" (part of the acpica project).
+
+  Hotkeys are also reported as input keys (like keyboards) you can check
+  which key are supported using "xev" under X11.
+
+  You can get information on the version of your DSDT table by reading the
+  /sys/devices/platform/asus-laptop/infos entry. If you have a question or a
+  bug report to do, please include the output of this entry.
+
+LEDs
+----
+
+  You can modify LEDs be echoing values to `/sys/class/leds/asus/*/brightness`::
+
+    echo 1 >  /sys/class/leds/asus::mail/brightness
+
+  will switch the mail LED on.
+
+  You can also know if they are on/off by reading their content and use
+  kernel triggers like disk-activity or heartbeat.
+
+Backlight
+---------
+
+  You can control lcd backlight power and brightness with
+  /sys/class/backlight/asus-laptop/. Brightness Values are between 0 and 15.
+
+Wireless devices
+----------------
+
+  You can turn the internal Bluetooth adapter on/off with the bluetooth entry
+  (only on models with Bluetooth). This usually controls the associated LED.
+  Same for Wlan adapter.
+
+Display switching
+-----------------
+
+  Note: the display switching code is currently considered EXPERIMENTAL.
+
+  Switching works for the following models:
+
+    - L3800C
+    - A2500H
+    - L5800C
+    - M5200N
+    - W1000N (albeit with some glitches)
+    - M6700R
+    - A6JC
+    - F3J
+
+  Switching doesn't work for the following:
+
+    - M3700N
+    - L2X00D (locks the laptop under certain conditions)
+
+  To switch the displays, echo values from 0 to 15 to
+  /sys/devices/platform/asus-laptop/display. The significance of those values
+  is as follows:
+
+  +-------+-----+-----+-----+-----+-----+
+  | Bin   | Val | DVI | TV  | CRT | LCD |
+  +-------+-----+-----+-----+-----+-----+
+  | 0000  |   0 |     |     |     |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 0001  |   1 |     |     |     |  X  |
+  +-------+-----+-----+-----+-----+-----+
+  | 0010  |   2 |     |     |  X  |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 0011  |   3 |     |     |  X  |  X  |
+  +-------+-----+-----+-----+-----+-----+
+  | 0100  |   4 |     |  X  |     |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 0101  |   5 |     |  X  |     | X   |
+  +-------+-----+-----+-----+-----+-----+
+  | 0110  |   6 |     |  X  |  X  |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 0111  |   7 |     |  X  |  X  |  X  |
+  +-------+-----+-----+-----+-----+-----+
+  | 1000  |   8 |  X  |     |     |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 1001  |   9 |  X  |     |     |  X  |
+  +-------+-----+-----+-----+-----+-----+
+  | 1010  |  10 |  X  |     |  X  |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 1011  |  11 |  X  |     |  X  |  X  |
+  +-------+-----+-----+-----+-----+-----+
+  | 1100  |  12 |  X  |  X  |     |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 1101  |  13 |  X  |  X  |     |  X  |
+  +-------+-----+-----+-----+-----+-----+
+  | 1110  |  14 |  X  |  X  |  X  |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 1111  |  15 |  X  |  X  |  X  |  X  |
+  +-------+-----+-----+-----+-----+-----+
+
+  In most cases, the appropriate displays must be plugged in for the above
+  combinations to work. TV-Out may need to be initialized at boot time.
+
+  Debugging:
+
+  1) Check whether the Fn+F8 key:
+
+     a) does not lock the laptop (try a boot with noapic / nolapic if it does)
+     b) generates events (0x6n, where n is the value corresponding to the
+        configuration above)
+     c) actually works
+
+     Record the disp value at every configuration.
+  2) Echo values from 0 to 15 to /sys/devices/platform/asus-laptop/display.
+     Record its value, note any change. If nothing changes, try a broader range,
+     up to 65535.
+  3) Send ANY output (both positive and negative reports are needed, unless your
+     machine is already listed above) to the acpi4asus-user mailing list.
+
+  Note: on some machines (e.g. L3C), after the module has been loaded, only 0x6n
+  events are generated and no actual switching occurs. In such a case, a line
+  like::
+
+    echo $((10#$arg-60)) > /sys/devices/platform/asus-laptop/display
+
+  will usually do the trick ($arg is the 0000006n-like event passed to acpid).
+
+  Note: there is currently no reliable way to read display status on xxN
+  (Centrino) models.
+
+LED display
+-----------
+
+  Some models like the W1N have a LED display that can be used to display
+  several items of information.
+
+  LED display works for the following models:
+
+    - W1000N
+    - W1J
+
+  To control the LED display, use the following::
+
+    echo 0x0T000DDD > /sys/devices/platform/asus-laptop/
+
+  where T control the 3 letters display, and DDD the 3 digits display,
+  according to the tables below::
+
+         DDD (digits)
+         000 to 999 = display digits
+         AAA        = ---
+         BBB to FFF = turn-off
+
+         T  (type)
+         0 = off
+         1 = dvd
+         2 = vcd
+         3 = mp3
+         4 = cd
+         5 = tv
+         6 = cpu
+         7 = vol
+
+  For example "echo 0x01000001 >/sys/devices/platform/asus-laptop/ledd"
+  would display "DVD001".
+
+Driver options
+--------------
+
+ Options can be passed to the asus-laptop driver using the standard
+ module argument syntax (<param>=<value> when passing the option to the
+ module or asus-laptop.<param>=<value> on the kernel boot line when
+ asus-laptop is statically linked into the kernel).
+
+	     wapf: WAPF defines the behavior of the Fn+Fx wlan key
+		   The significance of values is yet to be found, but
+		   most of the time:
+
+		   - 0x0 should do nothing
+		   - 0x1 should allow to control the device with Fn+Fx key.
+		   - 0x4 should send an ACPI event (0x88) while pressing the Fn+Fx key
+		   - 0x5 like 0x1 or 0x4
+
+ The default value is 0x1.
+
+Unsupported models
+------------------
+
+ These models will never be supported by this module, as they use a completely
+ different mechanism to handle LEDs and extra stuff (meaning we have no clue
+ how it works):
+
+ - ASUS A1300 (A1B), A1370D
+ - ASUS L7300G
+ - ASUS L8400
+
+Patches, Errors, Questions
+--------------------------
+
+ I appreciate any success or failure
+ reports, especially if they add to or correct the compatibility table.
+ Please include the following information in your report:
+
+ - Asus model name
+ - a copy of your ACPI tables, using the "acpidump" utility
+ - a copy of /sys/devices/platform/asus-laptop/infos
+ - which driver features work and which don't
+ - the observed behavior of non-working features
+
+ Any other comments or patches are also more than welcome.
+
+ acpi4asus-user@lists.sourceforge.net
+
+ http://sourceforge.net/projects/acpi4asus
diff --git a/Documentation/laptops/asus-laptop.txt b/Documentation/laptops/asus-laptop.txt
deleted file mode 100644
index 5f2858712aa0..000000000000
--- a/Documentation/laptops/asus-laptop.txt
+++ /dev/null
@@ -1,257 +0,0 @@
-Asus Laptop Extras
-
-Version 0.1
-August 6, 2009
-
-Corentin Chary <corentincj@iksaif.net>
-http://acpi4asus.sf.net/
-
- This driver provides support for extra features of ACPI-compatible ASUS laptops.
- It may also support some MEDION, JVC or VICTOR laptops (such as MEDION 9675 or
- VICTOR XP7210 for example). It makes all the extra buttons generate input
- events (like keyboards).
- On some models adds support for changing the display brightness and output,
- switching the LCD backlight on and off, and most importantly, allows you to
- blink those fancy LEDs intended for reporting mail and wireless status.
-
-This driver supercedes the old asus_acpi driver.
-
-Requirements
-------------
-
-  Kernel 2.6.X sources, configured for your computer, with ACPI support.
-  You also need CONFIG_INPUT and CONFIG_ACPI.
-
-Status
-------
-
- The features currently supported are the following (see below for
- detailed description):
-
- - Fn key combinations
- - Bluetooth enable and disable
- - Wlan enable and disable
- - GPS enable and disable
- - Video output switching
- - Ambient Light Sensor on and off
- - LED control
- - LED Display control
- - LCD brightness control
- - LCD on and off
-
- A compatibility table by model and feature is maintained on the web
- site, http://acpi4asus.sf.net/.
-
-Usage
------
-
-  Try "modprobe asus-laptop". Check your dmesg (simply type dmesg). You should
-  see some lines like this :
-
-      Asus Laptop Extras version 0.42
-        L2D model detected.
-
-  If it is not the output you have on your laptop, send it (and the laptop's
-  DSDT) to me.
-
-  That's all, now, all the events generated by the hotkeys of your laptop
-  should be reported via netlink events. You can check with
-  "acpi_genl monitor" (part of the acpica project).
-
-  Hotkeys are also reported as input keys (like keyboards) you can check
-  which key are supported using "xev" under X11.
-
-  You can get information on the version of your DSDT table by reading the
-  /sys/devices/platform/asus-laptop/infos entry. If you have a question or a
-  bug report to do, please include the output of this entry.
-
-LEDs
-----
-
-  You can modify LEDs be echoing values to /sys/class/leds/asus::*/brightness :
-    echo 1 >  /sys/class/leds/asus::mail/brightness
-  will switch the mail LED on.
-  You can also know if they are on/off by reading their content and use
-  kernel triggers like disk-activity or heartbeat.
-
-Backlight
----------
-
-  You can control lcd backlight power and brightness with
-  /sys/class/backlight/asus-laptop/. Brightness Values are between 0 and 15.
-
-Wireless devices
----------------
-
-  You can turn the internal Bluetooth adapter on/off with the bluetooth entry
-  (only on models with Bluetooth). This usually controls the associated LED.
-  Same for Wlan adapter.
-
-Display switching
------------------
-
-  Note: the display switching code is currently considered EXPERIMENTAL.
-
-  Switching works for the following models:
-    L3800C
-    A2500H
-    L5800C
-    M5200N
-    W1000N (albeit with some glitches)
-    M6700R
-    A6JC
-    F3J
-
-  Switching doesn't work for the following:
-    M3700N
-    L2X00D (locks the laptop under certain conditions)
-
-  To switch the displays, echo values from 0 to 15 to
-  /sys/devices/platform/asus-laptop/display. The significance of those values
-  is as follows:
-
-  +-------+-----+-----+-----+-----+-----+
-  | Bin   | Val | DVI | TV  | CRT | LCD |
-  +-------+-----+-----+-----+-----+-----+
-  + 0000  +   0 +     +     +     +     +
-  +-------+-----+-----+-----+-----+-----+
-  + 0001  +   1 +     +     +     +  X  +
-  +-------+-----+-----+-----+-----+-----+
-  + 0010  +   2 +     +     +  X  +     +
-  +-------+-----+-----+-----+-----+-----+
-  + 0011  +   3 +     +     +  X  +  X  +
-  +-------+-----+-----+-----+-----+-----+
-  + 0100  +   4 +     +  X  +     +     +
-  +-------+-----+-----+-----+-----+-----+
-  + 0101  +   5 +     +  X  +     + X   +
-  +-------+-----+-----+-----+-----+-----+
-  + 0110  +   6 +     +  X  +  X  +     +
-  +-------+-----+-----+-----+-----+-----+
-  + 0111  +   7 +     +  X  +  X  +  X  +
-  +-------+-----+-----+-----+-----+-----+
-  + 1000  +   8 +  X  +     +     +     +
-  +-------+-----+-----+-----+-----+-----+
-  + 1001  +   9 +  X  +     +     +  X  +
-  +-------+-----+-----+-----+-----+-----+
-  + 1010  +  10 +  X  +     +  X  +     +
-  +-------+-----+-----+-----+-----+-----+
-  + 1011  +  11 +  X  +     +  X  +  X  +
-  +-------+-----+-----+-----+-----+-----+
-  + 1100  +  12 +  X  +  X  +     +     +
-  +-------+-----+-----+-----+-----+-----+
-  + 1101  +  13 +  X  +  X  +     +  X  +
-  +-------+-----+-----+-----+-----+-----+
-  + 1110  +  14 +  X  +  X  +  X  +     +
-  +-------+-----+-----+-----+-----+-----+
-  + 1111  +  15 +  X  +  X  +  X  +  X  +
-  +-------+-----+-----+-----+-----+-----+
-
-  In most cases, the appropriate displays must be plugged in for the above
-  combinations to work. TV-Out may need to be initialized at boot time.
-
-  Debugging:
-  1) Check whether the Fn+F8 key:
-     a) does not lock the laptop (try a boot with noapic / nolapic if it does)
-     b) generates events (0x6n, where n is the value corresponding to the
-        configuration above)
-     c) actually works
-     Record the disp value at every configuration.
-  2) Echo values from 0 to 15 to /sys/devices/platform/asus-laptop/display.
-     Record its value, note any change. If nothing changes, try a broader range,
-     up to 65535.
-  3) Send ANY output (both positive and negative reports are needed, unless your
-     machine is already listed above) to the acpi4asus-user mailing list.
-
-  Note: on some machines (e.g. L3C), after the module has been loaded, only 0x6n
-  events are generated and no actual switching occurs. In such a case, a line
-  like:
-
-    echo $((10#$arg-60)) > /sys/devices/platform/asus-laptop/display
-
-  will usually do the trick ($arg is the 0000006n-like event passed to acpid).
-
-  Note: there is currently no reliable way to read display status on xxN
-  (Centrino) models.
-
-LED display
------------
-
-  Some models like the W1N have a LED display that can be used to display
-  several items of information.
-
-  LED display works for the following models:
-    W1000N
-    W1J
-
-  To control the LED display, use the following :
-
-    echo 0x0T000DDD > /sys/devices/platform/asus-laptop/
-
-  where T control the 3 letters display, and DDD the 3 digits display,
-  according to the tables below.
-
-         DDD (digits)
-         000 to 999 = display digits
-         AAA        = ---
-         BBB to FFF = turn-off
-
-         T  (type)
-         0 = off
-         1 = dvd
-         2 = vcd
-         3 = mp3
-         4 = cd
-         5 = tv
-         6 = cpu
-         7 = vol
-
-  For example "echo 0x01000001 >/sys/devices/platform/asus-laptop/ledd"
-  would display "DVD001".
-
-Driver options:
----------------
-
- Options can be passed to the asus-laptop driver using the standard
- module argument syntax (<param>=<value> when passing the option to the
- module or asus-laptop.<param>=<value> on the kernel boot line when
- asus-laptop is statically linked into the kernel).
-
-	     wapf: WAPF defines the behavior of the Fn+Fx wlan key
-		   The significance of values is yet to be found, but
-		   most of the time:
-		   - 0x0 should do nothing
-		   - 0x1 should allow to control the device with Fn+Fx key.
-		   - 0x4 should send an ACPI event (0x88) while pressing the Fn+Fx key
-		   - 0x5 like 0x1 or 0x4
-
- The default value is 0x1.
-
-Unsupported models
-------------------
-
- These models will never be supported by this module, as they use a completely
- different mechanism to handle LEDs and extra stuff (meaning we have no clue
- how it works):
-
- - ASUS A1300 (A1B), A1370D
- - ASUS L7300G
- - ASUS L8400
-
-Patches, Errors, Questions:
---------------------------
-
- I appreciate any success or failure
- reports, especially if they add to or correct the compatibility table.
- Please include the following information in your report:
-
- - Asus model name
- - a copy of your ACPI tables, using the "acpidump" utility
- - a copy of /sys/devices/platform/asus-laptop/infos
- - which driver features work and which don't
- - the observed behavior of non-working features
-
- Any other comments or patches are also more than welcome.
-
- acpi4asus-user@lists.sourceforge.net
- http://sourceforge.net/projects/acpi4asus
-
diff --git a/Documentation/laptops/disk-shock-protection.rst b/Documentation/laptops/disk-shock-protection.rst
new file mode 100644
index 000000000000..e97c5f78d8c3
--- /dev/null
+++ b/Documentation/laptops/disk-shock-protection.rst
@@ -0,0 +1,151 @@
+==========================
+Hard disk shock protection
+==========================
+
+Author: Elias Oltmanns <eo@nebensachen.de>
+
+Last modified: 2008-10-03
+
+
+.. 0. Contents
+
+   1. Intro
+   2. The interface
+   3. References
+   4. CREDITS
+
+
+1. Intro
+--------
+
+ATA/ATAPI-7 specifies the IDLE IMMEDIATE command with unload feature.
+Issuing this command should cause the drive to switch to idle mode and
+unload disk heads. This feature is being used in modern laptops in
+conjunction with accelerometers and appropriate software to implement
+a shock protection facility. The idea is to stop all I/O operations on
+the internal hard drive and park its heads on the ramp when critical
+situations are anticipated. The desire to have such a feature
+available on GNU/Linux systems has been the original motivation to
+implement a generic disk head parking interface in the Linux kernel.
+Please note, however, that other components have to be set up on your
+system in order to get disk shock protection working (see
+section 3. References below for pointers to more information about
+that).
+
+
+2. The interface
+----------------
+
+For each ATA device, the kernel exports the file
+`block/*/device/unload_heads` in sysfs (here assumed to be mounted under
+/sys). Access to `/sys/block/*/device/unload_heads` is denied with
+-EOPNOTSUPP if the device does not support the unload feature.
+Otherwise, writing an integer value to this file will take the heads
+of the respective drive off the platter and block all I/O operations
+for the specified number of milliseconds. When the timeout expires and
+no further disk head park request has been issued in the meantime,
+normal operation will be resumed. The maximal value accepted for a
+timeout is 30000 milliseconds. Exceeding this limit will return
+-EOVERFLOW, but heads will be parked anyway and the timeout will be
+set to 30 seconds. However, you can always change a timeout to any
+value between 0 and 30000 by issuing a subsequent head park request
+before the timeout of the previous one has expired. In particular, the
+total timeout can exceed 30 seconds and, more importantly, you can
+cancel a previously set timeout and resume normal operation
+immediately by specifying a timeout of 0. Values below -2 are rejected
+with -EINVAL (see below for the special meaning of -1 and -2). If the
+timeout specified for a recent head park request has not yet expired,
+reading from `/sys/block/*/device/unload_heads` will report the number
+of milliseconds remaining until normal operation will be resumed;
+otherwise, reading the unload_heads attribute will return 0.
+
+For example, do the following in order to park the heads of drive
+/dev/sda and stop all I/O operations for five seconds::
+
+	# echo 5000 > /sys/block/sda/device/unload_heads
+
+A simple::
+
+	# cat /sys/block/sda/device/unload_heads
+
+will show you how many milliseconds are left before normal operation
+will be resumed.
+
+A word of caution: The fact that the interface operates on a basis of
+milliseconds may raise expectations that cannot be satisfied in
+reality. In fact, the ATA specs clearly state that the time for an
+unload operation to complete is vendor specific. The hint in ATA-7
+that this will typically be within 500 milliseconds apparently has
+been dropped in ATA-8.
+
+There is a technical detail of this implementation that may cause some
+confusion and should be discussed here. When a head park request has
+been issued to a device successfully, all I/O operations on the
+controller port this device is attached to will be deferred. That is
+to say, any other device that may be connected to the same port will
+be affected too. The only exception is that a subsequent head unload
+request to that other device will be executed immediately. Further
+operations on that port will be deferred until the timeout specified
+for either device on the port has expired. As far as PATA (old style
+IDE) configurations are concerned, there can only be two devices
+attached to any single port. In SATA world we have port multipliers
+which means that a user-issued head parking request to one device may
+actually result in stopping I/O to a whole bunch of devices. However,
+since this feature is supposed to be used on laptops and does not seem
+to be very useful in any other environment, there will be mostly one
+device per port. Even if the CD/DVD writer happens to be connected to
+the same port as the hard drive, it generally *should* recover just
+fine from the occasional buffer under-run incurred by a head park
+request to the HD. Actually, when you are using an ide driver rather
+than its libata counterpart (i.e. your disk is called /dev/hda
+instead of /dev/sda), then parking the heads of one drive (drive X)
+will generally not affect the mode of operation of another drive
+(drive Y) on the same port as described above. It is only when a port
+reset is required to recover from an exception on drive Y that further
+I/O operations on that drive (and the reset itself) will be delayed
+until drive X is no longer in the parked state.
+
+Finally, there are some hard drives that only comply with an earlier
+version of the ATA standard than ATA-7, but do support the unload
+feature nonetheless. Unfortunately, there is no safe way Linux can
+detect these devices, so you won't be able to write to the
+unload_heads attribute. If you know that your device really does
+support the unload feature (for instance, because the vendor of your
+laptop or the hard drive itself told you so), then you can tell the
+kernel to enable the usage of this feature for that drive by writing
+the special value -1 to the unload_heads attribute::
+
+	# echo -1 > /sys/block/sda/device/unload_heads
+
+will enable the feature for /dev/sda, and giving -2 instead of -1 will
+disable it again.
+
+
+3. References
+-------------
+
+There are several laptops from different vendors featuring shock
+protection capabilities. As manufacturers have refused to support open
+source development of the required software components so far, Linux
+support for shock protection varies considerably between different
+hardware implementations. Ideally, this section should contain a list
+of pointers at different projects aiming at an implementation of shock
+protection on different systems. Unfortunately, I only know of a
+single project which, although still considered experimental, is fit
+for use. Please feel free to add projects that have been the victims
+of my ignorance.
+
+- http://www.thinkwiki.org/wiki/HDAPS
+
+  See this page for information about Linux support of the hard disk
+  active protection system as implemented in IBM/Lenovo Thinkpads.
+
+
+4. CREDITS
+----------
+
+This implementation of disk head parking has been inspired by a patch
+originally published by Jon Escombe <lists@dresco.co.uk>. My efforts
+to develop an implementation of this feature that is fit to be merged
+into mainline have been aided by various kernel developers, in
+particular by Tejun Heo and Bartlomiej Zolnierkiewicz.
diff --git a/Documentation/laptops/disk-shock-protection.txt b/Documentation/laptops/disk-shock-protection.txt
deleted file mode 100644
index 0e6ba2663834..000000000000
--- a/Documentation/laptops/disk-shock-protection.txt
+++ /dev/null
@@ -1,149 +0,0 @@
-Hard disk shock protection
-==========================
-
-Author: Elias Oltmanns <eo@nebensachen.de>
-Last modified: 2008-10-03
-
-
-0. Contents
------------
-
-1. Intro
-2. The interface
-3. References
-4. CREDITS
-
-
-1. Intro
---------
-
-ATA/ATAPI-7 specifies the IDLE IMMEDIATE command with unload feature.
-Issuing this command should cause the drive to switch to idle mode and
-unload disk heads. This feature is being used in modern laptops in
-conjunction with accelerometers and appropriate software to implement
-a shock protection facility. The idea is to stop all I/O operations on
-the internal hard drive and park its heads on the ramp when critical
-situations are anticipated. The desire to have such a feature
-available on GNU/Linux systems has been the original motivation to
-implement a generic disk head parking interface in the Linux kernel.
-Please note, however, that other components have to be set up on your
-system in order to get disk shock protection working (see
-section 3. References below for pointers to more information about
-that).
-
-
-2. The interface
-----------------
-
-For each ATA device, the kernel exports the file
-block/*/device/unload_heads in sysfs (here assumed to be mounted under
-/sys). Access to /sys/block/*/device/unload_heads is denied with
--EOPNOTSUPP if the device does not support the unload feature.
-Otherwise, writing an integer value to this file will take the heads
-of the respective drive off the platter and block all I/O operations
-for the specified number of milliseconds. When the timeout expires and
-no further disk head park request has been issued in the meantime,
-normal operation will be resumed. The maximal value accepted for a
-timeout is 30000 milliseconds. Exceeding this limit will return
--EOVERFLOW, but heads will be parked anyway and the timeout will be
-set to 30 seconds. However, you can always change a timeout to any
-value between 0 and 30000 by issuing a subsequent head park request
-before the timeout of the previous one has expired. In particular, the
-total timeout can exceed 30 seconds and, more importantly, you can
-cancel a previously set timeout and resume normal operation
-immediately by specifying a timeout of 0. Values below -2 are rejected
-with -EINVAL (see below for the special meaning of -1 and -2). If the
-timeout specified for a recent head park request has not yet expired,
-reading from /sys/block/*/device/unload_heads will report the number
-of milliseconds remaining until normal operation will be resumed;
-otherwise, reading the unload_heads attribute will return 0.
-
-For example, do the following in order to park the heads of drive
-/dev/sda and stop all I/O operations for five seconds:
-
-# echo 5000 > /sys/block/sda/device/unload_heads
-
-A simple
-
-# cat /sys/block/sda/device/unload_heads
-
-will show you how many milliseconds are left before normal operation
-will be resumed.
-
-A word of caution: The fact that the interface operates on a basis of
-milliseconds may raise expectations that cannot be satisfied in
-reality. In fact, the ATA specs clearly state that the time for an
-unload operation to complete is vendor specific. The hint in ATA-7
-that this will typically be within 500 milliseconds apparently has
-been dropped in ATA-8.
-
-There is a technical detail of this implementation that may cause some
-confusion and should be discussed here. When a head park request has
-been issued to a device successfully, all I/O operations on the
-controller port this device is attached to will be deferred. That is
-to say, any other device that may be connected to the same port will
-be affected too. The only exception is that a subsequent head unload
-request to that other device will be executed immediately. Further
-operations on that port will be deferred until the timeout specified
-for either device on the port has expired. As far as PATA (old style
-IDE) configurations are concerned, there can only be two devices
-attached to any single port. In SATA world we have port multipliers
-which means that a user-issued head parking request to one device may
-actually result in stopping I/O to a whole bunch of devices. However,
-since this feature is supposed to be used on laptops and does not seem
-to be very useful in any other environment, there will be mostly one
-device per port. Even if the CD/DVD writer happens to be connected to
-the same port as the hard drive, it generally *should* recover just
-fine from the occasional buffer under-run incurred by a head park
-request to the HD. Actually, when you are using an ide driver rather
-than its libata counterpart (i.e. your disk is called /dev/hda
-instead of /dev/sda), then parking the heads of one drive (drive X)
-will generally not affect the mode of operation of another drive
-(drive Y) on the same port as described above. It is only when a port
-reset is required to recover from an exception on drive Y that further
-I/O operations on that drive (and the reset itself) will be delayed
-until drive X is no longer in the parked state.
-
-Finally, there are some hard drives that only comply with an earlier
-version of the ATA standard than ATA-7, but do support the unload
-feature nonetheless. Unfortunately, there is no safe way Linux can
-detect these devices, so you won't be able to write to the
-unload_heads attribute. If you know that your device really does
-support the unload feature (for instance, because the vendor of your
-laptop or the hard drive itself told you so), then you can tell the
-kernel to enable the usage of this feature for that drive by writing
-the special value -1 to the unload_heads attribute:
-
-# echo -1 > /sys/block/sda/device/unload_heads
-
-will enable the feature for /dev/sda, and giving -2 instead of -1 will
-disable it again.
-
-
-3. References
--------------
-
-There are several laptops from different vendors featuring shock
-protection capabilities. As manufacturers have refused to support open
-source development of the required software components so far, Linux
-support for shock protection varies considerably between different
-hardware implementations. Ideally, this section should contain a list
-of pointers at different projects aiming at an implementation of shock
-protection on different systems. Unfortunately, I only know of a
-single project which, although still considered experimental, is fit
-for use. Please feel free to add projects that have been the victims
-of my ignorance.
-
-- http://www.thinkwiki.org/wiki/HDAPS
-  See this page for information about Linux support of the hard disk
-  active protection system as implemented in IBM/Lenovo Thinkpads.
-
-
-4. CREDITS
-----------
-
-This implementation of disk head parking has been inspired by a patch
-originally published by Jon Escombe <lists@dresco.co.uk>. My efforts
-to develop an implementation of this feature that is fit to be merged
-into mainline have been aided by various kernel developers, in
-particular by Tejun Heo and Bartlomiej Zolnierkiewicz.
diff --git a/Documentation/laptops/index.rst b/Documentation/laptops/index.rst
new file mode 100644
index 000000000000..001a30910d09
--- /dev/null
+++ b/Documentation/laptops/index.rst
@@ -0,0 +1,17 @@
+:orphan:
+
+==============
+Laptop Drivers
+==============
+
+.. toctree::
+   :maxdepth: 1
+
+   asus-laptop
+   disk-shock-protection
+   laptop-mode
+   lg-laptop
+   sony-laptop
+   sonypi
+   thinkpad-acpi
+   toshiba_haps
diff --git a/Documentation/laptops/laptop-mode.rst b/Documentation/laptops/laptop-mode.rst
new file mode 100644
index 000000000000..c984c4262f2e
--- /dev/null
+++ b/Documentation/laptops/laptop-mode.rst
@@ -0,0 +1,781 @@
+===============================================
+How to conserve battery power using laptop-mode
+===============================================
+
+Document Author: Bart Samwel (bart@samwel.tk)
+
+Date created: January 2, 2004
+
+Last modified: December 06, 2004
+
+Introduction
+------------
+
+Laptop mode is used to minimize the time that the hard disk needs to be spun up,
+to conserve battery power on laptops. It has been reported to cause significant
+power savings.
+
+.. Contents
+
+   * Introduction
+   * Installation
+   * Caveats
+   * The Details
+   * Tips & Tricks
+   * Control script
+   * ACPI integration
+   * Monitoring tool
+
+
+Installation
+------------
+
+To use laptop mode, you don't need to set any kernel configuration options
+or anything. Simply install all the files included in this document, and
+laptop mode will automatically be started when you're on battery. For
+your convenience, a tarball containing an installer can be downloaded at:
+
+	http://www.samwel.tk/laptop_mode/laptop_mode/
+
+To configure laptop mode, you need to edit the configuration file, which is
+located in /etc/default/laptop-mode on Debian-based systems, or in
+/etc/sysconfig/laptop-mode on other systems.
+
+Unfortunately, automatic enabling of laptop mode does not work for
+laptops that don't have ACPI. On those laptops, you need to start laptop
+mode manually. To start laptop mode, run "laptop_mode start", and to
+stop it, run "laptop_mode stop". (Note: The laptop mode tools package now
+has experimental support for APM, you might want to try that first.)
+
+
+Caveats
+-------
+
+* The downside of laptop mode is that you have a chance of losing up to 10
+  minutes of work. If you cannot afford this, don't use it! The supplied ACPI
+  scripts automatically turn off laptop mode when the battery almost runs out,
+  so that you won't lose any data at the end of your battery life.
+
+* Most desktop hard drives have a very limited lifetime measured in spindown
+  cycles, typically about 50.000 times (it's usually listed on the spec sheet).
+  Check your drive's rating, and don't wear down your drive's lifetime if you
+  don't need to.
+
+* If you mount some of your ext3/reiserfs filesystems with the -n option, then
+  the control script will not be able to remount them correctly. You must set
+  DO_REMOUNTS=0 in the control script, otherwise it will remount them with the
+  wrong options -- or it will fail because it cannot write to /etc/mtab.
+
+* If you have your filesystems listed as type "auto" in fstab, like I did, then
+  the control script will not recognize them as filesystems that need remounting.
+  You must list the filesystems with their true type instead.
+
+* It has been reported that some versions of the mutt mail client use file access
+  times to determine whether a folder contains new mail. If you use mutt and
+  experience this, you must disable the noatime remounting by setting the option
+  DO_REMOUNT_NOATIME to 0 in the configuration file.
+
+
+The Details
+-----------
+
+Laptop mode is controlled by the knob /proc/sys/vm/laptop_mode. This knob is
+present for all kernels that have the laptop mode patch, regardless of any
+configuration options. When the knob is set, any physical disk I/O (that might
+have caused the hard disk to spin up) causes Linux to flush all dirty blocks. The
+result of this is that after a disk has spun down, it will not be spun up
+anymore to write dirty blocks, because those blocks had already been written
+immediately after the most recent read operation. The value of the laptop_mode
+knob determines the time between the occurrence of disk I/O and when the flush
+is triggered. A sensible value for the knob is 5 seconds. Setting the knob to
+0 disables laptop mode.
+
+To increase the effectiveness of the laptop_mode strategy, the laptop_mode
+control script increases dirty_expire_centisecs and dirty_writeback_centisecs in
+/proc/sys/vm to about 10 minutes (by default), which means that pages that are
+dirtied are not forced to be written to disk as often. The control script also
+changes the dirty background ratio, so that background writeback of dirty pages
+is not done anymore. Combined with a higher commit value (also 10 minutes) for
+ext3 or ReiserFS filesystems (also done automatically by the control script),
+this results in concentration of disk activity in a small time interval which
+occurs only once every 10 minutes, or whenever the disk is forced to spin up by
+a cache miss. The disk can then be spun down in the periods of inactivity.
+
+If you want to find out which process caused the disk to spin up, you can
+gather information by setting the flag /proc/sys/vm/block_dump. When this flag
+is set, Linux reports all disk read and write operations that take place, and
+all block dirtyings done to files. This makes it possible to debug why a disk
+needs to spin up, and to increase battery life even more. The output of
+block_dump is written to the kernel output, and it can be retrieved using
+"dmesg". When you use block_dump and your kernel logging level also includes
+kernel debugging messages, you probably want to turn off klogd, otherwise
+the output of block_dump will be logged, causing disk activity that is not
+normally there.
+
+
+Configuration
+-------------
+
+The laptop mode configuration file is located in /etc/default/laptop-mode on
+Debian-based systems, or in /etc/sysconfig/laptop-mode on other systems. It
+contains the following options:
+
+MAX_AGE:
+
+Maximum time, in seconds, of hard drive spindown time that you are
+comfortable with. Worst case, it's possible that you could lose this
+amount of work if your battery fails while you're in laptop mode.
+
+MINIMUM_BATTERY_MINUTES:
+
+Automatically disable laptop mode if the remaining number of minutes of
+battery power is less than this value. Default is 10 minutes.
+
+AC_HD/BATT_HD:
+
+The idle timeout that should be set on your hard drive when laptop mode
+is active (BATT_HD) and when it is not active (AC_HD). The defaults are
+20 seconds (value 4) for BATT_HD  and 2 hours (value 244) for AC_HD. The
+possible values are those listed in the manual page for "hdparm" for the
+"-S" option.
+
+HD:
+
+The devices for which the spindown timeout should be adjusted by laptop mode.
+Default is /dev/hda. If you specify multiple devices, separate them by a space.
+
+READAHEAD:
+
+Disk readahead, in 512-byte sectors, while laptop mode is active. A large
+readahead can prevent disk accesses for things like executable pages (which are
+loaded on demand while the application executes) and sequentially accessed data
+(MP3s).
+
+DO_REMOUNTS:
+
+The control script automatically remounts any mounted journaled filesystems
+with appropriate commit interval options. When this option is set to 0, this
+feature is disabled.
+
+DO_REMOUNT_NOATIME:
+
+When remounting, should the filesystems be remounted with the noatime option?
+Normally, this is set to "1" (enabled), but there may be programs that require
+access time recording.
+
+DIRTY_RATIO:
+
+The percentage of memory that is allowed to contain "dirty" or unsaved data
+before a writeback is forced, while laptop mode is active. Corresponds to
+the /proc/sys/vm/dirty_ratio sysctl.
+
+DIRTY_BACKGROUND_RATIO:
+
+The percentage of memory that is allowed to contain "dirty" or unsaved data
+after a forced writeback is done due to an exceeding of DIRTY_RATIO. Set
+this nice and low. This corresponds to the /proc/sys/vm/dirty_background_ratio
+sysctl.
+
+Note that the behaviour of dirty_background_ratio is quite different
+when laptop mode is active and when it isn't. When laptop mode is inactive,
+dirty_background_ratio is the threshold percentage at which background writeouts
+start taking place. When laptop mode is active, however, background writeouts
+are disabled, and the dirty_background_ratio only determines how much writeback
+is done when dirty_ratio is reached.
+
+DO_CPU:
+
+Enable CPU frequency scaling when in laptop mode. (Requires CPUFreq to be setup.
+See Documentation/admin-guide/pm/cpufreq.rst for more info. Disabled by default.)
+
+CPU_MAXFREQ:
+
+When on battery, what is the maximum CPU speed that the system should use? Legal
+values are "slowest" for the slowest speed that your CPU is able to operate at,
+or a value listed in /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies.
+
+
+Tips & Tricks
+-------------
+
+* Bartek Kania reports getting up to 50 minutes of extra battery life (on top
+  of his regular 3 to 3.5 hours) using a spindown time of 5 seconds (BATT_HD=1).
+
+* You can spin down the disk while playing MP3, by setting disk readahead
+  to 8MB (READAHEAD=16384). Effectively, the disk will read a complete MP3 at
+  once, and will then spin down while the MP3 is playing. (Thanks to Bartek
+  Kania.)
+
+* Drew Scott Daniels observed: "I don't know why, but when I decrease the number
+  of colours that my display uses it consumes less battery power. I've seen
+  this on powerbooks too. I hope that this is a piece of information that
+  might be useful to the Laptop Mode patch or its users."
+
+* In syslog.conf, you can prefix entries with a dash `-` to omit syncing the
+  file after every logging. When you're using laptop-mode and your disk doesn't
+  spin down, this is a likely culprit.
+
+* Richard Atterer observed that laptop mode does not work well with noflushd
+  (http://noflushd.sourceforge.net/), it seems that noflushd prevents laptop-mode
+  from doing its thing.
+
+* If you're worried about your data, you might want to consider using a USB
+  memory stick or something like that as a "working area". (Be aware though
+  that flash memory can only handle a limited number of writes, and overuse
+  may wear out your memory stick pretty quickly. Do _not_ use journalling
+  filesystems on flash memory sticks.)
+
+
+Configuration file for control and ACPI battery scripts
+-------------------------------------------------------
+
+This allows the tunables to be changed for the scripts via an external
+configuration file
+
+It should be installed as /etc/default/laptop-mode on Debian, and as
+/etc/sysconfig/laptop-mode on Red Hat, SUSE, Mandrake, and other work-alikes.
+
+Config file::
+
+  # Maximum time, in seconds, of hard drive spindown time that you are
+  # comfortable with. Worst case, it's possible that you could lose this
+  # amount of work if your battery fails you while in laptop mode.
+  #MAX_AGE=600
+
+  # Automatically disable laptop mode when the number of minutes of battery
+  # that you have left goes below this threshold.
+  MINIMUM_BATTERY_MINUTES=10
+
+  # Read-ahead, in 512-byte sectors. You can spin down the disk while playing MP3/OGG
+  # by setting the disk readahead to 8MB (READAHEAD=16384). Effectively, the disk
+  # will read a complete MP3 at once, and will then spin down while the MP3/OGG is
+  # playing.
+  #READAHEAD=4096
+
+  # Shall we remount journaled fs. with appropriate commit interval? (1=yes)
+  #DO_REMOUNTS=1
+
+  # And shall we add the "noatime" option to that as well? (1=yes)
+  #DO_REMOUNT_NOATIME=1
+
+  # Dirty synchronous ratio.  At this percentage of dirty pages the process
+  # which
+  # calls write() does its own writeback
+  #DIRTY_RATIO=40
+
+  #
+  # Allowed dirty background ratio, in percent.  Once DIRTY_RATIO has been
+  # exceeded, the kernel will wake flusher threads which will then reduce the
+  # amount of dirty memory to dirty_background_ratio.  Set this nice and low,
+  # so once some writeout has commenced, we do a lot of it.
+  #
+  #DIRTY_BACKGROUND_RATIO=5
+
+  # kernel default dirty buffer age
+  #DEF_AGE=30
+  #DEF_UPDATE=5
+  #DEF_DIRTY_BACKGROUND_RATIO=10
+  #DEF_DIRTY_RATIO=40
+  #DEF_XFS_AGE_BUFFER=15
+  #DEF_XFS_SYNC_INTERVAL=30
+  #DEF_XFS_BUFD_INTERVAL=1
+
+  # This must be adjusted manually to the value of HZ in the running kernel
+  # on 2.4, until the XFS people change their 2.4 external interfaces to work in
+  # centisecs. This can be automated, but it's a work in progress that still
+  # needs# some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for
+  # external interfaces, and that is currently always set to 100. So you don't
+  # need to change this on 2.6.
+  #XFS_HZ=100
+
+  # Should the maximum CPU frequency be adjusted down while on battery?
+  # Requires CPUFreq to be setup.
+  # See Documentation/admin-guide/pm/cpufreq.rst for more info
+  #DO_CPU=0
+
+  # When on battery what is the maximum CPU speed that the system should
+  # use? Legal values are "slowest" for the slowest speed that your
+  # CPU is able to operate at, or a value listed in:
+  # /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies
+  # Only applicable if DO_CPU=1.
+  #CPU_MAXFREQ=slowest
+
+  # Idle timeout for your hard drive (man hdparm for valid values, -S option)
+  # Default is 2 hours on AC (AC_HD=244) and 20 seconds for battery (BATT_HD=4).
+  #AC_HD=244
+  #BATT_HD=4
+
+  # The drives for which to adjust the idle timeout. Separate them by a space,
+  # e.g. HD="/dev/hda /dev/hdb".
+  #HD="/dev/hda"
+
+  # Set the spindown timeout on a hard drive?
+  #DO_HD=1
+
+
+Control script
+--------------
+
+Please note that this control script works for the Linux 2.4 and 2.6 series (thanks
+to Kiko Piris).
+
+Control script::
+
+  #!/bin/bash
+
+  # start or stop laptop_mode, best run by a power management daemon when
+  # ac gets connected/disconnected from a laptop
+  #
+  # install as /sbin/laptop_mode
+  #
+  # Contributors to this script:   Kiko Piris
+  #				 Bart Samwel
+  #				 Micha Feigin
+  #				 Andrew Morton
+  #				 Herve Eychenne
+  #				 Dax Kelson
+  #
+  # Original Linux 2.4 version by: Jens Axboe
+
+  #############################################################################
+
+  # Source config
+  if [ -f /etc/default/laptop-mode ] ; then
+	# Debian
+	. /etc/default/laptop-mode
+  elif [ -f /etc/sysconfig/laptop-mode ] ; then
+	# Others
+          . /etc/sysconfig/laptop-mode
+  fi
+
+  # Don't raise an error if the config file is incomplete
+  # set defaults instead:
+
+  # Maximum time, in seconds, of hard drive spindown time that you are
+  # comfortable with. Worst case, it's possible that you could lose this
+  # amount of work if your battery fails you while in laptop mode.
+  MAX_AGE=${MAX_AGE:-'600'}
+
+  # Read-ahead, in kilobytes
+  READAHEAD=${READAHEAD:-'4096'}
+
+  # Shall we remount journaled fs. with appropriate commit interval? (1=yes)
+  DO_REMOUNTS=${DO_REMOUNTS:-'1'}
+
+  # And shall we add the "noatime" option to that as well? (1=yes)
+  DO_REMOUNT_NOATIME=${DO_REMOUNT_NOATIME:-'1'}
+
+  # Shall we adjust the idle timeout on a hard drive?
+  DO_HD=${DO_HD:-'1'}
+
+  # Adjust idle timeout on which hard drive?
+  HD="${HD:-'/dev/hda'}"
+
+  # spindown time for HD (hdparm -S values)
+  AC_HD=${AC_HD:-'244'}
+  BATT_HD=${BATT_HD:-'4'}
+
+  # Dirty synchronous ratio.  At this percentage of dirty pages the process which
+  # calls write() does its own writeback
+  DIRTY_RATIO=${DIRTY_RATIO:-'40'}
+
+  # cpu frequency scaling
+  # See Documentation/admin-guide/pm/cpufreq.rst for more info
+  DO_CPU=${CPU_MANAGE:-'0'}
+  CPU_MAXFREQ=${CPU_MAXFREQ:-'slowest'}
+
+  #
+  # Allowed dirty background ratio, in percent.  Once DIRTY_RATIO has been
+  # exceeded, the kernel will wake flusher threads which will then reduce the
+  # amount of dirty memory to dirty_background_ratio.  Set this nice and low,
+  # so once some writeout has commenced, we do a lot of it.
+  #
+  DIRTY_BACKGROUND_RATIO=${DIRTY_BACKGROUND_RATIO:-'5'}
+
+  # kernel default dirty buffer age
+  DEF_AGE=${DEF_AGE:-'30'}
+  DEF_UPDATE=${DEF_UPDATE:-'5'}
+  DEF_DIRTY_BACKGROUND_RATIO=${DEF_DIRTY_BACKGROUND_RATIO:-'10'}
+  DEF_DIRTY_RATIO=${DEF_DIRTY_RATIO:-'40'}
+  DEF_XFS_AGE_BUFFER=${DEF_XFS_AGE_BUFFER:-'15'}
+  DEF_XFS_SYNC_INTERVAL=${DEF_XFS_SYNC_INTERVAL:-'30'}
+  DEF_XFS_BUFD_INTERVAL=${DEF_XFS_BUFD_INTERVAL:-'1'}
+
+  # This must be adjusted manually to the value of HZ in the running kernel
+  # on 2.4, until the XFS people change their 2.4 external interfaces to work in
+  # centisecs. This can be automated, but it's a work in progress that still needs
+  # some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for external
+  # interfaces, and that is currently always set to 100. So you don't need to
+  # change this on 2.6.
+  XFS_HZ=${XFS_HZ:-'100'}
+
+  #############################################################################
+
+  KLEVEL="$(uname -r |
+               {
+	       IFS='.' read a b c
+	       echo $a.$b
+	     }
+  )"
+  case "$KLEVEL" in
+	"2.4"|"2.6")
+		;;
+	*)
+		echo "Unhandled kernel version: $KLEVEL ('uname -r' = '$(uname -r)')" >&2
+		exit 1
+		;;
+  esac
+
+  if [ ! -e /proc/sys/vm/laptop_mode ] ; then
+	echo "Kernel is not patched with laptop_mode patch." >&2
+	exit 1
+  fi
+
+  if [ ! -w /proc/sys/vm/laptop_mode ] ; then
+	echo "You do not have enough privileges to enable laptop_mode." >&2
+	exit 1
+  fi
+
+  # Remove an option (the first parameter) of the form option=<number> from
+  # a mount options string (the rest of the parameters).
+  parse_mount_opts () {
+	OPT="$1"
+	shift
+	echo ",$*," | sed		\
+	 -e 's/,'"$OPT"'=[0-9]*,/,/g'	\
+	 -e 's/,,*/,/g'			\
+	 -e 's/^,//'			\
+	 -e 's/,$//'
+  }
+
+  # Remove an option (the first parameter) without any arguments from
+  # a mount option string (the rest of the parameters).
+  parse_nonumber_mount_opts () {
+	OPT="$1"
+	shift
+	echo ",$*," | sed		\
+	 -e 's/,'"$OPT"',/,/g'		\
+	 -e 's/,,*/,/g'			\
+	 -e 's/^,//'			\
+	 -e 's/,$//'
+  }
+
+  # Find out the state of a yes/no option (e.g. "atime"/"noatime") in
+  # fstab for a given filesystem, and use this state to replace the
+  # value of the option in another mount options string. The device
+  # is the first argument, the option name the second, and the default
+  # value the third. The remainder is the mount options string.
+  #
+  # Example:
+  # parse_yesno_opts_wfstab /dev/hda1 atime atime defaults,noatime
+  #
+  # If fstab contains, say, "rw" for this filesystem, then the result
+  # will be "defaults,atime".
+  parse_yesno_opts_wfstab () {
+	L_DEV="$1"
+	OPT="$2"
+	DEF_OPT="$3"
+	shift 3
+	L_OPTS="$*"
+	PARSEDOPTS1="$(parse_nonumber_mount_opts $OPT $L_OPTS)"
+	PARSEDOPTS1="$(parse_nonumber_mount_opts no$OPT $PARSEDOPTS1)"
+	# Watch for a default atime in fstab
+	FSTAB_OPTS="$(awk '$1 == "'$L_DEV'" { print $4 }' /etc/fstab)"
+	if echo "$FSTAB_OPTS" | grep "$OPT" > /dev/null ; then
+		# option specified in fstab: extract the value and use it
+		if echo "$FSTAB_OPTS" | grep "no$OPT" > /dev/null ; then
+			echo "$PARSEDOPTS1,no$OPT"
+		else
+			# no$OPT not found -- so we must have $OPT.
+			echo "$PARSEDOPTS1,$OPT"
+		fi
+	else
+		# option not specified in fstab -- choose the default.
+		echo "$PARSEDOPTS1,$DEF_OPT"
+	fi
+  }
+
+  # Find out the state of a numbered option (e.g. "commit=NNN") in
+  # fstab for a given filesystem, and use this state to replace the
+  # value of the option in another mount options string. The device
+  # is the first argument, and the option name the second. The
+  # remainder is the mount options string in which the replacement
+  # must be done.
+  #
+  # Example:
+  # parse_mount_opts_wfstab /dev/hda1 commit defaults,commit=7
+  #
+  # If fstab contains, say, "commit=3,rw" for this filesystem, then the
+  # result will be "rw,commit=3".
+  parse_mount_opts_wfstab () {
+	L_DEV="$1"
+	OPT="$2"
+	shift 2
+	L_OPTS="$*"
+	PARSEDOPTS1="$(parse_mount_opts $OPT $L_OPTS)"
+	# Watch for a default commit in fstab
+	FSTAB_OPTS="$(awk '$1 == "'$L_DEV'" { print $4 }' /etc/fstab)"
+	if echo "$FSTAB_OPTS" | grep "$OPT=" > /dev/null ; then
+		# option specified in fstab: extract the value, and use it
+		echo -n "$PARSEDOPTS1,$OPT="
+		echo ",$FSTAB_OPTS," | sed \
+		 -e 's/.*,'"$OPT"'=//'	\
+		 -e 's/,.*//'
+	else
+		# option not specified in fstab: set it to 0
+		echo "$PARSEDOPTS1,$OPT=0"
+	fi
+  }
+
+  deduce_fstype () {
+	MP="$1"
+	# My root filesystem unfortunately has
+	# type "unknown" in /etc/mtab. If we encounter
+	# "unknown", we try to get the type from fstab.
+	cat /etc/fstab |
+	grep -v '^#' |
+	while read FSTAB_DEV FSTAB_MP FSTAB_FST FSTAB_OPTS FSTAB_DUMP FSTAB_DUMP ; do
+		if [ "$FSTAB_MP" = "$MP" ]; then
+			echo $FSTAB_FST
+			exit 0
+		fi
+	done
+  }
+
+  if [ $DO_REMOUNT_NOATIME -eq 1 ] ; then
+	NOATIME_OPT=",noatime"
+  fi
+
+  case "$1" in
+	start)
+		AGE=$((100*$MAX_AGE))
+		XFS_AGE=$(($XFS_HZ*$MAX_AGE))
+		echo -n "Starting laptop_mode"
+
+		if [ -d /proc/sys/vm/pagebuf ] ; then
+			# (For 2.4 and early 2.6.)
+			# This only needs to be set, not reset -- it is only used when
+			# laptop mode is enabled.
+			echo $XFS_AGE > /proc/sys/vm/pagebuf/lm_flush_age
+			echo $XFS_AGE > /proc/sys/fs/xfs/lm_sync_interval
+		elif [ -f /proc/sys/fs/xfs/lm_age_buffer ] ; then
+			# (A couple of early 2.6 laptop mode patches had these.)
+			# The same goes for these.
+			echo $XFS_AGE > /proc/sys/fs/xfs/lm_age_buffer
+			echo $XFS_AGE > /proc/sys/fs/xfs/lm_sync_interval
+		elif [ -f /proc/sys/fs/xfs/age_buffer ] ; then
+			# (2.6.6)
+			# But not for these -- they are also used in normal
+			# operation.
+			echo $XFS_AGE > /proc/sys/fs/xfs/age_buffer
+			echo $XFS_AGE > /proc/sys/fs/xfs/sync_interval
+		elif [ -f /proc/sys/fs/xfs/age_buffer_centisecs ] ; then
+			# (2.6.7 upwards)
+			# And not for these either. These are in centisecs,
+			# not USER_HZ, so we have to use $AGE, not $XFS_AGE.
+			echo $AGE > /proc/sys/fs/xfs/age_buffer_centisecs
+			echo $AGE > /proc/sys/fs/xfs/xfssyncd_centisecs
+			echo 3000 > /proc/sys/fs/xfs/xfsbufd_centisecs
+		fi
+
+		case "$KLEVEL" in
+			"2.4")
+				echo 1					> /proc/sys/vm/laptop_mode
+				echo "30 500 0 0 $AGE $AGE 60 20 0"	> /proc/sys/vm/bdflush
+				;;
+			"2.6")
+				echo 5					> /proc/sys/vm/laptop_mode
+				echo "$AGE"				> /proc/sys/vm/dirty_writeback_centisecs
+				echo "$AGE"				> /proc/sys/vm/dirty_expire_centisecs
+				echo "$DIRTY_RATIO"			> /proc/sys/vm/dirty_ratio
+				echo "$DIRTY_BACKGROUND_RATIO"		> /proc/sys/vm/dirty_background_ratio
+				;;
+		esac
+		if [ $DO_REMOUNTS -eq 1 ]; then
+			cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do
+				PARSEDOPTS="$(parse_mount_opts "$OPTS")"
+				if [ "$FST" = 'unknown' ]; then
+					FST=$(deduce_fstype $MP)
+				fi
+				case "$FST" in
+					"ext3"|"reiserfs")
+						PARSEDOPTS="$(parse_mount_opts commit "$OPTS")"
+						mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE$NOATIME_OPT
+						;;
+					"xfs")
+						mount $DEV -t $FST $MP -o remount,$OPTS$NOATIME_OPT
+						;;
+				esac
+				if [ -b $DEV ] ; then
+					blockdev --setra $(($READAHEAD * 2)) $DEV
+				fi
+			done
+		fi
+		if [ $DO_HD -eq 1 ] ; then
+			for THISHD in $HD ; do
+				/sbin/hdparm -S $BATT_HD $THISHD > /dev/null 2>&1
+				/sbin/hdparm -B 1 $THISHD > /dev/null 2>&1
+			done
+		fi
+		if [ $DO_CPU -eq 1 -a -e /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq ]; then
+			if [ $CPU_MAXFREQ = 'slowest' ]; then
+				CPU_MAXFREQ=`cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq`
+			fi
+			echo $CPU_MAXFREQ > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
+		fi
+		echo "."
+		;;
+	stop)
+		U_AGE=$((100*$DEF_UPDATE))
+		B_AGE=$((100*$DEF_AGE))
+		echo -n "Stopping laptop_mode"
+		echo 0 > /proc/sys/vm/laptop_mode
+		if [ -f /proc/sys/fs/xfs/age_buffer -a ! -f /proc/sys/fs/xfs/lm_age_buffer ] ; then
+			# These need to be restored, if there are no lm_*.
+			echo $(($XFS_HZ*$DEF_XFS_AGE_BUFFER))	 	> /proc/sys/fs/xfs/age_buffer
+			echo $(($XFS_HZ*$DEF_XFS_SYNC_INTERVAL)) 	> /proc/sys/fs/xfs/sync_interval
+		elif [ -f /proc/sys/fs/xfs/age_buffer_centisecs ] ; then
+			# These need to be restored as well.
+			echo $((100*$DEF_XFS_AGE_BUFFER))	> /proc/sys/fs/xfs/age_buffer_centisecs
+			echo $((100*$DEF_XFS_SYNC_INTERVAL))	> /proc/sys/fs/xfs/xfssyncd_centisecs
+			echo $((100*$DEF_XFS_BUFD_INTERVAL))	> /proc/sys/fs/xfs/xfsbufd_centisecs
+		fi
+		case "$KLEVEL" in
+			"2.4")
+				echo "30 500 0 0 $U_AGE $B_AGE 60 20 0"	> /proc/sys/vm/bdflush
+				;;
+			"2.6")
+				echo "$U_AGE"				> /proc/sys/vm/dirty_writeback_centisecs
+				echo "$B_AGE"				> /proc/sys/vm/dirty_expire_centisecs
+				echo "$DEF_DIRTY_RATIO"			> /proc/sys/vm/dirty_ratio
+				echo "$DEF_DIRTY_BACKGROUND_RATIO"	> /proc/sys/vm/dirty_background_ratio
+				;;
+		esac
+		if [ $DO_REMOUNTS -eq 1 ] ; then
+			cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do
+				# Reset commit and atime options to defaults.
+				if [ "$FST" = 'unknown' ]; then
+					FST=$(deduce_fstype $MP)
+				fi
+				case "$FST" in
+					"ext3"|"reiserfs")
+						PARSEDOPTS="$(parse_mount_opts_wfstab $DEV commit $OPTS)"
+						PARSEDOPTS="$(parse_yesno_opts_wfstab $DEV atime atime $PARSEDOPTS)"
+						mount $DEV -t $FST $MP -o remount,$PARSEDOPTS
+						;;
+					"xfs")
+						PARSEDOPTS="$(parse_yesno_opts_wfstab $DEV atime atime $OPTS)"
+						mount $DEV -t $FST $MP -o remount,$PARSEDOPTS
+						;;
+				esac
+				if [ -b $DEV ] ; then
+					blockdev --setra 256 $DEV
+				fi
+			done
+		fi
+		if [ $DO_HD -eq 1 ] ; then
+			for THISHD in $HD ; do
+				/sbin/hdparm -S $AC_HD $THISHD > /dev/null 2>&1
+				/sbin/hdparm -B 255 $THISHD > /dev/null 2>&1
+			done
+		fi
+		if [ $DO_CPU -eq 1 -a -e /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq ]; then
+			echo `cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq` > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
+		fi
+		echo "."
+		;;
+	*)
+		echo "Usage: $0 {start|stop}" 2>&1
+		exit 1
+		;;
+
+  esac
+
+  exit 0
+
+
+ACPI integration
+----------------
+
+Dax Kelson submitted this so that the ACPI acpid daemon will
+kick off the laptop_mode script and run hdparm. The part that
+automatically disables laptop mode when the battery is low was
+written by Jan Topinski.
+
+/etc/acpi/events/ac_adapter::
+
+	event=ac_adapter
+	action=/etc/acpi/actions/ac.sh %e
+
+/etc/acpi/events/battery::
+
+	event=battery.*
+	action=/etc/acpi/actions/battery.sh %e
+
+/etc/acpi/actions/ac.sh::
+
+  #!/bin/bash
+
+  # ac on/offline event handler
+
+  status=`awk '/^state: / { print $2 }' /proc/acpi/ac_adapter/$2/state`
+
+  case $status in
+          "on-line")
+                  /sbin/laptop_mode stop
+                  exit 0
+          ;;
+          "off-line")
+                  /sbin/laptop_mode start
+                  exit 0
+          ;;
+  esac
+
+
+/etc/acpi/actions/battery.sh::
+
+  #! /bin/bash
+
+  # Automatically disable laptop mode when the battery almost runs out.
+
+  BATT_INFO=/proc/acpi/battery/$2/state
+
+  if [[ -f /proc/sys/vm/laptop_mode ]]
+  then
+     LM=`cat /proc/sys/vm/laptop_mode`
+     if [[ $LM -gt 0 ]]
+     then
+       if [[ -f $BATT_INFO ]]
+       then
+          # Source the config file only now that we know we need
+          if [ -f /etc/default/laptop-mode ] ; then
+                  # Debian
+                  . /etc/default/laptop-mode
+          elif [ -f /etc/sysconfig/laptop-mode ] ; then
+                  # Others
+                  . /etc/sysconfig/laptop-mode
+          fi
+          MINIMUM_BATTERY_MINUTES=${MINIMUM_BATTERY_MINUTES:-'10'}
+
+          ACTION="`cat $BATT_INFO | grep charging | cut -c 26-`"
+          if [[ ACTION -eq "discharging" ]]
+          then
+             PRESENT_RATE=`cat $BATT_INFO | grep "present rate:" | sed  "s/.* \([0-9][0-9]* \).*/\1/" `
+             REMAINING=`cat $BATT_INFO | grep "remaining capacity:" | sed  "s/.* \([0-9][0-9]* \).*/\1/" `
+          fi
+          if (($REMAINING * 60 / $PRESENT_RATE < $MINIMUM_BATTERY_MINUTES))
+          then
+             /sbin/laptop_mode stop
+          fi
+       else
+         logger -p daemon.warning "You are using laptop mode and your battery interface $BATT_INFO is missing. This may lead to loss of data when the battery runs out. Check kernel ACPI support and /proc/acpi/battery folder, and edit /etc/acpi/battery.sh to set BATT_INFO to the correct path."
+       fi
+     fi
+  fi
+
+
+Monitoring tool
+---------------
+
+Bartek Kania submitted this, it can be used to measure how much time your disk
+spends spun up/down.  See tools/laptop/dslm/dslm.c
diff --git a/Documentation/laptops/laptop-mode.txt b/Documentation/laptops/laptop-mode.txt
deleted file mode 100644
index 1c707fc9b141..000000000000
--- a/Documentation/laptops/laptop-mode.txt
+++ /dev/null
@@ -1,782 +0,0 @@
-How to conserve battery power using laptop-mode
------------------------------------------------
-
-Document Author: Bart Samwel (bart@samwel.tk)
-Date created: January 2, 2004
-Last modified: December 06, 2004
-
-Introduction
-------------
-
-Laptop mode is used to minimize the time that the hard disk needs to be spun up,
-to conserve battery power on laptops. It has been reported to cause significant
-power savings.
-
-Contents
---------
-
-* Introduction
-* Installation
-* Caveats
-* The Details
-* Tips & Tricks
-* Control script
-* ACPI integration
-* Monitoring tool
-
-
-Installation
-------------
-
-To use laptop mode, you don't need to set any kernel configuration options
-or anything. Simply install all the files included in this document, and
-laptop mode will automatically be started when you're on battery. For
-your convenience, a tarball containing an installer can be downloaded at:
-
-http://www.samwel.tk/laptop_mode/laptop_mode/
-
-To configure laptop mode, you need to edit the configuration file, which is
-located in /etc/default/laptop-mode on Debian-based systems, or in
-/etc/sysconfig/laptop-mode on other systems.
-
-Unfortunately, automatic enabling of laptop mode does not work for
-laptops that don't have ACPI. On those laptops, you need to start laptop
-mode manually. To start laptop mode, run "laptop_mode start", and to
-stop it, run "laptop_mode stop". (Note: The laptop mode tools package now
-has experimental support for APM, you might want to try that first.)
-
-
-Caveats
--------
-
-* The downside of laptop mode is that you have a chance of losing up to 10
-  minutes of work. If you cannot afford this, don't use it! The supplied ACPI
-  scripts automatically turn off laptop mode when the battery almost runs out,
-  so that you won't lose any data at the end of your battery life.
-
-* Most desktop hard drives have a very limited lifetime measured in spindown
-  cycles, typically about 50.000 times (it's usually listed on the spec sheet).
-  Check your drive's rating, and don't wear down your drive's lifetime if you
-  don't need to.
-
-* If you mount some of your ext3/reiserfs filesystems with the -n option, then
-  the control script will not be able to remount them correctly. You must set
-  DO_REMOUNTS=0 in the control script, otherwise it will remount them with the
-  wrong options -- or it will fail because it cannot write to /etc/mtab.
-
-* If you have your filesystems listed as type "auto" in fstab, like I did, then
-  the control script will not recognize them as filesystems that need remounting.
-  You must list the filesystems with their true type instead.
-
-* It has been reported that some versions of the mutt mail client use file access
-  times to determine whether a folder contains new mail. If you use mutt and
-  experience this, you must disable the noatime remounting by setting the option
-  DO_REMOUNT_NOATIME to 0 in the configuration file.
-
-
-The Details
------------
-
-Laptop mode is controlled by the knob /proc/sys/vm/laptop_mode. This knob is
-present for all kernels that have the laptop mode patch, regardless of any
-configuration options. When the knob is set, any physical disk I/O (that might
-have caused the hard disk to spin up) causes Linux to flush all dirty blocks. The
-result of this is that after a disk has spun down, it will not be spun up
-anymore to write dirty blocks, because those blocks had already been written
-immediately after the most recent read operation. The value of the laptop_mode
-knob determines the time between the occurrence of disk I/O and when the flush
-is triggered. A sensible value for the knob is 5 seconds. Setting the knob to
-0 disables laptop mode.
-
-To increase the effectiveness of the laptop_mode strategy, the laptop_mode
-control script increases dirty_expire_centisecs and dirty_writeback_centisecs in
-/proc/sys/vm to about 10 minutes (by default), which means that pages that are
-dirtied are not forced to be written to disk as often. The control script also
-changes the dirty background ratio, so that background writeback of dirty pages
-is not done anymore. Combined with a higher commit value (also 10 minutes) for
-ext3 or ReiserFS filesystems (also done automatically by the control script),
-this results in concentration of disk activity in a small time interval which
-occurs only once every 10 minutes, or whenever the disk is forced to spin up by
-a cache miss. The disk can then be spun down in the periods of inactivity.
-
-If you want to find out which process caused the disk to spin up, you can
-gather information by setting the flag /proc/sys/vm/block_dump. When this flag
-is set, Linux reports all disk read and write operations that take place, and
-all block dirtyings done to files. This makes it possible to debug why a disk
-needs to spin up, and to increase battery life even more. The output of
-block_dump is written to the kernel output, and it can be retrieved using
-"dmesg". When you use block_dump and your kernel logging level also includes
-kernel debugging messages, you probably want to turn off klogd, otherwise
-the output of block_dump will be logged, causing disk activity that is not
-normally there.
-
-
-Configuration
--------------
-
-The laptop mode configuration file is located in /etc/default/laptop-mode on
-Debian-based systems, or in /etc/sysconfig/laptop-mode on other systems. It
-contains the following options:
-
-MAX_AGE:
-
-Maximum time, in seconds, of hard drive spindown time that you are
-comfortable with. Worst case, it's possible that you could lose this
-amount of work if your battery fails while you're in laptop mode.
-
-MINIMUM_BATTERY_MINUTES:
-
-Automatically disable laptop mode if the remaining number of minutes of
-battery power is less than this value. Default is 10 minutes.
-
-AC_HD/BATT_HD:
-
-The idle timeout that should be set on your hard drive when laptop mode
-is active (BATT_HD) and when it is not active (AC_HD). The defaults are
-20 seconds (value 4) for BATT_HD  and 2 hours (value 244) for AC_HD. The
-possible values are those listed in the manual page for "hdparm" for the
-"-S" option.
-
-HD:
-
-The devices for which the spindown timeout should be adjusted by laptop mode.
-Default is /dev/hda. If you specify multiple devices, separate them by a space.
-
-READAHEAD:
-
-Disk readahead, in 512-byte sectors, while laptop mode is active. A large
-readahead can prevent disk accesses for things like executable pages (which are
-loaded on demand while the application executes) and sequentially accessed data
-(MP3s).
-
-DO_REMOUNTS:
-
-The control script automatically remounts any mounted journaled filesystems
-with appropriate commit interval options. When this option is set to 0, this
-feature is disabled.
-
-DO_REMOUNT_NOATIME:
-
-When remounting, should the filesystems be remounted with the noatime option?
-Normally, this is set to "1" (enabled), but there may be programs that require
-access time recording.
-
-DIRTY_RATIO:
-
-The percentage of memory that is allowed to contain "dirty" or unsaved data
-before a writeback is forced, while laptop mode is active. Corresponds to
-the /proc/sys/vm/dirty_ratio sysctl.
-
-DIRTY_BACKGROUND_RATIO:
-
-The percentage of memory that is allowed to contain "dirty" or unsaved data
-after a forced writeback is done due to an exceeding of DIRTY_RATIO. Set
-this nice and low. This corresponds to the /proc/sys/vm/dirty_background_ratio
-sysctl.
-
-Note that the behaviour of dirty_background_ratio is quite different
-when laptop mode is active and when it isn't. When laptop mode is inactive,
-dirty_background_ratio is the threshold percentage at which background writeouts
-start taking place. When laptop mode is active, however, background writeouts
-are disabled, and the dirty_background_ratio only determines how much writeback
-is done when dirty_ratio is reached.
-
-DO_CPU:
-
-Enable CPU frequency scaling when in laptop mode. (Requires CPUFreq to be setup.
-See Documentation/admin-guide/pm/cpufreq.rst for more info. Disabled by default.)
-
-CPU_MAXFREQ:
-
-When on battery, what is the maximum CPU speed that the system should use? Legal
-values are "slowest" for the slowest speed that your CPU is able to operate at,
-or a value listed in /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies.
-
-
-Tips & Tricks
--------------
-
-* Bartek Kania reports getting up to 50 minutes of extra battery life (on top
-  of his regular 3 to 3.5 hours) using a spindown time of 5 seconds (BATT_HD=1).
-
-* You can spin down the disk while playing MP3, by setting disk readahead
-  to 8MB (READAHEAD=16384). Effectively, the disk will read a complete MP3 at
-  once, and will then spin down while the MP3 is playing. (Thanks to Bartek
-  Kania.)
-
-* Drew Scott Daniels observed: "I don't know why, but when I decrease the number
-  of colours that my display uses it consumes less battery power. I've seen
-  this on powerbooks too. I hope that this is a piece of information that
-  might be useful to the Laptop Mode patch or its users."
-
-* In syslog.conf, you can prefix entries with a dash ``-'' to omit syncing the
-  file after every logging. When you're using laptop-mode and your disk doesn't
-  spin down, this is a likely culprit.
-
-* Richard Atterer observed that laptop mode does not work well with noflushd
-  (http://noflushd.sourceforge.net/), it seems that noflushd prevents laptop-mode
-  from doing its thing.
-
-* If you're worried about your data, you might want to consider using a USB
-  memory stick or something like that as a "working area". (Be aware though
-  that flash memory can only handle a limited number of writes, and overuse
-  may wear out your memory stick pretty quickly. Do _not_ use journalling
-  filesystems on flash memory sticks.)
-
-
-Configuration file for control and ACPI battery scripts
--------------------------------------------------------
-
-This allows the tunables to be changed for the scripts via an external
-configuration file
-
-It should be installed as /etc/default/laptop-mode on Debian, and as
-/etc/sysconfig/laptop-mode on Red Hat, SUSE, Mandrake, and other work-alikes.
-
---------------------CONFIG FILE BEGIN-------------------------------------------
-# Maximum time, in seconds, of hard drive spindown time that you are
-# comfortable with. Worst case, it's possible that you could lose this
-# amount of work if your battery fails you while in laptop mode.
-#MAX_AGE=600
-
-# Automatically disable laptop mode when the number of minutes of battery
-# that you have left goes below this threshold.
-MINIMUM_BATTERY_MINUTES=10
-
-# Read-ahead, in 512-byte sectors. You can spin down the disk while playing MP3/OGG
-# by setting the disk readahead to 8MB (READAHEAD=16384). Effectively, the disk
-# will read a complete MP3 at once, and will then spin down while the MP3/OGG is
-# playing.
-#READAHEAD=4096
-
-# Shall we remount journaled fs. with appropriate commit interval? (1=yes)
-#DO_REMOUNTS=1
-
-# And shall we add the "noatime" option to that as well? (1=yes)
-#DO_REMOUNT_NOATIME=1
-
-# Dirty synchronous ratio.  At this percentage of dirty pages the process
-# which
-# calls write() does its own writeback
-#DIRTY_RATIO=40
-
-#
-# Allowed dirty background ratio, in percent.  Once DIRTY_RATIO has been
-# exceeded, the kernel will wake flusher threads which will then reduce the
-# amount of dirty memory to dirty_background_ratio.  Set this nice and low,
-# so once some writeout has commenced, we do a lot of it.
-#
-#DIRTY_BACKGROUND_RATIO=5
-
-# kernel default dirty buffer age
-#DEF_AGE=30
-#DEF_UPDATE=5
-#DEF_DIRTY_BACKGROUND_RATIO=10
-#DEF_DIRTY_RATIO=40
-#DEF_XFS_AGE_BUFFER=15
-#DEF_XFS_SYNC_INTERVAL=30
-#DEF_XFS_BUFD_INTERVAL=1
-
-# This must be adjusted manually to the value of HZ in the running kernel
-# on 2.4, until the XFS people change their 2.4 external interfaces to work in
-# centisecs. This can be automated, but it's a work in progress that still
-# needs# some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for
-# external interfaces, and that is currently always set to 100. So you don't
-# need to change this on 2.6.
-#XFS_HZ=100
-
-# Should the maximum CPU frequency be adjusted down while on battery?
-# Requires CPUFreq to be setup.
-# See Documentation/admin-guide/pm/cpufreq.rst for more info
-#DO_CPU=0
-
-# When on battery what is the maximum CPU speed that the system should
-# use? Legal values are "slowest" for the slowest speed that your
-# CPU is able to operate at, or a value listed in:
-# /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies
-# Only applicable if DO_CPU=1.
-#CPU_MAXFREQ=slowest
-
-# Idle timeout for your hard drive (man hdparm for valid values, -S option)
-# Default is 2 hours on AC (AC_HD=244) and 20 seconds for battery (BATT_HD=4).
-#AC_HD=244
-#BATT_HD=4
-
-# The drives for which to adjust the idle timeout. Separate them by a space,
-# e.g. HD="/dev/hda /dev/hdb".
-#HD="/dev/hda"
-
-# Set the spindown timeout on a hard drive?
-#DO_HD=1
-
---------------------CONFIG FILE END---------------------------------------------
-
-
-Control script
---------------
-
-Please note that this control script works for the Linux 2.4 and 2.6 series (thanks
-to Kiko Piris).
-
---------------------CONTROL SCRIPT BEGIN----------------------------------------
-#!/bin/bash
-
-# start or stop laptop_mode, best run by a power management daemon when
-# ac gets connected/disconnected from a laptop
-#
-# install as /sbin/laptop_mode
-#
-# Contributors to this script:   Kiko Piris
-#				 Bart Samwel
-#				 Micha Feigin
-#				 Andrew Morton
-#				 Herve Eychenne
-#				 Dax Kelson
-#
-# Original Linux 2.4 version by: Jens Axboe
-
-#############################################################################
-
-# Source config
-if [ -f /etc/default/laptop-mode ] ; then
-	# Debian
-	. /etc/default/laptop-mode
-elif [ -f /etc/sysconfig/laptop-mode ] ; then
-	# Others
-        . /etc/sysconfig/laptop-mode
-fi
-
-# Don't raise an error if the config file is incomplete
-# set defaults instead:
-
-# Maximum time, in seconds, of hard drive spindown time that you are
-# comfortable with. Worst case, it's possible that you could lose this
-# amount of work if your battery fails you while in laptop mode.
-MAX_AGE=${MAX_AGE:-'600'}
-
-# Read-ahead, in kilobytes
-READAHEAD=${READAHEAD:-'4096'}
-
-# Shall we remount journaled fs. with appropriate commit interval? (1=yes)
-DO_REMOUNTS=${DO_REMOUNTS:-'1'}
-
-# And shall we add the "noatime" option to that as well? (1=yes)
-DO_REMOUNT_NOATIME=${DO_REMOUNT_NOATIME:-'1'}
-
-# Shall we adjust the idle timeout on a hard drive?
-DO_HD=${DO_HD:-'1'}
-
-# Adjust idle timeout on which hard drive?
-HD="${HD:-'/dev/hda'}"
-
-# spindown time for HD (hdparm -S values)
-AC_HD=${AC_HD:-'244'}
-BATT_HD=${BATT_HD:-'4'}
-
-# Dirty synchronous ratio.  At this percentage of dirty pages the process which
-# calls write() does its own writeback
-DIRTY_RATIO=${DIRTY_RATIO:-'40'}
-
-# cpu frequency scaling
-# See Documentation/admin-guide/pm/cpufreq.rst for more info
-DO_CPU=${CPU_MANAGE:-'0'}
-CPU_MAXFREQ=${CPU_MAXFREQ:-'slowest'}
-
-#
-# Allowed dirty background ratio, in percent.  Once DIRTY_RATIO has been
-# exceeded, the kernel will wake flusher threads which will then reduce the
-# amount of dirty memory to dirty_background_ratio.  Set this nice and low,
-# so once some writeout has commenced, we do a lot of it.
-#
-DIRTY_BACKGROUND_RATIO=${DIRTY_BACKGROUND_RATIO:-'5'}
-
-# kernel default dirty buffer age
-DEF_AGE=${DEF_AGE:-'30'}
-DEF_UPDATE=${DEF_UPDATE:-'5'}
-DEF_DIRTY_BACKGROUND_RATIO=${DEF_DIRTY_BACKGROUND_RATIO:-'10'}
-DEF_DIRTY_RATIO=${DEF_DIRTY_RATIO:-'40'}
-DEF_XFS_AGE_BUFFER=${DEF_XFS_AGE_BUFFER:-'15'}
-DEF_XFS_SYNC_INTERVAL=${DEF_XFS_SYNC_INTERVAL:-'30'}
-DEF_XFS_BUFD_INTERVAL=${DEF_XFS_BUFD_INTERVAL:-'1'}
-
-# This must be adjusted manually to the value of HZ in the running kernel
-# on 2.4, until the XFS people change their 2.4 external interfaces to work in
-# centisecs. This can be automated, but it's a work in progress that still needs
-# some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for external
-# interfaces, and that is currently always set to 100. So you don't need to
-# change this on 2.6.
-XFS_HZ=${XFS_HZ:-'100'}
-
-#############################################################################
-
-KLEVEL="$(uname -r |
-             {
-	       IFS='.' read a b c
-	       echo $a.$b
-	     }
-)"
-case "$KLEVEL" in
-	"2.4"|"2.6")
-		;;
-	*)
-		echo "Unhandled kernel version: $KLEVEL ('uname -r' = '$(uname -r)')" >&2
-		exit 1
-		;;
-esac
-
-if [ ! -e /proc/sys/vm/laptop_mode ] ; then
-	echo "Kernel is not patched with laptop_mode patch." >&2
-	exit 1
-fi
-
-if [ ! -w /proc/sys/vm/laptop_mode ] ; then
-	echo "You do not have enough privileges to enable laptop_mode." >&2
-	exit 1
-fi
-
-# Remove an option (the first parameter) of the form option=<number> from
-# a mount options string (the rest of the parameters).
-parse_mount_opts () {
-	OPT="$1"
-	shift
-	echo ",$*," | sed		\
-	 -e 's/,'"$OPT"'=[0-9]*,/,/g'	\
-	 -e 's/,,*/,/g'			\
-	 -e 's/^,//'			\
-	 -e 's/,$//'
-}
-
-# Remove an option (the first parameter) without any arguments from
-# a mount option string (the rest of the parameters).
-parse_nonumber_mount_opts () {
-	OPT="$1"
-	shift
-	echo ",$*," | sed		\
-	 -e 's/,'"$OPT"',/,/g'		\
-	 -e 's/,,*/,/g'			\
-	 -e 's/^,//'			\
-	 -e 's/,$//'
-}
-
-# Find out the state of a yes/no option (e.g. "atime"/"noatime") in
-# fstab for a given filesystem, and use this state to replace the
-# value of the option in another mount options string. The device
-# is the first argument, the option name the second, and the default
-# value the third. The remainder is the mount options string.
-#
-# Example:
-# parse_yesno_opts_wfstab /dev/hda1 atime atime defaults,noatime
-#
-# If fstab contains, say, "rw" for this filesystem, then the result
-# will be "defaults,atime".
-parse_yesno_opts_wfstab () {
-	L_DEV="$1"
-	OPT="$2"
-	DEF_OPT="$3"
-	shift 3
-	L_OPTS="$*"
-	PARSEDOPTS1="$(parse_nonumber_mount_opts $OPT $L_OPTS)"
-	PARSEDOPTS1="$(parse_nonumber_mount_opts no$OPT $PARSEDOPTS1)"
-	# Watch for a default atime in fstab
-	FSTAB_OPTS="$(awk '$1 == "'$L_DEV'" { print $4 }' /etc/fstab)"
-	if echo "$FSTAB_OPTS" | grep "$OPT" > /dev/null ; then
-		# option specified in fstab: extract the value and use it
-		if echo "$FSTAB_OPTS" | grep "no$OPT" > /dev/null ; then
-			echo "$PARSEDOPTS1,no$OPT"
-		else
-			# no$OPT not found -- so we must have $OPT.
-			echo "$PARSEDOPTS1,$OPT"
-		fi
-	else
-		# option not specified in fstab -- choose the default.
-		echo "$PARSEDOPTS1,$DEF_OPT"
-	fi
-}
-
-# Find out the state of a numbered option (e.g. "commit=NNN") in
-# fstab for a given filesystem, and use this state to replace the
-# value of the option in another mount options string. The device
-# is the first argument, and the option name the second. The
-# remainder is the mount options string in which the replacement
-# must be done.
-#
-# Example:
-# parse_mount_opts_wfstab /dev/hda1 commit defaults,commit=7
-#
-# If fstab contains, say, "commit=3,rw" for this filesystem, then the
-# result will be "rw,commit=3".
-parse_mount_opts_wfstab () {
-	L_DEV="$1"
-	OPT="$2"
-	shift 2
-	L_OPTS="$*"
-	PARSEDOPTS1="$(parse_mount_opts $OPT $L_OPTS)"
-	# Watch for a default commit in fstab
-	FSTAB_OPTS="$(awk '$1 == "'$L_DEV'" { print $4 }' /etc/fstab)"
-	if echo "$FSTAB_OPTS" | grep "$OPT=" > /dev/null ; then
-		# option specified in fstab: extract the value, and use it
-		echo -n "$PARSEDOPTS1,$OPT="
-		echo ",$FSTAB_OPTS," | sed \
-		 -e 's/.*,'"$OPT"'=//'	\
-		 -e 's/,.*//'
-	else
-		# option not specified in fstab: set it to 0
-		echo "$PARSEDOPTS1,$OPT=0"
-	fi
-}
-
-deduce_fstype () {
-	MP="$1"
-	# My root filesystem unfortunately has
-	# type "unknown" in /etc/mtab. If we encounter
-	# "unknown", we try to get the type from fstab.
-	cat /etc/fstab |
-	grep -v '^#' |
-	while read FSTAB_DEV FSTAB_MP FSTAB_FST FSTAB_OPTS FSTAB_DUMP FSTAB_DUMP ; do
-		if [ "$FSTAB_MP" = "$MP" ]; then
-			echo $FSTAB_FST
-			exit 0
-		fi
-	done
-}
-
-if [ $DO_REMOUNT_NOATIME -eq 1 ] ; then
-	NOATIME_OPT=",noatime"
-fi
-
-case "$1" in
-	start)
-		AGE=$((100*$MAX_AGE))
-		XFS_AGE=$(($XFS_HZ*$MAX_AGE))
-		echo -n "Starting laptop_mode"
-
-		if [ -d /proc/sys/vm/pagebuf ] ; then
-			# (For 2.4 and early 2.6.)
-			# This only needs to be set, not reset -- it is only used when
-			# laptop mode is enabled.
-			echo $XFS_AGE > /proc/sys/vm/pagebuf/lm_flush_age
-			echo $XFS_AGE > /proc/sys/fs/xfs/lm_sync_interval
-		elif [ -f /proc/sys/fs/xfs/lm_age_buffer ] ; then
-			# (A couple of early 2.6 laptop mode patches had these.)
-			# The same goes for these.
-			echo $XFS_AGE > /proc/sys/fs/xfs/lm_age_buffer
-			echo $XFS_AGE > /proc/sys/fs/xfs/lm_sync_interval
-		elif [ -f /proc/sys/fs/xfs/age_buffer ] ; then
-			# (2.6.6)
-			# But not for these -- they are also used in normal
-			# operation.
-			echo $XFS_AGE > /proc/sys/fs/xfs/age_buffer
-			echo $XFS_AGE > /proc/sys/fs/xfs/sync_interval
-		elif [ -f /proc/sys/fs/xfs/age_buffer_centisecs ] ; then
-			# (2.6.7 upwards)
-			# And not for these either. These are in centisecs,
-			# not USER_HZ, so we have to use $AGE, not $XFS_AGE.
-			echo $AGE > /proc/sys/fs/xfs/age_buffer_centisecs
-			echo $AGE > /proc/sys/fs/xfs/xfssyncd_centisecs
-			echo 3000 > /proc/sys/fs/xfs/xfsbufd_centisecs
-		fi
-
-		case "$KLEVEL" in
-			"2.4")
-				echo 1					> /proc/sys/vm/laptop_mode
-				echo "30 500 0 0 $AGE $AGE 60 20 0"	> /proc/sys/vm/bdflush
-				;;
-			"2.6")
-				echo 5					> /proc/sys/vm/laptop_mode
-				echo "$AGE"				> /proc/sys/vm/dirty_writeback_centisecs
-				echo "$AGE"				> /proc/sys/vm/dirty_expire_centisecs
-				echo "$DIRTY_RATIO"			> /proc/sys/vm/dirty_ratio
-				echo "$DIRTY_BACKGROUND_RATIO"		> /proc/sys/vm/dirty_background_ratio
-				;;
-		esac
-		if [ $DO_REMOUNTS -eq 1 ]; then
-			cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do
-				PARSEDOPTS="$(parse_mount_opts "$OPTS")"
-				if [ "$FST" = 'unknown' ]; then
-					FST=$(deduce_fstype $MP)
-				fi
-				case "$FST" in
-					"ext3"|"reiserfs")
-						PARSEDOPTS="$(parse_mount_opts commit "$OPTS")"
-						mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE$NOATIME_OPT
-						;;
-					"xfs")
-						mount $DEV -t $FST $MP -o remount,$OPTS$NOATIME_OPT
-						;;
-				esac
-				if [ -b $DEV ] ; then
-					blockdev --setra $(($READAHEAD * 2)) $DEV
-				fi
-			done
-		fi
-		if [ $DO_HD -eq 1 ] ; then
-			for THISHD in $HD ; do
-				/sbin/hdparm -S $BATT_HD $THISHD > /dev/null 2>&1
-				/sbin/hdparm -B 1 $THISHD > /dev/null 2>&1
-			done
-		fi
-		if [ $DO_CPU -eq 1 -a -e /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq ]; then
-			if [ $CPU_MAXFREQ = 'slowest' ]; then
-				CPU_MAXFREQ=`cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq`
-			fi
-			echo $CPU_MAXFREQ > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
-		fi
-		echo "."
-		;;
-	stop)
-		U_AGE=$((100*$DEF_UPDATE))
-		B_AGE=$((100*$DEF_AGE))
-		echo -n "Stopping laptop_mode"
-		echo 0 > /proc/sys/vm/laptop_mode
-		if [ -f /proc/sys/fs/xfs/age_buffer -a ! -f /proc/sys/fs/xfs/lm_age_buffer ] ; then
-			# These need to be restored, if there are no lm_*.
-			echo $(($XFS_HZ*$DEF_XFS_AGE_BUFFER))	 	> /proc/sys/fs/xfs/age_buffer
-			echo $(($XFS_HZ*$DEF_XFS_SYNC_INTERVAL)) 	> /proc/sys/fs/xfs/sync_interval
-		elif [ -f /proc/sys/fs/xfs/age_buffer_centisecs ] ; then
-			# These need to be restored as well.
-			echo $((100*$DEF_XFS_AGE_BUFFER))	> /proc/sys/fs/xfs/age_buffer_centisecs
-			echo $((100*$DEF_XFS_SYNC_INTERVAL))	> /proc/sys/fs/xfs/xfssyncd_centisecs
-			echo $((100*$DEF_XFS_BUFD_INTERVAL))	> /proc/sys/fs/xfs/xfsbufd_centisecs
-		fi
-		case "$KLEVEL" in
-			"2.4")
-				echo "30 500 0 0 $U_AGE $B_AGE 60 20 0"	> /proc/sys/vm/bdflush
-				;;
-			"2.6")
-				echo "$U_AGE"				> /proc/sys/vm/dirty_writeback_centisecs
-				echo "$B_AGE"				> /proc/sys/vm/dirty_expire_centisecs
-				echo "$DEF_DIRTY_RATIO"			> /proc/sys/vm/dirty_ratio
-				echo "$DEF_DIRTY_BACKGROUND_RATIO"	> /proc/sys/vm/dirty_background_ratio
-				;;
-		esac
-		if [ $DO_REMOUNTS -eq 1 ] ; then
-			cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do
-				# Reset commit and atime options to defaults.
-				if [ "$FST" = 'unknown' ]; then
-					FST=$(deduce_fstype $MP)
-				fi
-				case "$FST" in
-					"ext3"|"reiserfs")
-						PARSEDOPTS="$(parse_mount_opts_wfstab $DEV commit $OPTS)"
-						PARSEDOPTS="$(parse_yesno_opts_wfstab $DEV atime atime $PARSEDOPTS)"
-						mount $DEV -t $FST $MP -o remount,$PARSEDOPTS
-						;;
-					"xfs")
-						PARSEDOPTS="$(parse_yesno_opts_wfstab $DEV atime atime $OPTS)"
-						mount $DEV -t $FST $MP -o remount,$PARSEDOPTS
-						;;
-				esac
-				if [ -b $DEV ] ; then
-					blockdev --setra 256 $DEV
-				fi
-			done
-		fi
-		if [ $DO_HD -eq 1 ] ; then
-			for THISHD in $HD ; do
-				/sbin/hdparm -S $AC_HD $THISHD > /dev/null 2>&1
-				/sbin/hdparm -B 255 $THISHD > /dev/null 2>&1
-			done
-		fi
-		if [ $DO_CPU -eq 1 -a -e /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq ]; then
-			echo `cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq` > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
-		fi
-		echo "."
-		;;
-	*)
-		echo "Usage: $0 {start|stop}" 2>&1
-		exit 1
-		;;
-
-esac
-
-exit 0
---------------------CONTROL SCRIPT END------------------------------------------
-
-
-ACPI integration
-----------------
-
-Dax Kelson submitted this so that the ACPI acpid daemon will
-kick off the laptop_mode script and run hdparm. The part that
-automatically disables laptop mode when the battery is low was
-written by Jan Topinski.
-
------------------/etc/acpi/events/ac_adapter BEGIN------------------------------
-event=ac_adapter
-action=/etc/acpi/actions/ac.sh %e
-----------------/etc/acpi/events/ac_adapter END---------------------------------
-
-
------------------/etc/acpi/events/battery BEGIN---------------------------------
-event=battery.*
-action=/etc/acpi/actions/battery.sh %e
-----------------/etc/acpi/events/battery END------------------------------------
-
-
-----------------/etc/acpi/actions/ac.sh BEGIN-----------------------------------
-#!/bin/bash
-
-# ac on/offline event handler
-
-status=`awk '/^state: / { print $2 }' /proc/acpi/ac_adapter/$2/state`
-
-case $status in
-        "on-line")
-                /sbin/laptop_mode stop
-                exit 0
-        ;;
-        "off-line")
-                /sbin/laptop_mode start
-                exit 0
-        ;;
-esac
----------------------------/etc/acpi/actions/ac.sh END--------------------------
-
-
----------------------------/etc/acpi/actions/battery.sh BEGIN-------------------
-#! /bin/bash
-
-# Automatically disable laptop mode when the battery almost runs out.
-
-BATT_INFO=/proc/acpi/battery/$2/state
-
-if [[ -f /proc/sys/vm/laptop_mode ]]
-then
-   LM=`cat /proc/sys/vm/laptop_mode`
-   if [[ $LM -gt 0 ]]
-   then
-     if [[ -f $BATT_INFO ]]
-     then
-        # Source the config file only now that we know we need
-        if [ -f /etc/default/laptop-mode ] ; then
-                # Debian
-                . /etc/default/laptop-mode
-        elif [ -f /etc/sysconfig/laptop-mode ] ; then
-                # Others
-                . /etc/sysconfig/laptop-mode
-        fi
-        MINIMUM_BATTERY_MINUTES=${MINIMUM_BATTERY_MINUTES:-'10'}
-
-        ACTION="`cat $BATT_INFO | grep charging | cut -c 26-`"
-        if [[ ACTION -eq "discharging" ]]
-        then
-           PRESENT_RATE=`cat $BATT_INFO | grep "present rate:" | sed  "s/.* \([0-9][0-9]* \).*/\1/" `
-           REMAINING=`cat $BATT_INFO | grep "remaining capacity:" | sed  "s/.* \([0-9][0-9]* \).*/\1/" `
-        fi
-        if (($REMAINING * 60 / $PRESENT_RATE < $MINIMUM_BATTERY_MINUTES))
-        then
-           /sbin/laptop_mode stop
-        fi
-     else
-       logger -p daemon.warning "You are using laptop mode and your battery interface $BATT_INFO is missing. This may lead to loss of data when the battery runs out. Check kernel ACPI support and /proc/acpi/battery folder, and edit /etc/acpi/battery.sh to set BATT_INFO to the correct path."
-     fi
-   fi
-fi
----------------------------/etc/acpi/actions/battery.sh END--------------------
-
-
-Monitoring tool
----------------
-
-Bartek Kania submitted this, it can be used to measure how much time your disk
-spends spun up/down.  See tools/laptop/dslm/dslm.c
diff --git a/Documentation/laptops/sony-laptop.rst b/Documentation/laptops/sony-laptop.rst
new file mode 100644
index 000000000000..9edcc7f6612f
--- /dev/null
+++ b/Documentation/laptops/sony-laptop.rst
@@ -0,0 +1,174 @@
+=========================================
+Sony Notebook Control Driver (SNC) Readme
+=========================================
+
+	- Copyright (C) 2004- 2005 Stelian Pop <stelian@popies.net>
+	- Copyright (C) 2007 Mattia Dongili <malattia@linux.it>
+
+This mini-driver drives the SNC and SPIC device present in the ACPI BIOS of the
+Sony Vaio laptops. This driver mixes both devices functions under the same
+(hopefully consistent) interface. This also means that the sonypi driver is
+obsoleted by sony-laptop now.
+
+Fn keys (hotkeys):
+------------------
+
+Some models report hotkeys through the SNC or SPIC devices, such events are
+reported both through the ACPI subsystem as acpi events and through the INPUT
+subsystem. See the logs of /proc/bus/input/devices to find out what those
+events are and which input devices are created by the driver.
+Additionally, loading the driver with the debug option will report all events
+in the kernel log.
+
+The "scancodes" passed to the input system (that can be remapped with udev)
+are indexes to the table "sony_laptop_input_keycode_map" in the sony-laptop.c
+module.  For example the "FN/E" key combination (EJECTCD on some models)
+generates the scancode 20 (0x14).
+
+Backlight control:
+------------------
+If your laptop model supports it, you will find sysfs files in the
+/sys/class/backlight/sony/
+directory. You will be able to query and set the current screen
+brightness:
+
+	======================	=========================================
+	brightness		get/set screen brightness (an integer
+				between 0 and 7)
+	actual_brightness	reading from this file will query the HW
+				to get real brightness value
+	max_brightness		the maximum brightness value
+	======================	=========================================
+
+
+Platform specific:
+------------------
+Loading the sony-laptop module will create a
+/sys/devices/platform/sony-laptop/
+directory populated with some files.
+
+You then read/write integer values from/to those files by using
+standard UNIX tools.
+
+The files are:
+
+	======================	==========================================
+	brightness_default	screen brightness which will be set
+				when the laptop will be rebooted
+	cdpower			power on/off the internal CD drive
+	audiopower		power on/off the internal sound card
+	lanpower		power on/off the internal ethernet card
+				(only in debug mode)
+	bluetoothpower		power on/off the internal bluetooth device
+	fanspeed		get/set the fan speed
+	======================	==========================================
+
+Note that some files may be missing if they are not supported
+by your particular laptop model.
+
+Example usage::
+
+	# echo "1" > /sys/devices/platform/sony-laptop/brightness_default
+
+sets the lowest screen brightness for the next and later reboots
+
+::
+
+	# echo "8" > /sys/devices/platform/sony-laptop/brightness_default
+
+sets the highest screen brightness for the next and later reboots
+
+::
+
+	# cat /sys/devices/platform/sony-laptop/brightness_default
+
+retrieves the value
+
+::
+
+	# echo "0" > /sys/devices/platform/sony-laptop/audiopower
+
+powers off the sound card
+
+::
+
+	# echo "1" > /sys/devices/platform/sony-laptop/audiopower
+
+powers on the sound card.
+
+
+RFkill control:
+---------------
+More recent Vaio models expose a consistent set of ACPI methods to
+control radio frequency emitting devices. If you are a lucky owner of
+such a laptop you will find the necessary rfkill devices under
+/sys/class/rfkill. Check those starting with sony-* in::
+
+	# grep . /sys/class/rfkill/*/{state,name}
+
+
+Development:
+------------
+
+If you want to help with the development of this driver (and
+you are not afraid of any side effects doing strange things with
+your ACPI BIOS could have on your laptop), load the driver and
+pass the option 'debug=1'.
+
+REPEAT:
+	**DON'T DO THIS IF YOU DON'T LIKE RISKY BUSINESS.**
+
+In your kernel logs you will find the list of all ACPI methods
+the SNC device has on your laptop.
+
+* For new models you will see a long list of meaningless method names,
+  reading the DSDT table source should reveal that:
+
+(1) the SNC device uses an internal capability lookup table
+(2) SN00 is used to find values in the lookup table
+(3) SN06 and SN07 are used to call into the real methods based on
+    offsets you can obtain iterating the table using SN00
+(4) SN02 used to enable events.
+
+Some values in the capability lookup table are more or less known, see
+the code for all sony_call_snc_handle calls, others are more obscure.
+
+* For old models you can see the GCDP/GCDP methods used to pwer on/off
+  the CD drive, but there are others and they are usually different from
+  model to model.
+
+**I HAVE NO IDEA WHAT THOSE METHODS DO.**
+
+The sony-laptop driver creates, for some of those methods (the most
+current ones found on several Vaio models), an entry under
+/sys/devices/platform/sony-laptop, just like the 'cdpower' one.
+You can create other entries corresponding to your own laptop methods by
+further editing the source (see the 'sony_nc_values' table, and add a new
+entry to this table with your get/set method names using the
+SNC_HANDLE_NAMES macro).
+
+Your mission, should you accept it, is to try finding out what
+those entries are for, by reading/writing random values from/to those
+files and find out what is the impact on your laptop.
+
+Should you find anything interesting, please report it back to me,
+I will not disavow all knowledge of your actions :)
+
+See also http://www.linux.it/~malattia/wiki/index.php/Sony_drivers for other
+useful info.
+
+Bugs/Limitations:
+-----------------
+
+* This driver is not based on official documentation from Sony
+  (because there is none), so there is no guarantee this driver
+  will work at all, or do the right thing. Although this hasn't
+  happened to me, this driver could do very bad things to your
+  laptop, including permanent damage.
+
+* The sony-laptop and sonypi drivers do not interact at all. In the
+  future, sonypi will be removed and replaced by sony-laptop.
+
+* spicctrl, which is the userspace tool used to communicate with the
+  sonypi driver (through /dev/sonypi) is deprecated as well since all
+  its features are now available under the sysfs tree via sony-laptop.
diff --git a/Documentation/laptops/sony-laptop.txt b/Documentation/laptops/sony-laptop.txt
deleted file mode 100644
index 978b1e615155..000000000000
--- a/Documentation/laptops/sony-laptop.txt
+++ /dev/null
@@ -1,144 +0,0 @@
-Sony Notebook Control Driver (SNC) Readme
------------------------------------------
-	Copyright (C) 2004- 2005 Stelian Pop <stelian@popies.net>
-	Copyright (C) 2007 Mattia Dongili <malattia@linux.it>
-
-This mini-driver drives the SNC and SPIC device present in the ACPI BIOS of the
-Sony Vaio laptops. This driver mixes both devices functions under the same
-(hopefully consistent) interface. This also means that the sonypi driver is
-obsoleted by sony-laptop now.
-
-Fn keys (hotkeys):
-------------------
-Some models report hotkeys through the SNC or SPIC devices, such events are
-reported both through the ACPI subsystem as acpi events and through the INPUT
-subsystem. See the logs of /proc/bus/input/devices to find out what those
-events are and which input devices are created by the driver.
-Additionally, loading the driver with the debug option will report all events
-in the kernel log.
-
-The "scancodes" passed to the input system (that can be remapped with udev)
-are indexes to the table "sony_laptop_input_keycode_map" in the sony-laptop.c
-module.  For example the "FN/E" key combination (EJECTCD on some models)
-generates the scancode 20 (0x14).
-
-Backlight control:
-------------------
-If your laptop model supports it, you will find sysfs files in the
-/sys/class/backlight/sony/
-directory. You will be able to query and set the current screen
-brightness:
-	brightness		get/set screen brightness (an integer
-				between 0 and 7)
-	actual_brightness	reading from this file will query the HW
-				to get real brightness value
-	max_brightness		the maximum brightness value
-
-
-Platform specific:
-------------------
-Loading the sony-laptop module will create a
-/sys/devices/platform/sony-laptop/
-directory populated with some files.
-
-You then read/write integer values from/to those files by using
-standard UNIX tools.
-
-The files are:
-	brightness_default	screen brightness which will be set
-				when the laptop will be rebooted
-	cdpower			power on/off the internal CD drive
-	audiopower		power on/off the internal sound card
-	lanpower		power on/off the internal ethernet card
-				(only in debug mode)
-	bluetoothpower		power on/off the internal bluetooth device
-	fanspeed		get/set the fan speed
-
-Note that some files may be missing if they are not supported
-by your particular laptop model.
-
-Example usage:
-	# echo "1" > /sys/devices/platform/sony-laptop/brightness_default
-sets the lowest screen brightness for the next and later reboots,
-	# echo "8" > /sys/devices/platform/sony-laptop/brightness_default
-sets the highest screen brightness for the next and later reboots,
-	# cat /sys/devices/platform/sony-laptop/brightness_default
-retrieves the value.
-
-	# echo "0" > /sys/devices/platform/sony-laptop/audiopower
-powers off the sound card,
-	# echo "1" > /sys/devices/platform/sony-laptop/audiopower
-powers on the sound card.
-
-
-RFkill control:
----------------
-More recent Vaio models expose a consistent set of ACPI methods to
-control radio frequency emitting devices. If you are a lucky owner of
-such a laptop you will find the necessary rfkill devices under
-/sys/class/rfkill. Check those starting with sony-* in
-	# grep . /sys/class/rfkill/*/{state,name}
-
-
-Development:
-------------
-
-If you want to help with the development of this driver (and
-you are not afraid of any side effects doing strange things with
-your ACPI BIOS could have on your laptop), load the driver and
-pass the option 'debug=1'.
-
-REPEAT: DON'T DO THIS IF YOU DON'T LIKE RISKY BUSINESS.
-
-In your kernel logs you will find the list of all ACPI methods
-the SNC device has on your laptop.
-
-* For new models you will see a long list of meaningless method names,
-reading the DSDT table source should reveal that:
-(1) the SNC device uses an internal capability lookup table
-(2) SN00 is used to find values in the lookup table
-(3) SN06 and SN07 are used to call into the real methods based on
-    offsets you can obtain iterating the table using SN00
-(4) SN02 used to enable events.
-Some values in the capability lookup table are more or less known, see
-the code for all sony_call_snc_handle calls, others are more obscure.
-
-* For old models you can see the GCDP/GCDP methods used to pwer on/off
-the CD drive, but there are others and they are usually different from
-model to model.
-
-I HAVE NO IDEA WHAT THOSE METHODS DO.
-
-The sony-laptop driver creates, for some of those methods (the most
-current ones found on several Vaio models), an entry under
-/sys/devices/platform/sony-laptop, just like the 'cdpower' one.
-You can create other entries corresponding to your own laptop methods by
-further editing the source (see the 'sony_nc_values' table, and add a new
-entry to this table with your get/set method names using the
-SNC_HANDLE_NAMES macro).
-
-Your mission, should you accept it, is to try finding out what
-those entries are for, by reading/writing random values from/to those
-files and find out what is the impact on your laptop.
-
-Should you find anything interesting, please report it back to me,
-I will not disavow all knowledge of your actions :)
-
-See also http://www.linux.it/~malattia/wiki/index.php/Sony_drivers for other
-useful info.
-
-Bugs/Limitations:
------------------
-
-* This driver is not based on official documentation from Sony
-  (because there is none), so there is no guarantee this driver
-  will work at all, or do the right thing. Although this hasn't
-  happened to me, this driver could do very bad things to your
-  laptop, including permanent damage.
-
-* The sony-laptop and sonypi drivers do not interact at all. In the
-  future, sonypi will be removed and replaced by sony-laptop.
-
-* spicctrl, which is the userspace tool used to communicate with the
-  sonypi driver (through /dev/sonypi) is deprecated as well since all
-  its features are now available under the sysfs tree via sony-laptop.
diff --git a/Documentation/laptops/sonypi.rst b/Documentation/laptops/sonypi.rst
new file mode 100644
index 000000000000..2a1975ed7ee4
--- /dev/null
+++ b/Documentation/laptops/sonypi.rst
@@ -0,0 +1,160 @@
+==================================================
+Sony Programmable I/O Control Device Driver Readme
+==================================================
+
+	- Copyright (C) 2001-2004 Stelian Pop <stelian@popies.net>
+	- Copyright (C) 2001-2002 Alcôve <www.alcove.com>
+	- Copyright (C) 2001 Michael Ashley <m.ashley@unsw.edu.au>
+	- Copyright (C) 2001 Junichi Morita <jun1m@mars.dti.ne.jp>
+	- Copyright (C) 2000 Takaya Kinjo <t-kinjo@tc4.so-net.ne.jp>
+	- Copyright (C) 2000 Andrew Tridgell <tridge@samba.org>
+
+This driver enables access to the Sony Programmable I/O Control Device which
+can be found in many Sony Vaio laptops. Some newer Sony laptops (seems to be
+limited to new FX series laptops, at least the FX501 and the FX702) lack a
+sonypi device and are not supported at all by this driver.
+
+It will give access (through a user space utility) to some events those laptops
+generate, like:
+
+	- jogdial events (the small wheel on the side of Vaios)
+	- capture button events (only on Vaio Picturebook series)
+	- Fn keys
+	- bluetooth button (only on C1VR model)
+	- programmable keys, back, help, zoom, thumbphrase buttons, etc.
+	  (when available)
+
+Those events (see linux/sonypi.h) can be polled using the character device node
+/dev/sonypi (major 10, minor auto allocated or specified as a option).
+A simple daemon which translates the jogdial movements into mouse wheel events
+can be downloaded at: <http://popies.net/sonypi/>
+
+Another option to intercept the events is to get them directly through the
+input layer.
+
+This driver supports also some ioctl commands for setting the LCD screen
+brightness and querying the batteries charge information (some more
+commands may be added in the future).
+
+This driver can also be used to set the camera controls on Picturebook series
+(brightness, contrast etc), and is used by the video4linux driver for the
+Motion Eye camera.
+
+Please note that this driver was created by reverse engineering the Windows
+driver and the ACPI BIOS, because Sony doesn't agree to release any programming
+specs for its laptops. If someone convinces them to do so, drop me a note.
+
+Driver options:
+---------------
+
+Several options can be passed to the sonypi driver using the standard
+module argument syntax (<param>=<value> when passing the option to the
+module or sonypi.<param>=<value> on the kernel boot line when sonypi is
+statically linked into the kernel). Those options are:
+
+	=============== =======================================================
+	minor: 		minor number of the misc device /dev/sonypi,
+			default is -1 (automatic allocation, see /proc/misc
+			or kernel logs)
+
+	camera:		if you have a PictureBook series Vaio (with the
+			integrated MotionEye camera), set this parameter to 1
+			in order to let the driver access to the camera
+
+	fnkeyinit:	on some Vaios (C1VE, C1VR etc), the Fn key events don't
+			get enabled unless you set this parameter to 1.
+			Do not use this option unless it's actually necessary,
+			some Vaio models don't deal well with this option.
+			This option is available only if the kernel is
+			compiled without ACPI support (since it conflicts
+			with it and it shouldn't be required anyway if
+			ACPI is already enabled).
+
+	verbose:	set to 1 to print unknown events received from the
+			sonypi device.
+			set to 2 to print all events received from the
+			sonypi device.
+
+	compat:		uses some compatibility code for enabling the sonypi
+			events. If the driver worked for you in the past
+			(prior to version 1.5) and does not work anymore,
+			add this option and report to the author.
+
+	mask:		event mask telling the driver what events will be
+			reported to the user. This parameter is required for
+			some Vaio models where the hardware reuses values
+			used in other Vaio models (like the FX series who does
+			not have a jogdial but reuses the jogdial events for
+			programmable keys events). The default event mask is
+			set to 0xffffffff, meaning that all possible events
+			will be tried. You can use the following bits to
+			construct your own event mask (from
+			drivers/char/sonypi.h):
+
+				========================	======
+				SONYPI_JOGGER_MASK 		0x0001
+				SONYPI_CAPTURE_MASK 		0x0002
+				SONYPI_FNKEY_MASK 		0x0004
+				SONYPI_BLUETOOTH_MASK 		0x0008
+				SONYPI_PKEY_MASK 		0x0010
+				SONYPI_BACK_MASK 		0x0020
+				SONYPI_HELP_MASK 		0x0040
+				SONYPI_LID_MASK 		0x0080
+				SONYPI_ZOOM_MASK 		0x0100
+				SONYPI_THUMBPHRASE_MASK 	0x0200
+				SONYPI_MEYE_MASK		0x0400
+				SONYPI_MEMORYSTICK_MASK		0x0800
+				SONYPI_BATTERY_MASK		0x1000
+				SONYPI_WIRELESS_MASK		0x2000
+				========================	======
+
+	useinput:	if set (which is the default) two input devices are
+			created, one which interprets the jogdial events as
+			mouse events, the other one which acts like a
+			keyboard reporting the pressing of the special keys.
+	=============== =======================================================
+
+Module use:
+-----------
+
+In order to automatically load the sonypi module on use, you can put those
+lines a configuration file in /etc/modprobe.d/::
+
+	alias char-major-10-250 sonypi
+	options sonypi minor=250
+
+This supposes the use of minor 250 for the sonypi device::
+
+	# mknod /dev/sonypi c 10 250
+
+Bugs:
+-----
+
+	- several users reported that this driver disables the BIOS-managed
+	  Fn-keys which put the laptop in sleeping state, or switch the
+	  external monitor on/off. There is no workaround yet, since this
+	  driver disables all APM management for those keys, by enabling the
+	  ACPI management (and the ACPI core stuff is not complete yet). If
+	  you have one of those laptops with working Fn keys and want to
+	  continue to use them, don't use this driver.
+
+	- some users reported that the laptop speed is lower (dhrystone
+	  tested) when using the driver with the fnkeyinit parameter. I cannot
+	  reproduce it on my laptop and not all users have this problem.
+	  This happens because the fnkeyinit parameter enables the ACPI
+	  mode (but without additional ACPI control, like processor
+	  speed handling etc). Use ACPI instead of APM if it works on your
+	  laptop.
+
+	- sonypi lacks the ability to distinguish between certain key
+	  events on some models.
+
+	- some models with the nvidia card (geforce go 6200 tc) uses a
+	  different way to adjust the backlighting of the screen. There
+	  is a userspace utility to adjust the brightness on those models,
+	  which can be downloaded from
+	  http://www.acc.umu.se/~erikw/program/smartdimmer-0.1.tar.bz2
+
+	- since all development was done by reverse engineering, there is
+	  *absolutely no guarantee* that this driver will not crash your
+	  laptop. Permanently.
diff --git a/Documentation/laptops/sonypi.txt b/Documentation/laptops/sonypi.txt
deleted file mode 100644
index 606bdb9ce036..000000000000
--- a/Documentation/laptops/sonypi.txt
+++ /dev/null
@@ -1,152 +0,0 @@
-Sony Programmable I/O Control Device Driver Readme
---------------------------------------------------
-	Copyright (C) 2001-2004 Stelian Pop <stelian@popies.net>
-	Copyright (C) 2001-2002 Alcôve <www.alcove.com>
-	Copyright (C) 2001 Michael Ashley <m.ashley@unsw.edu.au>
-	Copyright (C) 2001 Junichi Morita <jun1m@mars.dti.ne.jp>
-	Copyright (C) 2000 Takaya Kinjo <t-kinjo@tc4.so-net.ne.jp>
-	Copyright (C) 2000 Andrew Tridgell <tridge@samba.org>
-
-This driver enables access to the Sony Programmable I/O Control Device which
-can be found in many Sony Vaio laptops. Some newer Sony laptops (seems to be
-limited to new FX series laptops, at least the FX501 and the FX702) lack a
-sonypi device and are not supported at all by this driver.
-
-It will give access (through a user space utility) to some events those laptops
-generate, like:
-	- jogdial events (the small wheel on the side of Vaios)
-	- capture button events (only on Vaio Picturebook series)
-	- Fn keys
-	- bluetooth button (only on C1VR model)
-	- programmable keys, back, help, zoom, thumbphrase buttons, etc.
-	  (when available)
-
-Those events (see linux/sonypi.h) can be polled using the character device node
-/dev/sonypi (major 10, minor auto allocated or specified as a option).
-A simple daemon which translates the jogdial movements into mouse wheel events
-can be downloaded at: <http://popies.net/sonypi/>
-
-Another option to intercept the events is to get them directly through the
-input layer.
-
-This driver supports also some ioctl commands for setting the LCD screen
-brightness and querying the batteries charge information (some more
-commands may be added in the future).
-
-This driver can also be used to set the camera controls on Picturebook series
-(brightness, contrast etc), and is used by the video4linux driver for the
-Motion Eye camera.
-
-Please note that this driver was created by reverse engineering the Windows
-driver and the ACPI BIOS, because Sony doesn't agree to release any programming
-specs for its laptops. If someone convinces them to do so, drop me a note.
-
-Driver options:
----------------
-
-Several options can be passed to the sonypi driver using the standard
-module argument syntax (<param>=<value> when passing the option to the
-module or sonypi.<param>=<value> on the kernel boot line when sonypi is
-statically linked into the kernel). Those options are:
-
-	minor: 		minor number of the misc device /dev/sonypi,
-			default is -1 (automatic allocation, see /proc/misc
-			or kernel logs)
-
-	camera:		if you have a PictureBook series Vaio (with the
-			integrated MotionEye camera), set this parameter to 1
-			in order to let the driver access to the camera
-
-	fnkeyinit:	on some Vaios (C1VE, C1VR etc), the Fn key events don't
-			get enabled unless you set this parameter to 1.
-			Do not use this option unless it's actually necessary,
-			some Vaio models don't deal well with this option.
-			This option is available only if the kernel is
-			compiled without ACPI support (since it conflicts
-			with it and it shouldn't be required anyway if
-			ACPI is already enabled).
-
-	verbose:	set to 1 to print unknown events received from the
-			sonypi device.
-			set to 2 to print all events received from the
-			sonypi device.
-
-	compat:		uses some compatibility code for enabling the sonypi
-			events. If the driver worked for you in the past
-			(prior to version 1.5) and does not work anymore,
-			add this option and report to the author.
-
-	mask:		event mask telling the driver what events will be
-			reported to the user. This parameter is required for
-			some Vaio models where the hardware reuses values
-			used in other Vaio models (like the FX series who does
-			not have a jogdial but reuses the jogdial events for
-			programmable keys events). The default event mask is
-			set to 0xffffffff, meaning that all possible events
-			will be tried. You can use the following bits to
-			construct your own event mask (from
-			drivers/char/sonypi.h):
-				SONYPI_JOGGER_MASK 		0x0001
-				SONYPI_CAPTURE_MASK 		0x0002
-				SONYPI_FNKEY_MASK 		0x0004
-				SONYPI_BLUETOOTH_MASK 		0x0008
-				SONYPI_PKEY_MASK 		0x0010
-				SONYPI_BACK_MASK 		0x0020
-				SONYPI_HELP_MASK 		0x0040
-				SONYPI_LID_MASK 		0x0080
-				SONYPI_ZOOM_MASK 		0x0100
-				SONYPI_THUMBPHRASE_MASK 	0x0200
-				SONYPI_MEYE_MASK		0x0400
-				SONYPI_MEMORYSTICK_MASK		0x0800
-				SONYPI_BATTERY_MASK		0x1000
-				SONYPI_WIRELESS_MASK		0x2000
-
-	useinput:	if set (which is the default) two input devices are
-			created, one which interprets the jogdial events as
-			mouse events, the other one which acts like a
-			keyboard reporting the pressing of the special keys.
-
-Module use:
------------
-
-In order to automatically load the sonypi module on use, you can put those
-lines a configuration file in /etc/modprobe.d/:
-
-	alias char-major-10-250 sonypi
-	options sonypi minor=250
-
-This supposes the use of minor 250 for the sonypi device:
-
-	# mknod /dev/sonypi c 10 250
-
-Bugs:
------
-
-	- several users reported that this driver disables the BIOS-managed
-	  Fn-keys which put the laptop in sleeping state, or switch the
-	  external monitor on/off. There is no workaround yet, since this
-	  driver disables all APM management for those keys, by enabling the
-	  ACPI management (and the ACPI core stuff is not complete yet). If
-	  you have one of those laptops with working Fn keys and want to
-	  continue to use them, don't use this driver.
-
-	- some users reported that the laptop speed is lower (dhrystone
-	  tested) when using the driver with the fnkeyinit parameter. I cannot
-	  reproduce it on my laptop and not all users have this problem.
-	  This happens because the fnkeyinit parameter enables the ACPI
-	  mode (but without additional ACPI control, like processor
-	  speed handling etc). Use ACPI instead of APM if it works on your
-	  laptop.
-
-	- sonypi lacks the ability to distinguish between certain key
-	  events on some models.
-
-	- some models with the nvidia card (geforce go 6200 tc) uses a
-	  different way to adjust the backlighting of the screen. There
-	  is a userspace utility to adjust the brightness on those models,
-	  which can be downloaded from
-	  http://www.acc.umu.se/~erikw/program/smartdimmer-0.1.tar.bz2
-
-	- since all development was done by reverse engineering, there is
-	  _absolutely no guarantee_ that this driver will not crash your
-	  laptop. Permanently.
diff --git a/Documentation/laptops/thinkpad-acpi.rst b/Documentation/laptops/thinkpad-acpi.rst
new file mode 100644
index 000000000000..19d52fc3c5e9
--- /dev/null
+++ b/Documentation/laptops/thinkpad-acpi.rst
@@ -0,0 +1,1562 @@
+===========================
+ThinkPad ACPI Extras Driver
+===========================
+
+Version 0.25
+
+October 16th,  2013
+
+- Borislav Deianov <borislav@users.sf.net>
+- Henrique de Moraes Holschuh <hmh@hmh.eng.br>
+
+http://ibm-acpi.sf.net/
+
+This is a Linux driver for the IBM and Lenovo ThinkPad laptops. It
+supports various features of these laptops which are accessible
+through the ACPI and ACPI EC framework, but not otherwise fully
+supported by the generic Linux ACPI drivers.
+
+This driver used to be named ibm-acpi until kernel 2.6.21 and release
+0.13-20070314.  It used to be in the drivers/acpi tree, but it was
+moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel
+2.6.22, and release 0.14.  It was moved to drivers/platform/x86 for
+kernel 2.6.29 and release 0.22.
+
+The driver is named "thinkpad-acpi".  In some places, like module
+names and log messages, "thinkpad_acpi" is used because of userspace
+issues.
+
+"tpacpi" is used as a shorthand where "thinkpad-acpi" would be too
+long due to length limitations on some Linux kernel versions.
+
+Status
+------
+
+The features currently supported are the following (see below for
+detailed description):
+
+	- Fn key combinations
+	- Bluetooth enable and disable
+	- video output switching, expansion control
+	- ThinkLight on and off
+	- CMOS/UCMS control
+	- LED control
+	- ACPI sounds
+	- temperature sensors
+	- Experimental: embedded controller register dump
+	- LCD brightness control
+	- Volume control
+	- Fan control and monitoring: fan speed, fan enable/disable
+	- WAN enable and disable
+	- UWB enable and disable
+
+A compatibility table by model and feature is maintained on the web
+site, http://ibm-acpi.sf.net/. I appreciate any success or failure
+reports, especially if they add to or correct the compatibility table.
+Please include the following information in your report:
+
+	- ThinkPad model name
+	- a copy of your ACPI tables, using the "acpidump" utility
+	- a copy of the output of dmidecode, with serial numbers
+	  and UUIDs masked off
+	- which driver features work and which don't
+	- the observed behavior of non-working features
+
+Any other comments or patches are also more than welcome.
+
+
+Installation
+------------
+
+If you are compiling this driver as included in the Linux kernel
+sources, look for the CONFIG_THINKPAD_ACPI Kconfig option.
+It is located on the menu path: "Device Drivers" -> "X86 Platform
+Specific Device Drivers" -> "ThinkPad ACPI Laptop Extras".
+
+
+Features
+--------
+
+The driver exports two different interfaces to userspace, which can be
+used to access the features it provides.  One is a legacy procfs-based
+interface, which will be removed at some time in the future.  The other
+is a new sysfs-based interface which is not complete yet.
+
+The procfs interface creates the /proc/acpi/ibm directory.  There is a
+file under that directory for each feature it supports.  The procfs
+interface is mostly frozen, and will change very little if at all: it
+will not be extended to add any new functionality in the driver, instead
+all new functionality will be implemented on the sysfs interface.
+
+The sysfs interface tries to blend in the generic Linux sysfs subsystems
+and classes as much as possible.  Since some of these subsystems are not
+yet ready or stabilized, it is expected that this interface will change,
+and any and all userspace programs must deal with it.
+
+
+Notes about the sysfs interface
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Unlike what was done with the procfs interface, correctness when talking
+to the sysfs interfaces will be enforced, as will correctness in the
+thinkpad-acpi's implementation of sysfs interfaces.
+
+Also, any bugs in the thinkpad-acpi sysfs driver code or in the
+thinkpad-acpi's implementation of the sysfs interfaces will be fixed for
+maximum correctness, even if that means changing an interface in
+non-compatible ways.  As these interfaces mature both in the kernel and
+in thinkpad-acpi, such changes should become quite rare.
+
+Applications interfacing to the thinkpad-acpi sysfs interfaces must
+follow all sysfs guidelines and correctly process all errors (the sysfs
+interface makes extensive use of errors).  File descriptors and open /
+close operations to the sysfs inodes must also be properly implemented.
+
+The version of thinkpad-acpi's sysfs interface is exported by the driver
+as a driver attribute (see below).
+
+Sysfs driver attributes are on the driver's sysfs attribute space,
+for 2.6.23+ this is /sys/bus/platform/drivers/thinkpad_acpi/ and
+/sys/bus/platform/drivers/thinkpad_hwmon/
+
+Sysfs device attributes are on the thinkpad_acpi device sysfs attribute
+space, for 2.6.23+ this is /sys/devices/platform/thinkpad_acpi/.
+
+Sysfs device attributes for the sensors and fan are on the
+thinkpad_hwmon device's sysfs attribute space, but you should locate it
+looking for a hwmon device with the name attribute of "thinkpad", or
+better yet, through libsensors. For 4.14+ sysfs attributes were moved to the
+hwmon device (/sys/bus/platform/devices/thinkpad_hwmon/hwmon/hwmon? or
+/sys/class/hwmon/hwmon?).
+
+Driver version
+--------------
+
+procfs: /proc/acpi/ibm/driver
+
+sysfs driver attribute: version
+
+The driver name and version. No commands can be written to this file.
+
+
+Sysfs interface version
+-----------------------
+
+sysfs driver attribute: interface_version
+
+Version of the thinkpad-acpi sysfs interface, as an unsigned long
+(output in hex format: 0xAAAABBCC), where:
+
+	AAAA
+	  - major revision
+	BB
+	  - minor revision
+	CC
+	  - bugfix revision
+
+The sysfs interface version changelog for the driver can be found at the
+end of this document.  Changes to the sysfs interface done by the kernel
+subsystems are not documented here, nor are they tracked by this
+attribute.
+
+Changes to the thinkpad-acpi sysfs interface are only considered
+non-experimental when they are submitted to Linux mainline, at which
+point the changes in this interface are documented and interface_version
+may be updated.  If you are using any thinkpad-acpi features not yet
+sent to mainline for merging, you do so on your own risk: these features
+may disappear, or be implemented in a different and incompatible way by
+the time they are merged in Linux mainline.
+
+Changes that are backwards-compatible by nature (e.g. the addition of
+attributes that do not change the way the other attributes work) do not
+always warrant an update of interface_version.  Therefore, one must
+expect that an attribute might not be there, and deal with it properly
+(an attribute not being there *is* a valid way to make it clear that a
+feature is not available in sysfs).
+
+
+Hot keys
+--------
+
+procfs: /proc/acpi/ibm/hotkey
+
+sysfs device attribute: hotkey_*
+
+In a ThinkPad, the ACPI HKEY handler is responsible for communicating
+some important events and also keyboard hot key presses to the operating
+system.  Enabling the hotkey functionality of thinkpad-acpi signals the
+firmware that such a driver is present, and modifies how the ThinkPad
+firmware will behave in many situations.
+
+The driver enables the HKEY ("hot key") event reporting automatically
+when loaded, and disables it when it is removed.
+
+The driver will report HKEY events in the following format::
+
+	ibm/hotkey HKEY 00000080 0000xxxx
+
+Some of these events refer to hot key presses, but not all of them.
+
+The driver will generate events over the input layer for hot keys and
+radio switches, and over the ACPI netlink layer for other events.  The
+input layer support accepts the standard IOCTLs to remap the keycodes
+assigned to each hot key.
+
+The hot key bit mask allows some control over which hot keys generate
+events.  If a key is "masked" (bit set to 0 in the mask), the firmware
+will handle it.  If it is "unmasked", it signals the firmware that
+thinkpad-acpi would prefer to handle it, if the firmware would be so
+kind to allow it (and it often doesn't!).
+
+Not all bits in the mask can be modified.  Not all bits that can be
+modified do anything.  Not all hot keys can be individually controlled
+by the mask.  Some models do not support the mask at all.  The behaviour
+of the mask is, therefore, highly dependent on the ThinkPad model.
+
+The driver will filter out any unmasked hotkeys, so even if the firmware
+doesn't allow disabling an specific hotkey, the driver will not report
+events for unmasked hotkeys.
+
+Note that unmasking some keys prevents their default behavior.  For
+example, if Fn+F5 is unmasked, that key will no longer enable/disable
+Bluetooth by itself in firmware.
+
+Note also that not all Fn key combinations are supported through ACPI
+depending on the ThinkPad model and firmware version.  On those
+ThinkPads, it is still possible to support some extra hotkeys by
+polling the "CMOS NVRAM" at least 10 times per second.  The driver
+attempts to enables this functionality automatically when required.
+
+procfs notes
+^^^^^^^^^^^^
+
+The following commands can be written to the /proc/acpi/ibm/hotkey file::
+
+	echo 0xffffffff > /proc/acpi/ibm/hotkey -- enable all hot keys
+	echo 0 > /proc/acpi/ibm/hotkey -- disable all possible hot keys
+	... any other 8-hex-digit mask ...
+	echo reset > /proc/acpi/ibm/hotkey -- restore the recommended mask
+
+The following commands have been deprecated and will cause the kernel
+to log a warning::
+
+	echo enable > /proc/acpi/ibm/hotkey -- does nothing
+	echo disable > /proc/acpi/ibm/hotkey -- returns an error
+
+The procfs interface does not support NVRAM polling control.  So as to
+maintain maximum bug-to-bug compatibility, it does not report any masks,
+nor does it allow one to manipulate the hot key mask when the firmware
+does not support masks at all, even if NVRAM polling is in use.
+
+sysfs notes
+^^^^^^^^^^^
+
+	hotkey_bios_enabled:
+		DEPRECATED, WILL BE REMOVED SOON.
+
+		Returns 0.
+
+	hotkey_bios_mask:
+		DEPRECATED, DON'T USE, WILL BE REMOVED IN THE FUTURE.
+
+		Returns the hot keys mask when thinkpad-acpi was loaded.
+		Upon module unload, the hot keys mask will be restored
+		to this value.   This is always 0x80c, because those are
+		the hotkeys that were supported by ancient firmware
+		without mask support.
+
+	hotkey_enable:
+		DEPRECATED, WILL BE REMOVED SOON.
+
+		0: returns -EPERM
+		1: does nothing
+
+	hotkey_mask:
+		bit mask to enable reporting (and depending on
+		the firmware, ACPI event generation) for each hot key
+		(see above).  Returns the current status of the hot keys
+		mask, and allows one to modify it.
+
+	hotkey_all_mask:
+		bit mask that should enable event reporting for all
+		supported hot keys, when echoed to hotkey_mask above.
+		Unless you know which events need to be handled
+		passively (because the firmware *will* handle them
+		anyway), do *not* use hotkey_all_mask.  Use
+		hotkey_recommended_mask, instead. You have been warned.
+
+	hotkey_recommended_mask:
+		bit mask that should enable event reporting for all
+		supported hot keys, except those which are always
+		handled by the firmware anyway.  Echo it to
+		hotkey_mask above, to use.  This is the default mask
+		used by the driver.
+
+	hotkey_source_mask:
+		bit mask that selects which hot keys will the driver
+		poll the NVRAM for.  This is auto-detected by the driver
+		based on the capabilities reported by the ACPI firmware,
+		but it can be overridden at runtime.
+
+		Hot keys whose bits are set in hotkey_source_mask are
+		polled for in NVRAM, and reported as hotkey events if
+		enabled in hotkey_mask.  Only a few hot keys are
+		available through CMOS NVRAM polling.
+
+		Warning: when in NVRAM mode, the volume up/down/mute
+		keys are synthesized according to changes in the mixer,
+		which uses a single volume up or volume down hotkey
+		press to unmute, as per the ThinkPad volume mixer user
+		interface.  When in ACPI event mode, volume up/down/mute
+		events are reported by the firmware and can behave
+		differently (and that behaviour changes with firmware
+		version -- not just with firmware models -- as well as
+		OSI(Linux) state).
+
+	hotkey_poll_freq:
+		frequency in Hz for hot key polling. It must be between
+		0 and 25 Hz.  Polling is only carried out when strictly
+		needed.
+
+		Setting hotkey_poll_freq to zero disables polling, and
+		will cause hot key presses that require NVRAM polling
+		to never be reported.
+
+		Setting hotkey_poll_freq too low may cause repeated
+		pressings of the same hot key to be misreported as a
+		single key press, or to not even be detected at all.
+		The recommended polling frequency is 10Hz.
+
+	hotkey_radio_sw:
+		If the ThinkPad has a hardware radio switch, this
+		attribute will read 0 if the switch is in the "radios
+		disabled" position, and 1 if the switch is in the
+		"radios enabled" position.
+
+		This attribute has poll()/select() support.
+
+	hotkey_tablet_mode:
+		If the ThinkPad has tablet capabilities, this attribute
+		will read 0 if the ThinkPad is in normal mode, and
+		1 if the ThinkPad is in tablet mode.
+
+		This attribute has poll()/select() support.
+
+	wakeup_reason:
+		Set to 1 if the system is waking up because the user
+		requested a bay ejection.  Set to 2 if the system is
+		waking up because the user requested the system to
+		undock.  Set to zero for normal wake-ups or wake-ups
+		due to unknown reasons.
+
+		This attribute has poll()/select() support.
+
+	wakeup_hotunplug_complete:
+		Set to 1 if the system was waken up because of an
+		undock or bay ejection request, and that request
+		was successfully completed.  At this point, it might
+		be useful to send the system back to sleep, at the
+		user's choice.  Refer to HKEY events 0x4003 and
+		0x3003, below.
+
+		This attribute has poll()/select() support.
+
+input layer notes
+^^^^^^^^^^^^^^^^^
+
+A Hot key is mapped to a single input layer EV_KEY event, possibly
+followed by an EV_MSC MSC_SCAN event that shall contain that key's scan
+code.  An EV_SYN event will always be generated to mark the end of the
+event block.
+
+Do not use the EV_MSC MSC_SCAN events to process keys.  They are to be
+used as a helper to remap keys, only.  They are particularly useful when
+remapping KEY_UNKNOWN keys.
+
+The events are available in an input device, with the following id:
+
+	==============  ==============================
+	Bus		BUS_HOST
+	vendor		0x1014 (PCI_VENDOR_ID_IBM)  or
+			0x17aa (PCI_VENDOR_ID_LENOVO)
+	product		0x5054 ("TP")
+	version		0x4101
+	==============  ==============================
+
+The version will have its LSB incremented if the keymap changes in a
+backwards-compatible way.  The MSB shall always be 0x41 for this input
+device.  If the MSB is not 0x41, do not use the device as described in
+this section, as it is either something else (e.g. another input device
+exported by a thinkpad driver, such as HDAPS) or its functionality has
+been changed in a non-backwards compatible way.
+
+Adding other event types for other functionalities shall be considered a
+backwards-compatible change for this input device.
+
+Thinkpad-acpi Hot Key event map (version 0x4101):
+
+=======	=======	==============	==============================================
+ACPI	Scan
+event	code	Key		Notes
+=======	=======	==============	==============================================
+0x1001	0x00	FN+F1		-
+
+0x1002	0x01	FN+F2		IBM: battery (rare)
+				Lenovo: Screen lock
+
+0x1003	0x02	FN+F3		Many IBM models always report
+				this hot key, even with hot keys
+				disabled or with Fn+F3 masked
+				off
+				IBM: screen lock, often turns
+				off the ThinkLight as side-effect
+				Lenovo: battery
+
+0x1004	0x03	FN+F4		Sleep button (ACPI sleep button
+				semantics, i.e. sleep-to-RAM).
+				It always generates some kind
+				of event, either the hot key
+				event or an ACPI sleep button
+				event. The firmware may
+				refuse to generate further FN+F4
+				key presses until a S3 or S4 ACPI
+				sleep cycle is performed or some
+				time passes.
+
+0x1005	0x04	FN+F5		Radio.  Enables/disables
+				the internal Bluetooth hardware
+				and W-WAN card if left in control
+				of the firmware.  Does not affect
+				the WLAN card.
+				Should be used to turn on/off all
+				radios (Bluetooth+W-WAN+WLAN),
+				really.
+
+0x1006	0x05	FN+F6		-
+
+0x1007	0x06	FN+F7		Video output cycle.
+				Do you feel lucky today?
+
+0x1008	0x07	FN+F8		IBM: toggle screen expand
+				Lenovo: configure UltraNav,
+				or toggle screen expand
+
+0x1009	0x08	FN+F9		-
+
+...	...	...		...
+
+0x100B	0x0A	FN+F11		-
+
+0x100C	0x0B	FN+F12		Sleep to disk.  You are always
+				supposed to handle it yourself,
+				either through the ACPI event,
+				or through a hotkey event.
+				The firmware may refuse to
+				generate further FN+F12 key
+				press events until a S3 or S4
+				ACPI sleep cycle is performed,
+				or some time passes.
+
+0x100D	0x0C	FN+BACKSPACE	-
+0x100E	0x0D	FN+INSERT	-
+0x100F	0x0E	FN+DELETE	-
+
+0x1010	0x0F	FN+HOME		Brightness up.  This key is
+				always handled by the firmware
+				in IBM ThinkPads, even when
+				unmasked.  Just leave it alone.
+				For Lenovo ThinkPads with a new
+				BIOS, it has to be handled either
+				by the ACPI OSI, or by userspace.
+				The driver does the right thing,
+				never mess with this.
+0x1011	0x10	FN+END		Brightness down.  See brightness
+				up for details.
+
+0x1012	0x11	FN+PGUP		ThinkLight toggle.  This key is
+				always handled by the firmware,
+				even when unmasked.
+
+0x1013	0x12	FN+PGDOWN	-
+
+0x1014	0x13	FN+SPACE	Zoom key
+
+0x1015	0x14	VOLUME UP	Internal mixer volume up. This
+				key is always handled by the
+				firmware, even when unmasked.
+				NOTE: Lenovo seems to be changing
+				this.
+0x1016	0x15	VOLUME DOWN	Internal mixer volume up. This
+				key is always handled by the
+				firmware, even when unmasked.
+				NOTE: Lenovo seems to be changing
+				this.
+0x1017	0x16	MUTE		Mute internal mixer. This
+				key is always handled by the
+				firmware, even when unmasked.
+
+0x1018	0x17	THINKPAD	ThinkPad/Access IBM/Lenovo key
+
+0x1019	0x18	unknown
+
+...	...	...
+
+0x1020	0x1F	unknown
+=======	=======	==============	==============================================
+
+The ThinkPad firmware does not allow one to differentiate when most hot
+keys are pressed or released (either that, or we don't know how to, yet).
+For these keys, the driver generates a set of events for a key press and
+immediately issues the same set of events for a key release.  It is
+unknown by the driver if the ThinkPad firmware triggered these events on
+hot key press or release, but the firmware will do it for either one, not
+both.
+
+If a key is mapped to KEY_RESERVED, it generates no input events at all.
+If a key is mapped to KEY_UNKNOWN, it generates an input event that
+includes an scan code.  If a key is mapped to anything else, it will
+generate input device EV_KEY events.
+
+In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW
+events for switches:
+
+==============	==============================================
+SW_RFKILL_ALL	T60 and later hardware rfkill rocker switch
+SW_TABLET_MODE	Tablet ThinkPads HKEY events 0x5009 and 0x500A
+==============	==============================================
+
+Non hotkey ACPI HKEY event map
+------------------------------
+
+Events that are never propagated by the driver:
+
+======		==================================================
+0x2304		System is waking up from suspend to undock
+0x2305		System is waking up from suspend to eject bay
+0x2404		System is waking up from hibernation to undock
+0x2405		System is waking up from hibernation to eject bay
+0x5001		Lid closed
+0x5002		Lid opened
+0x5009		Tablet swivel: switched to tablet mode
+0x500A		Tablet swivel: switched to normal mode
+0x5010		Brightness level changed/control event
+0x6000		KEYBOARD: Numlock key pressed
+0x6005		KEYBOARD: Fn key pressed (TO BE VERIFIED)
+0x7000		Radio Switch may have changed state
+======		==================================================
+
+
+Events that are propagated by the driver to userspace:
+
+======		=====================================================
+0x2313		ALARM: System is waking up from suspend because
+		the battery is nearly empty
+0x2413		ALARM: System is waking up from hibernation because
+		the battery is nearly empty
+0x3003		Bay ejection (see 0x2x05) complete, can sleep again
+0x3006		Bay hotplug request (hint to power up SATA link when
+		the optical drive tray is ejected)
+0x4003		Undocked (see 0x2x04), can sleep again
+0x4010		Docked into hotplug port replicator (non-ACPI dock)
+0x4011		Undocked from hotplug port replicator (non-ACPI dock)
+0x500B		Tablet pen inserted into its storage bay
+0x500C		Tablet pen removed from its storage bay
+0x6011		ALARM: battery is too hot
+0x6012		ALARM: battery is extremely hot
+0x6021		ALARM: a sensor is too hot
+0x6022		ALARM: a sensor is extremely hot
+0x6030		System thermal table changed
+0x6032		Thermal Control command set completion  (DYTC, Windows)
+0x6040		Nvidia Optimus/AC adapter related (TO BE VERIFIED)
+0x60C0		X1 Yoga 2016, Tablet mode status changed
+0x60F0		Thermal Transformation changed (GMTS, Windows)
+======		=====================================================
+
+Battery nearly empty alarms are a last resort attempt to get the
+operating system to hibernate or shutdown cleanly (0x2313), or shutdown
+cleanly (0x2413) before power is lost.  They must be acted upon, as the
+wake up caused by the firmware will have negated most safety nets...
+
+When any of the "too hot" alarms happen, according to Lenovo the user
+should suspend or hibernate the laptop (and in the case of battery
+alarms, unplug the AC adapter) to let it cool down.  These alarms do
+signal that something is wrong, they should never happen on normal
+operating conditions.
+
+The "extremely hot" alarms are emergencies.  According to Lenovo, the
+operating system is to force either an immediate suspend or hibernate
+cycle, or a system shutdown.  Obviously, something is very wrong if this
+happens.
+
+
+Brightness hotkey notes
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Don't mess with the brightness hotkeys in a Thinkpad.  If you want
+notifications for OSD, use the sysfs backlight class event support.
+
+The driver will issue KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN events
+automatically for the cases were userspace has to do something to
+implement brightness changes.  When you override these events, you will
+either fail to handle properly the ThinkPads that require explicit
+action to change backlight brightness, or the ThinkPads that require
+that no action be taken to work properly.
+
+
+Bluetooth
+---------
+
+procfs: /proc/acpi/ibm/bluetooth
+
+sysfs device attribute: bluetooth_enable (deprecated)
+
+sysfs rfkill class: switch "tpacpi_bluetooth_sw"
+
+This feature shows the presence and current state of a ThinkPad
+Bluetooth device in the internal ThinkPad CDC slot.
+
+If the ThinkPad supports it, the Bluetooth state is stored in NVRAM,
+so it is kept across reboots and power-off.
+
+Procfs notes
+^^^^^^^^^^^^
+
+If Bluetooth is installed, the following commands can be used::
+
+	echo enable > /proc/acpi/ibm/bluetooth
+	echo disable > /proc/acpi/ibm/bluetooth
+
+Sysfs notes
+^^^^^^^^^^^
+
+	If the Bluetooth CDC card is installed, it can be enabled /
+	disabled through the "bluetooth_enable" thinkpad-acpi device
+	attribute, and its current status can also be queried.
+
+	enable:
+
+		- 0: disables Bluetooth / Bluetooth is disabled
+		- 1: enables Bluetooth / Bluetooth is enabled.
+
+	Note: this interface has been superseded by the	generic rfkill
+	class.  It has been deprecated, and it will be removed in year
+	2010.
+
+	rfkill controller switch "tpacpi_bluetooth_sw": refer to
+	Documentation/rfkill.txt for details.
+
+
+Video output control -- /proc/acpi/ibm/video
+--------------------------------------------
+
+This feature allows control over the devices used for video output -
+LCD, CRT or DVI (if available). The following commands are available::
+
+	echo lcd_enable > /proc/acpi/ibm/video
+	echo lcd_disable > /proc/acpi/ibm/video
+	echo crt_enable > /proc/acpi/ibm/video
+	echo crt_disable > /proc/acpi/ibm/video
+	echo dvi_enable > /proc/acpi/ibm/video
+	echo dvi_disable > /proc/acpi/ibm/video
+	echo auto_enable > /proc/acpi/ibm/video
+	echo auto_disable > /proc/acpi/ibm/video
+	echo expand_toggle > /proc/acpi/ibm/video
+	echo video_switch > /proc/acpi/ibm/video
+
+NOTE:
+  Access to this feature is restricted to processes owning the
+  CAP_SYS_ADMIN capability for safety reasons, as it can interact badly
+  enough with some versions of X.org to crash it.
+
+Each video output device can be enabled or disabled individually.
+Reading /proc/acpi/ibm/video shows the status of each device.
+
+Automatic video switching can be enabled or disabled.  When automatic
+video switching is enabled, certain events (e.g. opening the lid,
+docking or undocking) cause the video output device to change
+automatically. While this can be useful, it also causes flickering
+and, on the X40, video corruption. By disabling automatic switching,
+the flickering or video corruption can be avoided.
+
+The video_switch command cycles through the available video outputs
+(it simulates the behavior of Fn-F7).
+
+Video expansion can be toggled through this feature. This controls
+whether the display is expanded to fill the entire LCD screen when a
+mode with less than full resolution is used. Note that the current
+video expansion status cannot be determined through this feature.
+
+Note that on many models (particularly those using Radeon graphics
+chips) the X driver configures the video card in a way which prevents
+Fn-F7 from working. This also disables the video output switching
+features of this driver, as it uses the same ACPI methods as
+Fn-F7. Video switching on the console should still work.
+
+UPDATE: refer to https://bugs.freedesktop.org/show_bug.cgi?id=2000
+
+
+ThinkLight control
+------------------
+
+procfs: /proc/acpi/ibm/light
+
+sysfs attributes: as per LED class, for the "tpacpi::thinklight" LED
+
+procfs notes
+^^^^^^^^^^^^
+
+The ThinkLight status can be read and set through the procfs interface.  A
+few models which do not make the status available will show the ThinkLight
+status as "unknown". The available commands are::
+
+	echo on  > /proc/acpi/ibm/light
+	echo off > /proc/acpi/ibm/light
+
+sysfs notes
+^^^^^^^^^^^
+
+The ThinkLight sysfs interface is documented by the LED class
+documentation, in Documentation/leds/leds-class.rst.  The ThinkLight LED name
+is "tpacpi::thinklight".
+
+Due to limitations in the sysfs LED class, if the status of the ThinkLight
+cannot be read or if it is unknown, thinkpad-acpi will report it as "off".
+It is impossible to know if the status returned through sysfs is valid.
+
+
+CMOS/UCMS control
+-----------------
+
+procfs: /proc/acpi/ibm/cmos
+
+sysfs device attribute: cmos_command
+
+This feature is mostly used internally by the ACPI firmware to keep the legacy
+CMOS NVRAM bits in sync with the current machine state, and to record this
+state so that the ThinkPad will retain such settings across reboots.
+
+Some of these commands actually perform actions in some ThinkPad models, but
+this is expected to disappear more and more in newer models.  As an example, in
+a T43 and in a X40, commands 12 and 13 still control the ThinkLight state for
+real, but commands 0 to 2 don't control the mixer anymore (they have been
+phased out) and just update the NVRAM.
+
+The range of valid cmos command numbers is 0 to 21, but not all have an
+effect and the behavior varies from model to model.  Here is the behavior
+on the X40 (tpb is the ThinkPad Buttons utility):
+
+	- 0 - Related to "Volume down" key press
+	- 1 - Related to "Volume up" key press
+	- 2 - Related to "Mute on" key press
+	- 3 - Related to "Access IBM" key press
+	- 4 - Related to "LCD brightness up" key press
+	- 5 - Related to "LCD brightness down" key press
+	- 11 - Related to "toggle screen expansion" key press/function
+	- 12 - Related to "ThinkLight on"
+	- 13 - Related to "ThinkLight off"
+	- 14 - Related to "ThinkLight" key press (toggle ThinkLight)
+
+The cmos command interface is prone to firmware split-brain problems, as
+in newer ThinkPads it is just a compatibility layer.  Do not use it, it is
+exported just as a debug tool.
+
+
+LED control
+-----------
+
+procfs: /proc/acpi/ibm/led
+sysfs attributes: as per LED class, see below for names
+
+Some of the LED indicators can be controlled through this feature.  On
+some older ThinkPad models, it is possible to query the status of the
+LED indicators as well.  Newer ThinkPads cannot query the real status
+of the LED indicators.
+
+Because misuse of the LEDs could induce an unaware user to perform
+dangerous actions (like undocking or ejecting a bay device while the
+buses are still active), or mask an important alarm (such as a nearly
+empty battery, or a broken battery), access to most LEDs is
+restricted.
+
+Unrestricted access to all LEDs requires that thinkpad-acpi be
+compiled with the CONFIG_THINKPAD_ACPI_UNSAFE_LEDS option enabled.
+Distributions must never enable this option.  Individual users that
+are aware of the consequences are welcome to enabling it.
+
+Audio mute and microphone mute LEDs are supported, but currently not
+visible to userspace. They are used by the snd-hda-intel audio driver.
+
+procfs notes
+^^^^^^^^^^^^
+
+The available commands are::
+
+	echo '<LED number> on' >/proc/acpi/ibm/led
+	echo '<LED number> off' >/proc/acpi/ibm/led
+	echo '<LED number> blink' >/proc/acpi/ibm/led
+
+The <LED number> range is 0 to 15. The set of LEDs that can be
+controlled varies from model to model. Here is the common ThinkPad
+mapping:
+
+	- 0 - power
+	- 1 - battery (orange)
+	- 2 - battery (green)
+	- 3 - UltraBase/dock
+	- 4 - UltraBay
+	- 5 - UltraBase battery slot
+	- 6 - (unknown)
+	- 7 - standby
+	- 8 - dock status 1
+	- 9 - dock status 2
+	- 10, 11 - (unknown)
+	- 12 - thinkvantage
+	- 13, 14, 15 - (unknown)
+
+All of the above can be turned on and off and can be made to blink.
+
+sysfs notes
+^^^^^^^^^^^
+
+The ThinkPad LED sysfs interface is described in detail by the LED class
+documentation, in Documentation/leds/leds-class.rst.
+
+The LEDs are named (in LED ID order, from 0 to 12):
+"tpacpi::power", "tpacpi:orange:batt", "tpacpi:green:batt",
+"tpacpi::dock_active", "tpacpi::bay_active", "tpacpi::dock_batt",
+"tpacpi::unknown_led", "tpacpi::standby", "tpacpi::dock_status1",
+"tpacpi::dock_status2", "tpacpi::unknown_led2", "tpacpi::unknown_led3",
+"tpacpi::thinkvantage".
+
+Due to limitations in the sysfs LED class, if the status of the LED
+indicators cannot be read due to an error, thinkpad-acpi will report it as
+a brightness of zero (same as LED off).
+
+If the thinkpad firmware doesn't support reading the current status,
+trying to read the current LED brightness will just return whatever
+brightness was last written to that attribute.
+
+These LEDs can blink using hardware acceleration.  To request that a
+ThinkPad indicator LED should blink in hardware accelerated mode, use the
+"timer" trigger, and leave the delay_on and delay_off parameters set to
+zero (to request hardware acceleration autodetection).
+
+LEDs that are known not to exist in a given ThinkPad model are not
+made available through the sysfs interface.  If you have a dock and you
+notice there are LEDs listed for your ThinkPad that do not exist (and
+are not in the dock), or if you notice that there are missing LEDs,
+a report to ibm-acpi-devel@lists.sourceforge.net is appreciated.
+
+
+ACPI sounds -- /proc/acpi/ibm/beep
+----------------------------------
+
+The BEEP method is used internally by the ACPI firmware to provide
+audible alerts in various situations. This feature allows the same
+sounds to be triggered manually.
+
+The commands are non-negative integer numbers::
+
+	echo <number> >/proc/acpi/ibm/beep
+
+The valid <number> range is 0 to 17. Not all numbers trigger sounds
+and the sounds vary from model to model. Here is the behavior on the
+X40:
+
+	- 0 - stop a sound in progress (but use 17 to stop 16)
+	- 2 - two beeps, pause, third beep ("low battery")
+	- 3 - single beep
+	- 4 - high, followed by low-pitched beep ("unable")
+	- 5 - single beep
+	- 6 - very high, followed by high-pitched beep ("AC/DC")
+	- 7 - high-pitched beep
+	- 9 - three short beeps
+	- 10 - very long beep
+	- 12 - low-pitched beep
+	- 15 - three high-pitched beeps repeating constantly, stop with 0
+	- 16 - one medium-pitched beep repeating constantly, stop with 17
+	- 17 - stop 16
+
+
+Temperature sensors
+-------------------
+
+procfs: /proc/acpi/ibm/thermal
+
+sysfs device attributes: (hwmon "thinkpad") temp*_input
+
+Most ThinkPads include six or more separate temperature sensors but only
+expose the CPU temperature through the standard ACPI methods.  This
+feature shows readings from up to eight different sensors on older
+ThinkPads, and up to sixteen different sensors on newer ThinkPads.
+
+For example, on the X40, a typical output may be:
+
+temperatures:
+	42 42 45 41 36 -128 33 -128
+
+On the T43/p, a typical output may be:
+
+temperatures:
+	48 48 36 52 38 -128 31 -128 48 52 48 -128 -128 -128 -128 -128
+
+The mapping of thermal sensors to physical locations varies depending on
+system-board model (and thus, on ThinkPad model).
+
+http://thinkwiki.org/wiki/Thermal_Sensors is a public wiki page that
+tries to track down these locations for various models.
+
+Most (newer?) models seem to follow this pattern:
+
+- 1:  CPU
+- 2:  (depends on model)
+- 3:  (depends on model)
+- 4:  GPU
+- 5:  Main battery: main sensor
+- 6:  Bay battery: main sensor
+- 7:  Main battery: secondary sensor
+- 8:  Bay battery: secondary sensor
+- 9-15: (depends on model)
+
+For the R51 (source: Thomas Gruber):
+
+- 2:  Mini-PCI
+- 3:  Internal HDD
+
+For the T43, T43/p (source: Shmidoax/Thinkwiki.org)
+http://thinkwiki.org/wiki/Thermal_Sensors#ThinkPad_T43.2C_T43p
+
+- 2:  System board, left side (near PCMCIA slot), reported as HDAPS temp
+- 3:  PCMCIA slot
+- 9:  MCH (northbridge) to DRAM Bus
+- 10: Clock-generator, mini-pci card and ICH (southbridge), under Mini-PCI
+      card, under touchpad
+- 11: Power regulator, underside of system board, below F2 key
+
+The A31 has a very atypical layout for the thermal sensors
+(source: Milos Popovic, http://thinkwiki.org/wiki/Thermal_Sensors#ThinkPad_A31)
+
+- 1:  CPU
+- 2:  Main Battery: main sensor
+- 3:  Power Converter
+- 4:  Bay Battery: main sensor
+- 5:  MCH (northbridge)
+- 6:  PCMCIA/ambient
+- 7:  Main Battery: secondary sensor
+- 8:  Bay Battery: secondary sensor
+
+
+Procfs notes
+^^^^^^^^^^^^
+
+	Readings from sensors that are not available return -128.
+	No commands can be written to this file.
+
+Sysfs notes
+^^^^^^^^^^^
+
+	Sensors that are not available return the ENXIO error.  This
+	status may change at runtime, as there are hotplug thermal
+	sensors, like those inside the batteries and docks.
+
+	thinkpad-acpi thermal sensors are reported through the hwmon
+	subsystem, and follow all of the hwmon guidelines at
+	Documentation/hwmon.
+
+EXPERIMENTAL: Embedded controller register dump
+-----------------------------------------------
+
+This feature is not included in the thinkpad driver anymore.
+Instead the EC can be accessed through /sys/kernel/debug/ec with
+a userspace tool which can be found here:
+ftp://ftp.suse.com/pub/people/trenn/sources/ec
+
+Use it to determine the register holding the fan
+speed on some models. To do that, do the following:
+
+	- make sure the battery is fully charged
+	- make sure the fan is running
+	- use above mentioned tool to read out the EC
+
+Often fan and temperature values vary between
+readings. Since temperatures don't change vary fast, you can take
+several quick dumps to eliminate them.
+
+You can use a similar method to figure out the meaning of other
+embedded controller registers - e.g. make sure nothing else changes
+except the charging or discharging battery to determine which
+registers contain the current battery capacity, etc. If you experiment
+with this, do send me your results (including some complete dumps with
+a description of the conditions when they were taken.)
+
+
+LCD brightness control
+----------------------
+
+procfs: /proc/acpi/ibm/brightness
+
+sysfs backlight device "thinkpad_screen"
+
+This feature allows software control of the LCD brightness on ThinkPad
+models which don't have a hardware brightness slider.
+
+It has some limitations: the LCD backlight cannot be actually turned
+on or off by this interface, it just controls the backlight brightness
+level.
+
+On IBM (and some of the earlier Lenovo) ThinkPads, the backlight control
+has eight brightness levels, ranging from 0 to 7.  Some of the levels
+may not be distinct.  Later Lenovo models that implement the ACPI
+display backlight brightness control methods have 16 levels, ranging
+from 0 to 15.
+
+For IBM ThinkPads, there are two interfaces to the firmware for direct
+brightness control, EC and UCMS (or CMOS).  To select which one should be
+used, use the brightness_mode module parameter: brightness_mode=1 selects
+EC mode, brightness_mode=2 selects UCMS mode, brightness_mode=3 selects EC
+mode with NVRAM backing (so that brightness changes are remembered across
+shutdown/reboot).
+
+The driver tries to select which interface to use from a table of
+defaults for each ThinkPad model.  If it makes a wrong choice, please
+report this as a bug, so that we can fix it.
+
+Lenovo ThinkPads only support brightness_mode=2 (UCMS).
+
+When display backlight brightness controls are available through the
+standard ACPI interface, it is best to use it instead of this direct
+ThinkPad-specific interface.  The driver will disable its native
+backlight brightness control interface if it detects that the standard
+ACPI interface is available in the ThinkPad.
+
+If you want to use the thinkpad-acpi backlight brightness control
+instead of the generic ACPI video backlight brightness control for some
+reason, you should use the acpi_backlight=vendor kernel parameter.
+
+The brightness_enable module parameter can be used to control whether
+the LCD brightness control feature will be enabled when available.
+brightness_enable=0 forces it to be disabled.  brightness_enable=1
+forces it to be enabled when available, even if the standard ACPI
+interface is also available.
+
+Procfs notes
+^^^^^^^^^^^^
+
+The available commands are::
+
+	echo up   >/proc/acpi/ibm/brightness
+	echo down >/proc/acpi/ibm/brightness
+	echo 'level <level>' >/proc/acpi/ibm/brightness
+
+Sysfs notes
+^^^^^^^^^^^
+
+The interface is implemented through the backlight sysfs class, which is
+poorly documented at this time.
+
+Locate the thinkpad_screen device under /sys/class/backlight, and inside
+it there will be the following attributes:
+
+	max_brightness:
+		Reads the maximum brightness the hardware can be set to.
+		The minimum is always zero.
+
+	actual_brightness:
+		Reads what brightness the screen is set to at this instant.
+
+	brightness:
+		Writes request the driver to change brightness to the
+		given value.  Reads will tell you what brightness the
+		driver is trying to set the display to when "power" is set
+		to zero and the display has not been dimmed by a kernel
+		power management event.
+
+	power:
+		power management mode, where 0 is "display on", and 1 to 3
+		will dim the display backlight to brightness level 0
+		because thinkpad-acpi cannot really turn the backlight
+		off.  Kernel power management events can temporarily
+		increase the current power management level, i.e. they can
+		dim the display.
+
+
+WARNING:
+
+    Whatever you do, do NOT ever call thinkpad-acpi backlight-level change
+    interface and the ACPI-based backlight level change interface
+    (available on newer BIOSes, and driven by the Linux ACPI video driver)
+    at the same time.  The two will interact in bad ways, do funny things,
+    and maybe reduce the life of the backlight lamps by needlessly kicking
+    its level up and down at every change.
+
+
+Volume control (Console Audio control)
+--------------------------------------
+
+procfs: /proc/acpi/ibm/volume
+
+ALSA: "ThinkPad Console Audio Control", default ID: "ThinkPadEC"
+
+NOTE: by default, the volume control interface operates in read-only
+mode, as it is supposed to be used for on-screen-display purposes.
+The read/write mode can be enabled through the use of the
+"volume_control=1" module parameter.
+
+NOTE: distros are urged to not enable volume_control by default, this
+should be done by the local admin only.  The ThinkPad UI is for the
+console audio control to be done through the volume keys only, and for
+the desktop environment to just provide on-screen-display feedback.
+Software volume control should be done only in the main AC97/HDA
+mixer.
+
+
+About the ThinkPad Console Audio control
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ThinkPads have a built-in amplifier and muting circuit that drives the
+console headphone and speakers.  This circuit is after the main AC97
+or HDA mixer in the audio path, and under exclusive control of the
+firmware.
+
+ThinkPads have three special hotkeys to interact with the console
+audio control: volume up, volume down and mute.
+
+It is worth noting that the normal way the mute function works (on
+ThinkPads that do not have a "mute LED") is:
+
+1. Press mute to mute.  It will *always* mute, you can press it as
+   many times as you want, and the sound will remain mute.
+
+2. Press either volume key to unmute the ThinkPad (it will _not_
+   change the volume, it will just unmute).
+
+This is a very superior design when compared to the cheap software-only
+mute-toggle solution found on normal consumer laptops:  you can be
+absolutely sure the ThinkPad will not make noise if you press the mute
+button, no matter the previous state.
+
+The IBM ThinkPads, and the earlier Lenovo ThinkPads have variable-gain
+amplifiers driving the speakers and headphone output, and the firmware
+also handles volume control for the headphone and speakers on these
+ThinkPads without any help from the operating system (this volume
+control stage exists after the main AC97 or HDA mixer in the audio
+path).
+
+The newer Lenovo models only have firmware mute control, and depend on
+the main HDA mixer to do volume control (which is done by the operating
+system).  In this case, the volume keys are filtered out for unmute
+key press (there are some firmware bugs in this area) and delivered as
+normal key presses to the operating system (thinkpad-acpi is not
+involved).
+
+
+The ThinkPad-ACPI volume control
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The preferred way to interact with the Console Audio control is the
+ALSA interface.
+
+The legacy procfs interface allows one to read the current state,
+and if volume control is enabled, accepts the following commands::
+
+	echo up   >/proc/acpi/ibm/volume
+	echo down >/proc/acpi/ibm/volume
+	echo mute >/proc/acpi/ibm/volume
+	echo unmute >/proc/acpi/ibm/volume
+	echo 'level <level>' >/proc/acpi/ibm/volume
+
+The <level> number range is 0 to 14 although not all of them may be
+distinct. To unmute the volume after the mute command, use either the
+up or down command (the level command will not unmute the volume), or
+the unmute command.
+
+You can use the volume_capabilities parameter to tell the driver
+whether your thinkpad has volume control or mute-only control:
+volume_capabilities=1 for mixers with mute and volume control,
+volume_capabilities=2 for mixers with only mute control.
+
+If the driver misdetects the capabilities for your ThinkPad model,
+please report this to ibm-acpi-devel@lists.sourceforge.net, so that we
+can update the driver.
+
+There are two strategies for volume control.  To select which one
+should be used, use the volume_mode module parameter: volume_mode=1
+selects EC mode, and volume_mode=3 selects EC mode with NVRAM backing
+(so that volume/mute changes are remembered across shutdown/reboot).
+
+The driver will operate in volume_mode=3 by default. If that does not
+work well on your ThinkPad model, please report this to
+ibm-acpi-devel@lists.sourceforge.net.
+
+The driver supports the standard ALSA module parameters.  If the ALSA
+mixer is disabled, the driver will disable all volume functionality.
+
+
+Fan control and monitoring: fan speed, fan enable/disable
+---------------------------------------------------------
+
+procfs: /proc/acpi/ibm/fan
+
+sysfs device attributes: (hwmon "thinkpad") fan1_input, pwm1, pwm1_enable, fan2_input
+
+sysfs hwmon driver attributes: fan_watchdog
+
+NOTE NOTE NOTE:
+   fan control operations are disabled by default for
+   safety reasons.  To enable them, the module parameter "fan_control=1"
+   must be given to thinkpad-acpi.
+
+This feature attempts to show the current fan speed, control mode and
+other fan data that might be available.  The speed is read directly
+from the hardware registers of the embedded controller.  This is known
+to work on later R, T, X and Z series ThinkPads but may show a bogus
+value on other models.
+
+Some Lenovo ThinkPads support a secondary fan.  This fan cannot be
+controlled separately, it shares the main fan control.
+
+Fan levels
+^^^^^^^^^^
+
+Most ThinkPad fans work in "levels" at the firmware interface.  Level 0
+stops the fan.  The higher the level, the higher the fan speed, although
+adjacent levels often map to the same fan speed.  7 is the highest
+level, where the fan reaches the maximum recommended speed.
+
+Level "auto" means the EC changes the fan level according to some
+internal algorithm, usually based on readings from the thermal sensors.
+
+There is also a "full-speed" level, also known as "disengaged" level.
+In this level, the EC disables the speed-locked closed-loop fan control,
+and drives the fan as fast as it can go, which might exceed hardware
+limits, so use this level with caution.
+
+The fan usually ramps up or down slowly from one speed to another, and
+it is normal for the EC to take several seconds to react to fan
+commands.  The full-speed level may take up to two minutes to ramp up to
+maximum speed, and in some ThinkPads, the tachometer readings go stale
+while the EC is transitioning to the full-speed level.
+
+WARNING WARNING WARNING: do not leave the fan disabled unless you are
+monitoring all of the temperature sensor readings and you are ready to
+enable it if necessary to avoid overheating.
+
+An enabled fan in level "auto" may stop spinning if the EC decides the
+ThinkPad is cool enough and doesn't need the extra airflow.  This is
+normal, and the EC will spin the fan up if the various thermal readings
+rise too much.
+
+On the X40, this seems to depend on the CPU and HDD temperatures.
+Specifically, the fan is turned on when either the CPU temperature
+climbs to 56 degrees or the HDD temperature climbs to 46 degrees.  The
+fan is turned off when the CPU temperature drops to 49 degrees and the
+HDD temperature drops to 41 degrees.  These thresholds cannot
+currently be controlled.
+
+The ThinkPad's ACPI DSDT code will reprogram the fan on its own when
+certain conditions are met.  It will override any fan programming done
+through thinkpad-acpi.
+
+The thinkpad-acpi kernel driver can be programmed to revert the fan
+level to a safe setting if userspace does not issue one of the procfs
+fan commands: "enable", "disable", "level" or "watchdog", or if there
+are no writes to pwm1_enable (or to pwm1 *if and only if* pwm1_enable is
+set to 1, manual mode) within a configurable amount of time of up to
+120 seconds.  This functionality is called fan safety watchdog.
+
+Note that the watchdog timer stops after it enables the fan.  It will be
+rearmed again automatically (using the same interval) when one of the
+above mentioned fan commands is received.  The fan watchdog is,
+therefore, not suitable to protect against fan mode changes made through
+means other than the "enable", "disable", and "level" procfs fan
+commands, or the hwmon fan control sysfs interface.
+
+Procfs notes
+^^^^^^^^^^^^
+
+The fan may be enabled or disabled with the following commands::
+
+	echo enable  >/proc/acpi/ibm/fan
+	echo disable >/proc/acpi/ibm/fan
+
+Placing a fan on level 0 is the same as disabling it.  Enabling a fan
+will try to place it in a safe level if it is too slow or disabled.
+
+The fan level can be controlled with the command::
+
+	echo 'level <level>' > /proc/acpi/ibm/fan
+
+Where <level> is an integer from 0 to 7, or one of the words "auto" or
+"full-speed" (without the quotes).  Not all ThinkPads support the "auto"
+and "full-speed" levels.  The driver accepts "disengaged" as an alias for
+"full-speed", and reports it as "disengaged" for backwards
+compatibility.
+
+On the X31 and X40 (and ONLY on those models), the fan speed can be
+controlled to a certain degree.  Once the fan is running, it can be
+forced to run faster or slower with the following command::
+
+	echo 'speed <speed>' > /proc/acpi/ibm/fan
+
+The sustainable range of fan speeds on the X40 appears to be from about
+3700 to about 7350. Values outside this range either do not have any
+effect or the fan speed eventually settles somewhere in that range.  The
+fan cannot be stopped or started with this command.  This functionality
+is incomplete, and not available through the sysfs interface.
+
+To program the safety watchdog, use the "watchdog" command::
+
+	echo 'watchdog <interval in seconds>' > /proc/acpi/ibm/fan
+
+If you want to disable the watchdog, use 0 as the interval.
+
+Sysfs notes
+^^^^^^^^^^^
+
+The sysfs interface follows the hwmon subsystem guidelines for the most
+part, and the exception is the fan safety watchdog.
+
+Writes to any of the sysfs attributes may return the EINVAL error if
+that operation is not supported in a given ThinkPad or if the parameter
+is out-of-bounds, and EPERM if it is forbidden.  They may also return
+EINTR (interrupted system call), and EIO (I/O error while trying to talk
+to the firmware).
+
+Features not yet implemented by the driver return ENOSYS.
+
+hwmon device attribute pwm1_enable:
+	- 0: PWM offline (fan is set to full-speed mode)
+	- 1: Manual PWM control (use pwm1 to set fan level)
+	- 2: Hardware PWM control (EC "auto" mode)
+	- 3: reserved (Software PWM control, not implemented yet)
+
+	Modes 0 and 2 are not supported by all ThinkPads, and the
+	driver is not always able to detect this.  If it does know a
+	mode is unsupported, it will return -EINVAL.
+
+hwmon device attribute pwm1:
+	Fan level, scaled from the firmware values of 0-7 to the hwmon
+	scale of 0-255.  0 means fan stopped, 255 means highest normal
+	speed (level 7).
+
+	This attribute only commands the fan if pmw1_enable is set to 1
+	(manual PWM control).
+
+hwmon device attribute fan1_input:
+	Fan tachometer reading, in RPM.  May go stale on certain
+	ThinkPads while the EC transitions the PWM to offline mode,
+	which can take up to two minutes.  May return rubbish on older
+	ThinkPads.
+
+hwmon device attribute fan2_input:
+	Fan tachometer reading, in RPM, for the secondary fan.
+	Available only on some ThinkPads.  If the secondary fan is
+	not installed, will always read 0.
+
+hwmon driver attribute fan_watchdog:
+	Fan safety watchdog timer interval, in seconds.  Minimum is
+	1 second, maximum is 120 seconds.  0 disables the watchdog.
+
+To stop the fan: set pwm1 to zero, and pwm1_enable to 1.
+
+To start the fan in a safe mode: set pwm1_enable to 2.  If that fails
+with EINVAL, try to set pwm1_enable to 1 and pwm1 to at least 128 (255
+would be the safest choice, though).
+
+
+WAN
+---
+
+procfs: /proc/acpi/ibm/wan
+
+sysfs device attribute: wwan_enable (deprecated)
+
+sysfs rfkill class: switch "tpacpi_wwan_sw"
+
+This feature shows the presence and current state of the built-in
+Wireless WAN device.
+
+If the ThinkPad supports it, the WWAN state is stored in NVRAM,
+so it is kept across reboots and power-off.
+
+It was tested on a Lenovo ThinkPad X60. It should probably work on other
+ThinkPad models which come with this module installed.
+
+Procfs notes
+^^^^^^^^^^^^
+
+If the W-WAN card is installed, the following commands can be used::
+
+	echo enable > /proc/acpi/ibm/wan
+	echo disable > /proc/acpi/ibm/wan
+
+Sysfs notes
+^^^^^^^^^^^
+
+	If the W-WAN card is installed, it can be enabled /
+	disabled through the "wwan_enable" thinkpad-acpi device
+	attribute, and its current status can also be queried.
+
+	enable:
+		- 0: disables WWAN card / WWAN card is disabled
+		- 1: enables WWAN card / WWAN card is enabled.
+
+	Note: this interface has been superseded by the	generic rfkill
+	class.  It has been deprecated, and it will be removed in year
+	2010.
+
+	rfkill controller switch "tpacpi_wwan_sw": refer to
+	Documentation/rfkill.txt for details.
+
+
+EXPERIMENTAL: UWB
+-----------------
+
+This feature is considered EXPERIMENTAL because it has not been extensively
+tested and validated in various ThinkPad models yet.  The feature may not
+work as expected. USE WITH CAUTION! To use this feature, you need to supply
+the experimental=1 parameter when loading the module.
+
+sysfs rfkill class: switch "tpacpi_uwb_sw"
+
+This feature exports an rfkill controller for the UWB device, if one is
+present and enabled in the BIOS.
+
+Sysfs notes
+^^^^^^^^^^^
+
+	rfkill controller switch "tpacpi_uwb_sw": refer to
+	Documentation/rfkill.txt for details.
+
+Adaptive keyboard
+-----------------
+
+sysfs device attribute: adaptive_kbd_mode
+
+This sysfs attribute controls the keyboard "face" that will be shown on the
+Lenovo X1 Carbon 2nd gen (2014)'s adaptive keyboard. The value can be read
+and set.
+
+- 1 = Home mode
+- 2 = Web-browser mode
+- 3 = Web-conference mode
+- 4 = Function mode
+- 5 = Layflat mode
+
+For more details about which buttons will appear depending on the mode, please
+review the laptop's user guide:
+http://www.lenovo.com/shop/americas/content/user_guides/x1carbon_2_ug_en.pdf
+
+Multiple Commands, Module Parameters
+------------------------------------
+
+Multiple commands can be written to the proc files in one shot by
+separating them with commas, for example::
+
+	echo enable,0xffff > /proc/acpi/ibm/hotkey
+	echo lcd_disable,crt_enable > /proc/acpi/ibm/video
+
+Commands can also be specified when loading the thinkpad-acpi module,
+for example::
+
+	modprobe thinkpad_acpi hotkey=enable,0xffff video=auto_disable
+
+
+Enabling debugging output
+-------------------------
+
+The module takes a debug parameter which can be used to selectively
+enable various classes of debugging output, for example::
+
+	 modprobe thinkpad_acpi debug=0xffff
+
+will enable all debugging output classes.  It takes a bitmask, so
+to enable more than one output class, just add their values.
+
+	=============		======================================
+	Debug bitmask		Description
+	=============		======================================
+	0x8000			Disclose PID of userspace programs
+				accessing some functions of the driver
+	0x0001			Initialization and probing
+	0x0002			Removal
+	0x0004			RF Transmitter control (RFKILL)
+				(bluetooth, WWAN, UWB...)
+	0x0008			HKEY event interface, hotkeys
+	0x0010			Fan control
+	0x0020			Backlight brightness
+	0x0040			Audio mixer/volume control
+	=============		======================================
+
+There is also a kernel build option to enable more debugging
+information, which may be necessary to debug driver problems.
+
+The level of debugging information output by the driver can be changed
+at runtime through sysfs, using the driver attribute debug_level.  The
+attribute takes the same bitmask as the debug module parameter above.
+
+
+Force loading of module
+-----------------------
+
+If thinkpad-acpi refuses to detect your ThinkPad, you can try to specify
+the module parameter force_load=1.  Regardless of whether this works or
+not, please contact ibm-acpi-devel@lists.sourceforge.net with a report.
+
+
+Sysfs interface changelog
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+=========	===============================================================
+0x000100:	Initial sysfs support, as a single platform driver and
+		device.
+0x000200:	Hot key support for 32 hot keys, and radio slider switch
+		support.
+0x010000:	Hot keys are now handled by default over the input
+		layer, the radio switch generates input event EV_RADIO,
+		and the driver enables hot key handling by default in
+		the firmware.
+
+0x020000:	ABI fix: added a separate hwmon platform device and
+		driver, which must be located by name (thinkpad)
+		and the hwmon class for libsensors4 (lm-sensors 3)
+		compatibility.  Moved all hwmon attributes to this
+		new platform device.
+
+0x020100:	Marker for thinkpad-acpi with hot key NVRAM polling
+		support.  If you must, use it to know you should not
+		start a userspace NVRAM poller (allows to detect when
+		NVRAM is compiled out by the user because it is
+		unneeded/undesired in the first place).
+0x020101:	Marker for thinkpad-acpi with hot key NVRAM polling
+		and proper hotkey_mask semantics (version 8 of the
+		NVRAM polling patch).  Some development snapshots of
+		0.18 had an earlier version that did strange things
+		to hotkey_mask.
+
+0x020200:	Add poll()/select() support to the following attributes:
+		hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason
+
+0x020300:	hotkey enable/disable support removed, attributes
+		hotkey_bios_enabled and hotkey_enable deprecated and
+		marked for removal.
+
+0x020400:	Marker for 16 LEDs support.  Also, LEDs that are known
+		to not exist in a given model are not registered with
+		the LED sysfs class anymore.
+
+0x020500:	Updated hotkey driver, hotkey_mask is always available
+		and it is always able to disable hot keys.  Very old
+		thinkpads are properly supported.  hotkey_bios_mask
+		is deprecated and marked for removal.
+
+0x020600:	Marker for backlight change event support.
+
+0x020700:	Support for mute-only mixers.
+		Volume control in read-only mode by default.
+		Marker for ALSA mixer support.
+
+0x030000:	Thermal and fan sysfs attributes were moved to the hwmon
+		device instead of being attached to the backing platform
+		device.
+=========	===============================================================
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
deleted file mode 100644
index 75ef063622d2..000000000000
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ /dev/null
@@ -1,1487 +0,0 @@
-		     ThinkPad ACPI Extras Driver
-
-                            Version 0.25
-                        October 16th,  2013
-
-               Borislav Deianov <borislav@users.sf.net>
-             Henrique de Moraes Holschuh <hmh@hmh.eng.br>
-                      http://ibm-acpi.sf.net/
-
-
-This is a Linux driver for the IBM and Lenovo ThinkPad laptops. It
-supports various features of these laptops which are accessible
-through the ACPI and ACPI EC framework, but not otherwise fully
-supported by the generic Linux ACPI drivers.
-
-This driver used to be named ibm-acpi until kernel 2.6.21 and release
-0.13-20070314.  It used to be in the drivers/acpi tree, but it was
-moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel
-2.6.22, and release 0.14.  It was moved to drivers/platform/x86 for
-kernel 2.6.29 and release 0.22.
-
-The driver is named "thinkpad-acpi".  In some places, like module
-names and log messages, "thinkpad_acpi" is used because of userspace
-issues.
-
-"tpacpi" is used as a shorthand where "thinkpad-acpi" would be too
-long due to length limitations on some Linux kernel versions.
-
-Status
-------
-
-The features currently supported are the following (see below for
-detailed description):
-
-	- Fn key combinations
-	- Bluetooth enable and disable
-	- video output switching, expansion control
-	- ThinkLight on and off
-	- CMOS/UCMS control
-	- LED control
-	- ACPI sounds
-	- temperature sensors
-	- Experimental: embedded controller register dump
-	- LCD brightness control
-	- Volume control
-	- Fan control and monitoring: fan speed, fan enable/disable
-	- WAN enable and disable
-	- UWB enable and disable
-
-A compatibility table by model and feature is maintained on the web
-site, http://ibm-acpi.sf.net/. I appreciate any success or failure
-reports, especially if they add to or correct the compatibility table.
-Please include the following information in your report:
-
-	- ThinkPad model name
-	- a copy of your ACPI tables, using the "acpidump" utility
-	- a copy of the output of dmidecode, with serial numbers
-	  and UUIDs masked off
-	- which driver features work and which don't
-	- the observed behavior of non-working features
-
-Any other comments or patches are also more than welcome.
-
-
-Installation
-------------
-
-If you are compiling this driver as included in the Linux kernel
-sources, look for the CONFIG_THINKPAD_ACPI Kconfig option.
-It is located on the menu path: "Device Drivers" -> "X86 Platform
-Specific Device Drivers" -> "ThinkPad ACPI Laptop Extras".
-
-
-Features
---------
-
-The driver exports two different interfaces to userspace, which can be
-used to access the features it provides.  One is a legacy procfs-based
-interface, which will be removed at some time in the future.  The other
-is a new sysfs-based interface which is not complete yet.
-
-The procfs interface creates the /proc/acpi/ibm directory.  There is a
-file under that directory for each feature it supports.  The procfs
-interface is mostly frozen, and will change very little if at all: it
-will not be extended to add any new functionality in the driver, instead
-all new functionality will be implemented on the sysfs interface.
-
-The sysfs interface tries to blend in the generic Linux sysfs subsystems
-and classes as much as possible.  Since some of these subsystems are not
-yet ready or stabilized, it is expected that this interface will change,
-and any and all userspace programs must deal with it.
-
-
-Notes about the sysfs interface:
-
-Unlike what was done with the procfs interface, correctness when talking
-to the sysfs interfaces will be enforced, as will correctness in the
-thinkpad-acpi's implementation of sysfs interfaces.
-
-Also, any bugs in the thinkpad-acpi sysfs driver code or in the
-thinkpad-acpi's implementation of the sysfs interfaces will be fixed for
-maximum correctness, even if that means changing an interface in
-non-compatible ways.  As these interfaces mature both in the kernel and
-in thinkpad-acpi, such changes should become quite rare.
-
-Applications interfacing to the thinkpad-acpi sysfs interfaces must
-follow all sysfs guidelines and correctly process all errors (the sysfs
-interface makes extensive use of errors).  File descriptors and open /
-close operations to the sysfs inodes must also be properly implemented.
-
-The version of thinkpad-acpi's sysfs interface is exported by the driver
-as a driver attribute (see below).
-
-Sysfs driver attributes are on the driver's sysfs attribute space,
-for 2.6.23+ this is /sys/bus/platform/drivers/thinkpad_acpi/ and
-/sys/bus/platform/drivers/thinkpad_hwmon/
-
-Sysfs device attributes are on the thinkpad_acpi device sysfs attribute
-space, for 2.6.23+ this is /sys/devices/platform/thinkpad_acpi/.
-
-Sysfs device attributes for the sensors and fan are on the
-thinkpad_hwmon device's sysfs attribute space, but you should locate it
-looking for a hwmon device with the name attribute of "thinkpad", or
-better yet, through libsensors. For 4.14+ sysfs attributes were moved to the
-hwmon device (/sys/bus/platform/devices/thinkpad_hwmon/hwmon/hwmon? or
-/sys/class/hwmon/hwmon?).
-
-Driver version
---------------
-
-procfs: /proc/acpi/ibm/driver
-sysfs driver attribute: version
-
-The driver name and version. No commands can be written to this file.
-
-
-Sysfs interface version
------------------------
-
-sysfs driver attribute: interface_version
-
-Version of the thinkpad-acpi sysfs interface, as an unsigned long
-(output in hex format: 0xAAAABBCC), where:
-	AAAA - major revision
-	BB - minor revision
-	CC - bugfix revision
-
-The sysfs interface version changelog for the driver can be found at the
-end of this document.  Changes to the sysfs interface done by the kernel
-subsystems are not documented here, nor are they tracked by this
-attribute.
-
-Changes to the thinkpad-acpi sysfs interface are only considered
-non-experimental when they are submitted to Linux mainline, at which
-point the changes in this interface are documented and interface_version
-may be updated.  If you are using any thinkpad-acpi features not yet
-sent to mainline for merging, you do so on your own risk: these features
-may disappear, or be implemented in a different and incompatible way by
-the time they are merged in Linux mainline.
-
-Changes that are backwards-compatible by nature (e.g. the addition of
-attributes that do not change the way the other attributes work) do not
-always warrant an update of interface_version.  Therefore, one must
-expect that an attribute might not be there, and deal with it properly
-(an attribute not being there *is* a valid way to make it clear that a
-feature is not available in sysfs).
-
-
-Hot keys
---------
-
-procfs: /proc/acpi/ibm/hotkey
-sysfs device attribute: hotkey_*
-
-In a ThinkPad, the ACPI HKEY handler is responsible for communicating
-some important events and also keyboard hot key presses to the operating
-system.  Enabling the hotkey functionality of thinkpad-acpi signals the
-firmware that such a driver is present, and modifies how the ThinkPad
-firmware will behave in many situations.
-
-The driver enables the HKEY ("hot key") event reporting automatically
-when loaded, and disables it when it is removed.
-
-The driver will report HKEY events in the following format:
-
-	ibm/hotkey HKEY 00000080 0000xxxx
-
-Some of these events refer to hot key presses, but not all of them.
-
-The driver will generate events over the input layer for hot keys and
-radio switches, and over the ACPI netlink layer for other events.  The
-input layer support accepts the standard IOCTLs to remap the keycodes
-assigned to each hot key.
-
-The hot key bit mask allows some control over which hot keys generate
-events.  If a key is "masked" (bit set to 0 in the mask), the firmware
-will handle it.  If it is "unmasked", it signals the firmware that
-thinkpad-acpi would prefer to handle it, if the firmware would be so
-kind to allow it (and it often doesn't!).
-
-Not all bits in the mask can be modified.  Not all bits that can be
-modified do anything.  Not all hot keys can be individually controlled
-by the mask.  Some models do not support the mask at all.  The behaviour
-of the mask is, therefore, highly dependent on the ThinkPad model.
-
-The driver will filter out any unmasked hotkeys, so even if the firmware
-doesn't allow disabling an specific hotkey, the driver will not report
-events for unmasked hotkeys.
-
-Note that unmasking some keys prevents their default behavior.  For
-example, if Fn+F5 is unmasked, that key will no longer enable/disable
-Bluetooth by itself in firmware.
-
-Note also that not all Fn key combinations are supported through ACPI
-depending on the ThinkPad model and firmware version.  On those
-ThinkPads, it is still possible to support some extra hotkeys by
-polling the "CMOS NVRAM" at least 10 times per second.  The driver
-attempts to enables this functionality automatically when required.
-
-procfs notes:
-
-The following commands can be written to the /proc/acpi/ibm/hotkey file:
-
-	echo 0xffffffff > /proc/acpi/ibm/hotkey -- enable all hot keys
-	echo 0 > /proc/acpi/ibm/hotkey -- disable all possible hot keys
-	... any other 8-hex-digit mask ...
-	echo reset > /proc/acpi/ibm/hotkey -- restore the recommended mask
-
-The following commands have been deprecated and will cause the kernel
-to log a warning:
-
-	echo enable > /proc/acpi/ibm/hotkey -- does nothing
-	echo disable > /proc/acpi/ibm/hotkey -- returns an error
-
-The procfs interface does not support NVRAM polling control.  So as to
-maintain maximum bug-to-bug compatibility, it does not report any masks,
-nor does it allow one to manipulate the hot key mask when the firmware
-does not support masks at all, even if NVRAM polling is in use.
-
-sysfs notes:
-
-	hotkey_bios_enabled:
-		DEPRECATED, WILL BE REMOVED SOON.
-
-		Returns 0.
-
-	hotkey_bios_mask:
-		DEPRECATED, DON'T USE, WILL BE REMOVED IN THE FUTURE.
-
-		Returns the hot keys mask when thinkpad-acpi was loaded.
-		Upon module unload, the hot keys mask will be restored
-		to this value.   This is always 0x80c, because those are
-		the hotkeys that were supported by ancient firmware
-		without mask support.
-
-	hotkey_enable:
-		DEPRECATED, WILL BE REMOVED SOON.
-
-		0: returns -EPERM
-		1: does nothing
-
-	hotkey_mask:
-		bit mask to enable reporting (and depending on
-		the firmware, ACPI event generation) for each hot key
-		(see above).  Returns the current status of the hot keys
-		mask, and allows one to modify it.
-
-	hotkey_all_mask:
-		bit mask that should enable event reporting for all
-		supported hot keys, when echoed to hotkey_mask above.
-		Unless you know which events need to be handled
-		passively (because the firmware *will* handle them
-		anyway), do *not* use hotkey_all_mask.  Use
-		hotkey_recommended_mask, instead. You have been warned.
-
-	hotkey_recommended_mask:
-		bit mask that should enable event reporting for all
-		supported hot keys, except those which are always
-		handled by the firmware anyway.  Echo it to
-		hotkey_mask above, to use.  This is the default mask
-		used by the driver.
-
-	hotkey_source_mask:
-		bit mask that selects which hot keys will the driver
-		poll the NVRAM for.  This is auto-detected by the driver
-		based on the capabilities reported by the ACPI firmware,
-		but it can be overridden at runtime.
-
-		Hot keys whose bits are set in hotkey_source_mask are
-		polled for in NVRAM, and reported as hotkey events if
-		enabled in hotkey_mask.  Only a few hot keys are
-		available through CMOS NVRAM polling.
-
-		Warning: when in NVRAM mode, the volume up/down/mute
-		keys are synthesized according to changes in the mixer,
-		which uses a single volume up or volume down hotkey
-		press to unmute, as per the ThinkPad volume mixer user
-		interface.  When in ACPI event mode, volume up/down/mute
-		events are reported by the firmware and can behave
-		differently (and that behaviour changes with firmware
-		version -- not just with firmware models -- as well as
-		OSI(Linux) state).
-
-	hotkey_poll_freq:
-		frequency in Hz for hot key polling. It must be between
-		0 and 25 Hz.  Polling is only carried out when strictly
-		needed.
-
-		Setting hotkey_poll_freq to zero disables polling, and
-		will cause hot key presses that require NVRAM polling
-		to never be reported.
-
-		Setting hotkey_poll_freq too low may cause repeated
-		pressings of the same hot key to be misreported as a
-		single key press, or to not even be detected at all.
-		The recommended polling frequency is 10Hz.
-
-	hotkey_radio_sw:
-		If the ThinkPad has a hardware radio switch, this
-		attribute will read 0 if the switch is in the "radios
-		disabled" position, and 1 if the switch is in the
-		"radios enabled" position.
-
-		This attribute has poll()/select() support.
-
-	hotkey_tablet_mode:
-		If the ThinkPad has tablet capabilities, this attribute
-		will read 0 if the ThinkPad is in normal mode, and
-		1 if the ThinkPad is in tablet mode.
-
-		This attribute has poll()/select() support.
-
-	wakeup_reason:
-		Set to 1 if the system is waking up because the user
-		requested a bay ejection.  Set to 2 if the system is
-		waking up because the user requested the system to
-		undock.  Set to zero for normal wake-ups or wake-ups
-		due to unknown reasons.
-
-		This attribute has poll()/select() support.
-
-	wakeup_hotunplug_complete:
-		Set to 1 if the system was waken up because of an
-		undock or bay ejection request, and that request
-		was successfully completed.  At this point, it might
-		be useful to send the system back to sleep, at the
-		user's choice.  Refer to HKEY events 0x4003 and
-		0x3003, below.
-
-		This attribute has poll()/select() support.
-
-input layer notes:
-
-A Hot key is mapped to a single input layer EV_KEY event, possibly
-followed by an EV_MSC MSC_SCAN event that shall contain that key's scan
-code.  An EV_SYN event will always be generated to mark the end of the
-event block.
-
-Do not use the EV_MSC MSC_SCAN events to process keys.  They are to be
-used as a helper to remap keys, only.  They are particularly useful when
-remapping KEY_UNKNOWN keys.
-
-The events are available in an input device, with the following id:
-
-	Bus:		BUS_HOST
-	vendor:		0x1014 (PCI_VENDOR_ID_IBM)  or
-			0x17aa (PCI_VENDOR_ID_LENOVO)
-	product:	0x5054 ("TP")
-	version:	0x4101
-
-The version will have its LSB incremented if the keymap changes in a
-backwards-compatible way.  The MSB shall always be 0x41 for this input
-device.  If the MSB is not 0x41, do not use the device as described in
-this section, as it is either something else (e.g. another input device
-exported by a thinkpad driver, such as HDAPS) or its functionality has
-been changed in a non-backwards compatible way.
-
-Adding other event types for other functionalities shall be considered a
-backwards-compatible change for this input device.
-
-Thinkpad-acpi Hot Key event map (version 0x4101):
-
-ACPI	Scan
-event	code	Key		Notes
-
-0x1001	0x00	FN+F1		-
-
-0x1002	0x01	FN+F2		IBM: battery (rare)
-				Lenovo: Screen lock
-
-0x1003	0x02	FN+F3		Many IBM models always report
-				this hot key, even with hot keys
-				disabled or with Fn+F3 masked
-				off
-				IBM: screen lock, often turns
-				off the ThinkLight as side-effect
-				Lenovo: battery
-
-0x1004	0x03	FN+F4		Sleep button (ACPI sleep button
-				semantics, i.e. sleep-to-RAM).
-				It always generates some kind
-				of event, either the hot key
-				event or an ACPI sleep button
-				event. The firmware may
-				refuse to generate further FN+F4
-				key presses until a S3 or S4 ACPI
-				sleep cycle is performed or some
-				time passes.
-
-0x1005	0x04	FN+F5		Radio.  Enables/disables
-				the internal Bluetooth hardware
-				and W-WAN card if left in control
-				of the firmware.  Does not affect
-				the WLAN card.
-				Should be used to turn on/off all
-				radios (Bluetooth+W-WAN+WLAN),
-				really.
-
-0x1006	0x05	FN+F6		-
-
-0x1007	0x06	FN+F7		Video output cycle.
-				Do you feel lucky today?
-
-0x1008	0x07	FN+F8		IBM: toggle screen expand
-				Lenovo: configure UltraNav,
-				or toggle screen expand
-
-0x1009	0x08	FN+F9		-
-	..	..		..
-0x100B	0x0A	FN+F11		-
-
-0x100C	0x0B	FN+F12		Sleep to disk.  You are always
-				supposed to handle it yourself,
-				either through the ACPI event,
-				or through a hotkey event.
-				The firmware may refuse to
-				generate further FN+F12 key
-				press events until a S3 or S4
-				ACPI sleep cycle is performed,
-				or some time passes.
-
-0x100D	0x0C	FN+BACKSPACE	-
-0x100E	0x0D	FN+INSERT	-
-0x100F	0x0E	FN+DELETE	-
-
-0x1010	0x0F	FN+HOME		Brightness up.  This key is
-				always handled by the firmware
-				in IBM ThinkPads, even when
-				unmasked.  Just leave it alone.
-				For Lenovo ThinkPads with a new
-				BIOS, it has to be handled either
-				by the ACPI OSI, or by userspace.
-				The driver does the right thing,
-				never mess with this.
-0x1011	0x10	FN+END		Brightness down.  See brightness
-				up for details.
-
-0x1012	0x11	FN+PGUP		ThinkLight toggle.  This key is
-				always handled by the firmware,
-				even when unmasked.
-
-0x1013	0x12	FN+PGDOWN	-
-
-0x1014	0x13	FN+SPACE	Zoom key
-
-0x1015	0x14	VOLUME UP	Internal mixer volume up. This
-				key is always handled by the
-				firmware, even when unmasked.
-				NOTE: Lenovo seems to be changing
-				this.
-0x1016	0x15	VOLUME DOWN	Internal mixer volume up. This
-				key is always handled by the
-				firmware, even when unmasked.
-				NOTE: Lenovo seems to be changing
-				this.
-0x1017	0x16	MUTE		Mute internal mixer. This
-				key is always handled by the
-				firmware, even when unmasked.
-
-0x1018	0x17	THINKPAD	ThinkPad/Access IBM/Lenovo key
-
-0x1019	0x18	unknown
-..	..	..
-0x1020	0x1F	unknown
-
-The ThinkPad firmware does not allow one to differentiate when most hot
-keys are pressed or released (either that, or we don't know how to, yet).
-For these keys, the driver generates a set of events for a key press and
-immediately issues the same set of events for a key release.  It is
-unknown by the driver if the ThinkPad firmware triggered these events on
-hot key press or release, but the firmware will do it for either one, not
-both.
-
-If a key is mapped to KEY_RESERVED, it generates no input events at all.
-If a key is mapped to KEY_UNKNOWN, it generates an input event that
-includes an scan code.  If a key is mapped to anything else, it will
-generate input device EV_KEY events.
-
-In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW
-events for switches:
-
-SW_RFKILL_ALL	T60 and later hardware rfkill rocker switch
-SW_TABLET_MODE	Tablet ThinkPads HKEY events 0x5009 and 0x500A
-
-Non hotkey ACPI HKEY event map:
--------------------------------
-
-Events that are never propagated by the driver:
-
-0x2304		System is waking up from suspend to undock
-0x2305		System is waking up from suspend to eject bay
-0x2404		System is waking up from hibernation to undock
-0x2405		System is waking up from hibernation to eject bay
-0x5001		Lid closed
-0x5002		Lid opened
-0x5009		Tablet swivel: switched to tablet mode
-0x500A		Tablet swivel: switched to normal mode
-0x5010		Brightness level changed/control event
-0x6000		KEYBOARD: Numlock key pressed
-0x6005		KEYBOARD: Fn key pressed (TO BE VERIFIED)
-0x7000		Radio Switch may have changed state
-
-
-Events that are propagated by the driver to userspace:
-
-0x2313		ALARM: System is waking up from suspend because
-		the battery is nearly empty
-0x2413		ALARM: System is waking up from hibernation because
-		the battery is nearly empty
-0x3003		Bay ejection (see 0x2x05) complete, can sleep again
-0x3006		Bay hotplug request (hint to power up SATA link when
-		the optical drive tray is ejected)
-0x4003		Undocked (see 0x2x04), can sleep again
-0x4010		Docked into hotplug port replicator (non-ACPI dock)
-0x4011		Undocked from hotplug port replicator (non-ACPI dock)
-0x500B		Tablet pen inserted into its storage bay
-0x500C		Tablet pen removed from its storage bay
-0x6011		ALARM: battery is too hot
-0x6012		ALARM: battery is extremely hot
-0x6021		ALARM: a sensor is too hot
-0x6022		ALARM: a sensor is extremely hot
-0x6030		System thermal table changed
-0x6032		Thermal Control command set completion  (DYTC, Windows)
-0x6040		Nvidia Optimus/AC adapter related (TO BE VERIFIED)
-0x60C0		X1 Yoga 2016, Tablet mode status changed
-0x60F0		Thermal Transformation changed (GMTS, Windows)
-
-Battery nearly empty alarms are a last resort attempt to get the
-operating system to hibernate or shutdown cleanly (0x2313), or shutdown
-cleanly (0x2413) before power is lost.  They must be acted upon, as the
-wake up caused by the firmware will have negated most safety nets...
-
-When any of the "too hot" alarms happen, according to Lenovo the user
-should suspend or hibernate the laptop (and in the case of battery
-alarms, unplug the AC adapter) to let it cool down.  These alarms do
-signal that something is wrong, they should never happen on normal
-operating conditions.
-
-The "extremely hot" alarms are emergencies.  According to Lenovo, the
-operating system is to force either an immediate suspend or hibernate
-cycle, or a system shutdown.  Obviously, something is very wrong if this
-happens.
-
-
-Brightness hotkey notes:
-
-Don't mess with the brightness hotkeys in a Thinkpad.  If you want
-notifications for OSD, use the sysfs backlight class event support.
-
-The driver will issue KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN events
-automatically for the cases were userspace has to do something to
-implement brightness changes.  When you override these events, you will
-either fail to handle properly the ThinkPads that require explicit
-action to change backlight brightness, or the ThinkPads that require
-that no action be taken to work properly.
-
-
-Bluetooth
----------
-
-procfs: /proc/acpi/ibm/bluetooth
-sysfs device attribute: bluetooth_enable (deprecated)
-sysfs rfkill class: switch "tpacpi_bluetooth_sw"
-
-This feature shows the presence and current state of a ThinkPad
-Bluetooth device in the internal ThinkPad CDC slot.
-
-If the ThinkPad supports it, the Bluetooth state is stored in NVRAM,
-so it is kept across reboots and power-off.
-
-Procfs notes:
-
-If Bluetooth is installed, the following commands can be used:
-
-	echo enable > /proc/acpi/ibm/bluetooth
-	echo disable > /proc/acpi/ibm/bluetooth
-
-Sysfs notes:
-
-	If the Bluetooth CDC card is installed, it can be enabled /
-	disabled through the "bluetooth_enable" thinkpad-acpi device
-	attribute, and its current status can also be queried.
-
-	enable:
-		0: disables Bluetooth / Bluetooth is disabled
-		1: enables Bluetooth / Bluetooth is enabled.
-
-	Note: this interface has been superseded by the	generic rfkill
-	class.  It has been deprecated, and it will be removed in year
-	2010.
-
-	rfkill controller switch "tpacpi_bluetooth_sw": refer to
-	Documentation/rfkill.txt for details.
-
-
-Video output control -- /proc/acpi/ibm/video
---------------------------------------------
-
-This feature allows control over the devices used for video output -
-LCD, CRT or DVI (if available). The following commands are available:
-
-	echo lcd_enable > /proc/acpi/ibm/video
-	echo lcd_disable > /proc/acpi/ibm/video
-	echo crt_enable > /proc/acpi/ibm/video
-	echo crt_disable > /proc/acpi/ibm/video
-	echo dvi_enable > /proc/acpi/ibm/video
-	echo dvi_disable > /proc/acpi/ibm/video
-	echo auto_enable > /proc/acpi/ibm/video
-	echo auto_disable > /proc/acpi/ibm/video
-	echo expand_toggle > /proc/acpi/ibm/video
-	echo video_switch > /proc/acpi/ibm/video
-
-NOTE: Access to this feature is restricted to processes owning the
-CAP_SYS_ADMIN capability for safety reasons, as it can interact badly
-enough with some versions of X.org to crash it.
-
-Each video output device can be enabled or disabled individually.
-Reading /proc/acpi/ibm/video shows the status of each device.
-
-Automatic video switching can be enabled or disabled.  When automatic
-video switching is enabled, certain events (e.g. opening the lid,
-docking or undocking) cause the video output device to change
-automatically. While this can be useful, it also causes flickering
-and, on the X40, video corruption. By disabling automatic switching,
-the flickering or video corruption can be avoided.
-
-The video_switch command cycles through the available video outputs
-(it simulates the behavior of Fn-F7).
-
-Video expansion can be toggled through this feature. This controls
-whether the display is expanded to fill the entire LCD screen when a
-mode with less than full resolution is used. Note that the current
-video expansion status cannot be determined through this feature.
-
-Note that on many models (particularly those using Radeon graphics
-chips) the X driver configures the video card in a way which prevents
-Fn-F7 from working. This also disables the video output switching
-features of this driver, as it uses the same ACPI methods as
-Fn-F7. Video switching on the console should still work.
-
-UPDATE: refer to https://bugs.freedesktop.org/show_bug.cgi?id=2000
-
-
-ThinkLight control
-------------------
-
-procfs: /proc/acpi/ibm/light
-sysfs attributes: as per LED class, for the "tpacpi::thinklight" LED
-
-procfs notes:
-
-The ThinkLight status can be read and set through the procfs interface.  A
-few models which do not make the status available will show the ThinkLight
-status as "unknown". The available commands are:
-
-	echo on  > /proc/acpi/ibm/light
-	echo off > /proc/acpi/ibm/light
-
-sysfs notes:
-
-The ThinkLight sysfs interface is documented by the LED class
-documentation, in Documentation/leds/leds-class.rst.  The ThinkLight LED name
-is "tpacpi::thinklight".
-
-Due to limitations in the sysfs LED class, if the status of the ThinkLight
-cannot be read or if it is unknown, thinkpad-acpi will report it as "off".
-It is impossible to know if the status returned through sysfs is valid.
-
-
-CMOS/UCMS control
------------------
-
-procfs: /proc/acpi/ibm/cmos
-sysfs device attribute: cmos_command
-
-This feature is mostly used internally by the ACPI firmware to keep the legacy
-CMOS NVRAM bits in sync with the current machine state, and to record this
-state so that the ThinkPad will retain such settings across reboots.
-
-Some of these commands actually perform actions in some ThinkPad models, but
-this is expected to disappear more and more in newer models.  As an example, in
-a T43 and in a X40, commands 12 and 13 still control the ThinkLight state for
-real, but commands 0 to 2 don't control the mixer anymore (they have been
-phased out) and just update the NVRAM.
-
-The range of valid cmos command numbers is 0 to 21, but not all have an
-effect and the behavior varies from model to model.  Here is the behavior
-on the X40 (tpb is the ThinkPad Buttons utility):
-
-	0 - Related to "Volume down" key press
-	1 - Related to "Volume up" key press
-	2 - Related to "Mute on" key press
-	3 - Related to "Access IBM" key press
-	4 - Related to "LCD brightness up" key press
-	5 - Related to "LCD brightness down" key press
-	11 - Related to "toggle screen expansion" key press/function
-	12 - Related to "ThinkLight on"
-	13 - Related to "ThinkLight off"
-	14 - Related to "ThinkLight" key press (toggle ThinkLight)
-
-The cmos command interface is prone to firmware split-brain problems, as
-in newer ThinkPads it is just a compatibility layer.  Do not use it, it is
-exported just as a debug tool.
-
-
-LED control
------------
-
-procfs: /proc/acpi/ibm/led
-sysfs attributes: as per LED class, see below for names
-
-Some of the LED indicators can be controlled through this feature.  On
-some older ThinkPad models, it is possible to query the status of the
-LED indicators as well.  Newer ThinkPads cannot query the real status
-of the LED indicators.
-
-Because misuse of the LEDs could induce an unaware user to perform
-dangerous actions (like undocking or ejecting a bay device while the
-buses are still active), or mask an important alarm (such as a nearly
-empty battery, or a broken battery), access to most LEDs is
-restricted.
-
-Unrestricted access to all LEDs requires that thinkpad-acpi be
-compiled with the CONFIG_THINKPAD_ACPI_UNSAFE_LEDS option enabled.
-Distributions must never enable this option.  Individual users that
-are aware of the consequences are welcome to enabling it.
-
-Audio mute and microphone mute LEDs are supported, but currently not
-visible to userspace. They are used by the snd-hda-intel audio driver.
-
-procfs notes:
-
-The available commands are:
-
-	echo '<LED number> on' >/proc/acpi/ibm/led
-	echo '<LED number> off' >/proc/acpi/ibm/led
-	echo '<LED number> blink' >/proc/acpi/ibm/led
-
-The <LED number> range is 0 to 15. The set of LEDs that can be
-controlled varies from model to model. Here is the common ThinkPad
-mapping:
-
-	0 - power
-	1 - battery (orange)
-	2 - battery (green)
-	3 - UltraBase/dock
-	4 - UltraBay
-	5 - UltraBase battery slot
-	6 - (unknown)
-	7 - standby
-	8 - dock status 1
-	9 - dock status 2
-	10, 11 - (unknown)
-	12 - thinkvantage
-	13, 14, 15 - (unknown)
-
-All of the above can be turned on and off and can be made to blink.
-
-sysfs notes:
-
-The ThinkPad LED sysfs interface is described in detail by the LED class
-documentation, in Documentation/leds/leds-class.rst.
-
-The LEDs are named (in LED ID order, from 0 to 12):
-"tpacpi::power", "tpacpi:orange:batt", "tpacpi:green:batt",
-"tpacpi::dock_active", "tpacpi::bay_active", "tpacpi::dock_batt",
-"tpacpi::unknown_led", "tpacpi::standby", "tpacpi::dock_status1",
-"tpacpi::dock_status2", "tpacpi::unknown_led2", "tpacpi::unknown_led3",
-"tpacpi::thinkvantage".
-
-Due to limitations in the sysfs LED class, if the status of the LED
-indicators cannot be read due to an error, thinkpad-acpi will report it as
-a brightness of zero (same as LED off).
-
-If the thinkpad firmware doesn't support reading the current status,
-trying to read the current LED brightness will just return whatever
-brightness was last written to that attribute.
-
-These LEDs can blink using hardware acceleration.  To request that a
-ThinkPad indicator LED should blink in hardware accelerated mode, use the
-"timer" trigger, and leave the delay_on and delay_off parameters set to
-zero (to request hardware acceleration autodetection).
-
-LEDs that are known not to exist in a given ThinkPad model are not
-made available through the sysfs interface.  If you have a dock and you
-notice there are LEDs listed for your ThinkPad that do not exist (and
-are not in the dock), or if you notice that there are missing LEDs,
-a report to ibm-acpi-devel@lists.sourceforge.net is appreciated.
-
-
-ACPI sounds -- /proc/acpi/ibm/beep
-----------------------------------
-
-The BEEP method is used internally by the ACPI firmware to provide
-audible alerts in various situations. This feature allows the same
-sounds to be triggered manually.
-
-The commands are non-negative integer numbers:
-
-	echo <number> >/proc/acpi/ibm/beep
-
-The valid <number> range is 0 to 17. Not all numbers trigger sounds
-and the sounds vary from model to model. Here is the behavior on the
-X40:
-
-	0 - stop a sound in progress (but use 17 to stop 16)
-	2 - two beeps, pause, third beep ("low battery")
-	3 - single beep
-	4 - high, followed by low-pitched beep ("unable")
-	5 - single beep
-	6 - very high, followed by high-pitched beep ("AC/DC")
-	7 - high-pitched beep
-	9 - three short beeps
-	10 - very long beep
-	12 - low-pitched beep
-	15 - three high-pitched beeps repeating constantly, stop with 0
-	16 - one medium-pitched beep repeating constantly, stop with 17
-	17 - stop 16
-
-
-Temperature sensors
--------------------
-
-procfs: /proc/acpi/ibm/thermal
-sysfs device attributes: (hwmon "thinkpad") temp*_input
-
-Most ThinkPads include six or more separate temperature sensors but only
-expose the CPU temperature through the standard ACPI methods.  This
-feature shows readings from up to eight different sensors on older
-ThinkPads, and up to sixteen different sensors on newer ThinkPads.
-
-For example, on the X40, a typical output may be:
-temperatures:   42 42 45 41 36 -128 33 -128
-
-On the T43/p, a typical output may be:
-temperatures:   48 48 36 52 38 -128 31 -128 48 52 48 -128 -128 -128 -128 -128
-
-The mapping of thermal sensors to physical locations varies depending on
-system-board model (and thus, on ThinkPad model).
-
-http://thinkwiki.org/wiki/Thermal_Sensors is a public wiki page that
-tries to track down these locations for various models.
-
-Most (newer?) models seem to follow this pattern:
-
-1:  CPU
-2:  (depends on model)
-3:  (depends on model)
-4:  GPU
-5:  Main battery: main sensor
-6:  Bay battery: main sensor
-7:  Main battery: secondary sensor
-8:  Bay battery: secondary sensor
-9-15: (depends on model)
-
-For the R51 (source: Thomas Gruber):
-2:  Mini-PCI
-3:  Internal HDD
-
-For the T43, T43/p (source: Shmidoax/Thinkwiki.org)
-http://thinkwiki.org/wiki/Thermal_Sensors#ThinkPad_T43.2C_T43p
-2:  System board, left side (near PCMCIA slot), reported as HDAPS temp
-3:  PCMCIA slot
-9:  MCH (northbridge) to DRAM Bus
-10: Clock-generator, mini-pci card and ICH (southbridge), under Mini-PCI
-    card, under touchpad
-11: Power regulator, underside of system board, below F2 key
-
-The A31 has a very atypical layout for the thermal sensors
-(source: Milos Popovic, http://thinkwiki.org/wiki/Thermal_Sensors#ThinkPad_A31)
-1:  CPU
-2:  Main Battery: main sensor
-3:  Power Converter
-4:  Bay Battery: main sensor
-5:  MCH (northbridge)
-6:  PCMCIA/ambient
-7:  Main Battery: secondary sensor
-8:  Bay Battery: secondary sensor
-
-
-Procfs notes:
-	Readings from sensors that are not available return -128.
-	No commands can be written to this file.
-
-Sysfs notes:
-	Sensors that are not available return the ENXIO error.  This
-	status may change at runtime, as there are hotplug thermal
-	sensors, like those inside the batteries and docks.
-
-	thinkpad-acpi thermal sensors are reported through the hwmon
-	subsystem, and follow all of the hwmon guidelines at
-	Documentation/hwmon.
-
-EXPERIMENTAL: Embedded controller register dump
------------------------------------------------
-
-This feature is not included in the thinkpad driver anymore.
-Instead the EC can be accessed through /sys/kernel/debug/ec with
-a userspace tool which can be found here:
-ftp://ftp.suse.com/pub/people/trenn/sources/ec
-
-Use it to determine the register holding the fan
-speed on some models. To do that, do the following:
-	- make sure the battery is fully charged
-	- make sure the fan is running
-	- use above mentioned tool to read out the EC
-
-Often fan and temperature values vary between
-readings. Since temperatures don't change vary fast, you can take
-several quick dumps to eliminate them.
-
-You can use a similar method to figure out the meaning of other
-embedded controller registers - e.g. make sure nothing else changes
-except the charging or discharging battery to determine which
-registers contain the current battery capacity, etc. If you experiment
-with this, do send me your results (including some complete dumps with
-a description of the conditions when they were taken.)
-
-
-LCD brightness control
-----------------------
-
-procfs: /proc/acpi/ibm/brightness
-sysfs backlight device "thinkpad_screen"
-
-This feature allows software control of the LCD brightness on ThinkPad
-models which don't have a hardware brightness slider.
-
-It has some limitations: the LCD backlight cannot be actually turned
-on or off by this interface, it just controls the backlight brightness
-level.
-
-On IBM (and some of the earlier Lenovo) ThinkPads, the backlight control
-has eight brightness levels, ranging from 0 to 7.  Some of the levels
-may not be distinct.  Later Lenovo models that implement the ACPI
-display backlight brightness control methods have 16 levels, ranging
-from 0 to 15.
-
-For IBM ThinkPads, there are two interfaces to the firmware for direct
-brightness control, EC and UCMS (or CMOS).  To select which one should be
-used, use the brightness_mode module parameter: brightness_mode=1 selects
-EC mode, brightness_mode=2 selects UCMS mode, brightness_mode=3 selects EC
-mode with NVRAM backing (so that brightness changes are remembered across
-shutdown/reboot).
-
-The driver tries to select which interface to use from a table of
-defaults for each ThinkPad model.  If it makes a wrong choice, please
-report this as a bug, so that we can fix it.
-
-Lenovo ThinkPads only support brightness_mode=2 (UCMS).
-
-When display backlight brightness controls are available through the
-standard ACPI interface, it is best to use it instead of this direct
-ThinkPad-specific interface.  The driver will disable its native
-backlight brightness control interface if it detects that the standard
-ACPI interface is available in the ThinkPad.
-
-If you want to use the thinkpad-acpi backlight brightness control
-instead of the generic ACPI video backlight brightness control for some
-reason, you should use the acpi_backlight=vendor kernel parameter.
-
-The brightness_enable module parameter can be used to control whether
-the LCD brightness control feature will be enabled when available.
-brightness_enable=0 forces it to be disabled.  brightness_enable=1
-forces it to be enabled when available, even if the standard ACPI
-interface is also available.
-
-Procfs notes:
-
-	The available commands are:
-
-	echo up   >/proc/acpi/ibm/brightness
-	echo down >/proc/acpi/ibm/brightness
-	echo 'level <level>' >/proc/acpi/ibm/brightness
-
-Sysfs notes:
-
-The interface is implemented through the backlight sysfs class, which is
-poorly documented at this time.
-
-Locate the thinkpad_screen device under /sys/class/backlight, and inside
-it there will be the following attributes:
-
-	max_brightness:
-		Reads the maximum brightness the hardware can be set to.
-		The minimum is always zero.
-
-	actual_brightness:
-		Reads what brightness the screen is set to at this instant.
-
-	brightness:
-		Writes request the driver to change brightness to the
-		given value.  Reads will tell you what brightness the
-		driver is trying to set the display to when "power" is set
-		to zero and the display has not been dimmed by a kernel
-		power management event.
-
-	power:
-		power management mode, where 0 is "display on", and 1 to 3
-		will dim the display backlight to brightness level 0
-		because thinkpad-acpi cannot really turn the backlight
-		off.  Kernel power management events can temporarily
-		increase the current power management level, i.e. they can
-		dim the display.
-
-
-WARNING:
-
-    Whatever you do, do NOT ever call thinkpad-acpi backlight-level change
-    interface and the ACPI-based backlight level change interface
-    (available on newer BIOSes, and driven by the Linux ACPI video driver)
-    at the same time.  The two will interact in bad ways, do funny things,
-    and maybe reduce the life of the backlight lamps by needlessly kicking
-    its level up and down at every change.
-
-
-Volume control (Console Audio control)
---------------------------------------
-
-procfs: /proc/acpi/ibm/volume
-ALSA: "ThinkPad Console Audio Control", default ID: "ThinkPadEC"
-
-NOTE: by default, the volume control interface operates in read-only
-mode, as it is supposed to be used for on-screen-display purposes.
-The read/write mode can be enabled through the use of the
-"volume_control=1" module parameter.
-
-NOTE: distros are urged to not enable volume_control by default, this
-should be done by the local admin only.  The ThinkPad UI is for the
-console audio control to be done through the volume keys only, and for
-the desktop environment to just provide on-screen-display feedback.
-Software volume control should be done only in the main AC97/HDA
-mixer.
-
-
-About the ThinkPad Console Audio control:
-
-ThinkPads have a built-in amplifier and muting circuit that drives the
-console headphone and speakers.  This circuit is after the main AC97
-or HDA mixer in the audio path, and under exclusive control of the
-firmware.
-
-ThinkPads have three special hotkeys to interact with the console
-audio control: volume up, volume down and mute.
-
-It is worth noting that the normal way the mute function works (on
-ThinkPads that do not have a "mute LED") is:
-
-1. Press mute to mute.  It will *always* mute, you can press it as
-   many times as you want, and the sound will remain mute.
-
-2. Press either volume key to unmute the ThinkPad (it will _not_
-   change the volume, it will just unmute).
-
-This is a very superior design when compared to the cheap software-only
-mute-toggle solution found on normal consumer laptops:  you can be
-absolutely sure the ThinkPad will not make noise if you press the mute
-button, no matter the previous state.
-
-The IBM ThinkPads, and the earlier Lenovo ThinkPads have variable-gain
-amplifiers driving the speakers and headphone output, and the firmware
-also handles volume control for the headphone and speakers on these
-ThinkPads without any help from the operating system (this volume
-control stage exists after the main AC97 or HDA mixer in the audio
-path).
-
-The newer Lenovo models only have firmware mute control, and depend on
-the main HDA mixer to do volume control (which is done by the operating
-system).  In this case, the volume keys are filtered out for unmute
-key press (there are some firmware bugs in this area) and delivered as
-normal key presses to the operating system (thinkpad-acpi is not
-involved).
-
-
-The ThinkPad-ACPI volume control:
-
-The preferred way to interact with the Console Audio control is the
-ALSA interface.
-
-The legacy procfs interface allows one to read the current state,
-and if volume control is enabled, accepts the following commands:
-
-	echo up   >/proc/acpi/ibm/volume
-	echo down >/proc/acpi/ibm/volume
-	echo mute >/proc/acpi/ibm/volume
-	echo unmute >/proc/acpi/ibm/volume
-	echo 'level <level>' >/proc/acpi/ibm/volume
-
-The <level> number range is 0 to 14 although not all of them may be
-distinct. To unmute the volume after the mute command, use either the
-up or down command (the level command will not unmute the volume), or
-the unmute command.
-
-You can use the volume_capabilities parameter to tell the driver
-whether your thinkpad has volume control or mute-only control:
-volume_capabilities=1 for mixers with mute and volume control,
-volume_capabilities=2 for mixers with only mute control.
-
-If the driver misdetects the capabilities for your ThinkPad model,
-please report this to ibm-acpi-devel@lists.sourceforge.net, so that we
-can update the driver.
-
-There are two strategies for volume control.  To select which one
-should be used, use the volume_mode module parameter: volume_mode=1
-selects EC mode, and volume_mode=3 selects EC mode with NVRAM backing
-(so that volume/mute changes are remembered across shutdown/reboot).
-
-The driver will operate in volume_mode=3 by default. If that does not
-work well on your ThinkPad model, please report this to
-ibm-acpi-devel@lists.sourceforge.net.
-
-The driver supports the standard ALSA module parameters.  If the ALSA
-mixer is disabled, the driver will disable all volume functionality.
-
-
-Fan control and monitoring: fan speed, fan enable/disable
----------------------------------------------------------
-
-procfs: /proc/acpi/ibm/fan
-sysfs device attributes: (hwmon "thinkpad") fan1_input, pwm1,
-			  pwm1_enable, fan2_input
-sysfs hwmon driver attributes: fan_watchdog
-
-NOTE NOTE NOTE: fan control operations are disabled by default for
-safety reasons.  To enable them, the module parameter "fan_control=1"
-must be given to thinkpad-acpi.
-
-This feature attempts to show the current fan speed, control mode and
-other fan data that might be available.  The speed is read directly
-from the hardware registers of the embedded controller.  This is known
-to work on later R, T, X and Z series ThinkPads but may show a bogus
-value on other models.
-
-Some Lenovo ThinkPads support a secondary fan.  This fan cannot be
-controlled separately, it shares the main fan control.
-
-Fan levels:
-
-Most ThinkPad fans work in "levels" at the firmware interface.  Level 0
-stops the fan.  The higher the level, the higher the fan speed, although
-adjacent levels often map to the same fan speed.  7 is the highest
-level, where the fan reaches the maximum recommended speed.
-
-Level "auto" means the EC changes the fan level according to some
-internal algorithm, usually based on readings from the thermal sensors.
-
-There is also a "full-speed" level, also known as "disengaged" level.
-In this level, the EC disables the speed-locked closed-loop fan control,
-and drives the fan as fast as it can go, which might exceed hardware
-limits, so use this level with caution.
-
-The fan usually ramps up or down slowly from one speed to another, and
-it is normal for the EC to take several seconds to react to fan
-commands.  The full-speed level may take up to two minutes to ramp up to
-maximum speed, and in some ThinkPads, the tachometer readings go stale
-while the EC is transitioning to the full-speed level.
-
-WARNING WARNING WARNING: do not leave the fan disabled unless you are
-monitoring all of the temperature sensor readings and you are ready to
-enable it if necessary to avoid overheating.
-
-An enabled fan in level "auto" may stop spinning if the EC decides the
-ThinkPad is cool enough and doesn't need the extra airflow.  This is
-normal, and the EC will spin the fan up if the various thermal readings
-rise too much.
-
-On the X40, this seems to depend on the CPU and HDD temperatures.
-Specifically, the fan is turned on when either the CPU temperature
-climbs to 56 degrees or the HDD temperature climbs to 46 degrees.  The
-fan is turned off when the CPU temperature drops to 49 degrees and the
-HDD temperature drops to 41 degrees.  These thresholds cannot
-currently be controlled.
-
-The ThinkPad's ACPI DSDT code will reprogram the fan on its own when
-certain conditions are met.  It will override any fan programming done
-through thinkpad-acpi.
-
-The thinkpad-acpi kernel driver can be programmed to revert the fan
-level to a safe setting if userspace does not issue one of the procfs
-fan commands: "enable", "disable", "level" or "watchdog", or if there
-are no writes to pwm1_enable (or to pwm1 *if and only if* pwm1_enable is
-set to 1, manual mode) within a configurable amount of time of up to
-120 seconds.  This functionality is called fan safety watchdog.
-
-Note that the watchdog timer stops after it enables the fan.  It will be
-rearmed again automatically (using the same interval) when one of the
-above mentioned fan commands is received.  The fan watchdog is,
-therefore, not suitable to protect against fan mode changes made through
-means other than the "enable", "disable", and "level" procfs fan
-commands, or the hwmon fan control sysfs interface.
-
-Procfs notes:
-
-The fan may be enabled or disabled with the following commands:
-
-	echo enable  >/proc/acpi/ibm/fan
-	echo disable >/proc/acpi/ibm/fan
-
-Placing a fan on level 0 is the same as disabling it.  Enabling a fan
-will try to place it in a safe level if it is too slow or disabled.
-
-The fan level can be controlled with the command:
-
-	echo 'level <level>' > /proc/acpi/ibm/fan
-
-Where <level> is an integer from 0 to 7, or one of the words "auto" or
-"full-speed" (without the quotes).  Not all ThinkPads support the "auto"
-and "full-speed" levels.  The driver accepts "disengaged" as an alias for
-"full-speed", and reports it as "disengaged" for backwards
-compatibility.
-
-On the X31 and X40 (and ONLY on those models), the fan speed can be
-controlled to a certain degree.  Once the fan is running, it can be
-forced to run faster or slower with the following command:
-
-	echo 'speed <speed>' > /proc/acpi/ibm/fan
-
-The sustainable range of fan speeds on the X40 appears to be from about
-3700 to about 7350. Values outside this range either do not have any
-effect or the fan speed eventually settles somewhere in that range.  The
-fan cannot be stopped or started with this command.  This functionality
-is incomplete, and not available through the sysfs interface.
-
-To program the safety watchdog, use the "watchdog" command.
-
-	echo 'watchdog <interval in seconds>' > /proc/acpi/ibm/fan
-
-If you want to disable the watchdog, use 0 as the interval.
-
-Sysfs notes:
-
-The sysfs interface follows the hwmon subsystem guidelines for the most
-part, and the exception is the fan safety watchdog.
-
-Writes to any of the sysfs attributes may return the EINVAL error if
-that operation is not supported in a given ThinkPad or if the parameter
-is out-of-bounds, and EPERM if it is forbidden.  They may also return
-EINTR (interrupted system call), and EIO (I/O error while trying to talk
-to the firmware).
-
-Features not yet implemented by the driver return ENOSYS.
-
-hwmon device attribute pwm1_enable:
-	0: PWM offline (fan is set to full-speed mode)
-	1: Manual PWM control (use pwm1 to set fan level)
-	2: Hardware PWM control (EC "auto" mode)
-	3: reserved (Software PWM control, not implemented yet)
-
-	Modes 0 and 2 are not supported by all ThinkPads, and the
-	driver is not always able to detect this.  If it does know a
-	mode is unsupported, it will return -EINVAL.
-
-hwmon device attribute pwm1:
-	Fan level, scaled from the firmware values of 0-7 to the hwmon
-	scale of 0-255.  0 means fan stopped, 255 means highest normal
-	speed (level 7).
-
-	This attribute only commands the fan if pmw1_enable is set to 1
-	(manual PWM control).
-
-hwmon device attribute fan1_input:
-	Fan tachometer reading, in RPM.  May go stale on certain
-	ThinkPads while the EC transitions the PWM to offline mode,
-	which can take up to two minutes.  May return rubbish on older
-	ThinkPads.
-
-hwmon device attribute fan2_input:
-	Fan tachometer reading, in RPM, for the secondary fan.
-	Available only on some ThinkPads.  If the secondary fan is
-	not installed, will always read 0.
-
-hwmon driver attribute fan_watchdog:
-	Fan safety watchdog timer interval, in seconds.  Minimum is
-	1 second, maximum is 120 seconds.  0 disables the watchdog.
-
-To stop the fan: set pwm1 to zero, and pwm1_enable to 1.
-
-To start the fan in a safe mode: set pwm1_enable to 2.  If that fails
-with EINVAL, try to set pwm1_enable to 1 and pwm1 to at least 128 (255
-would be the safest choice, though).
-
-
-WAN
----
-
-procfs: /proc/acpi/ibm/wan
-sysfs device attribute: wwan_enable (deprecated)
-sysfs rfkill class: switch "tpacpi_wwan_sw"
-
-This feature shows the presence and current state of the built-in
-Wireless WAN device.
-
-If the ThinkPad supports it, the WWAN state is stored in NVRAM,
-so it is kept across reboots and power-off.
-
-It was tested on a Lenovo ThinkPad X60. It should probably work on other
-ThinkPad models which come with this module installed.
-
-Procfs notes:
-
-If the W-WAN card is installed, the following commands can be used:
-
-	echo enable > /proc/acpi/ibm/wan
-	echo disable > /proc/acpi/ibm/wan
-
-Sysfs notes:
-
-	If the W-WAN card is installed, it can be enabled /
-	disabled through the "wwan_enable" thinkpad-acpi device
-	attribute, and its current status can also be queried.
-
-	enable:
-		0: disables WWAN card / WWAN card is disabled
-		1: enables WWAN card / WWAN card is enabled.
-
-	Note: this interface has been superseded by the	generic rfkill
-	class.  It has been deprecated, and it will be removed in year
-	2010.
-
-	rfkill controller switch "tpacpi_wwan_sw": refer to
-	Documentation/rfkill.txt for details.
-
-
-EXPERIMENTAL: UWB
------------------
-
-This feature is considered EXPERIMENTAL because it has not been extensively
-tested and validated in various ThinkPad models yet.  The feature may not
-work as expected. USE WITH CAUTION! To use this feature, you need to supply
-the experimental=1 parameter when loading the module.
-
-sysfs rfkill class: switch "tpacpi_uwb_sw"
-
-This feature exports an rfkill controller for the UWB device, if one is
-present and enabled in the BIOS.
-
-Sysfs notes:
-
-	rfkill controller switch "tpacpi_uwb_sw": refer to
-	Documentation/rfkill.txt for details.
-
-Adaptive keyboard
------------------
-
-sysfs device attribute: adaptive_kbd_mode
-
-This sysfs attribute controls the keyboard "face" that will be shown on the
-Lenovo X1 Carbon 2nd gen (2014)'s adaptive keyboard. The value can be read
-and set.
-
-1 = Home mode
-2 = Web-browser mode
-3 = Web-conference mode
-4 = Function mode
-5 = Layflat mode
-
-For more details about which buttons will appear depending on the mode, please
-review the laptop's user guide:
-http://www.lenovo.com/shop/americas/content/user_guides/x1carbon_2_ug_en.pdf
-
-Multiple Commands, Module Parameters
-------------------------------------
-
-Multiple commands can be written to the proc files in one shot by
-separating them with commas, for example:
-
-	echo enable,0xffff > /proc/acpi/ibm/hotkey
-	echo lcd_disable,crt_enable > /proc/acpi/ibm/video
-
-Commands can also be specified when loading the thinkpad-acpi module,
-for example:
-
-	modprobe thinkpad_acpi hotkey=enable,0xffff video=auto_disable
-
-
-Enabling debugging output
--------------------------
-
-The module takes a debug parameter which can be used to selectively
-enable various classes of debugging output, for example:
-
-	 modprobe thinkpad_acpi debug=0xffff
-
-will enable all debugging output classes.  It takes a bitmask, so
-to enable more than one output class, just add their values.
-
-	Debug bitmask		Description
-	0x8000			Disclose PID of userspace programs
-				accessing some functions of the driver
-	0x0001			Initialization and probing
-	0x0002			Removal
-	0x0004			RF Transmitter control (RFKILL)
-				(bluetooth, WWAN, UWB...)
-	0x0008			HKEY event interface, hotkeys
-	0x0010			Fan control
-	0x0020			Backlight brightness
-	0x0040			Audio mixer/volume control
-
-There is also a kernel build option to enable more debugging
-information, which may be necessary to debug driver problems.
-
-The level of debugging information output by the driver can be changed
-at runtime through sysfs, using the driver attribute debug_level.  The
-attribute takes the same bitmask as the debug module parameter above.
-
-
-Force loading of module
------------------------
-
-If thinkpad-acpi refuses to detect your ThinkPad, you can try to specify
-the module parameter force_load=1.  Regardless of whether this works or
-not, please contact ibm-acpi-devel@lists.sourceforge.net with a report.
-
-
-Sysfs interface changelog:
-
-0x000100:	Initial sysfs support, as a single platform driver and
-		device.
-0x000200:	Hot key support for 32 hot keys, and radio slider switch
-		support.
-0x010000:	Hot keys are now handled by default over the input
-		layer, the radio switch generates input event EV_RADIO,
-		and the driver enables hot key handling by default in
-		the firmware.
-
-0x020000:	ABI fix: added a separate hwmon platform device and
-		driver, which must be located by name (thinkpad)
-		and the hwmon class for libsensors4 (lm-sensors 3)
-		compatibility.  Moved all hwmon attributes to this
-		new platform device.
-
-0x020100:	Marker for thinkpad-acpi with hot key NVRAM polling
-		support.  If you must, use it to know you should not
-		start a userspace NVRAM poller (allows to detect when
-		NVRAM is compiled out by the user because it is
-		unneeded/undesired in the first place).
-0x020101:	Marker for thinkpad-acpi with hot key NVRAM polling
-		and proper hotkey_mask semantics (version 8 of the
-		NVRAM polling patch).  Some development snapshots of
-		0.18 had an earlier version that did strange things
-		to hotkey_mask.
-
-0x020200:	Add poll()/select() support to the following attributes:
-		hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason
-
-0x020300:	hotkey enable/disable support removed, attributes
-		hotkey_bios_enabled and hotkey_enable deprecated and
-		marked for removal.
-
-0x020400:	Marker for 16 LEDs support.  Also, LEDs that are known
-		to not exist in a given model are not registered with
-		the LED sysfs class anymore.
-
-0x020500:	Updated hotkey driver, hotkey_mask is always available
-		and it is always able to disable hot keys.  Very old
-		thinkpads are properly supported.  hotkey_bios_mask
-		is deprecated and marked for removal.
-
-0x020600:	Marker for backlight change event support.
-
-0x020700:	Support for mute-only mixers.
-		Volume control in read-only mode by default.
-		Marker for ALSA mixer support.
-
-0x030000:	Thermal and fan sysfs attributes were moved to the hwmon
-		device instead of being attached to the backing platform
-		device.
diff --git a/Documentation/laptops/toshiba_haps.rst b/Documentation/laptops/toshiba_haps.rst
new file mode 100644
index 000000000000..11dfc428c080
--- /dev/null
+++ b/Documentation/laptops/toshiba_haps.rst
@@ -0,0 +1,87 @@
+====================================
+Toshiba HDD Active Protection Sensor
+====================================
+
+Kernel driver: toshiba_haps
+
+Author: Azael Avalos <coproscefalo@gmail.com>
+
+
+.. 0. Contents
+
+   1. Description
+   2. Interface
+   3. Accelerometer axes
+   4. Supported devices
+   5. Usage
+
+
+1. Description
+--------------
+
+This driver provides support for the accelerometer found in various Toshiba
+laptops, being called "Toshiba HDD Protection - Shock Sensor" officially,
+and detects laptops automatically with this device.
+On Windows, Toshiba provided software monitors this device and provides
+automatic HDD protection (head unload) on sudden moves or harsh vibrations,
+however, this driver only provides a notification via a sysfs file to let
+userspace tools or daemons act accordingly, as well as providing a sysfs
+file to set the desired protection level or sensor sensibility.
+
+
+2. Interface
+------------
+
+This device comes with 3 methods:
+
+====	=====================================================================
+_STA    Checks existence of the device, returning Zero if the device does not
+	exists or is not supported.
+PTLV    Sets the desired protection level.
+RSSS    Shuts down the HDD protection interface for a few seconds,
+	then restores normal operation.
+====	=====================================================================
+
+Note:
+  The presence of Solid State Drives (SSD) can make this driver to fail loading,
+  given the fact that such drives have no movable parts, and thus, not requiring
+  any "protection" as well as failing during the evaluation of the _STA method
+  found under this device.
+
+
+3. Accelerometer axes
+---------------------
+
+This device does not report any axes, however, to query the sensor position
+a couple HCI (Hardware Configuration Interface) calls (0x6D and 0xA6) are
+provided to query such information, handled by the kernel module toshiba_acpi
+since kernel version 3.15.
+
+
+4. Supported devices
+--------------------
+
+This driver binds itself to the ACPI device TOS620A, and any Toshiba laptop
+with this device is supported, given the fact that they have the presence of
+conventional HDD and not only SSD, or a combination of both HDD and SSD.
+
+
+5. Usage
+--------
+
+The sysfs files under /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS620A:00/ are:
+
+================   ============================================================
+protection_level   The protection_level is readable and writeable, and
+		   provides a way to let userspace query the current protection
+		   level, as well as set the desired protection level, the
+		   available protection levels are:
+
+		   ============   =======   ==========   ========
+		   0 - Disabled   1 - Low   2 - Medium   3 - High
+		   ============   =======   ==========   ========
+
+reset_protection   The reset_protection entry is writeable only, being "1"
+		   the only parameter it accepts, it is used to trigger
+		   a reset of the protection interface.
+================   ============================================================
diff --git a/Documentation/laptops/toshiba_haps.txt b/Documentation/laptops/toshiba_haps.txt
deleted file mode 100644
index 0c1d88dedbde..000000000000
--- a/Documentation/laptops/toshiba_haps.txt
+++ /dev/null
@@ -1,76 +0,0 @@
-Kernel driver toshiba_haps
-Toshiba HDD Active Protection Sensor
-====================================
-
-Author: Azael Avalos <coproscefalo@gmail.com>
-
-
-0. Contents
------------
-
-1. Description
-2. Interface
-3. Accelerometer axes
-4. Supported devices
-5. Usage
-
-
-1. Description
---------------
-
-This driver provides support for the accelerometer found in various Toshiba
-laptops, being called "Toshiba HDD Protection - Shock Sensor" officially,
-and detects laptops automatically with this device.
-On Windows, Toshiba provided software monitors this device and provides
-automatic HDD protection (head unload) on sudden moves or harsh vibrations,
-however, this driver only provides a notification via a sysfs file to let
-userspace tools or daemons act accordingly, as well as providing a sysfs
-file to set the desired protection level or sensor sensibility.
-
-
-2. Interface
-------------
-
-This device comes with 3 methods:
-_STA -  Checks existence of the device, returning Zero if the device does not
-	exists or is not supported.
-PTLV -  Sets the desired protection level.
-RSSS -  Shuts down the HDD protection interface for a few seconds,
-	then restores normal operation.
-
-Note:
-The presence of Solid State Drives (SSD) can make this driver to fail loading,
-given the fact that such drives have no movable parts, and thus, not requiring
-any "protection" as well as failing during the evaluation of the _STA method
-found under this device.
-
-
-3. Accelerometer axes
----------------------
-
-This device does not report any axes, however, to query the sensor position
-a couple HCI (Hardware Configuration Interface) calls (0x6D and 0xA6) are
-provided to query such information, handled by the kernel module toshiba_acpi
-since kernel version 3.15.
-
-
-4. Supported devices
---------------------
-
-This driver binds itself to the ACPI device TOS620A, and any Toshiba laptop
-with this device is supported, given the fact that they have the presence of
-conventional HDD and not only SSD, or a combination of both HDD and SSD.
-
-
-5. Usage
---------
-
-The sysfs files under /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS620A:00/ are:
-protection_level - The protection_level is readable and writeable, and
-		   provides a way to let userspace query the current protection
-		   level, as well as set the desired protection level, the
-		   available protection levels are:
-		   0 - Disabled | 1 - Low | 2 - Medium | 3 - High
-reset_protection - The reset_protection entry is writeable only, being "1"
-		   the only parameter it accepts, it is used to trigger
-		   a reset of the protection interface.
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 749322060f10..c5f0d44433a2 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -102,7 +102,7 @@ Changing this takes effect whenever an application requests memory.
 block_dump
 
 block_dump enables block I/O debugging when set to a nonzero value. More
-information on block I/O debugging is in Documentation/laptops/laptop-mode.txt.
+information on block I/O debugging is in Documentation/laptops/laptop-mode.rst.
 
 ==============================================================
 
@@ -286,7 +286,7 @@ shared memory segment using hugetlb page.
 laptop_mode
 
 laptop_mode is a knob that controls "laptop mode". All the things that are
-controlled by this knob are discussed in Documentation/laptops/laptop-mode.txt.
+controlled by this knob are discussed in Documentation/laptops/laptop-mode.rst.
 
 ==============================================================
 
-- 
cgit 


From 20a78ae9ed297f217537211e3304f525326ee517 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 12:43:16 -0300
Subject: docs: namespaces: convert to ReST

Rename the namespaces documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

There are two upper case file names. Rename them to
lower case, as we're working to avoid upper case file
names at Documentation.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/namespaces/compatibility-list.rst | 43 +++++++++++++++++++++++++
 Documentation/namespaces/compatibility-list.txt | 39 ----------------------
 Documentation/namespaces/index.rst              | 11 +++++++
 Documentation/namespaces/resource-control.rst   | 18 +++++++++++
 Documentation/namespaces/resource-control.txt   | 14 --------
 5 files changed, 72 insertions(+), 53 deletions(-)
 create mode 100644 Documentation/namespaces/compatibility-list.rst
 delete mode 100644 Documentation/namespaces/compatibility-list.txt
 create mode 100644 Documentation/namespaces/index.rst
 create mode 100644 Documentation/namespaces/resource-control.rst
 delete mode 100644 Documentation/namespaces/resource-control.txt

(limited to 'Documentation')

diff --git a/Documentation/namespaces/compatibility-list.rst b/Documentation/namespaces/compatibility-list.rst
new file mode 100644
index 000000000000..318800b2a943
--- /dev/null
+++ b/Documentation/namespaces/compatibility-list.rst
@@ -0,0 +1,43 @@
+=============================
+Namespaces compatibility list
+=============================
+
+This document contains the information about the problems user
+may have when creating tasks living in different namespaces.
+
+Here's the summary. This matrix shows the known problems, that
+occur when tasks share some namespace (the columns) while living
+in different other namespaces (the rows):
+
+====	===	===	===	===	====	===
+-	UTS	IPC	VFS	PID	User	Net
+====	===	===	===	===	====	===
+UTS	 X
+IPC		 X	 1
+VFS			 X
+PID		 1	 1	 X
+User		 2	 2		 X
+Net						 X
+====	===	===	===	===	====	===
+
+1. Both the IPC and the PID namespaces provide IDs to address
+   object inside the kernel. E.g. semaphore with IPCID or
+   process group with pid.
+
+   In both cases, tasks shouldn't try exposing this ID to some
+   other task living in a different namespace via a shared filesystem
+   or IPC shmem/message. The fact is that this ID is only valid
+   within the namespace it was obtained in and may refer to some
+   other object in another namespace.
+
+2. Intentionally, two equal user IDs in different user namespaces
+   should not be equal from the VFS point of view. In other
+   words, user 10 in one user namespace shouldn't have the same
+   access permissions to files, belonging to user 10 in another
+   namespace.
+
+   The same is true for the IPC namespaces being shared - two users
+   from different user namespaces should not access the same IPC objects
+   even having equal UIDs.
+
+   But currently this is not so.
diff --git a/Documentation/namespaces/compatibility-list.txt b/Documentation/namespaces/compatibility-list.txt
deleted file mode 100644
index defc5589bfcd..000000000000
--- a/Documentation/namespaces/compatibility-list.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-	Namespaces compatibility list
-
-This document contains the information about the problems user
-may have when creating tasks living in different namespaces.
-
-Here's the summary. This matrix shows the known problems, that
-occur when tasks share some namespace (the columns) while living
-in different other namespaces (the rows):
-
-	UTS	IPC	VFS	PID	User	Net
-UTS	 X
-IPC		 X	 1
-VFS			 X
-PID		 1	 1	 X
-User		 2	 2		 X
-Net						 X
-
-1. Both the IPC and the PID namespaces provide IDs to address
-   object inside the kernel. E.g. semaphore with IPCID or
-   process group with pid.
-
-   In both cases, tasks shouldn't try exposing this ID to some
-   other task living in a different namespace via a shared filesystem
-   or IPC shmem/message. The fact is that this ID is only valid
-   within the namespace it was obtained in and may refer to some
-   other object in another namespace.
-
-2. Intentionally, two equal user IDs in different user namespaces
-   should not be equal from the VFS point of view. In other
-   words, user 10 in one user namespace shouldn't have the same
-   access permissions to files, belonging to user 10 in another
-   namespace.
-
-   The same is true for the IPC namespaces being shared - two users
-   from different user namespaces should not access the same IPC objects
-   even having equal UIDs.
-
-   But currently this is not so.
-
diff --git a/Documentation/namespaces/index.rst b/Documentation/namespaces/index.rst
new file mode 100644
index 000000000000..bf40625dd11a
--- /dev/null
+++ b/Documentation/namespaces/index.rst
@@ -0,0 +1,11 @@
+:orphan:
+
+==========
+Namespaces
+==========
+
+.. toctree::
+   :maxdepth: 1
+
+   compatibility-list
+   resource-control
diff --git a/Documentation/namespaces/resource-control.rst b/Documentation/namespaces/resource-control.rst
new file mode 100644
index 000000000000..369556e00f0c
--- /dev/null
+++ b/Documentation/namespaces/resource-control.rst
@@ -0,0 +1,18 @@
+===========================
+Namespaces research control
+===========================
+
+There are a lot of kinds of objects in the kernel that don't have
+individual limits or that have limits that are ineffective when a set
+of processes is allowed to switch user ids.  With user namespaces
+enabled in a kernel for people who don't trust their users or their
+users programs to play nice this problems becomes more acute.
+
+Therefore it is recommended that memory control groups be enabled in
+kernels that enable user namespaces, and it is further recommended
+that userspace configure memory control groups to limit how much
+memory user's they don't trust to play nice can use.
+
+Memory control groups can be configured by installing the libcgroup
+package present on most distros editing /etc/cgrules.conf,
+/etc/cgconfig.conf and setting up libpam-cgroup.
diff --git a/Documentation/namespaces/resource-control.txt b/Documentation/namespaces/resource-control.txt
deleted file mode 100644
index abc13c394738..000000000000
--- a/Documentation/namespaces/resource-control.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-There are a lot of kinds of objects in the kernel that don't have
-individual limits or that have limits that are ineffective when a set
-of processes is allowed to switch user ids.  With user namespaces
-enabled in a kernel for people who don't trust their users or their
-users programs to play nice this problems becomes more acute.
-
-Therefore it is recommended that memory control groups be enabled in
-kernels that enable user namespaces, and it is further recommended
-that userspace configure memory control groups to limit how much
-memory user's they don't trust to play nice can use.
-
-Memory control groups can be configured by installing the libcgroup
-package present on most distros editing /etc/cgrules.conf,
-/etc/cgconfig.conf and setting up libpam-cgroup.
-- 
cgit 


From 9e678dd886c11fad6511ffad4d400e3abde81d64 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 13:02:23 -0300
Subject: docs: nfc: convert to ReST

Rename the nfc documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/nfc/index.rst     |  11 ++
 Documentation/nfc/nfc-hci.rst   | 311 ++++++++++++++++++++++++++++++++++++++++
 Documentation/nfc/nfc-hci.txt   | 290 -------------------------------------
 Documentation/nfc/nfc-pn544.rst |  34 +++++
 Documentation/nfc/nfc-pn544.txt |  32 -----
 5 files changed, 356 insertions(+), 322 deletions(-)
 create mode 100644 Documentation/nfc/index.rst
 create mode 100644 Documentation/nfc/nfc-hci.rst
 delete mode 100644 Documentation/nfc/nfc-hci.txt
 create mode 100644 Documentation/nfc/nfc-pn544.rst
 delete mode 100644 Documentation/nfc/nfc-pn544.txt

(limited to 'Documentation')

diff --git a/Documentation/nfc/index.rst b/Documentation/nfc/index.rst
new file mode 100644
index 000000000000..4f4947fce80d
--- /dev/null
+++ b/Documentation/nfc/index.rst
@@ -0,0 +1,11 @@
+:orphan:
+
+========================
+Near Field Communication
+========================
+
+.. toctree::
+   :maxdepth: 1
+
+   nfc-hci
+   nfc-pn544
diff --git a/Documentation/nfc/nfc-hci.rst b/Documentation/nfc/nfc-hci.rst
new file mode 100644
index 000000000000..eb8a1a14e919
--- /dev/null
+++ b/Documentation/nfc/nfc-hci.rst
@@ -0,0 +1,311 @@
+========================
+HCI backend for NFC Core
+========================
+
+- Author: Eric Lapuyade, Samuel Ortiz
+- Contact: eric.lapuyade@intel.com, samuel.ortiz@intel.com
+
+General
+-------
+
+The HCI layer implements much of the ETSI TS 102 622 V10.2.0 specification. It
+enables easy writing of HCI-based NFC drivers. The HCI layer runs as an NFC Core
+backend, implementing an abstract nfc device and translating NFC Core API
+to HCI commands and events.
+
+HCI
+---
+
+HCI registers as an nfc device with NFC Core. Requests coming from userspace are
+routed through netlink sockets to NFC Core and then to HCI. From this point,
+they are translated in a sequence of HCI commands sent to the HCI layer in the
+host controller (the chip). Commands can be executed synchronously (the sending
+context blocks waiting for response) or asynchronously (the response is returned
+from HCI Rx context).
+HCI events can also be received from the host controller. They will be handled
+and a translation will be forwarded to NFC Core as needed. There are hooks to
+let the HCI driver handle proprietary events or override standard behavior.
+HCI uses 2 execution contexts:
+
+- one for executing commands : nfc_hci_msg_tx_work(). Only one command
+  can be executing at any given moment.
+- one for dispatching received events and commands : nfc_hci_msg_rx_work().
+
+HCI Session initialization
+--------------------------
+
+The Session initialization is an HCI standard which must unfortunately
+support proprietary gates. This is the reason why the driver will pass a list
+of proprietary gates that must be part of the session. HCI will ensure all
+those gates have pipes connected when the hci device is set up.
+In case the chip supports pre-opened gates and pseudo-static pipes, the driver
+can pass that information to HCI core.
+
+HCI Gates and Pipes
+-------------------
+
+A gate defines the 'port' where some service can be found. In order to access
+a service, one must create a pipe to that gate and open it. In this
+implementation, pipes are totally hidden. The public API only knows gates.
+This is consistent with the driver need to send commands to proprietary gates
+without knowing the pipe connected to it.
+
+Driver interface
+----------------
+
+A driver is generally written in two parts : the physical link management and
+the HCI management. This makes it easier to maintain a driver for a chip that
+can be connected using various phy (i2c, spi, ...)
+
+HCI Management
+--------------
+
+A driver would normally register itself with HCI and provide the following
+entry points::
+
+  struct nfc_hci_ops {
+	int (*open)(struct nfc_hci_dev *hdev);
+	void (*close)(struct nfc_hci_dev *hdev);
+	int (*hci_ready) (struct nfc_hci_dev *hdev);
+	int (*xmit) (struct nfc_hci_dev *hdev, struct sk_buff *skb);
+	int (*start_poll) (struct nfc_hci_dev *hdev,
+			   u32 im_protocols, u32 tm_protocols);
+	int (*dep_link_up)(struct nfc_hci_dev *hdev, struct nfc_target *target,
+			   u8 comm_mode, u8 *gb, size_t gb_len);
+	int (*dep_link_down)(struct nfc_hci_dev *hdev);
+	int (*target_from_gate) (struct nfc_hci_dev *hdev, u8 gate,
+				 struct nfc_target *target);
+	int (*complete_target_discovered) (struct nfc_hci_dev *hdev, u8 gate,
+					   struct nfc_target *target);
+	int (*im_transceive) (struct nfc_hci_dev *hdev,
+			      struct nfc_target *target, struct sk_buff *skb,
+			      data_exchange_cb_t cb, void *cb_context);
+	int (*tm_send)(struct nfc_hci_dev *hdev, struct sk_buff *skb);
+	int (*check_presence)(struct nfc_hci_dev *hdev,
+			      struct nfc_target *target);
+	int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event,
+			      struct sk_buff *skb);
+  };
+
+- open() and close() shall turn the hardware on and off.
+- hci_ready() is an optional entry point that is called right after the hci
+  session has been set up. The driver can use it to do additional initialization
+  that must be performed using HCI commands.
+- xmit() shall simply write a frame to the physical link.
+- start_poll() is an optional entrypoint that shall set the hardware in polling
+  mode. This must be implemented only if the hardware uses proprietary gates or a
+  mechanism slightly different from the HCI standard.
+- dep_link_up() is called after a p2p target has been detected, to finish
+  the p2p connection setup with hardware parameters that need to be passed back
+  to nfc core.
+- dep_link_down() is called to bring the p2p link down.
+- target_from_gate() is an optional entrypoint to return the nfc protocols
+  corresponding to a proprietary gate.
+- complete_target_discovered() is an optional entry point to let the driver
+  perform additional proprietary processing necessary to auto activate the
+  discovered target.
+- im_transceive() must be implemented by the driver if proprietary HCI commands
+  are required to send data to the tag. Some tag types will require custom
+  commands, others can be written to using the standard HCI commands. The driver
+  can check the tag type and either do proprietary processing, or return 1 to ask
+  for standard processing. The data exchange command itself must be sent
+  asynchronously.
+- tm_send() is called to send data in the case of a p2p connection
+- check_presence() is an optional entry point that will be called regularly
+  by the core to check that an activated tag is still in the field. If this is
+  not implemented, the core will not be able to push tag_lost events to the user
+  space
+- event_received() is called to handle an event coming from the chip. Driver
+  can handle the event or return 1 to let HCI attempt standard processing.
+
+On the rx path, the driver is responsible to push incoming HCP frames to HCI
+using nfc_hci_recv_frame(). HCI will take care of re-aggregation and handling
+This must be done from a context that can sleep.
+
+PHY Management
+--------------
+
+The physical link (i2c, ...) management is defined by the following structure::
+
+  struct nfc_phy_ops {
+	int (*write)(void *dev_id, struct sk_buff *skb);
+	int (*enable)(void *dev_id);
+	void (*disable)(void *dev_id);
+  };
+
+enable():
+	turn the phy on (power on), make it ready to transfer data
+disable():
+	turn the phy off
+write():
+	Send a data frame to the chip. Note that to enable higher
+	layers such as an llc to store the frame for re-emission, this
+	function must not alter the skb. It must also not return a positive
+	result (return 0 for success, negative for failure).
+
+Data coming from the chip shall be sent directly to nfc_hci_recv_frame().
+
+LLC
+---
+
+Communication between the CPU and the chip often requires some link layer
+protocol. Those are isolated as modules managed by the HCI layer. There are
+currently two modules : nop (raw transfert) and shdlc.
+A new llc must implement the following functions::
+
+  struct nfc_llc_ops {
+	void *(*init) (struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
+		       rcv_to_hci_t rcv_to_hci, int tx_headroom,
+		       int tx_tailroom, int *rx_headroom, int *rx_tailroom,
+		       llc_failure_t llc_failure);
+	void (*deinit) (struct nfc_llc *llc);
+	int (*start) (struct nfc_llc *llc);
+	int (*stop) (struct nfc_llc *llc);
+	void (*rcv_from_drv) (struct nfc_llc *llc, struct sk_buff *skb);
+	int (*xmit_from_hci) (struct nfc_llc *llc, struct sk_buff *skb);
+  };
+
+init():
+	allocate and init your private storage
+deinit():
+	cleanup
+start():
+	establish the logical connection
+stop ():
+	terminate the logical connection
+rcv_from_drv():
+	handle data coming from the chip, going to HCI
+xmit_from_hci():
+	handle data sent by HCI, going to the chip
+
+The llc must be registered with nfc before it can be used. Do that by
+calling::
+
+	nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
+
+Again, note that the llc does not handle the physical link. It is thus very
+easy to mix any physical link with any llc for a given chip driver.
+
+Included Drivers
+----------------
+
+An HCI based driver for an NXP PN544, connected through I2C bus, and using
+shdlc is included.
+
+Execution Contexts
+------------------
+
+The execution contexts are the following:
+- IRQ handler (IRQH):
+fast, cannot sleep. sends incoming frames to HCI where they are passed to
+the current llc. In case of shdlc, the frame is queued in shdlc rx queue.
+
+- SHDLC State Machine worker (SMW)
+
+  Only when llc_shdlc is used: handles shdlc rx & tx queues.
+
+  Dispatches HCI cmd responses.
+
+- HCI Tx Cmd worker (MSGTXWQ)
+
+  Serializes execution of HCI commands.
+
+  Completes execution in case of response timeout.
+
+- HCI Rx worker (MSGRXWQ)
+
+  Dispatches incoming HCI commands or events.
+
+- Syscall context from a userspace call (SYSCALL)
+
+  Any entrypoint in HCI called from NFC Core
+
+Workflow executing an HCI command (using shdlc)
+-----------------------------------------------
+
+Executing an HCI command can easily be performed synchronously using the
+following API::
+
+  int nfc_hci_send_cmd (struct nfc_hci_dev *hdev, u8 gate, u8 cmd,
+			const u8 *param, size_t param_len, struct sk_buff **skb)
+
+The API must be invoked from a context that can sleep. Most of the time, this
+will be the syscall context. skb will return the result that was received in
+the response.
+
+Internally, execution is asynchronous. So all this API does is to enqueue the
+HCI command, setup a local wait queue on stack, and wait_event() for completion.
+The wait is not interruptible because it is guaranteed that the command will
+complete after some short timeout anyway.
+
+MSGTXWQ context will then be scheduled and invoke nfc_hci_msg_tx_work().
+This function will dequeue the next pending command and send its HCP fragments
+to the lower layer which happens to be shdlc. It will then start a timer to be
+able to complete the command with a timeout error if no response arrive.
+
+SMW context gets scheduled and invokes nfc_shdlc_sm_work(). This function
+handles shdlc framing in and out. It uses the driver xmit to send frames and
+receives incoming frames in an skb queue filled from the driver IRQ handler.
+SHDLC I(nformation) frames payload are HCP fragments. They are aggregated to
+form complete HCI frames, which can be a response, command, or event.
+
+HCI Responses are dispatched immediately from this context to unblock
+waiting command execution. Response processing involves invoking the completion
+callback that was provided by nfc_hci_msg_tx_work() when it sent the command.
+The completion callback will then wake the syscall context.
+
+It is also possible to execute the command asynchronously using this API::
+
+  static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
+				       const u8 *param, size_t param_len,
+				       data_exchange_cb_t cb, void *cb_context)
+
+The workflow is the same, except that the API call returns immediately, and
+the callback will be called with the result from the SMW context.
+
+Workflow receiving an HCI event or command
+------------------------------------------
+
+HCI commands or events are not dispatched from SMW context. Instead, they are
+queued to HCI rx_queue and will be dispatched from HCI rx worker
+context (MSGRXWQ). This is done this way to allow a cmd or event handler
+to also execute other commands (for example, handling the
+NFC_HCI_EVT_TARGET_DISCOVERED event from PN544 requires to issue an
+ANY_GET_PARAMETER to the reader A gate to get information on the target
+that was discovered).
+
+Typically, such an event will be propagated to NFC Core from MSGRXWQ context.
+
+Error management
+----------------
+
+Errors that occur synchronously with the execution of an NFC Core request are
+simply returned as the execution result of the request. These are easy.
+
+Errors that occur asynchronously (e.g. in a background protocol handling thread)
+must be reported such that upper layers don't stay ignorant that something
+went wrong below and know that expected events will probably never happen.
+Handling of these errors is done as follows:
+
+- driver (pn544) fails to deliver an incoming frame: it stores the error such
+  that any subsequent call to the driver will result in this error. Then it
+  calls the standard nfc_shdlc_recv_frame() with a NULL argument to report the
+  problem above. shdlc stores a EREMOTEIO sticky status, which will trigger
+  SMW to report above in turn.
+
+- SMW is basically a background thread to handle incoming and outgoing shdlc
+  frames. This thread will also check the shdlc sticky status and report to HCI
+  when it discovers it is not able to run anymore because of an unrecoverable
+  error that happened within shdlc or below. If the problem occurs during shdlc
+  connection, the error is reported through the connect completion.
+
+- HCI: if an internal HCI error happens (frame is lost), or HCI is reported an
+  error from a lower layer, HCI will either complete the currently executing
+  command with that error, or notify NFC Core directly if no command is
+  executing.
+
+- NFC Core: when NFC Core is notified of an error from below and polling is
+  active, it will send a tag discovered event with an empty tag list to the user
+  space to let it know that the poll operation will never be able to detect a
+  tag. If polling is not active and the error was sticky, lower levels will
+  return it at next invocation.
diff --git a/Documentation/nfc/nfc-hci.txt b/Documentation/nfc/nfc-hci.txt
deleted file mode 100644
index 0dc078cab972..000000000000
--- a/Documentation/nfc/nfc-hci.txt
+++ /dev/null
@@ -1,290 +0,0 @@
-HCI backend for NFC Core
-
-Author: Eric Lapuyade, Samuel Ortiz
-Contact: eric.lapuyade@intel.com, samuel.ortiz@intel.com
-
-General
--------
-
-The HCI layer implements much of the ETSI TS 102 622 V10.2.0 specification. It
-enables easy writing of HCI-based NFC drivers. The HCI layer runs as an NFC Core
-backend, implementing an abstract nfc device and translating NFC Core API
-to HCI commands and events.
-
-HCI
----
-
-HCI registers as an nfc device with NFC Core. Requests coming from userspace are
-routed through netlink sockets to NFC Core and then to HCI. From this point,
-they are translated in a sequence of HCI commands sent to the HCI layer in the
-host controller (the chip). Commands can be executed synchronously (the sending
-context blocks waiting for response) or asynchronously (the response is returned
-from HCI Rx context).
-HCI events can also be received from the host controller. They will be handled
-and a translation will be forwarded to NFC Core as needed. There are hooks to
-let the HCI driver handle proprietary events or override standard behavior.
-HCI uses 2 execution contexts:
-- one for executing commands : nfc_hci_msg_tx_work(). Only one command
-can be executing at any given moment.
-- one for dispatching received events and commands : nfc_hci_msg_rx_work().
-
-HCI Session initialization:
----------------------------
-
-The Session initialization is an HCI standard which must unfortunately
-support proprietary gates. This is the reason why the driver will pass a list
-of proprietary gates that must be part of the session. HCI will ensure all
-those gates have pipes connected when the hci device is set up.
-In case the chip supports pre-opened gates and pseudo-static pipes, the driver
-can pass that information to HCI core.
-
-HCI Gates and Pipes
--------------------
-
-A gate defines the 'port' where some service can be found. In order to access
-a service, one must create a pipe to that gate and open it. In this
-implementation, pipes are totally hidden. The public API only knows gates.
-This is consistent with the driver need to send commands to proprietary gates
-without knowing the pipe connected to it.
-
-Driver interface
-----------------
-
-A driver is generally written in two parts : the physical link management and
-the HCI management. This makes it easier to maintain a driver for a chip that
-can be connected using various phy (i2c, spi, ...)
-
-HCI Management
---------------
-
-A driver would normally register itself with HCI and provide the following
-entry points:
-
-struct nfc_hci_ops {
-	int (*open)(struct nfc_hci_dev *hdev);
-	void (*close)(struct nfc_hci_dev *hdev);
-	int (*hci_ready) (struct nfc_hci_dev *hdev);
-	int (*xmit) (struct nfc_hci_dev *hdev, struct sk_buff *skb);
-	int (*start_poll) (struct nfc_hci_dev *hdev,
-			   u32 im_protocols, u32 tm_protocols);
-	int (*dep_link_up)(struct nfc_hci_dev *hdev, struct nfc_target *target,
-			   u8 comm_mode, u8 *gb, size_t gb_len);
-	int (*dep_link_down)(struct nfc_hci_dev *hdev);
-	int (*target_from_gate) (struct nfc_hci_dev *hdev, u8 gate,
-				 struct nfc_target *target);
-	int (*complete_target_discovered) (struct nfc_hci_dev *hdev, u8 gate,
-					   struct nfc_target *target);
-	int (*im_transceive) (struct nfc_hci_dev *hdev,
-			      struct nfc_target *target, struct sk_buff *skb,
-			      data_exchange_cb_t cb, void *cb_context);
-	int (*tm_send)(struct nfc_hci_dev *hdev, struct sk_buff *skb);
-	int (*check_presence)(struct nfc_hci_dev *hdev,
-			      struct nfc_target *target);
-	int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event,
-			      struct sk_buff *skb);
-};
-
-- open() and close() shall turn the hardware on and off.
-- hci_ready() is an optional entry point that is called right after the hci
-session has been set up. The driver can use it to do additional initialization
-that must be performed using HCI commands.
-- xmit() shall simply write a frame to the physical link.
-- start_poll() is an optional entrypoint that shall set the hardware in polling
-mode. This must be implemented only if the hardware uses proprietary gates or a
-mechanism slightly different from the HCI standard.
-- dep_link_up() is called after a p2p target has been detected, to finish
-the p2p connection setup with hardware parameters that need to be passed back
-to nfc core.
-- dep_link_down() is called to bring the p2p link down.
-- target_from_gate() is an optional entrypoint to return the nfc protocols
-corresponding to a proprietary gate.
-- complete_target_discovered() is an optional entry point to let the driver
-perform additional proprietary processing necessary to auto activate the
-discovered target.
-- im_transceive() must be implemented by the driver if proprietary HCI commands
-are required to send data to the tag. Some tag types will require custom
-commands, others can be written to using the standard HCI commands. The driver
-can check the tag type and either do proprietary processing, or return 1 to ask
-for standard processing. The data exchange command itself must be sent
-asynchronously.
-- tm_send() is called to send data in the case of a p2p connection
-- check_presence() is an optional entry point that will be called regularly
-by the core to check that an activated tag is still in the field. If this is
-not implemented, the core will not be able to push tag_lost events to the user
-space
-- event_received() is called to handle an event coming from the chip. Driver
-can handle the event or return 1 to let HCI attempt standard processing.
-
-On the rx path, the driver is responsible to push incoming HCP frames to HCI
-using nfc_hci_recv_frame(). HCI will take care of re-aggregation and handling
-This must be done from a context that can sleep.
-
-PHY Management
---------------
-
-The physical link (i2c, ...) management is defined by the following structure:
-
-struct nfc_phy_ops {
-	int (*write)(void *dev_id, struct sk_buff *skb);
-	int (*enable)(void *dev_id);
-	void (*disable)(void *dev_id);
-};
-
-enable(): turn the phy on (power on), make it ready to transfer data
-disable(): turn the phy off
-write(): Send a data frame to the chip. Note that to enable higher
-layers such as an llc to store the frame for re-emission, this function must
-not alter the skb. It must also not return a positive result (return 0 for
-success, negative for failure).
-
-Data coming from the chip shall be sent directly to nfc_hci_recv_frame().
-
-LLC
----
-
-Communication between the CPU and the chip often requires some link layer
-protocol. Those are isolated as modules managed by the HCI layer. There are
-currently two modules : nop (raw transfert) and shdlc.
-A new llc must implement the following functions:
-
-struct nfc_llc_ops {
-	void *(*init) (struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
-		       rcv_to_hci_t rcv_to_hci, int tx_headroom,
-		       int tx_tailroom, int *rx_headroom, int *rx_tailroom,
-		       llc_failure_t llc_failure);
-	void (*deinit) (struct nfc_llc *llc);
-	int (*start) (struct nfc_llc *llc);
-	int (*stop) (struct nfc_llc *llc);
-	void (*rcv_from_drv) (struct nfc_llc *llc, struct sk_buff *skb);
-	int (*xmit_from_hci) (struct nfc_llc *llc, struct sk_buff *skb);
-};
-
-- init() : allocate and init your private storage
-- deinit() : cleanup
-- start() : establish the logical connection
-- stop () : terminate the logical connection
-- rcv_from_drv() : handle data coming from the chip, going to HCI
-- xmit_from_hci() : handle data sent by HCI, going to the chip
-
-The llc must be registered with nfc before it can be used. Do that by
-calling nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
-
-Again, note that the llc does not handle the physical link. It is thus very
-easy to mix any physical link with any llc for a given chip driver.
-
-Included Drivers
-----------------
-
-An HCI based driver for an NXP PN544, connected through I2C bus, and using
-shdlc is included.
-
-Execution Contexts
-------------------
-
-The execution contexts are the following:
-- IRQ handler (IRQH):
-fast, cannot sleep. sends incoming frames to HCI where they are passed to
-the current llc. In case of shdlc, the frame is queued in shdlc rx queue.
-
-- SHDLC State Machine worker (SMW)
-Only when llc_shdlc is used: handles shdlc rx & tx queues.
-Dispatches HCI cmd responses.
-
-- HCI Tx Cmd worker (MSGTXWQ)
-Serializes execution of HCI commands. Completes execution in case of response
-timeout.
-
-- HCI Rx worker (MSGRXWQ)
-Dispatches incoming HCI commands or events.
-
-- Syscall context from a userspace call (SYSCALL)
-Any entrypoint in HCI called from NFC Core
-
-Workflow executing an HCI command (using shdlc)
------------------------------------------------
-
-Executing an HCI command can easily be performed synchronously using the
-following API:
-
-int nfc_hci_send_cmd (struct nfc_hci_dev *hdev, u8 gate, u8 cmd,
-			const u8 *param, size_t param_len, struct sk_buff **skb)
-
-The API must be invoked from a context that can sleep. Most of the time, this
-will be the syscall context. skb will return the result that was received in
-the response.
-
-Internally, execution is asynchronous. So all this API does is to enqueue the
-HCI command, setup a local wait queue on stack, and wait_event() for completion.
-The wait is not interruptible because it is guaranteed that the command will
-complete after some short timeout anyway.
-
-MSGTXWQ context will then be scheduled and invoke nfc_hci_msg_tx_work().
-This function will dequeue the next pending command and send its HCP fragments
-to the lower layer which happens to be shdlc. It will then start a timer to be
-able to complete the command with a timeout error if no response arrive.
-
-SMW context gets scheduled and invokes nfc_shdlc_sm_work(). This function
-handles shdlc framing in and out. It uses the driver xmit to send frames and
-receives incoming frames in an skb queue filled from the driver IRQ handler.
-SHDLC I(nformation) frames payload are HCP fragments. They are aggregated to
-form complete HCI frames, which can be a response, command, or event.
-
-HCI Responses are dispatched immediately from this context to unblock
-waiting command execution. Response processing involves invoking the completion
-callback that was provided by nfc_hci_msg_tx_work() when it sent the command.
-The completion callback will then wake the syscall context.
-
-It is also possible to execute the command asynchronously using this API:
-
-static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
-			       const u8 *param, size_t param_len,
-			       data_exchange_cb_t cb, void *cb_context)
-
-The workflow is the same, except that the API call returns immediately, and
-the callback will be called with the result from the SMW context.
-
-Workflow receiving an HCI event or command
-------------------------------------------
-
-HCI commands or events are not dispatched from SMW context. Instead, they are
-queued to HCI rx_queue and will be dispatched from HCI rx worker
-context (MSGRXWQ). This is done this way to allow a cmd or event handler
-to also execute other commands (for example, handling the
-NFC_HCI_EVT_TARGET_DISCOVERED event from PN544 requires to issue an
-ANY_GET_PARAMETER to the reader A gate to get information on the target
-that was discovered).
-
-Typically, such an event will be propagated to NFC Core from MSGRXWQ context.
-
-Error management
-----------------
-
-Errors that occur synchronously with the execution of an NFC Core request are
-simply returned as the execution result of the request. These are easy.
-
-Errors that occur asynchronously (e.g. in a background protocol handling thread)
-must be reported such that upper layers don't stay ignorant that something
-went wrong below and know that expected events will probably never happen.
-Handling of these errors is done as follows:
-
-- driver (pn544) fails to deliver an incoming frame: it stores the error such
-that any subsequent call to the driver will result in this error. Then it calls
-the standard nfc_shdlc_recv_frame() with a NULL argument to report the problem
-above. shdlc stores a EREMOTEIO sticky status, which will trigger SMW to
-report above in turn.
-
-- SMW is basically a background thread to handle incoming and outgoing shdlc
-frames. This thread will also check the shdlc sticky status and report to HCI
-when it discovers it is not able to run anymore because of an unrecoverable
-error that happened within shdlc or below. If the problem occurs during shdlc
-connection, the error is reported through the connect completion.
-
-- HCI: if an internal HCI error happens (frame is lost), or HCI is reported an
-error from a lower layer, HCI will either complete the currently executing
-command with that error, or notify NFC Core directly if no command is executing.
-
-- NFC Core: when NFC Core is notified of an error from below and polling is
-active, it will send a tag discovered event with an empty tag list to the user
-space to let it know that the poll operation will never be able to detect a tag.
-If polling is not active and the error was sticky, lower levels will return it
-at next invocation.
diff --git a/Documentation/nfc/nfc-pn544.rst b/Documentation/nfc/nfc-pn544.rst
new file mode 100644
index 000000000000..6b2d8aae0c4e
--- /dev/null
+++ b/Documentation/nfc/nfc-pn544.rst
@@ -0,0 +1,34 @@
+============================================================================
+Kernel driver for the NXP Semiconductors PN544 Near Field Communication chip
+============================================================================
+
+
+General
+-------
+
+The PN544 is an integrated transmission module for contactless
+communication. The driver goes under drives/nfc/ and is compiled as a
+module named "pn544".
+
+Host Interfaces: I2C, SPI and HSU, this driver supports currently only I2C.
+
+Protocols
+---------
+
+In the normal (HCI) mode and in the firmware update mode read and
+write functions behave a bit differently because the message formats
+or the protocols are different.
+
+In the normal (HCI) mode the protocol used is derived from the ETSI
+HCI specification. The firmware is updated using a specific protocol,
+which is different from HCI.
+
+HCI messages consist of an eight bit header and the message body. The
+header contains the message length. Maximum size for an HCI message is
+33. In HCI mode sent messages are tested for a correct
+checksum. Firmware update messages have the length in the second (MSB)
+and third (LSB) bytes of the message. The maximum FW message length is
+1024 bytes.
+
+For the ETSI HCI specification see
+http://www.etsi.org/WebSite/Technologies/ProtocolSpecification.aspx
diff --git a/Documentation/nfc/nfc-pn544.txt b/Documentation/nfc/nfc-pn544.txt
deleted file mode 100644
index b36ca14ca2d6..000000000000
--- a/Documentation/nfc/nfc-pn544.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Kernel driver for the NXP Semiconductors PN544 Near Field
-Communication chip
-
-General
--------
-
-The PN544 is an integrated transmission module for contactless
-communication. The driver goes under drives/nfc/ and is compiled as a
-module named "pn544".
-
-Host Interfaces: I2C, SPI and HSU, this driver supports currently only I2C.
-
-Protocols
----------
-
-In the normal (HCI) mode and in the firmware update mode read and
-write functions behave a bit differently because the message formats
-or the protocols are different.
-
-In the normal (HCI) mode the protocol used is derived from the ETSI
-HCI specification. The firmware is updated using a specific protocol,
-which is different from HCI.
-
-HCI messages consist of an eight bit header and the message body. The
-header contains the message length. Maximum size for an HCI message is
-33. In HCI mode sent messages are tested for a correct
-checksum. Firmware update messages have the length in the second (MSB)
-and third (LSB) bytes of the message. The maximum FW message length is
-1024 bytes.
-
-For the ETSI HCI specification see
-http://www.etsi.org/WebSite/Technologies/ProtocolSpecification.aspx
-- 
cgit 


From 7ed44d59f1959942b8d882e6eeea51616b72e2ec Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 13:31:33 -0300
Subject: docs: md: convert to ReST

Rename the md documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/md/index.rst       |  12 ++
 Documentation/md/md-cluster.rst  | 385 +++++++++++++++++++++++++++++++++++++++
 Documentation/md/md-cluster.txt  | 325 ---------------------------------
 Documentation/md/raid5-cache.rst | 111 +++++++++++
 Documentation/md/raid5-cache.txt | 109 -----------
 Documentation/md/raid5-ppl.rst   |  47 +++++
 Documentation/md/raid5-ppl.txt   |  45 -----
 7 files changed, 555 insertions(+), 479 deletions(-)
 create mode 100644 Documentation/md/index.rst
 create mode 100644 Documentation/md/md-cluster.rst
 delete mode 100644 Documentation/md/md-cluster.txt
 create mode 100644 Documentation/md/raid5-cache.rst
 delete mode 100644 Documentation/md/raid5-cache.txt
 create mode 100644 Documentation/md/raid5-ppl.rst
 delete mode 100644 Documentation/md/raid5-ppl.txt

(limited to 'Documentation')

diff --git a/Documentation/md/index.rst b/Documentation/md/index.rst
new file mode 100644
index 000000000000..c4db34ed327d
--- /dev/null
+++ b/Documentation/md/index.rst
@@ -0,0 +1,12 @@
+:orphan:
+
+====
+RAID
+====
+
+.. toctree::
+   :maxdepth: 1
+
+   md-cluster
+   raid5-cache
+   raid5-ppl
diff --git a/Documentation/md/md-cluster.rst b/Documentation/md/md-cluster.rst
new file mode 100644
index 000000000000..96eb52cec7eb
--- /dev/null
+++ b/Documentation/md/md-cluster.rst
@@ -0,0 +1,385 @@
+==========
+MD Cluster
+==========
+
+The cluster MD is a shared-device RAID for a cluster, it supports
+two levels: raid1 and raid10 (limited support).
+
+
+1. On-disk format
+=================
+
+Separate write-intent-bitmaps are used for each cluster node.
+The bitmaps record all writes that may have been started on that node,
+and may not yet have finished. The on-disk layout is::
+
+  0                    4k                     8k                    12k
+  -------------------------------------------------------------------
+  | idle                | md super            | bm super [0] + bits |
+  | bm bits[0, contd]   | bm super[1] + bits  | bm bits[1, contd]   |
+  | bm super[2] + bits  | bm bits [2, contd]  | bm super[3] + bits  |
+  | bm bits [3, contd]  |                     |                     |
+
+During "normal" functioning we assume the filesystem ensures that only
+one node writes to any given block at a time, so a write request will
+
+ - set the appropriate bit (if not already set)
+ - commit the write to all mirrors
+ - schedule the bit to be cleared after a timeout.
+
+Reads are just handled normally. It is up to the filesystem to ensure
+one node doesn't read from a location where another node (or the same
+node) is writing.
+
+
+2. DLM Locks for management
+===========================
+
+There are three groups of locks for managing the device:
+
+2.1 Bitmap lock resource (bm_lockres)
+-------------------------------------
+
+ The bm_lockres protects individual node bitmaps. They are named in
+ the form bitmap000 for node 1, bitmap001 for node 2 and so on. When a
+ node joins the cluster, it acquires the lock in PW mode and it stays
+ so during the lifetime the node is part of the cluster. The lock
+ resource number is based on the slot number returned by the DLM
+ subsystem. Since DLM starts node count from one and bitmap slots
+ start from zero, one is subtracted from the DLM slot number to arrive
+ at the bitmap slot number.
+
+ The LVB of the bitmap lock for a particular node records the range
+ of sectors that are being re-synced by that node.  No other
+ node may write to those sectors.  This is used when a new nodes
+ joins the cluster.
+
+2.2 Message passing locks
+-------------------------
+
+ Each node has to communicate with other nodes when starting or ending
+ resync, and for metadata superblock updates.  This communication is
+ managed through three locks: "token", "message", and "ack", together
+ with the Lock Value Block (LVB) of one of the "message" lock.
+
+2.3 new-device management
+-------------------------
+
+ A single lock: "no-new-dev" is used to co-ordinate the addition of
+ new devices - this must be synchronized across the array.
+ Normally all nodes hold a concurrent-read lock on this device.
+
+3. Communication
+================
+
+ Messages can be broadcast to all nodes, and the sender waits for all
+ other nodes to acknowledge the message before proceeding.  Only one
+ message can be processed at a time.
+
+3.1 Message Types
+-----------------
+
+ There are six types of messages which are passed:
+
+3.1.1 METADATA_UPDATED
+^^^^^^^^^^^^^^^^^^^^^^
+
+   informs other nodes that the metadata has
+   been updated, and the node must re-read the md superblock. This is
+   performed synchronously. It is primarily used to signal device
+   failure.
+
+3.1.2 RESYNCING
+^^^^^^^^^^^^^^^
+   informs other nodes that a resync is initiated or
+   ended so that each node may suspend or resume the region.  Each
+   RESYNCING message identifies a range of the devices that the
+   sending node is about to resync. This overrides any previous
+   notification from that node: only one ranged can be resynced at a
+   time per-node.
+
+3.1.3 NEWDISK
+^^^^^^^^^^^^^
+
+   informs other nodes that a device is being added to
+   the array. Message contains an identifier for that device.  See
+   below for further details.
+
+3.1.4 REMOVE
+^^^^^^^^^^^^
+
+   A failed or spare device is being removed from the
+   array. The slot-number of the device is included in the message.
+
+ 3.1.5 RE_ADD:
+
+   A failed device is being re-activated - the assumption
+   is that it has been determined to be working again.
+
+ 3.1.6 BITMAP_NEEDS_SYNC:
+
+   If a node is stopped locally but the bitmap
+   isn't clean, then another node is informed to take the ownership of
+   resync.
+
+3.2 Communication mechanism
+---------------------------
+
+ The DLM LVB is used to communicate within nodes of the cluster. There
+ are three resources used for the purpose:
+
+3.2.1 token
+^^^^^^^^^^^
+   The resource which protects the entire communication
+   system. The node having the token resource is allowed to
+   communicate.
+
+3.2.2 message
+^^^^^^^^^^^^^
+   The lock resource which carries the data to communicate.
+
+3.2.3 ack
+^^^^^^^^^
+
+   The resource, acquiring which means the message has been
+   acknowledged by all nodes in the cluster. The BAST of the resource
+   is used to inform the receiving node that a node wants to
+   communicate.
+
+The algorithm is:
+
+ 1. receive status - all nodes have concurrent-reader lock on "ack"::
+
+	sender                         receiver                 receiver
+	"ack":CR                       "ack":CR                 "ack":CR
+
+ 2. sender get EX on "token",
+    sender get EX on "message"::
+
+	sender                        receiver                 receiver
+	"token":EX                    "ack":CR                 "ack":CR
+	"message":EX
+	"ack":CR
+
+    Sender checks that it still needs to send a message. Messages
+    received or other events that happened while waiting for the
+    "token" may have made this message inappropriate or redundant.
+
+ 3. sender writes LVB
+
+    sender down-convert "message" from EX to CW
+
+    sender try to get EX of "ack"
+
+    ::
+
+      [ wait until all receivers have *processed* the "message" ]
+
+                                       [ triggered by bast of "ack" ]
+                                       receiver get CR on "message"
+                                       receiver read LVB
+                                       receiver processes the message
+                                       [ wait finish ]
+                                       receiver releases "ack"
+                                       receiver tries to get PR on "message"
+
+     sender                         receiver                  receiver
+     "token":EX                     "message":CR              "message":CR
+     "message":CW
+     "ack":EX
+
+ 4. triggered by grant of EX on "ack" (indicating all receivers
+    have processed message)
+
+    sender down-converts "ack" from EX to CR
+
+    sender releases "message"
+
+    sender releases "token"
+
+    ::
+
+                                 receiver upconvert to PR on "message"
+                                 receiver get CR of "ack"
+                                 receiver release "message"
+
+     sender                      receiver                   receiver
+     "ack":CR                    "ack":CR                   "ack":CR
+
+
+4. Handling Failures
+====================
+
+4.1 Node Failure
+----------------
+
+ When a node fails, the DLM informs the cluster with the slot
+ number. The node starts a cluster recovery thread. The cluster
+ recovery thread:
+
+	- acquires the bitmap<number> lock of the failed node
+	- opens the bitmap
+	- reads the bitmap of the failed node
+	- copies the set bitmap to local node
+	- cleans the bitmap of the failed node
+	- releases bitmap<number> lock of the failed node
+	- initiates resync of the bitmap on the current node
+	  md_check_recovery is invoked within recover_bitmaps,
+	  then md_check_recovery -> metadata_update_start/finish,
+	  it will lock the communication by lock_comm.
+	  Which means when one node is resyncing it blocks all
+	  other nodes from writing anywhere on the array.
+
+ The resync process is the regular md resync. However, in a clustered
+ environment when a resync is performed, it needs to tell other nodes
+ of the areas which are suspended. Before a resync starts, the node
+ send out RESYNCING with the (lo,hi) range of the area which needs to
+ be suspended. Each node maintains a suspend_list, which contains the
+ list of ranges which are currently suspended. On receiving RESYNCING,
+ the node adds the range to the suspend_list. Similarly, when the node
+ performing resync finishes, it sends RESYNCING with an empty range to
+ other nodes and other nodes remove the corresponding entry from the
+ suspend_list.
+
+ A helper function, ->area_resyncing() can be used to check if a
+ particular I/O range should be suspended or not.
+
+4.2 Device Failure
+==================
+
+ Device failures are handled and communicated with the metadata update
+ routine.  When a node detects a device failure it does not allow
+ any further writes to that device until the failure has been
+ acknowledged by all other nodes.
+
+5. Adding a new Device
+----------------------
+
+ For adding a new device, it is necessary that all nodes "see" the new
+ device to be added. For this, the following algorithm is used:
+
+   1.  Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues
+       ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CLUSTER_ADD)
+   2.  Node 1 sends a NEWDISK message with uuid and slot number
+   3.  Other nodes issue kobject_uevent_env with uuid and slot number
+       (Steps 4,5 could be a udev rule)
+   4.  In userspace, the node searches for the disk, perhaps
+       using blkid -t SUB_UUID=""
+   5.  Other nodes issue either of the following depending on whether
+       the disk was found:
+       ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and
+       disc.number set to slot number)
+       ioctl(CLUSTERED_DISK_NACK)
+   6.  Other nodes drop lock on "no-new-devs" (CR) if device is found
+   7.  Node 1 attempts EX lock on "no-new-dev"
+   8.  If node 1 gets the lock, it sends METADATA_UPDATED after
+       unmarking the disk as SpareLocal
+   9.  If not (get "no-new-dev" lock), it fails the operation and sends
+       METADATA_UPDATED.
+   10. Other nodes get the information whether a disk is added or not
+       by the following METADATA_UPDATED.
+
+6. Module interface
+===================
+
+ There are 17 call-backs which the md core can make to the cluster
+ module.  Understanding these can give a good overview of the whole
+ process.
+
+6.1 join(nodes) and leave()
+---------------------------
+
+ These are called when an array is started with a clustered bitmap,
+ and when the array is stopped.  join() ensures the cluster is
+ available and initializes the various resources.
+ Only the first 'nodes' nodes in the cluster can use the array.
+
+6.2 slot_number()
+-----------------
+
+ Reports the slot number advised by the cluster infrastructure.
+ Range is from 0 to nodes-1.
+
+6.3 resync_info_update()
+------------------------
+
+ This updates the resync range that is stored in the bitmap lock.
+ The starting point is updated as the resync progresses.  The
+ end point is always the end of the array.
+ It does *not* send a RESYNCING message.
+
+6.4 resync_start(), resync_finish()
+-----------------------------------
+
+ These are called when resync/recovery/reshape starts or stops.
+ They update the resyncing range in the bitmap lock and also
+ send a RESYNCING message.  resync_start reports the whole
+ array as resyncing, resync_finish reports none of it.
+
+ resync_finish() also sends a BITMAP_NEEDS_SYNC message which
+ allows some other node to take over.
+
+6.5 metadata_update_start(), metadata_update_finish(), metadata_update_cancel()
+-------------------------------------------------------------------------------
+
+ metadata_update_start is used to get exclusive access to
+ the metadata.  If a change is still needed once that access is
+ gained, metadata_update_finish() will send a METADATA_UPDATE
+ message to all other nodes, otherwise metadata_update_cancel()
+ can be used to release the lock.
+
+6.6 area_resyncing()
+--------------------
+
+ This combines two elements of functionality.
+
+ Firstly, it will check if any node is currently resyncing
+ anything in a given range of sectors.  If any resync is found,
+ then the caller will avoid writing or read-balancing in that
+ range.
+
+ Secondly, while node recovery is happening it reports that
+ all areas are resyncing for READ requests.  This avoids races
+ between the cluster-filesystem and the cluster-RAID handling
+ a node failure.
+
+6.7 add_new_disk_start(), add_new_disk_finish(), new_disk_ack()
+---------------------------------------------------------------
+
+ These are used to manage the new-disk protocol described above.
+ When a new device is added, add_new_disk_start() is called before
+ it is bound to the array and, if that succeeds, add_new_disk_finish()
+ is called the device is fully added.
+
+ When a device is added in acknowledgement to a previous
+ request, or when the device is declared "unavailable",
+ new_disk_ack() is called.
+
+6.8 remove_disk()
+-----------------
+
+ This is called when a spare or failed device is removed from
+ the array.  It causes a REMOVE message to be send to other nodes.
+
+6.9 gather_bitmaps()
+--------------------
+
+ This sends a RE_ADD message to all other nodes and then
+ gathers bitmap information from all bitmaps.  This combined
+ bitmap is then used to recovery the re-added device.
+
+6.10 lock_all_bitmaps() and unlock_all_bitmaps()
+------------------------------------------------
+
+ These are called when change bitmap to none. If a node plans
+ to clear the cluster raid's bitmap, it need to make sure no other
+ nodes are using the raid which is achieved by lock all bitmap
+ locks within the cluster, and also those locks are unlocked
+ accordingly.
+
+7. Unsupported features
+=======================
+
+There are somethings which are not supported by cluster MD yet.
+
+- change array_sectors.
diff --git a/Documentation/md/md-cluster.txt b/Documentation/md/md-cluster.txt
deleted file mode 100644
index e1055f105cf5..000000000000
--- a/Documentation/md/md-cluster.txt
+++ /dev/null
@@ -1,325 +0,0 @@
-The cluster MD is a shared-device RAID for a cluster, it supports
-two levels: raid1 and raid10 (limited support).
-
-
-1. On-disk format
-
-Separate write-intent-bitmaps are used for each cluster node.
-The bitmaps record all writes that may have been started on that node,
-and may not yet have finished. The on-disk layout is:
-
-0                    4k                     8k                    12k
--------------------------------------------------------------------
-| idle                | md super            | bm super [0] + bits |
-| bm bits[0, contd]   | bm super[1] + bits  | bm bits[1, contd]   |
-| bm super[2] + bits  | bm bits [2, contd]  | bm super[3] + bits  |
-| bm bits [3, contd]  |                     |                     |
-
-During "normal" functioning we assume the filesystem ensures that only
-one node writes to any given block at a time, so a write request will
-
- - set the appropriate bit (if not already set)
- - commit the write to all mirrors
- - schedule the bit to be cleared after a timeout.
-
-Reads are just handled normally. It is up to the filesystem to ensure
-one node doesn't read from a location where another node (or the same
-node) is writing.
-
-
-2. DLM Locks for management
-
-There are three groups of locks for managing the device:
-
-2.1 Bitmap lock resource (bm_lockres)
-
- The bm_lockres protects individual node bitmaps. They are named in
- the form bitmap000 for node 1, bitmap001 for node 2 and so on. When a
- node joins the cluster, it acquires the lock in PW mode and it stays
- so during the lifetime the node is part of the cluster. The lock
- resource number is based on the slot number returned by the DLM
- subsystem. Since DLM starts node count from one and bitmap slots
- start from zero, one is subtracted from the DLM slot number to arrive
- at the bitmap slot number.
-
- The LVB of the bitmap lock for a particular node records the range
- of sectors that are being re-synced by that node.  No other
- node may write to those sectors.  This is used when a new nodes
- joins the cluster.
-
-2.2 Message passing locks
-
- Each node has to communicate with other nodes when starting or ending
- resync, and for metadata superblock updates.  This communication is
- managed through three locks: "token", "message", and "ack", together
- with the Lock Value Block (LVB) of one of the "message" lock.
-
-2.3 new-device management
-
- A single lock: "no-new-dev" is used to co-ordinate the addition of
- new devices - this must be synchronized across the array.
- Normally all nodes hold a concurrent-read lock on this device.
-
-3. Communication
-
- Messages can be broadcast to all nodes, and the sender waits for all
- other nodes to acknowledge the message before proceeding.  Only one
- message can be processed at a time.
-
-3.1 Message Types
-
- There are six types of messages which are passed:
-
- 3.1.1 METADATA_UPDATED: informs other nodes that the metadata has
-   been updated, and the node must re-read the md superblock. This is
-   performed synchronously. It is primarily used to signal device
-   failure.
-
- 3.1.2 RESYNCING: informs other nodes that a resync is initiated or
-   ended so that each node may suspend or resume the region.  Each
-   RESYNCING message identifies a range of the devices that the
-   sending node is about to resync. This overrides any previous
-   notification from that node: only one ranged can be resynced at a
-   time per-node.
-
- 3.1.3 NEWDISK: informs other nodes that a device is being added to
-   the array. Message contains an identifier for that device.  See
-   below for further details.
-
- 3.1.4 REMOVE: A failed or spare device is being removed from the
-   array. The slot-number of the device is included in the message.
-
- 3.1.5 RE_ADD: A failed device is being re-activated - the assumption
-   is that it has been determined to be working again.
-
- 3.1.6 BITMAP_NEEDS_SYNC: if a node is stopped locally but the bitmap
-   isn't clean, then another node is informed to take the ownership of
-   resync.
-
-3.2 Communication mechanism
-
- The DLM LVB is used to communicate within nodes of the cluster. There
- are three resources used for the purpose:
-
-  3.2.1 token: The resource which protects the entire communication
-   system. The node having the token resource is allowed to
-   communicate.
-
-  3.2.2 message: The lock resource which carries the data to
-   communicate.
-
-  3.2.3 ack: The resource, acquiring which means the message has been
-   acknowledged by all nodes in the cluster. The BAST of the resource
-   is used to inform the receiving node that a node wants to
-   communicate.
-
-The algorithm is:
-
- 1. receive status - all nodes have concurrent-reader lock on "ack".
-
-   sender                         receiver                 receiver
-   "ack":CR                       "ack":CR                 "ack":CR
-
- 2. sender get EX on "token"
-    sender get EX on "message"
-    sender                        receiver                 receiver
-    "token":EX                    "ack":CR                 "ack":CR
-    "message":EX
-    "ack":CR
-
-    Sender checks that it still needs to send a message. Messages
-    received or other events that happened while waiting for the
-    "token" may have made this message inappropriate or redundant.
-
- 3. sender writes LVB.
-    sender down-convert "message" from EX to CW
-    sender try to get EX of "ack"
-    [ wait until all receivers have *processed* the "message" ]
-
-                                     [ triggered by bast of "ack" ]
-                                     receiver get CR on "message"
-                                     receiver read LVB
-                                     receiver processes the message
-                                     [ wait finish ]
-                                     receiver releases "ack"
-                                     receiver tries to get PR on "message"
-
-   sender                         receiver                  receiver
-   "token":EX                     "message":CR              "message":CR
-   "message":CW
-   "ack":EX
-
- 4. triggered by grant of EX on "ack" (indicating all receivers
-    have processed message)
-    sender down-converts "ack" from EX to CR
-    sender releases "message"
-    sender releases "token"
-                               receiver upconvert to PR on "message"
-                               receiver get CR of "ack"
-                               receiver release "message"
-
-   sender                      receiver                   receiver
-   "ack":CR                    "ack":CR                   "ack":CR
-
-
-4. Handling Failures
-
-4.1 Node Failure
-
- When a node fails, the DLM informs the cluster with the slot
- number. The node starts a cluster recovery thread. The cluster
- recovery thread:
-
-	- acquires the bitmap<number> lock of the failed node
-	- opens the bitmap
-	- reads the bitmap of the failed node
-	- copies the set bitmap to local node
-	- cleans the bitmap of the failed node
-	- releases bitmap<number> lock of the failed node
-	- initiates resync of the bitmap on the current node
-		md_check_recovery is invoked within recover_bitmaps,
-		then md_check_recovery -> metadata_update_start/finish,
-		it will lock the communication by lock_comm.
-		Which means when one node is resyncing it blocks all
-		other nodes from writing anywhere on the array.
-
- The resync process is the regular md resync. However, in a clustered
- environment when a resync is performed, it needs to tell other nodes
- of the areas which are suspended. Before a resync starts, the node
- send out RESYNCING with the (lo,hi) range of the area which needs to
- be suspended. Each node maintains a suspend_list, which contains the
- list of ranges which are currently suspended. On receiving RESYNCING,
- the node adds the range to the suspend_list. Similarly, when the node
- performing resync finishes, it sends RESYNCING with an empty range to
- other nodes and other nodes remove the corresponding entry from the
- suspend_list.
-
- A helper function, ->area_resyncing() can be used to check if a
- particular I/O range should be suspended or not.
-
-4.2 Device Failure
-
- Device failures are handled and communicated with the metadata update
- routine.  When a node detects a device failure it does not allow
- any further writes to that device until the failure has been
- acknowledged by all other nodes.
-
-5. Adding a new Device
-
- For adding a new device, it is necessary that all nodes "see" the new
- device to be added. For this, the following algorithm is used:
-
-    1. Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues
-       ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CLUSTER_ADD)
-    2. Node 1 sends a NEWDISK message with uuid and slot number
-    3. Other nodes issue kobject_uevent_env with uuid and slot number
-       (Steps 4,5 could be a udev rule)
-    4. In userspace, the node searches for the disk, perhaps
-       using blkid -t SUB_UUID=""
-    5. Other nodes issue either of the following depending on whether
-       the disk was found:
-       ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and
-             disc.number set to slot number)
-       ioctl(CLUSTERED_DISK_NACK)
-    6. Other nodes drop lock on "no-new-devs" (CR) if device is found
-    7. Node 1 attempts EX lock on "no-new-dev"
-    8. If node 1 gets the lock, it sends METADATA_UPDATED after
-       unmarking the disk as SpareLocal
-    9. If not (get "no-new-dev" lock), it fails the operation and sends
-       METADATA_UPDATED.
-   10. Other nodes get the information whether a disk is added or not
-       by the following METADATA_UPDATED.
-
-6. Module interface.
-
- There are 17 call-backs which the md core can make to the cluster
- module.  Understanding these can give a good overview of the whole
- process.
-
-6.1 join(nodes) and leave()
-
- These are called when an array is started with a clustered bitmap,
- and when the array is stopped.  join() ensures the cluster is
- available and initializes the various resources.
- Only the first 'nodes' nodes in the cluster can use the array.
-
-6.2 slot_number()
-
- Reports the slot number advised by the cluster infrastructure.
- Range is from 0 to nodes-1.
-
-6.3 resync_info_update()
-
- This updates the resync range that is stored in the bitmap lock.
- The starting point is updated as the resync progresses.  The
- end point is always the end of the array.
- It does *not* send a RESYNCING message.
-
-6.4 resync_start(), resync_finish()
-
- These are called when resync/recovery/reshape starts or stops.
- They update the resyncing range in the bitmap lock and also
- send a RESYNCING message.  resync_start reports the whole
- array as resyncing, resync_finish reports none of it.
-
- resync_finish() also sends a BITMAP_NEEDS_SYNC message which
- allows some other node to take over.
-
-6.5 metadata_update_start(), metadata_update_finish(),
-    metadata_update_cancel().
-
- metadata_update_start is used to get exclusive access to
- the metadata.  If a change is still needed once that access is
- gained, metadata_update_finish() will send a METADATA_UPDATE
- message to all other nodes, otherwise metadata_update_cancel()
- can be used to release the lock.
-
-6.6 area_resyncing()
-
- This combines two elements of functionality.
-
- Firstly, it will check if any node is currently resyncing
- anything in a given range of sectors.  If any resync is found,
- then the caller will avoid writing or read-balancing in that
- range.
-
- Secondly, while node recovery is happening it reports that
- all areas are resyncing for READ requests.  This avoids races
- between the cluster-filesystem and the cluster-RAID handling
- a node failure.
-
-6.7 add_new_disk_start(), add_new_disk_finish(), new_disk_ack()
-
- These are used to manage the new-disk protocol described above.
- When a new device is added, add_new_disk_start() is called before
- it is bound to the array and, if that succeeds, add_new_disk_finish()
- is called the device is fully added.
-
- When a device is added in acknowledgement to a previous
- request, or when the device is declared "unavailable",
- new_disk_ack() is called.
-
-6.8 remove_disk()
-
- This is called when a spare or failed device is removed from
- the array.  It causes a REMOVE message to be send to other nodes.
-
-6.9 gather_bitmaps()
-
- This sends a RE_ADD message to all other nodes and then
- gathers bitmap information from all bitmaps.  This combined
- bitmap is then used to recovery the re-added device.
-
-6.10 lock_all_bitmaps() and unlock_all_bitmaps()
-
- These are called when change bitmap to none. If a node plans
- to clear the cluster raid's bitmap, it need to make sure no other
- nodes are using the raid which is achieved by lock all bitmap
- locks within the cluster, and also those locks are unlocked
- accordingly.
-
-7. Unsupported features
-
-There are somethings which are not supported by cluster MD yet.
-
-- change array_sectors.
diff --git a/Documentation/md/raid5-cache.rst b/Documentation/md/raid5-cache.rst
new file mode 100644
index 000000000000..d7a15f44a7c3
--- /dev/null
+++ b/Documentation/md/raid5-cache.rst
@@ -0,0 +1,111 @@
+================
+RAID 4/5/6 cache
+================
+
+Raid 4/5/6 could include an extra disk for data cache besides normal RAID
+disks. The role of RAID disks isn't changed with the cache disk. The cache disk
+caches data to the RAID disks. The cache can be in write-through (supported
+since 4.4) or write-back mode (supported since 4.10). mdadm (supported since
+3.4) has a new option '--write-journal' to create array with cache. Please
+refer to mdadm manual for details. By default (RAID array starts), the cache is
+in write-through mode. A user can switch it to write-back mode by::
+
+	echo "write-back" > /sys/block/md0/md/journal_mode
+
+And switch it back to write-through mode by::
+
+	echo "write-through" > /sys/block/md0/md/journal_mode
+
+In both modes, all writes to the array will hit cache disk first. This means
+the cache disk must be fast and sustainable.
+
+write-through mode
+==================
+
+This mode mainly fixes the 'write hole' issue. For RAID 4/5/6 array, an unclean
+shutdown can cause data in some stripes to not be in consistent state, eg, data
+and parity don't match. The reason is that a stripe write involves several RAID
+disks and it's possible the writes don't hit all RAID disks yet before the
+unclean shutdown. We call an array degraded if it has inconsistent data. MD
+tries to resync the array to bring it back to normal state. But before the
+resync completes, any system crash will expose the chance of real data
+corruption in the RAID array. This problem is called 'write hole'.
+
+The write-through cache will cache all data on cache disk first. After the data
+is safe on the cache disk, the data will be flushed onto RAID disks. The
+two-step write will guarantee MD can recover correct data after unclean
+shutdown even the array is degraded. Thus the cache can close the 'write hole'.
+
+In write-through mode, MD reports IO completion to upper layer (usually
+filesystems) after the data is safe on RAID disks, so cache disk failure
+doesn't cause data loss. Of course cache disk failure means the array is
+exposed to 'write hole' again.
+
+In write-through mode, the cache disk isn't required to be big. Several
+hundreds megabytes are enough.
+
+write-back mode
+===============
+
+write-back mode fixes the 'write hole' issue too, since all write data is
+cached on cache disk. But the main goal of 'write-back' cache is to speed up
+write. If a write crosses all RAID disks of a stripe, we call it full-stripe
+write. For non-full-stripe writes, MD must read old data before the new parity
+can be calculated. These synchronous reads hurt write throughput. Some writes
+which are sequential but not dispatched in the same time will suffer from this
+overhead too. Write-back cache will aggregate the data and flush the data to
+RAID disks only after the data becomes a full stripe write. This will
+completely avoid the overhead, so it's very helpful for some workloads. A
+typical workload which does sequential write followed by fsync is an example.
+
+In write-back mode, MD reports IO completion to upper layer (usually
+filesystems) right after the data hits cache disk. The data is flushed to raid
+disks later after specific conditions met. So cache disk failure will cause
+data loss.
+
+In write-back mode, MD also caches data in memory. The memory cache includes
+the same data stored on cache disk, so a power loss doesn't cause data loss.
+The memory cache size has performance impact for the array. It's recommended
+the size is big. A user can configure the size by::
+
+	echo "2048" > /sys/block/md0/md/stripe_cache_size
+
+Too small cache disk will make the write aggregation less efficient in this
+mode depending on the workloads. It's recommended to use a cache disk with at
+least several gigabytes size in write-back mode.
+
+The implementation
+==================
+
+The write-through and write-back cache use the same disk format. The cache disk
+is organized as a simple write log. The log consists of 'meta data' and 'data'
+pairs. The meta data describes the data. It also includes checksum and sequence
+ID for recovery identification. Data can be IO data and parity data. Data is
+checksumed too. The checksum is stored in the meta data ahead of the data. The
+checksum is an optimization because MD can write meta and data freely without
+worry about the order. MD superblock has a field pointed to the valid meta data
+of log head.
+
+The log implementation is pretty straightforward. The difficult part is the
+order in which MD writes data to cache disk and RAID disks. Specifically, in
+write-through mode, MD calculates parity for IO data, writes both IO data and
+parity to the log, writes the data and parity to RAID disks after the data and
+parity is settled down in log and finally the IO is finished. Read just reads
+from raid disks as usual.
+
+In write-back mode, MD writes IO data to the log and reports IO completion. The
+data is also fully cached in memory at that time, which means read must query
+memory cache. If some conditions are met, MD will flush the data to RAID disks.
+MD will calculate parity for the data and write parity into the log. After this
+is finished, MD will write both data and parity into RAID disks, then MD can
+release the memory cache. The flush conditions could be stripe becomes a full
+stripe write, free cache disk space is low or free in-kernel memory cache space
+is low.
+
+After an unclean shutdown, MD does recovery. MD reads all meta data and data
+from the log. The sequence ID and checksum will help us detect corrupted meta
+data and data. If MD finds a stripe with data and valid parities (1 parity for
+raid4/5 and 2 for raid6), MD will write the data and parities to RAID disks. If
+parities are incompleted, they are discarded. If part of data is corrupted,
+they are discarded too. MD then loads valid data and writes them to RAID disks
+in normal way.
diff --git a/Documentation/md/raid5-cache.txt b/Documentation/md/raid5-cache.txt
deleted file mode 100644
index 2b210f295786..000000000000
--- a/Documentation/md/raid5-cache.txt
+++ /dev/null
@@ -1,109 +0,0 @@
-RAID5 cache
-
-Raid 4/5/6 could include an extra disk for data cache besides normal RAID
-disks. The role of RAID disks isn't changed with the cache disk. The cache disk
-caches data to the RAID disks. The cache can be in write-through (supported
-since 4.4) or write-back mode (supported since 4.10). mdadm (supported since
-3.4) has a new option '--write-journal' to create array with cache. Please
-refer to mdadm manual for details. By default (RAID array starts), the cache is
-in write-through mode. A user can switch it to write-back mode by:
-
-echo "write-back" > /sys/block/md0/md/journal_mode
-
-And switch it back to write-through mode by:
-
-echo "write-through" > /sys/block/md0/md/journal_mode
-
-In both modes, all writes to the array will hit cache disk first. This means
-the cache disk must be fast and sustainable.
-
--------------------------------------
-write-through mode:
-
-This mode mainly fixes the 'write hole' issue. For RAID 4/5/6 array, an unclean
-shutdown can cause data in some stripes to not be in consistent state, eg, data
-and parity don't match. The reason is that a stripe write involves several RAID
-disks and it's possible the writes don't hit all RAID disks yet before the
-unclean shutdown. We call an array degraded if it has inconsistent data. MD
-tries to resync the array to bring it back to normal state. But before the
-resync completes, any system crash will expose the chance of real data
-corruption in the RAID array. This problem is called 'write hole'.
-
-The write-through cache will cache all data on cache disk first. After the data
-is safe on the cache disk, the data will be flushed onto RAID disks. The
-two-step write will guarantee MD can recover correct data after unclean
-shutdown even the array is degraded. Thus the cache can close the 'write hole'.
-
-In write-through mode, MD reports IO completion to upper layer (usually
-filesystems) after the data is safe on RAID disks, so cache disk failure
-doesn't cause data loss. Of course cache disk failure means the array is
-exposed to 'write hole' again.
-
-In write-through mode, the cache disk isn't required to be big. Several
-hundreds megabytes are enough.
-
---------------------------------------
-write-back mode:
-
-write-back mode fixes the 'write hole' issue too, since all write data is
-cached on cache disk. But the main goal of 'write-back' cache is to speed up
-write. If a write crosses all RAID disks of a stripe, we call it full-stripe
-write. For non-full-stripe writes, MD must read old data before the new parity
-can be calculated. These synchronous reads hurt write throughput. Some writes
-which are sequential but not dispatched in the same time will suffer from this
-overhead too. Write-back cache will aggregate the data and flush the data to
-RAID disks only after the data becomes a full stripe write. This will
-completely avoid the overhead, so it's very helpful for some workloads. A
-typical workload which does sequential write followed by fsync is an example.
-
-In write-back mode, MD reports IO completion to upper layer (usually
-filesystems) right after the data hits cache disk. The data is flushed to raid
-disks later after specific conditions met. So cache disk failure will cause
-data loss.
-
-In write-back mode, MD also caches data in memory. The memory cache includes
-the same data stored on cache disk, so a power loss doesn't cause data loss.
-The memory cache size has performance impact for the array. It's recommended
-the size is big. A user can configure the size by:
-
-echo "2048" > /sys/block/md0/md/stripe_cache_size
-
-Too small cache disk will make the write aggregation less efficient in this
-mode depending on the workloads. It's recommended to use a cache disk with at
-least several gigabytes size in write-back mode.
-
---------------------------------------
-The implementation:
-
-The write-through and write-back cache use the same disk format. The cache disk
-is organized as a simple write log. The log consists of 'meta data' and 'data'
-pairs. The meta data describes the data. It also includes checksum and sequence
-ID for recovery identification. Data can be IO data and parity data. Data is
-checksumed too. The checksum is stored in the meta data ahead of the data. The
-checksum is an optimization because MD can write meta and data freely without
-worry about the order. MD superblock has a field pointed to the valid meta data
-of log head.
-
-The log implementation is pretty straightforward. The difficult part is the
-order in which MD writes data to cache disk and RAID disks. Specifically, in
-write-through mode, MD calculates parity for IO data, writes both IO data and
-parity to the log, writes the data and parity to RAID disks after the data and
-parity is settled down in log and finally the IO is finished. Read just reads
-from raid disks as usual.
-
-In write-back mode, MD writes IO data to the log and reports IO completion. The
-data is also fully cached in memory at that time, which means read must query
-memory cache. If some conditions are met, MD will flush the data to RAID disks.
-MD will calculate parity for the data and write parity into the log. After this
-is finished, MD will write both data and parity into RAID disks, then MD can
-release the memory cache. The flush conditions could be stripe becomes a full
-stripe write, free cache disk space is low or free in-kernel memory cache space
-is low.
-
-After an unclean shutdown, MD does recovery. MD reads all meta data and data
-from the log. The sequence ID and checksum will help us detect corrupted meta
-data and data. If MD finds a stripe with data and valid parities (1 parity for
-raid4/5 and 2 for raid6), MD will write the data and parities to RAID disks. If
-parities are incompleted, they are discarded. If part of data is corrupted,
-they are discarded too. MD then loads valid data and writes them to RAID disks
-in normal way.
diff --git a/Documentation/md/raid5-ppl.rst b/Documentation/md/raid5-ppl.rst
new file mode 100644
index 000000000000..357e5515bc55
--- /dev/null
+++ b/Documentation/md/raid5-ppl.rst
@@ -0,0 +1,47 @@
+==================
+Partial Parity Log
+==================
+
+Partial Parity Log (PPL) is a feature available for RAID5 arrays. The issue
+addressed by PPL is that after a dirty shutdown, parity of a particular stripe
+may become inconsistent with data on other member disks. If the array is also
+in degraded state, there is no way to recalculate parity, because one of the
+disks is missing. This can lead to silent data corruption when rebuilding the
+array or using it is as degraded - data calculated from parity for array blocks
+that have not been touched by a write request during the unclean shutdown can
+be incorrect. Such condition is known as the RAID5 Write Hole. Because of
+this, md by default does not allow starting a dirty degraded array.
+
+Partial parity for a write operation is the XOR of stripe data chunks not
+modified by this write. It is just enough data needed for recovering from the
+write hole. XORing partial parity with the modified chunks produces parity for
+the stripe, consistent with its state before the write operation, regardless of
+which chunk writes have completed. If one of the not modified data disks of
+this stripe is missing, this updated parity can be used to recover its
+contents. PPL recovery is also performed when starting an array after an
+unclean shutdown and all disks are available, eliminating the need to resync
+the array. Because of this, using write-intent bitmap and PPL together is not
+supported.
+
+When handling a write request PPL writes partial parity before new data and
+parity are dispatched to disks. PPL is a distributed log - it is stored on
+array member drives in the metadata area, on the parity drive of a particular
+stripe.  It does not require a dedicated journaling drive. Write performance is
+reduced by up to 30%-40% but it scales with the number of drives in the array
+and the journaling drive does not become a bottleneck or a single point of
+failure.
+
+Unlike raid5-cache, the other solution in md for closing the write hole, PPL is
+not a true journal. It does not protect from losing in-flight data, only from
+silent data corruption. If a dirty disk of a stripe is lost, no PPL recovery is
+performed for this stripe (parity is not updated). So it is possible to have
+arbitrary data in the written part of a stripe if that disk is lost. In such
+case the behavior is the same as in plain raid5.
+
+PPL is available for md version-1 metadata and external (specifically IMSM)
+metadata arrays. It can be enabled using mdadm option --consistency-policy=ppl.
+
+There is a limitation of maximum 64 disks in the array for PPL. It allows to
+keep data structures and implementation simple. RAID5 arrays with so many disks
+are not likely due to high risk of multiple disks failure. Such restriction
+should not be a real life limitation.
diff --git a/Documentation/md/raid5-ppl.txt b/Documentation/md/raid5-ppl.txt
deleted file mode 100644
index bfa092589e00..000000000000
--- a/Documentation/md/raid5-ppl.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-Partial Parity Log
-
-Partial Parity Log (PPL) is a feature available for RAID5 arrays. The issue
-addressed by PPL is that after a dirty shutdown, parity of a particular stripe
-may become inconsistent with data on other member disks. If the array is also
-in degraded state, there is no way to recalculate parity, because one of the
-disks is missing. This can lead to silent data corruption when rebuilding the
-array or using it is as degraded - data calculated from parity for array blocks
-that have not been touched by a write request during the unclean shutdown can
-be incorrect. Such condition is known as the RAID5 Write Hole. Because of
-this, md by default does not allow starting a dirty degraded array.
-
-Partial parity for a write operation is the XOR of stripe data chunks not
-modified by this write. It is just enough data needed for recovering from the
-write hole. XORing partial parity with the modified chunks produces parity for
-the stripe, consistent with its state before the write operation, regardless of
-which chunk writes have completed. If one of the not modified data disks of
-this stripe is missing, this updated parity can be used to recover its
-contents. PPL recovery is also performed when starting an array after an
-unclean shutdown and all disks are available, eliminating the need to resync
-the array. Because of this, using write-intent bitmap and PPL together is not
-supported.
-
-When handling a write request PPL writes partial parity before new data and
-parity are dispatched to disks. PPL is a distributed log - it is stored on
-array member drives in the metadata area, on the parity drive of a particular
-stripe.  It does not require a dedicated journaling drive. Write performance is
-reduced by up to 30%-40% but it scales with the number of drives in the array
-and the journaling drive does not become a bottleneck or a single point of
-failure.
-
-Unlike raid5-cache, the other solution in md for closing the write hole, PPL is
-not a true journal. It does not protect from losing in-flight data, only from
-silent data corruption. If a dirty disk of a stripe is lost, no PPL recovery is
-performed for this stripe (parity is not updated). So it is possible to have
-arbitrary data in the written part of a stripe if that disk is lost. In such
-case the behavior is the same as in plain raid5.
-
-PPL is available for md version-1 metadata and external (specifically IMSM)
-metadata arrays. It can be enabled using mdadm option --consistency-policy=ppl.
-
-There is a limitation of maximum 64 disks in the array for PPL. It allows to
-keep data structures and implementation simple. RAID5 arrays with so many disks
-are not likely due to high risk of multiple disks failure. Such restriction
-should not be a real life limitation.
-- 
cgit 


From 6e58e2d81367308ffd891bd0b34d47e9104e7ae4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 13:44:38 -0300
Subject: docs: mtd: convert to ReST

Rename the mtd documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

It should be noticed that Sphinx doesn't handle very well
URLs with dots in the middle. Thankfully, internally, the '.'
char is translated to %2E, so we can jus use %2E instead of
dots, and this will work fine on both text and processed files.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/mtd/index.rst     |  12 +
 Documentation/mtd/intel-spi.rst |  90 +++++
 Documentation/mtd/intel-spi.txt |  88 -----
 Documentation/mtd/nand_ecc.rst  | 763 ++++++++++++++++++++++++++++++++++++++++
 Documentation/mtd/nand_ecc.txt  | 714 -------------------------------------
 Documentation/mtd/spi-nor.rst   |  66 ++++
 Documentation/mtd/spi-nor.txt   |  65 ----
 7 files changed, 931 insertions(+), 867 deletions(-)
 create mode 100644 Documentation/mtd/index.rst
 create mode 100644 Documentation/mtd/intel-spi.rst
 delete mode 100644 Documentation/mtd/intel-spi.txt
 create mode 100644 Documentation/mtd/nand_ecc.rst
 delete mode 100644 Documentation/mtd/nand_ecc.txt
 create mode 100644 Documentation/mtd/spi-nor.rst
 delete mode 100644 Documentation/mtd/spi-nor.txt

(limited to 'Documentation')

diff --git a/Documentation/mtd/index.rst b/Documentation/mtd/index.rst
new file mode 100644
index 000000000000..4fdae418ac97
--- /dev/null
+++ b/Documentation/mtd/index.rst
@@ -0,0 +1,12 @@
+:orphan:
+
+==============================
+Memory Technology Device (MTD)
+==============================
+
+.. toctree::
+   :maxdepth: 1
+
+   intel-spi
+   nand_ecc
+   spi-nor
diff --git a/Documentation/mtd/intel-spi.rst b/Documentation/mtd/intel-spi.rst
new file mode 100644
index 000000000000..0e6d9cd5388d
--- /dev/null
+++ b/Documentation/mtd/intel-spi.rst
@@ -0,0 +1,90 @@
+==============================
+Upgrading BIOS using intel-spi
+==============================
+
+Many Intel CPUs like Baytrail and Braswell include SPI serial flash host
+controller which is used to hold BIOS and other platform specific data.
+Since contents of the SPI serial flash is crucial for machine to function,
+it is typically protected by different hardware protection mechanisms to
+avoid accidental (or on purpose) overwrite of the content.
+
+Not all manufacturers protect the SPI serial flash, mainly because it
+allows upgrading the BIOS image directly from an OS.
+
+The intel-spi driver makes it possible to read and write the SPI serial
+flash, if certain protection bits are not set and locked. If it finds
+any of them set, the whole MTD device is made read-only to prevent
+partial overwrites. By default the driver exposes SPI serial flash
+contents as read-only but it can be changed from kernel command line,
+passing "intel-spi.writeable=1".
+
+Please keep in mind that overwriting the BIOS image on SPI serial flash
+might render the machine unbootable and requires special equipment like
+Dediprog to revive. You have been warned!
+
+Below are the steps how to upgrade MinnowBoard MAX BIOS directly from
+Linux.
+
+ 1) Download and extract the latest Minnowboard MAX BIOS SPI image
+    [1]. At the time writing this the latest image is v92.
+
+ 2) Install mtd-utils package [2]. We need this in order to erase the SPI
+    serial flash. Distros like Debian and Fedora have this prepackaged with
+    name "mtd-utils".
+
+ 3) Add "intel-spi.writeable=1" to the kernel command line and reboot
+    the board (you can also reload the driver passing "writeable=1" as
+    module parameter to modprobe).
+
+ 4) Once the board is up and running again, find the right MTD partition
+    (it is named as "BIOS")::
+
+	# cat /proc/mtd
+	dev:    size   erasesize  name
+	mtd0: 00800000 00001000 "BIOS"
+
+    So here it will be /dev/mtd0 but it may vary.
+
+ 5) Make backup of the existing image first::
+
+	# dd if=/dev/mtd0ro of=bios.bak
+	16384+0 records in
+	16384+0 records out
+	8388608 bytes (8.4 MB) copied, 10.0269 s, 837 kB/s
+
+ 6) Verify the backup:
+
+	# sha1sum /dev/mtd0ro bios.bak
+	fdbb011920572ca6c991377c4b418a0502668b73  /dev/mtd0ro
+	fdbb011920572ca6c991377c4b418a0502668b73  bios.bak
+
+    The SHA1 sums must match. Otherwise do not continue any further!
+
+ 7) Erase the SPI serial flash. After this step, do not reboot the
+    board! Otherwise it will not start anymore::
+
+	# flash_erase /dev/mtd0 0 0
+	Erasing 4 Kibyte @ 7ff000 -- 100 % complete
+
+ 8) Once completed without errors you can write the new BIOS image:
+
+    # dd if=MNW2MAX1.X64.0092.R01.1605221712.bin of=/dev/mtd0
+
+ 9) Verify that the new content of the SPI serial flash matches the new
+    BIOS image::
+
+	# sha1sum /dev/mtd0ro MNW2MAX1.X64.0092.R01.1605221712.bin
+	9b4df9e4be2057fceec3a5529ec3d950836c87a2  /dev/mtd0ro
+	9b4df9e4be2057fceec3a5529ec3d950836c87a2 MNW2MAX1.X64.0092.R01.1605221712.bin
+
+    The SHA1 sums should match.
+
+ 10) Now you can reboot your board and observe the new BIOS starting up
+     properly.
+
+References
+----------
+
+[1] https://firmware.intel.com/sites/default/files/MinnowBoard%2EMAX_%2EX64%2E92%2ER01%2Ezip
+
+[2] http://www.linux-mtd.infradead.org/
diff --git a/Documentation/mtd/intel-spi.txt b/Documentation/mtd/intel-spi.txt
deleted file mode 100644
index bc357729c2cb..000000000000
--- a/Documentation/mtd/intel-spi.txt
+++ /dev/null
@@ -1,88 +0,0 @@
-Upgrading BIOS using intel-spi
-------------------------------
-
-Many Intel CPUs like Baytrail and Braswell include SPI serial flash host
-controller which is used to hold BIOS and other platform specific data.
-Since contents of the SPI serial flash is crucial for machine to function,
-it is typically protected by different hardware protection mechanisms to
-avoid accidental (or on purpose) overwrite of the content.
-
-Not all manufacturers protect the SPI serial flash, mainly because it
-allows upgrading the BIOS image directly from an OS.
-
-The intel-spi driver makes it possible to read and write the SPI serial
-flash, if certain protection bits are not set and locked. If it finds
-any of them set, the whole MTD device is made read-only to prevent
-partial overwrites. By default the driver exposes SPI serial flash
-contents as read-only but it can be changed from kernel command line,
-passing "intel-spi.writeable=1".
-
-Please keep in mind that overwriting the BIOS image on SPI serial flash
-might render the machine unbootable and requires special equipment like
-Dediprog to revive. You have been warned!
-
-Below are the steps how to upgrade MinnowBoard MAX BIOS directly from
-Linux.
-
- 1) Download and extract the latest Minnowboard MAX BIOS SPI image
-    [1]. At the time writing this the latest image is v92.
-
- 2) Install mtd-utils package [2]. We need this in order to erase the SPI
-    serial flash. Distros like Debian and Fedora have this prepackaged with
-    name "mtd-utils".
-
- 3) Add "intel-spi.writeable=1" to the kernel command line and reboot
-    the board (you can also reload the driver passing "writeable=1" as
-    module parameter to modprobe).
-
- 4) Once the board is up and running again, find the right MTD partition
-    (it is named as "BIOS"):
-
-    # cat /proc/mtd
-    dev:    size   erasesize  name
-    mtd0: 00800000 00001000 "BIOS"
-
-    So here it will be /dev/mtd0 but it may vary.
-
- 5) Make backup of the existing image first:
-
-    # dd if=/dev/mtd0ro of=bios.bak
-    16384+0 records in
-    16384+0 records out
-    8388608 bytes (8.4 MB) copied, 10.0269 s, 837 kB/s
-
- 6) Verify the backup
-
-    # sha1sum /dev/mtd0ro bios.bak
-    fdbb011920572ca6c991377c4b418a0502668b73  /dev/mtd0ro
-    fdbb011920572ca6c991377c4b418a0502668b73  bios.bak
-
-    The SHA1 sums must match. Otherwise do not continue any further!
-
- 7) Erase the SPI serial flash. After this step, do not reboot the
-    board! Otherwise it will not start anymore.
-
-    # flash_erase /dev/mtd0 0 0
-    Erasing 4 Kibyte @ 7ff000 -- 100 % complete
-
- 8) Once completed without errors you can write the new BIOS image:
-
-    # dd if=MNW2MAX1.X64.0092.R01.1605221712.bin of=/dev/mtd0
-
- 9) Verify that the new content of the SPI serial flash matches the new
-    BIOS image:
-
-    # sha1sum /dev/mtd0ro MNW2MAX1.X64.0092.R01.1605221712.bin
-    9b4df9e4be2057fceec3a5529ec3d950836c87a2  /dev/mtd0ro
-    9b4df9e4be2057fceec3a5529ec3d950836c87a2 MNW2MAX1.X64.0092.R01.1605221712.bin
-
-    The SHA1 sums should match.
-
- 10) Now you can reboot your board and observe the new BIOS starting up
-     properly.
-
-References
-----------
-
-[1] https://firmware.intel.com/sites/default/files/MinnowBoard.MAX_.X64.92.R01.zip
-[2] http://www.linux-mtd.infradead.org/
diff --git a/Documentation/mtd/nand_ecc.rst b/Documentation/mtd/nand_ecc.rst
new file mode 100644
index 000000000000..e8d3c53a5056
--- /dev/null
+++ b/Documentation/mtd/nand_ecc.rst
@@ -0,0 +1,763 @@
+==========================
+NAND Error-correction Code
+==========================
+
+Introduction
+============
+
+Having looked at the linux mtd/nand driver and more specific at nand_ecc.c
+I felt there was room for optimisation. I bashed the code for a few hours
+performing tricks like table lookup removing superfluous code etc.
+After that the speed was increased by 35-40%.
+Still I was not too happy as I felt there was additional room for improvement.
+
+Bad! I was hooked.
+I decided to annotate my steps in this file. Perhaps it is useful to someone
+or someone learns something from it.
+
+
+The problem
+===========
+
+NAND flash (at least SLC one) typically has sectors of 256 bytes.
+However NAND flash is not extremely reliable so some error detection
+(and sometimes correction) is needed.
+
+This is done by means of a Hamming code. I'll try to explain it in
+laymans terms (and apologies to all the pro's in the field in case I do
+not use the right terminology, my coding theory class was almost 30
+years ago, and I must admit it was not one of my favourites).
+
+As I said before the ecc calculation is performed on sectors of 256
+bytes. This is done by calculating several parity bits over the rows and
+columns. The parity used is even parity which means that the parity bit = 1
+if the data over which the parity is calculated is 1 and the parity bit = 0
+if the data over which the parity is calculated is 0. So the total
+number of bits over the data over which the parity is calculated + the
+parity bit is even. (see wikipedia if you can't follow this).
+Parity is often calculated by means of an exclusive or operation,
+sometimes also referred to as xor. In C the operator for xor is ^
+
+Back to ecc.
+Let's give a small figure:
+
+=========  ==== ==== ==== ==== ==== ==== ==== ====   === === === === ====
+byte   0:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp2 rp4 ... rp14
+byte   1:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp2 rp4 ... rp14
+byte   2:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp3 rp4 ... rp14
+byte   3:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp3 rp4 ... rp14
+byte   4:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp2 rp5 ... rp14
+...
+byte 254:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp3 rp5 ... rp15
+byte 255:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp3 rp5 ... rp15
+           cp1  cp0  cp1  cp0  cp1  cp0  cp1  cp0
+           cp3  cp3  cp2  cp2  cp3  cp3  cp2  cp2
+           cp5  cp5  cp5  cp5  cp4  cp4  cp4  cp4
+=========  ==== ==== ==== ==== ==== ==== ==== ====   === === === === ====
+
+This figure represents a sector of 256 bytes.
+cp is my abbreviation for column parity, rp for row parity.
+
+Let's start to explain column parity.
+
+- cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
+
+  so the sum of all bit0, bit2, bit4 and bit6 values + cp0 itself is even.
+
+Similarly cp1 is the sum of all bit1, bit3, bit5 and bit7.
+
+- cp2 is the parity over bit0, bit1, bit4 and bit5
+- cp3 is the parity over bit2, bit3, bit6 and bit7.
+- cp4 is the parity over bit0, bit1, bit2 and bit3.
+- cp5 is the parity over bit4, bit5, bit6 and bit7.
+
+Note that each of cp0 .. cp5 is exactly one bit.
+
+Row parity actually works almost the same.
+
+- rp0 is the parity of all even bytes (0, 2, 4, 6, ... 252, 254)
+- rp1 is the parity of all odd bytes (1, 3, 5, 7, ..., 253, 255)
+- rp2 is the parity of all bytes 0, 1, 4, 5, 8, 9, ...
+  (so handle two bytes, then skip 2 bytes).
+- rp3 is covers the half rp2 does not cover (bytes 2, 3, 6, 7, 10, 11, ...)
+- for rp4 the rule is cover 4 bytes, skip 4 bytes, cover 4 bytes, skip 4 etc.
+
+  so rp4 calculates parity over bytes 0, 1, 2, 3, 8, 9, 10, 11, 16, ...)
+- and rp5 covers the other half, so bytes 4, 5, 6, 7, 12, 13, 14, 15, 20, ..
+
+The story now becomes quite boring. I guess you get the idea.
+
+- rp6 covers 8 bytes then skips 8 etc
+- rp7 skips 8 bytes then covers 8 etc
+- rp8 covers 16 bytes then skips 16 etc
+- rp9 skips 16 bytes then covers 16 etc
+- rp10 covers 32 bytes then skips 32 etc
+- rp11 skips 32 bytes then covers 32 etc
+- rp12 covers 64 bytes then skips 64 etc
+- rp13 skips 64 bytes then covers 64 etc
+- rp14 covers 128 bytes then skips 128
+- rp15 skips 128 bytes then covers 128
+
+In the end the parity bits are grouped together in three bytes as
+follows:
+
+=====  ===== ===== ===== ===== ===== ===== ===== =====
+ECC    Bit 7 Bit 6 Bit 5 Bit 4 Bit 3 Bit 2 Bit 1 Bit 0
+=====  ===== ===== ===== ===== ===== ===== ===== =====
+ECC 0   rp07  rp06  rp05  rp04  rp03  rp02  rp01  rp00
+ECC 1   rp15  rp14  rp13  rp12  rp11  rp10  rp09  rp08
+ECC 2   cp5   cp4   cp3   cp2   cp1   cp0      1     1
+=====  ===== ===== ===== ===== ===== ===== ===== =====
+
+I detected after writing this that ST application note AN1823
+(http://www.st.com/stonline/) gives a much
+nicer picture.(but they use line parity as term where I use row parity)
+Oh well, I'm graphically challenged, so suffer with me for a moment :-)
+
+And I could not reuse the ST picture anyway for copyright reasons.
+
+
+Attempt 0
+=========
+
+Implementing the parity calculation is pretty simple.
+In C pseudocode::
+
+  for (i = 0; i < 256; i++)
+  {
+    if (i & 0x01)
+       rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
+    else
+       rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp0;
+    if (i & 0x02)
+       rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3;
+    else
+       rp2 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp2;
+    if (i & 0x04)
+      rp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp5;
+    else
+      rp4 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp4;
+    if (i & 0x08)
+      rp7 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp7;
+    else
+      rp6 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp6;
+    if (i & 0x10)
+      rp9 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp9;
+    else
+      rp8 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp8;
+    if (i & 0x20)
+      rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11;
+    else
+      rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
+    if (i & 0x40)
+      rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13;
+    else
+      rp12 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp12;
+    if (i & 0x80)
+      rp15 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp15;
+    else
+      rp14 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp14;
+    cp0 = bit6 ^ bit4 ^ bit2 ^ bit0 ^ cp0;
+    cp1 = bit7 ^ bit5 ^ bit3 ^ bit1 ^ cp1;
+    cp2 = bit5 ^ bit4 ^ bit1 ^ bit0 ^ cp2;
+    cp3 = bit7 ^ bit6 ^ bit3 ^ bit2 ^ cp3
+    cp4 = bit3 ^ bit2 ^ bit1 ^ bit0 ^ cp4
+    cp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ cp5
+  }
+
+
+Analysis 0
+==========
+
+C does have bitwise operators but not really operators to do the above
+efficiently (and most hardware has no such instructions either).
+Therefore without implementing this it was clear that the code above was
+not going to bring me a Nobel prize :-)
+
+Fortunately the exclusive or operation is commutative, so we can combine
+the values in any order. So instead of calculating all the bits
+individually, let us try to rearrange things.
+For the column parity this is easy. We can just xor the bytes and in the
+end filter out the relevant bits. This is pretty nice as it will bring
+all cp calculation out of the for loop.
+
+Similarly we can first xor the bytes for the various rows.
+This leads to:
+
+
+Attempt 1
+=========
+
+::
+
+  const char parity[256] = {
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
+  };
+
+  void ecc1(const unsigned char *buf, unsigned char *code)
+  {
+      int i;
+      const unsigned char *bp = buf;
+      unsigned char cur;
+      unsigned char rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+      unsigned char rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+      unsigned char par;
+
+      par = 0;
+      rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+      rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+      rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+      rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+      for (i = 0; i < 256; i++)
+      {
+          cur = *bp++;
+          par ^= cur;
+          if (i & 0x01) rp1 ^= cur; else rp0 ^= cur;
+          if (i & 0x02) rp3 ^= cur; else rp2 ^= cur;
+          if (i & 0x04) rp5 ^= cur; else rp4 ^= cur;
+          if (i & 0x08) rp7 ^= cur; else rp6 ^= cur;
+          if (i & 0x10) rp9 ^= cur; else rp8 ^= cur;
+          if (i & 0x20) rp11 ^= cur; else rp10 ^= cur;
+          if (i & 0x40) rp13 ^= cur; else rp12 ^= cur;
+          if (i & 0x80) rp15 ^= cur; else rp14 ^= cur;
+      }
+      code[0] =
+          (parity[rp7] << 7) |
+          (parity[rp6] << 6) |
+          (parity[rp5] << 5) |
+          (parity[rp4] << 4) |
+          (parity[rp3] << 3) |
+          (parity[rp2] << 2) |
+          (parity[rp1] << 1) |
+          (parity[rp0]);
+      code[1] =
+          (parity[rp15] << 7) |
+          (parity[rp14] << 6) |
+          (parity[rp13] << 5) |
+          (parity[rp12] << 4) |
+          (parity[rp11] << 3) |
+          (parity[rp10] << 2) |
+          (parity[rp9]  << 1) |
+          (parity[rp8]);
+      code[2] =
+          (parity[par & 0xf0] << 7) |
+          (parity[par & 0x0f] << 6) |
+          (parity[par & 0xcc] << 5) |
+          (parity[par & 0x33] << 4) |
+          (parity[par & 0xaa] << 3) |
+          (parity[par & 0x55] << 2);
+      code[0] = ~code[0];
+      code[1] = ~code[1];
+      code[2] = ~code[2];
+  }
+
+Still pretty straightforward. The last three invert statements are there to
+give a checksum of 0xff 0xff 0xff for an empty flash. In an empty flash
+all data is 0xff, so the checksum then matches.
+
+I also introduced the parity lookup. I expected this to be the fastest
+way to calculate the parity, but I will investigate alternatives later
+on.
+
+
+Analysis 1
+==========
+
+The code works, but is not terribly efficient. On my system it took
+almost 4 times as much time as the linux driver code. But hey, if it was
+*that* easy this would have been done long before.
+No pain. no gain.
+
+Fortunately there is plenty of room for improvement.
+
+In step 1 we moved from bit-wise calculation to byte-wise calculation.
+However in C we can also use the unsigned long data type and virtually
+every modern microprocessor supports 32 bit operations, so why not try
+to write our code in such a way that we process data in 32 bit chunks.
+
+Of course this means some modification as the row parity is byte by
+byte. A quick analysis:
+for the column parity we use the par variable. When extending to 32 bits
+we can in the end easily calculate rp0 and rp1 from it.
+(because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0
+respectively, from MSB to LSB)
+also rp2 and rp3 can be easily retrieved from par as rp3 covers the
+first two MSBs and rp2 covers the last two LSBs.
+
+Note that of course now the loop is executed only 64 times (256/4).
+And note that care must taken wrt byte ordering. The way bytes are
+ordered in a long is machine dependent, and might affect us.
+Anyway, if there is an issue: this code is developed on x86 (to be
+precise: a DELL PC with a D920 Intel CPU)
+
+And of course the performance might depend on alignment, but I expect
+that the I/O buffers in the nand driver are aligned properly (and
+otherwise that should be fixed to get maximum performance).
+
+Let's give it a try...
+
+
+Attempt 2
+=========
+
+::
+
+  extern const char parity[256];
+
+  void ecc2(const unsigned char *buf, unsigned char *code)
+  {
+      int i;
+      const unsigned long *bp = (unsigned long *)buf;
+      unsigned long cur;
+      unsigned long rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+      unsigned long rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+      unsigned long par;
+
+      par = 0;
+      rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+      rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+      rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+      rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+      for (i = 0; i < 64; i++)
+      {
+          cur = *bp++;
+          par ^= cur;
+          if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+          if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+          if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+          if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+          if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+          if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+      }
+      /*
+         we need to adapt the code generation for the fact that rp vars are now
+         long; also the column parity calculation needs to be changed.
+         we'll bring rp4 to 15 back to single byte entities by shifting and
+         xoring
+      */
+      rp4 ^= (rp4 >> 16); rp4 ^= (rp4 >> 8); rp4 &= 0xff;
+      rp5 ^= (rp5 >> 16); rp5 ^= (rp5 >> 8); rp5 &= 0xff;
+      rp6 ^= (rp6 >> 16); rp6 ^= (rp6 >> 8); rp6 &= 0xff;
+      rp7 ^= (rp7 >> 16); rp7 ^= (rp7 >> 8); rp7 &= 0xff;
+      rp8 ^= (rp8 >> 16); rp8 ^= (rp8 >> 8); rp8 &= 0xff;
+      rp9 ^= (rp9 >> 16); rp9 ^= (rp9 >> 8); rp9 &= 0xff;
+      rp10 ^= (rp10 >> 16); rp10 ^= (rp10 >> 8); rp10 &= 0xff;
+      rp11 ^= (rp11 >> 16); rp11 ^= (rp11 >> 8); rp11 &= 0xff;
+      rp12 ^= (rp12 >> 16); rp12 ^= (rp12 >> 8); rp12 &= 0xff;
+      rp13 ^= (rp13 >> 16); rp13 ^= (rp13 >> 8); rp13 &= 0xff;
+      rp14 ^= (rp14 >> 16); rp14 ^= (rp14 >> 8); rp14 &= 0xff;
+      rp15 ^= (rp15 >> 16); rp15 ^= (rp15 >> 8); rp15 &= 0xff;
+      rp3 = (par >> 16); rp3 ^= (rp3 >> 8); rp3 &= 0xff;
+      rp2 = par & 0xffff; rp2 ^= (rp2 >> 8); rp2 &= 0xff;
+      par ^= (par >> 16);
+      rp1 = (par >> 8); rp1 &= 0xff;
+      rp0 = (par & 0xff);
+      par ^= (par >> 8); par &= 0xff;
+
+      code[0] =
+          (parity[rp7] << 7) |
+          (parity[rp6] << 6) |
+          (parity[rp5] << 5) |
+          (parity[rp4] << 4) |
+          (parity[rp3] << 3) |
+          (parity[rp2] << 2) |
+          (parity[rp1] << 1) |
+          (parity[rp0]);
+      code[1] =
+          (parity[rp15] << 7) |
+          (parity[rp14] << 6) |
+          (parity[rp13] << 5) |
+          (parity[rp12] << 4) |
+          (parity[rp11] << 3) |
+          (parity[rp10] << 2) |
+          (parity[rp9]  << 1) |
+          (parity[rp8]);
+      code[2] =
+          (parity[par & 0xf0] << 7) |
+          (parity[par & 0x0f] << 6) |
+          (parity[par & 0xcc] << 5) |
+          (parity[par & 0x33] << 4) |
+          (parity[par & 0xaa] << 3) |
+          (parity[par & 0x55] << 2);
+      code[0] = ~code[0];
+      code[1] = ~code[1];
+      code[2] = ~code[2];
+  }
+
+The parity array is not shown any more. Note also that for these
+examples I kinda deviated from my regular programming style by allowing
+multiple statements on a line, not using { } in then and else blocks
+with only a single statement and by using operators like ^=
+
+
+Analysis 2
+==========
+
+The code (of course) works, and hurray: we are a little bit faster than
+the linux driver code (about 15%). But wait, don't cheer too quickly.
+There is more to be gained.
+If we look at e.g. rp14 and rp15 we see that we either xor our data with
+rp14 or with rp15. However we also have par which goes over all data.
+This means there is no need to calculate rp14 as it can be calculated from
+rp15 through rp14 = par ^ rp15, because par = rp14 ^ rp15;
+(or if desired we can avoid calculating rp15 and calculate it from
+rp14).  That is why some places refer to inverse parity.
+Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13.
+Effectively this means we can eliminate the else clause from the if
+statements. Also we can optimise the calculation in the end a little bit
+by going from long to byte first. Actually we can even avoid the table
+lookups
+
+Attempt 3
+=========
+
+Odd replaced::
+
+          if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+          if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+          if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+          if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+          if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+          if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+
+with::
+
+          if (i & 0x01) rp5 ^= cur;
+          if (i & 0x02) rp7 ^= cur;
+          if (i & 0x04) rp9 ^= cur;
+          if (i & 0x08) rp11 ^= cur;
+          if (i & 0x10) rp13 ^= cur;
+          if (i & 0x20) rp15 ^= cur;
+
+and outside the loop added::
+
+          rp4  = par ^ rp5;
+          rp6  = par ^ rp7;
+          rp8  = par ^ rp9;
+          rp10  = par ^ rp11;
+          rp12  = par ^ rp13;
+          rp14  = par ^ rp15;
+
+And after that the code takes about 30% more time, although the number of
+statements is reduced. This is also reflected in the assembly code.
+
+
+Analysis 3
+==========
+
+Very weird. Guess it has to do with caching or instruction parallellism
+or so. I also tried on an eeePC (Celeron, clocked at 900 Mhz). Interesting
+observation was that this one is only 30% slower (according to time)
+executing the code as my 3Ghz D920 processor.
+
+Well, it was expected not to be easy so maybe instead move to a
+different track: let's move back to the code from attempt2 and do some
+loop unrolling. This will eliminate a few if statements. I'll try
+different amounts of unrolling to see what works best.
+
+
+Attempt 4
+=========
+
+Unrolled the loop 1, 2, 3 and 4 times.
+For 4 the code starts with::
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++;
+        par ^= cur;
+        rp4 ^= cur;
+        rp6 ^= cur;
+        rp8 ^= cur;
+        rp10 ^= cur;
+        if (i & 0x1) rp13 ^= cur; else rp12 ^= cur;
+        if (i & 0x2) rp15 ^= cur; else rp14 ^= cur;
+        cur = *bp++;
+        par ^= cur;
+        rp5 ^= cur;
+        rp6 ^= cur;
+        ...
+
+
+Analysis 4
+==========
+
+Unrolling once gains about 15%
+
+Unrolling twice keeps the gain at about 15%
+
+Unrolling three times gives a gain of 30% compared to attempt 2.
+
+Unrolling four times gives a marginal improvement compared to unrolling
+three times.
+
+I decided to proceed with a four time unrolled loop anyway. It was my gut
+feeling that in the next steps I would obtain additional gain from it.
+
+The next step was triggered by the fact that par contains the xor of all
+bytes and rp4 and rp5 each contain the xor of half of the bytes.
+So in effect par = rp4 ^ rp5. But as xor is commutative we can also say
+that rp5 = par ^ rp4. So no need to keep both rp4 and rp5 around. We can
+eliminate rp5 (or rp4, but I already foresaw another optimisation).
+The same holds for rp6/7, rp8/9, rp10/11 rp12/13 and rp14/15.
+
+
+Attempt 5
+=========
+
+Effectively so all odd digit rp assignments in the loop were removed.
+This included the else clause of the if statements.
+Of course after the loop we need to correct things by adding code like::
+
+    rp5 = par ^ rp4;
+
+Also the initial assignments (rp5 = 0; etc) could be removed.
+Along the line I also removed the initialisation of rp0/1/2/3.
+
+
+Analysis 5
+==========
+
+Measurements showed this was a good move. The run-time roughly halved
+compared with attempt 4 with 4 times unrolled, and we only require 1/3rd
+of the processor time compared to the current code in the linux kernel.
+
+However, still I thought there was more. I didn't like all the if
+statements. Why not keep a running parity and only keep the last if
+statement. Time for yet another version!
+
+
+Attempt 6
+=========
+
+THe code within the for loop was changed to::
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++; tmppar  = cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
+
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp8 ^= cur;
+
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur;
+
+        par ^= tmppar;
+        if ((i & 0x1) == 0) rp12 ^= tmppar;
+        if ((i & 0x2) == 0) rp14 ^= tmppar;
+    }
+
+As you can see tmppar is used to accumulate the parity within a for
+iteration. In the last 3 statements is added to par and, if needed,
+to rp12 and rp14.
+
+While making the changes I also found that I could exploit that tmppar
+contains the running parity for this iteration. So instead of having:
+rp4 ^= cur; rp6 ^= cur;
+I removed the rp6 ^= cur; statement and did rp6 ^= tmppar; on next
+statement. A similar change was done for rp8 and rp10
+
+
+Analysis 6
+==========
+
+Measuring this code again showed big gain. When executing the original
+linux code 1 million times, this took about 1 second on my system.
+(using time to measure the performance). After this iteration I was back
+to 0.075 sec. Actually I had to decide to start measuring over 10
+million iterations in order not to lose too much accuracy. This one
+definitely seemed to be the jackpot!
+
+There is a little bit more room for improvement though. There are three
+places with statements::
+
+	rp4 ^= cur; rp6 ^= cur;
+
+It seems more efficient to also maintain a variable rp4_6 in the while
+loop; This eliminates 3 statements per loop. Of course after the loop we
+need to correct by adding::
+
+	rp4 ^= rp4_6;
+	rp6 ^= rp4_6
+
+Furthermore there are 4 sequential assignments to rp8. This can be
+encoded slightly more efficiently by saving tmppar before those 4 lines
+and later do rp8 = rp8 ^ tmppar ^ notrp8;
+(where notrp8 is the value of rp8 before those 4 lines).
+Again a use of the commutative property of xor.
+Time for a new test!
+
+
+Attempt 7
+=========
+
+The new code now looks like::
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++; tmppar  = cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
+
+        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+
+        notrp8 = tmppar;
+        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur;
+        rp8 = rp8 ^ tmppar ^ notrp8;
+
+        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur;
+
+        par ^= tmppar;
+        if ((i & 0x1) == 0) rp12 ^= tmppar;
+        if ((i & 0x2) == 0) rp14 ^= tmppar;
+    }
+    rp4 ^= rp4_6;
+    rp6 ^= rp4_6;
+
+
+Not a big change, but every penny counts :-)
+
+
+Analysis 7
+==========
+
+Actually this made things worse. Not very much, but I don't want to move
+into the wrong direction. Maybe something to investigate later. Could
+have to do with caching again.
+
+Guess that is what there is to win within the loop. Maybe unrolling one
+more time will help. I'll keep the optimisations from 7 for now.
+
+
+Attempt 8
+=========
+
+Unrolled the loop one more time.
+
+
+Analysis 8
+==========
+
+This makes things worse. Let's stick with attempt 6 and continue from there.
+Although it seems that the code within the loop cannot be optimised
+further there is still room to optimize the generation of the ecc codes.
+We can simply calculate the total parity. If this is 0 then rp4 = rp5
+etc. If the parity is 1, then rp4 = !rp5;
+
+But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits
+in the result byte and then do something like::
+
+    code[0] |= (code[0] << 1);
+
+Lets test this.
+
+
+Attempt 9
+=========
+
+Changed the code but again this slightly degrades performance. Tried all
+kind of other things, like having dedicated parity arrays to avoid the
+shift after parity[rp7] << 7; No gain.
+Change the lookup using the parity array by using shift operators (e.g.
+replace parity[rp7] << 7 with::
+
+	rp7 ^= (rp7 << 4);
+	rp7 ^= (rp7 << 2);
+	rp7 ^= (rp7 << 1);
+	rp7 &= 0x80;
+
+No gain.
+
+The only marginal change was inverting the parity bits, so we can remove
+the last three invert statements.
+
+Ah well, pity this does not deliver more. Then again 10 million
+iterations using the linux driver code takes between 13 and 13.5
+seconds, whereas my code now takes about 0.73 seconds for those 10
+million iterations. So basically I've improved the performance by a
+factor 18 on my system. Not that bad. Of course on different hardware
+you will get different results. No warranties!
+
+But of course there is no such thing as a free lunch. The codesize almost
+tripled (from 562 bytes to 1434 bytes). Then again, it is not that much.
+
+
+Correcting errors
+=================
+
+For correcting errors I again used the ST application note as a starter,
+but I also peeked at the existing code.
+
+The algorithm itself is pretty straightforward. Just xor the given and
+the calculated ecc. If all bytes are 0 there is no problem. If 11 bits
+are 1 we have one correctable bit error. If there is 1 bit 1, we have an
+error in the given ecc code.
+
+It proved to be fastest to do some table lookups. Performance gain
+introduced by this is about a factor 2 on my system when a repair had to
+be done, and 1% or so if no repair had to be done.
+
+Code size increased from 330 bytes to 686 bytes for this function.
+(gcc 4.2, -O3)
+
+
+Conclusion
+==========
+
+The gain when calculating the ecc is tremendous. Om my development hardware
+a speedup of a factor of 18 for ecc calculation was achieved. On a test on an
+embedded system with a MIPS core a factor 7 was obtained.
+
+On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
+5 (big endian mode, gcc 4.1.2, -O3)
+
+For correction not much gain could be obtained (as bitflips are rare). Then
+again there are also much less cycles spent there.
+
+It seems there is not much more gain possible in this, at least when
+programmed in C. Of course it might be possible to squeeze something more
+out of it with an assembler program, but due to pipeline behaviour etc
+this is very tricky (at least for intel hw).
+
+Author: Frans Meulenbroeks
+
+Copyright (C) 2008 Koninklijke Philips Electronics NV.
diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/mtd/nand_ecc.txt
deleted file mode 100644
index f8c3284bf6a7..000000000000
--- a/Documentation/mtd/nand_ecc.txt
+++ /dev/null
@@ -1,714 +0,0 @@
-Introduction
-============
-
-Having looked at the linux mtd/nand driver and more specific at nand_ecc.c
-I felt there was room for optimisation. I bashed the code for a few hours
-performing tricks like table lookup removing superfluous code etc.
-After that the speed was increased by 35-40%.
-Still I was not too happy as I felt there was additional room for improvement.
-
-Bad! I was hooked.
-I decided to annotate my steps in this file. Perhaps it is useful to someone
-or someone learns something from it.
-
-
-The problem
-===========
-
-NAND flash (at least SLC one) typically has sectors of 256 bytes.
-However NAND flash is not extremely reliable so some error detection
-(and sometimes correction) is needed.
-
-This is done by means of a Hamming code. I'll try to explain it in
-laymans terms (and apologies to all the pro's in the field in case I do
-not use the right terminology, my coding theory class was almost 30
-years ago, and I must admit it was not one of my favourites).
-
-As I said before the ecc calculation is performed on sectors of 256
-bytes. This is done by calculating several parity bits over the rows and
-columns. The parity used is even parity which means that the parity bit = 1
-if the data over which the parity is calculated is 1 and the parity bit = 0
-if the data over which the parity is calculated is 0. So the total
-number of bits over the data over which the parity is calculated + the
-parity bit is even. (see wikipedia if you can't follow this).
-Parity is often calculated by means of an exclusive or operation,
-sometimes also referred to as xor. In C the operator for xor is ^
-
-Back to ecc.
-Let's give a small figure:
-
-byte   0:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp2 rp4 ... rp14
-byte   1:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp2 rp4 ... rp14
-byte   2:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp3 rp4 ... rp14
-byte   3:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp3 rp4 ... rp14
-byte   4:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp2 rp5 ... rp14
-....
-byte 254:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp3 rp5 ... rp15
-byte 255:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp3 rp5 ... rp15
-           cp1  cp0  cp1  cp0  cp1  cp0  cp1  cp0
-           cp3  cp3  cp2  cp2  cp3  cp3  cp2  cp2
-           cp5  cp5  cp5  cp5  cp4  cp4  cp4  cp4
-
-This figure represents a sector of 256 bytes.
-cp is my abbreviation for column parity, rp for row parity.
-
-Let's start to explain column parity.
-cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
-so the sum of all bit0, bit2, bit4 and bit6 values + cp0 itself is even.
-Similarly cp1 is the sum of all bit1, bit3, bit5 and bit7.
-cp2 is the parity over bit0, bit1, bit4 and bit5
-cp3 is the parity over bit2, bit3, bit6 and bit7.
-cp4 is the parity over bit0, bit1, bit2 and bit3.
-cp5 is the parity over bit4, bit5, bit6 and bit7.
-Note that each of cp0 .. cp5 is exactly one bit.
-
-Row parity actually works almost the same.
-rp0 is the parity of all even bytes (0, 2, 4, 6, ... 252, 254)
-rp1 is the parity of all odd bytes (1, 3, 5, 7, ..., 253, 255)
-rp2 is the parity of all bytes 0, 1, 4, 5, 8, 9, ...
-(so handle two bytes, then skip 2 bytes).
-rp3 is covers the half rp2 does not cover (bytes 2, 3, 6, 7, 10, 11, ...)
-for rp4 the rule is cover 4 bytes, skip 4 bytes, cover 4 bytes, skip 4 etc.
-so rp4 calculates parity over bytes 0, 1, 2, 3, 8, 9, 10, 11, 16, ...)
-and rp5 covers the other half, so bytes 4, 5, 6, 7, 12, 13, 14, 15, 20, ..
-The story now becomes quite boring. I guess you get the idea.
-rp6 covers 8 bytes then skips 8 etc
-rp7 skips 8 bytes then covers 8 etc
-rp8 covers 16 bytes then skips 16 etc
-rp9 skips 16 bytes then covers 16 etc
-rp10 covers 32 bytes then skips 32 etc
-rp11 skips 32 bytes then covers 32 etc
-rp12 covers 64 bytes then skips 64 etc
-rp13 skips 64 bytes then covers 64 etc
-rp14 covers 128 bytes then skips 128
-rp15 skips 128 bytes then covers 128
-
-In the end the parity bits are grouped together in three bytes as
-follows:
-ECC    Bit 7 Bit 6 Bit 5 Bit 4 Bit 3 Bit 2 Bit 1 Bit 0
-ECC 0   rp07  rp06  rp05  rp04  rp03  rp02  rp01  rp00
-ECC 1   rp15  rp14  rp13  rp12  rp11  rp10  rp09  rp08
-ECC 2   cp5   cp4   cp3   cp2   cp1   cp0      1     1
-
-I detected after writing this that ST application note AN1823
-(http://www.st.com/stonline/) gives a much
-nicer picture.(but they use line parity as term where I use row parity)
-Oh well, I'm graphically challenged, so suffer with me for a moment :-)
-And I could not reuse the ST picture anyway for copyright reasons.
-
-
-Attempt 0
-=========
-
-Implementing the parity calculation is pretty simple.
-In C pseudocode:
-for (i = 0; i < 256; i++)
-{
-    if (i & 0x01)
-       rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
-    else
-       rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp0;
-    if (i & 0x02)
-       rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3;
-    else
-       rp2 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp2;
-    if (i & 0x04)
-      rp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp5;
-    else
-      rp4 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp4;
-    if (i & 0x08)
-      rp7 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp7;
-    else
-      rp6 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp6;
-    if (i & 0x10)
-      rp9 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp9;
-    else
-      rp8 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp8;
-    if (i & 0x20)
-      rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11;
-    else
-      rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
-    if (i & 0x40)
-      rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13;
-    else
-      rp12 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp12;
-    if (i & 0x80)
-      rp15 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp15;
-    else
-      rp14 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp14;
-    cp0 = bit6 ^ bit4 ^ bit2 ^ bit0 ^ cp0;
-    cp1 = bit7 ^ bit5 ^ bit3 ^ bit1 ^ cp1;
-    cp2 = bit5 ^ bit4 ^ bit1 ^ bit0 ^ cp2;
-    cp3 = bit7 ^ bit6 ^ bit3 ^ bit2 ^ cp3
-    cp4 = bit3 ^ bit2 ^ bit1 ^ bit0 ^ cp4
-    cp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ cp5
-}
-
-
-Analysis 0
-==========
-
-C does have bitwise operators but not really operators to do the above
-efficiently (and most hardware has no such instructions either).
-Therefore without implementing this it was clear that the code above was
-not going to bring me a Nobel prize :-)
-
-Fortunately the exclusive or operation is commutative, so we can combine
-the values in any order. So instead of calculating all the bits
-individually, let us try to rearrange things.
-For the column parity this is easy. We can just xor the bytes and in the
-end filter out the relevant bits. This is pretty nice as it will bring
-all cp calculation out of the for loop.
-
-Similarly we can first xor the bytes for the various rows.
-This leads to:
-
-
-Attempt 1
-=========
-
-const char parity[256] = {
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
-};
-
-void ecc1(const unsigned char *buf, unsigned char *code)
-{
-    int i;
-    const unsigned char *bp = buf;
-    unsigned char cur;
-    unsigned char rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
-    unsigned char rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
-    unsigned char par;
-
-    par = 0;
-    rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
-    rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
-    rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
-    rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
-
-    for (i = 0; i < 256; i++)
-    {
-        cur = *bp++;
-        par ^= cur;
-        if (i & 0x01) rp1 ^= cur; else rp0 ^= cur;
-        if (i & 0x02) rp3 ^= cur; else rp2 ^= cur;
-        if (i & 0x04) rp5 ^= cur; else rp4 ^= cur;
-        if (i & 0x08) rp7 ^= cur; else rp6 ^= cur;
-        if (i & 0x10) rp9 ^= cur; else rp8 ^= cur;
-        if (i & 0x20) rp11 ^= cur; else rp10 ^= cur;
-        if (i & 0x40) rp13 ^= cur; else rp12 ^= cur;
-        if (i & 0x80) rp15 ^= cur; else rp14 ^= cur;
-    }
-    code[0] =
-        (parity[rp7] << 7) |
-        (parity[rp6] << 6) |
-        (parity[rp5] << 5) |
-        (parity[rp4] << 4) |
-        (parity[rp3] << 3) |
-        (parity[rp2] << 2) |
-        (parity[rp1] << 1) |
-        (parity[rp0]);
-    code[1] =
-        (parity[rp15] << 7) |
-        (parity[rp14] << 6) |
-        (parity[rp13] << 5) |
-        (parity[rp12] << 4) |
-        (parity[rp11] << 3) |
-        (parity[rp10] << 2) |
-        (parity[rp9]  << 1) |
-        (parity[rp8]);
-    code[2] =
-        (parity[par & 0xf0] << 7) |
-        (parity[par & 0x0f] << 6) |
-        (parity[par & 0xcc] << 5) |
-        (parity[par & 0x33] << 4) |
-        (parity[par & 0xaa] << 3) |
-        (parity[par & 0x55] << 2);
-    code[0] = ~code[0];
-    code[1] = ~code[1];
-    code[2] = ~code[2];
-}
-
-Still pretty straightforward. The last three invert statements are there to
-give a checksum of 0xff 0xff 0xff for an empty flash. In an empty flash
-all data is 0xff, so the checksum then matches.
-
-I also introduced the parity lookup. I expected this to be the fastest
-way to calculate the parity, but I will investigate alternatives later
-on.
-
-
-Analysis 1
-==========
-
-The code works, but is not terribly efficient. On my system it took
-almost 4 times as much time as the linux driver code. But hey, if it was
-*that* easy this would have been done long before.
-No pain. no gain.
-
-Fortunately there is plenty of room for improvement.
-
-In step 1 we moved from bit-wise calculation to byte-wise calculation.
-However in C we can also use the unsigned long data type and virtually
-every modern microprocessor supports 32 bit operations, so why not try
-to write our code in such a way that we process data in 32 bit chunks.
-
-Of course this means some modification as the row parity is byte by
-byte. A quick analysis:
-for the column parity we use the par variable. When extending to 32 bits
-we can in the end easily calculate rp0 and rp1 from it.
-(because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0
-respectively, from MSB to LSB)
-also rp2 and rp3 can be easily retrieved from par as rp3 covers the
-first two MSBs and rp2 covers the last two LSBs.
-
-Note that of course now the loop is executed only 64 times (256/4).
-And note that care must taken wrt byte ordering. The way bytes are
-ordered in a long is machine dependent, and might affect us.
-Anyway, if there is an issue: this code is developed on x86 (to be
-precise: a DELL PC with a D920 Intel CPU)
-
-And of course the performance might depend on alignment, but I expect
-that the I/O buffers in the nand driver are aligned properly (and
-otherwise that should be fixed to get maximum performance).
-
-Let's give it a try...
-
-
-Attempt 2
-=========
-
-extern const char parity[256];
-
-void ecc2(const unsigned char *buf, unsigned char *code)
-{
-    int i;
-    const unsigned long *bp = (unsigned long *)buf;
-    unsigned long cur;
-    unsigned long rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
-    unsigned long rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
-    unsigned long par;
-
-    par = 0;
-    rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
-    rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
-    rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
-    rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
-
-    for (i = 0; i < 64; i++)
-    {
-        cur = *bp++;
-        par ^= cur;
-        if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
-        if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
-        if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
-        if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
-        if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
-        if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
-    }
-    /*
-       we need to adapt the code generation for the fact that rp vars are now
-       long; also the column parity calculation needs to be changed.
-       we'll bring rp4 to 15 back to single byte entities by shifting and
-       xoring
-    */
-    rp4 ^= (rp4 >> 16); rp4 ^= (rp4 >> 8); rp4 &= 0xff;
-    rp5 ^= (rp5 >> 16); rp5 ^= (rp5 >> 8); rp5 &= 0xff;
-    rp6 ^= (rp6 >> 16); rp6 ^= (rp6 >> 8); rp6 &= 0xff;
-    rp7 ^= (rp7 >> 16); rp7 ^= (rp7 >> 8); rp7 &= 0xff;
-    rp8 ^= (rp8 >> 16); rp8 ^= (rp8 >> 8); rp8 &= 0xff;
-    rp9 ^= (rp9 >> 16); rp9 ^= (rp9 >> 8); rp9 &= 0xff;
-    rp10 ^= (rp10 >> 16); rp10 ^= (rp10 >> 8); rp10 &= 0xff;
-    rp11 ^= (rp11 >> 16); rp11 ^= (rp11 >> 8); rp11 &= 0xff;
-    rp12 ^= (rp12 >> 16); rp12 ^= (rp12 >> 8); rp12 &= 0xff;
-    rp13 ^= (rp13 >> 16); rp13 ^= (rp13 >> 8); rp13 &= 0xff;
-    rp14 ^= (rp14 >> 16); rp14 ^= (rp14 >> 8); rp14 &= 0xff;
-    rp15 ^= (rp15 >> 16); rp15 ^= (rp15 >> 8); rp15 &= 0xff;
-    rp3 = (par >> 16); rp3 ^= (rp3 >> 8); rp3 &= 0xff;
-    rp2 = par & 0xffff; rp2 ^= (rp2 >> 8); rp2 &= 0xff;
-    par ^= (par >> 16);
-    rp1 = (par >> 8); rp1 &= 0xff;
-    rp0 = (par & 0xff);
-    par ^= (par >> 8); par &= 0xff;
-
-    code[0] =
-        (parity[rp7] << 7) |
-        (parity[rp6] << 6) |
-        (parity[rp5] << 5) |
-        (parity[rp4] << 4) |
-        (parity[rp3] << 3) |
-        (parity[rp2] << 2) |
-        (parity[rp1] << 1) |
-        (parity[rp0]);
-    code[1] =
-        (parity[rp15] << 7) |
-        (parity[rp14] << 6) |
-        (parity[rp13] << 5) |
-        (parity[rp12] << 4) |
-        (parity[rp11] << 3) |
-        (parity[rp10] << 2) |
-        (parity[rp9]  << 1) |
-        (parity[rp8]);
-    code[2] =
-        (parity[par & 0xf0] << 7) |
-        (parity[par & 0x0f] << 6) |
-        (parity[par & 0xcc] << 5) |
-        (parity[par & 0x33] << 4) |
-        (parity[par & 0xaa] << 3) |
-        (parity[par & 0x55] << 2);
-    code[0] = ~code[0];
-    code[1] = ~code[1];
-    code[2] = ~code[2];
-}
-
-The parity array is not shown any more. Note also that for these
-examples I kinda deviated from my regular programming style by allowing
-multiple statements on a line, not using { } in then and else blocks
-with only a single statement and by using operators like ^=
-
-
-Analysis 2
-==========
-
-The code (of course) works, and hurray: we are a little bit faster than
-the linux driver code (about 15%). But wait, don't cheer too quickly.
-There is more to be gained.
-If we look at e.g. rp14 and rp15 we see that we either xor our data with
-rp14 or with rp15. However we also have par which goes over all data.
-This means there is no need to calculate rp14 as it can be calculated from
-rp15 through rp14 = par ^ rp15, because par = rp14 ^ rp15;
-(or if desired we can avoid calculating rp15 and calculate it from
-rp14).  That is why some places refer to inverse parity.
-Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13.
-Effectively this means we can eliminate the else clause from the if
-statements. Also we can optimise the calculation in the end a little bit
-by going from long to byte first. Actually we can even avoid the table
-lookups
-
-Attempt 3
-=========
-
-Odd replaced:
-        if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
-        if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
-        if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
-        if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
-        if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
-        if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
-with
-        if (i & 0x01) rp5 ^= cur;
-        if (i & 0x02) rp7 ^= cur;
-        if (i & 0x04) rp9 ^= cur;
-        if (i & 0x08) rp11 ^= cur;
-        if (i & 0x10) rp13 ^= cur;
-        if (i & 0x20) rp15 ^= cur;
-
-        and outside the loop added:
-        rp4  = par ^ rp5;
-        rp6  = par ^ rp7;
-        rp8  = par ^ rp9;
-        rp10  = par ^ rp11;
-        rp12  = par ^ rp13;
-        rp14  = par ^ rp15;
-
-And after that the code takes about 30% more time, although the number of
-statements is reduced. This is also reflected in the assembly code.
-
-
-Analysis 3
-==========
-
-Very weird. Guess it has to do with caching or instruction parallellism
-or so. I also tried on an eeePC (Celeron, clocked at 900 Mhz). Interesting
-observation was that this one is only 30% slower (according to time)
-executing the code as my 3Ghz D920 processor.
-
-Well, it was expected not to be easy so maybe instead move to a
-different track: let's move back to the code from attempt2 and do some
-loop unrolling. This will eliminate a few if statements. I'll try
-different amounts of unrolling to see what works best.
-
-
-Attempt 4
-=========
-
-Unrolled the loop 1, 2, 3 and 4 times.
-For 4 the code starts with:
-
-    for (i = 0; i < 4; i++)
-    {
-        cur = *bp++;
-        par ^= cur;
-        rp4 ^= cur;
-        rp6 ^= cur;
-        rp8 ^= cur;
-        rp10 ^= cur;
-        if (i & 0x1) rp13 ^= cur; else rp12 ^= cur;
-        if (i & 0x2) rp15 ^= cur; else rp14 ^= cur;
-        cur = *bp++;
-        par ^= cur;
-        rp5 ^= cur;
-        rp6 ^= cur;
-        ...
-
-
-Analysis 4
-==========
-
-Unrolling once gains about 15%
-Unrolling twice keeps the gain at about 15%
-Unrolling three times gives a gain of 30% compared to attempt 2.
-Unrolling four times gives a marginal improvement compared to unrolling
-three times.
-
-I decided to proceed with a four time unrolled loop anyway. It was my gut
-feeling that in the next steps I would obtain additional gain from it.
-
-The next step was triggered by the fact that par contains the xor of all
-bytes and rp4 and rp5 each contain the xor of half of the bytes.
-So in effect par = rp4 ^ rp5. But as xor is commutative we can also say
-that rp5 = par ^ rp4. So no need to keep both rp4 and rp5 around. We can
-eliminate rp5 (or rp4, but I already foresaw another optimisation).
-The same holds for rp6/7, rp8/9, rp10/11 rp12/13 and rp14/15.
-
-
-Attempt 5
-=========
-
-Effectively so all odd digit rp assignments in the loop were removed.
-This included the else clause of the if statements.
-Of course after the loop we need to correct things by adding code like:
-    rp5 = par ^ rp4;
-Also the initial assignments (rp5 = 0; etc) could be removed.
-Along the line I also removed the initialisation of rp0/1/2/3.
-
-
-Analysis 5
-==========
-
-Measurements showed this was a good move. The run-time roughly halved
-compared with attempt 4 with 4 times unrolled, and we only require 1/3rd
-of the processor time compared to the current code in the linux kernel.
-
-However, still I thought there was more. I didn't like all the if
-statements. Why not keep a running parity and only keep the last if
-statement. Time for yet another version!
-
-
-Attempt 6
-=========
-
-THe code within the for loop was changed to:
-
-    for (i = 0; i < 4; i++)
-    {
-        cur = *bp++; tmppar  = cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
-
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
-
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp8 ^= cur;
-
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur;
-
-        par ^= tmppar;
-        if ((i & 0x1) == 0) rp12 ^= tmppar;
-        if ((i & 0x2) == 0) rp14 ^= tmppar;
-    }
-
-As you can see tmppar is used to accumulate the parity within a for
-iteration. In the last 3 statements is added to par and, if needed,
-to rp12 and rp14.
-
-While making the changes I also found that I could exploit that tmppar
-contains the running parity for this iteration. So instead of having:
-rp4 ^= cur; rp6 ^= cur;
-I removed the rp6 ^= cur; statement and did rp6 ^= tmppar; on next
-statement. A similar change was done for rp8 and rp10
-
-
-Analysis 6
-==========
-
-Measuring this code again showed big gain. When executing the original
-linux code 1 million times, this took about 1 second on my system.
-(using time to measure the performance). After this iteration I was back
-to 0.075 sec. Actually I had to decide to start measuring over 10
-million iterations in order not to lose too much accuracy. This one
-definitely seemed to be the jackpot!
-
-There is a little bit more room for improvement though. There are three
-places with statements:
-rp4 ^= cur; rp6 ^= cur;
-It seems more efficient to also maintain a variable rp4_6 in the while
-loop; This eliminates 3 statements per loop. Of course after the loop we
-need to correct by adding:
-    rp4 ^= rp4_6;
-    rp6 ^= rp4_6
-Furthermore there are 4 sequential assignments to rp8. This can be
-encoded slightly more efficiently by saving tmppar before those 4 lines
-and later do rp8 = rp8 ^ tmppar ^ notrp8;
-(where notrp8 is the value of rp8 before those 4 lines).
-Again a use of the commutative property of xor.
-Time for a new test!
-
-
-Attempt 7
-=========
-
-The new code now looks like:
-
-    for (i = 0; i < 4; i++)
-    {
-        cur = *bp++; tmppar  = cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
-
-        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
-
-        notrp8 = tmppar;
-        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur;
-        rp8 = rp8 ^ tmppar ^ notrp8;
-
-        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur;
-
-        par ^= tmppar;
-        if ((i & 0x1) == 0) rp12 ^= tmppar;
-        if ((i & 0x2) == 0) rp14 ^= tmppar;
-    }
-    rp4 ^= rp4_6;
-    rp6 ^= rp4_6;
-
-
-Not a big change, but every penny counts :-)
-
-
-Analysis 7
-==========
-
-Actually this made things worse. Not very much, but I don't want to move
-into the wrong direction. Maybe something to investigate later. Could
-have to do with caching again.
-
-Guess that is what there is to win within the loop. Maybe unrolling one
-more time will help. I'll keep the optimisations from 7 for now.
-
-
-Attempt 8
-=========
-
-Unrolled the loop one more time.
-
-
-Analysis 8
-==========
-
-This makes things worse. Let's stick with attempt 6 and continue from there.
-Although it seems that the code within the loop cannot be optimised
-further there is still room to optimize the generation of the ecc codes.
-We can simply calculate the total parity. If this is 0 then rp4 = rp5
-etc. If the parity is 1, then rp4 = !rp5;
-But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits
-in the result byte and then do something like
-    code[0] |= (code[0] << 1);
-Lets test this.
-
-
-Attempt 9
-=========
-
-Changed the code but again this slightly degrades performance. Tried all
-kind of other things, like having dedicated parity arrays to avoid the
-shift after parity[rp7] << 7; No gain.
-Change the lookup using the parity array by using shift operators (e.g.
-replace parity[rp7] << 7 with:
-rp7 ^= (rp7 << 4);
-rp7 ^= (rp7 << 2);
-rp7 ^= (rp7 << 1);
-rp7 &= 0x80;
-No gain.
-
-The only marginal change was inverting the parity bits, so we can remove
-the last three invert statements.
-
-Ah well, pity this does not deliver more. Then again 10 million
-iterations using the linux driver code takes between 13 and 13.5
-seconds, whereas my code now takes about 0.73 seconds for those 10
-million iterations. So basically I've improved the performance by a
-factor 18 on my system. Not that bad. Of course on different hardware
-you will get different results. No warranties!
-
-But of course there is no such thing as a free lunch. The codesize almost
-tripled (from 562 bytes to 1434 bytes). Then again, it is not that much.
-
-
-Correcting errors
-=================
-
-For correcting errors I again used the ST application note as a starter,
-but I also peeked at the existing code.
-The algorithm itself is pretty straightforward. Just xor the given and
-the calculated ecc. If all bytes are 0 there is no problem. If 11 bits
-are 1 we have one correctable bit error. If there is 1 bit 1, we have an
-error in the given ecc code.
-It proved to be fastest to do some table lookups. Performance gain
-introduced by this is about a factor 2 on my system when a repair had to
-be done, and 1% or so if no repair had to be done.
-Code size increased from 330 bytes to 686 bytes for this function.
-(gcc 4.2, -O3)
-
-
-Conclusion
-==========
-
-The gain when calculating the ecc is tremendous. Om my development hardware
-a speedup of a factor of 18 for ecc calculation was achieved. On a test on an
-embedded system with a MIPS core a factor 7 was obtained.
-On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
-5 (big endian mode, gcc 4.1.2, -O3)
-For correction not much gain could be obtained (as bitflips are rare). Then
-again there are also much less cycles spent there.
-
-It seems there is not much more gain possible in this, at least when
-programmed in C. Of course it might be possible to squeeze something more
-out of it with an assembler program, but due to pipeline behaviour etc
-this is very tricky (at least for intel hw).
-
-Author: Frans Meulenbroeks
-Copyright (C) 2008 Koninklijke Philips Electronics NV.
diff --git a/Documentation/mtd/spi-nor.rst b/Documentation/mtd/spi-nor.rst
new file mode 100644
index 000000000000..f5333e3bf486
--- /dev/null
+++ b/Documentation/mtd/spi-nor.rst
@@ -0,0 +1,66 @@
+=================
+SPI NOR framework
+=================
+
+Part I - Why do we need this framework?
+---------------------------------------
+
+SPI bus controllers (drivers/spi/) only deal with streams of bytes; the bus
+controller operates agnostic of the specific device attached. However, some
+controllers (such as Freescale's QuadSPI controller) cannot easily handle
+arbitrary streams of bytes, but rather are designed specifically for SPI NOR.
+
+In particular, Freescale's QuadSPI controller must know the NOR commands to
+find the right LUT sequence. Unfortunately, the SPI subsystem has no notion of
+opcodes, addresses, or data payloads; a SPI controller simply knows to send or
+receive bytes (Tx and Rx). Therefore, we must define a new layering scheme under
+which the controller driver is aware of the opcodes, addressing, and other
+details of the SPI NOR protocol.
+
+Part II - How does the framework work?
+--------------------------------------
+
+This framework just adds a new layer between the MTD and the SPI bus driver.
+With this new layer, the SPI NOR controller driver does not depend on the
+m25p80 code anymore.
+
+Before this framework, the layer is like::
+
+                   MTD
+         ------------------------
+                  m25p80
+         ------------------------
+	       SPI bus driver
+         ------------------------
+	        SPI NOR chip
+
+   After this framework, the layer is like:
+                   MTD
+         ------------------------
+              SPI NOR framework
+         ------------------------
+                  m25p80
+         ------------------------
+	       SPI bus driver
+         ------------------------
+	       SPI NOR chip
+
+  With the SPI NOR controller driver (Freescale QuadSPI), it looks like:
+                   MTD
+         ------------------------
+              SPI NOR framework
+         ------------------------
+                fsl-quadSPI
+         ------------------------
+	       SPI NOR chip
+
+Part III - How can drivers use the framework?
+---------------------------------------------
+
+The main API is spi_nor_scan(). Before you call the hook, a driver should
+initialize the necessary fields for spi_nor{}. Please see
+drivers/mtd/spi-nor/spi-nor.c for detail. Please also refer to fsl-quadspi.c
+when you want to write a new driver for a SPI NOR controller.
+Another API is spi_nor_restore(), this is used to restore the status of SPI
+flash chip such as addressing mode. Call it whenever detach the driver from
+device or reboot the system.
diff --git a/Documentation/mtd/spi-nor.txt b/Documentation/mtd/spi-nor.txt
deleted file mode 100644
index da1fbff5a24c..000000000000
--- a/Documentation/mtd/spi-nor.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-                          SPI NOR framework
-               ============================================
-
-Part I - Why do we need this framework?
----------------------------------------
-
-SPI bus controllers (drivers/spi/) only deal with streams of bytes; the bus
-controller operates agnostic of the specific device attached. However, some
-controllers (such as Freescale's QuadSPI controller) cannot easily handle
-arbitrary streams of bytes, but rather are designed specifically for SPI NOR.
-
-In particular, Freescale's QuadSPI controller must know the NOR commands to
-find the right LUT sequence. Unfortunately, the SPI subsystem has no notion of
-opcodes, addresses, or data payloads; a SPI controller simply knows to send or
-receive bytes (Tx and Rx). Therefore, we must define a new layering scheme under
-which the controller driver is aware of the opcodes, addressing, and other
-details of the SPI NOR protocol.
-
-Part II - How does the framework work?
---------------------------------------
-
-This framework just adds a new layer between the MTD and the SPI bus driver.
-With this new layer, the SPI NOR controller driver does not depend on the
-m25p80 code anymore.
-
-   Before this framework, the layer is like:
-
-                   MTD
-         ------------------------
-                  m25p80
-         ------------------------
-	       SPI bus driver
-         ------------------------
-	        SPI NOR chip
-
-   After this framework, the layer is like:
-                   MTD
-         ------------------------
-              SPI NOR framework
-         ------------------------
-                  m25p80
-         ------------------------
-	       SPI bus driver
-         ------------------------
-	       SPI NOR chip
-
-  With the SPI NOR controller driver (Freescale QuadSPI), it looks like:
-                   MTD
-         ------------------------
-              SPI NOR framework
-         ------------------------
-                fsl-quadSPI
-         ------------------------
-	       SPI NOR chip
-
-Part III - How can drivers use the framework?
----------------------------------------------
-
-The main API is spi_nor_scan(). Before you call the hook, a driver should
-initialize the necessary fields for spi_nor{}. Please see
-drivers/mtd/spi-nor/spi-nor.c for detail. Please also refer to fsl-quadspi.c
-when you want to write a new driver for a SPI NOR controller.
-Another API is spi_nor_restore(), this is used to restore the status of SPI
-flash chip such as addressing mode. Call it whenever detach the driver from
-device or reboot the system.
-- 
cgit 


From b0a4aa950c68b5010831ecfc450510c64e4d80ba Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 14:21:14 -0300
Subject: docs: nvdimm: convert to ReST

Rename the nvdimm documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/nvdimm/btt.rst      | 285 ++++++++++++
 Documentation/nvdimm/btt.txt      | 273 ------------
 Documentation/nvdimm/index.rst    |  12 +
 Documentation/nvdimm/nvdimm.rst   | 887 ++++++++++++++++++++++++++++++++++++++
 Documentation/nvdimm/nvdimm.txt   | 815 ----------------------------------
 Documentation/nvdimm/security.rst | 143 ++++++
 Documentation/nvdimm/security.txt | 141 ------
 7 files changed, 1327 insertions(+), 1229 deletions(-)
 create mode 100644 Documentation/nvdimm/btt.rst
 delete mode 100644 Documentation/nvdimm/btt.txt
 create mode 100644 Documentation/nvdimm/index.rst
 create mode 100644 Documentation/nvdimm/nvdimm.rst
 delete mode 100644 Documentation/nvdimm/nvdimm.txt
 create mode 100644 Documentation/nvdimm/security.rst
 delete mode 100644 Documentation/nvdimm/security.txt

(limited to 'Documentation')

diff --git a/Documentation/nvdimm/btt.rst b/Documentation/nvdimm/btt.rst
new file mode 100644
index 000000000000..2d8269f834bd
--- /dev/null
+++ b/Documentation/nvdimm/btt.rst
@@ -0,0 +1,285 @@
+=============================
+BTT - Block Translation Table
+=============================
+
+
+1. Introduction
+===============
+
+Persistent memory based storage is able to perform IO at byte (or more
+accurately, cache line) granularity. However, we often want to expose such
+storage as traditional block devices. The block drivers for persistent memory
+will do exactly this. However, they do not provide any atomicity guarantees.
+Traditional SSDs typically provide protection against torn sectors in hardware,
+using stored energy in capacitors to complete in-flight block writes, or perhaps
+in firmware. We don't have this luxury with persistent memory - if a write is in
+progress, and we experience a power failure, the block will contain a mix of old
+and new data. Applications may not be prepared to handle such a scenario.
+
+The Block Translation Table (BTT) provides atomic sector update semantics for
+persistent memory devices, so that applications that rely on sector writes not
+being torn can continue to do so. The BTT manifests itself as a stacked block
+device, and reserves a portion of the underlying storage for its metadata. At
+the heart of it, is an indirection table that re-maps all the blocks on the
+volume. It can be thought of as an extremely simple file system that only
+provides atomic sector updates.
+
+
+2. Static Layout
+================
+
+The underlying storage on which a BTT can be laid out is not limited in any way.
+The BTT, however, splits the available space into chunks of up to 512 GiB,
+called "Arenas".
+
+Each arena follows the same layout for its metadata, and all references in an
+arena are internal to it (with the exception of one field that points to the
+next arena). The following depicts the "On-disk" metadata layout::
+
+
+    Backing Store     +------->  Arena
+  +---------------+   |   +------------------+
+  |               |   |   | Arena info block |
+  |    Arena 0    +---+   |       4K         |
+  |     512G      |       +------------------+
+  |               |       |                  |
+  +---------------+       |                  |
+  |               |       |                  |
+  |    Arena 1    |       |   Data Blocks    |
+  |     512G      |       |                  |
+  |               |       |                  |
+  +---------------+       |                  |
+  |       .       |       |                  |
+  |       .       |       |                  |
+  |       .       |       |                  |
+  |               |       |                  |
+  |               |       |                  |
+  +---------------+       +------------------+
+                          |                  |
+                          |     BTT Map      |
+                          |                  |
+                          |                  |
+                          +------------------+
+                          |                  |
+                          |     BTT Flog     |
+                          |                  |
+                          +------------------+
+                          | Info block copy  |
+                          |       4K         |
+                          +------------------+
+
+
+3. Theory of Operation
+======================
+
+
+a. The BTT Map
+--------------
+
+The map is a simple lookup/indirection table that maps an LBA to an internal
+block. Each map entry is 32 bits. The two most significant bits are special
+flags, and the remaining form the internal block number.
+
+======== =============================================================
+Bit      Description
+======== =============================================================
+31 - 30	 Error and Zero flags - Used in the following way:
+
+	   == ==  ====================================================
+	   31 30  Description
+	   == ==  ====================================================
+	   0  0	  Initial state. Reads return zeroes; Premap = Postmap
+	   0  1	  Zero state: Reads return zeroes
+	   1  0	  Error state: Reads fail; Writes clear 'E' bit
+	   1  1	  Normal Block – has valid postmap
+	   == ==  ====================================================
+
+29 - 0	 Mappings to internal 'postmap' blocks
+======== =============================================================
+
+
+Some of the terminology that will be subsequently used:
+
+============	================================================================
+External LBA	LBA as made visible to upper layers.
+ABA		Arena Block Address - Block offset/number within an arena
+Premap ABA	The block offset into an arena, which was decided upon by range
+		checking the External LBA
+Postmap ABA	The block number in the "Data Blocks" area obtained after
+		indirection from the map
+nfree		The number of free blocks that are maintained at any given time.
+		This is the number of concurrent writes that can happen to the
+		arena.
+============	================================================================
+
+
+For example, after adding a BTT, we surface a disk of 1024G. We get a read for
+the external LBA at 768G. This falls into the second arena, and of the 512G
+worth of blocks that this arena contributes, this block is at 256G. Thus, the
+premap ABA is 256G. We now refer to the map, and find out the mapping for block
+'X' (256G) points to block 'Y', say '64'. Thus the postmap ABA is 64.
+
+
+b. The BTT Flog
+---------------
+
+The BTT provides sector atomicity by making every write an "allocating write",
+i.e. Every write goes to a "free" block. A running list of free blocks is
+maintained in the form of the BTT flog. 'Flog' is a combination of the words
+"free list" and "log". The flog contains 'nfree' entries, and an entry contains:
+
+========  =====================================================================
+lba       The premap ABA that is being written to
+old_map   The old postmap ABA - after 'this' write completes, this will be a
+	  free block.
+new_map   The new postmap ABA. The map will up updated to reflect this
+	  lba->postmap_aba mapping, but we log it here in case we have to
+	  recover.
+seq	  Sequence number to mark which of the 2 sections of this flog entry is
+	  valid/newest. It cycles between 01->10->11->01 (binary) under normal
+	  operation, with 00 indicating an uninitialized state.
+lba'	  alternate lba entry
+old_map'  alternate old postmap entry
+new_map'  alternate new postmap entry
+seq'	  alternate sequence number.
+========  =====================================================================
+
+Each of the above fields is 32-bit, making one entry 32 bytes. Entries are also
+padded to 64 bytes to avoid cache line sharing or aliasing. Flog updates are
+done such that for any entry being written, it:
+a. overwrites the 'old' section in the entry based on sequence numbers
+b. writes the 'new' section such that the sequence number is written last.
+
+
+c. The concept of lanes
+-----------------------
+
+While 'nfree' describes the number of concurrent IOs an arena can process
+concurrently, 'nlanes' is the number of IOs the BTT device as a whole can
+process::
+
+	nlanes = min(nfree, num_cpus)
+
+A lane number is obtained at the start of any IO, and is used for indexing into
+all the on-disk and in-memory data structures for the duration of the IO. If
+there are more CPUs than the max number of available lanes, than lanes are
+protected by spinlocks.
+
+
+d. In-memory data structure: Read Tracking Table (RTT)
+------------------------------------------------------
+
+Consider a case where we have two threads, one doing reads and the other,
+writes. We can hit a condition where the writer thread grabs a free block to do
+a new IO, but the (slow) reader thread is still reading from it. In other words,
+the reader consulted a map entry, and started reading the corresponding block. A
+writer started writing to the same external LBA, and finished the write updating
+the map for that external LBA to point to its new postmap ABA. At this point the
+internal, postmap block that the reader is (still) reading has been inserted
+into the list of free blocks. If another write comes in for the same LBA, it can
+grab this free block, and start writing to it, causing the reader to read
+incorrect data. To prevent this, we introduce the RTT.
+
+The RTT is a simple, per arena table with 'nfree' entries. Every reader inserts
+into rtt[lane_number], the postmap ABA it is reading, and clears it after the
+read is complete. Every writer thread, after grabbing a free block, checks the
+RTT for its presence. If the postmap free block is in the RTT, it waits till the
+reader clears the RTT entry, and only then starts writing to it.
+
+
+e. In-memory data structure: map locks
+--------------------------------------
+
+Consider a case where two writer threads are writing to the same LBA. There can
+be a race in the following sequence of steps::
+
+	free[lane] = map[premap_aba]
+	map[premap_aba] = postmap_aba
+
+Both threads can update their respective free[lane] with the same old, freed
+postmap_aba. This has made the layout inconsistent by losing a free entry, and
+at the same time, duplicating another free entry for two lanes.
+
+To solve this, we could have a single map lock (per arena) that has to be taken
+before performing the above sequence, but we feel that could be too contentious.
+Instead we use an array of (nfree) map_locks that is indexed by
+(premap_aba modulo nfree).
+
+
+f. Reconstruction from the Flog
+-------------------------------
+
+On startup, we analyze the BTT flog to create our list of free blocks. We walk
+through all the entries, and for each lane, of the set of two possible
+'sections', we always look at the most recent one only (based on the sequence
+number). The reconstruction rules/steps are simple:
+
+- Read map[log_entry.lba].
+- If log_entry.new matches the map entry, then log_entry.old is free.
+- If log_entry.new does not match the map entry, then log_entry.new is free.
+  (This case can only be caused by power-fails/unsafe shutdowns)
+
+
+g. Summarizing - Read and Write flows
+-------------------------------------
+
+Read:
+
+1.  Convert external LBA to arena number + pre-map ABA
+2.  Get a lane (and take lane_lock)
+3.  Read map to get the entry for this pre-map ABA
+4.  Enter post-map ABA into RTT[lane]
+5.  If TRIM flag set in map, return zeroes, and end IO (go to step 8)
+6.  If ERROR flag set in map, end IO with EIO (go to step 8)
+7.  Read data from this block
+8.  Remove post-map ABA entry from RTT[lane]
+9.  Release lane (and lane_lock)
+
+Write:
+
+1.  Convert external LBA to Arena number + pre-map ABA
+2.  Get a lane (and take lane_lock)
+3.  Use lane to index into in-memory free list and obtain a new block, next flog
+    index, next sequence number
+4.  Scan the RTT to check if free block is present, and spin/wait if it is.
+5.  Write data to this free block
+6.  Read map to get the existing post-map ABA entry for this pre-map ABA
+7.  Write flog entry: [premap_aba / old postmap_aba / new postmap_aba / seq_num]
+8.  Write new post-map ABA into map.
+9.  Write old post-map entry into the free list
+10. Calculate next sequence number and write into the free list entry
+11. Release lane (and lane_lock)
+
+
+4. Error Handling
+=================
+
+An arena would be in an error state if any of the metadata is corrupted
+irrecoverably, either due to a bug or a media error. The following conditions
+indicate an error:
+
+- Info block checksum does not match (and recovering from the copy also fails)
+- All internal available blocks are not uniquely and entirely addressed by the
+  sum of mapped blocks and free blocks (from the BTT flog).
+- Rebuilding free list from the flog reveals missing/duplicate/impossible
+  entries
+- A map entry is out of bounds
+
+If any of these error conditions are encountered, the arena is put into a read
+only state using a flag in the info block.
+
+
+5. Usage
+========
+
+The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem
+(pmem, or blk mode). The easiest way to set up such a namespace is using the
+'ndctl' utility [1]:
+
+For example, the ndctl command line to setup a btt with a 4k sector size is::
+
+    ndctl create-namespace -f -e namespace0.0 -m sector -l 4k
+
+See ndctl create-namespace --help for more options.
+
+[1]: https://github.com/pmem/ndctl
diff --git a/Documentation/nvdimm/btt.txt b/Documentation/nvdimm/btt.txt
deleted file mode 100644
index e293fb664924..000000000000
--- a/Documentation/nvdimm/btt.txt
+++ /dev/null
@@ -1,273 +0,0 @@
-BTT - Block Translation Table
-=============================
-
-
-1. Introduction
----------------
-
-Persistent memory based storage is able to perform IO at byte (or more
-accurately, cache line) granularity. However, we often want to expose such
-storage as traditional block devices. The block drivers for persistent memory
-will do exactly this. However, they do not provide any atomicity guarantees.
-Traditional SSDs typically provide protection against torn sectors in hardware,
-using stored energy in capacitors to complete in-flight block writes, or perhaps
-in firmware. We don't have this luxury with persistent memory - if a write is in
-progress, and we experience a power failure, the block will contain a mix of old
-and new data. Applications may not be prepared to handle such a scenario.
-
-The Block Translation Table (BTT) provides atomic sector update semantics for
-persistent memory devices, so that applications that rely on sector writes not
-being torn can continue to do so. The BTT manifests itself as a stacked block
-device, and reserves a portion of the underlying storage for its metadata. At
-the heart of it, is an indirection table that re-maps all the blocks on the
-volume. It can be thought of as an extremely simple file system that only
-provides atomic sector updates.
-
-
-2. Static Layout
-----------------
-
-The underlying storage on which a BTT can be laid out is not limited in any way.
-The BTT, however, splits the available space into chunks of up to 512 GiB,
-called "Arenas".
-
-Each arena follows the same layout for its metadata, and all references in an
-arena are internal to it (with the exception of one field that points to the
-next arena). The following depicts the "On-disk" metadata layout:
-
-
-  Backing Store     +------->  Arena
-+---------------+   |   +------------------+
-|               |   |   | Arena info block |
-|    Arena 0    +---+   |       4K         |
-|     512G      |       +------------------+
-|               |       |                  |
-+---------------+       |                  |
-|               |       |                  |
-|    Arena 1    |       |   Data Blocks    |
-|     512G      |       |                  |
-|               |       |                  |
-+---------------+       |                  |
-|       .       |       |                  |
-|       .       |       |                  |
-|       .       |       |                  |
-|               |       |                  |
-|               |       |                  |
-+---------------+       +------------------+
-                        |                  |
-                        |     BTT Map      |
-                        |                  |
-                        |                  |
-                        +------------------+
-                        |                  |
-                        |     BTT Flog     |
-                        |                  |
-                        +------------------+
-                        | Info block copy  |
-                        |       4K         |
-                        +------------------+
-
-
-3. Theory of Operation
-----------------------
-
-
-a. The BTT Map
---------------
-
-The map is a simple lookup/indirection table that maps an LBA to an internal
-block. Each map entry is 32 bits. The two most significant bits are special
-flags, and the remaining form the internal block number.
-
-Bit      Description
-31 - 30	: Error and Zero flags - Used in the following way:
-	 Bit		      Description
-	31 30
-	-----------------------------------------------------------------------
-	 00	Initial state. Reads return zeroes; Premap = Postmap
-	 01	Zero state: Reads return zeroes
-	 10	Error state: Reads fail; Writes clear 'E' bit
-	 11	Normal Block – has valid postmap
-
-
-29 - 0	: Mappings to internal 'postmap' blocks
-
-
-Some of the terminology that will be subsequently used:
-
-External LBA  : LBA as made visible to upper layers.
-ABA           : Arena Block Address - Block offset/number within an arena
-Premap ABA    : The block offset into an arena, which was decided upon by range
-		checking the External LBA
-Postmap ABA   : The block number in the "Data Blocks" area obtained after
-		indirection from the map
-nfree	      : The number of free blocks that are maintained at any given time.
-		This is the number of concurrent writes that can happen to the
-		arena.
-
-
-For example, after adding a BTT, we surface a disk of 1024G. We get a read for
-the external LBA at 768G. This falls into the second arena, and of the 512G
-worth of blocks that this arena contributes, this block is at 256G. Thus, the
-premap ABA is 256G. We now refer to the map, and find out the mapping for block
-'X' (256G) points to block 'Y', say '64'. Thus the postmap ABA is 64.
-
-
-b. The BTT Flog
----------------
-
-The BTT provides sector atomicity by making every write an "allocating write",
-i.e. Every write goes to a "free" block. A running list of free blocks is
-maintained in the form of the BTT flog. 'Flog' is a combination of the words
-"free list" and "log". The flog contains 'nfree' entries, and an entry contains:
-
-lba     : The premap ABA that is being written to
-old_map : The old postmap ABA - after 'this' write completes, this will be a
-	  free block.
-new_map : The new postmap ABA. The map will up updated to reflect this
-	  lba->postmap_aba mapping, but we log it here in case we have to
-	  recover.
-seq	: Sequence number to mark which of the 2 sections of this flog entry is
-	  valid/newest. It cycles between 01->10->11->01 (binary) under normal
-	  operation, with 00 indicating an uninitialized state.
-lba'	: alternate lba entry
-old_map': alternate old postmap entry
-new_map': alternate new postmap entry
-seq'	: alternate sequence number.
-
-Each of the above fields is 32-bit, making one entry 32 bytes. Entries are also
-padded to 64 bytes to avoid cache line sharing or aliasing. Flog updates are
-done such that for any entry being written, it:
-a. overwrites the 'old' section in the entry based on sequence numbers
-b. writes the 'new' section such that the sequence number is written last.
-
-
-c. The concept of lanes
------------------------
-
-While 'nfree' describes the number of concurrent IOs an arena can process
-concurrently, 'nlanes' is the number of IOs the BTT device as a whole can
-process.
- nlanes = min(nfree, num_cpus)
-A lane number is obtained at the start of any IO, and is used for indexing into
-all the on-disk and in-memory data structures for the duration of the IO. If
-there are more CPUs than the max number of available lanes, than lanes are
-protected by spinlocks.
-
-
-d. In-memory data structure: Read Tracking Table (RTT)
-------------------------------------------------------
-
-Consider a case where we have two threads, one doing reads and the other,
-writes. We can hit a condition where the writer thread grabs a free block to do
-a new IO, but the (slow) reader thread is still reading from it. In other words,
-the reader consulted a map entry, and started reading the corresponding block. A
-writer started writing to the same external LBA, and finished the write updating
-the map for that external LBA to point to its new postmap ABA. At this point the
-internal, postmap block that the reader is (still) reading has been inserted
-into the list of free blocks. If another write comes in for the same LBA, it can
-grab this free block, and start writing to it, causing the reader to read
-incorrect data. To prevent this, we introduce the RTT.
-
-The RTT is a simple, per arena table with 'nfree' entries. Every reader inserts
-into rtt[lane_number], the postmap ABA it is reading, and clears it after the
-read is complete. Every writer thread, after grabbing a free block, checks the
-RTT for its presence. If the postmap free block is in the RTT, it waits till the
-reader clears the RTT entry, and only then starts writing to it.
-
-
-e. In-memory data structure: map locks
---------------------------------------
-
-Consider a case where two writer threads are writing to the same LBA. There can
-be a race in the following sequence of steps:
-
-free[lane] = map[premap_aba]
-map[premap_aba] = postmap_aba
-
-Both threads can update their respective free[lane] with the same old, freed
-postmap_aba. This has made the layout inconsistent by losing a free entry, and
-at the same time, duplicating another free entry for two lanes.
-
-To solve this, we could have a single map lock (per arena) that has to be taken
-before performing the above sequence, but we feel that could be too contentious.
-Instead we use an array of (nfree) map_locks that is indexed by
-(premap_aba modulo nfree).
-
-
-f. Reconstruction from the Flog
--------------------------------
-
-On startup, we analyze the BTT flog to create our list of free blocks. We walk
-through all the entries, and for each lane, of the set of two possible
-'sections', we always look at the most recent one only (based on the sequence
-number). The reconstruction rules/steps are simple:
-- Read map[log_entry.lba].
-- If log_entry.new matches the map entry, then log_entry.old is free.
-- If log_entry.new does not match the map entry, then log_entry.new is free.
-  (This case can only be caused by power-fails/unsafe shutdowns)
-
-
-g. Summarizing - Read and Write flows
--------------------------------------
-
-Read:
-
-1.  Convert external LBA to arena number + pre-map ABA
-2.  Get a lane (and take lane_lock)
-3.  Read map to get the entry for this pre-map ABA
-4.  Enter post-map ABA into RTT[lane]
-5.  If TRIM flag set in map, return zeroes, and end IO (go to step 8)
-6.  If ERROR flag set in map, end IO with EIO (go to step 8)
-7.  Read data from this block
-8.  Remove post-map ABA entry from RTT[lane]
-9.  Release lane (and lane_lock)
-
-Write:
-
-1.  Convert external LBA to Arena number + pre-map ABA
-2.  Get a lane (and take lane_lock)
-3.  Use lane to index into in-memory free list and obtain a new block, next flog
-        index, next sequence number
-4.  Scan the RTT to check if free block is present, and spin/wait if it is.
-5.  Write data to this free block
-6.  Read map to get the existing post-map ABA entry for this pre-map ABA
-7.  Write flog entry: [premap_aba / old postmap_aba / new postmap_aba / seq_num]
-8.  Write new post-map ABA into map.
-9.  Write old post-map entry into the free list
-10. Calculate next sequence number and write into the free list entry
-11. Release lane (and lane_lock)
-
-
-4. Error Handling
-=================
-
-An arena would be in an error state if any of the metadata is corrupted
-irrecoverably, either due to a bug or a media error. The following conditions
-indicate an error:
-- Info block checksum does not match (and recovering from the copy also fails)
-- All internal available blocks are not uniquely and entirely addressed by the
-  sum of mapped blocks and free blocks (from the BTT flog).
-- Rebuilding free list from the flog reveals missing/duplicate/impossible
-  entries
-- A map entry is out of bounds
-
-If any of these error conditions are encountered, the arena is put into a read
-only state using a flag in the info block.
-
-
-5. Usage
-========
-
-The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem
-(pmem, or blk mode). The easiest way to set up such a namespace is using the
-'ndctl' utility [1]:
-
-For example, the ndctl command line to setup a btt with a 4k sector size is:
-
-    ndctl create-namespace -f -e namespace0.0 -m sector -l 4k
-
-See ndctl create-namespace --help for more options.
-
-[1]: https://github.com/pmem/ndctl
-
diff --git a/Documentation/nvdimm/index.rst b/Documentation/nvdimm/index.rst
new file mode 100644
index 000000000000..1a3402d3775e
--- /dev/null
+++ b/Documentation/nvdimm/index.rst
@@ -0,0 +1,12 @@
+:orphan:
+
+===================================
+Non-Volatile Memory Device (NVDIMM)
+===================================
+
+.. toctree::
+   :maxdepth: 1
+
+   nvdimm
+   btt
+   security
diff --git a/Documentation/nvdimm/nvdimm.rst b/Documentation/nvdimm/nvdimm.rst
new file mode 100644
index 000000000000..08f855cbb4e6
--- /dev/null
+++ b/Documentation/nvdimm/nvdimm.rst
@@ -0,0 +1,887 @@
+===============================
+LIBNVDIMM: Non-Volatile Devices
+===============================
+
+libnvdimm - kernel / libndctl - userspace helper library
+
+linux-nvdimm@lists.01.org
+
+Version 13
+
+.. contents:
+
+	Glossary
+	Overview
+	    Supporting Documents
+	    Git Trees
+	LIBNVDIMM PMEM and BLK
+	Why BLK?
+	    PMEM vs BLK
+	        BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX
+	Example NVDIMM Platform
+	LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API
+	    LIBNDCTL: Context
+	        libndctl: instantiate a new library context example
+	    LIBNVDIMM/LIBNDCTL: Bus
+	        libnvdimm: control class device in /sys/class
+	        libnvdimm: bus
+	        libndctl: bus enumeration example
+	    LIBNVDIMM/LIBNDCTL: DIMM (NMEM)
+	        libnvdimm: DIMM (NMEM)
+	        libndctl: DIMM enumeration example
+	    LIBNVDIMM/LIBNDCTL: Region
+	        libnvdimm: region
+	        libndctl: region enumeration example
+	        Why Not Encode the Region Type into the Region Name?
+	        How Do I Determine the Major Type of a Region?
+	    LIBNVDIMM/LIBNDCTL: Namespace
+	        libnvdimm: namespace
+	        libndctl: namespace enumeration example
+	        libndctl: namespace creation example
+	        Why the Term "namespace"?
+	    LIBNVDIMM/LIBNDCTL: Block Translation Table "btt"
+	        libnvdimm: btt layout
+	        libndctl: btt creation example
+	Summary LIBNDCTL Diagram
+
+
+Glossary
+========
+
+PMEM:
+  A system-physical-address range where writes are persistent.  A
+  block device composed of PMEM is capable of DAX.  A PMEM address range
+  may span an interleave of several DIMMs.
+
+BLK:
+  A set of one or more programmable memory mapped apertures provided
+  by a DIMM to access its media.  This indirection precludes the
+  performance benefit of interleaving, but enables DIMM-bounded failure
+  modes.
+
+DPA:
+  DIMM Physical Address, is a DIMM-relative offset.  With one DIMM in
+  the system there would be a 1:1 system-physical-address:DPA association.
+  Once more DIMMs are added a memory controller interleave must be
+  decoded to determine the DPA associated with a given
+  system-physical-address.  BLK capacity always has a 1:1 relationship
+  with a single-DIMM's DPA range.
+
+DAX:
+  File system extensions to bypass the page cache and block layer to
+  mmap persistent memory, from a PMEM block device, directly into a
+  process address space.
+
+DSM:
+  Device Specific Method: ACPI method to to control specific
+  device - in this case the firmware.
+
+DCR:
+  NVDIMM Control Region Structure defined in ACPI 6 Section 5.2.25.5.
+  It defines a vendor-id, device-id, and interface format for a given DIMM.
+
+BTT:
+  Block Translation Table: Persistent memory is byte addressable.
+  Existing software may have an expectation that the power-fail-atomicity
+  of writes is at least one sector, 512 bytes.  The BTT is an indirection
+  table with atomic update semantics to front a PMEM/BLK block device
+  driver and present arbitrary atomic sector sizes.
+
+LABEL:
+  Metadata stored on a DIMM device that partitions and identifies
+  (persistently names) storage between PMEM and BLK.  It also partitions
+  BLK storage to host BTTs with different parameters per BLK-partition.
+  Note that traditional partition tables, GPT/MBR, are layered on top of a
+  BLK or PMEM device.
+
+
+Overview
+========
+
+The LIBNVDIMM subsystem provides support for three types of NVDIMMs, namely,
+PMEM, BLK, and NVDIMM devices that can simultaneously support both PMEM
+and BLK mode access.  These three modes of operation are described by
+the "NVDIMM Firmware Interface Table" (NFIT) in ACPI 6.  While the LIBNVDIMM
+implementation is generic and supports pre-NFIT platforms, it was guided
+by the superset of capabilities need to support this ACPI 6 definition
+for NVDIMM resources.  The bulk of the kernel implementation is in place
+to handle the case where DPA accessible via PMEM is aliased with DPA
+accessible via BLK.  When that occurs a LABEL is needed to reserve DPA
+for exclusive access via one mode a time.
+
+Supporting Documents
+--------------------
+
+ACPI 6:
+	http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf
+NVDIMM Namespace:
+	http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf
+DSM Interface Example:
+	http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
+Driver Writer's Guide:
+	http://pmem.io/documents/NVDIMM_Driver_Writers_Guide.pdf
+
+Git Trees
+---------
+
+LIBNVDIMM:
+	https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git
+LIBNDCTL:
+	https://github.com/pmem/ndctl.git
+PMEM:
+	https://github.com/01org/prd
+
+
+LIBNVDIMM PMEM and BLK
+======================
+
+Prior to the arrival of the NFIT, non-volatile memory was described to a
+system in various ad-hoc ways.  Usually only the bare minimum was
+provided, namely, a single system-physical-address range where writes
+are expected to be durable after a system power loss.  Now, the NFIT
+specification standardizes not only the description of PMEM, but also
+BLK and platform message-passing entry points for control and
+configuration.
+
+For each NVDIMM access method (PMEM, BLK), LIBNVDIMM provides a block
+device driver:
+
+    1. PMEM (nd_pmem.ko): Drives a system-physical-address range.  This
+       range is contiguous in system memory and may be interleaved (hardware
+       memory controller striped) across multiple DIMMs.  When interleaved the
+       platform may optionally provide details of which DIMMs are participating
+       in the interleave.
+
+       Note that while LIBNVDIMM describes system-physical-address ranges that may
+       alias with BLK access as ND_NAMESPACE_PMEM ranges and those without
+       alias as ND_NAMESPACE_IO ranges, to the nd_pmem driver there is no
+       distinction.  The different device-types are an implementation detail
+       that userspace can exploit to implement policies like "only interface
+       with address ranges from certain DIMMs".  It is worth noting that when
+       aliasing is present and a DIMM lacks a label, then no block device can
+       be created by default as userspace needs to do at least one allocation
+       of DPA to the PMEM range.  In contrast ND_NAMESPACE_IO ranges, once
+       registered, can be immediately attached to nd_pmem.
+
+    2. BLK (nd_blk.ko): This driver performs I/O using a set of platform
+       defined apertures.  A set of apertures will access just one DIMM.
+       Multiple windows (apertures) allow multiple concurrent accesses, much like
+       tagged-command-queuing, and would likely be used by different threads or
+       different CPUs.
+
+       The NFIT specification defines a standard format for a BLK-aperture, but
+       the spec also allows for vendor specific layouts, and non-NFIT BLK
+       implementations may have other designs for BLK I/O.  For this reason
+       "nd_blk" calls back into platform-specific code to perform the I/O.
+
+       One such implementation is defined in the "Driver Writer's Guide" and "DSM
+       Interface Example".
+
+
+Why BLK?
+========
+
+While PMEM provides direct byte-addressable CPU-load/store access to
+NVDIMM storage, it does not provide the best system RAS (recovery,
+availability, and serviceability) model.  An access to a corrupted
+system-physical-address address causes a CPU exception while an access
+to a corrupted address through an BLK-aperture causes that block window
+to raise an error status in a register.  The latter is more aligned with
+the standard error model that host-bus-adapter attached disks present.
+
+Also, if an administrator ever wants to replace a memory it is easier to
+service a system at DIMM module boundaries.  Compare this to PMEM where
+data could be interleaved in an opaque hardware specific manner across
+several DIMMs.
+
+PMEM vs BLK
+-----------
+
+BLK-apertures solve these RAS problems, but their presence is also the
+major contributing factor to the complexity of the ND subsystem.  They
+complicate the implementation because PMEM and BLK alias in DPA space.
+Any given DIMM's DPA-range may contribute to one or more
+system-physical-address sets of interleaved DIMMs, *and* may also be
+accessed in its entirety through its BLK-aperture.  Accessing a DPA
+through a system-physical-address while simultaneously accessing the
+same DPA through a BLK-aperture has undefined results.  For this reason,
+DIMMs with this dual interface configuration include a DSM function to
+store/retrieve a LABEL.  The LABEL effectively partitions the DPA-space
+into exclusive system-physical-address and BLK-aperture accessible
+regions.  For simplicity a DIMM is allowed a PMEM "region" per each
+interleave set in which it is a member.  The remaining DPA space can be
+carved into an arbitrary number of BLK devices with discontiguous
+extents.
+
+BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+One of the few
+reasons to allow multiple BLK namespaces per REGION is so that each
+BLK-namespace can be configured with a BTT with unique atomic sector
+sizes.  While a PMEM device can host a BTT the LABEL specification does
+not provide for a sector size to be specified for a PMEM namespace.
+
+This is due to the expectation that the primary usage model for PMEM is
+via DAX, and the BTT is incompatible with DAX.  However, for the cases
+where an application or filesystem still needs atomic sector update
+guarantees it can register a BTT on a PMEM device or partition.  See
+LIBNVDIMM/NDCTL: Block Translation Table "btt"
+
+
+Example NVDIMM Platform
+=======================
+
+For the remainder of this document the following diagram will be
+referenced for any example sysfs layouts::
+
+
+                               (a)               (b)           DIMM   BLK-REGION
+            +-------------------+--------+--------+--------+
+  +------+  |       pm0.0       | blk2.0 | pm1.0  | blk2.1 |    0      region2
+  | imc0 +--+- - - region0- - - +--------+        +--------+
+  +--+---+  |       pm0.0       | blk3.0 | pm1.0  | blk3.1 |    1      region3
+     |      +-------------------+--------v        v--------+
+  +--+---+                               |                 |
+  | cpu0 |                                     region1
+  +--+---+                               |                 |
+     |      +----------------------------^        ^--------+
+  +--+---+  |           blk4.0           | pm1.0  | blk4.0 |    2      region4
+  | imc1 +--+----------------------------|        +--------+
+  +------+  |           blk5.0           | pm1.0  | blk5.0 |    3      region5
+            +----------------------------+--------+--------+
+
+In this platform we have four DIMMs and two memory controllers in one
+socket.  Each unique interface (BLK or PMEM) to DPA space is identified
+by a region device with a dynamically assigned id (REGION0 - REGION5).
+
+    1. The first portion of DIMM0 and DIMM1 are interleaved as REGION0. A
+       single PMEM namespace is created in the REGION0-SPA-range that spans most
+       of DIMM0 and DIMM1 with a user-specified name of "pm0.0". Some of that
+       interleaved system-physical-address range is reclaimed as BLK-aperture
+       accessed space starting at DPA-offset (a) into each DIMM.  In that
+       reclaimed space we create two BLK-aperture "namespaces" from REGION2 and
+       REGION3 where "blk2.0" and "blk3.0" are just human readable names that
+       could be set to any user-desired name in the LABEL.
+
+    2. In the last portion of DIMM0 and DIMM1 we have an interleaved
+       system-physical-address range, REGION1, that spans those two DIMMs as
+       well as DIMM2 and DIMM3.  Some of REGION1 is allocated to a PMEM namespace
+       named "pm1.0", the rest is reclaimed in 4 BLK-aperture namespaces (for
+       each DIMM in the interleave set), "blk2.1", "blk3.1", "blk4.0", and
+       "blk5.0".
+
+    3. The portion of DIMM2 and DIMM3 that do not participate in the REGION1
+       interleaved system-physical-address range (i.e. the DPA address past
+       offset (b) are also included in the "blk4.0" and "blk5.0" namespaces.
+       Note, that this example shows that BLK-aperture namespaces don't need to
+       be contiguous in DPA-space.
+
+    This bus is provided by the kernel under the device
+    /sys/devices/platform/nfit_test.0 when CONFIG_NFIT_TEST is enabled and
+    the nfit_test.ko module is loaded.  This not only test LIBNVDIMM but the
+    acpi_nfit.ko driver as well.
+
+
+LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API
+========================================================
+
+What follows is a description of the LIBNVDIMM sysfs layout and a
+corresponding object hierarchy diagram as viewed through the LIBNDCTL
+API.  The example sysfs paths and diagrams are relative to the Example
+NVDIMM Platform which is also the LIBNVDIMM bus used in the LIBNDCTL unit
+test.
+
+LIBNDCTL: Context
+-----------------
+
+Every API call in the LIBNDCTL library requires a context that holds the
+logging parameters and other library instance state.  The library is
+based on the libabc template:
+
+	https://git.kernel.org/cgit/linux/kernel/git/kay/libabc.git
+
+LIBNDCTL: instantiate a new library context example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+	struct ndctl_ctx *ctx;
+
+	if (ndctl_new(&ctx) == 0)
+		return ctx;
+	else
+		return NULL;
+
+LIBNVDIMM/LIBNDCTL: Bus
+-----------------------
+
+A bus has a 1:1 relationship with an NFIT.  The current expectation for
+ACPI based systems is that there is only ever one platform-global NFIT.
+That said, it is trivial to register multiple NFITs, the specification
+does not preclude it.  The infrastructure supports multiple busses and
+we use this capability to test multiple NFIT configurations in the unit
+test.
+
+LIBNVDIMM: control class device in /sys/class
+---------------------------------------------
+
+This character device accepts DSM messages to be passed to DIMM
+identified by its NFIT handle::
+
+	/sys/class/nd/ndctl0
+	|-- dev
+	|-- device -> ../../../ndbus0
+	|-- subsystem -> ../../../../../../../class/nd
+
+
+
+LIBNVDIMM: bus
+--------------
+
+::
+
+	struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
+	       struct nvdimm_bus_descriptor *nfit_desc);
+
+::
+
+	/sys/devices/platform/nfit_test.0/ndbus0
+	|-- commands
+	|-- nd
+	|-- nfit
+	|-- nmem0
+	|-- nmem1
+	|-- nmem2
+	|-- nmem3
+	|-- power
+	|-- provider
+	|-- region0
+	|-- region1
+	|-- region2
+	|-- region3
+	|-- region4
+	|-- region5
+	|-- uevent
+	`-- wait_probe
+
+LIBNDCTL: bus enumeration example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Find the bus handle that describes the bus from Example NVDIMM Platform::
+
+	static struct ndctl_bus *get_bus_by_provider(struct ndctl_ctx *ctx,
+			const char *provider)
+	{
+		struct ndctl_bus *bus;
+
+		ndctl_bus_foreach(ctx, bus)
+			if (strcmp(provider, ndctl_bus_get_provider(bus)) == 0)
+				return bus;
+
+		return NULL;
+	}
+
+	bus = get_bus_by_provider(ctx, "nfit_test.0");
+
+
+LIBNVDIMM/LIBNDCTL: DIMM (NMEM)
+-------------------------------
+
+The DIMM device provides a character device for sending commands to
+hardware, and it is a container for LABELs.  If the DIMM is defined by
+NFIT then an optional 'nfit' attribute sub-directory is available to add
+NFIT-specifics.
+
+Note that the kernel device name for "DIMMs" is "nmemX".  The NFIT
+describes these devices via "Memory Device to System Physical Address
+Range Mapping Structure", and there is no requirement that they actually
+be physical DIMMs, so we use a more generic name.
+
+LIBNVDIMM: DIMM (NMEM)
+^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+	struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
+			const struct attribute_group **groups, unsigned long flags,
+			unsigned long *dsm_mask);
+
+::
+
+	/sys/devices/platform/nfit_test.0/ndbus0
+	|-- nmem0
+	|   |-- available_slots
+	|   |-- commands
+	|   |-- dev
+	|   |-- devtype
+	|   |-- driver -> ../../../../../bus/nd/drivers/nvdimm
+	|   |-- modalias
+	|   |-- nfit
+	|   |   |-- device
+	|   |   |-- format
+	|   |   |-- handle
+	|   |   |-- phys_id
+	|   |   |-- rev_id
+	|   |   |-- serial
+	|   |   `-- vendor
+	|   |-- state
+	|   |-- subsystem -> ../../../../../bus/nd
+	|   `-- uevent
+	|-- nmem1
+	[..]
+
+
+LIBNDCTL: DIMM enumeration example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Note, in this example we are assuming NFIT-defined DIMMs which are
+identified by an "nfit_handle" a 32-bit value where:
+
+   - Bit 3:0 DIMM number within the memory channel
+   - Bit 7:4 memory channel number
+   - Bit 11:8 memory controller ID
+   - Bit 15:12 socket ID (within scope of a Node controller if node
+     controller is present)
+   - Bit 27:16 Node Controller ID
+   - Bit 31:28 Reserved
+
+::
+
+	static struct ndctl_dimm *get_dimm_by_handle(struct ndctl_bus *bus,
+	       unsigned int handle)
+	{
+		struct ndctl_dimm *dimm;
+
+		ndctl_dimm_foreach(bus, dimm)
+			if (ndctl_dimm_get_handle(dimm) == handle)
+				return dimm;
+
+		return NULL;
+	}
+
+	#define DIMM_HANDLE(n, s, i, c, d) \
+		(((n & 0xfff) << 16) | ((s & 0xf) << 12) | ((i & 0xf) << 8) \
+		 | ((c & 0xf) << 4) | (d & 0xf))
+
+	dimm = get_dimm_by_handle(bus, DIMM_HANDLE(0, 0, 0, 0, 0));
+
+LIBNVDIMM/LIBNDCTL: Region
+--------------------------
+
+A generic REGION device is registered for each PMEM range or BLK-aperture
+set.  Per the example there are 6 regions: 2 PMEM and 4 BLK-aperture
+sets on the "nfit_test.0" bus.  The primary role of regions are to be a
+container of "mappings".  A mapping is a tuple of <DIMM,
+DPA-start-offset, length>.
+
+LIBNVDIMM provides a built-in driver for these REGION devices.  This driver
+is responsible for reconciling the aliased DPA mappings across all
+regions, parsing the LABEL, if present, and then emitting NAMESPACE
+devices with the resolved/exclusive DPA-boundaries for the nd_pmem or
+nd_blk device driver to consume.
+
+In addition to the generic attributes of "mapping"s, "interleave_ways"
+and "size" the REGION device also exports some convenience attributes.
+"nstype" indicates the integer type of namespace-device this region
+emits, "devtype" duplicates the DEVTYPE variable stored by udev at the
+'add' event, "modalias" duplicates the MODALIAS variable stored by udev
+at the 'add' event, and finally, the optional "spa_index" is provided in
+the case where the region is defined by a SPA.
+
+LIBNVDIMM: region::
+
+	struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
+			struct nd_region_desc *ndr_desc);
+	struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
+			struct nd_region_desc *ndr_desc);
+
+::
+
+	/sys/devices/platform/nfit_test.0/ndbus0
+	|-- region0
+	|   |-- available_size
+	|   |-- btt0
+	|   |-- btt_seed
+	|   |-- devtype
+	|   |-- driver -> ../../../../../bus/nd/drivers/nd_region
+	|   |-- init_namespaces
+	|   |-- mapping0
+	|   |-- mapping1
+	|   |-- mappings
+	|   |-- modalias
+	|   |-- namespace0.0
+	|   |-- namespace_seed
+	|   |-- numa_node
+	|   |-- nfit
+	|   |   `-- spa_index
+	|   |-- nstype
+	|   |-- set_cookie
+	|   |-- size
+	|   |-- subsystem -> ../../../../../bus/nd
+	|   `-- uevent
+	|-- region1
+	[..]
+
+LIBNDCTL: region enumeration example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sample region retrieval routines based on NFIT-unique data like
+"spa_index" (interleave set id) for PMEM and "nfit_handle" (dimm id) for
+BLK::
+
+	static struct ndctl_region *get_pmem_region_by_spa_index(struct ndctl_bus *bus,
+			unsigned int spa_index)
+	{
+		struct ndctl_region *region;
+
+		ndctl_region_foreach(bus, region) {
+			if (ndctl_region_get_type(region) != ND_DEVICE_REGION_PMEM)
+				continue;
+			if (ndctl_region_get_spa_index(region) == spa_index)
+				return region;
+		}
+		return NULL;
+	}
+
+	static struct ndctl_region *get_blk_region_by_dimm_handle(struct ndctl_bus *bus,
+			unsigned int handle)
+	{
+		struct ndctl_region *region;
+
+		ndctl_region_foreach(bus, region) {
+			struct ndctl_mapping *map;
+
+			if (ndctl_region_get_type(region) != ND_DEVICE_REGION_BLOCK)
+				continue;
+			ndctl_mapping_foreach(region, map) {
+				struct ndctl_dimm *dimm = ndctl_mapping_get_dimm(map);
+
+				if (ndctl_dimm_get_handle(dimm) == handle)
+					return region;
+			}
+		}
+		return NULL;
+	}
+
+
+Why Not Encode the Region Type into the Region Name?
+----------------------------------------------------
+
+At first glance it seems since NFIT defines just PMEM and BLK interface
+types that we should simply name REGION devices with something derived
+from those type names.  However, the ND subsystem explicitly keeps the
+REGION name generic and expects userspace to always consider the
+region-attributes for four reasons:
+
+    1. There are already more than two REGION and "namespace" types.  For
+       PMEM there are two subtypes.  As mentioned previously we have PMEM where
+       the constituent DIMM devices are known and anonymous PMEM.  For BLK
+       regions the NFIT specification already anticipates vendor specific
+       implementations.  The exact distinction of what a region contains is in
+       the region-attributes not the region-name or the region-devtype.
+
+    2. A region with zero child-namespaces is a possible configuration.  For
+       example, the NFIT allows for a DCR to be published without a
+       corresponding BLK-aperture.  This equates to a DIMM that can only accept
+       control/configuration messages, but no i/o through a descendant block
+       device.  Again, this "type" is advertised in the attributes ('mappings'
+       == 0) and the name does not tell you much.
+
+    3. What if a third major interface type arises in the future?  Outside
+       of vendor specific implementations, it's not difficult to envision a
+       third class of interface type beyond BLK and PMEM.  With a generic name
+       for the REGION level of the device-hierarchy old userspace
+       implementations can still make sense of new kernel advertised
+       region-types.  Userspace can always rely on the generic region
+       attributes like "mappings", "size", etc and the expected child devices
+       named "namespace".  This generic format of the device-model hierarchy
+       allows the LIBNVDIMM and LIBNDCTL implementations to be more uniform and
+       future-proof.
+
+    4. There are more robust mechanisms for determining the major type of a
+       region than a device name.  See the next section, How Do I Determine the
+       Major Type of a Region?
+
+How Do I Determine the Major Type of a Region?
+----------------------------------------------
+
+Outside of the blanket recommendation of "use libndctl", or simply
+looking at the kernel header (/usr/include/linux/ndctl.h) to decode the
+"nstype" integer attribute, here are some other options.
+
+1. module alias lookup
+^^^^^^^^^^^^^^^^^^^^^^
+
+    The whole point of region/namespace device type differentiation is to
+    decide which block-device driver will attach to a given LIBNVDIMM namespace.
+    One can simply use the modalias to lookup the resulting module.  It's
+    important to note that this method is robust in the presence of a
+    vendor-specific driver down the road.  If a vendor-specific
+    implementation wants to supplant the standard nd_blk driver it can with
+    minimal impact to the rest of LIBNVDIMM.
+
+    In fact, a vendor may also want to have a vendor-specific region-driver
+    (outside of nd_region).  For example, if a vendor defined its own LABEL
+    format it would need its own region driver to parse that LABEL and emit
+    the resulting namespaces.  The output from module resolution is more
+    accurate than a region-name or region-devtype.
+
+2. udev
+^^^^^^^
+
+    The kernel "devtype" is registered in the udev database::
+
+	# udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region0
+	P: /devices/platform/nfit_test.0/ndbus0/region0
+	E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region0
+	E: DEVTYPE=nd_pmem
+	E: MODALIAS=nd:t2
+	E: SUBSYSTEM=nd
+
+	# udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region4
+	P: /devices/platform/nfit_test.0/ndbus0/region4
+	E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region4
+	E: DEVTYPE=nd_blk
+	E: MODALIAS=nd:t3
+	E: SUBSYSTEM=nd
+
+    ...and is available as a region attribute, but keep in mind that the
+    "devtype" does not indicate sub-type variations and scripts should
+    really be understanding the other attributes.
+
+3. type specific attributes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+    As it currently stands a BLK-aperture region will never have a
+    "nfit/spa_index" attribute, but neither will a non-NFIT PMEM region.  A
+    BLK region with a "mappings" value of 0 is, as mentioned above, a DIMM
+    that does not allow I/O.  A PMEM region with a "mappings" value of zero
+    is a simple system-physical-address range.
+
+
+LIBNVDIMM/LIBNDCTL: Namespace
+-----------------------------
+
+A REGION, after resolving DPA aliasing and LABEL specified boundaries,
+surfaces one or more "namespace" devices.  The arrival of a "namespace"
+device currently triggers either the nd_blk or nd_pmem driver to load
+and register a disk/block device.
+
+LIBNVDIMM: namespace
+^^^^^^^^^^^^^^^^^^^^
+
+Here is a sample layout from the three major types of NAMESPACE where
+namespace0.0 represents DIMM-info-backed PMEM (note that it has a 'uuid'
+attribute), namespace2.0 represents a BLK namespace (note it has a
+'sector_size' attribute) that, and namespace6.0 represents an anonymous
+PMEM namespace (note that has no 'uuid' attribute due to not support a
+LABEL)::
+
+	/sys/devices/platform/nfit_test.0/ndbus0/region0/namespace0.0
+	|-- alt_name
+	|-- devtype
+	|-- dpa_extents
+	|-- force_raw
+	|-- modalias
+	|-- numa_node
+	|-- resource
+	|-- size
+	|-- subsystem -> ../../../../../../bus/nd
+	|-- type
+	|-- uevent
+	`-- uuid
+	/sys/devices/platform/nfit_test.0/ndbus0/region2/namespace2.0
+	|-- alt_name
+	|-- devtype
+	|-- dpa_extents
+	|-- force_raw
+	|-- modalias
+	|-- numa_node
+	|-- sector_size
+	|-- size
+	|-- subsystem -> ../../../../../../bus/nd
+	|-- type
+	|-- uevent
+	`-- uuid
+	/sys/devices/platform/nfit_test.1/ndbus1/region6/namespace6.0
+	|-- block
+	|   `-- pmem0
+	|-- devtype
+	|-- driver -> ../../../../../../bus/nd/drivers/pmem
+	|-- force_raw
+	|-- modalias
+	|-- numa_node
+	|-- resource
+	|-- size
+	|-- subsystem -> ../../../../../../bus/nd
+	|-- type
+	`-- uevent
+
+LIBNDCTL: namespace enumeration example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Namespaces are indexed relative to their parent region, example below.
+These indexes are mostly static from boot to boot, but subsystem makes
+no guarantees in this regard.  For a static namespace identifier use its
+'uuid' attribute.
+
+::
+
+  static struct ndctl_namespace
+  *get_namespace_by_id(struct ndctl_region *region, unsigned int id)
+  {
+          struct ndctl_namespace *ndns;
+
+          ndctl_namespace_foreach(region, ndns)
+                  if (ndctl_namespace_get_id(ndns) == id)
+                          return ndns;
+
+          return NULL;
+  }
+
+LIBNDCTL: namespace creation example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Idle namespaces are automatically created by the kernel if a given
+region has enough available capacity to create a new namespace.
+Namespace instantiation involves finding an idle namespace and
+configuring it.  For the most part the setting of namespace attributes
+can occur in any order, the only constraint is that 'uuid' must be set
+before 'size'.  This enables the kernel to track DPA allocations
+internally with a static identifier::
+
+  static int configure_namespace(struct ndctl_region *region,
+                  struct ndctl_namespace *ndns,
+                  struct namespace_parameters *parameters)
+  {
+          char devname[50];
+
+          snprintf(devname, sizeof(devname), "namespace%d.%d",
+                          ndctl_region_get_id(region), paramaters->id);
+
+          ndctl_namespace_set_alt_name(ndns, devname);
+          /* 'uuid' must be set prior to setting size! */
+          ndctl_namespace_set_uuid(ndns, paramaters->uuid);
+          ndctl_namespace_set_size(ndns, paramaters->size);
+          /* unlike pmem namespaces, blk namespaces have a sector size */
+          if (parameters->lbasize)
+                  ndctl_namespace_set_sector_size(ndns, parameters->lbasize);
+          ndctl_namespace_enable(ndns);
+  }
+
+
+Why the Term "namespace"?
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+    1. Why not "volume" for instance?  "volume" ran the risk of confusing
+       ND (libnvdimm subsystem) to a volume manager like device-mapper.
+
+    2. The term originated to describe the sub-devices that can be created
+       within a NVME controller (see the nvme specification:
+       http://www.nvmexpress.org/specifications/), and NFIT namespaces are
+       meant to parallel the capabilities and configurability of
+       NVME-namespaces.
+
+
+LIBNVDIMM/LIBNDCTL: Block Translation Table "btt"
+-------------------------------------------------
+
+A BTT (design document: http://pmem.io/2014/09/23/btt.html) is a stacked
+block device driver that fronts either the whole block device or a
+partition of a block device emitted by either a PMEM or BLK NAMESPACE.
+
+LIBNVDIMM: btt layout
+^^^^^^^^^^^^^^^^^^^^^
+
+Every region will start out with at least one BTT device which is the
+seed device.  To activate it set the "namespace", "uuid", and
+"sector_size" attributes and then bind the device to the nd_pmem or
+nd_blk driver depending on the region type::
+
+	/sys/devices/platform/nfit_test.1/ndbus0/region0/btt0/
+	|-- namespace
+	|-- delete
+	|-- devtype
+	|-- modalias
+	|-- numa_node
+	|-- sector_size
+	|-- subsystem -> ../../../../../bus/nd
+	|-- uevent
+	`-- uuid
+
+LIBNDCTL: btt creation example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Similar to namespaces an idle BTT device is automatically created per
+region.  Each time this "seed" btt device is configured and enabled a new
+seed is created.  Creating a BTT configuration involves two steps of
+finding and idle BTT and assigning it to consume a PMEM or BLK namespace::
+
+	static struct ndctl_btt *get_idle_btt(struct ndctl_region *region)
+	{
+		struct ndctl_btt *btt;
+
+		ndctl_btt_foreach(region, btt)
+			if (!ndctl_btt_is_enabled(btt)
+					&& !ndctl_btt_is_configured(btt))
+				return btt;
+
+		return NULL;
+	}
+
+	static int configure_btt(struct ndctl_region *region,
+			struct btt_parameters *parameters)
+	{
+		btt = get_idle_btt(region);
+
+		ndctl_btt_set_uuid(btt, parameters->uuid);
+		ndctl_btt_set_sector_size(btt, parameters->sector_size);
+		ndctl_btt_set_namespace(btt, parameters->ndns);
+		/* turn off raw mode device */
+		ndctl_namespace_disable(parameters->ndns);
+		/* turn on btt access */
+		ndctl_btt_enable(btt);
+	}
+
+Once instantiated a new inactive btt seed device will appear underneath
+the region.
+
+Once a "namespace" is removed from a BTT that instance of the BTT device
+will be deleted or otherwise reset to default values.  This deletion is
+only at the device model level.  In order to destroy a BTT the "info
+block" needs to be destroyed.  Note, that to destroy a BTT the media
+needs to be written in raw mode.  By default, the kernel will autodetect
+the presence of a BTT and disable raw mode.  This autodetect behavior
+can be suppressed by enabling raw mode for the namespace via the
+ndctl_namespace_set_raw_mode() API.
+
+
+Summary LIBNDCTL Diagram
+------------------------
+
+For the given example above, here is the view of the objects as seen by the
+LIBNDCTL API::
+
+              +---+
+              |CTX|    +---------+   +--------------+  +---------------+
+              +-+-+  +-> REGION0 +---> NAMESPACE0.0 +--> PMEM8 "pm0.0" |
+                |    | +---------+   +--------------+  +---------------+
+  +-------+     |    | +---------+   +--------------+  +---------------+
+  | DIMM0 <-+   |    +-> REGION1 +---> NAMESPACE1.0 +--> PMEM6 "pm1.0" |
+  +-------+ |   |    | +---------+   +--------------+  +---------------+
+  | DIMM1 <-+ +-v--+ | +---------+   +--------------+  +---------------+
+  +-------+ +-+BUS0+---> REGION2 +-+-> NAMESPACE2.0 +--> ND6  "blk2.0" |
+  | DIMM2 <-+ +----+ | +---------+ | +--------------+  +----------------------+
+  +-------+ |        |             +-> NAMESPACE2.1 +--> ND5  "blk2.1" | BTT2 |
+  | DIMM3 <-+        |               +--------------+  +----------------------+
+  +-------+          | +---------+   +--------------+  +---------------+
+                     +-> REGION3 +-+-> NAMESPACE3.0 +--> ND4  "blk3.0" |
+                     | +---------+ | +--------------+  +----------------------+
+                     |             +-> NAMESPACE3.1 +--> ND3  "blk3.1" | BTT1 |
+                     |               +--------------+  +----------------------+
+                     | +---------+   +--------------+  +---------------+
+                     +-> REGION4 +---> NAMESPACE4.0 +--> ND2  "blk4.0" |
+                     | +---------+   +--------------+  +---------------+
+                     | +---------+   +--------------+  +----------------------+
+                     +-> REGION5 +---> NAMESPACE5.0 +--> ND1  "blk5.0" | BTT0 |
+                       +---------+   +--------------+  +---------------+------+
diff --git a/Documentation/nvdimm/nvdimm.txt b/Documentation/nvdimm/nvdimm.txt
deleted file mode 100644
index 1669f626b037..000000000000
--- a/Documentation/nvdimm/nvdimm.txt
+++ /dev/null
@@ -1,815 +0,0 @@
-			  LIBNVDIMM: Non-Volatile Devices
-	      libnvdimm - kernel / libndctl - userspace helper library
-			   linux-nvdimm@lists.01.org
-				      v13
-
-
-	Glossary
-	Overview
-	    Supporting Documents
-	    Git Trees
-	LIBNVDIMM PMEM and BLK
-	Why BLK?
-	    PMEM vs BLK
-	        BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX
-	Example NVDIMM Platform
-	LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API
-	    LIBNDCTL: Context
-	        libndctl: instantiate a new library context example
-	    LIBNVDIMM/LIBNDCTL: Bus
-	        libnvdimm: control class device in /sys/class
-	        libnvdimm: bus
-	        libndctl: bus enumeration example
-	    LIBNVDIMM/LIBNDCTL: DIMM (NMEM)
-	        libnvdimm: DIMM (NMEM)
-	        libndctl: DIMM enumeration example
-	    LIBNVDIMM/LIBNDCTL: Region
-	        libnvdimm: region
-	        libndctl: region enumeration example
-	        Why Not Encode the Region Type into the Region Name?
-	        How Do I Determine the Major Type of a Region?
-	    LIBNVDIMM/LIBNDCTL: Namespace
-	        libnvdimm: namespace
-	        libndctl: namespace enumeration example
-	        libndctl: namespace creation example
-	        Why the Term "namespace"?
-	    LIBNVDIMM/LIBNDCTL: Block Translation Table "btt"
-	        libnvdimm: btt layout
-	        libndctl: btt creation example
-	Summary LIBNDCTL Diagram
-
-
-Glossary
---------
-
-PMEM: A system-physical-address range where writes are persistent.  A
-block device composed of PMEM is capable of DAX.  A PMEM address range
-may span an interleave of several DIMMs.
-
-BLK: A set of one or more programmable memory mapped apertures provided
-by a DIMM to access its media.  This indirection precludes the
-performance benefit of interleaving, but enables DIMM-bounded failure
-modes.
-
-DPA: DIMM Physical Address, is a DIMM-relative offset.  With one DIMM in
-the system there would be a 1:1 system-physical-address:DPA association.
-Once more DIMMs are added a memory controller interleave must be
-decoded to determine the DPA associated with a given
-system-physical-address.  BLK capacity always has a 1:1 relationship
-with a single-DIMM's DPA range.
-
-DAX: File system extensions to bypass the page cache and block layer to
-mmap persistent memory, from a PMEM block device, directly into a
-process address space.
-
-DSM: Device Specific Method: ACPI method to to control specific
-device - in this case the firmware.
-
-DCR: NVDIMM Control Region Structure defined in ACPI 6 Section 5.2.25.5.
-It defines a vendor-id, device-id, and interface format for a given DIMM.
-
-BTT: Block Translation Table: Persistent memory is byte addressable.
-Existing software may have an expectation that the power-fail-atomicity
-of writes is at least one sector, 512 bytes.  The BTT is an indirection
-table with atomic update semantics to front a PMEM/BLK block device
-driver and present arbitrary atomic sector sizes.
-
-LABEL: Metadata stored on a DIMM device that partitions and identifies
-(persistently names) storage between PMEM and BLK.  It also partitions
-BLK storage to host BTTs with different parameters per BLK-partition.
-Note that traditional partition tables, GPT/MBR, are layered on top of a
-BLK or PMEM device.
-
-
-Overview
---------
-
-The LIBNVDIMM subsystem provides support for three types of NVDIMMs, namely,
-PMEM, BLK, and NVDIMM devices that can simultaneously support both PMEM
-and BLK mode access.  These three modes of operation are described by
-the "NVDIMM Firmware Interface Table" (NFIT) in ACPI 6.  While the LIBNVDIMM
-implementation is generic and supports pre-NFIT platforms, it was guided
-by the superset of capabilities need to support this ACPI 6 definition
-for NVDIMM resources.  The bulk of the kernel implementation is in place
-to handle the case where DPA accessible via PMEM is aliased with DPA
-accessible via BLK.  When that occurs a LABEL is needed to reserve DPA
-for exclusive access via one mode a time.
-
-Supporting Documents
-ACPI 6: http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf
-NVDIMM Namespace: http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf
-DSM Interface Example: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
-Driver Writer's Guide: http://pmem.io/documents/NVDIMM_Driver_Writers_Guide.pdf
-
-Git Trees
-LIBNVDIMM: https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git
-LIBNDCTL: https://github.com/pmem/ndctl.git
-PMEM: https://github.com/01org/prd
-
-
-LIBNVDIMM PMEM and BLK
-------------------
-
-Prior to the arrival of the NFIT, non-volatile memory was described to a
-system in various ad-hoc ways.  Usually only the bare minimum was
-provided, namely, a single system-physical-address range where writes
-are expected to be durable after a system power loss.  Now, the NFIT
-specification standardizes not only the description of PMEM, but also
-BLK and platform message-passing entry points for control and
-configuration.
-
-For each NVDIMM access method (PMEM, BLK), LIBNVDIMM provides a block
-device driver:
-
-    1. PMEM (nd_pmem.ko): Drives a system-physical-address range.  This
-    range is contiguous in system memory and may be interleaved (hardware
-    memory controller striped) across multiple DIMMs.  When interleaved the
-    platform may optionally provide details of which DIMMs are participating
-    in the interleave.
-
-    Note that while LIBNVDIMM describes system-physical-address ranges that may
-    alias with BLK access as ND_NAMESPACE_PMEM ranges and those without
-    alias as ND_NAMESPACE_IO ranges, to the nd_pmem driver there is no
-    distinction.  The different device-types are an implementation detail
-    that userspace can exploit to implement policies like "only interface
-    with address ranges from certain DIMMs".  It is worth noting that when
-    aliasing is present and a DIMM lacks a label, then no block device can
-    be created by default as userspace needs to do at least one allocation
-    of DPA to the PMEM range.  In contrast ND_NAMESPACE_IO ranges, once
-    registered, can be immediately attached to nd_pmem.
-
-    2. BLK (nd_blk.ko): This driver performs I/O using a set of platform
-    defined apertures.  A set of apertures will access just one DIMM.
-    Multiple windows (apertures) allow multiple concurrent accesses, much like
-    tagged-command-queuing, and would likely be used by different threads or
-    different CPUs.
-
-    The NFIT specification defines a standard format for a BLK-aperture, but
-    the spec also allows for vendor specific layouts, and non-NFIT BLK
-    implementations may have other designs for BLK I/O.  For this reason
-    "nd_blk" calls back into platform-specific code to perform the I/O.
-    One such implementation is defined in the "Driver Writer's Guide" and "DSM
-    Interface Example".
-
-
-Why BLK?
---------
-
-While PMEM provides direct byte-addressable CPU-load/store access to
-NVDIMM storage, it does not provide the best system RAS (recovery,
-availability, and serviceability) model.  An access to a corrupted
-system-physical-address address causes a CPU exception while an access
-to a corrupted address through an BLK-aperture causes that block window
-to raise an error status in a register.  The latter is more aligned with
-the standard error model that host-bus-adapter attached disks present.
-Also, if an administrator ever wants to replace a memory it is easier to
-service a system at DIMM module boundaries.  Compare this to PMEM where
-data could be interleaved in an opaque hardware specific manner across
-several DIMMs.
-
-PMEM vs BLK
-BLK-apertures solve these RAS problems, but their presence is also the
-major contributing factor to the complexity of the ND subsystem.  They
-complicate the implementation because PMEM and BLK alias in DPA space.
-Any given DIMM's DPA-range may contribute to one or more
-system-physical-address sets of interleaved DIMMs, *and* may also be
-accessed in its entirety through its BLK-aperture.  Accessing a DPA
-through a system-physical-address while simultaneously accessing the
-same DPA through a BLK-aperture has undefined results.  For this reason,
-DIMMs with this dual interface configuration include a DSM function to
-store/retrieve a LABEL.  The LABEL effectively partitions the DPA-space
-into exclusive system-physical-address and BLK-aperture accessible
-regions.  For simplicity a DIMM is allowed a PMEM "region" per each
-interleave set in which it is a member.  The remaining DPA space can be
-carved into an arbitrary number of BLK devices with discontiguous
-extents.
-
-BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX
---------------------------------------------------
-
-One of the few
-reasons to allow multiple BLK namespaces per REGION is so that each
-BLK-namespace can be configured with a BTT with unique atomic sector
-sizes.  While a PMEM device can host a BTT the LABEL specification does
-not provide for a sector size to be specified for a PMEM namespace.
-This is due to the expectation that the primary usage model for PMEM is
-via DAX, and the BTT is incompatible with DAX.  However, for the cases
-where an application or filesystem still needs atomic sector update
-guarantees it can register a BTT on a PMEM device or partition.  See
-LIBNVDIMM/NDCTL: Block Translation Table "btt"
-
-
-Example NVDIMM Platform
------------------------
-
-For the remainder of this document the following diagram will be
-referenced for any example sysfs layouts.
-
-
-                             (a)               (b)           DIMM   BLK-REGION
-          +-------------------+--------+--------+--------+
-+------+  |       pm0.0       | blk2.0 | pm1.0  | blk2.1 |    0      region2
-| imc0 +--+- - - region0- - - +--------+        +--------+
-+--+---+  |       pm0.0       | blk3.0 | pm1.0  | blk3.1 |    1      region3
-   |      +-------------------+--------v        v--------+
-+--+---+                               |                 |
-| cpu0 |                                     region1
-+--+---+                               |                 |
-   |      +----------------------------^        ^--------+
-+--+---+  |           blk4.0           | pm1.0  | blk4.0 |    2      region4
-| imc1 +--+----------------------------|        +--------+
-+------+  |           blk5.0           | pm1.0  | blk5.0 |    3      region5
-          +----------------------------+--------+--------+
-
-In this platform we have four DIMMs and two memory controllers in one
-socket.  Each unique interface (BLK or PMEM) to DPA space is identified
-by a region device with a dynamically assigned id (REGION0 - REGION5).
-
-    1. The first portion of DIMM0 and DIMM1 are interleaved as REGION0. A
-    single PMEM namespace is created in the REGION0-SPA-range that spans most
-    of DIMM0 and DIMM1 with a user-specified name of "pm0.0". Some of that
-    interleaved system-physical-address range is reclaimed as BLK-aperture
-    accessed space starting at DPA-offset (a) into each DIMM.  In that
-    reclaimed space we create two BLK-aperture "namespaces" from REGION2 and
-    REGION3 where "blk2.0" and "blk3.0" are just human readable names that
-    could be set to any user-desired name in the LABEL.
-
-    2. In the last portion of DIMM0 and DIMM1 we have an interleaved
-    system-physical-address range, REGION1, that spans those two DIMMs as
-    well as DIMM2 and DIMM3.  Some of REGION1 is allocated to a PMEM namespace
-    named "pm1.0", the rest is reclaimed in 4 BLK-aperture namespaces (for
-    each DIMM in the interleave set), "blk2.1", "blk3.1", "blk4.0", and
-    "blk5.0".
-
-    3. The portion of DIMM2 and DIMM3 that do not participate in the REGION1
-    interleaved system-physical-address range (i.e. the DPA address past
-    offset (b) are also included in the "blk4.0" and "blk5.0" namespaces.
-    Note, that this example shows that BLK-aperture namespaces don't need to
-    be contiguous in DPA-space.
-
-    This bus is provided by the kernel under the device
-    /sys/devices/platform/nfit_test.0 when CONFIG_NFIT_TEST is enabled and
-    the nfit_test.ko module is loaded.  This not only test LIBNVDIMM but the
-    acpi_nfit.ko driver as well.
-
-
-LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API
-----------------------------------------------------
-
-What follows is a description of the LIBNVDIMM sysfs layout and a
-corresponding object hierarchy diagram as viewed through the LIBNDCTL
-API.  The example sysfs paths and diagrams are relative to the Example
-NVDIMM Platform which is also the LIBNVDIMM bus used in the LIBNDCTL unit
-test.
-
-LIBNDCTL: Context
-Every API call in the LIBNDCTL library requires a context that holds the
-logging parameters and other library instance state.  The library is
-based on the libabc template:
-https://git.kernel.org/cgit/linux/kernel/git/kay/libabc.git
-
-LIBNDCTL: instantiate a new library context example
-
-	struct ndctl_ctx *ctx;
-
-	if (ndctl_new(&ctx) == 0)
-		return ctx;
-	else
-		return NULL;
-
-LIBNVDIMM/LIBNDCTL: Bus
--------------------
-
-A bus has a 1:1 relationship with an NFIT.  The current expectation for
-ACPI based systems is that there is only ever one platform-global NFIT.
-That said, it is trivial to register multiple NFITs, the specification
-does not preclude it.  The infrastructure supports multiple busses and
-we use this capability to test multiple NFIT configurations in the unit
-test.
-
-LIBNVDIMM: control class device in /sys/class
-
-This character device accepts DSM messages to be passed to DIMM
-identified by its NFIT handle.
-
-	/sys/class/nd/ndctl0
-	|-- dev
-	|-- device -> ../../../ndbus0
-	|-- subsystem -> ../../../../../../../class/nd
-
-
-
-LIBNVDIMM: bus
-
-	struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
-	       struct nvdimm_bus_descriptor *nfit_desc);
-
-	/sys/devices/platform/nfit_test.0/ndbus0
-	|-- commands
-	|-- nd
-	|-- nfit
-	|-- nmem0
-	|-- nmem1
-	|-- nmem2
-	|-- nmem3
-	|-- power
-	|-- provider
-	|-- region0
-	|-- region1
-	|-- region2
-	|-- region3
-	|-- region4
-	|-- region5
-	|-- uevent
-	`-- wait_probe
-
-LIBNDCTL: bus enumeration example
-Find the bus handle that describes the bus from Example NVDIMM Platform
-
-	static struct ndctl_bus *get_bus_by_provider(struct ndctl_ctx *ctx,
-			const char *provider)
-	{
-		struct ndctl_bus *bus;
-
-		ndctl_bus_foreach(ctx, bus)
-			if (strcmp(provider, ndctl_bus_get_provider(bus)) == 0)
-				return bus;
-
-		return NULL;
-	}
-
-	bus = get_bus_by_provider(ctx, "nfit_test.0");
-
-
-LIBNVDIMM/LIBNDCTL: DIMM (NMEM)
----------------------------
-
-The DIMM device provides a character device for sending commands to
-hardware, and it is a container for LABELs.  If the DIMM is defined by
-NFIT then an optional 'nfit' attribute sub-directory is available to add
-NFIT-specifics.
-
-Note that the kernel device name for "DIMMs" is "nmemX".  The NFIT
-describes these devices via "Memory Device to System Physical Address
-Range Mapping Structure", and there is no requirement that they actually
-be physical DIMMs, so we use a more generic name.
-
-LIBNVDIMM: DIMM (NMEM)
-
-	struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
-			const struct attribute_group **groups, unsigned long flags,
-			unsigned long *dsm_mask);
-
-	/sys/devices/platform/nfit_test.0/ndbus0
-	|-- nmem0
-	|   |-- available_slots
-	|   |-- commands
-	|   |-- dev
-	|   |-- devtype
-	|   |-- driver -> ../../../../../bus/nd/drivers/nvdimm
-	|   |-- modalias
-	|   |-- nfit
-	|   |   |-- device
-	|   |   |-- format
-	|   |   |-- handle
-	|   |   |-- phys_id
-	|   |   |-- rev_id
-	|   |   |-- serial
-	|   |   `-- vendor
-	|   |-- state
-	|   |-- subsystem -> ../../../../../bus/nd
-	|   `-- uevent
-	|-- nmem1
-	[..]
-
-
-LIBNDCTL: DIMM enumeration example
-
-Note, in this example we are assuming NFIT-defined DIMMs which are
-identified by an "nfit_handle" a 32-bit value where:
-Bit 3:0 DIMM number within the memory channel
-Bit 7:4 memory channel number
-Bit 11:8 memory controller ID
-Bit 15:12 socket ID (within scope of a Node controller if node controller is present)
-Bit 27:16 Node Controller ID
-Bit 31:28 Reserved
-
-	static struct ndctl_dimm *get_dimm_by_handle(struct ndctl_bus *bus,
-	       unsigned int handle)
-	{
-		struct ndctl_dimm *dimm;
-
-		ndctl_dimm_foreach(bus, dimm)
-			if (ndctl_dimm_get_handle(dimm) == handle)
-				return dimm;
-
-		return NULL;
-	}
-
-	#define DIMM_HANDLE(n, s, i, c, d) \
-		(((n & 0xfff) << 16) | ((s & 0xf) << 12) | ((i & 0xf) << 8) \
-		 | ((c & 0xf) << 4) | (d & 0xf))
-
-	dimm = get_dimm_by_handle(bus, DIMM_HANDLE(0, 0, 0, 0, 0));
-
-LIBNVDIMM/LIBNDCTL: Region
-----------------------
-
-A generic REGION device is registered for each PMEM range or BLK-aperture
-set.  Per the example there are 6 regions: 2 PMEM and 4 BLK-aperture
-sets on the "nfit_test.0" bus.  The primary role of regions are to be a
-container of "mappings".  A mapping is a tuple of <DIMM,
-DPA-start-offset, length>.
-
-LIBNVDIMM provides a built-in driver for these REGION devices.  This driver
-is responsible for reconciling the aliased DPA mappings across all
-regions, parsing the LABEL, if present, and then emitting NAMESPACE
-devices with the resolved/exclusive DPA-boundaries for the nd_pmem or
-nd_blk device driver to consume.
-
-In addition to the generic attributes of "mapping"s, "interleave_ways"
-and "size" the REGION device also exports some convenience attributes.
-"nstype" indicates the integer type of namespace-device this region
-emits, "devtype" duplicates the DEVTYPE variable stored by udev at the
-'add' event, "modalias" duplicates the MODALIAS variable stored by udev
-at the 'add' event, and finally, the optional "spa_index" is provided in
-the case where the region is defined by a SPA.
-
-LIBNVDIMM: region
-
-	struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
-			struct nd_region_desc *ndr_desc);
-	struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
-			struct nd_region_desc *ndr_desc);
-
-	/sys/devices/platform/nfit_test.0/ndbus0
-	|-- region0
-	|   |-- available_size
-	|   |-- btt0
-	|   |-- btt_seed
-	|   |-- devtype
-	|   |-- driver -> ../../../../../bus/nd/drivers/nd_region
-	|   |-- init_namespaces
-	|   |-- mapping0
-	|   |-- mapping1
-	|   |-- mappings
-	|   |-- modalias
-	|   |-- namespace0.0
-	|   |-- namespace_seed
-	|   |-- numa_node
-	|   |-- nfit
-	|   |   `-- spa_index
-	|   |-- nstype
-	|   |-- set_cookie
-	|   |-- size
-	|   |-- subsystem -> ../../../../../bus/nd
-	|   `-- uevent
-	|-- region1
-	[..]
-
-LIBNDCTL: region enumeration example
-
-Sample region retrieval routines based on NFIT-unique data like
-"spa_index" (interleave set id) for PMEM and "nfit_handle" (dimm id) for
-BLK.
-
-	static struct ndctl_region *get_pmem_region_by_spa_index(struct ndctl_bus *bus,
-			unsigned int spa_index)
-	{
-		struct ndctl_region *region;
-
-		ndctl_region_foreach(bus, region) {
-			if (ndctl_region_get_type(region) != ND_DEVICE_REGION_PMEM)
-				continue;
-			if (ndctl_region_get_spa_index(region) == spa_index)
-				return region;
-		}
-		return NULL;
-	}
-
-	static struct ndctl_region *get_blk_region_by_dimm_handle(struct ndctl_bus *bus,
-			unsigned int handle)
-	{
-		struct ndctl_region *region;
-
-		ndctl_region_foreach(bus, region) {
-			struct ndctl_mapping *map;
-
-			if (ndctl_region_get_type(region) != ND_DEVICE_REGION_BLOCK)
-				continue;
-			ndctl_mapping_foreach(region, map) {
-				struct ndctl_dimm *dimm = ndctl_mapping_get_dimm(map);
-
-				if (ndctl_dimm_get_handle(dimm) == handle)
-					return region;
-			}
-		}
-		return NULL;
-	}
-
-
-Why Not Encode the Region Type into the Region Name?
-----------------------------------------------------
-
-At first glance it seems since NFIT defines just PMEM and BLK interface
-types that we should simply name REGION devices with something derived
-from those type names.  However, the ND subsystem explicitly keeps the
-REGION name generic and expects userspace to always consider the
-region-attributes for four reasons:
-
-    1. There are already more than two REGION and "namespace" types.  For
-    PMEM there are two subtypes.  As mentioned previously we have PMEM where
-    the constituent DIMM devices are known and anonymous PMEM.  For BLK
-    regions the NFIT specification already anticipates vendor specific
-    implementations.  The exact distinction of what a region contains is in
-    the region-attributes not the region-name or the region-devtype.
-
-    2. A region with zero child-namespaces is a possible configuration.  For
-    example, the NFIT allows for a DCR to be published without a
-    corresponding BLK-aperture.  This equates to a DIMM that can only accept
-    control/configuration messages, but no i/o through a descendant block
-    device.  Again, this "type" is advertised in the attributes ('mappings'
-    == 0) and the name does not tell you much.
-
-    3. What if a third major interface type arises in the future?  Outside
-    of vendor specific implementations, it's not difficult to envision a
-    third class of interface type beyond BLK and PMEM.  With a generic name
-    for the REGION level of the device-hierarchy old userspace
-    implementations can still make sense of new kernel advertised
-    region-types.  Userspace can always rely on the generic region
-    attributes like "mappings", "size", etc and the expected child devices
-    named "namespace".  This generic format of the device-model hierarchy
-    allows the LIBNVDIMM and LIBNDCTL implementations to be more uniform and
-    future-proof.
-
-    4. There are more robust mechanisms for determining the major type of a
-    region than a device name.  See the next section, How Do I Determine the
-    Major Type of a Region?
-
-How Do I Determine the Major Type of a Region?
-----------------------------------------------
-
-Outside of the blanket recommendation of "use libndctl", or simply
-looking at the kernel header (/usr/include/linux/ndctl.h) to decode the
-"nstype" integer attribute, here are some other options.
-
-    1. module alias lookup:
-
-    The whole point of region/namespace device type differentiation is to
-    decide which block-device driver will attach to a given LIBNVDIMM namespace.
-    One can simply use the modalias to lookup the resulting module.  It's
-    important to note that this method is robust in the presence of a
-    vendor-specific driver down the road.  If a vendor-specific
-    implementation wants to supplant the standard nd_blk driver it can with
-    minimal impact to the rest of LIBNVDIMM.
-
-    In fact, a vendor may also want to have a vendor-specific region-driver
-    (outside of nd_region).  For example, if a vendor defined its own LABEL
-    format it would need its own region driver to parse that LABEL and emit
-    the resulting namespaces.  The output from module resolution is more
-    accurate than a region-name or region-devtype.
-
-    2. udev:
-
-    The kernel "devtype" is registered in the udev database
-    # udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region0
-    P: /devices/platform/nfit_test.0/ndbus0/region0
-    E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region0
-    E: DEVTYPE=nd_pmem
-    E: MODALIAS=nd:t2
-    E: SUBSYSTEM=nd
-
-    # udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region4
-    P: /devices/platform/nfit_test.0/ndbus0/region4
-    E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region4
-    E: DEVTYPE=nd_blk
-    E: MODALIAS=nd:t3
-    E: SUBSYSTEM=nd
-
-    ...and is available as a region attribute, but keep in mind that the
-    "devtype" does not indicate sub-type variations and scripts should
-    really be understanding the other attributes.
-
-    3. type specific attributes:
-
-    As it currently stands a BLK-aperture region will never have a
-    "nfit/spa_index" attribute, but neither will a non-NFIT PMEM region.  A
-    BLK region with a "mappings" value of 0 is, as mentioned above, a DIMM
-    that does not allow I/O.  A PMEM region with a "mappings" value of zero
-    is a simple system-physical-address range.
-
-
-LIBNVDIMM/LIBNDCTL: Namespace
--------------------------
-
-A REGION, after resolving DPA aliasing and LABEL specified boundaries,
-surfaces one or more "namespace" devices.  The arrival of a "namespace"
-device currently triggers either the nd_blk or nd_pmem driver to load
-and register a disk/block device.
-
-LIBNVDIMM: namespace
-Here is a sample layout from the three major types of NAMESPACE where
-namespace0.0 represents DIMM-info-backed PMEM (note that it has a 'uuid'
-attribute), namespace2.0 represents a BLK namespace (note it has a
-'sector_size' attribute) that, and namespace6.0 represents an anonymous
-PMEM namespace (note that has no 'uuid' attribute due to not support a
-LABEL).
-
-	/sys/devices/platform/nfit_test.0/ndbus0/region0/namespace0.0
-	|-- alt_name
-	|-- devtype
-	|-- dpa_extents
-	|-- force_raw
-	|-- modalias
-	|-- numa_node
-	|-- resource
-	|-- size
-	|-- subsystem -> ../../../../../../bus/nd
-	|-- type
-	|-- uevent
-	`-- uuid
-	/sys/devices/platform/nfit_test.0/ndbus0/region2/namespace2.0
-	|-- alt_name
-	|-- devtype
-	|-- dpa_extents
-	|-- force_raw
-	|-- modalias
-	|-- numa_node
-	|-- sector_size
-	|-- size
-	|-- subsystem -> ../../../../../../bus/nd
-	|-- type
-	|-- uevent
-	`-- uuid
-	/sys/devices/platform/nfit_test.1/ndbus1/region6/namespace6.0
-	|-- block
-	|   `-- pmem0
-	|-- devtype
-	|-- driver -> ../../../../../../bus/nd/drivers/pmem
-	|-- force_raw
-	|-- modalias
-	|-- numa_node
-	|-- resource
-	|-- size
-	|-- subsystem -> ../../../../../../bus/nd
-	|-- type
-	`-- uevent
-
-LIBNDCTL: namespace enumeration example
-Namespaces are indexed relative to their parent region, example below.
-These indexes are mostly static from boot to boot, but subsystem makes
-no guarantees in this regard.  For a static namespace identifier use its
-'uuid' attribute.
-
-static struct ndctl_namespace *get_namespace_by_id(struct ndctl_region *region,
-                unsigned int id)
-{
-        struct ndctl_namespace *ndns;
-
-        ndctl_namespace_foreach(region, ndns)
-                if (ndctl_namespace_get_id(ndns) == id)
-                        return ndns;
-
-        return NULL;
-}
-
-LIBNDCTL: namespace creation example
-Idle namespaces are automatically created by the kernel if a given
-region has enough available capacity to create a new namespace.
-Namespace instantiation involves finding an idle namespace and
-configuring it.  For the most part the setting of namespace attributes
-can occur in any order, the only constraint is that 'uuid' must be set
-before 'size'.  This enables the kernel to track DPA allocations
-internally with a static identifier.
-
-static int configure_namespace(struct ndctl_region *region,
-                struct ndctl_namespace *ndns,
-                struct namespace_parameters *parameters)
-{
-        char devname[50];
-
-        snprintf(devname, sizeof(devname), "namespace%d.%d",
-                        ndctl_region_get_id(region), paramaters->id);
-
-        ndctl_namespace_set_alt_name(ndns, devname);
-        /* 'uuid' must be set prior to setting size! */
-        ndctl_namespace_set_uuid(ndns, paramaters->uuid);
-        ndctl_namespace_set_size(ndns, paramaters->size);
-        /* unlike pmem namespaces, blk namespaces have a sector size */
-        if (parameters->lbasize)
-                ndctl_namespace_set_sector_size(ndns, parameters->lbasize);
-        ndctl_namespace_enable(ndns);
-}
-
-
-Why the Term "namespace"?
-
-    1. Why not "volume" for instance?  "volume" ran the risk of confusing
-    ND (libnvdimm subsystem) to a volume manager like device-mapper.
-
-    2. The term originated to describe the sub-devices that can be created
-    within a NVME controller (see the nvme specification:
-    http://www.nvmexpress.org/specifications/), and NFIT namespaces are
-    meant to parallel the capabilities and configurability of
-    NVME-namespaces.
-
-
-LIBNVDIMM/LIBNDCTL: Block Translation Table "btt"
----------------------------------------------
-
-A BTT (design document: http://pmem.io/2014/09/23/btt.html) is a stacked
-block device driver that fronts either the whole block device or a
-partition of a block device emitted by either a PMEM or BLK NAMESPACE.
-
-LIBNVDIMM: btt layout
-Every region will start out with at least one BTT device which is the
-seed device.  To activate it set the "namespace", "uuid", and
-"sector_size" attributes and then bind the device to the nd_pmem or
-nd_blk driver depending on the region type.
-
-	/sys/devices/platform/nfit_test.1/ndbus0/region0/btt0/
-	|-- namespace
-	|-- delete
-	|-- devtype
-	|-- modalias
-	|-- numa_node
-	|-- sector_size
-	|-- subsystem -> ../../../../../bus/nd
-	|-- uevent
-	`-- uuid
-
-LIBNDCTL: btt creation example
-Similar to namespaces an idle BTT device is automatically created per
-region.  Each time this "seed" btt device is configured and enabled a new
-seed is created.  Creating a BTT configuration involves two steps of
-finding and idle BTT and assigning it to consume a PMEM or BLK namespace.
-
-	static struct ndctl_btt *get_idle_btt(struct ndctl_region *region)
-	{
-		struct ndctl_btt *btt;
-
-		ndctl_btt_foreach(region, btt)
-			if (!ndctl_btt_is_enabled(btt)
-					&& !ndctl_btt_is_configured(btt))
-				return btt;
-
-		return NULL;
-	}
-
-	static int configure_btt(struct ndctl_region *region,
-			struct btt_parameters *parameters)
-	{
-		btt = get_idle_btt(region);
-
-		ndctl_btt_set_uuid(btt, parameters->uuid);
-		ndctl_btt_set_sector_size(btt, parameters->sector_size);
-		ndctl_btt_set_namespace(btt, parameters->ndns);
-		/* turn off raw mode device */
-		ndctl_namespace_disable(parameters->ndns);
-		/* turn on btt access */
-		ndctl_btt_enable(btt);
-	}
-
-Once instantiated a new inactive btt seed device will appear underneath
-the region.
-
-Once a "namespace" is removed from a BTT that instance of the BTT device
-will be deleted or otherwise reset to default values.  This deletion is
-only at the device model level.  In order to destroy a BTT the "info
-block" needs to be destroyed.  Note, that to destroy a BTT the media
-needs to be written in raw mode.  By default, the kernel will autodetect
-the presence of a BTT and disable raw mode.  This autodetect behavior
-can be suppressed by enabling raw mode for the namespace via the
-ndctl_namespace_set_raw_mode() API.
-
-
-Summary LIBNDCTL Diagram
-------------------------
-
-For the given example above, here is the view of the objects as seen by the
-LIBNDCTL API:
-            +---+
-            |CTX|    +---------+   +--------------+  +---------------+
-            +-+-+  +-> REGION0 +---> NAMESPACE0.0 +--> PMEM8 "pm0.0" |
-              |    | +---------+   +--------------+  +---------------+
-+-------+     |    | +---------+   +--------------+  +---------------+
-| DIMM0 <-+   |    +-> REGION1 +---> NAMESPACE1.0 +--> PMEM6 "pm1.0" |
-+-------+ |   |    | +---------+   +--------------+  +---------------+
-| DIMM1 <-+ +-v--+ | +---------+   +--------------+  +---------------+
-+-------+ +-+BUS0+---> REGION2 +-+-> NAMESPACE2.0 +--> ND6  "blk2.0" |
-| DIMM2 <-+ +----+ | +---------+ | +--------------+  +----------------------+
-+-------+ |        |             +-> NAMESPACE2.1 +--> ND5  "blk2.1" | BTT2 |
-| DIMM3 <-+        |               +--------------+  +----------------------+
-+-------+          | +---------+   +--------------+  +---------------+
-                   +-> REGION3 +-+-> NAMESPACE3.0 +--> ND4  "blk3.0" |
-                   | +---------+ | +--------------+  +----------------------+
-                   |             +-> NAMESPACE3.1 +--> ND3  "blk3.1" | BTT1 |
-                   |               +--------------+  +----------------------+
-                   | +---------+   +--------------+  +---------------+
-                   +-> REGION4 +---> NAMESPACE4.0 +--> ND2  "blk4.0" |
-                   | +---------+   +--------------+  +---------------+
-                   | +---------+   +--------------+  +----------------------+
-                   +-> REGION5 +---> NAMESPACE5.0 +--> ND1  "blk5.0" | BTT0 |
-                     +---------+   +--------------+  +---------------+------+
-
-
diff --git a/Documentation/nvdimm/security.rst b/Documentation/nvdimm/security.rst
new file mode 100644
index 000000000000..ad9dea099b34
--- /dev/null
+++ b/Documentation/nvdimm/security.rst
@@ -0,0 +1,143 @@
+===============
+NVDIMM Security
+===============
+
+1. Introduction
+---------------
+
+With the introduction of Intel Device Specific Methods (DSM) v1.8
+specification [1], security DSMs are introduced. The spec added the following
+security DSMs: "get security state", "set passphrase", "disable passphrase",
+"unlock unit", "freeze lock", "secure erase", and "overwrite". A security_ops
+data structure has been added to struct dimm in order to support the security
+operations and generic APIs are exposed to allow vendor neutral operations.
+
+2. Sysfs Interface
+------------------
+The "security" sysfs attribute is provided in the nvdimm sysfs directory. For
+example:
+/sys/devices/LNXSYSTM:00/LNXSYBUS:00/ACPI0012:00/ndbus0/nmem0/security
+
+The "show" attribute of that attribute will display the security state for
+that DIMM. The following states are available: disabled, unlocked, locked,
+frozen, and overwrite. If security is not supported, the sysfs attribute
+will not be visible.
+
+The "store" attribute takes several commands when it is being written to
+in order to support some of the security functionalities:
+update <old_keyid> <new_keyid> - enable or update passphrase.
+disable <keyid> - disable enabled security and remove key.
+freeze - freeze changing of security states.
+erase <keyid> - delete existing user encryption key.
+overwrite <keyid> - wipe the entire nvdimm.
+master_update <keyid> <new_keyid> - enable or update master passphrase.
+master_erase <keyid> - delete existing user encryption key.
+
+3. Key Management
+-----------------
+
+The key is associated to the payload by the DIMM id. For example:
+# cat /sys/devices/LNXSYSTM:00/LNXSYBUS:00/ACPI0012:00/ndbus0/nmem0/nfit/id
+8089-a2-1740-00000133
+The DIMM id would be provided along with the key payload (passphrase) to
+the kernel.
+
+The security keys are managed on the basis of a single key per DIMM. The
+key "passphrase" is expected to be 32bytes long. This is similar to the ATA
+security specification [2]. A key is initially acquired via the request_key()
+kernel API call during nvdimm unlock. It is up to the user to make sure that
+all the keys are in the kernel user keyring for unlock.
+
+A nvdimm encrypted-key of format enc32 has the description format of:
+nvdimm:<bus-provider-specific-unique-id>
+
+See file ``Documentation/security/keys/trusted-encrypted.rst`` for creating
+encrypted-keys of enc32 format. TPM usage with a master trusted key is
+preferred for sealing the encrypted-keys.
+
+4. Unlocking
+------------
+When the DIMMs are being enumerated by the kernel, the kernel will attempt to
+retrieve the key from the kernel user keyring. This is the only time
+a locked DIMM can be unlocked. Once unlocked, the DIMM will remain unlocked
+until reboot. Typically an entity (i.e. shell script) will inject all the
+relevant encrypted-keys into the kernel user keyring during the initramfs phase.
+This provides the unlock function access to all the related keys that contain
+the passphrase for the respective nvdimms.  It is also recommended that the
+keys are injected before libnvdimm is loaded by modprobe.
+
+5. Update
+---------
+When doing an update, it is expected that the existing key is removed from
+the kernel user keyring and reinjected as different (old) key. It's irrelevant
+what the key description is for the old key since we are only interested in the
+keyid when doing the update operation. It is also expected that the new key
+is injected with the description format described from earlier in this
+document.  The update command written to the sysfs attribute will be with
+the format:
+update <old keyid> <new keyid>
+
+If there is no old keyid due to a security enabling, then a 0 should be
+passed in.
+
+6. Freeze
+---------
+The freeze operation does not require any keys. The security config can be
+frozen by a user with root privelege.
+
+7. Disable
+----------
+The security disable command format is:
+disable <keyid>
+
+An key with the current passphrase payload that is tied to the nvdimm should be
+in the kernel user keyring.
+
+8. Secure Erase
+---------------
+The command format for doing a secure erase is:
+erase <keyid>
+
+An key with the current passphrase payload that is tied to the nvdimm should be
+in the kernel user keyring.
+
+9. Overwrite
+------------
+The command format for doing an overwrite is:
+overwrite <keyid>
+
+Overwrite can be done without a key if security is not enabled. A key serial
+of 0 can be passed in to indicate no key.
+
+The sysfs attribute "security" can be polled to wait on overwrite completion.
+Overwrite can last tens of minutes or more depending on nvdimm size.
+
+An encrypted-key with the current user passphrase that is tied to the nvdimm
+should be injected and its keyid should be passed in via sysfs.
+
+10. Master Update
+-----------------
+The command format for doing a master update is:
+update <old keyid> <new keyid>
+
+The operating mechanism for master update is identical to update except the
+master passphrase key is passed to the kernel. The master passphrase key
+is just another encrypted-key.
+
+This command is only available when security is disabled.
+
+11. Master Erase
+----------------
+The command format for doing a master erase is:
+master_erase <current keyid>
+
+This command has the same operating mechanism as erase except the master
+passphrase key is passed to the kernel. The master passphrase key is just
+another encrypted-key.
+
+This command is only available when the master security is enabled, indicated
+by the extended security status.
+
+[1]: http://pmem.io/documents/NVDIMM_DSM_Interface-V1.8.pdf
+
+[2]: http://www.t13.org/documents/UploadedDocuments/docs2006/e05179r4-ACS-SecurityClarifications.pdf
diff --git a/Documentation/nvdimm/security.txt b/Documentation/nvdimm/security.txt
deleted file mode 100644
index 4c36c05ca98e..000000000000
--- a/Documentation/nvdimm/security.txt
+++ /dev/null
@@ -1,141 +0,0 @@
-NVDIMM SECURITY
-===============
-
-1. Introduction
----------------
-
-With the introduction of Intel Device Specific Methods (DSM) v1.8
-specification [1], security DSMs are introduced. The spec added the following
-security DSMs: "get security state", "set passphrase", "disable passphrase",
-"unlock unit", "freeze lock", "secure erase", and "overwrite". A security_ops
-data structure has been added to struct dimm in order to support the security
-operations and generic APIs are exposed to allow vendor neutral operations.
-
-2. Sysfs Interface
-------------------
-The "security" sysfs attribute is provided in the nvdimm sysfs directory. For
-example:
-/sys/devices/LNXSYSTM:00/LNXSYBUS:00/ACPI0012:00/ndbus0/nmem0/security
-
-The "show" attribute of that attribute will display the security state for
-that DIMM. The following states are available: disabled, unlocked, locked,
-frozen, and overwrite. If security is not supported, the sysfs attribute
-will not be visible.
-
-The "store" attribute takes several commands when it is being written to
-in order to support some of the security functionalities:
-update <old_keyid> <new_keyid> - enable or update passphrase.
-disable <keyid> - disable enabled security and remove key.
-freeze - freeze changing of security states.
-erase <keyid> - delete existing user encryption key.
-overwrite <keyid> - wipe the entire nvdimm.
-master_update <keyid> <new_keyid> - enable or update master passphrase.
-master_erase <keyid> - delete existing user encryption key.
-
-3. Key Management
------------------
-
-The key is associated to the payload by the DIMM id. For example:
-# cat /sys/devices/LNXSYSTM:00/LNXSYBUS:00/ACPI0012:00/ndbus0/nmem0/nfit/id
-8089-a2-1740-00000133
-The DIMM id would be provided along with the key payload (passphrase) to
-the kernel.
-
-The security keys are managed on the basis of a single key per DIMM. The
-key "passphrase" is expected to be 32bytes long. This is similar to the ATA
-security specification [2]. A key is initially acquired via the request_key()
-kernel API call during nvdimm unlock. It is up to the user to make sure that
-all the keys are in the kernel user keyring for unlock.
-
-A nvdimm encrypted-key of format enc32 has the description format of:
-nvdimm:<bus-provider-specific-unique-id>
-
-See file ``Documentation/security/keys/trusted-encrypted.rst`` for creating
-encrypted-keys of enc32 format. TPM usage with a master trusted key is
-preferred for sealing the encrypted-keys.
-
-4. Unlocking
-------------
-When the DIMMs are being enumerated by the kernel, the kernel will attempt to
-retrieve the key from the kernel user keyring. This is the only time
-a locked DIMM can be unlocked. Once unlocked, the DIMM will remain unlocked
-until reboot. Typically an entity (i.e. shell script) will inject all the
-relevant encrypted-keys into the kernel user keyring during the initramfs phase.
-This provides the unlock function access to all the related keys that contain
-the passphrase for the respective nvdimms.  It is also recommended that the
-keys are injected before libnvdimm is loaded by modprobe.
-
-5. Update
----------
-When doing an update, it is expected that the existing key is removed from
-the kernel user keyring and reinjected as different (old) key. It's irrelevant
-what the key description is for the old key since we are only interested in the
-keyid when doing the update operation. It is also expected that the new key
-is injected with the description format described from earlier in this
-document.  The update command written to the sysfs attribute will be with
-the format:
-update <old keyid> <new keyid>
-
-If there is no old keyid due to a security enabling, then a 0 should be
-passed in.
-
-6. Freeze
----------
-The freeze operation does not require any keys. The security config can be
-frozen by a user with root privelege.
-
-7. Disable
-----------
-The security disable command format is:
-disable <keyid>
-
-An key with the current passphrase payload that is tied to the nvdimm should be
-in the kernel user keyring.
-
-8. Secure Erase
----------------
-The command format for doing a secure erase is:
-erase <keyid>
-
-An key with the current passphrase payload that is tied to the nvdimm should be
-in the kernel user keyring.
-
-9. Overwrite
-------------
-The command format for doing an overwrite is:
-overwrite <keyid>
-
-Overwrite can be done without a key if security is not enabled. A key serial
-of 0 can be passed in to indicate no key.
-
-The sysfs attribute "security" can be polled to wait on overwrite completion.
-Overwrite can last tens of minutes or more depending on nvdimm size.
-
-An encrypted-key with the current user passphrase that is tied to the nvdimm
-should be injected and its keyid should be passed in via sysfs.
-
-10. Master Update
------------------
-The command format for doing a master update is:
-update <old keyid> <new keyid>
-
-The operating mechanism for master update is identical to update except the
-master passphrase key is passed to the kernel. The master passphrase key
-is just another encrypted-key.
-
-This command is only available when security is disabled.
-
-11. Master Erase
-----------------
-The command format for doing a master erase is:
-master_erase <current keyid>
-
-This command has the same operating mechanism as erase except the master
-passphrase key is passed to the kernel. The master passphrase key is just
-another encrypted-key.
-
-This command is only available when the master security is enabled, indicated
-by the extended security status.
-
-[1]: http://pmem.io/documents/NVDIMM_DSM_Interface-V1.8.pdf
-[2]: http://www.t13.org/documents/UploadedDocuments/docs2006/e05179r4-ACS-SecurityClarifications.pdf
-- 
cgit 


From 8ea0afa3b801e9fe3ff676c3e60e74afa1a0848a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 14:34:34 -0300
Subject: docs: xtensa: convert to ReST

Rename the xtensa documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/xtensa/atomctl.rst |  51 ++++++++++
 Documentation/xtensa/atomctl.txt |  44 ---------
 Documentation/xtensa/booting.rst |  22 +++++
 Documentation/xtensa/booting.txt |  19 ----
 Documentation/xtensa/index.rst   |  12 +++
 Documentation/xtensa/mmu.rst     | 195 +++++++++++++++++++++++++++++++++++++++
 Documentation/xtensa/mmu.txt     | 189 -------------------------------------
 7 files changed, 280 insertions(+), 252 deletions(-)
 create mode 100644 Documentation/xtensa/atomctl.rst
 delete mode 100644 Documentation/xtensa/atomctl.txt
 create mode 100644 Documentation/xtensa/booting.rst
 delete mode 100644 Documentation/xtensa/booting.txt
 create mode 100644 Documentation/xtensa/index.rst
 create mode 100644 Documentation/xtensa/mmu.rst
 delete mode 100644 Documentation/xtensa/mmu.txt

(limited to 'Documentation')

diff --git a/Documentation/xtensa/atomctl.rst b/Documentation/xtensa/atomctl.rst
new file mode 100644
index 000000000000..1ecbd0ba9a2e
--- /dev/null
+++ b/Documentation/xtensa/atomctl.rst
@@ -0,0 +1,51 @@
+===========================================
+Atomic Operation Control (ATOMCTL) Register
+===========================================
+
+We Have Atomic Operation Control (ATOMCTL) Register.
+This register determines the effect of using a S32C1I instruction
+with various combinations of:
+
+     1. With and without an Coherent Cache Controller which
+        can do Atomic Transactions to the memory internally.
+
+     2. With and without An Intelligent Memory Controller which
+        can do Atomic Transactions itself.
+
+The Core comes up with a default value of for the three types of cache ops::
+
+      0x28: (WB: Internal, WT: Internal, BY:Exception)
+
+On the FPGA Cards we typically simulate an Intelligent Memory controller
+which can implement  RCW transactions. For FPGA cards with an External
+Memory controller we let it to the atomic operations internally while
+doing a Cached (WB) transaction and use the Memory RCW for un-cached
+operations.
+
+For systems without an coherent cache controller, non-MX, we always
+use the memory controllers RCW, thought non-MX controlers likely
+support the Internal Operation.
+
+CUSTOMER-WARNING:
+   Virtually all customers buy their memory controllers from vendors that
+   don't support atomic RCW memory transactions and will likely want to
+   configure this register to not use RCW.
+
+Developers might find using RCW in Bypass mode convenient when testing
+with the cache being bypassed; for example studying cache alias problems.
+
+See Section 4.3.12.4 of ISA; Bits::
+
+                             WB     WT      BY
+                           5   4 | 3   2 | 1   0
+
+=========    ==================      ==================      ===============
+  2 Bit
+  Field
+  Values     WB - Write Back         WT - Write Thru         BY - Bypass
+=========    ==================      ==================      ===============
+    0        Exception               Exception               Exception
+    1        RCW Transaction         RCW Transaction         RCW Transaction
+    2        Internal Operation      Internal Operation      Reserved
+    3        Reserved                Reserved                Reserved
+=========    ==================      ==================      ===============
diff --git a/Documentation/xtensa/atomctl.txt b/Documentation/xtensa/atomctl.txt
deleted file mode 100644
index 1da783ac200c..000000000000
--- a/Documentation/xtensa/atomctl.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-We Have Atomic Operation Control (ATOMCTL) Register.
-This register determines the effect of using a S32C1I instruction
-with various combinations of:
-
-     1. With and without an Coherent Cache Controller which
-        can do Atomic Transactions to the memory internally.
-
-     2. With and without An Intelligent Memory Controller which
-        can do Atomic Transactions itself.
-
-The Core comes up with a default value of for the three types of cache ops:
-
-      0x28: (WB: Internal, WT: Internal, BY:Exception)
-
-On the FPGA Cards we typically simulate an Intelligent Memory controller
-which can implement  RCW transactions. For FPGA cards with an External
-Memory controller we let it to the atomic operations internally while
-doing a Cached (WB) transaction and use the Memory RCW for un-cached
-operations.
-
-For systems without an coherent cache controller, non-MX, we always
-use the memory controllers RCW, thought non-MX controlers likely
-support the Internal Operation.
-
-CUSTOMER-WARNING:
-   Virtually all customers buy their memory controllers from vendors that
-   don't support atomic RCW memory transactions and will likely want to
-   configure this register to not use RCW.
-
-Developers might find using RCW in Bypass mode convenient when testing
-with the cache being bypassed; for example studying cache alias problems.
-
-See Section 4.3.12.4 of ISA; Bits:
-
-                             WB     WT      BY
-                           5   4 | 3   2 | 1   0
-  2 Bit
-  Field
-  Values     WB - Write Back         WT - Write Thru         BY - Bypass
----------    ---------------         -----------------     ----------------
-    0        Exception               Exception               Exception
-    1        RCW Transaction         RCW Transaction         RCW Transaction
-    2        Internal Operation      Internal Operation      Reserved
-    3        Reserved                Reserved                Reserved
diff --git a/Documentation/xtensa/booting.rst b/Documentation/xtensa/booting.rst
new file mode 100644
index 000000000000..e1b83707e5b6
--- /dev/null
+++ b/Documentation/xtensa/booting.rst
@@ -0,0 +1,22 @@
+=====================================
+Passing boot parameters to the kernel
+=====================================
+
+Boot parameters are represented as a TLV list in the memory. Please see
+arch/xtensa/include/asm/bootparam.h for definition of the bp_tag structure and
+tag value constants. First entry in the list must have type BP_TAG_FIRST, last
+entry must have type BP_TAG_LAST. The address of the first list entry is
+passed to the kernel in the register a2. The address type depends on MMU type:
+
+- For configurations without MMU, with region protection or with MPU the
+  address must be the physical address.
+- For configurations with region translarion MMU or with MMUv3 and CONFIG_MMU=n
+  the address must be a valid address in the current mapping. The kernel will
+  not change the mapping on its own.
+- For configurations with MMUv2 the address must be a virtual address in the
+  default virtual mapping (0xd0000000..0xffffffff).
+- For configurations with MMUv3 and CONFIG_MMU=y the address may be either a
+  virtual or physical address. In either case it must be within the default
+  virtual mapping. It is considered physical if it is within the range of
+  physical addresses covered by the default KSEG mapping (XCHAL_KSEG_PADDR..
+  XCHAL_KSEG_PADDR + XCHAL_KSEG_SIZE), otherwise it is considered virtual.
diff --git a/Documentation/xtensa/booting.txt b/Documentation/xtensa/booting.txt
deleted file mode 100644
index 402b33a2619f..000000000000
--- a/Documentation/xtensa/booting.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-Passing boot parameters to the kernel.
-
-Boot parameters are represented as a TLV list in the memory. Please see
-arch/xtensa/include/asm/bootparam.h for definition of the bp_tag structure and
-tag value constants. First entry in the list must have type BP_TAG_FIRST, last
-entry must have type BP_TAG_LAST. The address of the first list entry is
-passed to the kernel in the register a2. The address type depends on MMU type:
-- For configurations without MMU, with region protection or with MPU the
-  address must be the physical address.
-- For configurations with region translarion MMU or with MMUv3 and CONFIG_MMU=n
-  the address must be a valid address in the current mapping. The kernel will
-  not change the mapping on its own.
-- For configurations with MMUv2 the address must be a virtual address in the
-  default virtual mapping (0xd0000000..0xffffffff).
-- For configurations with MMUv3 and CONFIG_MMU=y the address may be either a
-  virtual or physical address. In either case it must be within the default
-  virtual mapping. It is considered physical if it is within the range of
-  physical addresses covered by the default KSEG mapping (XCHAL_KSEG_PADDR..
-  XCHAL_KSEG_PADDR + XCHAL_KSEG_SIZE), otherwise it is considered virtual.
diff --git a/Documentation/xtensa/index.rst b/Documentation/xtensa/index.rst
new file mode 100644
index 000000000000..5a24e365e35f
--- /dev/null
+++ b/Documentation/xtensa/index.rst
@@ -0,0 +1,12 @@
+:orphan:
+
+===================
+Xtensa Architecture
+===================
+
+.. toctree::
+   :maxdepth: 1
+
+   atomctl
+   booting
+   mmu
diff --git a/Documentation/xtensa/mmu.rst b/Documentation/xtensa/mmu.rst
new file mode 100644
index 000000000000..e52a12960fdc
--- /dev/null
+++ b/Documentation/xtensa/mmu.rst
@@ -0,0 +1,195 @@
+=============================
+MMUv3 initialization sequence
+=============================
+
+The code in the initialize_mmu macro sets up MMUv3 memory mapping
+identically to MMUv2 fixed memory mapping. Depending on
+CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX symbol this code is
+located in addresses it was linked for (symbol undefined), or not
+(symbol defined), so it needs to be position-independent.
+
+The code has the following assumptions:
+
+  - This code fragment is run only on an MMU v3.
+  - TLBs are in their reset state.
+  - ITLBCFG and DTLBCFG are zero (reset state).
+  - RASID is 0x04030201 (reset state).
+  - PS.RING is zero (reset state).
+  - LITBASE is zero (reset state, PC-relative literals); required to be PIC.
+
+TLB setup proceeds along the following steps.
+
+  Legend:
+
+    - VA = virtual address (two upper nibbles of it);
+    - PA = physical address (two upper nibbles of it);
+    - pc = physical range that contains this code;
+
+After step 2, we jump to virtual address in the range 0x40000000..0x5fffffff
+or 0x00000000..0x1fffffff, depending on whether the kernel was loaded below
+0x40000000 or above. That address corresponds to next instruction to execute
+in this code. After step 4, we jump to intended (linked) address of this code.
+The scheme below assumes that the kernel is loaded below 0x40000000.
+
+ ====== =====  =====  =====  =====   ====== =====  =====
+ -      Step0  Step1  Step2  Step3          Step4  Step5
+
+   VA      PA     PA     PA     PA     VA      PA     PA
+ ====== =====  =====  =====  =====   ====== =====  =====
+ E0..FF -> E0  -> E0  -> E0          F0..FF -> F0  -> F0
+ C0..DF -> C0  -> C0  -> C0          E0..EF -> F0  -> F0
+ A0..BF -> A0  -> A0  -> A0          D8..DF -> 00  -> 00
+ 80..9F -> 80  -> 80  -> 80          D0..D7 -> 00  -> 00
+ 60..7F -> 60  -> 60  -> 60
+ 40..5F -> 40         -> pc  -> pc   40..5F -> pc
+ 20..3F -> 20  -> 20  -> 20
+ 00..1F -> 00  -> 00  -> 00
+ ====== =====  =====  =====  =====   ====== =====  =====
+
+The default location of IO peripherals is above 0xf0000000. This may be changed
+using a "ranges" property in a device tree simple-bus node. See the Devicetree
+Specification, section 4.5 for details on the syntax and semantics of
+simple-bus nodes. The following limitations apply:
+
+1. Only top level simple-bus nodes are considered
+
+2. Only one (first) simple-bus node is considered
+
+3. Empty "ranges" properties are not supported
+
+4. Only the first triplet in the "ranges" property is considered
+
+5. The parent-bus-address value is rounded down to the nearest 256MB boundary
+
+6. The IO area covers the entire 256MB segment of parent-bus-address; the
+   "ranges" triplet length field is ignored
+
+
+MMUv3 address space layouts.
+============================
+
+Default MMUv2-compatible layout::
+
+                        Symbol                   VADDR       Size
+  +------------------+
+  | Userspace        |                           0x00000000  TASK_SIZE
+  +------------------+                           0x40000000
+  +------------------+
+  | Page table       |  XCHAL_PAGE_TABLE_VADDR   0x80000000  XCHAL_PAGE_TABLE_SIZE
+  +------------------+
+  | KASAN shadow map |  KASAN_SHADOW_START       0x80400000  KASAN_SHADOW_SIZE
+  +------------------+                           0x8e400000
+  +------------------+
+  | VMALLOC area     |  VMALLOC_START            0xc0000000  128MB - 64KB
+  +------------------+  VMALLOC_END
+  | Cache aliasing   |  TLBTEMP_BASE_1           0xc7ff0000  DCACHE_WAY_SIZE
+  | remap area 1     |
+  +------------------+
+  | Cache aliasing   |  TLBTEMP_BASE_2                       DCACHE_WAY_SIZE
+  | remap area 2     |
+  +------------------+
+  +------------------+
+  | KMAP area        |  PKMAP_BASE                           PTRS_PER_PTE *
+  |                  |                                       DCACHE_N_COLORS *
+  |                  |                                       PAGE_SIZE
+  |                  |                                       (4MB * DCACHE_N_COLORS)
+  +------------------+
+  | Atomic KMAP area |  FIXADDR_START                        KM_TYPE_NR *
+  |                  |                                       NR_CPUS *
+  |                  |                                       DCACHE_N_COLORS *
+  |                  |                                       PAGE_SIZE
+  +------------------+  FIXADDR_TOP              0xcffff000
+  +------------------+
+  | Cached KSEG      |  XCHAL_KSEG_CACHED_VADDR  0xd0000000  128MB
+  +------------------+
+  | Uncached KSEG    |  XCHAL_KSEG_BYPASS_VADDR  0xd8000000  128MB
+  +------------------+
+  | Cached KIO       |  XCHAL_KIO_CACHED_VADDR   0xe0000000  256MB
+  +------------------+
+  | Uncached KIO     |  XCHAL_KIO_BYPASS_VADDR   0xf0000000  256MB
+  +------------------+
+
+
+256MB cached + 256MB uncached layout::
+
+                        Symbol                   VADDR       Size
+  +------------------+
+  | Userspace        |                           0x00000000  TASK_SIZE
+  +------------------+                           0x40000000
+  +------------------+
+  | Page table       |  XCHAL_PAGE_TABLE_VADDR   0x80000000  XCHAL_PAGE_TABLE_SIZE
+  +------------------+
+  | KASAN shadow map |  KASAN_SHADOW_START       0x80400000  KASAN_SHADOW_SIZE
+  +------------------+                           0x8e400000
+  +------------------+
+  | VMALLOC area     |  VMALLOC_START            0xa0000000  128MB - 64KB
+  +------------------+  VMALLOC_END
+  | Cache aliasing   |  TLBTEMP_BASE_1           0xa7ff0000  DCACHE_WAY_SIZE
+  | remap area 1     |
+  +------------------+
+  | Cache aliasing   |  TLBTEMP_BASE_2                       DCACHE_WAY_SIZE
+  | remap area 2     |
+  +------------------+
+  +------------------+
+  | KMAP area        |  PKMAP_BASE                           PTRS_PER_PTE *
+  |                  |                                       DCACHE_N_COLORS *
+  |                  |                                       PAGE_SIZE
+  |                  |                                       (4MB * DCACHE_N_COLORS)
+  +------------------+
+  | Atomic KMAP area |  FIXADDR_START                        KM_TYPE_NR *
+  |                  |                                       NR_CPUS *
+  |                  |                                       DCACHE_N_COLORS *
+  |                  |                                       PAGE_SIZE
+  +------------------+  FIXADDR_TOP              0xaffff000
+  +------------------+
+  | Cached KSEG      |  XCHAL_KSEG_CACHED_VADDR  0xb0000000  256MB
+  +------------------+
+  | Uncached KSEG    |  XCHAL_KSEG_BYPASS_VADDR  0xc0000000  256MB
+  +------------------+
+  +------------------+
+  | Cached KIO       |  XCHAL_KIO_CACHED_VADDR   0xe0000000  256MB
+  +------------------+
+  | Uncached KIO     |  XCHAL_KIO_BYPASS_VADDR   0xf0000000  256MB
+  +------------------+
+
+
+512MB cached + 512MB uncached layout::
+
+                        Symbol                   VADDR       Size
+  +------------------+
+  | Userspace        |                           0x00000000  TASK_SIZE
+  +------------------+                           0x40000000
+  +------------------+
+  | Page table       |  XCHAL_PAGE_TABLE_VADDR   0x80000000  XCHAL_PAGE_TABLE_SIZE
+  +------------------+
+  | KASAN shadow map |  KASAN_SHADOW_START       0x80400000  KASAN_SHADOW_SIZE
+  +------------------+                           0x8e400000
+  +------------------+
+  | VMALLOC area     |  VMALLOC_START            0x90000000  128MB - 64KB
+  +------------------+  VMALLOC_END
+  | Cache aliasing   |  TLBTEMP_BASE_1           0x97ff0000  DCACHE_WAY_SIZE
+  | remap area 1     |
+  +------------------+
+  | Cache aliasing   |  TLBTEMP_BASE_2                       DCACHE_WAY_SIZE
+  | remap area 2     |
+  +------------------+
+  +------------------+
+  | KMAP area        |  PKMAP_BASE                           PTRS_PER_PTE *
+  |                  |                                       DCACHE_N_COLORS *
+  |                  |                                       PAGE_SIZE
+  |                  |                                       (4MB * DCACHE_N_COLORS)
+  +------------------+
+  | Atomic KMAP area |  FIXADDR_START                        KM_TYPE_NR *
+  |                  |                                       NR_CPUS *
+  |                  |                                       DCACHE_N_COLORS *
+  |                  |                                       PAGE_SIZE
+  +------------------+  FIXADDR_TOP              0x9ffff000
+  +------------------+
+  | Cached KSEG      |  XCHAL_KSEG_CACHED_VADDR  0xa0000000  512MB
+  +------------------+
+  | Uncached KSEG    |  XCHAL_KSEG_BYPASS_VADDR  0xc0000000  512MB
+  +------------------+
+  | Cached KIO       |  XCHAL_KIO_CACHED_VADDR   0xe0000000  256MB
+  +------------------+
+  | Uncached KIO     |  XCHAL_KIO_BYPASS_VADDR   0xf0000000  256MB
+  +------------------+
diff --git a/Documentation/xtensa/mmu.txt b/Documentation/xtensa/mmu.txt
deleted file mode 100644
index 318114de63f3..000000000000
--- a/Documentation/xtensa/mmu.txt
+++ /dev/null
@@ -1,189 +0,0 @@
-MMUv3 initialization sequence.
-
-The code in the initialize_mmu macro sets up MMUv3 memory mapping
-identically to MMUv2 fixed memory mapping. Depending on
-CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX symbol this code is
-located in addresses it was linked for (symbol undefined), or not
-(symbol defined), so it needs to be position-independent.
-
-The code has the following assumptions:
-  This code fragment is run only on an MMU v3.
-  TLBs are in their reset state.
-  ITLBCFG and DTLBCFG are zero (reset state).
-  RASID is 0x04030201 (reset state).
-  PS.RING is zero (reset state).
-  LITBASE is zero (reset state, PC-relative literals); required to be PIC.
-
-TLB setup proceeds along the following steps.
-
-  Legend:
-    VA = virtual address (two upper nibbles of it);
-    PA = physical address (two upper nibbles of it);
-    pc = physical range that contains this code;
-
-After step 2, we jump to virtual address in the range 0x40000000..0x5fffffff
-or 0x00000000..0x1fffffff, depending on whether the kernel was loaded below
-0x40000000 or above. That address corresponds to next instruction to execute
-in this code. After step 4, we jump to intended (linked) address of this code.
-The scheme below assumes that the kernel is loaded below 0x40000000.
-
-        Step0  Step1  Step2  Step3          Step4  Step5
-        =====  =====  =====  =====          =====  =====
-   VA      PA     PA     PA     PA     VA      PA     PA
- ------    --     --     --     --   ------    --     --
- E0..FF -> E0  -> E0  -> E0          F0..FF -> F0  -> F0
- C0..DF -> C0  -> C0  -> C0          E0..EF -> F0  -> F0
- A0..BF -> A0  -> A0  -> A0          D8..DF -> 00  -> 00
- 80..9F -> 80  -> 80  -> 80          D0..D7 -> 00  -> 00
- 60..7F -> 60  -> 60  -> 60
- 40..5F -> 40         -> pc  -> pc   40..5F -> pc
- 20..3F -> 20  -> 20  -> 20
- 00..1F -> 00  -> 00  -> 00
-
-The default location of IO peripherals is above 0xf0000000. This may be changed
-using a "ranges" property in a device tree simple-bus node. See the Devicetree
-Specification, section 4.5 for details on the syntax and semantics of
-simple-bus nodes. The following limitations apply:
-
-1. Only top level simple-bus nodes are considered
-
-2. Only one (first) simple-bus node is considered
-
-3. Empty "ranges" properties are not supported
-
-4. Only the first triplet in the "ranges" property is considered
-
-5. The parent-bus-address value is rounded down to the nearest 256MB boundary
-
-6. The IO area covers the entire 256MB segment of parent-bus-address; the
-   "ranges" triplet length field is ignored
-
-
-MMUv3 address space layouts.
-============================
-
-Default MMUv2-compatible layout.
-
-                      Symbol                   VADDR       Size
-+------------------+
-| Userspace        |                           0x00000000  TASK_SIZE
-+------------------+                           0x40000000
-+------------------+
-| Page table       |  XCHAL_PAGE_TABLE_VADDR   0x80000000  XCHAL_PAGE_TABLE_SIZE
-+------------------+
-| KASAN shadow map |  KASAN_SHADOW_START       0x80400000  KASAN_SHADOW_SIZE
-+------------------+                           0x8e400000
-+------------------+
-| VMALLOC area     |  VMALLOC_START            0xc0000000  128MB - 64KB
-+------------------+  VMALLOC_END
-| Cache aliasing   |  TLBTEMP_BASE_1           0xc7ff0000  DCACHE_WAY_SIZE
-| remap area 1     |
-+------------------+
-| Cache aliasing   |  TLBTEMP_BASE_2                       DCACHE_WAY_SIZE
-| remap area 2     |
-+------------------+
-+------------------+
-| KMAP area        |  PKMAP_BASE                           PTRS_PER_PTE *
-|                  |                                       DCACHE_N_COLORS *
-|                  |                                       PAGE_SIZE
-|                  |                                       (4MB * DCACHE_N_COLORS)
-+------------------+
-| Atomic KMAP area |  FIXADDR_START                        KM_TYPE_NR *
-|                  |                                       NR_CPUS *
-|                  |                                       DCACHE_N_COLORS *
-|                  |                                       PAGE_SIZE
-+------------------+  FIXADDR_TOP              0xcffff000
-+------------------+
-| Cached KSEG      |  XCHAL_KSEG_CACHED_VADDR  0xd0000000  128MB
-+------------------+
-| Uncached KSEG    |  XCHAL_KSEG_BYPASS_VADDR  0xd8000000  128MB
-+------------------+
-| Cached KIO       |  XCHAL_KIO_CACHED_VADDR   0xe0000000  256MB
-+------------------+
-| Uncached KIO     |  XCHAL_KIO_BYPASS_VADDR   0xf0000000  256MB
-+------------------+
-
-
-256MB cached + 256MB uncached layout.
-
-                      Symbol                   VADDR       Size
-+------------------+
-| Userspace        |                           0x00000000  TASK_SIZE
-+------------------+                           0x40000000
-+------------------+
-| Page table       |  XCHAL_PAGE_TABLE_VADDR   0x80000000  XCHAL_PAGE_TABLE_SIZE
-+------------------+
-| KASAN shadow map |  KASAN_SHADOW_START       0x80400000  KASAN_SHADOW_SIZE
-+------------------+                           0x8e400000
-+------------------+
-| VMALLOC area     |  VMALLOC_START            0xa0000000  128MB - 64KB
-+------------------+  VMALLOC_END
-| Cache aliasing   |  TLBTEMP_BASE_1           0xa7ff0000  DCACHE_WAY_SIZE
-| remap area 1     |
-+------------------+
-| Cache aliasing   |  TLBTEMP_BASE_2                       DCACHE_WAY_SIZE
-| remap area 2     |
-+------------------+
-+------------------+
-| KMAP area        |  PKMAP_BASE                           PTRS_PER_PTE *
-|                  |                                       DCACHE_N_COLORS *
-|                  |                                       PAGE_SIZE
-|                  |                                       (4MB * DCACHE_N_COLORS)
-+------------------+
-| Atomic KMAP area |  FIXADDR_START                        KM_TYPE_NR *
-|                  |                                       NR_CPUS *
-|                  |                                       DCACHE_N_COLORS *
-|                  |                                       PAGE_SIZE
-+------------------+  FIXADDR_TOP              0xaffff000
-+------------------+
-| Cached KSEG      |  XCHAL_KSEG_CACHED_VADDR  0xb0000000  256MB
-+------------------+
-| Uncached KSEG    |  XCHAL_KSEG_BYPASS_VADDR  0xc0000000  256MB
-+------------------+
-+------------------+
-| Cached KIO       |  XCHAL_KIO_CACHED_VADDR   0xe0000000  256MB
-+------------------+
-| Uncached KIO     |  XCHAL_KIO_BYPASS_VADDR   0xf0000000  256MB
-+------------------+
-
-
-512MB cached + 512MB uncached layout.
-
-                      Symbol                   VADDR       Size
-+------------------+
-| Userspace        |                           0x00000000  TASK_SIZE
-+------------------+                           0x40000000
-+------------------+
-| Page table       |  XCHAL_PAGE_TABLE_VADDR   0x80000000  XCHAL_PAGE_TABLE_SIZE
-+------------------+
-| KASAN shadow map |  KASAN_SHADOW_START       0x80400000  KASAN_SHADOW_SIZE
-+------------------+                           0x8e400000
-+------------------+
-| VMALLOC area     |  VMALLOC_START            0x90000000  128MB - 64KB
-+------------------+  VMALLOC_END
-| Cache aliasing   |  TLBTEMP_BASE_1           0x97ff0000  DCACHE_WAY_SIZE
-| remap area 1     |
-+------------------+
-| Cache aliasing   |  TLBTEMP_BASE_2                       DCACHE_WAY_SIZE
-| remap area 2     |
-+------------------+
-+------------------+
-| KMAP area        |  PKMAP_BASE                           PTRS_PER_PTE *
-|                  |                                       DCACHE_N_COLORS *
-|                  |                                       PAGE_SIZE
-|                  |                                       (4MB * DCACHE_N_COLORS)
-+------------------+
-| Atomic KMAP area |  FIXADDR_START                        KM_TYPE_NR *
-|                  |                                       NR_CPUS *
-|                  |                                       DCACHE_N_COLORS *
-|                  |                                       PAGE_SIZE
-+------------------+  FIXADDR_TOP              0x9ffff000
-+------------------+
-| Cached KSEG      |  XCHAL_KSEG_CACHED_VADDR  0xa0000000  512MB
-+------------------+
-| Uncached KSEG    |  XCHAL_KSEG_BYPASS_VADDR  0xc0000000  512MB
-+------------------+
-| Cached KIO       |  XCHAL_KIO_CACHED_VADDR   0xe0000000  256MB
-+------------------+
-| Uncached KIO     |  XCHAL_KIO_BYPASS_VADDR   0xf0000000  256MB
-+------------------+
-- 
cgit 


From f408510c4ff38965289bb53e8462861ad05dfada Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 14:44:06 -0300
Subject: docs: mmc: convert to ReST

Rename the mmc documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/mmc/index.rst         | 13 +++++
 Documentation/mmc/mmc-async-req.rst | 98 +++++++++++++++++++++++++++++++++++++
 Documentation/mmc/mmc-async-req.txt | 87 --------------------------------
 Documentation/mmc/mmc-dev-attrs.rst | 91 ++++++++++++++++++++++++++++++++++
 Documentation/mmc/mmc-dev-attrs.txt | 77 -----------------------------
 Documentation/mmc/mmc-dev-parts.rst | 41 ++++++++++++++++
 Documentation/mmc/mmc-dev-parts.txt | 40 ---------------
 Documentation/mmc/mmc-tools.rst     | 37 ++++++++++++++
 Documentation/mmc/mmc-tools.txt     | 34 -------------
 9 files changed, 280 insertions(+), 238 deletions(-)
 create mode 100644 Documentation/mmc/index.rst
 create mode 100644 Documentation/mmc/mmc-async-req.rst
 delete mode 100644 Documentation/mmc/mmc-async-req.txt
 create mode 100644 Documentation/mmc/mmc-dev-attrs.rst
 delete mode 100644 Documentation/mmc/mmc-dev-attrs.txt
 create mode 100644 Documentation/mmc/mmc-dev-parts.rst
 delete mode 100644 Documentation/mmc/mmc-dev-parts.txt
 create mode 100644 Documentation/mmc/mmc-tools.rst
 delete mode 100644 Documentation/mmc/mmc-tools.txt

(limited to 'Documentation')

diff --git a/Documentation/mmc/index.rst b/Documentation/mmc/index.rst
new file mode 100644
index 000000000000..3305478ddadb
--- /dev/null
+++ b/Documentation/mmc/index.rst
@@ -0,0 +1,13 @@
+:orphan:
+
+========================
+MMC/SD/SDIO card support
+========================
+
+.. toctree::
+   :maxdepth: 1
+
+   mmc-dev-attrs
+   mmc-dev-parts
+   mmc-async-req
+   mmc-tools
diff --git a/Documentation/mmc/mmc-async-req.rst b/Documentation/mmc/mmc-async-req.rst
new file mode 100644
index 000000000000..0f7197c9c3b5
--- /dev/null
+++ b/Documentation/mmc/mmc-async-req.rst
@@ -0,0 +1,98 @@
+========================
+MMC Asynchronous Request
+========================
+
+Rationale
+=========
+
+How significant is the cache maintenance overhead?
+
+It depends. Fast eMMC and multiple cache levels with speculative cache
+pre-fetch makes the cache overhead relatively significant. If the DMA
+preparations for the next request are done in parallel with the current
+transfer, the DMA preparation overhead would not affect the MMC performance.
+
+The intention of non-blocking (asynchronous) MMC requests is to minimize the
+time between when an MMC request ends and another MMC request begins.
+
+Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and
+dma_unmap_sg are processing. Using non-blocking MMC requests makes it
+possible to prepare the caches for next job in parallel with an active
+MMC request.
+
+MMC block driver
+================
+
+The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking.
+
+The increase in throughput is proportional to the time it takes to
+prepare (major part of preparations are dma_map_sg() and dma_unmap_sg())
+a request and how fast the memory is. The faster the MMC/SD is the
+more significant the prepare request time becomes. Roughly the expected
+performance gain is 5% for large writes and 10% on large reads on a L2 cache
+platform. In power save mode, when clocks run on a lower frequency, the DMA
+preparation may cost even more. As long as these slower preparations are run
+in parallel with the transfer performance won't be affected.
+
+Details on measurements from IOZone and mmc_test
+================================================
+
+https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req
+
+MMC core API extension
+======================
+
+There is one new public function mmc_start_req().
+
+It starts a new MMC command request for a host. The function isn't
+truly non-blocking. If there is an ongoing async request it waits
+for completion of that request and starts the new one and returns. It
+doesn't wait for the new request to complete. If there is no ongoing
+request it starts the new request and returns immediately.
+
+MMC host extensions
+===================
+
+There are two optional members in the mmc_host_ops -- pre_req() and
+post_req() -- that the host driver may implement in order to move work
+to before and after the actual mmc_host_ops.request() function is called.
+
+In the DMA case pre_req() may do dma_map_sg() and prepare the DMA
+descriptor, and post_req() runs the dma_unmap_sg().
+
+Optimize for the first request
+==============================
+
+The first request in a series of requests can't be prepared in parallel
+with the previous transfer, since there is no previous request.
+
+The argument is_first_req in pre_req() indicates that there is no previous
+request. The host driver may optimize for this scenario to minimize
+the performance loss. A way to optimize for this is to split the current
+request in two chunks, prepare the first chunk and start the request,
+and finally prepare the second chunk and start the transfer.
+
+Pseudocode to handle is_first_req scenario with minimal prepare overhead::
+
+  if (is_first_req && req->size > threshold)
+     /* start MMC transfer for the complete transfer size */
+     mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE);
+
+     /*
+      * Begin to prepare DMA while cmd is being processed by MMC.
+      * The first chunk of the request should take the same time
+      * to prepare as the "MMC process command time".
+      * If prepare time exceeds MMC cmd time
+      * the transfer is delayed, guesstimate max 4k as first chunk size.
+      */
+      prepare_1st_chunk_for_dma(req);
+      /* flush pending desc to the DMAC (dmaengine.h) */
+      dma_issue_pending(req->dma_desc);
+
+      prepare_2nd_chunk_for_dma(req);
+      /*
+       * The second issue_pending should be called before MMC runs out
+       * of the first chunk. If the MMC runs out of the first data chunk
+       * before this call, the transfer is delayed.
+       */
+      dma_issue_pending(req->dma_desc);
diff --git a/Documentation/mmc/mmc-async-req.txt b/Documentation/mmc/mmc-async-req.txt
deleted file mode 100644
index ae1907b10e4a..000000000000
--- a/Documentation/mmc/mmc-async-req.txt
+++ /dev/null
@@ -1,87 +0,0 @@
-Rationale
-=========
-
-How significant is the cache maintenance overhead?
-It depends. Fast eMMC and multiple cache levels with speculative cache
-pre-fetch makes the cache overhead relatively significant. If the DMA
-preparations for the next request are done in parallel with the current
-transfer, the DMA preparation overhead would not affect the MMC performance.
-The intention of non-blocking (asynchronous) MMC requests is to minimize the
-time between when an MMC request ends and another MMC request begins.
-Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and
-dma_unmap_sg are processing. Using non-blocking MMC requests makes it
-possible to prepare the caches for next job in parallel with an active
-MMC request.
-
-MMC block driver
-================
-
-The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking.
-The increase in throughput is proportional to the time it takes to
-prepare (major part of preparations are dma_map_sg() and dma_unmap_sg())
-a request and how fast the memory is. The faster the MMC/SD is the
-more significant the prepare request time becomes. Roughly the expected
-performance gain is 5% for large writes and 10% on large reads on a L2 cache
-platform. In power save mode, when clocks run on a lower frequency, the DMA
-preparation may cost even more. As long as these slower preparations are run
-in parallel with the transfer performance won't be affected.
-
-Details on measurements from IOZone and mmc_test
-================================================
-
-https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req
-
-MMC core API extension
-======================
-
-There is one new public function mmc_start_req().
-It starts a new MMC command request for a host. The function isn't
-truly non-blocking. If there is an ongoing async request it waits
-for completion of that request and starts the new one and returns. It
-doesn't wait for the new request to complete. If there is no ongoing
-request it starts the new request and returns immediately.
-
-MMC host extensions
-===================
-
-There are two optional members in the mmc_host_ops -- pre_req() and
-post_req() -- that the host driver may implement in order to move work
-to before and after the actual mmc_host_ops.request() function is called.
-In the DMA case pre_req() may do dma_map_sg() and prepare the DMA
-descriptor, and post_req() runs the dma_unmap_sg().
-
-Optimize for the first request
-==============================
-
-The first request in a series of requests can't be prepared in parallel
-with the previous transfer, since there is no previous request.
-The argument is_first_req in pre_req() indicates that there is no previous
-request. The host driver may optimize for this scenario to minimize
-the performance loss. A way to optimize for this is to split the current
-request in two chunks, prepare the first chunk and start the request,
-and finally prepare the second chunk and start the transfer.
-
-Pseudocode to handle is_first_req scenario with minimal prepare overhead:
-
-if (is_first_req && req->size > threshold)
-   /* start MMC transfer for the complete transfer size */
-   mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE);
-
-   /*
-    * Begin to prepare DMA while cmd is being processed by MMC.
-    * The first chunk of the request should take the same time
-    * to prepare as the "MMC process command time".
-    * If prepare time exceeds MMC cmd time
-    * the transfer is delayed, guesstimate max 4k as first chunk size.
-    */
-    prepare_1st_chunk_for_dma(req);
-    /* flush pending desc to the DMAC (dmaengine.h) */
-    dma_issue_pending(req->dma_desc);
-
-    prepare_2nd_chunk_for_dma(req);
-    /*
-     * The second issue_pending should be called before MMC runs out
-     * of the first chunk. If the MMC runs out of the first data chunk
-     * before this call, the transfer is delayed.
-     */
-    dma_issue_pending(req->dma_desc);
diff --git a/Documentation/mmc/mmc-dev-attrs.rst b/Documentation/mmc/mmc-dev-attrs.rst
new file mode 100644
index 000000000000..4f44b1b730d6
--- /dev/null
+++ b/Documentation/mmc/mmc-dev-attrs.rst
@@ -0,0 +1,91 @@
+==================================
+SD and MMC Block Device Attributes
+==================================
+
+These attributes are defined for the block devices associated with the
+SD or MMC device.
+
+The following attributes are read/write.
+
+	========		===============================================
+	force_ro		Enforce read-only access even if write protect 					switch is off.
+	========		===============================================
+
+SD and MMC Device Attributes
+============================
+
+All attributes are read-only.
+
+	======================	===============================================
+	cid			Card Identification Register
+	csd			Card Specific Data Register
+	scr			SD Card Configuration Register (SD only)
+	date			Manufacturing Date (from CID Register)
+	fwrev			Firmware/Product Revision (from CID Register)
+				(SD and MMCv1 only)
+	hwrev			Hardware/Product Revision (from CID Register)
+				(SD and MMCv1 only)
+	manfid			Manufacturer ID (from CID Register)
+	name			Product Name (from CID Register)
+	oemid			OEM/Application ID (from CID Register)
+	prv			Product Revision (from CID Register)
+				(SD and MMCv4 only)
+	serial			Product Serial Number (from CID Register)
+	erase_size		Erase group size
+	preferred_erase_size	Preferred erase size
+	raw_rpmb_size_mult	RPMB partition size
+	rel_sectors		Reliable write sector count
+	ocr 			Operation Conditions Register
+	dsr			Driver Stage Register
+	cmdq_en			Command Queue enabled:
+
+					1 => enabled, 0 => not enabled
+	======================	===============================================
+
+Note on Erase Size and Preferred Erase Size:
+
+	"erase_size" is the  minimum size, in bytes, of an erase
+	operation.  For MMC, "erase_size" is the erase group size
+	reported by the card.  Note that "erase_size" does not apply
+	to trim or secure trim operations where the minimum size is
+	always one 512 byte sector.  For SD, "erase_size" is 512
+	if the card is block-addressed, 0 otherwise.
+
+	SD/MMC cards can erase an arbitrarily large area up to and
+	including the whole card.  When erasing a large area it may
+	be desirable to do it in smaller chunks for three reasons:
+
+	     1. A single erase command will make all other I/O on
+		the card wait.  This is not a problem if the whole card
+		is being erased, but erasing one partition will make
+		I/O for another partition on the same card wait for the
+		duration of the erase - which could be a several
+		minutes.
+	     2. To be able to inform the user of erase progress.
+	     3. The erase timeout becomes too large to be very
+		useful.  Because the erase timeout contains a margin
+		which is multiplied by the size of the erase area,
+		the value can end up being several minutes for large
+		areas.
+
+	"erase_size" is not the most efficient unit to erase
+	(especially for SD where it is just one sector),
+	hence "preferred_erase_size" provides a good chunk
+	size for erasing large areas.
+
+	For MMC, "preferred_erase_size" is the high-capacity
+	erase size if a card specifies one, otherwise it is
+	based on the capacity of the card.
+
+	For SD, "preferred_erase_size" is the allocation unit
+	size specified by the card.
+
+	"preferred_erase_size" is in bytes.
+
+Note on raw_rpmb_size_mult:
+
+	"raw_rpmb_size_mult" is a multiple of 128kB block.
+
+	RPMB size in byte is calculated by using the following equation:
+
+		RPMB partition size = 128kB x raw_rpmb_size_mult
diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/mmc/mmc-dev-attrs.txt
deleted file mode 100644
index 4ad0bb17f343..000000000000
--- a/Documentation/mmc/mmc-dev-attrs.txt
+++ /dev/null
@@ -1,77 +0,0 @@
-SD and MMC Block Device Attributes
-==================================
-
-These attributes are defined for the block devices associated with the
-SD or MMC device.
-
-The following attributes are read/write.
-
-	force_ro		Enforce read-only access even if write protect switch is off.
-
-SD and MMC Device Attributes
-============================
-
-All attributes are read-only.
-
-	cid			Card Identification Register
-	csd			Card Specific Data Register
-	scr			SD Card Configuration Register (SD only)
-	date			Manufacturing Date (from CID Register)
-	fwrev			Firmware/Product Revision (from CID Register) (SD and MMCv1 only)
-	hwrev			Hardware/Product Revision (from CID Register) (SD and MMCv1 only)
-	manfid			Manufacturer ID (from CID Register)
-	name			Product Name (from CID Register)
-	oemid			OEM/Application ID (from CID Register)
-	prv			Product Revision (from CID Register) (SD and MMCv4 only)
-	serial			Product Serial Number (from CID Register)
-	erase_size		Erase group size
-	preferred_erase_size	Preferred erase size
-	raw_rpmb_size_mult	RPMB partition size
-	rel_sectors		Reliable write sector count
-	ocr 			Operation Conditions Register
-	dsr			Driver Stage Register
-	cmdq_en			Command Queue enabled: 1 => enabled, 0 => not enabled
-
-Note on Erase Size and Preferred Erase Size:
-
-	"erase_size" is the  minimum size, in bytes, of an erase
-	operation.  For MMC, "erase_size" is the erase group size
-	reported by the card.  Note that "erase_size" does not apply
-	to trim or secure trim operations where the minimum size is
-	always one 512 byte sector.  For SD, "erase_size" is 512
-	if the card is block-addressed, 0 otherwise.
-
-	SD/MMC cards can erase an arbitrarily large area up to and
-	including the whole card.  When erasing a large area it may
-	be desirable to do it in smaller chunks for three reasons:
-		1. A single erase command will make all other I/O on
-		the card wait.  This is not a problem if the whole card
-		is being erased, but erasing one partition will make
-		I/O for another partition on the same card wait for the
-		duration of the erase - which could be a several
-		minutes.
-		2. To be able to inform the user of erase progress.
-		3. The erase timeout becomes too large to be very
-		useful.  Because the erase timeout contains a margin
-		which is multiplied by the size of the erase area,
-		the value can end up being several minutes for large
-		areas.
-
-	"erase_size" is not the most efficient unit to erase
-	(especially for SD where it is just one sector),
-	hence "preferred_erase_size" provides a good chunk
-	size for erasing large areas.
-
-	For MMC, "preferred_erase_size" is the high-capacity
-	erase size if a card specifies one, otherwise it is
-	based on the capacity of the card.
-
-	For SD, "preferred_erase_size" is the allocation unit
-	size specified by the card.
-
-	"preferred_erase_size" is in bytes.
-
-Note on raw_rpmb_size_mult:
-	"raw_rpmb_size_mult" is a multiple of 128kB block.
-	RPMB size in byte is calculated by using the following equation:
-	RPMB partition size = 128kB x raw_rpmb_size_mult
diff --git a/Documentation/mmc/mmc-dev-parts.rst b/Documentation/mmc/mmc-dev-parts.rst
new file mode 100644
index 000000000000..995922f1f744
--- /dev/null
+++ b/Documentation/mmc/mmc-dev-parts.rst
@@ -0,0 +1,41 @@
+============================
+SD and MMC Device Partitions
+============================
+
+Device partitions are additional logical block devices present on the
+SD/MMC device.
+
+As of this writing, MMC boot partitions as supported and exposed as
+/dev/mmcblkXboot0 and /dev/mmcblkXboot1, where X is the index of the
+parent /dev/mmcblkX.
+
+MMC Boot Partitions
+===================
+
+Read and write access is provided to the two MMC boot partitions. Due to
+the sensitive nature of the boot partition contents, which often store
+a bootloader or bootloader configuration tables crucial to booting the
+platform, write access is disabled by default to reduce the chance of
+accidental bricking.
+
+To enable write access to /dev/mmcblkXbootY, disable the forced read-only
+access with::
+
+	echo 0 > /sys/block/mmcblkXbootY/force_ro
+
+To re-enable read-only access::
+
+	echo 1 > /sys/block/mmcblkXbootY/force_ro
+
+The boot partitions can also be locked read only until the next power on,
+with::
+
+	echo 1 > /sys/block/mmcblkXbootY/ro_lock_until_next_power_on
+
+This is a feature of the card and not of the kernel. If the card does
+not support boot partition locking, the file will not exist. If the
+feature has been disabled on the card, the file will be read-only.
+
+The boot partitions can also be locked permanently, but this feature is
+not accessible through sysfs in order to avoid accidental or malicious
+bricking.
diff --git a/Documentation/mmc/mmc-dev-parts.txt b/Documentation/mmc/mmc-dev-parts.txt
deleted file mode 100644
index f08d078d43cf..000000000000
--- a/Documentation/mmc/mmc-dev-parts.txt
+++ /dev/null
@@ -1,40 +0,0 @@
-SD and MMC Device Partitions
-============================
-
-Device partitions are additional logical block devices present on the
-SD/MMC device.
-
-As of this writing, MMC boot partitions as supported and exposed as
-/dev/mmcblkXboot0 and /dev/mmcblkXboot1, where X is the index of the
-parent /dev/mmcblkX.
-
-MMC Boot Partitions
-===================
-
-Read and write access is provided to the two MMC boot partitions. Due to
-the sensitive nature of the boot partition contents, which often store
-a bootloader or bootloader configuration tables crucial to booting the
-platform, write access is disabled by default to reduce the chance of
-accidental bricking.
-
-To enable write access to /dev/mmcblkXbootY, disable the forced read-only
-access with:
-
-echo 0 > /sys/block/mmcblkXbootY/force_ro
-
-To re-enable read-only access:
-
-echo 1 > /sys/block/mmcblkXbootY/force_ro
-
-The boot partitions can also be locked read only until the next power on,
-with:
-
-echo 1 > /sys/block/mmcblkXbootY/ro_lock_until_next_power_on
-
-This is a feature of the card and not of the kernel. If the card does
-not support boot partition locking, the file will not exist. If the
-feature has been disabled on the card, the file will be read-only.
-
-The boot partitions can also be locked permanently, but this feature is
-not accessible through sysfs in order to avoid accidental or malicious
-bricking.
diff --git a/Documentation/mmc/mmc-tools.rst b/Documentation/mmc/mmc-tools.rst
new file mode 100644
index 000000000000..54406093768b
--- /dev/null
+++ b/Documentation/mmc/mmc-tools.rst
@@ -0,0 +1,37 @@
+======================
+MMC tools introduction
+======================
+
+There is one MMC test tools called mmc-utils, which is maintained by Chris Ball,
+you can find it at the below public git repository:
+
+	http://git.kernel.org/cgit/linux/kernel/git/cjb/mmc-utils.git/
+
+Functions
+=========
+
+The mmc-utils tools can do the following:
+
+ - Print and parse extcsd data.
+ - Determine the eMMC writeprotect status.
+ - Set the eMMC writeprotect status.
+ - Set the eMMC data sector size to 4KB by disabling emulation.
+ - Create general purpose partition.
+ - Enable the enhanced user area.
+ - Enable write reliability per partition.
+ - Print the response to STATUS_SEND (CMD13).
+ - Enable the boot partition.
+ - Set Boot Bus Conditions.
+ - Enable the eMMC BKOPS feature.
+ - Permanently enable the eMMC H/W Reset feature.
+ - Permanently disable the eMMC H/W Reset feature.
+ - Send Sanitize command.
+ - Program authentication key for the device.
+ - Counter value for the rpmb device will be read to stdout.
+ - Read from rpmb device to output.
+ - Write to rpmb device from data file.
+ - Enable the eMMC cache feature.
+ - Disable the eMMC cache feature.
+ - Print and parse CID data.
+ - Print and parse CSD data.
+ - Print and parse SCR data.
diff --git a/Documentation/mmc/mmc-tools.txt b/Documentation/mmc/mmc-tools.txt
deleted file mode 100644
index 735509c165d5..000000000000
--- a/Documentation/mmc/mmc-tools.txt
+++ /dev/null
@@ -1,34 +0,0 @@
-MMC tools introduction
-======================
-
-There is one MMC test tools called mmc-utils, which is maintained by Chris Ball,
-you can find it at the below public git repository:
-http://git.kernel.org/cgit/linux/kernel/git/cjb/mmc-utils.git/
-
-Functions
-=========
-
-The mmc-utils tools can do the following:
- - Print and parse extcsd data.
- - Determine the eMMC writeprotect status.
- - Set the eMMC writeprotect status.
- - Set the eMMC data sector size to 4KB by disabling emulation.
- - Create general purpose partition.
- - Enable the enhanced user area.
- - Enable write reliability per partition.
- - Print the response to STATUS_SEND (CMD13).
- - Enable the boot partition.
- - Set Boot Bus Conditions.
- - Enable the eMMC BKOPS feature.
- - Permanently enable the eMMC H/W Reset feature.
- - Permanently disable the eMMC H/W Reset feature.
- - Send Sanitize command.
- - Program authentication key for the device.
- - Counter value for the rpmb device will be read to stdout.
- - Read from rpmb device to output.
- - Write to rpmb device from data file.
- - Enable the eMMC cache feature.
- - Disable the eMMC cache feature.
- - Print and parse CID data.
- - Print and parse CSD data.
- - Print and parse SCR data.
-- 
cgit 


From 08536105d93fe371743709b85350db141bafc51f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 11:21:26 -0300
Subject: docs: ioctl-number.txt: convert it to ReST format

The conversion itself is simple: add a markup for the
title of this file and add markups for both tables.

Yet, the big table here with IOCTL numbers is badly formatted:
on several lines, the "Include File" column has some values that
are bigger than the reserved space there.

Also, on several places, a comment was misplaced at the "Include
File" space.

So, most of the work here is to actually ensure that each field
will be properly fixed.

Also worth to mention that some URLs have the asterisk character
on it. Well, Sphinx has an issue with asterisks in the middle
of an string. As this is URL, use the alternate format: %2A.

As a side effect of this patch, it is now a lot easier to see that
some reserved ioctl numbers are missing the include files
where it is supposed to be used.

PS.: While this is part of a subdir, I opted to convert this
single file alone, as this file has a potential of conflicts,
as most subsystem maintainers touch it.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/ioctl/ioctl-number.rst               | 363 +++++++++++++++++++++
 Documentation/ioctl/ioctl-number.txt               | 351 --------------------
 Documentation/process/submit-checklist.rst         |   2 +-
 .../it_IT/process/submit-checklist.rst             |   2 +-
 .../zh_CN/process/submit-checklist.rst             |   2 +-
 5 files changed, 366 insertions(+), 354 deletions(-)
 create mode 100644 Documentation/ioctl/ioctl-number.rst
 delete mode 100644 Documentation/ioctl/ioctl-number.txt

(limited to 'Documentation')

diff --git a/Documentation/ioctl/ioctl-number.rst b/Documentation/ioctl/ioctl-number.rst
new file mode 100644
index 000000000000..fcf9623a599f
--- /dev/null
+++ b/Documentation/ioctl/ioctl-number.rst
@@ -0,0 +1,363 @@
+:orphan:
+
+=============
+Ioctl Numbers
+=============
+
+19 October 1999
+
+Michael Elizabeth Chastain
+<mec@shout.net>
+
+If you are adding new ioctl's to the kernel, you should use the _IO
+macros defined in <linux/ioctl.h>:
+
+    ====== == ============================================
+    _IO    an ioctl with no parameters
+    _IOW   an ioctl with write parameters (copy_from_user)
+    _IOR   an ioctl with read parameters  (copy_to_user)
+    _IOWR  an ioctl with both write and read parameters.
+    ====== == ============================================
+
+'Write' and 'read' are from the user's point of view, just like the
+system calls 'write' and 'read'.  For example, a SET_FOO ioctl would
+be _IOW, although the kernel would actually read data from user space;
+a GET_FOO ioctl would be _IOR, although the kernel would actually write
+data to user space.
+
+The first argument to _IO, _IOW, _IOR, or _IOWR is an identifying letter
+or number from the table below.  Because of the large number of drivers,
+many drivers share a partial letter with other drivers.
+
+If you are writing a driver for a new device and need a letter, pick an
+unused block with enough room for expansion: 32 to 256 ioctl commands.
+You can register the block by patching this file and submitting the
+patch to Linus Torvalds.  Or you can e-mail me at <mec@shout.net> and
+I'll register one for you.
+
+The second argument to _IO, _IOW, _IOR, or _IOWR is a sequence number
+to distinguish ioctls from each other.  The third argument to _IOW,
+_IOR, or _IOWR is the type of the data going into the kernel or coming
+out of the kernel (e.g.  'int' or 'struct foo').  NOTE!  Do NOT use
+sizeof(arg) as the third argument as this results in your ioctl thinking
+it passes an argument of type size_t.
+
+Some devices use their major number as the identifier; this is OK, as
+long as it is unique.  Some devices are irregular and don't follow any
+convention at all.
+
+Following this convention is good because:
+
+(1) Keeping the ioctl's globally unique helps error checking:
+    if a program calls an ioctl on the wrong device, it will get an
+    error rather than some unexpected behaviour.
+
+(2) The 'strace' build procedure automatically finds ioctl numbers
+    defined with _IO, _IOW, _IOR, or _IOWR.
+
+(3) 'strace' can decode numbers back into useful names when the
+    numbers are unique.
+
+(4) People looking for ioctls can grep for them more easily when
+    this convention is used to define the ioctl numbers.
+
+(5) When following the convention, the driver code can use generic
+    code to copy the parameters between user and kernel space.
+
+This table lists ioctls visible from user land for Linux/x86.  It contains
+most drivers up to 2.6.31, but I know I am missing some.  There has been
+no attempt to list non-X86 architectures or ioctls from drivers/staging/.
+
+====  =====  ======================================================= ================================================================
+Code  Seq#    Include File                                           Comments
+      (hex)
+====  =====  ======================================================= ================================================================
+0x00  00-1F  linux/fs.h                                              conflict!
+0x00  00-1F  scsi/scsi_ioctl.h                                       conflict!
+0x00  00-1F  linux/fb.h                                              conflict!
+0x00  00-1F  linux/wavefront.h                                       conflict!
+0x02  all    linux/fd.h
+0x03  all    linux/hdreg.h
+0x04  D2-DC  linux/umsdos_fs.h                                       Dead since 2.6.11, but don't reuse these.
+0x06  all    linux/lp.h
+0x09  all    linux/raid/md_u.h
+0x10  00-0F  drivers/char/s390/vmcp.h
+0x10  10-1F  arch/s390/include/uapi/sclp_ctl.h
+0x10  20-2F  arch/s390/include/uapi/asm/hypfs.h
+0x12  all    linux/fs.h
+             linux/blkpg.h
+0x1b  all                                                            InfiniBand Subsystem
+                                                                     <http://infiniband.sourceforge.net/>
+0x20  all    drivers/cdrom/cm206.h
+0x22  all    scsi/sg.h
+'!'   00-1F  uapi/linux/seccomp.h
+'#'   00-3F                                                          IEEE 1394 Subsystem
+                                                                     Block for the entire subsystem
+'$'   00-0F  linux/perf_counter.h, linux/perf_event.h
+'%'   00-0F  include/uapi/linux/stm.h                                System Trace Module subsystem
+                                                                     <mailto:alexander.shishkin@linux.intel.com>
+'&'   00-07  drivers/firewire/nosy-user.h
+'1'   00-1F  linux/timepps.h                                         PPS kit from Ulrich Windl
+                                                                     <ftp://ftp.de.kernel.org/pub/linux/daemons/ntp/PPS/>
+'2'   01-04  linux/i2o.h
+'3'   00-0F  drivers/s390/char/raw3270.h                             conflict!
+'3'   00-1F  linux/suspend_ioctls.h,                                 conflict!
+             kernel/power/user.c
+'8'   all                                                            SNP8023 advanced NIC card
+                                                                     <mailto:mcr@solidum.com>
+';'   64-7F  linux/vfio.h
+'@'   00-0F  linux/radeonfb.h                                        conflict!
+'@'   00-0F  drivers/video/aty/aty128fb.c                            conflict!
+'A'   00-1F  linux/apm_bios.h                                        conflict!
+'A'   00-0F  linux/agpgart.h,                                        conflict!
+             drivers/char/agp/compat_ioctl.h
+'A'   00-7F  sound/asound.h                                          conflict!
+'B'   00-1F  linux/cciss_ioctl.h                                     conflict!
+'B'   00-0F  include/linux/pmu.h                                     conflict!
+'B'   C0-FF  advanced bbus                                           <mailto:maassen@uni-freiburg.de>
+'C'   all    linux/soundcard.h                                       conflict!
+'C'   01-2F  linux/capi.h                                            conflict!
+'C'   F0-FF  drivers/net/wan/cosa.h                                  conflict!
+'D'   all    arch/s390/include/asm/dasd.h
+'D'   40-5F  drivers/scsi/dpt/dtpi_ioctl.h
+'D'   05     drivers/scsi/pmcraid.h
+'E'   all    linux/input.h                                           conflict!
+'E'   00-0F  xen/evtchn.h                                            conflict!
+'F'   all    linux/fb.h                                              conflict!
+'F'   01-02  drivers/scsi/pmcraid.h                                  conflict!
+'F'   20     drivers/video/fsl-diu-fb.h                              conflict!
+'F'   20     drivers/video/intelfb/intelfb.h                         conflict!
+'F'   20     linux/ivtvfb.h                                          conflict!
+'F'   20     linux/matroxfb.h                                        conflict!
+'F'   20     drivers/video/aty/atyfb_base.c                          conflict!
+'F'   00-0F  video/da8xx-fb.h                                        conflict!
+'F'   80-8F  linux/arcfb.h                                           conflict!
+'F'   DD     video/sstfb.h                                           conflict!
+'G'   00-3F  drivers/misc/sgi-gru/grulib.h                           conflict!
+'G'   00-0F  linux/gigaset_dev.h                                     conflict!
+'H'   00-7F  linux/hiddev.h                                          conflict!
+'H'   00-0F  linux/hidraw.h                                          conflict!
+'H'   01     linux/mei.h                                             conflict!
+'H'   02     linux/mei.h                                             conflict!
+'H'   03     linux/mei.h                                             conflict!
+'H'   00-0F  sound/asound.h                                          conflict!
+'H'   20-40  sound/asound_fm.h                                       conflict!
+'H'   80-8F  sound/sfnt_info.h                                       conflict!
+'H'   10-8F  sound/emu10k1.h                                         conflict!
+'H'   10-1F  sound/sb16_csp.h                                        conflict!
+'H'   10-1F  sound/hda_hwdep.h                                       conflict!
+'H'   40-4F  sound/hdspm.h                                           conflict!
+'H'   40-4F  sound/hdsp.h                                            conflict!
+'H'   90     sound/usb/usx2y/usb_stream.h
+'H'   A0     uapi/linux/usb/cdc-wdm.h
+'H'   C0-F0  net/bluetooth/hci.h                                     conflict!
+'H'   C0-DF  net/bluetooth/hidp/hidp.h                               conflict!
+'H'   C0-DF  net/bluetooth/cmtp/cmtp.h                               conflict!
+'H'   C0-DF  net/bluetooth/bnep/bnep.h                               conflict!
+'H'   F1     linux/hid-roccat.h                                      <mailto:erazor_de@users.sourceforge.net>
+'H'   F8-FA  sound/firewire.h
+'I'   all    linux/isdn.h                                            conflict!
+'I'   00-0F  drivers/isdn/divert/isdn_divert.h                       conflict!
+'I'   40-4F  linux/mISDNif.h                                         conflict!
+'J'   00-1F  drivers/scsi/gdth_ioctl.h
+'K'   all    linux/kd.h
+'L'   00-1F  linux/loop.h                                            conflict!
+'L'   10-1F  drivers/scsi/mpt3sas/mpt3sas_ctl.h                      conflict!
+'L'   20-2F  linux/lightnvm.h
+'L'   E0-FF  linux/ppdd.h                                            encrypted disk device driver
+                                                                     <http://linux01.gwdg.de/~alatham/ppdd.html>
+'M'   all    linux/soundcard.h                                       conflict!
+'M'   01-16  mtd/mtd-abi.h                                           conflict!
+      and    drivers/mtd/mtdchar.c
+'M'   01-03  drivers/scsi/megaraid/megaraid_sas.h
+'M'   00-0F  drivers/video/fsl-diu-fb.h                              conflict!
+'N'   00-1F  drivers/usb/scanner.h
+'N'   40-7F  drivers/block/nvme.c
+'O'   00-06  mtd/ubi-user.h                                          UBI
+'P'   all    linux/soundcard.h                                       conflict!
+'P'   60-6F  sound/sscape_ioctl.h                                    conflict!
+'P'   00-0F  drivers/usb/class/usblp.c                               conflict!
+'P'   01-09  drivers/misc/pci_endpoint_test.c                        conflict!
+'Q'   all    linux/soundcard.h
+'R'   00-1F  linux/random.h                                          conflict!
+'R'   01     linux/rfkill.h                                          conflict!
+'R'   C0-DF  net/bluetooth/rfcomm.h
+'S'   all    linux/cdrom.h                                           conflict!
+'S'   80-81  scsi/scsi_ioctl.h                                       conflict!
+'S'   82-FF  scsi/scsi.h                                             conflict!
+'S'   00-7F  sound/asequencer.h                                      conflict!
+'T'   all    linux/soundcard.h                                       conflict!
+'T'   00-AF  sound/asound.h                                          conflict!
+'T'   all    arch/x86/include/asm/ioctls.h                           conflict!
+'T'   C0-DF  linux/if_tun.h                                          conflict!
+'U'   all    sound/asound.h                                          conflict!
+'U'   00-CF  linux/uinput.h                                          conflict!
+'U'   00-EF  linux/usbdevice_fs.h
+'U'   C0-CF  drivers/bluetooth/hci_uart.h
+'V'   all    linux/vt.h                                              conflict!
+'V'   all    linux/videodev2.h                                       conflict!
+'V'   C0     linux/ivtvfb.h                                          conflict!
+'V'   C0     linux/ivtv.h                                            conflict!
+'V'   C0     media/davinci/vpfe_capture.h                            conflict!
+'V'   C0     media/si4713.h                                          conflict!
+'W'   00-1F  linux/watchdog.h                                        conflict!
+'W'   00-1F  linux/wanrouter.h                                       conflict! (pre 3.9)
+'W'   00-3F  sound/asound.h                                          conflict!
+'W'   40-5F  drivers/pci/switch/switchtec.c
+'X'   all    fs/xfs/xfs_fs.h,                                        conflict!
+             fs/xfs/linux-2.6/xfs_ioctl32.h,
+             include/linux/falloc.h,
+             linux/fs.h,
+'X'   all    fs/ocfs2/ocfs_fs.h                                      conflict!
+'X'   01     linux/pktcdvd.h                                         conflict!
+'Y'   all    linux/cyclades.h
+'Z'   14-15  drivers/message/fusion/mptctl.h
+'['   00-3F  linux/usb/tmc.h                                         USB Test and Measurement Devices
+                                                                     <mailto:gregkh@linuxfoundation.org>
+'a'   all    linux/atm*.h, linux/sonet.h                             ATM on linux
+                                                                     <http://lrcwww.epfl.ch/>
+'a'   00-0F  drivers/crypto/qat/qat_common/adf_cfg_common.h          conflict! qat driver
+'b'   00-FF                                                          conflict! bit3 vme host bridge
+                                                                     <mailto:natalia@nikhefk.nikhef.nl>
+'c'   all    linux/cm4000_cs.h                                       conflict!
+'c'   00-7F  linux/comstats.h                                        conflict!
+'c'   00-7F  linux/coda.h                                            conflict!
+'c'   00-1F  linux/chio.h                                            conflict!
+'c'   80-9F  arch/s390/include/asm/chsc.h                            conflict!
+'c'   A0-AF  arch/x86/include/asm/msr.h conflict!
+'d'   00-FF  linux/char/drm/drm.h                                    conflict!
+'d'   02-40  pcmcia/ds.h                                             conflict!
+'d'   F0-FF  linux/digi1.h
+'e'   all    linux/digi1.h                                           conflict!
+'f'   00-1F  linux/ext2_fs.h                                         conflict!
+'f'   00-1F  linux/ext3_fs.h                                         conflict!
+'f'   00-0F  fs/jfs/jfs_dinode.h                                     conflict!
+'f'   00-0F  fs/ext4/ext4.h                                          conflict!
+'f'   00-0F  linux/fs.h                                              conflict!
+'f'   00-0F  fs/ocfs2/ocfs2_fs.h                                     conflict!
+'g'   00-0F  linux/usb/gadgetfs.h
+'g'   20-2F  linux/usb/g_printer.h
+'h'   00-7F                                                          conflict! Charon filesystem
+                                                                     <mailto:zapman@interlan.net>
+'h'   00-1F  linux/hpet.h                                            conflict!
+'h'   80-8F  fs/hfsplus/ioctl.c
+'i'   00-3F  linux/i2o-dev.h                                         conflict!
+'i'   0B-1F  linux/ipmi.h                                            conflict!
+'i'   80-8F  linux/i8k.h
+'j'   00-3F  linux/joystick.h
+'k'   00-0F  linux/spi/spidev.h                                      conflict!
+'k'   00-05  video/kyro.h                                            conflict!
+'k'   10-17  linux/hsi/hsi_char.h                                    HSI character device
+'l'   00-3F  linux/tcfs_fs.h                                         transparent cryptographic file system
+                                                                     <http://web.archive.org/web/%2A/http://mikonos.dia.unisa.it/tcfs>
+'l'   40-7F  linux/udf_fs_i.h                                        in development:
+                                                                     <http://sourceforge.net/projects/linux-udf/>
+'m'   00-09  linux/mmtimer.h                                         conflict!
+'m'   all    linux/mtio.h                                            conflict!
+'m'   all    linux/soundcard.h                                       conflict!
+'m'   all    linux/synclink.h                                        conflict!
+'m'   00-19  drivers/message/fusion/mptctl.h                         conflict!
+'m'   00     drivers/scsi/megaraid/megaraid_ioctl.h                  conflict!
+'n'   00-7F  linux/ncp_fs.h and fs/ncpfs/ioctl.c
+'n'   80-8F  uapi/linux/nilfs2_api.h                                 NILFS2
+'n'   E0-FF  linux/matroxfb.h                                        matroxfb
+'o'   00-1F  fs/ocfs2/ocfs2_fs.h                                     OCFS2
+'o'   00-03  mtd/ubi-user.h                                          conflict! (OCFS2 and UBI overlaps)
+'o'   40-41  mtd/ubi-user.h                                          UBI
+'o'   01-A1  `linux/dvb/*.h`                                         DVB
+'p'   00-0F  linux/phantom.h                                         conflict! (OpenHaptics needs this)
+'p'   00-1F  linux/rtc.h                                             conflict!
+'p'   00-3F  linux/mc146818rtc.h                                     conflict!
+'p'   40-7F  linux/nvram.h
+'p'   80-9F  linux/ppdev.h                                           user-space parport
+                                                                     <mailto:tim@cyberelk.net>
+'p'   A1-A5  linux/pps.h                                             LinuxPPS
+                                                                     <mailto:giometti@linux.it>
+'q'   00-1F  linux/serio.h
+'q'   80-FF  linux/telephony.h                                       Internet PhoneJACK, Internet LineJACK
+             linux/ixjuser.h                                         <http://web.archive.org/web/%2A/http://www.quicknet.net>
+'r'   00-1F  linux/msdos_fs.h and fs/fat/dir.c
+'s'   all    linux/cdk.h
+'t'   00-7F  linux/ppp-ioctl.h
+'t'   80-8F  linux/isdn_ppp.h
+'t'   90-91  linux/toshiba.h                                         toshiba and toshiba_acpi SMM
+'u'   00-1F  linux/smb_fs.h                                          gone
+'u'   20-3F  linux/uvcvideo.h                                        USB video class host driver
+'u'   40-4f  linux/udmabuf.h                                         userspace dma-buf misc device
+'v'   00-1F  linux/ext2_fs.h                                         conflict!
+'v'   00-1F  linux/fs.h                                              conflict!
+'v'   00-0F  linux/sonypi.h                                          conflict!
+'v'   00-0F  media/v4l2-subdev.h                                     conflict!
+'v'   C0-FF  linux/meye.h                                            conflict!
+'w'   all                                                            CERN SCI driver
+'y'   00-1F                                                          packet based user level communications
+                                                                     <mailto:zapman@interlan.net>
+'z'   00-3F                                                          CAN bus card conflict!
+                                                                     <mailto:hdstich@connectu.ulm.circular.de>
+'z'   40-7F                                                          CAN bus card conflict!
+                                                                     <mailto:oe@port.de>
+'z'   10-4F  drivers/s390/crypto/zcrypt_api.h                        conflict!
+'|'   00-7F  linux/media.h
+0x80  00-1F  linux/fb.h
+0x89  00-06  arch/x86/include/asm/sockios.h
+0x89  0B-DF  linux/sockios.h
+0x89  E0-EF  linux/sockios.h                                         SIOCPROTOPRIVATE range
+0x89  E0-EF  linux/dn.h                                              PROTOPRIVATE range
+0x89  F0-FF  linux/sockios.h                                         SIOCDEVPRIVATE range
+0x8B  all    linux/wireless.h
+0x8C  00-3F                                                          WiNRADiO driver
+                                                                     <http://www.winradio.com.au/>
+0x90  00     drivers/cdrom/sbpcd.h
+0x92  00-0F  drivers/usb/mon/mon_bin.c
+0x93  60-7F  linux/auto_fs.h
+0x94  all    fs/btrfs/ioctl.h                                        Btrfs filesystem
+             and linux/fs.h                                          some lifted to vfs/generic
+0x97  00-7F  fs/ceph/ioctl.h                                         Ceph file system
+0x99  00-0F                                                          537-Addinboard driver
+                                                                     <mailto:buk@buks.ipn.de>
+0xA0  all    linux/sdp/sdp.h                                         Industrial Device Project
+                                                                     <mailto:kenji@bitgate.com>
+0xA1  0      linux/vtpm_proxy.h                                      TPM Emulator Proxy Driver
+0xA3  80-8F                                                          Port ACL  in development:
+                                                                     <mailto:tlewis@mindspring.com>
+0xA3  90-9F  linux/dtlk.h
+0xA4  00-1F  uapi/linux/tee.h                                        Generic TEE subsystem
+0xAA  00-3F  linux/uapi/linux/userfaultfd.h
+0xAB  00-1F  linux/nbd.h
+0xAC  00-1F  linux/raw.h
+0xAD  00                                                             Netfilter device in development:
+                                                                     <mailto:rusty@rustcorp.com.au>
+0xAE  all    linux/kvm.h                                             Kernel-based Virtual Machine
+                                                                     <mailto:kvm@vger.kernel.org>
+0xAF  00-1F  linux/fsl_hypervisor.h                                  Freescale hypervisor
+0xB0  all                                                            RATIO devices in development:
+                                                                     <mailto:vgo@ratio.de>
+0xB1  00-1F                                                          PPPoX
+                                                                     <mailto:mostrows@styx.uwaterloo.ca>
+0xB3  00     linux/mmc/ioctl.h
+0xB4  00-0F  linux/gpio.h                                            <mailto:linux-gpio@vger.kernel.org>
+0xB5  00-0F  uapi/linux/rpmsg.h                                      <mailto:linux-remoteproc@vger.kernel.org>
+0xB6  all    linux/fpga-dfl.h
+0xC0  00-0F  linux/usb/iowarrior.h
+0xCA  00-0F  uapi/misc/cxl.h
+0xCA  10-2F  uapi/misc/ocxl.h
+0xCA  80-BF  uapi/scsi/cxlflash_ioctl.h
+0xCB  00-1F                                                          CBM serial IEC bus in development:
+                                                                     <mailto:michael.klein@puffin.lb.shuttle.de>
+0xCC  00-0F  drivers/misc/ibmvmc.h                                   pseries VMC driver
+0xCD  01     linux/reiserfs_fs.h
+0xCF  02     fs/cifs/ioctl.c
+0xDB  00-0F  drivers/char/mwave/mwavepub.h
+0xDD  00-3F                                                          ZFCP device driver see drivers/s390/scsi/
+                                                                     <mailto:aherrman@de.ibm.com>
+0xE5  00-3F  linux/fuse.h
+0xEC  00-01  drivers/platform/chrome/cros_ec_dev.h                   ChromeOS EC driver
+0xF3  00-3F  drivers/usb/misc/sisusbvga/sisusb.h                     sisfb (in development)
+                                                                     <mailto:thomas@winischhofer.net>
+0xF4  00-1F  video/mbxfb.h                                           mbxfb
+                                                                     <mailto:raph@8d.com>
+0xF6  all                                                            LTTng Linux Trace Toolkit Next Generation
+                                                                     <mailto:mathieu.desnoyers@efficios.com>
+0xFD  all    linux/dm-ioctl.h
+0xFE  all    linux/isst_if.h
+====  =====  ======================================================= ================================================================
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
deleted file mode 100644
index ab0b3f686454..000000000000
--- a/Documentation/ioctl/ioctl-number.txt
+++ /dev/null
@@ -1,351 +0,0 @@
-Ioctl Numbers
-19 October 1999
-Michael Elizabeth Chastain
-<mec@shout.net>
-
-If you are adding new ioctl's to the kernel, you should use the _IO
-macros defined in <linux/ioctl.h>:
-
-    _IO    an ioctl with no parameters
-    _IOW   an ioctl with write parameters (copy_from_user)
-    _IOR   an ioctl with read parameters  (copy_to_user)
-    _IOWR  an ioctl with both write and read parameters.
-
-'Write' and 'read' are from the user's point of view, just like the
-system calls 'write' and 'read'.  For example, a SET_FOO ioctl would
-be _IOW, although the kernel would actually read data from user space;
-a GET_FOO ioctl would be _IOR, although the kernel would actually write
-data to user space.
-
-The first argument to _IO, _IOW, _IOR, or _IOWR is an identifying letter
-or number from the table below.  Because of the large number of drivers,
-many drivers share a partial letter with other drivers.
-
-If you are writing a driver for a new device and need a letter, pick an
-unused block with enough room for expansion: 32 to 256 ioctl commands.
-You can register the block by patching this file and submitting the
-patch to Linus Torvalds.  Or you can e-mail me at <mec@shout.net> and
-I'll register one for you.
-
-The second argument to _IO, _IOW, _IOR, or _IOWR is a sequence number
-to distinguish ioctls from each other.  The third argument to _IOW,
-_IOR, or _IOWR is the type of the data going into the kernel or coming
-out of the kernel (e.g.  'int' or 'struct foo').  NOTE!  Do NOT use
-sizeof(arg) as the third argument as this results in your ioctl thinking
-it passes an argument of type size_t.
-
-Some devices use their major number as the identifier; this is OK, as
-long as it is unique.  Some devices are irregular and don't follow any
-convention at all.
-
-Following this convention is good because:
-
-(1) Keeping the ioctl's globally unique helps error checking:
-    if a program calls an ioctl on the wrong device, it will get an
-    error rather than some unexpected behaviour.
-
-(2) The 'strace' build procedure automatically finds ioctl numbers
-    defined with _IO, _IOW, _IOR, or _IOWR.
-
-(3) 'strace' can decode numbers back into useful names when the
-    numbers are unique.
-
-(4) People looking for ioctls can grep for them more easily when
-    this convention is used to define the ioctl numbers.
-
-(5) When following the convention, the driver code can use generic
-    code to copy the parameters between user and kernel space.
-
-This table lists ioctls visible from user land for Linux/x86.  It contains
-most drivers up to 2.6.31, but I know I am missing some.  There has been
-no attempt to list non-X86 architectures or ioctls from drivers/staging/.
-
-Code  Seq#(hex)	Include File		Comments
-========================================================
-0x00	00-1F	linux/fs.h		conflict!
-0x00	00-1F	scsi/scsi_ioctl.h	conflict!
-0x00	00-1F	linux/fb.h		conflict!
-0x00	00-1F	linux/wavefront.h	conflict!
-0x02	all	linux/fd.h
-0x03	all	linux/hdreg.h
-0x04	D2-DC	linux/umsdos_fs.h	Dead since 2.6.11, but don't reuse these.
-0x06	all	linux/lp.h
-0x09	all	linux/raid/md_u.h
-0x10	00-0F	drivers/char/s390/vmcp.h
-0x10	10-1F	arch/s390/include/uapi/sclp_ctl.h
-0x10	20-2F	arch/s390/include/uapi/asm/hypfs.h
-0x12	all	linux/fs.h
-		linux/blkpg.h
-0x1b	all	InfiniBand Subsystem	<http://infiniband.sourceforge.net/>
-0x20	all	drivers/cdrom/cm206.h
-0x22	all	scsi/sg.h
-'!'	00-1F	uapi/linux/seccomp.h
-'#'	00-3F	IEEE 1394 Subsystem	Block for the entire subsystem
-'$'	00-0F	linux/perf_counter.h, linux/perf_event.h
-'%'	00-0F	include/uapi/linux/stm.h
-					System Trace Module subsystem
-					<mailto:alexander.shishkin@linux.intel.com>
-'&'	00-07	drivers/firewire/nosy-user.h
-'1'	00-1F	<linux/timepps.h>	PPS kit from Ulrich Windl
-					<ftp://ftp.de.kernel.org/pub/linux/daemons/ntp/PPS/>
-'2'	01-04	linux/i2o.h
-'3'	00-0F	drivers/s390/char/raw3270.h	conflict!
-'3'	00-1F	linux/suspend_ioctls.h	conflict!
-		and kernel/power/user.c
-'8'	all				SNP8023 advanced NIC card
-					<mailto:mcr@solidum.com>
-';'	64-7F	linux/vfio.h
-'@'	00-0F	linux/radeonfb.h	conflict!
-'@'	00-0F	drivers/video/aty/aty128fb.c	conflict!
-'A'	00-1F	linux/apm_bios.h	conflict!
-'A'	00-0F	linux/agpgart.h		conflict!
-		and drivers/char/agp/compat_ioctl.h
-'A'	00-7F	sound/asound.h		conflict!
-'B'	00-1F	linux/cciss_ioctl.h	conflict!
-'B'	00-0F	include/linux/pmu.h	conflict!
-'B'	C0-FF				advanced bbus
-					<mailto:maassen@uni-freiburg.de>
-'C'	all	linux/soundcard.h	conflict!
-'C'	01-2F	linux/capi.h		conflict!
-'C'	F0-FF	drivers/net/wan/cosa.h	conflict!
-'D'	all	arch/s390/include/asm/dasd.h
-'D'	40-5F	drivers/scsi/dpt/dtpi_ioctl.h
-'D'	05	drivers/scsi/pmcraid.h
-'E'	all	linux/input.h		conflict!
-'E'	00-0F	xen/evtchn.h		conflict!
-'F'	all	linux/fb.h		conflict!
-'F'	01-02	drivers/scsi/pmcraid.h	conflict!
-'F'	20	drivers/video/fsl-diu-fb.h	conflict!
-'F'	20	drivers/video/intelfb/intelfb.h	conflict!
-'F'	20	linux/ivtvfb.h		conflict!
-'F'	20	linux/matroxfb.h	conflict!
-'F'	20	drivers/video/aty/atyfb_base.c	conflict!
-'F'	00-0F	video/da8xx-fb.h	conflict!
-'F'	80-8F	linux/arcfb.h		conflict!
-'F'	DD	video/sstfb.h		conflict!
-'G'	00-3F	drivers/misc/sgi-gru/grulib.h	conflict!
-'G'	00-0F	linux/gigaset_dev.h	conflict!
-'H'	00-7F	linux/hiddev.h		conflict!
-'H'	00-0F	linux/hidraw.h		conflict!
-'H'	01	linux/mei.h		conflict!
-'H'	02	linux/mei.h		conflict!
-'H'	03	linux/mei.h		conflict!
-'H'	00-0F	sound/asound.h		conflict!
-'H'	20-40	sound/asound_fm.h	conflict!
-'H'	80-8F	sound/sfnt_info.h	conflict!
-'H'	10-8F	sound/emu10k1.h		conflict!
-'H'	10-1F	sound/sb16_csp.h	conflict!
-'H'	10-1F	sound/hda_hwdep.h	conflict!
-'H'	40-4F	sound/hdspm.h		conflict!
-'H'	40-4F	sound/hdsp.h		conflict!
-'H'	90	sound/usb/usx2y/usb_stream.h
-'H'	A0	uapi/linux/usb/cdc-wdm.h
-'H'	C0-F0	net/bluetooth/hci.h	conflict!
-'H'	C0-DF	net/bluetooth/hidp/hidp.h	conflict!
-'H'	C0-DF	net/bluetooth/cmtp/cmtp.h	conflict!
-'H'	C0-DF	net/bluetooth/bnep/bnep.h	conflict!
-'H'	F1	linux/hid-roccat.h	<mailto:erazor_de@users.sourceforge.net>
-'H'	F8-FA	sound/firewire.h
-'I'	all	linux/isdn.h		conflict!
-'I'	00-0F	drivers/isdn/divert/isdn_divert.h	conflict!
-'I'	40-4F	linux/mISDNif.h		conflict!
-'J'	00-1F	drivers/scsi/gdth_ioctl.h
-'K'	all	linux/kd.h
-'L'	00-1F	linux/loop.h		conflict!
-'L'	10-1F	drivers/scsi/mpt3sas/mpt3sas_ctl.h	conflict!
-'L'	20-2F	linux/lightnvm.h
-'L'	E0-FF	linux/ppdd.h		encrypted disk device driver
-					<http://linux01.gwdg.de/~alatham/ppdd.html>
-'M'	all	linux/soundcard.h	conflict!
-'M'	01-16	mtd/mtd-abi.h		conflict!
-		and drivers/mtd/mtdchar.c
-'M'	01-03	drivers/scsi/megaraid/megaraid_sas.h
-'M'	00-0F	drivers/video/fsl-diu-fb.h	conflict!
-'N'	00-1F	drivers/usb/scanner.h
-'N'	40-7F	drivers/block/nvme.c
-'O'     00-06   mtd/ubi-user.h		UBI
-'P'	all	linux/soundcard.h	conflict!
-'P'	60-6F	sound/sscape_ioctl.h	conflict!
-'P'	00-0F	drivers/usb/class/usblp.c	conflict!
-'P'	01-09	drivers/misc/pci_endpoint_test.c	conflict!
-'Q'	all	linux/soundcard.h
-'R'	00-1F	linux/random.h		conflict!
-'R'	01	linux/rfkill.h		conflict!
-'R'	C0-DF	net/bluetooth/rfcomm.h
-'S'	all	linux/cdrom.h		conflict!
-'S'	80-81	scsi/scsi_ioctl.h	conflict!
-'S'	82-FF	scsi/scsi.h		conflict!
-'S'	00-7F	sound/asequencer.h	conflict!
-'T'	all	linux/soundcard.h	conflict!
-'T'	00-AF	sound/asound.h		conflict!
-'T'	all	arch/x86/include/asm/ioctls.h	conflict!
-'T'	C0-DF	linux/if_tun.h		conflict!
-'U'	all	sound/asound.h		conflict!
-'U'	00-CF	linux/uinput.h		conflict!
-'U'	00-EF	linux/usbdevice_fs.h
-'U'	C0-CF	drivers/bluetooth/hci_uart.h
-'V'	all	linux/vt.h		conflict!
-'V'	all	linux/videodev2.h	conflict!
-'V'	C0	linux/ivtvfb.h		conflict!
-'V'	C0	linux/ivtv.h		conflict!
-'V'	C0	media/davinci/vpfe_capture.h	conflict!
-'V'	C0	media/si4713.h		conflict!
-'W'	00-1F	linux/watchdog.h	conflict!
-'W'	00-1F	linux/wanrouter.h	conflict!		(pre 3.9)
-'W'	00-3F	sound/asound.h		conflict!
-'W'	40-5F   drivers/pci/switch/switchtec.c
-'X'	all	fs/xfs/xfs_fs.h		conflict!
-		and fs/xfs/linux-2.6/xfs_ioctl32.h
-		and include/linux/falloc.h
-		and linux/fs.h
-'X'	all	fs/ocfs2/ocfs_fs.h	conflict!
-'X'	01	linux/pktcdvd.h		conflict!
-'Y'	all	linux/cyclades.h
-'Z'	14-15	drivers/message/fusion/mptctl.h
-'['	00-3F	linux/usb/tmc.h		USB Test and Measurement Devices
-					<mailto:gregkh@linuxfoundation.org>
-'a'	all	linux/atm*.h, linux/sonet.h	ATM on linux
-					<http://lrcwww.epfl.ch/>
-'a'	00-0F	drivers/crypto/qat/qat_common/adf_cfg_common.h	conflict! qat driver
-'b'	00-FF				conflict! bit3 vme host bridge
-					<mailto:natalia@nikhefk.nikhef.nl>
-'c'	all	linux/cm4000_cs.h	conflict!
-'c'	00-7F	linux/comstats.h	conflict!
-'c'	00-7F	linux/coda.h		conflict!
-'c'	00-1F	linux/chio.h		conflict!
-'c'	80-9F	arch/s390/include/asm/chsc.h	conflict!
-'c'	A0-AF   arch/x86/include/asm/msr.h	conflict!
-'d'	00-FF	linux/char/drm/drm.h	conflict!
-'d'	02-40	pcmcia/ds.h		conflict!
-'d'	F0-FF	linux/digi1.h
-'e'	all	linux/digi1.h		conflict!
-'f'	00-1F	linux/ext2_fs.h		conflict!
-'f'	00-1F	linux/ext3_fs.h		conflict!
-'f'	00-0F	fs/jfs/jfs_dinode.h	conflict!
-'f'	00-0F	fs/ext4/ext4.h		conflict!
-'f'	00-0F	linux/fs.h		conflict!
-'f'	00-0F	fs/ocfs2/ocfs2_fs.h	conflict!
-'g'	00-0F	linux/usb/gadgetfs.h
-'g'	20-2F	linux/usb/g_printer.h
-'h'	00-7F				conflict! Charon filesystem
-					<mailto:zapman@interlan.net>
-'h'	00-1F	linux/hpet.h		conflict!
-'h'	80-8F	fs/hfsplus/ioctl.c
-'i'	00-3F	linux/i2o-dev.h		conflict!
-'i'	0B-1F	linux/ipmi.h		conflict!
-'i'	80-8F	linux/i8k.h
-'j'	00-3F	linux/joystick.h
-'k'	00-0F	linux/spi/spidev.h	conflict!
-'k'	00-05	video/kyro.h		conflict!
-'k'	10-17	linux/hsi/hsi_char.h	HSI character device
-'l'	00-3F	linux/tcfs_fs.h		transparent cryptographic file system
-					<http://web.archive.org/web/*/http://mikonos.dia.unisa.it/tcfs>
-'l'	40-7F	linux/udf_fs_i.h	in development:
-					<http://sourceforge.net/projects/linux-udf/>
-'m'	00-09	linux/mmtimer.h		conflict!
-'m'	all	linux/mtio.h		conflict!
-'m'	all	linux/soundcard.h	conflict!
-'m'	all	linux/synclink.h	conflict!
-'m'	00-19	drivers/message/fusion/mptctl.h	conflict!
-'m'	00	drivers/scsi/megaraid/megaraid_ioctl.h	conflict!
-'n'	00-7F	linux/ncp_fs.h and fs/ncpfs/ioctl.c
-'n'	80-8F	uapi/linux/nilfs2_api.h	NILFS2
-'n'	E0-FF	linux/matroxfb.h	matroxfb
-'o'	00-1F	fs/ocfs2/ocfs2_fs.h	OCFS2
-'o'     00-03   mtd/ubi-user.h		conflict! (OCFS2 and UBI overlaps)
-'o'     40-41   mtd/ubi-user.h		UBI
-'o'     01-A1   linux/dvb/*.h		DVB
-'p'	00-0F	linux/phantom.h		conflict! (OpenHaptics needs this)
-'p'	00-1F	linux/rtc.h		conflict!
-'p'	00-3F	linux/mc146818rtc.h	conflict!
-'p'	40-7F	linux/nvram.h
-'p'	80-9F	linux/ppdev.h		user-space parport
-					<mailto:tim@cyberelk.net>
-'p'	A1-A5	linux/pps.h		LinuxPPS
-					<mailto:giometti@linux.it>
-'q'	00-1F	linux/serio.h
-'q'	80-FF	linux/telephony.h	Internet PhoneJACK, Internet LineJACK
-		linux/ixjuser.h		<http://web.archive.org/web/*/http://www.quicknet.net>
-'r'	00-1F	linux/msdos_fs.h and fs/fat/dir.c
-'s'	all	linux/cdk.h
-'t'	00-7F	linux/ppp-ioctl.h
-'t'	80-8F	linux/isdn_ppp.h
-'t'	90-91	linux/toshiba.h		toshiba and toshiba_acpi SMM
-'u'	00-1F	linux/smb_fs.h		gone
-'u'	20-3F	linux/uvcvideo.h	USB video class host driver
-'u'	40-4f	linux/udmabuf.h		userspace dma-buf misc device
-'v'	00-1F	linux/ext2_fs.h		conflict!
-'v'	00-1F	linux/fs.h		conflict!
-'v'	00-0F	linux/sonypi.h		conflict!
-'v'	00-0F	media/v4l2-subdev.h	conflict!
-'v'	C0-FF	linux/meye.h		conflict!
-'w'	all				CERN SCI driver
-'y'	00-1F				packet based user level communications
-					<mailto:zapman@interlan.net>
-'z'	00-3F				CAN bus card	conflict!
-					<mailto:hdstich@connectu.ulm.circular.de>
-'z'	40-7F				CAN bus card	conflict!
-					<mailto:oe@port.de>
-'z'	10-4F	drivers/s390/crypto/zcrypt_api.h	conflict!
-'|'	00-7F	linux/media.h
-0x80	00-1F	linux/fb.h
-0x89	00-06	arch/x86/include/asm/sockios.h
-0x89	0B-DF	linux/sockios.h
-0x89	E0-EF	linux/sockios.h		SIOCPROTOPRIVATE range
-0x89	E0-EF	linux/dn.h		PROTOPRIVATE range
-0x89	F0-FF	linux/sockios.h		SIOCDEVPRIVATE range
-0x8B	all	linux/wireless.h
-0x8C	00-3F				WiNRADiO driver
-					<http://www.winradio.com.au/>
-0x90	00	drivers/cdrom/sbpcd.h
-0x92	00-0F	drivers/usb/mon/mon_bin.c
-0x93	60-7F	linux/auto_fs.h
-0x94	all	fs/btrfs/ioctl.h	Btrfs filesystem
-		and linux/fs.h		some lifted to vfs/generic
-0x97	00-7F	fs/ceph/ioctl.h		Ceph file system
-0x99	00-0F				537-Addinboard driver
-					<mailto:buk@buks.ipn.de>
-0xA0	all	linux/sdp/sdp.h		Industrial Device Project
-					<mailto:kenji@bitgate.com>
-0xA1	0	linux/vtpm_proxy.h	TPM Emulator Proxy Driver
-0xA3	80-8F	Port ACL		in development:
-					<mailto:tlewis@mindspring.com>
-0xA3	90-9F	linux/dtlk.h
-0xA4	00-1F	uapi/linux/tee.h	Generic TEE subsystem
-0xAA	00-3F	linux/uapi/linux/userfaultfd.h
-0xAB	00-1F	linux/nbd.h
-0xAC	00-1F	linux/raw.h
-0xAD	00	Netfilter device	in development:
-					<mailto:rusty@rustcorp.com.au>
-0xAE	all	linux/kvm.h		Kernel-based Virtual Machine
-					<mailto:kvm@vger.kernel.org>
-0xAF	00-1F	linux/fsl_hypervisor.h	Freescale hypervisor
-0xB0	all	RATIO devices		in development:
-					<mailto:vgo@ratio.de>
-0xB1	00-1F	PPPoX			<mailto:mostrows@styx.uwaterloo.ca>
-0xB3	00	linux/mmc/ioctl.h
-0xB4	00-0F	linux/gpio.h		<mailto:linux-gpio@vger.kernel.org>
-0xB5	00-0F	uapi/linux/rpmsg.h	<mailto:linux-remoteproc@vger.kernel.org>
-0xB6	all	linux/fpga-dfl.h
-0xC0	00-0F	linux/usb/iowarrior.h
-0xCA	00-0F	uapi/misc/cxl.h
-0xCA	10-2F	uapi/misc/ocxl.h
-0xCA	80-BF	uapi/scsi/cxlflash_ioctl.h
-0xCB	00-1F	CBM serial IEC bus	in development:
-					<mailto:michael.klein@puffin.lb.shuttle.de>
-0xCC	00-0F	drivers/misc/ibmvmc.h    pseries VMC driver
-0xCD	01	linux/reiserfs_fs.h
-0xCF	02	fs/cifs/ioctl.c
-0xDB	00-0F	drivers/char/mwave/mwavepub.h
-0xDD	00-3F	ZFCP device driver	see drivers/s390/scsi/
-					<mailto:aherrman@de.ibm.com>
-0xE5	00-3F	linux/fuse.h
-0xEC	00-01	drivers/platform/chrome/cros_ec_dev.h	ChromeOS EC driver
-0xF3	00-3F	drivers/usb/misc/sisusbvga/sisusb.h	sisfb (in development)
-					<mailto:thomas@winischhofer.net>
-0xF4	00-1F	video/mbxfb.h		mbxfb
-					<mailto:raph@8d.com>
-0xF6	all	LTTng			Linux Trace Toolkit Next Generation
-					<mailto:mathieu.desnoyers@efficios.com>
-0xFD	all	linux/dm-ioctl.h
-0xFE	all	linux/isst_if.h
diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst
index 365efc9e4aa8..8e56337d422d 100644
--- a/Documentation/process/submit-checklist.rst
+++ b/Documentation/process/submit-checklist.rst
@@ -107,7 +107,7 @@ and elsewhere regarding submitting Linux kernel patches.
     and why.
 
 26) If any ioctl's are added by the patch, then also update
-    ``Documentation/ioctl/ioctl-number.txt``.
+    ``Documentation/ioctl/ioctl-number.rst``.
 
 27) If your modified source code depends on or uses any of the kernel
     APIs or features that are related to the following ``Kconfig`` symbols,
diff --git a/Documentation/translations/it_IT/process/submit-checklist.rst b/Documentation/translations/it_IT/process/submit-checklist.rst
index ea74cae958d7..995ee69fab11 100644
--- a/Documentation/translations/it_IT/process/submit-checklist.rst
+++ b/Documentation/translations/it_IT/process/submit-checklist.rst
@@ -117,7 +117,7 @@ sottomissione delle patch, in particolare
     sorgenti che ne spieghi la logica: cosa fanno e perché.
 
 25) Se la patch aggiunge nuove chiamate ioctl, allora aggiornate
-    ``Documentation/ioctl/ioctl-number.txt``.
+    ``Documentation/ioctl/ioctl-number.rst``.
 
 26) Se il codice che avete modificato dipende o usa una qualsiasi interfaccia o
     funzionalità del kernel che è associata a uno dei seguenti simboli
diff --git a/Documentation/translations/zh_CN/process/submit-checklist.rst b/Documentation/translations/zh_CN/process/submit-checklist.rst
index f4785d2b0491..8738c55e42a2 100644
--- a/Documentation/translations/zh_CN/process/submit-checklist.rst
+++ b/Documentation/translations/zh_CN/process/submit-checklist.rst
@@ -97,7 +97,7 @@ Linux内核补丁提交清单
 24) 所有内存屏障例如 ``barrier()``, ``rmb()``, ``wmb()`` 都需要源代码中的注
     释来解释它们正在执行的操作及其原因的逻辑。
 
-25) 如果补丁添加了任何ioctl，那么也要更新 ``Documentation/ioctl/ioctl-number.txt``
+25) 如果补丁添加了任何ioctl，那么也要更新 ``Documentation/ioctl/ioctl-number.rst``
 
 26) 如果修改后的源代码依赖或使用与以下 ``Kconfig`` 符号相关的任何内核API或
     功能，则在禁用相关 ``Kconfig`` 符号和/或 ``=m`` （如果该选项可用）的情况
-- 
cgit 


From 5c04dceaa152d9dd9fe94dec6594965069e19e9e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 11:38:26 -0300
Subject: docs: ioctl: convert to ReST

Rename the iio documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

The cdrom.txt and hdio.txt have their own particular syntax.
In order to speedup the conversion, I used a small ancillary
perl script:

	my $d;
	$d .= $_ while(<>);
	$d =~ s/(\nCDROM\S+)\s+(\w[^\n]*)/$1\n\t$2\n/g;
	$d =~ s/(\nHDIO\S+)\s+(\w[^\n]*)/$1\n\t$2\n/g;
	$d =~ s/(\n\s*usage:)[\s\n]*(\w[^\n]*)/$1:\n\n\t  $2\n/g;
	$d =~ s/(\n\s*)(E\w+[\s\n]*\w[^\n]*)/$1- $2/g;
	$d =~ s/(\n\s*)(inputs|outputs|notes):\s*(\w[^\n]*)/$1$2:\n\t\t$3\n/g;
	print $d;

It basically add blank lines on a few interesting places. The
script is not perfect: still several things require manual work,
but it saved quite some time doing some obvious stuff.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/ioctl/botching-up-ioctls.rst |  225 +++++
 Documentation/ioctl/botching-up-ioctls.txt |  224 -----
 Documentation/ioctl/cdrom.rst              | 1233 +++++++++++++++++++++++++
 Documentation/ioctl/cdrom.txt              |  967 --------------------
 Documentation/ioctl/hdio.rst               | 1342 ++++++++++++++++++++++++++++
 Documentation/ioctl/hdio.txt               | 1071 ----------------------
 Documentation/ioctl/index.rst              |   16 +
 Documentation/ioctl/ioctl-decoding.rst     |   31 +
 Documentation/ioctl/ioctl-decoding.txt     |   24 -
 9 files changed, 2847 insertions(+), 2286 deletions(-)
 create mode 100644 Documentation/ioctl/botching-up-ioctls.rst
 delete mode 100644 Documentation/ioctl/botching-up-ioctls.txt
 create mode 100644 Documentation/ioctl/cdrom.rst
 delete mode 100644 Documentation/ioctl/cdrom.txt
 create mode 100644 Documentation/ioctl/hdio.rst
 delete mode 100644 Documentation/ioctl/hdio.txt
 create mode 100644 Documentation/ioctl/index.rst
 create mode 100644 Documentation/ioctl/ioctl-decoding.rst
 delete mode 100644 Documentation/ioctl/ioctl-decoding.txt

(limited to 'Documentation')

diff --git a/Documentation/ioctl/botching-up-ioctls.rst b/Documentation/ioctl/botching-up-ioctls.rst
new file mode 100644
index 000000000000..ac697fef3545
--- /dev/null
+++ b/Documentation/ioctl/botching-up-ioctls.rst
@@ -0,0 +1,225 @@
+=================================
+(How to avoid) Botching up ioctls
+=================================
+
+From: http://blog.ffwll.ch/2013/11/botching-up-ioctls.html
+
+By: Daniel Vetter, Copyright © 2013 Intel Corporation
+
+One clear insight kernel graphics hackers gained in the past few years is that
+trying to come up with a unified interface to manage the execution units and
+memory on completely different GPUs is a futile effort. So nowadays every
+driver has its own set of ioctls to allocate memory and submit work to the GPU.
+Which is nice, since there's no more insanity in the form of fake-generic, but
+actually only used once interfaces. But the clear downside is that there's much
+more potential to screw things up.
+
+To avoid repeating all the same mistakes again I've written up some of the
+lessons learned while botching the job for the drm/i915 driver. Most of these
+only cover technicalities and not the big-picture issues like what the command
+submission ioctl exactly should look like. Learning these lessons is probably
+something every GPU driver has to do on its own.
+
+
+Prerequisites
+-------------
+
+First the prerequisites. Without these you have already failed, because you
+will need to add a 32-bit compat layer:
+
+ * Only use fixed sized integers. To avoid conflicts with typedefs in userspace
+   the kernel has special types like __u32, __s64. Use them.
+
+ * Align everything to the natural size and use explicit padding. 32-bit
+   platforms don't necessarily align 64-bit values to 64-bit boundaries, but
+   64-bit platforms do. So we always need padding to the natural size to get
+   this right.
+
+ * Pad the entire struct to a multiple of 64-bits if the structure contains
+   64-bit types - the structure size will otherwise differ on 32-bit versus
+   64-bit. Having a different structure size hurts when passing arrays of
+   structures to the kernel, or if the kernel checks the structure size, which
+   e.g. the drm core does.
+
+ * Pointers are __u64, cast from/to a uintprt_t on the userspace side and
+   from/to a void __user * in the kernel. Try really hard not to delay this
+   conversion or worse, fiddle the raw __u64 through your code since that
+   diminishes the checking tools like sparse can provide. The macro
+   u64_to_user_ptr can be used in the kernel to avoid warnings about integers
+   and pointres of different sizes.
+
+
+Basics
+------
+
+With the joys of writing a compat layer avoided we can take a look at the basic
+fumbles. Neglecting these will make backward and forward compatibility a real
+pain. And since getting things wrong on the first attempt is guaranteed you
+will have a second iteration or at least an extension for any given interface.
+
+ * Have a clear way for userspace to figure out whether your new ioctl or ioctl
+   extension is supported on a given kernel. If you can't rely on old kernels
+   rejecting the new flags/modes or ioctls (since doing that was botched in the
+   past) then you need a driver feature flag or revision number somewhere.
+
+ * Have a plan for extending ioctls with new flags or new fields at the end of
+   the structure. The drm core checks the passed-in size for each ioctl call
+   and zero-extends any mismatches between kernel and userspace. That helps,
+   but isn't a complete solution since newer userspace on older kernels won't
+   notice that the newly added fields at the end get ignored. So this still
+   needs a new driver feature flags.
+
+ * Check all unused fields and flags and all the padding for whether it's 0,
+   and reject the ioctl if that's not the case. Otherwise your nice plan for
+   future extensions is going right down the gutters since someone will submit
+   an ioctl struct with random stack garbage in the yet unused parts. Which
+   then bakes in the ABI that those fields can never be used for anything else
+   but garbage. This is also the reason why you must explicitly pad all
+   structures, even if you never use them in an array - the padding the compiler
+   might insert could contain garbage.
+
+ * Have simple testcases for all of the above.
+
+
+Fun with Error Paths
+--------------------
+
+Nowadays we don't have any excuse left any more for drm drivers being neat
+little root exploits. This means we both need full input validation and solid
+error handling paths - GPUs will die eventually in the oddmost corner cases
+anyway:
+
+ * The ioctl must check for array overflows. Also it needs to check for
+   over/underflows and clamping issues of integer values in general. The usual
+   example is sprite positioning values fed directly into the hardware with the
+   hardware just having 12 bits or so. Works nicely until some odd display
+   server doesn't bother with clamping itself and the cursor wraps around the
+   screen.
+
+ * Have simple testcases for every input validation failure case in your ioctl.
+   Check that the error code matches your expectations. And finally make sure
+   that you only test for one single error path in each subtest by submitting
+   otherwise perfectly valid data. Without this an earlier check might reject
+   the ioctl already and shadow the codepath you actually want to test, hiding
+   bugs and regressions.
+
+ * Make all your ioctls restartable. First X really loves signals and second
+   this will allow you to test 90% of all error handling paths by just
+   interrupting your main test suite constantly with signals. Thanks to X's
+   love for signal you'll get an excellent base coverage of all your error
+   paths pretty much for free for graphics drivers. Also, be consistent with
+   how you handle ioctl restarting - e.g. drm has a tiny drmIoctl helper in its
+   userspace library. The i915 driver botched this with the set_tiling ioctl,
+   now we're stuck forever with some arcane semantics in both the kernel and
+   userspace.
+
+ * If you can't make a given codepath restartable make a stuck task at least
+   killable. GPUs just die and your users won't like you more if you hang their
+   entire box (by means of an unkillable X process). If the state recovery is
+   still too tricky have a timeout or hangcheck safety net as a last-ditch
+   effort in case the hardware has gone bananas.
+
+ * Have testcases for the really tricky corner cases in your error recovery code
+   - it's way too easy to create a deadlock between your hangcheck code and
+   waiters.
+
+
+Time, Waiting and Missing it
+----------------------------
+
+GPUs do most everything asynchronously, so we have a need to time operations and
+wait for outstanding ones. This is really tricky business; at the moment none of
+the ioctls supported by the drm/i915 get this fully right, which means there's
+still tons more lessons to learn here.
+
+ * Use CLOCK_MONOTONIC as your reference time, always. It's what alsa, drm and
+   v4l use by default nowadays. But let userspace know which timestamps are
+   derived from different clock domains like your main system clock (provided
+   by the kernel) or some independent hardware counter somewhere else. Clocks
+   will mismatch if you look close enough, but if performance measuring tools
+   have this information they can at least compensate. If your userspace can
+   get at the raw values of some clocks (e.g. through in-command-stream
+   performance counter sampling instructions) consider exposing those also.
+
+ * Use __s64 seconds plus __u64 nanoseconds to specify time. It's not the most
+   convenient time specification, but it's mostly the standard.
+
+ * Check that input time values are normalized and reject them if not. Note
+   that the kernel native struct ktime has a signed integer for both seconds
+   and nanoseconds, so beware here.
+
+ * For timeouts, use absolute times. If you're a good fellow and made your
+   ioctl restartable relative timeouts tend to be too coarse and can
+   indefinitely extend your wait time due to rounding on each restart.
+   Especially if your reference clock is something really slow like the display
+   frame counter. With a spec lawyer hat on this isn't a bug since timeouts can
+   always be extended - but users will surely hate you if their neat animations
+   starts to stutter due to this.
+
+ * Consider ditching any synchronous wait ioctls with timeouts and just deliver
+   an asynchronous event on a pollable file descriptor. It fits much better
+   into event driven applications' main loop.
+
+ * Have testcases for corner-cases, especially whether the return values for
+   already-completed events, successful waits and timed-out waits are all sane
+   and suiting to your needs.
+
+
+Leaking Resources, Not
+----------------------
+
+A full-blown drm driver essentially implements a little OS, but specialized to
+the given GPU platforms. This means a driver needs to expose tons of handles
+for different objects and other resources to userspace. Doing that right
+entails its own little set of pitfalls:
+
+ * Always attach the lifetime of your dynamically created resources to the
+   lifetime of a file descriptor. Consider using a 1:1 mapping if your resource
+   needs to be shared across processes -  fd-passing over unix domain sockets
+   also simplifies lifetime management for userspace.
+
+ * Always have O_CLOEXEC support.
+
+ * Ensure that you have sufficient insulation between different clients. By
+   default pick a private per-fd namespace which forces any sharing to be done
+   explicitly. Only go with a more global per-device namespace if the objects
+   are truly device-unique. One counterexample in the drm modeset interfaces is
+   that the per-device modeset objects like connectors share a namespace with
+   framebuffer objects, which mostly are not shared at all. A separate
+   namespace, private by default, for framebuffers would have been more
+   suitable.
+
+ * Think about uniqueness requirements for userspace handles. E.g. for most drm
+   drivers it's a userspace bug to submit the same object twice in the same
+   command submission ioctl. But then if objects are shareable userspace needs
+   to know whether it has seen an imported object from a different process
+   already or not. I haven't tried this myself yet due to lack of a new class
+   of objects, but consider using inode numbers on your shared file descriptors
+   as unique identifiers - it's how real files are told apart, too.
+   Unfortunately this requires a full-blown virtual filesystem in the kernel.
+
+
+Last, but not Least
+-------------------
+
+Not every problem needs a new ioctl:
+
+ * Think hard whether you really want a driver-private interface. Of course
+   it's much quicker to push a driver-private interface than engaging in
+   lengthy discussions for a more generic solution. And occasionally doing a
+   private interface to spearhead a new concept is what's required. But in the
+   end, once the generic interface comes around you'll end up maintainer two
+   interfaces. Indefinitely.
+
+ * Consider other interfaces than ioctls. A sysfs attribute is much better for
+   per-device settings, or for child objects with fairly static lifetimes (like
+   output connectors in drm with all the detection override attributes). Or
+   maybe only your testsuite needs this interface, and then debugfs with its
+   disclaimer of not having a stable ABI would be better.
+
+Finally, the name of the game is to get it right on the first attempt, since if
+your driver proves popular and your hardware platforms long-lived then you'll
+be stuck with a given ioctl essentially forever. You can try to deprecate
+horrible ioctls on newer iterations of your hardware, but generally it takes
+years to accomplish this. And then again years until the last user able to
+complain about regressions disappears, too.
diff --git a/Documentation/ioctl/botching-up-ioctls.txt b/Documentation/ioctl/botching-up-ioctls.txt
deleted file mode 100644
index 883fb034bd04..000000000000
--- a/Documentation/ioctl/botching-up-ioctls.txt
+++ /dev/null
@@ -1,224 +0,0 @@
-(How to avoid) Botching up ioctls
-=================================
-
-From: http://blog.ffwll.ch/2013/11/botching-up-ioctls.html
-
-By: Daniel Vetter, Copyright © 2013 Intel Corporation
-
-One clear insight kernel graphics hackers gained in the past few years is that
-trying to come up with a unified interface to manage the execution units and
-memory on completely different GPUs is a futile effort. So nowadays every
-driver has its own set of ioctls to allocate memory and submit work to the GPU.
-Which is nice, since there's no more insanity in the form of fake-generic, but
-actually only used once interfaces. But the clear downside is that there's much
-more potential to screw things up.
-
-To avoid repeating all the same mistakes again I've written up some of the
-lessons learned while botching the job for the drm/i915 driver. Most of these
-only cover technicalities and not the big-picture issues like what the command
-submission ioctl exactly should look like. Learning these lessons is probably
-something every GPU driver has to do on its own.
-
-
-Prerequisites
--------------
-
-First the prerequisites. Without these you have already failed, because you
-will need to add a 32-bit compat layer:
-
- * Only use fixed sized integers. To avoid conflicts with typedefs in userspace
-   the kernel has special types like __u32, __s64. Use them.
-
- * Align everything to the natural size and use explicit padding. 32-bit
-   platforms don't necessarily align 64-bit values to 64-bit boundaries, but
-   64-bit platforms do. So we always need padding to the natural size to get
-   this right.
-
- * Pad the entire struct to a multiple of 64-bits if the structure contains
-   64-bit types - the structure size will otherwise differ on 32-bit versus
-   64-bit. Having a different structure size hurts when passing arrays of
-   structures to the kernel, or if the kernel checks the structure size, which
-   e.g. the drm core does.
-
- * Pointers are __u64, cast from/to a uintprt_t on the userspace side and
-   from/to a void __user * in the kernel. Try really hard not to delay this
-   conversion or worse, fiddle the raw __u64 through your code since that
-   diminishes the checking tools like sparse can provide. The macro
-   u64_to_user_ptr can be used in the kernel to avoid warnings about integers
-   and pointres of different sizes.
-
-
-Basics
-------
-
-With the joys of writing a compat layer avoided we can take a look at the basic
-fumbles. Neglecting these will make backward and forward compatibility a real
-pain. And since getting things wrong on the first attempt is guaranteed you
-will have a second iteration or at least an extension for any given interface.
-
- * Have a clear way for userspace to figure out whether your new ioctl or ioctl
-   extension is supported on a given kernel. If you can't rely on old kernels
-   rejecting the new flags/modes or ioctls (since doing that was botched in the
-   past) then you need a driver feature flag or revision number somewhere.
-
- * Have a plan for extending ioctls with new flags or new fields at the end of
-   the structure. The drm core checks the passed-in size for each ioctl call
-   and zero-extends any mismatches between kernel and userspace. That helps,
-   but isn't a complete solution since newer userspace on older kernels won't
-   notice that the newly added fields at the end get ignored. So this still
-   needs a new driver feature flags.
-
- * Check all unused fields and flags and all the padding for whether it's 0,
-   and reject the ioctl if that's not the case. Otherwise your nice plan for
-   future extensions is going right down the gutters since someone will submit
-   an ioctl struct with random stack garbage in the yet unused parts. Which
-   then bakes in the ABI that those fields can never be used for anything else
-   but garbage. This is also the reason why you must explicitly pad all
-   structures, even if you never use them in an array - the padding the compiler
-   might insert could contain garbage.
-
- * Have simple testcases for all of the above.
-
-
-Fun with Error Paths
---------------------
-
-Nowadays we don't have any excuse left any more for drm drivers being neat
-little root exploits. This means we both need full input validation and solid
-error handling paths - GPUs will die eventually in the oddmost corner cases
-anyway:
-
- * The ioctl must check for array overflows. Also it needs to check for
-   over/underflows and clamping issues of integer values in general. The usual
-   example is sprite positioning values fed directly into the hardware with the
-   hardware just having 12 bits or so. Works nicely until some odd display
-   server doesn't bother with clamping itself and the cursor wraps around the
-   screen.
-
- * Have simple testcases for every input validation failure case in your ioctl.
-   Check that the error code matches your expectations. And finally make sure
-   that you only test for one single error path in each subtest by submitting
-   otherwise perfectly valid data. Without this an earlier check might reject
-   the ioctl already and shadow the codepath you actually want to test, hiding
-   bugs and regressions.
-
- * Make all your ioctls restartable. First X really loves signals and second
-   this will allow you to test 90% of all error handling paths by just
-   interrupting your main test suite constantly with signals. Thanks to X's
-   love for signal you'll get an excellent base coverage of all your error
-   paths pretty much for free for graphics drivers. Also, be consistent with
-   how you handle ioctl restarting - e.g. drm has a tiny drmIoctl helper in its
-   userspace library. The i915 driver botched this with the set_tiling ioctl,
-   now we're stuck forever with some arcane semantics in both the kernel and
-   userspace.
-
- * If you can't make a given codepath restartable make a stuck task at least
-   killable. GPUs just die and your users won't like you more if you hang their
-   entire box (by means of an unkillable X process). If the state recovery is
-   still too tricky have a timeout or hangcheck safety net as a last-ditch
-   effort in case the hardware has gone bananas.
-
- * Have testcases for the really tricky corner cases in your error recovery code
-   - it's way too easy to create a deadlock between your hangcheck code and
-   waiters.
-
-
-Time, Waiting and Missing it
-----------------------------
-
-GPUs do most everything asynchronously, so we have a need to time operations and
-wait for outstanding ones. This is really tricky business; at the moment none of
-the ioctls supported by the drm/i915 get this fully right, which means there's
-still tons more lessons to learn here.
-
- * Use CLOCK_MONOTONIC as your reference time, always. It's what alsa, drm and
-   v4l use by default nowadays. But let userspace know which timestamps are
-   derived from different clock domains like your main system clock (provided
-   by the kernel) or some independent hardware counter somewhere else. Clocks
-   will mismatch if you look close enough, but if performance measuring tools
-   have this information they can at least compensate. If your userspace can
-   get at the raw values of some clocks (e.g. through in-command-stream
-   performance counter sampling instructions) consider exposing those also.
-
- * Use __s64 seconds plus __u64 nanoseconds to specify time. It's not the most
-   convenient time specification, but it's mostly the standard.
-
- * Check that input time values are normalized and reject them if not. Note
-   that the kernel native struct ktime has a signed integer for both seconds
-   and nanoseconds, so beware here.
-
- * For timeouts, use absolute times. If you're a good fellow and made your
-   ioctl restartable relative timeouts tend to be too coarse and can
-   indefinitely extend your wait time due to rounding on each restart.
-   Especially if your reference clock is something really slow like the display
-   frame counter. With a spec lawyer hat on this isn't a bug since timeouts can
-   always be extended - but users will surely hate you if their neat animations
-   starts to stutter due to this.
-
- * Consider ditching any synchronous wait ioctls with timeouts and just deliver
-   an asynchronous event on a pollable file descriptor. It fits much better
-   into event driven applications' main loop.
-
- * Have testcases for corner-cases, especially whether the return values for
-   already-completed events, successful waits and timed-out waits are all sane
-   and suiting to your needs.
-
-
-Leaking Resources, Not
-----------------------
-
-A full-blown drm driver essentially implements a little OS, but specialized to
-the given GPU platforms. This means a driver needs to expose tons of handles
-for different objects and other resources to userspace. Doing that right
-entails its own little set of pitfalls:
-
- * Always attach the lifetime of your dynamically created resources to the
-   lifetime of a file descriptor. Consider using a 1:1 mapping if your resource
-   needs to be shared across processes -  fd-passing over unix domain sockets
-   also simplifies lifetime management for userspace.
-
- * Always have O_CLOEXEC support.
-
- * Ensure that you have sufficient insulation between different clients. By
-   default pick a private per-fd namespace which forces any sharing to be done
-   explicitly. Only go with a more global per-device namespace if the objects
-   are truly device-unique. One counterexample in the drm modeset interfaces is
-   that the per-device modeset objects like connectors share a namespace with
-   framebuffer objects, which mostly are not shared at all. A separate
-   namespace, private by default, for framebuffers would have been more
-   suitable.
-
- * Think about uniqueness requirements for userspace handles. E.g. for most drm
-   drivers it's a userspace bug to submit the same object twice in the same
-   command submission ioctl. But then if objects are shareable userspace needs
-   to know whether it has seen an imported object from a different process
-   already or not. I haven't tried this myself yet due to lack of a new class
-   of objects, but consider using inode numbers on your shared file descriptors
-   as unique identifiers - it's how real files are told apart, too.
-   Unfortunately this requires a full-blown virtual filesystem in the kernel.
-
-
-Last, but not Least
--------------------
-
-Not every problem needs a new ioctl:
-
- * Think hard whether you really want a driver-private interface. Of course
-   it's much quicker to push a driver-private interface than engaging in
-   lengthy discussions for a more generic solution. And occasionally doing a
-   private interface to spearhead a new concept is what's required. But in the
-   end, once the generic interface comes around you'll end up maintainer two
-   interfaces. Indefinitely.
-
- * Consider other interfaces than ioctls. A sysfs attribute is much better for
-   per-device settings, or for child objects with fairly static lifetimes (like
-   output connectors in drm with all the detection override attributes). Or
-   maybe only your testsuite needs this interface, and then debugfs with its
-   disclaimer of not having a stable ABI would be better.
-
-Finally, the name of the game is to get it right on the first attempt, since if
-your driver proves popular and your hardware platforms long-lived then you'll
-be stuck with a given ioctl essentially forever. You can try to deprecate
-horrible ioctls on newer iterations of your hardware, but generally it takes
-years to accomplish this. And then again years until the last user able to
-complain about regressions disappears, too.
diff --git a/Documentation/ioctl/cdrom.rst b/Documentation/ioctl/cdrom.rst
new file mode 100644
index 000000000000..3b4c0506de46
--- /dev/null
+++ b/Documentation/ioctl/cdrom.rst
@@ -0,0 +1,1233 @@
+============================
+Summary of CDROM ioctl calls
+============================
+
+- Edward A. Falk <efalk@google.com>
+
+November, 2004
+
+This document attempts to describe the ioctl(2) calls supported by
+the CDROM layer.  These are by-and-large implemented (as of Linux 2.6)
+in drivers/cdrom/cdrom.c and drivers/block/scsi_ioctl.c
+
+ioctl values are listed in <linux/cdrom.h>.  As of this writing, they
+are as follows:
+
+	======================	===============================================
+	CDROMPAUSE		Pause Audio Operation
+	CDROMRESUME		Resume paused Audio Operation
+	CDROMPLAYMSF		Play Audio MSF (struct cdrom_msf)
+	CDROMPLAYTRKIND		Play Audio Track/index (struct cdrom_ti)
+	CDROMREADTOCHDR		Read TOC header (struct cdrom_tochdr)
+	CDROMREADTOCENTRY	Read TOC entry (struct cdrom_tocentry)
+	CDROMSTOP		Stop the cdrom drive
+	CDROMSTART		Start the cdrom drive
+	CDROMEJECT		Ejects the cdrom media
+	CDROMVOLCTRL		Control output volume (struct cdrom_volctrl)
+	CDROMSUBCHNL		Read subchannel data (struct cdrom_subchnl)
+	CDROMREADMODE2		Read CDROM mode 2 data (2336 Bytes)
+				(struct cdrom_read)
+	CDROMREADMODE1		Read CDROM mode 1 data (2048 Bytes)
+				(struct cdrom_read)
+	CDROMREADAUDIO		(struct cdrom_read_audio)
+	CDROMEJECT_SW		enable(1)/disable(0) auto-ejecting
+	CDROMMULTISESSION	Obtain the start-of-last-session
+				address of multi session disks
+				(struct cdrom_multisession)
+	CDROM_GET_MCN		Obtain the "Universal Product Code"
+				if available (struct cdrom_mcn)
+	CDROM_GET_UPC		Deprecated, use CDROM_GET_MCN instead.
+	CDROMRESET		hard-reset the drive
+	CDROMVOLREAD		Get the drive's volume setting
+				(struct cdrom_volctrl)
+	CDROMREADRAW		read data in raw mode (2352 Bytes)
+				(struct cdrom_read)
+	CDROMREADCOOKED		read data in cooked mode
+	CDROMSEEK		seek msf address
+	CDROMPLAYBLK		scsi-cd only, (struct cdrom_blk)
+	CDROMREADALL		read all 2646 bytes
+	CDROMGETSPINDOWN	return 4-bit spindown value
+	CDROMSETSPINDOWN	set 4-bit spindown value
+	CDROMCLOSETRAY		pendant of CDROMEJECT
+	CDROM_SET_OPTIONS	Set behavior options
+	CDROM_CLEAR_OPTIONS	Clear behavior options
+	CDROM_SELECT_SPEED	Set the CD-ROM speed
+	CDROM_SELECT_DISC	Select disc (for juke-boxes)
+	CDROM_MEDIA_CHANGED	Check is media changed
+	CDROM_DRIVE_STATUS	Get tray position, etc.
+	CDROM_DISC_STATUS	Get disc type, etc.
+	CDROM_CHANGER_NSLOTS	Get number of slots
+	CDROM_LOCKDOOR		lock or unlock door
+	CDROM_DEBUG		Turn debug messages on/off
+	CDROM_GET_CAPABILITY	get capabilities
+	CDROMAUDIOBUFSIZ	set the audio buffer size
+	DVD_READ_STRUCT		Read structure
+	DVD_WRITE_STRUCT	Write structure
+	DVD_AUTH		Authentication
+	CDROM_SEND_PACKET	send a packet to the drive
+	CDROM_NEXT_WRITABLE	get next writable block
+	CDROM_LAST_WRITTEN	get last block written on disc
+	======================	===============================================
+
+
+The information that follows was determined from reading kernel source
+code.  It is likely that some corrections will be made over time.
+
+------------------------------------------------------------------------------
+
+General:
+
+	Unless otherwise specified, all ioctl calls return 0 on success
+	and -1 with errno set to an appropriate value on error.  (Some
+	ioctls return non-negative data values.)
+
+	Unless otherwise specified, all ioctl calls return -1 and set
+	errno to EFAULT on a failed attempt to copy data to or from user
+	address space.
+
+	Individual drivers may return error codes not listed here.
+
+	Unless otherwise specified, all data structures and constants
+	are defined in <linux/cdrom.h>
+
+------------------------------------------------------------------------------
+
+
+CDROMPAUSE
+	Pause Audio Operation
+
+
+	usage::
+
+	  ioctl(fd, CDROMPAUSE, 0);
+
+
+	inputs:
+		none
+
+
+	outputs:
+		none
+
+
+	error return:
+	  - ENOSYS	cd drive not audio-capable.
+
+
+CDROMRESUME
+	Resume paused Audio Operation
+
+
+	usage::
+
+	  ioctl(fd, CDROMRESUME, 0);
+
+
+	inputs:
+		none
+
+
+	outputs:
+		none
+
+
+	error return:
+	  - ENOSYS	cd drive not audio-capable.
+
+
+CDROMPLAYMSF
+	Play Audio MSF
+
+	(struct cdrom_msf)
+
+
+	usage::
+
+	  struct cdrom_msf msf;
+
+	  ioctl(fd, CDROMPLAYMSF, &msf);
+
+	inputs:
+		cdrom_msf structure, describing a segment of music to play
+
+
+	outputs:
+		none
+
+
+	error return:
+	  - ENOSYS	cd drive not audio-capable.
+
+	notes:
+		- MSF stands for minutes-seconds-frames
+		- LBA stands for logical block address
+		- Segment is described as start and end times, where each time
+		  is described as minutes:seconds:frames.
+		  A frame is 1/75 of a second.
+
+
+CDROMPLAYTRKIND
+	Play Audio Track/index
+
+	(struct cdrom_ti)
+
+
+	usage::
+
+	  struct cdrom_ti ti;
+
+	  ioctl(fd, CDROMPLAYTRKIND, &ti);
+
+	inputs:
+		cdrom_ti structure, describing a segment of music to play
+
+
+	outputs:
+		none
+
+
+	error return:
+	  - ENOSYS	cd drive not audio-capable.
+
+	notes:
+		- Segment is described as start and end times, where each time
+		  is described as a track and an index.
+
+
+
+CDROMREADTOCHDR
+	Read TOC header
+
+	(struct cdrom_tochdr)
+
+
+	usage::
+
+	  cdrom_tochdr header;
+
+	  ioctl(fd, CDROMREADTOCHDR, &header);
+
+	inputs:
+		cdrom_tochdr structure
+
+
+	outputs:
+		cdrom_tochdr structure
+
+
+	error return:
+	  - ENOSYS	cd drive not audio-capable.
+
+
+
+CDROMREADTOCENTRY
+	Read TOC entry
+
+	(struct cdrom_tocentry)
+
+
+	usage::
+
+	  struct cdrom_tocentry entry;
+
+	  ioctl(fd, CDROMREADTOCENTRY, &entry);
+
+	inputs:
+		cdrom_tocentry structure
+
+
+	outputs:
+		cdrom_tocentry structure
+
+
+	error return:
+	  - ENOSYS	cd drive not audio-capable.
+	  - EINVAL	entry.cdte_format not CDROM_MSF or CDROM_LBA
+	  - EINVAL	requested track out of bounds
+	  - EIO		I/O error reading TOC
+
+	notes:
+		- TOC stands for Table Of Contents
+		- MSF stands for minutes-seconds-frames
+		- LBA stands for logical block address
+
+
+
+CDROMSTOP
+	Stop the cdrom drive
+
+
+	usage::
+
+	  ioctl(fd, CDROMSTOP, 0);
+
+
+	inputs:
+		none
+
+
+	outputs:
+		none
+
+
+	error return:
+	  - ENOSYS	cd drive not audio-capable.
+
+	notes:
+	  - Exact interpretation of this ioctl depends on the device,
+	    but most seem to spin the drive down.
+
+
+CDROMSTART
+	Start the cdrom drive
+
+
+	usage::
+
+	  ioctl(fd, CDROMSTART, 0);
+
+
+	inputs:
+		none
+
+
+	outputs:
+		none
+
+
+	error return:
+	  - ENOSYS	cd drive not audio-capable.
+
+	notes:
+	  - Exact interpretation of this ioctl depends on the device,
+	    but most seem to spin the drive up and/or close the tray.
+	    Other devices ignore the ioctl completely.
+
+
+CDROMEJECT
+	- Ejects the cdrom media
+
+
+	usage::
+
+	  ioctl(fd, CDROMEJECT, 0);
+
+
+	inputs:
+		none
+
+
+	outputs:
+		none
+
+
+	error returns:
+	  - ENOSYS	cd drive not capable of ejecting
+	  - EBUSY	other processes are accessing drive, or door is locked
+
+	notes:
+		- See CDROM_LOCKDOOR, below.
+
+
+
+
+CDROMCLOSETRAY
+	pendant of CDROMEJECT
+
+
+	usage::
+
+	  ioctl(fd, CDROMCLOSETRAY, 0);
+
+
+	inputs:
+		none
+
+
+	outputs:
+		none
+
+
+	error returns:
+	  - ENOSYS	cd drive not capable of closing the tray
+	  - EBUSY	other processes are accessing drive, or door is locked
+
+	notes:
+		- See CDROM_LOCKDOOR, below.
+
+
+
+
+CDROMVOLCTRL
+	Control output volume (struct cdrom_volctrl)
+
+
+	usage::
+
+	  struct cdrom_volctrl volume;
+
+	  ioctl(fd, CDROMVOLCTRL, &volume);
+
+	inputs:
+		cdrom_volctrl structure containing volumes for up to 4
+		channels.
+
+	outputs:
+		none
+
+
+	error return:
+	  - ENOSYS	cd drive not audio-capable.
+
+
+
+CDROMVOLREAD
+	Get the drive's volume setting
+
+	(struct cdrom_volctrl)
+
+
+	usage::
+
+	  struct cdrom_volctrl volume;
+
+	  ioctl(fd, CDROMVOLREAD, &volume);
+
+	inputs:
+		none
+
+
+	outputs:
+		The current volume settings.
+
+
+	error return:
+	  - ENOSYS	cd drive not audio-capable.
+
+
+
+CDROMSUBCHNL
+	Read subchannel data
+
+	(struct cdrom_subchnl)
+
+
+	usage::
+
+	  struct cdrom_subchnl q;
+
+	  ioctl(fd, CDROMSUBCHNL, &q);
+
+	inputs:
+		cdrom_subchnl structure
+
+
+	outputs:
+		cdrom_subchnl structure
+
+
+	error return:
+	  - ENOSYS	cd drive not audio-capable.
+	  - EINVAL	format not CDROM_MSF or CDROM_LBA
+
+	notes:
+		- Format is converted to CDROM_MSF or CDROM_LBA
+		  as per user request on return
+
+
+
+CDROMREADRAW
+	read data in raw mode (2352 Bytes)
+
+	(struct cdrom_read)
+
+	usage::
+
+	  union {
+
+	    struct cdrom_msf msf;		/* input */
+	    char buffer[CD_FRAMESIZE_RAW];	/* return */
+	  } arg;
+	  ioctl(fd, CDROMREADRAW, &arg);
+
+	inputs:
+		cdrom_msf structure indicating an address to read.
+
+		Only the start values are significant.
+
+	outputs:
+		Data written to address provided by user.
+
+
+	error return:
+	  - EINVAL	address less than 0, or msf less than 0:2:0
+	  - ENOMEM	out of memory
+
+	notes:
+		- As of 2.6.8.1, comments in <linux/cdrom.h> indicate that this
+		  ioctl accepts a cdrom_read structure, but actual source code
+		  reads a cdrom_msf structure and writes a buffer of data to
+		  the same address.
+
+		- MSF values are converted to LBA values via this formula::
+
+		    lba = (((m * CD_SECS) + s) * CD_FRAMES + f) - CD_MSF_OFFSET;
+
+
+
+
+CDROMREADMODE1
+	Read CDROM mode 1 data (2048 Bytes)
+
+	(struct cdrom_read)
+
+	notes:
+		Identical to CDROMREADRAW except that block size is
+		CD_FRAMESIZE (2048) bytes
+
+
+
+CDROMREADMODE2
+	Read CDROM mode 2 data (2336 Bytes)
+
+	(struct cdrom_read)
+
+	notes:
+		Identical to CDROMREADRAW except that block size is
+		CD_FRAMESIZE_RAW0 (2336) bytes
+
+
+
+CDROMREADAUDIO
+	(struct cdrom_read_audio)
+
+	usage::
+
+	  struct cdrom_read_audio ra;
+
+	  ioctl(fd, CDROMREADAUDIO, &ra);
+
+	inputs:
+		cdrom_read_audio structure containing read start
+		point and length
+
+	outputs:
+		audio data, returned to buffer indicated by ra
+
+
+	error return:
+	  - EINVAL	format not CDROM_MSF or CDROM_LBA
+	  - EINVAL	nframes not in range [1 75]
+	  - ENXIO	drive has no queue (probably means invalid fd)
+	  - ENOMEM	out of memory
+
+
+CDROMEJECT_SW
+	enable(1)/disable(0) auto-ejecting
+
+
+	usage::
+
+	  int val;
+
+	  ioctl(fd, CDROMEJECT_SW, val);
+
+	inputs:
+		Flag specifying auto-eject flag.
+
+
+	outputs:
+		none
+
+
+	error return:
+	  - ENOSYS	Drive is not capable of ejecting.
+	  - EBUSY	Door is locked
+
+
+
+
+CDROMMULTISESSION
+	Obtain the start-of-last-session address of multi session disks
+
+	(struct cdrom_multisession)
+
+	usage::
+
+	  struct cdrom_multisession ms_info;
+
+	  ioctl(fd, CDROMMULTISESSION, &ms_info);
+
+	inputs:
+		cdrom_multisession structure containing desired
+
+	  format.
+
+	outputs:
+		cdrom_multisession structure is filled with last_session
+		information.
+
+	error return:
+	  - EINVAL	format not CDROM_MSF or CDROM_LBA
+
+
+CDROM_GET_MCN
+	Obtain the "Universal Product Code"
+	if available
+
+	(struct cdrom_mcn)
+
+
+	usage::
+
+	  struct cdrom_mcn mcn;
+
+	  ioctl(fd, CDROM_GET_MCN, &mcn);
+
+	inputs:
+		none
+
+
+	outputs:
+		Universal Product Code
+
+
+	error return:
+	  - ENOSYS	Drive is not capable of reading MCN data.
+
+	notes:
+		- Source code comments state::
+
+		    The following function is implemented, although very few
+		    audio discs give Universal Product Code information, which
+		    should just be the Medium Catalog Number on the box.  Note,
+		    that the way the code is written on the CD is /not/ uniform
+		    across all discs!
+
+
+
+
+CDROM_GET_UPC
+	CDROM_GET_MCN  (deprecated)
+
+
+	Not implemented, as of 2.6.8.1
+
+
+
+CDROMRESET
+	hard-reset the drive
+
+
+	usage::
+
+	  ioctl(fd, CDROMRESET, 0);
+
+
+	inputs:
+		none
+
+
+	outputs:
+		none
+
+
+	error return:
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+	  - ENOSYS	Drive is not capable of resetting.
+
+
+
+
+CDROMREADCOOKED
+	read data in cooked mode
+
+
+	usage::
+
+	  u8 buffer[CD_FRAMESIZE]
+
+	  ioctl(fd, CDROMREADCOOKED, buffer);
+
+	inputs:
+		none
+
+
+	outputs:
+		2048 bytes of data, "cooked" mode.
+
+
+	notes:
+		Not implemented on all drives.
+
+
+
+
+
+CDROMREADALL
+	read all 2646 bytes
+
+
+	Same as CDROMREADCOOKED, but reads 2646 bytes.
+
+
+
+CDROMSEEK
+	seek msf address
+
+
+	usage::
+
+	  struct cdrom_msf msf;
+
+	  ioctl(fd, CDROMSEEK, &msf);
+
+	inputs:
+		MSF address to seek to.
+
+
+	outputs:
+		none
+
+
+
+
+CDROMPLAYBLK
+	scsi-cd only
+
+	(struct cdrom_blk)
+
+
+	usage::
+
+	  struct cdrom_blk blk;
+
+	  ioctl(fd, CDROMPLAYBLK, &blk);
+
+	inputs:
+		Region to play
+
+
+	outputs:
+		none
+
+
+
+
+CDROMGETSPINDOWN
+	usage::
+
+	  char spindown;
+
+	  ioctl(fd, CDROMGETSPINDOWN, &spindown);
+
+	inputs:
+		none
+
+
+	outputs:
+		The value of the current 4-bit spindown value.
+
+
+
+
+
+CDROMSETSPINDOWN
+	usage::
+
+	  char spindown
+
+	  ioctl(fd, CDROMSETSPINDOWN, &spindown);
+
+	inputs:
+		4-bit value used to control spindown (TODO: more detail here)
+
+
+	outputs:
+		none
+
+
+
+
+
+
+CDROM_SET_OPTIONS
+	Set behavior options
+
+
+	usage::
+
+	  int options;
+
+	  ioctl(fd, CDROM_SET_OPTIONS, options);
+
+	inputs:
+		New values for drive options.  The logical 'or' of:
+
+	    ==============      ==================================
+	    CDO_AUTO_CLOSE	close tray on first open(2)
+	    CDO_AUTO_EJECT	open tray on last release
+	    CDO_USE_FFLAGS	use O_NONBLOCK information on open
+	    CDO_LOCK		lock tray on open files
+	    CDO_CHECK_TYPE	check type on open for data
+	    ==============      ==================================
+
+	outputs:
+		Returns the resulting options settings in the
+		ioctl return value.  Returns -1 on error.
+
+	error return:
+	  - ENOSYS	selected option(s) not supported by drive.
+
+
+
+
+CDROM_CLEAR_OPTIONS
+	Clear behavior options
+
+
+	Same as CDROM_SET_OPTIONS, except that selected options are
+	turned off.
+
+
+
+CDROM_SELECT_SPEED
+	Set the CD-ROM speed
+
+
+	usage::
+
+	  int speed;
+
+	  ioctl(fd, CDROM_SELECT_SPEED, speed);
+
+	inputs:
+		New drive speed.
+
+
+	outputs:
+		none
+
+
+	error return:
+	  - ENOSYS	speed selection not supported by drive.
+
+
+
+CDROM_SELECT_DISC
+	Select disc (for juke-boxes)
+
+
+	usage::
+
+	  int disk;
+
+	  ioctl(fd, CDROM_SELECT_DISC, disk);
+
+	inputs:
+		Disk to load into drive.
+
+
+	outputs:
+		none
+
+
+	error return:
+	  - EINVAL	Disk number beyond capacity of drive
+
+
+
+CDROM_MEDIA_CHANGED
+	Check is media changed
+
+
+	usage::
+
+	  int slot;
+
+	  ioctl(fd, CDROM_MEDIA_CHANGED, slot);
+
+	inputs:
+		Slot number to be tested, always zero except for jukeboxes.
+
+		May also be special values CDSL_NONE or CDSL_CURRENT
+
+	outputs:
+		Ioctl return value is 0 or 1 depending on whether the media
+
+	  has been changed, or -1 on error.
+
+	error returns:
+	  - ENOSYS	Drive can't detect media change
+	  - EINVAL	Slot number beyond capacity of drive
+	  - ENOMEM	Out of memory
+
+
+
+CDROM_DRIVE_STATUS
+	Get tray position, etc.
+
+
+	usage::
+
+	  int slot;
+
+	  ioctl(fd, CDROM_DRIVE_STATUS, slot);
+
+	inputs:
+		Slot number to be tested, always zero except for jukeboxes.
+
+		May also be special values CDSL_NONE or CDSL_CURRENT
+
+	outputs:
+		Ioctl return value will be one of the following values
+
+	  from <linux/cdrom.h>:
+
+	    =================== ==========================
+	    CDS_NO_INFO		Information not available.
+	    CDS_NO_DISC
+	    CDS_TRAY_OPEN
+	    CDS_DRIVE_NOT_READY
+	    CDS_DISC_OK
+	    -1			error
+	    =================== ==========================
+
+	error returns:
+	  - ENOSYS	Drive can't detect drive status
+	  - EINVAL	Slot number beyond capacity of drive
+	  - ENOMEM	Out of memory
+
+
+
+
+CDROM_DISC_STATUS
+	Get disc type, etc.
+
+
+	usage::
+
+	  ioctl(fd, CDROM_DISC_STATUS, 0);
+
+
+	inputs:
+		none
+
+
+	outputs:
+		Ioctl return value will be one of the following values
+
+	  from <linux/cdrom.h>:
+
+	    - CDS_NO_INFO
+	    - CDS_AUDIO
+	    - CDS_MIXED
+	    - CDS_XA_2_2
+	    - CDS_XA_2_1
+	    - CDS_DATA_1
+
+	error returns:
+		none at present
+
+	notes:
+	    - Source code comments state::
+
+
+		Ok, this is where problems start.  The current interface for
+		the CDROM_DISC_STATUS ioctl is flawed.  It makes the false
+		assumption that CDs are all CDS_DATA_1 or all CDS_AUDIO, etc.
+		Unfortunately, while this is often the case, it is also
+		very common for CDs to have some tracks with data, and some
+		tracks with audio.	Just because I feel like it, I declare
+		the following to be the best way to cope.  If the CD has
+		ANY data tracks on it, it will be returned as a data CD.
+		If it has any XA tracks, I will return it as that.	Now I
+		could simplify this interface by combining these returns with
+		the above, but this more clearly demonstrates the problem
+		with the current interface.  Too bad this wasn't designed
+		to use bitmasks...	       -Erik
+
+		Well, now we have the option CDS_MIXED: a mixed-type CD.
+		User level programmers might feel the ioctl is not very
+		useful.
+				---david
+
+
+
+
+CDROM_CHANGER_NSLOTS
+	Get number of slots
+
+
+	usage::
+
+	  ioctl(fd, CDROM_CHANGER_NSLOTS, 0);
+
+
+	inputs:
+		none
+
+
+	outputs:
+		The ioctl return value will be the number of slots in a
+		CD changer.  Typically 1 for non-multi-disk devices.
+
+	error returns:
+		none
+
+
+
+CDROM_LOCKDOOR
+	lock or unlock door
+
+
+	usage::
+
+	  int lock;
+
+	  ioctl(fd, CDROM_LOCKDOOR, lock);
+
+	inputs:
+		Door lock flag, 1=lock, 0=unlock
+
+
+	outputs:
+		none
+
+
+	error returns:
+	  - EDRIVE_CANT_DO_THIS
+
+				Door lock function not supported.
+	  - EBUSY
+
+				Attempt to unlock when multiple users
+				have the drive open and not CAP_SYS_ADMIN
+
+	notes:
+		As of 2.6.8.1, the lock flag is a global lock, meaning that
+		all CD drives will be locked or unlocked together.  This is
+		probably a bug.
+
+		The EDRIVE_CANT_DO_THIS value is defined in <linux/cdrom.h>
+		and is currently (2.6.8.1) the same as EOPNOTSUPP
+
+
+
+CDROM_DEBUG
+	Turn debug messages on/off
+
+
+	usage::
+
+	  int debug;
+
+	  ioctl(fd, CDROM_DEBUG, debug);
+
+	inputs:
+		Cdrom debug flag, 0=disable, 1=enable
+
+
+	outputs:
+		The ioctl return value will be the new debug flag.
+
+
+	error return:
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+
+
+
+CDROM_GET_CAPABILITY
+	get capabilities
+
+
+	usage::
+
+	  ioctl(fd, CDROM_GET_CAPABILITY, 0);
+
+
+	inputs:
+		none
+
+
+	outputs:
+		The ioctl return value is the current device capability
+		flags.  See CDC_CLOSE_TRAY, CDC_OPEN_TRAY, etc.
+
+
+
+CDROMAUDIOBUFSIZ
+	set the audio buffer size
+
+
+	usage::
+
+	  int arg;
+
+	  ioctl(fd, CDROMAUDIOBUFSIZ, val);
+
+	inputs:
+		New audio buffer size
+
+
+	outputs:
+		The ioctl return value is the new audio buffer size, or -1
+		on error.
+
+	error return:
+	  - ENOSYS	Not supported by this driver.
+
+	notes:
+		Not supported by all drivers.
+
+
+
+
+DVD_READ_STRUCT			Read structure
+
+	usage::
+
+	  dvd_struct s;
+
+	  ioctl(fd, DVD_READ_STRUCT, &s);
+
+	inputs:
+		dvd_struct structure, containing:
+
+	    =================== ==========================================
+	    type		specifies the information desired, one of
+				DVD_STRUCT_PHYSICAL, DVD_STRUCT_COPYRIGHT,
+				DVD_STRUCT_DISCKEY, DVD_STRUCT_BCA,
+				DVD_STRUCT_MANUFACT
+	    physical.layer_num	desired layer, indexed from 0
+	    copyright.layer_num	desired layer, indexed from 0
+	    disckey.agid
+	    =================== ==========================================
+
+	outputs:
+		dvd_struct structure, containing:
+
+	    =================== ================================
+	    physical		for type == DVD_STRUCT_PHYSICAL
+	    copyright		for type == DVD_STRUCT_COPYRIGHT
+	    disckey.value	for type == DVD_STRUCT_DISCKEY
+	    bca.{len,value}	for type == DVD_STRUCT_BCA
+	    manufact.{len,valu}	for type == DVD_STRUCT_MANUFACT
+	    =================== ================================
+
+	error returns:
+	  - EINVAL	physical.layer_num exceeds number of layers
+	  - EIO		Received invalid response from drive
+
+
+
+DVD_WRITE_STRUCT		Write structure
+
+	Not implemented, as of 2.6.8.1
+
+
+
+DVD_AUTH			Authentication
+
+	usage::
+
+	  dvd_authinfo ai;
+
+	  ioctl(fd, DVD_AUTH, &ai);
+
+	inputs:
+		dvd_authinfo structure.  See <linux/cdrom.h>
+
+
+	outputs:
+		dvd_authinfo structure.
+
+
+	error return:
+	  - ENOTTY	ai.type not recognized.
+
+
+
+CDROM_SEND_PACKET
+	send a packet to the drive
+
+
+	usage::
+
+	  struct cdrom_generic_command cgc;
+
+	  ioctl(fd, CDROM_SEND_PACKET, &cgc);
+
+	inputs:
+		cdrom_generic_command structure containing the packet to send.
+
+
+	outputs:
+		none
+
+	  cdrom_generic_command structure containing results.
+
+	error return:
+	  - EIO
+
+			command failed.
+	  - EPERM
+
+			Operation not permitted, either because a
+			write command was attempted on a drive which
+			is opened read-only, or because the command
+			requires CAP_SYS_RAWIO
+	  - EINVAL
+
+			cgc.data_direction not set
+
+
+
+CDROM_NEXT_WRITABLE
+	get next writable block
+
+
+	usage::
+
+	  long next;
+
+	  ioctl(fd, CDROM_NEXT_WRITABLE, &next);
+
+	inputs:
+		none
+
+
+	outputs:
+		The next writable block.
+
+
+	notes:
+		If the device does not support this ioctl directly, the
+
+	  ioctl will return CDROM_LAST_WRITTEN + 7.
+
+
+
+CDROM_LAST_WRITTEN
+	get last block written on disc
+
+
+	usage::
+
+	  long last;
+
+	  ioctl(fd, CDROM_LAST_WRITTEN, &last);
+
+	inputs:
+		none
+
+
+	outputs:
+		The last block written on disc
+
+
+	notes:
+		If the device does not support this ioctl directly, the
+		result is derived from the disc's table of contents.  If the
+		table of contents can't be read, this ioctl returns an
+		error.
diff --git a/Documentation/ioctl/cdrom.txt b/Documentation/ioctl/cdrom.txt
deleted file mode 100644
index a4d62a9d6771..000000000000
--- a/Documentation/ioctl/cdrom.txt
+++ /dev/null
@@ -1,967 +0,0 @@
-		Summary of CDROM ioctl calls.
-		============================
-
-		Edward A. Falk <efalk@google.com>
-
-		November, 2004
-
-This document attempts to describe the ioctl(2) calls supported by
-the CDROM layer.  These are by-and-large implemented (as of Linux 2.6)
-in drivers/cdrom/cdrom.c and drivers/block/scsi_ioctl.c
-
-ioctl values are listed in <linux/cdrom.h>.  As of this writing, they
-are as follows:
-
-	CDROMPAUSE		Pause Audio Operation
-	CDROMRESUME		Resume paused Audio Operation
-	CDROMPLAYMSF		Play Audio MSF (struct cdrom_msf)
-	CDROMPLAYTRKIND		Play Audio Track/index (struct cdrom_ti)
-	CDROMREADTOCHDR		Read TOC header (struct cdrom_tochdr)
-	CDROMREADTOCENTRY	Read TOC entry (struct cdrom_tocentry)
-	CDROMSTOP		Stop the cdrom drive
-	CDROMSTART		Start the cdrom drive
-	CDROMEJECT		Ejects the cdrom media
-	CDROMVOLCTRL		Control output volume (struct cdrom_volctrl)
-	CDROMSUBCHNL		Read subchannel data (struct cdrom_subchnl)
-	CDROMREADMODE2		Read CDROM mode 2 data (2336 Bytes)
-					   (struct cdrom_read)
-	CDROMREADMODE1		Read CDROM mode 1 data (2048 Bytes)
-					   (struct cdrom_read)
-	CDROMREADAUDIO		(struct cdrom_read_audio)
-	CDROMEJECT_SW		enable(1)/disable(0) auto-ejecting
-	CDROMMULTISESSION	Obtain the start-of-last-session
-				  address of multi session disks
-				  (struct cdrom_multisession)
-	CDROM_GET_MCN		Obtain the "Universal Product Code"
-				   if available (struct cdrom_mcn)
-	CDROM_GET_UPC		Deprecated, use CDROM_GET_MCN instead.
-	CDROMRESET		hard-reset the drive
-	CDROMVOLREAD		Get the drive's volume setting
-					  (struct cdrom_volctrl)
-	CDROMREADRAW		read data in raw mode (2352 Bytes)
-					   (struct cdrom_read)
-	CDROMREADCOOKED		read data in cooked mode
-	CDROMSEEK		seek msf address
-	CDROMPLAYBLK		scsi-cd only, (struct cdrom_blk)
-	CDROMREADALL		read all 2646 bytes
-	CDROMGETSPINDOWN	return 4-bit spindown value
-	CDROMSETSPINDOWN	set 4-bit spindown value
-	CDROMCLOSETRAY		pendant of CDROMEJECT
-	CDROM_SET_OPTIONS	Set behavior options
-	CDROM_CLEAR_OPTIONS	Clear behavior options
-	CDROM_SELECT_SPEED	Set the CD-ROM speed
-	CDROM_SELECT_DISC	Select disc (for juke-boxes)
-	CDROM_MEDIA_CHANGED	Check is media changed
-	CDROM_DRIVE_STATUS	Get tray position, etc.
-	CDROM_DISC_STATUS	Get disc type, etc.
-	CDROM_CHANGER_NSLOTS	Get number of slots
-	CDROM_LOCKDOOR		lock or unlock door
-	CDROM_DEBUG		Turn debug messages on/off
-	CDROM_GET_CAPABILITY	get capabilities
-	CDROMAUDIOBUFSIZ	set the audio buffer size
-	DVD_READ_STRUCT		Read structure
-	DVD_WRITE_STRUCT	Write structure
-	DVD_AUTH		Authentication
-	CDROM_SEND_PACKET	send a packet to the drive
-	CDROM_NEXT_WRITABLE	get next writable block
-	CDROM_LAST_WRITTEN	get last block written on disc
-
-
-The information that follows was determined from reading kernel source
-code.  It is likely that some corrections will be made over time.
-
-
-
-
-
-
-
-General:
-
-	Unless otherwise specified, all ioctl calls return 0 on success
-	and -1 with errno set to an appropriate value on error.  (Some
-	ioctls return non-negative data values.)
-
-	Unless otherwise specified, all ioctl calls return -1 and set
-	errno to EFAULT on a failed attempt to copy data to or from user
-	address space.
-
-	Individual drivers may return error codes not listed here.
-
-	Unless otherwise specified, all data structures and constants
-	are defined in <linux/cdrom.h>
-
-
-
-
-CDROMPAUSE			Pause Audio Operation
-
-	usage:
-
-	  ioctl(fd, CDROMPAUSE, 0);
-
-	inputs:		none
-
-	outputs:	none
-
-	error return:
-	  ENOSYS	cd drive not audio-capable.
-
-
-CDROMRESUME			Resume paused Audio Operation
-
-	usage:
-
-	  ioctl(fd, CDROMRESUME, 0);
-
-	inputs:		none
-
-	outputs:	none
-
-	error return:
-	  ENOSYS	cd drive not audio-capable.
-
-
-CDROMPLAYMSF			Play Audio MSF (struct cdrom_msf)
-
-	usage:
-
-	  struct cdrom_msf msf;
-	  ioctl(fd, CDROMPLAYMSF, &msf);
-
-	inputs:
-	  cdrom_msf structure, describing a segment of music to play
-
-	outputs:	none
-
-	error return:
-	  ENOSYS	cd drive not audio-capable.
-
-	notes:
-	  MSF stands for minutes-seconds-frames
-	  LBA stands for logical block address
-
-	  Segment is described as start and end times, where each time
-	  is described as minutes:seconds:frames.  A frame is 1/75 of
-	  a second.
-
-
-CDROMPLAYTRKIND			Play Audio Track/index (struct cdrom_ti)
-
-	usage:
-
-	  struct cdrom_ti ti;
-	  ioctl(fd, CDROMPLAYTRKIND, &ti);
-
-	inputs:
-	  cdrom_ti structure, describing a segment of music to play
-
-	outputs:	none
-
-	error return:
-	  ENOSYS	cd drive not audio-capable.
-
-	notes:
-	  Segment is described as start and end times, where each time
-	  is described as a track and an index.
-
-
-
-CDROMREADTOCHDR			Read TOC header (struct cdrom_tochdr)
-
-	usage:
-
-	  cdrom_tochdr header;
-	  ioctl(fd, CDROMREADTOCHDR, &header);
-
-	inputs:
-	  cdrom_tochdr structure
-
-	outputs:
-	  cdrom_tochdr structure
-
-	error return:
-	  ENOSYS	cd drive not audio-capable.
-
-
-
-CDROMREADTOCENTRY		Read TOC entry (struct cdrom_tocentry)
-
-	usage:
-
-	  struct cdrom_tocentry entry;
-	  ioctl(fd, CDROMREADTOCENTRY, &entry);
-
-	inputs:
-	  cdrom_tocentry structure
-
-	outputs:
-	  cdrom_tocentry structure
-
-	error return:
-	  ENOSYS	cd drive not audio-capable.
-	  EINVAL	entry.cdte_format not CDROM_MSF or CDROM_LBA
-	  EINVAL	requested track out of bounds
-	  EIO		I/O error reading TOC
-
-	notes:
-	  TOC stands for Table Of Contents
-	  MSF stands for minutes-seconds-frames
-	  LBA stands for logical block address
-
-
-
-CDROMSTOP			Stop the cdrom drive
-
-	usage:
-
-	  ioctl(fd, CDROMSTOP, 0);
-
-	inputs:		none
-
-	outputs:	none
-
-	error return:
-	  ENOSYS	cd drive not audio-capable.
-
-	notes:
-	  Exact interpretation of this ioctl depends on the device,
-	  but most seem to spin the drive down.
-
-
-CDROMSTART			Start the cdrom drive
-
-	usage:
-
-	  ioctl(fd, CDROMSTART, 0);
-
-	inputs:		none
-
-	outputs:	none
-
-	error return:
-	  ENOSYS	cd drive not audio-capable.
-
-	notes:
-	  Exact interpretation of this ioctl depends on the device,
-	  but most seem to spin the drive up and/or close the tray.
-	  Other devices ignore the ioctl completely.
-
-
-CDROMEJECT			Ejects the cdrom media
-
-	usage:
-
-	  ioctl(fd, CDROMEJECT, 0);
-
-	inputs:		none
-
-	outputs:	none
-
-	error returns:
-	  ENOSYS	cd drive not capable of ejecting
-	  EBUSY		other processes are accessing drive, or door is locked
-
-	notes:
-	  See CDROM_LOCKDOOR, below.
-
-
-
-CDROMCLOSETRAY			pendant of CDROMEJECT
-
-	usage:
-
-	  ioctl(fd, CDROMCLOSETRAY, 0);
-
-	inputs:		none
-
-	outputs:	none
-
-	error returns:
-	  ENOSYS	cd drive not capable of closing the tray
-	  EBUSY		other processes are accessing drive, or door is locked
-
-	notes:
-	  See CDROM_LOCKDOOR, below.
-
-
-
-CDROMVOLCTRL			Control output volume (struct cdrom_volctrl)
-
-	usage:
-
-	  struct cdrom_volctrl volume;
-	  ioctl(fd, CDROMVOLCTRL, &volume);
-
-	inputs:
-	  cdrom_volctrl structure containing volumes for up to 4
-	  channels.
-
-	outputs:	none
-
-	error return:
-	  ENOSYS	cd drive not audio-capable.
-
-
-
-CDROMVOLREAD			Get the drive's volume setting
-					  (struct cdrom_volctrl)
-
-	usage:
-
-	  struct cdrom_volctrl volume;
-	  ioctl(fd, CDROMVOLREAD, &volume);
-
-	inputs:		none
-
-	outputs:
-	  The current volume settings.
-
-	error return:
-	  ENOSYS	cd drive not audio-capable.
-
-
-
-CDROMSUBCHNL			Read subchannel data (struct cdrom_subchnl)
-
-	usage:
-
-	  struct cdrom_subchnl q;
-	  ioctl(fd, CDROMSUBCHNL, &q);
-
-	inputs:
-	  cdrom_subchnl structure
-
-	outputs:
-	  cdrom_subchnl structure
-
-	error return:
-	  ENOSYS	cd drive not audio-capable.
-	  EINVAL	format not CDROM_MSF or CDROM_LBA
-
-	notes:
-	  Format is converted to CDROM_MSF or CDROM_LBA
-	  as per user request on return
-
-
-
-CDROMREADRAW			read data in raw mode (2352 Bytes)
-					   (struct cdrom_read)
-
-	usage:
-
-	  union {
-	    struct cdrom_msf msf;		/* input */
-	    char buffer[CD_FRAMESIZE_RAW];	/* return */
-	  } arg;
-	  ioctl(fd, CDROMREADRAW, &arg);
-
-	inputs:
-	  cdrom_msf structure indicating an address to read.
-	  Only the start values are significant.
-
-	outputs:
-	  Data written to address provided by user.
-
-	error return:
-	  EINVAL	address less than 0, or msf less than 0:2:0
-	  ENOMEM	out of memory
-
-	notes:
-	  As of 2.6.8.1, comments in <linux/cdrom.h> indicate that this
-	  ioctl accepts a cdrom_read structure, but actual source code
-	  reads a cdrom_msf structure and writes a buffer of data to
-	  the same address.
-
-	  MSF values are converted to LBA values via this formula:
-
-	    lba = (((m * CD_SECS) + s) * CD_FRAMES + f) - CD_MSF_OFFSET;
-
-
-
-
-CDROMREADMODE1			Read CDROM mode 1 data (2048 Bytes)
-					   (struct cdrom_read)
-
-	notes:
-	  Identical to CDROMREADRAW except that block size is
-	  CD_FRAMESIZE (2048) bytes
-
-
-
-CDROMREADMODE2			Read CDROM mode 2 data (2336 Bytes)
-					   (struct cdrom_read)
-
-	notes:
-	  Identical to CDROMREADRAW except that block size is
-	  CD_FRAMESIZE_RAW0 (2336) bytes
-
-
-
-CDROMREADAUDIO			(struct cdrom_read_audio)
-
-	usage:
-
-	  struct cdrom_read_audio ra;
-	  ioctl(fd, CDROMREADAUDIO, &ra);
-
-	inputs:
-	  cdrom_read_audio structure containing read start
-	  point and length
-
-	outputs:
-	  audio data, returned to buffer indicated by ra
-
-	error return:
-	  EINVAL	format not CDROM_MSF or CDROM_LBA
-	  EINVAL	nframes not in range [1 75]
-	  ENXIO		drive has no queue (probably means invalid fd)
-	  ENOMEM	out of memory
-
-
-CDROMEJECT_SW			enable(1)/disable(0) auto-ejecting
-
-	usage:
-
-	  int val;
-	  ioctl(fd, CDROMEJECT_SW, val);
-
-	inputs:
-	  Flag specifying auto-eject flag.
-
-	outputs:	none
-
-	error return:
-	  ENOSYS	Drive is not capable of ejecting.
-	  EBUSY		Door is locked
-
-
-
-
-CDROMMULTISESSION		Obtain the start-of-last-session
-				  address of multi session disks
-				  (struct cdrom_multisession)
-	usage:
-
-	  struct cdrom_multisession ms_info;
-	  ioctl(fd, CDROMMULTISESSION, &ms_info);
-
-	inputs:
-	  cdrom_multisession structure containing desired
-	  format.
-
-	outputs:
-	  cdrom_multisession structure is filled with last_session
-	  information.
-
-	error return:
-	  EINVAL	format not CDROM_MSF or CDROM_LBA
-
-
-CDROM_GET_MCN			Obtain the "Universal Product Code"
-				   if available (struct cdrom_mcn)
-
-	usage:
-
-	  struct cdrom_mcn mcn;
-	  ioctl(fd, CDROM_GET_MCN, &mcn);
-
-	inputs:		none
-
-	outputs:
-	  Universal Product Code
-
-	error return:
-	  ENOSYS	Drive is not capable of reading MCN data.
-
-	notes:
-	  Source code comments state:
-
-	    The following function is implemented, although very few
-	    audio discs give Universal Product Code information, which
-	    should just be the Medium Catalog Number on the box.  Note,
-	    that the way the code is written on the CD is /not/ uniform
-	    across all discs!
-
-
-
-
-CDROM_GET_UPC			CDROM_GET_MCN  (deprecated)
-
-	Not implemented, as of 2.6.8.1
-
-
-
-CDROMRESET			hard-reset the drive
-
-	usage:
-
-	  ioctl(fd, CDROMRESET, 0);
-
-	inputs:		none
-
-	outputs:	none
-
-	error return:
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  ENOSYS	Drive is not capable of resetting.
-
-
-
-
-CDROMREADCOOKED			read data in cooked mode
-
-	usage:
-
-	  u8 buffer[CD_FRAMESIZE]
-	  ioctl(fd, CDROMREADCOOKED, buffer);
-
-	inputs:		none
-
-	outputs:
-	  2048 bytes of data, "cooked" mode.
-
-	notes:
-	  Not implemented on all drives.
-
-
-
-
-CDROMREADALL			read all 2646 bytes
-
-	Same as CDROMREADCOOKED, but reads 2646 bytes.
-
-
-
-CDROMSEEK			seek msf address
-
-	usage:
-
-	  struct cdrom_msf msf;
-	  ioctl(fd, CDROMSEEK, &msf);
-
-	inputs:
-	  MSF address to seek to.
-
-	outputs:	none
-
-
-
-CDROMPLAYBLK			scsi-cd only, (struct cdrom_blk)
-
-	usage:
-
-	  struct cdrom_blk blk;
-	  ioctl(fd, CDROMPLAYBLK, &blk);
-
-	inputs:
-	  Region to play
-
-	outputs:	none
-
-
-
-CDROMGETSPINDOWN
-
-	usage:
-
-	  char spindown;
-	  ioctl(fd, CDROMGETSPINDOWN, &spindown);
-
-	inputs:		none
-
-	outputs:
-	  The value of the current 4-bit spindown value.
-
-
-
-
-CDROMSETSPINDOWN
-
-	usage:
-
-	  char spindown
-	  ioctl(fd, CDROMSETSPINDOWN, &spindown);
-
-	inputs:
-	  4-bit value used to control spindown (TODO: more detail here)
-
-	outputs:	none
-
-
-
-
-
-CDROM_SET_OPTIONS		Set behavior options
-
-	usage:
-
-	  int options;
-	  ioctl(fd, CDROM_SET_OPTIONS, options);
-
-	inputs:
-	  New values for drive options.  The logical 'or' of:
-	    CDO_AUTO_CLOSE	close tray on first open(2)
-	    CDO_AUTO_EJECT	open tray on last release
-	    CDO_USE_FFLAGS	use O_NONBLOCK information on open
-	    CDO_LOCK		lock tray on open files
-	    CDO_CHECK_TYPE	check type on open for data
-
-	outputs:
-	  Returns the resulting options settings in the
-	  ioctl return value.  Returns -1 on error.
-
-	error return:
-	  ENOSYS	selected option(s) not supported by drive.
-
-
-
-
-CDROM_CLEAR_OPTIONS		Clear behavior options
-
-	Same as CDROM_SET_OPTIONS, except that selected options are
-	turned off.
-
-
-
-CDROM_SELECT_SPEED		Set the CD-ROM speed
-
-	usage:
-
-	  int speed;
-	  ioctl(fd, CDROM_SELECT_SPEED, speed);
-
-	inputs:
-	  New drive speed.
-
-	outputs:	none
-
-	error return:
-	  ENOSYS	speed selection not supported by drive.
-
-
-
-CDROM_SELECT_DISC		Select disc (for juke-boxes)
-
-	usage:
-
-	  int disk;
-	  ioctl(fd, CDROM_SELECT_DISC, disk);
-
-	inputs:
-	  Disk to load into drive.
-
-	outputs:	none
-
-	error return:
-	  EINVAL	Disk number beyond capacity of drive
-
-
-
-CDROM_MEDIA_CHANGED		Check is media changed
-
-	usage:
-
-	  int slot;
-	  ioctl(fd, CDROM_MEDIA_CHANGED, slot);
-
-	inputs:
-	  Slot number to be tested, always zero except for jukeboxes.
-	  May also be special values CDSL_NONE or CDSL_CURRENT
-
-	outputs:
-	  Ioctl return value is 0 or 1 depending on whether the media
-	  has been changed, or -1 on error.
-
-	error returns:
-	  ENOSYS	Drive can't detect media change
-	  EINVAL	Slot number beyond capacity of drive
-	  ENOMEM	Out of memory
-
-
-
-CDROM_DRIVE_STATUS		Get tray position, etc.
-
-	usage:
-
-	  int slot;
-	  ioctl(fd, CDROM_DRIVE_STATUS, slot);
-
-	inputs:
-	  Slot number to be tested, always zero except for jukeboxes.
-	  May also be special values CDSL_NONE or CDSL_CURRENT
-
-	outputs:
-	  Ioctl return value will be one of the following values
-	  from <linux/cdrom.h>:
-
-	    CDS_NO_INFO		Information not available.
-	    CDS_NO_DISC
-	    CDS_TRAY_OPEN
-	    CDS_DRIVE_NOT_READY
-	    CDS_DISC_OK
-	    -1			error
-
-	error returns:
-	  ENOSYS	Drive can't detect drive status
-	  EINVAL	Slot number beyond capacity of drive
-	  ENOMEM	Out of memory
-
-
-
-
-CDROM_DISC_STATUS		Get disc type, etc.
-
-	usage:
-
-	  ioctl(fd, CDROM_DISC_STATUS, 0);
-
-	inputs:		none
-
-	outputs:
-	  Ioctl return value will be one of the following values
-	  from <linux/cdrom.h>:
-	    CDS_NO_INFO
-	    CDS_AUDIO
-	    CDS_MIXED
-	    CDS_XA_2_2
-	    CDS_XA_2_1
-	    CDS_DATA_1
-
-	error returns:	none at present
-
-	notes:
-	  Source code comments state:
-
-	    Ok, this is where problems start.  The current interface for
-	    the CDROM_DISC_STATUS ioctl is flawed.  It makes the false
-	    assumption that CDs are all CDS_DATA_1 or all CDS_AUDIO, etc.
-	    Unfortunately, while this is often the case, it is also
-	    very common for CDs to have some tracks with data, and some
-	    tracks with audio.	Just because I feel like it, I declare
-	    the following to be the best way to cope.  If the CD has
-	    ANY data tracks on it, it will be returned as a data CD.
-	    If it has any XA tracks, I will return it as that.	Now I
-	    could simplify this interface by combining these returns with
-	    the above, but this more clearly demonstrates the problem
-	    with the current interface.  Too bad this wasn't designed
-	    to use bitmasks...	       -Erik
-
-	    Well, now we have the option CDS_MIXED: a mixed-type CD.
-	    User level programmers might feel the ioctl is not very
-	    useful.
-			---david
-
-
-
-
-CDROM_CHANGER_NSLOTS		Get number of slots
-
-	usage:
-
-	  ioctl(fd, CDROM_CHANGER_NSLOTS, 0);
-
-	inputs:		none
-
-	outputs:
-	  The ioctl return value will be the number of slots in a
-	  CD changer.  Typically 1 for non-multi-disk devices.
-
-	error returns:	none
-
-
-
-CDROM_LOCKDOOR			lock or unlock door
-
-	usage:
-
-	  int lock;
-	  ioctl(fd, CDROM_LOCKDOOR, lock);
-
-	inputs:
-	  Door lock flag, 1=lock, 0=unlock
-
-	outputs:	none
-
-	error returns:
-	  EDRIVE_CANT_DO_THIS	Door lock function not supported.
-	  EBUSY			Attempt to unlock when multiple users
-	  			have the drive open and not CAP_SYS_ADMIN
-
-	notes:
-	  As of 2.6.8.1, the lock flag is a global lock, meaning that
-	  all CD drives will be locked or unlocked together.  This is
-	  probably a bug.
-
-	  The EDRIVE_CANT_DO_THIS value is defined in <linux/cdrom.h>
-	  and is currently (2.6.8.1) the same as EOPNOTSUPP
-
-
-
-CDROM_DEBUG			Turn debug messages on/off
-
-	usage:
-
-	  int debug;
-	  ioctl(fd, CDROM_DEBUG, debug);
-
-	inputs:
-	  Cdrom debug flag, 0=disable, 1=enable
-
-	outputs:
-	  The ioctl return value will be the new debug flag.
-
-	error return:
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-
-
-
-CDROM_GET_CAPABILITY		get capabilities
-
-	usage:
-
-	  ioctl(fd, CDROM_GET_CAPABILITY, 0);
-
-	inputs:		none
-
-	outputs:
-	  The ioctl return value is the current device capability
-	  flags.  See CDC_CLOSE_TRAY, CDC_OPEN_TRAY, etc.
-
-
-
-CDROMAUDIOBUFSIZ		set the audio buffer size
-
-	usage:
-
-	  int arg;
-	  ioctl(fd, CDROMAUDIOBUFSIZ, val);
-
-	inputs:
-	  New audio buffer size
-
-	outputs:
-	  The ioctl return value is the new audio buffer size, or -1
-	  on error.
-
-	error return:
-	  ENOSYS	Not supported by this driver.
-
-	notes:
-	  Not supported by all drivers.
-
-
-
-DVD_READ_STRUCT			Read structure
-
-	usage:
-
-	  dvd_struct s;
-	  ioctl(fd, DVD_READ_STRUCT, &s);
-
-	inputs:
-	  dvd_struct structure, containing:
-	    type		specifies the information desired, one of
-	    			DVD_STRUCT_PHYSICAL, DVD_STRUCT_COPYRIGHT,
-				DVD_STRUCT_DISCKEY, DVD_STRUCT_BCA,
-				DVD_STRUCT_MANUFACT
-	    physical.layer_num	desired layer, indexed from 0
-	    copyright.layer_num	desired layer, indexed from 0
-	    disckey.agid
-
-	outputs:
-	  dvd_struct structure, containing:
-	    physical		for type == DVD_STRUCT_PHYSICAL
-	    copyright		for type == DVD_STRUCT_COPYRIGHT
-	    disckey.value	for type == DVD_STRUCT_DISCKEY
-	    bca.{len,value}	for type == DVD_STRUCT_BCA
-	    manufact.{len,valu}	for type == DVD_STRUCT_MANUFACT
-
-	error returns:
-	  EINVAL	physical.layer_num exceeds number of layers
-	  EIO		Received invalid response from drive
-
-
-
-DVD_WRITE_STRUCT		Write structure
-
-	Not implemented, as of 2.6.8.1
-
-
-
-DVD_AUTH			Authentication
-
-	usage:
-
-	  dvd_authinfo ai;
-	  ioctl(fd, DVD_AUTH, &ai);
-
-	inputs:
-	  dvd_authinfo structure.  See <linux/cdrom.h>
-
-	outputs:
-	  dvd_authinfo structure.
-
-	error return:
-	  ENOTTY	ai.type not recognized.
-
-
-
-CDROM_SEND_PACKET		send a packet to the drive
-
-	usage:
-
-	  struct cdrom_generic_command cgc;
-	  ioctl(fd, CDROM_SEND_PACKET, &cgc);
-
-	inputs:
-	  cdrom_generic_command structure containing the packet to send.
-
-	outputs:	none
-	  cdrom_generic_command structure containing results.
-
-	error return:
-	  EIO		command failed.
-	  EPERM		Operation not permitted, either because a
-			write command was attempted on a drive which
-			is opened read-only, or because the command
-			requires CAP_SYS_RAWIO
-	  EINVAL	cgc.data_direction not set
-
-
-
-CDROM_NEXT_WRITABLE		get next writable block
-
-	usage:
-
-	  long next;
-	  ioctl(fd, CDROM_NEXT_WRITABLE, &next);
-
-	inputs:		none
-
-	outputs:
-	  The next writable block.
-
-	notes:
-	  If the device does not support this ioctl directly, the
-	  ioctl will return CDROM_LAST_WRITTEN + 7.
-
-
-
-CDROM_LAST_WRITTEN		get last block written on disc
-
-	usage:
-
-	  long last;
-	  ioctl(fd, CDROM_LAST_WRITTEN, &last);
-
-	inputs:		none
-
-	outputs:
-	  The last block written on disc
-
-	notes:
-	  If the device does not support this ioctl directly, the
-	  result is derived from the disc's table of contents.  If the
-	  table of contents can't be read, this ioctl returns an
-	  error.
diff --git a/Documentation/ioctl/hdio.rst b/Documentation/ioctl/hdio.rst
new file mode 100644
index 000000000000..e822e3dff176
--- /dev/null
+++ b/Documentation/ioctl/hdio.rst
@@ -0,0 +1,1342 @@
+==============================
+Summary of `HDIO_` ioctl calls
+==============================
+
+- Edward A. Falk <efalk@google.com>
+
+November, 2004
+
+This document attempts to describe the ioctl(2) calls supported by
+the HD/IDE layer.  These are by-and-large implemented (as of Linux 2.6)
+in drivers/ide/ide.c and drivers/block/scsi_ioctl.c
+
+ioctl values are listed in <linux/hdreg.h>.  As of this writing, they
+are as follows:
+
+    ioctls that pass argument pointers to user space:
+
+	=======================	=======================================
+	HDIO_GETGEO		get device geometry
+	HDIO_GET_UNMASKINTR	get current unmask setting
+	HDIO_GET_MULTCOUNT	get current IDE blockmode setting
+	HDIO_GET_QDMA		get use-qdma flag
+	HDIO_SET_XFER		set transfer rate via proc
+	HDIO_OBSOLETE_IDENTITY	OBSOLETE, DO NOT USE
+	HDIO_GET_KEEPSETTINGS	get keep-settings-on-reset flag
+	HDIO_GET_32BIT		get current io_32bit setting
+	HDIO_GET_NOWERR		get ignore-write-error flag
+	HDIO_GET_DMA		get use-dma flag
+	HDIO_GET_NICE		get nice flags
+	HDIO_GET_IDENTITY	get IDE identification info
+	HDIO_GET_WCACHE		get write cache mode on|off
+	HDIO_GET_ACOUSTIC	get acoustic value
+	HDIO_GET_ADDRESS	get sector addressing mode
+	HDIO_GET_BUSSTATE	get the bus state of the hwif
+	HDIO_TRISTATE_HWIF	execute a channel tristate
+	HDIO_DRIVE_RESET	execute a device reset
+	HDIO_DRIVE_TASKFILE	execute raw taskfile
+	HDIO_DRIVE_TASK		execute task and special drive command
+	HDIO_DRIVE_CMD		execute a special drive command
+	HDIO_DRIVE_CMD_AEB	HDIO_DRIVE_TASK
+	=======================	=======================================
+
+    ioctls that pass non-pointer values:
+
+	=======================	=======================================
+	HDIO_SET_MULTCOUNT	change IDE blockmode
+	HDIO_SET_UNMASKINTR	permit other irqs during I/O
+	HDIO_SET_KEEPSETTINGS	keep ioctl settings on reset
+	HDIO_SET_32BIT		change io_32bit flags
+	HDIO_SET_NOWERR		change ignore-write-error flag
+	HDIO_SET_DMA		change use-dma flag
+	HDIO_SET_PIO_MODE	reconfig interface to new speed
+	HDIO_SCAN_HWIF		register and (re)scan interface
+	HDIO_SET_NICE		set nice flags
+	HDIO_UNREGISTER_HWIF	unregister interface
+	HDIO_SET_WCACHE		change write cache enable-disable
+	HDIO_SET_ACOUSTIC	change acoustic behavior
+	HDIO_SET_BUSSTATE	set the bus state of the hwif
+	HDIO_SET_QDMA		change use-qdma flag
+	HDIO_SET_ADDRESS	change lba addressing modes
+
+	HDIO_SET_IDE_SCSI	Set scsi emulation mode on/off
+	HDIO_SET_SCSI_IDE	not implemented yet
+	=======================	=======================================
+
+
+The information that follows was determined from reading kernel source
+code.  It is likely that some corrections will be made over time.
+
+------------------------------------------------------------------------------
+
+General:
+
+	Unless otherwise specified, all ioctl calls return 0 on success
+	and -1 with errno set to an appropriate value on error.
+
+	Unless otherwise specified, all ioctl calls return -1 and set
+	errno to EFAULT on a failed attempt to copy data to or from user
+	address space.
+
+	Unless otherwise specified, all data structures and constants
+	are defined in <linux/hdreg.h>
+
+------------------------------------------------------------------------------
+
+HDIO_GETGEO
+	get device geometry
+
+
+	usage::
+
+	  struct hd_geometry geom;
+
+	  ioctl(fd, HDIO_GETGEO, &geom);
+
+
+	inputs:
+		none
+
+
+
+	outputs:
+		hd_geometry structure containing:
+
+
+	    =========	==================================
+	    heads	number of heads
+	    sectors	number of sectors/track
+	    cylinders	number of cylinders, mod 65536
+	    start	starting sector of this partition.
+	    =========	==================================
+
+
+	error returns:
+	  - EINVAL
+
+			if the device is not a disk drive or floppy drive,
+			or if the user passes a null pointer
+
+
+	notes:
+		Not particularly useful with modern disk drives, whose geometry
+		is a polite fiction anyway.  Modern drives are addressed
+		purely by sector number nowadays (lba addressing), and the
+		drive geometry is an abstraction which is actually subject
+		to change.  Currently (as of Nov 2004), the geometry values
+		are the "bios" values -- presumably the values the drive had
+		when Linux first booted.
+
+		In addition, the cylinders field of the hd_geometry is an
+		unsigned short, meaning that on most architectures, this
+		ioctl will not return a meaningful value on drives with more
+		than 65535 tracks.
+
+		The start field is unsigned long, meaning that it will not
+		contain a meaningful value for disks over 219 Gb in size.
+
+
+
+
+HDIO_GET_UNMASKINTR
+	get current unmask setting
+
+
+	usage::
+
+	  long val;
+
+	  ioctl(fd, HDIO_GET_UNMASKINTR, &val);
+
+	inputs:
+		none
+
+
+
+	outputs:
+		The value of the drive's current unmask setting
+
+
+
+
+
+HDIO_SET_UNMASKINTR
+	permit other irqs during I/O
+
+
+	usage::
+
+	  unsigned long val;
+
+	  ioctl(fd, HDIO_SET_UNMASKINTR, val);
+
+	inputs:
+		New value for unmask flag
+
+
+
+	outputs:
+		none
+
+
+
+	error return:
+	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+	  - EINVAL	value out of range [0 1]
+	  - EBUSY	Controller busy
+
+
+
+
+HDIO_GET_MULTCOUNT
+	get current IDE blockmode setting
+
+
+	usage::
+
+	  long val;
+
+	  ioctl(fd, HDIO_GET_MULTCOUNT, &val);
+
+	inputs:
+		none
+
+
+
+	outputs:
+		The value of the current IDE block mode setting.  This
+		controls how many sectors the drive will transfer per
+		interrupt.
+
+
+
+HDIO_SET_MULTCOUNT
+	change IDE blockmode
+
+
+	usage::
+
+	  int val;
+
+	  ioctl(fd, HDIO_SET_MULTCOUNT, val);
+
+	inputs:
+		New value for IDE block mode setting.  This controls how many
+		sectors the drive will transfer per interrupt.
+
+	outputs:
+		none
+
+
+
+	error return:
+	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+	  - EINVAL	value out of range supported by disk.
+	  - EBUSY	Controller busy or blockmode already set.
+	  - EIO		Drive did not accept new block mode.
+
+	notes:
+	  Source code comments read::
+
+	    This is tightly woven into the driver->do_special cannot
+	    touch.  DON'T do it again until a total personality rewrite
+	    is committed.
+
+	  If blockmode has already been set, this ioctl will fail with
+	  -EBUSY
+
+
+
+HDIO_GET_QDMA
+	get use-qdma flag
+
+
+	Not implemented, as of 2.6.8.1
+
+
+
+HDIO_SET_XFER
+	set transfer rate via proc
+
+
+	Not implemented, as of 2.6.8.1
+
+
+
+HDIO_OBSOLETE_IDENTITY
+	OBSOLETE, DO NOT USE
+
+
+	Same as HDIO_GET_IDENTITY (see below), except that it only
+	returns the first 142 bytes of drive identity information.
+
+
+
+HDIO_GET_IDENTITY
+	get IDE identification info
+
+
+	usage::
+
+	  unsigned char identity[512];
+
+	  ioctl(fd, HDIO_GET_IDENTITY, identity);
+
+	inputs:
+		none
+
+
+
+	outputs:
+		ATA drive identity information.  For full description, see
+		the IDENTIFY DEVICE and IDENTIFY PACKET DEVICE commands in
+		the ATA specification.
+
+	error returns:
+	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - ENOMSG	IDENTIFY DEVICE information not available
+
+	notes:
+		Returns information that was obtained when the drive was
+		probed.  Some of this information is subject to change, and
+		this ioctl does not re-probe the drive to update the
+		information.
+
+		This information is also available from /proc/ide/hdX/identify
+
+
+
+HDIO_GET_KEEPSETTINGS
+	get keep-settings-on-reset flag
+
+
+	usage::
+
+	  long val;
+
+	  ioctl(fd, HDIO_GET_KEEPSETTINGS, &val);
+
+	inputs:
+		none
+
+
+
+	outputs:
+		The value of the current "keep settings" flag
+
+
+
+	notes:
+		When set, indicates that kernel should restore settings
+		after a drive reset.
+
+
+
+HDIO_SET_KEEPSETTINGS
+	keep ioctl settings on reset
+
+
+	usage::
+
+	  long val;
+
+	  ioctl(fd, HDIO_SET_KEEPSETTINGS, val);
+
+	inputs:
+		New value for keep_settings flag
+
+
+
+	outputs:
+		none
+
+
+
+	error return:
+	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+	  - EINVAL	value out of range [0 1]
+	  - EBUSY		Controller busy
+
+
+
+HDIO_GET_32BIT
+	get current io_32bit setting
+
+
+	usage::
+
+	  long val;
+
+	  ioctl(fd, HDIO_GET_32BIT, &val);
+
+	inputs:
+		none
+
+
+
+	outputs:
+		The value of the current io_32bit setting
+
+
+
+	notes:
+		0=16-bit, 1=32-bit, 2,3 = 32bit+sync
+
+
+
+
+
+HDIO_GET_NOWERR
+	get ignore-write-error flag
+
+
+	usage::
+
+	  long val;
+
+	  ioctl(fd, HDIO_GET_NOWERR, &val);
+
+	inputs:
+		none
+
+
+
+	outputs:
+		The value of the current ignore-write-error flag
+
+
+
+
+
+HDIO_GET_DMA
+	get use-dma flag
+
+
+	usage::
+
+	  long val;
+
+	  ioctl(fd, HDIO_GET_DMA, &val);
+
+	inputs:
+		none
+
+
+
+	outputs:
+		The value of the current use-dma flag
+
+
+
+
+
+HDIO_GET_NICE
+	get nice flags
+
+
+	usage::
+
+	  long nice;
+
+	  ioctl(fd, HDIO_GET_NICE, &nice);
+
+	inputs:
+		none
+
+
+
+	outputs:
+		The drive's "nice" values.
+
+
+
+	notes:
+		Per-drive flags which determine when the system will give more
+		bandwidth to other devices sharing the same IDE bus.
+
+		See <linux/hdreg.h>, near symbol IDE_NICE_DSC_OVERLAP.
+
+
+
+
+HDIO_SET_NICE
+	set nice flags
+
+
+	usage::
+
+	  unsigned long nice;
+
+	  ...
+	  ioctl(fd, HDIO_SET_NICE, nice);
+
+	inputs:
+		bitmask of nice flags.
+
+
+
+	outputs:
+		none
+
+
+
+	error returns:
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+	  - EPERM	Flags other than DSC_OVERLAP and NICE_1 set.
+	  - EPERM	DSC_OVERLAP specified but not supported by drive
+
+	notes:
+		This ioctl sets the DSC_OVERLAP and NICE_1 flags from values
+		provided by the user.
+
+		Nice flags are listed in <linux/hdreg.h>, starting with
+		IDE_NICE_DSC_OVERLAP.  These values represent shifts.
+
+
+
+
+
+HDIO_GET_WCACHE
+	get write cache mode on|off
+
+
+	usage::
+
+	  long val;
+
+	  ioctl(fd, HDIO_GET_WCACHE, &val);
+
+	inputs:
+		none
+
+
+
+	outputs:
+		The value of the current write cache mode
+
+
+
+
+
+HDIO_GET_ACOUSTIC
+	get acoustic value
+
+
+	usage::
+
+	  long val;
+
+	  ioctl(fd, HDIO_GET_ACOUSTIC, &val);
+
+	inputs:
+		none
+
+
+
+	outputs:
+		The value of the current acoustic settings
+
+
+
+	notes:
+		See HDIO_SET_ACOUSTIC
+
+
+
+
+
+HDIO_GET_ADDRESS
+	usage::
+
+
+	  long val;
+
+	  ioctl(fd, HDIO_GET_ADDRESS, &val);
+
+	inputs:
+		none
+
+
+
+	outputs:
+		The value of the current addressing mode:
+
+	    =  ===================
+	    0  28-bit
+	    1  48-bit
+	    2  48-bit doing 28-bit
+	    3  64-bit
+	    =  ===================
+
+
+
+HDIO_GET_BUSSTATE
+	get the bus state of the hwif
+
+
+	usage::
+
+	  long state;
+
+	  ioctl(fd, HDIO_SCAN_HWIF, &state);
+
+	inputs:
+		none
+
+
+
+	outputs:
+		Current power state of the IDE bus.  One of BUSSTATE_OFF,
+		BUSSTATE_ON, or BUSSTATE_TRISTATE
+
+	error returns:
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+
+
+
+
+HDIO_SET_BUSSTATE
+	set the bus state of the hwif
+
+
+	usage::
+
+	  int state;
+
+	  ...
+	  ioctl(fd, HDIO_SCAN_HWIF, state);
+
+	inputs:
+		Desired IDE power state.  One of BUSSTATE_OFF, BUSSTATE_ON,
+		or BUSSTATE_TRISTATE
+
+	outputs:
+		none
+
+
+
+	error returns:
+	  - EACCES	Access denied:  requires CAP_SYS_RAWIO
+	  - EOPNOTSUPP	Hardware interface does not support bus power control
+
+
+
+
+HDIO_TRISTATE_HWIF
+	execute a channel tristate
+
+
+	Not implemented, as of 2.6.8.1.  See HDIO_SET_BUSSTATE
+
+
+
+HDIO_DRIVE_RESET
+	execute a device reset
+
+
+	usage::
+
+	  int args[3]
+
+	  ...
+	  ioctl(fd, HDIO_DRIVE_RESET, args);
+
+	inputs:
+		none
+
+
+
+	outputs:
+		none
+
+
+
+	error returns:
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+	  - ENXIO	No such device:	phy dead or ctl_addr == 0
+	  - EIO		I/O error:	reset timed out or hardware error
+
+	notes:
+
+	  - Execute a reset on the device as soon as the current IO
+	    operation has completed.
+
+	  - Executes an ATAPI soft reset if applicable, otherwise
+	    executes an ATA soft reset on the controller.
+
+
+
+HDIO_DRIVE_TASKFILE
+	execute raw taskfile
+
+
+	Note:
+		If you don't have a copy of the ANSI ATA specification
+		handy, you should probably ignore this ioctl.
+
+	- Execute an ATA disk command directly by writing the "taskfile"
+	  registers of the drive.  Requires ADMIN and RAWIO access
+	  privileges.
+
+	usage::
+
+	  struct {
+
+	    ide_task_request_t req_task;
+	    u8 outbuf[OUTPUT_SIZE];
+	    u8 inbuf[INPUT_SIZE];
+	  } task;
+	  memset(&task.req_task, 0, sizeof(task.req_task));
+	  task.req_task.out_size = sizeof(task.outbuf);
+	  task.req_task.in_size = sizeof(task.inbuf);
+	  ...
+	  ioctl(fd, HDIO_DRIVE_TASKFILE, &task);
+	  ...
+
+	inputs:
+
+	  (See below for details on memory area passed to ioctl.)
+
+	  ============	===================================================
+	  io_ports[8]	values to be written to taskfile registers
+	  hob_ports[8]	high-order bytes, for extended commands.
+	  out_flags	flags indicating which registers are valid
+	  in_flags	flags indicating which registers should be returned
+	  data_phase	see below
+	  req_cmd	command type to be executed
+	  out_size	size of output buffer
+	  outbuf	buffer of data to be transmitted to disk
+	  inbuf		buffer of data to be received from disk (see [1])
+	  ============	===================================================
+
+	outputs:
+
+	  ===========	====================================================
+	  io_ports[]	values returned in the taskfile registers
+	  hob_ports[]	high-order bytes, for extended commands.
+	  out_flags	flags indicating which registers are valid (see [2])
+	  in_flags	flags indicating which registers should be returned
+	  outbuf	buffer of data to be transmitted to disk (see [1])
+	  inbuf		buffer of data to be received from disk
+	  ===========	====================================================
+
+	error returns:
+	  - EACCES	CAP_SYS_ADMIN or CAP_SYS_RAWIO privilege not set.
+	  - ENOMSG	Device is not a disk drive.
+	  - ENOMEM	Unable to allocate memory for task
+	  - EFAULT	req_cmd == TASKFILE_IN_OUT (not implemented as of 2.6.8)
+	  - EPERM
+
+			req_cmd == TASKFILE_MULTI_OUT and drive
+			multi-count not yet set.
+	  - EIO		Drive failed the command.
+
+	notes:
+
+	  [1] READ THE FOLLOWING NOTES *CAREFULLY*.  THIS IOCTL IS
+	  FULL OF GOTCHAS.  Extreme caution should be used with using
+	  this ioctl.  A mistake can easily corrupt data or hang the
+	  system.
+
+	  [2] Both the input and output buffers are copied from the
+	  user and written back to the user, even when not used.
+
+	  [3] If one or more bits are set in out_flags and in_flags is
+	  zero, the following values are used for in_flags.all and
+	  written back into in_flags on completion.
+
+	   * IDE_TASKFILE_STD_IN_FLAGS | (IDE_HOB_STD_IN_FLAGS << 8)
+	     if LBA48 addressing is enabled for the drive
+	   * IDE_TASKFILE_STD_IN_FLAGS
+	     if CHS/LBA28
+
+	  The association between in_flags.all and each enable
+	  bitfield flips depending on endianness; fortunately, TASKFILE
+	  only uses inflags.b.data bit and ignores all other bits.
+	  The end result is that, on any endian machines, it has no
+	  effect other than modifying in_flags on completion.
+
+	  [4] The default value of SELECT is (0xa0|DEV_bit|LBA_bit)
+	  except for four drives per port chipsets.  For four drives
+	  per port chipsets, it's (0xa0|DEV_bit|LBA_bit) for the first
+	  pair and (0x80|DEV_bit|LBA_bit) for the second pair.
+
+	  [5] The argument to the ioctl is a pointer to a region of
+	  memory containing a ide_task_request_t structure, followed
+	  by an optional buffer of data to be transmitted to the
+	  drive, followed by an optional buffer to receive data from
+	  the drive.
+
+	  Command is passed to the disk drive via the ide_task_request_t
+	  structure, which contains these fields:
+
+	    ============	===============================================
+	    io_ports[8]		values for the taskfile registers
+	    hob_ports[8]	high-order bytes, for extended commands
+	    out_flags		flags indicating which entries in the
+				io_ports[] and hob_ports[] arrays
+				contain valid values.  Type ide_reg_valid_t.
+	    in_flags		flags indicating which entries in the
+				io_ports[] and hob_ports[] arrays
+				are expected to contain valid values
+				on return.
+	    data_phase		See below
+	    req_cmd		Command type, see below
+	    out_size		output (user->drive) buffer size, bytes
+	    in_size		input (drive->user) buffer size, bytes
+	    ============	===============================================
+
+	  When out_flags is zero, the following registers are loaded.
+
+	    ============	===============================================
+	    HOB_FEATURE		If the drive supports LBA48
+	    HOB_NSECTOR		If the drive supports LBA48
+	    HOB_SECTOR		If the drive supports LBA48
+	    HOB_LCYL		If the drive supports LBA48
+	    HOB_HCYL		If the drive supports LBA48
+	    FEATURE
+	    NSECTOR
+	    SECTOR
+	    LCYL
+	    HCYL
+	    SELECT		First, masked with 0xE0 if LBA48, 0xEF
+				otherwise; then, or'ed with the default
+				value of SELECT.
+	    ============	===============================================
+
+	  If any bit in out_flags is set, the following registers are loaded.
+
+	    ============	===============================================
+	    HOB_DATA		If out_flags.b.data is set.  HOB_DATA will
+				travel on DD8-DD15 on little endian machines
+				and on DD0-DD7 on big endian machines.
+	    DATA		If out_flags.b.data is set.  DATA will
+				travel on DD0-DD7 on little endian machines
+				and on DD8-DD15 on big endian machines.
+	    HOB_NSECTOR		If out_flags.b.nsector_hob is set
+	    HOB_SECTOR		If out_flags.b.sector_hob is set
+	    HOB_LCYL		If out_flags.b.lcyl_hob is set
+	    HOB_HCYL		If out_flags.b.hcyl_hob is set
+	    FEATURE		If out_flags.b.feature is set
+	    NSECTOR		If out_flags.b.nsector is set
+	    SECTOR		If out_flags.b.sector is set
+	    LCYL		If out_flags.b.lcyl is set
+	    HCYL		If out_flags.b.hcyl is set
+	    SELECT		Or'ed with the default value of SELECT and
+				loaded regardless of out_flags.b.select.
+	    ============	===============================================
+
+	  Taskfile registers are read back from the drive into
+	  {io|hob}_ports[] after the command completes iff one of the
+	  following conditions is met; otherwise, the original values
+	  will be written back, unchanged.
+
+	    1. The drive fails the command (EIO).
+	    2. One or more than one bits are set in out_flags.
+	    3. The requested data_phase is TASKFILE_NO_DATA.
+
+	    ============	===============================================
+	    HOB_DATA		If in_flags.b.data is set.  It will contain
+				DD8-DD15 on little endian machines and
+				DD0-DD7 on big endian machines.
+	    DATA		If in_flags.b.data is set.  It will contain
+				DD0-DD7 on little endian machines and
+				DD8-DD15 on big endian machines.
+	    HOB_FEATURE		If the drive supports LBA48
+	    HOB_NSECTOR		If the drive supports LBA48
+	    HOB_SECTOR		If the drive supports LBA48
+	    HOB_LCYL		If the drive supports LBA48
+	    HOB_HCYL		If the drive supports LBA48
+	    NSECTOR
+	    SECTOR
+	    LCYL
+	    HCYL
+	    ============	===============================================
+
+	  The data_phase field describes the data transfer to be
+	  performed.  Value is one of:
+
+	    ===================        ========================================
+	    TASKFILE_IN
+	    TASKFILE_MULTI_IN
+	    TASKFILE_OUT
+	    TASKFILE_MULTI_OUT
+	    TASKFILE_IN_OUT
+	    TASKFILE_IN_DMA
+	    TASKFILE_IN_DMAQ		== IN_DMA (queueing not supported)
+	    TASKFILE_OUT_DMA
+	    TASKFILE_OUT_DMAQ		== OUT_DMA (queueing not supported)
+	    TASKFILE_P_IN		unimplemented
+	    TASKFILE_P_IN_DMA		unimplemented
+	    TASKFILE_P_IN_DMAQ		unimplemented
+	    TASKFILE_P_OUT		unimplemented
+	    TASKFILE_P_OUT_DMA		unimplemented
+	    TASKFILE_P_OUT_DMAQ		unimplemented
+	    ===================        ========================================
+
+	  The req_cmd field classifies the command type.  It may be
+	  one of:
+
+	    ========================    =======================================
+	    IDE_DRIVE_TASK_NO_DATA
+	    IDE_DRIVE_TASK_SET_XFER	unimplemented
+	    IDE_DRIVE_TASK_IN
+	    IDE_DRIVE_TASK_OUT		unimplemented
+	    IDE_DRIVE_TASK_RAW_WRITE
+	    ========================    =======================================
+
+	  [6] Do not access {in|out}_flags->all except for resetting
+	  all the bits.  Always access individual bit fields.  ->all
+	  value will flip depending on endianness.  For the same
+	  reason, do not use IDE_{TASKFILE|HOB}_STD_{OUT|IN}_FLAGS
+	  constants defined in hdreg.h.
+
+
+
+HDIO_DRIVE_CMD
+	execute a special drive command
+
+
+	Note:  If you don't have a copy of the ANSI ATA specification
+	handy, you should probably ignore this ioctl.
+
+	usage::
+
+	  u8 args[4+XFER_SIZE];
+
+	  ...
+	  ioctl(fd, HDIO_DRIVE_CMD, args);
+
+	inputs:
+	    Commands other than WIN_SMART:
+
+	    =======     =======
+	    args[0]	COMMAND
+	    args[1]	NSECTOR
+	    args[2]	FEATURE
+	    args[3]	NSECTOR
+	    =======     =======
+
+	    WIN_SMART:
+
+	    =======     =======
+	    args[0]	COMMAND
+	    args[1]	SECTOR
+	    args[2]	FEATURE
+	    args[3]	NSECTOR
+	    =======     =======
+
+	outputs:
+		args[] buffer is filled with register values followed by any
+
+
+	  data returned by the disk.
+
+	    ========	====================================================
+	    args[0]	status
+	    args[1]	error
+	    args[2]	NSECTOR
+	    args[3]	undefined
+	    args[4+]	NSECTOR * 512 bytes of data returned by the command.
+	    ========	====================================================
+
+	error returns:
+	  - EACCES	Access denied:  requires CAP_SYS_RAWIO
+	  - ENOMEM	Unable to allocate memory for task
+	  - EIO		Drive reports error
+
+	notes:
+
+	  [1] For commands other than WIN_SMART, args[1] should equal
+	  args[3].  SECTOR, LCYL and HCYL are undefined.  For
+	  WIN_SMART, 0x4f and 0xc2 are loaded into LCYL and HCYL
+	  respectively.  In both cases SELECT will contain the default
+	  value for the drive.  Please refer to HDIO_DRIVE_TASKFILE
+	  notes for the default value of SELECT.
+
+	  [2] If NSECTOR value is greater than zero and the drive sets
+	  DRQ when interrupting for the command, NSECTOR * 512 bytes
+	  are read from the device into the area following NSECTOR.
+	  In the above example, the area would be
+	  args[4..4+XFER_SIZE].  16bit PIO is used regardless of
+	  HDIO_SET_32BIT setting.
+
+	  [3] If COMMAND == WIN_SETFEATURES && FEATURE == SETFEATURES_XFER
+	  && NSECTOR >= XFER_SW_DMA_0 && the drive supports any DMA
+	  mode, IDE driver will try to tune the transfer mode of the
+	  drive accordingly.
+
+
+
+HDIO_DRIVE_TASK
+	execute task and special drive command
+
+
+	Note:  If you don't have a copy of the ANSI ATA specification
+	handy, you should probably ignore this ioctl.
+
+	usage::
+
+	  u8 args[7];
+
+	  ...
+	  ioctl(fd, HDIO_DRIVE_TASK, args);
+
+	inputs:
+	    Taskfile register values:
+
+	    =======	=======
+	    args[0]	COMMAND
+	    args[1]	FEATURE
+	    args[2]	NSECTOR
+	    args[3]	SECTOR
+	    args[4]	LCYL
+	    args[5]	HCYL
+	    args[6]	SELECT
+	    =======	=======
+
+	outputs:
+	    Taskfile register values:
+
+
+	    =======	=======
+	    args[0]	status
+	    args[1]	error
+	    args[2]	NSECTOR
+	    args[3]	SECTOR
+	    args[4]	LCYL
+	    args[5]	HCYL
+	    args[6]	SELECT
+	    =======	=======
+
+	error returns:
+	  - EACCES	Access denied:  requires CAP_SYS_RAWIO
+	  - ENOMEM	Unable to allocate memory for task
+	  - ENOMSG	Device is not a disk drive.
+	  - EIO		Drive failed the command.
+
+	notes:
+
+	  [1] DEV bit (0x10) of SELECT register is ignored and the
+	  appropriate value for the drive is used.  All other bits
+	  are used unaltered.
+
+
+
+HDIO_DRIVE_CMD_AEB
+	HDIO_DRIVE_TASK
+
+
+	Not implemented, as of 2.6.8.1
+
+
+
+HDIO_SET_32BIT
+	change io_32bit flags
+
+
+	usage::
+
+	  int val;
+
+	  ioctl(fd, HDIO_SET_32BIT, val);
+
+	inputs:
+		New value for io_32bit flag
+
+
+
+	outputs:
+		none
+
+
+
+	error return:
+	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+	  - EINVAL	value out of range [0 3]
+	  - EBUSY	Controller busy
+
+
+
+
+HDIO_SET_NOWERR
+	change ignore-write-error flag
+
+
+	usage::
+
+	  int val;
+
+	  ioctl(fd, HDIO_SET_NOWERR, val);
+
+	inputs:
+		New value for ignore-write-error flag.  Used for ignoring
+
+
+	  WRERR_STAT
+
+	outputs:
+		none
+
+
+
+	error return:
+	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+	  - EINVAL	value out of range [0 1]
+	  - EBUSY		Controller busy
+
+
+
+HDIO_SET_DMA
+	change use-dma flag
+
+
+	usage::
+
+	  long val;
+
+	  ioctl(fd, HDIO_SET_DMA, val);
+
+	inputs:
+		New value for use-dma flag
+
+
+
+	outputs:
+		none
+
+
+
+	error return:
+	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+	  - EINVAL	value out of range [0 1]
+	  - EBUSY	Controller busy
+
+
+
+HDIO_SET_PIO_MODE
+	reconfig interface to new speed
+
+
+	usage::
+
+	  long val;
+
+	  ioctl(fd, HDIO_SET_PIO_MODE, val);
+
+	inputs:
+		New interface speed.
+
+
+
+	outputs:
+		none
+
+
+
+	error return:
+	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+	  - EINVAL	value out of range [0 255]
+	  - EBUSY	Controller busy
+
+
+
+HDIO_SCAN_HWIF
+	register and (re)scan interface
+
+
+	usage::
+
+	  int args[3]
+
+	  ...
+	  ioctl(fd, HDIO_SCAN_HWIF, args);
+
+	inputs:
+
+	  =======	=========================
+	  args[0]	io address to probe
+
+
+	  args[1]	control address to probe
+	  args[2]	irq number
+	  =======	=========================
+
+	outputs:
+		none
+
+
+
+	error returns:
+	  - EACCES	Access denied:  requires CAP_SYS_RAWIO
+	  - EIO		Probe failed.
+
+	notes:
+		This ioctl initializes the addresses and irq for a disk
+		controller, probes for drives, and creates /proc/ide
+		interfaces as appropriate.
+
+
+
+HDIO_UNREGISTER_HWIF
+	unregister interface
+
+
+	usage::
+
+	  int index;
+
+	  ioctl(fd, HDIO_UNREGISTER_HWIF, index);
+
+	inputs:
+		index		index of hardware interface to unregister
+
+
+
+	outputs:
+		none
+
+
+
+	error returns:
+	  - EACCES	Access denied:  requires CAP_SYS_RAWIO
+
+	notes:
+		This ioctl removes a hardware interface from the kernel.
+
+		Currently (2.6.8) this ioctl silently fails if any drive on
+		the interface is busy.
+
+
+
+HDIO_SET_WCACHE
+	change write cache enable-disable
+
+
+	usage::
+
+	  int val;
+
+	  ioctl(fd, HDIO_SET_WCACHE, val);
+
+	inputs:
+		New value for write cache enable
+
+
+
+	outputs:
+		none
+
+
+
+	error return:
+	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+	  - EINVAL	value out of range [0 1]
+	  - EBUSY	Controller busy
+
+
+
+HDIO_SET_ACOUSTIC
+	change acoustic behavior
+
+
+	usage::
+
+	  int val;
+
+	  ioctl(fd, HDIO_SET_ACOUSTIC, val);
+
+	inputs:
+		New value for drive acoustic settings
+
+
+
+	outputs:
+		none
+
+
+
+	error return:
+	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+	  - EINVAL	value out of range [0 254]
+	  - EBUSY	Controller busy
+
+
+
+HDIO_SET_QDMA
+	change use-qdma flag
+
+
+	Not implemented, as of 2.6.8.1
+
+
+
+HDIO_SET_ADDRESS
+	change lba addressing modes
+
+
+	usage::
+
+	  int val;
+
+	  ioctl(fd, HDIO_SET_ADDRESS, val);
+
+	inputs:
+		New value for addressing mode
+
+	    =   ===================
+	    0   28-bit
+	    1   48-bit
+	    2   48-bit doing 28-bit
+	    =   ===================
+
+	outputs:
+		none
+
+
+
+	error return:
+	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+	  - EINVAL	value out of range [0 2]
+	  - EBUSY		Controller busy
+	  - EIO		Drive does not support lba48 mode.
+
+
+HDIO_SET_IDE_SCSI
+	usage::
+
+
+	  long val;
+
+	  ioctl(fd, HDIO_SET_IDE_SCSI, val);
+
+	inputs:
+		New value for scsi emulation mode (?)
+
+
+
+	outputs:
+		none
+
+
+
+	error return:
+	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
+	  - EINVAL	value out of range [0 1]
+	  - EBUSY	Controller busy
+
+
+
+HDIO_SET_SCSI_IDE
+	Not implemented, as of 2.6.8.1
diff --git a/Documentation/ioctl/hdio.txt b/Documentation/ioctl/hdio.txt
deleted file mode 100644
index 18eb98c44ffe..000000000000
--- a/Documentation/ioctl/hdio.txt
+++ /dev/null
@@ -1,1071 +0,0 @@
-		Summary of HDIO_ ioctl calls.
-		============================
-
-		Edward A. Falk <efalk@google.com>
-
-		November, 2004
-
-This document attempts to describe the ioctl(2) calls supported by
-the HD/IDE layer.  These are by-and-large implemented (as of Linux 2.6)
-in drivers/ide/ide.c and drivers/block/scsi_ioctl.c
-
-ioctl values are listed in <linux/hdreg.h>.  As of this writing, they
-are as follows:
-
-    ioctls that pass argument pointers to user space:
-
-	HDIO_GETGEO		get device geometry
-	HDIO_GET_UNMASKINTR	get current unmask setting
-	HDIO_GET_MULTCOUNT	get current IDE blockmode setting
-	HDIO_GET_QDMA		get use-qdma flag
-	HDIO_SET_XFER		set transfer rate via proc
-	HDIO_OBSOLETE_IDENTITY	OBSOLETE, DO NOT USE
-	HDIO_GET_KEEPSETTINGS	get keep-settings-on-reset flag
-	HDIO_GET_32BIT		get current io_32bit setting
-	HDIO_GET_NOWERR		get ignore-write-error flag
-	HDIO_GET_DMA		get use-dma flag
-	HDIO_GET_NICE		get nice flags
-	HDIO_GET_IDENTITY	get IDE identification info
-	HDIO_GET_WCACHE		get write cache mode on|off
-	HDIO_GET_ACOUSTIC	get acoustic value
-	HDIO_GET_ADDRESS	get sector addressing mode
-	HDIO_GET_BUSSTATE	get the bus state of the hwif
-	HDIO_TRISTATE_HWIF	execute a channel tristate
-	HDIO_DRIVE_RESET	execute a device reset
-	HDIO_DRIVE_TASKFILE	execute raw taskfile
-	HDIO_DRIVE_TASK		execute task and special drive command
-	HDIO_DRIVE_CMD		execute a special drive command
-	HDIO_DRIVE_CMD_AEB	HDIO_DRIVE_TASK
-
-    ioctls that pass non-pointer values:
-
-	HDIO_SET_MULTCOUNT	change IDE blockmode
-	HDIO_SET_UNMASKINTR	permit other irqs during I/O
-	HDIO_SET_KEEPSETTINGS	keep ioctl settings on reset
-	HDIO_SET_32BIT		change io_32bit flags
-	HDIO_SET_NOWERR		change ignore-write-error flag
-	HDIO_SET_DMA		change use-dma flag
-	HDIO_SET_PIO_MODE	reconfig interface to new speed
-	HDIO_SCAN_HWIF		register and (re)scan interface
-	HDIO_SET_NICE		set nice flags
-	HDIO_UNREGISTER_HWIF	unregister interface
-	HDIO_SET_WCACHE		change write cache enable-disable
-	HDIO_SET_ACOUSTIC	change acoustic behavior
-	HDIO_SET_BUSSTATE	set the bus state of the hwif
-	HDIO_SET_QDMA		change use-qdma flag
-	HDIO_SET_ADDRESS	change lba addressing modes
-
-	HDIO_SET_IDE_SCSI	Set scsi emulation mode on/off
-	HDIO_SET_SCSI_IDE	not implemented yet
-
-
-The information that follows was determined from reading kernel source
-code.  It is likely that some corrections will be made over time.
-
-
-
-
-
-
-
-General:
-
-	Unless otherwise specified, all ioctl calls return 0 on success
-	and -1 with errno set to an appropriate value on error.
-
-	Unless otherwise specified, all ioctl calls return -1 and set
-	errno to EFAULT on a failed attempt to copy data to or from user
-	address space.
-
-	Unless otherwise specified, all data structures and constants
-	are defined in <linux/hdreg.h>
-
-
-
-HDIO_GETGEO			get device geometry
-
-	usage:
-
-	  struct hd_geometry geom;
-	  ioctl(fd, HDIO_GETGEO, &geom);
-
-
-	inputs:		none
-
-	outputs:
-
-	  hd_geometry structure containing:
-
-	    heads	number of heads
-	    sectors	number of sectors/track
-	    cylinders	number of cylinders, mod 65536
-	    start	starting sector of this partition.
-
-
-	error returns:
-	  EINVAL	if the device is not a disk drive or floppy drive,
-	  		or if the user passes a null pointer
-
-
-	notes:
-
-	  Not particularly useful with modern disk drives, whose geometry
-	  is a polite fiction anyway.  Modern drives are addressed
-	  purely by sector number nowadays (lba addressing), and the
-	  drive geometry is an abstraction which is actually subject
-	  to change.  Currently (as of Nov 2004), the geometry values
-	  are the "bios" values -- presumably the values the drive had
-	  when Linux first booted.
-
-	  In addition, the cylinders field of the hd_geometry is an
-	  unsigned short, meaning that on most architectures, this
-	  ioctl will not return a meaningful value on drives with more
-	  than 65535 tracks.
-
-	  The start field is unsigned long, meaning that it will not
-	  contain a meaningful value for disks over 219 Gb in size.
-
-
-
-
-HDIO_GET_UNMASKINTR		get current unmask setting
-
-	usage:
-
-	  long val;
-	  ioctl(fd, HDIO_GET_UNMASKINTR, &val);
-
-	inputs:		none
-
-	outputs:
-	  The value of the drive's current unmask setting
-
-
-
-HDIO_SET_UNMASKINTR		permit other irqs during I/O
-
-	usage:
-
-	  unsigned long val;
-	  ioctl(fd, HDIO_SET_UNMASKINTR, val);
-
-	inputs:
-	  New value for unmask flag
-
-	outputs:	none
-
-	error return:
-	  EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  EINVAL	value out of range [0 1]
-	  EBUSY		Controller busy
-
-
-
-
-HDIO_GET_MULTCOUNT		get current IDE blockmode setting
-
-	usage:
-
-	  long val;
-	  ioctl(fd, HDIO_GET_MULTCOUNT, &val);
-
-	inputs:		none
-
-	outputs:
-	  The value of the current IDE block mode setting.  This
-	  controls how many sectors the drive will transfer per
-	  interrupt.
-
-
-
-HDIO_SET_MULTCOUNT		change IDE blockmode
-
-	usage:
-
-	  int val;
-	  ioctl(fd, HDIO_SET_MULTCOUNT, val);
-
-	inputs:
-	  New value for IDE block mode setting.  This controls how many
-	  sectors the drive will transfer per interrupt.
-
-	outputs:	none
-
-	error return:
-	  EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  EINVAL	value out of range supported by disk.
-	  EBUSY		Controller busy or blockmode already set.
-	  EIO		Drive did not accept new block mode.
-
-	notes:
-
-	  Source code comments read:
-
-	    This is tightly woven into the driver->do_special cannot
-	    touch.  DON'T do it again until a total personality rewrite
-	    is committed.
-
-	  If blockmode has already been set, this ioctl will fail with
-	  EBUSY
-
-
-
-HDIO_GET_QDMA			get use-qdma flag
-
-	Not implemented, as of 2.6.8.1
-
-
-
-HDIO_SET_XFER			set transfer rate via proc
-
-	Not implemented, as of 2.6.8.1
-
-
-
-HDIO_OBSOLETE_IDENTITY		OBSOLETE, DO NOT USE
-
-	Same as HDIO_GET_IDENTITY (see below), except that it only
-	returns the first 142 bytes of drive identity information.
-
-
-
-HDIO_GET_IDENTITY		get IDE identification info
-
-	usage:
-
-	  unsigned char identity[512];
-	  ioctl(fd, HDIO_GET_IDENTITY, identity);
-
-	inputs:		none
-
-	outputs:
-
-	  ATA drive identity information.  For full description, see
-	  the IDENTIFY DEVICE and IDENTIFY PACKET DEVICE commands in
-	  the ATA specification.
-
-	error returns:
-	  EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
-	  ENOMSG	IDENTIFY DEVICE information not available
-
-	notes:
-
-	  Returns information that was obtained when the drive was
-	  probed.  Some of this information is subject to change, and
-	  this ioctl does not re-probe the drive to update the
-	  information.
-
-	  This information is also available from /proc/ide/hdX/identify
-
-
-
-HDIO_GET_KEEPSETTINGS		get keep-settings-on-reset flag
-
-	usage:
-
-	  long val;
-	  ioctl(fd, HDIO_GET_KEEPSETTINGS, &val);
-
-	inputs:		none
-
-	outputs:
-	  The value of the current "keep settings" flag
-
-	notes:
-
-	  When set, indicates that kernel should restore settings
-	  after a drive reset.
-
-
-
-HDIO_SET_KEEPSETTINGS		keep ioctl settings on reset
-
-	usage:
-
-	  long val;
-	  ioctl(fd, HDIO_SET_KEEPSETTINGS, val);
-
-	inputs:
-	  New value for keep_settings flag
-
-	outputs:	none
-
-	error return:
-	  EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  EINVAL	value out of range [0 1]
-	  EBUSY		Controller busy
-
-
-
-HDIO_GET_32BIT			get current io_32bit setting
-
-	usage:
-
-	  long val;
-	  ioctl(fd, HDIO_GET_32BIT, &val);
-
-	inputs:		none
-
-	outputs:
-	  The value of the current io_32bit setting
-
-	notes:
-
-	  0=16-bit, 1=32-bit, 2,3 = 32bit+sync
-
-
-
-HDIO_GET_NOWERR			get ignore-write-error flag
-
-	usage:
-
-	  long val;
-	  ioctl(fd, HDIO_GET_NOWERR, &val);
-
-	inputs:		none
-
-	outputs:
-	  The value of the current ignore-write-error flag
-
-
-
-HDIO_GET_DMA			get use-dma flag
-
-	usage:
-
-	  long val;
-	  ioctl(fd, HDIO_GET_DMA, &val);
-
-	inputs:		none
-
-	outputs:
-	  The value of the current use-dma flag
-
-
-
-HDIO_GET_NICE			get nice flags
-
-	usage:
-
-	  long nice;
-	  ioctl(fd, HDIO_GET_NICE, &nice);
-
-	inputs:		none
-
-	outputs:
-
-	  The drive's "nice" values.
-
-	notes:
-
-	  Per-drive flags which determine when the system will give more
-	  bandwidth to other devices sharing the same IDE bus.
-	  See <linux/hdreg.h>, near symbol IDE_NICE_DSC_OVERLAP.
-
-
-
-
-HDIO_SET_NICE			set nice flags
-
-	usage:
-
-	  unsigned long nice;
-	  ...
-	  ioctl(fd, HDIO_SET_NICE, nice);
-
-	inputs:
-	  bitmask of nice flags.
-
-	outputs:	none
-
-	error returns:
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  EPERM		Flags other than DSC_OVERLAP and NICE_1 set.
-	  EPERM		DSC_OVERLAP specified but not supported by drive
-
-	notes:
-
-	  This ioctl sets the DSC_OVERLAP and NICE_1 flags from values
-	  provided by the user.
-
-	  Nice flags are listed in <linux/hdreg.h>, starting with
-	  IDE_NICE_DSC_OVERLAP.  These values represent shifts.
-
-
-
-
-
-HDIO_GET_WCACHE			get write cache mode on|off
-
-	usage:
-
-	  long val;
-	  ioctl(fd, HDIO_GET_WCACHE, &val);
-
-	inputs:		none
-
-	outputs:
-	  The value of the current write cache mode
-
-
-
-HDIO_GET_ACOUSTIC		get acoustic value
-
-	usage:
-
-	  long val;
-	  ioctl(fd, HDIO_GET_ACOUSTIC, &val);
-
-	inputs:		none
-
-	outputs:
-	  The value of the current acoustic settings
-
-	notes:
-
-	  See HDIO_SET_ACOUSTIC
-
-
-
-HDIO_GET_ADDRESS
-
-	usage:
-
-	  long val;
-	  ioctl(fd, HDIO_GET_ADDRESS, &val);
-
-	inputs:		none
-
-	outputs:
-	  The value of the current addressing mode:
-	    0 = 28-bit
-	    1 = 48-bit
-	    2 = 48-bit doing 28-bit
-	    3 = 64-bit
-
-
-
-HDIO_GET_BUSSTATE		get the bus state of the hwif
-
-	usage:
-
-	  long state;
-	  ioctl(fd, HDIO_SCAN_HWIF, &state);
-
-	inputs:		none
-
-	outputs:
-	  Current power state of the IDE bus.  One of BUSSTATE_OFF,
-	  BUSSTATE_ON, or BUSSTATE_TRISTATE
-
-	error returns:
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-
-
-
-
-HDIO_SET_BUSSTATE		set the bus state of the hwif
-
-	usage:
-
-	  int state;
-	  ...
-	  ioctl(fd, HDIO_SCAN_HWIF, state);
-
-	inputs:
-	  Desired IDE power state.  One of BUSSTATE_OFF, BUSSTATE_ON,
-	  or BUSSTATE_TRISTATE
-
-	outputs:	none
-
-	error returns:
-	  EACCES	Access denied:  requires CAP_SYS_RAWIO
-	  EOPNOTSUPP	Hardware interface does not support bus power control
-
-
-
-
-HDIO_TRISTATE_HWIF		execute a channel tristate
-
-	Not implemented, as of 2.6.8.1.  See HDIO_SET_BUSSTATE
-
-
-
-HDIO_DRIVE_RESET		execute a device reset
-
-	usage:
-
-	  int args[3]
-	  ...
-	  ioctl(fd, HDIO_DRIVE_RESET, args);
-
-	inputs:		none
-
-	outputs:	none
-
-	error returns:
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  ENXIO		No such device:	phy dead or ctl_addr == 0
-	  EIO		I/O error:	reset timed out or hardware error
-
-	notes:
-
-	  Execute a reset on the device as soon as the current IO
-	  operation has completed.
-
-	  Executes an ATAPI soft reset if applicable, otherwise
-	  executes an ATA soft reset on the controller.
-
-
-
-HDIO_DRIVE_TASKFILE		execute raw taskfile
-
-	Note:  If you don't have a copy of the ANSI ATA specification
-	handy, you should probably ignore this ioctl.
-
-	Execute an ATA disk command directly by writing the "taskfile"
-	registers of the drive.  Requires ADMIN and RAWIO access
-	privileges.
-
-	usage:
-
-	  struct {
-	    ide_task_request_t req_task;
-	    u8 outbuf[OUTPUT_SIZE];
-	    u8 inbuf[INPUT_SIZE];
-	  } task;
-	  memset(&task.req_task, 0, sizeof(task.req_task));
-	  task.req_task.out_size = sizeof(task.outbuf);
-	  task.req_task.in_size = sizeof(task.inbuf);
-	  ...
-	  ioctl(fd, HDIO_DRIVE_TASKFILE, &task);
-	  ...
-
-	inputs:
-
-	  (See below for details on memory area passed to ioctl.)
-
-	  io_ports[8]	values to be written to taskfile registers
-	  hob_ports[8]	high-order bytes, for extended commands.
-	  out_flags	flags indicating which registers are valid
-	  in_flags	flags indicating which registers should be returned
-	  data_phase	see below
-	  req_cmd	command type to be executed
-	  out_size	size of output buffer
-	  outbuf	buffer of data to be transmitted to disk
-	  inbuf		buffer of data to be received from disk (see [1])
-
-	outputs:
-
-	  io_ports[]	values returned in the taskfile registers
-	  hob_ports[]	high-order bytes, for extended commands.
-	  out_flags	flags indicating which registers are valid (see [2])
-	  in_flags	flags indicating which registers should be returned
-	  outbuf	buffer of data to be transmitted to disk (see [1])
-	  inbuf		buffer of data to be received from disk
-
-	error returns:
-	  EACCES	CAP_SYS_ADMIN or CAP_SYS_RAWIO privilege not set.
-	  ENOMSG	Device is not a disk drive.
-	  ENOMEM	Unable to allocate memory for task
-	  EFAULT	req_cmd == TASKFILE_IN_OUT (not implemented as of 2.6.8)
-	  EPERM		req_cmd == TASKFILE_MULTI_OUT and drive
-	  		multi-count not yet set.
-	  EIO		Drive failed the command.
-
-	notes:
-
-	  [1] READ THE FOLLOWING NOTES *CAREFULLY*.  THIS IOCTL IS
-	  FULL OF GOTCHAS.  Extreme caution should be used with using
-	  this ioctl.  A mistake can easily corrupt data or hang the
-	  system.
-
-	  [2] Both the input and output buffers are copied from the
-	  user and written back to the user, even when not used.
-
-	  [3] If one or more bits are set in out_flags and in_flags is
-	  zero, the following values are used for in_flags.all and
-	  written back into in_flags on completion.
-
-	   * IDE_TASKFILE_STD_IN_FLAGS | (IDE_HOB_STD_IN_FLAGS << 8)
-	     if LBA48 addressing is enabled for the drive
-	   * IDE_TASKFILE_STD_IN_FLAGS
-	     if CHS/LBA28
-
-	  The association between in_flags.all and each enable
-	  bitfield flips depending on endianness; fortunately, TASKFILE
-	  only uses inflags.b.data bit and ignores all other bits.
-	  The end result is that, on any endian machines, it has no
-	  effect other than modifying in_flags on completion.
-
-	  [4] The default value of SELECT is (0xa0|DEV_bit|LBA_bit)
-	  except for four drives per port chipsets.  For four drives
-	  per port chipsets, it's (0xa0|DEV_bit|LBA_bit) for the first
-	  pair and (0x80|DEV_bit|LBA_bit) for the second pair.
-
-	  [5] The argument to the ioctl is a pointer to a region of
-	  memory containing a ide_task_request_t structure, followed
-	  by an optional buffer of data to be transmitted to the
-	  drive, followed by an optional buffer to receive data from
-	  the drive.
-
-	  Command is passed to the disk drive via the ide_task_request_t
-	  structure, which contains these fields:
-
-	    io_ports[8]		values for the taskfile registers
-	    hob_ports[8]	high-order bytes, for extended commands
-	    out_flags		flags indicating which entries in the
-	    			io_ports[] and hob_ports[] arrays
-				contain valid values.  Type ide_reg_valid_t.
-	    in_flags		flags indicating which entries in the
-	    			io_ports[] and hob_ports[] arrays
-				are expected to contain valid values
-				on return.
-	    data_phase		See below
-	    req_cmd		Command type, see below
-	    out_size		output (user->drive) buffer size, bytes
-	    in_size		input (drive->user) buffer size, bytes
-
-	  When out_flags is zero, the following registers are loaded.
-
-	    HOB_FEATURE		If the drive supports LBA48
-	    HOB_NSECTOR		If the drive supports LBA48
-	    HOB_SECTOR		If the drive supports LBA48
-	    HOB_LCYL		If the drive supports LBA48
-	    HOB_HCYL		If the drive supports LBA48
-	    FEATURE
-	    NSECTOR
-	    SECTOR
-	    LCYL
-	    HCYL
-	    SELECT		First, masked with 0xE0 if LBA48, 0xEF
-				otherwise; then, or'ed with the default
-				value of SELECT.
-
-	  If any bit in out_flags is set, the following registers are loaded.
-
-	    HOB_DATA		If out_flags.b.data is set.  HOB_DATA will
-				travel on DD8-DD15 on little endian machines
-				and on DD0-DD7 on big endian machines.
-	    DATA		If out_flags.b.data is set.  DATA will
-				travel on DD0-DD7 on little endian machines
-				and on DD8-DD15 on big endian machines.
-	    HOB_NSECTOR		If out_flags.b.nsector_hob is set
-	    HOB_SECTOR		If out_flags.b.sector_hob is set
-	    HOB_LCYL		If out_flags.b.lcyl_hob is set
-	    HOB_HCYL		If out_flags.b.hcyl_hob is set
-	    FEATURE		If out_flags.b.feature is set
-	    NSECTOR		If out_flags.b.nsector is set
-	    SECTOR		If out_flags.b.sector is set
-	    LCYL		If out_flags.b.lcyl is set
-	    HCYL		If out_flags.b.hcyl is set
-	    SELECT		Or'ed with the default value of SELECT and
-				loaded regardless of out_flags.b.select.
-
-	  Taskfile registers are read back from the drive into
-	  {io|hob}_ports[] after the command completes iff one of the
-	  following conditions is met; otherwise, the original values
-	  will be written back, unchanged.
-
-	    1. The drive fails the command (EIO).
-	    2. One or more than one bits are set in out_flags.
-	    3. The requested data_phase is TASKFILE_NO_DATA.
-
-	    HOB_DATA		If in_flags.b.data is set.  It will contain
-				DD8-DD15 on little endian machines and
-				DD0-DD7 on big endian machines.
-	    DATA		If in_flags.b.data is set.  It will contain
-				DD0-DD7 on little endian machines and
-				DD8-DD15 on big endian machines.
-	    HOB_FEATURE		If the drive supports LBA48
-	    HOB_NSECTOR		If the drive supports LBA48
-	    HOB_SECTOR		If the drive supports LBA48
-	    HOB_LCYL		If the drive supports LBA48
-	    HOB_HCYL		If the drive supports LBA48
-	    NSECTOR
-	    SECTOR
-	    LCYL
-	    HCYL
-
-	  The data_phase field describes the data transfer to be
-	  performed.  Value is one of:
-
-	    TASKFILE_IN
-	    TASKFILE_MULTI_IN
-	    TASKFILE_OUT
-	    TASKFILE_MULTI_OUT
-	    TASKFILE_IN_OUT
-	    TASKFILE_IN_DMA
-	    TASKFILE_IN_DMAQ		== IN_DMA (queueing not supported)
-	    TASKFILE_OUT_DMA
-	    TASKFILE_OUT_DMAQ		== OUT_DMA (queueing not supported)
-	    TASKFILE_P_IN		unimplemented
-	    TASKFILE_P_IN_DMA		unimplemented
-	    TASKFILE_P_IN_DMAQ		unimplemented
-	    TASKFILE_P_OUT		unimplemented
-	    TASKFILE_P_OUT_DMA		unimplemented
-	    TASKFILE_P_OUT_DMAQ		unimplemented
-
-	  The req_cmd field classifies the command type.  It may be
-	  one of:
-
-	    IDE_DRIVE_TASK_NO_DATA
-	    IDE_DRIVE_TASK_SET_XFER	unimplemented
-	    IDE_DRIVE_TASK_IN
-	    IDE_DRIVE_TASK_OUT		unimplemented
-	    IDE_DRIVE_TASK_RAW_WRITE
-
-	  [6] Do not access {in|out}_flags->all except for resetting
-	  all the bits.  Always access individual bit fields.  ->all
-	  value will flip depending on endianness.  For the same
-	  reason, do not use IDE_{TASKFILE|HOB}_STD_{OUT|IN}_FLAGS
-	  constants defined in hdreg.h.
-
-
-
-HDIO_DRIVE_CMD			execute a special drive command
-
-	Note:  If you don't have a copy of the ANSI ATA specification
-	handy, you should probably ignore this ioctl.
-
-	usage:
-
-	  u8 args[4+XFER_SIZE];
-	  ...
-	  ioctl(fd, HDIO_DRIVE_CMD, args);
-
-	inputs:
-
-	  Commands other than WIN_SMART
-	    args[0]	COMMAND
-	    args[1]	NSECTOR
-	    args[2]	FEATURE
-	    args[3]	NSECTOR
-
-	  WIN_SMART
-	    args[0]	COMMAND
-	    args[1]	SECTOR
-	    args[2]	FEATURE
-	    args[3]	NSECTOR
-
-	outputs:
-
-	  args[] buffer is filled with register values followed by any
-	  data returned by the disk.
-	    args[0]	status
-	    args[1]	error
-	    args[2]	NSECTOR
-	    args[3]	undefined
-	    args[4+]	NSECTOR * 512 bytes of data returned by the command.
-
-	error returns:
-	  EACCES	Access denied:  requires CAP_SYS_RAWIO
-	  ENOMEM	Unable to allocate memory for task
-	  EIO		Drive reports error
-
-	notes:
-
-	  [1] For commands other than WIN_SMART, args[1] should equal
-	  args[3].  SECTOR, LCYL and HCYL are undefined.  For
-	  WIN_SMART, 0x4f and 0xc2 are loaded into LCYL and HCYL
-	  respectively.  In both cases SELECT will contain the default
-	  value for the drive.  Please refer to HDIO_DRIVE_TASKFILE
-	  notes for the default value of SELECT.
-
-	  [2] If NSECTOR value is greater than zero and the drive sets
-	  DRQ when interrupting for the command, NSECTOR * 512 bytes
-	  are read from the device into the area following NSECTOR.
-	  In the above example, the area would be
-	  args[4..4+XFER_SIZE].  16bit PIO is used regardless of
-	  HDIO_SET_32BIT setting.
-
-	  [3] If COMMAND == WIN_SETFEATURES && FEATURE == SETFEATURES_XFER
-	  && NSECTOR >= XFER_SW_DMA_0 && the drive supports any DMA
-	  mode, IDE driver will try to tune the transfer mode of the
-	  drive accordingly.
-
-
-
-HDIO_DRIVE_TASK			execute task and special drive command
-
-	Note:  If you don't have a copy of the ANSI ATA specification
-	handy, you should probably ignore this ioctl.
-
-	usage:
-
-	  u8 args[7];
-	  ...
-	  ioctl(fd, HDIO_DRIVE_TASK, args);
-
-	inputs:
-
-	  Taskfile register values:
-	    args[0]	COMMAND
-	    args[1]	FEATURE
-	    args[2]	NSECTOR
-	    args[3]	SECTOR
-	    args[4]	LCYL
-	    args[5]	HCYL
-	    args[6]	SELECT
-
-	outputs:
-
-	  Taskfile register values:
-	    args[0]	status
-	    args[1]	error
-	    args[2]	NSECTOR
-	    args[3]	SECTOR
-	    args[4]	LCYL
-	    args[5]	HCYL
-	    args[6]	SELECT
-
-	error returns:
-	  EACCES	Access denied:  requires CAP_SYS_RAWIO
-	  ENOMEM	Unable to allocate memory for task
-	  ENOMSG	Device is not a disk drive.
-	  EIO		Drive failed the command.
-
-	notes:
-
-	  [1] DEV bit (0x10) of SELECT register is ignored and the
-	  appropriate value for the drive is used.  All other bits
-	  are used unaltered.
-
-
-
-HDIO_DRIVE_CMD_AEB		HDIO_DRIVE_TASK
-
-	Not implemented, as of 2.6.8.1
-
-
-
-HDIO_SET_32BIT			change io_32bit flags
-
-	usage:
-
-	  int val;
-	  ioctl(fd, HDIO_SET_32BIT, val);
-
-	inputs:
-	  New value for io_32bit flag
-
-	outputs:	none
-
-	error return:
-	  EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  EINVAL	value out of range [0 3]
-	  EBUSY		Controller busy
-
-
-
-
-HDIO_SET_NOWERR			change ignore-write-error flag
-
-	usage:
-
-	  int val;
-	  ioctl(fd, HDIO_SET_NOWERR, val);
-
-	inputs:
-	  New value for ignore-write-error flag.  Used for ignoring
-	  WRERR_STAT
-
-	outputs:	none
-
-	error return:
-	  EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  EINVAL	value out of range [0 1]
-	  EBUSY		Controller busy
-
-
-
-HDIO_SET_DMA			change use-dma flag
-
-	usage:
-
-	  long val;
-	  ioctl(fd, HDIO_SET_DMA, val);
-
-	inputs:
-	  New value for use-dma flag
-
-	outputs:	none
-
-	error return:
-	  EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  EINVAL	value out of range [0 1]
-	  EBUSY		Controller busy
-
-
-
-HDIO_SET_PIO_MODE		reconfig interface to new speed
-
-	usage:
-
-	  long val;
-	  ioctl(fd, HDIO_SET_PIO_MODE, val);
-
-	inputs:
-	  New interface speed.
-
-	outputs:	none
-
-	error return:
-	  EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  EINVAL	value out of range [0 255]
-	  EBUSY		Controller busy
-
-
-
-HDIO_SCAN_HWIF			register and (re)scan interface
-
-	usage:
-
-	  int args[3]
-	  ...
-	  ioctl(fd, HDIO_SCAN_HWIF, args);
-
-	inputs:
-	  args[0]	io address to probe
-	  args[1]	control address to probe
-	  args[2]	irq number
-
-	outputs:	none
-
-	error returns:
-	  EACCES	Access denied:  requires CAP_SYS_RAWIO
-	  EIO		Probe failed.
-
-	notes:
-
-	  This ioctl initializes the addresses and irq for a disk
-	  controller, probes for drives, and creates /proc/ide
-	  interfaces as appropriate.
-
-
-
-HDIO_UNREGISTER_HWIF		unregister interface
-
-	usage:
-
-	  int index;
-	  ioctl(fd, HDIO_UNREGISTER_HWIF, index);
-
-	inputs:
-	  index		index of hardware interface to unregister
-
-	outputs:	none
-
-	error returns:
-	  EACCES	Access denied:  requires CAP_SYS_RAWIO
-
-	notes:
-
-	  This ioctl removes a hardware interface from the kernel.
-
-	  Currently (2.6.8) this ioctl silently fails if any drive on
-	  the interface is busy.
-
-
-
-HDIO_SET_WCACHE			change write cache enable-disable
-
-	usage:
-
-	  int val;
-	  ioctl(fd, HDIO_SET_WCACHE, val);
-
-	inputs:
-	  New value for write cache enable
-
-	outputs:	none
-
-	error return:
-	  EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  EINVAL	value out of range [0 1]
-	  EBUSY		Controller busy
-
-
-
-HDIO_SET_ACOUSTIC		change acoustic behavior
-
-	usage:
-
-	  int val;
-	  ioctl(fd, HDIO_SET_ACOUSTIC, val);
-
-	inputs:
-	  New value for drive acoustic settings
-
-	outputs:	none
-
-	error return:
-	  EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  EINVAL	value out of range [0 254]
-	  EBUSY		Controller busy
-
-
-
-HDIO_SET_QDMA			change use-qdma flag
-
-	Not implemented, as of 2.6.8.1
-
-
-
-HDIO_SET_ADDRESS		change lba addressing modes
-
-	usage:
-
-	  int val;
-	  ioctl(fd, HDIO_SET_ADDRESS, val);
-
-	inputs:
-	  New value for addressing mode
-	    0 = 28-bit
-	    1 = 48-bit
-	    2 = 48-bit doing 28-bit
-
-	outputs:	none
-
-	error return:
-	  EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  EINVAL	value out of range [0 2]
-	  EBUSY		Controller busy
-	  EIO		Drive does not support lba48 mode.
-
-
-HDIO_SET_IDE_SCSI
-
-	usage:
-
-	  long val;
-	  ioctl(fd, HDIO_SET_IDE_SCSI, val);
-
-	inputs:
-	  New value for scsi emulation mode (?)
-
-	outputs:	none
-
-	error return:
-	  EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
-	  EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  EINVAL	value out of range [0 1]
-	  EBUSY		Controller busy
-
-
-
-HDIO_SET_SCSI_IDE
-
-	Not implemented, as of 2.6.8.1
-
-
diff --git a/Documentation/ioctl/index.rst b/Documentation/ioctl/index.rst
new file mode 100644
index 000000000000..1a6f437566e3
--- /dev/null
+++ b/Documentation/ioctl/index.rst
@@ -0,0 +1,16 @@
+:orphan:
+
+======
+IOCTLs
+======
+
+.. toctree::
+   :maxdepth: 1
+
+   ioctl-number
+
+   botching-up-ioctls
+   ioctl-decoding
+
+   cdrom
+   hdio
diff --git a/Documentation/ioctl/ioctl-decoding.rst b/Documentation/ioctl/ioctl-decoding.rst
new file mode 100644
index 000000000000..380d6bb3e3ea
--- /dev/null
+++ b/Documentation/ioctl/ioctl-decoding.rst
@@ -0,0 +1,31 @@
+==============================
+Decoding an IOCTL Magic Number
+==============================
+
+To decode a hex IOCTL code:
+
+Most architectures use this generic format, but check
+include/ARCH/ioctl.h for specifics, e.g. powerpc
+uses 3 bits to encode read/write and 13 bits for size.
+
+ ====== ==================================
+ bits   meaning
+ ====== ==================================
+ 31-30	00 - no parameters: uses _IO macro
+	10 - read: _IOR
+	01 - write: _IOW
+	11 - read/write: _IOWR
+
+ 29-16	size of arguments
+
+ 15-8	ascii character supposedly
+	unique to each driver
+
+ 7-0	function #
+ ====== ==================================
+
+
+So for example 0x82187201 is a read with arg length of 0x218,
+character 'r' function 1. Grepping the source reveals this is::
+
+	#define VFAT_IOCTL_READDIR_BOTH         _IOR('r', 1, struct dirent [2])
diff --git a/Documentation/ioctl/ioctl-decoding.txt b/Documentation/ioctl/ioctl-decoding.txt
deleted file mode 100644
index e35efb0cec2e..000000000000
--- a/Documentation/ioctl/ioctl-decoding.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-To decode a hex IOCTL code:
-
-Most architectures use this generic format, but check
-include/ARCH/ioctl.h for specifics, e.g. powerpc
-uses 3 bits to encode read/write and 13 bits for size.
-
- bits    meaning
- 31-30	00 - no parameters: uses _IO macro
-	10 - read: _IOR
-	01 - write: _IOW
-	11 - read/write: _IOWR
-
- 29-16	size of arguments
-
- 15-8	ascii character supposedly
-	unique to each driver
-
- 7-0	function #
-
-
-So for example 0x82187201 is a read with arg length of 0x218,
-character 'r' function 1. Grepping the source reveals this is:
-
-#define VFAT_IOCTL_READDIR_BOTH         _IOR('r', 1, struct dirent [2])
-- 
cgit 


From e0ae154404c33477473244f286b1193364144289 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 16:49:39 -0300
Subject: docs: rapidio: convert to ReST

Rename the rapidio documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/rapidio/index.rst      |  15 ++
 Documentation/rapidio/mport_cdev.rst | 110 +++++++++++
 Documentation/rapidio/mport_cdev.txt | 107 -----------
 Documentation/rapidio/rapidio.rst    | 362 +++++++++++++++++++++++++++++++++++
 Documentation/rapidio/rapidio.txt    | 351 ---------------------------------
 Documentation/rapidio/rio_cm.rst     | 135 +++++++++++++
 Documentation/rapidio/rio_cm.txt     | 119 ------------
 Documentation/rapidio/sysfs.rst      |   7 +
 Documentation/rapidio/sysfs.txt      |   3 -
 Documentation/rapidio/tsi721.rst     | 112 +++++++++++
 Documentation/rapidio/tsi721.txt     |  97 ----------
 11 files changed, 741 insertions(+), 677 deletions(-)
 create mode 100644 Documentation/rapidio/index.rst
 create mode 100644 Documentation/rapidio/mport_cdev.rst
 delete mode 100644 Documentation/rapidio/mport_cdev.txt
 create mode 100644 Documentation/rapidio/rapidio.rst
 delete mode 100644 Documentation/rapidio/rapidio.txt
 create mode 100644 Documentation/rapidio/rio_cm.rst
 delete mode 100644 Documentation/rapidio/rio_cm.txt
 create mode 100644 Documentation/rapidio/sysfs.rst
 delete mode 100644 Documentation/rapidio/sysfs.txt
 create mode 100644 Documentation/rapidio/tsi721.rst
 delete mode 100644 Documentation/rapidio/tsi721.txt

(limited to 'Documentation')

diff --git a/Documentation/rapidio/index.rst b/Documentation/rapidio/index.rst
new file mode 100644
index 000000000000..ab7b5541b346
--- /dev/null
+++ b/Documentation/rapidio/index.rst
@@ -0,0 +1,15 @@
+:orphan:
+
+===========================
+The Linux RapidIO Subsystem
+===========================
+
+.. toctree::
+   :maxdepth: 1
+
+   rapidio
+   sysfs
+
+   tsi721
+   mport_cdev
+   rio_cm
diff --git a/Documentation/rapidio/mport_cdev.rst b/Documentation/rapidio/mport_cdev.rst
new file mode 100644
index 000000000000..df77a7f7be7d
--- /dev/null
+++ b/Documentation/rapidio/mport_cdev.rst
@@ -0,0 +1,110 @@
+==================================================================
+RapidIO subsystem mport character device driver (rio_mport_cdev.c)
+==================================================================
+
+1. Overview
+===========
+
+This device driver is the result of collaboration within the RapidIO.org
+Software Task Group (STG) between Texas Instruments, Freescale,
+Prodrive Technologies, Nokia Networks, BAE and IDT.  Additional input was
+received from other members of RapidIO.org. The objective was to create a
+character mode driver interface which exposes the capabilities of RapidIO
+devices directly to applications, in a manner that allows the numerous and
+varied RapidIO implementations to interoperate.
+
+This driver (MPORT_CDEV) provides access to basic RapidIO subsystem operations
+for user-space applications. Most of RapidIO operations are supported through
+'ioctl' system calls.
+
+When loaded this device driver creates filesystem nodes named rio_mportX in /dev
+directory for each registered RapidIO mport device. 'X' in the node name matches
+to unique port ID assigned to each local mport device.
+
+Using available set of ioctl commands user-space applications can perform
+following RapidIO bus and subsystem operations:
+
+- Reads and writes from/to configuration registers of mport devices
+  (RIO_MPORT_MAINT_READ_LOCAL/RIO_MPORT_MAINT_WRITE_LOCAL)
+- Reads and writes from/to configuration registers of remote RapidIO devices.
+  This operations are defined as RapidIO Maintenance reads/writes in RIO spec.
+  (RIO_MPORT_MAINT_READ_REMOTE/RIO_MPORT_MAINT_WRITE_REMOTE)
+- Set RapidIO Destination ID for mport devices (RIO_MPORT_MAINT_HDID_SET)
+- Set RapidIO Component Tag for mport devices (RIO_MPORT_MAINT_COMPTAG_SET)
+- Query logical index of mport devices (RIO_MPORT_MAINT_PORT_IDX_GET)
+- Query capabilities and RapidIO link configuration of mport devices
+  (RIO_MPORT_GET_PROPERTIES)
+- Enable/Disable reporting of RapidIO doorbell events to user-space applications
+  (RIO_ENABLE_DOORBELL_RANGE/RIO_DISABLE_DOORBELL_RANGE)
+- Enable/Disable reporting of RIO port-write events to user-space applications
+  (RIO_ENABLE_PORTWRITE_RANGE/RIO_DISABLE_PORTWRITE_RANGE)
+- Query/Control type of events reported through this driver: doorbells,
+  port-writes or both (RIO_SET_EVENT_MASK/RIO_GET_EVENT_MASK)
+- Configure/Map mport's outbound requests window(s) for specific size,
+  RapidIO destination ID, hopcount and request type
+  (RIO_MAP_OUTBOUND/RIO_UNMAP_OUTBOUND)
+- Configure/Map mport's inbound requests window(s) for specific size,
+  RapidIO base address and local memory base address
+  (RIO_MAP_INBOUND/RIO_UNMAP_INBOUND)
+- Allocate/Free contiguous DMA coherent memory buffer for DMA data transfers
+  to/from remote RapidIO devices (RIO_ALLOC_DMA/RIO_FREE_DMA)
+- Initiate DMA data transfers to/from remote RapidIO devices (RIO_TRANSFER).
+  Supports blocking, asynchronous and posted (a.k.a 'fire-and-forget') data
+  transfer modes.
+- Check/Wait for completion of asynchronous DMA data transfer
+  (RIO_WAIT_FOR_ASYNC)
+- Manage device objects supported by RapidIO subsystem (RIO_DEV_ADD/RIO_DEV_DEL).
+  This allows implementation of various RapidIO fabric enumeration algorithms
+  as user-space applications while using remaining functionality provided by
+  kernel RapidIO subsystem.
+
+2. Hardware Compatibility
+=========================
+
+This device driver uses standard interfaces defined by kernel RapidIO subsystem
+and therefore it can be used with any mport device driver registered by RapidIO
+subsystem with limitations set by available mport implementation.
+
+At this moment the most common limitation is availability of RapidIO-specific
+DMA engine framework for specific mport device. Users should verify available
+functionality of their platform when planning to use this driver:
+
+- IDT Tsi721 PCIe-to-RapidIO bridge device and its mport device driver are fully
+  compatible with this driver.
+- Freescale SoCs 'fsl_rio' mport driver does not have implementation for RapidIO
+  specific DMA engine support and therefore DMA data transfers mport_cdev driver
+  are not available.
+
+3. Module parameters
+====================
+
+- 'dma_timeout'
+      - DMA transfer completion timeout (in msec, default value 3000).
+        This parameter set a maximum completion wait time for SYNC mode DMA
+        transfer requests and for RIO_WAIT_FOR_ASYNC ioctl requests.
+
+- 'dbg_level'
+      - This parameter allows to control amount of debug information
+        generated by this device driver. This parameter is formed by set of
+        bit masks that correspond to the specific functional blocks.
+        For mask definitions see 'drivers/rapidio/devices/rio_mport_cdev.c'
+        This parameter can be changed dynamically.
+        Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
+
+4. Known problems
+=================
+
+  None.
+
+5. User-space Applications and API
+==================================
+
+API library and applications that use this device driver are available from
+RapidIO.org.
+
+6. TODO List
+============
+
+- Add support for sending/receiving "raw" RapidIO messaging packets.
+- Add memory mapped DMA data transfers as an option when RapidIO-specific DMA
+  is not available.
diff --git a/Documentation/rapidio/mport_cdev.txt b/Documentation/rapidio/mport_cdev.txt
deleted file mode 100644
index a53f786ee2e9..000000000000
--- a/Documentation/rapidio/mport_cdev.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-RapidIO subsystem mport character device driver (rio_mport_cdev.c)
-==================================================================
-
-Version History:
-----------------
-  1.0.0 - Initial driver release.
-
-==================================================================
-
-I. Overview
-
-This device driver is the result of collaboration within the RapidIO.org
-Software Task Group (STG) between Texas Instruments, Freescale,
-Prodrive Technologies, Nokia Networks, BAE and IDT.  Additional input was
-received from other members of RapidIO.org. The objective was to create a
-character mode driver interface which exposes the capabilities of RapidIO
-devices directly to applications, in a manner that allows the numerous and
-varied RapidIO implementations to interoperate.
-
-This driver (MPORT_CDEV) provides access to basic RapidIO subsystem operations
-for user-space applications. Most of RapidIO operations are supported through
-'ioctl' system calls.
-
-When loaded this device driver creates filesystem nodes named rio_mportX in /dev
-directory for each registered RapidIO mport device. 'X' in the node name matches
-to unique port ID assigned to each local mport device.
-
-Using available set of ioctl commands user-space applications can perform
-following RapidIO bus and subsystem operations:
-
-- Reads and writes from/to configuration registers of mport devices
-    (RIO_MPORT_MAINT_READ_LOCAL/RIO_MPORT_MAINT_WRITE_LOCAL)
-- Reads and writes from/to configuration registers of remote RapidIO devices.
-  This operations are defined as RapidIO Maintenance reads/writes in RIO spec.
-    (RIO_MPORT_MAINT_READ_REMOTE/RIO_MPORT_MAINT_WRITE_REMOTE)
-- Set RapidIO Destination ID for mport devices (RIO_MPORT_MAINT_HDID_SET)
-- Set RapidIO Component Tag for mport devices (RIO_MPORT_MAINT_COMPTAG_SET)
-- Query logical index of mport devices (RIO_MPORT_MAINT_PORT_IDX_GET)
-- Query capabilities and RapidIO link configuration of mport devices
-    (RIO_MPORT_GET_PROPERTIES)
-- Enable/Disable reporting of RapidIO doorbell events to user-space applications
-    (RIO_ENABLE_DOORBELL_RANGE/RIO_DISABLE_DOORBELL_RANGE)
-- Enable/Disable reporting of RIO port-write events to user-space applications
-    (RIO_ENABLE_PORTWRITE_RANGE/RIO_DISABLE_PORTWRITE_RANGE)
-- Query/Control type of events reported through this driver: doorbells,
-  port-writes or both (RIO_SET_EVENT_MASK/RIO_GET_EVENT_MASK)
-- Configure/Map mport's outbound requests window(s) for specific size,
-  RapidIO destination ID, hopcount and request type
-    (RIO_MAP_OUTBOUND/RIO_UNMAP_OUTBOUND)
-- Configure/Map mport's inbound requests window(s) for specific size,
-  RapidIO base address and local memory base address
-    (RIO_MAP_INBOUND/RIO_UNMAP_INBOUND)
-- Allocate/Free contiguous DMA coherent memory buffer for DMA data transfers
-  to/from remote RapidIO devices (RIO_ALLOC_DMA/RIO_FREE_DMA)
-- Initiate DMA data transfers to/from remote RapidIO devices (RIO_TRANSFER).
-  Supports blocking, asynchronous and posted (a.k.a 'fire-and-forget') data
-  transfer modes.
-- Check/Wait for completion of asynchronous DMA data transfer
-    (RIO_WAIT_FOR_ASYNC)
-- Manage device objects supported by RapidIO subsystem (RIO_DEV_ADD/RIO_DEV_DEL).
-  This allows implementation of various RapidIO fabric enumeration algorithms
-  as user-space applications while using remaining functionality provided by
-  kernel RapidIO subsystem.
-
-II. Hardware Compatibility
-
-This device driver uses standard interfaces defined by kernel RapidIO subsystem
-and therefore it can be used with any mport device driver registered by RapidIO
-subsystem with limitations set by available mport implementation.
-
-At this moment the most common limitation is availability of RapidIO-specific
-DMA engine framework for specific mport device. Users should verify available
-functionality of their platform when planning to use this driver:
-
-- IDT Tsi721 PCIe-to-RapidIO bridge device and its mport device driver are fully
-  compatible with this driver.
-- Freescale SoCs 'fsl_rio' mport driver does not have implementation for RapidIO
-  specific DMA engine support and therefore DMA data transfers mport_cdev driver
-  are not available.
-
-III. Module parameters
-
-- 'dma_timeout' - DMA transfer completion timeout (in msec, default value 3000).
-        This parameter set a maximum completion wait time for SYNC mode DMA
-        transfer requests and for RIO_WAIT_FOR_ASYNC ioctl requests.
-
-- 'dbg_level' - This parameter allows to control amount of debug information
-        generated by this device driver. This parameter is formed by set of
-        bit masks that correspond to the specific functional blocks.
-        For mask definitions see 'drivers/rapidio/devices/rio_mport_cdev.c'
-        This parameter can be changed dynamically.
-        Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
-
-IV. Known problems
-
-  None.
-
-V. User-space Applications and API
-
-API library and applications that use this device driver are available from
-RapidIO.org.
-
-VI. TODO List
-
-- Add support for sending/receiving "raw" RapidIO messaging packets.
-- Add memory mapped DMA data transfers as an option when RapidIO-specific DMA
-  is not available.
diff --git a/Documentation/rapidio/rapidio.rst b/Documentation/rapidio/rapidio.rst
new file mode 100644
index 000000000000..fb8942d3ba85
--- /dev/null
+++ b/Documentation/rapidio/rapidio.rst
@@ -0,0 +1,362 @@
+============
+Introduction
+============
+
+The RapidIO standard is a packet-based fabric interconnect standard designed for
+use in embedded systems. Development of the RapidIO standard is directed by the
+RapidIO Trade Association (RTA). The current version of the RapidIO specification
+is publicly available for download from the RTA web-site [1].
+
+This document describes the basics of the Linux RapidIO subsystem and provides
+information on its major components.
+
+1 Overview
+==========
+
+Because the RapidIO subsystem follows the Linux device model it is integrated
+into the kernel similarly to other buses by defining RapidIO-specific device and
+bus types and registering them within the device model.
+
+The Linux RapidIO subsystem is architecture independent and therefore defines
+architecture-specific interfaces that provide support for common RapidIO
+subsystem operations.
+
+2. Core Components
+==================
+
+A typical RapidIO network is a combination of endpoints and switches.
+Each of these components is represented in the subsystem by an associated data
+structure. The core logical components of the RapidIO subsystem are defined
+in include/linux/rio.h file.
+
+2.1 Master Port
+---------------
+
+A master port (or mport) is a RapidIO interface controller that is local to the
+processor executing the Linux code. A master port generates and receives RapidIO
+packets (transactions). In the RapidIO subsystem each master port is represented
+by a rio_mport data structure. This structure contains master port specific
+resources such as mailboxes and doorbells. The rio_mport also includes a unique
+host device ID that is valid when a master port is configured as an enumerating
+host.
+
+RapidIO master ports are serviced by subsystem specific mport device drivers
+that provide functionality defined for this subsystem. To provide a hardware
+independent interface for RapidIO subsystem operations, rio_mport structure
+includes rio_ops data structure which contains pointers to hardware specific
+implementations of RapidIO functions.
+
+2.2 Device
+----------
+
+A RapidIO device is any endpoint (other than mport) or switch in the network.
+All devices are presented in the RapidIO subsystem by corresponding rio_dev data
+structure. Devices form one global device list and per-network device lists
+(depending on number of available mports and networks).
+
+2.3 Switch
+----------
+
+A RapidIO switch is a special class of device that routes packets between its
+ports towards their final destination. The packet destination port within a
+switch is defined by an internal routing table. A switch is presented in the
+RapidIO subsystem by rio_dev data structure expanded by additional rio_switch
+data structure, which contains switch specific information such as copy of the
+routing table and pointers to switch specific functions.
+
+The RapidIO subsystem defines the format and initialization method for subsystem
+specific switch drivers that are designed to provide hardware-specific
+implementation of common switch management routines.
+
+2.4 Network
+-----------
+
+A RapidIO network is a combination of interconnected endpoint and switch devices.
+Each RapidIO network known to the system is represented by corresponding rio_net
+data structure. This structure includes lists of all devices and local master
+ports that form the same network. It also contains a pointer to the default
+master port that is used to communicate with devices within the network.
+
+2.5 Device Drivers
+------------------
+
+RapidIO device-specific drivers follow Linux Kernel Driver Model and are
+intended to support specific RapidIO devices attached to the RapidIO network.
+
+2.6 Subsystem Interfaces
+------------------------
+
+RapidIO interconnect specification defines features that may be used to provide
+one or more common service layers for all participating RapidIO devices. These
+common services may act separately from device-specific drivers or be used by
+device-specific drivers. Example of such service provider is the RIONET driver
+which implements Ethernet-over-RapidIO interface. Because only one driver can be
+registered for a device, all common RapidIO services have to be registered as
+subsystem interfaces. This allows to have multiple common services attached to
+the same device without blocking attachment of a device-specific driver.
+
+3. Subsystem Initialization
+===========================
+
+In order to initialize the RapidIO subsystem, a platform must initialize and
+register at least one master port within the RapidIO network. To register mport
+within the subsystem controller driver's initialization code calls function
+rio_register_mport() for each available master port.
+
+After all active master ports are registered with a RapidIO subsystem,
+an enumeration and/or discovery routine may be called automatically or
+by user-space command.
+
+RapidIO subsystem can be configured to be built as a statically linked or
+modular component of the kernel (see details below).
+
+4. Enumeration and Discovery
+============================
+
+4.1 Overview
+------------
+
+RapidIO subsystem configuration options allow users to build enumeration and
+discovery methods as statically linked components or loadable modules.
+An enumeration/discovery method implementation and available input parameters
+define how any given method can be attached to available RapidIO mports:
+simply to all available mports OR individually to the specified mport device.
+
+Depending on selected enumeration/discovery build configuration, there are
+several methods to initiate an enumeration and/or discovery process:
+
+  (a) Statically linked enumeration and discovery process can be started
+  automatically during kernel initialization time using corresponding module
+  parameters. This was the original method used since introduction of RapidIO
+  subsystem. Now this method relies on enumerator module parameter which is
+  'rio-scan.scan' for existing basic enumeration/discovery method.
+  When automatic start of enumeration/discovery is used a user has to ensure
+  that all discovering endpoints are started before the enumerating endpoint
+  and are waiting for enumeration to be completed.
+  Configuration option CONFIG_RAPIDIO_DISC_TIMEOUT defines time that discovering
+  endpoint waits for enumeration to be completed. If the specified timeout
+  expires the discovery process is terminated without obtaining RapidIO network
+  information. NOTE: a timed out discovery process may be restarted later using
+  a user-space command as it is described below (if the given endpoint was
+  enumerated successfully).
+
+  (b) Statically linked enumeration and discovery process can be started by
+  a command from user space. This initiation method provides more flexibility
+  for a system startup compared to the option (a) above. After all participating
+  endpoints have been successfully booted, an enumeration process shall be
+  started first by issuing a user-space command, after an enumeration is
+  completed a discovery process can be started on all remaining endpoints.
+
+  (c) Modular enumeration and discovery process can be started by a command from
+  user space. After an enumeration/discovery module is loaded, a network scan
+  process can be started by issuing a user-space command.
+  Similar to the option (b) above, an enumerator has to be started first.
+
+  (d) Modular enumeration and discovery process can be started by a module
+  initialization routine. In this case an enumerating module shall be loaded
+  first.
+
+When a network scan process is started it calls an enumeration or discovery
+routine depending on the configured role of a master port: host or agent.
+
+Enumeration is performed by a master port if it is configured as a host port by
+assigning a host destination ID greater than or equal to zero. The host
+destination ID can be assigned to a master port using various methods depending
+on RapidIO subsystem build configuration:
+
+  (a) For a statically linked RapidIO subsystem core use command line parameter
+  "rapidio.hdid=" with a list of destination ID assignments in order of mport
+  device registration. For example, in a system with two RapidIO controllers
+  the command line parameter "rapidio.hdid=-1,7" will result in assignment of
+  the host destination ID=7 to the second RapidIO controller, while the first
+  one will be assigned destination ID=-1.
+
+  (b) If the RapidIO subsystem core is built as a loadable module, in addition
+  to the method shown above, the host destination ID(s) can be specified using
+  traditional methods of passing module parameter "hdid=" during its loading:
+
+  - from command line: "modprobe rapidio hdid=-1,7", or
+  - from modprobe configuration file using configuration command "options",
+    like in this example: "options rapidio hdid=-1,7". An example of modprobe
+    configuration file is provided in the section below.
+
+NOTES:
+  (i) if "hdid=" parameter is omitted all available mport will be assigned
+  destination ID = -1;
+
+  (ii) the "hdid=" parameter in systems with multiple mports can have
+  destination ID assignments omitted from the end of list (default = -1).
+
+If the host device ID for a specific master port is set to -1, the discovery
+process will be performed for it.
+
+The enumeration and discovery routines use RapidIO maintenance transactions
+to access the configuration space of devices.
+
+NOTE: If RapidIO switch-specific device drivers are built as loadable modules
+they must be loaded before enumeration/discovery process starts.
+This requirement is cased by the fact that enumeration/discovery methods invoke
+vendor-specific callbacks on early stages.
+
+4.2 Automatic Start of Enumeration and Discovery
+------------------------------------------------
+
+Automatic enumeration/discovery start method is applicable only to built-in
+enumeration/discovery RapidIO configuration selection. To enable automatic
+enumeration/discovery start by existing basic enumerator method set use boot
+command line parameter "rio-scan.scan=1".
+
+This configuration requires synchronized start of all RapidIO endpoints that
+form a network which will be enumerated/discovered. Discovering endpoints have
+to be started before an enumeration starts to ensure that all RapidIO
+controllers have been initialized and are ready to be discovered. Configuration
+parameter CONFIG_RAPIDIO_DISC_TIMEOUT defines time (in seconds) which
+a discovering endpoint will wait for enumeration to be completed.
+
+When automatic enumeration/discovery start is selected, basic method's
+initialization routine calls rio_init_mports() to perform enumeration or
+discovery for all known mport devices.
+
+Depending on RapidIO network size and configuration this automatic
+enumeration/discovery start method may be difficult to use due to the
+requirement for synchronized start of all endpoints.
+
+4.3 User-space Start of Enumeration and Discovery
+-------------------------------------------------
+
+User-space start of enumeration and discovery can be used with built-in and
+modular build configurations. For user-space controlled start RapidIO subsystem
+creates the sysfs write-only attribute file '/sys/bus/rapidio/scan'. To initiate
+an enumeration or discovery process on specific mport device, a user needs to
+write mport_ID (not RapidIO destination ID) into that file. The mport_ID is a
+sequential number (0 ... RIO_MAX_MPORTS) assigned during mport device
+registration. For example for machine with single RapidIO controller, mport_ID
+for that controller always will be 0.
+
+To initiate RapidIO enumeration/discovery on all available mports a user may
+write '-1' (or RIO_MPORT_ANY) into the scan attribute file.
+
+4.4 Basic Enumeration Method
+----------------------------
+
+This is an original enumeration/discovery method which is available since
+first release of RapidIO subsystem code. The enumeration process is
+implemented according to the enumeration algorithm outlined in the RapidIO
+Interconnect Specification: Annex I [1].
+
+This method can be configured as statically linked or loadable module.
+The method's single parameter "scan" allows to trigger the enumeration/discovery
+process from module initialization routine.
+
+This enumeration/discovery method can be started only once and does not support
+unloading if it is built as a module.
+
+The enumeration process traverses the network using a recursive depth-first
+algorithm. When a new device is found, the enumerator takes ownership of that
+device by writing into the Host Device ID Lock CSR. It does this to ensure that
+the enumerator has exclusive right to enumerate the device. If device ownership
+is successfully acquired, the enumerator allocates a new rio_dev structure and
+initializes it according to device capabilities.
+
+If the device is an endpoint, a unique device ID is assigned to it and its value
+is written into the device's Base Device ID CSR.
+
+If the device is a switch, the enumerator allocates an additional rio_switch
+structure to store switch specific information. Then the switch's vendor ID and
+device ID are queried against a table of known RapidIO switches. Each switch
+table entry contains a pointer to a switch-specific initialization routine that
+initializes pointers to the rest of switch specific operations, and performs
+hardware initialization if necessary. A RapidIO switch does not have a unique
+device ID; it relies on hopcount and routing for device ID of an attached
+endpoint if access to its configuration registers is required. If a switch (or
+chain of switches) does not have any endpoint (except enumerator) attached to
+it, a fake device ID will be assigned to configure a route to that switch.
+In the case of a chain of switches without endpoint, one fake device ID is used
+to configure a route through the entire chain and switches are differentiated by
+their hopcount value.
+
+For both endpoints and switches the enumerator writes a unique component tag
+into device's Component Tag CSR. That unique value is used by the error
+management notification mechanism to identify a device that is reporting an
+error management event.
+
+Enumeration beyond a switch is completed by iterating over each active egress
+port of that switch. For each active link, a route to a default device ID
+(0xFF for 8-bit systems and 0xFFFF for 16-bit systems) is temporarily written
+into the routing table. The algorithm recurs by calling itself with hopcount + 1
+and the default device ID in order to access the device on the active port.
+
+After the host has completed enumeration of the entire network it releases
+devices by clearing device ID locks (calls rio_clear_locks()). For each endpoint
+in the system, it sets the Discovered bit in the Port General Control CSR
+to indicate that enumeration is completed and agents are allowed to execute
+passive discovery of the network.
+
+The discovery process is performed by agents and is similar to the enumeration
+process that is described above. However, the discovery process is performed
+without changes to the existing routing because agents only gather information
+about RapidIO network structure and are building an internal map of discovered
+devices. This way each Linux-based component of the RapidIO subsystem has
+a complete view of the network. The discovery process can be performed
+simultaneously by several agents. After initializing its RapidIO master port
+each agent waits for enumeration completion by the host for the configured wait
+time period. If this wait time period expires before enumeration is completed,
+an agent skips RapidIO discovery and continues with remaining kernel
+initialization.
+
+4.5 Adding New Enumeration/Discovery Method
+-------------------------------------------
+
+RapidIO subsystem code organization allows addition of new enumeration/discovery
+methods as new configuration options without significant impact to the core
+RapidIO code.
+
+A new enumeration/discovery method has to be attached to one or more mport
+devices before an enumeration/discovery process can be started. Normally,
+method's module initialization routine calls rio_register_scan() to attach
+an enumerator to a specified mport device (or devices). The basic enumerator
+implementation demonstrates this process.
+
+4.6 Using Loadable RapidIO Switch Drivers
+-----------------------------------------
+
+In the case when RapidIO switch drivers are built as loadable modules a user
+must ensure that they are loaded before the enumeration/discovery starts.
+This process can be automated by specifying pre- or post- dependencies in the
+RapidIO-specific modprobe configuration file as shown in the example below.
+
+File /etc/modprobe.d/rapidio.conf::
+
+  # Configure RapidIO subsystem modules
+
+  # Set enumerator host destination ID (overrides kernel command line option)
+  options rapidio hdid=-1,2
+
+  # Load RapidIO switch drivers immediately after rapidio core module was loaded
+  softdep rapidio post: idt_gen2 idtcps tsi57x
+
+  # OR :
+
+  # Load RapidIO switch drivers just before rio-scan enumerator module is loaded
+  softdep rio-scan pre: idt_gen2 idtcps tsi57x
+
+  --------------------------
+
+NOTE:
+  In the example above, one of "softdep" commands must be removed or
+  commented out to keep required module loading sequence.
+
+5. References
+=============
+
+[1] RapidIO Trade Association. RapidIO Interconnect Specifications.
+    http://www.rapidio.org.
+
+[2] Rapidio TA. Technology Comparisons.
+    http://www.rapidio.org/education/technology_comparisons/
+
+[3] RapidIO support for Linux.
+    http://lwn.net/Articles/139118/
+
+[4] Matt Porter. RapidIO for Linux. Ottawa Linux Symposium, 2005
+    http://www.kernel.org/doc/ols/2005/ols2005v2-pages-43-56.pdf
diff --git a/Documentation/rapidio/rapidio.txt b/Documentation/rapidio/rapidio.txt
deleted file mode 100644
index 28fbd877f85a..000000000000
--- a/Documentation/rapidio/rapidio.txt
+++ /dev/null
@@ -1,351 +0,0 @@
-                          The Linux RapidIO Subsystem
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The RapidIO standard is a packet-based fabric interconnect standard designed for
-use in embedded systems. Development of the RapidIO standard is directed by the
-RapidIO Trade Association (RTA). The current version of the RapidIO specification
-is publicly available for download from the RTA web-site [1].
-
-This document describes the basics of the Linux RapidIO subsystem and provides
-information on its major components.
-
-1 Overview
-----------
-
-Because the RapidIO subsystem follows the Linux device model it is integrated
-into the kernel similarly to other buses by defining RapidIO-specific device and
-bus types and registering them within the device model.
-
-The Linux RapidIO subsystem is architecture independent and therefore defines
-architecture-specific interfaces that provide support for common RapidIO
-subsystem operations.
-
-2. Core Components
-------------------
-
-A typical RapidIO network is a combination of endpoints and switches.
-Each of these components is represented in the subsystem by an associated data
-structure. The core logical components of the RapidIO subsystem are defined
-in include/linux/rio.h file.
-
-2.1 Master Port
-
-A master port (or mport) is a RapidIO interface controller that is local to the
-processor executing the Linux code. A master port generates and receives RapidIO
-packets (transactions). In the RapidIO subsystem each master port is represented
-by a rio_mport data structure. This structure contains master port specific
-resources such as mailboxes and doorbells. The rio_mport also includes a unique
-host device ID that is valid when a master port is configured as an enumerating
-host.
-
-RapidIO master ports are serviced by subsystem specific mport device drivers
-that provide functionality defined for this subsystem. To provide a hardware
-independent interface for RapidIO subsystem operations, rio_mport structure
-includes rio_ops data structure which contains pointers to hardware specific
-implementations of RapidIO functions.
-
-2.2 Device
-
-A RapidIO device is any endpoint (other than mport) or switch in the network.
-All devices are presented in the RapidIO subsystem by corresponding rio_dev data
-structure. Devices form one global device list and per-network device lists
-(depending on number of available mports and networks).
-
-2.3 Switch
-
-A RapidIO switch is a special class of device that routes packets between its
-ports towards their final destination. The packet destination port within a
-switch is defined by an internal routing table. A switch is presented in the
-RapidIO subsystem by rio_dev data structure expanded by additional rio_switch
-data structure, which contains switch specific information such as copy of the
-routing table and pointers to switch specific functions.
-
-The RapidIO subsystem defines the format and initialization method for subsystem
-specific switch drivers that are designed to provide hardware-specific
-implementation of common switch management routines.
-
-2.4 Network
-
-A RapidIO network is a combination of interconnected endpoint and switch devices.
-Each RapidIO network known to the system is represented by corresponding rio_net
-data structure. This structure includes lists of all devices and local master
-ports that form the same network. It also contains a pointer to the default
-master port that is used to communicate with devices within the network.
-
-2.5 Device Drivers
-
-RapidIO device-specific drivers follow Linux Kernel Driver Model and are
-intended to support specific RapidIO devices attached to the RapidIO network.
-
-2.6 Subsystem Interfaces
-
-RapidIO interconnect specification defines features that may be used to provide
-one or more common service layers for all participating RapidIO devices. These
-common services may act separately from device-specific drivers or be used by
-device-specific drivers. Example of such service provider is the RIONET driver
-which implements Ethernet-over-RapidIO interface. Because only one driver can be
-registered for a device, all common RapidIO services have to be registered as
-subsystem interfaces. This allows to have multiple common services attached to
-the same device without blocking attachment of a device-specific driver.
-
-3. Subsystem Initialization
----------------------------
-
-In order to initialize the RapidIO subsystem, a platform must initialize and
-register at least one master port within the RapidIO network. To register mport
-within the subsystem controller driver's initialization code calls function
-rio_register_mport() for each available master port.
-
-After all active master ports are registered with a RapidIO subsystem,
-an enumeration and/or discovery routine may be called automatically or
-by user-space command.
-
-RapidIO subsystem can be configured to be built as a statically linked or
-modular component of the kernel (see details below).
-
-4. Enumeration and Discovery
-----------------------------
-
-4.1 Overview
-------------
-
-RapidIO subsystem configuration options allow users to build enumeration and
-discovery methods as statically linked components or loadable modules.
-An enumeration/discovery method implementation and available input parameters
-define how any given method can be attached to available RapidIO mports:
-simply to all available mports OR individually to the specified mport device.
-
-Depending on selected enumeration/discovery build configuration, there are
-several methods to initiate an enumeration and/or discovery process:
-
-  (a) Statically linked enumeration and discovery process can be started
-  automatically during kernel initialization time using corresponding module
-  parameters. This was the original method used since introduction of RapidIO
-  subsystem. Now this method relies on enumerator module parameter which is
-  'rio-scan.scan' for existing basic enumeration/discovery method.
-  When automatic start of enumeration/discovery is used a user has to ensure
-  that all discovering endpoints are started before the enumerating endpoint
-  and are waiting for enumeration to be completed.
-  Configuration option CONFIG_RAPIDIO_DISC_TIMEOUT defines time that discovering
-  endpoint waits for enumeration to be completed. If the specified timeout
-  expires the discovery process is terminated without obtaining RapidIO network
-  information. NOTE: a timed out discovery process may be restarted later using
-  a user-space command as it is described below (if the given endpoint was
-  enumerated successfully).
-
-  (b) Statically linked enumeration and discovery process can be started by
-  a command from user space. This initiation method provides more flexibility
-  for a system startup compared to the option (a) above. After all participating
-  endpoints have been successfully booted, an enumeration process shall be
-  started first by issuing a user-space command, after an enumeration is
-  completed a discovery process can be started on all remaining endpoints.
-
-  (c) Modular enumeration and discovery process can be started by a command from
-  user space. After an enumeration/discovery module is loaded, a network scan
-  process can be started by issuing a user-space command.
-  Similar to the option (b) above, an enumerator has to be started first.
-
-  (d) Modular enumeration and discovery process can be started by a module
-  initialization routine. In this case an enumerating module shall be loaded
-  first.
-
-When a network scan process is started it calls an enumeration or discovery
-routine depending on the configured role of a master port: host or agent.
-
-Enumeration is performed by a master port if it is configured as a host port by
-assigning a host destination ID greater than or equal to zero. The host
-destination ID can be assigned to a master port using various methods depending
-on RapidIO subsystem build configuration:
-
-  (a) For a statically linked RapidIO subsystem core use command line parameter
-  "rapidio.hdid=" with a list of destination ID assignments in order of mport
-  device registration. For example, in a system with two RapidIO controllers
-  the command line parameter "rapidio.hdid=-1,7" will result in assignment of
-  the host destination ID=7 to the second RapidIO controller, while the first
-  one will be assigned destination ID=-1.
-
-  (b) If the RapidIO subsystem core is built as a loadable module, in addition
-  to the method shown above, the host destination ID(s) can be specified using
-  traditional methods of passing module parameter "hdid=" during its loading:
-  - from command line: "modprobe rapidio hdid=-1,7", or
-  - from modprobe configuration file using configuration command "options",
-    like in this example: "options rapidio hdid=-1,7". An example of modprobe
-    configuration file is provided in the section below.
-
-  NOTES:
-  (i) if "hdid=" parameter is omitted all available mport will be assigned
-  destination ID = -1;
-  (ii) the "hdid=" parameter in systems with multiple mports can have
-  destination ID assignments omitted from the end of list (default = -1).
-
-If the host device ID for a specific master port is set to -1, the discovery
-process will be performed for it.
-
-The enumeration and discovery routines use RapidIO maintenance transactions
-to access the configuration space of devices.
-
-NOTE: If RapidIO switch-specific device drivers are built as loadable modules
-they must be loaded before enumeration/discovery process starts.
-This requirement is cased by the fact that enumeration/discovery methods invoke
-vendor-specific callbacks on early stages.
-
-4.2 Automatic Start of Enumeration and Discovery
-------------------------------------------------
-
-Automatic enumeration/discovery start method is applicable only to built-in
-enumeration/discovery RapidIO configuration selection. To enable automatic
-enumeration/discovery start by existing basic enumerator method set use boot
-command line parameter "rio-scan.scan=1".
-
-This configuration requires synchronized start of all RapidIO endpoints that
-form a network which will be enumerated/discovered. Discovering endpoints have
-to be started before an enumeration starts to ensure that all RapidIO
-controllers have been initialized and are ready to be discovered. Configuration
-parameter CONFIG_RAPIDIO_DISC_TIMEOUT defines time (in seconds) which
-a discovering endpoint will wait for enumeration to be completed.
-
-When automatic enumeration/discovery start is selected, basic method's
-initialization routine calls rio_init_mports() to perform enumeration or
-discovery for all known mport devices.
-
-Depending on RapidIO network size and configuration this automatic
-enumeration/discovery start method may be difficult to use due to the
-requirement for synchronized start of all endpoints.
-
-4.3 User-space Start of Enumeration and Discovery
--------------------------------------------------
-
-User-space start of enumeration and discovery can be used with built-in and
-modular build configurations. For user-space controlled start RapidIO subsystem
-creates the sysfs write-only attribute file '/sys/bus/rapidio/scan'. To initiate
-an enumeration or discovery process on specific mport device, a user needs to
-write mport_ID (not RapidIO destination ID) into that file. The mport_ID is a
-sequential number (0 ... RIO_MAX_MPORTS) assigned during mport device
-registration. For example for machine with single RapidIO controller, mport_ID
-for that controller always will be 0.
-
-To initiate RapidIO enumeration/discovery on all available mports a user may
-write '-1' (or RIO_MPORT_ANY) into the scan attribute file.
-
-4.4 Basic Enumeration Method
-----------------------------
-
-This is an original enumeration/discovery method which is available since
-first release of RapidIO subsystem code. The enumeration process is
-implemented according to the enumeration algorithm outlined in the RapidIO
-Interconnect Specification: Annex I [1].
-
-This method can be configured as statically linked or loadable module.
-The method's single parameter "scan" allows to trigger the enumeration/discovery
-process from module initialization routine.
-
-This enumeration/discovery method can be started only once and does not support
-unloading if it is built as a module.
-
-The enumeration process traverses the network using a recursive depth-first
-algorithm. When a new device is found, the enumerator takes ownership of that
-device by writing into the Host Device ID Lock CSR. It does this to ensure that
-the enumerator has exclusive right to enumerate the device. If device ownership
-is successfully acquired, the enumerator allocates a new rio_dev structure and
-initializes it according to device capabilities.
-
-If the device is an endpoint, a unique device ID is assigned to it and its value
-is written into the device's Base Device ID CSR.
-
-If the device is a switch, the enumerator allocates an additional rio_switch
-structure to store switch specific information. Then the switch's vendor ID and
-device ID are queried against a table of known RapidIO switches. Each switch
-table entry contains a pointer to a switch-specific initialization routine that
-initializes pointers to the rest of switch specific operations, and performs
-hardware initialization if necessary. A RapidIO switch does not have a unique
-device ID; it relies on hopcount and routing for device ID of an attached
-endpoint if access to its configuration registers is required. If a switch (or
-chain of switches) does not have any endpoint (except enumerator) attached to
-it, a fake device ID will be assigned to configure a route to that switch.
-In the case of a chain of switches without endpoint, one fake device ID is used
-to configure a route through the entire chain and switches are differentiated by
-their hopcount value.
-
-For both endpoints and switches the enumerator writes a unique component tag
-into device's Component Tag CSR. That unique value is used by the error
-management notification mechanism to identify a device that is reporting an
-error management event.
-
-Enumeration beyond a switch is completed by iterating over each active egress
-port of that switch. For each active link, a route to a default device ID
-(0xFF for 8-bit systems and 0xFFFF for 16-bit systems) is temporarily written
-into the routing table. The algorithm recurs by calling itself with hopcount + 1
-and the default device ID in order to access the device on the active port.
-
-After the host has completed enumeration of the entire network it releases
-devices by clearing device ID locks (calls rio_clear_locks()). For each endpoint
-in the system, it sets the Discovered bit in the Port General Control CSR
-to indicate that enumeration is completed and agents are allowed to execute
-passive discovery of the network.
-
-The discovery process is performed by agents and is similar to the enumeration
-process that is described above. However, the discovery process is performed
-without changes to the existing routing because agents only gather information
-about RapidIO network structure and are building an internal map of discovered
-devices. This way each Linux-based component of the RapidIO subsystem has
-a complete view of the network. The discovery process can be performed
-simultaneously by several agents. After initializing its RapidIO master port
-each agent waits for enumeration completion by the host for the configured wait
-time period. If this wait time period expires before enumeration is completed,
-an agent skips RapidIO discovery and continues with remaining kernel
-initialization.
-
-4.5 Adding New Enumeration/Discovery Method
--------------------------------------------
-
-RapidIO subsystem code organization allows addition of new enumeration/discovery
-methods as new configuration options without significant impact to the core
-RapidIO code.
-
-A new enumeration/discovery method has to be attached to one or more mport
-devices before an enumeration/discovery process can be started. Normally,
-method's module initialization routine calls rio_register_scan() to attach
-an enumerator to a specified mport device (or devices). The basic enumerator
-implementation demonstrates this process.
-
-4.6 Using Loadable RapidIO Switch Drivers
------------------------------------------
-
-In the case when RapidIO switch drivers are built as loadable modules a user
-must ensure that they are loaded before the enumeration/discovery starts.
-This process can be automated by specifying pre- or post- dependencies in the
-RapidIO-specific modprobe configuration file as shown in the example below.
-
-  File /etc/modprobe.d/rapidio.conf:
-  ----------------------------------
-
-  # Configure RapidIO subsystem modules
-
-  # Set enumerator host destination ID (overrides kernel command line option)
-  options rapidio hdid=-1,2
-
-  # Load RapidIO switch drivers immediately after rapidio core module was loaded
-  softdep rapidio post: idt_gen2 idtcps tsi57x
-
-  # OR :
-
-  # Load RapidIO switch drivers just before rio-scan enumerator module is loaded
-  softdep rio-scan pre: idt_gen2 idtcps tsi57x
-
-  --------------------------
-
-NOTE: In the example above, one of "softdep" commands must be removed or
-commented out to keep required module loading sequence.
-
-A. References
--------------
-
-[1] RapidIO Trade Association. RapidIO Interconnect Specifications.
-    http://www.rapidio.org.
-[2] Rapidio TA. Technology Comparisons.
-    http://www.rapidio.org/education/technology_comparisons/
-[3] RapidIO support for Linux.
-    http://lwn.net/Articles/139118/
-[4] Matt Porter. RapidIO for Linux. Ottawa Linux Symposium, 2005
-    http://www.kernel.org/doc/ols/2005/ols2005v2-pages-43-56.pdf
diff --git a/Documentation/rapidio/rio_cm.rst b/Documentation/rapidio/rio_cm.rst
new file mode 100644
index 000000000000..5294430a7a74
--- /dev/null
+++ b/Documentation/rapidio/rio_cm.rst
@@ -0,0 +1,135 @@
+==========================================================================
+RapidIO subsystem Channelized Messaging character device driver (rio_cm.c)
+==========================================================================
+
+
+1. Overview
+===========
+
+This device driver is the result of collaboration within the RapidIO.org
+Software Task Group (STG) between Texas Instruments, Prodrive Technologies,
+Nokia Networks, BAE and IDT.  Additional input was received from other members
+of RapidIO.org.
+
+The objective was to create a character mode driver interface which exposes
+messaging capabilities of RapidIO endpoint devices (mports) directly
+to applications, in a manner that allows the numerous and varied RapidIO
+implementations to interoperate.
+
+This driver (RIO_CM) provides to user-space applications shared access to
+RapidIO mailbox messaging resources.
+
+RapidIO specification (Part 2) defines that endpoint devices may have up to four
+messaging mailboxes in case of multi-packet message (up to 4KB) and
+up to 64 mailboxes if single-packet messages (up to 256 B) are used. In addition
+to protocol definition limitations, a particular hardware implementation can
+have reduced number of messaging mailboxes.  RapidIO aware applications must
+therefore share the messaging resources of a RapidIO endpoint.
+
+Main purpose of this device driver is to provide RapidIO mailbox messaging
+capability to large number of user-space processes by introducing socket-like
+operations using a single messaging mailbox.  This allows applications to
+use the limited RapidIO messaging hardware resources efficiently.
+
+Most of device driver's operations are supported through 'ioctl' system calls.
+
+When loaded this device driver creates a single file system node named rio_cm
+in /dev directory common for all registered RapidIO mport devices.
+
+Following ioctl commands are available to user-space applications:
+
+- RIO_CM_MPORT_GET_LIST:
+    Returns to caller list of local mport devices that
+    support messaging operations (number of entries up to RIO_MAX_MPORTS).
+    Each list entry is combination of mport's index in the system and RapidIO
+    destination ID assigned to the port.
+- RIO_CM_EP_GET_LIST_SIZE:
+    Returns number of messaging capable remote endpoints
+    in a RapidIO network associated with the specified mport device.
+- RIO_CM_EP_GET_LIST:
+    Returns list of RapidIO destination IDs for messaging
+    capable remote endpoints (peers) available in a RapidIO network associated
+    with the specified mport device.
+- RIO_CM_CHAN_CREATE:
+    Creates RapidIO message exchange channel data structure
+    with channel ID assigned automatically or as requested by a caller.
+- RIO_CM_CHAN_BIND:
+    Binds the specified channel data structure to the specified
+    mport device.
+- RIO_CM_CHAN_LISTEN:
+    Enables listening for connection requests on the specified
+    channel.
+- RIO_CM_CHAN_ACCEPT:
+    Accepts a connection request from peer on the specified
+    channel. If wait timeout for this request is specified by a caller it is
+    a blocking call. If timeout set to 0 this is non-blocking call - ioctl
+    handler checks for a pending connection request and if one is not available
+    exits with -EGAIN error status immediately.
+- RIO_CM_CHAN_CONNECT:
+    Sends a connection request to a remote peer/channel.
+- RIO_CM_CHAN_SEND:
+    Sends a data message through the specified channel.
+    The handler for this request assumes that message buffer specified by
+    a caller includes the reserved space for a packet header required by
+    this driver.
+- RIO_CM_CHAN_RECEIVE:
+    Receives a data message through a connected channel.
+    If the channel does not have an incoming message ready to return this ioctl
+    handler will wait for new message until timeout specified by a caller
+    expires. If timeout value is set to 0, ioctl handler uses a default value
+    defined by MAX_SCHEDULE_TIMEOUT.
+- RIO_CM_CHAN_CLOSE:
+    Closes a specified channel and frees associated buffers.
+    If the specified channel is in the CONNECTED state, sends close notification
+    to the remote peer.
+
+The ioctl command codes and corresponding data structures intended for use by
+user-space applications are defined in 'include/uapi/linux/rio_cm_cdev.h'.
+
+2. Hardware Compatibility
+=========================
+
+This device driver uses standard interfaces defined by kernel RapidIO subsystem
+and therefore it can be used with any mport device driver registered by RapidIO
+subsystem with limitations set by available mport HW implementation of messaging
+mailboxes.
+
+3. Module parameters
+====================
+
+- 'dbg_level'
+      - This parameter allows to control amount of debug information
+        generated by this device driver. This parameter is formed by set of
+        bit masks that correspond to the specific functional block.
+        For mask definitions see 'drivers/rapidio/devices/rio_cm.c'
+        This parameter can be changed dynamically.
+        Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
+
+- 'cmbox'
+      - Number of RapidIO mailbox to use (default value is 1).
+        This parameter allows to set messaging mailbox number that will be used
+        within entire RapidIO network. It can be used when default mailbox is
+        used by other device drivers or is not supported by some nodes in the
+        RapidIO network.
+
+- 'chstart'
+      - Start channel number for dynamic assignment. Default value - 256.
+        Allows to exclude channel numbers below this parameter from dynamic
+        allocation to avoid conflicts with software components that use
+        reserved predefined channel numbers.
+
+4. Known problems
+=================
+
+  None.
+
+5. User-space Applications and API Library
+==========================================
+
+Messaging API library and applications that use this device driver are available
+from RapidIO.org.
+
+6. TODO List
+============
+
+- Add support for system notification messages (reserved channel 0).
diff --git a/Documentation/rapidio/rio_cm.txt b/Documentation/rapidio/rio_cm.txt
deleted file mode 100644
index 27aa401f1126..000000000000
--- a/Documentation/rapidio/rio_cm.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-RapidIO subsystem Channelized Messaging character device driver (rio_cm.c)
-==========================================================================
-
-Version History:
-----------------
-  1.0.0 - Initial driver release.
-
-==========================================================================
-
-I. Overview
-
-This device driver is the result of collaboration within the RapidIO.org
-Software Task Group (STG) between Texas Instruments, Prodrive Technologies,
-Nokia Networks, BAE and IDT.  Additional input was received from other members
-of RapidIO.org.
-
-The objective was to create a character mode driver interface which exposes
-messaging capabilities of RapidIO endpoint devices (mports) directly
-to applications, in a manner that allows the numerous and varied RapidIO
-implementations to interoperate.
-
-This driver (RIO_CM) provides to user-space applications shared access to
-RapidIO mailbox messaging resources.
-
-RapidIO specification (Part 2) defines that endpoint devices may have up to four
-messaging mailboxes in case of multi-packet message (up to 4KB) and
-up to 64 mailboxes if single-packet messages (up to 256 B) are used. In addition
-to protocol definition limitations, a particular hardware implementation can
-have reduced number of messaging mailboxes.  RapidIO aware applications must
-therefore share the messaging resources of a RapidIO endpoint.
-
-Main purpose of this device driver is to provide RapidIO mailbox messaging
-capability to large number of user-space processes by introducing socket-like
-operations using a single messaging mailbox.  This allows applications to
-use the limited RapidIO messaging hardware resources efficiently.
-
-Most of device driver's operations are supported through 'ioctl' system calls.
-
-When loaded this device driver creates a single file system node named rio_cm
-in /dev directory common for all registered RapidIO mport devices.
-
-Following ioctl commands are available to user-space applications:
-
-- RIO_CM_MPORT_GET_LIST : Returns to caller list of local mport devices that
-    support messaging operations (number of entries up to RIO_MAX_MPORTS).
-    Each list entry is combination of mport's index in the system and RapidIO
-    destination ID assigned to the port.
-- RIO_CM_EP_GET_LIST_SIZE : Returns number of messaging capable remote endpoints
-    in a RapidIO network associated with the specified mport device.
-- RIO_CM_EP_GET_LIST : Returns list of RapidIO destination IDs for messaging
-    capable remote endpoints (peers) available in a RapidIO network associated
-    with the specified mport device.
-- RIO_CM_CHAN_CREATE : Creates RapidIO message exchange channel data structure
-    with channel ID assigned automatically or as requested by a caller.
-- RIO_CM_CHAN_BIND : Binds the specified channel data structure to the specified
-    mport device.
-- RIO_CM_CHAN_LISTEN : Enables listening for connection requests on the specified
-    channel.
-- RIO_CM_CHAN_ACCEPT : Accepts a connection request from peer on the specified
-    channel. If wait timeout for this request is specified by a caller it is
-    a blocking call. If timeout set to 0 this is non-blocking call - ioctl
-    handler checks for a pending connection request and if one is not available
-    exits with -EGAIN error status immediately.
-- RIO_CM_CHAN_CONNECT : Sends a connection request to a remote peer/channel.
-- RIO_CM_CHAN_SEND : Sends a data message through the specified channel.
-    The handler for this request assumes that message buffer specified by
-    a caller includes the reserved space for a packet header required by
-    this driver.
-- RIO_CM_CHAN_RECEIVE : Receives a data message through a connected channel.
-    If the channel does not have an incoming message ready to return this ioctl
-    handler will wait for new message until timeout specified by a caller
-    expires. If timeout value is set to 0, ioctl handler uses a default value
-    defined by MAX_SCHEDULE_TIMEOUT.
-- RIO_CM_CHAN_CLOSE : Closes a specified channel and frees associated buffers.
-    If the specified channel is in the CONNECTED state, sends close notification
-    to the remote peer.
-
-The ioctl command codes and corresponding data structures intended for use by
-user-space applications are defined in 'include/uapi/linux/rio_cm_cdev.h'.
-
-II. Hardware Compatibility
-
-This device driver uses standard interfaces defined by kernel RapidIO subsystem
-and therefore it can be used with any mport device driver registered by RapidIO
-subsystem with limitations set by available mport HW implementation of messaging
-mailboxes.
-
-III. Module parameters
-
-- 'dbg_level' - This parameter allows to control amount of debug information
-        generated by this device driver. This parameter is formed by set of
-        bit masks that correspond to the specific functional block.
-        For mask definitions see 'drivers/rapidio/devices/rio_cm.c'
-        This parameter can be changed dynamically.
-        Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
-
-- 'cmbox' - Number of RapidIO mailbox to use (default value is 1).
-        This parameter allows to set messaging mailbox number that will be used
-        within entire RapidIO network. It can be used when default mailbox is
-        used by other device drivers or is not supported by some nodes in the
-        RapidIO network.
-
-- 'chstart' - Start channel number for dynamic assignment. Default value - 256.
-        Allows to exclude channel numbers below this parameter from dynamic
-        allocation to avoid conflicts with software components that use
-        reserved predefined channel numbers.
-
-IV. Known problems
-
-  None.
-
-V. User-space Applications and API Library
-
-Messaging API library and applications that use this device driver are available
-from RapidIO.org.
-
-VI. TODO List
-
-- Add support for system notification messages (reserved channel 0).
diff --git a/Documentation/rapidio/sysfs.rst b/Documentation/rapidio/sysfs.rst
new file mode 100644
index 000000000000..540f72683496
--- /dev/null
+++ b/Documentation/rapidio/sysfs.rst
@@ -0,0 +1,7 @@
+=============
+Sysfs entries
+=============
+
+The RapidIO sysfs files have moved to:
+Documentation/ABI/testing/sysfs-bus-rapidio and
+Documentation/ABI/testing/sysfs-class-rapidio
diff --git a/Documentation/rapidio/sysfs.txt b/Documentation/rapidio/sysfs.txt
deleted file mode 100644
index a1adac888e6e..000000000000
--- a/Documentation/rapidio/sysfs.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-The RapidIO sysfs files have moved to:
-Documentation/ABI/testing/sysfs-bus-rapidio and
-Documentation/ABI/testing/sysfs-class-rapidio
diff --git a/Documentation/rapidio/tsi721.rst b/Documentation/rapidio/tsi721.rst
new file mode 100644
index 000000000000..42aea438cd20
--- /dev/null
+++ b/Documentation/rapidio/tsi721.rst
@@ -0,0 +1,112 @@
+=========================================================================
+RapidIO subsystem mport driver for IDT Tsi721 PCI Express-to-SRIO bridge.
+=========================================================================
+
+1. Overview
+===========
+
+This driver implements all currently defined RapidIO mport callback functions.
+It supports maintenance read and write operations, inbound and outbound RapidIO
+doorbells, inbound maintenance port-writes and RapidIO messaging.
+
+To generate SRIO maintenance transactions this driver uses one of Tsi721 DMA
+channels. This mechanism provides access to larger range of hop counts and
+destination IDs without need for changes in outbound window translation.
+
+RapidIO messaging support uses dedicated messaging channels for each mailbox.
+For inbound messages this driver uses destination ID matching to forward messages
+into the corresponding message queue. Messaging callbacks are implemented to be
+fully compatible with RIONET driver (Ethernet over RapidIO messaging services).
+
+1. Module parameters:
+
+- 'dbg_level'
+      - This parameter allows to control amount of debug information
+        generated by this device driver. This parameter is formed by set of
+        This parameter can be changed bit masks that correspond to the specific
+        functional block.
+        For mask definitions see 'drivers/rapidio/devices/tsi721.h'
+        This parameter can be changed dynamically.
+        Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
+
+- 'dma_desc_per_channel'
+      - This parameter defines number of hardware buffer
+        descriptors allocated for each registered Tsi721 DMA channel.
+        Its default value is 128.
+
+- 'dma_txqueue_sz'
+      - DMA transactions queue size. Defines number of pending
+        transaction requests that can be accepted by each DMA channel.
+        Default value is 16.
+
+- 'dma_sel'
+      - DMA channel selection mask. Bitmask that defines which hardware
+        DMA channels (0 ... 6) will be registered with DmaEngine core.
+        If bit is set to 1, the corresponding DMA channel will be registered.
+        DMA channels not selected by this mask will not be used by this device
+        driver. Default value is 0x7f (use all channels).
+
+- 'pcie_mrrs'
+      - override value for PCIe Maximum Read Request Size (MRRS).
+        This parameter gives an ability to override MRRS value set during PCIe
+        configuration process. Tsi721 supports read request sizes up to 4096B.
+        Value for this parameter must be set as defined by PCIe specification:
+        0 = 128B, 1 = 256B, 2 = 512B, 3 = 1024B, 4 = 2048B and 5 = 4096B.
+        Default value is '-1' (= keep platform setting).
+
+- 'mbox_sel'
+      - RIO messaging MBOX selection mask. This is a bitmask that defines
+        messaging MBOXes are managed by this device driver. Mask bits 0 - 3
+        correspond to MBOX0 - MBOX3. MBOX is under driver's control if the
+        corresponding bit is set to '1'. Default value is 0x0f (= all).
+
+2. Known problems
+=================
+
+  None.
+
+3. DMA Engine Support
+=====================
+
+Tsi721 mport driver supports DMA data transfers between local system memory and
+remote RapidIO devices. This functionality is implemented according to SLAVE
+mode API defined by common Linux kernel DMA Engine framework.
+
+Depending on system requirements RapidIO DMA operations can be included/excluded
+by setting CONFIG_RAPIDIO_DMA_ENGINE option. Tsi721 miniport driver uses seven
+out of eight available BDMA channels to support DMA data transfers.
+One BDMA channel is reserved for generation of maintenance read/write requests.
+
+If Tsi721 mport driver have been built with RAPIDIO_DMA_ENGINE support included,
+this driver will accept DMA-specific module parameter:
+
+  "dma_desc_per_channel"
+			 - defines number of hardware buffer descriptors used by
+                           each BDMA channel of Tsi721 (by default - 128).
+
+4. Version History
+
+  =====   ====================================================================
+  1.1.0   DMA operations re-worked to support data scatter/gather lists larger
+          than hardware buffer descriptors ring.
+  1.0.0   Initial driver release.
+  =====   ====================================================================
+
+5.  License
+===========
+
+  Copyright(c) 2011 Integrated Device Technology, Inc. All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by the Free
+  Software Foundation; either version 2 of the License, or (at your option)
+  any later version.
+
+  This program is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
diff --git a/Documentation/rapidio/tsi721.txt b/Documentation/rapidio/tsi721.txt
deleted file mode 100644
index cd2a2935d51d..000000000000
--- a/Documentation/rapidio/tsi721.txt
+++ /dev/null
@@ -1,97 +0,0 @@
-RapidIO subsystem mport driver for IDT Tsi721 PCI Express-to-SRIO bridge.
-=========================================================================
-
-I. Overview
-
-This driver implements all currently defined RapidIO mport callback functions.
-It supports maintenance read and write operations, inbound and outbound RapidIO
-doorbells, inbound maintenance port-writes and RapidIO messaging.
-
-To generate SRIO maintenance transactions this driver uses one of Tsi721 DMA
-channels. This mechanism provides access to larger range of hop counts and
-destination IDs without need for changes in outbound window translation.
-
-RapidIO messaging support uses dedicated messaging channels for each mailbox.
-For inbound messages this driver uses destination ID matching to forward messages
-into the corresponding message queue. Messaging callbacks are implemented to be
-fully compatible with RIONET driver (Ethernet over RapidIO messaging services).
-
-1. Module parameters:
-- 'dbg_level' - This parameter allows to control amount of debug information
-        generated by this device driver. This parameter is formed by set of
-        This parameter can be changed bit masks that correspond to the specific
-        functional block.
-        For mask definitions see 'drivers/rapidio/devices/tsi721.h'
-        This parameter can be changed dynamically.
-        Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
-
-- 'dma_desc_per_channel' - This parameter defines number of hardware buffer
-        descriptors allocated for each registered Tsi721 DMA channel.
-        Its default value is 128.
-
-- 'dma_txqueue_sz' - DMA transactions queue size. Defines number of pending
-        transaction requests that can be accepted by each DMA channel.
-        Default value is 16.
-
-- 'dma_sel' - DMA channel selection mask. Bitmask that defines which hardware
-        DMA channels (0 ... 6) will be registered with DmaEngine core.
-        If bit is set to 1, the corresponding DMA channel will be registered.
-        DMA channels not selected by this mask will not be used by this device
-        driver. Default value is 0x7f (use all channels).
-
-- 'pcie_mrrs' - override value for PCIe Maximum Read Request Size (MRRS).
-        This parameter gives an ability to override MRRS value set during PCIe
-        configuration process. Tsi721 supports read request sizes up to 4096B.
-        Value for this parameter must be set as defined by PCIe specification:
-        0 = 128B, 1 = 256B, 2 = 512B, 3 = 1024B, 4 = 2048B and 5 = 4096B.
-        Default value is '-1' (= keep platform setting).
-
-- 'mbox_sel' - RIO messaging MBOX selection mask. This is a bitmask that defines
-        messaging MBOXes are managed by this device driver. Mask bits 0 - 3
-        correspond to MBOX0 - MBOX3. MBOX is under driver's control if the
-        corresponding bit is set to '1'. Default value is 0x0f (= all).
-
-II. Known problems
-
-  None.
-
-III. DMA Engine Support
-
-Tsi721 mport driver supports DMA data transfers between local system memory and
-remote RapidIO devices. This functionality is implemented according to SLAVE
-mode API defined by common Linux kernel DMA Engine framework.
-
-Depending on system requirements RapidIO DMA operations can be included/excluded
-by setting CONFIG_RAPIDIO_DMA_ENGINE option. Tsi721 miniport driver uses seven
-out of eight available BDMA channels to support DMA data transfers.
-One BDMA channel is reserved for generation of maintenance read/write requests.
-
-If Tsi721 mport driver have been built with RAPIDIO_DMA_ENGINE support included,
-this driver will accept DMA-specific module parameter:
-  "dma_desc_per_channel" - defines number of hardware buffer descriptors used by
-                           each BDMA channel of Tsi721 (by default - 128).
-
-IV. Version History
-
-  1.1.0 - DMA operations re-worked to support data scatter/gather lists larger
-          than hardware buffer descriptors ring.
-  1.0.0 - Initial driver release.
-
-V.  License
------------------------------------------------
-
-  Copyright(c) 2011 Integrated Device Technology, Inc. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms of the GNU General Public License as published by the Free
-  Software Foundation; either version 2 of the License, or (at your option)
-  any later version.
-
-  This program is distributed in the hope that it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-- 
cgit 


From 39443104c7d3f2b05a4a330fbcef6da68f80d60b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 17:29:24 -0300
Subject: docs: blockdev: convert to ReST

Rename the blockdev documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

The drbd sub-directory contains some graphs and data flows.
Add those too to the documentation.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/admin-guide/kernel-parameters.txt   |  18 +-
 Documentation/blockdev/drbd/README.txt            |  16 -
 Documentation/blockdev/drbd/data-structure-v9.rst |  42 +++
 Documentation/blockdev/drbd/data-structure-v9.txt |  38 --
 Documentation/blockdev/drbd/figures.rst           |  28 ++
 Documentation/blockdev/drbd/index.rst             |  19 +
 Documentation/blockdev/floppy.rst                 | 255 +++++++++++++
 Documentation/blockdev/floppy.txt                 | 245 ------------
 Documentation/blockdev/index.rst                  |  16 +
 Documentation/blockdev/nbd.rst                    |  31 ++
 Documentation/blockdev/nbd.txt                    |  31 --
 Documentation/blockdev/paride.rst                 | 439 ++++++++++++++++++++++
 Documentation/blockdev/paride.txt                 | 417 --------------------
 Documentation/blockdev/ramdisk.rst                | 177 +++++++++
 Documentation/blockdev/ramdisk.txt                | 174 ---------
 Documentation/blockdev/zram.rst                   | 422 +++++++++++++++++++++
 Documentation/blockdev/zram.txt                   | 355 -----------------
 17 files changed, 1438 insertions(+), 1285 deletions(-)
 delete mode 100644 Documentation/blockdev/drbd/README.txt
 create mode 100644 Documentation/blockdev/drbd/data-structure-v9.rst
 delete mode 100644 Documentation/blockdev/drbd/data-structure-v9.txt
 create mode 100644 Documentation/blockdev/drbd/figures.rst
 create mode 100644 Documentation/blockdev/drbd/index.rst
 create mode 100644 Documentation/blockdev/floppy.rst
 delete mode 100644 Documentation/blockdev/floppy.txt
 create mode 100644 Documentation/blockdev/index.rst
 create mode 100644 Documentation/blockdev/nbd.rst
 delete mode 100644 Documentation/blockdev/nbd.txt
 create mode 100644 Documentation/blockdev/paride.rst
 delete mode 100644 Documentation/blockdev/paride.txt
 create mode 100644 Documentation/blockdev/ramdisk.rst
 delete mode 100644 Documentation/blockdev/ramdisk.txt
 create mode 100644 Documentation/blockdev/zram.rst
 delete mode 100644 Documentation/blockdev/zram.txt

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a342dd5c95a9..6b2adda1cc03 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1249,7 +1249,7 @@
 			See also Documentation/fault-injection/.
 
 	floppy=		[HW]
-			See Documentation/blockdev/floppy.txt.
+			See Documentation/blockdev/floppy.rst.
 
 	force_pal_cache_flush
 			[IA-64] Avoid check_sal_cache_flush which may hang on
@@ -2234,7 +2234,7 @@
 	memblock=debug	[KNL] Enable memblock debug messages.
 
 	load_ramdisk=	[RAM] List of ramdisks to load from floppy
-			See Documentation/blockdev/ramdisk.txt.
+			See Documentation/blockdev/ramdisk.rst.
 
 	lockd.nlm_grace_period=P  [NFS] Assign grace period.
 			Format: <integer>
@@ -3268,7 +3268,7 @@
 
 	pcd.		[PARIDE]
 			See header of drivers/block/paride/pcd.c.
-			See also Documentation/blockdev/paride.txt.
+			See also Documentation/blockdev/paride.rst.
 
 	pci=option[,option...]	[PCI] various PCI subsystem options.
 
@@ -3512,7 +3512,7 @@
 			needed on a platform with proper driver support.
 
 	pd.		[PARIDE]
-			See Documentation/blockdev/paride.txt.
+			See Documentation/blockdev/paride.rst.
 
 	pdcchassis=	[PARISC,HW] Disable/Enable PDC Chassis Status codes at
 			boot time.
@@ -3527,10 +3527,10 @@
 			and performance comparison.
 
 	pf.		[PARIDE]
-			See Documentation/blockdev/paride.txt.
+			See Documentation/blockdev/paride.rst.
 
 	pg.		[PARIDE]
-			See Documentation/blockdev/paride.txt.
+			See Documentation/blockdev/paride.rst.
 
 	pirq=		[SMP,APIC] Manual mp-table setup
 			See Documentation/x86/i386/IO-APIC.rst.
@@ -3642,7 +3642,7 @@
 
 	prompt_ramdisk=	[RAM] List of RAM disks to prompt for floppy disk
 			before loading.
-			See Documentation/blockdev/ramdisk.txt.
+			See Documentation/blockdev/ramdisk.rst.
 
 	psi=		[KNL] Enable or disable pressure stall information
 			tracking.
@@ -3664,7 +3664,7 @@
 	pstore.backend=	Specify the name of the pstore backend to use
 
 	pt.		[PARIDE]
-			See Documentation/blockdev/paride.txt.
+			See Documentation/blockdev/paride.rst.
 
 	pti=		[X86_64] Control Page Table Isolation of user and
 			kernel address spaces.  Disabling this feature
@@ -3693,7 +3693,7 @@
 			See Documentation/admin-guide/md.rst.
 
 	ramdisk_size=	[RAM] Sizes of RAM disks in kilobytes
-			See Documentation/blockdev/ramdisk.txt.
+			See Documentation/blockdev/ramdisk.rst.
 
 	random.trust_cpu={on,off}
 			[KNL] Enable or disable trusting the use of the
diff --git a/Documentation/blockdev/drbd/README.txt b/Documentation/blockdev/drbd/README.txt
deleted file mode 100644
index 627b0a1bf35e..000000000000
--- a/Documentation/blockdev/drbd/README.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-Description
-
-  DRBD is a shared-nothing, synchronously replicated block device. It
-  is designed to serve as a building block for high availability
-  clusters and in this context, is a "drop-in" replacement for shared
-  storage. Simplistically, you could see it as a network RAID 1.
-
-  Please visit http://www.drbd.org to find out more.
-
-The here included files are intended to help understand the implementation
-
-DRBD-8.3-data-packets.svg, DRBD-data-packets.svg  
-  relates some functions, and write packets.
-
-conn-states-8.dot, disk-states-8.dot, node-states-8.dot
-  The sub graphs of DRBD's state transitions
diff --git a/Documentation/blockdev/drbd/data-structure-v9.rst b/Documentation/blockdev/drbd/data-structure-v9.rst
new file mode 100644
index 000000000000..66036b901644
--- /dev/null
+++ b/Documentation/blockdev/drbd/data-structure-v9.rst
@@ -0,0 +1,42 @@
+================================
+kernel data structure for DRBD-9
+================================
+
+This describes the in kernel data structure for DRBD-9. Starting with
+Linux v3.14 we are reorganizing DRBD to use this data structure.
+
+Basic Data Structure
+====================
+
+A node has a number of DRBD resources.  Each such resource has a number of
+devices (aka volumes) and connections to other nodes ("peer nodes"). Each DRBD
+device is represented by a block device locally.
+
+The DRBD objects are interconnected to form a matrix as depicted below; a
+drbd_peer_device object sits at each intersection between a drbd_device and a
+drbd_connection::
+
+  /--------------+---------------+.....+---------------\
+  |   resource   |    device     |     |    device     |
+  +--------------+---------------+.....+---------------+
+  |  connection  |  peer_device  |     |  peer_device  |
+  +--------------+---------------+.....+---------------+
+  :              :               :     :               :
+  :              :               :     :               :
+  +--------------+---------------+.....+---------------+
+  |  connection  |  peer_device  |     |  peer_device  |
+  \--------------+---------------+.....+---------------/
+
+In this table, horizontally, devices can be accessed from resources by their
+volume number.  Likewise, peer_devices can be accessed from connections by
+their volume number.  Objects in the vertical direction are connected by double
+linked lists.  There are back pointers from peer_devices to their connections a
+devices, and from connections and devices to their resource.
+
+All resources are in the drbd_resources double-linked list.  In addition, all
+devices can be accessed by their minor device number via the drbd_devices idr.
+
+The drbd_resource, drbd_connection, and drbd_device objects are reference
+counted.  The peer_device objects only serve to establish the links between
+devices and connections; their lifetime is determined by the lifetime of the
+device and connection which they reference.
diff --git a/Documentation/blockdev/drbd/data-structure-v9.txt b/Documentation/blockdev/drbd/data-structure-v9.txt
deleted file mode 100644
index 1e52a0e32624..000000000000
--- a/Documentation/blockdev/drbd/data-structure-v9.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-This describes the in kernel data structure for DRBD-9. Starting with
-Linux v3.14 we are reorganizing DRBD to use this data structure.
-
-Basic Data Structure
-====================
-
-A node has a number of DRBD resources.  Each such resource has a number of
-devices (aka volumes) and connections to other nodes ("peer nodes"). Each DRBD
-device is represented by a block device locally.
-
-The DRBD objects are interconnected to form a matrix as depicted below; a
-drbd_peer_device object sits at each intersection between a drbd_device and a
-drbd_connection:
-
-  /--------------+---------------+.....+---------------\
-  |   resource   |    device     |     |    device     |
-  +--------------+---------------+.....+---------------+
-  |  connection  |  peer_device  |     |  peer_device  |
-  +--------------+---------------+.....+---------------+
-  :              :               :     :               :
-  :              :               :     :               :
-  +--------------+---------------+.....+---------------+
-  |  connection  |  peer_device  |     |  peer_device  |
-  \--------------+---------------+.....+---------------/
-
-In this table, horizontally, devices can be accessed from resources by their
-volume number.  Likewise, peer_devices can be accessed from connections by
-their volume number.  Objects in the vertical direction are connected by double
-linked lists.  There are back pointers from peer_devices to their connections a
-devices, and from connections and devices to their resource.
-
-All resources are in the drbd_resources double-linked list.  In addition, all
-devices can be accessed by their minor device number via the drbd_devices idr.
-
-The drbd_resource, drbd_connection, and drbd_device objects are reference
-counted.  The peer_device objects only serve to establish the links between
-devices and connections; their lifetime is determined by the lifetime of the
-device and connection which they reference.
diff --git a/Documentation/blockdev/drbd/figures.rst b/Documentation/blockdev/drbd/figures.rst
new file mode 100644
index 000000000000..3e3fd4b8a478
--- /dev/null
+++ b/Documentation/blockdev/drbd/figures.rst
@@ -0,0 +1,28 @@
+.. The here included files are intended to help understand the implementation
+
+Data flows that Relate some functions, and write packets
+========================================================
+
+.. kernel-figure:: DRBD-8.3-data-packets.svg
+    :alt:   DRBD-8.3-data-packets.svg
+    :align: center
+
+.. kernel-figure:: DRBD-data-packets.svg
+    :alt:   DRBD-data-packets.svg
+    :align: center
+
+
+Sub graphs of DRBD's state transitions
+======================================
+
+.. kernel-figure:: conn-states-8.dot
+    :alt:   conn-states-8.dot
+    :align: center
+
+.. kernel-figure:: disk-states-8.dot
+    :alt:   disk-states-8.dot
+    :align: center
+
+.. kernel-figure:: node-states-8.dot
+    :alt:   node-states-8.dot
+    :align: center
diff --git a/Documentation/blockdev/drbd/index.rst b/Documentation/blockdev/drbd/index.rst
new file mode 100644
index 000000000000..68ecd5c113e9
--- /dev/null
+++ b/Documentation/blockdev/drbd/index.rst
@@ -0,0 +1,19 @@
+==========================================
+Distributed Replicated Block Device - DRBD
+==========================================
+
+Description
+===========
+
+  DRBD is a shared-nothing, synchronously replicated block device. It
+  is designed to serve as a building block for high availability
+  clusters and in this context, is a "drop-in" replacement for shared
+  storage. Simplistically, you could see it as a network RAID 1.
+
+  Please visit http://www.drbd.org to find out more.
+
+.. toctree::
+   :maxdepth: 1
+
+   data-structure-v9
+   figures
diff --git a/Documentation/blockdev/floppy.rst b/Documentation/blockdev/floppy.rst
new file mode 100644
index 000000000000..4a8f31cf4139
--- /dev/null
+++ b/Documentation/blockdev/floppy.rst
@@ -0,0 +1,255 @@
+=============
+Floppy Driver
+=============
+
+FAQ list:
+=========
+
+A FAQ list may be found in the fdutils package (see below), and also
+at <http://fdutils.linux.lu/faq.html>.
+
+
+LILO configuration options (Thinkpad users, read this)
+======================================================
+
+The floppy driver is configured using the 'floppy=' option in
+lilo. This option can be typed at the boot prompt, or entered in the
+lilo configuration file.
+
+Example: If your kernel is called linux-2.6.9, type the following line
+at the lilo boot prompt (if you have a thinkpad)::
+
+ linux-2.6.9 floppy=thinkpad
+
+You may also enter the following line in /etc/lilo.conf, in the description
+of linux-2.6.9::
+
+ append = "floppy=thinkpad"
+
+Several floppy related options may be given, example::
+
+ linux-2.6.9 floppy=daring floppy=two_fdc
+ append = "floppy=daring floppy=two_fdc"
+
+If you give options both in the lilo config file and on the boot
+prompt, the option strings of both places are concatenated, the boot
+prompt options coming last. That's why there are also options to
+restore the default behavior.
+
+
+Module configuration options
+============================
+
+If you use the floppy driver as a module, use the following syntax::
+
+	modprobe floppy floppy="<options>"
+
+Example::
+
+	modprobe floppy floppy="omnibook messages"
+
+If you need certain options enabled every time you load the floppy driver,
+you can put::
+
+	options floppy floppy="omnibook messages"
+
+in a configuration file in /etc/modprobe.d/.
+
+
+The floppy driver related options are:
+
+ floppy=asus_pci
+	Sets the bit mask to allow only units 0 and 1. (default)
+
+ floppy=daring
+	Tells the floppy driver that you have a well behaved floppy controller.
+	This allows more efficient and smoother operation, but may fail on
+	certain controllers. This may speed up certain operations.
+
+ floppy=0,daring
+	Tells the floppy driver that your floppy controller should be used
+	with caution.
+
+ floppy=one_fdc
+	Tells the floppy driver that you have only one floppy controller.
+	(default)
+
+ floppy=two_fdc / floppy=<address>,two_fdc
+	Tells the floppy driver that you have two floppy controllers.
+	The second floppy controller is assumed to be at <address>.
+	This option is not needed if the second controller is at address
+	0x370, and if you use the 'cmos' option.
+
+ floppy=thinkpad
+	Tells the floppy driver that you have a Thinkpad. Thinkpads use an
+	inverted convention for the disk change line.
+
+ floppy=0,thinkpad
+	Tells the floppy driver that you don't have a Thinkpad.
+
+ floppy=omnibook / floppy=nodma
+	Tells the floppy driver not to use Dma for data transfers.
+	This is needed on HP Omnibooks, which don't have a workable
+	DMA channel for the floppy driver. This option is also useful
+	if you frequently get "Unable to allocate DMA memory" messages.
+	Indeed, dma memory needs to be continuous in physical memory,
+	and is thus harder to find, whereas non-dma buffers may be
+	allocated in virtual memory. However, I advise against this if
+	you have an FDC without a FIFO (8272A or 82072). 82072A and
+	later are OK. You also need at least a 486 to use nodma.
+	If you use nodma mode, I suggest you also set the FIFO
+	threshold to 10 or lower, in order to limit the number of data
+	transfer interrupts.
+
+	If you have a FIFO-able FDC, the floppy driver automatically
+	falls back on non DMA mode if no DMA-able memory can be found.
+	If you want to avoid this, explicitly ask for 'yesdma'.
+
+ floppy=yesdma
+	Tells the floppy driver that a workable DMA channel is available.
+	(default)
+
+ floppy=nofifo
+	Disables the FIFO entirely. This is needed if you get "Bus
+	master arbitration error" messages from your Ethernet card (or
+	from other devices) while accessing the floppy.
+
+ floppy=usefifo
+	Enables the FIFO. (default)
+
+ floppy=<threshold>,fifo_depth
+	Sets the FIFO threshold. This is mostly relevant in DMA
+	mode. If this is higher, the floppy driver tolerates more
+	interrupt latency, but it triggers more interrupts (i.e. it
+	imposes more load on the rest of the system). If this is
+	lower, the interrupt latency should be lower too (faster
+	processor). The benefit of a lower threshold is less
+	interrupts.
+
+	To tune the fifo threshold, switch on over/underrun messages
+	using 'floppycontrol --messages'. Then access a floppy
+	disk. If you get a huge amount of "Over/Underrun - retrying"
+	messages, then the fifo threshold is too low. Try with a
+	higher value, until you only get an occasional Over/Underrun.
+	It is a good idea to compile the floppy driver as a module
+	when doing this tuning. Indeed, it allows to try different
+	fifo values without rebooting the machine for each test. Note
+	that you need to do 'floppycontrol --messages' every time you
+	re-insert the module.
+
+	Usually, tuning the fifo threshold should not be needed, as
+	the default (0xa) is reasonable.
+
+ floppy=<drive>,<type>,cmos
+	Sets the CMOS type of <drive> to <type>. This is mandatory if
+	you have more than two floppy drives (only two can be
+	described in the physical CMOS), or if your BIOS uses
+	non-standard CMOS types. The CMOS types are:
+
+	       ==  ==================================
+		0  Use the value of the physical CMOS
+		1  5 1/4 DD
+		2  5 1/4 HD
+		3  3 1/2 DD
+		4  3 1/2 HD
+		5  3 1/2 ED
+		6  3 1/2 ED
+	       16  unknown or not installed
+	       ==  ==================================
+
+	(Note: there are two valid types for ED drives. This is because 5 was
+	initially chosen to represent floppy *tapes*, and 6 for ED drives.
+	AMI ignored this, and used 5 for ED drives. That's why the floppy
+	driver handles both.)
+
+ floppy=unexpected_interrupts
+	Print a warning message when an unexpected interrupt is received.
+	(default)
+
+ floppy=no_unexpected_interrupts / floppy=L40SX
+	Don't print a message when an unexpected interrupt is received. This
+	is needed on IBM L40SX laptops in certain video modes. (There seems
+	to be an interaction between video and floppy. The unexpected
+	interrupts affect only performance, and can be safely ignored.)
+
+ floppy=broken_dcl
+	Don't use the disk change line, but assume that the disk was
+	changed whenever the device node is reopened. Needed on some
+	boxes where the disk change line is broken or unsupported.
+	This should be regarded as a stopgap measure, indeed it makes
+	floppy operation less efficient due to unneeded cache
+	flushings, and slightly more unreliable. Please verify your
+	cable, connection and jumper settings if you have any DCL
+	problems. However, some older drives, and also some laptops
+	are known not to have a DCL.
+
+ floppy=debug
+	Print debugging messages.
+
+ floppy=messages
+	Print informational messages for some operations (disk change
+	notifications, warnings about over and underruns, and about
+	autodetection).
+
+ floppy=silent_dcl_clear
+	Uses a less noisy way to clear the disk change line (which
+	doesn't involve seeks). Implied by 'daring' option.
+
+ floppy=<nr>,irq
+	Sets the floppy IRQ to <nr> instead of 6.
+
+ floppy=<nr>,dma
+	Sets the floppy DMA channel to <nr> instead of 2.
+
+ floppy=slow
+	Use PS/2 stepping rate::
+
+	   PS/2 floppies have much slower step rates than regular floppies.
+	   It's been recommended that take about 1/4 of the default speed
+	   in some more extreme cases.
+
+
+Supporting utilities and additional documentation:
+==================================================
+
+Additional parameters of the floppy driver can be configured at
+runtime. Utilities which do this can be found in the fdutils package.
+This package also contains a new version of mtools which allows to
+access high capacity disks (up to 1992K on a high density 3 1/2 disk!).
+It also contains additional documentation about the floppy driver.
+
+The latest version can be found at fdutils homepage:
+
+ http://fdutils.linux.lu
+
+The fdutils releases can be found at:
+
+ http://fdutils.linux.lu/download.html
+
+ http://www.tux.org/pub/knaff/fdutils/
+
+ ftp://metalab.unc.edu/pub/Linux/utils/disk-management/
+
+Reporting problems about the floppy driver
+==========================================
+
+If you have a question or a bug report about the floppy driver, mail
+me at Alain.Knaff@poboxes.com . If you post to Usenet, preferably use
+comp.os.linux.hardware. As the volume in these groups is rather high,
+be sure to include the word "floppy" (or "FLOPPY") in the subject
+line.  If the reported problem happens when mounting floppy disks, be
+sure to mention also the type of the filesystem in the subject line.
+
+Be sure to read the FAQ before mailing/posting any bug reports!
+
+Alain
+
+Changelog
+=========
+
+10-30-2004 :
+		Cleanup, updating, add reference to module configuration.
+		James Nelson <james4765@gmail.com>
+
+6-3-2000 :
+		Original Document
diff --git a/Documentation/blockdev/floppy.txt b/Documentation/blockdev/floppy.txt
deleted file mode 100644
index e2240f5ab64d..000000000000
--- a/Documentation/blockdev/floppy.txt
+++ /dev/null
@@ -1,245 +0,0 @@
-This file describes the floppy driver.
-
-FAQ list:
-=========
-
- A FAQ list may be found in the fdutils package (see below), and also
-at <http://fdutils.linux.lu/faq.html>.
-
-
-LILO configuration options (Thinkpad users, read this)
-======================================================
-
- The floppy driver is configured using the 'floppy=' option in
-lilo. This option can be typed at the boot prompt, or entered in the
-lilo configuration file.
-
- Example: If your kernel is called linux-2.6.9, type the following line
-at the lilo boot prompt (if you have a thinkpad):
-
- linux-2.6.9 floppy=thinkpad
-
-You may also enter the following line in /etc/lilo.conf, in the description
-of linux-2.6.9:
-
- append = "floppy=thinkpad"
-
- Several floppy related options may be given, example:
-
- linux-2.6.9 floppy=daring floppy=two_fdc
- append = "floppy=daring floppy=two_fdc"
-
- If you give options both in the lilo config file and on the boot
-prompt, the option strings of both places are concatenated, the boot
-prompt options coming last. That's why there are also options to
-restore the default behavior.
-
-
-Module configuration options
-============================
-
- If you use the floppy driver as a module, use the following syntax:
-modprobe floppy floppy="<options>"
-
-Example:
- modprobe floppy floppy="omnibook messages"
-
- If you need certain options enabled every time you load the floppy driver,
-you can put:
-
- options floppy floppy="omnibook messages"
-
-in a configuration file in /etc/modprobe.d/.
-
-
- The floppy driver related options are:
-
- floppy=asus_pci
-	Sets the bit mask to allow only units 0 and 1. (default)
-
- floppy=daring
-	Tells the floppy driver that you have a well behaved floppy controller.
-	This allows more efficient and smoother operation, but may fail on
-	certain controllers. This may speed up certain operations.
-
- floppy=0,daring
-	Tells the floppy driver that your floppy controller should be used
-	with caution.
-
- floppy=one_fdc
-	Tells the floppy driver that you have only one floppy controller.
-	(default)
-
- floppy=two_fdc
- floppy=<address>,two_fdc
-	Tells the floppy driver that you have two floppy controllers.
-	The second floppy controller is assumed to be at <address>.
-	This option is not needed if the second controller is at address
-	0x370, and if you use the 'cmos' option.
-
- floppy=thinkpad
-	Tells the floppy driver that you have a Thinkpad. Thinkpads use an
-	inverted convention for the disk change line.
-
- floppy=0,thinkpad
-	Tells the floppy driver that you don't have a Thinkpad.
-
- floppy=omnibook
- floppy=nodma
-	Tells the floppy driver not to use Dma for data transfers.
-	This is needed on HP Omnibooks, which don't have a workable
-	DMA channel for the floppy driver. This option is also useful
-	if you frequently get "Unable to allocate DMA memory" messages.
-	Indeed, dma memory needs to be continuous in physical memory,
-	and is thus harder to find, whereas non-dma buffers may be
-	allocated in virtual memory. However, I advise against this if
-	you have an FDC without a FIFO (8272A or 82072). 82072A and
-	later are OK. You also need at least a 486 to use nodma.
-	If you use nodma mode, I suggest you also set the FIFO
-	threshold to 10 or lower, in order to limit the number of data
-	transfer interrupts.
-
-	If you have a FIFO-able FDC, the floppy driver automatically
-	falls back on non DMA mode if no DMA-able memory can be found.
-	If you want to avoid this, explicitly ask for 'yesdma'.
-
- floppy=yesdma
-	Tells the floppy driver that a workable DMA channel is available.
-	(default)
-
- floppy=nofifo
-	Disables the FIFO entirely. This is needed if you get "Bus
-	master arbitration error" messages from your Ethernet card (or
-	from other devices) while accessing the floppy.
-
- floppy=usefifo
-	Enables the FIFO. (default)
-
- floppy=<threshold>,fifo_depth
-	Sets the FIFO threshold. This is mostly relevant in DMA
-	mode. If this is higher, the floppy driver tolerates more
-	interrupt latency, but it triggers more interrupts (i.e. it
-	imposes more load on the rest of the system). If this is
-	lower, the interrupt latency should be lower too (faster
-	processor). The benefit of a lower threshold is less
-	interrupts.
-
-	To tune the fifo threshold, switch on over/underrun messages
-	using 'floppycontrol --messages'. Then access a floppy
-	disk. If you get a huge amount of "Over/Underrun - retrying"
-	messages, then the fifo threshold is too low. Try with a
-	higher value, until you only get an occasional Over/Underrun.
-	It is a good idea to compile the floppy driver as a module
-	when doing this tuning. Indeed, it allows to try different
-	fifo values without rebooting the machine for each test. Note
-	that you need to do 'floppycontrol --messages' every time you
-	re-insert the module.
-
-	Usually, tuning the fifo threshold should not be needed, as
-	the default (0xa) is reasonable.
-
- floppy=<drive>,<type>,cmos
-	Sets the CMOS type of <drive> to <type>. This is mandatory if
-	you have more than two floppy drives (only two can be
-	described in the physical CMOS), or if your BIOS uses
-	non-standard CMOS types. The CMOS types are:
-
-		0 - Use the value of the physical CMOS
-		1 - 5 1/4 DD
-		2 - 5 1/4 HD
-		3 - 3 1/2 DD
-		4 - 3 1/2 HD
-		5 - 3 1/2 ED
-		6 - 3 1/2 ED
-	       16 - unknown or not installed
-
-	(Note: there are two valid types for ED drives. This is because 5 was
-	initially chosen to represent floppy *tapes*, and 6 for ED drives.
-	AMI ignored this, and used 5 for ED drives. That's why the floppy
-	driver handles both.)
-
- floppy=unexpected_interrupts
-	Print a warning message when an unexpected interrupt is received.
-	(default)
-
- floppy=no_unexpected_interrupts
- floppy=L40SX
-	Don't print a message when an unexpected interrupt is received. This
-	is needed on IBM L40SX laptops in certain video modes. (There seems
-	to be an interaction between video and floppy. The unexpected
-	interrupts affect only performance, and can be safely ignored.)
-
- floppy=broken_dcl
-	Don't use the disk change line, but assume that the disk was
-	changed whenever the device node is reopened. Needed on some
-	boxes where the disk change line is broken or unsupported.
-	This should be regarded as a stopgap measure, indeed it makes
-	floppy operation less efficient due to unneeded cache
-	flushings, and slightly more unreliable. Please verify your
-	cable, connection and jumper settings if you have any DCL
-	problems. However, some older drives, and also some laptops
-	are known not to have a DCL.
-
- floppy=debug
-	Print debugging messages.
-
- floppy=messages
-	Print informational messages for some operations (disk change
-	notifications, warnings about over and underruns, and about
-	autodetection).
-
- floppy=silent_dcl_clear
-	Uses a less noisy way to clear the disk change line (which
-	doesn't involve seeks). Implied by 'daring' option.
-
- floppy=<nr>,irq
-	Sets the floppy IRQ to <nr> instead of 6.
-
- floppy=<nr>,dma
-	Sets the floppy DMA channel to <nr> instead of 2.
-
- floppy=slow
-	Use PS/2 stepping rate:
-	 " PS/2 floppies have much slower step rates than regular floppies.
-	   It's been recommended that take about 1/4 of the default speed
-	   in some more extreme cases."
-
-
-Supporting utilities and additional documentation:
-==================================================
-
- Additional parameters of the floppy driver can be configured at
-runtime. Utilities which do this can be found in the fdutils package.
-This package also contains a new version of mtools which allows to
-access high capacity disks (up to 1992K on a high density 3 1/2 disk!).
-It also contains additional documentation about the floppy driver.
-
-The latest version can be found at fdutils homepage:
- http://fdutils.linux.lu
-
-The fdutils releases can be found at:
- http://fdutils.linux.lu/download.html
- http://www.tux.org/pub/knaff/fdutils/
- ftp://metalab.unc.edu/pub/Linux/utils/disk-management/
-
-Reporting problems about the floppy driver
-==========================================
-
- If you have a question or a bug report about the floppy driver, mail
-me at Alain.Knaff@poboxes.com . If you post to Usenet, preferably use
-comp.os.linux.hardware. As the volume in these groups is rather high,
-be sure to include the word "floppy" (or "FLOPPY") in the subject
-line.  If the reported problem happens when mounting floppy disks, be
-sure to mention also the type of the filesystem in the subject line.
-
- Be sure to read the FAQ before mailing/posting any bug reports!
-
- Alain
-
-Changelog
-=========
-
-10-30-2004 :	Cleanup, updating, add reference to module configuration.
-		James Nelson <james4765@gmail.com>
-
-6-3-2000 :	Original Document
diff --git a/Documentation/blockdev/index.rst b/Documentation/blockdev/index.rst
new file mode 100644
index 000000000000..a9af6ed8b4aa
--- /dev/null
+++ b/Documentation/blockdev/index.rst
@@ -0,0 +1,16 @@
+:orphan:
+
+===========================
+The Linux RapidIO Subsystem
+===========================
+
+.. toctree::
+   :maxdepth: 1
+
+   floppy
+   nbd
+   paride
+   ramdisk
+   zram
+
+   drbd/index
diff --git a/Documentation/blockdev/nbd.rst b/Documentation/blockdev/nbd.rst
new file mode 100644
index 000000000000..d78dfe559dcf
--- /dev/null
+++ b/Documentation/blockdev/nbd.rst
@@ -0,0 +1,31 @@
+==================================
+Network Block Device (TCP version)
+==================================
+
+1) Overview
+-----------
+
+What is it: With this compiled in the kernel (or as a module), Linux
+can use a remote server as one of its block devices. So every time
+the client computer wants to read, e.g., /dev/nb0, it sends a
+request over TCP to the server, which will reply with the data read.
+This can be used for stations with low disk space (or even diskless)
+to borrow disk space from another computer.
+Unlike NFS, it is possible to put any filesystem on it, etc.
+
+For more information, or to download the nbd-client and nbd-server
+tools, go to http://nbd.sf.net/.
+
+The nbd kernel module need only be installed on the client
+system, as the nbd-server is completely in userspace. In fact,
+the nbd-server has been successfully ported to other operating
+systems, including Windows.
+
+A) NBD parameters
+-----------------
+
+max_part
+	Number of partitions per device (default: 0).
+
+nbds_max
+	Number of block devices that should be initialized (default: 16).
diff --git a/Documentation/blockdev/nbd.txt b/Documentation/blockdev/nbd.txt
deleted file mode 100644
index db242ea2bce8..000000000000
--- a/Documentation/blockdev/nbd.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Network Block Device (TCP version)
-==================================
-
-1) Overview
------------
-
-What is it: With this compiled in the kernel (or as a module), Linux
-can use a remote server as one of its block devices. So every time
-the client computer wants to read, e.g., /dev/nb0, it sends a
-request over TCP to the server, which will reply with the data read.
-This can be used for stations with low disk space (or even diskless)
-to borrow disk space from another computer.
-Unlike NFS, it is possible to put any filesystem on it, etc.
-
-For more information, or to download the nbd-client and nbd-server
-tools, go to http://nbd.sf.net/.
-
-The nbd kernel module need only be installed on the client
-system, as the nbd-server is completely in userspace. In fact,
-the nbd-server has been successfully ported to other operating
-systems, including Windows.
-
-A) NBD parameters
------------------
-
-max_part
-	Number of partitions per device (default: 0).
-
-nbds_max
-	Number of block devices that should be initialized (default: 16).
-
diff --git a/Documentation/blockdev/paride.rst b/Documentation/blockdev/paride.rst
new file mode 100644
index 000000000000..87b4278bf314
--- /dev/null
+++ b/Documentation/blockdev/paride.rst
@@ -0,0 +1,439 @@
+===================================
+Linux and parallel port IDE devices
+===================================
+
+PARIDE v1.03   (c) 1997-8  Grant Guenther <grant@torque.net>
+
+1. Introduction
+===============
+
+Owing to the simplicity and near universality of the parallel port interface
+to personal computers, many external devices such as portable hard-disk,
+CD-ROM, LS-120 and tape drives use the parallel port to connect to their
+host computer.  While some devices (notably scanners) use ad-hoc methods
+to pass commands and data through the parallel port interface, most
+external devices are actually identical to an internal model, but with
+a parallel-port adapter chip added in.  Some of the original parallel port
+adapters were little more than mechanisms for multiplexing a SCSI bus.
+(The Iomega PPA-3 adapter used in the ZIP drives is an example of this
+approach).  Most current designs, however, take a different approach.
+The adapter chip reproduces a small ISA or IDE bus in the external device
+and the communication protocol provides operations for reading and writing
+device registers, as well as data block transfer functions.  Sometimes,
+the device being addressed via the parallel cable is a standard SCSI
+controller like an NCR 5380.  The "ditto" family of external tape
+drives use the ISA replicator to interface a floppy disk controller,
+which is then connected to a floppy-tape mechanism.  The vast majority
+of external parallel port devices, however, are now based on standard
+IDE type devices, which require no intermediate controller.  If one
+were to open up a parallel port CD-ROM drive, for instance, one would
+find a standard ATAPI CD-ROM drive, a power supply, and a single adapter
+that interconnected a standard PC parallel port cable and a standard
+IDE cable.  It is usually possible to exchange the CD-ROM device with
+any other device using the IDE interface.
+
+The document describes the support in Linux for parallel port IDE
+devices.  It does not cover parallel port SCSI devices, "ditto" tape
+drives or scanners.  Many different devices are supported by the
+parallel port IDE subsystem, including:
+
+	- MicroSolutions backpack CD-ROM
+	- MicroSolutions backpack PD/CD
+	- MicroSolutions backpack hard-drives
+	- MicroSolutions backpack 8000t tape drive
+	- SyQuest EZ-135, EZ-230 & SparQ drives
+	- Avatar Shark
+	- Imation Superdisk LS-120
+	- Maxell Superdisk LS-120
+	- FreeCom Power CD
+	- Hewlett-Packard 5GB and 8GB tape drives
+	- Hewlett-Packard 7100 and 7200 CD-RW drives
+
+as well as most of the clone and no-name products on the market.
+
+To support such a wide range of devices, PARIDE, the parallel port IDE
+subsystem, is actually structured in three parts.   There is a base
+paride module which provides a registry and some common methods for
+accessing the parallel ports.  The second component is a set of
+high-level drivers for each of the different types of supported devices:
+
+	===	=============
+	pd	IDE disk
+	pcd	ATAPI CD-ROM
+	pf	ATAPI disk
+	pt	ATAPI tape
+	pg	ATAPI generic
+	===	=============
+
+(Currently, the pg driver is only used with CD-R drives).
+
+The high-level drivers function according to the relevant standards.
+The third component of PARIDE is a set of low-level protocol drivers
+for each of the parallel port IDE adapter chips.  Thanks to the interest
+and encouragement of Linux users from many parts of the world,
+support is available for almost all known adapter protocols:
+
+	====    ====================================== ====
+        aten    ATEN EH-100                            (HK)
+        bpck    Microsolutions backpack                (US)
+        comm    DataStor (old-type) "commuter" adapter (TW)
+        dstr    DataStor EP-2000                       (TW)
+        epat    Shuttle EPAT                           (UK)
+        epia    Shuttle EPIA                           (UK)
+	fit2    FIT TD-2000			       (US)
+	fit3    FIT TD-3000			       (US)
+	friq    Freecom IQ cable                       (DE)
+        frpw    Freecom Power                          (DE)
+        kbic    KingByte KBIC-951A and KBIC-971A       (TW)
+	ktti    KT Technology PHd adapter              (SG)
+        on20    OnSpec 90c20                           (US)
+        on26    OnSpec 90c26                           (US)
+	====    ====================================== ====
+
+
+2. Using the PARIDE subsystem
+=============================
+
+While configuring the Linux kernel, you may choose either to build
+the PARIDE drivers into your kernel, or to build them as modules.
+
+In either case, you will need to select "Parallel port IDE device support"
+as well as at least one of the high-level drivers and at least one
+of the parallel port communication protocols.  If you do not know
+what kind of parallel port adapter is used in your drive, you could
+begin by checking the file names and any text files on your DOS
+installation floppy.  Alternatively, you can look at the markings on
+the adapter chip itself.  That's usually sufficient to identify the
+correct device.
+
+You can actually select all the protocol modules, and allow the PARIDE
+subsystem to try them all for you.
+
+For the "brand-name" products listed above, here are the protocol
+and high-level drivers that you would use:
+
+	================	============	======	========
+	Manufacturer		Model		Driver	Protocol
+	================	============	======	========
+	MicroSolutions		CD-ROM		pcd	bpck
+	MicroSolutions		PD drive	pf	bpck
+	MicroSolutions		hard-drive	pd	bpck
+	MicroSolutions          8000t tape      pt      bpck
+	SyQuest			EZ, SparQ	pd	epat
+	Imation			Superdisk	pf	epat
+	Maxell                  Superdisk       pf      friq
+	Avatar			Shark		pd	epat
+	FreeCom			CD-ROM		pcd	frpw
+	Hewlett-Packard		5GB Tape	pt	epat
+	Hewlett-Packard		7200e (CD)	pcd	epat
+	Hewlett-Packard		7200e (CD-R)	pg	epat
+	================	============	======	========
+
+2.1  Configuring built-in drivers
+---------------------------------
+
+We recommend that you get to know how the drivers work and how to
+configure them as loadable modules, before attempting to compile a
+kernel with the drivers built-in.
+
+If you built all of your PARIDE support directly into your kernel,
+and you have just a single parallel port IDE device, your kernel should
+locate it automatically for you.  If you have more than one device,
+you may need to give some command line options to your bootloader
+(eg: LILO), how to do that is beyond the scope of this document.
+
+The high-level drivers accept a number of command line parameters, all
+of which are documented in the source files in linux/drivers/block/paride.
+By default, each driver will automatically try all parallel ports it
+can find, and all protocol types that have been installed, until it finds
+a parallel port IDE adapter.  Once it finds one, the probe stops.  So,
+if you have more than one device, you will need to tell the drivers
+how to identify them.  This requires specifying the port address, the
+protocol identification number and, for some devices, the drive's
+chain ID.  While your system is booting, a number of messages are
+displayed on the console.  Like all such messages, they can be
+reviewed with the 'dmesg' command.  Among those messages will be
+some lines like::
+
+	paride: bpck registered as protocol 0
+	paride: epat registered as protocol 1
+
+The numbers will always be the same until you build a new kernel with
+different protocol selections.  You should note these numbers as you
+will need them to identify the devices.
+
+If you happen to be using a MicroSolutions backpack device, you will
+also need to know the unit ID number for each drive.  This is usually
+the last two digits of the drive's serial number (but read MicroSolutions'
+documentation about this).
+
+As an example, let's assume that you have a MicroSolutions PD/CD drive
+with unit ID number 36 connected to the parallel port at 0x378, a SyQuest
+EZ-135 connected to the chained port on the PD/CD drive and also an
+Imation Superdisk connected to port 0x278.  You could give the following
+options on your boot command::
+
+	pd.drive0=0x378,1 pf.drive0=0x278,1 pf.drive1=0x378,0,36
+
+In the last option, pf.drive1 configures device /dev/pf1, the 0x378
+is the parallel port base address, the 0 is the protocol registration
+number and 36 is the chain ID.
+
+Please note:  while PARIDE will work both with and without the
+PARPORT parallel port sharing system that is included by the
+"Parallel port support" option, PARPORT must be included and enabled
+if you want to use chains of devices on the same parallel port.
+
+2.2  Loading and configuring PARIDE as modules
+----------------------------------------------
+
+It is much faster and simpler to get to understand the PARIDE drivers
+if you use them as loadable kernel modules.
+
+Note 1:
+	using these drivers with the "kerneld" automatic module loading
+	system is not recommended for beginners, and is not documented here.
+
+Note 2:
+	if you build PARPORT support as a loadable module, PARIDE must
+	also be built as loadable modules, and PARPORT must be loaded before
+	the PARIDE modules.
+
+To use PARIDE, you must begin by::
+
+	insmod paride
+
+this loads a base module which provides a registry for the protocols,
+among other tasks.
+
+Then, load as many of the protocol modules as you think you might need.
+As you load each module, it will register the protocols that it supports,
+and print a log message to your kernel log file and your console. For
+example::
+
+	# insmod epat
+	paride: epat registered as protocol 0
+	# insmod kbic
+	paride: k951 registered as protocol 1
+        paride: k971 registered as protocol 2
+
+Finally, you can load high-level drivers for each kind of device that
+you have connected.  By default, each driver will autoprobe for a single
+device, but you can support up to four similar devices by giving their
+individual co-ordinates when you load the driver.
+
+For example, if you had two no-name CD-ROM drives both using the
+KingByte KBIC-951A adapter, one on port 0x378 and the other on 0x3bc
+you could give the following command::
+
+	# insmod pcd drive0=0x378,1 drive1=0x3bc,1
+
+For most adapters, giving a port address and protocol number is sufficient,
+but check the source files in linux/drivers/block/paride for more
+information.  (Hopefully someone will write some man pages one day !).
+
+As another example, here's what happens when PARPORT is installed, and
+a SyQuest EZ-135 is attached to port 0x378::
+
+	# insmod paride
+	paride: version 1.0 installed
+	# insmod epat
+	paride: epat registered as protocol 0
+	# insmod pd
+	pd: pd version 1.0, major 45, cluster 64, nice 0
+	pda: Sharing parport1 at 0x378
+	pda: epat 1.0, Shuttle EPAT chip c3 at 0x378, mode 5 (EPP-32), delay 1
+	pda: SyQuest EZ135A, 262144 blocks [128M], (512/16/32), removable media
+	 pda: pda1
+
+Note that the last line is the output from the generic partition table
+scanner - in this case it reports that it has found a disk with one partition.
+
+2.3  Using a PARIDE device
+--------------------------
+
+Once the drivers have been loaded, you can access PARIDE devices in the
+same way as their traditional counterparts.  You will probably need to
+create the device "special files".  Here is a simple script that you can
+cut to a file and execute::
+
+  #!/bin/bash
+  #
+  # mkd -- a script to create the device special files for the PARIDE subsystem
+  #
+  function mkdev {
+    mknod $1 $2 $3 $4 ; chmod 0660 $1 ; chown root:disk $1
+  }
+  #
+  function pd {
+    D=$( printf \\$( printf "x%03x" $[ $1 + 97 ] ) )
+    mkdev pd$D b 45 $[ $1 * 16 ]
+    for P in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+    do mkdev pd$D$P b 45 $[ $1 * 16 + $P ]
+    done
+  }
+  #
+  cd /dev
+  #
+  for u in 0 1 2 3 ; do pd $u ; done
+  for u in 0 1 2 3 ; do mkdev pcd$u b 46 $u ; done
+  for u in 0 1 2 3 ; do mkdev pf$u  b 47 $u ; done
+  for u in 0 1 2 3 ; do mkdev pt$u  c 96 $u ; done
+  for u in 0 1 2 3 ; do mkdev npt$u c 96 $[ $u + 128 ] ; done
+  for u in 0 1 2 3 ; do mkdev pg$u  c 97 $u ; done
+  #
+  # end of mkd
+
+With the device files and drivers in place, you can access PARIDE devices
+like any other Linux device.   For example, to mount a CD-ROM in pcd0, use::
+
+	mount /dev/pcd0 /cdrom
+
+If you have a fresh Avatar Shark cartridge, and the drive is pda, you
+might do something like::
+
+	fdisk /dev/pda		-- make a new partition table with
+				   partition 1 of type 83
+
+	mke2fs /dev/pda1	-- to build the file system
+
+	mkdir /shark		-- make a place to mount the disk
+
+	mount /dev/pda1 /shark
+
+Devices like the Imation superdisk work in the same way, except that
+they do not have a partition table.  For example to make a 120MB
+floppy that you could share with a DOS system::
+
+	mkdosfs /dev/pf0
+	mount /dev/pf0 /mnt
+
+
+2.4  The pf driver
+------------------
+
+The pf driver is intended for use with parallel port ATAPI disk
+devices.  The most common devices in this category are PD drives
+and LS-120 drives.  Traditionally, media for these devices are not
+partitioned.  Consequently, the pf driver does not support partitioned
+media.  This may be changed in a future version of the driver.
+
+2.5  Using the pt driver
+------------------------
+
+The pt driver for parallel port ATAPI tape drives is a minimal driver.
+It does not yet support many of the standard tape ioctl operations.
+For best performance, a block size of 32KB should be used.  You will
+probably want to set the parallel port delay to 0, if you can.
+
+2.6  Using the pg driver
+------------------------
+
+The pg driver can be used in conjunction with the cdrecord program
+to create CD-ROMs.  Please get cdrecord version 1.6.1 or later
+from ftp://ftp.fokus.gmd.de/pub/unix/cdrecord/ .  To record CD-R media
+your parallel port should ideally be set to EPP mode, and the "port delay"
+should be set to 0.  With those settings it is possible to record at 2x
+speed without any buffer underruns.  If you cannot get the driver to work
+in EPP mode, try to use "bidirectional" or "PS/2" mode and 1x speeds only.
+
+
+3. Troubleshooting
+==================
+
+3.1  Use EPP mode if you can
+----------------------------
+
+The most common problems that people report with the PARIDE drivers
+concern the parallel port CMOS settings.  At this time, none of the
+PARIDE protocol modules support ECP mode, or any ECP combination modes.
+If you are able to do so, please set your parallel port into EPP mode
+using your CMOS setup procedure.
+
+3.2  Check the port delay
+-------------------------
+
+Some parallel ports cannot reliably transfer data at full speed.  To
+offset the errors, the PARIDE protocol modules introduce a "port
+delay" between each access to the i/o ports.  Each protocol sets
+a default value for this delay.  In most cases, the user can override
+the default and set it to 0 - resulting in somewhat higher transfer
+rates.  In some rare cases (especially with older 486 systems) the
+default delays are not long enough.  if you experience corrupt data
+transfers, or unexpected failures, you may wish to increase the
+port delay.   The delay can be programmed using the "driveN" parameters
+to each of the high-level drivers.  Please see the notes above, or
+read the comments at the beginning of the driver source files in
+linux/drivers/block/paride.
+
+3.3  Some drives need a printer reset
+-------------------------------------
+
+There appear to be a number of "noname" external drives on the market
+that do not always power up correctly.  We have noticed this with some
+drives based on OnSpec and older Freecom adapters.  In these rare cases,
+the adapter can often be reinitialised by issuing a "printer reset" on
+the parallel port.  As the reset operation is potentially disruptive in
+multiple device environments, the PARIDE drivers will not do it
+automatically.  You can however, force a printer reset by doing::
+
+	insmod lp reset=1
+	rmmod lp
+
+If you have one of these marginal cases, you should probably build
+your paride drivers as modules, and arrange to do the printer reset
+before loading the PARIDE drivers.
+
+3.4  Use the verbose option and dmesg if you need help
+------------------------------------------------------
+
+While a lot of testing has gone into these drivers to make them work
+as smoothly as possible, problems will arise.  If you do have problems,
+please check all the obvious things first:  does the drive work in
+DOS with the manufacturer's drivers ?  If that doesn't yield any useful
+clues, then please make sure that only one drive is hooked to your system,
+and that either (a) PARPORT is enabled or (b) no other device driver
+is using your parallel port (check in /proc/ioports).  Then, load the
+appropriate drivers (you can load several protocol modules if you want)
+as in::
+
+	# insmod paride
+	# insmod epat
+	# insmod bpck
+	# insmod kbic
+	...
+	# insmod pd verbose=1
+
+(using the correct driver for the type of device you have, of course).
+The verbose=1 parameter will cause the drivers to log a trace of their
+activity as they attempt to locate your drive.
+
+Use 'dmesg' to capture a log of all the PARIDE messages (any messages
+beginning with paride:, a protocol module's name or a driver's name) and
+include that with your bug report.  You can submit a bug report in one
+of two ways.  Either send it directly to the author of the PARIDE suite,
+by e-mail to grant@torque.net, or join the linux-parport mailing list
+and post your report there.
+
+3.5  For more information or help
+---------------------------------
+
+You can join the linux-parport mailing list by sending a mail message
+to:
+
+		linux-parport-request@torque.net
+
+with the single word::
+
+		subscribe
+
+in the body of the mail message (not in the subject line).   Please be
+sure that your mail program is correctly set up when you do this,  as
+the list manager is a robot that will subscribe you using the reply
+address in your mail headers.  REMOVE any anti-spam gimmicks you may
+have in your mail headers, when sending mail to the list server.
+
+You might also find some useful information on the linux-parport
+web pages (although they are not always up to date) at
+
+	http://web.archive.org/web/%2E/http://www.torque.net/parport/
diff --git a/Documentation/blockdev/paride.txt b/Documentation/blockdev/paride.txt
deleted file mode 100644
index ee6717e3771d..000000000000
--- a/Documentation/blockdev/paride.txt
+++ /dev/null
@@ -1,417 +0,0 @@
-
-		Linux and parallel port IDE devices
-
-PARIDE v1.03   (c) 1997-8  Grant Guenther <grant@torque.net>
-
-1. Introduction
-
-Owing to the simplicity and near universality of the parallel port interface
-to personal computers, many external devices such as portable hard-disk,
-CD-ROM, LS-120 and tape drives use the parallel port to connect to their
-host computer.  While some devices (notably scanners) use ad-hoc methods
-to pass commands and data through the parallel port interface, most 
-external devices are actually identical to an internal model, but with
-a parallel-port adapter chip added in.  Some of the original parallel port
-adapters were little more than mechanisms for multiplexing a SCSI bus.
-(The Iomega PPA-3 adapter used in the ZIP drives is an example of this
-approach).  Most current designs, however, take a different approach.
-The adapter chip reproduces a small ISA or IDE bus in the external device
-and the communication protocol provides operations for reading and writing
-device registers, as well as data block transfer functions.  Sometimes,
-the device being addressed via the parallel cable is a standard SCSI
-controller like an NCR 5380.  The "ditto" family of external tape
-drives use the ISA replicator to interface a floppy disk controller,
-which is then connected to a floppy-tape mechanism.  The vast majority
-of external parallel port devices, however, are now based on standard
-IDE type devices, which require no intermediate controller.  If one
-were to open up a parallel port CD-ROM drive, for instance, one would
-find a standard ATAPI CD-ROM drive, a power supply, and a single adapter
-that interconnected a standard PC parallel port cable and a standard
-IDE cable.  It is usually possible to exchange the CD-ROM device with
-any other device using the IDE interface. 
-
-The document describes the support in Linux for parallel port IDE
-devices.  It does not cover parallel port SCSI devices, "ditto" tape
-drives or scanners.  Many different devices are supported by the 
-parallel port IDE subsystem, including:
-
-	MicroSolutions backpack CD-ROM
-	MicroSolutions backpack PD/CD
-	MicroSolutions backpack hard-drives
-	MicroSolutions backpack 8000t tape drive
-	SyQuest EZ-135, EZ-230 & SparQ drives
-	Avatar Shark
-	Imation Superdisk LS-120
-	Maxell Superdisk LS-120
-	FreeCom Power CD 
-	Hewlett-Packard 5GB and 8GB tape drives
-	Hewlett-Packard 7100 and 7200 CD-RW drives
-
-as well as most of the clone and no-name products on the market.
-
-To support such a wide range of devices, PARIDE, the parallel port IDE
-subsystem, is actually structured in three parts.   There is a base
-paride module which provides a registry and some common methods for
-accessing the parallel ports.  The second component is a set of 
-high-level drivers for each of the different types of supported devices: 
-
-	pd	IDE disk
-	pcd	ATAPI CD-ROM
-	pf	ATAPI disk
-	pt	ATAPI tape
-	pg	ATAPI generic
-
-(Currently, the pg driver is only used with CD-R drives).
-
-The high-level drivers function according to the relevant standards.
-The third component of PARIDE is a set of low-level protocol drivers
-for each of the parallel port IDE adapter chips.  Thanks to the interest
-and encouragement of Linux users from many parts of the world, 
-support is available for almost all known adapter protocols:
-
-        aten    ATEN EH-100                            (HK)
-        bpck    Microsolutions backpack                (US)
-        comm    DataStor (old-type) "commuter" adapter (TW)
-        dstr    DataStor EP-2000                       (TW)
-        epat    Shuttle EPAT                           (UK)
-        epia    Shuttle EPIA                           (UK)
-	fit2    FIT TD-2000			       (US)
-	fit3    FIT TD-3000			       (US)
-	friq    Freecom IQ cable                       (DE)
-        frpw    Freecom Power                          (DE)
-        kbic    KingByte KBIC-951A and KBIC-971A       (TW)
-	ktti    KT Technology PHd adapter              (SG)
-        on20    OnSpec 90c20                           (US)
-        on26    OnSpec 90c26                           (US)
-
-
-2. Using the PARIDE subsystem
-
-While configuring the Linux kernel, you may choose either to build
-the PARIDE drivers into your kernel, or to build them as modules.
-
-In either case, you will need to select "Parallel port IDE device support"
-as well as at least one of the high-level drivers and at least one
-of the parallel port communication protocols.  If you do not know
-what kind of parallel port adapter is used in your drive, you could
-begin by checking the file names and any text files on your DOS 
-installation floppy.  Alternatively, you can look at the markings on
-the adapter chip itself.  That's usually sufficient to identify the
-correct device.  
-
-You can actually select all the protocol modules, and allow the PARIDE
-subsystem to try them all for you.
-
-For the "brand-name" products listed above, here are the protocol
-and high-level drivers that you would use:
-
-	Manufacturer		Model		Driver	Protocol
-	
-	MicroSolutions		CD-ROM		pcd	bpck
-	MicroSolutions		PD drive	pf	bpck
-	MicroSolutions		hard-drive	pd	bpck
-	MicroSolutions          8000t tape      pt      bpck
-	SyQuest			EZ, SparQ	pd	epat
-	Imation			Superdisk	pf	epat
-	Maxell                  Superdisk       pf      friq
-	Avatar			Shark		pd	epat
-	FreeCom			CD-ROM		pcd	frpw
-	Hewlett-Packard		5GB Tape	pt	epat
-	Hewlett-Packard		7200e (CD)	pcd	epat
-	Hewlett-Packard		7200e (CD-R)	pg	epat
-
-2.1  Configuring built-in drivers
-
-We recommend that you get to know how the drivers work and how to
-configure them as loadable modules, before attempting to compile a
-kernel with the drivers built-in.
-
-If you built all of your PARIDE support directly into your kernel,
-and you have just a single parallel port IDE device, your kernel should
-locate it automatically for you.  If you have more than one device,
-you may need to give some command line options to your bootloader
-(eg: LILO), how to do that is beyond the scope of this document.
-
-The high-level drivers accept a number of command line parameters, all
-of which are documented in the source files in linux/drivers/block/paride.
-By default, each driver will automatically try all parallel ports it
-can find, and all protocol types that have been installed, until it finds
-a parallel port IDE adapter.  Once it finds one, the probe stops.  So,
-if you have more than one device, you will need to tell the drivers
-how to identify them.  This requires specifying the port address, the
-protocol identification number and, for some devices, the drive's
-chain ID.  While your system is booting, a number of messages are
-displayed on the console.  Like all such messages, they can be
-reviewed with the 'dmesg' command.  Among those messages will be
-some lines like:
-
-	paride: bpck registered as protocol 0
-	paride: epat registered as protocol 1
-
-The numbers will always be the same until you build a new kernel with
-different protocol selections.  You should note these numbers as you
-will need them to identify the devices.
-
-If you happen to be using a MicroSolutions backpack device, you will
-also need to know the unit ID number for each drive.  This is usually
-the last two digits of the drive's serial number (but read MicroSolutions'
-documentation about this).
-
-As an example, let's assume that you have a MicroSolutions PD/CD drive
-with unit ID number 36 connected to the parallel port at 0x378, a SyQuest 
-EZ-135 connected to the chained port on the PD/CD drive and also an 
-Imation Superdisk connected to port 0x278.  You could give the following 
-options on your boot command:
-
-	pd.drive0=0x378,1 pf.drive0=0x278,1 pf.drive1=0x378,0,36
-
-In the last option, pf.drive1 configures device /dev/pf1, the 0x378
-is the parallel port base address, the 0 is the protocol registration
-number and 36 is the chain ID.
-
-Please note:  while PARIDE will work both with and without the 
-PARPORT parallel port sharing system that is included by the
-"Parallel port support" option, PARPORT must be included and enabled
-if you want to use chains of devices on the same parallel port.
-
-2.2  Loading and configuring PARIDE as modules
-
-It is much faster and simpler to get to understand the PARIDE drivers
-if you use them as loadable kernel modules.   
-
-Note 1:  using these drivers with the "kerneld" automatic module loading
-system is not recommended for beginners, and is not documented here.  
-
-Note 2:  if you build PARPORT support as a loadable module, PARIDE must
-also be built as loadable modules, and PARPORT must be loaded before the
-PARIDE modules.
-
-To use PARIDE, you must begin by 
-
-	insmod paride
-
-this loads a base module which provides a registry for the protocols,
-among other tasks.
-
-Then, load as many of the protocol modules as you think you might need.
-As you load each module, it will register the protocols that it supports,
-and print a log message to your kernel log file and your console. For 
-example:
-
-	# insmod epat
-	paride: epat registered as protocol 0
-	# insmod kbic
-	paride: k951 registered as protocol 1
-        paride: k971 registered as protocol 2
-
-Finally, you can load high-level drivers for each kind of device that
-you have connected.  By default, each driver will autoprobe for a single 
-device, but you can support up to four similar devices by giving their
-individual co-ordinates when you load the driver.
-
-For example, if you had two no-name CD-ROM drives both using the
-KingByte KBIC-951A adapter, one on port 0x378 and the other on 0x3bc
-you could give the following command:
-
-	# insmod pcd drive0=0x378,1 drive1=0x3bc,1
-
-For most adapters, giving a port address and protocol number is sufficient,
-but check the source files in linux/drivers/block/paride for more 
-information.  (Hopefully someone will write some man pages one day !).
-
-As another example, here's what happens when PARPORT is installed, and
-a SyQuest EZ-135 is attached to port 0x378:
-
-	# insmod paride
-	paride: version 1.0 installed
-	# insmod epat
-	paride: epat registered as protocol 0
-	# insmod pd
-	pd: pd version 1.0, major 45, cluster 64, nice 0
-	pda: Sharing parport1 at 0x378
-	pda: epat 1.0, Shuttle EPAT chip c3 at 0x378, mode 5 (EPP-32), delay 1
-	pda: SyQuest EZ135A, 262144 blocks [128M], (512/16/32), removable media
-	 pda: pda1
-
-Note that the last line is the output from the generic partition table
-scanner - in this case it reports that it has found a disk with one partition.
-
-2.3  Using a PARIDE device
-
-Once the drivers have been loaded, you can access PARIDE devices in the
-same way as their traditional counterparts.  You will probably need to
-create the device "special files".  Here is a simple script that you can
-cut to a file and execute:
-
-#!/bin/bash
-#
-# mkd -- a script to create the device special files for the PARIDE subsystem
-#
-function mkdev {
-  mknod $1 $2 $3 $4 ; chmod 0660 $1 ; chown root:disk $1
-}
-#
-function pd {
-  D=$( printf \\$( printf "x%03x" $[ $1 + 97 ] ) )
-  mkdev pd$D b 45 $[ $1 * 16 ]
-  for P in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
-  do mkdev pd$D$P b 45 $[ $1 * 16 + $P ]
-  done
-}
-#
-cd /dev
-#
-for u in 0 1 2 3 ; do pd $u ; done
-for u in 0 1 2 3 ; do mkdev pcd$u b 46 $u ; done 
-for u in 0 1 2 3 ; do mkdev pf$u  b 47 $u ; done 
-for u in 0 1 2 3 ; do mkdev pt$u  c 96 $u ; done 
-for u in 0 1 2 3 ; do mkdev npt$u c 96 $[ $u + 128 ] ; done 
-for u in 0 1 2 3 ; do mkdev pg$u  c 97 $u ; done 
-#
-# end of mkd
-
-With the device files and drivers in place, you can access PARIDE devices
-like any other Linux device.   For example, to mount a CD-ROM in pcd0, use:
-
-	mount /dev/pcd0 /cdrom
-
-If you have a fresh Avatar Shark cartridge, and the drive is pda, you
-might do something like:
-
-	fdisk /dev/pda		-- make a new partition table with
-				   partition 1 of type 83
-
-	mke2fs /dev/pda1	-- to build the file system
-
-	mkdir /shark		-- make a place to mount the disk
-
-	mount /dev/pda1 /shark
-
-Devices like the Imation superdisk work in the same way, except that
-they do not have a partition table.  For example to make a 120MB
-floppy that you could share with a DOS system:
-
-	mkdosfs /dev/pf0
-	mount /dev/pf0 /mnt
-
-
-2.4  The pf driver
-
-The pf driver is intended for use with parallel port ATAPI disk
-devices.  The most common devices in this category are PD drives
-and LS-120 drives.  Traditionally, media for these devices are not
-partitioned.  Consequently, the pf driver does not support partitioned
-media.  This may be changed in a future version of the driver. 
-
-2.5  Using the pt driver
-
-The pt driver for parallel port ATAPI tape drives is a minimal driver.
-It does not yet support many of the standard tape ioctl operations. 
-For best performance, a block size of 32KB should be used.  You will
-probably want to set the parallel port delay to 0, if you can.
-
-2.6  Using the pg driver
-
-The pg driver can be used in conjunction with the cdrecord program
-to create CD-ROMs.  Please get cdrecord version 1.6.1 or later
-from ftp://ftp.fokus.gmd.de/pub/unix/cdrecord/ .  To record CD-R media 
-your parallel port should ideally be set to EPP mode, and the "port delay" 
-should be set to 0.  With those settings it is possible to record at 2x 
-speed without any buffer underruns.  If you cannot get the driver to work
-in EPP mode, try to use "bidirectional" or "PS/2" mode and 1x speeds only.
-
-
-3. Troubleshooting
-
-3.1  Use EPP mode if you can
-
-The most common problems that people report with the PARIDE drivers
-concern the parallel port CMOS settings.  At this time, none of the
-PARIDE protocol modules support ECP mode, or any ECP combination modes.
-If you are able to do so, please set your parallel port into EPP mode
-using your CMOS setup procedure.
-
-3.2  Check the port delay
-
-Some parallel ports cannot reliably transfer data at full speed.  To
-offset the errors, the PARIDE protocol modules introduce a "port
-delay" between each access to the i/o ports.  Each protocol sets
-a default value for this delay.  In most cases, the user can override
-the default and set it to 0 - resulting in somewhat higher transfer
-rates.  In some rare cases (especially with older 486 systems) the
-default delays are not long enough.  if you experience corrupt data
-transfers, or unexpected failures, you may wish to increase the
-port delay.   The delay can be programmed using the "driveN" parameters
-to each of the high-level drivers.  Please see the notes above, or
-read the comments at the beginning of the driver source files in
-linux/drivers/block/paride.
-
-3.3  Some drives need a printer reset
-
-There appear to be a number of "noname" external drives on the market
-that do not always power up correctly.  We have noticed this with some
-drives based on OnSpec and older Freecom adapters.  In these rare cases,
-the adapter can often be reinitialised by issuing a "printer reset" on
-the parallel port.  As the reset operation is potentially disruptive in 
-multiple device environments, the PARIDE drivers will not do it 
-automatically.  You can however, force a printer reset by doing:
-
-	insmod lp reset=1
-	rmmod lp
-
-If you have one of these marginal cases, you should probably build
-your paride drivers as modules, and arrange to do the printer reset
-before loading the PARIDE drivers. 
-
-3.4  Use the verbose option and dmesg if you need help
-
-While a lot of testing has gone into these drivers to make them work
-as smoothly as possible, problems will arise.  If you do have problems,
-please check all the obvious things first:  does the drive work in
-DOS with the manufacturer's drivers ?  If that doesn't yield any useful
-clues, then please make sure that only one drive is hooked to your system,
-and that either (a) PARPORT is enabled or (b) no other device driver
-is using your parallel port (check in /proc/ioports).  Then, load the
-appropriate drivers (you can load several protocol modules if you want)
-as in:
-
-	# insmod paride
-	# insmod epat
-	# insmod bpck
-	# insmod kbic
-	...
-	# insmod pd verbose=1
-
-(using the correct driver for the type of device you have, of course).
-The verbose=1 parameter will cause the drivers to log a trace of their
-activity as they attempt to locate your drive.
-
-Use 'dmesg' to capture a log of all the PARIDE messages (any messages
-beginning with paride:, a protocol module's name or a driver's name) and
-include that with your bug report.  You can submit a bug report in one
-of two ways.  Either send it directly to the author of the PARIDE suite,
-by e-mail to grant@torque.net, or join the linux-parport mailing list
-and post your report there.
-
-3.5  For more information or help
-
-You can join the linux-parport mailing list by sending a mail message
-to 
-		linux-parport-request@torque.net
-
-with the single word 
-
-		subscribe
-
-in the body of the mail message (not in the subject line).   Please be
-sure that your mail program is correctly set up when you do this,  as
-the list manager is a robot that will subscribe you using the reply
-address in your mail headers.  REMOVE any anti-spam gimmicks you may
-have in your mail headers, when sending mail to the list server.
-
-You might also find some useful information on the linux-parport
-web pages (although they are not always up to date) at
-
-	http://web.archive.org/web/*/http://www.torque.net/parport/
-
-
diff --git a/Documentation/blockdev/ramdisk.rst b/Documentation/blockdev/ramdisk.rst
new file mode 100644
index 000000000000..b7c2268f8dec
--- /dev/null
+++ b/Documentation/blockdev/ramdisk.rst
@@ -0,0 +1,177 @@
+==========================================
+Using the RAM disk block device with Linux
+==========================================
+
+.. Contents:
+
+	1) Overview
+	2) Kernel Command Line Parameters
+	3) Using "rdev -r"
+	4) An Example of Creating a Compressed RAM Disk
+
+
+1) Overview
+-----------
+
+The RAM disk driver is a way to use main system memory as a block device.  It
+is required for initrd, an initial filesystem used if you need to load modules
+in order to access the root filesystem (see Documentation/admin-guide/initrd.rst).  It can
+also be used for a temporary filesystem for crypto work, since the contents
+are erased on reboot.
+
+The RAM disk dynamically grows as more space is required. It does this by using
+RAM from the buffer cache. The driver marks the buffers it is using as dirty
+so that the VM subsystem does not try to reclaim them later.
+
+The RAM disk supports up to 16 RAM disks by default, and can be reconfigured
+to support an unlimited number of RAM disks (at your own risk).  Just change
+the configuration symbol BLK_DEV_RAM_COUNT in the Block drivers config menu
+and (re)build the kernel.
+
+To use RAM disk support with your system, run './MAKEDEV ram' from the /dev
+directory.  RAM disks are all major number 1, and start with minor number 0
+for /dev/ram0, etc.  If used, modern kernels use /dev/ram0 for an initrd.
+
+The new RAM disk also has the ability to load compressed RAM disk images,
+allowing one to squeeze more programs onto an average installation or
+rescue floppy disk.
+
+
+2) Parameters
+---------------------------------
+
+2a) Kernel Command Line Parameters
+
+	ramdisk_size=N
+		Size of the ramdisk.
+
+This parameter tells the RAM disk driver to set up RAM disks of N k size.  The
+default is 4096 (4 MB).
+
+2b) Module parameters
+
+	rd_nr
+		/dev/ramX devices created.
+
+	max_part
+		Maximum partition number.
+
+	rd_size
+		See ramdisk_size.
+
+3) Using "rdev -r"
+------------------
+
+The usage of the word (two bytes) that "rdev -r" sets in the kernel image is
+as follows. The low 11 bits (0 -> 10) specify an offset (in 1 k blocks) of up
+to 2 MB (2^11) of where to find the RAM disk (this used to be the size). Bit
+14 indicates that a RAM disk is to be loaded, and bit 15 indicates whether a
+prompt/wait sequence is to be given before trying to read the RAM disk. Since
+the RAM disk dynamically grows as data is being written into it, a size field
+is not required. Bits 11 to 13 are not currently used and may as well be zero.
+These numbers are no magical secrets, as seen below::
+
+  ./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK     0x07FF
+  ./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG          0x8000
+  ./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG            0x4000
+
+Consider a typical two floppy disk setup, where you will have the
+kernel on disk one, and have already put a RAM disk image onto disk #2.
+
+Hence you want to set bits 0 to 13 as 0, meaning that your RAM disk
+starts at an offset of 0 kB from the beginning of the floppy.
+The command line equivalent is: "ramdisk_start=0"
+
+You want bit 14 as one, indicating that a RAM disk is to be loaded.
+The command line equivalent is: "load_ramdisk=1"
+
+You want bit 15 as one, indicating that you want a prompt/keypress
+sequence so that you have a chance to switch floppy disks.
+The command line equivalent is: "prompt_ramdisk=1"
+
+Putting that together gives 2^15 + 2^14 + 0 = 49152 for an rdev word.
+So to create disk one of the set, you would do::
+
+	/usr/src/linux# cat arch/x86/boot/zImage > /dev/fd0
+	/usr/src/linux# rdev /dev/fd0 /dev/fd0
+	/usr/src/linux# rdev -r /dev/fd0 49152
+
+If you make a boot disk that has LILO, then for the above, you would use::
+
+	append = "ramdisk_start=0 load_ramdisk=1 prompt_ramdisk=1"
+
+Since the default start = 0 and the default prompt = 1, you could use::
+
+	append = "load_ramdisk=1"
+
+
+4) An Example of Creating a Compressed RAM Disk
+-----------------------------------------------
+
+To create a RAM disk image, you will need a spare block device to
+construct it on. This can be the RAM disk device itself, or an
+unused disk partition (such as an unmounted swap partition). For this
+example, we will use the RAM disk device, "/dev/ram0".
+
+Note: This technique should not be done on a machine with less than 8 MB
+of RAM. If using a spare disk partition instead of /dev/ram0, then this
+restriction does not apply.
+
+a) Decide on the RAM disk size that you want. Say 2 MB for this example.
+   Create it by writing to the RAM disk device. (This step is not currently
+   required, but may be in the future.) It is wise to zero out the
+   area (esp. for disks) so that maximal compression is achieved for
+   the unused blocks of the image that you are about to create::
+
+	dd if=/dev/zero of=/dev/ram0 bs=1k count=2048
+
+b) Make a filesystem on it. Say ext2fs for this example::
+
+	mke2fs -vm0 /dev/ram0 2048
+
+c) Mount it, copy the files you want to it (eg: /etc/* /dev/* ...)
+   and unmount it again.
+
+d) Compress the contents of the RAM disk. The level of compression
+   will be approximately 50% of the space used by the files. Unused
+   space on the RAM disk will compress to almost nothing::
+
+	dd if=/dev/ram0 bs=1k count=2048 | gzip -v9 > /tmp/ram_image.gz
+
+e) Put the kernel onto the floppy::
+
+	dd if=zImage of=/dev/fd0 bs=1k
+
+f) Put the RAM disk image onto the floppy, after the kernel. Use an offset
+   that is slightly larger than the kernel, so that you can put another
+   (possibly larger) kernel onto the same floppy later without overlapping
+   the RAM disk image. An offset of 400 kB for kernels about 350 kB in
+   size would be reasonable. Make sure offset+size of ram_image.gz is
+   not larger than the total space on your floppy (usually 1440 kB)::
+
+	dd if=/tmp/ram_image.gz of=/dev/fd0 bs=1k seek=400
+
+g) Use "rdev" to set the boot device, RAM disk offset, prompt flag, etc.
+   For prompt_ramdisk=1, load_ramdisk=1, ramdisk_start=400, one would
+   have 2^15 + 2^14 + 400 = 49552::
+
+	rdev /dev/fd0 /dev/fd0
+	rdev -r /dev/fd0 49552
+
+That is it. You now have your boot/root compressed RAM disk floppy. Some
+users may wish to combine steps (d) and (f) by using a pipe.
+
+
+						Paul Gortmaker 12/95
+
+Changelog:
+----------
+
+10-22-04 :
+		Updated to reflect changes in command line options, remove
+		obsolete references, general cleanup.
+		James Nelson (james4765@gmail.com)
+
+
+12-95 :
+		Original Document
diff --git a/Documentation/blockdev/ramdisk.txt b/Documentation/blockdev/ramdisk.txt
deleted file mode 100644
index 501e12e0323e..000000000000
--- a/Documentation/blockdev/ramdisk.txt
+++ /dev/null
@@ -1,174 +0,0 @@
-Using the RAM disk block device with Linux
-------------------------------------------
-
-Contents:
-
-	1) Overview
-	2) Kernel Command Line Parameters
-	3) Using "rdev -r"
-	4) An Example of Creating a Compressed RAM Disk
-
-
-1) Overview
------------
-
-The RAM disk driver is a way to use main system memory as a block device.  It
-is required for initrd, an initial filesystem used if you need to load modules
-in order to access the root filesystem (see Documentation/admin-guide/initrd.rst).  It can
-also be used for a temporary filesystem for crypto work, since the contents
-are erased on reboot.
-
-The RAM disk dynamically grows as more space is required. It does this by using
-RAM from the buffer cache. The driver marks the buffers it is using as dirty
-so that the VM subsystem does not try to reclaim them later.
-
-The RAM disk supports up to 16 RAM disks by default, and can be reconfigured
-to support an unlimited number of RAM disks (at your own risk).  Just change
-the configuration symbol BLK_DEV_RAM_COUNT in the Block drivers config menu
-and (re)build the kernel.
-
-To use RAM disk support with your system, run './MAKEDEV ram' from the /dev
-directory.  RAM disks are all major number 1, and start with minor number 0
-for /dev/ram0, etc.  If used, modern kernels use /dev/ram0 for an initrd.
-
-The new RAM disk also has the ability to load compressed RAM disk images,
-allowing one to squeeze more programs onto an average installation or
-rescue floppy disk.
-
-
-2) Parameters
----------------------------------
-
-2a) Kernel Command Line Parameters
-
-	ramdisk_size=N
-	==============
-
-This parameter tells the RAM disk driver to set up RAM disks of N k size.  The
-default is 4096 (4 MB).
-
-2b) Module parameters
-
-	rd_nr
-	=====
-	/dev/ramX devices created.
-
-	max_part
-	========
-	Maximum partition number.
-
-	rd_size
-	=======
-	See ramdisk_size.
-
-3) Using "rdev -r"
-------------------
-
-The usage of the word (two bytes) that "rdev -r" sets in the kernel image is
-as follows. The low 11 bits (0 -> 10) specify an offset (in 1 k blocks) of up
-to 2 MB (2^11) of where to find the RAM disk (this used to be the size). Bit
-14 indicates that a RAM disk is to be loaded, and bit 15 indicates whether a
-prompt/wait sequence is to be given before trying to read the RAM disk. Since
-the RAM disk dynamically grows as data is being written into it, a size field
-is not required. Bits 11 to 13 are not currently used and may as well be zero.
-These numbers are no magical secrets, as seen below:
-
-./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK     0x07FF
-./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG          0x8000
-./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG            0x4000
-
-Consider a typical two floppy disk setup, where you will have the
-kernel on disk one, and have already put a RAM disk image onto disk #2.
-
-Hence you want to set bits 0 to 13 as 0, meaning that your RAM disk
-starts at an offset of 0 kB from the beginning of the floppy.
-The command line equivalent is: "ramdisk_start=0"
-
-You want bit 14 as one, indicating that a RAM disk is to be loaded.
-The command line equivalent is: "load_ramdisk=1"
-
-You want bit 15 as one, indicating that you want a prompt/keypress
-sequence so that you have a chance to switch floppy disks.
-The command line equivalent is: "prompt_ramdisk=1"
-
-Putting that together gives 2^15 + 2^14 + 0 = 49152 for an rdev word.
-So to create disk one of the set, you would do:
-
-	/usr/src/linux# cat arch/x86/boot/zImage > /dev/fd0
-	/usr/src/linux# rdev /dev/fd0 /dev/fd0
-	/usr/src/linux# rdev -r /dev/fd0 49152
-
-If you make a boot disk that has LILO, then for the above, you would use:
-	append = "ramdisk_start=0 load_ramdisk=1 prompt_ramdisk=1"
-Since the default start = 0 and the default prompt = 1, you could use:
-	append = "load_ramdisk=1"
-
-
-4) An Example of Creating a Compressed RAM Disk
-----------------------------------------------
-
-To create a RAM disk image, you will need a spare block device to
-construct it on. This can be the RAM disk device itself, or an
-unused disk partition (such as an unmounted swap partition). For this
-example, we will use the RAM disk device, "/dev/ram0".
-
-Note: This technique should not be done on a machine with less than 8 MB
-of RAM. If using a spare disk partition instead of /dev/ram0, then this
-restriction does not apply.
-
-a) Decide on the RAM disk size that you want. Say 2 MB for this example.
-   Create it by writing to the RAM disk device. (This step is not currently
-   required, but may be in the future.) It is wise to zero out the
-   area (esp. for disks) so that maximal compression is achieved for
-   the unused blocks of the image that you are about to create.
-
-	dd if=/dev/zero of=/dev/ram0 bs=1k count=2048
-
-b) Make a filesystem on it. Say ext2fs for this example.
-
-	mke2fs -vm0 /dev/ram0 2048
-
-c) Mount it, copy the files you want to it (eg: /etc/* /dev/* ...)
-   and unmount it again.
-
-d) Compress the contents of the RAM disk. The level of compression
-   will be approximately 50% of the space used by the files. Unused
-   space on the RAM disk will compress to almost nothing.
-
-	dd if=/dev/ram0 bs=1k count=2048 | gzip -v9 > /tmp/ram_image.gz
-
-e) Put the kernel onto the floppy
-
-	dd if=zImage of=/dev/fd0 bs=1k
-
-f) Put the RAM disk image onto the floppy, after the kernel. Use an offset
-   that is slightly larger than the kernel, so that you can put another
-   (possibly larger) kernel onto the same floppy later without overlapping
-   the RAM disk image. An offset of 400 kB for kernels about 350 kB in
-   size would be reasonable. Make sure offset+size of ram_image.gz is
-   not larger than the total space on your floppy (usually 1440 kB).
-
-	dd if=/tmp/ram_image.gz of=/dev/fd0 bs=1k seek=400
-
-g) Use "rdev" to set the boot device, RAM disk offset, prompt flag, etc.
-   For prompt_ramdisk=1, load_ramdisk=1, ramdisk_start=400, one would
-   have 2^15 + 2^14 + 400 = 49552.
-
-	rdev /dev/fd0 /dev/fd0
-	rdev -r /dev/fd0 49552
-
-That is it. You now have your boot/root compressed RAM disk floppy. Some
-users may wish to combine steps (d) and (f) by using a pipe.
-
---------------------------------------------------------------------------
-						Paul Gortmaker 12/95
-
-Changelog:
-----------
-
-10-22-04 :	Updated to reflect changes in command line options, remove
-		obsolete references, general cleanup.
-		James Nelson (james4765@gmail.com)
-
-
-12-95 :		Original Document
diff --git a/Documentation/blockdev/zram.rst b/Documentation/blockdev/zram.rst
new file mode 100644
index 000000000000..2111231c9c0f
--- /dev/null
+++ b/Documentation/blockdev/zram.rst
@@ -0,0 +1,422 @@
+========================================
+zram: Compressed RAM based block devices
+========================================
+
+Introduction
+============
+
+The zram module creates RAM based block devices named /dev/zram<id>
+(<id> = 0, 1, ...). Pages written to these disks are compressed and stored
+in memory itself. These disks allow very fast I/O and compression provides
+good amounts of memory savings. Some of the usecases include /tmp storage,
+use as swap disks, various caches under /var and maybe many more :)
+
+Statistics for individual zram devices are exported through sysfs nodes at
+/sys/block/zram<id>/
+
+Usage
+=====
+
+There are several ways to configure and manage zram device(-s):
+
+a) using zram and zram_control sysfs attributes
+b) using zramctl utility, provided by util-linux (util-linux@vger.kernel.org).
+
+In this document we will describe only 'manual' zram configuration steps,
+IOW, zram and zram_control sysfs attributes.
+
+In order to get a better idea about zramctl please consult util-linux
+documentation, zramctl man-page or `zramctl --help`. Please be informed
+that zram maintainers do not develop/maintain util-linux or zramctl, should
+you have any questions please contact util-linux@vger.kernel.org
+
+Following shows a typical sequence of steps for using zram.
+
+WARNING
+=======
+
+For the sake of simplicity we skip error checking parts in most of the
+examples below. However, it is your sole responsibility to handle errors.
+
+zram sysfs attributes always return negative values in case of errors.
+The list of possible return codes:
+
+========  =============================================================
+-EBUSY	  an attempt to modify an attribute that cannot be changed once
+	  the device has been initialised. Please reset device first;
+-ENOMEM	  zram was not able to allocate enough memory to fulfil your
+	  needs;
+-EINVAL	  invalid input has been provided.
+========  =============================================================
+
+If you use 'echo', the returned value that is changed by 'echo' utility,
+and, in general case, something like::
+
+	echo 3 > /sys/block/zram0/max_comp_streams
+	if [ $? -ne 0 ];
+		handle_error
+	fi
+
+should suffice.
+
+1) Load Module
+==============
+
+::
+
+	modprobe zram num_devices=4
+	This creates 4 devices: /dev/zram{0,1,2,3}
+
+num_devices parameter is optional and tells zram how many devices should be
+pre-created. Default: 1.
+
+2) Set max number of compression streams
+========================================
+
+Regardless the value passed to this attribute, ZRAM will always
+allocate multiple compression streams - one per online CPUs - thus
+allowing several concurrent compression operations. The number of
+allocated compression streams goes down when some of the CPUs
+become offline. There is no single-compression-stream mode anymore,
+unless you are running a UP system or has only 1 CPU online.
+
+To find out how many streams are currently available::
+
+	cat /sys/block/zram0/max_comp_streams
+
+3) Select compression algorithm
+===============================
+
+Using comp_algorithm device attribute one can see available and
+currently selected (shown in square brackets) compression algorithms,
+change selected compression algorithm (once the device is initialised
+there is no way to change compression algorithm).
+
+Examples::
+
+	#show supported compression algorithms
+	cat /sys/block/zram0/comp_algorithm
+	lzo [lz4]
+
+	#select lzo compression algorithm
+	echo lzo > /sys/block/zram0/comp_algorithm
+
+For the time being, the `comp_algorithm` content does not necessarily
+show every compression algorithm supported by the kernel. We keep this
+list primarily to simplify device configuration and one can configure
+a new device with a compression algorithm that is not listed in
+`comp_algorithm`. The thing is that, internally, ZRAM uses Crypto API
+and, if some of the algorithms were built as modules, it's impossible
+to list all of them using, for instance, /proc/crypto or any other
+method. This, however, has an advantage of permitting the usage of
+custom crypto compression modules (implementing S/W or H/W compression).
+
+4) Set Disksize
+===============
+
+Set disk size by writing the value to sysfs node 'disksize'.
+The value can be either in bytes or you can use mem suffixes.
+Examples::
+
+	# Initialize /dev/zram0 with 50MB disksize
+	echo $((50*1024*1024)) > /sys/block/zram0/disksize
+
+	# Using mem suffixes
+	echo 256K > /sys/block/zram0/disksize
+	echo 512M > /sys/block/zram0/disksize
+	echo 1G > /sys/block/zram0/disksize
+
+Note:
+There is little point creating a zram of greater than twice the size of memory
+since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
+size of the disk when not in use so a huge zram is wasteful.
+
+5) Set memory limit: Optional
+=============================
+
+Set memory limit by writing the value to sysfs node 'mem_limit'.
+The value can be either in bytes or you can use mem suffixes.
+In addition, you could change the value in runtime.
+Examples::
+
+	# limit /dev/zram0 with 50MB memory
+	echo $((50*1024*1024)) > /sys/block/zram0/mem_limit
+
+	# Using mem suffixes
+	echo 256K > /sys/block/zram0/mem_limit
+	echo 512M > /sys/block/zram0/mem_limit
+	echo 1G > /sys/block/zram0/mem_limit
+
+	# To disable memory limit
+	echo 0 > /sys/block/zram0/mem_limit
+
+6) Activate
+===========
+
+::
+
+	mkswap /dev/zram0
+	swapon /dev/zram0
+
+	mkfs.ext4 /dev/zram1
+	mount /dev/zram1 /tmp
+
+7) Add/remove zram devices
+==========================
+
+zram provides a control interface, which enables dynamic (on-demand) device
+addition and removal.
+
+In order to add a new /dev/zramX device, perform read operation on hot_add
+attribute. This will return either new device's device id (meaning that you
+can use /dev/zram<id>) or error code.
+
+Example::
+
+	cat /sys/class/zram-control/hot_add
+	1
+
+To remove the existing /dev/zramX device (where X is a device id)
+execute::
+
+	echo X > /sys/class/zram-control/hot_remove
+
+8) Stats
+========
+
+Per-device statistics are exported as various nodes under /sys/block/zram<id>/
+
+A brief description of exported device attributes. For more details please
+read Documentation/ABI/testing/sysfs-block-zram.
+
+======================  ======  ===============================================
+Name            	access            description
+======================  ======  ===============================================
+disksize          	RW	show and set the device's disk size
+initstate         	RO	shows the initialization state of the device
+reset             	WO	trigger device reset
+mem_used_max      	WO	reset the `mem_used_max` counter (see later)
+mem_limit         	WO	specifies the maximum amount of memory ZRAM can
+				use to store the compressed data
+writeback_limit   	WO	specifies the maximum amount of write IO zram
+				can write out to backing device as 4KB unit
+writeback_limit_enable  RW	show and set writeback_limit feature
+max_comp_streams  	RW	the number of possible concurrent compress
+				operations
+comp_algorithm    	RW	show and change the compression algorithm
+compact           	WO	trigger memory compaction
+debug_stat        	RO	this file is used for zram debugging purposes
+backing_dev	  	RW	set up backend storage for zram to write out
+idle		  	WO	mark allocated slot as idle
+======================  ======  ===============================================
+
+
+User space is advised to use the following files to read the device statistics.
+
+File /sys/block/zram<id>/stat
+
+Represents block layer statistics. Read Documentation/block/stat.txt for
+details.
+
+File /sys/block/zram<id>/io_stat
+
+The stat file represents device's I/O statistics not accounted by block
+layer and, thus, not available in zram<id>/stat file. It consists of a
+single line of text and contains the following stats separated by
+whitespace:
+
+ =============    =============================================================
+ failed_reads     The number of failed reads
+ failed_writes    The number of failed writes
+ invalid_io       The number of non-page-size-aligned I/O requests
+ notify_free      Depending on device usage scenario it may account
+
+                  a) the number of pages freed because of swap slot free
+                     notifications
+                  b) the number of pages freed because of
+                     REQ_OP_DISCARD requests sent by bio. The former ones are
+                     sent to a swap block device when a swap slot is freed,
+                     which implies that this disk is being used as a swap disk.
+
+                  The latter ones are sent by filesystem mounted with
+                  discard option, whenever some data blocks are getting
+                  discarded.
+ =============    =============================================================
+
+File /sys/block/zram<id>/mm_stat
+
+The stat file represents device's mm statistics. It consists of a single
+line of text and contains the following stats separated by whitespace:
+
+ ================ =============================================================
+ orig_data_size   uncompressed size of data stored in this disk.
+		  This excludes same-element-filled pages (same_pages) since
+		  no memory is allocated for them.
+                  Unit: bytes
+ compr_data_size  compressed size of data stored in this disk
+ mem_used_total   the amount of memory allocated for this disk. This
+                  includes allocator fragmentation and metadata overhead,
+                  allocated for this disk. So, allocator space efficiency
+                  can be calculated using compr_data_size and this statistic.
+                  Unit: bytes
+ mem_limit        the maximum amount of memory ZRAM can use to store
+                  the compressed data
+ mem_used_max     the maximum amount of memory zram have consumed to
+                  store the data
+ same_pages       the number of same element filled pages written to this disk.
+                  No memory is allocated for such pages.
+ pages_compacted  the number of pages freed during compaction
+ huge_pages	  the number of incompressible pages
+ ================ =============================================================
+
+File /sys/block/zram<id>/bd_stat
+
+The stat file represents device's backing device statistics. It consists of
+a single line of text and contains the following stats separated by whitespace:
+
+ ============== =============================================================
+ bd_count	size of data written in backing device.
+		Unit: 4K bytes
+ bd_reads	the number of reads from backing device
+		Unit: 4K bytes
+ bd_writes	the number of writes to backing device
+		Unit: 4K bytes
+ ============== =============================================================
+
+9) Deactivate
+=============
+
+::
+
+	swapoff /dev/zram0
+	umount /dev/zram1
+
+10) Reset
+=========
+
+	Write any positive value to 'reset' sysfs node::
+
+		echo 1 > /sys/block/zram0/reset
+		echo 1 > /sys/block/zram1/reset
+
+	This frees all the memory allocated for the given device and
+	resets the disksize to zero. You must set the disksize again
+	before reusing the device.
+
+Optional Feature
+================
+
+writeback
+---------
+
+With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible page
+to backing storage rather than keeping it in memory.
+To use the feature, admin should set up backing device via::
+
+	echo /dev/sda5 > /sys/block/zramX/backing_dev
+
+before disksize setting. It supports only partition at this moment.
+If admin want to use incompressible page writeback, they could do via::
+
+	echo huge > /sys/block/zramX/write
+
+To use idle page writeback, first, user need to declare zram pages
+as idle::
+
+	echo all > /sys/block/zramX/idle
+
+From now on, any pages on zram are idle pages. The idle mark
+will be removed until someone request access of the block.
+IOW, unless there is access request, those pages are still idle pages.
+
+Admin can request writeback of those idle pages at right timing via::
+
+	echo idle > /sys/block/zramX/writeback
+
+With the command, zram writeback idle pages from memory to the storage.
+
+If there are lots of write IO with flash device, potentially, it has
+flash wearout problem so that admin needs to design write limitation
+to guarantee storage health for entire product life.
+
+To overcome the concern, zram supports "writeback_limit" feature.
+The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
+any writeback. IOW, if admin want to apply writeback budget, he should
+enable writeback_limit_enable via::
+
+	$ echo 1 > /sys/block/zramX/writeback_limit_enable
+
+Once writeback_limit_enable is set, zram doesn't allow any writeback
+until admin set the budget via /sys/block/zramX/writeback_limit.
+
+(If admin doesn't enable writeback_limit_enable, writeback_limit's value
+assigned via /sys/block/zramX/writeback_limit is meaninless.)
+
+If admin want to limit writeback as per-day 400M, he could do it
+like below::
+
+	$ MB_SHIFT=20
+	$ 4K_SHIFT=12
+	$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
+		/sys/block/zram0/writeback_limit.
+	$ echo 1 > /sys/block/zram0/writeback_limit_enable
+
+If admin want to allow further write again once the bugdet is exausted,
+he could do it like below::
+
+	$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
+		/sys/block/zram0/writeback_limit
+
+If admin want to see remaining writeback budget since he set::
+
+	$ cat /sys/block/zramX/writeback_limit
+
+If admin want to disable writeback limit, he could do::
+
+	$ echo 0 > /sys/block/zramX/writeback_limit_enable
+
+The writeback_limit count will reset whenever you reset zram(e.g.,
+system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
+writeback happened until you reset the zram to allocate extra writeback
+budget in next setting is user's job.
+
+If admin want to measure writeback count in a certain period, he could
+know it via /sys/block/zram0/bd_stat's 3rd column.
+
+memory tracking
+===============
+
+With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the
+zram block. It could be useful to catch cold or incompressible
+pages of the process with*pagemap.
+
+If you enable the feature, you could see block state via
+/sys/kernel/debug/zram/zram0/block_state". The output is as follows::
+
+	  300    75.033841 .wh.
+	  301    63.806904 s...
+	  302    63.806919 ..hi
+
+First column
+	zram's block index.
+Second column
+	access time since the system was booted
+Third column
+	state of the block:
+
+	s:
+		same page
+	w:
+		written page to backing store
+	h:
+		huge page
+	i:
+		idle page
+
+First line of above example says 300th block is accessed at 75.033841sec
+and the block's state is huge so it is written back to the backing
+storage. It's a debugging feature so anyone shouldn't rely on it to work
+properly.
+
+Nitin Gupta
+ngupta@vflare.org
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
deleted file mode 100644
index 4df0ce271085..000000000000
--- a/Documentation/blockdev/zram.txt
+++ /dev/null
@@ -1,355 +0,0 @@
-zram: Compressed RAM based block devices
-----------------------------------------
-
-* Introduction
-
-The zram module creates RAM based block devices named /dev/zram<id>
-(<id> = 0, 1, ...). Pages written to these disks are compressed and stored
-in memory itself. These disks allow very fast I/O and compression provides
-good amounts of memory savings. Some of the usecases include /tmp storage,
-use as swap disks, various caches under /var and maybe many more :)
-
-Statistics for individual zram devices are exported through sysfs nodes at
-/sys/block/zram<id>/
-
-* Usage
-
-There are several ways to configure and manage zram device(-s):
-a) using zram and zram_control sysfs attributes
-b) using zramctl utility, provided by util-linux (util-linux@vger.kernel.org).
-
-In this document we will describe only 'manual' zram configuration steps,
-IOW, zram and zram_control sysfs attributes.
-
-In order to get a better idea about zramctl please consult util-linux
-documentation, zramctl man-page or `zramctl --help'. Please be informed
-that zram maintainers do not develop/maintain util-linux or zramctl, should
-you have any questions please contact util-linux@vger.kernel.org
-
-Following shows a typical sequence of steps for using zram.
-
-WARNING
-=======
-For the sake of simplicity we skip error checking parts in most of the
-examples below. However, it is your sole responsibility to handle errors.
-
-zram sysfs attributes always return negative values in case of errors.
-The list of possible return codes:
--EBUSY	-- an attempt to modify an attribute that cannot be changed once
-the device has been initialised. Please reset device first;
--ENOMEM	-- zram was not able to allocate enough memory to fulfil your
-needs;
--EINVAL	-- invalid input has been provided.
-
-If you use 'echo', the returned value that is changed by 'echo' utility,
-and, in general case, something like:
-
-	echo 3 > /sys/block/zram0/max_comp_streams
-	if [ $? -ne 0 ];
-		handle_error
-	fi
-
-should suffice.
-
-1) Load Module:
-	modprobe zram num_devices=4
-	This creates 4 devices: /dev/zram{0,1,2,3}
-
-num_devices parameter is optional and tells zram how many devices should be
-pre-created. Default: 1.
-
-2) Set max number of compression streams
-Regardless the value passed to this attribute, ZRAM will always
-allocate multiple compression streams - one per online CPUs - thus
-allowing several concurrent compression operations. The number of
-allocated compression streams goes down when some of the CPUs
-become offline. There is no single-compression-stream mode anymore,
-unless you are running a UP system or has only 1 CPU online.
-
-To find out how many streams are currently available:
-	cat /sys/block/zram0/max_comp_streams
-
-3) Select compression algorithm
-Using comp_algorithm device attribute one can see available and
-currently selected (shown in square brackets) compression algorithms,
-change selected compression algorithm (once the device is initialised
-there is no way to change compression algorithm).
-
-Examples:
-	#show supported compression algorithms
-	cat /sys/block/zram0/comp_algorithm
-	lzo [lz4]
-
-	#select lzo compression algorithm
-	echo lzo > /sys/block/zram0/comp_algorithm
-
-For the time being, the `comp_algorithm' content does not necessarily
-show every compression algorithm supported by the kernel. We keep this
-list primarily to simplify device configuration and one can configure
-a new device with a compression algorithm that is not listed in
-`comp_algorithm'. The thing is that, internally, ZRAM uses Crypto API
-and, if some of the algorithms were built as modules, it's impossible
-to list all of them using, for instance, /proc/crypto or any other
-method. This, however, has an advantage of permitting the usage of
-custom crypto compression modules (implementing S/W or H/W compression).
-
-4) Set Disksize
-Set disk size by writing the value to sysfs node 'disksize'.
-The value can be either in bytes or you can use mem suffixes.
-Examples:
-	# Initialize /dev/zram0 with 50MB disksize
-	echo $((50*1024*1024)) > /sys/block/zram0/disksize
-
-	# Using mem suffixes
-	echo 256K > /sys/block/zram0/disksize
-	echo 512M > /sys/block/zram0/disksize
-	echo 1G > /sys/block/zram0/disksize
-
-Note:
-There is little point creating a zram of greater than twice the size of memory
-since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
-size of the disk when not in use so a huge zram is wasteful.
-
-5) Set memory limit: Optional
-Set memory limit by writing the value to sysfs node 'mem_limit'.
-The value can be either in bytes or you can use mem suffixes.
-In addition, you could change the value in runtime.
-Examples:
-	# limit /dev/zram0 with 50MB memory
-	echo $((50*1024*1024)) > /sys/block/zram0/mem_limit
-
-	# Using mem suffixes
-	echo 256K > /sys/block/zram0/mem_limit
-	echo 512M > /sys/block/zram0/mem_limit
-	echo 1G > /sys/block/zram0/mem_limit
-
-	# To disable memory limit
-	echo 0 > /sys/block/zram0/mem_limit
-
-6) Activate:
-	mkswap /dev/zram0
-	swapon /dev/zram0
-
-	mkfs.ext4 /dev/zram1
-	mount /dev/zram1 /tmp
-
-7) Add/remove zram devices
-
-zram provides a control interface, which enables dynamic (on-demand) device
-addition and removal.
-
-In order to add a new /dev/zramX device, perform read operation on hot_add
-attribute. This will return either new device's device id (meaning that you
-can use /dev/zram<id>) or error code.
-
-Example:
-	cat /sys/class/zram-control/hot_add
-	1
-
-To remove the existing /dev/zramX device (where X is a device id)
-execute
-	echo X > /sys/class/zram-control/hot_remove
-
-8) Stats:
-Per-device statistics are exported as various nodes under /sys/block/zram<id>/
-
-A brief description of exported device attributes. For more details please
-read Documentation/ABI/testing/sysfs-block-zram.
-
-Name            	access            description
-----            	------            -----------
-disksize          	RW	show and set the device's disk size
-initstate         	RO	shows the initialization state of the device
-reset             	WO	trigger device reset
-mem_used_max      	WO	reset the `mem_used_max' counter (see later)
-mem_limit         	WO	specifies the maximum amount of memory ZRAM can use
-				to store the compressed data
-writeback_limit   	WO	specifies the maximum amount of write IO zram can
-				write out to backing device as 4KB unit
-writeback_limit_enable  RW	show and set writeback_limit feature
-max_comp_streams  	RW	the number of possible concurrent compress operations
-comp_algorithm    	RW	show and change the compression algorithm
-compact           	WO	trigger memory compaction
-debug_stat        	RO	this file is used for zram debugging purposes
-backing_dev	  	RW	set up backend storage for zram to write out
-idle		  	WO	mark allocated slot as idle
-
-
-User space is advised to use the following files to read the device statistics.
-
-File /sys/block/zram<id>/stat
-
-Represents block layer statistics. Read Documentation/block/stat.txt for
-details.
-
-File /sys/block/zram<id>/io_stat
-
-The stat file represents device's I/O statistics not accounted by block
-layer and, thus, not available in zram<id>/stat file. It consists of a
-single line of text and contains the following stats separated by
-whitespace:
- failed_reads     the number of failed reads
- failed_writes    the number of failed writes
- invalid_io       the number of non-page-size-aligned I/O requests
- notify_free      Depending on device usage scenario it may account
-                  a) the number of pages freed because of swap slot free
-                  notifications or b) the number of pages freed because of
-                  REQ_OP_DISCARD requests sent by bio. The former ones are
-                  sent to a swap block device when a swap slot is freed,
-                  which implies that this disk is being used as a swap disk.
-                  The latter ones are sent by filesystem mounted with
-                  discard option, whenever some data blocks are getting
-                  discarded.
-
-File /sys/block/zram<id>/mm_stat
-
-The stat file represents device's mm statistics. It consists of a single
-line of text and contains the following stats separated by whitespace:
- orig_data_size   uncompressed size of data stored in this disk.
-		  This excludes same-element-filled pages (same_pages) since
-		  no memory is allocated for them.
-                  Unit: bytes
- compr_data_size  compressed size of data stored in this disk
- mem_used_total   the amount of memory allocated for this disk. This
-                  includes allocator fragmentation and metadata overhead,
-                  allocated for this disk. So, allocator space efficiency
-                  can be calculated using compr_data_size and this statistic.
-                  Unit: bytes
- mem_limit        the maximum amount of memory ZRAM can use to store
-                  the compressed data
- mem_used_max     the maximum amount of memory zram have consumed to
-                  store the data
- same_pages       the number of same element filled pages written to this disk.
-                  No memory is allocated for such pages.
- pages_compacted  the number of pages freed during compaction
- huge_pages	  the number of incompressible pages
-
-File /sys/block/zram<id>/bd_stat
-
-The stat file represents device's backing device statistics. It consists of
-a single line of text and contains the following stats separated by whitespace:
- bd_count	size of data written in backing device.
-		Unit: 4K bytes
- bd_reads	the number of reads from backing device
-		Unit: 4K bytes
- bd_writes	the number of writes to backing device
-		Unit: 4K bytes
-
-9) Deactivate:
-	swapoff /dev/zram0
-	umount /dev/zram1
-
-10) Reset:
-	Write any positive value to 'reset' sysfs node
-	echo 1 > /sys/block/zram0/reset
-	echo 1 > /sys/block/zram1/reset
-
-	This frees all the memory allocated for the given device and
-	resets the disksize to zero. You must set the disksize again
-	before reusing the device.
-
-* Optional Feature
-
-= writeback
-
-With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible page
-to backing storage rather than keeping it in memory.
-To use the feature, admin should set up backing device via
-
-	"echo /dev/sda5 > /sys/block/zramX/backing_dev"
-
-before disksize setting. It supports only partition at this moment.
-If admin want to use incompressible page writeback, they could do via
-
-	"echo huge > /sys/block/zramX/write"
-
-To use idle page writeback, first, user need to declare zram pages
-as idle.
-
-	"echo all > /sys/block/zramX/idle"
-
-From now on, any pages on zram are idle pages. The idle mark
-will be removed until someone request access of the block.
-IOW, unless there is access request, those pages are still idle pages.
-
-Admin can request writeback of those idle pages at right timing via
-
-	"echo idle > /sys/block/zramX/writeback"
-
-With the command, zram writeback idle pages from memory to the storage.
-
-If there are lots of write IO with flash device, potentially, it has
-flash wearout problem so that admin needs to design write limitation
-to guarantee storage health for entire product life.
-
-To overcome the concern, zram supports "writeback_limit" feature.
-The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
-any writeback. IOW, if admin want to apply writeback budget, he should
-enable writeback_limit_enable via
-
-	$ echo 1 > /sys/block/zramX/writeback_limit_enable
-
-Once writeback_limit_enable is set, zram doesn't allow any writeback
-until admin set the budget via /sys/block/zramX/writeback_limit.
-
-(If admin doesn't enable writeback_limit_enable, writeback_limit's value
-assigned via /sys/block/zramX/writeback_limit is meaninless.)
-
-If admin want to limit writeback as per-day 400M, he could do it
-like below.
-
-	$ MB_SHIFT=20
-	$ 4K_SHIFT=12
-	$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
-		/sys/block/zram0/writeback_limit.
-	$ echo 1 > /sys/block/zram0/writeback_limit_enable
-
-If admin want to allow further write again once the bugdet is exausted,
-he could do it like below
-
-	$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
-		/sys/block/zram0/writeback_limit
-
-If admin want to see remaining writeback budget since he set,
-
-	$ cat /sys/block/zramX/writeback_limit
-
-If admin want to disable writeback limit, he could do
-
-	$ echo 0 > /sys/block/zramX/writeback_limit_enable
-
-The writeback_limit count will reset whenever you reset zram(e.g.,
-system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
-writeback happened until you reset the zram to allocate extra writeback
-budget in next setting is user's job.
-
-If admin want to measure writeback count in a certain period, he could
-know it via /sys/block/zram0/bd_stat's 3rd column.
-
-= memory tracking
-
-With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the
-zram block. It could be useful to catch cold or incompressible
-pages of the process with*pagemap.
-If you enable the feature, you could see block state via
-/sys/kernel/debug/zram/zram0/block_state". The output is as follows,
-
-	  300    75.033841 .wh.
-	  301    63.806904 s...
-	  302    63.806919 ..hi
-
-First column is zram's block index.
-Second column is access time since the system was booted
-Third column is state of the block.
-(s: same page
-w: written page to backing store
-h: huge page
-i: idle page)
-
-First line of above example says 300th block is accessed at 75.033841sec
-and the block's state is huge so it is written back to the backing
-storage. It's a debugging feature so anyone shouldn't rely on it to work
-properly.
-
-Nitin Gupta
-ngupta@vflare.org
-- 
cgit 


From 6baec31591cee0f2f6d446abb81c828499a6ed23 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 17:35:40 -0300
Subject: docs: perf: convert to ReST

Rename the perf documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/perf/arm-ccn.rst       | 61 ++++++++++++++++++++++++++++++++++++
 Documentation/perf/arm-ccn.txt       | 59 ----------------------------------
 Documentation/perf/arm_dsu_pmu.rst   | 29 +++++++++++++++++
 Documentation/perf/arm_dsu_pmu.txt   | 28 -----------------
 Documentation/perf/hisi-pmu.rst      | 60 +++++++++++++++++++++++++++++++++++
 Documentation/perf/hisi-pmu.txt      | 53 -------------------------------
 Documentation/perf/index.rst         | 16 ++++++++++
 Documentation/perf/qcom_l2_pmu.rst   | 39 +++++++++++++++++++++++
 Documentation/perf/qcom_l2_pmu.txt   | 38 ----------------------
 Documentation/perf/qcom_l3_pmu.rst   | 26 +++++++++++++++
 Documentation/perf/qcom_l3_pmu.txt   | 25 ---------------
 Documentation/perf/thunderx2-pmu.rst | 42 +++++++++++++++++++++++++
 Documentation/perf/thunderx2-pmu.txt | 41 ------------------------
 Documentation/perf/xgene-pmu.rst     | 49 +++++++++++++++++++++++++++++
 Documentation/perf/xgene-pmu.txt     | 48 ----------------------------
 15 files changed, 322 insertions(+), 292 deletions(-)
 create mode 100644 Documentation/perf/arm-ccn.rst
 delete mode 100644 Documentation/perf/arm-ccn.txt
 create mode 100644 Documentation/perf/arm_dsu_pmu.rst
 delete mode 100644 Documentation/perf/arm_dsu_pmu.txt
 create mode 100644 Documentation/perf/hisi-pmu.rst
 delete mode 100644 Documentation/perf/hisi-pmu.txt
 create mode 100644 Documentation/perf/index.rst
 create mode 100644 Documentation/perf/qcom_l2_pmu.rst
 delete mode 100644 Documentation/perf/qcom_l2_pmu.txt
 create mode 100644 Documentation/perf/qcom_l3_pmu.rst
 delete mode 100644 Documentation/perf/qcom_l3_pmu.txt
 create mode 100644 Documentation/perf/thunderx2-pmu.rst
 delete mode 100644 Documentation/perf/thunderx2-pmu.txt
 create mode 100644 Documentation/perf/xgene-pmu.rst
 delete mode 100644 Documentation/perf/xgene-pmu.txt

(limited to 'Documentation')

diff --git a/Documentation/perf/arm-ccn.rst b/Documentation/perf/arm-ccn.rst
new file mode 100644
index 000000000000..832b0c64023a
--- /dev/null
+++ b/Documentation/perf/arm-ccn.rst
@@ -0,0 +1,61 @@
+==========================
+ARM Cache Coherent Network
+==========================
+
+CCN-504 is a ring-bus interconnect consisting of 11 crosspoints
+(XPs), with each crosspoint supporting up to two device ports,
+so nodes (devices) 0 and 1 are connected to crosspoint 0,
+nodes 2 and 3 to crosspoint 1 etc.
+
+PMU (perf) driver
+-----------------
+
+The CCN driver registers a perf PMU driver, which provides
+description of available events and configuration options
+in sysfs, see /sys/bus/event_source/devices/ccn*.
+
+The "format" directory describes format of the config, config1
+and config2 fields of the perf_event_attr structure. The "events"
+directory provides configuration templates for all documented
+events, that can be used with perf tool. For example "xp_valid_flit"
+is an equivalent of "type=0x8,event=0x4". Other parameters must be
+explicitly specified.
+
+For events originating from device, "node" defines its index.
+
+Crosspoint PMU events require "xp" (index), "bus" (bus number)
+and "vc" (virtual channel ID).
+
+Crosspoint watchpoint-based events (special "event" value 0xfe)
+require "xp" and "vc" as as above plus "port" (device port index),
+"dir" (transmit/receive direction), comparator values ("cmp_l"
+and "cmp_h") and "mask", being index of the comparator mask.
+
+Masks are defined separately from the event description
+(due to limited number of the config values) in the "cmp_mask"
+directory, with first 8 configurable by user and additional
+4 hardcoded for the most frequent use cases.
+
+Cycle counter is described by a "type" value 0xff and does
+not require any other settings.
+
+The driver also provides a "cpumask" sysfs attribute, which contains
+a single CPU ID, of the processor which will be used to handle all
+the CCN PMU events. It is recommended that the user space tools
+request the events on this processor (if not, the perf_event->cpu value
+will be overwritten anyway). In case of this processor being offlined,
+the events are migrated to another one and the attribute is updated.
+
+Example of perf tool use::
+
+  / # perf list | grep ccn
+    ccn/cycles/                                        [Kernel PMU event]
+  <...>
+    ccn/xp_valid_flit,xp=?,port=?,vc=?,dir=?/          [Kernel PMU event]
+  <...>
+
+  / # perf stat -a -e ccn/cycles/,ccn/xp_valid_flit,xp=1,port=0,vc=1,dir=1/ \
+                                                                         sleep 1
+
+The driver does not support sampling, therefore "perf record" will
+not work. Per-task (without "-a") perf sessions are not supported.
diff --git a/Documentation/perf/arm-ccn.txt b/Documentation/perf/arm-ccn.txt
deleted file mode 100644
index 15cdb7bc57c3..000000000000
--- a/Documentation/perf/arm-ccn.txt
+++ /dev/null
@@ -1,59 +0,0 @@
-ARM Cache Coherent Network
-==========================
-
-CCN-504 is a ring-bus interconnect consisting of 11 crosspoints
-(XPs), with each crosspoint supporting up to two device ports,
-so nodes (devices) 0 and 1 are connected to crosspoint 0,
-nodes 2 and 3 to crosspoint 1 etc.
-
-PMU (perf) driver
------------------
-
-The CCN driver registers a perf PMU driver, which provides
-description of available events and configuration options
-in sysfs, see /sys/bus/event_source/devices/ccn*.
-
-The "format" directory describes format of the config, config1
-and config2 fields of the perf_event_attr structure. The "events"
-directory provides configuration templates for all documented
-events, that can be used with perf tool. For example "xp_valid_flit"
-is an equivalent of "type=0x8,event=0x4". Other parameters must be
-explicitly specified.
-
-For events originating from device, "node" defines its index.
-
-Crosspoint PMU events require "xp" (index), "bus" (bus number)
-and "vc" (virtual channel ID).
-
-Crosspoint watchpoint-based events (special "event" value 0xfe)
-require "xp" and "vc" as as above plus "port" (device port index),
-"dir" (transmit/receive direction), comparator values ("cmp_l"
-and "cmp_h") and "mask", being index of the comparator mask.
-Masks are defined separately from the event description
-(due to limited number of the config values) in the "cmp_mask"
-directory, with first 8 configurable by user and additional
-4 hardcoded for the most frequent use cases.
-
-Cycle counter is described by a "type" value 0xff and does
-not require any other settings.
-
-The driver also provides a "cpumask" sysfs attribute, which contains
-a single CPU ID, of the processor which will be used to handle all
-the CCN PMU events. It is recommended that the user space tools
-request the events on this processor (if not, the perf_event->cpu value
-will be overwritten anyway). In case of this processor being offlined,
-the events are migrated to another one and the attribute is updated.
-
-Example of perf tool use:
-
-/ # perf list | grep ccn
-  ccn/cycles/                                        [Kernel PMU event]
-<...>
-  ccn/xp_valid_flit,xp=?,port=?,vc=?,dir=?/          [Kernel PMU event]
-<...>
-
-/ # perf stat -a -e ccn/cycles/,ccn/xp_valid_flit,xp=1,port=0,vc=1,dir=1/ \
-                                                                       sleep 1
-
-The driver does not support sampling, therefore "perf record" will
-not work. Per-task (without "-a") perf sessions are not supported.
diff --git a/Documentation/perf/arm_dsu_pmu.rst b/Documentation/perf/arm_dsu_pmu.rst
new file mode 100644
index 000000000000..7fd34db75d13
--- /dev/null
+++ b/Documentation/perf/arm_dsu_pmu.rst
@@ -0,0 +1,29 @@
+==================================
+ARM DynamIQ Shared Unit (DSU) PMU
+==================================
+
+ARM DynamIQ Shared Unit integrates one or more cores with an L3 memory system,
+control logic and external interfaces to form a multicore cluster. The PMU
+allows counting the various events related to the L3 cache, Snoop Control Unit
+etc, using 32bit independent counters. It also provides a 64bit cycle counter.
+
+The PMU can only be accessed via CPU system registers and are common to the
+cores connected to the same DSU. Like most of the other uncore PMUs, DSU
+PMU doesn't support process specific events and cannot be used in sampling mode.
+
+The DSU provides a bitmap for a subset of implemented events via hardware
+registers. There is no way for the driver to determine if the other events
+are available or not. Hence the driver exposes only those events advertised
+by the DSU, in "events" directory under::
+
+  /sys/bus/event_sources/devices/arm_dsu_<N>/
+
+The user should refer to the TRM of the product to figure out the supported events
+and use the raw event code for the unlisted events.
+
+The driver also exposes the CPUs connected to the DSU instance in "associated_cpus".
+
+
+e.g usage::
+
+	perf stat -a -e arm_dsu_0/cycles/
diff --git a/Documentation/perf/arm_dsu_pmu.txt b/Documentation/perf/arm_dsu_pmu.txt
deleted file mode 100644
index d611e15f5add..000000000000
--- a/Documentation/perf/arm_dsu_pmu.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-ARM DynamIQ Shared Unit (DSU) PMU
-==================================
-
-ARM DynamIQ Shared Unit integrates one or more cores with an L3 memory system,
-control logic and external interfaces to form a multicore cluster. The PMU
-allows counting the various events related to the L3 cache, Snoop Control Unit
-etc, using 32bit independent counters. It also provides a 64bit cycle counter.
-
-The PMU can only be accessed via CPU system registers and are common to the
-cores connected to the same DSU. Like most of the other uncore PMUs, DSU
-PMU doesn't support process specific events and cannot be used in sampling mode.
-
-The DSU provides a bitmap for a subset of implemented events via hardware
-registers. There is no way for the driver to determine if the other events
-are available or not. Hence the driver exposes only those events advertised
-by the DSU, in "events" directory under :
-
-  /sys/bus/event_sources/devices/arm_dsu_<N>/
-
-The user should refer to the TRM of the product to figure out the supported events
-and use the raw event code for the unlisted events.
-
-The driver also exposes the CPUs connected to the DSU instance in "associated_cpus".
-
-
-e.g usage :
-
-	perf stat -a -e arm_dsu_0/cycles/
diff --git a/Documentation/perf/hisi-pmu.rst b/Documentation/perf/hisi-pmu.rst
new file mode 100644
index 000000000000..404a5c3d9d00
--- /dev/null
+++ b/Documentation/perf/hisi-pmu.rst
@@ -0,0 +1,60 @@
+======================================================
+HiSilicon SoC uncore Performance Monitoring Unit (PMU)
+======================================================
+
+The HiSilicon SoC chip includes various independent system device PMUs
+such as L3 cache (L3C), Hydra Home Agent (HHA) and DDRC. These PMUs are
+independent and have hardware logic to gather statistics and performance
+information.
+
+The HiSilicon SoC encapsulates multiple CPU and IO dies. Each CPU cluster
+(CCL) is made up of 4 cpu cores sharing one L3 cache; each CPU die is
+called Super CPU cluster (SCCL) and is made up of 6 CCLs. Each SCCL has
+two HHAs (0 - 1) and four DDRCs (0 - 3), respectively.
+
+HiSilicon SoC uncore PMU driver
+-------------------------------
+
+Each device PMU has separate registers for event counting, control and
+interrupt, and the PMU driver shall register perf PMU drivers like L3C,
+HHA and DDRC etc. The available events and configuration options shall
+be described in the sysfs, see:
+
+/sys/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>/, or
+/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>.
+The "perf list" command shall list the available events from sysfs.
+
+Each L3C, HHA and DDRC is registered as a separate PMU with perf. The PMU
+name will appear in event listing as hisi_sccl<sccl-id>_module<index-id>.
+where "sccl-id" is the identifier of the SCCL and "index-id" is the index of
+module.
+
+e.g. hisi_sccl3_l3c0/rd_hit_cpipe is READ_HIT_CPIPE event of L3C index #0 in
+SCCL ID #3.
+
+e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in
+SCCL ID #1.
+
+The driver also provides a "cpumask" sysfs attribute, which shows the CPU core
+ID used to count the uncore PMU event.
+
+Example usage of perf::
+
+  $# perf list
+  hisi_sccl3_l3c0/rd_hit_cpipe/ [kernel PMU event]
+  ------------------------------------------
+  hisi_sccl3_l3c0/wr_hit_cpipe/ [kernel PMU event]
+  ------------------------------------------
+  hisi_sccl1_l3c0/rd_hit_cpipe/ [kernel PMU event]
+  ------------------------------------------
+  hisi_sccl1_l3c0/wr_hit_cpipe/ [kernel PMU event]
+  ------------------------------------------
+
+  $# perf stat -a -e hisi_sccl3_l3c0/rd_hit_cpipe/ sleep 5
+  $# perf stat -a -e hisi_sccl3_l3c0/config=0x02/ sleep 5
+
+The current driver does not support sampling. So "perf record" is unsupported.
+Also attach to a task is unsupported as the events are all uncore.
+
+Note: Please contact the maintainer for a complete list of events supported for
+the PMU devices in the SoC and its information if needed.
diff --git a/Documentation/perf/hisi-pmu.txt b/Documentation/perf/hisi-pmu.txt
deleted file mode 100644
index 267a028b2741..000000000000
--- a/Documentation/perf/hisi-pmu.txt
+++ /dev/null
@@ -1,53 +0,0 @@
-HiSilicon SoC uncore Performance Monitoring Unit (PMU)
-======================================================
-The HiSilicon SoC chip includes various independent system device PMUs
-such as L3 cache (L3C), Hydra Home Agent (HHA) and DDRC. These PMUs are
-independent and have hardware logic to gather statistics and performance
-information.
-
-The HiSilicon SoC encapsulates multiple CPU and IO dies. Each CPU cluster
-(CCL) is made up of 4 cpu cores sharing one L3 cache; each CPU die is
-called Super CPU cluster (SCCL) and is made up of 6 CCLs. Each SCCL has
-two HHAs (0 - 1) and four DDRCs (0 - 3), respectively.
-
-HiSilicon SoC uncore PMU driver
----------------------------------------
-Each device PMU has separate registers for event counting, control and
-interrupt, and the PMU driver shall register perf PMU drivers like L3C,
-HHA and DDRC etc. The available events and configuration options shall
-be described in the sysfs, see :
-/sys/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>/, or
-/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>.
-The "perf list" command shall list the available events from sysfs.
-
-Each L3C, HHA and DDRC is registered as a separate PMU with perf. The PMU
-name will appear in event listing as hisi_sccl<sccl-id>_module<index-id>.
-where "sccl-id" is the identifier of the SCCL and "index-id" is the index of
-module.
-e.g. hisi_sccl3_l3c0/rd_hit_cpipe is READ_HIT_CPIPE event of L3C index #0 in
-SCCL ID #3.
-e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in
-SCCL ID #1.
-
-The driver also provides a "cpumask" sysfs attribute, which shows the CPU core
-ID used to count the uncore PMU event.
-
-Example usage of perf:
-$# perf list
-hisi_sccl3_l3c0/rd_hit_cpipe/ [kernel PMU event]
-------------------------------------------
-hisi_sccl3_l3c0/wr_hit_cpipe/ [kernel PMU event]
-------------------------------------------
-hisi_sccl1_l3c0/rd_hit_cpipe/ [kernel PMU event]
-------------------------------------------
-hisi_sccl1_l3c0/wr_hit_cpipe/ [kernel PMU event]
-------------------------------------------
-
-$# perf stat -a -e hisi_sccl3_l3c0/rd_hit_cpipe/ sleep 5
-$# perf stat -a -e hisi_sccl3_l3c0/config=0x02/ sleep 5
-
-The current driver does not support sampling. So "perf record" is unsupported.
-Also attach to a task is unsupported as the events are all uncore.
-
-Note: Please contact the maintainer for a complete list of events supported for
-the PMU devices in the SoC and its information if needed.
diff --git a/Documentation/perf/index.rst b/Documentation/perf/index.rst
new file mode 100644
index 000000000000..4bf848e27f26
--- /dev/null
+++ b/Documentation/perf/index.rst
@@ -0,0 +1,16 @@
+:orphan:
+
+===========================
+Performance monitor support
+===========================
+
+.. toctree::
+   :maxdepth: 1
+
+   hisi-pmu
+   qcom_l2_pmu
+   qcom_l3_pmu
+   arm-ccn
+   xgene-pmu
+   arm_dsu_pmu
+   thunderx2-pmu
diff --git a/Documentation/perf/qcom_l2_pmu.rst b/Documentation/perf/qcom_l2_pmu.rst
new file mode 100644
index 000000000000..c130178a4a55
--- /dev/null
+++ b/Documentation/perf/qcom_l2_pmu.rst
@@ -0,0 +1,39 @@
+=====================================================================
+Qualcomm Technologies Level-2 Cache Performance Monitoring Unit (PMU)
+=====================================================================
+
+This driver supports the L2 cache clusters found in Qualcomm Technologies
+Centriq SoCs. There are multiple physical L2 cache clusters, each with their
+own PMU. Each cluster has one or more CPUs associated with it.
+
+There is one logical L2 PMU exposed, which aggregates the results from
+the physical PMUs.
+
+The driver provides a description of its available events and configuration
+options in sysfs, see /sys/devices/l2cache_0.
+
+The "format" directory describes the format of the events.
+
+Events can be envisioned as a 2-dimensional array. Each column represents
+a group of events. There are 8 groups. Only one entry from each
+group can be in use at a time. If multiple events from the same group
+are specified, the conflicting events cannot be counted at the same time.
+
+Events are specified as 0xCCG, where CC is 2 hex digits specifying
+the code (array row) and G specifies the group (column) 0-7.
+
+In addition there is a cycle counter event specified by the value 0xFE
+which is outside the above scheme.
+
+The driver provides a "cpumask" sysfs attribute which contains a mask
+consisting of one CPU per cluster which will be used to handle all the PMU
+events on that cluster.
+
+Examples for use with perf::
+
+  perf stat -e l2cache_0/config=0x001/,l2cache_0/config=0x042/ -a sleep 1
+
+  perf stat -e l2cache_0/config=0xfe/ -C 2 sleep 1
+
+The driver does not support sampling, therefore "perf record" will
+not work. Per-task perf sessions are not supported.
diff --git a/Documentation/perf/qcom_l2_pmu.txt b/Documentation/perf/qcom_l2_pmu.txt
deleted file mode 100644
index b25b97659ab9..000000000000
--- a/Documentation/perf/qcom_l2_pmu.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-Qualcomm Technologies Level-2 Cache Performance Monitoring Unit (PMU)
-=====================================================================
-
-This driver supports the L2 cache clusters found in Qualcomm Technologies
-Centriq SoCs. There are multiple physical L2 cache clusters, each with their
-own PMU. Each cluster has one or more CPUs associated with it.
-
-There is one logical L2 PMU exposed, which aggregates the results from
-the physical PMUs.
-
-The driver provides a description of its available events and configuration
-options in sysfs, see /sys/devices/l2cache_0.
-
-The "format" directory describes the format of the events.
-
-Events can be envisioned as a 2-dimensional array. Each column represents
-a group of events. There are 8 groups. Only one entry from each
-group can be in use at a time. If multiple events from the same group
-are specified, the conflicting events cannot be counted at the same time.
-
-Events are specified as 0xCCG, where CC is 2 hex digits specifying
-the code (array row) and G specifies the group (column) 0-7.
-
-In addition there is a cycle counter event specified by the value 0xFE
-which is outside the above scheme.
-
-The driver provides a "cpumask" sysfs attribute which contains a mask
-consisting of one CPU per cluster which will be used to handle all the PMU
-events on that cluster.
-
-Examples for use with perf:
-
-  perf stat -e l2cache_0/config=0x001/,l2cache_0/config=0x042/ -a sleep 1
-
-  perf stat -e l2cache_0/config=0xfe/ -C 2 sleep 1
-
-The driver does not support sampling, therefore "perf record" will
-not work. Per-task perf sessions are not supported.
diff --git a/Documentation/perf/qcom_l3_pmu.rst b/Documentation/perf/qcom_l3_pmu.rst
new file mode 100644
index 000000000000..a3d014a46bfd
--- /dev/null
+++ b/Documentation/perf/qcom_l3_pmu.rst
@@ -0,0 +1,26 @@
+===========================================================================
+Qualcomm Datacenter Technologies L3 Cache Performance Monitoring Unit (PMU)
+===========================================================================
+
+This driver supports the L3 cache PMUs found in Qualcomm Datacenter Technologies
+Centriq SoCs. The L3 cache on these SOCs is composed of multiple slices, shared
+by all cores within a socket. Each slice is exposed as a separate uncore perf
+PMU with device name l3cache_<socket>_<instance>. User space is responsible
+for aggregating across slices.
+
+The driver provides a description of its available events and configuration
+options in sysfs, see /sys/devices/l3cache*. Given that these are uncore PMUs
+the driver also exposes a "cpumask" sysfs attribute which contains a mask
+consisting of one CPU per socket which will be used to handle all the PMU
+events on that socket.
+
+The hardware implements 32bit event counters and has a flat 8bit event space
+exposed via the "event" format attribute. In addition to the 32bit physical
+counters the driver supports virtual 64bit hardware counters by using hardware
+counter chaining. This feature is exposed via the "lc" (long counter) format
+flag. E.g.::
+
+  perf stat -e l3cache_0_0/read-miss,lc/
+
+Given that these are uncore PMUs the driver does not support sampling, therefore
+"perf record" will not work. Per-task perf sessions are not supported.
diff --git a/Documentation/perf/qcom_l3_pmu.txt b/Documentation/perf/qcom_l3_pmu.txt
deleted file mode 100644
index 96b3a9444a0d..000000000000
--- a/Documentation/perf/qcom_l3_pmu.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Qualcomm Datacenter Technologies L3 Cache Performance Monitoring Unit (PMU)
-===========================================================================
-
-This driver supports the L3 cache PMUs found in Qualcomm Datacenter Technologies
-Centriq SoCs. The L3 cache on these SOCs is composed of multiple slices, shared
-by all cores within a socket. Each slice is exposed as a separate uncore perf
-PMU with device name l3cache_<socket>_<instance>. User space is responsible
-for aggregating across slices.
-
-The driver provides a description of its available events and configuration
-options in sysfs, see /sys/devices/l3cache*. Given that these are uncore PMUs
-the driver also exposes a "cpumask" sysfs attribute which contains a mask
-consisting of one CPU per socket which will be used to handle all the PMU
-events on that socket.
-
-The hardware implements 32bit event counters and has a flat 8bit event space
-exposed via the "event" format attribute. In addition to the 32bit physical
-counters the driver supports virtual 64bit hardware counters by using hardware
-counter chaining. This feature is exposed via the "lc" (long counter) format
-flag. E.g.:
-
-  perf stat -e l3cache_0_0/read-miss,lc/
-
-Given that these are uncore PMUs the driver does not support sampling, therefore
-"perf record" will not work. Per-task perf sessions are not supported.
diff --git a/Documentation/perf/thunderx2-pmu.rst b/Documentation/perf/thunderx2-pmu.rst
new file mode 100644
index 000000000000..08e33675853a
--- /dev/null
+++ b/Documentation/perf/thunderx2-pmu.rst
@@ -0,0 +1,42 @@
+=============================================================
+Cavium ThunderX2 SoC Performance Monitoring Unit (PMU UNCORE)
+=============================================================
+
+The ThunderX2 SoC PMU consists of independent, system-wide, per-socket
+PMUs such as the Level 3 Cache (L3C) and DDR4 Memory Controller (DMC).
+
+The DMC has 8 interleaved channels and the L3C has 16 interleaved tiles.
+Events are counted for the default channel (i.e. channel 0) and prorated
+to the total number of channels/tiles.
+
+The DMC and L3C support up to 4 counters. Counters are independently
+programmable and can be started and stopped individually. Each counter
+can be set to a different event. Counters are 32-bit and do not support
+an overflow interrupt; they are read every 2 seconds.
+
+PMU UNCORE (perf) driver:
+
+The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and
+L3C devices.  Each PMU can be used to count up to 4 events
+simultaneously. The PMUs provide a description of their available events
+and configuration options under sysfs, see
+/sys/devices/uncore_<l3c_S/dmc_S/>; S is the socket id.
+
+The driver does not support sampling, therefore "perf record" will not
+work. Per-task perf sessions are also not supported.
+
+Examples::
+
+  # perf stat -a -e uncore_dmc_0/cnt_cycles/ sleep 1
+
+  # perf stat -a -e \
+  uncore_dmc_0/cnt_cycles/,\
+  uncore_dmc_0/data_transfers/,\
+  uncore_dmc_0/read_txns/,\
+  uncore_dmc_0/write_txns/ sleep 1
+
+  # perf stat -a -e \
+  uncore_l3c_0/read_request/,\
+  uncore_l3c_0/read_hit/,\
+  uncore_l3c_0/inv_request/,\
+  uncore_l3c_0/inv_hit/ sleep 1
diff --git a/Documentation/perf/thunderx2-pmu.txt b/Documentation/perf/thunderx2-pmu.txt
deleted file mode 100644
index dffc57143736..000000000000
--- a/Documentation/perf/thunderx2-pmu.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-Cavium ThunderX2 SoC Performance Monitoring Unit (PMU UNCORE)
-=============================================================
-
-The ThunderX2 SoC PMU consists of independent, system-wide, per-socket
-PMUs such as the Level 3 Cache (L3C) and DDR4 Memory Controller (DMC).
-
-The DMC has 8 interleaved channels and the L3C has 16 interleaved tiles.
-Events are counted for the default channel (i.e. channel 0) and prorated
-to the total number of channels/tiles.
-
-The DMC and L3C support up to 4 counters. Counters are independently
-programmable and can be started and stopped individually. Each counter
-can be set to a different event. Counters are 32-bit and do not support
-an overflow interrupt; they are read every 2 seconds.
-
-PMU UNCORE (perf) driver:
-
-The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and
-L3C devices.  Each PMU can be used to count up to 4 events
-simultaneously. The PMUs provide a description of their available events
-and configuration options under sysfs, see
-/sys/devices/uncore_<l3c_S/dmc_S/>; S is the socket id.
-
-The driver does not support sampling, therefore "perf record" will not
-work. Per-task perf sessions are also not supported.
-
-Examples:
-
-# perf stat -a -e uncore_dmc_0/cnt_cycles/ sleep 1
-
-# perf stat -a -e \
-uncore_dmc_0/cnt_cycles/,\
-uncore_dmc_0/data_transfers/,\
-uncore_dmc_0/read_txns/,\
-uncore_dmc_0/write_txns/ sleep 1
-
-# perf stat -a -e \
-uncore_l3c_0/read_request/,\
-uncore_l3c_0/read_hit/,\
-uncore_l3c_0/inv_request/,\
-uncore_l3c_0/inv_hit/ sleep 1
diff --git a/Documentation/perf/xgene-pmu.rst b/Documentation/perf/xgene-pmu.rst
new file mode 100644
index 000000000000..644f8ed89152
--- /dev/null
+++ b/Documentation/perf/xgene-pmu.rst
@@ -0,0 +1,49 @@
+================================================
+APM X-Gene SoC Performance Monitoring Unit (PMU)
+================================================
+
+X-Gene SoC PMU consists of various independent system device PMUs such as
+L3 cache(s), I/O bridge(s), memory controller bridge(s) and memory
+controller(s). These PMU devices are loosely architected to follow the
+same model as the PMU for ARM cores. The PMUs share the same top level
+interrupt and status CSR region.
+
+PMU (perf) driver
+-----------------
+
+The xgene-pmu driver registers several perf PMU drivers. Each of the perf
+driver provides description of its available events and configuration options
+in sysfs, see /sys/devices/<l3cX/iobX/mcbX/mcX>/.
+
+The "format" directory describes format of the config (event ID),
+config1 (agent ID) fields of the perf_event_attr structure. The "events"
+directory provides configuration templates for all supported event types that
+can be used with perf tool. For example, "l3c0/bank-fifo-full/" is an
+equivalent of "l3c0/config=0x0b/".
+
+Most of the SoC PMU has a specific list of agent ID used for monitoring
+performance of a specific datapath. For example, agents of a L3 cache can be
+a specific CPU or an I/O bridge. Each PMU has a set of 2 registers capable of
+masking the agents from which the request come from. If the bit with
+the bit number corresponding to the agent is set, the event is counted only if
+it is caused by a request from that agent. Each agent ID bit is inversely mapped
+to a corresponding bit in "config1" field. By default, the event will be
+counted for all agent requests (config1 = 0x0). For all the supported agents of
+each PMU, please refer to APM X-Gene User Manual.
+
+Each perf driver also provides a "cpumask" sysfs attribute, which contains a
+single CPU ID of the processor which will be used to handle all the PMU events.
+
+Example for perf tool use::
+
+ / # perf list | grep -e l3c -e iob -e mcb -e mc
+   l3c0/ackq-full/                                    [Kernel PMU event]
+ <...>
+   mcb1/mcb-csw-stall/                                [Kernel PMU event]
+
+ / # perf stat -a -e l3c0/read-miss/,mcb1/csw-write-request/ sleep 1
+
+ / # perf stat -a -e l3c0/read-miss,config1=0xfffffffffffffffe/ sleep 1
+
+The driver does not support sampling, therefore "perf record" will
+not work. Per-task (without "-a") perf sessions are not supported.
diff --git a/Documentation/perf/xgene-pmu.txt b/Documentation/perf/xgene-pmu.txt
deleted file mode 100644
index d7cff4454e5b..000000000000
--- a/Documentation/perf/xgene-pmu.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-APM X-Gene SoC Performance Monitoring Unit (PMU)
-================================================
-
-X-Gene SoC PMU consists of various independent system device PMUs such as
-L3 cache(s), I/O bridge(s), memory controller bridge(s) and memory
-controller(s). These PMU devices are loosely architected to follow the
-same model as the PMU for ARM cores. The PMUs share the same top level
-interrupt and status CSR region.
-
-PMU (perf) driver
------------------
-
-The xgene-pmu driver registers several perf PMU drivers. Each of the perf
-driver provides description of its available events and configuration options
-in sysfs, see /sys/devices/<l3cX/iobX/mcbX/mcX>/.
-
-The "format" directory describes format of the config (event ID),
-config1 (agent ID) fields of the perf_event_attr structure. The "events"
-directory provides configuration templates for all supported event types that
-can be used with perf tool. For example, "l3c0/bank-fifo-full/" is an
-equivalent of "l3c0/config=0x0b/".
-
-Most of the SoC PMU has a specific list of agent ID used for monitoring
-performance of a specific datapath. For example, agents of a L3 cache can be
-a specific CPU or an I/O bridge. Each PMU has a set of 2 registers capable of
-masking the agents from which the request come from. If the bit with
-the bit number corresponding to the agent is set, the event is counted only if
-it is caused by a request from that agent. Each agent ID bit is inversely mapped
-to a corresponding bit in "config1" field. By default, the event will be
-counted for all agent requests (config1 = 0x0). For all the supported agents of
-each PMU, please refer to APM X-Gene User Manual.
-
-Each perf driver also provides a "cpumask" sysfs attribute, which contains a
-single CPU ID of the processor which will be used to handle all the PMU events.
-
-Example for perf tool use:
-
- / # perf list | grep -e l3c -e iob -e mcb -e mc
-   l3c0/ackq-full/                                    [Kernel PMU event]
- <...>
-   mcb1/mcb-csw-stall/                                [Kernel PMU event]
-
- / # perf stat -a -e l3c0/read-miss/,mcb1/csw-write-request/ sleep 1
-
- / # perf stat -a -e l3c0/read-miss,config1=0xfffffffffffffffe/ sleep 1
-
-The driver does not support sampling, therefore "perf record" will
-not work. Per-task (without "-a") perf sessions are not supported.
-- 
cgit 


From 53b9537509654a6267c3f56b4d2e7409b9089686 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 18:35:54 -0300
Subject: docs: sysctl: convert to ReST

Rename the /proc/sys/ documentation files to ReST, using the
README file as a template for an index.rst, adding the other
files there via TOC markup.

Despite being written on different times with different
styles, try to make them somewhat coherent with a similar
look and feel, ensuring that they'll look nice as both
raw text file and as via the html output produced by the
Sphinx build system.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/admin-guide/kernel-parameters.txt |    2 +-
 Documentation/admin-guide/mm/index.rst          |    2 +-
 Documentation/admin-guide/mm/ksm.rst            |    2 +-
 Documentation/core-api/printk-formats.rst       |    2 +-
 Documentation/networking/ip-sysctl.txt          |    2 +-
 Documentation/sysctl/README                     |   76 --
 Documentation/sysctl/abi.rst                    |   67 ++
 Documentation/sysctl/abi.txt                    |   54 --
 Documentation/sysctl/fs.rst                     |  384 ++++++++
 Documentation/sysctl/fs.txt                     |  374 -------
 Documentation/sysctl/index.rst                  |  100 ++
 Documentation/sysctl/kernel.rst                 | 1177 +++++++++++++++++++++++
 Documentation/sysctl/kernel.txt                 | 1129 ----------------------
 Documentation/sysctl/net.rst                    |  461 +++++++++
 Documentation/sysctl/net.txt                    |  422 --------
 Documentation/sysctl/sunrpc.rst                 |   25 +
 Documentation/sysctl/sunrpc.txt                 |   20 -
 Documentation/sysctl/user.rst                   |   78 ++
 Documentation/sysctl/user.txt                   |   66 --
 Documentation/sysctl/vm.rst                     |  964 +++++++++++++++++++
 Documentation/sysctl/vm.txt                     |  946 ------------------
 Documentation/vm/unevictable-lru.rst            |    2 +-
 22 files changed, 3262 insertions(+), 3093 deletions(-)
 delete mode 100644 Documentation/sysctl/README
 create mode 100644 Documentation/sysctl/abi.rst
 delete mode 100644 Documentation/sysctl/abi.txt
 create mode 100644 Documentation/sysctl/fs.rst
 delete mode 100644 Documentation/sysctl/fs.txt
 create mode 100644 Documentation/sysctl/index.rst
 create mode 100644 Documentation/sysctl/kernel.rst
 delete mode 100644 Documentation/sysctl/kernel.txt
 create mode 100644 Documentation/sysctl/net.rst
 delete mode 100644 Documentation/sysctl/net.txt
 create mode 100644 Documentation/sysctl/sunrpc.rst
 delete mode 100644 Documentation/sysctl/sunrpc.txt
 create mode 100644 Documentation/sysctl/user.rst
 delete mode 100644 Documentation/sysctl/user.txt
 create mode 100644 Documentation/sysctl/vm.rst
 delete mode 100644 Documentation/sysctl/vm.txt

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6b2adda1cc03..01123f1de354 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3144,7 +3144,7 @@
 	numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
 			'node', 'default' can be specified
 			This can be set from sysctl after boot.
-			See Documentation/sysctl/vm.txt for details.
+			See Documentation/sysctl/vm.rst for details.
 
 	ohci1394_dma=early	[HW] enable debugging via the ohci1394 driver.
 			See Documentation/debugging-via-ohci1394.txt for more
diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst
index ddf8d8d33377..f5e92f33f96e 100644
--- a/Documentation/admin-guide/mm/index.rst
+++ b/Documentation/admin-guide/mm/index.rst
@@ -11,7 +11,7 @@ processes address space and many other cool things.
 Linux memory management is a complex system with many configurable
 settings. Most of these settings are available via ``/proc``
 filesystem and can be quired and adjusted using ``sysctl``. These APIs
-are described in Documentation/sysctl/vm.txt and in `man 5 proc`_.
+are described in Documentation/sysctl/vm.rst and in `man 5 proc`_.
 
 .. _man 5 proc: http://man7.org/linux/man-pages/man5/proc.5.html
 
diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst
index 9303786632d1..7b2b8767c0b4 100644
--- a/Documentation/admin-guide/mm/ksm.rst
+++ b/Documentation/admin-guide/mm/ksm.rst
@@ -59,7 +59,7 @@ MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
 
 If a region of memory must be split into at least one new MADV_MERGEABLE
 or MADV_UNMERGEABLE region, the madvise may return ENOMEM if the process
-will exceed ``vm.max_map_count`` (see Documentation/sysctl/vm.txt).
+will exceed ``vm.max_map_count`` (see Documentation/sysctl/vm.rst).
 
 Like other madvise calls, they are intended for use on mapped areas of
 the user address space: they will report ENOMEM if the specified range
diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index 75d2bbe9813f..1d8e748f909f 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -119,7 +119,7 @@ Kernel Pointers
 
 For printing kernel pointers which should be hidden from unprivileged
 users. The behaviour of %pK depends on the kptr_restrict sysctl - see
-Documentation/sysctl/kernel.txt for more details.
+Documentation/sysctl/kernel.rst for more details.
 
 Unmodified Addresses
 --------------------
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 48c79e78817b..5c3399cde1c4 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -2287,7 +2287,7 @@ addr_scope_policy - INTEGER
 
 
 /proc/sys/net/core/*
-	Please see: Documentation/sysctl/net.txt for descriptions of these entries.
+	Please see: Documentation/sysctl/net.rst for descriptions of these entries.
 
 
 /proc/sys/net/unix/*
diff --git a/Documentation/sysctl/README b/Documentation/sysctl/README
deleted file mode 100644
index d5f24ab0ecc3..000000000000
--- a/Documentation/sysctl/README
+++ /dev/null
@@ -1,76 +0,0 @@
-Documentation for /proc/sys/		kernel version 2.2.10
-	(c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
-
-'Why', I hear you ask, 'would anyone even _want_ documentation
-for them sysctl files? If anybody really needs it, it's all in
-the source...'
-
-Well, this documentation is written because some people either
-don't know they need to tweak something, or because they don't
-have the time or knowledge to read the source code.
-
-Furthermore, the programmers who built sysctl have built it to
-be actually used, not just for the fun of programming it :-)
-
-==============================================================
-
-Legal blurb:
-
-As usual, there are two main things to consider:
-1. you get what you pay for
-2. it's free
-
-The consequences are that I won't guarantee the correctness of
-this document, and if you come to me complaining about how you
-screwed up your system because of wrong documentation, I won't
-feel sorry for you. I might even laugh at you...
-
-But of course, if you _do_ manage to screw up your system using
-only the sysctl options used in this file, I'd like to hear of
-it. Not only to have a great laugh, but also to make sure that
-you're the last RTFMing person to screw up.
-
-In short, e-mail your suggestions, corrections and / or horror
-stories to: <riel@nl.linux.org>
-
-Rik van Riel.
-
-==============================================================
-
-Introduction:
-
-Sysctl is a means of configuring certain aspects of the kernel
-at run-time, and the /proc/sys/ directory is there so that you
-don't even need special tools to do it!
-In fact, there are only four things needed to use these config
-facilities:
-- a running Linux system
-- root access
-- common sense (this is especially hard to come by these days)
-- knowledge of what all those values mean
-
-As a quick 'ls /proc/sys' will show, the directory consists of
-several (arch-dependent?) subdirs. Each subdir is mainly about
-one part of the kernel, so you can do configuration on a piece
-by piece basis, or just some 'thematic frobbing'.
-
-The subdirs are about:
-abi/		execution domains & personalities
-debug/		<empty>
-dev/		device specific information (eg dev/cdrom/info)
-fs/		specific filesystems
-		filehandle, inode, dentry and quota tuning
-		binfmt_misc <Documentation/admin-guide/binfmt-misc.rst>
-kernel/		global kernel info / tuning
-		miscellaneous stuff
-net/		networking stuff, for documentation look in:
-		<Documentation/networking/>
-proc/		<empty>
-sunrpc/		SUN Remote Procedure Call (NFS)
-vm/		memory management tuning
-		buffer and cache management
-user/		Per user per user namespace limits
-
-These are the subdirs I have on my system. There might be more
-or other subdirs in another setup. If you see another dir, I'd
-really like to hear about it :-)
diff --git a/Documentation/sysctl/abi.rst b/Documentation/sysctl/abi.rst
new file mode 100644
index 000000000000..599bcde7f0b7
--- /dev/null
+++ b/Documentation/sysctl/abi.rst
@@ -0,0 +1,67 @@
+================================
+Documentation for /proc/sys/abi/
+================================
+
+kernel version 2.6.0.test2
+
+Copyright (c) 2003,  Fabian Frederick <ffrederick@users.sourceforge.net>
+
+For general info: index.rst.
+
+------------------------------------------------------------------------------
+
+This path is binary emulation relevant aka personality types aka abi.
+When a process is executed, it's linked to an exec_domain whose
+personality is defined using values available from /proc/sys/abi.
+You can find further details about abi in include/linux/personality.h.
+
+Here are the files featuring in 2.6 kernel:
+
+- defhandler_coff
+- defhandler_elf
+- defhandler_lcall7
+- defhandler_libcso
+- fake_utsname
+- trace
+
+defhandler_coff
+---------------
+
+defined value:
+	PER_SCOSVR3::
+
+		0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS | SHORT_INODE
+
+defhandler_elf
+--------------
+
+defined value:
+	PER_LINUX::
+
+		0
+
+defhandler_lcall7
+-----------------
+
+defined value :
+	PER_SVR4::
+
+		0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
+
+defhandler_libsco
+-----------------
+
+defined value:
+	PER_SVR4::
+
+		0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
+
+fake_utsname
+------------
+
+Unused
+
+trace
+-----
+
+Unused
diff --git a/Documentation/sysctl/abi.txt b/Documentation/sysctl/abi.txt
deleted file mode 100644
index 63f4ebcf652c..000000000000
--- a/Documentation/sysctl/abi.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-Documentation for /proc/sys/abi/* kernel version 2.6.0.test2
-	(c) 2003,  Fabian Frederick <ffrederick@users.sourceforge.net>
-
-For general info : README.
-
-==============================================================
-
-This path is binary emulation relevant aka personality types aka abi.
-When a process is executed, it's linked to an exec_domain whose
-personality is defined using values available from /proc/sys/abi.
-You can find further details about abi in include/linux/personality.h.
-
-Here are the files featuring in 2.6 kernel :
-
-- defhandler_coff
-- defhandler_elf
-- defhandler_lcall7
-- defhandler_libcso
-- fake_utsname
-- trace
-
-===========================================================
-defhandler_coff:
-defined value :
-PER_SCOSVR3
-0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS | SHORT_INODE
-
-===========================================================
-defhandler_elf:
-defined value :
-PER_LINUX
-0
-
-===========================================================
-defhandler_lcall7:
-defined value :
-PER_SVR4
-0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
-
-===========================================================
-defhandler_libsco:
-defined value:
-PER_SVR4
-0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
-
-===========================================================
-fake_utsname:
-Unused
-
-===========================================================
-trace:
-Unused
-
-===========================================================
diff --git a/Documentation/sysctl/fs.rst b/Documentation/sysctl/fs.rst
new file mode 100644
index 000000000000..2a45119e3331
--- /dev/null
+++ b/Documentation/sysctl/fs.rst
@@ -0,0 +1,384 @@
+===============================
+Documentation for /proc/sys/fs/
+===============================
+
+kernel version 2.2.10
+
+Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
+
+Copyright (c) 2009,        Shen Feng<shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in intro.rst.
+
+------------------------------------------------------------------------------
+
+This file contains documentation for the sysctl files in
+/proc/sys/fs/ and is valid for Linux kernel version 2.2.
+
+The files in this directory can be used to tune and monitor
+miscellaneous and general things in the operation of the Linux
+kernel. Since some of the files _can_ be used to screw up your
+system, it is advisable to read both documentation and source
+before actually making adjustments.
+
+1. /proc/sys/fs
+===============
+
+Currently, these files are in /proc/sys/fs:
+
+- aio-max-nr
+- aio-nr
+- dentry-state
+- dquot-max
+- dquot-nr
+- file-max
+- file-nr
+- inode-max
+- inode-nr
+- inode-state
+- nr_open
+- overflowuid
+- overflowgid
+- pipe-user-pages-hard
+- pipe-user-pages-soft
+- protected_fifos
+- protected_hardlinks
+- protected_regular
+- protected_symlinks
+- suid_dumpable
+- super-max
+- super-nr
+
+
+aio-nr & aio-max-nr
+-------------------
+
+aio-nr is the running total of the number of events specified on the
+io_setup system call for all currently active aio contexts.  If aio-nr
+reaches aio-max-nr then io_setup will fail with EAGAIN.  Note that
+raising aio-max-nr does not result in the pre-allocation or re-sizing
+of any kernel data structures.
+
+
+dentry-state
+------------
+
+From linux/include/linux/dcache.h::
+
+  struct dentry_stat_t dentry_stat {
+        int nr_dentry;
+        int nr_unused;
+        int age_limit;         /* age in seconds */
+        int want_pages;        /* pages requested by system */
+        int nr_negative;       /* # of unused negative dentries */
+        int dummy;             /* Reserved for future use */
+  };
+
+Dentries are dynamically allocated and deallocated.
+
+nr_dentry shows the total number of dentries allocated (active
++ unused). nr_unused shows the number of dentries that are not
+actively used, but are saved in the LRU list for future reuse.
+
+Age_limit is the age in seconds after which dcache entries
+can be reclaimed when memory is short and want_pages is
+nonzero when shrink_dcache_pages() has been called and the
+dcache isn't pruned yet.
+
+nr_negative shows the number of unused dentries that are also
+negative dentries which do not map to any files. Instead,
+they help speeding up rejection of non-existing files provided
+by the users.
+
+
+dquot-max & dquot-nr
+--------------------
+
+The file dquot-max shows the maximum number of cached disk
+quota entries.
+
+The file dquot-nr shows the number of allocated disk quota
+entries and the number of free disk quota entries.
+
+If the number of free cached disk quotas is very low and
+you have some awesome number of simultaneous system users,
+you might want to raise the limit.
+
+
+file-max & file-nr
+------------------
+
+The value in file-max denotes the maximum number of file-
+handles that the Linux kernel will allocate. When you get lots
+of error messages about running out of file handles, you might
+want to increase this limit.
+
+Historically,the kernel was able to allocate file handles
+dynamically, but not to free them again. The three values in
+file-nr denote the number of allocated file handles, the number
+of allocated but unused file handles, and the maximum number of
+file handles. Linux 2.6 always reports 0 as the number of free
+file handles -- this is not an error, it just means that the
+number of allocated file handles exactly matches the number of
+used file handles.
+
+Attempts to allocate more file descriptors than file-max are
+reported with printk, look for "VFS: file-max limit <number>
+reached".
+
+
+nr_open
+-------
+
+This denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on RLIMIT_NOFILE
+resource limit.
+
+
+inode-max, inode-nr & inode-state
+---------------------------------
+
+As with file handles, the kernel allocates the inode structures
+dynamically, but can't free them yet.
+
+The value in inode-max denotes the maximum number of inode
+handlers. This value should be 3-4 times larger than the value
+in file-max, since stdin, stdout and network sockets also
+need an inode struct to handle them. When you regularly run
+out of inodes, you need to increase this value.
+
+The file inode-nr contains the first two items from
+inode-state, so we'll skip to that file...
+
+Inode-state contains three actual numbers and four dummies.
+The actual numbers are, in order of appearance, nr_inodes,
+nr_free_inodes and preshrink.
+
+Nr_inodes stands for the number of inodes the system has
+allocated, this can be slightly more than inode-max because
+Linux allocates them one pageful at a time.
+
+Nr_free_inodes represents the number of free inodes (?) and
+preshrink is nonzero when the nr_inodes > inode-max and the
+system needs to prune the inode list instead of allocating
+more.
+
+
+overflowgid & overflowuid
+-------------------------
+
+Some filesystems only support 16-bit UIDs and GIDs, although in Linux
+UIDs and GIDs are 32 bits. When one of these filesystems is mounted
+with writes enabled, any UID or GID that would exceed 65535 is translated
+to a fixed value before being written to disk.
+
+These sysctls allow you to change the value of the fixed UID and GID.
+The default is 65534.
+
+
+pipe-user-pages-hard
+--------------------
+
+Maximum total number of pages a non-privileged user may allocate for pipes.
+Once this limit is reached, no new pipes may be allocated until usage goes
+below the limit again. When set to 0, no limit is applied, which is the default
+setting.
+
+
+pipe-user-pages-soft
+--------------------
+
+Maximum total number of pages a non-privileged user may allocate for pipes
+before the pipe size gets limited to a single page. Once this limit is reached,
+new pipes will be limited to a single page in size for this user in order to
+limit total memory usage, and trying to increase them using fcntl() will be
+denied until usage goes below the limit again. The default value allows to
+allocate up to 1024 pipes at their default size. When set to 0, no limit is
+applied.
+
+
+protected_fifos
+---------------
+
+The intent of this protection is to avoid unintentional writes to
+an attacker-controlled FIFO, where a program expected to create a regular
+file.
+
+When set to "0", writing to FIFOs is unrestricted.
+
+When set to "1" don't allow O_CREAT open on FIFOs that we don't own
+in world writable sticky directories, unless they are owned by the
+owner of the directory.
+
+When set to "2" it also applies to group writable sticky directories.
+
+This protection is based on the restrictions in Openwall.
+
+
+protected_hardlinks
+--------------------
+
+A long-standing class of security issues is the hardlink-based
+time-of-check-time-of-use race, most commonly seen in world-writable
+directories like /tmp. The common method of exploitation of this flaw
+is to cross privilege boundaries when following a given hardlink (i.e. a
+root process follows a hardlink created by another user). Additionally,
+on systems without separated partitions, this stops unauthorized users
+from "pinning" vulnerable setuid/setgid files against being upgraded by
+the administrator, or linking to special files.
+
+When set to "0", hardlink creation behavior is unrestricted.
+
+When set to "1" hardlinks cannot be created by users if they do not
+already own the source file, or do not have read/write access to it.
+
+This protection is based on the restrictions in Openwall and grsecurity.
+
+
+protected_regular
+-----------------
+
+This protection is similar to protected_fifos, but it
+avoids writes to an attacker-controlled regular file, where a program
+expected to create one.
+
+When set to "0", writing to regular files is unrestricted.
+
+When set to "1" don't allow O_CREAT open on regular files that we
+don't own in world writable sticky directories, unless they are
+owned by the owner of the directory.
+
+When set to "2" it also applies to group writable sticky directories.
+
+
+protected_symlinks
+------------------
+
+A long-standing class of security issues is the symlink-based
+time-of-check-time-of-use race, most commonly seen in world-writable
+directories like /tmp. The common method of exploitation of this flaw
+is to cross privilege boundaries when following a given symlink (i.e. a
+root process follows a symlink belonging to another user). For a likely
+incomplete list of hundreds of examples across the years, please see:
+http://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=/tmp
+
+When set to "0", symlink following behavior is unrestricted.
+
+When set to "1" symlinks are permitted to be followed only when outside
+a sticky world-writable directory, or when the uid of the symlink and
+follower match, or when the directory owner matches the symlink's owner.
+
+This protection is based on the restrictions in Openwall and grsecurity.
+
+
+suid_dumpable:
+--------------
+
+This value can be used to query and set the core dump mode for setuid
+or otherwise protected/tainted binaries. The modes are
+
+=   ==========  ===============================================================
+0   (default)	traditional behaviour. Any process which has changed
+		privilege levels or is execute only will not be dumped.
+1   (debug)	all processes dump core when possible. The core dump is
+		owned by the current user and no security is applied. This is
+		intended for system debugging situations only.
+		Ptrace is unchecked.
+		This is insecure as it allows regular users to examine the
+		memory contents of privileged processes.
+2   (suidsafe)	any binary which normally would not be dumped is dumped
+		anyway, but only if the "core_pattern" kernel sysctl is set to
+		either a pipe handler or a fully qualified path. (For more
+		details on this limitation, see CVE-2006-2451.) This mode is
+		appropriate when administrators are attempting to debug
+		problems in a normal environment, and either have a core dump
+		pipe handler that knows to treat privileged core dumps with
+		care, or specific directory defined for catching core dumps.
+		If a core dump happens without a pipe handler or fully
+		qualified path, a message will be emitted to syslog warning
+		about the lack of a correct setting.
+=   ==========  ===============================================================
+
+
+super-max & super-nr
+--------------------
+
+These numbers control the maximum number of superblocks, and
+thus the maximum number of mounted filesystems the kernel
+can have. You only need to increase super-max if you need to
+mount more filesystems than the current value in super-max
+allows you to.
+
+
+aio-nr & aio-max-nr
+-------------------
+
+aio-nr shows the current system-wide number of asynchronous io
+requests.  aio-max-nr allows you to change the maximum value
+aio-nr can grow to.
+
+
+mount-max
+---------
+
+This denotes the maximum number of mounts that may exist
+in a mount namespace.
+
+
+
+2. /proc/sys/fs/binfmt_misc
+===========================
+
+Documentation for the files in /proc/sys/fs/binfmt_misc is
+in Documentation/admin-guide/binfmt-misc.rst.
+
+
+3. /proc/sys/fs/mqueue - POSIX message queues filesystem
+========================================================
+
+
+The "mqueue"  filesystem provides  the necessary kernel features to enable the
+creation of a  user space  library that  implements  the  POSIX message queues
+API (as noted by the  MSG tag in the  POSIX 1003.1-2001 version  of the System
+Interfaces specification.)
+
+The "mqueue" filesystem contains values for determining/setting  the amount of
+resources used by the file system.
+
+/proc/sys/fs/mqueue/queues_max is a read/write  file for  setting/getting  the
+maximum number of message queues allowed on the system.
+
+/proc/sys/fs/mqueue/msg_max  is  a  read/write file  for  setting/getting  the
+maximum number of messages in a queue value.  In fact it is the limiting value
+for another (user) limit which is set in mq_open invocation. This attribute of
+a queue must be less or equal then msg_max.
+
+/proc/sys/fs/mqueue/msgsize_max is  a read/write  file for setting/getting the
+maximum  message size value (it is every  message queue's attribute set during
+its creation).
+
+/proc/sys/fs/mqueue/msg_default is  a read/write  file for setting/getting the
+default number of messages in a queue value if attr parameter of mq_open(2) is
+NULL. If it exceed msg_max, the default value is initialized msg_max.
+
+/proc/sys/fs/mqueue/msgsize_default is a read/write file for setting/getting
+the default message size value if attr parameter of mq_open(2) is NULL. If it
+exceed msgsize_max, the default value is initialized msgsize_max.
+
+4. /proc/sys/fs/epoll - Configuration options for the epoll interface
+=====================================================================
+
+This directory contains configuration options for the epoll(7) interface.
+
+max_user_watches
+----------------
+
+Every epoll file descriptor can store a number of files to be monitored
+for event readiness. Each one of these monitored files constitutes a "watch".
+This configuration option sets the maximum number of "watches" that are
+allowed for each user.
+Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes
+on a 64bit one.
+The current default value for  max_user_watches  is the 1/32 of the available
+low memory, divided for the "watch" cost in bytes.
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
deleted file mode 100644
index ebc679bcb2dc..000000000000
--- a/Documentation/sysctl/fs.txt
+++ /dev/null
@@ -1,374 +0,0 @@
-Documentation for /proc/sys/fs/*	kernel version 2.2.10
-	(c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
-	(c) 2009,        Shen Feng<shen@cn.fujitsu.com>
-
-For general info and legal blurb, please look in README.
-
-==============================================================
-
-This file contains documentation for the sysctl files in
-/proc/sys/fs/ and is valid for Linux kernel version 2.2.
-
-The files in this directory can be used to tune and monitor
-miscellaneous and general things in the operation of the Linux
-kernel. Since some of the files _can_ be used to screw up your
-system, it is advisable to read both documentation and source
-before actually making adjustments.
-
-1. /proc/sys/fs
-----------------------------------------------------------
-
-Currently, these files are in /proc/sys/fs:
-- aio-max-nr
-- aio-nr
-- dentry-state
-- dquot-max
-- dquot-nr
-- file-max
-- file-nr
-- inode-max
-- inode-nr
-- inode-state
-- nr_open
-- overflowuid
-- overflowgid
-- pipe-user-pages-hard
-- pipe-user-pages-soft
-- protected_fifos
-- protected_hardlinks
-- protected_regular
-- protected_symlinks
-- suid_dumpable
-- super-max
-- super-nr
-
-==============================================================
-
-aio-nr & aio-max-nr:
-
-aio-nr is the running total of the number of events specified on the
-io_setup system call for all currently active aio contexts.  If aio-nr
-reaches aio-max-nr then io_setup will fail with EAGAIN.  Note that
-raising aio-max-nr does not result in the pre-allocation or re-sizing
-of any kernel data structures.
-
-==============================================================
-
-dentry-state:
-
-From linux/include/linux/dcache.h:
---------------------------------------------------------------
-struct dentry_stat_t dentry_stat {
-        int nr_dentry;
-        int nr_unused;
-        int age_limit;         /* age in seconds */
-        int want_pages;        /* pages requested by system */
-        int nr_negative;       /* # of unused negative dentries */
-        int dummy;             /* Reserved for future use */
-};
---------------------------------------------------------------
-
-Dentries are dynamically allocated and deallocated.
-
-nr_dentry shows the total number of dentries allocated (active
-+ unused). nr_unused shows the number of dentries that are not
-actively used, but are saved in the LRU list for future reuse.
-
-Age_limit is the age in seconds after which dcache entries
-can be reclaimed when memory is short and want_pages is
-nonzero when shrink_dcache_pages() has been called and the
-dcache isn't pruned yet.
-
-nr_negative shows the number of unused dentries that are also
-negative dentries which do not map to any files. Instead,
-they help speeding up rejection of non-existing files provided
-by the users.
-
-==============================================================
-
-dquot-max & dquot-nr:
-
-The file dquot-max shows the maximum number of cached disk
-quota entries.
-
-The file dquot-nr shows the number of allocated disk quota
-entries and the number of free disk quota entries.
-
-If the number of free cached disk quotas is very low and
-you have some awesome number of simultaneous system users,
-you might want to raise the limit.
-
-==============================================================
-
-file-max & file-nr:
-
-The value in file-max denotes the maximum number of file-
-handles that the Linux kernel will allocate. When you get lots
-of error messages about running out of file handles, you might
-want to increase this limit.
-
-Historically,the kernel was able to allocate file handles
-dynamically, but not to free them again. The three values in
-file-nr denote the number of allocated file handles, the number
-of allocated but unused file handles, and the maximum number of
-file handles. Linux 2.6 always reports 0 as the number of free
-file handles -- this is not an error, it just means that the
-number of allocated file handles exactly matches the number of
-used file handles.
-
-Attempts to allocate more file descriptors than file-max are
-reported with printk, look for "VFS: file-max limit <number>
-reached".
-==============================================================
-
-nr_open:
-
-This denotes the maximum number of file-handles a process can
-allocate. Default value is 1024*1024 (1048576) which should be
-enough for most machines. Actual limit depends on RLIMIT_NOFILE
-resource limit.
-
-==============================================================
-
-inode-max, inode-nr & inode-state:
-
-As with file handles, the kernel allocates the inode structures
-dynamically, but can't free them yet.
-
-The value in inode-max denotes the maximum number of inode
-handlers. This value should be 3-4 times larger than the value
-in file-max, since stdin, stdout and network sockets also
-need an inode struct to handle them. When you regularly run
-out of inodes, you need to increase this value.
-
-The file inode-nr contains the first two items from
-inode-state, so we'll skip to that file...
-
-Inode-state contains three actual numbers and four dummies.
-The actual numbers are, in order of appearance, nr_inodes,
-nr_free_inodes and preshrink.
-
-Nr_inodes stands for the number of inodes the system has
-allocated, this can be slightly more than inode-max because
-Linux allocates them one pageful at a time.
-
-Nr_free_inodes represents the number of free inodes (?) and
-preshrink is nonzero when the nr_inodes > inode-max and the
-system needs to prune the inode list instead of allocating
-more.
-
-==============================================================
-
-overflowgid & overflowuid:
-
-Some filesystems only support 16-bit UIDs and GIDs, although in Linux
-UIDs and GIDs are 32 bits. When one of these filesystems is mounted
-with writes enabled, any UID or GID that would exceed 65535 is translated
-to a fixed value before being written to disk.
-
-These sysctls allow you to change the value of the fixed UID and GID.
-The default is 65534.
-
-==============================================================
-
-pipe-user-pages-hard:
-
-Maximum total number of pages a non-privileged user may allocate for pipes.
-Once this limit is reached, no new pipes may be allocated until usage goes
-below the limit again. When set to 0, no limit is applied, which is the default
-setting.
-
-==============================================================
-
-pipe-user-pages-soft:
-
-Maximum total number of pages a non-privileged user may allocate for pipes
-before the pipe size gets limited to a single page. Once this limit is reached,
-new pipes will be limited to a single page in size for this user in order to
-limit total memory usage, and trying to increase them using fcntl() will be
-denied until usage goes below the limit again. The default value allows to
-allocate up to 1024 pipes at their default size. When set to 0, no limit is
-applied.
-
-==============================================================
-
-protected_fifos:
-
-The intent of this protection is to avoid unintentional writes to
-an attacker-controlled FIFO, where a program expected to create a regular
-file.
-
-When set to "0", writing to FIFOs is unrestricted.
-
-When set to "1" don't allow O_CREAT open on FIFOs that we don't own
-in world writable sticky directories, unless they are owned by the
-owner of the directory.
-
-When set to "2" it also applies to group writable sticky directories.
-
-This protection is based on the restrictions in Openwall.
-
-==============================================================
-
-protected_hardlinks:
-
-A long-standing class of security issues is the hardlink-based
-time-of-check-time-of-use race, most commonly seen in world-writable
-directories like /tmp. The common method of exploitation of this flaw
-is to cross privilege boundaries when following a given hardlink (i.e. a
-root process follows a hardlink created by another user). Additionally,
-on systems without separated partitions, this stops unauthorized users
-from "pinning" vulnerable setuid/setgid files against being upgraded by
-the administrator, or linking to special files.
-
-When set to "0", hardlink creation behavior is unrestricted.
-
-When set to "1" hardlinks cannot be created by users if they do not
-already own the source file, or do not have read/write access to it.
-
-This protection is based on the restrictions in Openwall and grsecurity.
-
-==============================================================
-
-protected_regular:
-
-This protection is similar to protected_fifos, but it
-avoids writes to an attacker-controlled regular file, where a program
-expected to create one.
-
-When set to "0", writing to regular files is unrestricted.
-
-When set to "1" don't allow O_CREAT open on regular files that we
-don't own in world writable sticky directories, unless they are
-owned by the owner of the directory.
-
-When set to "2" it also applies to group writable sticky directories.
-
-==============================================================
-
-protected_symlinks:
-
-A long-standing class of security issues is the symlink-based
-time-of-check-time-of-use race, most commonly seen in world-writable
-directories like /tmp. The common method of exploitation of this flaw
-is to cross privilege boundaries when following a given symlink (i.e. a
-root process follows a symlink belonging to another user). For a likely
-incomplete list of hundreds of examples across the years, please see:
-http://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=/tmp
-
-When set to "0", symlink following behavior is unrestricted.
-
-When set to "1" symlinks are permitted to be followed only when outside
-a sticky world-writable directory, or when the uid of the symlink and
-follower match, or when the directory owner matches the symlink's owner.
-
-This protection is based on the restrictions in Openwall and grsecurity.
-
-==============================================================
-
-suid_dumpable:
-
-This value can be used to query and set the core dump mode for setuid
-or otherwise protected/tainted binaries. The modes are
-
-0 - (default) - traditional behaviour. Any process which has changed
-	privilege levels or is execute only will not be dumped.
-1 - (debug) - all processes dump core when possible. The core dump is
-	owned by the current user and no security is applied. This is
-	intended for system debugging situations only. Ptrace is unchecked.
-	This is insecure as it allows regular users to examine the memory
-	contents of privileged processes.
-2 - (suidsafe) - any binary which normally would not be dumped is dumped
-	anyway, but only if the "core_pattern" kernel sysctl is set to
-	either a pipe handler or a fully qualified path. (For more details
-	on this limitation, see CVE-2006-2451.) This mode is appropriate
-	when administrators are attempting to debug problems in a normal
-	environment, and either have a core dump pipe handler that knows
-	to treat privileged core dumps with care, or specific directory
-	defined for catching core dumps. If a core dump happens without
-	a pipe handler or fully qualifid path, a message will be emitted
-	to syslog warning about the lack of a correct setting.
-
-==============================================================
-
-super-max & super-nr:
-
-These numbers control the maximum number of superblocks, and
-thus the maximum number of mounted filesystems the kernel
-can have. You only need to increase super-max if you need to
-mount more filesystems than the current value in super-max
-allows you to.
-
-==============================================================
-
-aio-nr & aio-max-nr:
-
-aio-nr shows the current system-wide number of asynchronous io
-requests.  aio-max-nr allows you to change the maximum value
-aio-nr can grow to.
-
-==============================================================
-
-mount-max:
-
-This denotes the maximum number of mounts that may exist
-in a mount namespace.
-
-==============================================================
-
-
-2. /proc/sys/fs/binfmt_misc
-----------------------------------------------------------
-
-Documentation for the files in /proc/sys/fs/binfmt_misc is
-in Documentation/admin-guide/binfmt-misc.rst.
-
-
-3. /proc/sys/fs/mqueue - POSIX message queues filesystem
-----------------------------------------------------------
-
-The "mqueue"  filesystem provides  the necessary kernel features to enable the
-creation of a  user space  library that  implements  the  POSIX message queues
-API (as noted by the  MSG tag in the  POSIX 1003.1-2001 version  of the System
-Interfaces specification.)
-
-The "mqueue" filesystem contains values for determining/setting  the amount of
-resources used by the file system.
-
-/proc/sys/fs/mqueue/queues_max is a read/write  file for  setting/getting  the
-maximum number of message queues allowed on the system.
-
-/proc/sys/fs/mqueue/msg_max  is  a  read/write file  for  setting/getting  the
-maximum number of messages in a queue value.  In fact it is the limiting value
-for another (user) limit which is set in mq_open invocation. This attribute of
-a queue must be less or equal then msg_max.
-
-/proc/sys/fs/mqueue/msgsize_max is  a read/write  file for setting/getting the
-maximum  message size value (it is every  message queue's attribute set during
-its creation).
-
-/proc/sys/fs/mqueue/msg_default is  a read/write  file for setting/getting the
-default number of messages in a queue value if attr parameter of mq_open(2) is
-NULL. If it exceed msg_max, the default value is initialized msg_max.
-
-/proc/sys/fs/mqueue/msgsize_default is a read/write file for setting/getting
-the default message size value if attr parameter of mq_open(2) is NULL. If it
-exceed msgsize_max, the default value is initialized msgsize_max.
-
-4. /proc/sys/fs/epoll - Configuration options for the epoll interface
---------------------------------------------------------
-
-This directory contains configuration options for the epoll(7) interface.
-
-max_user_watches
-----------------
-
-Every epoll file descriptor can store a number of files to be monitored
-for event readiness. Each one of these monitored files constitutes a "watch".
-This configuration option sets the maximum number of "watches" that are
-allowed for each user.
-Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes
-on a 64bit one.
-The current default value for  max_user_watches  is the 1/32 of the available
-low memory, divided for the "watch" cost in bytes.
-
diff --git a/Documentation/sysctl/index.rst b/Documentation/sysctl/index.rst
new file mode 100644
index 000000000000..efbcde8c1c9c
--- /dev/null
+++ b/Documentation/sysctl/index.rst
@@ -0,0 +1,100 @@
+:orphan:
+
+===========================
+Documentation for /proc/sys
+===========================
+
+Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
+
+------------------------------------------------------------------------------
+
+'Why', I hear you ask, 'would anyone even _want_ documentation
+for them sysctl files? If anybody really needs it, it's all in
+the source...'
+
+Well, this documentation is written because some people either
+don't know they need to tweak something, or because they don't
+have the time or knowledge to read the source code.
+
+Furthermore, the programmers who built sysctl have built it to
+be actually used, not just for the fun of programming it :-)
+
+------------------------------------------------------------------------------
+
+Legal blurb:
+
+As usual, there are two main things to consider:
+
+1. you get what you pay for
+2. it's free
+
+The consequences are that I won't guarantee the correctness of
+this document, and if you come to me complaining about how you
+screwed up your system because of wrong documentation, I won't
+feel sorry for you. I might even laugh at you...
+
+But of course, if you _do_ manage to screw up your system using
+only the sysctl options used in this file, I'd like to hear of
+it. Not only to have a great laugh, but also to make sure that
+you're the last RTFMing person to screw up.
+
+In short, e-mail your suggestions, corrections and / or horror
+stories to: <riel@nl.linux.org>
+
+Rik van Riel.
+
+--------------------------------------------------------------
+
+Introduction
+============
+
+Sysctl is a means of configuring certain aspects of the kernel
+at run-time, and the /proc/sys/ directory is there so that you
+don't even need special tools to do it!
+In fact, there are only four things needed to use these config
+facilities:
+
+- a running Linux system
+- root access
+- common sense (this is especially hard to come by these days)
+- knowledge of what all those values mean
+
+As a quick 'ls /proc/sys' will show, the directory consists of
+several (arch-dependent?) subdirs. Each subdir is mainly about
+one part of the kernel, so you can do configuration on a piece
+by piece basis, or just some 'thematic frobbing'.
+
+This documentation is about:
+
+=============== ===============================================================
+abi/		execution domains & personalities
+debug/		<empty>
+dev/		device specific information (eg dev/cdrom/info)
+fs/		specific filesystems
+		filehandle, inode, dentry and quota tuning
+		binfmt_misc <Documentation/admin-guide/binfmt-misc.rst>
+kernel/		global kernel info / tuning
+		miscellaneous stuff
+net/		networking stuff, for documentation look in:
+		<Documentation/networking/>
+proc/		<empty>
+sunrpc/		SUN Remote Procedure Call (NFS)
+vm/		memory management tuning
+		buffer and cache management
+user/		Per user per user namespace limits
+=============== ===============================================================
+
+These are the subdirs I have on my system. There might be more
+or other subdirs in another setup. If you see another dir, I'd
+really like to hear about it :-)
+
+.. toctree::
+   :maxdepth: 1
+
+   abi
+   fs
+   kernel
+   net
+   sunrpc
+   user
+   vm
diff --git a/Documentation/sysctl/kernel.rst b/Documentation/sysctl/kernel.rst
new file mode 100644
index 000000000000..a0c1d4ce403a
--- /dev/null
+++ b/Documentation/sysctl/kernel.rst
@@ -0,0 +1,1177 @@
+===================================
+Documentation for /proc/sys/kernel/
+===================================
+
+kernel version 2.2.10
+
+Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
+
+Copyright (c) 2009,        Shen Feng<shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in index.rst.
+
+------------------------------------------------------------------------------
+
+This file contains documentation for the sysctl files in
+/proc/sys/kernel/ and is valid for Linux kernel version 2.2.
+
+The files in this directory can be used to tune and monitor
+miscellaneous and general things in the operation of the Linux
+kernel. Since some of the files _can_ be used to screw up your
+system, it is advisable to read both documentation and source
+before actually making adjustments.
+
+Currently, these files might (depending on your configuration)
+show up in /proc/sys/kernel:
+
+- acct
+- acpi_video_flags
+- auto_msgmni
+- bootloader_type	     [ X86 only ]
+- bootloader_version	     [ X86 only ]
+- cap_last_cap
+- core_pattern
+- core_pipe_limit
+- core_uses_pid
+- ctrl-alt-del
+- dmesg_restrict
+- domainname
+- hostname
+- hotplug
+- hardlockup_all_cpu_backtrace
+- hardlockup_panic
+- hung_task_panic
+- hung_task_check_count
+- hung_task_timeout_secs
+- hung_task_check_interval_secs
+- hung_task_warnings
+- hyperv_record_panic_msg
+- kexec_load_disabled
+- kptr_restrict
+- l2cr                        [ PPC only ]
+- modprobe                    ==> Documentation/debugging-modules.txt
+- modules_disabled
+- msg_next_id		      [ sysv ipc ]
+- msgmax
+- msgmnb
+- msgmni
+- nmi_watchdog
+- osrelease
+- ostype
+- overflowgid
+- overflowuid
+- panic
+- panic_on_oops
+- panic_on_stackoverflow
+- panic_on_unrecovered_nmi
+- panic_on_warn
+- panic_print
+- panic_on_rcu_stall
+- perf_cpu_time_max_percent
+- perf_event_paranoid
+- perf_event_max_stack
+- perf_event_mlock_kb
+- perf_event_max_contexts_per_stack
+- pid_max
+- powersave-nap               [ PPC only ]
+- printk
+- printk_delay
+- printk_ratelimit
+- printk_ratelimit_burst
+- pty                         ==> Documentation/filesystems/devpts.txt
+- randomize_va_space
+- real-root-dev               ==> Documentation/admin-guide/initrd.rst
+- reboot-cmd                  [ SPARC only ]
+- rtsig-max
+- rtsig-nr
+- sched_energy_aware
+- seccomp/                    ==> Documentation/userspace-api/seccomp_filter.rst
+- sem
+- sem_next_id		      [ sysv ipc ]
+- sg-big-buff                 [ generic SCSI device (sg) ]
+- shm_next_id		      [ sysv ipc ]
+- shm_rmid_forced
+- shmall
+- shmmax                      [ sysv ipc ]
+- shmmni
+- softlockup_all_cpu_backtrace
+- soft_watchdog
+- stack_erasing
+- stop-a                      [ SPARC only ]
+- sysrq                       ==> Documentation/admin-guide/sysrq.rst
+- sysctl_writes_strict
+- tainted                     ==> Documentation/admin-guide/tainted-kernels.rst
+- threads-max
+- unknown_nmi_panic
+- watchdog
+- watchdog_thresh
+- version
+
+
+acct:
+=====
+
+highwater lowwater frequency
+
+If BSD-style process accounting is enabled these values control
+its behaviour. If free space on filesystem where the log lives
+goes below <lowwater>% accounting suspends. If free space gets
+above <highwater>% accounting resumes. <Frequency> determines
+how often do we check the amount of free space (value is in
+seconds). Default:
+4 2 30
+That is, suspend accounting if there left <= 2% free; resume it
+if we got >=4%; consider information about amount of free space
+valid for 30 seconds.
+
+
+acpi_video_flags:
+=================
+
+flags
+
+See Doc*/kernel/power/video.txt, it allows mode of video boot to be
+set during run time.
+
+
+auto_msgmni:
+============
+
+This variable has no effect and may be removed in future kernel
+releases. Reading it always returns 0.
+Up to Linux 3.17, it enabled/disabled automatic recomputing of msgmni
+upon memory add/remove or upon ipc namespace creation/removal.
+Echoing "1" into this file enabled msgmni automatic recomputing.
+Echoing "0" turned it off. auto_msgmni default value was 1.
+
+
+bootloader_type:
+================
+
+x86 bootloader identification
+
+This gives the bootloader type number as indicated by the bootloader,
+shifted left by 4, and OR'd with the low four bits of the bootloader
+version.  The reason for this encoding is that this used to match the
+type_of_loader field in the kernel header; the encoding is kept for
+backwards compatibility.  That is, if the full bootloader type number
+is 0x15 and the full version number is 0x234, this file will contain
+the value 340 = 0x154.
+
+See the type_of_loader and ext_loader_type fields in
+Documentation/x86/boot.rst for additional information.
+
+
+bootloader_version:
+===================
+
+x86 bootloader version
+
+The complete bootloader version number.  In the example above, this
+file will contain the value 564 = 0x234.
+
+See the type_of_loader and ext_loader_ver fields in
+Documentation/x86/boot.rst for additional information.
+
+
+cap_last_cap:
+=============
+
+Highest valid capability of the running kernel.  Exports
+CAP_LAST_CAP from the kernel.
+
+
+core_pattern:
+=============
+
+core_pattern is used to specify a core dumpfile pattern name.
+
+* max length 127 characters; default value is "core"
+* core_pattern is used as a pattern template for the output filename;
+  certain string patterns (beginning with '%') are substituted with
+  their actual values.
+* backward compatibility with core_uses_pid:
+
+	If core_pattern does not include "%p" (default does not)
+	and core_uses_pid is set, then .PID will be appended to
+	the filename.
+
+* corename format specifiers::
+
+	%<NUL>	'%' is dropped
+	%%	output one '%'
+	%p	pid
+	%P	global pid (init PID namespace)
+	%i	tid
+	%I	global tid (init PID namespace)
+	%u	uid (in initial user namespace)
+	%g	gid (in initial user namespace)
+	%d	dump mode, matches PR_SET_DUMPABLE and
+		/proc/sys/fs/suid_dumpable
+	%s	signal number
+	%t	UNIX time of dump
+	%h	hostname
+	%e	executable filename (may be shortened)
+	%E	executable path
+	%<OTHER> both are dropped
+
+* If the first character of the pattern is a '|', the kernel will treat
+  the rest of the pattern as a command to run.  The core dump will be
+  written to the standard input of that program instead of to a file.
+
+
+core_pipe_limit:
+================
+
+This sysctl is only applicable when core_pattern is configured to pipe
+core files to a user space helper (when the first character of
+core_pattern is a '|', see above).  When collecting cores via a pipe
+to an application, it is occasionally useful for the collecting
+application to gather data about the crashing process from its
+/proc/pid directory.  In order to do this safely, the kernel must wait
+for the collecting process to exit, so as not to remove the crashing
+processes proc files prematurely.  This in turn creates the
+possibility that a misbehaving userspace collecting process can block
+the reaping of a crashed process simply by never exiting.  This sysctl
+defends against that.  It defines how many concurrent crashing
+processes may be piped to user space applications in parallel.  If
+this value is exceeded, then those crashing processes above that value
+are noted via the kernel log and their cores are skipped.  0 is a
+special value, indicating that unlimited processes may be captured in
+parallel, but that no waiting will take place (i.e. the collecting
+process is not guaranteed access to /proc/<crashing pid>/).  This
+value defaults to 0.
+
+
+core_uses_pid:
+==============
+
+The default coredump filename is "core".  By setting
+core_uses_pid to 1, the coredump filename becomes core.PID.
+If core_pattern does not include "%p" (default does not)
+and core_uses_pid is set, then .PID will be appended to
+the filename.
+
+
+ctrl-alt-del:
+=============
+
+When the value in this file is 0, ctrl-alt-del is trapped and
+sent to the init(1) program to handle a graceful restart.
+When, however, the value is > 0, Linux's reaction to a Vulcan
+Nerve Pinch (tm) will be an immediate reboot, without even
+syncing its dirty buffers.
+
+Note:
+  when a program (like dosemu) has the keyboard in 'raw'
+  mode, the ctrl-alt-del is intercepted by the program before it
+  ever reaches the kernel tty layer, and it's up to the program
+  to decide what to do with it.
+
+
+dmesg_restrict:
+===============
+
+This toggle indicates whether unprivileged users are prevented
+from using dmesg(8) to view messages from the kernel's log buffer.
+When dmesg_restrict is set to (0) there are no restrictions. When
+dmesg_restrict is set set to (1), users must have CAP_SYSLOG to use
+dmesg(8).
+
+The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the
+default value of dmesg_restrict.
+
+
+domainname & hostname:
+======================
+
+These files can be used to set the NIS/YP domainname and the
+hostname of your box in exactly the same way as the commands
+domainname and hostname, i.e.::
+
+	# echo "darkstar" > /proc/sys/kernel/hostname
+	# echo "mydomain" > /proc/sys/kernel/domainname
+
+has the same effect as::
+
+	# hostname "darkstar"
+	# domainname "mydomain"
+
+Note, however, that the classic darkstar.frop.org has the
+hostname "darkstar" and DNS (Internet Domain Name Server)
+domainname "frop.org", not to be confused with the NIS (Network
+Information Service) or YP (Yellow Pages) domainname. These two
+domain names are in general different. For a detailed discussion
+see the hostname(1) man page.
+
+
+hardlockup_all_cpu_backtrace:
+=============================
+
+This value controls the hard lockup detector behavior when a hard
+lockup condition is detected as to whether or not to gather further
+debug information. If enabled, arch-specific all-CPU stack dumping
+will be initiated.
+
+0: do nothing. This is the default behavior.
+
+1: on detection capture more debug information.
+
+
+hardlockup_panic:
+=================
+
+This parameter can be used to control whether the kernel panics
+when a hard lockup is detected.
+
+   0 - don't panic on hard lockup
+   1 - panic on hard lockup
+
+See Documentation/lockup-watchdogs.txt for more information.  This can
+also be set using the nmi_watchdog kernel parameter.
+
+
+hotplug:
+========
+
+Path for the hotplug policy agent.
+Default value is "/sbin/hotplug".
+
+
+hung_task_panic:
+================
+
+Controls the kernel's behavior when a hung task is detected.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+0: continue operation. This is the default behavior.
+
+1: panic immediately.
+
+
+hung_task_check_count:
+======================
+
+The upper bound on the number of tasks that are checked.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+
+hung_task_timeout_secs:
+=======================
+
+When a task in D state did not get scheduled
+for more than this value report a warning.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+0: means infinite timeout - no checking done.
+
+Possible values to set are in range {0..LONG_MAX/HZ}.
+
+
+hung_task_check_interval_secs:
+==============================
+
+Hung task check interval. If hung task checking is enabled
+(see hung_task_timeout_secs), the check is done every
+hung_task_check_interval_secs seconds.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+0 (default): means use hung_task_timeout_secs as checking interval.
+Possible values to set are in range {0..LONG_MAX/HZ}.
+
+
+hung_task_warnings:
+===================
+
+The maximum number of warnings to report. During a check interval
+if a hung task is detected, this value is decreased by 1.
+When this value reaches 0, no more warnings will be reported.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+-1: report an infinite number of warnings.
+
+
+hyperv_record_panic_msg:
+========================
+
+Controls whether the panic kmsg data should be reported to Hyper-V.
+
+0: do not report panic kmsg data.
+
+1: report the panic kmsg data. This is the default behavior.
+
+
+kexec_load_disabled:
+====================
+
+A toggle indicating if the kexec_load syscall has been disabled. This
+value defaults to 0 (false: kexec_load enabled), but can be set to 1
+(true: kexec_load disabled). Once true, kexec can no longer be used, and
+the toggle cannot be set back to false. This allows a kexec image to be
+loaded before disabling the syscall, allowing a system to set up (and
+later use) an image without it being altered. Generally used together
+with the "modules_disabled" sysctl.
+
+
+kptr_restrict:
+==============
+
+This toggle indicates whether restrictions are placed on
+exposing kernel addresses via /proc and other interfaces.
+
+When kptr_restrict is set to 0 (the default) the address is hashed before
+printing. (This is the equivalent to %p.)
+
+When kptr_restrict is set to (1), kernel pointers printed using the %pK
+format specifier will be replaced with 0's unless the user has CAP_SYSLOG
+and effective user and group ids are equal to the real ids. This is
+because %pK checks are done at read() time rather than open() time, so
+if permissions are elevated between the open() and the read() (e.g via
+a setuid binary) then %pK will not leak kernel pointers to unprivileged
+users. Note, this is a temporary solution only. The correct long-term
+solution is to do the permission checks at open() time. Consider removing
+world read permissions from files that use %pK, and using dmesg_restrict
+to protect against uses of %pK in dmesg(8) if leaking kernel pointer
+values to unprivileged users is a concern.
+
+When kptr_restrict is set to (2), kernel pointers printed using
+%pK will be replaced with 0's regardless of privileges.
+
+
+l2cr: (PPC only)
+================
+
+This flag controls the L2 cache of G3 processor boards. If
+0, the cache is disabled. Enabled if nonzero.
+
+
+modules_disabled:
+=================
+
+A toggle value indicating if modules are allowed to be loaded
+in an otherwise modular kernel.  This toggle defaults to off
+(0), but can be set true (1).  Once true, modules can be
+neither loaded nor unloaded, and the toggle cannot be set back
+to false.  Generally used with the "kexec_load_disabled" toggle.
+
+
+msg_next_id, sem_next_id, and shm_next_id:
+==========================================
+
+These three toggles allows to specify desired id for next allocated IPC
+object: message, semaphore or shared memory respectively.
+
+By default they are equal to -1, which means generic allocation logic.
+Possible values to set are in range {0..INT_MAX}.
+
+Notes:
+  1) kernel doesn't guarantee, that new object will have desired id. So,
+     it's up to userspace, how to handle an object with "wrong" id.
+  2) Toggle with non-default value will be set back to -1 by kernel after
+     successful IPC object allocation. If an IPC object allocation syscall
+     fails, it is undefined if the value remains unmodified or is reset to -1.
+
+
+nmi_watchdog:
+=============
+
+This parameter can be used to control the NMI watchdog
+(i.e. the hard lockup detector) on x86 systems.
+
+0 - disable the hard lockup detector
+
+1 - enable the hard lockup detector
+
+The hard lockup detector monitors each CPU for its ability to respond to
+timer interrupts. The mechanism utilizes CPU performance counter registers
+that are programmed to generate Non-Maskable Interrupts (NMIs) periodically
+while a CPU is busy. Hence, the alternative name 'NMI watchdog'.
+
+The NMI watchdog is disabled by default if the kernel is running as a guest
+in a KVM virtual machine. This default can be overridden by adding::
+
+   nmi_watchdog=1
+
+to the guest kernel command line (see Documentation/admin-guide/kernel-parameters.rst).
+
+
+numa_balancing:
+===============
+
+Enables/disables automatic page fault based NUMA memory
+balancing. Memory is moved automatically to nodes
+that access it often.
+
+Enables/disables automatic NUMA memory balancing. On NUMA machines, there
+is a performance penalty if remote memory is accessed by a CPU. When this
+feature is enabled the kernel samples what task thread is accessing memory
+by periodically unmapping pages and later trapping a page fault. At the
+time of the page fault, it is determined if the data being accessed should
+be migrated to a local memory node.
+
+The unmapping of pages and trapping faults incur additional overhead that
+ideally is offset by improved memory locality but there is no universal
+guarantee. If the target workload is already bound to NUMA nodes then this
+feature should be disabled. Otherwise, if the system overhead from the
+feature is too high then the rate the kernel samples for NUMA hinting
+faults may be controlled by the numa_balancing_scan_period_min_ms,
+numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
+numa_balancing_scan_size_mb, and numa_balancing_settle_count sysctls.
+
+numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
+===============================================================================================================================
+
+
+Automatic NUMA balancing scans tasks address space and unmaps pages to
+detect if pages are properly placed or if the data should be migrated to a
+memory node local to where the task is running.  Every "scan delay" the task
+scans the next "scan size" number of pages in its address space. When the
+end of the address space is reached the scanner restarts from the beginning.
+
+In combination, the "scan delay" and "scan size" determine the scan rate.
+When "scan delay" decreases, the scan rate increases.  The scan delay and
+hence the scan rate of every task is adaptive and depends on historical
+behaviour. If pages are properly placed then the scan delay increases,
+otherwise the scan delay decreases.  The "scan size" is not adaptive but
+the higher the "scan size", the higher the scan rate.
+
+Higher scan rates incur higher system overhead as page faults must be
+trapped and potentially data must be migrated. However, the higher the scan
+rate, the more quickly a tasks memory is migrated to a local node if the
+workload pattern changes and minimises performance impact due to remote
+memory accesses. These sysctls control the thresholds for scan delays and
+the number of pages scanned.
+
+numa_balancing_scan_period_min_ms is the minimum time in milliseconds to
+scan a tasks virtual memory. It effectively controls the maximum scanning
+rate for each task.
+
+numa_balancing_scan_delay_ms is the starting "scan delay" used for a task
+when it initially forks.
+
+numa_balancing_scan_period_max_ms is the maximum time in milliseconds to
+scan a tasks virtual memory. It effectively controls the minimum scanning
+rate for each task.
+
+numa_balancing_scan_size_mb is how many megabytes worth of pages are
+scanned for a given scan.
+
+
+osrelease, ostype & version:
+============================
+
+::
+
+  # cat osrelease
+  2.1.88
+  # cat ostype
+  Linux
+  # cat version
+  #5 Wed Feb 25 21:49:24 MET 1998
+
+The files osrelease and ostype should be clear enough. Version
+needs a little more clarification however. The '#5' means that
+this is the fifth kernel built from this source base and the
+date behind it indicates the time the kernel was built.
+The only way to tune these values is to rebuild the kernel :-)
+
+
+overflowgid & overflowuid:
+==========================
+
+if your architecture did not always support 32-bit UIDs (i.e. arm,
+i386, m68k, sh, and sparc32), a fixed UID and GID will be returned to
+applications that use the old 16-bit UID/GID system calls, if the
+actual UID or GID would exceed 65535.
+
+These sysctls allow you to change the value of the fixed UID and GID.
+The default is 65534.
+
+
+panic:
+======
+
+The value in this file represents the number of seconds the kernel
+waits before rebooting on a panic. When you use the software watchdog,
+the recommended setting is 60.
+
+
+panic_on_io_nmi:
+================
+
+Controls the kernel's behavior when a CPU receives an NMI caused by
+an IO error.
+
+0: try to continue operation (default)
+
+1: panic immediately. The IO error triggered an NMI. This indicates a
+   serious system condition which could result in IO data corruption.
+   Rather than continuing, panicking might be a better choice. Some
+   servers issue this sort of NMI when the dump button is pushed,
+   and you can use this option to take a crash dump.
+
+
+panic_on_oops:
+==============
+
+Controls the kernel's behaviour when an oops or BUG is encountered.
+
+0: try to continue operation
+
+1: panic immediately.  If the `panic` sysctl is also non-zero then the
+   machine will be rebooted.
+
+
+panic_on_stackoverflow:
+=======================
+
+Controls the kernel's behavior when detecting the overflows of
+kernel, IRQ and exception stacks except a user stack.
+This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
+
+0: try to continue operation.
+
+1: panic immediately.
+
+
+panic_on_unrecovered_nmi:
+=========================
+
+The default Linux behaviour on an NMI of either memory or unknown is
+to continue operation. For many environments such as scientific
+computing it is preferable that the box is taken out and the error
+dealt with than an uncorrected parity/ECC error get propagated.
+
+A small number of systems do generate NMI's for bizarre random reasons
+such as power management so the default is off. That sysctl works like
+the existing panic controls already in that directory.
+
+
+panic_on_warn:
+==============
+
+Calls panic() in the WARN() path when set to 1.  This is useful to avoid
+a kernel rebuild when attempting to kdump at the location of a WARN().
+
+0: only WARN(), default behaviour.
+
+1: call panic() after printing out WARN() location.
+
+
+panic_print:
+============
+
+Bitmask for printing system info when panic happens. User can chose
+combination of the following bits:
+
+=====  ========================================
+bit 0  print all tasks info
+bit 1  print system memory info
+bit 2  print timer info
+bit 3  print locks info if CONFIG_LOCKDEP is on
+bit 4  print ftrace buffer
+=====  ========================================
+
+So for example to print tasks and memory info on panic, user can::
+
+  echo 3 > /proc/sys/kernel/panic_print
+
+
+panic_on_rcu_stall:
+===================
+
+When set to 1, calls panic() after RCU stall detection messages. This
+is useful to define the root cause of RCU stalls using a vmcore.
+
+0: do not panic() when RCU stall takes place, default behavior.
+
+1: panic() after printing RCU stall messages.
+
+
+perf_cpu_time_max_percent:
+==========================
+
+Hints to the kernel how much CPU time it should be allowed to
+use to handle perf sampling events.  If the perf subsystem
+is informed that its samples are exceeding this limit, it
+will drop its sampling frequency to attempt to reduce its CPU
+usage.
+
+Some perf sampling happens in NMIs.  If these samples
+unexpectedly take too long to execute, the NMIs can become
+stacked up next to each other so much that nothing else is
+allowed to execute.
+
+0:
+   disable the mechanism.  Do not monitor or correct perf's
+   sampling rate no matter how CPU time it takes.
+
+1-100:
+   attempt to throttle perf's sample rate to this
+   percentage of CPU.  Note: the kernel calculates an
+   "expected" length of each sample event.  100 here means
+   100% of that expected length.  Even if this is set to
+   100, you may still see sample throttling if this
+   length is exceeded.  Set to 0 if you truly do not care
+   how much CPU is consumed.
+
+
+perf_event_paranoid:
+====================
+
+Controls use of the performance events system by unprivileged
+users (without CAP_SYS_ADMIN).  The default value is 2.
+
+===  ==================================================================
+ -1  Allow use of (almost) all events by all users
+
+     Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
+
+>=0  Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN
+
+     Disallow raw tracepoint access by users without CAP_SYS_ADMIN
+
+>=1  Disallow CPU event access by users without CAP_SYS_ADMIN
+
+>=2  Disallow kernel profiling by users without CAP_SYS_ADMIN
+===  ==================================================================
+
+
+perf_event_max_stack:
+=====================
+
+Controls maximum number of stack frames to copy for (attr.sample_type &
+PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using
+'perf record -g' or 'perf trace --call-graph fp'.
+
+This can only be done when no events are in use that have callchains
+enabled, otherwise writing to this file will return -EBUSY.
+
+The default value is 127.
+
+
+perf_event_mlock_kb:
+====================
+
+Control size of per-cpu ring buffer not counted agains mlock limit.
+
+The default value is 512 + 1 page
+
+
+perf_event_max_contexts_per_stack:
+==================================
+
+Controls maximum number of stack frame context entries for
+(attr.sample_type & PERF_SAMPLE_CALLCHAIN) configured events, for
+instance, when using 'perf record -g' or 'perf trace --call-graph fp'.
+
+This can only be done when no events are in use that have callchains
+enabled, otherwise writing to this file will return -EBUSY.
+
+The default value is 8.
+
+
+pid_max:
+========
+
+PID allocation wrap value.  When the kernel's next PID value
+reaches this value, it wraps back to a minimum PID value.
+PIDs of value pid_max or larger are not allocated.
+
+
+ns_last_pid:
+============
+
+The last pid allocated in the current (the one task using this sysctl
+lives in) pid namespace. When selecting a pid for a next task on fork
+kernel tries to allocate a number starting from this one.
+
+
+powersave-nap: (PPC only)
+=========================
+
+If set, Linux-PPC will use the 'nap' mode of powersaving,
+otherwise the 'doze' mode will be used.
+
+==============================================================
+
+printk:
+=======
+
+The four values in printk denote: console_loglevel,
+default_message_loglevel, minimum_console_loglevel and
+default_console_loglevel respectively.
+
+These values influence printk() behavior when printing or
+logging error messages. See 'man 2 syslog' for more info on
+the different loglevels.
+
+- console_loglevel:
+	messages with a higher priority than
+	this will be printed to the console
+- default_message_loglevel:
+	messages without an explicit priority
+	will be printed with this priority
+- minimum_console_loglevel:
+	minimum (highest) value to which
+	console_loglevel can be set
+- default_console_loglevel:
+	default value for console_loglevel
+
+
+printk_delay:
+=============
+
+Delay each printk message in printk_delay milliseconds
+
+Value from 0 - 10000 is allowed.
+
+
+printk_ratelimit:
+=================
+
+Some warning messages are rate limited. printk_ratelimit specifies
+the minimum length of time between these messages (in jiffies), by
+default we allow one every 5 seconds.
+
+A value of 0 will disable rate limiting.
+
+
+printk_ratelimit_burst:
+=======================
+
+While long term we enforce one message per printk_ratelimit
+seconds, we do allow a burst of messages to pass through.
+printk_ratelimit_burst specifies the number of messages we can
+send before ratelimiting kicks in.
+
+
+printk_devkmsg:
+===============
+
+Control the logging to /dev/kmsg from userspace:
+
+ratelimit:
+	default, ratelimited
+
+on: unlimited logging to /dev/kmsg from userspace
+
+off: logging to /dev/kmsg disabled
+
+The kernel command line parameter printk.devkmsg= overrides this and is
+a one-time setting until next reboot: once set, it cannot be changed by
+this sysctl interface anymore.
+
+
+randomize_va_space:
+===================
+
+This option can be used to select the type of process address
+space randomization that is used in the system, for architectures
+that support this feature.
+
+==  ===========================================================================
+0   Turn the process address space randomization off.  This is the
+    default for architectures that do not support this feature anyways,
+    and kernels that are booted with the "norandmaps" parameter.
+
+1   Make the addresses of mmap base, stack and VDSO page randomized.
+    This, among other things, implies that shared libraries will be
+    loaded to random addresses.  Also for PIE-linked binaries, the
+    location of code start is randomized.  This is the default if the
+    CONFIG_COMPAT_BRK option is enabled.
+
+2   Additionally enable heap randomization.  This is the default if
+    CONFIG_COMPAT_BRK is disabled.
+
+    There are a few legacy applications out there (such as some ancient
+    versions of libc.so.5 from 1996) that assume that brk area starts
+    just after the end of the code+bss.  These applications break when
+    start of the brk area is randomized.  There are however no known
+    non-legacy applications that would be broken this way, so for most
+    systems it is safe to choose full randomization.
+
+    Systems with ancient and/or broken binaries should be configured
+    with CONFIG_COMPAT_BRK enabled, which excludes the heap from process
+    address space randomization.
+==  ===========================================================================
+
+
+reboot-cmd: (Sparc only)
+========================
+
+??? This seems to be a way to give an argument to the Sparc
+ROM/Flash boot loader. Maybe to tell it what to do after
+rebooting. ???
+
+
+rtsig-max & rtsig-nr:
+=====================
+
+The file rtsig-max can be used to tune the maximum number
+of POSIX realtime (queued) signals that can be outstanding
+in the system.
+
+rtsig-nr shows the number of RT signals currently queued.
+
+
+sched_energy_aware:
+===================
+
+Enables/disables Energy Aware Scheduling (EAS). EAS starts
+automatically on platforms where it can run (that is,
+platforms with asymmetric CPU topologies and having an Energy
+Model available). If your platform happens to meet the
+requirements for EAS but you do not want to use it, change
+this value to 0.
+
+
+sched_schedstats:
+=================
+
+Enables/disables scheduler statistics. Enabling this feature
+incurs a small amount of overhead in the scheduler but is
+useful for debugging and performance tuning.
+
+
+sg-big-buff:
+============
+
+This file shows the size of the generic SCSI (sg) buffer.
+You can't tune it just yet, but you could change it on
+compile time by editing include/scsi/sg.h and changing
+the value of SG_BIG_BUFF.
+
+There shouldn't be any reason to change this value. If
+you can come up with one, you probably know what you
+are doing anyway :)
+
+
+shmall:
+=======
+
+This parameter sets the total amount of shared memory pages that
+can be used system wide. Hence, SHMALL should always be at least
+ceil(shmmax/PAGE_SIZE).
+
+If you are not sure what the default PAGE_SIZE is on your Linux
+system, you can run the following command:
+
+	# getconf PAGE_SIZE
+
+
+shmmax:
+=======
+
+This value can be used to query and set the run time limit
+on the maximum shared memory segment size that can be created.
+Shared memory segments up to 1Gb are now supported in the
+kernel.  This value defaults to SHMMAX.
+
+
+shm_rmid_forced:
+================
+
+Linux lets you set resource limits, including how much memory one
+process can consume, via setrlimit(2).  Unfortunately, shared memory
+segments are allowed to exist without association with any process, and
+thus might not be counted against any resource limits.  If enabled,
+shared memory segments are automatically destroyed when their attach
+count becomes zero after a detach or a process termination.  It will
+also destroy segments that were created, but never attached to, on exit
+from the process.  The only use left for IPC_RMID is to immediately
+destroy an unattached segment.  Of course, this breaks the way things are
+defined, so some applications might stop working.  Note that this
+feature will do you no good unless you also configure your resource
+limits (in particular, RLIMIT_AS and RLIMIT_NPROC).  Most systems don't
+need this.
+
+Note that if you change this from 0 to 1, already created segments
+without users and with a dead originative process will be destroyed.
+
+
+sysctl_writes_strict:
+=====================
+
+Control how file position affects the behavior of updating sysctl values
+via the /proc/sys interface:
+
+  ==   ======================================================================
+  -1   Legacy per-write sysctl value handling, with no printk warnings.
+       Each write syscall must fully contain the sysctl value to be
+       written, and multiple writes on the same sysctl file descriptor
+       will rewrite the sysctl value, regardless of file position.
+   0   Same behavior as above, but warn about processes that perform writes
+       to a sysctl file descriptor when the file position is not 0.
+   1   (default) Respect file position when writing sysctl strings. Multiple
+       writes will append to the sysctl value buffer. Anything past the max
+       length of the sysctl value buffer will be ignored. Writes to numeric
+       sysctl entries must always be at file position 0 and the value must
+       be fully contained in the buffer sent in the write syscall.
+  ==   ======================================================================
+
+
+softlockup_all_cpu_backtrace:
+=============================
+
+This value controls the soft lockup detector thread's behavior
+when a soft lockup condition is detected as to whether or not
+to gather further debug information. If enabled, each cpu will
+be issued an NMI and instructed to capture stack trace.
+
+This feature is only applicable for architectures which support
+NMI.
+
+0: do nothing. This is the default behavior.
+
+1: on detection capture more debug information.
+
+
+soft_watchdog:
+==============
+
+This parameter can be used to control the soft lockup detector.
+
+   0 - disable the soft lockup detector
+
+   1 - enable the soft lockup detector
+
+The soft lockup detector monitors CPUs for threads that are hogging the CPUs
+without rescheduling voluntarily, and thus prevent the 'watchdog/N' threads
+from running. The mechanism depends on the CPUs ability to respond to timer
+interrupts which are needed for the 'watchdog/N' threads to be woken up by
+the watchdog timer function, otherwise the NMI watchdog - if enabled - can
+detect a hard lockup condition.
+
+
+stack_erasing:
+==============
+
+This parameter can be used to control kernel stack erasing at the end
+of syscalls for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK.
+
+That erasing reduces the information which kernel stack leak bugs
+can reveal and blocks some uninitialized stack variable attacks.
+The tradeoff is the performance impact: on a single CPU system kernel
+compilation sees a 1% slowdown, other systems and workloads may vary.
+
+  0: kernel stack erasing is disabled, STACKLEAK_METRICS are not updated.
+
+  1: kernel stack erasing is enabled (default), it is performed before
+     returning to the userspace at the end of syscalls.
+
+
+tainted
+=======
+
+Non-zero if the kernel has been tainted. Numeric values, which can be
+ORed together. The letters are seen in "Tainted" line of Oops reports.
+
+======  =====  ==============================================================
+     1  `(P)`  proprietary module was loaded
+     2  `(F)`  module was force loaded
+     4  `(S)`  SMP kernel oops on an officially SMP incapable processor
+     8  `(R)`  module was force unloaded
+    16  `(M)`  processor reported a Machine Check Exception (MCE)
+    32  `(B)`  bad page referenced or some unexpected page flags
+    64  `(U)`  taint requested by userspace application
+   128  `(D)`  kernel died recently, i.e. there was an OOPS or BUG
+   256  `(A)`  an ACPI table was overridden by user
+   512  `(W)`  kernel issued warning
+  1024  `(C)`  staging driver was loaded
+  2048  `(I)`  workaround for bug in platform firmware applied
+  4096  `(O)`  externally-built ("out-of-tree") module was loaded
+  8192  `(E)`  unsigned module was loaded
+ 16384  `(L)`  soft lockup occurred
+ 32768  `(K)`  kernel has been live patched
+ 65536  `(X)`  Auxiliary taint, defined and used by for distros
+131072  `(T)`  The kernel was built with the struct randomization plugin
+======  =====  ==============================================================
+
+See Documentation/admin-guide/tainted-kernels.rst for more information.
+
+
+threads-max:
+============
+
+This value controls the maximum number of threads that can be created
+using fork().
+
+During initialization the kernel sets this value such that even if the
+maximum number of threads is created, the thread structures occupy only
+a part (1/8th) of the available RAM pages.
+
+The minimum value that can be written to threads-max is 20.
+
+The maximum value that can be written to threads-max is given by the
+constant FUTEX_TID_MASK (0x3fffffff).
+
+If a value outside of this range is written to threads-max an error
+EINVAL occurs.
+
+The value written is checked against the available RAM pages. If the
+thread structures would occupy too much (more than 1/8th) of the
+available RAM pages threads-max is reduced accordingly.
+
+
+unknown_nmi_panic:
+==================
+
+The value in this file affects behavior of handling NMI. When the
+value is non-zero, unknown NMI is trapped and then panic occurs. At
+that time, kernel debugging information is displayed on console.
+
+NMI switch that most IA32 servers have fires unknown NMI up, for
+example.  If a system hangs up, try pressing the NMI switch.
+
+
+watchdog:
+=========
+
+This parameter can be used to disable or enable the soft lockup detector
+_and_ the NMI watchdog (i.e. the hard lockup detector) at the same time.
+
+   0 - disable both lockup detectors
+
+   1 - enable both lockup detectors
+
+The soft lockup detector and the NMI watchdog can also be disabled or
+enabled individually, using the soft_watchdog and nmi_watchdog parameters.
+If the watchdog parameter is read, for example by executing::
+
+   cat /proc/sys/kernel/watchdog
+
+the output of this command (0 or 1) shows the logical OR of soft_watchdog
+and nmi_watchdog.
+
+
+watchdog_cpumask:
+=================
+
+This value can be used to control on which cpus the watchdog may run.
+The default cpumask is all possible cores, but if NO_HZ_FULL is
+enabled in the kernel config, and cores are specified with the
+nohz_full= boot argument, those cores are excluded by default.
+Offline cores can be included in this mask, and if the core is later
+brought online, the watchdog will be started based on the mask value.
+
+Typically this value would only be touched in the nohz_full case
+to re-enable cores that by default were not running the watchdog,
+if a kernel lockup was suspected on those cores.
+
+The argument value is the standard cpulist format for cpumasks,
+so for example to enable the watchdog on cores 0, 2, 3, and 4 you
+might say::
+
+  echo 0,2-4 > /proc/sys/kernel/watchdog_cpumask
+
+
+watchdog_thresh:
+================
+
+This value can be used to control the frequency of hrtimer and NMI
+events and the soft and hard lockup thresholds. The default threshold
+is 10 seconds.
+
+The softlockup threshold is (2 * watchdog_thresh). Setting this
+tunable to zero will disable lockup detection altogether.
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
deleted file mode 100644
index 1b2fe17cd2fa..000000000000
--- a/Documentation/sysctl/kernel.txt
+++ /dev/null
@@ -1,1129 +0,0 @@
-Documentation for /proc/sys/kernel/*	kernel version 2.2.10
-	(c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
-	(c) 2009,        Shen Feng<shen@cn.fujitsu.com>
-
-For general info and legal blurb, please look in README.
-
-==============================================================
-
-This file contains documentation for the sysctl files in
-/proc/sys/kernel/ and is valid for Linux kernel version 2.2.
-
-The files in this directory can be used to tune and monitor
-miscellaneous and general things in the operation of the Linux
-kernel. Since some of the files _can_ be used to screw up your
-system, it is advisable to read both documentation and source
-before actually making adjustments.
-
-Currently, these files might (depending on your configuration)
-show up in /proc/sys/kernel:
-
-- acct
-- acpi_video_flags
-- auto_msgmni
-- bootloader_type	     [ X86 only ]
-- bootloader_version	     [ X86 only ]
-- cap_last_cap
-- core_pattern
-- core_pipe_limit
-- core_uses_pid
-- ctrl-alt-del
-- dmesg_restrict
-- domainname
-- hostname
-- hotplug
-- hardlockup_all_cpu_backtrace
-- hardlockup_panic
-- hung_task_panic
-- hung_task_check_count
-- hung_task_timeout_secs
-- hung_task_check_interval_secs
-- hung_task_warnings
-- hyperv_record_panic_msg
-- kexec_load_disabled
-- kptr_restrict
-- l2cr                        [ PPC only ]
-- modprobe                    ==> Documentation/debugging-modules.txt
-- modules_disabled
-- msg_next_id		      [ sysv ipc ]
-- msgmax
-- msgmnb
-- msgmni
-- nmi_watchdog
-- osrelease
-- ostype
-- overflowgid
-- overflowuid
-- panic
-- panic_on_oops
-- panic_on_stackoverflow
-- panic_on_unrecovered_nmi
-- panic_on_warn
-- panic_print
-- panic_on_rcu_stall
-- perf_cpu_time_max_percent
-- perf_event_paranoid
-- perf_event_max_stack
-- perf_event_mlock_kb
-- perf_event_max_contexts_per_stack
-- pid_max
-- powersave-nap               [ PPC only ]
-- printk
-- printk_delay
-- printk_ratelimit
-- printk_ratelimit_burst
-- pty                         ==> Documentation/filesystems/devpts.txt
-- randomize_va_space
-- real-root-dev               ==> Documentation/admin-guide/initrd.rst
-- reboot-cmd                  [ SPARC only ]
-- rtsig-max
-- rtsig-nr
-- sched_energy_aware
-- seccomp/                    ==> Documentation/userspace-api/seccomp_filter.rst
-- sem
-- sem_next_id		      [ sysv ipc ]
-- sg-big-buff                 [ generic SCSI device (sg) ]
-- shm_next_id		      [ sysv ipc ]
-- shm_rmid_forced
-- shmall
-- shmmax                      [ sysv ipc ]
-- shmmni
-- softlockup_all_cpu_backtrace
-- soft_watchdog
-- stack_erasing
-- stop-a                      [ SPARC only ]
-- sysrq                       ==> Documentation/admin-guide/sysrq.rst
-- sysctl_writes_strict
-- tainted                     ==> Documentation/admin-guide/tainted-kernels.rst
-- threads-max
-- unknown_nmi_panic
-- watchdog
-- watchdog_thresh
-- version
-
-==============================================================
-
-acct:
-
-highwater lowwater frequency
-
-If BSD-style process accounting is enabled these values control
-its behaviour. If free space on filesystem where the log lives
-goes below <lowwater>% accounting suspends. If free space gets
-above <highwater>% accounting resumes. <Frequency> determines
-how often do we check the amount of free space (value is in
-seconds). Default:
-4 2 30
-That is, suspend accounting if there left <= 2% free; resume it
-if we got >=4%; consider information about amount of free space
-valid for 30 seconds.
-
-==============================================================
-
-acpi_video_flags:
-
-flags
-
-See Doc*/kernel/power/video.txt, it allows mode of video boot to be
-set during run time.
-
-==============================================================
-
-auto_msgmni:
-
-This variable has no effect and may be removed in future kernel
-releases. Reading it always returns 0.
-Up to Linux 3.17, it enabled/disabled automatic recomputing of msgmni
-upon memory add/remove or upon ipc namespace creation/removal.
-Echoing "1" into this file enabled msgmni automatic recomputing.
-Echoing "0" turned it off. auto_msgmni default value was 1.
-
-
-==============================================================
-
-bootloader_type:
-
-x86 bootloader identification
-
-This gives the bootloader type number as indicated by the bootloader,
-shifted left by 4, and OR'd with the low four bits of the bootloader
-version.  The reason for this encoding is that this used to match the
-type_of_loader field in the kernel header; the encoding is kept for
-backwards compatibility.  That is, if the full bootloader type number
-is 0x15 and the full version number is 0x234, this file will contain
-the value 340 = 0x154.
-
-See the type_of_loader and ext_loader_type fields in
-Documentation/x86/boot.rst for additional information.
-
-==============================================================
-
-bootloader_version:
-
-x86 bootloader version
-
-The complete bootloader version number.  In the example above, this
-file will contain the value 564 = 0x234.
-
-See the type_of_loader and ext_loader_ver fields in
-Documentation/x86/boot.rst for additional information.
-
-==============================================================
-
-cap_last_cap
-
-Highest valid capability of the running kernel.  Exports
-CAP_LAST_CAP from the kernel.
-
-==============================================================
-
-core_pattern:
-
-core_pattern is used to specify a core dumpfile pattern name.
-. max length 127 characters; default value is "core"
-. core_pattern is used as a pattern template for the output filename;
-  certain string patterns (beginning with '%') are substituted with
-  their actual values.
-. backward compatibility with core_uses_pid:
-	If core_pattern does not include "%p" (default does not)
-	and core_uses_pid is set, then .PID will be appended to
-	the filename.
-. corename format specifiers:
-	%<NUL>	'%' is dropped
-	%%	output one '%'
-	%p	pid
-	%P	global pid (init PID namespace)
-	%i	tid
-	%I	global tid (init PID namespace)
-	%u	uid (in initial user namespace)
-	%g	gid (in initial user namespace)
-	%d	dump mode, matches PR_SET_DUMPABLE and
-		/proc/sys/fs/suid_dumpable
-	%s	signal number
-	%t	UNIX time of dump
-	%h	hostname
-	%e	executable filename (may be shortened)
-	%E	executable path
-	%<OTHER> both are dropped
-. If the first character of the pattern is a '|', the kernel will treat
-  the rest of the pattern as a command to run.  The core dump will be
-  written to the standard input of that program instead of to a file.
-
-==============================================================
-
-core_pipe_limit:
-
-This sysctl is only applicable when core_pattern is configured to pipe
-core files to a user space helper (when the first character of
-core_pattern is a '|', see above).  When collecting cores via a pipe
-to an application, it is occasionally useful for the collecting
-application to gather data about the crashing process from its
-/proc/pid directory.  In order to do this safely, the kernel must wait
-for the collecting process to exit, so as not to remove the crashing
-processes proc files prematurely.  This in turn creates the
-possibility that a misbehaving userspace collecting process can block
-the reaping of a crashed process simply by never exiting.  This sysctl
-defends against that.  It defines how many concurrent crashing
-processes may be piped to user space applications in parallel.  If
-this value is exceeded, then those crashing processes above that value
-are noted via the kernel log and their cores are skipped.  0 is a
-special value, indicating that unlimited processes may be captured in
-parallel, but that no waiting will take place (i.e. the collecting
-process is not guaranteed access to /proc/<crashing pid>/).  This
-value defaults to 0.
-
-==============================================================
-
-core_uses_pid:
-
-The default coredump filename is "core".  By setting
-core_uses_pid to 1, the coredump filename becomes core.PID.
-If core_pattern does not include "%p" (default does not)
-and core_uses_pid is set, then .PID will be appended to
-the filename.
-
-==============================================================
-
-ctrl-alt-del:
-
-When the value in this file is 0, ctrl-alt-del is trapped and
-sent to the init(1) program to handle a graceful restart.
-When, however, the value is > 0, Linux's reaction to a Vulcan
-Nerve Pinch (tm) will be an immediate reboot, without even
-syncing its dirty buffers.
-
-Note: when a program (like dosemu) has the keyboard in 'raw'
-mode, the ctrl-alt-del is intercepted by the program before it
-ever reaches the kernel tty layer, and it's up to the program
-to decide what to do with it.
-
-==============================================================
-
-dmesg_restrict:
-
-This toggle indicates whether unprivileged users are prevented
-from using dmesg(8) to view messages from the kernel's log buffer.
-When dmesg_restrict is set to (0) there are no restrictions. When
-dmesg_restrict is set set to (1), users must have CAP_SYSLOG to use
-dmesg(8).
-
-The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the
-default value of dmesg_restrict.
-
-==============================================================
-
-domainname & hostname:
-
-These files can be used to set the NIS/YP domainname and the
-hostname of your box in exactly the same way as the commands
-domainname and hostname, i.e.:
-# echo "darkstar" > /proc/sys/kernel/hostname
-# echo "mydomain" > /proc/sys/kernel/domainname
-has the same effect as
-# hostname "darkstar"
-# domainname "mydomain"
-
-Note, however, that the classic darkstar.frop.org has the
-hostname "darkstar" and DNS (Internet Domain Name Server)
-domainname "frop.org", not to be confused with the NIS (Network
-Information Service) or YP (Yellow Pages) domainname. These two
-domain names are in general different. For a detailed discussion
-see the hostname(1) man page.
-
-==============================================================
-hardlockup_all_cpu_backtrace:
-
-This value controls the hard lockup detector behavior when a hard
-lockup condition is detected as to whether or not to gather further
-debug information. If enabled, arch-specific all-CPU stack dumping
-will be initiated.
-
-0: do nothing. This is the default behavior.
-
-1: on detection capture more debug information.
-==============================================================
-
-hardlockup_panic:
-
-This parameter can be used to control whether the kernel panics
-when a hard lockup is detected.
-
-   0 - don't panic on hard lockup
-   1 - panic on hard lockup
-
-See Documentation/lockup-watchdogs.txt for more information.  This can
-also be set using the nmi_watchdog kernel parameter.
-
-==============================================================
-
-hotplug:
-
-Path for the hotplug policy agent.
-Default value is "/sbin/hotplug".
-
-==============================================================
-
-hung_task_panic:
-
-Controls the kernel's behavior when a hung task is detected.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
-
-0: continue operation. This is the default behavior.
-
-1: panic immediately.
-
-==============================================================
-
-hung_task_check_count:
-
-The upper bound on the number of tasks that are checked.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
-
-==============================================================
-
-hung_task_timeout_secs:
-
-When a task in D state did not get scheduled
-for more than this value report a warning.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
-
-0: means infinite timeout - no checking done.
-Possible values to set are in range {0..LONG_MAX/HZ}.
-
-==============================================================
-
-hung_task_check_interval_secs:
-
-Hung task check interval. If hung task checking is enabled
-(see hung_task_timeout_secs), the check is done every
-hung_task_check_interval_secs seconds.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
-
-0 (default): means use hung_task_timeout_secs as checking interval.
-Possible values to set are in range {0..LONG_MAX/HZ}.
-
-==============================================================
-
-hung_task_warnings:
-
-The maximum number of warnings to report. During a check interval
-if a hung task is detected, this value is decreased by 1.
-When this value reaches 0, no more warnings will be reported.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
-
--1: report an infinite number of warnings.
-
-==============================================================
-
-hyperv_record_panic_msg:
-
-Controls whether the panic kmsg data should be reported to Hyper-V.
-
-0: do not report panic kmsg data.
-
-1: report the panic kmsg data. This is the default behavior.
-
-==============================================================
-
-kexec_load_disabled:
-
-A toggle indicating if the kexec_load syscall has been disabled. This
-value defaults to 0 (false: kexec_load enabled), but can be set to 1
-(true: kexec_load disabled). Once true, kexec can no longer be used, and
-the toggle cannot be set back to false. This allows a kexec image to be
-loaded before disabling the syscall, allowing a system to set up (and
-later use) an image without it being altered. Generally used together
-with the "modules_disabled" sysctl.
-
-==============================================================
-
-kptr_restrict:
-
-This toggle indicates whether restrictions are placed on
-exposing kernel addresses via /proc and other interfaces.
-
-When kptr_restrict is set to 0 (the default) the address is hashed before
-printing. (This is the equivalent to %p.)
-
-When kptr_restrict is set to (1), kernel pointers printed using the %pK
-format specifier will be replaced with 0's unless the user has CAP_SYSLOG
-and effective user and group ids are equal to the real ids. This is
-because %pK checks are done at read() time rather than open() time, so
-if permissions are elevated between the open() and the read() (e.g via
-a setuid binary) then %pK will not leak kernel pointers to unprivileged
-users. Note, this is a temporary solution only. The correct long-term
-solution is to do the permission checks at open() time. Consider removing
-world read permissions from files that use %pK, and using dmesg_restrict
-to protect against uses of %pK in dmesg(8) if leaking kernel pointer
-values to unprivileged users is a concern.
-
-When kptr_restrict is set to (2), kernel pointers printed using
-%pK will be replaced with 0's regardless of privileges.
-
-==============================================================
-
-l2cr: (PPC only)
-
-This flag controls the L2 cache of G3 processor boards. If
-0, the cache is disabled. Enabled if nonzero.
-
-==============================================================
-
-modules_disabled:
-
-A toggle value indicating if modules are allowed to be loaded
-in an otherwise modular kernel.  This toggle defaults to off
-(0), but can be set true (1).  Once true, modules can be
-neither loaded nor unloaded, and the toggle cannot be set back
-to false.  Generally used with the "kexec_load_disabled" toggle.
-
-==============================================================
-
-msg_next_id, sem_next_id, and shm_next_id:
-
-These three toggles allows to specify desired id for next allocated IPC
-object: message, semaphore or shared memory respectively.
-
-By default they are equal to -1, which means generic allocation logic.
-Possible values to set are in range {0..INT_MAX}.
-
-Notes:
-1) kernel doesn't guarantee, that new object will have desired id. So,
-it's up to userspace, how to handle an object with "wrong" id.
-2) Toggle with non-default value will be set back to -1 by kernel after
-successful IPC object allocation. If an IPC object allocation syscall
-fails, it is undefined if the value remains unmodified or is reset to -1.
-
-==============================================================
-
-nmi_watchdog:
-
-This parameter can be used to control the NMI watchdog
-(i.e. the hard lockup detector) on x86 systems.
-
-   0 - disable the hard lockup detector
-   1 - enable the hard lockup detector
-
-The hard lockup detector monitors each CPU for its ability to respond to
-timer interrupts. The mechanism utilizes CPU performance counter registers
-that are programmed to generate Non-Maskable Interrupts (NMIs) periodically
-while a CPU is busy. Hence, the alternative name 'NMI watchdog'.
-
-The NMI watchdog is disabled by default if the kernel is running as a guest
-in a KVM virtual machine. This default can be overridden by adding
-
-   nmi_watchdog=1
-
-to the guest kernel command line (see Documentation/admin-guide/kernel-parameters.rst).
-
-==============================================================
-
-numa_balancing
-
-Enables/disables automatic page fault based NUMA memory
-balancing. Memory is moved automatically to nodes
-that access it often.
-
-Enables/disables automatic NUMA memory balancing. On NUMA machines, there
-is a performance penalty if remote memory is accessed by a CPU. When this
-feature is enabled the kernel samples what task thread is accessing memory
-by periodically unmapping pages and later trapping a page fault. At the
-time of the page fault, it is determined if the data being accessed should
-be migrated to a local memory node.
-
-The unmapping of pages and trapping faults incur additional overhead that
-ideally is offset by improved memory locality but there is no universal
-guarantee. If the target workload is already bound to NUMA nodes then this
-feature should be disabled. Otherwise, if the system overhead from the
-feature is too high then the rate the kernel samples for NUMA hinting
-faults may be controlled by the numa_balancing_scan_period_min_ms,
-numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
-numa_balancing_scan_size_mb, and numa_balancing_settle_count sysctls.
-
-==============================================================
-
-numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms,
-numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
-
-Automatic NUMA balancing scans tasks address space and unmaps pages to
-detect if pages are properly placed or if the data should be migrated to a
-memory node local to where the task is running.  Every "scan delay" the task
-scans the next "scan size" number of pages in its address space. When the
-end of the address space is reached the scanner restarts from the beginning.
-
-In combination, the "scan delay" and "scan size" determine the scan rate.
-When "scan delay" decreases, the scan rate increases.  The scan delay and
-hence the scan rate of every task is adaptive and depends on historical
-behaviour. If pages are properly placed then the scan delay increases,
-otherwise the scan delay decreases.  The "scan size" is not adaptive but
-the higher the "scan size", the higher the scan rate.
-
-Higher scan rates incur higher system overhead as page faults must be
-trapped and potentially data must be migrated. However, the higher the scan
-rate, the more quickly a tasks memory is migrated to a local node if the
-workload pattern changes and minimises performance impact due to remote
-memory accesses. These sysctls control the thresholds for scan delays and
-the number of pages scanned.
-
-numa_balancing_scan_period_min_ms is the minimum time in milliseconds to
-scan a tasks virtual memory. It effectively controls the maximum scanning
-rate for each task.
-
-numa_balancing_scan_delay_ms is the starting "scan delay" used for a task
-when it initially forks.
-
-numa_balancing_scan_period_max_ms is the maximum time in milliseconds to
-scan a tasks virtual memory. It effectively controls the minimum scanning
-rate for each task.
-
-numa_balancing_scan_size_mb is how many megabytes worth of pages are
-scanned for a given scan.
-
-==============================================================
-
-osrelease, ostype & version:
-
-# cat osrelease
-2.1.88
-# cat ostype
-Linux
-# cat version
-#5 Wed Feb 25 21:49:24 MET 1998
-
-The files osrelease and ostype should be clear enough. Version
-needs a little more clarification however. The '#5' means that
-this is the fifth kernel built from this source base and the
-date behind it indicates the time the kernel was built.
-The only way to tune these values is to rebuild the kernel :-)
-
-==============================================================
-
-overflowgid & overflowuid:
-
-if your architecture did not always support 32-bit UIDs (i.e. arm,
-i386, m68k, sh, and sparc32), a fixed UID and GID will be returned to
-applications that use the old 16-bit UID/GID system calls, if the
-actual UID or GID would exceed 65535.
-
-These sysctls allow you to change the value of the fixed UID and GID.
-The default is 65534.
-
-==============================================================
-
-panic:
-
-The value in this file represents the number of seconds the kernel
-waits before rebooting on a panic. When you use the software watchdog,
-the recommended setting is 60.
-
-==============================================================
-
-panic_on_io_nmi:
-
-Controls the kernel's behavior when a CPU receives an NMI caused by
-an IO error.
-
-0: try to continue operation (default)
-
-1: panic immediately. The IO error triggered an NMI. This indicates a
-   serious system condition which could result in IO data corruption.
-   Rather than continuing, panicking might be a better choice. Some
-   servers issue this sort of NMI when the dump button is pushed,
-   and you can use this option to take a crash dump.
-
-==============================================================
-
-panic_on_oops:
-
-Controls the kernel's behaviour when an oops or BUG is encountered.
-
-0: try to continue operation
-
-1: panic immediately.  If the `panic' sysctl is also non-zero then the
-   machine will be rebooted.
-
-==============================================================
-
-panic_on_stackoverflow:
-
-Controls the kernel's behavior when detecting the overflows of
-kernel, IRQ and exception stacks except a user stack.
-This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
-
-0: try to continue operation.
-
-1: panic immediately.
-
-==============================================================
-
-panic_on_unrecovered_nmi:
-
-The default Linux behaviour on an NMI of either memory or unknown is
-to continue operation. For many environments such as scientific
-computing it is preferable that the box is taken out and the error
-dealt with than an uncorrected parity/ECC error get propagated.
-
-A small number of systems do generate NMI's for bizarre random reasons
-such as power management so the default is off. That sysctl works like
-the existing panic controls already in that directory.
-
-==============================================================
-
-panic_on_warn:
-
-Calls panic() in the WARN() path when set to 1.  This is useful to avoid
-a kernel rebuild when attempting to kdump at the location of a WARN().
-
-0: only WARN(), default behaviour.
-
-1: call panic() after printing out WARN() location.
-
-==============================================================
-
-panic_print:
-
-Bitmask for printing system info when panic happens. User can chose
-combination of the following bits:
-
-bit 0: print all tasks info
-bit 1: print system memory info
-bit 2: print timer info
-bit 3: print locks info if CONFIG_LOCKDEP is on
-bit 4: print ftrace buffer
-
-So for example to print tasks and memory info on panic, user can:
-  echo 3 > /proc/sys/kernel/panic_print
-
-==============================================================
-
-panic_on_rcu_stall:
-
-When set to 1, calls panic() after RCU stall detection messages. This
-is useful to define the root cause of RCU stalls using a vmcore.
-
-0: do not panic() when RCU stall takes place, default behavior.
-
-1: panic() after printing RCU stall messages.
-
-==============================================================
-
-perf_cpu_time_max_percent:
-
-Hints to the kernel how much CPU time it should be allowed to
-use to handle perf sampling events.  If the perf subsystem
-is informed that its samples are exceeding this limit, it
-will drop its sampling frequency to attempt to reduce its CPU
-usage.
-
-Some perf sampling happens in NMIs.  If these samples
-unexpectedly take too long to execute, the NMIs can become
-stacked up next to each other so much that nothing else is
-allowed to execute.
-
-0: disable the mechanism.  Do not monitor or correct perf's
-   sampling rate no matter how CPU time it takes.
-
-1-100: attempt to throttle perf's sample rate to this
-   percentage of CPU.  Note: the kernel calculates an
-   "expected" length of each sample event.  100 here means
-   100% of that expected length.  Even if this is set to
-   100, you may still see sample throttling if this
-   length is exceeded.  Set to 0 if you truly do not care
-   how much CPU is consumed.
-
-==============================================================
-
-perf_event_paranoid:
-
-Controls use of the performance events system by unprivileged
-users (without CAP_SYS_ADMIN).  The default value is 2.
-
- -1: Allow use of (almost) all events by all users
-     Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
->=0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN
-     Disallow raw tracepoint access by users without CAP_SYS_ADMIN
->=1: Disallow CPU event access by users without CAP_SYS_ADMIN
->=2: Disallow kernel profiling by users without CAP_SYS_ADMIN
-
-==============================================================
-
-perf_event_max_stack:
-
-Controls maximum number of stack frames to copy for (attr.sample_type &
-PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using
-'perf record -g' or 'perf trace --call-graph fp'.
-
-This can only be done when no events are in use that have callchains
-enabled, otherwise writing to this file will return -EBUSY.
-
-The default value is 127.
-
-==============================================================
-
-perf_event_mlock_kb:
-
-Control size of per-cpu ring buffer not counted agains mlock limit.
-
-The default value is 512 + 1 page
-
-==============================================================
-
-perf_event_max_contexts_per_stack:
-
-Controls maximum number of stack frame context entries for
-(attr.sample_type & PERF_SAMPLE_CALLCHAIN) configured events, for
-instance, when using 'perf record -g' or 'perf trace --call-graph fp'.
-
-This can only be done when no events are in use that have callchains
-enabled, otherwise writing to this file will return -EBUSY.
-
-The default value is 8.
-
-==============================================================
-
-pid_max:
-
-PID allocation wrap value.  When the kernel's next PID value
-reaches this value, it wraps back to a minimum PID value.
-PIDs of value pid_max or larger are not allocated.
-
-==============================================================
-
-ns_last_pid:
-
-The last pid allocated in the current (the one task using this sysctl
-lives in) pid namespace. When selecting a pid for a next task on fork
-kernel tries to allocate a number starting from this one.
-
-==============================================================
-
-powersave-nap: (PPC only)
-
-If set, Linux-PPC will use the 'nap' mode of powersaving,
-otherwise the 'doze' mode will be used.
-
-==============================================================
-
-printk:
-
-The four values in printk denote: console_loglevel,
-default_message_loglevel, minimum_console_loglevel and
-default_console_loglevel respectively.
-
-These values influence printk() behavior when printing or
-logging error messages. See 'man 2 syslog' for more info on
-the different loglevels.
-
-- console_loglevel: messages with a higher priority than
-  this will be printed to the console
-- default_message_loglevel: messages without an explicit priority
-  will be printed with this priority
-- minimum_console_loglevel: minimum (highest) value to which
-  console_loglevel can be set
-- default_console_loglevel: default value for console_loglevel
-
-==============================================================
-
-printk_delay:
-
-Delay each printk message in printk_delay milliseconds
-
-Value from 0 - 10000 is allowed.
-
-==============================================================
-
-printk_ratelimit:
-
-Some warning messages are rate limited. printk_ratelimit specifies
-the minimum length of time between these messages (in jiffies), by
-default we allow one every 5 seconds.
-
-A value of 0 will disable rate limiting.
-
-==============================================================
-
-printk_ratelimit_burst:
-
-While long term we enforce one message per printk_ratelimit
-seconds, we do allow a burst of messages to pass through.
-printk_ratelimit_burst specifies the number of messages we can
-send before ratelimiting kicks in.
-
-==============================================================
-
-printk_devkmsg:
-
-Control the logging to /dev/kmsg from userspace:
-
-ratelimit: default, ratelimited
-on: unlimited logging to /dev/kmsg from userspace
-off: logging to /dev/kmsg disabled
-
-The kernel command line parameter printk.devkmsg= overrides this and is
-a one-time setting until next reboot: once set, it cannot be changed by
-this sysctl interface anymore.
-
-==============================================================
-
-randomize_va_space:
-
-This option can be used to select the type of process address
-space randomization that is used in the system, for architectures
-that support this feature.
-
-0 - Turn the process address space randomization off.  This is the
-    default for architectures that do not support this feature anyways,
-    and kernels that are booted with the "norandmaps" parameter.
-
-1 - Make the addresses of mmap base, stack and VDSO page randomized.
-    This, among other things, implies that shared libraries will be
-    loaded to random addresses.  Also for PIE-linked binaries, the
-    location of code start is randomized.  This is the default if the
-    CONFIG_COMPAT_BRK option is enabled.
-
-2 - Additionally enable heap randomization.  This is the default if
-    CONFIG_COMPAT_BRK is disabled.
-
-    There are a few legacy applications out there (such as some ancient
-    versions of libc.so.5 from 1996) that assume that brk area starts
-    just after the end of the code+bss.  These applications break when
-    start of the brk area is randomized.  There are however no known
-    non-legacy applications that would be broken this way, so for most
-    systems it is safe to choose full randomization.
-
-    Systems with ancient and/or broken binaries should be configured
-    with CONFIG_COMPAT_BRK enabled, which excludes the heap from process
-    address space randomization.
-
-==============================================================
-
-reboot-cmd: (Sparc only)
-
-??? This seems to be a way to give an argument to the Sparc
-ROM/Flash boot loader. Maybe to tell it what to do after
-rebooting. ???
-
-==============================================================
-
-rtsig-max & rtsig-nr:
-
-The file rtsig-max can be used to tune the maximum number
-of POSIX realtime (queued) signals that can be outstanding
-in the system.
-
-rtsig-nr shows the number of RT signals currently queued.
-
-==============================================================
-
-sched_energy_aware:
-
-Enables/disables Energy Aware Scheduling (EAS). EAS starts
-automatically on platforms where it can run (that is,
-platforms with asymmetric CPU topologies and having an Energy
-Model available). If your platform happens to meet the
-requirements for EAS but you do not want to use it, change
-this value to 0.
-
-==============================================================
-
-sched_schedstats:
-
-Enables/disables scheduler statistics. Enabling this feature
-incurs a small amount of overhead in the scheduler but is
-useful for debugging and performance tuning.
-
-==============================================================
-
-sg-big-buff:
-
-This file shows the size of the generic SCSI (sg) buffer.
-You can't tune it just yet, but you could change it on
-compile time by editing include/scsi/sg.h and changing
-the value of SG_BIG_BUFF.
-
-There shouldn't be any reason to change this value. If
-you can come up with one, you probably know what you
-are doing anyway :)
-
-==============================================================
-
-shmall:
-
-This parameter sets the total amount of shared memory pages that
-can be used system wide. Hence, SHMALL should always be at least
-ceil(shmmax/PAGE_SIZE).
-
-If you are not sure what the default PAGE_SIZE is on your Linux
-system, you can run the following command:
-
-# getconf PAGE_SIZE
-
-==============================================================
-
-shmmax:
-
-This value can be used to query and set the run time limit
-on the maximum shared memory segment size that can be created.
-Shared memory segments up to 1Gb are now supported in the
-kernel.  This value defaults to SHMMAX.
-
-==============================================================
-
-shm_rmid_forced:
-
-Linux lets you set resource limits, including how much memory one
-process can consume, via setrlimit(2).  Unfortunately, shared memory
-segments are allowed to exist without association with any process, and
-thus might not be counted against any resource limits.  If enabled,
-shared memory segments are automatically destroyed when their attach
-count becomes zero after a detach or a process termination.  It will
-also destroy segments that were created, but never attached to, on exit
-from the process.  The only use left for IPC_RMID is to immediately
-destroy an unattached segment.  Of course, this breaks the way things are
-defined, so some applications might stop working.  Note that this
-feature will do you no good unless you also configure your resource
-limits (in particular, RLIMIT_AS and RLIMIT_NPROC).  Most systems don't
-need this.
-
-Note that if you change this from 0 to 1, already created segments
-without users and with a dead originative process will be destroyed.
-
-==============================================================
-
-sysctl_writes_strict:
-
-Control how file position affects the behavior of updating sysctl values
-via the /proc/sys interface:
-
-  -1 - Legacy per-write sysctl value handling, with no printk warnings.
-       Each write syscall must fully contain the sysctl value to be
-       written, and multiple writes on the same sysctl file descriptor
-       will rewrite the sysctl value, regardless of file position.
-   0 - Same behavior as above, but warn about processes that perform writes
-       to a sysctl file descriptor when the file position is not 0.
-   1 - (default) Respect file position when writing sysctl strings. Multiple
-       writes will append to the sysctl value buffer. Anything past the max
-       length of the sysctl value buffer will be ignored. Writes to numeric
-       sysctl entries must always be at file position 0 and the value must
-       be fully contained in the buffer sent in the write syscall.
-
-==============================================================
-
-softlockup_all_cpu_backtrace:
-
-This value controls the soft lockup detector thread's behavior
-when a soft lockup condition is detected as to whether or not
-to gather further debug information. If enabled, each cpu will
-be issued an NMI and instructed to capture stack trace.
-
-This feature is only applicable for architectures which support
-NMI.
-
-0: do nothing. This is the default behavior.
-
-1: on detection capture more debug information.
-
-==============================================================
-
-soft_watchdog
-
-This parameter can be used to control the soft lockup detector.
-
-   0 - disable the soft lockup detector
-   1 - enable the soft lockup detector
-
-The soft lockup detector monitors CPUs for threads that are hogging the CPUs
-without rescheduling voluntarily, and thus prevent the 'watchdog/N' threads
-from running. The mechanism depends on the CPUs ability to respond to timer
-interrupts which are needed for the 'watchdog/N' threads to be woken up by
-the watchdog timer function, otherwise the NMI watchdog - if enabled - can
-detect a hard lockup condition.
-
-==============================================================
-
-stack_erasing
-
-This parameter can be used to control kernel stack erasing at the end
-of syscalls for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK.
-
-That erasing reduces the information which kernel stack leak bugs
-can reveal and blocks some uninitialized stack variable attacks.
-The tradeoff is the performance impact: on a single CPU system kernel
-compilation sees a 1% slowdown, other systems and workloads may vary.
-
-  0: kernel stack erasing is disabled, STACKLEAK_METRICS are not updated.
-
-  1: kernel stack erasing is enabled (default), it is performed before
-     returning to the userspace at the end of syscalls.
-==============================================================
-
-tainted
-
-Non-zero if the kernel has been tainted. Numeric values, which can be
-ORed together. The letters are seen in "Tainted" line of Oops reports.
-
-     1 (P): proprietary module was loaded
-     2 (F): module was force loaded
-     4 (S): SMP kernel oops on an officially SMP incapable processor
-     8 (R): module was force unloaded
-    16 (M): processor reported a Machine Check Exception (MCE)
-    32 (B): bad page referenced or some unexpected page flags
-    64 (U): taint requested by userspace application
-   128 (D): kernel died recently, i.e. there was an OOPS or BUG
-   256 (A): an ACPI table was overridden by user
-   512 (W): kernel issued warning
-  1024 (C): staging driver was loaded
-  2048 (I): workaround for bug in platform firmware applied
-  4096 (O): externally-built ("out-of-tree") module was loaded
-  8192 (E): unsigned module was loaded
- 16384 (L): soft lockup occurred
- 32768 (K): kernel has been live patched
- 65536 (X): Auxiliary taint, defined and used by for distros
-131072 (T): The kernel was built with the struct randomization plugin
-
-See Documentation/admin-guide/tainted-kernels.rst for more information.
-
-==============================================================
-
-threads-max
-
-This value controls the maximum number of threads that can be created
-using fork().
-
-During initialization the kernel sets this value such that even if the
-maximum number of threads is created, the thread structures occupy only
-a part (1/8th) of the available RAM pages.
-
-The minimum value that can be written to threads-max is 20.
-The maximum value that can be written to threads-max is given by the
-constant FUTEX_TID_MASK (0x3fffffff).
-If a value outside of this range is written to threads-max an error
-EINVAL occurs.
-
-The value written is checked against the available RAM pages. If the
-thread structures would occupy too much (more than 1/8th) of the
-available RAM pages threads-max is reduced accordingly.
-
-==============================================================
-
-unknown_nmi_panic:
-
-The value in this file affects behavior of handling NMI. When the
-value is non-zero, unknown NMI is trapped and then panic occurs. At
-that time, kernel debugging information is displayed on console.
-
-NMI switch that most IA32 servers have fires unknown NMI up, for
-example.  If a system hangs up, try pressing the NMI switch.
-
-==============================================================
-
-watchdog:
-
-This parameter can be used to disable or enable the soft lockup detector
-_and_ the NMI watchdog (i.e. the hard lockup detector) at the same time.
-
-   0 - disable both lockup detectors
-   1 - enable both lockup detectors
-
-The soft lockup detector and the NMI watchdog can also be disabled or
-enabled individually, using the soft_watchdog and nmi_watchdog parameters.
-If the watchdog parameter is read, for example by executing
-
-   cat /proc/sys/kernel/watchdog
-
-the output of this command (0 or 1) shows the logical OR of soft_watchdog
-and nmi_watchdog.
-
-==============================================================
-
-watchdog_cpumask:
-
-This value can be used to control on which cpus the watchdog may run.
-The default cpumask is all possible cores, but if NO_HZ_FULL is
-enabled in the kernel config, and cores are specified with the
-nohz_full= boot argument, those cores are excluded by default.
-Offline cores can be included in this mask, and if the core is later
-brought online, the watchdog will be started based on the mask value.
-
-Typically this value would only be touched in the nohz_full case
-to re-enable cores that by default were not running the watchdog,
-if a kernel lockup was suspected on those cores.
-
-The argument value is the standard cpulist format for cpumasks,
-so for example to enable the watchdog on cores 0, 2, 3, and 4 you
-might say:
-
-  echo 0,2-4 > /proc/sys/kernel/watchdog_cpumask
-
-==============================================================
-
-watchdog_thresh:
-
-This value can be used to control the frequency of hrtimer and NMI
-events and the soft and hard lockup thresholds. The default threshold
-is 10 seconds.
-
-The softlockup threshold is (2 * watchdog_thresh). Setting this
-tunable to zero will disable lockup detection altogether.
-
-==============================================================
diff --git a/Documentation/sysctl/net.rst b/Documentation/sysctl/net.rst
new file mode 100644
index 000000000000..a7d44e71019d
--- /dev/null
+++ b/Documentation/sysctl/net.rst
@@ -0,0 +1,461 @@
+================================
+Documentation for /proc/sys/net/
+================================
+
+Copyright
+
+Copyright (c) 1999
+
+	- Terrehon Bowden <terrehon@pacbell.net>
+	- Bodo Bauer <bb@ricochet.net>
+
+Copyright (c) 2000
+
+	- Jorge Nerin <comandante@zaralinux.com>
+
+Copyright (c) 2009
+
+	- Shen Feng <shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in index.rst.
+
+------------------------------------------------------------------------------
+
+This file contains the documentation for the sysctl files in
+/proc/sys/net
+
+The interface  to  the  networking  parts  of  the  kernel  is  located  in
+/proc/sys/net. The following table shows all possible subdirectories.  You may
+see only some of them, depending on your kernel's configuration.
+
+
+Table : Subdirectories in /proc/sys/net
+
+ ========= =================== = ========== ==================
+ Directory Content               Directory  Content
+ ========= =================== = ========== ==================
+ core      General parameter     appletalk  Appletalk protocol
+ unix      Unix domain sockets   netrom     NET/ROM
+ 802       E802 protocol         ax25       AX25
+ ethernet  Ethernet protocol     rose       X.25 PLP layer
+ ipv4      IP version 4          x25        X.25 protocol
+ ipx       IPX                   token-ring IBM token ring
+ bridge    Bridging              decnet     DEC net
+ ipv6      IP version 6          tipc       TIPC
+ ========= =================== = ========== ==================
+
+1. /proc/sys/net/core - Network core options
+============================================
+
+bpf_jit_enable
+--------------
+
+This enables the BPF Just in Time (JIT) compiler. BPF is a flexible
+and efficient infrastructure allowing to execute bytecode at various
+hook points. It is used in a number of Linux kernel subsystems such
+as networking (e.g. XDP, tc), tracing (e.g. kprobes, uprobes, tracepoints)
+and security (e.g. seccomp). LLVM has a BPF back end that can compile
+restricted C into a sequence of BPF instructions. After program load
+through bpf(2) and passing a verifier in the kernel, a JIT will then
+translate these BPF proglets into native CPU instructions. There are
+two flavors of JITs, the newer eBPF JIT currently supported on:
+
+  - x86_64
+  - x86_32
+  - arm64
+  - arm32
+  - ppc64
+  - sparc64
+  - mips64
+  - s390x
+  - riscv
+
+And the older cBPF JIT supported on the following archs:
+
+  - mips
+  - ppc
+  - sparc
+
+eBPF JITs are a superset of cBPF JITs, meaning the kernel will
+migrate cBPF instructions into eBPF instructions and then JIT
+compile them transparently. Older cBPF JITs can only translate
+tcpdump filters, seccomp rules, etc, but not mentioned eBPF
+programs loaded through bpf(2).
+
+Values:
+
+	- 0 - disable the JIT (default value)
+	- 1 - enable the JIT
+	- 2 - enable the JIT and ask the compiler to emit traces on kernel log.
+
+bpf_jit_harden
+--------------
+
+This enables hardening for the BPF JIT compiler. Supported are eBPF
+JIT backends. Enabling hardening trades off performance, but can
+mitigate JIT spraying.
+
+Values:
+
+	- 0 - disable JIT hardening (default value)
+	- 1 - enable JIT hardening for unprivileged users only
+	- 2 - enable JIT hardening for all users
+
+bpf_jit_kallsyms
+----------------
+
+When BPF JIT compiler is enabled, then compiled images are unknown
+addresses to the kernel, meaning they neither show up in traces nor
+in /proc/kallsyms. This enables export of these addresses, which can
+be used for debugging/tracing. If bpf_jit_harden is enabled, this
+feature is disabled.
+
+Values :
+
+	- 0 - disable JIT kallsyms export (default value)
+	- 1 - enable JIT kallsyms export for privileged users only
+
+bpf_jit_limit
+-------------
+
+This enforces a global limit for memory allocations to the BPF JIT
+compiler in order to reject unprivileged JIT requests once it has
+been surpassed. bpf_jit_limit contains the value of the global limit
+in bytes.
+
+dev_weight
+----------
+
+The maximum number of packets that kernel can handle on a NAPI interrupt,
+it's a Per-CPU variable. For drivers that support LRO or GRO_HW, a hardware
+aggregated packet is counted as one packet in this context.
+
+Default: 64
+
+dev_weight_rx_bias
+------------------
+
+RPS (e.g. RFS, aRFS) processing is competing with the registered NAPI poll function
+of the driver for the per softirq cycle netdev_budget. This parameter influences
+the proportion of the configured netdev_budget that is spent on RPS based packet
+processing during RX softirq cycles. It is further meant for making current
+dev_weight adaptable for asymmetric CPU needs on RX/TX side of the network stack.
+(see dev_weight_tx_bias) It is effective on a per CPU basis. Determination is based
+on dev_weight and is calculated multiplicative (dev_weight * dev_weight_rx_bias).
+
+Default: 1
+
+dev_weight_tx_bias
+------------------
+
+Scales the maximum number of packets that can be processed during a TX softirq cycle.
+Effective on a per CPU basis. Allows scaling of current dev_weight for asymmetric
+net stack processing needs. Be careful to avoid making TX softirq processing a CPU hog.
+
+Calculation is based on dev_weight (dev_weight * dev_weight_tx_bias).
+
+Default: 1
+
+default_qdisc
+-------------
+
+The default queuing discipline to use for network devices. This allows
+overriding the default of pfifo_fast with an alternative. Since the default
+queuing discipline is created without additional parameters so is best suited
+to queuing disciplines that work well without configuration like stochastic
+fair queue (sfq), CoDel (codel) or fair queue CoDel (fq_codel). Don't use
+queuing disciplines like Hierarchical Token Bucket or Deficit Round Robin
+which require setting up classes and bandwidths. Note that physical multiqueue
+interfaces still use mq as root qdisc, which in turn uses this default for its
+leaves. Virtual devices (like e.g. lo or veth) ignore this setting and instead
+default to noqueue.
+
+Default: pfifo_fast
+
+busy_read
+---------
+
+Low latency busy poll timeout for socket reads. (needs CONFIG_NET_RX_BUSY_POLL)
+Approximate time in us to busy loop waiting for packets on the device queue.
+This sets the default value of the SO_BUSY_POLL socket option.
+Can be set or overridden per socket by setting socket option SO_BUSY_POLL,
+which is the preferred method of enabling. If you need to enable the feature
+globally via sysctl, a value of 50 is recommended.
+
+Will increase power usage.
+
+Default: 0 (off)
+
+busy_poll
+----------------
+Low latency busy poll timeout for poll and select. (needs CONFIG_NET_RX_BUSY_POLL)
+Approximate time in us to busy loop waiting for events.
+Recommended value depends on the number of sockets you poll on.
+For several sockets 50, for several hundreds 100.
+For more than that you probably want to use epoll.
+Note that only sockets with SO_BUSY_POLL set will be busy polled,
+so you want to either selectively set SO_BUSY_POLL on those sockets or set
+sysctl.net.busy_read globally.
+
+Will increase power usage.
+
+Default: 0 (off)
+
+rmem_default
+------------
+
+The default setting of the socket receive buffer in bytes.
+
+rmem_max
+--------
+
+The maximum receive socket buffer size in bytes.
+
+tstamp_allow_data
+-----------------
+Allow processes to receive tx timestamps looped together with the original
+packet contents. If disabled, transmit timestamp requests from unprivileged
+processes are dropped unless socket option SOF_TIMESTAMPING_OPT_TSONLY is set.
+
+Default: 1 (on)
+
+
+wmem_default
+------------
+
+The default setting (in bytes) of the socket send buffer.
+
+wmem_max
+--------
+
+The maximum send socket buffer size in bytes.
+
+message_burst and message_cost
+------------------------------
+
+These parameters  are used to limit the warning messages written to the kernel
+log from  the  networking  code.  They  enforce  a  rate  limit  to  make  a
+denial-of-service attack  impossible. A higher message_cost factor, results in
+fewer messages that will be written. Message_burst controls when messages will
+be dropped.  The  default  settings  limit  warning messages to one every five
+seconds.
+
+warnings
+--------
+
+This sysctl is now unused.
+
+This was used to control console messages from the networking stack that
+occur because of problems on the network like duplicate address or bad
+checksums.
+
+These messages are now emitted at KERN_DEBUG and can generally be enabled
+and controlled by the dynamic_debug facility.
+
+netdev_budget
+-------------
+
+Maximum number of packets taken from all interfaces in one polling cycle (NAPI
+poll). In one polling cycle interfaces which are registered to polling are
+probed in a round-robin manner. Also, a polling cycle may not exceed
+netdev_budget_usecs microseconds, even if netdev_budget has not been
+exhausted.
+
+netdev_budget_usecs
+---------------------
+
+Maximum number of microseconds in one NAPI polling cycle. Polling
+will exit when either netdev_budget_usecs have elapsed during the
+poll cycle or the number of packets processed reaches netdev_budget.
+
+netdev_max_backlog
+------------------
+
+Maximum number  of  packets,  queued  on  the  INPUT  side, when the interface
+receives packets faster than kernel can process them.
+
+netdev_rss_key
+--------------
+
+RSS (Receive Side Scaling) enabled drivers use a 40 bytes host key that is
+randomly generated.
+Some user space might need to gather its content even if drivers do not
+provide ethtool -x support yet.
+
+::
+
+  myhost:~# cat /proc/sys/net/core/netdev_rss_key
+  84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8: ... (52 bytes total)
+
+File contains nul bytes if no driver ever called netdev_rss_key_fill() function.
+
+Note:
+  /proc/sys/net/core/netdev_rss_key contains 52 bytes of key,
+  but most drivers only use 40 bytes of it.
+
+::
+
+  myhost:~# ethtool -x eth0
+  RX flow hash indirection table for eth0 with 8 RX ring(s):
+      0:    0     1     2     3     4     5     6     7
+  RSS hash key:
+  84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8:43:e3:c9:0c:fd:17:55:c2:3a:4d:69:ed:f1:42:89
+
+netdev_tstamp_prequeue
+----------------------
+
+If set to 0, RX packet timestamps can be sampled after RPS processing, when
+the target CPU processes packets. It might give some delay on timestamps, but
+permit to distribute the load on several cpus.
+
+If set to 1 (default), timestamps are sampled as soon as possible, before
+queueing.
+
+optmem_max
+----------
+
+Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
+of struct cmsghdr structures with appended data.
+
+fb_tunnels_only_for_init_net
+----------------------------
+
+Controls if fallback tunnels (like tunl0, gre0, gretap0, erspan0,
+sit0, ip6tnl0, ip6gre0) are automatically created when a new
+network namespace is created, if corresponding tunnel is present
+in initial network namespace.
+If set to 1, these devices are not automatically created, and
+user space is responsible for creating them if needed.
+
+Default : 0  (for compatibility reasons)
+
+devconf_inherit_init_net
+------------------------
+
+Controls if a new network namespace should inherit all current
+settings under /proc/sys/net/{ipv4,ipv6}/conf/{all,default}/. By
+default, we keep the current behavior: for IPv4 we inherit all current
+settings from init_net and for IPv6 we reset all settings to default.
+
+If set to 1, both IPv4 and IPv6 settings are forced to inherit from
+current ones in init_net. If set to 2, both IPv4 and IPv6 settings are
+forced to reset to their default values.
+
+Default : 0  (for compatibility reasons)
+
+2. /proc/sys/net/unix - Parameters for Unix domain sockets
+----------------------------------------------------------
+
+There is only one file in this directory.
+unix_dgram_qlen limits the max number of datagrams queued in Unix domain
+socket's buffer. It will not take effect unless PF_UNIX flag is specified.
+
+
+3. /proc/sys/net/ipv4 - IPV4 settings
+-------------------------------------
+Please see: Documentation/networking/ip-sysctl.txt and ipvs-sysctl.txt for
+descriptions of these entries.
+
+
+4. Appletalk
+------------
+
+The /proc/sys/net/appletalk  directory  holds the Appletalk configuration data
+when Appletalk is loaded. The configurable parameters are:
+
+aarp-expiry-time
+----------------
+
+The amount  of  time  we keep an ARP entry before expiring it. Used to age out
+old hosts.
+
+aarp-resolve-time
+-----------------
+
+The amount of time we will spend trying to resolve an Appletalk address.
+
+aarp-retransmit-limit
+---------------------
+
+The number of times we will retransmit a query before giving up.
+
+aarp-tick-time
+--------------
+
+Controls the rate at which expires are checked.
+
+The directory  /proc/net/appletalk  holds the list of active Appletalk sockets
+on a machine.
+
+The fields  indicate  the DDP type, the local address (in network:node format)
+the remote  address,  the  size of the transmit pending queue, the size of the
+received queue  (bytes waiting for applications to read) the state and the uid
+owning the socket.
+
+/proc/net/atalk_iface lists  all  the  interfaces  configured for appletalk.It
+shows the  name  of the interface, its Appletalk address, the network range on
+that address  (or  network number for phase 1 networks), and the status of the
+interface.
+
+/proc/net/atalk_route lists  each  known  network  route.  It lists the target
+(network) that the route leads to, the router (may be directly connected), the
+route flags, and the device the route is using.
+
+
+5. IPX
+------
+
+The IPX protocol has no tunable values in proc/sys/net.
+
+The IPX  protocol  does,  however,  provide  proc/net/ipx. This lists each IPX
+socket giving  the  local  and  remote  addresses  in  Novell  format (that is
+network:node:port). In  accordance  with  the  strange  Novell  tradition,
+everything but the port is in hex. Not_Connected is displayed for sockets that
+are not  tied to a specific remote address. The Tx and Rx queue sizes indicate
+the number  of  bytes  pending  for  transmission  and  reception.  The  state
+indicates the  state  the  socket  is  in and the uid is the owning uid of the
+socket.
+
+The /proc/net/ipx_interface  file lists all IPX interfaces. For each interface
+it gives  the network number, the node number, and indicates if the network is
+the primary  network.  It  also  indicates  which  device  it  is bound to (or
+Internal for  internal  networks)  and  the  Frame  Type if appropriate. Linux
+supports 802.3,  802.2,  802.2  SNAP  and DIX (Blue Book) ethernet framing for
+IPX.
+
+The /proc/net/ipx_route  table  holds  a list of IPX routes. For each route it
+gives the  destination  network, the router node (or Directly) and the network
+address of the router (or Connected) for internal networks.
+
+6. TIPC
+-------
+
+tipc_rmem
+---------
+
+The TIPC protocol now has a tunable for the receive memory, similar to the
+tcp_rmem - i.e. a vector of 3 INTEGERs: (min, default, max)
+
+::
+
+    # cat /proc/sys/net/tipc/tipc_rmem
+    4252725 34021800        68043600
+    #
+
+The max value is set to CONN_OVERLOAD_LIMIT, and the default and min values
+are scaled (shifted) versions of that same value.  Note that the min value
+is not at this point in time used in any meaningful way, but the triplet is
+preserved in order to be consistent with things like tcp_rmem.
+
+named_timeout
+-------------
+
+TIPC name table updates are distributed asynchronously in a cluster, without
+any form of transaction handling. This means that different race scenarios are
+possible. One such is that a name withdrawal sent out by one node and received
+by another node may arrive after a second, overlapping name publication already
+has been accepted from a third node, although the conflicting updates
+originally may have been issued in the correct sequential order.
+If named_timeout is nonzero, failed topology updates will be placed on a defer
+queue until another event arrives that clears the error, or until the timeout
+expires. Value is in milliseconds.
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
deleted file mode 100644
index 2ae91d3873bb..000000000000
--- a/Documentation/sysctl/net.txt
+++ /dev/null
@@ -1,422 +0,0 @@
-Documentation for /proc/sys/net/*
-	(c) 1999		Terrehon Bowden <terrehon@pacbell.net>
-				Bodo Bauer <bb@ricochet.net>
-	(c) 2000		Jorge Nerin <comandante@zaralinux.com>
-	(c) 2009		Shen Feng <shen@cn.fujitsu.com>
-
-For general info and legal blurb, please look in README.
-
-==============================================================
-
-This file contains the documentation for the sysctl files in
-/proc/sys/net
-
-The interface  to  the  networking  parts  of  the  kernel  is  located  in
-/proc/sys/net. The following table shows all possible subdirectories.  You may
-see only some of them, depending on your kernel's configuration.
-
-
-Table : Subdirectories in /proc/sys/net
-..............................................................................
- Directory Content             Directory  Content
- core      General parameter   appletalk  Appletalk protocol
- unix      Unix domain sockets netrom     NET/ROM
- 802       E802 protocol       ax25       AX25
- ethernet  Ethernet protocol   rose       X.25 PLP layer
- ipv4      IP version 4        x25        X.25 protocol
- ipx       IPX                 token-ring IBM token ring
- bridge    Bridging            decnet     DEC net
- ipv6      IP version 6        tipc       TIPC
-..............................................................................
-
-1. /proc/sys/net/core - Network core options
--------------------------------------------------------
-
-bpf_jit_enable
---------------
-
-This enables the BPF Just in Time (JIT) compiler. BPF is a flexible
-and efficient infrastructure allowing to execute bytecode at various
-hook points. It is used in a number of Linux kernel subsystems such
-as networking (e.g. XDP, tc), tracing (e.g. kprobes, uprobes, tracepoints)
-and security (e.g. seccomp). LLVM has a BPF back end that can compile
-restricted C into a sequence of BPF instructions. After program load
-through bpf(2) and passing a verifier in the kernel, a JIT will then
-translate these BPF proglets into native CPU instructions. There are
-two flavors of JITs, the newer eBPF JIT currently supported on:
-  - x86_64
-  - x86_32
-  - arm64
-  - arm32
-  - ppc64
-  - sparc64
-  - mips64
-  - s390x
-  - riscv
-
-And the older cBPF JIT supported on the following archs:
-  - mips
-  - ppc
-  - sparc
-
-eBPF JITs are a superset of cBPF JITs, meaning the kernel will
-migrate cBPF instructions into eBPF instructions and then JIT
-compile them transparently. Older cBPF JITs can only translate
-tcpdump filters, seccomp rules, etc, but not mentioned eBPF
-programs loaded through bpf(2).
-
-Values :
-	0 - disable the JIT (default value)
-	1 - enable the JIT
-	2 - enable the JIT and ask the compiler to emit traces on kernel log.
-
-bpf_jit_harden
---------------
-
-This enables hardening for the BPF JIT compiler. Supported are eBPF
-JIT backends. Enabling hardening trades off performance, but can
-mitigate JIT spraying.
-Values :
-	0 - disable JIT hardening (default value)
-	1 - enable JIT hardening for unprivileged users only
-	2 - enable JIT hardening for all users
-
-bpf_jit_kallsyms
-----------------
-
-When BPF JIT compiler is enabled, then compiled images are unknown
-addresses to the kernel, meaning they neither show up in traces nor
-in /proc/kallsyms. This enables export of these addresses, which can
-be used for debugging/tracing. If bpf_jit_harden is enabled, this
-feature is disabled.
-Values :
-	0 - disable JIT kallsyms export (default value)
-	1 - enable JIT kallsyms export for privileged users only
-
-bpf_jit_limit
--------------
-
-This enforces a global limit for memory allocations to the BPF JIT
-compiler in order to reject unprivileged JIT requests once it has
-been surpassed. bpf_jit_limit contains the value of the global limit
-in bytes.
-
-dev_weight
---------------
-
-The maximum number of packets that kernel can handle on a NAPI interrupt,
-it's a Per-CPU variable. For drivers that support LRO or GRO_HW, a hardware
-aggregated packet is counted as one packet in this context.
-
-Default: 64
-
-dev_weight_rx_bias
---------------
-
-RPS (e.g. RFS, aRFS) processing is competing with the registered NAPI poll function
-of the driver for the per softirq cycle netdev_budget. This parameter influences
-the proportion of the configured netdev_budget that is spent on RPS based packet
-processing during RX softirq cycles. It is further meant for making current
-dev_weight adaptable for asymmetric CPU needs on RX/TX side of the network stack.
-(see dev_weight_tx_bias) It is effective on a per CPU basis. Determination is based
-on dev_weight and is calculated multiplicative (dev_weight * dev_weight_rx_bias).
-Default: 1
-
-dev_weight_tx_bias
---------------
-
-Scales the maximum number of packets that can be processed during a TX softirq cycle.
-Effective on a per CPU basis. Allows scaling of current dev_weight for asymmetric
-net stack processing needs. Be careful to avoid making TX softirq processing a CPU hog.
-Calculation is based on dev_weight (dev_weight * dev_weight_tx_bias).
-Default: 1
-
-default_qdisc
---------------
-
-The default queuing discipline to use for network devices. This allows
-overriding the default of pfifo_fast with an alternative. Since the default
-queuing discipline is created without additional parameters so is best suited
-to queuing disciplines that work well without configuration like stochastic
-fair queue (sfq), CoDel (codel) or fair queue CoDel (fq_codel). Don't use
-queuing disciplines like Hierarchical Token Bucket or Deficit Round Robin
-which require setting up classes and bandwidths. Note that physical multiqueue
-interfaces still use mq as root qdisc, which in turn uses this default for its
-leaves. Virtual devices (like e.g. lo or veth) ignore this setting and instead
-default to noqueue.
-Default: pfifo_fast
-
-busy_read
-----------------
-Low latency busy poll timeout for socket reads. (needs CONFIG_NET_RX_BUSY_POLL)
-Approximate time in us to busy loop waiting for packets on the device queue.
-This sets the default value of the SO_BUSY_POLL socket option.
-Can be set or overridden per socket by setting socket option SO_BUSY_POLL,
-which is the preferred method of enabling. If you need to enable the feature
-globally via sysctl, a value of 50 is recommended.
-Will increase power usage.
-Default: 0 (off)
-
-busy_poll
-----------------
-Low latency busy poll timeout for poll and select. (needs CONFIG_NET_RX_BUSY_POLL)
-Approximate time in us to busy loop waiting for events.
-Recommended value depends on the number of sockets you poll on.
-For several sockets 50, for several hundreds 100.
-For more than that you probably want to use epoll.
-Note that only sockets with SO_BUSY_POLL set will be busy polled,
-so you want to either selectively set SO_BUSY_POLL on those sockets or set
-sysctl.net.busy_read globally.
-Will increase power usage.
-Default: 0 (off)
-
-rmem_default
-------------
-
-The default setting of the socket receive buffer in bytes.
-
-rmem_max
---------
-
-The maximum receive socket buffer size in bytes.
-
-tstamp_allow_data
------------------
-Allow processes to receive tx timestamps looped together with the original
-packet contents. If disabled, transmit timestamp requests from unprivileged
-processes are dropped unless socket option SOF_TIMESTAMPING_OPT_TSONLY is set.
-Default: 1 (on)
-
-
-wmem_default
-------------
-
-The default setting (in bytes) of the socket send buffer.
-
-wmem_max
---------
-
-The maximum send socket buffer size in bytes.
-
-message_burst and message_cost
-------------------------------
-
-These parameters  are used to limit the warning messages written to the kernel
-log from  the  networking  code.  They  enforce  a  rate  limit  to  make  a
-denial-of-service attack  impossible. A higher message_cost factor, results in
-fewer messages that will be written. Message_burst controls when messages will
-be dropped.  The  default  settings  limit  warning messages to one every five
-seconds.
-
-warnings
---------
-
-This sysctl is now unused.
-
-This was used to control console messages from the networking stack that
-occur because of problems on the network like duplicate address or bad
-checksums.
-
-These messages are now emitted at KERN_DEBUG and can generally be enabled
-and controlled by the dynamic_debug facility.
-
-netdev_budget
--------------
-
-Maximum number of packets taken from all interfaces in one polling cycle (NAPI
-poll). In one polling cycle interfaces which are registered to polling are
-probed in a round-robin manner. Also, a polling cycle may not exceed
-netdev_budget_usecs microseconds, even if netdev_budget has not been
-exhausted.
-
-netdev_budget_usecs
----------------------
-
-Maximum number of microseconds in one NAPI polling cycle. Polling
-will exit when either netdev_budget_usecs have elapsed during the
-poll cycle or the number of packets processed reaches netdev_budget.
-
-netdev_max_backlog
-------------------
-
-Maximum number  of  packets,  queued  on  the  INPUT  side, when the interface
-receives packets faster than kernel can process them.
-
-netdev_rss_key
---------------
-
-RSS (Receive Side Scaling) enabled drivers use a 40 bytes host key that is
-randomly generated.
-Some user space might need to gather its content even if drivers do not
-provide ethtool -x support yet.
-
-myhost:~# cat /proc/sys/net/core/netdev_rss_key
-84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8: ... (52 bytes total)
-
-File contains nul bytes if no driver ever called netdev_rss_key_fill() function.
-Note:
-/proc/sys/net/core/netdev_rss_key contains 52 bytes of key,
-but most drivers only use 40 bytes of it.
-
-myhost:~# ethtool -x eth0
-RX flow hash indirection table for eth0 with 8 RX ring(s):
-    0:    0     1     2     3     4     5     6     7
-RSS hash key:
-84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8:43:e3:c9:0c:fd:17:55:c2:3a:4d:69:ed:f1:42:89
-
-netdev_tstamp_prequeue
-----------------------
-
-If set to 0, RX packet timestamps can be sampled after RPS processing, when
-the target CPU processes packets. It might give some delay on timestamps, but
-permit to distribute the load on several cpus.
-
-If set to 1 (default), timestamps are sampled as soon as possible, before
-queueing.
-
-optmem_max
-----------
-
-Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
-of struct cmsghdr structures with appended data.
-
-fb_tunnels_only_for_init_net
-----------------------------
-
-Controls if fallback tunnels (like tunl0, gre0, gretap0, erspan0,
-sit0, ip6tnl0, ip6gre0) are automatically created when a new
-network namespace is created, if corresponding tunnel is present
-in initial network namespace.
-If set to 1, these devices are not automatically created, and
-user space is responsible for creating them if needed.
-
-Default : 0  (for compatibility reasons)
-
-devconf_inherit_init_net
-----------------------------
-
-Controls if a new network namespace should inherit all current
-settings under /proc/sys/net/{ipv4,ipv6}/conf/{all,default}/. By
-default, we keep the current behavior: for IPv4 we inherit all current
-settings from init_net and for IPv6 we reset all settings to default.
-
-If set to 1, both IPv4 and IPv6 settings are forced to inherit from
-current ones in init_net. If set to 2, both IPv4 and IPv6 settings are
-forced to reset to their default values.
-
-Default : 0  (for compatibility reasons)
-
-2. /proc/sys/net/unix - Parameters for Unix domain sockets
--------------------------------------------------------
-
-There is only one file in this directory.
-unix_dgram_qlen limits the max number of datagrams queued in Unix domain
-socket's buffer. It will not take effect unless PF_UNIX flag is specified.
-
-
-3. /proc/sys/net/ipv4 - IPV4 settings
--------------------------------------------------------
-Please see: Documentation/networking/ip-sysctl.txt and ipvs-sysctl.txt for
-descriptions of these entries.
-
-
-4. Appletalk
--------------------------------------------------------
-
-The /proc/sys/net/appletalk  directory  holds the Appletalk configuration data
-when Appletalk is loaded. The configurable parameters are:
-
-aarp-expiry-time
-----------------
-
-The amount  of  time  we keep an ARP entry before expiring it. Used to age out
-old hosts.
-
-aarp-resolve-time
------------------
-
-The amount of time we will spend trying to resolve an Appletalk address.
-
-aarp-retransmit-limit
----------------------
-
-The number of times we will retransmit a query before giving up.
-
-aarp-tick-time
---------------
-
-Controls the rate at which expires are checked.
-
-The directory  /proc/net/appletalk  holds the list of active Appletalk sockets
-on a machine.
-
-The fields  indicate  the DDP type, the local address (in network:node format)
-the remote  address,  the  size of the transmit pending queue, the size of the
-received queue  (bytes waiting for applications to read) the state and the uid
-owning the socket.
-
-/proc/net/atalk_iface lists  all  the  interfaces  configured for appletalk.It
-shows the  name  of the interface, its Appletalk address, the network range on
-that address  (or  network number for phase 1 networks), and the status of the
-interface.
-
-/proc/net/atalk_route lists  each  known  network  route.  It lists the target
-(network) that the route leads to, the router (may be directly connected), the
-route flags, and the device the route is using.
-
-
-5. IPX
--------------------------------------------------------
-
-The IPX protocol has no tunable values in proc/sys/net.
-
-The IPX  protocol  does,  however,  provide  proc/net/ipx. This lists each IPX
-socket giving  the  local  and  remote  addresses  in  Novell  format (that is
-network:node:port). In  accordance  with  the  strange  Novell  tradition,
-everything but the port is in hex. Not_Connected is displayed for sockets that
-are not  tied to a specific remote address. The Tx and Rx queue sizes indicate
-the number  of  bytes  pending  for  transmission  and  reception.  The  state
-indicates the  state  the  socket  is  in and the uid is the owning uid of the
-socket.
-
-The /proc/net/ipx_interface  file lists all IPX interfaces. For each interface
-it gives  the network number, the node number, and indicates if the network is
-the primary  network.  It  also  indicates  which  device  it  is bound to (or
-Internal for  internal  networks)  and  the  Frame  Type if appropriate. Linux
-supports 802.3,  802.2,  802.2  SNAP  and DIX (Blue Book) ethernet framing for
-IPX.
-
-The /proc/net/ipx_route  table  holds  a list of IPX routes. For each route it
-gives the  destination  network, the router node (or Directly) and the network
-address of the router (or Connected) for internal networks.
-
-6. TIPC
--------------------------------------------------------
-
-tipc_rmem
-----------
-
-The TIPC protocol now has a tunable for the receive memory, similar to the
-tcp_rmem - i.e. a vector of 3 INTEGERs: (min, default, max)
-
-    # cat /proc/sys/net/tipc/tipc_rmem
-    4252725 34021800        68043600
-    #
-
-The max value is set to CONN_OVERLOAD_LIMIT, and the default and min values
-are scaled (shifted) versions of that same value.  Note that the min value
-is not at this point in time used in any meaningful way, but the triplet is
-preserved in order to be consistent with things like tcp_rmem.
-
-named_timeout
---------------
-
-TIPC name table updates are distributed asynchronously in a cluster, without
-any form of transaction handling. This means that different race scenarios are
-possible. One such is that a name withdrawal sent out by one node and received
-by another node may arrive after a second, overlapping name publication already
-has been accepted from a third node, although the conflicting updates
-originally may have been issued in the correct sequential order.
-If named_timeout is nonzero, failed topology updates will be placed on a defer
-queue until another event arrives that clears the error, or until the timeout
-expires. Value is in milliseconds.
diff --git a/Documentation/sysctl/sunrpc.rst b/Documentation/sysctl/sunrpc.rst
new file mode 100644
index 000000000000..09780a682afd
--- /dev/null
+++ b/Documentation/sysctl/sunrpc.rst
@@ -0,0 +1,25 @@
+===================================
+Documentation for /proc/sys/sunrpc/
+===================================
+
+kernel version 2.2.10
+
+Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
+
+For general info and legal blurb, please look in index.rst.
+
+------------------------------------------------------------------------------
+
+This file contains the documentation for the sysctl files in
+/proc/sys/sunrpc and is valid for Linux kernel version 2.2.
+
+The files in this directory can be used to (re)set the debug
+flags of the SUN Remote Procedure Call (RPC) subsystem in
+the Linux kernel. This stuff is used for NFS, KNFSD and
+maybe a few other things as well.
+
+The files in there are used to control the debugging flags:
+rpc_debug, nfs_debug, nfsd_debug and nlm_debug.
+
+These flags are for kernel hackers only. You should read the
+source code in net/sunrpc/ for more information.
diff --git a/Documentation/sysctl/sunrpc.txt b/Documentation/sysctl/sunrpc.txt
deleted file mode 100644
index ae1ecac6f85a..000000000000
--- a/Documentation/sysctl/sunrpc.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-Documentation for /proc/sys/sunrpc/*	kernel version 2.2.10
-	(c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
-
-For general info and legal blurb, please look in README.
-
-==============================================================
-
-This file contains the documentation for the sysctl files in
-/proc/sys/sunrpc and is valid for Linux kernel version 2.2.
-
-The files in this directory can be used to (re)set the debug
-flags of the SUN Remote Procedure Call (RPC) subsystem in
-the Linux kernel. This stuff is used for NFS, KNFSD and
-maybe a few other things as well.
-
-The files in there are used to control the debugging flags:
-rpc_debug, nfs_debug, nfsd_debug and nlm_debug.
-
-These flags are for kernel hackers only. You should read the
-source code in net/sunrpc/ for more information.
diff --git a/Documentation/sysctl/user.rst b/Documentation/sysctl/user.rst
new file mode 100644
index 000000000000..650eaa03f15e
--- /dev/null
+++ b/Documentation/sysctl/user.rst
@@ -0,0 +1,78 @@
+=================================
+Documentation for /proc/sys/user/
+=================================
+
+kernel version 4.9.0
+
+Copyright (c) 2016		Eric Biederman <ebiederm@xmission.com>
+
+------------------------------------------------------------------------------
+
+This file contains the documentation for the sysctl files in
+/proc/sys/user.
+
+The files in this directory can be used to override the default
+limits on the number of namespaces and other objects that have
+per user per user namespace limits.
+
+The primary purpose of these limits is to stop programs that
+malfunction and attempt to create a ridiculous number of objects,
+before the malfunction becomes a system wide problem.  It is the
+intention that the defaults of these limits are set high enough that
+no program in normal operation should run into these limits.
+
+The creation of per user per user namespace objects are charged to
+the user in the user namespace who created the object and
+verified to be below the per user limit in that user namespace.
+
+The creation of objects is also charged to all of the users
+who created user namespaces the creation of the object happens
+in (user namespaces can be nested) and verified to be below the per user
+limits in the user namespaces of those users.
+
+This recursive counting of created objects ensures that creating a
+user namespace does not allow a user to escape their current limits.
+
+Currently, these files are in /proc/sys/user:
+
+max_cgroup_namespaces
+=====================
+
+  The maximum number of cgroup namespaces that any user in the current
+  user namespace may create.
+
+max_ipc_namespaces
+==================
+
+  The maximum number of ipc namespaces that any user in the current
+  user namespace may create.
+
+max_mnt_namespaces
+==================
+
+  The maximum number of mount namespaces that any user in the current
+  user namespace may create.
+
+max_net_namespaces
+==================
+
+  The maximum number of network namespaces that any user in the
+  current user namespace may create.
+
+max_pid_namespaces
+==================
+
+  The maximum number of pid namespaces that any user in the current
+  user namespace may create.
+
+max_user_namespaces
+===================
+
+  The maximum number of user namespaces that any user in the current
+  user namespace may create.
+
+max_uts_namespaces
+==================
+
+  The maximum number of user namespaces that any user in the current
+  user namespace may create.
diff --git a/Documentation/sysctl/user.txt b/Documentation/sysctl/user.txt
deleted file mode 100644
index a5882865836e..000000000000
--- a/Documentation/sysctl/user.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-Documentation for /proc/sys/user/*	kernel version 4.9.0
-	(c) 2016		Eric Biederman <ebiederm@xmission.com>
-
-==============================================================
-
-This file contains the documentation for the sysctl files in
-/proc/sys/user.
-
-The files in this directory can be used to override the default
-limits on the number of namespaces and other objects that have
-per user per user namespace limits.
-
-The primary purpose of these limits is to stop programs that
-malfunction and attempt to create a ridiculous number of objects,
-before the malfunction becomes a system wide problem.  It is the
-intention that the defaults of these limits are set high enough that
-no program in normal operation should run into these limits.
-
-The creation of per user per user namespace objects are charged to
-the user in the user namespace who created the object and
-verified to be below the per user limit in that user namespace.
-
-The creation of objects is also charged to all of the users
-who created user namespaces the creation of the object happens
-in (user namespaces can be nested) and verified to be below the per user
-limits in the user namespaces of those users.
-
-This recursive counting of created objects ensures that creating a
-user namespace does not allow a user to escape their current limits.
-
-Currently, these files are in /proc/sys/user:
-
-- max_cgroup_namespaces
-
-  The maximum number of cgroup namespaces that any user in the current
-  user namespace may create.
-
-- max_ipc_namespaces
-
-  The maximum number of ipc namespaces that any user in the current
-  user namespace may create.
-
-- max_mnt_namespaces
-
-  The maximum number of mount namespaces that any user in the current
-  user namespace may create.
-
-- max_net_namespaces
-
-  The maximum number of network namespaces that any user in the
-  current user namespace may create.
-
-- max_pid_namespaces
-
-  The maximum number of pid namespaces that any user in the current
-  user namespace may create.
-
-- max_user_namespaces
-
-  The maximum number of user namespaces that any user in the current
-  user namespace may create.
-
-- max_uts_namespaces
-
-  The maximum number of user namespaces that any user in the current
-  user namespace may create.
diff --git a/Documentation/sysctl/vm.rst b/Documentation/sysctl/vm.rst
new file mode 100644
index 000000000000..5aceb5cd5ce7
--- /dev/null
+++ b/Documentation/sysctl/vm.rst
@@ -0,0 +1,964 @@
+===============================
+Documentation for /proc/sys/vm/
+===============================
+
+kernel version 2.6.29
+
+Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
+
+Copyright (c) 2008         Peter W. Morreale <pmorreale@novell.com>
+
+For general info and legal blurb, please look in index.rst.
+
+------------------------------------------------------------------------------
+
+This file contains the documentation for the sysctl files in
+/proc/sys/vm and is valid for Linux kernel version 2.6.29.
+
+The files in this directory can be used to tune the operation
+of the virtual memory (VM) subsystem of the Linux kernel and
+the writeout of dirty data to disk.
+
+Default values and initialization routines for most of these
+files can be found in mm/swap.c.
+
+Currently, these files are in /proc/sys/vm:
+
+- admin_reserve_kbytes
+- block_dump
+- compact_memory
+- compact_unevictable_allowed
+- dirty_background_bytes
+- dirty_background_ratio
+- dirty_bytes
+- dirty_expire_centisecs
+- dirty_ratio
+- dirtytime_expire_seconds
+- dirty_writeback_centisecs
+- drop_caches
+- extfrag_threshold
+- hugetlb_shm_group
+- laptop_mode
+- legacy_va_layout
+- lowmem_reserve_ratio
+- max_map_count
+- memory_failure_early_kill
+- memory_failure_recovery
+- min_free_kbytes
+- min_slab_ratio
+- min_unmapped_ratio
+- mmap_min_addr
+- mmap_rnd_bits
+- mmap_rnd_compat_bits
+- nr_hugepages
+- nr_hugepages_mempolicy
+- nr_overcommit_hugepages
+- nr_trim_pages         (only if CONFIG_MMU=n)
+- numa_zonelist_order
+- oom_dump_tasks
+- oom_kill_allocating_task
+- overcommit_kbytes
+- overcommit_memory
+- overcommit_ratio
+- page-cluster
+- panic_on_oom
+- percpu_pagelist_fraction
+- stat_interval
+- stat_refresh
+- numa_stat
+- swappiness
+- unprivileged_userfaultfd
+- user_reserve_kbytes
+- vfs_cache_pressure
+- watermark_boost_factor
+- watermark_scale_factor
+- zone_reclaim_mode
+
+
+admin_reserve_kbytes
+====================
+
+The amount of free memory in the system that should be reserved for users
+with the capability cap_sys_admin.
+
+admin_reserve_kbytes defaults to min(3% of free pages, 8MB)
+
+That should provide enough for the admin to log in and kill a process,
+if necessary, under the default overcommit 'guess' mode.
+
+Systems running under overcommit 'never' should increase this to account
+for the full Virtual Memory Size of programs used to recover. Otherwise,
+root may not be able to log in to recover the system.
+
+How do you calculate a minimum useful reserve?
+
+sshd or login + bash (or some other shell) + top (or ps, kill, etc.)
+
+For overcommit 'guess', we can sum resident set sizes (RSS).
+On x86_64 this is about 8MB.
+
+For overcommit 'never', we can take the max of their virtual sizes (VSZ)
+and add the sum of their RSS.
+On x86_64 this is about 128MB.
+
+Changing this takes effect whenever an application requests memory.
+
+
+block_dump
+==========
+
+block_dump enables block I/O debugging when set to a nonzero value. More
+information on block I/O debugging is in Documentation/laptops/laptop-mode.rst.
+
+
+compact_memory
+==============
+
+Available only when CONFIG_COMPACTION is set. When 1 is written to the file,
+all zones are compacted such that free memory is available in contiguous
+blocks where possible. This can be important for example in the allocation of
+huge pages although processes will also directly compact memory as required.
+
+
+compact_unevictable_allowed
+===========================
+
+Available only when CONFIG_COMPACTION is set. When set to 1, compaction is
+allowed to examine the unevictable lru (mlocked pages) for pages to compact.
+This should be used on systems where stalls for minor page faults are an
+acceptable trade for large contiguous free memory.  Set to 0 to prevent
+compaction from moving pages that are unevictable.  Default value is 1.
+
+
+dirty_background_bytes
+======================
+
+Contains the amount of dirty memory at which the background kernel
+flusher threads will start writeback.
+
+Note:
+  dirty_background_bytes is the counterpart of dirty_background_ratio. Only
+  one of them may be specified at a time. When one sysctl is written it is
+  immediately taken into account to evaluate the dirty memory limits and the
+  other appears as 0 when read.
+
+
+dirty_background_ratio
+======================
+
+Contains, as a percentage of total available memory that contains free pages
+and reclaimable pages, the number of pages at which the background kernel
+flusher threads will start writing out dirty data.
+
+The total available memory is not equal to total system memory.
+
+
+dirty_bytes
+===========
+
+Contains the amount of dirty memory at which a process generating disk writes
+will itself start writeback.
+
+Note: dirty_bytes is the counterpart of dirty_ratio. Only one of them may be
+specified at a time. When one sysctl is written it is immediately taken into
+account to evaluate the dirty memory limits and the other appears as 0 when
+read.
+
+Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any
+value lower than this limit will be ignored and the old configuration will be
+retained.
+
+
+dirty_expire_centisecs
+======================
+
+This tunable is used to define when dirty data is old enough to be eligible
+for writeout by the kernel flusher threads.  It is expressed in 100'ths
+of a second.  Data which has been dirty in-memory for longer than this
+interval will be written out next time a flusher thread wakes up.
+
+
+dirty_ratio
+===========
+
+Contains, as a percentage of total available memory that contains free pages
+and reclaimable pages, the number of pages at which a process which is
+generating disk writes will itself start writing out dirty data.
+
+The total available memory is not equal to total system memory.
+
+
+dirtytime_expire_seconds
+========================
+
+When a lazytime inode is constantly having its pages dirtied, the inode with
+an updated timestamp will never get chance to be written out.  And, if the
+only thing that has happened on the file system is a dirtytime inode caused
+by an atime update, a worker will be scheduled to make sure that inode
+eventually gets pushed out to disk.  This tunable is used to define when dirty
+inode is old enough to be eligible for writeback by the kernel flusher threads.
+And, it is also used as the interval to wakeup dirtytime_writeback thread.
+
+
+dirty_writeback_centisecs
+=========================
+
+The kernel flusher threads will periodically wake up and write `old` data
+out to disk.  This tunable expresses the interval between those wakeups, in
+100'ths of a second.
+
+Setting this to zero disables periodic writeback altogether.
+
+
+drop_caches
+===========
+
+Writing to this will cause the kernel to drop clean caches, as well as
+reclaimable slab objects like dentries and inodes.  Once dropped, their
+memory becomes free.
+
+To free pagecache::
+
+	echo 1 > /proc/sys/vm/drop_caches
+
+To free reclaimable slab objects (includes dentries and inodes)::
+
+	echo 2 > /proc/sys/vm/drop_caches
+
+To free slab objects and pagecache::
+
+	echo 3 > /proc/sys/vm/drop_caches
+
+This is a non-destructive operation and will not free any dirty objects.
+To increase the number of objects freed by this operation, the user may run
+`sync` prior to writing to /proc/sys/vm/drop_caches.  This will minimize the
+number of dirty objects on the system and create more candidates to be
+dropped.
+
+This file is not a means to control the growth of the various kernel caches
+(inodes, dentries, pagecache, etc...)  These objects are automatically
+reclaimed by the kernel when memory is needed elsewhere on the system.
+
+Use of this file can cause performance problems.  Since it discards cached
+objects, it may cost a significant amount of I/O and CPU to recreate the
+dropped objects, especially if they were under heavy use.  Because of this,
+use outside of a testing or debugging environment is not recommended.
+
+You may see informational messages in your kernel log when this file is
+used::
+
+	cat (1234): drop_caches: 3
+
+These are informational only.  They do not mean that anything is wrong
+with your system.  To disable them, echo 4 (bit 2) into drop_caches.
+
+
+extfrag_threshold
+=================
+
+This parameter affects whether the kernel will compact memory or direct
+reclaim to satisfy a high-order allocation. The extfrag/extfrag_index file in
+debugfs shows what the fragmentation index for each order is in each zone in
+the system. Values tending towards 0 imply allocations would fail due to lack
+of memory, values towards 1000 imply failures are due to fragmentation and -1
+implies that the allocation will succeed as long as watermarks are met.
+
+The kernel will not compact memory in a zone if the
+fragmentation index is <= extfrag_threshold. The default value is 500.
+
+
+highmem_is_dirtyable
+====================
+
+Available only for systems with CONFIG_HIGHMEM enabled (32b systems).
+
+This parameter controls whether the high memory is considered for dirty
+writers throttling.  This is not the case by default which means that
+only the amount of memory directly visible/usable by the kernel can
+be dirtied. As a result, on systems with a large amount of memory and
+lowmem basically depleted writers might be throttled too early and
+streaming writes can get very slow.
+
+Changing the value to non zero would allow more memory to be dirtied
+and thus allow writers to write more data which can be flushed to the
+storage more effectively. Note this also comes with a risk of pre-mature
+OOM killer because some writers (e.g. direct block device writes) can
+only use the low memory and they can fill it up with dirty data without
+any throttling.
+
+
+hugetlb_shm_group
+=================
+
+hugetlb_shm_group contains group id that is allowed to create SysV
+shared memory segment using hugetlb page.
+
+
+laptop_mode
+===========
+
+laptop_mode is a knob that controls "laptop mode". All the things that are
+controlled by this knob are discussed in Documentation/laptops/laptop-mode.rst.
+
+
+legacy_va_layout
+================
+
+If non-zero, this sysctl disables the new 32-bit mmap layout - the kernel
+will use the legacy (2.4) layout for all processes.
+
+
+lowmem_reserve_ratio
+====================
+
+For some specialised workloads on highmem machines it is dangerous for
+the kernel to allow process memory to be allocated from the "lowmem"
+zone.  This is because that memory could then be pinned via the mlock()
+system call, or by unavailability of swapspace.
+
+And on large highmem machines this lack of reclaimable lowmem memory
+can be fatal.
+
+So the Linux page allocator has a mechanism which prevents allocations
+which *could* use highmem from using too much lowmem.  This means that
+a certain amount of lowmem is defended from the possibility of being
+captured into pinned user memory.
+
+(The same argument applies to the old 16 megabyte ISA DMA region.  This
+mechanism will also defend that region from allocations which could use
+highmem or lowmem).
+
+The `lowmem_reserve_ratio` tunable determines how aggressive the kernel is
+in defending these lower zones.
+
+If you have a machine which uses highmem or ISA DMA and your
+applications are using mlock(), or if you are running with no swap then
+you probably should change the lowmem_reserve_ratio setting.
+
+The lowmem_reserve_ratio is an array. You can see them by reading this file::
+
+	% cat /proc/sys/vm/lowmem_reserve_ratio
+	256     256     32
+
+But, these values are not used directly. The kernel calculates # of protection
+pages for each zones from them. These are shown as array of protection pages
+in /proc/zoneinfo like followings. (This is an example of x86-64 box).
+Each zone has an array of protection pages like this::
+
+  Node 0, zone      DMA
+    pages free     1355
+          min      3
+          low      3
+          high     4
+	:
+	:
+      numa_other   0
+          protection: (0, 2004, 2004, 2004)
+	^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    pagesets
+      cpu: 0 pcp: 0
+          :
+
+These protections are added to score to judge whether this zone should be used
+for page allocation or should be reclaimed.
+
+In this example, if normal pages (index=2) are required to this DMA zone and
+watermark[WMARK_HIGH] is used for watermark, the kernel judges this zone should
+not be used because pages_free(1355) is smaller than watermark + protection[2]
+(4 + 2004 = 2008). If this protection value is 0, this zone would be used for
+normal page requirement. If requirement is DMA zone(index=0), protection[0]
+(=0) is used.
+
+zone[i]'s protection[j] is calculated by following expression::
+
+  (i < j):
+    zone[i]->protection[j]
+    = (total sums of managed_pages from zone[i+1] to zone[j] on the node)
+      / lowmem_reserve_ratio[i];
+  (i = j):
+     (should not be protected. = 0;
+  (i > j):
+     (not necessary, but looks 0)
+
+The default values of lowmem_reserve_ratio[i] are
+
+    === ====================================
+    256 (if zone[i] means DMA or DMA32 zone)
+    32  (others)
+    === ====================================
+
+As above expression, they are reciprocal number of ratio.
+256 means 1/256. # of protection pages becomes about "0.39%" of total managed
+pages of higher zones on the node.
+
+If you would like to protect more pages, smaller values are effective.
+The minimum value is 1 (1/1 -> 100%). The value less than 1 completely
+disables protection of the pages.
+
+
+max_map_count:
+==============
+
+This file contains the maximum number of memory map areas a process
+may have. Memory map areas are used as a side-effect of calling
+malloc, directly by mmap, mprotect, and madvise, and also when loading
+shared libraries.
+
+While most applications need less than a thousand maps, certain
+programs, particularly malloc debuggers, may consume lots of them,
+e.g., up to one or two maps per allocation.
+
+The default value is 65536.
+
+
+memory_failure_early_kill:
+==========================
+
+Control how to kill processes when uncorrected memory error (typically
+a 2bit error in a memory module) is detected in the background by hardware
+that cannot be handled by the kernel. In some cases (like the page
+still having a valid copy on disk) the kernel will handle the failure
+transparently without affecting any applications. But if there is
+no other uptodate copy of the data it will kill to prevent any data
+corruptions from propagating.
+
+1: Kill all processes that have the corrupted and not reloadable page mapped
+as soon as the corruption is detected.  Note this is not supported
+for a few types of pages, like kernel internally allocated data or
+the swap cache, but works for the majority of user pages.
+
+0: Only unmap the corrupted page from all processes and only kill a process
+who tries to access it.
+
+The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can
+handle this if they want to.
+
+This is only active on architectures/platforms with advanced machine
+check handling and depends on the hardware capabilities.
+
+Applications can override this setting individually with the PR_MCE_KILL prctl
+
+
+memory_failure_recovery
+=======================
+
+Enable memory failure recovery (when supported by the platform)
+
+1: Attempt recovery.
+
+0: Always panic on a memory failure.
+
+
+min_free_kbytes
+===============
+
+This is used to force the Linux VM to keep a minimum number
+of kilobytes free.  The VM uses this number to compute a
+watermark[WMARK_MIN] value for each lowmem zone in the system.
+Each lowmem zone gets a number of reserved free pages based
+proportionally on its size.
+
+Some minimal amount of memory is needed to satisfy PF_MEMALLOC
+allocations; if you set this to lower than 1024KB, your system will
+become subtly broken, and prone to deadlock under high loads.
+
+Setting this too high will OOM your machine instantly.
+
+
+min_slab_ratio
+==============
+
+This is available only on NUMA kernels.
+
+A percentage of the total pages in each zone.  On Zone reclaim
+(fallback from the local zone occurs) slabs will be reclaimed if more
+than this percentage of pages in a zone are reclaimable slab pages.
+This insures that the slab growth stays under control even in NUMA
+systems that rarely perform global reclaim.
+
+The default is 5 percent.
+
+Note that slab reclaim is triggered in a per zone / node fashion.
+The process of reclaiming slab memory is currently not node specific
+and may not be fast.
+
+
+min_unmapped_ratio
+==================
+
+This is available only on NUMA kernels.
+
+This is a percentage of the total pages in each zone. Zone reclaim will
+only occur if more than this percentage of pages are in a state that
+zone_reclaim_mode allows to be reclaimed.
+
+If zone_reclaim_mode has the value 4 OR'd, then the percentage is compared
+against all file-backed unmapped pages including swapcache pages and tmpfs
+files. Otherwise, only unmapped pages backed by normal files but not tmpfs
+files and similar are considered.
+
+The default is 1 percent.
+
+
+mmap_min_addr
+=============
+
+This file indicates the amount of address space  which a user process will
+be restricted from mmapping.  Since kernel null dereference bugs could
+accidentally operate based on the information in the first couple of pages
+of memory userspace processes should not be allowed to write to them.  By
+default this value is set to 0 and no protections will be enforced by the
+security module.  Setting this value to something like 64k will allow the
+vast majority of applications to work correctly and provide defense in depth
+against future potential kernel bugs.
+
+
+mmap_rnd_bits
+=============
+
+This value can be used to select the number of bits to use to
+determine the random offset to the base address of vma regions
+resulting from mmap allocations on architectures which support
+tuning address space randomization.  This value will be bounded
+by the architecture's minimum and maximum supported values.
+
+This value can be changed after boot using the
+/proc/sys/vm/mmap_rnd_bits tunable
+
+
+mmap_rnd_compat_bits
+====================
+
+This value can be used to select the number of bits to use to
+determine the random offset to the base address of vma regions
+resulting from mmap allocations for applications run in
+compatibility mode on architectures which support tuning address
+space randomization.  This value will be bounded by the
+architecture's minimum and maximum supported values.
+
+This value can be changed after boot using the
+/proc/sys/vm/mmap_rnd_compat_bits tunable
+
+
+nr_hugepages
+============
+
+Change the minimum size of the hugepage pool.
+
+See Documentation/admin-guide/mm/hugetlbpage.rst
+
+
+nr_hugepages_mempolicy
+======================
+
+Change the size of the hugepage pool at run-time on a specific
+set of NUMA nodes.
+
+See Documentation/admin-guide/mm/hugetlbpage.rst
+
+
+nr_overcommit_hugepages
+=======================
+
+Change the maximum size of the hugepage pool. The maximum is
+nr_hugepages + nr_overcommit_hugepages.
+
+See Documentation/admin-guide/mm/hugetlbpage.rst
+
+
+nr_trim_pages
+=============
+
+This is available only on NOMMU kernels.
+
+This value adjusts the excess page trimming behaviour of power-of-2 aligned
+NOMMU mmap allocations.
+
+A value of 0 disables trimming of allocations entirely, while a value of 1
+trims excess pages aggressively. Any value >= 1 acts as the watermark where
+trimming of allocations is initiated.
+
+The default value is 1.
+
+See Documentation/nommu-mmap.txt for more information.
+
+
+numa_zonelist_order
+===================
+
+This sysctl is only for NUMA and it is deprecated. Anything but
+Node order will fail!
+
+'where the memory is allocated from' is controlled by zonelists.
+
+(This documentation ignores ZONE_HIGHMEM/ZONE_DMA32 for simple explanation.
+you may be able to read ZONE_DMA as ZONE_DMA32...)
+
+In non-NUMA case, a zonelist for GFP_KERNEL is ordered as following.
+ZONE_NORMAL -> ZONE_DMA
+This means that a memory allocation request for GFP_KERNEL will
+get memory from ZONE_DMA only when ZONE_NORMAL is not available.
+
+In NUMA case, you can think of following 2 types of order.
+Assume 2 node NUMA and below is zonelist of Node(0)'s GFP_KERNEL::
+
+  (A) Node(0) ZONE_NORMAL -> Node(0) ZONE_DMA -> Node(1) ZONE_NORMAL
+  (B) Node(0) ZONE_NORMAL -> Node(1) ZONE_NORMAL -> Node(0) ZONE_DMA.
+
+Type(A) offers the best locality for processes on Node(0), but ZONE_DMA
+will be used before ZONE_NORMAL exhaustion. This increases possibility of
+out-of-memory(OOM) of ZONE_DMA because ZONE_DMA is tend to be small.
+
+Type(B) cannot offer the best locality but is more robust against OOM of
+the DMA zone.
+
+Type(A) is called as "Node" order. Type (B) is "Zone" order.
+
+"Node order" orders the zonelists by node, then by zone within each node.
+Specify "[Nn]ode" for node order
+
+"Zone Order" orders the zonelists by zone type, then by node within each
+zone.  Specify "[Zz]one" for zone order.
+
+Specify "[Dd]efault" to request automatic configuration.
+
+On 32-bit, the Normal zone needs to be preserved for allocations accessible
+by the kernel, so "zone" order will be selected.
+
+On 64-bit, devices that require DMA32/DMA are relatively rare, so "node"
+order will be selected.
+
+Default order is recommended unless this is causing problems for your
+system/application.
+
+
+oom_dump_tasks
+==============
+
+Enables a system-wide task dump (excluding kernel threads) to be produced
+when the kernel performs an OOM-killing and includes such information as
+pid, uid, tgid, vm size, rss, pgtables_bytes, swapents, oom_score_adj
+score, and name.  This is helpful to determine why the OOM killer was
+invoked, to identify the rogue task that caused it, and to determine why
+the OOM killer chose the task it did to kill.
+
+If this is set to zero, this information is suppressed.  On very
+large systems with thousands of tasks it may not be feasible to dump
+the memory state information for each one.  Such systems should not
+be forced to incur a performance penalty in OOM conditions when the
+information may not be desired.
+
+If this is set to non-zero, this information is shown whenever the
+OOM killer actually kills a memory-hogging task.
+
+The default value is 1 (enabled).
+
+
+oom_kill_allocating_task
+========================
+
+This enables or disables killing the OOM-triggering task in
+out-of-memory situations.
+
+If this is set to zero, the OOM killer will scan through the entire
+tasklist and select a task based on heuristics to kill.  This normally
+selects a rogue memory-hogging task that frees up a large amount of
+memory when killed.
+
+If this is set to non-zero, the OOM killer simply kills the task that
+triggered the out-of-memory condition.  This avoids the expensive
+tasklist scan.
+
+If panic_on_oom is selected, it takes precedence over whatever value
+is used in oom_kill_allocating_task.
+
+The default value is 0.
+
+
+overcommit_kbytes
+=================
+
+When overcommit_memory is set to 2, the committed address space is not
+permitted to exceed swap plus this amount of physical RAM. See below.
+
+Note: overcommit_kbytes is the counterpart of overcommit_ratio. Only one
+of them may be specified at a time. Setting one disables the other (which
+then appears as 0 when read).
+
+
+overcommit_memory
+=================
+
+This value contains a flag that enables memory overcommitment.
+
+When this flag is 0, the kernel attempts to estimate the amount
+of free memory left when userspace requests more memory.
+
+When this flag is 1, the kernel pretends there is always enough
+memory until it actually runs out.
+
+When this flag is 2, the kernel uses a "never overcommit"
+policy that attempts to prevent any overcommit of memory.
+Note that user_reserve_kbytes affects this policy.
+
+This feature can be very useful because there are a lot of
+programs that malloc() huge amounts of memory "just-in-case"
+and don't use much of it.
+
+The default value is 0.
+
+See Documentation/vm/overcommit-accounting.rst and
+mm/util.c::__vm_enough_memory() for more information.
+
+
+overcommit_ratio
+================
+
+When overcommit_memory is set to 2, the committed address
+space is not permitted to exceed swap plus this percentage
+of physical RAM.  See above.
+
+
+page-cluster
+============
+
+page-cluster controls the number of pages up to which consecutive pages
+are read in from swap in a single attempt. This is the swap counterpart
+to page cache readahead.
+The mentioned consecutivity is not in terms of virtual/physical addresses,
+but consecutive on swap space - that means they were swapped out together.
+
+It is a logarithmic value - setting it to zero means "1 page", setting
+it to 1 means "2 pages", setting it to 2 means "4 pages", etc.
+Zero disables swap readahead completely.
+
+The default value is three (eight pages at a time).  There may be some
+small benefits in tuning this to a different value if your workload is
+swap-intensive.
+
+Lower values mean lower latencies for initial faults, but at the same time
+extra faults and I/O delays for following faults if they would have been part of
+that consecutive pages readahead would have brought in.
+
+
+panic_on_oom
+============
+
+This enables or disables panic on out-of-memory feature.
+
+If this is set to 0, the kernel will kill some rogue process,
+called oom_killer.  Usually, oom_killer can kill rogue processes and
+system will survive.
+
+If this is set to 1, the kernel panics when out-of-memory happens.
+However, if a process limits using nodes by mempolicy/cpusets,
+and those nodes become memory exhaustion status, one process
+may be killed by oom-killer. No panic occurs in this case.
+Because other nodes' memory may be free. This means system total status
+may be not fatal yet.
+
+If this is set to 2, the kernel panics compulsorily even on the
+above-mentioned. Even oom happens under memory cgroup, the whole
+system panics.
+
+The default value is 0.
+
+1 and 2 are for failover of clustering. Please select either
+according to your policy of failover.
+
+panic_on_oom=2+kdump gives you very strong tool to investigate
+why oom happens. You can get snapshot.
+
+
+percpu_pagelist_fraction
+========================
+
+This is the fraction of pages at most (high mark pcp->high) in each zone that
+are allocated for each per cpu page list.  The min value for this is 8.  It
+means that we don't allow more than 1/8th of pages in each zone to be
+allocated in any single per_cpu_pagelist.  This entry only changes the value
+of hot per cpu pagelists.  User can specify a number like 100 to allocate
+1/100th of each zone to each per cpu page list.
+
+The batch value of each per cpu pagelist is also updated as a result.  It is
+set to pcp->high/4.  The upper limit of batch is (PAGE_SHIFT * 8)
+
+The initial value is zero.  Kernel does not use this value at boot time to set
+the high water marks for each per cpu page list.  If the user writes '0' to this
+sysctl, it will revert to this default behavior.
+
+
+stat_interval
+=============
+
+The time interval between which vm statistics are updated.  The default
+is 1 second.
+
+
+stat_refresh
+============
+
+Any read or write (by root only) flushes all the per-cpu vm statistics
+into their global totals, for more accurate reports when testing
+e.g. cat /proc/sys/vm/stat_refresh /proc/meminfo
+
+As a side-effect, it also checks for negative totals (elsewhere reported
+as 0) and "fails" with EINVAL if any are found, with a warning in dmesg.
+(At time of writing, a few stats are known sometimes to be found negative,
+with no ill effects: errors and warnings on these stats are suppressed.)
+
+
+numa_stat
+=========
+
+This interface allows runtime configuration of numa statistics.
+
+When page allocation performance becomes a bottleneck and you can tolerate
+some possible tool breakage and decreased numa counter precision, you can
+do::
+
+	echo 0 > /proc/sys/vm/numa_stat
+
+When page allocation performance is not a bottleneck and you want all
+tooling to work, you can do::
+
+	echo 1 > /proc/sys/vm/numa_stat
+
+
+swappiness
+==========
+
+This control is used to define how aggressive the kernel will swap
+memory pages.  Higher values will increase aggressiveness, lower values
+decrease the amount of swap.  A value of 0 instructs the kernel not to
+initiate swap until the amount of free and file-backed pages is less
+than the high water mark in a zone.
+
+The default value is 60.
+
+
+unprivileged_userfaultfd
+========================
+
+This flag controls whether unprivileged users can use the userfaultfd
+system calls.  Set this to 1 to allow unprivileged users to use the
+userfaultfd system calls, or set this to 0 to restrict userfaultfd to only
+privileged users (with SYS_CAP_PTRACE capability).
+
+The default value is 1.
+
+
+user_reserve_kbytes
+===================
+
+When overcommit_memory is set to 2, "never overcommit" mode, reserve
+min(3% of current process size, user_reserve_kbytes) of free memory.
+This is intended to prevent a user from starting a single memory hogging
+process, such that they cannot recover (kill the hog).
+
+user_reserve_kbytes defaults to min(3% of the current process size, 128MB).
+
+If this is reduced to zero, then the user will be allowed to allocate
+all free memory with a single process, minus admin_reserve_kbytes.
+Any subsequent attempts to execute a command will result in
+"fork: Cannot allocate memory".
+
+Changing this takes effect whenever an application requests memory.
+
+
+vfs_cache_pressure
+==================
+
+This percentage value controls the tendency of the kernel to reclaim
+the memory which is used for caching of directory and inode objects.
+
+At the default value of vfs_cache_pressure=100 the kernel will attempt to
+reclaim dentries and inodes at a "fair" rate with respect to pagecache and
+swapcache reclaim.  Decreasing vfs_cache_pressure causes the kernel to prefer
+to retain dentry and inode caches. When vfs_cache_pressure=0, the kernel will
+never reclaim dentries and inodes due to memory pressure and this can easily
+lead to out-of-memory conditions. Increasing vfs_cache_pressure beyond 100
+causes the kernel to prefer to reclaim dentries and inodes.
+
+Increasing vfs_cache_pressure significantly beyond 100 may have negative
+performance impact. Reclaim code needs to take various locks to find freeable
+directory and inode objects. With vfs_cache_pressure=1000, it will look for
+ten times more freeable objects than there are.
+
+
+watermark_boost_factor
+======================
+
+This factor controls the level of reclaim when memory is being fragmented.
+It defines the percentage of the high watermark of a zone that will be
+reclaimed if pages of different mobility are being mixed within pageblocks.
+The intent is that compaction has less work to do in the future and to
+increase the success rate of future high-order allocations such as SLUB
+allocations, THP and hugetlbfs pages.
+
+To make it sensible with respect to the watermark_scale_factor
+parameter, the unit is in fractions of 10,000. The default value of
+15,000 on !DISCONTIGMEM configurations means that up to 150% of the high
+watermark will be reclaimed in the event of a pageblock being mixed due
+to fragmentation. The level of reclaim is determined by the number of
+fragmentation events that occurred in the recent past. If this value is
+smaller than a pageblock then a pageblocks worth of pages will be reclaimed
+(e.g.  2MB on 64-bit x86). A boost factor of 0 will disable the feature.
+
+
+watermark_scale_factor
+======================
+
+This factor controls the aggressiveness of kswapd. It defines the
+amount of memory left in a node/system before kswapd is woken up and
+how much memory needs to be free before kswapd goes back to sleep.
+
+The unit is in fractions of 10,000. The default value of 10 means the
+distances between watermarks are 0.1% of the available memory in the
+node/system. The maximum value is 1000, or 10% of memory.
+
+A high rate of threads entering direct reclaim (allocstall) or kswapd
+going to sleep prematurely (kswapd_low_wmark_hit_quickly) can indicate
+that the number of free pages kswapd maintains for latency reasons is
+too small for the allocation bursts occurring in the system. This knob
+can then be used to tune kswapd aggressiveness accordingly.
+
+
+zone_reclaim_mode
+=================
+
+Zone_reclaim_mode allows someone to set more or less aggressive approaches to
+reclaim memory when a zone runs out of memory. If it is set to zero then no
+zone reclaim occurs. Allocations will be satisfied from other zones / nodes
+in the system.
+
+This is value OR'ed together of
+
+=	===================================
+1	Zone reclaim on
+2	Zone reclaim writes dirty pages out
+4	Zone reclaim swaps pages
+=	===================================
+
+zone_reclaim_mode is disabled by default.  For file servers or workloads
+that benefit from having their data cached, zone_reclaim_mode should be
+left disabled as the caching effect is likely to be more important than
+data locality.
+
+zone_reclaim may be enabled if it's known that the workload is partitioned
+such that each partition fits within a NUMA node and that accessing remote
+memory would cause a measurable performance reduction.  The page allocator
+will then reclaim easily reusable pages (those page cache pages that are
+currently not used) before allocating off node pages.
+
+Allowing zone reclaim to write out pages stops processes that are
+writing large amounts of data from dirtying pages on other nodes. Zone
+reclaim will write out dirty pages if a zone fills up and so effectively
+throttle the process. This may decrease the performance of a single process
+since it cannot use all of system memory to buffer the outgoing writes
+anymore but it preserve the memory on other nodes so that the performance
+of other processes running on other nodes will not be affected.
+
+Allowing regular swap effectively restricts allocations to the local
+node unless explicitly overridden by memory policies or cpuset
+configurations.
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
deleted file mode 100644
index c5f0d44433a2..000000000000
--- a/Documentation/sysctl/vm.txt
+++ /dev/null
@@ -1,946 +0,0 @@
-Documentation for /proc/sys/vm/*	kernel version 2.6.29
-	(c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
-	(c) 2008         Peter W. Morreale <pmorreale@novell.com>
-
-For general info and legal blurb, please look in README.
-
-==============================================================
-
-This file contains the documentation for the sysctl files in
-/proc/sys/vm and is valid for Linux kernel version 2.6.29.
-
-The files in this directory can be used to tune the operation
-of the virtual memory (VM) subsystem of the Linux kernel and
-the writeout of dirty data to disk.
-
-Default values and initialization routines for most of these
-files can be found in mm/swap.c.
-
-Currently, these files are in /proc/sys/vm:
-
-- admin_reserve_kbytes
-- block_dump
-- compact_memory
-- compact_unevictable_allowed
-- dirty_background_bytes
-- dirty_background_ratio
-- dirty_bytes
-- dirty_expire_centisecs
-- dirty_ratio
-- dirtytime_expire_seconds
-- dirty_writeback_centisecs
-- drop_caches
-- extfrag_threshold
-- hugetlb_shm_group
-- laptop_mode
-- legacy_va_layout
-- lowmem_reserve_ratio
-- max_map_count
-- memory_failure_early_kill
-- memory_failure_recovery
-- min_free_kbytes
-- min_slab_ratio
-- min_unmapped_ratio
-- mmap_min_addr
-- mmap_rnd_bits
-- mmap_rnd_compat_bits
-- nr_hugepages
-- nr_hugepages_mempolicy
-- nr_overcommit_hugepages
-- nr_trim_pages         (only if CONFIG_MMU=n)
-- numa_zonelist_order
-- oom_dump_tasks
-- oom_kill_allocating_task
-- overcommit_kbytes
-- overcommit_memory
-- overcommit_ratio
-- page-cluster
-- panic_on_oom
-- percpu_pagelist_fraction
-- stat_interval
-- stat_refresh
-- numa_stat
-- swappiness
-- unprivileged_userfaultfd
-- user_reserve_kbytes
-- vfs_cache_pressure
-- watermark_boost_factor
-- watermark_scale_factor
-- zone_reclaim_mode
-
-==============================================================
-
-admin_reserve_kbytes
-
-The amount of free memory in the system that should be reserved for users
-with the capability cap_sys_admin.
-
-admin_reserve_kbytes defaults to min(3% of free pages, 8MB)
-
-That should provide enough for the admin to log in and kill a process,
-if necessary, under the default overcommit 'guess' mode.
-
-Systems running under overcommit 'never' should increase this to account
-for the full Virtual Memory Size of programs used to recover. Otherwise,
-root may not be able to log in to recover the system.
-
-How do you calculate a minimum useful reserve?
-
-sshd or login + bash (or some other shell) + top (or ps, kill, etc.)
-
-For overcommit 'guess', we can sum resident set sizes (RSS).
-On x86_64 this is about 8MB.
-
-For overcommit 'never', we can take the max of their virtual sizes (VSZ)
-and add the sum of their RSS.
-On x86_64 this is about 128MB.
-
-Changing this takes effect whenever an application requests memory.
-
-==============================================================
-
-block_dump
-
-block_dump enables block I/O debugging when set to a nonzero value. More
-information on block I/O debugging is in Documentation/laptops/laptop-mode.rst.
-
-==============================================================
-
-compact_memory
-
-Available only when CONFIG_COMPACTION is set. When 1 is written to the file,
-all zones are compacted such that free memory is available in contiguous
-blocks where possible. This can be important for example in the allocation of
-huge pages although processes will also directly compact memory as required.
-
-==============================================================
-
-compact_unevictable_allowed
-
-Available only when CONFIG_COMPACTION is set. When set to 1, compaction is
-allowed to examine the unevictable lru (mlocked pages) for pages to compact.
-This should be used on systems where stalls for minor page faults are an
-acceptable trade for large contiguous free memory.  Set to 0 to prevent
-compaction from moving pages that are unevictable.  Default value is 1.
-
-==============================================================
-
-dirty_background_bytes
-
-Contains the amount of dirty memory at which the background kernel
-flusher threads will start writeback.
-
-Note: dirty_background_bytes is the counterpart of dirty_background_ratio. Only
-one of them may be specified at a time. When one sysctl is written it is
-immediately taken into account to evaluate the dirty memory limits and the
-other appears as 0 when read.
-
-==============================================================
-
-dirty_background_ratio
-
-Contains, as a percentage of total available memory that contains free pages
-and reclaimable pages, the number of pages at which the background kernel
-flusher threads will start writing out dirty data.
-
-The total available memory is not equal to total system memory.
-
-==============================================================
-
-dirty_bytes
-
-Contains the amount of dirty memory at which a process generating disk writes
-will itself start writeback.
-
-Note: dirty_bytes is the counterpart of dirty_ratio. Only one of them may be
-specified at a time. When one sysctl is written it is immediately taken into
-account to evaluate the dirty memory limits and the other appears as 0 when
-read.
-
-Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any
-value lower than this limit will be ignored and the old configuration will be
-retained.
-
-==============================================================
-
-dirty_expire_centisecs
-
-This tunable is used to define when dirty data is old enough to be eligible
-for writeout by the kernel flusher threads.  It is expressed in 100'ths
-of a second.  Data which has been dirty in-memory for longer than this
-interval will be written out next time a flusher thread wakes up.
-
-==============================================================
-
-dirty_ratio
-
-Contains, as a percentage of total available memory that contains free pages
-and reclaimable pages, the number of pages at which a process which is
-generating disk writes will itself start writing out dirty data.
-
-The total available memory is not equal to total system memory.
-
-==============================================================
-
-dirtytime_expire_seconds
-
-When a lazytime inode is constantly having its pages dirtied, the inode with
-an updated timestamp will never get chance to be written out.  And, if the
-only thing that has happened on the file system is a dirtytime inode caused
-by an atime update, a worker will be scheduled to make sure that inode
-eventually gets pushed out to disk.  This tunable is used to define when dirty
-inode is old enough to be eligible for writeback by the kernel flusher threads.
-And, it is also used as the interval to wakeup dirtytime_writeback thread.
-
-==============================================================
-
-dirty_writeback_centisecs
-
-The kernel flusher threads will periodically wake up and write `old' data
-out to disk.  This tunable expresses the interval between those wakeups, in
-100'ths of a second.
-
-Setting this to zero disables periodic writeback altogether.
-
-==============================================================
-
-drop_caches
-
-Writing to this will cause the kernel to drop clean caches, as well as
-reclaimable slab objects like dentries and inodes.  Once dropped, their
-memory becomes free.
-
-To free pagecache:
-	echo 1 > /proc/sys/vm/drop_caches
-To free reclaimable slab objects (includes dentries and inodes):
-	echo 2 > /proc/sys/vm/drop_caches
-To free slab objects and pagecache:
-	echo 3 > /proc/sys/vm/drop_caches
-
-This is a non-destructive operation and will not free any dirty objects.
-To increase the number of objects freed by this operation, the user may run
-`sync' prior to writing to /proc/sys/vm/drop_caches.  This will minimize the
-number of dirty objects on the system and create more candidates to be
-dropped.
-
-This file is not a means to control the growth of the various kernel caches
-(inodes, dentries, pagecache, etc...)  These objects are automatically
-reclaimed by the kernel when memory is needed elsewhere on the system.
-
-Use of this file can cause performance problems.  Since it discards cached
-objects, it may cost a significant amount of I/O and CPU to recreate the
-dropped objects, especially if they were under heavy use.  Because of this,
-use outside of a testing or debugging environment is not recommended.
-
-You may see informational messages in your kernel log when this file is
-used:
-
-	cat (1234): drop_caches: 3
-
-These are informational only.  They do not mean that anything is wrong
-with your system.  To disable them, echo 4 (bit 2) into drop_caches.
-
-==============================================================
-
-extfrag_threshold
-
-This parameter affects whether the kernel will compact memory or direct
-reclaim to satisfy a high-order allocation. The extfrag/extfrag_index file in
-debugfs shows what the fragmentation index for each order is in each zone in
-the system. Values tending towards 0 imply allocations would fail due to lack
-of memory, values towards 1000 imply failures are due to fragmentation and -1
-implies that the allocation will succeed as long as watermarks are met.
-
-The kernel will not compact memory in a zone if the
-fragmentation index is <= extfrag_threshold. The default value is 500.
-
-==============================================================
-
-highmem_is_dirtyable
-
-Available only for systems with CONFIG_HIGHMEM enabled (32b systems).
-
-This parameter controls whether the high memory is considered for dirty
-writers throttling.  This is not the case by default which means that
-only the amount of memory directly visible/usable by the kernel can
-be dirtied. As a result, on systems with a large amount of memory and
-lowmem basically depleted writers might be throttled too early and
-streaming writes can get very slow.
-
-Changing the value to non zero would allow more memory to be dirtied
-and thus allow writers to write more data which can be flushed to the
-storage more effectively. Note this also comes with a risk of pre-mature
-OOM killer because some writers (e.g. direct block device writes) can
-only use the low memory and they can fill it up with dirty data without
-any throttling.
-
-==============================================================
-
-hugetlb_shm_group
-
-hugetlb_shm_group contains group id that is allowed to create SysV
-shared memory segment using hugetlb page.
-
-==============================================================
-
-laptop_mode
-
-laptop_mode is a knob that controls "laptop mode". All the things that are
-controlled by this knob are discussed in Documentation/laptops/laptop-mode.rst.
-
-==============================================================
-
-legacy_va_layout
-
-If non-zero, this sysctl disables the new 32-bit mmap layout - the kernel
-will use the legacy (2.4) layout for all processes.
-
-==============================================================
-
-lowmem_reserve_ratio
-
-For some specialised workloads on highmem machines it is dangerous for
-the kernel to allow process memory to be allocated from the "lowmem"
-zone.  This is because that memory could then be pinned via the mlock()
-system call, or by unavailability of swapspace.
-
-And on large highmem machines this lack of reclaimable lowmem memory
-can be fatal.
-
-So the Linux page allocator has a mechanism which prevents allocations
-which _could_ use highmem from using too much lowmem.  This means that
-a certain amount of lowmem is defended from the possibility of being
-captured into pinned user memory.
-
-(The same argument applies to the old 16 megabyte ISA DMA region.  This
-mechanism will also defend that region from allocations which could use
-highmem or lowmem).
-
-The `lowmem_reserve_ratio' tunable determines how aggressive the kernel is
-in defending these lower zones.
-
-If you have a machine which uses highmem or ISA DMA and your
-applications are using mlock(), or if you are running with no swap then
-you probably should change the lowmem_reserve_ratio setting.
-
-The lowmem_reserve_ratio is an array. You can see them by reading this file.
--
-% cat /proc/sys/vm/lowmem_reserve_ratio
-256     256     32
--
-
-But, these values are not used directly. The kernel calculates # of protection
-pages for each zones from them. These are shown as array of protection pages
-in /proc/zoneinfo like followings. (This is an example of x86-64 box).
-Each zone has an array of protection pages like this.
-
--
-Node 0, zone      DMA
-  pages free     1355
-        min      3
-        low      3
-        high     4
-	:
-	:
-    numa_other   0
-        protection: (0, 2004, 2004, 2004)
-	^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  pagesets
-    cpu: 0 pcp: 0
-        :
--
-These protections are added to score to judge whether this zone should be used
-for page allocation or should be reclaimed.
-
-In this example, if normal pages (index=2) are required to this DMA zone and
-watermark[WMARK_HIGH] is used for watermark, the kernel judges this zone should
-not be used because pages_free(1355) is smaller than watermark + protection[2]
-(4 + 2004 = 2008). If this protection value is 0, this zone would be used for
-normal page requirement. If requirement is DMA zone(index=0), protection[0]
-(=0) is used.
-
-zone[i]'s protection[j] is calculated by following expression.
-
-(i < j):
-  zone[i]->protection[j]
-  = (total sums of managed_pages from zone[i+1] to zone[j] on the node)
-    / lowmem_reserve_ratio[i];
-(i = j):
-   (should not be protected. = 0;
-(i > j):
-   (not necessary, but looks 0)
-
-The default values of lowmem_reserve_ratio[i] are
-    256 (if zone[i] means DMA or DMA32 zone)
-    32  (others).
-As above expression, they are reciprocal number of ratio.
-256 means 1/256. # of protection pages becomes about "0.39%" of total managed
-pages of higher zones on the node.
-
-If you would like to protect more pages, smaller values are effective.
-The minimum value is 1 (1/1 -> 100%). The value less than 1 completely
-disables protection of the pages.
-
-==============================================================
-
-max_map_count:
-
-This file contains the maximum number of memory map areas a process
-may have. Memory map areas are used as a side-effect of calling
-malloc, directly by mmap, mprotect, and madvise, and also when loading
-shared libraries.
-
-While most applications need less than a thousand maps, certain
-programs, particularly malloc debuggers, may consume lots of them,
-e.g., up to one or two maps per allocation.
-
-The default value is 65536.
-
-=============================================================
-
-memory_failure_early_kill:
-
-Control how to kill processes when uncorrected memory error (typically
-a 2bit error in a memory module) is detected in the background by hardware
-that cannot be handled by the kernel. In some cases (like the page
-still having a valid copy on disk) the kernel will handle the failure
-transparently without affecting any applications. But if there is
-no other uptodate copy of the data it will kill to prevent any data
-corruptions from propagating.
-
-1: Kill all processes that have the corrupted and not reloadable page mapped
-as soon as the corruption is detected.  Note this is not supported
-for a few types of pages, like kernel internally allocated data or
-the swap cache, but works for the majority of user pages.
-
-0: Only unmap the corrupted page from all processes and only kill a process
-who tries to access it.
-
-The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can
-handle this if they want to.
-
-This is only active on architectures/platforms with advanced machine
-check handling and depends on the hardware capabilities.
-
-Applications can override this setting individually with the PR_MCE_KILL prctl
-
-==============================================================
-
-memory_failure_recovery
-
-Enable memory failure recovery (when supported by the platform)
-
-1: Attempt recovery.
-
-0: Always panic on a memory failure.
-
-==============================================================
-
-min_free_kbytes:
-
-This is used to force the Linux VM to keep a minimum number
-of kilobytes free.  The VM uses this number to compute a
-watermark[WMARK_MIN] value for each lowmem zone in the system.
-Each lowmem zone gets a number of reserved free pages based
-proportionally on its size.
-
-Some minimal amount of memory is needed to satisfy PF_MEMALLOC
-allocations; if you set this to lower than 1024KB, your system will
-become subtly broken, and prone to deadlock under high loads.
-
-Setting this too high will OOM your machine instantly.
-
-=============================================================
-
-min_slab_ratio:
-
-This is available only on NUMA kernels.
-
-A percentage of the total pages in each zone.  On Zone reclaim
-(fallback from the local zone occurs) slabs will be reclaimed if more
-than this percentage of pages in a zone are reclaimable slab pages.
-This insures that the slab growth stays under control even in NUMA
-systems that rarely perform global reclaim.
-
-The default is 5 percent.
-
-Note that slab reclaim is triggered in a per zone / node fashion.
-The process of reclaiming slab memory is currently not node specific
-and may not be fast.
-
-=============================================================
-
-min_unmapped_ratio:
-
-This is available only on NUMA kernels.
-
-This is a percentage of the total pages in each zone. Zone reclaim will
-only occur if more than this percentage of pages are in a state that
-zone_reclaim_mode allows to be reclaimed.
-
-If zone_reclaim_mode has the value 4 OR'd, then the percentage is compared
-against all file-backed unmapped pages including swapcache pages and tmpfs
-files. Otherwise, only unmapped pages backed by normal files but not tmpfs
-files and similar are considered.
-
-The default is 1 percent.
-
-==============================================================
-
-mmap_min_addr
-
-This file indicates the amount of address space  which a user process will
-be restricted from mmapping.  Since kernel null dereference bugs could
-accidentally operate based on the information in the first couple of pages
-of memory userspace processes should not be allowed to write to them.  By
-default this value is set to 0 and no protections will be enforced by the
-security module.  Setting this value to something like 64k will allow the
-vast majority of applications to work correctly and provide defense in depth
-against future potential kernel bugs.
-
-==============================================================
-
-mmap_rnd_bits:
-
-This value can be used to select the number of bits to use to
-determine the random offset to the base address of vma regions
-resulting from mmap allocations on architectures which support
-tuning address space randomization.  This value will be bounded
-by the architecture's minimum and maximum supported values.
-
-This value can be changed after boot using the
-/proc/sys/vm/mmap_rnd_bits tunable
-
-==============================================================
-
-mmap_rnd_compat_bits:
-
-This value can be used to select the number of bits to use to
-determine the random offset to the base address of vma regions
-resulting from mmap allocations for applications run in
-compatibility mode on architectures which support tuning address
-space randomization.  This value will be bounded by the
-architecture's minimum and maximum supported values.
-
-This value can be changed after boot using the
-/proc/sys/vm/mmap_rnd_compat_bits tunable
-
-==============================================================
-
-nr_hugepages
-
-Change the minimum size of the hugepage pool.
-
-See Documentation/admin-guide/mm/hugetlbpage.rst
-
-==============================================================
-
-nr_hugepages_mempolicy
-
-Change the size of the hugepage pool at run-time on a specific
-set of NUMA nodes.
-
-See Documentation/admin-guide/mm/hugetlbpage.rst
-
-==============================================================
-
-nr_overcommit_hugepages
-
-Change the maximum size of the hugepage pool. The maximum is
-nr_hugepages + nr_overcommit_hugepages.
-
-See Documentation/admin-guide/mm/hugetlbpage.rst
-
-==============================================================
-
-nr_trim_pages
-
-This is available only on NOMMU kernels.
-
-This value adjusts the excess page trimming behaviour of power-of-2 aligned
-NOMMU mmap allocations.
-
-A value of 0 disables trimming of allocations entirely, while a value of 1
-trims excess pages aggressively. Any value >= 1 acts as the watermark where
-trimming of allocations is initiated.
-
-The default value is 1.
-
-See Documentation/nommu-mmap.txt for more information.
-
-==============================================================
-
-numa_zonelist_order
-
-This sysctl is only for NUMA and it is deprecated. Anything but
-Node order will fail!
-
-'where the memory is allocated from' is controlled by zonelists.
-(This documentation ignores ZONE_HIGHMEM/ZONE_DMA32 for simple explanation.
- you may be able to read ZONE_DMA as ZONE_DMA32...)
-
-In non-NUMA case, a zonelist for GFP_KERNEL is ordered as following.
-ZONE_NORMAL -> ZONE_DMA
-This means that a memory allocation request for GFP_KERNEL will
-get memory from ZONE_DMA only when ZONE_NORMAL is not available.
-
-In NUMA case, you can think of following 2 types of order.
-Assume 2 node NUMA and below is zonelist of Node(0)'s GFP_KERNEL
-
-(A) Node(0) ZONE_NORMAL -> Node(0) ZONE_DMA -> Node(1) ZONE_NORMAL
-(B) Node(0) ZONE_NORMAL -> Node(1) ZONE_NORMAL -> Node(0) ZONE_DMA.
-
-Type(A) offers the best locality for processes on Node(0), but ZONE_DMA
-will be used before ZONE_NORMAL exhaustion. This increases possibility of
-out-of-memory(OOM) of ZONE_DMA because ZONE_DMA is tend to be small.
-
-Type(B) cannot offer the best locality but is more robust against OOM of
-the DMA zone.
-
-Type(A) is called as "Node" order. Type (B) is "Zone" order.
-
-"Node order" orders the zonelists by node, then by zone within each node.
-Specify "[Nn]ode" for node order
-
-"Zone Order" orders the zonelists by zone type, then by node within each
-zone.  Specify "[Zz]one" for zone order.
-
-Specify "[Dd]efault" to request automatic configuration.
-
-On 32-bit, the Normal zone needs to be preserved for allocations accessible
-by the kernel, so "zone" order will be selected.
-
-On 64-bit, devices that require DMA32/DMA are relatively rare, so "node"
-order will be selected.
-
-Default order is recommended unless this is causing problems for your
-system/application.
-
-==============================================================
-
-oom_dump_tasks
-
-Enables a system-wide task dump (excluding kernel threads) to be produced
-when the kernel performs an OOM-killing and includes such information as
-pid, uid, tgid, vm size, rss, pgtables_bytes, swapents, oom_score_adj
-score, and name.  This is helpful to determine why the OOM killer was
-invoked, to identify the rogue task that caused it, and to determine why
-the OOM killer chose the task it did to kill.
-
-If this is set to zero, this information is suppressed.  On very
-large systems with thousands of tasks it may not be feasible to dump
-the memory state information for each one.  Such systems should not
-be forced to incur a performance penalty in OOM conditions when the
-information may not be desired.
-
-If this is set to non-zero, this information is shown whenever the
-OOM killer actually kills a memory-hogging task.
-
-The default value is 1 (enabled).
-
-==============================================================
-
-oom_kill_allocating_task
-
-This enables or disables killing the OOM-triggering task in
-out-of-memory situations.
-
-If this is set to zero, the OOM killer will scan through the entire
-tasklist and select a task based on heuristics to kill.  This normally
-selects a rogue memory-hogging task that frees up a large amount of
-memory when killed.
-
-If this is set to non-zero, the OOM killer simply kills the task that
-triggered the out-of-memory condition.  This avoids the expensive
-tasklist scan.
-
-If panic_on_oom is selected, it takes precedence over whatever value
-is used in oom_kill_allocating_task.
-
-The default value is 0.
-
-==============================================================
-
-overcommit_kbytes:
-
-When overcommit_memory is set to 2, the committed address space is not
-permitted to exceed swap plus this amount of physical RAM. See below.
-
-Note: overcommit_kbytes is the counterpart of overcommit_ratio. Only one
-of them may be specified at a time. Setting one disables the other (which
-then appears as 0 when read).
-
-==============================================================
-
-overcommit_memory:
-
-This value contains a flag that enables memory overcommitment.
-
-When this flag is 0, the kernel attempts to estimate the amount
-of free memory left when userspace requests more memory.
-
-When this flag is 1, the kernel pretends there is always enough
-memory until it actually runs out.
-
-When this flag is 2, the kernel uses a "never overcommit"
-policy that attempts to prevent any overcommit of memory.
-Note that user_reserve_kbytes affects this policy.
-
-This feature can be very useful because there are a lot of
-programs that malloc() huge amounts of memory "just-in-case"
-and don't use much of it.
-
-The default value is 0.
-
-See Documentation/vm/overcommit-accounting.rst and
-mm/util.c::__vm_enough_memory() for more information.
-
-==============================================================
-
-overcommit_ratio:
-
-When overcommit_memory is set to 2, the committed address
-space is not permitted to exceed swap plus this percentage
-of physical RAM.  See above.
-
-==============================================================
-
-page-cluster
-
-page-cluster controls the number of pages up to which consecutive pages
-are read in from swap in a single attempt. This is the swap counterpart
-to page cache readahead.
-The mentioned consecutivity is not in terms of virtual/physical addresses,
-but consecutive on swap space - that means they were swapped out together.
-
-It is a logarithmic value - setting it to zero means "1 page", setting
-it to 1 means "2 pages", setting it to 2 means "4 pages", etc.
-Zero disables swap readahead completely.
-
-The default value is three (eight pages at a time).  There may be some
-small benefits in tuning this to a different value if your workload is
-swap-intensive.
-
-Lower values mean lower latencies for initial faults, but at the same time
-extra faults and I/O delays for following faults if they would have been part of
-that consecutive pages readahead would have brought in.
-
-=============================================================
-
-panic_on_oom
-
-This enables or disables panic on out-of-memory feature.
-
-If this is set to 0, the kernel will kill some rogue process,
-called oom_killer.  Usually, oom_killer can kill rogue processes and
-system will survive.
-
-If this is set to 1, the kernel panics when out-of-memory happens.
-However, if a process limits using nodes by mempolicy/cpusets,
-and those nodes become memory exhaustion status, one process
-may be killed by oom-killer. No panic occurs in this case.
-Because other nodes' memory may be free. This means system total status
-may be not fatal yet.
-
-If this is set to 2, the kernel panics compulsorily even on the
-above-mentioned. Even oom happens under memory cgroup, the whole
-system panics.
-
-The default value is 0.
-1 and 2 are for failover of clustering. Please select either
-according to your policy of failover.
-panic_on_oom=2+kdump gives you very strong tool to investigate
-why oom happens. You can get snapshot.
-
-=============================================================
-
-percpu_pagelist_fraction
-
-This is the fraction of pages at most (high mark pcp->high) in each zone that
-are allocated for each per cpu page list.  The min value for this is 8.  It
-means that we don't allow more than 1/8th of pages in each zone to be
-allocated in any single per_cpu_pagelist.  This entry only changes the value
-of hot per cpu pagelists.  User can specify a number like 100 to allocate
-1/100th of each zone to each per cpu page list.
-
-The batch value of each per cpu pagelist is also updated as a result.  It is
-set to pcp->high/4.  The upper limit of batch is (PAGE_SHIFT * 8)
-
-The initial value is zero.  Kernel does not use this value at boot time to set
-the high water marks for each per cpu page list.  If the user writes '0' to this
-sysctl, it will revert to this default behavior.
-
-==============================================================
-
-stat_interval
-
-The time interval between which vm statistics are updated.  The default
-is 1 second.
-
-==============================================================
-
-stat_refresh
-
-Any read or write (by root only) flushes all the per-cpu vm statistics
-into their global totals, for more accurate reports when testing
-e.g. cat /proc/sys/vm/stat_refresh /proc/meminfo
-
-As a side-effect, it also checks for negative totals (elsewhere reported
-as 0) and "fails" with EINVAL if any are found, with a warning in dmesg.
-(At time of writing, a few stats are known sometimes to be found negative,
-with no ill effects: errors and warnings on these stats are suppressed.)
-
-==============================================================
-
-numa_stat
-
-This interface allows runtime configuration of numa statistics.
-
-When page allocation performance becomes a bottleneck and you can tolerate
-some possible tool breakage and decreased numa counter precision, you can
-do:
-	echo 0 > /proc/sys/vm/numa_stat
-
-When page allocation performance is not a bottleneck and you want all
-tooling to work, you can do:
-	echo 1 > /proc/sys/vm/numa_stat
-
-==============================================================
-
-swappiness
-
-This control is used to define how aggressive the kernel will swap
-memory pages.  Higher values will increase aggressiveness, lower values
-decrease the amount of swap.  A value of 0 instructs the kernel not to
-initiate swap until the amount of free and file-backed pages is less
-than the high water mark in a zone.
-
-The default value is 60.
-
-==============================================================
-
-unprivileged_userfaultfd
-
-This flag controls whether unprivileged users can use the userfaultfd
-system calls.  Set this to 1 to allow unprivileged users to use the
-userfaultfd system calls, or set this to 0 to restrict userfaultfd to only
-privileged users (with SYS_CAP_PTRACE capability).
-
-The default value is 1.
-
-==============================================================
-
-- user_reserve_kbytes
-
-When overcommit_memory is set to 2, "never overcommit" mode, reserve
-min(3% of current process size, user_reserve_kbytes) of free memory.
-This is intended to prevent a user from starting a single memory hogging
-process, such that they cannot recover (kill the hog).
-
-user_reserve_kbytes defaults to min(3% of the current process size, 128MB).
-
-If this is reduced to zero, then the user will be allowed to allocate
-all free memory with a single process, minus admin_reserve_kbytes.
-Any subsequent attempts to execute a command will result in
-"fork: Cannot allocate memory".
-
-Changing this takes effect whenever an application requests memory.
-
-==============================================================
-
-vfs_cache_pressure
-------------------
-
-This percentage value controls the tendency of the kernel to reclaim
-the memory which is used for caching of directory and inode objects.
-
-At the default value of vfs_cache_pressure=100 the kernel will attempt to
-reclaim dentries and inodes at a "fair" rate with respect to pagecache and
-swapcache reclaim.  Decreasing vfs_cache_pressure causes the kernel to prefer
-to retain dentry and inode caches. When vfs_cache_pressure=0, the kernel will
-never reclaim dentries and inodes due to memory pressure and this can easily
-lead to out-of-memory conditions. Increasing vfs_cache_pressure beyond 100
-causes the kernel to prefer to reclaim dentries and inodes.
-
-Increasing vfs_cache_pressure significantly beyond 100 may have negative
-performance impact. Reclaim code needs to take various locks to find freeable
-directory and inode objects. With vfs_cache_pressure=1000, it will look for
-ten times more freeable objects than there are.
-
-=============================================================
-
-watermark_boost_factor:
-
-This factor controls the level of reclaim when memory is being fragmented.
-It defines the percentage of the high watermark of a zone that will be
-reclaimed if pages of different mobility are being mixed within pageblocks.
-The intent is that compaction has less work to do in the future and to
-increase the success rate of future high-order allocations such as SLUB
-allocations, THP and hugetlbfs pages.
-
-To make it sensible with respect to the watermark_scale_factor
-parameter, the unit is in fractions of 10,000. The default value of
-15,000 on !DISCONTIGMEM configurations means that up to 150% of the high
-watermark will be reclaimed in the event of a pageblock being mixed due
-to fragmentation. The level of reclaim is determined by the number of
-fragmentation events that occurred in the recent past. If this value is
-smaller than a pageblock then a pageblocks worth of pages will be reclaimed
-(e.g.  2MB on 64-bit x86). A boost factor of 0 will disable the feature.
-
-=============================================================
-
-watermark_scale_factor:
-
-This factor controls the aggressiveness of kswapd. It defines the
-amount of memory left in a node/system before kswapd is woken up and
-how much memory needs to be free before kswapd goes back to sleep.
-
-The unit is in fractions of 10,000. The default value of 10 means the
-distances between watermarks are 0.1% of the available memory in the
-node/system. The maximum value is 1000, or 10% of memory.
-
-A high rate of threads entering direct reclaim (allocstall) or kswapd
-going to sleep prematurely (kswapd_low_wmark_hit_quickly) can indicate
-that the number of free pages kswapd maintains for latency reasons is
-too small for the allocation bursts occurring in the system. This knob
-can then be used to tune kswapd aggressiveness accordingly.
-
-==============================================================
-
-zone_reclaim_mode:
-
-Zone_reclaim_mode allows someone to set more or less aggressive approaches to
-reclaim memory when a zone runs out of memory. If it is set to zero then no
-zone reclaim occurs. Allocations will be satisfied from other zones / nodes
-in the system.
-
-This is value ORed together of
-
-1	= Zone reclaim on
-2	= Zone reclaim writes dirty pages out
-4	= Zone reclaim swaps pages
-
-zone_reclaim_mode is disabled by default.  For file servers or workloads
-that benefit from having their data cached, zone_reclaim_mode should be
-left disabled as the caching effect is likely to be more important than
-data locality.
-
-zone_reclaim may be enabled if it's known that the workload is partitioned
-such that each partition fits within a NUMA node and that accessing remote
-memory would cause a measurable performance reduction.  The page allocator
-will then reclaim easily reusable pages (those page cache pages that are
-currently not used) before allocating off node pages.
-
-Allowing zone reclaim to write out pages stops processes that are
-writing large amounts of data from dirtying pages on other nodes. Zone
-reclaim will write out dirty pages if a zone fills up and so effectively
-throttle the process. This may decrease the performance of a single process
-since it cannot use all of system memory to buffer the outgoing writes
-anymore but it preserve the memory on other nodes so that the performance
-of other processes running on other nodes will not be affected.
-
-Allowing regular swap effectively restricts allocations to the local
-node unless explicitly overridden by memory policies or cpuset
-configurations.
-
-============ End of Document =================================
diff --git a/Documentation/vm/unevictable-lru.rst b/Documentation/vm/unevictable-lru.rst
index c6d94118fbcc..8ba656f37cd8 100644
--- a/Documentation/vm/unevictable-lru.rst
+++ b/Documentation/vm/unevictable-lru.rst
@@ -439,7 +439,7 @@ Compacting MLOCKED Pages
 
 The unevictable LRU can be scanned for compactable regions and the default
 behavior is to do so.  /proc/sys/vm/compact_unevictable_allowed controls
-this behavior (see Documentation/sysctl/vm.txt).  Once scanning of the
+this behavior (see Documentation/sysctl/vm.rst).  Once scanning of the
 unevictable LRU is enabled, the work of compaction is mostly handled by
 the page migration code and the same work flow as described in MIGRATING
 MLOCKED PAGES will apply.
-- 
cgit 


From 898bd37a92063e46bc8d7b870781cecd66234f92 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 18 Apr 2019 19:45:00 -0300
Subject: docs: block: convert to ReST

Rename the block documentation files to ReST, add an
index for them and adjust in order to produce a nice html
output via the Sphinx build system.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/admin-guide/kernel-parameters.txt |    8 +-
 Documentation/block/bfq-iosched.rst             |  597 ++++++++++++
 Documentation/block/bfq-iosched.txt             |  583 -----------
 Documentation/block/biodoc.rst                  | 1168 +++++++++++++++++++++++
 Documentation/block/biodoc.txt                  | 1076 ---------------------
 Documentation/block/biovecs.rst                 |  146 +++
 Documentation/block/biovecs.txt                 |  144 ---
 Documentation/block/capability.rst              |   18 +
 Documentation/block/capability.txt              |   15 -
 Documentation/block/cmdline-partition.rst       |   53 +
 Documentation/block/cmdline-partition.txt       |   46 -
 Documentation/block/data-integrity.rst          |  291 ++++++
 Documentation/block/data-integrity.txt          |  281 ------
 Documentation/block/deadline-iosched.rst        |   72 ++
 Documentation/block/deadline-iosched.txt        |   75 --
 Documentation/block/index.rst                   |   25 +
 Documentation/block/ioprio.rst                  |  182 ++++
 Documentation/block/ioprio.txt                  |  183 ----
 Documentation/block/kyber-iosched.rst           |   15 +
 Documentation/block/kyber-iosched.txt           |   14 -
 Documentation/block/null_blk.rst                |  126 +++
 Documentation/block/null_blk.txt                |   99 --
 Documentation/block/pr.rst                      |  119 +++
 Documentation/block/pr.txt                      |  119 ---
 Documentation/block/queue-sysfs.rst             |  254 +++++
 Documentation/block/queue-sysfs.txt             |  253 -----
 Documentation/block/request.rst                 |   99 ++
 Documentation/block/request.txt                 |   88 --
 Documentation/block/stat.rst                    |   93 ++
 Documentation/block/stat.txt                    |   86 --
 Documentation/block/switching-sched.rst         |   39 +
 Documentation/block/switching-sched.txt         |   35 -
 Documentation/block/writeback_cache_control.rst |   86 ++
 Documentation/block/writeback_cache_control.txt |   86 --
 Documentation/blockdev/zram.rst                 |    2 +-
 35 files changed, 3388 insertions(+), 3188 deletions(-)
 create mode 100644 Documentation/block/bfq-iosched.rst
 delete mode 100644 Documentation/block/bfq-iosched.txt
 create mode 100644 Documentation/block/biodoc.rst
 delete mode 100644 Documentation/block/biodoc.txt
 create mode 100644 Documentation/block/biovecs.rst
 delete mode 100644 Documentation/block/biovecs.txt
 create mode 100644 Documentation/block/capability.rst
 delete mode 100644 Documentation/block/capability.txt
 create mode 100644 Documentation/block/cmdline-partition.rst
 delete mode 100644 Documentation/block/cmdline-partition.txt
 create mode 100644 Documentation/block/data-integrity.rst
 delete mode 100644 Documentation/block/data-integrity.txt
 create mode 100644 Documentation/block/deadline-iosched.rst
 delete mode 100644 Documentation/block/deadline-iosched.txt
 create mode 100644 Documentation/block/index.rst
 create mode 100644 Documentation/block/ioprio.rst
 delete mode 100644 Documentation/block/ioprio.txt
 create mode 100644 Documentation/block/kyber-iosched.rst
 delete mode 100644 Documentation/block/kyber-iosched.txt
 create mode 100644 Documentation/block/null_blk.rst
 delete mode 100644 Documentation/block/null_blk.txt
 create mode 100644 Documentation/block/pr.rst
 delete mode 100644 Documentation/block/pr.txt
 create mode 100644 Documentation/block/queue-sysfs.rst
 delete mode 100644 Documentation/block/queue-sysfs.txt
 create mode 100644 Documentation/block/request.rst
 delete mode 100644 Documentation/block/request.txt
 create mode 100644 Documentation/block/stat.rst
 delete mode 100644 Documentation/block/stat.txt
 create mode 100644 Documentation/block/switching-sched.rst
 delete mode 100644 Documentation/block/switching-sched.txt
 create mode 100644 Documentation/block/writeback_cache_control.rst
 delete mode 100644 Documentation/block/writeback_cache_control.txt

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 01123f1de354..e8e28cac32a3 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -430,7 +430,7 @@
 
 	blkdevparts=	Manual partition parsing of block device(s) for
 			embedded devices based on command line input.
-			See Documentation/block/cmdline-partition.txt
+			See Documentation/block/cmdline-partition.rst
 
 	boot_delay=	Milliseconds to delay each printk during boot.
 			Values larger than 10 seconds (10000) are changed to
@@ -1199,9 +1199,9 @@
 
 	elevator=	[IOSCHED]
 			Format: { "mq-deadline" | "kyber" | "bfq" }
-			See Documentation/block/deadline-iosched.txt,
-			Documentation/block/kyber-iosched.txt and
-			Documentation/block/bfq-iosched.txt for details.
+			See Documentation/block/deadline-iosched.rst,
+			Documentation/block/kyber-iosched.rst and
+			Documentation/block/bfq-iosched.rst for details.
 
 	elfcorehdr=[size[KMG]@]offset[KMG] [IA64,PPC,SH,X86,S390]
 			Specifies physical address of start of kernel core
diff --git a/Documentation/block/bfq-iosched.rst b/Documentation/block/bfq-iosched.rst
new file mode 100644
index 000000000000..2c13b2fc1888
--- /dev/null
+++ b/Documentation/block/bfq-iosched.rst
@@ -0,0 +1,597 @@
+==========================
+BFQ (Budget Fair Queueing)
+==========================
+
+BFQ is a proportional-share I/O scheduler, with some extra
+low-latency capabilities. In addition to cgroups support (blkio or io
+controllers), BFQ's main features are:
+
+- BFQ guarantees a high system and application responsiveness, and a
+  low latency for time-sensitive applications, such as audio or video
+  players;
+- BFQ distributes bandwidth, and not just time, among processes or
+  groups (switching back to time distribution when needed to keep
+  throughput high).
+
+In its default configuration, BFQ privileges latency over
+throughput. So, when needed for achieving a lower latency, BFQ builds
+schedules that may lead to a lower throughput. If your main or only
+goal, for a given device, is to achieve the maximum-possible
+throughput at all times, then do switch off all low-latency heuristics
+for that device, by setting low_latency to 0. See Section 3 for
+details on how to configure BFQ for the desired tradeoff between
+latency and throughput, or on how to maximize throughput.
+
+As every I/O scheduler, BFQ adds some overhead to per-I/O-request
+processing. To give an idea of this overhead, the total,
+single-lock-protected, per-request processing time of BFQ---i.e., the
+sum of the execution times of the request insertion, dispatch and
+completion hooks---is, e.g., 1.9 us on an Intel Core i7-2760QM@2.40GHz
+(dated CPU for notebooks; time measured with simple code
+instrumentation, and using the throughput-sync.sh script of the S
+suite [1], in performance-profiling mode). To put this result into
+context, the total, single-lock-protected, per-request execution time
+of the lightest I/O scheduler available in blk-mq, mq-deadline, is 0.7
+us (mq-deadline is ~800 LOC, against ~10500 LOC for BFQ).
+
+Scheduling overhead further limits the maximum IOPS that a CPU can
+process (already limited by the execution of the rest of the I/O
+stack). To give an idea of the limits with BFQ, on slow or average
+CPUs, here are, first, the limits of BFQ for three different CPUs, on,
+respectively, an average laptop, an old desktop, and a cheap embedded
+system, in case full hierarchical support is enabled (i.e.,
+CONFIG_BFQ_GROUP_IOSCHED is set), but CONFIG_BFQ_CGROUP_DEBUG is not
+set (Section 4-2):
+- Intel i7-4850HQ: 400 KIOPS
+- AMD A8-3850: 250 KIOPS
+- ARM CortexTM-A53 Octa-core: 80 KIOPS
+
+If CONFIG_BFQ_CGROUP_DEBUG is set (and of course full hierarchical
+support is enabled), then the sustainable throughput with BFQ
+decreases, because all blkio.bfq* statistics are created and updated
+(Section 4-2). For BFQ, this leads to the following maximum
+sustainable throughputs, on the same systems as above:
+- Intel i7-4850HQ: 310 KIOPS
+- AMD A8-3850: 200 KIOPS
+- ARM CortexTM-A53 Octa-core: 56 KIOPS
+
+BFQ works for multi-queue devices too.
+
+.. The table of contents follow. Impatients can just jump to Section 3.
+
+.. CONTENTS
+
+   1. When may BFQ be useful?
+    1-1 Personal systems
+    1-2 Server systems
+   2. How does BFQ work?
+   3. What are BFQ's tunables and how to properly configure BFQ?
+   4. BFQ group scheduling
+    4-1 Service guarantees provided
+    4-2 Interface
+
+1. When may BFQ be useful?
+==========================
+
+BFQ provides the following benefits on personal and server systems.
+
+1-1 Personal systems
+--------------------
+
+Low latency for interactive applications
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Regardless of the actual background workload, BFQ guarantees that, for
+interactive tasks, the storage device is virtually as responsive as if
+it was idle. For example, even if one or more of the following
+background workloads are being executed:
+
+- one or more large files are being read, written or copied,
+- a tree of source files is being compiled,
+- one or more virtual machines are performing I/O,
+- a software update is in progress,
+- indexing daemons are scanning filesystems and updating their
+  databases,
+
+starting an application or loading a file from within an application
+takes about the same time as if the storage device was idle. As a
+comparison, with CFQ, NOOP or DEADLINE, and in the same conditions,
+applications experience high latencies, or even become unresponsive
+until the background workload terminates (also on SSDs).
+
+Low latency for soft real-time applications
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Also soft real-time applications, such as audio and video
+players/streamers, enjoy a low latency and a low drop rate, regardless
+of the background I/O workload. As a consequence, these applications
+do not suffer from almost any glitch due to the background workload.
+
+Higher speed for code-development tasks
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If some additional workload happens to be executed in parallel, then
+BFQ executes the I/O-related components of typical code-development
+tasks (compilation, checkout, merge, ...) much more quickly than CFQ,
+NOOP or DEADLINE.
+
+High throughput
+^^^^^^^^^^^^^^^
+
+On hard disks, BFQ achieves up to 30% higher throughput than CFQ, and
+up to 150% higher throughput than DEADLINE and NOOP, with all the
+sequential workloads considered in our tests. With random workloads,
+and with all the workloads on flash-based devices, BFQ achieves,
+instead, about the same throughput as the other schedulers.
+
+Strong fairness, bandwidth and delay guarantees
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+BFQ distributes the device throughput, and not just the device time,
+among I/O-bound applications in proportion their weights, with any
+workload and regardless of the device parameters. From these bandwidth
+guarantees, it is possible to compute tight per-I/O-request delay
+guarantees by a simple formula. If not configured for strict service
+guarantees, BFQ switches to time-based resource sharing (only) for
+applications that would otherwise cause a throughput loss.
+
+1-2 Server systems
+------------------
+
+Most benefits for server systems follow from the same service
+properties as above. In particular, regardless of whether additional,
+possibly heavy workloads are being served, BFQ guarantees:
+
+* audio and video-streaming with zero or very low jitter and drop
+  rate;
+
+* fast retrieval of WEB pages and embedded objects;
+
+* real-time recording of data in live-dumping applications (e.g.,
+  packet logging);
+
+* responsiveness in local and remote access to a server.
+
+
+2. How does BFQ work?
+=====================
+
+BFQ is a proportional-share I/O scheduler, whose general structure,
+plus a lot of code, are borrowed from CFQ.
+
+- Each process doing I/O on a device is associated with a weight and a
+  `(bfq_)queue`.
+
+- BFQ grants exclusive access to the device, for a while, to one queue
+  (process) at a time, and implements this service model by
+  associating every queue with a budget, measured in number of
+  sectors.
+
+  - After a queue is granted access to the device, the budget of the
+    queue is decremented, on each request dispatch, by the size of the
+    request.
+
+  - The in-service queue is expired, i.e., its service is suspended,
+    only if one of the following events occurs: 1) the queue finishes
+    its budget, 2) the queue empties, 3) a "budget timeout" fires.
+
+    - The budget timeout prevents processes doing random I/O from
+      holding the device for too long and dramatically reducing
+      throughput.
+
+    - Actually, as in CFQ, a queue associated with a process issuing
+      sync requests may not be expired immediately when it empties. In
+      contrast, BFQ may idle the device for a short time interval,
+      giving the process the chance to go on being served if it issues
+      a new request in time. Device idling typically boosts the
+      throughput on rotational devices and on non-queueing flash-based
+      devices, if processes do synchronous and sequential I/O. In
+      addition, under BFQ, device idling is also instrumental in
+      guaranteeing the desired throughput fraction to processes
+      issuing sync requests (see the description of the slice_idle
+      tunable in this document, or [1, 2], for more details).
+
+      - With respect to idling for service guarantees, if several
+	processes are competing for the device at the same time, but
+	all processes and groups have the same weight, then BFQ
+	guarantees the expected throughput distribution without ever
+	idling the device. Throughput is thus as high as possible in
+	this common scenario.
+
+     - On flash-based storage with internal queueing of commands
+       (typically NCQ), device idling happens to be always detrimental
+       for throughput. So, with these devices, BFQ performs idling
+       only when strictly needed for service guarantees, i.e., for
+       guaranteeing low latency or fairness. In these cases, overall
+       throughput may be sub-optimal. No solution currently exists to
+       provide both strong service guarantees and optimal throughput
+       on devices with internal queueing.
+
+  - If low-latency mode is enabled (default configuration), BFQ
+    executes some special heuristics to detect interactive and soft
+    real-time applications (e.g., video or audio players/streamers),
+    and to reduce their latency. The most important action taken to
+    achieve this goal is to give to the queues associated with these
+    applications more than their fair share of the device
+    throughput. For brevity, we call just "weight-raising" the whole
+    sets of actions taken by BFQ to privilege these queues. In
+    particular, BFQ provides a milder form of weight-raising for
+    interactive applications, and a stronger form for soft real-time
+    applications.
+
+  - BFQ automatically deactivates idling for queues born in a burst of
+    queue creations. In fact, these queues are usually associated with
+    the processes of applications and services that benefit mostly
+    from a high throughput. Examples are systemd during boot, or git
+    grep.
+
+  - As CFQ, BFQ merges queues performing interleaved I/O, i.e.,
+    performing random I/O that becomes mostly sequential if
+    merged. Differently from CFQ, BFQ achieves this goal with a more
+    reactive mechanism, called Early Queue Merge (EQM). EQM is so
+    responsive in detecting interleaved I/O (cooperating processes),
+    that it enables BFQ to achieve a high throughput, by queue
+    merging, even for queues for which CFQ needs a different
+    mechanism, preemption, to get a high throughput. As such EQM is a
+    unified mechanism to achieve a high throughput with interleaved
+    I/O.
+
+  - Queues are scheduled according to a variant of WF2Q+, named
+    B-WF2Q+, and implemented using an augmented rb-tree to preserve an
+    O(log N) overall complexity.  See [2] for more details. B-WF2Q+ is
+    also ready for hierarchical scheduling, details in Section 4.
+
+  - B-WF2Q+ guarantees a tight deviation with respect to an ideal,
+    perfectly fair, and smooth service. In particular, B-WF2Q+
+    guarantees that each queue receives a fraction of the device
+    throughput proportional to its weight, even if the throughput
+    fluctuates, and regardless of: the device parameters, the current
+    workload and the budgets assigned to the queue.
+
+  - The last, budget-independence, property (although probably
+    counterintuitive in the first place) is definitely beneficial, for
+    the following reasons:
+
+    - First, with any proportional-share scheduler, the maximum
+      deviation with respect to an ideal service is proportional to
+      the maximum budget (slice) assigned to queues. As a consequence,
+      BFQ can keep this deviation tight not only because of the
+      accurate service of B-WF2Q+, but also because BFQ *does not*
+      need to assign a larger budget to a queue to let the queue
+      receive a higher fraction of the device throughput.
+
+    - Second, BFQ is free to choose, for every process (queue), the
+      budget that best fits the needs of the process, or best
+      leverages the I/O pattern of the process. In particular, BFQ
+      updates queue budgets with a simple feedback-loop algorithm that
+      allows a high throughput to be achieved, while still providing
+      tight latency guarantees to time-sensitive applications. When
+      the in-service queue expires, this algorithm computes the next
+      budget of the queue so as to:
+
+      - Let large budgets be eventually assigned to the queues
+	associated with I/O-bound applications performing sequential
+	I/O: in fact, the longer these applications are served once
+	got access to the device, the higher the throughput is.
+
+      - Let small budgets be eventually assigned to the queues
+	associated with time-sensitive applications (which typically
+	perform sporadic and short I/O), because, the smaller the
+	budget assigned to a queue waiting for service is, the sooner
+	B-WF2Q+ will serve that queue (Subsec 3.3 in [2]).
+
+- If several processes are competing for the device at the same time,
+  but all processes and groups have the same weight, then BFQ
+  guarantees the expected throughput distribution without ever idling
+  the device. It uses preemption instead. Throughput is then much
+  higher in this common scenario.
+
+- ioprio classes are served in strict priority order, i.e.,
+  lower-priority queues are not served as long as there are
+  higher-priority queues.  Among queues in the same class, the
+  bandwidth is distributed in proportion to the weight of each
+  queue. A very thin extra bandwidth is however guaranteed to
+  the Idle class, to prevent it from starving.
+
+
+3. What are BFQ's tunables and how to properly configure BFQ?
+=============================================================
+
+Most BFQ tunables affect service guarantees (basically latency and
+fairness) and throughput. For full details on how to choose the
+desired tradeoff between service guarantees and throughput, see the
+parameters slice_idle, strict_guarantees and low_latency. For details
+on how to maximise throughput, see slice_idle, timeout_sync and
+max_budget. The other performance-related parameters have been
+inherited from, and have been preserved mostly for compatibility with
+CFQ. So far, no performance improvement has been reported after
+changing the latter parameters in BFQ.
+
+In particular, the tunables back_seek-max, back_seek_penalty,
+fifo_expire_async and fifo_expire_sync below are the same as in
+CFQ. Their description is just copied from that for CFQ. Some
+considerations in the description of slice_idle are copied from CFQ
+too.
+
+per-process ioprio and weight
+-----------------------------
+
+Unless the cgroups interface is used (see "4. BFQ group scheduling"),
+weights can be assigned to processes only indirectly, through I/O
+priorities, and according to the relation:
+weight = (IOPRIO_BE_NR - ioprio) * 10.
+
+Beware that, if low-latency is set, then BFQ automatically raises the
+weight of the queues associated with interactive and soft real-time
+applications. Unset this tunable if you need/want to control weights.
+
+slice_idle
+----------
+
+This parameter specifies how long BFQ should idle for next I/O
+request, when certain sync BFQ queues become empty. By default
+slice_idle is a non-zero value. Idling has a double purpose: boosting
+throughput and making sure that the desired throughput distribution is
+respected (see the description of how BFQ works, and, if needed, the
+papers referred there).
+
+As for throughput, idling can be very helpful on highly seeky media
+like single spindle SATA/SAS disks where we can cut down on overall
+number of seeks and see improved throughput.
+
+Setting slice_idle to 0 will remove all the idling on queues and one
+should see an overall improved throughput on faster storage devices
+like multiple SATA/SAS disks in hardware RAID configuration, as well
+as flash-based storage with internal command queueing (and
+parallelism).
+
+So depending on storage and workload, it might be useful to set
+slice_idle=0.  In general for SATA/SAS disks and software RAID of
+SATA/SAS disks keeping slice_idle enabled should be useful. For any
+configurations where there are multiple spindles behind single LUN
+(Host based hardware RAID controller or for storage arrays), or with
+flash-based fast storage, setting slice_idle=0 might end up in better
+throughput and acceptable latencies.
+
+Idling is however necessary to have service guarantees enforced in
+case of differentiated weights or differentiated I/O-request lengths.
+To see why, suppose that a given BFQ queue A must get several I/O
+requests served for each request served for another queue B. Idling
+ensures that, if A makes a new I/O request slightly after becoming
+empty, then no request of B is dispatched in the middle, and thus A
+does not lose the possibility to get more than one request dispatched
+before the next request of B is dispatched. Note that idling
+guarantees the desired differentiated treatment of queues only in
+terms of I/O-request dispatches. To guarantee that the actual service
+order then corresponds to the dispatch order, the strict_guarantees
+tunable must be set too.
+
+There is an important flipside for idling: apart from the above cases
+where it is beneficial also for throughput, idling can severely impact
+throughput. One important case is random workload. Because of this
+issue, BFQ tends to avoid idling as much as possible, when it is not
+beneficial also for throughput (as detailed in Section 2). As a
+consequence of this behavior, and of further issues described for the
+strict_guarantees tunable, short-term service guarantees may be
+occasionally violated. And, in some cases, these guarantees may be
+more important than guaranteeing maximum throughput. For example, in
+video playing/streaming, a very low drop rate may be more important
+than maximum throughput. In these cases, consider setting the
+strict_guarantees parameter.
+
+slice_idle_us
+-------------
+
+Controls the same tuning parameter as slice_idle, but in microseconds.
+Either tunable can be used to set idling behavior.  Afterwards, the
+other tunable will reflect the newly set value in sysfs.
+
+strict_guarantees
+-----------------
+
+If this parameter is set (default: unset), then BFQ
+
+- always performs idling when the in-service queue becomes empty;
+
+- forces the device to serve one I/O request at a time, by dispatching a
+  new request only if there is no outstanding request.
+
+In the presence of differentiated weights or I/O-request sizes, both
+the above conditions are needed to guarantee that every BFQ queue
+receives its allotted share of the bandwidth. The first condition is
+needed for the reasons explained in the description of the slice_idle
+tunable.  The second condition is needed because all modern storage
+devices reorder internally-queued requests, which may trivially break
+the service guarantees enforced by the I/O scheduler.
+
+Setting strict_guarantees may evidently affect throughput.
+
+back_seek_max
+-------------
+
+This specifies, given in Kbytes, the maximum "distance" for backward seeking.
+The distance is the amount of space from the current head location to the
+sectors that are backward in terms of distance.
+
+This parameter allows the scheduler to anticipate requests in the "backward"
+direction and consider them as being the "next" if they are within this
+distance from the current head location.
+
+back_seek_penalty
+-----------------
+
+This parameter is used to compute the cost of backward seeking. If the
+backward distance of request is just 1/back_seek_penalty from a "front"
+request, then the seeking cost of two requests is considered equivalent.
+
+So scheduler will not bias toward one or the other request (otherwise scheduler
+will bias toward front request). Default value of back_seek_penalty is 2.
+
+fifo_expire_async
+-----------------
+
+This parameter is used to set the timeout of asynchronous requests. Default
+value of this is 248ms.
+
+fifo_expire_sync
+----------------
+
+This parameter is used to set the timeout of synchronous requests. Default
+value of this is 124ms. In case to favor synchronous requests over asynchronous
+one, this value should be decreased relative to fifo_expire_async.
+
+low_latency
+-----------
+
+This parameter is used to enable/disable BFQ's low latency mode. By
+default, low latency mode is enabled. If enabled, interactive and soft
+real-time applications are privileged and experience a lower latency,
+as explained in more detail in the description of how BFQ works.
+
+DISABLE this mode if you need full control on bandwidth
+distribution. In fact, if it is enabled, then BFQ automatically
+increases the bandwidth share of privileged applications, as the main
+means to guarantee a lower latency to them.
+
+In addition, as already highlighted at the beginning of this document,
+DISABLE this mode if your only goal is to achieve a high throughput.
+In fact, privileging the I/O of some application over the rest may
+entail a lower throughput. To achieve the highest-possible throughput
+on a non-rotational device, setting slice_idle to 0 may be needed too
+(at the cost of giving up any strong guarantee on fairness and low
+latency).
+
+timeout_sync
+------------
+
+Maximum amount of device time that can be given to a task (queue) once
+it has been selected for service. On devices with costly seeks,
+increasing this time usually increases maximum throughput. On the
+opposite end, increasing this time coarsens the granularity of the
+short-term bandwidth and latency guarantees, especially if the
+following parameter is set to zero.
+
+max_budget
+----------
+
+Maximum amount of service, measured in sectors, that can be provided
+to a BFQ queue once it is set in service (of course within the limits
+of the above timeout). According to what said in the description of
+the algorithm, larger values increase the throughput in proportion to
+the percentage of sequential I/O requests issued. The price of larger
+values is that they coarsen the granularity of short-term bandwidth
+and latency guarantees.
+
+The default value is 0, which enables auto-tuning: BFQ sets max_budget
+to the maximum number of sectors that can be served during
+timeout_sync, according to the estimated peak rate.
+
+For specific devices, some users have occasionally reported to have
+reached a higher throughput by setting max_budget explicitly, i.e., by
+setting max_budget to a higher value than 0. In particular, they have
+set max_budget to higher values than those to which BFQ would have set
+it with auto-tuning. An alternative way to achieve this goal is to
+just increase the value of timeout_sync, leaving max_budget equal to 0.
+
+weights
+-------
+
+Read-only parameter, used to show the weights of the currently active
+BFQ queues.
+
+
+4. Group scheduling with BFQ
+============================
+
+BFQ supports both cgroups-v1 and cgroups-v2 io controllers, namely
+blkio and io. In particular, BFQ supports weight-based proportional
+share. To activate cgroups support, set BFQ_GROUP_IOSCHED.
+
+4-1 Service guarantees provided
+-------------------------------
+
+With BFQ, proportional share means true proportional share of the
+device bandwidth, according to group weights. For example, a group
+with weight 200 gets twice the bandwidth, and not just twice the time,
+of a group with weight 100.
+
+BFQ supports hierarchies (group trees) of any depth. Bandwidth is
+distributed among groups and processes in the expected way: for each
+group, the children of the group share the whole bandwidth of the
+group in proportion to their weights. In particular, this implies
+that, for each leaf group, every process of the group receives the
+same share of the whole group bandwidth, unless the ioprio of the
+process is modified.
+
+The resource-sharing guarantee for a group may partially or totally
+switch from bandwidth to time, if providing bandwidth guarantees to
+the group lowers the throughput too much. This switch occurs on a
+per-process basis: if a process of a leaf group causes throughput loss
+if served in such a way to receive its share of the bandwidth, then
+BFQ switches back to just time-based proportional share for that
+process.
+
+4-2 Interface
+-------------
+
+To get proportional sharing of bandwidth with BFQ for a given device,
+BFQ must of course be the active scheduler for that device.
+
+Within each group directory, the names of the files associated with
+BFQ-specific cgroup parameters and stats begin with the "bfq."
+prefix. So, with cgroups-v1 or cgroups-v2, the full prefix for
+BFQ-specific files is "blkio.bfq." or "io.bfq." For example, the group
+parameter to set the weight of a group with BFQ is blkio.bfq.weight
+or io.bfq.weight.
+
+As for cgroups-v1 (blkio controller), the exact set of stat files
+created, and kept up-to-date by bfq, depends on whether
+CONFIG_BFQ_CGROUP_DEBUG is set. If it is set, then bfq creates all
+the stat files documented in
+Documentation/cgroup-v1/blkio-controller.rst. If, instead,
+CONFIG_BFQ_CGROUP_DEBUG is not set, then bfq creates only the files::
+
+  blkio.bfq.io_service_bytes
+  blkio.bfq.io_service_bytes_recursive
+  blkio.bfq.io_serviced
+  blkio.bfq.io_serviced_recursive
+
+The value of CONFIG_BFQ_CGROUP_DEBUG greatly influences the maximum
+throughput sustainable with bfq, because updating the blkio.bfq.*
+stats is rather costly, especially for some of the stats enabled by
+CONFIG_BFQ_CGROUP_DEBUG.
+
+Parameters to set
+-----------------
+
+For each group, there is only the following parameter to set.
+
+weight (namely blkio.bfq.weight or io.bfq-weight): the weight of the
+group inside its parent. Available values: 1..10000 (default 100). The
+linear mapping between ioprio and weights, described at the beginning
+of the tunable section, is still valid, but all weights higher than
+IOPRIO_BE_NR*10 are mapped to ioprio 0.
+
+Recall that, if low-latency is set, then BFQ automatically raises the
+weight of the queues associated with interactive and soft real-time
+applications. Unset this tunable if you need/want to control weights.
+
+
+[1]
+    P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
+    Scheduler", Proceedings of the First Workshop on Mobile System
+    Technologies (MST-2015), May 2015.
+
+    http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf
+
+[2]
+    P. Valente and M. Andreolini, "Improving Application
+    Responsiveness with the BFQ Disk I/O Scheduler", Proceedings of
+    the 5th Annual International Systems and Storage Conference
+    (SYSTOR '12), June 2012.
+
+    Slightly extended version:
+
+    http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite-results.pdf
+
+[3]
+   https://github.com/Algodev-github/S
diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
deleted file mode 100644
index bbd6eb5bbb07..000000000000
--- a/Documentation/block/bfq-iosched.txt
+++ /dev/null
@@ -1,583 +0,0 @@
-BFQ (Budget Fair Queueing)
-==========================
-
-BFQ is a proportional-share I/O scheduler, with some extra
-low-latency capabilities. In addition to cgroups support (blkio or io
-controllers), BFQ's main features are:
-- BFQ guarantees a high system and application responsiveness, and a
-  low latency for time-sensitive applications, such as audio or video
-  players;
-- BFQ distributes bandwidth, and not just time, among processes or
-  groups (switching back to time distribution when needed to keep
-  throughput high).
-
-In its default configuration, BFQ privileges latency over
-throughput. So, when needed for achieving a lower latency, BFQ builds
-schedules that may lead to a lower throughput. If your main or only
-goal, for a given device, is to achieve the maximum-possible
-throughput at all times, then do switch off all low-latency heuristics
-for that device, by setting low_latency to 0. See Section 3 for
-details on how to configure BFQ for the desired tradeoff between
-latency and throughput, or on how to maximize throughput.
-
-As every I/O scheduler, BFQ adds some overhead to per-I/O-request
-processing. To give an idea of this overhead, the total,
-single-lock-protected, per-request processing time of BFQ---i.e., the
-sum of the execution times of the request insertion, dispatch and
-completion hooks---is, e.g., 1.9 us on an Intel Core i7-2760QM@2.40GHz
-(dated CPU for notebooks; time measured with simple code
-instrumentation, and using the throughput-sync.sh script of the S
-suite [1], in performance-profiling mode). To put this result into
-context, the total, single-lock-protected, per-request execution time
-of the lightest I/O scheduler available in blk-mq, mq-deadline, is 0.7
-us (mq-deadline is ~800 LOC, against ~10500 LOC for BFQ).
-
-Scheduling overhead further limits the maximum IOPS that a CPU can
-process (already limited by the execution of the rest of the I/O
-stack). To give an idea of the limits with BFQ, on slow or average
-CPUs, here are, first, the limits of BFQ for three different CPUs, on,
-respectively, an average laptop, an old desktop, and a cheap embedded
-system, in case full hierarchical support is enabled (i.e.,
-CONFIG_BFQ_GROUP_IOSCHED is set), but CONFIG_BFQ_CGROUP_DEBUG is not
-set (Section 4-2):
-- Intel i7-4850HQ: 400 KIOPS
-- AMD A8-3850: 250 KIOPS
-- ARM CortexTM-A53 Octa-core: 80 KIOPS
-
-If CONFIG_BFQ_CGROUP_DEBUG is set (and of course full hierarchical
-support is enabled), then the sustainable throughput with BFQ
-decreases, because all blkio.bfq* statistics are created and updated
-(Section 4-2). For BFQ, this leads to the following maximum
-sustainable throughputs, on the same systems as above:
-- Intel i7-4850HQ: 310 KIOPS
-- AMD A8-3850: 200 KIOPS
-- ARM CortexTM-A53 Octa-core: 56 KIOPS
-
-BFQ works for multi-queue devices too.
-
-The table of contents follow. Impatients can just jump to Section 3.
-
-CONTENTS
-
-1. When may BFQ be useful?
- 1-1 Personal systems
- 1-2 Server systems
-2. How does BFQ work?
-3. What are BFQ's tunables and how to properly configure BFQ?
-4. BFQ group scheduling
- 4-1 Service guarantees provided
- 4-2 Interface
-
-1. When may BFQ be useful?
-==========================
-
-BFQ provides the following benefits on personal and server systems.
-
-1-1 Personal systems
---------------------
-
-Low latency for interactive applications
-
-Regardless of the actual background workload, BFQ guarantees that, for
-interactive tasks, the storage device is virtually as responsive as if
-it was idle. For example, even if one or more of the following
-background workloads are being executed:
-- one or more large files are being read, written or copied,
-- a tree of source files is being compiled,
-- one or more virtual machines are performing I/O,
-- a software update is in progress,
-- indexing daemons are scanning filesystems and updating their
-  databases,
-starting an application or loading a file from within an application
-takes about the same time as if the storage device was idle. As a
-comparison, with CFQ, NOOP or DEADLINE, and in the same conditions,
-applications experience high latencies, or even become unresponsive
-until the background workload terminates (also on SSDs).
-
-Low latency for soft real-time applications
-
-Also soft real-time applications, such as audio and video
-players/streamers, enjoy a low latency and a low drop rate, regardless
-of the background I/O workload. As a consequence, these applications
-do not suffer from almost any glitch due to the background workload.
-
-Higher speed for code-development tasks
-
-If some additional workload happens to be executed in parallel, then
-BFQ executes the I/O-related components of typical code-development
-tasks (compilation, checkout, merge, ...) much more quickly than CFQ,
-NOOP or DEADLINE.
-
-High throughput
-
-On hard disks, BFQ achieves up to 30% higher throughput than CFQ, and
-up to 150% higher throughput than DEADLINE and NOOP, with all the
-sequential workloads considered in our tests. With random workloads,
-and with all the workloads on flash-based devices, BFQ achieves,
-instead, about the same throughput as the other schedulers.
-
-Strong fairness, bandwidth and delay guarantees
-
-BFQ distributes the device throughput, and not just the device time,
-among I/O-bound applications in proportion their weights, with any
-workload and regardless of the device parameters. From these bandwidth
-guarantees, it is possible to compute tight per-I/O-request delay
-guarantees by a simple formula. If not configured for strict service
-guarantees, BFQ switches to time-based resource sharing (only) for
-applications that would otherwise cause a throughput loss.
-
-1-2 Server systems
-------------------
-
-Most benefits for server systems follow from the same service
-properties as above. In particular, regardless of whether additional,
-possibly heavy workloads are being served, BFQ guarantees:
-
-. audio and video-streaming with zero or very low jitter and drop
-  rate;
-
-. fast retrieval of WEB pages and embedded objects;
-
-. real-time recording of data in live-dumping applications (e.g.,
-  packet logging);
-
-. responsiveness in local and remote access to a server.
-
-
-2. How does BFQ work?
-=====================
-
-BFQ is a proportional-share I/O scheduler, whose general structure,
-plus a lot of code, are borrowed from CFQ.
-
-- Each process doing I/O on a device is associated with a weight and a
-  (bfq_)queue.
-
-- BFQ grants exclusive access to the device, for a while, to one queue
-  (process) at a time, and implements this service model by
-  associating every queue with a budget, measured in number of
-  sectors.
-
-  - After a queue is granted access to the device, the budget of the
-    queue is decremented, on each request dispatch, by the size of the
-    request.
-
-  - The in-service queue is expired, i.e., its service is suspended,
-    only if one of the following events occurs: 1) the queue finishes
-    its budget, 2) the queue empties, 3) a "budget timeout" fires.
-
-    - The budget timeout prevents processes doing random I/O from
-      holding the device for too long and dramatically reducing
-      throughput.
-
-    - Actually, as in CFQ, a queue associated with a process issuing
-      sync requests may not be expired immediately when it empties. In
-      contrast, BFQ may idle the device for a short time interval,
-      giving the process the chance to go on being served if it issues
-      a new request in time. Device idling typically boosts the
-      throughput on rotational devices and on non-queueing flash-based
-      devices, if processes do synchronous and sequential I/O. In
-      addition, under BFQ, device idling is also instrumental in
-      guaranteeing the desired throughput fraction to processes
-      issuing sync requests (see the description of the slice_idle
-      tunable in this document, or [1, 2], for more details).
-
-      - With respect to idling for service guarantees, if several
-	processes are competing for the device at the same time, but
-	all processes and groups have the same weight, then BFQ
-	guarantees the expected throughput distribution without ever
-	idling the device. Throughput is thus as high as possible in
-	this common scenario.
-
-     - On flash-based storage with internal queueing of commands
-       (typically NCQ), device idling happens to be always detrimental
-       for throughput. So, with these devices, BFQ performs idling
-       only when strictly needed for service guarantees, i.e., for
-       guaranteeing low latency or fairness. In these cases, overall
-       throughput may be sub-optimal. No solution currently exists to
-       provide both strong service guarantees and optimal throughput
-       on devices with internal queueing.
-
-  - If low-latency mode is enabled (default configuration), BFQ
-    executes some special heuristics to detect interactive and soft
-    real-time applications (e.g., video or audio players/streamers),
-    and to reduce their latency. The most important action taken to
-    achieve this goal is to give to the queues associated with these
-    applications more than their fair share of the device
-    throughput. For brevity, we call just "weight-raising" the whole
-    sets of actions taken by BFQ to privilege these queues. In
-    particular, BFQ provides a milder form of weight-raising for
-    interactive applications, and a stronger form for soft real-time
-    applications.
-
-  - BFQ automatically deactivates idling for queues born in a burst of
-    queue creations. In fact, these queues are usually associated with
-    the processes of applications and services that benefit mostly
-    from a high throughput. Examples are systemd during boot, or git
-    grep.
-
-  - As CFQ, BFQ merges queues performing interleaved I/O, i.e.,
-    performing random I/O that becomes mostly sequential if
-    merged. Differently from CFQ, BFQ achieves this goal with a more
-    reactive mechanism, called Early Queue Merge (EQM). EQM is so
-    responsive in detecting interleaved I/O (cooperating processes),
-    that it enables BFQ to achieve a high throughput, by queue
-    merging, even for queues for which CFQ needs a different
-    mechanism, preemption, to get a high throughput. As such EQM is a
-    unified mechanism to achieve a high throughput with interleaved
-    I/O.
-
-  - Queues are scheduled according to a variant of WF2Q+, named
-    B-WF2Q+, and implemented using an augmented rb-tree to preserve an
-    O(log N) overall complexity.  See [2] for more details. B-WF2Q+ is
-    also ready for hierarchical scheduling, details in Section 4.
-
-  - B-WF2Q+ guarantees a tight deviation with respect to an ideal,
-    perfectly fair, and smooth service. In particular, B-WF2Q+
-    guarantees that each queue receives a fraction of the device
-    throughput proportional to its weight, even if the throughput
-    fluctuates, and regardless of: the device parameters, the current
-    workload and the budgets assigned to the queue.
-
-  - The last, budget-independence, property (although probably
-    counterintuitive in the first place) is definitely beneficial, for
-    the following reasons:
-
-    - First, with any proportional-share scheduler, the maximum
-      deviation with respect to an ideal service is proportional to
-      the maximum budget (slice) assigned to queues. As a consequence,
-      BFQ can keep this deviation tight not only because of the
-      accurate service of B-WF2Q+, but also because BFQ *does not*
-      need to assign a larger budget to a queue to let the queue
-      receive a higher fraction of the device throughput.
-
-    - Second, BFQ is free to choose, for every process (queue), the
-      budget that best fits the needs of the process, or best
-      leverages the I/O pattern of the process. In particular, BFQ
-      updates queue budgets with a simple feedback-loop algorithm that
-      allows a high throughput to be achieved, while still providing
-      tight latency guarantees to time-sensitive applications. When
-      the in-service queue expires, this algorithm computes the next
-      budget of the queue so as to:
-
-      - Let large budgets be eventually assigned to the queues
-	associated with I/O-bound applications performing sequential
-	I/O: in fact, the longer these applications are served once
-	got access to the device, the higher the throughput is.
-
-      - Let small budgets be eventually assigned to the queues
-	associated with time-sensitive applications (which typically
-	perform sporadic and short I/O), because, the smaller the
-	budget assigned to a queue waiting for service is, the sooner
-	B-WF2Q+ will serve that queue (Subsec 3.3 in [2]).
-
-- If several processes are competing for the device at the same time,
-  but all processes and groups have the same weight, then BFQ
-  guarantees the expected throughput distribution without ever idling
-  the device. It uses preemption instead. Throughput is then much
-  higher in this common scenario.
-
-- ioprio classes are served in strict priority order, i.e.,
-  lower-priority queues are not served as long as there are
-  higher-priority queues.  Among queues in the same class, the
-  bandwidth is distributed in proportion to the weight of each
-  queue. A very thin extra bandwidth is however guaranteed to
-  the Idle class, to prevent it from starving.
-
-
-3. What are BFQ's tunables and how to properly configure BFQ?
-=============================================================
-
-Most BFQ tunables affect service guarantees (basically latency and
-fairness) and throughput. For full details on how to choose the
-desired tradeoff between service guarantees and throughput, see the
-parameters slice_idle, strict_guarantees and low_latency. For details
-on how to maximise throughput, see slice_idle, timeout_sync and
-max_budget. The other performance-related parameters have been
-inherited from, and have been preserved mostly for compatibility with
-CFQ. So far, no performance improvement has been reported after
-changing the latter parameters in BFQ.
-
-In particular, the tunables back_seek-max, back_seek_penalty,
-fifo_expire_async and fifo_expire_sync below are the same as in
-CFQ. Their description is just copied from that for CFQ. Some
-considerations in the description of slice_idle are copied from CFQ
-too.
-
-per-process ioprio and weight
------------------------------
-
-Unless the cgroups interface is used (see "4. BFQ group scheduling"),
-weights can be assigned to processes only indirectly, through I/O
-priorities, and according to the relation:
-weight = (IOPRIO_BE_NR - ioprio) * 10.
-
-Beware that, if low-latency is set, then BFQ automatically raises the
-weight of the queues associated with interactive and soft real-time
-applications. Unset this tunable if you need/want to control weights.
-
-slice_idle
-----------
-
-This parameter specifies how long BFQ should idle for next I/O
-request, when certain sync BFQ queues become empty. By default
-slice_idle is a non-zero value. Idling has a double purpose: boosting
-throughput and making sure that the desired throughput distribution is
-respected (see the description of how BFQ works, and, if needed, the
-papers referred there).
-
-As for throughput, idling can be very helpful on highly seeky media
-like single spindle SATA/SAS disks where we can cut down on overall
-number of seeks and see improved throughput.
-
-Setting slice_idle to 0 will remove all the idling on queues and one
-should see an overall improved throughput on faster storage devices
-like multiple SATA/SAS disks in hardware RAID configuration, as well
-as flash-based storage with internal command queueing (and
-parallelism).
-
-So depending on storage and workload, it might be useful to set
-slice_idle=0.  In general for SATA/SAS disks and software RAID of
-SATA/SAS disks keeping slice_idle enabled should be useful. For any
-configurations where there are multiple spindles behind single LUN
-(Host based hardware RAID controller or for storage arrays), or with
-flash-based fast storage, setting slice_idle=0 might end up in better
-throughput and acceptable latencies.
-
-Idling is however necessary to have service guarantees enforced in
-case of differentiated weights or differentiated I/O-request lengths.
-To see why, suppose that a given BFQ queue A must get several I/O
-requests served for each request served for another queue B. Idling
-ensures that, if A makes a new I/O request slightly after becoming
-empty, then no request of B is dispatched in the middle, and thus A
-does not lose the possibility to get more than one request dispatched
-before the next request of B is dispatched. Note that idling
-guarantees the desired differentiated treatment of queues only in
-terms of I/O-request dispatches. To guarantee that the actual service
-order then corresponds to the dispatch order, the strict_guarantees
-tunable must be set too.
-
-There is an important flipside for idling: apart from the above cases
-where it is beneficial also for throughput, idling can severely impact
-throughput. One important case is random workload. Because of this
-issue, BFQ tends to avoid idling as much as possible, when it is not
-beneficial also for throughput (as detailed in Section 2). As a
-consequence of this behavior, and of further issues described for the
-strict_guarantees tunable, short-term service guarantees may be
-occasionally violated. And, in some cases, these guarantees may be
-more important than guaranteeing maximum throughput. For example, in
-video playing/streaming, a very low drop rate may be more important
-than maximum throughput. In these cases, consider setting the
-strict_guarantees parameter.
-
-slice_idle_us
--------------
-
-Controls the same tuning parameter as slice_idle, but in microseconds.
-Either tunable can be used to set idling behavior.  Afterwards, the
-other tunable will reflect the newly set value in sysfs.
-
-strict_guarantees
------------------
-
-If this parameter is set (default: unset), then BFQ
-
-- always performs idling when the in-service queue becomes empty;
-
-- forces the device to serve one I/O request at a time, by dispatching a
-  new request only if there is no outstanding request.
-
-In the presence of differentiated weights or I/O-request sizes, both
-the above conditions are needed to guarantee that every BFQ queue
-receives its allotted share of the bandwidth. The first condition is
-needed for the reasons explained in the description of the slice_idle
-tunable.  The second condition is needed because all modern storage
-devices reorder internally-queued requests, which may trivially break
-the service guarantees enforced by the I/O scheduler.
-
-Setting strict_guarantees may evidently affect throughput.
-
-back_seek_max
--------------
-
-This specifies, given in Kbytes, the maximum "distance" for backward seeking.
-The distance is the amount of space from the current head location to the
-sectors that are backward in terms of distance.
-
-This parameter allows the scheduler to anticipate requests in the "backward"
-direction and consider them as being the "next" if they are within this
-distance from the current head location.
-
-back_seek_penalty
------------------
-
-This parameter is used to compute the cost of backward seeking. If the
-backward distance of request is just 1/back_seek_penalty from a "front"
-request, then the seeking cost of two requests is considered equivalent.
-
-So scheduler will not bias toward one or the other request (otherwise scheduler
-will bias toward front request). Default value of back_seek_penalty is 2.
-
-fifo_expire_async
------------------
-
-This parameter is used to set the timeout of asynchronous requests. Default
-value of this is 248ms.
-
-fifo_expire_sync
-----------------
-
-This parameter is used to set the timeout of synchronous requests. Default
-value of this is 124ms. In case to favor synchronous requests over asynchronous
-one, this value should be decreased relative to fifo_expire_async.
-
-low_latency
------------
-
-This parameter is used to enable/disable BFQ's low latency mode. By
-default, low latency mode is enabled. If enabled, interactive and soft
-real-time applications are privileged and experience a lower latency,
-as explained in more detail in the description of how BFQ works.
-
-DISABLE this mode if you need full control on bandwidth
-distribution. In fact, if it is enabled, then BFQ automatically
-increases the bandwidth share of privileged applications, as the main
-means to guarantee a lower latency to them.
-
-In addition, as already highlighted at the beginning of this document,
-DISABLE this mode if your only goal is to achieve a high throughput.
-In fact, privileging the I/O of some application over the rest may
-entail a lower throughput. To achieve the highest-possible throughput
-on a non-rotational device, setting slice_idle to 0 may be needed too
-(at the cost of giving up any strong guarantee on fairness and low
-latency).
-
-timeout_sync
-------------
-
-Maximum amount of device time that can be given to a task (queue) once
-it has been selected for service. On devices with costly seeks,
-increasing this time usually increases maximum throughput. On the
-opposite end, increasing this time coarsens the granularity of the
-short-term bandwidth and latency guarantees, especially if the
-following parameter is set to zero.
-
-max_budget
-----------
-
-Maximum amount of service, measured in sectors, that can be provided
-to a BFQ queue once it is set in service (of course within the limits
-of the above timeout). According to what said in the description of
-the algorithm, larger values increase the throughput in proportion to
-the percentage of sequential I/O requests issued. The price of larger
-values is that they coarsen the granularity of short-term bandwidth
-and latency guarantees.
-
-The default value is 0, which enables auto-tuning: BFQ sets max_budget
-to the maximum number of sectors that can be served during
-timeout_sync, according to the estimated peak rate.
-
-For specific devices, some users have occasionally reported to have
-reached a higher throughput by setting max_budget explicitly, i.e., by
-setting max_budget to a higher value than 0. In particular, they have
-set max_budget to higher values than those to which BFQ would have set
-it with auto-tuning. An alternative way to achieve this goal is to
-just increase the value of timeout_sync, leaving max_budget equal to 0.
-
-weights
--------
-
-Read-only parameter, used to show the weights of the currently active
-BFQ queues.
-
-
-4. Group scheduling with BFQ
-============================
-
-BFQ supports both cgroups-v1 and cgroups-v2 io controllers, namely
-blkio and io. In particular, BFQ supports weight-based proportional
-share. To activate cgroups support, set BFQ_GROUP_IOSCHED.
-
-4-1 Service guarantees provided
--------------------------------
-
-With BFQ, proportional share means true proportional share of the
-device bandwidth, according to group weights. For example, a group
-with weight 200 gets twice the bandwidth, and not just twice the time,
-of a group with weight 100.
-
-BFQ supports hierarchies (group trees) of any depth. Bandwidth is
-distributed among groups and processes in the expected way: for each
-group, the children of the group share the whole bandwidth of the
-group in proportion to their weights. In particular, this implies
-that, for each leaf group, every process of the group receives the
-same share of the whole group bandwidth, unless the ioprio of the
-process is modified.
-
-The resource-sharing guarantee for a group may partially or totally
-switch from bandwidth to time, if providing bandwidth guarantees to
-the group lowers the throughput too much. This switch occurs on a
-per-process basis: if a process of a leaf group causes throughput loss
-if served in such a way to receive its share of the bandwidth, then
-BFQ switches back to just time-based proportional share for that
-process.
-
-4-2 Interface
--------------
-
-To get proportional sharing of bandwidth with BFQ for a given device,
-BFQ must of course be the active scheduler for that device.
-
-Within each group directory, the names of the files associated with
-BFQ-specific cgroup parameters and stats begin with the "bfq."
-prefix. So, with cgroups-v1 or cgroups-v2, the full prefix for
-BFQ-specific files is "blkio.bfq." or "io.bfq." For example, the group
-parameter to set the weight of a group with BFQ is blkio.bfq.weight
-or io.bfq.weight.
-
-As for cgroups-v1 (blkio controller), the exact set of stat files
-created, and kept up-to-date by bfq, depends on whether
-CONFIG_BFQ_CGROUP_DEBUG is set. If it is set, then bfq creates all
-the stat files documented in
-Documentation/cgroup-v1/blkio-controller.rst. If, instead,
-CONFIG_BFQ_CGROUP_DEBUG is not set, then bfq creates only the files
-blkio.bfq.io_service_bytes
-blkio.bfq.io_service_bytes_recursive
-blkio.bfq.io_serviced
-blkio.bfq.io_serviced_recursive
-
-The value of CONFIG_BFQ_CGROUP_DEBUG greatly influences the maximum
-throughput sustainable with bfq, because updating the blkio.bfq.*
-stats is rather costly, especially for some of the stats enabled by
-CONFIG_BFQ_CGROUP_DEBUG.
-
-Parameters to set
------------------
-
-For each group, there is only the following parameter to set.
-
-weight (namely blkio.bfq.weight or io.bfq-weight): the weight of the
-group inside its parent. Available values: 1..10000 (default 100). The
-linear mapping between ioprio and weights, described at the beginning
-of the tunable section, is still valid, but all weights higher than
-IOPRIO_BE_NR*10 are mapped to ioprio 0.
-
-Recall that, if low-latency is set, then BFQ automatically raises the
-weight of the queues associated with interactive and soft real-time
-applications. Unset this tunable if you need/want to control weights.
-
-
-[1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
-    Scheduler", Proceedings of the First Workshop on Mobile System
-    Technologies (MST-2015), May 2015.
-    http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf
-
-[2] P. Valente and M. Andreolini, "Improving Application
-    Responsiveness with the BFQ Disk I/O Scheduler", Proceedings of
-    the 5th Annual International Systems and Storage Conference
-    (SYSTOR '12), June 2012.
-    Slightly extended version:
-    http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite-
-							results.pdf
-
-[3] https://github.com/Algodev-github/S
diff --git a/Documentation/block/biodoc.rst b/Documentation/block/biodoc.rst
new file mode 100644
index 000000000000..d6e30b680405
--- /dev/null
+++ b/Documentation/block/biodoc.rst
@@ -0,0 +1,1168 @@
+=====================================================
+Notes on the Generic Block Layer Rewrite in Linux 2.5
+=====================================================
+
+.. note::
+
+	It seems that there are lot of outdated stuff here. This seems
+	to be written somewhat as a task list. Yet, eventually, something
+	here might still be useful.
+
+Notes Written on Jan 15, 2002:
+	- Jens Axboe <jens.axboe@oracle.com>
+	- Suparna Bhattacharya <suparna@in.ibm.com>
+
+Last Updated May 2, 2002
+
+September 2003: Updated I/O Scheduler portions
+	- Nick Piggin <npiggin@kernel.dk>
+
+Introduction
+============
+
+These are some notes describing some aspects of the 2.5 block layer in the
+context of the bio rewrite. The idea is to bring out some of the key
+changes and a glimpse of the rationale behind those changes.
+
+Please mail corrections & suggestions to suparna@in.ibm.com.
+
+Credits
+=======
+
+2.5 bio rewrite:
+	- Jens Axboe <jens.axboe@oracle.com>
+
+Many aspects of the generic block layer redesign were driven by and evolved
+over discussions, prior patches and the collective experience of several
+people. See sections 8 and 9 for a list of some related references.
+
+The following people helped with review comments and inputs for this
+document:
+
+	- Christoph Hellwig <hch@infradead.org>
+	- Arjan van de Ven <arjanv@redhat.com>
+	- Randy Dunlap <rdunlap@xenotime.net>
+	- Andre Hedrick <andre@linux-ide.org>
+
+The following people helped with fixes/contributions to the bio patches
+while it was still work-in-progress:
+
+	- David S. Miller <davem@redhat.com>
+
+
+.. Description of Contents:
+
+   1. Scope for tuning of logic to various needs
+     1.1 Tuning based on device or low level driver capabilities
+	- Per-queue parameters
+	- Highmem I/O support
+	- I/O scheduler modularization
+     1.2 Tuning based on high level requirements/capabilities
+	1.2.1 Request Priority/Latency
+     1.3 Direct access/bypass to lower layers for diagnostics and special
+	 device operations
+	1.3.1 Pre-built commands
+   2. New flexible and generic but minimalist i/o structure or descriptor
+      (instead of using buffer heads at the i/o layer)
+     2.1 Requirements/Goals addressed
+     2.2 The bio struct in detail (multi-page io unit)
+     2.3 Changes in the request structure
+   3. Using bios
+     3.1 Setup/teardown (allocation, splitting)
+     3.2 Generic bio helper routines
+       3.2.1 Traversing segments and completion units in a request
+       3.2.2 Setting up DMA scatterlists
+       3.2.3 I/O completion
+       3.2.4 Implications for drivers that do not interpret bios (don't handle
+	  multiple segments)
+     3.3 I/O submission
+   4. The I/O scheduler
+   5. Scalability related changes
+     5.1 Granular locking: Removal of io_request_lock
+     5.2 Prepare for transition to 64 bit sector_t
+   6. Other Changes/Implications
+     6.1 Partition re-mapping handled by the generic block layer
+   7. A few tips on migration of older drivers
+   8. A list of prior/related/impacted patches/ideas
+   9. Other References/Discussion Threads
+
+
+Bio Notes
+=========
+
+Let us discuss the changes in the context of how some overall goals for the
+block layer are addressed.
+
+1. Scope for tuning the generic logic to satisfy various requirements
+=====================================================================
+
+The block layer design supports adaptable abstractions to handle common
+processing with the ability to tune the logic to an appropriate extent
+depending on the nature of the device and the requirements of the caller.
+One of the objectives of the rewrite was to increase the degree of tunability
+and to enable higher level code to utilize underlying device/driver
+capabilities to the maximum extent for better i/o performance. This is
+important especially in the light of ever improving hardware capabilities
+and application/middleware software designed to take advantage of these
+capabilities.
+
+1.1 Tuning based on low level device / driver capabilities
+----------------------------------------------------------
+
+Sophisticated devices with large built-in caches, intelligent i/o scheduling
+optimizations, high memory DMA support, etc may find some of the
+generic processing an overhead, while for less capable devices the
+generic functionality is essential for performance or correctness reasons.
+Knowledge of some of the capabilities or parameters of the device should be
+used at the generic block layer to take the right decisions on
+behalf of the driver.
+
+How is this achieved ?
+
+Tuning at a per-queue level:
+
+i. Per-queue limits/values exported to the generic layer by the driver
+
+Various parameters that the generic i/o scheduler logic uses are set at
+a per-queue level (e.g maximum request size, maximum number of segments in
+a scatter-gather list, logical block size)
+
+Some parameters that were earlier available as global arrays indexed by
+major/minor are now directly associated with the queue. Some of these may
+move into the block device structure in the future. Some characteristics
+have been incorporated into a queue flags field rather than separate fields
+in themselves.  There are blk_queue_xxx functions to set the parameters,
+rather than update the fields directly
+
+Some new queue property settings:
+
+	blk_queue_bounce_limit(q, u64 dma_address)
+		Enable I/O to highmem pages, dma_address being the
+		limit. No highmem default.
+
+	blk_queue_max_sectors(q, max_sectors)
+		Sets two variables that limit the size of the request.
+
+		- The request queue's max_sectors, which is a soft size in
+		  units of 512 byte sectors, and could be dynamically varied
+		  by the core kernel.
+
+		- The request queue's max_hw_sectors, which is a hard limit
+		  and reflects the maximum size request a driver can handle
+		  in units of 512 byte sectors.
+
+		The default for both max_sectors and max_hw_sectors is
+		255. The upper limit of max_sectors is 1024.
+
+	blk_queue_max_phys_segments(q, max_segments)
+		Maximum physical segments you can handle in a request. 128
+		default (driver limit). (See 3.2.2)
+
+	blk_queue_max_hw_segments(q, max_segments)
+		Maximum dma segments the hardware can handle in a request. 128
+		default (host adapter limit, after dma remapping).
+		(See 3.2.2)
+
+	blk_queue_max_segment_size(q, max_seg_size)
+		Maximum size of a clustered segment, 64kB default.
+
+	blk_queue_logical_block_size(q, logical_block_size)
+		Lowest possible sector size that the hardware can operate
+		on, 512 bytes default.
+
+New queue flags:
+
+	QUEUE_FLAG_CLUSTER (see 3.2.2)
+	QUEUE_FLAG_QUEUED (see 3.2.4)
+
+
+ii. High-mem i/o capabilities are now considered the default
+
+The generic bounce buffer logic, present in 2.4, where the block layer would
+by default copyin/out i/o requests on high-memory buffers to low-memory buffers
+assuming that the driver wouldn't be able to handle it directly, has been
+changed in 2.5. The bounce logic is now applied only for memory ranges
+for which the device cannot handle i/o. A driver can specify this by
+setting the queue bounce limit for the request queue for the device
+(blk_queue_bounce_limit()). This avoids the inefficiencies of the copyin/out
+where a device is capable of handling high memory i/o.
+
+In order to enable high-memory i/o where the device is capable of supporting
+it, the pci dma mapping routines and associated data structures have now been
+modified to accomplish a direct page -> bus translation, without requiring
+a virtual address mapping (unlike the earlier scheme of virtual address
+-> bus translation). So this works uniformly for high-memory pages (which
+do not have a corresponding kernel virtual address space mapping) and
+low-memory pages.
+
+Note: Please refer to Documentation/DMA-API-HOWTO.txt for a discussion
+on PCI high mem DMA aspects and mapping of scatter gather lists, and support
+for 64 bit PCI.
+
+Special handling is required only for cases where i/o needs to happen on
+pages at physical memory addresses beyond what the device can support. In these
+cases, a bounce bio representing a buffer from the supported memory range
+is used for performing the i/o with copyin/copyout as needed depending on
+the type of the operation.  For example, in case of a read operation, the
+data read has to be copied to the original buffer on i/o completion, so a
+callback routine is set up to do this, while for write, the data is copied
+from the original buffer to the bounce buffer prior to issuing the
+operation. Since an original buffer may be in a high memory area that's not
+mapped in kernel virtual addr, a kmap operation may be required for
+performing the copy, and special care may be needed in the completion path
+as it may not be in irq context. Special care is also required (by way of
+GFP flags) when allocating bounce buffers, to avoid certain highmem
+deadlock possibilities.
+
+It is also possible that a bounce buffer may be allocated from high-memory
+area that's not mapped in kernel virtual addr, but within the range that the
+device can use directly; so the bounce page may need to be kmapped during
+copy operations. [Note: This does not hold in the current implementation,
+though]
+
+There are some situations when pages from high memory may need to
+be kmapped, even if bounce buffers are not necessary. For example a device
+may need to abort DMA operations and revert to PIO for the transfer, in
+which case a virtual mapping of the page is required. For SCSI it is also
+done in some scenarios where the low level driver cannot be trusted to
+handle a single sg entry correctly. The driver is expected to perform the
+kmaps as needed on such occasions as appropriate. A driver could also use
+the blk_queue_bounce() routine on its own to bounce highmem i/o to low
+memory for specific requests if so desired.
+
+iii. The i/o scheduler algorithm itself can be replaced/set as appropriate
+
+As in 2.4, it is possible to plugin a brand new i/o scheduler for a particular
+queue or pick from (copy) existing generic schedulers and replace/override
+certain portions of it. The 2.5 rewrite provides improved modularization
+of the i/o scheduler. There are more pluggable callbacks, e.g for init,
+add request, extract request, which makes it possible to abstract specific
+i/o scheduling algorithm aspects and details outside of the generic loop.
+It also makes it possible to completely hide the implementation details of
+the i/o scheduler from block drivers.
+
+I/O scheduler wrappers are to be used instead of accessing the queue directly.
+See section 4. The I/O scheduler for details.
+
+1.2 Tuning Based on High level code capabilities
+------------------------------------------------
+
+i. Application capabilities for raw i/o
+
+This comes from some of the high-performance database/middleware
+requirements where an application prefers to make its own i/o scheduling
+decisions based on an understanding of the access patterns and i/o
+characteristics
+
+ii. High performance filesystems or other higher level kernel code's
+capabilities
+
+Kernel components like filesystems could also take their own i/o scheduling
+decisions for optimizing performance. Journalling filesystems may need
+some control over i/o ordering.
+
+What kind of support exists at the generic block layer for this ?
+
+The flags and rw fields in the bio structure can be used for some tuning
+from above e.g indicating that an i/o is just a readahead request, or priority
+settings (currently unused). As far as user applications are concerned they
+would need an additional mechanism either via open flags or ioctls, or some
+other upper level mechanism to communicate such settings to block.
+
+1.2.1 Request Priority/Latency
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Todo/Under discussion::
+
+  Arjan's proposed request priority scheme allows higher levels some broad
+  control (high/med/low) over the priority  of an i/o request vs other pending
+  requests in the queue. For example it allows reads for bringing in an
+  executable page on demand to be given a higher priority over pending write
+  requests which haven't aged too much on the queue. Potentially this priority
+  could even be exposed to applications in some manner, providing higher level
+  tunability. Time based aging avoids starvation of lower priority
+  requests. Some bits in the bi_opf flags field in the bio structure are
+  intended to be used for this priority information.
+
+
+1.3 Direct Access to Low level Device/Driver Capabilities (Bypass mode)
+-----------------------------------------------------------------------
+
+(e.g Diagnostics, Systems Management)
+
+There are situations where high-level code needs to have direct access to
+the low level device capabilities or requires the ability to issue commands
+to the device bypassing some of the intermediate i/o layers.
+These could, for example, be special control commands issued through ioctl
+interfaces, or could be raw read/write commands that stress the drive's
+capabilities for certain kinds of fitness tests. Having direct interfaces at
+multiple levels without having to pass through upper layers makes
+it possible to perform bottom up validation of the i/o path, layer by
+layer, starting from the media.
+
+The normal i/o submission interfaces, e.g submit_bio, could be bypassed
+for specially crafted requests which such ioctl or diagnostics
+interfaces would typically use, and the elevator add_request routine
+can instead be used to directly insert such requests in the queue or preferably
+the blk_do_rq routine can be used to place the request on the queue and
+wait for completion. Alternatively, sometimes the caller might just
+invoke a lower level driver specific interface with the request as a
+parameter.
+
+If the request is a means for passing on special information associated with
+the command, then such information is associated with the request->special
+field (rather than misuse the request->buffer field which is meant for the
+request data buffer's virtual mapping).
+
+For passing request data, the caller must build up a bio descriptor
+representing the concerned memory buffer if the underlying driver interprets
+bio segments or uses the block layer end*request* functions for i/o
+completion. Alternatively one could directly use the request->buffer field to
+specify the virtual address of the buffer, if the driver expects buffer
+addresses passed in this way and ignores bio entries for the request type
+involved. In the latter case, the driver would modify and manage the
+request->buffer, request->sector and request->nr_sectors or
+request->current_nr_sectors fields itself rather than using the block layer
+end_request or end_that_request_first completion interfaces.
+(See 2.3 or Documentation/block/request.rst for a brief explanation of
+the request structure fields)
+
+::
+
+  [TBD: end_that_request_last should be usable even in this case;
+  Perhaps an end_that_direct_request_first routine could be implemented to make
+  handling direct requests easier for such drivers; Also for drivers that
+  expect bios, a helper function could be provided for setting up a bio
+  corresponding to a data buffer]
+
+  <JENS: I dont understand the above, why is end_that_request_first() not
+  usable? Or _last for that matter. I must be missing something>
+
+  <SUP: What I meant here was that if the request doesn't have a bio, then
+   end_that_request_first doesn't modify nr_sectors or current_nr_sectors,
+   and hence can't be used for advancing request state settings on the
+   completion of partial transfers. The driver has to modify these fields
+   directly by hand.
+   This is because end_that_request_first only iterates over the bio list,
+   and always returns 0 if there are none associated with the request.
+   _last works OK in this case, and is not a problem, as I mentioned earlier
+  >
+
+1.3.1 Pre-built Commands
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+A request can be created with a pre-built custom command  to be sent directly
+to the device. The cmd block in the request structure has room for filling
+in the command bytes. (i.e rq->cmd is now 16 bytes in size, and meant for
+command pre-building, and the type of the request is now indicated
+through rq->flags instead of via rq->cmd)
+
+The request structure flags can be set up to indicate the type of request
+in such cases (REQ_PC: direct packet command passed to driver, REQ_BLOCK_PC:
+packet command issued via blk_do_rq, REQ_SPECIAL: special request).
+
+It can help to pre-build device commands for requests in advance.
+Drivers can now specify a request prepare function (q->prep_rq_fn) that the
+block layer would invoke to pre-build device commands for a given request,
+or perform other preparatory processing for the request. This is routine is
+called by elv_next_request(), i.e. typically just before servicing a request.
+(The prepare function would not be called for requests that have RQF_DONTPREP
+enabled)
+
+Aside:
+  Pre-building could possibly even be done early, i.e before placing the
+  request on the queue, rather than construct the command on the fly in the
+  driver while servicing the request queue when it may affect latencies in
+  interrupt context or responsiveness in general. One way to add early
+  pre-building would be to do it whenever we fail to merge on a request.
+  Now REQ_NOMERGE is set in the request flags to skip this one in the future,
+  which means that it will not change before we feed it to the device. So
+  the pre-builder hook can be invoked there.
+
+
+2. Flexible and generic but minimalist i/o structure/descriptor
+===============================================================
+
+2.1 Reason for a new structure and requirements addressed
+---------------------------------------------------------
+
+Prior to 2.5, buffer heads were used as the unit of i/o at the generic block
+layer, and the low level request structure was associated with a chain of
+buffer heads for a contiguous i/o request. This led to certain inefficiencies
+when it came to large i/o requests and readv/writev style operations, as it
+forced such requests to be broken up into small chunks before being passed
+on to the generic block layer, only to be merged by the i/o scheduler
+when the underlying device was capable of handling the i/o in one shot.
+Also, using the buffer head as an i/o structure for i/os that didn't originate
+from the buffer cache unnecessarily added to the weight of the descriptors
+which were generated for each such chunk.
+
+The following were some of the goals and expectations considered in the
+redesign of the block i/o data structure in 2.5.
+
+1.  Should be appropriate as a descriptor for both raw and buffered i/o  -
+    avoid cache related fields which are irrelevant in the direct/page i/o path,
+    or filesystem block size alignment restrictions which may not be relevant
+    for raw i/o.
+2.  Ability to represent high-memory buffers (which do not have a virtual
+    address mapping in kernel address space).
+3.  Ability to represent large i/os w/o unnecessarily breaking them up (i.e
+    greater than PAGE_SIZE chunks in one shot)
+4.  At the same time, ability to retain independent identity of i/os from
+    different sources or i/o units requiring individual completion (e.g. for
+    latency reasons)
+5.  Ability to represent an i/o involving multiple physical memory segments
+    (including non-page aligned page fragments, as specified via readv/writev)
+    without unnecessarily breaking it up, if the underlying device is capable of
+    handling it.
+6.  Preferably should be based on a memory descriptor structure that can be
+    passed around different types of subsystems or layers, maybe even
+    networking, without duplication or extra copies of data/descriptor fields
+    themselves in the process
+7.  Ability to handle the possibility of splits/merges as the structure passes
+    through layered drivers (lvm, md, evms), with minimal overhead.
+
+The solution was to define a new structure (bio)  for the block layer,
+instead of using the buffer head structure (bh) directly, the idea being
+avoidance of some associated baggage and limitations. The bio structure
+is uniformly used for all i/o at the block layer ; it forms a part of the
+bh structure for buffered i/o, and in the case of raw/direct i/o kiobufs are
+mapped to bio structures.
+
+2.2 The bio struct
+------------------
+
+The bio structure uses a vector representation pointing to an array of tuples
+of <page, offset, len> to describe the i/o buffer, and has various other
+fields describing i/o parameters and state that needs to be maintained for
+performing the i/o.
+
+Notice that this representation means that a bio has no virtual address
+mapping at all (unlike buffer heads).
+
+::
+
+  struct bio_vec {
+       struct page     *bv_page;
+       unsigned short  bv_len;
+       unsigned short  bv_offset;
+  };
+
+  /*
+   * main unit of I/O for the block layer and lower layers (ie drivers)
+   */
+  struct bio {
+       struct bio          *bi_next;    /* request queue link */
+       struct block_device *bi_bdev;	/* target device */
+       unsigned long       bi_flags;    /* status, command, etc */
+       unsigned long       bi_opf;       /* low bits: r/w, high: priority */
+
+       unsigned int	bi_vcnt;     /* how may bio_vec's */
+       struct bvec_iter	bi_iter;	/* current index into bio_vec array */
+
+       unsigned int	bi_size;     /* total size in bytes */
+       unsigned short	bi_hw_segments; /* segments after DMA remapping */
+       unsigned int	bi_max;	     /* max bio_vecs we can hold
+                                        used as index into pool */
+       struct bio_vec   *bi_io_vec;  /* the actual vec list */
+       bio_end_io_t	*bi_end_io;  /* bi_end_io (bio) */
+       atomic_t		bi_cnt;	     /* pin count: free when it hits zero */
+       void             *bi_private;
+  };
+
+With this multipage bio design:
+
+- Large i/os can be sent down in one go using a bio_vec list consisting
+  of an array of <page, offset, len> fragments (similar to the way fragments
+  are represented in the zero-copy network code)
+- Splitting of an i/o request across multiple devices (as in the case of
+  lvm or raid) is achieved by cloning the bio (where the clone points to
+  the same bi_io_vec array, but with the index and size accordingly modified)
+- A linked list of bios is used as before for unrelated merges [*]_ - this
+  avoids reallocs and makes independent completions easier to handle.
+- Code that traverses the req list can find all the segments of a bio
+  by using rq_for_each_segment.  This handles the fact that a request
+  has multiple bios, each of which can have multiple segments.
+- Drivers which can't process a large bio in one shot can use the bi_iter
+  field to keep track of the next bio_vec entry to process.
+  (e.g a 1MB bio_vec needs to be handled in max 128kB chunks for IDE)
+  [TBD: Should preferably also have a bi_voffset and bi_vlen to avoid modifying
+  bi_offset an len fields]
+
+.. [*]
+
+	unrelated merges -- a request ends up containing two or more bios that
+	didn't originate from the same place.
+
+bi_end_io() i/o callback gets called on i/o completion of the entire bio.
+
+At a lower level, drivers build a scatter gather list from the merged bios.
+The scatter gather list is in the form of an array of <page, offset, len>
+entries with their corresponding dma address mappings filled in at the
+appropriate time. As an optimization, contiguous physical pages can be
+covered by a single entry where <page> refers to the first page and <len>
+covers the range of pages (up to 16 contiguous pages could be covered this
+way). There is a helper routine (blk_rq_map_sg) which drivers can use to build
+the sg list.
+
+Note: Right now the only user of bios with more than one page is ll_rw_kio,
+which in turn means that only raw I/O uses it (direct i/o may not work
+right now). The intent however is to enable clustering of pages etc to
+become possible. The pagebuf abstraction layer from SGI also uses multi-page
+bios, but that is currently not included in the stock development kernels.
+The same is true of Andrew Morton's work-in-progress multipage bio writeout
+and readahead patches.
+
+2.3 Changes in the Request Structure
+------------------------------------
+
+The request structure is the structure that gets passed down to low level
+drivers. The block layer make_request function builds up a request structure,
+places it on the queue and invokes the drivers request_fn. The driver makes
+use of block layer helper routine elv_next_request to pull the next request
+off the queue. Control or diagnostic functions might bypass block and directly
+invoke underlying driver entry points passing in a specially constructed
+request structure.
+
+Only some relevant fields (mainly those which changed or may be referred
+to in some of the discussion here) are listed below, not necessarily in
+the order in which they occur in the structure (see include/linux/blkdev.h)
+Refer to Documentation/block/request.rst for details about all the request
+structure fields and a quick reference about the layers which are
+supposed to use or modify those fields::
+
+  struct request {
+	struct list_head queuelist;  /* Not meant to be directly accessed by
+					the driver.
+					Used by q->elv_next_request_fn
+					rq->queue is gone
+					*/
+	.
+	.
+	unsigned char cmd[16]; /* prebuilt command data block */
+	unsigned long flags;   /* also includes earlier rq->cmd settings */
+	.
+	.
+	sector_t sector; /* this field is now of type sector_t instead of int
+			    preparation for 64 bit sectors */
+	.
+	.
+
+	/* Number of scatter-gather DMA addr+len pairs after
+	 * physical address coalescing is performed.
+	 */
+	unsigned short nr_phys_segments;
+
+	/* Number of scatter-gather addr+len pairs after
+	 * physical and DMA remapping hardware coalescing is performed.
+	 * This is the number of scatter-gather entries the driver
+	 * will actually have to deal with after DMA mapping is done.
+	 */
+	unsigned short nr_hw_segments;
+
+	/* Various sector counts */
+	unsigned long nr_sectors;  /* no. of sectors left: driver modifiable */
+	unsigned long hard_nr_sectors;  /* block internal copy of above */
+	unsigned int current_nr_sectors; /* no. of sectors left in the
+					   current segment:driver modifiable */
+	unsigned long hard_cur_sectors; /* block internal copy of the above */
+	.
+	.
+	int tag;	/* command tag associated with request */
+	void *special;  /* same as before */
+	char *buffer;   /* valid only for low memory buffers up to
+			 current_nr_sectors */
+	.
+	.
+	struct bio *bio, *biotail;  /* bio list instead of bh */
+	struct request_list *rl;
+  }
+
+See the req_ops and req_flag_bits definitions for an explanation of the various
+flags available. Some bits are used by the block layer or i/o scheduler.
+
+The behaviour of the various sector counts are almost the same as before,
+except that since we have multi-segment bios, current_nr_sectors refers
+to the numbers of sectors in the current segment being processed which could
+be one of the many segments in the current bio (i.e i/o completion unit).
+The nr_sectors value refers to the total number of sectors in the whole
+request that remain to be transferred (no change). The purpose of the
+hard_xxx values is for block to remember these counts every time it hands
+over the request to the driver. These values are updated by block on
+end_that_request_first, i.e. every time the driver completes a part of the
+transfer and invokes block end*request helpers to mark this. The
+driver should not modify these values. The block layer sets up the
+nr_sectors and current_nr_sectors fields (based on the corresponding
+hard_xxx values and the number of bytes transferred) and updates it on
+every transfer that invokes end_that_request_first. It does the same for the
+buffer, bio, bio->bi_iter fields too.
+
+The buffer field is just a virtual address mapping of the current segment
+of the i/o buffer in cases where the buffer resides in low-memory. For high
+memory i/o, this field is not valid and must not be used by drivers.
+
+Code that sets up its own request structures and passes them down to
+a driver needs to be careful about interoperation with the block layer helper
+functions which the driver uses. (Section 1.3)
+
+3. Using bios
+=============
+
+3.1 Setup/Teardown
+------------------
+
+There are routines for managing the allocation, and reference counting, and
+freeing of bios (bio_alloc, bio_get, bio_put).
+
+This makes use of Ingo Molnar's mempool implementation, which enables
+subsystems like bio to maintain their own reserve memory pools for guaranteed
+deadlock-free allocations during extreme VM load. For example, the VM
+subsystem makes use of the block layer to writeout dirty pages in order to be
+able to free up memory space, a case which needs careful handling. The
+allocation logic draws from the preallocated emergency reserve in situations
+where it cannot allocate through normal means. If the pool is empty and it
+can wait, then it would trigger action that would help free up memory or
+replenish the pool (without deadlocking) and wait for availability in the pool.
+If it is in IRQ context, and hence not in a position to do this, allocation
+could fail if the pool is empty. In general mempool always first tries to
+perform allocation without having to wait, even if it means digging into the
+pool as long it is not less that 50% full.
+
+On a free, memory is released to the pool or directly freed depending on
+the current availability in the pool. The mempool interface lets the
+subsystem specify the routines to be used for normal alloc and free. In the
+case of bio, these routines make use of the standard slab allocator.
+
+The caller of bio_alloc is expected to taken certain steps to avoid
+deadlocks, e.g. avoid trying to allocate more memory from the pool while
+already holding memory obtained from the pool.
+
+::
+
+  [TBD: This is a potential issue, though a rare possibility
+   in the bounce bio allocation that happens in the current code, since
+   it ends up allocating a second bio from the same pool while
+   holding the original bio ]
+
+Memory allocated from the pool should be released back within a limited
+amount of time (in the case of bio, that would be after the i/o is completed).
+This ensures that if part of the pool has been used up, some work (in this
+case i/o) must already be in progress and memory would be available when it
+is over. If allocating from multiple pools in the same code path, the order
+or hierarchy of allocation needs to be consistent, just the way one deals
+with multiple locks.
+
+The bio_alloc routine also needs to allocate the bio_vec_list (bvec_alloc())
+for a non-clone bio. There are the 6 pools setup for different size biovecs,
+so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the
+given size from these slabs.
+
+The bio_get() routine may be used to hold an extra reference on a bio prior
+to i/o submission, if the bio fields are likely to be accessed after the
+i/o is issued (since the bio may otherwise get freed in case i/o completion
+happens in the meantime).
+
+The bio_clone_fast() routine may be used to duplicate a bio, where the clone
+shares the bio_vec_list with the original bio (i.e. both point to the
+same bio_vec_list). This would typically be used for splitting i/o requests
+in lvm or md.
+
+3.2 Generic bio helper Routines
+-------------------------------
+
+3.2.1 Traversing segments and completion units in a request
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The macro rq_for_each_segment() should be used for traversing the bios
+in the request list (drivers should avoid directly trying to do it
+themselves). Using these helpers should also make it easier to cope
+with block changes in the future.
+
+::
+
+	struct req_iterator iter;
+	rq_for_each_segment(bio_vec, rq, iter)
+		/* bio_vec is now current segment */
+
+I/O completion callbacks are per-bio rather than per-segment, so drivers
+that traverse bio chains on completion need to keep that in mind. Drivers
+which don't make a distinction between segments and completion units would
+need to be reorganized to support multi-segment bios.
+
+3.2.2 Setting up DMA scatterlists
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The blk_rq_map_sg() helper routine would be used for setting up scatter
+gather lists from a request, so a driver need not do it on its own.
+
+	nr_segments = blk_rq_map_sg(q, rq, scatterlist);
+
+The helper routine provides a level of abstraction which makes it easier
+to modify the internals of request to scatterlist conversion down the line
+without breaking drivers. The blk_rq_map_sg routine takes care of several
+things like collapsing physically contiguous segments (if QUEUE_FLAG_CLUSTER
+is set) and correct segment accounting to avoid exceeding the limits which
+the i/o hardware can handle, based on various queue properties.
+
+- Prevents a clustered segment from crossing a 4GB mem boundary
+- Avoids building segments that would exceed the number of physical
+  memory segments that the driver can handle (phys_segments) and the
+  number that the underlying hardware can handle at once, accounting for
+  DMA remapping (hw_segments)  (i.e. IOMMU aware limits).
+
+Routines which the low level driver can use to set up the segment limits:
+
+blk_queue_max_hw_segments() : Sets an upper limit of the maximum number of
+hw data segments in a request (i.e. the maximum number of address/length
+pairs the host adapter can actually hand to the device at once)
+
+blk_queue_max_phys_segments() : Sets an upper limit on the maximum number
+of physical data segments in a request (i.e. the largest sized scatter list
+a driver could handle)
+
+3.2.3 I/O completion
+^^^^^^^^^^^^^^^^^^^^
+
+The existing generic block layer helper routines end_request,
+end_that_request_first and end_that_request_last can be used for i/o
+completion (and setting things up so the rest of the i/o or the next
+request can be kicked of) as before. With the introduction of multi-page
+bio support, end_that_request_first requires an additional argument indicating
+the number of sectors completed.
+
+3.2.4 Implications for drivers that do not interpret bios
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+(don't handle multiple segments)
+
+Drivers that do not interpret bios e.g those which do not handle multiple
+segments and do not support i/o into high memory addresses (require bounce
+buffers) and expect only virtually mapped buffers, can access the rq->buffer
+field. As before the driver should use current_nr_sectors to determine the
+size of remaining data in the current segment (that is the maximum it can
+transfer in one go unless it interprets segments), and rely on the block layer
+end_request, or end_that_request_first/last to take care of all accounting
+and transparent mapping of the next bio segment when a segment boundary
+is crossed on completion of a transfer. (The end*request* functions should
+be used if only if the request has come down from block/bio path, not for
+direct access requests which only specify rq->buffer without a valid rq->bio)
+
+3.3 I/O Submission
+------------------
+
+The routine submit_bio() is used to submit a single io. Higher level i/o
+routines make use of this:
+
+(a) Buffered i/o:
+
+The routine submit_bh() invokes submit_bio() on a bio corresponding to the
+bh, allocating the bio if required. ll_rw_block() uses submit_bh() as before.
+
+(b) Kiobuf i/o (for raw/direct i/o):
+
+The ll_rw_kio() routine breaks up the kiobuf into page sized chunks and
+maps the array to one or more multi-page bios, issuing submit_bio() to
+perform the i/o on each of these.
+
+The embedded bh array in the kiobuf structure has been removed and no
+preallocation of bios is done for kiobufs. [The intent is to remove the
+blocks array as well, but it's currently in there to kludge around direct i/o.]
+Thus kiobuf allocation has switched back to using kmalloc rather than vmalloc.
+
+Todo/Observation:
+
+ A single kiobuf structure is assumed to correspond to a contiguous range
+ of data, so brw_kiovec() invokes ll_rw_kio for each kiobuf in a kiovec.
+ So right now it wouldn't work for direct i/o on non-contiguous blocks.
+ This is to be resolved.  The eventual direction is to replace kiobuf
+ by kvec's.
+
+ Badari Pulavarty has a patch to implement direct i/o correctly using
+ bio and kvec.
+
+
+(c) Page i/o:
+
+Todo/Under discussion:
+
+ Andrew Morton's multi-page bio patches attempt to issue multi-page
+ writeouts (and reads) from the page cache, by directly building up
+ large bios for submission completely bypassing the usage of buffer
+ heads. This work is still in progress.
+
+ Christoph Hellwig had some code that uses bios for page-io (rather than
+ bh). This isn't included in bio as yet. Christoph was also working on a
+ design for representing virtual/real extents as an entity and modifying
+ some of the address space ops interfaces to utilize this abstraction rather
+ than buffer_heads. (This is somewhat along the lines of the SGI XFS pagebuf
+ abstraction, but intended to be as lightweight as possible).
+
+(d) Direct access i/o:
+
+Direct access requests that do not contain bios would be submitted differently
+as discussed earlier in section 1.3.
+
+Aside:
+
+  Kvec i/o:
+
+  Ben LaHaise's aio code uses a slightly different structure instead
+  of kiobufs, called a kvec_cb. This contains an array of <page, offset, len>
+  tuples (very much like the networking code), together with a callback function
+  and data pointer. This is embedded into a brw_cb structure when passed
+  to brw_kvec_async().
+
+  Now it should be possible to directly map these kvecs to a bio. Just as while
+  cloning, in this case rather than PRE_BUILT bio_vecs, we set the bi_io_vec
+  array pointer to point to the veclet array in kvecs.
+
+  TBD: In order for this to work, some changes are needed in the way multi-page
+  bios are handled today. The values of the tuples in such a vector passed in
+  from higher level code should not be modified by the block layer in the course
+  of its request processing, since that would make it hard for the higher layer
+  to continue to use the vector descriptor (kvec) after i/o completes. Instead,
+  all such transient state should either be maintained in the request structure,
+  and passed on in some way to the endio completion routine.
+
+
+4. The I/O scheduler
+====================
+
+I/O scheduler, a.k.a. elevator, is implemented in two layers.  Generic dispatch
+queue and specific I/O schedulers.  Unless stated otherwise, elevator is used
+to refer to both parts and I/O scheduler to specific I/O schedulers.
+
+Block layer implements generic dispatch queue in `block/*.c`.
+The generic dispatch queue is responsible for requeueing, handling non-fs
+requests and all other subtleties.
+
+Specific I/O schedulers are responsible for ordering normal filesystem
+requests.  They can also choose to delay certain requests to improve
+throughput or whatever purpose.  As the plural form indicates, there are
+multiple I/O schedulers.  They can be built as modules but at least one should
+be built inside the kernel.  Each queue can choose different one and can also
+change to another one dynamically.
+
+A block layer call to the i/o scheduler follows the convention elv_xxx(). This
+calls elevator_xxx_fn in the elevator switch (block/elevator.c). Oh, xxx
+and xxx might not match exactly, but use your imagination. If an elevator
+doesn't implement a function, the switch does nothing or some minimal house
+keeping work.
+
+4.1. I/O scheduler API
+----------------------
+
+The functions an elevator may implement are: (* are mandatory)
+
+=============================== ================================================
+elevator_merge_fn		called to query requests for merge with a bio
+
+elevator_merge_req_fn		called when two requests get merged. the one
+				which gets merged into the other one will be
+				never seen by I/O scheduler again. IOW, after
+				being merged, the request is gone.
+
+elevator_merged_fn		called when a request in the scheduler has been
+				involved in a merge. It is used in the deadline
+				scheduler for example, to reposition the request
+				if its sorting order has changed.
+
+elevator_allow_merge_fn		called whenever the block layer determines
+				that a bio can be merged into an existing
+				request safely. The io scheduler may still
+				want to stop a merge at this point if it
+				results in some sort of conflict internally,
+				this hook allows it to do that. Note however
+				that two *requests* can still be merged at later
+				time. Currently the io scheduler has no way to
+				prevent that. It can only learn about the fact
+				from elevator_merge_req_fn callback.
+
+elevator_dispatch_fn*		fills the dispatch queue with ready requests.
+				I/O schedulers are free to postpone requests by
+				not filling the dispatch queue unless @force
+				is non-zero.  Once dispatched, I/O schedulers
+				are not allowed to manipulate the requests -
+				they belong to generic dispatch queue.
+
+elevator_add_req_fn*		called to add a new request into the scheduler
+
+elevator_former_req_fn
+elevator_latter_req_fn		These return the request before or after the
+				one specified in disk sort order. Used by the
+				block layer to find merge possibilities.
+
+elevator_completed_req_fn	called when a request is completed.
+
+elevator_may_queue_fn		returns true if the scheduler wants to allow the
+				current context to queue a new request even if
+				it is over the queue limit. This must be used
+				very carefully!!
+
+elevator_set_req_fn
+elevator_put_req_fn		Must be used to allocate and free any elevator
+				specific storage for a request.
+
+elevator_activate_req_fn	Called when device driver first sees a request.
+				I/O schedulers can use this callback to
+				determine when actual execution of a request
+				starts.
+elevator_deactivate_req_fn	Called when device driver decides to delay
+				a request by requeueing it.
+
+elevator_init_fn*
+elevator_exit_fn		Allocate and free any elevator specific storage
+				for a queue.
+=============================== ================================================
+
+4.2 Request flows seen by I/O schedulers
+----------------------------------------
+
+All requests seen by I/O schedulers strictly follow one of the following three
+flows.
+
+ set_req_fn ->
+
+ i.   add_req_fn -> (merged_fn ->)* -> dispatch_fn -> activate_req_fn ->
+      (deactivate_req_fn -> activate_req_fn ->)* -> completed_req_fn
+ ii.  add_req_fn -> (merged_fn ->)* -> merge_req_fn
+ iii. [none]
+
+ -> put_req_fn
+
+4.3 I/O scheduler implementation
+--------------------------------
+
+The generic i/o scheduler algorithm attempts to sort/merge/batch requests for
+optimal disk scan and request servicing performance (based on generic
+principles and device capabilities), optimized for:
+
+i.   improved throughput
+ii.  improved latency
+iii. better utilization of h/w & CPU time
+
+Characteristics:
+
+i. Binary tree
+AS and deadline i/o schedulers use red black binary trees for disk position
+sorting and searching, and a fifo linked list for time-based searching. This
+gives good scalability and good availability of information. Requests are
+almost always dispatched in disk sort order, so a cache is kept of the next
+request in sort order to prevent binary tree lookups.
+
+This arrangement is not a generic block layer characteristic however, so
+elevators may implement queues as they please.
+
+ii. Merge hash
+AS and deadline use a hash table indexed by the last sector of a request. This
+enables merging code to quickly look up "back merge" candidates, even when
+multiple I/O streams are being performed at once on one disk.
+
+"Front merges", a new request being merged at the front of an existing request,
+are far less common than "back merges" due to the nature of most I/O patterns.
+Front merges are handled by the binary trees in AS and deadline schedulers.
+
+iii. Plugging the queue to batch requests in anticipation of opportunities for
+     merge/sort optimizations
+
+Plugging is an approach that the current i/o scheduling algorithm resorts to so
+that it collects up enough requests in the queue to be able to take
+advantage of the sorting/merging logic in the elevator. If the
+queue is empty when a request comes in, then it plugs the request queue
+(sort of like plugging the bath tub of a vessel to get fluid to build up)
+till it fills up with a few more requests, before starting to service
+the requests. This provides an opportunity to merge/sort the requests before
+passing them down to the device. There are various conditions when the queue is
+unplugged (to open up the flow again), either through a scheduled task or
+could be on demand. For example wait_on_buffer sets the unplugging going
+through sync_buffer() running blk_run_address_space(mapping). Or the caller
+can do it explicity through blk_unplug(bdev). So in the read case,
+the queue gets explicitly unplugged as part of waiting for completion on that
+buffer.
+
+Aside:
+  This is kind of controversial territory, as it's not clear if plugging is
+  always the right thing to do. Devices typically have their own queues,
+  and allowing a big queue to build up in software, while letting the device be
+  idle for a while may not always make sense. The trick is to handle the fine
+  balance between when to plug and when to open up. Also now that we have
+  multi-page bios being queued in one shot, we may not need to wait to merge
+  a big request from the broken up pieces coming by.
+
+4.4 I/O contexts
+----------------
+
+I/O contexts provide a dynamically allocated per process data area. They may
+be used in I/O schedulers, and in the block layer (could be used for IO statis,
+priorities for example). See `*io_context` in block/ll_rw_blk.c, and as-iosched.c
+for an example of usage in an i/o scheduler.
+
+
+5. Scalability related changes
+==============================
+
+5.1 Granular Locking: io_request_lock replaced by a per-queue lock
+------------------------------------------------------------------
+
+The global io_request_lock has been removed as of 2.5, to avoid
+the scalability bottleneck it was causing, and has been replaced by more
+granular locking. The request queue structure has a pointer to the
+lock to be used for that queue. As a result, locking can now be
+per-queue, with a provision for sharing a lock across queues if
+necessary (e.g the scsi layer sets the queue lock pointers to the
+corresponding adapter lock, which results in a per host locking
+granularity). The locking semantics are the same, i.e. locking is
+still imposed by the block layer, grabbing the lock before
+request_fn execution which it means that lots of older drivers
+should still be SMP safe. Drivers are free to drop the queue
+lock themselves, if required. Drivers that explicitly used the
+io_request_lock for serialization need to be modified accordingly.
+Usually it's as easy as adding a global lock::
+
+	static DEFINE_SPINLOCK(my_driver_lock);
+
+and passing the address to that lock to blk_init_queue().
+
+5.2 64 bit sector numbers (sector_t prepares for 64 bit support)
+----------------------------------------------------------------
+
+The sector number used in the bio structure has been changed to sector_t,
+which could be defined as 64 bit in preparation for 64 bit sector support.
+
+6. Other Changes/Implications
+=============================
+
+6.1 Partition re-mapping handled by the generic block layer
+-----------------------------------------------------------
+
+In 2.5 some of the gendisk/partition related code has been reorganized.
+Now the generic block layer performs partition-remapping early and thus
+provides drivers with a sector number relative to whole device, rather than
+having to take partition number into account in order to arrive at the true
+sector number. The routine blk_partition_remap() is invoked by
+generic_make_request even before invoking the queue specific make_request_fn,
+so the i/o scheduler also gets to operate on whole disk sector numbers. This
+should typically not require changes to block drivers, it just never gets
+to invoke its own partition sector offset calculations since all bios
+sent are offset from the beginning of the device.
+
+
+7. A Few Tips on Migration of older drivers
+===========================================
+
+Old-style drivers that just use CURRENT and ignores clustered requests,
+may not need much change.  The generic layer will automatically handle
+clustered requests, multi-page bios, etc for the driver.
+
+For a low performance driver or hardware that is PIO driven or just doesn't
+support scatter-gather changes should be minimal too.
+
+The following are some points to keep in mind when converting old drivers
+to bio.
+
+Drivers should use elv_next_request to pick up requests and are no longer
+supposed to handle looping directly over the request list.
+(struct request->queue has been removed)
+
+Now end_that_request_first takes an additional number_of_sectors argument.
+It used to handle always just the first buffer_head in a request, now
+it will loop and handle as many sectors (on a bio-segment granularity)
+as specified.
+
+Now bh->b_end_io is replaced by bio->bi_end_io, but most of the time the
+right thing to use is bio_endio(bio) instead.
+
+If the driver is dropping the io_request_lock from its request_fn strategy,
+then it just needs to replace that with q->queue_lock instead.
+
+As described in Sec 1.1, drivers can set max sector size, max segment size
+etc per queue now. Drivers that used to define their own merge functions i
+to handle things like this can now just use the blk_queue_* functions at
+blk_init_queue time.
+
+Drivers no longer have to map a {partition, sector offset} into the
+correct absolute location anymore, this is done by the block layer, so
+where a driver received a request ala this before::
+
+	rq->rq_dev = mk_kdev(3, 5);	/* /dev/hda5 */
+	rq->sector = 0;			/* first sector on hda5 */
+
+it will now see::
+
+	rq->rq_dev = mk_kdev(3, 0);	/* /dev/hda */
+	rq->sector = 123128;		/* offset from start of disk */
+
+As mentioned, there is no virtual mapping of a bio. For DMA, this is
+not a problem as the driver probably never will need a virtual mapping.
+Instead it needs a bus mapping (dma_map_page for a single segment or
+use dma_map_sg for scatter gather) to be able to ship it to the driver. For
+PIO drivers (or drivers that need to revert to PIO transfer once in a
+while (IDE for example)), where the CPU is doing the actual data
+transfer a virtual mapping is needed. If the driver supports highmem I/O,
+(Sec 1.1, (ii) ) it needs to use kmap_atomic or similar to temporarily map
+a bio into the virtual address space.
+
+
+8. Prior/Related/Impacted patches
+=================================
+
+8.1. Earlier kiobuf patches (sct/axboe/chait/hch/mkp)
+-----------------------------------------------------
+
+- orig kiobuf & raw i/o patches (now in 2.4 tree)
+- direct kiobuf based i/o to devices (no intermediate bh's)
+- page i/o using kiobuf
+- kiobuf splitting for lvm (mkp)
+- elevator support for kiobuf request merging (axboe)
+
+8.2. Zero-copy networking (Dave Miller)
+---------------------------------------
+
+8.3. SGI XFS - pagebuf patches - use of kiobufs
+-----------------------------------------------
+8.4. Multi-page pioent patch for bio (Christoph Hellwig)
+--------------------------------------------------------
+8.5. Direct i/o implementation (Andrea Arcangeli) since 2.4.10-pre11
+--------------------------------------------------------------------
+8.6. Async i/o implementation patch (Ben LaHaise)
+-------------------------------------------------
+8.7. EVMS layering design (IBM EVMS team)
+-----------------------------------------
+8.8. Larger page cache size patch (Ben LaHaise) and Large page size (Daniel Phillips)
+-------------------------------------------------------------------------------------
+
+    => larger contiguous physical memory buffers
+
+8.9. VM reservations patch (Ben LaHaise)
+----------------------------------------
+8.10. Write clustering patches ? (Marcelo/Quintela/Riel ?)
+----------------------------------------------------------
+8.11. Block device in page cache patch (Andrea Archangeli) - now in 2.4.10+
+---------------------------------------------------------------------------
+8.12. Multiple block-size transfers for faster raw i/o (Shailabh Nagar, Badari)
+-------------------------------------------------------------------------------
+8.13  Priority based i/o scheduler - prepatches (Arjan van de Ven)
+------------------------------------------------------------------
+8.14  IDE Taskfile i/o patch (Andre Hedrick)
+--------------------------------------------
+8.15  Multi-page writeout and readahead patches (Andrew Morton)
+---------------------------------------------------------------
+8.16  Direct i/o patches for 2.5 using kvec and bio (Badari Pulavarthy)
+-----------------------------------------------------------------------
+
+9. Other References
+===================
+
+9.1 The Splice I/O Model
+------------------------
+
+Larry McVoy (and subsequent discussions on lkml, and Linus' comments - Jan 2001
+
+9.2 Discussions about kiobuf and bh design
+------------------------------------------
+
+On lkml between sct, linus, alan et al - Feb-March 2001 (many of the
+initial thoughts that led to bio were brought up in this discussion thread)
+
+9.3 Discussions on mempool on lkml - Dec 2001.
+----------------------------------------------
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
deleted file mode 100644
index 31c177663ed5..000000000000
--- a/Documentation/block/biodoc.txt
+++ /dev/null
@@ -1,1076 +0,0 @@
-	Notes on the Generic Block Layer Rewrite in Linux 2.5
-	=====================================================
-
-Notes Written on Jan 15, 2002:
-	Jens Axboe <jens.axboe@oracle.com>
-	Suparna Bhattacharya <suparna@in.ibm.com>
-
-Last Updated May 2, 2002
-September 2003: Updated I/O Scheduler portions
-	Nick Piggin <npiggin@kernel.dk>
-
-Introduction:
-
-These are some notes describing some aspects of the 2.5 block layer in the
-context of the bio rewrite. The idea is to bring out some of the key
-changes and a glimpse of the rationale behind those changes.
-
-Please mail corrections & suggestions to suparna@in.ibm.com.
-
-Credits:
----------
-
-2.5 bio rewrite:
-	Jens Axboe <jens.axboe@oracle.com>
-
-Many aspects of the generic block layer redesign were driven by and evolved
-over discussions, prior patches and the collective experience of several
-people. See sections 8 and 9 for a list of some related references.
-
-The following people helped with review comments and inputs for this
-document:
-	Christoph Hellwig <hch@infradead.org>
-	Arjan van de Ven <arjanv@redhat.com>
-	Randy Dunlap <rdunlap@xenotime.net>
-	Andre Hedrick <andre@linux-ide.org>
-
-The following people helped with fixes/contributions to the bio patches
-while it was still work-in-progress:
-	David S. Miller <davem@redhat.com>
-
-
-Description of Contents:
-------------------------
-
-1. Scope for tuning of logic to various needs
-  1.1 Tuning based on device or low level driver capabilities
-	- Per-queue parameters
-	- Highmem I/O support
-	- I/O scheduler modularization
-  1.2 Tuning based on high level requirements/capabilities
-	1.2.1 Request Priority/Latency
-  1.3 Direct access/bypass to lower layers for diagnostics and special
-      device operations
-	1.3.1 Pre-built commands
-2. New flexible and generic but minimalist i/o structure or descriptor
-   (instead of using buffer heads at the i/o layer)
-  2.1 Requirements/Goals addressed
-  2.2 The bio struct in detail (multi-page io unit)
-  2.3 Changes in the request structure
-3. Using bios
-  3.1 Setup/teardown (allocation, splitting)
-  3.2 Generic bio helper routines
-    3.2.1 Traversing segments and completion units in a request
-    3.2.2 Setting up DMA scatterlists
-    3.2.3 I/O completion
-    3.2.4 Implications for drivers that do not interpret bios (don't handle
- 	  multiple segments)
-  3.3 I/O submission
-4. The I/O scheduler
-5. Scalability related changes
-  5.1 Granular locking: Removal of io_request_lock
-  5.2 Prepare for transition to 64 bit sector_t
-6. Other Changes/Implications
-  6.1 Partition re-mapping handled by the generic block layer
-7. A few tips on migration of older drivers
-8. A list of prior/related/impacted patches/ideas
-9. Other References/Discussion Threads
-
----------------------------------------------------------------------------
-
-Bio Notes
---------
-
-Let us discuss the changes in the context of how some overall goals for the
-block layer are addressed.
-
-1. Scope for tuning the generic logic to satisfy various requirements
-
-The block layer design supports adaptable abstractions to handle common
-processing with the ability to tune the logic to an appropriate extent
-depending on the nature of the device and the requirements of the caller.
-One of the objectives of the rewrite was to increase the degree of tunability
-and to enable higher level code to utilize underlying device/driver
-capabilities to the maximum extent for better i/o performance. This is
-important especially in the light of ever improving hardware capabilities
-and application/middleware software designed to take advantage of these
-capabilities.
-
-1.1 Tuning based on low level device / driver capabilities
-
-Sophisticated devices with large built-in caches, intelligent i/o scheduling
-optimizations, high memory DMA support, etc may find some of the
-generic processing an overhead, while for less capable devices the
-generic functionality is essential for performance or correctness reasons.
-Knowledge of some of the capabilities or parameters of the device should be
-used at the generic block layer to take the right decisions on
-behalf of the driver.
-
-How is this achieved ?
-
-Tuning at a per-queue level:
-
-i. Per-queue limits/values exported to the generic layer by the driver
-
-Various parameters that the generic i/o scheduler logic uses are set at
-a per-queue level (e.g maximum request size, maximum number of segments in
-a scatter-gather list, logical block size)
-
-Some parameters that were earlier available as global arrays indexed by
-major/minor are now directly associated with the queue. Some of these may
-move into the block device structure in the future. Some characteristics
-have been incorporated into a queue flags field rather than separate fields
-in themselves.  There are blk_queue_xxx functions to set the parameters,
-rather than update the fields directly
-
-Some new queue property settings:
-
-	blk_queue_bounce_limit(q, u64 dma_address)
-		Enable I/O to highmem pages, dma_address being the
-		limit. No highmem default.
-
-	blk_queue_max_sectors(q, max_sectors)
-		Sets two variables that limit the size of the request.
-
-		- The request queue's max_sectors, which is a soft size in
-		units of 512 byte sectors, and could be dynamically varied
-		by the core kernel.
-
-		- The request queue's max_hw_sectors, which is a hard limit
-		and reflects the maximum size request a driver can handle
-		in units of 512 byte sectors.
-
-		The default for both max_sectors and max_hw_sectors is
-		255. The upper limit of max_sectors is 1024.
-
-	blk_queue_max_phys_segments(q, max_segments)
-		Maximum physical segments you can handle in a request. 128
-		default (driver limit). (See 3.2.2)
-
-	blk_queue_max_hw_segments(q, max_segments)
-		Maximum dma segments the hardware can handle in a request. 128
-		default (host adapter limit, after dma remapping).
-		(See 3.2.2)
-
-	blk_queue_max_segment_size(q, max_seg_size)
-		Maximum size of a clustered segment, 64kB default.
-
-	blk_queue_logical_block_size(q, logical_block_size)
-		Lowest possible sector size that the hardware can operate
-		on, 512 bytes default.
-
-New queue flags:
-
-	QUEUE_FLAG_CLUSTER (see 3.2.2)
-	QUEUE_FLAG_QUEUED (see 3.2.4)
-
-
-ii. High-mem i/o capabilities are now considered the default
-
-The generic bounce buffer logic, present in 2.4, where the block layer would
-by default copyin/out i/o requests on high-memory buffers to low-memory buffers
-assuming that the driver wouldn't be able to handle it directly, has been
-changed in 2.5. The bounce logic is now applied only for memory ranges
-for which the device cannot handle i/o. A driver can specify this by
-setting the queue bounce limit for the request queue for the device
-(blk_queue_bounce_limit()). This avoids the inefficiencies of the copyin/out
-where a device is capable of handling high memory i/o.
-
-In order to enable high-memory i/o where the device is capable of supporting
-it, the pci dma mapping routines and associated data structures have now been
-modified to accomplish a direct page -> bus translation, without requiring
-a virtual address mapping (unlike the earlier scheme of virtual address
--> bus translation). So this works uniformly for high-memory pages (which
-do not have a corresponding kernel virtual address space mapping) and
-low-memory pages.
-
-Note: Please refer to Documentation/DMA-API-HOWTO.txt for a discussion
-on PCI high mem DMA aspects and mapping of scatter gather lists, and support
-for 64 bit PCI.
-
-Special handling is required only for cases where i/o needs to happen on
-pages at physical memory addresses beyond what the device can support. In these
-cases, a bounce bio representing a buffer from the supported memory range
-is used for performing the i/o with copyin/copyout as needed depending on
-the type of the operation.  For example, in case of a read operation, the
-data read has to be copied to the original buffer on i/o completion, so a
-callback routine is set up to do this, while for write, the data is copied
-from the original buffer to the bounce buffer prior to issuing the
-operation. Since an original buffer may be in a high memory area that's not
-mapped in kernel virtual addr, a kmap operation may be required for
-performing the copy, and special care may be needed in the completion path
-as it may not be in irq context. Special care is also required (by way of
-GFP flags) when allocating bounce buffers, to avoid certain highmem
-deadlock possibilities.
-
-It is also possible that a bounce buffer may be allocated from high-memory
-area that's not mapped in kernel virtual addr, but within the range that the
-device can use directly; so the bounce page may need to be kmapped during
-copy operations. [Note: This does not hold in the current implementation,
-though]
-
-There are some situations when pages from high memory may need to
-be kmapped, even if bounce buffers are not necessary. For example a device
-may need to abort DMA operations and revert to PIO for the transfer, in
-which case a virtual mapping of the page is required. For SCSI it is also
-done in some scenarios where the low level driver cannot be trusted to
-handle a single sg entry correctly. The driver is expected to perform the
-kmaps as needed on such occasions as appropriate. A driver could also use
-the blk_queue_bounce() routine on its own to bounce highmem i/o to low
-memory for specific requests if so desired.
-
-iii. The i/o scheduler algorithm itself can be replaced/set as appropriate
-
-As in 2.4, it is possible to plugin a brand new i/o scheduler for a particular
-queue or pick from (copy) existing generic schedulers and replace/override
-certain portions of it. The 2.5 rewrite provides improved modularization
-of the i/o scheduler. There are more pluggable callbacks, e.g for init,
-add request, extract request, which makes it possible to abstract specific
-i/o scheduling algorithm aspects and details outside of the generic loop.
-It also makes it possible to completely hide the implementation details of
-the i/o scheduler from block drivers.
-
-I/O scheduler wrappers are to be used instead of accessing the queue directly.
-See section 4. The I/O scheduler for details.
-
-1.2 Tuning Based on High level code capabilities
-
-i. Application capabilities for raw i/o
-
-This comes from some of the high-performance database/middleware
-requirements where an application prefers to make its own i/o scheduling
-decisions based on an understanding of the access patterns and i/o
-characteristics
-
-ii. High performance filesystems or other higher level kernel code's
-capabilities
-
-Kernel components like filesystems could also take their own i/o scheduling
-decisions for optimizing performance. Journalling filesystems may need
-some control over i/o ordering.
-
-What kind of support exists at the generic block layer for this ?
-
-The flags and rw fields in the bio structure can be used for some tuning
-from above e.g indicating that an i/o is just a readahead request, or priority
-settings (currently unused). As far as user applications are concerned they
-would need an additional mechanism either via open flags or ioctls, or some
-other upper level mechanism to communicate such settings to block.
-
-1.2.1 Request Priority/Latency
-
-Todo/Under discussion:
-Arjan's proposed request priority scheme allows higher levels some broad
-  control (high/med/low) over the priority  of an i/o request vs other pending
-  requests in the queue. For example it allows reads for bringing in an
-  executable page on demand to be given a higher priority over pending write
-  requests which haven't aged too much on the queue. Potentially this priority
-  could even be exposed to applications in some manner, providing higher level
-  tunability. Time based aging avoids starvation of lower priority
-  requests. Some bits in the bi_opf flags field in the bio structure are
-  intended to be used for this priority information.
-
-
-1.3 Direct Access to Low level Device/Driver Capabilities (Bypass mode)
-    (e.g Diagnostics, Systems Management)
-
-There are situations where high-level code needs to have direct access to
-the low level device capabilities or requires the ability to issue commands
-to the device bypassing some of the intermediate i/o layers.
-These could, for example, be special control commands issued through ioctl
-interfaces, or could be raw read/write commands that stress the drive's
-capabilities for certain kinds of fitness tests. Having direct interfaces at
-multiple levels without having to pass through upper layers makes
-it possible to perform bottom up validation of the i/o path, layer by
-layer, starting from the media.
-
-The normal i/o submission interfaces, e.g submit_bio, could be bypassed
-for specially crafted requests which such ioctl or diagnostics
-interfaces would typically use, and the elevator add_request routine
-can instead be used to directly insert such requests in the queue or preferably
-the blk_do_rq routine can be used to place the request on the queue and
-wait for completion. Alternatively, sometimes the caller might just
-invoke a lower level driver specific interface with the request as a
-parameter.
-
-If the request is a means for passing on special information associated with
-the command, then such information is associated with the request->special
-field (rather than misuse the request->buffer field which is meant for the
-request data buffer's virtual mapping).
-
-For passing request data, the caller must build up a bio descriptor
-representing the concerned memory buffer if the underlying driver interprets
-bio segments or uses the block layer end*request* functions for i/o
-completion. Alternatively one could directly use the request->buffer field to
-specify the virtual address of the buffer, if the driver expects buffer
-addresses passed in this way and ignores bio entries for the request type
-involved. In the latter case, the driver would modify and manage the
-request->buffer, request->sector and request->nr_sectors or
-request->current_nr_sectors fields itself rather than using the block layer
-end_request or end_that_request_first completion interfaces.
-(See 2.3 or Documentation/block/request.txt for a brief explanation of
-the request structure fields)
-
-[TBD: end_that_request_last should be usable even in this case;
-Perhaps an end_that_direct_request_first routine could be implemented to make
-handling direct requests easier for such drivers; Also for drivers that
-expect bios, a helper function could be provided for setting up a bio
-corresponding to a data buffer]
-
-<JENS: I dont understand the above, why is end_that_request_first() not
-usable? Or _last for that matter. I must be missing something>
-<SUP: What I meant here was that if the request doesn't have a bio, then
- end_that_request_first doesn't modify nr_sectors or current_nr_sectors,
- and hence can't be used for advancing request state settings on the
- completion of partial transfers. The driver has to modify these fields 
- directly by hand.
- This is because end_that_request_first only iterates over the bio list,
- and always returns 0 if there are none associated with the request.
- _last works OK in this case, and is not a problem, as I mentioned earlier
->
-
-1.3.1 Pre-built Commands
-
-A request can be created with a pre-built custom command  to be sent directly
-to the device. The cmd block in the request structure has room for filling
-in the command bytes. (i.e rq->cmd is now 16 bytes in size, and meant for
-command pre-building, and the type of the request is now indicated
-through rq->flags instead of via rq->cmd)
-
-The request structure flags can be set up to indicate the type of request
-in such cases (REQ_PC: direct packet command passed to driver, REQ_BLOCK_PC:
-packet command issued via blk_do_rq, REQ_SPECIAL: special request).
-
-It can help to pre-build device commands for requests in advance.
-Drivers can now specify a request prepare function (q->prep_rq_fn) that the
-block layer would invoke to pre-build device commands for a given request,
-or perform other preparatory processing for the request. This is routine is
-called by elv_next_request(), i.e. typically just before servicing a request.
-(The prepare function would not be called for requests that have RQF_DONTPREP
-enabled)
-
-Aside:
-  Pre-building could possibly even be done early, i.e before placing the
-  request on the queue, rather than construct the command on the fly in the
-  driver while servicing the request queue when it may affect latencies in
-  interrupt context or responsiveness in general. One way to add early
-  pre-building would be to do it whenever we fail to merge on a request.
-  Now REQ_NOMERGE is set in the request flags to skip this one in the future,
-  which means that it will not change before we feed it to the device. So
-  the pre-builder hook can be invoked there.
-
-
-2. Flexible and generic but minimalist i/o structure/descriptor.
-
-2.1 Reason for a new structure and requirements addressed
-
-Prior to 2.5, buffer heads were used as the unit of i/o at the generic block
-layer, and the low level request structure was associated with a chain of
-buffer heads for a contiguous i/o request. This led to certain inefficiencies
-when it came to large i/o requests and readv/writev style operations, as it
-forced such requests to be broken up into small chunks before being passed
-on to the generic block layer, only to be merged by the i/o scheduler
-when the underlying device was capable of handling the i/o in one shot.
-Also, using the buffer head as an i/o structure for i/os that didn't originate
-from the buffer cache unnecessarily added to the weight of the descriptors
-which were generated for each such chunk.
-
-The following were some of the goals and expectations considered in the
-redesign of the block i/o data structure in 2.5.
-
-i.  Should be appropriate as a descriptor for both raw and buffered i/o  -
-    avoid cache related fields which are irrelevant in the direct/page i/o path,
-    or filesystem block size alignment restrictions which may not be relevant
-    for raw i/o.
-ii. Ability to represent high-memory buffers (which do not have a virtual
-    address mapping in kernel address space).
-iii.Ability to represent large i/os w/o unnecessarily breaking them up (i.e
-    greater than PAGE_SIZE chunks in one shot)
-iv. At the same time, ability to retain independent identity of i/os from
-    different sources or i/o units requiring individual completion (e.g. for
-    latency reasons)
-v.  Ability to represent an i/o involving multiple physical memory segments
-    (including non-page aligned page fragments, as specified via readv/writev)
-    without unnecessarily breaking it up, if the underlying device is capable of
-    handling it.
-vi. Preferably should be based on a memory descriptor structure that can be
-    passed around different types of subsystems or layers, maybe even
-    networking, without duplication or extra copies of data/descriptor fields
-    themselves in the process
-vii.Ability to handle the possibility of splits/merges as the structure passes
-    through layered drivers (lvm, md, evms), with minimal overhead.
-
-The solution was to define a new structure (bio)  for the block layer,
-instead of using the buffer head structure (bh) directly, the idea being
-avoidance of some associated baggage and limitations. The bio structure
-is uniformly used for all i/o at the block layer ; it forms a part of the
-bh structure for buffered i/o, and in the case of raw/direct i/o kiobufs are
-mapped to bio structures.
-
-2.2 The bio struct
-
-The bio structure uses a vector representation pointing to an array of tuples
-of <page, offset, len> to describe the i/o buffer, and has various other
-fields describing i/o parameters and state that needs to be maintained for
-performing the i/o.
-
-Notice that this representation means that a bio has no virtual address
-mapping at all (unlike buffer heads).
-
-struct bio_vec {
-       struct page     *bv_page;
-       unsigned short  bv_len;
-       unsigned short  bv_offset;
-};
-
-/*
- * main unit of I/O for the block layer and lower layers (ie drivers)
- */
-struct bio {
-       struct bio          *bi_next;    /* request queue link */
-       struct block_device *bi_bdev;	/* target device */
-       unsigned long       bi_flags;    /* status, command, etc */
-       unsigned long       bi_opf;       /* low bits: r/w, high: priority */
-
-       unsigned int	bi_vcnt;     /* how may bio_vec's */
-       struct bvec_iter	bi_iter;	/* current index into bio_vec array */
-
-       unsigned int	bi_size;     /* total size in bytes */
-       unsigned short	bi_hw_segments; /* segments after DMA remapping */
-       unsigned int	bi_max;	     /* max bio_vecs we can hold
-                                        used as index into pool */
-       struct bio_vec   *bi_io_vec;  /* the actual vec list */
-       bio_end_io_t	*bi_end_io;  /* bi_end_io (bio) */
-       atomic_t		bi_cnt;	     /* pin count: free when it hits zero */
-       void             *bi_private;
-};
-
-With this multipage bio design:
-
-- Large i/os can be sent down in one go using a bio_vec list consisting
-  of an array of <page, offset, len> fragments (similar to the way fragments
-  are represented in the zero-copy network code)
-- Splitting of an i/o request across multiple devices (as in the case of
-  lvm or raid) is achieved by cloning the bio (where the clone points to
-  the same bi_io_vec array, but with the index and size accordingly modified)
-- A linked list of bios is used as before for unrelated merges (*) - this
-  avoids reallocs and makes independent completions easier to handle.
-- Code that traverses the req list can find all the segments of a bio
-  by using rq_for_each_segment.  This handles the fact that a request
-  has multiple bios, each of which can have multiple segments.
-- Drivers which can't process a large bio in one shot can use the bi_iter
-  field to keep track of the next bio_vec entry to process.
-  (e.g a 1MB bio_vec needs to be handled in max 128kB chunks for IDE)
-  [TBD: Should preferably also have a bi_voffset and bi_vlen to avoid modifying
-   bi_offset an len fields]
-
-(*) unrelated merges -- a request ends up containing two or more bios that
-    didn't originate from the same place.
-
-bi_end_io() i/o callback gets called on i/o completion of the entire bio.
-
-At a lower level, drivers build a scatter gather list from the merged bios.
-The scatter gather list is in the form of an array of <page, offset, len>
-entries with their corresponding dma address mappings filled in at the
-appropriate time. As an optimization, contiguous physical pages can be
-covered by a single entry where <page> refers to the first page and <len>
-covers the range of pages (up to 16 contiguous pages could be covered this
-way). There is a helper routine (blk_rq_map_sg) which drivers can use to build
-the sg list.
-
-Note: Right now the only user of bios with more than one page is ll_rw_kio,
-which in turn means that only raw I/O uses it (direct i/o may not work
-right now). The intent however is to enable clustering of pages etc to
-become possible. The pagebuf abstraction layer from SGI also uses multi-page
-bios, but that is currently not included in the stock development kernels.
-The same is true of Andrew Morton's work-in-progress multipage bio writeout 
-and readahead patches.
-
-2.3 Changes in the Request Structure
-
-The request structure is the structure that gets passed down to low level
-drivers. The block layer make_request function builds up a request structure,
-places it on the queue and invokes the drivers request_fn. The driver makes
-use of block layer helper routine elv_next_request to pull the next request
-off the queue. Control or diagnostic functions might bypass block and directly
-invoke underlying driver entry points passing in a specially constructed
-request structure.
-
-Only some relevant fields (mainly those which changed or may be referred
-to in some of the discussion here) are listed below, not necessarily in
-the order in which they occur in the structure (see include/linux/blkdev.h)
-Refer to Documentation/block/request.txt for details about all the request
-structure fields and a quick reference about the layers which are
-supposed to use or modify those fields.
-
-struct request {
-	struct list_head queuelist;  /* Not meant to be directly accessed by
-					the driver.
-					Used by q->elv_next_request_fn
-					rq->queue is gone
-					*/
-	.
-	.
-	unsigned char cmd[16]; /* prebuilt command data block */
-	unsigned long flags;   /* also includes earlier rq->cmd settings */
-	.
-	.
-	sector_t sector; /* this field is now of type sector_t instead of int
-			    preparation for 64 bit sectors */
-	.
-	.
-
-	/* Number of scatter-gather DMA addr+len pairs after
-	 * physical address coalescing is performed.
-	 */
-	unsigned short nr_phys_segments;
-
-	/* Number of scatter-gather addr+len pairs after
-	 * physical and DMA remapping hardware coalescing is performed.
-	 * This is the number of scatter-gather entries the driver
-	 * will actually have to deal with after DMA mapping is done.
-	 */
-	unsigned short nr_hw_segments;
-
-	/* Various sector counts */
-	unsigned long nr_sectors;  /* no. of sectors left: driver modifiable */
-	unsigned long hard_nr_sectors;  /* block internal copy of above */
-	unsigned int current_nr_sectors; /* no. of sectors left in the
-					   current segment:driver modifiable */
-	unsigned long hard_cur_sectors; /* block internal copy of the above */
-	.
-	.
-	int tag;	/* command tag associated with request */
-	void *special;  /* same as before */
-	char *buffer;   /* valid only for low memory buffers up to
-			 current_nr_sectors */
-	.
-	.
-	struct bio *bio, *biotail;  /* bio list instead of bh */
-	struct request_list *rl;
-}
-	
-See the req_ops and req_flag_bits definitions for an explanation of the various
-flags available. Some bits are used by the block layer or i/o scheduler.
-	
-The behaviour of the various sector counts are almost the same as before,
-except that since we have multi-segment bios, current_nr_sectors refers
-to the numbers of sectors in the current segment being processed which could
-be one of the many segments in the current bio (i.e i/o completion unit).
-The nr_sectors value refers to the total number of sectors in the whole
-request that remain to be transferred (no change). The purpose of the
-hard_xxx values is for block to remember these counts every time it hands
-over the request to the driver. These values are updated by block on
-end_that_request_first, i.e. every time the driver completes a part of the
-transfer and invokes block end*request helpers to mark this. The
-driver should not modify these values. The block layer sets up the
-nr_sectors and current_nr_sectors fields (based on the corresponding
-hard_xxx values and the number of bytes transferred) and updates it on
-every transfer that invokes end_that_request_first. It does the same for the
-buffer, bio, bio->bi_iter fields too.
-
-The buffer field is just a virtual address mapping of the current segment
-of the i/o buffer in cases where the buffer resides in low-memory. For high
-memory i/o, this field is not valid and must not be used by drivers.
-
-Code that sets up its own request structures and passes them down to
-a driver needs to be careful about interoperation with the block layer helper
-functions which the driver uses. (Section 1.3)
-
-3. Using bios
-
-3.1 Setup/Teardown
-
-There are routines for managing the allocation, and reference counting, and
-freeing of bios (bio_alloc, bio_get, bio_put).
-
-This makes use of Ingo Molnar's mempool implementation, which enables
-subsystems like bio to maintain their own reserve memory pools for guaranteed
-deadlock-free allocations during extreme VM load. For example, the VM
-subsystem makes use of the block layer to writeout dirty pages in order to be
-able to free up memory space, a case which needs careful handling. The
-allocation logic draws from the preallocated emergency reserve in situations
-where it cannot allocate through normal means. If the pool is empty and it
-can wait, then it would trigger action that would help free up memory or
-replenish the pool (without deadlocking) and wait for availability in the pool.
-If it is in IRQ context, and hence not in a position to do this, allocation
-could fail if the pool is empty. In general mempool always first tries to
-perform allocation without having to wait, even if it means digging into the
-pool as long it is not less that 50% full.
-
-On a free, memory is released to the pool or directly freed depending on
-the current availability in the pool. The mempool interface lets the
-subsystem specify the routines to be used for normal alloc and free. In the
-case of bio, these routines make use of the standard slab allocator.
-
-The caller of bio_alloc is expected to taken certain steps to avoid
-deadlocks, e.g. avoid trying to allocate more memory from the pool while
-already holding memory obtained from the pool.
-[TBD: This is a potential issue, though a rare possibility
- in the bounce bio allocation that happens in the current code, since
- it ends up allocating a second bio from the same pool while
- holding the original bio ]
-
-Memory allocated from the pool should be released back within a limited
-amount of time (in the case of bio, that would be after the i/o is completed).
-This ensures that if part of the pool has been used up, some work (in this
-case i/o) must already be in progress and memory would be available when it
-is over. If allocating from multiple pools in the same code path, the order
-or hierarchy of allocation needs to be consistent, just the way one deals
-with multiple locks.
-
-The bio_alloc routine also needs to allocate the bio_vec_list (bvec_alloc())
-for a non-clone bio. There are the 6 pools setup for different size biovecs,
-so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the
-given size from these slabs.
-
-The bio_get() routine may be used to hold an extra reference on a bio prior
-to i/o submission, if the bio fields are likely to be accessed after the
-i/o is issued (since the bio may otherwise get freed in case i/o completion
-happens in the meantime).
-
-The bio_clone_fast() routine may be used to duplicate a bio, where the clone
-shares the bio_vec_list with the original bio (i.e. both point to the
-same bio_vec_list). This would typically be used for splitting i/o requests
-in lvm or md.
-
-3.2 Generic bio helper Routines
-
-3.2.1 Traversing segments and completion units in a request
-
-The macro rq_for_each_segment() should be used for traversing the bios
-in the request list (drivers should avoid directly trying to do it
-themselves). Using these helpers should also make it easier to cope
-with block changes in the future.
-
-	struct req_iterator iter;
-	rq_for_each_segment(bio_vec, rq, iter)
-		/* bio_vec is now current segment */
-
-I/O completion callbacks are per-bio rather than per-segment, so drivers
-that traverse bio chains on completion need to keep that in mind. Drivers
-which don't make a distinction between segments and completion units would
-need to be reorganized to support multi-segment bios.
-
-3.2.2 Setting up DMA scatterlists
-
-The blk_rq_map_sg() helper routine would be used for setting up scatter
-gather lists from a request, so a driver need not do it on its own.
-
-	nr_segments = blk_rq_map_sg(q, rq, scatterlist);
-
-The helper routine provides a level of abstraction which makes it easier
-to modify the internals of request to scatterlist conversion down the line
-without breaking drivers. The blk_rq_map_sg routine takes care of several
-things like collapsing physically contiguous segments (if QUEUE_FLAG_CLUSTER
-is set) and correct segment accounting to avoid exceeding the limits which
-the i/o hardware can handle, based on various queue properties.
-
-- Prevents a clustered segment from crossing a 4GB mem boundary
-- Avoids building segments that would exceed the number of physical
-  memory segments that the driver can handle (phys_segments) and the
-  number that the underlying hardware can handle at once, accounting for
-  DMA remapping (hw_segments)  (i.e. IOMMU aware limits).
-
-Routines which the low level driver can use to set up the segment limits:
-
-blk_queue_max_hw_segments() : Sets an upper limit of the maximum number of
-hw data segments in a request (i.e. the maximum number of address/length
-pairs the host adapter can actually hand to the device at once)
-
-blk_queue_max_phys_segments() : Sets an upper limit on the maximum number
-of physical data segments in a request (i.e. the largest sized scatter list
-a driver could handle)
-
-3.2.3 I/O completion
-
-The existing generic block layer helper routines end_request,
-end_that_request_first and end_that_request_last can be used for i/o
-completion (and setting things up so the rest of the i/o or the next
-request can be kicked of) as before. With the introduction of multi-page
-bio support, end_that_request_first requires an additional argument indicating
-the number of sectors completed.
-
-3.2.4 Implications for drivers that do not interpret bios (don't handle
- multiple segments)
-
-Drivers that do not interpret bios e.g those which do not handle multiple
-segments and do not support i/o into high memory addresses (require bounce
-buffers) and expect only virtually mapped buffers, can access the rq->buffer
-field. As before the driver should use current_nr_sectors to determine the
-size of remaining data in the current segment (that is the maximum it can
-transfer in one go unless it interprets segments), and rely on the block layer
-end_request, or end_that_request_first/last to take care of all accounting
-and transparent mapping of the next bio segment when a segment boundary
-is crossed on completion of a transfer. (The end*request* functions should
-be used if only if the request has come down from block/bio path, not for
-direct access requests which only specify rq->buffer without a valid rq->bio)
-
-3.3 I/O Submission
-
-The routine submit_bio() is used to submit a single io. Higher level i/o
-routines make use of this:
-
-(a) Buffered i/o:
-The routine submit_bh() invokes submit_bio() on a bio corresponding to the
-bh, allocating the bio if required. ll_rw_block() uses submit_bh() as before.
-
-(b) Kiobuf i/o (for raw/direct i/o):
-The ll_rw_kio() routine breaks up the kiobuf into page sized chunks and
-maps the array to one or more multi-page bios, issuing submit_bio() to
-perform the i/o on each of these.
-
-The embedded bh array in the kiobuf structure has been removed and no
-preallocation of bios is done for kiobufs. [The intent is to remove the
-blocks array as well, but it's currently in there to kludge around direct i/o.]
-Thus kiobuf allocation has switched back to using kmalloc rather than vmalloc.
-
-Todo/Observation:
-
- A single kiobuf structure is assumed to correspond to a contiguous range
- of data, so brw_kiovec() invokes ll_rw_kio for each kiobuf in a kiovec.
- So right now it wouldn't work for direct i/o on non-contiguous blocks.
- This is to be resolved.  The eventual direction is to replace kiobuf
- by kvec's.
-
- Badari Pulavarty has a patch to implement direct i/o correctly using
- bio and kvec.
-
-
-(c) Page i/o:
-Todo/Under discussion:
-
- Andrew Morton's multi-page bio patches attempt to issue multi-page
- writeouts (and reads) from the page cache, by directly building up
- large bios for submission completely bypassing the usage of buffer
- heads. This work is still in progress.
-
- Christoph Hellwig had some code that uses bios for page-io (rather than
- bh). This isn't included in bio as yet. Christoph was also working on a
- design for representing virtual/real extents as an entity and modifying
- some of the address space ops interfaces to utilize this abstraction rather
- than buffer_heads. (This is somewhat along the lines of the SGI XFS pagebuf
- abstraction, but intended to be as lightweight as possible).
-
-(d) Direct access i/o:
-Direct access requests that do not contain bios would be submitted differently
-as discussed earlier in section 1.3.
-
-Aside:
-
-  Kvec i/o:
-
-  Ben LaHaise's aio code uses a slightly different structure instead
-  of kiobufs, called a kvec_cb. This contains an array of <page, offset, len>
-  tuples (very much like the networking code), together with a callback function
-  and data pointer. This is embedded into a brw_cb structure when passed
-  to brw_kvec_async().
-
-  Now it should be possible to directly map these kvecs to a bio. Just as while
-  cloning, in this case rather than PRE_BUILT bio_vecs, we set the bi_io_vec
-  array pointer to point to the veclet array in kvecs.
-
-  TBD: In order for this to work, some changes are needed in the way multi-page
-  bios are handled today. The values of the tuples in such a vector passed in
-  from higher level code should not be modified by the block layer in the course
-  of its request processing, since that would make it hard for the higher layer
-  to continue to use the vector descriptor (kvec) after i/o completes. Instead,
-  all such transient state should either be maintained in the request structure,
-  and passed on in some way to the endio completion routine.
-
-
-4. The I/O scheduler
-I/O scheduler, a.k.a. elevator, is implemented in two layers.  Generic dispatch
-queue and specific I/O schedulers.  Unless stated otherwise, elevator is used
-to refer to both parts and I/O scheduler to specific I/O schedulers.
-
-Block layer implements generic dispatch queue in block/*.c.
-The generic dispatch queue is responsible for requeueing, handling non-fs
-requests and all other subtleties.
-
-Specific I/O schedulers are responsible for ordering normal filesystem
-requests.  They can also choose to delay certain requests to improve
-throughput or whatever purpose.  As the plural form indicates, there are
-multiple I/O schedulers.  They can be built as modules but at least one should
-be built inside the kernel.  Each queue can choose different one and can also
-change to another one dynamically.
-
-A block layer call to the i/o scheduler follows the convention elv_xxx(). This
-calls elevator_xxx_fn in the elevator switch (block/elevator.c). Oh, xxx
-and xxx might not match exactly, but use your imagination. If an elevator
-doesn't implement a function, the switch does nothing or some minimal house
-keeping work.
-
-4.1. I/O scheduler API
-
-The functions an elevator may implement are: (* are mandatory)
-elevator_merge_fn		called to query requests for merge with a bio
-
-elevator_merge_req_fn		called when two requests get merged. the one
-				which gets merged into the other one will be
-				never seen by I/O scheduler again. IOW, after
-				being merged, the request is gone.
-
-elevator_merged_fn		called when a request in the scheduler has been
-				involved in a merge. It is used in the deadline
-				scheduler for example, to reposition the request
-				if its sorting order has changed.
-
-elevator_allow_merge_fn		called whenever the block layer determines
-				that a bio can be merged into an existing
-				request safely. The io scheduler may still
-				want to stop a merge at this point if it
-				results in some sort of conflict internally,
-				this hook allows it to do that. Note however
-				that two *requests* can still be merged at later
-				time. Currently the io scheduler has no way to
-				prevent that. It can only learn about the fact
-				from elevator_merge_req_fn callback.
-
-elevator_dispatch_fn*		fills the dispatch queue with ready requests.
-				I/O schedulers are free to postpone requests by
-				not filling the dispatch queue unless @force
-				is non-zero.  Once dispatched, I/O schedulers
-				are not allowed to manipulate the requests -
-				they belong to generic dispatch queue.
-
-elevator_add_req_fn*		called to add a new request into the scheduler
-
-elevator_former_req_fn
-elevator_latter_req_fn		These return the request before or after the
-				one specified in disk sort order. Used by the
-				block layer to find merge possibilities.
-
-elevator_completed_req_fn	called when a request is completed.
-
-elevator_may_queue_fn		returns true if the scheduler wants to allow the
-				current context to queue a new request even if
-				it is over the queue limit. This must be used
-				very carefully!!
-
-elevator_set_req_fn
-elevator_put_req_fn		Must be used to allocate and free any elevator
-				specific storage for a request.
-
-elevator_activate_req_fn	Called when device driver first sees a request.
-				I/O schedulers can use this callback to
-				determine when actual execution of a request
-				starts.
-elevator_deactivate_req_fn	Called when device driver decides to delay
-				a request by requeueing it.
-
-elevator_init_fn*
-elevator_exit_fn		Allocate and free any elevator specific storage
-				for a queue.
-
-4.2 Request flows seen by I/O schedulers
-All requests seen by I/O schedulers strictly follow one of the following three
-flows.
-
- set_req_fn ->
-
- i.   add_req_fn -> (merged_fn ->)* -> dispatch_fn -> activate_req_fn ->
-      (deactivate_req_fn -> activate_req_fn ->)* -> completed_req_fn
- ii.  add_req_fn -> (merged_fn ->)* -> merge_req_fn
- iii. [none]
-
- -> put_req_fn
-
-4.3 I/O scheduler implementation
-The generic i/o scheduler algorithm attempts to sort/merge/batch requests for
-optimal disk scan and request servicing performance (based on generic
-principles and device capabilities), optimized for:
-i.   improved throughput
-ii.  improved latency
-iii. better utilization of h/w & CPU time
-
-Characteristics:
-
-i. Binary tree
-AS and deadline i/o schedulers use red black binary trees for disk position
-sorting and searching, and a fifo linked list for time-based searching. This
-gives good scalability and good availability of information. Requests are
-almost always dispatched in disk sort order, so a cache is kept of the next
-request in sort order to prevent binary tree lookups.
-
-This arrangement is not a generic block layer characteristic however, so
-elevators may implement queues as they please.
-
-ii. Merge hash
-AS and deadline use a hash table indexed by the last sector of a request. This
-enables merging code to quickly look up "back merge" candidates, even when
-multiple I/O streams are being performed at once on one disk.
-
-"Front merges", a new request being merged at the front of an existing request,
-are far less common than "back merges" due to the nature of most I/O patterns.
-Front merges are handled by the binary trees in AS and deadline schedulers.
-
-iii. Plugging the queue to batch requests in anticipation of opportunities for
-     merge/sort optimizations
-
-Plugging is an approach that the current i/o scheduling algorithm resorts to so
-that it collects up enough requests in the queue to be able to take
-advantage of the sorting/merging logic in the elevator. If the
-queue is empty when a request comes in, then it plugs the request queue
-(sort of like plugging the bath tub of a vessel to get fluid to build up)
-till it fills up with a few more requests, before starting to service
-the requests. This provides an opportunity to merge/sort the requests before
-passing them down to the device. There are various conditions when the queue is
-unplugged (to open up the flow again), either through a scheduled task or
-could be on demand. For example wait_on_buffer sets the unplugging going
-through sync_buffer() running blk_run_address_space(mapping). Or the caller
-can do it explicity through blk_unplug(bdev). So in the read case,
-the queue gets explicitly unplugged as part of waiting for completion on that
-buffer.
-
-Aside:
-  This is kind of controversial territory, as it's not clear if plugging is
-  always the right thing to do. Devices typically have their own queues,
-  and allowing a big queue to build up in software, while letting the device be
-  idle for a while may not always make sense. The trick is to handle the fine
-  balance between when to plug and when to open up. Also now that we have
-  multi-page bios being queued in one shot, we may not need to wait to merge
-  a big request from the broken up pieces coming by.
-
-4.4 I/O contexts
-I/O contexts provide a dynamically allocated per process data area. They may
-be used in I/O schedulers, and in the block layer (could be used for IO statis,
-priorities for example). See *io_context in block/ll_rw_blk.c, and as-iosched.c
-for an example of usage in an i/o scheduler.
-
-
-5. Scalability related changes
-
-5.1 Granular Locking: io_request_lock replaced by a per-queue lock
-
-The global io_request_lock has been removed as of 2.5, to avoid
-the scalability bottleneck it was causing, and has been replaced by more
-granular locking. The request queue structure has a pointer to the
-lock to be used for that queue. As a result, locking can now be
-per-queue, with a provision for sharing a lock across queues if
-necessary (e.g the scsi layer sets the queue lock pointers to the
-corresponding adapter lock, which results in a per host locking
-granularity). The locking semantics are the same, i.e. locking is
-still imposed by the block layer, grabbing the lock before
-request_fn execution which it means that lots of older drivers
-should still be SMP safe. Drivers are free to drop the queue
-lock themselves, if required. Drivers that explicitly used the
-io_request_lock for serialization need to be modified accordingly.
-Usually it's as easy as adding a global lock:
-
-	static DEFINE_SPINLOCK(my_driver_lock);
-
-and passing the address to that lock to blk_init_queue().
-
-5.2 64 bit sector numbers (sector_t prepares for 64 bit support)
-
-The sector number used in the bio structure has been changed to sector_t,
-which could be defined as 64 bit in preparation for 64 bit sector support.
-
-6. Other Changes/Implications
-
-6.1 Partition re-mapping handled by the generic block layer
-
-In 2.5 some of the gendisk/partition related code has been reorganized.
-Now the generic block layer performs partition-remapping early and thus
-provides drivers with a sector number relative to whole device, rather than
-having to take partition number into account in order to arrive at the true
-sector number. The routine blk_partition_remap() is invoked by
-generic_make_request even before invoking the queue specific make_request_fn,
-so the i/o scheduler also gets to operate on whole disk sector numbers. This
-should typically not require changes to block drivers, it just never gets
-to invoke its own partition sector offset calculations since all bios
-sent are offset from the beginning of the device.
-
-
-7. A Few Tips on Migration of older drivers
-
-Old-style drivers that just use CURRENT and ignores clustered requests,
-may not need much change.  The generic layer will automatically handle
-clustered requests, multi-page bios, etc for the driver.
-
-For a low performance driver or hardware that is PIO driven or just doesn't
-support scatter-gather changes should be minimal too.
-
-The following are some points to keep in mind when converting old drivers
-to bio.
-
-Drivers should use elv_next_request to pick up requests and are no longer
-supposed to handle looping directly over the request list.
-(struct request->queue has been removed)
-
-Now end_that_request_first takes an additional number_of_sectors argument.
-It used to handle always just the first buffer_head in a request, now
-it will loop and handle as many sectors (on a bio-segment granularity)
-as specified.
-
-Now bh->b_end_io is replaced by bio->bi_end_io, but most of the time the
-right thing to use is bio_endio(bio) instead.
-
-If the driver is dropping the io_request_lock from its request_fn strategy,
-then it just needs to replace that with q->queue_lock instead.
-
-As described in Sec 1.1, drivers can set max sector size, max segment size
-etc per queue now. Drivers that used to define their own merge functions i
-to handle things like this can now just use the blk_queue_* functions at
-blk_init_queue time.
-
-Drivers no longer have to map a {partition, sector offset} into the
-correct absolute location anymore, this is done by the block layer, so
-where a driver received a request ala this before:
-
-	rq->rq_dev = mk_kdev(3, 5);	/* /dev/hda5 */
-	rq->sector = 0;			/* first sector on hda5 */
-
-  it will now see
-
-	rq->rq_dev = mk_kdev(3, 0);	/* /dev/hda */
-	rq->sector = 123128;		/* offset from start of disk */
-
-As mentioned, there is no virtual mapping of a bio. For DMA, this is
-not a problem as the driver probably never will need a virtual mapping.
-Instead it needs a bus mapping (dma_map_page for a single segment or
-use dma_map_sg for scatter gather) to be able to ship it to the driver. For
-PIO drivers (or drivers that need to revert to PIO transfer once in a
-while (IDE for example)), where the CPU is doing the actual data
-transfer a virtual mapping is needed. If the driver supports highmem I/O,
-(Sec 1.1, (ii) ) it needs to use kmap_atomic or similar to temporarily map
-a bio into the virtual address space.
-
-
-8. Prior/Related/Impacted patches
-
-8.1. Earlier kiobuf patches (sct/axboe/chait/hch/mkp)
-- orig kiobuf & raw i/o patches (now in 2.4 tree)
-- direct kiobuf based i/o to devices (no intermediate bh's)
-- page i/o using kiobuf
-- kiobuf splitting for lvm (mkp)
-- elevator support for kiobuf request merging (axboe)
-8.2. Zero-copy networking (Dave Miller)
-8.3. SGI XFS - pagebuf patches - use of kiobufs
-8.4. Multi-page pioent patch for bio (Christoph Hellwig)
-8.5. Direct i/o implementation (Andrea Arcangeli) since 2.4.10-pre11
-8.6. Async i/o implementation patch (Ben LaHaise)
-8.7. EVMS layering design (IBM EVMS team)
-8.8. Larger page cache size patch (Ben LaHaise) and
-     Large page size (Daniel Phillips)
-    => larger contiguous physical memory buffers
-8.9. VM reservations patch (Ben LaHaise)
-8.10. Write clustering patches ? (Marcelo/Quintela/Riel ?)
-8.11. Block device in page cache patch (Andrea Archangeli) - now in 2.4.10+
-8.12. Multiple block-size transfers for faster raw i/o (Shailabh Nagar,
-      Badari)
-8.13  Priority based i/o scheduler - prepatches (Arjan van de Ven)
-8.14  IDE Taskfile i/o patch (Andre Hedrick)
-8.15  Multi-page writeout and readahead patches (Andrew Morton)
-8.16  Direct i/o patches for 2.5 using kvec and bio (Badari Pulavarthy)
-
-9. Other References:
-
-9.1 The Splice I/O Model - Larry McVoy (and subsequent discussions on lkml,
-and Linus' comments - Jan 2001)
-9.2 Discussions about kiobuf and bh design on lkml between sct, linus, alan
-et al - Feb-March 2001 (many of the initial thoughts that led to bio were
-brought up in this discussion thread)
-9.3 Discussions on mempool on lkml - Dec 2001.
-
diff --git a/Documentation/block/biovecs.rst b/Documentation/block/biovecs.rst
new file mode 100644
index 000000000000..86fa66c87172
--- /dev/null
+++ b/Documentation/block/biovecs.rst
@@ -0,0 +1,146 @@
+======================================
+Immutable biovecs and biovec iterators
+======================================
+
+Kent Overstreet <kmo@daterainc.com>
+
+As of 3.13, biovecs should never be modified after a bio has been submitted.
+Instead, we have a new struct bvec_iter which represents a range of a biovec -
+the iterator will be modified as the bio is completed, not the biovec.
+
+More specifically, old code that needed to partially complete a bio would
+update bi_sector and bi_size, and advance bi_idx to the next biovec. If it
+ended up partway through a biovec, it would increment bv_offset and decrement
+bv_len by the number of bytes completed in that biovec.
+
+In the new scheme of things, everything that must be mutated in order to
+partially complete a bio is segregated into struct bvec_iter: bi_sector,
+bi_size and bi_idx have been moved there; and instead of modifying bv_offset
+and bv_len, struct bvec_iter has bi_bvec_done, which represents the number of
+bytes completed in the current bvec.
+
+There are a bunch of new helper macros for hiding the gory details - in
+particular, presenting the illusion of partially completed biovecs so that
+normal code doesn't have to deal with bi_bvec_done.
+
+ * Driver code should no longer refer to biovecs directly; we now have
+   bio_iovec() and bio_iter_iovec() macros that return literal struct biovecs,
+   constructed from the raw biovecs but taking into account bi_bvec_done and
+   bi_size.
+
+   bio_for_each_segment() has been updated to take a bvec_iter argument
+   instead of an integer (that corresponded to bi_idx); for a lot of code the
+   conversion just required changing the types of the arguments to
+   bio_for_each_segment().
+
+ * Advancing a bvec_iter is done with bio_advance_iter(); bio_advance() is a
+   wrapper around bio_advance_iter() that operates on bio->bi_iter, and also
+   advances the bio integrity's iter if present.
+
+   There is a lower level advance function - bvec_iter_advance() - which takes
+   a pointer to a biovec, not a bio; this is used by the bio integrity code.
+
+What's all this get us?
+=======================
+
+Having a real iterator, and making biovecs immutable, has a number of
+advantages:
+
+ * Before, iterating over bios was very awkward when you weren't processing
+   exactly one bvec at a time - for example, bio_copy_data() in fs/bio.c,
+   which copies the contents of one bio into another. Because the biovecs
+   wouldn't necessarily be the same size, the old code was tricky convoluted -
+   it had to walk two different bios at the same time, keeping both bi_idx and
+   and offset into the current biovec for each.
+
+   The new code is much more straightforward - have a look. This sort of
+   pattern comes up in a lot of places; a lot of drivers were essentially open
+   coding bvec iterators before, and having common implementation considerably
+   simplifies a lot of code.
+
+ * Before, any code that might need to use the biovec after the bio had been
+   completed (perhaps to copy the data somewhere else, or perhaps to resubmit
+   it somewhere else if there was an error) had to save the entire bvec array
+   - again, this was being done in a fair number of places.
+
+ * Biovecs can be shared between multiple bios - a bvec iter can represent an
+   arbitrary range of an existing biovec, both starting and ending midway
+   through biovecs. This is what enables efficient splitting of arbitrary
+   bios. Note that this means we _only_ use bi_size to determine when we've
+   reached the end of a bio, not bi_vcnt - and the bio_iovec() macro takes
+   bi_size into account when constructing biovecs.
+
+ * Splitting bios is now much simpler. The old bio_split() didn't even work on
+   bios with more than a single bvec! Now, we can efficiently split arbitrary
+   size bios - because the new bio can share the old bio's biovec.
+
+   Care must be taken to ensure the biovec isn't freed while the split bio is
+   still using it, in case the original bio completes first, though. Using
+   bio_chain() when splitting bios helps with this.
+
+ * Submitting partially completed bios is now perfectly fine - this comes up
+   occasionally in stacking block drivers and various code (e.g. md and
+   bcache) had some ugly workarounds for this.
+
+   It used to be the case that submitting a partially completed bio would work
+   fine to _most_ devices, but since accessing the raw bvec array was the
+   norm, not all drivers would respect bi_idx and those would break. Now,
+   since all drivers _must_ go through the bvec iterator - and have been
+   audited to make sure they are - submitting partially completed bios is
+   perfectly fine.
+
+Other implications:
+===================
+
+ * Almost all usage of bi_idx is now incorrect and has been removed; instead,
+   where previously you would have used bi_idx you'd now use a bvec_iter,
+   probably passing it to one of the helper macros.
+
+   I.e. instead of using bio_iovec_idx() (or bio->bi_iovec[bio->bi_idx]), you
+   now use bio_iter_iovec(), which takes a bvec_iter and returns a
+   literal struct bio_vec - constructed on the fly from the raw biovec but
+   taking into account bi_bvec_done (and bi_size).
+
+ * bi_vcnt can't be trusted or relied upon by driver code - i.e. anything that
+   doesn't actually own the bio. The reason is twofold: firstly, it's not
+   actually needed for iterating over the bio anymore - we only use bi_size.
+   Secondly, when cloning a bio and reusing (a portion of) the original bio's
+   biovec, in order to calculate bi_vcnt for the new bio we'd have to iterate
+   over all the biovecs in the new bio - which is silly as it's not needed.
+
+   So, don't use bi_vcnt anymore.
+
+ * The current interface allows the block layer to split bios as needed, so we
+   could eliminate a lot of complexity particularly in stacked drivers. Code
+   that creates bios can then create whatever size bios are convenient, and
+   more importantly stacked drivers don't have to deal with both their own bio
+   size limitations and the limitations of the underlying devices. Thus
+   there's no need to define ->merge_bvec_fn() callbacks for individual block
+   drivers.
+
+Usage of helpers:
+=================
+
+* The following helpers whose names have the suffix of `_all` can only be used
+  on non-BIO_CLONED bio. They are usually used by filesystem code. Drivers
+  shouldn't use them because the bio may have been split before it reached the
+  driver.
+
+::
+
+	bio_for_each_segment_all()
+	bio_first_bvec_all()
+	bio_first_page_all()
+	bio_last_bvec_all()
+
+* The following helpers iterate over single-page segment. The passed 'struct
+  bio_vec' will contain a single-page IO vector during the iteration::
+
+	bio_for_each_segment()
+	bio_for_each_segment_all()
+
+* The following helpers iterate over multi-page bvec. The passed 'struct
+  bio_vec' will contain a multi-page IO vector during the iteration::
+
+	bio_for_each_bvec()
+	rq_for_each_bvec()
diff --git a/Documentation/block/biovecs.txt b/Documentation/block/biovecs.txt
deleted file mode 100644
index ce6eccaf5df7..000000000000
--- a/Documentation/block/biovecs.txt
+++ /dev/null
@@ -1,144 +0,0 @@
-
-Immutable biovecs and biovec iterators:
-=======================================
-
-Kent Overstreet <kmo@daterainc.com>
-
-As of 3.13, biovecs should never be modified after a bio has been submitted.
-Instead, we have a new struct bvec_iter which represents a range of a biovec -
-the iterator will be modified as the bio is completed, not the biovec.
-
-More specifically, old code that needed to partially complete a bio would
-update bi_sector and bi_size, and advance bi_idx to the next biovec. If it
-ended up partway through a biovec, it would increment bv_offset and decrement
-bv_len by the number of bytes completed in that biovec.
-
-In the new scheme of things, everything that must be mutated in order to
-partially complete a bio is segregated into struct bvec_iter: bi_sector,
-bi_size and bi_idx have been moved there; and instead of modifying bv_offset
-and bv_len, struct bvec_iter has bi_bvec_done, which represents the number of
-bytes completed in the current bvec.
-
-There are a bunch of new helper macros for hiding the gory details - in
-particular, presenting the illusion of partially completed biovecs so that
-normal code doesn't have to deal with bi_bvec_done.
-
- * Driver code should no longer refer to biovecs directly; we now have
-   bio_iovec() and bio_iter_iovec() macros that return literal struct biovecs,
-   constructed from the raw biovecs but taking into account bi_bvec_done and
-   bi_size.
-
-   bio_for_each_segment() has been updated to take a bvec_iter argument
-   instead of an integer (that corresponded to bi_idx); for a lot of code the
-   conversion just required changing the types of the arguments to
-   bio_for_each_segment().
-
- * Advancing a bvec_iter is done with bio_advance_iter(); bio_advance() is a
-   wrapper around bio_advance_iter() that operates on bio->bi_iter, and also
-   advances the bio integrity's iter if present.
-
-   There is a lower level advance function - bvec_iter_advance() - which takes
-   a pointer to a biovec, not a bio; this is used by the bio integrity code.
-
-What's all this get us?
-=======================
-
-Having a real iterator, and making biovecs immutable, has a number of
-advantages:
-
- * Before, iterating over bios was very awkward when you weren't processing
-   exactly one bvec at a time - for example, bio_copy_data() in fs/bio.c,
-   which copies the contents of one bio into another. Because the biovecs
-   wouldn't necessarily be the same size, the old code was tricky convoluted -
-   it had to walk two different bios at the same time, keeping both bi_idx and
-   and offset into the current biovec for each.
-
-   The new code is much more straightforward - have a look. This sort of
-   pattern comes up in a lot of places; a lot of drivers were essentially open
-   coding bvec iterators before, and having common implementation considerably
-   simplifies a lot of code.
-
- * Before, any code that might need to use the biovec after the bio had been
-   completed (perhaps to copy the data somewhere else, or perhaps to resubmit
-   it somewhere else if there was an error) had to save the entire bvec array
-   - again, this was being done in a fair number of places.
-
- * Biovecs can be shared between multiple bios - a bvec iter can represent an
-   arbitrary range of an existing biovec, both starting and ending midway
-   through biovecs. This is what enables efficient splitting of arbitrary
-   bios. Note that this means we _only_ use bi_size to determine when we've
-   reached the end of a bio, not bi_vcnt - and the bio_iovec() macro takes
-   bi_size into account when constructing biovecs.
-
- * Splitting bios is now much simpler. The old bio_split() didn't even work on
-   bios with more than a single bvec! Now, we can efficiently split arbitrary
-   size bios - because the new bio can share the old bio's biovec.
-
-   Care must be taken to ensure the biovec isn't freed while the split bio is
-   still using it, in case the original bio completes first, though. Using
-   bio_chain() when splitting bios helps with this.
-
- * Submitting partially completed bios is now perfectly fine - this comes up
-   occasionally in stacking block drivers and various code (e.g. md and
-   bcache) had some ugly workarounds for this.
-
-   It used to be the case that submitting a partially completed bio would work
-   fine to _most_ devices, but since accessing the raw bvec array was the
-   norm, not all drivers would respect bi_idx and those would break. Now,
-   since all drivers _must_ go through the bvec iterator - and have been
-   audited to make sure they are - submitting partially completed bios is
-   perfectly fine.
-
-Other implications:
-===================
-
- * Almost all usage of bi_idx is now incorrect and has been removed; instead,
-   where previously you would have used bi_idx you'd now use a bvec_iter,
-   probably passing it to one of the helper macros.
-
-   I.e. instead of using bio_iovec_idx() (or bio->bi_iovec[bio->bi_idx]), you
-   now use bio_iter_iovec(), which takes a bvec_iter and returns a
-   literal struct bio_vec - constructed on the fly from the raw biovec but
-   taking into account bi_bvec_done (and bi_size).
-
- * bi_vcnt can't be trusted or relied upon by driver code - i.e. anything that
-   doesn't actually own the bio. The reason is twofold: firstly, it's not
-   actually needed for iterating over the bio anymore - we only use bi_size.
-   Secondly, when cloning a bio and reusing (a portion of) the original bio's
-   biovec, in order to calculate bi_vcnt for the new bio we'd have to iterate
-   over all the biovecs in the new bio - which is silly as it's not needed.
-
-   So, don't use bi_vcnt anymore.
-
- * The current interface allows the block layer to split bios as needed, so we
-   could eliminate a lot of complexity particularly in stacked drivers. Code
-   that creates bios can then create whatever size bios are convenient, and
-   more importantly stacked drivers don't have to deal with both their own bio
-   size limitations and the limitations of the underlying devices. Thus
-   there's no need to define ->merge_bvec_fn() callbacks for individual block
-   drivers.
-
-Usage of helpers:
-=================
-
-* The following helpers whose names have the suffix of "_all" can only be used
-on non-BIO_CLONED bio. They are usually used by filesystem code. Drivers
-shouldn't use them because the bio may have been split before it reached the
-driver.
-
-	bio_for_each_segment_all()
-	bio_first_bvec_all()
-	bio_first_page_all()
-	bio_last_bvec_all()
-
-* The following helpers iterate over single-page segment. The passed 'struct
-bio_vec' will contain a single-page IO vector during the iteration
-
-	bio_for_each_segment()
-	bio_for_each_segment_all()
-
-* The following helpers iterate over multi-page bvec. The passed 'struct
-bio_vec' will contain a multi-page IO vector during the iteration
-
-	bio_for_each_bvec()
-	rq_for_each_bvec()
diff --git a/Documentation/block/capability.rst b/Documentation/block/capability.rst
new file mode 100644
index 000000000000..2cf258d64bbe
--- /dev/null
+++ b/Documentation/block/capability.rst
@@ -0,0 +1,18 @@
+===============================
+Generic Block Device Capability
+===============================
+
+This file documents the sysfs file block/<disk>/capability
+
+capability is a hex word indicating which capabilities a specific disk
+supports.  For more information on bits not listed here, see
+include/linux/genhd.h
+
+GENHD_FL_MEDIA_CHANGE_NOTIFY
+----------------------------
+
+Value: 4
+
+When this bit is set, the disk supports Asynchronous Notification
+of media change events.  These events will be broadcast to user
+space via kernel uevent.
diff --git a/Documentation/block/capability.txt b/Documentation/block/capability.txt
deleted file mode 100644
index 2f1729424ef4..000000000000
--- a/Documentation/block/capability.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Generic Block Device Capability
-===============================================================================
-This file documents the sysfs file block/<disk>/capability
-
-capability is a hex word indicating which capabilities a specific disk
-supports.  For more information on bits not listed here, see
-include/linux/genhd.h
-
-Capability				Value
--------------------------------------------------------------------------------
-GENHD_FL_MEDIA_CHANGE_NOTIFY		4
-	When this bit is set, the disk supports Asynchronous Notification
-	of media change events.  These events will be broadcast to user
-	space via kernel uevent.
-
diff --git a/Documentation/block/cmdline-partition.rst b/Documentation/block/cmdline-partition.rst
new file mode 100644
index 000000000000..530bedff548a
--- /dev/null
+++ b/Documentation/block/cmdline-partition.rst
@@ -0,0 +1,53 @@
+==============================================
+Embedded device command line partition parsing
+==============================================
+
+The "blkdevparts" command line option adds support for reading the
+block device partition table from the kernel command line.
+
+It is typically used for fixed block (eMMC) embedded devices.
+It has no MBR, so saves storage space. Bootloader can be easily accessed
+by absolute address of data on the block device.
+Users can easily change the partition.
+
+The format for the command line is just like mtdparts:
+
+blkdevparts=<blkdev-def>[;<blkdev-def>]
+  <blkdev-def> := <blkdev-id>:<partdef>[,<partdef>]
+    <partdef> := <size>[@<offset>](part-name)
+
+<blkdev-id>
+    block device disk name. Embedded device uses fixed block device.
+    Its disk name is also fixed, such as: mmcblk0, mmcblk1, mmcblk0boot0.
+
+<size>
+    partition size, in bytes, such as: 512, 1m, 1G.
+    size may contain an optional suffix of (upper or lower case):
+
+      K, M, G, T, P, E.
+
+    "-" is used to denote all remaining space.
+
+<offset>
+    partition start address, in bytes.
+    offset may contain an optional suffix of (upper or lower case):
+
+      K, M, G, T, P, E.
+
+(part-name)
+    partition name. Kernel sends uevent with "PARTNAME". Application can
+    create a link to block device partition with the name "PARTNAME".
+    User space application can access partition by partition name.
+
+Example:
+
+    eMMC disk names are "mmcblk0" and "mmcblk0boot0".
+
+  bootargs::
+
+    'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)'
+
+  dmesg::
+
+    mmcblk0: p1(data0) p2(data1) p3()
+    mmcblk0boot0: p1(boot) p2(kernel)
diff --git a/Documentation/block/cmdline-partition.txt b/Documentation/block/cmdline-partition.txt
deleted file mode 100644
index 760a3f7c3ed4..000000000000
--- a/Documentation/block/cmdline-partition.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-Embedded device command line partition parsing
-=====================================================================
-
-The "blkdevparts" command line option adds support for reading the
-block device partition table from the kernel command line.
-
-It is typically used for fixed block (eMMC) embedded devices.
-It has no MBR, so saves storage space. Bootloader can be easily accessed
-by absolute address of data on the block device.
-Users can easily change the partition.
-
-The format for the command line is just like mtdparts:
-
-blkdevparts=<blkdev-def>[;<blkdev-def>]
-  <blkdev-def> := <blkdev-id>:<partdef>[,<partdef>]
-    <partdef> := <size>[@<offset>](part-name)
-
-<blkdev-id>
-    block device disk name. Embedded device uses fixed block device.
-    Its disk name is also fixed, such as: mmcblk0, mmcblk1, mmcblk0boot0.
-
-<size>
-    partition size, in bytes, such as: 512, 1m, 1G.
-    size may contain an optional suffix of (upper or lower case):
-      K, M, G, T, P, E.
-    "-" is used to denote all remaining space.
-
-<offset>
-    partition start address, in bytes.
-    offset may contain an optional suffix of (upper or lower case):
-      K, M, G, T, P, E.
-
-(part-name)
-    partition name. Kernel sends uevent with "PARTNAME". Application can
-    create a link to block device partition with the name "PARTNAME".
-    User space application can access partition by partition name.
-
-Example:
-    eMMC disk names are "mmcblk0" and "mmcblk0boot0".
-
-  bootargs:
-    'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)'
-
-  dmesg:
-    mmcblk0: p1(data0) p2(data1) p3()
-    mmcblk0boot0: p1(boot) p2(kernel)
diff --git a/Documentation/block/data-integrity.rst b/Documentation/block/data-integrity.rst
new file mode 100644
index 000000000000..4f2452a95c43
--- /dev/null
+++ b/Documentation/block/data-integrity.rst
@@ -0,0 +1,291 @@
+﻿==============
+Data Integrity
+==============
+
+1. Introduction
+===============
+
+Modern filesystems feature checksumming of data and metadata to
+protect against data corruption.  However, the detection of the
+corruption is done at read time which could potentially be months
+after the data was written.  At that point the original data that the
+application tried to write is most likely lost.
+
+The solution is to ensure that the disk is actually storing what the
+application meant it to.  Recent additions to both the SCSI family
+protocols (SBC Data Integrity Field, SCC protection proposal) as well
+as SATA/T13 (External Path Protection) try to remedy this by adding
+support for appending integrity metadata to an I/O.  The integrity
+metadata (or protection information in SCSI terminology) includes a
+checksum for each sector as well as an incrementing counter that
+ensures the individual sectors are written in the right order.  And
+for some protection schemes also that the I/O is written to the right
+place on disk.
+
+Current storage controllers and devices implement various protective
+measures, for instance checksumming and scrubbing.  But these
+technologies are working in their own isolated domains or at best
+between adjacent nodes in the I/O path.  The interesting thing about
+DIF and the other integrity extensions is that the protection format
+is well defined and every node in the I/O path can verify the
+integrity of the I/O and reject it if corruption is detected.  This
+allows not only corruption prevention but also isolation of the point
+of failure.
+
+2. The Data Integrity Extensions
+================================
+
+As written, the protocol extensions only protect the path between
+controller and storage device.  However, many controllers actually
+allow the operating system to interact with the integrity metadata
+(IMD).  We have been working with several FC/SAS HBA vendors to enable
+the protection information to be transferred to and from their
+controllers.
+
+The SCSI Data Integrity Field works by appending 8 bytes of protection
+information to each sector.  The data + integrity metadata is stored
+in 520 byte sectors on disk.  Data + IMD are interleaved when
+transferred between the controller and target.  The T13 proposal is
+similar.
+
+Because it is highly inconvenient for operating systems to deal with
+520 (and 4104) byte sectors, we approached several HBA vendors and
+encouraged them to allow separation of the data and integrity metadata
+scatter-gather lists.
+
+The controller will interleave the buffers on write and split them on
+read.  This means that Linux can DMA the data buffers to and from
+host memory without changes to the page cache.
+
+Also, the 16-bit CRC checksum mandated by both the SCSI and SATA specs
+is somewhat heavy to compute in software.  Benchmarks found that
+calculating this checksum had a significant impact on system
+performance for a number of workloads.  Some controllers allow a
+lighter-weight checksum to be used when interfacing with the operating
+system.  Emulex, for instance, supports the TCP/IP checksum instead.
+The IP checksum received from the OS is converted to the 16-bit CRC
+when writing and vice versa.  This allows the integrity metadata to be
+generated by Linux or the application at very low cost (comparable to
+software RAID5).
+
+The IP checksum is weaker than the CRC in terms of detecting bit
+errors.  However, the strength is really in the separation of the data
+buffers and the integrity metadata.  These two distinct buffers must
+match up for an I/O to complete.
+
+The separation of the data and integrity metadata buffers as well as
+the choice in checksums is referred to as the Data Integrity
+Extensions.  As these extensions are outside the scope of the protocol
+bodies (T10, T13), Oracle and its partners are trying to standardize
+them within the Storage Networking Industry Association.
+
+3. Kernel Changes
+=================
+
+The data integrity framework in Linux enables protection information
+to be pinned to I/Os and sent to/received from controllers that
+support it.
+
+The advantage to the integrity extensions in SCSI and SATA is that
+they enable us to protect the entire path from application to storage
+device.  However, at the same time this is also the biggest
+disadvantage. It means that the protection information must be in a
+format that can be understood by the disk.
+
+Generally Linux/POSIX applications are agnostic to the intricacies of
+the storage devices they are accessing.  The virtual filesystem switch
+and the block layer make things like hardware sector size and
+transport protocols completely transparent to the application.
+
+However, this level of detail is required when preparing the
+protection information to send to a disk.  Consequently, the very
+concept of an end-to-end protection scheme is a layering violation.
+It is completely unreasonable for an application to be aware whether
+it is accessing a SCSI or SATA disk.
+
+The data integrity support implemented in Linux attempts to hide this
+from the application.  As far as the application (and to some extent
+the kernel) is concerned, the integrity metadata is opaque information
+that's attached to the I/O.
+
+The current implementation allows the block layer to automatically
+generate the protection information for any I/O.  Eventually the
+intent is to move the integrity metadata calculation to userspace for
+user data.  Metadata and other I/O that originates within the kernel
+will still use the automatic generation interface.
+
+Some storage devices allow each hardware sector to be tagged with a
+16-bit value.  The owner of this tag space is the owner of the block
+device.  I.e. the filesystem in most cases.  The filesystem can use
+this extra space to tag sectors as they see fit.  Because the tag
+space is limited, the block interface allows tagging bigger chunks by
+way of interleaving.  This way, 8*16 bits of information can be
+attached to a typical 4KB filesystem block.
+
+This also means that applications such as fsck and mkfs will need
+access to manipulate the tags from user space.  A passthrough
+interface for this is being worked on.
+
+
+4. Block Layer Implementation Details
+=====================================
+
+4.1 Bio
+-------
+
+The data integrity patches add a new field to struct bio when
+CONFIG_BLK_DEV_INTEGRITY is enabled.  bio_integrity(bio) returns a
+pointer to a struct bip which contains the bio integrity payload.
+Essentially a bip is a trimmed down struct bio which holds a bio_vec
+containing the integrity metadata and the required housekeeping
+information (bvec pool, vector count, etc.)
+
+A kernel subsystem can enable data integrity protection on a bio by
+calling bio_integrity_alloc(bio).  This will allocate and attach the
+bip to the bio.
+
+Individual pages containing integrity metadata can subsequently be
+attached using bio_integrity_add_page().
+
+bio_free() will automatically free the bip.
+
+
+4.2 Block Device
+----------------
+
+Because the format of the protection data is tied to the physical
+disk, each block device has been extended with a block integrity
+profile (struct blk_integrity).  This optional profile is registered
+with the block layer using blk_integrity_register().
+
+The profile contains callback functions for generating and verifying
+the protection data, as well as getting and setting application tags.
+The profile also contains a few constants to aid in completing,
+merging and splitting the integrity metadata.
+
+Layered block devices will need to pick a profile that's appropriate
+for all subdevices.  blk_integrity_compare() can help with that.  DM
+and MD linear, RAID0 and RAID1 are currently supported.  RAID4/5/6
+will require extra work due to the application tag.
+
+
+5.0 Block Layer Integrity API
+=============================
+
+5.1 Normal Filesystem
+---------------------
+
+    The normal filesystem is unaware that the underlying block device
+    is capable of sending/receiving integrity metadata.  The IMD will
+    be automatically generated by the block layer at submit_bio() time
+    in case of a WRITE.  A READ request will cause the I/O integrity
+    to be verified upon completion.
+
+    IMD generation and verification can be toggled using the::
+
+      /sys/block/<bdev>/integrity/write_generate
+
+    and::
+
+      /sys/block/<bdev>/integrity/read_verify
+
+    flags.
+
+
+5.2 Integrity-Aware Filesystem
+------------------------------
+
+    A filesystem that is integrity-aware can prepare I/Os with IMD
+    attached.  It can also use the application tag space if this is
+    supported by the block device.
+
+
+    `bool bio_integrity_prep(bio);`
+
+      To generate IMD for WRITE and to set up buffers for READ, the
+      filesystem must call bio_integrity_prep(bio).
+
+      Prior to calling this function, the bio data direction and start
+      sector must be set, and the bio should have all data pages
+      added.  It is up to the caller to ensure that the bio does not
+      change while I/O is in progress.
+      Complete bio with error if prepare failed for some reson.
+
+
+5.3 Passing Existing Integrity Metadata
+---------------------------------------
+
+    Filesystems that either generate their own integrity metadata or
+    are capable of transferring IMD from user space can use the
+    following calls:
+
+
+    `struct bip * bio_integrity_alloc(bio, gfp_mask, nr_pages);`
+
+      Allocates the bio integrity payload and hangs it off of the bio.
+      nr_pages indicate how many pages of protection data need to be
+      stored in the integrity bio_vec list (similar to bio_alloc()).
+
+      The integrity payload will be freed at bio_free() time.
+
+
+    `int bio_integrity_add_page(bio, page, len, offset);`
+
+      Attaches a page containing integrity metadata to an existing
+      bio.  The bio must have an existing bip,
+      i.e. bio_integrity_alloc() must have been called.  For a WRITE,
+      the integrity metadata in the pages must be in a format
+      understood by the target device with the notable exception that
+      the sector numbers will be remapped as the request traverses the
+      I/O stack.  This implies that the pages added using this call
+      will be modified during I/O!  The first reference tag in the
+      integrity metadata must have a value of bip->bip_sector.
+
+      Pages can be added using bio_integrity_add_page() as long as
+      there is room in the bip bio_vec array (nr_pages).
+
+      Upon completion of a READ operation, the attached pages will
+      contain the integrity metadata received from the storage device.
+      It is up to the receiver to process them and verify data
+      integrity upon completion.
+
+
+5.4 Registering A Block Device As Capable Of Exchanging Integrity Metadata
+--------------------------------------------------------------------------
+
+    To enable integrity exchange on a block device the gendisk must be
+    registered as capable:
+
+    `int blk_integrity_register(gendisk, blk_integrity);`
+
+      The blk_integrity struct is a template and should contain the
+      following::
+
+        static struct blk_integrity my_profile = {
+            .name                   = "STANDARDSBODY-TYPE-VARIANT-CSUM",
+            .generate_fn            = my_generate_fn,
+	    .verify_fn              = my_verify_fn,
+	    .tuple_size             = sizeof(struct my_tuple_size),
+	    .tag_size               = <tag bytes per hw sector>,
+        };
+
+      'name' is a text string which will be visible in sysfs.  This is
+      part of the userland API so chose it carefully and never change
+      it.  The format is standards body-type-variant.
+      E.g. T10-DIF-TYPE1-IP or T13-EPP-0-CRC.
+
+      'generate_fn' generates appropriate integrity metadata (for WRITE).
+
+      'verify_fn' verifies that the data buffer matches the integrity
+      metadata.
+
+      'tuple_size' must be set to match the size of the integrity
+      metadata per sector.  I.e. 8 for DIF and EPP.
+
+      'tag_size' must be set to identify how many bytes of tag space
+      are available per hardware sector.  For DIF this is either 2 or
+      0 depending on the value of the Control Mode Page ATO bit.
+
+----------------------------------------------------------------------
+
+2007-12-24 Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt
deleted file mode 100644
index 934c44ea0c57..000000000000
--- a/Documentation/block/data-integrity.txt
+++ /dev/null
@@ -1,281 +0,0 @@
-----------------------------------------------------------------------
-1. INTRODUCTION
-
-Modern filesystems feature checksumming of data and metadata to
-protect against data corruption.  However, the detection of the
-corruption is done at read time which could potentially be months
-after the data was written.  At that point the original data that the
-application tried to write is most likely lost.
-
-The solution is to ensure that the disk is actually storing what the
-application meant it to.  Recent additions to both the SCSI family
-protocols (SBC Data Integrity Field, SCC protection proposal) as well
-as SATA/T13 (External Path Protection) try to remedy this by adding
-support for appending integrity metadata to an I/O.  The integrity
-metadata (or protection information in SCSI terminology) includes a
-checksum for each sector as well as an incrementing counter that
-ensures the individual sectors are written in the right order.  And
-for some protection schemes also that the I/O is written to the right
-place on disk.
-
-Current storage controllers and devices implement various protective
-measures, for instance checksumming and scrubbing.  But these
-technologies are working in their own isolated domains or at best
-between adjacent nodes in the I/O path.  The interesting thing about
-DIF and the other integrity extensions is that the protection format
-is well defined and every node in the I/O path can verify the
-integrity of the I/O and reject it if corruption is detected.  This
-allows not only corruption prevention but also isolation of the point
-of failure.
-
-----------------------------------------------------------------------
-2. THE DATA INTEGRITY EXTENSIONS
-
-As written, the protocol extensions only protect the path between
-controller and storage device.  However, many controllers actually
-allow the operating system to interact with the integrity metadata
-(IMD).  We have been working with several FC/SAS HBA vendors to enable
-the protection information to be transferred to and from their
-controllers.
-
-The SCSI Data Integrity Field works by appending 8 bytes of protection
-information to each sector.  The data + integrity metadata is stored
-in 520 byte sectors on disk.  Data + IMD are interleaved when
-transferred between the controller and target.  The T13 proposal is
-similar.
-
-Because it is highly inconvenient for operating systems to deal with
-520 (and 4104) byte sectors, we approached several HBA vendors and
-encouraged them to allow separation of the data and integrity metadata
-scatter-gather lists.
-
-The controller will interleave the buffers on write and split them on
-read.  This means that Linux can DMA the data buffers to and from
-host memory without changes to the page cache.
-
-Also, the 16-bit CRC checksum mandated by both the SCSI and SATA specs
-is somewhat heavy to compute in software.  Benchmarks found that
-calculating this checksum had a significant impact on system
-performance for a number of workloads.  Some controllers allow a
-lighter-weight checksum to be used when interfacing with the operating
-system.  Emulex, for instance, supports the TCP/IP checksum instead.
-The IP checksum received from the OS is converted to the 16-bit CRC
-when writing and vice versa.  This allows the integrity metadata to be
-generated by Linux or the application at very low cost (comparable to
-software RAID5).
-
-The IP checksum is weaker than the CRC in terms of detecting bit
-errors.  However, the strength is really in the separation of the data
-buffers and the integrity metadata.  These two distinct buffers must
-match up for an I/O to complete.
-
-The separation of the data and integrity metadata buffers as well as
-the choice in checksums is referred to as the Data Integrity
-Extensions.  As these extensions are outside the scope of the protocol
-bodies (T10, T13), Oracle and its partners are trying to standardize
-them within the Storage Networking Industry Association.
-
-----------------------------------------------------------------------
-3. KERNEL CHANGES
-
-The data integrity framework in Linux enables protection information
-to be pinned to I/Os and sent to/received from controllers that
-support it.
-
-The advantage to the integrity extensions in SCSI and SATA is that
-they enable us to protect the entire path from application to storage
-device.  However, at the same time this is also the biggest
-disadvantage. It means that the protection information must be in a
-format that can be understood by the disk.
-
-Generally Linux/POSIX applications are agnostic to the intricacies of
-the storage devices they are accessing.  The virtual filesystem switch
-and the block layer make things like hardware sector size and
-transport protocols completely transparent to the application.
-
-However, this level of detail is required when preparing the
-protection information to send to a disk.  Consequently, the very
-concept of an end-to-end protection scheme is a layering violation.
-It is completely unreasonable for an application to be aware whether
-it is accessing a SCSI or SATA disk.
-
-The data integrity support implemented in Linux attempts to hide this
-from the application.  As far as the application (and to some extent
-the kernel) is concerned, the integrity metadata is opaque information
-that's attached to the I/O.
-
-The current implementation allows the block layer to automatically
-generate the protection information for any I/O.  Eventually the
-intent is to move the integrity metadata calculation to userspace for
-user data.  Metadata and other I/O that originates within the kernel
-will still use the automatic generation interface.
-
-Some storage devices allow each hardware sector to be tagged with a
-16-bit value.  The owner of this tag space is the owner of the block
-device.  I.e. the filesystem in most cases.  The filesystem can use
-this extra space to tag sectors as they see fit.  Because the tag
-space is limited, the block interface allows tagging bigger chunks by
-way of interleaving.  This way, 8*16 bits of information can be
-attached to a typical 4KB filesystem block.
-
-This also means that applications such as fsck and mkfs will need
-access to manipulate the tags from user space.  A passthrough
-interface for this is being worked on.
-
-
-----------------------------------------------------------------------
-4. BLOCK LAYER IMPLEMENTATION DETAILS
-
-4.1 BIO
-
-The data integrity patches add a new field to struct bio when
-CONFIG_BLK_DEV_INTEGRITY is enabled.  bio_integrity(bio) returns a
-pointer to a struct bip which contains the bio integrity payload.
-Essentially a bip is a trimmed down struct bio which holds a bio_vec
-containing the integrity metadata and the required housekeeping
-information (bvec pool, vector count, etc.)
-
-A kernel subsystem can enable data integrity protection on a bio by
-calling bio_integrity_alloc(bio).  This will allocate and attach the
-bip to the bio.
-
-Individual pages containing integrity metadata can subsequently be
-attached using bio_integrity_add_page().
-
-bio_free() will automatically free the bip.
-
-
-4.2 BLOCK DEVICE
-
-Because the format of the protection data is tied to the physical
-disk, each block device has been extended with a block integrity
-profile (struct blk_integrity).  This optional profile is registered
-with the block layer using blk_integrity_register().
-
-The profile contains callback functions for generating and verifying
-the protection data, as well as getting and setting application tags.
-The profile also contains a few constants to aid in completing,
-merging and splitting the integrity metadata.
-
-Layered block devices will need to pick a profile that's appropriate
-for all subdevices.  blk_integrity_compare() can help with that.  DM
-and MD linear, RAID0 and RAID1 are currently supported.  RAID4/5/6
-will require extra work due to the application tag.
-
-
-----------------------------------------------------------------------
-5.0 BLOCK LAYER INTEGRITY API
-
-5.1 NORMAL FILESYSTEM
-
-    The normal filesystem is unaware that the underlying block device
-    is capable of sending/receiving integrity metadata.  The IMD will
-    be automatically generated by the block layer at submit_bio() time
-    in case of a WRITE.  A READ request will cause the I/O integrity
-    to be verified upon completion.
-
-    IMD generation and verification can be toggled using the
-
-      /sys/block/<bdev>/integrity/write_generate
-
-    and
-
-      /sys/block/<bdev>/integrity/read_verify
-
-    flags.
-
-
-5.2 INTEGRITY-AWARE FILESYSTEM
-
-    A filesystem that is integrity-aware can prepare I/Os with IMD
-    attached.  It can also use the application tag space if this is
-    supported by the block device.
-
-
-    bool bio_integrity_prep(bio);
-
-      To generate IMD for WRITE and to set up buffers for READ, the
-      filesystem must call bio_integrity_prep(bio).
-
-      Prior to calling this function, the bio data direction and start
-      sector must be set, and the bio should have all data pages
-      added.  It is up to the caller to ensure that the bio does not
-      change while I/O is in progress.
-      Complete bio with error if prepare failed for some reson.
-
-
-5.3 PASSING EXISTING INTEGRITY METADATA
-
-    Filesystems that either generate their own integrity metadata or
-    are capable of transferring IMD from user space can use the
-    following calls:
-
-
-    struct bip * bio_integrity_alloc(bio, gfp_mask, nr_pages);
-
-      Allocates the bio integrity payload and hangs it off of the bio.
-      nr_pages indicate how many pages of protection data need to be
-      stored in the integrity bio_vec list (similar to bio_alloc()).
-
-      The integrity payload will be freed at bio_free() time.
-
-
-    int bio_integrity_add_page(bio, page, len, offset);
-
-      Attaches a page containing integrity metadata to an existing
-      bio.  The bio must have an existing bip,
-      i.e. bio_integrity_alloc() must have been called.  For a WRITE,
-      the integrity metadata in the pages must be in a format
-      understood by the target device with the notable exception that
-      the sector numbers will be remapped as the request traverses the
-      I/O stack.  This implies that the pages added using this call
-      will be modified during I/O!  The first reference tag in the
-      integrity metadata must have a value of bip->bip_sector.
-
-      Pages can be added using bio_integrity_add_page() as long as
-      there is room in the bip bio_vec array (nr_pages).
-
-      Upon completion of a READ operation, the attached pages will
-      contain the integrity metadata received from the storage device.
-      It is up to the receiver to process them and verify data
-      integrity upon completion.
-
-
-5.4 REGISTERING A BLOCK DEVICE AS CAPABLE OF EXCHANGING INTEGRITY
-    METADATA
-
-    To enable integrity exchange on a block device the gendisk must be
-    registered as capable:
-
-    int blk_integrity_register(gendisk, blk_integrity);
-
-      The blk_integrity struct is a template and should contain the
-      following:
-
-        static struct blk_integrity my_profile = {
-            .name                   = "STANDARDSBODY-TYPE-VARIANT-CSUM",
-            .generate_fn            = my_generate_fn,
-       	    .verify_fn              = my_verify_fn,
-	    .tuple_size             = sizeof(struct my_tuple_size),
-	    .tag_size               = <tag bytes per hw sector>,
-        };
-
-      'name' is a text string which will be visible in sysfs.  This is
-      part of the userland API so chose it carefully and never change
-      it.  The format is standards body-type-variant.
-      E.g. T10-DIF-TYPE1-IP or T13-EPP-0-CRC.
-
-      'generate_fn' generates appropriate integrity metadata (for WRITE).
-
-      'verify_fn' verifies that the data buffer matches the integrity
-      metadata.
-
-      'tuple_size' must be set to match the size of the integrity
-      metadata per sector.  I.e. 8 for DIF and EPP.
-
-      'tag_size' must be set to identify how many bytes of tag space
-      are available per hardware sector.  For DIF this is either 2 or
-      0 depending on the value of the Control Mode Page ATO bit.
-
-----------------------------------------------------------------------
-2007-12-24 Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/Documentation/block/deadline-iosched.rst b/Documentation/block/deadline-iosched.rst
new file mode 100644
index 000000000000..9f5c5a4c370e
--- /dev/null
+++ b/Documentation/block/deadline-iosched.rst
@@ -0,0 +1,72 @@
+==============================
+Deadline IO scheduler tunables
+==============================
+
+This little file attempts to document how the deadline io scheduler works.
+In particular, it will clarify the meaning of the exposed tunables that may be
+of interest to power users.
+
+Selecting IO schedulers
+-----------------------
+Refer to Documentation/block/switching-sched.rst for information on
+selecting an io scheduler on a per-device basis.
+
+------------------------------------------------------------------------------
+
+read_expire	(in ms)
+-----------------------
+
+The goal of the deadline io scheduler is to attempt to guarantee a start
+service time for a request. As we focus mainly on read latencies, this is
+tunable. When a read request first enters the io scheduler, it is assigned
+a deadline that is the current time + the read_expire value in units of
+milliseconds.
+
+
+write_expire	(in ms)
+-----------------------
+
+Similar to read_expire mentioned above, but for writes.
+
+
+fifo_batch	(number of requests)
+------------------------------------
+
+Requests are grouped into ``batches`` of a particular data direction (read or
+write) which are serviced in increasing sector order.  To limit extra seeking,
+deadline expiries are only checked between batches.  fifo_batch controls the
+maximum number of requests per batch.
+
+This parameter tunes the balance between per-request latency and aggregate
+throughput.  When low latency is the primary concern, smaller is better (where
+a value of 1 yields first-come first-served behaviour).  Increasing fifo_batch
+generally improves throughput, at the cost of latency variation.
+
+
+writes_starved	(number of dispatches)
+--------------------------------------
+
+When we have to move requests from the io scheduler queue to the block
+device dispatch queue, we always give a preference to reads. However, we
+don't want to starve writes indefinitely either. So writes_starved controls
+how many times we give preference to reads over writes. When that has been
+done writes_starved number of times, we dispatch some writes based on the
+same criteria as reads.
+
+
+front_merges	(bool)
+----------------------
+
+Sometimes it happens that a request enters the io scheduler that is contiguous
+with a request that is already on the queue. Either it fits in the back of that
+request, or it fits at the front. That is called either a back merge candidate
+or a front merge candidate. Due to the way files are typically laid out,
+back merges are much more common than front merges. For some work loads, you
+may even know that it is a waste of time to spend any time attempting to
+front merge requests. Setting front_merges to 0 disables this functionality.
+Front merges may still occur due to the cached last_merge hint, but since
+that comes at basically 0 cost we leave that on. We simply disable the
+rbtree front sector lookup when the io scheduler merge function is called.
+
+
+Nov 11 2002, Jens Axboe <jens.axboe@oracle.com>
diff --git a/Documentation/block/deadline-iosched.txt b/Documentation/block/deadline-iosched.txt
deleted file mode 100644
index 2d82c80322cb..000000000000
--- a/Documentation/block/deadline-iosched.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-Deadline IO scheduler tunables
-==============================
-
-This little file attempts to document how the deadline io scheduler works.
-In particular, it will clarify the meaning of the exposed tunables that may be
-of interest to power users.
-
-Selecting IO schedulers
------------------------
-Refer to Documentation/block/switching-sched.txt for information on
-selecting an io scheduler on a per-device basis.
-
-
-********************************************************************************
-
-
-read_expire	(in ms)
------------
-
-The goal of the deadline io scheduler is to attempt to guarantee a start
-service time for a request. As we focus mainly on read latencies, this is
-tunable. When a read request first enters the io scheduler, it is assigned
-a deadline that is the current time + the read_expire value in units of
-milliseconds.
-
-
-write_expire	(in ms)
------------
-
-Similar to read_expire mentioned above, but for writes.
-
-
-fifo_batch	(number of requests)
-----------
-
-Requests are grouped into ``batches'' of a particular data direction (read or
-write) which are serviced in increasing sector order.  To limit extra seeking,
-deadline expiries are only checked between batches.  fifo_batch controls the
-maximum number of requests per batch.
-
-This parameter tunes the balance between per-request latency and aggregate
-throughput.  When low latency is the primary concern, smaller is better (where
-a value of 1 yields first-come first-served behaviour).  Increasing fifo_batch
-generally improves throughput, at the cost of latency variation.
-
-
-writes_starved	(number of dispatches)
---------------
-
-When we have to move requests from the io scheduler queue to the block
-device dispatch queue, we always give a preference to reads. However, we
-don't want to starve writes indefinitely either. So writes_starved controls
-how many times we give preference to reads over writes. When that has been
-done writes_starved number of times, we dispatch some writes based on the
-same criteria as reads.
-
-
-front_merges	(bool)
-------------
-
-Sometimes it happens that a request enters the io scheduler that is contiguous
-with a request that is already on the queue. Either it fits in the back of that
-request, or it fits at the front. That is called either a back merge candidate
-or a front merge candidate. Due to the way files are typically laid out,
-back merges are much more common than front merges. For some work loads, you
-may even know that it is a waste of time to spend any time attempting to
-front merge requests. Setting front_merges to 0 disables this functionality.
-Front merges may still occur due to the cached last_merge hint, but since
-that comes at basically 0 cost we leave that on. We simply disable the
-rbtree front sector lookup when the io scheduler merge function is called.
-
-
-Nov 11 2002, Jens Axboe <jens.axboe@oracle.com>
-
-
diff --git a/Documentation/block/index.rst b/Documentation/block/index.rst
new file mode 100644
index 000000000000..8cd226a0e86e
--- /dev/null
+++ b/Documentation/block/index.rst
@@ -0,0 +1,25 @@
+:orphan:
+
+=====
+Block
+=====
+
+.. toctree::
+   :maxdepth: 1
+
+   bfq-iosched
+   biodoc
+   biovecs
+   capability
+   cmdline-partition
+   data-integrity
+   deadline-iosched
+   ioprio
+   kyber-iosched
+   null_blk
+   pr
+   queue-sysfs
+   request
+   stat
+   switching-sched
+   writeback_cache_control
diff --git a/Documentation/block/ioprio.rst b/Documentation/block/ioprio.rst
new file mode 100644
index 000000000000..f72b0de65af7
--- /dev/null
+++ b/Documentation/block/ioprio.rst
@@ -0,0 +1,182 @@
+===================
+Block io priorities
+===================
+
+
+Intro
+-----
+
+With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
+priorities are supported for reads on files.  This enables users to io nice
+processes or process groups, similar to what has been possible with cpu
+scheduling for ages.  This document mainly details the current possibilities
+with cfq; other io schedulers do not support io priorities thus far.
+
+Scheduling classes
+------------------
+
+CFQ implements three generic scheduling classes that determine how io is
+served for a process.
+
+IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
+higher priority than any other in the system, processes from this class are
+given first access to the disk every time. Thus it needs to be used with some
+care, one io RT process can starve the entire system. Within the RT class,
+there are 8 levels of class data that determine exactly how much time this
+process needs the disk for on each service. In the future this might change
+to be more directly mappable to performance, by passing in a wanted data
+rate instead.
+
+IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
+for any process that hasn't set a specific io priority. The class data
+determines how much io bandwidth the process will get, it's directly mappable
+to the cpu nice levels just more coarsely implemented. 0 is the highest
+BE prio level, 7 is the lowest. The mapping between cpu nice level and io
+nice level is determined as: io_nice = (cpu_nice + 20) / 5.
+
+IOPRIO_CLASS_IDLE: This is the idle scheduling class, processes running at this
+level only get io time when no one else needs the disk. The idle class has no
+class data, since it doesn't really apply here.
+
+Tools
+-----
+
+See below for a sample ionice tool. Usage::
+
+	# ionice -c<class> -n<level> -p<pid>
+
+If pid isn't given, the current process is assumed. IO priority settings
+are inherited on fork, so you can use ionice to start the process at a given
+level::
+
+	# ionice -c2 -n0 /bin/ls
+
+will run ls at the best-effort scheduling class at the highest priority.
+For a running process, you can give the pid instead::
+
+	# ionice -c1 -n2 -p100
+
+will change pid 100 to run at the realtime scheduling class, at priority 2.
+
+ionice.c tool::
+
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <errno.h>
+  #include <getopt.h>
+  #include <unistd.h>
+  #include <sys/ptrace.h>
+  #include <asm/unistd.h>
+
+  extern int sys_ioprio_set(int, int, int);
+  extern int sys_ioprio_get(int, int);
+
+  #if defined(__i386__)
+  #define __NR_ioprio_set		289
+  #define __NR_ioprio_get		290
+  #elif defined(__ppc__)
+  #define __NR_ioprio_set		273
+  #define __NR_ioprio_get		274
+  #elif defined(__x86_64__)
+  #define __NR_ioprio_set		251
+  #define __NR_ioprio_get		252
+  #elif defined(__ia64__)
+  #define __NR_ioprio_set		1274
+  #define __NR_ioprio_get		1275
+  #else
+  #error "Unsupported arch"
+  #endif
+
+  static inline int ioprio_set(int which, int who, int ioprio)
+  {
+	return syscall(__NR_ioprio_set, which, who, ioprio);
+  }
+
+  static inline int ioprio_get(int which, int who)
+  {
+	return syscall(__NR_ioprio_get, which, who);
+  }
+
+  enum {
+	IOPRIO_CLASS_NONE,
+	IOPRIO_CLASS_RT,
+	IOPRIO_CLASS_BE,
+	IOPRIO_CLASS_IDLE,
+  };
+
+  enum {
+	IOPRIO_WHO_PROCESS = 1,
+	IOPRIO_WHO_PGRP,
+	IOPRIO_WHO_USER,
+  };
+
+  #define IOPRIO_CLASS_SHIFT	13
+
+  const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
+
+  int main(int argc, char *argv[])
+  {
+	int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
+	int c, pid = 0;
+
+	while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
+		switch (c) {
+		case 'n':
+			ioprio = strtol(optarg, NULL, 10);
+			set = 1;
+			break;
+		case 'c':
+			ioprio_class = strtol(optarg, NULL, 10);
+			set = 1;
+			break;
+		case 'p':
+			pid = strtol(optarg, NULL, 10);
+			break;
+		}
+	}
+
+	switch (ioprio_class) {
+		case IOPRIO_CLASS_NONE:
+			ioprio_class = IOPRIO_CLASS_BE;
+			break;
+		case IOPRIO_CLASS_RT:
+		case IOPRIO_CLASS_BE:
+			break;
+		case IOPRIO_CLASS_IDLE:
+			ioprio = 7;
+			break;
+		default:
+			printf("bad prio class %d\n", ioprio_class);
+			return 1;
+	}
+
+	if (!set) {
+		if (!pid && argv[optind])
+			pid = strtol(argv[optind], NULL, 10);
+
+		ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
+
+		printf("pid=%d, %d\n", pid, ioprio);
+
+		if (ioprio == -1)
+			perror("ioprio_get");
+		else {
+			ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
+			ioprio = ioprio & 0xff;
+			printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
+		}
+	} else {
+		if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
+			perror("ioprio_set");
+			return 1;
+		}
+
+		if (argv[optind])
+			execvp(argv[optind], &argv[optind]);
+	}
+
+	return 0;
+  }
+
+
+March 11 2005, Jens Axboe <jens.axboe@oracle.com>
diff --git a/Documentation/block/ioprio.txt b/Documentation/block/ioprio.txt
deleted file mode 100644
index 8ed8c59380b4..000000000000
--- a/Documentation/block/ioprio.txt
+++ /dev/null
@@ -1,183 +0,0 @@
-Block io priorities
-===================
-
-
-Intro
------
-
-With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
-priorities are supported for reads on files.  This enables users to io nice
-processes or process groups, similar to what has been possible with cpu
-scheduling for ages.  This document mainly details the current possibilities
-with cfq; other io schedulers do not support io priorities thus far.
-
-Scheduling classes
-------------------
-
-CFQ implements three generic scheduling classes that determine how io is
-served for a process.
-
-IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
-higher priority than any other in the system, processes from this class are
-given first access to the disk every time. Thus it needs to be used with some
-care, one io RT process can starve the entire system. Within the RT class,
-there are 8 levels of class data that determine exactly how much time this
-process needs the disk for on each service. In the future this might change
-to be more directly mappable to performance, by passing in a wanted data
-rate instead.
-
-IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
-for any process that hasn't set a specific io priority. The class data
-determines how much io bandwidth the process will get, it's directly mappable
-to the cpu nice levels just more coarsely implemented. 0 is the highest
-BE prio level, 7 is the lowest. The mapping between cpu nice level and io
-nice level is determined as: io_nice = (cpu_nice + 20) / 5.
-
-IOPRIO_CLASS_IDLE: This is the idle scheduling class, processes running at this
-level only get io time when no one else needs the disk. The idle class has no
-class data, since it doesn't really apply here.
-
-Tools
------
-
-See below for a sample ionice tool. Usage:
-
-# ionice -c<class> -n<level> -p<pid>
-
-If pid isn't given, the current process is assumed. IO priority settings
-are inherited on fork, so you can use ionice to start the process at a given
-level:
-
-# ionice -c2 -n0 /bin/ls
-
-will run ls at the best-effort scheduling class at the highest priority.
-For a running process, you can give the pid instead:
-
-# ionice -c1 -n2 -p100
-
-will change pid 100 to run at the realtime scheduling class, at priority 2.
-
----> snip ionice.c tool <---
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <getopt.h>
-#include <unistd.h>
-#include <sys/ptrace.h>
-#include <asm/unistd.h>
-
-extern int sys_ioprio_set(int, int, int);
-extern int sys_ioprio_get(int, int);
-
-#if defined(__i386__)
-#define __NR_ioprio_set		289
-#define __NR_ioprio_get		290
-#elif defined(__ppc__)
-#define __NR_ioprio_set		273
-#define __NR_ioprio_get		274
-#elif defined(__x86_64__)
-#define __NR_ioprio_set		251
-#define __NR_ioprio_get		252
-#elif defined(__ia64__)
-#define __NR_ioprio_set		1274
-#define __NR_ioprio_get		1275
-#else
-#error "Unsupported arch"
-#endif
-
-static inline int ioprio_set(int which, int who, int ioprio)
-{
-	return syscall(__NR_ioprio_set, which, who, ioprio);
-}
-
-static inline int ioprio_get(int which, int who)
-{
-	return syscall(__NR_ioprio_get, which, who);
-}
-
-enum {
-	IOPRIO_CLASS_NONE,
-	IOPRIO_CLASS_RT,
-	IOPRIO_CLASS_BE,
-	IOPRIO_CLASS_IDLE,
-};
-
-enum {
-	IOPRIO_WHO_PROCESS = 1,
-	IOPRIO_WHO_PGRP,
-	IOPRIO_WHO_USER,
-};
-
-#define IOPRIO_CLASS_SHIFT	13
-
-const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
-
-int main(int argc, char *argv[])
-{
-	int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
-	int c, pid = 0;
-
-	while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
-		switch (c) {
-		case 'n':
-			ioprio = strtol(optarg, NULL, 10);
-			set = 1;
-			break;
-		case 'c':
-			ioprio_class = strtol(optarg, NULL, 10);
-			set = 1;
-			break;
-		case 'p':
-			pid = strtol(optarg, NULL, 10);
-			break;
-		}
-	}
-
-	switch (ioprio_class) {
-		case IOPRIO_CLASS_NONE:
-			ioprio_class = IOPRIO_CLASS_BE;
-			break;
-		case IOPRIO_CLASS_RT:
-		case IOPRIO_CLASS_BE:
-			break;
-		case IOPRIO_CLASS_IDLE:
-			ioprio = 7;
-			break;
-		default:
-			printf("bad prio class %d\n", ioprio_class);
-			return 1;
-	}
-
-	if (!set) {
-		if (!pid && argv[optind])
-			pid = strtol(argv[optind], NULL, 10);
-
-		ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
-
-		printf("pid=%d, %d\n", pid, ioprio);
-
-		if (ioprio == -1)
-			perror("ioprio_get");
-		else {
-			ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
-			ioprio = ioprio & 0xff;
-			printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
-		}
-	} else {
-		if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
-			perror("ioprio_set");
-			return 1;
-		}
-
-		if (argv[optind])
-			execvp(argv[optind], &argv[optind]);
-	}
-
-	return 0;
-}
-
----> snip ionice.c tool <---
-
-
-March 11 2005, Jens Axboe <jens.axboe@oracle.com>
diff --git a/Documentation/block/kyber-iosched.rst b/Documentation/block/kyber-iosched.rst
new file mode 100644
index 000000000000..3e164dd0617c
--- /dev/null
+++ b/Documentation/block/kyber-iosched.rst
@@ -0,0 +1,15 @@
+============================
+Kyber I/O scheduler tunables
+============================
+
+The only two tunables for the Kyber scheduler are the target latencies for
+reads and synchronous writes. Kyber will throttle requests in order to meet
+these target latencies.
+
+read_lat_nsec
+-------------
+Target latency for reads (in nanoseconds).
+
+write_lat_nsec
+--------------
+Target latency for synchronous writes (in nanoseconds).
diff --git a/Documentation/block/kyber-iosched.txt b/Documentation/block/kyber-iosched.txt
deleted file mode 100644
index e94feacd7edc..000000000000
--- a/Documentation/block/kyber-iosched.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Kyber I/O scheduler tunables
-===========================
-
-The only two tunables for the Kyber scheduler are the target latencies for
-reads and synchronous writes. Kyber will throttle requests in order to meet
-these target latencies.
-
-read_lat_nsec
--------------
-Target latency for reads (in nanoseconds).
-
-write_lat_nsec
---------------
-Target latency for synchronous writes (in nanoseconds).
diff --git a/Documentation/block/null_blk.rst b/Documentation/block/null_blk.rst
new file mode 100644
index 000000000000..31451d80783c
--- /dev/null
+++ b/Documentation/block/null_blk.rst
@@ -0,0 +1,126 @@
+========================
+Null block device driver
+========================
+
+1. Overview
+===========
+
+The null block device (/dev/nullb*) is used for benchmarking the various
+block-layer implementations. It emulates a block device of X gigabytes in size.
+The following instances are possible:
+
+  Single-queue block-layer
+
+    - Request-based.
+    - Single submission queue per device.
+    - Implements IO scheduling algorithms (CFQ, Deadline, noop).
+
+  Multi-queue block-layer
+
+    - Request-based.
+    - Configurable submission queues per device.
+
+  No block-layer (Known as bio-based)
+
+    - Bio-based. IO requests are submitted directly to the device driver.
+    - Directly accepts bio data structure and returns them.
+
+All of them have a completion queue for each core in the system.
+
+2. Module parameters applicable for all instances
+=================================================
+
+queue_mode=[0-2]: Default: 2-Multi-queue
+  Selects which block-layer the module should instantiate with.
+
+  =  ============
+  0  Bio-based
+  1  Single-queue
+  2  Multi-queue
+  =  ============
+
+home_node=[0--nr_nodes]: Default: NUMA_NO_NODE
+  Selects what CPU node the data structures are allocated from.
+
+gb=[Size in GB]: Default: 250GB
+  The size of the device reported to the system.
+
+bs=[Block size (in bytes)]: Default: 512 bytes
+  The block size reported to the system.
+
+nr_devices=[Number of devices]: Default: 1
+  Number of block devices instantiated. They are instantiated as /dev/nullb0,
+  etc.
+
+irqmode=[0-2]: Default: 1-Soft-irq
+  The completion mode used for completing IOs to the block-layer.
+
+  =  ===========================================================================
+  0  None.
+  1  Soft-irq. Uses IPI to complete IOs across CPU nodes. Simulates the overhead
+     when IOs are issued from another CPU node than the home the device is
+     connected to.
+  2  Timer: Waits a specific period (completion_nsec) for each IO before
+     completion.
+  =  ===========================================================================
+
+completion_nsec=[ns]: Default: 10,000ns
+  Combined with irqmode=2 (timer). The time each completion event must wait.
+
+submit_queues=[1..nr_cpus]:
+  The number of submission queues attached to the device driver. If unset, it
+  defaults to 1. For multi-queue, it is ignored when use_per_node_hctx module
+  parameter is 1.
+
+hw_queue_depth=[0..qdepth]: Default: 64
+  The hardware queue depth of the device.
+
+III: Multi-queue specific parameters
+
+use_per_node_hctx=[0/1]: Default: 0
+
+  =  =====================================================================
+  0  The number of submit queues are set to the value of the submit_queues
+     parameter.
+  1  The multi-queue block layer is instantiated with a hardware dispatch
+     queue for each CPU node in the system.
+  =  =====================================================================
+
+no_sched=[0/1]: Default: 0
+
+  =  ======================================
+  0  nullb* use default blk-mq io scheduler
+  1  nullb* doesn't use io scheduler
+  =  ======================================
+
+blocking=[0/1]: Default: 0
+
+  =  ===============================================================
+  0  Register as a non-blocking blk-mq driver device.
+  1  Register as a blocking blk-mq driver device, null_blk will set
+     the BLK_MQ_F_BLOCKING flag, indicating that it sometimes/always
+     needs to block in its ->queue_rq() function.
+  =  ===============================================================
+
+shared_tags=[0/1]: Default: 0
+
+  =  ================================================================
+  0  Tag set is not shared.
+  1  Tag set shared between devices for blk-mq. Only makes sense with
+     nr_devices > 1, otherwise there's no tag set to share.
+  =  ================================================================
+
+zoned=[0/1]: Default: 0
+
+  =  ======================================================================
+  0  Block device is exposed as a random-access block device.
+  1  Block device is exposed as a host-managed zoned block device. Requires
+     CONFIG_BLK_DEV_ZONED.
+  =  ======================================================================
+
+zone_size=[MB]: Default: 256
+  Per zone size when exposed as a zoned block device. Must be a power of two.
+
+zone_nr_conv=[nr_conv]: Default: 0
+  The number of conventional zones to create when block device is zoned.  If
+  zone_nr_conv >= nr_zones, it will be reduced to nr_zones - 1.
diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt
deleted file mode 100644
index 41f0a3d33bbd..000000000000
--- a/Documentation/block/null_blk.txt
+++ /dev/null
@@ -1,99 +0,0 @@
-Null block device driver
-================================================================================
-
-I. Overview
-
-The null block device (/dev/nullb*) is used for benchmarking the various
-block-layer implementations. It emulates a block device of X gigabytes in size.
-The following instances are possible:
-
-  Single-queue block-layer
-    - Request-based.
-    - Single submission queue per device.
-    - Implements IO scheduling algorithms (CFQ, Deadline, noop).
-  Multi-queue block-layer
-    - Request-based.
-    - Configurable submission queues per device.
-  No block-layer (Known as bio-based)
-    - Bio-based. IO requests are submitted directly to the device driver.
-    - Directly accepts bio data structure and returns them.
-
-All of them have a completion queue for each core in the system.
-
-II. Module parameters applicable for all instances:
-
-queue_mode=[0-2]: Default: 2-Multi-queue
-  Selects which block-layer the module should instantiate with.
-
-  0: Bio-based.
-  1: Single-queue.
-  2: Multi-queue.
-
-home_node=[0--nr_nodes]: Default: NUMA_NO_NODE
-  Selects what CPU node the data structures are allocated from.
-
-gb=[Size in GB]: Default: 250GB
-  The size of the device reported to the system.
-
-bs=[Block size (in bytes)]: Default: 512 bytes
-  The block size reported to the system.
-
-nr_devices=[Number of devices]: Default: 1
-  Number of block devices instantiated. They are instantiated as /dev/nullb0,
-  etc.
-
-irqmode=[0-2]: Default: 1-Soft-irq
-  The completion mode used for completing IOs to the block-layer.
-
-  0: None.
-  1: Soft-irq. Uses IPI to complete IOs across CPU nodes. Simulates the overhead
-     when IOs are issued from another CPU node than the home the device is
-     connected to.
-  2: Timer: Waits a specific period (completion_nsec) for each IO before
-     completion.
-
-completion_nsec=[ns]: Default: 10,000ns
-  Combined with irqmode=2 (timer). The time each completion event must wait.
-
-submit_queues=[1..nr_cpus]:
-  The number of submission queues attached to the device driver. If unset, it
-  defaults to 1. For multi-queue, it is ignored when use_per_node_hctx module
-  parameter is 1.
-
-hw_queue_depth=[0..qdepth]: Default: 64
-  The hardware queue depth of the device.
-
-III: Multi-queue specific parameters
-
-use_per_node_hctx=[0/1]: Default: 0
-  0: The number of submit queues are set to the value of the submit_queues
-     parameter.
-  1: The multi-queue block layer is instantiated with a hardware dispatch
-     queue for each CPU node in the system.
-
-no_sched=[0/1]: Default: 0
-  0: nullb* use default blk-mq io scheduler.
-  1: nullb* doesn't use io scheduler.
-
-blocking=[0/1]: Default: 0
-  0: Register as a non-blocking blk-mq driver device.
-  1: Register as a blocking blk-mq driver device, null_blk will set
-     the BLK_MQ_F_BLOCKING flag, indicating that it sometimes/always
-     needs to block in its ->queue_rq() function.
-
-shared_tags=[0/1]: Default: 0
-  0: Tag set is not shared.
-  1: Tag set shared between devices for blk-mq. Only makes sense with
-     nr_devices > 1, otherwise there's no tag set to share.
-
-zoned=[0/1]: Default: 0
-  0: Block device is exposed as a random-access block device.
-  1: Block device is exposed as a host-managed zoned block device. Requires
-     CONFIG_BLK_DEV_ZONED.
-
-zone_size=[MB]: Default: 256
-  Per zone size when exposed as a zoned block device. Must be a power of two.
-
-zone_nr_conv=[nr_conv]: Default: 0
-  The number of conventional zones to create when block device is zoned.  If
-  zone_nr_conv >= nr_zones, it will be reduced to nr_zones - 1.
diff --git a/Documentation/block/pr.rst b/Documentation/block/pr.rst
new file mode 100644
index 000000000000..30ea1c2e39eb
--- /dev/null
+++ b/Documentation/block/pr.rst
@@ -0,0 +1,119 @@
+===============================================
+Block layer support for Persistent Reservations
+===============================================
+
+The Linux kernel supports a user space interface for simplified
+Persistent Reservations which map to block devices that support
+these (like SCSI). Persistent Reservations allow restricting
+access to block devices to specific initiators in a shared storage
+setup.
+
+This document gives a general overview of the support ioctl commands.
+For a more detailed reference please refer the the SCSI Primary
+Commands standard, specifically the section on Reservations and the
+"PERSISTENT RESERVE IN" and "PERSISTENT RESERVE OUT" commands.
+
+All implementations are expected to ensure the reservations survive
+a power loss and cover all connections in a multi path environment.
+These behaviors are optional in SPC but will be automatically applied
+by Linux.
+
+
+The following types of reservations are supported:
+--------------------------------------------------
+
+ - PR_WRITE_EXCLUSIVE
+	Only the initiator that owns the reservation can write to the
+	device.  Any initiator can read from the device.
+
+ - PR_EXCLUSIVE_ACCESS
+	Only the initiator that owns the reservation can access the
+	device.
+
+ - PR_WRITE_EXCLUSIVE_REG_ONLY
+	Only initiators with a registered key can write to the device,
+	Any initiator can read from the device.
+
+ - PR_EXCLUSIVE_ACCESS_REG_ONLY
+	Only initiators with a registered key can access the device.
+
+ - PR_WRITE_EXCLUSIVE_ALL_REGS
+
+	Only initiators with a registered key can write to the device,
+	Any initiator can read from the device.
+	All initiators with a registered key are considered reservation
+	holders.
+	Please reference the SPC spec on the meaning of a reservation
+	holder if you want to use this type.
+
+ - PR_EXCLUSIVE_ACCESS_ALL_REGS
+	Only initiators with a registered key can access the device.
+	All initiators with a registered key are considered reservation
+	holders.
+	Please reference the SPC spec on the meaning of a reservation
+	holder if you want to use this type.
+
+
+The following ioctl are supported:
+----------------------------------
+
+1. IOC_PR_REGISTER
+^^^^^^^^^^^^^^^^^^
+
+This ioctl command registers a new reservation if the new_key argument
+is non-null.  If no existing reservation exists old_key must be zero,
+if an existing reservation should be replaced old_key must contain
+the old reservation key.
+
+If the new_key argument is 0 it unregisters the existing reservation passed
+in old_key.
+
+
+2. IOC_PR_RESERVE
+^^^^^^^^^^^^^^^^^
+
+This ioctl command reserves the device and thus restricts access for other
+devices based on the type argument.  The key argument must be the existing
+reservation key for the device as acquired by the IOC_PR_REGISTER,
+IOC_PR_REGISTER_IGNORE, IOC_PR_PREEMPT or IOC_PR_PREEMPT_ABORT commands.
+
+
+3. IOC_PR_RELEASE
+^^^^^^^^^^^^^^^^^
+
+This ioctl command releases the reservation specified by key and flags
+and thus removes any access restriction implied by it.
+
+
+4. IOC_PR_PREEMPT
+^^^^^^^^^^^^^^^^^
+
+This ioctl command releases the existing reservation referred to by
+old_key and replaces it with a new reservation of type for the
+reservation key new_key.
+
+
+5. IOC_PR_PREEMPT_ABORT
+^^^^^^^^^^^^^^^^^^^^^^^
+
+This ioctl command works like IOC_PR_PREEMPT except that it also aborts
+any outstanding command sent over a connection identified by old_key.
+
+6. IOC_PR_CLEAR
+^^^^^^^^^^^^^^^
+
+This ioctl command unregisters both key and any other reservation key
+registered with the device and drops any existing reservation.
+
+
+Flags
+-----
+
+All the ioctls have a flag field.  Currently only one flag is supported:
+
+ - PR_FL_IGNORE_KEY
+	Ignore the existing reservation key.  This is commonly supported for
+	IOC_PR_REGISTER, and some implementation may support the flag for
+	IOC_PR_RESERVE.
+
+For all unknown flags the kernel will return -EOPNOTSUPP.
diff --git a/Documentation/block/pr.txt b/Documentation/block/pr.txt
deleted file mode 100644
index ac9b8e70e64b..000000000000
--- a/Documentation/block/pr.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-
-Block layer support for Persistent Reservations
-===============================================
-
-The Linux kernel supports a user space interface for simplified
-Persistent Reservations which map to block devices that support
-these (like SCSI). Persistent Reservations allow restricting
-access to block devices to specific initiators in a shared storage
-setup.
-
-This document gives a general overview of the support ioctl commands.
-For a more detailed reference please refer the the SCSI Primary
-Commands standard, specifically the section on Reservations and the
-"PERSISTENT RESERVE IN" and "PERSISTENT RESERVE OUT" commands.
-
-All implementations are expected to ensure the reservations survive
-a power loss and cover all connections in a multi path environment.
-These behaviors are optional in SPC but will be automatically applied
-by Linux.
-
-
-The following types of reservations are supported:
---------------------------------------------------
-
- - PR_WRITE_EXCLUSIVE
-
-	Only the initiator that owns the reservation can write to the
-	device.  Any initiator can read from the device.
-
- - PR_EXCLUSIVE_ACCESS
-
-	Only the initiator that owns the reservation can access the
-	device.
-
- - PR_WRITE_EXCLUSIVE_REG_ONLY
-
-	Only initiators with a registered key can write to the device,
-	Any initiator can read from the device.
-
- - PR_EXCLUSIVE_ACCESS_REG_ONLY
-
-	Only initiators with a registered key can access the device.
-
- - PR_WRITE_EXCLUSIVE_ALL_REGS
-
-	Only initiators with a registered key can write to the device,
-	Any initiator can read from the device.
-	All initiators with a registered key are considered reservation
-	holders.
-	Please reference the SPC spec on the meaning of a reservation
-	holder if you want to use this type. 
-
- - PR_EXCLUSIVE_ACCESS_ALL_REGS
-
-	Only initiators with a registered key can access the device.
-	All initiators with a registered key are considered reservation
-	holders.
-	Please reference the SPC spec on the meaning of a reservation
-	holder if you want to use this type. 
-
-
-The following ioctl are supported:
-----------------------------------
-
-1. IOC_PR_REGISTER
-
-This ioctl command registers a new reservation if the new_key argument
-is non-null.  If no existing reservation exists old_key must be zero,
-if an existing reservation should be replaced old_key must contain
-the old reservation key.
-
-If the new_key argument is 0 it unregisters the existing reservation passed
-in old_key.
-
-
-2. IOC_PR_RESERVE
-
-This ioctl command reserves the device and thus restricts access for other
-devices based on the type argument.  The key argument must be the existing
-reservation key for the device as acquired by the IOC_PR_REGISTER,
-IOC_PR_REGISTER_IGNORE, IOC_PR_PREEMPT or IOC_PR_PREEMPT_ABORT commands.
-
-
-3. IOC_PR_RELEASE
-
-This ioctl command releases the reservation specified by key and flags
-and thus removes any access restriction implied by it.
-
-
-4. IOC_PR_PREEMPT
-
-This ioctl command releases the existing reservation referred to by
-old_key and replaces it with a new reservation of type for the
-reservation key new_key.
-
-
-5. IOC_PR_PREEMPT_ABORT
-
-This ioctl command works like IOC_PR_PREEMPT except that it also aborts
-any outstanding command sent over a connection identified by old_key.
-
-6. IOC_PR_CLEAR
-
-This ioctl command unregisters both key and any other reservation key
-registered with the device and drops any existing reservation.
-
-
-Flags
------
-
-All the ioctls have a flag field.  Currently only one flag is supported:
-
- - PR_FL_IGNORE_KEY
-
-	Ignore the existing reservation key.  This is commonly supported for
-	IOC_PR_REGISTER, and some implementation may support the flag for
-	IOC_PR_RESERVE.
-
-For all unknown flags the kernel will return -EOPNOTSUPP.
diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
new file mode 100644
index 000000000000..6a8513af9201
--- /dev/null
+++ b/Documentation/block/queue-sysfs.rst
@@ -0,0 +1,254 @@
+=================
+Queue sysfs files
+=================
+
+This text file will detail the queue files that are located in the sysfs tree
+for each block device. Note that stacked devices typically do not export
+any settings, since their queue merely functions are a remapping target.
+These files are the ones found in the /sys/block/xxx/queue/ directory.
+
+Files denoted with a RO postfix are readonly and the RW postfix means
+read-write.
+
+add_random (RW)
+---------------
+This file allows to turn off the disk entropy contribution. Default
+value of this file is '1'(on).
+
+chunk_sectors (RO)
+------------------
+This has different meaning depending on the type of the block device.
+For a RAID device (dm-raid), chunk_sectors indicates the size in 512B sectors
+of the RAID volume stripe segment. For a zoned block device, either host-aware
+or host-managed, chunk_sectors indicates the size in 512B sectors of the zones
+of the device, with the eventual exception of the last zone of the device which
+may be smaller.
+
+dax (RO)
+--------
+This file indicates whether the device supports Direct Access (DAX),
+used by CPU-addressable storage to bypass the pagecache.  It shows '1'
+if true, '0' if not.
+
+discard_granularity (RO)
+------------------------
+This shows the size of internal allocation of the device in bytes, if
+reported by the device. A value of '0' means device does not support
+the discard functionality.
+
+discard_max_hw_bytes (RO)
+-------------------------
+Devices that support discard functionality may have internal limits on
+the number of bytes that can be trimmed or unmapped in a single operation.
+The discard_max_bytes parameter is set by the device driver to the maximum
+number of bytes that can be discarded in a single operation. Discard
+requests issued to the device must not exceed this limit. A discard_max_bytes
+value of 0 means that the device does not support discard functionality.
+
+discard_max_bytes (RW)
+----------------------
+While discard_max_hw_bytes is the hardware limit for the device, this
+setting is the software limit. Some devices exhibit large latencies when
+large discards are issued, setting this value lower will make Linux issue
+smaller discards and potentially help reduce latencies induced by large
+discard operations.
+
+discard_zeroes_data (RO)
+------------------------
+Obsolete. Always zero.
+
+fua (RO)
+--------
+Whether or not the block driver supports the FUA flag for write requests.
+FUA stands for Force Unit Access. If the FUA flag is set that means that
+write requests must bypass the volatile cache of the storage device.
+
+hw_sector_size (RO)
+-------------------
+This is the hardware sector size of the device, in bytes.
+
+io_poll (RW)
+------------
+When read, this file shows whether polling is enabled (1) or disabled
+(0).  Writing '0' to this file will disable polling for this device.
+Writing any non-zero value will enable this feature.
+
+io_poll_delay (RW)
+------------------
+If polling is enabled, this controls what kind of polling will be
+performed. It defaults to -1, which is classic polling. In this mode,
+the CPU will repeatedly ask for completions without giving up any time.
+If set to 0, a hybrid polling mode is used, where the kernel will attempt
+to make an educated guess at when the IO will complete. Based on this
+guess, the kernel will put the process issuing IO to sleep for an amount
+of time, before entering a classic poll loop. This mode might be a
+little slower than pure classic polling, but it will be more efficient.
+If set to a value larger than 0, the kernel will put the process issuing
+IO to sleep for this amount of microseconds before entering classic
+polling.
+
+io_timeout (RW)
+---------------
+io_timeout is the request timeout in milliseconds. If a request does not
+complete in this time then the block driver timeout handler is invoked.
+That timeout handler can decide to retry the request, to fail it or to start
+a device recovery strategy.
+
+iostats (RW)
+-------------
+This file is used to control (on/off) the iostats accounting of the
+disk.
+
+logical_block_size (RO)
+-----------------------
+This is the logical block size of the device, in bytes.
+
+max_discard_segments (RO)
+-------------------------
+The maximum number of DMA scatter/gather entries in a discard request.
+
+max_hw_sectors_kb (RO)
+----------------------
+This is the maximum number of kilobytes supported in a single data transfer.
+
+max_integrity_segments (RO)
+---------------------------
+Maximum number of elements in a DMA scatter/gather list with integrity
+data that will be submitted by the block layer core to the associated
+block driver.
+
+max_sectors_kb (RW)
+-------------------
+This is the maximum number of kilobytes that the block layer will allow
+for a filesystem request. Must be smaller than or equal to the maximum
+size allowed by the hardware.
+
+max_segments (RO)
+-----------------
+Maximum number of elements in a DMA scatter/gather list that is submitted
+to the associated block driver.
+
+max_segment_size (RO)
+---------------------
+Maximum size in bytes of a single element in a DMA scatter/gather list.
+
+minimum_io_size (RO)
+--------------------
+This is the smallest preferred IO size reported by the device.
+
+nomerges (RW)
+-------------
+This enables the user to disable the lookup logic involved with IO
+merging requests in the block layer. By default (0) all merges are
+enabled. When set to 1 only simple one-hit merges will be tried. When
+set to 2 no merge algorithms will be tried (including one-hit or more
+complex tree/hash lookups).
+
+nr_requests (RW)
+----------------
+This controls how many requests may be allocated in the block layer for
+read or write requests. Note that the total allocated number may be twice
+this amount, since it applies only to reads or writes (not the accumulated
+sum).
+
+To avoid priority inversion through request starvation, a request
+queue maintains a separate request pool per each cgroup when
+CONFIG_BLK_CGROUP is enabled, and this parameter applies to each such
+per-block-cgroup request pool.  IOW, if there are N block cgroups,
+each request queue may have up to N request pools, each independently
+regulated by nr_requests.
+
+nr_zones (RO)
+-------------
+For zoned block devices (zoned attribute indicating "host-managed" or
+"host-aware"), this indicates the total number of zones of the device.
+This is always 0 for regular block devices.
+
+optimal_io_size (RO)
+--------------------
+This is the optimal IO size reported by the device.
+
+physical_block_size (RO)
+------------------------
+This is the physical block size of device, in bytes.
+
+read_ahead_kb (RW)
+------------------
+Maximum number of kilobytes to read-ahead for filesystems on this block
+device.
+
+rotational (RW)
+---------------
+This file is used to stat if the device is of rotational type or
+non-rotational type.
+
+rq_affinity (RW)
+----------------
+If this option is '1', the block layer will migrate request completions to the
+cpu "group" that originally submitted the request. For some workloads this
+provides a significant reduction in CPU cycles due to caching effects.
+
+For storage configurations that need to maximize distribution of completion
+processing setting this option to '2' forces the completion to run on the
+requesting cpu (bypassing the "group" aggregation logic).
+
+scheduler (RW)
+--------------
+When read, this file will display the current and available IO schedulers
+for this block device. The currently active IO scheduler will be enclosed
+in [] brackets. Writing an IO scheduler name to this file will switch
+control of this block device to that new IO scheduler. Note that writing
+an IO scheduler name to this file will attempt to load that IO scheduler
+module, if it isn't already present in the system.
+
+write_cache (RW)
+----------------
+When read, this file will display whether the device has write back
+caching enabled or not. It will return "write back" for the former
+case, and "write through" for the latter. Writing to this file can
+change the kernels view of the device, but it doesn't alter the
+device state. This means that it might not be safe to toggle the
+setting from "write back" to "write through", since that will also
+eliminate cache flushes issued by the kernel.
+
+write_same_max_bytes (RO)
+-------------------------
+This is the number of bytes the device can write in a single write-same
+command.  A value of '0' means write-same is not supported by this
+device.
+
+wbt_lat_usec (RW)
+-----------------
+If the device is registered for writeback throttling, then this file shows
+the target minimum read latency. If this latency is exceeded in a given
+window of time (see wb_window_usec), then the writeback throttling will start
+scaling back writes. Writing a value of '0' to this file disables the
+feature. Writing a value of '-1' to this file resets the value to the
+default setting.
+
+throttle_sample_time (RW)
+-------------------------
+This is the time window that blk-throttle samples data, in millisecond.
+blk-throttle makes decision based on the samplings. Lower time means cgroups
+have more smooth throughput, but higher CPU overhead. This exists only when
+CONFIG_BLK_DEV_THROTTLING_LOW is enabled.
+
+write_zeroes_max_bytes (RO)
+---------------------------
+For block drivers that support REQ_OP_WRITE_ZEROES, the maximum number of
+bytes that can be zeroed at once. The value 0 means that REQ_OP_WRITE_ZEROES
+is not supported.
+
+zoned (RO)
+----------
+This indicates if the device is a zoned block device and the zone model of the
+device if it is indeed zoned. The possible values indicated by zoned are
+"none" for regular block devices and "host-aware" or "host-managed" for zoned
+block devices. The characteristics of host-aware and host-managed zoned block
+devices are described in the ZBC (Zoned Block Commands) and ZAC
+(Zoned Device ATA Command Set) standards. These standards also define the
+"drive-managed" zone model. However, since drive-managed zoned block devices
+do not support zone commands, they will be treated as regular block devices
+and zoned will report "none".
+
+Jens Axboe <jens.axboe@oracle.com>, February 2009
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
deleted file mode 100644
index b40b5b7cebd9..000000000000
--- a/Documentation/block/queue-sysfs.txt
+++ /dev/null
@@ -1,253 +0,0 @@
-Queue sysfs files
-=================
-
-This text file will detail the queue files that are located in the sysfs tree
-for each block device. Note that stacked devices typically do not export
-any settings, since their queue merely functions are a remapping target.
-These files are the ones found in the /sys/block/xxx/queue/ directory.
-
-Files denoted with a RO postfix are readonly and the RW postfix means
-read-write.
-
-add_random (RW)
-----------------
-This file allows to turn off the disk entropy contribution. Default
-value of this file is '1'(on).
-
-chunk_sectors (RO)
-------------------
-This has different meaning depending on the type of the block device.
-For a RAID device (dm-raid), chunk_sectors indicates the size in 512B sectors
-of the RAID volume stripe segment. For a zoned block device, either host-aware
-or host-managed, chunk_sectors indicates the size in 512B sectors of the zones
-of the device, with the eventual exception of the last zone of the device which
-may be smaller.
-
-dax (RO)
---------
-This file indicates whether the device supports Direct Access (DAX),
-used by CPU-addressable storage to bypass the pagecache.  It shows '1'
-if true, '0' if not.
-
-discard_granularity (RO)
------------------------
-This shows the size of internal allocation of the device in bytes, if
-reported by the device. A value of '0' means device does not support
-the discard functionality.
-
-discard_max_hw_bytes (RO)
-----------------------
-Devices that support discard functionality may have internal limits on
-the number of bytes that can be trimmed or unmapped in a single operation.
-The discard_max_bytes parameter is set by the device driver to the maximum
-number of bytes that can be discarded in a single operation. Discard
-requests issued to the device must not exceed this limit. A discard_max_bytes
-value of 0 means that the device does not support discard functionality.
-
-discard_max_bytes (RW)
-----------------------
-While discard_max_hw_bytes is the hardware limit for the device, this
-setting is the software limit. Some devices exhibit large latencies when
-large discards are issued, setting this value lower will make Linux issue
-smaller discards and potentially help reduce latencies induced by large
-discard operations.
-
-discard_zeroes_data (RO)
-------------------------
-Obsolete. Always zero.
-
-fua (RO)
---------
-Whether or not the block driver supports the FUA flag for write requests.
-FUA stands for Force Unit Access. If the FUA flag is set that means that
-write requests must bypass the volatile cache of the storage device.
-
-hw_sector_size (RO)
--------------------
-This is the hardware sector size of the device, in bytes.
-
-io_poll (RW)
-------------
-When read, this file shows whether polling is enabled (1) or disabled
-(0).  Writing '0' to this file will disable polling for this device.
-Writing any non-zero value will enable this feature.
-
-io_poll_delay (RW)
-------------------
-If polling is enabled, this controls what kind of polling will be
-performed. It defaults to -1, which is classic polling. In this mode,
-the CPU will repeatedly ask for completions without giving up any time.
-If set to 0, a hybrid polling mode is used, where the kernel will attempt
-to make an educated guess at when the IO will complete. Based on this
-guess, the kernel will put the process issuing IO to sleep for an amount
-of time, before entering a classic poll loop. This mode might be a
-little slower than pure classic polling, but it will be more efficient.
-If set to a value larger than 0, the kernel will put the process issuing
-IO to sleep for this amount of microseconds before entering classic
-polling.
-
-io_timeout (RW)
----------------
-io_timeout is the request timeout in milliseconds. If a request does not
-complete in this time then the block driver timeout handler is invoked.
-That timeout handler can decide to retry the request, to fail it or to start
-a device recovery strategy.
-
-iostats (RW)
--------------
-This file is used to control (on/off) the iostats accounting of the
-disk.
-
-logical_block_size (RO)
------------------------
-This is the logical block size of the device, in bytes.
-
-max_discard_segments (RO)
--------------------------
-The maximum number of DMA scatter/gather entries in a discard request.
-
-max_hw_sectors_kb (RO)
-----------------------
-This is the maximum number of kilobytes supported in a single data transfer.
-
-max_integrity_segments (RO)
----------------------------
-Maximum number of elements in a DMA scatter/gather list with integrity
-data that will be submitted by the block layer core to the associated
-block driver.
-
-max_sectors_kb (RW)
--------------------
-This is the maximum number of kilobytes that the block layer will allow
-for a filesystem request. Must be smaller than or equal to the maximum
-size allowed by the hardware.
-
-max_segments (RO)
------------------
-Maximum number of elements in a DMA scatter/gather list that is submitted
-to the associated block driver.
-
-max_segment_size (RO)
----------------------
-Maximum size in bytes of a single element in a DMA scatter/gather list.
-
-minimum_io_size (RO)
---------------------
-This is the smallest preferred IO size reported by the device.
-
-nomerges (RW)
--------------
-This enables the user to disable the lookup logic involved with IO
-merging requests in the block layer. By default (0) all merges are
-enabled. When set to 1 only simple one-hit merges will be tried. When
-set to 2 no merge algorithms will be tried (including one-hit or more
-complex tree/hash lookups).
-
-nr_requests (RW)
-----------------
-This controls how many requests may be allocated in the block layer for
-read or write requests. Note that the total allocated number may be twice
-this amount, since it applies only to reads or writes (not the accumulated
-sum).
-
-To avoid priority inversion through request starvation, a request
-queue maintains a separate request pool per each cgroup when
-CONFIG_BLK_CGROUP is enabled, and this parameter applies to each such
-per-block-cgroup request pool.  IOW, if there are N block cgroups,
-each request queue may have up to N request pools, each independently
-regulated by nr_requests.
-
-nr_zones (RO)
--------------
-For zoned block devices (zoned attribute indicating "host-managed" or
-"host-aware"), this indicates the total number of zones of the device.
-This is always 0 for regular block devices.
-
-optimal_io_size (RO)
---------------------
-This is the optimal IO size reported by the device.
-
-physical_block_size (RO)
-------------------------
-This is the physical block size of device, in bytes.
-
-read_ahead_kb (RW)
-------------------
-Maximum number of kilobytes to read-ahead for filesystems on this block
-device.
-
-rotational (RW)
----------------
-This file is used to stat if the device is of rotational type or
-non-rotational type.
-
-rq_affinity (RW)
-----------------
-If this option is '1', the block layer will migrate request completions to the
-cpu "group" that originally submitted the request. For some workloads this
-provides a significant reduction in CPU cycles due to caching effects.
-
-For storage configurations that need to maximize distribution of completion
-processing setting this option to '2' forces the completion to run on the
-requesting cpu (bypassing the "group" aggregation logic).
-
-scheduler (RW)
---------------
-When read, this file will display the current and available IO schedulers
-for this block device. The currently active IO scheduler will be enclosed
-in [] brackets. Writing an IO scheduler name to this file will switch
-control of this block device to that new IO scheduler. Note that writing
-an IO scheduler name to this file will attempt to load that IO scheduler
-module, if it isn't already present in the system.
-
-write_cache (RW)
-----------------
-When read, this file will display whether the device has write back
-caching enabled or not. It will return "write back" for the former
-case, and "write through" for the latter. Writing to this file can
-change the kernels view of the device, but it doesn't alter the
-device state. This means that it might not be safe to toggle the
-setting from "write back" to "write through", since that will also
-eliminate cache flushes issued by the kernel.
-
-write_same_max_bytes (RO)
--------------------------
-This is the number of bytes the device can write in a single write-same
-command.  A value of '0' means write-same is not supported by this
-device.
-
-wbt_lat_usec (RW)
------------------
-If the device is registered for writeback throttling, then this file shows
-the target minimum read latency. If this latency is exceeded in a given
-window of time (see wb_window_usec), then the writeback throttling will start
-scaling back writes. Writing a value of '0' to this file disables the
-feature. Writing a value of '-1' to this file resets the value to the
-default setting.
-
-throttle_sample_time (RW)
--------------------------
-This is the time window that blk-throttle samples data, in millisecond.
-blk-throttle makes decision based on the samplings. Lower time means cgroups
-have more smooth throughput, but higher CPU overhead. This exists only when
-CONFIG_BLK_DEV_THROTTLING_LOW is enabled.
-
-write_zeroes_max_bytes (RO)
----------------------------
-For block drivers that support REQ_OP_WRITE_ZEROES, the maximum number of
-bytes that can be zeroed at once. The value 0 means that REQ_OP_WRITE_ZEROES
-is not supported.
-
-zoned (RO)
-----------
-This indicates if the device is a zoned block device and the zone model of the
-device if it is indeed zoned. The possible values indicated by zoned are
-"none" for regular block devices and "host-aware" or "host-managed" for zoned
-block devices. The characteristics of host-aware and host-managed zoned block
-devices are described in the ZBC (Zoned Block Commands) and ZAC
-(Zoned Device ATA Command Set) standards. These standards also define the
-"drive-managed" zone model. However, since drive-managed zoned block devices
-do not support zone commands, they will be treated as regular block devices
-and zoned will report "none".
-
-Jens Axboe <jens.axboe@oracle.com>, February 2009
diff --git a/Documentation/block/request.rst b/Documentation/block/request.rst
new file mode 100644
index 000000000000..747021e1ffdb
--- /dev/null
+++ b/Documentation/block/request.rst
@@ -0,0 +1,99 @@
+============================
+struct request documentation
+============================
+
+Jens Axboe <jens.axboe@oracle.com> 27/05/02
+
+
+.. FIXME:
+   No idea about what does mean - seems just some noise, so comment it
+
+   1.0
+   Index
+
+   2.0 Struct request members classification
+
+       2.1 struct request members explanation
+
+   3.0
+
+
+   2.0
+
+
+
+Short explanation of request members
+====================================
+
+Classification flags:
+
+	=	====================
+	D	driver member
+	B	block layer member
+	I	I/O scheduler member
+	=	====================
+
+Unless an entry contains a D classification, a device driver must not access
+this member. Some members may contain D classifications, but should only be
+access through certain macros or functions (eg ->flags).
+
+<linux/blkdev.h>
+
+=============================== ======= =======================================
+Member				Flag	Comment
+=============================== ======= =======================================
+struct list_head queuelist	BI	Organization on various internal
+					queues
+
+``void *elevator_private``	I	I/O scheduler private data
+
+unsigned char cmd[16]		D	Driver can use this for setting up
+					a cdb before execution, see
+					blk_queue_prep_rq
+
+unsigned long flags		DBI	Contains info about data direction,
+					request type, etc.
+
+int rq_status			D	Request status bits
+
+kdev_t rq_dev			DBI	Target device
+
+int errors			DB	Error counts
+
+sector_t sector			DBI	Target location
+
+unsigned long hard_nr_sectors	B	Used to keep sector sane
+
+unsigned long nr_sectors	DBI	Total number of sectors in request
+
+unsigned long hard_nr_sectors	B	Used to keep nr_sectors sane
+
+unsigned short nr_phys_segments	DB	Number of physical scatter gather
+					segments in a request
+
+unsigned short nr_hw_segments	DB	Number of hardware scatter gather
+					segments in a request
+
+unsigned int current_nr_sectors	DB	Number of sectors in first segment
+					of request
+
+unsigned int hard_cur_sectors	B	Used to keep current_nr_sectors sane
+
+int tag				DB	TCQ tag, if assigned
+
+``void *special``		D	Free to be used by driver
+
+``char *buffer``		D	Map of first segment, also see
+					section on bouncing SECTION
+
+``struct completion *waiting``	D	Can be used by driver to get signalled
+					on request completion
+
+``struct bio *bio``		DBI	First bio in request
+
+``struct bio *biotail``		DBI	Last bio in request
+
+``struct request_queue *q``	DB	Request queue this request belongs to
+
+``struct request_list *rl``	B	Request list this request came from
+=============================== ======= =======================================
diff --git a/Documentation/block/request.txt b/Documentation/block/request.txt
deleted file mode 100644
index 754e104ed369..000000000000
--- a/Documentation/block/request.txt
+++ /dev/null
@@ -1,88 +0,0 @@
-
-struct request documentation
-
-Jens Axboe <jens.axboe@oracle.com> 27/05/02
-
-1.0
-Index
-
-2.0 Struct request members classification
-
-	2.1 struct request members explanation
-
-3.0
-
-
-2.0
-Short explanation of request members
-
-Classification flags:
-
-	D	driver member
-	B	block layer member
-	I	I/O scheduler member
-
-Unless an entry contains a D classification, a device driver must not access
-this member. Some members may contain D classifications, but should only be
-access through certain macros or functions (eg ->flags).
-
-<linux/blkdev.h>
-
-2.1
-Member				Flag	Comment
-------				----	-------
-
-struct list_head queuelist	BI	Organization on various internal
-					queues
-
-void *elevator_private		I	I/O scheduler private data
-
-unsigned char cmd[16]		D	Driver can use this for setting up
-					a cdb before execution, see
-					blk_queue_prep_rq
-
-unsigned long flags		DBI	Contains info about data direction,
-					request type, etc.
-
-int rq_status			D	Request status bits
-
-kdev_t rq_dev			DBI	Target device
-
-int errors			DB	Error counts
-
-sector_t sector			DBI	Target location
-
-unsigned long hard_nr_sectors	B	Used to keep sector sane
-
-unsigned long nr_sectors	DBI	Total number of sectors in request
-
-unsigned long hard_nr_sectors	B	Used to keep nr_sectors sane
-
-unsigned short nr_phys_segments	DB	Number of physical scatter gather
-					segments in a request
-
-unsigned short nr_hw_segments	DB	Number of hardware scatter gather
-					segments in a request
-
-unsigned int current_nr_sectors	DB	Number of sectors in first segment
-					of request
-
-unsigned int hard_cur_sectors	B	Used to keep current_nr_sectors sane
-
-int tag				DB	TCQ tag, if assigned
-
-void *special			D	Free to be used by driver
-
-char *buffer			D	Map of first segment, also see
-					section on bouncing SECTION
-
-struct completion *waiting	D	Can be used by driver to get signalled
-					on request completion
-
-struct bio *bio			DBI	First bio in request
-
-struct bio *biotail		DBI	Last bio in request
-
-struct request_queue *q		DB	Request queue this request belongs to
-
-struct request_list *rl		B	Request list this request came from
diff --git a/Documentation/block/stat.rst b/Documentation/block/stat.rst
new file mode 100644
index 000000000000..9c07bc22b0bc
--- /dev/null
+++ b/Documentation/block/stat.rst
@@ -0,0 +1,93 @@
+===============================================
+Block layer statistics in /sys/block/<dev>/stat
+===============================================
+
+This file documents the contents of the /sys/block/<dev>/stat file.
+
+The stat file provides several statistics about the state of block
+device <dev>.
+
+Q.
+   Why are there multiple statistics in a single file?  Doesn't sysfs
+   normally contain a single value per file?
+
+A.
+   By having a single file, the kernel can guarantee that the statistics
+   represent a consistent snapshot of the state of the device.  If the
+   statistics were exported as multiple files containing one statistic
+   each, it would be impossible to guarantee that a set of readings
+   represent a single point in time.
+
+The stat file consists of a single line of text containing 11 decimal
+values separated by whitespace.  The fields are summarized in the
+following table, and described in more detail below.
+
+
+=============== ============= =================================================
+Name            units         description
+=============== ============= =================================================
+read I/Os       requests      number of read I/Os processed
+read merges     requests      number of read I/Os merged with in-queue I/O
+read sectors    sectors       number of sectors read
+read ticks      milliseconds  total wait time for read requests
+write I/Os      requests      number of write I/Os processed
+write merges    requests      number of write I/Os merged with in-queue I/O
+write sectors   sectors       number of sectors written
+write ticks     milliseconds  total wait time for write requests
+in_flight       requests      number of I/Os currently in flight
+io_ticks        milliseconds  total time this block device has been active
+time_in_queue   milliseconds  total wait time for all requests
+discard I/Os    requests      number of discard I/Os processed
+discard merges  requests      number of discard I/Os merged with in-queue I/O
+discard sectors sectors       number of sectors discarded
+discard ticks   milliseconds  total wait time for discard requests
+=============== ============= =================================================
+
+read I/Os, write I/Os, discard I/0s
+===================================
+
+These values increment when an I/O request completes.
+
+read merges, write merges, discard merges
+=========================================
+
+These values increment when an I/O request is merged with an
+already-queued I/O request.
+
+read sectors, write sectors, discard_sectors
+============================================
+
+These values count the number of sectors read from, written to, or
+discarded from this block device.  The "sectors" in question are the
+standard UNIX 512-byte sectors, not any device- or filesystem-specific
+block size.  The counters are incremented when the I/O completes.
+
+read ticks, write ticks, discard ticks
+======================================
+
+These values count the number of milliseconds that I/O requests have
+waited on this block device.  If there are multiple I/O requests waiting,
+these values will increase at a rate greater than 1000/second; for
+example, if 60 read requests wait for an average of 30 ms, the read_ticks
+field will increase by 60*30 = 1800.
+
+in_flight
+=========
+
+This value counts the number of I/O requests that have been issued to
+the device driver but have not yet completed.  It does not include I/O
+requests that are in the queue but not yet issued to the device driver.
+
+io_ticks
+========
+
+This value counts the number of milliseconds during which the device has
+had I/O requests queued.
+
+time_in_queue
+=============
+
+This value counts the number of milliseconds that I/O requests have waited
+on this block device.  If there are multiple I/O requests waiting, this
+value will increase as the product of the number of milliseconds times the
+number of requests waiting (see "read ticks" above for an example).
diff --git a/Documentation/block/stat.txt b/Documentation/block/stat.txt
deleted file mode 100644
index 0aace9cc536c..000000000000
--- a/Documentation/block/stat.txt
+++ /dev/null
@@ -1,86 +0,0 @@
-Block layer statistics in /sys/block/<dev>/stat
-===============================================
-
-This file documents the contents of the /sys/block/<dev>/stat file.
-
-The stat file provides several statistics about the state of block
-device <dev>.
-
-Q. Why are there multiple statistics in a single file?  Doesn't sysfs
-   normally contain a single value per file?
-A. By having a single file, the kernel can guarantee that the statistics
-   represent a consistent snapshot of the state of the device.  If the
-   statistics were exported as multiple files containing one statistic
-   each, it would be impossible to guarantee that a set of readings
-   represent a single point in time.
-
-The stat file consists of a single line of text containing 11 decimal
-values separated by whitespace.  The fields are summarized in the
-following table, and described in more detail below.
-
-Name            units         description
-----            -----         -----------
-read I/Os       requests      number of read I/Os processed
-read merges     requests      number of read I/Os merged with in-queue I/O
-read sectors    sectors       number of sectors read
-read ticks      milliseconds  total wait time for read requests
-write I/Os      requests      number of write I/Os processed
-write merges    requests      number of write I/Os merged with in-queue I/O
-write sectors   sectors       number of sectors written
-write ticks     milliseconds  total wait time for write requests
-in_flight       requests      number of I/Os currently in flight
-io_ticks        milliseconds  total time this block device has been active
-time_in_queue   milliseconds  total wait time for all requests
-discard I/Os    requests      number of discard I/Os processed
-discard merges  requests      number of discard I/Os merged with in-queue I/O
-discard sectors sectors       number of sectors discarded
-discard ticks   milliseconds  total wait time for discard requests
-
-read I/Os, write I/Os, discard I/0s
-===================================
-
-These values increment when an I/O request completes.
-
-read merges, write merges, discard merges
-=========================================
-
-These values increment when an I/O request is merged with an
-already-queued I/O request.
-
-read sectors, write sectors, discard_sectors
-============================================
-
-These values count the number of sectors read from, written to, or
-discarded from this block device.  The "sectors" in question are the
-standard UNIX 512-byte sectors, not any device- or filesystem-specific
-block size.  The counters are incremented when the I/O completes.
-
-read ticks, write ticks, discard ticks
-======================================
-
-These values count the number of milliseconds that I/O requests have
-waited on this block device.  If there are multiple I/O requests waiting,
-these values will increase at a rate greater than 1000/second; for
-example, if 60 read requests wait for an average of 30 ms, the read_ticks
-field will increase by 60*30 = 1800.
-
-in_flight
-=========
-
-This value counts the number of I/O requests that have been issued to
-the device driver but have not yet completed.  It does not include I/O
-requests that are in the queue but not yet issued to the device driver.
-
-io_ticks
-========
-
-This value counts the number of milliseconds during which the device has
-had I/O requests queued.
-
-time_in_queue
-=============
-
-This value counts the number of milliseconds that I/O requests have waited
-on this block device.  If there are multiple I/O requests waiting, this
-value will increase as the product of the number of milliseconds times the
-number of requests waiting (see "read ticks" above for an example).
diff --git a/Documentation/block/switching-sched.rst b/Documentation/block/switching-sched.rst
new file mode 100644
index 000000000000..42042417380e
--- /dev/null
+++ b/Documentation/block/switching-sched.rst
@@ -0,0 +1,39 @@
+===================
+Switching Scheduler
+===================
+
+To choose IO schedulers at boot time, use the argument 'elevator=deadline'.
+'noop' and 'cfq' (the default) are also available. IO schedulers are assigned
+globally at boot time only presently.
+
+Each io queue has a set of io scheduler tunables associated with it. These
+tunables control how the io scheduler works. You can find these entries
+in::
+
+	/sys/block/<device>/queue/iosched
+
+assuming that you have sysfs mounted on /sys. If you don't have sysfs mounted,
+you can do so by typing::
+
+	# mount none /sys -t sysfs
+
+It is possible to change the IO scheduler for a given block device on
+the fly to select one of mq-deadline, none, bfq, or kyber schedulers -
+which can improve that device's throughput.
+
+To set a specific scheduler, simply do this::
+
+	echo SCHEDNAME > /sys/block/DEV/queue/scheduler
+
+where SCHEDNAME is the name of a defined IO scheduler, and DEV is the
+device name (hda, hdb, sga, or whatever you happen to have).
+
+The list of defined schedulers can be found by simply doing
+a "cat /sys/block/DEV/queue/scheduler" - the list of valid names
+will be displayed, with the currently selected scheduler in brackets::
+
+  # cat /sys/block/sda/queue/scheduler
+  [mq-deadline] kyber bfq none
+  # echo none >/sys/block/sda/queue/scheduler
+  # cat /sys/block/sda/queue/scheduler
+  [none] mq-deadline kyber bfq
diff --git a/Documentation/block/switching-sched.txt b/Documentation/block/switching-sched.txt
deleted file mode 100644
index 7977f6fb8b20..000000000000
--- a/Documentation/block/switching-sched.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-To choose IO schedulers at boot time, use the argument 'elevator=deadline'.
-'noop' and 'cfq' (the default) are also available. IO schedulers are assigned
-globally at boot time only presently.
-
-Each io queue has a set of io scheduler tunables associated with it. These
-tunables control how the io scheduler works. You can find these entries
-in:
-
-/sys/block/<device>/queue/iosched
-
-assuming that you have sysfs mounted on /sys. If you don't have sysfs mounted,
-you can do so by typing:
-
-# mount none /sys -t sysfs
-
-It is possible to change the IO scheduler for a given block device on
-the fly to select one of mq-deadline, none, bfq, or kyber schedulers -
-which can improve that device's throughput.
-
-To set a specific scheduler, simply do this:
-
-echo SCHEDNAME > /sys/block/DEV/queue/scheduler
-
-where SCHEDNAME is the name of a defined IO scheduler, and DEV is the
-device name (hda, hdb, sga, or whatever you happen to have).
-
-The list of defined schedulers can be found by simply doing
-a "cat /sys/block/DEV/queue/scheduler" - the list of valid names
-will be displayed, with the currently selected scheduler in brackets:
-
-# cat /sys/block/sda/queue/scheduler
-[mq-deadline] kyber bfq none
-# echo none >/sys/block/sda/queue/scheduler
-# cat /sys/block/sda/queue/scheduler
-[none] mq-deadline kyber bfq
diff --git a/Documentation/block/writeback_cache_control.rst b/Documentation/block/writeback_cache_control.rst
new file mode 100644
index 000000000000..2c752c57c14c
--- /dev/null
+++ b/Documentation/block/writeback_cache_control.rst
@@ -0,0 +1,86 @@
+==========================================
+Explicit volatile write back cache control
+==========================================
+
+Introduction
+------------
+
+Many storage devices, especially in the consumer market, come with volatile
+write back caches.  That means the devices signal I/O completion to the
+operating system before data actually has hit the non-volatile storage.  This
+behavior obviously speeds up various workloads, but it means the operating
+system needs to force data out to the non-volatile storage when it performs
+a data integrity operation like fsync, sync or an unmount.
+
+The Linux block layer provides two simple mechanisms that let filesystems
+control the caching behavior of the storage device.  These mechanisms are
+a forced cache flush, and the Force Unit Access (FUA) flag for requests.
+
+
+Explicit cache flushes
+----------------------
+
+The REQ_PREFLUSH flag can be OR ed into the r/w flags of a bio submitted from
+the filesystem and will make sure the volatile cache of the storage device
+has been flushed before the actual I/O operation is started.  This explicitly
+guarantees that previously completed write requests are on non-volatile
+storage before the flagged bio starts. In addition the REQ_PREFLUSH flag can be
+set on an otherwise empty bio structure, which causes only an explicit cache
+flush without any dependent I/O.  It is recommend to use
+the blkdev_issue_flush() helper for a pure cache flush.
+
+
+Forced Unit Access
+------------------
+
+The REQ_FUA flag can be OR ed into the r/w flags of a bio submitted from the
+filesystem and will make sure that I/O completion for this request is only
+signaled after the data has been committed to non-volatile storage.
+
+
+Implementation details for filesystems
+--------------------------------------
+
+Filesystems can simply set the REQ_PREFLUSH and REQ_FUA bits and do not have to
+worry if the underlying devices need any explicit cache flushing and how
+the Forced Unit Access is implemented.  The REQ_PREFLUSH and REQ_FUA flags
+may both be set on a single bio.
+
+
+Implementation details for make_request_fn based block drivers
+--------------------------------------------------------------
+
+These drivers will always see the REQ_PREFLUSH and REQ_FUA bits as they sit
+directly below the submit_bio interface.  For remapping drivers the REQ_FUA
+bits need to be propagated to underlying devices, and a global flush needs
+to be implemented for bios with the REQ_PREFLUSH bit set.  For real device
+drivers that do not have a volatile cache the REQ_PREFLUSH and REQ_FUA bits
+on non-empty bios can simply be ignored, and REQ_PREFLUSH requests without
+data can be completed successfully without doing any work.  Drivers for
+devices with volatile caches need to implement the support for these
+flags themselves without any help from the block layer.
+
+
+Implementation details for request_fn based block drivers
+---------------------------------------------------------
+
+For devices that do not support volatile write caches there is no driver
+support required, the block layer completes empty REQ_PREFLUSH requests before
+entering the driver and strips off the REQ_PREFLUSH and REQ_FUA bits from
+requests that have a payload.  For devices with volatile write caches the
+driver needs to tell the block layer that it supports flushing caches by
+doing::
+
+	blk_queue_write_cache(sdkp->disk->queue, true, false);
+
+and handle empty REQ_OP_FLUSH requests in its prep_fn/request_fn.  Note that
+REQ_PREFLUSH requests with a payload are automatically turned into a sequence
+of an empty REQ_OP_FLUSH request followed by the actual write by the block
+layer.  For devices that also support the FUA bit the block layer needs
+to be told to pass through the REQ_FUA bit using::
+
+	blk_queue_write_cache(sdkp->disk->queue, true, true);
+
+and the driver must handle write requests that have the REQ_FUA bit set
+in prep_fn/request_fn.  If the FUA bit is not natively supported the block
+layer turns it into an empty REQ_OP_FLUSH request after the actual write.
diff --git a/Documentation/block/writeback_cache_control.txt b/Documentation/block/writeback_cache_control.txt
deleted file mode 100644
index 8a6bdada5f6b..000000000000
--- a/Documentation/block/writeback_cache_control.txt
+++ /dev/null
@@ -1,86 +0,0 @@
-
-Explicit volatile write back cache control
-=====================================
-
-Introduction
-------------
-
-Many storage devices, especially in the consumer market, come with volatile
-write back caches.  That means the devices signal I/O completion to the
-operating system before data actually has hit the non-volatile storage.  This
-behavior obviously speeds up various workloads, but it means the operating
-system needs to force data out to the non-volatile storage when it performs
-a data integrity operation like fsync, sync or an unmount.
-
-The Linux block layer provides two simple mechanisms that let filesystems
-control the caching behavior of the storage device.  These mechanisms are
-a forced cache flush, and the Force Unit Access (FUA) flag for requests.
-
-
-Explicit cache flushes
-----------------------
-
-The REQ_PREFLUSH flag can be OR ed into the r/w flags of a bio submitted from
-the filesystem and will make sure the volatile cache of the storage device
-has been flushed before the actual I/O operation is started.  This explicitly
-guarantees that previously completed write requests are on non-volatile
-storage before the flagged bio starts. In addition the REQ_PREFLUSH flag can be
-set on an otherwise empty bio structure, which causes only an explicit cache
-flush without any dependent I/O.  It is recommend to use
-the blkdev_issue_flush() helper for a pure cache flush.
-
-
-Forced Unit Access
------------------
-
-The REQ_FUA flag can be OR ed into the r/w flags of a bio submitted from the
-filesystem and will make sure that I/O completion for this request is only
-signaled after the data has been committed to non-volatile storage.
-
-
-Implementation details for filesystems
---------------------------------------
-
-Filesystems can simply set the REQ_PREFLUSH and REQ_FUA bits and do not have to
-worry if the underlying devices need any explicit cache flushing and how
-the Forced Unit Access is implemented.  The REQ_PREFLUSH and REQ_FUA flags
-may both be set on a single bio.
-
-
-Implementation details for make_request_fn based block drivers
---------------------------------------------------------------
-
-These drivers will always see the REQ_PREFLUSH and REQ_FUA bits as they sit
-directly below the submit_bio interface.  For remapping drivers the REQ_FUA
-bits need to be propagated to underlying devices, and a global flush needs
-to be implemented for bios with the REQ_PREFLUSH bit set.  For real device
-drivers that do not have a volatile cache the REQ_PREFLUSH and REQ_FUA bits
-on non-empty bios can simply be ignored, and REQ_PREFLUSH requests without
-data can be completed successfully without doing any work.  Drivers for
-devices with volatile caches need to implement the support for these
-flags themselves without any help from the block layer.
-
-
-Implementation details for request_fn based block drivers
---------------------------------------------------------------
-
-For devices that do not support volatile write caches there is no driver
-support required, the block layer completes empty REQ_PREFLUSH requests before
-entering the driver and strips off the REQ_PREFLUSH and REQ_FUA bits from
-requests that have a payload.  For devices with volatile write caches the
-driver needs to tell the block layer that it supports flushing caches by
-doing:
-
-	blk_queue_write_cache(sdkp->disk->queue, true, false);
-
-and handle empty REQ_OP_FLUSH requests in its prep_fn/request_fn.  Note that
-REQ_PREFLUSH requests with a payload are automatically turned into a sequence
-of an empty REQ_OP_FLUSH request followed by the actual write by the block
-layer.  For devices that also support the FUA bit the block layer needs
-to be told to pass through the REQ_FUA bit using:
-
-	blk_queue_write_cache(sdkp->disk->queue, true, true);
-
-and the driver must handle write requests that have the REQ_FUA bit set
-in prep_fn/request_fn.  If the FUA bit is not natively supported the block
-layer turns it into an empty REQ_OP_FLUSH request after the actual write.
diff --git a/Documentation/blockdev/zram.rst b/Documentation/blockdev/zram.rst
index 2111231c9c0f..6eccf13219ff 100644
--- a/Documentation/blockdev/zram.rst
+++ b/Documentation/blockdev/zram.rst
@@ -215,7 +215,7 @@ User space is advised to use the following files to read the device statistics.
 
 File /sys/block/zram<id>/stat
 
-Represents block layer statistics. Read Documentation/block/stat.txt for
+Represents block layer statistics. Read Documentation/block/stat.rst for
 details.
 
 File /sys/block/zram<id>/io_stat
-- 
cgit 


From d5ccd65ab6272f21f442695b0022a4f553d818e5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 19 Apr 2019 19:01:18 -0300
Subject: docs: move gcc_plugins.txt to core-api and rename to .rst

The gcc_plugins.txt file is already a ReST file. Move it
to the core-api book while renaming it.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
---
 Documentation/core-api/gcc-plugins.rst | 93 ++++++++++++++++++++++++++++++++++
 Documentation/core-api/index.rst       |  1 +
 Documentation/gcc-plugins.txt          | 93 ----------------------------------
 3 files changed, 94 insertions(+), 93 deletions(-)
 create mode 100644 Documentation/core-api/gcc-plugins.rst
 delete mode 100644 Documentation/gcc-plugins.txt

(limited to 'Documentation')

diff --git a/Documentation/core-api/gcc-plugins.rst b/Documentation/core-api/gcc-plugins.rst
new file mode 100644
index 000000000000..8502f24396fb
--- /dev/null
+++ b/Documentation/core-api/gcc-plugins.rst
@@ -0,0 +1,93 @@
+=========================
+GCC plugin infrastructure
+=========================
+
+
+Introduction
+============
+
+GCC plugins are loadable modules that provide extra features to the
+compiler [1]_. They are useful for runtime instrumentation and static analysis.
+We can analyse, change and add further code during compilation via
+callbacks [2]_, GIMPLE [3]_, IPA [4]_ and RTL passes [5]_.
+
+The GCC plugin infrastructure of the kernel supports all gcc versions from
+4.5 to 6.0, building out-of-tree modules, cross-compilation and building in a
+separate directory.
+Plugin source files have to be compilable by both a C and a C++ compiler as well
+because gcc versions 4.5 and 4.6 are compiled by a C compiler,
+gcc-4.7 can be compiled by a C or a C++ compiler,
+and versions 4.8+ can only be compiled by a C++ compiler.
+
+Currently the GCC plugin infrastructure supports only the x86, arm, arm64 and
+powerpc architectures.
+
+This infrastructure was ported from grsecurity [6]_ and PaX [7]_.
+
+--
+
+.. [1] https://gcc.gnu.org/onlinedocs/gccint/Plugins.html
+.. [2] https://gcc.gnu.org/onlinedocs/gccint/Plugin-API.html#Plugin-API
+.. [3] https://gcc.gnu.org/onlinedocs/gccint/GIMPLE.html
+.. [4] https://gcc.gnu.org/onlinedocs/gccint/IPA.html
+.. [5] https://gcc.gnu.org/onlinedocs/gccint/RTL.html
+.. [6] https://grsecurity.net/
+.. [7] https://pax.grsecurity.net/
+
+
+Files
+=====
+
+**$(src)/scripts/gcc-plugins**
+
+	This is the directory of the GCC plugins.
+
+**$(src)/scripts/gcc-plugins/gcc-common.h**
+
+	This is a compatibility header for GCC plugins.
+	It should be always included instead of individual gcc headers.
+
+**$(src)/scripts/gcc-plugin.sh**
+
+	This script checks the availability of the included headers in
+	gcc-common.h and chooses the proper host compiler to build the plugins
+	(gcc-4.7 can be built by either gcc or g++).
+
+**$(src)/scripts/gcc-plugins/gcc-generate-gimple-pass.h,
+$(src)/scripts/gcc-plugins/gcc-generate-ipa-pass.h,
+$(src)/scripts/gcc-plugins/gcc-generate-simple_ipa-pass.h,
+$(src)/scripts/gcc-plugins/gcc-generate-rtl-pass.h**
+
+	These headers automatically generate the registration structures for
+	GIMPLE, SIMPLE_IPA, IPA and RTL passes. They support all gcc versions
+	from 4.5 to 6.0.
+	They should be preferred to creating the structures by hand.
+
+
+Usage
+=====
+
+You must install the gcc plugin headers for your gcc version,
+e.g., on Ubuntu for gcc-4.9::
+
+	apt-get install gcc-4.9-plugin-dev
+
+Enable a GCC plugin based feature in the kernel config::
+
+	CONFIG_GCC_PLUGIN_CYC_COMPLEXITY = y
+
+To compile only the plugin(s)::
+
+	make gcc-plugins
+
+or just run the kernel make and compile the whole kernel with
+the cyclomatic complexity GCC plugin.
+
+
+4. How to add a new GCC plugin
+==============================
+
+The GCC plugins are in $(src)/scripts/gcc-plugins/. You can use a file or a directory
+here. It must be added to $(src)/scripts/gcc-plugins/Makefile,
+$(src)/scripts/Makefile.gcc-plugins and $(src)/arch/Kconfig.
+See the cyc_complexity_plugin.c (CONFIG_GCC_PLUGIN_CYC_COMPLEXITY) GCC plugin.
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 322ac954b390..da0ed972d224 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -36,6 +36,7 @@ Core utilities
    memory-hotplug
    protection-keys
    ../RCU/index
+   gcc-plugins
 
 
 Interfaces for kernel debugging
diff --git a/Documentation/gcc-plugins.txt b/Documentation/gcc-plugins.txt
deleted file mode 100644
index 8502f24396fb..000000000000
--- a/Documentation/gcc-plugins.txt
+++ /dev/null
@@ -1,93 +0,0 @@
-=========================
-GCC plugin infrastructure
-=========================
-
-
-Introduction
-============
-
-GCC plugins are loadable modules that provide extra features to the
-compiler [1]_. They are useful for runtime instrumentation and static analysis.
-We can analyse, change and add further code during compilation via
-callbacks [2]_, GIMPLE [3]_, IPA [4]_ and RTL passes [5]_.
-
-The GCC plugin infrastructure of the kernel supports all gcc versions from
-4.5 to 6.0, building out-of-tree modules, cross-compilation and building in a
-separate directory.
-Plugin source files have to be compilable by both a C and a C++ compiler as well
-because gcc versions 4.5 and 4.6 are compiled by a C compiler,
-gcc-4.7 can be compiled by a C or a C++ compiler,
-and versions 4.8+ can only be compiled by a C++ compiler.
-
-Currently the GCC plugin infrastructure supports only the x86, arm, arm64 and
-powerpc architectures.
-
-This infrastructure was ported from grsecurity [6]_ and PaX [7]_.
-
---
-
-.. [1] https://gcc.gnu.org/onlinedocs/gccint/Plugins.html
-.. [2] https://gcc.gnu.org/onlinedocs/gccint/Plugin-API.html#Plugin-API
-.. [3] https://gcc.gnu.org/onlinedocs/gccint/GIMPLE.html
-.. [4] https://gcc.gnu.org/onlinedocs/gccint/IPA.html
-.. [5] https://gcc.gnu.org/onlinedocs/gccint/RTL.html
-.. [6] https://grsecurity.net/
-.. [7] https://pax.grsecurity.net/
-
-
-Files
-=====
-
-**$(src)/scripts/gcc-plugins**
-
-	This is the directory of the GCC plugins.
-
-**$(src)/scripts/gcc-plugins/gcc-common.h**
-
-	This is a compatibility header for GCC plugins.
-	It should be always included instead of individual gcc headers.
-
-**$(src)/scripts/gcc-plugin.sh**
-
-	This script checks the availability of the included headers in
-	gcc-common.h and chooses the proper host compiler to build the plugins
-	(gcc-4.7 can be built by either gcc or g++).
-
-**$(src)/scripts/gcc-plugins/gcc-generate-gimple-pass.h,
-$(src)/scripts/gcc-plugins/gcc-generate-ipa-pass.h,
-$(src)/scripts/gcc-plugins/gcc-generate-simple_ipa-pass.h,
-$(src)/scripts/gcc-plugins/gcc-generate-rtl-pass.h**
-
-	These headers automatically generate the registration structures for
-	GIMPLE, SIMPLE_IPA, IPA and RTL passes. They support all gcc versions
-	from 4.5 to 6.0.
-	They should be preferred to creating the structures by hand.
-
-
-Usage
-=====
-
-You must install the gcc plugin headers for your gcc version,
-e.g., on Ubuntu for gcc-4.9::
-
-	apt-get install gcc-4.9-plugin-dev
-
-Enable a GCC plugin based feature in the kernel config::
-
-	CONFIG_GCC_PLUGIN_CYC_COMPLEXITY = y
-
-To compile only the plugin(s)::
-
-	make gcc-plugins
-
-or just run the kernel make and compile the whole kernel with
-the cyclomatic complexity GCC plugin.
-
-
-4. How to add a new GCC plugin
-==============================
-
-The GCC plugins are in $(src)/scripts/gcc-plugins/. You can use a file or a directory
-here. It must be added to $(src)/scripts/gcc-plugins/Makefile,
-$(src)/scripts/Makefile.gcc-plugins and $(src)/arch/Kconfig.
-See the cyc_complexity_plugin.c (CONFIG_GCC_PLUGIN_CYC_COMPLEXITY) GCC plugin.
-- 
cgit 


From 74684f8ff44e8b9cf85542762ec347b96bd92559 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 26 Jun 2019 12:24:01 -0300
Subject: docs: logo.txt: rename it to COPYING-logo

This file has nothing to do with the Kernel documentation. It
contains the copyright permissions for Tux at Documentation/logo.gif.

So, rename it accordingly.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/COPYING-logo | 13 +++++++++++++
 Documentation/logo.txt     | 13 -------------
 2 files changed, 13 insertions(+), 13 deletions(-)
 create mode 100644 Documentation/COPYING-logo
 delete mode 100644 Documentation/logo.txt

(limited to 'Documentation')

diff --git a/Documentation/COPYING-logo b/Documentation/COPYING-logo
new file mode 100644
index 000000000000..296f0f7f67eb
--- /dev/null
+++ b/Documentation/COPYING-logo
@@ -0,0 +1,13 @@
+This is the full-colour version of the currently unofficial Linux logo
+("currently unofficial" just means that there has been no paperwork and
+that I have not really announced it yet).  It was created by Larry Ewing,
+and is freely usable as long as you acknowledge Larry as the original
+artist. 
+
+Note that there are black-and-white versions of this available that
+scale down to smaller sizes and are better for letterheads or whatever
+you want to use it for: for the full range of logos take a look at
+Larry's web-page:
+
+	http://www.isc.tamu.edu/~lewing/linux/
+
diff --git a/Documentation/logo.txt b/Documentation/logo.txt
deleted file mode 100644
index 296f0f7f67eb..000000000000
--- a/Documentation/logo.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-This is the full-colour version of the currently unofficial Linux logo
-("currently unofficial" just means that there has been no paperwork and
-that I have not really announced it yet).  It was created by Larry Ewing,
-and is freely usable as long as you acknowledge Larry as the original
-artist. 
-
-Note that there are black-and-white versions of this available that
-scale down to smaller sizes and are better for letterheads or whatever
-you want to use it for: for the full range of logos take a look at
-Larry's web-page:
-
-	http://www.isc.tamu.edu/~lewing/linux/
-
-- 
cgit 


From d2bdd48a652bd0f7a5c78f3e418b4529fc469e1f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 16:03:23 -0300
Subject: docs: rapidio: add it to the driver API

This is actually a subsystem description, with contains both
kAPI and uAPI.

While it should ideally be slplit, let's place it at driver-api,
as most things are related to kAPI and driver-specific info.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/admin-guide/index.rst             |   1 +
 Documentation/admin-guide/rapidio.rst           | 107 +++++++
 Documentation/driver-api/index.rst              |   2 +-
 Documentation/driver-api/rapidio.rst            | 107 -------
 Documentation/driver-api/rapidio/index.rst      |  13 +
 Documentation/driver-api/rapidio/mport_cdev.rst | 110 +++++++
 Documentation/driver-api/rapidio/rapidio.rst    | 362 ++++++++++++++++++++++++
 Documentation/driver-api/rapidio/rio_cm.rst     | 135 +++++++++
 Documentation/driver-api/rapidio/sysfs.rst      |   7 +
 Documentation/driver-api/rapidio/tsi721.rst     | 112 ++++++++
 Documentation/rapidio/index.rst                 |  15 -
 Documentation/rapidio/mport_cdev.rst            | 110 -------
 Documentation/rapidio/rapidio.rst               | 362 ------------------------
 Documentation/rapidio/rio_cm.rst                | 135 ---------
 Documentation/rapidio/sysfs.rst                 |   7 -
 Documentation/rapidio/tsi721.rst                | 112 --------
 16 files changed, 848 insertions(+), 849 deletions(-)
 create mode 100644 Documentation/admin-guide/rapidio.rst
 delete mode 100644 Documentation/driver-api/rapidio.rst
 create mode 100644 Documentation/driver-api/rapidio/index.rst
 create mode 100644 Documentation/driver-api/rapidio/mport_cdev.rst
 create mode 100644 Documentation/driver-api/rapidio/rapidio.rst
 create mode 100644 Documentation/driver-api/rapidio/rio_cm.rst
 create mode 100644 Documentation/driver-api/rapidio/sysfs.rst
 create mode 100644 Documentation/driver-api/rapidio/tsi721.rst
 delete mode 100644 Documentation/rapidio/index.rst
 delete mode 100644 Documentation/rapidio/mport_cdev.rst
 delete mode 100644 Documentation/rapidio/rapidio.rst
 delete mode 100644 Documentation/rapidio/rio_cm.rst
 delete mode 100644 Documentation/rapidio/sysfs.rst
 delete mode 100644 Documentation/rapidio/tsi721.rst

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 24fbe0568eff..8853c95ef0d4 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -61,6 +61,7 @@ configure specific aspects of kernel behavior to your liking.
    parport
    md
    module-signing
+   rapidio
    sysrq
    unicode
    vga-softcursor
diff --git a/Documentation/admin-guide/rapidio.rst b/Documentation/admin-guide/rapidio.rst
new file mode 100644
index 000000000000..71ff658ab78e
--- /dev/null
+++ b/Documentation/admin-guide/rapidio.rst
@@ -0,0 +1,107 @@
+=======================
+RapidIO Subsystem Guide
+=======================
+
+:Author: Matt Porter
+
+Introduction
+============
+
+RapidIO is a high speed switched fabric interconnect with features aimed
+at the embedded market. RapidIO provides support for memory-mapped I/O
+as well as message-based transactions over the switched fabric network.
+RapidIO has a standardized discovery mechanism not unlike the PCI bus
+standard that allows simple detection of devices in a network.
+
+This documentation is provided for developers intending to support
+RapidIO on new architectures, write new drivers, or to understand the
+subsystem internals.
+
+Known Bugs and Limitations
+==========================
+
+Bugs
+----
+
+None. ;)
+
+Limitations
+-----------
+
+1. Access/management of RapidIO memory regions is not supported
+
+2. Multiple host enumeration is not supported
+
+RapidIO driver interface
+========================
+
+Drivers are provided a set of calls in order to interface with the
+subsystem to gather info on devices, request/map memory region
+resources, and manage mailboxes/doorbells.
+
+Functions
+---------
+
+.. kernel-doc:: include/linux/rio_drv.h
+   :internal:
+
+.. kernel-doc:: drivers/rapidio/rio-driver.c
+   :export:
+
+.. kernel-doc:: drivers/rapidio/rio.c
+   :export:
+
+Internals
+=========
+
+This chapter contains the autogenerated documentation of the RapidIO
+subsystem.
+
+Structures
+----------
+
+.. kernel-doc:: include/linux/rio.h
+   :internal:
+
+Enumeration and Discovery
+-------------------------
+
+.. kernel-doc:: drivers/rapidio/rio-scan.c
+   :internal:
+
+Driver functionality
+--------------------
+
+.. kernel-doc:: drivers/rapidio/rio.c
+   :internal:
+
+.. kernel-doc:: drivers/rapidio/rio-access.c
+   :internal:
+
+Device model support
+--------------------
+
+.. kernel-doc:: drivers/rapidio/rio-driver.c
+   :internal:
+
+PPC32 support
+-------------
+
+.. kernel-doc:: arch/powerpc/sysdev/fsl_rio.c
+   :internal:
+
+Credits
+=======
+
+The following people have contributed to the RapidIO subsystem directly
+or indirectly:
+
+1. Matt Porter\ mporter@kernel.crashing.org
+
+2. Randy Vinson\ rvinson@mvista.com
+
+3. Dan Malek\ dan@embeddedalley.com
+
+The following people have contributed to this document:
+
+1. Matt Porter\ mporter@kernel.crashing.org
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 6cd750a03ea0..d665cd9ab95f 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -45,7 +45,7 @@ available subsections can be seen below.
    miscellaneous
    mei/index
    w1
-   rapidio
+   rapidio/index
    s390-drivers
    vme
    80211/index
diff --git a/Documentation/driver-api/rapidio.rst b/Documentation/driver-api/rapidio.rst
deleted file mode 100644
index 71ff658ab78e..000000000000
--- a/Documentation/driver-api/rapidio.rst
+++ /dev/null
@@ -1,107 +0,0 @@
-=======================
-RapidIO Subsystem Guide
-=======================
-
-:Author: Matt Porter
-
-Introduction
-============
-
-RapidIO is a high speed switched fabric interconnect with features aimed
-at the embedded market. RapidIO provides support for memory-mapped I/O
-as well as message-based transactions over the switched fabric network.
-RapidIO has a standardized discovery mechanism not unlike the PCI bus
-standard that allows simple detection of devices in a network.
-
-This documentation is provided for developers intending to support
-RapidIO on new architectures, write new drivers, or to understand the
-subsystem internals.
-
-Known Bugs and Limitations
-==========================
-
-Bugs
-----
-
-None. ;)
-
-Limitations
------------
-
-1. Access/management of RapidIO memory regions is not supported
-
-2. Multiple host enumeration is not supported
-
-RapidIO driver interface
-========================
-
-Drivers are provided a set of calls in order to interface with the
-subsystem to gather info on devices, request/map memory region
-resources, and manage mailboxes/doorbells.
-
-Functions
----------
-
-.. kernel-doc:: include/linux/rio_drv.h
-   :internal:
-
-.. kernel-doc:: drivers/rapidio/rio-driver.c
-   :export:
-
-.. kernel-doc:: drivers/rapidio/rio.c
-   :export:
-
-Internals
-=========
-
-This chapter contains the autogenerated documentation of the RapidIO
-subsystem.
-
-Structures
-----------
-
-.. kernel-doc:: include/linux/rio.h
-   :internal:
-
-Enumeration and Discovery
--------------------------
-
-.. kernel-doc:: drivers/rapidio/rio-scan.c
-   :internal:
-
-Driver functionality
---------------------
-
-.. kernel-doc:: drivers/rapidio/rio.c
-   :internal:
-
-.. kernel-doc:: drivers/rapidio/rio-access.c
-   :internal:
-
-Device model support
---------------------
-
-.. kernel-doc:: drivers/rapidio/rio-driver.c
-   :internal:
-
-PPC32 support
--------------
-
-.. kernel-doc:: arch/powerpc/sysdev/fsl_rio.c
-   :internal:
-
-Credits
-=======
-
-The following people have contributed to the RapidIO subsystem directly
-or indirectly:
-
-1. Matt Porter\ mporter@kernel.crashing.org
-
-2. Randy Vinson\ rvinson@mvista.com
-
-3. Dan Malek\ dan@embeddedalley.com
-
-The following people have contributed to this document:
-
-1. Matt Porter\ mporter@kernel.crashing.org
diff --git a/Documentation/driver-api/rapidio/index.rst b/Documentation/driver-api/rapidio/index.rst
new file mode 100644
index 000000000000..4c5e51a05134
--- /dev/null
+++ b/Documentation/driver-api/rapidio/index.rst
@@ -0,0 +1,13 @@
+===========================
+The Linux RapidIO Subsystem
+===========================
+
+.. toctree::
+   :maxdepth: 1
+
+   rapidio
+   sysfs
+
+   tsi721
+   mport_cdev
+   rio_cm
diff --git a/Documentation/driver-api/rapidio/mport_cdev.rst b/Documentation/driver-api/rapidio/mport_cdev.rst
new file mode 100644
index 000000000000..df77a7f7be7d
--- /dev/null
+++ b/Documentation/driver-api/rapidio/mport_cdev.rst
@@ -0,0 +1,110 @@
+==================================================================
+RapidIO subsystem mport character device driver (rio_mport_cdev.c)
+==================================================================
+
+1. Overview
+===========
+
+This device driver is the result of collaboration within the RapidIO.org
+Software Task Group (STG) between Texas Instruments, Freescale,
+Prodrive Technologies, Nokia Networks, BAE and IDT.  Additional input was
+received from other members of RapidIO.org. The objective was to create a
+character mode driver interface which exposes the capabilities of RapidIO
+devices directly to applications, in a manner that allows the numerous and
+varied RapidIO implementations to interoperate.
+
+This driver (MPORT_CDEV) provides access to basic RapidIO subsystem operations
+for user-space applications. Most of RapidIO operations are supported through
+'ioctl' system calls.
+
+When loaded this device driver creates filesystem nodes named rio_mportX in /dev
+directory for each registered RapidIO mport device. 'X' in the node name matches
+to unique port ID assigned to each local mport device.
+
+Using available set of ioctl commands user-space applications can perform
+following RapidIO bus and subsystem operations:
+
+- Reads and writes from/to configuration registers of mport devices
+  (RIO_MPORT_MAINT_READ_LOCAL/RIO_MPORT_MAINT_WRITE_LOCAL)
+- Reads and writes from/to configuration registers of remote RapidIO devices.
+  This operations are defined as RapidIO Maintenance reads/writes in RIO spec.
+  (RIO_MPORT_MAINT_READ_REMOTE/RIO_MPORT_MAINT_WRITE_REMOTE)
+- Set RapidIO Destination ID for mport devices (RIO_MPORT_MAINT_HDID_SET)
+- Set RapidIO Component Tag for mport devices (RIO_MPORT_MAINT_COMPTAG_SET)
+- Query logical index of mport devices (RIO_MPORT_MAINT_PORT_IDX_GET)
+- Query capabilities and RapidIO link configuration of mport devices
+  (RIO_MPORT_GET_PROPERTIES)
+- Enable/Disable reporting of RapidIO doorbell events to user-space applications
+  (RIO_ENABLE_DOORBELL_RANGE/RIO_DISABLE_DOORBELL_RANGE)
+- Enable/Disable reporting of RIO port-write events to user-space applications
+  (RIO_ENABLE_PORTWRITE_RANGE/RIO_DISABLE_PORTWRITE_RANGE)
+- Query/Control type of events reported through this driver: doorbells,
+  port-writes or both (RIO_SET_EVENT_MASK/RIO_GET_EVENT_MASK)
+- Configure/Map mport's outbound requests window(s) for specific size,
+  RapidIO destination ID, hopcount and request type
+  (RIO_MAP_OUTBOUND/RIO_UNMAP_OUTBOUND)
+- Configure/Map mport's inbound requests window(s) for specific size,
+  RapidIO base address and local memory base address
+  (RIO_MAP_INBOUND/RIO_UNMAP_INBOUND)
+- Allocate/Free contiguous DMA coherent memory buffer for DMA data transfers
+  to/from remote RapidIO devices (RIO_ALLOC_DMA/RIO_FREE_DMA)
+- Initiate DMA data transfers to/from remote RapidIO devices (RIO_TRANSFER).
+  Supports blocking, asynchronous and posted (a.k.a 'fire-and-forget') data
+  transfer modes.
+- Check/Wait for completion of asynchronous DMA data transfer
+  (RIO_WAIT_FOR_ASYNC)
+- Manage device objects supported by RapidIO subsystem (RIO_DEV_ADD/RIO_DEV_DEL).
+  This allows implementation of various RapidIO fabric enumeration algorithms
+  as user-space applications while using remaining functionality provided by
+  kernel RapidIO subsystem.
+
+2. Hardware Compatibility
+=========================
+
+This device driver uses standard interfaces defined by kernel RapidIO subsystem
+and therefore it can be used with any mport device driver registered by RapidIO
+subsystem with limitations set by available mport implementation.
+
+At this moment the most common limitation is availability of RapidIO-specific
+DMA engine framework for specific mport device. Users should verify available
+functionality of their platform when planning to use this driver:
+
+- IDT Tsi721 PCIe-to-RapidIO bridge device and its mport device driver are fully
+  compatible with this driver.
+- Freescale SoCs 'fsl_rio' mport driver does not have implementation for RapidIO
+  specific DMA engine support and therefore DMA data transfers mport_cdev driver
+  are not available.
+
+3. Module parameters
+====================
+
+- 'dma_timeout'
+      - DMA transfer completion timeout (in msec, default value 3000).
+        This parameter set a maximum completion wait time for SYNC mode DMA
+        transfer requests and for RIO_WAIT_FOR_ASYNC ioctl requests.
+
+- 'dbg_level'
+      - This parameter allows to control amount of debug information
+        generated by this device driver. This parameter is formed by set of
+        bit masks that correspond to the specific functional blocks.
+        For mask definitions see 'drivers/rapidio/devices/rio_mport_cdev.c'
+        This parameter can be changed dynamically.
+        Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
+
+4. Known problems
+=================
+
+  None.
+
+5. User-space Applications and API
+==================================
+
+API library and applications that use this device driver are available from
+RapidIO.org.
+
+6. TODO List
+============
+
+- Add support for sending/receiving "raw" RapidIO messaging packets.
+- Add memory mapped DMA data transfers as an option when RapidIO-specific DMA
+  is not available.
diff --git a/Documentation/driver-api/rapidio/rapidio.rst b/Documentation/driver-api/rapidio/rapidio.rst
new file mode 100644
index 000000000000..fb8942d3ba85
--- /dev/null
+++ b/Documentation/driver-api/rapidio/rapidio.rst
@@ -0,0 +1,362 @@
+============
+Introduction
+============
+
+The RapidIO standard is a packet-based fabric interconnect standard designed for
+use in embedded systems. Development of the RapidIO standard is directed by the
+RapidIO Trade Association (RTA). The current version of the RapidIO specification
+is publicly available for download from the RTA web-site [1].
+
+This document describes the basics of the Linux RapidIO subsystem and provides
+information on its major components.
+
+1 Overview
+==========
+
+Because the RapidIO subsystem follows the Linux device model it is integrated
+into the kernel similarly to other buses by defining RapidIO-specific device and
+bus types and registering them within the device model.
+
+The Linux RapidIO subsystem is architecture independent and therefore defines
+architecture-specific interfaces that provide support for common RapidIO
+subsystem operations.
+
+2. Core Components
+==================
+
+A typical RapidIO network is a combination of endpoints and switches.
+Each of these components is represented in the subsystem by an associated data
+structure. The core logical components of the RapidIO subsystem are defined
+in include/linux/rio.h file.
+
+2.1 Master Port
+---------------
+
+A master port (or mport) is a RapidIO interface controller that is local to the
+processor executing the Linux code. A master port generates and receives RapidIO
+packets (transactions). In the RapidIO subsystem each master port is represented
+by a rio_mport data structure. This structure contains master port specific
+resources such as mailboxes and doorbells. The rio_mport also includes a unique
+host device ID that is valid when a master port is configured as an enumerating
+host.
+
+RapidIO master ports are serviced by subsystem specific mport device drivers
+that provide functionality defined for this subsystem. To provide a hardware
+independent interface for RapidIO subsystem operations, rio_mport structure
+includes rio_ops data structure which contains pointers to hardware specific
+implementations of RapidIO functions.
+
+2.2 Device
+----------
+
+A RapidIO device is any endpoint (other than mport) or switch in the network.
+All devices are presented in the RapidIO subsystem by corresponding rio_dev data
+structure. Devices form one global device list and per-network device lists
+(depending on number of available mports and networks).
+
+2.3 Switch
+----------
+
+A RapidIO switch is a special class of device that routes packets between its
+ports towards their final destination. The packet destination port within a
+switch is defined by an internal routing table. A switch is presented in the
+RapidIO subsystem by rio_dev data structure expanded by additional rio_switch
+data structure, which contains switch specific information such as copy of the
+routing table and pointers to switch specific functions.
+
+The RapidIO subsystem defines the format and initialization method for subsystem
+specific switch drivers that are designed to provide hardware-specific
+implementation of common switch management routines.
+
+2.4 Network
+-----------
+
+A RapidIO network is a combination of interconnected endpoint and switch devices.
+Each RapidIO network known to the system is represented by corresponding rio_net
+data structure. This structure includes lists of all devices and local master
+ports that form the same network. It also contains a pointer to the default
+master port that is used to communicate with devices within the network.
+
+2.5 Device Drivers
+------------------
+
+RapidIO device-specific drivers follow Linux Kernel Driver Model and are
+intended to support specific RapidIO devices attached to the RapidIO network.
+
+2.6 Subsystem Interfaces
+------------------------
+
+RapidIO interconnect specification defines features that may be used to provide
+one or more common service layers for all participating RapidIO devices. These
+common services may act separately from device-specific drivers or be used by
+device-specific drivers. Example of such service provider is the RIONET driver
+which implements Ethernet-over-RapidIO interface. Because only one driver can be
+registered for a device, all common RapidIO services have to be registered as
+subsystem interfaces. This allows to have multiple common services attached to
+the same device without blocking attachment of a device-specific driver.
+
+3. Subsystem Initialization
+===========================
+
+In order to initialize the RapidIO subsystem, a platform must initialize and
+register at least one master port within the RapidIO network. To register mport
+within the subsystem controller driver's initialization code calls function
+rio_register_mport() for each available master port.
+
+After all active master ports are registered with a RapidIO subsystem,
+an enumeration and/or discovery routine may be called automatically or
+by user-space command.
+
+RapidIO subsystem can be configured to be built as a statically linked or
+modular component of the kernel (see details below).
+
+4. Enumeration and Discovery
+============================
+
+4.1 Overview
+------------
+
+RapidIO subsystem configuration options allow users to build enumeration and
+discovery methods as statically linked components or loadable modules.
+An enumeration/discovery method implementation and available input parameters
+define how any given method can be attached to available RapidIO mports:
+simply to all available mports OR individually to the specified mport device.
+
+Depending on selected enumeration/discovery build configuration, there are
+several methods to initiate an enumeration and/or discovery process:
+
+  (a) Statically linked enumeration and discovery process can be started
+  automatically during kernel initialization time using corresponding module
+  parameters. This was the original method used since introduction of RapidIO
+  subsystem. Now this method relies on enumerator module parameter which is
+  'rio-scan.scan' for existing basic enumeration/discovery method.
+  When automatic start of enumeration/discovery is used a user has to ensure
+  that all discovering endpoints are started before the enumerating endpoint
+  and are waiting for enumeration to be completed.
+  Configuration option CONFIG_RAPIDIO_DISC_TIMEOUT defines time that discovering
+  endpoint waits for enumeration to be completed. If the specified timeout
+  expires the discovery process is terminated without obtaining RapidIO network
+  information. NOTE: a timed out discovery process may be restarted later using
+  a user-space command as it is described below (if the given endpoint was
+  enumerated successfully).
+
+  (b) Statically linked enumeration and discovery process can be started by
+  a command from user space. This initiation method provides more flexibility
+  for a system startup compared to the option (a) above. After all participating
+  endpoints have been successfully booted, an enumeration process shall be
+  started first by issuing a user-space command, after an enumeration is
+  completed a discovery process can be started on all remaining endpoints.
+
+  (c) Modular enumeration and discovery process can be started by a command from
+  user space. After an enumeration/discovery module is loaded, a network scan
+  process can be started by issuing a user-space command.
+  Similar to the option (b) above, an enumerator has to be started first.
+
+  (d) Modular enumeration and discovery process can be started by a module
+  initialization routine. In this case an enumerating module shall be loaded
+  first.
+
+When a network scan process is started it calls an enumeration or discovery
+routine depending on the configured role of a master port: host or agent.
+
+Enumeration is performed by a master port if it is configured as a host port by
+assigning a host destination ID greater than or equal to zero. The host
+destination ID can be assigned to a master port using various methods depending
+on RapidIO subsystem build configuration:
+
+  (a) For a statically linked RapidIO subsystem core use command line parameter
+  "rapidio.hdid=" with a list of destination ID assignments in order of mport
+  device registration. For example, in a system with two RapidIO controllers
+  the command line parameter "rapidio.hdid=-1,7" will result in assignment of
+  the host destination ID=7 to the second RapidIO controller, while the first
+  one will be assigned destination ID=-1.
+
+  (b) If the RapidIO subsystem core is built as a loadable module, in addition
+  to the method shown above, the host destination ID(s) can be specified using
+  traditional methods of passing module parameter "hdid=" during its loading:
+
+  - from command line: "modprobe rapidio hdid=-1,7", or
+  - from modprobe configuration file using configuration command "options",
+    like in this example: "options rapidio hdid=-1,7". An example of modprobe
+    configuration file is provided in the section below.
+
+NOTES:
+  (i) if "hdid=" parameter is omitted all available mport will be assigned
+  destination ID = -1;
+
+  (ii) the "hdid=" parameter in systems with multiple mports can have
+  destination ID assignments omitted from the end of list (default = -1).
+
+If the host device ID for a specific master port is set to -1, the discovery
+process will be performed for it.
+
+The enumeration and discovery routines use RapidIO maintenance transactions
+to access the configuration space of devices.
+
+NOTE: If RapidIO switch-specific device drivers are built as loadable modules
+they must be loaded before enumeration/discovery process starts.
+This requirement is cased by the fact that enumeration/discovery methods invoke
+vendor-specific callbacks on early stages.
+
+4.2 Automatic Start of Enumeration and Discovery
+------------------------------------------------
+
+Automatic enumeration/discovery start method is applicable only to built-in
+enumeration/discovery RapidIO configuration selection. To enable automatic
+enumeration/discovery start by existing basic enumerator method set use boot
+command line parameter "rio-scan.scan=1".
+
+This configuration requires synchronized start of all RapidIO endpoints that
+form a network which will be enumerated/discovered. Discovering endpoints have
+to be started before an enumeration starts to ensure that all RapidIO
+controllers have been initialized and are ready to be discovered. Configuration
+parameter CONFIG_RAPIDIO_DISC_TIMEOUT defines time (in seconds) which
+a discovering endpoint will wait for enumeration to be completed.
+
+When automatic enumeration/discovery start is selected, basic method's
+initialization routine calls rio_init_mports() to perform enumeration or
+discovery for all known mport devices.
+
+Depending on RapidIO network size and configuration this automatic
+enumeration/discovery start method may be difficult to use due to the
+requirement for synchronized start of all endpoints.
+
+4.3 User-space Start of Enumeration and Discovery
+-------------------------------------------------
+
+User-space start of enumeration and discovery can be used with built-in and
+modular build configurations. For user-space controlled start RapidIO subsystem
+creates the sysfs write-only attribute file '/sys/bus/rapidio/scan'. To initiate
+an enumeration or discovery process on specific mport device, a user needs to
+write mport_ID (not RapidIO destination ID) into that file. The mport_ID is a
+sequential number (0 ... RIO_MAX_MPORTS) assigned during mport device
+registration. For example for machine with single RapidIO controller, mport_ID
+for that controller always will be 0.
+
+To initiate RapidIO enumeration/discovery on all available mports a user may
+write '-1' (or RIO_MPORT_ANY) into the scan attribute file.
+
+4.4 Basic Enumeration Method
+----------------------------
+
+This is an original enumeration/discovery method which is available since
+first release of RapidIO subsystem code. The enumeration process is
+implemented according to the enumeration algorithm outlined in the RapidIO
+Interconnect Specification: Annex I [1].
+
+This method can be configured as statically linked or loadable module.
+The method's single parameter "scan" allows to trigger the enumeration/discovery
+process from module initialization routine.
+
+This enumeration/discovery method can be started only once and does not support
+unloading if it is built as a module.
+
+The enumeration process traverses the network using a recursive depth-first
+algorithm. When a new device is found, the enumerator takes ownership of that
+device by writing into the Host Device ID Lock CSR. It does this to ensure that
+the enumerator has exclusive right to enumerate the device. If device ownership
+is successfully acquired, the enumerator allocates a new rio_dev structure and
+initializes it according to device capabilities.
+
+If the device is an endpoint, a unique device ID is assigned to it and its value
+is written into the device's Base Device ID CSR.
+
+If the device is a switch, the enumerator allocates an additional rio_switch
+structure to store switch specific information. Then the switch's vendor ID and
+device ID are queried against a table of known RapidIO switches. Each switch
+table entry contains a pointer to a switch-specific initialization routine that
+initializes pointers to the rest of switch specific operations, and performs
+hardware initialization if necessary. A RapidIO switch does not have a unique
+device ID; it relies on hopcount and routing for device ID of an attached
+endpoint if access to its configuration registers is required. If a switch (or
+chain of switches) does not have any endpoint (except enumerator) attached to
+it, a fake device ID will be assigned to configure a route to that switch.
+In the case of a chain of switches without endpoint, one fake device ID is used
+to configure a route through the entire chain and switches are differentiated by
+their hopcount value.
+
+For both endpoints and switches the enumerator writes a unique component tag
+into device's Component Tag CSR. That unique value is used by the error
+management notification mechanism to identify a device that is reporting an
+error management event.
+
+Enumeration beyond a switch is completed by iterating over each active egress
+port of that switch. For each active link, a route to a default device ID
+(0xFF for 8-bit systems and 0xFFFF for 16-bit systems) is temporarily written
+into the routing table. The algorithm recurs by calling itself with hopcount + 1
+and the default device ID in order to access the device on the active port.
+
+After the host has completed enumeration of the entire network it releases
+devices by clearing device ID locks (calls rio_clear_locks()). For each endpoint
+in the system, it sets the Discovered bit in the Port General Control CSR
+to indicate that enumeration is completed and agents are allowed to execute
+passive discovery of the network.
+
+The discovery process is performed by agents and is similar to the enumeration
+process that is described above. However, the discovery process is performed
+without changes to the existing routing because agents only gather information
+about RapidIO network structure and are building an internal map of discovered
+devices. This way each Linux-based component of the RapidIO subsystem has
+a complete view of the network. The discovery process can be performed
+simultaneously by several agents. After initializing its RapidIO master port
+each agent waits for enumeration completion by the host for the configured wait
+time period. If this wait time period expires before enumeration is completed,
+an agent skips RapidIO discovery and continues with remaining kernel
+initialization.
+
+4.5 Adding New Enumeration/Discovery Method
+-------------------------------------------
+
+RapidIO subsystem code organization allows addition of new enumeration/discovery
+methods as new configuration options without significant impact to the core
+RapidIO code.
+
+A new enumeration/discovery method has to be attached to one or more mport
+devices before an enumeration/discovery process can be started. Normally,
+method's module initialization routine calls rio_register_scan() to attach
+an enumerator to a specified mport device (or devices). The basic enumerator
+implementation demonstrates this process.
+
+4.6 Using Loadable RapidIO Switch Drivers
+-----------------------------------------
+
+In the case when RapidIO switch drivers are built as loadable modules a user
+must ensure that they are loaded before the enumeration/discovery starts.
+This process can be automated by specifying pre- or post- dependencies in the
+RapidIO-specific modprobe configuration file as shown in the example below.
+
+File /etc/modprobe.d/rapidio.conf::
+
+  # Configure RapidIO subsystem modules
+
+  # Set enumerator host destination ID (overrides kernel command line option)
+  options rapidio hdid=-1,2
+
+  # Load RapidIO switch drivers immediately after rapidio core module was loaded
+  softdep rapidio post: idt_gen2 idtcps tsi57x
+
+  # OR :
+
+  # Load RapidIO switch drivers just before rio-scan enumerator module is loaded
+  softdep rio-scan pre: idt_gen2 idtcps tsi57x
+
+  --------------------------
+
+NOTE:
+  In the example above, one of "softdep" commands must be removed or
+  commented out to keep required module loading sequence.
+
+5. References
+=============
+
+[1] RapidIO Trade Association. RapidIO Interconnect Specifications.
+    http://www.rapidio.org.
+
+[2] Rapidio TA. Technology Comparisons.
+    http://www.rapidio.org/education/technology_comparisons/
+
+[3] RapidIO support for Linux.
+    http://lwn.net/Articles/139118/
+
+[4] Matt Porter. RapidIO for Linux. Ottawa Linux Symposium, 2005
+    http://www.kernel.org/doc/ols/2005/ols2005v2-pages-43-56.pdf
diff --git a/Documentation/driver-api/rapidio/rio_cm.rst b/Documentation/driver-api/rapidio/rio_cm.rst
new file mode 100644
index 000000000000..5294430a7a74
--- /dev/null
+++ b/Documentation/driver-api/rapidio/rio_cm.rst
@@ -0,0 +1,135 @@
+==========================================================================
+RapidIO subsystem Channelized Messaging character device driver (rio_cm.c)
+==========================================================================
+
+
+1. Overview
+===========
+
+This device driver is the result of collaboration within the RapidIO.org
+Software Task Group (STG) between Texas Instruments, Prodrive Technologies,
+Nokia Networks, BAE and IDT.  Additional input was received from other members
+of RapidIO.org.
+
+The objective was to create a character mode driver interface which exposes
+messaging capabilities of RapidIO endpoint devices (mports) directly
+to applications, in a manner that allows the numerous and varied RapidIO
+implementations to interoperate.
+
+This driver (RIO_CM) provides to user-space applications shared access to
+RapidIO mailbox messaging resources.
+
+RapidIO specification (Part 2) defines that endpoint devices may have up to four
+messaging mailboxes in case of multi-packet message (up to 4KB) and
+up to 64 mailboxes if single-packet messages (up to 256 B) are used. In addition
+to protocol definition limitations, a particular hardware implementation can
+have reduced number of messaging mailboxes.  RapidIO aware applications must
+therefore share the messaging resources of a RapidIO endpoint.
+
+Main purpose of this device driver is to provide RapidIO mailbox messaging
+capability to large number of user-space processes by introducing socket-like
+operations using a single messaging mailbox.  This allows applications to
+use the limited RapidIO messaging hardware resources efficiently.
+
+Most of device driver's operations are supported through 'ioctl' system calls.
+
+When loaded this device driver creates a single file system node named rio_cm
+in /dev directory common for all registered RapidIO mport devices.
+
+Following ioctl commands are available to user-space applications:
+
+- RIO_CM_MPORT_GET_LIST:
+    Returns to caller list of local mport devices that
+    support messaging operations (number of entries up to RIO_MAX_MPORTS).
+    Each list entry is combination of mport's index in the system and RapidIO
+    destination ID assigned to the port.
+- RIO_CM_EP_GET_LIST_SIZE:
+    Returns number of messaging capable remote endpoints
+    in a RapidIO network associated with the specified mport device.
+- RIO_CM_EP_GET_LIST:
+    Returns list of RapidIO destination IDs for messaging
+    capable remote endpoints (peers) available in a RapidIO network associated
+    with the specified mport device.
+- RIO_CM_CHAN_CREATE:
+    Creates RapidIO message exchange channel data structure
+    with channel ID assigned automatically or as requested by a caller.
+- RIO_CM_CHAN_BIND:
+    Binds the specified channel data structure to the specified
+    mport device.
+- RIO_CM_CHAN_LISTEN:
+    Enables listening for connection requests on the specified
+    channel.
+- RIO_CM_CHAN_ACCEPT:
+    Accepts a connection request from peer on the specified
+    channel. If wait timeout for this request is specified by a caller it is
+    a blocking call. If timeout set to 0 this is non-blocking call - ioctl
+    handler checks for a pending connection request and if one is not available
+    exits with -EGAIN error status immediately.
+- RIO_CM_CHAN_CONNECT:
+    Sends a connection request to a remote peer/channel.
+- RIO_CM_CHAN_SEND:
+    Sends a data message through the specified channel.
+    The handler for this request assumes that message buffer specified by
+    a caller includes the reserved space for a packet header required by
+    this driver.
+- RIO_CM_CHAN_RECEIVE:
+    Receives a data message through a connected channel.
+    If the channel does not have an incoming message ready to return this ioctl
+    handler will wait for new message until timeout specified by a caller
+    expires. If timeout value is set to 0, ioctl handler uses a default value
+    defined by MAX_SCHEDULE_TIMEOUT.
+- RIO_CM_CHAN_CLOSE:
+    Closes a specified channel and frees associated buffers.
+    If the specified channel is in the CONNECTED state, sends close notification
+    to the remote peer.
+
+The ioctl command codes and corresponding data structures intended for use by
+user-space applications are defined in 'include/uapi/linux/rio_cm_cdev.h'.
+
+2. Hardware Compatibility
+=========================
+
+This device driver uses standard interfaces defined by kernel RapidIO subsystem
+and therefore it can be used with any mport device driver registered by RapidIO
+subsystem with limitations set by available mport HW implementation of messaging
+mailboxes.
+
+3. Module parameters
+====================
+
+- 'dbg_level'
+      - This parameter allows to control amount of debug information
+        generated by this device driver. This parameter is formed by set of
+        bit masks that correspond to the specific functional block.
+        For mask definitions see 'drivers/rapidio/devices/rio_cm.c'
+        This parameter can be changed dynamically.
+        Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
+
+- 'cmbox'
+      - Number of RapidIO mailbox to use (default value is 1).
+        This parameter allows to set messaging mailbox number that will be used
+        within entire RapidIO network. It can be used when default mailbox is
+        used by other device drivers or is not supported by some nodes in the
+        RapidIO network.
+
+- 'chstart'
+      - Start channel number for dynamic assignment. Default value - 256.
+        Allows to exclude channel numbers below this parameter from dynamic
+        allocation to avoid conflicts with software components that use
+        reserved predefined channel numbers.
+
+4. Known problems
+=================
+
+  None.
+
+5. User-space Applications and API Library
+==========================================
+
+Messaging API library and applications that use this device driver are available
+from RapidIO.org.
+
+6. TODO List
+============
+
+- Add support for system notification messages (reserved channel 0).
diff --git a/Documentation/driver-api/rapidio/sysfs.rst b/Documentation/driver-api/rapidio/sysfs.rst
new file mode 100644
index 000000000000..540f72683496
--- /dev/null
+++ b/Documentation/driver-api/rapidio/sysfs.rst
@@ -0,0 +1,7 @@
+=============
+Sysfs entries
+=============
+
+The RapidIO sysfs files have moved to:
+Documentation/ABI/testing/sysfs-bus-rapidio and
+Documentation/ABI/testing/sysfs-class-rapidio
diff --git a/Documentation/driver-api/rapidio/tsi721.rst b/Documentation/driver-api/rapidio/tsi721.rst
new file mode 100644
index 000000000000..42aea438cd20
--- /dev/null
+++ b/Documentation/driver-api/rapidio/tsi721.rst
@@ -0,0 +1,112 @@
+=========================================================================
+RapidIO subsystem mport driver for IDT Tsi721 PCI Express-to-SRIO bridge.
+=========================================================================
+
+1. Overview
+===========
+
+This driver implements all currently defined RapidIO mport callback functions.
+It supports maintenance read and write operations, inbound and outbound RapidIO
+doorbells, inbound maintenance port-writes and RapidIO messaging.
+
+To generate SRIO maintenance transactions this driver uses one of Tsi721 DMA
+channels. This mechanism provides access to larger range of hop counts and
+destination IDs without need for changes in outbound window translation.
+
+RapidIO messaging support uses dedicated messaging channels for each mailbox.
+For inbound messages this driver uses destination ID matching to forward messages
+into the corresponding message queue. Messaging callbacks are implemented to be
+fully compatible with RIONET driver (Ethernet over RapidIO messaging services).
+
+1. Module parameters:
+
+- 'dbg_level'
+      - This parameter allows to control amount of debug information
+        generated by this device driver. This parameter is formed by set of
+        This parameter can be changed bit masks that correspond to the specific
+        functional block.
+        For mask definitions see 'drivers/rapidio/devices/tsi721.h'
+        This parameter can be changed dynamically.
+        Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
+
+- 'dma_desc_per_channel'
+      - This parameter defines number of hardware buffer
+        descriptors allocated for each registered Tsi721 DMA channel.
+        Its default value is 128.
+
+- 'dma_txqueue_sz'
+      - DMA transactions queue size. Defines number of pending
+        transaction requests that can be accepted by each DMA channel.
+        Default value is 16.
+
+- 'dma_sel'
+      - DMA channel selection mask. Bitmask that defines which hardware
+        DMA channels (0 ... 6) will be registered with DmaEngine core.
+        If bit is set to 1, the corresponding DMA channel will be registered.
+        DMA channels not selected by this mask will not be used by this device
+        driver. Default value is 0x7f (use all channels).
+
+- 'pcie_mrrs'
+      - override value for PCIe Maximum Read Request Size (MRRS).
+        This parameter gives an ability to override MRRS value set during PCIe
+        configuration process. Tsi721 supports read request sizes up to 4096B.
+        Value for this parameter must be set as defined by PCIe specification:
+        0 = 128B, 1 = 256B, 2 = 512B, 3 = 1024B, 4 = 2048B and 5 = 4096B.
+        Default value is '-1' (= keep platform setting).
+
+- 'mbox_sel'
+      - RIO messaging MBOX selection mask. This is a bitmask that defines
+        messaging MBOXes are managed by this device driver. Mask bits 0 - 3
+        correspond to MBOX0 - MBOX3. MBOX is under driver's control if the
+        corresponding bit is set to '1'. Default value is 0x0f (= all).
+
+2. Known problems
+=================
+
+  None.
+
+3. DMA Engine Support
+=====================
+
+Tsi721 mport driver supports DMA data transfers between local system memory and
+remote RapidIO devices. This functionality is implemented according to SLAVE
+mode API defined by common Linux kernel DMA Engine framework.
+
+Depending on system requirements RapidIO DMA operations can be included/excluded
+by setting CONFIG_RAPIDIO_DMA_ENGINE option. Tsi721 miniport driver uses seven
+out of eight available BDMA channels to support DMA data transfers.
+One BDMA channel is reserved for generation of maintenance read/write requests.
+
+If Tsi721 mport driver have been built with RAPIDIO_DMA_ENGINE support included,
+this driver will accept DMA-specific module parameter:
+
+  "dma_desc_per_channel"
+			 - defines number of hardware buffer descriptors used by
+                           each BDMA channel of Tsi721 (by default - 128).
+
+4. Version History
+
+  =====   ====================================================================
+  1.1.0   DMA operations re-worked to support data scatter/gather lists larger
+          than hardware buffer descriptors ring.
+  1.0.0   Initial driver release.
+  =====   ====================================================================
+
+5.  License
+===========
+
+  Copyright(c) 2011 Integrated Device Technology, Inc. All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by the Free
+  Software Foundation; either version 2 of the License, or (at your option)
+  any later version.
+
+  This program is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
diff --git a/Documentation/rapidio/index.rst b/Documentation/rapidio/index.rst
deleted file mode 100644
index ab7b5541b346..000000000000
--- a/Documentation/rapidio/index.rst
+++ /dev/null
@@ -1,15 +0,0 @@
-:orphan:
-
-===========================
-The Linux RapidIO Subsystem
-===========================
-
-.. toctree::
-   :maxdepth: 1
-
-   rapidio
-   sysfs
-
-   tsi721
-   mport_cdev
-   rio_cm
diff --git a/Documentation/rapidio/mport_cdev.rst b/Documentation/rapidio/mport_cdev.rst
deleted file mode 100644
index df77a7f7be7d..000000000000
--- a/Documentation/rapidio/mport_cdev.rst
+++ /dev/null
@@ -1,110 +0,0 @@
-==================================================================
-RapidIO subsystem mport character device driver (rio_mport_cdev.c)
-==================================================================
-
-1. Overview
-===========
-
-This device driver is the result of collaboration within the RapidIO.org
-Software Task Group (STG) between Texas Instruments, Freescale,
-Prodrive Technologies, Nokia Networks, BAE and IDT.  Additional input was
-received from other members of RapidIO.org. The objective was to create a
-character mode driver interface which exposes the capabilities of RapidIO
-devices directly to applications, in a manner that allows the numerous and
-varied RapidIO implementations to interoperate.
-
-This driver (MPORT_CDEV) provides access to basic RapidIO subsystem operations
-for user-space applications. Most of RapidIO operations are supported through
-'ioctl' system calls.
-
-When loaded this device driver creates filesystem nodes named rio_mportX in /dev
-directory for each registered RapidIO mport device. 'X' in the node name matches
-to unique port ID assigned to each local mport device.
-
-Using available set of ioctl commands user-space applications can perform
-following RapidIO bus and subsystem operations:
-
-- Reads and writes from/to configuration registers of mport devices
-  (RIO_MPORT_MAINT_READ_LOCAL/RIO_MPORT_MAINT_WRITE_LOCAL)
-- Reads and writes from/to configuration registers of remote RapidIO devices.
-  This operations are defined as RapidIO Maintenance reads/writes in RIO spec.
-  (RIO_MPORT_MAINT_READ_REMOTE/RIO_MPORT_MAINT_WRITE_REMOTE)
-- Set RapidIO Destination ID for mport devices (RIO_MPORT_MAINT_HDID_SET)
-- Set RapidIO Component Tag for mport devices (RIO_MPORT_MAINT_COMPTAG_SET)
-- Query logical index of mport devices (RIO_MPORT_MAINT_PORT_IDX_GET)
-- Query capabilities and RapidIO link configuration of mport devices
-  (RIO_MPORT_GET_PROPERTIES)
-- Enable/Disable reporting of RapidIO doorbell events to user-space applications
-  (RIO_ENABLE_DOORBELL_RANGE/RIO_DISABLE_DOORBELL_RANGE)
-- Enable/Disable reporting of RIO port-write events to user-space applications
-  (RIO_ENABLE_PORTWRITE_RANGE/RIO_DISABLE_PORTWRITE_RANGE)
-- Query/Control type of events reported through this driver: doorbells,
-  port-writes or both (RIO_SET_EVENT_MASK/RIO_GET_EVENT_MASK)
-- Configure/Map mport's outbound requests window(s) for specific size,
-  RapidIO destination ID, hopcount and request type
-  (RIO_MAP_OUTBOUND/RIO_UNMAP_OUTBOUND)
-- Configure/Map mport's inbound requests window(s) for specific size,
-  RapidIO base address and local memory base address
-  (RIO_MAP_INBOUND/RIO_UNMAP_INBOUND)
-- Allocate/Free contiguous DMA coherent memory buffer for DMA data transfers
-  to/from remote RapidIO devices (RIO_ALLOC_DMA/RIO_FREE_DMA)
-- Initiate DMA data transfers to/from remote RapidIO devices (RIO_TRANSFER).
-  Supports blocking, asynchronous and posted (a.k.a 'fire-and-forget') data
-  transfer modes.
-- Check/Wait for completion of asynchronous DMA data transfer
-  (RIO_WAIT_FOR_ASYNC)
-- Manage device objects supported by RapidIO subsystem (RIO_DEV_ADD/RIO_DEV_DEL).
-  This allows implementation of various RapidIO fabric enumeration algorithms
-  as user-space applications while using remaining functionality provided by
-  kernel RapidIO subsystem.
-
-2. Hardware Compatibility
-=========================
-
-This device driver uses standard interfaces defined by kernel RapidIO subsystem
-and therefore it can be used with any mport device driver registered by RapidIO
-subsystem with limitations set by available mport implementation.
-
-At this moment the most common limitation is availability of RapidIO-specific
-DMA engine framework for specific mport device. Users should verify available
-functionality of their platform when planning to use this driver:
-
-- IDT Tsi721 PCIe-to-RapidIO bridge device and its mport device driver are fully
-  compatible with this driver.
-- Freescale SoCs 'fsl_rio' mport driver does not have implementation for RapidIO
-  specific DMA engine support and therefore DMA data transfers mport_cdev driver
-  are not available.
-
-3. Module parameters
-====================
-
-- 'dma_timeout'
-      - DMA transfer completion timeout (in msec, default value 3000).
-        This parameter set a maximum completion wait time for SYNC mode DMA
-        transfer requests and for RIO_WAIT_FOR_ASYNC ioctl requests.
-
-- 'dbg_level'
-      - This parameter allows to control amount of debug information
-        generated by this device driver. This parameter is formed by set of
-        bit masks that correspond to the specific functional blocks.
-        For mask definitions see 'drivers/rapidio/devices/rio_mport_cdev.c'
-        This parameter can be changed dynamically.
-        Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
-
-4. Known problems
-=================
-
-  None.
-
-5. User-space Applications and API
-==================================
-
-API library and applications that use this device driver are available from
-RapidIO.org.
-
-6. TODO List
-============
-
-- Add support for sending/receiving "raw" RapidIO messaging packets.
-- Add memory mapped DMA data transfers as an option when RapidIO-specific DMA
-  is not available.
diff --git a/Documentation/rapidio/rapidio.rst b/Documentation/rapidio/rapidio.rst
deleted file mode 100644
index fb8942d3ba85..000000000000
--- a/Documentation/rapidio/rapidio.rst
+++ /dev/null
@@ -1,362 +0,0 @@
-============
-Introduction
-============
-
-The RapidIO standard is a packet-based fabric interconnect standard designed for
-use in embedded systems. Development of the RapidIO standard is directed by the
-RapidIO Trade Association (RTA). The current version of the RapidIO specification
-is publicly available for download from the RTA web-site [1].
-
-This document describes the basics of the Linux RapidIO subsystem and provides
-information on its major components.
-
-1 Overview
-==========
-
-Because the RapidIO subsystem follows the Linux device model it is integrated
-into the kernel similarly to other buses by defining RapidIO-specific device and
-bus types and registering them within the device model.
-
-The Linux RapidIO subsystem is architecture independent and therefore defines
-architecture-specific interfaces that provide support for common RapidIO
-subsystem operations.
-
-2. Core Components
-==================
-
-A typical RapidIO network is a combination of endpoints and switches.
-Each of these components is represented in the subsystem by an associated data
-structure. The core logical components of the RapidIO subsystem are defined
-in include/linux/rio.h file.
-
-2.1 Master Port
----------------
-
-A master port (or mport) is a RapidIO interface controller that is local to the
-processor executing the Linux code. A master port generates and receives RapidIO
-packets (transactions). In the RapidIO subsystem each master port is represented
-by a rio_mport data structure. This structure contains master port specific
-resources such as mailboxes and doorbells. The rio_mport also includes a unique
-host device ID that is valid when a master port is configured as an enumerating
-host.
-
-RapidIO master ports are serviced by subsystem specific mport device drivers
-that provide functionality defined for this subsystem. To provide a hardware
-independent interface for RapidIO subsystem operations, rio_mport structure
-includes rio_ops data structure which contains pointers to hardware specific
-implementations of RapidIO functions.
-
-2.2 Device
-----------
-
-A RapidIO device is any endpoint (other than mport) or switch in the network.
-All devices are presented in the RapidIO subsystem by corresponding rio_dev data
-structure. Devices form one global device list and per-network device lists
-(depending on number of available mports and networks).
-
-2.3 Switch
-----------
-
-A RapidIO switch is a special class of device that routes packets between its
-ports towards their final destination. The packet destination port within a
-switch is defined by an internal routing table. A switch is presented in the
-RapidIO subsystem by rio_dev data structure expanded by additional rio_switch
-data structure, which contains switch specific information such as copy of the
-routing table and pointers to switch specific functions.
-
-The RapidIO subsystem defines the format and initialization method for subsystem
-specific switch drivers that are designed to provide hardware-specific
-implementation of common switch management routines.
-
-2.4 Network
------------
-
-A RapidIO network is a combination of interconnected endpoint and switch devices.
-Each RapidIO network known to the system is represented by corresponding rio_net
-data structure. This structure includes lists of all devices and local master
-ports that form the same network. It also contains a pointer to the default
-master port that is used to communicate with devices within the network.
-
-2.5 Device Drivers
-------------------
-
-RapidIO device-specific drivers follow Linux Kernel Driver Model and are
-intended to support specific RapidIO devices attached to the RapidIO network.
-
-2.6 Subsystem Interfaces
-------------------------
-
-RapidIO interconnect specification defines features that may be used to provide
-one or more common service layers for all participating RapidIO devices. These
-common services may act separately from device-specific drivers or be used by
-device-specific drivers. Example of such service provider is the RIONET driver
-which implements Ethernet-over-RapidIO interface. Because only one driver can be
-registered for a device, all common RapidIO services have to be registered as
-subsystem interfaces. This allows to have multiple common services attached to
-the same device without blocking attachment of a device-specific driver.
-
-3. Subsystem Initialization
-===========================
-
-In order to initialize the RapidIO subsystem, a platform must initialize and
-register at least one master port within the RapidIO network. To register mport
-within the subsystem controller driver's initialization code calls function
-rio_register_mport() for each available master port.
-
-After all active master ports are registered with a RapidIO subsystem,
-an enumeration and/or discovery routine may be called automatically or
-by user-space command.
-
-RapidIO subsystem can be configured to be built as a statically linked or
-modular component of the kernel (see details below).
-
-4. Enumeration and Discovery
-============================
-
-4.1 Overview
-------------
-
-RapidIO subsystem configuration options allow users to build enumeration and
-discovery methods as statically linked components or loadable modules.
-An enumeration/discovery method implementation and available input parameters
-define how any given method can be attached to available RapidIO mports:
-simply to all available mports OR individually to the specified mport device.
-
-Depending on selected enumeration/discovery build configuration, there are
-several methods to initiate an enumeration and/or discovery process:
-
-  (a) Statically linked enumeration and discovery process can be started
-  automatically during kernel initialization time using corresponding module
-  parameters. This was the original method used since introduction of RapidIO
-  subsystem. Now this method relies on enumerator module parameter which is
-  'rio-scan.scan' for existing basic enumeration/discovery method.
-  When automatic start of enumeration/discovery is used a user has to ensure
-  that all discovering endpoints are started before the enumerating endpoint
-  and are waiting for enumeration to be completed.
-  Configuration option CONFIG_RAPIDIO_DISC_TIMEOUT defines time that discovering
-  endpoint waits for enumeration to be completed. If the specified timeout
-  expires the discovery process is terminated without obtaining RapidIO network
-  information. NOTE: a timed out discovery process may be restarted later using
-  a user-space command as it is described below (if the given endpoint was
-  enumerated successfully).
-
-  (b) Statically linked enumeration and discovery process can be started by
-  a command from user space. This initiation method provides more flexibility
-  for a system startup compared to the option (a) above. After all participating
-  endpoints have been successfully booted, an enumeration process shall be
-  started first by issuing a user-space command, after an enumeration is
-  completed a discovery process can be started on all remaining endpoints.
-
-  (c) Modular enumeration and discovery process can be started by a command from
-  user space. After an enumeration/discovery module is loaded, a network scan
-  process can be started by issuing a user-space command.
-  Similar to the option (b) above, an enumerator has to be started first.
-
-  (d) Modular enumeration and discovery process can be started by a module
-  initialization routine. In this case an enumerating module shall be loaded
-  first.
-
-When a network scan process is started it calls an enumeration or discovery
-routine depending on the configured role of a master port: host or agent.
-
-Enumeration is performed by a master port if it is configured as a host port by
-assigning a host destination ID greater than or equal to zero. The host
-destination ID can be assigned to a master port using various methods depending
-on RapidIO subsystem build configuration:
-
-  (a) For a statically linked RapidIO subsystem core use command line parameter
-  "rapidio.hdid=" with a list of destination ID assignments in order of mport
-  device registration. For example, in a system with two RapidIO controllers
-  the command line parameter "rapidio.hdid=-1,7" will result in assignment of
-  the host destination ID=7 to the second RapidIO controller, while the first
-  one will be assigned destination ID=-1.
-
-  (b) If the RapidIO subsystem core is built as a loadable module, in addition
-  to the method shown above, the host destination ID(s) can be specified using
-  traditional methods of passing module parameter "hdid=" during its loading:
-
-  - from command line: "modprobe rapidio hdid=-1,7", or
-  - from modprobe configuration file using configuration command "options",
-    like in this example: "options rapidio hdid=-1,7". An example of modprobe
-    configuration file is provided in the section below.
-
-NOTES:
-  (i) if "hdid=" parameter is omitted all available mport will be assigned
-  destination ID = -1;
-
-  (ii) the "hdid=" parameter in systems with multiple mports can have
-  destination ID assignments omitted from the end of list (default = -1).
-
-If the host device ID for a specific master port is set to -1, the discovery
-process will be performed for it.
-
-The enumeration and discovery routines use RapidIO maintenance transactions
-to access the configuration space of devices.
-
-NOTE: If RapidIO switch-specific device drivers are built as loadable modules
-they must be loaded before enumeration/discovery process starts.
-This requirement is cased by the fact that enumeration/discovery methods invoke
-vendor-specific callbacks on early stages.
-
-4.2 Automatic Start of Enumeration and Discovery
-------------------------------------------------
-
-Automatic enumeration/discovery start method is applicable only to built-in
-enumeration/discovery RapidIO configuration selection. To enable automatic
-enumeration/discovery start by existing basic enumerator method set use boot
-command line parameter "rio-scan.scan=1".
-
-This configuration requires synchronized start of all RapidIO endpoints that
-form a network which will be enumerated/discovered. Discovering endpoints have
-to be started before an enumeration starts to ensure that all RapidIO
-controllers have been initialized and are ready to be discovered. Configuration
-parameter CONFIG_RAPIDIO_DISC_TIMEOUT defines time (in seconds) which
-a discovering endpoint will wait for enumeration to be completed.
-
-When automatic enumeration/discovery start is selected, basic method's
-initialization routine calls rio_init_mports() to perform enumeration or
-discovery for all known mport devices.
-
-Depending on RapidIO network size and configuration this automatic
-enumeration/discovery start method may be difficult to use due to the
-requirement for synchronized start of all endpoints.
-
-4.3 User-space Start of Enumeration and Discovery
--------------------------------------------------
-
-User-space start of enumeration and discovery can be used with built-in and
-modular build configurations. For user-space controlled start RapidIO subsystem
-creates the sysfs write-only attribute file '/sys/bus/rapidio/scan'. To initiate
-an enumeration or discovery process on specific mport device, a user needs to
-write mport_ID (not RapidIO destination ID) into that file. The mport_ID is a
-sequential number (0 ... RIO_MAX_MPORTS) assigned during mport device
-registration. For example for machine with single RapidIO controller, mport_ID
-for that controller always will be 0.
-
-To initiate RapidIO enumeration/discovery on all available mports a user may
-write '-1' (or RIO_MPORT_ANY) into the scan attribute file.
-
-4.4 Basic Enumeration Method
-----------------------------
-
-This is an original enumeration/discovery method which is available since
-first release of RapidIO subsystem code. The enumeration process is
-implemented according to the enumeration algorithm outlined in the RapidIO
-Interconnect Specification: Annex I [1].
-
-This method can be configured as statically linked or loadable module.
-The method's single parameter "scan" allows to trigger the enumeration/discovery
-process from module initialization routine.
-
-This enumeration/discovery method can be started only once and does not support
-unloading if it is built as a module.
-
-The enumeration process traverses the network using a recursive depth-first
-algorithm. When a new device is found, the enumerator takes ownership of that
-device by writing into the Host Device ID Lock CSR. It does this to ensure that
-the enumerator has exclusive right to enumerate the device. If device ownership
-is successfully acquired, the enumerator allocates a new rio_dev structure and
-initializes it according to device capabilities.
-
-If the device is an endpoint, a unique device ID is assigned to it and its value
-is written into the device's Base Device ID CSR.
-
-If the device is a switch, the enumerator allocates an additional rio_switch
-structure to store switch specific information. Then the switch's vendor ID and
-device ID are queried against a table of known RapidIO switches. Each switch
-table entry contains a pointer to a switch-specific initialization routine that
-initializes pointers to the rest of switch specific operations, and performs
-hardware initialization if necessary. A RapidIO switch does not have a unique
-device ID; it relies on hopcount and routing for device ID of an attached
-endpoint if access to its configuration registers is required. If a switch (or
-chain of switches) does not have any endpoint (except enumerator) attached to
-it, a fake device ID will be assigned to configure a route to that switch.
-In the case of a chain of switches without endpoint, one fake device ID is used
-to configure a route through the entire chain and switches are differentiated by
-their hopcount value.
-
-For both endpoints and switches the enumerator writes a unique component tag
-into device's Component Tag CSR. That unique value is used by the error
-management notification mechanism to identify a device that is reporting an
-error management event.
-
-Enumeration beyond a switch is completed by iterating over each active egress
-port of that switch. For each active link, a route to a default device ID
-(0xFF for 8-bit systems and 0xFFFF for 16-bit systems) is temporarily written
-into the routing table. The algorithm recurs by calling itself with hopcount + 1
-and the default device ID in order to access the device on the active port.
-
-After the host has completed enumeration of the entire network it releases
-devices by clearing device ID locks (calls rio_clear_locks()). For each endpoint
-in the system, it sets the Discovered bit in the Port General Control CSR
-to indicate that enumeration is completed and agents are allowed to execute
-passive discovery of the network.
-
-The discovery process is performed by agents and is similar to the enumeration
-process that is described above. However, the discovery process is performed
-without changes to the existing routing because agents only gather information
-about RapidIO network structure and are building an internal map of discovered
-devices. This way each Linux-based component of the RapidIO subsystem has
-a complete view of the network. The discovery process can be performed
-simultaneously by several agents. After initializing its RapidIO master port
-each agent waits for enumeration completion by the host for the configured wait
-time period. If this wait time period expires before enumeration is completed,
-an agent skips RapidIO discovery and continues with remaining kernel
-initialization.
-
-4.5 Adding New Enumeration/Discovery Method
--------------------------------------------
-
-RapidIO subsystem code organization allows addition of new enumeration/discovery
-methods as new configuration options without significant impact to the core
-RapidIO code.
-
-A new enumeration/discovery method has to be attached to one or more mport
-devices before an enumeration/discovery process can be started. Normally,
-method's module initialization routine calls rio_register_scan() to attach
-an enumerator to a specified mport device (or devices). The basic enumerator
-implementation demonstrates this process.
-
-4.6 Using Loadable RapidIO Switch Drivers
------------------------------------------
-
-In the case when RapidIO switch drivers are built as loadable modules a user
-must ensure that they are loaded before the enumeration/discovery starts.
-This process can be automated by specifying pre- or post- dependencies in the
-RapidIO-specific modprobe configuration file as shown in the example below.
-
-File /etc/modprobe.d/rapidio.conf::
-
-  # Configure RapidIO subsystem modules
-
-  # Set enumerator host destination ID (overrides kernel command line option)
-  options rapidio hdid=-1,2
-
-  # Load RapidIO switch drivers immediately after rapidio core module was loaded
-  softdep rapidio post: idt_gen2 idtcps tsi57x
-
-  # OR :
-
-  # Load RapidIO switch drivers just before rio-scan enumerator module is loaded
-  softdep rio-scan pre: idt_gen2 idtcps tsi57x
-
-  --------------------------
-
-NOTE:
-  In the example above, one of "softdep" commands must be removed or
-  commented out to keep required module loading sequence.
-
-5. References
-=============
-
-[1] RapidIO Trade Association. RapidIO Interconnect Specifications.
-    http://www.rapidio.org.
-
-[2] Rapidio TA. Technology Comparisons.
-    http://www.rapidio.org/education/technology_comparisons/
-
-[3] RapidIO support for Linux.
-    http://lwn.net/Articles/139118/
-
-[4] Matt Porter. RapidIO for Linux. Ottawa Linux Symposium, 2005
-    http://www.kernel.org/doc/ols/2005/ols2005v2-pages-43-56.pdf
diff --git a/Documentation/rapidio/rio_cm.rst b/Documentation/rapidio/rio_cm.rst
deleted file mode 100644
index 5294430a7a74..000000000000
--- a/Documentation/rapidio/rio_cm.rst
+++ /dev/null
@@ -1,135 +0,0 @@
-==========================================================================
-RapidIO subsystem Channelized Messaging character device driver (rio_cm.c)
-==========================================================================
-
-
-1. Overview
-===========
-
-This device driver is the result of collaboration within the RapidIO.org
-Software Task Group (STG) between Texas Instruments, Prodrive Technologies,
-Nokia Networks, BAE and IDT.  Additional input was received from other members
-of RapidIO.org.
-
-The objective was to create a character mode driver interface which exposes
-messaging capabilities of RapidIO endpoint devices (mports) directly
-to applications, in a manner that allows the numerous and varied RapidIO
-implementations to interoperate.
-
-This driver (RIO_CM) provides to user-space applications shared access to
-RapidIO mailbox messaging resources.
-
-RapidIO specification (Part 2) defines that endpoint devices may have up to four
-messaging mailboxes in case of multi-packet message (up to 4KB) and
-up to 64 mailboxes if single-packet messages (up to 256 B) are used. In addition
-to protocol definition limitations, a particular hardware implementation can
-have reduced number of messaging mailboxes.  RapidIO aware applications must
-therefore share the messaging resources of a RapidIO endpoint.
-
-Main purpose of this device driver is to provide RapidIO mailbox messaging
-capability to large number of user-space processes by introducing socket-like
-operations using a single messaging mailbox.  This allows applications to
-use the limited RapidIO messaging hardware resources efficiently.
-
-Most of device driver's operations are supported through 'ioctl' system calls.
-
-When loaded this device driver creates a single file system node named rio_cm
-in /dev directory common for all registered RapidIO mport devices.
-
-Following ioctl commands are available to user-space applications:
-
-- RIO_CM_MPORT_GET_LIST:
-    Returns to caller list of local mport devices that
-    support messaging operations (number of entries up to RIO_MAX_MPORTS).
-    Each list entry is combination of mport's index in the system and RapidIO
-    destination ID assigned to the port.
-- RIO_CM_EP_GET_LIST_SIZE:
-    Returns number of messaging capable remote endpoints
-    in a RapidIO network associated with the specified mport device.
-- RIO_CM_EP_GET_LIST:
-    Returns list of RapidIO destination IDs for messaging
-    capable remote endpoints (peers) available in a RapidIO network associated
-    with the specified mport device.
-- RIO_CM_CHAN_CREATE:
-    Creates RapidIO message exchange channel data structure
-    with channel ID assigned automatically or as requested by a caller.
-- RIO_CM_CHAN_BIND:
-    Binds the specified channel data structure to the specified
-    mport device.
-- RIO_CM_CHAN_LISTEN:
-    Enables listening for connection requests on the specified
-    channel.
-- RIO_CM_CHAN_ACCEPT:
-    Accepts a connection request from peer on the specified
-    channel. If wait timeout for this request is specified by a caller it is
-    a blocking call. If timeout set to 0 this is non-blocking call - ioctl
-    handler checks for a pending connection request and if one is not available
-    exits with -EGAIN error status immediately.
-- RIO_CM_CHAN_CONNECT:
-    Sends a connection request to a remote peer/channel.
-- RIO_CM_CHAN_SEND:
-    Sends a data message through the specified channel.
-    The handler for this request assumes that message buffer specified by
-    a caller includes the reserved space for a packet header required by
-    this driver.
-- RIO_CM_CHAN_RECEIVE:
-    Receives a data message through a connected channel.
-    If the channel does not have an incoming message ready to return this ioctl
-    handler will wait for new message until timeout specified by a caller
-    expires. If timeout value is set to 0, ioctl handler uses a default value
-    defined by MAX_SCHEDULE_TIMEOUT.
-- RIO_CM_CHAN_CLOSE:
-    Closes a specified channel and frees associated buffers.
-    If the specified channel is in the CONNECTED state, sends close notification
-    to the remote peer.
-
-The ioctl command codes and corresponding data structures intended for use by
-user-space applications are defined in 'include/uapi/linux/rio_cm_cdev.h'.
-
-2. Hardware Compatibility
-=========================
-
-This device driver uses standard interfaces defined by kernel RapidIO subsystem
-and therefore it can be used with any mport device driver registered by RapidIO
-subsystem with limitations set by available mport HW implementation of messaging
-mailboxes.
-
-3. Module parameters
-====================
-
-- 'dbg_level'
-      - This parameter allows to control amount of debug information
-        generated by this device driver. This parameter is formed by set of
-        bit masks that correspond to the specific functional block.
-        For mask definitions see 'drivers/rapidio/devices/rio_cm.c'
-        This parameter can be changed dynamically.
-        Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
-
-- 'cmbox'
-      - Number of RapidIO mailbox to use (default value is 1).
-        This parameter allows to set messaging mailbox number that will be used
-        within entire RapidIO network. It can be used when default mailbox is
-        used by other device drivers or is not supported by some nodes in the
-        RapidIO network.
-
-- 'chstart'
-      - Start channel number for dynamic assignment. Default value - 256.
-        Allows to exclude channel numbers below this parameter from dynamic
-        allocation to avoid conflicts with software components that use
-        reserved predefined channel numbers.
-
-4. Known problems
-=================
-
-  None.
-
-5. User-space Applications and API Library
-==========================================
-
-Messaging API library and applications that use this device driver are available
-from RapidIO.org.
-
-6. TODO List
-============
-
-- Add support for system notification messages (reserved channel 0).
diff --git a/Documentation/rapidio/sysfs.rst b/Documentation/rapidio/sysfs.rst
deleted file mode 100644
index 540f72683496..000000000000
--- a/Documentation/rapidio/sysfs.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-=============
-Sysfs entries
-=============
-
-The RapidIO sysfs files have moved to:
-Documentation/ABI/testing/sysfs-bus-rapidio and
-Documentation/ABI/testing/sysfs-class-rapidio
diff --git a/Documentation/rapidio/tsi721.rst b/Documentation/rapidio/tsi721.rst
deleted file mode 100644
index 42aea438cd20..000000000000
--- a/Documentation/rapidio/tsi721.rst
+++ /dev/null
@@ -1,112 +0,0 @@
-=========================================================================
-RapidIO subsystem mport driver for IDT Tsi721 PCI Express-to-SRIO bridge.
-=========================================================================
-
-1. Overview
-===========
-
-This driver implements all currently defined RapidIO mport callback functions.
-It supports maintenance read and write operations, inbound and outbound RapidIO
-doorbells, inbound maintenance port-writes and RapidIO messaging.
-
-To generate SRIO maintenance transactions this driver uses one of Tsi721 DMA
-channels. This mechanism provides access to larger range of hop counts and
-destination IDs without need for changes in outbound window translation.
-
-RapidIO messaging support uses dedicated messaging channels for each mailbox.
-For inbound messages this driver uses destination ID matching to forward messages
-into the corresponding message queue. Messaging callbacks are implemented to be
-fully compatible with RIONET driver (Ethernet over RapidIO messaging services).
-
-1. Module parameters:
-
-- 'dbg_level'
-      - This parameter allows to control amount of debug information
-        generated by this device driver. This parameter is formed by set of
-        This parameter can be changed bit masks that correspond to the specific
-        functional block.
-        For mask definitions see 'drivers/rapidio/devices/tsi721.h'
-        This parameter can be changed dynamically.
-        Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
-
-- 'dma_desc_per_channel'
-      - This parameter defines number of hardware buffer
-        descriptors allocated for each registered Tsi721 DMA channel.
-        Its default value is 128.
-
-- 'dma_txqueue_sz'
-      - DMA transactions queue size. Defines number of pending
-        transaction requests that can be accepted by each DMA channel.
-        Default value is 16.
-
-- 'dma_sel'
-      - DMA channel selection mask. Bitmask that defines which hardware
-        DMA channels (0 ... 6) will be registered with DmaEngine core.
-        If bit is set to 1, the corresponding DMA channel will be registered.
-        DMA channels not selected by this mask will not be used by this device
-        driver. Default value is 0x7f (use all channels).
-
-- 'pcie_mrrs'
-      - override value for PCIe Maximum Read Request Size (MRRS).
-        This parameter gives an ability to override MRRS value set during PCIe
-        configuration process. Tsi721 supports read request sizes up to 4096B.
-        Value for this parameter must be set as defined by PCIe specification:
-        0 = 128B, 1 = 256B, 2 = 512B, 3 = 1024B, 4 = 2048B and 5 = 4096B.
-        Default value is '-1' (= keep platform setting).
-
-- 'mbox_sel'
-      - RIO messaging MBOX selection mask. This is a bitmask that defines
-        messaging MBOXes are managed by this device driver. Mask bits 0 - 3
-        correspond to MBOX0 - MBOX3. MBOX is under driver's control if the
-        corresponding bit is set to '1'. Default value is 0x0f (= all).
-
-2. Known problems
-=================
-
-  None.
-
-3. DMA Engine Support
-=====================
-
-Tsi721 mport driver supports DMA data transfers between local system memory and
-remote RapidIO devices. This functionality is implemented according to SLAVE
-mode API defined by common Linux kernel DMA Engine framework.
-
-Depending on system requirements RapidIO DMA operations can be included/excluded
-by setting CONFIG_RAPIDIO_DMA_ENGINE option. Tsi721 miniport driver uses seven
-out of eight available BDMA channels to support DMA data transfers.
-One BDMA channel is reserved for generation of maintenance read/write requests.
-
-If Tsi721 mport driver have been built with RAPIDIO_DMA_ENGINE support included,
-this driver will accept DMA-specific module parameter:
-
-  "dma_desc_per_channel"
-			 - defines number of hardware buffer descriptors used by
-                           each BDMA channel of Tsi721 (by default - 128).
-
-4. Version History
-
-  =====   ====================================================================
-  1.1.0   DMA operations re-worked to support data scatter/gather lists larger
-          than hardware buffer descriptors ring.
-  1.0.0   Initial driver release.
-  =====   ====================================================================
-
-5.  License
-===========
-
-  Copyright(c) 2011 Integrated Device Technology, Inc. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms of the GNU General Public License as published by the Free
-  Software Foundation; either version 2 of the License, or (at your option)
-  any later version.
-
-  This program is distributed in the hope that it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-- 
cgit 


From 59809fe88224db24432ad50e62fd8d5f0df738a1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 16:06:08 -0300
Subject: docs: perf: move to the admin-guide

The perf infrastructure is used for userspace to track issues.
At least a good part of what's described here is related to
it.

So, add it to the admin-guide.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/admin-guide/index.rst              |  1 +
 Documentation/admin-guide/perf/arm-ccn.rst       | 61 ++++++++++++++++++++++++
 Documentation/admin-guide/perf/arm_dsu_pmu.rst   | 29 +++++++++++
 Documentation/admin-guide/perf/hisi-pmu.rst      | 60 +++++++++++++++++++++++
 Documentation/admin-guide/perf/index.rst         | 14 ++++++
 Documentation/admin-guide/perf/qcom_l2_pmu.rst   | 39 +++++++++++++++
 Documentation/admin-guide/perf/qcom_l3_pmu.rst   | 26 ++++++++++
 Documentation/admin-guide/perf/thunderx2-pmu.rst | 42 ++++++++++++++++
 Documentation/admin-guide/perf/xgene-pmu.rst     | 49 +++++++++++++++++++
 Documentation/perf/arm-ccn.rst                   | 61 ------------------------
 Documentation/perf/arm_dsu_pmu.rst               | 29 -----------
 Documentation/perf/hisi-pmu.rst                  | 60 -----------------------
 Documentation/perf/index.rst                     | 16 -------
 Documentation/perf/qcom_l2_pmu.rst               | 39 ---------------
 Documentation/perf/qcom_l3_pmu.rst               | 26 ----------
 Documentation/perf/thunderx2-pmu.rst             | 42 ----------------
 Documentation/perf/xgene-pmu.rst                 | 49 -------------------
 17 files changed, 321 insertions(+), 322 deletions(-)
 create mode 100644 Documentation/admin-guide/perf/arm-ccn.rst
 create mode 100644 Documentation/admin-guide/perf/arm_dsu_pmu.rst
 create mode 100644 Documentation/admin-guide/perf/hisi-pmu.rst
 create mode 100644 Documentation/admin-guide/perf/index.rst
 create mode 100644 Documentation/admin-guide/perf/qcom_l2_pmu.rst
 create mode 100644 Documentation/admin-guide/perf/qcom_l3_pmu.rst
 create mode 100644 Documentation/admin-guide/perf/thunderx2-pmu.rst
 create mode 100644 Documentation/admin-guide/perf/xgene-pmu.rst
 delete mode 100644 Documentation/perf/arm-ccn.rst
 delete mode 100644 Documentation/perf/arm_dsu_pmu.rst
 delete mode 100644 Documentation/perf/hisi-pmu.rst
 delete mode 100644 Documentation/perf/index.rst
 delete mode 100644 Documentation/perf/qcom_l2_pmu.rst
 delete mode 100644 Documentation/perf/qcom_l3_pmu.rst
 delete mode 100644 Documentation/perf/thunderx2-pmu.rst
 delete mode 100644 Documentation/perf/xgene-pmu.rst

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 8853c95ef0d4..f40c4b5a181b 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -38,6 +38,7 @@ problems and bugs in particular.
    ramoops
    dynamic-debug-howto
    init
+   perf/index
 
 This is the beginning of a section with information of interest to
 application developers.  Documents covering various aspects of the kernel
diff --git a/Documentation/admin-guide/perf/arm-ccn.rst b/Documentation/admin-guide/perf/arm-ccn.rst
new file mode 100644
index 000000000000..832b0c64023a
--- /dev/null
+++ b/Documentation/admin-guide/perf/arm-ccn.rst
@@ -0,0 +1,61 @@
+==========================
+ARM Cache Coherent Network
+==========================
+
+CCN-504 is a ring-bus interconnect consisting of 11 crosspoints
+(XPs), with each crosspoint supporting up to two device ports,
+so nodes (devices) 0 and 1 are connected to crosspoint 0,
+nodes 2 and 3 to crosspoint 1 etc.
+
+PMU (perf) driver
+-----------------
+
+The CCN driver registers a perf PMU driver, which provides
+description of available events and configuration options
+in sysfs, see /sys/bus/event_source/devices/ccn*.
+
+The "format" directory describes format of the config, config1
+and config2 fields of the perf_event_attr structure. The "events"
+directory provides configuration templates for all documented
+events, that can be used with perf tool. For example "xp_valid_flit"
+is an equivalent of "type=0x8,event=0x4". Other parameters must be
+explicitly specified.
+
+For events originating from device, "node" defines its index.
+
+Crosspoint PMU events require "xp" (index), "bus" (bus number)
+and "vc" (virtual channel ID).
+
+Crosspoint watchpoint-based events (special "event" value 0xfe)
+require "xp" and "vc" as as above plus "port" (device port index),
+"dir" (transmit/receive direction), comparator values ("cmp_l"
+and "cmp_h") and "mask", being index of the comparator mask.
+
+Masks are defined separately from the event description
+(due to limited number of the config values) in the "cmp_mask"
+directory, with first 8 configurable by user and additional
+4 hardcoded for the most frequent use cases.
+
+Cycle counter is described by a "type" value 0xff and does
+not require any other settings.
+
+The driver also provides a "cpumask" sysfs attribute, which contains
+a single CPU ID, of the processor which will be used to handle all
+the CCN PMU events. It is recommended that the user space tools
+request the events on this processor (if not, the perf_event->cpu value
+will be overwritten anyway). In case of this processor being offlined,
+the events are migrated to another one and the attribute is updated.
+
+Example of perf tool use::
+
+  / # perf list | grep ccn
+    ccn/cycles/                                        [Kernel PMU event]
+  <...>
+    ccn/xp_valid_flit,xp=?,port=?,vc=?,dir=?/          [Kernel PMU event]
+  <...>
+
+  / # perf stat -a -e ccn/cycles/,ccn/xp_valid_flit,xp=1,port=0,vc=1,dir=1/ \
+                                                                         sleep 1
+
+The driver does not support sampling, therefore "perf record" will
+not work. Per-task (without "-a") perf sessions are not supported.
diff --git a/Documentation/admin-guide/perf/arm_dsu_pmu.rst b/Documentation/admin-guide/perf/arm_dsu_pmu.rst
new file mode 100644
index 000000000000..7fd34db75d13
--- /dev/null
+++ b/Documentation/admin-guide/perf/arm_dsu_pmu.rst
@@ -0,0 +1,29 @@
+==================================
+ARM DynamIQ Shared Unit (DSU) PMU
+==================================
+
+ARM DynamIQ Shared Unit integrates one or more cores with an L3 memory system,
+control logic and external interfaces to form a multicore cluster. The PMU
+allows counting the various events related to the L3 cache, Snoop Control Unit
+etc, using 32bit independent counters. It also provides a 64bit cycle counter.
+
+The PMU can only be accessed via CPU system registers and are common to the
+cores connected to the same DSU. Like most of the other uncore PMUs, DSU
+PMU doesn't support process specific events and cannot be used in sampling mode.
+
+The DSU provides a bitmap for a subset of implemented events via hardware
+registers. There is no way for the driver to determine if the other events
+are available or not. Hence the driver exposes only those events advertised
+by the DSU, in "events" directory under::
+
+  /sys/bus/event_sources/devices/arm_dsu_<N>/
+
+The user should refer to the TRM of the product to figure out the supported events
+and use the raw event code for the unlisted events.
+
+The driver also exposes the CPUs connected to the DSU instance in "associated_cpus".
+
+
+e.g usage::
+
+	perf stat -a -e arm_dsu_0/cycles/
diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst
new file mode 100644
index 000000000000..404a5c3d9d00
--- /dev/null
+++ b/Documentation/admin-guide/perf/hisi-pmu.rst
@@ -0,0 +1,60 @@
+======================================================
+HiSilicon SoC uncore Performance Monitoring Unit (PMU)
+======================================================
+
+The HiSilicon SoC chip includes various independent system device PMUs
+such as L3 cache (L3C), Hydra Home Agent (HHA) and DDRC. These PMUs are
+independent and have hardware logic to gather statistics and performance
+information.
+
+The HiSilicon SoC encapsulates multiple CPU and IO dies. Each CPU cluster
+(CCL) is made up of 4 cpu cores sharing one L3 cache; each CPU die is
+called Super CPU cluster (SCCL) and is made up of 6 CCLs. Each SCCL has
+two HHAs (0 - 1) and four DDRCs (0 - 3), respectively.
+
+HiSilicon SoC uncore PMU driver
+-------------------------------
+
+Each device PMU has separate registers for event counting, control and
+interrupt, and the PMU driver shall register perf PMU drivers like L3C,
+HHA and DDRC etc. The available events and configuration options shall
+be described in the sysfs, see:
+
+/sys/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>/, or
+/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>.
+The "perf list" command shall list the available events from sysfs.
+
+Each L3C, HHA and DDRC is registered as a separate PMU with perf. The PMU
+name will appear in event listing as hisi_sccl<sccl-id>_module<index-id>.
+where "sccl-id" is the identifier of the SCCL and "index-id" is the index of
+module.
+
+e.g. hisi_sccl3_l3c0/rd_hit_cpipe is READ_HIT_CPIPE event of L3C index #0 in
+SCCL ID #3.
+
+e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in
+SCCL ID #1.
+
+The driver also provides a "cpumask" sysfs attribute, which shows the CPU core
+ID used to count the uncore PMU event.
+
+Example usage of perf::
+
+  $# perf list
+  hisi_sccl3_l3c0/rd_hit_cpipe/ [kernel PMU event]
+  ------------------------------------------
+  hisi_sccl3_l3c0/wr_hit_cpipe/ [kernel PMU event]
+  ------------------------------------------
+  hisi_sccl1_l3c0/rd_hit_cpipe/ [kernel PMU event]
+  ------------------------------------------
+  hisi_sccl1_l3c0/wr_hit_cpipe/ [kernel PMU event]
+  ------------------------------------------
+
+  $# perf stat -a -e hisi_sccl3_l3c0/rd_hit_cpipe/ sleep 5
+  $# perf stat -a -e hisi_sccl3_l3c0/config=0x02/ sleep 5
+
+The current driver does not support sampling. So "perf record" is unsupported.
+Also attach to a task is unsupported as the events are all uncore.
+
+Note: Please contact the maintainer for a complete list of events supported for
+the PMU devices in the SoC and its information if needed.
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
new file mode 100644
index 000000000000..9d445451ea18
--- /dev/null
+++ b/Documentation/admin-guide/perf/index.rst
@@ -0,0 +1,14 @@
+===========================
+Performance monitor support
+===========================
+
+.. toctree::
+   :maxdepth: 1
+
+   hisi-pmu
+   qcom_l2_pmu
+   qcom_l3_pmu
+   arm-ccn
+   xgene-pmu
+   arm_dsu_pmu
+   thunderx2-pmu
diff --git a/Documentation/admin-guide/perf/qcom_l2_pmu.rst b/Documentation/admin-guide/perf/qcom_l2_pmu.rst
new file mode 100644
index 000000000000..c130178a4a55
--- /dev/null
+++ b/Documentation/admin-guide/perf/qcom_l2_pmu.rst
@@ -0,0 +1,39 @@
+=====================================================================
+Qualcomm Technologies Level-2 Cache Performance Monitoring Unit (PMU)
+=====================================================================
+
+This driver supports the L2 cache clusters found in Qualcomm Technologies
+Centriq SoCs. There are multiple physical L2 cache clusters, each with their
+own PMU. Each cluster has one or more CPUs associated with it.
+
+There is one logical L2 PMU exposed, which aggregates the results from
+the physical PMUs.
+
+The driver provides a description of its available events and configuration
+options in sysfs, see /sys/devices/l2cache_0.
+
+The "format" directory describes the format of the events.
+
+Events can be envisioned as a 2-dimensional array. Each column represents
+a group of events. There are 8 groups. Only one entry from each
+group can be in use at a time. If multiple events from the same group
+are specified, the conflicting events cannot be counted at the same time.
+
+Events are specified as 0xCCG, where CC is 2 hex digits specifying
+the code (array row) and G specifies the group (column) 0-7.
+
+In addition there is a cycle counter event specified by the value 0xFE
+which is outside the above scheme.
+
+The driver provides a "cpumask" sysfs attribute which contains a mask
+consisting of one CPU per cluster which will be used to handle all the PMU
+events on that cluster.
+
+Examples for use with perf::
+
+  perf stat -e l2cache_0/config=0x001/,l2cache_0/config=0x042/ -a sleep 1
+
+  perf stat -e l2cache_0/config=0xfe/ -C 2 sleep 1
+
+The driver does not support sampling, therefore "perf record" will
+not work. Per-task perf sessions are not supported.
diff --git a/Documentation/admin-guide/perf/qcom_l3_pmu.rst b/Documentation/admin-guide/perf/qcom_l3_pmu.rst
new file mode 100644
index 000000000000..a3d014a46bfd
--- /dev/null
+++ b/Documentation/admin-guide/perf/qcom_l3_pmu.rst
@@ -0,0 +1,26 @@
+===========================================================================
+Qualcomm Datacenter Technologies L3 Cache Performance Monitoring Unit (PMU)
+===========================================================================
+
+This driver supports the L3 cache PMUs found in Qualcomm Datacenter Technologies
+Centriq SoCs. The L3 cache on these SOCs is composed of multiple slices, shared
+by all cores within a socket. Each slice is exposed as a separate uncore perf
+PMU with device name l3cache_<socket>_<instance>. User space is responsible
+for aggregating across slices.
+
+The driver provides a description of its available events and configuration
+options in sysfs, see /sys/devices/l3cache*. Given that these are uncore PMUs
+the driver also exposes a "cpumask" sysfs attribute which contains a mask
+consisting of one CPU per socket which will be used to handle all the PMU
+events on that socket.
+
+The hardware implements 32bit event counters and has a flat 8bit event space
+exposed via the "event" format attribute. In addition to the 32bit physical
+counters the driver supports virtual 64bit hardware counters by using hardware
+counter chaining. This feature is exposed via the "lc" (long counter) format
+flag. E.g.::
+
+  perf stat -e l3cache_0_0/read-miss,lc/
+
+Given that these are uncore PMUs the driver does not support sampling, therefore
+"perf record" will not work. Per-task perf sessions are not supported.
diff --git a/Documentation/admin-guide/perf/thunderx2-pmu.rst b/Documentation/admin-guide/perf/thunderx2-pmu.rst
new file mode 100644
index 000000000000..08e33675853a
--- /dev/null
+++ b/Documentation/admin-guide/perf/thunderx2-pmu.rst
@@ -0,0 +1,42 @@
+=============================================================
+Cavium ThunderX2 SoC Performance Monitoring Unit (PMU UNCORE)
+=============================================================
+
+The ThunderX2 SoC PMU consists of independent, system-wide, per-socket
+PMUs such as the Level 3 Cache (L3C) and DDR4 Memory Controller (DMC).
+
+The DMC has 8 interleaved channels and the L3C has 16 interleaved tiles.
+Events are counted for the default channel (i.e. channel 0) and prorated
+to the total number of channels/tiles.
+
+The DMC and L3C support up to 4 counters. Counters are independently
+programmable and can be started and stopped individually. Each counter
+can be set to a different event. Counters are 32-bit and do not support
+an overflow interrupt; they are read every 2 seconds.
+
+PMU UNCORE (perf) driver:
+
+The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and
+L3C devices.  Each PMU can be used to count up to 4 events
+simultaneously. The PMUs provide a description of their available events
+and configuration options under sysfs, see
+/sys/devices/uncore_<l3c_S/dmc_S/>; S is the socket id.
+
+The driver does not support sampling, therefore "perf record" will not
+work. Per-task perf sessions are also not supported.
+
+Examples::
+
+  # perf stat -a -e uncore_dmc_0/cnt_cycles/ sleep 1
+
+  # perf stat -a -e \
+  uncore_dmc_0/cnt_cycles/,\
+  uncore_dmc_0/data_transfers/,\
+  uncore_dmc_0/read_txns/,\
+  uncore_dmc_0/write_txns/ sleep 1
+
+  # perf stat -a -e \
+  uncore_l3c_0/read_request/,\
+  uncore_l3c_0/read_hit/,\
+  uncore_l3c_0/inv_request/,\
+  uncore_l3c_0/inv_hit/ sleep 1
diff --git a/Documentation/admin-guide/perf/xgene-pmu.rst b/Documentation/admin-guide/perf/xgene-pmu.rst
new file mode 100644
index 000000000000..644f8ed89152
--- /dev/null
+++ b/Documentation/admin-guide/perf/xgene-pmu.rst
@@ -0,0 +1,49 @@
+================================================
+APM X-Gene SoC Performance Monitoring Unit (PMU)
+================================================
+
+X-Gene SoC PMU consists of various independent system device PMUs such as
+L3 cache(s), I/O bridge(s), memory controller bridge(s) and memory
+controller(s). These PMU devices are loosely architected to follow the
+same model as the PMU for ARM cores. The PMUs share the same top level
+interrupt and status CSR region.
+
+PMU (perf) driver
+-----------------
+
+The xgene-pmu driver registers several perf PMU drivers. Each of the perf
+driver provides description of its available events and configuration options
+in sysfs, see /sys/devices/<l3cX/iobX/mcbX/mcX>/.
+
+The "format" directory describes format of the config (event ID),
+config1 (agent ID) fields of the perf_event_attr structure. The "events"
+directory provides configuration templates for all supported event types that
+can be used with perf tool. For example, "l3c0/bank-fifo-full/" is an
+equivalent of "l3c0/config=0x0b/".
+
+Most of the SoC PMU has a specific list of agent ID used for monitoring
+performance of a specific datapath. For example, agents of a L3 cache can be
+a specific CPU or an I/O bridge. Each PMU has a set of 2 registers capable of
+masking the agents from which the request come from. If the bit with
+the bit number corresponding to the agent is set, the event is counted only if
+it is caused by a request from that agent. Each agent ID bit is inversely mapped
+to a corresponding bit in "config1" field. By default, the event will be
+counted for all agent requests (config1 = 0x0). For all the supported agents of
+each PMU, please refer to APM X-Gene User Manual.
+
+Each perf driver also provides a "cpumask" sysfs attribute, which contains a
+single CPU ID of the processor which will be used to handle all the PMU events.
+
+Example for perf tool use::
+
+ / # perf list | grep -e l3c -e iob -e mcb -e mc
+   l3c0/ackq-full/                                    [Kernel PMU event]
+ <...>
+   mcb1/mcb-csw-stall/                                [Kernel PMU event]
+
+ / # perf stat -a -e l3c0/read-miss/,mcb1/csw-write-request/ sleep 1
+
+ / # perf stat -a -e l3c0/read-miss,config1=0xfffffffffffffffe/ sleep 1
+
+The driver does not support sampling, therefore "perf record" will
+not work. Per-task (without "-a") perf sessions are not supported.
diff --git a/Documentation/perf/arm-ccn.rst b/Documentation/perf/arm-ccn.rst
deleted file mode 100644
index 832b0c64023a..000000000000
--- a/Documentation/perf/arm-ccn.rst
+++ /dev/null
@@ -1,61 +0,0 @@
-==========================
-ARM Cache Coherent Network
-==========================
-
-CCN-504 is a ring-bus interconnect consisting of 11 crosspoints
-(XPs), with each crosspoint supporting up to two device ports,
-so nodes (devices) 0 and 1 are connected to crosspoint 0,
-nodes 2 and 3 to crosspoint 1 etc.
-
-PMU (perf) driver
------------------
-
-The CCN driver registers a perf PMU driver, which provides
-description of available events and configuration options
-in sysfs, see /sys/bus/event_source/devices/ccn*.
-
-The "format" directory describes format of the config, config1
-and config2 fields of the perf_event_attr structure. The "events"
-directory provides configuration templates for all documented
-events, that can be used with perf tool. For example "xp_valid_flit"
-is an equivalent of "type=0x8,event=0x4". Other parameters must be
-explicitly specified.
-
-For events originating from device, "node" defines its index.
-
-Crosspoint PMU events require "xp" (index), "bus" (bus number)
-and "vc" (virtual channel ID).
-
-Crosspoint watchpoint-based events (special "event" value 0xfe)
-require "xp" and "vc" as as above plus "port" (device port index),
-"dir" (transmit/receive direction), comparator values ("cmp_l"
-and "cmp_h") and "mask", being index of the comparator mask.
-
-Masks are defined separately from the event description
-(due to limited number of the config values) in the "cmp_mask"
-directory, with first 8 configurable by user and additional
-4 hardcoded for the most frequent use cases.
-
-Cycle counter is described by a "type" value 0xff and does
-not require any other settings.
-
-The driver also provides a "cpumask" sysfs attribute, which contains
-a single CPU ID, of the processor which will be used to handle all
-the CCN PMU events. It is recommended that the user space tools
-request the events on this processor (if not, the perf_event->cpu value
-will be overwritten anyway). In case of this processor being offlined,
-the events are migrated to another one and the attribute is updated.
-
-Example of perf tool use::
-
-  / # perf list | grep ccn
-    ccn/cycles/                                        [Kernel PMU event]
-  <...>
-    ccn/xp_valid_flit,xp=?,port=?,vc=?,dir=?/          [Kernel PMU event]
-  <...>
-
-  / # perf stat -a -e ccn/cycles/,ccn/xp_valid_flit,xp=1,port=0,vc=1,dir=1/ \
-                                                                         sleep 1
-
-The driver does not support sampling, therefore "perf record" will
-not work. Per-task (without "-a") perf sessions are not supported.
diff --git a/Documentation/perf/arm_dsu_pmu.rst b/Documentation/perf/arm_dsu_pmu.rst
deleted file mode 100644
index 7fd34db75d13..000000000000
--- a/Documentation/perf/arm_dsu_pmu.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-==================================
-ARM DynamIQ Shared Unit (DSU) PMU
-==================================
-
-ARM DynamIQ Shared Unit integrates one or more cores with an L3 memory system,
-control logic and external interfaces to form a multicore cluster. The PMU
-allows counting the various events related to the L3 cache, Snoop Control Unit
-etc, using 32bit independent counters. It also provides a 64bit cycle counter.
-
-The PMU can only be accessed via CPU system registers and are common to the
-cores connected to the same DSU. Like most of the other uncore PMUs, DSU
-PMU doesn't support process specific events and cannot be used in sampling mode.
-
-The DSU provides a bitmap for a subset of implemented events via hardware
-registers. There is no way for the driver to determine if the other events
-are available or not. Hence the driver exposes only those events advertised
-by the DSU, in "events" directory under::
-
-  /sys/bus/event_sources/devices/arm_dsu_<N>/
-
-The user should refer to the TRM of the product to figure out the supported events
-and use the raw event code for the unlisted events.
-
-The driver also exposes the CPUs connected to the DSU instance in "associated_cpus".
-
-
-e.g usage::
-
-	perf stat -a -e arm_dsu_0/cycles/
diff --git a/Documentation/perf/hisi-pmu.rst b/Documentation/perf/hisi-pmu.rst
deleted file mode 100644
index 404a5c3d9d00..000000000000
--- a/Documentation/perf/hisi-pmu.rst
+++ /dev/null
@@ -1,60 +0,0 @@
-======================================================
-HiSilicon SoC uncore Performance Monitoring Unit (PMU)
-======================================================
-
-The HiSilicon SoC chip includes various independent system device PMUs
-such as L3 cache (L3C), Hydra Home Agent (HHA) and DDRC. These PMUs are
-independent and have hardware logic to gather statistics and performance
-information.
-
-The HiSilicon SoC encapsulates multiple CPU and IO dies. Each CPU cluster
-(CCL) is made up of 4 cpu cores sharing one L3 cache; each CPU die is
-called Super CPU cluster (SCCL) and is made up of 6 CCLs. Each SCCL has
-two HHAs (0 - 1) and four DDRCs (0 - 3), respectively.
-
-HiSilicon SoC uncore PMU driver
--------------------------------
-
-Each device PMU has separate registers for event counting, control and
-interrupt, and the PMU driver shall register perf PMU drivers like L3C,
-HHA and DDRC etc. The available events and configuration options shall
-be described in the sysfs, see:
-
-/sys/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>/, or
-/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>.
-The "perf list" command shall list the available events from sysfs.
-
-Each L3C, HHA and DDRC is registered as a separate PMU with perf. The PMU
-name will appear in event listing as hisi_sccl<sccl-id>_module<index-id>.
-where "sccl-id" is the identifier of the SCCL and "index-id" is the index of
-module.
-
-e.g. hisi_sccl3_l3c0/rd_hit_cpipe is READ_HIT_CPIPE event of L3C index #0 in
-SCCL ID #3.
-
-e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in
-SCCL ID #1.
-
-The driver also provides a "cpumask" sysfs attribute, which shows the CPU core
-ID used to count the uncore PMU event.
-
-Example usage of perf::
-
-  $# perf list
-  hisi_sccl3_l3c0/rd_hit_cpipe/ [kernel PMU event]
-  ------------------------------------------
-  hisi_sccl3_l3c0/wr_hit_cpipe/ [kernel PMU event]
-  ------------------------------------------
-  hisi_sccl1_l3c0/rd_hit_cpipe/ [kernel PMU event]
-  ------------------------------------------
-  hisi_sccl1_l3c0/wr_hit_cpipe/ [kernel PMU event]
-  ------------------------------------------
-
-  $# perf stat -a -e hisi_sccl3_l3c0/rd_hit_cpipe/ sleep 5
-  $# perf stat -a -e hisi_sccl3_l3c0/config=0x02/ sleep 5
-
-The current driver does not support sampling. So "perf record" is unsupported.
-Also attach to a task is unsupported as the events are all uncore.
-
-Note: Please contact the maintainer for a complete list of events supported for
-the PMU devices in the SoC and its information if needed.
diff --git a/Documentation/perf/index.rst b/Documentation/perf/index.rst
deleted file mode 100644
index 4bf848e27f26..000000000000
--- a/Documentation/perf/index.rst
+++ /dev/null
@@ -1,16 +0,0 @@
-:orphan:
-
-===========================
-Performance monitor support
-===========================
-
-.. toctree::
-   :maxdepth: 1
-
-   hisi-pmu
-   qcom_l2_pmu
-   qcom_l3_pmu
-   arm-ccn
-   xgene-pmu
-   arm_dsu_pmu
-   thunderx2-pmu
diff --git a/Documentation/perf/qcom_l2_pmu.rst b/Documentation/perf/qcom_l2_pmu.rst
deleted file mode 100644
index c130178a4a55..000000000000
--- a/Documentation/perf/qcom_l2_pmu.rst
+++ /dev/null
@@ -1,39 +0,0 @@
-=====================================================================
-Qualcomm Technologies Level-2 Cache Performance Monitoring Unit (PMU)
-=====================================================================
-
-This driver supports the L2 cache clusters found in Qualcomm Technologies
-Centriq SoCs. There are multiple physical L2 cache clusters, each with their
-own PMU. Each cluster has one or more CPUs associated with it.
-
-There is one logical L2 PMU exposed, which aggregates the results from
-the physical PMUs.
-
-The driver provides a description of its available events and configuration
-options in sysfs, see /sys/devices/l2cache_0.
-
-The "format" directory describes the format of the events.
-
-Events can be envisioned as a 2-dimensional array. Each column represents
-a group of events. There are 8 groups. Only one entry from each
-group can be in use at a time. If multiple events from the same group
-are specified, the conflicting events cannot be counted at the same time.
-
-Events are specified as 0xCCG, where CC is 2 hex digits specifying
-the code (array row) and G specifies the group (column) 0-7.
-
-In addition there is a cycle counter event specified by the value 0xFE
-which is outside the above scheme.
-
-The driver provides a "cpumask" sysfs attribute which contains a mask
-consisting of one CPU per cluster which will be used to handle all the PMU
-events on that cluster.
-
-Examples for use with perf::
-
-  perf stat -e l2cache_0/config=0x001/,l2cache_0/config=0x042/ -a sleep 1
-
-  perf stat -e l2cache_0/config=0xfe/ -C 2 sleep 1
-
-The driver does not support sampling, therefore "perf record" will
-not work. Per-task perf sessions are not supported.
diff --git a/Documentation/perf/qcom_l3_pmu.rst b/Documentation/perf/qcom_l3_pmu.rst
deleted file mode 100644
index a3d014a46bfd..000000000000
--- a/Documentation/perf/qcom_l3_pmu.rst
+++ /dev/null
@@ -1,26 +0,0 @@
-===========================================================================
-Qualcomm Datacenter Technologies L3 Cache Performance Monitoring Unit (PMU)
-===========================================================================
-
-This driver supports the L3 cache PMUs found in Qualcomm Datacenter Technologies
-Centriq SoCs. The L3 cache on these SOCs is composed of multiple slices, shared
-by all cores within a socket. Each slice is exposed as a separate uncore perf
-PMU with device name l3cache_<socket>_<instance>. User space is responsible
-for aggregating across slices.
-
-The driver provides a description of its available events and configuration
-options in sysfs, see /sys/devices/l3cache*. Given that these are uncore PMUs
-the driver also exposes a "cpumask" sysfs attribute which contains a mask
-consisting of one CPU per socket which will be used to handle all the PMU
-events on that socket.
-
-The hardware implements 32bit event counters and has a flat 8bit event space
-exposed via the "event" format attribute. In addition to the 32bit physical
-counters the driver supports virtual 64bit hardware counters by using hardware
-counter chaining. This feature is exposed via the "lc" (long counter) format
-flag. E.g.::
-
-  perf stat -e l3cache_0_0/read-miss,lc/
-
-Given that these are uncore PMUs the driver does not support sampling, therefore
-"perf record" will not work. Per-task perf sessions are not supported.
diff --git a/Documentation/perf/thunderx2-pmu.rst b/Documentation/perf/thunderx2-pmu.rst
deleted file mode 100644
index 08e33675853a..000000000000
--- a/Documentation/perf/thunderx2-pmu.rst
+++ /dev/null
@@ -1,42 +0,0 @@
-=============================================================
-Cavium ThunderX2 SoC Performance Monitoring Unit (PMU UNCORE)
-=============================================================
-
-The ThunderX2 SoC PMU consists of independent, system-wide, per-socket
-PMUs such as the Level 3 Cache (L3C) and DDR4 Memory Controller (DMC).
-
-The DMC has 8 interleaved channels and the L3C has 16 interleaved tiles.
-Events are counted for the default channel (i.e. channel 0) and prorated
-to the total number of channels/tiles.
-
-The DMC and L3C support up to 4 counters. Counters are independently
-programmable and can be started and stopped individually. Each counter
-can be set to a different event. Counters are 32-bit and do not support
-an overflow interrupt; they are read every 2 seconds.
-
-PMU UNCORE (perf) driver:
-
-The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and
-L3C devices.  Each PMU can be used to count up to 4 events
-simultaneously. The PMUs provide a description of their available events
-and configuration options under sysfs, see
-/sys/devices/uncore_<l3c_S/dmc_S/>; S is the socket id.
-
-The driver does not support sampling, therefore "perf record" will not
-work. Per-task perf sessions are also not supported.
-
-Examples::
-
-  # perf stat -a -e uncore_dmc_0/cnt_cycles/ sleep 1
-
-  # perf stat -a -e \
-  uncore_dmc_0/cnt_cycles/,\
-  uncore_dmc_0/data_transfers/,\
-  uncore_dmc_0/read_txns/,\
-  uncore_dmc_0/write_txns/ sleep 1
-
-  # perf stat -a -e \
-  uncore_l3c_0/read_request/,\
-  uncore_l3c_0/read_hit/,\
-  uncore_l3c_0/inv_request/,\
-  uncore_l3c_0/inv_hit/ sleep 1
diff --git a/Documentation/perf/xgene-pmu.rst b/Documentation/perf/xgene-pmu.rst
deleted file mode 100644
index 644f8ed89152..000000000000
--- a/Documentation/perf/xgene-pmu.rst
+++ /dev/null
@@ -1,49 +0,0 @@
-================================================
-APM X-Gene SoC Performance Monitoring Unit (PMU)
-================================================
-
-X-Gene SoC PMU consists of various independent system device PMUs such as
-L3 cache(s), I/O bridge(s), memory controller bridge(s) and memory
-controller(s). These PMU devices are loosely architected to follow the
-same model as the PMU for ARM cores. The PMUs share the same top level
-interrupt and status CSR region.
-
-PMU (perf) driver
------------------
-
-The xgene-pmu driver registers several perf PMU drivers. Each of the perf
-driver provides description of its available events and configuration options
-in sysfs, see /sys/devices/<l3cX/iobX/mcbX/mcX>/.
-
-The "format" directory describes format of the config (event ID),
-config1 (agent ID) fields of the perf_event_attr structure. The "events"
-directory provides configuration templates for all supported event types that
-can be used with perf tool. For example, "l3c0/bank-fifo-full/" is an
-equivalent of "l3c0/config=0x0b/".
-
-Most of the SoC PMU has a specific list of agent ID used for monitoring
-performance of a specific datapath. For example, agents of a L3 cache can be
-a specific CPU or an I/O bridge. Each PMU has a set of 2 registers capable of
-masking the agents from which the request come from. If the bit with
-the bit number corresponding to the agent is set, the event is counted only if
-it is caused by a request from that agent. Each agent ID bit is inversely mapped
-to a corresponding bit in "config1" field. By default, the event will be
-counted for all agent requests (config1 = 0x0). For all the supported agents of
-each PMU, please refer to APM X-Gene User Manual.
-
-Each perf driver also provides a "cpumask" sysfs attribute, which contains a
-single CPU ID of the processor which will be used to handle all the PMU events.
-
-Example for perf tool use::
-
- / # perf list | grep -e l3c -e iob -e mcb -e mc
-   l3c0/ackq-full/                                    [Kernel PMU event]
- <...>
-   mcb1/mcb-csw-stall/                                [Kernel PMU event]
-
- / # perf stat -a -e l3c0/read-miss/,mcb1/csw-write-request/ sleep 1
-
- / # perf stat -a -e l3c0/read-miss,config1=0xfffffffffffffffe/ sleep 1
-
-The driver does not support sampling, therefore "perf record" will
-not work. Per-task (without "-a") perf sessions are not supported.
-- 
cgit 


From ae4a05027e2f883fb5f822e48d67cacc26bf60e1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 16:32:31 -0300
Subject: docs: nvdimm: add it to the driver-api book

The descriptions here are from Kernel driver's PoV.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/driver-api/index.rst           |   1 +
 Documentation/driver-api/nvdimm/btt.rst      | 285 +++++++++
 Documentation/driver-api/nvdimm/index.rst    |  10 +
 Documentation/driver-api/nvdimm/nvdimm.rst   | 887 +++++++++++++++++++++++++++
 Documentation/driver-api/nvdimm/security.rst | 143 +++++
 Documentation/nvdimm/btt.rst                 | 285 ---------
 Documentation/nvdimm/index.rst               |  12 -
 Documentation/nvdimm/nvdimm.rst              | 887 ---------------------------
 Documentation/nvdimm/security.rst            | 143 -----
 9 files changed, 1326 insertions(+), 1327 deletions(-)
 create mode 100644 Documentation/driver-api/nvdimm/btt.rst
 create mode 100644 Documentation/driver-api/nvdimm/index.rst
 create mode 100644 Documentation/driver-api/nvdimm/nvdimm.rst
 create mode 100644 Documentation/driver-api/nvdimm/security.rst
 delete mode 100644 Documentation/nvdimm/btt.rst
 delete mode 100644 Documentation/nvdimm/index.rst
 delete mode 100644 Documentation/nvdimm/nvdimm.rst
 delete mode 100644 Documentation/nvdimm/security.rst

(limited to 'Documentation')

diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index d665cd9ab95f..410dd7110772 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -44,6 +44,7 @@ available subsections can be seen below.
    mtdnand
    miscellaneous
    mei/index
+   nvdimm/index
    w1
    rapidio/index
    s390-drivers
diff --git a/Documentation/driver-api/nvdimm/btt.rst b/Documentation/driver-api/nvdimm/btt.rst
new file mode 100644
index 000000000000..2d8269f834bd
--- /dev/null
+++ b/Documentation/driver-api/nvdimm/btt.rst
@@ -0,0 +1,285 @@
+=============================
+BTT - Block Translation Table
+=============================
+
+
+1. Introduction
+===============
+
+Persistent memory based storage is able to perform IO at byte (or more
+accurately, cache line) granularity. However, we often want to expose such
+storage as traditional block devices. The block drivers for persistent memory
+will do exactly this. However, they do not provide any atomicity guarantees.
+Traditional SSDs typically provide protection against torn sectors in hardware,
+using stored energy in capacitors to complete in-flight block writes, or perhaps
+in firmware. We don't have this luxury with persistent memory - if a write is in
+progress, and we experience a power failure, the block will contain a mix of old
+and new data. Applications may not be prepared to handle such a scenario.
+
+The Block Translation Table (BTT) provides atomic sector update semantics for
+persistent memory devices, so that applications that rely on sector writes not
+being torn can continue to do so. The BTT manifests itself as a stacked block
+device, and reserves a portion of the underlying storage for its metadata. At
+the heart of it, is an indirection table that re-maps all the blocks on the
+volume. It can be thought of as an extremely simple file system that only
+provides atomic sector updates.
+
+
+2. Static Layout
+================
+
+The underlying storage on which a BTT can be laid out is not limited in any way.
+The BTT, however, splits the available space into chunks of up to 512 GiB,
+called "Arenas".
+
+Each arena follows the same layout for its metadata, and all references in an
+arena are internal to it (with the exception of one field that points to the
+next arena). The following depicts the "On-disk" metadata layout::
+
+
+    Backing Store     +------->  Arena
+  +---------------+   |   +------------------+
+  |               |   |   | Arena info block |
+  |    Arena 0    +---+   |       4K         |
+  |     512G      |       +------------------+
+  |               |       |                  |
+  +---------------+       |                  |
+  |               |       |                  |
+  |    Arena 1    |       |   Data Blocks    |
+  |     512G      |       |                  |
+  |               |       |                  |
+  +---------------+       |                  |
+  |       .       |       |                  |
+  |       .       |       |                  |
+  |       .       |       |                  |
+  |               |       |                  |
+  |               |       |                  |
+  +---------------+       +------------------+
+                          |                  |
+                          |     BTT Map      |
+                          |                  |
+                          |                  |
+                          +------------------+
+                          |                  |
+                          |     BTT Flog     |
+                          |                  |
+                          +------------------+
+                          | Info block copy  |
+                          |       4K         |
+                          +------------------+
+
+
+3. Theory of Operation
+======================
+
+
+a. The BTT Map
+--------------
+
+The map is a simple lookup/indirection table that maps an LBA to an internal
+block. Each map entry is 32 bits. The two most significant bits are special
+flags, and the remaining form the internal block number.
+
+======== =============================================================
+Bit      Description
+======== =============================================================
+31 - 30	 Error and Zero flags - Used in the following way:
+
+	   == ==  ====================================================
+	   31 30  Description
+	   == ==  ====================================================
+	   0  0	  Initial state. Reads return zeroes; Premap = Postmap
+	   0  1	  Zero state: Reads return zeroes
+	   1  0	  Error state: Reads fail; Writes clear 'E' bit
+	   1  1	  Normal Block – has valid postmap
+	   == ==  ====================================================
+
+29 - 0	 Mappings to internal 'postmap' blocks
+======== =============================================================
+
+
+Some of the terminology that will be subsequently used:
+
+============	================================================================
+External LBA	LBA as made visible to upper layers.
+ABA		Arena Block Address - Block offset/number within an arena
+Premap ABA	The block offset into an arena, which was decided upon by range
+		checking the External LBA
+Postmap ABA	The block number in the "Data Blocks" area obtained after
+		indirection from the map
+nfree		The number of free blocks that are maintained at any given time.
+		This is the number of concurrent writes that can happen to the
+		arena.
+============	================================================================
+
+
+For example, after adding a BTT, we surface a disk of 1024G. We get a read for
+the external LBA at 768G. This falls into the second arena, and of the 512G
+worth of blocks that this arena contributes, this block is at 256G. Thus, the
+premap ABA is 256G. We now refer to the map, and find out the mapping for block
+'X' (256G) points to block 'Y', say '64'. Thus the postmap ABA is 64.
+
+
+b. The BTT Flog
+---------------
+
+The BTT provides sector atomicity by making every write an "allocating write",
+i.e. Every write goes to a "free" block. A running list of free blocks is
+maintained in the form of the BTT flog. 'Flog' is a combination of the words
+"free list" and "log". The flog contains 'nfree' entries, and an entry contains:
+
+========  =====================================================================
+lba       The premap ABA that is being written to
+old_map   The old postmap ABA - after 'this' write completes, this will be a
+	  free block.
+new_map   The new postmap ABA. The map will up updated to reflect this
+	  lba->postmap_aba mapping, but we log it here in case we have to
+	  recover.
+seq	  Sequence number to mark which of the 2 sections of this flog entry is
+	  valid/newest. It cycles between 01->10->11->01 (binary) under normal
+	  operation, with 00 indicating an uninitialized state.
+lba'	  alternate lba entry
+old_map'  alternate old postmap entry
+new_map'  alternate new postmap entry
+seq'	  alternate sequence number.
+========  =====================================================================
+
+Each of the above fields is 32-bit, making one entry 32 bytes. Entries are also
+padded to 64 bytes to avoid cache line sharing or aliasing. Flog updates are
+done such that for any entry being written, it:
+a. overwrites the 'old' section in the entry based on sequence numbers
+b. writes the 'new' section such that the sequence number is written last.
+
+
+c. The concept of lanes
+-----------------------
+
+While 'nfree' describes the number of concurrent IOs an arena can process
+concurrently, 'nlanes' is the number of IOs the BTT device as a whole can
+process::
+
+	nlanes = min(nfree, num_cpus)
+
+A lane number is obtained at the start of any IO, and is used for indexing into
+all the on-disk and in-memory data structures for the duration of the IO. If
+there are more CPUs than the max number of available lanes, than lanes are
+protected by spinlocks.
+
+
+d. In-memory data structure: Read Tracking Table (RTT)
+------------------------------------------------------
+
+Consider a case where we have two threads, one doing reads and the other,
+writes. We can hit a condition where the writer thread grabs a free block to do
+a new IO, but the (slow) reader thread is still reading from it. In other words,
+the reader consulted a map entry, and started reading the corresponding block. A
+writer started writing to the same external LBA, and finished the write updating
+the map for that external LBA to point to its new postmap ABA. At this point the
+internal, postmap block that the reader is (still) reading has been inserted
+into the list of free blocks. If another write comes in for the same LBA, it can
+grab this free block, and start writing to it, causing the reader to read
+incorrect data. To prevent this, we introduce the RTT.
+
+The RTT is a simple, per arena table with 'nfree' entries. Every reader inserts
+into rtt[lane_number], the postmap ABA it is reading, and clears it after the
+read is complete. Every writer thread, after grabbing a free block, checks the
+RTT for its presence. If the postmap free block is in the RTT, it waits till the
+reader clears the RTT entry, and only then starts writing to it.
+
+
+e. In-memory data structure: map locks
+--------------------------------------
+
+Consider a case where two writer threads are writing to the same LBA. There can
+be a race in the following sequence of steps::
+
+	free[lane] = map[premap_aba]
+	map[premap_aba] = postmap_aba
+
+Both threads can update their respective free[lane] with the same old, freed
+postmap_aba. This has made the layout inconsistent by losing a free entry, and
+at the same time, duplicating another free entry for two lanes.
+
+To solve this, we could have a single map lock (per arena) that has to be taken
+before performing the above sequence, but we feel that could be too contentious.
+Instead we use an array of (nfree) map_locks that is indexed by
+(premap_aba modulo nfree).
+
+
+f. Reconstruction from the Flog
+-------------------------------
+
+On startup, we analyze the BTT flog to create our list of free blocks. We walk
+through all the entries, and for each lane, of the set of two possible
+'sections', we always look at the most recent one only (based on the sequence
+number). The reconstruction rules/steps are simple:
+
+- Read map[log_entry.lba].
+- If log_entry.new matches the map entry, then log_entry.old is free.
+- If log_entry.new does not match the map entry, then log_entry.new is free.
+  (This case can only be caused by power-fails/unsafe shutdowns)
+
+
+g. Summarizing - Read and Write flows
+-------------------------------------
+
+Read:
+
+1.  Convert external LBA to arena number + pre-map ABA
+2.  Get a lane (and take lane_lock)
+3.  Read map to get the entry for this pre-map ABA
+4.  Enter post-map ABA into RTT[lane]
+5.  If TRIM flag set in map, return zeroes, and end IO (go to step 8)
+6.  If ERROR flag set in map, end IO with EIO (go to step 8)
+7.  Read data from this block
+8.  Remove post-map ABA entry from RTT[lane]
+9.  Release lane (and lane_lock)
+
+Write:
+
+1.  Convert external LBA to Arena number + pre-map ABA
+2.  Get a lane (and take lane_lock)
+3.  Use lane to index into in-memory free list and obtain a new block, next flog
+    index, next sequence number
+4.  Scan the RTT to check if free block is present, and spin/wait if it is.
+5.  Write data to this free block
+6.  Read map to get the existing post-map ABA entry for this pre-map ABA
+7.  Write flog entry: [premap_aba / old postmap_aba / new postmap_aba / seq_num]
+8.  Write new post-map ABA into map.
+9.  Write old post-map entry into the free list
+10. Calculate next sequence number and write into the free list entry
+11. Release lane (and lane_lock)
+
+
+4. Error Handling
+=================
+
+An arena would be in an error state if any of the metadata is corrupted
+irrecoverably, either due to a bug or a media error. The following conditions
+indicate an error:
+
+- Info block checksum does not match (and recovering from the copy also fails)
+- All internal available blocks are not uniquely and entirely addressed by the
+  sum of mapped blocks and free blocks (from the BTT flog).
+- Rebuilding free list from the flog reveals missing/duplicate/impossible
+  entries
+- A map entry is out of bounds
+
+If any of these error conditions are encountered, the arena is put into a read
+only state using a flag in the info block.
+
+
+5. Usage
+========
+
+The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem
+(pmem, or blk mode). The easiest way to set up such a namespace is using the
+'ndctl' utility [1]:
+
+For example, the ndctl command line to setup a btt with a 4k sector size is::
+
+    ndctl create-namespace -f -e namespace0.0 -m sector -l 4k
+
+See ndctl create-namespace --help for more options.
+
+[1]: https://github.com/pmem/ndctl
diff --git a/Documentation/driver-api/nvdimm/index.rst b/Documentation/driver-api/nvdimm/index.rst
new file mode 100644
index 000000000000..19dc8ee371dc
--- /dev/null
+++ b/Documentation/driver-api/nvdimm/index.rst
@@ -0,0 +1,10 @@
+===================================
+Non-Volatile Memory Device (NVDIMM)
+===================================
+
+.. toctree::
+   :maxdepth: 1
+
+   nvdimm
+   btt
+   security
diff --git a/Documentation/driver-api/nvdimm/nvdimm.rst b/Documentation/driver-api/nvdimm/nvdimm.rst
new file mode 100644
index 000000000000..08f855cbb4e6
--- /dev/null
+++ b/Documentation/driver-api/nvdimm/nvdimm.rst
@@ -0,0 +1,887 @@
+===============================
+LIBNVDIMM: Non-Volatile Devices
+===============================
+
+libnvdimm - kernel / libndctl - userspace helper library
+
+linux-nvdimm@lists.01.org
+
+Version 13
+
+.. contents:
+
+	Glossary
+	Overview
+	    Supporting Documents
+	    Git Trees
+	LIBNVDIMM PMEM and BLK
+	Why BLK?
+	    PMEM vs BLK
+	        BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX
+	Example NVDIMM Platform
+	LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API
+	    LIBNDCTL: Context
+	        libndctl: instantiate a new library context example
+	    LIBNVDIMM/LIBNDCTL: Bus
+	        libnvdimm: control class device in /sys/class
+	        libnvdimm: bus
+	        libndctl: bus enumeration example
+	    LIBNVDIMM/LIBNDCTL: DIMM (NMEM)
+	        libnvdimm: DIMM (NMEM)
+	        libndctl: DIMM enumeration example
+	    LIBNVDIMM/LIBNDCTL: Region
+	        libnvdimm: region
+	        libndctl: region enumeration example
+	        Why Not Encode the Region Type into the Region Name?
+	        How Do I Determine the Major Type of a Region?
+	    LIBNVDIMM/LIBNDCTL: Namespace
+	        libnvdimm: namespace
+	        libndctl: namespace enumeration example
+	        libndctl: namespace creation example
+	        Why the Term "namespace"?
+	    LIBNVDIMM/LIBNDCTL: Block Translation Table "btt"
+	        libnvdimm: btt layout
+	        libndctl: btt creation example
+	Summary LIBNDCTL Diagram
+
+
+Glossary
+========
+
+PMEM:
+  A system-physical-address range where writes are persistent.  A
+  block device composed of PMEM is capable of DAX.  A PMEM address range
+  may span an interleave of several DIMMs.
+
+BLK:
+  A set of one or more programmable memory mapped apertures provided
+  by a DIMM to access its media.  This indirection precludes the
+  performance benefit of interleaving, but enables DIMM-bounded failure
+  modes.
+
+DPA:
+  DIMM Physical Address, is a DIMM-relative offset.  With one DIMM in
+  the system there would be a 1:1 system-physical-address:DPA association.
+  Once more DIMMs are added a memory controller interleave must be
+  decoded to determine the DPA associated with a given
+  system-physical-address.  BLK capacity always has a 1:1 relationship
+  with a single-DIMM's DPA range.
+
+DAX:
+  File system extensions to bypass the page cache and block layer to
+  mmap persistent memory, from a PMEM block device, directly into a
+  process address space.
+
+DSM:
+  Device Specific Method: ACPI method to to control specific
+  device - in this case the firmware.
+
+DCR:
+  NVDIMM Control Region Structure defined in ACPI 6 Section 5.2.25.5.
+  It defines a vendor-id, device-id, and interface format for a given DIMM.
+
+BTT:
+  Block Translation Table: Persistent memory is byte addressable.
+  Existing software may have an expectation that the power-fail-atomicity
+  of writes is at least one sector, 512 bytes.  The BTT is an indirection
+  table with atomic update semantics to front a PMEM/BLK block device
+  driver and present arbitrary atomic sector sizes.
+
+LABEL:
+  Metadata stored on a DIMM device that partitions and identifies
+  (persistently names) storage between PMEM and BLK.  It also partitions
+  BLK storage to host BTTs with different parameters per BLK-partition.
+  Note that traditional partition tables, GPT/MBR, are layered on top of a
+  BLK or PMEM device.
+
+
+Overview
+========
+
+The LIBNVDIMM subsystem provides support for three types of NVDIMMs, namely,
+PMEM, BLK, and NVDIMM devices that can simultaneously support both PMEM
+and BLK mode access.  These three modes of operation are described by
+the "NVDIMM Firmware Interface Table" (NFIT) in ACPI 6.  While the LIBNVDIMM
+implementation is generic and supports pre-NFIT platforms, it was guided
+by the superset of capabilities need to support this ACPI 6 definition
+for NVDIMM resources.  The bulk of the kernel implementation is in place
+to handle the case where DPA accessible via PMEM is aliased with DPA
+accessible via BLK.  When that occurs a LABEL is needed to reserve DPA
+for exclusive access via one mode a time.
+
+Supporting Documents
+--------------------
+
+ACPI 6:
+	http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf
+NVDIMM Namespace:
+	http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf
+DSM Interface Example:
+	http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
+Driver Writer's Guide:
+	http://pmem.io/documents/NVDIMM_Driver_Writers_Guide.pdf
+
+Git Trees
+---------
+
+LIBNVDIMM:
+	https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git
+LIBNDCTL:
+	https://github.com/pmem/ndctl.git
+PMEM:
+	https://github.com/01org/prd
+
+
+LIBNVDIMM PMEM and BLK
+======================
+
+Prior to the arrival of the NFIT, non-volatile memory was described to a
+system in various ad-hoc ways.  Usually only the bare minimum was
+provided, namely, a single system-physical-address range where writes
+are expected to be durable after a system power loss.  Now, the NFIT
+specification standardizes not only the description of PMEM, but also
+BLK and platform message-passing entry points for control and
+configuration.
+
+For each NVDIMM access method (PMEM, BLK), LIBNVDIMM provides a block
+device driver:
+
+    1. PMEM (nd_pmem.ko): Drives a system-physical-address range.  This
+       range is contiguous in system memory and may be interleaved (hardware
+       memory controller striped) across multiple DIMMs.  When interleaved the
+       platform may optionally provide details of which DIMMs are participating
+       in the interleave.
+
+       Note that while LIBNVDIMM describes system-physical-address ranges that may
+       alias with BLK access as ND_NAMESPACE_PMEM ranges and those without
+       alias as ND_NAMESPACE_IO ranges, to the nd_pmem driver there is no
+       distinction.  The different device-types are an implementation detail
+       that userspace can exploit to implement policies like "only interface
+       with address ranges from certain DIMMs".  It is worth noting that when
+       aliasing is present and a DIMM lacks a label, then no block device can
+       be created by default as userspace needs to do at least one allocation
+       of DPA to the PMEM range.  In contrast ND_NAMESPACE_IO ranges, once
+       registered, can be immediately attached to nd_pmem.
+
+    2. BLK (nd_blk.ko): This driver performs I/O using a set of platform
+       defined apertures.  A set of apertures will access just one DIMM.
+       Multiple windows (apertures) allow multiple concurrent accesses, much like
+       tagged-command-queuing, and would likely be used by different threads or
+       different CPUs.
+
+       The NFIT specification defines a standard format for a BLK-aperture, but
+       the spec also allows for vendor specific layouts, and non-NFIT BLK
+       implementations may have other designs for BLK I/O.  For this reason
+       "nd_blk" calls back into platform-specific code to perform the I/O.
+
+       One such implementation is defined in the "Driver Writer's Guide" and "DSM
+       Interface Example".
+
+
+Why BLK?
+========
+
+While PMEM provides direct byte-addressable CPU-load/store access to
+NVDIMM storage, it does not provide the best system RAS (recovery,
+availability, and serviceability) model.  An access to a corrupted
+system-physical-address address causes a CPU exception while an access
+to a corrupted address through an BLK-aperture causes that block window
+to raise an error status in a register.  The latter is more aligned with
+the standard error model that host-bus-adapter attached disks present.
+
+Also, if an administrator ever wants to replace a memory it is easier to
+service a system at DIMM module boundaries.  Compare this to PMEM where
+data could be interleaved in an opaque hardware specific manner across
+several DIMMs.
+
+PMEM vs BLK
+-----------
+
+BLK-apertures solve these RAS problems, but their presence is also the
+major contributing factor to the complexity of the ND subsystem.  They
+complicate the implementation because PMEM and BLK alias in DPA space.
+Any given DIMM's DPA-range may contribute to one or more
+system-physical-address sets of interleaved DIMMs, *and* may also be
+accessed in its entirety through its BLK-aperture.  Accessing a DPA
+through a system-physical-address while simultaneously accessing the
+same DPA through a BLK-aperture has undefined results.  For this reason,
+DIMMs with this dual interface configuration include a DSM function to
+store/retrieve a LABEL.  The LABEL effectively partitions the DPA-space
+into exclusive system-physical-address and BLK-aperture accessible
+regions.  For simplicity a DIMM is allowed a PMEM "region" per each
+interleave set in which it is a member.  The remaining DPA space can be
+carved into an arbitrary number of BLK devices with discontiguous
+extents.
+
+BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+One of the few
+reasons to allow multiple BLK namespaces per REGION is so that each
+BLK-namespace can be configured with a BTT with unique atomic sector
+sizes.  While a PMEM device can host a BTT the LABEL specification does
+not provide for a sector size to be specified for a PMEM namespace.
+
+This is due to the expectation that the primary usage model for PMEM is
+via DAX, and the BTT is incompatible with DAX.  However, for the cases
+where an application or filesystem still needs atomic sector update
+guarantees it can register a BTT on a PMEM device or partition.  See
+LIBNVDIMM/NDCTL: Block Translation Table "btt"
+
+
+Example NVDIMM Platform
+=======================
+
+For the remainder of this document the following diagram will be
+referenced for any example sysfs layouts::
+
+
+                               (a)               (b)           DIMM   BLK-REGION
+            +-------------------+--------+--------+--------+
+  +------+  |       pm0.0       | blk2.0 | pm1.0  | blk2.1 |    0      region2
+  | imc0 +--+- - - region0- - - +--------+        +--------+
+  +--+---+  |       pm0.0       | blk3.0 | pm1.0  | blk3.1 |    1      region3
+     |      +-------------------+--------v        v--------+
+  +--+---+                               |                 |
+  | cpu0 |                                     region1
+  +--+---+                               |                 |
+     |      +----------------------------^        ^--------+
+  +--+---+  |           blk4.0           | pm1.0  | blk4.0 |    2      region4
+  | imc1 +--+----------------------------|        +--------+
+  +------+  |           blk5.0           | pm1.0  | blk5.0 |    3      region5
+            +----------------------------+--------+--------+
+
+In this platform we have four DIMMs and two memory controllers in one
+socket.  Each unique interface (BLK or PMEM) to DPA space is identified
+by a region device with a dynamically assigned id (REGION0 - REGION5).
+
+    1. The first portion of DIMM0 and DIMM1 are interleaved as REGION0. A
+       single PMEM namespace is created in the REGION0-SPA-range that spans most
+       of DIMM0 and DIMM1 with a user-specified name of "pm0.0". Some of that
+       interleaved system-physical-address range is reclaimed as BLK-aperture
+       accessed space starting at DPA-offset (a) into each DIMM.  In that
+       reclaimed space we create two BLK-aperture "namespaces" from REGION2 and
+       REGION3 where "blk2.0" and "blk3.0" are just human readable names that
+       could be set to any user-desired name in the LABEL.
+
+    2. In the last portion of DIMM0 and DIMM1 we have an interleaved
+       system-physical-address range, REGION1, that spans those two DIMMs as
+       well as DIMM2 and DIMM3.  Some of REGION1 is allocated to a PMEM namespace
+       named "pm1.0", the rest is reclaimed in 4 BLK-aperture namespaces (for
+       each DIMM in the interleave set), "blk2.1", "blk3.1", "blk4.0", and
+       "blk5.0".
+
+    3. The portion of DIMM2 and DIMM3 that do not participate in the REGION1
+       interleaved system-physical-address range (i.e. the DPA address past
+       offset (b) are also included in the "blk4.0" and "blk5.0" namespaces.
+       Note, that this example shows that BLK-aperture namespaces don't need to
+       be contiguous in DPA-space.
+
+    This bus is provided by the kernel under the device
+    /sys/devices/platform/nfit_test.0 when CONFIG_NFIT_TEST is enabled and
+    the nfit_test.ko module is loaded.  This not only test LIBNVDIMM but the
+    acpi_nfit.ko driver as well.
+
+
+LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API
+========================================================
+
+What follows is a description of the LIBNVDIMM sysfs layout and a
+corresponding object hierarchy diagram as viewed through the LIBNDCTL
+API.  The example sysfs paths and diagrams are relative to the Example
+NVDIMM Platform which is also the LIBNVDIMM bus used in the LIBNDCTL unit
+test.
+
+LIBNDCTL: Context
+-----------------
+
+Every API call in the LIBNDCTL library requires a context that holds the
+logging parameters and other library instance state.  The library is
+based on the libabc template:
+
+	https://git.kernel.org/cgit/linux/kernel/git/kay/libabc.git
+
+LIBNDCTL: instantiate a new library context example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+	struct ndctl_ctx *ctx;
+
+	if (ndctl_new(&ctx) == 0)
+		return ctx;
+	else
+		return NULL;
+
+LIBNVDIMM/LIBNDCTL: Bus
+-----------------------
+
+A bus has a 1:1 relationship with an NFIT.  The current expectation for
+ACPI based systems is that there is only ever one platform-global NFIT.
+That said, it is trivial to register multiple NFITs, the specification
+does not preclude it.  The infrastructure supports multiple busses and
+we use this capability to test multiple NFIT configurations in the unit
+test.
+
+LIBNVDIMM: control class device in /sys/class
+---------------------------------------------
+
+This character device accepts DSM messages to be passed to DIMM
+identified by its NFIT handle::
+
+	/sys/class/nd/ndctl0
+	|-- dev
+	|-- device -> ../../../ndbus0
+	|-- subsystem -> ../../../../../../../class/nd
+
+
+
+LIBNVDIMM: bus
+--------------
+
+::
+
+	struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
+	       struct nvdimm_bus_descriptor *nfit_desc);
+
+::
+
+	/sys/devices/platform/nfit_test.0/ndbus0
+	|-- commands
+	|-- nd
+	|-- nfit
+	|-- nmem0
+	|-- nmem1
+	|-- nmem2
+	|-- nmem3
+	|-- power
+	|-- provider
+	|-- region0
+	|-- region1
+	|-- region2
+	|-- region3
+	|-- region4
+	|-- region5
+	|-- uevent
+	`-- wait_probe
+
+LIBNDCTL: bus enumeration example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Find the bus handle that describes the bus from Example NVDIMM Platform::
+
+	static struct ndctl_bus *get_bus_by_provider(struct ndctl_ctx *ctx,
+			const char *provider)
+	{
+		struct ndctl_bus *bus;
+
+		ndctl_bus_foreach(ctx, bus)
+			if (strcmp(provider, ndctl_bus_get_provider(bus)) == 0)
+				return bus;
+
+		return NULL;
+	}
+
+	bus = get_bus_by_provider(ctx, "nfit_test.0");
+
+
+LIBNVDIMM/LIBNDCTL: DIMM (NMEM)
+-------------------------------
+
+The DIMM device provides a character device for sending commands to
+hardware, and it is a container for LABELs.  If the DIMM is defined by
+NFIT then an optional 'nfit' attribute sub-directory is available to add
+NFIT-specifics.
+
+Note that the kernel device name for "DIMMs" is "nmemX".  The NFIT
+describes these devices via "Memory Device to System Physical Address
+Range Mapping Structure", and there is no requirement that they actually
+be physical DIMMs, so we use a more generic name.
+
+LIBNVDIMM: DIMM (NMEM)
+^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+	struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
+			const struct attribute_group **groups, unsigned long flags,
+			unsigned long *dsm_mask);
+
+::
+
+	/sys/devices/platform/nfit_test.0/ndbus0
+	|-- nmem0
+	|   |-- available_slots
+	|   |-- commands
+	|   |-- dev
+	|   |-- devtype
+	|   |-- driver -> ../../../../../bus/nd/drivers/nvdimm
+	|   |-- modalias
+	|   |-- nfit
+	|   |   |-- device
+	|   |   |-- format
+	|   |   |-- handle
+	|   |   |-- phys_id
+	|   |   |-- rev_id
+	|   |   |-- serial
+	|   |   `-- vendor
+	|   |-- state
+	|   |-- subsystem -> ../../../../../bus/nd
+	|   `-- uevent
+	|-- nmem1
+	[..]
+
+
+LIBNDCTL: DIMM enumeration example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Note, in this example we are assuming NFIT-defined DIMMs which are
+identified by an "nfit_handle" a 32-bit value where:
+
+   - Bit 3:0 DIMM number within the memory channel
+   - Bit 7:4 memory channel number
+   - Bit 11:8 memory controller ID
+   - Bit 15:12 socket ID (within scope of a Node controller if node
+     controller is present)
+   - Bit 27:16 Node Controller ID
+   - Bit 31:28 Reserved
+
+::
+
+	static struct ndctl_dimm *get_dimm_by_handle(struct ndctl_bus *bus,
+	       unsigned int handle)
+	{
+		struct ndctl_dimm *dimm;
+
+		ndctl_dimm_foreach(bus, dimm)
+			if (ndctl_dimm_get_handle(dimm) == handle)
+				return dimm;
+
+		return NULL;
+	}
+
+	#define DIMM_HANDLE(n, s, i, c, d) \
+		(((n & 0xfff) << 16) | ((s & 0xf) << 12) | ((i & 0xf) << 8) \
+		 | ((c & 0xf) << 4) | (d & 0xf))
+
+	dimm = get_dimm_by_handle(bus, DIMM_HANDLE(0, 0, 0, 0, 0));
+
+LIBNVDIMM/LIBNDCTL: Region
+--------------------------
+
+A generic REGION device is registered for each PMEM range or BLK-aperture
+set.  Per the example there are 6 regions: 2 PMEM and 4 BLK-aperture
+sets on the "nfit_test.0" bus.  The primary role of regions are to be a
+container of "mappings".  A mapping is a tuple of <DIMM,
+DPA-start-offset, length>.
+
+LIBNVDIMM provides a built-in driver for these REGION devices.  This driver
+is responsible for reconciling the aliased DPA mappings across all
+regions, parsing the LABEL, if present, and then emitting NAMESPACE
+devices with the resolved/exclusive DPA-boundaries for the nd_pmem or
+nd_blk device driver to consume.
+
+In addition to the generic attributes of "mapping"s, "interleave_ways"
+and "size" the REGION device also exports some convenience attributes.
+"nstype" indicates the integer type of namespace-device this region
+emits, "devtype" duplicates the DEVTYPE variable stored by udev at the
+'add' event, "modalias" duplicates the MODALIAS variable stored by udev
+at the 'add' event, and finally, the optional "spa_index" is provided in
+the case where the region is defined by a SPA.
+
+LIBNVDIMM: region::
+
+	struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
+			struct nd_region_desc *ndr_desc);
+	struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
+			struct nd_region_desc *ndr_desc);
+
+::
+
+	/sys/devices/platform/nfit_test.0/ndbus0
+	|-- region0
+	|   |-- available_size
+	|   |-- btt0
+	|   |-- btt_seed
+	|   |-- devtype
+	|   |-- driver -> ../../../../../bus/nd/drivers/nd_region
+	|   |-- init_namespaces
+	|   |-- mapping0
+	|   |-- mapping1
+	|   |-- mappings
+	|   |-- modalias
+	|   |-- namespace0.0
+	|   |-- namespace_seed
+	|   |-- numa_node
+	|   |-- nfit
+	|   |   `-- spa_index
+	|   |-- nstype
+	|   |-- set_cookie
+	|   |-- size
+	|   |-- subsystem -> ../../../../../bus/nd
+	|   `-- uevent
+	|-- region1
+	[..]
+
+LIBNDCTL: region enumeration example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sample region retrieval routines based on NFIT-unique data like
+"spa_index" (interleave set id) for PMEM and "nfit_handle" (dimm id) for
+BLK::
+
+	static struct ndctl_region *get_pmem_region_by_spa_index(struct ndctl_bus *bus,
+			unsigned int spa_index)
+	{
+		struct ndctl_region *region;
+
+		ndctl_region_foreach(bus, region) {
+			if (ndctl_region_get_type(region) != ND_DEVICE_REGION_PMEM)
+				continue;
+			if (ndctl_region_get_spa_index(region) == spa_index)
+				return region;
+		}
+		return NULL;
+	}
+
+	static struct ndctl_region *get_blk_region_by_dimm_handle(struct ndctl_bus *bus,
+			unsigned int handle)
+	{
+		struct ndctl_region *region;
+
+		ndctl_region_foreach(bus, region) {
+			struct ndctl_mapping *map;
+
+			if (ndctl_region_get_type(region) != ND_DEVICE_REGION_BLOCK)
+				continue;
+			ndctl_mapping_foreach(region, map) {
+				struct ndctl_dimm *dimm = ndctl_mapping_get_dimm(map);
+
+				if (ndctl_dimm_get_handle(dimm) == handle)
+					return region;
+			}
+		}
+		return NULL;
+	}
+
+
+Why Not Encode the Region Type into the Region Name?
+----------------------------------------------------
+
+At first glance it seems since NFIT defines just PMEM and BLK interface
+types that we should simply name REGION devices with something derived
+from those type names.  However, the ND subsystem explicitly keeps the
+REGION name generic and expects userspace to always consider the
+region-attributes for four reasons:
+
+    1. There are already more than two REGION and "namespace" types.  For
+       PMEM there are two subtypes.  As mentioned previously we have PMEM where
+       the constituent DIMM devices are known and anonymous PMEM.  For BLK
+       regions the NFIT specification already anticipates vendor specific
+       implementations.  The exact distinction of what a region contains is in
+       the region-attributes not the region-name or the region-devtype.
+
+    2. A region with zero child-namespaces is a possible configuration.  For
+       example, the NFIT allows for a DCR to be published without a
+       corresponding BLK-aperture.  This equates to a DIMM that can only accept
+       control/configuration messages, but no i/o through a descendant block
+       device.  Again, this "type" is advertised in the attributes ('mappings'
+       == 0) and the name does not tell you much.
+
+    3. What if a third major interface type arises in the future?  Outside
+       of vendor specific implementations, it's not difficult to envision a
+       third class of interface type beyond BLK and PMEM.  With a generic name
+       for the REGION level of the device-hierarchy old userspace
+       implementations can still make sense of new kernel advertised
+       region-types.  Userspace can always rely on the generic region
+       attributes like "mappings", "size", etc and the expected child devices
+       named "namespace".  This generic format of the device-model hierarchy
+       allows the LIBNVDIMM and LIBNDCTL implementations to be more uniform and
+       future-proof.
+
+    4. There are more robust mechanisms for determining the major type of a
+       region than a device name.  See the next section, How Do I Determine the
+       Major Type of a Region?
+
+How Do I Determine the Major Type of a Region?
+----------------------------------------------
+
+Outside of the blanket recommendation of "use libndctl", or simply
+looking at the kernel header (/usr/include/linux/ndctl.h) to decode the
+"nstype" integer attribute, here are some other options.
+
+1. module alias lookup
+^^^^^^^^^^^^^^^^^^^^^^
+
+    The whole point of region/namespace device type differentiation is to
+    decide which block-device driver will attach to a given LIBNVDIMM namespace.
+    One can simply use the modalias to lookup the resulting module.  It's
+    important to note that this method is robust in the presence of a
+    vendor-specific driver down the road.  If a vendor-specific
+    implementation wants to supplant the standard nd_blk driver it can with
+    minimal impact to the rest of LIBNVDIMM.
+
+    In fact, a vendor may also want to have a vendor-specific region-driver
+    (outside of nd_region).  For example, if a vendor defined its own LABEL
+    format it would need its own region driver to parse that LABEL and emit
+    the resulting namespaces.  The output from module resolution is more
+    accurate than a region-name or region-devtype.
+
+2. udev
+^^^^^^^
+
+    The kernel "devtype" is registered in the udev database::
+
+	# udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region0
+	P: /devices/platform/nfit_test.0/ndbus0/region0
+	E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region0
+	E: DEVTYPE=nd_pmem
+	E: MODALIAS=nd:t2
+	E: SUBSYSTEM=nd
+
+	# udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region4
+	P: /devices/platform/nfit_test.0/ndbus0/region4
+	E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region4
+	E: DEVTYPE=nd_blk
+	E: MODALIAS=nd:t3
+	E: SUBSYSTEM=nd
+
+    ...and is available as a region attribute, but keep in mind that the
+    "devtype" does not indicate sub-type variations and scripts should
+    really be understanding the other attributes.
+
+3. type specific attributes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+    As it currently stands a BLK-aperture region will never have a
+    "nfit/spa_index" attribute, but neither will a non-NFIT PMEM region.  A
+    BLK region with a "mappings" value of 0 is, as mentioned above, a DIMM
+    that does not allow I/O.  A PMEM region with a "mappings" value of zero
+    is a simple system-physical-address range.
+
+
+LIBNVDIMM/LIBNDCTL: Namespace
+-----------------------------
+
+A REGION, after resolving DPA aliasing and LABEL specified boundaries,
+surfaces one or more "namespace" devices.  The arrival of a "namespace"
+device currently triggers either the nd_blk or nd_pmem driver to load
+and register a disk/block device.
+
+LIBNVDIMM: namespace
+^^^^^^^^^^^^^^^^^^^^
+
+Here is a sample layout from the three major types of NAMESPACE where
+namespace0.0 represents DIMM-info-backed PMEM (note that it has a 'uuid'
+attribute), namespace2.0 represents a BLK namespace (note it has a
+'sector_size' attribute) that, and namespace6.0 represents an anonymous
+PMEM namespace (note that has no 'uuid' attribute due to not support a
+LABEL)::
+
+	/sys/devices/platform/nfit_test.0/ndbus0/region0/namespace0.0
+	|-- alt_name
+	|-- devtype
+	|-- dpa_extents
+	|-- force_raw
+	|-- modalias
+	|-- numa_node
+	|-- resource
+	|-- size
+	|-- subsystem -> ../../../../../../bus/nd
+	|-- type
+	|-- uevent
+	`-- uuid
+	/sys/devices/platform/nfit_test.0/ndbus0/region2/namespace2.0
+	|-- alt_name
+	|-- devtype
+	|-- dpa_extents
+	|-- force_raw
+	|-- modalias
+	|-- numa_node
+	|-- sector_size
+	|-- size
+	|-- subsystem -> ../../../../../../bus/nd
+	|-- type
+	|-- uevent
+	`-- uuid
+	/sys/devices/platform/nfit_test.1/ndbus1/region6/namespace6.0
+	|-- block
+	|   `-- pmem0
+	|-- devtype
+	|-- driver -> ../../../../../../bus/nd/drivers/pmem
+	|-- force_raw
+	|-- modalias
+	|-- numa_node
+	|-- resource
+	|-- size
+	|-- subsystem -> ../../../../../../bus/nd
+	|-- type
+	`-- uevent
+
+LIBNDCTL: namespace enumeration example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Namespaces are indexed relative to their parent region, example below.
+These indexes are mostly static from boot to boot, but subsystem makes
+no guarantees in this regard.  For a static namespace identifier use its
+'uuid' attribute.
+
+::
+
+  static struct ndctl_namespace
+  *get_namespace_by_id(struct ndctl_region *region, unsigned int id)
+  {
+          struct ndctl_namespace *ndns;
+
+          ndctl_namespace_foreach(region, ndns)
+                  if (ndctl_namespace_get_id(ndns) == id)
+                          return ndns;
+
+          return NULL;
+  }
+
+LIBNDCTL: namespace creation example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Idle namespaces are automatically created by the kernel if a given
+region has enough available capacity to create a new namespace.
+Namespace instantiation involves finding an idle namespace and
+configuring it.  For the most part the setting of namespace attributes
+can occur in any order, the only constraint is that 'uuid' must be set
+before 'size'.  This enables the kernel to track DPA allocations
+internally with a static identifier::
+
+  static int configure_namespace(struct ndctl_region *region,
+                  struct ndctl_namespace *ndns,
+                  struct namespace_parameters *parameters)
+  {
+          char devname[50];
+
+          snprintf(devname, sizeof(devname), "namespace%d.%d",
+                          ndctl_region_get_id(region), paramaters->id);
+
+          ndctl_namespace_set_alt_name(ndns, devname);
+          /* 'uuid' must be set prior to setting size! */
+          ndctl_namespace_set_uuid(ndns, paramaters->uuid);
+          ndctl_namespace_set_size(ndns, paramaters->size);
+          /* unlike pmem namespaces, blk namespaces have a sector size */
+          if (parameters->lbasize)
+                  ndctl_namespace_set_sector_size(ndns, parameters->lbasize);
+          ndctl_namespace_enable(ndns);
+  }
+
+
+Why the Term "namespace"?
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+    1. Why not "volume" for instance?  "volume" ran the risk of confusing
+       ND (libnvdimm subsystem) to a volume manager like device-mapper.
+
+    2. The term originated to describe the sub-devices that can be created
+       within a NVME controller (see the nvme specification:
+       http://www.nvmexpress.org/specifications/), and NFIT namespaces are
+       meant to parallel the capabilities and configurability of
+       NVME-namespaces.
+
+
+LIBNVDIMM/LIBNDCTL: Block Translation Table "btt"
+-------------------------------------------------
+
+A BTT (design document: http://pmem.io/2014/09/23/btt.html) is a stacked
+block device driver that fronts either the whole block device or a
+partition of a block device emitted by either a PMEM or BLK NAMESPACE.
+
+LIBNVDIMM: btt layout
+^^^^^^^^^^^^^^^^^^^^^
+
+Every region will start out with at least one BTT device which is the
+seed device.  To activate it set the "namespace", "uuid", and
+"sector_size" attributes and then bind the device to the nd_pmem or
+nd_blk driver depending on the region type::
+
+	/sys/devices/platform/nfit_test.1/ndbus0/region0/btt0/
+	|-- namespace
+	|-- delete
+	|-- devtype
+	|-- modalias
+	|-- numa_node
+	|-- sector_size
+	|-- subsystem -> ../../../../../bus/nd
+	|-- uevent
+	`-- uuid
+
+LIBNDCTL: btt creation example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Similar to namespaces an idle BTT device is automatically created per
+region.  Each time this "seed" btt device is configured and enabled a new
+seed is created.  Creating a BTT configuration involves two steps of
+finding and idle BTT and assigning it to consume a PMEM or BLK namespace::
+
+	static struct ndctl_btt *get_idle_btt(struct ndctl_region *region)
+	{
+		struct ndctl_btt *btt;
+
+		ndctl_btt_foreach(region, btt)
+			if (!ndctl_btt_is_enabled(btt)
+					&& !ndctl_btt_is_configured(btt))
+				return btt;
+
+		return NULL;
+	}
+
+	static int configure_btt(struct ndctl_region *region,
+			struct btt_parameters *parameters)
+	{
+		btt = get_idle_btt(region);
+
+		ndctl_btt_set_uuid(btt, parameters->uuid);
+		ndctl_btt_set_sector_size(btt, parameters->sector_size);
+		ndctl_btt_set_namespace(btt, parameters->ndns);
+		/* turn off raw mode device */
+		ndctl_namespace_disable(parameters->ndns);
+		/* turn on btt access */
+		ndctl_btt_enable(btt);
+	}
+
+Once instantiated a new inactive btt seed device will appear underneath
+the region.
+
+Once a "namespace" is removed from a BTT that instance of the BTT device
+will be deleted or otherwise reset to default values.  This deletion is
+only at the device model level.  In order to destroy a BTT the "info
+block" needs to be destroyed.  Note, that to destroy a BTT the media
+needs to be written in raw mode.  By default, the kernel will autodetect
+the presence of a BTT and disable raw mode.  This autodetect behavior
+can be suppressed by enabling raw mode for the namespace via the
+ndctl_namespace_set_raw_mode() API.
+
+
+Summary LIBNDCTL Diagram
+------------------------
+
+For the given example above, here is the view of the objects as seen by the
+LIBNDCTL API::
+
+              +---+
+              |CTX|    +---------+   +--------------+  +---------------+
+              +-+-+  +-> REGION0 +---> NAMESPACE0.0 +--> PMEM8 "pm0.0" |
+                |    | +---------+   +--------------+  +---------------+
+  +-------+     |    | +---------+   +--------------+  +---------------+
+  | DIMM0 <-+   |    +-> REGION1 +---> NAMESPACE1.0 +--> PMEM6 "pm1.0" |
+  +-------+ |   |    | +---------+   +--------------+  +---------------+
+  | DIMM1 <-+ +-v--+ | +---------+   +--------------+  +---------------+
+  +-------+ +-+BUS0+---> REGION2 +-+-> NAMESPACE2.0 +--> ND6  "blk2.0" |
+  | DIMM2 <-+ +----+ | +---------+ | +--------------+  +----------------------+
+  +-------+ |        |             +-> NAMESPACE2.1 +--> ND5  "blk2.1" | BTT2 |
+  | DIMM3 <-+        |               +--------------+  +----------------------+
+  +-------+          | +---------+   +--------------+  +---------------+
+                     +-> REGION3 +-+-> NAMESPACE3.0 +--> ND4  "blk3.0" |
+                     | +---------+ | +--------------+  +----------------------+
+                     |             +-> NAMESPACE3.1 +--> ND3  "blk3.1" | BTT1 |
+                     |               +--------------+  +----------------------+
+                     | +---------+   +--------------+  +---------------+
+                     +-> REGION4 +---> NAMESPACE4.0 +--> ND2  "blk4.0" |
+                     | +---------+   +--------------+  +---------------+
+                     | +---------+   +--------------+  +----------------------+
+                     +-> REGION5 +---> NAMESPACE5.0 +--> ND1  "blk5.0" | BTT0 |
+                       +---------+   +--------------+  +---------------+------+
diff --git a/Documentation/driver-api/nvdimm/security.rst b/Documentation/driver-api/nvdimm/security.rst
new file mode 100644
index 000000000000..ad9dea099b34
--- /dev/null
+++ b/Documentation/driver-api/nvdimm/security.rst
@@ -0,0 +1,143 @@
+===============
+NVDIMM Security
+===============
+
+1. Introduction
+---------------
+
+With the introduction of Intel Device Specific Methods (DSM) v1.8
+specification [1], security DSMs are introduced. The spec added the following
+security DSMs: "get security state", "set passphrase", "disable passphrase",
+"unlock unit", "freeze lock", "secure erase", and "overwrite". A security_ops
+data structure has been added to struct dimm in order to support the security
+operations and generic APIs are exposed to allow vendor neutral operations.
+
+2. Sysfs Interface
+------------------
+The "security" sysfs attribute is provided in the nvdimm sysfs directory. For
+example:
+/sys/devices/LNXSYSTM:00/LNXSYBUS:00/ACPI0012:00/ndbus0/nmem0/security
+
+The "show" attribute of that attribute will display the security state for
+that DIMM. The following states are available: disabled, unlocked, locked,
+frozen, and overwrite. If security is not supported, the sysfs attribute
+will not be visible.
+
+The "store" attribute takes several commands when it is being written to
+in order to support some of the security functionalities:
+update <old_keyid> <new_keyid> - enable or update passphrase.
+disable <keyid> - disable enabled security and remove key.
+freeze - freeze changing of security states.
+erase <keyid> - delete existing user encryption key.
+overwrite <keyid> - wipe the entire nvdimm.
+master_update <keyid> <new_keyid> - enable or update master passphrase.
+master_erase <keyid> - delete existing user encryption key.
+
+3. Key Management
+-----------------
+
+The key is associated to the payload by the DIMM id. For example:
+# cat /sys/devices/LNXSYSTM:00/LNXSYBUS:00/ACPI0012:00/ndbus0/nmem0/nfit/id
+8089-a2-1740-00000133
+The DIMM id would be provided along with the key payload (passphrase) to
+the kernel.
+
+The security keys are managed on the basis of a single key per DIMM. The
+key "passphrase" is expected to be 32bytes long. This is similar to the ATA
+security specification [2]. A key is initially acquired via the request_key()
+kernel API call during nvdimm unlock. It is up to the user to make sure that
+all the keys are in the kernel user keyring for unlock.
+
+A nvdimm encrypted-key of format enc32 has the description format of:
+nvdimm:<bus-provider-specific-unique-id>
+
+See file ``Documentation/security/keys/trusted-encrypted.rst`` for creating
+encrypted-keys of enc32 format. TPM usage with a master trusted key is
+preferred for sealing the encrypted-keys.
+
+4. Unlocking
+------------
+When the DIMMs are being enumerated by the kernel, the kernel will attempt to
+retrieve the key from the kernel user keyring. This is the only time
+a locked DIMM can be unlocked. Once unlocked, the DIMM will remain unlocked
+until reboot. Typically an entity (i.e. shell script) will inject all the
+relevant encrypted-keys into the kernel user keyring during the initramfs phase.
+This provides the unlock function access to all the related keys that contain
+the passphrase for the respective nvdimms.  It is also recommended that the
+keys are injected before libnvdimm is loaded by modprobe.
+
+5. Update
+---------
+When doing an update, it is expected that the existing key is removed from
+the kernel user keyring and reinjected as different (old) key. It's irrelevant
+what the key description is for the old key since we are only interested in the
+keyid when doing the update operation. It is also expected that the new key
+is injected with the description format described from earlier in this
+document.  The update command written to the sysfs attribute will be with
+the format:
+update <old keyid> <new keyid>
+
+If there is no old keyid due to a security enabling, then a 0 should be
+passed in.
+
+6. Freeze
+---------
+The freeze operation does not require any keys. The security config can be
+frozen by a user with root privelege.
+
+7. Disable
+----------
+The security disable command format is:
+disable <keyid>
+
+An key with the current passphrase payload that is tied to the nvdimm should be
+in the kernel user keyring.
+
+8. Secure Erase
+---------------
+The command format for doing a secure erase is:
+erase <keyid>
+
+An key with the current passphrase payload that is tied to the nvdimm should be
+in the kernel user keyring.
+
+9. Overwrite
+------------
+The command format for doing an overwrite is:
+overwrite <keyid>
+
+Overwrite can be done without a key if security is not enabled. A key serial
+of 0 can be passed in to indicate no key.
+
+The sysfs attribute "security" can be polled to wait on overwrite completion.
+Overwrite can last tens of minutes or more depending on nvdimm size.
+
+An encrypted-key with the current user passphrase that is tied to the nvdimm
+should be injected and its keyid should be passed in via sysfs.
+
+10. Master Update
+-----------------
+The command format for doing a master update is:
+update <old keyid> <new keyid>
+
+The operating mechanism for master update is identical to update except the
+master passphrase key is passed to the kernel. The master passphrase key
+is just another encrypted-key.
+
+This command is only available when security is disabled.
+
+11. Master Erase
+----------------
+The command format for doing a master erase is:
+master_erase <current keyid>
+
+This command has the same operating mechanism as erase except the master
+passphrase key is passed to the kernel. The master passphrase key is just
+another encrypted-key.
+
+This command is only available when the master security is enabled, indicated
+by the extended security status.
+
+[1]: http://pmem.io/documents/NVDIMM_DSM_Interface-V1.8.pdf
+
+[2]: http://www.t13.org/documents/UploadedDocuments/docs2006/e05179r4-ACS-SecurityClarifications.pdf
diff --git a/Documentation/nvdimm/btt.rst b/Documentation/nvdimm/btt.rst
deleted file mode 100644
index 2d8269f834bd..000000000000
--- a/Documentation/nvdimm/btt.rst
+++ /dev/null
@@ -1,285 +0,0 @@
-=============================
-BTT - Block Translation Table
-=============================
-
-
-1. Introduction
-===============
-
-Persistent memory based storage is able to perform IO at byte (or more
-accurately, cache line) granularity. However, we often want to expose such
-storage as traditional block devices. The block drivers for persistent memory
-will do exactly this. However, they do not provide any atomicity guarantees.
-Traditional SSDs typically provide protection against torn sectors in hardware,
-using stored energy in capacitors to complete in-flight block writes, or perhaps
-in firmware. We don't have this luxury with persistent memory - if a write is in
-progress, and we experience a power failure, the block will contain a mix of old
-and new data. Applications may not be prepared to handle such a scenario.
-
-The Block Translation Table (BTT) provides atomic sector update semantics for
-persistent memory devices, so that applications that rely on sector writes not
-being torn can continue to do so. The BTT manifests itself as a stacked block
-device, and reserves a portion of the underlying storage for its metadata. At
-the heart of it, is an indirection table that re-maps all the blocks on the
-volume. It can be thought of as an extremely simple file system that only
-provides atomic sector updates.
-
-
-2. Static Layout
-================
-
-The underlying storage on which a BTT can be laid out is not limited in any way.
-The BTT, however, splits the available space into chunks of up to 512 GiB,
-called "Arenas".
-
-Each arena follows the same layout for its metadata, and all references in an
-arena are internal to it (with the exception of one field that points to the
-next arena). The following depicts the "On-disk" metadata layout::
-
-
-    Backing Store     +------->  Arena
-  +---------------+   |   +------------------+
-  |               |   |   | Arena info block |
-  |    Arena 0    +---+   |       4K         |
-  |     512G      |       +------------------+
-  |               |       |                  |
-  +---------------+       |                  |
-  |               |       |                  |
-  |    Arena 1    |       |   Data Blocks    |
-  |     512G      |       |                  |
-  |               |       |                  |
-  +---------------+       |                  |
-  |       .       |       |                  |
-  |       .       |       |                  |
-  |       .       |       |                  |
-  |               |       |                  |
-  |               |       |                  |
-  +---------------+       +------------------+
-                          |                  |
-                          |     BTT Map      |
-                          |                  |
-                          |                  |
-                          +------------------+
-                          |                  |
-                          |     BTT Flog     |
-                          |                  |
-                          +------------------+
-                          | Info block copy  |
-                          |       4K         |
-                          +------------------+
-
-
-3. Theory of Operation
-======================
-
-
-a. The BTT Map
---------------
-
-The map is a simple lookup/indirection table that maps an LBA to an internal
-block. Each map entry is 32 bits. The two most significant bits are special
-flags, and the remaining form the internal block number.
-
-======== =============================================================
-Bit      Description
-======== =============================================================
-31 - 30	 Error and Zero flags - Used in the following way:
-
-	   == ==  ====================================================
-	   31 30  Description
-	   == ==  ====================================================
-	   0  0	  Initial state. Reads return zeroes; Premap = Postmap
-	   0  1	  Zero state: Reads return zeroes
-	   1  0	  Error state: Reads fail; Writes clear 'E' bit
-	   1  1	  Normal Block – has valid postmap
-	   == ==  ====================================================
-
-29 - 0	 Mappings to internal 'postmap' blocks
-======== =============================================================
-
-
-Some of the terminology that will be subsequently used:
-
-============	================================================================
-External LBA	LBA as made visible to upper layers.
-ABA		Arena Block Address - Block offset/number within an arena
-Premap ABA	The block offset into an arena, which was decided upon by range
-		checking the External LBA
-Postmap ABA	The block number in the "Data Blocks" area obtained after
-		indirection from the map
-nfree		The number of free blocks that are maintained at any given time.
-		This is the number of concurrent writes that can happen to the
-		arena.
-============	================================================================
-
-
-For example, after adding a BTT, we surface a disk of 1024G. We get a read for
-the external LBA at 768G. This falls into the second arena, and of the 512G
-worth of blocks that this arena contributes, this block is at 256G. Thus, the
-premap ABA is 256G. We now refer to the map, and find out the mapping for block
-'X' (256G) points to block 'Y', say '64'. Thus the postmap ABA is 64.
-
-
-b. The BTT Flog
----------------
-
-The BTT provides sector atomicity by making every write an "allocating write",
-i.e. Every write goes to a "free" block. A running list of free blocks is
-maintained in the form of the BTT flog. 'Flog' is a combination of the words
-"free list" and "log". The flog contains 'nfree' entries, and an entry contains:
-
-========  =====================================================================
-lba       The premap ABA that is being written to
-old_map   The old postmap ABA - after 'this' write completes, this will be a
-	  free block.
-new_map   The new postmap ABA. The map will up updated to reflect this
-	  lba->postmap_aba mapping, but we log it here in case we have to
-	  recover.
-seq	  Sequence number to mark which of the 2 sections of this flog entry is
-	  valid/newest. It cycles between 01->10->11->01 (binary) under normal
-	  operation, with 00 indicating an uninitialized state.
-lba'	  alternate lba entry
-old_map'  alternate old postmap entry
-new_map'  alternate new postmap entry
-seq'	  alternate sequence number.
-========  =====================================================================
-
-Each of the above fields is 32-bit, making one entry 32 bytes. Entries are also
-padded to 64 bytes to avoid cache line sharing or aliasing. Flog updates are
-done such that for any entry being written, it:
-a. overwrites the 'old' section in the entry based on sequence numbers
-b. writes the 'new' section such that the sequence number is written last.
-
-
-c. The concept of lanes
------------------------
-
-While 'nfree' describes the number of concurrent IOs an arena can process
-concurrently, 'nlanes' is the number of IOs the BTT device as a whole can
-process::
-
-	nlanes = min(nfree, num_cpus)
-
-A lane number is obtained at the start of any IO, and is used for indexing into
-all the on-disk and in-memory data structures for the duration of the IO. If
-there are more CPUs than the max number of available lanes, than lanes are
-protected by spinlocks.
-
-
-d. In-memory data structure: Read Tracking Table (RTT)
-------------------------------------------------------
-
-Consider a case where we have two threads, one doing reads and the other,
-writes. We can hit a condition where the writer thread grabs a free block to do
-a new IO, but the (slow) reader thread is still reading from it. In other words,
-the reader consulted a map entry, and started reading the corresponding block. A
-writer started writing to the same external LBA, and finished the write updating
-the map for that external LBA to point to its new postmap ABA. At this point the
-internal, postmap block that the reader is (still) reading has been inserted
-into the list of free blocks. If another write comes in for the same LBA, it can
-grab this free block, and start writing to it, causing the reader to read
-incorrect data. To prevent this, we introduce the RTT.
-
-The RTT is a simple, per arena table with 'nfree' entries. Every reader inserts
-into rtt[lane_number], the postmap ABA it is reading, and clears it after the
-read is complete. Every writer thread, after grabbing a free block, checks the
-RTT for its presence. If the postmap free block is in the RTT, it waits till the
-reader clears the RTT entry, and only then starts writing to it.
-
-
-e. In-memory data structure: map locks
---------------------------------------
-
-Consider a case where two writer threads are writing to the same LBA. There can
-be a race in the following sequence of steps::
-
-	free[lane] = map[premap_aba]
-	map[premap_aba] = postmap_aba
-
-Both threads can update their respective free[lane] with the same old, freed
-postmap_aba. This has made the layout inconsistent by losing a free entry, and
-at the same time, duplicating another free entry for two lanes.
-
-To solve this, we could have a single map lock (per arena) that has to be taken
-before performing the above sequence, but we feel that could be too contentious.
-Instead we use an array of (nfree) map_locks that is indexed by
-(premap_aba modulo nfree).
-
-
-f. Reconstruction from the Flog
--------------------------------
-
-On startup, we analyze the BTT flog to create our list of free blocks. We walk
-through all the entries, and for each lane, of the set of two possible
-'sections', we always look at the most recent one only (based on the sequence
-number). The reconstruction rules/steps are simple:
-
-- Read map[log_entry.lba].
-- If log_entry.new matches the map entry, then log_entry.old is free.
-- If log_entry.new does not match the map entry, then log_entry.new is free.
-  (This case can only be caused by power-fails/unsafe shutdowns)
-
-
-g. Summarizing - Read and Write flows
--------------------------------------
-
-Read:
-
-1.  Convert external LBA to arena number + pre-map ABA
-2.  Get a lane (and take lane_lock)
-3.  Read map to get the entry for this pre-map ABA
-4.  Enter post-map ABA into RTT[lane]
-5.  If TRIM flag set in map, return zeroes, and end IO (go to step 8)
-6.  If ERROR flag set in map, end IO with EIO (go to step 8)
-7.  Read data from this block
-8.  Remove post-map ABA entry from RTT[lane]
-9.  Release lane (and lane_lock)
-
-Write:
-
-1.  Convert external LBA to Arena number + pre-map ABA
-2.  Get a lane (and take lane_lock)
-3.  Use lane to index into in-memory free list and obtain a new block, next flog
-    index, next sequence number
-4.  Scan the RTT to check if free block is present, and spin/wait if it is.
-5.  Write data to this free block
-6.  Read map to get the existing post-map ABA entry for this pre-map ABA
-7.  Write flog entry: [premap_aba / old postmap_aba / new postmap_aba / seq_num]
-8.  Write new post-map ABA into map.
-9.  Write old post-map entry into the free list
-10. Calculate next sequence number and write into the free list entry
-11. Release lane (and lane_lock)
-
-
-4. Error Handling
-=================
-
-An arena would be in an error state if any of the metadata is corrupted
-irrecoverably, either due to a bug or a media error. The following conditions
-indicate an error:
-
-- Info block checksum does not match (and recovering from the copy also fails)
-- All internal available blocks are not uniquely and entirely addressed by the
-  sum of mapped blocks and free blocks (from the BTT flog).
-- Rebuilding free list from the flog reveals missing/duplicate/impossible
-  entries
-- A map entry is out of bounds
-
-If any of these error conditions are encountered, the arena is put into a read
-only state using a flag in the info block.
-
-
-5. Usage
-========
-
-The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem
-(pmem, or blk mode). The easiest way to set up such a namespace is using the
-'ndctl' utility [1]:
-
-For example, the ndctl command line to setup a btt with a 4k sector size is::
-
-    ndctl create-namespace -f -e namespace0.0 -m sector -l 4k
-
-See ndctl create-namespace --help for more options.
-
-[1]: https://github.com/pmem/ndctl
diff --git a/Documentation/nvdimm/index.rst b/Documentation/nvdimm/index.rst
deleted file mode 100644
index 1a3402d3775e..000000000000
--- a/Documentation/nvdimm/index.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-:orphan:
-
-===================================
-Non-Volatile Memory Device (NVDIMM)
-===================================
-
-.. toctree::
-   :maxdepth: 1
-
-   nvdimm
-   btt
-   security
diff --git a/Documentation/nvdimm/nvdimm.rst b/Documentation/nvdimm/nvdimm.rst
deleted file mode 100644
index 08f855cbb4e6..000000000000
--- a/Documentation/nvdimm/nvdimm.rst
+++ /dev/null
@@ -1,887 +0,0 @@
-===============================
-LIBNVDIMM: Non-Volatile Devices
-===============================
-
-libnvdimm - kernel / libndctl - userspace helper library
-
-linux-nvdimm@lists.01.org
-
-Version 13
-
-.. contents:
-
-	Glossary
-	Overview
-	    Supporting Documents
-	    Git Trees
-	LIBNVDIMM PMEM and BLK
-	Why BLK?
-	    PMEM vs BLK
-	        BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX
-	Example NVDIMM Platform
-	LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API
-	    LIBNDCTL: Context
-	        libndctl: instantiate a new library context example
-	    LIBNVDIMM/LIBNDCTL: Bus
-	        libnvdimm: control class device in /sys/class
-	        libnvdimm: bus
-	        libndctl: bus enumeration example
-	    LIBNVDIMM/LIBNDCTL: DIMM (NMEM)
-	        libnvdimm: DIMM (NMEM)
-	        libndctl: DIMM enumeration example
-	    LIBNVDIMM/LIBNDCTL: Region
-	        libnvdimm: region
-	        libndctl: region enumeration example
-	        Why Not Encode the Region Type into the Region Name?
-	        How Do I Determine the Major Type of a Region?
-	    LIBNVDIMM/LIBNDCTL: Namespace
-	        libnvdimm: namespace
-	        libndctl: namespace enumeration example
-	        libndctl: namespace creation example
-	        Why the Term "namespace"?
-	    LIBNVDIMM/LIBNDCTL: Block Translation Table "btt"
-	        libnvdimm: btt layout
-	        libndctl: btt creation example
-	Summary LIBNDCTL Diagram
-
-
-Glossary
-========
-
-PMEM:
-  A system-physical-address range where writes are persistent.  A
-  block device composed of PMEM is capable of DAX.  A PMEM address range
-  may span an interleave of several DIMMs.
-
-BLK:
-  A set of one or more programmable memory mapped apertures provided
-  by a DIMM to access its media.  This indirection precludes the
-  performance benefit of interleaving, but enables DIMM-bounded failure
-  modes.
-
-DPA:
-  DIMM Physical Address, is a DIMM-relative offset.  With one DIMM in
-  the system there would be a 1:1 system-physical-address:DPA association.
-  Once more DIMMs are added a memory controller interleave must be
-  decoded to determine the DPA associated with a given
-  system-physical-address.  BLK capacity always has a 1:1 relationship
-  with a single-DIMM's DPA range.
-
-DAX:
-  File system extensions to bypass the page cache and block layer to
-  mmap persistent memory, from a PMEM block device, directly into a
-  process address space.
-
-DSM:
-  Device Specific Method: ACPI method to to control specific
-  device - in this case the firmware.
-
-DCR:
-  NVDIMM Control Region Structure defined in ACPI 6 Section 5.2.25.5.
-  It defines a vendor-id, device-id, and interface format for a given DIMM.
-
-BTT:
-  Block Translation Table: Persistent memory is byte addressable.
-  Existing software may have an expectation that the power-fail-atomicity
-  of writes is at least one sector, 512 bytes.  The BTT is an indirection
-  table with atomic update semantics to front a PMEM/BLK block device
-  driver and present arbitrary atomic sector sizes.
-
-LABEL:
-  Metadata stored on a DIMM device that partitions and identifies
-  (persistently names) storage between PMEM and BLK.  It also partitions
-  BLK storage to host BTTs with different parameters per BLK-partition.
-  Note that traditional partition tables, GPT/MBR, are layered on top of a
-  BLK or PMEM device.
-
-
-Overview
-========
-
-The LIBNVDIMM subsystem provides support for three types of NVDIMMs, namely,
-PMEM, BLK, and NVDIMM devices that can simultaneously support both PMEM
-and BLK mode access.  These three modes of operation are described by
-the "NVDIMM Firmware Interface Table" (NFIT) in ACPI 6.  While the LIBNVDIMM
-implementation is generic and supports pre-NFIT platforms, it was guided
-by the superset of capabilities need to support this ACPI 6 definition
-for NVDIMM resources.  The bulk of the kernel implementation is in place
-to handle the case where DPA accessible via PMEM is aliased with DPA
-accessible via BLK.  When that occurs a LABEL is needed to reserve DPA
-for exclusive access via one mode a time.
-
-Supporting Documents
---------------------
-
-ACPI 6:
-	http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf
-NVDIMM Namespace:
-	http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf
-DSM Interface Example:
-	http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
-Driver Writer's Guide:
-	http://pmem.io/documents/NVDIMM_Driver_Writers_Guide.pdf
-
-Git Trees
----------
-
-LIBNVDIMM:
-	https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git
-LIBNDCTL:
-	https://github.com/pmem/ndctl.git
-PMEM:
-	https://github.com/01org/prd
-
-
-LIBNVDIMM PMEM and BLK
-======================
-
-Prior to the arrival of the NFIT, non-volatile memory was described to a
-system in various ad-hoc ways.  Usually only the bare minimum was
-provided, namely, a single system-physical-address range where writes
-are expected to be durable after a system power loss.  Now, the NFIT
-specification standardizes not only the description of PMEM, but also
-BLK and platform message-passing entry points for control and
-configuration.
-
-For each NVDIMM access method (PMEM, BLK), LIBNVDIMM provides a block
-device driver:
-
-    1. PMEM (nd_pmem.ko): Drives a system-physical-address range.  This
-       range is contiguous in system memory and may be interleaved (hardware
-       memory controller striped) across multiple DIMMs.  When interleaved the
-       platform may optionally provide details of which DIMMs are participating
-       in the interleave.
-
-       Note that while LIBNVDIMM describes system-physical-address ranges that may
-       alias with BLK access as ND_NAMESPACE_PMEM ranges and those without
-       alias as ND_NAMESPACE_IO ranges, to the nd_pmem driver there is no
-       distinction.  The different device-types are an implementation detail
-       that userspace can exploit to implement policies like "only interface
-       with address ranges from certain DIMMs".  It is worth noting that when
-       aliasing is present and a DIMM lacks a label, then no block device can
-       be created by default as userspace needs to do at least one allocation
-       of DPA to the PMEM range.  In contrast ND_NAMESPACE_IO ranges, once
-       registered, can be immediately attached to nd_pmem.
-
-    2. BLK (nd_blk.ko): This driver performs I/O using a set of platform
-       defined apertures.  A set of apertures will access just one DIMM.
-       Multiple windows (apertures) allow multiple concurrent accesses, much like
-       tagged-command-queuing, and would likely be used by different threads or
-       different CPUs.
-
-       The NFIT specification defines a standard format for a BLK-aperture, but
-       the spec also allows for vendor specific layouts, and non-NFIT BLK
-       implementations may have other designs for BLK I/O.  For this reason
-       "nd_blk" calls back into platform-specific code to perform the I/O.
-
-       One such implementation is defined in the "Driver Writer's Guide" and "DSM
-       Interface Example".
-
-
-Why BLK?
-========
-
-While PMEM provides direct byte-addressable CPU-load/store access to
-NVDIMM storage, it does not provide the best system RAS (recovery,
-availability, and serviceability) model.  An access to a corrupted
-system-physical-address address causes a CPU exception while an access
-to a corrupted address through an BLK-aperture causes that block window
-to raise an error status in a register.  The latter is more aligned with
-the standard error model that host-bus-adapter attached disks present.
-
-Also, if an administrator ever wants to replace a memory it is easier to
-service a system at DIMM module boundaries.  Compare this to PMEM where
-data could be interleaved in an opaque hardware specific manner across
-several DIMMs.
-
-PMEM vs BLK
------------
-
-BLK-apertures solve these RAS problems, but their presence is also the
-major contributing factor to the complexity of the ND subsystem.  They
-complicate the implementation because PMEM and BLK alias in DPA space.
-Any given DIMM's DPA-range may contribute to one or more
-system-physical-address sets of interleaved DIMMs, *and* may also be
-accessed in its entirety through its BLK-aperture.  Accessing a DPA
-through a system-physical-address while simultaneously accessing the
-same DPA through a BLK-aperture has undefined results.  For this reason,
-DIMMs with this dual interface configuration include a DSM function to
-store/retrieve a LABEL.  The LABEL effectively partitions the DPA-space
-into exclusive system-physical-address and BLK-aperture accessible
-regions.  For simplicity a DIMM is allowed a PMEM "region" per each
-interleave set in which it is a member.  The remaining DPA space can be
-carved into an arbitrary number of BLK devices with discontiguous
-extents.
-
-BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-One of the few
-reasons to allow multiple BLK namespaces per REGION is so that each
-BLK-namespace can be configured with a BTT with unique atomic sector
-sizes.  While a PMEM device can host a BTT the LABEL specification does
-not provide for a sector size to be specified for a PMEM namespace.
-
-This is due to the expectation that the primary usage model for PMEM is
-via DAX, and the BTT is incompatible with DAX.  However, for the cases
-where an application or filesystem still needs atomic sector update
-guarantees it can register a BTT on a PMEM device or partition.  See
-LIBNVDIMM/NDCTL: Block Translation Table "btt"
-
-
-Example NVDIMM Platform
-=======================
-
-For the remainder of this document the following diagram will be
-referenced for any example sysfs layouts::
-
-
-                               (a)               (b)           DIMM   BLK-REGION
-            +-------------------+--------+--------+--------+
-  +------+  |       pm0.0       | blk2.0 | pm1.0  | blk2.1 |    0      region2
-  | imc0 +--+- - - region0- - - +--------+        +--------+
-  +--+---+  |       pm0.0       | blk3.0 | pm1.0  | blk3.1 |    1      region3
-     |      +-------------------+--------v        v--------+
-  +--+---+                               |                 |
-  | cpu0 |                                     region1
-  +--+---+                               |                 |
-     |      +----------------------------^        ^--------+
-  +--+---+  |           blk4.0           | pm1.0  | blk4.0 |    2      region4
-  | imc1 +--+----------------------------|        +--------+
-  +------+  |           blk5.0           | pm1.0  | blk5.0 |    3      region5
-            +----------------------------+--------+--------+
-
-In this platform we have four DIMMs and two memory controllers in one
-socket.  Each unique interface (BLK or PMEM) to DPA space is identified
-by a region device with a dynamically assigned id (REGION0 - REGION5).
-
-    1. The first portion of DIMM0 and DIMM1 are interleaved as REGION0. A
-       single PMEM namespace is created in the REGION0-SPA-range that spans most
-       of DIMM0 and DIMM1 with a user-specified name of "pm0.0". Some of that
-       interleaved system-physical-address range is reclaimed as BLK-aperture
-       accessed space starting at DPA-offset (a) into each DIMM.  In that
-       reclaimed space we create two BLK-aperture "namespaces" from REGION2 and
-       REGION3 where "blk2.0" and "blk3.0" are just human readable names that
-       could be set to any user-desired name in the LABEL.
-
-    2. In the last portion of DIMM0 and DIMM1 we have an interleaved
-       system-physical-address range, REGION1, that spans those two DIMMs as
-       well as DIMM2 and DIMM3.  Some of REGION1 is allocated to a PMEM namespace
-       named "pm1.0", the rest is reclaimed in 4 BLK-aperture namespaces (for
-       each DIMM in the interleave set), "blk2.1", "blk3.1", "blk4.0", and
-       "blk5.0".
-
-    3. The portion of DIMM2 and DIMM3 that do not participate in the REGION1
-       interleaved system-physical-address range (i.e. the DPA address past
-       offset (b) are also included in the "blk4.0" and "blk5.0" namespaces.
-       Note, that this example shows that BLK-aperture namespaces don't need to
-       be contiguous in DPA-space.
-
-    This bus is provided by the kernel under the device
-    /sys/devices/platform/nfit_test.0 when CONFIG_NFIT_TEST is enabled and
-    the nfit_test.ko module is loaded.  This not only test LIBNVDIMM but the
-    acpi_nfit.ko driver as well.
-
-
-LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API
-========================================================
-
-What follows is a description of the LIBNVDIMM sysfs layout and a
-corresponding object hierarchy diagram as viewed through the LIBNDCTL
-API.  The example sysfs paths and diagrams are relative to the Example
-NVDIMM Platform which is also the LIBNVDIMM bus used in the LIBNDCTL unit
-test.
-
-LIBNDCTL: Context
------------------
-
-Every API call in the LIBNDCTL library requires a context that holds the
-logging parameters and other library instance state.  The library is
-based on the libabc template:
-
-	https://git.kernel.org/cgit/linux/kernel/git/kay/libabc.git
-
-LIBNDCTL: instantiate a new library context example
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-::
-
-	struct ndctl_ctx *ctx;
-
-	if (ndctl_new(&ctx) == 0)
-		return ctx;
-	else
-		return NULL;
-
-LIBNVDIMM/LIBNDCTL: Bus
------------------------
-
-A bus has a 1:1 relationship with an NFIT.  The current expectation for
-ACPI based systems is that there is only ever one platform-global NFIT.
-That said, it is trivial to register multiple NFITs, the specification
-does not preclude it.  The infrastructure supports multiple busses and
-we use this capability to test multiple NFIT configurations in the unit
-test.
-
-LIBNVDIMM: control class device in /sys/class
----------------------------------------------
-
-This character device accepts DSM messages to be passed to DIMM
-identified by its NFIT handle::
-
-	/sys/class/nd/ndctl0
-	|-- dev
-	|-- device -> ../../../ndbus0
-	|-- subsystem -> ../../../../../../../class/nd
-
-
-
-LIBNVDIMM: bus
---------------
-
-::
-
-	struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
-	       struct nvdimm_bus_descriptor *nfit_desc);
-
-::
-
-	/sys/devices/platform/nfit_test.0/ndbus0
-	|-- commands
-	|-- nd
-	|-- nfit
-	|-- nmem0
-	|-- nmem1
-	|-- nmem2
-	|-- nmem3
-	|-- power
-	|-- provider
-	|-- region0
-	|-- region1
-	|-- region2
-	|-- region3
-	|-- region4
-	|-- region5
-	|-- uevent
-	`-- wait_probe
-
-LIBNDCTL: bus enumeration example
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Find the bus handle that describes the bus from Example NVDIMM Platform::
-
-	static struct ndctl_bus *get_bus_by_provider(struct ndctl_ctx *ctx,
-			const char *provider)
-	{
-		struct ndctl_bus *bus;
-
-		ndctl_bus_foreach(ctx, bus)
-			if (strcmp(provider, ndctl_bus_get_provider(bus)) == 0)
-				return bus;
-
-		return NULL;
-	}
-
-	bus = get_bus_by_provider(ctx, "nfit_test.0");
-
-
-LIBNVDIMM/LIBNDCTL: DIMM (NMEM)
--------------------------------
-
-The DIMM device provides a character device for sending commands to
-hardware, and it is a container for LABELs.  If the DIMM is defined by
-NFIT then an optional 'nfit' attribute sub-directory is available to add
-NFIT-specifics.
-
-Note that the kernel device name for "DIMMs" is "nmemX".  The NFIT
-describes these devices via "Memory Device to System Physical Address
-Range Mapping Structure", and there is no requirement that they actually
-be physical DIMMs, so we use a more generic name.
-
-LIBNVDIMM: DIMM (NMEM)
-^^^^^^^^^^^^^^^^^^^^^^
-
-::
-
-	struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
-			const struct attribute_group **groups, unsigned long flags,
-			unsigned long *dsm_mask);
-
-::
-
-	/sys/devices/platform/nfit_test.0/ndbus0
-	|-- nmem0
-	|   |-- available_slots
-	|   |-- commands
-	|   |-- dev
-	|   |-- devtype
-	|   |-- driver -> ../../../../../bus/nd/drivers/nvdimm
-	|   |-- modalias
-	|   |-- nfit
-	|   |   |-- device
-	|   |   |-- format
-	|   |   |-- handle
-	|   |   |-- phys_id
-	|   |   |-- rev_id
-	|   |   |-- serial
-	|   |   `-- vendor
-	|   |-- state
-	|   |-- subsystem -> ../../../../../bus/nd
-	|   `-- uevent
-	|-- nmem1
-	[..]
-
-
-LIBNDCTL: DIMM enumeration example
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Note, in this example we are assuming NFIT-defined DIMMs which are
-identified by an "nfit_handle" a 32-bit value where:
-
-   - Bit 3:0 DIMM number within the memory channel
-   - Bit 7:4 memory channel number
-   - Bit 11:8 memory controller ID
-   - Bit 15:12 socket ID (within scope of a Node controller if node
-     controller is present)
-   - Bit 27:16 Node Controller ID
-   - Bit 31:28 Reserved
-
-::
-
-	static struct ndctl_dimm *get_dimm_by_handle(struct ndctl_bus *bus,
-	       unsigned int handle)
-	{
-		struct ndctl_dimm *dimm;
-
-		ndctl_dimm_foreach(bus, dimm)
-			if (ndctl_dimm_get_handle(dimm) == handle)
-				return dimm;
-
-		return NULL;
-	}
-
-	#define DIMM_HANDLE(n, s, i, c, d) \
-		(((n & 0xfff) << 16) | ((s & 0xf) << 12) | ((i & 0xf) << 8) \
-		 | ((c & 0xf) << 4) | (d & 0xf))
-
-	dimm = get_dimm_by_handle(bus, DIMM_HANDLE(0, 0, 0, 0, 0));
-
-LIBNVDIMM/LIBNDCTL: Region
---------------------------
-
-A generic REGION device is registered for each PMEM range or BLK-aperture
-set.  Per the example there are 6 regions: 2 PMEM and 4 BLK-aperture
-sets on the "nfit_test.0" bus.  The primary role of regions are to be a
-container of "mappings".  A mapping is a tuple of <DIMM,
-DPA-start-offset, length>.
-
-LIBNVDIMM provides a built-in driver for these REGION devices.  This driver
-is responsible for reconciling the aliased DPA mappings across all
-regions, parsing the LABEL, if present, and then emitting NAMESPACE
-devices with the resolved/exclusive DPA-boundaries for the nd_pmem or
-nd_blk device driver to consume.
-
-In addition to the generic attributes of "mapping"s, "interleave_ways"
-and "size" the REGION device also exports some convenience attributes.
-"nstype" indicates the integer type of namespace-device this region
-emits, "devtype" duplicates the DEVTYPE variable stored by udev at the
-'add' event, "modalias" duplicates the MODALIAS variable stored by udev
-at the 'add' event, and finally, the optional "spa_index" is provided in
-the case where the region is defined by a SPA.
-
-LIBNVDIMM: region::
-
-	struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
-			struct nd_region_desc *ndr_desc);
-	struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
-			struct nd_region_desc *ndr_desc);
-
-::
-
-	/sys/devices/platform/nfit_test.0/ndbus0
-	|-- region0
-	|   |-- available_size
-	|   |-- btt0
-	|   |-- btt_seed
-	|   |-- devtype
-	|   |-- driver -> ../../../../../bus/nd/drivers/nd_region
-	|   |-- init_namespaces
-	|   |-- mapping0
-	|   |-- mapping1
-	|   |-- mappings
-	|   |-- modalias
-	|   |-- namespace0.0
-	|   |-- namespace_seed
-	|   |-- numa_node
-	|   |-- nfit
-	|   |   `-- spa_index
-	|   |-- nstype
-	|   |-- set_cookie
-	|   |-- size
-	|   |-- subsystem -> ../../../../../bus/nd
-	|   `-- uevent
-	|-- region1
-	[..]
-
-LIBNDCTL: region enumeration example
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Sample region retrieval routines based on NFIT-unique data like
-"spa_index" (interleave set id) for PMEM and "nfit_handle" (dimm id) for
-BLK::
-
-	static struct ndctl_region *get_pmem_region_by_spa_index(struct ndctl_bus *bus,
-			unsigned int spa_index)
-	{
-		struct ndctl_region *region;
-
-		ndctl_region_foreach(bus, region) {
-			if (ndctl_region_get_type(region) != ND_DEVICE_REGION_PMEM)
-				continue;
-			if (ndctl_region_get_spa_index(region) == spa_index)
-				return region;
-		}
-		return NULL;
-	}
-
-	static struct ndctl_region *get_blk_region_by_dimm_handle(struct ndctl_bus *bus,
-			unsigned int handle)
-	{
-		struct ndctl_region *region;
-
-		ndctl_region_foreach(bus, region) {
-			struct ndctl_mapping *map;
-
-			if (ndctl_region_get_type(region) != ND_DEVICE_REGION_BLOCK)
-				continue;
-			ndctl_mapping_foreach(region, map) {
-				struct ndctl_dimm *dimm = ndctl_mapping_get_dimm(map);
-
-				if (ndctl_dimm_get_handle(dimm) == handle)
-					return region;
-			}
-		}
-		return NULL;
-	}
-
-
-Why Not Encode the Region Type into the Region Name?
-----------------------------------------------------
-
-At first glance it seems since NFIT defines just PMEM and BLK interface
-types that we should simply name REGION devices with something derived
-from those type names.  However, the ND subsystem explicitly keeps the
-REGION name generic and expects userspace to always consider the
-region-attributes for four reasons:
-
-    1. There are already more than two REGION and "namespace" types.  For
-       PMEM there are two subtypes.  As mentioned previously we have PMEM where
-       the constituent DIMM devices are known and anonymous PMEM.  For BLK
-       regions the NFIT specification already anticipates vendor specific
-       implementations.  The exact distinction of what a region contains is in
-       the region-attributes not the region-name or the region-devtype.
-
-    2. A region with zero child-namespaces is a possible configuration.  For
-       example, the NFIT allows for a DCR to be published without a
-       corresponding BLK-aperture.  This equates to a DIMM that can only accept
-       control/configuration messages, but no i/o through a descendant block
-       device.  Again, this "type" is advertised in the attributes ('mappings'
-       == 0) and the name does not tell you much.
-
-    3. What if a third major interface type arises in the future?  Outside
-       of vendor specific implementations, it's not difficult to envision a
-       third class of interface type beyond BLK and PMEM.  With a generic name
-       for the REGION level of the device-hierarchy old userspace
-       implementations can still make sense of new kernel advertised
-       region-types.  Userspace can always rely on the generic region
-       attributes like "mappings", "size", etc and the expected child devices
-       named "namespace".  This generic format of the device-model hierarchy
-       allows the LIBNVDIMM and LIBNDCTL implementations to be more uniform and
-       future-proof.
-
-    4. There are more robust mechanisms for determining the major type of a
-       region than a device name.  See the next section, How Do I Determine the
-       Major Type of a Region?
-
-How Do I Determine the Major Type of a Region?
-----------------------------------------------
-
-Outside of the blanket recommendation of "use libndctl", or simply
-looking at the kernel header (/usr/include/linux/ndctl.h) to decode the
-"nstype" integer attribute, here are some other options.
-
-1. module alias lookup
-^^^^^^^^^^^^^^^^^^^^^^
-
-    The whole point of region/namespace device type differentiation is to
-    decide which block-device driver will attach to a given LIBNVDIMM namespace.
-    One can simply use the modalias to lookup the resulting module.  It's
-    important to note that this method is robust in the presence of a
-    vendor-specific driver down the road.  If a vendor-specific
-    implementation wants to supplant the standard nd_blk driver it can with
-    minimal impact to the rest of LIBNVDIMM.
-
-    In fact, a vendor may also want to have a vendor-specific region-driver
-    (outside of nd_region).  For example, if a vendor defined its own LABEL
-    format it would need its own region driver to parse that LABEL and emit
-    the resulting namespaces.  The output from module resolution is more
-    accurate than a region-name or region-devtype.
-
-2. udev
-^^^^^^^
-
-    The kernel "devtype" is registered in the udev database::
-
-	# udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region0
-	P: /devices/platform/nfit_test.0/ndbus0/region0
-	E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region0
-	E: DEVTYPE=nd_pmem
-	E: MODALIAS=nd:t2
-	E: SUBSYSTEM=nd
-
-	# udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region4
-	P: /devices/platform/nfit_test.0/ndbus0/region4
-	E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region4
-	E: DEVTYPE=nd_blk
-	E: MODALIAS=nd:t3
-	E: SUBSYSTEM=nd
-
-    ...and is available as a region attribute, but keep in mind that the
-    "devtype" does not indicate sub-type variations and scripts should
-    really be understanding the other attributes.
-
-3. type specific attributes
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-    As it currently stands a BLK-aperture region will never have a
-    "nfit/spa_index" attribute, but neither will a non-NFIT PMEM region.  A
-    BLK region with a "mappings" value of 0 is, as mentioned above, a DIMM
-    that does not allow I/O.  A PMEM region with a "mappings" value of zero
-    is a simple system-physical-address range.
-
-
-LIBNVDIMM/LIBNDCTL: Namespace
------------------------------
-
-A REGION, after resolving DPA aliasing and LABEL specified boundaries,
-surfaces one or more "namespace" devices.  The arrival of a "namespace"
-device currently triggers either the nd_blk or nd_pmem driver to load
-and register a disk/block device.
-
-LIBNVDIMM: namespace
-^^^^^^^^^^^^^^^^^^^^
-
-Here is a sample layout from the three major types of NAMESPACE where
-namespace0.0 represents DIMM-info-backed PMEM (note that it has a 'uuid'
-attribute), namespace2.0 represents a BLK namespace (note it has a
-'sector_size' attribute) that, and namespace6.0 represents an anonymous
-PMEM namespace (note that has no 'uuid' attribute due to not support a
-LABEL)::
-
-	/sys/devices/platform/nfit_test.0/ndbus0/region0/namespace0.0
-	|-- alt_name
-	|-- devtype
-	|-- dpa_extents
-	|-- force_raw
-	|-- modalias
-	|-- numa_node
-	|-- resource
-	|-- size
-	|-- subsystem -> ../../../../../../bus/nd
-	|-- type
-	|-- uevent
-	`-- uuid
-	/sys/devices/platform/nfit_test.0/ndbus0/region2/namespace2.0
-	|-- alt_name
-	|-- devtype
-	|-- dpa_extents
-	|-- force_raw
-	|-- modalias
-	|-- numa_node
-	|-- sector_size
-	|-- size
-	|-- subsystem -> ../../../../../../bus/nd
-	|-- type
-	|-- uevent
-	`-- uuid
-	/sys/devices/platform/nfit_test.1/ndbus1/region6/namespace6.0
-	|-- block
-	|   `-- pmem0
-	|-- devtype
-	|-- driver -> ../../../../../../bus/nd/drivers/pmem
-	|-- force_raw
-	|-- modalias
-	|-- numa_node
-	|-- resource
-	|-- size
-	|-- subsystem -> ../../../../../../bus/nd
-	|-- type
-	`-- uevent
-
-LIBNDCTL: namespace enumeration example
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Namespaces are indexed relative to their parent region, example below.
-These indexes are mostly static from boot to boot, but subsystem makes
-no guarantees in this regard.  For a static namespace identifier use its
-'uuid' attribute.
-
-::
-
-  static struct ndctl_namespace
-  *get_namespace_by_id(struct ndctl_region *region, unsigned int id)
-  {
-          struct ndctl_namespace *ndns;
-
-          ndctl_namespace_foreach(region, ndns)
-                  if (ndctl_namespace_get_id(ndns) == id)
-                          return ndns;
-
-          return NULL;
-  }
-
-LIBNDCTL: namespace creation example
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Idle namespaces are automatically created by the kernel if a given
-region has enough available capacity to create a new namespace.
-Namespace instantiation involves finding an idle namespace and
-configuring it.  For the most part the setting of namespace attributes
-can occur in any order, the only constraint is that 'uuid' must be set
-before 'size'.  This enables the kernel to track DPA allocations
-internally with a static identifier::
-
-  static int configure_namespace(struct ndctl_region *region,
-                  struct ndctl_namespace *ndns,
-                  struct namespace_parameters *parameters)
-  {
-          char devname[50];
-
-          snprintf(devname, sizeof(devname), "namespace%d.%d",
-                          ndctl_region_get_id(region), paramaters->id);
-
-          ndctl_namespace_set_alt_name(ndns, devname);
-          /* 'uuid' must be set prior to setting size! */
-          ndctl_namespace_set_uuid(ndns, paramaters->uuid);
-          ndctl_namespace_set_size(ndns, paramaters->size);
-          /* unlike pmem namespaces, blk namespaces have a sector size */
-          if (parameters->lbasize)
-                  ndctl_namespace_set_sector_size(ndns, parameters->lbasize);
-          ndctl_namespace_enable(ndns);
-  }
-
-
-Why the Term "namespace"?
-^^^^^^^^^^^^^^^^^^^^^^^^^
-
-    1. Why not "volume" for instance?  "volume" ran the risk of confusing
-       ND (libnvdimm subsystem) to a volume manager like device-mapper.
-
-    2. The term originated to describe the sub-devices that can be created
-       within a NVME controller (see the nvme specification:
-       http://www.nvmexpress.org/specifications/), and NFIT namespaces are
-       meant to parallel the capabilities and configurability of
-       NVME-namespaces.
-
-
-LIBNVDIMM/LIBNDCTL: Block Translation Table "btt"
--------------------------------------------------
-
-A BTT (design document: http://pmem.io/2014/09/23/btt.html) is a stacked
-block device driver that fronts either the whole block device or a
-partition of a block device emitted by either a PMEM or BLK NAMESPACE.
-
-LIBNVDIMM: btt layout
-^^^^^^^^^^^^^^^^^^^^^
-
-Every region will start out with at least one BTT device which is the
-seed device.  To activate it set the "namespace", "uuid", and
-"sector_size" attributes and then bind the device to the nd_pmem or
-nd_blk driver depending on the region type::
-
-	/sys/devices/platform/nfit_test.1/ndbus0/region0/btt0/
-	|-- namespace
-	|-- delete
-	|-- devtype
-	|-- modalias
-	|-- numa_node
-	|-- sector_size
-	|-- subsystem -> ../../../../../bus/nd
-	|-- uevent
-	`-- uuid
-
-LIBNDCTL: btt creation example
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Similar to namespaces an idle BTT device is automatically created per
-region.  Each time this "seed" btt device is configured and enabled a new
-seed is created.  Creating a BTT configuration involves two steps of
-finding and idle BTT and assigning it to consume a PMEM or BLK namespace::
-
-	static struct ndctl_btt *get_idle_btt(struct ndctl_region *region)
-	{
-		struct ndctl_btt *btt;
-
-		ndctl_btt_foreach(region, btt)
-			if (!ndctl_btt_is_enabled(btt)
-					&& !ndctl_btt_is_configured(btt))
-				return btt;
-
-		return NULL;
-	}
-
-	static int configure_btt(struct ndctl_region *region,
-			struct btt_parameters *parameters)
-	{
-		btt = get_idle_btt(region);
-
-		ndctl_btt_set_uuid(btt, parameters->uuid);
-		ndctl_btt_set_sector_size(btt, parameters->sector_size);
-		ndctl_btt_set_namespace(btt, parameters->ndns);
-		/* turn off raw mode device */
-		ndctl_namespace_disable(parameters->ndns);
-		/* turn on btt access */
-		ndctl_btt_enable(btt);
-	}
-
-Once instantiated a new inactive btt seed device will appear underneath
-the region.
-
-Once a "namespace" is removed from a BTT that instance of the BTT device
-will be deleted or otherwise reset to default values.  This deletion is
-only at the device model level.  In order to destroy a BTT the "info
-block" needs to be destroyed.  Note, that to destroy a BTT the media
-needs to be written in raw mode.  By default, the kernel will autodetect
-the presence of a BTT and disable raw mode.  This autodetect behavior
-can be suppressed by enabling raw mode for the namespace via the
-ndctl_namespace_set_raw_mode() API.
-
-
-Summary LIBNDCTL Diagram
-------------------------
-
-For the given example above, here is the view of the objects as seen by the
-LIBNDCTL API::
-
-              +---+
-              |CTX|    +---------+   +--------------+  +---------------+
-              +-+-+  +-> REGION0 +---> NAMESPACE0.0 +--> PMEM8 "pm0.0" |
-                |    | +---------+   +--------------+  +---------------+
-  +-------+     |    | +---------+   +--------------+  +---------------+
-  | DIMM0 <-+   |    +-> REGION1 +---> NAMESPACE1.0 +--> PMEM6 "pm1.0" |
-  +-------+ |   |    | +---------+   +--------------+  +---------------+
-  | DIMM1 <-+ +-v--+ | +---------+   +--------------+  +---------------+
-  +-------+ +-+BUS0+---> REGION2 +-+-> NAMESPACE2.0 +--> ND6  "blk2.0" |
-  | DIMM2 <-+ +----+ | +---------+ | +--------------+  +----------------------+
-  +-------+ |        |             +-> NAMESPACE2.1 +--> ND5  "blk2.1" | BTT2 |
-  | DIMM3 <-+        |               +--------------+  +----------------------+
-  +-------+          | +---------+   +--------------+  +---------------+
-                     +-> REGION3 +-+-> NAMESPACE3.0 +--> ND4  "blk3.0" |
-                     | +---------+ | +--------------+  +----------------------+
-                     |             +-> NAMESPACE3.1 +--> ND3  "blk3.1" | BTT1 |
-                     |               +--------------+  +----------------------+
-                     | +---------+   +--------------+  +---------------+
-                     +-> REGION4 +---> NAMESPACE4.0 +--> ND2  "blk4.0" |
-                     | +---------+   +--------------+  +---------------+
-                     | +---------+   +--------------+  +----------------------+
-                     +-> REGION5 +---> NAMESPACE5.0 +--> ND1  "blk5.0" | BTT0 |
-                       +---------+   +--------------+  +---------------+------+
diff --git a/Documentation/nvdimm/security.rst b/Documentation/nvdimm/security.rst
deleted file mode 100644
index ad9dea099b34..000000000000
--- a/Documentation/nvdimm/security.rst
+++ /dev/null
@@ -1,143 +0,0 @@
-===============
-NVDIMM Security
-===============
-
-1. Introduction
----------------
-
-With the introduction of Intel Device Specific Methods (DSM) v1.8
-specification [1], security DSMs are introduced. The spec added the following
-security DSMs: "get security state", "set passphrase", "disable passphrase",
-"unlock unit", "freeze lock", "secure erase", and "overwrite". A security_ops
-data structure has been added to struct dimm in order to support the security
-operations and generic APIs are exposed to allow vendor neutral operations.
-
-2. Sysfs Interface
-------------------
-The "security" sysfs attribute is provided in the nvdimm sysfs directory. For
-example:
-/sys/devices/LNXSYSTM:00/LNXSYBUS:00/ACPI0012:00/ndbus0/nmem0/security
-
-The "show" attribute of that attribute will display the security state for
-that DIMM. The following states are available: disabled, unlocked, locked,
-frozen, and overwrite. If security is not supported, the sysfs attribute
-will not be visible.
-
-The "store" attribute takes several commands when it is being written to
-in order to support some of the security functionalities:
-update <old_keyid> <new_keyid> - enable or update passphrase.
-disable <keyid> - disable enabled security and remove key.
-freeze - freeze changing of security states.
-erase <keyid> - delete existing user encryption key.
-overwrite <keyid> - wipe the entire nvdimm.
-master_update <keyid> <new_keyid> - enable or update master passphrase.
-master_erase <keyid> - delete existing user encryption key.
-
-3. Key Management
------------------
-
-The key is associated to the payload by the DIMM id. For example:
-# cat /sys/devices/LNXSYSTM:00/LNXSYBUS:00/ACPI0012:00/ndbus0/nmem0/nfit/id
-8089-a2-1740-00000133
-The DIMM id would be provided along with the key payload (passphrase) to
-the kernel.
-
-The security keys are managed on the basis of a single key per DIMM. The
-key "passphrase" is expected to be 32bytes long. This is similar to the ATA
-security specification [2]. A key is initially acquired via the request_key()
-kernel API call during nvdimm unlock. It is up to the user to make sure that
-all the keys are in the kernel user keyring for unlock.
-
-A nvdimm encrypted-key of format enc32 has the description format of:
-nvdimm:<bus-provider-specific-unique-id>
-
-See file ``Documentation/security/keys/trusted-encrypted.rst`` for creating
-encrypted-keys of enc32 format. TPM usage with a master trusted key is
-preferred for sealing the encrypted-keys.
-
-4. Unlocking
-------------
-When the DIMMs are being enumerated by the kernel, the kernel will attempt to
-retrieve the key from the kernel user keyring. This is the only time
-a locked DIMM can be unlocked. Once unlocked, the DIMM will remain unlocked
-until reboot. Typically an entity (i.e. shell script) will inject all the
-relevant encrypted-keys into the kernel user keyring during the initramfs phase.
-This provides the unlock function access to all the related keys that contain
-the passphrase for the respective nvdimms.  It is also recommended that the
-keys are injected before libnvdimm is loaded by modprobe.
-
-5. Update
----------
-When doing an update, it is expected that the existing key is removed from
-the kernel user keyring and reinjected as different (old) key. It's irrelevant
-what the key description is for the old key since we are only interested in the
-keyid when doing the update operation. It is also expected that the new key
-is injected with the description format described from earlier in this
-document.  The update command written to the sysfs attribute will be with
-the format:
-update <old keyid> <new keyid>
-
-If there is no old keyid due to a security enabling, then a 0 should be
-passed in.
-
-6. Freeze
----------
-The freeze operation does not require any keys. The security config can be
-frozen by a user with root privelege.
-
-7. Disable
-----------
-The security disable command format is:
-disable <keyid>
-
-An key with the current passphrase payload that is tied to the nvdimm should be
-in the kernel user keyring.
-
-8. Secure Erase
----------------
-The command format for doing a secure erase is:
-erase <keyid>
-
-An key with the current passphrase payload that is tied to the nvdimm should be
-in the kernel user keyring.
-
-9. Overwrite
-------------
-The command format for doing an overwrite is:
-overwrite <keyid>
-
-Overwrite can be done without a key if security is not enabled. A key serial
-of 0 can be passed in to indicate no key.
-
-The sysfs attribute "security" can be polled to wait on overwrite completion.
-Overwrite can last tens of minutes or more depending on nvdimm size.
-
-An encrypted-key with the current user passphrase that is tied to the nvdimm
-should be injected and its keyid should be passed in via sysfs.
-
-10. Master Update
------------------
-The command format for doing a master update is:
-update <old keyid> <new keyid>
-
-The operating mechanism for master update is identical to update except the
-master passphrase key is passed to the kernel. The master passphrase key
-is just another encrypted-key.
-
-This command is only available when security is disabled.
-
-11. Master Erase
-----------------
-The command format for doing a master erase is:
-master_erase <current keyid>
-
-This command has the same operating mechanism as erase except the master
-passphrase key is passed to the kernel. The master passphrase key is just
-another encrypted-key.
-
-This command is only available when the master security is enabled, indicated
-by the extended security status.
-
-[1]: http://pmem.io/documents/NVDIMM_DSM_Interface-V1.8.pdf
-
-[2]: http://www.t13.org/documents/UploadedDocuments/docs2006/e05179r4-ACS-SecurityClarifications.pdf
-- 
cgit 


From bf6b7a742e3f82b3132e149fb17761e84207f9f1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 16:33:50 -0300
Subject: docs: namespace: move it to the admin-guide

As stated at the documentation, this is meant to be for
users to better understand namespaces.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/admin-guide/index.rst                |  1 +
 .../admin-guide/namespaces/compatibility-list.rst  | 43 ++++++++++++++++++++++
 Documentation/admin-guide/namespaces/index.rst     |  9 +++++
 .../admin-guide/namespaces/resource-control.rst    | 18 +++++++++
 Documentation/namespaces/compatibility-list.rst    | 43 ----------------------
 Documentation/namespaces/index.rst                 | 11 ------
 Documentation/namespaces/resource-control.rst      | 18 ---------
 7 files changed, 71 insertions(+), 72 deletions(-)
 create mode 100644 Documentation/admin-guide/namespaces/compatibility-list.rst
 create mode 100644 Documentation/admin-guide/namespaces/index.rst
 create mode 100644 Documentation/admin-guide/namespaces/resource-control.rst
 delete mode 100644 Documentation/namespaces/compatibility-list.rst
 delete mode 100644 Documentation/namespaces/index.rst
 delete mode 100644 Documentation/namespaces/resource-control.rst

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index f40c4b5a181b..abc2c4e83939 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -77,6 +77,7 @@ configure specific aspects of kernel behavior to your liking.
    thunderbolt
    LSM/index
    mm/index
+   namespaces/index
    perf-security
    acpi/index
 
diff --git a/Documentation/admin-guide/namespaces/compatibility-list.rst b/Documentation/admin-guide/namespaces/compatibility-list.rst
new file mode 100644
index 000000000000..318800b2a943
--- /dev/null
+++ b/Documentation/admin-guide/namespaces/compatibility-list.rst
@@ -0,0 +1,43 @@
+=============================
+Namespaces compatibility list
+=============================
+
+This document contains the information about the problems user
+may have when creating tasks living in different namespaces.
+
+Here's the summary. This matrix shows the known problems, that
+occur when tasks share some namespace (the columns) while living
+in different other namespaces (the rows):
+
+====	===	===	===	===	====	===
+-	UTS	IPC	VFS	PID	User	Net
+====	===	===	===	===	====	===
+UTS	 X
+IPC		 X	 1
+VFS			 X
+PID		 1	 1	 X
+User		 2	 2		 X
+Net						 X
+====	===	===	===	===	====	===
+
+1. Both the IPC and the PID namespaces provide IDs to address
+   object inside the kernel. E.g. semaphore with IPCID or
+   process group with pid.
+
+   In both cases, tasks shouldn't try exposing this ID to some
+   other task living in a different namespace via a shared filesystem
+   or IPC shmem/message. The fact is that this ID is only valid
+   within the namespace it was obtained in and may refer to some
+   other object in another namespace.
+
+2. Intentionally, two equal user IDs in different user namespaces
+   should not be equal from the VFS point of view. In other
+   words, user 10 in one user namespace shouldn't have the same
+   access permissions to files, belonging to user 10 in another
+   namespace.
+
+   The same is true for the IPC namespaces being shared - two users
+   from different user namespaces should not access the same IPC objects
+   even having equal UIDs.
+
+   But currently this is not so.
diff --git a/Documentation/admin-guide/namespaces/index.rst b/Documentation/admin-guide/namespaces/index.rst
new file mode 100644
index 000000000000..713ec4949fa7
--- /dev/null
+++ b/Documentation/admin-guide/namespaces/index.rst
@@ -0,0 +1,9 @@
+==========
+Namespaces
+==========
+
+.. toctree::
+   :maxdepth: 1
+
+   compatibility-list
+   resource-control
diff --git a/Documentation/admin-guide/namespaces/resource-control.rst b/Documentation/admin-guide/namespaces/resource-control.rst
new file mode 100644
index 000000000000..369556e00f0c
--- /dev/null
+++ b/Documentation/admin-guide/namespaces/resource-control.rst
@@ -0,0 +1,18 @@
+===========================
+Namespaces research control
+===========================
+
+There are a lot of kinds of objects in the kernel that don't have
+individual limits or that have limits that are ineffective when a set
+of processes is allowed to switch user ids.  With user namespaces
+enabled in a kernel for people who don't trust their users or their
+users programs to play nice this problems becomes more acute.
+
+Therefore it is recommended that memory control groups be enabled in
+kernels that enable user namespaces, and it is further recommended
+that userspace configure memory control groups to limit how much
+memory user's they don't trust to play nice can use.
+
+Memory control groups can be configured by installing the libcgroup
+package present on most distros editing /etc/cgrules.conf,
+/etc/cgconfig.conf and setting up libpam-cgroup.
diff --git a/Documentation/namespaces/compatibility-list.rst b/Documentation/namespaces/compatibility-list.rst
deleted file mode 100644
index 318800b2a943..000000000000
--- a/Documentation/namespaces/compatibility-list.rst
+++ /dev/null
@@ -1,43 +0,0 @@
-=============================
-Namespaces compatibility list
-=============================
-
-This document contains the information about the problems user
-may have when creating tasks living in different namespaces.
-
-Here's the summary. This matrix shows the known problems, that
-occur when tasks share some namespace (the columns) while living
-in different other namespaces (the rows):
-
-====	===	===	===	===	====	===
--	UTS	IPC	VFS	PID	User	Net
-====	===	===	===	===	====	===
-UTS	 X
-IPC		 X	 1
-VFS			 X
-PID		 1	 1	 X
-User		 2	 2		 X
-Net						 X
-====	===	===	===	===	====	===
-
-1. Both the IPC and the PID namespaces provide IDs to address
-   object inside the kernel. E.g. semaphore with IPCID or
-   process group with pid.
-
-   In both cases, tasks shouldn't try exposing this ID to some
-   other task living in a different namespace via a shared filesystem
-   or IPC shmem/message. The fact is that this ID is only valid
-   within the namespace it was obtained in and may refer to some
-   other object in another namespace.
-
-2. Intentionally, two equal user IDs in different user namespaces
-   should not be equal from the VFS point of view. In other
-   words, user 10 in one user namespace shouldn't have the same
-   access permissions to files, belonging to user 10 in another
-   namespace.
-
-   The same is true for the IPC namespaces being shared - two users
-   from different user namespaces should not access the same IPC objects
-   even having equal UIDs.
-
-   But currently this is not so.
diff --git a/Documentation/namespaces/index.rst b/Documentation/namespaces/index.rst
deleted file mode 100644
index bf40625dd11a..000000000000
--- a/Documentation/namespaces/index.rst
+++ /dev/null
@@ -1,11 +0,0 @@
-:orphan:
-
-==========
-Namespaces
-==========
-
-.. toctree::
-   :maxdepth: 1
-
-   compatibility-list
-   resource-control
diff --git a/Documentation/namespaces/resource-control.rst b/Documentation/namespaces/resource-control.rst
deleted file mode 100644
index 369556e00f0c..000000000000
--- a/Documentation/namespaces/resource-control.rst
+++ /dev/null
@@ -1,18 +0,0 @@
-===========================
-Namespaces research control
-===========================
-
-There are a lot of kinds of objects in the kernel that don't have
-individual limits or that have limits that are ineffective when a set
-of processes is allowed to switch user ids.  With user namespaces
-enabled in a kernel for people who don't trust their users or their
-users programs to play nice this problems becomes more acute.
-
-Therefore it is recommended that memory control groups be enabled in
-kernels that enable user namespaces, and it is further recommended
-that userspace configure memory control groups to limit how much
-memory user's they don't trust to play nice can use.
-
-Memory control groups can be configured by installing the libcgroup
-package present on most distros editing /etc/cgrules.conf,
-/etc/cgconfig.conf and setting up libpam-cgroup.
-- 
cgit 


From 43f6c0787c1781b951d686e8302377fcf85ccb8a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 16:40:16 -0300
Subject: docs: mtd: move it to the driver-api book

While I was tempted to move it to admin-guide, as some docs
there are more userspace-faced, there are some very technical
discussions about memory error correction code from the Kernel
implementer's PoV. So, let's place it inside the driver-api
book.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/driver-api/index.rst         |   1 +
 Documentation/driver-api/mtd/index.rst     |  10 +
 Documentation/driver-api/mtd/intel-spi.rst |  90 ++++
 Documentation/driver-api/mtd/nand_ecc.rst  | 763 +++++++++++++++++++++++++++++
 Documentation/driver-api/mtd/spi-nor.rst   |  66 +++
 Documentation/mtd/index.rst                |  12 -
 Documentation/mtd/intel-spi.rst            |  90 ----
 Documentation/mtd/nand_ecc.rst             | 763 -----------------------------
 Documentation/mtd/spi-nor.rst              |  66 ---
 9 files changed, 930 insertions(+), 931 deletions(-)
 create mode 100644 Documentation/driver-api/mtd/index.rst
 create mode 100644 Documentation/driver-api/mtd/intel-spi.rst
 create mode 100644 Documentation/driver-api/mtd/nand_ecc.rst
 create mode 100644 Documentation/driver-api/mtd/spi-nor.rst
 delete mode 100644 Documentation/mtd/index.rst
 delete mode 100644 Documentation/mtd/intel-spi.rst
 delete mode 100644 Documentation/mtd/nand_ecc.rst
 delete mode 100644 Documentation/mtd/spi-nor.rst

(limited to 'Documentation')

diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 410dd7110772..7ecc65093493 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -44,6 +44,7 @@ available subsections can be seen below.
    mtdnand
    miscellaneous
    mei/index
+   mtd/index
    nvdimm/index
    w1
    rapidio/index
diff --git a/Documentation/driver-api/mtd/index.rst b/Documentation/driver-api/mtd/index.rst
new file mode 100644
index 000000000000..2e0e7cc4055e
--- /dev/null
+++ b/Documentation/driver-api/mtd/index.rst
@@ -0,0 +1,10 @@
+==============================
+Memory Technology Device (MTD)
+==============================
+
+.. toctree::
+   :maxdepth: 1
+
+   intel-spi
+   nand_ecc
+   spi-nor
diff --git a/Documentation/driver-api/mtd/intel-spi.rst b/Documentation/driver-api/mtd/intel-spi.rst
new file mode 100644
index 000000000000..0e6d9cd5388d
--- /dev/null
+++ b/Documentation/driver-api/mtd/intel-spi.rst
@@ -0,0 +1,90 @@
+==============================
+Upgrading BIOS using intel-spi
+==============================
+
+Many Intel CPUs like Baytrail and Braswell include SPI serial flash host
+controller which is used to hold BIOS and other platform specific data.
+Since contents of the SPI serial flash is crucial for machine to function,
+it is typically protected by different hardware protection mechanisms to
+avoid accidental (or on purpose) overwrite of the content.
+
+Not all manufacturers protect the SPI serial flash, mainly because it
+allows upgrading the BIOS image directly from an OS.
+
+The intel-spi driver makes it possible to read and write the SPI serial
+flash, if certain protection bits are not set and locked. If it finds
+any of them set, the whole MTD device is made read-only to prevent
+partial overwrites. By default the driver exposes SPI serial flash
+contents as read-only but it can be changed from kernel command line,
+passing "intel-spi.writeable=1".
+
+Please keep in mind that overwriting the BIOS image on SPI serial flash
+might render the machine unbootable and requires special equipment like
+Dediprog to revive. You have been warned!
+
+Below are the steps how to upgrade MinnowBoard MAX BIOS directly from
+Linux.
+
+ 1) Download and extract the latest Minnowboard MAX BIOS SPI image
+    [1]. At the time writing this the latest image is v92.
+
+ 2) Install mtd-utils package [2]. We need this in order to erase the SPI
+    serial flash. Distros like Debian and Fedora have this prepackaged with
+    name "mtd-utils".
+
+ 3) Add "intel-spi.writeable=1" to the kernel command line and reboot
+    the board (you can also reload the driver passing "writeable=1" as
+    module parameter to modprobe).
+
+ 4) Once the board is up and running again, find the right MTD partition
+    (it is named as "BIOS")::
+
+	# cat /proc/mtd
+	dev:    size   erasesize  name
+	mtd0: 00800000 00001000 "BIOS"
+
+    So here it will be /dev/mtd0 but it may vary.
+
+ 5) Make backup of the existing image first::
+
+	# dd if=/dev/mtd0ro of=bios.bak
+	16384+0 records in
+	16384+0 records out
+	8388608 bytes (8.4 MB) copied, 10.0269 s, 837 kB/s
+
+ 6) Verify the backup:
+
+	# sha1sum /dev/mtd0ro bios.bak
+	fdbb011920572ca6c991377c4b418a0502668b73  /dev/mtd0ro
+	fdbb011920572ca6c991377c4b418a0502668b73  bios.bak
+
+    The SHA1 sums must match. Otherwise do not continue any further!
+
+ 7) Erase the SPI serial flash. After this step, do not reboot the
+    board! Otherwise it will not start anymore::
+
+	# flash_erase /dev/mtd0 0 0
+	Erasing 4 Kibyte @ 7ff000 -- 100 % complete
+
+ 8) Once completed without errors you can write the new BIOS image:
+
+    # dd if=MNW2MAX1.X64.0092.R01.1605221712.bin of=/dev/mtd0
+
+ 9) Verify that the new content of the SPI serial flash matches the new
+    BIOS image::
+
+	# sha1sum /dev/mtd0ro MNW2MAX1.X64.0092.R01.1605221712.bin
+	9b4df9e4be2057fceec3a5529ec3d950836c87a2  /dev/mtd0ro
+	9b4df9e4be2057fceec3a5529ec3d950836c87a2 MNW2MAX1.X64.0092.R01.1605221712.bin
+
+    The SHA1 sums should match.
+
+ 10) Now you can reboot your board and observe the new BIOS starting up
+     properly.
+
+References
+----------
+
+[1] https://firmware.intel.com/sites/default/files/MinnowBoard%2EMAX_%2EX64%2E92%2ER01%2Ezip
+
+[2] http://www.linux-mtd.infradead.org/
diff --git a/Documentation/driver-api/mtd/nand_ecc.rst b/Documentation/driver-api/mtd/nand_ecc.rst
new file mode 100644
index 000000000000..e8d3c53a5056
--- /dev/null
+++ b/Documentation/driver-api/mtd/nand_ecc.rst
@@ -0,0 +1,763 @@
+==========================
+NAND Error-correction Code
+==========================
+
+Introduction
+============
+
+Having looked at the linux mtd/nand driver and more specific at nand_ecc.c
+I felt there was room for optimisation. I bashed the code for a few hours
+performing tricks like table lookup removing superfluous code etc.
+After that the speed was increased by 35-40%.
+Still I was not too happy as I felt there was additional room for improvement.
+
+Bad! I was hooked.
+I decided to annotate my steps in this file. Perhaps it is useful to someone
+or someone learns something from it.
+
+
+The problem
+===========
+
+NAND flash (at least SLC one) typically has sectors of 256 bytes.
+However NAND flash is not extremely reliable so some error detection
+(and sometimes correction) is needed.
+
+This is done by means of a Hamming code. I'll try to explain it in
+laymans terms (and apologies to all the pro's in the field in case I do
+not use the right terminology, my coding theory class was almost 30
+years ago, and I must admit it was not one of my favourites).
+
+As I said before the ecc calculation is performed on sectors of 256
+bytes. This is done by calculating several parity bits over the rows and
+columns. The parity used is even parity which means that the parity bit = 1
+if the data over which the parity is calculated is 1 and the parity bit = 0
+if the data over which the parity is calculated is 0. So the total
+number of bits over the data over which the parity is calculated + the
+parity bit is even. (see wikipedia if you can't follow this).
+Parity is often calculated by means of an exclusive or operation,
+sometimes also referred to as xor. In C the operator for xor is ^
+
+Back to ecc.
+Let's give a small figure:
+
+=========  ==== ==== ==== ==== ==== ==== ==== ====   === === === === ====
+byte   0:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp2 rp4 ... rp14
+byte   1:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp2 rp4 ... rp14
+byte   2:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp3 rp4 ... rp14
+byte   3:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp3 rp4 ... rp14
+byte   4:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp2 rp5 ... rp14
+...
+byte 254:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp3 rp5 ... rp15
+byte 255:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp3 rp5 ... rp15
+           cp1  cp0  cp1  cp0  cp1  cp0  cp1  cp0
+           cp3  cp3  cp2  cp2  cp3  cp3  cp2  cp2
+           cp5  cp5  cp5  cp5  cp4  cp4  cp4  cp4
+=========  ==== ==== ==== ==== ==== ==== ==== ====   === === === === ====
+
+This figure represents a sector of 256 bytes.
+cp is my abbreviation for column parity, rp for row parity.
+
+Let's start to explain column parity.
+
+- cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
+
+  so the sum of all bit0, bit2, bit4 and bit6 values + cp0 itself is even.
+
+Similarly cp1 is the sum of all bit1, bit3, bit5 and bit7.
+
+- cp2 is the parity over bit0, bit1, bit4 and bit5
+- cp3 is the parity over bit2, bit3, bit6 and bit7.
+- cp4 is the parity over bit0, bit1, bit2 and bit3.
+- cp5 is the parity over bit4, bit5, bit6 and bit7.
+
+Note that each of cp0 .. cp5 is exactly one bit.
+
+Row parity actually works almost the same.
+
+- rp0 is the parity of all even bytes (0, 2, 4, 6, ... 252, 254)
+- rp1 is the parity of all odd bytes (1, 3, 5, 7, ..., 253, 255)
+- rp2 is the parity of all bytes 0, 1, 4, 5, 8, 9, ...
+  (so handle two bytes, then skip 2 bytes).
+- rp3 is covers the half rp2 does not cover (bytes 2, 3, 6, 7, 10, 11, ...)
+- for rp4 the rule is cover 4 bytes, skip 4 bytes, cover 4 bytes, skip 4 etc.
+
+  so rp4 calculates parity over bytes 0, 1, 2, 3, 8, 9, 10, 11, 16, ...)
+- and rp5 covers the other half, so bytes 4, 5, 6, 7, 12, 13, 14, 15, 20, ..
+
+The story now becomes quite boring. I guess you get the idea.
+
+- rp6 covers 8 bytes then skips 8 etc
+- rp7 skips 8 bytes then covers 8 etc
+- rp8 covers 16 bytes then skips 16 etc
+- rp9 skips 16 bytes then covers 16 etc
+- rp10 covers 32 bytes then skips 32 etc
+- rp11 skips 32 bytes then covers 32 etc
+- rp12 covers 64 bytes then skips 64 etc
+- rp13 skips 64 bytes then covers 64 etc
+- rp14 covers 128 bytes then skips 128
+- rp15 skips 128 bytes then covers 128
+
+In the end the parity bits are grouped together in three bytes as
+follows:
+
+=====  ===== ===== ===== ===== ===== ===== ===== =====
+ECC    Bit 7 Bit 6 Bit 5 Bit 4 Bit 3 Bit 2 Bit 1 Bit 0
+=====  ===== ===== ===== ===== ===== ===== ===== =====
+ECC 0   rp07  rp06  rp05  rp04  rp03  rp02  rp01  rp00
+ECC 1   rp15  rp14  rp13  rp12  rp11  rp10  rp09  rp08
+ECC 2   cp5   cp4   cp3   cp2   cp1   cp0      1     1
+=====  ===== ===== ===== ===== ===== ===== ===== =====
+
+I detected after writing this that ST application note AN1823
+(http://www.st.com/stonline/) gives a much
+nicer picture.(but they use line parity as term where I use row parity)
+Oh well, I'm graphically challenged, so suffer with me for a moment :-)
+
+And I could not reuse the ST picture anyway for copyright reasons.
+
+
+Attempt 0
+=========
+
+Implementing the parity calculation is pretty simple.
+In C pseudocode::
+
+  for (i = 0; i < 256; i++)
+  {
+    if (i & 0x01)
+       rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
+    else
+       rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp0;
+    if (i & 0x02)
+       rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3;
+    else
+       rp2 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp2;
+    if (i & 0x04)
+      rp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp5;
+    else
+      rp4 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp4;
+    if (i & 0x08)
+      rp7 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp7;
+    else
+      rp6 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp6;
+    if (i & 0x10)
+      rp9 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp9;
+    else
+      rp8 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp8;
+    if (i & 0x20)
+      rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11;
+    else
+      rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
+    if (i & 0x40)
+      rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13;
+    else
+      rp12 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp12;
+    if (i & 0x80)
+      rp15 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp15;
+    else
+      rp14 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp14;
+    cp0 = bit6 ^ bit4 ^ bit2 ^ bit0 ^ cp0;
+    cp1 = bit7 ^ bit5 ^ bit3 ^ bit1 ^ cp1;
+    cp2 = bit5 ^ bit4 ^ bit1 ^ bit0 ^ cp2;
+    cp3 = bit7 ^ bit6 ^ bit3 ^ bit2 ^ cp3
+    cp4 = bit3 ^ bit2 ^ bit1 ^ bit0 ^ cp4
+    cp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ cp5
+  }
+
+
+Analysis 0
+==========
+
+C does have bitwise operators but not really operators to do the above
+efficiently (and most hardware has no such instructions either).
+Therefore without implementing this it was clear that the code above was
+not going to bring me a Nobel prize :-)
+
+Fortunately the exclusive or operation is commutative, so we can combine
+the values in any order. So instead of calculating all the bits
+individually, let us try to rearrange things.
+For the column parity this is easy. We can just xor the bytes and in the
+end filter out the relevant bits. This is pretty nice as it will bring
+all cp calculation out of the for loop.
+
+Similarly we can first xor the bytes for the various rows.
+This leads to:
+
+
+Attempt 1
+=========
+
+::
+
+  const char parity[256] = {
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
+  };
+
+  void ecc1(const unsigned char *buf, unsigned char *code)
+  {
+      int i;
+      const unsigned char *bp = buf;
+      unsigned char cur;
+      unsigned char rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+      unsigned char rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+      unsigned char par;
+
+      par = 0;
+      rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+      rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+      rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+      rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+      for (i = 0; i < 256; i++)
+      {
+          cur = *bp++;
+          par ^= cur;
+          if (i & 0x01) rp1 ^= cur; else rp0 ^= cur;
+          if (i & 0x02) rp3 ^= cur; else rp2 ^= cur;
+          if (i & 0x04) rp5 ^= cur; else rp4 ^= cur;
+          if (i & 0x08) rp7 ^= cur; else rp6 ^= cur;
+          if (i & 0x10) rp9 ^= cur; else rp8 ^= cur;
+          if (i & 0x20) rp11 ^= cur; else rp10 ^= cur;
+          if (i & 0x40) rp13 ^= cur; else rp12 ^= cur;
+          if (i & 0x80) rp15 ^= cur; else rp14 ^= cur;
+      }
+      code[0] =
+          (parity[rp7] << 7) |
+          (parity[rp6] << 6) |
+          (parity[rp5] << 5) |
+          (parity[rp4] << 4) |
+          (parity[rp3] << 3) |
+          (parity[rp2] << 2) |
+          (parity[rp1] << 1) |
+          (parity[rp0]);
+      code[1] =
+          (parity[rp15] << 7) |
+          (parity[rp14] << 6) |
+          (parity[rp13] << 5) |
+          (parity[rp12] << 4) |
+          (parity[rp11] << 3) |
+          (parity[rp10] << 2) |
+          (parity[rp9]  << 1) |
+          (parity[rp8]);
+      code[2] =
+          (parity[par & 0xf0] << 7) |
+          (parity[par & 0x0f] << 6) |
+          (parity[par & 0xcc] << 5) |
+          (parity[par & 0x33] << 4) |
+          (parity[par & 0xaa] << 3) |
+          (parity[par & 0x55] << 2);
+      code[0] = ~code[0];
+      code[1] = ~code[1];
+      code[2] = ~code[2];
+  }
+
+Still pretty straightforward. The last three invert statements are there to
+give a checksum of 0xff 0xff 0xff for an empty flash. In an empty flash
+all data is 0xff, so the checksum then matches.
+
+I also introduced the parity lookup. I expected this to be the fastest
+way to calculate the parity, but I will investigate alternatives later
+on.
+
+
+Analysis 1
+==========
+
+The code works, but is not terribly efficient. On my system it took
+almost 4 times as much time as the linux driver code. But hey, if it was
+*that* easy this would have been done long before.
+No pain. no gain.
+
+Fortunately there is plenty of room for improvement.
+
+In step 1 we moved from bit-wise calculation to byte-wise calculation.
+However in C we can also use the unsigned long data type and virtually
+every modern microprocessor supports 32 bit operations, so why not try
+to write our code in such a way that we process data in 32 bit chunks.
+
+Of course this means some modification as the row parity is byte by
+byte. A quick analysis:
+for the column parity we use the par variable. When extending to 32 bits
+we can in the end easily calculate rp0 and rp1 from it.
+(because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0
+respectively, from MSB to LSB)
+also rp2 and rp3 can be easily retrieved from par as rp3 covers the
+first two MSBs and rp2 covers the last two LSBs.
+
+Note that of course now the loop is executed only 64 times (256/4).
+And note that care must taken wrt byte ordering. The way bytes are
+ordered in a long is machine dependent, and might affect us.
+Anyway, if there is an issue: this code is developed on x86 (to be
+precise: a DELL PC with a D920 Intel CPU)
+
+And of course the performance might depend on alignment, but I expect
+that the I/O buffers in the nand driver are aligned properly (and
+otherwise that should be fixed to get maximum performance).
+
+Let's give it a try...
+
+
+Attempt 2
+=========
+
+::
+
+  extern const char parity[256];
+
+  void ecc2(const unsigned char *buf, unsigned char *code)
+  {
+      int i;
+      const unsigned long *bp = (unsigned long *)buf;
+      unsigned long cur;
+      unsigned long rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+      unsigned long rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+      unsigned long par;
+
+      par = 0;
+      rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+      rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+      rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+      rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+      for (i = 0; i < 64; i++)
+      {
+          cur = *bp++;
+          par ^= cur;
+          if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+          if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+          if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+          if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+          if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+          if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+      }
+      /*
+         we need to adapt the code generation for the fact that rp vars are now
+         long; also the column parity calculation needs to be changed.
+         we'll bring rp4 to 15 back to single byte entities by shifting and
+         xoring
+      */
+      rp4 ^= (rp4 >> 16); rp4 ^= (rp4 >> 8); rp4 &= 0xff;
+      rp5 ^= (rp5 >> 16); rp5 ^= (rp5 >> 8); rp5 &= 0xff;
+      rp6 ^= (rp6 >> 16); rp6 ^= (rp6 >> 8); rp6 &= 0xff;
+      rp7 ^= (rp7 >> 16); rp7 ^= (rp7 >> 8); rp7 &= 0xff;
+      rp8 ^= (rp8 >> 16); rp8 ^= (rp8 >> 8); rp8 &= 0xff;
+      rp9 ^= (rp9 >> 16); rp9 ^= (rp9 >> 8); rp9 &= 0xff;
+      rp10 ^= (rp10 >> 16); rp10 ^= (rp10 >> 8); rp10 &= 0xff;
+      rp11 ^= (rp11 >> 16); rp11 ^= (rp11 >> 8); rp11 &= 0xff;
+      rp12 ^= (rp12 >> 16); rp12 ^= (rp12 >> 8); rp12 &= 0xff;
+      rp13 ^= (rp13 >> 16); rp13 ^= (rp13 >> 8); rp13 &= 0xff;
+      rp14 ^= (rp14 >> 16); rp14 ^= (rp14 >> 8); rp14 &= 0xff;
+      rp15 ^= (rp15 >> 16); rp15 ^= (rp15 >> 8); rp15 &= 0xff;
+      rp3 = (par >> 16); rp3 ^= (rp3 >> 8); rp3 &= 0xff;
+      rp2 = par & 0xffff; rp2 ^= (rp2 >> 8); rp2 &= 0xff;
+      par ^= (par >> 16);
+      rp1 = (par >> 8); rp1 &= 0xff;
+      rp0 = (par & 0xff);
+      par ^= (par >> 8); par &= 0xff;
+
+      code[0] =
+          (parity[rp7] << 7) |
+          (parity[rp6] << 6) |
+          (parity[rp5] << 5) |
+          (parity[rp4] << 4) |
+          (parity[rp3] << 3) |
+          (parity[rp2] << 2) |
+          (parity[rp1] << 1) |
+          (parity[rp0]);
+      code[1] =
+          (parity[rp15] << 7) |
+          (parity[rp14] << 6) |
+          (parity[rp13] << 5) |
+          (parity[rp12] << 4) |
+          (parity[rp11] << 3) |
+          (parity[rp10] << 2) |
+          (parity[rp9]  << 1) |
+          (parity[rp8]);
+      code[2] =
+          (parity[par & 0xf0] << 7) |
+          (parity[par & 0x0f] << 6) |
+          (parity[par & 0xcc] << 5) |
+          (parity[par & 0x33] << 4) |
+          (parity[par & 0xaa] << 3) |
+          (parity[par & 0x55] << 2);
+      code[0] = ~code[0];
+      code[1] = ~code[1];
+      code[2] = ~code[2];
+  }
+
+The parity array is not shown any more. Note also that for these
+examples I kinda deviated from my regular programming style by allowing
+multiple statements on a line, not using { } in then and else blocks
+with only a single statement and by using operators like ^=
+
+
+Analysis 2
+==========
+
+The code (of course) works, and hurray: we are a little bit faster than
+the linux driver code (about 15%). But wait, don't cheer too quickly.
+There is more to be gained.
+If we look at e.g. rp14 and rp15 we see that we either xor our data with
+rp14 or with rp15. However we also have par which goes over all data.
+This means there is no need to calculate rp14 as it can be calculated from
+rp15 through rp14 = par ^ rp15, because par = rp14 ^ rp15;
+(or if desired we can avoid calculating rp15 and calculate it from
+rp14).  That is why some places refer to inverse parity.
+Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13.
+Effectively this means we can eliminate the else clause from the if
+statements. Also we can optimise the calculation in the end a little bit
+by going from long to byte first. Actually we can even avoid the table
+lookups
+
+Attempt 3
+=========
+
+Odd replaced::
+
+          if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+          if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+          if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+          if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+          if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+          if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+
+with::
+
+          if (i & 0x01) rp5 ^= cur;
+          if (i & 0x02) rp7 ^= cur;
+          if (i & 0x04) rp9 ^= cur;
+          if (i & 0x08) rp11 ^= cur;
+          if (i & 0x10) rp13 ^= cur;
+          if (i & 0x20) rp15 ^= cur;
+
+and outside the loop added::
+
+          rp4  = par ^ rp5;
+          rp6  = par ^ rp7;
+          rp8  = par ^ rp9;
+          rp10  = par ^ rp11;
+          rp12  = par ^ rp13;
+          rp14  = par ^ rp15;
+
+And after that the code takes about 30% more time, although the number of
+statements is reduced. This is also reflected in the assembly code.
+
+
+Analysis 3
+==========
+
+Very weird. Guess it has to do with caching or instruction parallellism
+or so. I also tried on an eeePC (Celeron, clocked at 900 Mhz). Interesting
+observation was that this one is only 30% slower (according to time)
+executing the code as my 3Ghz D920 processor.
+
+Well, it was expected not to be easy so maybe instead move to a
+different track: let's move back to the code from attempt2 and do some
+loop unrolling. This will eliminate a few if statements. I'll try
+different amounts of unrolling to see what works best.
+
+
+Attempt 4
+=========
+
+Unrolled the loop 1, 2, 3 and 4 times.
+For 4 the code starts with::
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++;
+        par ^= cur;
+        rp4 ^= cur;
+        rp6 ^= cur;
+        rp8 ^= cur;
+        rp10 ^= cur;
+        if (i & 0x1) rp13 ^= cur; else rp12 ^= cur;
+        if (i & 0x2) rp15 ^= cur; else rp14 ^= cur;
+        cur = *bp++;
+        par ^= cur;
+        rp5 ^= cur;
+        rp6 ^= cur;
+        ...
+
+
+Analysis 4
+==========
+
+Unrolling once gains about 15%
+
+Unrolling twice keeps the gain at about 15%
+
+Unrolling three times gives a gain of 30% compared to attempt 2.
+
+Unrolling four times gives a marginal improvement compared to unrolling
+three times.
+
+I decided to proceed with a four time unrolled loop anyway. It was my gut
+feeling that in the next steps I would obtain additional gain from it.
+
+The next step was triggered by the fact that par contains the xor of all
+bytes and rp4 and rp5 each contain the xor of half of the bytes.
+So in effect par = rp4 ^ rp5. But as xor is commutative we can also say
+that rp5 = par ^ rp4. So no need to keep both rp4 and rp5 around. We can
+eliminate rp5 (or rp4, but I already foresaw another optimisation).
+The same holds for rp6/7, rp8/9, rp10/11 rp12/13 and rp14/15.
+
+
+Attempt 5
+=========
+
+Effectively so all odd digit rp assignments in the loop were removed.
+This included the else clause of the if statements.
+Of course after the loop we need to correct things by adding code like::
+
+    rp5 = par ^ rp4;
+
+Also the initial assignments (rp5 = 0; etc) could be removed.
+Along the line I also removed the initialisation of rp0/1/2/3.
+
+
+Analysis 5
+==========
+
+Measurements showed this was a good move. The run-time roughly halved
+compared with attempt 4 with 4 times unrolled, and we only require 1/3rd
+of the processor time compared to the current code in the linux kernel.
+
+However, still I thought there was more. I didn't like all the if
+statements. Why not keep a running parity and only keep the last if
+statement. Time for yet another version!
+
+
+Attempt 6
+=========
+
+THe code within the for loop was changed to::
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++; tmppar  = cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
+
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp8 ^= cur;
+
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur;
+
+        par ^= tmppar;
+        if ((i & 0x1) == 0) rp12 ^= tmppar;
+        if ((i & 0x2) == 0) rp14 ^= tmppar;
+    }
+
+As you can see tmppar is used to accumulate the parity within a for
+iteration. In the last 3 statements is added to par and, if needed,
+to rp12 and rp14.
+
+While making the changes I also found that I could exploit that tmppar
+contains the running parity for this iteration. So instead of having:
+rp4 ^= cur; rp6 ^= cur;
+I removed the rp6 ^= cur; statement and did rp6 ^= tmppar; on next
+statement. A similar change was done for rp8 and rp10
+
+
+Analysis 6
+==========
+
+Measuring this code again showed big gain. When executing the original
+linux code 1 million times, this took about 1 second on my system.
+(using time to measure the performance). After this iteration I was back
+to 0.075 sec. Actually I had to decide to start measuring over 10
+million iterations in order not to lose too much accuracy. This one
+definitely seemed to be the jackpot!
+
+There is a little bit more room for improvement though. There are three
+places with statements::
+
+	rp4 ^= cur; rp6 ^= cur;
+
+It seems more efficient to also maintain a variable rp4_6 in the while
+loop; This eliminates 3 statements per loop. Of course after the loop we
+need to correct by adding::
+
+	rp4 ^= rp4_6;
+	rp6 ^= rp4_6
+
+Furthermore there are 4 sequential assignments to rp8. This can be
+encoded slightly more efficiently by saving tmppar before those 4 lines
+and later do rp8 = rp8 ^ tmppar ^ notrp8;
+(where notrp8 is the value of rp8 before those 4 lines).
+Again a use of the commutative property of xor.
+Time for a new test!
+
+
+Attempt 7
+=========
+
+The new code now looks like::
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++; tmppar  = cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
+
+        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+
+        notrp8 = tmppar;
+        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur;
+        rp8 = rp8 ^ tmppar ^ notrp8;
+
+        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur;
+
+        par ^= tmppar;
+        if ((i & 0x1) == 0) rp12 ^= tmppar;
+        if ((i & 0x2) == 0) rp14 ^= tmppar;
+    }
+    rp4 ^= rp4_6;
+    rp6 ^= rp4_6;
+
+
+Not a big change, but every penny counts :-)
+
+
+Analysis 7
+==========
+
+Actually this made things worse. Not very much, but I don't want to move
+into the wrong direction. Maybe something to investigate later. Could
+have to do with caching again.
+
+Guess that is what there is to win within the loop. Maybe unrolling one
+more time will help. I'll keep the optimisations from 7 for now.
+
+
+Attempt 8
+=========
+
+Unrolled the loop one more time.
+
+
+Analysis 8
+==========
+
+This makes things worse. Let's stick with attempt 6 and continue from there.
+Although it seems that the code within the loop cannot be optimised
+further there is still room to optimize the generation of the ecc codes.
+We can simply calculate the total parity. If this is 0 then rp4 = rp5
+etc. If the parity is 1, then rp4 = !rp5;
+
+But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits
+in the result byte and then do something like::
+
+    code[0] |= (code[0] << 1);
+
+Lets test this.
+
+
+Attempt 9
+=========
+
+Changed the code but again this slightly degrades performance. Tried all
+kind of other things, like having dedicated parity arrays to avoid the
+shift after parity[rp7] << 7; No gain.
+Change the lookup using the parity array by using shift operators (e.g.
+replace parity[rp7] << 7 with::
+
+	rp7 ^= (rp7 << 4);
+	rp7 ^= (rp7 << 2);
+	rp7 ^= (rp7 << 1);
+	rp7 &= 0x80;
+
+No gain.
+
+The only marginal change was inverting the parity bits, so we can remove
+the last three invert statements.
+
+Ah well, pity this does not deliver more. Then again 10 million
+iterations using the linux driver code takes between 13 and 13.5
+seconds, whereas my code now takes about 0.73 seconds for those 10
+million iterations. So basically I've improved the performance by a
+factor 18 on my system. Not that bad. Of course on different hardware
+you will get different results. No warranties!
+
+But of course there is no such thing as a free lunch. The codesize almost
+tripled (from 562 bytes to 1434 bytes). Then again, it is not that much.
+
+
+Correcting errors
+=================
+
+For correcting errors I again used the ST application note as a starter,
+but I also peeked at the existing code.
+
+The algorithm itself is pretty straightforward. Just xor the given and
+the calculated ecc. If all bytes are 0 there is no problem. If 11 bits
+are 1 we have one correctable bit error. If there is 1 bit 1, we have an
+error in the given ecc code.
+
+It proved to be fastest to do some table lookups. Performance gain
+introduced by this is about a factor 2 on my system when a repair had to
+be done, and 1% or so if no repair had to be done.
+
+Code size increased from 330 bytes to 686 bytes for this function.
+(gcc 4.2, -O3)
+
+
+Conclusion
+==========
+
+The gain when calculating the ecc is tremendous. Om my development hardware
+a speedup of a factor of 18 for ecc calculation was achieved. On a test on an
+embedded system with a MIPS core a factor 7 was obtained.
+
+On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
+5 (big endian mode, gcc 4.1.2, -O3)
+
+For correction not much gain could be obtained (as bitflips are rare). Then
+again there are also much less cycles spent there.
+
+It seems there is not much more gain possible in this, at least when
+programmed in C. Of course it might be possible to squeeze something more
+out of it with an assembler program, but due to pipeline behaviour etc
+this is very tricky (at least for intel hw).
+
+Author: Frans Meulenbroeks
+
+Copyright (C) 2008 Koninklijke Philips Electronics NV.
diff --git a/Documentation/driver-api/mtd/spi-nor.rst b/Documentation/driver-api/mtd/spi-nor.rst
new file mode 100644
index 000000000000..f5333e3bf486
--- /dev/null
+++ b/Documentation/driver-api/mtd/spi-nor.rst
@@ -0,0 +1,66 @@
+=================
+SPI NOR framework
+=================
+
+Part I - Why do we need this framework?
+---------------------------------------
+
+SPI bus controllers (drivers/spi/) only deal with streams of bytes; the bus
+controller operates agnostic of the specific device attached. However, some
+controllers (such as Freescale's QuadSPI controller) cannot easily handle
+arbitrary streams of bytes, but rather are designed specifically for SPI NOR.
+
+In particular, Freescale's QuadSPI controller must know the NOR commands to
+find the right LUT sequence. Unfortunately, the SPI subsystem has no notion of
+opcodes, addresses, or data payloads; a SPI controller simply knows to send or
+receive bytes (Tx and Rx). Therefore, we must define a new layering scheme under
+which the controller driver is aware of the opcodes, addressing, and other
+details of the SPI NOR protocol.
+
+Part II - How does the framework work?
+--------------------------------------
+
+This framework just adds a new layer between the MTD and the SPI bus driver.
+With this new layer, the SPI NOR controller driver does not depend on the
+m25p80 code anymore.
+
+Before this framework, the layer is like::
+
+                   MTD
+         ------------------------
+                  m25p80
+         ------------------------
+	       SPI bus driver
+         ------------------------
+	        SPI NOR chip
+
+   After this framework, the layer is like:
+                   MTD
+         ------------------------
+              SPI NOR framework
+         ------------------------
+                  m25p80
+         ------------------------
+	       SPI bus driver
+         ------------------------
+	       SPI NOR chip
+
+  With the SPI NOR controller driver (Freescale QuadSPI), it looks like:
+                   MTD
+         ------------------------
+              SPI NOR framework
+         ------------------------
+                fsl-quadSPI
+         ------------------------
+	       SPI NOR chip
+
+Part III - How can drivers use the framework?
+---------------------------------------------
+
+The main API is spi_nor_scan(). Before you call the hook, a driver should
+initialize the necessary fields for spi_nor{}. Please see
+drivers/mtd/spi-nor/spi-nor.c for detail. Please also refer to fsl-quadspi.c
+when you want to write a new driver for a SPI NOR controller.
+Another API is spi_nor_restore(), this is used to restore the status of SPI
+flash chip such as addressing mode. Call it whenever detach the driver from
+device or reboot the system.
diff --git a/Documentation/mtd/index.rst b/Documentation/mtd/index.rst
deleted file mode 100644
index 4fdae418ac97..000000000000
--- a/Documentation/mtd/index.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-:orphan:
-
-==============================
-Memory Technology Device (MTD)
-==============================
-
-.. toctree::
-   :maxdepth: 1
-
-   intel-spi
-   nand_ecc
-   spi-nor
diff --git a/Documentation/mtd/intel-spi.rst b/Documentation/mtd/intel-spi.rst
deleted file mode 100644
index 0e6d9cd5388d..000000000000
--- a/Documentation/mtd/intel-spi.rst
+++ /dev/null
@@ -1,90 +0,0 @@
-==============================
-Upgrading BIOS using intel-spi
-==============================
-
-Many Intel CPUs like Baytrail and Braswell include SPI serial flash host
-controller which is used to hold BIOS and other platform specific data.
-Since contents of the SPI serial flash is crucial for machine to function,
-it is typically protected by different hardware protection mechanisms to
-avoid accidental (or on purpose) overwrite of the content.
-
-Not all manufacturers protect the SPI serial flash, mainly because it
-allows upgrading the BIOS image directly from an OS.
-
-The intel-spi driver makes it possible to read and write the SPI serial
-flash, if certain protection bits are not set and locked. If it finds
-any of them set, the whole MTD device is made read-only to prevent
-partial overwrites. By default the driver exposes SPI serial flash
-contents as read-only but it can be changed from kernel command line,
-passing "intel-spi.writeable=1".
-
-Please keep in mind that overwriting the BIOS image on SPI serial flash
-might render the machine unbootable and requires special equipment like
-Dediprog to revive. You have been warned!
-
-Below are the steps how to upgrade MinnowBoard MAX BIOS directly from
-Linux.
-
- 1) Download and extract the latest Minnowboard MAX BIOS SPI image
-    [1]. At the time writing this the latest image is v92.
-
- 2) Install mtd-utils package [2]. We need this in order to erase the SPI
-    serial flash. Distros like Debian and Fedora have this prepackaged with
-    name "mtd-utils".
-
- 3) Add "intel-spi.writeable=1" to the kernel command line and reboot
-    the board (you can also reload the driver passing "writeable=1" as
-    module parameter to modprobe).
-
- 4) Once the board is up and running again, find the right MTD partition
-    (it is named as "BIOS")::
-
-	# cat /proc/mtd
-	dev:    size   erasesize  name
-	mtd0: 00800000 00001000 "BIOS"
-
-    So here it will be /dev/mtd0 but it may vary.
-
- 5) Make backup of the existing image first::
-
-	# dd if=/dev/mtd0ro of=bios.bak
-	16384+0 records in
-	16384+0 records out
-	8388608 bytes (8.4 MB) copied, 10.0269 s, 837 kB/s
-
- 6) Verify the backup:
-
-	# sha1sum /dev/mtd0ro bios.bak
-	fdbb011920572ca6c991377c4b418a0502668b73  /dev/mtd0ro
-	fdbb011920572ca6c991377c4b418a0502668b73  bios.bak
-
-    The SHA1 sums must match. Otherwise do not continue any further!
-
- 7) Erase the SPI serial flash. After this step, do not reboot the
-    board! Otherwise it will not start anymore::
-
-	# flash_erase /dev/mtd0 0 0
-	Erasing 4 Kibyte @ 7ff000 -- 100 % complete
-
- 8) Once completed without errors you can write the new BIOS image:
-
-    # dd if=MNW2MAX1.X64.0092.R01.1605221712.bin of=/dev/mtd0
-
- 9) Verify that the new content of the SPI serial flash matches the new
-    BIOS image::
-
-	# sha1sum /dev/mtd0ro MNW2MAX1.X64.0092.R01.1605221712.bin
-	9b4df9e4be2057fceec3a5529ec3d950836c87a2  /dev/mtd0ro
-	9b4df9e4be2057fceec3a5529ec3d950836c87a2 MNW2MAX1.X64.0092.R01.1605221712.bin
-
-    The SHA1 sums should match.
-
- 10) Now you can reboot your board and observe the new BIOS starting up
-     properly.
-
-References
-----------
-
-[1] https://firmware.intel.com/sites/default/files/MinnowBoard%2EMAX_%2EX64%2E92%2ER01%2Ezip
-
-[2] http://www.linux-mtd.infradead.org/
diff --git a/Documentation/mtd/nand_ecc.rst b/Documentation/mtd/nand_ecc.rst
deleted file mode 100644
index e8d3c53a5056..000000000000
--- a/Documentation/mtd/nand_ecc.rst
+++ /dev/null
@@ -1,763 +0,0 @@
-==========================
-NAND Error-correction Code
-==========================
-
-Introduction
-============
-
-Having looked at the linux mtd/nand driver and more specific at nand_ecc.c
-I felt there was room for optimisation. I bashed the code for a few hours
-performing tricks like table lookup removing superfluous code etc.
-After that the speed was increased by 35-40%.
-Still I was not too happy as I felt there was additional room for improvement.
-
-Bad! I was hooked.
-I decided to annotate my steps in this file. Perhaps it is useful to someone
-or someone learns something from it.
-
-
-The problem
-===========
-
-NAND flash (at least SLC one) typically has sectors of 256 bytes.
-However NAND flash is not extremely reliable so some error detection
-(and sometimes correction) is needed.
-
-This is done by means of a Hamming code. I'll try to explain it in
-laymans terms (and apologies to all the pro's in the field in case I do
-not use the right terminology, my coding theory class was almost 30
-years ago, and I must admit it was not one of my favourites).
-
-As I said before the ecc calculation is performed on sectors of 256
-bytes. This is done by calculating several parity bits over the rows and
-columns. The parity used is even parity which means that the parity bit = 1
-if the data over which the parity is calculated is 1 and the parity bit = 0
-if the data over which the parity is calculated is 0. So the total
-number of bits over the data over which the parity is calculated + the
-parity bit is even. (see wikipedia if you can't follow this).
-Parity is often calculated by means of an exclusive or operation,
-sometimes also referred to as xor. In C the operator for xor is ^
-
-Back to ecc.
-Let's give a small figure:
-
-=========  ==== ==== ==== ==== ==== ==== ==== ====   === === === === ====
-byte   0:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp2 rp4 ... rp14
-byte   1:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp2 rp4 ... rp14
-byte   2:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp3 rp4 ... rp14
-byte   3:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp3 rp4 ... rp14
-byte   4:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp2 rp5 ... rp14
-...
-byte 254:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp3 rp5 ... rp15
-byte 255:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp3 rp5 ... rp15
-           cp1  cp0  cp1  cp0  cp1  cp0  cp1  cp0
-           cp3  cp3  cp2  cp2  cp3  cp3  cp2  cp2
-           cp5  cp5  cp5  cp5  cp4  cp4  cp4  cp4
-=========  ==== ==== ==== ==== ==== ==== ==== ====   === === === === ====
-
-This figure represents a sector of 256 bytes.
-cp is my abbreviation for column parity, rp for row parity.
-
-Let's start to explain column parity.
-
-- cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
-
-  so the sum of all bit0, bit2, bit4 and bit6 values + cp0 itself is even.
-
-Similarly cp1 is the sum of all bit1, bit3, bit5 and bit7.
-
-- cp2 is the parity over bit0, bit1, bit4 and bit5
-- cp3 is the parity over bit2, bit3, bit6 and bit7.
-- cp4 is the parity over bit0, bit1, bit2 and bit3.
-- cp5 is the parity over bit4, bit5, bit6 and bit7.
-
-Note that each of cp0 .. cp5 is exactly one bit.
-
-Row parity actually works almost the same.
-
-- rp0 is the parity of all even bytes (0, 2, 4, 6, ... 252, 254)
-- rp1 is the parity of all odd bytes (1, 3, 5, 7, ..., 253, 255)
-- rp2 is the parity of all bytes 0, 1, 4, 5, 8, 9, ...
-  (so handle two bytes, then skip 2 bytes).
-- rp3 is covers the half rp2 does not cover (bytes 2, 3, 6, 7, 10, 11, ...)
-- for rp4 the rule is cover 4 bytes, skip 4 bytes, cover 4 bytes, skip 4 etc.
-
-  so rp4 calculates parity over bytes 0, 1, 2, 3, 8, 9, 10, 11, 16, ...)
-- and rp5 covers the other half, so bytes 4, 5, 6, 7, 12, 13, 14, 15, 20, ..
-
-The story now becomes quite boring. I guess you get the idea.
-
-- rp6 covers 8 bytes then skips 8 etc
-- rp7 skips 8 bytes then covers 8 etc
-- rp8 covers 16 bytes then skips 16 etc
-- rp9 skips 16 bytes then covers 16 etc
-- rp10 covers 32 bytes then skips 32 etc
-- rp11 skips 32 bytes then covers 32 etc
-- rp12 covers 64 bytes then skips 64 etc
-- rp13 skips 64 bytes then covers 64 etc
-- rp14 covers 128 bytes then skips 128
-- rp15 skips 128 bytes then covers 128
-
-In the end the parity bits are grouped together in three bytes as
-follows:
-
-=====  ===== ===== ===== ===== ===== ===== ===== =====
-ECC    Bit 7 Bit 6 Bit 5 Bit 4 Bit 3 Bit 2 Bit 1 Bit 0
-=====  ===== ===== ===== ===== ===== ===== ===== =====
-ECC 0   rp07  rp06  rp05  rp04  rp03  rp02  rp01  rp00
-ECC 1   rp15  rp14  rp13  rp12  rp11  rp10  rp09  rp08
-ECC 2   cp5   cp4   cp3   cp2   cp1   cp0      1     1
-=====  ===== ===== ===== ===== ===== ===== ===== =====
-
-I detected after writing this that ST application note AN1823
-(http://www.st.com/stonline/) gives a much
-nicer picture.(but they use line parity as term where I use row parity)
-Oh well, I'm graphically challenged, so suffer with me for a moment :-)
-
-And I could not reuse the ST picture anyway for copyright reasons.
-
-
-Attempt 0
-=========
-
-Implementing the parity calculation is pretty simple.
-In C pseudocode::
-
-  for (i = 0; i < 256; i++)
-  {
-    if (i & 0x01)
-       rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
-    else
-       rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp0;
-    if (i & 0x02)
-       rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3;
-    else
-       rp2 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp2;
-    if (i & 0x04)
-      rp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp5;
-    else
-      rp4 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp4;
-    if (i & 0x08)
-      rp7 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp7;
-    else
-      rp6 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp6;
-    if (i & 0x10)
-      rp9 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp9;
-    else
-      rp8 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp8;
-    if (i & 0x20)
-      rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11;
-    else
-      rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
-    if (i & 0x40)
-      rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13;
-    else
-      rp12 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp12;
-    if (i & 0x80)
-      rp15 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp15;
-    else
-      rp14 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp14;
-    cp0 = bit6 ^ bit4 ^ bit2 ^ bit0 ^ cp0;
-    cp1 = bit7 ^ bit5 ^ bit3 ^ bit1 ^ cp1;
-    cp2 = bit5 ^ bit4 ^ bit1 ^ bit0 ^ cp2;
-    cp3 = bit7 ^ bit6 ^ bit3 ^ bit2 ^ cp3
-    cp4 = bit3 ^ bit2 ^ bit1 ^ bit0 ^ cp4
-    cp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ cp5
-  }
-
-
-Analysis 0
-==========
-
-C does have bitwise operators but not really operators to do the above
-efficiently (and most hardware has no such instructions either).
-Therefore without implementing this it was clear that the code above was
-not going to bring me a Nobel prize :-)
-
-Fortunately the exclusive or operation is commutative, so we can combine
-the values in any order. So instead of calculating all the bits
-individually, let us try to rearrange things.
-For the column parity this is easy. We can just xor the bytes and in the
-end filter out the relevant bits. This is pretty nice as it will bring
-all cp calculation out of the for loop.
-
-Similarly we can first xor the bytes for the various rows.
-This leads to:
-
-
-Attempt 1
-=========
-
-::
-
-  const char parity[256] = {
-      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-      1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-      0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
-  };
-
-  void ecc1(const unsigned char *buf, unsigned char *code)
-  {
-      int i;
-      const unsigned char *bp = buf;
-      unsigned char cur;
-      unsigned char rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
-      unsigned char rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
-      unsigned char par;
-
-      par = 0;
-      rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
-      rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
-      rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
-      rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
-
-      for (i = 0; i < 256; i++)
-      {
-          cur = *bp++;
-          par ^= cur;
-          if (i & 0x01) rp1 ^= cur; else rp0 ^= cur;
-          if (i & 0x02) rp3 ^= cur; else rp2 ^= cur;
-          if (i & 0x04) rp5 ^= cur; else rp4 ^= cur;
-          if (i & 0x08) rp7 ^= cur; else rp6 ^= cur;
-          if (i & 0x10) rp9 ^= cur; else rp8 ^= cur;
-          if (i & 0x20) rp11 ^= cur; else rp10 ^= cur;
-          if (i & 0x40) rp13 ^= cur; else rp12 ^= cur;
-          if (i & 0x80) rp15 ^= cur; else rp14 ^= cur;
-      }
-      code[0] =
-          (parity[rp7] << 7) |
-          (parity[rp6] << 6) |
-          (parity[rp5] << 5) |
-          (parity[rp4] << 4) |
-          (parity[rp3] << 3) |
-          (parity[rp2] << 2) |
-          (parity[rp1] << 1) |
-          (parity[rp0]);
-      code[1] =
-          (parity[rp15] << 7) |
-          (parity[rp14] << 6) |
-          (parity[rp13] << 5) |
-          (parity[rp12] << 4) |
-          (parity[rp11] << 3) |
-          (parity[rp10] << 2) |
-          (parity[rp9]  << 1) |
-          (parity[rp8]);
-      code[2] =
-          (parity[par & 0xf0] << 7) |
-          (parity[par & 0x0f] << 6) |
-          (parity[par & 0xcc] << 5) |
-          (parity[par & 0x33] << 4) |
-          (parity[par & 0xaa] << 3) |
-          (parity[par & 0x55] << 2);
-      code[0] = ~code[0];
-      code[1] = ~code[1];
-      code[2] = ~code[2];
-  }
-
-Still pretty straightforward. The last three invert statements are there to
-give a checksum of 0xff 0xff 0xff for an empty flash. In an empty flash
-all data is 0xff, so the checksum then matches.
-
-I also introduced the parity lookup. I expected this to be the fastest
-way to calculate the parity, but I will investigate alternatives later
-on.
-
-
-Analysis 1
-==========
-
-The code works, but is not terribly efficient. On my system it took
-almost 4 times as much time as the linux driver code. But hey, if it was
-*that* easy this would have been done long before.
-No pain. no gain.
-
-Fortunately there is plenty of room for improvement.
-
-In step 1 we moved from bit-wise calculation to byte-wise calculation.
-However in C we can also use the unsigned long data type and virtually
-every modern microprocessor supports 32 bit operations, so why not try
-to write our code in such a way that we process data in 32 bit chunks.
-
-Of course this means some modification as the row parity is byte by
-byte. A quick analysis:
-for the column parity we use the par variable. When extending to 32 bits
-we can in the end easily calculate rp0 and rp1 from it.
-(because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0
-respectively, from MSB to LSB)
-also rp2 and rp3 can be easily retrieved from par as rp3 covers the
-first two MSBs and rp2 covers the last two LSBs.
-
-Note that of course now the loop is executed only 64 times (256/4).
-And note that care must taken wrt byte ordering. The way bytes are
-ordered in a long is machine dependent, and might affect us.
-Anyway, if there is an issue: this code is developed on x86 (to be
-precise: a DELL PC with a D920 Intel CPU)
-
-And of course the performance might depend on alignment, but I expect
-that the I/O buffers in the nand driver are aligned properly (and
-otherwise that should be fixed to get maximum performance).
-
-Let's give it a try...
-
-
-Attempt 2
-=========
-
-::
-
-  extern const char parity[256];
-
-  void ecc2(const unsigned char *buf, unsigned char *code)
-  {
-      int i;
-      const unsigned long *bp = (unsigned long *)buf;
-      unsigned long cur;
-      unsigned long rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
-      unsigned long rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
-      unsigned long par;
-
-      par = 0;
-      rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
-      rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
-      rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
-      rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
-
-      for (i = 0; i < 64; i++)
-      {
-          cur = *bp++;
-          par ^= cur;
-          if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
-          if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
-          if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
-          if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
-          if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
-          if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
-      }
-      /*
-         we need to adapt the code generation for the fact that rp vars are now
-         long; also the column parity calculation needs to be changed.
-         we'll bring rp4 to 15 back to single byte entities by shifting and
-         xoring
-      */
-      rp4 ^= (rp4 >> 16); rp4 ^= (rp4 >> 8); rp4 &= 0xff;
-      rp5 ^= (rp5 >> 16); rp5 ^= (rp5 >> 8); rp5 &= 0xff;
-      rp6 ^= (rp6 >> 16); rp6 ^= (rp6 >> 8); rp6 &= 0xff;
-      rp7 ^= (rp7 >> 16); rp7 ^= (rp7 >> 8); rp7 &= 0xff;
-      rp8 ^= (rp8 >> 16); rp8 ^= (rp8 >> 8); rp8 &= 0xff;
-      rp9 ^= (rp9 >> 16); rp9 ^= (rp9 >> 8); rp9 &= 0xff;
-      rp10 ^= (rp10 >> 16); rp10 ^= (rp10 >> 8); rp10 &= 0xff;
-      rp11 ^= (rp11 >> 16); rp11 ^= (rp11 >> 8); rp11 &= 0xff;
-      rp12 ^= (rp12 >> 16); rp12 ^= (rp12 >> 8); rp12 &= 0xff;
-      rp13 ^= (rp13 >> 16); rp13 ^= (rp13 >> 8); rp13 &= 0xff;
-      rp14 ^= (rp14 >> 16); rp14 ^= (rp14 >> 8); rp14 &= 0xff;
-      rp15 ^= (rp15 >> 16); rp15 ^= (rp15 >> 8); rp15 &= 0xff;
-      rp3 = (par >> 16); rp3 ^= (rp3 >> 8); rp3 &= 0xff;
-      rp2 = par & 0xffff; rp2 ^= (rp2 >> 8); rp2 &= 0xff;
-      par ^= (par >> 16);
-      rp1 = (par >> 8); rp1 &= 0xff;
-      rp0 = (par & 0xff);
-      par ^= (par >> 8); par &= 0xff;
-
-      code[0] =
-          (parity[rp7] << 7) |
-          (parity[rp6] << 6) |
-          (parity[rp5] << 5) |
-          (parity[rp4] << 4) |
-          (parity[rp3] << 3) |
-          (parity[rp2] << 2) |
-          (parity[rp1] << 1) |
-          (parity[rp0]);
-      code[1] =
-          (parity[rp15] << 7) |
-          (parity[rp14] << 6) |
-          (parity[rp13] << 5) |
-          (parity[rp12] << 4) |
-          (parity[rp11] << 3) |
-          (parity[rp10] << 2) |
-          (parity[rp9]  << 1) |
-          (parity[rp8]);
-      code[2] =
-          (parity[par & 0xf0] << 7) |
-          (parity[par & 0x0f] << 6) |
-          (parity[par & 0xcc] << 5) |
-          (parity[par & 0x33] << 4) |
-          (parity[par & 0xaa] << 3) |
-          (parity[par & 0x55] << 2);
-      code[0] = ~code[0];
-      code[1] = ~code[1];
-      code[2] = ~code[2];
-  }
-
-The parity array is not shown any more. Note also that for these
-examples I kinda deviated from my regular programming style by allowing
-multiple statements on a line, not using { } in then and else blocks
-with only a single statement and by using operators like ^=
-
-
-Analysis 2
-==========
-
-The code (of course) works, and hurray: we are a little bit faster than
-the linux driver code (about 15%). But wait, don't cheer too quickly.
-There is more to be gained.
-If we look at e.g. rp14 and rp15 we see that we either xor our data with
-rp14 or with rp15. However we also have par which goes over all data.
-This means there is no need to calculate rp14 as it can be calculated from
-rp15 through rp14 = par ^ rp15, because par = rp14 ^ rp15;
-(or if desired we can avoid calculating rp15 and calculate it from
-rp14).  That is why some places refer to inverse parity.
-Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13.
-Effectively this means we can eliminate the else clause from the if
-statements. Also we can optimise the calculation in the end a little bit
-by going from long to byte first. Actually we can even avoid the table
-lookups
-
-Attempt 3
-=========
-
-Odd replaced::
-
-          if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
-          if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
-          if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
-          if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
-          if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
-          if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
-
-with::
-
-          if (i & 0x01) rp5 ^= cur;
-          if (i & 0x02) rp7 ^= cur;
-          if (i & 0x04) rp9 ^= cur;
-          if (i & 0x08) rp11 ^= cur;
-          if (i & 0x10) rp13 ^= cur;
-          if (i & 0x20) rp15 ^= cur;
-
-and outside the loop added::
-
-          rp4  = par ^ rp5;
-          rp6  = par ^ rp7;
-          rp8  = par ^ rp9;
-          rp10  = par ^ rp11;
-          rp12  = par ^ rp13;
-          rp14  = par ^ rp15;
-
-And after that the code takes about 30% more time, although the number of
-statements is reduced. This is also reflected in the assembly code.
-
-
-Analysis 3
-==========
-
-Very weird. Guess it has to do with caching or instruction parallellism
-or so. I also tried on an eeePC (Celeron, clocked at 900 Mhz). Interesting
-observation was that this one is only 30% slower (according to time)
-executing the code as my 3Ghz D920 processor.
-
-Well, it was expected not to be easy so maybe instead move to a
-different track: let's move back to the code from attempt2 and do some
-loop unrolling. This will eliminate a few if statements. I'll try
-different amounts of unrolling to see what works best.
-
-
-Attempt 4
-=========
-
-Unrolled the loop 1, 2, 3 and 4 times.
-For 4 the code starts with::
-
-    for (i = 0; i < 4; i++)
-    {
-        cur = *bp++;
-        par ^= cur;
-        rp4 ^= cur;
-        rp6 ^= cur;
-        rp8 ^= cur;
-        rp10 ^= cur;
-        if (i & 0x1) rp13 ^= cur; else rp12 ^= cur;
-        if (i & 0x2) rp15 ^= cur; else rp14 ^= cur;
-        cur = *bp++;
-        par ^= cur;
-        rp5 ^= cur;
-        rp6 ^= cur;
-        ...
-
-
-Analysis 4
-==========
-
-Unrolling once gains about 15%
-
-Unrolling twice keeps the gain at about 15%
-
-Unrolling three times gives a gain of 30% compared to attempt 2.
-
-Unrolling four times gives a marginal improvement compared to unrolling
-three times.
-
-I decided to proceed with a four time unrolled loop anyway. It was my gut
-feeling that in the next steps I would obtain additional gain from it.
-
-The next step was triggered by the fact that par contains the xor of all
-bytes and rp4 and rp5 each contain the xor of half of the bytes.
-So in effect par = rp4 ^ rp5. But as xor is commutative we can also say
-that rp5 = par ^ rp4. So no need to keep both rp4 and rp5 around. We can
-eliminate rp5 (or rp4, but I already foresaw another optimisation).
-The same holds for rp6/7, rp8/9, rp10/11 rp12/13 and rp14/15.
-
-
-Attempt 5
-=========
-
-Effectively so all odd digit rp assignments in the loop were removed.
-This included the else clause of the if statements.
-Of course after the loop we need to correct things by adding code like::
-
-    rp5 = par ^ rp4;
-
-Also the initial assignments (rp5 = 0; etc) could be removed.
-Along the line I also removed the initialisation of rp0/1/2/3.
-
-
-Analysis 5
-==========
-
-Measurements showed this was a good move. The run-time roughly halved
-compared with attempt 4 with 4 times unrolled, and we only require 1/3rd
-of the processor time compared to the current code in the linux kernel.
-
-However, still I thought there was more. I didn't like all the if
-statements. Why not keep a running parity and only keep the last if
-statement. Time for yet another version!
-
-
-Attempt 6
-=========
-
-THe code within the for loop was changed to::
-
-    for (i = 0; i < 4; i++)
-    {
-        cur = *bp++; tmppar  = cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
-
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
-
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp8 ^= cur;
-
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur;
-
-        par ^= tmppar;
-        if ((i & 0x1) == 0) rp12 ^= tmppar;
-        if ((i & 0x2) == 0) rp14 ^= tmppar;
-    }
-
-As you can see tmppar is used to accumulate the parity within a for
-iteration. In the last 3 statements is added to par and, if needed,
-to rp12 and rp14.
-
-While making the changes I also found that I could exploit that tmppar
-contains the running parity for this iteration. So instead of having:
-rp4 ^= cur; rp6 ^= cur;
-I removed the rp6 ^= cur; statement and did rp6 ^= tmppar; on next
-statement. A similar change was done for rp8 and rp10
-
-
-Analysis 6
-==========
-
-Measuring this code again showed big gain. When executing the original
-linux code 1 million times, this took about 1 second on my system.
-(using time to measure the performance). After this iteration I was back
-to 0.075 sec. Actually I had to decide to start measuring over 10
-million iterations in order not to lose too much accuracy. This one
-definitely seemed to be the jackpot!
-
-There is a little bit more room for improvement though. There are three
-places with statements::
-
-	rp4 ^= cur; rp6 ^= cur;
-
-It seems more efficient to also maintain a variable rp4_6 in the while
-loop; This eliminates 3 statements per loop. Of course after the loop we
-need to correct by adding::
-
-	rp4 ^= rp4_6;
-	rp6 ^= rp4_6
-
-Furthermore there are 4 sequential assignments to rp8. This can be
-encoded slightly more efficiently by saving tmppar before those 4 lines
-and later do rp8 = rp8 ^ tmppar ^ notrp8;
-(where notrp8 is the value of rp8 before those 4 lines).
-Again a use of the commutative property of xor.
-Time for a new test!
-
-
-Attempt 7
-=========
-
-The new code now looks like::
-
-    for (i = 0; i < 4; i++)
-    {
-        cur = *bp++; tmppar  = cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
-
-        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
-
-        notrp8 = tmppar;
-        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur;
-        rp8 = rp8 ^ tmppar ^ notrp8;
-
-        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
-        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-        cur = *bp++; tmppar ^= cur;
-
-        par ^= tmppar;
-        if ((i & 0x1) == 0) rp12 ^= tmppar;
-        if ((i & 0x2) == 0) rp14 ^= tmppar;
-    }
-    rp4 ^= rp4_6;
-    rp6 ^= rp4_6;
-
-
-Not a big change, but every penny counts :-)
-
-
-Analysis 7
-==========
-
-Actually this made things worse. Not very much, but I don't want to move
-into the wrong direction. Maybe something to investigate later. Could
-have to do with caching again.
-
-Guess that is what there is to win within the loop. Maybe unrolling one
-more time will help. I'll keep the optimisations from 7 for now.
-
-
-Attempt 8
-=========
-
-Unrolled the loop one more time.
-
-
-Analysis 8
-==========
-
-This makes things worse. Let's stick with attempt 6 and continue from there.
-Although it seems that the code within the loop cannot be optimised
-further there is still room to optimize the generation of the ecc codes.
-We can simply calculate the total parity. If this is 0 then rp4 = rp5
-etc. If the parity is 1, then rp4 = !rp5;
-
-But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits
-in the result byte and then do something like::
-
-    code[0] |= (code[0] << 1);
-
-Lets test this.
-
-
-Attempt 9
-=========
-
-Changed the code but again this slightly degrades performance. Tried all
-kind of other things, like having dedicated parity arrays to avoid the
-shift after parity[rp7] << 7; No gain.
-Change the lookup using the parity array by using shift operators (e.g.
-replace parity[rp7] << 7 with::
-
-	rp7 ^= (rp7 << 4);
-	rp7 ^= (rp7 << 2);
-	rp7 ^= (rp7 << 1);
-	rp7 &= 0x80;
-
-No gain.
-
-The only marginal change was inverting the parity bits, so we can remove
-the last three invert statements.
-
-Ah well, pity this does not deliver more. Then again 10 million
-iterations using the linux driver code takes between 13 and 13.5
-seconds, whereas my code now takes about 0.73 seconds for those 10
-million iterations. So basically I've improved the performance by a
-factor 18 on my system. Not that bad. Of course on different hardware
-you will get different results. No warranties!
-
-But of course there is no such thing as a free lunch. The codesize almost
-tripled (from 562 bytes to 1434 bytes). Then again, it is not that much.
-
-
-Correcting errors
-=================
-
-For correcting errors I again used the ST application note as a starter,
-but I also peeked at the existing code.
-
-The algorithm itself is pretty straightforward. Just xor the given and
-the calculated ecc. If all bytes are 0 there is no problem. If 11 bits
-are 1 we have one correctable bit error. If there is 1 bit 1, we have an
-error in the given ecc code.
-
-It proved to be fastest to do some table lookups. Performance gain
-introduced by this is about a factor 2 on my system when a repair had to
-be done, and 1% or so if no repair had to be done.
-
-Code size increased from 330 bytes to 686 bytes for this function.
-(gcc 4.2, -O3)
-
-
-Conclusion
-==========
-
-The gain when calculating the ecc is tremendous. Om my development hardware
-a speedup of a factor of 18 for ecc calculation was achieved. On a test on an
-embedded system with a MIPS core a factor 7 was obtained.
-
-On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
-5 (big endian mode, gcc 4.1.2, -O3)
-
-For correction not much gain could be obtained (as bitflips are rare). Then
-again there are also much less cycles spent there.
-
-It seems there is not much more gain possible in this, at least when
-programmed in C. Of course it might be possible to squeeze something more
-out of it with an assembler program, but due to pipeline behaviour etc
-this is very tricky (at least for intel hw).
-
-Author: Frans Meulenbroeks
-
-Copyright (C) 2008 Koninklijke Philips Electronics NV.
diff --git a/Documentation/mtd/spi-nor.rst b/Documentation/mtd/spi-nor.rst
deleted file mode 100644
index f5333e3bf486..000000000000
--- a/Documentation/mtd/spi-nor.rst
+++ /dev/null
@@ -1,66 +0,0 @@
-=================
-SPI NOR framework
-=================
-
-Part I - Why do we need this framework?
----------------------------------------
-
-SPI bus controllers (drivers/spi/) only deal with streams of bytes; the bus
-controller operates agnostic of the specific device attached. However, some
-controllers (such as Freescale's QuadSPI controller) cannot easily handle
-arbitrary streams of bytes, but rather are designed specifically for SPI NOR.
-
-In particular, Freescale's QuadSPI controller must know the NOR commands to
-find the right LUT sequence. Unfortunately, the SPI subsystem has no notion of
-opcodes, addresses, or data payloads; a SPI controller simply knows to send or
-receive bytes (Tx and Rx). Therefore, we must define a new layering scheme under
-which the controller driver is aware of the opcodes, addressing, and other
-details of the SPI NOR protocol.
-
-Part II - How does the framework work?
---------------------------------------
-
-This framework just adds a new layer between the MTD and the SPI bus driver.
-With this new layer, the SPI NOR controller driver does not depend on the
-m25p80 code anymore.
-
-Before this framework, the layer is like::
-
-                   MTD
-         ------------------------
-                  m25p80
-         ------------------------
-	       SPI bus driver
-         ------------------------
-	        SPI NOR chip
-
-   After this framework, the layer is like:
-                   MTD
-         ------------------------
-              SPI NOR framework
-         ------------------------
-                  m25p80
-         ------------------------
-	       SPI bus driver
-         ------------------------
-	       SPI NOR chip
-
-  With the SPI NOR controller driver (Freescale QuadSPI), it looks like:
-                   MTD
-         ------------------------
-              SPI NOR framework
-         ------------------------
-                fsl-quadSPI
-         ------------------------
-	       SPI NOR chip
-
-Part III - How can drivers use the framework?
----------------------------------------------
-
-The main API is spi_nor_scan(). Before you call the hook, a driver should
-initialize the necessary fields for spi_nor{}. Please see
-drivers/mtd/spi-nor/spi-nor.c for detail. Please also refer to fsl-quadspi.c
-when you want to write a new driver for a SPI NOR controller.
-Another API is spi_nor_restore(), this is used to restore the status of SPI
-flash chip such as addressing mode. Call it whenever detach the driver from
-device or reboot the system.
-- 
cgit 


From e253d2c551ce876a374d533fbcc9e8f31142dcad Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 16:46:30 -0300
Subject: docs: nfc: add it to the driver-api book

Most of the descriptions here are oriented to a Kernel developer.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/driver-api/index.rst         |   1 +
 Documentation/driver-api/nfc/index.rst     |   9 +
 Documentation/driver-api/nfc/nfc-hci.rst   | 311 +++++++++++++++++++++++++++++
 Documentation/driver-api/nfc/nfc-pn544.rst |  34 ++++
 Documentation/nfc/index.rst                |  11 -
 Documentation/nfc/nfc-hci.rst              | 311 -----------------------------
 Documentation/nfc/nfc-pn544.rst            |  34 ----
 7 files changed, 355 insertions(+), 356 deletions(-)
 create mode 100644 Documentation/driver-api/nfc/index.rst
 create mode 100644 Documentation/driver-api/nfc/nfc-hci.rst
 create mode 100644 Documentation/driver-api/nfc/nfc-pn544.rst
 delete mode 100644 Documentation/nfc/index.rst
 delete mode 100644 Documentation/nfc/nfc-hci.rst
 delete mode 100644 Documentation/nfc/nfc-pn544.rst

(limited to 'Documentation')

diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 7ecc65093493..d6bf4a37cefe 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -56,6 +56,7 @@ available subsections can be seen below.
    pinctl
    gpio/index
    misc_devices
+   nfc/index
    dmaengine/index
    slimbus
    soundwire/index
diff --git a/Documentation/driver-api/nfc/index.rst b/Documentation/driver-api/nfc/index.rst
new file mode 100644
index 000000000000..3afb2c0c2e3c
--- /dev/null
+++ b/Documentation/driver-api/nfc/index.rst
@@ -0,0 +1,9 @@
+========================
+Near Field Communication
+========================
+
+.. toctree::
+   :maxdepth: 1
+
+   nfc-hci
+   nfc-pn544
diff --git a/Documentation/driver-api/nfc/nfc-hci.rst b/Documentation/driver-api/nfc/nfc-hci.rst
new file mode 100644
index 000000000000..eb8a1a14e919
--- /dev/null
+++ b/Documentation/driver-api/nfc/nfc-hci.rst
@@ -0,0 +1,311 @@
+========================
+HCI backend for NFC Core
+========================
+
+- Author: Eric Lapuyade, Samuel Ortiz
+- Contact: eric.lapuyade@intel.com, samuel.ortiz@intel.com
+
+General
+-------
+
+The HCI layer implements much of the ETSI TS 102 622 V10.2.0 specification. It
+enables easy writing of HCI-based NFC drivers. The HCI layer runs as an NFC Core
+backend, implementing an abstract nfc device and translating NFC Core API
+to HCI commands and events.
+
+HCI
+---
+
+HCI registers as an nfc device with NFC Core. Requests coming from userspace are
+routed through netlink sockets to NFC Core and then to HCI. From this point,
+they are translated in a sequence of HCI commands sent to the HCI layer in the
+host controller (the chip). Commands can be executed synchronously (the sending
+context blocks waiting for response) or asynchronously (the response is returned
+from HCI Rx context).
+HCI events can also be received from the host controller. They will be handled
+and a translation will be forwarded to NFC Core as needed. There are hooks to
+let the HCI driver handle proprietary events or override standard behavior.
+HCI uses 2 execution contexts:
+
+- one for executing commands : nfc_hci_msg_tx_work(). Only one command
+  can be executing at any given moment.
+- one for dispatching received events and commands : nfc_hci_msg_rx_work().
+
+HCI Session initialization
+--------------------------
+
+The Session initialization is an HCI standard which must unfortunately
+support proprietary gates. This is the reason why the driver will pass a list
+of proprietary gates that must be part of the session. HCI will ensure all
+those gates have pipes connected when the hci device is set up.
+In case the chip supports pre-opened gates and pseudo-static pipes, the driver
+can pass that information to HCI core.
+
+HCI Gates and Pipes
+-------------------
+
+A gate defines the 'port' where some service can be found. In order to access
+a service, one must create a pipe to that gate and open it. In this
+implementation, pipes are totally hidden. The public API only knows gates.
+This is consistent with the driver need to send commands to proprietary gates
+without knowing the pipe connected to it.
+
+Driver interface
+----------------
+
+A driver is generally written in two parts : the physical link management and
+the HCI management. This makes it easier to maintain a driver for a chip that
+can be connected using various phy (i2c, spi, ...)
+
+HCI Management
+--------------
+
+A driver would normally register itself with HCI and provide the following
+entry points::
+
+  struct nfc_hci_ops {
+	int (*open)(struct nfc_hci_dev *hdev);
+	void (*close)(struct nfc_hci_dev *hdev);
+	int (*hci_ready) (struct nfc_hci_dev *hdev);
+	int (*xmit) (struct nfc_hci_dev *hdev, struct sk_buff *skb);
+	int (*start_poll) (struct nfc_hci_dev *hdev,
+			   u32 im_protocols, u32 tm_protocols);
+	int (*dep_link_up)(struct nfc_hci_dev *hdev, struct nfc_target *target,
+			   u8 comm_mode, u8 *gb, size_t gb_len);
+	int (*dep_link_down)(struct nfc_hci_dev *hdev);
+	int (*target_from_gate) (struct nfc_hci_dev *hdev, u8 gate,
+				 struct nfc_target *target);
+	int (*complete_target_discovered) (struct nfc_hci_dev *hdev, u8 gate,
+					   struct nfc_target *target);
+	int (*im_transceive) (struct nfc_hci_dev *hdev,
+			      struct nfc_target *target, struct sk_buff *skb,
+			      data_exchange_cb_t cb, void *cb_context);
+	int (*tm_send)(struct nfc_hci_dev *hdev, struct sk_buff *skb);
+	int (*check_presence)(struct nfc_hci_dev *hdev,
+			      struct nfc_target *target);
+	int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event,
+			      struct sk_buff *skb);
+  };
+
+- open() and close() shall turn the hardware on and off.
+- hci_ready() is an optional entry point that is called right after the hci
+  session has been set up. The driver can use it to do additional initialization
+  that must be performed using HCI commands.
+- xmit() shall simply write a frame to the physical link.
+- start_poll() is an optional entrypoint that shall set the hardware in polling
+  mode. This must be implemented only if the hardware uses proprietary gates or a
+  mechanism slightly different from the HCI standard.
+- dep_link_up() is called after a p2p target has been detected, to finish
+  the p2p connection setup with hardware parameters that need to be passed back
+  to nfc core.
+- dep_link_down() is called to bring the p2p link down.
+- target_from_gate() is an optional entrypoint to return the nfc protocols
+  corresponding to a proprietary gate.
+- complete_target_discovered() is an optional entry point to let the driver
+  perform additional proprietary processing necessary to auto activate the
+  discovered target.
+- im_transceive() must be implemented by the driver if proprietary HCI commands
+  are required to send data to the tag. Some tag types will require custom
+  commands, others can be written to using the standard HCI commands. The driver
+  can check the tag type and either do proprietary processing, or return 1 to ask
+  for standard processing. The data exchange command itself must be sent
+  asynchronously.
+- tm_send() is called to send data in the case of a p2p connection
+- check_presence() is an optional entry point that will be called regularly
+  by the core to check that an activated tag is still in the field. If this is
+  not implemented, the core will not be able to push tag_lost events to the user
+  space
+- event_received() is called to handle an event coming from the chip. Driver
+  can handle the event or return 1 to let HCI attempt standard processing.
+
+On the rx path, the driver is responsible to push incoming HCP frames to HCI
+using nfc_hci_recv_frame(). HCI will take care of re-aggregation and handling
+This must be done from a context that can sleep.
+
+PHY Management
+--------------
+
+The physical link (i2c, ...) management is defined by the following structure::
+
+  struct nfc_phy_ops {
+	int (*write)(void *dev_id, struct sk_buff *skb);
+	int (*enable)(void *dev_id);
+	void (*disable)(void *dev_id);
+  };
+
+enable():
+	turn the phy on (power on), make it ready to transfer data
+disable():
+	turn the phy off
+write():
+	Send a data frame to the chip. Note that to enable higher
+	layers such as an llc to store the frame for re-emission, this
+	function must not alter the skb. It must also not return a positive
+	result (return 0 for success, negative for failure).
+
+Data coming from the chip shall be sent directly to nfc_hci_recv_frame().
+
+LLC
+---
+
+Communication between the CPU and the chip often requires some link layer
+protocol. Those are isolated as modules managed by the HCI layer. There are
+currently two modules : nop (raw transfert) and shdlc.
+A new llc must implement the following functions::
+
+  struct nfc_llc_ops {
+	void *(*init) (struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
+		       rcv_to_hci_t rcv_to_hci, int tx_headroom,
+		       int tx_tailroom, int *rx_headroom, int *rx_tailroom,
+		       llc_failure_t llc_failure);
+	void (*deinit) (struct nfc_llc *llc);
+	int (*start) (struct nfc_llc *llc);
+	int (*stop) (struct nfc_llc *llc);
+	void (*rcv_from_drv) (struct nfc_llc *llc, struct sk_buff *skb);
+	int (*xmit_from_hci) (struct nfc_llc *llc, struct sk_buff *skb);
+  };
+
+init():
+	allocate and init your private storage
+deinit():
+	cleanup
+start():
+	establish the logical connection
+stop ():
+	terminate the logical connection
+rcv_from_drv():
+	handle data coming from the chip, going to HCI
+xmit_from_hci():
+	handle data sent by HCI, going to the chip
+
+The llc must be registered with nfc before it can be used. Do that by
+calling::
+
+	nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
+
+Again, note that the llc does not handle the physical link. It is thus very
+easy to mix any physical link with any llc for a given chip driver.
+
+Included Drivers
+----------------
+
+An HCI based driver for an NXP PN544, connected through I2C bus, and using
+shdlc is included.
+
+Execution Contexts
+------------------
+
+The execution contexts are the following:
+- IRQ handler (IRQH):
+fast, cannot sleep. sends incoming frames to HCI where they are passed to
+the current llc. In case of shdlc, the frame is queued in shdlc rx queue.
+
+- SHDLC State Machine worker (SMW)
+
+  Only when llc_shdlc is used: handles shdlc rx & tx queues.
+
+  Dispatches HCI cmd responses.
+
+- HCI Tx Cmd worker (MSGTXWQ)
+
+  Serializes execution of HCI commands.
+
+  Completes execution in case of response timeout.
+
+- HCI Rx worker (MSGRXWQ)
+
+  Dispatches incoming HCI commands or events.
+
+- Syscall context from a userspace call (SYSCALL)
+
+  Any entrypoint in HCI called from NFC Core
+
+Workflow executing an HCI command (using shdlc)
+-----------------------------------------------
+
+Executing an HCI command can easily be performed synchronously using the
+following API::
+
+  int nfc_hci_send_cmd (struct nfc_hci_dev *hdev, u8 gate, u8 cmd,
+			const u8 *param, size_t param_len, struct sk_buff **skb)
+
+The API must be invoked from a context that can sleep. Most of the time, this
+will be the syscall context. skb will return the result that was received in
+the response.
+
+Internally, execution is asynchronous. So all this API does is to enqueue the
+HCI command, setup a local wait queue on stack, and wait_event() for completion.
+The wait is not interruptible because it is guaranteed that the command will
+complete after some short timeout anyway.
+
+MSGTXWQ context will then be scheduled and invoke nfc_hci_msg_tx_work().
+This function will dequeue the next pending command and send its HCP fragments
+to the lower layer which happens to be shdlc. It will then start a timer to be
+able to complete the command with a timeout error if no response arrive.
+
+SMW context gets scheduled and invokes nfc_shdlc_sm_work(). This function
+handles shdlc framing in and out. It uses the driver xmit to send frames and
+receives incoming frames in an skb queue filled from the driver IRQ handler.
+SHDLC I(nformation) frames payload are HCP fragments. They are aggregated to
+form complete HCI frames, which can be a response, command, or event.
+
+HCI Responses are dispatched immediately from this context to unblock
+waiting command execution. Response processing involves invoking the completion
+callback that was provided by nfc_hci_msg_tx_work() when it sent the command.
+The completion callback will then wake the syscall context.
+
+It is also possible to execute the command asynchronously using this API::
+
+  static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
+				       const u8 *param, size_t param_len,
+				       data_exchange_cb_t cb, void *cb_context)
+
+The workflow is the same, except that the API call returns immediately, and
+the callback will be called with the result from the SMW context.
+
+Workflow receiving an HCI event or command
+------------------------------------------
+
+HCI commands or events are not dispatched from SMW context. Instead, they are
+queued to HCI rx_queue and will be dispatched from HCI rx worker
+context (MSGRXWQ). This is done this way to allow a cmd or event handler
+to also execute other commands (for example, handling the
+NFC_HCI_EVT_TARGET_DISCOVERED event from PN544 requires to issue an
+ANY_GET_PARAMETER to the reader A gate to get information on the target
+that was discovered).
+
+Typically, such an event will be propagated to NFC Core from MSGRXWQ context.
+
+Error management
+----------------
+
+Errors that occur synchronously with the execution of an NFC Core request are
+simply returned as the execution result of the request. These are easy.
+
+Errors that occur asynchronously (e.g. in a background protocol handling thread)
+must be reported such that upper layers don't stay ignorant that something
+went wrong below and know that expected events will probably never happen.
+Handling of these errors is done as follows:
+
+- driver (pn544) fails to deliver an incoming frame: it stores the error such
+  that any subsequent call to the driver will result in this error. Then it
+  calls the standard nfc_shdlc_recv_frame() with a NULL argument to report the
+  problem above. shdlc stores a EREMOTEIO sticky status, which will trigger
+  SMW to report above in turn.
+
+- SMW is basically a background thread to handle incoming and outgoing shdlc
+  frames. This thread will also check the shdlc sticky status and report to HCI
+  when it discovers it is not able to run anymore because of an unrecoverable
+  error that happened within shdlc or below. If the problem occurs during shdlc
+  connection, the error is reported through the connect completion.
+
+- HCI: if an internal HCI error happens (frame is lost), or HCI is reported an
+  error from a lower layer, HCI will either complete the currently executing
+  command with that error, or notify NFC Core directly if no command is
+  executing.
+
+- NFC Core: when NFC Core is notified of an error from below and polling is
+  active, it will send a tag discovered event with an empty tag list to the user
+  space to let it know that the poll operation will never be able to detect a
+  tag. If polling is not active and the error was sticky, lower levels will
+  return it at next invocation.
diff --git a/Documentation/driver-api/nfc/nfc-pn544.rst b/Documentation/driver-api/nfc/nfc-pn544.rst
new file mode 100644
index 000000000000..6b2d8aae0c4e
--- /dev/null
+++ b/Documentation/driver-api/nfc/nfc-pn544.rst
@@ -0,0 +1,34 @@
+============================================================================
+Kernel driver for the NXP Semiconductors PN544 Near Field Communication chip
+============================================================================
+
+
+General
+-------
+
+The PN544 is an integrated transmission module for contactless
+communication. The driver goes under drives/nfc/ and is compiled as a
+module named "pn544".
+
+Host Interfaces: I2C, SPI and HSU, this driver supports currently only I2C.
+
+Protocols
+---------
+
+In the normal (HCI) mode and in the firmware update mode read and
+write functions behave a bit differently because the message formats
+or the protocols are different.
+
+In the normal (HCI) mode the protocol used is derived from the ETSI
+HCI specification. The firmware is updated using a specific protocol,
+which is different from HCI.
+
+HCI messages consist of an eight bit header and the message body. The
+header contains the message length. Maximum size for an HCI message is
+33. In HCI mode sent messages are tested for a correct
+checksum. Firmware update messages have the length in the second (MSB)
+and third (LSB) bytes of the message. The maximum FW message length is
+1024 bytes.
+
+For the ETSI HCI specification see
+http://www.etsi.org/WebSite/Technologies/ProtocolSpecification.aspx
diff --git a/Documentation/nfc/index.rst b/Documentation/nfc/index.rst
deleted file mode 100644
index 4f4947fce80d..000000000000
--- a/Documentation/nfc/index.rst
+++ /dev/null
@@ -1,11 +0,0 @@
-:orphan:
-
-========================
-Near Field Communication
-========================
-
-.. toctree::
-   :maxdepth: 1
-
-   nfc-hci
-   nfc-pn544
diff --git a/Documentation/nfc/nfc-hci.rst b/Documentation/nfc/nfc-hci.rst
deleted file mode 100644
index eb8a1a14e919..000000000000
--- a/Documentation/nfc/nfc-hci.rst
+++ /dev/null
@@ -1,311 +0,0 @@
-========================
-HCI backend for NFC Core
-========================
-
-- Author: Eric Lapuyade, Samuel Ortiz
-- Contact: eric.lapuyade@intel.com, samuel.ortiz@intel.com
-
-General
--------
-
-The HCI layer implements much of the ETSI TS 102 622 V10.2.0 specification. It
-enables easy writing of HCI-based NFC drivers. The HCI layer runs as an NFC Core
-backend, implementing an abstract nfc device and translating NFC Core API
-to HCI commands and events.
-
-HCI
----
-
-HCI registers as an nfc device with NFC Core. Requests coming from userspace are
-routed through netlink sockets to NFC Core and then to HCI. From this point,
-they are translated in a sequence of HCI commands sent to the HCI layer in the
-host controller (the chip). Commands can be executed synchronously (the sending
-context blocks waiting for response) or asynchronously (the response is returned
-from HCI Rx context).
-HCI events can also be received from the host controller. They will be handled
-and a translation will be forwarded to NFC Core as needed. There are hooks to
-let the HCI driver handle proprietary events or override standard behavior.
-HCI uses 2 execution contexts:
-
-- one for executing commands : nfc_hci_msg_tx_work(). Only one command
-  can be executing at any given moment.
-- one for dispatching received events and commands : nfc_hci_msg_rx_work().
-
-HCI Session initialization
---------------------------
-
-The Session initialization is an HCI standard which must unfortunately
-support proprietary gates. This is the reason why the driver will pass a list
-of proprietary gates that must be part of the session. HCI will ensure all
-those gates have pipes connected when the hci device is set up.
-In case the chip supports pre-opened gates and pseudo-static pipes, the driver
-can pass that information to HCI core.
-
-HCI Gates and Pipes
--------------------
-
-A gate defines the 'port' where some service can be found. In order to access
-a service, one must create a pipe to that gate and open it. In this
-implementation, pipes are totally hidden. The public API only knows gates.
-This is consistent with the driver need to send commands to proprietary gates
-without knowing the pipe connected to it.
-
-Driver interface
-----------------
-
-A driver is generally written in two parts : the physical link management and
-the HCI management. This makes it easier to maintain a driver for a chip that
-can be connected using various phy (i2c, spi, ...)
-
-HCI Management
---------------
-
-A driver would normally register itself with HCI and provide the following
-entry points::
-
-  struct nfc_hci_ops {
-	int (*open)(struct nfc_hci_dev *hdev);
-	void (*close)(struct nfc_hci_dev *hdev);
-	int (*hci_ready) (struct nfc_hci_dev *hdev);
-	int (*xmit) (struct nfc_hci_dev *hdev, struct sk_buff *skb);
-	int (*start_poll) (struct nfc_hci_dev *hdev,
-			   u32 im_protocols, u32 tm_protocols);
-	int (*dep_link_up)(struct nfc_hci_dev *hdev, struct nfc_target *target,
-			   u8 comm_mode, u8 *gb, size_t gb_len);
-	int (*dep_link_down)(struct nfc_hci_dev *hdev);
-	int (*target_from_gate) (struct nfc_hci_dev *hdev, u8 gate,
-				 struct nfc_target *target);
-	int (*complete_target_discovered) (struct nfc_hci_dev *hdev, u8 gate,
-					   struct nfc_target *target);
-	int (*im_transceive) (struct nfc_hci_dev *hdev,
-			      struct nfc_target *target, struct sk_buff *skb,
-			      data_exchange_cb_t cb, void *cb_context);
-	int (*tm_send)(struct nfc_hci_dev *hdev, struct sk_buff *skb);
-	int (*check_presence)(struct nfc_hci_dev *hdev,
-			      struct nfc_target *target);
-	int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event,
-			      struct sk_buff *skb);
-  };
-
-- open() and close() shall turn the hardware on and off.
-- hci_ready() is an optional entry point that is called right after the hci
-  session has been set up. The driver can use it to do additional initialization
-  that must be performed using HCI commands.
-- xmit() shall simply write a frame to the physical link.
-- start_poll() is an optional entrypoint that shall set the hardware in polling
-  mode. This must be implemented only if the hardware uses proprietary gates or a
-  mechanism slightly different from the HCI standard.
-- dep_link_up() is called after a p2p target has been detected, to finish
-  the p2p connection setup with hardware parameters that need to be passed back
-  to nfc core.
-- dep_link_down() is called to bring the p2p link down.
-- target_from_gate() is an optional entrypoint to return the nfc protocols
-  corresponding to a proprietary gate.
-- complete_target_discovered() is an optional entry point to let the driver
-  perform additional proprietary processing necessary to auto activate the
-  discovered target.
-- im_transceive() must be implemented by the driver if proprietary HCI commands
-  are required to send data to the tag. Some tag types will require custom
-  commands, others can be written to using the standard HCI commands. The driver
-  can check the tag type and either do proprietary processing, or return 1 to ask
-  for standard processing. The data exchange command itself must be sent
-  asynchronously.
-- tm_send() is called to send data in the case of a p2p connection
-- check_presence() is an optional entry point that will be called regularly
-  by the core to check that an activated tag is still in the field. If this is
-  not implemented, the core will not be able to push tag_lost events to the user
-  space
-- event_received() is called to handle an event coming from the chip. Driver
-  can handle the event or return 1 to let HCI attempt standard processing.
-
-On the rx path, the driver is responsible to push incoming HCP frames to HCI
-using nfc_hci_recv_frame(). HCI will take care of re-aggregation and handling
-This must be done from a context that can sleep.
-
-PHY Management
---------------
-
-The physical link (i2c, ...) management is defined by the following structure::
-
-  struct nfc_phy_ops {
-	int (*write)(void *dev_id, struct sk_buff *skb);
-	int (*enable)(void *dev_id);
-	void (*disable)(void *dev_id);
-  };
-
-enable():
-	turn the phy on (power on), make it ready to transfer data
-disable():
-	turn the phy off
-write():
-	Send a data frame to the chip. Note that to enable higher
-	layers such as an llc to store the frame for re-emission, this
-	function must not alter the skb. It must also not return a positive
-	result (return 0 for success, negative for failure).
-
-Data coming from the chip shall be sent directly to nfc_hci_recv_frame().
-
-LLC
----
-
-Communication between the CPU and the chip often requires some link layer
-protocol. Those are isolated as modules managed by the HCI layer. There are
-currently two modules : nop (raw transfert) and shdlc.
-A new llc must implement the following functions::
-
-  struct nfc_llc_ops {
-	void *(*init) (struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
-		       rcv_to_hci_t rcv_to_hci, int tx_headroom,
-		       int tx_tailroom, int *rx_headroom, int *rx_tailroom,
-		       llc_failure_t llc_failure);
-	void (*deinit) (struct nfc_llc *llc);
-	int (*start) (struct nfc_llc *llc);
-	int (*stop) (struct nfc_llc *llc);
-	void (*rcv_from_drv) (struct nfc_llc *llc, struct sk_buff *skb);
-	int (*xmit_from_hci) (struct nfc_llc *llc, struct sk_buff *skb);
-  };
-
-init():
-	allocate and init your private storage
-deinit():
-	cleanup
-start():
-	establish the logical connection
-stop ():
-	terminate the logical connection
-rcv_from_drv():
-	handle data coming from the chip, going to HCI
-xmit_from_hci():
-	handle data sent by HCI, going to the chip
-
-The llc must be registered with nfc before it can be used. Do that by
-calling::
-
-	nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
-
-Again, note that the llc does not handle the physical link. It is thus very
-easy to mix any physical link with any llc for a given chip driver.
-
-Included Drivers
-----------------
-
-An HCI based driver for an NXP PN544, connected through I2C bus, and using
-shdlc is included.
-
-Execution Contexts
-------------------
-
-The execution contexts are the following:
-- IRQ handler (IRQH):
-fast, cannot sleep. sends incoming frames to HCI where they are passed to
-the current llc. In case of shdlc, the frame is queued in shdlc rx queue.
-
-- SHDLC State Machine worker (SMW)
-
-  Only when llc_shdlc is used: handles shdlc rx & tx queues.
-
-  Dispatches HCI cmd responses.
-
-- HCI Tx Cmd worker (MSGTXWQ)
-
-  Serializes execution of HCI commands.
-
-  Completes execution in case of response timeout.
-
-- HCI Rx worker (MSGRXWQ)
-
-  Dispatches incoming HCI commands or events.
-
-- Syscall context from a userspace call (SYSCALL)
-
-  Any entrypoint in HCI called from NFC Core
-
-Workflow executing an HCI command (using shdlc)
------------------------------------------------
-
-Executing an HCI command can easily be performed synchronously using the
-following API::
-
-  int nfc_hci_send_cmd (struct nfc_hci_dev *hdev, u8 gate, u8 cmd,
-			const u8 *param, size_t param_len, struct sk_buff **skb)
-
-The API must be invoked from a context that can sleep. Most of the time, this
-will be the syscall context. skb will return the result that was received in
-the response.
-
-Internally, execution is asynchronous. So all this API does is to enqueue the
-HCI command, setup a local wait queue on stack, and wait_event() for completion.
-The wait is not interruptible because it is guaranteed that the command will
-complete after some short timeout anyway.
-
-MSGTXWQ context will then be scheduled and invoke nfc_hci_msg_tx_work().
-This function will dequeue the next pending command and send its HCP fragments
-to the lower layer which happens to be shdlc. It will then start a timer to be
-able to complete the command with a timeout error if no response arrive.
-
-SMW context gets scheduled and invokes nfc_shdlc_sm_work(). This function
-handles shdlc framing in and out. It uses the driver xmit to send frames and
-receives incoming frames in an skb queue filled from the driver IRQ handler.
-SHDLC I(nformation) frames payload are HCP fragments. They are aggregated to
-form complete HCI frames, which can be a response, command, or event.
-
-HCI Responses are dispatched immediately from this context to unblock
-waiting command execution. Response processing involves invoking the completion
-callback that was provided by nfc_hci_msg_tx_work() when it sent the command.
-The completion callback will then wake the syscall context.
-
-It is also possible to execute the command asynchronously using this API::
-
-  static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
-				       const u8 *param, size_t param_len,
-				       data_exchange_cb_t cb, void *cb_context)
-
-The workflow is the same, except that the API call returns immediately, and
-the callback will be called with the result from the SMW context.
-
-Workflow receiving an HCI event or command
-------------------------------------------
-
-HCI commands or events are not dispatched from SMW context. Instead, they are
-queued to HCI rx_queue and will be dispatched from HCI rx worker
-context (MSGRXWQ). This is done this way to allow a cmd or event handler
-to also execute other commands (for example, handling the
-NFC_HCI_EVT_TARGET_DISCOVERED event from PN544 requires to issue an
-ANY_GET_PARAMETER to the reader A gate to get information on the target
-that was discovered).
-
-Typically, such an event will be propagated to NFC Core from MSGRXWQ context.
-
-Error management
-----------------
-
-Errors that occur synchronously with the execution of an NFC Core request are
-simply returned as the execution result of the request. These are easy.
-
-Errors that occur asynchronously (e.g. in a background protocol handling thread)
-must be reported such that upper layers don't stay ignorant that something
-went wrong below and know that expected events will probably never happen.
-Handling of these errors is done as follows:
-
-- driver (pn544) fails to deliver an incoming frame: it stores the error such
-  that any subsequent call to the driver will result in this error. Then it
-  calls the standard nfc_shdlc_recv_frame() with a NULL argument to report the
-  problem above. shdlc stores a EREMOTEIO sticky status, which will trigger
-  SMW to report above in turn.
-
-- SMW is basically a background thread to handle incoming and outgoing shdlc
-  frames. This thread will also check the shdlc sticky status and report to HCI
-  when it discovers it is not able to run anymore because of an unrecoverable
-  error that happened within shdlc or below. If the problem occurs during shdlc
-  connection, the error is reported through the connect completion.
-
-- HCI: if an internal HCI error happens (frame is lost), or HCI is reported an
-  error from a lower layer, HCI will either complete the currently executing
-  command with that error, or notify NFC Core directly if no command is
-  executing.
-
-- NFC Core: when NFC Core is notified of an error from below and polling is
-  active, it will send a tag discovered event with an empty tag list to the user
-  space to let it know that the poll operation will never be able to detect a
-  tag. If polling is not active and the error was sticky, lower levels will
-  return it at next invocation.
diff --git a/Documentation/nfc/nfc-pn544.rst b/Documentation/nfc/nfc-pn544.rst
deleted file mode 100644
index 6b2d8aae0c4e..000000000000
--- a/Documentation/nfc/nfc-pn544.rst
+++ /dev/null
@@ -1,34 +0,0 @@
-============================================================================
-Kernel driver for the NXP Semiconductors PN544 Near Field Communication chip
-============================================================================
-
-
-General
--------
-
-The PN544 is an integrated transmission module for contactless
-communication. The driver goes under drives/nfc/ and is compiled as a
-module named "pn544".
-
-Host Interfaces: I2C, SPI and HSU, this driver supports currently only I2C.
-
-Protocols
----------
-
-In the normal (HCI) mode and in the firmware update mode read and
-write functions behave a bit differently because the message formats
-or the protocols are different.
-
-In the normal (HCI) mode the protocol used is derived from the ETSI
-HCI specification. The firmware is updated using a specific protocol,
-which is different from HCI.
-
-HCI messages consist of an eight bit header and the message body. The
-header contains the message length. Maximum size for an HCI message is
-33. In HCI mode sent messages are tested for a correct
-checksum. Firmware update messages have the length in the second (MSB)
-and third (LSB) bytes of the message. The maximum FW message length is
-1024 bytes.
-
-For the ETSI HCI specification see
-http://www.etsi.org/WebSite/Technologies/ProtocolSpecification.aspx
-- 
cgit 


From 19024c09c243c5107f738286459a0dd85697b089 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 16:48:15 -0300
Subject: docs: mmc: move it to the driver-api

Most of the stuff here is related to the kAPI.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/driver-api/index.rst             |  1 +
 Documentation/driver-api/mmc/index.rst         | 11 +++
 Documentation/driver-api/mmc/mmc-async-req.rst | 98 ++++++++++++++++++++++++++
 Documentation/driver-api/mmc/mmc-dev-attrs.rst | 91 ++++++++++++++++++++++++
 Documentation/driver-api/mmc/mmc-dev-parts.rst | 41 +++++++++++
 Documentation/driver-api/mmc/mmc-tools.rst     | 37 ++++++++++
 Documentation/mmc/index.rst                    | 13 ----
 Documentation/mmc/mmc-async-req.rst            | 98 --------------------------
 Documentation/mmc/mmc-dev-attrs.rst            | 91 ------------------------
 Documentation/mmc/mmc-dev-parts.rst            | 41 -----------
 Documentation/mmc/mmc-tools.rst                | 37 ----------
 11 files changed, 279 insertions(+), 280 deletions(-)
 create mode 100644 Documentation/driver-api/mmc/index.rst
 create mode 100644 Documentation/driver-api/mmc/mmc-async-req.rst
 create mode 100644 Documentation/driver-api/mmc/mmc-dev-attrs.rst
 create mode 100644 Documentation/driver-api/mmc/mmc-dev-parts.rst
 create mode 100644 Documentation/driver-api/mmc/mmc-tools.rst
 delete mode 100644 Documentation/mmc/index.rst
 delete mode 100644 Documentation/mmc/mmc-async-req.rst
 delete mode 100644 Documentation/mmc/mmc-dev-attrs.rst
 delete mode 100644 Documentation/mmc/mmc-dev-parts.rst
 delete mode 100644 Documentation/mmc/mmc-tools.rst

(limited to 'Documentation')

diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index d6bf4a37cefe..25f85d3021aa 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -45,6 +45,7 @@ available subsections can be seen below.
    miscellaneous
    mei/index
    mtd/index
+   mmc/index
    nvdimm/index
    w1
    rapidio/index
diff --git a/Documentation/driver-api/mmc/index.rst b/Documentation/driver-api/mmc/index.rst
new file mode 100644
index 000000000000..9aaf64951a8c
--- /dev/null
+++ b/Documentation/driver-api/mmc/index.rst
@@ -0,0 +1,11 @@
+========================
+MMC/SD/SDIO card support
+========================
+
+.. toctree::
+   :maxdepth: 1
+
+   mmc-dev-attrs
+   mmc-dev-parts
+   mmc-async-req
+   mmc-tools
diff --git a/Documentation/driver-api/mmc/mmc-async-req.rst b/Documentation/driver-api/mmc/mmc-async-req.rst
new file mode 100644
index 000000000000..0f7197c9c3b5
--- /dev/null
+++ b/Documentation/driver-api/mmc/mmc-async-req.rst
@@ -0,0 +1,98 @@
+========================
+MMC Asynchronous Request
+========================
+
+Rationale
+=========
+
+How significant is the cache maintenance overhead?
+
+It depends. Fast eMMC and multiple cache levels with speculative cache
+pre-fetch makes the cache overhead relatively significant. If the DMA
+preparations for the next request are done in parallel with the current
+transfer, the DMA preparation overhead would not affect the MMC performance.
+
+The intention of non-blocking (asynchronous) MMC requests is to minimize the
+time between when an MMC request ends and another MMC request begins.
+
+Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and
+dma_unmap_sg are processing. Using non-blocking MMC requests makes it
+possible to prepare the caches for next job in parallel with an active
+MMC request.
+
+MMC block driver
+================
+
+The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking.
+
+The increase in throughput is proportional to the time it takes to
+prepare (major part of preparations are dma_map_sg() and dma_unmap_sg())
+a request and how fast the memory is. The faster the MMC/SD is the
+more significant the prepare request time becomes. Roughly the expected
+performance gain is 5% for large writes and 10% on large reads on a L2 cache
+platform. In power save mode, when clocks run on a lower frequency, the DMA
+preparation may cost even more. As long as these slower preparations are run
+in parallel with the transfer performance won't be affected.
+
+Details on measurements from IOZone and mmc_test
+================================================
+
+https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req
+
+MMC core API extension
+======================
+
+There is one new public function mmc_start_req().
+
+It starts a new MMC command request for a host. The function isn't
+truly non-blocking. If there is an ongoing async request it waits
+for completion of that request and starts the new one and returns. It
+doesn't wait for the new request to complete. If there is no ongoing
+request it starts the new request and returns immediately.
+
+MMC host extensions
+===================
+
+There are two optional members in the mmc_host_ops -- pre_req() and
+post_req() -- that the host driver may implement in order to move work
+to before and after the actual mmc_host_ops.request() function is called.
+
+In the DMA case pre_req() may do dma_map_sg() and prepare the DMA
+descriptor, and post_req() runs the dma_unmap_sg().
+
+Optimize for the first request
+==============================
+
+The first request in a series of requests can't be prepared in parallel
+with the previous transfer, since there is no previous request.
+
+The argument is_first_req in pre_req() indicates that there is no previous
+request. The host driver may optimize for this scenario to minimize
+the performance loss. A way to optimize for this is to split the current
+request in two chunks, prepare the first chunk and start the request,
+and finally prepare the second chunk and start the transfer.
+
+Pseudocode to handle is_first_req scenario with minimal prepare overhead::
+
+  if (is_first_req && req->size > threshold)
+     /* start MMC transfer for the complete transfer size */
+     mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE);
+
+     /*
+      * Begin to prepare DMA while cmd is being processed by MMC.
+      * The first chunk of the request should take the same time
+      * to prepare as the "MMC process command time".
+      * If prepare time exceeds MMC cmd time
+      * the transfer is delayed, guesstimate max 4k as first chunk size.
+      */
+      prepare_1st_chunk_for_dma(req);
+      /* flush pending desc to the DMAC (dmaengine.h) */
+      dma_issue_pending(req->dma_desc);
+
+      prepare_2nd_chunk_for_dma(req);
+      /*
+       * The second issue_pending should be called before MMC runs out
+       * of the first chunk. If the MMC runs out of the first data chunk
+       * before this call, the transfer is delayed.
+       */
+      dma_issue_pending(req->dma_desc);
diff --git a/Documentation/driver-api/mmc/mmc-dev-attrs.rst b/Documentation/driver-api/mmc/mmc-dev-attrs.rst
new file mode 100644
index 000000000000..4f44b1b730d6
--- /dev/null
+++ b/Documentation/driver-api/mmc/mmc-dev-attrs.rst
@@ -0,0 +1,91 @@
+==================================
+SD and MMC Block Device Attributes
+==================================
+
+These attributes are defined for the block devices associated with the
+SD or MMC device.
+
+The following attributes are read/write.
+
+	========		===============================================
+	force_ro		Enforce read-only access even if write protect 					switch is off.
+	========		===============================================
+
+SD and MMC Device Attributes
+============================
+
+All attributes are read-only.
+
+	======================	===============================================
+	cid			Card Identification Register
+	csd			Card Specific Data Register
+	scr			SD Card Configuration Register (SD only)
+	date			Manufacturing Date (from CID Register)
+	fwrev			Firmware/Product Revision (from CID Register)
+				(SD and MMCv1 only)
+	hwrev			Hardware/Product Revision (from CID Register)
+				(SD and MMCv1 only)
+	manfid			Manufacturer ID (from CID Register)
+	name			Product Name (from CID Register)
+	oemid			OEM/Application ID (from CID Register)
+	prv			Product Revision (from CID Register)
+				(SD and MMCv4 only)
+	serial			Product Serial Number (from CID Register)
+	erase_size		Erase group size
+	preferred_erase_size	Preferred erase size
+	raw_rpmb_size_mult	RPMB partition size
+	rel_sectors		Reliable write sector count
+	ocr 			Operation Conditions Register
+	dsr			Driver Stage Register
+	cmdq_en			Command Queue enabled:
+
+					1 => enabled, 0 => not enabled
+	======================	===============================================
+
+Note on Erase Size and Preferred Erase Size:
+
+	"erase_size" is the  minimum size, in bytes, of an erase
+	operation.  For MMC, "erase_size" is the erase group size
+	reported by the card.  Note that "erase_size" does not apply
+	to trim or secure trim operations where the minimum size is
+	always one 512 byte sector.  For SD, "erase_size" is 512
+	if the card is block-addressed, 0 otherwise.
+
+	SD/MMC cards can erase an arbitrarily large area up to and
+	including the whole card.  When erasing a large area it may
+	be desirable to do it in smaller chunks for three reasons:
+
+	     1. A single erase command will make all other I/O on
+		the card wait.  This is not a problem if the whole card
+		is being erased, but erasing one partition will make
+		I/O for another partition on the same card wait for the
+		duration of the erase - which could be a several
+		minutes.
+	     2. To be able to inform the user of erase progress.
+	     3. The erase timeout becomes too large to be very
+		useful.  Because the erase timeout contains a margin
+		which is multiplied by the size of the erase area,
+		the value can end up being several minutes for large
+		areas.
+
+	"erase_size" is not the most efficient unit to erase
+	(especially for SD where it is just one sector),
+	hence "preferred_erase_size" provides a good chunk
+	size for erasing large areas.
+
+	For MMC, "preferred_erase_size" is the high-capacity
+	erase size if a card specifies one, otherwise it is
+	based on the capacity of the card.
+
+	For SD, "preferred_erase_size" is the allocation unit
+	size specified by the card.
+
+	"preferred_erase_size" is in bytes.
+
+Note on raw_rpmb_size_mult:
+
+	"raw_rpmb_size_mult" is a multiple of 128kB block.
+
+	RPMB size in byte is calculated by using the following equation:
+
+		RPMB partition size = 128kB x raw_rpmb_size_mult
diff --git a/Documentation/driver-api/mmc/mmc-dev-parts.rst b/Documentation/driver-api/mmc/mmc-dev-parts.rst
new file mode 100644
index 000000000000..995922f1f744
--- /dev/null
+++ b/Documentation/driver-api/mmc/mmc-dev-parts.rst
@@ -0,0 +1,41 @@
+============================
+SD and MMC Device Partitions
+============================
+
+Device partitions are additional logical block devices present on the
+SD/MMC device.
+
+As of this writing, MMC boot partitions as supported and exposed as
+/dev/mmcblkXboot0 and /dev/mmcblkXboot1, where X is the index of the
+parent /dev/mmcblkX.
+
+MMC Boot Partitions
+===================
+
+Read and write access is provided to the two MMC boot partitions. Due to
+the sensitive nature of the boot partition contents, which often store
+a bootloader or bootloader configuration tables crucial to booting the
+platform, write access is disabled by default to reduce the chance of
+accidental bricking.
+
+To enable write access to /dev/mmcblkXbootY, disable the forced read-only
+access with::
+
+	echo 0 > /sys/block/mmcblkXbootY/force_ro
+
+To re-enable read-only access::
+
+	echo 1 > /sys/block/mmcblkXbootY/force_ro
+
+The boot partitions can also be locked read only until the next power on,
+with::
+
+	echo 1 > /sys/block/mmcblkXbootY/ro_lock_until_next_power_on
+
+This is a feature of the card and not of the kernel. If the card does
+not support boot partition locking, the file will not exist. If the
+feature has been disabled on the card, the file will be read-only.
+
+The boot partitions can also be locked permanently, but this feature is
+not accessible through sysfs in order to avoid accidental or malicious
+bricking.
diff --git a/Documentation/driver-api/mmc/mmc-tools.rst b/Documentation/driver-api/mmc/mmc-tools.rst
new file mode 100644
index 000000000000..54406093768b
--- /dev/null
+++ b/Documentation/driver-api/mmc/mmc-tools.rst
@@ -0,0 +1,37 @@
+======================
+MMC tools introduction
+======================
+
+There is one MMC test tools called mmc-utils, which is maintained by Chris Ball,
+you can find it at the below public git repository:
+
+	http://git.kernel.org/cgit/linux/kernel/git/cjb/mmc-utils.git/
+
+Functions
+=========
+
+The mmc-utils tools can do the following:
+
+ - Print and parse extcsd data.
+ - Determine the eMMC writeprotect status.
+ - Set the eMMC writeprotect status.
+ - Set the eMMC data sector size to 4KB by disabling emulation.
+ - Create general purpose partition.
+ - Enable the enhanced user area.
+ - Enable write reliability per partition.
+ - Print the response to STATUS_SEND (CMD13).
+ - Enable the boot partition.
+ - Set Boot Bus Conditions.
+ - Enable the eMMC BKOPS feature.
+ - Permanently enable the eMMC H/W Reset feature.
+ - Permanently disable the eMMC H/W Reset feature.
+ - Send Sanitize command.
+ - Program authentication key for the device.
+ - Counter value for the rpmb device will be read to stdout.
+ - Read from rpmb device to output.
+ - Write to rpmb device from data file.
+ - Enable the eMMC cache feature.
+ - Disable the eMMC cache feature.
+ - Print and parse CID data.
+ - Print and parse CSD data.
+ - Print and parse SCR data.
diff --git a/Documentation/mmc/index.rst b/Documentation/mmc/index.rst
deleted file mode 100644
index 3305478ddadb..000000000000
--- a/Documentation/mmc/index.rst
+++ /dev/null
@@ -1,13 +0,0 @@
-:orphan:
-
-========================
-MMC/SD/SDIO card support
-========================
-
-.. toctree::
-   :maxdepth: 1
-
-   mmc-dev-attrs
-   mmc-dev-parts
-   mmc-async-req
-   mmc-tools
diff --git a/Documentation/mmc/mmc-async-req.rst b/Documentation/mmc/mmc-async-req.rst
deleted file mode 100644
index 0f7197c9c3b5..000000000000
--- a/Documentation/mmc/mmc-async-req.rst
+++ /dev/null
@@ -1,98 +0,0 @@
-========================
-MMC Asynchronous Request
-========================
-
-Rationale
-=========
-
-How significant is the cache maintenance overhead?
-
-It depends. Fast eMMC and multiple cache levels with speculative cache
-pre-fetch makes the cache overhead relatively significant. If the DMA
-preparations for the next request are done in parallel with the current
-transfer, the DMA preparation overhead would not affect the MMC performance.
-
-The intention of non-blocking (asynchronous) MMC requests is to minimize the
-time between when an MMC request ends and another MMC request begins.
-
-Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and
-dma_unmap_sg are processing. Using non-blocking MMC requests makes it
-possible to prepare the caches for next job in parallel with an active
-MMC request.
-
-MMC block driver
-================
-
-The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking.
-
-The increase in throughput is proportional to the time it takes to
-prepare (major part of preparations are dma_map_sg() and dma_unmap_sg())
-a request and how fast the memory is. The faster the MMC/SD is the
-more significant the prepare request time becomes. Roughly the expected
-performance gain is 5% for large writes and 10% on large reads on a L2 cache
-platform. In power save mode, when clocks run on a lower frequency, the DMA
-preparation may cost even more. As long as these slower preparations are run
-in parallel with the transfer performance won't be affected.
-
-Details on measurements from IOZone and mmc_test
-================================================
-
-https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req
-
-MMC core API extension
-======================
-
-There is one new public function mmc_start_req().
-
-It starts a new MMC command request for a host. The function isn't
-truly non-blocking. If there is an ongoing async request it waits
-for completion of that request and starts the new one and returns. It
-doesn't wait for the new request to complete. If there is no ongoing
-request it starts the new request and returns immediately.
-
-MMC host extensions
-===================
-
-There are two optional members in the mmc_host_ops -- pre_req() and
-post_req() -- that the host driver may implement in order to move work
-to before and after the actual mmc_host_ops.request() function is called.
-
-In the DMA case pre_req() may do dma_map_sg() and prepare the DMA
-descriptor, and post_req() runs the dma_unmap_sg().
-
-Optimize for the first request
-==============================
-
-The first request in a series of requests can't be prepared in parallel
-with the previous transfer, since there is no previous request.
-
-The argument is_first_req in pre_req() indicates that there is no previous
-request. The host driver may optimize for this scenario to minimize
-the performance loss. A way to optimize for this is to split the current
-request in two chunks, prepare the first chunk and start the request,
-and finally prepare the second chunk and start the transfer.
-
-Pseudocode to handle is_first_req scenario with minimal prepare overhead::
-
-  if (is_first_req && req->size > threshold)
-     /* start MMC transfer for the complete transfer size */
-     mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE);
-
-     /*
-      * Begin to prepare DMA while cmd is being processed by MMC.
-      * The first chunk of the request should take the same time
-      * to prepare as the "MMC process command time".
-      * If prepare time exceeds MMC cmd time
-      * the transfer is delayed, guesstimate max 4k as first chunk size.
-      */
-      prepare_1st_chunk_for_dma(req);
-      /* flush pending desc to the DMAC (dmaengine.h) */
-      dma_issue_pending(req->dma_desc);
-
-      prepare_2nd_chunk_for_dma(req);
-      /*
-       * The second issue_pending should be called before MMC runs out
-       * of the first chunk. If the MMC runs out of the first data chunk
-       * before this call, the transfer is delayed.
-       */
-      dma_issue_pending(req->dma_desc);
diff --git a/Documentation/mmc/mmc-dev-attrs.rst b/Documentation/mmc/mmc-dev-attrs.rst
deleted file mode 100644
index 4f44b1b730d6..000000000000
--- a/Documentation/mmc/mmc-dev-attrs.rst
+++ /dev/null
@@ -1,91 +0,0 @@
-==================================
-SD and MMC Block Device Attributes
-==================================
-
-These attributes are defined for the block devices associated with the
-SD or MMC device.
-
-The following attributes are read/write.
-
-	========		===============================================
-	force_ro		Enforce read-only access even if write protect 					switch is off.
-	========		===============================================
-
-SD and MMC Device Attributes
-============================
-
-All attributes are read-only.
-
-	======================	===============================================
-	cid			Card Identification Register
-	csd			Card Specific Data Register
-	scr			SD Card Configuration Register (SD only)
-	date			Manufacturing Date (from CID Register)
-	fwrev			Firmware/Product Revision (from CID Register)
-				(SD and MMCv1 only)
-	hwrev			Hardware/Product Revision (from CID Register)
-				(SD and MMCv1 only)
-	manfid			Manufacturer ID (from CID Register)
-	name			Product Name (from CID Register)
-	oemid			OEM/Application ID (from CID Register)
-	prv			Product Revision (from CID Register)
-				(SD and MMCv4 only)
-	serial			Product Serial Number (from CID Register)
-	erase_size		Erase group size
-	preferred_erase_size	Preferred erase size
-	raw_rpmb_size_mult	RPMB partition size
-	rel_sectors		Reliable write sector count
-	ocr 			Operation Conditions Register
-	dsr			Driver Stage Register
-	cmdq_en			Command Queue enabled:
-
-					1 => enabled, 0 => not enabled
-	======================	===============================================
-
-Note on Erase Size and Preferred Erase Size:
-
-	"erase_size" is the  minimum size, in bytes, of an erase
-	operation.  For MMC, "erase_size" is the erase group size
-	reported by the card.  Note that "erase_size" does not apply
-	to trim or secure trim operations where the minimum size is
-	always one 512 byte sector.  For SD, "erase_size" is 512
-	if the card is block-addressed, 0 otherwise.
-
-	SD/MMC cards can erase an arbitrarily large area up to and
-	including the whole card.  When erasing a large area it may
-	be desirable to do it in smaller chunks for three reasons:
-
-	     1. A single erase command will make all other I/O on
-		the card wait.  This is not a problem if the whole card
-		is being erased, but erasing one partition will make
-		I/O for another partition on the same card wait for the
-		duration of the erase - which could be a several
-		minutes.
-	     2. To be able to inform the user of erase progress.
-	     3. The erase timeout becomes too large to be very
-		useful.  Because the erase timeout contains a margin
-		which is multiplied by the size of the erase area,
-		the value can end up being several minutes for large
-		areas.
-
-	"erase_size" is not the most efficient unit to erase
-	(especially for SD where it is just one sector),
-	hence "preferred_erase_size" provides a good chunk
-	size for erasing large areas.
-
-	For MMC, "preferred_erase_size" is the high-capacity
-	erase size if a card specifies one, otherwise it is
-	based on the capacity of the card.
-
-	For SD, "preferred_erase_size" is the allocation unit
-	size specified by the card.
-
-	"preferred_erase_size" is in bytes.
-
-Note on raw_rpmb_size_mult:
-
-	"raw_rpmb_size_mult" is a multiple of 128kB block.
-
-	RPMB size in byte is calculated by using the following equation:
-
-		RPMB partition size = 128kB x raw_rpmb_size_mult
diff --git a/Documentation/mmc/mmc-dev-parts.rst b/Documentation/mmc/mmc-dev-parts.rst
deleted file mode 100644
index 995922f1f744..000000000000
--- a/Documentation/mmc/mmc-dev-parts.rst
+++ /dev/null
@@ -1,41 +0,0 @@
-============================
-SD and MMC Device Partitions
-============================
-
-Device partitions are additional logical block devices present on the
-SD/MMC device.
-
-As of this writing, MMC boot partitions as supported and exposed as
-/dev/mmcblkXboot0 and /dev/mmcblkXboot1, where X is the index of the
-parent /dev/mmcblkX.
-
-MMC Boot Partitions
-===================
-
-Read and write access is provided to the two MMC boot partitions. Due to
-the sensitive nature of the boot partition contents, which often store
-a bootloader or bootloader configuration tables crucial to booting the
-platform, write access is disabled by default to reduce the chance of
-accidental bricking.
-
-To enable write access to /dev/mmcblkXbootY, disable the forced read-only
-access with::
-
-	echo 0 > /sys/block/mmcblkXbootY/force_ro
-
-To re-enable read-only access::
-
-	echo 1 > /sys/block/mmcblkXbootY/force_ro
-
-The boot partitions can also be locked read only until the next power on,
-with::
-
-	echo 1 > /sys/block/mmcblkXbootY/ro_lock_until_next_power_on
-
-This is a feature of the card and not of the kernel. If the card does
-not support boot partition locking, the file will not exist. If the
-feature has been disabled on the card, the file will be read-only.
-
-The boot partitions can also be locked permanently, but this feature is
-not accessible through sysfs in order to avoid accidental or malicious
-bricking.
diff --git a/Documentation/mmc/mmc-tools.rst b/Documentation/mmc/mmc-tools.rst
deleted file mode 100644
index 54406093768b..000000000000
--- a/Documentation/mmc/mmc-tools.rst
+++ /dev/null
@@ -1,37 +0,0 @@
-======================
-MMC tools introduction
-======================
-
-There is one MMC test tools called mmc-utils, which is maintained by Chris Ball,
-you can find it at the below public git repository:
-
-	http://git.kernel.org/cgit/linux/kernel/git/cjb/mmc-utils.git/
-
-Functions
-=========
-
-The mmc-utils tools can do the following:
-
- - Print and parse extcsd data.
- - Determine the eMMC writeprotect status.
- - Set the eMMC writeprotect status.
- - Set the eMMC data sector size to 4KB by disabling emulation.
- - Create general purpose partition.
- - Enable the enhanced user area.
- - Enable write reliability per partition.
- - Print the response to STATUS_SEND (CMD13).
- - Enable the boot partition.
- - Set Boot Bus Conditions.
- - Enable the eMMC BKOPS feature.
- - Permanently enable the eMMC H/W Reset feature.
- - Permanently disable the eMMC H/W Reset feature.
- - Send Sanitize command.
- - Program authentication key for the device.
- - Counter value for the rpmb device will be read to stdout.
- - Read from rpmb device to output.
- - Write to rpmb device from data file.
- - Enable the eMMC cache feature.
- - Disable the eMMC cache feature.
- - Print and parse CID data.
- - Print and parse CSD data.
- - Print and parse SCR data.
-- 
cgit 


From c0b11a50aee643ac40ded5dbcd48189ee0926ee4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 16:50:07 -0300
Subject: docs: md: move it to the driver-api book

The docs there were meant to be read by a Kernel developer.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/driver-api/index.rst          |   1 +
 Documentation/driver-api/md/index.rst       |  10 +
 Documentation/driver-api/md/md-cluster.rst  | 385 ++++++++++++++++++++++++++++
 Documentation/driver-api/md/raid5-cache.rst | 111 ++++++++
 Documentation/driver-api/md/raid5-ppl.rst   |  47 ++++
 Documentation/md/index.rst                  |  12 -
 Documentation/md/md-cluster.rst             | 385 ----------------------------
 Documentation/md/raid5-cache.rst            | 111 --------
 Documentation/md/raid5-ppl.rst              |  47 ----
 9 files changed, 554 insertions(+), 555 deletions(-)
 create mode 100644 Documentation/driver-api/md/index.rst
 create mode 100644 Documentation/driver-api/md/md-cluster.rst
 create mode 100644 Documentation/driver-api/md/raid5-cache.rst
 create mode 100644 Documentation/driver-api/md/raid5-ppl.rst
 delete mode 100644 Documentation/md/index.rst
 delete mode 100644 Documentation/md/md-cluster.rst
 delete mode 100644 Documentation/md/raid5-cache.rst
 delete mode 100644 Documentation/md/raid5-ppl.rst

(limited to 'Documentation')

diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 25f85d3021aa..b5179bf2ada2 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -56,6 +56,7 @@ available subsections can be seen below.
    firmware/index
    pinctl
    gpio/index
+   md/index
    misc_devices
    nfc/index
    dmaengine/index
diff --git a/Documentation/driver-api/md/index.rst b/Documentation/driver-api/md/index.rst
new file mode 100644
index 000000000000..205080891a1a
--- /dev/null
+++ b/Documentation/driver-api/md/index.rst
@@ -0,0 +1,10 @@
+====
+RAID
+====
+
+.. toctree::
+   :maxdepth: 1
+
+   md-cluster
+   raid5-cache
+   raid5-ppl
diff --git a/Documentation/driver-api/md/md-cluster.rst b/Documentation/driver-api/md/md-cluster.rst
new file mode 100644
index 000000000000..96eb52cec7eb
--- /dev/null
+++ b/Documentation/driver-api/md/md-cluster.rst
@@ -0,0 +1,385 @@
+==========
+MD Cluster
+==========
+
+The cluster MD is a shared-device RAID for a cluster, it supports
+two levels: raid1 and raid10 (limited support).
+
+
+1. On-disk format
+=================
+
+Separate write-intent-bitmaps are used for each cluster node.
+The bitmaps record all writes that may have been started on that node,
+and may not yet have finished. The on-disk layout is::
+
+  0                    4k                     8k                    12k
+  -------------------------------------------------------------------
+  | idle                | md super            | bm super [0] + bits |
+  | bm bits[0, contd]   | bm super[1] + bits  | bm bits[1, contd]   |
+  | bm super[2] + bits  | bm bits [2, contd]  | bm super[3] + bits  |
+  | bm bits [3, contd]  |                     |                     |
+
+During "normal" functioning we assume the filesystem ensures that only
+one node writes to any given block at a time, so a write request will
+
+ - set the appropriate bit (if not already set)
+ - commit the write to all mirrors
+ - schedule the bit to be cleared after a timeout.
+
+Reads are just handled normally. It is up to the filesystem to ensure
+one node doesn't read from a location where another node (or the same
+node) is writing.
+
+
+2. DLM Locks for management
+===========================
+
+There are three groups of locks for managing the device:
+
+2.1 Bitmap lock resource (bm_lockres)
+-------------------------------------
+
+ The bm_lockres protects individual node bitmaps. They are named in
+ the form bitmap000 for node 1, bitmap001 for node 2 and so on. When a
+ node joins the cluster, it acquires the lock in PW mode and it stays
+ so during the lifetime the node is part of the cluster. The lock
+ resource number is based on the slot number returned by the DLM
+ subsystem. Since DLM starts node count from one and bitmap slots
+ start from zero, one is subtracted from the DLM slot number to arrive
+ at the bitmap slot number.
+
+ The LVB of the bitmap lock for a particular node records the range
+ of sectors that are being re-synced by that node.  No other
+ node may write to those sectors.  This is used when a new nodes
+ joins the cluster.
+
+2.2 Message passing locks
+-------------------------
+
+ Each node has to communicate with other nodes when starting or ending
+ resync, and for metadata superblock updates.  This communication is
+ managed through three locks: "token", "message", and "ack", together
+ with the Lock Value Block (LVB) of one of the "message" lock.
+
+2.3 new-device management
+-------------------------
+
+ A single lock: "no-new-dev" is used to co-ordinate the addition of
+ new devices - this must be synchronized across the array.
+ Normally all nodes hold a concurrent-read lock on this device.
+
+3. Communication
+================
+
+ Messages can be broadcast to all nodes, and the sender waits for all
+ other nodes to acknowledge the message before proceeding.  Only one
+ message can be processed at a time.
+
+3.1 Message Types
+-----------------
+
+ There are six types of messages which are passed:
+
+3.1.1 METADATA_UPDATED
+^^^^^^^^^^^^^^^^^^^^^^
+
+   informs other nodes that the metadata has
+   been updated, and the node must re-read the md superblock. This is
+   performed synchronously. It is primarily used to signal device
+   failure.
+
+3.1.2 RESYNCING
+^^^^^^^^^^^^^^^
+   informs other nodes that a resync is initiated or
+   ended so that each node may suspend or resume the region.  Each
+   RESYNCING message identifies a range of the devices that the
+   sending node is about to resync. This overrides any previous
+   notification from that node: only one ranged can be resynced at a
+   time per-node.
+
+3.1.3 NEWDISK
+^^^^^^^^^^^^^
+
+   informs other nodes that a device is being added to
+   the array. Message contains an identifier for that device.  See
+   below for further details.
+
+3.1.4 REMOVE
+^^^^^^^^^^^^
+
+   A failed or spare device is being removed from the
+   array. The slot-number of the device is included in the message.
+
+ 3.1.5 RE_ADD:
+
+   A failed device is being re-activated - the assumption
+   is that it has been determined to be working again.
+
+ 3.1.6 BITMAP_NEEDS_SYNC:
+
+   If a node is stopped locally but the bitmap
+   isn't clean, then another node is informed to take the ownership of
+   resync.
+
+3.2 Communication mechanism
+---------------------------
+
+ The DLM LVB is used to communicate within nodes of the cluster. There
+ are three resources used for the purpose:
+
+3.2.1 token
+^^^^^^^^^^^
+   The resource which protects the entire communication
+   system. The node having the token resource is allowed to
+   communicate.
+
+3.2.2 message
+^^^^^^^^^^^^^
+   The lock resource which carries the data to communicate.
+
+3.2.3 ack
+^^^^^^^^^
+
+   The resource, acquiring which means the message has been
+   acknowledged by all nodes in the cluster. The BAST of the resource
+   is used to inform the receiving node that a node wants to
+   communicate.
+
+The algorithm is:
+
+ 1. receive status - all nodes have concurrent-reader lock on "ack"::
+
+	sender                         receiver                 receiver
+	"ack":CR                       "ack":CR                 "ack":CR
+
+ 2. sender get EX on "token",
+    sender get EX on "message"::
+
+	sender                        receiver                 receiver
+	"token":EX                    "ack":CR                 "ack":CR
+	"message":EX
+	"ack":CR
+
+    Sender checks that it still needs to send a message. Messages
+    received or other events that happened while waiting for the
+    "token" may have made this message inappropriate or redundant.
+
+ 3. sender writes LVB
+
+    sender down-convert "message" from EX to CW
+
+    sender try to get EX of "ack"
+
+    ::
+
+      [ wait until all receivers have *processed* the "message" ]
+
+                                       [ triggered by bast of "ack" ]
+                                       receiver get CR on "message"
+                                       receiver read LVB
+                                       receiver processes the message
+                                       [ wait finish ]
+                                       receiver releases "ack"
+                                       receiver tries to get PR on "message"
+
+     sender                         receiver                  receiver
+     "token":EX                     "message":CR              "message":CR
+     "message":CW
+     "ack":EX
+
+ 4. triggered by grant of EX on "ack" (indicating all receivers
+    have processed message)
+
+    sender down-converts "ack" from EX to CR
+
+    sender releases "message"
+
+    sender releases "token"
+
+    ::
+
+                                 receiver upconvert to PR on "message"
+                                 receiver get CR of "ack"
+                                 receiver release "message"
+
+     sender                      receiver                   receiver
+     "ack":CR                    "ack":CR                   "ack":CR
+
+
+4. Handling Failures
+====================
+
+4.1 Node Failure
+----------------
+
+ When a node fails, the DLM informs the cluster with the slot
+ number. The node starts a cluster recovery thread. The cluster
+ recovery thread:
+
+	- acquires the bitmap<number> lock of the failed node
+	- opens the bitmap
+	- reads the bitmap of the failed node
+	- copies the set bitmap to local node
+	- cleans the bitmap of the failed node
+	- releases bitmap<number> lock of the failed node
+	- initiates resync of the bitmap on the current node
+	  md_check_recovery is invoked within recover_bitmaps,
+	  then md_check_recovery -> metadata_update_start/finish,
+	  it will lock the communication by lock_comm.
+	  Which means when one node is resyncing it blocks all
+	  other nodes from writing anywhere on the array.
+
+ The resync process is the regular md resync. However, in a clustered
+ environment when a resync is performed, it needs to tell other nodes
+ of the areas which are suspended. Before a resync starts, the node
+ send out RESYNCING with the (lo,hi) range of the area which needs to
+ be suspended. Each node maintains a suspend_list, which contains the
+ list of ranges which are currently suspended. On receiving RESYNCING,
+ the node adds the range to the suspend_list. Similarly, when the node
+ performing resync finishes, it sends RESYNCING with an empty range to
+ other nodes and other nodes remove the corresponding entry from the
+ suspend_list.
+
+ A helper function, ->area_resyncing() can be used to check if a
+ particular I/O range should be suspended or not.
+
+4.2 Device Failure
+==================
+
+ Device failures are handled and communicated with the metadata update
+ routine.  When a node detects a device failure it does not allow
+ any further writes to that device until the failure has been
+ acknowledged by all other nodes.
+
+5. Adding a new Device
+----------------------
+
+ For adding a new device, it is necessary that all nodes "see" the new
+ device to be added. For this, the following algorithm is used:
+
+   1.  Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues
+       ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CLUSTER_ADD)
+   2.  Node 1 sends a NEWDISK message with uuid and slot number
+   3.  Other nodes issue kobject_uevent_env with uuid and slot number
+       (Steps 4,5 could be a udev rule)
+   4.  In userspace, the node searches for the disk, perhaps
+       using blkid -t SUB_UUID=""
+   5.  Other nodes issue either of the following depending on whether
+       the disk was found:
+       ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and
+       disc.number set to slot number)
+       ioctl(CLUSTERED_DISK_NACK)
+   6.  Other nodes drop lock on "no-new-devs" (CR) if device is found
+   7.  Node 1 attempts EX lock on "no-new-dev"
+   8.  If node 1 gets the lock, it sends METADATA_UPDATED after
+       unmarking the disk as SpareLocal
+   9.  If not (get "no-new-dev" lock), it fails the operation and sends
+       METADATA_UPDATED.
+   10. Other nodes get the information whether a disk is added or not
+       by the following METADATA_UPDATED.
+
+6. Module interface
+===================
+
+ There are 17 call-backs which the md core can make to the cluster
+ module.  Understanding these can give a good overview of the whole
+ process.
+
+6.1 join(nodes) and leave()
+---------------------------
+
+ These are called when an array is started with a clustered bitmap,
+ and when the array is stopped.  join() ensures the cluster is
+ available and initializes the various resources.
+ Only the first 'nodes' nodes in the cluster can use the array.
+
+6.2 slot_number()
+-----------------
+
+ Reports the slot number advised by the cluster infrastructure.
+ Range is from 0 to nodes-1.
+
+6.3 resync_info_update()
+------------------------
+
+ This updates the resync range that is stored in the bitmap lock.
+ The starting point is updated as the resync progresses.  The
+ end point is always the end of the array.
+ It does *not* send a RESYNCING message.
+
+6.4 resync_start(), resync_finish()
+-----------------------------------
+
+ These are called when resync/recovery/reshape starts or stops.
+ They update the resyncing range in the bitmap lock and also
+ send a RESYNCING message.  resync_start reports the whole
+ array as resyncing, resync_finish reports none of it.
+
+ resync_finish() also sends a BITMAP_NEEDS_SYNC message which
+ allows some other node to take over.
+
+6.5 metadata_update_start(), metadata_update_finish(), metadata_update_cancel()
+-------------------------------------------------------------------------------
+
+ metadata_update_start is used to get exclusive access to
+ the metadata.  If a change is still needed once that access is
+ gained, metadata_update_finish() will send a METADATA_UPDATE
+ message to all other nodes, otherwise metadata_update_cancel()
+ can be used to release the lock.
+
+6.6 area_resyncing()
+--------------------
+
+ This combines two elements of functionality.
+
+ Firstly, it will check if any node is currently resyncing
+ anything in a given range of sectors.  If any resync is found,
+ then the caller will avoid writing or read-balancing in that
+ range.
+
+ Secondly, while node recovery is happening it reports that
+ all areas are resyncing for READ requests.  This avoids races
+ between the cluster-filesystem and the cluster-RAID handling
+ a node failure.
+
+6.7 add_new_disk_start(), add_new_disk_finish(), new_disk_ack()
+---------------------------------------------------------------
+
+ These are used to manage the new-disk protocol described above.
+ When a new device is added, add_new_disk_start() is called before
+ it is bound to the array and, if that succeeds, add_new_disk_finish()
+ is called the device is fully added.
+
+ When a device is added in acknowledgement to a previous
+ request, or when the device is declared "unavailable",
+ new_disk_ack() is called.
+
+6.8 remove_disk()
+-----------------
+
+ This is called when a spare or failed device is removed from
+ the array.  It causes a REMOVE message to be send to other nodes.
+
+6.9 gather_bitmaps()
+--------------------
+
+ This sends a RE_ADD message to all other nodes and then
+ gathers bitmap information from all bitmaps.  This combined
+ bitmap is then used to recovery the re-added device.
+
+6.10 lock_all_bitmaps() and unlock_all_bitmaps()
+------------------------------------------------
+
+ These are called when change bitmap to none. If a node plans
+ to clear the cluster raid's bitmap, it need to make sure no other
+ nodes are using the raid which is achieved by lock all bitmap
+ locks within the cluster, and also those locks are unlocked
+ accordingly.
+
+7. Unsupported features
+=======================
+
+There are somethings which are not supported by cluster MD yet.
+
+- change array_sectors.
diff --git a/Documentation/driver-api/md/raid5-cache.rst b/Documentation/driver-api/md/raid5-cache.rst
new file mode 100644
index 000000000000..d7a15f44a7c3
--- /dev/null
+++ b/Documentation/driver-api/md/raid5-cache.rst
@@ -0,0 +1,111 @@
+================
+RAID 4/5/6 cache
+================
+
+Raid 4/5/6 could include an extra disk for data cache besides normal RAID
+disks. The role of RAID disks isn't changed with the cache disk. The cache disk
+caches data to the RAID disks. The cache can be in write-through (supported
+since 4.4) or write-back mode (supported since 4.10). mdadm (supported since
+3.4) has a new option '--write-journal' to create array with cache. Please
+refer to mdadm manual for details. By default (RAID array starts), the cache is
+in write-through mode. A user can switch it to write-back mode by::
+
+	echo "write-back" > /sys/block/md0/md/journal_mode
+
+And switch it back to write-through mode by::
+
+	echo "write-through" > /sys/block/md0/md/journal_mode
+
+In both modes, all writes to the array will hit cache disk first. This means
+the cache disk must be fast and sustainable.
+
+write-through mode
+==================
+
+This mode mainly fixes the 'write hole' issue. For RAID 4/5/6 array, an unclean
+shutdown can cause data in some stripes to not be in consistent state, eg, data
+and parity don't match. The reason is that a stripe write involves several RAID
+disks and it's possible the writes don't hit all RAID disks yet before the
+unclean shutdown. We call an array degraded if it has inconsistent data. MD
+tries to resync the array to bring it back to normal state. But before the
+resync completes, any system crash will expose the chance of real data
+corruption in the RAID array. This problem is called 'write hole'.
+
+The write-through cache will cache all data on cache disk first. After the data
+is safe on the cache disk, the data will be flushed onto RAID disks. The
+two-step write will guarantee MD can recover correct data after unclean
+shutdown even the array is degraded. Thus the cache can close the 'write hole'.
+
+In write-through mode, MD reports IO completion to upper layer (usually
+filesystems) after the data is safe on RAID disks, so cache disk failure
+doesn't cause data loss. Of course cache disk failure means the array is
+exposed to 'write hole' again.
+
+In write-through mode, the cache disk isn't required to be big. Several
+hundreds megabytes are enough.
+
+write-back mode
+===============
+
+write-back mode fixes the 'write hole' issue too, since all write data is
+cached on cache disk. But the main goal of 'write-back' cache is to speed up
+write. If a write crosses all RAID disks of a stripe, we call it full-stripe
+write. For non-full-stripe writes, MD must read old data before the new parity
+can be calculated. These synchronous reads hurt write throughput. Some writes
+which are sequential but not dispatched in the same time will suffer from this
+overhead too. Write-back cache will aggregate the data and flush the data to
+RAID disks only after the data becomes a full stripe write. This will
+completely avoid the overhead, so it's very helpful for some workloads. A
+typical workload which does sequential write followed by fsync is an example.
+
+In write-back mode, MD reports IO completion to upper layer (usually
+filesystems) right after the data hits cache disk. The data is flushed to raid
+disks later after specific conditions met. So cache disk failure will cause
+data loss.
+
+In write-back mode, MD also caches data in memory. The memory cache includes
+the same data stored on cache disk, so a power loss doesn't cause data loss.
+The memory cache size has performance impact for the array. It's recommended
+the size is big. A user can configure the size by::
+
+	echo "2048" > /sys/block/md0/md/stripe_cache_size
+
+Too small cache disk will make the write aggregation less efficient in this
+mode depending on the workloads. It's recommended to use a cache disk with at
+least several gigabytes size in write-back mode.
+
+The implementation
+==================
+
+The write-through and write-back cache use the same disk format. The cache disk
+is organized as a simple write log. The log consists of 'meta data' and 'data'
+pairs. The meta data describes the data. It also includes checksum and sequence
+ID for recovery identification. Data can be IO data and parity data. Data is
+checksumed too. The checksum is stored in the meta data ahead of the data. The
+checksum is an optimization because MD can write meta and data freely without
+worry about the order. MD superblock has a field pointed to the valid meta data
+of log head.
+
+The log implementation is pretty straightforward. The difficult part is the
+order in which MD writes data to cache disk and RAID disks. Specifically, in
+write-through mode, MD calculates parity for IO data, writes both IO data and
+parity to the log, writes the data and parity to RAID disks after the data and
+parity is settled down in log and finally the IO is finished. Read just reads
+from raid disks as usual.
+
+In write-back mode, MD writes IO data to the log and reports IO completion. The
+data is also fully cached in memory at that time, which means read must query
+memory cache. If some conditions are met, MD will flush the data to RAID disks.
+MD will calculate parity for the data and write parity into the log. After this
+is finished, MD will write both data and parity into RAID disks, then MD can
+release the memory cache. The flush conditions could be stripe becomes a full
+stripe write, free cache disk space is low or free in-kernel memory cache space
+is low.
+
+After an unclean shutdown, MD does recovery. MD reads all meta data and data
+from the log. The sequence ID and checksum will help us detect corrupted meta
+data and data. If MD finds a stripe with data and valid parities (1 parity for
+raid4/5 and 2 for raid6), MD will write the data and parities to RAID disks. If
+parities are incompleted, they are discarded. If part of data is corrupted,
+they are discarded too. MD then loads valid data and writes them to RAID disks
+in normal way.
diff --git a/Documentation/driver-api/md/raid5-ppl.rst b/Documentation/driver-api/md/raid5-ppl.rst
new file mode 100644
index 000000000000..357e5515bc55
--- /dev/null
+++ b/Documentation/driver-api/md/raid5-ppl.rst
@@ -0,0 +1,47 @@
+==================
+Partial Parity Log
+==================
+
+Partial Parity Log (PPL) is a feature available for RAID5 arrays. The issue
+addressed by PPL is that after a dirty shutdown, parity of a particular stripe
+may become inconsistent with data on other member disks. If the array is also
+in degraded state, there is no way to recalculate parity, because one of the
+disks is missing. This can lead to silent data corruption when rebuilding the
+array or using it is as degraded - data calculated from parity for array blocks
+that have not been touched by a write request during the unclean shutdown can
+be incorrect. Such condition is known as the RAID5 Write Hole. Because of
+this, md by default does not allow starting a dirty degraded array.
+
+Partial parity for a write operation is the XOR of stripe data chunks not
+modified by this write. It is just enough data needed for recovering from the
+write hole. XORing partial parity with the modified chunks produces parity for
+the stripe, consistent with its state before the write operation, regardless of
+which chunk writes have completed. If one of the not modified data disks of
+this stripe is missing, this updated parity can be used to recover its
+contents. PPL recovery is also performed when starting an array after an
+unclean shutdown and all disks are available, eliminating the need to resync
+the array. Because of this, using write-intent bitmap and PPL together is not
+supported.
+
+When handling a write request PPL writes partial parity before new data and
+parity are dispatched to disks. PPL is a distributed log - it is stored on
+array member drives in the metadata area, on the parity drive of a particular
+stripe.  It does not require a dedicated journaling drive. Write performance is
+reduced by up to 30%-40% but it scales with the number of drives in the array
+and the journaling drive does not become a bottleneck or a single point of
+failure.
+
+Unlike raid5-cache, the other solution in md for closing the write hole, PPL is
+not a true journal. It does not protect from losing in-flight data, only from
+silent data corruption. If a dirty disk of a stripe is lost, no PPL recovery is
+performed for this stripe (parity is not updated). So it is possible to have
+arbitrary data in the written part of a stripe if that disk is lost. In such
+case the behavior is the same as in plain raid5.
+
+PPL is available for md version-1 metadata and external (specifically IMSM)
+metadata arrays. It can be enabled using mdadm option --consistency-policy=ppl.
+
+There is a limitation of maximum 64 disks in the array for PPL. It allows to
+keep data structures and implementation simple. RAID5 arrays with so many disks
+are not likely due to high risk of multiple disks failure. Such restriction
+should not be a real life limitation.
diff --git a/Documentation/md/index.rst b/Documentation/md/index.rst
deleted file mode 100644
index c4db34ed327d..000000000000
--- a/Documentation/md/index.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-:orphan:
-
-====
-RAID
-====
-
-.. toctree::
-   :maxdepth: 1
-
-   md-cluster
-   raid5-cache
-   raid5-ppl
diff --git a/Documentation/md/md-cluster.rst b/Documentation/md/md-cluster.rst
deleted file mode 100644
index 96eb52cec7eb..000000000000
--- a/Documentation/md/md-cluster.rst
+++ /dev/null
@@ -1,385 +0,0 @@
-==========
-MD Cluster
-==========
-
-The cluster MD is a shared-device RAID for a cluster, it supports
-two levels: raid1 and raid10 (limited support).
-
-
-1. On-disk format
-=================
-
-Separate write-intent-bitmaps are used for each cluster node.
-The bitmaps record all writes that may have been started on that node,
-and may not yet have finished. The on-disk layout is::
-
-  0                    4k                     8k                    12k
-  -------------------------------------------------------------------
-  | idle                | md super            | bm super [0] + bits |
-  | bm bits[0, contd]   | bm super[1] + bits  | bm bits[1, contd]   |
-  | bm super[2] + bits  | bm bits [2, contd]  | bm super[3] + bits  |
-  | bm bits [3, contd]  |                     |                     |
-
-During "normal" functioning we assume the filesystem ensures that only
-one node writes to any given block at a time, so a write request will
-
- - set the appropriate bit (if not already set)
- - commit the write to all mirrors
- - schedule the bit to be cleared after a timeout.
-
-Reads are just handled normally. It is up to the filesystem to ensure
-one node doesn't read from a location where another node (or the same
-node) is writing.
-
-
-2. DLM Locks for management
-===========================
-
-There are three groups of locks for managing the device:
-
-2.1 Bitmap lock resource (bm_lockres)
--------------------------------------
-
- The bm_lockres protects individual node bitmaps. They are named in
- the form bitmap000 for node 1, bitmap001 for node 2 and so on. When a
- node joins the cluster, it acquires the lock in PW mode and it stays
- so during the lifetime the node is part of the cluster. The lock
- resource number is based on the slot number returned by the DLM
- subsystem. Since DLM starts node count from one and bitmap slots
- start from zero, one is subtracted from the DLM slot number to arrive
- at the bitmap slot number.
-
- The LVB of the bitmap lock for a particular node records the range
- of sectors that are being re-synced by that node.  No other
- node may write to those sectors.  This is used when a new nodes
- joins the cluster.
-
-2.2 Message passing locks
--------------------------
-
- Each node has to communicate with other nodes when starting or ending
- resync, and for metadata superblock updates.  This communication is
- managed through three locks: "token", "message", and "ack", together
- with the Lock Value Block (LVB) of one of the "message" lock.
-
-2.3 new-device management
--------------------------
-
- A single lock: "no-new-dev" is used to co-ordinate the addition of
- new devices - this must be synchronized across the array.
- Normally all nodes hold a concurrent-read lock on this device.
-
-3. Communication
-================
-
- Messages can be broadcast to all nodes, and the sender waits for all
- other nodes to acknowledge the message before proceeding.  Only one
- message can be processed at a time.
-
-3.1 Message Types
------------------
-
- There are six types of messages which are passed:
-
-3.1.1 METADATA_UPDATED
-^^^^^^^^^^^^^^^^^^^^^^
-
-   informs other nodes that the metadata has
-   been updated, and the node must re-read the md superblock. This is
-   performed synchronously. It is primarily used to signal device
-   failure.
-
-3.1.2 RESYNCING
-^^^^^^^^^^^^^^^
-   informs other nodes that a resync is initiated or
-   ended so that each node may suspend or resume the region.  Each
-   RESYNCING message identifies a range of the devices that the
-   sending node is about to resync. This overrides any previous
-   notification from that node: only one ranged can be resynced at a
-   time per-node.
-
-3.1.3 NEWDISK
-^^^^^^^^^^^^^
-
-   informs other nodes that a device is being added to
-   the array. Message contains an identifier for that device.  See
-   below for further details.
-
-3.1.4 REMOVE
-^^^^^^^^^^^^
-
-   A failed or spare device is being removed from the
-   array. The slot-number of the device is included in the message.
-
- 3.1.5 RE_ADD:
-
-   A failed device is being re-activated - the assumption
-   is that it has been determined to be working again.
-
- 3.1.6 BITMAP_NEEDS_SYNC:
-
-   If a node is stopped locally but the bitmap
-   isn't clean, then another node is informed to take the ownership of
-   resync.
-
-3.2 Communication mechanism
----------------------------
-
- The DLM LVB is used to communicate within nodes of the cluster. There
- are three resources used for the purpose:
-
-3.2.1 token
-^^^^^^^^^^^
-   The resource which protects the entire communication
-   system. The node having the token resource is allowed to
-   communicate.
-
-3.2.2 message
-^^^^^^^^^^^^^
-   The lock resource which carries the data to communicate.
-
-3.2.3 ack
-^^^^^^^^^
-
-   The resource, acquiring which means the message has been
-   acknowledged by all nodes in the cluster. The BAST of the resource
-   is used to inform the receiving node that a node wants to
-   communicate.
-
-The algorithm is:
-
- 1. receive status - all nodes have concurrent-reader lock on "ack"::
-
-	sender                         receiver                 receiver
-	"ack":CR                       "ack":CR                 "ack":CR
-
- 2. sender get EX on "token",
-    sender get EX on "message"::
-
-	sender                        receiver                 receiver
-	"token":EX                    "ack":CR                 "ack":CR
-	"message":EX
-	"ack":CR
-
-    Sender checks that it still needs to send a message. Messages
-    received or other events that happened while waiting for the
-    "token" may have made this message inappropriate or redundant.
-
- 3. sender writes LVB
-
-    sender down-convert "message" from EX to CW
-
-    sender try to get EX of "ack"
-
-    ::
-
-      [ wait until all receivers have *processed* the "message" ]
-
-                                       [ triggered by bast of "ack" ]
-                                       receiver get CR on "message"
-                                       receiver read LVB
-                                       receiver processes the message
-                                       [ wait finish ]
-                                       receiver releases "ack"
-                                       receiver tries to get PR on "message"
-
-     sender                         receiver                  receiver
-     "token":EX                     "message":CR              "message":CR
-     "message":CW
-     "ack":EX
-
- 4. triggered by grant of EX on "ack" (indicating all receivers
-    have processed message)
-
-    sender down-converts "ack" from EX to CR
-
-    sender releases "message"
-
-    sender releases "token"
-
-    ::
-
-                                 receiver upconvert to PR on "message"
-                                 receiver get CR of "ack"
-                                 receiver release "message"
-
-     sender                      receiver                   receiver
-     "ack":CR                    "ack":CR                   "ack":CR
-
-
-4. Handling Failures
-====================
-
-4.1 Node Failure
-----------------
-
- When a node fails, the DLM informs the cluster with the slot
- number. The node starts a cluster recovery thread. The cluster
- recovery thread:
-
-	- acquires the bitmap<number> lock of the failed node
-	- opens the bitmap
-	- reads the bitmap of the failed node
-	- copies the set bitmap to local node
-	- cleans the bitmap of the failed node
-	- releases bitmap<number> lock of the failed node
-	- initiates resync of the bitmap on the current node
-	  md_check_recovery is invoked within recover_bitmaps,
-	  then md_check_recovery -> metadata_update_start/finish,
-	  it will lock the communication by lock_comm.
-	  Which means when one node is resyncing it blocks all
-	  other nodes from writing anywhere on the array.
-
- The resync process is the regular md resync. However, in a clustered
- environment when a resync is performed, it needs to tell other nodes
- of the areas which are suspended. Before a resync starts, the node
- send out RESYNCING with the (lo,hi) range of the area which needs to
- be suspended. Each node maintains a suspend_list, which contains the
- list of ranges which are currently suspended. On receiving RESYNCING,
- the node adds the range to the suspend_list. Similarly, when the node
- performing resync finishes, it sends RESYNCING with an empty range to
- other nodes and other nodes remove the corresponding entry from the
- suspend_list.
-
- A helper function, ->area_resyncing() can be used to check if a
- particular I/O range should be suspended or not.
-
-4.2 Device Failure
-==================
-
- Device failures are handled and communicated with the metadata update
- routine.  When a node detects a device failure it does not allow
- any further writes to that device until the failure has been
- acknowledged by all other nodes.
-
-5. Adding a new Device
-----------------------
-
- For adding a new device, it is necessary that all nodes "see" the new
- device to be added. For this, the following algorithm is used:
-
-   1.  Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues
-       ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CLUSTER_ADD)
-   2.  Node 1 sends a NEWDISK message with uuid and slot number
-   3.  Other nodes issue kobject_uevent_env with uuid and slot number
-       (Steps 4,5 could be a udev rule)
-   4.  In userspace, the node searches for the disk, perhaps
-       using blkid -t SUB_UUID=""
-   5.  Other nodes issue either of the following depending on whether
-       the disk was found:
-       ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and
-       disc.number set to slot number)
-       ioctl(CLUSTERED_DISK_NACK)
-   6.  Other nodes drop lock on "no-new-devs" (CR) if device is found
-   7.  Node 1 attempts EX lock on "no-new-dev"
-   8.  If node 1 gets the lock, it sends METADATA_UPDATED after
-       unmarking the disk as SpareLocal
-   9.  If not (get "no-new-dev" lock), it fails the operation and sends
-       METADATA_UPDATED.
-   10. Other nodes get the information whether a disk is added or not
-       by the following METADATA_UPDATED.
-
-6. Module interface
-===================
-
- There are 17 call-backs which the md core can make to the cluster
- module.  Understanding these can give a good overview of the whole
- process.
-
-6.1 join(nodes) and leave()
----------------------------
-
- These are called when an array is started with a clustered bitmap,
- and when the array is stopped.  join() ensures the cluster is
- available and initializes the various resources.
- Only the first 'nodes' nodes in the cluster can use the array.
-
-6.2 slot_number()
------------------
-
- Reports the slot number advised by the cluster infrastructure.
- Range is from 0 to nodes-1.
-
-6.3 resync_info_update()
-------------------------
-
- This updates the resync range that is stored in the bitmap lock.
- The starting point is updated as the resync progresses.  The
- end point is always the end of the array.
- It does *not* send a RESYNCING message.
-
-6.4 resync_start(), resync_finish()
------------------------------------
-
- These are called when resync/recovery/reshape starts or stops.
- They update the resyncing range in the bitmap lock and also
- send a RESYNCING message.  resync_start reports the whole
- array as resyncing, resync_finish reports none of it.
-
- resync_finish() also sends a BITMAP_NEEDS_SYNC message which
- allows some other node to take over.
-
-6.5 metadata_update_start(), metadata_update_finish(), metadata_update_cancel()
--------------------------------------------------------------------------------
-
- metadata_update_start is used to get exclusive access to
- the metadata.  If a change is still needed once that access is
- gained, metadata_update_finish() will send a METADATA_UPDATE
- message to all other nodes, otherwise metadata_update_cancel()
- can be used to release the lock.
-
-6.6 area_resyncing()
---------------------
-
- This combines two elements of functionality.
-
- Firstly, it will check if any node is currently resyncing
- anything in a given range of sectors.  If any resync is found,
- then the caller will avoid writing or read-balancing in that
- range.
-
- Secondly, while node recovery is happening it reports that
- all areas are resyncing for READ requests.  This avoids races
- between the cluster-filesystem and the cluster-RAID handling
- a node failure.
-
-6.7 add_new_disk_start(), add_new_disk_finish(), new_disk_ack()
----------------------------------------------------------------
-
- These are used to manage the new-disk protocol described above.
- When a new device is added, add_new_disk_start() is called before
- it is bound to the array and, if that succeeds, add_new_disk_finish()
- is called the device is fully added.
-
- When a device is added in acknowledgement to a previous
- request, or when the device is declared "unavailable",
- new_disk_ack() is called.
-
-6.8 remove_disk()
------------------
-
- This is called when a spare or failed device is removed from
- the array.  It causes a REMOVE message to be send to other nodes.
-
-6.9 gather_bitmaps()
---------------------
-
- This sends a RE_ADD message to all other nodes and then
- gathers bitmap information from all bitmaps.  This combined
- bitmap is then used to recovery the re-added device.
-
-6.10 lock_all_bitmaps() and unlock_all_bitmaps()
-------------------------------------------------
-
- These are called when change bitmap to none. If a node plans
- to clear the cluster raid's bitmap, it need to make sure no other
- nodes are using the raid which is achieved by lock all bitmap
- locks within the cluster, and also those locks are unlocked
- accordingly.
-
-7. Unsupported features
-=======================
-
-There are somethings which are not supported by cluster MD yet.
-
-- change array_sectors.
diff --git a/Documentation/md/raid5-cache.rst b/Documentation/md/raid5-cache.rst
deleted file mode 100644
index d7a15f44a7c3..000000000000
--- a/Documentation/md/raid5-cache.rst
+++ /dev/null
@@ -1,111 +0,0 @@
-================
-RAID 4/5/6 cache
-================
-
-Raid 4/5/6 could include an extra disk for data cache besides normal RAID
-disks. The role of RAID disks isn't changed with the cache disk. The cache disk
-caches data to the RAID disks. The cache can be in write-through (supported
-since 4.4) or write-back mode (supported since 4.10). mdadm (supported since
-3.4) has a new option '--write-journal' to create array with cache. Please
-refer to mdadm manual for details. By default (RAID array starts), the cache is
-in write-through mode. A user can switch it to write-back mode by::
-
-	echo "write-back" > /sys/block/md0/md/journal_mode
-
-And switch it back to write-through mode by::
-
-	echo "write-through" > /sys/block/md0/md/journal_mode
-
-In both modes, all writes to the array will hit cache disk first. This means
-the cache disk must be fast and sustainable.
-
-write-through mode
-==================
-
-This mode mainly fixes the 'write hole' issue. For RAID 4/5/6 array, an unclean
-shutdown can cause data in some stripes to not be in consistent state, eg, data
-and parity don't match. The reason is that a stripe write involves several RAID
-disks and it's possible the writes don't hit all RAID disks yet before the
-unclean shutdown. We call an array degraded if it has inconsistent data. MD
-tries to resync the array to bring it back to normal state. But before the
-resync completes, any system crash will expose the chance of real data
-corruption in the RAID array. This problem is called 'write hole'.
-
-The write-through cache will cache all data on cache disk first. After the data
-is safe on the cache disk, the data will be flushed onto RAID disks. The
-two-step write will guarantee MD can recover correct data after unclean
-shutdown even the array is degraded. Thus the cache can close the 'write hole'.
-
-In write-through mode, MD reports IO completion to upper layer (usually
-filesystems) after the data is safe on RAID disks, so cache disk failure
-doesn't cause data loss. Of course cache disk failure means the array is
-exposed to 'write hole' again.
-
-In write-through mode, the cache disk isn't required to be big. Several
-hundreds megabytes are enough.
-
-write-back mode
-===============
-
-write-back mode fixes the 'write hole' issue too, since all write data is
-cached on cache disk. But the main goal of 'write-back' cache is to speed up
-write. If a write crosses all RAID disks of a stripe, we call it full-stripe
-write. For non-full-stripe writes, MD must read old data before the new parity
-can be calculated. These synchronous reads hurt write throughput. Some writes
-which are sequential but not dispatched in the same time will suffer from this
-overhead too. Write-back cache will aggregate the data and flush the data to
-RAID disks only after the data becomes a full stripe write. This will
-completely avoid the overhead, so it's very helpful for some workloads. A
-typical workload which does sequential write followed by fsync is an example.
-
-In write-back mode, MD reports IO completion to upper layer (usually
-filesystems) right after the data hits cache disk. The data is flushed to raid
-disks later after specific conditions met. So cache disk failure will cause
-data loss.
-
-In write-back mode, MD also caches data in memory. The memory cache includes
-the same data stored on cache disk, so a power loss doesn't cause data loss.
-The memory cache size has performance impact for the array. It's recommended
-the size is big. A user can configure the size by::
-
-	echo "2048" > /sys/block/md0/md/stripe_cache_size
-
-Too small cache disk will make the write aggregation less efficient in this
-mode depending on the workloads. It's recommended to use a cache disk with at
-least several gigabytes size in write-back mode.
-
-The implementation
-==================
-
-The write-through and write-back cache use the same disk format. The cache disk
-is organized as a simple write log. The log consists of 'meta data' and 'data'
-pairs. The meta data describes the data. It also includes checksum and sequence
-ID for recovery identification. Data can be IO data and parity data. Data is
-checksumed too. The checksum is stored in the meta data ahead of the data. The
-checksum is an optimization because MD can write meta and data freely without
-worry about the order. MD superblock has a field pointed to the valid meta data
-of log head.
-
-The log implementation is pretty straightforward. The difficult part is the
-order in which MD writes data to cache disk and RAID disks. Specifically, in
-write-through mode, MD calculates parity for IO data, writes both IO data and
-parity to the log, writes the data and parity to RAID disks after the data and
-parity is settled down in log and finally the IO is finished. Read just reads
-from raid disks as usual.
-
-In write-back mode, MD writes IO data to the log and reports IO completion. The
-data is also fully cached in memory at that time, which means read must query
-memory cache. If some conditions are met, MD will flush the data to RAID disks.
-MD will calculate parity for the data and write parity into the log. After this
-is finished, MD will write both data and parity into RAID disks, then MD can
-release the memory cache. The flush conditions could be stripe becomes a full
-stripe write, free cache disk space is low or free in-kernel memory cache space
-is low.
-
-After an unclean shutdown, MD does recovery. MD reads all meta data and data
-from the log. The sequence ID and checksum will help us detect corrupted meta
-data and data. If MD finds a stripe with data and valid parities (1 parity for
-raid4/5 and 2 for raid6), MD will write the data and parities to RAID disks. If
-parities are incompleted, they are discarded. If part of data is corrupted,
-they are discarded too. MD then loads valid data and writes them to RAID disks
-in normal way.
diff --git a/Documentation/md/raid5-ppl.rst b/Documentation/md/raid5-ppl.rst
deleted file mode 100644
index 357e5515bc55..000000000000
--- a/Documentation/md/raid5-ppl.rst
+++ /dev/null
@@ -1,47 +0,0 @@
-==================
-Partial Parity Log
-==================
-
-Partial Parity Log (PPL) is a feature available for RAID5 arrays. The issue
-addressed by PPL is that after a dirty shutdown, parity of a particular stripe
-may become inconsistent with data on other member disks. If the array is also
-in degraded state, there is no way to recalculate parity, because one of the
-disks is missing. This can lead to silent data corruption when rebuilding the
-array or using it is as degraded - data calculated from parity for array blocks
-that have not been touched by a write request during the unclean shutdown can
-be incorrect. Such condition is known as the RAID5 Write Hole. Because of
-this, md by default does not allow starting a dirty degraded array.
-
-Partial parity for a write operation is the XOR of stripe data chunks not
-modified by this write. It is just enough data needed for recovering from the
-write hole. XORing partial parity with the modified chunks produces parity for
-the stripe, consistent with its state before the write operation, regardless of
-which chunk writes have completed. If one of the not modified data disks of
-this stripe is missing, this updated parity can be used to recover its
-contents. PPL recovery is also performed when starting an array after an
-unclean shutdown and all disks are available, eliminating the need to resync
-the array. Because of this, using write-intent bitmap and PPL together is not
-supported.
-
-When handling a write request PPL writes partial parity before new data and
-parity are dispatched to disks. PPL is a distributed log - it is stored on
-array member drives in the metadata area, on the parity drive of a particular
-stripe.  It does not require a dedicated journaling drive. Write performance is
-reduced by up to 30%-40% but it scales with the number of drives in the array
-and the journaling drive does not become a bottleneck or a single point of
-failure.
-
-Unlike raid5-cache, the other solution in md for closing the write hole, PPL is
-not a true journal. It does not protect from losing in-flight data, only from
-silent data corruption. If a dirty disk of a stripe is lost, no PPL recovery is
-performed for this stripe (parity is not updated). So it is possible to have
-arbitrary data in the written part of a stripe if that disk is lost. In such
-case the behavior is the same as in plain raid5.
-
-PPL is available for md version-1 metadata and external (specifically IMSM)
-metadata arrays. It can be enabled using mdadm option --consistency-policy=ppl.
-
-There is a limitation of maximum 64 disks in the array for PPL. It allows to
-keep data structures and implementation simple. RAID5 arrays with so many disks
-are not likely due to high risk of multiple disks failure. Such restriction
-should not be a real life limitation.
-- 
cgit 


From 09fdc957ad0d0ee83c00cd1e0c3a605047f63bf7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 16:51:34 -0300
Subject: docs: leds: add it to the driver-api book

The contents of leds driver docs is messy: it has lots of
admin-guide stuff and kernel internal ones, just like other
driver subsystems.

I'm opting to keep the dir at the same place and just add
a link to it. This makes clearer that this require changes.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/index.rst      | 1 +
 Documentation/leds/index.rst | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/index.rst b/Documentation/index.rst
index c6934d90363c..c4f9610b6167 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -91,6 +91,7 @@ needed).
 
    driver-api/index
    core-api/index
+   leds/index
    media/index
    networking/index
    input/index
diff --git a/Documentation/leds/index.rst b/Documentation/leds/index.rst
index 9885f7c1b75d..060f4e485897 100644
--- a/Documentation/leds/index.rst
+++ b/Documentation/leds/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ====
 LEDs
-- 
cgit 


From 616b81db2fa757f48895242ea6aaf3c1a1ad22f4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 17:13:24 -0300
Subject: docs: ioctl: add it to the uAPI guide

While 100% of its contents is userspace, let's keep the dir
at the same place, as this is a well-known location.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/index.rst              | 1 +
 Documentation/ioctl/index.rst        | 2 +-
 Documentation/ioctl/ioctl-number.rst | 2 --
 3 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/index.rst b/Documentation/index.rst
index c4f9610b6167..864daf8805a4 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -56,6 +56,7 @@ the kernel interface as seen by application developers.
    :maxdepth: 2
 
    userspace-api/index
+   ioctl/index
 
 
 Introduction to kernel development
diff --git a/Documentation/ioctl/index.rst b/Documentation/ioctl/index.rst
index 1a6f437566e3..0f0a857f6615 100644
--- a/Documentation/ioctl/index.rst
+++ b/Documentation/ioctl/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ======
 IOCTLs
diff --git a/Documentation/ioctl/ioctl-number.rst b/Documentation/ioctl/ioctl-number.rst
index fcf9623a599f..7f8dcae7a230 100644
--- a/Documentation/ioctl/ioctl-number.rst
+++ b/Documentation/ioctl/ioctl-number.rst
@@ -1,5 +1,3 @@
-:orphan:
-
 =============
 Ioctl Numbers
 =============
-- 
cgit 


From 9b1f44028ff2e051816517781153e10a2d748dc3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 17:15:10 -0300
Subject: docs: interconnect.rst: add it to the driver-api guide

This is intended for Kernel hackers audience.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Reviewed-by: Georgi Djakov <georgi.djakov@linaro.org>
---
 Documentation/driver-api/index.rst          |  1 +
 Documentation/driver-api/interconnect.rst   | 93 ++++++++++++++++++++++++++++
 Documentation/interconnect/interconnect.rst | 95 -----------------------------
 3 files changed, 94 insertions(+), 95 deletions(-)
 create mode 100644 Documentation/driver-api/interconnect.rst
 delete mode 100644 Documentation/interconnect/interconnect.rst

(limited to 'Documentation')

diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index b5179bf2ada2..baa77a666e46 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -36,6 +36,7 @@ available subsections can be seen below.
    i2c
    ipmb
    i3c/index
+   interconnect
    hsi
    edac
    scsi
diff --git a/Documentation/driver-api/interconnect.rst b/Documentation/driver-api/interconnect.rst
new file mode 100644
index 000000000000..c3e004893796
--- /dev/null
+++ b/Documentation/driver-api/interconnect.rst
@@ -0,0 +1,93 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+GENERIC SYSTEM INTERCONNECT SUBSYSTEM
+=====================================
+
+Introduction
+------------
+
+This framework is designed to provide a standard kernel interface to control
+the settings of the interconnects on an SoC. These settings can be throughput,
+latency and priority between multiple interconnected devices or functional
+blocks. This can be controlled dynamically in order to save power or provide
+maximum performance.
+
+The interconnect bus is hardware with configurable parameters, which can be
+set on a data path according to the requests received from various drivers.
+An example of interconnect buses are the interconnects between various
+components or functional blocks in chipsets. There can be multiple interconnects
+on an SoC that can be multi-tiered.
+
+Below is a simplified diagram of a real-world SoC interconnect bus topology.
+
+::
+
+ +----------------+    +----------------+
+ | HW Accelerator |--->|      M NoC     |<---------------+
+ +----------------+    +----------------+                |
+                         |      |                    +------------+
+  +-----+  +-------------+      V       +------+     |            |
+  | DDR |  |                +--------+  | PCIe |     |            |
+  +-----+  |                | Slaves |  +------+     |            |
+    ^ ^    |                +--------+     |         |   C NoC    |
+    | |    V                               V         |            |
+ +------------------+   +------------------------+   |            |   +-----+
+ |                  |-->|                        |-->|            |-->| CPU |
+ |                  |-->|                        |<--|            |   +-----+
+ |     Mem NoC      |   |         S NoC          |   +------------+
+ |                  |<--|                        |---------+    |
+ |                  |<--|                        |<------+ |    |   +--------+
+ +------------------+   +------------------------+       | |    +-->| Slaves |
+   ^  ^    ^    ^          ^                             | |        +--------+
+   |  |    |    |          |                             | V
+ +------+  |  +-----+   +-----+  +---------+   +----------------+   +--------+
+ | CPUs |  |  | GPU |   | DSP |  | Masters |-->|       P NoC    |-->| Slaves |
+ +------+  |  +-----+   +-----+  +---------+   +----------------+   +--------+
+           |
+       +-------+
+       | Modem |
+       +-------+
+
+Terminology
+-----------
+
+Interconnect provider is the software definition of the interconnect hardware.
+The interconnect providers on the above diagram are M NoC, S NoC, C NoC, P NoC
+and Mem NoC.
+
+Interconnect node is the software definition of the interconnect hardware
+port. Each interconnect provider consists of multiple interconnect nodes,
+which are connected to other SoC components including other interconnect
+providers. The point on the diagram where the CPUs connect to the memory is
+called an interconnect node, which belongs to the Mem NoC interconnect provider.
+
+Interconnect endpoints are the first or the last element of the path. Every
+endpoint is a node, but not every node is an endpoint.
+
+Interconnect path is everything between two endpoints including all the nodes
+that have to be traversed to reach from a source to destination node. It may
+include multiple master-slave pairs across several interconnect providers.
+
+Interconnect consumers are the entities which make use of the data paths exposed
+by the providers. The consumers send requests to providers requesting various
+throughput, latency and priority. Usually the consumers are device drivers, that
+send request based on their needs. An example for a consumer is a video decoder
+that supports various formats and image sizes.
+
+Interconnect providers
+----------------------
+
+Interconnect provider is an entity that implements methods to initialize and
+configure interconnect bus hardware. The interconnect provider drivers should
+be registered with the interconnect provider core.
+
+.. kernel-doc:: include/linux/interconnect-provider.h
+
+Interconnect consumers
+----------------------
+
+Interconnect consumers are the clients which use the interconnect APIs to
+get paths between endpoints and set their bandwidth/latency/QoS requirements
+for these interconnect paths.  These interfaces are not currently
+documented.
diff --git a/Documentation/interconnect/interconnect.rst b/Documentation/interconnect/interconnect.rst
deleted file mode 100644
index 56e331dab70e..000000000000
--- a/Documentation/interconnect/interconnect.rst
+++ /dev/null
@@ -1,95 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-:orphan:
-
-=====================================
-GENERIC SYSTEM INTERCONNECT SUBSYSTEM
-=====================================
-
-Introduction
-------------
-
-This framework is designed to provide a standard kernel interface to control
-the settings of the interconnects on an SoC. These settings can be throughput,
-latency and priority between multiple interconnected devices or functional
-blocks. This can be controlled dynamically in order to save power or provide
-maximum performance.
-
-The interconnect bus is hardware with configurable parameters, which can be
-set on a data path according to the requests received from various drivers.
-An example of interconnect buses are the interconnects between various
-components or functional blocks in chipsets. There can be multiple interconnects
-on an SoC that can be multi-tiered.
-
-Below is a simplified diagram of a real-world SoC interconnect bus topology.
-
-::
-
- +----------------+    +----------------+
- | HW Accelerator |--->|      M NoC     |<---------------+
- +----------------+    +----------------+                |
-                         |      |                    +------------+
-  +-----+  +-------------+      V       +------+     |            |
-  | DDR |  |                +--------+  | PCIe |     |            |
-  +-----+  |                | Slaves |  +------+     |            |
-    ^ ^    |                +--------+     |         |   C NoC    |
-    | |    V                               V         |            |
- +------------------+   +------------------------+   |            |   +-----+
- |                  |-->|                        |-->|            |-->| CPU |
- |                  |-->|                        |<--|            |   +-----+
- |     Mem NoC      |   |         S NoC          |   +------------+
- |                  |<--|                        |---------+    |
- |                  |<--|                        |<------+ |    |   +--------+
- +------------------+   +------------------------+       | |    +-->| Slaves |
-   ^  ^    ^    ^          ^                             | |        +--------+
-   |  |    |    |          |                             | V
- +------+  |  +-----+   +-----+  +---------+   +----------------+   +--------+
- | CPUs |  |  | GPU |   | DSP |  | Masters |-->|       P NoC    |-->| Slaves |
- +------+  |  +-----+   +-----+  +---------+   +----------------+   +--------+
-           |
-       +-------+
-       | Modem |
-       +-------+
-
-Terminology
------------
-
-Interconnect provider is the software definition of the interconnect hardware.
-The interconnect providers on the above diagram are M NoC, S NoC, C NoC, P NoC
-and Mem NoC.
-
-Interconnect node is the software definition of the interconnect hardware
-port. Each interconnect provider consists of multiple interconnect nodes,
-which are connected to other SoC components including other interconnect
-providers. The point on the diagram where the CPUs connect to the memory is
-called an interconnect node, which belongs to the Mem NoC interconnect provider.
-
-Interconnect endpoints are the first or the last element of the path. Every
-endpoint is a node, but not every node is an endpoint.
-
-Interconnect path is everything between two endpoints including all the nodes
-that have to be traversed to reach from a source to destination node. It may
-include multiple master-slave pairs across several interconnect providers.
-
-Interconnect consumers are the entities which make use of the data paths exposed
-by the providers. The consumers send requests to providers requesting various
-throughput, latency and priority. Usually the consumers are device drivers, that
-send request based on their needs. An example for a consumer is a video decoder
-that supports various formats and image sizes.
-
-Interconnect providers
-----------------------
-
-Interconnect provider is an entity that implements methods to initialize and
-configure interconnect bus hardware. The interconnect provider drivers should
-be registered with the interconnect provider core.
-
-.. kernel-doc:: include/linux/interconnect-provider.h
-
-Interconnect consumers
-----------------------
-
-Interconnect consumers are the clients which use the interconnect APIs to
-get paths between endpoints and set their bandwidth/latency/QoS requirements
-for these interconnect paths.  These interfaces are not currently
-documented.
-- 
cgit 


From 159a5e78bdcabb1f87ee5536182a99a307ae0bac Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Mon, 22 Apr 2019 16:10:26 -0300
Subject: docs: add arch doc directories to the index

Now that several arch documents were converted to ReST,
add their indexes to Documentation/index.rst and remove the
:orphan:  from them.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/arm/index.rst    | 2 --
 Documentation/arm64/index.rst  | 2 --
 Documentation/ia64/index.rst   | 2 --
 Documentation/index.rst        | 9 +++++++++
 Documentation/m68k/index.rst   | 2 +-
 Documentation/riscv/index.rst  | 2 --
 Documentation/s390/index.rst   | 2 --
 Documentation/sparc/index.rst  | 2 --
 Documentation/xtensa/index.rst | 2 +-
 9 files changed, 11 insertions(+), 14 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/arm/index.rst b/Documentation/arm/index.rst
index bd316d1a1802..9c2f781f4685 100644
--- a/Documentation/arm/index.rst
+++ b/Documentation/arm/index.rst
@@ -1,5 +1,3 @@
-﻿:orphan:
-
 ================
 ARM Architecture
 ================
diff --git a/Documentation/arm64/index.rst b/Documentation/arm64/index.rst
index 018b7836ecb7..96b696ba4e6c 100644
--- a/Documentation/arm64/index.rst
+++ b/Documentation/arm64/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
 ==================
 ARM64 Architecture
 ==================
diff --git a/Documentation/ia64/index.rst b/Documentation/ia64/index.rst
index a3e3052ad6e2..ef99475f672b 100644
--- a/Documentation/ia64/index.rst
+++ b/Documentation/ia64/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
 ==================
 IA-64 Architecture
 ==================
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 864daf8805a4..a322c8721d13 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -117,7 +117,16 @@ implementation.
    :maxdepth: 2
 
    sh/index
+   arm/index
+   arm64/index
+   ia64/index
+   m68k/index
+   riscv/index
+   s390/index
+   sh/index
+   sparc/index
    x86/index
+   xtensa/index
 
 Filesystem Documentation
 ------------------------
diff --git a/Documentation/m68k/index.rst b/Documentation/m68k/index.rst
index f3273ec075c3..3a5ba7fe1703 100644
--- a/Documentation/m68k/index.rst
+++ b/Documentation/m68k/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 =================
 m68k Architecture
diff --git a/Documentation/riscv/index.rst b/Documentation/riscv/index.rst
index c4b906d9b5a7..e3ca0922a8c2 100644
--- a/Documentation/riscv/index.rst
+++ b/Documentation/riscv/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
 ===================
 RISC-V architecture
 ===================
diff --git a/Documentation/s390/index.rst b/Documentation/s390/index.rst
index 1a914da2a07b..4602312909d3 100644
--- a/Documentation/s390/index.rst
+++ b/Documentation/s390/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
 =================
 s390 Architecture
 =================
diff --git a/Documentation/sparc/index.rst b/Documentation/sparc/index.rst
index 91f7d6643dd5..71cff621f243 100644
--- a/Documentation/sparc/index.rst
+++ b/Documentation/sparc/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
 ==================
 Sparc Architecture
 ==================
diff --git a/Documentation/xtensa/index.rst b/Documentation/xtensa/index.rst
index 5a24e365e35f..52fa04eb39a3 100644
--- a/Documentation/xtensa/index.rst
+++ b/Documentation/xtensa/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ===================
 Xtensa Architecture
-- 
cgit 


From 6cf2a73cb2bc422a03984b285a63632c27f8c4e4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 12:40:23 -0300
Subject: docs: device-mapper: move it to the admin-guide

The DM support describes lots of aspects related to mapped
disk partitions from the userspace PoV.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 .../admin-guide/device-mapper/cache-policies.rst   | 131 +++++++
 Documentation/admin-guide/device-mapper/cache.rst  | 337 ++++++++++++++++
 Documentation/admin-guide/device-mapper/delay.rst  |  31 ++
 .../admin-guide/device-mapper/dm-crypt.rst         | 173 +++++++++
 .../admin-guide/device-mapper/dm-dust.txt          | 272 +++++++++++++
 .../admin-guide/device-mapper/dm-flakey.rst        |  74 ++++
 .../admin-guide/device-mapper/dm-init.rst          | 125 ++++++
 .../admin-guide/device-mapper/dm-integrity.rst     | 259 +++++++++++++
 Documentation/admin-guide/device-mapper/dm-io.rst  |  75 ++++
 Documentation/admin-guide/device-mapper/dm-log.rst |  57 +++
 .../admin-guide/device-mapper/dm-queue-length.rst  |  48 +++
 .../admin-guide/device-mapper/dm-raid.rst          | 419 ++++++++++++++++++++
 .../admin-guide/device-mapper/dm-service-time.rst  | 101 +++++
 .../admin-guide/device-mapper/dm-uevent.rst        | 110 ++++++
 .../admin-guide/device-mapper/dm-zoned.rst         | 146 +++++++
 Documentation/admin-guide/device-mapper/era.rst    | 116 ++++++
 Documentation/admin-guide/device-mapper/index.rst  |  42 ++
 Documentation/admin-guide/device-mapper/kcopyd.rst |  47 +++
 Documentation/admin-guide/device-mapper/linear.rst |  63 +++
 .../admin-guide/device-mapper/log-writes.rst       | 145 +++++++
 .../admin-guide/device-mapper/persistent-data.rst  |  88 +++++
 .../admin-guide/device-mapper/snapshot.rst         | 196 ++++++++++
 .../admin-guide/device-mapper/statistics.rst       | 225 +++++++++++
 .../admin-guide/device-mapper/striped.rst          |  61 +++
 Documentation/admin-guide/device-mapper/switch.rst | 141 +++++++
 .../device-mapper/thin-provisioning.rst            | 427 +++++++++++++++++++++
 .../admin-guide/device-mapper/unstriped.rst        | 135 +++++++
 Documentation/admin-guide/device-mapper/verity.rst | 229 +++++++++++
 .../admin-guide/device-mapper/writecache.rst       |  79 ++++
 Documentation/admin-guide/device-mapper/zero.rst   |  37 ++
 Documentation/admin-guide/index.rst                |   1 +
 Documentation/device-mapper/cache-policies.rst     | 131 -------
 Documentation/device-mapper/cache.rst              | 337 ----------------
 Documentation/device-mapper/delay.rst              |  31 --
 Documentation/device-mapper/dm-crypt.rst           | 173 ---------
 Documentation/device-mapper/dm-dust.txt            | 272 -------------
 Documentation/device-mapper/dm-flakey.rst          |  74 ----
 Documentation/device-mapper/dm-init.rst            | 125 ------
 Documentation/device-mapper/dm-integrity.rst       | 259 -------------
 Documentation/device-mapper/dm-io.rst              |  75 ----
 Documentation/device-mapper/dm-log.rst             |  57 ---
 Documentation/device-mapper/dm-queue-length.rst    |  48 ---
 Documentation/device-mapper/dm-raid.rst            | 419 --------------------
 Documentation/device-mapper/dm-service-time.rst    | 101 -----
 Documentation/device-mapper/dm-uevent.rst          | 110 ------
 Documentation/device-mapper/dm-zoned.rst           | 146 -------
 Documentation/device-mapper/era.rst                | 116 ------
 Documentation/device-mapper/index.rst              |  44 ---
 Documentation/device-mapper/kcopyd.rst             |  47 ---
 Documentation/device-mapper/linear.rst             |  63 ---
 Documentation/device-mapper/log-writes.rst         | 145 -------
 Documentation/device-mapper/persistent-data.rst    |  88 -----
 Documentation/device-mapper/snapshot.rst           | 196 ----------
 Documentation/device-mapper/statistics.rst         | 225 -----------
 Documentation/device-mapper/striped.rst            |  61 ---
 Documentation/device-mapper/switch.rst             | 141 -------
 Documentation/device-mapper/thin-provisioning.rst  | 427 ---------------------
 Documentation/device-mapper/unstriped.rst          | 135 -------
 Documentation/device-mapper/verity.rst             | 229 -----------
 Documentation/device-mapper/writecache.rst         |  79 ----
 Documentation/device-mapper/zero.rst               |  37 --
 61 files changed, 4390 insertions(+), 4391 deletions(-)
 create mode 100644 Documentation/admin-guide/device-mapper/cache-policies.rst
 create mode 100644 Documentation/admin-guide/device-mapper/cache.rst
 create mode 100644 Documentation/admin-guide/device-mapper/delay.rst
 create mode 100644 Documentation/admin-guide/device-mapper/dm-crypt.rst
 create mode 100644 Documentation/admin-guide/device-mapper/dm-dust.txt
 create mode 100644 Documentation/admin-guide/device-mapper/dm-flakey.rst
 create mode 100644 Documentation/admin-guide/device-mapper/dm-init.rst
 create mode 100644 Documentation/admin-guide/device-mapper/dm-integrity.rst
 create mode 100644 Documentation/admin-guide/device-mapper/dm-io.rst
 create mode 100644 Documentation/admin-guide/device-mapper/dm-log.rst
 create mode 100644 Documentation/admin-guide/device-mapper/dm-queue-length.rst
 create mode 100644 Documentation/admin-guide/device-mapper/dm-raid.rst
 create mode 100644 Documentation/admin-guide/device-mapper/dm-service-time.rst
 create mode 100644 Documentation/admin-guide/device-mapper/dm-uevent.rst
 create mode 100644 Documentation/admin-guide/device-mapper/dm-zoned.rst
 create mode 100644 Documentation/admin-guide/device-mapper/era.rst
 create mode 100644 Documentation/admin-guide/device-mapper/index.rst
 create mode 100644 Documentation/admin-guide/device-mapper/kcopyd.rst
 create mode 100644 Documentation/admin-guide/device-mapper/linear.rst
 create mode 100644 Documentation/admin-guide/device-mapper/log-writes.rst
 create mode 100644 Documentation/admin-guide/device-mapper/persistent-data.rst
 create mode 100644 Documentation/admin-guide/device-mapper/snapshot.rst
 create mode 100644 Documentation/admin-guide/device-mapper/statistics.rst
 create mode 100644 Documentation/admin-guide/device-mapper/striped.rst
 create mode 100644 Documentation/admin-guide/device-mapper/switch.rst
 create mode 100644 Documentation/admin-guide/device-mapper/thin-provisioning.rst
 create mode 100644 Documentation/admin-guide/device-mapper/unstriped.rst
 create mode 100644 Documentation/admin-guide/device-mapper/verity.rst
 create mode 100644 Documentation/admin-guide/device-mapper/writecache.rst
 create mode 100644 Documentation/admin-guide/device-mapper/zero.rst
 delete mode 100644 Documentation/device-mapper/cache-policies.rst
 delete mode 100644 Documentation/device-mapper/cache.rst
 delete mode 100644 Documentation/device-mapper/delay.rst
 delete mode 100644 Documentation/device-mapper/dm-crypt.rst
 delete mode 100644 Documentation/device-mapper/dm-dust.txt
 delete mode 100644 Documentation/device-mapper/dm-flakey.rst
 delete mode 100644 Documentation/device-mapper/dm-init.rst
 delete mode 100644 Documentation/device-mapper/dm-integrity.rst
 delete mode 100644 Documentation/device-mapper/dm-io.rst
 delete mode 100644 Documentation/device-mapper/dm-log.rst
 delete mode 100644 Documentation/device-mapper/dm-queue-length.rst
 delete mode 100644 Documentation/device-mapper/dm-raid.rst
 delete mode 100644 Documentation/device-mapper/dm-service-time.rst
 delete mode 100644 Documentation/device-mapper/dm-uevent.rst
 delete mode 100644 Documentation/device-mapper/dm-zoned.rst
 delete mode 100644 Documentation/device-mapper/era.rst
 delete mode 100644 Documentation/device-mapper/index.rst
 delete mode 100644 Documentation/device-mapper/kcopyd.rst
 delete mode 100644 Documentation/device-mapper/linear.rst
 delete mode 100644 Documentation/device-mapper/log-writes.rst
 delete mode 100644 Documentation/device-mapper/persistent-data.rst
 delete mode 100644 Documentation/device-mapper/snapshot.rst
 delete mode 100644 Documentation/device-mapper/statistics.rst
 delete mode 100644 Documentation/device-mapper/striped.rst
 delete mode 100644 Documentation/device-mapper/switch.rst
 delete mode 100644 Documentation/device-mapper/thin-provisioning.rst
 delete mode 100644 Documentation/device-mapper/unstriped.rst
 delete mode 100644 Documentation/device-mapper/verity.rst
 delete mode 100644 Documentation/device-mapper/writecache.rst
 delete mode 100644 Documentation/device-mapper/zero.rst

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/device-mapper/cache-policies.rst b/Documentation/admin-guide/device-mapper/cache-policies.rst
new file mode 100644
index 000000000000..b17fe352fc41
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/cache-policies.rst
@@ -0,0 +1,131 @@
+=============================
+Guidance for writing policies
+=============================
+
+Try to keep transactionality out of it.  The core is careful to
+avoid asking about anything that is migrating.  This is a pain, but
+makes it easier to write the policies.
+
+Mappings are loaded into the policy at construction time.
+
+Every bio that is mapped by the target is referred to the policy.
+The policy can return a simple HIT or MISS or issue a migration.
+
+Currently there's no way for the policy to issue background work,
+e.g. to start writing back dirty blocks that are going to be evicted
+soon.
+
+Because we map bios, rather than requests it's easy for the policy
+to get fooled by many small bios.  For this reason the core target
+issues periodic ticks to the policy.  It's suggested that the policy
+doesn't update states (eg, hit counts) for a block more than once
+for each tick.  The core ticks by watching bios complete, and so
+trying to see when the io scheduler has let the ios run.
+
+
+Overview of supplied cache replacement policies
+===============================================
+
+multiqueue (mq)
+---------------
+
+This policy is now an alias for smq (see below).
+
+The following tunables are accepted, but have no effect::
+
+	'sequential_threshold <#nr_sequential_ios>'
+	'random_threshold <#nr_random_ios>'
+	'read_promote_adjustment <value>'
+	'write_promote_adjustment <value>'
+	'discard_promote_adjustment <value>'
+
+Stochastic multiqueue (smq)
+---------------------------
+
+This policy is the default.
+
+The stochastic multi-queue (smq) policy addresses some of the problems
+with the multiqueue (mq) policy.
+
+The smq policy (vs mq) offers the promise of less memory utilization,
+improved performance and increased adaptability in the face of changing
+workloads.  smq also does not have any cumbersome tuning knobs.
+
+Users may switch from "mq" to "smq" simply by appropriately reloading a
+DM table that is using the cache target.  Doing so will cause all of the
+mq policy's hints to be dropped.  Also, performance of the cache may
+degrade slightly until smq recalculates the origin device's hotspots
+that should be cached.
+
+Memory usage
+^^^^^^^^^^^^
+
+The mq policy used a lot of memory; 88 bytes per cache block on a 64
+bit machine.
+
+smq uses 28bit indexes to implement its data structures rather than
+pointers.  It avoids storing an explicit hit count for each block.  It
+has a 'hotspot' queue, rather than a pre-cache, which uses a quarter of
+the entries (each hotspot block covers a larger area than a single
+cache block).
+
+All this means smq uses ~25bytes per cache block.  Still a lot of
+memory, but a substantial improvement nontheless.
+
+Level balancing
+^^^^^^^^^^^^^^^
+
+mq placed entries in different levels of the multiqueue structures
+based on their hit count (~ln(hit count)).  This meant the bottom
+levels generally had the most entries, and the top ones had very
+few.  Having unbalanced levels like this reduced the efficacy of the
+multiqueue.
+
+smq does not maintain a hit count, instead it swaps hit entries with
+the least recently used entry from the level above.  The overall
+ordering being a side effect of this stochastic process.  With this
+scheme we can decide how many entries occupy each multiqueue level,
+resulting in better promotion/demotion decisions.
+
+Adaptability:
+The mq policy maintained a hit count for each cache block.  For a
+different block to get promoted to the cache its hit count has to
+exceed the lowest currently in the cache.  This meant it could take a
+long time for the cache to adapt between varying IO patterns.
+
+smq doesn't maintain hit counts, so a lot of this problem just goes
+away.  In addition it tracks performance of the hotspot queue, which
+is used to decide which blocks to promote.  If the hotspot queue is
+performing badly then it starts moving entries more quickly between
+levels.  This lets it adapt to new IO patterns very quickly.
+
+Performance
+^^^^^^^^^^^
+
+Testing smq shows substantially better performance than mq.
+
+cleaner
+-------
+
+The cleaner writes back all dirty blocks in a cache to decommission it.
+
+Examples
+========
+
+The syntax for a table is::
+
+	cache <metadata dev> <cache dev> <origin dev> <block size>
+	<#feature_args> [<feature arg>]*
+	<policy> <#policy_args> [<policy arg>]*
+
+The syntax to send a message using the dmsetup command is::
+
+	dmsetup message <mapped device> 0 sequential_threshold 1024
+	dmsetup message <mapped device> 0 random_threshold 8
+
+Using dmsetup::
+
+	dmsetup create blah --table "0 268435456 cache /dev/sdb /dev/sdc \
+	    /dev/sdd 512 0 mq 4 sequential_threshold 1024 random_threshold 8"
+	creates a 128GB large mapped device named 'blah' with the
+	sequential threshold set to 1024 and the random_threshold set to 8.
diff --git a/Documentation/admin-guide/device-mapper/cache.rst b/Documentation/admin-guide/device-mapper/cache.rst
new file mode 100644
index 000000000000..f15e5254d05b
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/cache.rst
@@ -0,0 +1,337 @@
+=====
+Cache
+=====
+
+Introduction
+============
+
+dm-cache is a device mapper target written by Joe Thornber, Heinz
+Mauelshagen, and Mike Snitzer.
+
+It aims to improve performance of a block device (eg, a spindle) by
+dynamically migrating some of its data to a faster, smaller device
+(eg, an SSD).
+
+This device-mapper solution allows us to insert this caching at
+different levels of the dm stack, for instance above the data device for
+a thin-provisioning pool.  Caching solutions that are integrated more
+closely with the virtual memory system should give better performance.
+
+The target reuses the metadata library used in the thin-provisioning
+library.
+
+The decision as to what data to migrate and when is left to a plug-in
+policy module.  Several of these have been written as we experiment,
+and we hope other people will contribute others for specific io
+scenarios (eg. a vm image server).
+
+Glossary
+========
+
+  Migration
+	       Movement of the primary copy of a logical block from one
+	       device to the other.
+  Promotion
+	       Migration from slow device to fast device.
+  Demotion
+	       Migration from fast device to slow device.
+
+The origin device always contains a copy of the logical block, which
+may be out of date or kept in sync with the copy on the cache device
+(depending on policy).
+
+Design
+======
+
+Sub-devices
+-----------
+
+The target is constructed by passing three devices to it (along with
+other parameters detailed later):
+
+1. An origin device - the big, slow one.
+
+2. A cache device - the small, fast one.
+
+3. A small metadata device - records which blocks are in the cache,
+   which are dirty, and extra hints for use by the policy object.
+   This information could be put on the cache device, but having it
+   separate allows the volume manager to configure it differently,
+   e.g. as a mirror for extra robustness.  This metadata device may only
+   be used by a single cache device.
+
+Fixed block size
+----------------
+
+The origin is divided up into blocks of a fixed size.  This block size
+is configurable when you first create the cache.  Typically we've been
+using block sizes of 256KB - 1024KB.  The block size must be between 64
+sectors (32KB) and 2097152 sectors (1GB) and a multiple of 64 sectors (32KB).
+
+Having a fixed block size simplifies the target a lot.  But it is
+something of a compromise.  For instance, a small part of a block may be
+getting hit a lot, yet the whole block will be promoted to the cache.
+So large block sizes are bad because they waste cache space.  And small
+block sizes are bad because they increase the amount of metadata (both
+in core and on disk).
+
+Cache operating modes
+---------------------
+
+The cache has three operating modes: writeback, writethrough and
+passthrough.
+
+If writeback, the default, is selected then a write to a block that is
+cached will go only to the cache and the block will be marked dirty in
+the metadata.
+
+If writethrough is selected then a write to a cached block will not
+complete until it has hit both the origin and cache devices.  Clean
+blocks should remain clean.
+
+If passthrough is selected, useful when the cache contents are not known
+to be coherent with the origin device, then all reads are served from
+the origin device (all reads miss the cache) and all writes are
+forwarded to the origin device; additionally, write hits cause cache
+block invalidates.  To enable passthrough mode the cache must be clean.
+Passthrough mode allows a cache device to be activated without having to
+worry about coherency.  Coherency that exists is maintained, although
+the cache will gradually cool as writes take place.  If the coherency of
+the cache can later be verified, or established through use of the
+"invalidate_cblocks" message, the cache device can be transitioned to
+writethrough or writeback mode while still warm.  Otherwise, the cache
+contents can be discarded prior to transitioning to the desired
+operating mode.
+
+A simple cleaner policy is provided, which will clean (write back) all
+dirty blocks in a cache.  Useful for decommissioning a cache or when
+shrinking a cache.  Shrinking the cache's fast device requires all cache
+blocks, in the area of the cache being removed, to be clean.  If the
+area being removed from the cache still contains dirty blocks the resize
+will fail.  Care must be taken to never reduce the volume used for the
+cache's fast device until the cache is clean.  This is of particular
+importance if writeback mode is used.  Writethrough and passthrough
+modes already maintain a clean cache.  Future support to partially clean
+the cache, above a specified threshold, will allow for keeping the cache
+warm and in writeback mode during resize.
+
+Migration throttling
+--------------------
+
+Migrating data between the origin and cache device uses bandwidth.
+The user can set a throttle to prevent more than a certain amount of
+migration occurring at any one time.  Currently we're not taking any
+account of normal io traffic going to the devices.  More work needs
+doing here to avoid migrating during those peak io moments.
+
+For the time being, a message "migration_threshold <#sectors>"
+can be used to set the maximum number of sectors being migrated,
+the default being 2048 sectors (1MB).
+
+Updating on-disk metadata
+-------------------------
+
+On-disk metadata is committed every time a FLUSH or FUA bio is written.
+If no such requests are made then commits will occur every second.  This
+means the cache behaves like a physical disk that has a volatile write
+cache.  If power is lost you may lose some recent writes.  The metadata
+should always be consistent in spite of any crash.
+
+The 'dirty' state for a cache block changes far too frequently for us
+to keep updating it on the fly.  So we treat it as a hint.  In normal
+operation it will be written when the dm device is suspended.  If the
+system crashes all cache blocks will be assumed dirty when restarted.
+
+Per-block policy hints
+----------------------
+
+Policy plug-ins can store a chunk of data per cache block.  It's up to
+the policy how big this chunk is, but it should be kept small.  Like the
+dirty flags this data is lost if there's a crash so a safe fallback
+value should always be possible.
+
+Policy hints affect performance, not correctness.
+
+Policy messaging
+----------------
+
+Policies will have different tunables, specific to each one, so we
+need a generic way of getting and setting these.  Device-mapper
+messages are used.  Refer to cache-policies.txt.
+
+Discard bitset resolution
+-------------------------
+
+We can avoid copying data during migration if we know the block has
+been discarded.  A prime example of this is when mkfs discards the
+whole block device.  We store a bitset tracking the discard state of
+blocks.  However, we allow this bitset to have a different block size
+from the cache blocks.  This is because we need to track the discard
+state for all of the origin device (compare with the dirty bitset
+which is just for the smaller cache device).
+
+Target interface
+================
+
+Constructor
+-----------
+
+  ::
+
+   cache <metadata dev> <cache dev> <origin dev> <block size>
+         <#feature args> [<feature arg>]*
+         <policy> <#policy args> [policy args]*
+
+ ================ =======================================================
+ metadata dev     fast device holding the persistent metadata
+ cache dev	  fast device holding cached data blocks
+ origin dev	  slow device holding original data blocks
+ block size       cache unit size in sectors
+
+ #feature args    number of feature arguments passed
+ feature args     writethrough or passthrough (The default is writeback.)
+
+ policy           the replacement policy to use
+ #policy args     an even number of arguments corresponding to
+                  key/value pairs passed to the policy
+ policy args      key/value pairs passed to the policy
+		  E.g. 'sequential_threshold 1024'
+		  See cache-policies.txt for details.
+ ================ =======================================================
+
+Optional feature arguments are:
+
+
+   ==================== ========================================================
+   writethrough		write through caching that prohibits cache block
+			content from being different from origin block content.
+			Without this argument, the default behaviour is to write
+			back cache block contents later for performance reasons,
+			so they may differ from the corresponding origin blocks.
+
+   passthrough		a degraded mode useful for various cache coherency
+			situations (e.g., rolling back snapshots of
+			underlying storage).	 Reads and writes always go to
+			the origin.	If a write goes to a cached origin
+			block, then the cache block is invalidated.
+			To enable passthrough mode the cache must be clean.
+
+   metadata2		use version 2 of the metadata.  This stores the dirty
+			bits in a separate btree, which improves speed of
+			shutting down the cache.
+
+   no_discard_passdown	disable passing down discards from the cache
+			to the origin's data device.
+   ==================== ========================================================
+
+A policy called 'default' is always registered.  This is an alias for
+the policy we currently think is giving best all round performance.
+
+As the default policy could vary between kernels, if you are relying on
+the characteristics of a specific policy, always request it by name.
+
+Status
+------
+
+::
+
+  <metadata block size> <#used metadata blocks>/<#total metadata blocks>
+  <cache block size> <#used cache blocks>/<#total cache blocks>
+  <#read hits> <#read misses> <#write hits> <#write misses>
+  <#demotions> <#promotions> <#dirty> <#features> <features>*
+  <#core args> <core args>* <policy name> <#policy args> <policy args>*
+  <cache metadata mode>
+
+
+========================= =====================================================
+metadata block size	  Fixed block size for each metadata block in
+			  sectors
+#used metadata blocks	  Number of metadata blocks used
+#total metadata blocks	  Total number of metadata blocks
+cache block size	  Configurable block size for the cache device
+			  in sectors
+#used cache blocks	  Number of blocks resident in the cache
+#total cache blocks	  Total number of cache blocks
+#read hits		  Number of times a READ bio has been mapped
+			  to the cache
+#read misses		  Number of times a READ bio has been mapped
+			  to the origin
+#write hits		  Number of times a WRITE bio has been mapped
+			  to the cache
+#write misses		  Number of times a WRITE bio has been
+			  mapped to the origin
+#demotions		  Number of times a block has been removed
+			  from the cache
+#promotions		  Number of times a block has been moved to
+			  the cache
+#dirty			  Number of blocks in the cache that differ
+			  from the origin
+#feature args		  Number of feature args to follow
+feature args		  'writethrough' (optional)
+#core args		  Number of core arguments (must be even)
+core args		  Key/value pairs for tuning the core
+			  e.g. migration_threshold
+policy name		  Name of the policy
+#policy args		  Number of policy arguments to follow (must be even)
+policy args		  Key/value pairs e.g. sequential_threshold
+cache metadata mode       ro if read-only, rw if read-write
+
+			  In serious cases where even a read-only mode is
+			  deemed unsafe no further I/O will be permitted and
+			  the status will just contain the string 'Fail'.
+			  The userspace recovery tools should then be used.
+needs_check		  'needs_check' if set, '-' if not set
+			  A metadata operation has failed, resulting in the
+			  needs_check flag being set in the metadata's
+			  superblock.  The metadata device must be
+			  deactivated and checked/repaired before the
+			  cache can be made fully operational again.
+			  '-' indicates	needs_check is not set.
+========================= =====================================================
+
+Messages
+--------
+
+Policies will have different tunables, specific to each one, so we
+need a generic way of getting and setting these.  Device-mapper
+messages are used.  (A sysfs interface would also be possible.)
+
+The message format is::
+
+   <key> <value>
+
+E.g.::
+
+   dmsetup message my_cache 0 sequential_threshold 1024
+
+
+Invalidation is removing an entry from the cache without writing it
+back.  Cache blocks can be invalidated via the invalidate_cblocks
+message, which takes an arbitrary number of cblock ranges.  Each cblock
+range's end value is "one past the end", meaning 5-10 expresses a range
+of values from 5 to 9.  Each cblock must be expressed as a decimal
+value, in the future a variant message that takes cblock ranges
+expressed in hexadecimal may be needed to better support efficient
+invalidation of larger caches.  The cache must be in passthrough mode
+when invalidate_cblocks is used::
+
+   invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
+
+E.g.::
+
+   dmsetup message my_cache 0 invalidate_cblocks 2345 3456-4567 5678-6789
+
+Examples
+========
+
+The test suite can be found here:
+
+https://github.com/jthornber/device-mapper-test-suite
+
+::
+
+  dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
+	  /dev/mapper/ssd /dev/mapper/origin 512 1 writeback default 0'
+  dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
+	  /dev/mapper/ssd /dev/mapper/origin 1024 1 writeback \
+	  mq 4 sequential_threshold 1024 random_threshold 8'
diff --git a/Documentation/admin-guide/device-mapper/delay.rst b/Documentation/admin-guide/device-mapper/delay.rst
new file mode 100644
index 000000000000..917ba8c33359
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/delay.rst
@@ -0,0 +1,31 @@
+========
+dm-delay
+========
+
+Device-Mapper's "delay" target delays reads and/or writes
+and maps them to different devices.
+
+Parameters::
+
+    <device> <offset> <delay> [<write_device> <write_offset> <write_delay>
+			       [<flush_device> <flush_offset> <flush_delay>]]
+
+With separate write parameters, the first set is only used for reads.
+Offsets are specified in sectors.
+Delays are specified in milliseconds.
+
+Example scripts
+===============
+
+::
+
+	#!/bin/sh
+	# Create device delaying rw operation for 500ms
+	echo "0 `blockdev --getsz $1` delay $1 0 500" | dmsetup create delayed
+
+::
+
+	#!/bin/sh
+	# Create device delaying only write operation for 500ms and
+	# splitting reads and writes to different devices $1 $2
+	echo "0 `blockdev --getsz $1` delay $1 0 0 $2 0 500" | dmsetup create delayed
diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst
new file mode 100644
index 000000000000..8f4a3f889d43
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst
@@ -0,0 +1,173 @@
+========
+dm-crypt
+========
+
+Device-Mapper's "crypt" target provides transparent encryption of block devices
+using the kernel crypto API.
+
+For a more detailed description of supported parameters see:
+https://gitlab.com/cryptsetup/cryptsetup/wikis/DMCrypt
+
+Parameters::
+
+	      <cipher> <key> <iv_offset> <device path> \
+	      <offset> [<#opt_params> <opt_params>]
+
+<cipher>
+    Encryption cipher, encryption mode and Initial Vector (IV) generator.
+
+    The cipher specifications format is::
+
+       cipher[:keycount]-chainmode-ivmode[:ivopts]
+
+    Examples::
+
+       aes-cbc-essiv:sha256
+       aes-xts-plain64
+       serpent-xts-plain64
+
+    Cipher format also supports direct specification with kernel crypt API
+    format (selected by capi: prefix). The IV specification is the same
+    as for the first format type.
+    This format is mainly used for specification of authenticated modes.
+
+    The crypto API cipher specifications format is::
+
+        capi:cipher_api_spec-ivmode[:ivopts]
+
+    Examples::
+
+        capi:cbc(aes)-essiv:sha256
+        capi:xts(aes)-plain64
+
+    Examples of authenticated modes::
+
+        capi:gcm(aes)-random
+        capi:authenc(hmac(sha256),xts(aes))-random
+        capi:rfc7539(chacha20,poly1305)-random
+
+    The /proc/crypto contains a list of curently loaded crypto modes.
+
+<key>
+    Key used for encryption. It is encoded either as a hexadecimal number
+    or it can be passed as <key_string> prefixed with single colon
+    character (':') for keys residing in kernel keyring service.
+    You can only use key sizes that are valid for the selected cipher
+    in combination with the selected iv mode.
+    Note that for some iv modes the key string can contain additional
+    keys (for example IV seed) so the key contains more parts concatenated
+    into a single string.
+
+<key_string>
+    The kernel keyring key is identified by string in following format:
+    <key_size>:<key_type>:<key_description>.
+
+<key_size>
+    The encryption key size in bytes. The kernel key payload size must match
+    the value passed in <key_size>.
+
+<key_type>
+    Either 'logon' or 'user' kernel key type.
+
+<key_description>
+    The kernel keyring key description crypt target should look for
+    when loading key of <key_type>.
+
+<keycount>
+    Multi-key compatibility mode. You can define <keycount> keys and
+    then sectors are encrypted according to their offsets (sector 0 uses key0;
+    sector 1 uses key1 etc.).  <keycount> must be a power of two.
+
+<iv_offset>
+    The IV offset is a sector count that is added to the sector number
+    before creating the IV.
+
+<device path>
+    This is the device that is going to be used as backend and contains the
+    encrypted data.  You can specify it as a path like /dev/xxx or a device
+    number <major>:<minor>.
+
+<offset>
+    Starting sector within the device where the encrypted data begins.
+
+<#opt_params>
+    Number of optional parameters. If there are no optional parameters,
+    the optional paramaters section can be skipped or #opt_params can be zero.
+    Otherwise #opt_params is the number of following arguments.
+
+    Example of optional parameters section:
+        3 allow_discards same_cpu_crypt submit_from_crypt_cpus
+
+allow_discards
+    Block discard requests (a.k.a. TRIM) are passed through the crypt device.
+    The default is to ignore discard requests.
+
+    WARNING: Assess the specific security risks carefully before enabling this
+    option.  For example, allowing discards on encrypted devices may lead to
+    the leak of information about the ciphertext device (filesystem type,
+    used space etc.) if the discarded blocks can be located easily on the
+    device later.
+
+same_cpu_crypt
+    Perform encryption using the same cpu that IO was submitted on.
+    The default is to use an unbound workqueue so that encryption work
+    is automatically balanced between available CPUs.
+
+submit_from_crypt_cpus
+    Disable offloading writes to a separate thread after encryption.
+    There are some situations where offloading write bios from the
+    encryption threads to a single thread degrades performance
+    significantly.  The default is to offload write bios to the same
+    thread because it benefits CFQ to have writes submitted using the
+    same context.
+
+integrity:<bytes>:<type>
+    The device requires additional <bytes> metadata per-sector stored
+    in per-bio integrity structure. This metadata must by provided
+    by underlying dm-integrity target.
+
+    The <type> can be "none" if metadata is used only for persistent IV.
+
+    For Authenticated Encryption with Additional Data (AEAD)
+    the <type> is "aead". An AEAD mode additionally calculates and verifies
+    integrity for the encrypted device. The additional space is then
+    used for storing authentication tag (and persistent IV if needed).
+
+sector_size:<bytes>
+    Use <bytes> as the encryption unit instead of 512 bytes sectors.
+    This option can be in range 512 - 4096 bytes and must be power of two.
+    Virtual device will announce this size as a minimal IO and logical sector.
+
+iv_large_sectors
+   IV generators will use sector number counted in <sector_size> units
+   instead of default 512 bytes sectors.
+
+   For example, if <sector_size> is 4096 bytes, plain64 IV for the second
+   sector will be 8 (without flag) and 1 if iv_large_sectors is present.
+   The <iv_offset> must be multiple of <sector_size> (in 512 bytes units)
+   if this flag is specified.
+
+Example scripts
+===============
+LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
+encryption with dm-crypt using the 'cryptsetup' utility, see
+https://gitlab.com/cryptsetup/cryptsetup
+
+::
+
+	#!/bin/sh
+	# Create a crypt device using dmsetup
+	dmsetup create crypt1 --table "0 `blockdev --getsz $1` crypt aes-cbc-essiv:sha256 babebabebabebabebabebabebabebabe 0 $1 0"
+
+::
+
+	#!/bin/sh
+	# Create a crypt device using dmsetup when encryption key is stored in keyring service
+	dmsetup create crypt2 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 :32:logon:my_prefix:my_key 0 $1 0"
+
+::
+
+	#!/bin/sh
+	# Create a crypt device using cryptsetup and LUKS header with default cipher
+	cryptsetup luksFormat $1
+	cryptsetup luksOpen $1 crypt1
diff --git a/Documentation/admin-guide/device-mapper/dm-dust.txt b/Documentation/admin-guide/device-mapper/dm-dust.txt
new file mode 100644
index 000000000000..954d402a1f6a
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-dust.txt
@@ -0,0 +1,272 @@
+dm-dust
+=======
+
+This target emulates the behavior of bad sectors at arbitrary
+locations, and the ability to enable the emulation of the failures
+at an arbitrary time.
+
+This target behaves similarly to a linear target.  At a given time,
+the user can send a message to the target to start failing read
+requests on specific blocks (to emulate the behavior of a hard disk
+drive with bad sectors).
+
+When the failure behavior is enabled (i.e.: when the output of
+"dmsetup status" displays "fail_read_on_bad_block"), reads of blocks
+in the "bad block list" will fail with EIO ("Input/output error").
+
+Writes of blocks in the "bad block list will result in the following:
+
+1. Remove the block from the "bad block list".
+2. Successfully complete the write.
+
+This emulates the "remapped sector" behavior of a drive with bad
+sectors.
+
+Normally, a drive that is encountering bad sectors will most likely
+encounter more bad sectors, at an unknown time or location.
+With dm-dust, the user can use the "addbadblock" and "removebadblock"
+messages to add arbitrary bad blocks at new locations, and the
+"enable" and "disable" messages to modulate the state of whether the
+configured "bad blocks" will be treated as bad, or bypassed.
+This allows the pre-writing of test data and metadata prior to
+simulating a "failure" event where bad sectors start to appear.
+
+Table parameters:
+-----------------
+<device_path> <offset> <blksz>
+
+Mandatory parameters:
+    <device_path>: path to the block device.
+    <offset>: offset to data area from start of device_path
+    <blksz>: block size in bytes
+	     (minimum 512, maximum 1073741824, must be a power of 2)
+
+Usage instructions:
+-------------------
+
+First, find the size (in 512-byte sectors) of the device to be used:
+
+$ sudo blockdev --getsz /dev/vdb1
+33552384
+
+Create the dm-dust device:
+(For a device with a block size of 512 bytes)
+$ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 512'
+
+(For a device with a block size of 4096 bytes)
+$ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 4096'
+
+Check the status of the read behavior ("bypass" indicates that all I/O
+will be passed through to the underlying device):
+$ sudo dmsetup status dust1
+0 33552384 dust 252:17 bypass
+
+$ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=128 iflag=direct
+128+0 records in
+128+0 records out
+
+$ sudo dd if=/dev/zero of=/dev/mapper/dust1 bs=512 count=128 oflag=direct
+128+0 records in
+128+0 records out
+
+Adding and removing bad blocks:
+-------------------------------
+
+At any time (i.e.: whether the device has the "bad block" emulation
+enabled or disabled), bad blocks may be added or removed from the
+device via the "addbadblock" and "removebadblock" messages:
+
+$ sudo dmsetup message dust1 0 addbadblock 60
+kernel: device-mapper: dust: badblock added at block 60
+
+$ sudo dmsetup message dust1 0 addbadblock 67
+kernel: device-mapper: dust: badblock added at block 67
+
+$ sudo dmsetup message dust1 0 addbadblock 72
+kernel: device-mapper: dust: badblock added at block 72
+
+These bad blocks will be stored in the "bad block list".
+While the device is in "bypass" mode, reads and writes will succeed:
+
+$ sudo dmsetup status dust1
+0 33552384 dust 252:17 bypass
+
+Enabling block read failures:
+-----------------------------
+
+To enable the "fail read on bad block" behavior, send the "enable" message:
+
+$ sudo dmsetup message dust1 0 enable
+kernel: device-mapper: dust: enabling read failures on bad sectors
+
+$ sudo dmsetup status dust1
+0 33552384 dust 252:17 fail_read_on_bad_block
+
+With the device in "fail read on bad block" mode, attempting to read a
+block will encounter an "Input/output error":
+
+$ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=1 skip=67 iflag=direct
+dd: error reading '/dev/mapper/dust1': Input/output error
+0+0 records in
+0+0 records out
+0 bytes copied, 0.00040651 s, 0.0 kB/s
+
+...and writing to the bad blocks will remove the blocks from the list,
+therefore emulating the "remap" behavior of hard disk drives:
+
+$ sudo dd if=/dev/zero of=/dev/mapper/dust1 bs=512 count=128 oflag=direct
+128+0 records in
+128+0 records out
+
+kernel: device-mapper: dust: block 60 removed from badblocklist by write
+kernel: device-mapper: dust: block 67 removed from badblocklist by write
+kernel: device-mapper: dust: block 72 removed from badblocklist by write
+kernel: device-mapper: dust: block 87 removed from badblocklist by write
+
+Bad block add/remove error handling:
+------------------------------------
+
+Attempting to add a bad block that already exists in the list will
+result in an "Invalid argument" error, as well as a helpful message:
+
+$ sudo dmsetup message dust1 0 addbadblock 88
+device-mapper: message ioctl on dust1  failed: Invalid argument
+kernel: device-mapper: dust: block 88 already in badblocklist
+
+Attempting to remove a bad block that doesn't exist in the list will
+result in an "Invalid argument" error, as well as a helpful message:
+
+$ sudo dmsetup message dust1 0 removebadblock 87
+device-mapper: message ioctl on dust1  failed: Invalid argument
+kernel: device-mapper: dust: block 87 not found in badblocklist
+
+Counting the number of bad blocks in the bad block list:
+--------------------------------------------------------
+
+To count the number of bad blocks configured in the device, run the
+following message command:
+
+$ sudo dmsetup message dust1 0 countbadblocks
+
+A message will print with the number of bad blocks currently
+configured on the device:
+
+kernel: device-mapper: dust: countbadblocks: 895 badblock(s) found
+
+Querying for specific bad blocks:
+---------------------------------
+
+To find out if a specific block is in the bad block list, run the
+following message command:
+
+$ sudo dmsetup message dust1 0 queryblock 72
+
+The following message will print if the block is in the list:
+device-mapper: dust: queryblock: block 72 found in badblocklist
+
+The following message will print if the block is in the list:
+device-mapper: dust: queryblock: block 72 not found in badblocklist
+
+The "queryblock" message command will work in both the "enabled"
+and "disabled" modes, allowing the verification of whether a block
+will be treated as "bad" without having to issue I/O to the device,
+or having to "enable" the bad block emulation.
+
+Clearing the bad block list:
+----------------------------
+
+To clear the bad block list (without needing to individually run
+a "removebadblock" message command for every block), run the
+following message command:
+
+$ sudo dmsetup message dust1 0 clearbadblocks
+
+After clearing the bad block list, the following message will appear:
+
+kernel: device-mapper: dust: clearbadblocks: badblocks cleared
+
+If there were no bad blocks to clear, the following message will
+appear:
+
+kernel: device-mapper: dust: clearbadblocks: no badblocks found
+
+Message commands list:
+----------------------
+
+Below is a list of the messages that can be sent to a dust device:
+
+Operations on blocks (requires a <blknum> argument):
+
+addbadblock <blknum>
+queryblock <blknum>
+removebadblock <blknum>
+
+...where <blknum> is a block number within range of the device
+  (corresponding to the block size of the device.)
+
+Single argument message commands:
+
+countbadblocks
+clearbadblocks
+disable
+enable
+quiet
+
+Device removal:
+---------------
+
+When finished, remove the device via the "dmsetup remove" command:
+
+$ sudo dmsetup remove dust1
+
+Quiet mode:
+-----------
+
+On test runs with many bad blocks, it may be desirable to avoid
+excessive logging (from bad blocks added, removed, or "remapped").
+This can be done by enabling "quiet mode" via the following message:
+
+$ sudo dmsetup message dust1 0 quiet
+
+This will suppress log messages from add / remove / removed by write
+operations.  Log messages from "countbadblocks" or "queryblock"
+message commands will still print in quiet mode.
+
+The status of quiet mode can be seen by running "dmsetup status":
+
+$ sudo dmsetup status dust1
+0 33552384 dust 252:17 fail_read_on_bad_block quiet
+
+To disable quiet mode, send the "quiet" message again:
+
+$ sudo dmsetup message dust1 0 quiet
+
+$ sudo dmsetup status dust1
+0 33552384 dust 252:17 fail_read_on_bad_block verbose
+
+(The presence of "verbose" indicates normal logging.)
+
+"Why not...?"
+-------------
+
+scsi_debug has a "medium error" mode that can fail reads on one
+specified sector (sector 0x1234, hardcoded in the source code), but
+it uses RAM for the persistent storage, which drastically decreases
+the potential device size.
+
+dm-flakey fails all I/O from all block locations at a specified time
+frequency, and not a given point in time.
+
+When a bad sector occurs on a hard disk drive, reads to that sector
+are failed by the device, usually resulting in an error code of EIO
+("I/O error") or ENODATA ("No data available").  However, a write to
+the sector may succeed, and result in the sector becoming readable
+after the device controller no longer experiences errors reading the
+sector (or after a reallocation of the sector).  However, there may
+be bad sectors that occur on the device in the future, in a different,
+unpredictable location.
+
+This target seeks to provide a device that can exhibit the behavior
+of a bad sector at a known sector location, at a known time, based
+on a large storage device (at least tens of gigabytes, not occupying
+system memory).
diff --git a/Documentation/admin-guide/device-mapper/dm-flakey.rst b/Documentation/admin-guide/device-mapper/dm-flakey.rst
new file mode 100644
index 000000000000..86138735879d
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-flakey.rst
@@ -0,0 +1,74 @@
+=========
+dm-flakey
+=========
+
+This target is the same as the linear target except that it exhibits
+unreliable behaviour periodically.  It's been found useful in simulating
+failing devices for testing purposes.
+
+Starting from the time the table is loaded, the device is available for
+<up interval> seconds, then exhibits unreliable behaviour for <down
+interval> seconds, and then this cycle repeats.
+
+Also, consider using this in combination with the dm-delay target too,
+which can delay reads and writes and/or send them to different
+underlying devices.
+
+Table parameters
+----------------
+
+::
+
+  <dev path> <offset> <up interval> <down interval> \
+    [<num_features> [<feature arguments>]]
+
+Mandatory parameters:
+
+    <dev path>:
+        Full pathname to the underlying block-device, or a
+        "major:minor" device-number.
+    <offset>:
+        Starting sector within the device.
+    <up interval>:
+        Number of seconds device is available.
+    <down interval>:
+        Number of seconds device returns errors.
+
+Optional feature parameters:
+
+  If no feature parameters are present, during the periods of
+  unreliability, all I/O returns errors.
+
+  drop_writes:
+	All write I/O is silently ignored.
+	Read I/O is handled correctly.
+
+  error_writes:
+	All write I/O is failed with an error signalled.
+	Read I/O is handled correctly.
+
+  corrupt_bio_byte <Nth_byte> <direction> <value> <flags>:
+	During <down interval>, replace <Nth_byte> of the data of
+	each matching bio with <value>.
+
+    <Nth_byte>:
+	The offset of the byte to replace.
+	Counting starts at 1, to replace the first byte.
+    <direction>:
+	Either 'r' to corrupt reads or 'w' to corrupt writes.
+	'w' is incompatible with drop_writes.
+    <value>:
+	The value (from 0-255) to write.
+    <flags>:
+	Perform the replacement only if bio->bi_opf has all the
+	selected flags set.
+
+Examples:
+
+Replaces the 32nd byte of READ bios with the value 1::
+
+  corrupt_bio_byte 32 r 1 0
+
+Replaces the 224th byte of REQ_META (=32) bios with the value 0::
+
+  corrupt_bio_byte 224 w 0 32
diff --git a/Documentation/admin-guide/device-mapper/dm-init.rst b/Documentation/admin-guide/device-mapper/dm-init.rst
new file mode 100644
index 000000000000..e5242ff17e9b
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-init.rst
@@ -0,0 +1,125 @@
+================================
+Early creation of mapped devices
+================================
+
+It is possible to configure a device-mapper device to act as the root device for
+your system in two ways.
+
+The first is to build an initial ramdisk which boots to a minimal userspace
+which configures the device, then pivot_root(8) in to it.
+
+The second is to create one or more device-mappers using the module parameter
+"dm-mod.create=" through the kernel boot command line argument.
+
+The format is specified as a string of data separated by commas and optionally
+semi-colons, where:
+
+ - a comma is used to separate fields like name, uuid, flags and table
+   (specifies one device)
+ - a semi-colon is used to separate devices.
+
+So the format will look like this::
+
+ dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
+
+Where::
+
+	<name>		::= The device name.
+	<uuid>		::= xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx | ""
+	<minor>		::= The device minor number | ""
+	<flags>		::= "ro" | "rw"
+	<table>		::= <start_sector> <num_sectors> <target_type> <target_args>
+	<target_type>	::= "verity" | "linear" | ... (see list below)
+
+The dm line should be equivalent to the one used by the dmsetup tool with the
+`--concise` argument.
+
+Target types
+============
+
+Not all target types are available as there are serious risks in allowing
+activation of certain DM targets without first using userspace tools to check
+the validity of associated metadata.
+
+======================= =======================================================
+`cache`			constrained, userspace should verify cache device
+`crypt`			allowed
+`delay`			allowed
+`era`			constrained, userspace should verify metadata device
+`flakey`		constrained, meant for test
+`linear`		allowed
+`log-writes`		constrained, userspace should verify metadata device
+`mirror`		constrained, userspace should verify main/mirror device
+`raid`			constrained, userspace should verify metadata device
+`snapshot`		constrained, userspace should verify src/dst device
+`snapshot-origin`	allowed
+`snapshot-merge`	constrained, userspace should verify src/dst device
+`striped`		allowed
+`switch`		constrained, userspace should verify dev path
+`thin`			constrained, requires dm target message from userspace
+`thin-pool`		constrained, requires dm target message from userspace
+`verity`		allowed
+`writecache`		constrained, userspace should verify cache device
+`zero`			constrained, not meant for rootfs
+======================= =======================================================
+
+If the target is not listed above, it is constrained by default (not tested).
+
+Examples
+========
+An example of booting to a linear array made up of user-mode linux block
+devices::
+
+  dm-mod.create="lroot,,,rw, 0 4096 linear 98:16 0, 4096 4096 linear 98:32 0" root=/dev/dm-0
+
+This will boot to a rw dm-linear target of 8192 sectors split across two block
+devices identified by their major:minor numbers.  After boot, udev will rename
+this target to /dev/mapper/lroot (depending on the rules). No uuid was assigned.
+
+An example of multiple device-mappers, with the dm-mod.create="..." contents
+is shown here split on multiple lines for readability::
+
+  dm-linear,,1,rw,
+    0 32768 linear 8:1 0,
+    32768 1024000 linear 8:2 0;
+  dm-verity,,3,ro,
+    0 1638400 verity 1 /dev/sdc1 /dev/sdc2 4096 4096 204800 1 sha256
+    ac87db56303c9c1da433d7209b5a6ef3e4779df141200cbd7c157dcb8dd89c42
+    5ebfe87f7df3235b80a117ebc4078e44f55045487ad4a96581d1adb564615b51
+
+Other examples (per target):
+
+"crypt"::
+
+  dm-crypt,,8,ro,
+    0 1048576 crypt aes-xts-plain64
+    babebabebabebabebabebabebabebabebabebabebabebabebabebabebabebabe 0
+    /dev/sda 0 1 allow_discards
+
+"delay"::
+
+  dm-delay,,4,ro,0 409600 delay /dev/sda1 0 500
+
+"linear"::
+
+  dm-linear,,,rw,
+    0 32768 linear /dev/sda1 0,
+    32768 1024000 linear /dev/sda2 0,
+    1056768 204800 linear /dev/sda3 0,
+    1261568 512000 linear /dev/sda4 0
+
+"snapshot-origin"::
+
+  dm-snap-orig,,4,ro,0 409600 snapshot-origin 8:2
+
+"striped"::
+
+  dm-striped,,4,ro,0 1638400 striped 4 4096
+  /dev/sda1 0 /dev/sda2 0 /dev/sda3 0 /dev/sda4 0
+
+"verity"::
+
+  dm-verity,,4,ro,
+    0 1638400 verity 1 8:1 8:2 4096 4096 204800 1 sha256
+    fb1a5a0f00deb908d8b53cb270858975e76cf64105d412ce764225d53b8f3cfd
+    51934789604d1b92399c52e7cb149d1b3a1b74bbbcb103b2a0aaacbed5c08584
diff --git a/Documentation/admin-guide/device-mapper/dm-integrity.rst b/Documentation/admin-guide/device-mapper/dm-integrity.rst
new file mode 100644
index 000000000000..a30aa91b5fbe
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-integrity.rst
@@ -0,0 +1,259 @@
+============
+dm-integrity
+============
+
+The dm-integrity target emulates a block device that has additional
+per-sector tags that can be used for storing integrity information.
+
+A general problem with storing integrity tags with every sector is that
+writing the sector and the integrity tag must be atomic - i.e. in case of
+crash, either both sector and integrity tag or none of them is written.
+
+To guarantee write atomicity, the dm-integrity target uses journal, it
+writes sector data and integrity tags into a journal, commits the journal
+and then copies the data and integrity tags to their respective location.
+
+The dm-integrity target can be used with the dm-crypt target - in this
+situation the dm-crypt target creates the integrity data and passes them
+to the dm-integrity target via bio_integrity_payload attached to the bio.
+In this mode, the dm-crypt and dm-integrity targets provide authenticated
+disk encryption - if the attacker modifies the encrypted device, an I/O
+error is returned instead of random data.
+
+The dm-integrity target can also be used as a standalone target, in this
+mode it calculates and verifies the integrity tag internally. In this
+mode, the dm-integrity target can be used to detect silent data
+corruption on the disk or in the I/O path.
+
+There's an alternate mode of operation where dm-integrity uses bitmap
+instead of a journal. If a bit in the bitmap is 1, the corresponding
+region's data and integrity tags are not synchronized - if the machine
+crashes, the unsynchronized regions will be recalculated. The bitmap mode
+is faster than the journal mode, because we don't have to write the data
+twice, but it is also less reliable, because if data corruption happens
+when the machine crashes, it may not be detected.
+
+When loading the target for the first time, the kernel driver will format
+the device. But it will only format the device if the superblock contains
+zeroes. If the superblock is neither valid nor zeroed, the dm-integrity
+target can't be loaded.
+
+To use the target for the first time:
+
+1. overwrite the superblock with zeroes
+2. load the dm-integrity target with one-sector size, the kernel driver
+   will format the device
+3. unload the dm-integrity target
+4. read the "provided_data_sectors" value from the superblock
+5. load the dm-integrity target with the the target size
+   "provided_data_sectors"
+6. if you want to use dm-integrity with dm-crypt, load the dm-crypt target
+   with the size "provided_data_sectors"
+
+
+Target arguments:
+
+1. the underlying block device
+
+2. the number of reserved sector at the beginning of the device - the
+   dm-integrity won't read of write these sectors
+
+3. the size of the integrity tag (if "-" is used, the size is taken from
+   the internal-hash algorithm)
+
+4. mode:
+
+	D - direct writes (without journal)
+		in this mode, journaling is
+		not used and data sectors and integrity tags are written
+		separately. In case of crash, it is possible that the data
+		and integrity tag doesn't match.
+	J - journaled writes
+		data and integrity tags are written to the
+		journal and atomicity is guaranteed. In case of crash,
+		either both data and tag or none of them are written. The
+		journaled mode degrades write throughput twice because the
+		data have to be written twice.
+	B - bitmap mode - data and metadata are written without any
+		synchronization, the driver maintains a bitmap of dirty
+		regions where data and metadata don't match. This mode can
+		only be used with internal hash.
+	R - recovery mode - in this mode, journal is not replayed,
+		checksums are not checked and writes to the device are not
+		allowed. This mode is useful for data recovery if the
+		device cannot be activated in any of the other standard
+		modes.
+
+5. the number of additional arguments
+
+Additional arguments:
+
+journal_sectors:number
+	The size of journal, this argument is used only if formatting the
+	device. If the device is already formatted, the value from the
+	superblock is used.
+
+interleave_sectors:number
+	The number of interleaved sectors. This values is rounded down to
+	a power of two. If the device is already formatted, the value from
+	the superblock is used.
+
+meta_device:device
+	Don't interleave the data and metadata on on device. Use a
+	separate device for metadata.
+
+buffer_sectors:number
+	The number of sectors in one buffer. The value is rounded down to
+	a power of two.
+
+	The tag area is accessed using buffers, the buffer size is
+	configurable. The large buffer size means that the I/O size will
+	be larger, but there could be less I/Os issued.
+
+journal_watermark:number
+	The journal watermark in percents. When the size of the journal
+	exceeds this watermark, the thread that flushes the journal will
+	be started.
+
+commit_time:number
+	Commit time in milliseconds. When this time passes, the journal is
+	written. The journal is also written immediatelly if the FLUSH
+	request is received.
+
+internal_hash:algorithm(:key)	(the key is optional)
+	Use internal hash or crc.
+	When this argument is used, the dm-integrity target won't accept
+	integrity tags from the upper target, but it will automatically
+	generate and verify the integrity tags.
+
+	You can use a crc algorithm (such as crc32), then integrity target
+	will protect the data against accidental corruption.
+	You can also use a hmac algorithm (for example
+	"hmac(sha256):0123456789abcdef"), in this mode it will provide
+	cryptographic authentication of the data without encryption.
+
+	When this argument is not used, the integrity tags are accepted
+	from an upper layer target, such as dm-crypt. The upper layer
+	target should check the validity of the integrity tags.
+
+recalculate
+	Recalculate the integrity tags automatically. It is only valid
+	when using internal hash.
+
+journal_crypt:algorithm(:key)	(the key is optional)
+	Encrypt the journal using given algorithm to make sure that the
+	attacker can't read the journal. You can use a block cipher here
+	(such as "cbc(aes)") or a stream cipher (for example "chacha20",
+	"salsa20", "ctr(aes)" or "ecb(arc4)").
+
+	The journal contains history of last writes to the block device,
+	an attacker reading the journal could see the last sector nubmers
+	that were written. From the sector numbers, the attacker can infer
+	the size of files that were written. To protect against this
+	situation, you can encrypt the journal.
+
+journal_mac:algorithm(:key)	(the key is optional)
+	Protect sector numbers in the journal from accidental or malicious
+	modification. To protect against accidental modification, use a
+	crc algorithm, to protect against malicious modification, use a
+	hmac algorithm with a key.
+
+	This option is not needed when using internal-hash because in this
+	mode, the integrity of journal entries is checked when replaying
+	the journal. Thus, modified sector number would be detected at
+	this stage.
+
+block_size:number
+	The size of a data block in bytes.  The larger the block size the
+	less overhead there is for per-block integrity metadata.
+	Supported values are 512, 1024, 2048 and 4096 bytes.  If not
+	specified the default block size is 512 bytes.
+
+sectors_per_bit:number
+	In the bitmap mode, this parameter specifies the number of
+	512-byte sectors that corresponds to one bitmap bit.
+
+bitmap_flush_interval:number
+	The bitmap flush interval in milliseconds. The metadata buffers
+	are synchronized when this interval expires.
+
+
+The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
+be changed when reloading the target (load an inactive table and swap the
+tables with suspend and resume). The other arguments should not be changed
+when reloading the target because the layout of disk data depend on them
+and the reloaded target would be non-functional.
+
+
+The layout of the formatted block device:
+
+* reserved sectors
+    (they are not used by this target, they can be used for
+    storing LUKS metadata or for other purpose), the size of the reserved
+    area is specified in the target arguments
+
+* superblock (4kiB)
+	* magic string - identifies that the device was formatted
+	* version
+	* log2(interleave sectors)
+	* integrity tag size
+	* the number of journal sections
+	* provided data sectors - the number of sectors that this target
+	  provides (i.e. the size of the device minus the size of all
+	  metadata and padding). The user of this target should not send
+	  bios that access data beyond the "provided data sectors" limit.
+	* flags
+	    SB_FLAG_HAVE_JOURNAL_MAC
+		- a flag is set if journal_mac is used
+	    SB_FLAG_RECALCULATING
+		- recalculating is in progress
+	    SB_FLAG_DIRTY_BITMAP
+		- journal area contains the bitmap of dirty
+		  blocks
+	* log2(sectors per block)
+	* a position where recalculating finished
+* journal
+	The journal is divided into sections, each section contains:
+
+	* metadata area (4kiB), it contains journal entries
+
+	  - every journal entry contains:
+
+		* logical sector (specifies where the data and tag should
+		  be written)
+		* last 8 bytes of data
+		* integrity tag (the size is specified in the superblock)
+
+	  - every metadata sector ends with
+
+		* mac (8-bytes), all the macs in 8 metadata sectors form a
+		  64-byte value. It is used to store hmac of sector
+		  numbers in the journal section, to protect against a
+		  possibility that the attacker tampers with sector
+		  numbers in the journal.
+		* commit id
+
+	* data area (the size is variable; it depends on how many journal
+	  entries fit into the metadata area)
+
+	    - every sector in the data area contains:
+
+		* data (504 bytes of data, the last 8 bytes are stored in
+		  the journal entry)
+		* commit id
+
+	To test if the whole journal section was written correctly, every
+	512-byte sector of the journal ends with 8-byte commit id. If the
+	commit id matches on all sectors in a journal section, then it is
+	assumed that the section was written correctly. If the commit id
+	doesn't match, the section was written partially and it should not
+	be replayed.
+
+* one or more runs of interleaved tags and data.
+    Each run contains:
+
+	* tag area - it contains integrity tags. There is one tag for each
+	  sector in the data area
+	* data area - it contains data sectors. The number of data sectors
+	  in one run must be a power of two. log2 of this value is stored
+	  in the superblock.
diff --git a/Documentation/admin-guide/device-mapper/dm-io.rst b/Documentation/admin-guide/device-mapper/dm-io.rst
new file mode 100644
index 000000000000..d2492917a1f5
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-io.rst
@@ -0,0 +1,75 @@
+=====
+dm-io
+=====
+
+Dm-io provides synchronous and asynchronous I/O services. There are three
+types of I/O services available, and each type has a sync and an async
+version.
+
+The user must set up an io_region structure to describe the desired location
+of the I/O. Each io_region indicates a block-device along with the starting
+sector and size of the region::
+
+   struct io_region {
+      struct block_device *bdev;
+      sector_t sector;
+      sector_t count;
+   };
+
+Dm-io can read from one io_region or write to one or more io_regions. Writes
+to multiple regions are specified by an array of io_region structures.
+
+The first I/O service type takes a list of memory pages as the data buffer for
+the I/O, along with an offset into the first page::
+
+   struct page_list {
+      struct page_list *next;
+      struct page *page;
+   };
+
+   int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
+                  struct page_list *pl, unsigned int offset,
+                  unsigned long *error_bits);
+   int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
+                   struct page_list *pl, unsigned int offset,
+                   io_notify_fn fn, void *context);
+
+The second I/O service type takes an array of bio vectors as the data buffer
+for the I/O. This service can be handy if the caller has a pre-assembled bio,
+but wants to direct different portions of the bio to different devices::
+
+   int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where,
+                       int rw, struct bio_vec *bvec,
+                       unsigned long *error_bits);
+   int dm_io_async_bvec(unsigned int num_regions, struct io_region *where,
+                        int rw, struct bio_vec *bvec,
+                        io_notify_fn fn, void *context);
+
+The third I/O service type takes a pointer to a vmalloc'd memory buffer as the
+data buffer for the I/O. This service can be handy if the caller needs to do
+I/O to a large region but doesn't want to allocate a large number of individual
+memory pages::
+
+   int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
+                     void *data, unsigned long *error_bits);
+   int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
+                      void *data, io_notify_fn fn, void *context);
+
+Callers of the asynchronous I/O services must include the name of a completion
+callback routine and a pointer to some context data for the I/O::
+
+   typedef void (*io_notify_fn)(unsigned long error, void *context);
+
+The "error" parameter in this callback, as well as the `*error` parameter in
+all of the synchronous versions, is a bitset (instead of a simple error value).
+In the case of an write-I/O to multiple regions, this bitset allows dm-io to
+indicate success or failure on each individual region.
+
+Before using any of the dm-io services, the user should call dm_io_get()
+and specify the number of pages they expect to perform I/O on concurrently.
+Dm-io will attempt to resize its mempool to make sure enough pages are
+always available in order to avoid unnecessary waiting while performing I/O.
+
+When the user is finished using the dm-io services, they should call
+dm_io_put() and specify the same number of pages that were given on the
+dm_io_get() call.
diff --git a/Documentation/admin-guide/device-mapper/dm-log.rst b/Documentation/admin-guide/device-mapper/dm-log.rst
new file mode 100644
index 000000000000..ba4fce39bc27
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-log.rst
@@ -0,0 +1,57 @@
+=====================
+Device-Mapper Logging
+=====================
+The device-mapper logging code is used by some of the device-mapper
+RAID targets to track regions of the disk that are not consistent.
+A region (or portion of the address space) of the disk may be
+inconsistent because a RAID stripe is currently being operated on or
+a machine died while the region was being altered.  In the case of
+mirrors, a region would be considered dirty/inconsistent while you
+are writing to it because the writes need to be replicated for all
+the legs of the mirror and may not reach the legs at the same time.
+Once all writes are complete, the region is considered clean again.
+
+There is a generic logging interface that the device-mapper RAID
+implementations use to perform logging operations (see
+dm_dirty_log_type in include/linux/dm-dirty-log.h).  Various different
+logging implementations are available and provide different
+capabilities.  The list includes:
+
+==============	==============================================================
+Type		Files
+==============	==============================================================
+disk		drivers/md/dm-log.c
+core		drivers/md/dm-log.c
+userspace	drivers/md/dm-log-userspace* include/linux/dm-log-userspace.h
+==============	==============================================================
+
+The "disk" log type
+-------------------
+This log implementation commits the log state to disk.  This way, the
+logging state survives reboots/crashes.
+
+The "core" log type
+-------------------
+This log implementation keeps the log state in memory.  The log state
+will not survive a reboot or crash, but there may be a small boost in
+performance.  This method can also be used if no storage device is
+available for storing log state.
+
+The "userspace" log type
+------------------------
+This log type simply provides a way to export the log API to userspace,
+so log implementations can be done there.  This is done by forwarding most
+logging requests to userspace, where a daemon receives and processes the
+request.
+
+The structure used for communication between kernel and userspace are
+located in include/linux/dm-log-userspace.h.  Due to the frequency,
+diversity, and 2-way communication nature of the exchanges between
+kernel and userspace, 'connector' is used as the interface for
+communication.
+
+There are currently two userspace log implementations that leverage this
+framework - "clustered-disk" and "clustered-core".  These implementations
+provide a cluster-coherent log for shared-storage.  Device-mapper mirroring
+can be used in a shared-storage environment when the cluster log implementations
+are employed.
diff --git a/Documentation/admin-guide/device-mapper/dm-queue-length.rst b/Documentation/admin-guide/device-mapper/dm-queue-length.rst
new file mode 100644
index 000000000000..d8e381c1cb02
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-queue-length.rst
@@ -0,0 +1,48 @@
+===============
+dm-queue-length
+===============
+
+dm-queue-length is a path selector module for device-mapper targets,
+which selects a path with the least number of in-flight I/Os.
+The path selector name is 'queue-length'.
+
+Table parameters for each path: [<repeat_count>]
+
+::
+
+	<repeat_count>: The number of I/Os to dispatch using the selected
+			path before switching to the next path.
+			If not given, internal default is used. To check
+			the default value, see the activated table.
+
+Status for each path: <status> <fail-count> <in-flight>
+
+::
+
+	<status>: 'A' if the path is active, 'F' if the path is failed.
+	<fail-count>: The number of path failures.
+	<in-flight>: The number of in-flight I/Os on the path.
+
+
+Algorithm
+=========
+
+dm-queue-length increments/decrements 'in-flight' when an I/O is
+dispatched/completed respectively.
+dm-queue-length selects a path with the minimum 'in-flight'.
+
+
+Examples
+========
+In case that 2 paths (sda and sdb) are used with repeat_count == 128.
+
+::
+
+  # echo "0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128" \
+    dmsetup create test
+  #
+  # dmsetup table
+  test: 0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128
+  #
+  # dmsetup status
+  test: 0 10 multipath 2 0 0 0 1 1 E 0 2 1 8:0 A 0 0 8:16 A 0 0
diff --git a/Documentation/admin-guide/device-mapper/dm-raid.rst b/Documentation/admin-guide/device-mapper/dm-raid.rst
new file mode 100644
index 000000000000..2fe255b130fb
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-raid.rst
@@ -0,0 +1,419 @@
+=======
+dm-raid
+=======
+
+The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
+It allows the MD RAID drivers to be accessed using a device-mapper
+interface.
+
+
+Mapping Table Interface
+-----------------------
+The target is named "raid" and it accepts the following parameters::
+
+  <raid_type> <#raid_params> <raid_params> \
+    <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>]
+
+<raid_type>:
+
+  ============= ===============================================================
+  raid0		RAID0 striping (no resilience)
+  raid1		RAID1 mirroring
+  raid4		RAID4 with dedicated last parity disk
+  raid5_n 	RAID5 with dedicated last parity disk supporting takeover
+		Same as raid4
+
+		- Transitory layout
+  raid5_la	RAID5 left asymmetric
+
+		- rotating parity 0 with data continuation
+  raid5_ra	RAID5 right asymmetric
+
+		- rotating parity N with data continuation
+  raid5_ls	RAID5 left symmetric
+
+		- rotating parity 0 with data restart
+  raid5_rs 	RAID5 right symmetric
+
+		- rotating parity N with data restart
+  raid6_zr	RAID6 zero restart
+
+		- rotating parity zero (left-to-right) with data restart
+  raid6_nr	RAID6 N restart
+
+		- rotating parity N (right-to-left) with data restart
+  raid6_nc	RAID6 N continue
+
+		- rotating parity N (right-to-left) with data continuation
+  raid6_n_6	RAID6 with dedicate parity disks
+
+		- parity and Q-syndrome on the last 2 disks;
+		  layout for takeover from/to raid4/raid5_n
+  raid6_la_6	Same as "raid_la" plus dedicated last Q-syndrome disk
+
+		- layout for takeover from raid5_la from/to raid6
+  raid6_ra_6	Same as "raid5_ra" dedicated last Q-syndrome disk
+
+		- layout for takeover from raid5_ra from/to raid6
+  raid6_ls_6	Same as "raid5_ls" dedicated last Q-syndrome disk
+
+		- layout for takeover from raid5_ls from/to raid6
+  raid6_rs_6	Same as "raid5_rs" dedicated last Q-syndrome disk
+
+		- layout for takeover from raid5_rs from/to raid6
+  raid10        Various RAID10 inspired algorithms chosen by additional params
+		(see raid10_format and raid10_copies below)
+
+		- RAID10: Striped Mirrors (aka 'Striping on top of mirrors')
+		- RAID1E: Integrated Adjacent Stripe Mirroring
+		- RAID1E: Integrated Offset Stripe Mirroring
+		- and other similar RAID10 variants
+  ============= ===============================================================
+
+  Reference: Chapter 4 of
+  http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
+
+<#raid_params>: The number of parameters that follow.
+
+<raid_params> consists of
+
+    Mandatory parameters:
+        <chunk_size>:
+		      Chunk size in sectors.  This parameter is often known as
+		      "stripe size".  It is the only mandatory parameter and
+		      is placed first.
+
+    followed by optional parameters (in any order):
+	[sync|nosync]
+		Force or prevent RAID initialization.
+
+	[rebuild <idx>]
+		Rebuild drive number 'idx' (first drive is 0).
+
+	[daemon_sleep <ms>]
+		Interval between runs of the bitmap daemon that
+		clear bits.  A longer interval means less bitmap I/O but
+		resyncing after a failure is likely to take longer.
+
+	[min_recovery_rate <kB/sec/disk>]
+		Throttle RAID initialization
+	[max_recovery_rate <kB/sec/disk>]
+		Throttle RAID initialization
+	[write_mostly <idx>]
+		Mark drive index 'idx' write-mostly.
+	[max_write_behind <sectors>]
+		See '--write-behind=' (man mdadm)
+	[stripe_cache <sectors>]
+		Stripe cache size (RAID 4/5/6 only)
+	[region_size <sectors>]
+		The region_size multiplied by the number of regions is the
+		logical size of the array.  The bitmap records the device
+		synchronisation state for each region.
+
+        [raid10_copies   <# copies>], [raid10_format   <near|far|offset>]
+		These two options are used to alter the default layout of
+		a RAID10 configuration.  The number of copies is can be
+		specified, but the default is 2.  There are also three
+		variations to how the copies are laid down - the default
+		is "near".  Near copies are what most people think of with
+		respect to mirroring.  If these options are left unspecified,
+		or 'raid10_copies 2' and/or 'raid10_format near' are given,
+		then the layouts for 2, 3 and 4 devices	are:
+
+		========	 ==========	   ==============
+		2 drives         3 drives          4 drives
+		========	 ==========	   ==============
+		A1  A1           A1  A1  A2        A1  A1  A2  A2
+		A2  A2           A2  A3  A3        A3  A3  A4  A4
+		A3  A3           A4  A4  A5        A5  A5  A6  A6
+		A4  A4           A5  A6  A6        A7  A7  A8  A8
+		..  ..           ..  ..  ..        ..  ..  ..  ..
+		========	 ==========	   ==============
+
+		The 2-device layout is equivalent 2-way RAID1.  The 4-device
+		layout is what a traditional RAID10 would look like.  The
+		3-device layout is what might be called a 'RAID1E - Integrated
+		Adjacent Stripe Mirroring'.
+
+		If 'raid10_copies 2' and 'raid10_format far', then the layouts
+		for 2, 3 and 4 devices are:
+
+		========	     ============	  ===================
+		2 drives             3 drives             4 drives
+		========	     ============	  ===================
+		A1  A2               A1   A2   A3         A1   A2   A3   A4
+		A3  A4               A4   A5   A6         A5   A6   A7   A8
+		A5  A6               A7   A8   A9         A9   A10  A11  A12
+		..  ..               ..   ..   ..         ..   ..   ..   ..
+		A2  A1               A3   A1   A2         A2   A1   A4   A3
+		A4  A3               A6   A4   A5         A6   A5   A8   A7
+		A6  A5               A9   A7   A8         A10  A9   A12  A11
+		..  ..               ..   ..   ..         ..   ..   ..   ..
+		========	     ============	  ===================
+
+		If 'raid10_copies 2' and 'raid10_format offset', then the
+		layouts for 2, 3 and 4 devices are:
+
+		========       ==========         ================
+		2 drives       3 drives           4 drives
+		========       ==========         ================
+		A1  A2         A1  A2  A3         A1  A2  A3  A4
+		A2  A1         A3  A1  A2         A2  A1  A4  A3
+		A3  A4         A4  A5  A6         A5  A6  A7  A8
+		A4  A3         A6  A4  A5         A6  A5  A8  A7
+		A5  A6         A7  A8  A9         A9  A10 A11 A12
+		A6  A5         A9  A7  A8         A10 A9  A12 A11
+		..  ..         ..  ..  ..         ..  ..  ..  ..
+		========       ==========         ================
+
+		Here we see layouts closely akin to 'RAID1E - Integrated
+		Offset Stripe Mirroring'.
+
+        [delta_disks <N>]
+		The delta_disks option value (-251 < N < +251) triggers
+		device removal (negative value) or device addition (positive
+		value) to any reshape supporting raid levels 4/5/6 and 10.
+		RAID levels 4/5/6 allow for addition of devices (metadata
+		and data device tuple), raid10_near and raid10_offset only
+		allow for device addition. raid10_far does not support any
+		reshaping at all.
+		A minimum of devices have to be kept to enforce resilience,
+		which is 3 devices for raid4/5 and 4 devices for raid6.
+
+        [data_offset <sectors>]
+		This option value defines the offset into each data device
+		where the data starts. This is used to provide out-of-place
+		reshaping space to avoid writing over data while
+		changing the layout of stripes, hence an interruption/crash
+		may happen at any time without the risk of losing data.
+		E.g. when adding devices to an existing raid set during
+		forward reshaping, the out-of-place space will be allocated
+		at the beginning of each raid device. The kernel raid4/5/6/10
+		MD personalities supporting such device addition will read the data from
+		the existing first stripes (those with smaller number of stripes)
+		starting at data_offset to fill up a new stripe with the larger
+		number of stripes, calculate the redundancy blocks (CRC/Q-syndrome)
+		and write that new stripe to offset 0. Same will be applied to all
+		N-1 other new stripes. This out-of-place scheme is used to change
+		the RAID type (i.e. the allocation algorithm) as well, e.g.
+		changing from raid5_ls to raid5_n.
+
+	[journal_dev <dev>]
+		This option adds a journal device to raid4/5/6 raid sets and
+		uses it to close the 'write hole' caused by the non-atomic updates
+		to the component devices which can cause data loss during recovery.
+		The journal device is used as writethrough thus causing writes to
+		be throttled versus non-journaled raid4/5/6 sets.
+		Takeover/reshape is not possible with a raid4/5/6 journal device;
+		it has to be deconfigured before requesting these.
+
+	[journal_mode <mode>]
+		This option sets the caching mode on journaled raid4/5/6 raid sets
+		(see 'journal_dev <dev>' above) to 'writethrough' or 'writeback'.
+		If 'writeback' is selected the journal device has to be resilient
+		and must not suffer from the 'write hole' problem itself (e.g. use
+		raid1 or raid10) to avoid a single point of failure.
+
+<#raid_devs>: The number of devices composing the array.
+	Each device consists of two entries.  The first is the device
+	containing the metadata (if any); the second is the one containing the
+	data. A Maximum of 64 metadata/data device entries are supported
+	up to target version 1.8.0.
+	1.9.0 supports up to 253 which is enforced by the used MD kernel runtime.
+
+	If a drive has failed or is missing at creation time, a '-' can be
+	given for both the metadata and data drives for a given position.
+
+
+Example Tables
+--------------
+
+::
+
+  # RAID4 - 4 data drives, 1 parity (no metadata devices)
+  # No metadata devices specified to hold superblock/bitmap info
+  # Chunk size of 1MiB
+  # (Lines separated for easy reading)
+
+  0 1960893648 raid \
+          raid4 1 2048 \
+          5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
+
+  # RAID4 - 4 data drives, 1 parity (with metadata devices)
+  # Chunk size of 1MiB, force RAID initialization,
+  #       min recovery rate at 20 kiB/sec/disk
+
+  0 1960893648 raid \
+          raid4 4 2048 sync min_recovery_rate 20 \
+          5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
+
+
+Status Output
+-------------
+'dmsetup table' displays the table used to construct the mapping.
+The optional parameters are always printed in the order listed
+above with "sync" or "nosync" always output ahead of the other
+arguments, regardless of the order used when originally loading the table.
+Arguments that can be repeated are ordered by value.
+
+
+'dmsetup status' yields information on the state and health of the array.
+The output is as follows (normally a single line, but expanded here for
+clarity)::
+
+  1: <s> <l> raid \
+  2:      <raid_type> <#devices> <health_chars> \
+  3:      <sync_ratio> <sync_action> <mismatch_cnt>
+
+Line 1 is the standard output produced by device-mapper.
+
+Line 2 & 3 are produced by the raid target and are best explained by example::
+
+        0 1960893648 raid raid4 5 AAAAA 2/490221568 init 0
+
+Here we can see the RAID type is raid4, there are 5 devices - all of
+which are 'A'live, and the array is 2/490221568 complete with its initial
+recovery.  Here is a fuller description of the individual fields:
+
+	=============== =========================================================
+	<raid_type>     Same as the <raid_type> used to create the array.
+	<health_chars>  One char for each device, indicating:
+
+			- 'A' = alive and in-sync
+			- 'a' = alive but not in-sync
+			- 'D' = dead/failed.
+	<sync_ratio>    The ratio indicating how much of the array has undergone
+			the process described by 'sync_action'.  If the
+			'sync_action' is "check" or "repair", then the process
+			of "resync" or "recover" can be considered complete.
+	<sync_action>   One of the following possible states:
+
+			idle
+				- No synchronization action is being performed.
+			frozen
+				- The current action has been halted.
+			resync
+				- Array is undergoing its initial synchronization
+				  or is resynchronizing after an unclean shutdown
+				  (possibly aided by a bitmap).
+			recover
+				- A device in the array is being rebuilt or
+				  replaced.
+			check
+				- A user-initiated full check of the array is
+				  being performed.  All blocks are read and
+				  checked for consistency.  The number of
+				  discrepancies found are recorded in
+				  <mismatch_cnt>.  No changes are made to the
+				  array by this action.
+			repair
+				- The same as "check", but discrepancies are
+				  corrected.
+			reshape
+				- The array is undergoing a reshape.
+	<mismatch_cnt>  The number of discrepancies found between mirror copies
+			in RAID1/10 or wrong parity values found in RAID4/5/6.
+			This value is valid only after a "check" of the array
+			is performed.  A healthy array has a 'mismatch_cnt' of 0.
+	<data_offset>   The current data offset to the start of the user data on
+			each component device of a raid set (see the respective
+			raid parameter to support out-of-place reshaping).
+	<journal_char>	- 'A' - active write-through journal device.
+			- 'a' - active write-back journal device.
+			- 'D' - dead journal device.
+			- '-' - no journal device.
+	=============== =========================================================
+
+
+Message Interface
+-----------------
+The dm-raid target will accept certain actions through the 'message' interface.
+('man dmsetup' for more information on the message interface.)  These actions
+include:
+
+	========= ================================================
+	"idle"    Halt the current sync action.
+	"frozen"  Freeze the current sync action.
+	"resync"  Initiate/continue a resync.
+	"recover" Initiate/continue a recover process.
+	"check"   Initiate a check (i.e. a "scrub") of the array.
+	"repair"  Initiate a repair of the array.
+	========= ================================================
+
+
+Discard Support
+---------------
+The implementation of discard support among hardware vendors varies.
+When a block is discarded, some storage devices will return zeroes when
+the block is read.  These devices set the 'discard_zeroes_data'
+attribute.  Other devices will return random data.  Confusingly, some
+devices that advertise 'discard_zeroes_data' will not reliably return
+zeroes when discarded blocks are read!  Since RAID 4/5/6 uses blocks
+from a number of devices to calculate parity blocks and (for performance
+reasons) relies on 'discard_zeroes_data' being reliable, it is important
+that the devices be consistent.  Blocks may be discarded in the middle
+of a RAID 4/5/6 stripe and if subsequent read results are not
+consistent, the parity blocks may be calculated differently at any time;
+making the parity blocks useless for redundancy.  It is important to
+understand how your hardware behaves with discards if you are going to
+enable discards with RAID 4/5/6.
+
+Since the behavior of storage devices is unreliable in this respect,
+even when reporting 'discard_zeroes_data', by default RAID 4/5/6
+discard support is disabled -- this ensures data integrity at the
+expense of losing some performance.
+
+Storage devices that properly support 'discard_zeroes_data' are
+increasingly whitelisted in the kernel and can thus be trusted.
+
+For trusted devices, the following dm-raid module parameter can be set
+to safely enable discard support for RAID 4/5/6:
+
+    'devices_handle_discards_safely'
+
+
+Version History
+---------------
+
+::
+
+ 1.0.0	Initial version.  Support for RAID 4/5/6
+ 1.1.0	Added support for RAID 1
+ 1.2.0	Handle creation of arrays that contain failed devices.
+ 1.3.0	Added support for RAID 10
+ 1.3.1	Allow device replacement/rebuild for RAID 10
+ 1.3.2	Fix/improve redundancy checking for RAID10
+ 1.4.0	Non-functional change.  Removes arg from mapping function.
+ 1.4.1	RAID10 fix redundancy validation checks (commit 55ebbb5).
+ 1.4.2	Add RAID10 "far" and "offset" algorithm support.
+ 1.5.0	Add message interface to allow manipulation of the sync_action.
+	New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
+ 1.5.1	Add ability to restore transiently failed devices on resume.
+ 1.5.2	'mismatch_cnt' is zero unless [last_]sync_action is "check".
+ 1.6.0	Add discard support (and devices_handle_discard_safely module param).
+ 1.7.0	Add support for MD RAID0 mappings.
+ 1.8.0	Explicitly check for compatible flags in the superblock metadata
+	and reject to start the raid set if any are set by a newer
+	target version, thus avoiding data corruption on a raid set
+	with a reshape in progress.
+ 1.9.0	Add support for RAID level takeover/reshape/region size
+	and set size reduction.
+ 1.9.1	Fix activation of existing RAID 4/10 mapped devices
+ 1.9.2	Don't emit '- -' on the status table line in case the constructor
+	fails reading a superblock. Correctly emit 'maj:min1 maj:min2' and
+	'D' on the status line.  If '- -' is passed into the constructor, emit
+	'- -' on the table line and '-' as the status line health character.
+ 1.10.0	Add support for raid4/5/6 journal device
+ 1.10.1	Fix data corruption on reshape request
+ 1.11.0	Fix table line argument order
+	(wrong raid10_copies/raid10_format sequence)
+ 1.11.1	Add raid4/5/6 journal write-back support via journal_mode option
+ 1.12.1	Fix for MD deadlock between mddev_suspend() and md_write_start() available
+ 1.13.0	Fix dev_health status at end of "recover" (was 'a', now 'A')
+ 1.13.1	Fix deadlock caused by early md_stop_writes().  Also fix size an
+	state races.
+ 1.13.2	Fix raid redundancy validation and avoid keeping raid set frozen
+ 1.14.0	Fix reshape race on small devices.  Fix stripe adding reshape
+	deadlock/potential data corruption.  Update superblock when
+	specific devices are requested via rebuild.  Fix RAID leg
+	rebuild errors.
diff --git a/Documentation/admin-guide/device-mapper/dm-service-time.rst b/Documentation/admin-guide/device-mapper/dm-service-time.rst
new file mode 100644
index 000000000000..facf277fc13c
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-service-time.rst
@@ -0,0 +1,101 @@
+===============
+dm-service-time
+===============
+
+dm-service-time is a path selector module for device-mapper targets,
+which selects a path with the shortest estimated service time for
+the incoming I/O.
+
+The service time for each path is estimated by dividing the total size
+of in-flight I/Os on a path with the performance value of the path.
+The performance value is a relative throughput value among all paths
+in a path-group, and it can be specified as a table argument.
+
+The path selector name is 'service-time'.
+
+Table parameters for each path:
+
+    [<repeat_count> [<relative_throughput>]]
+	<repeat_count>:
+			The number of I/Os to dispatch using the selected
+			path before switching to the next path.
+			If not given, internal default is used.  To check
+			the default value, see the activated table.
+	<relative_throughput>:
+			The relative throughput value of the path
+			among all paths in the path-group.
+			The valid range is 0-100.
+			If not given, minimum value '1' is used.
+			If '0' is given, the path isn't selected while
+			other paths having a positive value are available.
+
+Status for each path:
+
+    <status> <fail-count> <in-flight-size> <relative_throughput>
+	<status>:
+		'A' if the path is active, 'F' if the path is failed.
+	<fail-count>:
+		The number of path failures.
+	<in-flight-size>:
+		The size of in-flight I/Os on the path.
+	<relative_throughput>:
+		The relative throughput value of the path
+		among all paths in the path-group.
+
+
+Algorithm
+=========
+
+dm-service-time adds the I/O size to 'in-flight-size' when the I/O is
+dispatched and subtracts when completed.
+Basically, dm-service-time selects a path having minimum service time
+which is calculated by::
+
+	('in-flight-size' + 'size-of-incoming-io') / 'relative_throughput'
+
+However, some optimizations below are used to reduce the calculation
+as much as possible.
+
+	1. If the paths have the same 'relative_throughput', skip
+	   the division and just compare the 'in-flight-size'.
+
+	2. If the paths have the same 'in-flight-size', skip the division
+	   and just compare the 'relative_throughput'.
+
+	3. If some paths have non-zero 'relative_throughput' and others
+	   have zero 'relative_throughput', ignore those paths with zero
+	   'relative_throughput'.
+
+If such optimizations can't be applied, calculate service time, and
+compare service time.
+If calculated service time is equal, the path having maximum
+'relative_throughput' may be better.  So compare 'relative_throughput'
+then.
+
+
+Examples
+========
+In case that 2 paths (sda and sdb) are used with repeat_count == 128
+and sda has an average throughput 1GB/s and sdb has 4GB/s,
+'relative_throughput' value may be '1' for sda and '4' for sdb::
+
+  # echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4" \
+    dmsetup create test
+  #
+  # dmsetup table
+  test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4
+  #
+  # dmsetup status
+  test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 1 8:16 A 0 0 4
+
+
+Or '2' for sda and '8' for sdb would be also true::
+
+  # echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8" \
+    dmsetup create test
+  #
+  # dmsetup table
+  test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8
+  #
+  # dmsetup status
+  test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 2 8:16 A 0 0 8
diff --git a/Documentation/admin-guide/device-mapper/dm-uevent.rst b/Documentation/admin-guide/device-mapper/dm-uevent.rst
new file mode 100644
index 000000000000..4a8ee8d069c9
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-uevent.rst
@@ -0,0 +1,110 @@
+====================
+device-mapper uevent
+====================
+
+The device-mapper uevent code adds the capability to device-mapper to create
+and send kobject uevents (uevents).  Previously device-mapper events were only
+available through the ioctl interface.  The advantage of the uevents interface
+is the event contains environment attributes providing increased context for
+the event avoiding the need to query the state of the device-mapper device after
+the event is received.
+
+There are two functions currently for device-mapper events.  The first function
+listed creates the event and the second function sends the event(s)::
+
+  void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
+                      const char *path, unsigned nr_valid_paths)
+
+  void dm_send_uevents(struct list_head *events, struct kobject *kobj)
+
+
+The variables added to the uevent environment are:
+
+Variable Name: DM_TARGET
+------------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: string
+:Description:
+:Value: Name of device-mapper target that generated the event.
+
+Variable Name: DM_ACTION
+------------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: string
+:Description:
+:Value: Device-mapper specific action that caused the uevent action.
+	PATH_FAILED - A path has failed;
+	PATH_REINSTATED - A path has been reinstated.
+
+Variable Name: DM_SEQNUM
+------------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: unsigned integer
+:Description: A sequence number for this specific device-mapper device.
+:Value: Valid unsigned integer range.
+
+Variable Name: DM_PATH
+----------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: string
+:Description: Major and minor number of the path device pertaining to this
+	      event.
+:Value: Path name in the form of "Major:Minor"
+
+Variable Name: DM_NR_VALID_PATHS
+--------------------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: unsigned integer
+:Description:
+:Value: Valid unsigned integer range.
+
+Variable Name: DM_NAME
+----------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: string
+:Description: Name of the device-mapper device.
+:Value: Name
+
+Variable Name: DM_UUID
+----------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: string
+:Description: UUID of the device-mapper device.
+:Value: UUID. (Empty string if there isn't one.)
+
+An example of the uevents generated as captured by udevmonitor is shown
+below
+
+1.) Path failure::
+
+	UEVENT[1192521009.711215] change@/block/dm-3
+	ACTION=change
+	DEVPATH=/block/dm-3
+	SUBSYSTEM=block
+	DM_TARGET=multipath
+	DM_ACTION=PATH_FAILED
+	DM_SEQNUM=1
+	DM_PATH=8:32
+	DM_NR_VALID_PATHS=0
+	DM_NAME=mpath2
+	DM_UUID=mpath-35333333000002328
+	MINOR=3
+	MAJOR=253
+	SEQNUM=1130
+
+2.) Path reinstate::
+
+	UEVENT[1192521132.989927] change@/block/dm-3
+	ACTION=change
+	DEVPATH=/block/dm-3
+	SUBSYSTEM=block
+	DM_TARGET=multipath
+	DM_ACTION=PATH_REINSTATED
+	DM_SEQNUM=2
+	DM_PATH=8:32
+	DM_NR_VALID_PATHS=1
+	DM_NAME=mpath2
+	DM_UUID=mpath-35333333000002328
+	MINOR=3
+	MAJOR=253
+	SEQNUM=1131
diff --git a/Documentation/admin-guide/device-mapper/dm-zoned.rst b/Documentation/admin-guide/device-mapper/dm-zoned.rst
new file mode 100644
index 000000000000..07f56ebc1730
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-zoned.rst
@@ -0,0 +1,146 @@
+========
+dm-zoned
+========
+
+The dm-zoned device mapper target exposes a zoned block device (ZBC and
+ZAC compliant devices) as a regular block device without any write
+pattern constraints. In effect, it implements a drive-managed zoned
+block device which hides from the user (a file system or an application
+doing raw block device accesses) the sequential write constraints of
+host-managed zoned block devices and can mitigate the potential
+device-side performance degradation due to excessive random writes on
+host-aware zoned block devices.
+
+For a more detailed description of the zoned block device models and
+their constraints see (for SCSI devices):
+
+http://www.t10.org/drafts.htm#ZBC_Family
+
+and (for ATA devices):
+
+http://www.t13.org/Documents/UploadedDocuments/docs2015/di537r05-Zoned_Device_ATA_Command_Set_ZAC.pdf
+
+The dm-zoned implementation is simple and minimizes system overhead (CPU
+and memory usage as well as storage capacity loss). For a 10TB
+host-managed disk with 256 MB zones, dm-zoned memory usage per disk
+instance is at most 4.5 MB and as little as 5 zones will be used
+internally for storing metadata and performaing reclaim operations.
+
+dm-zoned target devices are formatted and checked using the dmzadm
+utility available at:
+
+https://github.com/hgst/dm-zoned-tools
+
+Algorithm
+=========
+
+dm-zoned implements an on-disk buffering scheme to handle non-sequential
+write accesses to the sequential zones of a zoned block device.
+Conventional zones are used for caching as well as for storing internal
+metadata.
+
+The zones of the device are separated into 2 types:
+
+1) Metadata zones: these are conventional zones used to store metadata.
+Metadata zones are not reported as useable capacity to the user.
+
+2) Data zones: all remaining zones, the vast majority of which will be
+sequential zones used exclusively to store user data. The conventional
+zones of the device may be used also for buffering user random writes.
+Data in these zones may be directly mapped to the conventional zone, but
+later moved to a sequential zone so that the conventional zone can be
+reused for buffering incoming random writes.
+
+dm-zoned exposes a logical device with a sector size of 4096 bytes,
+irrespective of the physical sector size of the backend zoned block
+device being used. This allows reducing the amount of metadata needed to
+manage valid blocks (blocks written).
+
+The on-disk metadata format is as follows:
+
+1) The first block of the first conventional zone found contains the
+super block which describes the on disk amount and position of metadata
+blocks.
+
+2) Following the super block, a set of blocks is used to describe the
+mapping of the logical device blocks. The mapping is done per chunk of
+blocks, with the chunk size equal to the zoned block device size. The
+mapping table is indexed by chunk number and each mapping entry
+indicates the zone number of the device storing the chunk of data. Each
+mapping entry may also indicate if the zone number of a conventional
+zone used to buffer random modification to the data zone.
+
+3) A set of blocks used to store bitmaps indicating the validity of
+blocks in the data zones follows the mapping table. A valid block is
+defined as a block that was written and not discarded. For a buffered
+data chunk, a block is always valid only in the data zone mapping the
+chunk or in the buffer zone of the chunk.
+
+For a logical chunk mapped to a conventional zone, all write operations
+are processed by directly writing to the zone. If the mapping zone is a
+sequential zone, the write operation is processed directly only if the
+write offset within the logical chunk is equal to the write pointer
+offset within of the sequential data zone (i.e. the write operation is
+aligned on the zone write pointer). Otherwise, write operations are
+processed indirectly using a buffer zone. In that case, an unused
+conventional zone is allocated and assigned to the chunk being
+accessed. Writing a block to the buffer zone of a chunk will
+automatically invalidate the same block in the sequential zone mapping
+the chunk. If all blocks of the sequential zone become invalid, the zone
+is freed and the chunk buffer zone becomes the primary zone mapping the
+chunk, resulting in native random write performance similar to a regular
+block device.
+
+Read operations are processed according to the block validity
+information provided by the bitmaps. Valid blocks are read either from
+the sequential zone mapping a chunk, or if the chunk is buffered, from
+the buffer zone assigned. If the accessed chunk has no mapping, or the
+accessed blocks are invalid, the read buffer is zeroed and the read
+operation terminated.
+
+After some time, the limited number of convnetional zones available may
+be exhausted (all used to map chunks or buffer sequential zones) and
+unaligned writes to unbuffered chunks become impossible. To avoid this
+situation, a reclaim process regularly scans used conventional zones and
+tries to reclaim the least recently used zones by copying the valid
+blocks of the buffer zone to a free sequential zone. Once the copy
+completes, the chunk mapping is updated to point to the sequential zone
+and the buffer zone freed for reuse.
+
+Metadata Protection
+===================
+
+To protect metadata against corruption in case of sudden power loss or
+system crash, 2 sets of metadata zones are used. One set, the primary
+set, is used as the main metadata region, while the secondary set is
+used as a staging area. Modified metadata is first written to the
+secondary set and validated by updating the super block in the secondary
+set, a generation counter is used to indicate that this set contains the
+newest metadata. Once this operation completes, in place of metadata
+block updates can be done in the primary metadata set. This ensures that
+one of the set is always consistent (all modifications committed or none
+at all). Flush operations are used as a commit point. Upon reception of
+a flush request, metadata modification activity is temporarily blocked
+(for both incoming BIO processing and reclaim process) and all dirty
+metadata blocks are staged and updated. Normal operation is then
+resumed. Flushing metadata thus only temporarily delays write and
+discard requests. Read requests can be processed concurrently while
+metadata flush is being executed.
+
+Usage
+=====
+
+A zoned block device must first be formatted using the dmzadm tool. This
+will analyze the device zone configuration, determine where to place the
+metadata sets on the device and initialize the metadata sets.
+
+Ex::
+
+	dmzadm --format /dev/sdxx
+
+For a formatted device, the target can be created normally with the
+dmsetup utility. The only parameter that dm-zoned requires is the
+underlying zoned block device name. Ex::
+
+	echo "0 `blockdev --getsize ${dev}` zoned ${dev}" | \
+	dmsetup create dmz-`basename ${dev}`
diff --git a/Documentation/admin-guide/device-mapper/era.rst b/Documentation/admin-guide/device-mapper/era.rst
new file mode 100644
index 000000000000..90dd5c670b9f
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/era.rst
@@ -0,0 +1,116 @@
+======
+dm-era
+======
+
+Introduction
+============
+
+dm-era is a target that behaves similar to the linear target.  In
+addition it keeps track of which blocks were written within a user
+defined period of time called an 'era'.  Each era target instance
+maintains the current era as a monotonically increasing 32-bit
+counter.
+
+Use cases include tracking changed blocks for backup software, and
+partially invalidating the contents of a cache to restore cache
+coherency after rolling back a vendor snapshot.
+
+Constructor
+===========
+
+era <metadata dev> <origin dev> <block size>
+
+ ================ ======================================================
+ metadata dev     fast device holding the persistent metadata
+ origin dev	  device holding data blocks that may change
+ block size       block size of origin data device, granularity that is
+		  tracked by the target
+ ================ ======================================================
+
+Messages
+========
+
+None of the dm messages take any arguments.
+
+checkpoint
+----------
+
+Possibly move to a new era.  You shouldn't assume the era has
+incremented.  After sending this message, you should check the
+current era via the status line.
+
+take_metadata_snap
+------------------
+
+Create a clone of the metadata, to allow a userland process to read it.
+
+drop_metadata_snap
+------------------
+
+Drop the metadata snapshot.
+
+Status
+======
+
+<metadata block size> <#used metadata blocks>/<#total metadata blocks>
+<current era> <held metadata root | '-'>
+
+========================= ==============================================
+metadata block size	  Fixed block size for each metadata block in
+			  sectors
+#used metadata blocks	  Number of metadata blocks used
+#total metadata blocks	  Total number of metadata blocks
+current era		  The current era
+held metadata root	  The location, in blocks, of the metadata root
+			  that has been 'held' for userspace read
+			  access. '-' indicates there is no held root
+========================= ==============================================
+
+Detailed use case
+=================
+
+The scenario of invalidating a cache when rolling back a vendor
+snapshot was the primary use case when developing this target:
+
+Taking a vendor snapshot
+------------------------
+
+- Send a checkpoint message to the era target
+- Make a note of the current era in its status line
+- Take vendor snapshot (the era and snapshot should be forever
+  associated now).
+
+Rolling back to an vendor snapshot
+----------------------------------
+
+- Cache enters passthrough mode (see: dm-cache's docs in cache.txt)
+- Rollback vendor storage
+- Take metadata snapshot
+- Ascertain which blocks have been written since the snapshot was taken
+  by checking each block's era
+- Invalidate those blocks in the caching software
+- Cache returns to writeback/writethrough mode
+
+Memory usage
+============
+
+The target uses a bitset to record writes in the current era.  It also
+has a spare bitset ready for switching over to a new era.  Other than
+that it uses a few 4k blocks for updating metadata::
+
+   (4 * nr_blocks) bytes + buffers
+
+Resilience
+==========
+
+Metadata is updated on disk before a write to a previously unwritten
+block is performed.  As such dm-era should not be effected by a hard
+crash such as power failure.
+
+Userland tools
+==============
+
+Userland tools are found in the increasingly poorly named
+thin-provisioning-tools project:
+
+    https://github.com/jthornber/thin-provisioning-tools
diff --git a/Documentation/admin-guide/device-mapper/index.rst b/Documentation/admin-guide/device-mapper/index.rst
new file mode 100644
index 000000000000..c77c58b8f67b
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/index.rst
@@ -0,0 +1,42 @@
+=============
+Device Mapper
+=============
+
+.. toctree::
+    :maxdepth: 1
+
+    cache-policies
+    cache
+    delay
+    dm-crypt
+    dm-flakey
+    dm-init
+    dm-integrity
+    dm-io
+    dm-log
+    dm-queue-length
+    dm-raid
+    dm-service-time
+    dm-uevent
+    dm-zoned
+    era
+    kcopyd
+    linear
+    log-writes
+    persistent-data
+    snapshot
+    statistics
+    striped
+    switch
+    thin-provisioning
+    unstriped
+    verity
+    writecache
+    zero
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/admin-guide/device-mapper/kcopyd.rst b/Documentation/admin-guide/device-mapper/kcopyd.rst
new file mode 100644
index 000000000000..7651d395127f
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/kcopyd.rst
@@ -0,0 +1,47 @@
+======
+kcopyd
+======
+
+Kcopyd provides the ability to copy a range of sectors from one block-device
+to one or more other block-devices, with an asynchronous completion
+notification. It is used by dm-snapshot and dm-mirror.
+
+Users of kcopyd must first create a client and indicate how many memory pages
+to set aside for their copy jobs. This is done with a call to
+kcopyd_client_create()::
+
+   int kcopyd_client_create(unsigned int num_pages,
+                            struct kcopyd_client **result);
+
+To start a copy job, the user must set up io_region structures to describe
+the source and destinations of the copy. Each io_region indicates a
+block-device along with the starting sector and size of the region. The source
+of the copy is given as one io_region structure, and the destinations of the
+copy are given as an array of io_region structures::
+
+   struct io_region {
+      struct block_device *bdev;
+      sector_t sector;
+      sector_t count;
+   };
+
+To start the copy, the user calls kcopyd_copy(), passing in the client
+pointer, pointers to the source and destination io_regions, the name of a
+completion callback routine, and a pointer to some context data for the copy::
+
+   int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
+                   unsigned int num_dests, struct io_region *dests,
+                   unsigned int flags, kcopyd_notify_fn fn, void *context);
+
+   typedef void (*kcopyd_notify_fn)(int read_err, unsigned int write_err,
+				    void *context);
+
+When the copy completes, kcopyd will call the user's completion routine,
+passing back the user's context pointer. It will also indicate if a read or
+write error occurred during the copy.
+
+When a user is done with all their copy jobs, they should call
+kcopyd_client_destroy() to delete the kcopyd client, which will release the
+associated memory pages::
+
+   void kcopyd_client_destroy(struct kcopyd_client *kc);
diff --git a/Documentation/admin-guide/device-mapper/linear.rst b/Documentation/admin-guide/device-mapper/linear.rst
new file mode 100644
index 000000000000..9d17fc6e64a9
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/linear.rst
@@ -0,0 +1,63 @@
+=========
+dm-linear
+=========
+
+Device-Mapper's "linear" target maps a linear range of the Device-Mapper
+device onto a linear range of another device.  This is the basic building
+block of logical volume managers.
+
+Parameters: <dev path> <offset>
+    <dev path>:
+	Full pathname to the underlying block-device, or a
+        "major:minor" device-number.
+    <offset>:
+	Starting sector within the device.
+
+
+Example scripts
+===============
+
+::
+
+  #!/bin/sh
+  # Create an identity mapping for a device
+  echo "0 `blockdev --getsz $1` linear $1 0" | dmsetup create identity
+
+::
+
+  #!/bin/sh
+  # Join 2 devices together
+  size1=`blockdev --getsz $1`
+  size2=`blockdev --getsz $2`
+  echo "0 $size1 linear $1 0
+  $size1 $size2 linear $2 0" | dmsetup create joined
+
+::
+
+  #!/usr/bin/perl -w
+  # Split a device into 4M chunks and then join them together in reverse order.
+
+  my $name = "reverse";
+  my $extent_size = 4 * 1024 * 2;
+  my $dev = $ARGV[0];
+  my $table = "";
+  my $count = 0;
+
+  if (!defined($dev)) {
+          die("Please specify a device.\n");
+  }
+
+  my $dev_size = `blockdev --getsz $dev`;
+  my $extents = int($dev_size / $extent_size) -
+                (($dev_size % $extent_size) ? 1 : 0);
+
+  while ($extents > 0) {
+          my $this_start = $count * $extent_size;
+          $extents--;
+          $count++;
+          my $this_offset = $extents * $extent_size;
+
+          $table .= "$this_start $extent_size linear $dev $this_offset\n";
+  }
+
+  `echo \"$table\" | dmsetup create $name`;
diff --git a/Documentation/admin-guide/device-mapper/log-writes.rst b/Documentation/admin-guide/device-mapper/log-writes.rst
new file mode 100644
index 000000000000..23141f2ffb7c
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/log-writes.rst
@@ -0,0 +1,145 @@
+=============
+dm-log-writes
+=============
+
+This target takes 2 devices, one to pass all IO to normally, and one to log all
+of the write operations to.  This is intended for file system developers wishing
+to verify the integrity of metadata or data as the file system is written to.
+There is a log_write_entry written for every WRITE request and the target is
+able to take arbitrary data from userspace to insert into the log.  The data
+that is in the WRITE requests is copied into the log to make the replay happen
+exactly as it happened originally.
+
+Log Ordering
+============
+
+We log things in order of completion once we are sure the write is no longer in
+cache.  This means that normal WRITE requests are not actually logged until the
+next REQ_PREFLUSH request.  This is to make it easier for userspace to replay
+the log in a way that correlates to what is on disk and not what is in cache,
+to make it easier to detect improper waiting/flushing.
+
+This works by attaching all WRITE requests to a list once the write completes.
+Once we see a REQ_PREFLUSH request we splice this list onto the request and once
+the FLUSH request completes we log all of the WRITEs and then the FLUSH.  Only
+completed WRITEs, at the time the REQ_PREFLUSH is issued, are added in order to
+simulate the worst case scenario with regard to power failures.  Consider the
+following example (W means write, C means complete):
+
+	W1,W2,W3,C3,C2,Wflush,C1,Cflush
+
+The log would show the following:
+
+	W3,W2,flush,W1....
+
+Again this is to simulate what is actually on disk, this allows us to detect
+cases where a power failure at a particular point in time would create an
+inconsistent file system.
+
+Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as
+they complete as those requests will obviously bypass the device cache.
+
+Any REQ_OP_DISCARD requests are treated like WRITE requests.  Otherwise we would
+have all the DISCARD requests, and then the WRITE requests and then the FLUSH
+request.  Consider the following example:
+
+	WRITE block 1, DISCARD block 1, FLUSH
+
+If we logged DISCARD when it completed, the replay would look like this:
+
+	DISCARD 1, WRITE 1, FLUSH
+
+which isn't quite what happened and wouldn't be caught during the log replay.
+
+Target interface
+================
+
+i) Constructor
+
+   log-writes <dev_path> <log_dev_path>
+
+   ============= ==============================================
+   dev_path	 Device that all of the IO will go to normally.
+   log_dev_path  Device where the log entries are written to.
+   ============= ==============================================
+
+ii) Status
+
+    <#logged entries> <highest allocated sector>
+
+    =========================== ========================
+    #logged entries	        Number of logged entries
+    highest allocated sector    Highest allocated sector
+    =========================== ========================
+
+iii) Messages
+
+    mark <description>
+
+	You can use a dmsetup message to set an arbitrary mark in a log.
+	For example say you want to fsck a file system after every
+	write, but first you need to replay up to the mkfs to make sure
+	we're fsck'ing something reasonable, you would do something like
+	this::
+
+	  mkfs.btrfs -f /dev/mapper/log
+	  dmsetup message log 0 mark mkfs
+	  <run test>
+
+	This would allow you to replay the log up to the mkfs mark and
+	then replay from that point on doing the fsck check in the
+	interval that you want.
+
+	Every log has a mark at the end labeled "dm-log-writes-end".
+
+Userspace component
+===================
+
+There is a userspace tool that will replay the log for you in various ways.
+It can be found here: https://github.com/josefbacik/log-writes
+
+Example usage
+=============
+
+Say you want to test fsync on your file system.  You would do something like
+this::
+
+  TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
+  dmsetup create log --table "$TABLE"
+  mkfs.btrfs -f /dev/mapper/log
+  dmsetup message log 0 mark mkfs
+
+  mount /dev/mapper/log /mnt/btrfs-test
+  <some test that does fsync at the end>
+  dmsetup message log 0 mark fsync
+  md5sum /mnt/btrfs-test/foo
+  umount /mnt/btrfs-test
+
+  dmsetup remove log
+  replay-log --log /dev/sdc --replay /dev/sdb --end-mark fsync
+  mount /dev/sdb /mnt/btrfs-test
+  md5sum /mnt/btrfs-test/foo
+  <verify md5sum's are correct>
+
+  Another option is to do a complicated file system operation and verify the file
+  system is consistent during the entire operation.  You could do this with:
+
+  TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
+  dmsetup create log --table "$TABLE"
+  mkfs.btrfs -f /dev/mapper/log
+  dmsetup message log 0 mark mkfs
+
+  mount /dev/mapper/log /mnt/btrfs-test
+  <fsstress to dirty the fs>
+  btrfs filesystem balance /mnt/btrfs-test
+  umount /mnt/btrfs-test
+  dmsetup remove log
+
+  replay-log --log /dev/sdc --replay /dev/sdb --end-mark mkfs
+  btrfsck /dev/sdb
+  replay-log --log /dev/sdc --replay /dev/sdb --start-mark mkfs \
+	--fsck "btrfsck /dev/sdb" --check fua
+
+And that will replay the log until it sees a FUA request, run the fsck command
+and if the fsck passes it will replay to the next FUA, until it is completed or
+the fsck command exists abnormally.
diff --git a/Documentation/admin-guide/device-mapper/persistent-data.rst b/Documentation/admin-guide/device-mapper/persistent-data.rst
new file mode 100644
index 000000000000..2065c3c5a091
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/persistent-data.rst
@@ -0,0 +1,88 @@
+===============
+Persistent data
+===============
+
+Introduction
+============
+
+The more-sophisticated device-mapper targets require complex metadata
+that is managed in kernel.  In late 2010 we were seeing that various
+different targets were rolling their own data structures, for example:
+
+- Mikulas Patocka's multisnap implementation
+- Heinz Mauelshagen's thin provisioning target
+- Another btree-based caching target posted to dm-devel
+- Another multi-snapshot target based on a design of Daniel Phillips
+
+Maintaining these data structures takes a lot of work, so if possible
+we'd like to reduce the number.
+
+The persistent-data library is an attempt to provide a re-usable
+framework for people who want to store metadata in device-mapper
+targets.  It's currently used by the thin-provisioning target and an
+upcoming hierarchical storage target.
+
+Overview
+========
+
+The main documentation is in the header files which can all be found
+under drivers/md/persistent-data.
+
+The block manager
+-----------------
+
+dm-block-manager.[hc]
+
+This provides access to the data on disk in fixed sized-blocks.  There
+is a read/write locking interface to prevent concurrent accesses, and
+keep data that is being used in the cache.
+
+Clients of persistent-data are unlikely to use this directly.
+
+The transaction manager
+-----------------------
+
+dm-transaction-manager.[hc]
+
+This restricts access to blocks and enforces copy-on-write semantics.
+The only way you can get hold of a writable block through the
+transaction manager is by shadowing an existing block (ie. doing
+copy-on-write) or allocating a fresh one.  Shadowing is elided within
+the same transaction so performance is reasonable.  The commit method
+ensures that all data is flushed before it writes the superblock.
+On power failure your metadata will be as it was when last committed.
+
+The Space Maps
+--------------
+
+dm-space-map.h
+dm-space-map-metadata.[hc]
+dm-space-map-disk.[hc]
+
+On-disk data structures that keep track of reference counts of blocks.
+Also acts as the allocator of new blocks.  Currently two
+implementations: a simpler one for managing blocks on a different
+device (eg. thinly-provisioned data blocks); and one for managing
+the metadata space.  The latter is complicated by the need to store
+its own data within the space it's managing.
+
+The data structures
+-------------------
+
+dm-btree.[hc]
+dm-btree-remove.c
+dm-btree-spine.c
+dm-btree-internal.h
+
+Currently there is only one data structure, a hierarchical btree.
+There are plans to add more.  For example, something with an
+array-like interface would see a lot of use.
+
+The btree is 'hierarchical' in that you can define it to be composed
+of nested btrees, and take multiple keys.  For example, the
+thin-provisioning target uses a btree with two levels of nesting.
+The first maps a device id to a mapping tree, and that in turn maps a
+virtual block to a physical block.
+
+Values stored in the btrees can have arbitrary size.  Keys are always
+64bits, although nesting allows you to use multiple keys.
diff --git a/Documentation/admin-guide/device-mapper/snapshot.rst b/Documentation/admin-guide/device-mapper/snapshot.rst
new file mode 100644
index 000000000000..ccdd8b587a74
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/snapshot.rst
@@ -0,0 +1,196 @@
+==============================
+Device-mapper snapshot support
+==============================
+
+Device-mapper allows you, without massive data copying:
+
+-  To create snapshots of any block device i.e. mountable, saved states of
+   the block device which are also writable without interfering with the
+   original content;
+-  To create device "forks", i.e. multiple different versions of the
+   same data stream.
+-  To merge a snapshot of a block device back into the snapshot's origin
+   device.
+
+In the first two cases, dm copies only the chunks of data that get
+changed and uses a separate copy-on-write (COW) block device for
+storage.
+
+For snapshot merge the contents of the COW storage are merged back into
+the origin device.
+
+
+There are three dm targets available:
+snapshot, snapshot-origin, and snapshot-merge.
+
+-  snapshot-origin <origin>
+
+which will normally have one or more snapshots based on it.
+Reads will be mapped directly to the backing device. For each write, the
+original data will be saved in the <COW device> of each snapshot to keep
+its visible content unchanged, at least until the <COW device> fills up.
+
+
+-  snapshot <origin> <COW device> <persistent?> <chunksize>
+   [<# feature args> [<arg>]*]
+
+A snapshot of the <origin> block device is created. Changed chunks of
+<chunksize> sectors will be stored on the <COW device>.  Writes will
+only go to the <COW device>.  Reads will come from the <COW device> or
+from <origin> for unchanged data.  <COW device> will often be
+smaller than the origin and if it fills up the snapshot will become
+useless and be disabled, returning errors.  So it is important to monitor
+the amount of free space and expand the <COW device> before it fills up.
+
+<persistent?> is P (Persistent) or N (Not persistent - will not survive
+after reboot).  O (Overflow) can be added as a persistent store option
+to allow userspace to advertise its support for seeing "Overflow" in the
+snapshot status.  So supported store types are "P", "PO" and "N".
+
+The difference between persistent and transient is with transient
+snapshots less metadata must be saved on disk - they can be kept in
+memory by the kernel.
+
+When loading or unloading the snapshot target, the corresponding
+snapshot-origin or snapshot-merge target must be suspended. A failure to
+suspend the origin target could result in data corruption.
+
+Optional features:
+
+   discard_zeroes_cow - a discard issued to the snapshot device that
+   maps to entire chunks to will zero the corresponding exception(s) in
+   the snapshot's exception store.
+
+   discard_passdown_origin - a discard to the snapshot device is passed
+   down to the snapshot-origin's underlying device.  This doesn't cause
+   copy-out to the snapshot exception store because the snapshot-origin
+   target is bypassed.
+
+   The discard_passdown_origin feature depends on the discard_zeroes_cow
+   feature being enabled.
+
+
+-  snapshot-merge <origin> <COW device> <persistent> <chunksize>
+   [<# feature args> [<arg>]*]
+
+takes the same table arguments as the snapshot target except it only
+works with persistent snapshots.  This target assumes the role of the
+"snapshot-origin" target and must not be loaded if the "snapshot-origin"
+is still present for <origin>.
+
+Creates a merging snapshot that takes control of the changed chunks
+stored in the <COW device> of an existing snapshot, through a handover
+procedure, and merges these chunks back into the <origin>.  Once merging
+has started (in the background) the <origin> may be opened and the merge
+will continue while I/O is flowing to it.  Changes to the <origin> are
+deferred until the merging snapshot's corresponding chunk(s) have been
+merged.  Once merging has started the snapshot device, associated with
+the "snapshot" target, will return -EIO when accessed.
+
+
+How snapshot is used by LVM2
+============================
+When you create the first LVM2 snapshot of a volume, four dm devices are used:
+
+1) a device containing the original mapping table of the source volume;
+2) a device used as the <COW device>;
+3) a "snapshot" device, combining #1 and #2, which is the visible snapshot
+   volume;
+4) the "original" volume (which uses the device number used by the original
+   source volume), whose table is replaced by a "snapshot-origin" mapping
+   from device #1.
+
+A fixed naming scheme is used, so with the following commands::
+
+  lvcreate -L 1G -n base volumeGroup
+  lvcreate -L 100M --snapshot -n snap volumeGroup/base
+
+we'll have this situation (with volumes in above order)::
+
+  # dmsetup table|grep volumeGroup
+
+  volumeGroup-base-real: 0 2097152 linear 8:19 384
+  volumeGroup-snap-cow: 0 204800 linear 8:19 2097536
+  volumeGroup-snap: 0 2097152 snapshot 254:11 254:12 P 16
+  volumeGroup-base: 0 2097152 snapshot-origin 254:11
+
+  # ls -lL /dev/mapper/volumeGroup-*
+  brw-------  1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
+  brw-------  1 root root 254, 12 29 ago 18:15 /dev/mapper/volumeGroup-snap-cow
+  brw-------  1 root root 254, 13 29 ago 18:15 /dev/mapper/volumeGroup-snap
+  brw-------  1 root root 254, 10 29 ago 18:14 /dev/mapper/volumeGroup-base
+
+
+How snapshot-merge is used by LVM2
+==================================
+A merging snapshot assumes the role of the "snapshot-origin" while
+merging.  As such the "snapshot-origin" is replaced with
+"snapshot-merge".  The "-real" device is not changed and the "-cow"
+device is renamed to <origin name>-cow to aid LVM2's cleanup of the
+merging snapshot after it completes.  The "snapshot" that hands over its
+COW device to the "snapshot-merge" is deactivated (unless using lvchange
+--refresh); but if it is left active it will simply return I/O errors.
+
+A snapshot will merge into its origin with the following command::
+
+  lvconvert --merge volumeGroup/snap
+
+we'll now have this situation::
+
+  # dmsetup table|grep volumeGroup
+
+  volumeGroup-base-real: 0 2097152 linear 8:19 384
+  volumeGroup-base-cow: 0 204800 linear 8:19 2097536
+  volumeGroup-base: 0 2097152 snapshot-merge 254:11 254:12 P 16
+
+  # ls -lL /dev/mapper/volumeGroup-*
+  brw-------  1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
+  brw-------  1 root root 254, 12 29 ago 18:16 /dev/mapper/volumeGroup-base-cow
+  brw-------  1 root root 254, 10 29 ago 18:16 /dev/mapper/volumeGroup-base
+
+
+How to determine when a merging is complete
+===========================================
+The snapshot-merge and snapshot status lines end with:
+
+  <sectors_allocated>/<total_sectors> <metadata_sectors>
+
+Both <sectors_allocated> and <total_sectors> include both data and metadata.
+During merging, the number of sectors allocated gets smaller and
+smaller.  Merging has finished when the number of sectors holding data
+is zero, in other words <sectors_allocated> == <metadata_sectors>.
+
+Here is a practical example (using a hybrid of lvm and dmsetup commands)::
+
+  # lvs
+    LV      VG          Attr   LSize Origin  Snap%  Move Log Copy%  Convert
+    base    volumeGroup owi-a- 4.00g
+    snap    volumeGroup swi-a- 1.00g base  18.97
+
+  # dmsetup status volumeGroup-snap
+  0 8388608 snapshot 397896/2097152 1560
+                                    ^^^^ metadata sectors
+
+  # lvconvert --merge -b volumeGroup/snap
+    Merging of volume snap started.
+
+  # lvs volumeGroup/snap
+    LV      VG          Attr   LSize Origin  Snap%  Move Log Copy%  Convert
+    base    volumeGroup Owi-a- 4.00g          17.23
+
+  # dmsetup status volumeGroup-base
+  0 8388608 snapshot-merge 281688/2097152 1104
+
+  # dmsetup status volumeGroup-base
+  0 8388608 snapshot-merge 180480/2097152 712
+
+  # dmsetup status volumeGroup-base
+  0 8388608 snapshot-merge 16/2097152 16
+
+Merging has finished.
+
+::
+
+  # lvs
+    LV      VG          Attr   LSize Origin  Snap%  Move Log Copy%  Convert
+    base    volumeGroup owi-a- 4.00g
diff --git a/Documentation/admin-guide/device-mapper/statistics.rst b/Documentation/admin-guide/device-mapper/statistics.rst
new file mode 100644
index 000000000000..3d80a9f850cc
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/statistics.rst
@@ -0,0 +1,225 @@
+=============
+DM statistics
+=============
+
+Device Mapper supports the collection of I/O statistics on user-defined
+regions of a DM device.	 If no regions are defined no statistics are
+collected so there isn't any performance impact.  Only bio-based DM
+devices are currently supported.
+
+Each user-defined region specifies a starting sector, length and step.
+Individual statistics will be collected for each step-sized area within
+the range specified.
+
+The I/O statistics counters for each step-sized area of a region are
+in the same format as `/sys/block/*/stat` or `/proc/diskstats` (see:
+Documentation/iostats.txt).  But two extra counters (12 and 13) are
+provided: total time spent reading and writing.  When the histogram
+argument is used, the 14th parameter is reported that represents the
+histogram of latencies.  All these counters may be accessed by sending
+the @stats_print message to the appropriate DM device via dmsetup.
+
+The reported times are in milliseconds and the granularity depends on
+the kernel ticks.  When the option precise_timestamps is used, the
+reported times are in nanoseconds.
+
+Each region has a corresponding unique identifier, which we call a
+region_id, that is assigned when the region is created.	 The region_id
+must be supplied when querying statistics about the region, deleting the
+region, etc.  Unique region_ids enable multiple userspace programs to
+request and process statistics for the same DM device without stepping
+on each other's data.
+
+The creation of DM statistics will allocate memory via kmalloc or
+fallback to using vmalloc space.  At most, 1/4 of the overall system
+memory may be allocated by DM statistics.  The admin can see how much
+memory is used by reading:
+
+	/sys/module/dm_mod/parameters/stats_current_allocated_bytes
+
+Messages
+========
+
+    @stats_create <range> <step> [<number_of_optional_arguments> <optional_arguments>...] [<program_id> [<aux_data>]]
+	Create a new region and return the region_id.
+
+	<range>
+	  "-"
+		whole device
+	  "<start_sector>+<length>"
+		a range of <length> 512-byte sectors
+		starting with <start_sector>.
+
+	<step>
+	  "<area_size>"
+		the range is subdivided into areas each containing
+		<area_size> sectors.
+	  "/<number_of_areas>"
+		the range is subdivided into the specified
+		number of areas.
+
+	<number_of_optional_arguments>
+	  The number of optional arguments
+
+	<optional_arguments>
+	  The following optional arguments are supported:
+
+	  precise_timestamps
+		use precise timer with nanosecond resolution
+		instead of the "jiffies" variable.  When this argument is
+		used, the resulting times are in nanoseconds instead of
+		milliseconds.  Precise timestamps are a little bit slower
+		to obtain than jiffies-based timestamps.
+	  histogram:n1,n2,n3,n4,...
+		collect histogram of latencies.  The
+		numbers n1, n2, etc are times that represent the boundaries
+		of the histogram.  If precise_timestamps is not used, the
+		times are in milliseconds, otherwise they are in
+		nanoseconds.  For each range, the kernel will report the
+		number of requests that completed within this range. For
+		example, if we use "histogram:10,20,30", the kernel will
+		report four numbers a:b:c:d. a is the number of requests
+		that took 0-10 ms to complete, b is the number of requests
+		that took 10-20 ms to complete, c is the number of requests
+		that took 20-30 ms to complete and d is the number of
+		requests that took more than 30 ms to complete.
+
+	<program_id>
+	  An optional parameter.  A name that uniquely identifies
+	  the userspace owner of the range.  This groups ranges together
+	  so that userspace programs can identify the ranges they
+	  created and ignore those created by others.
+	  The kernel returns this string back in the output of
+	  @stats_list message, but it doesn't use it for anything else.
+	  If we omit the number of optional arguments, program id must not
+	  be a number, otherwise it would be interpreted as the number of
+	  optional arguments.
+
+	<aux_data>
+	  An optional parameter.  A word that provides auxiliary data
+	  that is useful to the client program that created the range.
+	  The kernel returns this string back in the output of
+	  @stats_list message, but it doesn't use this value for anything.
+
+    @stats_delete <region_id>
+	Delete the region with the specified id.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+    @stats_clear <region_id>
+	Clear all the counters except the in-flight i/o counters.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+    @stats_list [<program_id>]
+	List all regions registered with @stats_create.
+
+	<program_id>
+	  An optional parameter.
+	  If this parameter is specified, only matching regions
+	  are returned.
+	  If it is not specified, all regions are returned.
+
+	Output format:
+	  <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
+	        precise_timestamps histogram:n1,n2,n3,...
+
+	The strings "precise_timestamps" and "histogram" are printed only
+	if they were specified when creating the region.
+
+    @stats_print <region_id> [<starting_line> <number_of_lines>]
+	Print counters for each step-sized area of a region.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+	<starting_line>
+	  The index of the starting line in the output.
+	  If omitted, all lines are returned.
+
+	<number_of_lines>
+	  The number of lines to include in the output.
+	  If omitted, all lines are returned.
+
+	Output format for each step-sized area of a region:
+
+	  <start_sector>+<length>
+		counters
+
+	  The first 11 counters have the same meaning as
+	  `/sys/block/*/stat or /proc/diskstats`.
+
+	  Please refer to Documentation/iostats.txt for details.
+
+	  1. the number of reads completed
+	  2. the number of reads merged
+	  3. the number of sectors read
+	  4. the number of milliseconds spent reading
+	  5. the number of writes completed
+	  6. the number of writes merged
+	  7. the number of sectors written
+	  8. the number of milliseconds spent writing
+	  9. the number of I/Os currently in progress
+	  10. the number of milliseconds spent doing I/Os
+	  11. the weighted number of milliseconds spent doing I/Os
+
+	  Additional counters:
+
+	  12. the total time spent reading in milliseconds
+	  13. the total time spent writing in milliseconds
+
+    @stats_print_clear <region_id> [<starting_line> <number_of_lines>]
+	Atomically print and then clear all the counters except the
+	in-flight i/o counters.	 Useful when the client consuming the
+	statistics does not want to lose any statistics (those updated
+	between printing and clearing).
+
+	<region_id>
+	  region_id returned from @stats_create
+
+	<starting_line>
+	  The index of the starting line in the output.
+	  If omitted, all lines are printed and then cleared.
+
+	<number_of_lines>
+	  The number of lines to process.
+	  If omitted, all lines are printed and then cleared.
+
+    @stats_set_aux <region_id> <aux_data>
+	Store auxiliary data aux_data for the specified region.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+	<aux_data>
+	  The string that identifies data which is useful to the client
+	  program that created the range.  The kernel returns this
+	  string back in the output of @stats_list message, but it
+	  doesn't use this value for anything.
+
+Examples
+========
+
+Subdivide the DM device 'vol' into 100 pieces and start collecting
+statistics on them::
+
+  dmsetup message vol 0 @stats_create - /100
+
+Set the auxiliary data string to "foo bar baz" (the escape for each
+space must also be escaped, otherwise the shell will consume them)::
+
+  dmsetup message vol 0 @stats_set_aux 0 foo\\ bar\\ baz
+
+List the statistics::
+
+  dmsetup message vol 0 @stats_list
+
+Print the statistics::
+
+  dmsetup message vol 0 @stats_print 0
+
+Delete the statistics::
+
+  dmsetup message vol 0 @stats_delete 0
diff --git a/Documentation/admin-guide/device-mapper/striped.rst b/Documentation/admin-guide/device-mapper/striped.rst
new file mode 100644
index 000000000000..e9a8da192ae1
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/striped.rst
@@ -0,0 +1,61 @@
+=========
+dm-stripe
+=========
+
+Device-Mapper's "striped" target is used to create a striped (i.e. RAID-0)
+device across one or more underlying devices. Data is written in "chunks",
+with consecutive chunks rotating among the underlying devices. This can
+potentially provide improved I/O throughput by utilizing several physical
+devices in parallel.
+
+Parameters: <num devs> <chunk size> [<dev path> <offset>]+
+    <num devs>:
+	Number of underlying devices.
+    <chunk size>:
+	Size of each chunk of data. Must be at least as
+        large as the system's PAGE_SIZE.
+    <dev path>:
+	Full pathname to the underlying block-device, or a
+	"major:minor" device-number.
+    <offset>:
+	Starting sector within the device.
+
+One or more underlying devices can be specified. The striped device size must
+be a multiple of the chunk size multiplied by the number of underlying devices.
+
+
+Example scripts
+===============
+
+::
+
+  #!/usr/bin/perl -w
+  # Create a striped device across any number of underlying devices. The device
+  # will be called "stripe_dev" and have a chunk-size of 128k.
+
+  my $chunk_size = 128 * 2;
+  my $dev_name = "stripe_dev";
+  my $num_devs = @ARGV;
+  my @devs = @ARGV;
+  my ($min_dev_size, $stripe_dev_size, $i);
+
+  if (!$num_devs) {
+          die("Specify at least one device\n");
+  }
+
+  $min_dev_size = `blockdev --getsz $devs[0]`;
+  for ($i = 1; $i < $num_devs; $i++) {
+          my $this_size = `blockdev --getsz $devs[$i]`;
+          $min_dev_size = ($min_dev_size < $this_size) ?
+                          $min_dev_size : $this_size;
+  }
+
+  $stripe_dev_size = $min_dev_size * $num_devs;
+  $stripe_dev_size -= $stripe_dev_size % ($chunk_size * $num_devs);
+
+  $table = "0 $stripe_dev_size striped $num_devs $chunk_size";
+  for ($i = 0; $i < $num_devs; $i++) {
+          $table .= " $devs[$i] 0";
+  }
+
+  `echo $table | dmsetup create $dev_name`;
diff --git a/Documentation/admin-guide/device-mapper/switch.rst b/Documentation/admin-guide/device-mapper/switch.rst
new file mode 100644
index 000000000000..7dde06be1a4f
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/switch.rst
@@ -0,0 +1,141 @@
+=========
+dm-switch
+=========
+
+The device-mapper switch target creates a device that supports an
+arbitrary mapping of fixed-size regions of I/O across a fixed set of
+paths.  The path used for any specific region can be switched
+dynamically by sending the target a message.
+
+It maps I/O to underlying block devices efficiently when there is a large
+number of fixed-sized address regions but there is no simple pattern
+that would allow for a compact representation of the mapping such as
+dm-stripe.
+
+Background
+----------
+
+Dell EqualLogic and some other iSCSI storage arrays use a distributed
+frameless architecture.  In this architecture, the storage group
+consists of a number of distinct storage arrays ("members") each having
+independent controllers, disk storage and network adapters.  When a LUN
+is created it is spread across multiple members.  The details of the
+spreading are hidden from initiators connected to this storage system.
+The storage group exposes a single target discovery portal, no matter
+how many members are being used.  When iSCSI sessions are created, each
+session is connected to an eth port on a single member.  Data to a LUN
+can be sent on any iSCSI session, and if the blocks being accessed are
+stored on another member the I/O will be forwarded as required.  This
+forwarding is invisible to the initiator.  The storage layout is also
+dynamic, and the blocks stored on disk may be moved from member to
+member as needed to balance the load.
+
+This architecture simplifies the management and configuration of both
+the storage group and initiators.  In a multipathing configuration, it
+is possible to set up multiple iSCSI sessions to use multiple network
+interfaces on both the host and target to take advantage of the
+increased network bandwidth.  An initiator could use a simple round
+robin algorithm to send I/O across all paths and let the storage array
+members forward it as necessary, but there is a performance advantage to
+sending data directly to the correct member.
+
+A device-mapper table already lets you map different regions of a
+device onto different targets.  However in this architecture the LUN is
+spread with an address region size on the order of 10s of MBs, which
+means the resulting table could have more than a million entries and
+consume far too much memory.
+
+Using this device-mapper switch target we can now build a two-layer
+device hierarchy:
+
+    Upper Tier - Determine which array member the I/O should be sent to.
+    Lower Tier - Load balance amongst paths to a particular member.
+
+The lower tier consists of a single dm multipath device for each member.
+Each of these multipath devices contains the set of paths directly to
+the array member in one priority group, and leverages existing path
+selectors to load balance amongst these paths.  We also build a
+non-preferred priority group containing paths to other array members for
+failover reasons.
+
+The upper tier consists of a single dm-switch device.  This device uses
+a bitmap to look up the location of the I/O and choose the appropriate
+lower tier device to route the I/O.  By using a bitmap we are able to
+use 4 bits for each address range in a 16 member group (which is very
+large for us).  This is a much denser representation than the dm table
+b-tree can achieve.
+
+Construction Parameters
+=======================
+
+    <num_paths> <region_size> <num_optional_args> [<optional_args>...] [<dev_path> <offset>]+
+	<num_paths>
+	    The number of paths across which to distribute the I/O.
+
+	<region_size>
+	    The number of 512-byte sectors in a region. Each region can be redirected
+	    to any of the available paths.
+
+	<num_optional_args>
+	    The number of optional arguments. Currently, no optional arguments
+	    are supported and so this must be zero.
+
+	<dev_path>
+	    The block device that represents a specific path to the device.
+
+	<offset>
+	    The offset of the start of data on the specific <dev_path> (in units
+	    of 512-byte sectors). This number is added to the sector number when
+	    forwarding the request to the specific path. Typically it is zero.
+
+Messages
+========
+
+set_region_mappings <index>:<path_nr> [<index>]:<path_nr> [<index>]:<path_nr>...
+
+Modify the region table by specifying which regions are redirected to
+which paths.
+
+<index>
+    The region number (region size was specified in constructor parameters).
+    If index is omitted, the next region (previous index + 1) is used.
+    Expressed in hexadecimal (WITHOUT any prefix like 0x).
+
+<path_nr>
+    The path number in the range 0 ... (<num_paths> - 1).
+    Expressed in hexadecimal (WITHOUT any prefix like 0x).
+
+R<n>,<m>
+    This parameter allows repetitive patterns to be loaded quickly. <n> and <m>
+    are hexadecimal numbers. The last <n> mappings are repeated in the next <m>
+    slots.
+
+Status
+======
+
+No status line is reported.
+
+Example
+=======
+
+Assume that you have volumes vg1/switch0 vg1/switch1 vg1/switch2 with
+the same size.
+
+Create a switch device with 64kB region size::
+
+    dmsetup create switch --table "0 `blockdev --getsz /dev/vg1/switch0`
+	switch 3 128 0 /dev/vg1/switch0 0 /dev/vg1/switch1 0 /dev/vg1/switch2 0"
+
+Set mappings for the first 7 entries to point to devices switch0, switch1,
+switch2, switch0, switch1, switch2, switch1::
+
+    dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1
+
+Set repetitive mapping. This command::
+
+    dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10
+
+is equivalent to::
+
+    dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \
+	:1 :2 :1 :2 :1 :2 :1 :2 :1 :2
diff --git a/Documentation/admin-guide/device-mapper/thin-provisioning.rst b/Documentation/admin-guide/device-mapper/thin-provisioning.rst
new file mode 100644
index 000000000000..bafebf79da4b
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/thin-provisioning.rst
@@ -0,0 +1,427 @@
+=================
+Thin provisioning
+=================
+
+Introduction
+============
+
+This document describes a collection of device-mapper targets that
+between them implement thin-provisioning and snapshots.
+
+The main highlight of this implementation, compared to the previous
+implementation of snapshots, is that it allows many virtual devices to
+be stored on the same data volume.  This simplifies administration and
+allows the sharing of data between volumes, thus reducing disk usage.
+
+Another significant feature is support for an arbitrary depth of
+recursive snapshots (snapshots of snapshots of snapshots ...).  The
+previous implementation of snapshots did this by chaining together
+lookup tables, and so performance was O(depth).  This new
+implementation uses a single data structure to avoid this degradation
+with depth.  Fragmentation may still be an issue, however, in some
+scenarios.
+
+Metadata is stored on a separate device from data, giving the
+administrator some freedom, for example to:
+
+- Improve metadata resilience by storing metadata on a mirrored volume
+  but data on a non-mirrored one.
+
+- Improve performance by storing the metadata on SSD.
+
+Status
+======
+
+These targets are considered safe for production use.  But different use
+cases will have different performance characteristics, for example due
+to fragmentation of the data volume.
+
+If you find this software is not performing as expected please mail
+dm-devel@redhat.com with details and we'll try our best to improve
+things for you.
+
+Userspace tools for checking and repairing the metadata have been fully
+developed and are available as 'thin_check' and 'thin_repair'.  The name
+of the package that provides these utilities varies by distribution (on
+a Red Hat distribution it is named 'device-mapper-persistent-data').
+
+Cookbook
+========
+
+This section describes some quick recipes for using thin provisioning.
+They use the dmsetup program to control the device-mapper driver
+directly.  End users will be advised to use a higher-level volume
+manager such as LVM2 once support has been added.
+
+Pool device
+-----------
+
+The pool device ties together the metadata volume and the data volume.
+It maps I/O linearly to the data volume and updates the metadata via
+two mechanisms:
+
+- Function calls from the thin targets
+
+- Device-mapper 'messages' from userspace which control the creation of new
+  virtual devices amongst other things.
+
+Setting up a fresh pool device
+------------------------------
+
+Setting up a pool device requires a valid metadata device, and a
+data device.  If you do not have an existing metadata device you can
+make one by zeroing the first 4k to indicate empty metadata.
+
+    dd if=/dev/zero of=$metadata_dev bs=4096 count=1
+
+The amount of metadata you need will vary according to how many blocks
+are shared between thin devices (i.e. through snapshots).  If you have
+less sharing than average you'll need a larger-than-average metadata device.
+
+As a guide, we suggest you calculate the number of bytes to use in the
+metadata device as 48 * $data_dev_size / $data_block_size but round it up
+to 2MB if the answer is smaller.  If you're creating large numbers of
+snapshots which are recording large amounts of change, you may find you
+need to increase this.
+
+The largest size supported is 16GB: If the device is larger,
+a warning will be issued and the excess space will not be used.
+
+Reloading a pool table
+----------------------
+
+You may reload a pool's table, indeed this is how the pool is resized
+if it runs out of space.  (N.B. While specifying a different metadata
+device when reloading is not forbidden at the moment, things will go
+wrong if it does not route I/O to exactly the same on-disk location as
+previously.)
+
+Using an existing pool device
+-----------------------------
+
+::
+
+    dmsetup create pool \
+	--table "0 20971520 thin-pool $metadata_dev $data_dev \
+		 $data_block_size $low_water_mark"
+
+$data_block_size gives the smallest unit of disk space that can be
+allocated at a time expressed in units of 512-byte sectors.
+$data_block_size must be between 128 (64KB) and 2097152 (1GB) and a
+multiple of 128 (64KB).  $data_block_size cannot be changed after the
+thin-pool is created.  People primarily interested in thin provisioning
+may want to use a value such as 1024 (512KB).  People doing lots of
+snapshotting may want a smaller value such as 128 (64KB).  If you are
+not zeroing newly-allocated data, a larger $data_block_size in the
+region of 256000 (128MB) is suggested.
+
+$low_water_mark is expressed in blocks of size $data_block_size.  If
+free space on the data device drops below this level then a dm event
+will be triggered which a userspace daemon should catch allowing it to
+extend the pool device.  Only one such event will be sent.
+
+No special event is triggered if a just resumed device's free space is below
+the low water mark. However, resuming a device always triggers an
+event; a userspace daemon should verify that free space exceeds the low
+water mark when handling this event.
+
+A low water mark for the metadata device is maintained in the kernel and
+will trigger a dm event if free space on the metadata device drops below
+it.
+
+Updating on-disk metadata
+-------------------------
+
+On-disk metadata is committed every time a FLUSH or FUA bio is written.
+If no such requests are made then commits will occur every second.  This
+means the thin-provisioning target behaves like a physical disk that has
+a volatile write cache.  If power is lost you may lose some recent
+writes.  The metadata should always be consistent in spite of any crash.
+
+If data space is exhausted the pool will either error or queue IO
+according to the configuration (see: error_if_no_space).  If metadata
+space is exhausted or a metadata operation fails: the pool will error IO
+until the pool is taken offline and repair is performed to 1) fix any
+potential inconsistencies and 2) clear the flag that imposes repair.
+Once the pool's metadata device is repaired it may be resized, which
+will allow the pool to return to normal operation.  Note that if a pool
+is flagged as needing repair, the pool's data and metadata devices
+cannot be resized until repair is performed.  It should also be noted
+that when the pool's metadata space is exhausted the current metadata
+transaction is aborted.  Given that the pool will cache IO whose
+completion may have already been acknowledged to upper IO layers
+(e.g. filesystem) it is strongly suggested that consistency checks
+(e.g. fsck) be performed on those layers when repair of the pool is
+required.
+
+Thin provisioning
+-----------------
+
+i) Creating a new thinly-provisioned volume.
+
+  To create a new thinly- provisioned volume you must send a message to an
+  active pool device, /dev/mapper/pool in this example::
+
+    dmsetup message /dev/mapper/pool 0 "create_thin 0"
+
+  Here '0' is an identifier for the volume, a 24-bit number.  It's up
+  to the caller to allocate and manage these identifiers.  If the
+  identifier is already in use, the message will fail with -EEXIST.
+
+ii) Using a thinly-provisioned volume.
+
+  Thinly-provisioned volumes are activated using the 'thin' target::
+
+    dmsetup create thin --table "0 2097152 thin /dev/mapper/pool 0"
+
+  The last parameter is the identifier for the thinp device.
+
+Internal snapshots
+------------------
+
+i) Creating an internal snapshot.
+
+  Snapshots are created with another message to the pool.
+
+  N.B.  If the origin device that you wish to snapshot is active, you
+  must suspend it before creating the snapshot to avoid corruption.
+  This is NOT enforced at the moment, so please be careful!
+
+  ::
+
+    dmsetup suspend /dev/mapper/thin
+    dmsetup message /dev/mapper/pool 0 "create_snap 1 0"
+    dmsetup resume /dev/mapper/thin
+
+  Here '1' is the identifier for the volume, a 24-bit number.  '0' is the
+  identifier for the origin device.
+
+ii) Using an internal snapshot.
+
+  Once created, the user doesn't have to worry about any connection
+  between the origin and the snapshot.  Indeed the snapshot is no
+  different from any other thinly-provisioned device and can be
+  snapshotted itself via the same method.  It's perfectly legal to
+  have only one of them active, and there's no ordering requirement on
+  activating or removing them both.  (This differs from conventional
+  device-mapper snapshots.)
+
+  Activate it exactly the same way as any other thinly-provisioned volume::
+
+    dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 1"
+
+External snapshots
+------------------
+
+You can use an external **read only** device as an origin for a
+thinly-provisioned volume.  Any read to an unprovisioned area of the
+thin device will be passed through to the origin.  Writes trigger
+the allocation of new blocks as usual.
+
+One use case for this is VM hosts that want to run guests on
+thinly-provisioned volumes but have the base image on another device
+(possibly shared between many VMs).
+
+You must not write to the origin device if you use this technique!
+Of course, you may write to the thin device and take internal snapshots
+of the thin volume.
+
+i) Creating a snapshot of an external device
+
+  This is the same as creating a thin device.
+  You don't mention the origin at this stage.
+
+  ::
+
+    dmsetup message /dev/mapper/pool 0 "create_thin 0"
+
+ii) Using a snapshot of an external device.
+
+  Append an extra parameter to the thin target specifying the origin::
+
+    dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 0 /dev/image"
+
+  N.B. All descendants (internal snapshots) of this snapshot require the
+  same extra origin parameter.
+
+Deactivation
+------------
+
+All devices using a pool must be deactivated before the pool itself
+can be.
+
+::
+
+    dmsetup remove thin
+    dmsetup remove snap
+    dmsetup remove pool
+
+Reference
+=========
+
+'thin-pool' target
+------------------
+
+i) Constructor
+
+    ::
+
+      thin-pool <metadata dev> <data dev> <data block size (sectors)> \
+	        <low water mark (blocks)> [<number of feature args> [<arg>]*]
+
+    Optional feature arguments:
+
+      skip_block_zeroing:
+	Skip the zeroing of newly-provisioned blocks.
+
+      ignore_discard:
+	Disable discard support.
+
+      no_discard_passdown:
+	Don't pass discards down to the underlying
+	data device, but just remove the mapping.
+
+      read_only:
+		 Don't allow any changes to be made to the pool
+		 metadata.  This mode is only available after the
+		 thin-pool has been created and first used in full
+		 read/write mode.  It cannot be specified on initial
+		 thin-pool creation.
+
+      error_if_no_space:
+	Error IOs, instead of queueing, if no space.
+
+    Data block size must be between 64KB (128 sectors) and 1GB
+    (2097152 sectors) inclusive.
+
+
+ii) Status
+
+    ::
+
+      <transaction id> <used metadata blocks>/<total metadata blocks>
+      <used data blocks>/<total data blocks> <held metadata root>
+      ro|rw|out_of_data_space [no_]discard_passdown [error|queue]_if_no_space
+      needs_check|- metadata_low_watermark
+
+    transaction id:
+	A 64-bit number used by userspace to help synchronise with metadata
+	from volume managers.
+
+    used data blocks / total data blocks
+	If the number of free blocks drops below the pool's low water mark a
+	dm event will be sent to userspace.  This event is edge-triggered and
+	it will occur only once after each resume so volume manager writers
+	should register for the event and then check the target's status.
+
+    held metadata root:
+	The location, in blocks, of the metadata root that has been
+	'held' for userspace read access.  '-' indicates there is no
+	held root.
+
+    discard_passdown|no_discard_passdown
+	Whether or not discards are actually being passed down to the
+	underlying device.  When this is enabled when loading the table,
+	it can get disabled if the underlying device doesn't support it.
+
+    ro|rw|out_of_data_space
+	If the pool encounters certain types of device failures it will
+	drop into a read-only metadata mode in which no changes to
+	the pool metadata (like allocating new blocks) are permitted.
+
+	In serious cases where even a read-only mode is deemed unsafe
+	no further I/O will be permitted and the status will just
+	contain the string 'Fail'.  The userspace recovery tools
+	should then be used.
+
+    error_if_no_space|queue_if_no_space
+	If the pool runs out of data or metadata space, the pool will
+	either queue or error the IO destined to the data device.  The
+	default is to queue the IO until more space is added or the
+	'no_space_timeout' expires.  The 'no_space_timeout' dm-thin-pool
+	module parameter can be used to change this timeout -- it
+	defaults to 60 seconds but may be disabled using a value of 0.
+
+    needs_check
+	A metadata operation has failed, resulting in the needs_check
+	flag being set in the metadata's superblock.  The metadata
+	device must be deactivated and checked/repaired before the
+	thin-pool can be made fully operational again.  '-' indicates
+	needs_check is not set.
+
+    metadata_low_watermark:
+	Value of metadata low watermark in blocks.  The kernel sets this
+	value internally but userspace needs to know this value to
+	determine if an event was caused by crossing this threshold.
+
+iii) Messages
+
+    create_thin <dev id>
+	Create a new thinly-provisioned device.
+	<dev id> is an arbitrary unique 24-bit identifier chosen by
+	the caller.
+
+    create_snap <dev id> <origin id>
+	Create a new snapshot of another thinly-provisioned device.
+	<dev id> is an arbitrary unique 24-bit identifier chosen by
+	the caller.
+	<origin id> is the identifier of the thinly-provisioned device
+	of which the new device will be a snapshot.
+
+    delete <dev id>
+	Deletes a thin device.  Irreversible.
+
+    set_transaction_id <current id> <new id>
+	Userland volume managers, such as LVM, need a way to
+	synchronise their external metadata with the internal metadata of the
+	pool target.  The thin-pool target offers to store an
+	arbitrary 64-bit transaction id and return it on the target's
+	status line.  To avoid races you must provide what you think
+	the current transaction id is when you change it with this
+	compare-and-swap message.
+
+    reserve_metadata_snap
+        Reserve a copy of the data mapping btree for use by userland.
+        This allows userland to inspect the mappings as they were when
+        this message was executed.  Use the pool's status command to
+        get the root block associated with the metadata snapshot.
+
+    release_metadata_snap
+        Release a previously reserved copy of the data mapping btree.
+
+'thin' target
+-------------
+
+i) Constructor
+
+    ::
+
+        thin <pool dev> <dev id> [<external origin dev>]
+
+    pool dev:
+	the thin-pool device, e.g. /dev/mapper/my_pool or 253:0
+
+    dev id:
+	the internal device identifier of the device to be
+	activated.
+
+    external origin dev:
+	an optional block device outside the pool to be treated as a
+	read-only snapshot origin: reads to unprovisioned areas of the
+	thin target will be mapped to this device.
+
+The pool doesn't store any size against the thin devices.  If you
+load a thin target that is smaller than you've been using previously,
+then you'll have no access to blocks mapped beyond the end.  If you
+load a target that is bigger than before, then extra blocks will be
+provisioned as and when needed.
+
+ii) Status
+
+    <nr mapped sectors> <highest mapped sector>
+	If the pool has encountered device errors and failed, the status
+	will just contain the string 'Fail'.  The userspace recovery
+	tools should then be used.
+
+    In the case where <nr mapped sectors> is 0, there is no highest
+    mapped sector and the value of <highest mapped sector> is unspecified.
diff --git a/Documentation/admin-guide/device-mapper/unstriped.rst b/Documentation/admin-guide/device-mapper/unstriped.rst
new file mode 100644
index 000000000000..0a8d3eb3f072
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/unstriped.rst
@@ -0,0 +1,135 @@
+================================
+Device-mapper "unstriped" target
+================================
+
+Introduction
+============
+
+The device-mapper "unstriped" target provides a transparent mechanism to
+unstripe a device-mapper "striped" target to access the underlying disks
+without having to touch the true backing block-device.  It can also be
+used to unstripe a hardware RAID-0 to access backing disks.
+
+Parameters:
+<number of stripes> <chunk size> <stripe #> <dev_path> <offset>
+
+<number of stripes>
+        The number of stripes in the RAID 0.
+
+<chunk size>
+	The amount of 512B sectors in the chunk striping.
+
+<dev_path>
+	The block device you wish to unstripe.
+
+<stripe #>
+        The stripe number within the device that corresponds to physical
+        drive you wish to unstripe.  This must be 0 indexed.
+
+
+Why use this module?
+====================
+
+An example of undoing an existing dm-stripe
+-------------------------------------------
+
+This small bash script will setup 4 loop devices and use the existing
+striped target to combine the 4 devices into one.  It then will use
+the unstriped target ontop of the striped device to access the
+individual backing loop devices.  We write data to the newly exposed
+unstriped devices and verify the data written matches the correct
+underlying device on the striped array::
+
+  #!/bin/bash
+
+  MEMBER_SIZE=$((128 * 1024 * 1024))
+  NUM=4
+  SEQ_END=$((${NUM}-1))
+  CHUNK=256
+  BS=4096
+
+  RAID_SIZE=$((${MEMBER_SIZE}*${NUM}/512))
+  DM_PARMS="0 ${RAID_SIZE} striped ${NUM} ${CHUNK}"
+  COUNT=$((${MEMBER_SIZE} / ${BS}))
+
+  for i in $(seq 0 ${SEQ_END}); do
+    dd if=/dev/zero of=member-${i} bs=${MEMBER_SIZE} count=1 oflag=direct
+    losetup /dev/loop${i} member-${i}
+    DM_PARMS+=" /dev/loop${i} 0"
+  done
+
+  echo $DM_PARMS | dmsetup create raid0
+  for i in $(seq 0 ${SEQ_END}); do
+    echo "0 1 unstriped ${NUM} ${CHUNK} ${i} /dev/mapper/raid0 0" | dmsetup create set-${i}
+  done;
+
+  for i in $(seq 0 ${SEQ_END}); do
+    dd if=/dev/urandom of=/dev/mapper/set-${i} bs=${BS} count=${COUNT} oflag=direct
+    diff /dev/mapper/set-${i} member-${i}
+  done;
+
+  for i in $(seq 0 ${SEQ_END}); do
+    dmsetup remove set-${i}
+  done
+
+  dmsetup remove raid0
+
+  for i in $(seq 0 ${SEQ_END}); do
+    losetup -d /dev/loop${i}
+    rm -f member-${i}
+  done
+
+Another example
+---------------
+
+Intel NVMe drives contain two cores on the physical device.
+Each core of the drive has segregated access to its LBA range.
+The current LBA model has a RAID 0 128k chunk on each core, resulting
+in a 256k stripe across the two cores::
+
+   Core 0:       Core 1:
+  __________    __________
+  | LBA 512|    | LBA 768|
+  | LBA 0  |    | LBA 256|
+  ----------    ----------
+
+The purpose of this unstriping is to provide better QoS in noisy
+neighbor environments. When two partitions are created on the
+aggregate drive without this unstriping, reads on one partition
+can affect writes on another partition.  This is because the partitions
+are striped across the two cores.  When we unstripe this hardware RAID 0
+and make partitions on each new exposed device the two partitions are now
+physically separated.
+
+With the dm-unstriped target we're able to segregate an fio script that
+has read and write jobs that are independent of each other.  Compared to
+when we run the test on a combined drive with partitions, we were able
+to get a 92% reduction in read latency using this device mapper target.
+
+
+Example dmsetup usage
+=====================
+
+unstriped ontop of Intel NVMe device that has 2 cores
+-----------------------------------------------------
+
+::
+
+  dmsetup create nvmset0 --table '0 512 unstriped 2 256 0 /dev/nvme0n1 0'
+  dmsetup create nvmset1 --table '0 512 unstriped 2 256 1 /dev/nvme0n1 0'
+
+There will now be two devices that expose Intel NVMe core 0 and 1
+respectively::
+
+  /dev/mapper/nvmset0
+  /dev/mapper/nvmset1
+
+unstriped ontop of striped with 4 drives using 128K chunk size
+--------------------------------------------------------------
+
+::
+
+  dmsetup create raid_disk0 --table '0 512 unstriped 4 256 0 /dev/mapper/striped 0'
+  dmsetup create raid_disk1 --table '0 512 unstriped 4 256 1 /dev/mapper/striped 0'
+  dmsetup create raid_disk2 --table '0 512 unstriped 4 256 2 /dev/mapper/striped 0'
+  dmsetup create raid_disk3 --table '0 512 unstriped 4 256 3 /dev/mapper/striped 0'
diff --git a/Documentation/admin-guide/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst
new file mode 100644
index 000000000000..a4d1c1476d72
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/verity.rst
@@ -0,0 +1,229 @@
+=========
+dm-verity
+=========
+
+Device-Mapper's "verity" target provides transparent integrity checking of
+block devices using a cryptographic digest provided by the kernel crypto API.
+This target is read-only.
+
+Construction Parameters
+=======================
+
+::
+
+    <version> <dev> <hash_dev>
+    <data_block_size> <hash_block_size>
+    <num_data_blocks> <hash_start_block>
+    <algorithm> <digest> <salt>
+    [<#opt_params> <opt_params>]
+
+<version>
+    This is the type of the on-disk hash format.
+
+    0 is the original format used in the Chromium OS.
+      The salt is appended when hashing, digests are stored continuously and
+      the rest of the block is padded with zeroes.
+
+    1 is the current format that should be used for new devices.
+      The salt is prepended when hashing and each digest is
+      padded with zeroes to the power of two.
+
+<dev>
+    This is the device containing data, the integrity of which needs to be
+    checked.  It may be specified as a path, like /dev/sdaX, or a device number,
+    <major>:<minor>.
+
+<hash_dev>
+    This is the device that supplies the hash tree data.  It may be
+    specified similarly to the device path and may be the same device.  If the
+    same device is used, the hash_start should be outside the configured
+    dm-verity device.
+
+<data_block_size>
+    The block size on a data device in bytes.
+    Each block corresponds to one digest on the hash device.
+
+<hash_block_size>
+    The size of a hash block in bytes.
+
+<num_data_blocks>
+    The number of data blocks on the data device.  Additional blocks are
+    inaccessible.  You can place hashes to the same partition as data, in this
+    case hashes are placed after <num_data_blocks>.
+
+<hash_start_block>
+    This is the offset, in <hash_block_size>-blocks, from the start of hash_dev
+    to the root block of the hash tree.
+
+<algorithm>
+    The cryptographic hash algorithm used for this device.  This should
+    be the name of the algorithm, like "sha1".
+
+<digest>
+    The hexadecimal encoding of the cryptographic hash of the root hash block
+    and the salt.  This hash should be trusted as there is no other authenticity
+    beyond this point.
+
+<salt>
+    The hexadecimal encoding of the salt value.
+
+<#opt_params>
+    Number of optional parameters. If there are no optional parameters,
+    the optional paramaters section can be skipped or #opt_params can be zero.
+    Otherwise #opt_params is the number of following arguments.
+
+    Example of optional parameters section:
+        1 ignore_corruption
+
+ignore_corruption
+    Log corrupted blocks, but allow read operations to proceed normally.
+
+restart_on_corruption
+    Restart the system when a corrupted block is discovered. This option is
+    not compatible with ignore_corruption and requires user space support to
+    avoid restart loops.
+
+ignore_zero_blocks
+    Do not verify blocks that are expected to contain zeroes and always return
+    zeroes instead. This may be useful if the partition contains unused blocks
+    that are not guaranteed to contain zeroes.
+
+use_fec_from_device <fec_dev>
+    Use forward error correction (FEC) to recover from corruption if hash
+    verification fails. Use encoding data from the specified device. This
+    may be the same device where data and hash blocks reside, in which case
+    fec_start must be outside data and hash areas.
+
+    If the encoding data covers additional metadata, it must be accessible
+    on the hash device after the hash blocks.
+
+    Note: block sizes for data and hash devices must match. Also, if the
+    verity <dev> is encrypted the <fec_dev> should be too.
+
+fec_roots <num>
+    Number of generator roots. This equals to the number of parity bytes in
+    the encoding data. For example, in RS(M, N) encoding, the number of roots
+    is M-N.
+
+fec_blocks <num>
+    The number of encoding data blocks on the FEC device. The block size for
+    the FEC device is <data_block_size>.
+
+fec_start <offset>
+    This is the offset, in <data_block_size> blocks, from the start of the
+    FEC device to the beginning of the encoding data.
+
+check_at_most_once
+    Verify data blocks only the first time they are read from the data device,
+    rather than every time.  This reduces the overhead of dm-verity so that it
+    can be used on systems that are memory and/or CPU constrained.  However, it
+    provides a reduced level of security because only offline tampering of the
+    data device's content will be detected, not online tampering.
+
+    Hash blocks are still verified each time they are read from the hash device,
+    since verification of hash blocks is less performance critical than data
+    blocks, and a hash block will not be verified any more after all the data
+    blocks it covers have been verified anyway.
+
+Theory of operation
+===================
+
+dm-verity is meant to be set up as part of a verified boot path.  This
+may be anything ranging from a boot using tboot or trustedgrub to just
+booting from a known-good device (like a USB drive or CD).
+
+When a dm-verity device is configured, it is expected that the caller
+has been authenticated in some way (cryptographic signatures, etc).
+After instantiation, all hashes will be verified on-demand during
+disk access.  If they cannot be verified up to the root node of the
+tree, the root hash, then the I/O will fail.  This should detect
+tampering with any data on the device and the hash data.
+
+Cryptographic hashes are used to assert the integrity of the device on a
+per-block basis. This allows for a lightweight hash computation on first read
+into the page cache. Block hashes are stored linearly, aligned to the nearest
+block size.
+
+If forward error correction (FEC) support is enabled any recovery of
+corrupted data will be verified using the cryptographic hash of the
+corresponding data. This is why combining error correction with
+integrity checking is essential.
+
+Hash Tree
+---------
+
+Each node in the tree is a cryptographic hash.  If it is a leaf node, the hash
+of some data block on disk is calculated. If it is an intermediary node,
+the hash of a number of child nodes is calculated.
+
+Each entry in the tree is a collection of neighboring nodes that fit in one
+block.  The number is determined based on block_size and the size of the
+selected cryptographic digest algorithm.  The hashes are linearly-ordered in
+this entry and any unaligned trailing space is ignored but included when
+calculating the parent node.
+
+The tree looks something like:
+
+	alg = sha256, num_blocks = 32768, block_size = 4096
+
+::
+
+                                 [   root    ]
+                                /    . . .    \
+                     [entry_0]                 [entry_1]
+                    /  . . .  \                 . . .   \
+         [entry_0_0]   . . .  [entry_0_127]    . . . .  [entry_1_127]
+           / ... \             /   . . .  \             /           \
+     blk_0 ... blk_127  blk_16256   blk_16383      blk_32640 . . . blk_32767
+
+
+On-disk format
+==============
+
+The verity kernel code does not read the verity metadata on-disk header.
+It only reads the hash blocks which directly follow the header.
+It is expected that a user-space tool will verify the integrity of the
+verity header.
+
+Alternatively, the header can be omitted and the dmsetup parameters can
+be passed via the kernel command-line in a rooted chain of trust where
+the command-line is verified.
+
+Directly following the header (and with sector number padded to the next hash
+block boundary) are the hash blocks which are stored a depth at a time
+(starting from the root), sorted in order of increasing index.
+
+The full specification of kernel parameters and on-disk metadata format
+is available at the cryptsetup project's wiki page
+
+  https://gitlab.com/cryptsetup/cryptsetup/wikis/DMVerity
+
+Status
+======
+V (for Valid) is returned if every check performed so far was valid.
+If any check failed, C (for Corruption) is returned.
+
+Example
+=======
+Set up a device::
+
+  # dmsetup create vroot --readonly --table \
+    "0 2097152 verity 1 /dev/sda1 /dev/sda2 4096 4096 262144 1 sha256 "\
+    "4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076 "\
+    "1234000000000000000000000000000000000000000000000000000000000000"
+
+A command line tool veritysetup is available to compute or verify
+the hash tree or activate the kernel device. This is available from
+the cryptsetup upstream repository https://gitlab.com/cryptsetup/cryptsetup/
+(as a libcryptsetup extension).
+
+Create hash on the device::
+
+  # veritysetup format /dev/sda1 /dev/sda2
+  ...
+  Root hash: 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
+
+Activate the device::
+
+  # veritysetup create vroot /dev/sda1 /dev/sda2 \
+    4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
diff --git a/Documentation/admin-guide/device-mapper/writecache.rst b/Documentation/admin-guide/device-mapper/writecache.rst
new file mode 100644
index 000000000000..d3d7690f5e8d
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/writecache.rst
@@ -0,0 +1,79 @@
+=================
+Writecache target
+=================
+
+The writecache target caches writes on persistent memory or on SSD. It
+doesn't cache reads because reads are supposed to be cached in page cache
+in normal RAM.
+
+When the device is constructed, the first sector should be zeroed or the
+first sector should contain valid superblock from previous invocation.
+
+Constructor parameters:
+
+1. type of the cache device - "p" or "s"
+
+	- p - persistent memory
+	- s - SSD
+2. the underlying device that will be cached
+3. the cache device
+4. block size (4096 is recommended; the maximum block size is the page
+   size)
+5. the number of optional parameters (the parameters with an argument
+   count as two)
+
+	start_sector n		(default: 0)
+		offset from the start of cache device in 512-byte sectors
+	high_watermark n	(default: 50)
+		start writeback when the number of used blocks reach this
+		watermark
+	low_watermark x		(default: 45)
+		stop writeback when the number of used blocks drops below
+		this watermark
+	writeback_jobs n	(default: unlimited)
+		limit the number of blocks that are in flight during
+		writeback. Setting this value reduces writeback
+		throughput, but it may improve latency of read requests
+	autocommit_blocks n	(default: 64 for pmem, 65536 for ssd)
+		when the application writes this amount of blocks without
+		issuing the FLUSH request, the blocks are automatically
+		commited
+	autocommit_time ms	(default: 1000)
+		autocommit time in milliseconds. The data is automatically
+		commited if this time passes and no FLUSH request is
+		received
+	fua			(by default on)
+		applicable only to persistent memory - use the FUA flag
+		when writing data from persistent memory back to the
+		underlying device
+	nofua
+		applicable only to persistent memory - don't use the FUA
+		flag when writing back data and send the FLUSH request
+		afterwards
+
+		- some underlying devices perform better with fua, some
+		  with nofua. The user should test it
+
+Status:
+1. error indicator - 0 if there was no error, otherwise error number
+2. the number of blocks
+3. the number of free blocks
+4. the number of blocks under writeback
+
+Messages:
+	flush
+		flush the cache device. The message returns successfully
+		if the cache device was flushed without an error
+	flush_on_suspend
+		flush the cache device on next suspend. Use this message
+		when you are going to remove the cache device. The proper
+		sequence for removing the cache device is:
+
+		1. send the "flush_on_suspend" message
+		2. load an inactive table with a linear target that maps
+		   to the underlying device
+		3. suspend the device
+		4. ask for status and verify that there are no errors
+		5. resume the device, so that it will use the linear
+		   target
+		6. the cache device is now inactive and it can be deleted
diff --git a/Documentation/admin-guide/device-mapper/zero.rst b/Documentation/admin-guide/device-mapper/zero.rst
new file mode 100644
index 000000000000..11fb5cf4597c
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/zero.rst
@@ -0,0 +1,37 @@
+=======
+dm-zero
+=======
+
+Device-Mapper's "zero" target provides a block-device that always returns
+zero'd data on reads and silently drops writes. This is similar behavior to
+/dev/zero, but as a block-device instead of a character-device.
+
+Dm-zero has no target-specific parameters.
+
+One very interesting use of dm-zero is for creating "sparse" devices in
+conjunction with dm-snapshot. A sparse device reports a device-size larger
+than the amount of actual storage space available for that device. A user can
+write data anywhere within the sparse device and read it back like a normal
+device. Reads to previously unwritten areas will return a zero'd buffer. When
+enough data has been written to fill up the actual storage space, the sparse
+device is deactivated. This can be very useful for testing device and
+filesystem limitations.
+
+To create a sparse device, start by creating a dm-zero device that's the
+desired size of the sparse device. For this example, we'll assume a 10TB
+sparse device::
+
+  TEN_TERABYTES=`expr 10 \* 1024 \* 1024 \* 1024 \* 2`   # 10 TB in sectors
+  echo "0 $TEN_TERABYTES zero" | dmsetup create zero1
+
+Then create a snapshot of the zero device, using any available block-device as
+the COW device. The size of the COW device will determine the amount of real
+space available to the sparse device. For this example, we'll assume /dev/sdb1
+is an available 10GB partition::
+
+  echo "0 $TEN_TERABYTES snapshot /dev/mapper/zero1 /dev/sdb1 p 128" | \
+     dmsetup create sparse1
+
+This will create a 10TB sparse device called /dev/mapper/sparse1 that has
+10GB of actual storage space available. If more than 10GB of data is written
+to this device, it will start returning I/O errors.
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index abc2c4e83939..64e97a969857 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -80,6 +80,7 @@ configure specific aspects of kernel behavior to your liking.
    namespaces/index
    perf-security
    acpi/index
+   device-mapper/index
 
 .. only::  subproject and html
 
diff --git a/Documentation/device-mapper/cache-policies.rst b/Documentation/device-mapper/cache-policies.rst
deleted file mode 100644
index b17fe352fc41..000000000000
--- a/Documentation/device-mapper/cache-policies.rst
+++ /dev/null
@@ -1,131 +0,0 @@
-=============================
-Guidance for writing policies
-=============================
-
-Try to keep transactionality out of it.  The core is careful to
-avoid asking about anything that is migrating.  This is a pain, but
-makes it easier to write the policies.
-
-Mappings are loaded into the policy at construction time.
-
-Every bio that is mapped by the target is referred to the policy.
-The policy can return a simple HIT or MISS or issue a migration.
-
-Currently there's no way for the policy to issue background work,
-e.g. to start writing back dirty blocks that are going to be evicted
-soon.
-
-Because we map bios, rather than requests it's easy for the policy
-to get fooled by many small bios.  For this reason the core target
-issues periodic ticks to the policy.  It's suggested that the policy
-doesn't update states (eg, hit counts) for a block more than once
-for each tick.  The core ticks by watching bios complete, and so
-trying to see when the io scheduler has let the ios run.
-
-
-Overview of supplied cache replacement policies
-===============================================
-
-multiqueue (mq)
----------------
-
-This policy is now an alias for smq (see below).
-
-The following tunables are accepted, but have no effect::
-
-	'sequential_threshold <#nr_sequential_ios>'
-	'random_threshold <#nr_random_ios>'
-	'read_promote_adjustment <value>'
-	'write_promote_adjustment <value>'
-	'discard_promote_adjustment <value>'
-
-Stochastic multiqueue (smq)
----------------------------
-
-This policy is the default.
-
-The stochastic multi-queue (smq) policy addresses some of the problems
-with the multiqueue (mq) policy.
-
-The smq policy (vs mq) offers the promise of less memory utilization,
-improved performance and increased adaptability in the face of changing
-workloads.  smq also does not have any cumbersome tuning knobs.
-
-Users may switch from "mq" to "smq" simply by appropriately reloading a
-DM table that is using the cache target.  Doing so will cause all of the
-mq policy's hints to be dropped.  Also, performance of the cache may
-degrade slightly until smq recalculates the origin device's hotspots
-that should be cached.
-
-Memory usage
-^^^^^^^^^^^^
-
-The mq policy used a lot of memory; 88 bytes per cache block on a 64
-bit machine.
-
-smq uses 28bit indexes to implement its data structures rather than
-pointers.  It avoids storing an explicit hit count for each block.  It
-has a 'hotspot' queue, rather than a pre-cache, which uses a quarter of
-the entries (each hotspot block covers a larger area than a single
-cache block).
-
-All this means smq uses ~25bytes per cache block.  Still a lot of
-memory, but a substantial improvement nontheless.
-
-Level balancing
-^^^^^^^^^^^^^^^
-
-mq placed entries in different levels of the multiqueue structures
-based on their hit count (~ln(hit count)).  This meant the bottom
-levels generally had the most entries, and the top ones had very
-few.  Having unbalanced levels like this reduced the efficacy of the
-multiqueue.
-
-smq does not maintain a hit count, instead it swaps hit entries with
-the least recently used entry from the level above.  The overall
-ordering being a side effect of this stochastic process.  With this
-scheme we can decide how many entries occupy each multiqueue level,
-resulting in better promotion/demotion decisions.
-
-Adaptability:
-The mq policy maintained a hit count for each cache block.  For a
-different block to get promoted to the cache its hit count has to
-exceed the lowest currently in the cache.  This meant it could take a
-long time for the cache to adapt between varying IO patterns.
-
-smq doesn't maintain hit counts, so a lot of this problem just goes
-away.  In addition it tracks performance of the hotspot queue, which
-is used to decide which blocks to promote.  If the hotspot queue is
-performing badly then it starts moving entries more quickly between
-levels.  This lets it adapt to new IO patterns very quickly.
-
-Performance
-^^^^^^^^^^^
-
-Testing smq shows substantially better performance than mq.
-
-cleaner
--------
-
-The cleaner writes back all dirty blocks in a cache to decommission it.
-
-Examples
-========
-
-The syntax for a table is::
-
-	cache <metadata dev> <cache dev> <origin dev> <block size>
-	<#feature_args> [<feature arg>]*
-	<policy> <#policy_args> [<policy arg>]*
-
-The syntax to send a message using the dmsetup command is::
-
-	dmsetup message <mapped device> 0 sequential_threshold 1024
-	dmsetup message <mapped device> 0 random_threshold 8
-
-Using dmsetup::
-
-	dmsetup create blah --table "0 268435456 cache /dev/sdb /dev/sdc \
-	    /dev/sdd 512 0 mq 4 sequential_threshold 1024 random_threshold 8"
-	creates a 128GB large mapped device named 'blah' with the
-	sequential threshold set to 1024 and the random_threshold set to 8.
diff --git a/Documentation/device-mapper/cache.rst b/Documentation/device-mapper/cache.rst
deleted file mode 100644
index f15e5254d05b..000000000000
--- a/Documentation/device-mapper/cache.rst
+++ /dev/null
@@ -1,337 +0,0 @@
-=====
-Cache
-=====
-
-Introduction
-============
-
-dm-cache is a device mapper target written by Joe Thornber, Heinz
-Mauelshagen, and Mike Snitzer.
-
-It aims to improve performance of a block device (eg, a spindle) by
-dynamically migrating some of its data to a faster, smaller device
-(eg, an SSD).
-
-This device-mapper solution allows us to insert this caching at
-different levels of the dm stack, for instance above the data device for
-a thin-provisioning pool.  Caching solutions that are integrated more
-closely with the virtual memory system should give better performance.
-
-The target reuses the metadata library used in the thin-provisioning
-library.
-
-The decision as to what data to migrate and when is left to a plug-in
-policy module.  Several of these have been written as we experiment,
-and we hope other people will contribute others for specific io
-scenarios (eg. a vm image server).
-
-Glossary
-========
-
-  Migration
-	       Movement of the primary copy of a logical block from one
-	       device to the other.
-  Promotion
-	       Migration from slow device to fast device.
-  Demotion
-	       Migration from fast device to slow device.
-
-The origin device always contains a copy of the logical block, which
-may be out of date or kept in sync with the copy on the cache device
-(depending on policy).
-
-Design
-======
-
-Sub-devices
------------
-
-The target is constructed by passing three devices to it (along with
-other parameters detailed later):
-
-1. An origin device - the big, slow one.
-
-2. A cache device - the small, fast one.
-
-3. A small metadata device - records which blocks are in the cache,
-   which are dirty, and extra hints for use by the policy object.
-   This information could be put on the cache device, but having it
-   separate allows the volume manager to configure it differently,
-   e.g. as a mirror for extra robustness.  This metadata device may only
-   be used by a single cache device.
-
-Fixed block size
-----------------
-
-The origin is divided up into blocks of a fixed size.  This block size
-is configurable when you first create the cache.  Typically we've been
-using block sizes of 256KB - 1024KB.  The block size must be between 64
-sectors (32KB) and 2097152 sectors (1GB) and a multiple of 64 sectors (32KB).
-
-Having a fixed block size simplifies the target a lot.  But it is
-something of a compromise.  For instance, a small part of a block may be
-getting hit a lot, yet the whole block will be promoted to the cache.
-So large block sizes are bad because they waste cache space.  And small
-block sizes are bad because they increase the amount of metadata (both
-in core and on disk).
-
-Cache operating modes
----------------------
-
-The cache has three operating modes: writeback, writethrough and
-passthrough.
-
-If writeback, the default, is selected then a write to a block that is
-cached will go only to the cache and the block will be marked dirty in
-the metadata.
-
-If writethrough is selected then a write to a cached block will not
-complete until it has hit both the origin and cache devices.  Clean
-blocks should remain clean.
-
-If passthrough is selected, useful when the cache contents are not known
-to be coherent with the origin device, then all reads are served from
-the origin device (all reads miss the cache) and all writes are
-forwarded to the origin device; additionally, write hits cause cache
-block invalidates.  To enable passthrough mode the cache must be clean.
-Passthrough mode allows a cache device to be activated without having to
-worry about coherency.  Coherency that exists is maintained, although
-the cache will gradually cool as writes take place.  If the coherency of
-the cache can later be verified, or established through use of the
-"invalidate_cblocks" message, the cache device can be transitioned to
-writethrough or writeback mode while still warm.  Otherwise, the cache
-contents can be discarded prior to transitioning to the desired
-operating mode.
-
-A simple cleaner policy is provided, which will clean (write back) all
-dirty blocks in a cache.  Useful for decommissioning a cache or when
-shrinking a cache.  Shrinking the cache's fast device requires all cache
-blocks, in the area of the cache being removed, to be clean.  If the
-area being removed from the cache still contains dirty blocks the resize
-will fail.  Care must be taken to never reduce the volume used for the
-cache's fast device until the cache is clean.  This is of particular
-importance if writeback mode is used.  Writethrough and passthrough
-modes already maintain a clean cache.  Future support to partially clean
-the cache, above a specified threshold, will allow for keeping the cache
-warm and in writeback mode during resize.
-
-Migration throttling
---------------------
-
-Migrating data between the origin and cache device uses bandwidth.
-The user can set a throttle to prevent more than a certain amount of
-migration occurring at any one time.  Currently we're not taking any
-account of normal io traffic going to the devices.  More work needs
-doing here to avoid migrating during those peak io moments.
-
-For the time being, a message "migration_threshold <#sectors>"
-can be used to set the maximum number of sectors being migrated,
-the default being 2048 sectors (1MB).
-
-Updating on-disk metadata
--------------------------
-
-On-disk metadata is committed every time a FLUSH or FUA bio is written.
-If no such requests are made then commits will occur every second.  This
-means the cache behaves like a physical disk that has a volatile write
-cache.  If power is lost you may lose some recent writes.  The metadata
-should always be consistent in spite of any crash.
-
-The 'dirty' state for a cache block changes far too frequently for us
-to keep updating it on the fly.  So we treat it as a hint.  In normal
-operation it will be written when the dm device is suspended.  If the
-system crashes all cache blocks will be assumed dirty when restarted.
-
-Per-block policy hints
-----------------------
-
-Policy plug-ins can store a chunk of data per cache block.  It's up to
-the policy how big this chunk is, but it should be kept small.  Like the
-dirty flags this data is lost if there's a crash so a safe fallback
-value should always be possible.
-
-Policy hints affect performance, not correctness.
-
-Policy messaging
-----------------
-
-Policies will have different tunables, specific to each one, so we
-need a generic way of getting and setting these.  Device-mapper
-messages are used.  Refer to cache-policies.txt.
-
-Discard bitset resolution
--------------------------
-
-We can avoid copying data during migration if we know the block has
-been discarded.  A prime example of this is when mkfs discards the
-whole block device.  We store a bitset tracking the discard state of
-blocks.  However, we allow this bitset to have a different block size
-from the cache blocks.  This is because we need to track the discard
-state for all of the origin device (compare with the dirty bitset
-which is just for the smaller cache device).
-
-Target interface
-================
-
-Constructor
------------
-
-  ::
-
-   cache <metadata dev> <cache dev> <origin dev> <block size>
-         <#feature args> [<feature arg>]*
-         <policy> <#policy args> [policy args]*
-
- ================ =======================================================
- metadata dev     fast device holding the persistent metadata
- cache dev	  fast device holding cached data blocks
- origin dev	  slow device holding original data blocks
- block size       cache unit size in sectors
-
- #feature args    number of feature arguments passed
- feature args     writethrough or passthrough (The default is writeback.)
-
- policy           the replacement policy to use
- #policy args     an even number of arguments corresponding to
-                  key/value pairs passed to the policy
- policy args      key/value pairs passed to the policy
-		  E.g. 'sequential_threshold 1024'
-		  See cache-policies.txt for details.
- ================ =======================================================
-
-Optional feature arguments are:
-
-
-   ==================== ========================================================
-   writethrough		write through caching that prohibits cache block
-			content from being different from origin block content.
-			Without this argument, the default behaviour is to write
-			back cache block contents later for performance reasons,
-			so they may differ from the corresponding origin blocks.
-
-   passthrough		a degraded mode useful for various cache coherency
-			situations (e.g., rolling back snapshots of
-			underlying storage).	 Reads and writes always go to
-			the origin.	If a write goes to a cached origin
-			block, then the cache block is invalidated.
-			To enable passthrough mode the cache must be clean.
-
-   metadata2		use version 2 of the metadata.  This stores the dirty
-			bits in a separate btree, which improves speed of
-			shutting down the cache.
-
-   no_discard_passdown	disable passing down discards from the cache
-			to the origin's data device.
-   ==================== ========================================================
-
-A policy called 'default' is always registered.  This is an alias for
-the policy we currently think is giving best all round performance.
-
-As the default policy could vary between kernels, if you are relying on
-the characteristics of a specific policy, always request it by name.
-
-Status
-------
-
-::
-
-  <metadata block size> <#used metadata blocks>/<#total metadata blocks>
-  <cache block size> <#used cache blocks>/<#total cache blocks>
-  <#read hits> <#read misses> <#write hits> <#write misses>
-  <#demotions> <#promotions> <#dirty> <#features> <features>*
-  <#core args> <core args>* <policy name> <#policy args> <policy args>*
-  <cache metadata mode>
-
-
-========================= =====================================================
-metadata block size	  Fixed block size for each metadata block in
-			  sectors
-#used metadata blocks	  Number of metadata blocks used
-#total metadata blocks	  Total number of metadata blocks
-cache block size	  Configurable block size for the cache device
-			  in sectors
-#used cache blocks	  Number of blocks resident in the cache
-#total cache blocks	  Total number of cache blocks
-#read hits		  Number of times a READ bio has been mapped
-			  to the cache
-#read misses		  Number of times a READ bio has been mapped
-			  to the origin
-#write hits		  Number of times a WRITE bio has been mapped
-			  to the cache
-#write misses		  Number of times a WRITE bio has been
-			  mapped to the origin
-#demotions		  Number of times a block has been removed
-			  from the cache
-#promotions		  Number of times a block has been moved to
-			  the cache
-#dirty			  Number of blocks in the cache that differ
-			  from the origin
-#feature args		  Number of feature args to follow
-feature args		  'writethrough' (optional)
-#core args		  Number of core arguments (must be even)
-core args		  Key/value pairs for tuning the core
-			  e.g. migration_threshold
-policy name		  Name of the policy
-#policy args		  Number of policy arguments to follow (must be even)
-policy args		  Key/value pairs e.g. sequential_threshold
-cache metadata mode       ro if read-only, rw if read-write
-
-			  In serious cases where even a read-only mode is
-			  deemed unsafe no further I/O will be permitted and
-			  the status will just contain the string 'Fail'.
-			  The userspace recovery tools should then be used.
-needs_check		  'needs_check' if set, '-' if not set
-			  A metadata operation has failed, resulting in the
-			  needs_check flag being set in the metadata's
-			  superblock.  The metadata device must be
-			  deactivated and checked/repaired before the
-			  cache can be made fully operational again.
-			  '-' indicates	needs_check is not set.
-========================= =====================================================
-
-Messages
---------
-
-Policies will have different tunables, specific to each one, so we
-need a generic way of getting and setting these.  Device-mapper
-messages are used.  (A sysfs interface would also be possible.)
-
-The message format is::
-
-   <key> <value>
-
-E.g.::
-
-   dmsetup message my_cache 0 sequential_threshold 1024
-
-
-Invalidation is removing an entry from the cache without writing it
-back.  Cache blocks can be invalidated via the invalidate_cblocks
-message, which takes an arbitrary number of cblock ranges.  Each cblock
-range's end value is "one past the end", meaning 5-10 expresses a range
-of values from 5 to 9.  Each cblock must be expressed as a decimal
-value, in the future a variant message that takes cblock ranges
-expressed in hexadecimal may be needed to better support efficient
-invalidation of larger caches.  The cache must be in passthrough mode
-when invalidate_cblocks is used::
-
-   invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
-
-E.g.::
-
-   dmsetup message my_cache 0 invalidate_cblocks 2345 3456-4567 5678-6789
-
-Examples
-========
-
-The test suite can be found here:
-
-https://github.com/jthornber/device-mapper-test-suite
-
-::
-
-  dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
-	  /dev/mapper/ssd /dev/mapper/origin 512 1 writeback default 0'
-  dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
-	  /dev/mapper/ssd /dev/mapper/origin 1024 1 writeback \
-	  mq 4 sequential_threshold 1024 random_threshold 8'
diff --git a/Documentation/device-mapper/delay.rst b/Documentation/device-mapper/delay.rst
deleted file mode 100644
index 917ba8c33359..000000000000
--- a/Documentation/device-mapper/delay.rst
+++ /dev/null
@@ -1,31 +0,0 @@
-========
-dm-delay
-========
-
-Device-Mapper's "delay" target delays reads and/or writes
-and maps them to different devices.
-
-Parameters::
-
-    <device> <offset> <delay> [<write_device> <write_offset> <write_delay>
-			       [<flush_device> <flush_offset> <flush_delay>]]
-
-With separate write parameters, the first set is only used for reads.
-Offsets are specified in sectors.
-Delays are specified in milliseconds.
-
-Example scripts
-===============
-
-::
-
-	#!/bin/sh
-	# Create device delaying rw operation for 500ms
-	echo "0 `blockdev --getsz $1` delay $1 0 500" | dmsetup create delayed
-
-::
-
-	#!/bin/sh
-	# Create device delaying only write operation for 500ms and
-	# splitting reads and writes to different devices $1 $2
-	echo "0 `blockdev --getsz $1` delay $1 0 0 $2 0 500" | dmsetup create delayed
diff --git a/Documentation/device-mapper/dm-crypt.rst b/Documentation/device-mapper/dm-crypt.rst
deleted file mode 100644
index 8f4a3f889d43..000000000000
--- a/Documentation/device-mapper/dm-crypt.rst
+++ /dev/null
@@ -1,173 +0,0 @@
-========
-dm-crypt
-========
-
-Device-Mapper's "crypt" target provides transparent encryption of block devices
-using the kernel crypto API.
-
-For a more detailed description of supported parameters see:
-https://gitlab.com/cryptsetup/cryptsetup/wikis/DMCrypt
-
-Parameters::
-
-	      <cipher> <key> <iv_offset> <device path> \
-	      <offset> [<#opt_params> <opt_params>]
-
-<cipher>
-    Encryption cipher, encryption mode and Initial Vector (IV) generator.
-
-    The cipher specifications format is::
-
-       cipher[:keycount]-chainmode-ivmode[:ivopts]
-
-    Examples::
-
-       aes-cbc-essiv:sha256
-       aes-xts-plain64
-       serpent-xts-plain64
-
-    Cipher format also supports direct specification with kernel crypt API
-    format (selected by capi: prefix). The IV specification is the same
-    as for the first format type.
-    This format is mainly used for specification of authenticated modes.
-
-    The crypto API cipher specifications format is::
-
-        capi:cipher_api_spec-ivmode[:ivopts]
-
-    Examples::
-
-        capi:cbc(aes)-essiv:sha256
-        capi:xts(aes)-plain64
-
-    Examples of authenticated modes::
-
-        capi:gcm(aes)-random
-        capi:authenc(hmac(sha256),xts(aes))-random
-        capi:rfc7539(chacha20,poly1305)-random
-
-    The /proc/crypto contains a list of curently loaded crypto modes.
-
-<key>
-    Key used for encryption. It is encoded either as a hexadecimal number
-    or it can be passed as <key_string> prefixed with single colon
-    character (':') for keys residing in kernel keyring service.
-    You can only use key sizes that are valid for the selected cipher
-    in combination with the selected iv mode.
-    Note that for some iv modes the key string can contain additional
-    keys (for example IV seed) so the key contains more parts concatenated
-    into a single string.
-
-<key_string>
-    The kernel keyring key is identified by string in following format:
-    <key_size>:<key_type>:<key_description>.
-
-<key_size>
-    The encryption key size in bytes. The kernel key payload size must match
-    the value passed in <key_size>.
-
-<key_type>
-    Either 'logon' or 'user' kernel key type.
-
-<key_description>
-    The kernel keyring key description crypt target should look for
-    when loading key of <key_type>.
-
-<keycount>
-    Multi-key compatibility mode. You can define <keycount> keys and
-    then sectors are encrypted according to their offsets (sector 0 uses key0;
-    sector 1 uses key1 etc.).  <keycount> must be a power of two.
-
-<iv_offset>
-    The IV offset is a sector count that is added to the sector number
-    before creating the IV.
-
-<device path>
-    This is the device that is going to be used as backend and contains the
-    encrypted data.  You can specify it as a path like /dev/xxx or a device
-    number <major>:<minor>.
-
-<offset>
-    Starting sector within the device where the encrypted data begins.
-
-<#opt_params>
-    Number of optional parameters. If there are no optional parameters,
-    the optional paramaters section can be skipped or #opt_params can be zero.
-    Otherwise #opt_params is the number of following arguments.
-
-    Example of optional parameters section:
-        3 allow_discards same_cpu_crypt submit_from_crypt_cpus
-
-allow_discards
-    Block discard requests (a.k.a. TRIM) are passed through the crypt device.
-    The default is to ignore discard requests.
-
-    WARNING: Assess the specific security risks carefully before enabling this
-    option.  For example, allowing discards on encrypted devices may lead to
-    the leak of information about the ciphertext device (filesystem type,
-    used space etc.) if the discarded blocks can be located easily on the
-    device later.
-
-same_cpu_crypt
-    Perform encryption using the same cpu that IO was submitted on.
-    The default is to use an unbound workqueue so that encryption work
-    is automatically balanced between available CPUs.
-
-submit_from_crypt_cpus
-    Disable offloading writes to a separate thread after encryption.
-    There are some situations where offloading write bios from the
-    encryption threads to a single thread degrades performance
-    significantly.  The default is to offload write bios to the same
-    thread because it benefits CFQ to have writes submitted using the
-    same context.
-
-integrity:<bytes>:<type>
-    The device requires additional <bytes> metadata per-sector stored
-    in per-bio integrity structure. This metadata must by provided
-    by underlying dm-integrity target.
-
-    The <type> can be "none" if metadata is used only for persistent IV.
-
-    For Authenticated Encryption with Additional Data (AEAD)
-    the <type> is "aead". An AEAD mode additionally calculates and verifies
-    integrity for the encrypted device. The additional space is then
-    used for storing authentication tag (and persistent IV if needed).
-
-sector_size:<bytes>
-    Use <bytes> as the encryption unit instead of 512 bytes sectors.
-    This option can be in range 512 - 4096 bytes and must be power of two.
-    Virtual device will announce this size as a minimal IO and logical sector.
-
-iv_large_sectors
-   IV generators will use sector number counted in <sector_size> units
-   instead of default 512 bytes sectors.
-
-   For example, if <sector_size> is 4096 bytes, plain64 IV for the second
-   sector will be 8 (without flag) and 1 if iv_large_sectors is present.
-   The <iv_offset> must be multiple of <sector_size> (in 512 bytes units)
-   if this flag is specified.
-
-Example scripts
-===============
-LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
-encryption with dm-crypt using the 'cryptsetup' utility, see
-https://gitlab.com/cryptsetup/cryptsetup
-
-::
-
-	#!/bin/sh
-	# Create a crypt device using dmsetup
-	dmsetup create crypt1 --table "0 `blockdev --getsz $1` crypt aes-cbc-essiv:sha256 babebabebabebabebabebabebabebabe 0 $1 0"
-
-::
-
-	#!/bin/sh
-	# Create a crypt device using dmsetup when encryption key is stored in keyring service
-	dmsetup create crypt2 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 :32:logon:my_prefix:my_key 0 $1 0"
-
-::
-
-	#!/bin/sh
-	# Create a crypt device using cryptsetup and LUKS header with default cipher
-	cryptsetup luksFormat $1
-	cryptsetup luksOpen $1 crypt1
diff --git a/Documentation/device-mapper/dm-dust.txt b/Documentation/device-mapper/dm-dust.txt
deleted file mode 100644
index 954d402a1f6a..000000000000
--- a/Documentation/device-mapper/dm-dust.txt
+++ /dev/null
@@ -1,272 +0,0 @@
-dm-dust
-=======
-
-This target emulates the behavior of bad sectors at arbitrary
-locations, and the ability to enable the emulation of the failures
-at an arbitrary time.
-
-This target behaves similarly to a linear target.  At a given time,
-the user can send a message to the target to start failing read
-requests on specific blocks (to emulate the behavior of a hard disk
-drive with bad sectors).
-
-When the failure behavior is enabled (i.e.: when the output of
-"dmsetup status" displays "fail_read_on_bad_block"), reads of blocks
-in the "bad block list" will fail with EIO ("Input/output error").
-
-Writes of blocks in the "bad block list will result in the following:
-
-1. Remove the block from the "bad block list".
-2. Successfully complete the write.
-
-This emulates the "remapped sector" behavior of a drive with bad
-sectors.
-
-Normally, a drive that is encountering bad sectors will most likely
-encounter more bad sectors, at an unknown time or location.
-With dm-dust, the user can use the "addbadblock" and "removebadblock"
-messages to add arbitrary bad blocks at new locations, and the
-"enable" and "disable" messages to modulate the state of whether the
-configured "bad blocks" will be treated as bad, or bypassed.
-This allows the pre-writing of test data and metadata prior to
-simulating a "failure" event where bad sectors start to appear.
-
-Table parameters:
------------------
-<device_path> <offset> <blksz>
-
-Mandatory parameters:
-    <device_path>: path to the block device.
-    <offset>: offset to data area from start of device_path
-    <blksz>: block size in bytes
-	     (minimum 512, maximum 1073741824, must be a power of 2)
-
-Usage instructions:
--------------------
-
-First, find the size (in 512-byte sectors) of the device to be used:
-
-$ sudo blockdev --getsz /dev/vdb1
-33552384
-
-Create the dm-dust device:
-(For a device with a block size of 512 bytes)
-$ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 512'
-
-(For a device with a block size of 4096 bytes)
-$ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 4096'
-
-Check the status of the read behavior ("bypass" indicates that all I/O
-will be passed through to the underlying device):
-$ sudo dmsetup status dust1
-0 33552384 dust 252:17 bypass
-
-$ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=128 iflag=direct
-128+0 records in
-128+0 records out
-
-$ sudo dd if=/dev/zero of=/dev/mapper/dust1 bs=512 count=128 oflag=direct
-128+0 records in
-128+0 records out
-
-Adding and removing bad blocks:
--------------------------------
-
-At any time (i.e.: whether the device has the "bad block" emulation
-enabled or disabled), bad blocks may be added or removed from the
-device via the "addbadblock" and "removebadblock" messages:
-
-$ sudo dmsetup message dust1 0 addbadblock 60
-kernel: device-mapper: dust: badblock added at block 60
-
-$ sudo dmsetup message dust1 0 addbadblock 67
-kernel: device-mapper: dust: badblock added at block 67
-
-$ sudo dmsetup message dust1 0 addbadblock 72
-kernel: device-mapper: dust: badblock added at block 72
-
-These bad blocks will be stored in the "bad block list".
-While the device is in "bypass" mode, reads and writes will succeed:
-
-$ sudo dmsetup status dust1
-0 33552384 dust 252:17 bypass
-
-Enabling block read failures:
------------------------------
-
-To enable the "fail read on bad block" behavior, send the "enable" message:
-
-$ sudo dmsetup message dust1 0 enable
-kernel: device-mapper: dust: enabling read failures on bad sectors
-
-$ sudo dmsetup status dust1
-0 33552384 dust 252:17 fail_read_on_bad_block
-
-With the device in "fail read on bad block" mode, attempting to read a
-block will encounter an "Input/output error":
-
-$ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=1 skip=67 iflag=direct
-dd: error reading '/dev/mapper/dust1': Input/output error
-0+0 records in
-0+0 records out
-0 bytes copied, 0.00040651 s, 0.0 kB/s
-
-...and writing to the bad blocks will remove the blocks from the list,
-therefore emulating the "remap" behavior of hard disk drives:
-
-$ sudo dd if=/dev/zero of=/dev/mapper/dust1 bs=512 count=128 oflag=direct
-128+0 records in
-128+0 records out
-
-kernel: device-mapper: dust: block 60 removed from badblocklist by write
-kernel: device-mapper: dust: block 67 removed from badblocklist by write
-kernel: device-mapper: dust: block 72 removed from badblocklist by write
-kernel: device-mapper: dust: block 87 removed from badblocklist by write
-
-Bad block add/remove error handling:
-------------------------------------
-
-Attempting to add a bad block that already exists in the list will
-result in an "Invalid argument" error, as well as a helpful message:
-
-$ sudo dmsetup message dust1 0 addbadblock 88
-device-mapper: message ioctl on dust1  failed: Invalid argument
-kernel: device-mapper: dust: block 88 already in badblocklist
-
-Attempting to remove a bad block that doesn't exist in the list will
-result in an "Invalid argument" error, as well as a helpful message:
-
-$ sudo dmsetup message dust1 0 removebadblock 87
-device-mapper: message ioctl on dust1  failed: Invalid argument
-kernel: device-mapper: dust: block 87 not found in badblocklist
-
-Counting the number of bad blocks in the bad block list:
---------------------------------------------------------
-
-To count the number of bad blocks configured in the device, run the
-following message command:
-
-$ sudo dmsetup message dust1 0 countbadblocks
-
-A message will print with the number of bad blocks currently
-configured on the device:
-
-kernel: device-mapper: dust: countbadblocks: 895 badblock(s) found
-
-Querying for specific bad blocks:
----------------------------------
-
-To find out if a specific block is in the bad block list, run the
-following message command:
-
-$ sudo dmsetup message dust1 0 queryblock 72
-
-The following message will print if the block is in the list:
-device-mapper: dust: queryblock: block 72 found in badblocklist
-
-The following message will print if the block is in the list:
-device-mapper: dust: queryblock: block 72 not found in badblocklist
-
-The "queryblock" message command will work in both the "enabled"
-and "disabled" modes, allowing the verification of whether a block
-will be treated as "bad" without having to issue I/O to the device,
-or having to "enable" the bad block emulation.
-
-Clearing the bad block list:
-----------------------------
-
-To clear the bad block list (without needing to individually run
-a "removebadblock" message command for every block), run the
-following message command:
-
-$ sudo dmsetup message dust1 0 clearbadblocks
-
-After clearing the bad block list, the following message will appear:
-
-kernel: device-mapper: dust: clearbadblocks: badblocks cleared
-
-If there were no bad blocks to clear, the following message will
-appear:
-
-kernel: device-mapper: dust: clearbadblocks: no badblocks found
-
-Message commands list:
-----------------------
-
-Below is a list of the messages that can be sent to a dust device:
-
-Operations on blocks (requires a <blknum> argument):
-
-addbadblock <blknum>
-queryblock <blknum>
-removebadblock <blknum>
-
-...where <blknum> is a block number within range of the device
-  (corresponding to the block size of the device.)
-
-Single argument message commands:
-
-countbadblocks
-clearbadblocks
-disable
-enable
-quiet
-
-Device removal:
----------------
-
-When finished, remove the device via the "dmsetup remove" command:
-
-$ sudo dmsetup remove dust1
-
-Quiet mode:
------------
-
-On test runs with many bad blocks, it may be desirable to avoid
-excessive logging (from bad blocks added, removed, or "remapped").
-This can be done by enabling "quiet mode" via the following message:
-
-$ sudo dmsetup message dust1 0 quiet
-
-This will suppress log messages from add / remove / removed by write
-operations.  Log messages from "countbadblocks" or "queryblock"
-message commands will still print in quiet mode.
-
-The status of quiet mode can be seen by running "dmsetup status":
-
-$ sudo dmsetup status dust1
-0 33552384 dust 252:17 fail_read_on_bad_block quiet
-
-To disable quiet mode, send the "quiet" message again:
-
-$ sudo dmsetup message dust1 0 quiet
-
-$ sudo dmsetup status dust1
-0 33552384 dust 252:17 fail_read_on_bad_block verbose
-
-(The presence of "verbose" indicates normal logging.)
-
-"Why not...?"
--------------
-
-scsi_debug has a "medium error" mode that can fail reads on one
-specified sector (sector 0x1234, hardcoded in the source code), but
-it uses RAM for the persistent storage, which drastically decreases
-the potential device size.
-
-dm-flakey fails all I/O from all block locations at a specified time
-frequency, and not a given point in time.
-
-When a bad sector occurs on a hard disk drive, reads to that sector
-are failed by the device, usually resulting in an error code of EIO
-("I/O error") or ENODATA ("No data available").  However, a write to
-the sector may succeed, and result in the sector becoming readable
-after the device controller no longer experiences errors reading the
-sector (or after a reallocation of the sector).  However, there may
-be bad sectors that occur on the device in the future, in a different,
-unpredictable location.
-
-This target seeks to provide a device that can exhibit the behavior
-of a bad sector at a known sector location, at a known time, based
-on a large storage device (at least tens of gigabytes, not occupying
-system memory).
diff --git a/Documentation/device-mapper/dm-flakey.rst b/Documentation/device-mapper/dm-flakey.rst
deleted file mode 100644
index 86138735879d..000000000000
--- a/Documentation/device-mapper/dm-flakey.rst
+++ /dev/null
@@ -1,74 +0,0 @@
-=========
-dm-flakey
-=========
-
-This target is the same as the linear target except that it exhibits
-unreliable behaviour periodically.  It's been found useful in simulating
-failing devices for testing purposes.
-
-Starting from the time the table is loaded, the device is available for
-<up interval> seconds, then exhibits unreliable behaviour for <down
-interval> seconds, and then this cycle repeats.
-
-Also, consider using this in combination with the dm-delay target too,
-which can delay reads and writes and/or send them to different
-underlying devices.
-
-Table parameters
-----------------
-
-::
-
-  <dev path> <offset> <up interval> <down interval> \
-    [<num_features> [<feature arguments>]]
-
-Mandatory parameters:
-
-    <dev path>:
-        Full pathname to the underlying block-device, or a
-        "major:minor" device-number.
-    <offset>:
-        Starting sector within the device.
-    <up interval>:
-        Number of seconds device is available.
-    <down interval>:
-        Number of seconds device returns errors.
-
-Optional feature parameters:
-
-  If no feature parameters are present, during the periods of
-  unreliability, all I/O returns errors.
-
-  drop_writes:
-	All write I/O is silently ignored.
-	Read I/O is handled correctly.
-
-  error_writes:
-	All write I/O is failed with an error signalled.
-	Read I/O is handled correctly.
-
-  corrupt_bio_byte <Nth_byte> <direction> <value> <flags>:
-	During <down interval>, replace <Nth_byte> of the data of
-	each matching bio with <value>.
-
-    <Nth_byte>:
-	The offset of the byte to replace.
-	Counting starts at 1, to replace the first byte.
-    <direction>:
-	Either 'r' to corrupt reads or 'w' to corrupt writes.
-	'w' is incompatible with drop_writes.
-    <value>:
-	The value (from 0-255) to write.
-    <flags>:
-	Perform the replacement only if bio->bi_opf has all the
-	selected flags set.
-
-Examples:
-
-Replaces the 32nd byte of READ bios with the value 1::
-
-  corrupt_bio_byte 32 r 1 0
-
-Replaces the 224th byte of REQ_META (=32) bios with the value 0::
-
-  corrupt_bio_byte 224 w 0 32
diff --git a/Documentation/device-mapper/dm-init.rst b/Documentation/device-mapper/dm-init.rst
deleted file mode 100644
index e5242ff17e9b..000000000000
--- a/Documentation/device-mapper/dm-init.rst
+++ /dev/null
@@ -1,125 +0,0 @@
-================================
-Early creation of mapped devices
-================================
-
-It is possible to configure a device-mapper device to act as the root device for
-your system in two ways.
-
-The first is to build an initial ramdisk which boots to a minimal userspace
-which configures the device, then pivot_root(8) in to it.
-
-The second is to create one or more device-mappers using the module parameter
-"dm-mod.create=" through the kernel boot command line argument.
-
-The format is specified as a string of data separated by commas and optionally
-semi-colons, where:
-
- - a comma is used to separate fields like name, uuid, flags and table
-   (specifies one device)
- - a semi-colon is used to separate devices.
-
-So the format will look like this::
-
- dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
-
-Where::
-
-	<name>		::= The device name.
-	<uuid>		::= xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx | ""
-	<minor>		::= The device minor number | ""
-	<flags>		::= "ro" | "rw"
-	<table>		::= <start_sector> <num_sectors> <target_type> <target_args>
-	<target_type>	::= "verity" | "linear" | ... (see list below)
-
-The dm line should be equivalent to the one used by the dmsetup tool with the
-`--concise` argument.
-
-Target types
-============
-
-Not all target types are available as there are serious risks in allowing
-activation of certain DM targets without first using userspace tools to check
-the validity of associated metadata.
-
-======================= =======================================================
-`cache`			constrained, userspace should verify cache device
-`crypt`			allowed
-`delay`			allowed
-`era`			constrained, userspace should verify metadata device
-`flakey`		constrained, meant for test
-`linear`		allowed
-`log-writes`		constrained, userspace should verify metadata device
-`mirror`		constrained, userspace should verify main/mirror device
-`raid`			constrained, userspace should verify metadata device
-`snapshot`		constrained, userspace should verify src/dst device
-`snapshot-origin`	allowed
-`snapshot-merge`	constrained, userspace should verify src/dst device
-`striped`		allowed
-`switch`		constrained, userspace should verify dev path
-`thin`			constrained, requires dm target message from userspace
-`thin-pool`		constrained, requires dm target message from userspace
-`verity`		allowed
-`writecache`		constrained, userspace should verify cache device
-`zero`			constrained, not meant for rootfs
-======================= =======================================================
-
-If the target is not listed above, it is constrained by default (not tested).
-
-Examples
-========
-An example of booting to a linear array made up of user-mode linux block
-devices::
-
-  dm-mod.create="lroot,,,rw, 0 4096 linear 98:16 0, 4096 4096 linear 98:32 0" root=/dev/dm-0
-
-This will boot to a rw dm-linear target of 8192 sectors split across two block
-devices identified by their major:minor numbers.  After boot, udev will rename
-this target to /dev/mapper/lroot (depending on the rules). No uuid was assigned.
-
-An example of multiple device-mappers, with the dm-mod.create="..." contents
-is shown here split on multiple lines for readability::
-
-  dm-linear,,1,rw,
-    0 32768 linear 8:1 0,
-    32768 1024000 linear 8:2 0;
-  dm-verity,,3,ro,
-    0 1638400 verity 1 /dev/sdc1 /dev/sdc2 4096 4096 204800 1 sha256
-    ac87db56303c9c1da433d7209b5a6ef3e4779df141200cbd7c157dcb8dd89c42
-    5ebfe87f7df3235b80a117ebc4078e44f55045487ad4a96581d1adb564615b51
-
-Other examples (per target):
-
-"crypt"::
-
-  dm-crypt,,8,ro,
-    0 1048576 crypt aes-xts-plain64
-    babebabebabebabebabebabebabebabebabebabebabebabebabebabebabebabe 0
-    /dev/sda 0 1 allow_discards
-
-"delay"::
-
-  dm-delay,,4,ro,0 409600 delay /dev/sda1 0 500
-
-"linear"::
-
-  dm-linear,,,rw,
-    0 32768 linear /dev/sda1 0,
-    32768 1024000 linear /dev/sda2 0,
-    1056768 204800 linear /dev/sda3 0,
-    1261568 512000 linear /dev/sda4 0
-
-"snapshot-origin"::
-
-  dm-snap-orig,,4,ro,0 409600 snapshot-origin 8:2
-
-"striped"::
-
-  dm-striped,,4,ro,0 1638400 striped 4 4096
-  /dev/sda1 0 /dev/sda2 0 /dev/sda3 0 /dev/sda4 0
-
-"verity"::
-
-  dm-verity,,4,ro,
-    0 1638400 verity 1 8:1 8:2 4096 4096 204800 1 sha256
-    fb1a5a0f00deb908d8b53cb270858975e76cf64105d412ce764225d53b8f3cfd
-    51934789604d1b92399c52e7cb149d1b3a1b74bbbcb103b2a0aaacbed5c08584
diff --git a/Documentation/device-mapper/dm-integrity.rst b/Documentation/device-mapper/dm-integrity.rst
deleted file mode 100644
index a30aa91b5fbe..000000000000
--- a/Documentation/device-mapper/dm-integrity.rst
+++ /dev/null
@@ -1,259 +0,0 @@
-============
-dm-integrity
-============
-
-The dm-integrity target emulates a block device that has additional
-per-sector tags that can be used for storing integrity information.
-
-A general problem with storing integrity tags with every sector is that
-writing the sector and the integrity tag must be atomic - i.e. in case of
-crash, either both sector and integrity tag or none of them is written.
-
-To guarantee write atomicity, the dm-integrity target uses journal, it
-writes sector data and integrity tags into a journal, commits the journal
-and then copies the data and integrity tags to their respective location.
-
-The dm-integrity target can be used with the dm-crypt target - in this
-situation the dm-crypt target creates the integrity data and passes them
-to the dm-integrity target via bio_integrity_payload attached to the bio.
-In this mode, the dm-crypt and dm-integrity targets provide authenticated
-disk encryption - if the attacker modifies the encrypted device, an I/O
-error is returned instead of random data.
-
-The dm-integrity target can also be used as a standalone target, in this
-mode it calculates and verifies the integrity tag internally. In this
-mode, the dm-integrity target can be used to detect silent data
-corruption on the disk or in the I/O path.
-
-There's an alternate mode of operation where dm-integrity uses bitmap
-instead of a journal. If a bit in the bitmap is 1, the corresponding
-region's data and integrity tags are not synchronized - if the machine
-crashes, the unsynchronized regions will be recalculated. The bitmap mode
-is faster than the journal mode, because we don't have to write the data
-twice, but it is also less reliable, because if data corruption happens
-when the machine crashes, it may not be detected.
-
-When loading the target for the first time, the kernel driver will format
-the device. But it will only format the device if the superblock contains
-zeroes. If the superblock is neither valid nor zeroed, the dm-integrity
-target can't be loaded.
-
-To use the target for the first time:
-
-1. overwrite the superblock with zeroes
-2. load the dm-integrity target with one-sector size, the kernel driver
-   will format the device
-3. unload the dm-integrity target
-4. read the "provided_data_sectors" value from the superblock
-5. load the dm-integrity target with the the target size
-   "provided_data_sectors"
-6. if you want to use dm-integrity with dm-crypt, load the dm-crypt target
-   with the size "provided_data_sectors"
-
-
-Target arguments:
-
-1. the underlying block device
-
-2. the number of reserved sector at the beginning of the device - the
-   dm-integrity won't read of write these sectors
-
-3. the size of the integrity tag (if "-" is used, the size is taken from
-   the internal-hash algorithm)
-
-4. mode:
-
-	D - direct writes (without journal)
-		in this mode, journaling is
-		not used and data sectors and integrity tags are written
-		separately. In case of crash, it is possible that the data
-		and integrity tag doesn't match.
-	J - journaled writes
-		data and integrity tags are written to the
-		journal and atomicity is guaranteed. In case of crash,
-		either both data and tag or none of them are written. The
-		journaled mode degrades write throughput twice because the
-		data have to be written twice.
-	B - bitmap mode - data and metadata are written without any
-		synchronization, the driver maintains a bitmap of dirty
-		regions where data and metadata don't match. This mode can
-		only be used with internal hash.
-	R - recovery mode - in this mode, journal is not replayed,
-		checksums are not checked and writes to the device are not
-		allowed. This mode is useful for data recovery if the
-		device cannot be activated in any of the other standard
-		modes.
-
-5. the number of additional arguments
-
-Additional arguments:
-
-journal_sectors:number
-	The size of journal, this argument is used only if formatting the
-	device. If the device is already formatted, the value from the
-	superblock is used.
-
-interleave_sectors:number
-	The number of interleaved sectors. This values is rounded down to
-	a power of two. If the device is already formatted, the value from
-	the superblock is used.
-
-meta_device:device
-	Don't interleave the data and metadata on on device. Use a
-	separate device for metadata.
-
-buffer_sectors:number
-	The number of sectors in one buffer. The value is rounded down to
-	a power of two.
-
-	The tag area is accessed using buffers, the buffer size is
-	configurable. The large buffer size means that the I/O size will
-	be larger, but there could be less I/Os issued.
-
-journal_watermark:number
-	The journal watermark in percents. When the size of the journal
-	exceeds this watermark, the thread that flushes the journal will
-	be started.
-
-commit_time:number
-	Commit time in milliseconds. When this time passes, the journal is
-	written. The journal is also written immediatelly if the FLUSH
-	request is received.
-
-internal_hash:algorithm(:key)	(the key is optional)
-	Use internal hash or crc.
-	When this argument is used, the dm-integrity target won't accept
-	integrity tags from the upper target, but it will automatically
-	generate and verify the integrity tags.
-
-	You can use a crc algorithm (such as crc32), then integrity target
-	will protect the data against accidental corruption.
-	You can also use a hmac algorithm (for example
-	"hmac(sha256):0123456789abcdef"), in this mode it will provide
-	cryptographic authentication of the data without encryption.
-
-	When this argument is not used, the integrity tags are accepted
-	from an upper layer target, such as dm-crypt. The upper layer
-	target should check the validity of the integrity tags.
-
-recalculate
-	Recalculate the integrity tags automatically. It is only valid
-	when using internal hash.
-
-journal_crypt:algorithm(:key)	(the key is optional)
-	Encrypt the journal using given algorithm to make sure that the
-	attacker can't read the journal. You can use a block cipher here
-	(such as "cbc(aes)") or a stream cipher (for example "chacha20",
-	"salsa20", "ctr(aes)" or "ecb(arc4)").
-
-	The journal contains history of last writes to the block device,
-	an attacker reading the journal could see the last sector nubmers
-	that were written. From the sector numbers, the attacker can infer
-	the size of files that were written. To protect against this
-	situation, you can encrypt the journal.
-
-journal_mac:algorithm(:key)	(the key is optional)
-	Protect sector numbers in the journal from accidental or malicious
-	modification. To protect against accidental modification, use a
-	crc algorithm, to protect against malicious modification, use a
-	hmac algorithm with a key.
-
-	This option is not needed when using internal-hash because in this
-	mode, the integrity of journal entries is checked when replaying
-	the journal. Thus, modified sector number would be detected at
-	this stage.
-
-block_size:number
-	The size of a data block in bytes.  The larger the block size the
-	less overhead there is for per-block integrity metadata.
-	Supported values are 512, 1024, 2048 and 4096 bytes.  If not
-	specified the default block size is 512 bytes.
-
-sectors_per_bit:number
-	In the bitmap mode, this parameter specifies the number of
-	512-byte sectors that corresponds to one bitmap bit.
-
-bitmap_flush_interval:number
-	The bitmap flush interval in milliseconds. The metadata buffers
-	are synchronized when this interval expires.
-
-
-The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
-be changed when reloading the target (load an inactive table and swap the
-tables with suspend and resume). The other arguments should not be changed
-when reloading the target because the layout of disk data depend on them
-and the reloaded target would be non-functional.
-
-
-The layout of the formatted block device:
-
-* reserved sectors
-    (they are not used by this target, they can be used for
-    storing LUKS metadata or for other purpose), the size of the reserved
-    area is specified in the target arguments
-
-* superblock (4kiB)
-	* magic string - identifies that the device was formatted
-	* version
-	* log2(interleave sectors)
-	* integrity tag size
-	* the number of journal sections
-	* provided data sectors - the number of sectors that this target
-	  provides (i.e. the size of the device minus the size of all
-	  metadata and padding). The user of this target should not send
-	  bios that access data beyond the "provided data sectors" limit.
-	* flags
-	    SB_FLAG_HAVE_JOURNAL_MAC
-		- a flag is set if journal_mac is used
-	    SB_FLAG_RECALCULATING
-		- recalculating is in progress
-	    SB_FLAG_DIRTY_BITMAP
-		- journal area contains the bitmap of dirty
-		  blocks
-	* log2(sectors per block)
-	* a position where recalculating finished
-* journal
-	The journal is divided into sections, each section contains:
-
-	* metadata area (4kiB), it contains journal entries
-
-	  - every journal entry contains:
-
-		* logical sector (specifies where the data and tag should
-		  be written)
-		* last 8 bytes of data
-		* integrity tag (the size is specified in the superblock)
-
-	  - every metadata sector ends with
-
-		* mac (8-bytes), all the macs in 8 metadata sectors form a
-		  64-byte value. It is used to store hmac of sector
-		  numbers in the journal section, to protect against a
-		  possibility that the attacker tampers with sector
-		  numbers in the journal.
-		* commit id
-
-	* data area (the size is variable; it depends on how many journal
-	  entries fit into the metadata area)
-
-	    - every sector in the data area contains:
-
-		* data (504 bytes of data, the last 8 bytes are stored in
-		  the journal entry)
-		* commit id
-
-	To test if the whole journal section was written correctly, every
-	512-byte sector of the journal ends with 8-byte commit id. If the
-	commit id matches on all sectors in a journal section, then it is
-	assumed that the section was written correctly. If the commit id
-	doesn't match, the section was written partially and it should not
-	be replayed.
-
-* one or more runs of interleaved tags and data.
-    Each run contains:
-
-	* tag area - it contains integrity tags. There is one tag for each
-	  sector in the data area
-	* data area - it contains data sectors. The number of data sectors
-	  in one run must be a power of two. log2 of this value is stored
-	  in the superblock.
diff --git a/Documentation/device-mapper/dm-io.rst b/Documentation/device-mapper/dm-io.rst
deleted file mode 100644
index d2492917a1f5..000000000000
--- a/Documentation/device-mapper/dm-io.rst
+++ /dev/null
@@ -1,75 +0,0 @@
-=====
-dm-io
-=====
-
-Dm-io provides synchronous and asynchronous I/O services. There are three
-types of I/O services available, and each type has a sync and an async
-version.
-
-The user must set up an io_region structure to describe the desired location
-of the I/O. Each io_region indicates a block-device along with the starting
-sector and size of the region::
-
-   struct io_region {
-      struct block_device *bdev;
-      sector_t sector;
-      sector_t count;
-   };
-
-Dm-io can read from one io_region or write to one or more io_regions. Writes
-to multiple regions are specified by an array of io_region structures.
-
-The first I/O service type takes a list of memory pages as the data buffer for
-the I/O, along with an offset into the first page::
-
-   struct page_list {
-      struct page_list *next;
-      struct page *page;
-   };
-
-   int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
-                  struct page_list *pl, unsigned int offset,
-                  unsigned long *error_bits);
-   int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
-                   struct page_list *pl, unsigned int offset,
-                   io_notify_fn fn, void *context);
-
-The second I/O service type takes an array of bio vectors as the data buffer
-for the I/O. This service can be handy if the caller has a pre-assembled bio,
-but wants to direct different portions of the bio to different devices::
-
-   int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where,
-                       int rw, struct bio_vec *bvec,
-                       unsigned long *error_bits);
-   int dm_io_async_bvec(unsigned int num_regions, struct io_region *where,
-                        int rw, struct bio_vec *bvec,
-                        io_notify_fn fn, void *context);
-
-The third I/O service type takes a pointer to a vmalloc'd memory buffer as the
-data buffer for the I/O. This service can be handy if the caller needs to do
-I/O to a large region but doesn't want to allocate a large number of individual
-memory pages::
-
-   int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
-                     void *data, unsigned long *error_bits);
-   int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
-                      void *data, io_notify_fn fn, void *context);
-
-Callers of the asynchronous I/O services must include the name of a completion
-callback routine and a pointer to some context data for the I/O::
-
-   typedef void (*io_notify_fn)(unsigned long error, void *context);
-
-The "error" parameter in this callback, as well as the `*error` parameter in
-all of the synchronous versions, is a bitset (instead of a simple error value).
-In the case of an write-I/O to multiple regions, this bitset allows dm-io to
-indicate success or failure on each individual region.
-
-Before using any of the dm-io services, the user should call dm_io_get()
-and specify the number of pages they expect to perform I/O on concurrently.
-Dm-io will attempt to resize its mempool to make sure enough pages are
-always available in order to avoid unnecessary waiting while performing I/O.
-
-When the user is finished using the dm-io services, they should call
-dm_io_put() and specify the same number of pages that were given on the
-dm_io_get() call.
diff --git a/Documentation/device-mapper/dm-log.rst b/Documentation/device-mapper/dm-log.rst
deleted file mode 100644
index ba4fce39bc27..000000000000
--- a/Documentation/device-mapper/dm-log.rst
+++ /dev/null
@@ -1,57 +0,0 @@
-=====================
-Device-Mapper Logging
-=====================
-The device-mapper logging code is used by some of the device-mapper
-RAID targets to track regions of the disk that are not consistent.
-A region (or portion of the address space) of the disk may be
-inconsistent because a RAID stripe is currently being operated on or
-a machine died while the region was being altered.  In the case of
-mirrors, a region would be considered dirty/inconsistent while you
-are writing to it because the writes need to be replicated for all
-the legs of the mirror and may not reach the legs at the same time.
-Once all writes are complete, the region is considered clean again.
-
-There is a generic logging interface that the device-mapper RAID
-implementations use to perform logging operations (see
-dm_dirty_log_type in include/linux/dm-dirty-log.h).  Various different
-logging implementations are available and provide different
-capabilities.  The list includes:
-
-==============	==============================================================
-Type		Files
-==============	==============================================================
-disk		drivers/md/dm-log.c
-core		drivers/md/dm-log.c
-userspace	drivers/md/dm-log-userspace* include/linux/dm-log-userspace.h
-==============	==============================================================
-
-The "disk" log type
--------------------
-This log implementation commits the log state to disk.  This way, the
-logging state survives reboots/crashes.
-
-The "core" log type
--------------------
-This log implementation keeps the log state in memory.  The log state
-will not survive a reboot or crash, but there may be a small boost in
-performance.  This method can also be used if no storage device is
-available for storing log state.
-
-The "userspace" log type
-------------------------
-This log type simply provides a way to export the log API to userspace,
-so log implementations can be done there.  This is done by forwarding most
-logging requests to userspace, where a daemon receives and processes the
-request.
-
-The structure used for communication between kernel and userspace are
-located in include/linux/dm-log-userspace.h.  Due to the frequency,
-diversity, and 2-way communication nature of the exchanges between
-kernel and userspace, 'connector' is used as the interface for
-communication.
-
-There are currently two userspace log implementations that leverage this
-framework - "clustered-disk" and "clustered-core".  These implementations
-provide a cluster-coherent log for shared-storage.  Device-mapper mirroring
-can be used in a shared-storage environment when the cluster log implementations
-are employed.
diff --git a/Documentation/device-mapper/dm-queue-length.rst b/Documentation/device-mapper/dm-queue-length.rst
deleted file mode 100644
index d8e381c1cb02..000000000000
--- a/Documentation/device-mapper/dm-queue-length.rst
+++ /dev/null
@@ -1,48 +0,0 @@
-===============
-dm-queue-length
-===============
-
-dm-queue-length is a path selector module for device-mapper targets,
-which selects a path with the least number of in-flight I/Os.
-The path selector name is 'queue-length'.
-
-Table parameters for each path: [<repeat_count>]
-
-::
-
-	<repeat_count>: The number of I/Os to dispatch using the selected
-			path before switching to the next path.
-			If not given, internal default is used. To check
-			the default value, see the activated table.
-
-Status for each path: <status> <fail-count> <in-flight>
-
-::
-
-	<status>: 'A' if the path is active, 'F' if the path is failed.
-	<fail-count>: The number of path failures.
-	<in-flight>: The number of in-flight I/Os on the path.
-
-
-Algorithm
-=========
-
-dm-queue-length increments/decrements 'in-flight' when an I/O is
-dispatched/completed respectively.
-dm-queue-length selects a path with the minimum 'in-flight'.
-
-
-Examples
-========
-In case that 2 paths (sda and sdb) are used with repeat_count == 128.
-
-::
-
-  # echo "0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128" \
-    dmsetup create test
-  #
-  # dmsetup table
-  test: 0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128
-  #
-  # dmsetup status
-  test: 0 10 multipath 2 0 0 0 1 1 E 0 2 1 8:0 A 0 0 8:16 A 0 0
diff --git a/Documentation/device-mapper/dm-raid.rst b/Documentation/device-mapper/dm-raid.rst
deleted file mode 100644
index 2fe255b130fb..000000000000
--- a/Documentation/device-mapper/dm-raid.rst
+++ /dev/null
@@ -1,419 +0,0 @@
-=======
-dm-raid
-=======
-
-The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
-It allows the MD RAID drivers to be accessed using a device-mapper
-interface.
-
-
-Mapping Table Interface
------------------------
-The target is named "raid" and it accepts the following parameters::
-
-  <raid_type> <#raid_params> <raid_params> \
-    <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>]
-
-<raid_type>:
-
-  ============= ===============================================================
-  raid0		RAID0 striping (no resilience)
-  raid1		RAID1 mirroring
-  raid4		RAID4 with dedicated last parity disk
-  raid5_n 	RAID5 with dedicated last parity disk supporting takeover
-		Same as raid4
-
-		- Transitory layout
-  raid5_la	RAID5 left asymmetric
-
-		- rotating parity 0 with data continuation
-  raid5_ra	RAID5 right asymmetric
-
-		- rotating parity N with data continuation
-  raid5_ls	RAID5 left symmetric
-
-		- rotating parity 0 with data restart
-  raid5_rs 	RAID5 right symmetric
-
-		- rotating parity N with data restart
-  raid6_zr	RAID6 zero restart
-
-		- rotating parity zero (left-to-right) with data restart
-  raid6_nr	RAID6 N restart
-
-		- rotating parity N (right-to-left) with data restart
-  raid6_nc	RAID6 N continue
-
-		- rotating parity N (right-to-left) with data continuation
-  raid6_n_6	RAID6 with dedicate parity disks
-
-		- parity and Q-syndrome on the last 2 disks;
-		  layout for takeover from/to raid4/raid5_n
-  raid6_la_6	Same as "raid_la" plus dedicated last Q-syndrome disk
-
-		- layout for takeover from raid5_la from/to raid6
-  raid6_ra_6	Same as "raid5_ra" dedicated last Q-syndrome disk
-
-		- layout for takeover from raid5_ra from/to raid6
-  raid6_ls_6	Same as "raid5_ls" dedicated last Q-syndrome disk
-
-		- layout for takeover from raid5_ls from/to raid6
-  raid6_rs_6	Same as "raid5_rs" dedicated last Q-syndrome disk
-
-		- layout for takeover from raid5_rs from/to raid6
-  raid10        Various RAID10 inspired algorithms chosen by additional params
-		(see raid10_format and raid10_copies below)
-
-		- RAID10: Striped Mirrors (aka 'Striping on top of mirrors')
-		- RAID1E: Integrated Adjacent Stripe Mirroring
-		- RAID1E: Integrated Offset Stripe Mirroring
-		- and other similar RAID10 variants
-  ============= ===============================================================
-
-  Reference: Chapter 4 of
-  http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
-
-<#raid_params>: The number of parameters that follow.
-
-<raid_params> consists of
-
-    Mandatory parameters:
-        <chunk_size>:
-		      Chunk size in sectors.  This parameter is often known as
-		      "stripe size".  It is the only mandatory parameter and
-		      is placed first.
-
-    followed by optional parameters (in any order):
-	[sync|nosync]
-		Force or prevent RAID initialization.
-
-	[rebuild <idx>]
-		Rebuild drive number 'idx' (first drive is 0).
-
-	[daemon_sleep <ms>]
-		Interval between runs of the bitmap daemon that
-		clear bits.  A longer interval means less bitmap I/O but
-		resyncing after a failure is likely to take longer.
-
-	[min_recovery_rate <kB/sec/disk>]
-		Throttle RAID initialization
-	[max_recovery_rate <kB/sec/disk>]
-		Throttle RAID initialization
-	[write_mostly <idx>]
-		Mark drive index 'idx' write-mostly.
-	[max_write_behind <sectors>]
-		See '--write-behind=' (man mdadm)
-	[stripe_cache <sectors>]
-		Stripe cache size (RAID 4/5/6 only)
-	[region_size <sectors>]
-		The region_size multiplied by the number of regions is the
-		logical size of the array.  The bitmap records the device
-		synchronisation state for each region.
-
-        [raid10_copies   <# copies>], [raid10_format   <near|far|offset>]
-		These two options are used to alter the default layout of
-		a RAID10 configuration.  The number of copies is can be
-		specified, but the default is 2.  There are also three
-		variations to how the copies are laid down - the default
-		is "near".  Near copies are what most people think of with
-		respect to mirroring.  If these options are left unspecified,
-		or 'raid10_copies 2' and/or 'raid10_format near' are given,
-		then the layouts for 2, 3 and 4 devices	are:
-
-		========	 ==========	   ==============
-		2 drives         3 drives          4 drives
-		========	 ==========	   ==============
-		A1  A1           A1  A1  A2        A1  A1  A2  A2
-		A2  A2           A2  A3  A3        A3  A3  A4  A4
-		A3  A3           A4  A4  A5        A5  A5  A6  A6
-		A4  A4           A5  A6  A6        A7  A7  A8  A8
-		..  ..           ..  ..  ..        ..  ..  ..  ..
-		========	 ==========	   ==============
-
-		The 2-device layout is equivalent 2-way RAID1.  The 4-device
-		layout is what a traditional RAID10 would look like.  The
-		3-device layout is what might be called a 'RAID1E - Integrated
-		Adjacent Stripe Mirroring'.
-
-		If 'raid10_copies 2' and 'raid10_format far', then the layouts
-		for 2, 3 and 4 devices are:
-
-		========	     ============	  ===================
-		2 drives             3 drives             4 drives
-		========	     ============	  ===================
-		A1  A2               A1   A2   A3         A1   A2   A3   A4
-		A3  A4               A4   A5   A6         A5   A6   A7   A8
-		A5  A6               A7   A8   A9         A9   A10  A11  A12
-		..  ..               ..   ..   ..         ..   ..   ..   ..
-		A2  A1               A3   A1   A2         A2   A1   A4   A3
-		A4  A3               A6   A4   A5         A6   A5   A8   A7
-		A6  A5               A9   A7   A8         A10  A9   A12  A11
-		..  ..               ..   ..   ..         ..   ..   ..   ..
-		========	     ============	  ===================
-
-		If 'raid10_copies 2' and 'raid10_format offset', then the
-		layouts for 2, 3 and 4 devices are:
-
-		========       ==========         ================
-		2 drives       3 drives           4 drives
-		========       ==========         ================
-		A1  A2         A1  A2  A3         A1  A2  A3  A4
-		A2  A1         A3  A1  A2         A2  A1  A4  A3
-		A3  A4         A4  A5  A6         A5  A6  A7  A8
-		A4  A3         A6  A4  A5         A6  A5  A8  A7
-		A5  A6         A7  A8  A9         A9  A10 A11 A12
-		A6  A5         A9  A7  A8         A10 A9  A12 A11
-		..  ..         ..  ..  ..         ..  ..  ..  ..
-		========       ==========         ================
-
-		Here we see layouts closely akin to 'RAID1E - Integrated
-		Offset Stripe Mirroring'.
-
-        [delta_disks <N>]
-		The delta_disks option value (-251 < N < +251) triggers
-		device removal (negative value) or device addition (positive
-		value) to any reshape supporting raid levels 4/5/6 and 10.
-		RAID levels 4/5/6 allow for addition of devices (metadata
-		and data device tuple), raid10_near and raid10_offset only
-		allow for device addition. raid10_far does not support any
-		reshaping at all.
-		A minimum of devices have to be kept to enforce resilience,
-		which is 3 devices for raid4/5 and 4 devices for raid6.
-
-        [data_offset <sectors>]
-		This option value defines the offset into each data device
-		where the data starts. This is used to provide out-of-place
-		reshaping space to avoid writing over data while
-		changing the layout of stripes, hence an interruption/crash
-		may happen at any time without the risk of losing data.
-		E.g. when adding devices to an existing raid set during
-		forward reshaping, the out-of-place space will be allocated
-		at the beginning of each raid device. The kernel raid4/5/6/10
-		MD personalities supporting such device addition will read the data from
-		the existing first stripes (those with smaller number of stripes)
-		starting at data_offset to fill up a new stripe with the larger
-		number of stripes, calculate the redundancy blocks (CRC/Q-syndrome)
-		and write that new stripe to offset 0. Same will be applied to all
-		N-1 other new stripes. This out-of-place scheme is used to change
-		the RAID type (i.e. the allocation algorithm) as well, e.g.
-		changing from raid5_ls to raid5_n.
-
-	[journal_dev <dev>]
-		This option adds a journal device to raid4/5/6 raid sets and
-		uses it to close the 'write hole' caused by the non-atomic updates
-		to the component devices which can cause data loss during recovery.
-		The journal device is used as writethrough thus causing writes to
-		be throttled versus non-journaled raid4/5/6 sets.
-		Takeover/reshape is not possible with a raid4/5/6 journal device;
-		it has to be deconfigured before requesting these.
-
-	[journal_mode <mode>]
-		This option sets the caching mode on journaled raid4/5/6 raid sets
-		(see 'journal_dev <dev>' above) to 'writethrough' or 'writeback'.
-		If 'writeback' is selected the journal device has to be resilient
-		and must not suffer from the 'write hole' problem itself (e.g. use
-		raid1 or raid10) to avoid a single point of failure.
-
-<#raid_devs>: The number of devices composing the array.
-	Each device consists of two entries.  The first is the device
-	containing the metadata (if any); the second is the one containing the
-	data. A Maximum of 64 metadata/data device entries are supported
-	up to target version 1.8.0.
-	1.9.0 supports up to 253 which is enforced by the used MD kernel runtime.
-
-	If a drive has failed or is missing at creation time, a '-' can be
-	given for both the metadata and data drives for a given position.
-
-
-Example Tables
---------------
-
-::
-
-  # RAID4 - 4 data drives, 1 parity (no metadata devices)
-  # No metadata devices specified to hold superblock/bitmap info
-  # Chunk size of 1MiB
-  # (Lines separated for easy reading)
-
-  0 1960893648 raid \
-          raid4 1 2048 \
-          5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
-
-  # RAID4 - 4 data drives, 1 parity (with metadata devices)
-  # Chunk size of 1MiB, force RAID initialization,
-  #       min recovery rate at 20 kiB/sec/disk
-
-  0 1960893648 raid \
-          raid4 4 2048 sync min_recovery_rate 20 \
-          5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
-
-
-Status Output
--------------
-'dmsetup table' displays the table used to construct the mapping.
-The optional parameters are always printed in the order listed
-above with "sync" or "nosync" always output ahead of the other
-arguments, regardless of the order used when originally loading the table.
-Arguments that can be repeated are ordered by value.
-
-
-'dmsetup status' yields information on the state and health of the array.
-The output is as follows (normally a single line, but expanded here for
-clarity)::
-
-  1: <s> <l> raid \
-  2:      <raid_type> <#devices> <health_chars> \
-  3:      <sync_ratio> <sync_action> <mismatch_cnt>
-
-Line 1 is the standard output produced by device-mapper.
-
-Line 2 & 3 are produced by the raid target and are best explained by example::
-
-        0 1960893648 raid raid4 5 AAAAA 2/490221568 init 0
-
-Here we can see the RAID type is raid4, there are 5 devices - all of
-which are 'A'live, and the array is 2/490221568 complete with its initial
-recovery.  Here is a fuller description of the individual fields:
-
-	=============== =========================================================
-	<raid_type>     Same as the <raid_type> used to create the array.
-	<health_chars>  One char for each device, indicating:
-
-			- 'A' = alive and in-sync
-			- 'a' = alive but not in-sync
-			- 'D' = dead/failed.
-	<sync_ratio>    The ratio indicating how much of the array has undergone
-			the process described by 'sync_action'.  If the
-			'sync_action' is "check" or "repair", then the process
-			of "resync" or "recover" can be considered complete.
-	<sync_action>   One of the following possible states:
-
-			idle
-				- No synchronization action is being performed.
-			frozen
-				- The current action has been halted.
-			resync
-				- Array is undergoing its initial synchronization
-				  or is resynchronizing after an unclean shutdown
-				  (possibly aided by a bitmap).
-			recover
-				- A device in the array is being rebuilt or
-				  replaced.
-			check
-				- A user-initiated full check of the array is
-				  being performed.  All blocks are read and
-				  checked for consistency.  The number of
-				  discrepancies found are recorded in
-				  <mismatch_cnt>.  No changes are made to the
-				  array by this action.
-			repair
-				- The same as "check", but discrepancies are
-				  corrected.
-			reshape
-				- The array is undergoing a reshape.
-	<mismatch_cnt>  The number of discrepancies found between mirror copies
-			in RAID1/10 or wrong parity values found in RAID4/5/6.
-			This value is valid only after a "check" of the array
-			is performed.  A healthy array has a 'mismatch_cnt' of 0.
-	<data_offset>   The current data offset to the start of the user data on
-			each component device of a raid set (see the respective
-			raid parameter to support out-of-place reshaping).
-	<journal_char>	- 'A' - active write-through journal device.
-			- 'a' - active write-back journal device.
-			- 'D' - dead journal device.
-			- '-' - no journal device.
-	=============== =========================================================
-
-
-Message Interface
------------------
-The dm-raid target will accept certain actions through the 'message' interface.
-('man dmsetup' for more information on the message interface.)  These actions
-include:
-
-	========= ================================================
-	"idle"    Halt the current sync action.
-	"frozen"  Freeze the current sync action.
-	"resync"  Initiate/continue a resync.
-	"recover" Initiate/continue a recover process.
-	"check"   Initiate a check (i.e. a "scrub") of the array.
-	"repair"  Initiate a repair of the array.
-	========= ================================================
-
-
-Discard Support
----------------
-The implementation of discard support among hardware vendors varies.
-When a block is discarded, some storage devices will return zeroes when
-the block is read.  These devices set the 'discard_zeroes_data'
-attribute.  Other devices will return random data.  Confusingly, some
-devices that advertise 'discard_zeroes_data' will not reliably return
-zeroes when discarded blocks are read!  Since RAID 4/5/6 uses blocks
-from a number of devices to calculate parity blocks and (for performance
-reasons) relies on 'discard_zeroes_data' being reliable, it is important
-that the devices be consistent.  Blocks may be discarded in the middle
-of a RAID 4/5/6 stripe and if subsequent read results are not
-consistent, the parity blocks may be calculated differently at any time;
-making the parity blocks useless for redundancy.  It is important to
-understand how your hardware behaves with discards if you are going to
-enable discards with RAID 4/5/6.
-
-Since the behavior of storage devices is unreliable in this respect,
-even when reporting 'discard_zeroes_data', by default RAID 4/5/6
-discard support is disabled -- this ensures data integrity at the
-expense of losing some performance.
-
-Storage devices that properly support 'discard_zeroes_data' are
-increasingly whitelisted in the kernel and can thus be trusted.
-
-For trusted devices, the following dm-raid module parameter can be set
-to safely enable discard support for RAID 4/5/6:
-
-    'devices_handle_discards_safely'
-
-
-Version History
----------------
-
-::
-
- 1.0.0	Initial version.  Support for RAID 4/5/6
- 1.1.0	Added support for RAID 1
- 1.2.0	Handle creation of arrays that contain failed devices.
- 1.3.0	Added support for RAID 10
- 1.3.1	Allow device replacement/rebuild for RAID 10
- 1.3.2	Fix/improve redundancy checking for RAID10
- 1.4.0	Non-functional change.  Removes arg from mapping function.
- 1.4.1	RAID10 fix redundancy validation checks (commit 55ebbb5).
- 1.4.2	Add RAID10 "far" and "offset" algorithm support.
- 1.5.0	Add message interface to allow manipulation of the sync_action.
-	New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
- 1.5.1	Add ability to restore transiently failed devices on resume.
- 1.5.2	'mismatch_cnt' is zero unless [last_]sync_action is "check".
- 1.6.0	Add discard support (and devices_handle_discard_safely module param).
- 1.7.0	Add support for MD RAID0 mappings.
- 1.8.0	Explicitly check for compatible flags in the superblock metadata
-	and reject to start the raid set if any are set by a newer
-	target version, thus avoiding data corruption on a raid set
-	with a reshape in progress.
- 1.9.0	Add support for RAID level takeover/reshape/region size
-	and set size reduction.
- 1.9.1	Fix activation of existing RAID 4/10 mapped devices
- 1.9.2	Don't emit '- -' on the status table line in case the constructor
-	fails reading a superblock. Correctly emit 'maj:min1 maj:min2' and
-	'D' on the status line.  If '- -' is passed into the constructor, emit
-	'- -' on the table line and '-' as the status line health character.
- 1.10.0	Add support for raid4/5/6 journal device
- 1.10.1	Fix data corruption on reshape request
- 1.11.0	Fix table line argument order
-	(wrong raid10_copies/raid10_format sequence)
- 1.11.1	Add raid4/5/6 journal write-back support via journal_mode option
- 1.12.1	Fix for MD deadlock between mddev_suspend() and md_write_start() available
- 1.13.0	Fix dev_health status at end of "recover" (was 'a', now 'A')
- 1.13.1	Fix deadlock caused by early md_stop_writes().  Also fix size an
-	state races.
- 1.13.2	Fix raid redundancy validation and avoid keeping raid set frozen
- 1.14.0	Fix reshape race on small devices.  Fix stripe adding reshape
-	deadlock/potential data corruption.  Update superblock when
-	specific devices are requested via rebuild.  Fix RAID leg
-	rebuild errors.
diff --git a/Documentation/device-mapper/dm-service-time.rst b/Documentation/device-mapper/dm-service-time.rst
deleted file mode 100644
index facf277fc13c..000000000000
--- a/Documentation/device-mapper/dm-service-time.rst
+++ /dev/null
@@ -1,101 +0,0 @@
-===============
-dm-service-time
-===============
-
-dm-service-time is a path selector module for device-mapper targets,
-which selects a path with the shortest estimated service time for
-the incoming I/O.
-
-The service time for each path is estimated by dividing the total size
-of in-flight I/Os on a path with the performance value of the path.
-The performance value is a relative throughput value among all paths
-in a path-group, and it can be specified as a table argument.
-
-The path selector name is 'service-time'.
-
-Table parameters for each path:
-
-    [<repeat_count> [<relative_throughput>]]
-	<repeat_count>:
-			The number of I/Os to dispatch using the selected
-			path before switching to the next path.
-			If not given, internal default is used.  To check
-			the default value, see the activated table.
-	<relative_throughput>:
-			The relative throughput value of the path
-			among all paths in the path-group.
-			The valid range is 0-100.
-			If not given, minimum value '1' is used.
-			If '0' is given, the path isn't selected while
-			other paths having a positive value are available.
-
-Status for each path:
-
-    <status> <fail-count> <in-flight-size> <relative_throughput>
-	<status>:
-		'A' if the path is active, 'F' if the path is failed.
-	<fail-count>:
-		The number of path failures.
-	<in-flight-size>:
-		The size of in-flight I/Os on the path.
-	<relative_throughput>:
-		The relative throughput value of the path
-		among all paths in the path-group.
-
-
-Algorithm
-=========
-
-dm-service-time adds the I/O size to 'in-flight-size' when the I/O is
-dispatched and subtracts when completed.
-Basically, dm-service-time selects a path having minimum service time
-which is calculated by::
-
-	('in-flight-size' + 'size-of-incoming-io') / 'relative_throughput'
-
-However, some optimizations below are used to reduce the calculation
-as much as possible.
-
-	1. If the paths have the same 'relative_throughput', skip
-	   the division and just compare the 'in-flight-size'.
-
-	2. If the paths have the same 'in-flight-size', skip the division
-	   and just compare the 'relative_throughput'.
-
-	3. If some paths have non-zero 'relative_throughput' and others
-	   have zero 'relative_throughput', ignore those paths with zero
-	   'relative_throughput'.
-
-If such optimizations can't be applied, calculate service time, and
-compare service time.
-If calculated service time is equal, the path having maximum
-'relative_throughput' may be better.  So compare 'relative_throughput'
-then.
-
-
-Examples
-========
-In case that 2 paths (sda and sdb) are used with repeat_count == 128
-and sda has an average throughput 1GB/s and sdb has 4GB/s,
-'relative_throughput' value may be '1' for sda and '4' for sdb::
-
-  # echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4" \
-    dmsetup create test
-  #
-  # dmsetup table
-  test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4
-  #
-  # dmsetup status
-  test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 1 8:16 A 0 0 4
-
-
-Or '2' for sda and '8' for sdb would be also true::
-
-  # echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8" \
-    dmsetup create test
-  #
-  # dmsetup table
-  test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8
-  #
-  # dmsetup status
-  test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 2 8:16 A 0 0 8
diff --git a/Documentation/device-mapper/dm-uevent.rst b/Documentation/device-mapper/dm-uevent.rst
deleted file mode 100644
index 4a8ee8d069c9..000000000000
--- a/Documentation/device-mapper/dm-uevent.rst
+++ /dev/null
@@ -1,110 +0,0 @@
-====================
-device-mapper uevent
-====================
-
-The device-mapper uevent code adds the capability to device-mapper to create
-and send kobject uevents (uevents).  Previously device-mapper events were only
-available through the ioctl interface.  The advantage of the uevents interface
-is the event contains environment attributes providing increased context for
-the event avoiding the need to query the state of the device-mapper device after
-the event is received.
-
-There are two functions currently for device-mapper events.  The first function
-listed creates the event and the second function sends the event(s)::
-
-  void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
-                      const char *path, unsigned nr_valid_paths)
-
-  void dm_send_uevents(struct list_head *events, struct kobject *kobj)
-
-
-The variables added to the uevent environment are:
-
-Variable Name: DM_TARGET
-------------------------
-:Uevent Action(s): KOBJ_CHANGE
-:Type: string
-:Description:
-:Value: Name of device-mapper target that generated the event.
-
-Variable Name: DM_ACTION
-------------------------
-:Uevent Action(s): KOBJ_CHANGE
-:Type: string
-:Description:
-:Value: Device-mapper specific action that caused the uevent action.
-	PATH_FAILED - A path has failed;
-	PATH_REINSTATED - A path has been reinstated.
-
-Variable Name: DM_SEQNUM
-------------------------
-:Uevent Action(s): KOBJ_CHANGE
-:Type: unsigned integer
-:Description: A sequence number for this specific device-mapper device.
-:Value: Valid unsigned integer range.
-
-Variable Name: DM_PATH
-----------------------
-:Uevent Action(s): KOBJ_CHANGE
-:Type: string
-:Description: Major and minor number of the path device pertaining to this
-	      event.
-:Value: Path name in the form of "Major:Minor"
-
-Variable Name: DM_NR_VALID_PATHS
---------------------------------
-:Uevent Action(s): KOBJ_CHANGE
-:Type: unsigned integer
-:Description:
-:Value: Valid unsigned integer range.
-
-Variable Name: DM_NAME
-----------------------
-:Uevent Action(s): KOBJ_CHANGE
-:Type: string
-:Description: Name of the device-mapper device.
-:Value: Name
-
-Variable Name: DM_UUID
-----------------------
-:Uevent Action(s): KOBJ_CHANGE
-:Type: string
-:Description: UUID of the device-mapper device.
-:Value: UUID. (Empty string if there isn't one.)
-
-An example of the uevents generated as captured by udevmonitor is shown
-below
-
-1.) Path failure::
-
-	UEVENT[1192521009.711215] change@/block/dm-3
-	ACTION=change
-	DEVPATH=/block/dm-3
-	SUBSYSTEM=block
-	DM_TARGET=multipath
-	DM_ACTION=PATH_FAILED
-	DM_SEQNUM=1
-	DM_PATH=8:32
-	DM_NR_VALID_PATHS=0
-	DM_NAME=mpath2
-	DM_UUID=mpath-35333333000002328
-	MINOR=3
-	MAJOR=253
-	SEQNUM=1130
-
-2.) Path reinstate::
-
-	UEVENT[1192521132.989927] change@/block/dm-3
-	ACTION=change
-	DEVPATH=/block/dm-3
-	SUBSYSTEM=block
-	DM_TARGET=multipath
-	DM_ACTION=PATH_REINSTATED
-	DM_SEQNUM=2
-	DM_PATH=8:32
-	DM_NR_VALID_PATHS=1
-	DM_NAME=mpath2
-	DM_UUID=mpath-35333333000002328
-	MINOR=3
-	MAJOR=253
-	SEQNUM=1131
diff --git a/Documentation/device-mapper/dm-zoned.rst b/Documentation/device-mapper/dm-zoned.rst
deleted file mode 100644
index 07f56ebc1730..000000000000
--- a/Documentation/device-mapper/dm-zoned.rst
+++ /dev/null
@@ -1,146 +0,0 @@
-========
-dm-zoned
-========
-
-The dm-zoned device mapper target exposes a zoned block device (ZBC and
-ZAC compliant devices) as a regular block device without any write
-pattern constraints. In effect, it implements a drive-managed zoned
-block device which hides from the user (a file system or an application
-doing raw block device accesses) the sequential write constraints of
-host-managed zoned block devices and can mitigate the potential
-device-side performance degradation due to excessive random writes on
-host-aware zoned block devices.
-
-For a more detailed description of the zoned block device models and
-their constraints see (for SCSI devices):
-
-http://www.t10.org/drafts.htm#ZBC_Family
-
-and (for ATA devices):
-
-http://www.t13.org/Documents/UploadedDocuments/docs2015/di537r05-Zoned_Device_ATA_Command_Set_ZAC.pdf
-
-The dm-zoned implementation is simple and minimizes system overhead (CPU
-and memory usage as well as storage capacity loss). For a 10TB
-host-managed disk with 256 MB zones, dm-zoned memory usage per disk
-instance is at most 4.5 MB and as little as 5 zones will be used
-internally for storing metadata and performaing reclaim operations.
-
-dm-zoned target devices are formatted and checked using the dmzadm
-utility available at:
-
-https://github.com/hgst/dm-zoned-tools
-
-Algorithm
-=========
-
-dm-zoned implements an on-disk buffering scheme to handle non-sequential
-write accesses to the sequential zones of a zoned block device.
-Conventional zones are used for caching as well as for storing internal
-metadata.
-
-The zones of the device are separated into 2 types:
-
-1) Metadata zones: these are conventional zones used to store metadata.
-Metadata zones are not reported as useable capacity to the user.
-
-2) Data zones: all remaining zones, the vast majority of which will be
-sequential zones used exclusively to store user data. The conventional
-zones of the device may be used also for buffering user random writes.
-Data in these zones may be directly mapped to the conventional zone, but
-later moved to a sequential zone so that the conventional zone can be
-reused for buffering incoming random writes.
-
-dm-zoned exposes a logical device with a sector size of 4096 bytes,
-irrespective of the physical sector size of the backend zoned block
-device being used. This allows reducing the amount of metadata needed to
-manage valid blocks (blocks written).
-
-The on-disk metadata format is as follows:
-
-1) The first block of the first conventional zone found contains the
-super block which describes the on disk amount and position of metadata
-blocks.
-
-2) Following the super block, a set of blocks is used to describe the
-mapping of the logical device blocks. The mapping is done per chunk of
-blocks, with the chunk size equal to the zoned block device size. The
-mapping table is indexed by chunk number and each mapping entry
-indicates the zone number of the device storing the chunk of data. Each
-mapping entry may also indicate if the zone number of a conventional
-zone used to buffer random modification to the data zone.
-
-3) A set of blocks used to store bitmaps indicating the validity of
-blocks in the data zones follows the mapping table. A valid block is
-defined as a block that was written and not discarded. For a buffered
-data chunk, a block is always valid only in the data zone mapping the
-chunk or in the buffer zone of the chunk.
-
-For a logical chunk mapped to a conventional zone, all write operations
-are processed by directly writing to the zone. If the mapping zone is a
-sequential zone, the write operation is processed directly only if the
-write offset within the logical chunk is equal to the write pointer
-offset within of the sequential data zone (i.e. the write operation is
-aligned on the zone write pointer). Otherwise, write operations are
-processed indirectly using a buffer zone. In that case, an unused
-conventional zone is allocated and assigned to the chunk being
-accessed. Writing a block to the buffer zone of a chunk will
-automatically invalidate the same block in the sequential zone mapping
-the chunk. If all blocks of the sequential zone become invalid, the zone
-is freed and the chunk buffer zone becomes the primary zone mapping the
-chunk, resulting in native random write performance similar to a regular
-block device.
-
-Read operations are processed according to the block validity
-information provided by the bitmaps. Valid blocks are read either from
-the sequential zone mapping a chunk, or if the chunk is buffered, from
-the buffer zone assigned. If the accessed chunk has no mapping, or the
-accessed blocks are invalid, the read buffer is zeroed and the read
-operation terminated.
-
-After some time, the limited number of convnetional zones available may
-be exhausted (all used to map chunks or buffer sequential zones) and
-unaligned writes to unbuffered chunks become impossible. To avoid this
-situation, a reclaim process regularly scans used conventional zones and
-tries to reclaim the least recently used zones by copying the valid
-blocks of the buffer zone to a free sequential zone. Once the copy
-completes, the chunk mapping is updated to point to the sequential zone
-and the buffer zone freed for reuse.
-
-Metadata Protection
-===================
-
-To protect metadata against corruption in case of sudden power loss or
-system crash, 2 sets of metadata zones are used. One set, the primary
-set, is used as the main metadata region, while the secondary set is
-used as a staging area. Modified metadata is first written to the
-secondary set and validated by updating the super block in the secondary
-set, a generation counter is used to indicate that this set contains the
-newest metadata. Once this operation completes, in place of metadata
-block updates can be done in the primary metadata set. This ensures that
-one of the set is always consistent (all modifications committed or none
-at all). Flush operations are used as a commit point. Upon reception of
-a flush request, metadata modification activity is temporarily blocked
-(for both incoming BIO processing and reclaim process) and all dirty
-metadata blocks are staged and updated. Normal operation is then
-resumed. Flushing metadata thus only temporarily delays write and
-discard requests. Read requests can be processed concurrently while
-metadata flush is being executed.
-
-Usage
-=====
-
-A zoned block device must first be formatted using the dmzadm tool. This
-will analyze the device zone configuration, determine where to place the
-metadata sets on the device and initialize the metadata sets.
-
-Ex::
-
-	dmzadm --format /dev/sdxx
-
-For a formatted device, the target can be created normally with the
-dmsetup utility. The only parameter that dm-zoned requires is the
-underlying zoned block device name. Ex::
-
-	echo "0 `blockdev --getsize ${dev}` zoned ${dev}" | \
-	dmsetup create dmz-`basename ${dev}`
diff --git a/Documentation/device-mapper/era.rst b/Documentation/device-mapper/era.rst
deleted file mode 100644
index 90dd5c670b9f..000000000000
--- a/Documentation/device-mapper/era.rst
+++ /dev/null
@@ -1,116 +0,0 @@
-======
-dm-era
-======
-
-Introduction
-============
-
-dm-era is a target that behaves similar to the linear target.  In
-addition it keeps track of which blocks were written within a user
-defined period of time called an 'era'.  Each era target instance
-maintains the current era as a monotonically increasing 32-bit
-counter.
-
-Use cases include tracking changed blocks for backup software, and
-partially invalidating the contents of a cache to restore cache
-coherency after rolling back a vendor snapshot.
-
-Constructor
-===========
-
-era <metadata dev> <origin dev> <block size>
-
- ================ ======================================================
- metadata dev     fast device holding the persistent metadata
- origin dev	  device holding data blocks that may change
- block size       block size of origin data device, granularity that is
-		  tracked by the target
- ================ ======================================================
-
-Messages
-========
-
-None of the dm messages take any arguments.
-
-checkpoint
-----------
-
-Possibly move to a new era.  You shouldn't assume the era has
-incremented.  After sending this message, you should check the
-current era via the status line.
-
-take_metadata_snap
-------------------
-
-Create a clone of the metadata, to allow a userland process to read it.
-
-drop_metadata_snap
-------------------
-
-Drop the metadata snapshot.
-
-Status
-======
-
-<metadata block size> <#used metadata blocks>/<#total metadata blocks>
-<current era> <held metadata root | '-'>
-
-========================= ==============================================
-metadata block size	  Fixed block size for each metadata block in
-			  sectors
-#used metadata blocks	  Number of metadata blocks used
-#total metadata blocks	  Total number of metadata blocks
-current era		  The current era
-held metadata root	  The location, in blocks, of the metadata root
-			  that has been 'held' for userspace read
-			  access. '-' indicates there is no held root
-========================= ==============================================
-
-Detailed use case
-=================
-
-The scenario of invalidating a cache when rolling back a vendor
-snapshot was the primary use case when developing this target:
-
-Taking a vendor snapshot
-------------------------
-
-- Send a checkpoint message to the era target
-- Make a note of the current era in its status line
-- Take vendor snapshot (the era and snapshot should be forever
-  associated now).
-
-Rolling back to an vendor snapshot
-----------------------------------
-
-- Cache enters passthrough mode (see: dm-cache's docs in cache.txt)
-- Rollback vendor storage
-- Take metadata snapshot
-- Ascertain which blocks have been written since the snapshot was taken
-  by checking each block's era
-- Invalidate those blocks in the caching software
-- Cache returns to writeback/writethrough mode
-
-Memory usage
-============
-
-The target uses a bitset to record writes in the current era.  It also
-has a spare bitset ready for switching over to a new era.  Other than
-that it uses a few 4k blocks for updating metadata::
-
-   (4 * nr_blocks) bytes + buffers
-
-Resilience
-==========
-
-Metadata is updated on disk before a write to a previously unwritten
-block is performed.  As such dm-era should not be effected by a hard
-crash such as power failure.
-
-Userland tools
-==============
-
-Userland tools are found in the increasingly poorly named
-thin-provisioning-tools project:
-
-    https://github.com/jthornber/thin-provisioning-tools
diff --git a/Documentation/device-mapper/index.rst b/Documentation/device-mapper/index.rst
deleted file mode 100644
index 105e253bc231..000000000000
--- a/Documentation/device-mapper/index.rst
+++ /dev/null
@@ -1,44 +0,0 @@
-:orphan:
-
-=============
-Device Mapper
-=============
-
-.. toctree::
-    :maxdepth: 1
-
-    cache-policies
-    cache
-    delay
-    dm-crypt
-    dm-flakey
-    dm-init
-    dm-integrity
-    dm-io
-    dm-log
-    dm-queue-length
-    dm-raid
-    dm-service-time
-    dm-uevent
-    dm-zoned
-    era
-    kcopyd
-    linear
-    log-writes
-    persistent-data
-    snapshot
-    statistics
-    striped
-    switch
-    thin-provisioning
-    unstriped
-    verity
-    writecache
-    zero
-
-.. only::  subproject and html
-
-   Indices
-   =======
-
-   * :ref:`genindex`
diff --git a/Documentation/device-mapper/kcopyd.rst b/Documentation/device-mapper/kcopyd.rst
deleted file mode 100644
index 7651d395127f..000000000000
--- a/Documentation/device-mapper/kcopyd.rst
+++ /dev/null
@@ -1,47 +0,0 @@
-======
-kcopyd
-======
-
-Kcopyd provides the ability to copy a range of sectors from one block-device
-to one or more other block-devices, with an asynchronous completion
-notification. It is used by dm-snapshot and dm-mirror.
-
-Users of kcopyd must first create a client and indicate how many memory pages
-to set aside for their copy jobs. This is done with a call to
-kcopyd_client_create()::
-
-   int kcopyd_client_create(unsigned int num_pages,
-                            struct kcopyd_client **result);
-
-To start a copy job, the user must set up io_region structures to describe
-the source and destinations of the copy. Each io_region indicates a
-block-device along with the starting sector and size of the region. The source
-of the copy is given as one io_region structure, and the destinations of the
-copy are given as an array of io_region structures::
-
-   struct io_region {
-      struct block_device *bdev;
-      sector_t sector;
-      sector_t count;
-   };
-
-To start the copy, the user calls kcopyd_copy(), passing in the client
-pointer, pointers to the source and destination io_regions, the name of a
-completion callback routine, and a pointer to some context data for the copy::
-
-   int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
-                   unsigned int num_dests, struct io_region *dests,
-                   unsigned int flags, kcopyd_notify_fn fn, void *context);
-
-   typedef void (*kcopyd_notify_fn)(int read_err, unsigned int write_err,
-				    void *context);
-
-When the copy completes, kcopyd will call the user's completion routine,
-passing back the user's context pointer. It will also indicate if a read or
-write error occurred during the copy.
-
-When a user is done with all their copy jobs, they should call
-kcopyd_client_destroy() to delete the kcopyd client, which will release the
-associated memory pages::
-
-   void kcopyd_client_destroy(struct kcopyd_client *kc);
diff --git a/Documentation/device-mapper/linear.rst b/Documentation/device-mapper/linear.rst
deleted file mode 100644
index 9d17fc6e64a9..000000000000
--- a/Documentation/device-mapper/linear.rst
+++ /dev/null
@@ -1,63 +0,0 @@
-=========
-dm-linear
-=========
-
-Device-Mapper's "linear" target maps a linear range of the Device-Mapper
-device onto a linear range of another device.  This is the basic building
-block of logical volume managers.
-
-Parameters: <dev path> <offset>
-    <dev path>:
-	Full pathname to the underlying block-device, or a
-        "major:minor" device-number.
-    <offset>:
-	Starting sector within the device.
-
-
-Example scripts
-===============
-
-::
-
-  #!/bin/sh
-  # Create an identity mapping for a device
-  echo "0 `blockdev --getsz $1` linear $1 0" | dmsetup create identity
-
-::
-
-  #!/bin/sh
-  # Join 2 devices together
-  size1=`blockdev --getsz $1`
-  size2=`blockdev --getsz $2`
-  echo "0 $size1 linear $1 0
-  $size1 $size2 linear $2 0" | dmsetup create joined
-
-::
-
-  #!/usr/bin/perl -w
-  # Split a device into 4M chunks and then join them together in reverse order.
-
-  my $name = "reverse";
-  my $extent_size = 4 * 1024 * 2;
-  my $dev = $ARGV[0];
-  my $table = "";
-  my $count = 0;
-
-  if (!defined($dev)) {
-          die("Please specify a device.\n");
-  }
-
-  my $dev_size = `blockdev --getsz $dev`;
-  my $extents = int($dev_size / $extent_size) -
-                (($dev_size % $extent_size) ? 1 : 0);
-
-  while ($extents > 0) {
-          my $this_start = $count * $extent_size;
-          $extents--;
-          $count++;
-          my $this_offset = $extents * $extent_size;
-
-          $table .= "$this_start $extent_size linear $dev $this_offset\n";
-  }
-
-  `echo \"$table\" | dmsetup create $name`;
diff --git a/Documentation/device-mapper/log-writes.rst b/Documentation/device-mapper/log-writes.rst
deleted file mode 100644
index 23141f2ffb7c..000000000000
--- a/Documentation/device-mapper/log-writes.rst
+++ /dev/null
@@ -1,145 +0,0 @@
-=============
-dm-log-writes
-=============
-
-This target takes 2 devices, one to pass all IO to normally, and one to log all
-of the write operations to.  This is intended for file system developers wishing
-to verify the integrity of metadata or data as the file system is written to.
-There is a log_write_entry written for every WRITE request and the target is
-able to take arbitrary data from userspace to insert into the log.  The data
-that is in the WRITE requests is copied into the log to make the replay happen
-exactly as it happened originally.
-
-Log Ordering
-============
-
-We log things in order of completion once we are sure the write is no longer in
-cache.  This means that normal WRITE requests are not actually logged until the
-next REQ_PREFLUSH request.  This is to make it easier for userspace to replay
-the log in a way that correlates to what is on disk and not what is in cache,
-to make it easier to detect improper waiting/flushing.
-
-This works by attaching all WRITE requests to a list once the write completes.
-Once we see a REQ_PREFLUSH request we splice this list onto the request and once
-the FLUSH request completes we log all of the WRITEs and then the FLUSH.  Only
-completed WRITEs, at the time the REQ_PREFLUSH is issued, are added in order to
-simulate the worst case scenario with regard to power failures.  Consider the
-following example (W means write, C means complete):
-
-	W1,W2,W3,C3,C2,Wflush,C1,Cflush
-
-The log would show the following:
-
-	W3,W2,flush,W1....
-
-Again this is to simulate what is actually on disk, this allows us to detect
-cases where a power failure at a particular point in time would create an
-inconsistent file system.
-
-Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as
-they complete as those requests will obviously bypass the device cache.
-
-Any REQ_OP_DISCARD requests are treated like WRITE requests.  Otherwise we would
-have all the DISCARD requests, and then the WRITE requests and then the FLUSH
-request.  Consider the following example:
-
-	WRITE block 1, DISCARD block 1, FLUSH
-
-If we logged DISCARD when it completed, the replay would look like this:
-
-	DISCARD 1, WRITE 1, FLUSH
-
-which isn't quite what happened and wouldn't be caught during the log replay.
-
-Target interface
-================
-
-i) Constructor
-
-   log-writes <dev_path> <log_dev_path>
-
-   ============= ==============================================
-   dev_path	 Device that all of the IO will go to normally.
-   log_dev_path  Device where the log entries are written to.
-   ============= ==============================================
-
-ii) Status
-
-    <#logged entries> <highest allocated sector>
-
-    =========================== ========================
-    #logged entries	        Number of logged entries
-    highest allocated sector    Highest allocated sector
-    =========================== ========================
-
-iii) Messages
-
-    mark <description>
-
-	You can use a dmsetup message to set an arbitrary mark in a log.
-	For example say you want to fsck a file system after every
-	write, but first you need to replay up to the mkfs to make sure
-	we're fsck'ing something reasonable, you would do something like
-	this::
-
-	  mkfs.btrfs -f /dev/mapper/log
-	  dmsetup message log 0 mark mkfs
-	  <run test>
-
-	This would allow you to replay the log up to the mkfs mark and
-	then replay from that point on doing the fsck check in the
-	interval that you want.
-
-	Every log has a mark at the end labeled "dm-log-writes-end".
-
-Userspace component
-===================
-
-There is a userspace tool that will replay the log for you in various ways.
-It can be found here: https://github.com/josefbacik/log-writes
-
-Example usage
-=============
-
-Say you want to test fsync on your file system.  You would do something like
-this::
-
-  TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
-  dmsetup create log --table "$TABLE"
-  mkfs.btrfs -f /dev/mapper/log
-  dmsetup message log 0 mark mkfs
-
-  mount /dev/mapper/log /mnt/btrfs-test
-  <some test that does fsync at the end>
-  dmsetup message log 0 mark fsync
-  md5sum /mnt/btrfs-test/foo
-  umount /mnt/btrfs-test
-
-  dmsetup remove log
-  replay-log --log /dev/sdc --replay /dev/sdb --end-mark fsync
-  mount /dev/sdb /mnt/btrfs-test
-  md5sum /mnt/btrfs-test/foo
-  <verify md5sum's are correct>
-
-  Another option is to do a complicated file system operation and verify the file
-  system is consistent during the entire operation.  You could do this with:
-
-  TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
-  dmsetup create log --table "$TABLE"
-  mkfs.btrfs -f /dev/mapper/log
-  dmsetup message log 0 mark mkfs
-
-  mount /dev/mapper/log /mnt/btrfs-test
-  <fsstress to dirty the fs>
-  btrfs filesystem balance /mnt/btrfs-test
-  umount /mnt/btrfs-test
-  dmsetup remove log
-
-  replay-log --log /dev/sdc --replay /dev/sdb --end-mark mkfs
-  btrfsck /dev/sdb
-  replay-log --log /dev/sdc --replay /dev/sdb --start-mark mkfs \
-	--fsck "btrfsck /dev/sdb" --check fua
-
-And that will replay the log until it sees a FUA request, run the fsck command
-and if the fsck passes it will replay to the next FUA, until it is completed or
-the fsck command exists abnormally.
diff --git a/Documentation/device-mapper/persistent-data.rst b/Documentation/device-mapper/persistent-data.rst
deleted file mode 100644
index 2065c3c5a091..000000000000
--- a/Documentation/device-mapper/persistent-data.rst
+++ /dev/null
@@ -1,88 +0,0 @@
-===============
-Persistent data
-===============
-
-Introduction
-============
-
-The more-sophisticated device-mapper targets require complex metadata
-that is managed in kernel.  In late 2010 we were seeing that various
-different targets were rolling their own data structures, for example:
-
-- Mikulas Patocka's multisnap implementation
-- Heinz Mauelshagen's thin provisioning target
-- Another btree-based caching target posted to dm-devel
-- Another multi-snapshot target based on a design of Daniel Phillips
-
-Maintaining these data structures takes a lot of work, so if possible
-we'd like to reduce the number.
-
-The persistent-data library is an attempt to provide a re-usable
-framework for people who want to store metadata in device-mapper
-targets.  It's currently used by the thin-provisioning target and an
-upcoming hierarchical storage target.
-
-Overview
-========
-
-The main documentation is in the header files which can all be found
-under drivers/md/persistent-data.
-
-The block manager
------------------
-
-dm-block-manager.[hc]
-
-This provides access to the data on disk in fixed sized-blocks.  There
-is a read/write locking interface to prevent concurrent accesses, and
-keep data that is being used in the cache.
-
-Clients of persistent-data are unlikely to use this directly.
-
-The transaction manager
------------------------
-
-dm-transaction-manager.[hc]
-
-This restricts access to blocks and enforces copy-on-write semantics.
-The only way you can get hold of a writable block through the
-transaction manager is by shadowing an existing block (ie. doing
-copy-on-write) or allocating a fresh one.  Shadowing is elided within
-the same transaction so performance is reasonable.  The commit method
-ensures that all data is flushed before it writes the superblock.
-On power failure your metadata will be as it was when last committed.
-
-The Space Maps
---------------
-
-dm-space-map.h
-dm-space-map-metadata.[hc]
-dm-space-map-disk.[hc]
-
-On-disk data structures that keep track of reference counts of blocks.
-Also acts as the allocator of new blocks.  Currently two
-implementations: a simpler one for managing blocks on a different
-device (eg. thinly-provisioned data blocks); and one for managing
-the metadata space.  The latter is complicated by the need to store
-its own data within the space it's managing.
-
-The data structures
--------------------
-
-dm-btree.[hc]
-dm-btree-remove.c
-dm-btree-spine.c
-dm-btree-internal.h
-
-Currently there is only one data structure, a hierarchical btree.
-There are plans to add more.  For example, something with an
-array-like interface would see a lot of use.
-
-The btree is 'hierarchical' in that you can define it to be composed
-of nested btrees, and take multiple keys.  For example, the
-thin-provisioning target uses a btree with two levels of nesting.
-The first maps a device id to a mapping tree, and that in turn maps a
-virtual block to a physical block.
-
-Values stored in the btrees can have arbitrary size.  Keys are always
-64bits, although nesting allows you to use multiple keys.
diff --git a/Documentation/device-mapper/snapshot.rst b/Documentation/device-mapper/snapshot.rst
deleted file mode 100644
index ccdd8b587a74..000000000000
--- a/Documentation/device-mapper/snapshot.rst
+++ /dev/null
@@ -1,196 +0,0 @@
-==============================
-Device-mapper snapshot support
-==============================
-
-Device-mapper allows you, without massive data copying:
-
--  To create snapshots of any block device i.e. mountable, saved states of
-   the block device which are also writable without interfering with the
-   original content;
--  To create device "forks", i.e. multiple different versions of the
-   same data stream.
--  To merge a snapshot of a block device back into the snapshot's origin
-   device.
-
-In the first two cases, dm copies only the chunks of data that get
-changed and uses a separate copy-on-write (COW) block device for
-storage.
-
-For snapshot merge the contents of the COW storage are merged back into
-the origin device.
-
-
-There are three dm targets available:
-snapshot, snapshot-origin, and snapshot-merge.
-
--  snapshot-origin <origin>
-
-which will normally have one or more snapshots based on it.
-Reads will be mapped directly to the backing device. For each write, the
-original data will be saved in the <COW device> of each snapshot to keep
-its visible content unchanged, at least until the <COW device> fills up.
-
-
--  snapshot <origin> <COW device> <persistent?> <chunksize>
-   [<# feature args> [<arg>]*]
-
-A snapshot of the <origin> block device is created. Changed chunks of
-<chunksize> sectors will be stored on the <COW device>.  Writes will
-only go to the <COW device>.  Reads will come from the <COW device> or
-from <origin> for unchanged data.  <COW device> will often be
-smaller than the origin and if it fills up the snapshot will become
-useless and be disabled, returning errors.  So it is important to monitor
-the amount of free space and expand the <COW device> before it fills up.
-
-<persistent?> is P (Persistent) or N (Not persistent - will not survive
-after reboot).  O (Overflow) can be added as a persistent store option
-to allow userspace to advertise its support for seeing "Overflow" in the
-snapshot status.  So supported store types are "P", "PO" and "N".
-
-The difference between persistent and transient is with transient
-snapshots less metadata must be saved on disk - they can be kept in
-memory by the kernel.
-
-When loading or unloading the snapshot target, the corresponding
-snapshot-origin or snapshot-merge target must be suspended. A failure to
-suspend the origin target could result in data corruption.
-
-Optional features:
-
-   discard_zeroes_cow - a discard issued to the snapshot device that
-   maps to entire chunks to will zero the corresponding exception(s) in
-   the snapshot's exception store.
-
-   discard_passdown_origin - a discard to the snapshot device is passed
-   down to the snapshot-origin's underlying device.  This doesn't cause
-   copy-out to the snapshot exception store because the snapshot-origin
-   target is bypassed.
-
-   The discard_passdown_origin feature depends on the discard_zeroes_cow
-   feature being enabled.
-
-
--  snapshot-merge <origin> <COW device> <persistent> <chunksize>
-   [<# feature args> [<arg>]*]
-
-takes the same table arguments as the snapshot target except it only
-works with persistent snapshots.  This target assumes the role of the
-"snapshot-origin" target and must not be loaded if the "snapshot-origin"
-is still present for <origin>.
-
-Creates a merging snapshot that takes control of the changed chunks
-stored in the <COW device> of an existing snapshot, through a handover
-procedure, and merges these chunks back into the <origin>.  Once merging
-has started (in the background) the <origin> may be opened and the merge
-will continue while I/O is flowing to it.  Changes to the <origin> are
-deferred until the merging snapshot's corresponding chunk(s) have been
-merged.  Once merging has started the snapshot device, associated with
-the "snapshot" target, will return -EIO when accessed.
-
-
-How snapshot is used by LVM2
-============================
-When you create the first LVM2 snapshot of a volume, four dm devices are used:
-
-1) a device containing the original mapping table of the source volume;
-2) a device used as the <COW device>;
-3) a "snapshot" device, combining #1 and #2, which is the visible snapshot
-   volume;
-4) the "original" volume (which uses the device number used by the original
-   source volume), whose table is replaced by a "snapshot-origin" mapping
-   from device #1.
-
-A fixed naming scheme is used, so with the following commands::
-
-  lvcreate -L 1G -n base volumeGroup
-  lvcreate -L 100M --snapshot -n snap volumeGroup/base
-
-we'll have this situation (with volumes in above order)::
-
-  # dmsetup table|grep volumeGroup
-
-  volumeGroup-base-real: 0 2097152 linear 8:19 384
-  volumeGroup-snap-cow: 0 204800 linear 8:19 2097536
-  volumeGroup-snap: 0 2097152 snapshot 254:11 254:12 P 16
-  volumeGroup-base: 0 2097152 snapshot-origin 254:11
-
-  # ls -lL /dev/mapper/volumeGroup-*
-  brw-------  1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
-  brw-------  1 root root 254, 12 29 ago 18:15 /dev/mapper/volumeGroup-snap-cow
-  brw-------  1 root root 254, 13 29 ago 18:15 /dev/mapper/volumeGroup-snap
-  brw-------  1 root root 254, 10 29 ago 18:14 /dev/mapper/volumeGroup-base
-
-
-How snapshot-merge is used by LVM2
-==================================
-A merging snapshot assumes the role of the "snapshot-origin" while
-merging.  As such the "snapshot-origin" is replaced with
-"snapshot-merge".  The "-real" device is not changed and the "-cow"
-device is renamed to <origin name>-cow to aid LVM2's cleanup of the
-merging snapshot after it completes.  The "snapshot" that hands over its
-COW device to the "snapshot-merge" is deactivated (unless using lvchange
---refresh); but if it is left active it will simply return I/O errors.
-
-A snapshot will merge into its origin with the following command::
-
-  lvconvert --merge volumeGroup/snap
-
-we'll now have this situation::
-
-  # dmsetup table|grep volumeGroup
-
-  volumeGroup-base-real: 0 2097152 linear 8:19 384
-  volumeGroup-base-cow: 0 204800 linear 8:19 2097536
-  volumeGroup-base: 0 2097152 snapshot-merge 254:11 254:12 P 16
-
-  # ls -lL /dev/mapper/volumeGroup-*
-  brw-------  1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
-  brw-------  1 root root 254, 12 29 ago 18:16 /dev/mapper/volumeGroup-base-cow
-  brw-------  1 root root 254, 10 29 ago 18:16 /dev/mapper/volumeGroup-base
-
-
-How to determine when a merging is complete
-===========================================
-The snapshot-merge and snapshot status lines end with:
-
-  <sectors_allocated>/<total_sectors> <metadata_sectors>
-
-Both <sectors_allocated> and <total_sectors> include both data and metadata.
-During merging, the number of sectors allocated gets smaller and
-smaller.  Merging has finished when the number of sectors holding data
-is zero, in other words <sectors_allocated> == <metadata_sectors>.
-
-Here is a practical example (using a hybrid of lvm and dmsetup commands)::
-
-  # lvs
-    LV      VG          Attr   LSize Origin  Snap%  Move Log Copy%  Convert
-    base    volumeGroup owi-a- 4.00g
-    snap    volumeGroup swi-a- 1.00g base  18.97
-
-  # dmsetup status volumeGroup-snap
-  0 8388608 snapshot 397896/2097152 1560
-                                    ^^^^ metadata sectors
-
-  # lvconvert --merge -b volumeGroup/snap
-    Merging of volume snap started.
-
-  # lvs volumeGroup/snap
-    LV      VG          Attr   LSize Origin  Snap%  Move Log Copy%  Convert
-    base    volumeGroup Owi-a- 4.00g          17.23
-
-  # dmsetup status volumeGroup-base
-  0 8388608 snapshot-merge 281688/2097152 1104
-
-  # dmsetup status volumeGroup-base
-  0 8388608 snapshot-merge 180480/2097152 712
-
-  # dmsetup status volumeGroup-base
-  0 8388608 snapshot-merge 16/2097152 16
-
-Merging has finished.
-
-::
-
-  # lvs
-    LV      VG          Attr   LSize Origin  Snap%  Move Log Copy%  Convert
-    base    volumeGroup owi-a- 4.00g
diff --git a/Documentation/device-mapper/statistics.rst b/Documentation/device-mapper/statistics.rst
deleted file mode 100644
index 3d80a9f850cc..000000000000
--- a/Documentation/device-mapper/statistics.rst
+++ /dev/null
@@ -1,225 +0,0 @@
-=============
-DM statistics
-=============
-
-Device Mapper supports the collection of I/O statistics on user-defined
-regions of a DM device.	 If no regions are defined no statistics are
-collected so there isn't any performance impact.  Only bio-based DM
-devices are currently supported.
-
-Each user-defined region specifies a starting sector, length and step.
-Individual statistics will be collected for each step-sized area within
-the range specified.
-
-The I/O statistics counters for each step-sized area of a region are
-in the same format as `/sys/block/*/stat` or `/proc/diskstats` (see:
-Documentation/iostats.txt).  But two extra counters (12 and 13) are
-provided: total time spent reading and writing.  When the histogram
-argument is used, the 14th parameter is reported that represents the
-histogram of latencies.  All these counters may be accessed by sending
-the @stats_print message to the appropriate DM device via dmsetup.
-
-The reported times are in milliseconds and the granularity depends on
-the kernel ticks.  When the option precise_timestamps is used, the
-reported times are in nanoseconds.
-
-Each region has a corresponding unique identifier, which we call a
-region_id, that is assigned when the region is created.	 The region_id
-must be supplied when querying statistics about the region, deleting the
-region, etc.  Unique region_ids enable multiple userspace programs to
-request and process statistics for the same DM device without stepping
-on each other's data.
-
-The creation of DM statistics will allocate memory via kmalloc or
-fallback to using vmalloc space.  At most, 1/4 of the overall system
-memory may be allocated by DM statistics.  The admin can see how much
-memory is used by reading:
-
-	/sys/module/dm_mod/parameters/stats_current_allocated_bytes
-
-Messages
-========
-
-    @stats_create <range> <step> [<number_of_optional_arguments> <optional_arguments>...] [<program_id> [<aux_data>]]
-	Create a new region and return the region_id.
-
-	<range>
-	  "-"
-		whole device
-	  "<start_sector>+<length>"
-		a range of <length> 512-byte sectors
-		starting with <start_sector>.
-
-	<step>
-	  "<area_size>"
-		the range is subdivided into areas each containing
-		<area_size> sectors.
-	  "/<number_of_areas>"
-		the range is subdivided into the specified
-		number of areas.
-
-	<number_of_optional_arguments>
-	  The number of optional arguments
-
-	<optional_arguments>
-	  The following optional arguments are supported:
-
-	  precise_timestamps
-		use precise timer with nanosecond resolution
-		instead of the "jiffies" variable.  When this argument is
-		used, the resulting times are in nanoseconds instead of
-		milliseconds.  Precise timestamps are a little bit slower
-		to obtain than jiffies-based timestamps.
-	  histogram:n1,n2,n3,n4,...
-		collect histogram of latencies.  The
-		numbers n1, n2, etc are times that represent the boundaries
-		of the histogram.  If precise_timestamps is not used, the
-		times are in milliseconds, otherwise they are in
-		nanoseconds.  For each range, the kernel will report the
-		number of requests that completed within this range. For
-		example, if we use "histogram:10,20,30", the kernel will
-		report four numbers a:b:c:d. a is the number of requests
-		that took 0-10 ms to complete, b is the number of requests
-		that took 10-20 ms to complete, c is the number of requests
-		that took 20-30 ms to complete and d is the number of
-		requests that took more than 30 ms to complete.
-
-	<program_id>
-	  An optional parameter.  A name that uniquely identifies
-	  the userspace owner of the range.  This groups ranges together
-	  so that userspace programs can identify the ranges they
-	  created and ignore those created by others.
-	  The kernel returns this string back in the output of
-	  @stats_list message, but it doesn't use it for anything else.
-	  If we omit the number of optional arguments, program id must not
-	  be a number, otherwise it would be interpreted as the number of
-	  optional arguments.
-
-	<aux_data>
-	  An optional parameter.  A word that provides auxiliary data
-	  that is useful to the client program that created the range.
-	  The kernel returns this string back in the output of
-	  @stats_list message, but it doesn't use this value for anything.
-
-    @stats_delete <region_id>
-	Delete the region with the specified id.
-
-	<region_id>
-	  region_id returned from @stats_create
-
-    @stats_clear <region_id>
-	Clear all the counters except the in-flight i/o counters.
-
-	<region_id>
-	  region_id returned from @stats_create
-
-    @stats_list [<program_id>]
-	List all regions registered with @stats_create.
-
-	<program_id>
-	  An optional parameter.
-	  If this parameter is specified, only matching regions
-	  are returned.
-	  If it is not specified, all regions are returned.
-
-	Output format:
-	  <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
-	        precise_timestamps histogram:n1,n2,n3,...
-
-	The strings "precise_timestamps" and "histogram" are printed only
-	if they were specified when creating the region.
-
-    @stats_print <region_id> [<starting_line> <number_of_lines>]
-	Print counters for each step-sized area of a region.
-
-	<region_id>
-	  region_id returned from @stats_create
-
-	<starting_line>
-	  The index of the starting line in the output.
-	  If omitted, all lines are returned.
-
-	<number_of_lines>
-	  The number of lines to include in the output.
-	  If omitted, all lines are returned.
-
-	Output format for each step-sized area of a region:
-
-	  <start_sector>+<length>
-		counters
-
-	  The first 11 counters have the same meaning as
-	  `/sys/block/*/stat or /proc/diskstats`.
-
-	  Please refer to Documentation/iostats.txt for details.
-
-	  1. the number of reads completed
-	  2. the number of reads merged
-	  3. the number of sectors read
-	  4. the number of milliseconds spent reading
-	  5. the number of writes completed
-	  6. the number of writes merged
-	  7. the number of sectors written
-	  8. the number of milliseconds spent writing
-	  9. the number of I/Os currently in progress
-	  10. the number of milliseconds spent doing I/Os
-	  11. the weighted number of milliseconds spent doing I/Os
-
-	  Additional counters:
-
-	  12. the total time spent reading in milliseconds
-	  13. the total time spent writing in milliseconds
-
-    @stats_print_clear <region_id> [<starting_line> <number_of_lines>]
-	Atomically print and then clear all the counters except the
-	in-flight i/o counters.	 Useful when the client consuming the
-	statistics does not want to lose any statistics (those updated
-	between printing and clearing).
-
-	<region_id>
-	  region_id returned from @stats_create
-
-	<starting_line>
-	  The index of the starting line in the output.
-	  If omitted, all lines are printed and then cleared.
-
-	<number_of_lines>
-	  The number of lines to process.
-	  If omitted, all lines are printed and then cleared.
-
-    @stats_set_aux <region_id> <aux_data>
-	Store auxiliary data aux_data for the specified region.
-
-	<region_id>
-	  region_id returned from @stats_create
-
-	<aux_data>
-	  The string that identifies data which is useful to the client
-	  program that created the range.  The kernel returns this
-	  string back in the output of @stats_list message, but it
-	  doesn't use this value for anything.
-
-Examples
-========
-
-Subdivide the DM device 'vol' into 100 pieces and start collecting
-statistics on them::
-
-  dmsetup message vol 0 @stats_create - /100
-
-Set the auxiliary data string to "foo bar baz" (the escape for each
-space must also be escaped, otherwise the shell will consume them)::
-
-  dmsetup message vol 0 @stats_set_aux 0 foo\\ bar\\ baz
-
-List the statistics::
-
-  dmsetup message vol 0 @stats_list
-
-Print the statistics::
-
-  dmsetup message vol 0 @stats_print 0
-
-Delete the statistics::
-
-  dmsetup message vol 0 @stats_delete 0
diff --git a/Documentation/device-mapper/striped.rst b/Documentation/device-mapper/striped.rst
deleted file mode 100644
index e9a8da192ae1..000000000000
--- a/Documentation/device-mapper/striped.rst
+++ /dev/null
@@ -1,61 +0,0 @@
-=========
-dm-stripe
-=========
-
-Device-Mapper's "striped" target is used to create a striped (i.e. RAID-0)
-device across one or more underlying devices. Data is written in "chunks",
-with consecutive chunks rotating among the underlying devices. This can
-potentially provide improved I/O throughput by utilizing several physical
-devices in parallel.
-
-Parameters: <num devs> <chunk size> [<dev path> <offset>]+
-    <num devs>:
-	Number of underlying devices.
-    <chunk size>:
-	Size of each chunk of data. Must be at least as
-        large as the system's PAGE_SIZE.
-    <dev path>:
-	Full pathname to the underlying block-device, or a
-	"major:minor" device-number.
-    <offset>:
-	Starting sector within the device.
-
-One or more underlying devices can be specified. The striped device size must
-be a multiple of the chunk size multiplied by the number of underlying devices.
-
-
-Example scripts
-===============
-
-::
-
-  #!/usr/bin/perl -w
-  # Create a striped device across any number of underlying devices. The device
-  # will be called "stripe_dev" and have a chunk-size of 128k.
-
-  my $chunk_size = 128 * 2;
-  my $dev_name = "stripe_dev";
-  my $num_devs = @ARGV;
-  my @devs = @ARGV;
-  my ($min_dev_size, $stripe_dev_size, $i);
-
-  if (!$num_devs) {
-          die("Specify at least one device\n");
-  }
-
-  $min_dev_size = `blockdev --getsz $devs[0]`;
-  for ($i = 1; $i < $num_devs; $i++) {
-          my $this_size = `blockdev --getsz $devs[$i]`;
-          $min_dev_size = ($min_dev_size < $this_size) ?
-                          $min_dev_size : $this_size;
-  }
-
-  $stripe_dev_size = $min_dev_size * $num_devs;
-  $stripe_dev_size -= $stripe_dev_size % ($chunk_size * $num_devs);
-
-  $table = "0 $stripe_dev_size striped $num_devs $chunk_size";
-  for ($i = 0; $i < $num_devs; $i++) {
-          $table .= " $devs[$i] 0";
-  }
-
-  `echo $table | dmsetup create $dev_name`;
diff --git a/Documentation/device-mapper/switch.rst b/Documentation/device-mapper/switch.rst
deleted file mode 100644
index 7dde06be1a4f..000000000000
--- a/Documentation/device-mapper/switch.rst
+++ /dev/null
@@ -1,141 +0,0 @@
-=========
-dm-switch
-=========
-
-The device-mapper switch target creates a device that supports an
-arbitrary mapping of fixed-size regions of I/O across a fixed set of
-paths.  The path used for any specific region can be switched
-dynamically by sending the target a message.
-
-It maps I/O to underlying block devices efficiently when there is a large
-number of fixed-sized address regions but there is no simple pattern
-that would allow for a compact representation of the mapping such as
-dm-stripe.
-
-Background
-----------
-
-Dell EqualLogic and some other iSCSI storage arrays use a distributed
-frameless architecture.  In this architecture, the storage group
-consists of a number of distinct storage arrays ("members") each having
-independent controllers, disk storage and network adapters.  When a LUN
-is created it is spread across multiple members.  The details of the
-spreading are hidden from initiators connected to this storage system.
-The storage group exposes a single target discovery portal, no matter
-how many members are being used.  When iSCSI sessions are created, each
-session is connected to an eth port on a single member.  Data to a LUN
-can be sent on any iSCSI session, and if the blocks being accessed are
-stored on another member the I/O will be forwarded as required.  This
-forwarding is invisible to the initiator.  The storage layout is also
-dynamic, and the blocks stored on disk may be moved from member to
-member as needed to balance the load.
-
-This architecture simplifies the management and configuration of both
-the storage group and initiators.  In a multipathing configuration, it
-is possible to set up multiple iSCSI sessions to use multiple network
-interfaces on both the host and target to take advantage of the
-increased network bandwidth.  An initiator could use a simple round
-robin algorithm to send I/O across all paths and let the storage array
-members forward it as necessary, but there is a performance advantage to
-sending data directly to the correct member.
-
-A device-mapper table already lets you map different regions of a
-device onto different targets.  However in this architecture the LUN is
-spread with an address region size on the order of 10s of MBs, which
-means the resulting table could have more than a million entries and
-consume far too much memory.
-
-Using this device-mapper switch target we can now build a two-layer
-device hierarchy:
-
-    Upper Tier - Determine which array member the I/O should be sent to.
-    Lower Tier - Load balance amongst paths to a particular member.
-
-The lower tier consists of a single dm multipath device for each member.
-Each of these multipath devices contains the set of paths directly to
-the array member in one priority group, and leverages existing path
-selectors to load balance amongst these paths.  We also build a
-non-preferred priority group containing paths to other array members for
-failover reasons.
-
-The upper tier consists of a single dm-switch device.  This device uses
-a bitmap to look up the location of the I/O and choose the appropriate
-lower tier device to route the I/O.  By using a bitmap we are able to
-use 4 bits for each address range in a 16 member group (which is very
-large for us).  This is a much denser representation than the dm table
-b-tree can achieve.
-
-Construction Parameters
-=======================
-
-    <num_paths> <region_size> <num_optional_args> [<optional_args>...] [<dev_path> <offset>]+
-	<num_paths>
-	    The number of paths across which to distribute the I/O.
-
-	<region_size>
-	    The number of 512-byte sectors in a region. Each region can be redirected
-	    to any of the available paths.
-
-	<num_optional_args>
-	    The number of optional arguments. Currently, no optional arguments
-	    are supported and so this must be zero.
-
-	<dev_path>
-	    The block device that represents a specific path to the device.
-
-	<offset>
-	    The offset of the start of data on the specific <dev_path> (in units
-	    of 512-byte sectors). This number is added to the sector number when
-	    forwarding the request to the specific path. Typically it is zero.
-
-Messages
-========
-
-set_region_mappings <index>:<path_nr> [<index>]:<path_nr> [<index>]:<path_nr>...
-
-Modify the region table by specifying which regions are redirected to
-which paths.
-
-<index>
-    The region number (region size was specified in constructor parameters).
-    If index is omitted, the next region (previous index + 1) is used.
-    Expressed in hexadecimal (WITHOUT any prefix like 0x).
-
-<path_nr>
-    The path number in the range 0 ... (<num_paths> - 1).
-    Expressed in hexadecimal (WITHOUT any prefix like 0x).
-
-R<n>,<m>
-    This parameter allows repetitive patterns to be loaded quickly. <n> and <m>
-    are hexadecimal numbers. The last <n> mappings are repeated in the next <m>
-    slots.
-
-Status
-======
-
-No status line is reported.
-
-Example
-=======
-
-Assume that you have volumes vg1/switch0 vg1/switch1 vg1/switch2 with
-the same size.
-
-Create a switch device with 64kB region size::
-
-    dmsetup create switch --table "0 `blockdev --getsz /dev/vg1/switch0`
-	switch 3 128 0 /dev/vg1/switch0 0 /dev/vg1/switch1 0 /dev/vg1/switch2 0"
-
-Set mappings for the first 7 entries to point to devices switch0, switch1,
-switch2, switch0, switch1, switch2, switch1::
-
-    dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1
-
-Set repetitive mapping. This command::
-
-    dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10
-
-is equivalent to::
-
-    dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \
-	:1 :2 :1 :2 :1 :2 :1 :2 :1 :2
diff --git a/Documentation/device-mapper/thin-provisioning.rst b/Documentation/device-mapper/thin-provisioning.rst
deleted file mode 100644
index bafebf79da4b..000000000000
--- a/Documentation/device-mapper/thin-provisioning.rst
+++ /dev/null
@@ -1,427 +0,0 @@
-=================
-Thin provisioning
-=================
-
-Introduction
-============
-
-This document describes a collection of device-mapper targets that
-between them implement thin-provisioning and snapshots.
-
-The main highlight of this implementation, compared to the previous
-implementation of snapshots, is that it allows many virtual devices to
-be stored on the same data volume.  This simplifies administration and
-allows the sharing of data between volumes, thus reducing disk usage.
-
-Another significant feature is support for an arbitrary depth of
-recursive snapshots (snapshots of snapshots of snapshots ...).  The
-previous implementation of snapshots did this by chaining together
-lookup tables, and so performance was O(depth).  This new
-implementation uses a single data structure to avoid this degradation
-with depth.  Fragmentation may still be an issue, however, in some
-scenarios.
-
-Metadata is stored on a separate device from data, giving the
-administrator some freedom, for example to:
-
-- Improve metadata resilience by storing metadata on a mirrored volume
-  but data on a non-mirrored one.
-
-- Improve performance by storing the metadata on SSD.
-
-Status
-======
-
-These targets are considered safe for production use.  But different use
-cases will have different performance characteristics, for example due
-to fragmentation of the data volume.
-
-If you find this software is not performing as expected please mail
-dm-devel@redhat.com with details and we'll try our best to improve
-things for you.
-
-Userspace tools for checking and repairing the metadata have been fully
-developed and are available as 'thin_check' and 'thin_repair'.  The name
-of the package that provides these utilities varies by distribution (on
-a Red Hat distribution it is named 'device-mapper-persistent-data').
-
-Cookbook
-========
-
-This section describes some quick recipes for using thin provisioning.
-They use the dmsetup program to control the device-mapper driver
-directly.  End users will be advised to use a higher-level volume
-manager such as LVM2 once support has been added.
-
-Pool device
------------
-
-The pool device ties together the metadata volume and the data volume.
-It maps I/O linearly to the data volume and updates the metadata via
-two mechanisms:
-
-- Function calls from the thin targets
-
-- Device-mapper 'messages' from userspace which control the creation of new
-  virtual devices amongst other things.
-
-Setting up a fresh pool device
-------------------------------
-
-Setting up a pool device requires a valid metadata device, and a
-data device.  If you do not have an existing metadata device you can
-make one by zeroing the first 4k to indicate empty metadata.
-
-    dd if=/dev/zero of=$metadata_dev bs=4096 count=1
-
-The amount of metadata you need will vary according to how many blocks
-are shared between thin devices (i.e. through snapshots).  If you have
-less sharing than average you'll need a larger-than-average metadata device.
-
-As a guide, we suggest you calculate the number of bytes to use in the
-metadata device as 48 * $data_dev_size / $data_block_size but round it up
-to 2MB if the answer is smaller.  If you're creating large numbers of
-snapshots which are recording large amounts of change, you may find you
-need to increase this.
-
-The largest size supported is 16GB: If the device is larger,
-a warning will be issued and the excess space will not be used.
-
-Reloading a pool table
-----------------------
-
-You may reload a pool's table, indeed this is how the pool is resized
-if it runs out of space.  (N.B. While specifying a different metadata
-device when reloading is not forbidden at the moment, things will go
-wrong if it does not route I/O to exactly the same on-disk location as
-previously.)
-
-Using an existing pool device
------------------------------
-
-::
-
-    dmsetup create pool \
-	--table "0 20971520 thin-pool $metadata_dev $data_dev \
-		 $data_block_size $low_water_mark"
-
-$data_block_size gives the smallest unit of disk space that can be
-allocated at a time expressed in units of 512-byte sectors.
-$data_block_size must be between 128 (64KB) and 2097152 (1GB) and a
-multiple of 128 (64KB).  $data_block_size cannot be changed after the
-thin-pool is created.  People primarily interested in thin provisioning
-may want to use a value such as 1024 (512KB).  People doing lots of
-snapshotting may want a smaller value such as 128 (64KB).  If you are
-not zeroing newly-allocated data, a larger $data_block_size in the
-region of 256000 (128MB) is suggested.
-
-$low_water_mark is expressed in blocks of size $data_block_size.  If
-free space on the data device drops below this level then a dm event
-will be triggered which a userspace daemon should catch allowing it to
-extend the pool device.  Only one such event will be sent.
-
-No special event is triggered if a just resumed device's free space is below
-the low water mark. However, resuming a device always triggers an
-event; a userspace daemon should verify that free space exceeds the low
-water mark when handling this event.
-
-A low water mark for the metadata device is maintained in the kernel and
-will trigger a dm event if free space on the metadata device drops below
-it.
-
-Updating on-disk metadata
--------------------------
-
-On-disk metadata is committed every time a FLUSH or FUA bio is written.
-If no such requests are made then commits will occur every second.  This
-means the thin-provisioning target behaves like a physical disk that has
-a volatile write cache.  If power is lost you may lose some recent
-writes.  The metadata should always be consistent in spite of any crash.
-
-If data space is exhausted the pool will either error or queue IO
-according to the configuration (see: error_if_no_space).  If metadata
-space is exhausted or a metadata operation fails: the pool will error IO
-until the pool is taken offline and repair is performed to 1) fix any
-potential inconsistencies and 2) clear the flag that imposes repair.
-Once the pool's metadata device is repaired it may be resized, which
-will allow the pool to return to normal operation.  Note that if a pool
-is flagged as needing repair, the pool's data and metadata devices
-cannot be resized until repair is performed.  It should also be noted
-that when the pool's metadata space is exhausted the current metadata
-transaction is aborted.  Given that the pool will cache IO whose
-completion may have already been acknowledged to upper IO layers
-(e.g. filesystem) it is strongly suggested that consistency checks
-(e.g. fsck) be performed on those layers when repair of the pool is
-required.
-
-Thin provisioning
------------------
-
-i) Creating a new thinly-provisioned volume.
-
-  To create a new thinly- provisioned volume you must send a message to an
-  active pool device, /dev/mapper/pool in this example::
-
-    dmsetup message /dev/mapper/pool 0 "create_thin 0"
-
-  Here '0' is an identifier for the volume, a 24-bit number.  It's up
-  to the caller to allocate and manage these identifiers.  If the
-  identifier is already in use, the message will fail with -EEXIST.
-
-ii) Using a thinly-provisioned volume.
-
-  Thinly-provisioned volumes are activated using the 'thin' target::
-
-    dmsetup create thin --table "0 2097152 thin /dev/mapper/pool 0"
-
-  The last parameter is the identifier for the thinp device.
-
-Internal snapshots
-------------------
-
-i) Creating an internal snapshot.
-
-  Snapshots are created with another message to the pool.
-
-  N.B.  If the origin device that you wish to snapshot is active, you
-  must suspend it before creating the snapshot to avoid corruption.
-  This is NOT enforced at the moment, so please be careful!
-
-  ::
-
-    dmsetup suspend /dev/mapper/thin
-    dmsetup message /dev/mapper/pool 0 "create_snap 1 0"
-    dmsetup resume /dev/mapper/thin
-
-  Here '1' is the identifier for the volume, a 24-bit number.  '0' is the
-  identifier for the origin device.
-
-ii) Using an internal snapshot.
-
-  Once created, the user doesn't have to worry about any connection
-  between the origin and the snapshot.  Indeed the snapshot is no
-  different from any other thinly-provisioned device and can be
-  snapshotted itself via the same method.  It's perfectly legal to
-  have only one of them active, and there's no ordering requirement on
-  activating or removing them both.  (This differs from conventional
-  device-mapper snapshots.)
-
-  Activate it exactly the same way as any other thinly-provisioned volume::
-
-    dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 1"
-
-External snapshots
-------------------
-
-You can use an external **read only** device as an origin for a
-thinly-provisioned volume.  Any read to an unprovisioned area of the
-thin device will be passed through to the origin.  Writes trigger
-the allocation of new blocks as usual.
-
-One use case for this is VM hosts that want to run guests on
-thinly-provisioned volumes but have the base image on another device
-(possibly shared between many VMs).
-
-You must not write to the origin device if you use this technique!
-Of course, you may write to the thin device and take internal snapshots
-of the thin volume.
-
-i) Creating a snapshot of an external device
-
-  This is the same as creating a thin device.
-  You don't mention the origin at this stage.
-
-  ::
-
-    dmsetup message /dev/mapper/pool 0 "create_thin 0"
-
-ii) Using a snapshot of an external device.
-
-  Append an extra parameter to the thin target specifying the origin::
-
-    dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 0 /dev/image"
-
-  N.B. All descendants (internal snapshots) of this snapshot require the
-  same extra origin parameter.
-
-Deactivation
-------------
-
-All devices using a pool must be deactivated before the pool itself
-can be.
-
-::
-
-    dmsetup remove thin
-    dmsetup remove snap
-    dmsetup remove pool
-
-Reference
-=========
-
-'thin-pool' target
-------------------
-
-i) Constructor
-
-    ::
-
-      thin-pool <metadata dev> <data dev> <data block size (sectors)> \
-	        <low water mark (blocks)> [<number of feature args> [<arg>]*]
-
-    Optional feature arguments:
-
-      skip_block_zeroing:
-	Skip the zeroing of newly-provisioned blocks.
-
-      ignore_discard:
-	Disable discard support.
-
-      no_discard_passdown:
-	Don't pass discards down to the underlying
-	data device, but just remove the mapping.
-
-      read_only:
-		 Don't allow any changes to be made to the pool
-		 metadata.  This mode is only available after the
-		 thin-pool has been created and first used in full
-		 read/write mode.  It cannot be specified on initial
-		 thin-pool creation.
-
-      error_if_no_space:
-	Error IOs, instead of queueing, if no space.
-
-    Data block size must be between 64KB (128 sectors) and 1GB
-    (2097152 sectors) inclusive.
-
-
-ii) Status
-
-    ::
-
-      <transaction id> <used metadata blocks>/<total metadata blocks>
-      <used data blocks>/<total data blocks> <held metadata root>
-      ro|rw|out_of_data_space [no_]discard_passdown [error|queue]_if_no_space
-      needs_check|- metadata_low_watermark
-
-    transaction id:
-	A 64-bit number used by userspace to help synchronise with metadata
-	from volume managers.
-
-    used data blocks / total data blocks
-	If the number of free blocks drops below the pool's low water mark a
-	dm event will be sent to userspace.  This event is edge-triggered and
-	it will occur only once after each resume so volume manager writers
-	should register for the event and then check the target's status.
-
-    held metadata root:
-	The location, in blocks, of the metadata root that has been
-	'held' for userspace read access.  '-' indicates there is no
-	held root.
-
-    discard_passdown|no_discard_passdown
-	Whether or not discards are actually being passed down to the
-	underlying device.  When this is enabled when loading the table,
-	it can get disabled if the underlying device doesn't support it.
-
-    ro|rw|out_of_data_space
-	If the pool encounters certain types of device failures it will
-	drop into a read-only metadata mode in which no changes to
-	the pool metadata (like allocating new blocks) are permitted.
-
-	In serious cases where even a read-only mode is deemed unsafe
-	no further I/O will be permitted and the status will just
-	contain the string 'Fail'.  The userspace recovery tools
-	should then be used.
-
-    error_if_no_space|queue_if_no_space
-	If the pool runs out of data or metadata space, the pool will
-	either queue or error the IO destined to the data device.  The
-	default is to queue the IO until more space is added or the
-	'no_space_timeout' expires.  The 'no_space_timeout' dm-thin-pool
-	module parameter can be used to change this timeout -- it
-	defaults to 60 seconds but may be disabled using a value of 0.
-
-    needs_check
-	A metadata operation has failed, resulting in the needs_check
-	flag being set in the metadata's superblock.  The metadata
-	device must be deactivated and checked/repaired before the
-	thin-pool can be made fully operational again.  '-' indicates
-	needs_check is not set.
-
-    metadata_low_watermark:
-	Value of metadata low watermark in blocks.  The kernel sets this
-	value internally but userspace needs to know this value to
-	determine if an event was caused by crossing this threshold.
-
-iii) Messages
-
-    create_thin <dev id>
-	Create a new thinly-provisioned device.
-	<dev id> is an arbitrary unique 24-bit identifier chosen by
-	the caller.
-
-    create_snap <dev id> <origin id>
-	Create a new snapshot of another thinly-provisioned device.
-	<dev id> is an arbitrary unique 24-bit identifier chosen by
-	the caller.
-	<origin id> is the identifier of the thinly-provisioned device
-	of which the new device will be a snapshot.
-
-    delete <dev id>
-	Deletes a thin device.  Irreversible.
-
-    set_transaction_id <current id> <new id>
-	Userland volume managers, such as LVM, need a way to
-	synchronise their external metadata with the internal metadata of the
-	pool target.  The thin-pool target offers to store an
-	arbitrary 64-bit transaction id and return it on the target's
-	status line.  To avoid races you must provide what you think
-	the current transaction id is when you change it with this
-	compare-and-swap message.
-
-    reserve_metadata_snap
-        Reserve a copy of the data mapping btree for use by userland.
-        This allows userland to inspect the mappings as they were when
-        this message was executed.  Use the pool's status command to
-        get the root block associated with the metadata snapshot.
-
-    release_metadata_snap
-        Release a previously reserved copy of the data mapping btree.
-
-'thin' target
--------------
-
-i) Constructor
-
-    ::
-
-        thin <pool dev> <dev id> [<external origin dev>]
-
-    pool dev:
-	the thin-pool device, e.g. /dev/mapper/my_pool or 253:0
-
-    dev id:
-	the internal device identifier of the device to be
-	activated.
-
-    external origin dev:
-	an optional block device outside the pool to be treated as a
-	read-only snapshot origin: reads to unprovisioned areas of the
-	thin target will be mapped to this device.
-
-The pool doesn't store any size against the thin devices.  If you
-load a thin target that is smaller than you've been using previously,
-then you'll have no access to blocks mapped beyond the end.  If you
-load a target that is bigger than before, then extra blocks will be
-provisioned as and when needed.
-
-ii) Status
-
-    <nr mapped sectors> <highest mapped sector>
-	If the pool has encountered device errors and failed, the status
-	will just contain the string 'Fail'.  The userspace recovery
-	tools should then be used.
-
-    In the case where <nr mapped sectors> is 0, there is no highest
-    mapped sector and the value of <highest mapped sector> is unspecified.
diff --git a/Documentation/device-mapper/unstriped.rst b/Documentation/device-mapper/unstriped.rst
deleted file mode 100644
index 0a8d3eb3f072..000000000000
--- a/Documentation/device-mapper/unstriped.rst
+++ /dev/null
@@ -1,135 +0,0 @@
-================================
-Device-mapper "unstriped" target
-================================
-
-Introduction
-============
-
-The device-mapper "unstriped" target provides a transparent mechanism to
-unstripe a device-mapper "striped" target to access the underlying disks
-without having to touch the true backing block-device.  It can also be
-used to unstripe a hardware RAID-0 to access backing disks.
-
-Parameters:
-<number of stripes> <chunk size> <stripe #> <dev_path> <offset>
-
-<number of stripes>
-        The number of stripes in the RAID 0.
-
-<chunk size>
-	The amount of 512B sectors in the chunk striping.
-
-<dev_path>
-	The block device you wish to unstripe.
-
-<stripe #>
-        The stripe number within the device that corresponds to physical
-        drive you wish to unstripe.  This must be 0 indexed.
-
-
-Why use this module?
-====================
-
-An example of undoing an existing dm-stripe
--------------------------------------------
-
-This small bash script will setup 4 loop devices and use the existing
-striped target to combine the 4 devices into one.  It then will use
-the unstriped target ontop of the striped device to access the
-individual backing loop devices.  We write data to the newly exposed
-unstriped devices and verify the data written matches the correct
-underlying device on the striped array::
-
-  #!/bin/bash
-
-  MEMBER_SIZE=$((128 * 1024 * 1024))
-  NUM=4
-  SEQ_END=$((${NUM}-1))
-  CHUNK=256
-  BS=4096
-
-  RAID_SIZE=$((${MEMBER_SIZE}*${NUM}/512))
-  DM_PARMS="0 ${RAID_SIZE} striped ${NUM} ${CHUNK}"
-  COUNT=$((${MEMBER_SIZE} / ${BS}))
-
-  for i in $(seq 0 ${SEQ_END}); do
-    dd if=/dev/zero of=member-${i} bs=${MEMBER_SIZE} count=1 oflag=direct
-    losetup /dev/loop${i} member-${i}
-    DM_PARMS+=" /dev/loop${i} 0"
-  done
-
-  echo $DM_PARMS | dmsetup create raid0
-  for i in $(seq 0 ${SEQ_END}); do
-    echo "0 1 unstriped ${NUM} ${CHUNK} ${i} /dev/mapper/raid0 0" | dmsetup create set-${i}
-  done;
-
-  for i in $(seq 0 ${SEQ_END}); do
-    dd if=/dev/urandom of=/dev/mapper/set-${i} bs=${BS} count=${COUNT} oflag=direct
-    diff /dev/mapper/set-${i} member-${i}
-  done;
-
-  for i in $(seq 0 ${SEQ_END}); do
-    dmsetup remove set-${i}
-  done
-
-  dmsetup remove raid0
-
-  for i in $(seq 0 ${SEQ_END}); do
-    losetup -d /dev/loop${i}
-    rm -f member-${i}
-  done
-
-Another example
----------------
-
-Intel NVMe drives contain two cores on the physical device.
-Each core of the drive has segregated access to its LBA range.
-The current LBA model has a RAID 0 128k chunk on each core, resulting
-in a 256k stripe across the two cores::
-
-   Core 0:       Core 1:
-  __________    __________
-  | LBA 512|    | LBA 768|
-  | LBA 0  |    | LBA 256|
-  ----------    ----------
-
-The purpose of this unstriping is to provide better QoS in noisy
-neighbor environments. When two partitions are created on the
-aggregate drive without this unstriping, reads on one partition
-can affect writes on another partition.  This is because the partitions
-are striped across the two cores.  When we unstripe this hardware RAID 0
-and make partitions on each new exposed device the two partitions are now
-physically separated.
-
-With the dm-unstriped target we're able to segregate an fio script that
-has read and write jobs that are independent of each other.  Compared to
-when we run the test on a combined drive with partitions, we were able
-to get a 92% reduction in read latency using this device mapper target.
-
-
-Example dmsetup usage
-=====================
-
-unstriped ontop of Intel NVMe device that has 2 cores
------------------------------------------------------
-
-::
-
-  dmsetup create nvmset0 --table '0 512 unstriped 2 256 0 /dev/nvme0n1 0'
-  dmsetup create nvmset1 --table '0 512 unstriped 2 256 1 /dev/nvme0n1 0'
-
-There will now be two devices that expose Intel NVMe core 0 and 1
-respectively::
-
-  /dev/mapper/nvmset0
-  /dev/mapper/nvmset1
-
-unstriped ontop of striped with 4 drives using 128K chunk size
---------------------------------------------------------------
-
-::
-
-  dmsetup create raid_disk0 --table '0 512 unstriped 4 256 0 /dev/mapper/striped 0'
-  dmsetup create raid_disk1 --table '0 512 unstriped 4 256 1 /dev/mapper/striped 0'
-  dmsetup create raid_disk2 --table '0 512 unstriped 4 256 2 /dev/mapper/striped 0'
-  dmsetup create raid_disk3 --table '0 512 unstriped 4 256 3 /dev/mapper/striped 0'
diff --git a/Documentation/device-mapper/verity.rst b/Documentation/device-mapper/verity.rst
deleted file mode 100644
index a4d1c1476d72..000000000000
--- a/Documentation/device-mapper/verity.rst
+++ /dev/null
@@ -1,229 +0,0 @@
-=========
-dm-verity
-=========
-
-Device-Mapper's "verity" target provides transparent integrity checking of
-block devices using a cryptographic digest provided by the kernel crypto API.
-This target is read-only.
-
-Construction Parameters
-=======================
-
-::
-
-    <version> <dev> <hash_dev>
-    <data_block_size> <hash_block_size>
-    <num_data_blocks> <hash_start_block>
-    <algorithm> <digest> <salt>
-    [<#opt_params> <opt_params>]
-
-<version>
-    This is the type of the on-disk hash format.
-
-    0 is the original format used in the Chromium OS.
-      The salt is appended when hashing, digests are stored continuously and
-      the rest of the block is padded with zeroes.
-
-    1 is the current format that should be used for new devices.
-      The salt is prepended when hashing and each digest is
-      padded with zeroes to the power of two.
-
-<dev>
-    This is the device containing data, the integrity of which needs to be
-    checked.  It may be specified as a path, like /dev/sdaX, or a device number,
-    <major>:<minor>.
-
-<hash_dev>
-    This is the device that supplies the hash tree data.  It may be
-    specified similarly to the device path and may be the same device.  If the
-    same device is used, the hash_start should be outside the configured
-    dm-verity device.
-
-<data_block_size>
-    The block size on a data device in bytes.
-    Each block corresponds to one digest on the hash device.
-
-<hash_block_size>
-    The size of a hash block in bytes.
-
-<num_data_blocks>
-    The number of data blocks on the data device.  Additional blocks are
-    inaccessible.  You can place hashes to the same partition as data, in this
-    case hashes are placed after <num_data_blocks>.
-
-<hash_start_block>
-    This is the offset, in <hash_block_size>-blocks, from the start of hash_dev
-    to the root block of the hash tree.
-
-<algorithm>
-    The cryptographic hash algorithm used for this device.  This should
-    be the name of the algorithm, like "sha1".
-
-<digest>
-    The hexadecimal encoding of the cryptographic hash of the root hash block
-    and the salt.  This hash should be trusted as there is no other authenticity
-    beyond this point.
-
-<salt>
-    The hexadecimal encoding of the salt value.
-
-<#opt_params>
-    Number of optional parameters. If there are no optional parameters,
-    the optional paramaters section can be skipped or #opt_params can be zero.
-    Otherwise #opt_params is the number of following arguments.
-
-    Example of optional parameters section:
-        1 ignore_corruption
-
-ignore_corruption
-    Log corrupted blocks, but allow read operations to proceed normally.
-
-restart_on_corruption
-    Restart the system when a corrupted block is discovered. This option is
-    not compatible with ignore_corruption and requires user space support to
-    avoid restart loops.
-
-ignore_zero_blocks
-    Do not verify blocks that are expected to contain zeroes and always return
-    zeroes instead. This may be useful if the partition contains unused blocks
-    that are not guaranteed to contain zeroes.
-
-use_fec_from_device <fec_dev>
-    Use forward error correction (FEC) to recover from corruption if hash
-    verification fails. Use encoding data from the specified device. This
-    may be the same device where data and hash blocks reside, in which case
-    fec_start must be outside data and hash areas.
-
-    If the encoding data covers additional metadata, it must be accessible
-    on the hash device after the hash blocks.
-
-    Note: block sizes for data and hash devices must match. Also, if the
-    verity <dev> is encrypted the <fec_dev> should be too.
-
-fec_roots <num>
-    Number of generator roots. This equals to the number of parity bytes in
-    the encoding data. For example, in RS(M, N) encoding, the number of roots
-    is M-N.
-
-fec_blocks <num>
-    The number of encoding data blocks on the FEC device. The block size for
-    the FEC device is <data_block_size>.
-
-fec_start <offset>
-    This is the offset, in <data_block_size> blocks, from the start of the
-    FEC device to the beginning of the encoding data.
-
-check_at_most_once
-    Verify data blocks only the first time they are read from the data device,
-    rather than every time.  This reduces the overhead of dm-verity so that it
-    can be used on systems that are memory and/or CPU constrained.  However, it
-    provides a reduced level of security because only offline tampering of the
-    data device's content will be detected, not online tampering.
-
-    Hash blocks are still verified each time they are read from the hash device,
-    since verification of hash blocks is less performance critical than data
-    blocks, and a hash block will not be verified any more after all the data
-    blocks it covers have been verified anyway.
-
-Theory of operation
-===================
-
-dm-verity is meant to be set up as part of a verified boot path.  This
-may be anything ranging from a boot using tboot or trustedgrub to just
-booting from a known-good device (like a USB drive or CD).
-
-When a dm-verity device is configured, it is expected that the caller
-has been authenticated in some way (cryptographic signatures, etc).
-After instantiation, all hashes will be verified on-demand during
-disk access.  If they cannot be verified up to the root node of the
-tree, the root hash, then the I/O will fail.  This should detect
-tampering with any data on the device and the hash data.
-
-Cryptographic hashes are used to assert the integrity of the device on a
-per-block basis. This allows for a lightweight hash computation on first read
-into the page cache. Block hashes are stored linearly, aligned to the nearest
-block size.
-
-If forward error correction (FEC) support is enabled any recovery of
-corrupted data will be verified using the cryptographic hash of the
-corresponding data. This is why combining error correction with
-integrity checking is essential.
-
-Hash Tree
----------
-
-Each node in the tree is a cryptographic hash.  If it is a leaf node, the hash
-of some data block on disk is calculated. If it is an intermediary node,
-the hash of a number of child nodes is calculated.
-
-Each entry in the tree is a collection of neighboring nodes that fit in one
-block.  The number is determined based on block_size and the size of the
-selected cryptographic digest algorithm.  The hashes are linearly-ordered in
-this entry and any unaligned trailing space is ignored but included when
-calculating the parent node.
-
-The tree looks something like:
-
-	alg = sha256, num_blocks = 32768, block_size = 4096
-
-::
-
-                                 [   root    ]
-                                /    . . .    \
-                     [entry_0]                 [entry_1]
-                    /  . . .  \                 . . .   \
-         [entry_0_0]   . . .  [entry_0_127]    . . . .  [entry_1_127]
-           / ... \             /   . . .  \             /           \
-     blk_0 ... blk_127  blk_16256   blk_16383      blk_32640 . . . blk_32767
-
-
-On-disk format
-==============
-
-The verity kernel code does not read the verity metadata on-disk header.
-It only reads the hash blocks which directly follow the header.
-It is expected that a user-space tool will verify the integrity of the
-verity header.
-
-Alternatively, the header can be omitted and the dmsetup parameters can
-be passed via the kernel command-line in a rooted chain of trust where
-the command-line is verified.
-
-Directly following the header (and with sector number padded to the next hash
-block boundary) are the hash blocks which are stored a depth at a time
-(starting from the root), sorted in order of increasing index.
-
-The full specification of kernel parameters and on-disk metadata format
-is available at the cryptsetup project's wiki page
-
-  https://gitlab.com/cryptsetup/cryptsetup/wikis/DMVerity
-
-Status
-======
-V (for Valid) is returned if every check performed so far was valid.
-If any check failed, C (for Corruption) is returned.
-
-Example
-=======
-Set up a device::
-
-  # dmsetup create vroot --readonly --table \
-    "0 2097152 verity 1 /dev/sda1 /dev/sda2 4096 4096 262144 1 sha256 "\
-    "4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076 "\
-    "1234000000000000000000000000000000000000000000000000000000000000"
-
-A command line tool veritysetup is available to compute or verify
-the hash tree or activate the kernel device. This is available from
-the cryptsetup upstream repository https://gitlab.com/cryptsetup/cryptsetup/
-(as a libcryptsetup extension).
-
-Create hash on the device::
-
-  # veritysetup format /dev/sda1 /dev/sda2
-  ...
-  Root hash: 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
-
-Activate the device::
-
-  # veritysetup create vroot /dev/sda1 /dev/sda2 \
-    4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
diff --git a/Documentation/device-mapper/writecache.rst b/Documentation/device-mapper/writecache.rst
deleted file mode 100644
index d3d7690f5e8d..000000000000
--- a/Documentation/device-mapper/writecache.rst
+++ /dev/null
@@ -1,79 +0,0 @@
-=================
-Writecache target
-=================
-
-The writecache target caches writes on persistent memory or on SSD. It
-doesn't cache reads because reads are supposed to be cached in page cache
-in normal RAM.
-
-When the device is constructed, the first sector should be zeroed or the
-first sector should contain valid superblock from previous invocation.
-
-Constructor parameters:
-
-1. type of the cache device - "p" or "s"
-
-	- p - persistent memory
-	- s - SSD
-2. the underlying device that will be cached
-3. the cache device
-4. block size (4096 is recommended; the maximum block size is the page
-   size)
-5. the number of optional parameters (the parameters with an argument
-   count as two)
-
-	start_sector n		(default: 0)
-		offset from the start of cache device in 512-byte sectors
-	high_watermark n	(default: 50)
-		start writeback when the number of used blocks reach this
-		watermark
-	low_watermark x		(default: 45)
-		stop writeback when the number of used blocks drops below
-		this watermark
-	writeback_jobs n	(default: unlimited)
-		limit the number of blocks that are in flight during
-		writeback. Setting this value reduces writeback
-		throughput, but it may improve latency of read requests
-	autocommit_blocks n	(default: 64 for pmem, 65536 for ssd)
-		when the application writes this amount of blocks without
-		issuing the FLUSH request, the blocks are automatically
-		commited
-	autocommit_time ms	(default: 1000)
-		autocommit time in milliseconds. The data is automatically
-		commited if this time passes and no FLUSH request is
-		received
-	fua			(by default on)
-		applicable only to persistent memory - use the FUA flag
-		when writing data from persistent memory back to the
-		underlying device
-	nofua
-		applicable only to persistent memory - don't use the FUA
-		flag when writing back data and send the FLUSH request
-		afterwards
-
-		- some underlying devices perform better with fua, some
-		  with nofua. The user should test it
-
-Status:
-1. error indicator - 0 if there was no error, otherwise error number
-2. the number of blocks
-3. the number of free blocks
-4. the number of blocks under writeback
-
-Messages:
-	flush
-		flush the cache device. The message returns successfully
-		if the cache device was flushed without an error
-	flush_on_suspend
-		flush the cache device on next suspend. Use this message
-		when you are going to remove the cache device. The proper
-		sequence for removing the cache device is:
-
-		1. send the "flush_on_suspend" message
-		2. load an inactive table with a linear target that maps
-		   to the underlying device
-		3. suspend the device
-		4. ask for status and verify that there are no errors
-		5. resume the device, so that it will use the linear
-		   target
-		6. the cache device is now inactive and it can be deleted
diff --git a/Documentation/device-mapper/zero.rst b/Documentation/device-mapper/zero.rst
deleted file mode 100644
index 11fb5cf4597c..000000000000
--- a/Documentation/device-mapper/zero.rst
+++ /dev/null
@@ -1,37 +0,0 @@
-=======
-dm-zero
-=======
-
-Device-Mapper's "zero" target provides a block-device that always returns
-zero'd data on reads and silently drops writes. This is similar behavior to
-/dev/zero, but as a block-device instead of a character-device.
-
-Dm-zero has no target-specific parameters.
-
-One very interesting use of dm-zero is for creating "sparse" devices in
-conjunction with dm-snapshot. A sparse device reports a device-size larger
-than the amount of actual storage space available for that device. A user can
-write data anywhere within the sparse device and read it back like a normal
-device. Reads to previously unwritten areas will return a zero'd buffer. When
-enough data has been written to fill up the actual storage space, the sparse
-device is deactivated. This can be very useful for testing device and
-filesystem limitations.
-
-To create a sparse device, start by creating a dm-zero device that's the
-desired size of the sparse device. For this example, we'll assume a 10TB
-sparse device::
-
-  TEN_TERABYTES=`expr 10 \* 1024 \* 1024 \* 1024 \* 2`   # 10 TB in sectors
-  echo "0 $TEN_TERABYTES zero" | dmsetup create zero1
-
-Then create a snapshot of the zero device, using any available block-device as
-the COW device. The size of the COW device will determine the amount of real
-space available to the sparse device. For this example, we'll assume /dev/sdb1
-is an available 10GB partition::
-
-  echo "0 $TEN_TERABYTES snapshot /dev/mapper/zero1 /dev/sdb1 p 128" | \
-     dmsetup create sparse1
-
-This will create a 10TB sparse device called /dev/mapper/sparse1 that has
-10GB of actual storage space available. If more than 10GB of data is written
-to this device, it will start returning I/O errors.
-- 
cgit 


From ec4b78a0e7dd4751423089b7cfd32168f9052377 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 15:00:25 -0300
Subject: docs: early-userspace: move to driver-api guide

Those documents describe a kAPI. So, add to the driver-api
book.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 .../driver-api/early-userspace/buffer-format.rst   | 119 ++++++++++++++++
 .../early-userspace/early_userspace_support.rst    | 154 +++++++++++++++++++++
 Documentation/driver-api/early-userspace/index.rst |  16 +++
 Documentation/driver-api/index.rst                 |   1 +
 Documentation/early-userspace/buffer-format.rst    | 119 ----------------
 .../early-userspace/early_userspace_support.rst    | 154 ---------------------
 Documentation/early-userspace/index.rst            |  18 ---
 Documentation/filesystems/nfs/nfsroot.txt          |   2 +-
 .../filesystems/ramfs-rootfs-initramfs.txt         |   4 +-
 9 files changed, 293 insertions(+), 294 deletions(-)
 create mode 100644 Documentation/driver-api/early-userspace/buffer-format.rst
 create mode 100644 Documentation/driver-api/early-userspace/early_userspace_support.rst
 create mode 100644 Documentation/driver-api/early-userspace/index.rst
 delete mode 100644 Documentation/early-userspace/buffer-format.rst
 delete mode 100644 Documentation/early-userspace/early_userspace_support.rst
 delete mode 100644 Documentation/early-userspace/index.rst

(limited to 'Documentation')

diff --git a/Documentation/driver-api/early-userspace/buffer-format.rst b/Documentation/driver-api/early-userspace/buffer-format.rst
new file mode 100644
index 000000000000..7f74e301fdf3
--- /dev/null
+++ b/Documentation/driver-api/early-userspace/buffer-format.rst
@@ -0,0 +1,119 @@
+=======================
+initramfs buffer format
+=======================
+
+Al Viro, H. Peter Anvin
+
+Last revision: 2002-01-13
+
+Starting with kernel 2.5.x, the old "initial ramdisk" protocol is
+getting {replaced/complemented} with the new "initial ramfs"
+(initramfs) protocol.  The initramfs contents is passed using the same
+memory buffer protocol used by the initrd protocol, but the contents
+is different.  The initramfs buffer contains an archive which is
+expanded into a ramfs filesystem; this document details the format of
+the initramfs buffer format.
+
+The initramfs buffer format is based around the "newc" or "crc" CPIO
+formats, and can be created with the cpio(1) utility.  The cpio
+archive can be compressed using gzip(1).  One valid version of an
+initramfs buffer is thus a single .cpio.gz file.
+
+The full format of the initramfs buffer is defined by the following
+grammar, where::
+
+	*	is used to indicate "0 or more occurrences of"
+	(|)	indicates alternatives
+	+	indicates concatenation
+	GZIP()	indicates the gzip(1) of the operand
+	ALGN(n)	means padding with null bytes to an n-byte boundary
+
+	initramfs  := ("\0" | cpio_archive | cpio_gzip_archive)*
+
+	cpio_gzip_archive := GZIP(cpio_archive)
+
+	cpio_archive := cpio_file* + (<nothing> | cpio_trailer)
+
+	cpio_file := ALGN(4) + cpio_header + filename + "\0" + ALGN(4) + data
+
+	cpio_trailer := ALGN(4) + cpio_header + "TRAILER!!!\0" + ALGN(4)
+
+
+In human terms, the initramfs buffer contains a collection of
+compressed and/or uncompressed cpio archives (in the "newc" or "crc"
+formats); arbitrary amounts zero bytes (for padding) can be added
+between members.
+
+The cpio "TRAILER!!!" entry (cpio end-of-archive) is optional, but is
+not ignored; see "handling of hard links" below.
+
+The structure of the cpio_header is as follows (all fields contain
+hexadecimal ASCII numbers fully padded with '0' on the left to the
+full width of the field, for example, the integer 4780 is represented
+by the ASCII string "000012ac"):
+
+============= ================== ==============================================
+Field name    Field size	 Meaning
+============= ================== ==============================================
+c_magic	      6 bytes		 The string "070701" or "070702"
+c_ino	      8 bytes		 File inode number
+c_mode	      8 bytes		 File mode and permissions
+c_uid	      8 bytes		 File uid
+c_gid	      8 bytes		 File gid
+c_nlink	      8 bytes		 Number of links
+c_mtime	      8 bytes		 Modification time
+c_filesize    8 bytes		 Size of data field
+c_maj	      8 bytes		 Major part of file device number
+c_min	      8 bytes		 Minor part of file device number
+c_rmaj	      8 bytes		 Major part of device node reference
+c_rmin	      8 bytes		 Minor part of device node reference
+c_namesize    8 bytes		 Length of filename, including final \0
+c_chksum      8 bytes		 Checksum of data field if c_magic is 070702;
+				 otherwise zero
+============= ================== ==============================================
+
+The c_mode field matches the contents of st_mode returned by stat(2)
+on Linux, and encodes the file type and file permissions.
+
+The c_filesize should be zero for any file which is not a regular file
+or symlink.
+
+The c_chksum field contains a simple 32-bit unsigned sum of all the
+bytes in the data field.  cpio(1) refers to this as "crc", which is
+clearly incorrect (a cyclic redundancy check is a different and
+significantly stronger integrity check), however, this is the
+algorithm used.
+
+If the filename is "TRAILER!!!" this is actually an end-of-archive
+marker; the c_filesize for an end-of-archive marker must be zero.
+
+
+Handling of hard links
+======================
+
+When a nondirectory with c_nlink > 1 is seen, the (c_maj,c_min,c_ino)
+tuple is looked up in a tuple buffer.  If not found, it is entered in
+the tuple buffer and the entry is created as usual; if found, a hard
+link rather than a second copy of the file is created.  It is not
+necessary (but permitted) to include a second copy of the file
+contents; if the file contents is not included, the c_filesize field
+should be set to zero to indicate no data section follows.  If data is
+present, the previous instance of the file is overwritten; this allows
+the data-carrying instance of a file to occur anywhere in the sequence
+(GNU cpio is reported to attach the data to the last instance of a
+file only.)
+
+c_filesize must not be zero for a symlink.
+
+When a "TRAILER!!!" end-of-archive marker is seen, the tuple buffer is
+reset.  This permits archives which are generated independently to be
+concatenated.
+
+To combine file data from different sources (without having to
+regenerate the (c_maj,c_min,c_ino) fields), therefore, either one of
+the following techniques can be used:
+
+a) Separate the different file data sources with a "TRAILER!!!"
+   end-of-archive marker, or
+
+b) Make sure c_nlink == 1 for all nondirectory entries.
diff --git a/Documentation/driver-api/early-userspace/early_userspace_support.rst b/Documentation/driver-api/early-userspace/early_userspace_support.rst
new file mode 100644
index 000000000000..3deefb34046b
--- /dev/null
+++ b/Documentation/driver-api/early-userspace/early_userspace_support.rst
@@ -0,0 +1,154 @@
+=======================
+Early userspace support
+=======================
+
+Last update: 2004-12-20 tlh
+
+
+"Early userspace" is a set of libraries and programs that provide
+various pieces of functionality that are important enough to be
+available while a Linux kernel is coming up, but that don't need to be
+run inside the kernel itself.
+
+It consists of several major infrastructure components:
+
+- gen_init_cpio, a program that builds a cpio-format archive
+  containing a root filesystem image.  This archive is compressed, and
+  the compressed image is linked into the kernel image.
+- initramfs, a chunk of code that unpacks the compressed cpio image
+  midway through the kernel boot process.
+- klibc, a userspace C library, currently packaged separately, that is
+  optimized for correctness and small size.
+
+The cpio file format used by initramfs is the "newc" (aka "cpio -H newc")
+format, and is documented in the file "buffer-format.txt".  There are
+two ways to add an early userspace image: specify an existing cpio
+archive to be used as the image or have the kernel build process build
+the image from specifications.
+
+CPIO ARCHIVE method
+-------------------
+
+You can create a cpio archive that contains the early userspace image.
+Your cpio archive should be specified in CONFIG_INITRAMFS_SOURCE and it
+will be used directly.  Only a single cpio file may be specified in
+CONFIG_INITRAMFS_SOURCE and directory and file names are not allowed in
+combination with a cpio archive.
+
+IMAGE BUILDING method
+---------------------
+
+The kernel build process can also build an early userspace image from
+source parts rather than supplying a cpio archive.  This method provides
+a way to create images with root-owned files even though the image was
+built by an unprivileged user.
+
+The image is specified as one or more sources in
+CONFIG_INITRAMFS_SOURCE.  Sources can be either directories or files -
+cpio archives are *not* allowed when building from sources.
+
+A source directory will have it and all of its contents packaged.  The
+specified directory name will be mapped to '/'.  When packaging a
+directory, limited user and group ID translation can be performed.
+INITRAMFS_ROOT_UID can be set to a user ID that needs to be mapped to
+user root (0).  INITRAMFS_ROOT_GID can be set to a group ID that needs
+to be mapped to group root (0).
+
+A source file must be directives in the format required by the
+usr/gen_init_cpio utility (run 'usr/gen_init_cpio -h' to get the
+file format).  The directives in the file will be passed directly to
+usr/gen_init_cpio.
+
+When a combination of directories and files are specified then the
+initramfs image will be an aggregate of all of them.  In this way a user
+can create a 'root-image' directory and install all files into it.
+Because device-special files cannot be created by a unprivileged user,
+special files can be listed in a 'root-files' file.  Both 'root-image'
+and 'root-files' can be listed in CONFIG_INITRAMFS_SOURCE and a complete
+early userspace image can be built by an unprivileged user.
+
+As a technical note, when directories and files are specified, the
+entire CONFIG_INITRAMFS_SOURCE is passed to
+usr/gen_initramfs_list.sh.  This means that CONFIG_INITRAMFS_SOURCE
+can really be interpreted as any legal argument to
+gen_initramfs_list.sh.  If a directory is specified as an argument then
+the contents are scanned, uid/gid translation is performed, and
+usr/gen_init_cpio file directives are output.  If a directory is
+specified as an argument to usr/gen_initramfs_list.sh then the
+contents of the file are simply copied to the output.  All of the output
+directives from directory scanning and file contents copying are
+processed by usr/gen_init_cpio.
+
+See also 'usr/gen_initramfs_list.sh -h'.
+
+Where's this all leading?
+=========================
+
+The klibc distribution contains some of the necessary software to make
+early userspace useful.  The klibc distribution is currently
+maintained separately from the kernel.
+
+You can obtain somewhat infrequent snapshots of klibc from
+https://www.kernel.org/pub/linux/libs/klibc/
+
+For active users, you are better off using the klibc git
+repository, at http://git.kernel.org/?p=libs/klibc/klibc.git
+
+The standalone klibc distribution currently provides three components,
+in addition to the klibc library:
+
+- ipconfig, a program that configures network interfaces.  It can
+  configure them statically, or use DHCP to obtain information
+  dynamically (aka "IP autoconfiguration").
+- nfsmount, a program that can mount an NFS filesystem.
+- kinit, the "glue" that uses ipconfig and nfsmount to replace the old
+  support for IP autoconfig, mount a filesystem over NFS, and continue
+  system boot using that filesystem as root.
+
+kinit is built as a single statically linked binary to save space.
+
+Eventually, several more chunks of kernel functionality will hopefully
+move to early userspace:
+
+- Almost all of init/do_mounts* (the beginning of this is already in
+  place)
+- ACPI table parsing
+- Insert unwieldy subsystem that doesn't really need to be in kernel
+  space here
+
+If kinit doesn't meet your current needs and you've got bytes to burn,
+the klibc distribution includes a small Bourne-compatible shell (ash)
+and a number of other utilities, so you can replace kinit and build
+custom initramfs images that meet your needs exactly.
+
+For questions and help, you can sign up for the early userspace
+mailing list at http://www.zytor.com/mailman/listinfo/klibc
+
+How does it work?
+=================
+
+The kernel has currently 3 ways to mount the root filesystem:
+
+a) all required device and filesystem drivers compiled into the kernel, no
+   initrd.  init/main.c:init() will call prepare_namespace() to mount the
+   final root filesystem, based on the root= option and optional init= to run
+   some other init binary than listed at the end of init/main.c:init().
+
+b) some device and filesystem drivers built as modules and stored in an
+   initrd.  The initrd must contain a binary '/linuxrc' which is supposed to
+   load these driver modules.  It is also possible to mount the final root
+   filesystem via linuxrc and use the pivot_root syscall.  The initrd is
+   mounted and executed via prepare_namespace().
+
+c) using initramfs.  The call to prepare_namespace() must be skipped.
+   This means that a binary must do all the work.  Said binary can be stored
+   into initramfs either via modifying usr/gen_init_cpio.c or via the new
+   initrd format, an cpio archive.  It must be called "/init".  This binary
+   is responsible to do all the things prepare_namespace() would do.
+
+   To maintain backwards compatibility, the /init binary will only run if it
+   comes via an initramfs cpio archive.  If this is not the case,
+   init/main.c:init() will run prepare_namespace() to mount the final root
+   and exec one of the predefined init binaries.
+
+Bryan O'Sullivan <bos@serpentine.com>
diff --git a/Documentation/driver-api/early-userspace/index.rst b/Documentation/driver-api/early-userspace/index.rst
new file mode 100644
index 000000000000..6f20c3c560d8
--- /dev/null
+++ b/Documentation/driver-api/early-userspace/index.rst
@@ -0,0 +1,16 @@
+===============
+Early Userspace
+===============
+
+.. toctree::
+    :maxdepth: 1
+
+    early_userspace_support
+    buffer-format
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index baa77a666e46..0f281f4f648f 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -16,6 +16,7 @@ available subsections can be seen below.
 
    basics
    infrastructure
+   early-userspace/index
    pm/index
    clk
    device-io
diff --git a/Documentation/early-userspace/buffer-format.rst b/Documentation/early-userspace/buffer-format.rst
deleted file mode 100644
index 7f74e301fdf3..000000000000
--- a/Documentation/early-userspace/buffer-format.rst
+++ /dev/null
@@ -1,119 +0,0 @@
-=======================
-initramfs buffer format
-=======================
-
-Al Viro, H. Peter Anvin
-
-Last revision: 2002-01-13
-
-Starting with kernel 2.5.x, the old "initial ramdisk" protocol is
-getting {replaced/complemented} with the new "initial ramfs"
-(initramfs) protocol.  The initramfs contents is passed using the same
-memory buffer protocol used by the initrd protocol, but the contents
-is different.  The initramfs buffer contains an archive which is
-expanded into a ramfs filesystem; this document details the format of
-the initramfs buffer format.
-
-The initramfs buffer format is based around the "newc" or "crc" CPIO
-formats, and can be created with the cpio(1) utility.  The cpio
-archive can be compressed using gzip(1).  One valid version of an
-initramfs buffer is thus a single .cpio.gz file.
-
-The full format of the initramfs buffer is defined by the following
-grammar, where::
-
-	*	is used to indicate "0 or more occurrences of"
-	(|)	indicates alternatives
-	+	indicates concatenation
-	GZIP()	indicates the gzip(1) of the operand
-	ALGN(n)	means padding with null bytes to an n-byte boundary
-
-	initramfs  := ("\0" | cpio_archive | cpio_gzip_archive)*
-
-	cpio_gzip_archive := GZIP(cpio_archive)
-
-	cpio_archive := cpio_file* + (<nothing> | cpio_trailer)
-
-	cpio_file := ALGN(4) + cpio_header + filename + "\0" + ALGN(4) + data
-
-	cpio_trailer := ALGN(4) + cpio_header + "TRAILER!!!\0" + ALGN(4)
-
-
-In human terms, the initramfs buffer contains a collection of
-compressed and/or uncompressed cpio archives (in the "newc" or "crc"
-formats); arbitrary amounts zero bytes (for padding) can be added
-between members.
-
-The cpio "TRAILER!!!" entry (cpio end-of-archive) is optional, but is
-not ignored; see "handling of hard links" below.
-
-The structure of the cpio_header is as follows (all fields contain
-hexadecimal ASCII numbers fully padded with '0' on the left to the
-full width of the field, for example, the integer 4780 is represented
-by the ASCII string "000012ac"):
-
-============= ================== ==============================================
-Field name    Field size	 Meaning
-============= ================== ==============================================
-c_magic	      6 bytes		 The string "070701" or "070702"
-c_ino	      8 bytes		 File inode number
-c_mode	      8 bytes		 File mode and permissions
-c_uid	      8 bytes		 File uid
-c_gid	      8 bytes		 File gid
-c_nlink	      8 bytes		 Number of links
-c_mtime	      8 bytes		 Modification time
-c_filesize    8 bytes		 Size of data field
-c_maj	      8 bytes		 Major part of file device number
-c_min	      8 bytes		 Minor part of file device number
-c_rmaj	      8 bytes		 Major part of device node reference
-c_rmin	      8 bytes		 Minor part of device node reference
-c_namesize    8 bytes		 Length of filename, including final \0
-c_chksum      8 bytes		 Checksum of data field if c_magic is 070702;
-				 otherwise zero
-============= ================== ==============================================
-
-The c_mode field matches the contents of st_mode returned by stat(2)
-on Linux, and encodes the file type and file permissions.
-
-The c_filesize should be zero for any file which is not a regular file
-or symlink.
-
-The c_chksum field contains a simple 32-bit unsigned sum of all the
-bytes in the data field.  cpio(1) refers to this as "crc", which is
-clearly incorrect (a cyclic redundancy check is a different and
-significantly stronger integrity check), however, this is the
-algorithm used.
-
-If the filename is "TRAILER!!!" this is actually an end-of-archive
-marker; the c_filesize for an end-of-archive marker must be zero.
-
-
-Handling of hard links
-======================
-
-When a nondirectory with c_nlink > 1 is seen, the (c_maj,c_min,c_ino)
-tuple is looked up in a tuple buffer.  If not found, it is entered in
-the tuple buffer and the entry is created as usual; if found, a hard
-link rather than a second copy of the file is created.  It is not
-necessary (but permitted) to include a second copy of the file
-contents; if the file contents is not included, the c_filesize field
-should be set to zero to indicate no data section follows.  If data is
-present, the previous instance of the file is overwritten; this allows
-the data-carrying instance of a file to occur anywhere in the sequence
-(GNU cpio is reported to attach the data to the last instance of a
-file only.)
-
-c_filesize must not be zero for a symlink.
-
-When a "TRAILER!!!" end-of-archive marker is seen, the tuple buffer is
-reset.  This permits archives which are generated independently to be
-concatenated.
-
-To combine file data from different sources (without having to
-regenerate the (c_maj,c_min,c_ino) fields), therefore, either one of
-the following techniques can be used:
-
-a) Separate the different file data sources with a "TRAILER!!!"
-   end-of-archive marker, or
-
-b) Make sure c_nlink == 1 for all nondirectory entries.
diff --git a/Documentation/early-userspace/early_userspace_support.rst b/Documentation/early-userspace/early_userspace_support.rst
deleted file mode 100644
index 3deefb34046b..000000000000
--- a/Documentation/early-userspace/early_userspace_support.rst
+++ /dev/null
@@ -1,154 +0,0 @@
-=======================
-Early userspace support
-=======================
-
-Last update: 2004-12-20 tlh
-
-
-"Early userspace" is a set of libraries and programs that provide
-various pieces of functionality that are important enough to be
-available while a Linux kernel is coming up, but that don't need to be
-run inside the kernel itself.
-
-It consists of several major infrastructure components:
-
-- gen_init_cpio, a program that builds a cpio-format archive
-  containing a root filesystem image.  This archive is compressed, and
-  the compressed image is linked into the kernel image.
-- initramfs, a chunk of code that unpacks the compressed cpio image
-  midway through the kernel boot process.
-- klibc, a userspace C library, currently packaged separately, that is
-  optimized for correctness and small size.
-
-The cpio file format used by initramfs is the "newc" (aka "cpio -H newc")
-format, and is documented in the file "buffer-format.txt".  There are
-two ways to add an early userspace image: specify an existing cpio
-archive to be used as the image or have the kernel build process build
-the image from specifications.
-
-CPIO ARCHIVE method
--------------------
-
-You can create a cpio archive that contains the early userspace image.
-Your cpio archive should be specified in CONFIG_INITRAMFS_SOURCE and it
-will be used directly.  Only a single cpio file may be specified in
-CONFIG_INITRAMFS_SOURCE and directory and file names are not allowed in
-combination with a cpio archive.
-
-IMAGE BUILDING method
----------------------
-
-The kernel build process can also build an early userspace image from
-source parts rather than supplying a cpio archive.  This method provides
-a way to create images with root-owned files even though the image was
-built by an unprivileged user.
-
-The image is specified as one or more sources in
-CONFIG_INITRAMFS_SOURCE.  Sources can be either directories or files -
-cpio archives are *not* allowed when building from sources.
-
-A source directory will have it and all of its contents packaged.  The
-specified directory name will be mapped to '/'.  When packaging a
-directory, limited user and group ID translation can be performed.
-INITRAMFS_ROOT_UID can be set to a user ID that needs to be mapped to
-user root (0).  INITRAMFS_ROOT_GID can be set to a group ID that needs
-to be mapped to group root (0).
-
-A source file must be directives in the format required by the
-usr/gen_init_cpio utility (run 'usr/gen_init_cpio -h' to get the
-file format).  The directives in the file will be passed directly to
-usr/gen_init_cpio.
-
-When a combination of directories and files are specified then the
-initramfs image will be an aggregate of all of them.  In this way a user
-can create a 'root-image' directory and install all files into it.
-Because device-special files cannot be created by a unprivileged user,
-special files can be listed in a 'root-files' file.  Both 'root-image'
-and 'root-files' can be listed in CONFIG_INITRAMFS_SOURCE and a complete
-early userspace image can be built by an unprivileged user.
-
-As a technical note, when directories and files are specified, the
-entire CONFIG_INITRAMFS_SOURCE is passed to
-usr/gen_initramfs_list.sh.  This means that CONFIG_INITRAMFS_SOURCE
-can really be interpreted as any legal argument to
-gen_initramfs_list.sh.  If a directory is specified as an argument then
-the contents are scanned, uid/gid translation is performed, and
-usr/gen_init_cpio file directives are output.  If a directory is
-specified as an argument to usr/gen_initramfs_list.sh then the
-contents of the file are simply copied to the output.  All of the output
-directives from directory scanning and file contents copying are
-processed by usr/gen_init_cpio.
-
-See also 'usr/gen_initramfs_list.sh -h'.
-
-Where's this all leading?
-=========================
-
-The klibc distribution contains some of the necessary software to make
-early userspace useful.  The klibc distribution is currently
-maintained separately from the kernel.
-
-You can obtain somewhat infrequent snapshots of klibc from
-https://www.kernel.org/pub/linux/libs/klibc/
-
-For active users, you are better off using the klibc git
-repository, at http://git.kernel.org/?p=libs/klibc/klibc.git
-
-The standalone klibc distribution currently provides three components,
-in addition to the klibc library:
-
-- ipconfig, a program that configures network interfaces.  It can
-  configure them statically, or use DHCP to obtain information
-  dynamically (aka "IP autoconfiguration").
-- nfsmount, a program that can mount an NFS filesystem.
-- kinit, the "glue" that uses ipconfig and nfsmount to replace the old
-  support for IP autoconfig, mount a filesystem over NFS, and continue
-  system boot using that filesystem as root.
-
-kinit is built as a single statically linked binary to save space.
-
-Eventually, several more chunks of kernel functionality will hopefully
-move to early userspace:
-
-- Almost all of init/do_mounts* (the beginning of this is already in
-  place)
-- ACPI table parsing
-- Insert unwieldy subsystem that doesn't really need to be in kernel
-  space here
-
-If kinit doesn't meet your current needs and you've got bytes to burn,
-the klibc distribution includes a small Bourne-compatible shell (ash)
-and a number of other utilities, so you can replace kinit and build
-custom initramfs images that meet your needs exactly.
-
-For questions and help, you can sign up for the early userspace
-mailing list at http://www.zytor.com/mailman/listinfo/klibc
-
-How does it work?
-=================
-
-The kernel has currently 3 ways to mount the root filesystem:
-
-a) all required device and filesystem drivers compiled into the kernel, no
-   initrd.  init/main.c:init() will call prepare_namespace() to mount the
-   final root filesystem, based on the root= option and optional init= to run
-   some other init binary than listed at the end of init/main.c:init().
-
-b) some device and filesystem drivers built as modules and stored in an
-   initrd.  The initrd must contain a binary '/linuxrc' which is supposed to
-   load these driver modules.  It is also possible to mount the final root
-   filesystem via linuxrc and use the pivot_root syscall.  The initrd is
-   mounted and executed via prepare_namespace().
-
-c) using initramfs.  The call to prepare_namespace() must be skipped.
-   This means that a binary must do all the work.  Said binary can be stored
-   into initramfs either via modifying usr/gen_init_cpio.c or via the new
-   initrd format, an cpio archive.  It must be called "/init".  This binary
-   is responsible to do all the things prepare_namespace() would do.
-
-   To maintain backwards compatibility, the /init binary will only run if it
-   comes via an initramfs cpio archive.  If this is not the case,
-   init/main.c:init() will run prepare_namespace() to mount the final root
-   and exec one of the predefined init binaries.
-
-Bryan O'Sullivan <bos@serpentine.com>
diff --git a/Documentation/early-userspace/index.rst b/Documentation/early-userspace/index.rst
deleted file mode 100644
index 2b8eb6132058..000000000000
--- a/Documentation/early-userspace/index.rst
+++ /dev/null
@@ -1,18 +0,0 @@
-:orphan:
-
-===============
-Early Userspace
-===============
-
-.. toctree::
-    :maxdepth: 1
-
-    early_userspace_support
-    buffer-format
-
-.. only::  subproject and html
-
-   Indices
-   =======
-
-   * :ref:`genindex`
diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt
index 4862d3d77e27..ae4332464560 100644
--- a/Documentation/filesystems/nfs/nfsroot.txt
+++ b/Documentation/filesystems/nfs/nfsroot.txt
@@ -239,7 +239,7 @@ rdinit=<executable file>
   A description of the process of mounting the root file system can be
   found in:
 
-    Documentation/early-userspace/early_userspace_support.rst
+    Documentation/driver-api/early-userspace/early_userspace_support.rst
 
 
diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.txt b/Documentation/filesystems/ramfs-rootfs-initramfs.txt
index fa985909dbca..97d42ccaa92d 100644
--- a/Documentation/filesystems/ramfs-rootfs-initramfs.txt
+++ b/Documentation/filesystems/ramfs-rootfs-initramfs.txt
@@ -105,7 +105,7 @@ All this differs from the old initrd in several ways:
   - The old initrd file was a gzipped filesystem image (in some file format,
     such as ext2, that needed a driver built into the kernel), while the new
     initramfs archive is a gzipped cpio archive (like tar only simpler,
-    see cpio(1) and Documentation/early-userspace/buffer-format.rst).  The
+    see cpio(1) and Documentation/driver-api/early-userspace/buffer-format.rst).  The
     kernel's cpio extraction code is not only extremely small, it's also
     __init text and data that can be discarded during the boot process.
 
@@ -159,7 +159,7 @@ One advantage of the configuration file is that root access is not required to
 set permissions or create device nodes in the new archive.  (Note that those
 two example "file" entries expect to find files named "init.sh" and "busybox" in
 a directory called "initramfs", under the linux-2.6.* directory.  See
-Documentation/early-userspace/early_userspace_support.rst for more details.)
+Documentation/driver-api/early-userspace/early_userspace_support.rst for more details.)
 
 The kernel does not depend on external cpio tools.  If you specify a
 directory instead of a configuration file, the kernel's build infrastructure
-- 
cgit 


From 570432470275c3da15b85362bc1461945b9c1919 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Mon, 22 Apr 2019 16:48:00 -0300
Subject: docs: admin-guide: move sysctl directory to it

The stuff under sysctl describes /sys interface from userspace
point of view. So, add it to the admin-guide and remove the
:orphan: from its index file.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/admin-guide/index.rst             |    1 +
 Documentation/admin-guide/kernel-parameters.txt |    2 +-
 Documentation/admin-guide/mm/index.rst          |    2 +-
 Documentation/admin-guide/mm/ksm.rst            |    2 +-
 Documentation/admin-guide/sysctl/abi.rst        |   67 ++
 Documentation/admin-guide/sysctl/fs.rst         |  384 ++++++++
 Documentation/admin-guide/sysctl/index.rst      |   98 ++
 Documentation/admin-guide/sysctl/kernel.rst     | 1177 +++++++++++++++++++++++
 Documentation/admin-guide/sysctl/net.rst        |  461 +++++++++
 Documentation/admin-guide/sysctl/sunrpc.rst     |   25 +
 Documentation/admin-guide/sysctl/user.rst       |   78 ++
 Documentation/admin-guide/sysctl/vm.rst         |  964 +++++++++++++++++++
 Documentation/core-api/printk-formats.rst       |    2 +-
 Documentation/filesystems/proc.txt              |    2 +-
 Documentation/networking/ip-sysctl.txt          |    2 +-
 Documentation/sysctl/abi.rst                    |   67 --
 Documentation/sysctl/fs.rst                     |  384 --------
 Documentation/sysctl/index.rst                  |  100 --
 Documentation/sysctl/kernel.rst                 | 1177 -----------------------
 Documentation/sysctl/net.rst                    |  461 ---------
 Documentation/sysctl/sunrpc.rst                 |   25 -
 Documentation/sysctl/user.rst                   |   78 --
 Documentation/sysctl/vm.rst                     |  964 -------------------
 Documentation/vm/unevictable-lru.rst            |    2 +-
 24 files changed, 3262 insertions(+), 3263 deletions(-)
 create mode 100644 Documentation/admin-guide/sysctl/abi.rst
 create mode 100644 Documentation/admin-guide/sysctl/fs.rst
 create mode 100644 Documentation/admin-guide/sysctl/index.rst
 create mode 100644 Documentation/admin-guide/sysctl/kernel.rst
 create mode 100644 Documentation/admin-guide/sysctl/net.rst
 create mode 100644 Documentation/admin-guide/sysctl/sunrpc.rst
 create mode 100644 Documentation/admin-guide/sysctl/user.rst
 create mode 100644 Documentation/admin-guide/sysctl/vm.rst
 delete mode 100644 Documentation/sysctl/abi.rst
 delete mode 100644 Documentation/sysctl/fs.rst
 delete mode 100644 Documentation/sysctl/index.rst
 delete mode 100644 Documentation/sysctl/kernel.rst
 delete mode 100644 Documentation/sysctl/net.rst
 delete mode 100644 Documentation/sysctl/sunrpc.rst
 delete mode 100644 Documentation/sysctl/user.rst
 delete mode 100644 Documentation/sysctl/vm.rst

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 64e97a969857..5c6ae1ccee1a 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -16,6 +16,7 @@ etc.
    README
    kernel-parameters
    devices
+   sysctl/index
 
 This section describes CPU vulnerabilities and their mitigations.
 
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e8e28cac32a3..b323f5d4366a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3144,7 +3144,7 @@
 	numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
 			'node', 'default' can be specified
 			This can be set from sysctl after boot.
-			See Documentation/sysctl/vm.rst for details.
+			See Documentation/admin-guide/sysctl/vm.rst for details.
 
 	ohci1394_dma=early	[HW] enable debugging via the ohci1394 driver.
 			See Documentation/debugging-via-ohci1394.txt for more
diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst
index f5e92f33f96e..5f61a6c429e0 100644
--- a/Documentation/admin-guide/mm/index.rst
+++ b/Documentation/admin-guide/mm/index.rst
@@ -11,7 +11,7 @@ processes address space and many other cool things.
 Linux memory management is a complex system with many configurable
 settings. Most of these settings are available via ``/proc``
 filesystem and can be quired and adjusted using ``sysctl``. These APIs
-are described in Documentation/sysctl/vm.rst and in `man 5 proc`_.
+are described in Documentation/admin-guide/sysctl/vm.rst and in `man 5 proc`_.
 
 .. _man 5 proc: http://man7.org/linux/man-pages/man5/proc.5.html
 
diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst
index 7b2b8767c0b4..874eb0c77d34 100644
--- a/Documentation/admin-guide/mm/ksm.rst
+++ b/Documentation/admin-guide/mm/ksm.rst
@@ -59,7 +59,7 @@ MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
 
 If a region of memory must be split into at least one new MADV_MERGEABLE
 or MADV_UNMERGEABLE region, the madvise may return ENOMEM if the process
-will exceed ``vm.max_map_count`` (see Documentation/sysctl/vm.rst).
+will exceed ``vm.max_map_count`` (see Documentation/admin-guide/sysctl/vm.rst).
 
 Like other madvise calls, they are intended for use on mapped areas of
 the user address space: they will report ENOMEM if the specified range
diff --git a/Documentation/admin-guide/sysctl/abi.rst b/Documentation/admin-guide/sysctl/abi.rst
new file mode 100644
index 000000000000..599bcde7f0b7
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/abi.rst
@@ -0,0 +1,67 @@
+================================
+Documentation for /proc/sys/abi/
+================================
+
+kernel version 2.6.0.test2
+
+Copyright (c) 2003,  Fabian Frederick <ffrederick@users.sourceforge.net>
+
+For general info: index.rst.
+
+------------------------------------------------------------------------------
+
+This path is binary emulation relevant aka personality types aka abi.
+When a process is executed, it's linked to an exec_domain whose
+personality is defined using values available from /proc/sys/abi.
+You can find further details about abi in include/linux/personality.h.
+
+Here are the files featuring in 2.6 kernel:
+
+- defhandler_coff
+- defhandler_elf
+- defhandler_lcall7
+- defhandler_libcso
+- fake_utsname
+- trace
+
+defhandler_coff
+---------------
+
+defined value:
+	PER_SCOSVR3::
+
+		0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS | SHORT_INODE
+
+defhandler_elf
+--------------
+
+defined value:
+	PER_LINUX::
+
+		0
+
+defhandler_lcall7
+-----------------
+
+defined value :
+	PER_SVR4::
+
+		0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
+
+defhandler_libsco
+-----------------
+
+defined value:
+	PER_SVR4::
+
+		0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
+
+fake_utsname
+------------
+
+Unused
+
+trace
+-----
+
+Unused
diff --git a/Documentation/admin-guide/sysctl/fs.rst b/Documentation/admin-guide/sysctl/fs.rst
new file mode 100644
index 000000000000..2a45119e3331
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/fs.rst
@@ -0,0 +1,384 @@
+===============================
+Documentation for /proc/sys/fs/
+===============================
+
+kernel version 2.2.10
+
+Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
+
+Copyright (c) 2009,        Shen Feng<shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in intro.rst.
+
+------------------------------------------------------------------------------
+
+This file contains documentation for the sysctl files in
+/proc/sys/fs/ and is valid for Linux kernel version 2.2.
+
+The files in this directory can be used to tune and monitor
+miscellaneous and general things in the operation of the Linux
+kernel. Since some of the files _can_ be used to screw up your
+system, it is advisable to read both documentation and source
+before actually making adjustments.
+
+1. /proc/sys/fs
+===============
+
+Currently, these files are in /proc/sys/fs:
+
+- aio-max-nr
+- aio-nr
+- dentry-state
+- dquot-max
+- dquot-nr
+- file-max
+- file-nr
+- inode-max
+- inode-nr
+- inode-state
+- nr_open
+- overflowuid
+- overflowgid
+- pipe-user-pages-hard
+- pipe-user-pages-soft
+- protected_fifos
+- protected_hardlinks
+- protected_regular
+- protected_symlinks
+- suid_dumpable
+- super-max
+- super-nr
+
+
+aio-nr & aio-max-nr
+-------------------
+
+aio-nr is the running total of the number of events specified on the
+io_setup system call for all currently active aio contexts.  If aio-nr
+reaches aio-max-nr then io_setup will fail with EAGAIN.  Note that
+raising aio-max-nr does not result in the pre-allocation or re-sizing
+of any kernel data structures.
+
+
+dentry-state
+------------
+
+From linux/include/linux/dcache.h::
+
+  struct dentry_stat_t dentry_stat {
+        int nr_dentry;
+        int nr_unused;
+        int age_limit;         /* age in seconds */
+        int want_pages;        /* pages requested by system */
+        int nr_negative;       /* # of unused negative dentries */
+        int dummy;             /* Reserved for future use */
+  };
+
+Dentries are dynamically allocated and deallocated.
+
+nr_dentry shows the total number of dentries allocated (active
++ unused). nr_unused shows the number of dentries that are not
+actively used, but are saved in the LRU list for future reuse.
+
+Age_limit is the age in seconds after which dcache entries
+can be reclaimed when memory is short and want_pages is
+nonzero when shrink_dcache_pages() has been called and the
+dcache isn't pruned yet.
+
+nr_negative shows the number of unused dentries that are also
+negative dentries which do not map to any files. Instead,
+they help speeding up rejection of non-existing files provided
+by the users.
+
+
+dquot-max & dquot-nr
+--------------------
+
+The file dquot-max shows the maximum number of cached disk
+quota entries.
+
+The file dquot-nr shows the number of allocated disk quota
+entries and the number of free disk quota entries.
+
+If the number of free cached disk quotas is very low and
+you have some awesome number of simultaneous system users,
+you might want to raise the limit.
+
+
+file-max & file-nr
+------------------
+
+The value in file-max denotes the maximum number of file-
+handles that the Linux kernel will allocate. When you get lots
+of error messages about running out of file handles, you might
+want to increase this limit.
+
+Historically,the kernel was able to allocate file handles
+dynamically, but not to free them again. The three values in
+file-nr denote the number of allocated file handles, the number
+of allocated but unused file handles, and the maximum number of
+file handles. Linux 2.6 always reports 0 as the number of free
+file handles -- this is not an error, it just means that the
+number of allocated file handles exactly matches the number of
+used file handles.
+
+Attempts to allocate more file descriptors than file-max are
+reported with printk, look for "VFS: file-max limit <number>
+reached".
+
+
+nr_open
+-------
+
+This denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on RLIMIT_NOFILE
+resource limit.
+
+
+inode-max, inode-nr & inode-state
+---------------------------------
+
+As with file handles, the kernel allocates the inode structures
+dynamically, but can't free them yet.
+
+The value in inode-max denotes the maximum number of inode
+handlers. This value should be 3-4 times larger than the value
+in file-max, since stdin, stdout and network sockets also
+need an inode struct to handle them. When you regularly run
+out of inodes, you need to increase this value.
+
+The file inode-nr contains the first two items from
+inode-state, so we'll skip to that file...
+
+Inode-state contains three actual numbers and four dummies.
+The actual numbers are, in order of appearance, nr_inodes,
+nr_free_inodes and preshrink.
+
+Nr_inodes stands for the number of inodes the system has
+allocated, this can be slightly more than inode-max because
+Linux allocates them one pageful at a time.
+
+Nr_free_inodes represents the number of free inodes (?) and
+preshrink is nonzero when the nr_inodes > inode-max and the
+system needs to prune the inode list instead of allocating
+more.
+
+
+overflowgid & overflowuid
+-------------------------
+
+Some filesystems only support 16-bit UIDs and GIDs, although in Linux
+UIDs and GIDs are 32 bits. When one of these filesystems is mounted
+with writes enabled, any UID or GID that would exceed 65535 is translated
+to a fixed value before being written to disk.
+
+These sysctls allow you to change the value of the fixed UID and GID.
+The default is 65534.
+
+
+pipe-user-pages-hard
+--------------------
+
+Maximum total number of pages a non-privileged user may allocate for pipes.
+Once this limit is reached, no new pipes may be allocated until usage goes
+below the limit again. When set to 0, no limit is applied, which is the default
+setting.
+
+
+pipe-user-pages-soft
+--------------------
+
+Maximum total number of pages a non-privileged user may allocate for pipes
+before the pipe size gets limited to a single page. Once this limit is reached,
+new pipes will be limited to a single page in size for this user in order to
+limit total memory usage, and trying to increase them using fcntl() will be
+denied until usage goes below the limit again. The default value allows to
+allocate up to 1024 pipes at their default size. When set to 0, no limit is
+applied.
+
+
+protected_fifos
+---------------
+
+The intent of this protection is to avoid unintentional writes to
+an attacker-controlled FIFO, where a program expected to create a regular
+file.
+
+When set to "0", writing to FIFOs is unrestricted.
+
+When set to "1" don't allow O_CREAT open on FIFOs that we don't own
+in world writable sticky directories, unless they are owned by the
+owner of the directory.
+
+When set to "2" it also applies to group writable sticky directories.
+
+This protection is based on the restrictions in Openwall.
+
+
+protected_hardlinks
+--------------------
+
+A long-standing class of security issues is the hardlink-based
+time-of-check-time-of-use race, most commonly seen in world-writable
+directories like /tmp. The common method of exploitation of this flaw
+is to cross privilege boundaries when following a given hardlink (i.e. a
+root process follows a hardlink created by another user). Additionally,
+on systems without separated partitions, this stops unauthorized users
+from "pinning" vulnerable setuid/setgid files against being upgraded by
+the administrator, or linking to special files.
+
+When set to "0", hardlink creation behavior is unrestricted.
+
+When set to "1" hardlinks cannot be created by users if they do not
+already own the source file, or do not have read/write access to it.
+
+This protection is based on the restrictions in Openwall and grsecurity.
+
+
+protected_regular
+-----------------
+
+This protection is similar to protected_fifos, but it
+avoids writes to an attacker-controlled regular file, where a program
+expected to create one.
+
+When set to "0", writing to regular files is unrestricted.
+
+When set to "1" don't allow O_CREAT open on regular files that we
+don't own in world writable sticky directories, unless they are
+owned by the owner of the directory.
+
+When set to "2" it also applies to group writable sticky directories.
+
+
+protected_symlinks
+------------------
+
+A long-standing class of security issues is the symlink-based
+time-of-check-time-of-use race, most commonly seen in world-writable
+directories like /tmp. The common method of exploitation of this flaw
+is to cross privilege boundaries when following a given symlink (i.e. a
+root process follows a symlink belonging to another user). For a likely
+incomplete list of hundreds of examples across the years, please see:
+http://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=/tmp
+
+When set to "0", symlink following behavior is unrestricted.
+
+When set to "1" symlinks are permitted to be followed only when outside
+a sticky world-writable directory, or when the uid of the symlink and
+follower match, or when the directory owner matches the symlink's owner.
+
+This protection is based on the restrictions in Openwall and grsecurity.
+
+
+suid_dumpable:
+--------------
+
+This value can be used to query and set the core dump mode for setuid
+or otherwise protected/tainted binaries. The modes are
+
+=   ==========  ===============================================================
+0   (default)	traditional behaviour. Any process which has changed
+		privilege levels or is execute only will not be dumped.
+1   (debug)	all processes dump core when possible. The core dump is
+		owned by the current user and no security is applied. This is
+		intended for system debugging situations only.
+		Ptrace is unchecked.
+		This is insecure as it allows regular users to examine the
+		memory contents of privileged processes.
+2   (suidsafe)	any binary which normally would not be dumped is dumped
+		anyway, but only if the "core_pattern" kernel sysctl is set to
+		either a pipe handler or a fully qualified path. (For more
+		details on this limitation, see CVE-2006-2451.) This mode is
+		appropriate when administrators are attempting to debug
+		problems in a normal environment, and either have a core dump
+		pipe handler that knows to treat privileged core dumps with
+		care, or specific directory defined for catching core dumps.
+		If a core dump happens without a pipe handler or fully
+		qualified path, a message will be emitted to syslog warning
+		about the lack of a correct setting.
+=   ==========  ===============================================================
+
+
+super-max & super-nr
+--------------------
+
+These numbers control the maximum number of superblocks, and
+thus the maximum number of mounted filesystems the kernel
+can have. You only need to increase super-max if you need to
+mount more filesystems than the current value in super-max
+allows you to.
+
+
+aio-nr & aio-max-nr
+-------------------
+
+aio-nr shows the current system-wide number of asynchronous io
+requests.  aio-max-nr allows you to change the maximum value
+aio-nr can grow to.
+
+
+mount-max
+---------
+
+This denotes the maximum number of mounts that may exist
+in a mount namespace.
+
+
+
+2. /proc/sys/fs/binfmt_misc
+===========================
+
+Documentation for the files in /proc/sys/fs/binfmt_misc is
+in Documentation/admin-guide/binfmt-misc.rst.
+
+
+3. /proc/sys/fs/mqueue - POSIX message queues filesystem
+========================================================
+
+
+The "mqueue"  filesystem provides  the necessary kernel features to enable the
+creation of a  user space  library that  implements  the  POSIX message queues
+API (as noted by the  MSG tag in the  POSIX 1003.1-2001 version  of the System
+Interfaces specification.)
+
+The "mqueue" filesystem contains values for determining/setting  the amount of
+resources used by the file system.
+
+/proc/sys/fs/mqueue/queues_max is a read/write  file for  setting/getting  the
+maximum number of message queues allowed on the system.
+
+/proc/sys/fs/mqueue/msg_max  is  a  read/write file  for  setting/getting  the
+maximum number of messages in a queue value.  In fact it is the limiting value
+for another (user) limit which is set in mq_open invocation. This attribute of
+a queue must be less or equal then msg_max.
+
+/proc/sys/fs/mqueue/msgsize_max is  a read/write  file for setting/getting the
+maximum  message size value (it is every  message queue's attribute set during
+its creation).
+
+/proc/sys/fs/mqueue/msg_default is  a read/write  file for setting/getting the
+default number of messages in a queue value if attr parameter of mq_open(2) is
+NULL. If it exceed msg_max, the default value is initialized msg_max.
+
+/proc/sys/fs/mqueue/msgsize_default is a read/write file for setting/getting
+the default message size value if attr parameter of mq_open(2) is NULL. If it
+exceed msgsize_max, the default value is initialized msgsize_max.
+
+4. /proc/sys/fs/epoll - Configuration options for the epoll interface
+=====================================================================
+
+This directory contains configuration options for the epoll(7) interface.
+
+max_user_watches
+----------------
+
+Every epoll file descriptor can store a number of files to be monitored
+for event readiness. Each one of these monitored files constitutes a "watch".
+This configuration option sets the maximum number of "watches" that are
+allowed for each user.
+Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes
+on a 64bit one.
+The current default value for  max_user_watches  is the 1/32 of the available
+low memory, divided for the "watch" cost in bytes.
diff --git a/Documentation/admin-guide/sysctl/index.rst b/Documentation/admin-guide/sysctl/index.rst
new file mode 100644
index 000000000000..03346f98c7b9
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/index.rst
@@ -0,0 +1,98 @@
+===========================
+Documentation for /proc/sys
+===========================
+
+Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
+
+------------------------------------------------------------------------------
+
+'Why', I hear you ask, 'would anyone even _want_ documentation
+for them sysctl files? If anybody really needs it, it's all in
+the source...'
+
+Well, this documentation is written because some people either
+don't know they need to tweak something, or because they don't
+have the time or knowledge to read the source code.
+
+Furthermore, the programmers who built sysctl have built it to
+be actually used, not just for the fun of programming it :-)
+
+------------------------------------------------------------------------------
+
+Legal blurb:
+
+As usual, there are two main things to consider:
+
+1. you get what you pay for
+2. it's free
+
+The consequences are that I won't guarantee the correctness of
+this document, and if you come to me complaining about how you
+screwed up your system because of wrong documentation, I won't
+feel sorry for you. I might even laugh at you...
+
+But of course, if you _do_ manage to screw up your system using
+only the sysctl options used in this file, I'd like to hear of
+it. Not only to have a great laugh, but also to make sure that
+you're the last RTFMing person to screw up.
+
+In short, e-mail your suggestions, corrections and / or horror
+stories to: <riel@nl.linux.org>
+
+Rik van Riel.
+
+--------------------------------------------------------------
+
+Introduction
+============
+
+Sysctl is a means of configuring certain aspects of the kernel
+at run-time, and the /proc/sys/ directory is there so that you
+don't even need special tools to do it!
+In fact, there are only four things needed to use these config
+facilities:
+
+- a running Linux system
+- root access
+- common sense (this is especially hard to come by these days)
+- knowledge of what all those values mean
+
+As a quick 'ls /proc/sys' will show, the directory consists of
+several (arch-dependent?) subdirs. Each subdir is mainly about
+one part of the kernel, so you can do configuration on a piece
+by piece basis, or just some 'thematic frobbing'.
+
+This documentation is about:
+
+=============== ===============================================================
+abi/		execution domains & personalities
+debug/		<empty>
+dev/		device specific information (eg dev/cdrom/info)
+fs/		specific filesystems
+		filehandle, inode, dentry and quota tuning
+		binfmt_misc <Documentation/admin-guide/binfmt-misc.rst>
+kernel/		global kernel info / tuning
+		miscellaneous stuff
+net/		networking stuff, for documentation look in:
+		<Documentation/networking/>
+proc/		<empty>
+sunrpc/		SUN Remote Procedure Call (NFS)
+vm/		memory management tuning
+		buffer and cache management
+user/		Per user per user namespace limits
+=============== ===============================================================
+
+These are the subdirs I have on my system. There might be more
+or other subdirs in another setup. If you see another dir, I'd
+really like to hear about it :-)
+
+.. toctree::
+   :maxdepth: 1
+
+   abi
+   fs
+   kernel
+   net
+   sunrpc
+   user
+   vm
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
new file mode 100644
index 000000000000..a0c1d4ce403a
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -0,0 +1,1177 @@
+===================================
+Documentation for /proc/sys/kernel/
+===================================
+
+kernel version 2.2.10
+
+Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
+
+Copyright (c) 2009,        Shen Feng<shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in index.rst.
+
+------------------------------------------------------------------------------
+
+This file contains documentation for the sysctl files in
+/proc/sys/kernel/ and is valid for Linux kernel version 2.2.
+
+The files in this directory can be used to tune and monitor
+miscellaneous and general things in the operation of the Linux
+kernel. Since some of the files _can_ be used to screw up your
+system, it is advisable to read both documentation and source
+before actually making adjustments.
+
+Currently, these files might (depending on your configuration)
+show up in /proc/sys/kernel:
+
+- acct
+- acpi_video_flags
+- auto_msgmni
+- bootloader_type	     [ X86 only ]
+- bootloader_version	     [ X86 only ]
+- cap_last_cap
+- core_pattern
+- core_pipe_limit
+- core_uses_pid
+- ctrl-alt-del
+- dmesg_restrict
+- domainname
+- hostname
+- hotplug
+- hardlockup_all_cpu_backtrace
+- hardlockup_panic
+- hung_task_panic
+- hung_task_check_count
+- hung_task_timeout_secs
+- hung_task_check_interval_secs
+- hung_task_warnings
+- hyperv_record_panic_msg
+- kexec_load_disabled
+- kptr_restrict
+- l2cr                        [ PPC only ]
+- modprobe                    ==> Documentation/debugging-modules.txt
+- modules_disabled
+- msg_next_id		      [ sysv ipc ]
+- msgmax
+- msgmnb
+- msgmni
+- nmi_watchdog
+- osrelease
+- ostype
+- overflowgid
+- overflowuid
+- panic
+- panic_on_oops
+- panic_on_stackoverflow
+- panic_on_unrecovered_nmi
+- panic_on_warn
+- panic_print
+- panic_on_rcu_stall
+- perf_cpu_time_max_percent
+- perf_event_paranoid
+- perf_event_max_stack
+- perf_event_mlock_kb
+- perf_event_max_contexts_per_stack
+- pid_max
+- powersave-nap               [ PPC only ]
+- printk
+- printk_delay
+- printk_ratelimit
+- printk_ratelimit_burst
+- pty                         ==> Documentation/filesystems/devpts.txt
+- randomize_va_space
+- real-root-dev               ==> Documentation/admin-guide/initrd.rst
+- reboot-cmd                  [ SPARC only ]
+- rtsig-max
+- rtsig-nr
+- sched_energy_aware
+- seccomp/                    ==> Documentation/userspace-api/seccomp_filter.rst
+- sem
+- sem_next_id		      [ sysv ipc ]
+- sg-big-buff                 [ generic SCSI device (sg) ]
+- shm_next_id		      [ sysv ipc ]
+- shm_rmid_forced
+- shmall
+- shmmax                      [ sysv ipc ]
+- shmmni
+- softlockup_all_cpu_backtrace
+- soft_watchdog
+- stack_erasing
+- stop-a                      [ SPARC only ]
+- sysrq                       ==> Documentation/admin-guide/sysrq.rst
+- sysctl_writes_strict
+- tainted                     ==> Documentation/admin-guide/tainted-kernels.rst
+- threads-max
+- unknown_nmi_panic
+- watchdog
+- watchdog_thresh
+- version
+
+
+acct:
+=====
+
+highwater lowwater frequency
+
+If BSD-style process accounting is enabled these values control
+its behaviour. If free space on filesystem where the log lives
+goes below <lowwater>% accounting suspends. If free space gets
+above <highwater>% accounting resumes. <Frequency> determines
+how often do we check the amount of free space (value is in
+seconds). Default:
+4 2 30
+That is, suspend accounting if there left <= 2% free; resume it
+if we got >=4%; consider information about amount of free space
+valid for 30 seconds.
+
+
+acpi_video_flags:
+=================
+
+flags
+
+See Doc*/kernel/power/video.txt, it allows mode of video boot to be
+set during run time.
+
+
+auto_msgmni:
+============
+
+This variable has no effect and may be removed in future kernel
+releases. Reading it always returns 0.
+Up to Linux 3.17, it enabled/disabled automatic recomputing of msgmni
+upon memory add/remove or upon ipc namespace creation/removal.
+Echoing "1" into this file enabled msgmni automatic recomputing.
+Echoing "0" turned it off. auto_msgmni default value was 1.
+
+
+bootloader_type:
+================
+
+x86 bootloader identification
+
+This gives the bootloader type number as indicated by the bootloader,
+shifted left by 4, and OR'd with the low four bits of the bootloader
+version.  The reason for this encoding is that this used to match the
+type_of_loader field in the kernel header; the encoding is kept for
+backwards compatibility.  That is, if the full bootloader type number
+is 0x15 and the full version number is 0x234, this file will contain
+the value 340 = 0x154.
+
+See the type_of_loader and ext_loader_type fields in
+Documentation/x86/boot.rst for additional information.
+
+
+bootloader_version:
+===================
+
+x86 bootloader version
+
+The complete bootloader version number.  In the example above, this
+file will contain the value 564 = 0x234.
+
+See the type_of_loader and ext_loader_ver fields in
+Documentation/x86/boot.rst for additional information.
+
+
+cap_last_cap:
+=============
+
+Highest valid capability of the running kernel.  Exports
+CAP_LAST_CAP from the kernel.
+
+
+core_pattern:
+=============
+
+core_pattern is used to specify a core dumpfile pattern name.
+
+* max length 127 characters; default value is "core"
+* core_pattern is used as a pattern template for the output filename;
+  certain string patterns (beginning with '%') are substituted with
+  their actual values.
+* backward compatibility with core_uses_pid:
+
+	If core_pattern does not include "%p" (default does not)
+	and core_uses_pid is set, then .PID will be appended to
+	the filename.
+
+* corename format specifiers::
+
+	%<NUL>	'%' is dropped
+	%%	output one '%'
+	%p	pid
+	%P	global pid (init PID namespace)
+	%i	tid
+	%I	global tid (init PID namespace)
+	%u	uid (in initial user namespace)
+	%g	gid (in initial user namespace)
+	%d	dump mode, matches PR_SET_DUMPABLE and
+		/proc/sys/fs/suid_dumpable
+	%s	signal number
+	%t	UNIX time of dump
+	%h	hostname
+	%e	executable filename (may be shortened)
+	%E	executable path
+	%<OTHER> both are dropped
+
+* If the first character of the pattern is a '|', the kernel will treat
+  the rest of the pattern as a command to run.  The core dump will be
+  written to the standard input of that program instead of to a file.
+
+
+core_pipe_limit:
+================
+
+This sysctl is only applicable when core_pattern is configured to pipe
+core files to a user space helper (when the first character of
+core_pattern is a '|', see above).  When collecting cores via a pipe
+to an application, it is occasionally useful for the collecting
+application to gather data about the crashing process from its
+/proc/pid directory.  In order to do this safely, the kernel must wait
+for the collecting process to exit, so as not to remove the crashing
+processes proc files prematurely.  This in turn creates the
+possibility that a misbehaving userspace collecting process can block
+the reaping of a crashed process simply by never exiting.  This sysctl
+defends against that.  It defines how many concurrent crashing
+processes may be piped to user space applications in parallel.  If
+this value is exceeded, then those crashing processes above that value
+are noted via the kernel log and their cores are skipped.  0 is a
+special value, indicating that unlimited processes may be captured in
+parallel, but that no waiting will take place (i.e. the collecting
+process is not guaranteed access to /proc/<crashing pid>/).  This
+value defaults to 0.
+
+
+core_uses_pid:
+==============
+
+The default coredump filename is "core".  By setting
+core_uses_pid to 1, the coredump filename becomes core.PID.
+If core_pattern does not include "%p" (default does not)
+and core_uses_pid is set, then .PID will be appended to
+the filename.
+
+
+ctrl-alt-del:
+=============
+
+When the value in this file is 0, ctrl-alt-del is trapped and
+sent to the init(1) program to handle a graceful restart.
+When, however, the value is > 0, Linux's reaction to a Vulcan
+Nerve Pinch (tm) will be an immediate reboot, without even
+syncing its dirty buffers.
+
+Note:
+  when a program (like dosemu) has the keyboard in 'raw'
+  mode, the ctrl-alt-del is intercepted by the program before it
+  ever reaches the kernel tty layer, and it's up to the program
+  to decide what to do with it.
+
+
+dmesg_restrict:
+===============
+
+This toggle indicates whether unprivileged users are prevented
+from using dmesg(8) to view messages from the kernel's log buffer.
+When dmesg_restrict is set to (0) there are no restrictions. When
+dmesg_restrict is set set to (1), users must have CAP_SYSLOG to use
+dmesg(8).
+
+The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the
+default value of dmesg_restrict.
+
+
+domainname & hostname:
+======================
+
+These files can be used to set the NIS/YP domainname and the
+hostname of your box in exactly the same way as the commands
+domainname and hostname, i.e.::
+
+	# echo "darkstar" > /proc/sys/kernel/hostname
+	# echo "mydomain" > /proc/sys/kernel/domainname
+
+has the same effect as::
+
+	# hostname "darkstar"
+	# domainname "mydomain"
+
+Note, however, that the classic darkstar.frop.org has the
+hostname "darkstar" and DNS (Internet Domain Name Server)
+domainname "frop.org", not to be confused with the NIS (Network
+Information Service) or YP (Yellow Pages) domainname. These two
+domain names are in general different. For a detailed discussion
+see the hostname(1) man page.
+
+
+hardlockup_all_cpu_backtrace:
+=============================
+
+This value controls the hard lockup detector behavior when a hard
+lockup condition is detected as to whether or not to gather further
+debug information. If enabled, arch-specific all-CPU stack dumping
+will be initiated.
+
+0: do nothing. This is the default behavior.
+
+1: on detection capture more debug information.
+
+
+hardlockup_panic:
+=================
+
+This parameter can be used to control whether the kernel panics
+when a hard lockup is detected.
+
+   0 - don't panic on hard lockup
+   1 - panic on hard lockup
+
+See Documentation/lockup-watchdogs.txt for more information.  This can
+also be set using the nmi_watchdog kernel parameter.
+
+
+hotplug:
+========
+
+Path for the hotplug policy agent.
+Default value is "/sbin/hotplug".
+
+
+hung_task_panic:
+================
+
+Controls the kernel's behavior when a hung task is detected.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+0: continue operation. This is the default behavior.
+
+1: panic immediately.
+
+
+hung_task_check_count:
+======================
+
+The upper bound on the number of tasks that are checked.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+
+hung_task_timeout_secs:
+=======================
+
+When a task in D state did not get scheduled
+for more than this value report a warning.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+0: means infinite timeout - no checking done.
+
+Possible values to set are in range {0..LONG_MAX/HZ}.
+
+
+hung_task_check_interval_secs:
+==============================
+
+Hung task check interval. If hung task checking is enabled
+(see hung_task_timeout_secs), the check is done every
+hung_task_check_interval_secs seconds.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+0 (default): means use hung_task_timeout_secs as checking interval.
+Possible values to set are in range {0..LONG_MAX/HZ}.
+
+
+hung_task_warnings:
+===================
+
+The maximum number of warnings to report. During a check interval
+if a hung task is detected, this value is decreased by 1.
+When this value reaches 0, no more warnings will be reported.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+-1: report an infinite number of warnings.
+
+
+hyperv_record_panic_msg:
+========================
+
+Controls whether the panic kmsg data should be reported to Hyper-V.
+
+0: do not report panic kmsg data.
+
+1: report the panic kmsg data. This is the default behavior.
+
+
+kexec_load_disabled:
+====================
+
+A toggle indicating if the kexec_load syscall has been disabled. This
+value defaults to 0 (false: kexec_load enabled), but can be set to 1
+(true: kexec_load disabled). Once true, kexec can no longer be used, and
+the toggle cannot be set back to false. This allows a kexec image to be
+loaded before disabling the syscall, allowing a system to set up (and
+later use) an image without it being altered. Generally used together
+with the "modules_disabled" sysctl.
+
+
+kptr_restrict:
+==============
+
+This toggle indicates whether restrictions are placed on
+exposing kernel addresses via /proc and other interfaces.
+
+When kptr_restrict is set to 0 (the default) the address is hashed before
+printing. (This is the equivalent to %p.)
+
+When kptr_restrict is set to (1), kernel pointers printed using the %pK
+format specifier will be replaced with 0's unless the user has CAP_SYSLOG
+and effective user and group ids are equal to the real ids. This is
+because %pK checks are done at read() time rather than open() time, so
+if permissions are elevated between the open() and the read() (e.g via
+a setuid binary) then %pK will not leak kernel pointers to unprivileged
+users. Note, this is a temporary solution only. The correct long-term
+solution is to do the permission checks at open() time. Consider removing
+world read permissions from files that use %pK, and using dmesg_restrict
+to protect against uses of %pK in dmesg(8) if leaking kernel pointer
+values to unprivileged users is a concern.
+
+When kptr_restrict is set to (2), kernel pointers printed using
+%pK will be replaced with 0's regardless of privileges.
+
+
+l2cr: (PPC only)
+================
+
+This flag controls the L2 cache of G3 processor boards. If
+0, the cache is disabled. Enabled if nonzero.
+
+
+modules_disabled:
+=================
+
+A toggle value indicating if modules are allowed to be loaded
+in an otherwise modular kernel.  This toggle defaults to off
+(0), but can be set true (1).  Once true, modules can be
+neither loaded nor unloaded, and the toggle cannot be set back
+to false.  Generally used with the "kexec_load_disabled" toggle.
+
+
+msg_next_id, sem_next_id, and shm_next_id:
+==========================================
+
+These three toggles allows to specify desired id for next allocated IPC
+object: message, semaphore or shared memory respectively.
+
+By default they are equal to -1, which means generic allocation logic.
+Possible values to set are in range {0..INT_MAX}.
+
+Notes:
+  1) kernel doesn't guarantee, that new object will have desired id. So,
+     it's up to userspace, how to handle an object with "wrong" id.
+  2) Toggle with non-default value will be set back to -1 by kernel after
+     successful IPC object allocation. If an IPC object allocation syscall
+     fails, it is undefined if the value remains unmodified or is reset to -1.
+
+
+nmi_watchdog:
+=============
+
+This parameter can be used to control the NMI watchdog
+(i.e. the hard lockup detector) on x86 systems.
+
+0 - disable the hard lockup detector
+
+1 - enable the hard lockup detector
+
+The hard lockup detector monitors each CPU for its ability to respond to
+timer interrupts. The mechanism utilizes CPU performance counter registers
+that are programmed to generate Non-Maskable Interrupts (NMIs) periodically
+while a CPU is busy. Hence, the alternative name 'NMI watchdog'.
+
+The NMI watchdog is disabled by default if the kernel is running as a guest
+in a KVM virtual machine. This default can be overridden by adding::
+
+   nmi_watchdog=1
+
+to the guest kernel command line (see Documentation/admin-guide/kernel-parameters.rst).
+
+
+numa_balancing:
+===============
+
+Enables/disables automatic page fault based NUMA memory
+balancing. Memory is moved automatically to nodes
+that access it often.
+
+Enables/disables automatic NUMA memory balancing. On NUMA machines, there
+is a performance penalty if remote memory is accessed by a CPU. When this
+feature is enabled the kernel samples what task thread is accessing memory
+by periodically unmapping pages and later trapping a page fault. At the
+time of the page fault, it is determined if the data being accessed should
+be migrated to a local memory node.
+
+The unmapping of pages and trapping faults incur additional overhead that
+ideally is offset by improved memory locality but there is no universal
+guarantee. If the target workload is already bound to NUMA nodes then this
+feature should be disabled. Otherwise, if the system overhead from the
+feature is too high then the rate the kernel samples for NUMA hinting
+faults may be controlled by the numa_balancing_scan_period_min_ms,
+numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
+numa_balancing_scan_size_mb, and numa_balancing_settle_count sysctls.
+
+numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
+===============================================================================================================================
+
+
+Automatic NUMA balancing scans tasks address space and unmaps pages to
+detect if pages are properly placed or if the data should be migrated to a
+memory node local to where the task is running.  Every "scan delay" the task
+scans the next "scan size" number of pages in its address space. When the
+end of the address space is reached the scanner restarts from the beginning.
+
+In combination, the "scan delay" and "scan size" determine the scan rate.
+When "scan delay" decreases, the scan rate increases.  The scan delay and
+hence the scan rate of every task is adaptive and depends on historical
+behaviour. If pages are properly placed then the scan delay increases,
+otherwise the scan delay decreases.  The "scan size" is not adaptive but
+the higher the "scan size", the higher the scan rate.
+
+Higher scan rates incur higher system overhead as page faults must be
+trapped and potentially data must be migrated. However, the higher the scan
+rate, the more quickly a tasks memory is migrated to a local node if the
+workload pattern changes and minimises performance impact due to remote
+memory accesses. These sysctls control the thresholds for scan delays and
+the number of pages scanned.
+
+numa_balancing_scan_period_min_ms is the minimum time in milliseconds to
+scan a tasks virtual memory. It effectively controls the maximum scanning
+rate for each task.
+
+numa_balancing_scan_delay_ms is the starting "scan delay" used for a task
+when it initially forks.
+
+numa_balancing_scan_period_max_ms is the maximum time in milliseconds to
+scan a tasks virtual memory. It effectively controls the minimum scanning
+rate for each task.
+
+numa_balancing_scan_size_mb is how many megabytes worth of pages are
+scanned for a given scan.
+
+
+osrelease, ostype & version:
+============================
+
+::
+
+  # cat osrelease
+  2.1.88
+  # cat ostype
+  Linux
+  # cat version
+  #5 Wed Feb 25 21:49:24 MET 1998
+
+The files osrelease and ostype should be clear enough. Version
+needs a little more clarification however. The '#5' means that
+this is the fifth kernel built from this source base and the
+date behind it indicates the time the kernel was built.
+The only way to tune these values is to rebuild the kernel :-)
+
+
+overflowgid & overflowuid:
+==========================
+
+if your architecture did not always support 32-bit UIDs (i.e. arm,
+i386, m68k, sh, and sparc32), a fixed UID and GID will be returned to
+applications that use the old 16-bit UID/GID system calls, if the
+actual UID or GID would exceed 65535.
+
+These sysctls allow you to change the value of the fixed UID and GID.
+The default is 65534.
+
+
+panic:
+======
+
+The value in this file represents the number of seconds the kernel
+waits before rebooting on a panic. When you use the software watchdog,
+the recommended setting is 60.
+
+
+panic_on_io_nmi:
+================
+
+Controls the kernel's behavior when a CPU receives an NMI caused by
+an IO error.
+
+0: try to continue operation (default)
+
+1: panic immediately. The IO error triggered an NMI. This indicates a
+   serious system condition which could result in IO data corruption.
+   Rather than continuing, panicking might be a better choice. Some
+   servers issue this sort of NMI when the dump button is pushed,
+   and you can use this option to take a crash dump.
+
+
+panic_on_oops:
+==============
+
+Controls the kernel's behaviour when an oops or BUG is encountered.
+
+0: try to continue operation
+
+1: panic immediately.  If the `panic` sysctl is also non-zero then the
+   machine will be rebooted.
+
+
+panic_on_stackoverflow:
+=======================
+
+Controls the kernel's behavior when detecting the overflows of
+kernel, IRQ and exception stacks except a user stack.
+This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
+
+0: try to continue operation.
+
+1: panic immediately.
+
+
+panic_on_unrecovered_nmi:
+=========================
+
+The default Linux behaviour on an NMI of either memory or unknown is
+to continue operation. For many environments such as scientific
+computing it is preferable that the box is taken out and the error
+dealt with than an uncorrected parity/ECC error get propagated.
+
+A small number of systems do generate NMI's for bizarre random reasons
+such as power management so the default is off. That sysctl works like
+the existing panic controls already in that directory.
+
+
+panic_on_warn:
+==============
+
+Calls panic() in the WARN() path when set to 1.  This is useful to avoid
+a kernel rebuild when attempting to kdump at the location of a WARN().
+
+0: only WARN(), default behaviour.
+
+1: call panic() after printing out WARN() location.
+
+
+panic_print:
+============
+
+Bitmask for printing system info when panic happens. User can chose
+combination of the following bits:
+
+=====  ========================================
+bit 0  print all tasks info
+bit 1  print system memory info
+bit 2  print timer info
+bit 3  print locks info if CONFIG_LOCKDEP is on
+bit 4  print ftrace buffer
+=====  ========================================
+
+So for example to print tasks and memory info on panic, user can::
+
+  echo 3 > /proc/sys/kernel/panic_print
+
+
+panic_on_rcu_stall:
+===================
+
+When set to 1, calls panic() after RCU stall detection messages. This
+is useful to define the root cause of RCU stalls using a vmcore.
+
+0: do not panic() when RCU stall takes place, default behavior.
+
+1: panic() after printing RCU stall messages.
+
+
+perf_cpu_time_max_percent:
+==========================
+
+Hints to the kernel how much CPU time it should be allowed to
+use to handle perf sampling events.  If the perf subsystem
+is informed that its samples are exceeding this limit, it
+will drop its sampling frequency to attempt to reduce its CPU
+usage.
+
+Some perf sampling happens in NMIs.  If these samples
+unexpectedly take too long to execute, the NMIs can become
+stacked up next to each other so much that nothing else is
+allowed to execute.
+
+0:
+   disable the mechanism.  Do not monitor or correct perf's
+   sampling rate no matter how CPU time it takes.
+
+1-100:
+   attempt to throttle perf's sample rate to this
+   percentage of CPU.  Note: the kernel calculates an
+   "expected" length of each sample event.  100 here means
+   100% of that expected length.  Even if this is set to
+   100, you may still see sample throttling if this
+   length is exceeded.  Set to 0 if you truly do not care
+   how much CPU is consumed.
+
+
+perf_event_paranoid:
+====================
+
+Controls use of the performance events system by unprivileged
+users (without CAP_SYS_ADMIN).  The default value is 2.
+
+===  ==================================================================
+ -1  Allow use of (almost) all events by all users
+
+     Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
+
+>=0  Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN
+
+     Disallow raw tracepoint access by users without CAP_SYS_ADMIN
+
+>=1  Disallow CPU event access by users without CAP_SYS_ADMIN
+
+>=2  Disallow kernel profiling by users without CAP_SYS_ADMIN
+===  ==================================================================
+
+
+perf_event_max_stack:
+=====================
+
+Controls maximum number of stack frames to copy for (attr.sample_type &
+PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using
+'perf record -g' or 'perf trace --call-graph fp'.
+
+This can only be done when no events are in use that have callchains
+enabled, otherwise writing to this file will return -EBUSY.
+
+The default value is 127.
+
+
+perf_event_mlock_kb:
+====================
+
+Control size of per-cpu ring buffer not counted agains mlock limit.
+
+The default value is 512 + 1 page
+
+
+perf_event_max_contexts_per_stack:
+==================================
+
+Controls maximum number of stack frame context entries for
+(attr.sample_type & PERF_SAMPLE_CALLCHAIN) configured events, for
+instance, when using 'perf record -g' or 'perf trace --call-graph fp'.
+
+This can only be done when no events are in use that have callchains
+enabled, otherwise writing to this file will return -EBUSY.
+
+The default value is 8.
+
+
+pid_max:
+========
+
+PID allocation wrap value.  When the kernel's next PID value
+reaches this value, it wraps back to a minimum PID value.
+PIDs of value pid_max or larger are not allocated.
+
+
+ns_last_pid:
+============
+
+The last pid allocated in the current (the one task using this sysctl
+lives in) pid namespace. When selecting a pid for a next task on fork
+kernel tries to allocate a number starting from this one.
+
+
+powersave-nap: (PPC only)
+=========================
+
+If set, Linux-PPC will use the 'nap' mode of powersaving,
+otherwise the 'doze' mode will be used.
+
+==============================================================
+
+printk:
+=======
+
+The four values in printk denote: console_loglevel,
+default_message_loglevel, minimum_console_loglevel and
+default_console_loglevel respectively.
+
+These values influence printk() behavior when printing or
+logging error messages. See 'man 2 syslog' for more info on
+the different loglevels.
+
+- console_loglevel:
+	messages with a higher priority than
+	this will be printed to the console
+- default_message_loglevel:
+	messages without an explicit priority
+	will be printed with this priority
+- minimum_console_loglevel:
+	minimum (highest) value to which
+	console_loglevel can be set
+- default_console_loglevel:
+	default value for console_loglevel
+
+
+printk_delay:
+=============
+
+Delay each printk message in printk_delay milliseconds
+
+Value from 0 - 10000 is allowed.
+
+
+printk_ratelimit:
+=================
+
+Some warning messages are rate limited. printk_ratelimit specifies
+the minimum length of time between these messages (in jiffies), by
+default we allow one every 5 seconds.
+
+A value of 0 will disable rate limiting.
+
+
+printk_ratelimit_burst:
+=======================
+
+While long term we enforce one message per printk_ratelimit
+seconds, we do allow a burst of messages to pass through.
+printk_ratelimit_burst specifies the number of messages we can
+send before ratelimiting kicks in.
+
+
+printk_devkmsg:
+===============
+
+Control the logging to /dev/kmsg from userspace:
+
+ratelimit:
+	default, ratelimited
+
+on: unlimited logging to /dev/kmsg from userspace
+
+off: logging to /dev/kmsg disabled
+
+The kernel command line parameter printk.devkmsg= overrides this and is
+a one-time setting until next reboot: once set, it cannot be changed by
+this sysctl interface anymore.
+
+
+randomize_va_space:
+===================
+
+This option can be used to select the type of process address
+space randomization that is used in the system, for architectures
+that support this feature.
+
+==  ===========================================================================
+0   Turn the process address space randomization off.  This is the
+    default for architectures that do not support this feature anyways,
+    and kernels that are booted with the "norandmaps" parameter.
+
+1   Make the addresses of mmap base, stack and VDSO page randomized.
+    This, among other things, implies that shared libraries will be
+    loaded to random addresses.  Also for PIE-linked binaries, the
+    location of code start is randomized.  This is the default if the
+    CONFIG_COMPAT_BRK option is enabled.
+
+2   Additionally enable heap randomization.  This is the default if
+    CONFIG_COMPAT_BRK is disabled.
+
+    There are a few legacy applications out there (such as some ancient
+    versions of libc.so.5 from 1996) that assume that brk area starts
+    just after the end of the code+bss.  These applications break when
+    start of the brk area is randomized.  There are however no known
+    non-legacy applications that would be broken this way, so for most
+    systems it is safe to choose full randomization.
+
+    Systems with ancient and/or broken binaries should be configured
+    with CONFIG_COMPAT_BRK enabled, which excludes the heap from process
+    address space randomization.
+==  ===========================================================================
+
+
+reboot-cmd: (Sparc only)
+========================
+
+??? This seems to be a way to give an argument to the Sparc
+ROM/Flash boot loader. Maybe to tell it what to do after
+rebooting. ???
+
+
+rtsig-max & rtsig-nr:
+=====================
+
+The file rtsig-max can be used to tune the maximum number
+of POSIX realtime (queued) signals that can be outstanding
+in the system.
+
+rtsig-nr shows the number of RT signals currently queued.
+
+
+sched_energy_aware:
+===================
+
+Enables/disables Energy Aware Scheduling (EAS). EAS starts
+automatically on platforms where it can run (that is,
+platforms with asymmetric CPU topologies and having an Energy
+Model available). If your platform happens to meet the
+requirements for EAS but you do not want to use it, change
+this value to 0.
+
+
+sched_schedstats:
+=================
+
+Enables/disables scheduler statistics. Enabling this feature
+incurs a small amount of overhead in the scheduler but is
+useful for debugging and performance tuning.
+
+
+sg-big-buff:
+============
+
+This file shows the size of the generic SCSI (sg) buffer.
+You can't tune it just yet, but you could change it on
+compile time by editing include/scsi/sg.h and changing
+the value of SG_BIG_BUFF.
+
+There shouldn't be any reason to change this value. If
+you can come up with one, you probably know what you
+are doing anyway :)
+
+
+shmall:
+=======
+
+This parameter sets the total amount of shared memory pages that
+can be used system wide. Hence, SHMALL should always be at least
+ceil(shmmax/PAGE_SIZE).
+
+If you are not sure what the default PAGE_SIZE is on your Linux
+system, you can run the following command:
+
+	# getconf PAGE_SIZE
+
+
+shmmax:
+=======
+
+This value can be used to query and set the run time limit
+on the maximum shared memory segment size that can be created.
+Shared memory segments up to 1Gb are now supported in the
+kernel.  This value defaults to SHMMAX.
+
+
+shm_rmid_forced:
+================
+
+Linux lets you set resource limits, including how much memory one
+process can consume, via setrlimit(2).  Unfortunately, shared memory
+segments are allowed to exist without association with any process, and
+thus might not be counted against any resource limits.  If enabled,
+shared memory segments are automatically destroyed when their attach
+count becomes zero after a detach or a process termination.  It will
+also destroy segments that were created, but never attached to, on exit
+from the process.  The only use left for IPC_RMID is to immediately
+destroy an unattached segment.  Of course, this breaks the way things are
+defined, so some applications might stop working.  Note that this
+feature will do you no good unless you also configure your resource
+limits (in particular, RLIMIT_AS and RLIMIT_NPROC).  Most systems don't
+need this.
+
+Note that if you change this from 0 to 1, already created segments
+without users and with a dead originative process will be destroyed.
+
+
+sysctl_writes_strict:
+=====================
+
+Control how file position affects the behavior of updating sysctl values
+via the /proc/sys interface:
+
+  ==   ======================================================================
+  -1   Legacy per-write sysctl value handling, with no printk warnings.
+       Each write syscall must fully contain the sysctl value to be
+       written, and multiple writes on the same sysctl file descriptor
+       will rewrite the sysctl value, regardless of file position.
+   0   Same behavior as above, but warn about processes that perform writes
+       to a sysctl file descriptor when the file position is not 0.
+   1   (default) Respect file position when writing sysctl strings. Multiple
+       writes will append to the sysctl value buffer. Anything past the max
+       length of the sysctl value buffer will be ignored. Writes to numeric
+       sysctl entries must always be at file position 0 and the value must
+       be fully contained in the buffer sent in the write syscall.
+  ==   ======================================================================
+
+
+softlockup_all_cpu_backtrace:
+=============================
+
+This value controls the soft lockup detector thread's behavior
+when a soft lockup condition is detected as to whether or not
+to gather further debug information. If enabled, each cpu will
+be issued an NMI and instructed to capture stack trace.
+
+This feature is only applicable for architectures which support
+NMI.
+
+0: do nothing. This is the default behavior.
+
+1: on detection capture more debug information.
+
+
+soft_watchdog:
+==============
+
+This parameter can be used to control the soft lockup detector.
+
+   0 - disable the soft lockup detector
+
+   1 - enable the soft lockup detector
+
+The soft lockup detector monitors CPUs for threads that are hogging the CPUs
+without rescheduling voluntarily, and thus prevent the 'watchdog/N' threads
+from running. The mechanism depends on the CPUs ability to respond to timer
+interrupts which are needed for the 'watchdog/N' threads to be woken up by
+the watchdog timer function, otherwise the NMI watchdog - if enabled - can
+detect a hard lockup condition.
+
+
+stack_erasing:
+==============
+
+This parameter can be used to control kernel stack erasing at the end
+of syscalls for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK.
+
+That erasing reduces the information which kernel stack leak bugs
+can reveal and blocks some uninitialized stack variable attacks.
+The tradeoff is the performance impact: on a single CPU system kernel
+compilation sees a 1% slowdown, other systems and workloads may vary.
+
+  0: kernel stack erasing is disabled, STACKLEAK_METRICS are not updated.
+
+  1: kernel stack erasing is enabled (default), it is performed before
+     returning to the userspace at the end of syscalls.
+
+
+tainted
+=======
+
+Non-zero if the kernel has been tainted. Numeric values, which can be
+ORed together. The letters are seen in "Tainted" line of Oops reports.
+
+======  =====  ==============================================================
+     1  `(P)`  proprietary module was loaded
+     2  `(F)`  module was force loaded
+     4  `(S)`  SMP kernel oops on an officially SMP incapable processor
+     8  `(R)`  module was force unloaded
+    16  `(M)`  processor reported a Machine Check Exception (MCE)
+    32  `(B)`  bad page referenced or some unexpected page flags
+    64  `(U)`  taint requested by userspace application
+   128  `(D)`  kernel died recently, i.e. there was an OOPS or BUG
+   256  `(A)`  an ACPI table was overridden by user
+   512  `(W)`  kernel issued warning
+  1024  `(C)`  staging driver was loaded
+  2048  `(I)`  workaround for bug in platform firmware applied
+  4096  `(O)`  externally-built ("out-of-tree") module was loaded
+  8192  `(E)`  unsigned module was loaded
+ 16384  `(L)`  soft lockup occurred
+ 32768  `(K)`  kernel has been live patched
+ 65536  `(X)`  Auxiliary taint, defined and used by for distros
+131072  `(T)`  The kernel was built with the struct randomization plugin
+======  =====  ==============================================================
+
+See Documentation/admin-guide/tainted-kernels.rst for more information.
+
+
+threads-max:
+============
+
+This value controls the maximum number of threads that can be created
+using fork().
+
+During initialization the kernel sets this value such that even if the
+maximum number of threads is created, the thread structures occupy only
+a part (1/8th) of the available RAM pages.
+
+The minimum value that can be written to threads-max is 20.
+
+The maximum value that can be written to threads-max is given by the
+constant FUTEX_TID_MASK (0x3fffffff).
+
+If a value outside of this range is written to threads-max an error
+EINVAL occurs.
+
+The value written is checked against the available RAM pages. If the
+thread structures would occupy too much (more than 1/8th) of the
+available RAM pages threads-max is reduced accordingly.
+
+
+unknown_nmi_panic:
+==================
+
+The value in this file affects behavior of handling NMI. When the
+value is non-zero, unknown NMI is trapped and then panic occurs. At
+that time, kernel debugging information is displayed on console.
+
+NMI switch that most IA32 servers have fires unknown NMI up, for
+example.  If a system hangs up, try pressing the NMI switch.
+
+
+watchdog:
+=========
+
+This parameter can be used to disable or enable the soft lockup detector
+_and_ the NMI watchdog (i.e. the hard lockup detector) at the same time.
+
+   0 - disable both lockup detectors
+
+   1 - enable both lockup detectors
+
+The soft lockup detector and the NMI watchdog can also be disabled or
+enabled individually, using the soft_watchdog and nmi_watchdog parameters.
+If the watchdog parameter is read, for example by executing::
+
+   cat /proc/sys/kernel/watchdog
+
+the output of this command (0 or 1) shows the logical OR of soft_watchdog
+and nmi_watchdog.
+
+
+watchdog_cpumask:
+=================
+
+This value can be used to control on which cpus the watchdog may run.
+The default cpumask is all possible cores, but if NO_HZ_FULL is
+enabled in the kernel config, and cores are specified with the
+nohz_full= boot argument, those cores are excluded by default.
+Offline cores can be included in this mask, and if the core is later
+brought online, the watchdog will be started based on the mask value.
+
+Typically this value would only be touched in the nohz_full case
+to re-enable cores that by default were not running the watchdog,
+if a kernel lockup was suspected on those cores.
+
+The argument value is the standard cpulist format for cpumasks,
+so for example to enable the watchdog on cores 0, 2, 3, and 4 you
+might say::
+
+  echo 0,2-4 > /proc/sys/kernel/watchdog_cpumask
+
+
+watchdog_thresh:
+================
+
+This value can be used to control the frequency of hrtimer and NMI
+events and the soft and hard lockup thresholds. The default threshold
+is 10 seconds.
+
+The softlockup threshold is (2 * watchdog_thresh). Setting this
+tunable to zero will disable lockup detection altogether.
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
new file mode 100644
index 000000000000..a7d44e71019d
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -0,0 +1,461 @@
+================================
+Documentation for /proc/sys/net/
+================================
+
+Copyright
+
+Copyright (c) 1999
+
+	- Terrehon Bowden <terrehon@pacbell.net>
+	- Bodo Bauer <bb@ricochet.net>
+
+Copyright (c) 2000
+
+	- Jorge Nerin <comandante@zaralinux.com>
+
+Copyright (c) 2009
+
+	- Shen Feng <shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in index.rst.
+
+------------------------------------------------------------------------------
+
+This file contains the documentation for the sysctl files in
+/proc/sys/net
+
+The interface  to  the  networking  parts  of  the  kernel  is  located  in
+/proc/sys/net. The following table shows all possible subdirectories.  You may
+see only some of them, depending on your kernel's configuration.
+
+
+Table : Subdirectories in /proc/sys/net
+
+ ========= =================== = ========== ==================
+ Directory Content               Directory  Content
+ ========= =================== = ========== ==================
+ core      General parameter     appletalk  Appletalk protocol
+ unix      Unix domain sockets   netrom     NET/ROM
+ 802       E802 protocol         ax25       AX25
+ ethernet  Ethernet protocol     rose       X.25 PLP layer
+ ipv4      IP version 4          x25        X.25 protocol
+ ipx       IPX                   token-ring IBM token ring
+ bridge    Bridging              decnet     DEC net
+ ipv6      IP version 6          tipc       TIPC
+ ========= =================== = ========== ==================
+
+1. /proc/sys/net/core - Network core options
+============================================
+
+bpf_jit_enable
+--------------
+
+This enables the BPF Just in Time (JIT) compiler. BPF is a flexible
+and efficient infrastructure allowing to execute bytecode at various
+hook points. It is used in a number of Linux kernel subsystems such
+as networking (e.g. XDP, tc), tracing (e.g. kprobes, uprobes, tracepoints)
+and security (e.g. seccomp). LLVM has a BPF back end that can compile
+restricted C into a sequence of BPF instructions. After program load
+through bpf(2) and passing a verifier in the kernel, a JIT will then
+translate these BPF proglets into native CPU instructions. There are
+two flavors of JITs, the newer eBPF JIT currently supported on:
+
+  - x86_64
+  - x86_32
+  - arm64
+  - arm32
+  - ppc64
+  - sparc64
+  - mips64
+  - s390x
+  - riscv
+
+And the older cBPF JIT supported on the following archs:
+
+  - mips
+  - ppc
+  - sparc
+
+eBPF JITs are a superset of cBPF JITs, meaning the kernel will
+migrate cBPF instructions into eBPF instructions and then JIT
+compile them transparently. Older cBPF JITs can only translate
+tcpdump filters, seccomp rules, etc, but not mentioned eBPF
+programs loaded through bpf(2).
+
+Values:
+
+	- 0 - disable the JIT (default value)
+	- 1 - enable the JIT
+	- 2 - enable the JIT and ask the compiler to emit traces on kernel log.
+
+bpf_jit_harden
+--------------
+
+This enables hardening for the BPF JIT compiler. Supported are eBPF
+JIT backends. Enabling hardening trades off performance, but can
+mitigate JIT spraying.
+
+Values:
+
+	- 0 - disable JIT hardening (default value)
+	- 1 - enable JIT hardening for unprivileged users only
+	- 2 - enable JIT hardening for all users
+
+bpf_jit_kallsyms
+----------------
+
+When BPF JIT compiler is enabled, then compiled images are unknown
+addresses to the kernel, meaning they neither show up in traces nor
+in /proc/kallsyms. This enables export of these addresses, which can
+be used for debugging/tracing. If bpf_jit_harden is enabled, this
+feature is disabled.
+
+Values :
+
+	- 0 - disable JIT kallsyms export (default value)
+	- 1 - enable JIT kallsyms export for privileged users only
+
+bpf_jit_limit
+-------------
+
+This enforces a global limit for memory allocations to the BPF JIT
+compiler in order to reject unprivileged JIT requests once it has
+been surpassed. bpf_jit_limit contains the value of the global limit
+in bytes.
+
+dev_weight
+----------
+
+The maximum number of packets that kernel can handle on a NAPI interrupt,
+it's a Per-CPU variable. For drivers that support LRO or GRO_HW, a hardware
+aggregated packet is counted as one packet in this context.
+
+Default: 64
+
+dev_weight_rx_bias
+------------------
+
+RPS (e.g. RFS, aRFS) processing is competing with the registered NAPI poll function
+of the driver for the per softirq cycle netdev_budget. This parameter influences
+the proportion of the configured netdev_budget that is spent on RPS based packet
+processing during RX softirq cycles. It is further meant for making current
+dev_weight adaptable for asymmetric CPU needs on RX/TX side of the network stack.
+(see dev_weight_tx_bias) It is effective on a per CPU basis. Determination is based
+on dev_weight and is calculated multiplicative (dev_weight * dev_weight_rx_bias).
+
+Default: 1
+
+dev_weight_tx_bias
+------------------
+
+Scales the maximum number of packets that can be processed during a TX softirq cycle.
+Effective on a per CPU basis. Allows scaling of current dev_weight for asymmetric
+net stack processing needs. Be careful to avoid making TX softirq processing a CPU hog.
+
+Calculation is based on dev_weight (dev_weight * dev_weight_tx_bias).
+
+Default: 1
+
+default_qdisc
+-------------
+
+The default queuing discipline to use for network devices. This allows
+overriding the default of pfifo_fast with an alternative. Since the default
+queuing discipline is created without additional parameters so is best suited
+to queuing disciplines that work well without configuration like stochastic
+fair queue (sfq), CoDel (codel) or fair queue CoDel (fq_codel). Don't use
+queuing disciplines like Hierarchical Token Bucket or Deficit Round Robin
+which require setting up classes and bandwidths. Note that physical multiqueue
+interfaces still use mq as root qdisc, which in turn uses this default for its
+leaves. Virtual devices (like e.g. lo or veth) ignore this setting and instead
+default to noqueue.
+
+Default: pfifo_fast
+
+busy_read
+---------
+
+Low latency busy poll timeout for socket reads. (needs CONFIG_NET_RX_BUSY_POLL)
+Approximate time in us to busy loop waiting for packets on the device queue.
+This sets the default value of the SO_BUSY_POLL socket option.
+Can be set or overridden per socket by setting socket option SO_BUSY_POLL,
+which is the preferred method of enabling. If you need to enable the feature
+globally via sysctl, a value of 50 is recommended.
+
+Will increase power usage.
+
+Default: 0 (off)
+
+busy_poll
+----------------
+Low latency busy poll timeout for poll and select. (needs CONFIG_NET_RX_BUSY_POLL)
+Approximate time in us to busy loop waiting for events.
+Recommended value depends on the number of sockets you poll on.
+For several sockets 50, for several hundreds 100.
+For more than that you probably want to use epoll.
+Note that only sockets with SO_BUSY_POLL set will be busy polled,
+so you want to either selectively set SO_BUSY_POLL on those sockets or set
+sysctl.net.busy_read globally.
+
+Will increase power usage.
+
+Default: 0 (off)
+
+rmem_default
+------------
+
+The default setting of the socket receive buffer in bytes.
+
+rmem_max
+--------
+
+The maximum receive socket buffer size in bytes.
+
+tstamp_allow_data
+-----------------
+Allow processes to receive tx timestamps looped together with the original
+packet contents. If disabled, transmit timestamp requests from unprivileged
+processes are dropped unless socket option SOF_TIMESTAMPING_OPT_TSONLY is set.
+
+Default: 1 (on)
+
+
+wmem_default
+------------
+
+The default setting (in bytes) of the socket send buffer.
+
+wmem_max
+--------
+
+The maximum send socket buffer size in bytes.
+
+message_burst and message_cost
+------------------------------
+
+These parameters  are used to limit the warning messages written to the kernel
+log from  the  networking  code.  They  enforce  a  rate  limit  to  make  a
+denial-of-service attack  impossible. A higher message_cost factor, results in
+fewer messages that will be written. Message_burst controls when messages will
+be dropped.  The  default  settings  limit  warning messages to one every five
+seconds.
+
+warnings
+--------
+
+This sysctl is now unused.
+
+This was used to control console messages from the networking stack that
+occur because of problems on the network like duplicate address or bad
+checksums.
+
+These messages are now emitted at KERN_DEBUG and can generally be enabled
+and controlled by the dynamic_debug facility.
+
+netdev_budget
+-------------
+
+Maximum number of packets taken from all interfaces in one polling cycle (NAPI
+poll). In one polling cycle interfaces which are registered to polling are
+probed in a round-robin manner. Also, a polling cycle may not exceed
+netdev_budget_usecs microseconds, even if netdev_budget has not been
+exhausted.
+
+netdev_budget_usecs
+---------------------
+
+Maximum number of microseconds in one NAPI polling cycle. Polling
+will exit when either netdev_budget_usecs have elapsed during the
+poll cycle or the number of packets processed reaches netdev_budget.
+
+netdev_max_backlog
+------------------
+
+Maximum number  of  packets,  queued  on  the  INPUT  side, when the interface
+receives packets faster than kernel can process them.
+
+netdev_rss_key
+--------------
+
+RSS (Receive Side Scaling) enabled drivers use a 40 bytes host key that is
+randomly generated.
+Some user space might need to gather its content even if drivers do not
+provide ethtool -x support yet.
+
+::
+
+  myhost:~# cat /proc/sys/net/core/netdev_rss_key
+  84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8: ... (52 bytes total)
+
+File contains nul bytes if no driver ever called netdev_rss_key_fill() function.
+
+Note:
+  /proc/sys/net/core/netdev_rss_key contains 52 bytes of key,
+  but most drivers only use 40 bytes of it.
+
+::
+
+  myhost:~# ethtool -x eth0
+  RX flow hash indirection table for eth0 with 8 RX ring(s):
+      0:    0     1     2     3     4     5     6     7
+  RSS hash key:
+  84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8:43:e3:c9:0c:fd:17:55:c2:3a:4d:69:ed:f1:42:89
+
+netdev_tstamp_prequeue
+----------------------
+
+If set to 0, RX packet timestamps can be sampled after RPS processing, when
+the target CPU processes packets. It might give some delay on timestamps, but
+permit to distribute the load on several cpus.
+
+If set to 1 (default), timestamps are sampled as soon as possible, before
+queueing.
+
+optmem_max
+----------
+
+Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
+of struct cmsghdr structures with appended data.
+
+fb_tunnels_only_for_init_net
+----------------------------
+
+Controls if fallback tunnels (like tunl0, gre0, gretap0, erspan0,
+sit0, ip6tnl0, ip6gre0) are automatically created when a new
+network namespace is created, if corresponding tunnel is present
+in initial network namespace.
+If set to 1, these devices are not automatically created, and
+user space is responsible for creating them if needed.
+
+Default : 0  (for compatibility reasons)
+
+devconf_inherit_init_net
+------------------------
+
+Controls if a new network namespace should inherit all current
+settings under /proc/sys/net/{ipv4,ipv6}/conf/{all,default}/. By
+default, we keep the current behavior: for IPv4 we inherit all current
+settings from init_net and for IPv6 we reset all settings to default.
+
+If set to 1, both IPv4 and IPv6 settings are forced to inherit from
+current ones in init_net. If set to 2, both IPv4 and IPv6 settings are
+forced to reset to their default values.
+
+Default : 0  (for compatibility reasons)
+
+2. /proc/sys/net/unix - Parameters for Unix domain sockets
+----------------------------------------------------------
+
+There is only one file in this directory.
+unix_dgram_qlen limits the max number of datagrams queued in Unix domain
+socket's buffer. It will not take effect unless PF_UNIX flag is specified.
+
+
+3. /proc/sys/net/ipv4 - IPV4 settings
+-------------------------------------
+Please see: Documentation/networking/ip-sysctl.txt and ipvs-sysctl.txt for
+descriptions of these entries.
+
+
+4. Appletalk
+------------
+
+The /proc/sys/net/appletalk  directory  holds the Appletalk configuration data
+when Appletalk is loaded. The configurable parameters are:
+
+aarp-expiry-time
+----------------
+
+The amount  of  time  we keep an ARP entry before expiring it. Used to age out
+old hosts.
+
+aarp-resolve-time
+-----------------
+
+The amount of time we will spend trying to resolve an Appletalk address.
+
+aarp-retransmit-limit
+---------------------
+
+The number of times we will retransmit a query before giving up.
+
+aarp-tick-time
+--------------
+
+Controls the rate at which expires are checked.
+
+The directory  /proc/net/appletalk  holds the list of active Appletalk sockets
+on a machine.
+
+The fields  indicate  the DDP type, the local address (in network:node format)
+the remote  address,  the  size of the transmit pending queue, the size of the
+received queue  (bytes waiting for applications to read) the state and the uid
+owning the socket.
+
+/proc/net/atalk_iface lists  all  the  interfaces  configured for appletalk.It
+shows the  name  of the interface, its Appletalk address, the network range on
+that address  (or  network number for phase 1 networks), and the status of the
+interface.
+
+/proc/net/atalk_route lists  each  known  network  route.  It lists the target
+(network) that the route leads to, the router (may be directly connected), the
+route flags, and the device the route is using.
+
+
+5. IPX
+------
+
+The IPX protocol has no tunable values in proc/sys/net.
+
+The IPX  protocol  does,  however,  provide  proc/net/ipx. This lists each IPX
+socket giving  the  local  and  remote  addresses  in  Novell  format (that is
+network:node:port). In  accordance  with  the  strange  Novell  tradition,
+everything but the port is in hex. Not_Connected is displayed for sockets that
+are not  tied to a specific remote address. The Tx and Rx queue sizes indicate
+the number  of  bytes  pending  for  transmission  and  reception.  The  state
+indicates the  state  the  socket  is  in and the uid is the owning uid of the
+socket.
+
+The /proc/net/ipx_interface  file lists all IPX interfaces. For each interface
+it gives  the network number, the node number, and indicates if the network is
+the primary  network.  It  also  indicates  which  device  it  is bound to (or
+Internal for  internal  networks)  and  the  Frame  Type if appropriate. Linux
+supports 802.3,  802.2,  802.2  SNAP  and DIX (Blue Book) ethernet framing for
+IPX.
+
+The /proc/net/ipx_route  table  holds  a list of IPX routes. For each route it
+gives the  destination  network, the router node (or Directly) and the network
+address of the router (or Connected) for internal networks.
+
+6. TIPC
+-------
+
+tipc_rmem
+---------
+
+The TIPC protocol now has a tunable for the receive memory, similar to the
+tcp_rmem - i.e. a vector of 3 INTEGERs: (min, default, max)
+
+::
+
+    # cat /proc/sys/net/tipc/tipc_rmem
+    4252725 34021800        68043600
+    #
+
+The max value is set to CONN_OVERLOAD_LIMIT, and the default and min values
+are scaled (shifted) versions of that same value.  Note that the min value
+is not at this point in time used in any meaningful way, but the triplet is
+preserved in order to be consistent with things like tcp_rmem.
+
+named_timeout
+-------------
+
+TIPC name table updates are distributed asynchronously in a cluster, without
+any form of transaction handling. This means that different race scenarios are
+possible. One such is that a name withdrawal sent out by one node and received
+by another node may arrive after a second, overlapping name publication already
+has been accepted from a third node, although the conflicting updates
+originally may have been issued in the correct sequential order.
+If named_timeout is nonzero, failed topology updates will be placed on a defer
+queue until another event arrives that clears the error, or until the timeout
+expires. Value is in milliseconds.
diff --git a/Documentation/admin-guide/sysctl/sunrpc.rst b/Documentation/admin-guide/sysctl/sunrpc.rst
new file mode 100644
index 000000000000..09780a682afd
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/sunrpc.rst
@@ -0,0 +1,25 @@
+===================================
+Documentation for /proc/sys/sunrpc/
+===================================
+
+kernel version 2.2.10
+
+Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
+
+For general info and legal blurb, please look in index.rst.
+
+------------------------------------------------------------------------------
+
+This file contains the documentation for the sysctl files in
+/proc/sys/sunrpc and is valid for Linux kernel version 2.2.
+
+The files in this directory can be used to (re)set the debug
+flags of the SUN Remote Procedure Call (RPC) subsystem in
+the Linux kernel. This stuff is used for NFS, KNFSD and
+maybe a few other things as well.
+
+The files in there are used to control the debugging flags:
+rpc_debug, nfs_debug, nfsd_debug and nlm_debug.
+
+These flags are for kernel hackers only. You should read the
+source code in net/sunrpc/ for more information.
diff --git a/Documentation/admin-guide/sysctl/user.rst b/Documentation/admin-guide/sysctl/user.rst
new file mode 100644
index 000000000000..650eaa03f15e
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/user.rst
@@ -0,0 +1,78 @@
+=================================
+Documentation for /proc/sys/user/
+=================================
+
+kernel version 4.9.0
+
+Copyright (c) 2016		Eric Biederman <ebiederm@xmission.com>
+
+------------------------------------------------------------------------------
+
+This file contains the documentation for the sysctl files in
+/proc/sys/user.
+
+The files in this directory can be used to override the default
+limits on the number of namespaces and other objects that have
+per user per user namespace limits.
+
+The primary purpose of these limits is to stop programs that
+malfunction and attempt to create a ridiculous number of objects,
+before the malfunction becomes a system wide problem.  It is the
+intention that the defaults of these limits are set high enough that
+no program in normal operation should run into these limits.
+
+The creation of per user per user namespace objects are charged to
+the user in the user namespace who created the object and
+verified to be below the per user limit in that user namespace.
+
+The creation of objects is also charged to all of the users
+who created user namespaces the creation of the object happens
+in (user namespaces can be nested) and verified to be below the per user
+limits in the user namespaces of those users.
+
+This recursive counting of created objects ensures that creating a
+user namespace does not allow a user to escape their current limits.
+
+Currently, these files are in /proc/sys/user:
+
+max_cgroup_namespaces
+=====================
+
+  The maximum number of cgroup namespaces that any user in the current
+  user namespace may create.
+
+max_ipc_namespaces
+==================
+
+  The maximum number of ipc namespaces that any user in the current
+  user namespace may create.
+
+max_mnt_namespaces
+==================
+
+  The maximum number of mount namespaces that any user in the current
+  user namespace may create.
+
+max_net_namespaces
+==================
+
+  The maximum number of network namespaces that any user in the
+  current user namespace may create.
+
+max_pid_namespaces
+==================
+
+  The maximum number of pid namespaces that any user in the current
+  user namespace may create.
+
+max_user_namespaces
+===================
+
+  The maximum number of user namespaces that any user in the current
+  user namespace may create.
+
+max_uts_namespaces
+==================
+
+  The maximum number of user namespaces that any user in the current
+  user namespace may create.
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
new file mode 100644
index 000000000000..5aceb5cd5ce7
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -0,0 +1,964 @@
+===============================
+Documentation for /proc/sys/vm/
+===============================
+
+kernel version 2.6.29
+
+Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
+
+Copyright (c) 2008         Peter W. Morreale <pmorreale@novell.com>
+
+For general info and legal blurb, please look in index.rst.
+
+------------------------------------------------------------------------------
+
+This file contains the documentation for the sysctl files in
+/proc/sys/vm and is valid for Linux kernel version 2.6.29.
+
+The files in this directory can be used to tune the operation
+of the virtual memory (VM) subsystem of the Linux kernel and
+the writeout of dirty data to disk.
+
+Default values and initialization routines for most of these
+files can be found in mm/swap.c.
+
+Currently, these files are in /proc/sys/vm:
+
+- admin_reserve_kbytes
+- block_dump
+- compact_memory
+- compact_unevictable_allowed
+- dirty_background_bytes
+- dirty_background_ratio
+- dirty_bytes
+- dirty_expire_centisecs
+- dirty_ratio
+- dirtytime_expire_seconds
+- dirty_writeback_centisecs
+- drop_caches
+- extfrag_threshold
+- hugetlb_shm_group
+- laptop_mode
+- legacy_va_layout
+- lowmem_reserve_ratio
+- max_map_count
+- memory_failure_early_kill
+- memory_failure_recovery
+- min_free_kbytes
+- min_slab_ratio
+- min_unmapped_ratio
+- mmap_min_addr
+- mmap_rnd_bits
+- mmap_rnd_compat_bits
+- nr_hugepages
+- nr_hugepages_mempolicy
+- nr_overcommit_hugepages
+- nr_trim_pages         (only if CONFIG_MMU=n)
+- numa_zonelist_order
+- oom_dump_tasks
+- oom_kill_allocating_task
+- overcommit_kbytes
+- overcommit_memory
+- overcommit_ratio
+- page-cluster
+- panic_on_oom
+- percpu_pagelist_fraction
+- stat_interval
+- stat_refresh
+- numa_stat
+- swappiness
+- unprivileged_userfaultfd
+- user_reserve_kbytes
+- vfs_cache_pressure
+- watermark_boost_factor
+- watermark_scale_factor
+- zone_reclaim_mode
+
+
+admin_reserve_kbytes
+====================
+
+The amount of free memory in the system that should be reserved for users
+with the capability cap_sys_admin.
+
+admin_reserve_kbytes defaults to min(3% of free pages, 8MB)
+
+That should provide enough for the admin to log in and kill a process,
+if necessary, under the default overcommit 'guess' mode.
+
+Systems running under overcommit 'never' should increase this to account
+for the full Virtual Memory Size of programs used to recover. Otherwise,
+root may not be able to log in to recover the system.
+
+How do you calculate a minimum useful reserve?
+
+sshd or login + bash (or some other shell) + top (or ps, kill, etc.)
+
+For overcommit 'guess', we can sum resident set sizes (RSS).
+On x86_64 this is about 8MB.
+
+For overcommit 'never', we can take the max of their virtual sizes (VSZ)
+and add the sum of their RSS.
+On x86_64 this is about 128MB.
+
+Changing this takes effect whenever an application requests memory.
+
+
+block_dump
+==========
+
+block_dump enables block I/O debugging when set to a nonzero value. More
+information on block I/O debugging is in Documentation/laptops/laptop-mode.rst.
+
+
+compact_memory
+==============
+
+Available only when CONFIG_COMPACTION is set. When 1 is written to the file,
+all zones are compacted such that free memory is available in contiguous
+blocks where possible. This can be important for example in the allocation of
+huge pages although processes will also directly compact memory as required.
+
+
+compact_unevictable_allowed
+===========================
+
+Available only when CONFIG_COMPACTION is set. When set to 1, compaction is
+allowed to examine the unevictable lru (mlocked pages) for pages to compact.
+This should be used on systems where stalls for minor page faults are an
+acceptable trade for large contiguous free memory.  Set to 0 to prevent
+compaction from moving pages that are unevictable.  Default value is 1.
+
+
+dirty_background_bytes
+======================
+
+Contains the amount of dirty memory at which the background kernel
+flusher threads will start writeback.
+
+Note:
+  dirty_background_bytes is the counterpart of dirty_background_ratio. Only
+  one of them may be specified at a time. When one sysctl is written it is
+  immediately taken into account to evaluate the dirty memory limits and the
+  other appears as 0 when read.
+
+
+dirty_background_ratio
+======================
+
+Contains, as a percentage of total available memory that contains free pages
+and reclaimable pages, the number of pages at which the background kernel
+flusher threads will start writing out dirty data.
+
+The total available memory is not equal to total system memory.
+
+
+dirty_bytes
+===========
+
+Contains the amount of dirty memory at which a process generating disk writes
+will itself start writeback.
+
+Note: dirty_bytes is the counterpart of dirty_ratio. Only one of them may be
+specified at a time. When one sysctl is written it is immediately taken into
+account to evaluate the dirty memory limits and the other appears as 0 when
+read.
+
+Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any
+value lower than this limit will be ignored and the old configuration will be
+retained.
+
+
+dirty_expire_centisecs
+======================
+
+This tunable is used to define when dirty data is old enough to be eligible
+for writeout by the kernel flusher threads.  It is expressed in 100'ths
+of a second.  Data which has been dirty in-memory for longer than this
+interval will be written out next time a flusher thread wakes up.
+
+
+dirty_ratio
+===========
+
+Contains, as a percentage of total available memory that contains free pages
+and reclaimable pages, the number of pages at which a process which is
+generating disk writes will itself start writing out dirty data.
+
+The total available memory is not equal to total system memory.
+
+
+dirtytime_expire_seconds
+========================
+
+When a lazytime inode is constantly having its pages dirtied, the inode with
+an updated timestamp will never get chance to be written out.  And, if the
+only thing that has happened on the file system is a dirtytime inode caused
+by an atime update, a worker will be scheduled to make sure that inode
+eventually gets pushed out to disk.  This tunable is used to define when dirty
+inode is old enough to be eligible for writeback by the kernel flusher threads.
+And, it is also used as the interval to wakeup dirtytime_writeback thread.
+
+
+dirty_writeback_centisecs
+=========================
+
+The kernel flusher threads will periodically wake up and write `old` data
+out to disk.  This tunable expresses the interval between those wakeups, in
+100'ths of a second.
+
+Setting this to zero disables periodic writeback altogether.
+
+
+drop_caches
+===========
+
+Writing to this will cause the kernel to drop clean caches, as well as
+reclaimable slab objects like dentries and inodes.  Once dropped, their
+memory becomes free.
+
+To free pagecache::
+
+	echo 1 > /proc/sys/vm/drop_caches
+
+To free reclaimable slab objects (includes dentries and inodes)::
+
+	echo 2 > /proc/sys/vm/drop_caches
+
+To free slab objects and pagecache::
+
+	echo 3 > /proc/sys/vm/drop_caches
+
+This is a non-destructive operation and will not free any dirty objects.
+To increase the number of objects freed by this operation, the user may run
+`sync` prior to writing to /proc/sys/vm/drop_caches.  This will minimize the
+number of dirty objects on the system and create more candidates to be
+dropped.
+
+This file is not a means to control the growth of the various kernel caches
+(inodes, dentries, pagecache, etc...)  These objects are automatically
+reclaimed by the kernel when memory is needed elsewhere on the system.
+
+Use of this file can cause performance problems.  Since it discards cached
+objects, it may cost a significant amount of I/O and CPU to recreate the
+dropped objects, especially if they were under heavy use.  Because of this,
+use outside of a testing or debugging environment is not recommended.
+
+You may see informational messages in your kernel log when this file is
+used::
+
+	cat (1234): drop_caches: 3
+
+These are informational only.  They do not mean that anything is wrong
+with your system.  To disable them, echo 4 (bit 2) into drop_caches.
+
+
+extfrag_threshold
+=================
+
+This parameter affects whether the kernel will compact memory or direct
+reclaim to satisfy a high-order allocation. The extfrag/extfrag_index file in
+debugfs shows what the fragmentation index for each order is in each zone in
+the system. Values tending towards 0 imply allocations would fail due to lack
+of memory, values towards 1000 imply failures are due to fragmentation and -1
+implies that the allocation will succeed as long as watermarks are met.
+
+The kernel will not compact memory in a zone if the
+fragmentation index is <= extfrag_threshold. The default value is 500.
+
+
+highmem_is_dirtyable
+====================
+
+Available only for systems with CONFIG_HIGHMEM enabled (32b systems).
+
+This parameter controls whether the high memory is considered for dirty
+writers throttling.  This is not the case by default which means that
+only the amount of memory directly visible/usable by the kernel can
+be dirtied. As a result, on systems with a large amount of memory and
+lowmem basically depleted writers might be throttled too early and
+streaming writes can get very slow.
+
+Changing the value to non zero would allow more memory to be dirtied
+and thus allow writers to write more data which can be flushed to the
+storage more effectively. Note this also comes with a risk of pre-mature
+OOM killer because some writers (e.g. direct block device writes) can
+only use the low memory and they can fill it up with dirty data without
+any throttling.
+
+
+hugetlb_shm_group
+=================
+
+hugetlb_shm_group contains group id that is allowed to create SysV
+shared memory segment using hugetlb page.
+
+
+laptop_mode
+===========
+
+laptop_mode is a knob that controls "laptop mode". All the things that are
+controlled by this knob are discussed in Documentation/laptops/laptop-mode.rst.
+
+
+legacy_va_layout
+================
+
+If non-zero, this sysctl disables the new 32-bit mmap layout - the kernel
+will use the legacy (2.4) layout for all processes.
+
+
+lowmem_reserve_ratio
+====================
+
+For some specialised workloads on highmem machines it is dangerous for
+the kernel to allow process memory to be allocated from the "lowmem"
+zone.  This is because that memory could then be pinned via the mlock()
+system call, or by unavailability of swapspace.
+
+And on large highmem machines this lack of reclaimable lowmem memory
+can be fatal.
+
+So the Linux page allocator has a mechanism which prevents allocations
+which *could* use highmem from using too much lowmem.  This means that
+a certain amount of lowmem is defended from the possibility of being
+captured into pinned user memory.
+
+(The same argument applies to the old 16 megabyte ISA DMA region.  This
+mechanism will also defend that region from allocations which could use
+highmem or lowmem).
+
+The `lowmem_reserve_ratio` tunable determines how aggressive the kernel is
+in defending these lower zones.
+
+If you have a machine which uses highmem or ISA DMA and your
+applications are using mlock(), or if you are running with no swap then
+you probably should change the lowmem_reserve_ratio setting.
+
+The lowmem_reserve_ratio is an array. You can see them by reading this file::
+
+	% cat /proc/sys/vm/lowmem_reserve_ratio
+	256     256     32
+
+But, these values are not used directly. The kernel calculates # of protection
+pages for each zones from them. These are shown as array of protection pages
+in /proc/zoneinfo like followings. (This is an example of x86-64 box).
+Each zone has an array of protection pages like this::
+
+  Node 0, zone      DMA
+    pages free     1355
+          min      3
+          low      3
+          high     4
+	:
+	:
+      numa_other   0
+          protection: (0, 2004, 2004, 2004)
+	^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    pagesets
+      cpu: 0 pcp: 0
+          :
+
+These protections are added to score to judge whether this zone should be used
+for page allocation or should be reclaimed.
+
+In this example, if normal pages (index=2) are required to this DMA zone and
+watermark[WMARK_HIGH] is used for watermark, the kernel judges this zone should
+not be used because pages_free(1355) is smaller than watermark + protection[2]
+(4 + 2004 = 2008). If this protection value is 0, this zone would be used for
+normal page requirement. If requirement is DMA zone(index=0), protection[0]
+(=0) is used.
+
+zone[i]'s protection[j] is calculated by following expression::
+
+  (i < j):
+    zone[i]->protection[j]
+    = (total sums of managed_pages from zone[i+1] to zone[j] on the node)
+      / lowmem_reserve_ratio[i];
+  (i = j):
+     (should not be protected. = 0;
+  (i > j):
+     (not necessary, but looks 0)
+
+The default values of lowmem_reserve_ratio[i] are
+
+    === ====================================
+    256 (if zone[i] means DMA or DMA32 zone)
+    32  (others)
+    === ====================================
+
+As above expression, they are reciprocal number of ratio.
+256 means 1/256. # of protection pages becomes about "0.39%" of total managed
+pages of higher zones on the node.
+
+If you would like to protect more pages, smaller values are effective.
+The minimum value is 1 (1/1 -> 100%). The value less than 1 completely
+disables protection of the pages.
+
+
+max_map_count:
+==============
+
+This file contains the maximum number of memory map areas a process
+may have. Memory map areas are used as a side-effect of calling
+malloc, directly by mmap, mprotect, and madvise, and also when loading
+shared libraries.
+
+While most applications need less than a thousand maps, certain
+programs, particularly malloc debuggers, may consume lots of them,
+e.g., up to one or two maps per allocation.
+
+The default value is 65536.
+
+
+memory_failure_early_kill:
+==========================
+
+Control how to kill processes when uncorrected memory error (typically
+a 2bit error in a memory module) is detected in the background by hardware
+that cannot be handled by the kernel. In some cases (like the page
+still having a valid copy on disk) the kernel will handle the failure
+transparently without affecting any applications. But if there is
+no other uptodate copy of the data it will kill to prevent any data
+corruptions from propagating.
+
+1: Kill all processes that have the corrupted and not reloadable page mapped
+as soon as the corruption is detected.  Note this is not supported
+for a few types of pages, like kernel internally allocated data or
+the swap cache, but works for the majority of user pages.
+
+0: Only unmap the corrupted page from all processes and only kill a process
+who tries to access it.
+
+The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can
+handle this if they want to.
+
+This is only active on architectures/platforms with advanced machine
+check handling and depends on the hardware capabilities.
+
+Applications can override this setting individually with the PR_MCE_KILL prctl
+
+
+memory_failure_recovery
+=======================
+
+Enable memory failure recovery (when supported by the platform)
+
+1: Attempt recovery.
+
+0: Always panic on a memory failure.
+
+
+min_free_kbytes
+===============
+
+This is used to force the Linux VM to keep a minimum number
+of kilobytes free.  The VM uses this number to compute a
+watermark[WMARK_MIN] value for each lowmem zone in the system.
+Each lowmem zone gets a number of reserved free pages based
+proportionally on its size.
+
+Some minimal amount of memory is needed to satisfy PF_MEMALLOC
+allocations; if you set this to lower than 1024KB, your system will
+become subtly broken, and prone to deadlock under high loads.
+
+Setting this too high will OOM your machine instantly.
+
+
+min_slab_ratio
+==============
+
+This is available only on NUMA kernels.
+
+A percentage of the total pages in each zone.  On Zone reclaim
+(fallback from the local zone occurs) slabs will be reclaimed if more
+than this percentage of pages in a zone are reclaimable slab pages.
+This insures that the slab growth stays under control even in NUMA
+systems that rarely perform global reclaim.
+
+The default is 5 percent.
+
+Note that slab reclaim is triggered in a per zone / node fashion.
+The process of reclaiming slab memory is currently not node specific
+and may not be fast.
+
+
+min_unmapped_ratio
+==================
+
+This is available only on NUMA kernels.
+
+This is a percentage of the total pages in each zone. Zone reclaim will
+only occur if more than this percentage of pages are in a state that
+zone_reclaim_mode allows to be reclaimed.
+
+If zone_reclaim_mode has the value 4 OR'd, then the percentage is compared
+against all file-backed unmapped pages including swapcache pages and tmpfs
+files. Otherwise, only unmapped pages backed by normal files but not tmpfs
+files and similar are considered.
+
+The default is 1 percent.
+
+
+mmap_min_addr
+=============
+
+This file indicates the amount of address space  which a user process will
+be restricted from mmapping.  Since kernel null dereference bugs could
+accidentally operate based on the information in the first couple of pages
+of memory userspace processes should not be allowed to write to them.  By
+default this value is set to 0 and no protections will be enforced by the
+security module.  Setting this value to something like 64k will allow the
+vast majority of applications to work correctly and provide defense in depth
+against future potential kernel bugs.
+
+
+mmap_rnd_bits
+=============
+
+This value can be used to select the number of bits to use to
+determine the random offset to the base address of vma regions
+resulting from mmap allocations on architectures which support
+tuning address space randomization.  This value will be bounded
+by the architecture's minimum and maximum supported values.
+
+This value can be changed after boot using the
+/proc/sys/vm/mmap_rnd_bits tunable
+
+
+mmap_rnd_compat_bits
+====================
+
+This value can be used to select the number of bits to use to
+determine the random offset to the base address of vma regions
+resulting from mmap allocations for applications run in
+compatibility mode on architectures which support tuning address
+space randomization.  This value will be bounded by the
+architecture's minimum and maximum supported values.
+
+This value can be changed after boot using the
+/proc/sys/vm/mmap_rnd_compat_bits tunable
+
+
+nr_hugepages
+============
+
+Change the minimum size of the hugepage pool.
+
+See Documentation/admin-guide/mm/hugetlbpage.rst
+
+
+nr_hugepages_mempolicy
+======================
+
+Change the size of the hugepage pool at run-time on a specific
+set of NUMA nodes.
+
+See Documentation/admin-guide/mm/hugetlbpage.rst
+
+
+nr_overcommit_hugepages
+=======================
+
+Change the maximum size of the hugepage pool. The maximum is
+nr_hugepages + nr_overcommit_hugepages.
+
+See Documentation/admin-guide/mm/hugetlbpage.rst
+
+
+nr_trim_pages
+=============
+
+This is available only on NOMMU kernels.
+
+This value adjusts the excess page trimming behaviour of power-of-2 aligned
+NOMMU mmap allocations.
+
+A value of 0 disables trimming of allocations entirely, while a value of 1
+trims excess pages aggressively. Any value >= 1 acts as the watermark where
+trimming of allocations is initiated.
+
+The default value is 1.
+
+See Documentation/nommu-mmap.txt for more information.
+
+
+numa_zonelist_order
+===================
+
+This sysctl is only for NUMA and it is deprecated. Anything but
+Node order will fail!
+
+'where the memory is allocated from' is controlled by zonelists.
+
+(This documentation ignores ZONE_HIGHMEM/ZONE_DMA32 for simple explanation.
+you may be able to read ZONE_DMA as ZONE_DMA32...)
+
+In non-NUMA case, a zonelist for GFP_KERNEL is ordered as following.
+ZONE_NORMAL -> ZONE_DMA
+This means that a memory allocation request for GFP_KERNEL will
+get memory from ZONE_DMA only when ZONE_NORMAL is not available.
+
+In NUMA case, you can think of following 2 types of order.
+Assume 2 node NUMA and below is zonelist of Node(0)'s GFP_KERNEL::
+
+  (A) Node(0) ZONE_NORMAL -> Node(0) ZONE_DMA -> Node(1) ZONE_NORMAL
+  (B) Node(0) ZONE_NORMAL -> Node(1) ZONE_NORMAL -> Node(0) ZONE_DMA.
+
+Type(A) offers the best locality for processes on Node(0), but ZONE_DMA
+will be used before ZONE_NORMAL exhaustion. This increases possibility of
+out-of-memory(OOM) of ZONE_DMA because ZONE_DMA is tend to be small.
+
+Type(B) cannot offer the best locality but is more robust against OOM of
+the DMA zone.
+
+Type(A) is called as "Node" order. Type (B) is "Zone" order.
+
+"Node order" orders the zonelists by node, then by zone within each node.
+Specify "[Nn]ode" for node order
+
+"Zone Order" orders the zonelists by zone type, then by node within each
+zone.  Specify "[Zz]one" for zone order.
+
+Specify "[Dd]efault" to request automatic configuration.
+
+On 32-bit, the Normal zone needs to be preserved for allocations accessible
+by the kernel, so "zone" order will be selected.
+
+On 64-bit, devices that require DMA32/DMA are relatively rare, so "node"
+order will be selected.
+
+Default order is recommended unless this is causing problems for your
+system/application.
+
+
+oom_dump_tasks
+==============
+
+Enables a system-wide task dump (excluding kernel threads) to be produced
+when the kernel performs an OOM-killing and includes such information as
+pid, uid, tgid, vm size, rss, pgtables_bytes, swapents, oom_score_adj
+score, and name.  This is helpful to determine why the OOM killer was
+invoked, to identify the rogue task that caused it, and to determine why
+the OOM killer chose the task it did to kill.
+
+If this is set to zero, this information is suppressed.  On very
+large systems with thousands of tasks it may not be feasible to dump
+the memory state information for each one.  Such systems should not
+be forced to incur a performance penalty in OOM conditions when the
+information may not be desired.
+
+If this is set to non-zero, this information is shown whenever the
+OOM killer actually kills a memory-hogging task.
+
+The default value is 1 (enabled).
+
+
+oom_kill_allocating_task
+========================
+
+This enables or disables killing the OOM-triggering task in
+out-of-memory situations.
+
+If this is set to zero, the OOM killer will scan through the entire
+tasklist and select a task based on heuristics to kill.  This normally
+selects a rogue memory-hogging task that frees up a large amount of
+memory when killed.
+
+If this is set to non-zero, the OOM killer simply kills the task that
+triggered the out-of-memory condition.  This avoids the expensive
+tasklist scan.
+
+If panic_on_oom is selected, it takes precedence over whatever value
+is used in oom_kill_allocating_task.
+
+The default value is 0.
+
+
+overcommit_kbytes
+=================
+
+When overcommit_memory is set to 2, the committed address space is not
+permitted to exceed swap plus this amount of physical RAM. See below.
+
+Note: overcommit_kbytes is the counterpart of overcommit_ratio. Only one
+of them may be specified at a time. Setting one disables the other (which
+then appears as 0 when read).
+
+
+overcommit_memory
+=================
+
+This value contains a flag that enables memory overcommitment.
+
+When this flag is 0, the kernel attempts to estimate the amount
+of free memory left when userspace requests more memory.
+
+When this flag is 1, the kernel pretends there is always enough
+memory until it actually runs out.
+
+When this flag is 2, the kernel uses a "never overcommit"
+policy that attempts to prevent any overcommit of memory.
+Note that user_reserve_kbytes affects this policy.
+
+This feature can be very useful because there are a lot of
+programs that malloc() huge amounts of memory "just-in-case"
+and don't use much of it.
+
+The default value is 0.
+
+See Documentation/vm/overcommit-accounting.rst and
+mm/util.c::__vm_enough_memory() for more information.
+
+
+overcommit_ratio
+================
+
+When overcommit_memory is set to 2, the committed address
+space is not permitted to exceed swap plus this percentage
+of physical RAM.  See above.
+
+
+page-cluster
+============
+
+page-cluster controls the number of pages up to which consecutive pages
+are read in from swap in a single attempt. This is the swap counterpart
+to page cache readahead.
+The mentioned consecutivity is not in terms of virtual/physical addresses,
+but consecutive on swap space - that means they were swapped out together.
+
+It is a logarithmic value - setting it to zero means "1 page", setting
+it to 1 means "2 pages", setting it to 2 means "4 pages", etc.
+Zero disables swap readahead completely.
+
+The default value is three (eight pages at a time).  There may be some
+small benefits in tuning this to a different value if your workload is
+swap-intensive.
+
+Lower values mean lower latencies for initial faults, but at the same time
+extra faults and I/O delays for following faults if they would have been part of
+that consecutive pages readahead would have brought in.
+
+
+panic_on_oom
+============
+
+This enables or disables panic on out-of-memory feature.
+
+If this is set to 0, the kernel will kill some rogue process,
+called oom_killer.  Usually, oom_killer can kill rogue processes and
+system will survive.
+
+If this is set to 1, the kernel panics when out-of-memory happens.
+However, if a process limits using nodes by mempolicy/cpusets,
+and those nodes become memory exhaustion status, one process
+may be killed by oom-killer. No panic occurs in this case.
+Because other nodes' memory may be free. This means system total status
+may be not fatal yet.
+
+If this is set to 2, the kernel panics compulsorily even on the
+above-mentioned. Even oom happens under memory cgroup, the whole
+system panics.
+
+The default value is 0.
+
+1 and 2 are for failover of clustering. Please select either
+according to your policy of failover.
+
+panic_on_oom=2+kdump gives you very strong tool to investigate
+why oom happens. You can get snapshot.
+
+
+percpu_pagelist_fraction
+========================
+
+This is the fraction of pages at most (high mark pcp->high) in each zone that
+are allocated for each per cpu page list.  The min value for this is 8.  It
+means that we don't allow more than 1/8th of pages in each zone to be
+allocated in any single per_cpu_pagelist.  This entry only changes the value
+of hot per cpu pagelists.  User can specify a number like 100 to allocate
+1/100th of each zone to each per cpu page list.
+
+The batch value of each per cpu pagelist is also updated as a result.  It is
+set to pcp->high/4.  The upper limit of batch is (PAGE_SHIFT * 8)
+
+The initial value is zero.  Kernel does not use this value at boot time to set
+the high water marks for each per cpu page list.  If the user writes '0' to this
+sysctl, it will revert to this default behavior.
+
+
+stat_interval
+=============
+
+The time interval between which vm statistics are updated.  The default
+is 1 second.
+
+
+stat_refresh
+============
+
+Any read or write (by root only) flushes all the per-cpu vm statistics
+into their global totals, for more accurate reports when testing
+e.g. cat /proc/sys/vm/stat_refresh /proc/meminfo
+
+As a side-effect, it also checks for negative totals (elsewhere reported
+as 0) and "fails" with EINVAL if any are found, with a warning in dmesg.
+(At time of writing, a few stats are known sometimes to be found negative,
+with no ill effects: errors and warnings on these stats are suppressed.)
+
+
+numa_stat
+=========
+
+This interface allows runtime configuration of numa statistics.
+
+When page allocation performance becomes a bottleneck and you can tolerate
+some possible tool breakage and decreased numa counter precision, you can
+do::
+
+	echo 0 > /proc/sys/vm/numa_stat
+
+When page allocation performance is not a bottleneck and you want all
+tooling to work, you can do::
+
+	echo 1 > /proc/sys/vm/numa_stat
+
+
+swappiness
+==========
+
+This control is used to define how aggressive the kernel will swap
+memory pages.  Higher values will increase aggressiveness, lower values
+decrease the amount of swap.  A value of 0 instructs the kernel not to
+initiate swap until the amount of free and file-backed pages is less
+than the high water mark in a zone.
+
+The default value is 60.
+
+
+unprivileged_userfaultfd
+========================
+
+This flag controls whether unprivileged users can use the userfaultfd
+system calls.  Set this to 1 to allow unprivileged users to use the
+userfaultfd system calls, or set this to 0 to restrict userfaultfd to only
+privileged users (with SYS_CAP_PTRACE capability).
+
+The default value is 1.
+
+
+user_reserve_kbytes
+===================
+
+When overcommit_memory is set to 2, "never overcommit" mode, reserve
+min(3% of current process size, user_reserve_kbytes) of free memory.
+This is intended to prevent a user from starting a single memory hogging
+process, such that they cannot recover (kill the hog).
+
+user_reserve_kbytes defaults to min(3% of the current process size, 128MB).
+
+If this is reduced to zero, then the user will be allowed to allocate
+all free memory with a single process, minus admin_reserve_kbytes.
+Any subsequent attempts to execute a command will result in
+"fork: Cannot allocate memory".
+
+Changing this takes effect whenever an application requests memory.
+
+
+vfs_cache_pressure
+==================
+
+This percentage value controls the tendency of the kernel to reclaim
+the memory which is used for caching of directory and inode objects.
+
+At the default value of vfs_cache_pressure=100 the kernel will attempt to
+reclaim dentries and inodes at a "fair" rate with respect to pagecache and
+swapcache reclaim.  Decreasing vfs_cache_pressure causes the kernel to prefer
+to retain dentry and inode caches. When vfs_cache_pressure=0, the kernel will
+never reclaim dentries and inodes due to memory pressure and this can easily
+lead to out-of-memory conditions. Increasing vfs_cache_pressure beyond 100
+causes the kernel to prefer to reclaim dentries and inodes.
+
+Increasing vfs_cache_pressure significantly beyond 100 may have negative
+performance impact. Reclaim code needs to take various locks to find freeable
+directory and inode objects. With vfs_cache_pressure=1000, it will look for
+ten times more freeable objects than there are.
+
+
+watermark_boost_factor
+======================
+
+This factor controls the level of reclaim when memory is being fragmented.
+It defines the percentage of the high watermark of a zone that will be
+reclaimed if pages of different mobility are being mixed within pageblocks.
+The intent is that compaction has less work to do in the future and to
+increase the success rate of future high-order allocations such as SLUB
+allocations, THP and hugetlbfs pages.
+
+To make it sensible with respect to the watermark_scale_factor
+parameter, the unit is in fractions of 10,000. The default value of
+15,000 on !DISCONTIGMEM configurations means that up to 150% of the high
+watermark will be reclaimed in the event of a pageblock being mixed due
+to fragmentation. The level of reclaim is determined by the number of
+fragmentation events that occurred in the recent past. If this value is
+smaller than a pageblock then a pageblocks worth of pages will be reclaimed
+(e.g.  2MB on 64-bit x86). A boost factor of 0 will disable the feature.
+
+
+watermark_scale_factor
+======================
+
+This factor controls the aggressiveness of kswapd. It defines the
+amount of memory left in a node/system before kswapd is woken up and
+how much memory needs to be free before kswapd goes back to sleep.
+
+The unit is in fractions of 10,000. The default value of 10 means the
+distances between watermarks are 0.1% of the available memory in the
+node/system. The maximum value is 1000, or 10% of memory.
+
+A high rate of threads entering direct reclaim (allocstall) or kswapd
+going to sleep prematurely (kswapd_low_wmark_hit_quickly) can indicate
+that the number of free pages kswapd maintains for latency reasons is
+too small for the allocation bursts occurring in the system. This knob
+can then be used to tune kswapd aggressiveness accordingly.
+
+
+zone_reclaim_mode
+=================
+
+Zone_reclaim_mode allows someone to set more or less aggressive approaches to
+reclaim memory when a zone runs out of memory. If it is set to zero then no
+zone reclaim occurs. Allocations will be satisfied from other zones / nodes
+in the system.
+
+This is value OR'ed together of
+
+=	===================================
+1	Zone reclaim on
+2	Zone reclaim writes dirty pages out
+4	Zone reclaim swaps pages
+=	===================================
+
+zone_reclaim_mode is disabled by default.  For file servers or workloads
+that benefit from having their data cached, zone_reclaim_mode should be
+left disabled as the caching effect is likely to be more important than
+data locality.
+
+zone_reclaim may be enabled if it's known that the workload is partitioned
+such that each partition fits within a NUMA node and that accessing remote
+memory would cause a measurable performance reduction.  The page allocator
+will then reclaim easily reusable pages (those page cache pages that are
+currently not used) before allocating off node pages.
+
+Allowing zone reclaim to write out pages stops processes that are
+writing large amounts of data from dirtying pages on other nodes. Zone
+reclaim will write out dirty pages if a zone fills up and so effectively
+throttle the process. This may decrease the performance of a single process
+since it cannot use all of system memory to buffer the outgoing writes
+anymore but it preserve the memory on other nodes so that the performance
+of other processes running on other nodes will not be affected.
+
+Allowing regular swap effectively restricts allocations to the local
+node unless explicitly overridden by memory policies or cpuset
+configurations.
diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index 1d8e748f909f..c6224d039bcb 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -119,7 +119,7 @@ Kernel Pointers
 
 For printing kernel pointers which should be hidden from unprivileged
 users. The behaviour of %pK depends on the kptr_restrict sysctl - see
-Documentation/sysctl/kernel.rst for more details.
+Documentation/admin-guide/sysctl/kernel.rst for more details.
 
 Unmodified Addresses
 --------------------
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index d750b6926899..fb4735fd73b0 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1500,7 +1500,7 @@ review the kernel documentation in the directory /usr/src/linux/Documentation.
 This chapter  is  heavily  based  on the documentation included in the pre 2.2
 kernels, and became part of it in version 2.2.1 of the Linux kernel.
 
-Please see: Documentation/sysctl/ directory for descriptions of these
+Please see: Documentation/admin-guide/sysctl/ directory for descriptions of these
 entries.
 
 ------------------------------------------------------------------------------
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 5c3399cde1c4..df33674799b5 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -2287,7 +2287,7 @@ addr_scope_policy - INTEGER
 
 
 /proc/sys/net/core/*
-	Please see: Documentation/sysctl/net.rst for descriptions of these entries.
+	Please see: Documentation/admin-guide/sysctl/net.rst for descriptions of these entries.
 
 
 /proc/sys/net/unix/*
diff --git a/Documentation/sysctl/abi.rst b/Documentation/sysctl/abi.rst
deleted file mode 100644
index 599bcde7f0b7..000000000000
--- a/Documentation/sysctl/abi.rst
+++ /dev/null
@@ -1,67 +0,0 @@
-================================
-Documentation for /proc/sys/abi/
-================================
-
-kernel version 2.6.0.test2
-
-Copyright (c) 2003,  Fabian Frederick <ffrederick@users.sourceforge.net>
-
-For general info: index.rst.
-
-------------------------------------------------------------------------------
-
-This path is binary emulation relevant aka personality types aka abi.
-When a process is executed, it's linked to an exec_domain whose
-personality is defined using values available from /proc/sys/abi.
-You can find further details about abi in include/linux/personality.h.
-
-Here are the files featuring in 2.6 kernel:
-
-- defhandler_coff
-- defhandler_elf
-- defhandler_lcall7
-- defhandler_libcso
-- fake_utsname
-- trace
-
-defhandler_coff
----------------
-
-defined value:
-	PER_SCOSVR3::
-
-		0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS | SHORT_INODE
-
-defhandler_elf
---------------
-
-defined value:
-	PER_LINUX::
-
-		0
-
-defhandler_lcall7
------------------
-
-defined value :
-	PER_SVR4::
-
-		0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
-
-defhandler_libsco
------------------
-
-defined value:
-	PER_SVR4::
-
-		0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
-
-fake_utsname
-------------
-
-Unused
-
-trace
------
-
-Unused
diff --git a/Documentation/sysctl/fs.rst b/Documentation/sysctl/fs.rst
deleted file mode 100644
index 2a45119e3331..000000000000
--- a/Documentation/sysctl/fs.rst
+++ /dev/null
@@ -1,384 +0,0 @@
-===============================
-Documentation for /proc/sys/fs/
-===============================
-
-kernel version 2.2.10
-
-Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
-
-Copyright (c) 2009,        Shen Feng<shen@cn.fujitsu.com>
-
-For general info and legal blurb, please look in intro.rst.
-
-------------------------------------------------------------------------------
-
-This file contains documentation for the sysctl files in
-/proc/sys/fs/ and is valid for Linux kernel version 2.2.
-
-The files in this directory can be used to tune and monitor
-miscellaneous and general things in the operation of the Linux
-kernel. Since some of the files _can_ be used to screw up your
-system, it is advisable to read both documentation and source
-before actually making adjustments.
-
-1. /proc/sys/fs
-===============
-
-Currently, these files are in /proc/sys/fs:
-
-- aio-max-nr
-- aio-nr
-- dentry-state
-- dquot-max
-- dquot-nr
-- file-max
-- file-nr
-- inode-max
-- inode-nr
-- inode-state
-- nr_open
-- overflowuid
-- overflowgid
-- pipe-user-pages-hard
-- pipe-user-pages-soft
-- protected_fifos
-- protected_hardlinks
-- protected_regular
-- protected_symlinks
-- suid_dumpable
-- super-max
-- super-nr
-
-
-aio-nr & aio-max-nr
--------------------
-
-aio-nr is the running total of the number of events specified on the
-io_setup system call for all currently active aio contexts.  If aio-nr
-reaches aio-max-nr then io_setup will fail with EAGAIN.  Note that
-raising aio-max-nr does not result in the pre-allocation or re-sizing
-of any kernel data structures.
-
-
-dentry-state
-------------
-
-From linux/include/linux/dcache.h::
-
-  struct dentry_stat_t dentry_stat {
-        int nr_dentry;
-        int nr_unused;
-        int age_limit;         /* age in seconds */
-        int want_pages;        /* pages requested by system */
-        int nr_negative;       /* # of unused negative dentries */
-        int dummy;             /* Reserved for future use */
-  };
-
-Dentries are dynamically allocated and deallocated.
-
-nr_dentry shows the total number of dentries allocated (active
-+ unused). nr_unused shows the number of dentries that are not
-actively used, but are saved in the LRU list for future reuse.
-
-Age_limit is the age in seconds after which dcache entries
-can be reclaimed when memory is short and want_pages is
-nonzero when shrink_dcache_pages() has been called and the
-dcache isn't pruned yet.
-
-nr_negative shows the number of unused dentries that are also
-negative dentries which do not map to any files. Instead,
-they help speeding up rejection of non-existing files provided
-by the users.
-
-
-dquot-max & dquot-nr
---------------------
-
-The file dquot-max shows the maximum number of cached disk
-quota entries.
-
-The file dquot-nr shows the number of allocated disk quota
-entries and the number of free disk quota entries.
-
-If the number of free cached disk quotas is very low and
-you have some awesome number of simultaneous system users,
-you might want to raise the limit.
-
-
-file-max & file-nr
-------------------
-
-The value in file-max denotes the maximum number of file-
-handles that the Linux kernel will allocate. When you get lots
-of error messages about running out of file handles, you might
-want to increase this limit.
-
-Historically,the kernel was able to allocate file handles
-dynamically, but not to free them again. The three values in
-file-nr denote the number of allocated file handles, the number
-of allocated but unused file handles, and the maximum number of
-file handles. Linux 2.6 always reports 0 as the number of free
-file handles -- this is not an error, it just means that the
-number of allocated file handles exactly matches the number of
-used file handles.
-
-Attempts to allocate more file descriptors than file-max are
-reported with printk, look for "VFS: file-max limit <number>
-reached".
-
-
-nr_open
--------
-
-This denotes the maximum number of file-handles a process can
-allocate. Default value is 1024*1024 (1048576) which should be
-enough for most machines. Actual limit depends on RLIMIT_NOFILE
-resource limit.
-
-
-inode-max, inode-nr & inode-state
----------------------------------
-
-As with file handles, the kernel allocates the inode structures
-dynamically, but can't free them yet.
-
-The value in inode-max denotes the maximum number of inode
-handlers. This value should be 3-4 times larger than the value
-in file-max, since stdin, stdout and network sockets also
-need an inode struct to handle them. When you regularly run
-out of inodes, you need to increase this value.
-
-The file inode-nr contains the first two items from
-inode-state, so we'll skip to that file...
-
-Inode-state contains three actual numbers and four dummies.
-The actual numbers are, in order of appearance, nr_inodes,
-nr_free_inodes and preshrink.
-
-Nr_inodes stands for the number of inodes the system has
-allocated, this can be slightly more than inode-max because
-Linux allocates them one pageful at a time.
-
-Nr_free_inodes represents the number of free inodes (?) and
-preshrink is nonzero when the nr_inodes > inode-max and the
-system needs to prune the inode list instead of allocating
-more.
-
-
-overflowgid & overflowuid
--------------------------
-
-Some filesystems only support 16-bit UIDs and GIDs, although in Linux
-UIDs and GIDs are 32 bits. When one of these filesystems is mounted
-with writes enabled, any UID or GID that would exceed 65535 is translated
-to a fixed value before being written to disk.
-
-These sysctls allow you to change the value of the fixed UID and GID.
-The default is 65534.
-
-
-pipe-user-pages-hard
---------------------
-
-Maximum total number of pages a non-privileged user may allocate for pipes.
-Once this limit is reached, no new pipes may be allocated until usage goes
-below the limit again. When set to 0, no limit is applied, which is the default
-setting.
-
-
-pipe-user-pages-soft
---------------------
-
-Maximum total number of pages a non-privileged user may allocate for pipes
-before the pipe size gets limited to a single page. Once this limit is reached,
-new pipes will be limited to a single page in size for this user in order to
-limit total memory usage, and trying to increase them using fcntl() will be
-denied until usage goes below the limit again. The default value allows to
-allocate up to 1024 pipes at their default size. When set to 0, no limit is
-applied.
-
-
-protected_fifos
----------------
-
-The intent of this protection is to avoid unintentional writes to
-an attacker-controlled FIFO, where a program expected to create a regular
-file.
-
-When set to "0", writing to FIFOs is unrestricted.
-
-When set to "1" don't allow O_CREAT open on FIFOs that we don't own
-in world writable sticky directories, unless they are owned by the
-owner of the directory.
-
-When set to "2" it also applies to group writable sticky directories.
-
-This protection is based on the restrictions in Openwall.
-
-
-protected_hardlinks
---------------------
-
-A long-standing class of security issues is the hardlink-based
-time-of-check-time-of-use race, most commonly seen in world-writable
-directories like /tmp. The common method of exploitation of this flaw
-is to cross privilege boundaries when following a given hardlink (i.e. a
-root process follows a hardlink created by another user). Additionally,
-on systems without separated partitions, this stops unauthorized users
-from "pinning" vulnerable setuid/setgid files against being upgraded by
-the administrator, or linking to special files.
-
-When set to "0", hardlink creation behavior is unrestricted.
-
-When set to "1" hardlinks cannot be created by users if they do not
-already own the source file, or do not have read/write access to it.
-
-This protection is based on the restrictions in Openwall and grsecurity.
-
-
-protected_regular
------------------
-
-This protection is similar to protected_fifos, but it
-avoids writes to an attacker-controlled regular file, where a program
-expected to create one.
-
-When set to "0", writing to regular files is unrestricted.
-
-When set to "1" don't allow O_CREAT open on regular files that we
-don't own in world writable sticky directories, unless they are
-owned by the owner of the directory.
-
-When set to "2" it also applies to group writable sticky directories.
-
-
-protected_symlinks
-------------------
-
-A long-standing class of security issues is the symlink-based
-time-of-check-time-of-use race, most commonly seen in world-writable
-directories like /tmp. The common method of exploitation of this flaw
-is to cross privilege boundaries when following a given symlink (i.e. a
-root process follows a symlink belonging to another user). For a likely
-incomplete list of hundreds of examples across the years, please see:
-http://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=/tmp
-
-When set to "0", symlink following behavior is unrestricted.
-
-When set to "1" symlinks are permitted to be followed only when outside
-a sticky world-writable directory, or when the uid of the symlink and
-follower match, or when the directory owner matches the symlink's owner.
-
-This protection is based on the restrictions in Openwall and grsecurity.
-
-
-suid_dumpable:
---------------
-
-This value can be used to query and set the core dump mode for setuid
-or otherwise protected/tainted binaries. The modes are
-
-=   ==========  ===============================================================
-0   (default)	traditional behaviour. Any process which has changed
-		privilege levels or is execute only will not be dumped.
-1   (debug)	all processes dump core when possible. The core dump is
-		owned by the current user and no security is applied. This is
-		intended for system debugging situations only.
-		Ptrace is unchecked.
-		This is insecure as it allows regular users to examine the
-		memory contents of privileged processes.
-2   (suidsafe)	any binary which normally would not be dumped is dumped
-		anyway, but only if the "core_pattern" kernel sysctl is set to
-		either a pipe handler or a fully qualified path. (For more
-		details on this limitation, see CVE-2006-2451.) This mode is
-		appropriate when administrators are attempting to debug
-		problems in a normal environment, and either have a core dump
-		pipe handler that knows to treat privileged core dumps with
-		care, or specific directory defined for catching core dumps.
-		If a core dump happens without a pipe handler or fully
-		qualified path, a message will be emitted to syslog warning
-		about the lack of a correct setting.
-=   ==========  ===============================================================
-
-
-super-max & super-nr
---------------------
-
-These numbers control the maximum number of superblocks, and
-thus the maximum number of mounted filesystems the kernel
-can have. You only need to increase super-max if you need to
-mount more filesystems than the current value in super-max
-allows you to.
-
-
-aio-nr & aio-max-nr
--------------------
-
-aio-nr shows the current system-wide number of asynchronous io
-requests.  aio-max-nr allows you to change the maximum value
-aio-nr can grow to.
-
-
-mount-max
----------
-
-This denotes the maximum number of mounts that may exist
-in a mount namespace.
-
-
-
-2. /proc/sys/fs/binfmt_misc
-===========================
-
-Documentation for the files in /proc/sys/fs/binfmt_misc is
-in Documentation/admin-guide/binfmt-misc.rst.
-
-
-3. /proc/sys/fs/mqueue - POSIX message queues filesystem
-========================================================
-
-
-The "mqueue"  filesystem provides  the necessary kernel features to enable the
-creation of a  user space  library that  implements  the  POSIX message queues
-API (as noted by the  MSG tag in the  POSIX 1003.1-2001 version  of the System
-Interfaces specification.)
-
-The "mqueue" filesystem contains values for determining/setting  the amount of
-resources used by the file system.
-
-/proc/sys/fs/mqueue/queues_max is a read/write  file for  setting/getting  the
-maximum number of message queues allowed on the system.
-
-/proc/sys/fs/mqueue/msg_max  is  a  read/write file  for  setting/getting  the
-maximum number of messages in a queue value.  In fact it is the limiting value
-for another (user) limit which is set in mq_open invocation. This attribute of
-a queue must be less or equal then msg_max.
-
-/proc/sys/fs/mqueue/msgsize_max is  a read/write  file for setting/getting the
-maximum  message size value (it is every  message queue's attribute set during
-its creation).
-
-/proc/sys/fs/mqueue/msg_default is  a read/write  file for setting/getting the
-default number of messages in a queue value if attr parameter of mq_open(2) is
-NULL. If it exceed msg_max, the default value is initialized msg_max.
-
-/proc/sys/fs/mqueue/msgsize_default is a read/write file for setting/getting
-the default message size value if attr parameter of mq_open(2) is NULL. If it
-exceed msgsize_max, the default value is initialized msgsize_max.
-
-4. /proc/sys/fs/epoll - Configuration options for the epoll interface
-=====================================================================
-
-This directory contains configuration options for the epoll(7) interface.
-
-max_user_watches
-----------------
-
-Every epoll file descriptor can store a number of files to be monitored
-for event readiness. Each one of these monitored files constitutes a "watch".
-This configuration option sets the maximum number of "watches" that are
-allowed for each user.
-Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes
-on a 64bit one.
-The current default value for  max_user_watches  is the 1/32 of the available
-low memory, divided for the "watch" cost in bytes.
diff --git a/Documentation/sysctl/index.rst b/Documentation/sysctl/index.rst
deleted file mode 100644
index efbcde8c1c9c..000000000000
--- a/Documentation/sysctl/index.rst
+++ /dev/null
@@ -1,100 +0,0 @@
-:orphan:
-
-===========================
-Documentation for /proc/sys
-===========================
-
-Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
-
-------------------------------------------------------------------------------
-
-'Why', I hear you ask, 'would anyone even _want_ documentation
-for them sysctl files? If anybody really needs it, it's all in
-the source...'
-
-Well, this documentation is written because some people either
-don't know they need to tweak something, or because they don't
-have the time or knowledge to read the source code.
-
-Furthermore, the programmers who built sysctl have built it to
-be actually used, not just for the fun of programming it :-)
-
-------------------------------------------------------------------------------
-
-Legal blurb:
-
-As usual, there are two main things to consider:
-
-1. you get what you pay for
-2. it's free
-
-The consequences are that I won't guarantee the correctness of
-this document, and if you come to me complaining about how you
-screwed up your system because of wrong documentation, I won't
-feel sorry for you. I might even laugh at you...
-
-But of course, if you _do_ manage to screw up your system using
-only the sysctl options used in this file, I'd like to hear of
-it. Not only to have a great laugh, but also to make sure that
-you're the last RTFMing person to screw up.
-
-In short, e-mail your suggestions, corrections and / or horror
-stories to: <riel@nl.linux.org>
-
-Rik van Riel.
-
---------------------------------------------------------------
-
-Introduction
-============
-
-Sysctl is a means of configuring certain aspects of the kernel
-at run-time, and the /proc/sys/ directory is there so that you
-don't even need special tools to do it!
-In fact, there are only four things needed to use these config
-facilities:
-
-- a running Linux system
-- root access
-- common sense (this is especially hard to come by these days)
-- knowledge of what all those values mean
-
-As a quick 'ls /proc/sys' will show, the directory consists of
-several (arch-dependent?) subdirs. Each subdir is mainly about
-one part of the kernel, so you can do configuration on a piece
-by piece basis, or just some 'thematic frobbing'.
-
-This documentation is about:
-
-=============== ===============================================================
-abi/		execution domains & personalities
-debug/		<empty>
-dev/		device specific information (eg dev/cdrom/info)
-fs/		specific filesystems
-		filehandle, inode, dentry and quota tuning
-		binfmt_misc <Documentation/admin-guide/binfmt-misc.rst>
-kernel/		global kernel info / tuning
-		miscellaneous stuff
-net/		networking stuff, for documentation look in:
-		<Documentation/networking/>
-proc/		<empty>
-sunrpc/		SUN Remote Procedure Call (NFS)
-vm/		memory management tuning
-		buffer and cache management
-user/		Per user per user namespace limits
-=============== ===============================================================
-
-These are the subdirs I have on my system. There might be more
-or other subdirs in another setup. If you see another dir, I'd
-really like to hear about it :-)
-
-.. toctree::
-   :maxdepth: 1
-
-   abi
-   fs
-   kernel
-   net
-   sunrpc
-   user
-   vm
diff --git a/Documentation/sysctl/kernel.rst b/Documentation/sysctl/kernel.rst
deleted file mode 100644
index a0c1d4ce403a..000000000000
--- a/Documentation/sysctl/kernel.rst
+++ /dev/null
@@ -1,1177 +0,0 @@
-===================================
-Documentation for /proc/sys/kernel/
-===================================
-
-kernel version 2.2.10
-
-Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
-
-Copyright (c) 2009,        Shen Feng<shen@cn.fujitsu.com>
-
-For general info and legal blurb, please look in index.rst.
-
-------------------------------------------------------------------------------
-
-This file contains documentation for the sysctl files in
-/proc/sys/kernel/ and is valid for Linux kernel version 2.2.
-
-The files in this directory can be used to tune and monitor
-miscellaneous and general things in the operation of the Linux
-kernel. Since some of the files _can_ be used to screw up your
-system, it is advisable to read both documentation and source
-before actually making adjustments.
-
-Currently, these files might (depending on your configuration)
-show up in /proc/sys/kernel:
-
-- acct
-- acpi_video_flags
-- auto_msgmni
-- bootloader_type	     [ X86 only ]
-- bootloader_version	     [ X86 only ]
-- cap_last_cap
-- core_pattern
-- core_pipe_limit
-- core_uses_pid
-- ctrl-alt-del
-- dmesg_restrict
-- domainname
-- hostname
-- hotplug
-- hardlockup_all_cpu_backtrace
-- hardlockup_panic
-- hung_task_panic
-- hung_task_check_count
-- hung_task_timeout_secs
-- hung_task_check_interval_secs
-- hung_task_warnings
-- hyperv_record_panic_msg
-- kexec_load_disabled
-- kptr_restrict
-- l2cr                        [ PPC only ]
-- modprobe                    ==> Documentation/debugging-modules.txt
-- modules_disabled
-- msg_next_id		      [ sysv ipc ]
-- msgmax
-- msgmnb
-- msgmni
-- nmi_watchdog
-- osrelease
-- ostype
-- overflowgid
-- overflowuid
-- panic
-- panic_on_oops
-- panic_on_stackoverflow
-- panic_on_unrecovered_nmi
-- panic_on_warn
-- panic_print
-- panic_on_rcu_stall
-- perf_cpu_time_max_percent
-- perf_event_paranoid
-- perf_event_max_stack
-- perf_event_mlock_kb
-- perf_event_max_contexts_per_stack
-- pid_max
-- powersave-nap               [ PPC only ]
-- printk
-- printk_delay
-- printk_ratelimit
-- printk_ratelimit_burst
-- pty                         ==> Documentation/filesystems/devpts.txt
-- randomize_va_space
-- real-root-dev               ==> Documentation/admin-guide/initrd.rst
-- reboot-cmd                  [ SPARC only ]
-- rtsig-max
-- rtsig-nr
-- sched_energy_aware
-- seccomp/                    ==> Documentation/userspace-api/seccomp_filter.rst
-- sem
-- sem_next_id		      [ sysv ipc ]
-- sg-big-buff                 [ generic SCSI device (sg) ]
-- shm_next_id		      [ sysv ipc ]
-- shm_rmid_forced
-- shmall
-- shmmax                      [ sysv ipc ]
-- shmmni
-- softlockup_all_cpu_backtrace
-- soft_watchdog
-- stack_erasing
-- stop-a                      [ SPARC only ]
-- sysrq                       ==> Documentation/admin-guide/sysrq.rst
-- sysctl_writes_strict
-- tainted                     ==> Documentation/admin-guide/tainted-kernels.rst
-- threads-max
-- unknown_nmi_panic
-- watchdog
-- watchdog_thresh
-- version
-
-
-acct:
-=====
-
-highwater lowwater frequency
-
-If BSD-style process accounting is enabled these values control
-its behaviour. If free space on filesystem where the log lives
-goes below <lowwater>% accounting suspends. If free space gets
-above <highwater>% accounting resumes. <Frequency> determines
-how often do we check the amount of free space (value is in
-seconds). Default:
-4 2 30
-That is, suspend accounting if there left <= 2% free; resume it
-if we got >=4%; consider information about amount of free space
-valid for 30 seconds.
-
-
-acpi_video_flags:
-=================
-
-flags
-
-See Doc*/kernel/power/video.txt, it allows mode of video boot to be
-set during run time.
-
-
-auto_msgmni:
-============
-
-This variable has no effect and may be removed in future kernel
-releases. Reading it always returns 0.
-Up to Linux 3.17, it enabled/disabled automatic recomputing of msgmni
-upon memory add/remove or upon ipc namespace creation/removal.
-Echoing "1" into this file enabled msgmni automatic recomputing.
-Echoing "0" turned it off. auto_msgmni default value was 1.
-
-
-bootloader_type:
-================
-
-x86 bootloader identification
-
-This gives the bootloader type number as indicated by the bootloader,
-shifted left by 4, and OR'd with the low four bits of the bootloader
-version.  The reason for this encoding is that this used to match the
-type_of_loader field in the kernel header; the encoding is kept for
-backwards compatibility.  That is, if the full bootloader type number
-is 0x15 and the full version number is 0x234, this file will contain
-the value 340 = 0x154.
-
-See the type_of_loader and ext_loader_type fields in
-Documentation/x86/boot.rst for additional information.
-
-
-bootloader_version:
-===================
-
-x86 bootloader version
-
-The complete bootloader version number.  In the example above, this
-file will contain the value 564 = 0x234.
-
-See the type_of_loader and ext_loader_ver fields in
-Documentation/x86/boot.rst for additional information.
-
-
-cap_last_cap:
-=============
-
-Highest valid capability of the running kernel.  Exports
-CAP_LAST_CAP from the kernel.
-
-
-core_pattern:
-=============
-
-core_pattern is used to specify a core dumpfile pattern name.
-
-* max length 127 characters; default value is "core"
-* core_pattern is used as a pattern template for the output filename;
-  certain string patterns (beginning with '%') are substituted with
-  their actual values.
-* backward compatibility with core_uses_pid:
-
-	If core_pattern does not include "%p" (default does not)
-	and core_uses_pid is set, then .PID will be appended to
-	the filename.
-
-* corename format specifiers::
-
-	%<NUL>	'%' is dropped
-	%%	output one '%'
-	%p	pid
-	%P	global pid (init PID namespace)
-	%i	tid
-	%I	global tid (init PID namespace)
-	%u	uid (in initial user namespace)
-	%g	gid (in initial user namespace)
-	%d	dump mode, matches PR_SET_DUMPABLE and
-		/proc/sys/fs/suid_dumpable
-	%s	signal number
-	%t	UNIX time of dump
-	%h	hostname
-	%e	executable filename (may be shortened)
-	%E	executable path
-	%<OTHER> both are dropped
-
-* If the first character of the pattern is a '|', the kernel will treat
-  the rest of the pattern as a command to run.  The core dump will be
-  written to the standard input of that program instead of to a file.
-
-
-core_pipe_limit:
-================
-
-This sysctl is only applicable when core_pattern is configured to pipe
-core files to a user space helper (when the first character of
-core_pattern is a '|', see above).  When collecting cores via a pipe
-to an application, it is occasionally useful for the collecting
-application to gather data about the crashing process from its
-/proc/pid directory.  In order to do this safely, the kernel must wait
-for the collecting process to exit, so as not to remove the crashing
-processes proc files prematurely.  This in turn creates the
-possibility that a misbehaving userspace collecting process can block
-the reaping of a crashed process simply by never exiting.  This sysctl
-defends against that.  It defines how many concurrent crashing
-processes may be piped to user space applications in parallel.  If
-this value is exceeded, then those crashing processes above that value
-are noted via the kernel log and their cores are skipped.  0 is a
-special value, indicating that unlimited processes may be captured in
-parallel, but that no waiting will take place (i.e. the collecting
-process is not guaranteed access to /proc/<crashing pid>/).  This
-value defaults to 0.
-
-
-core_uses_pid:
-==============
-
-The default coredump filename is "core".  By setting
-core_uses_pid to 1, the coredump filename becomes core.PID.
-If core_pattern does not include "%p" (default does not)
-and core_uses_pid is set, then .PID will be appended to
-the filename.
-
-
-ctrl-alt-del:
-=============
-
-When the value in this file is 0, ctrl-alt-del is trapped and
-sent to the init(1) program to handle a graceful restart.
-When, however, the value is > 0, Linux's reaction to a Vulcan
-Nerve Pinch (tm) will be an immediate reboot, without even
-syncing its dirty buffers.
-
-Note:
-  when a program (like dosemu) has the keyboard in 'raw'
-  mode, the ctrl-alt-del is intercepted by the program before it
-  ever reaches the kernel tty layer, and it's up to the program
-  to decide what to do with it.
-
-
-dmesg_restrict:
-===============
-
-This toggle indicates whether unprivileged users are prevented
-from using dmesg(8) to view messages from the kernel's log buffer.
-When dmesg_restrict is set to (0) there are no restrictions. When
-dmesg_restrict is set set to (1), users must have CAP_SYSLOG to use
-dmesg(8).
-
-The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the
-default value of dmesg_restrict.
-
-
-domainname & hostname:
-======================
-
-These files can be used to set the NIS/YP domainname and the
-hostname of your box in exactly the same way as the commands
-domainname and hostname, i.e.::
-
-	# echo "darkstar" > /proc/sys/kernel/hostname
-	# echo "mydomain" > /proc/sys/kernel/domainname
-
-has the same effect as::
-
-	# hostname "darkstar"
-	# domainname "mydomain"
-
-Note, however, that the classic darkstar.frop.org has the
-hostname "darkstar" and DNS (Internet Domain Name Server)
-domainname "frop.org", not to be confused with the NIS (Network
-Information Service) or YP (Yellow Pages) domainname. These two
-domain names are in general different. For a detailed discussion
-see the hostname(1) man page.
-
-
-hardlockup_all_cpu_backtrace:
-=============================
-
-This value controls the hard lockup detector behavior when a hard
-lockup condition is detected as to whether or not to gather further
-debug information. If enabled, arch-specific all-CPU stack dumping
-will be initiated.
-
-0: do nothing. This is the default behavior.
-
-1: on detection capture more debug information.
-
-
-hardlockup_panic:
-=================
-
-This parameter can be used to control whether the kernel panics
-when a hard lockup is detected.
-
-   0 - don't panic on hard lockup
-   1 - panic on hard lockup
-
-See Documentation/lockup-watchdogs.txt for more information.  This can
-also be set using the nmi_watchdog kernel parameter.
-
-
-hotplug:
-========
-
-Path for the hotplug policy agent.
-Default value is "/sbin/hotplug".
-
-
-hung_task_panic:
-================
-
-Controls the kernel's behavior when a hung task is detected.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
-
-0: continue operation. This is the default behavior.
-
-1: panic immediately.
-
-
-hung_task_check_count:
-======================
-
-The upper bound on the number of tasks that are checked.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
-
-
-hung_task_timeout_secs:
-=======================
-
-When a task in D state did not get scheduled
-for more than this value report a warning.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
-
-0: means infinite timeout - no checking done.
-
-Possible values to set are in range {0..LONG_MAX/HZ}.
-
-
-hung_task_check_interval_secs:
-==============================
-
-Hung task check interval. If hung task checking is enabled
-(see hung_task_timeout_secs), the check is done every
-hung_task_check_interval_secs seconds.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
-
-0 (default): means use hung_task_timeout_secs as checking interval.
-Possible values to set are in range {0..LONG_MAX/HZ}.
-
-
-hung_task_warnings:
-===================
-
-The maximum number of warnings to report. During a check interval
-if a hung task is detected, this value is decreased by 1.
-When this value reaches 0, no more warnings will be reported.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
-
--1: report an infinite number of warnings.
-
-
-hyperv_record_panic_msg:
-========================
-
-Controls whether the panic kmsg data should be reported to Hyper-V.
-
-0: do not report panic kmsg data.
-
-1: report the panic kmsg data. This is the default behavior.
-
-
-kexec_load_disabled:
-====================
-
-A toggle indicating if the kexec_load syscall has been disabled. This
-value defaults to 0 (false: kexec_load enabled), but can be set to 1
-(true: kexec_load disabled). Once true, kexec can no longer be used, and
-the toggle cannot be set back to false. This allows a kexec image to be
-loaded before disabling the syscall, allowing a system to set up (and
-later use) an image without it being altered. Generally used together
-with the "modules_disabled" sysctl.
-
-
-kptr_restrict:
-==============
-
-This toggle indicates whether restrictions are placed on
-exposing kernel addresses via /proc and other interfaces.
-
-When kptr_restrict is set to 0 (the default) the address is hashed before
-printing. (This is the equivalent to %p.)
-
-When kptr_restrict is set to (1), kernel pointers printed using the %pK
-format specifier will be replaced with 0's unless the user has CAP_SYSLOG
-and effective user and group ids are equal to the real ids. This is
-because %pK checks are done at read() time rather than open() time, so
-if permissions are elevated between the open() and the read() (e.g via
-a setuid binary) then %pK will not leak kernel pointers to unprivileged
-users. Note, this is a temporary solution only. The correct long-term
-solution is to do the permission checks at open() time. Consider removing
-world read permissions from files that use %pK, and using dmesg_restrict
-to protect against uses of %pK in dmesg(8) if leaking kernel pointer
-values to unprivileged users is a concern.
-
-When kptr_restrict is set to (2), kernel pointers printed using
-%pK will be replaced with 0's regardless of privileges.
-
-
-l2cr: (PPC only)
-================
-
-This flag controls the L2 cache of G3 processor boards. If
-0, the cache is disabled. Enabled if nonzero.
-
-
-modules_disabled:
-=================
-
-A toggle value indicating if modules are allowed to be loaded
-in an otherwise modular kernel.  This toggle defaults to off
-(0), but can be set true (1).  Once true, modules can be
-neither loaded nor unloaded, and the toggle cannot be set back
-to false.  Generally used with the "kexec_load_disabled" toggle.
-
-
-msg_next_id, sem_next_id, and shm_next_id:
-==========================================
-
-These three toggles allows to specify desired id for next allocated IPC
-object: message, semaphore or shared memory respectively.
-
-By default they are equal to -1, which means generic allocation logic.
-Possible values to set are in range {0..INT_MAX}.
-
-Notes:
-  1) kernel doesn't guarantee, that new object will have desired id. So,
-     it's up to userspace, how to handle an object with "wrong" id.
-  2) Toggle with non-default value will be set back to -1 by kernel after
-     successful IPC object allocation. If an IPC object allocation syscall
-     fails, it is undefined if the value remains unmodified or is reset to -1.
-
-
-nmi_watchdog:
-=============
-
-This parameter can be used to control the NMI watchdog
-(i.e. the hard lockup detector) on x86 systems.
-
-0 - disable the hard lockup detector
-
-1 - enable the hard lockup detector
-
-The hard lockup detector monitors each CPU for its ability to respond to
-timer interrupts. The mechanism utilizes CPU performance counter registers
-that are programmed to generate Non-Maskable Interrupts (NMIs) periodically
-while a CPU is busy. Hence, the alternative name 'NMI watchdog'.
-
-The NMI watchdog is disabled by default if the kernel is running as a guest
-in a KVM virtual machine. This default can be overridden by adding::
-
-   nmi_watchdog=1
-
-to the guest kernel command line (see Documentation/admin-guide/kernel-parameters.rst).
-
-
-numa_balancing:
-===============
-
-Enables/disables automatic page fault based NUMA memory
-balancing. Memory is moved automatically to nodes
-that access it often.
-
-Enables/disables automatic NUMA memory balancing. On NUMA machines, there
-is a performance penalty if remote memory is accessed by a CPU. When this
-feature is enabled the kernel samples what task thread is accessing memory
-by periodically unmapping pages and later trapping a page fault. At the
-time of the page fault, it is determined if the data being accessed should
-be migrated to a local memory node.
-
-The unmapping of pages and trapping faults incur additional overhead that
-ideally is offset by improved memory locality but there is no universal
-guarantee. If the target workload is already bound to NUMA nodes then this
-feature should be disabled. Otherwise, if the system overhead from the
-feature is too high then the rate the kernel samples for NUMA hinting
-faults may be controlled by the numa_balancing_scan_period_min_ms,
-numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
-numa_balancing_scan_size_mb, and numa_balancing_settle_count sysctls.
-
-numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
-===============================================================================================================================
-
-
-Automatic NUMA balancing scans tasks address space and unmaps pages to
-detect if pages are properly placed or if the data should be migrated to a
-memory node local to where the task is running.  Every "scan delay" the task
-scans the next "scan size" number of pages in its address space. When the
-end of the address space is reached the scanner restarts from the beginning.
-
-In combination, the "scan delay" and "scan size" determine the scan rate.
-When "scan delay" decreases, the scan rate increases.  The scan delay and
-hence the scan rate of every task is adaptive and depends on historical
-behaviour. If pages are properly placed then the scan delay increases,
-otherwise the scan delay decreases.  The "scan size" is not adaptive but
-the higher the "scan size", the higher the scan rate.
-
-Higher scan rates incur higher system overhead as page faults must be
-trapped and potentially data must be migrated. However, the higher the scan
-rate, the more quickly a tasks memory is migrated to a local node if the
-workload pattern changes and minimises performance impact due to remote
-memory accesses. These sysctls control the thresholds for scan delays and
-the number of pages scanned.
-
-numa_balancing_scan_period_min_ms is the minimum time in milliseconds to
-scan a tasks virtual memory. It effectively controls the maximum scanning
-rate for each task.
-
-numa_balancing_scan_delay_ms is the starting "scan delay" used for a task
-when it initially forks.
-
-numa_balancing_scan_period_max_ms is the maximum time in milliseconds to
-scan a tasks virtual memory. It effectively controls the minimum scanning
-rate for each task.
-
-numa_balancing_scan_size_mb is how many megabytes worth of pages are
-scanned for a given scan.
-
-
-osrelease, ostype & version:
-============================
-
-::
-
-  # cat osrelease
-  2.1.88
-  # cat ostype
-  Linux
-  # cat version
-  #5 Wed Feb 25 21:49:24 MET 1998
-
-The files osrelease and ostype should be clear enough. Version
-needs a little more clarification however. The '#5' means that
-this is the fifth kernel built from this source base and the
-date behind it indicates the time the kernel was built.
-The only way to tune these values is to rebuild the kernel :-)
-
-
-overflowgid & overflowuid:
-==========================
-
-if your architecture did not always support 32-bit UIDs (i.e. arm,
-i386, m68k, sh, and sparc32), a fixed UID and GID will be returned to
-applications that use the old 16-bit UID/GID system calls, if the
-actual UID or GID would exceed 65535.
-
-These sysctls allow you to change the value of the fixed UID and GID.
-The default is 65534.
-
-
-panic:
-======
-
-The value in this file represents the number of seconds the kernel
-waits before rebooting on a panic. When you use the software watchdog,
-the recommended setting is 60.
-
-
-panic_on_io_nmi:
-================
-
-Controls the kernel's behavior when a CPU receives an NMI caused by
-an IO error.
-
-0: try to continue operation (default)
-
-1: panic immediately. The IO error triggered an NMI. This indicates a
-   serious system condition which could result in IO data corruption.
-   Rather than continuing, panicking might be a better choice. Some
-   servers issue this sort of NMI when the dump button is pushed,
-   and you can use this option to take a crash dump.
-
-
-panic_on_oops:
-==============
-
-Controls the kernel's behaviour when an oops or BUG is encountered.
-
-0: try to continue operation
-
-1: panic immediately.  If the `panic` sysctl is also non-zero then the
-   machine will be rebooted.
-
-
-panic_on_stackoverflow:
-=======================
-
-Controls the kernel's behavior when detecting the overflows of
-kernel, IRQ and exception stacks except a user stack.
-This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
-
-0: try to continue operation.
-
-1: panic immediately.
-
-
-panic_on_unrecovered_nmi:
-=========================
-
-The default Linux behaviour on an NMI of either memory or unknown is
-to continue operation. For many environments such as scientific
-computing it is preferable that the box is taken out and the error
-dealt with than an uncorrected parity/ECC error get propagated.
-
-A small number of systems do generate NMI's for bizarre random reasons
-such as power management so the default is off. That sysctl works like
-the existing panic controls already in that directory.
-
-
-panic_on_warn:
-==============
-
-Calls panic() in the WARN() path when set to 1.  This is useful to avoid
-a kernel rebuild when attempting to kdump at the location of a WARN().
-
-0: only WARN(), default behaviour.
-
-1: call panic() after printing out WARN() location.
-
-
-panic_print:
-============
-
-Bitmask for printing system info when panic happens. User can chose
-combination of the following bits:
-
-=====  ========================================
-bit 0  print all tasks info
-bit 1  print system memory info
-bit 2  print timer info
-bit 3  print locks info if CONFIG_LOCKDEP is on
-bit 4  print ftrace buffer
-=====  ========================================
-
-So for example to print tasks and memory info on panic, user can::
-
-  echo 3 > /proc/sys/kernel/panic_print
-
-
-panic_on_rcu_stall:
-===================
-
-When set to 1, calls panic() after RCU stall detection messages. This
-is useful to define the root cause of RCU stalls using a vmcore.
-
-0: do not panic() when RCU stall takes place, default behavior.
-
-1: panic() after printing RCU stall messages.
-
-
-perf_cpu_time_max_percent:
-==========================
-
-Hints to the kernel how much CPU time it should be allowed to
-use to handle perf sampling events.  If the perf subsystem
-is informed that its samples are exceeding this limit, it
-will drop its sampling frequency to attempt to reduce its CPU
-usage.
-
-Some perf sampling happens in NMIs.  If these samples
-unexpectedly take too long to execute, the NMIs can become
-stacked up next to each other so much that nothing else is
-allowed to execute.
-
-0:
-   disable the mechanism.  Do not monitor or correct perf's
-   sampling rate no matter how CPU time it takes.
-
-1-100:
-   attempt to throttle perf's sample rate to this
-   percentage of CPU.  Note: the kernel calculates an
-   "expected" length of each sample event.  100 here means
-   100% of that expected length.  Even if this is set to
-   100, you may still see sample throttling if this
-   length is exceeded.  Set to 0 if you truly do not care
-   how much CPU is consumed.
-
-
-perf_event_paranoid:
-====================
-
-Controls use of the performance events system by unprivileged
-users (without CAP_SYS_ADMIN).  The default value is 2.
-
-===  ==================================================================
- -1  Allow use of (almost) all events by all users
-
-     Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
-
->=0  Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN
-
-     Disallow raw tracepoint access by users without CAP_SYS_ADMIN
-
->=1  Disallow CPU event access by users without CAP_SYS_ADMIN
-
->=2  Disallow kernel profiling by users without CAP_SYS_ADMIN
-===  ==================================================================
-
-
-perf_event_max_stack:
-=====================
-
-Controls maximum number of stack frames to copy for (attr.sample_type &
-PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using
-'perf record -g' or 'perf trace --call-graph fp'.
-
-This can only be done when no events are in use that have callchains
-enabled, otherwise writing to this file will return -EBUSY.
-
-The default value is 127.
-
-
-perf_event_mlock_kb:
-====================
-
-Control size of per-cpu ring buffer not counted agains mlock limit.
-
-The default value is 512 + 1 page
-
-
-perf_event_max_contexts_per_stack:
-==================================
-
-Controls maximum number of stack frame context entries for
-(attr.sample_type & PERF_SAMPLE_CALLCHAIN) configured events, for
-instance, when using 'perf record -g' or 'perf trace --call-graph fp'.
-
-This can only be done when no events are in use that have callchains
-enabled, otherwise writing to this file will return -EBUSY.
-
-The default value is 8.
-
-
-pid_max:
-========
-
-PID allocation wrap value.  When the kernel's next PID value
-reaches this value, it wraps back to a minimum PID value.
-PIDs of value pid_max or larger are not allocated.
-
-
-ns_last_pid:
-============
-
-The last pid allocated in the current (the one task using this sysctl
-lives in) pid namespace. When selecting a pid for a next task on fork
-kernel tries to allocate a number starting from this one.
-
-
-powersave-nap: (PPC only)
-=========================
-
-If set, Linux-PPC will use the 'nap' mode of powersaving,
-otherwise the 'doze' mode will be used.
-
-==============================================================
-
-printk:
-=======
-
-The four values in printk denote: console_loglevel,
-default_message_loglevel, minimum_console_loglevel and
-default_console_loglevel respectively.
-
-These values influence printk() behavior when printing or
-logging error messages. See 'man 2 syslog' for more info on
-the different loglevels.
-
-- console_loglevel:
-	messages with a higher priority than
-	this will be printed to the console
-- default_message_loglevel:
-	messages without an explicit priority
-	will be printed with this priority
-- minimum_console_loglevel:
-	minimum (highest) value to which
-	console_loglevel can be set
-- default_console_loglevel:
-	default value for console_loglevel
-
-
-printk_delay:
-=============
-
-Delay each printk message in printk_delay milliseconds
-
-Value from 0 - 10000 is allowed.
-
-
-printk_ratelimit:
-=================
-
-Some warning messages are rate limited. printk_ratelimit specifies
-the minimum length of time between these messages (in jiffies), by
-default we allow one every 5 seconds.
-
-A value of 0 will disable rate limiting.
-
-
-printk_ratelimit_burst:
-=======================
-
-While long term we enforce one message per printk_ratelimit
-seconds, we do allow a burst of messages to pass through.
-printk_ratelimit_burst specifies the number of messages we can
-send before ratelimiting kicks in.
-
-
-printk_devkmsg:
-===============
-
-Control the logging to /dev/kmsg from userspace:
-
-ratelimit:
-	default, ratelimited
-
-on: unlimited logging to /dev/kmsg from userspace
-
-off: logging to /dev/kmsg disabled
-
-The kernel command line parameter printk.devkmsg= overrides this and is
-a one-time setting until next reboot: once set, it cannot be changed by
-this sysctl interface anymore.
-
-
-randomize_va_space:
-===================
-
-This option can be used to select the type of process address
-space randomization that is used in the system, for architectures
-that support this feature.
-
-==  ===========================================================================
-0   Turn the process address space randomization off.  This is the
-    default for architectures that do not support this feature anyways,
-    and kernels that are booted with the "norandmaps" parameter.
-
-1   Make the addresses of mmap base, stack and VDSO page randomized.
-    This, among other things, implies that shared libraries will be
-    loaded to random addresses.  Also for PIE-linked binaries, the
-    location of code start is randomized.  This is the default if the
-    CONFIG_COMPAT_BRK option is enabled.
-
-2   Additionally enable heap randomization.  This is the default if
-    CONFIG_COMPAT_BRK is disabled.
-
-    There are a few legacy applications out there (such as some ancient
-    versions of libc.so.5 from 1996) that assume that brk area starts
-    just after the end of the code+bss.  These applications break when
-    start of the brk area is randomized.  There are however no known
-    non-legacy applications that would be broken this way, so for most
-    systems it is safe to choose full randomization.
-
-    Systems with ancient and/or broken binaries should be configured
-    with CONFIG_COMPAT_BRK enabled, which excludes the heap from process
-    address space randomization.
-==  ===========================================================================
-
-
-reboot-cmd: (Sparc only)
-========================
-
-??? This seems to be a way to give an argument to the Sparc
-ROM/Flash boot loader. Maybe to tell it what to do after
-rebooting. ???
-
-
-rtsig-max & rtsig-nr:
-=====================
-
-The file rtsig-max can be used to tune the maximum number
-of POSIX realtime (queued) signals that can be outstanding
-in the system.
-
-rtsig-nr shows the number of RT signals currently queued.
-
-
-sched_energy_aware:
-===================
-
-Enables/disables Energy Aware Scheduling (EAS). EAS starts
-automatically on platforms where it can run (that is,
-platforms with asymmetric CPU topologies and having an Energy
-Model available). If your platform happens to meet the
-requirements for EAS but you do not want to use it, change
-this value to 0.
-
-
-sched_schedstats:
-=================
-
-Enables/disables scheduler statistics. Enabling this feature
-incurs a small amount of overhead in the scheduler but is
-useful for debugging and performance tuning.
-
-
-sg-big-buff:
-============
-
-This file shows the size of the generic SCSI (sg) buffer.
-You can't tune it just yet, but you could change it on
-compile time by editing include/scsi/sg.h and changing
-the value of SG_BIG_BUFF.
-
-There shouldn't be any reason to change this value. If
-you can come up with one, you probably know what you
-are doing anyway :)
-
-
-shmall:
-=======
-
-This parameter sets the total amount of shared memory pages that
-can be used system wide. Hence, SHMALL should always be at least
-ceil(shmmax/PAGE_SIZE).
-
-If you are not sure what the default PAGE_SIZE is on your Linux
-system, you can run the following command:
-
-	# getconf PAGE_SIZE
-
-
-shmmax:
-=======
-
-This value can be used to query and set the run time limit
-on the maximum shared memory segment size that can be created.
-Shared memory segments up to 1Gb are now supported in the
-kernel.  This value defaults to SHMMAX.
-
-
-shm_rmid_forced:
-================
-
-Linux lets you set resource limits, including how much memory one
-process can consume, via setrlimit(2).  Unfortunately, shared memory
-segments are allowed to exist without association with any process, and
-thus might not be counted against any resource limits.  If enabled,
-shared memory segments are automatically destroyed when their attach
-count becomes zero after a detach or a process termination.  It will
-also destroy segments that were created, but never attached to, on exit
-from the process.  The only use left for IPC_RMID is to immediately
-destroy an unattached segment.  Of course, this breaks the way things are
-defined, so some applications might stop working.  Note that this
-feature will do you no good unless you also configure your resource
-limits (in particular, RLIMIT_AS and RLIMIT_NPROC).  Most systems don't
-need this.
-
-Note that if you change this from 0 to 1, already created segments
-without users and with a dead originative process will be destroyed.
-
-
-sysctl_writes_strict:
-=====================
-
-Control how file position affects the behavior of updating sysctl values
-via the /proc/sys interface:
-
-  ==   ======================================================================
-  -1   Legacy per-write sysctl value handling, with no printk warnings.
-       Each write syscall must fully contain the sysctl value to be
-       written, and multiple writes on the same sysctl file descriptor
-       will rewrite the sysctl value, regardless of file position.
-   0   Same behavior as above, but warn about processes that perform writes
-       to a sysctl file descriptor when the file position is not 0.
-   1   (default) Respect file position when writing sysctl strings. Multiple
-       writes will append to the sysctl value buffer. Anything past the max
-       length of the sysctl value buffer will be ignored. Writes to numeric
-       sysctl entries must always be at file position 0 and the value must
-       be fully contained in the buffer sent in the write syscall.
-  ==   ======================================================================
-
-
-softlockup_all_cpu_backtrace:
-=============================
-
-This value controls the soft lockup detector thread's behavior
-when a soft lockup condition is detected as to whether or not
-to gather further debug information. If enabled, each cpu will
-be issued an NMI and instructed to capture stack trace.
-
-This feature is only applicable for architectures which support
-NMI.
-
-0: do nothing. This is the default behavior.
-
-1: on detection capture more debug information.
-
-
-soft_watchdog:
-==============
-
-This parameter can be used to control the soft lockup detector.
-
-   0 - disable the soft lockup detector
-
-   1 - enable the soft lockup detector
-
-The soft lockup detector monitors CPUs for threads that are hogging the CPUs
-without rescheduling voluntarily, and thus prevent the 'watchdog/N' threads
-from running. The mechanism depends on the CPUs ability to respond to timer
-interrupts which are needed for the 'watchdog/N' threads to be woken up by
-the watchdog timer function, otherwise the NMI watchdog - if enabled - can
-detect a hard lockup condition.
-
-
-stack_erasing:
-==============
-
-This parameter can be used to control kernel stack erasing at the end
-of syscalls for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK.
-
-That erasing reduces the information which kernel stack leak bugs
-can reveal and blocks some uninitialized stack variable attacks.
-The tradeoff is the performance impact: on a single CPU system kernel
-compilation sees a 1% slowdown, other systems and workloads may vary.
-
-  0: kernel stack erasing is disabled, STACKLEAK_METRICS are not updated.
-
-  1: kernel stack erasing is enabled (default), it is performed before
-     returning to the userspace at the end of syscalls.
-
-
-tainted
-=======
-
-Non-zero if the kernel has been tainted. Numeric values, which can be
-ORed together. The letters are seen in "Tainted" line of Oops reports.
-
-======  =====  ==============================================================
-     1  `(P)`  proprietary module was loaded
-     2  `(F)`  module was force loaded
-     4  `(S)`  SMP kernel oops on an officially SMP incapable processor
-     8  `(R)`  module was force unloaded
-    16  `(M)`  processor reported a Machine Check Exception (MCE)
-    32  `(B)`  bad page referenced or some unexpected page flags
-    64  `(U)`  taint requested by userspace application
-   128  `(D)`  kernel died recently, i.e. there was an OOPS or BUG
-   256  `(A)`  an ACPI table was overridden by user
-   512  `(W)`  kernel issued warning
-  1024  `(C)`  staging driver was loaded
-  2048  `(I)`  workaround for bug in platform firmware applied
-  4096  `(O)`  externally-built ("out-of-tree") module was loaded
-  8192  `(E)`  unsigned module was loaded
- 16384  `(L)`  soft lockup occurred
- 32768  `(K)`  kernel has been live patched
- 65536  `(X)`  Auxiliary taint, defined and used by for distros
-131072  `(T)`  The kernel was built with the struct randomization plugin
-======  =====  ==============================================================
-
-See Documentation/admin-guide/tainted-kernels.rst for more information.
-
-
-threads-max:
-============
-
-This value controls the maximum number of threads that can be created
-using fork().
-
-During initialization the kernel sets this value such that even if the
-maximum number of threads is created, the thread structures occupy only
-a part (1/8th) of the available RAM pages.
-
-The minimum value that can be written to threads-max is 20.
-
-The maximum value that can be written to threads-max is given by the
-constant FUTEX_TID_MASK (0x3fffffff).
-
-If a value outside of this range is written to threads-max an error
-EINVAL occurs.
-
-The value written is checked against the available RAM pages. If the
-thread structures would occupy too much (more than 1/8th) of the
-available RAM pages threads-max is reduced accordingly.
-
-
-unknown_nmi_panic:
-==================
-
-The value in this file affects behavior of handling NMI. When the
-value is non-zero, unknown NMI is trapped and then panic occurs. At
-that time, kernel debugging information is displayed on console.
-
-NMI switch that most IA32 servers have fires unknown NMI up, for
-example.  If a system hangs up, try pressing the NMI switch.
-
-
-watchdog:
-=========
-
-This parameter can be used to disable or enable the soft lockup detector
-_and_ the NMI watchdog (i.e. the hard lockup detector) at the same time.
-
-   0 - disable both lockup detectors
-
-   1 - enable both lockup detectors
-
-The soft lockup detector and the NMI watchdog can also be disabled or
-enabled individually, using the soft_watchdog and nmi_watchdog parameters.
-If the watchdog parameter is read, for example by executing::
-
-   cat /proc/sys/kernel/watchdog
-
-the output of this command (0 or 1) shows the logical OR of soft_watchdog
-and nmi_watchdog.
-
-
-watchdog_cpumask:
-=================
-
-This value can be used to control on which cpus the watchdog may run.
-The default cpumask is all possible cores, but if NO_HZ_FULL is
-enabled in the kernel config, and cores are specified with the
-nohz_full= boot argument, those cores are excluded by default.
-Offline cores can be included in this mask, and if the core is later
-brought online, the watchdog will be started based on the mask value.
-
-Typically this value would only be touched in the nohz_full case
-to re-enable cores that by default were not running the watchdog,
-if a kernel lockup was suspected on those cores.
-
-The argument value is the standard cpulist format for cpumasks,
-so for example to enable the watchdog on cores 0, 2, 3, and 4 you
-might say::
-
-  echo 0,2-4 > /proc/sys/kernel/watchdog_cpumask
-
-
-watchdog_thresh:
-================
-
-This value can be used to control the frequency of hrtimer and NMI
-events and the soft and hard lockup thresholds. The default threshold
-is 10 seconds.
-
-The softlockup threshold is (2 * watchdog_thresh). Setting this
-tunable to zero will disable lockup detection altogether.
diff --git a/Documentation/sysctl/net.rst b/Documentation/sysctl/net.rst
deleted file mode 100644
index a7d44e71019d..000000000000
--- a/Documentation/sysctl/net.rst
+++ /dev/null
@@ -1,461 +0,0 @@
-================================
-Documentation for /proc/sys/net/
-================================
-
-Copyright
-
-Copyright (c) 1999
-
-	- Terrehon Bowden <terrehon@pacbell.net>
-	- Bodo Bauer <bb@ricochet.net>
-
-Copyright (c) 2000
-
-	- Jorge Nerin <comandante@zaralinux.com>
-
-Copyright (c) 2009
-
-	- Shen Feng <shen@cn.fujitsu.com>
-
-For general info and legal blurb, please look in index.rst.
-
-------------------------------------------------------------------------------
-
-This file contains the documentation for the sysctl files in
-/proc/sys/net
-
-The interface  to  the  networking  parts  of  the  kernel  is  located  in
-/proc/sys/net. The following table shows all possible subdirectories.  You may
-see only some of them, depending on your kernel's configuration.
-
-
-Table : Subdirectories in /proc/sys/net
-
- ========= =================== = ========== ==================
- Directory Content               Directory  Content
- ========= =================== = ========== ==================
- core      General parameter     appletalk  Appletalk protocol
- unix      Unix domain sockets   netrom     NET/ROM
- 802       E802 protocol         ax25       AX25
- ethernet  Ethernet protocol     rose       X.25 PLP layer
- ipv4      IP version 4          x25        X.25 protocol
- ipx       IPX                   token-ring IBM token ring
- bridge    Bridging              decnet     DEC net
- ipv6      IP version 6          tipc       TIPC
- ========= =================== = ========== ==================
-
-1. /proc/sys/net/core - Network core options
-============================================
-
-bpf_jit_enable
---------------
-
-This enables the BPF Just in Time (JIT) compiler. BPF is a flexible
-and efficient infrastructure allowing to execute bytecode at various
-hook points. It is used in a number of Linux kernel subsystems such
-as networking (e.g. XDP, tc), tracing (e.g. kprobes, uprobes, tracepoints)
-and security (e.g. seccomp). LLVM has a BPF back end that can compile
-restricted C into a sequence of BPF instructions. After program load
-through bpf(2) and passing a verifier in the kernel, a JIT will then
-translate these BPF proglets into native CPU instructions. There are
-two flavors of JITs, the newer eBPF JIT currently supported on:
-
-  - x86_64
-  - x86_32
-  - arm64
-  - arm32
-  - ppc64
-  - sparc64
-  - mips64
-  - s390x
-  - riscv
-
-And the older cBPF JIT supported on the following archs:
-
-  - mips
-  - ppc
-  - sparc
-
-eBPF JITs are a superset of cBPF JITs, meaning the kernel will
-migrate cBPF instructions into eBPF instructions and then JIT
-compile them transparently. Older cBPF JITs can only translate
-tcpdump filters, seccomp rules, etc, but not mentioned eBPF
-programs loaded through bpf(2).
-
-Values:
-
-	- 0 - disable the JIT (default value)
-	- 1 - enable the JIT
-	- 2 - enable the JIT and ask the compiler to emit traces on kernel log.
-
-bpf_jit_harden
---------------
-
-This enables hardening for the BPF JIT compiler. Supported are eBPF
-JIT backends. Enabling hardening trades off performance, but can
-mitigate JIT spraying.
-
-Values:
-
-	- 0 - disable JIT hardening (default value)
-	- 1 - enable JIT hardening for unprivileged users only
-	- 2 - enable JIT hardening for all users
-
-bpf_jit_kallsyms
-----------------
-
-When BPF JIT compiler is enabled, then compiled images are unknown
-addresses to the kernel, meaning they neither show up in traces nor
-in /proc/kallsyms. This enables export of these addresses, which can
-be used for debugging/tracing. If bpf_jit_harden is enabled, this
-feature is disabled.
-
-Values :
-
-	- 0 - disable JIT kallsyms export (default value)
-	- 1 - enable JIT kallsyms export for privileged users only
-
-bpf_jit_limit
--------------
-
-This enforces a global limit for memory allocations to the BPF JIT
-compiler in order to reject unprivileged JIT requests once it has
-been surpassed. bpf_jit_limit contains the value of the global limit
-in bytes.
-
-dev_weight
-----------
-
-The maximum number of packets that kernel can handle on a NAPI interrupt,
-it's a Per-CPU variable. For drivers that support LRO or GRO_HW, a hardware
-aggregated packet is counted as one packet in this context.
-
-Default: 64
-
-dev_weight_rx_bias
-------------------
-
-RPS (e.g. RFS, aRFS) processing is competing with the registered NAPI poll function
-of the driver for the per softirq cycle netdev_budget. This parameter influences
-the proportion of the configured netdev_budget that is spent on RPS based packet
-processing during RX softirq cycles. It is further meant for making current
-dev_weight adaptable for asymmetric CPU needs on RX/TX side of the network stack.
-(see dev_weight_tx_bias) It is effective on a per CPU basis. Determination is based
-on dev_weight and is calculated multiplicative (dev_weight * dev_weight_rx_bias).
-
-Default: 1
-
-dev_weight_tx_bias
-------------------
-
-Scales the maximum number of packets that can be processed during a TX softirq cycle.
-Effective on a per CPU basis. Allows scaling of current dev_weight for asymmetric
-net stack processing needs. Be careful to avoid making TX softirq processing a CPU hog.
-
-Calculation is based on dev_weight (dev_weight * dev_weight_tx_bias).
-
-Default: 1
-
-default_qdisc
--------------
-
-The default queuing discipline to use for network devices. This allows
-overriding the default of pfifo_fast with an alternative. Since the default
-queuing discipline is created without additional parameters so is best suited
-to queuing disciplines that work well without configuration like stochastic
-fair queue (sfq), CoDel (codel) or fair queue CoDel (fq_codel). Don't use
-queuing disciplines like Hierarchical Token Bucket or Deficit Round Robin
-which require setting up classes and bandwidths. Note that physical multiqueue
-interfaces still use mq as root qdisc, which in turn uses this default for its
-leaves. Virtual devices (like e.g. lo or veth) ignore this setting and instead
-default to noqueue.
-
-Default: pfifo_fast
-
-busy_read
----------
-
-Low latency busy poll timeout for socket reads. (needs CONFIG_NET_RX_BUSY_POLL)
-Approximate time in us to busy loop waiting for packets on the device queue.
-This sets the default value of the SO_BUSY_POLL socket option.
-Can be set or overridden per socket by setting socket option SO_BUSY_POLL,
-which is the preferred method of enabling. If you need to enable the feature
-globally via sysctl, a value of 50 is recommended.
-
-Will increase power usage.
-
-Default: 0 (off)
-
-busy_poll
-----------------
-Low latency busy poll timeout for poll and select. (needs CONFIG_NET_RX_BUSY_POLL)
-Approximate time in us to busy loop waiting for events.
-Recommended value depends on the number of sockets you poll on.
-For several sockets 50, for several hundreds 100.
-For more than that you probably want to use epoll.
-Note that only sockets with SO_BUSY_POLL set will be busy polled,
-so you want to either selectively set SO_BUSY_POLL on those sockets or set
-sysctl.net.busy_read globally.
-
-Will increase power usage.
-
-Default: 0 (off)
-
-rmem_default
-------------
-
-The default setting of the socket receive buffer in bytes.
-
-rmem_max
---------
-
-The maximum receive socket buffer size in bytes.
-
-tstamp_allow_data
------------------
-Allow processes to receive tx timestamps looped together with the original
-packet contents. If disabled, transmit timestamp requests from unprivileged
-processes are dropped unless socket option SOF_TIMESTAMPING_OPT_TSONLY is set.
-
-Default: 1 (on)
-
-
-wmem_default
-------------
-
-The default setting (in bytes) of the socket send buffer.
-
-wmem_max
---------
-
-The maximum send socket buffer size in bytes.
-
-message_burst and message_cost
-------------------------------
-
-These parameters  are used to limit the warning messages written to the kernel
-log from  the  networking  code.  They  enforce  a  rate  limit  to  make  a
-denial-of-service attack  impossible. A higher message_cost factor, results in
-fewer messages that will be written. Message_burst controls when messages will
-be dropped.  The  default  settings  limit  warning messages to one every five
-seconds.
-
-warnings
---------
-
-This sysctl is now unused.
-
-This was used to control console messages from the networking stack that
-occur because of problems on the network like duplicate address or bad
-checksums.
-
-These messages are now emitted at KERN_DEBUG and can generally be enabled
-and controlled by the dynamic_debug facility.
-
-netdev_budget
--------------
-
-Maximum number of packets taken from all interfaces in one polling cycle (NAPI
-poll). In one polling cycle interfaces which are registered to polling are
-probed in a round-robin manner. Also, a polling cycle may not exceed
-netdev_budget_usecs microseconds, even if netdev_budget has not been
-exhausted.
-
-netdev_budget_usecs
----------------------
-
-Maximum number of microseconds in one NAPI polling cycle. Polling
-will exit when either netdev_budget_usecs have elapsed during the
-poll cycle or the number of packets processed reaches netdev_budget.
-
-netdev_max_backlog
-------------------
-
-Maximum number  of  packets,  queued  on  the  INPUT  side, when the interface
-receives packets faster than kernel can process them.
-
-netdev_rss_key
---------------
-
-RSS (Receive Side Scaling) enabled drivers use a 40 bytes host key that is
-randomly generated.
-Some user space might need to gather its content even if drivers do not
-provide ethtool -x support yet.
-
-::
-
-  myhost:~# cat /proc/sys/net/core/netdev_rss_key
-  84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8: ... (52 bytes total)
-
-File contains nul bytes if no driver ever called netdev_rss_key_fill() function.
-
-Note:
-  /proc/sys/net/core/netdev_rss_key contains 52 bytes of key,
-  but most drivers only use 40 bytes of it.
-
-::
-
-  myhost:~# ethtool -x eth0
-  RX flow hash indirection table for eth0 with 8 RX ring(s):
-      0:    0     1     2     3     4     5     6     7
-  RSS hash key:
-  84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8:43:e3:c9:0c:fd:17:55:c2:3a:4d:69:ed:f1:42:89
-
-netdev_tstamp_prequeue
-----------------------
-
-If set to 0, RX packet timestamps can be sampled after RPS processing, when
-the target CPU processes packets. It might give some delay on timestamps, but
-permit to distribute the load on several cpus.
-
-If set to 1 (default), timestamps are sampled as soon as possible, before
-queueing.
-
-optmem_max
-----------
-
-Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
-of struct cmsghdr structures with appended data.
-
-fb_tunnels_only_for_init_net
-----------------------------
-
-Controls if fallback tunnels (like tunl0, gre0, gretap0, erspan0,
-sit0, ip6tnl0, ip6gre0) are automatically created when a new
-network namespace is created, if corresponding tunnel is present
-in initial network namespace.
-If set to 1, these devices are not automatically created, and
-user space is responsible for creating them if needed.
-
-Default : 0  (for compatibility reasons)
-
-devconf_inherit_init_net
-------------------------
-
-Controls if a new network namespace should inherit all current
-settings under /proc/sys/net/{ipv4,ipv6}/conf/{all,default}/. By
-default, we keep the current behavior: for IPv4 we inherit all current
-settings from init_net and for IPv6 we reset all settings to default.
-
-If set to 1, both IPv4 and IPv6 settings are forced to inherit from
-current ones in init_net. If set to 2, both IPv4 and IPv6 settings are
-forced to reset to their default values.
-
-Default : 0  (for compatibility reasons)
-
-2. /proc/sys/net/unix - Parameters for Unix domain sockets
-----------------------------------------------------------
-
-There is only one file in this directory.
-unix_dgram_qlen limits the max number of datagrams queued in Unix domain
-socket's buffer. It will not take effect unless PF_UNIX flag is specified.
-
-
-3. /proc/sys/net/ipv4 - IPV4 settings
--------------------------------------
-Please see: Documentation/networking/ip-sysctl.txt and ipvs-sysctl.txt for
-descriptions of these entries.
-
-
-4. Appletalk
-------------
-
-The /proc/sys/net/appletalk  directory  holds the Appletalk configuration data
-when Appletalk is loaded. The configurable parameters are:
-
-aarp-expiry-time
-----------------
-
-The amount  of  time  we keep an ARP entry before expiring it. Used to age out
-old hosts.
-
-aarp-resolve-time
------------------
-
-The amount of time we will spend trying to resolve an Appletalk address.
-
-aarp-retransmit-limit
----------------------
-
-The number of times we will retransmit a query before giving up.
-
-aarp-tick-time
---------------
-
-Controls the rate at which expires are checked.
-
-The directory  /proc/net/appletalk  holds the list of active Appletalk sockets
-on a machine.
-
-The fields  indicate  the DDP type, the local address (in network:node format)
-the remote  address,  the  size of the transmit pending queue, the size of the
-received queue  (bytes waiting for applications to read) the state and the uid
-owning the socket.
-
-/proc/net/atalk_iface lists  all  the  interfaces  configured for appletalk.It
-shows the  name  of the interface, its Appletalk address, the network range on
-that address  (or  network number for phase 1 networks), and the status of the
-interface.
-
-/proc/net/atalk_route lists  each  known  network  route.  It lists the target
-(network) that the route leads to, the router (may be directly connected), the
-route flags, and the device the route is using.
-
-
-5. IPX
-------
-
-The IPX protocol has no tunable values in proc/sys/net.
-
-The IPX  protocol  does,  however,  provide  proc/net/ipx. This lists each IPX
-socket giving  the  local  and  remote  addresses  in  Novell  format (that is
-network:node:port). In  accordance  with  the  strange  Novell  tradition,
-everything but the port is in hex. Not_Connected is displayed for sockets that
-are not  tied to a specific remote address. The Tx and Rx queue sizes indicate
-the number  of  bytes  pending  for  transmission  and  reception.  The  state
-indicates the  state  the  socket  is  in and the uid is the owning uid of the
-socket.
-
-The /proc/net/ipx_interface  file lists all IPX interfaces. For each interface
-it gives  the network number, the node number, and indicates if the network is
-the primary  network.  It  also  indicates  which  device  it  is bound to (or
-Internal for  internal  networks)  and  the  Frame  Type if appropriate. Linux
-supports 802.3,  802.2,  802.2  SNAP  and DIX (Blue Book) ethernet framing for
-IPX.
-
-The /proc/net/ipx_route  table  holds  a list of IPX routes. For each route it
-gives the  destination  network, the router node (or Directly) and the network
-address of the router (or Connected) for internal networks.
-
-6. TIPC
--------
-
-tipc_rmem
----------
-
-The TIPC protocol now has a tunable for the receive memory, similar to the
-tcp_rmem - i.e. a vector of 3 INTEGERs: (min, default, max)
-
-::
-
-    # cat /proc/sys/net/tipc/tipc_rmem
-    4252725 34021800        68043600
-    #
-
-The max value is set to CONN_OVERLOAD_LIMIT, and the default and min values
-are scaled (shifted) versions of that same value.  Note that the min value
-is not at this point in time used in any meaningful way, but the triplet is
-preserved in order to be consistent with things like tcp_rmem.
-
-named_timeout
--------------
-
-TIPC name table updates are distributed asynchronously in a cluster, without
-any form of transaction handling. This means that different race scenarios are
-possible. One such is that a name withdrawal sent out by one node and received
-by another node may arrive after a second, overlapping name publication already
-has been accepted from a third node, although the conflicting updates
-originally may have been issued in the correct sequential order.
-If named_timeout is nonzero, failed topology updates will be placed on a defer
-queue until another event arrives that clears the error, or until the timeout
-expires. Value is in milliseconds.
diff --git a/Documentation/sysctl/sunrpc.rst b/Documentation/sysctl/sunrpc.rst
deleted file mode 100644
index 09780a682afd..000000000000
--- a/Documentation/sysctl/sunrpc.rst
+++ /dev/null
@@ -1,25 +0,0 @@
-===================================
-Documentation for /proc/sys/sunrpc/
-===================================
-
-kernel version 2.2.10
-
-Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
-
-For general info and legal blurb, please look in index.rst.
-
-------------------------------------------------------------------------------
-
-This file contains the documentation for the sysctl files in
-/proc/sys/sunrpc and is valid for Linux kernel version 2.2.
-
-The files in this directory can be used to (re)set the debug
-flags of the SUN Remote Procedure Call (RPC) subsystem in
-the Linux kernel. This stuff is used for NFS, KNFSD and
-maybe a few other things as well.
-
-The files in there are used to control the debugging flags:
-rpc_debug, nfs_debug, nfsd_debug and nlm_debug.
-
-These flags are for kernel hackers only. You should read the
-source code in net/sunrpc/ for more information.
diff --git a/Documentation/sysctl/user.rst b/Documentation/sysctl/user.rst
deleted file mode 100644
index 650eaa03f15e..000000000000
--- a/Documentation/sysctl/user.rst
+++ /dev/null
@@ -1,78 +0,0 @@
-=================================
-Documentation for /proc/sys/user/
-=================================
-
-kernel version 4.9.0
-
-Copyright (c) 2016		Eric Biederman <ebiederm@xmission.com>
-
-------------------------------------------------------------------------------
-
-This file contains the documentation for the sysctl files in
-/proc/sys/user.
-
-The files in this directory can be used to override the default
-limits on the number of namespaces and other objects that have
-per user per user namespace limits.
-
-The primary purpose of these limits is to stop programs that
-malfunction and attempt to create a ridiculous number of objects,
-before the malfunction becomes a system wide problem.  It is the
-intention that the defaults of these limits are set high enough that
-no program in normal operation should run into these limits.
-
-The creation of per user per user namespace objects are charged to
-the user in the user namespace who created the object and
-verified to be below the per user limit in that user namespace.
-
-The creation of objects is also charged to all of the users
-who created user namespaces the creation of the object happens
-in (user namespaces can be nested) and verified to be below the per user
-limits in the user namespaces of those users.
-
-This recursive counting of created objects ensures that creating a
-user namespace does not allow a user to escape their current limits.
-
-Currently, these files are in /proc/sys/user:
-
-max_cgroup_namespaces
-=====================
-
-  The maximum number of cgroup namespaces that any user in the current
-  user namespace may create.
-
-max_ipc_namespaces
-==================
-
-  The maximum number of ipc namespaces that any user in the current
-  user namespace may create.
-
-max_mnt_namespaces
-==================
-
-  The maximum number of mount namespaces that any user in the current
-  user namespace may create.
-
-max_net_namespaces
-==================
-
-  The maximum number of network namespaces that any user in the
-  current user namespace may create.
-
-max_pid_namespaces
-==================
-
-  The maximum number of pid namespaces that any user in the current
-  user namespace may create.
-
-max_user_namespaces
-===================
-
-  The maximum number of user namespaces that any user in the current
-  user namespace may create.
-
-max_uts_namespaces
-==================
-
-  The maximum number of user namespaces that any user in the current
-  user namespace may create.
diff --git a/Documentation/sysctl/vm.rst b/Documentation/sysctl/vm.rst
deleted file mode 100644
index 5aceb5cd5ce7..000000000000
--- a/Documentation/sysctl/vm.rst
+++ /dev/null
@@ -1,964 +0,0 @@
-===============================
-Documentation for /proc/sys/vm/
-===============================
-
-kernel version 2.6.29
-
-Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
-
-Copyright (c) 2008         Peter W. Morreale <pmorreale@novell.com>
-
-For general info and legal blurb, please look in index.rst.
-
-------------------------------------------------------------------------------
-
-This file contains the documentation for the sysctl files in
-/proc/sys/vm and is valid for Linux kernel version 2.6.29.
-
-The files in this directory can be used to tune the operation
-of the virtual memory (VM) subsystem of the Linux kernel and
-the writeout of dirty data to disk.
-
-Default values and initialization routines for most of these
-files can be found in mm/swap.c.
-
-Currently, these files are in /proc/sys/vm:
-
-- admin_reserve_kbytes
-- block_dump
-- compact_memory
-- compact_unevictable_allowed
-- dirty_background_bytes
-- dirty_background_ratio
-- dirty_bytes
-- dirty_expire_centisecs
-- dirty_ratio
-- dirtytime_expire_seconds
-- dirty_writeback_centisecs
-- drop_caches
-- extfrag_threshold
-- hugetlb_shm_group
-- laptop_mode
-- legacy_va_layout
-- lowmem_reserve_ratio
-- max_map_count
-- memory_failure_early_kill
-- memory_failure_recovery
-- min_free_kbytes
-- min_slab_ratio
-- min_unmapped_ratio
-- mmap_min_addr
-- mmap_rnd_bits
-- mmap_rnd_compat_bits
-- nr_hugepages
-- nr_hugepages_mempolicy
-- nr_overcommit_hugepages
-- nr_trim_pages         (only if CONFIG_MMU=n)
-- numa_zonelist_order
-- oom_dump_tasks
-- oom_kill_allocating_task
-- overcommit_kbytes
-- overcommit_memory
-- overcommit_ratio
-- page-cluster
-- panic_on_oom
-- percpu_pagelist_fraction
-- stat_interval
-- stat_refresh
-- numa_stat
-- swappiness
-- unprivileged_userfaultfd
-- user_reserve_kbytes
-- vfs_cache_pressure
-- watermark_boost_factor
-- watermark_scale_factor
-- zone_reclaim_mode
-
-
-admin_reserve_kbytes
-====================
-
-The amount of free memory in the system that should be reserved for users
-with the capability cap_sys_admin.
-
-admin_reserve_kbytes defaults to min(3% of free pages, 8MB)
-
-That should provide enough for the admin to log in and kill a process,
-if necessary, under the default overcommit 'guess' mode.
-
-Systems running under overcommit 'never' should increase this to account
-for the full Virtual Memory Size of programs used to recover. Otherwise,
-root may not be able to log in to recover the system.
-
-How do you calculate a minimum useful reserve?
-
-sshd or login + bash (or some other shell) + top (or ps, kill, etc.)
-
-For overcommit 'guess', we can sum resident set sizes (RSS).
-On x86_64 this is about 8MB.
-
-For overcommit 'never', we can take the max of their virtual sizes (VSZ)
-and add the sum of their RSS.
-On x86_64 this is about 128MB.
-
-Changing this takes effect whenever an application requests memory.
-
-
-block_dump
-==========
-
-block_dump enables block I/O debugging when set to a nonzero value. More
-information on block I/O debugging is in Documentation/laptops/laptop-mode.rst.
-
-
-compact_memory
-==============
-
-Available only when CONFIG_COMPACTION is set. When 1 is written to the file,
-all zones are compacted such that free memory is available in contiguous
-blocks where possible. This can be important for example in the allocation of
-huge pages although processes will also directly compact memory as required.
-
-
-compact_unevictable_allowed
-===========================
-
-Available only when CONFIG_COMPACTION is set. When set to 1, compaction is
-allowed to examine the unevictable lru (mlocked pages) for pages to compact.
-This should be used on systems where stalls for minor page faults are an
-acceptable trade for large contiguous free memory.  Set to 0 to prevent
-compaction from moving pages that are unevictable.  Default value is 1.
-
-
-dirty_background_bytes
-======================
-
-Contains the amount of dirty memory at which the background kernel
-flusher threads will start writeback.
-
-Note:
-  dirty_background_bytes is the counterpart of dirty_background_ratio. Only
-  one of them may be specified at a time. When one sysctl is written it is
-  immediately taken into account to evaluate the dirty memory limits and the
-  other appears as 0 when read.
-
-
-dirty_background_ratio
-======================
-
-Contains, as a percentage of total available memory that contains free pages
-and reclaimable pages, the number of pages at which the background kernel
-flusher threads will start writing out dirty data.
-
-The total available memory is not equal to total system memory.
-
-
-dirty_bytes
-===========
-
-Contains the amount of dirty memory at which a process generating disk writes
-will itself start writeback.
-
-Note: dirty_bytes is the counterpart of dirty_ratio. Only one of them may be
-specified at a time. When one sysctl is written it is immediately taken into
-account to evaluate the dirty memory limits and the other appears as 0 when
-read.
-
-Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any
-value lower than this limit will be ignored and the old configuration will be
-retained.
-
-
-dirty_expire_centisecs
-======================
-
-This tunable is used to define when dirty data is old enough to be eligible
-for writeout by the kernel flusher threads.  It is expressed in 100'ths
-of a second.  Data which has been dirty in-memory for longer than this
-interval will be written out next time a flusher thread wakes up.
-
-
-dirty_ratio
-===========
-
-Contains, as a percentage of total available memory that contains free pages
-and reclaimable pages, the number of pages at which a process which is
-generating disk writes will itself start writing out dirty data.
-
-The total available memory is not equal to total system memory.
-
-
-dirtytime_expire_seconds
-========================
-
-When a lazytime inode is constantly having its pages dirtied, the inode with
-an updated timestamp will never get chance to be written out.  And, if the
-only thing that has happened on the file system is a dirtytime inode caused
-by an atime update, a worker will be scheduled to make sure that inode
-eventually gets pushed out to disk.  This tunable is used to define when dirty
-inode is old enough to be eligible for writeback by the kernel flusher threads.
-And, it is also used as the interval to wakeup dirtytime_writeback thread.
-
-
-dirty_writeback_centisecs
-=========================
-
-The kernel flusher threads will periodically wake up and write `old` data
-out to disk.  This tunable expresses the interval between those wakeups, in
-100'ths of a second.
-
-Setting this to zero disables periodic writeback altogether.
-
-
-drop_caches
-===========
-
-Writing to this will cause the kernel to drop clean caches, as well as
-reclaimable slab objects like dentries and inodes.  Once dropped, their
-memory becomes free.
-
-To free pagecache::
-
-	echo 1 > /proc/sys/vm/drop_caches
-
-To free reclaimable slab objects (includes dentries and inodes)::
-
-	echo 2 > /proc/sys/vm/drop_caches
-
-To free slab objects and pagecache::
-
-	echo 3 > /proc/sys/vm/drop_caches
-
-This is a non-destructive operation and will not free any dirty objects.
-To increase the number of objects freed by this operation, the user may run
-`sync` prior to writing to /proc/sys/vm/drop_caches.  This will minimize the
-number of dirty objects on the system and create more candidates to be
-dropped.
-
-This file is not a means to control the growth of the various kernel caches
-(inodes, dentries, pagecache, etc...)  These objects are automatically
-reclaimed by the kernel when memory is needed elsewhere on the system.
-
-Use of this file can cause performance problems.  Since it discards cached
-objects, it may cost a significant amount of I/O and CPU to recreate the
-dropped objects, especially if they were under heavy use.  Because of this,
-use outside of a testing or debugging environment is not recommended.
-
-You may see informational messages in your kernel log when this file is
-used::
-
-	cat (1234): drop_caches: 3
-
-These are informational only.  They do not mean that anything is wrong
-with your system.  To disable them, echo 4 (bit 2) into drop_caches.
-
-
-extfrag_threshold
-=================
-
-This parameter affects whether the kernel will compact memory or direct
-reclaim to satisfy a high-order allocation. The extfrag/extfrag_index file in
-debugfs shows what the fragmentation index for each order is in each zone in
-the system. Values tending towards 0 imply allocations would fail due to lack
-of memory, values towards 1000 imply failures are due to fragmentation and -1
-implies that the allocation will succeed as long as watermarks are met.
-
-The kernel will not compact memory in a zone if the
-fragmentation index is <= extfrag_threshold. The default value is 500.
-
-
-highmem_is_dirtyable
-====================
-
-Available only for systems with CONFIG_HIGHMEM enabled (32b systems).
-
-This parameter controls whether the high memory is considered for dirty
-writers throttling.  This is not the case by default which means that
-only the amount of memory directly visible/usable by the kernel can
-be dirtied. As a result, on systems with a large amount of memory and
-lowmem basically depleted writers might be throttled too early and
-streaming writes can get very slow.
-
-Changing the value to non zero would allow more memory to be dirtied
-and thus allow writers to write more data which can be flushed to the
-storage more effectively. Note this also comes with a risk of pre-mature
-OOM killer because some writers (e.g. direct block device writes) can
-only use the low memory and they can fill it up with dirty data without
-any throttling.
-
-
-hugetlb_shm_group
-=================
-
-hugetlb_shm_group contains group id that is allowed to create SysV
-shared memory segment using hugetlb page.
-
-
-laptop_mode
-===========
-
-laptop_mode is a knob that controls "laptop mode". All the things that are
-controlled by this knob are discussed in Documentation/laptops/laptop-mode.rst.
-
-
-legacy_va_layout
-================
-
-If non-zero, this sysctl disables the new 32-bit mmap layout - the kernel
-will use the legacy (2.4) layout for all processes.
-
-
-lowmem_reserve_ratio
-====================
-
-For some specialised workloads on highmem machines it is dangerous for
-the kernel to allow process memory to be allocated from the "lowmem"
-zone.  This is because that memory could then be pinned via the mlock()
-system call, or by unavailability of swapspace.
-
-And on large highmem machines this lack of reclaimable lowmem memory
-can be fatal.
-
-So the Linux page allocator has a mechanism which prevents allocations
-which *could* use highmem from using too much lowmem.  This means that
-a certain amount of lowmem is defended from the possibility of being
-captured into pinned user memory.
-
-(The same argument applies to the old 16 megabyte ISA DMA region.  This
-mechanism will also defend that region from allocations which could use
-highmem or lowmem).
-
-The `lowmem_reserve_ratio` tunable determines how aggressive the kernel is
-in defending these lower zones.
-
-If you have a machine which uses highmem or ISA DMA and your
-applications are using mlock(), or if you are running with no swap then
-you probably should change the lowmem_reserve_ratio setting.
-
-The lowmem_reserve_ratio is an array. You can see them by reading this file::
-
-	% cat /proc/sys/vm/lowmem_reserve_ratio
-	256     256     32
-
-But, these values are not used directly. The kernel calculates # of protection
-pages for each zones from them. These are shown as array of protection pages
-in /proc/zoneinfo like followings. (This is an example of x86-64 box).
-Each zone has an array of protection pages like this::
-
-  Node 0, zone      DMA
-    pages free     1355
-          min      3
-          low      3
-          high     4
-	:
-	:
-      numa_other   0
-          protection: (0, 2004, 2004, 2004)
-	^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    pagesets
-      cpu: 0 pcp: 0
-          :
-
-These protections are added to score to judge whether this zone should be used
-for page allocation or should be reclaimed.
-
-In this example, if normal pages (index=2) are required to this DMA zone and
-watermark[WMARK_HIGH] is used for watermark, the kernel judges this zone should
-not be used because pages_free(1355) is smaller than watermark + protection[2]
-(4 + 2004 = 2008). If this protection value is 0, this zone would be used for
-normal page requirement. If requirement is DMA zone(index=0), protection[0]
-(=0) is used.
-
-zone[i]'s protection[j] is calculated by following expression::
-
-  (i < j):
-    zone[i]->protection[j]
-    = (total sums of managed_pages from zone[i+1] to zone[j] on the node)
-      / lowmem_reserve_ratio[i];
-  (i = j):
-     (should not be protected. = 0;
-  (i > j):
-     (not necessary, but looks 0)
-
-The default values of lowmem_reserve_ratio[i] are
-
-    === ====================================
-    256 (if zone[i] means DMA or DMA32 zone)
-    32  (others)
-    === ====================================
-
-As above expression, they are reciprocal number of ratio.
-256 means 1/256. # of protection pages becomes about "0.39%" of total managed
-pages of higher zones on the node.
-
-If you would like to protect more pages, smaller values are effective.
-The minimum value is 1 (1/1 -> 100%). The value less than 1 completely
-disables protection of the pages.
-
-
-max_map_count:
-==============
-
-This file contains the maximum number of memory map areas a process
-may have. Memory map areas are used as a side-effect of calling
-malloc, directly by mmap, mprotect, and madvise, and also when loading
-shared libraries.
-
-While most applications need less than a thousand maps, certain
-programs, particularly malloc debuggers, may consume lots of them,
-e.g., up to one or two maps per allocation.
-
-The default value is 65536.
-
-
-memory_failure_early_kill:
-==========================
-
-Control how to kill processes when uncorrected memory error (typically
-a 2bit error in a memory module) is detected in the background by hardware
-that cannot be handled by the kernel. In some cases (like the page
-still having a valid copy on disk) the kernel will handle the failure
-transparently without affecting any applications. But if there is
-no other uptodate copy of the data it will kill to prevent any data
-corruptions from propagating.
-
-1: Kill all processes that have the corrupted and not reloadable page mapped
-as soon as the corruption is detected.  Note this is not supported
-for a few types of pages, like kernel internally allocated data or
-the swap cache, but works for the majority of user pages.
-
-0: Only unmap the corrupted page from all processes and only kill a process
-who tries to access it.
-
-The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can
-handle this if they want to.
-
-This is only active on architectures/platforms with advanced machine
-check handling and depends on the hardware capabilities.
-
-Applications can override this setting individually with the PR_MCE_KILL prctl
-
-
-memory_failure_recovery
-=======================
-
-Enable memory failure recovery (when supported by the platform)
-
-1: Attempt recovery.
-
-0: Always panic on a memory failure.
-
-
-min_free_kbytes
-===============
-
-This is used to force the Linux VM to keep a minimum number
-of kilobytes free.  The VM uses this number to compute a
-watermark[WMARK_MIN] value for each lowmem zone in the system.
-Each lowmem zone gets a number of reserved free pages based
-proportionally on its size.
-
-Some minimal amount of memory is needed to satisfy PF_MEMALLOC
-allocations; if you set this to lower than 1024KB, your system will
-become subtly broken, and prone to deadlock under high loads.
-
-Setting this too high will OOM your machine instantly.
-
-
-min_slab_ratio
-==============
-
-This is available only on NUMA kernels.
-
-A percentage of the total pages in each zone.  On Zone reclaim
-(fallback from the local zone occurs) slabs will be reclaimed if more
-than this percentage of pages in a zone are reclaimable slab pages.
-This insures that the slab growth stays under control even in NUMA
-systems that rarely perform global reclaim.
-
-The default is 5 percent.
-
-Note that slab reclaim is triggered in a per zone / node fashion.
-The process of reclaiming slab memory is currently not node specific
-and may not be fast.
-
-
-min_unmapped_ratio
-==================
-
-This is available only on NUMA kernels.
-
-This is a percentage of the total pages in each zone. Zone reclaim will
-only occur if more than this percentage of pages are in a state that
-zone_reclaim_mode allows to be reclaimed.
-
-If zone_reclaim_mode has the value 4 OR'd, then the percentage is compared
-against all file-backed unmapped pages including swapcache pages and tmpfs
-files. Otherwise, only unmapped pages backed by normal files but not tmpfs
-files and similar are considered.
-
-The default is 1 percent.
-
-
-mmap_min_addr
-=============
-
-This file indicates the amount of address space  which a user process will
-be restricted from mmapping.  Since kernel null dereference bugs could
-accidentally operate based on the information in the first couple of pages
-of memory userspace processes should not be allowed to write to them.  By
-default this value is set to 0 and no protections will be enforced by the
-security module.  Setting this value to something like 64k will allow the
-vast majority of applications to work correctly and provide defense in depth
-against future potential kernel bugs.
-
-
-mmap_rnd_bits
-=============
-
-This value can be used to select the number of bits to use to
-determine the random offset to the base address of vma regions
-resulting from mmap allocations on architectures which support
-tuning address space randomization.  This value will be bounded
-by the architecture's minimum and maximum supported values.
-
-This value can be changed after boot using the
-/proc/sys/vm/mmap_rnd_bits tunable
-
-
-mmap_rnd_compat_bits
-====================
-
-This value can be used to select the number of bits to use to
-determine the random offset to the base address of vma regions
-resulting from mmap allocations for applications run in
-compatibility mode on architectures which support tuning address
-space randomization.  This value will be bounded by the
-architecture's minimum and maximum supported values.
-
-This value can be changed after boot using the
-/proc/sys/vm/mmap_rnd_compat_bits tunable
-
-
-nr_hugepages
-============
-
-Change the minimum size of the hugepage pool.
-
-See Documentation/admin-guide/mm/hugetlbpage.rst
-
-
-nr_hugepages_mempolicy
-======================
-
-Change the size of the hugepage pool at run-time on a specific
-set of NUMA nodes.
-
-See Documentation/admin-guide/mm/hugetlbpage.rst
-
-
-nr_overcommit_hugepages
-=======================
-
-Change the maximum size of the hugepage pool. The maximum is
-nr_hugepages + nr_overcommit_hugepages.
-
-See Documentation/admin-guide/mm/hugetlbpage.rst
-
-
-nr_trim_pages
-=============
-
-This is available only on NOMMU kernels.
-
-This value adjusts the excess page trimming behaviour of power-of-2 aligned
-NOMMU mmap allocations.
-
-A value of 0 disables trimming of allocations entirely, while a value of 1
-trims excess pages aggressively. Any value >= 1 acts as the watermark where
-trimming of allocations is initiated.
-
-The default value is 1.
-
-See Documentation/nommu-mmap.txt for more information.
-
-
-numa_zonelist_order
-===================
-
-This sysctl is only for NUMA and it is deprecated. Anything but
-Node order will fail!
-
-'where the memory is allocated from' is controlled by zonelists.
-
-(This documentation ignores ZONE_HIGHMEM/ZONE_DMA32 for simple explanation.
-you may be able to read ZONE_DMA as ZONE_DMA32...)
-
-In non-NUMA case, a zonelist for GFP_KERNEL is ordered as following.
-ZONE_NORMAL -> ZONE_DMA
-This means that a memory allocation request for GFP_KERNEL will
-get memory from ZONE_DMA only when ZONE_NORMAL is not available.
-
-In NUMA case, you can think of following 2 types of order.
-Assume 2 node NUMA and below is zonelist of Node(0)'s GFP_KERNEL::
-
-  (A) Node(0) ZONE_NORMAL -> Node(0) ZONE_DMA -> Node(1) ZONE_NORMAL
-  (B) Node(0) ZONE_NORMAL -> Node(1) ZONE_NORMAL -> Node(0) ZONE_DMA.
-
-Type(A) offers the best locality for processes on Node(0), but ZONE_DMA
-will be used before ZONE_NORMAL exhaustion. This increases possibility of
-out-of-memory(OOM) of ZONE_DMA because ZONE_DMA is tend to be small.
-
-Type(B) cannot offer the best locality but is more robust against OOM of
-the DMA zone.
-
-Type(A) is called as "Node" order. Type (B) is "Zone" order.
-
-"Node order" orders the zonelists by node, then by zone within each node.
-Specify "[Nn]ode" for node order
-
-"Zone Order" orders the zonelists by zone type, then by node within each
-zone.  Specify "[Zz]one" for zone order.
-
-Specify "[Dd]efault" to request automatic configuration.
-
-On 32-bit, the Normal zone needs to be preserved for allocations accessible
-by the kernel, so "zone" order will be selected.
-
-On 64-bit, devices that require DMA32/DMA are relatively rare, so "node"
-order will be selected.
-
-Default order is recommended unless this is causing problems for your
-system/application.
-
-
-oom_dump_tasks
-==============
-
-Enables a system-wide task dump (excluding kernel threads) to be produced
-when the kernel performs an OOM-killing and includes such information as
-pid, uid, tgid, vm size, rss, pgtables_bytes, swapents, oom_score_adj
-score, and name.  This is helpful to determine why the OOM killer was
-invoked, to identify the rogue task that caused it, and to determine why
-the OOM killer chose the task it did to kill.
-
-If this is set to zero, this information is suppressed.  On very
-large systems with thousands of tasks it may not be feasible to dump
-the memory state information for each one.  Such systems should not
-be forced to incur a performance penalty in OOM conditions when the
-information may not be desired.
-
-If this is set to non-zero, this information is shown whenever the
-OOM killer actually kills a memory-hogging task.
-
-The default value is 1 (enabled).
-
-
-oom_kill_allocating_task
-========================
-
-This enables or disables killing the OOM-triggering task in
-out-of-memory situations.
-
-If this is set to zero, the OOM killer will scan through the entire
-tasklist and select a task based on heuristics to kill.  This normally
-selects a rogue memory-hogging task that frees up a large amount of
-memory when killed.
-
-If this is set to non-zero, the OOM killer simply kills the task that
-triggered the out-of-memory condition.  This avoids the expensive
-tasklist scan.
-
-If panic_on_oom is selected, it takes precedence over whatever value
-is used in oom_kill_allocating_task.
-
-The default value is 0.
-
-
-overcommit_kbytes
-=================
-
-When overcommit_memory is set to 2, the committed address space is not
-permitted to exceed swap plus this amount of physical RAM. See below.
-
-Note: overcommit_kbytes is the counterpart of overcommit_ratio. Only one
-of them may be specified at a time. Setting one disables the other (which
-then appears as 0 when read).
-
-
-overcommit_memory
-=================
-
-This value contains a flag that enables memory overcommitment.
-
-When this flag is 0, the kernel attempts to estimate the amount
-of free memory left when userspace requests more memory.
-
-When this flag is 1, the kernel pretends there is always enough
-memory until it actually runs out.
-
-When this flag is 2, the kernel uses a "never overcommit"
-policy that attempts to prevent any overcommit of memory.
-Note that user_reserve_kbytes affects this policy.
-
-This feature can be very useful because there are a lot of
-programs that malloc() huge amounts of memory "just-in-case"
-and don't use much of it.
-
-The default value is 0.
-
-See Documentation/vm/overcommit-accounting.rst and
-mm/util.c::__vm_enough_memory() for more information.
-
-
-overcommit_ratio
-================
-
-When overcommit_memory is set to 2, the committed address
-space is not permitted to exceed swap plus this percentage
-of physical RAM.  See above.
-
-
-page-cluster
-============
-
-page-cluster controls the number of pages up to which consecutive pages
-are read in from swap in a single attempt. This is the swap counterpart
-to page cache readahead.
-The mentioned consecutivity is not in terms of virtual/physical addresses,
-but consecutive on swap space - that means they were swapped out together.
-
-It is a logarithmic value - setting it to zero means "1 page", setting
-it to 1 means "2 pages", setting it to 2 means "4 pages", etc.
-Zero disables swap readahead completely.
-
-The default value is three (eight pages at a time).  There may be some
-small benefits in tuning this to a different value if your workload is
-swap-intensive.
-
-Lower values mean lower latencies for initial faults, but at the same time
-extra faults and I/O delays for following faults if they would have been part of
-that consecutive pages readahead would have brought in.
-
-
-panic_on_oom
-============
-
-This enables or disables panic on out-of-memory feature.
-
-If this is set to 0, the kernel will kill some rogue process,
-called oom_killer.  Usually, oom_killer can kill rogue processes and
-system will survive.
-
-If this is set to 1, the kernel panics when out-of-memory happens.
-However, if a process limits using nodes by mempolicy/cpusets,
-and those nodes become memory exhaustion status, one process
-may be killed by oom-killer. No panic occurs in this case.
-Because other nodes' memory may be free. This means system total status
-may be not fatal yet.
-
-If this is set to 2, the kernel panics compulsorily even on the
-above-mentioned. Even oom happens under memory cgroup, the whole
-system panics.
-
-The default value is 0.
-
-1 and 2 are for failover of clustering. Please select either
-according to your policy of failover.
-
-panic_on_oom=2+kdump gives you very strong tool to investigate
-why oom happens. You can get snapshot.
-
-
-percpu_pagelist_fraction
-========================
-
-This is the fraction of pages at most (high mark pcp->high) in each zone that
-are allocated for each per cpu page list.  The min value for this is 8.  It
-means that we don't allow more than 1/8th of pages in each zone to be
-allocated in any single per_cpu_pagelist.  This entry only changes the value
-of hot per cpu pagelists.  User can specify a number like 100 to allocate
-1/100th of each zone to each per cpu page list.
-
-The batch value of each per cpu pagelist is also updated as a result.  It is
-set to pcp->high/4.  The upper limit of batch is (PAGE_SHIFT * 8)
-
-The initial value is zero.  Kernel does not use this value at boot time to set
-the high water marks for each per cpu page list.  If the user writes '0' to this
-sysctl, it will revert to this default behavior.
-
-
-stat_interval
-=============
-
-The time interval between which vm statistics are updated.  The default
-is 1 second.
-
-
-stat_refresh
-============
-
-Any read or write (by root only) flushes all the per-cpu vm statistics
-into their global totals, for more accurate reports when testing
-e.g. cat /proc/sys/vm/stat_refresh /proc/meminfo
-
-As a side-effect, it also checks for negative totals (elsewhere reported
-as 0) and "fails" with EINVAL if any are found, with a warning in dmesg.
-(At time of writing, a few stats are known sometimes to be found negative,
-with no ill effects: errors and warnings on these stats are suppressed.)
-
-
-numa_stat
-=========
-
-This interface allows runtime configuration of numa statistics.
-
-When page allocation performance becomes a bottleneck and you can tolerate
-some possible tool breakage and decreased numa counter precision, you can
-do::
-
-	echo 0 > /proc/sys/vm/numa_stat
-
-When page allocation performance is not a bottleneck and you want all
-tooling to work, you can do::
-
-	echo 1 > /proc/sys/vm/numa_stat
-
-
-swappiness
-==========
-
-This control is used to define how aggressive the kernel will swap
-memory pages.  Higher values will increase aggressiveness, lower values
-decrease the amount of swap.  A value of 0 instructs the kernel not to
-initiate swap until the amount of free and file-backed pages is less
-than the high water mark in a zone.
-
-The default value is 60.
-
-
-unprivileged_userfaultfd
-========================
-
-This flag controls whether unprivileged users can use the userfaultfd
-system calls.  Set this to 1 to allow unprivileged users to use the
-userfaultfd system calls, or set this to 0 to restrict userfaultfd to only
-privileged users (with SYS_CAP_PTRACE capability).
-
-The default value is 1.
-
-
-user_reserve_kbytes
-===================
-
-When overcommit_memory is set to 2, "never overcommit" mode, reserve
-min(3% of current process size, user_reserve_kbytes) of free memory.
-This is intended to prevent a user from starting a single memory hogging
-process, such that they cannot recover (kill the hog).
-
-user_reserve_kbytes defaults to min(3% of the current process size, 128MB).
-
-If this is reduced to zero, then the user will be allowed to allocate
-all free memory with a single process, minus admin_reserve_kbytes.
-Any subsequent attempts to execute a command will result in
-"fork: Cannot allocate memory".
-
-Changing this takes effect whenever an application requests memory.
-
-
-vfs_cache_pressure
-==================
-
-This percentage value controls the tendency of the kernel to reclaim
-the memory which is used for caching of directory and inode objects.
-
-At the default value of vfs_cache_pressure=100 the kernel will attempt to
-reclaim dentries and inodes at a "fair" rate with respect to pagecache and
-swapcache reclaim.  Decreasing vfs_cache_pressure causes the kernel to prefer
-to retain dentry and inode caches. When vfs_cache_pressure=0, the kernel will
-never reclaim dentries and inodes due to memory pressure and this can easily
-lead to out-of-memory conditions. Increasing vfs_cache_pressure beyond 100
-causes the kernel to prefer to reclaim dentries and inodes.
-
-Increasing vfs_cache_pressure significantly beyond 100 may have negative
-performance impact. Reclaim code needs to take various locks to find freeable
-directory and inode objects. With vfs_cache_pressure=1000, it will look for
-ten times more freeable objects than there are.
-
-
-watermark_boost_factor
-======================
-
-This factor controls the level of reclaim when memory is being fragmented.
-It defines the percentage of the high watermark of a zone that will be
-reclaimed if pages of different mobility are being mixed within pageblocks.
-The intent is that compaction has less work to do in the future and to
-increase the success rate of future high-order allocations such as SLUB
-allocations, THP and hugetlbfs pages.
-
-To make it sensible with respect to the watermark_scale_factor
-parameter, the unit is in fractions of 10,000. The default value of
-15,000 on !DISCONTIGMEM configurations means that up to 150% of the high
-watermark will be reclaimed in the event of a pageblock being mixed due
-to fragmentation. The level of reclaim is determined by the number of
-fragmentation events that occurred in the recent past. If this value is
-smaller than a pageblock then a pageblocks worth of pages will be reclaimed
-(e.g.  2MB on 64-bit x86). A boost factor of 0 will disable the feature.
-
-
-watermark_scale_factor
-======================
-
-This factor controls the aggressiveness of kswapd. It defines the
-amount of memory left in a node/system before kswapd is woken up and
-how much memory needs to be free before kswapd goes back to sleep.
-
-The unit is in fractions of 10,000. The default value of 10 means the
-distances between watermarks are 0.1% of the available memory in the
-node/system. The maximum value is 1000, or 10% of memory.
-
-A high rate of threads entering direct reclaim (allocstall) or kswapd
-going to sleep prematurely (kswapd_low_wmark_hit_quickly) can indicate
-that the number of free pages kswapd maintains for latency reasons is
-too small for the allocation bursts occurring in the system. This knob
-can then be used to tune kswapd aggressiveness accordingly.
-
-
-zone_reclaim_mode
-=================
-
-Zone_reclaim_mode allows someone to set more or less aggressive approaches to
-reclaim memory when a zone runs out of memory. If it is set to zero then no
-zone reclaim occurs. Allocations will be satisfied from other zones / nodes
-in the system.
-
-This is value OR'ed together of
-
-=	===================================
-1	Zone reclaim on
-2	Zone reclaim writes dirty pages out
-4	Zone reclaim swaps pages
-=	===================================
-
-zone_reclaim_mode is disabled by default.  For file servers or workloads
-that benefit from having their data cached, zone_reclaim_mode should be
-left disabled as the caching effect is likely to be more important than
-data locality.
-
-zone_reclaim may be enabled if it's known that the workload is partitioned
-such that each partition fits within a NUMA node and that accessing remote
-memory would cause a measurable performance reduction.  The page allocator
-will then reclaim easily reusable pages (those page cache pages that are
-currently not used) before allocating off node pages.
-
-Allowing zone reclaim to write out pages stops processes that are
-writing large amounts of data from dirtying pages on other nodes. Zone
-reclaim will write out dirty pages if a zone fills up and so effectively
-throttle the process. This may decrease the performance of a single process
-since it cannot use all of system memory to buffer the outgoing writes
-anymore but it preserve the memory on other nodes so that the performance
-of other processes running on other nodes will not be affected.
-
-Allowing regular swap effectively restricts allocations to the local
-node unless explicitly overridden by memory policies or cpuset
-configurations.
diff --git a/Documentation/vm/unevictable-lru.rst b/Documentation/vm/unevictable-lru.rst
index 8ba656f37cd8..109052215bce 100644
--- a/Documentation/vm/unevictable-lru.rst
+++ b/Documentation/vm/unevictable-lru.rst
@@ -439,7 +439,7 @@ Compacting MLOCKED Pages
 
 The unevictable LRU can be scanned for compactable regions and the default
 behavior is to do so.  /proc/sys/vm/compact_unevictable_allowed controls
-this behavior (see Documentation/sysctl/vm.rst).  Once scanning of the
+this behavior (see Documentation/admin-guide/sysctl/vm.rst).  Once scanning of the
 unevictable LRU is enabled, the work of compaction is mostly handled by
 the page migration code and the same work flow as described in MIGRATING
 MLOCKED PAGES will apply.
-- 
cgit 


From 9e1cbede267916e737c4a755059418da3ac4de95 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 13 Jun 2019 15:07:43 -0300
Subject: docs: admin-guide: add laptops documentation

The docs under Documentation/laptops contain users specific
information.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com>
---
 Documentation/ABI/testing/sysfs-block-device       |    2 +-
 .../ABI/testing/sysfs-platform-asus-laptop         |    2 +-
 Documentation/admin-guide/index.rst                |    1 +
 Documentation/admin-guide/kernel-parameters.txt    |    2 +-
 Documentation/admin-guide/laptops/asus-laptop.rst  |  271 ++++
 .../admin-guide/laptops/disk-shock-protection.rst  |  151 ++
 Documentation/admin-guide/laptops/index.rst        |   16 +
 Documentation/admin-guide/laptops/laptop-mode.rst  |  781 ++++++++++
 Documentation/admin-guide/laptops/lg-laptop.rst    |   84 ++
 Documentation/admin-guide/laptops/sony-laptop.rst  |  174 +++
 Documentation/admin-guide/laptops/sonypi.rst       |  160 ++
 .../admin-guide/laptops/thinkpad-acpi.rst          | 1562 ++++++++++++++++++++
 Documentation/admin-guide/laptops/toshiba_haps.rst |   87 ++
 Documentation/admin-guide/sysctl/vm.rst            |    4 +-
 Documentation/laptops/asus-laptop.rst              |  271 ----
 Documentation/laptops/disk-shock-protection.rst    |  151 --
 Documentation/laptops/index.rst                    |   17 -
 Documentation/laptops/laptop-mode.rst              |  781 ----------
 Documentation/laptops/lg-laptop.rst                |   85 --
 Documentation/laptops/sony-laptop.rst              |  174 ---
 Documentation/laptops/sonypi.rst                   |  160 --
 Documentation/laptops/thinkpad-acpi.rst            | 1562 --------------------
 Documentation/laptops/toshiba_haps.rst             |   87 --
 23 files changed, 3292 insertions(+), 3293 deletions(-)
 create mode 100644 Documentation/admin-guide/laptops/asus-laptop.rst
 create mode 100644 Documentation/admin-guide/laptops/disk-shock-protection.rst
 create mode 100644 Documentation/admin-guide/laptops/index.rst
 create mode 100644 Documentation/admin-guide/laptops/laptop-mode.rst
 create mode 100644 Documentation/admin-guide/laptops/lg-laptop.rst
 create mode 100644 Documentation/admin-guide/laptops/sony-laptop.rst
 create mode 100644 Documentation/admin-guide/laptops/sonypi.rst
 create mode 100644 Documentation/admin-guide/laptops/thinkpad-acpi.rst
 create mode 100644 Documentation/admin-guide/laptops/toshiba_haps.rst
 delete mode 100644 Documentation/laptops/asus-laptop.rst
 delete mode 100644 Documentation/laptops/disk-shock-protection.rst
 delete mode 100644 Documentation/laptops/index.rst
 delete mode 100644 Documentation/laptops/laptop-mode.rst
 delete mode 100644 Documentation/laptops/lg-laptop.rst
 delete mode 100644 Documentation/laptops/sony-laptop.rst
 delete mode 100644 Documentation/laptops/sonypi.rst
 delete mode 100644 Documentation/laptops/thinkpad-acpi.rst
 delete mode 100644 Documentation/laptops/toshiba_haps.rst

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-block-device b/Documentation/ABI/testing/sysfs-block-device
index 0d57bbb4fddc..17f2bc7dd261 100644
--- a/Documentation/ABI/testing/sysfs-block-device
+++ b/Documentation/ABI/testing/sysfs-block-device
@@ -45,7 +45,7 @@ Description:
 		- Values below -2 are rejected with -EINVAL
 
 		For more information, see
-		Documentation/laptops/disk-shock-protection.rst
+		Documentation/admin-guide/laptops/disk-shock-protection.rst
 
 
 What:		/sys/block/*/device/ncq_prio_enable
diff --git a/Documentation/ABI/testing/sysfs-platform-asus-laptop b/Documentation/ABI/testing/sysfs-platform-asus-laptop
index d67fa4bafa70..8b0e8205a6a2 100644
--- a/Documentation/ABI/testing/sysfs-platform-asus-laptop
+++ b/Documentation/ABI/testing/sysfs-platform-asus-laptop
@@ -31,7 +31,7 @@ Description:
 		To control the LED display, use the following :
 		    echo 0x0T000DDD > /sys/devices/platform/asus_laptop/
 		where T control the 3 letters display, and DDD the 3 digits display.
-		The DDD table can be found in Documentation/laptops/asus-laptop.rst
+		The DDD table can be found in Documentation/admin-guide/laptops/asus-laptop.rst
 
 What:		/sys/devices/platform/asus_laptop/bluetooth
 Date:		January 2007
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 5c6ae1ccee1a..6fcc83aaa9b6 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -82,6 +82,7 @@ configure specific aspects of kernel behavior to your liking.
    perf-security
    acpi/index
    device-mapper/index
+   laptops/index
 
 .. only::  subproject and html
 
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b323f5d4366a..4821175a3769 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4347,7 +4347,7 @@
 			Format: <integer>
 
 	sonypi.*=	[HW] Sony Programmable I/O Control Device driver
-			See Documentation/laptops/sonypi.rst
+			See Documentation/admin-guide/laptops/sonypi.rst
 
 	spectre_v2=	[X86] Control mitigation of Spectre variant 2
 			(indirect branch speculation) vulnerability.
diff --git a/Documentation/admin-guide/laptops/asus-laptop.rst b/Documentation/admin-guide/laptops/asus-laptop.rst
new file mode 100644
index 000000000000..95176321a25a
--- /dev/null
+++ b/Documentation/admin-guide/laptops/asus-laptop.rst
@@ -0,0 +1,271 @@
+==================
+Asus Laptop Extras
+==================
+
+Version 0.1
+
+August 6, 2009
+
+Corentin Chary <corentincj@iksaif.net>
+http://acpi4asus.sf.net/
+
+ This driver provides support for extra features of ACPI-compatible ASUS laptops.
+ It may also support some MEDION, JVC or VICTOR laptops (such as MEDION 9675 or
+ VICTOR XP7210 for example). It makes all the extra buttons generate input
+ events (like keyboards).
+
+ On some models adds support for changing the display brightness and output,
+ switching the LCD backlight on and off, and most importantly, allows you to
+ blink those fancy LEDs intended for reporting mail and wireless status.
+
+This driver supersedes the old asus_acpi driver.
+
+Requirements
+------------
+
+  Kernel 2.6.X sources, configured for your computer, with ACPI support.
+  You also need CONFIG_INPUT and CONFIG_ACPI.
+
+Status
+------
+
+ The features currently supported are the following (see below for
+ detailed description):
+
+ - Fn key combinations
+ - Bluetooth enable and disable
+ - Wlan enable and disable
+ - GPS enable and disable
+ - Video output switching
+ - Ambient Light Sensor on and off
+ - LED control
+ - LED Display control
+ - LCD brightness control
+ - LCD on and off
+
+ A compatibility table by model and feature is maintained on the web
+ site, http://acpi4asus.sf.net/.
+
+Usage
+-----
+
+  Try "modprobe asus-laptop". Check your dmesg (simply type dmesg). You should
+  see some lines like this :
+
+      Asus Laptop Extras version 0.42
+        - L2D model detected.
+
+  If it is not the output you have on your laptop, send it (and the laptop's
+  DSDT) to me.
+
+  That's all, now, all the events generated by the hotkeys of your laptop
+  should be reported via netlink events. You can check with
+  "acpi_genl monitor" (part of the acpica project).
+
+  Hotkeys are also reported as input keys (like keyboards) you can check
+  which key are supported using "xev" under X11.
+
+  You can get information on the version of your DSDT table by reading the
+  /sys/devices/platform/asus-laptop/infos entry. If you have a question or a
+  bug report to do, please include the output of this entry.
+
+LEDs
+----
+
+  You can modify LEDs be echoing values to `/sys/class/leds/asus/*/brightness`::
+
+    echo 1 >  /sys/class/leds/asus::mail/brightness
+
+  will switch the mail LED on.
+
+  You can also know if they are on/off by reading their content and use
+  kernel triggers like disk-activity or heartbeat.
+
+Backlight
+---------
+
+  You can control lcd backlight power and brightness with
+  /sys/class/backlight/asus-laptop/. Brightness Values are between 0 and 15.
+
+Wireless devices
+----------------
+
+  You can turn the internal Bluetooth adapter on/off with the bluetooth entry
+  (only on models with Bluetooth). This usually controls the associated LED.
+  Same for Wlan adapter.
+
+Display switching
+-----------------
+
+  Note: the display switching code is currently considered EXPERIMENTAL.
+
+  Switching works for the following models:
+
+    - L3800C
+    - A2500H
+    - L5800C
+    - M5200N
+    - W1000N (albeit with some glitches)
+    - M6700R
+    - A6JC
+    - F3J
+
+  Switching doesn't work for the following:
+
+    - M3700N
+    - L2X00D (locks the laptop under certain conditions)
+
+  To switch the displays, echo values from 0 to 15 to
+  /sys/devices/platform/asus-laptop/display. The significance of those values
+  is as follows:
+
+  +-------+-----+-----+-----+-----+-----+
+  | Bin   | Val | DVI | TV  | CRT | LCD |
+  +-------+-----+-----+-----+-----+-----+
+  | 0000  |   0 |     |     |     |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 0001  |   1 |     |     |     |  X  |
+  +-------+-----+-----+-----+-----+-----+
+  | 0010  |   2 |     |     |  X  |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 0011  |   3 |     |     |  X  |  X  |
+  +-------+-----+-----+-----+-----+-----+
+  | 0100  |   4 |     |  X  |     |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 0101  |   5 |     |  X  |     | X   |
+  +-------+-----+-----+-----+-----+-----+
+  | 0110  |   6 |     |  X  |  X  |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 0111  |   7 |     |  X  |  X  |  X  |
+  +-------+-----+-----+-----+-----+-----+
+  | 1000  |   8 |  X  |     |     |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 1001  |   9 |  X  |     |     |  X  |
+  +-------+-----+-----+-----+-----+-----+
+  | 1010  |  10 |  X  |     |  X  |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 1011  |  11 |  X  |     |  X  |  X  |
+  +-------+-----+-----+-----+-----+-----+
+  | 1100  |  12 |  X  |  X  |     |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 1101  |  13 |  X  |  X  |     |  X  |
+  +-------+-----+-----+-----+-----+-----+
+  | 1110  |  14 |  X  |  X  |  X  |     |
+  +-------+-----+-----+-----+-----+-----+
+  | 1111  |  15 |  X  |  X  |  X  |  X  |
+  +-------+-----+-----+-----+-----+-----+
+
+  In most cases, the appropriate displays must be plugged in for the above
+  combinations to work. TV-Out may need to be initialized at boot time.
+
+  Debugging:
+
+  1) Check whether the Fn+F8 key:
+
+     a) does not lock the laptop (try a boot with noapic / nolapic if it does)
+     b) generates events (0x6n, where n is the value corresponding to the
+        configuration above)
+     c) actually works
+
+     Record the disp value at every configuration.
+  2) Echo values from 0 to 15 to /sys/devices/platform/asus-laptop/display.
+     Record its value, note any change. If nothing changes, try a broader range,
+     up to 65535.
+  3) Send ANY output (both positive and negative reports are needed, unless your
+     machine is already listed above) to the acpi4asus-user mailing list.
+
+  Note: on some machines (e.g. L3C), after the module has been loaded, only 0x6n
+  events are generated and no actual switching occurs. In such a case, a line
+  like::
+
+    echo $((10#$arg-60)) > /sys/devices/platform/asus-laptop/display
+
+  will usually do the trick ($arg is the 0000006n-like event passed to acpid).
+
+  Note: there is currently no reliable way to read display status on xxN
+  (Centrino) models.
+
+LED display
+-----------
+
+  Some models like the W1N have a LED display that can be used to display
+  several items of information.
+
+  LED display works for the following models:
+
+    - W1000N
+    - W1J
+
+  To control the LED display, use the following::
+
+    echo 0x0T000DDD > /sys/devices/platform/asus-laptop/
+
+  where T control the 3 letters display, and DDD the 3 digits display,
+  according to the tables below::
+
+         DDD (digits)
+         000 to 999 = display digits
+         AAA        = ---
+         BBB to FFF = turn-off
+
+         T  (type)
+         0 = off
+         1 = dvd
+         2 = vcd
+         3 = mp3
+         4 = cd
+         5 = tv
+         6 = cpu
+         7 = vol
+
+  For example "echo 0x01000001 >/sys/devices/platform/asus-laptop/ledd"
+  would display "DVD001".
+
+Driver options
+--------------
+
+ Options can be passed to the asus-laptop driver using the standard
+ module argument syntax (<param>=<value> when passing the option to the
+ module or asus-laptop.<param>=<value> on the kernel boot line when
+ asus-laptop is statically linked into the kernel).
+
+	     wapf: WAPF defines the behavior of the Fn+Fx wlan key
+		   The significance of values is yet to be found, but
+		   most of the time:
+
+		   - 0x0 should do nothing
+		   - 0x1 should allow to control the device with Fn+Fx key.
+		   - 0x4 should send an ACPI event (0x88) while pressing the Fn+Fx key
+		   - 0x5 like 0x1 or 0x4
+
+ The default value is 0x1.
+
+Unsupported models
+------------------
+
+ These models will never be supported by this module, as they use a completely
+ different mechanism to handle LEDs and extra stuff (meaning we have no clue
+ how it works):
+
+ - ASUS A1300 (A1B), A1370D
+ - ASUS L7300G
+ - ASUS L8400
+
+Patches, Errors, Questions
+--------------------------
+
+ I appreciate any success or failure
+ reports, especially if they add to or correct the compatibility table.
+ Please include the following information in your report:
+
+ - Asus model name
+ - a copy of your ACPI tables, using the "acpidump" utility
+ - a copy of /sys/devices/platform/asus-laptop/infos
+ - which driver features work and which don't
+ - the observed behavior of non-working features
+
+ Any other comments or patches are also more than welcome.
+
+ acpi4asus-user@lists.sourceforge.net
+
+ http://sourceforge.net/projects/acpi4asus
diff --git a/Documentation/admin-guide/laptops/disk-shock-protection.rst b/Documentation/admin-guide/laptops/disk-shock-protection.rst
new file mode 100644
index 000000000000..e97c5f78d8c3
--- /dev/null
+++ b/Documentation/admin-guide/laptops/disk-shock-protection.rst
@@ -0,0 +1,151 @@
+==========================
+Hard disk shock protection
+==========================
+
+Author: Elias Oltmanns <eo@nebensachen.de>
+
+Last modified: 2008-10-03
+
+
+.. 0. Contents
+
+   1. Intro
+   2. The interface
+   3. References
+   4. CREDITS
+
+
+1. Intro
+--------
+
+ATA/ATAPI-7 specifies the IDLE IMMEDIATE command with unload feature.
+Issuing this command should cause the drive to switch to idle mode and
+unload disk heads. This feature is being used in modern laptops in
+conjunction with accelerometers and appropriate software to implement
+a shock protection facility. The idea is to stop all I/O operations on
+the internal hard drive and park its heads on the ramp when critical
+situations are anticipated. The desire to have such a feature
+available on GNU/Linux systems has been the original motivation to
+implement a generic disk head parking interface in the Linux kernel.
+Please note, however, that other components have to be set up on your
+system in order to get disk shock protection working (see
+section 3. References below for pointers to more information about
+that).
+
+
+2. The interface
+----------------
+
+For each ATA device, the kernel exports the file
+`block/*/device/unload_heads` in sysfs (here assumed to be mounted under
+/sys). Access to `/sys/block/*/device/unload_heads` is denied with
+-EOPNOTSUPP if the device does not support the unload feature.
+Otherwise, writing an integer value to this file will take the heads
+of the respective drive off the platter and block all I/O operations
+for the specified number of milliseconds. When the timeout expires and
+no further disk head park request has been issued in the meantime,
+normal operation will be resumed. The maximal value accepted for a
+timeout is 30000 milliseconds. Exceeding this limit will return
+-EOVERFLOW, but heads will be parked anyway and the timeout will be
+set to 30 seconds. However, you can always change a timeout to any
+value between 0 and 30000 by issuing a subsequent head park request
+before the timeout of the previous one has expired. In particular, the
+total timeout can exceed 30 seconds and, more importantly, you can
+cancel a previously set timeout and resume normal operation
+immediately by specifying a timeout of 0. Values below -2 are rejected
+with -EINVAL (see below for the special meaning of -1 and -2). If the
+timeout specified for a recent head park request has not yet expired,
+reading from `/sys/block/*/device/unload_heads` will report the number
+of milliseconds remaining until normal operation will be resumed;
+otherwise, reading the unload_heads attribute will return 0.
+
+For example, do the following in order to park the heads of drive
+/dev/sda and stop all I/O operations for five seconds::
+
+	# echo 5000 > /sys/block/sda/device/unload_heads
+
+A simple::
+
+	# cat /sys/block/sda/device/unload_heads
+
+will show you how many milliseconds are left before normal operation
+will be resumed.
+
+A word of caution: The fact that the interface operates on a basis of
+milliseconds may raise expectations that cannot be satisfied in
+reality. In fact, the ATA specs clearly state that the time for an
+unload operation to complete is vendor specific. The hint in ATA-7
+that this will typically be within 500 milliseconds apparently has
+been dropped in ATA-8.
+
+There is a technical detail of this implementation that may cause some
+confusion and should be discussed here. When a head park request has
+been issued to a device successfully, all I/O operations on the
+controller port this device is attached to will be deferred. That is
+to say, any other device that may be connected to the same port will
+be affected too. The only exception is that a subsequent head unload
+request to that other device will be executed immediately. Further
+operations on that port will be deferred until the timeout specified
+for either device on the port has expired. As far as PATA (old style
+IDE) configurations are concerned, there can only be two devices
+attached to any single port. In SATA world we have port multipliers
+which means that a user-issued head parking request to one device may
+actually result in stopping I/O to a whole bunch of devices. However,
+since this feature is supposed to be used on laptops and does not seem
+to be very useful in any other environment, there will be mostly one
+device per port. Even if the CD/DVD writer happens to be connected to
+the same port as the hard drive, it generally *should* recover just
+fine from the occasional buffer under-run incurred by a head park
+request to the HD. Actually, when you are using an ide driver rather
+than its libata counterpart (i.e. your disk is called /dev/hda
+instead of /dev/sda), then parking the heads of one drive (drive X)
+will generally not affect the mode of operation of another drive
+(drive Y) on the same port as described above. It is only when a port
+reset is required to recover from an exception on drive Y that further
+I/O operations on that drive (and the reset itself) will be delayed
+until drive X is no longer in the parked state.
+
+Finally, there are some hard drives that only comply with an earlier
+version of the ATA standard than ATA-7, but do support the unload
+feature nonetheless. Unfortunately, there is no safe way Linux can
+detect these devices, so you won't be able to write to the
+unload_heads attribute. If you know that your device really does
+support the unload feature (for instance, because the vendor of your
+laptop or the hard drive itself told you so), then you can tell the
+kernel to enable the usage of this feature for that drive by writing
+the special value -1 to the unload_heads attribute::
+
+	# echo -1 > /sys/block/sda/device/unload_heads
+
+will enable the feature for /dev/sda, and giving -2 instead of -1 will
+disable it again.
+
+
+3. References
+-------------
+
+There are several laptops from different vendors featuring shock
+protection capabilities. As manufacturers have refused to support open
+source development of the required software components so far, Linux
+support for shock protection varies considerably between different
+hardware implementations. Ideally, this section should contain a list
+of pointers at different projects aiming at an implementation of shock
+protection on different systems. Unfortunately, I only know of a
+single project which, although still considered experimental, is fit
+for use. Please feel free to add projects that have been the victims
+of my ignorance.
+
+- http://www.thinkwiki.org/wiki/HDAPS
+
+  See this page for information about Linux support of the hard disk
+  active protection system as implemented in IBM/Lenovo Thinkpads.
+
+
+4. CREDITS
+----------
+
+This implementation of disk head parking has been inspired by a patch
+originally published by Jon Escombe <lists@dresco.co.uk>. My efforts
+to develop an implementation of this feature that is fit to be merged
+into mainline have been aided by various kernel developers, in
+particular by Tejun Heo and Bartlomiej Zolnierkiewicz.
diff --git a/Documentation/admin-guide/laptops/index.rst b/Documentation/admin-guide/laptops/index.rst
new file mode 100644
index 000000000000..6b554e39863b
--- /dev/null
+++ b/Documentation/admin-guide/laptops/index.rst
@@ -0,0 +1,16 @@
+
+==============
+Laptop Drivers
+==============
+
+.. toctree::
+   :maxdepth: 1
+
+   asus-laptop
+   disk-shock-protection
+   laptop-mode
+   lg-laptop
+   sony-laptop
+   sonypi
+   thinkpad-acpi
+   toshiba_haps
diff --git a/Documentation/admin-guide/laptops/laptop-mode.rst b/Documentation/admin-guide/laptops/laptop-mode.rst
new file mode 100644
index 000000000000..c984c4262f2e
--- /dev/null
+++ b/Documentation/admin-guide/laptops/laptop-mode.rst
@@ -0,0 +1,781 @@
+===============================================
+How to conserve battery power using laptop-mode
+===============================================
+
+Document Author: Bart Samwel (bart@samwel.tk)
+
+Date created: January 2, 2004
+
+Last modified: December 06, 2004
+
+Introduction
+------------
+
+Laptop mode is used to minimize the time that the hard disk needs to be spun up,
+to conserve battery power on laptops. It has been reported to cause significant
+power savings.
+
+.. Contents
+
+   * Introduction
+   * Installation
+   * Caveats
+   * The Details
+   * Tips & Tricks
+   * Control script
+   * ACPI integration
+   * Monitoring tool
+
+
+Installation
+------------
+
+To use laptop mode, you don't need to set any kernel configuration options
+or anything. Simply install all the files included in this document, and
+laptop mode will automatically be started when you're on battery. For
+your convenience, a tarball containing an installer can be downloaded at:
+
+	http://www.samwel.tk/laptop_mode/laptop_mode/
+
+To configure laptop mode, you need to edit the configuration file, which is
+located in /etc/default/laptop-mode on Debian-based systems, or in
+/etc/sysconfig/laptop-mode on other systems.
+
+Unfortunately, automatic enabling of laptop mode does not work for
+laptops that don't have ACPI. On those laptops, you need to start laptop
+mode manually. To start laptop mode, run "laptop_mode start", and to
+stop it, run "laptop_mode stop". (Note: The laptop mode tools package now
+has experimental support for APM, you might want to try that first.)
+
+
+Caveats
+-------
+
+* The downside of laptop mode is that you have a chance of losing up to 10
+  minutes of work. If you cannot afford this, don't use it! The supplied ACPI
+  scripts automatically turn off laptop mode when the battery almost runs out,
+  so that you won't lose any data at the end of your battery life.
+
+* Most desktop hard drives have a very limited lifetime measured in spindown
+  cycles, typically about 50.000 times (it's usually listed on the spec sheet).
+  Check your drive's rating, and don't wear down your drive's lifetime if you
+  don't need to.
+
+* If you mount some of your ext3/reiserfs filesystems with the -n option, then
+  the control script will not be able to remount them correctly. You must set
+  DO_REMOUNTS=0 in the control script, otherwise it will remount them with the
+  wrong options -- or it will fail because it cannot write to /etc/mtab.
+
+* If you have your filesystems listed as type "auto" in fstab, like I did, then
+  the control script will not recognize them as filesystems that need remounting.
+  You must list the filesystems with their true type instead.
+
+* It has been reported that some versions of the mutt mail client use file access
+  times to determine whether a folder contains new mail. If you use mutt and
+  experience this, you must disable the noatime remounting by setting the option
+  DO_REMOUNT_NOATIME to 0 in the configuration file.
+
+
+The Details
+-----------
+
+Laptop mode is controlled by the knob /proc/sys/vm/laptop_mode. This knob is
+present for all kernels that have the laptop mode patch, regardless of any
+configuration options. When the knob is set, any physical disk I/O (that might
+have caused the hard disk to spin up) causes Linux to flush all dirty blocks. The
+result of this is that after a disk has spun down, it will not be spun up
+anymore to write dirty blocks, because those blocks had already been written
+immediately after the most recent read operation. The value of the laptop_mode
+knob determines the time between the occurrence of disk I/O and when the flush
+is triggered. A sensible value for the knob is 5 seconds. Setting the knob to
+0 disables laptop mode.
+
+To increase the effectiveness of the laptop_mode strategy, the laptop_mode
+control script increases dirty_expire_centisecs and dirty_writeback_centisecs in
+/proc/sys/vm to about 10 minutes (by default), which means that pages that are
+dirtied are not forced to be written to disk as often. The control script also
+changes the dirty background ratio, so that background writeback of dirty pages
+is not done anymore. Combined with a higher commit value (also 10 minutes) for
+ext3 or ReiserFS filesystems (also done automatically by the control script),
+this results in concentration of disk activity in a small time interval which
+occurs only once every 10 minutes, or whenever the disk is forced to spin up by
+a cache miss. The disk can then be spun down in the periods of inactivity.
+
+If you want to find out which process caused the disk to spin up, you can
+gather information by setting the flag /proc/sys/vm/block_dump. When this flag
+is set, Linux reports all disk read and write operations that take place, and
+all block dirtyings done to files. This makes it possible to debug why a disk
+needs to spin up, and to increase battery life even more. The output of
+block_dump is written to the kernel output, and it can be retrieved using
+"dmesg". When you use block_dump and your kernel logging level also includes
+kernel debugging messages, you probably want to turn off klogd, otherwise
+the output of block_dump will be logged, causing disk activity that is not
+normally there.
+
+
+Configuration
+-------------
+
+The laptop mode configuration file is located in /etc/default/laptop-mode on
+Debian-based systems, or in /etc/sysconfig/laptop-mode on other systems. It
+contains the following options:
+
+MAX_AGE:
+
+Maximum time, in seconds, of hard drive spindown time that you are
+comfortable with. Worst case, it's possible that you could lose this
+amount of work if your battery fails while you're in laptop mode.
+
+MINIMUM_BATTERY_MINUTES:
+
+Automatically disable laptop mode if the remaining number of minutes of
+battery power is less than this value. Default is 10 minutes.
+
+AC_HD/BATT_HD:
+
+The idle timeout that should be set on your hard drive when laptop mode
+is active (BATT_HD) and when it is not active (AC_HD). The defaults are
+20 seconds (value 4) for BATT_HD  and 2 hours (value 244) for AC_HD. The
+possible values are those listed in the manual page for "hdparm" for the
+"-S" option.
+
+HD:
+
+The devices for which the spindown timeout should be adjusted by laptop mode.
+Default is /dev/hda. If you specify multiple devices, separate them by a space.
+
+READAHEAD:
+
+Disk readahead, in 512-byte sectors, while laptop mode is active. A large
+readahead can prevent disk accesses for things like executable pages (which are
+loaded on demand while the application executes) and sequentially accessed data
+(MP3s).
+
+DO_REMOUNTS:
+
+The control script automatically remounts any mounted journaled filesystems
+with appropriate commit interval options. When this option is set to 0, this
+feature is disabled.
+
+DO_REMOUNT_NOATIME:
+
+When remounting, should the filesystems be remounted with the noatime option?
+Normally, this is set to "1" (enabled), but there may be programs that require
+access time recording.
+
+DIRTY_RATIO:
+
+The percentage of memory that is allowed to contain "dirty" or unsaved data
+before a writeback is forced, while laptop mode is active. Corresponds to
+the /proc/sys/vm/dirty_ratio sysctl.
+
+DIRTY_BACKGROUND_RATIO:
+
+The percentage of memory that is allowed to contain "dirty" or unsaved data
+after a forced writeback is done due to an exceeding of DIRTY_RATIO. Set
+this nice and low. This corresponds to the /proc/sys/vm/dirty_background_ratio
+sysctl.
+
+Note that the behaviour of dirty_background_ratio is quite different
+when laptop mode is active and when it isn't. When laptop mode is inactive,
+dirty_background_ratio is the threshold percentage at which background writeouts
+start taking place. When laptop mode is active, however, background writeouts
+are disabled, and the dirty_background_ratio only determines how much writeback
+is done when dirty_ratio is reached.
+
+DO_CPU:
+
+Enable CPU frequency scaling when in laptop mode. (Requires CPUFreq to be setup.
+See Documentation/admin-guide/pm/cpufreq.rst for more info. Disabled by default.)
+
+CPU_MAXFREQ:
+
+When on battery, what is the maximum CPU speed that the system should use? Legal
+values are "slowest" for the slowest speed that your CPU is able to operate at,
+or a value listed in /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies.
+
+
+Tips & Tricks
+-------------
+
+* Bartek Kania reports getting up to 50 minutes of extra battery life (on top
+  of his regular 3 to 3.5 hours) using a spindown time of 5 seconds (BATT_HD=1).
+
+* You can spin down the disk while playing MP3, by setting disk readahead
+  to 8MB (READAHEAD=16384). Effectively, the disk will read a complete MP3 at
+  once, and will then spin down while the MP3 is playing. (Thanks to Bartek
+  Kania.)
+
+* Drew Scott Daniels observed: "I don't know why, but when I decrease the number
+  of colours that my display uses it consumes less battery power. I've seen
+  this on powerbooks too. I hope that this is a piece of information that
+  might be useful to the Laptop Mode patch or its users."
+
+* In syslog.conf, you can prefix entries with a dash `-` to omit syncing the
+  file after every logging. When you're using laptop-mode and your disk doesn't
+  spin down, this is a likely culprit.
+
+* Richard Atterer observed that laptop mode does not work well with noflushd
+  (http://noflushd.sourceforge.net/), it seems that noflushd prevents laptop-mode
+  from doing its thing.
+
+* If you're worried about your data, you might want to consider using a USB
+  memory stick or something like that as a "working area". (Be aware though
+  that flash memory can only handle a limited number of writes, and overuse
+  may wear out your memory stick pretty quickly. Do _not_ use journalling
+  filesystems on flash memory sticks.)
+
+
+Configuration file for control and ACPI battery scripts
+-------------------------------------------------------
+
+This allows the tunables to be changed for the scripts via an external
+configuration file
+
+It should be installed as /etc/default/laptop-mode on Debian, and as
+/etc/sysconfig/laptop-mode on Red Hat, SUSE, Mandrake, and other work-alikes.
+
+Config file::
+
+  # Maximum time, in seconds, of hard drive spindown time that you are
+  # comfortable with. Worst case, it's possible that you could lose this
+  # amount of work if your battery fails you while in laptop mode.
+  #MAX_AGE=600
+
+  # Automatically disable laptop mode when the number of minutes of battery
+  # that you have left goes below this threshold.
+  MINIMUM_BATTERY_MINUTES=10
+
+  # Read-ahead, in 512-byte sectors. You can spin down the disk while playing MP3/OGG
+  # by setting the disk readahead to 8MB (READAHEAD=16384). Effectively, the disk
+  # will read a complete MP3 at once, and will then spin down while the MP3/OGG is
+  # playing.
+  #READAHEAD=4096
+
+  # Shall we remount journaled fs. with appropriate commit interval? (1=yes)
+  #DO_REMOUNTS=1
+
+  # And shall we add the "noatime" option to that as well? (1=yes)
+  #DO_REMOUNT_NOATIME=1
+
+  # Dirty synchronous ratio.  At this percentage of dirty pages the process
+  # which
+  # calls write() does its own writeback
+  #DIRTY_RATIO=40
+
+  #
+  # Allowed dirty background ratio, in percent.  Once DIRTY_RATIO has been
+  # exceeded, the kernel will wake flusher threads which will then reduce the
+  # amount of dirty memory to dirty_background_ratio.  Set this nice and low,
+  # so once some writeout has commenced, we do a lot of it.
+  #
+  #DIRTY_BACKGROUND_RATIO=5
+
+  # kernel default dirty buffer age
+  #DEF_AGE=30
+  #DEF_UPDATE=5
+  #DEF_DIRTY_BACKGROUND_RATIO=10
+  #DEF_DIRTY_RATIO=40
+  #DEF_XFS_AGE_BUFFER=15
+  #DEF_XFS_SYNC_INTERVAL=30
+  #DEF_XFS_BUFD_INTERVAL=1
+
+  # This must be adjusted manually to the value of HZ in the running kernel
+  # on 2.4, until the XFS people change their 2.4 external interfaces to work in
+  # centisecs. This can be automated, but it's a work in progress that still
+  # needs# some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for
+  # external interfaces, and that is currently always set to 100. So you don't
+  # need to change this on 2.6.
+  #XFS_HZ=100
+
+  # Should the maximum CPU frequency be adjusted down while on battery?
+  # Requires CPUFreq to be setup.
+  # See Documentation/admin-guide/pm/cpufreq.rst for more info
+  #DO_CPU=0
+
+  # When on battery what is the maximum CPU speed that the system should
+  # use? Legal values are "slowest" for the slowest speed that your
+  # CPU is able to operate at, or a value listed in:
+  # /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies
+  # Only applicable if DO_CPU=1.
+  #CPU_MAXFREQ=slowest
+
+  # Idle timeout for your hard drive (man hdparm for valid values, -S option)
+  # Default is 2 hours on AC (AC_HD=244) and 20 seconds for battery (BATT_HD=4).
+  #AC_HD=244
+  #BATT_HD=4
+
+  # The drives for which to adjust the idle timeout. Separate them by a space,
+  # e.g. HD="/dev/hda /dev/hdb".
+  #HD="/dev/hda"
+
+  # Set the spindown timeout on a hard drive?
+  #DO_HD=1
+
+
+Control script
+--------------
+
+Please note that this control script works for the Linux 2.4 and 2.6 series (thanks
+to Kiko Piris).
+
+Control script::
+
+  #!/bin/bash
+
+  # start or stop laptop_mode, best run by a power management daemon when
+  # ac gets connected/disconnected from a laptop
+  #
+  # install as /sbin/laptop_mode
+  #
+  # Contributors to this script:   Kiko Piris
+  #				 Bart Samwel
+  #				 Micha Feigin
+  #				 Andrew Morton
+  #				 Herve Eychenne
+  #				 Dax Kelson
+  #
+  # Original Linux 2.4 version by: Jens Axboe
+
+  #############################################################################
+
+  # Source config
+  if [ -f /etc/default/laptop-mode ] ; then
+	# Debian
+	. /etc/default/laptop-mode
+  elif [ -f /etc/sysconfig/laptop-mode ] ; then
+	# Others
+          . /etc/sysconfig/laptop-mode
+  fi
+
+  # Don't raise an error if the config file is incomplete
+  # set defaults instead:
+
+  # Maximum time, in seconds, of hard drive spindown time that you are
+  # comfortable with. Worst case, it's possible that you could lose this
+  # amount of work if your battery fails you while in laptop mode.
+  MAX_AGE=${MAX_AGE:-'600'}
+
+  # Read-ahead, in kilobytes
+  READAHEAD=${READAHEAD:-'4096'}
+
+  # Shall we remount journaled fs. with appropriate commit interval? (1=yes)
+  DO_REMOUNTS=${DO_REMOUNTS:-'1'}
+
+  # And shall we add the "noatime" option to that as well? (1=yes)
+  DO_REMOUNT_NOATIME=${DO_REMOUNT_NOATIME:-'1'}
+
+  # Shall we adjust the idle timeout on a hard drive?
+  DO_HD=${DO_HD:-'1'}
+
+  # Adjust idle timeout on which hard drive?
+  HD="${HD:-'/dev/hda'}"
+
+  # spindown time for HD (hdparm -S values)
+  AC_HD=${AC_HD:-'244'}
+  BATT_HD=${BATT_HD:-'4'}
+
+  # Dirty synchronous ratio.  At this percentage of dirty pages the process which
+  # calls write() does its own writeback
+  DIRTY_RATIO=${DIRTY_RATIO:-'40'}
+
+  # cpu frequency scaling
+  # See Documentation/admin-guide/pm/cpufreq.rst for more info
+  DO_CPU=${CPU_MANAGE:-'0'}
+  CPU_MAXFREQ=${CPU_MAXFREQ:-'slowest'}
+
+  #
+  # Allowed dirty background ratio, in percent.  Once DIRTY_RATIO has been
+  # exceeded, the kernel will wake flusher threads which will then reduce the
+  # amount of dirty memory to dirty_background_ratio.  Set this nice and low,
+  # so once some writeout has commenced, we do a lot of it.
+  #
+  DIRTY_BACKGROUND_RATIO=${DIRTY_BACKGROUND_RATIO:-'5'}
+
+  # kernel default dirty buffer age
+  DEF_AGE=${DEF_AGE:-'30'}
+  DEF_UPDATE=${DEF_UPDATE:-'5'}
+  DEF_DIRTY_BACKGROUND_RATIO=${DEF_DIRTY_BACKGROUND_RATIO:-'10'}
+  DEF_DIRTY_RATIO=${DEF_DIRTY_RATIO:-'40'}
+  DEF_XFS_AGE_BUFFER=${DEF_XFS_AGE_BUFFER:-'15'}
+  DEF_XFS_SYNC_INTERVAL=${DEF_XFS_SYNC_INTERVAL:-'30'}
+  DEF_XFS_BUFD_INTERVAL=${DEF_XFS_BUFD_INTERVAL:-'1'}
+
+  # This must be adjusted manually to the value of HZ in the running kernel
+  # on 2.4, until the XFS people change their 2.4 external interfaces to work in
+  # centisecs. This can be automated, but it's a work in progress that still needs
+  # some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for external
+  # interfaces, and that is currently always set to 100. So you don't need to
+  # change this on 2.6.
+  XFS_HZ=${XFS_HZ:-'100'}
+
+  #############################################################################
+
+  KLEVEL="$(uname -r |
+               {
+	       IFS='.' read a b c
+	       echo $a.$b
+	     }
+  )"
+  case "$KLEVEL" in
+	"2.4"|"2.6")
+		;;
+	*)
+		echo "Unhandled kernel version: $KLEVEL ('uname -r' = '$(uname -r)')" >&2
+		exit 1
+		;;
+  esac
+
+  if [ ! -e /proc/sys/vm/laptop_mode ] ; then
+	echo "Kernel is not patched with laptop_mode patch." >&2
+	exit 1
+  fi
+
+  if [ ! -w /proc/sys/vm/laptop_mode ] ; then
+	echo "You do not have enough privileges to enable laptop_mode." >&2
+	exit 1
+  fi
+
+  # Remove an option (the first parameter) of the form option=<number> from
+  # a mount options string (the rest of the parameters).
+  parse_mount_opts () {
+	OPT="$1"
+	shift
+	echo ",$*," | sed		\
+	 -e 's/,'"$OPT"'=[0-9]*,/,/g'	\
+	 -e 's/,,*/,/g'			\
+	 -e 's/^,//'			\
+	 -e 's/,$//'
+  }
+
+  # Remove an option (the first parameter) without any arguments from
+  # a mount option string (the rest of the parameters).
+  parse_nonumber_mount_opts () {
+	OPT="$1"
+	shift
+	echo ",$*," | sed		\
+	 -e 's/,'"$OPT"',/,/g'		\
+	 -e 's/,,*/,/g'			\
+	 -e 's/^,//'			\
+	 -e 's/,$//'
+  }
+
+  # Find out the state of a yes/no option (e.g. "atime"/"noatime") in
+  # fstab for a given filesystem, and use this state to replace the
+  # value of the option in another mount options string. The device
+  # is the first argument, the option name the second, and the default
+  # value the third. The remainder is the mount options string.
+  #
+  # Example:
+  # parse_yesno_opts_wfstab /dev/hda1 atime atime defaults,noatime
+  #
+  # If fstab contains, say, "rw" for this filesystem, then the result
+  # will be "defaults,atime".
+  parse_yesno_opts_wfstab () {
+	L_DEV="$1"
+	OPT="$2"
+	DEF_OPT="$3"
+	shift 3
+	L_OPTS="$*"
+	PARSEDOPTS1="$(parse_nonumber_mount_opts $OPT $L_OPTS)"
+	PARSEDOPTS1="$(parse_nonumber_mount_opts no$OPT $PARSEDOPTS1)"
+	# Watch for a default atime in fstab
+	FSTAB_OPTS="$(awk '$1 == "'$L_DEV'" { print $4 }' /etc/fstab)"
+	if echo "$FSTAB_OPTS" | grep "$OPT" > /dev/null ; then
+		# option specified in fstab: extract the value and use it
+		if echo "$FSTAB_OPTS" | grep "no$OPT" > /dev/null ; then
+			echo "$PARSEDOPTS1,no$OPT"
+		else
+			# no$OPT not found -- so we must have $OPT.
+			echo "$PARSEDOPTS1,$OPT"
+		fi
+	else
+		# option not specified in fstab -- choose the default.
+		echo "$PARSEDOPTS1,$DEF_OPT"
+	fi
+  }
+
+  # Find out the state of a numbered option (e.g. "commit=NNN") in
+  # fstab for a given filesystem, and use this state to replace the
+  # value of the option in another mount options string. The device
+  # is the first argument, and the option name the second. The
+  # remainder is the mount options string in which the replacement
+  # must be done.
+  #
+  # Example:
+  # parse_mount_opts_wfstab /dev/hda1 commit defaults,commit=7
+  #
+  # If fstab contains, say, "commit=3,rw" for this filesystem, then the
+  # result will be "rw,commit=3".
+  parse_mount_opts_wfstab () {
+	L_DEV="$1"
+	OPT="$2"
+	shift 2
+	L_OPTS="$*"
+	PARSEDOPTS1="$(parse_mount_opts $OPT $L_OPTS)"
+	# Watch for a default commit in fstab
+	FSTAB_OPTS="$(awk '$1 == "'$L_DEV'" { print $4 }' /etc/fstab)"
+	if echo "$FSTAB_OPTS" | grep "$OPT=" > /dev/null ; then
+		# option specified in fstab: extract the value, and use it
+		echo -n "$PARSEDOPTS1,$OPT="
+		echo ",$FSTAB_OPTS," | sed \
+		 -e 's/.*,'"$OPT"'=//'	\
+		 -e 's/,.*//'
+	else
+		# option not specified in fstab: set it to 0
+		echo "$PARSEDOPTS1,$OPT=0"
+	fi
+  }
+
+  deduce_fstype () {
+	MP="$1"
+	# My root filesystem unfortunately has
+	# type "unknown" in /etc/mtab. If we encounter
+	# "unknown", we try to get the type from fstab.
+	cat /etc/fstab |
+	grep -v '^#' |
+	while read FSTAB_DEV FSTAB_MP FSTAB_FST FSTAB_OPTS FSTAB_DUMP FSTAB_DUMP ; do
+		if [ "$FSTAB_MP" = "$MP" ]; then
+			echo $FSTAB_FST
+			exit 0
+		fi
+	done
+  }
+
+  if [ $DO_REMOUNT_NOATIME -eq 1 ] ; then
+	NOATIME_OPT=",noatime"
+  fi
+
+  case "$1" in
+	start)
+		AGE=$((100*$MAX_AGE))
+		XFS_AGE=$(($XFS_HZ*$MAX_AGE))
+		echo -n "Starting laptop_mode"
+
+		if [ -d /proc/sys/vm/pagebuf ] ; then
+			# (For 2.4 and early 2.6.)
+			# This only needs to be set, not reset -- it is only used when
+			# laptop mode is enabled.
+			echo $XFS_AGE > /proc/sys/vm/pagebuf/lm_flush_age
+			echo $XFS_AGE > /proc/sys/fs/xfs/lm_sync_interval
+		elif [ -f /proc/sys/fs/xfs/lm_age_buffer ] ; then
+			# (A couple of early 2.6 laptop mode patches had these.)
+			# The same goes for these.
+			echo $XFS_AGE > /proc/sys/fs/xfs/lm_age_buffer
+			echo $XFS_AGE > /proc/sys/fs/xfs/lm_sync_interval
+		elif [ -f /proc/sys/fs/xfs/age_buffer ] ; then
+			# (2.6.6)
+			# But not for these -- they are also used in normal
+			# operation.
+			echo $XFS_AGE > /proc/sys/fs/xfs/age_buffer
+			echo $XFS_AGE > /proc/sys/fs/xfs/sync_interval
+		elif [ -f /proc/sys/fs/xfs/age_buffer_centisecs ] ; then
+			# (2.6.7 upwards)
+			# And not for these either. These are in centisecs,
+			# not USER_HZ, so we have to use $AGE, not $XFS_AGE.
+			echo $AGE > /proc/sys/fs/xfs/age_buffer_centisecs
+			echo $AGE > /proc/sys/fs/xfs/xfssyncd_centisecs
+			echo 3000 > /proc/sys/fs/xfs/xfsbufd_centisecs
+		fi
+
+		case "$KLEVEL" in
+			"2.4")
+				echo 1					> /proc/sys/vm/laptop_mode
+				echo "30 500 0 0 $AGE $AGE 60 20 0"	> /proc/sys/vm/bdflush
+				;;
+			"2.6")
+				echo 5					> /proc/sys/vm/laptop_mode
+				echo "$AGE"				> /proc/sys/vm/dirty_writeback_centisecs
+				echo "$AGE"				> /proc/sys/vm/dirty_expire_centisecs
+				echo "$DIRTY_RATIO"			> /proc/sys/vm/dirty_ratio
+				echo "$DIRTY_BACKGROUND_RATIO"		> /proc/sys/vm/dirty_background_ratio
+				;;
+		esac
+		if [ $DO_REMOUNTS -eq 1 ]; then
+			cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do
+				PARSEDOPTS="$(parse_mount_opts "$OPTS")"
+				if [ "$FST" = 'unknown' ]; then
+					FST=$(deduce_fstype $MP)
+				fi
+				case "$FST" in
+					"ext3"|"reiserfs")
+						PARSEDOPTS="$(parse_mount_opts commit "$OPTS")"
+						mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE$NOATIME_OPT
+						;;
+					"xfs")
+						mount $DEV -t $FST $MP -o remount,$OPTS$NOATIME_OPT
+						;;
+				esac
+				if [ -b $DEV ] ; then
+					blockdev --setra $(($READAHEAD * 2)) $DEV
+				fi
+			done
+		fi
+		if [ $DO_HD -eq 1 ] ; then
+			for THISHD in $HD ; do
+				/sbin/hdparm -S $BATT_HD $THISHD > /dev/null 2>&1
+				/sbin/hdparm -B 1 $THISHD > /dev/null 2>&1
+			done
+		fi
+		if [ $DO_CPU -eq 1 -a -e /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq ]; then
+			if [ $CPU_MAXFREQ = 'slowest' ]; then
+				CPU_MAXFREQ=`cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq`
+			fi
+			echo $CPU_MAXFREQ > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
+		fi
+		echo "."
+		;;
+	stop)
+		U_AGE=$((100*$DEF_UPDATE))
+		B_AGE=$((100*$DEF_AGE))
+		echo -n "Stopping laptop_mode"
+		echo 0 > /proc/sys/vm/laptop_mode
+		if [ -f /proc/sys/fs/xfs/age_buffer -a ! -f /proc/sys/fs/xfs/lm_age_buffer ] ; then
+			# These need to be restored, if there are no lm_*.
+			echo $(($XFS_HZ*$DEF_XFS_AGE_BUFFER))	 	> /proc/sys/fs/xfs/age_buffer
+			echo $(($XFS_HZ*$DEF_XFS_SYNC_INTERVAL)) 	> /proc/sys/fs/xfs/sync_interval
+		elif [ -f /proc/sys/fs/xfs/age_buffer_centisecs ] ; then
+			# These need to be restored as well.
+			echo $((100*$DEF_XFS_AGE_BUFFER))	> /proc/sys/fs/xfs/age_buffer_centisecs
+			echo $((100*$DEF_XFS_SYNC_INTERVAL))	> /proc/sys/fs/xfs/xfssyncd_centisecs
+			echo $((100*$DEF_XFS_BUFD_INTERVAL))	> /proc/sys/fs/xfs/xfsbufd_centisecs
+		fi
+		case "$KLEVEL" in
+			"2.4")
+				echo "30 500 0 0 $U_AGE $B_AGE 60 20 0"	> /proc/sys/vm/bdflush
+				;;
+			"2.6")
+				echo "$U_AGE"				> /proc/sys/vm/dirty_writeback_centisecs
+				echo "$B_AGE"				> /proc/sys/vm/dirty_expire_centisecs
+				echo "$DEF_DIRTY_RATIO"			> /proc/sys/vm/dirty_ratio
+				echo "$DEF_DIRTY_BACKGROUND_RATIO"	> /proc/sys/vm/dirty_background_ratio
+				;;
+		esac
+		if [ $DO_REMOUNTS -eq 1 ] ; then
+			cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do
+				# Reset commit and atime options to defaults.
+				if [ "$FST" = 'unknown' ]; then
+					FST=$(deduce_fstype $MP)
+				fi
+				case "$FST" in
+					"ext3"|"reiserfs")
+						PARSEDOPTS="$(parse_mount_opts_wfstab $DEV commit $OPTS)"
+						PARSEDOPTS="$(parse_yesno_opts_wfstab $DEV atime atime $PARSEDOPTS)"
+						mount $DEV -t $FST $MP -o remount,$PARSEDOPTS
+						;;
+					"xfs")
+						PARSEDOPTS="$(parse_yesno_opts_wfstab $DEV atime atime $OPTS)"
+						mount $DEV -t $FST $MP -o remount,$PARSEDOPTS
+						;;
+				esac
+				if [ -b $DEV ] ; then
+					blockdev --setra 256 $DEV
+				fi
+			done
+		fi
+		if [ $DO_HD -eq 1 ] ; then
+			for THISHD in $HD ; do
+				/sbin/hdparm -S $AC_HD $THISHD > /dev/null 2>&1
+				/sbin/hdparm -B 255 $THISHD > /dev/null 2>&1
+			done
+		fi
+		if [ $DO_CPU -eq 1 -a -e /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq ]; then
+			echo `cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq` > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
+		fi
+		echo "."
+		;;
+	*)
+		echo "Usage: $0 {start|stop}" 2>&1
+		exit 1
+		;;
+
+  esac
+
+  exit 0
+
+
+ACPI integration
+----------------
+
+Dax Kelson submitted this so that the ACPI acpid daemon will
+kick off the laptop_mode script and run hdparm. The part that
+automatically disables laptop mode when the battery is low was
+written by Jan Topinski.
+
+/etc/acpi/events/ac_adapter::
+
+	event=ac_adapter
+	action=/etc/acpi/actions/ac.sh %e
+
+/etc/acpi/events/battery::
+
+	event=battery.*
+	action=/etc/acpi/actions/battery.sh %e
+
+/etc/acpi/actions/ac.sh::
+
+  #!/bin/bash
+
+  # ac on/offline event handler
+
+  status=`awk '/^state: / { print $2 }' /proc/acpi/ac_adapter/$2/state`
+
+  case $status in
+          "on-line")
+                  /sbin/laptop_mode stop
+                  exit 0
+          ;;
+          "off-line")
+                  /sbin/laptop_mode start
+                  exit 0
+          ;;
+  esac
+
+
+/etc/acpi/actions/battery.sh::
+
+  #! /bin/bash
+
+  # Automatically disable laptop mode when the battery almost runs out.
+
+  BATT_INFO=/proc/acpi/battery/$2/state
+
+  if [[ -f /proc/sys/vm/laptop_mode ]]
+  then
+     LM=`cat /proc/sys/vm/laptop_mode`
+     if [[ $LM -gt 0 ]]
+     then
+       if [[ -f $BATT_INFO ]]
+       then
+          # Source the config file only now that we know we need
+          if [ -f /etc/default/laptop-mode ] ; then
+                  # Debian
+                  . /etc/default/laptop-mode
+          elif [ -f /etc/sysconfig/laptop-mode ] ; then
+                  # Others
+                  . /etc/sysconfig/laptop-mode
+          fi
+          MINIMUM_BATTERY_MINUTES=${MINIMUM_BATTERY_MINUTES:-'10'}
+
+          ACTION="`cat $BATT_INFO | grep charging | cut -c 26-`"
+          if [[ ACTION -eq "discharging" ]]
+          then
+             PRESENT_RATE=`cat $BATT_INFO | grep "present rate:" | sed  "s/.* \([0-9][0-9]* \).*/\1/" `
+             REMAINING=`cat $BATT_INFO | grep "remaining capacity:" | sed  "s/.* \([0-9][0-9]* \).*/\1/" `
+          fi
+          if (($REMAINING * 60 / $PRESENT_RATE < $MINIMUM_BATTERY_MINUTES))
+          then
+             /sbin/laptop_mode stop
+          fi
+       else
+         logger -p daemon.warning "You are using laptop mode and your battery interface $BATT_INFO is missing. This may lead to loss of data when the battery runs out. Check kernel ACPI support and /proc/acpi/battery folder, and edit /etc/acpi/battery.sh to set BATT_INFO to the correct path."
+       fi
+     fi
+  fi
+
+
+Monitoring tool
+---------------
+
+Bartek Kania submitted this, it can be used to measure how much time your disk
+spends spun up/down.  See tools/laptop/dslm/dslm.c
diff --git a/Documentation/admin-guide/laptops/lg-laptop.rst b/Documentation/admin-guide/laptops/lg-laptop.rst
new file mode 100644
index 000000000000..ce9b14671cb9
--- /dev/null
+++ b/Documentation/admin-guide/laptops/lg-laptop.rst
@@ -0,0 +1,84 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+
+LG Gram laptop extra features
+=============================
+
+By Matan Ziv-Av <matan@svgalib.org>
+
+
+Hotkeys
+-------
+
+The following FN keys are ignored by the kernel without this driver:
+
+- FN-F1 (LG control panel)   - Generates F15
+- FN-F5 (Touchpad toggle)    - Generates F13
+- FN-F6 (Airplane mode)      - Generates RFKILL
+- FN-F8 (Keyboard backlight) - Generates F16.
+  This key also changes keyboard backlight mode.
+- FN-F9 (Reader mode)        - Generates F14
+
+The rest of the FN keys work without a need for a special driver.
+
+
+Reader mode
+-----------
+
+Writing 0/1 to /sys/devices/platform/lg-laptop/reader_mode disables/enables
+reader mode. In this mode the screen colors change (blue color reduced),
+and the reader mode indicator LED (on F9 key) turns on.
+
+
+FN Lock
+-------
+
+Writing 0/1 to /sys/devices/platform/lg-laptop/fn_lock disables/enables
+FN lock.
+
+
+Battery care limit
+------------------
+
+Writing 80/100 to /sys/devices/platform/lg-laptop/battery_care_limit
+sets the maximum capacity to charge the battery. Limiting the charge
+reduces battery capacity loss over time.
+
+This value is reset to 100 when the kernel boots.
+
+
+Fan mode
+--------
+
+Writing 1/0 to /sys/devices/platform/lg-laptop/fan_mode disables/enables
+the fan silent mode.
+
+
+USB charge
+----------
+
+Writing 0/1 to /sys/devices/platform/lg-laptop/usb_charge disables/enables
+charging another device from the USB port while the device is turned off.
+
+This value is reset to 0 when the kernel boots.
+
+
+LEDs
+~~~~
+
+The are two LED devices supported by the driver:
+
+Keyboard backlight
+------------------
+
+A led device named kbd_led controls the keyboard backlight. There are three
+lighting level: off (0), low (127) and high (255).
+
+The keyboard backlight is also controlled by the key combination FN-F8
+which cycles through those levels.
+
+
+Touchpad indicator LED
+----------------------
+
+On the F5 key. Controlled by led device names tpad_led.
diff --git a/Documentation/admin-guide/laptops/sony-laptop.rst b/Documentation/admin-guide/laptops/sony-laptop.rst
new file mode 100644
index 000000000000..9edcc7f6612f
--- /dev/null
+++ b/Documentation/admin-guide/laptops/sony-laptop.rst
@@ -0,0 +1,174 @@
+=========================================
+Sony Notebook Control Driver (SNC) Readme
+=========================================
+
+	- Copyright (C) 2004- 2005 Stelian Pop <stelian@popies.net>
+	- Copyright (C) 2007 Mattia Dongili <malattia@linux.it>
+
+This mini-driver drives the SNC and SPIC device present in the ACPI BIOS of the
+Sony Vaio laptops. This driver mixes both devices functions under the same
+(hopefully consistent) interface. This also means that the sonypi driver is
+obsoleted by sony-laptop now.
+
+Fn keys (hotkeys):
+------------------
+
+Some models report hotkeys through the SNC or SPIC devices, such events are
+reported both through the ACPI subsystem as acpi events and through the INPUT
+subsystem. See the logs of /proc/bus/input/devices to find out what those
+events are and which input devices are created by the driver.
+Additionally, loading the driver with the debug option will report all events
+in the kernel log.
+
+The "scancodes" passed to the input system (that can be remapped with udev)
+are indexes to the table "sony_laptop_input_keycode_map" in the sony-laptop.c
+module.  For example the "FN/E" key combination (EJECTCD on some models)
+generates the scancode 20 (0x14).
+
+Backlight control:
+------------------
+If your laptop model supports it, you will find sysfs files in the
+/sys/class/backlight/sony/
+directory. You will be able to query and set the current screen
+brightness:
+
+	======================	=========================================
+	brightness		get/set screen brightness (an integer
+				between 0 and 7)
+	actual_brightness	reading from this file will query the HW
+				to get real brightness value
+	max_brightness		the maximum brightness value
+	======================	=========================================
+
+
+Platform specific:
+------------------
+Loading the sony-laptop module will create a
+/sys/devices/platform/sony-laptop/
+directory populated with some files.
+
+You then read/write integer values from/to those files by using
+standard UNIX tools.
+
+The files are:
+
+	======================	==========================================
+	brightness_default	screen brightness which will be set
+				when the laptop will be rebooted
+	cdpower			power on/off the internal CD drive
+	audiopower		power on/off the internal sound card
+	lanpower		power on/off the internal ethernet card
+				(only in debug mode)
+	bluetoothpower		power on/off the internal bluetooth device
+	fanspeed		get/set the fan speed
+	======================	==========================================
+
+Note that some files may be missing if they are not supported
+by your particular laptop model.
+
+Example usage::
+
+	# echo "1" > /sys/devices/platform/sony-laptop/brightness_default
+
+sets the lowest screen brightness for the next and later reboots
+
+::
+
+	# echo "8" > /sys/devices/platform/sony-laptop/brightness_default
+
+sets the highest screen brightness for the next and later reboots
+
+::
+
+	# cat /sys/devices/platform/sony-laptop/brightness_default
+
+retrieves the value
+
+::
+
+	# echo "0" > /sys/devices/platform/sony-laptop/audiopower
+
+powers off the sound card
+
+::
+
+	# echo "1" > /sys/devices/platform/sony-laptop/audiopower
+
+powers on the sound card.
+
+
+RFkill control:
+---------------
+More recent Vaio models expose a consistent set of ACPI methods to
+control radio frequency emitting devices. If you are a lucky owner of
+such a laptop you will find the necessary rfkill devices under
+/sys/class/rfkill. Check those starting with sony-* in::
+
+	# grep . /sys/class/rfkill/*/{state,name}
+
+
+Development:
+------------
+
+If you want to help with the development of this driver (and
+you are not afraid of any side effects doing strange things with
+your ACPI BIOS could have on your laptop), load the driver and
+pass the option 'debug=1'.
+
+REPEAT:
+	**DON'T DO THIS IF YOU DON'T LIKE RISKY BUSINESS.**
+
+In your kernel logs you will find the list of all ACPI methods
+the SNC device has on your laptop.
+
+* For new models you will see a long list of meaningless method names,
+  reading the DSDT table source should reveal that:
+
+(1) the SNC device uses an internal capability lookup table
+(2) SN00 is used to find values in the lookup table
+(3) SN06 and SN07 are used to call into the real methods based on
+    offsets you can obtain iterating the table using SN00
+(4) SN02 used to enable events.
+
+Some values in the capability lookup table are more or less known, see
+the code for all sony_call_snc_handle calls, others are more obscure.
+
+* For old models you can see the GCDP/GCDP methods used to pwer on/off
+  the CD drive, but there are others and they are usually different from
+  model to model.
+
+**I HAVE NO IDEA WHAT THOSE METHODS DO.**
+
+The sony-laptop driver creates, for some of those methods (the most
+current ones found on several Vaio models), an entry under
+/sys/devices/platform/sony-laptop, just like the 'cdpower' one.
+You can create other entries corresponding to your own laptop methods by
+further editing the source (see the 'sony_nc_values' table, and add a new
+entry to this table with your get/set method names using the
+SNC_HANDLE_NAMES macro).
+
+Your mission, should you accept it, is to try finding out what
+those entries are for, by reading/writing random values from/to those
+files and find out what is the impact on your laptop.
+
+Should you find anything interesting, please report it back to me,
+I will not disavow all knowledge of your actions :)
+
+See also http://www.linux.it/~malattia/wiki/index.php/Sony_drivers for other
+useful info.
+
+Bugs/Limitations:
+-----------------
+
+* This driver is not based on official documentation from Sony
+  (because there is none), so there is no guarantee this driver
+  will work at all, or do the right thing. Although this hasn't
+  happened to me, this driver could do very bad things to your
+  laptop, including permanent damage.
+
+* The sony-laptop and sonypi drivers do not interact at all. In the
+  future, sonypi will be removed and replaced by sony-laptop.
+
+* spicctrl, which is the userspace tool used to communicate with the
+  sonypi driver (through /dev/sonypi) is deprecated as well since all
+  its features are now available under the sysfs tree via sony-laptop.
diff --git a/Documentation/admin-guide/laptops/sonypi.rst b/Documentation/admin-guide/laptops/sonypi.rst
new file mode 100644
index 000000000000..2a1975ed7ee4
--- /dev/null
+++ b/Documentation/admin-guide/laptops/sonypi.rst
@@ -0,0 +1,160 @@
+==================================================
+Sony Programmable I/O Control Device Driver Readme
+==================================================
+
+	- Copyright (C) 2001-2004 Stelian Pop <stelian@popies.net>
+	- Copyright (C) 2001-2002 Alcôve <www.alcove.com>
+	- Copyright (C) 2001 Michael Ashley <m.ashley@unsw.edu.au>
+	- Copyright (C) 2001 Junichi Morita <jun1m@mars.dti.ne.jp>
+	- Copyright (C) 2000 Takaya Kinjo <t-kinjo@tc4.so-net.ne.jp>
+	- Copyright (C) 2000 Andrew Tridgell <tridge@samba.org>
+
+This driver enables access to the Sony Programmable I/O Control Device which
+can be found in many Sony Vaio laptops. Some newer Sony laptops (seems to be
+limited to new FX series laptops, at least the FX501 and the FX702) lack a
+sonypi device and are not supported at all by this driver.
+
+It will give access (through a user space utility) to some events those laptops
+generate, like:
+
+	- jogdial events (the small wheel on the side of Vaios)
+	- capture button events (only on Vaio Picturebook series)
+	- Fn keys
+	- bluetooth button (only on C1VR model)
+	- programmable keys, back, help, zoom, thumbphrase buttons, etc.
+	  (when available)
+
+Those events (see linux/sonypi.h) can be polled using the character device node
+/dev/sonypi (major 10, minor auto allocated or specified as a option).
+A simple daemon which translates the jogdial movements into mouse wheel events
+can be downloaded at: <http://popies.net/sonypi/>
+
+Another option to intercept the events is to get them directly through the
+input layer.
+
+This driver supports also some ioctl commands for setting the LCD screen
+brightness and querying the batteries charge information (some more
+commands may be added in the future).
+
+This driver can also be used to set the camera controls on Picturebook series
+(brightness, contrast etc), and is used by the video4linux driver for the
+Motion Eye camera.
+
+Please note that this driver was created by reverse engineering the Windows
+driver and the ACPI BIOS, because Sony doesn't agree to release any programming
+specs for its laptops. If someone convinces them to do so, drop me a note.
+
+Driver options:
+---------------
+
+Several options can be passed to the sonypi driver using the standard
+module argument syntax (<param>=<value> when passing the option to the
+module or sonypi.<param>=<value> on the kernel boot line when sonypi is
+statically linked into the kernel). Those options are:
+
+	=============== =======================================================
+	minor: 		minor number of the misc device /dev/sonypi,
+			default is -1 (automatic allocation, see /proc/misc
+			or kernel logs)
+
+	camera:		if you have a PictureBook series Vaio (with the
+			integrated MotionEye camera), set this parameter to 1
+			in order to let the driver access to the camera
+
+	fnkeyinit:	on some Vaios (C1VE, C1VR etc), the Fn key events don't
+			get enabled unless you set this parameter to 1.
+			Do not use this option unless it's actually necessary,
+			some Vaio models don't deal well with this option.
+			This option is available only if the kernel is
+			compiled without ACPI support (since it conflicts
+			with it and it shouldn't be required anyway if
+			ACPI is already enabled).
+
+	verbose:	set to 1 to print unknown events received from the
+			sonypi device.
+			set to 2 to print all events received from the
+			sonypi device.
+
+	compat:		uses some compatibility code for enabling the sonypi
+			events. If the driver worked for you in the past
+			(prior to version 1.5) and does not work anymore,
+			add this option and report to the author.
+
+	mask:		event mask telling the driver what events will be
+			reported to the user. This parameter is required for
+			some Vaio models where the hardware reuses values
+			used in other Vaio models (like the FX series who does
+			not have a jogdial but reuses the jogdial events for
+			programmable keys events). The default event mask is
+			set to 0xffffffff, meaning that all possible events
+			will be tried. You can use the following bits to
+			construct your own event mask (from
+			drivers/char/sonypi.h):
+
+				========================	======
+				SONYPI_JOGGER_MASK 		0x0001
+				SONYPI_CAPTURE_MASK 		0x0002
+				SONYPI_FNKEY_MASK 		0x0004
+				SONYPI_BLUETOOTH_MASK 		0x0008
+				SONYPI_PKEY_MASK 		0x0010
+				SONYPI_BACK_MASK 		0x0020
+				SONYPI_HELP_MASK 		0x0040
+				SONYPI_LID_MASK 		0x0080
+				SONYPI_ZOOM_MASK 		0x0100
+				SONYPI_THUMBPHRASE_MASK 	0x0200
+				SONYPI_MEYE_MASK		0x0400
+				SONYPI_MEMORYSTICK_MASK		0x0800
+				SONYPI_BATTERY_MASK		0x1000
+				SONYPI_WIRELESS_MASK		0x2000
+				========================	======
+
+	useinput:	if set (which is the default) two input devices are
+			created, one which interprets the jogdial events as
+			mouse events, the other one which acts like a
+			keyboard reporting the pressing of the special keys.
+	=============== =======================================================
+
+Module use:
+-----------
+
+In order to automatically load the sonypi module on use, you can put those
+lines a configuration file in /etc/modprobe.d/::
+
+	alias char-major-10-250 sonypi
+	options sonypi minor=250
+
+This supposes the use of minor 250 for the sonypi device::
+
+	# mknod /dev/sonypi c 10 250
+
+Bugs:
+-----
+
+	- several users reported that this driver disables the BIOS-managed
+	  Fn-keys which put the laptop in sleeping state, or switch the
+	  external monitor on/off. There is no workaround yet, since this
+	  driver disables all APM management for those keys, by enabling the
+	  ACPI management (and the ACPI core stuff is not complete yet). If
+	  you have one of those laptops with working Fn keys and want to
+	  continue to use them, don't use this driver.
+
+	- some users reported that the laptop speed is lower (dhrystone
+	  tested) when using the driver with the fnkeyinit parameter. I cannot
+	  reproduce it on my laptop and not all users have this problem.
+	  This happens because the fnkeyinit parameter enables the ACPI
+	  mode (but without additional ACPI control, like processor
+	  speed handling etc). Use ACPI instead of APM if it works on your
+	  laptop.
+
+	- sonypi lacks the ability to distinguish between certain key
+	  events on some models.
+
+	- some models with the nvidia card (geforce go 6200 tc) uses a
+	  different way to adjust the backlighting of the screen. There
+	  is a userspace utility to adjust the brightness on those models,
+	  which can be downloaded from
+	  http://www.acc.umu.se/~erikw/program/smartdimmer-0.1.tar.bz2
+
+	- since all development was done by reverse engineering, there is
+	  *absolutely no guarantee* that this driver will not crash your
+	  laptop. Permanently.
diff --git a/Documentation/admin-guide/laptops/thinkpad-acpi.rst b/Documentation/admin-guide/laptops/thinkpad-acpi.rst
new file mode 100644
index 000000000000..19d52fc3c5e9
--- /dev/null
+++ b/Documentation/admin-guide/laptops/thinkpad-acpi.rst
@@ -0,0 +1,1562 @@
+===========================
+ThinkPad ACPI Extras Driver
+===========================
+
+Version 0.25
+
+October 16th,  2013
+
+- Borislav Deianov <borislav@users.sf.net>
+- Henrique de Moraes Holschuh <hmh@hmh.eng.br>
+
+http://ibm-acpi.sf.net/
+
+This is a Linux driver for the IBM and Lenovo ThinkPad laptops. It
+supports various features of these laptops which are accessible
+through the ACPI and ACPI EC framework, but not otherwise fully
+supported by the generic Linux ACPI drivers.
+
+This driver used to be named ibm-acpi until kernel 2.6.21 and release
+0.13-20070314.  It used to be in the drivers/acpi tree, but it was
+moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel
+2.6.22, and release 0.14.  It was moved to drivers/platform/x86 for
+kernel 2.6.29 and release 0.22.
+
+The driver is named "thinkpad-acpi".  In some places, like module
+names and log messages, "thinkpad_acpi" is used because of userspace
+issues.
+
+"tpacpi" is used as a shorthand where "thinkpad-acpi" would be too
+long due to length limitations on some Linux kernel versions.
+
+Status
+------
+
+The features currently supported are the following (see below for
+detailed description):
+
+	- Fn key combinations
+	- Bluetooth enable and disable
+	- video output switching, expansion control
+	- ThinkLight on and off
+	- CMOS/UCMS control
+	- LED control
+	- ACPI sounds
+	- temperature sensors
+	- Experimental: embedded controller register dump
+	- LCD brightness control
+	- Volume control
+	- Fan control and monitoring: fan speed, fan enable/disable
+	- WAN enable and disable
+	- UWB enable and disable
+
+A compatibility table by model and feature is maintained on the web
+site, http://ibm-acpi.sf.net/. I appreciate any success or failure
+reports, especially if they add to or correct the compatibility table.
+Please include the following information in your report:
+
+	- ThinkPad model name
+	- a copy of your ACPI tables, using the "acpidump" utility
+	- a copy of the output of dmidecode, with serial numbers
+	  and UUIDs masked off
+	- which driver features work and which don't
+	- the observed behavior of non-working features
+
+Any other comments or patches are also more than welcome.
+
+
+Installation
+------------
+
+If you are compiling this driver as included in the Linux kernel
+sources, look for the CONFIG_THINKPAD_ACPI Kconfig option.
+It is located on the menu path: "Device Drivers" -> "X86 Platform
+Specific Device Drivers" -> "ThinkPad ACPI Laptop Extras".
+
+
+Features
+--------
+
+The driver exports two different interfaces to userspace, which can be
+used to access the features it provides.  One is a legacy procfs-based
+interface, which will be removed at some time in the future.  The other
+is a new sysfs-based interface which is not complete yet.
+
+The procfs interface creates the /proc/acpi/ibm directory.  There is a
+file under that directory for each feature it supports.  The procfs
+interface is mostly frozen, and will change very little if at all: it
+will not be extended to add any new functionality in the driver, instead
+all new functionality will be implemented on the sysfs interface.
+
+The sysfs interface tries to blend in the generic Linux sysfs subsystems
+and classes as much as possible.  Since some of these subsystems are not
+yet ready or stabilized, it is expected that this interface will change,
+and any and all userspace programs must deal with it.
+
+
+Notes about the sysfs interface
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Unlike what was done with the procfs interface, correctness when talking
+to the sysfs interfaces will be enforced, as will correctness in the
+thinkpad-acpi's implementation of sysfs interfaces.
+
+Also, any bugs in the thinkpad-acpi sysfs driver code or in the
+thinkpad-acpi's implementation of the sysfs interfaces will be fixed for
+maximum correctness, even if that means changing an interface in
+non-compatible ways.  As these interfaces mature both in the kernel and
+in thinkpad-acpi, such changes should become quite rare.
+
+Applications interfacing to the thinkpad-acpi sysfs interfaces must
+follow all sysfs guidelines and correctly process all errors (the sysfs
+interface makes extensive use of errors).  File descriptors and open /
+close operations to the sysfs inodes must also be properly implemented.
+
+The version of thinkpad-acpi's sysfs interface is exported by the driver
+as a driver attribute (see below).
+
+Sysfs driver attributes are on the driver's sysfs attribute space,
+for 2.6.23+ this is /sys/bus/platform/drivers/thinkpad_acpi/ and
+/sys/bus/platform/drivers/thinkpad_hwmon/
+
+Sysfs device attributes are on the thinkpad_acpi device sysfs attribute
+space, for 2.6.23+ this is /sys/devices/platform/thinkpad_acpi/.
+
+Sysfs device attributes for the sensors and fan are on the
+thinkpad_hwmon device's sysfs attribute space, but you should locate it
+looking for a hwmon device with the name attribute of "thinkpad", or
+better yet, through libsensors. For 4.14+ sysfs attributes were moved to the
+hwmon device (/sys/bus/platform/devices/thinkpad_hwmon/hwmon/hwmon? or
+/sys/class/hwmon/hwmon?).
+
+Driver version
+--------------
+
+procfs: /proc/acpi/ibm/driver
+
+sysfs driver attribute: version
+
+The driver name and version. No commands can be written to this file.
+
+
+Sysfs interface version
+-----------------------
+
+sysfs driver attribute: interface_version
+
+Version of the thinkpad-acpi sysfs interface, as an unsigned long
+(output in hex format: 0xAAAABBCC), where:
+
+	AAAA
+	  - major revision
+	BB
+	  - minor revision
+	CC
+	  - bugfix revision
+
+The sysfs interface version changelog for the driver can be found at the
+end of this document.  Changes to the sysfs interface done by the kernel
+subsystems are not documented here, nor are they tracked by this
+attribute.
+
+Changes to the thinkpad-acpi sysfs interface are only considered
+non-experimental when they are submitted to Linux mainline, at which
+point the changes in this interface are documented and interface_version
+may be updated.  If you are using any thinkpad-acpi features not yet
+sent to mainline for merging, you do so on your own risk: these features
+may disappear, or be implemented in a different and incompatible way by
+the time they are merged in Linux mainline.
+
+Changes that are backwards-compatible by nature (e.g. the addition of
+attributes that do not change the way the other attributes work) do not
+always warrant an update of interface_version.  Therefore, one must
+expect that an attribute might not be there, and deal with it properly
+(an attribute not being there *is* a valid way to make it clear that a
+feature is not available in sysfs).
+
+
+Hot keys
+--------
+
+procfs: /proc/acpi/ibm/hotkey
+
+sysfs device attribute: hotkey_*
+
+In a ThinkPad, the ACPI HKEY handler is responsible for communicating
+some important events and also keyboard hot key presses to the operating
+system.  Enabling the hotkey functionality of thinkpad-acpi signals the
+firmware that such a driver is present, and modifies how the ThinkPad
+firmware will behave in many situations.
+
+The driver enables the HKEY ("hot key") event reporting automatically
+when loaded, and disables it when it is removed.
+
+The driver will report HKEY events in the following format::
+
+	ibm/hotkey HKEY 00000080 0000xxxx
+
+Some of these events refer to hot key presses, but not all of them.
+
+The driver will generate events over the input layer for hot keys and
+radio switches, and over the ACPI netlink layer for other events.  The
+input layer support accepts the standard IOCTLs to remap the keycodes
+assigned to each hot key.
+
+The hot key bit mask allows some control over which hot keys generate
+events.  If a key is "masked" (bit set to 0 in the mask), the firmware
+will handle it.  If it is "unmasked", it signals the firmware that
+thinkpad-acpi would prefer to handle it, if the firmware would be so
+kind to allow it (and it often doesn't!).
+
+Not all bits in the mask can be modified.  Not all bits that can be
+modified do anything.  Not all hot keys can be individually controlled
+by the mask.  Some models do not support the mask at all.  The behaviour
+of the mask is, therefore, highly dependent on the ThinkPad model.
+
+The driver will filter out any unmasked hotkeys, so even if the firmware
+doesn't allow disabling an specific hotkey, the driver will not report
+events for unmasked hotkeys.
+
+Note that unmasking some keys prevents their default behavior.  For
+example, if Fn+F5 is unmasked, that key will no longer enable/disable
+Bluetooth by itself in firmware.
+
+Note also that not all Fn key combinations are supported through ACPI
+depending on the ThinkPad model and firmware version.  On those
+ThinkPads, it is still possible to support some extra hotkeys by
+polling the "CMOS NVRAM" at least 10 times per second.  The driver
+attempts to enables this functionality automatically when required.
+
+procfs notes
+^^^^^^^^^^^^
+
+The following commands can be written to the /proc/acpi/ibm/hotkey file::
+
+	echo 0xffffffff > /proc/acpi/ibm/hotkey -- enable all hot keys
+	echo 0 > /proc/acpi/ibm/hotkey -- disable all possible hot keys
+	... any other 8-hex-digit mask ...
+	echo reset > /proc/acpi/ibm/hotkey -- restore the recommended mask
+
+The following commands have been deprecated and will cause the kernel
+to log a warning::
+
+	echo enable > /proc/acpi/ibm/hotkey -- does nothing
+	echo disable > /proc/acpi/ibm/hotkey -- returns an error
+
+The procfs interface does not support NVRAM polling control.  So as to
+maintain maximum bug-to-bug compatibility, it does not report any masks,
+nor does it allow one to manipulate the hot key mask when the firmware
+does not support masks at all, even if NVRAM polling is in use.
+
+sysfs notes
+^^^^^^^^^^^
+
+	hotkey_bios_enabled:
+		DEPRECATED, WILL BE REMOVED SOON.
+
+		Returns 0.
+
+	hotkey_bios_mask:
+		DEPRECATED, DON'T USE, WILL BE REMOVED IN THE FUTURE.
+
+		Returns the hot keys mask when thinkpad-acpi was loaded.
+		Upon module unload, the hot keys mask will be restored
+		to this value.   This is always 0x80c, because those are
+		the hotkeys that were supported by ancient firmware
+		without mask support.
+
+	hotkey_enable:
+		DEPRECATED, WILL BE REMOVED SOON.
+
+		0: returns -EPERM
+		1: does nothing
+
+	hotkey_mask:
+		bit mask to enable reporting (and depending on
+		the firmware, ACPI event generation) for each hot key
+		(see above).  Returns the current status of the hot keys
+		mask, and allows one to modify it.
+
+	hotkey_all_mask:
+		bit mask that should enable event reporting for all
+		supported hot keys, when echoed to hotkey_mask above.
+		Unless you know which events need to be handled
+		passively (because the firmware *will* handle them
+		anyway), do *not* use hotkey_all_mask.  Use
+		hotkey_recommended_mask, instead. You have been warned.
+
+	hotkey_recommended_mask:
+		bit mask that should enable event reporting for all
+		supported hot keys, except those which are always
+		handled by the firmware anyway.  Echo it to
+		hotkey_mask above, to use.  This is the default mask
+		used by the driver.
+
+	hotkey_source_mask:
+		bit mask that selects which hot keys will the driver
+		poll the NVRAM for.  This is auto-detected by the driver
+		based on the capabilities reported by the ACPI firmware,
+		but it can be overridden at runtime.
+
+		Hot keys whose bits are set in hotkey_source_mask are
+		polled for in NVRAM, and reported as hotkey events if
+		enabled in hotkey_mask.  Only a few hot keys are
+		available through CMOS NVRAM polling.
+
+		Warning: when in NVRAM mode, the volume up/down/mute
+		keys are synthesized according to changes in the mixer,
+		which uses a single volume up or volume down hotkey
+		press to unmute, as per the ThinkPad volume mixer user
+		interface.  When in ACPI event mode, volume up/down/mute
+		events are reported by the firmware and can behave
+		differently (and that behaviour changes with firmware
+		version -- not just with firmware models -- as well as
+		OSI(Linux) state).
+
+	hotkey_poll_freq:
+		frequency in Hz for hot key polling. It must be between
+		0 and 25 Hz.  Polling is only carried out when strictly
+		needed.
+
+		Setting hotkey_poll_freq to zero disables polling, and
+		will cause hot key presses that require NVRAM polling
+		to never be reported.
+
+		Setting hotkey_poll_freq too low may cause repeated
+		pressings of the same hot key to be misreported as a
+		single key press, or to not even be detected at all.
+		The recommended polling frequency is 10Hz.
+
+	hotkey_radio_sw:
+		If the ThinkPad has a hardware radio switch, this
+		attribute will read 0 if the switch is in the "radios
+		disabled" position, and 1 if the switch is in the
+		"radios enabled" position.
+
+		This attribute has poll()/select() support.
+
+	hotkey_tablet_mode:
+		If the ThinkPad has tablet capabilities, this attribute
+		will read 0 if the ThinkPad is in normal mode, and
+		1 if the ThinkPad is in tablet mode.
+
+		This attribute has poll()/select() support.
+
+	wakeup_reason:
+		Set to 1 if the system is waking up because the user
+		requested a bay ejection.  Set to 2 if the system is
+		waking up because the user requested the system to
+		undock.  Set to zero for normal wake-ups or wake-ups
+		due to unknown reasons.
+
+		This attribute has poll()/select() support.
+
+	wakeup_hotunplug_complete:
+		Set to 1 if the system was waken up because of an
+		undock or bay ejection request, and that request
+		was successfully completed.  At this point, it might
+		be useful to send the system back to sleep, at the
+		user's choice.  Refer to HKEY events 0x4003 and
+		0x3003, below.
+
+		This attribute has poll()/select() support.
+
+input layer notes
+^^^^^^^^^^^^^^^^^
+
+A Hot key is mapped to a single input layer EV_KEY event, possibly
+followed by an EV_MSC MSC_SCAN event that shall contain that key's scan
+code.  An EV_SYN event will always be generated to mark the end of the
+event block.
+
+Do not use the EV_MSC MSC_SCAN events to process keys.  They are to be
+used as a helper to remap keys, only.  They are particularly useful when
+remapping KEY_UNKNOWN keys.
+
+The events are available in an input device, with the following id:
+
+	==============  ==============================
+	Bus		BUS_HOST
+	vendor		0x1014 (PCI_VENDOR_ID_IBM)  or
+			0x17aa (PCI_VENDOR_ID_LENOVO)
+	product		0x5054 ("TP")
+	version		0x4101
+	==============  ==============================
+
+The version will have its LSB incremented if the keymap changes in a
+backwards-compatible way.  The MSB shall always be 0x41 for this input
+device.  If the MSB is not 0x41, do not use the device as described in
+this section, as it is either something else (e.g. another input device
+exported by a thinkpad driver, such as HDAPS) or its functionality has
+been changed in a non-backwards compatible way.
+
+Adding other event types for other functionalities shall be considered a
+backwards-compatible change for this input device.
+
+Thinkpad-acpi Hot Key event map (version 0x4101):
+
+=======	=======	==============	==============================================
+ACPI	Scan
+event	code	Key		Notes
+=======	=======	==============	==============================================
+0x1001	0x00	FN+F1		-
+
+0x1002	0x01	FN+F2		IBM: battery (rare)
+				Lenovo: Screen lock
+
+0x1003	0x02	FN+F3		Many IBM models always report
+				this hot key, even with hot keys
+				disabled or with Fn+F3 masked
+				off
+				IBM: screen lock, often turns
+				off the ThinkLight as side-effect
+				Lenovo: battery
+
+0x1004	0x03	FN+F4		Sleep button (ACPI sleep button
+				semantics, i.e. sleep-to-RAM).
+				It always generates some kind
+				of event, either the hot key
+				event or an ACPI sleep button
+				event. The firmware may
+				refuse to generate further FN+F4
+				key presses until a S3 or S4 ACPI
+				sleep cycle is performed or some
+				time passes.
+
+0x1005	0x04	FN+F5		Radio.  Enables/disables
+				the internal Bluetooth hardware
+				and W-WAN card if left in control
+				of the firmware.  Does not affect
+				the WLAN card.
+				Should be used to turn on/off all
+				radios (Bluetooth+W-WAN+WLAN),
+				really.
+
+0x1006	0x05	FN+F6		-
+
+0x1007	0x06	FN+F7		Video output cycle.
+				Do you feel lucky today?
+
+0x1008	0x07	FN+F8		IBM: toggle screen expand
+				Lenovo: configure UltraNav,
+				or toggle screen expand
+
+0x1009	0x08	FN+F9		-
+
+...	...	...		...
+
+0x100B	0x0A	FN+F11		-
+
+0x100C	0x0B	FN+F12		Sleep to disk.  You are always
+				supposed to handle it yourself,
+				either through the ACPI event,
+				or through a hotkey event.
+				The firmware may refuse to
+				generate further FN+F12 key
+				press events until a S3 or S4
+				ACPI sleep cycle is performed,
+				or some time passes.
+
+0x100D	0x0C	FN+BACKSPACE	-
+0x100E	0x0D	FN+INSERT	-
+0x100F	0x0E	FN+DELETE	-
+
+0x1010	0x0F	FN+HOME		Brightness up.  This key is
+				always handled by the firmware
+				in IBM ThinkPads, even when
+				unmasked.  Just leave it alone.
+				For Lenovo ThinkPads with a new
+				BIOS, it has to be handled either
+				by the ACPI OSI, or by userspace.
+				The driver does the right thing,
+				never mess with this.
+0x1011	0x10	FN+END		Brightness down.  See brightness
+				up for details.
+
+0x1012	0x11	FN+PGUP		ThinkLight toggle.  This key is
+				always handled by the firmware,
+				even when unmasked.
+
+0x1013	0x12	FN+PGDOWN	-
+
+0x1014	0x13	FN+SPACE	Zoom key
+
+0x1015	0x14	VOLUME UP	Internal mixer volume up. This
+				key is always handled by the
+				firmware, even when unmasked.
+				NOTE: Lenovo seems to be changing
+				this.
+0x1016	0x15	VOLUME DOWN	Internal mixer volume up. This
+				key is always handled by the
+				firmware, even when unmasked.
+				NOTE: Lenovo seems to be changing
+				this.
+0x1017	0x16	MUTE		Mute internal mixer. This
+				key is always handled by the
+				firmware, even when unmasked.
+
+0x1018	0x17	THINKPAD	ThinkPad/Access IBM/Lenovo key
+
+0x1019	0x18	unknown
+
+...	...	...
+
+0x1020	0x1F	unknown
+=======	=======	==============	==============================================
+
+The ThinkPad firmware does not allow one to differentiate when most hot
+keys are pressed or released (either that, or we don't know how to, yet).
+For these keys, the driver generates a set of events for a key press and
+immediately issues the same set of events for a key release.  It is
+unknown by the driver if the ThinkPad firmware triggered these events on
+hot key press or release, but the firmware will do it for either one, not
+both.
+
+If a key is mapped to KEY_RESERVED, it generates no input events at all.
+If a key is mapped to KEY_UNKNOWN, it generates an input event that
+includes an scan code.  If a key is mapped to anything else, it will
+generate input device EV_KEY events.
+
+In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW
+events for switches:
+
+==============	==============================================
+SW_RFKILL_ALL	T60 and later hardware rfkill rocker switch
+SW_TABLET_MODE	Tablet ThinkPads HKEY events 0x5009 and 0x500A
+==============	==============================================
+
+Non hotkey ACPI HKEY event map
+------------------------------
+
+Events that are never propagated by the driver:
+
+======		==================================================
+0x2304		System is waking up from suspend to undock
+0x2305		System is waking up from suspend to eject bay
+0x2404		System is waking up from hibernation to undock
+0x2405		System is waking up from hibernation to eject bay
+0x5001		Lid closed
+0x5002		Lid opened
+0x5009		Tablet swivel: switched to tablet mode
+0x500A		Tablet swivel: switched to normal mode
+0x5010		Brightness level changed/control event
+0x6000		KEYBOARD: Numlock key pressed
+0x6005		KEYBOARD: Fn key pressed (TO BE VERIFIED)
+0x7000		Radio Switch may have changed state
+======		==================================================
+
+
+Events that are propagated by the driver to userspace:
+
+======		=====================================================
+0x2313		ALARM: System is waking up from suspend because
+		the battery is nearly empty
+0x2413		ALARM: System is waking up from hibernation because
+		the battery is nearly empty
+0x3003		Bay ejection (see 0x2x05) complete, can sleep again
+0x3006		Bay hotplug request (hint to power up SATA link when
+		the optical drive tray is ejected)
+0x4003		Undocked (see 0x2x04), can sleep again
+0x4010		Docked into hotplug port replicator (non-ACPI dock)
+0x4011		Undocked from hotplug port replicator (non-ACPI dock)
+0x500B		Tablet pen inserted into its storage bay
+0x500C		Tablet pen removed from its storage bay
+0x6011		ALARM: battery is too hot
+0x6012		ALARM: battery is extremely hot
+0x6021		ALARM: a sensor is too hot
+0x6022		ALARM: a sensor is extremely hot
+0x6030		System thermal table changed
+0x6032		Thermal Control command set completion  (DYTC, Windows)
+0x6040		Nvidia Optimus/AC adapter related (TO BE VERIFIED)
+0x60C0		X1 Yoga 2016, Tablet mode status changed
+0x60F0		Thermal Transformation changed (GMTS, Windows)
+======		=====================================================
+
+Battery nearly empty alarms are a last resort attempt to get the
+operating system to hibernate or shutdown cleanly (0x2313), or shutdown
+cleanly (0x2413) before power is lost.  They must be acted upon, as the
+wake up caused by the firmware will have negated most safety nets...
+
+When any of the "too hot" alarms happen, according to Lenovo the user
+should suspend or hibernate the laptop (and in the case of battery
+alarms, unplug the AC adapter) to let it cool down.  These alarms do
+signal that something is wrong, they should never happen on normal
+operating conditions.
+
+The "extremely hot" alarms are emergencies.  According to Lenovo, the
+operating system is to force either an immediate suspend or hibernate
+cycle, or a system shutdown.  Obviously, something is very wrong if this
+happens.
+
+
+Brightness hotkey notes
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Don't mess with the brightness hotkeys in a Thinkpad.  If you want
+notifications for OSD, use the sysfs backlight class event support.
+
+The driver will issue KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN events
+automatically for the cases were userspace has to do something to
+implement brightness changes.  When you override these events, you will
+either fail to handle properly the ThinkPads that require explicit
+action to change backlight brightness, or the ThinkPads that require
+that no action be taken to work properly.
+
+
+Bluetooth
+---------
+
+procfs: /proc/acpi/ibm/bluetooth
+
+sysfs device attribute: bluetooth_enable (deprecated)
+
+sysfs rfkill class: switch "tpacpi_bluetooth_sw"
+
+This feature shows the presence and current state of a ThinkPad
+Bluetooth device in the internal ThinkPad CDC slot.
+
+If the ThinkPad supports it, the Bluetooth state is stored in NVRAM,
+so it is kept across reboots and power-off.
+
+Procfs notes
+^^^^^^^^^^^^
+
+If Bluetooth is installed, the following commands can be used::
+
+	echo enable > /proc/acpi/ibm/bluetooth
+	echo disable > /proc/acpi/ibm/bluetooth
+
+Sysfs notes
+^^^^^^^^^^^
+
+	If the Bluetooth CDC card is installed, it can be enabled /
+	disabled through the "bluetooth_enable" thinkpad-acpi device
+	attribute, and its current status can also be queried.
+
+	enable:
+
+		- 0: disables Bluetooth / Bluetooth is disabled
+		- 1: enables Bluetooth / Bluetooth is enabled.
+
+	Note: this interface has been superseded by the	generic rfkill
+	class.  It has been deprecated, and it will be removed in year
+	2010.
+
+	rfkill controller switch "tpacpi_bluetooth_sw": refer to
+	Documentation/rfkill.txt for details.
+
+
+Video output control -- /proc/acpi/ibm/video
+--------------------------------------------
+
+This feature allows control over the devices used for video output -
+LCD, CRT or DVI (if available). The following commands are available::
+
+	echo lcd_enable > /proc/acpi/ibm/video
+	echo lcd_disable > /proc/acpi/ibm/video
+	echo crt_enable > /proc/acpi/ibm/video
+	echo crt_disable > /proc/acpi/ibm/video
+	echo dvi_enable > /proc/acpi/ibm/video
+	echo dvi_disable > /proc/acpi/ibm/video
+	echo auto_enable > /proc/acpi/ibm/video
+	echo auto_disable > /proc/acpi/ibm/video
+	echo expand_toggle > /proc/acpi/ibm/video
+	echo video_switch > /proc/acpi/ibm/video
+
+NOTE:
+  Access to this feature is restricted to processes owning the
+  CAP_SYS_ADMIN capability for safety reasons, as it can interact badly
+  enough with some versions of X.org to crash it.
+
+Each video output device can be enabled or disabled individually.
+Reading /proc/acpi/ibm/video shows the status of each device.
+
+Automatic video switching can be enabled or disabled.  When automatic
+video switching is enabled, certain events (e.g. opening the lid,
+docking or undocking) cause the video output device to change
+automatically. While this can be useful, it also causes flickering
+and, on the X40, video corruption. By disabling automatic switching,
+the flickering or video corruption can be avoided.
+
+The video_switch command cycles through the available video outputs
+(it simulates the behavior of Fn-F7).
+
+Video expansion can be toggled through this feature. This controls
+whether the display is expanded to fill the entire LCD screen when a
+mode with less than full resolution is used. Note that the current
+video expansion status cannot be determined through this feature.
+
+Note that on many models (particularly those using Radeon graphics
+chips) the X driver configures the video card in a way which prevents
+Fn-F7 from working. This also disables the video output switching
+features of this driver, as it uses the same ACPI methods as
+Fn-F7. Video switching on the console should still work.
+
+UPDATE: refer to https://bugs.freedesktop.org/show_bug.cgi?id=2000
+
+
+ThinkLight control
+------------------
+
+procfs: /proc/acpi/ibm/light
+
+sysfs attributes: as per LED class, for the "tpacpi::thinklight" LED
+
+procfs notes
+^^^^^^^^^^^^
+
+The ThinkLight status can be read and set through the procfs interface.  A
+few models which do not make the status available will show the ThinkLight
+status as "unknown". The available commands are::
+
+	echo on  > /proc/acpi/ibm/light
+	echo off > /proc/acpi/ibm/light
+
+sysfs notes
+^^^^^^^^^^^
+
+The ThinkLight sysfs interface is documented by the LED class
+documentation, in Documentation/leds/leds-class.rst.  The ThinkLight LED name
+is "tpacpi::thinklight".
+
+Due to limitations in the sysfs LED class, if the status of the ThinkLight
+cannot be read or if it is unknown, thinkpad-acpi will report it as "off".
+It is impossible to know if the status returned through sysfs is valid.
+
+
+CMOS/UCMS control
+-----------------
+
+procfs: /proc/acpi/ibm/cmos
+
+sysfs device attribute: cmos_command
+
+This feature is mostly used internally by the ACPI firmware to keep the legacy
+CMOS NVRAM bits in sync with the current machine state, and to record this
+state so that the ThinkPad will retain such settings across reboots.
+
+Some of these commands actually perform actions in some ThinkPad models, but
+this is expected to disappear more and more in newer models.  As an example, in
+a T43 and in a X40, commands 12 and 13 still control the ThinkLight state for
+real, but commands 0 to 2 don't control the mixer anymore (they have been
+phased out) and just update the NVRAM.
+
+The range of valid cmos command numbers is 0 to 21, but not all have an
+effect and the behavior varies from model to model.  Here is the behavior
+on the X40 (tpb is the ThinkPad Buttons utility):
+
+	- 0 - Related to "Volume down" key press
+	- 1 - Related to "Volume up" key press
+	- 2 - Related to "Mute on" key press
+	- 3 - Related to "Access IBM" key press
+	- 4 - Related to "LCD brightness up" key press
+	- 5 - Related to "LCD brightness down" key press
+	- 11 - Related to "toggle screen expansion" key press/function
+	- 12 - Related to "ThinkLight on"
+	- 13 - Related to "ThinkLight off"
+	- 14 - Related to "ThinkLight" key press (toggle ThinkLight)
+
+The cmos command interface is prone to firmware split-brain problems, as
+in newer ThinkPads it is just a compatibility layer.  Do not use it, it is
+exported just as a debug tool.
+
+
+LED control
+-----------
+
+procfs: /proc/acpi/ibm/led
+sysfs attributes: as per LED class, see below for names
+
+Some of the LED indicators can be controlled through this feature.  On
+some older ThinkPad models, it is possible to query the status of the
+LED indicators as well.  Newer ThinkPads cannot query the real status
+of the LED indicators.
+
+Because misuse of the LEDs could induce an unaware user to perform
+dangerous actions (like undocking or ejecting a bay device while the
+buses are still active), or mask an important alarm (such as a nearly
+empty battery, or a broken battery), access to most LEDs is
+restricted.
+
+Unrestricted access to all LEDs requires that thinkpad-acpi be
+compiled with the CONFIG_THINKPAD_ACPI_UNSAFE_LEDS option enabled.
+Distributions must never enable this option.  Individual users that
+are aware of the consequences are welcome to enabling it.
+
+Audio mute and microphone mute LEDs are supported, but currently not
+visible to userspace. They are used by the snd-hda-intel audio driver.
+
+procfs notes
+^^^^^^^^^^^^
+
+The available commands are::
+
+	echo '<LED number> on' >/proc/acpi/ibm/led
+	echo '<LED number> off' >/proc/acpi/ibm/led
+	echo '<LED number> blink' >/proc/acpi/ibm/led
+
+The <LED number> range is 0 to 15. The set of LEDs that can be
+controlled varies from model to model. Here is the common ThinkPad
+mapping:
+
+	- 0 - power
+	- 1 - battery (orange)
+	- 2 - battery (green)
+	- 3 - UltraBase/dock
+	- 4 - UltraBay
+	- 5 - UltraBase battery slot
+	- 6 - (unknown)
+	- 7 - standby
+	- 8 - dock status 1
+	- 9 - dock status 2
+	- 10, 11 - (unknown)
+	- 12 - thinkvantage
+	- 13, 14, 15 - (unknown)
+
+All of the above can be turned on and off and can be made to blink.
+
+sysfs notes
+^^^^^^^^^^^
+
+The ThinkPad LED sysfs interface is described in detail by the LED class
+documentation, in Documentation/leds/leds-class.rst.
+
+The LEDs are named (in LED ID order, from 0 to 12):
+"tpacpi::power", "tpacpi:orange:batt", "tpacpi:green:batt",
+"tpacpi::dock_active", "tpacpi::bay_active", "tpacpi::dock_batt",
+"tpacpi::unknown_led", "tpacpi::standby", "tpacpi::dock_status1",
+"tpacpi::dock_status2", "tpacpi::unknown_led2", "tpacpi::unknown_led3",
+"tpacpi::thinkvantage".
+
+Due to limitations in the sysfs LED class, if the status of the LED
+indicators cannot be read due to an error, thinkpad-acpi will report it as
+a brightness of zero (same as LED off).
+
+If the thinkpad firmware doesn't support reading the current status,
+trying to read the current LED brightness will just return whatever
+brightness was last written to that attribute.
+
+These LEDs can blink using hardware acceleration.  To request that a
+ThinkPad indicator LED should blink in hardware accelerated mode, use the
+"timer" trigger, and leave the delay_on and delay_off parameters set to
+zero (to request hardware acceleration autodetection).
+
+LEDs that are known not to exist in a given ThinkPad model are not
+made available through the sysfs interface.  If you have a dock and you
+notice there are LEDs listed for your ThinkPad that do not exist (and
+are not in the dock), or if you notice that there are missing LEDs,
+a report to ibm-acpi-devel@lists.sourceforge.net is appreciated.
+
+
+ACPI sounds -- /proc/acpi/ibm/beep
+----------------------------------
+
+The BEEP method is used internally by the ACPI firmware to provide
+audible alerts in various situations. This feature allows the same
+sounds to be triggered manually.
+
+The commands are non-negative integer numbers::
+
+	echo <number> >/proc/acpi/ibm/beep
+
+The valid <number> range is 0 to 17. Not all numbers trigger sounds
+and the sounds vary from model to model. Here is the behavior on the
+X40:
+
+	- 0 - stop a sound in progress (but use 17 to stop 16)
+	- 2 - two beeps, pause, third beep ("low battery")
+	- 3 - single beep
+	- 4 - high, followed by low-pitched beep ("unable")
+	- 5 - single beep
+	- 6 - very high, followed by high-pitched beep ("AC/DC")
+	- 7 - high-pitched beep
+	- 9 - three short beeps
+	- 10 - very long beep
+	- 12 - low-pitched beep
+	- 15 - three high-pitched beeps repeating constantly, stop with 0
+	- 16 - one medium-pitched beep repeating constantly, stop with 17
+	- 17 - stop 16
+
+
+Temperature sensors
+-------------------
+
+procfs: /proc/acpi/ibm/thermal
+
+sysfs device attributes: (hwmon "thinkpad") temp*_input
+
+Most ThinkPads include six or more separate temperature sensors but only
+expose the CPU temperature through the standard ACPI methods.  This
+feature shows readings from up to eight different sensors on older
+ThinkPads, and up to sixteen different sensors on newer ThinkPads.
+
+For example, on the X40, a typical output may be:
+
+temperatures:
+	42 42 45 41 36 -128 33 -128
+
+On the T43/p, a typical output may be:
+
+temperatures:
+	48 48 36 52 38 -128 31 -128 48 52 48 -128 -128 -128 -128 -128
+
+The mapping of thermal sensors to physical locations varies depending on
+system-board model (and thus, on ThinkPad model).
+
+http://thinkwiki.org/wiki/Thermal_Sensors is a public wiki page that
+tries to track down these locations for various models.
+
+Most (newer?) models seem to follow this pattern:
+
+- 1:  CPU
+- 2:  (depends on model)
+- 3:  (depends on model)
+- 4:  GPU
+- 5:  Main battery: main sensor
+- 6:  Bay battery: main sensor
+- 7:  Main battery: secondary sensor
+- 8:  Bay battery: secondary sensor
+- 9-15: (depends on model)
+
+For the R51 (source: Thomas Gruber):
+
+- 2:  Mini-PCI
+- 3:  Internal HDD
+
+For the T43, T43/p (source: Shmidoax/Thinkwiki.org)
+http://thinkwiki.org/wiki/Thermal_Sensors#ThinkPad_T43.2C_T43p
+
+- 2:  System board, left side (near PCMCIA slot), reported as HDAPS temp
+- 3:  PCMCIA slot
+- 9:  MCH (northbridge) to DRAM Bus
+- 10: Clock-generator, mini-pci card and ICH (southbridge), under Mini-PCI
+      card, under touchpad
+- 11: Power regulator, underside of system board, below F2 key
+
+The A31 has a very atypical layout for the thermal sensors
+(source: Milos Popovic, http://thinkwiki.org/wiki/Thermal_Sensors#ThinkPad_A31)
+
+- 1:  CPU
+- 2:  Main Battery: main sensor
+- 3:  Power Converter
+- 4:  Bay Battery: main sensor
+- 5:  MCH (northbridge)
+- 6:  PCMCIA/ambient
+- 7:  Main Battery: secondary sensor
+- 8:  Bay Battery: secondary sensor
+
+
+Procfs notes
+^^^^^^^^^^^^
+
+	Readings from sensors that are not available return -128.
+	No commands can be written to this file.
+
+Sysfs notes
+^^^^^^^^^^^
+
+	Sensors that are not available return the ENXIO error.  This
+	status may change at runtime, as there are hotplug thermal
+	sensors, like those inside the batteries and docks.
+
+	thinkpad-acpi thermal sensors are reported through the hwmon
+	subsystem, and follow all of the hwmon guidelines at
+	Documentation/hwmon.
+
+EXPERIMENTAL: Embedded controller register dump
+-----------------------------------------------
+
+This feature is not included in the thinkpad driver anymore.
+Instead the EC can be accessed through /sys/kernel/debug/ec with
+a userspace tool which can be found here:
+ftp://ftp.suse.com/pub/people/trenn/sources/ec
+
+Use it to determine the register holding the fan
+speed on some models. To do that, do the following:
+
+	- make sure the battery is fully charged
+	- make sure the fan is running
+	- use above mentioned tool to read out the EC
+
+Often fan and temperature values vary between
+readings. Since temperatures don't change vary fast, you can take
+several quick dumps to eliminate them.
+
+You can use a similar method to figure out the meaning of other
+embedded controller registers - e.g. make sure nothing else changes
+except the charging or discharging battery to determine which
+registers contain the current battery capacity, etc. If you experiment
+with this, do send me your results (including some complete dumps with
+a description of the conditions when they were taken.)
+
+
+LCD brightness control
+----------------------
+
+procfs: /proc/acpi/ibm/brightness
+
+sysfs backlight device "thinkpad_screen"
+
+This feature allows software control of the LCD brightness on ThinkPad
+models which don't have a hardware brightness slider.
+
+It has some limitations: the LCD backlight cannot be actually turned
+on or off by this interface, it just controls the backlight brightness
+level.
+
+On IBM (and some of the earlier Lenovo) ThinkPads, the backlight control
+has eight brightness levels, ranging from 0 to 7.  Some of the levels
+may not be distinct.  Later Lenovo models that implement the ACPI
+display backlight brightness control methods have 16 levels, ranging
+from 0 to 15.
+
+For IBM ThinkPads, there are two interfaces to the firmware for direct
+brightness control, EC and UCMS (or CMOS).  To select which one should be
+used, use the brightness_mode module parameter: brightness_mode=1 selects
+EC mode, brightness_mode=2 selects UCMS mode, brightness_mode=3 selects EC
+mode with NVRAM backing (so that brightness changes are remembered across
+shutdown/reboot).
+
+The driver tries to select which interface to use from a table of
+defaults for each ThinkPad model.  If it makes a wrong choice, please
+report this as a bug, so that we can fix it.
+
+Lenovo ThinkPads only support brightness_mode=2 (UCMS).
+
+When display backlight brightness controls are available through the
+standard ACPI interface, it is best to use it instead of this direct
+ThinkPad-specific interface.  The driver will disable its native
+backlight brightness control interface if it detects that the standard
+ACPI interface is available in the ThinkPad.
+
+If you want to use the thinkpad-acpi backlight brightness control
+instead of the generic ACPI video backlight brightness control for some
+reason, you should use the acpi_backlight=vendor kernel parameter.
+
+The brightness_enable module parameter can be used to control whether
+the LCD brightness control feature will be enabled when available.
+brightness_enable=0 forces it to be disabled.  brightness_enable=1
+forces it to be enabled when available, even if the standard ACPI
+interface is also available.
+
+Procfs notes
+^^^^^^^^^^^^
+
+The available commands are::
+
+	echo up   >/proc/acpi/ibm/brightness
+	echo down >/proc/acpi/ibm/brightness
+	echo 'level <level>' >/proc/acpi/ibm/brightness
+
+Sysfs notes
+^^^^^^^^^^^
+
+The interface is implemented through the backlight sysfs class, which is
+poorly documented at this time.
+
+Locate the thinkpad_screen device under /sys/class/backlight, and inside
+it there will be the following attributes:
+
+	max_brightness:
+		Reads the maximum brightness the hardware can be set to.
+		The minimum is always zero.
+
+	actual_brightness:
+		Reads what brightness the screen is set to at this instant.
+
+	brightness:
+		Writes request the driver to change brightness to the
+		given value.  Reads will tell you what brightness the
+		driver is trying to set the display to when "power" is set
+		to zero and the display has not been dimmed by a kernel
+		power management event.
+
+	power:
+		power management mode, where 0 is "display on", and 1 to 3
+		will dim the display backlight to brightness level 0
+		because thinkpad-acpi cannot really turn the backlight
+		off.  Kernel power management events can temporarily
+		increase the current power management level, i.e. they can
+		dim the display.
+
+
+WARNING:
+
+    Whatever you do, do NOT ever call thinkpad-acpi backlight-level change
+    interface and the ACPI-based backlight level change interface
+    (available on newer BIOSes, and driven by the Linux ACPI video driver)
+    at the same time.  The two will interact in bad ways, do funny things,
+    and maybe reduce the life of the backlight lamps by needlessly kicking
+    its level up and down at every change.
+
+
+Volume control (Console Audio control)
+--------------------------------------
+
+procfs: /proc/acpi/ibm/volume
+
+ALSA: "ThinkPad Console Audio Control", default ID: "ThinkPadEC"
+
+NOTE: by default, the volume control interface operates in read-only
+mode, as it is supposed to be used for on-screen-display purposes.
+The read/write mode can be enabled through the use of the
+"volume_control=1" module parameter.
+
+NOTE: distros are urged to not enable volume_control by default, this
+should be done by the local admin only.  The ThinkPad UI is for the
+console audio control to be done through the volume keys only, and for
+the desktop environment to just provide on-screen-display feedback.
+Software volume control should be done only in the main AC97/HDA
+mixer.
+
+
+About the ThinkPad Console Audio control
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ThinkPads have a built-in amplifier and muting circuit that drives the
+console headphone and speakers.  This circuit is after the main AC97
+or HDA mixer in the audio path, and under exclusive control of the
+firmware.
+
+ThinkPads have three special hotkeys to interact with the console
+audio control: volume up, volume down and mute.
+
+It is worth noting that the normal way the mute function works (on
+ThinkPads that do not have a "mute LED") is:
+
+1. Press mute to mute.  It will *always* mute, you can press it as
+   many times as you want, and the sound will remain mute.
+
+2. Press either volume key to unmute the ThinkPad (it will _not_
+   change the volume, it will just unmute).
+
+This is a very superior design when compared to the cheap software-only
+mute-toggle solution found on normal consumer laptops:  you can be
+absolutely sure the ThinkPad will not make noise if you press the mute
+button, no matter the previous state.
+
+The IBM ThinkPads, and the earlier Lenovo ThinkPads have variable-gain
+amplifiers driving the speakers and headphone output, and the firmware
+also handles volume control for the headphone and speakers on these
+ThinkPads without any help from the operating system (this volume
+control stage exists after the main AC97 or HDA mixer in the audio
+path).
+
+The newer Lenovo models only have firmware mute control, and depend on
+the main HDA mixer to do volume control (which is done by the operating
+system).  In this case, the volume keys are filtered out for unmute
+key press (there are some firmware bugs in this area) and delivered as
+normal key presses to the operating system (thinkpad-acpi is not
+involved).
+
+
+The ThinkPad-ACPI volume control
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The preferred way to interact with the Console Audio control is the
+ALSA interface.
+
+The legacy procfs interface allows one to read the current state,
+and if volume control is enabled, accepts the following commands::
+
+	echo up   >/proc/acpi/ibm/volume
+	echo down >/proc/acpi/ibm/volume
+	echo mute >/proc/acpi/ibm/volume
+	echo unmute >/proc/acpi/ibm/volume
+	echo 'level <level>' >/proc/acpi/ibm/volume
+
+The <level> number range is 0 to 14 although not all of them may be
+distinct. To unmute the volume after the mute command, use either the
+up or down command (the level command will not unmute the volume), or
+the unmute command.
+
+You can use the volume_capabilities parameter to tell the driver
+whether your thinkpad has volume control or mute-only control:
+volume_capabilities=1 for mixers with mute and volume control,
+volume_capabilities=2 for mixers with only mute control.
+
+If the driver misdetects the capabilities for your ThinkPad model,
+please report this to ibm-acpi-devel@lists.sourceforge.net, so that we
+can update the driver.
+
+There are two strategies for volume control.  To select which one
+should be used, use the volume_mode module parameter: volume_mode=1
+selects EC mode, and volume_mode=3 selects EC mode with NVRAM backing
+(so that volume/mute changes are remembered across shutdown/reboot).
+
+The driver will operate in volume_mode=3 by default. If that does not
+work well on your ThinkPad model, please report this to
+ibm-acpi-devel@lists.sourceforge.net.
+
+The driver supports the standard ALSA module parameters.  If the ALSA
+mixer is disabled, the driver will disable all volume functionality.
+
+
+Fan control and monitoring: fan speed, fan enable/disable
+---------------------------------------------------------
+
+procfs: /proc/acpi/ibm/fan
+
+sysfs device attributes: (hwmon "thinkpad") fan1_input, pwm1, pwm1_enable, fan2_input
+
+sysfs hwmon driver attributes: fan_watchdog
+
+NOTE NOTE NOTE:
+   fan control operations are disabled by default for
+   safety reasons.  To enable them, the module parameter "fan_control=1"
+   must be given to thinkpad-acpi.
+
+This feature attempts to show the current fan speed, control mode and
+other fan data that might be available.  The speed is read directly
+from the hardware registers of the embedded controller.  This is known
+to work on later R, T, X and Z series ThinkPads but may show a bogus
+value on other models.
+
+Some Lenovo ThinkPads support a secondary fan.  This fan cannot be
+controlled separately, it shares the main fan control.
+
+Fan levels
+^^^^^^^^^^
+
+Most ThinkPad fans work in "levels" at the firmware interface.  Level 0
+stops the fan.  The higher the level, the higher the fan speed, although
+adjacent levels often map to the same fan speed.  7 is the highest
+level, where the fan reaches the maximum recommended speed.
+
+Level "auto" means the EC changes the fan level according to some
+internal algorithm, usually based on readings from the thermal sensors.
+
+There is also a "full-speed" level, also known as "disengaged" level.
+In this level, the EC disables the speed-locked closed-loop fan control,
+and drives the fan as fast as it can go, which might exceed hardware
+limits, so use this level with caution.
+
+The fan usually ramps up or down slowly from one speed to another, and
+it is normal for the EC to take several seconds to react to fan
+commands.  The full-speed level may take up to two minutes to ramp up to
+maximum speed, and in some ThinkPads, the tachometer readings go stale
+while the EC is transitioning to the full-speed level.
+
+WARNING WARNING WARNING: do not leave the fan disabled unless you are
+monitoring all of the temperature sensor readings and you are ready to
+enable it if necessary to avoid overheating.
+
+An enabled fan in level "auto" may stop spinning if the EC decides the
+ThinkPad is cool enough and doesn't need the extra airflow.  This is
+normal, and the EC will spin the fan up if the various thermal readings
+rise too much.
+
+On the X40, this seems to depend on the CPU and HDD temperatures.
+Specifically, the fan is turned on when either the CPU temperature
+climbs to 56 degrees or the HDD temperature climbs to 46 degrees.  The
+fan is turned off when the CPU temperature drops to 49 degrees and the
+HDD temperature drops to 41 degrees.  These thresholds cannot
+currently be controlled.
+
+The ThinkPad's ACPI DSDT code will reprogram the fan on its own when
+certain conditions are met.  It will override any fan programming done
+through thinkpad-acpi.
+
+The thinkpad-acpi kernel driver can be programmed to revert the fan
+level to a safe setting if userspace does not issue one of the procfs
+fan commands: "enable", "disable", "level" or "watchdog", or if there
+are no writes to pwm1_enable (or to pwm1 *if and only if* pwm1_enable is
+set to 1, manual mode) within a configurable amount of time of up to
+120 seconds.  This functionality is called fan safety watchdog.
+
+Note that the watchdog timer stops after it enables the fan.  It will be
+rearmed again automatically (using the same interval) when one of the
+above mentioned fan commands is received.  The fan watchdog is,
+therefore, not suitable to protect against fan mode changes made through
+means other than the "enable", "disable", and "level" procfs fan
+commands, or the hwmon fan control sysfs interface.
+
+Procfs notes
+^^^^^^^^^^^^
+
+The fan may be enabled or disabled with the following commands::
+
+	echo enable  >/proc/acpi/ibm/fan
+	echo disable >/proc/acpi/ibm/fan
+
+Placing a fan on level 0 is the same as disabling it.  Enabling a fan
+will try to place it in a safe level if it is too slow or disabled.
+
+The fan level can be controlled with the command::
+
+	echo 'level <level>' > /proc/acpi/ibm/fan
+
+Where <level> is an integer from 0 to 7, or one of the words "auto" or
+"full-speed" (without the quotes).  Not all ThinkPads support the "auto"
+and "full-speed" levels.  The driver accepts "disengaged" as an alias for
+"full-speed", and reports it as "disengaged" for backwards
+compatibility.
+
+On the X31 and X40 (and ONLY on those models), the fan speed can be
+controlled to a certain degree.  Once the fan is running, it can be
+forced to run faster or slower with the following command::
+
+	echo 'speed <speed>' > /proc/acpi/ibm/fan
+
+The sustainable range of fan speeds on the X40 appears to be from about
+3700 to about 7350. Values outside this range either do not have any
+effect or the fan speed eventually settles somewhere in that range.  The
+fan cannot be stopped or started with this command.  This functionality
+is incomplete, and not available through the sysfs interface.
+
+To program the safety watchdog, use the "watchdog" command::
+
+	echo 'watchdog <interval in seconds>' > /proc/acpi/ibm/fan
+
+If you want to disable the watchdog, use 0 as the interval.
+
+Sysfs notes
+^^^^^^^^^^^
+
+The sysfs interface follows the hwmon subsystem guidelines for the most
+part, and the exception is the fan safety watchdog.
+
+Writes to any of the sysfs attributes may return the EINVAL error if
+that operation is not supported in a given ThinkPad or if the parameter
+is out-of-bounds, and EPERM if it is forbidden.  They may also return
+EINTR (interrupted system call), and EIO (I/O error while trying to talk
+to the firmware).
+
+Features not yet implemented by the driver return ENOSYS.
+
+hwmon device attribute pwm1_enable:
+	- 0: PWM offline (fan is set to full-speed mode)
+	- 1: Manual PWM control (use pwm1 to set fan level)
+	- 2: Hardware PWM control (EC "auto" mode)
+	- 3: reserved (Software PWM control, not implemented yet)
+
+	Modes 0 and 2 are not supported by all ThinkPads, and the
+	driver is not always able to detect this.  If it does know a
+	mode is unsupported, it will return -EINVAL.
+
+hwmon device attribute pwm1:
+	Fan level, scaled from the firmware values of 0-7 to the hwmon
+	scale of 0-255.  0 means fan stopped, 255 means highest normal
+	speed (level 7).
+
+	This attribute only commands the fan if pmw1_enable is set to 1
+	(manual PWM control).
+
+hwmon device attribute fan1_input:
+	Fan tachometer reading, in RPM.  May go stale on certain
+	ThinkPads while the EC transitions the PWM to offline mode,
+	which can take up to two minutes.  May return rubbish on older
+	ThinkPads.
+
+hwmon device attribute fan2_input:
+	Fan tachometer reading, in RPM, for the secondary fan.
+	Available only on some ThinkPads.  If the secondary fan is
+	not installed, will always read 0.
+
+hwmon driver attribute fan_watchdog:
+	Fan safety watchdog timer interval, in seconds.  Minimum is
+	1 second, maximum is 120 seconds.  0 disables the watchdog.
+
+To stop the fan: set pwm1 to zero, and pwm1_enable to 1.
+
+To start the fan in a safe mode: set pwm1_enable to 2.  If that fails
+with EINVAL, try to set pwm1_enable to 1 and pwm1 to at least 128 (255
+would be the safest choice, though).
+
+
+WAN
+---
+
+procfs: /proc/acpi/ibm/wan
+
+sysfs device attribute: wwan_enable (deprecated)
+
+sysfs rfkill class: switch "tpacpi_wwan_sw"
+
+This feature shows the presence and current state of the built-in
+Wireless WAN device.
+
+If the ThinkPad supports it, the WWAN state is stored in NVRAM,
+so it is kept across reboots and power-off.
+
+It was tested on a Lenovo ThinkPad X60. It should probably work on other
+ThinkPad models which come with this module installed.
+
+Procfs notes
+^^^^^^^^^^^^
+
+If the W-WAN card is installed, the following commands can be used::
+
+	echo enable > /proc/acpi/ibm/wan
+	echo disable > /proc/acpi/ibm/wan
+
+Sysfs notes
+^^^^^^^^^^^
+
+	If the W-WAN card is installed, it can be enabled /
+	disabled through the "wwan_enable" thinkpad-acpi device
+	attribute, and its current status can also be queried.
+
+	enable:
+		- 0: disables WWAN card / WWAN card is disabled
+		- 1: enables WWAN card / WWAN card is enabled.
+
+	Note: this interface has been superseded by the	generic rfkill
+	class.  It has been deprecated, and it will be removed in year
+	2010.
+
+	rfkill controller switch "tpacpi_wwan_sw": refer to
+	Documentation/rfkill.txt for details.
+
+
+EXPERIMENTAL: UWB
+-----------------
+
+This feature is considered EXPERIMENTAL because it has not been extensively
+tested and validated in various ThinkPad models yet.  The feature may not
+work as expected. USE WITH CAUTION! To use this feature, you need to supply
+the experimental=1 parameter when loading the module.
+
+sysfs rfkill class: switch "tpacpi_uwb_sw"
+
+This feature exports an rfkill controller for the UWB device, if one is
+present and enabled in the BIOS.
+
+Sysfs notes
+^^^^^^^^^^^
+
+	rfkill controller switch "tpacpi_uwb_sw": refer to
+	Documentation/rfkill.txt for details.
+
+Adaptive keyboard
+-----------------
+
+sysfs device attribute: adaptive_kbd_mode
+
+This sysfs attribute controls the keyboard "face" that will be shown on the
+Lenovo X1 Carbon 2nd gen (2014)'s adaptive keyboard. The value can be read
+and set.
+
+- 1 = Home mode
+- 2 = Web-browser mode
+- 3 = Web-conference mode
+- 4 = Function mode
+- 5 = Layflat mode
+
+For more details about which buttons will appear depending on the mode, please
+review the laptop's user guide:
+http://www.lenovo.com/shop/americas/content/user_guides/x1carbon_2_ug_en.pdf
+
+Multiple Commands, Module Parameters
+------------------------------------
+
+Multiple commands can be written to the proc files in one shot by
+separating them with commas, for example::
+
+	echo enable,0xffff > /proc/acpi/ibm/hotkey
+	echo lcd_disable,crt_enable > /proc/acpi/ibm/video
+
+Commands can also be specified when loading the thinkpad-acpi module,
+for example::
+
+	modprobe thinkpad_acpi hotkey=enable,0xffff video=auto_disable
+
+
+Enabling debugging output
+-------------------------
+
+The module takes a debug parameter which can be used to selectively
+enable various classes of debugging output, for example::
+
+	 modprobe thinkpad_acpi debug=0xffff
+
+will enable all debugging output classes.  It takes a bitmask, so
+to enable more than one output class, just add their values.
+
+	=============		======================================
+	Debug bitmask		Description
+	=============		======================================
+	0x8000			Disclose PID of userspace programs
+				accessing some functions of the driver
+	0x0001			Initialization and probing
+	0x0002			Removal
+	0x0004			RF Transmitter control (RFKILL)
+				(bluetooth, WWAN, UWB...)
+	0x0008			HKEY event interface, hotkeys
+	0x0010			Fan control
+	0x0020			Backlight brightness
+	0x0040			Audio mixer/volume control
+	=============		======================================
+
+There is also a kernel build option to enable more debugging
+information, which may be necessary to debug driver problems.
+
+The level of debugging information output by the driver can be changed
+at runtime through sysfs, using the driver attribute debug_level.  The
+attribute takes the same bitmask as the debug module parameter above.
+
+
+Force loading of module
+-----------------------
+
+If thinkpad-acpi refuses to detect your ThinkPad, you can try to specify
+the module parameter force_load=1.  Regardless of whether this works or
+not, please contact ibm-acpi-devel@lists.sourceforge.net with a report.
+
+
+Sysfs interface changelog
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+=========	===============================================================
+0x000100:	Initial sysfs support, as a single platform driver and
+		device.
+0x000200:	Hot key support for 32 hot keys, and radio slider switch
+		support.
+0x010000:	Hot keys are now handled by default over the input
+		layer, the radio switch generates input event EV_RADIO,
+		and the driver enables hot key handling by default in
+		the firmware.
+
+0x020000:	ABI fix: added a separate hwmon platform device and
+		driver, which must be located by name (thinkpad)
+		and the hwmon class for libsensors4 (lm-sensors 3)
+		compatibility.  Moved all hwmon attributes to this
+		new platform device.
+
+0x020100:	Marker for thinkpad-acpi with hot key NVRAM polling
+		support.  If you must, use it to know you should not
+		start a userspace NVRAM poller (allows to detect when
+		NVRAM is compiled out by the user because it is
+		unneeded/undesired in the first place).
+0x020101:	Marker for thinkpad-acpi with hot key NVRAM polling
+		and proper hotkey_mask semantics (version 8 of the
+		NVRAM polling patch).  Some development snapshots of
+		0.18 had an earlier version that did strange things
+		to hotkey_mask.
+
+0x020200:	Add poll()/select() support to the following attributes:
+		hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason
+
+0x020300:	hotkey enable/disable support removed, attributes
+		hotkey_bios_enabled and hotkey_enable deprecated and
+		marked for removal.
+
+0x020400:	Marker for 16 LEDs support.  Also, LEDs that are known
+		to not exist in a given model are not registered with
+		the LED sysfs class anymore.
+
+0x020500:	Updated hotkey driver, hotkey_mask is always available
+		and it is always able to disable hot keys.  Very old
+		thinkpads are properly supported.  hotkey_bios_mask
+		is deprecated and marked for removal.
+
+0x020600:	Marker for backlight change event support.
+
+0x020700:	Support for mute-only mixers.
+		Volume control in read-only mode by default.
+		Marker for ALSA mixer support.
+
+0x030000:	Thermal and fan sysfs attributes were moved to the hwmon
+		device instead of being attached to the backing platform
+		device.
+=========	===============================================================
diff --git a/Documentation/admin-guide/laptops/toshiba_haps.rst b/Documentation/admin-guide/laptops/toshiba_haps.rst
new file mode 100644
index 000000000000..11dfc428c080
--- /dev/null
+++ b/Documentation/admin-guide/laptops/toshiba_haps.rst
@@ -0,0 +1,87 @@
+====================================
+Toshiba HDD Active Protection Sensor
+====================================
+
+Kernel driver: toshiba_haps
+
+Author: Azael Avalos <coproscefalo@gmail.com>
+
+
+.. 0. Contents
+
+   1. Description
+   2. Interface
+   3. Accelerometer axes
+   4. Supported devices
+   5. Usage
+
+
+1. Description
+--------------
+
+This driver provides support for the accelerometer found in various Toshiba
+laptops, being called "Toshiba HDD Protection - Shock Sensor" officially,
+and detects laptops automatically with this device.
+On Windows, Toshiba provided software monitors this device and provides
+automatic HDD protection (head unload) on sudden moves or harsh vibrations,
+however, this driver only provides a notification via a sysfs file to let
+userspace tools or daemons act accordingly, as well as providing a sysfs
+file to set the desired protection level or sensor sensibility.
+
+
+2. Interface
+------------
+
+This device comes with 3 methods:
+
+====	=====================================================================
+_STA    Checks existence of the device, returning Zero if the device does not
+	exists or is not supported.
+PTLV    Sets the desired protection level.
+RSSS    Shuts down the HDD protection interface for a few seconds,
+	then restores normal operation.
+====	=====================================================================
+
+Note:
+  The presence of Solid State Drives (SSD) can make this driver to fail loading,
+  given the fact that such drives have no movable parts, and thus, not requiring
+  any "protection" as well as failing during the evaluation of the _STA method
+  found under this device.
+
+
+3. Accelerometer axes
+---------------------
+
+This device does not report any axes, however, to query the sensor position
+a couple HCI (Hardware Configuration Interface) calls (0x6D and 0xA6) are
+provided to query such information, handled by the kernel module toshiba_acpi
+since kernel version 3.15.
+
+
+4. Supported devices
+--------------------
+
+This driver binds itself to the ACPI device TOS620A, and any Toshiba laptop
+with this device is supported, given the fact that they have the presence of
+conventional HDD and not only SSD, or a combination of both HDD and SSD.
+
+
+5. Usage
+--------
+
+The sysfs files under /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS620A:00/ are:
+
+================   ============================================================
+protection_level   The protection_level is readable and writeable, and
+		   provides a way to let userspace query the current protection
+		   level, as well as set the desired protection level, the
+		   available protection levels are:
+
+		   ============   =======   ==========   ========
+		   0 - Disabled   1 - Low   2 - Medium   3 - High
+		   ============   =======   ==========   ========
+
+reset_protection   The reset_protection entry is writeable only, being "1"
+		   the only parameter it accepts, it is used to trigger
+		   a reset of the protection interface.
+================   ============================================================
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index 5aceb5cd5ce7..64aeee1009ca 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -108,7 +108,7 @@ block_dump
 ==========
 
 block_dump enables block I/O debugging when set to a nonzero value. More
-information on block I/O debugging is in Documentation/laptops/laptop-mode.rst.
+information on block I/O debugging is in Documentation/admin-guide/laptops/laptop-mode.rst.
 
 
 compact_memory
@@ -298,7 +298,7 @@ laptop_mode
 ===========
 
 laptop_mode is a knob that controls "laptop mode". All the things that are
-controlled by this knob are discussed in Documentation/laptops/laptop-mode.rst.
+controlled by this knob are discussed in Documentation/admin-guide/laptops/laptop-mode.rst.
 
 
 legacy_va_layout
diff --git a/Documentation/laptops/asus-laptop.rst b/Documentation/laptops/asus-laptop.rst
deleted file mode 100644
index 95176321a25a..000000000000
--- a/Documentation/laptops/asus-laptop.rst
+++ /dev/null
@@ -1,271 +0,0 @@
-==================
-Asus Laptop Extras
-==================
-
-Version 0.1
-
-August 6, 2009
-
-Corentin Chary <corentincj@iksaif.net>
-http://acpi4asus.sf.net/
-
- This driver provides support for extra features of ACPI-compatible ASUS laptops.
- It may also support some MEDION, JVC or VICTOR laptops (such as MEDION 9675 or
- VICTOR XP7210 for example). It makes all the extra buttons generate input
- events (like keyboards).
-
- On some models adds support for changing the display brightness and output,
- switching the LCD backlight on and off, and most importantly, allows you to
- blink those fancy LEDs intended for reporting mail and wireless status.
-
-This driver supersedes the old asus_acpi driver.
-
-Requirements
-------------
-
-  Kernel 2.6.X sources, configured for your computer, with ACPI support.
-  You also need CONFIG_INPUT and CONFIG_ACPI.
-
-Status
-------
-
- The features currently supported are the following (see below for
- detailed description):
-
- - Fn key combinations
- - Bluetooth enable and disable
- - Wlan enable and disable
- - GPS enable and disable
- - Video output switching
- - Ambient Light Sensor on and off
- - LED control
- - LED Display control
- - LCD brightness control
- - LCD on and off
-
- A compatibility table by model and feature is maintained on the web
- site, http://acpi4asus.sf.net/.
-
-Usage
------
-
-  Try "modprobe asus-laptop". Check your dmesg (simply type dmesg). You should
-  see some lines like this :
-
-      Asus Laptop Extras version 0.42
-        - L2D model detected.
-
-  If it is not the output you have on your laptop, send it (and the laptop's
-  DSDT) to me.
-
-  That's all, now, all the events generated by the hotkeys of your laptop
-  should be reported via netlink events. You can check with
-  "acpi_genl monitor" (part of the acpica project).
-
-  Hotkeys are also reported as input keys (like keyboards) you can check
-  which key are supported using "xev" under X11.
-
-  You can get information on the version of your DSDT table by reading the
-  /sys/devices/platform/asus-laptop/infos entry. If you have a question or a
-  bug report to do, please include the output of this entry.
-
-LEDs
-----
-
-  You can modify LEDs be echoing values to `/sys/class/leds/asus/*/brightness`::
-
-    echo 1 >  /sys/class/leds/asus::mail/brightness
-
-  will switch the mail LED on.
-
-  You can also know if they are on/off by reading their content and use
-  kernel triggers like disk-activity or heartbeat.
-
-Backlight
----------
-
-  You can control lcd backlight power and brightness with
-  /sys/class/backlight/asus-laptop/. Brightness Values are between 0 and 15.
-
-Wireless devices
-----------------
-
-  You can turn the internal Bluetooth adapter on/off with the bluetooth entry
-  (only on models with Bluetooth). This usually controls the associated LED.
-  Same for Wlan adapter.
-
-Display switching
------------------
-
-  Note: the display switching code is currently considered EXPERIMENTAL.
-
-  Switching works for the following models:
-
-    - L3800C
-    - A2500H
-    - L5800C
-    - M5200N
-    - W1000N (albeit with some glitches)
-    - M6700R
-    - A6JC
-    - F3J
-
-  Switching doesn't work for the following:
-
-    - M3700N
-    - L2X00D (locks the laptop under certain conditions)
-
-  To switch the displays, echo values from 0 to 15 to
-  /sys/devices/platform/asus-laptop/display. The significance of those values
-  is as follows:
-
-  +-------+-----+-----+-----+-----+-----+
-  | Bin   | Val | DVI | TV  | CRT | LCD |
-  +-------+-----+-----+-----+-----+-----+
-  | 0000  |   0 |     |     |     |     |
-  +-------+-----+-----+-----+-----+-----+
-  | 0001  |   1 |     |     |     |  X  |
-  +-------+-----+-----+-----+-----+-----+
-  | 0010  |   2 |     |     |  X  |     |
-  +-------+-----+-----+-----+-----+-----+
-  | 0011  |   3 |     |     |  X  |  X  |
-  +-------+-----+-----+-----+-----+-----+
-  | 0100  |   4 |     |  X  |     |     |
-  +-------+-----+-----+-----+-----+-----+
-  | 0101  |   5 |     |  X  |     | X   |
-  +-------+-----+-----+-----+-----+-----+
-  | 0110  |   6 |     |  X  |  X  |     |
-  +-------+-----+-----+-----+-----+-----+
-  | 0111  |   7 |     |  X  |  X  |  X  |
-  +-------+-----+-----+-----+-----+-----+
-  | 1000  |   8 |  X  |     |     |     |
-  +-------+-----+-----+-----+-----+-----+
-  | 1001  |   9 |  X  |     |     |  X  |
-  +-------+-----+-----+-----+-----+-----+
-  | 1010  |  10 |  X  |     |  X  |     |
-  +-------+-----+-----+-----+-----+-----+
-  | 1011  |  11 |  X  |     |  X  |  X  |
-  +-------+-----+-----+-----+-----+-----+
-  | 1100  |  12 |  X  |  X  |     |     |
-  +-------+-----+-----+-----+-----+-----+
-  | 1101  |  13 |  X  |  X  |     |  X  |
-  +-------+-----+-----+-----+-----+-----+
-  | 1110  |  14 |  X  |  X  |  X  |     |
-  +-------+-----+-----+-----+-----+-----+
-  | 1111  |  15 |  X  |  X  |  X  |  X  |
-  +-------+-----+-----+-----+-----+-----+
-
-  In most cases, the appropriate displays must be plugged in for the above
-  combinations to work. TV-Out may need to be initialized at boot time.
-
-  Debugging:
-
-  1) Check whether the Fn+F8 key:
-
-     a) does not lock the laptop (try a boot with noapic / nolapic if it does)
-     b) generates events (0x6n, where n is the value corresponding to the
-        configuration above)
-     c) actually works
-
-     Record the disp value at every configuration.
-  2) Echo values from 0 to 15 to /sys/devices/platform/asus-laptop/display.
-     Record its value, note any change. If nothing changes, try a broader range,
-     up to 65535.
-  3) Send ANY output (both positive and negative reports are needed, unless your
-     machine is already listed above) to the acpi4asus-user mailing list.
-
-  Note: on some machines (e.g. L3C), after the module has been loaded, only 0x6n
-  events are generated and no actual switching occurs. In such a case, a line
-  like::
-
-    echo $((10#$arg-60)) > /sys/devices/platform/asus-laptop/display
-
-  will usually do the trick ($arg is the 0000006n-like event passed to acpid).
-
-  Note: there is currently no reliable way to read display status on xxN
-  (Centrino) models.
-
-LED display
------------
-
-  Some models like the W1N have a LED display that can be used to display
-  several items of information.
-
-  LED display works for the following models:
-
-    - W1000N
-    - W1J
-
-  To control the LED display, use the following::
-
-    echo 0x0T000DDD > /sys/devices/platform/asus-laptop/
-
-  where T control the 3 letters display, and DDD the 3 digits display,
-  according to the tables below::
-
-         DDD (digits)
-         000 to 999 = display digits
-         AAA        = ---
-         BBB to FFF = turn-off
-
-         T  (type)
-         0 = off
-         1 = dvd
-         2 = vcd
-         3 = mp3
-         4 = cd
-         5 = tv
-         6 = cpu
-         7 = vol
-
-  For example "echo 0x01000001 >/sys/devices/platform/asus-laptop/ledd"
-  would display "DVD001".
-
-Driver options
---------------
-
- Options can be passed to the asus-laptop driver using the standard
- module argument syntax (<param>=<value> when passing the option to the
- module or asus-laptop.<param>=<value> on the kernel boot line when
- asus-laptop is statically linked into the kernel).
-
-	     wapf: WAPF defines the behavior of the Fn+Fx wlan key
-		   The significance of values is yet to be found, but
-		   most of the time:
-
-		   - 0x0 should do nothing
-		   - 0x1 should allow to control the device with Fn+Fx key.
-		   - 0x4 should send an ACPI event (0x88) while pressing the Fn+Fx key
-		   - 0x5 like 0x1 or 0x4
-
- The default value is 0x1.
-
-Unsupported models
-------------------
-
- These models will never be supported by this module, as they use a completely
- different mechanism to handle LEDs and extra stuff (meaning we have no clue
- how it works):
-
- - ASUS A1300 (A1B), A1370D
- - ASUS L7300G
- - ASUS L8400
-
-Patches, Errors, Questions
---------------------------
-
- I appreciate any success or failure
- reports, especially if they add to or correct the compatibility table.
- Please include the following information in your report:
-
- - Asus model name
- - a copy of your ACPI tables, using the "acpidump" utility
- - a copy of /sys/devices/platform/asus-laptop/infos
- - which driver features work and which don't
- - the observed behavior of non-working features
-
- Any other comments or patches are also more than welcome.
-
- acpi4asus-user@lists.sourceforge.net
-
- http://sourceforge.net/projects/acpi4asus
diff --git a/Documentation/laptops/disk-shock-protection.rst b/Documentation/laptops/disk-shock-protection.rst
deleted file mode 100644
index e97c5f78d8c3..000000000000
--- a/Documentation/laptops/disk-shock-protection.rst
+++ /dev/null
@@ -1,151 +0,0 @@
-==========================
-Hard disk shock protection
-==========================
-
-Author: Elias Oltmanns <eo@nebensachen.de>
-
-Last modified: 2008-10-03
-
-
-.. 0. Contents
-
-   1. Intro
-   2. The interface
-   3. References
-   4. CREDITS
-
-
-1. Intro
---------
-
-ATA/ATAPI-7 specifies the IDLE IMMEDIATE command with unload feature.
-Issuing this command should cause the drive to switch to idle mode and
-unload disk heads. This feature is being used in modern laptops in
-conjunction with accelerometers and appropriate software to implement
-a shock protection facility. The idea is to stop all I/O operations on
-the internal hard drive and park its heads on the ramp when critical
-situations are anticipated. The desire to have such a feature
-available on GNU/Linux systems has been the original motivation to
-implement a generic disk head parking interface in the Linux kernel.
-Please note, however, that other components have to be set up on your
-system in order to get disk shock protection working (see
-section 3. References below for pointers to more information about
-that).
-
-
-2. The interface
-----------------
-
-For each ATA device, the kernel exports the file
-`block/*/device/unload_heads` in sysfs (here assumed to be mounted under
-/sys). Access to `/sys/block/*/device/unload_heads` is denied with
--EOPNOTSUPP if the device does not support the unload feature.
-Otherwise, writing an integer value to this file will take the heads
-of the respective drive off the platter and block all I/O operations
-for the specified number of milliseconds. When the timeout expires and
-no further disk head park request has been issued in the meantime,
-normal operation will be resumed. The maximal value accepted for a
-timeout is 30000 milliseconds. Exceeding this limit will return
--EOVERFLOW, but heads will be parked anyway and the timeout will be
-set to 30 seconds. However, you can always change a timeout to any
-value between 0 and 30000 by issuing a subsequent head park request
-before the timeout of the previous one has expired. In particular, the
-total timeout can exceed 30 seconds and, more importantly, you can
-cancel a previously set timeout and resume normal operation
-immediately by specifying a timeout of 0. Values below -2 are rejected
-with -EINVAL (see below for the special meaning of -1 and -2). If the
-timeout specified for a recent head park request has not yet expired,
-reading from `/sys/block/*/device/unload_heads` will report the number
-of milliseconds remaining until normal operation will be resumed;
-otherwise, reading the unload_heads attribute will return 0.
-
-For example, do the following in order to park the heads of drive
-/dev/sda and stop all I/O operations for five seconds::
-
-	# echo 5000 > /sys/block/sda/device/unload_heads
-
-A simple::
-
-	# cat /sys/block/sda/device/unload_heads
-
-will show you how many milliseconds are left before normal operation
-will be resumed.
-
-A word of caution: The fact that the interface operates on a basis of
-milliseconds may raise expectations that cannot be satisfied in
-reality. In fact, the ATA specs clearly state that the time for an
-unload operation to complete is vendor specific. The hint in ATA-7
-that this will typically be within 500 milliseconds apparently has
-been dropped in ATA-8.
-
-There is a technical detail of this implementation that may cause some
-confusion and should be discussed here. When a head park request has
-been issued to a device successfully, all I/O operations on the
-controller port this device is attached to will be deferred. That is
-to say, any other device that may be connected to the same port will
-be affected too. The only exception is that a subsequent head unload
-request to that other device will be executed immediately. Further
-operations on that port will be deferred until the timeout specified
-for either device on the port has expired. As far as PATA (old style
-IDE) configurations are concerned, there can only be two devices
-attached to any single port. In SATA world we have port multipliers
-which means that a user-issued head parking request to one device may
-actually result in stopping I/O to a whole bunch of devices. However,
-since this feature is supposed to be used on laptops and does not seem
-to be very useful in any other environment, there will be mostly one
-device per port. Even if the CD/DVD writer happens to be connected to
-the same port as the hard drive, it generally *should* recover just
-fine from the occasional buffer under-run incurred by a head park
-request to the HD. Actually, when you are using an ide driver rather
-than its libata counterpart (i.e. your disk is called /dev/hda
-instead of /dev/sda), then parking the heads of one drive (drive X)
-will generally not affect the mode of operation of another drive
-(drive Y) on the same port as described above. It is only when a port
-reset is required to recover from an exception on drive Y that further
-I/O operations on that drive (and the reset itself) will be delayed
-until drive X is no longer in the parked state.
-
-Finally, there are some hard drives that only comply with an earlier
-version of the ATA standard than ATA-7, but do support the unload
-feature nonetheless. Unfortunately, there is no safe way Linux can
-detect these devices, so you won't be able to write to the
-unload_heads attribute. If you know that your device really does
-support the unload feature (for instance, because the vendor of your
-laptop or the hard drive itself told you so), then you can tell the
-kernel to enable the usage of this feature for that drive by writing
-the special value -1 to the unload_heads attribute::
-
-	# echo -1 > /sys/block/sda/device/unload_heads
-
-will enable the feature for /dev/sda, and giving -2 instead of -1 will
-disable it again.
-
-
-3. References
--------------
-
-There are several laptops from different vendors featuring shock
-protection capabilities. As manufacturers have refused to support open
-source development of the required software components so far, Linux
-support for shock protection varies considerably between different
-hardware implementations. Ideally, this section should contain a list
-of pointers at different projects aiming at an implementation of shock
-protection on different systems. Unfortunately, I only know of a
-single project which, although still considered experimental, is fit
-for use. Please feel free to add projects that have been the victims
-of my ignorance.
-
-- http://www.thinkwiki.org/wiki/HDAPS
-
-  See this page for information about Linux support of the hard disk
-  active protection system as implemented in IBM/Lenovo Thinkpads.
-
-
-4. CREDITS
-----------
-
-This implementation of disk head parking has been inspired by a patch
-originally published by Jon Escombe <lists@dresco.co.uk>. My efforts
-to develop an implementation of this feature that is fit to be merged
-into mainline have been aided by various kernel developers, in
-particular by Tejun Heo and Bartlomiej Zolnierkiewicz.
diff --git a/Documentation/laptops/index.rst b/Documentation/laptops/index.rst
deleted file mode 100644
index 001a30910d09..000000000000
--- a/Documentation/laptops/index.rst
+++ /dev/null
@@ -1,17 +0,0 @@
-:orphan:
-
-==============
-Laptop Drivers
-==============
-
-.. toctree::
-   :maxdepth: 1
-
-   asus-laptop
-   disk-shock-protection
-   laptop-mode
-   lg-laptop
-   sony-laptop
-   sonypi
-   thinkpad-acpi
-   toshiba_haps
diff --git a/Documentation/laptops/laptop-mode.rst b/Documentation/laptops/laptop-mode.rst
deleted file mode 100644
index c984c4262f2e..000000000000
--- a/Documentation/laptops/laptop-mode.rst
+++ /dev/null
@@ -1,781 +0,0 @@
-===============================================
-How to conserve battery power using laptop-mode
-===============================================
-
-Document Author: Bart Samwel (bart@samwel.tk)
-
-Date created: January 2, 2004
-
-Last modified: December 06, 2004
-
-Introduction
-------------
-
-Laptop mode is used to minimize the time that the hard disk needs to be spun up,
-to conserve battery power on laptops. It has been reported to cause significant
-power savings.
-
-.. Contents
-
-   * Introduction
-   * Installation
-   * Caveats
-   * The Details
-   * Tips & Tricks
-   * Control script
-   * ACPI integration
-   * Monitoring tool
-
-
-Installation
-------------
-
-To use laptop mode, you don't need to set any kernel configuration options
-or anything. Simply install all the files included in this document, and
-laptop mode will automatically be started when you're on battery. For
-your convenience, a tarball containing an installer can be downloaded at:
-
-	http://www.samwel.tk/laptop_mode/laptop_mode/
-
-To configure laptop mode, you need to edit the configuration file, which is
-located in /etc/default/laptop-mode on Debian-based systems, or in
-/etc/sysconfig/laptop-mode on other systems.
-
-Unfortunately, automatic enabling of laptop mode does not work for
-laptops that don't have ACPI. On those laptops, you need to start laptop
-mode manually. To start laptop mode, run "laptop_mode start", and to
-stop it, run "laptop_mode stop". (Note: The laptop mode tools package now
-has experimental support for APM, you might want to try that first.)
-
-
-Caveats
--------
-
-* The downside of laptop mode is that you have a chance of losing up to 10
-  minutes of work. If you cannot afford this, don't use it! The supplied ACPI
-  scripts automatically turn off laptop mode when the battery almost runs out,
-  so that you won't lose any data at the end of your battery life.
-
-* Most desktop hard drives have a very limited lifetime measured in spindown
-  cycles, typically about 50.000 times (it's usually listed on the spec sheet).
-  Check your drive's rating, and don't wear down your drive's lifetime if you
-  don't need to.
-
-* If you mount some of your ext3/reiserfs filesystems with the -n option, then
-  the control script will not be able to remount them correctly. You must set
-  DO_REMOUNTS=0 in the control script, otherwise it will remount them with the
-  wrong options -- or it will fail because it cannot write to /etc/mtab.
-
-* If you have your filesystems listed as type "auto" in fstab, like I did, then
-  the control script will not recognize them as filesystems that need remounting.
-  You must list the filesystems with their true type instead.
-
-* It has been reported that some versions of the mutt mail client use file access
-  times to determine whether a folder contains new mail. If you use mutt and
-  experience this, you must disable the noatime remounting by setting the option
-  DO_REMOUNT_NOATIME to 0 in the configuration file.
-
-
-The Details
------------
-
-Laptop mode is controlled by the knob /proc/sys/vm/laptop_mode. This knob is
-present for all kernels that have the laptop mode patch, regardless of any
-configuration options. When the knob is set, any physical disk I/O (that might
-have caused the hard disk to spin up) causes Linux to flush all dirty blocks. The
-result of this is that after a disk has spun down, it will not be spun up
-anymore to write dirty blocks, because those blocks had already been written
-immediately after the most recent read operation. The value of the laptop_mode
-knob determines the time between the occurrence of disk I/O and when the flush
-is triggered. A sensible value for the knob is 5 seconds. Setting the knob to
-0 disables laptop mode.
-
-To increase the effectiveness of the laptop_mode strategy, the laptop_mode
-control script increases dirty_expire_centisecs and dirty_writeback_centisecs in
-/proc/sys/vm to about 10 minutes (by default), which means that pages that are
-dirtied are not forced to be written to disk as often. The control script also
-changes the dirty background ratio, so that background writeback of dirty pages
-is not done anymore. Combined with a higher commit value (also 10 minutes) for
-ext3 or ReiserFS filesystems (also done automatically by the control script),
-this results in concentration of disk activity in a small time interval which
-occurs only once every 10 minutes, or whenever the disk is forced to spin up by
-a cache miss. The disk can then be spun down in the periods of inactivity.
-
-If you want to find out which process caused the disk to spin up, you can
-gather information by setting the flag /proc/sys/vm/block_dump. When this flag
-is set, Linux reports all disk read and write operations that take place, and
-all block dirtyings done to files. This makes it possible to debug why a disk
-needs to spin up, and to increase battery life even more. The output of
-block_dump is written to the kernel output, and it can be retrieved using
-"dmesg". When you use block_dump and your kernel logging level also includes
-kernel debugging messages, you probably want to turn off klogd, otherwise
-the output of block_dump will be logged, causing disk activity that is not
-normally there.
-
-
-Configuration
--------------
-
-The laptop mode configuration file is located in /etc/default/laptop-mode on
-Debian-based systems, or in /etc/sysconfig/laptop-mode on other systems. It
-contains the following options:
-
-MAX_AGE:
-
-Maximum time, in seconds, of hard drive spindown time that you are
-comfortable with. Worst case, it's possible that you could lose this
-amount of work if your battery fails while you're in laptop mode.
-
-MINIMUM_BATTERY_MINUTES:
-
-Automatically disable laptop mode if the remaining number of minutes of
-battery power is less than this value. Default is 10 minutes.
-
-AC_HD/BATT_HD:
-
-The idle timeout that should be set on your hard drive when laptop mode
-is active (BATT_HD) and when it is not active (AC_HD). The defaults are
-20 seconds (value 4) for BATT_HD  and 2 hours (value 244) for AC_HD. The
-possible values are those listed in the manual page for "hdparm" for the
-"-S" option.
-
-HD:
-
-The devices for which the spindown timeout should be adjusted by laptop mode.
-Default is /dev/hda. If you specify multiple devices, separate them by a space.
-
-READAHEAD:
-
-Disk readahead, in 512-byte sectors, while laptop mode is active. A large
-readahead can prevent disk accesses for things like executable pages (which are
-loaded on demand while the application executes) and sequentially accessed data
-(MP3s).
-
-DO_REMOUNTS:
-
-The control script automatically remounts any mounted journaled filesystems
-with appropriate commit interval options. When this option is set to 0, this
-feature is disabled.
-
-DO_REMOUNT_NOATIME:
-
-When remounting, should the filesystems be remounted with the noatime option?
-Normally, this is set to "1" (enabled), but there may be programs that require
-access time recording.
-
-DIRTY_RATIO:
-
-The percentage of memory that is allowed to contain "dirty" or unsaved data
-before a writeback is forced, while laptop mode is active. Corresponds to
-the /proc/sys/vm/dirty_ratio sysctl.
-
-DIRTY_BACKGROUND_RATIO:
-
-The percentage of memory that is allowed to contain "dirty" or unsaved data
-after a forced writeback is done due to an exceeding of DIRTY_RATIO. Set
-this nice and low. This corresponds to the /proc/sys/vm/dirty_background_ratio
-sysctl.
-
-Note that the behaviour of dirty_background_ratio is quite different
-when laptop mode is active and when it isn't. When laptop mode is inactive,
-dirty_background_ratio is the threshold percentage at which background writeouts
-start taking place. When laptop mode is active, however, background writeouts
-are disabled, and the dirty_background_ratio only determines how much writeback
-is done when dirty_ratio is reached.
-
-DO_CPU:
-
-Enable CPU frequency scaling when in laptop mode. (Requires CPUFreq to be setup.
-See Documentation/admin-guide/pm/cpufreq.rst for more info. Disabled by default.)
-
-CPU_MAXFREQ:
-
-When on battery, what is the maximum CPU speed that the system should use? Legal
-values are "slowest" for the slowest speed that your CPU is able to operate at,
-or a value listed in /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies.
-
-
-Tips & Tricks
--------------
-
-* Bartek Kania reports getting up to 50 minutes of extra battery life (on top
-  of his regular 3 to 3.5 hours) using a spindown time of 5 seconds (BATT_HD=1).
-
-* You can spin down the disk while playing MP3, by setting disk readahead
-  to 8MB (READAHEAD=16384). Effectively, the disk will read a complete MP3 at
-  once, and will then spin down while the MP3 is playing. (Thanks to Bartek
-  Kania.)
-
-* Drew Scott Daniels observed: "I don't know why, but when I decrease the number
-  of colours that my display uses it consumes less battery power. I've seen
-  this on powerbooks too. I hope that this is a piece of information that
-  might be useful to the Laptop Mode patch or its users."
-
-* In syslog.conf, you can prefix entries with a dash `-` to omit syncing the
-  file after every logging. When you're using laptop-mode and your disk doesn't
-  spin down, this is a likely culprit.
-
-* Richard Atterer observed that laptop mode does not work well with noflushd
-  (http://noflushd.sourceforge.net/), it seems that noflushd prevents laptop-mode
-  from doing its thing.
-
-* If you're worried about your data, you might want to consider using a USB
-  memory stick or something like that as a "working area". (Be aware though
-  that flash memory can only handle a limited number of writes, and overuse
-  may wear out your memory stick pretty quickly. Do _not_ use journalling
-  filesystems on flash memory sticks.)
-
-
-Configuration file for control and ACPI battery scripts
--------------------------------------------------------
-
-This allows the tunables to be changed for the scripts via an external
-configuration file
-
-It should be installed as /etc/default/laptop-mode on Debian, and as
-/etc/sysconfig/laptop-mode on Red Hat, SUSE, Mandrake, and other work-alikes.
-
-Config file::
-
-  # Maximum time, in seconds, of hard drive spindown time that you are
-  # comfortable with. Worst case, it's possible that you could lose this
-  # amount of work if your battery fails you while in laptop mode.
-  #MAX_AGE=600
-
-  # Automatically disable laptop mode when the number of minutes of battery
-  # that you have left goes below this threshold.
-  MINIMUM_BATTERY_MINUTES=10
-
-  # Read-ahead, in 512-byte sectors. You can spin down the disk while playing MP3/OGG
-  # by setting the disk readahead to 8MB (READAHEAD=16384). Effectively, the disk
-  # will read a complete MP3 at once, and will then spin down while the MP3/OGG is
-  # playing.
-  #READAHEAD=4096
-
-  # Shall we remount journaled fs. with appropriate commit interval? (1=yes)
-  #DO_REMOUNTS=1
-
-  # And shall we add the "noatime" option to that as well? (1=yes)
-  #DO_REMOUNT_NOATIME=1
-
-  # Dirty synchronous ratio.  At this percentage of dirty pages the process
-  # which
-  # calls write() does its own writeback
-  #DIRTY_RATIO=40
-
-  #
-  # Allowed dirty background ratio, in percent.  Once DIRTY_RATIO has been
-  # exceeded, the kernel will wake flusher threads which will then reduce the
-  # amount of dirty memory to dirty_background_ratio.  Set this nice and low,
-  # so once some writeout has commenced, we do a lot of it.
-  #
-  #DIRTY_BACKGROUND_RATIO=5
-
-  # kernel default dirty buffer age
-  #DEF_AGE=30
-  #DEF_UPDATE=5
-  #DEF_DIRTY_BACKGROUND_RATIO=10
-  #DEF_DIRTY_RATIO=40
-  #DEF_XFS_AGE_BUFFER=15
-  #DEF_XFS_SYNC_INTERVAL=30
-  #DEF_XFS_BUFD_INTERVAL=1
-
-  # This must be adjusted manually to the value of HZ in the running kernel
-  # on 2.4, until the XFS people change their 2.4 external interfaces to work in
-  # centisecs. This can be automated, but it's a work in progress that still
-  # needs# some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for
-  # external interfaces, and that is currently always set to 100. So you don't
-  # need to change this on 2.6.
-  #XFS_HZ=100
-
-  # Should the maximum CPU frequency be adjusted down while on battery?
-  # Requires CPUFreq to be setup.
-  # See Documentation/admin-guide/pm/cpufreq.rst for more info
-  #DO_CPU=0
-
-  # When on battery what is the maximum CPU speed that the system should
-  # use? Legal values are "slowest" for the slowest speed that your
-  # CPU is able to operate at, or a value listed in:
-  # /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies
-  # Only applicable if DO_CPU=1.
-  #CPU_MAXFREQ=slowest
-
-  # Idle timeout for your hard drive (man hdparm for valid values, -S option)
-  # Default is 2 hours on AC (AC_HD=244) and 20 seconds for battery (BATT_HD=4).
-  #AC_HD=244
-  #BATT_HD=4
-
-  # The drives for which to adjust the idle timeout. Separate them by a space,
-  # e.g. HD="/dev/hda /dev/hdb".
-  #HD="/dev/hda"
-
-  # Set the spindown timeout on a hard drive?
-  #DO_HD=1
-
-
-Control script
---------------
-
-Please note that this control script works for the Linux 2.4 and 2.6 series (thanks
-to Kiko Piris).
-
-Control script::
-
-  #!/bin/bash
-
-  # start or stop laptop_mode, best run by a power management daemon when
-  # ac gets connected/disconnected from a laptop
-  #
-  # install as /sbin/laptop_mode
-  #
-  # Contributors to this script:   Kiko Piris
-  #				 Bart Samwel
-  #				 Micha Feigin
-  #				 Andrew Morton
-  #				 Herve Eychenne
-  #				 Dax Kelson
-  #
-  # Original Linux 2.4 version by: Jens Axboe
-
-  #############################################################################
-
-  # Source config
-  if [ -f /etc/default/laptop-mode ] ; then
-	# Debian
-	. /etc/default/laptop-mode
-  elif [ -f /etc/sysconfig/laptop-mode ] ; then
-	# Others
-          . /etc/sysconfig/laptop-mode
-  fi
-
-  # Don't raise an error if the config file is incomplete
-  # set defaults instead:
-
-  # Maximum time, in seconds, of hard drive spindown time that you are
-  # comfortable with. Worst case, it's possible that you could lose this
-  # amount of work if your battery fails you while in laptop mode.
-  MAX_AGE=${MAX_AGE:-'600'}
-
-  # Read-ahead, in kilobytes
-  READAHEAD=${READAHEAD:-'4096'}
-
-  # Shall we remount journaled fs. with appropriate commit interval? (1=yes)
-  DO_REMOUNTS=${DO_REMOUNTS:-'1'}
-
-  # And shall we add the "noatime" option to that as well? (1=yes)
-  DO_REMOUNT_NOATIME=${DO_REMOUNT_NOATIME:-'1'}
-
-  # Shall we adjust the idle timeout on a hard drive?
-  DO_HD=${DO_HD:-'1'}
-
-  # Adjust idle timeout on which hard drive?
-  HD="${HD:-'/dev/hda'}"
-
-  # spindown time for HD (hdparm -S values)
-  AC_HD=${AC_HD:-'244'}
-  BATT_HD=${BATT_HD:-'4'}
-
-  # Dirty synchronous ratio.  At this percentage of dirty pages the process which
-  # calls write() does its own writeback
-  DIRTY_RATIO=${DIRTY_RATIO:-'40'}
-
-  # cpu frequency scaling
-  # See Documentation/admin-guide/pm/cpufreq.rst for more info
-  DO_CPU=${CPU_MANAGE:-'0'}
-  CPU_MAXFREQ=${CPU_MAXFREQ:-'slowest'}
-
-  #
-  # Allowed dirty background ratio, in percent.  Once DIRTY_RATIO has been
-  # exceeded, the kernel will wake flusher threads which will then reduce the
-  # amount of dirty memory to dirty_background_ratio.  Set this nice and low,
-  # so once some writeout has commenced, we do a lot of it.
-  #
-  DIRTY_BACKGROUND_RATIO=${DIRTY_BACKGROUND_RATIO:-'5'}
-
-  # kernel default dirty buffer age
-  DEF_AGE=${DEF_AGE:-'30'}
-  DEF_UPDATE=${DEF_UPDATE:-'5'}
-  DEF_DIRTY_BACKGROUND_RATIO=${DEF_DIRTY_BACKGROUND_RATIO:-'10'}
-  DEF_DIRTY_RATIO=${DEF_DIRTY_RATIO:-'40'}
-  DEF_XFS_AGE_BUFFER=${DEF_XFS_AGE_BUFFER:-'15'}
-  DEF_XFS_SYNC_INTERVAL=${DEF_XFS_SYNC_INTERVAL:-'30'}
-  DEF_XFS_BUFD_INTERVAL=${DEF_XFS_BUFD_INTERVAL:-'1'}
-
-  # This must be adjusted manually to the value of HZ in the running kernel
-  # on 2.4, until the XFS people change their 2.4 external interfaces to work in
-  # centisecs. This can be automated, but it's a work in progress that still needs
-  # some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for external
-  # interfaces, and that is currently always set to 100. So you don't need to
-  # change this on 2.6.
-  XFS_HZ=${XFS_HZ:-'100'}
-
-  #############################################################################
-
-  KLEVEL="$(uname -r |
-               {
-	       IFS='.' read a b c
-	       echo $a.$b
-	     }
-  )"
-  case "$KLEVEL" in
-	"2.4"|"2.6")
-		;;
-	*)
-		echo "Unhandled kernel version: $KLEVEL ('uname -r' = '$(uname -r)')" >&2
-		exit 1
-		;;
-  esac
-
-  if [ ! -e /proc/sys/vm/laptop_mode ] ; then
-	echo "Kernel is not patched with laptop_mode patch." >&2
-	exit 1
-  fi
-
-  if [ ! -w /proc/sys/vm/laptop_mode ] ; then
-	echo "You do not have enough privileges to enable laptop_mode." >&2
-	exit 1
-  fi
-
-  # Remove an option (the first parameter) of the form option=<number> from
-  # a mount options string (the rest of the parameters).
-  parse_mount_opts () {
-	OPT="$1"
-	shift
-	echo ",$*," | sed		\
-	 -e 's/,'"$OPT"'=[0-9]*,/,/g'	\
-	 -e 's/,,*/,/g'			\
-	 -e 's/^,//'			\
-	 -e 's/,$//'
-  }
-
-  # Remove an option (the first parameter) without any arguments from
-  # a mount option string (the rest of the parameters).
-  parse_nonumber_mount_opts () {
-	OPT="$1"
-	shift
-	echo ",$*," | sed		\
-	 -e 's/,'"$OPT"',/,/g'		\
-	 -e 's/,,*/,/g'			\
-	 -e 's/^,//'			\
-	 -e 's/,$//'
-  }
-
-  # Find out the state of a yes/no option (e.g. "atime"/"noatime") in
-  # fstab for a given filesystem, and use this state to replace the
-  # value of the option in another mount options string. The device
-  # is the first argument, the option name the second, and the default
-  # value the third. The remainder is the mount options string.
-  #
-  # Example:
-  # parse_yesno_opts_wfstab /dev/hda1 atime atime defaults,noatime
-  #
-  # If fstab contains, say, "rw" for this filesystem, then the result
-  # will be "defaults,atime".
-  parse_yesno_opts_wfstab () {
-	L_DEV="$1"
-	OPT="$2"
-	DEF_OPT="$3"
-	shift 3
-	L_OPTS="$*"
-	PARSEDOPTS1="$(parse_nonumber_mount_opts $OPT $L_OPTS)"
-	PARSEDOPTS1="$(parse_nonumber_mount_opts no$OPT $PARSEDOPTS1)"
-	# Watch for a default atime in fstab
-	FSTAB_OPTS="$(awk '$1 == "'$L_DEV'" { print $4 }' /etc/fstab)"
-	if echo "$FSTAB_OPTS" | grep "$OPT" > /dev/null ; then
-		# option specified in fstab: extract the value and use it
-		if echo "$FSTAB_OPTS" | grep "no$OPT" > /dev/null ; then
-			echo "$PARSEDOPTS1,no$OPT"
-		else
-			# no$OPT not found -- so we must have $OPT.
-			echo "$PARSEDOPTS1,$OPT"
-		fi
-	else
-		# option not specified in fstab -- choose the default.
-		echo "$PARSEDOPTS1,$DEF_OPT"
-	fi
-  }
-
-  # Find out the state of a numbered option (e.g. "commit=NNN") in
-  # fstab for a given filesystem, and use this state to replace the
-  # value of the option in another mount options string. The device
-  # is the first argument, and the option name the second. The
-  # remainder is the mount options string in which the replacement
-  # must be done.
-  #
-  # Example:
-  # parse_mount_opts_wfstab /dev/hda1 commit defaults,commit=7
-  #
-  # If fstab contains, say, "commit=3,rw" for this filesystem, then the
-  # result will be "rw,commit=3".
-  parse_mount_opts_wfstab () {
-	L_DEV="$1"
-	OPT="$2"
-	shift 2
-	L_OPTS="$*"
-	PARSEDOPTS1="$(parse_mount_opts $OPT $L_OPTS)"
-	# Watch for a default commit in fstab
-	FSTAB_OPTS="$(awk '$1 == "'$L_DEV'" { print $4 }' /etc/fstab)"
-	if echo "$FSTAB_OPTS" | grep "$OPT=" > /dev/null ; then
-		# option specified in fstab: extract the value, and use it
-		echo -n "$PARSEDOPTS1,$OPT="
-		echo ",$FSTAB_OPTS," | sed \
-		 -e 's/.*,'"$OPT"'=//'	\
-		 -e 's/,.*//'
-	else
-		# option not specified in fstab: set it to 0
-		echo "$PARSEDOPTS1,$OPT=0"
-	fi
-  }
-
-  deduce_fstype () {
-	MP="$1"
-	# My root filesystem unfortunately has
-	# type "unknown" in /etc/mtab. If we encounter
-	# "unknown", we try to get the type from fstab.
-	cat /etc/fstab |
-	grep -v '^#' |
-	while read FSTAB_DEV FSTAB_MP FSTAB_FST FSTAB_OPTS FSTAB_DUMP FSTAB_DUMP ; do
-		if [ "$FSTAB_MP" = "$MP" ]; then
-			echo $FSTAB_FST
-			exit 0
-		fi
-	done
-  }
-
-  if [ $DO_REMOUNT_NOATIME -eq 1 ] ; then
-	NOATIME_OPT=",noatime"
-  fi
-
-  case "$1" in
-	start)
-		AGE=$((100*$MAX_AGE))
-		XFS_AGE=$(($XFS_HZ*$MAX_AGE))
-		echo -n "Starting laptop_mode"
-
-		if [ -d /proc/sys/vm/pagebuf ] ; then
-			# (For 2.4 and early 2.6.)
-			# This only needs to be set, not reset -- it is only used when
-			# laptop mode is enabled.
-			echo $XFS_AGE > /proc/sys/vm/pagebuf/lm_flush_age
-			echo $XFS_AGE > /proc/sys/fs/xfs/lm_sync_interval
-		elif [ -f /proc/sys/fs/xfs/lm_age_buffer ] ; then
-			# (A couple of early 2.6 laptop mode patches had these.)
-			# The same goes for these.
-			echo $XFS_AGE > /proc/sys/fs/xfs/lm_age_buffer
-			echo $XFS_AGE > /proc/sys/fs/xfs/lm_sync_interval
-		elif [ -f /proc/sys/fs/xfs/age_buffer ] ; then
-			# (2.6.6)
-			# But not for these -- they are also used in normal
-			# operation.
-			echo $XFS_AGE > /proc/sys/fs/xfs/age_buffer
-			echo $XFS_AGE > /proc/sys/fs/xfs/sync_interval
-		elif [ -f /proc/sys/fs/xfs/age_buffer_centisecs ] ; then
-			# (2.6.7 upwards)
-			# And not for these either. These are in centisecs,
-			# not USER_HZ, so we have to use $AGE, not $XFS_AGE.
-			echo $AGE > /proc/sys/fs/xfs/age_buffer_centisecs
-			echo $AGE > /proc/sys/fs/xfs/xfssyncd_centisecs
-			echo 3000 > /proc/sys/fs/xfs/xfsbufd_centisecs
-		fi
-
-		case "$KLEVEL" in
-			"2.4")
-				echo 1					> /proc/sys/vm/laptop_mode
-				echo "30 500 0 0 $AGE $AGE 60 20 0"	> /proc/sys/vm/bdflush
-				;;
-			"2.6")
-				echo 5					> /proc/sys/vm/laptop_mode
-				echo "$AGE"				> /proc/sys/vm/dirty_writeback_centisecs
-				echo "$AGE"				> /proc/sys/vm/dirty_expire_centisecs
-				echo "$DIRTY_RATIO"			> /proc/sys/vm/dirty_ratio
-				echo "$DIRTY_BACKGROUND_RATIO"		> /proc/sys/vm/dirty_background_ratio
-				;;
-		esac
-		if [ $DO_REMOUNTS -eq 1 ]; then
-			cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do
-				PARSEDOPTS="$(parse_mount_opts "$OPTS")"
-				if [ "$FST" = 'unknown' ]; then
-					FST=$(deduce_fstype $MP)
-				fi
-				case "$FST" in
-					"ext3"|"reiserfs")
-						PARSEDOPTS="$(parse_mount_opts commit "$OPTS")"
-						mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE$NOATIME_OPT
-						;;
-					"xfs")
-						mount $DEV -t $FST $MP -o remount,$OPTS$NOATIME_OPT
-						;;
-				esac
-				if [ -b $DEV ] ; then
-					blockdev --setra $(($READAHEAD * 2)) $DEV
-				fi
-			done
-		fi
-		if [ $DO_HD -eq 1 ] ; then
-			for THISHD in $HD ; do
-				/sbin/hdparm -S $BATT_HD $THISHD > /dev/null 2>&1
-				/sbin/hdparm -B 1 $THISHD > /dev/null 2>&1
-			done
-		fi
-		if [ $DO_CPU -eq 1 -a -e /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq ]; then
-			if [ $CPU_MAXFREQ = 'slowest' ]; then
-				CPU_MAXFREQ=`cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq`
-			fi
-			echo $CPU_MAXFREQ > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
-		fi
-		echo "."
-		;;
-	stop)
-		U_AGE=$((100*$DEF_UPDATE))
-		B_AGE=$((100*$DEF_AGE))
-		echo -n "Stopping laptop_mode"
-		echo 0 > /proc/sys/vm/laptop_mode
-		if [ -f /proc/sys/fs/xfs/age_buffer -a ! -f /proc/sys/fs/xfs/lm_age_buffer ] ; then
-			# These need to be restored, if there are no lm_*.
-			echo $(($XFS_HZ*$DEF_XFS_AGE_BUFFER))	 	> /proc/sys/fs/xfs/age_buffer
-			echo $(($XFS_HZ*$DEF_XFS_SYNC_INTERVAL)) 	> /proc/sys/fs/xfs/sync_interval
-		elif [ -f /proc/sys/fs/xfs/age_buffer_centisecs ] ; then
-			# These need to be restored as well.
-			echo $((100*$DEF_XFS_AGE_BUFFER))	> /proc/sys/fs/xfs/age_buffer_centisecs
-			echo $((100*$DEF_XFS_SYNC_INTERVAL))	> /proc/sys/fs/xfs/xfssyncd_centisecs
-			echo $((100*$DEF_XFS_BUFD_INTERVAL))	> /proc/sys/fs/xfs/xfsbufd_centisecs
-		fi
-		case "$KLEVEL" in
-			"2.4")
-				echo "30 500 0 0 $U_AGE $B_AGE 60 20 0"	> /proc/sys/vm/bdflush
-				;;
-			"2.6")
-				echo "$U_AGE"				> /proc/sys/vm/dirty_writeback_centisecs
-				echo "$B_AGE"				> /proc/sys/vm/dirty_expire_centisecs
-				echo "$DEF_DIRTY_RATIO"			> /proc/sys/vm/dirty_ratio
-				echo "$DEF_DIRTY_BACKGROUND_RATIO"	> /proc/sys/vm/dirty_background_ratio
-				;;
-		esac
-		if [ $DO_REMOUNTS -eq 1 ] ; then
-			cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do
-				# Reset commit and atime options to defaults.
-				if [ "$FST" = 'unknown' ]; then
-					FST=$(deduce_fstype $MP)
-				fi
-				case "$FST" in
-					"ext3"|"reiserfs")
-						PARSEDOPTS="$(parse_mount_opts_wfstab $DEV commit $OPTS)"
-						PARSEDOPTS="$(parse_yesno_opts_wfstab $DEV atime atime $PARSEDOPTS)"
-						mount $DEV -t $FST $MP -o remount,$PARSEDOPTS
-						;;
-					"xfs")
-						PARSEDOPTS="$(parse_yesno_opts_wfstab $DEV atime atime $OPTS)"
-						mount $DEV -t $FST $MP -o remount,$PARSEDOPTS
-						;;
-				esac
-				if [ -b $DEV ] ; then
-					blockdev --setra 256 $DEV
-				fi
-			done
-		fi
-		if [ $DO_HD -eq 1 ] ; then
-			for THISHD in $HD ; do
-				/sbin/hdparm -S $AC_HD $THISHD > /dev/null 2>&1
-				/sbin/hdparm -B 255 $THISHD > /dev/null 2>&1
-			done
-		fi
-		if [ $DO_CPU -eq 1 -a -e /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq ]; then
-			echo `cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq` > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
-		fi
-		echo "."
-		;;
-	*)
-		echo "Usage: $0 {start|stop}" 2>&1
-		exit 1
-		;;
-
-  esac
-
-  exit 0
-
-
-ACPI integration
-----------------
-
-Dax Kelson submitted this so that the ACPI acpid daemon will
-kick off the laptop_mode script and run hdparm. The part that
-automatically disables laptop mode when the battery is low was
-written by Jan Topinski.
-
-/etc/acpi/events/ac_adapter::
-
-	event=ac_adapter
-	action=/etc/acpi/actions/ac.sh %e
-
-/etc/acpi/events/battery::
-
-	event=battery.*
-	action=/etc/acpi/actions/battery.sh %e
-
-/etc/acpi/actions/ac.sh::
-
-  #!/bin/bash
-
-  # ac on/offline event handler
-
-  status=`awk '/^state: / { print $2 }' /proc/acpi/ac_adapter/$2/state`
-
-  case $status in
-          "on-line")
-                  /sbin/laptop_mode stop
-                  exit 0
-          ;;
-          "off-line")
-                  /sbin/laptop_mode start
-                  exit 0
-          ;;
-  esac
-
-
-/etc/acpi/actions/battery.sh::
-
-  #! /bin/bash
-
-  # Automatically disable laptop mode when the battery almost runs out.
-
-  BATT_INFO=/proc/acpi/battery/$2/state
-
-  if [[ -f /proc/sys/vm/laptop_mode ]]
-  then
-     LM=`cat /proc/sys/vm/laptop_mode`
-     if [[ $LM -gt 0 ]]
-     then
-       if [[ -f $BATT_INFO ]]
-       then
-          # Source the config file only now that we know we need
-          if [ -f /etc/default/laptop-mode ] ; then
-                  # Debian
-                  . /etc/default/laptop-mode
-          elif [ -f /etc/sysconfig/laptop-mode ] ; then
-                  # Others
-                  . /etc/sysconfig/laptop-mode
-          fi
-          MINIMUM_BATTERY_MINUTES=${MINIMUM_BATTERY_MINUTES:-'10'}
-
-          ACTION="`cat $BATT_INFO | grep charging | cut -c 26-`"
-          if [[ ACTION -eq "discharging" ]]
-          then
-             PRESENT_RATE=`cat $BATT_INFO | grep "present rate:" | sed  "s/.* \([0-9][0-9]* \).*/\1/" `
-             REMAINING=`cat $BATT_INFO | grep "remaining capacity:" | sed  "s/.* \([0-9][0-9]* \).*/\1/" `
-          fi
-          if (($REMAINING * 60 / $PRESENT_RATE < $MINIMUM_BATTERY_MINUTES))
-          then
-             /sbin/laptop_mode stop
-          fi
-       else
-         logger -p daemon.warning "You are using laptop mode and your battery interface $BATT_INFO is missing. This may lead to loss of data when the battery runs out. Check kernel ACPI support and /proc/acpi/battery folder, and edit /etc/acpi/battery.sh to set BATT_INFO to the correct path."
-       fi
-     fi
-  fi
-
-
-Monitoring tool
----------------
-
-Bartek Kania submitted this, it can be used to measure how much time your disk
-spends spun up/down.  See tools/laptop/dslm/dslm.c
diff --git a/Documentation/laptops/lg-laptop.rst b/Documentation/laptops/lg-laptop.rst
deleted file mode 100644
index f2c2ffe31101..000000000000
--- a/Documentation/laptops/lg-laptop.rst
+++ /dev/null
@@ -1,85 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0+
-
-:orphan:
-
-LG Gram laptop extra features
-=============================
-
-By Matan Ziv-Av <matan@svgalib.org>
-
-
-Hotkeys
--------
-
-The following FN keys are ignored by the kernel without this driver:
-
-- FN-F1 (LG control panel)   - Generates F15
-- FN-F5 (Touchpad toggle)    - Generates F13
-- FN-F6 (Airplane mode)      - Generates RFKILL
-- FN-F8 (Keyboard backlight) - Generates F16.
-  This key also changes keyboard backlight mode.
-- FN-F9 (Reader mode)        - Generates F14
-
-The rest of the FN keys work without a need for a special driver.
-
-
-Reader mode
------------
-
-Writing 0/1 to /sys/devices/platform/lg-laptop/reader_mode disables/enables
-reader mode. In this mode the screen colors change (blue color reduced),
-and the reader mode indicator LED (on F9 key) turns on.
-
-
-FN Lock
--------
-
-Writing 0/1 to /sys/devices/platform/lg-laptop/fn_lock disables/enables
-FN lock.
-
-
-Battery care limit
-------------------
-
-Writing 80/100 to /sys/devices/platform/lg-laptop/battery_care_limit
-sets the maximum capacity to charge the battery. Limiting the charge
-reduces battery capacity loss over time.
-
-This value is reset to 100 when the kernel boots.
-
-
-Fan mode
---------
-
-Writing 1/0 to /sys/devices/platform/lg-laptop/fan_mode disables/enables
-the fan silent mode.
-
-
-USB charge
-----------
-
-Writing 0/1 to /sys/devices/platform/lg-laptop/usb_charge disables/enables
-charging another device from the USB port while the device is turned off.
-
-This value is reset to 0 when the kernel boots.
-
-
-LEDs
-~~~~
-
-The are two LED devices supported by the driver:
-
-Keyboard backlight
-------------------
-
-A led device named kbd_led controls the keyboard backlight. There are three
-lighting level: off (0), low (127) and high (255).
-
-The keyboard backlight is also controlled by the key combination FN-F8
-which cycles through those levels.
-
-
-Touchpad indicator LED
-----------------------
-
-On the F5 key. Controlled by led device names tpad_led.
diff --git a/Documentation/laptops/sony-laptop.rst b/Documentation/laptops/sony-laptop.rst
deleted file mode 100644
index 9edcc7f6612f..000000000000
--- a/Documentation/laptops/sony-laptop.rst
+++ /dev/null
@@ -1,174 +0,0 @@
-=========================================
-Sony Notebook Control Driver (SNC) Readme
-=========================================
-
-	- Copyright (C) 2004- 2005 Stelian Pop <stelian@popies.net>
-	- Copyright (C) 2007 Mattia Dongili <malattia@linux.it>
-
-This mini-driver drives the SNC and SPIC device present in the ACPI BIOS of the
-Sony Vaio laptops. This driver mixes both devices functions under the same
-(hopefully consistent) interface. This also means that the sonypi driver is
-obsoleted by sony-laptop now.
-
-Fn keys (hotkeys):
-------------------
-
-Some models report hotkeys through the SNC or SPIC devices, such events are
-reported both through the ACPI subsystem as acpi events and through the INPUT
-subsystem. See the logs of /proc/bus/input/devices to find out what those
-events are and which input devices are created by the driver.
-Additionally, loading the driver with the debug option will report all events
-in the kernel log.
-
-The "scancodes" passed to the input system (that can be remapped with udev)
-are indexes to the table "sony_laptop_input_keycode_map" in the sony-laptop.c
-module.  For example the "FN/E" key combination (EJECTCD on some models)
-generates the scancode 20 (0x14).
-
-Backlight control:
-------------------
-If your laptop model supports it, you will find sysfs files in the
-/sys/class/backlight/sony/
-directory. You will be able to query and set the current screen
-brightness:
-
-	======================	=========================================
-	brightness		get/set screen brightness (an integer
-				between 0 and 7)
-	actual_brightness	reading from this file will query the HW
-				to get real brightness value
-	max_brightness		the maximum brightness value
-	======================	=========================================
-
-
-Platform specific:
-------------------
-Loading the sony-laptop module will create a
-/sys/devices/platform/sony-laptop/
-directory populated with some files.
-
-You then read/write integer values from/to those files by using
-standard UNIX tools.
-
-The files are:
-
-	======================	==========================================
-	brightness_default	screen brightness which will be set
-				when the laptop will be rebooted
-	cdpower			power on/off the internal CD drive
-	audiopower		power on/off the internal sound card
-	lanpower		power on/off the internal ethernet card
-				(only in debug mode)
-	bluetoothpower		power on/off the internal bluetooth device
-	fanspeed		get/set the fan speed
-	======================	==========================================
-
-Note that some files may be missing if they are not supported
-by your particular laptop model.
-
-Example usage::
-
-	# echo "1" > /sys/devices/platform/sony-laptop/brightness_default
-
-sets the lowest screen brightness for the next and later reboots
-
-::
-
-	# echo "8" > /sys/devices/platform/sony-laptop/brightness_default
-
-sets the highest screen brightness for the next and later reboots
-
-::
-
-	# cat /sys/devices/platform/sony-laptop/brightness_default
-
-retrieves the value
-
-::
-
-	# echo "0" > /sys/devices/platform/sony-laptop/audiopower
-
-powers off the sound card
-
-::
-
-	# echo "1" > /sys/devices/platform/sony-laptop/audiopower
-
-powers on the sound card.
-
-
-RFkill control:
----------------
-More recent Vaio models expose a consistent set of ACPI methods to
-control radio frequency emitting devices. If you are a lucky owner of
-such a laptop you will find the necessary rfkill devices under
-/sys/class/rfkill. Check those starting with sony-* in::
-
-	# grep . /sys/class/rfkill/*/{state,name}
-
-
-Development:
-------------
-
-If you want to help with the development of this driver (and
-you are not afraid of any side effects doing strange things with
-your ACPI BIOS could have on your laptop), load the driver and
-pass the option 'debug=1'.
-
-REPEAT:
-	**DON'T DO THIS IF YOU DON'T LIKE RISKY BUSINESS.**
-
-In your kernel logs you will find the list of all ACPI methods
-the SNC device has on your laptop.
-
-* For new models you will see a long list of meaningless method names,
-  reading the DSDT table source should reveal that:
-
-(1) the SNC device uses an internal capability lookup table
-(2) SN00 is used to find values in the lookup table
-(3) SN06 and SN07 are used to call into the real methods based on
-    offsets you can obtain iterating the table using SN00
-(4) SN02 used to enable events.
-
-Some values in the capability lookup table are more or less known, see
-the code for all sony_call_snc_handle calls, others are more obscure.
-
-* For old models you can see the GCDP/GCDP methods used to pwer on/off
-  the CD drive, but there are others and they are usually different from
-  model to model.
-
-**I HAVE NO IDEA WHAT THOSE METHODS DO.**
-
-The sony-laptop driver creates, for some of those methods (the most
-current ones found on several Vaio models), an entry under
-/sys/devices/platform/sony-laptop, just like the 'cdpower' one.
-You can create other entries corresponding to your own laptop methods by
-further editing the source (see the 'sony_nc_values' table, and add a new
-entry to this table with your get/set method names using the
-SNC_HANDLE_NAMES macro).
-
-Your mission, should you accept it, is to try finding out what
-those entries are for, by reading/writing random values from/to those
-files and find out what is the impact on your laptop.
-
-Should you find anything interesting, please report it back to me,
-I will not disavow all knowledge of your actions :)
-
-See also http://www.linux.it/~malattia/wiki/index.php/Sony_drivers for other
-useful info.
-
-Bugs/Limitations:
------------------
-
-* This driver is not based on official documentation from Sony
-  (because there is none), so there is no guarantee this driver
-  will work at all, or do the right thing. Although this hasn't
-  happened to me, this driver could do very bad things to your
-  laptop, including permanent damage.
-
-* The sony-laptop and sonypi drivers do not interact at all. In the
-  future, sonypi will be removed and replaced by sony-laptop.
-
-* spicctrl, which is the userspace tool used to communicate with the
-  sonypi driver (through /dev/sonypi) is deprecated as well since all
-  its features are now available under the sysfs tree via sony-laptop.
diff --git a/Documentation/laptops/sonypi.rst b/Documentation/laptops/sonypi.rst
deleted file mode 100644
index 2a1975ed7ee4..000000000000
--- a/Documentation/laptops/sonypi.rst
+++ /dev/null
@@ -1,160 +0,0 @@
-==================================================
-Sony Programmable I/O Control Device Driver Readme
-==================================================
-
-	- Copyright (C) 2001-2004 Stelian Pop <stelian@popies.net>
-	- Copyright (C) 2001-2002 Alcôve <www.alcove.com>
-	- Copyright (C) 2001 Michael Ashley <m.ashley@unsw.edu.au>
-	- Copyright (C) 2001 Junichi Morita <jun1m@mars.dti.ne.jp>
-	- Copyright (C) 2000 Takaya Kinjo <t-kinjo@tc4.so-net.ne.jp>
-	- Copyright (C) 2000 Andrew Tridgell <tridge@samba.org>
-
-This driver enables access to the Sony Programmable I/O Control Device which
-can be found in many Sony Vaio laptops. Some newer Sony laptops (seems to be
-limited to new FX series laptops, at least the FX501 and the FX702) lack a
-sonypi device and are not supported at all by this driver.
-
-It will give access (through a user space utility) to some events those laptops
-generate, like:
-
-	- jogdial events (the small wheel on the side of Vaios)
-	- capture button events (only on Vaio Picturebook series)
-	- Fn keys
-	- bluetooth button (only on C1VR model)
-	- programmable keys, back, help, zoom, thumbphrase buttons, etc.
-	  (when available)
-
-Those events (see linux/sonypi.h) can be polled using the character device node
-/dev/sonypi (major 10, minor auto allocated or specified as a option).
-A simple daemon which translates the jogdial movements into mouse wheel events
-can be downloaded at: <http://popies.net/sonypi/>
-
-Another option to intercept the events is to get them directly through the
-input layer.
-
-This driver supports also some ioctl commands for setting the LCD screen
-brightness and querying the batteries charge information (some more
-commands may be added in the future).
-
-This driver can also be used to set the camera controls on Picturebook series
-(brightness, contrast etc), and is used by the video4linux driver for the
-Motion Eye camera.
-
-Please note that this driver was created by reverse engineering the Windows
-driver and the ACPI BIOS, because Sony doesn't agree to release any programming
-specs for its laptops. If someone convinces them to do so, drop me a note.
-
-Driver options:
----------------
-
-Several options can be passed to the sonypi driver using the standard
-module argument syntax (<param>=<value> when passing the option to the
-module or sonypi.<param>=<value> on the kernel boot line when sonypi is
-statically linked into the kernel). Those options are:
-
-	=============== =======================================================
-	minor: 		minor number of the misc device /dev/sonypi,
-			default is -1 (automatic allocation, see /proc/misc
-			or kernel logs)
-
-	camera:		if you have a PictureBook series Vaio (with the
-			integrated MotionEye camera), set this parameter to 1
-			in order to let the driver access to the camera
-
-	fnkeyinit:	on some Vaios (C1VE, C1VR etc), the Fn key events don't
-			get enabled unless you set this parameter to 1.
-			Do not use this option unless it's actually necessary,
-			some Vaio models don't deal well with this option.
-			This option is available only if the kernel is
-			compiled without ACPI support (since it conflicts
-			with it and it shouldn't be required anyway if
-			ACPI is already enabled).
-
-	verbose:	set to 1 to print unknown events received from the
-			sonypi device.
-			set to 2 to print all events received from the
-			sonypi device.
-
-	compat:		uses some compatibility code for enabling the sonypi
-			events. If the driver worked for you in the past
-			(prior to version 1.5) and does not work anymore,
-			add this option and report to the author.
-
-	mask:		event mask telling the driver what events will be
-			reported to the user. This parameter is required for
-			some Vaio models where the hardware reuses values
-			used in other Vaio models (like the FX series who does
-			not have a jogdial but reuses the jogdial events for
-			programmable keys events). The default event mask is
-			set to 0xffffffff, meaning that all possible events
-			will be tried. You can use the following bits to
-			construct your own event mask (from
-			drivers/char/sonypi.h):
-
-				========================	======
-				SONYPI_JOGGER_MASK 		0x0001
-				SONYPI_CAPTURE_MASK 		0x0002
-				SONYPI_FNKEY_MASK 		0x0004
-				SONYPI_BLUETOOTH_MASK 		0x0008
-				SONYPI_PKEY_MASK 		0x0010
-				SONYPI_BACK_MASK 		0x0020
-				SONYPI_HELP_MASK 		0x0040
-				SONYPI_LID_MASK 		0x0080
-				SONYPI_ZOOM_MASK 		0x0100
-				SONYPI_THUMBPHRASE_MASK 	0x0200
-				SONYPI_MEYE_MASK		0x0400
-				SONYPI_MEMORYSTICK_MASK		0x0800
-				SONYPI_BATTERY_MASK		0x1000
-				SONYPI_WIRELESS_MASK		0x2000
-				========================	======
-
-	useinput:	if set (which is the default) two input devices are
-			created, one which interprets the jogdial events as
-			mouse events, the other one which acts like a
-			keyboard reporting the pressing of the special keys.
-	=============== =======================================================
-
-Module use:
------------
-
-In order to automatically load the sonypi module on use, you can put those
-lines a configuration file in /etc/modprobe.d/::
-
-	alias char-major-10-250 sonypi
-	options sonypi minor=250
-
-This supposes the use of minor 250 for the sonypi device::
-
-	# mknod /dev/sonypi c 10 250
-
-Bugs:
------
-
-	- several users reported that this driver disables the BIOS-managed
-	  Fn-keys which put the laptop in sleeping state, or switch the
-	  external monitor on/off. There is no workaround yet, since this
-	  driver disables all APM management for those keys, by enabling the
-	  ACPI management (and the ACPI core stuff is not complete yet). If
-	  you have one of those laptops with working Fn keys and want to
-	  continue to use them, don't use this driver.
-
-	- some users reported that the laptop speed is lower (dhrystone
-	  tested) when using the driver with the fnkeyinit parameter. I cannot
-	  reproduce it on my laptop and not all users have this problem.
-	  This happens because the fnkeyinit parameter enables the ACPI
-	  mode (but without additional ACPI control, like processor
-	  speed handling etc). Use ACPI instead of APM if it works on your
-	  laptop.
-
-	- sonypi lacks the ability to distinguish between certain key
-	  events on some models.
-
-	- some models with the nvidia card (geforce go 6200 tc) uses a
-	  different way to adjust the backlighting of the screen. There
-	  is a userspace utility to adjust the brightness on those models,
-	  which can be downloaded from
-	  http://www.acc.umu.se/~erikw/program/smartdimmer-0.1.tar.bz2
-
-	- since all development was done by reverse engineering, there is
-	  *absolutely no guarantee* that this driver will not crash your
-	  laptop. Permanently.
diff --git a/Documentation/laptops/thinkpad-acpi.rst b/Documentation/laptops/thinkpad-acpi.rst
deleted file mode 100644
index 19d52fc3c5e9..000000000000
--- a/Documentation/laptops/thinkpad-acpi.rst
+++ /dev/null
@@ -1,1562 +0,0 @@
-===========================
-ThinkPad ACPI Extras Driver
-===========================
-
-Version 0.25
-
-October 16th,  2013
-
-- Borislav Deianov <borislav@users.sf.net>
-- Henrique de Moraes Holschuh <hmh@hmh.eng.br>
-
-http://ibm-acpi.sf.net/
-
-This is a Linux driver for the IBM and Lenovo ThinkPad laptops. It
-supports various features of these laptops which are accessible
-through the ACPI and ACPI EC framework, but not otherwise fully
-supported by the generic Linux ACPI drivers.
-
-This driver used to be named ibm-acpi until kernel 2.6.21 and release
-0.13-20070314.  It used to be in the drivers/acpi tree, but it was
-moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel
-2.6.22, and release 0.14.  It was moved to drivers/platform/x86 for
-kernel 2.6.29 and release 0.22.
-
-The driver is named "thinkpad-acpi".  In some places, like module
-names and log messages, "thinkpad_acpi" is used because of userspace
-issues.
-
-"tpacpi" is used as a shorthand where "thinkpad-acpi" would be too
-long due to length limitations on some Linux kernel versions.
-
-Status
-------
-
-The features currently supported are the following (see below for
-detailed description):
-
-	- Fn key combinations
-	- Bluetooth enable and disable
-	- video output switching, expansion control
-	- ThinkLight on and off
-	- CMOS/UCMS control
-	- LED control
-	- ACPI sounds
-	- temperature sensors
-	- Experimental: embedded controller register dump
-	- LCD brightness control
-	- Volume control
-	- Fan control and monitoring: fan speed, fan enable/disable
-	- WAN enable and disable
-	- UWB enable and disable
-
-A compatibility table by model and feature is maintained on the web
-site, http://ibm-acpi.sf.net/. I appreciate any success or failure
-reports, especially if they add to or correct the compatibility table.
-Please include the following information in your report:
-
-	- ThinkPad model name
-	- a copy of your ACPI tables, using the "acpidump" utility
-	- a copy of the output of dmidecode, with serial numbers
-	  and UUIDs masked off
-	- which driver features work and which don't
-	- the observed behavior of non-working features
-
-Any other comments or patches are also more than welcome.
-
-
-Installation
-------------
-
-If you are compiling this driver as included in the Linux kernel
-sources, look for the CONFIG_THINKPAD_ACPI Kconfig option.
-It is located on the menu path: "Device Drivers" -> "X86 Platform
-Specific Device Drivers" -> "ThinkPad ACPI Laptop Extras".
-
-
-Features
---------
-
-The driver exports two different interfaces to userspace, which can be
-used to access the features it provides.  One is a legacy procfs-based
-interface, which will be removed at some time in the future.  The other
-is a new sysfs-based interface which is not complete yet.
-
-The procfs interface creates the /proc/acpi/ibm directory.  There is a
-file under that directory for each feature it supports.  The procfs
-interface is mostly frozen, and will change very little if at all: it
-will not be extended to add any new functionality in the driver, instead
-all new functionality will be implemented on the sysfs interface.
-
-The sysfs interface tries to blend in the generic Linux sysfs subsystems
-and classes as much as possible.  Since some of these subsystems are not
-yet ready or stabilized, it is expected that this interface will change,
-and any and all userspace programs must deal with it.
-
-
-Notes about the sysfs interface
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Unlike what was done with the procfs interface, correctness when talking
-to the sysfs interfaces will be enforced, as will correctness in the
-thinkpad-acpi's implementation of sysfs interfaces.
-
-Also, any bugs in the thinkpad-acpi sysfs driver code or in the
-thinkpad-acpi's implementation of the sysfs interfaces will be fixed for
-maximum correctness, even if that means changing an interface in
-non-compatible ways.  As these interfaces mature both in the kernel and
-in thinkpad-acpi, such changes should become quite rare.
-
-Applications interfacing to the thinkpad-acpi sysfs interfaces must
-follow all sysfs guidelines and correctly process all errors (the sysfs
-interface makes extensive use of errors).  File descriptors and open /
-close operations to the sysfs inodes must also be properly implemented.
-
-The version of thinkpad-acpi's sysfs interface is exported by the driver
-as a driver attribute (see below).
-
-Sysfs driver attributes are on the driver's sysfs attribute space,
-for 2.6.23+ this is /sys/bus/platform/drivers/thinkpad_acpi/ and
-/sys/bus/platform/drivers/thinkpad_hwmon/
-
-Sysfs device attributes are on the thinkpad_acpi device sysfs attribute
-space, for 2.6.23+ this is /sys/devices/platform/thinkpad_acpi/.
-
-Sysfs device attributes for the sensors and fan are on the
-thinkpad_hwmon device's sysfs attribute space, but you should locate it
-looking for a hwmon device with the name attribute of "thinkpad", or
-better yet, through libsensors. For 4.14+ sysfs attributes were moved to the
-hwmon device (/sys/bus/platform/devices/thinkpad_hwmon/hwmon/hwmon? or
-/sys/class/hwmon/hwmon?).
-
-Driver version
---------------
-
-procfs: /proc/acpi/ibm/driver
-
-sysfs driver attribute: version
-
-The driver name and version. No commands can be written to this file.
-
-
-Sysfs interface version
------------------------
-
-sysfs driver attribute: interface_version
-
-Version of the thinkpad-acpi sysfs interface, as an unsigned long
-(output in hex format: 0xAAAABBCC), where:
-
-	AAAA
-	  - major revision
-	BB
-	  - minor revision
-	CC
-	  - bugfix revision
-
-The sysfs interface version changelog for the driver can be found at the
-end of this document.  Changes to the sysfs interface done by the kernel
-subsystems are not documented here, nor are they tracked by this
-attribute.
-
-Changes to the thinkpad-acpi sysfs interface are only considered
-non-experimental when they are submitted to Linux mainline, at which
-point the changes in this interface are documented and interface_version
-may be updated.  If you are using any thinkpad-acpi features not yet
-sent to mainline for merging, you do so on your own risk: these features
-may disappear, or be implemented in a different and incompatible way by
-the time they are merged in Linux mainline.
-
-Changes that are backwards-compatible by nature (e.g. the addition of
-attributes that do not change the way the other attributes work) do not
-always warrant an update of interface_version.  Therefore, one must
-expect that an attribute might not be there, and deal with it properly
-(an attribute not being there *is* a valid way to make it clear that a
-feature is not available in sysfs).
-
-
-Hot keys
---------
-
-procfs: /proc/acpi/ibm/hotkey
-
-sysfs device attribute: hotkey_*
-
-In a ThinkPad, the ACPI HKEY handler is responsible for communicating
-some important events and also keyboard hot key presses to the operating
-system.  Enabling the hotkey functionality of thinkpad-acpi signals the
-firmware that such a driver is present, and modifies how the ThinkPad
-firmware will behave in many situations.
-
-The driver enables the HKEY ("hot key") event reporting automatically
-when loaded, and disables it when it is removed.
-
-The driver will report HKEY events in the following format::
-
-	ibm/hotkey HKEY 00000080 0000xxxx
-
-Some of these events refer to hot key presses, but not all of them.
-
-The driver will generate events over the input layer for hot keys and
-radio switches, and over the ACPI netlink layer for other events.  The
-input layer support accepts the standard IOCTLs to remap the keycodes
-assigned to each hot key.
-
-The hot key bit mask allows some control over which hot keys generate
-events.  If a key is "masked" (bit set to 0 in the mask), the firmware
-will handle it.  If it is "unmasked", it signals the firmware that
-thinkpad-acpi would prefer to handle it, if the firmware would be so
-kind to allow it (and it often doesn't!).
-
-Not all bits in the mask can be modified.  Not all bits that can be
-modified do anything.  Not all hot keys can be individually controlled
-by the mask.  Some models do not support the mask at all.  The behaviour
-of the mask is, therefore, highly dependent on the ThinkPad model.
-
-The driver will filter out any unmasked hotkeys, so even if the firmware
-doesn't allow disabling an specific hotkey, the driver will not report
-events for unmasked hotkeys.
-
-Note that unmasking some keys prevents their default behavior.  For
-example, if Fn+F5 is unmasked, that key will no longer enable/disable
-Bluetooth by itself in firmware.
-
-Note also that not all Fn key combinations are supported through ACPI
-depending on the ThinkPad model and firmware version.  On those
-ThinkPads, it is still possible to support some extra hotkeys by
-polling the "CMOS NVRAM" at least 10 times per second.  The driver
-attempts to enables this functionality automatically when required.
-
-procfs notes
-^^^^^^^^^^^^
-
-The following commands can be written to the /proc/acpi/ibm/hotkey file::
-
-	echo 0xffffffff > /proc/acpi/ibm/hotkey -- enable all hot keys
-	echo 0 > /proc/acpi/ibm/hotkey -- disable all possible hot keys
-	... any other 8-hex-digit mask ...
-	echo reset > /proc/acpi/ibm/hotkey -- restore the recommended mask
-
-The following commands have been deprecated and will cause the kernel
-to log a warning::
-
-	echo enable > /proc/acpi/ibm/hotkey -- does nothing
-	echo disable > /proc/acpi/ibm/hotkey -- returns an error
-
-The procfs interface does not support NVRAM polling control.  So as to
-maintain maximum bug-to-bug compatibility, it does not report any masks,
-nor does it allow one to manipulate the hot key mask when the firmware
-does not support masks at all, even if NVRAM polling is in use.
-
-sysfs notes
-^^^^^^^^^^^
-
-	hotkey_bios_enabled:
-		DEPRECATED, WILL BE REMOVED SOON.
-
-		Returns 0.
-
-	hotkey_bios_mask:
-		DEPRECATED, DON'T USE, WILL BE REMOVED IN THE FUTURE.
-
-		Returns the hot keys mask when thinkpad-acpi was loaded.
-		Upon module unload, the hot keys mask will be restored
-		to this value.   This is always 0x80c, because those are
-		the hotkeys that were supported by ancient firmware
-		without mask support.
-
-	hotkey_enable:
-		DEPRECATED, WILL BE REMOVED SOON.
-
-		0: returns -EPERM
-		1: does nothing
-
-	hotkey_mask:
-		bit mask to enable reporting (and depending on
-		the firmware, ACPI event generation) for each hot key
-		(see above).  Returns the current status of the hot keys
-		mask, and allows one to modify it.
-
-	hotkey_all_mask:
-		bit mask that should enable event reporting for all
-		supported hot keys, when echoed to hotkey_mask above.
-		Unless you know which events need to be handled
-		passively (because the firmware *will* handle them
-		anyway), do *not* use hotkey_all_mask.  Use
-		hotkey_recommended_mask, instead. You have been warned.
-
-	hotkey_recommended_mask:
-		bit mask that should enable event reporting for all
-		supported hot keys, except those which are always
-		handled by the firmware anyway.  Echo it to
-		hotkey_mask above, to use.  This is the default mask
-		used by the driver.
-
-	hotkey_source_mask:
-		bit mask that selects which hot keys will the driver
-		poll the NVRAM for.  This is auto-detected by the driver
-		based on the capabilities reported by the ACPI firmware,
-		but it can be overridden at runtime.
-
-		Hot keys whose bits are set in hotkey_source_mask are
-		polled for in NVRAM, and reported as hotkey events if
-		enabled in hotkey_mask.  Only a few hot keys are
-		available through CMOS NVRAM polling.
-
-		Warning: when in NVRAM mode, the volume up/down/mute
-		keys are synthesized according to changes in the mixer,
-		which uses a single volume up or volume down hotkey
-		press to unmute, as per the ThinkPad volume mixer user
-		interface.  When in ACPI event mode, volume up/down/mute
-		events are reported by the firmware and can behave
-		differently (and that behaviour changes with firmware
-		version -- not just with firmware models -- as well as
-		OSI(Linux) state).
-
-	hotkey_poll_freq:
-		frequency in Hz for hot key polling. It must be between
-		0 and 25 Hz.  Polling is only carried out when strictly
-		needed.
-
-		Setting hotkey_poll_freq to zero disables polling, and
-		will cause hot key presses that require NVRAM polling
-		to never be reported.
-
-		Setting hotkey_poll_freq too low may cause repeated
-		pressings of the same hot key to be misreported as a
-		single key press, or to not even be detected at all.
-		The recommended polling frequency is 10Hz.
-
-	hotkey_radio_sw:
-		If the ThinkPad has a hardware radio switch, this
-		attribute will read 0 if the switch is in the "radios
-		disabled" position, and 1 if the switch is in the
-		"radios enabled" position.
-
-		This attribute has poll()/select() support.
-
-	hotkey_tablet_mode:
-		If the ThinkPad has tablet capabilities, this attribute
-		will read 0 if the ThinkPad is in normal mode, and
-		1 if the ThinkPad is in tablet mode.
-
-		This attribute has poll()/select() support.
-
-	wakeup_reason:
-		Set to 1 if the system is waking up because the user
-		requested a bay ejection.  Set to 2 if the system is
-		waking up because the user requested the system to
-		undock.  Set to zero for normal wake-ups or wake-ups
-		due to unknown reasons.
-
-		This attribute has poll()/select() support.
-
-	wakeup_hotunplug_complete:
-		Set to 1 if the system was waken up because of an
-		undock or bay ejection request, and that request
-		was successfully completed.  At this point, it might
-		be useful to send the system back to sleep, at the
-		user's choice.  Refer to HKEY events 0x4003 and
-		0x3003, below.
-
-		This attribute has poll()/select() support.
-
-input layer notes
-^^^^^^^^^^^^^^^^^
-
-A Hot key is mapped to a single input layer EV_KEY event, possibly
-followed by an EV_MSC MSC_SCAN event that shall contain that key's scan
-code.  An EV_SYN event will always be generated to mark the end of the
-event block.
-
-Do not use the EV_MSC MSC_SCAN events to process keys.  They are to be
-used as a helper to remap keys, only.  They are particularly useful when
-remapping KEY_UNKNOWN keys.
-
-The events are available in an input device, with the following id:
-
-	==============  ==============================
-	Bus		BUS_HOST
-	vendor		0x1014 (PCI_VENDOR_ID_IBM)  or
-			0x17aa (PCI_VENDOR_ID_LENOVO)
-	product		0x5054 ("TP")
-	version		0x4101
-	==============  ==============================
-
-The version will have its LSB incremented if the keymap changes in a
-backwards-compatible way.  The MSB shall always be 0x41 for this input
-device.  If the MSB is not 0x41, do not use the device as described in
-this section, as it is either something else (e.g. another input device
-exported by a thinkpad driver, such as HDAPS) or its functionality has
-been changed in a non-backwards compatible way.
-
-Adding other event types for other functionalities shall be considered a
-backwards-compatible change for this input device.
-
-Thinkpad-acpi Hot Key event map (version 0x4101):
-
-=======	=======	==============	==============================================
-ACPI	Scan
-event	code	Key		Notes
-=======	=======	==============	==============================================
-0x1001	0x00	FN+F1		-
-
-0x1002	0x01	FN+F2		IBM: battery (rare)
-				Lenovo: Screen lock
-
-0x1003	0x02	FN+F3		Many IBM models always report
-				this hot key, even with hot keys
-				disabled or with Fn+F3 masked
-				off
-				IBM: screen lock, often turns
-				off the ThinkLight as side-effect
-				Lenovo: battery
-
-0x1004	0x03	FN+F4		Sleep button (ACPI sleep button
-				semantics, i.e. sleep-to-RAM).
-				It always generates some kind
-				of event, either the hot key
-				event or an ACPI sleep button
-				event. The firmware may
-				refuse to generate further FN+F4
-				key presses until a S3 or S4 ACPI
-				sleep cycle is performed or some
-				time passes.
-
-0x1005	0x04	FN+F5		Radio.  Enables/disables
-				the internal Bluetooth hardware
-				and W-WAN card if left in control
-				of the firmware.  Does not affect
-				the WLAN card.
-				Should be used to turn on/off all
-				radios (Bluetooth+W-WAN+WLAN),
-				really.
-
-0x1006	0x05	FN+F6		-
-
-0x1007	0x06	FN+F7		Video output cycle.
-				Do you feel lucky today?
-
-0x1008	0x07	FN+F8		IBM: toggle screen expand
-				Lenovo: configure UltraNav,
-				or toggle screen expand
-
-0x1009	0x08	FN+F9		-
-
-...	...	...		...
-
-0x100B	0x0A	FN+F11		-
-
-0x100C	0x0B	FN+F12		Sleep to disk.  You are always
-				supposed to handle it yourself,
-				either through the ACPI event,
-				or through a hotkey event.
-				The firmware may refuse to
-				generate further FN+F12 key
-				press events until a S3 or S4
-				ACPI sleep cycle is performed,
-				or some time passes.
-
-0x100D	0x0C	FN+BACKSPACE	-
-0x100E	0x0D	FN+INSERT	-
-0x100F	0x0E	FN+DELETE	-
-
-0x1010	0x0F	FN+HOME		Brightness up.  This key is
-				always handled by the firmware
-				in IBM ThinkPads, even when
-				unmasked.  Just leave it alone.
-				For Lenovo ThinkPads with a new
-				BIOS, it has to be handled either
-				by the ACPI OSI, or by userspace.
-				The driver does the right thing,
-				never mess with this.
-0x1011	0x10	FN+END		Brightness down.  See brightness
-				up for details.
-
-0x1012	0x11	FN+PGUP		ThinkLight toggle.  This key is
-				always handled by the firmware,
-				even when unmasked.
-
-0x1013	0x12	FN+PGDOWN	-
-
-0x1014	0x13	FN+SPACE	Zoom key
-
-0x1015	0x14	VOLUME UP	Internal mixer volume up. This
-				key is always handled by the
-				firmware, even when unmasked.
-				NOTE: Lenovo seems to be changing
-				this.
-0x1016	0x15	VOLUME DOWN	Internal mixer volume up. This
-				key is always handled by the
-				firmware, even when unmasked.
-				NOTE: Lenovo seems to be changing
-				this.
-0x1017	0x16	MUTE		Mute internal mixer. This
-				key is always handled by the
-				firmware, even when unmasked.
-
-0x1018	0x17	THINKPAD	ThinkPad/Access IBM/Lenovo key
-
-0x1019	0x18	unknown
-
-...	...	...
-
-0x1020	0x1F	unknown
-=======	=======	==============	==============================================
-
-The ThinkPad firmware does not allow one to differentiate when most hot
-keys are pressed or released (either that, or we don't know how to, yet).
-For these keys, the driver generates a set of events for a key press and
-immediately issues the same set of events for a key release.  It is
-unknown by the driver if the ThinkPad firmware triggered these events on
-hot key press or release, but the firmware will do it for either one, not
-both.
-
-If a key is mapped to KEY_RESERVED, it generates no input events at all.
-If a key is mapped to KEY_UNKNOWN, it generates an input event that
-includes an scan code.  If a key is mapped to anything else, it will
-generate input device EV_KEY events.
-
-In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW
-events for switches:
-
-==============	==============================================
-SW_RFKILL_ALL	T60 and later hardware rfkill rocker switch
-SW_TABLET_MODE	Tablet ThinkPads HKEY events 0x5009 and 0x500A
-==============	==============================================
-
-Non hotkey ACPI HKEY event map
-------------------------------
-
-Events that are never propagated by the driver:
-
-======		==================================================
-0x2304		System is waking up from suspend to undock
-0x2305		System is waking up from suspend to eject bay
-0x2404		System is waking up from hibernation to undock
-0x2405		System is waking up from hibernation to eject bay
-0x5001		Lid closed
-0x5002		Lid opened
-0x5009		Tablet swivel: switched to tablet mode
-0x500A		Tablet swivel: switched to normal mode
-0x5010		Brightness level changed/control event
-0x6000		KEYBOARD: Numlock key pressed
-0x6005		KEYBOARD: Fn key pressed (TO BE VERIFIED)
-0x7000		Radio Switch may have changed state
-======		==================================================
-
-
-Events that are propagated by the driver to userspace:
-
-======		=====================================================
-0x2313		ALARM: System is waking up from suspend because
-		the battery is nearly empty
-0x2413		ALARM: System is waking up from hibernation because
-		the battery is nearly empty
-0x3003		Bay ejection (see 0x2x05) complete, can sleep again
-0x3006		Bay hotplug request (hint to power up SATA link when
-		the optical drive tray is ejected)
-0x4003		Undocked (see 0x2x04), can sleep again
-0x4010		Docked into hotplug port replicator (non-ACPI dock)
-0x4011		Undocked from hotplug port replicator (non-ACPI dock)
-0x500B		Tablet pen inserted into its storage bay
-0x500C		Tablet pen removed from its storage bay
-0x6011		ALARM: battery is too hot
-0x6012		ALARM: battery is extremely hot
-0x6021		ALARM: a sensor is too hot
-0x6022		ALARM: a sensor is extremely hot
-0x6030		System thermal table changed
-0x6032		Thermal Control command set completion  (DYTC, Windows)
-0x6040		Nvidia Optimus/AC adapter related (TO BE VERIFIED)
-0x60C0		X1 Yoga 2016, Tablet mode status changed
-0x60F0		Thermal Transformation changed (GMTS, Windows)
-======		=====================================================
-
-Battery nearly empty alarms are a last resort attempt to get the
-operating system to hibernate or shutdown cleanly (0x2313), or shutdown
-cleanly (0x2413) before power is lost.  They must be acted upon, as the
-wake up caused by the firmware will have negated most safety nets...
-
-When any of the "too hot" alarms happen, according to Lenovo the user
-should suspend or hibernate the laptop (and in the case of battery
-alarms, unplug the AC adapter) to let it cool down.  These alarms do
-signal that something is wrong, they should never happen on normal
-operating conditions.
-
-The "extremely hot" alarms are emergencies.  According to Lenovo, the
-operating system is to force either an immediate suspend or hibernate
-cycle, or a system shutdown.  Obviously, something is very wrong if this
-happens.
-
-
-Brightness hotkey notes
-^^^^^^^^^^^^^^^^^^^^^^^
-
-Don't mess with the brightness hotkeys in a Thinkpad.  If you want
-notifications for OSD, use the sysfs backlight class event support.
-
-The driver will issue KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN events
-automatically for the cases were userspace has to do something to
-implement brightness changes.  When you override these events, you will
-either fail to handle properly the ThinkPads that require explicit
-action to change backlight brightness, or the ThinkPads that require
-that no action be taken to work properly.
-
-
-Bluetooth
----------
-
-procfs: /proc/acpi/ibm/bluetooth
-
-sysfs device attribute: bluetooth_enable (deprecated)
-
-sysfs rfkill class: switch "tpacpi_bluetooth_sw"
-
-This feature shows the presence and current state of a ThinkPad
-Bluetooth device in the internal ThinkPad CDC slot.
-
-If the ThinkPad supports it, the Bluetooth state is stored in NVRAM,
-so it is kept across reboots and power-off.
-
-Procfs notes
-^^^^^^^^^^^^
-
-If Bluetooth is installed, the following commands can be used::
-
-	echo enable > /proc/acpi/ibm/bluetooth
-	echo disable > /proc/acpi/ibm/bluetooth
-
-Sysfs notes
-^^^^^^^^^^^
-
-	If the Bluetooth CDC card is installed, it can be enabled /
-	disabled through the "bluetooth_enable" thinkpad-acpi device
-	attribute, and its current status can also be queried.
-
-	enable:
-
-		- 0: disables Bluetooth / Bluetooth is disabled
-		- 1: enables Bluetooth / Bluetooth is enabled.
-
-	Note: this interface has been superseded by the	generic rfkill
-	class.  It has been deprecated, and it will be removed in year
-	2010.
-
-	rfkill controller switch "tpacpi_bluetooth_sw": refer to
-	Documentation/rfkill.txt for details.
-
-
-Video output control -- /proc/acpi/ibm/video
---------------------------------------------
-
-This feature allows control over the devices used for video output -
-LCD, CRT or DVI (if available). The following commands are available::
-
-	echo lcd_enable > /proc/acpi/ibm/video
-	echo lcd_disable > /proc/acpi/ibm/video
-	echo crt_enable > /proc/acpi/ibm/video
-	echo crt_disable > /proc/acpi/ibm/video
-	echo dvi_enable > /proc/acpi/ibm/video
-	echo dvi_disable > /proc/acpi/ibm/video
-	echo auto_enable > /proc/acpi/ibm/video
-	echo auto_disable > /proc/acpi/ibm/video
-	echo expand_toggle > /proc/acpi/ibm/video
-	echo video_switch > /proc/acpi/ibm/video
-
-NOTE:
-  Access to this feature is restricted to processes owning the
-  CAP_SYS_ADMIN capability for safety reasons, as it can interact badly
-  enough with some versions of X.org to crash it.
-
-Each video output device can be enabled or disabled individually.
-Reading /proc/acpi/ibm/video shows the status of each device.
-
-Automatic video switching can be enabled or disabled.  When automatic
-video switching is enabled, certain events (e.g. opening the lid,
-docking or undocking) cause the video output device to change
-automatically. While this can be useful, it also causes flickering
-and, on the X40, video corruption. By disabling automatic switching,
-the flickering or video corruption can be avoided.
-
-The video_switch command cycles through the available video outputs
-(it simulates the behavior of Fn-F7).
-
-Video expansion can be toggled through this feature. This controls
-whether the display is expanded to fill the entire LCD screen when a
-mode with less than full resolution is used. Note that the current
-video expansion status cannot be determined through this feature.
-
-Note that on many models (particularly those using Radeon graphics
-chips) the X driver configures the video card in a way which prevents
-Fn-F7 from working. This also disables the video output switching
-features of this driver, as it uses the same ACPI methods as
-Fn-F7. Video switching on the console should still work.
-
-UPDATE: refer to https://bugs.freedesktop.org/show_bug.cgi?id=2000
-
-
-ThinkLight control
-------------------
-
-procfs: /proc/acpi/ibm/light
-
-sysfs attributes: as per LED class, for the "tpacpi::thinklight" LED
-
-procfs notes
-^^^^^^^^^^^^
-
-The ThinkLight status can be read and set through the procfs interface.  A
-few models which do not make the status available will show the ThinkLight
-status as "unknown". The available commands are::
-
-	echo on  > /proc/acpi/ibm/light
-	echo off > /proc/acpi/ibm/light
-
-sysfs notes
-^^^^^^^^^^^
-
-The ThinkLight sysfs interface is documented by the LED class
-documentation, in Documentation/leds/leds-class.rst.  The ThinkLight LED name
-is "tpacpi::thinklight".
-
-Due to limitations in the sysfs LED class, if the status of the ThinkLight
-cannot be read or if it is unknown, thinkpad-acpi will report it as "off".
-It is impossible to know if the status returned through sysfs is valid.
-
-
-CMOS/UCMS control
------------------
-
-procfs: /proc/acpi/ibm/cmos
-
-sysfs device attribute: cmos_command
-
-This feature is mostly used internally by the ACPI firmware to keep the legacy
-CMOS NVRAM bits in sync with the current machine state, and to record this
-state so that the ThinkPad will retain such settings across reboots.
-
-Some of these commands actually perform actions in some ThinkPad models, but
-this is expected to disappear more and more in newer models.  As an example, in
-a T43 and in a X40, commands 12 and 13 still control the ThinkLight state for
-real, but commands 0 to 2 don't control the mixer anymore (they have been
-phased out) and just update the NVRAM.
-
-The range of valid cmos command numbers is 0 to 21, but not all have an
-effect and the behavior varies from model to model.  Here is the behavior
-on the X40 (tpb is the ThinkPad Buttons utility):
-
-	- 0 - Related to "Volume down" key press
-	- 1 - Related to "Volume up" key press
-	- 2 - Related to "Mute on" key press
-	- 3 - Related to "Access IBM" key press
-	- 4 - Related to "LCD brightness up" key press
-	- 5 - Related to "LCD brightness down" key press
-	- 11 - Related to "toggle screen expansion" key press/function
-	- 12 - Related to "ThinkLight on"
-	- 13 - Related to "ThinkLight off"
-	- 14 - Related to "ThinkLight" key press (toggle ThinkLight)
-
-The cmos command interface is prone to firmware split-brain problems, as
-in newer ThinkPads it is just a compatibility layer.  Do not use it, it is
-exported just as a debug tool.
-
-
-LED control
------------
-
-procfs: /proc/acpi/ibm/led
-sysfs attributes: as per LED class, see below for names
-
-Some of the LED indicators can be controlled through this feature.  On
-some older ThinkPad models, it is possible to query the status of the
-LED indicators as well.  Newer ThinkPads cannot query the real status
-of the LED indicators.
-
-Because misuse of the LEDs could induce an unaware user to perform
-dangerous actions (like undocking or ejecting a bay device while the
-buses are still active), or mask an important alarm (such as a nearly
-empty battery, or a broken battery), access to most LEDs is
-restricted.
-
-Unrestricted access to all LEDs requires that thinkpad-acpi be
-compiled with the CONFIG_THINKPAD_ACPI_UNSAFE_LEDS option enabled.
-Distributions must never enable this option.  Individual users that
-are aware of the consequences are welcome to enabling it.
-
-Audio mute and microphone mute LEDs are supported, but currently not
-visible to userspace. They are used by the snd-hda-intel audio driver.
-
-procfs notes
-^^^^^^^^^^^^
-
-The available commands are::
-
-	echo '<LED number> on' >/proc/acpi/ibm/led
-	echo '<LED number> off' >/proc/acpi/ibm/led
-	echo '<LED number> blink' >/proc/acpi/ibm/led
-
-The <LED number> range is 0 to 15. The set of LEDs that can be
-controlled varies from model to model. Here is the common ThinkPad
-mapping:
-
-	- 0 - power
-	- 1 - battery (orange)
-	- 2 - battery (green)
-	- 3 - UltraBase/dock
-	- 4 - UltraBay
-	- 5 - UltraBase battery slot
-	- 6 - (unknown)
-	- 7 - standby
-	- 8 - dock status 1
-	- 9 - dock status 2
-	- 10, 11 - (unknown)
-	- 12 - thinkvantage
-	- 13, 14, 15 - (unknown)
-
-All of the above can be turned on and off and can be made to blink.
-
-sysfs notes
-^^^^^^^^^^^
-
-The ThinkPad LED sysfs interface is described in detail by the LED class
-documentation, in Documentation/leds/leds-class.rst.
-
-The LEDs are named (in LED ID order, from 0 to 12):
-"tpacpi::power", "tpacpi:orange:batt", "tpacpi:green:batt",
-"tpacpi::dock_active", "tpacpi::bay_active", "tpacpi::dock_batt",
-"tpacpi::unknown_led", "tpacpi::standby", "tpacpi::dock_status1",
-"tpacpi::dock_status2", "tpacpi::unknown_led2", "tpacpi::unknown_led3",
-"tpacpi::thinkvantage".
-
-Due to limitations in the sysfs LED class, if the status of the LED
-indicators cannot be read due to an error, thinkpad-acpi will report it as
-a brightness of zero (same as LED off).
-
-If the thinkpad firmware doesn't support reading the current status,
-trying to read the current LED brightness will just return whatever
-brightness was last written to that attribute.
-
-These LEDs can blink using hardware acceleration.  To request that a
-ThinkPad indicator LED should blink in hardware accelerated mode, use the
-"timer" trigger, and leave the delay_on and delay_off parameters set to
-zero (to request hardware acceleration autodetection).
-
-LEDs that are known not to exist in a given ThinkPad model are not
-made available through the sysfs interface.  If you have a dock and you
-notice there are LEDs listed for your ThinkPad that do not exist (and
-are not in the dock), or if you notice that there are missing LEDs,
-a report to ibm-acpi-devel@lists.sourceforge.net is appreciated.
-
-
-ACPI sounds -- /proc/acpi/ibm/beep
-----------------------------------
-
-The BEEP method is used internally by the ACPI firmware to provide
-audible alerts in various situations. This feature allows the same
-sounds to be triggered manually.
-
-The commands are non-negative integer numbers::
-
-	echo <number> >/proc/acpi/ibm/beep
-
-The valid <number> range is 0 to 17. Not all numbers trigger sounds
-and the sounds vary from model to model. Here is the behavior on the
-X40:
-
-	- 0 - stop a sound in progress (but use 17 to stop 16)
-	- 2 - two beeps, pause, third beep ("low battery")
-	- 3 - single beep
-	- 4 - high, followed by low-pitched beep ("unable")
-	- 5 - single beep
-	- 6 - very high, followed by high-pitched beep ("AC/DC")
-	- 7 - high-pitched beep
-	- 9 - three short beeps
-	- 10 - very long beep
-	- 12 - low-pitched beep
-	- 15 - three high-pitched beeps repeating constantly, stop with 0
-	- 16 - one medium-pitched beep repeating constantly, stop with 17
-	- 17 - stop 16
-
-
-Temperature sensors
--------------------
-
-procfs: /proc/acpi/ibm/thermal
-
-sysfs device attributes: (hwmon "thinkpad") temp*_input
-
-Most ThinkPads include six or more separate temperature sensors but only
-expose the CPU temperature through the standard ACPI methods.  This
-feature shows readings from up to eight different sensors on older
-ThinkPads, and up to sixteen different sensors on newer ThinkPads.
-
-For example, on the X40, a typical output may be:
-
-temperatures:
-	42 42 45 41 36 -128 33 -128
-
-On the T43/p, a typical output may be:
-
-temperatures:
-	48 48 36 52 38 -128 31 -128 48 52 48 -128 -128 -128 -128 -128
-
-The mapping of thermal sensors to physical locations varies depending on
-system-board model (and thus, on ThinkPad model).
-
-http://thinkwiki.org/wiki/Thermal_Sensors is a public wiki page that
-tries to track down these locations for various models.
-
-Most (newer?) models seem to follow this pattern:
-
-- 1:  CPU
-- 2:  (depends on model)
-- 3:  (depends on model)
-- 4:  GPU
-- 5:  Main battery: main sensor
-- 6:  Bay battery: main sensor
-- 7:  Main battery: secondary sensor
-- 8:  Bay battery: secondary sensor
-- 9-15: (depends on model)
-
-For the R51 (source: Thomas Gruber):
-
-- 2:  Mini-PCI
-- 3:  Internal HDD
-
-For the T43, T43/p (source: Shmidoax/Thinkwiki.org)
-http://thinkwiki.org/wiki/Thermal_Sensors#ThinkPad_T43.2C_T43p
-
-- 2:  System board, left side (near PCMCIA slot), reported as HDAPS temp
-- 3:  PCMCIA slot
-- 9:  MCH (northbridge) to DRAM Bus
-- 10: Clock-generator, mini-pci card and ICH (southbridge), under Mini-PCI
-      card, under touchpad
-- 11: Power regulator, underside of system board, below F2 key
-
-The A31 has a very atypical layout for the thermal sensors
-(source: Milos Popovic, http://thinkwiki.org/wiki/Thermal_Sensors#ThinkPad_A31)
-
-- 1:  CPU
-- 2:  Main Battery: main sensor
-- 3:  Power Converter
-- 4:  Bay Battery: main sensor
-- 5:  MCH (northbridge)
-- 6:  PCMCIA/ambient
-- 7:  Main Battery: secondary sensor
-- 8:  Bay Battery: secondary sensor
-
-
-Procfs notes
-^^^^^^^^^^^^
-
-	Readings from sensors that are not available return -128.
-	No commands can be written to this file.
-
-Sysfs notes
-^^^^^^^^^^^
-
-	Sensors that are not available return the ENXIO error.  This
-	status may change at runtime, as there are hotplug thermal
-	sensors, like those inside the batteries and docks.
-
-	thinkpad-acpi thermal sensors are reported through the hwmon
-	subsystem, and follow all of the hwmon guidelines at
-	Documentation/hwmon.
-
-EXPERIMENTAL: Embedded controller register dump
------------------------------------------------
-
-This feature is not included in the thinkpad driver anymore.
-Instead the EC can be accessed through /sys/kernel/debug/ec with
-a userspace tool which can be found here:
-ftp://ftp.suse.com/pub/people/trenn/sources/ec
-
-Use it to determine the register holding the fan
-speed on some models. To do that, do the following:
-
-	- make sure the battery is fully charged
-	- make sure the fan is running
-	- use above mentioned tool to read out the EC
-
-Often fan and temperature values vary between
-readings. Since temperatures don't change vary fast, you can take
-several quick dumps to eliminate them.
-
-You can use a similar method to figure out the meaning of other
-embedded controller registers - e.g. make sure nothing else changes
-except the charging or discharging battery to determine which
-registers contain the current battery capacity, etc. If you experiment
-with this, do send me your results (including some complete dumps with
-a description of the conditions when they were taken.)
-
-
-LCD brightness control
-----------------------
-
-procfs: /proc/acpi/ibm/brightness
-
-sysfs backlight device "thinkpad_screen"
-
-This feature allows software control of the LCD brightness on ThinkPad
-models which don't have a hardware brightness slider.
-
-It has some limitations: the LCD backlight cannot be actually turned
-on or off by this interface, it just controls the backlight brightness
-level.
-
-On IBM (and some of the earlier Lenovo) ThinkPads, the backlight control
-has eight brightness levels, ranging from 0 to 7.  Some of the levels
-may not be distinct.  Later Lenovo models that implement the ACPI
-display backlight brightness control methods have 16 levels, ranging
-from 0 to 15.
-
-For IBM ThinkPads, there are two interfaces to the firmware for direct
-brightness control, EC and UCMS (or CMOS).  To select which one should be
-used, use the brightness_mode module parameter: brightness_mode=1 selects
-EC mode, brightness_mode=2 selects UCMS mode, brightness_mode=3 selects EC
-mode with NVRAM backing (so that brightness changes are remembered across
-shutdown/reboot).
-
-The driver tries to select which interface to use from a table of
-defaults for each ThinkPad model.  If it makes a wrong choice, please
-report this as a bug, so that we can fix it.
-
-Lenovo ThinkPads only support brightness_mode=2 (UCMS).
-
-When display backlight brightness controls are available through the
-standard ACPI interface, it is best to use it instead of this direct
-ThinkPad-specific interface.  The driver will disable its native
-backlight brightness control interface if it detects that the standard
-ACPI interface is available in the ThinkPad.
-
-If you want to use the thinkpad-acpi backlight brightness control
-instead of the generic ACPI video backlight brightness control for some
-reason, you should use the acpi_backlight=vendor kernel parameter.
-
-The brightness_enable module parameter can be used to control whether
-the LCD brightness control feature will be enabled when available.
-brightness_enable=0 forces it to be disabled.  brightness_enable=1
-forces it to be enabled when available, even if the standard ACPI
-interface is also available.
-
-Procfs notes
-^^^^^^^^^^^^
-
-The available commands are::
-
-	echo up   >/proc/acpi/ibm/brightness
-	echo down >/proc/acpi/ibm/brightness
-	echo 'level <level>' >/proc/acpi/ibm/brightness
-
-Sysfs notes
-^^^^^^^^^^^
-
-The interface is implemented through the backlight sysfs class, which is
-poorly documented at this time.
-
-Locate the thinkpad_screen device under /sys/class/backlight, and inside
-it there will be the following attributes:
-
-	max_brightness:
-		Reads the maximum brightness the hardware can be set to.
-		The minimum is always zero.
-
-	actual_brightness:
-		Reads what brightness the screen is set to at this instant.
-
-	brightness:
-		Writes request the driver to change brightness to the
-		given value.  Reads will tell you what brightness the
-		driver is trying to set the display to when "power" is set
-		to zero and the display has not been dimmed by a kernel
-		power management event.
-
-	power:
-		power management mode, where 0 is "display on", and 1 to 3
-		will dim the display backlight to brightness level 0
-		because thinkpad-acpi cannot really turn the backlight
-		off.  Kernel power management events can temporarily
-		increase the current power management level, i.e. they can
-		dim the display.
-
-
-WARNING:
-
-    Whatever you do, do NOT ever call thinkpad-acpi backlight-level change
-    interface and the ACPI-based backlight level change interface
-    (available on newer BIOSes, and driven by the Linux ACPI video driver)
-    at the same time.  The two will interact in bad ways, do funny things,
-    and maybe reduce the life of the backlight lamps by needlessly kicking
-    its level up and down at every change.
-
-
-Volume control (Console Audio control)
---------------------------------------
-
-procfs: /proc/acpi/ibm/volume
-
-ALSA: "ThinkPad Console Audio Control", default ID: "ThinkPadEC"
-
-NOTE: by default, the volume control interface operates in read-only
-mode, as it is supposed to be used for on-screen-display purposes.
-The read/write mode can be enabled through the use of the
-"volume_control=1" module parameter.
-
-NOTE: distros are urged to not enable volume_control by default, this
-should be done by the local admin only.  The ThinkPad UI is for the
-console audio control to be done through the volume keys only, and for
-the desktop environment to just provide on-screen-display feedback.
-Software volume control should be done only in the main AC97/HDA
-mixer.
-
-
-About the ThinkPad Console Audio control
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-ThinkPads have a built-in amplifier and muting circuit that drives the
-console headphone and speakers.  This circuit is after the main AC97
-or HDA mixer in the audio path, and under exclusive control of the
-firmware.
-
-ThinkPads have three special hotkeys to interact with the console
-audio control: volume up, volume down and mute.
-
-It is worth noting that the normal way the mute function works (on
-ThinkPads that do not have a "mute LED") is:
-
-1. Press mute to mute.  It will *always* mute, you can press it as
-   many times as you want, and the sound will remain mute.
-
-2. Press either volume key to unmute the ThinkPad (it will _not_
-   change the volume, it will just unmute).
-
-This is a very superior design when compared to the cheap software-only
-mute-toggle solution found on normal consumer laptops:  you can be
-absolutely sure the ThinkPad will not make noise if you press the mute
-button, no matter the previous state.
-
-The IBM ThinkPads, and the earlier Lenovo ThinkPads have variable-gain
-amplifiers driving the speakers and headphone output, and the firmware
-also handles volume control for the headphone and speakers on these
-ThinkPads without any help from the operating system (this volume
-control stage exists after the main AC97 or HDA mixer in the audio
-path).
-
-The newer Lenovo models only have firmware mute control, and depend on
-the main HDA mixer to do volume control (which is done by the operating
-system).  In this case, the volume keys are filtered out for unmute
-key press (there are some firmware bugs in this area) and delivered as
-normal key presses to the operating system (thinkpad-acpi is not
-involved).
-
-
-The ThinkPad-ACPI volume control
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The preferred way to interact with the Console Audio control is the
-ALSA interface.
-
-The legacy procfs interface allows one to read the current state,
-and if volume control is enabled, accepts the following commands::
-
-	echo up   >/proc/acpi/ibm/volume
-	echo down >/proc/acpi/ibm/volume
-	echo mute >/proc/acpi/ibm/volume
-	echo unmute >/proc/acpi/ibm/volume
-	echo 'level <level>' >/proc/acpi/ibm/volume
-
-The <level> number range is 0 to 14 although not all of them may be
-distinct. To unmute the volume after the mute command, use either the
-up or down command (the level command will not unmute the volume), or
-the unmute command.
-
-You can use the volume_capabilities parameter to tell the driver
-whether your thinkpad has volume control or mute-only control:
-volume_capabilities=1 for mixers with mute and volume control,
-volume_capabilities=2 for mixers with only mute control.
-
-If the driver misdetects the capabilities for your ThinkPad model,
-please report this to ibm-acpi-devel@lists.sourceforge.net, so that we
-can update the driver.
-
-There are two strategies for volume control.  To select which one
-should be used, use the volume_mode module parameter: volume_mode=1
-selects EC mode, and volume_mode=3 selects EC mode with NVRAM backing
-(so that volume/mute changes are remembered across shutdown/reboot).
-
-The driver will operate in volume_mode=3 by default. If that does not
-work well on your ThinkPad model, please report this to
-ibm-acpi-devel@lists.sourceforge.net.
-
-The driver supports the standard ALSA module parameters.  If the ALSA
-mixer is disabled, the driver will disable all volume functionality.
-
-
-Fan control and monitoring: fan speed, fan enable/disable
----------------------------------------------------------
-
-procfs: /proc/acpi/ibm/fan
-
-sysfs device attributes: (hwmon "thinkpad") fan1_input, pwm1, pwm1_enable, fan2_input
-
-sysfs hwmon driver attributes: fan_watchdog
-
-NOTE NOTE NOTE:
-   fan control operations are disabled by default for
-   safety reasons.  To enable them, the module parameter "fan_control=1"
-   must be given to thinkpad-acpi.
-
-This feature attempts to show the current fan speed, control mode and
-other fan data that might be available.  The speed is read directly
-from the hardware registers of the embedded controller.  This is known
-to work on later R, T, X and Z series ThinkPads but may show a bogus
-value on other models.
-
-Some Lenovo ThinkPads support a secondary fan.  This fan cannot be
-controlled separately, it shares the main fan control.
-
-Fan levels
-^^^^^^^^^^
-
-Most ThinkPad fans work in "levels" at the firmware interface.  Level 0
-stops the fan.  The higher the level, the higher the fan speed, although
-adjacent levels often map to the same fan speed.  7 is the highest
-level, where the fan reaches the maximum recommended speed.
-
-Level "auto" means the EC changes the fan level according to some
-internal algorithm, usually based on readings from the thermal sensors.
-
-There is also a "full-speed" level, also known as "disengaged" level.
-In this level, the EC disables the speed-locked closed-loop fan control,
-and drives the fan as fast as it can go, which might exceed hardware
-limits, so use this level with caution.
-
-The fan usually ramps up or down slowly from one speed to another, and
-it is normal for the EC to take several seconds to react to fan
-commands.  The full-speed level may take up to two minutes to ramp up to
-maximum speed, and in some ThinkPads, the tachometer readings go stale
-while the EC is transitioning to the full-speed level.
-
-WARNING WARNING WARNING: do not leave the fan disabled unless you are
-monitoring all of the temperature sensor readings and you are ready to
-enable it if necessary to avoid overheating.
-
-An enabled fan in level "auto" may stop spinning if the EC decides the
-ThinkPad is cool enough and doesn't need the extra airflow.  This is
-normal, and the EC will spin the fan up if the various thermal readings
-rise too much.
-
-On the X40, this seems to depend on the CPU and HDD temperatures.
-Specifically, the fan is turned on when either the CPU temperature
-climbs to 56 degrees or the HDD temperature climbs to 46 degrees.  The
-fan is turned off when the CPU temperature drops to 49 degrees and the
-HDD temperature drops to 41 degrees.  These thresholds cannot
-currently be controlled.
-
-The ThinkPad's ACPI DSDT code will reprogram the fan on its own when
-certain conditions are met.  It will override any fan programming done
-through thinkpad-acpi.
-
-The thinkpad-acpi kernel driver can be programmed to revert the fan
-level to a safe setting if userspace does not issue one of the procfs
-fan commands: "enable", "disable", "level" or "watchdog", or if there
-are no writes to pwm1_enable (or to pwm1 *if and only if* pwm1_enable is
-set to 1, manual mode) within a configurable amount of time of up to
-120 seconds.  This functionality is called fan safety watchdog.
-
-Note that the watchdog timer stops after it enables the fan.  It will be
-rearmed again automatically (using the same interval) when one of the
-above mentioned fan commands is received.  The fan watchdog is,
-therefore, not suitable to protect against fan mode changes made through
-means other than the "enable", "disable", and "level" procfs fan
-commands, or the hwmon fan control sysfs interface.
-
-Procfs notes
-^^^^^^^^^^^^
-
-The fan may be enabled or disabled with the following commands::
-
-	echo enable  >/proc/acpi/ibm/fan
-	echo disable >/proc/acpi/ibm/fan
-
-Placing a fan on level 0 is the same as disabling it.  Enabling a fan
-will try to place it in a safe level if it is too slow or disabled.
-
-The fan level can be controlled with the command::
-
-	echo 'level <level>' > /proc/acpi/ibm/fan
-
-Where <level> is an integer from 0 to 7, or one of the words "auto" or
-"full-speed" (without the quotes).  Not all ThinkPads support the "auto"
-and "full-speed" levels.  The driver accepts "disengaged" as an alias for
-"full-speed", and reports it as "disengaged" for backwards
-compatibility.
-
-On the X31 and X40 (and ONLY on those models), the fan speed can be
-controlled to a certain degree.  Once the fan is running, it can be
-forced to run faster or slower with the following command::
-
-	echo 'speed <speed>' > /proc/acpi/ibm/fan
-
-The sustainable range of fan speeds on the X40 appears to be from about
-3700 to about 7350. Values outside this range either do not have any
-effect or the fan speed eventually settles somewhere in that range.  The
-fan cannot be stopped or started with this command.  This functionality
-is incomplete, and not available through the sysfs interface.
-
-To program the safety watchdog, use the "watchdog" command::
-
-	echo 'watchdog <interval in seconds>' > /proc/acpi/ibm/fan
-
-If you want to disable the watchdog, use 0 as the interval.
-
-Sysfs notes
-^^^^^^^^^^^
-
-The sysfs interface follows the hwmon subsystem guidelines for the most
-part, and the exception is the fan safety watchdog.
-
-Writes to any of the sysfs attributes may return the EINVAL error if
-that operation is not supported in a given ThinkPad or if the parameter
-is out-of-bounds, and EPERM if it is forbidden.  They may also return
-EINTR (interrupted system call), and EIO (I/O error while trying to talk
-to the firmware).
-
-Features not yet implemented by the driver return ENOSYS.
-
-hwmon device attribute pwm1_enable:
-	- 0: PWM offline (fan is set to full-speed mode)
-	- 1: Manual PWM control (use pwm1 to set fan level)
-	- 2: Hardware PWM control (EC "auto" mode)
-	- 3: reserved (Software PWM control, not implemented yet)
-
-	Modes 0 and 2 are not supported by all ThinkPads, and the
-	driver is not always able to detect this.  If it does know a
-	mode is unsupported, it will return -EINVAL.
-
-hwmon device attribute pwm1:
-	Fan level, scaled from the firmware values of 0-7 to the hwmon
-	scale of 0-255.  0 means fan stopped, 255 means highest normal
-	speed (level 7).
-
-	This attribute only commands the fan if pmw1_enable is set to 1
-	(manual PWM control).
-
-hwmon device attribute fan1_input:
-	Fan tachometer reading, in RPM.  May go stale on certain
-	ThinkPads while the EC transitions the PWM to offline mode,
-	which can take up to two minutes.  May return rubbish on older
-	ThinkPads.
-
-hwmon device attribute fan2_input:
-	Fan tachometer reading, in RPM, for the secondary fan.
-	Available only on some ThinkPads.  If the secondary fan is
-	not installed, will always read 0.
-
-hwmon driver attribute fan_watchdog:
-	Fan safety watchdog timer interval, in seconds.  Minimum is
-	1 second, maximum is 120 seconds.  0 disables the watchdog.
-
-To stop the fan: set pwm1 to zero, and pwm1_enable to 1.
-
-To start the fan in a safe mode: set pwm1_enable to 2.  If that fails
-with EINVAL, try to set pwm1_enable to 1 and pwm1 to at least 128 (255
-would be the safest choice, though).
-
-
-WAN
----
-
-procfs: /proc/acpi/ibm/wan
-
-sysfs device attribute: wwan_enable (deprecated)
-
-sysfs rfkill class: switch "tpacpi_wwan_sw"
-
-This feature shows the presence and current state of the built-in
-Wireless WAN device.
-
-If the ThinkPad supports it, the WWAN state is stored in NVRAM,
-so it is kept across reboots and power-off.
-
-It was tested on a Lenovo ThinkPad X60. It should probably work on other
-ThinkPad models which come with this module installed.
-
-Procfs notes
-^^^^^^^^^^^^
-
-If the W-WAN card is installed, the following commands can be used::
-
-	echo enable > /proc/acpi/ibm/wan
-	echo disable > /proc/acpi/ibm/wan
-
-Sysfs notes
-^^^^^^^^^^^
-
-	If the W-WAN card is installed, it can be enabled /
-	disabled through the "wwan_enable" thinkpad-acpi device
-	attribute, and its current status can also be queried.
-
-	enable:
-		- 0: disables WWAN card / WWAN card is disabled
-		- 1: enables WWAN card / WWAN card is enabled.
-
-	Note: this interface has been superseded by the	generic rfkill
-	class.  It has been deprecated, and it will be removed in year
-	2010.
-
-	rfkill controller switch "tpacpi_wwan_sw": refer to
-	Documentation/rfkill.txt for details.
-
-
-EXPERIMENTAL: UWB
------------------
-
-This feature is considered EXPERIMENTAL because it has not been extensively
-tested and validated in various ThinkPad models yet.  The feature may not
-work as expected. USE WITH CAUTION! To use this feature, you need to supply
-the experimental=1 parameter when loading the module.
-
-sysfs rfkill class: switch "tpacpi_uwb_sw"
-
-This feature exports an rfkill controller for the UWB device, if one is
-present and enabled in the BIOS.
-
-Sysfs notes
-^^^^^^^^^^^
-
-	rfkill controller switch "tpacpi_uwb_sw": refer to
-	Documentation/rfkill.txt for details.
-
-Adaptive keyboard
------------------
-
-sysfs device attribute: adaptive_kbd_mode
-
-This sysfs attribute controls the keyboard "face" that will be shown on the
-Lenovo X1 Carbon 2nd gen (2014)'s adaptive keyboard. The value can be read
-and set.
-
-- 1 = Home mode
-- 2 = Web-browser mode
-- 3 = Web-conference mode
-- 4 = Function mode
-- 5 = Layflat mode
-
-For more details about which buttons will appear depending on the mode, please
-review the laptop's user guide:
-http://www.lenovo.com/shop/americas/content/user_guides/x1carbon_2_ug_en.pdf
-
-Multiple Commands, Module Parameters
-------------------------------------
-
-Multiple commands can be written to the proc files in one shot by
-separating them with commas, for example::
-
-	echo enable,0xffff > /proc/acpi/ibm/hotkey
-	echo lcd_disable,crt_enable > /proc/acpi/ibm/video
-
-Commands can also be specified when loading the thinkpad-acpi module,
-for example::
-
-	modprobe thinkpad_acpi hotkey=enable,0xffff video=auto_disable
-
-
-Enabling debugging output
--------------------------
-
-The module takes a debug parameter which can be used to selectively
-enable various classes of debugging output, for example::
-
-	 modprobe thinkpad_acpi debug=0xffff
-
-will enable all debugging output classes.  It takes a bitmask, so
-to enable more than one output class, just add their values.
-
-	=============		======================================
-	Debug bitmask		Description
-	=============		======================================
-	0x8000			Disclose PID of userspace programs
-				accessing some functions of the driver
-	0x0001			Initialization and probing
-	0x0002			Removal
-	0x0004			RF Transmitter control (RFKILL)
-				(bluetooth, WWAN, UWB...)
-	0x0008			HKEY event interface, hotkeys
-	0x0010			Fan control
-	0x0020			Backlight brightness
-	0x0040			Audio mixer/volume control
-	=============		======================================
-
-There is also a kernel build option to enable more debugging
-information, which may be necessary to debug driver problems.
-
-The level of debugging information output by the driver can be changed
-at runtime through sysfs, using the driver attribute debug_level.  The
-attribute takes the same bitmask as the debug module parameter above.
-
-
-Force loading of module
------------------------
-
-If thinkpad-acpi refuses to detect your ThinkPad, you can try to specify
-the module parameter force_load=1.  Regardless of whether this works or
-not, please contact ibm-acpi-devel@lists.sourceforge.net with a report.
-
-
-Sysfs interface changelog
-^^^^^^^^^^^^^^^^^^^^^^^^^
-
-=========	===============================================================
-0x000100:	Initial sysfs support, as a single platform driver and
-		device.
-0x000200:	Hot key support for 32 hot keys, and radio slider switch
-		support.
-0x010000:	Hot keys are now handled by default over the input
-		layer, the radio switch generates input event EV_RADIO,
-		and the driver enables hot key handling by default in
-		the firmware.
-
-0x020000:	ABI fix: added a separate hwmon platform device and
-		driver, which must be located by name (thinkpad)
-		and the hwmon class for libsensors4 (lm-sensors 3)
-		compatibility.  Moved all hwmon attributes to this
-		new platform device.
-
-0x020100:	Marker for thinkpad-acpi with hot key NVRAM polling
-		support.  If you must, use it to know you should not
-		start a userspace NVRAM poller (allows to detect when
-		NVRAM is compiled out by the user because it is
-		unneeded/undesired in the first place).
-0x020101:	Marker for thinkpad-acpi with hot key NVRAM polling
-		and proper hotkey_mask semantics (version 8 of the
-		NVRAM polling patch).  Some development snapshots of
-		0.18 had an earlier version that did strange things
-		to hotkey_mask.
-
-0x020200:	Add poll()/select() support to the following attributes:
-		hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason
-
-0x020300:	hotkey enable/disable support removed, attributes
-		hotkey_bios_enabled and hotkey_enable deprecated and
-		marked for removal.
-
-0x020400:	Marker for 16 LEDs support.  Also, LEDs that are known
-		to not exist in a given model are not registered with
-		the LED sysfs class anymore.
-
-0x020500:	Updated hotkey driver, hotkey_mask is always available
-		and it is always able to disable hot keys.  Very old
-		thinkpads are properly supported.  hotkey_bios_mask
-		is deprecated and marked for removal.
-
-0x020600:	Marker for backlight change event support.
-
-0x020700:	Support for mute-only mixers.
-		Volume control in read-only mode by default.
-		Marker for ALSA mixer support.
-
-0x030000:	Thermal and fan sysfs attributes were moved to the hwmon
-		device instead of being attached to the backing platform
-		device.
-=========	===============================================================
diff --git a/Documentation/laptops/toshiba_haps.rst b/Documentation/laptops/toshiba_haps.rst
deleted file mode 100644
index 11dfc428c080..000000000000
--- a/Documentation/laptops/toshiba_haps.rst
+++ /dev/null
@@ -1,87 +0,0 @@
-====================================
-Toshiba HDD Active Protection Sensor
-====================================
-
-Kernel driver: toshiba_haps
-
-Author: Azael Avalos <coproscefalo@gmail.com>
-
-
-.. 0. Contents
-
-   1. Description
-   2. Interface
-   3. Accelerometer axes
-   4. Supported devices
-   5. Usage
-
-
-1. Description
---------------
-
-This driver provides support for the accelerometer found in various Toshiba
-laptops, being called "Toshiba HDD Protection - Shock Sensor" officially,
-and detects laptops automatically with this device.
-On Windows, Toshiba provided software monitors this device and provides
-automatic HDD protection (head unload) on sudden moves or harsh vibrations,
-however, this driver only provides a notification via a sysfs file to let
-userspace tools or daemons act accordingly, as well as providing a sysfs
-file to set the desired protection level or sensor sensibility.
-
-
-2. Interface
-------------
-
-This device comes with 3 methods:
-
-====	=====================================================================
-_STA    Checks existence of the device, returning Zero if the device does not
-	exists or is not supported.
-PTLV    Sets the desired protection level.
-RSSS    Shuts down the HDD protection interface for a few seconds,
-	then restores normal operation.
-====	=====================================================================
-
-Note:
-  The presence of Solid State Drives (SSD) can make this driver to fail loading,
-  given the fact that such drives have no movable parts, and thus, not requiring
-  any "protection" as well as failing during the evaluation of the _STA method
-  found under this device.
-
-
-3. Accelerometer axes
----------------------
-
-This device does not report any axes, however, to query the sensor position
-a couple HCI (Hardware Configuration Interface) calls (0x6D and 0xA6) are
-provided to query such information, handled by the kernel module toshiba_acpi
-since kernel version 3.15.
-
-
-4. Supported devices
---------------------
-
-This driver binds itself to the ACPI device TOS620A, and any Toshiba laptop
-with this device is supported, given the fact that they have the presence of
-conventional HDD and not only SSD, or a combination of both HDD and SSD.
-
-
-5. Usage
---------
-
-The sysfs files under /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS620A:00/ are:
-
-================   ============================================================
-protection_level   The protection_level is readable and writeable, and
-		   provides a way to let userspace query the current protection
-		   level, as well as set the desired protection level, the
-		   available protection levels are:
-
-		   ============   =======   ==========   ========
-		   0 - Disabled   1 - Low   2 - Medium   3 - High
-		   ============   =======   ==========   ========
-
-reset_protection   The reset_protection entry is writeable only, being "1"
-		   the only parameter it accepts, it is used to trigger
-		   a reset of the protection interface.
-================   ============================================================
-- 
cgit 


From 330d48105245abfb8c9ca491dc53ea500657217a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 13 Jun 2019 15:21:39 -0300
Subject: docs: admin-guide: add kdump documentation into it

The Kdump documentation describes procedures with admins use
in order to solve issues on their systems.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/admin-guide/bug-hunting.rst         |   4 +-
 Documentation/admin-guide/index.rst               |   1 +
 Documentation/admin-guide/kdump/gdbmacros.txt     | 264 +++++++++++
 Documentation/admin-guide/kdump/index.rst         |  20 +
 Documentation/admin-guide/kdump/kdump.rst         | 534 ++++++++++++++++++++++
 Documentation/admin-guide/kdump/vmcoreinfo.rst    | 488 ++++++++++++++++++++
 Documentation/admin-guide/kernel-parameters.txt   |   6 +-
 Documentation/kdump/gdbmacros.txt                 | 264 -----------
 Documentation/kdump/index.rst                     |  21 -
 Documentation/kdump/kdump.rst                     | 534 ----------------------
 Documentation/kdump/vmcoreinfo.rst                | 488 --------------------
 Documentation/powerpc/firmware-assisted-dump.txt  |   2 +-
 Documentation/translations/zh_CN/oops-tracing.txt |   4 +-
 13 files changed, 1315 insertions(+), 1315 deletions(-)
 create mode 100644 Documentation/admin-guide/kdump/gdbmacros.txt
 create mode 100644 Documentation/admin-guide/kdump/index.rst
 create mode 100644 Documentation/admin-guide/kdump/kdump.rst
 create mode 100644 Documentation/admin-guide/kdump/vmcoreinfo.rst
 delete mode 100644 Documentation/kdump/gdbmacros.txt
 delete mode 100644 Documentation/kdump/index.rst
 delete mode 100644 Documentation/kdump/kdump.rst
 delete mode 100644 Documentation/kdump/vmcoreinfo.rst

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/bug-hunting.rst b/Documentation/admin-guide/bug-hunting.rst
index b761aa2a51d2..44b8a4edd348 100644
--- a/Documentation/admin-guide/bug-hunting.rst
+++ b/Documentation/admin-guide/bug-hunting.rst
@@ -90,9 +90,9 @@ the disk is not available then you have three options:
     run a null modem to a second machine and capture the output there
     using your favourite communication program.  Minicom works well.
 
-(3) Use Kdump (see Documentation/kdump/kdump.rst),
+(3) Use Kdump (see Documentation/admin-guide/kdump/kdump.rst),
     extract the kernel ring buffer from old memory with using dmesg
-    gdbmacro in Documentation/kdump/gdbmacros.txt.
+    gdbmacro in Documentation/admin-guide/kdump/gdbmacros.txt.
 
 Finding the bug's location
 --------------------------
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 6fcc83aaa9b6..5b63182ceb5f 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -39,6 +39,7 @@ problems and bugs in particular.
    ramoops
    dynamic-debug-howto
    init
+   kdump/index
    perf/index
 
 This is the beginning of a section with information of interest to
diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt b/Documentation/admin-guide/kdump/gdbmacros.txt
new file mode 100644
index 000000000000..220d0a80ca2c
--- /dev/null
+++ b/Documentation/admin-guide/kdump/gdbmacros.txt
@@ -0,0 +1,264 @@
+#
+# This file contains a few gdb macros (user defined commands) to extract
+# useful information from kernel crashdump (kdump) like stack traces of
+# all the processes or a particular process and trapinfo.
+#
+# These macros can be used by copying this file in .gdbinit (put in home
+# directory or current directory) or by invoking gdb command with
+# --command=<command-file-name> option
+#
+# Credits:
+# Alexander Nyberg <alexn@telia.com>
+# V Srivatsa <vatsa@in.ibm.com>
+# Maneesh Soni <maneesh@in.ibm.com>
+#
+
+define bttnobp
+	set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
+	set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next)
+	set $init_t=&init_task
+	set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
+	set var $stacksize = sizeof(union thread_union)
+	while ($next_t != $init_t)
+		set $next_t=(struct task_struct *)$next_t
+		printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm
+		printf "===================\n"
+		set var $stackp = $next_t.thread.sp
+		set var $stack_top = ($stackp & ~($stacksize - 1)) + $stacksize
+
+		while ($stackp < $stack_top)
+			if (*($stackp) > _stext && *($stackp) < _sinittext)
+				info symbol *($stackp)
+			end
+			set $stackp += 4
+		end
+		set $next_th=(((char *)$next_t->thread_group.next) - $pid_off)
+		while ($next_th != $next_t)
+			set $next_th=(struct task_struct *)$next_th
+			printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm
+			printf "===================\n"
+			set var $stackp = $next_t.thread.sp
+			set var $stack_top = ($stackp & ~($stacksize - 1)) + stacksize
+
+			while ($stackp < $stack_top)
+				if (*($stackp) > _stext && *($stackp) < _sinittext)
+					info symbol *($stackp)
+				end
+				set $stackp += 4
+			end
+			set $next_th=(((char *)$next_th->thread_group.next) - $pid_off)
+		end
+		set $next_t=(char *)($next_t->tasks.next) - $tasks_off
+	end
+end
+document bttnobp
+	dump all thread stack traces on a kernel compiled with !CONFIG_FRAME_POINTER
+end
+
+define btthreadstack
+	set var $pid_task = $arg0
+
+	printf "\npid %d; comm %s:\n", $pid_task.pid, $pid_task.comm
+	printf "task struct: "
+	print $pid_task
+	printf "===================\n"
+	set var $stackp = $pid_task.thread.sp
+	set var $stacksize = sizeof(union thread_union)
+	set var $stack_top = ($stackp & ~($stacksize - 1)) + $stacksize
+	set var $stack_bot = ($stackp & ~($stacksize - 1))
+
+	set $stackp = *((unsigned long *) $stackp)
+	while (($stackp < $stack_top) && ($stackp > $stack_bot))
+		set var $addr = *(((unsigned long *) $stackp) + 1)
+		info symbol $addr
+		set $stackp = *((unsigned long *) $stackp)
+	end
+end
+document btthreadstack
+	 dump a thread stack using the given task structure pointer
+end
+
+
+define btt
+	set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
+	set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next)
+	set $init_t=&init_task
+	set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
+	while ($next_t != $init_t)
+		set $next_t=(struct task_struct *)$next_t
+		btthreadstack $next_t
+
+		set $next_th=(((char *)$next_t->thread_group.next) - $pid_off)
+		while ($next_th != $next_t)
+			set $next_th=(struct task_struct *)$next_th
+			btthreadstack $next_th
+			set $next_th=(((char *)$next_th->thread_group.next) - $pid_off)
+		end
+		set $next_t=(char *)($next_t->tasks.next) - $tasks_off
+	end
+end
+document btt
+	dump all thread stack traces on a kernel compiled with CONFIG_FRAME_POINTER
+end
+
+define btpid
+	set var $pid = $arg0
+	set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
+	set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next)
+	set $init_t=&init_task
+	set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
+	set var $pid_task = 0
+
+	while ($next_t != $init_t)
+		set $next_t=(struct task_struct *)$next_t
+
+		if ($next_t.pid == $pid)
+			set $pid_task = $next_t
+		end
+
+		set $next_th=(((char *)$next_t->thread_group.next) - $pid_off)
+		while ($next_th != $next_t)
+			set $next_th=(struct task_struct *)$next_th
+			if ($next_th.pid == $pid)
+				set $pid_task = $next_th
+			end
+			set $next_th=(((char *)$next_th->thread_group.next) - $pid_off)
+		end
+		set $next_t=(char *)($next_t->tasks.next) - $tasks_off
+	end
+
+	btthreadstack $pid_task
+end
+document btpid
+	backtrace of pid
+end
+
+
+define trapinfo
+	set var $pid = $arg0
+	set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
+	set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next)
+	set $init_t=&init_task
+	set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
+	set var $pid_task = 0
+
+	while ($next_t != $init_t)
+		set $next_t=(struct task_struct *)$next_t
+
+		if ($next_t.pid == $pid)
+			set $pid_task = $next_t
+		end
+
+		set $next_th=(((char *)$next_t->thread_group.next) - $pid_off)
+		while ($next_th != $next_t)
+			set $next_th=(struct task_struct *)$next_th
+			if ($next_th.pid == $pid)
+				set $pid_task = $next_th
+			end
+			set $next_th=(((char *)$next_th->thread_group.next) - $pid_off)
+		end
+		set $next_t=(char *)($next_t->tasks.next) - $tasks_off
+	end
+
+	printf "Trapno %ld, cr2 0x%lx, error_code %ld\n", $pid_task.thread.trap_no, \
+				$pid_task.thread.cr2, $pid_task.thread.error_code
+
+end
+document trapinfo
+	Run info threads and lookup pid of thread #1
+	'trapinfo <pid>' will tell you by which trap & possibly
+	address the kernel panicked.
+end
+
+define dump_log_idx
+	set $idx = $arg0
+	if ($argc > 1)
+		set $prev_flags = $arg1
+	else
+		set $prev_flags = 0
+	end
+	set $msg = ((struct printk_log *) (log_buf + $idx))
+	set $prefix = 1
+	set $newline = 1
+	set $log = log_buf + $idx + sizeof(*$msg)
+
+	# prev & LOG_CONT && !(msg->flags & LOG_PREIX)
+	if (($prev_flags & 8) && !($msg->flags & 4))
+		set $prefix = 0
+	end
+
+	# msg->flags & LOG_CONT
+	if ($msg->flags & 8)
+		# (prev & LOG_CONT && !(prev & LOG_NEWLINE))
+		if (($prev_flags & 8) && !($prev_flags & 2))
+			set $prefix = 0
+		end
+		# (!(msg->flags & LOG_NEWLINE))
+		if (!($msg->flags & 2))
+			set $newline = 0
+		end
+	end
+
+	if ($prefix)
+		printf "[%5lu.%06lu] ", $msg->ts_nsec / 1000000000, $msg->ts_nsec % 1000000000
+	end
+	if ($msg->text_len != 0)
+		eval "printf \"%%%d.%ds\", $log", $msg->text_len, $msg->text_len
+	end
+	if ($newline)
+		printf "\n"
+	end
+	if ($msg->dict_len > 0)
+		set $dict = $log + $msg->text_len
+		set $idx = 0
+		set $line = 1
+		while ($idx < $msg->dict_len)
+			if ($line)
+				printf " "
+				set $line = 0
+			end
+			set $c = $dict[$idx]
+			if ($c == '\0')
+				printf "\n"
+				set $line = 1
+			else
+				if ($c < ' ' || $c >= 127 || $c == '\\')
+					printf "\\x%02x", $c
+				else
+					printf "%c", $c
+				end
+			end
+			set $idx = $idx + 1
+		end
+		printf "\n"
+	end
+end
+document dump_log_idx
+	Dump a single log given its index in the log buffer.  The first
+	parameter is the index into log_buf, the second is optional and
+	specified the previous log buffer's flags, used for properly
+	formatting continued lines.
+end
+
+define dmesg
+	set $i = log_first_idx
+	set $end_idx = log_first_idx
+	set $prev_flags = 0
+
+	while (1)
+		set $msg = ((struct printk_log *) (log_buf + $i))
+		if ($msg->len == 0)
+			set $i = 0
+		else
+			dump_log_idx $i $prev_flags
+			set $i = $i + $msg->len
+			set $prev_flags = $msg->flags
+		end
+		if ($i == $end_idx)
+			loop_break
+		end
+	end
+end
+document dmesg
+	print the kernel ring buffer
+end
diff --git a/Documentation/admin-guide/kdump/index.rst b/Documentation/admin-guide/kdump/index.rst
new file mode 100644
index 000000000000..8e2ebd0383cd
--- /dev/null
+++ b/Documentation/admin-guide/kdump/index.rst
@@ -0,0 +1,20 @@
+
+================================================================
+Documentation for Kdump - The kexec-based Crash Dumping Solution
+================================================================
+
+This document includes overview, setup and installation, and analysis
+information.
+
+.. toctree::
+    :maxdepth: 1
+
+    kdump
+    vmcoreinfo
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst
new file mode 100644
index 000000000000..ac7e131d2935
--- /dev/null
+++ b/Documentation/admin-guide/kdump/kdump.rst
@@ -0,0 +1,534 @@
+================================================================
+Documentation for Kdump - The kexec-based Crash Dumping Solution
+================================================================
+
+This document includes overview, setup and installation, and analysis
+information.
+
+Overview
+========
+
+Kdump uses kexec to quickly boot to a dump-capture kernel whenever a
+dump of the system kernel's memory needs to be taken (for example, when
+the system panics). The system kernel's memory image is preserved across
+the reboot and is accessible to the dump-capture kernel.
+
+You can use common commands, such as cp and scp, to copy the
+memory image to a dump file on the local disk, or across the network to
+a remote system.
+
+Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
+s390x, arm and arm64 architectures.
+
+When the system kernel boots, it reserves a small section of memory for
+the dump-capture kernel. This ensures that ongoing Direct Memory Access
+(DMA) from the system kernel does not corrupt the dump-capture kernel.
+The kexec -p command loads the dump-capture kernel into this reserved
+memory.
+
+On x86 machines, the first 640 KB of physical memory is needed to boot,
+regardless of where the kernel loads. Therefore, kexec backs up this
+region just before rebooting into the dump-capture kernel.
+
+Similarly on PPC64 machines first 32KB of physical memory is needed for
+booting regardless of where the kernel is loaded and to support 64K page
+size kexec backs up the first 64KB memory.
+
+For s390x, when kdump is triggered, the crashkernel region is exchanged
+with the region [0, crashkernel region size] and then the kdump kernel
+runs in [0, crashkernel region size]. Therefore no relocatable kernel is
+needed for s390x.
+
+All of the necessary information about the system kernel's core image is
+encoded in the ELF format, and stored in a reserved area of memory
+before a crash. The physical address of the start of the ELF header is
+passed to the dump-capture kernel through the elfcorehdr= boot
+parameter. Optionally the size of the ELF header can also be passed
+when using the elfcorehdr=[size[KMG]@]offset[KMG] syntax.
+
+
+With the dump-capture kernel, you can access the memory image through
+/proc/vmcore. This exports the dump as an ELF-format file that you can
+write out using file copy commands such as cp or scp. Further, you can
+use analysis tools such as the GNU Debugger (GDB) and the Crash tool to
+debug the dump file. This method ensures that the dump pages are correctly
+ordered.
+
+
+Setup and Installation
+======================
+
+Install kexec-tools
+-------------------
+
+1) Login as the root user.
+
+2) Download the kexec-tools user-space package from the following URL:
+
+http://kernel.org/pub/linux/utils/kernel/kexec/kexec-tools.tar.gz
+
+This is a symlink to the latest version.
+
+The latest kexec-tools git tree is available at:
+
+- git://git.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git
+- http://www.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git
+
+There is also a gitweb interface available at
+http://www.kernel.org/git/?p=utils/kernel/kexec/kexec-tools.git
+
+More information about kexec-tools can be found at
+http://horms.net/projects/kexec/
+
+3) Unpack the tarball with the tar command, as follows::
+
+	tar xvpzf kexec-tools.tar.gz
+
+4) Change to the kexec-tools directory, as follows::
+
+	cd kexec-tools-VERSION
+
+5) Configure the package, as follows::
+
+	./configure
+
+6) Compile the package, as follows::
+
+	make
+
+7) Install the package, as follows::
+
+	make install
+
+
+Build the system and dump-capture kernels
+-----------------------------------------
+There are two possible methods of using Kdump.
+
+1) Build a separate custom dump-capture kernel for capturing the
+   kernel core dump.
+
+2) Or use the system kernel binary itself as dump-capture kernel and there is
+   no need to build a separate dump-capture kernel. This is possible
+   only with the architectures which support a relocatable kernel. As
+   of today, i386, x86_64, ppc64, ia64, arm and arm64 architectures support
+   relocatable kernel.
+
+Building a relocatable kernel is advantageous from the point of view that
+one does not have to build a second kernel for capturing the dump. But
+at the same time one might want to build a custom dump capture kernel
+suitable to his needs.
+
+Following are the configuration setting required for system and
+dump-capture kernels for enabling kdump support.
+
+System kernel config options
+----------------------------
+
+1) Enable "kexec system call" in "Processor type and features."::
+
+	CONFIG_KEXEC=y
+
+2) Enable "sysfs file system support" in "Filesystem" -> "Pseudo
+   filesystems." This is usually enabled by default::
+
+	CONFIG_SYSFS=y
+
+   Note that "sysfs file system support" might not appear in the "Pseudo
+   filesystems" menu if "Configure standard kernel features (for small
+   systems)" is not enabled in "General Setup." In this case, check the
+   .config file itself to ensure that sysfs is turned on, as follows::
+
+	grep 'CONFIG_SYSFS' .config
+
+3) Enable "Compile the kernel with debug info" in "Kernel hacking."::
+
+	CONFIG_DEBUG_INFO=Y
+
+   This causes the kernel to be built with debug symbols. The dump
+   analysis tools require a vmlinux with debug symbols in order to read
+   and analyze a dump file.
+
+Dump-capture kernel config options (Arch Independent)
+-----------------------------------------------------
+
+1) Enable "kernel crash dumps" support under "Processor type and
+   features"::
+
+	CONFIG_CRASH_DUMP=y
+
+2) Enable "/proc/vmcore support" under "Filesystems" -> "Pseudo filesystems"::
+
+	CONFIG_PROC_VMCORE=y
+
+   (CONFIG_PROC_VMCORE is set by default when CONFIG_CRASH_DUMP is selected.)
+
+Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
+--------------------------------------------------------------------
+
+1) On i386, enable high memory support under "Processor type and
+   features"::
+
+	CONFIG_HIGHMEM64G=y
+
+   or::
+
+	CONFIG_HIGHMEM4G
+
+2) On i386 and x86_64, disable symmetric multi-processing support
+   under "Processor type and features"::
+
+	CONFIG_SMP=n
+
+   (If CONFIG_SMP=y, then specify maxcpus=1 on the kernel command line
+   when loading the dump-capture kernel, see section "Load the Dump-capture
+   Kernel".)
+
+3) If one wants to build and use a relocatable kernel,
+   Enable "Build a relocatable kernel" support under "Processor type and
+   features"::
+
+	CONFIG_RELOCATABLE=y
+
+4) Use a suitable value for "Physical address where the kernel is
+   loaded" (under "Processor type and features"). This only appears when
+   "kernel crash dumps" is enabled. A suitable value depends upon
+   whether kernel is relocatable or not.
+
+   If you are using a relocatable kernel use CONFIG_PHYSICAL_START=0x100000
+   This will compile the kernel for physical address 1MB, but given the fact
+   kernel is relocatable, it can be run from any physical address hence
+   kexec boot loader will load it in memory region reserved for dump-capture
+   kernel.
+
+   Otherwise it should be the start of memory region reserved for
+   second kernel using boot parameter "crashkernel=Y@X". Here X is
+   start of memory region reserved for dump-capture kernel.
+   Generally X is 16MB (0x1000000). So you can set
+   CONFIG_PHYSICAL_START=0x1000000
+
+5) Make and install the kernel and its modules. DO NOT add this kernel
+   to the boot loader configuration files.
+
+Dump-capture kernel config options (Arch Dependent, ppc64)
+----------------------------------------------------------
+
+1) Enable "Build a kdump crash kernel" support under "Kernel" options::
+
+	CONFIG_CRASH_DUMP=y
+
+2)   Enable "Build a relocatable kernel" support::
+
+	CONFIG_RELOCATABLE=y
+
+   Make and install the kernel and its modules.
+
+Dump-capture kernel config options (Arch Dependent, ia64)
+----------------------------------------------------------
+
+- No specific options are required to create a dump-capture kernel
+  for ia64, other than those specified in the arch independent section
+  above. This means that it is possible to use the system kernel
+  as a dump-capture kernel if desired.
+
+  The crashkernel region can be automatically placed by the system
+  kernel at run time. This is done by specifying the base address as 0,
+  or omitting it all together::
+
+	crashkernel=256M@0
+
+  or::
+
+	crashkernel=256M
+
+  If the start address is specified, note that the start address of the
+  kernel will be aligned to 64Mb, so if the start address is not then
+  any space below the alignment point will be wasted.
+
+Dump-capture kernel config options (Arch Dependent, arm)
+----------------------------------------------------------
+
+-   To use a relocatable kernel,
+    Enable "AUTO_ZRELADDR" support under "Boot" options::
+
+	AUTO_ZRELADDR=y
+
+Dump-capture kernel config options (Arch Dependent, arm64)
+----------------------------------------------------------
+
+- Please note that kvm of the dump-capture kernel will not be enabled
+  on non-VHE systems even if it is configured. This is because the CPU
+  will not be reset to EL2 on panic.
+
+Extended crashkernel syntax
+===========================
+
+While the "crashkernel=size[@offset]" syntax is sufficient for most
+configurations, sometimes it's handy to have the reserved memory dependent
+on the value of System RAM -- that's mostly for distributors that pre-setup
+the kernel command line to avoid a unbootable system after some memory has
+been removed from the machine.
+
+The syntax is::
+
+    crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
+    range=start-[end]
+
+For example::
+
+    crashkernel=512M-2G:64M,2G-:128M
+
+This would mean:
+
+    1) if the RAM is smaller than 512M, then don't reserve anything
+       (this is the "rescue" case)
+    2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M
+    3) if the RAM size is larger than 2G, then reserve 128M
+
+
+
+Boot into System Kernel
+=======================
+
+1) Update the boot loader (such as grub, yaboot, or lilo) configuration
+   files as necessary.
+
+2) Boot the system kernel with the boot parameter "crashkernel=Y@X",
+   where Y specifies how much memory to reserve for the dump-capture kernel
+   and X specifies the beginning of this reserved memory. For example,
+   "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory
+   starting at physical address 0x01000000 (16MB) for the dump-capture kernel.
+
+   On x86 and x86_64, use "crashkernel=64M@16M".
+
+   On ppc64, use "crashkernel=128M@32M".
+
+   On ia64, 256M@256M is a generous value that typically works.
+   The region may be automatically placed on ia64, see the
+   dump-capture kernel config option notes above.
+   If use sparse memory, the size should be rounded to GRANULE boundaries.
+
+   On s390x, typically use "crashkernel=xxM". The value of xx is dependent
+   on the memory consumption of the kdump system. In general this is not
+   dependent on the memory size of the production system.
+
+   On arm, the use of "crashkernel=Y@X" is no longer necessary; the
+   kernel will automatically locate the crash kernel image within the
+   first 512MB of RAM if X is not given.
+
+   On arm64, use "crashkernel=Y[@X]".  Note that the start address of
+   the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).
+
+Load the Dump-capture Kernel
+============================
+
+After booting to the system kernel, dump-capture kernel needs to be
+loaded.
+
+Based on the architecture and type of image (relocatable or not), one
+can choose to load the uncompressed vmlinux or compressed bzImage/vmlinuz
+of dump-capture kernel. Following is the summary.
+
+For i386 and x86_64:
+
+	- Use vmlinux if kernel is not relocatable.
+	- Use bzImage/vmlinuz if kernel is relocatable.
+
+For ppc64:
+
+	- Use vmlinux
+
+For ia64:
+
+	- Use vmlinux or vmlinuz.gz
+
+For s390x:
+
+	- Use image or bzImage
+
+For arm:
+
+	- Use zImage
+
+For arm64:
+
+	- Use vmlinux or Image
+
+If you are using an uncompressed vmlinux image then use following command
+to load dump-capture kernel::
+
+   kexec -p <dump-capture-kernel-vmlinux-image> \
+   --initrd=<initrd-for-dump-capture-kernel> --args-linux \
+   --append="root=<root-dev> <arch-specific-options>"
+
+If you are using a compressed bzImage/vmlinuz, then use following command
+to load dump-capture kernel::
+
+   kexec -p <dump-capture-kernel-bzImage> \
+   --initrd=<initrd-for-dump-capture-kernel> \
+   --append="root=<root-dev> <arch-specific-options>"
+
+If you are using a compressed zImage, then use following command
+to load dump-capture kernel::
+
+   kexec --type zImage -p <dump-capture-kernel-bzImage> \
+   --initrd=<initrd-for-dump-capture-kernel> \
+   --dtb=<dtb-for-dump-capture-kernel> \
+   --append="root=<root-dev> <arch-specific-options>"
+
+If you are using an uncompressed Image, then use following command
+to load dump-capture kernel::
+
+   kexec -p <dump-capture-kernel-Image> \
+   --initrd=<initrd-for-dump-capture-kernel> \
+   --append="root=<root-dev> <arch-specific-options>"
+
+Please note, that --args-linux does not need to be specified for ia64.
+It is planned to make this a no-op on that architecture, but for now
+it should be omitted
+
+Following are the arch specific command line options to be used while
+loading dump-capture kernel.
+
+For i386, x86_64 and ia64:
+
+	"1 irqpoll maxcpus=1 reset_devices"
+
+For ppc64:
+
+	"1 maxcpus=1 noirqdistrib reset_devices"
+
+For s390x:
+
+	"1 maxcpus=1 cgroup_disable=memory"
+
+For arm:
+
+	"1 maxcpus=1 reset_devices"
+
+For arm64:
+
+	"1 maxcpus=1 reset_devices"
+
+Notes on loading the dump-capture kernel:
+
+* By default, the ELF headers are stored in ELF64 format to support
+  systems with more than 4GB memory. On i386, kexec automatically checks if
+  the physical RAM size exceeds the 4 GB limit and if not, uses ELF32.
+  So, on non-PAE systems, ELF32 is always used.
+
+  The --elf32-core-headers option can be used to force the generation of ELF32
+  headers. This is necessary because GDB currently cannot open vmcore files
+  with ELF64 headers on 32-bit systems.
+
+* The "irqpoll" boot parameter reduces driver initialization failures
+  due to shared interrupts in the dump-capture kernel.
+
+* You must specify <root-dev> in the format corresponding to the root
+  device name in the output of mount command.
+
+* Boot parameter "1" boots the dump-capture kernel into single-user
+  mode without networking. If you want networking, use "3".
+
+* We generally don't have to bring up a SMP kernel just to capture the
+  dump. Hence generally it is useful either to build a UP dump-capture
+  kernel or specify maxcpus=1 option while loading dump-capture kernel.
+  Note, though maxcpus always works, you had better replace it with
+  nr_cpus to save memory if supported by the current ARCH, such as x86.
+
+* You should enable multi-cpu support in dump-capture kernel if you intend
+  to use multi-thread programs with it, such as parallel dump feature of
+  makedumpfile. Otherwise, the multi-thread program may have a great
+  performance degradation. To enable multi-cpu support, you should bring up an
+  SMP dump-capture kernel and specify maxcpus/nr_cpus, disable_cpu_apicid=[X]
+  options while loading it.
+
+* For s390x there are two kdump modes: If a ELF header is specified with
+  the elfcorehdr= kernel parameter, it is used by the kdump kernel as it
+  is done on all other architectures. If no elfcorehdr= kernel parameter is
+  specified, the s390x kdump kernel dynamically creates the header. The
+  second mode has the advantage that for CPU and memory hotplug, kdump has
+  not to be reloaded with kexec_load().
+
+* For s390x systems with many attached devices the "cio_ignore" kernel
+  parameter should be used for the kdump kernel in order to prevent allocation
+  of kernel memory for devices that are not relevant for kdump. The same
+  applies to systems that use SCSI/FCP devices. In that case the
+  "allow_lun_scan" zfcp module parameter should be set to zero before
+  setting FCP devices online.
+
+Kernel Panic
+============
+
+After successfully loading the dump-capture kernel as previously
+described, the system will reboot into the dump-capture kernel if a
+system crash is triggered.  Trigger points are located in panic(),
+die(), die_nmi() and in the sysrq handler (ALT-SysRq-c).
+
+The following conditions will execute a crash trigger point:
+
+If a hard lockup is detected and "NMI watchdog" is configured, the system
+will boot into the dump-capture kernel ( die_nmi() ).
+
+If die() is called, and it happens to be a thread with pid 0 or 1, or die()
+is called inside interrupt context or die() is called and panic_on_oops is set,
+the system will boot into the dump-capture kernel.
+
+On powerpc systems when a soft-reset is generated, die() is called by all cpus
+and the system will boot into the dump-capture kernel.
+
+For testing purposes, you can trigger a crash by using "ALT-SysRq-c",
+"echo c > /proc/sysrq-trigger" or write a module to force the panic.
+
+Write Out the Dump File
+=======================
+
+After the dump-capture kernel is booted, write out the dump file with
+the following command::
+
+   cp /proc/vmcore <dump-file>
+
+
+Analysis
+========
+
+Before analyzing the dump image, you should reboot into a stable kernel.
+
+You can do limited analysis using GDB on the dump file copied out of
+/proc/vmcore. Use the debug vmlinux built with -g and run the following
+command::
+
+   gdb vmlinux <dump-file>
+
+Stack trace for the task on processor 0, register display, and memory
+display work fine.
+
+Note: GDB cannot analyze core files generated in ELF64 format for x86.
+On systems with a maximum of 4GB of memory, you can generate
+ELF32-format headers using the --elf32-core-headers kernel option on the
+dump kernel.
+
+You can also use the Crash utility to analyze dump files in Kdump
+format. Crash is available on Dave Anderson's site at the following URL:
+
+   http://people.redhat.com/~anderson/
+
+Trigger Kdump on WARN()
+=======================
+
+The kernel parameter, panic_on_warn, calls panic() in all WARN() paths.  This
+will cause a kdump to occur at the panic() call.  In cases where a user wants
+to specify this during runtime, /proc/sys/kernel/panic_on_warn can be set to 1
+to achieve the same behaviour.
+
+Contact
+=======
+
+- Vivek Goyal (vgoyal@redhat.com)
+- Maneesh Soni (maneesh@in.ibm.com)
+
+GDB macros
+==========
+
+.. include:: gdbmacros.txt
+   :literal:
diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
new file mode 100644
index 000000000000..007a6b86e0ee
--- /dev/null
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@@ -0,0 +1,488 @@
+==========
+VMCOREINFO
+==========
+
+What is it?
+===========
+
+VMCOREINFO is a special ELF note section. It contains various
+information from the kernel like structure size, page size, symbol
+values, field offsets, etc. These data are packed into an ELF note
+section and used by user-space tools like crash and makedumpfile to
+analyze a kernel's memory layout.
+
+Common variables
+================
+
+init_uts_ns.name.release
+------------------------
+
+The version of the Linux kernel. Used to find the corresponding source
+code from which the kernel has been built. For example, crash uses it to
+find the corresponding vmlinux in order to process vmcore.
+
+PAGE_SIZE
+---------
+
+The size of a page. It is the smallest unit of data used by the memory
+management facilities. It is usually 4096 bytes of size and a page is
+aligned on 4096 bytes. Used for computing page addresses.
+
+init_uts_ns
+-----------
+
+The UTS namespace which is used to isolate two specific elements of the
+system that relate to the uname(2) system call. It is named after the
+data structure used to store information returned by the uname(2) system
+call.
+
+User-space tools can get the kernel name, host name, kernel release
+number, kernel version, architecture name and OS type from it.
+
+node_online_map
+---------------
+
+An array node_states[N_ONLINE] which represents the set of online nodes
+in a system, one bit position per node number. Used to keep track of
+which nodes are in the system and online.
+
+swapper_pg_dir
+--------------
+
+The global page directory pointer of the kernel. Used to translate
+virtual to physical addresses.
+
+_stext
+------
+
+Defines the beginning of the text section. In general, _stext indicates
+the kernel start address. Used to convert a virtual address from the
+direct kernel map to a physical address.
+
+vmap_area_list
+--------------
+
+Stores the virtual area list. makedumpfile gets the vmalloc start value
+from this variable and its value is necessary for vmalloc translation.
+
+mem_map
+-------
+
+Physical addresses are translated to struct pages by treating them as
+an index into the mem_map array. Right-shifting a physical address
+PAGE_SHIFT bits converts it into a page frame number which is an index
+into that mem_map array.
+
+Used to map an address to the corresponding struct page.
+
+contig_page_data
+----------------
+
+Makedumpfile gets the pglist_data structure from this symbol, which is
+used to describe the memory layout.
+
+User-space tools use this to exclude free pages when dumping memory.
+
+mem_section|(mem_section, NR_SECTION_ROOTS)|(mem_section, section_mem_map)
+--------------------------------------------------------------------------
+
+The address of the mem_section array, its length, structure size, and
+the section_mem_map offset.
+
+It exists in the sparse memory mapping model, and it is also somewhat
+similar to the mem_map variable, both of them are used to translate an
+address.
+
+page
+----
+
+The size of a page structure. struct page is an important data structure
+and it is widely used to compute contiguous memory.
+
+pglist_data
+-----------
+
+The size of a pglist_data structure. This value is used to check if the
+pglist_data structure is valid. It is also used for checking the memory
+type.
+
+zone
+----
+
+The size of a zone structure. This value is used to check if the zone
+structure has been found. It is also used for excluding free pages.
+
+free_area
+---------
+
+The size of a free_area structure. It indicates whether the free_area
+structure is valid or not. Useful when excluding free pages.
+
+list_head
+---------
+
+The size of a list_head structure. Used when iterating lists in a
+post-mortem analysis session.
+
+nodemask_t
+----------
+
+The size of a nodemask_t type. Used to compute the number of online
+nodes.
+
+(page, flags|_refcount|mapping|lru|_mapcount|private|compound_dtor|compound_order|compound_head)
+-------------------------------------------------------------------------------------------------
+
+User-space tools compute their values based on the offset of these
+variables. The variables are used when excluding unnecessary pages.
+
+(pglist_data, node_zones|nr_zones|node_mem_map|node_start_pfn|node_spanned_pages|node_id)
+-----------------------------------------------------------------------------------------
+
+On NUMA machines, each NUMA node has a pg_data_t to describe its memory
+layout. On UMA machines there is a single pglist_data which describes the
+whole memory.
+
+These values are used to check the memory type and to compute the
+virtual address for memory map.
+
+(zone, free_area|vm_stat|spanned_pages)
+---------------------------------------
+
+Each node is divided into a number of blocks called zones which
+represent ranges within memory. A zone is described by a structure zone.
+
+User-space tools compute required values based on the offset of these
+variables.
+
+(free_area, free_list)
+----------------------
+
+Offset of the free_list's member. This value is used to compute the number
+of free pages.
+
+Each zone has a free_area structure array called free_area[MAX_ORDER].
+The free_list represents a linked list of free page blocks.
+
+(list_head, next|prev)
+----------------------
+
+Offsets of the list_head's members. list_head is used to define a
+circular linked list. User-space tools need these in order to traverse
+lists.
+
+(vmap_area, va_start|list)
+--------------------------
+
+Offsets of the vmap_area's members. They carry vmalloc-specific
+information. Makedumpfile gets the start address of the vmalloc region
+from this.
+
+(zone.free_area, MAX_ORDER)
+---------------------------
+
+Free areas descriptor. User-space tools use this value to iterate the
+free_area ranges. MAX_ORDER is used by the zone buddy allocator.
+
+log_first_idx
+-------------
+
+Index of the first record stored in the buffer log_buf. Used by
+user-space tools to read the strings in the log_buf.
+
+log_buf
+-------
+
+Console output is written to the ring buffer log_buf at index
+log_first_idx. Used to get the kernel log.
+
+log_buf_len
+-----------
+
+log_buf's length.
+
+clear_idx
+---------
+
+The index that the next printk() record to read after the last clear
+command. It indicates the first record after the last SYSLOG_ACTION
+_CLEAR, like issued by 'dmesg -c'. Used by user-space tools to dump
+the dmesg log.
+
+log_next_idx
+------------
+
+The index of the next record to store in the buffer log_buf. Used to
+compute the index of the current buffer position.
+
+printk_log
+----------
+
+The size of a structure printk_log. Used to compute the size of
+messages, and extract dmesg log. It encapsulates header information for
+log_buf, such as timestamp, syslog level, etc.
+
+(printk_log, ts_nsec|len|text_len|dict_len)
+-------------------------------------------
+
+It represents field offsets in struct printk_log. User space tools
+parse it and check whether the values of printk_log's members have been
+changed.
+
+(free_area.free_list, MIGRATE_TYPES)
+------------------------------------
+
+The number of migrate types for pages. The free_list is described by the
+array. Used by tools to compute the number of free pages.
+
+NR_FREE_PAGES
+-------------
+
+On linux-2.6.21 or later, the number of free pages is in
+vm_stat[NR_FREE_PAGES]. Used to get the number of free pages.
+
+PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_slab|PG_hwpoision|PG_head_mask
+------------------------------------------------------------------------------
+
+Page attributes. These flags are used to filter various unnecessary for
+dumping pages.
+
+PAGE_BUDDY_MAPCOUNT_VALUE(~PG_buddy)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_offline)
+-----------------------------------------------------------------------------
+
+More page attributes. These flags are used to filter various unnecessary for
+dumping pages.
+
+
+HUGETLB_PAGE_DTOR
+-----------------
+
+The HUGETLB_PAGE_DTOR flag denotes hugetlbfs pages. Makedumpfile
+excludes these pages.
+
+x86_64
+======
+
+phys_base
+---------
+
+Used to convert the virtual address of an exported kernel symbol to its
+corresponding physical address.
+
+init_top_pgt
+------------
+
+Used to walk through the whole page table and convert virtual addresses
+to physical addresses. The init_top_pgt is somewhat similar to
+swapper_pg_dir, but it is only used in x86_64.
+
+pgtable_l5_enabled
+------------------
+
+User-space tools need to know whether the crash kernel was in 5-level
+paging mode.
+
+node_data
+---------
+
+This is a struct pglist_data array and stores all NUMA nodes
+information. Makedumpfile gets the pglist_data structure from it.
+
+(node_data, MAX_NUMNODES)
+-------------------------
+
+The maximum number of nodes in system.
+
+KERNELOFFSET
+------------
+
+The kernel randomization offset. Used to compute the page offset. If
+KASLR is disabled, this value is zero.
+
+KERNEL_IMAGE_SIZE
+-----------------
+
+Currently unused by Makedumpfile. Used to compute the module virtual
+address by Crash.
+
+sme_mask
+--------
+
+AMD-specific with SME support: it indicates the secure memory encryption
+mask. Makedumpfile tools need to know whether the crash kernel was
+encrypted. If SME is enabled in the first kernel, the crash kernel's
+page table entries (pgd/pud/pmd/pte) contain the memory encryption
+mask. This is used to remove the SME mask and obtain the true physical
+address.
+
+Currently, sme_mask stores the value of the C-bit position. If needed,
+additional SME-relevant info can be placed in that variable.
+
+For example::
+
+  [ misc	        ][ enc bit  ][ other misc SME info       ]
+  0000_0000_0000_0000_1000_0000_0000_0000_0000_0000_..._0000
+  63   59   55   51   47   43   39   35   31   27   ... 3
+
+x86_32
+======
+
+X86_PAE
+-------
+
+Denotes whether physical address extensions are enabled. It has the cost
+of a higher page table lookup overhead, and also consumes more page
+table space per process. Used to check whether PAE was enabled in the
+crash kernel when converting virtual addresses to physical addresses.
+
+ia64
+====
+
+pgdat_list|(pgdat_list, MAX_NUMNODES)
+-------------------------------------
+
+pg_data_t array storing all NUMA nodes information. MAX_NUMNODES
+indicates the number of the nodes.
+
+node_memblk|(node_memblk, NR_NODE_MEMBLKS)
+------------------------------------------
+
+List of node memory chunks. Filled when parsing the SRAT table to obtain
+information about memory nodes. NR_NODE_MEMBLKS indicates the number of
+node memory chunks.
+
+These values are used to compute the number of nodes the crashed kernel used.
+
+node_memblk_s|(node_memblk_s, start_paddr)|(node_memblk_s, size)
+----------------------------------------------------------------
+
+The size of a struct node_memblk_s and the offsets of the
+node_memblk_s's members. Used to compute the number of nodes.
+
+PGTABLE_3|PGTABLE_4
+-------------------
+
+User-space tools need to know whether the crash kernel was in 3-level or
+4-level paging mode. Used to distinguish the page table.
+
+ARM64
+=====
+
+VA_BITS
+-------
+
+The maximum number of bits for virtual addresses. Used to compute the
+virtual memory ranges.
+
+kimage_voffset
+--------------
+
+The offset between the kernel virtual and physical mappings. Used to
+translate virtual to physical addresses.
+
+PHYS_OFFSET
+-----------
+
+Indicates the physical address of the start of memory. Similar to
+kimage_voffset, which is used to translate virtual to physical
+addresses.
+
+KERNELOFFSET
+------------
+
+The kernel randomization offset. Used to compute the page offset. If
+KASLR is disabled, this value is zero.
+
+arm
+===
+
+ARM_LPAE
+--------
+
+It indicates whether the crash kernel supports large physical address
+extensions. Used to translate virtual to physical addresses.
+
+s390
+====
+
+lowcore_ptr
+-----------
+
+An array with a pointer to the lowcore of every CPU. Used to print the
+psw and all registers information.
+
+high_memory
+-----------
+
+Used to get the vmalloc_start address from the high_memory symbol.
+
+(lowcore_ptr, NR_CPUS)
+----------------------
+
+The maximum number of CPUs.
+
+powerpc
+=======
+
+
+node_data|(node_data, MAX_NUMNODES)
+-----------------------------------
+
+See above.
+
+contig_page_data
+----------------
+
+See above.
+
+vmemmap_list
+------------
+
+The vmemmap_list maintains the entire vmemmap physical mapping. Used
+to get vmemmap list count and populated vmemmap regions info. If the
+vmemmap address translation information is stored in the crash kernel,
+it is used to translate vmemmap kernel virtual addresses.
+
+mmu_vmemmap_psize
+-----------------
+
+The size of a page. Used to translate virtual to physical addresses.
+
+mmu_psize_defs
+--------------
+
+Page size definitions, i.e. 4k, 64k, or 16M.
+
+Used to make vtop translations.
+
+vmemmap_backing|(vmemmap_backing, list)|(vmemmap_backing, phys)|(vmemmap_backing, virt_addr)
+--------------------------------------------------------------------------------------------
+
+The vmemmap virtual address space management does not have a traditional
+page table to track which virtual struct pages are backed by a physical
+mapping. The virtual to physical mappings are tracked in a simple linked
+list format.
+
+User-space tools need to know the offset of list, phys and virt_addr
+when computing the count of vmemmap regions.
+
+mmu_psize_def|(mmu_psize_def, shift)
+------------------------------------
+
+The size of a struct mmu_psize_def and the offset of mmu_psize_def's
+member.
+
+Used in vtop translations.
+
+sh
+==
+
+node_data|(node_data, MAX_NUMNODES)
+-----------------------------------
+
+See above.
+
+X2TLB
+-----
+
+Indicates whether the crashed kernel enabled SH extended mode.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 4821175a3769..e645b3ab4b6f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -708,14 +708,14 @@
 			[KNL, x86_64] select a region under 4G first, and
 			fall back to reserve region above 4G when '@offset'
 			hasn't been specified.
-			See Documentation/kdump/kdump.rst for further details.
+			See Documentation/admin-guide/kdump/kdump.rst for further details.
 
 	crashkernel=range1:size1[,range2:size2,...][@offset]
 			[KNL] Same as above, but depends on the memory
 			in the running system. The syntax of range is
 			start-[end] where start and end are both
 			a memory unit (amount[KMG]). See also
-			Documentation/kdump/kdump.rst for an example.
+			Documentation/admin-guide/kdump/kdump.rst for an example.
 
 	crashkernel=size[KMG],high
 			[KNL, x86_64] range could be above 4G. Allow kernel
@@ -1207,7 +1207,7 @@
 			Specifies physical address of start of kernel core
 			image elf header and optionally the size. Generally
 			kexec loader will pass this option to capture kernel.
-			See Documentation/kdump/kdump.rst for details.
+			See Documentation/admin-guide/kdump/kdump.rst for details.
 
 	enable_mtrr_cleanup [X86]
 			The kernel tries to adjust MTRR layout from continuous
diff --git a/Documentation/kdump/gdbmacros.txt b/Documentation/kdump/gdbmacros.txt
deleted file mode 100644
index 220d0a80ca2c..000000000000
--- a/Documentation/kdump/gdbmacros.txt
+++ /dev/null
@@ -1,264 +0,0 @@
-#
-# This file contains a few gdb macros (user defined commands) to extract
-# useful information from kernel crashdump (kdump) like stack traces of
-# all the processes or a particular process and trapinfo.
-#
-# These macros can be used by copying this file in .gdbinit (put in home
-# directory or current directory) or by invoking gdb command with
-# --command=<command-file-name> option
-#
-# Credits:
-# Alexander Nyberg <alexn@telia.com>
-# V Srivatsa <vatsa@in.ibm.com>
-# Maneesh Soni <maneesh@in.ibm.com>
-#
-
-define bttnobp
-	set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
-	set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next)
-	set $init_t=&init_task
-	set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
-	set var $stacksize = sizeof(union thread_union)
-	while ($next_t != $init_t)
-		set $next_t=(struct task_struct *)$next_t
-		printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm
-		printf "===================\n"
-		set var $stackp = $next_t.thread.sp
-		set var $stack_top = ($stackp & ~($stacksize - 1)) + $stacksize
-
-		while ($stackp < $stack_top)
-			if (*($stackp) > _stext && *($stackp) < _sinittext)
-				info symbol *($stackp)
-			end
-			set $stackp += 4
-		end
-		set $next_th=(((char *)$next_t->thread_group.next) - $pid_off)
-		while ($next_th != $next_t)
-			set $next_th=(struct task_struct *)$next_th
-			printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm
-			printf "===================\n"
-			set var $stackp = $next_t.thread.sp
-			set var $stack_top = ($stackp & ~($stacksize - 1)) + stacksize
-
-			while ($stackp < $stack_top)
-				if (*($stackp) > _stext && *($stackp) < _sinittext)
-					info symbol *($stackp)
-				end
-				set $stackp += 4
-			end
-			set $next_th=(((char *)$next_th->thread_group.next) - $pid_off)
-		end
-		set $next_t=(char *)($next_t->tasks.next) - $tasks_off
-	end
-end
-document bttnobp
-	dump all thread stack traces on a kernel compiled with !CONFIG_FRAME_POINTER
-end
-
-define btthreadstack
-	set var $pid_task = $arg0
-
-	printf "\npid %d; comm %s:\n", $pid_task.pid, $pid_task.comm
-	printf "task struct: "
-	print $pid_task
-	printf "===================\n"
-	set var $stackp = $pid_task.thread.sp
-	set var $stacksize = sizeof(union thread_union)
-	set var $stack_top = ($stackp & ~($stacksize - 1)) + $stacksize
-	set var $stack_bot = ($stackp & ~($stacksize - 1))
-
-	set $stackp = *((unsigned long *) $stackp)
-	while (($stackp < $stack_top) && ($stackp > $stack_bot))
-		set var $addr = *(((unsigned long *) $stackp) + 1)
-		info symbol $addr
-		set $stackp = *((unsigned long *) $stackp)
-	end
-end
-document btthreadstack
-	 dump a thread stack using the given task structure pointer
-end
-
-
-define btt
-	set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
-	set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next)
-	set $init_t=&init_task
-	set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
-	while ($next_t != $init_t)
-		set $next_t=(struct task_struct *)$next_t
-		btthreadstack $next_t
-
-		set $next_th=(((char *)$next_t->thread_group.next) - $pid_off)
-		while ($next_th != $next_t)
-			set $next_th=(struct task_struct *)$next_th
-			btthreadstack $next_th
-			set $next_th=(((char *)$next_th->thread_group.next) - $pid_off)
-		end
-		set $next_t=(char *)($next_t->tasks.next) - $tasks_off
-	end
-end
-document btt
-	dump all thread stack traces on a kernel compiled with CONFIG_FRAME_POINTER
-end
-
-define btpid
-	set var $pid = $arg0
-	set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
-	set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next)
-	set $init_t=&init_task
-	set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
-	set var $pid_task = 0
-
-	while ($next_t != $init_t)
-		set $next_t=(struct task_struct *)$next_t
-
-		if ($next_t.pid == $pid)
-			set $pid_task = $next_t
-		end
-
-		set $next_th=(((char *)$next_t->thread_group.next) - $pid_off)
-		while ($next_th != $next_t)
-			set $next_th=(struct task_struct *)$next_th
-			if ($next_th.pid == $pid)
-				set $pid_task = $next_th
-			end
-			set $next_th=(((char *)$next_th->thread_group.next) - $pid_off)
-		end
-		set $next_t=(char *)($next_t->tasks.next) - $tasks_off
-	end
-
-	btthreadstack $pid_task
-end
-document btpid
-	backtrace of pid
-end
-
-
-define trapinfo
-	set var $pid = $arg0
-	set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
-	set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next)
-	set $init_t=&init_task
-	set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
-	set var $pid_task = 0
-
-	while ($next_t != $init_t)
-		set $next_t=(struct task_struct *)$next_t
-
-		if ($next_t.pid == $pid)
-			set $pid_task = $next_t
-		end
-
-		set $next_th=(((char *)$next_t->thread_group.next) - $pid_off)
-		while ($next_th != $next_t)
-			set $next_th=(struct task_struct *)$next_th
-			if ($next_th.pid == $pid)
-				set $pid_task = $next_th
-			end
-			set $next_th=(((char *)$next_th->thread_group.next) - $pid_off)
-		end
-		set $next_t=(char *)($next_t->tasks.next) - $tasks_off
-	end
-
-	printf "Trapno %ld, cr2 0x%lx, error_code %ld\n", $pid_task.thread.trap_no, \
-				$pid_task.thread.cr2, $pid_task.thread.error_code
-
-end
-document trapinfo
-	Run info threads and lookup pid of thread #1
-	'trapinfo <pid>' will tell you by which trap & possibly
-	address the kernel panicked.
-end
-
-define dump_log_idx
-	set $idx = $arg0
-	if ($argc > 1)
-		set $prev_flags = $arg1
-	else
-		set $prev_flags = 0
-	end
-	set $msg = ((struct printk_log *) (log_buf + $idx))
-	set $prefix = 1
-	set $newline = 1
-	set $log = log_buf + $idx + sizeof(*$msg)
-
-	# prev & LOG_CONT && !(msg->flags & LOG_PREIX)
-	if (($prev_flags & 8) && !($msg->flags & 4))
-		set $prefix = 0
-	end
-
-	# msg->flags & LOG_CONT
-	if ($msg->flags & 8)
-		# (prev & LOG_CONT && !(prev & LOG_NEWLINE))
-		if (($prev_flags & 8) && !($prev_flags & 2))
-			set $prefix = 0
-		end
-		# (!(msg->flags & LOG_NEWLINE))
-		if (!($msg->flags & 2))
-			set $newline = 0
-		end
-	end
-
-	if ($prefix)
-		printf "[%5lu.%06lu] ", $msg->ts_nsec / 1000000000, $msg->ts_nsec % 1000000000
-	end
-	if ($msg->text_len != 0)
-		eval "printf \"%%%d.%ds\", $log", $msg->text_len, $msg->text_len
-	end
-	if ($newline)
-		printf "\n"
-	end
-	if ($msg->dict_len > 0)
-		set $dict = $log + $msg->text_len
-		set $idx = 0
-		set $line = 1
-		while ($idx < $msg->dict_len)
-			if ($line)
-				printf " "
-				set $line = 0
-			end
-			set $c = $dict[$idx]
-			if ($c == '\0')
-				printf "\n"
-				set $line = 1
-			else
-				if ($c < ' ' || $c >= 127 || $c == '\\')
-					printf "\\x%02x", $c
-				else
-					printf "%c", $c
-				end
-			end
-			set $idx = $idx + 1
-		end
-		printf "\n"
-	end
-end
-document dump_log_idx
-	Dump a single log given its index in the log buffer.  The first
-	parameter is the index into log_buf, the second is optional and
-	specified the previous log buffer's flags, used for properly
-	formatting continued lines.
-end
-
-define dmesg
-	set $i = log_first_idx
-	set $end_idx = log_first_idx
-	set $prev_flags = 0
-
-	while (1)
-		set $msg = ((struct printk_log *) (log_buf + $i))
-		if ($msg->len == 0)
-			set $i = 0
-		else
-			dump_log_idx $i $prev_flags
-			set $i = $i + $msg->len
-			set $prev_flags = $msg->flags
-		end
-		if ($i == $end_idx)
-			loop_break
-		end
-	end
-end
-document dmesg
-	print the kernel ring buffer
-end
diff --git a/Documentation/kdump/index.rst b/Documentation/kdump/index.rst
deleted file mode 100644
index 2b17fcf6867a..000000000000
--- a/Documentation/kdump/index.rst
+++ /dev/null
@@ -1,21 +0,0 @@
-:orphan:
-
-================================================================
-Documentation for Kdump - The kexec-based Crash Dumping Solution
-================================================================
-
-This document includes overview, setup and installation, and analysis
-information.
-
-.. toctree::
-    :maxdepth: 1
-
-    kdump
-    vmcoreinfo
-
-.. only::  subproject and html
-
-   Indices
-   =======
-
-   * :ref:`genindex`
diff --git a/Documentation/kdump/kdump.rst b/Documentation/kdump/kdump.rst
deleted file mode 100644
index ac7e131d2935..000000000000
--- a/Documentation/kdump/kdump.rst
+++ /dev/null
@@ -1,534 +0,0 @@
-================================================================
-Documentation for Kdump - The kexec-based Crash Dumping Solution
-================================================================
-
-This document includes overview, setup and installation, and analysis
-information.
-
-Overview
-========
-
-Kdump uses kexec to quickly boot to a dump-capture kernel whenever a
-dump of the system kernel's memory needs to be taken (for example, when
-the system panics). The system kernel's memory image is preserved across
-the reboot and is accessible to the dump-capture kernel.
-
-You can use common commands, such as cp and scp, to copy the
-memory image to a dump file on the local disk, or across the network to
-a remote system.
-
-Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
-s390x, arm and arm64 architectures.
-
-When the system kernel boots, it reserves a small section of memory for
-the dump-capture kernel. This ensures that ongoing Direct Memory Access
-(DMA) from the system kernel does not corrupt the dump-capture kernel.
-The kexec -p command loads the dump-capture kernel into this reserved
-memory.
-
-On x86 machines, the first 640 KB of physical memory is needed to boot,
-regardless of where the kernel loads. Therefore, kexec backs up this
-region just before rebooting into the dump-capture kernel.
-
-Similarly on PPC64 machines first 32KB of physical memory is needed for
-booting regardless of where the kernel is loaded and to support 64K page
-size kexec backs up the first 64KB memory.
-
-For s390x, when kdump is triggered, the crashkernel region is exchanged
-with the region [0, crashkernel region size] and then the kdump kernel
-runs in [0, crashkernel region size]. Therefore no relocatable kernel is
-needed for s390x.
-
-All of the necessary information about the system kernel's core image is
-encoded in the ELF format, and stored in a reserved area of memory
-before a crash. The physical address of the start of the ELF header is
-passed to the dump-capture kernel through the elfcorehdr= boot
-parameter. Optionally the size of the ELF header can also be passed
-when using the elfcorehdr=[size[KMG]@]offset[KMG] syntax.
-
-
-With the dump-capture kernel, you can access the memory image through
-/proc/vmcore. This exports the dump as an ELF-format file that you can
-write out using file copy commands such as cp or scp. Further, you can
-use analysis tools such as the GNU Debugger (GDB) and the Crash tool to
-debug the dump file. This method ensures that the dump pages are correctly
-ordered.
-
-
-Setup and Installation
-======================
-
-Install kexec-tools
--------------------
-
-1) Login as the root user.
-
-2) Download the kexec-tools user-space package from the following URL:
-
-http://kernel.org/pub/linux/utils/kernel/kexec/kexec-tools.tar.gz
-
-This is a symlink to the latest version.
-
-The latest kexec-tools git tree is available at:
-
-- git://git.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git
-- http://www.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git
-
-There is also a gitweb interface available at
-http://www.kernel.org/git/?p=utils/kernel/kexec/kexec-tools.git
-
-More information about kexec-tools can be found at
-http://horms.net/projects/kexec/
-
-3) Unpack the tarball with the tar command, as follows::
-
-	tar xvpzf kexec-tools.tar.gz
-
-4) Change to the kexec-tools directory, as follows::
-
-	cd kexec-tools-VERSION
-
-5) Configure the package, as follows::
-
-	./configure
-
-6) Compile the package, as follows::
-
-	make
-
-7) Install the package, as follows::
-
-	make install
-
-
-Build the system and dump-capture kernels
------------------------------------------
-There are two possible methods of using Kdump.
-
-1) Build a separate custom dump-capture kernel for capturing the
-   kernel core dump.
-
-2) Or use the system kernel binary itself as dump-capture kernel and there is
-   no need to build a separate dump-capture kernel. This is possible
-   only with the architectures which support a relocatable kernel. As
-   of today, i386, x86_64, ppc64, ia64, arm and arm64 architectures support
-   relocatable kernel.
-
-Building a relocatable kernel is advantageous from the point of view that
-one does not have to build a second kernel for capturing the dump. But
-at the same time one might want to build a custom dump capture kernel
-suitable to his needs.
-
-Following are the configuration setting required for system and
-dump-capture kernels for enabling kdump support.
-
-System kernel config options
-----------------------------
-
-1) Enable "kexec system call" in "Processor type and features."::
-
-	CONFIG_KEXEC=y
-
-2) Enable "sysfs file system support" in "Filesystem" -> "Pseudo
-   filesystems." This is usually enabled by default::
-
-	CONFIG_SYSFS=y
-
-   Note that "sysfs file system support" might not appear in the "Pseudo
-   filesystems" menu if "Configure standard kernel features (for small
-   systems)" is not enabled in "General Setup." In this case, check the
-   .config file itself to ensure that sysfs is turned on, as follows::
-
-	grep 'CONFIG_SYSFS' .config
-
-3) Enable "Compile the kernel with debug info" in "Kernel hacking."::
-
-	CONFIG_DEBUG_INFO=Y
-
-   This causes the kernel to be built with debug symbols. The dump
-   analysis tools require a vmlinux with debug symbols in order to read
-   and analyze a dump file.
-
-Dump-capture kernel config options (Arch Independent)
------------------------------------------------------
-
-1) Enable "kernel crash dumps" support under "Processor type and
-   features"::
-
-	CONFIG_CRASH_DUMP=y
-
-2) Enable "/proc/vmcore support" under "Filesystems" -> "Pseudo filesystems"::
-
-	CONFIG_PROC_VMCORE=y
-
-   (CONFIG_PROC_VMCORE is set by default when CONFIG_CRASH_DUMP is selected.)
-
-Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
---------------------------------------------------------------------
-
-1) On i386, enable high memory support under "Processor type and
-   features"::
-
-	CONFIG_HIGHMEM64G=y
-
-   or::
-
-	CONFIG_HIGHMEM4G
-
-2) On i386 and x86_64, disable symmetric multi-processing support
-   under "Processor type and features"::
-
-	CONFIG_SMP=n
-
-   (If CONFIG_SMP=y, then specify maxcpus=1 on the kernel command line
-   when loading the dump-capture kernel, see section "Load the Dump-capture
-   Kernel".)
-
-3) If one wants to build and use a relocatable kernel,
-   Enable "Build a relocatable kernel" support under "Processor type and
-   features"::
-
-	CONFIG_RELOCATABLE=y
-
-4) Use a suitable value for "Physical address where the kernel is
-   loaded" (under "Processor type and features"). This only appears when
-   "kernel crash dumps" is enabled. A suitable value depends upon
-   whether kernel is relocatable or not.
-
-   If you are using a relocatable kernel use CONFIG_PHYSICAL_START=0x100000
-   This will compile the kernel for physical address 1MB, but given the fact
-   kernel is relocatable, it can be run from any physical address hence
-   kexec boot loader will load it in memory region reserved for dump-capture
-   kernel.
-
-   Otherwise it should be the start of memory region reserved for
-   second kernel using boot parameter "crashkernel=Y@X". Here X is
-   start of memory region reserved for dump-capture kernel.
-   Generally X is 16MB (0x1000000). So you can set
-   CONFIG_PHYSICAL_START=0x1000000
-
-5) Make and install the kernel and its modules. DO NOT add this kernel
-   to the boot loader configuration files.
-
-Dump-capture kernel config options (Arch Dependent, ppc64)
-----------------------------------------------------------
-
-1) Enable "Build a kdump crash kernel" support under "Kernel" options::
-
-	CONFIG_CRASH_DUMP=y
-
-2)   Enable "Build a relocatable kernel" support::
-
-	CONFIG_RELOCATABLE=y
-
-   Make and install the kernel and its modules.
-
-Dump-capture kernel config options (Arch Dependent, ia64)
-----------------------------------------------------------
-
-- No specific options are required to create a dump-capture kernel
-  for ia64, other than those specified in the arch independent section
-  above. This means that it is possible to use the system kernel
-  as a dump-capture kernel if desired.
-
-  The crashkernel region can be automatically placed by the system
-  kernel at run time. This is done by specifying the base address as 0,
-  or omitting it all together::
-
-	crashkernel=256M@0
-
-  or::
-
-	crashkernel=256M
-
-  If the start address is specified, note that the start address of the
-  kernel will be aligned to 64Mb, so if the start address is not then
-  any space below the alignment point will be wasted.
-
-Dump-capture kernel config options (Arch Dependent, arm)
-----------------------------------------------------------
-
--   To use a relocatable kernel,
-    Enable "AUTO_ZRELADDR" support under "Boot" options::
-
-	AUTO_ZRELADDR=y
-
-Dump-capture kernel config options (Arch Dependent, arm64)
-----------------------------------------------------------
-
-- Please note that kvm of the dump-capture kernel will not be enabled
-  on non-VHE systems even if it is configured. This is because the CPU
-  will not be reset to EL2 on panic.
-
-Extended crashkernel syntax
-===========================
-
-While the "crashkernel=size[@offset]" syntax is sufficient for most
-configurations, sometimes it's handy to have the reserved memory dependent
-on the value of System RAM -- that's mostly for distributors that pre-setup
-the kernel command line to avoid a unbootable system after some memory has
-been removed from the machine.
-
-The syntax is::
-
-    crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
-    range=start-[end]
-
-For example::
-
-    crashkernel=512M-2G:64M,2G-:128M
-
-This would mean:
-
-    1) if the RAM is smaller than 512M, then don't reserve anything
-       (this is the "rescue" case)
-    2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M
-    3) if the RAM size is larger than 2G, then reserve 128M
-
-
-
-Boot into System Kernel
-=======================
-
-1) Update the boot loader (such as grub, yaboot, or lilo) configuration
-   files as necessary.
-
-2) Boot the system kernel with the boot parameter "crashkernel=Y@X",
-   where Y specifies how much memory to reserve for the dump-capture kernel
-   and X specifies the beginning of this reserved memory. For example,
-   "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory
-   starting at physical address 0x01000000 (16MB) for the dump-capture kernel.
-
-   On x86 and x86_64, use "crashkernel=64M@16M".
-
-   On ppc64, use "crashkernel=128M@32M".
-
-   On ia64, 256M@256M is a generous value that typically works.
-   The region may be automatically placed on ia64, see the
-   dump-capture kernel config option notes above.
-   If use sparse memory, the size should be rounded to GRANULE boundaries.
-
-   On s390x, typically use "crashkernel=xxM". The value of xx is dependent
-   on the memory consumption of the kdump system. In general this is not
-   dependent on the memory size of the production system.
-
-   On arm, the use of "crashkernel=Y@X" is no longer necessary; the
-   kernel will automatically locate the crash kernel image within the
-   first 512MB of RAM if X is not given.
-
-   On arm64, use "crashkernel=Y[@X]".  Note that the start address of
-   the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).
-
-Load the Dump-capture Kernel
-============================
-
-After booting to the system kernel, dump-capture kernel needs to be
-loaded.
-
-Based on the architecture and type of image (relocatable or not), one
-can choose to load the uncompressed vmlinux or compressed bzImage/vmlinuz
-of dump-capture kernel. Following is the summary.
-
-For i386 and x86_64:
-
-	- Use vmlinux if kernel is not relocatable.
-	- Use bzImage/vmlinuz if kernel is relocatable.
-
-For ppc64:
-
-	- Use vmlinux
-
-For ia64:
-
-	- Use vmlinux or vmlinuz.gz
-
-For s390x:
-
-	- Use image or bzImage
-
-For arm:
-
-	- Use zImage
-
-For arm64:
-
-	- Use vmlinux or Image
-
-If you are using an uncompressed vmlinux image then use following command
-to load dump-capture kernel::
-
-   kexec -p <dump-capture-kernel-vmlinux-image> \
-   --initrd=<initrd-for-dump-capture-kernel> --args-linux \
-   --append="root=<root-dev> <arch-specific-options>"
-
-If you are using a compressed bzImage/vmlinuz, then use following command
-to load dump-capture kernel::
-
-   kexec -p <dump-capture-kernel-bzImage> \
-   --initrd=<initrd-for-dump-capture-kernel> \
-   --append="root=<root-dev> <arch-specific-options>"
-
-If you are using a compressed zImage, then use following command
-to load dump-capture kernel::
-
-   kexec --type zImage -p <dump-capture-kernel-bzImage> \
-   --initrd=<initrd-for-dump-capture-kernel> \
-   --dtb=<dtb-for-dump-capture-kernel> \
-   --append="root=<root-dev> <arch-specific-options>"
-
-If you are using an uncompressed Image, then use following command
-to load dump-capture kernel::
-
-   kexec -p <dump-capture-kernel-Image> \
-   --initrd=<initrd-for-dump-capture-kernel> \
-   --append="root=<root-dev> <arch-specific-options>"
-
-Please note, that --args-linux does not need to be specified for ia64.
-It is planned to make this a no-op on that architecture, but for now
-it should be omitted
-
-Following are the arch specific command line options to be used while
-loading dump-capture kernel.
-
-For i386, x86_64 and ia64:
-
-	"1 irqpoll maxcpus=1 reset_devices"
-
-For ppc64:
-
-	"1 maxcpus=1 noirqdistrib reset_devices"
-
-For s390x:
-
-	"1 maxcpus=1 cgroup_disable=memory"
-
-For arm:
-
-	"1 maxcpus=1 reset_devices"
-
-For arm64:
-
-	"1 maxcpus=1 reset_devices"
-
-Notes on loading the dump-capture kernel:
-
-* By default, the ELF headers are stored in ELF64 format to support
-  systems with more than 4GB memory. On i386, kexec automatically checks if
-  the physical RAM size exceeds the 4 GB limit and if not, uses ELF32.
-  So, on non-PAE systems, ELF32 is always used.
-
-  The --elf32-core-headers option can be used to force the generation of ELF32
-  headers. This is necessary because GDB currently cannot open vmcore files
-  with ELF64 headers on 32-bit systems.
-
-* The "irqpoll" boot parameter reduces driver initialization failures
-  due to shared interrupts in the dump-capture kernel.
-
-* You must specify <root-dev> in the format corresponding to the root
-  device name in the output of mount command.
-
-* Boot parameter "1" boots the dump-capture kernel into single-user
-  mode without networking. If you want networking, use "3".
-
-* We generally don't have to bring up a SMP kernel just to capture the
-  dump. Hence generally it is useful either to build a UP dump-capture
-  kernel or specify maxcpus=1 option while loading dump-capture kernel.
-  Note, though maxcpus always works, you had better replace it with
-  nr_cpus to save memory if supported by the current ARCH, such as x86.
-
-* You should enable multi-cpu support in dump-capture kernel if you intend
-  to use multi-thread programs with it, such as parallel dump feature of
-  makedumpfile. Otherwise, the multi-thread program may have a great
-  performance degradation. To enable multi-cpu support, you should bring up an
-  SMP dump-capture kernel and specify maxcpus/nr_cpus, disable_cpu_apicid=[X]
-  options while loading it.
-
-* For s390x there are two kdump modes: If a ELF header is specified with
-  the elfcorehdr= kernel parameter, it is used by the kdump kernel as it
-  is done on all other architectures. If no elfcorehdr= kernel parameter is
-  specified, the s390x kdump kernel dynamically creates the header. The
-  second mode has the advantage that for CPU and memory hotplug, kdump has
-  not to be reloaded with kexec_load().
-
-* For s390x systems with many attached devices the "cio_ignore" kernel
-  parameter should be used for the kdump kernel in order to prevent allocation
-  of kernel memory for devices that are not relevant for kdump. The same
-  applies to systems that use SCSI/FCP devices. In that case the
-  "allow_lun_scan" zfcp module parameter should be set to zero before
-  setting FCP devices online.
-
-Kernel Panic
-============
-
-After successfully loading the dump-capture kernel as previously
-described, the system will reboot into the dump-capture kernel if a
-system crash is triggered.  Trigger points are located in panic(),
-die(), die_nmi() and in the sysrq handler (ALT-SysRq-c).
-
-The following conditions will execute a crash trigger point:
-
-If a hard lockup is detected and "NMI watchdog" is configured, the system
-will boot into the dump-capture kernel ( die_nmi() ).
-
-If die() is called, and it happens to be a thread with pid 0 or 1, or die()
-is called inside interrupt context or die() is called and panic_on_oops is set,
-the system will boot into the dump-capture kernel.
-
-On powerpc systems when a soft-reset is generated, die() is called by all cpus
-and the system will boot into the dump-capture kernel.
-
-For testing purposes, you can trigger a crash by using "ALT-SysRq-c",
-"echo c > /proc/sysrq-trigger" or write a module to force the panic.
-
-Write Out the Dump File
-=======================
-
-After the dump-capture kernel is booted, write out the dump file with
-the following command::
-
-   cp /proc/vmcore <dump-file>
-
-
-Analysis
-========
-
-Before analyzing the dump image, you should reboot into a stable kernel.
-
-You can do limited analysis using GDB on the dump file copied out of
-/proc/vmcore. Use the debug vmlinux built with -g and run the following
-command::
-
-   gdb vmlinux <dump-file>
-
-Stack trace for the task on processor 0, register display, and memory
-display work fine.
-
-Note: GDB cannot analyze core files generated in ELF64 format for x86.
-On systems with a maximum of 4GB of memory, you can generate
-ELF32-format headers using the --elf32-core-headers kernel option on the
-dump kernel.
-
-You can also use the Crash utility to analyze dump files in Kdump
-format. Crash is available on Dave Anderson's site at the following URL:
-
-   http://people.redhat.com/~anderson/
-
-Trigger Kdump on WARN()
-=======================
-
-The kernel parameter, panic_on_warn, calls panic() in all WARN() paths.  This
-will cause a kdump to occur at the panic() call.  In cases where a user wants
-to specify this during runtime, /proc/sys/kernel/panic_on_warn can be set to 1
-to achieve the same behaviour.
-
-Contact
-=======
-
-- Vivek Goyal (vgoyal@redhat.com)
-- Maneesh Soni (maneesh@in.ibm.com)
-
-GDB macros
-==========
-
-.. include:: gdbmacros.txt
-   :literal:
diff --git a/Documentation/kdump/vmcoreinfo.rst b/Documentation/kdump/vmcoreinfo.rst
deleted file mode 100644
index 007a6b86e0ee..000000000000
--- a/Documentation/kdump/vmcoreinfo.rst
+++ /dev/null
@@ -1,488 +0,0 @@
-==========
-VMCOREINFO
-==========
-
-What is it?
-===========
-
-VMCOREINFO is a special ELF note section. It contains various
-information from the kernel like structure size, page size, symbol
-values, field offsets, etc. These data are packed into an ELF note
-section and used by user-space tools like crash and makedumpfile to
-analyze a kernel's memory layout.
-
-Common variables
-================
-
-init_uts_ns.name.release
-------------------------
-
-The version of the Linux kernel. Used to find the corresponding source
-code from which the kernel has been built. For example, crash uses it to
-find the corresponding vmlinux in order to process vmcore.
-
-PAGE_SIZE
----------
-
-The size of a page. It is the smallest unit of data used by the memory
-management facilities. It is usually 4096 bytes of size and a page is
-aligned on 4096 bytes. Used for computing page addresses.
-
-init_uts_ns
------------
-
-The UTS namespace which is used to isolate two specific elements of the
-system that relate to the uname(2) system call. It is named after the
-data structure used to store information returned by the uname(2) system
-call.
-
-User-space tools can get the kernel name, host name, kernel release
-number, kernel version, architecture name and OS type from it.
-
-node_online_map
----------------
-
-An array node_states[N_ONLINE] which represents the set of online nodes
-in a system, one bit position per node number. Used to keep track of
-which nodes are in the system and online.
-
-swapper_pg_dir
---------------
-
-The global page directory pointer of the kernel. Used to translate
-virtual to physical addresses.
-
-_stext
-------
-
-Defines the beginning of the text section. In general, _stext indicates
-the kernel start address. Used to convert a virtual address from the
-direct kernel map to a physical address.
-
-vmap_area_list
---------------
-
-Stores the virtual area list. makedumpfile gets the vmalloc start value
-from this variable and its value is necessary for vmalloc translation.
-
-mem_map
--------
-
-Physical addresses are translated to struct pages by treating them as
-an index into the mem_map array. Right-shifting a physical address
-PAGE_SHIFT bits converts it into a page frame number which is an index
-into that mem_map array.
-
-Used to map an address to the corresponding struct page.
-
-contig_page_data
-----------------
-
-Makedumpfile gets the pglist_data structure from this symbol, which is
-used to describe the memory layout.
-
-User-space tools use this to exclude free pages when dumping memory.
-
-mem_section|(mem_section, NR_SECTION_ROOTS)|(mem_section, section_mem_map)
---------------------------------------------------------------------------
-
-The address of the mem_section array, its length, structure size, and
-the section_mem_map offset.
-
-It exists in the sparse memory mapping model, and it is also somewhat
-similar to the mem_map variable, both of them are used to translate an
-address.
-
-page
-----
-
-The size of a page structure. struct page is an important data structure
-and it is widely used to compute contiguous memory.
-
-pglist_data
------------
-
-The size of a pglist_data structure. This value is used to check if the
-pglist_data structure is valid. It is also used for checking the memory
-type.
-
-zone
-----
-
-The size of a zone structure. This value is used to check if the zone
-structure has been found. It is also used for excluding free pages.
-
-free_area
----------
-
-The size of a free_area structure. It indicates whether the free_area
-structure is valid or not. Useful when excluding free pages.
-
-list_head
----------
-
-The size of a list_head structure. Used when iterating lists in a
-post-mortem analysis session.
-
-nodemask_t
-----------
-
-The size of a nodemask_t type. Used to compute the number of online
-nodes.
-
-(page, flags|_refcount|mapping|lru|_mapcount|private|compound_dtor|compound_order|compound_head)
--------------------------------------------------------------------------------------------------
-
-User-space tools compute their values based on the offset of these
-variables. The variables are used when excluding unnecessary pages.
-
-(pglist_data, node_zones|nr_zones|node_mem_map|node_start_pfn|node_spanned_pages|node_id)
------------------------------------------------------------------------------------------
-
-On NUMA machines, each NUMA node has a pg_data_t to describe its memory
-layout. On UMA machines there is a single pglist_data which describes the
-whole memory.
-
-These values are used to check the memory type and to compute the
-virtual address for memory map.
-
-(zone, free_area|vm_stat|spanned_pages)
----------------------------------------
-
-Each node is divided into a number of blocks called zones which
-represent ranges within memory. A zone is described by a structure zone.
-
-User-space tools compute required values based on the offset of these
-variables.
-
-(free_area, free_list)
-----------------------
-
-Offset of the free_list's member. This value is used to compute the number
-of free pages.
-
-Each zone has a free_area structure array called free_area[MAX_ORDER].
-The free_list represents a linked list of free page blocks.
-
-(list_head, next|prev)
-----------------------
-
-Offsets of the list_head's members. list_head is used to define a
-circular linked list. User-space tools need these in order to traverse
-lists.
-
-(vmap_area, va_start|list)
---------------------------
-
-Offsets of the vmap_area's members. They carry vmalloc-specific
-information. Makedumpfile gets the start address of the vmalloc region
-from this.
-
-(zone.free_area, MAX_ORDER)
----------------------------
-
-Free areas descriptor. User-space tools use this value to iterate the
-free_area ranges. MAX_ORDER is used by the zone buddy allocator.
-
-log_first_idx
--------------
-
-Index of the first record stored in the buffer log_buf. Used by
-user-space tools to read the strings in the log_buf.
-
-log_buf
--------
-
-Console output is written to the ring buffer log_buf at index
-log_first_idx. Used to get the kernel log.
-
-log_buf_len
------------
-
-log_buf's length.
-
-clear_idx
----------
-
-The index that the next printk() record to read after the last clear
-command. It indicates the first record after the last SYSLOG_ACTION
-_CLEAR, like issued by 'dmesg -c'. Used by user-space tools to dump
-the dmesg log.
-
-log_next_idx
-------------
-
-The index of the next record to store in the buffer log_buf. Used to
-compute the index of the current buffer position.
-
-printk_log
-----------
-
-The size of a structure printk_log. Used to compute the size of
-messages, and extract dmesg log. It encapsulates header information for
-log_buf, such as timestamp, syslog level, etc.
-
-(printk_log, ts_nsec|len|text_len|dict_len)
--------------------------------------------
-
-It represents field offsets in struct printk_log. User space tools
-parse it and check whether the values of printk_log's members have been
-changed.
-
-(free_area.free_list, MIGRATE_TYPES)
-------------------------------------
-
-The number of migrate types for pages. The free_list is described by the
-array. Used by tools to compute the number of free pages.
-
-NR_FREE_PAGES
--------------
-
-On linux-2.6.21 or later, the number of free pages is in
-vm_stat[NR_FREE_PAGES]. Used to get the number of free pages.
-
-PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_slab|PG_hwpoision|PG_head_mask
-------------------------------------------------------------------------------
-
-Page attributes. These flags are used to filter various unnecessary for
-dumping pages.
-
-PAGE_BUDDY_MAPCOUNT_VALUE(~PG_buddy)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_offline)
------------------------------------------------------------------------------
-
-More page attributes. These flags are used to filter various unnecessary for
-dumping pages.
-
-
-HUGETLB_PAGE_DTOR
------------------
-
-The HUGETLB_PAGE_DTOR flag denotes hugetlbfs pages. Makedumpfile
-excludes these pages.
-
-x86_64
-======
-
-phys_base
----------
-
-Used to convert the virtual address of an exported kernel symbol to its
-corresponding physical address.
-
-init_top_pgt
-------------
-
-Used to walk through the whole page table and convert virtual addresses
-to physical addresses. The init_top_pgt is somewhat similar to
-swapper_pg_dir, but it is only used in x86_64.
-
-pgtable_l5_enabled
-------------------
-
-User-space tools need to know whether the crash kernel was in 5-level
-paging mode.
-
-node_data
----------
-
-This is a struct pglist_data array and stores all NUMA nodes
-information. Makedumpfile gets the pglist_data structure from it.
-
-(node_data, MAX_NUMNODES)
--------------------------
-
-The maximum number of nodes in system.
-
-KERNELOFFSET
-------------
-
-The kernel randomization offset. Used to compute the page offset. If
-KASLR is disabled, this value is zero.
-
-KERNEL_IMAGE_SIZE
------------------
-
-Currently unused by Makedumpfile. Used to compute the module virtual
-address by Crash.
-
-sme_mask
---------
-
-AMD-specific with SME support: it indicates the secure memory encryption
-mask. Makedumpfile tools need to know whether the crash kernel was
-encrypted. If SME is enabled in the first kernel, the crash kernel's
-page table entries (pgd/pud/pmd/pte) contain the memory encryption
-mask. This is used to remove the SME mask and obtain the true physical
-address.
-
-Currently, sme_mask stores the value of the C-bit position. If needed,
-additional SME-relevant info can be placed in that variable.
-
-For example::
-
-  [ misc	        ][ enc bit  ][ other misc SME info       ]
-  0000_0000_0000_0000_1000_0000_0000_0000_0000_0000_..._0000
-  63   59   55   51   47   43   39   35   31   27   ... 3
-
-x86_32
-======
-
-X86_PAE
--------
-
-Denotes whether physical address extensions are enabled. It has the cost
-of a higher page table lookup overhead, and also consumes more page
-table space per process. Used to check whether PAE was enabled in the
-crash kernel when converting virtual addresses to physical addresses.
-
-ia64
-====
-
-pgdat_list|(pgdat_list, MAX_NUMNODES)
--------------------------------------
-
-pg_data_t array storing all NUMA nodes information. MAX_NUMNODES
-indicates the number of the nodes.
-
-node_memblk|(node_memblk, NR_NODE_MEMBLKS)
-------------------------------------------
-
-List of node memory chunks. Filled when parsing the SRAT table to obtain
-information about memory nodes. NR_NODE_MEMBLKS indicates the number of
-node memory chunks.
-
-These values are used to compute the number of nodes the crashed kernel used.
-
-node_memblk_s|(node_memblk_s, start_paddr)|(node_memblk_s, size)
-----------------------------------------------------------------
-
-The size of a struct node_memblk_s and the offsets of the
-node_memblk_s's members. Used to compute the number of nodes.
-
-PGTABLE_3|PGTABLE_4
--------------------
-
-User-space tools need to know whether the crash kernel was in 3-level or
-4-level paging mode. Used to distinguish the page table.
-
-ARM64
-=====
-
-VA_BITS
--------
-
-The maximum number of bits for virtual addresses. Used to compute the
-virtual memory ranges.
-
-kimage_voffset
---------------
-
-The offset between the kernel virtual and physical mappings. Used to
-translate virtual to physical addresses.
-
-PHYS_OFFSET
------------
-
-Indicates the physical address of the start of memory. Similar to
-kimage_voffset, which is used to translate virtual to physical
-addresses.
-
-KERNELOFFSET
-------------
-
-The kernel randomization offset. Used to compute the page offset. If
-KASLR is disabled, this value is zero.
-
-arm
-===
-
-ARM_LPAE
---------
-
-It indicates whether the crash kernel supports large physical address
-extensions. Used to translate virtual to physical addresses.
-
-s390
-====
-
-lowcore_ptr
------------
-
-An array with a pointer to the lowcore of every CPU. Used to print the
-psw and all registers information.
-
-high_memory
------------
-
-Used to get the vmalloc_start address from the high_memory symbol.
-
-(lowcore_ptr, NR_CPUS)
-----------------------
-
-The maximum number of CPUs.
-
-powerpc
-=======
-
-
-node_data|(node_data, MAX_NUMNODES)
------------------------------------
-
-See above.
-
-contig_page_data
-----------------
-
-See above.
-
-vmemmap_list
-------------
-
-The vmemmap_list maintains the entire vmemmap physical mapping. Used
-to get vmemmap list count and populated vmemmap regions info. If the
-vmemmap address translation information is stored in the crash kernel,
-it is used to translate vmemmap kernel virtual addresses.
-
-mmu_vmemmap_psize
------------------
-
-The size of a page. Used to translate virtual to physical addresses.
-
-mmu_psize_defs
---------------
-
-Page size definitions, i.e. 4k, 64k, or 16M.
-
-Used to make vtop translations.
-
-vmemmap_backing|(vmemmap_backing, list)|(vmemmap_backing, phys)|(vmemmap_backing, virt_addr)
---------------------------------------------------------------------------------------------
-
-The vmemmap virtual address space management does not have a traditional
-page table to track which virtual struct pages are backed by a physical
-mapping. The virtual to physical mappings are tracked in a simple linked
-list format.
-
-User-space tools need to know the offset of list, phys and virt_addr
-when computing the count of vmemmap regions.
-
-mmu_psize_def|(mmu_psize_def, shift)
-------------------------------------
-
-The size of a struct mmu_psize_def and the offset of mmu_psize_def's
-member.
-
-Used in vtop translations.
-
-sh
-==
-
-node_data|(node_data, MAX_NUMNODES)
------------------------------------
-
-See above.
-
-X2TLB
------
-
-Indicates whether the crashed kernel enabled SH extended mode.
diff --git a/Documentation/powerpc/firmware-assisted-dump.txt b/Documentation/powerpc/firmware-assisted-dump.txt
index 0c41d6d463f3..10e7f4d16c14 100644
--- a/Documentation/powerpc/firmware-assisted-dump.txt
+++ b/Documentation/powerpc/firmware-assisted-dump.txt
@@ -59,7 +59,7 @@ as follows:
          the default calculated size. Use this option if default
          boot memory size is not sufficient for second kernel to
          boot successfully. For syntax of crashkernel= parameter,
-         refer to Documentation/kdump/kdump.rst. If any offset is
+         refer to Documentation/admin-guide/kdump/kdump.rst. If any offset is
          provided in crashkernel= parameter, it will be ignored
          as fadump uses a predefined offset to reserve memory
          for boot memory dump preservation in case of a crash.
diff --git a/Documentation/translations/zh_CN/oops-tracing.txt b/Documentation/translations/zh_CN/oops-tracing.txt
index 368ddd05b304..c5f3bda7abcb 100644
--- a/Documentation/translations/zh_CN/oops-tracing.txt
+++ b/Documentation/translations/zh_CN/oops-tracing.txt
@@ -53,8 +53,8 @@ cat /proc/kmsg > file， 然而你必须介入中止传输， kmsg是一个“
 （2）用串口终端启动（请参看Documentation/admin-guide/serial-console.rst），运行一个null
 modem到另一台机器并用你喜欢的通讯工具获取输出。Minicom工作地很好。
 
-（3）使用Kdump（请参看Documentation/kdump/kdump.rst），
-使用在Documentation/kdump/gdbmacros.txt中定义的dmesg gdb宏，从旧的内存中提取内核
+（3）使用Kdump（请参看Documentation/admin-guide/kdump/kdump.rst），
+使用在Documentation/admin-guide/kdump/gdbmacros.txt中定义的dmesg gdb宏，从旧的内存中提取内核
 环形缓冲区。
 
 完整信息
-- 
cgit 


From e7751617dd0599ceadf4221cb08e04307b00aa1f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 11:47:10 -0300
Subject: docs: blockdev: add it to the admin-guide

The blockdev book basically contains user-faced documentation.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 .../blockdev/drbd/DRBD-8.3-data-packets.svg        | 588 +++++++++++++++++++++
 .../blockdev/drbd/DRBD-data-packets.svg            | 459 ++++++++++++++++
 .../admin-guide/blockdev/drbd/conn-states-8.dot    |  18 +
 .../blockdev/drbd/data-structure-v9.rst            |  42 ++
 .../admin-guide/blockdev/drbd/disk-states-8.dot    |  16 +
 .../drbd/drbd-connection-state-overview.dot        |  85 +++
 .../admin-guide/blockdev/drbd/figures.rst          |  28 +
 Documentation/admin-guide/blockdev/drbd/index.rst  |  19 +
 .../admin-guide/blockdev/drbd/node-states-8.dot    |  13 +
 Documentation/admin-guide/blockdev/floppy.rst      | 255 +++++++++
 Documentation/admin-guide/blockdev/index.rst       |  14 +
 Documentation/admin-guide/blockdev/nbd.rst         |  31 ++
 Documentation/admin-guide/blockdev/paride.rst      | 439 +++++++++++++++
 Documentation/admin-guide/blockdev/ramdisk.rst     | 177 +++++++
 Documentation/admin-guide/blockdev/zram.rst        | 422 +++++++++++++++
 Documentation/admin-guide/index.rst                |   1 +
 Documentation/admin-guide/kernel-parameters.txt    |  18 +-
 .../blockdev/drbd/DRBD-8.3-data-packets.svg        | 588 ---------------------
 Documentation/blockdev/drbd/DRBD-data-packets.svg  | 459 ----------------
 Documentation/blockdev/drbd/conn-states-8.dot      |  18 -
 Documentation/blockdev/drbd/data-structure-v9.rst  |  42 --
 Documentation/blockdev/drbd/disk-states-8.dot      |  16 -
 .../drbd/drbd-connection-state-overview.dot        |  85 ---
 Documentation/blockdev/drbd/figures.rst            |  28 -
 Documentation/blockdev/drbd/index.rst              |  19 -
 Documentation/blockdev/drbd/node-states-8.dot      |  14 -
 Documentation/blockdev/floppy.rst                  | 255 ---------
 Documentation/blockdev/index.rst                   |  16 -
 Documentation/blockdev/nbd.rst                     |  31 --
 Documentation/blockdev/paride.rst                  | 439 ---------------
 Documentation/blockdev/ramdisk.rst                 | 177 -------
 Documentation/blockdev/zram.rst                    | 422 ---------------
 32 files changed, 2616 insertions(+), 2618 deletions(-)
 create mode 100644 Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg
 create mode 100644 Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg
 create mode 100644 Documentation/admin-guide/blockdev/drbd/conn-states-8.dot
 create mode 100644 Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst
 create mode 100644 Documentation/admin-guide/blockdev/drbd/disk-states-8.dot
 create mode 100644 Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot
 create mode 100644 Documentation/admin-guide/blockdev/drbd/figures.rst
 create mode 100644 Documentation/admin-guide/blockdev/drbd/index.rst
 create mode 100644 Documentation/admin-guide/blockdev/drbd/node-states-8.dot
 create mode 100644 Documentation/admin-guide/blockdev/floppy.rst
 create mode 100644 Documentation/admin-guide/blockdev/index.rst
 create mode 100644 Documentation/admin-guide/blockdev/nbd.rst
 create mode 100644 Documentation/admin-guide/blockdev/paride.rst
 create mode 100644 Documentation/admin-guide/blockdev/ramdisk.rst
 create mode 100644 Documentation/admin-guide/blockdev/zram.rst
 delete mode 100644 Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg
 delete mode 100644 Documentation/blockdev/drbd/DRBD-data-packets.svg
 delete mode 100644 Documentation/blockdev/drbd/conn-states-8.dot
 delete mode 100644 Documentation/blockdev/drbd/data-structure-v9.rst
 delete mode 100644 Documentation/blockdev/drbd/disk-states-8.dot
 delete mode 100644 Documentation/blockdev/drbd/drbd-connection-state-overview.dot
 delete mode 100644 Documentation/blockdev/drbd/figures.rst
 delete mode 100644 Documentation/blockdev/drbd/index.rst
 delete mode 100644 Documentation/blockdev/drbd/node-states-8.dot
 delete mode 100644 Documentation/blockdev/floppy.rst
 delete mode 100644 Documentation/blockdev/index.rst
 delete mode 100644 Documentation/blockdev/nbd.rst
 delete mode 100644 Documentation/blockdev/paride.rst
 delete mode 100644 Documentation/blockdev/ramdisk.rst
 delete mode 100644 Documentation/blockdev/zram.rst

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg b/Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg
new file mode 100644
index 000000000000..f87cfa0dc2fb
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg
@@ -0,0 +1,588 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+<svg
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   version="1.0"
+   width="210mm"
+   height="297mm"
+   viewBox="0 0 21000 29700"
+   id="svg2"
+   style="fill-rule:evenodd">
+  <defs
+     id="defs4" />
+  <g
+     id="Default"
+     style="visibility:visible">
+    <desc
+       id="desc180">Master slide</desc>
+  </g>
+  <path
+     d="M 11999,8601 L 11899,8301 L 12099,8301 L 11999,8601 z"
+     id="path193"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 11999,7801 L 11999,8361"
+     id="path197"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 7999,10401 L 7899,10101 L 8099,10101 L 7999,10401 z"
+     id="path209"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 7999,9601 L 7999,10161"
+     id="path213"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 11999,7801 L 11685,7840 L 11724,7644 L 11999,7801 z"
+     id="path225"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 7999,7001 L 11764,7754"
+     id="path229"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <g
+     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,-1244.4792,1416.5139)"
+     id="g245"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text247">
+      <tspan
+         x="9139 9368 9579 9808 9986 10075 10252 10481 10659 10837 10909"
+         y="9284"
+         id="tspan249">RSDataReply</tspan>
+    </text>
+  </g>
+  <path
+     d="M 7999,9601 L 8281,9458 L 8311,9655 L 7999,9601 z"
+     id="path259"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 11999,9001 L 8236,9565"
+     id="path263"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <g
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,1620.9382,-1639.4947)"
+     id="g279"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text281">
+      <tspan
+         x="8743 8972 9132 9310 9573 9801 10013 10242 10419 10597 10775 10953 11114"
+         y="7023"
+         id="tspan283">CsumRSRequest</tspan>
+    </text>
+  </g>
+  <text
+     id="text297"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4034 4263 4440 4703 4881 5042 5219 5397 5503 5681 5842 6003 6180 6341 6519 6625 6803 6980 7158 7336 7497 7586 7692"
+       y="5707"
+       id="tspan299">w_make_resync_request()</tspan>
+  </text>
+  <text
+     id="text313"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12305 12483 12644 12821 12893 13054 13232 13410 13638 13816 13905 14083 14311 14489 14667 14845 15023 15184 15272 15378"
+       y="7806"
+       id="tspan315">receive_DataRequest()</tspan>
+  </text>
+  <text
+     id="text329"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12377 12483 12660 12838 13016 13194 13372 13549 13621 13799 13977 14083 14261 14438 14616 14794 14955 15133 15294 15399"
+       y="8606"
+       id="tspan331">drbd_endio_read_sec()</tspan>
+  </text>
+  <text
+     id="text345"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12191 12420 12597 12775 12953 13131 13309 13486 13664 13825 13986 14164 14426 14604 14710 14871 15049 15154 15332 15510 15616"
+       y="9007"
+       id="tspan347">w_e_end_csum_rs_req()</tspan>
+  </text>
+  <text
+     id="text361"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4444 4550 4728 4889 5066 5138 5299 5477 5655 5883 6095 6324 6501 6590 6768 6997 7175 7352 7424 7585 7691"
+       y="9507"
+       id="tspan363">receive_RSDataReply()</tspan>
+  </text>
+  <text
+     id="text377"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4457 4635 4741 4918 5096 5274 5452 5630 5807 5879 6057 6235 6464 6569 6641 6730 6908 7086 7247 7425 7585 7691"
+       y="10407"
+       id="tspan379">drbd_endio_write_sec()</tspan>
+  </text>
+  <text
+     id="text393"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4647 4825 5003 5180 5358 5536 5714 5820 5997 6158 6319 6497 6658 6836 7013 7085 7263 7424 7585 7691"
+       y="10907"
+       id="tspan395">e_end_resync_block()</tspan>
+  </text>
+  <path
+     d="M 11999,11601 L 11685,11640 L 11724,11444 L 11999,11601 z"
+     id="path405"
+     style="fill:#000080;visibility:visible" />
+  <path
+     d="M 7999,10801 L 11764,11554"
+     id="path409"
+     style="fill:none;stroke:#000080;visibility:visible" />
+  <g
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,2434.7562,-1674.649)"
+     id="g425"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text427">
+      <tspan
+         x="9320 9621 9726 9798 9887 10065 10277 10438"
+         y="10943"
+         id="tspan429">WriteAck</tspan>
+    </text>
+  </g>
+  <text
+     id="text443"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12377 12555 12644 12821 13033 13105 13283 13444 13604 13816 13977 14138 14244"
+       y="11559"
+       id="tspan445">got_BlockAck()</tspan>
+  </text>
+  <text
+     id="text459"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="7999 8304 8541 8778 8990 9201 9413 9650 10001 10120 10357 10594 10806 11043 11280 11398 11703 11940 12152 12364 12601 12812 12931 13049 13261 13498 13710 13947 14065 14302 14540 14658 14777 14870 15107 15225 15437 15649 15886"
+       y="4877"
+       id="tspan461">Checksum based Resync, case not in sync</tspan>
+  </text>
+  <text
+     id="text475"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="6961 7266 7571 7854 8159 8299 8536 8654 8891 9010 9247 9484 9603 9840 9958 10077 10170 10407"
+       y="2806"
+       id="tspan477">DRBD-8.3 data flow</tspan>
+  </text>
+  <text
+     id="text491"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="5190 5419 5596 5774 5952 6113 6291 6468 6646 6824 6985 7146 7324 7586 7692"
+       y="7005"
+       id="tspan493">w_e_send_csum()</tspan>
+  </text>
+  <path
+     d="M 11999,17601 L 11899,17301 L 12099,17301 L 11999,17601 z"
+     id="path503"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 11999,16801 L 11999,17361"
+     id="path507"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 11999,16801 L 11685,16840 L 11724,16644 L 11999,16801 z"
+     id="path519"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 7999,16001 L 11764,16754"
+     id="path523"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <g
+     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,-2539.5806,1529.3491)"
+     id="g539"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text541">
+      <tspan
+         x="9269 9498 9709 9798 9959 10048 10226 10437 10598 10776"
+         y="18265"
+         id="tspan543">RSIsInSync</tspan>
+    </text>
+  </g>
+  <path
+     d="M 7999,18601 L 8281,18458 L 8311,18655 L 7999,18601 z"
+     id="path553"
+     style="fill:#000080;visibility:visible" />
+  <path
+     d="M 11999,18001 L 8236,18565"
+     id="path557"
+     style="fill:none;stroke:#000080;visibility:visible" />
+  <g
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,3461.4027,-1449.3012)"
+     id="g573"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text575">
+      <tspan
+         x="8743 8972 9132 9310 9573 9801 10013 10242 10419 10597 10775 10953 11114"
+         y="16023"
+         id="tspan577">CsumRSRequest</tspan>
+    </text>
+  </g>
+  <text
+     id="text591"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12305 12483 12644 12821 12893 13054 13232 13410 13638 13816 13905 14083 14311 14489 14667 14845 15023 15184 15272 15378"
+       y="16806"
+       id="tspan593">receive_DataRequest()</tspan>
+  </text>
+  <text
+     id="text607"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12377 12483 12660 12838 13016 13194 13372 13549 13621 13799 13977 14083 14261 14438 14616 14794 14955 15133 15294 15399"
+       y="17606"
+       id="tspan609">drbd_endio_read_sec()</tspan>
+  </text>
+  <text
+     id="text623"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12191 12420 12597 12775 12953 13131 13309 13486 13664 13825 13986 14164 14426 14604 14710 14871 15049 15154 15332 15510 15616"
+       y="18007"
+       id="tspan625">w_e_end_csum_rs_req()</tspan>
+  </text>
+  <text
+     id="text639"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="5735 5913 6091 6180 6357 6446 6607 6696 6874 7085 7246 7424 7585 7691"
+       y="18507"
+       id="tspan641">got_IsInSync()</tspan>
+  </text>
+  <text
+     id="text655"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="7999 8304 8541 8778 8990 9201 9413 9650 10001 10120 10357 10594 10806 11043 11280 11398 11703 11940 12152 12364 12601 12812 12931 13049 13261 13498 13710 13947 14065 14159 14396 14514 14726 14937 15175"
+       y="13877"
+       id="tspan657">Checksum based Resync, case in sync</tspan>
+  </text>
+  <path
+     d="M 12000,24601 L 11900,24301 L 12100,24301 L 12000,24601 z"
+     id="path667"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 12000,23801 L 12000,24361"
+     id="path671"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 8000,26401 L 7900,26101 L 8100,26101 L 8000,26401 z"
+     id="path683"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 8000,25601 L 8000,26161"
+     id="path687"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 12000,23801 L 11686,23840 L 11725,23644 L 12000,23801 z"
+     id="path699"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 8000,23001 L 11765,23754"
+     id="path703"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <g
+     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,-3543.8452,1630.5143)"
+     id="g719"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text721">
+      <tspan
+         x="9464 9710 9921 10150 10328 10505 10577"
+         y="25236"
+         id="tspan723">OVReply</tspan>
+    </text>
+  </g>
+  <path
+     d="M 8000,25601 L 8282,25458 L 8312,25655 L 8000,25601 z"
+     id="path733"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 12000,25001 L 8237,25565"
+     id="path737"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <g
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,4918.2801,-1381.2128)"
+     id="g753"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text755">
+      <tspan
+         x="9142 9388 9599 9828 10006 10183 10361 10539 10700"
+         y="23106"
+         id="tspan757">OVRequest</tspan>
+    </text>
+  </g>
+  <text
+     id="text771"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12200 12306 12484 12645 12822 12894 13055 13233 13411 13656 13868 14097 14274 14452 14630 14808 14969 15058 15163"
+       y="23806"
+       id="tspan773">receive_OVRequest()</tspan>
+  </text>
+  <text
+     id="text787"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12200 12378 12484 12661 12839 13017 13195 13373 13550 13622 13800 13978 14084 14262 14439 14617 14795 14956 15134 15295 15400"
+       y="24606"
+       id="tspan789">drbd_endio_read_sec()</tspan>
+  </text>
+  <text
+     id="text803"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12192 12421 12598 12776 12954 13132 13310 13487 13665 13843 14004 14182 14288 14465 14643 14749"
+       y="25007"
+       id="tspan805">w_e_end_ov_req()</tspan>
+  </text>
+  <text
+     id="text819"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="5101 5207 5385 5546 5723 5795 5956 6134 6312 6557 6769 6998 7175 7353 7425 7586 7692"
+       y="25507"
+       id="tspan821">receive_OVReply()</tspan>
+  </text>
+  <text
+     id="text835"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4492 4670 4776 4953 5131 5309 5487 5665 5842 5914 6092 6270 6376 6554 6731 6909 7087 7248 7426 7587 7692"
+       y="26407"
+       id="tspan837">drbd_endio_read_sec()</tspan>
+  </text>
+  <text
+     id="text851"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4902 5131 5308 5486 5664 5842 6020 6197 6375 6553 6714 6892 6998 7175 7353 7425 7586 7692"
+       y="26907"
+       id="tspan853">w_e_end_ov_reply()</tspan>
+  </text>
+  <path
+     d="M 12000,27601 L 11686,27640 L 11725,27444 L 12000,27601 z"
+     id="path863"
+     style="fill:#000080;visibility:visible" />
+  <path
+     d="M 8000,26801 L 11765,27554"
+     id="path867"
+     style="fill:none;stroke:#000080;visibility:visible" />
+  <g
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,5704.1907,-1328.312)"
+     id="g883"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text885">
+      <tspan
+         x="9279 9525 9736 9965 10143 10303 10481 10553"
+         y="26935"
+         id="tspan887">OVResult</tspan>
+    </text>
+  </g>
+  <text
+     id="text901"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12200 12378 12556 12645 12822 13068 13280 13508 13686 13847 14025 14097 14185 14291"
+       y="27559"
+       id="tspan903">got_OVResult()</tspan>
+  </text>
+  <text
+     id="text917"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="8000 8330 8567 8660 8754 8991 9228 9346 9558 9795 9935 10028 10146"
+       y="21877"
+       id="tspan919">Online verify</tspan>
+  </text>
+  <text
+     id="text933"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4641 4870 5047 5310 5488 5649 5826 6004 6182 6343 6521 6626 6804 6982 7160 7338 7499 7587 7693"
+       y="23005"
+       id="tspan935">w_make_ov_request()</tspan>
+  </text>
+  <path
+     d="M 8000,6500 L 7900,6200 L 8100,6200 L 8000,6500 z"
+     id="path945"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 8000,5700 L 8000,6260"
+     id="path949"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 3900,5500 L 3700,5500 L 3700,11000 L 3900,11000"
+     id="path961"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <path
+     d="M 3900,14500 L 3700,14500 L 3700,18600 L 3900,18600"
+     id="path973"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <path
+     d="M 3900,22800 L 3700,22800 L 3700,26900 L 3900,26900"
+     id="path985"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <text
+     id="text1001"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4492 4670 4776 4953 5131 5309 5487 5665 5842 5914 6092 6270 6376 6554 6731 6909 7087 7248 7426 7587 7692"
+       y="6506"
+       id="tspan1003">drbd_endio_read_sec()</tspan>
+  </text>
+  <text
+     id="text1017"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4034 4263 4440 4703 4881 5042 5219 5397 5503 5681 5842 6003 6180 6341 6519 6625 6803 6980 7158 7336 7497 7586 7692"
+       y="14708"
+       id="tspan1019">w_make_resync_request()</tspan>
+  </text>
+  <text
+     id="text1033"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="5190 5419 5596 5774 5952 6113 6291 6468 6646 6824 6985 7146 7324 7586 7692"
+       y="16006"
+       id="tspan1035">w_e_send_csum()</tspan>
+  </text>
+  <path
+     d="M 8000,15501 L 7900,15201 L 8100,15201 L 8000,15501 z"
+     id="path1045"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 8000,14701 L 8000,15261"
+     id="path1049"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <text
+     id="text1065"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4492 4670 4776 4953 5131 5309 5487 5665 5842 5914 6092 6270 6376 6554 6731 6909 7087 7248 7426 7587 7692"
+       y="15507"
+       id="tspan1067">drbd_endio_read_sec()</tspan>
+  </text>
+  <path
+     d="M 16100,9000 L 16300,9000 L 16300,7500 L 16100,7500"
+     id="path1077"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <path
+     d="M 16100,18000 L 16300,18000 L 16300,16500 L 16100,16500"
+     id="path1089"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <path
+     d="M 16100,25000 L 16300,25000 L 16300,23500 L 16100,23500"
+     id="path1101"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <text
+     id="text1117"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="2026 2132 2293 2471 2648 2826 3004 3076 3254 3431 3503 3681 3787"
+       y="5402"
+       id="tspan1119">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text1133"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="2027 2133 2294 2472 2649 2827 3005 3077 3255 3432 3504 3682 3788"
+       y="14402"
+       id="tspan1135">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text1149"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="2026 2132 2293 2471 2648 2826 3004 3076 3254 3431 3503 3681 3787"
+       y="22602"
+       id="tspan1151">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text1165"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="1426 1532 1693 1871 2031 2209 2472 2649 2721 2899 2988 3166 3344 3416 3593 3699"
+       y="11302"
+       id="tspan1167">rs_complete_io()</tspan>
+  </text>
+  <text
+     id="text1181"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="1526 1632 1793 1971 2131 2309 2572 2749 2821 2999 3088 3266 3444 3516 3693 3799"
+       y="18931"
+       id="tspan1183">rs_complete_io()</tspan>
+  </text>
+  <text
+     id="text1197"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="1526 1632 1793 1971 2131 2309 2572 2749 2821 2999 3088 3266 3444 3516 3693 3799"
+       y="27231"
+       id="tspan1199">rs_complete_io()</tspan>
+  </text>
+  <text
+     id="text1213"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16126 16232 16393 16571 16748 16926 17104 17176 17354 17531 17603 17781 17887"
+       y="7402"
+       id="tspan1215">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text1229"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16127 16233 16394 16572 16749 16927 17105 17177 17355 17532 17604 17782 17888"
+       y="16331"
+       id="tspan1231">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text1245"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16127 16233 16394 16572 16749 16927 17105 17177 17355 17532 17604 17782 17888"
+       y="23302"
+       id="tspan1247">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text1261"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16115 16221 16382 16560 16720 16898 17161 17338 17410 17588 17677 17855 18033 18105 18282 18388"
+       y="9302"
+       id="tspan1263">rs_complete_io()</tspan>
+  </text>
+  <text
+     id="text1277"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16115 16221 16382 16560 16720 16898 17161 17338 17410 17588 17677 17855 18033 18105 18282 18388"
+       y="18331"
+       id="tspan1279">rs_complete_io()</tspan>
+  </text>
+  <text
+     id="text1293"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16126 16232 16393 16571 16731 16909 17172 17349 17421 17599 17688 17866 18044 18116 18293 18399"
+       y="25302"
+       id="tspan1295">rs_complete_io()</tspan>
+  </text>
+</svg>
diff --git a/Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg b/Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg
new file mode 100644
index 000000000000..48a1e2165fec
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg
@@ -0,0 +1,459 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+<svg
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   version="1.0"
+   width="210mm"
+   height="297mm"
+   viewBox="0 0 21000 29700"
+   id="svg2"
+   style="fill-rule:evenodd">
+  <defs
+     id="defs4" />
+  <g
+     id="Default"
+     style="visibility:visible">
+    <desc
+       id="desc176">Master slide</desc>
+  </g>
+  <path
+     d="M 11999,19601 L 11899,19301 L 12099,19301 L 11999,19601 z"
+     id="path189"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 11999,18801 L 11999,19361"
+     id="path193"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 7999,21401 L 7899,21101 L 8099,21101 L 7999,21401 z"
+     id="path205"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 7999,20601 L 7999,21161"
+     id="path209"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 11999,18801 L 11685,18840 L 11724,18644 L 11999,18801 z"
+     id="path221"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 7999,18001 L 11764,18754"
+     id="path225"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <text
+     x="-3023.845"
+     y="1106.8124"
+     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,0,0)"
+     id="text243"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="6115.1553 6344.1553 6555.1553 6784.1553 6962.1553 7051.1553 7228.1553 7457.1553 7635.1553 7813.1553 7885.1553"
+       y="21390.812"
+       id="tspan245">RSDataReply</tspan>
+  </text>
+  <path
+     d="M 7999,20601 L 8281,20458 L 8311,20655 L 7999,20601 z"
+     id="path255"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 11999,20001 L 8236,20565"
+     id="path259"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <text
+     x="3502.5356"
+     y="-2184.6621"
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
+     id="text277"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12321.536 12550.536 12761.536 12990.536 13168.536 13257.536 13434.536 13663.536 13841.536 14019.536 14196.536 14374.536 14535.536"
+       y="15854.338"
+       id="tspan279">RSDataRequest</tspan>
+  </text>
+  <text
+     id="text293"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4034 4263 4440 4703 4881 5042 5219 5397 5503 5681 5842 6003 6180 6341 6519 6625 6803 6980 7158 7336 7497 7586 7692"
+       y="17807"
+       id="tspan295">w_make_resync_request()</tspan>
+  </text>
+  <text
+     id="text309"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12305 12483 12644 12821 12893 13054 13232 13410 13638 13816 13905 14083 14311 14489 14667 14845 15023 15184 15272 15378"
+       y="18806"
+       id="tspan311">receive_DataRequest()</tspan>
+  </text>
+  <text
+     id="text325"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12377 12483 12660 12838 13016 13194 13372 13549 13621 13799 13977 14083 14261 14438 14616 14794 14955 15133 15294 15399"
+       y="19606"
+       id="tspan327">drbd_endio_read_sec()</tspan>
+  </text>
+  <text
+     id="text341"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12191 12420 12597 12775 12953 13131 13309 13486 13664 13770 13931 14109 14287 14375 14553 14731 14837 15015 15192 15298"
+       y="20007"
+       id="tspan343">w_e_end_rsdata_req()</tspan>
+  </text>
+  <text
+     id="text357"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4444 4550 4728 4889 5066 5138 5299 5477 5655 5883 6095 6324 6501 6590 6768 6997 7175 7352 7424 7585 7691"
+       y="20507"
+       id="tspan359">receive_RSDataReply()</tspan>
+  </text>
+  <text
+     id="text373"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4457 4635 4741 4918 5096 5274 5452 5630 5807 5879 6057 6235 6464 6569 6641 6730 6908 7086 7247 7425 7585 7691"
+       y="21407"
+       id="tspan375">drbd_endio_write_sec()</tspan>
+  </text>
+  <text
+     id="text389"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4647 4825 5003 5180 5358 5536 5714 5820 5997 6158 6319 6497 6658 6836 7013 7085 7263 7424 7585 7691"
+       y="21907"
+       id="tspan391">e_end_resync_block()</tspan>
+  </text>
+  <path
+     d="M 11999,22601 L 11685,22640 L 11724,22444 L 11999,22601 z"
+     id="path401"
+     style="fill:#000080;visibility:visible" />
+  <path
+     d="M 7999,21801 L 11764,22554"
+     id="path405"
+     style="fill:none;stroke:#000080;visibility:visible" />
+  <text
+     x="4290.3008"
+     y="-2369.6162"
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
+     id="text423"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="13610.301 13911.301 14016.301 14088.301 14177.301 14355.301 14567.301 14728.301"
+       y="19573.385"
+       id="tspan425">WriteAck</tspan>
+  </text>
+  <text
+     id="text439"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12377 12555 12644 12821 13033 13105 13283 13444 13604 13816 13977 14138 14244"
+       y="22559"
+       id="tspan441">got_BlockAck()</tspan>
+  </text>
+  <text
+     id="text455"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="7999 8304 8541 8753 8964 9201 9413 9531 9769 9862 10099 10310 10522 10734 10852 10971 11208 11348 11585 11822"
+       y="16877"
+       id="tspan457">Resync blocks, 4-32K</tspan>
+  </text>
+  <path
+     d="M 12000,7601 L 11900,7301 L 12100,7301 L 12000,7601 z"
+     id="path467"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 12000,6801 L 12000,7361"
+     id="path471"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 12000,6801 L 11686,6840 L 11725,6644 L 12000,6801 z"
+     id="path483"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 8000,6001 L 11765,6754"
+     id="path487"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <text
+     x="-1288.1796"
+     y="1279.7666"
+     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,0,0)"
+     id="text505"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="8174.8208 8475.8203 8580.8203 8652.8203 8741.8203 8919.8203 9131.8203 9292.8203"
+       y="9516.7666"
+       id="tspan507">WriteAck</tspan>
+  </text>
+  <path
+     d="M 8000,8601 L 8282,8458 L 8312,8655 L 8000,8601 z"
+     id="path517"
+     style="fill:#000080;visibility:visible" />
+  <path
+     d="M 12000,8001 L 8237,8565"
+     id="path521"
+     style="fill:none;stroke:#000080;visibility:visible" />
+  <text
+     x="1065.6655"
+     y="-2097.7664"
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
+     id="text539"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="10682.666 10911.666 11088.666 11177.666"
+       y="4107.2339"
+       id="tspan541">Data</tspan>
+  </text>
+  <text
+     id="text555"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4746 4924 5030 5207 5385 5563 5826 6003 6164 6342 6520 6626 6803 6981 7159 7337 7498 7587 7692"
+       y="5505"
+       id="tspan557">drbd_make_request()</tspan>
+  </text>
+  <text
+     id="text571"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12200 12306 12484 12645 12822 12894 13055 13233 13411 13639 13817 13906 14084 14190"
+       y="6806"
+       id="tspan573">receive_Data()</tspan>
+  </text>
+  <text
+     id="text587"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12200 12378 12484 12661 12839 13017 13195 13373 13550 13622 13800 13978 14207 14312 14384 14473 14651 14829 14990 15168 15328 15434"
+       y="7606"
+       id="tspan589">drbd_endio_write_sec()</tspan>
+  </text>
+  <text
+     id="text603"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12192 12370 12548 12725 12903 13081 13259 13437 13509 13686 13847 14008 14114"
+       y="8007"
+       id="tspan605">e_end_block()</tspan>
+  </text>
+  <text
+     id="text619"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="5647 5825 6003 6092 6269 6481 6553 6731 6892 7052 7264 7425 7586 7692"
+       y="8606"
+       id="tspan621">got_BlockAck()</tspan>
+  </text>
+  <text
+     id="text635"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="8000 8305 8542 8779 9016 9109 9346 9486 9604 9956 10049 10189 10328 10565 10705 10942 11179 11298 11603 11742 11835 11954 12191 12310 12428 12665 12902 13139 13279 13516 13753"
+       y="4877"
+       id="tspan637">Regular mirrored write, 512-32K</tspan>
+  </text>
+  <text
+     id="text651"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="5381 5610 5787 5948 6126 6304 6482 6659 6837 7015 7087 7265 7426 7587 7692"
+       y="6003"
+       id="tspan653">w_send_dblock()</tspan>
+  </text>
+  <path
+     d="M 8000,6800 L 7900,6500 L 8100,6500 L 8000,6800 z"
+     id="path663"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 8000,6000 L 8000,6560"
+     id="path667"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <text
+     id="text683"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4602 4780 4886 5063 5241 5419 5597 5775 5952 6024 6202 6380 6609 6714 6786 6875 7053 7231 7409 7515 7587 7692"
+       y="6905"
+       id="tspan685">drbd_endio_write_pri()</tspan>
+  </text>
+  <path
+     d="M 12000,13602 L 11900,13302 L 12100,13302 L 12000,13602 z"
+     id="path695"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 12000,12802 L 12000,13362"
+     id="path699"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 12000,12802 L 11686,12841 L 11725,12645 L 12000,12802 z"
+     id="path711"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 8000,12002 L 11765,12755"
+     id="path715"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <text
+     x="-2155.5266"
+     y="1201.5964"
+     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,0,0)"
+     id="text733"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="7202.4736 7431.4736 7608.4736 7697.4736 7875.4736 8104.4736 8282.4736 8459.4736 8531.4736"
+       y="15454.597"
+       id="tspan735">DataReply</tspan>
+  </text>
+  <path
+     d="M 8000,14602 L 8282,14459 L 8312,14656 L 8000,14602 z"
+     id="path745"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 12000,14002 L 8237,14566"
+     id="path749"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <text
+     x="2280.3804"
+     y="-2103.2141"
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
+     id="text767"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="11316.381 11545.381 11722.381 11811.381 11989.381 12218.381 12396.381 12573.381 12751.381 12929.381 13090.381"
+       y="9981.7861"
+       id="tspan769">DataRequest</tspan>
+  </text>
+  <text
+     id="text783"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4746 4924 5030 5207 5385 5563 5826 6003 6164 6342 6520 6626 6803 6981 7159 7337 7498 7587 7692"
+       y="11506"
+       id="tspan785">drbd_make_request()</tspan>
+  </text>
+  <text
+     id="text799"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12200 12306 12484 12645 12822 12894 13055 13233 13411 13639 13817 13906 14084 14312 14490 14668 14846 15024 15185 15273 15379"
+       y="12807"
+       id="tspan801">receive_DataRequest()</tspan>
+  </text>
+  <text
+     id="text815"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12200 12378 12484 12661 12839 13017 13195 13373 13550 13622 13800 13978 14084 14262 14439 14617 14795 14956 15134 15295 15400"
+       y="13607"
+       id="tspan817">drbd_endio_read_sec()</tspan>
+  </text>
+  <text
+     id="text831"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12192 12421 12598 12776 12954 13132 13310 13487 13665 13843 14021 14110 14288 14465 14571 14749 14927 15033"
+       y="14008"
+       id="tspan833">w_e_end_data_req()</tspan>
+  </text>
+  <g
+     id="g835"
+     style="visibility:visible">
+    <desc
+       id="desc837">Drawing</desc>
+    <text
+       id="text847"
+       style="font-size:318px;font-weight:400;fill:#008000;font-family:Helvetica embedded">
+      <tspan
+         x="4885 4991 5169 5330 5507 5579 5740 5918 6096 6324 6502 6591 6769 6997 7175 7353 7425 7586 7692"
+         y="14607"
+         id="tspan849">receive_DataReply()</tspan>
+    </text>
+  </g>
+  <text
+     id="text863"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="8000 8305 8398 8610 8821 8914 9151 9363 9575 9693 9833 10070 10307 10544 10663 10781 11018 11255 11493 11632 11869 12106"
+       y="10878"
+       id="tspan865">Diskless read, 512-32K</tspan>
+  </text>
+  <text
+     id="text879"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="5029 5258 5435 5596 5774 5952 6130 6307 6413 6591 6769 6947 7125 7230 7408 7586 7692"
+       y="12004"
+       id="tspan881">w_send_read_req()</tspan>
+  </text>
+  <text
+     id="text895"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="6961 7266 7571 7854 8159 8278 8515 8633 8870 9107 9226 9463 9581 9700 9793 10030"
+       y="2806"
+       id="tspan897">DRBD 8 data flow</tspan>
+  </text>
+  <path
+     d="M 3900,5300 L 3700,5300 L 3700,7000 L 3900,7000"
+     id="path907"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <path
+     d="M 3900,17600 L 3700,17600 L 3700,22000 L 3900,22000"
+     id="path919"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <path
+     d="M 16100,20000 L 16300,20000 L 16300,18500 L 16100,18500"
+     id="path931"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <text
+     id="text947"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="2126 2304 2376 2554 2731 2909 3087 3159 3337 3515 3587 3764 3870"
+       y="5202"
+       id="tspan949">al_begin_io()</tspan>
+  </text>
+  <text
+     id="text963"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="1632 1810 1882 2060 2220 2398 2661 2839 2910 3088 3177 3355 3533 3605 3783 3888"
+       y="7331"
+       id="tspan965">al_complete_io()</tspan>
+  </text>
+  <text
+     id="text979"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="2126 2232 2393 2571 2748 2926 3104 3176 3354 3531 3603 3781 3887"
+       y="17431"
+       id="tspan981">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text995"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="1626 1732 1893 2071 2231 2409 2672 2849 2921 3099 3188 3366 3544 3616 3793 3899"
+       y="22331"
+       id="tspan997">rs_complete_io()</tspan>
+  </text>
+  <text
+     id="text1011"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16027 16133 16294 16472 16649 16827 17005 17077 17255 17432 17504 17682 17788"
+       y="18402"
+       id="tspan1013">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text1027"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16115 16221 16382 16560 16720 16898 17161 17338 17410 17588 17677 17855 18033 18105 18282 18388"
+       y="20331"
+       id="tspan1029">rs_complete_io()</tspan>
+  </text>
+</svg>
diff --git a/Documentation/admin-guide/blockdev/drbd/conn-states-8.dot b/Documentation/admin-guide/blockdev/drbd/conn-states-8.dot
new file mode 100644
index 000000000000..025e8cf5e64a
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/drbd/conn-states-8.dot
@@ -0,0 +1,18 @@
+digraph conn_states {
+	StandAllone  -> WFConnection   [ label = "ioctl_set_net()" ]
+	WFConnection -> Unconnected    [ label = "unable to bind()" ]
+	WFConnection -> WFReportParams [ label = "in connect() after accept" ]
+	WFReportParams -> StandAllone  [ label = "checks in receive_param()" ]
+	WFReportParams -> Connected    [ label = "in receive_param()" ]
+	WFReportParams -> WFBitMapS    [ label = "sync_handshake()" ]
+	WFReportParams -> WFBitMapT    [ label = "sync_handshake()" ]
+	WFBitMapS -> SyncSource        [ label = "receive_bitmap()" ]
+	WFBitMapT -> SyncTarget        [ label = "receive_bitmap()" ]
+	SyncSource -> Connected
+	SyncTarget -> Connected
+	SyncSource -> PausedSyncS
+	SyncTarget -> PausedSyncT
+	PausedSyncS -> SyncSource
+	PausedSyncT -> SyncTarget
+	Connected   -> WFConnection    [ label = "* on network error" ]
+}
diff --git a/Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst b/Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst
new file mode 100644
index 000000000000..66036b901644
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst
@@ -0,0 +1,42 @@
+================================
+kernel data structure for DRBD-9
+================================
+
+This describes the in kernel data structure for DRBD-9. Starting with
+Linux v3.14 we are reorganizing DRBD to use this data structure.
+
+Basic Data Structure
+====================
+
+A node has a number of DRBD resources.  Each such resource has a number of
+devices (aka volumes) and connections to other nodes ("peer nodes"). Each DRBD
+device is represented by a block device locally.
+
+The DRBD objects are interconnected to form a matrix as depicted below; a
+drbd_peer_device object sits at each intersection between a drbd_device and a
+drbd_connection::
+
+  /--------------+---------------+.....+---------------\
+  |   resource   |    device     |     |    device     |
+  +--------------+---------------+.....+---------------+
+  |  connection  |  peer_device  |     |  peer_device  |
+  +--------------+---------------+.....+---------------+
+  :              :               :     :               :
+  :              :               :     :               :
+  +--------------+---------------+.....+---------------+
+  |  connection  |  peer_device  |     |  peer_device  |
+  \--------------+---------------+.....+---------------/
+
+In this table, horizontally, devices can be accessed from resources by their
+volume number.  Likewise, peer_devices can be accessed from connections by
+their volume number.  Objects in the vertical direction are connected by double
+linked lists.  There are back pointers from peer_devices to their connections a
+devices, and from connections and devices to their resource.
+
+All resources are in the drbd_resources double-linked list.  In addition, all
+devices can be accessed by their minor device number via the drbd_devices idr.
+
+The drbd_resource, drbd_connection, and drbd_device objects are reference
+counted.  The peer_device objects only serve to establish the links between
+devices and connections; their lifetime is determined by the lifetime of the
+device and connection which they reference.
diff --git a/Documentation/admin-guide/blockdev/drbd/disk-states-8.dot b/Documentation/admin-guide/blockdev/drbd/disk-states-8.dot
new file mode 100644
index 000000000000..d06cfb46fb98
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/drbd/disk-states-8.dot
@@ -0,0 +1,16 @@
+digraph disk_states {
+	Diskless -> Inconsistent       [ label = "ioctl_set_disk()" ]
+	Diskless -> Consistent         [ label = "ioctl_set_disk()" ]
+	Diskless -> Outdated           [ label = "ioctl_set_disk()" ]
+	Consistent -> Outdated         [ label = "receive_param()" ]
+	Consistent -> UpToDate         [ label = "receive_param()" ]
+	Consistent -> Inconsistent     [ label = "start resync" ]
+	Outdated   -> Inconsistent     [ label = "start resync" ]
+	UpToDate   -> Inconsistent     [ label = "ioctl_replicate" ]
+	Inconsistent -> UpToDate       [ label = "resync completed" ]
+	Consistent -> Failed           [ label = "io completion error" ]
+	Outdated   -> Failed           [ label = "io completion error" ]
+	UpToDate   -> Failed           [ label = "io completion error" ]
+	Inconsistent -> Failed         [ label = "io completion error" ]
+	Failed -> Diskless             [ label = "sending notify to peer" ]
+}
diff --git a/Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot b/Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot
new file mode 100644
index 000000000000..6d9cf0a7b11d
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot
@@ -0,0 +1,85 @@
+// vim: set sw=2 sts=2 :
+digraph {
+  rankdir=BT
+  bgcolor=white
+
+  node [shape=plaintext]
+  node [fontcolor=black]
+
+  StandAlone     [ style=filled,fillcolor=gray,label=StandAlone ]
+
+  node [fontcolor=lightgray]
+
+  Unconnected    [ label=Unconnected ]
+
+  CommTrouble [ shape=record,
+    label="{communication loss|{Timeout|BrokenPipe|NetworkFailure}}" ]
+
+  node [fontcolor=gray]
+
+  subgraph cluster_try_connect {
+    label="try to connect, handshake"
+    rank=max
+    WFConnection   [ label=WFConnection ]
+    WFReportParams [ label=WFReportParams ]
+  }
+
+  TearDown       [ label=TearDown ]
+
+  Connected      [ label=Connected,style=filled,fillcolor=green,fontcolor=black ]
+
+  node [fontcolor=lightblue]
+
+  StartingSyncS  [ label=StartingSyncS ]
+  StartingSyncT  [ label=StartingSyncT ]
+
+  subgraph cluster_bitmap_exchange {
+    node [fontcolor=red]
+    fontcolor=red
+    label="new application (WRITE?) requests blocked\lwhile bitmap is exchanged"
+
+    WFBitMapT      [ label=WFBitMapT ]
+    WFSyncUUID     [ label=WFSyncUUID ]
+    WFBitMapS      [ label=WFBitMapS ]
+  }
+
+  node [fontcolor=blue]
+
+  cluster_resync [ shape=record,label="{<any>resynchronisation process running\l'concurrent' application requests allowed|{{<T>PausedSyncT\nSyncTarget}|{<S>PausedSyncS\nSyncSource}}}" ]
+
+  node [shape=box,fontcolor=black]
+
+  // drbdadm [label="drbdadm connect"]
+  // handshake [label="drbd_connect()\ndrbd_do_handshake\ndrbd_sync_handshake() etc."]
+  // comm_error [label="communication trouble"]
+
+  //
+  // edges
+  // --------------------------------------
+
+  StandAlone -> Unconnected [ label="drbdadm connect" ]
+  Unconnected -> StandAlone  [ label="drbdadm disconnect\lor serious communication trouble" ]
+  Unconnected -> WFConnection [ label="receiver thread is started" ]
+  WFConnection -> WFReportParams [ headlabel="accept()\land/or                        \lconnect()\l" ]
+
+  WFReportParams -> StandAlone [ label="during handshake\lpeers do not agree\labout something essential" ]
+  WFReportParams -> Connected [ label="data identical\lno sync needed",color=green,fontcolor=green ]
+
+    WFReportParams -> WFBitMapS
+    WFReportParams -> WFBitMapT
+    WFBitMapT -> WFSyncUUID [minlen=0.1,constraint=false]
+
+      WFBitMapS -> cluster_resync:S
+      WFSyncUUID -> cluster_resync:T
+
+  edge [color=green]
+  cluster_resync:any -> Connected [ label="resnyc done",fontcolor=green ]
+
+  edge [color=red]
+  WFReportParams -> CommTrouble
+  Connected -> CommTrouble
+  cluster_resync:any -> CommTrouble
+  edge [color=black]
+  CommTrouble -> Unconnected [label="receiver thread is stopped" ]
+
+}
diff --git a/Documentation/admin-guide/blockdev/drbd/figures.rst b/Documentation/admin-guide/blockdev/drbd/figures.rst
new file mode 100644
index 000000000000..3e3fd4b8a478
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/drbd/figures.rst
@@ -0,0 +1,28 @@
+.. The here included files are intended to help understand the implementation
+
+Data flows that Relate some functions, and write packets
+========================================================
+
+.. kernel-figure:: DRBD-8.3-data-packets.svg
+    :alt:   DRBD-8.3-data-packets.svg
+    :align: center
+
+.. kernel-figure:: DRBD-data-packets.svg
+    :alt:   DRBD-data-packets.svg
+    :align: center
+
+
+Sub graphs of DRBD's state transitions
+======================================
+
+.. kernel-figure:: conn-states-8.dot
+    :alt:   conn-states-8.dot
+    :align: center
+
+.. kernel-figure:: disk-states-8.dot
+    :alt:   disk-states-8.dot
+    :align: center
+
+.. kernel-figure:: node-states-8.dot
+    :alt:   node-states-8.dot
+    :align: center
diff --git a/Documentation/admin-guide/blockdev/drbd/index.rst b/Documentation/admin-guide/blockdev/drbd/index.rst
new file mode 100644
index 000000000000..68ecd5c113e9
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/drbd/index.rst
@@ -0,0 +1,19 @@
+==========================================
+Distributed Replicated Block Device - DRBD
+==========================================
+
+Description
+===========
+
+  DRBD is a shared-nothing, synchronously replicated block device. It
+  is designed to serve as a building block for high availability
+  clusters and in this context, is a "drop-in" replacement for shared
+  storage. Simplistically, you could see it as a network RAID 1.
+
+  Please visit http://www.drbd.org to find out more.
+
+.. toctree::
+   :maxdepth: 1
+
+   data-structure-v9
+   figures
diff --git a/Documentation/admin-guide/blockdev/drbd/node-states-8.dot b/Documentation/admin-guide/blockdev/drbd/node-states-8.dot
new file mode 100644
index 000000000000..bfa54e1f8016
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/drbd/node-states-8.dot
@@ -0,0 +1,13 @@
+digraph node_states {
+	Secondary -> Primary           [ label = "ioctl_set_state()" ]
+	Primary   -> Secondary 	       [ label = "ioctl_set_state()" ]
+}
+
+digraph peer_states {
+	Secondary -> Primary           [ label = "recv state packet" ]
+	Primary   -> Secondary 	       [ label = "recv state packet" ]
+	Primary   -> Unknown 	       [ label = "connection lost" ]
+	Secondary  -> Unknown  	       [ label = "connection lost" ]
+	Unknown   -> Primary           [ label = "connected" ]
+	Unknown   -> Secondary         [ label = "connected" ]
+}
diff --git a/Documentation/admin-guide/blockdev/floppy.rst b/Documentation/admin-guide/blockdev/floppy.rst
new file mode 100644
index 000000000000..4a8f31cf4139
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/floppy.rst
@@ -0,0 +1,255 @@
+=============
+Floppy Driver
+=============
+
+FAQ list:
+=========
+
+A FAQ list may be found in the fdutils package (see below), and also
+at <http://fdutils.linux.lu/faq.html>.
+
+
+LILO configuration options (Thinkpad users, read this)
+======================================================
+
+The floppy driver is configured using the 'floppy=' option in
+lilo. This option can be typed at the boot prompt, or entered in the
+lilo configuration file.
+
+Example: If your kernel is called linux-2.6.9, type the following line
+at the lilo boot prompt (if you have a thinkpad)::
+
+ linux-2.6.9 floppy=thinkpad
+
+You may also enter the following line in /etc/lilo.conf, in the description
+of linux-2.6.9::
+
+ append = "floppy=thinkpad"
+
+Several floppy related options may be given, example::
+
+ linux-2.6.9 floppy=daring floppy=two_fdc
+ append = "floppy=daring floppy=two_fdc"
+
+If you give options both in the lilo config file and on the boot
+prompt, the option strings of both places are concatenated, the boot
+prompt options coming last. That's why there are also options to
+restore the default behavior.
+
+
+Module configuration options
+============================
+
+If you use the floppy driver as a module, use the following syntax::
+
+	modprobe floppy floppy="<options>"
+
+Example::
+
+	modprobe floppy floppy="omnibook messages"
+
+If you need certain options enabled every time you load the floppy driver,
+you can put::
+
+	options floppy floppy="omnibook messages"
+
+in a configuration file in /etc/modprobe.d/.
+
+
+The floppy driver related options are:
+
+ floppy=asus_pci
+	Sets the bit mask to allow only units 0 and 1. (default)
+
+ floppy=daring
+	Tells the floppy driver that you have a well behaved floppy controller.
+	This allows more efficient and smoother operation, but may fail on
+	certain controllers. This may speed up certain operations.
+
+ floppy=0,daring
+	Tells the floppy driver that your floppy controller should be used
+	with caution.
+
+ floppy=one_fdc
+	Tells the floppy driver that you have only one floppy controller.
+	(default)
+
+ floppy=two_fdc / floppy=<address>,two_fdc
+	Tells the floppy driver that you have two floppy controllers.
+	The second floppy controller is assumed to be at <address>.
+	This option is not needed if the second controller is at address
+	0x370, and if you use the 'cmos' option.
+
+ floppy=thinkpad
+	Tells the floppy driver that you have a Thinkpad. Thinkpads use an
+	inverted convention for the disk change line.
+
+ floppy=0,thinkpad
+	Tells the floppy driver that you don't have a Thinkpad.
+
+ floppy=omnibook / floppy=nodma
+	Tells the floppy driver not to use Dma for data transfers.
+	This is needed on HP Omnibooks, which don't have a workable
+	DMA channel for the floppy driver. This option is also useful
+	if you frequently get "Unable to allocate DMA memory" messages.
+	Indeed, dma memory needs to be continuous in physical memory,
+	and is thus harder to find, whereas non-dma buffers may be
+	allocated in virtual memory. However, I advise against this if
+	you have an FDC without a FIFO (8272A or 82072). 82072A and
+	later are OK. You also need at least a 486 to use nodma.
+	If you use nodma mode, I suggest you also set the FIFO
+	threshold to 10 or lower, in order to limit the number of data
+	transfer interrupts.
+
+	If you have a FIFO-able FDC, the floppy driver automatically
+	falls back on non DMA mode if no DMA-able memory can be found.
+	If you want to avoid this, explicitly ask for 'yesdma'.
+
+ floppy=yesdma
+	Tells the floppy driver that a workable DMA channel is available.
+	(default)
+
+ floppy=nofifo
+	Disables the FIFO entirely. This is needed if you get "Bus
+	master arbitration error" messages from your Ethernet card (or
+	from other devices) while accessing the floppy.
+
+ floppy=usefifo
+	Enables the FIFO. (default)
+
+ floppy=<threshold>,fifo_depth
+	Sets the FIFO threshold. This is mostly relevant in DMA
+	mode. If this is higher, the floppy driver tolerates more
+	interrupt latency, but it triggers more interrupts (i.e. it
+	imposes more load on the rest of the system). If this is
+	lower, the interrupt latency should be lower too (faster
+	processor). The benefit of a lower threshold is less
+	interrupts.
+
+	To tune the fifo threshold, switch on over/underrun messages
+	using 'floppycontrol --messages'. Then access a floppy
+	disk. If you get a huge amount of "Over/Underrun - retrying"
+	messages, then the fifo threshold is too low. Try with a
+	higher value, until you only get an occasional Over/Underrun.
+	It is a good idea to compile the floppy driver as a module
+	when doing this tuning. Indeed, it allows to try different
+	fifo values without rebooting the machine for each test. Note
+	that you need to do 'floppycontrol --messages' every time you
+	re-insert the module.
+
+	Usually, tuning the fifo threshold should not be needed, as
+	the default (0xa) is reasonable.
+
+ floppy=<drive>,<type>,cmos
+	Sets the CMOS type of <drive> to <type>. This is mandatory if
+	you have more than two floppy drives (only two can be
+	described in the physical CMOS), or if your BIOS uses
+	non-standard CMOS types. The CMOS types are:
+
+	       ==  ==================================
+		0  Use the value of the physical CMOS
+		1  5 1/4 DD
+		2  5 1/4 HD
+		3  3 1/2 DD
+		4  3 1/2 HD
+		5  3 1/2 ED
+		6  3 1/2 ED
+	       16  unknown or not installed
+	       ==  ==================================
+
+	(Note: there are two valid types for ED drives. This is because 5 was
+	initially chosen to represent floppy *tapes*, and 6 for ED drives.
+	AMI ignored this, and used 5 for ED drives. That's why the floppy
+	driver handles both.)
+
+ floppy=unexpected_interrupts
+	Print a warning message when an unexpected interrupt is received.
+	(default)
+
+ floppy=no_unexpected_interrupts / floppy=L40SX
+	Don't print a message when an unexpected interrupt is received. This
+	is needed on IBM L40SX laptops in certain video modes. (There seems
+	to be an interaction between video and floppy. The unexpected
+	interrupts affect only performance, and can be safely ignored.)
+
+ floppy=broken_dcl
+	Don't use the disk change line, but assume that the disk was
+	changed whenever the device node is reopened. Needed on some
+	boxes where the disk change line is broken or unsupported.
+	This should be regarded as a stopgap measure, indeed it makes
+	floppy operation less efficient due to unneeded cache
+	flushings, and slightly more unreliable. Please verify your
+	cable, connection and jumper settings if you have any DCL
+	problems. However, some older drives, and also some laptops
+	are known not to have a DCL.
+
+ floppy=debug
+	Print debugging messages.
+
+ floppy=messages
+	Print informational messages for some operations (disk change
+	notifications, warnings about over and underruns, and about
+	autodetection).
+
+ floppy=silent_dcl_clear
+	Uses a less noisy way to clear the disk change line (which
+	doesn't involve seeks). Implied by 'daring' option.
+
+ floppy=<nr>,irq
+	Sets the floppy IRQ to <nr> instead of 6.
+
+ floppy=<nr>,dma
+	Sets the floppy DMA channel to <nr> instead of 2.
+
+ floppy=slow
+	Use PS/2 stepping rate::
+
+	   PS/2 floppies have much slower step rates than regular floppies.
+	   It's been recommended that take about 1/4 of the default speed
+	   in some more extreme cases.
+
+
+Supporting utilities and additional documentation:
+==================================================
+
+Additional parameters of the floppy driver can be configured at
+runtime. Utilities which do this can be found in the fdutils package.
+This package also contains a new version of mtools which allows to
+access high capacity disks (up to 1992K on a high density 3 1/2 disk!).
+It also contains additional documentation about the floppy driver.
+
+The latest version can be found at fdutils homepage:
+
+ http://fdutils.linux.lu
+
+The fdutils releases can be found at:
+
+ http://fdutils.linux.lu/download.html
+
+ http://www.tux.org/pub/knaff/fdutils/
+
+ ftp://metalab.unc.edu/pub/Linux/utils/disk-management/
+
+Reporting problems about the floppy driver
+==========================================
+
+If you have a question or a bug report about the floppy driver, mail
+me at Alain.Knaff@poboxes.com . If you post to Usenet, preferably use
+comp.os.linux.hardware. As the volume in these groups is rather high,
+be sure to include the word "floppy" (or "FLOPPY") in the subject
+line.  If the reported problem happens when mounting floppy disks, be
+sure to mention also the type of the filesystem in the subject line.
+
+Be sure to read the FAQ before mailing/posting any bug reports!
+
+Alain
+
+Changelog
+=========
+
+10-30-2004 :
+		Cleanup, updating, add reference to module configuration.
+		James Nelson <james4765@gmail.com>
+
+6-3-2000 :
+		Original Document
diff --git a/Documentation/admin-guide/blockdev/index.rst b/Documentation/admin-guide/blockdev/index.rst
new file mode 100644
index 000000000000..20a738d9d047
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/index.rst
@@ -0,0 +1,14 @@
+===========================
+The Linux RapidIO Subsystem
+===========================
+
+.. toctree::
+   :maxdepth: 1
+
+   floppy
+   nbd
+   paride
+   ramdisk
+   zram
+
+   drbd/index
diff --git a/Documentation/admin-guide/blockdev/nbd.rst b/Documentation/admin-guide/blockdev/nbd.rst
new file mode 100644
index 000000000000..d78dfe559dcf
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/nbd.rst
@@ -0,0 +1,31 @@
+==================================
+Network Block Device (TCP version)
+==================================
+
+1) Overview
+-----------
+
+What is it: With this compiled in the kernel (or as a module), Linux
+can use a remote server as one of its block devices. So every time
+the client computer wants to read, e.g., /dev/nb0, it sends a
+request over TCP to the server, which will reply with the data read.
+This can be used for stations with low disk space (or even diskless)
+to borrow disk space from another computer.
+Unlike NFS, it is possible to put any filesystem on it, etc.
+
+For more information, or to download the nbd-client and nbd-server
+tools, go to http://nbd.sf.net/.
+
+The nbd kernel module need only be installed on the client
+system, as the nbd-server is completely in userspace. In fact,
+the nbd-server has been successfully ported to other operating
+systems, including Windows.
+
+A) NBD parameters
+-----------------
+
+max_part
+	Number of partitions per device (default: 0).
+
+nbds_max
+	Number of block devices that should be initialized (default: 16).
diff --git a/Documentation/admin-guide/blockdev/paride.rst b/Documentation/admin-guide/blockdev/paride.rst
new file mode 100644
index 000000000000..87b4278bf314
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/paride.rst
@@ -0,0 +1,439 @@
+===================================
+Linux and parallel port IDE devices
+===================================
+
+PARIDE v1.03   (c) 1997-8  Grant Guenther <grant@torque.net>
+
+1. Introduction
+===============
+
+Owing to the simplicity and near universality of the parallel port interface
+to personal computers, many external devices such as portable hard-disk,
+CD-ROM, LS-120 and tape drives use the parallel port to connect to their
+host computer.  While some devices (notably scanners) use ad-hoc methods
+to pass commands and data through the parallel port interface, most
+external devices are actually identical to an internal model, but with
+a parallel-port adapter chip added in.  Some of the original parallel port
+adapters were little more than mechanisms for multiplexing a SCSI bus.
+(The Iomega PPA-3 adapter used in the ZIP drives is an example of this
+approach).  Most current designs, however, take a different approach.
+The adapter chip reproduces a small ISA or IDE bus in the external device
+and the communication protocol provides operations for reading and writing
+device registers, as well as data block transfer functions.  Sometimes,
+the device being addressed via the parallel cable is a standard SCSI
+controller like an NCR 5380.  The "ditto" family of external tape
+drives use the ISA replicator to interface a floppy disk controller,
+which is then connected to a floppy-tape mechanism.  The vast majority
+of external parallel port devices, however, are now based on standard
+IDE type devices, which require no intermediate controller.  If one
+were to open up a parallel port CD-ROM drive, for instance, one would
+find a standard ATAPI CD-ROM drive, a power supply, and a single adapter
+that interconnected a standard PC parallel port cable and a standard
+IDE cable.  It is usually possible to exchange the CD-ROM device with
+any other device using the IDE interface.
+
+The document describes the support in Linux for parallel port IDE
+devices.  It does not cover parallel port SCSI devices, "ditto" tape
+drives or scanners.  Many different devices are supported by the
+parallel port IDE subsystem, including:
+
+	- MicroSolutions backpack CD-ROM
+	- MicroSolutions backpack PD/CD
+	- MicroSolutions backpack hard-drives
+	- MicroSolutions backpack 8000t tape drive
+	- SyQuest EZ-135, EZ-230 & SparQ drives
+	- Avatar Shark
+	- Imation Superdisk LS-120
+	- Maxell Superdisk LS-120
+	- FreeCom Power CD
+	- Hewlett-Packard 5GB and 8GB tape drives
+	- Hewlett-Packard 7100 and 7200 CD-RW drives
+
+as well as most of the clone and no-name products on the market.
+
+To support such a wide range of devices, PARIDE, the parallel port IDE
+subsystem, is actually structured in three parts.   There is a base
+paride module which provides a registry and some common methods for
+accessing the parallel ports.  The second component is a set of
+high-level drivers for each of the different types of supported devices:
+
+	===	=============
+	pd	IDE disk
+	pcd	ATAPI CD-ROM
+	pf	ATAPI disk
+	pt	ATAPI tape
+	pg	ATAPI generic
+	===	=============
+
+(Currently, the pg driver is only used with CD-R drives).
+
+The high-level drivers function according to the relevant standards.
+The third component of PARIDE is a set of low-level protocol drivers
+for each of the parallel port IDE adapter chips.  Thanks to the interest
+and encouragement of Linux users from many parts of the world,
+support is available for almost all known adapter protocols:
+
+	====    ====================================== ====
+        aten    ATEN EH-100                            (HK)
+        bpck    Microsolutions backpack                (US)
+        comm    DataStor (old-type) "commuter" adapter (TW)
+        dstr    DataStor EP-2000                       (TW)
+        epat    Shuttle EPAT                           (UK)
+        epia    Shuttle EPIA                           (UK)
+	fit2    FIT TD-2000			       (US)
+	fit3    FIT TD-3000			       (US)
+	friq    Freecom IQ cable                       (DE)
+        frpw    Freecom Power                          (DE)
+        kbic    KingByte KBIC-951A and KBIC-971A       (TW)
+	ktti    KT Technology PHd adapter              (SG)
+        on20    OnSpec 90c20                           (US)
+        on26    OnSpec 90c26                           (US)
+	====    ====================================== ====
+
+
+2. Using the PARIDE subsystem
+=============================
+
+While configuring the Linux kernel, you may choose either to build
+the PARIDE drivers into your kernel, or to build them as modules.
+
+In either case, you will need to select "Parallel port IDE device support"
+as well as at least one of the high-level drivers and at least one
+of the parallel port communication protocols.  If you do not know
+what kind of parallel port adapter is used in your drive, you could
+begin by checking the file names and any text files on your DOS
+installation floppy.  Alternatively, you can look at the markings on
+the adapter chip itself.  That's usually sufficient to identify the
+correct device.
+
+You can actually select all the protocol modules, and allow the PARIDE
+subsystem to try them all for you.
+
+For the "brand-name" products listed above, here are the protocol
+and high-level drivers that you would use:
+
+	================	============	======	========
+	Manufacturer		Model		Driver	Protocol
+	================	============	======	========
+	MicroSolutions		CD-ROM		pcd	bpck
+	MicroSolutions		PD drive	pf	bpck
+	MicroSolutions		hard-drive	pd	bpck
+	MicroSolutions          8000t tape      pt      bpck
+	SyQuest			EZ, SparQ	pd	epat
+	Imation			Superdisk	pf	epat
+	Maxell                  Superdisk       pf      friq
+	Avatar			Shark		pd	epat
+	FreeCom			CD-ROM		pcd	frpw
+	Hewlett-Packard		5GB Tape	pt	epat
+	Hewlett-Packard		7200e (CD)	pcd	epat
+	Hewlett-Packard		7200e (CD-R)	pg	epat
+	================	============	======	========
+
+2.1  Configuring built-in drivers
+---------------------------------
+
+We recommend that you get to know how the drivers work and how to
+configure them as loadable modules, before attempting to compile a
+kernel with the drivers built-in.
+
+If you built all of your PARIDE support directly into your kernel,
+and you have just a single parallel port IDE device, your kernel should
+locate it automatically for you.  If you have more than one device,
+you may need to give some command line options to your bootloader
+(eg: LILO), how to do that is beyond the scope of this document.
+
+The high-level drivers accept a number of command line parameters, all
+of which are documented in the source files in linux/drivers/block/paride.
+By default, each driver will automatically try all parallel ports it
+can find, and all protocol types that have been installed, until it finds
+a parallel port IDE adapter.  Once it finds one, the probe stops.  So,
+if you have more than one device, you will need to tell the drivers
+how to identify them.  This requires specifying the port address, the
+protocol identification number and, for some devices, the drive's
+chain ID.  While your system is booting, a number of messages are
+displayed on the console.  Like all such messages, they can be
+reviewed with the 'dmesg' command.  Among those messages will be
+some lines like::
+
+	paride: bpck registered as protocol 0
+	paride: epat registered as protocol 1
+
+The numbers will always be the same until you build a new kernel with
+different protocol selections.  You should note these numbers as you
+will need them to identify the devices.
+
+If you happen to be using a MicroSolutions backpack device, you will
+also need to know the unit ID number for each drive.  This is usually
+the last two digits of the drive's serial number (but read MicroSolutions'
+documentation about this).
+
+As an example, let's assume that you have a MicroSolutions PD/CD drive
+with unit ID number 36 connected to the parallel port at 0x378, a SyQuest
+EZ-135 connected to the chained port on the PD/CD drive and also an
+Imation Superdisk connected to port 0x278.  You could give the following
+options on your boot command::
+
+	pd.drive0=0x378,1 pf.drive0=0x278,1 pf.drive1=0x378,0,36
+
+In the last option, pf.drive1 configures device /dev/pf1, the 0x378
+is the parallel port base address, the 0 is the protocol registration
+number and 36 is the chain ID.
+
+Please note:  while PARIDE will work both with and without the
+PARPORT parallel port sharing system that is included by the
+"Parallel port support" option, PARPORT must be included and enabled
+if you want to use chains of devices on the same parallel port.
+
+2.2  Loading and configuring PARIDE as modules
+----------------------------------------------
+
+It is much faster and simpler to get to understand the PARIDE drivers
+if you use them as loadable kernel modules.
+
+Note 1:
+	using these drivers with the "kerneld" automatic module loading
+	system is not recommended for beginners, and is not documented here.
+
+Note 2:
+	if you build PARPORT support as a loadable module, PARIDE must
+	also be built as loadable modules, and PARPORT must be loaded before
+	the PARIDE modules.
+
+To use PARIDE, you must begin by::
+
+	insmod paride
+
+this loads a base module which provides a registry for the protocols,
+among other tasks.
+
+Then, load as many of the protocol modules as you think you might need.
+As you load each module, it will register the protocols that it supports,
+and print a log message to your kernel log file and your console. For
+example::
+
+	# insmod epat
+	paride: epat registered as protocol 0
+	# insmod kbic
+	paride: k951 registered as protocol 1
+        paride: k971 registered as protocol 2
+
+Finally, you can load high-level drivers for each kind of device that
+you have connected.  By default, each driver will autoprobe for a single
+device, but you can support up to four similar devices by giving their
+individual co-ordinates when you load the driver.
+
+For example, if you had two no-name CD-ROM drives both using the
+KingByte KBIC-951A adapter, one on port 0x378 and the other on 0x3bc
+you could give the following command::
+
+	# insmod pcd drive0=0x378,1 drive1=0x3bc,1
+
+For most adapters, giving a port address and protocol number is sufficient,
+but check the source files in linux/drivers/block/paride for more
+information.  (Hopefully someone will write some man pages one day !).
+
+As another example, here's what happens when PARPORT is installed, and
+a SyQuest EZ-135 is attached to port 0x378::
+
+	# insmod paride
+	paride: version 1.0 installed
+	# insmod epat
+	paride: epat registered as protocol 0
+	# insmod pd
+	pd: pd version 1.0, major 45, cluster 64, nice 0
+	pda: Sharing parport1 at 0x378
+	pda: epat 1.0, Shuttle EPAT chip c3 at 0x378, mode 5 (EPP-32), delay 1
+	pda: SyQuest EZ135A, 262144 blocks [128M], (512/16/32), removable media
+	 pda: pda1
+
+Note that the last line is the output from the generic partition table
+scanner - in this case it reports that it has found a disk with one partition.
+
+2.3  Using a PARIDE device
+--------------------------
+
+Once the drivers have been loaded, you can access PARIDE devices in the
+same way as their traditional counterparts.  You will probably need to
+create the device "special files".  Here is a simple script that you can
+cut to a file and execute::
+
+  #!/bin/bash
+  #
+  # mkd -- a script to create the device special files for the PARIDE subsystem
+  #
+  function mkdev {
+    mknod $1 $2 $3 $4 ; chmod 0660 $1 ; chown root:disk $1
+  }
+  #
+  function pd {
+    D=$( printf \\$( printf "x%03x" $[ $1 + 97 ] ) )
+    mkdev pd$D b 45 $[ $1 * 16 ]
+    for P in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+    do mkdev pd$D$P b 45 $[ $1 * 16 + $P ]
+    done
+  }
+  #
+  cd /dev
+  #
+  for u in 0 1 2 3 ; do pd $u ; done
+  for u in 0 1 2 3 ; do mkdev pcd$u b 46 $u ; done
+  for u in 0 1 2 3 ; do mkdev pf$u  b 47 $u ; done
+  for u in 0 1 2 3 ; do mkdev pt$u  c 96 $u ; done
+  for u in 0 1 2 3 ; do mkdev npt$u c 96 $[ $u + 128 ] ; done
+  for u in 0 1 2 3 ; do mkdev pg$u  c 97 $u ; done
+  #
+  # end of mkd
+
+With the device files and drivers in place, you can access PARIDE devices
+like any other Linux device.   For example, to mount a CD-ROM in pcd0, use::
+
+	mount /dev/pcd0 /cdrom
+
+If you have a fresh Avatar Shark cartridge, and the drive is pda, you
+might do something like::
+
+	fdisk /dev/pda		-- make a new partition table with
+				   partition 1 of type 83
+
+	mke2fs /dev/pda1	-- to build the file system
+
+	mkdir /shark		-- make a place to mount the disk
+
+	mount /dev/pda1 /shark
+
+Devices like the Imation superdisk work in the same way, except that
+they do not have a partition table.  For example to make a 120MB
+floppy that you could share with a DOS system::
+
+	mkdosfs /dev/pf0
+	mount /dev/pf0 /mnt
+
+
+2.4  The pf driver
+------------------
+
+The pf driver is intended for use with parallel port ATAPI disk
+devices.  The most common devices in this category are PD drives
+and LS-120 drives.  Traditionally, media for these devices are not
+partitioned.  Consequently, the pf driver does not support partitioned
+media.  This may be changed in a future version of the driver.
+
+2.5  Using the pt driver
+------------------------
+
+The pt driver for parallel port ATAPI tape drives is a minimal driver.
+It does not yet support many of the standard tape ioctl operations.
+For best performance, a block size of 32KB should be used.  You will
+probably want to set the parallel port delay to 0, if you can.
+
+2.6  Using the pg driver
+------------------------
+
+The pg driver can be used in conjunction with the cdrecord program
+to create CD-ROMs.  Please get cdrecord version 1.6.1 or later
+from ftp://ftp.fokus.gmd.de/pub/unix/cdrecord/ .  To record CD-R media
+your parallel port should ideally be set to EPP mode, and the "port delay"
+should be set to 0.  With those settings it is possible to record at 2x
+speed without any buffer underruns.  If you cannot get the driver to work
+in EPP mode, try to use "bidirectional" or "PS/2" mode and 1x speeds only.
+
+
+3. Troubleshooting
+==================
+
+3.1  Use EPP mode if you can
+----------------------------
+
+The most common problems that people report with the PARIDE drivers
+concern the parallel port CMOS settings.  At this time, none of the
+PARIDE protocol modules support ECP mode, or any ECP combination modes.
+If you are able to do so, please set your parallel port into EPP mode
+using your CMOS setup procedure.
+
+3.2  Check the port delay
+-------------------------
+
+Some parallel ports cannot reliably transfer data at full speed.  To
+offset the errors, the PARIDE protocol modules introduce a "port
+delay" between each access to the i/o ports.  Each protocol sets
+a default value for this delay.  In most cases, the user can override
+the default and set it to 0 - resulting in somewhat higher transfer
+rates.  In some rare cases (especially with older 486 systems) the
+default delays are not long enough.  if you experience corrupt data
+transfers, or unexpected failures, you may wish to increase the
+port delay.   The delay can be programmed using the "driveN" parameters
+to each of the high-level drivers.  Please see the notes above, or
+read the comments at the beginning of the driver source files in
+linux/drivers/block/paride.
+
+3.3  Some drives need a printer reset
+-------------------------------------
+
+There appear to be a number of "noname" external drives on the market
+that do not always power up correctly.  We have noticed this with some
+drives based on OnSpec and older Freecom adapters.  In these rare cases,
+the adapter can often be reinitialised by issuing a "printer reset" on
+the parallel port.  As the reset operation is potentially disruptive in
+multiple device environments, the PARIDE drivers will not do it
+automatically.  You can however, force a printer reset by doing::
+
+	insmod lp reset=1
+	rmmod lp
+
+If you have one of these marginal cases, you should probably build
+your paride drivers as modules, and arrange to do the printer reset
+before loading the PARIDE drivers.
+
+3.4  Use the verbose option and dmesg if you need help
+------------------------------------------------------
+
+While a lot of testing has gone into these drivers to make them work
+as smoothly as possible, problems will arise.  If you do have problems,
+please check all the obvious things first:  does the drive work in
+DOS with the manufacturer's drivers ?  If that doesn't yield any useful
+clues, then please make sure that only one drive is hooked to your system,
+and that either (a) PARPORT is enabled or (b) no other device driver
+is using your parallel port (check in /proc/ioports).  Then, load the
+appropriate drivers (you can load several protocol modules if you want)
+as in::
+
+	# insmod paride
+	# insmod epat
+	# insmod bpck
+	# insmod kbic
+	...
+	# insmod pd verbose=1
+
+(using the correct driver for the type of device you have, of course).
+The verbose=1 parameter will cause the drivers to log a trace of their
+activity as they attempt to locate your drive.
+
+Use 'dmesg' to capture a log of all the PARIDE messages (any messages
+beginning with paride:, a protocol module's name or a driver's name) and
+include that with your bug report.  You can submit a bug report in one
+of two ways.  Either send it directly to the author of the PARIDE suite,
+by e-mail to grant@torque.net, or join the linux-parport mailing list
+and post your report there.
+
+3.5  For more information or help
+---------------------------------
+
+You can join the linux-parport mailing list by sending a mail message
+to:
+
+		linux-parport-request@torque.net
+
+with the single word::
+
+		subscribe
+
+in the body of the mail message (not in the subject line).   Please be
+sure that your mail program is correctly set up when you do this,  as
+the list manager is a robot that will subscribe you using the reply
+address in your mail headers.  REMOVE any anti-spam gimmicks you may
+have in your mail headers, when sending mail to the list server.
+
+You might also find some useful information on the linux-parport
+web pages (although they are not always up to date) at
+
+	http://web.archive.org/web/%2E/http://www.torque.net/parport/
diff --git a/Documentation/admin-guide/blockdev/ramdisk.rst b/Documentation/admin-guide/blockdev/ramdisk.rst
new file mode 100644
index 000000000000..b7c2268f8dec
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/ramdisk.rst
@@ -0,0 +1,177 @@
+==========================================
+Using the RAM disk block device with Linux
+==========================================
+
+.. Contents:
+
+	1) Overview
+	2) Kernel Command Line Parameters
+	3) Using "rdev -r"
+	4) An Example of Creating a Compressed RAM Disk
+
+
+1) Overview
+-----------
+
+The RAM disk driver is a way to use main system memory as a block device.  It
+is required for initrd, an initial filesystem used if you need to load modules
+in order to access the root filesystem (see Documentation/admin-guide/initrd.rst).  It can
+also be used for a temporary filesystem for crypto work, since the contents
+are erased on reboot.
+
+The RAM disk dynamically grows as more space is required. It does this by using
+RAM from the buffer cache. The driver marks the buffers it is using as dirty
+so that the VM subsystem does not try to reclaim them later.
+
+The RAM disk supports up to 16 RAM disks by default, and can be reconfigured
+to support an unlimited number of RAM disks (at your own risk).  Just change
+the configuration symbol BLK_DEV_RAM_COUNT in the Block drivers config menu
+and (re)build the kernel.
+
+To use RAM disk support with your system, run './MAKEDEV ram' from the /dev
+directory.  RAM disks are all major number 1, and start with minor number 0
+for /dev/ram0, etc.  If used, modern kernels use /dev/ram0 for an initrd.
+
+The new RAM disk also has the ability to load compressed RAM disk images,
+allowing one to squeeze more programs onto an average installation or
+rescue floppy disk.
+
+
+2) Parameters
+---------------------------------
+
+2a) Kernel Command Line Parameters
+
+	ramdisk_size=N
+		Size of the ramdisk.
+
+This parameter tells the RAM disk driver to set up RAM disks of N k size.  The
+default is 4096 (4 MB).
+
+2b) Module parameters
+
+	rd_nr
+		/dev/ramX devices created.
+
+	max_part
+		Maximum partition number.
+
+	rd_size
+		See ramdisk_size.
+
+3) Using "rdev -r"
+------------------
+
+The usage of the word (two bytes) that "rdev -r" sets in the kernel image is
+as follows. The low 11 bits (0 -> 10) specify an offset (in 1 k blocks) of up
+to 2 MB (2^11) of where to find the RAM disk (this used to be the size). Bit
+14 indicates that a RAM disk is to be loaded, and bit 15 indicates whether a
+prompt/wait sequence is to be given before trying to read the RAM disk. Since
+the RAM disk dynamically grows as data is being written into it, a size field
+is not required. Bits 11 to 13 are not currently used and may as well be zero.
+These numbers are no magical secrets, as seen below::
+
+  ./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK     0x07FF
+  ./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG          0x8000
+  ./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG            0x4000
+
+Consider a typical two floppy disk setup, where you will have the
+kernel on disk one, and have already put a RAM disk image onto disk #2.
+
+Hence you want to set bits 0 to 13 as 0, meaning that your RAM disk
+starts at an offset of 0 kB from the beginning of the floppy.
+The command line equivalent is: "ramdisk_start=0"
+
+You want bit 14 as one, indicating that a RAM disk is to be loaded.
+The command line equivalent is: "load_ramdisk=1"
+
+You want bit 15 as one, indicating that you want a prompt/keypress
+sequence so that you have a chance to switch floppy disks.
+The command line equivalent is: "prompt_ramdisk=1"
+
+Putting that together gives 2^15 + 2^14 + 0 = 49152 for an rdev word.
+So to create disk one of the set, you would do::
+
+	/usr/src/linux# cat arch/x86/boot/zImage > /dev/fd0
+	/usr/src/linux# rdev /dev/fd0 /dev/fd0
+	/usr/src/linux# rdev -r /dev/fd0 49152
+
+If you make a boot disk that has LILO, then for the above, you would use::
+
+	append = "ramdisk_start=0 load_ramdisk=1 prompt_ramdisk=1"
+
+Since the default start = 0 and the default prompt = 1, you could use::
+
+	append = "load_ramdisk=1"
+
+
+4) An Example of Creating a Compressed RAM Disk
+-----------------------------------------------
+
+To create a RAM disk image, you will need a spare block device to
+construct it on. This can be the RAM disk device itself, or an
+unused disk partition (such as an unmounted swap partition). For this
+example, we will use the RAM disk device, "/dev/ram0".
+
+Note: This technique should not be done on a machine with less than 8 MB
+of RAM. If using a spare disk partition instead of /dev/ram0, then this
+restriction does not apply.
+
+a) Decide on the RAM disk size that you want. Say 2 MB for this example.
+   Create it by writing to the RAM disk device. (This step is not currently
+   required, but may be in the future.) It is wise to zero out the
+   area (esp. for disks) so that maximal compression is achieved for
+   the unused blocks of the image that you are about to create::
+
+	dd if=/dev/zero of=/dev/ram0 bs=1k count=2048
+
+b) Make a filesystem on it. Say ext2fs for this example::
+
+	mke2fs -vm0 /dev/ram0 2048
+
+c) Mount it, copy the files you want to it (eg: /etc/* /dev/* ...)
+   and unmount it again.
+
+d) Compress the contents of the RAM disk. The level of compression
+   will be approximately 50% of the space used by the files. Unused
+   space on the RAM disk will compress to almost nothing::
+
+	dd if=/dev/ram0 bs=1k count=2048 | gzip -v9 > /tmp/ram_image.gz
+
+e) Put the kernel onto the floppy::
+
+	dd if=zImage of=/dev/fd0 bs=1k
+
+f) Put the RAM disk image onto the floppy, after the kernel. Use an offset
+   that is slightly larger than the kernel, so that you can put another
+   (possibly larger) kernel onto the same floppy later without overlapping
+   the RAM disk image. An offset of 400 kB for kernels about 350 kB in
+   size would be reasonable. Make sure offset+size of ram_image.gz is
+   not larger than the total space on your floppy (usually 1440 kB)::
+
+	dd if=/tmp/ram_image.gz of=/dev/fd0 bs=1k seek=400
+
+g) Use "rdev" to set the boot device, RAM disk offset, prompt flag, etc.
+   For prompt_ramdisk=1, load_ramdisk=1, ramdisk_start=400, one would
+   have 2^15 + 2^14 + 400 = 49552::
+
+	rdev /dev/fd0 /dev/fd0
+	rdev -r /dev/fd0 49552
+
+That is it. You now have your boot/root compressed RAM disk floppy. Some
+users may wish to combine steps (d) and (f) by using a pipe.
+
+
+						Paul Gortmaker 12/95
+
+Changelog:
+----------
+
+10-22-04 :
+		Updated to reflect changes in command line options, remove
+		obsolete references, general cleanup.
+		James Nelson (james4765@gmail.com)
+
+
+12-95 :
+		Original Document
diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
new file mode 100644
index 000000000000..6eccf13219ff
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -0,0 +1,422 @@
+========================================
+zram: Compressed RAM based block devices
+========================================
+
+Introduction
+============
+
+The zram module creates RAM based block devices named /dev/zram<id>
+(<id> = 0, 1, ...). Pages written to these disks are compressed and stored
+in memory itself. These disks allow very fast I/O and compression provides
+good amounts of memory savings. Some of the usecases include /tmp storage,
+use as swap disks, various caches under /var and maybe many more :)
+
+Statistics for individual zram devices are exported through sysfs nodes at
+/sys/block/zram<id>/
+
+Usage
+=====
+
+There are several ways to configure and manage zram device(-s):
+
+a) using zram and zram_control sysfs attributes
+b) using zramctl utility, provided by util-linux (util-linux@vger.kernel.org).
+
+In this document we will describe only 'manual' zram configuration steps,
+IOW, zram and zram_control sysfs attributes.
+
+In order to get a better idea about zramctl please consult util-linux
+documentation, zramctl man-page or `zramctl --help`. Please be informed
+that zram maintainers do not develop/maintain util-linux or zramctl, should
+you have any questions please contact util-linux@vger.kernel.org
+
+Following shows a typical sequence of steps for using zram.
+
+WARNING
+=======
+
+For the sake of simplicity we skip error checking parts in most of the
+examples below. However, it is your sole responsibility to handle errors.
+
+zram sysfs attributes always return negative values in case of errors.
+The list of possible return codes:
+
+========  =============================================================
+-EBUSY	  an attempt to modify an attribute that cannot be changed once
+	  the device has been initialised. Please reset device first;
+-ENOMEM	  zram was not able to allocate enough memory to fulfil your
+	  needs;
+-EINVAL	  invalid input has been provided.
+========  =============================================================
+
+If you use 'echo', the returned value that is changed by 'echo' utility,
+and, in general case, something like::
+
+	echo 3 > /sys/block/zram0/max_comp_streams
+	if [ $? -ne 0 ];
+		handle_error
+	fi
+
+should suffice.
+
+1) Load Module
+==============
+
+::
+
+	modprobe zram num_devices=4
+	This creates 4 devices: /dev/zram{0,1,2,3}
+
+num_devices parameter is optional and tells zram how many devices should be
+pre-created. Default: 1.
+
+2) Set max number of compression streams
+========================================
+
+Regardless the value passed to this attribute, ZRAM will always
+allocate multiple compression streams - one per online CPUs - thus
+allowing several concurrent compression operations. The number of
+allocated compression streams goes down when some of the CPUs
+become offline. There is no single-compression-stream mode anymore,
+unless you are running a UP system or has only 1 CPU online.
+
+To find out how many streams are currently available::
+
+	cat /sys/block/zram0/max_comp_streams
+
+3) Select compression algorithm
+===============================
+
+Using comp_algorithm device attribute one can see available and
+currently selected (shown in square brackets) compression algorithms,
+change selected compression algorithm (once the device is initialised
+there is no way to change compression algorithm).
+
+Examples::
+
+	#show supported compression algorithms
+	cat /sys/block/zram0/comp_algorithm
+	lzo [lz4]
+
+	#select lzo compression algorithm
+	echo lzo > /sys/block/zram0/comp_algorithm
+
+For the time being, the `comp_algorithm` content does not necessarily
+show every compression algorithm supported by the kernel. We keep this
+list primarily to simplify device configuration and one can configure
+a new device with a compression algorithm that is not listed in
+`comp_algorithm`. The thing is that, internally, ZRAM uses Crypto API
+and, if some of the algorithms were built as modules, it's impossible
+to list all of them using, for instance, /proc/crypto or any other
+method. This, however, has an advantage of permitting the usage of
+custom crypto compression modules (implementing S/W or H/W compression).
+
+4) Set Disksize
+===============
+
+Set disk size by writing the value to sysfs node 'disksize'.
+The value can be either in bytes or you can use mem suffixes.
+Examples::
+
+	# Initialize /dev/zram0 with 50MB disksize
+	echo $((50*1024*1024)) > /sys/block/zram0/disksize
+
+	# Using mem suffixes
+	echo 256K > /sys/block/zram0/disksize
+	echo 512M > /sys/block/zram0/disksize
+	echo 1G > /sys/block/zram0/disksize
+
+Note:
+There is little point creating a zram of greater than twice the size of memory
+since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
+size of the disk when not in use so a huge zram is wasteful.
+
+5) Set memory limit: Optional
+=============================
+
+Set memory limit by writing the value to sysfs node 'mem_limit'.
+The value can be either in bytes or you can use mem suffixes.
+In addition, you could change the value in runtime.
+Examples::
+
+	# limit /dev/zram0 with 50MB memory
+	echo $((50*1024*1024)) > /sys/block/zram0/mem_limit
+
+	# Using mem suffixes
+	echo 256K > /sys/block/zram0/mem_limit
+	echo 512M > /sys/block/zram0/mem_limit
+	echo 1G > /sys/block/zram0/mem_limit
+
+	# To disable memory limit
+	echo 0 > /sys/block/zram0/mem_limit
+
+6) Activate
+===========
+
+::
+
+	mkswap /dev/zram0
+	swapon /dev/zram0
+
+	mkfs.ext4 /dev/zram1
+	mount /dev/zram1 /tmp
+
+7) Add/remove zram devices
+==========================
+
+zram provides a control interface, which enables dynamic (on-demand) device
+addition and removal.
+
+In order to add a new /dev/zramX device, perform read operation on hot_add
+attribute. This will return either new device's device id (meaning that you
+can use /dev/zram<id>) or error code.
+
+Example::
+
+	cat /sys/class/zram-control/hot_add
+	1
+
+To remove the existing /dev/zramX device (where X is a device id)
+execute::
+
+	echo X > /sys/class/zram-control/hot_remove
+
+8) Stats
+========
+
+Per-device statistics are exported as various nodes under /sys/block/zram<id>/
+
+A brief description of exported device attributes. For more details please
+read Documentation/ABI/testing/sysfs-block-zram.
+
+======================  ======  ===============================================
+Name            	access            description
+======================  ======  ===============================================
+disksize          	RW	show and set the device's disk size
+initstate         	RO	shows the initialization state of the device
+reset             	WO	trigger device reset
+mem_used_max      	WO	reset the `mem_used_max` counter (see later)
+mem_limit         	WO	specifies the maximum amount of memory ZRAM can
+				use to store the compressed data
+writeback_limit   	WO	specifies the maximum amount of write IO zram
+				can write out to backing device as 4KB unit
+writeback_limit_enable  RW	show and set writeback_limit feature
+max_comp_streams  	RW	the number of possible concurrent compress
+				operations
+comp_algorithm    	RW	show and change the compression algorithm
+compact           	WO	trigger memory compaction
+debug_stat        	RO	this file is used for zram debugging purposes
+backing_dev	  	RW	set up backend storage for zram to write out
+idle		  	WO	mark allocated slot as idle
+======================  ======  ===============================================
+
+
+User space is advised to use the following files to read the device statistics.
+
+File /sys/block/zram<id>/stat
+
+Represents block layer statistics. Read Documentation/block/stat.rst for
+details.
+
+File /sys/block/zram<id>/io_stat
+
+The stat file represents device's I/O statistics not accounted by block
+layer and, thus, not available in zram<id>/stat file. It consists of a
+single line of text and contains the following stats separated by
+whitespace:
+
+ =============    =============================================================
+ failed_reads     The number of failed reads
+ failed_writes    The number of failed writes
+ invalid_io       The number of non-page-size-aligned I/O requests
+ notify_free      Depending on device usage scenario it may account
+
+                  a) the number of pages freed because of swap slot free
+                     notifications
+                  b) the number of pages freed because of
+                     REQ_OP_DISCARD requests sent by bio. The former ones are
+                     sent to a swap block device when a swap slot is freed,
+                     which implies that this disk is being used as a swap disk.
+
+                  The latter ones are sent by filesystem mounted with
+                  discard option, whenever some data blocks are getting
+                  discarded.
+ =============    =============================================================
+
+File /sys/block/zram<id>/mm_stat
+
+The stat file represents device's mm statistics. It consists of a single
+line of text and contains the following stats separated by whitespace:
+
+ ================ =============================================================
+ orig_data_size   uncompressed size of data stored in this disk.
+		  This excludes same-element-filled pages (same_pages) since
+		  no memory is allocated for them.
+                  Unit: bytes
+ compr_data_size  compressed size of data stored in this disk
+ mem_used_total   the amount of memory allocated for this disk. This
+                  includes allocator fragmentation and metadata overhead,
+                  allocated for this disk. So, allocator space efficiency
+                  can be calculated using compr_data_size and this statistic.
+                  Unit: bytes
+ mem_limit        the maximum amount of memory ZRAM can use to store
+                  the compressed data
+ mem_used_max     the maximum amount of memory zram have consumed to
+                  store the data
+ same_pages       the number of same element filled pages written to this disk.
+                  No memory is allocated for such pages.
+ pages_compacted  the number of pages freed during compaction
+ huge_pages	  the number of incompressible pages
+ ================ =============================================================
+
+File /sys/block/zram<id>/bd_stat
+
+The stat file represents device's backing device statistics. It consists of
+a single line of text and contains the following stats separated by whitespace:
+
+ ============== =============================================================
+ bd_count	size of data written in backing device.
+		Unit: 4K bytes
+ bd_reads	the number of reads from backing device
+		Unit: 4K bytes
+ bd_writes	the number of writes to backing device
+		Unit: 4K bytes
+ ============== =============================================================
+
+9) Deactivate
+=============
+
+::
+
+	swapoff /dev/zram0
+	umount /dev/zram1
+
+10) Reset
+=========
+
+	Write any positive value to 'reset' sysfs node::
+
+		echo 1 > /sys/block/zram0/reset
+		echo 1 > /sys/block/zram1/reset
+
+	This frees all the memory allocated for the given device and
+	resets the disksize to zero. You must set the disksize again
+	before reusing the device.
+
+Optional Feature
+================
+
+writeback
+---------
+
+With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible page
+to backing storage rather than keeping it in memory.
+To use the feature, admin should set up backing device via::
+
+	echo /dev/sda5 > /sys/block/zramX/backing_dev
+
+before disksize setting. It supports only partition at this moment.
+If admin want to use incompressible page writeback, they could do via::
+
+	echo huge > /sys/block/zramX/write
+
+To use idle page writeback, first, user need to declare zram pages
+as idle::
+
+	echo all > /sys/block/zramX/idle
+
+From now on, any pages on zram are idle pages. The idle mark
+will be removed until someone request access of the block.
+IOW, unless there is access request, those pages are still idle pages.
+
+Admin can request writeback of those idle pages at right timing via::
+
+	echo idle > /sys/block/zramX/writeback
+
+With the command, zram writeback idle pages from memory to the storage.
+
+If there are lots of write IO with flash device, potentially, it has
+flash wearout problem so that admin needs to design write limitation
+to guarantee storage health for entire product life.
+
+To overcome the concern, zram supports "writeback_limit" feature.
+The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
+any writeback. IOW, if admin want to apply writeback budget, he should
+enable writeback_limit_enable via::
+
+	$ echo 1 > /sys/block/zramX/writeback_limit_enable
+
+Once writeback_limit_enable is set, zram doesn't allow any writeback
+until admin set the budget via /sys/block/zramX/writeback_limit.
+
+(If admin doesn't enable writeback_limit_enable, writeback_limit's value
+assigned via /sys/block/zramX/writeback_limit is meaninless.)
+
+If admin want to limit writeback as per-day 400M, he could do it
+like below::
+
+	$ MB_SHIFT=20
+	$ 4K_SHIFT=12
+	$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
+		/sys/block/zram0/writeback_limit.
+	$ echo 1 > /sys/block/zram0/writeback_limit_enable
+
+If admin want to allow further write again once the bugdet is exausted,
+he could do it like below::
+
+	$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
+		/sys/block/zram0/writeback_limit
+
+If admin want to see remaining writeback budget since he set::
+
+	$ cat /sys/block/zramX/writeback_limit
+
+If admin want to disable writeback limit, he could do::
+
+	$ echo 0 > /sys/block/zramX/writeback_limit_enable
+
+The writeback_limit count will reset whenever you reset zram(e.g.,
+system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
+writeback happened until you reset the zram to allocate extra writeback
+budget in next setting is user's job.
+
+If admin want to measure writeback count in a certain period, he could
+know it via /sys/block/zram0/bd_stat's 3rd column.
+
+memory tracking
+===============
+
+With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the
+zram block. It could be useful to catch cold or incompressible
+pages of the process with*pagemap.
+
+If you enable the feature, you could see block state via
+/sys/kernel/debug/zram/zram0/block_state". The output is as follows::
+
+	  300    75.033841 .wh.
+	  301    63.806904 s...
+	  302    63.806919 ..hi
+
+First column
+	zram's block index.
+Second column
+	access time since the system was booted
+Third column
+	state of the block:
+
+	s:
+		same page
+	w:
+		written page to backing store
+	h:
+		huge page
+	i:
+		idle page
+
+First line of above example says 300th block is accessed at 75.033841sec
+and the block's state is huge so it is written back to the backing
+storage. It's a debugging feature so anyone shouldn't rely on it to work
+properly.
+
+Nitin Gupta
+ngupta@vflare.org
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 5b63182ceb5f..9228fbf5ce4e 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -73,6 +73,7 @@ configure specific aspects of kernel behavior to your liking.
    java
    ras
    bcache
+   blockdev/index
    ext4
    binderfs
    pm/index
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e645b3ab4b6f..78576aa45cce 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1249,7 +1249,7 @@
 			See also Documentation/fault-injection/.
 
 	floppy=		[HW]
-			See Documentation/blockdev/floppy.rst.
+			See Documentation/admin-guide/blockdev/floppy.rst.
 
 	force_pal_cache_flush
 			[IA-64] Avoid check_sal_cache_flush which may hang on
@@ -2234,7 +2234,7 @@
 	memblock=debug	[KNL] Enable memblock debug messages.
 
 	load_ramdisk=	[RAM] List of ramdisks to load from floppy
-			See Documentation/blockdev/ramdisk.rst.
+			See Documentation/admin-guide/blockdev/ramdisk.rst.
 
 	lockd.nlm_grace_period=P  [NFS] Assign grace period.
 			Format: <integer>
@@ -3268,7 +3268,7 @@
 
 	pcd.		[PARIDE]
 			See header of drivers/block/paride/pcd.c.
-			See also Documentation/blockdev/paride.rst.
+			See also Documentation/admin-guide/blockdev/paride.rst.
 
 	pci=option[,option...]	[PCI] various PCI subsystem options.
 
@@ -3512,7 +3512,7 @@
 			needed on a platform with proper driver support.
 
 	pd.		[PARIDE]
-			See Documentation/blockdev/paride.rst.
+			See Documentation/admin-guide/blockdev/paride.rst.
 
 	pdcchassis=	[PARISC,HW] Disable/Enable PDC Chassis Status codes at
 			boot time.
@@ -3527,10 +3527,10 @@
 			and performance comparison.
 
 	pf.		[PARIDE]
-			See Documentation/blockdev/paride.rst.
+			See Documentation/admin-guide/blockdev/paride.rst.
 
 	pg.		[PARIDE]
-			See Documentation/blockdev/paride.rst.
+			See Documentation/admin-guide/blockdev/paride.rst.
 
 	pirq=		[SMP,APIC] Manual mp-table setup
 			See Documentation/x86/i386/IO-APIC.rst.
@@ -3642,7 +3642,7 @@
 
 	prompt_ramdisk=	[RAM] List of RAM disks to prompt for floppy disk
 			before loading.
-			See Documentation/blockdev/ramdisk.rst.
+			See Documentation/admin-guide/blockdev/ramdisk.rst.
 
 	psi=		[KNL] Enable or disable pressure stall information
 			tracking.
@@ -3664,7 +3664,7 @@
 	pstore.backend=	Specify the name of the pstore backend to use
 
 	pt.		[PARIDE]
-			See Documentation/blockdev/paride.rst.
+			See Documentation/admin-guide/blockdev/paride.rst.
 
 	pti=		[X86_64] Control Page Table Isolation of user and
 			kernel address spaces.  Disabling this feature
@@ -3693,7 +3693,7 @@
 			See Documentation/admin-guide/md.rst.
 
 	ramdisk_size=	[RAM] Sizes of RAM disks in kilobytes
-			See Documentation/blockdev/ramdisk.rst.
+			See Documentation/admin-guide/blockdev/ramdisk.rst.
 
 	random.trust_cpu={on,off}
 			[KNL] Enable or disable trusting the use of the
diff --git a/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg b/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg
deleted file mode 100644
index f87cfa0dc2fb..000000000000
--- a/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg
+++ /dev/null
@@ -1,588 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Created with Inkscape (http://www.inkscape.org/) -->
-<svg
-   xmlns:svg="http://www.w3.org/2000/svg"
-   xmlns="http://www.w3.org/2000/svg"
-   version="1.0"
-   width="210mm"
-   height="297mm"
-   viewBox="0 0 21000 29700"
-   id="svg2"
-   style="fill-rule:evenodd">
-  <defs
-     id="defs4" />
-  <g
-     id="Default"
-     style="visibility:visible">
-    <desc
-       id="desc180">Master slide</desc>
-  </g>
-  <path
-     d="M 11999,8601 L 11899,8301 L 12099,8301 L 11999,8601 z"
-     id="path193"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 11999,7801 L 11999,8361"
-     id="path197"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <path
-     d="M 7999,10401 L 7899,10101 L 8099,10101 L 7999,10401 z"
-     id="path209"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 7999,9601 L 7999,10161"
-     id="path213"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <path
-     d="M 11999,7801 L 11685,7840 L 11724,7644 L 11999,7801 z"
-     id="path225"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 7999,7001 L 11764,7754"
-     id="path229"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <g
-     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,-1244.4792,1416.5139)"
-     id="g245"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <text
-       id="text247">
-      <tspan
-         x="9139 9368 9579 9808 9986 10075 10252 10481 10659 10837 10909"
-         y="9284"
-         id="tspan249">RSDataReply</tspan>
-    </text>
-  </g>
-  <path
-     d="M 7999,9601 L 8281,9458 L 8311,9655 L 7999,9601 z"
-     id="path259"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 11999,9001 L 8236,9565"
-     id="path263"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <g
-     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,1620.9382,-1639.4947)"
-     id="g279"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <text
-       id="text281">
-      <tspan
-         x="8743 8972 9132 9310 9573 9801 10013 10242 10419 10597 10775 10953 11114"
-         y="7023"
-         id="tspan283">CsumRSRequest</tspan>
-    </text>
-  </g>
-  <text
-     id="text297"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4034 4263 4440 4703 4881 5042 5219 5397 5503 5681 5842 6003 6180 6341 6519 6625 6803 6980 7158 7336 7497 7586 7692"
-       y="5707"
-       id="tspan299">w_make_resync_request()</tspan>
-  </text>
-  <text
-     id="text313"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12199 12305 12483 12644 12821 12893 13054 13232 13410 13638 13816 13905 14083 14311 14489 14667 14845 15023 15184 15272 15378"
-       y="7806"
-       id="tspan315">receive_DataRequest()</tspan>
-  </text>
-  <text
-     id="text329"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12199 12377 12483 12660 12838 13016 13194 13372 13549 13621 13799 13977 14083 14261 14438 14616 14794 14955 15133 15294 15399"
-       y="8606"
-       id="tspan331">drbd_endio_read_sec()</tspan>
-  </text>
-  <text
-     id="text345"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12191 12420 12597 12775 12953 13131 13309 13486 13664 13825 13986 14164 14426 14604 14710 14871 15049 15154 15332 15510 15616"
-       y="9007"
-       id="tspan347">w_e_end_csum_rs_req()</tspan>
-  </text>
-  <text
-     id="text361"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4444 4550 4728 4889 5066 5138 5299 5477 5655 5883 6095 6324 6501 6590 6768 6997 7175 7352 7424 7585 7691"
-       y="9507"
-       id="tspan363">receive_RSDataReply()</tspan>
-  </text>
-  <text
-     id="text377"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4457 4635 4741 4918 5096 5274 5452 5630 5807 5879 6057 6235 6464 6569 6641 6730 6908 7086 7247 7425 7585 7691"
-       y="10407"
-       id="tspan379">drbd_endio_write_sec()</tspan>
-  </text>
-  <text
-     id="text393"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4647 4825 5003 5180 5358 5536 5714 5820 5997 6158 6319 6497 6658 6836 7013 7085 7263 7424 7585 7691"
-       y="10907"
-       id="tspan395">e_end_resync_block()</tspan>
-  </text>
-  <path
-     d="M 11999,11601 L 11685,11640 L 11724,11444 L 11999,11601 z"
-     id="path405"
-     style="fill:#000080;visibility:visible" />
-  <path
-     d="M 7999,10801 L 11764,11554"
-     id="path409"
-     style="fill:none;stroke:#000080;visibility:visible" />
-  <g
-     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,2434.7562,-1674.649)"
-     id="g425"
-     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
-    <text
-       id="text427">
-      <tspan
-         x="9320 9621 9726 9798 9887 10065 10277 10438"
-         y="10943"
-         id="tspan429">WriteAck</tspan>
-    </text>
-  </g>
-  <text
-     id="text443"
-     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12199 12377 12555 12644 12821 13033 13105 13283 13444 13604 13816 13977 14138 14244"
-       y="11559"
-       id="tspan445">got_BlockAck()</tspan>
-  </text>
-  <text
-     id="text459"
-     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="7999 8304 8541 8778 8990 9201 9413 9650 10001 10120 10357 10594 10806 11043 11280 11398 11703 11940 12152 12364 12601 12812 12931 13049 13261 13498 13710 13947 14065 14302 14540 14658 14777 14870 15107 15225 15437 15649 15886"
-       y="4877"
-       id="tspan461">Checksum based Resync, case not in sync</tspan>
-  </text>
-  <text
-     id="text475"
-     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="6961 7266 7571 7854 8159 8299 8536 8654 8891 9010 9247 9484 9603 9840 9958 10077 10170 10407"
-       y="2806"
-       id="tspan477">DRBD-8.3 data flow</tspan>
-  </text>
-  <text
-     id="text491"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="5190 5419 5596 5774 5952 6113 6291 6468 6646 6824 6985 7146 7324 7586 7692"
-       y="7005"
-       id="tspan493">w_e_send_csum()</tspan>
-  </text>
-  <path
-     d="M 11999,17601 L 11899,17301 L 12099,17301 L 11999,17601 z"
-     id="path503"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 11999,16801 L 11999,17361"
-     id="path507"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <path
-     d="M 11999,16801 L 11685,16840 L 11724,16644 L 11999,16801 z"
-     id="path519"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 7999,16001 L 11764,16754"
-     id="path523"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <g
-     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,-2539.5806,1529.3491)"
-     id="g539"
-     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
-    <text
-       id="text541">
-      <tspan
-         x="9269 9498 9709 9798 9959 10048 10226 10437 10598 10776"
-         y="18265"
-         id="tspan543">RSIsInSync</tspan>
-    </text>
-  </g>
-  <path
-     d="M 7999,18601 L 8281,18458 L 8311,18655 L 7999,18601 z"
-     id="path553"
-     style="fill:#000080;visibility:visible" />
-  <path
-     d="M 11999,18001 L 8236,18565"
-     id="path557"
-     style="fill:none;stroke:#000080;visibility:visible" />
-  <g
-     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,3461.4027,-1449.3012)"
-     id="g573"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <text
-       id="text575">
-      <tspan
-         x="8743 8972 9132 9310 9573 9801 10013 10242 10419 10597 10775 10953 11114"
-         y="16023"
-         id="tspan577">CsumRSRequest</tspan>
-    </text>
-  </g>
-  <text
-     id="text591"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12199 12305 12483 12644 12821 12893 13054 13232 13410 13638 13816 13905 14083 14311 14489 14667 14845 15023 15184 15272 15378"
-       y="16806"
-       id="tspan593">receive_DataRequest()</tspan>
-  </text>
-  <text
-     id="text607"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12199 12377 12483 12660 12838 13016 13194 13372 13549 13621 13799 13977 14083 14261 14438 14616 14794 14955 15133 15294 15399"
-       y="17606"
-       id="tspan609">drbd_endio_read_sec()</tspan>
-  </text>
-  <text
-     id="text623"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12191 12420 12597 12775 12953 13131 13309 13486 13664 13825 13986 14164 14426 14604 14710 14871 15049 15154 15332 15510 15616"
-       y="18007"
-       id="tspan625">w_e_end_csum_rs_req()</tspan>
-  </text>
-  <text
-     id="text639"
-     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="5735 5913 6091 6180 6357 6446 6607 6696 6874 7085 7246 7424 7585 7691"
-       y="18507"
-       id="tspan641">got_IsInSync()</tspan>
-  </text>
-  <text
-     id="text655"
-     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="7999 8304 8541 8778 8990 9201 9413 9650 10001 10120 10357 10594 10806 11043 11280 11398 11703 11940 12152 12364 12601 12812 12931 13049 13261 13498 13710 13947 14065 14159 14396 14514 14726 14937 15175"
-       y="13877"
-       id="tspan657">Checksum based Resync, case in sync</tspan>
-  </text>
-  <path
-     d="M 12000,24601 L 11900,24301 L 12100,24301 L 12000,24601 z"
-     id="path667"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 12000,23801 L 12000,24361"
-     id="path671"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <path
-     d="M 8000,26401 L 7900,26101 L 8100,26101 L 8000,26401 z"
-     id="path683"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 8000,25601 L 8000,26161"
-     id="path687"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <path
-     d="M 12000,23801 L 11686,23840 L 11725,23644 L 12000,23801 z"
-     id="path699"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 8000,23001 L 11765,23754"
-     id="path703"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <g
-     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,-3543.8452,1630.5143)"
-     id="g719"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <text
-       id="text721">
-      <tspan
-         x="9464 9710 9921 10150 10328 10505 10577"
-         y="25236"
-         id="tspan723">OVReply</tspan>
-    </text>
-  </g>
-  <path
-     d="M 8000,25601 L 8282,25458 L 8312,25655 L 8000,25601 z"
-     id="path733"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 12000,25001 L 8237,25565"
-     id="path737"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <g
-     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,4918.2801,-1381.2128)"
-     id="g753"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <text
-       id="text755">
-      <tspan
-         x="9142 9388 9599 9828 10006 10183 10361 10539 10700"
-         y="23106"
-         id="tspan757">OVRequest</tspan>
-    </text>
-  </g>
-  <text
-     id="text771"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12200 12306 12484 12645 12822 12894 13055 13233 13411 13656 13868 14097 14274 14452 14630 14808 14969 15058 15163"
-       y="23806"
-       id="tspan773">receive_OVRequest()</tspan>
-  </text>
-  <text
-     id="text787"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12200 12378 12484 12661 12839 13017 13195 13373 13550 13622 13800 13978 14084 14262 14439 14617 14795 14956 15134 15295 15400"
-       y="24606"
-       id="tspan789">drbd_endio_read_sec()</tspan>
-  </text>
-  <text
-     id="text803"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12192 12421 12598 12776 12954 13132 13310 13487 13665 13843 14004 14182 14288 14465 14643 14749"
-       y="25007"
-       id="tspan805">w_e_end_ov_req()</tspan>
-  </text>
-  <text
-     id="text819"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="5101 5207 5385 5546 5723 5795 5956 6134 6312 6557 6769 6998 7175 7353 7425 7586 7692"
-       y="25507"
-       id="tspan821">receive_OVReply()</tspan>
-  </text>
-  <text
-     id="text835"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4492 4670 4776 4953 5131 5309 5487 5665 5842 5914 6092 6270 6376 6554 6731 6909 7087 7248 7426 7587 7692"
-       y="26407"
-       id="tspan837">drbd_endio_read_sec()</tspan>
-  </text>
-  <text
-     id="text851"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4902 5131 5308 5486 5664 5842 6020 6197 6375 6553 6714 6892 6998 7175 7353 7425 7586 7692"
-       y="26907"
-       id="tspan853">w_e_end_ov_reply()</tspan>
-  </text>
-  <path
-     d="M 12000,27601 L 11686,27640 L 11725,27444 L 12000,27601 z"
-     id="path863"
-     style="fill:#000080;visibility:visible" />
-  <path
-     d="M 8000,26801 L 11765,27554"
-     id="path867"
-     style="fill:none;stroke:#000080;visibility:visible" />
-  <g
-     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,5704.1907,-1328.312)"
-     id="g883"
-     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
-    <text
-       id="text885">
-      <tspan
-         x="9279 9525 9736 9965 10143 10303 10481 10553"
-         y="26935"
-         id="tspan887">OVResult</tspan>
-    </text>
-  </g>
-  <text
-     id="text901"
-     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12200 12378 12556 12645 12822 13068 13280 13508 13686 13847 14025 14097 14185 14291"
-       y="27559"
-       id="tspan903">got_OVResult()</tspan>
-  </text>
-  <text
-     id="text917"
-     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="8000 8330 8567 8660 8754 8991 9228 9346 9558 9795 9935 10028 10146"
-       y="21877"
-       id="tspan919">Online verify</tspan>
-  </text>
-  <text
-     id="text933"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4641 4870 5047 5310 5488 5649 5826 6004 6182 6343 6521 6626 6804 6982 7160 7338 7499 7587 7693"
-       y="23005"
-       id="tspan935">w_make_ov_request()</tspan>
-  </text>
-  <path
-     d="M 8000,6500 L 7900,6200 L 8100,6200 L 8000,6500 z"
-     id="path945"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 8000,5700 L 8000,6260"
-     id="path949"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <path
-     d="M 3900,5500 L 3700,5500 L 3700,11000 L 3900,11000"
-     id="path961"
-     style="fill:none;stroke:#000000;visibility:visible" />
-  <path
-     d="M 3900,14500 L 3700,14500 L 3700,18600 L 3900,18600"
-     id="path973"
-     style="fill:none;stroke:#000000;visibility:visible" />
-  <path
-     d="M 3900,22800 L 3700,22800 L 3700,26900 L 3900,26900"
-     id="path985"
-     style="fill:none;stroke:#000000;visibility:visible" />
-  <text
-     id="text1001"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4492 4670 4776 4953 5131 5309 5487 5665 5842 5914 6092 6270 6376 6554 6731 6909 7087 7248 7426 7587 7692"
-       y="6506"
-       id="tspan1003">drbd_endio_read_sec()</tspan>
-  </text>
-  <text
-     id="text1017"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4034 4263 4440 4703 4881 5042 5219 5397 5503 5681 5842 6003 6180 6341 6519 6625 6803 6980 7158 7336 7497 7586 7692"
-       y="14708"
-       id="tspan1019">w_make_resync_request()</tspan>
-  </text>
-  <text
-     id="text1033"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="5190 5419 5596 5774 5952 6113 6291 6468 6646 6824 6985 7146 7324 7586 7692"
-       y="16006"
-       id="tspan1035">w_e_send_csum()</tspan>
-  </text>
-  <path
-     d="M 8000,15501 L 7900,15201 L 8100,15201 L 8000,15501 z"
-     id="path1045"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 8000,14701 L 8000,15261"
-     id="path1049"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <text
-     id="text1065"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4492 4670 4776 4953 5131 5309 5487 5665 5842 5914 6092 6270 6376 6554 6731 6909 7087 7248 7426 7587 7692"
-       y="15507"
-       id="tspan1067">drbd_endio_read_sec()</tspan>
-  </text>
-  <path
-     d="M 16100,9000 L 16300,9000 L 16300,7500 L 16100,7500"
-     id="path1077"
-     style="fill:none;stroke:#000000;visibility:visible" />
-  <path
-     d="M 16100,18000 L 16300,18000 L 16300,16500 L 16100,16500"
-     id="path1089"
-     style="fill:none;stroke:#000000;visibility:visible" />
-  <path
-     d="M 16100,25000 L 16300,25000 L 16300,23500 L 16100,23500"
-     id="path1101"
-     style="fill:none;stroke:#000000;visibility:visible" />
-  <text
-     id="text1117"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="2026 2132 2293 2471 2648 2826 3004 3076 3254 3431 3503 3681 3787"
-       y="5402"
-       id="tspan1119">rs_begin_io()</tspan>
-  </text>
-  <text
-     id="text1133"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="2027 2133 2294 2472 2649 2827 3005 3077 3255 3432 3504 3682 3788"
-       y="14402"
-       id="tspan1135">rs_begin_io()</tspan>
-  </text>
-  <text
-     id="text1149"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="2026 2132 2293 2471 2648 2826 3004 3076 3254 3431 3503 3681 3787"
-       y="22602"
-       id="tspan1151">rs_begin_io()</tspan>
-  </text>
-  <text
-     id="text1165"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="1426 1532 1693 1871 2031 2209 2472 2649 2721 2899 2988 3166 3344 3416 3593 3699"
-       y="11302"
-       id="tspan1167">rs_complete_io()</tspan>
-  </text>
-  <text
-     id="text1181"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="1526 1632 1793 1971 2131 2309 2572 2749 2821 2999 3088 3266 3444 3516 3693 3799"
-       y="18931"
-       id="tspan1183">rs_complete_io()</tspan>
-  </text>
-  <text
-     id="text1197"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="1526 1632 1793 1971 2131 2309 2572 2749 2821 2999 3088 3266 3444 3516 3693 3799"
-       y="27231"
-       id="tspan1199">rs_complete_io()</tspan>
-  </text>
-  <text
-     id="text1213"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="16126 16232 16393 16571 16748 16926 17104 17176 17354 17531 17603 17781 17887"
-       y="7402"
-       id="tspan1215">rs_begin_io()</tspan>
-  </text>
-  <text
-     id="text1229"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="16127 16233 16394 16572 16749 16927 17105 17177 17355 17532 17604 17782 17888"
-       y="16331"
-       id="tspan1231">rs_begin_io()</tspan>
-  </text>
-  <text
-     id="text1245"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="16127 16233 16394 16572 16749 16927 17105 17177 17355 17532 17604 17782 17888"
-       y="23302"
-       id="tspan1247">rs_begin_io()</tspan>
-  </text>
-  <text
-     id="text1261"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="16115 16221 16382 16560 16720 16898 17161 17338 17410 17588 17677 17855 18033 18105 18282 18388"
-       y="9302"
-       id="tspan1263">rs_complete_io()</tspan>
-  </text>
-  <text
-     id="text1277"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="16115 16221 16382 16560 16720 16898 17161 17338 17410 17588 17677 17855 18033 18105 18282 18388"
-       y="18331"
-       id="tspan1279">rs_complete_io()</tspan>
-  </text>
-  <text
-     id="text1293"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="16126 16232 16393 16571 16731 16909 17172 17349 17421 17599 17688 17866 18044 18116 18293 18399"
-       y="25302"
-       id="tspan1295">rs_complete_io()</tspan>
-  </text>
-</svg>
diff --git a/Documentation/blockdev/drbd/DRBD-data-packets.svg b/Documentation/blockdev/drbd/DRBD-data-packets.svg
deleted file mode 100644
index 48a1e2165fec..000000000000
--- a/Documentation/blockdev/drbd/DRBD-data-packets.svg
+++ /dev/null
@@ -1,459 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Created with Inkscape (http://www.inkscape.org/) -->
-<svg
-   xmlns:svg="http://www.w3.org/2000/svg"
-   xmlns="http://www.w3.org/2000/svg"
-   version="1.0"
-   width="210mm"
-   height="297mm"
-   viewBox="0 0 21000 29700"
-   id="svg2"
-   style="fill-rule:evenodd">
-  <defs
-     id="defs4" />
-  <g
-     id="Default"
-     style="visibility:visible">
-    <desc
-       id="desc176">Master slide</desc>
-  </g>
-  <path
-     d="M 11999,19601 L 11899,19301 L 12099,19301 L 11999,19601 z"
-     id="path189"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 11999,18801 L 11999,19361"
-     id="path193"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <path
-     d="M 7999,21401 L 7899,21101 L 8099,21101 L 7999,21401 z"
-     id="path205"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 7999,20601 L 7999,21161"
-     id="path209"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <path
-     d="M 11999,18801 L 11685,18840 L 11724,18644 L 11999,18801 z"
-     id="path221"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 7999,18001 L 11764,18754"
-     id="path225"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <text
-     x="-3023.845"
-     y="1106.8124"
-     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,0,0)"
-     id="text243"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="6115.1553 6344.1553 6555.1553 6784.1553 6962.1553 7051.1553 7228.1553 7457.1553 7635.1553 7813.1553 7885.1553"
-       y="21390.812"
-       id="tspan245">RSDataReply</tspan>
-  </text>
-  <path
-     d="M 7999,20601 L 8281,20458 L 8311,20655 L 7999,20601 z"
-     id="path255"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 11999,20001 L 8236,20565"
-     id="path259"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <text
-     x="3502.5356"
-     y="-2184.6621"
-     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
-     id="text277"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12321.536 12550.536 12761.536 12990.536 13168.536 13257.536 13434.536 13663.536 13841.536 14019.536 14196.536 14374.536 14535.536"
-       y="15854.338"
-       id="tspan279">RSDataRequest</tspan>
-  </text>
-  <text
-     id="text293"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4034 4263 4440 4703 4881 5042 5219 5397 5503 5681 5842 6003 6180 6341 6519 6625 6803 6980 7158 7336 7497 7586 7692"
-       y="17807"
-       id="tspan295">w_make_resync_request()</tspan>
-  </text>
-  <text
-     id="text309"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12199 12305 12483 12644 12821 12893 13054 13232 13410 13638 13816 13905 14083 14311 14489 14667 14845 15023 15184 15272 15378"
-       y="18806"
-       id="tspan311">receive_DataRequest()</tspan>
-  </text>
-  <text
-     id="text325"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12199 12377 12483 12660 12838 13016 13194 13372 13549 13621 13799 13977 14083 14261 14438 14616 14794 14955 15133 15294 15399"
-       y="19606"
-       id="tspan327">drbd_endio_read_sec()</tspan>
-  </text>
-  <text
-     id="text341"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12191 12420 12597 12775 12953 13131 13309 13486 13664 13770 13931 14109 14287 14375 14553 14731 14837 15015 15192 15298"
-       y="20007"
-       id="tspan343">w_e_end_rsdata_req()</tspan>
-  </text>
-  <text
-     id="text357"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4444 4550 4728 4889 5066 5138 5299 5477 5655 5883 6095 6324 6501 6590 6768 6997 7175 7352 7424 7585 7691"
-       y="20507"
-       id="tspan359">receive_RSDataReply()</tspan>
-  </text>
-  <text
-     id="text373"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4457 4635 4741 4918 5096 5274 5452 5630 5807 5879 6057 6235 6464 6569 6641 6730 6908 7086 7247 7425 7585 7691"
-       y="21407"
-       id="tspan375">drbd_endio_write_sec()</tspan>
-  </text>
-  <text
-     id="text389"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4647 4825 5003 5180 5358 5536 5714 5820 5997 6158 6319 6497 6658 6836 7013 7085 7263 7424 7585 7691"
-       y="21907"
-       id="tspan391">e_end_resync_block()</tspan>
-  </text>
-  <path
-     d="M 11999,22601 L 11685,22640 L 11724,22444 L 11999,22601 z"
-     id="path401"
-     style="fill:#000080;visibility:visible" />
-  <path
-     d="M 7999,21801 L 11764,22554"
-     id="path405"
-     style="fill:none;stroke:#000080;visibility:visible" />
-  <text
-     x="4290.3008"
-     y="-2369.6162"
-     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
-     id="text423"
-     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="13610.301 13911.301 14016.301 14088.301 14177.301 14355.301 14567.301 14728.301"
-       y="19573.385"
-       id="tspan425">WriteAck</tspan>
-  </text>
-  <text
-     id="text439"
-     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12199 12377 12555 12644 12821 13033 13105 13283 13444 13604 13816 13977 14138 14244"
-       y="22559"
-       id="tspan441">got_BlockAck()</tspan>
-  </text>
-  <text
-     id="text455"
-     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="7999 8304 8541 8753 8964 9201 9413 9531 9769 9862 10099 10310 10522 10734 10852 10971 11208 11348 11585 11822"
-       y="16877"
-       id="tspan457">Resync blocks, 4-32K</tspan>
-  </text>
-  <path
-     d="M 12000,7601 L 11900,7301 L 12100,7301 L 12000,7601 z"
-     id="path467"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 12000,6801 L 12000,7361"
-     id="path471"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <path
-     d="M 12000,6801 L 11686,6840 L 11725,6644 L 12000,6801 z"
-     id="path483"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 8000,6001 L 11765,6754"
-     id="path487"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <text
-     x="-1288.1796"
-     y="1279.7666"
-     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,0,0)"
-     id="text505"
-     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="8174.8208 8475.8203 8580.8203 8652.8203 8741.8203 8919.8203 9131.8203 9292.8203"
-       y="9516.7666"
-       id="tspan507">WriteAck</tspan>
-  </text>
-  <path
-     d="M 8000,8601 L 8282,8458 L 8312,8655 L 8000,8601 z"
-     id="path517"
-     style="fill:#000080;visibility:visible" />
-  <path
-     d="M 12000,8001 L 8237,8565"
-     id="path521"
-     style="fill:none;stroke:#000080;visibility:visible" />
-  <text
-     x="1065.6655"
-     y="-2097.7664"
-     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
-     id="text539"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="10682.666 10911.666 11088.666 11177.666"
-       y="4107.2339"
-       id="tspan541">Data</tspan>
-  </text>
-  <text
-     id="text555"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4746 4924 5030 5207 5385 5563 5826 6003 6164 6342 6520 6626 6803 6981 7159 7337 7498 7587 7692"
-       y="5505"
-       id="tspan557">drbd_make_request()</tspan>
-  </text>
-  <text
-     id="text571"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12200 12306 12484 12645 12822 12894 13055 13233 13411 13639 13817 13906 14084 14190"
-       y="6806"
-       id="tspan573">receive_Data()</tspan>
-  </text>
-  <text
-     id="text587"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12200 12378 12484 12661 12839 13017 13195 13373 13550 13622 13800 13978 14207 14312 14384 14473 14651 14829 14990 15168 15328 15434"
-       y="7606"
-       id="tspan589">drbd_endio_write_sec()</tspan>
-  </text>
-  <text
-     id="text603"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12192 12370 12548 12725 12903 13081 13259 13437 13509 13686 13847 14008 14114"
-       y="8007"
-       id="tspan605">e_end_block()</tspan>
-  </text>
-  <text
-     id="text619"
-     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="5647 5825 6003 6092 6269 6481 6553 6731 6892 7052 7264 7425 7586 7692"
-       y="8606"
-       id="tspan621">got_BlockAck()</tspan>
-  </text>
-  <text
-     id="text635"
-     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="8000 8305 8542 8779 9016 9109 9346 9486 9604 9956 10049 10189 10328 10565 10705 10942 11179 11298 11603 11742 11835 11954 12191 12310 12428 12665 12902 13139 13279 13516 13753"
-       y="4877"
-       id="tspan637">Regular mirrored write, 512-32K</tspan>
-  </text>
-  <text
-     id="text651"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="5381 5610 5787 5948 6126 6304 6482 6659 6837 7015 7087 7265 7426 7587 7692"
-       y="6003"
-       id="tspan653">w_send_dblock()</tspan>
-  </text>
-  <path
-     d="M 8000,6800 L 7900,6500 L 8100,6500 L 8000,6800 z"
-     id="path663"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 8000,6000 L 8000,6560"
-     id="path667"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <text
-     id="text683"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4602 4780 4886 5063 5241 5419 5597 5775 5952 6024 6202 6380 6609 6714 6786 6875 7053 7231 7409 7515 7587 7692"
-       y="6905"
-       id="tspan685">drbd_endio_write_pri()</tspan>
-  </text>
-  <path
-     d="M 12000,13602 L 11900,13302 L 12100,13302 L 12000,13602 z"
-     id="path695"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 12000,12802 L 12000,13362"
-     id="path699"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <path
-     d="M 12000,12802 L 11686,12841 L 11725,12645 L 12000,12802 z"
-     id="path711"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 8000,12002 L 11765,12755"
-     id="path715"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <text
-     x="-2155.5266"
-     y="1201.5964"
-     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,0,0)"
-     id="text733"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="7202.4736 7431.4736 7608.4736 7697.4736 7875.4736 8104.4736 8282.4736 8459.4736 8531.4736"
-       y="15454.597"
-       id="tspan735">DataReply</tspan>
-  </text>
-  <path
-     d="M 8000,14602 L 8282,14459 L 8312,14656 L 8000,14602 z"
-     id="path745"
-     style="fill:#008000;visibility:visible" />
-  <path
-     d="M 12000,14002 L 8237,14566"
-     id="path749"
-     style="fill:none;stroke:#008000;visibility:visible" />
-  <text
-     x="2280.3804"
-     y="-2103.2141"
-     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
-     id="text767"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="11316.381 11545.381 11722.381 11811.381 11989.381 12218.381 12396.381 12573.381 12751.381 12929.381 13090.381"
-       y="9981.7861"
-       id="tspan769">DataRequest</tspan>
-  </text>
-  <text
-     id="text783"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="4746 4924 5030 5207 5385 5563 5826 6003 6164 6342 6520 6626 6803 6981 7159 7337 7498 7587 7692"
-       y="11506"
-       id="tspan785">drbd_make_request()</tspan>
-  </text>
-  <text
-     id="text799"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12200 12306 12484 12645 12822 12894 13055 13233 13411 13639 13817 13906 14084 14312 14490 14668 14846 15024 15185 15273 15379"
-       y="12807"
-       id="tspan801">receive_DataRequest()</tspan>
-  </text>
-  <text
-     id="text815"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12200 12378 12484 12661 12839 13017 13195 13373 13550 13622 13800 13978 14084 14262 14439 14617 14795 14956 15134 15295 15400"
-       y="13607"
-       id="tspan817">drbd_endio_read_sec()</tspan>
-  </text>
-  <text
-     id="text831"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="12192 12421 12598 12776 12954 13132 13310 13487 13665 13843 14021 14110 14288 14465 14571 14749 14927 15033"
-       y="14008"
-       id="tspan833">w_e_end_data_req()</tspan>
-  </text>
-  <g
-     id="g835"
-     style="visibility:visible">
-    <desc
-       id="desc837">Drawing</desc>
-    <text
-       id="text847"
-       style="font-size:318px;font-weight:400;fill:#008000;font-family:Helvetica embedded">
-      <tspan
-         x="4885 4991 5169 5330 5507 5579 5740 5918 6096 6324 6502 6591 6769 6997 7175 7353 7425 7586 7692"
-         y="14607"
-         id="tspan849">receive_DataReply()</tspan>
-    </text>
-  </g>
-  <text
-     id="text863"
-     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="8000 8305 8398 8610 8821 8914 9151 9363 9575 9693 9833 10070 10307 10544 10663 10781 11018 11255 11493 11632 11869 12106"
-       y="10878"
-       id="tspan865">Diskless read, 512-32K</tspan>
-  </text>
-  <text
-     id="text879"
-     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="5029 5258 5435 5596 5774 5952 6130 6307 6413 6591 6769 6947 7125 7230 7408 7586 7692"
-       y="12004"
-       id="tspan881">w_send_read_req()</tspan>
-  </text>
-  <text
-     id="text895"
-     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="6961 7266 7571 7854 8159 8278 8515 8633 8870 9107 9226 9463 9581 9700 9793 10030"
-       y="2806"
-       id="tspan897">DRBD 8 data flow</tspan>
-  </text>
-  <path
-     d="M 3900,5300 L 3700,5300 L 3700,7000 L 3900,7000"
-     id="path907"
-     style="fill:none;stroke:#000000;visibility:visible" />
-  <path
-     d="M 3900,17600 L 3700,17600 L 3700,22000 L 3900,22000"
-     id="path919"
-     style="fill:none;stroke:#000000;visibility:visible" />
-  <path
-     d="M 16100,20000 L 16300,20000 L 16300,18500 L 16100,18500"
-     id="path931"
-     style="fill:none;stroke:#000000;visibility:visible" />
-  <text
-     id="text947"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="2126 2304 2376 2554 2731 2909 3087 3159 3337 3515 3587 3764 3870"
-       y="5202"
-       id="tspan949">al_begin_io()</tspan>
-  </text>
-  <text
-     id="text963"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="1632 1810 1882 2060 2220 2398 2661 2839 2910 3088 3177 3355 3533 3605 3783 3888"
-       y="7331"
-       id="tspan965">al_complete_io()</tspan>
-  </text>
-  <text
-     id="text979"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="2126 2232 2393 2571 2748 2926 3104 3176 3354 3531 3603 3781 3887"
-       y="17431"
-       id="tspan981">rs_begin_io()</tspan>
-  </text>
-  <text
-     id="text995"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="1626 1732 1893 2071 2231 2409 2672 2849 2921 3099 3188 3366 3544 3616 3793 3899"
-       y="22331"
-       id="tspan997">rs_complete_io()</tspan>
-  </text>
-  <text
-     id="text1011"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="16027 16133 16294 16472 16649 16827 17005 17077 17255 17432 17504 17682 17788"
-       y="18402"
-       id="tspan1013">rs_begin_io()</tspan>
-  </text>
-  <text
-     id="text1027"
-     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
-    <tspan
-       x="16115 16221 16382 16560 16720 16898 17161 17338 17410 17588 17677 17855 18033 18105 18282 18388"
-       y="20331"
-       id="tspan1029">rs_complete_io()</tspan>
-  </text>
-</svg>
diff --git a/Documentation/blockdev/drbd/conn-states-8.dot b/Documentation/blockdev/drbd/conn-states-8.dot
deleted file mode 100644
index 025e8cf5e64a..000000000000
--- a/Documentation/blockdev/drbd/conn-states-8.dot
+++ /dev/null
@@ -1,18 +0,0 @@
-digraph conn_states {
-	StandAllone  -> WFConnection   [ label = "ioctl_set_net()" ]
-	WFConnection -> Unconnected    [ label = "unable to bind()" ]
-	WFConnection -> WFReportParams [ label = "in connect() after accept" ]
-	WFReportParams -> StandAllone  [ label = "checks in receive_param()" ]
-	WFReportParams -> Connected    [ label = "in receive_param()" ]
-	WFReportParams -> WFBitMapS    [ label = "sync_handshake()" ]
-	WFReportParams -> WFBitMapT    [ label = "sync_handshake()" ]
-	WFBitMapS -> SyncSource        [ label = "receive_bitmap()" ]
-	WFBitMapT -> SyncTarget        [ label = "receive_bitmap()" ]
-	SyncSource -> Connected
-	SyncTarget -> Connected
-	SyncSource -> PausedSyncS
-	SyncTarget -> PausedSyncT
-	PausedSyncS -> SyncSource
-	PausedSyncT -> SyncTarget
-	Connected   -> WFConnection    [ label = "* on network error" ]
-}
diff --git a/Documentation/blockdev/drbd/data-structure-v9.rst b/Documentation/blockdev/drbd/data-structure-v9.rst
deleted file mode 100644
index 66036b901644..000000000000
--- a/Documentation/blockdev/drbd/data-structure-v9.rst
+++ /dev/null
@@ -1,42 +0,0 @@
-================================
-kernel data structure for DRBD-9
-================================
-
-This describes the in kernel data structure for DRBD-9. Starting with
-Linux v3.14 we are reorganizing DRBD to use this data structure.
-
-Basic Data Structure
-====================
-
-A node has a number of DRBD resources.  Each such resource has a number of
-devices (aka volumes) and connections to other nodes ("peer nodes"). Each DRBD
-device is represented by a block device locally.
-
-The DRBD objects are interconnected to form a matrix as depicted below; a
-drbd_peer_device object sits at each intersection between a drbd_device and a
-drbd_connection::
-
-  /--------------+---------------+.....+---------------\
-  |   resource   |    device     |     |    device     |
-  +--------------+---------------+.....+---------------+
-  |  connection  |  peer_device  |     |  peer_device  |
-  +--------------+---------------+.....+---------------+
-  :              :               :     :               :
-  :              :               :     :               :
-  +--------------+---------------+.....+---------------+
-  |  connection  |  peer_device  |     |  peer_device  |
-  \--------------+---------------+.....+---------------/
-
-In this table, horizontally, devices can be accessed from resources by their
-volume number.  Likewise, peer_devices can be accessed from connections by
-their volume number.  Objects in the vertical direction are connected by double
-linked lists.  There are back pointers from peer_devices to their connections a
-devices, and from connections and devices to their resource.
-
-All resources are in the drbd_resources double-linked list.  In addition, all
-devices can be accessed by their minor device number via the drbd_devices idr.
-
-The drbd_resource, drbd_connection, and drbd_device objects are reference
-counted.  The peer_device objects only serve to establish the links between
-devices and connections; their lifetime is determined by the lifetime of the
-device and connection which they reference.
diff --git a/Documentation/blockdev/drbd/disk-states-8.dot b/Documentation/blockdev/drbd/disk-states-8.dot
deleted file mode 100644
index d06cfb46fb98..000000000000
--- a/Documentation/blockdev/drbd/disk-states-8.dot
+++ /dev/null
@@ -1,16 +0,0 @@
-digraph disk_states {
-	Diskless -> Inconsistent       [ label = "ioctl_set_disk()" ]
-	Diskless -> Consistent         [ label = "ioctl_set_disk()" ]
-	Diskless -> Outdated           [ label = "ioctl_set_disk()" ]
-	Consistent -> Outdated         [ label = "receive_param()" ]
-	Consistent -> UpToDate         [ label = "receive_param()" ]
-	Consistent -> Inconsistent     [ label = "start resync" ]
-	Outdated   -> Inconsistent     [ label = "start resync" ]
-	UpToDate   -> Inconsistent     [ label = "ioctl_replicate" ]
-	Inconsistent -> UpToDate       [ label = "resync completed" ]
-	Consistent -> Failed           [ label = "io completion error" ]
-	Outdated   -> Failed           [ label = "io completion error" ]
-	UpToDate   -> Failed           [ label = "io completion error" ]
-	Inconsistent -> Failed         [ label = "io completion error" ]
-	Failed -> Diskless             [ label = "sending notify to peer" ]
-}
diff --git a/Documentation/blockdev/drbd/drbd-connection-state-overview.dot b/Documentation/blockdev/drbd/drbd-connection-state-overview.dot
deleted file mode 100644
index 6d9cf0a7b11d..000000000000
--- a/Documentation/blockdev/drbd/drbd-connection-state-overview.dot
+++ /dev/null
@@ -1,85 +0,0 @@
-// vim: set sw=2 sts=2 :
-digraph {
-  rankdir=BT
-  bgcolor=white
-
-  node [shape=plaintext]
-  node [fontcolor=black]
-
-  StandAlone     [ style=filled,fillcolor=gray,label=StandAlone ]
-
-  node [fontcolor=lightgray]
-
-  Unconnected    [ label=Unconnected ]
-
-  CommTrouble [ shape=record,
-    label="{communication loss|{Timeout|BrokenPipe|NetworkFailure}}" ]
-
-  node [fontcolor=gray]
-
-  subgraph cluster_try_connect {
-    label="try to connect, handshake"
-    rank=max
-    WFConnection   [ label=WFConnection ]
-    WFReportParams [ label=WFReportParams ]
-  }
-
-  TearDown       [ label=TearDown ]
-
-  Connected      [ label=Connected,style=filled,fillcolor=green,fontcolor=black ]
-
-  node [fontcolor=lightblue]
-
-  StartingSyncS  [ label=StartingSyncS ]
-  StartingSyncT  [ label=StartingSyncT ]
-
-  subgraph cluster_bitmap_exchange {
-    node [fontcolor=red]
-    fontcolor=red
-    label="new application (WRITE?) requests blocked\lwhile bitmap is exchanged"
-
-    WFBitMapT      [ label=WFBitMapT ]
-    WFSyncUUID     [ label=WFSyncUUID ]
-    WFBitMapS      [ label=WFBitMapS ]
-  }
-
-  node [fontcolor=blue]
-
-  cluster_resync [ shape=record,label="{<any>resynchronisation process running\l'concurrent' application requests allowed|{{<T>PausedSyncT\nSyncTarget}|{<S>PausedSyncS\nSyncSource}}}" ]
-
-  node [shape=box,fontcolor=black]
-
-  // drbdadm [label="drbdadm connect"]
-  // handshake [label="drbd_connect()\ndrbd_do_handshake\ndrbd_sync_handshake() etc."]
-  // comm_error [label="communication trouble"]
-
-  //
-  // edges
-  // --------------------------------------
-
-  StandAlone -> Unconnected [ label="drbdadm connect" ]
-  Unconnected -> StandAlone  [ label="drbdadm disconnect\lor serious communication trouble" ]
-  Unconnected -> WFConnection [ label="receiver thread is started" ]
-  WFConnection -> WFReportParams [ headlabel="accept()\land/or                        \lconnect()\l" ]
-
-  WFReportParams -> StandAlone [ label="during handshake\lpeers do not agree\labout something essential" ]
-  WFReportParams -> Connected [ label="data identical\lno sync needed",color=green,fontcolor=green ]
-
-    WFReportParams -> WFBitMapS
-    WFReportParams -> WFBitMapT
-    WFBitMapT -> WFSyncUUID [minlen=0.1,constraint=false]
-
-      WFBitMapS -> cluster_resync:S
-      WFSyncUUID -> cluster_resync:T
-
-  edge [color=green]
-  cluster_resync:any -> Connected [ label="resnyc done",fontcolor=green ]
-
-  edge [color=red]
-  WFReportParams -> CommTrouble
-  Connected -> CommTrouble
-  cluster_resync:any -> CommTrouble
-  edge [color=black]
-  CommTrouble -> Unconnected [label="receiver thread is stopped" ]
-
-}
diff --git a/Documentation/blockdev/drbd/figures.rst b/Documentation/blockdev/drbd/figures.rst
deleted file mode 100644
index 3e3fd4b8a478..000000000000
--- a/Documentation/blockdev/drbd/figures.rst
+++ /dev/null
@@ -1,28 +0,0 @@
-.. The here included files are intended to help understand the implementation
-
-Data flows that Relate some functions, and write packets
-========================================================
-
-.. kernel-figure:: DRBD-8.3-data-packets.svg
-    :alt:   DRBD-8.3-data-packets.svg
-    :align: center
-
-.. kernel-figure:: DRBD-data-packets.svg
-    :alt:   DRBD-data-packets.svg
-    :align: center
-
-
-Sub graphs of DRBD's state transitions
-======================================
-
-.. kernel-figure:: conn-states-8.dot
-    :alt:   conn-states-8.dot
-    :align: center
-
-.. kernel-figure:: disk-states-8.dot
-    :alt:   disk-states-8.dot
-    :align: center
-
-.. kernel-figure:: node-states-8.dot
-    :alt:   node-states-8.dot
-    :align: center
diff --git a/Documentation/blockdev/drbd/index.rst b/Documentation/blockdev/drbd/index.rst
deleted file mode 100644
index 68ecd5c113e9..000000000000
--- a/Documentation/blockdev/drbd/index.rst
+++ /dev/null
@@ -1,19 +0,0 @@
-==========================================
-Distributed Replicated Block Device - DRBD
-==========================================
-
-Description
-===========
-
-  DRBD is a shared-nothing, synchronously replicated block device. It
-  is designed to serve as a building block for high availability
-  clusters and in this context, is a "drop-in" replacement for shared
-  storage. Simplistically, you could see it as a network RAID 1.
-
-  Please visit http://www.drbd.org to find out more.
-
-.. toctree::
-   :maxdepth: 1
-
-   data-structure-v9
-   figures
diff --git a/Documentation/blockdev/drbd/node-states-8.dot b/Documentation/blockdev/drbd/node-states-8.dot
deleted file mode 100644
index 4a2b00c23547..000000000000
--- a/Documentation/blockdev/drbd/node-states-8.dot
+++ /dev/null
@@ -1,14 +0,0 @@
-digraph node_states {
-	Secondary -> Primary           [ label = "ioctl_set_state()" ]
-	Primary   -> Secondary 	       [ label = "ioctl_set_state()" ]
-}
-
-digraph peer_states {
-	Secondary -> Primary           [ label = "recv state packet" ]
-	Primary   -> Secondary 	       [ label = "recv state packet" ]
-	Primary   -> Unknown 	       [ label = "connection lost" ]
-	Secondary  -> Unknown  	       [ label = "connection lost" ]
-	Unknown   -> Primary           [ label = "connected" ]
-	Unknown   -> Secondary         [ label = "connected" ]
-}
-
diff --git a/Documentation/blockdev/floppy.rst b/Documentation/blockdev/floppy.rst
deleted file mode 100644
index 4a8f31cf4139..000000000000
--- a/Documentation/blockdev/floppy.rst
+++ /dev/null
@@ -1,255 +0,0 @@
-=============
-Floppy Driver
-=============
-
-FAQ list:
-=========
-
-A FAQ list may be found in the fdutils package (see below), and also
-at <http://fdutils.linux.lu/faq.html>.
-
-
-LILO configuration options (Thinkpad users, read this)
-======================================================
-
-The floppy driver is configured using the 'floppy=' option in
-lilo. This option can be typed at the boot prompt, or entered in the
-lilo configuration file.
-
-Example: If your kernel is called linux-2.6.9, type the following line
-at the lilo boot prompt (if you have a thinkpad)::
-
- linux-2.6.9 floppy=thinkpad
-
-You may also enter the following line in /etc/lilo.conf, in the description
-of linux-2.6.9::
-
- append = "floppy=thinkpad"
-
-Several floppy related options may be given, example::
-
- linux-2.6.9 floppy=daring floppy=two_fdc
- append = "floppy=daring floppy=two_fdc"
-
-If you give options both in the lilo config file and on the boot
-prompt, the option strings of both places are concatenated, the boot
-prompt options coming last. That's why there are also options to
-restore the default behavior.
-
-
-Module configuration options
-============================
-
-If you use the floppy driver as a module, use the following syntax::
-
-	modprobe floppy floppy="<options>"
-
-Example::
-
-	modprobe floppy floppy="omnibook messages"
-
-If you need certain options enabled every time you load the floppy driver,
-you can put::
-
-	options floppy floppy="omnibook messages"
-
-in a configuration file in /etc/modprobe.d/.
-
-
-The floppy driver related options are:
-
- floppy=asus_pci
-	Sets the bit mask to allow only units 0 and 1. (default)
-
- floppy=daring
-	Tells the floppy driver that you have a well behaved floppy controller.
-	This allows more efficient and smoother operation, but may fail on
-	certain controllers. This may speed up certain operations.
-
- floppy=0,daring
-	Tells the floppy driver that your floppy controller should be used
-	with caution.
-
- floppy=one_fdc
-	Tells the floppy driver that you have only one floppy controller.
-	(default)
-
- floppy=two_fdc / floppy=<address>,two_fdc
-	Tells the floppy driver that you have two floppy controllers.
-	The second floppy controller is assumed to be at <address>.
-	This option is not needed if the second controller is at address
-	0x370, and if you use the 'cmos' option.
-
- floppy=thinkpad
-	Tells the floppy driver that you have a Thinkpad. Thinkpads use an
-	inverted convention for the disk change line.
-
- floppy=0,thinkpad
-	Tells the floppy driver that you don't have a Thinkpad.
-
- floppy=omnibook / floppy=nodma
-	Tells the floppy driver not to use Dma for data transfers.
-	This is needed on HP Omnibooks, which don't have a workable
-	DMA channel for the floppy driver. This option is also useful
-	if you frequently get "Unable to allocate DMA memory" messages.
-	Indeed, dma memory needs to be continuous in physical memory,
-	and is thus harder to find, whereas non-dma buffers may be
-	allocated in virtual memory. However, I advise against this if
-	you have an FDC without a FIFO (8272A or 82072). 82072A and
-	later are OK. You also need at least a 486 to use nodma.
-	If you use nodma mode, I suggest you also set the FIFO
-	threshold to 10 or lower, in order to limit the number of data
-	transfer interrupts.
-
-	If you have a FIFO-able FDC, the floppy driver automatically
-	falls back on non DMA mode if no DMA-able memory can be found.
-	If you want to avoid this, explicitly ask for 'yesdma'.
-
- floppy=yesdma
-	Tells the floppy driver that a workable DMA channel is available.
-	(default)
-
- floppy=nofifo
-	Disables the FIFO entirely. This is needed if you get "Bus
-	master arbitration error" messages from your Ethernet card (or
-	from other devices) while accessing the floppy.
-
- floppy=usefifo
-	Enables the FIFO. (default)
-
- floppy=<threshold>,fifo_depth
-	Sets the FIFO threshold. This is mostly relevant in DMA
-	mode. If this is higher, the floppy driver tolerates more
-	interrupt latency, but it triggers more interrupts (i.e. it
-	imposes more load on the rest of the system). If this is
-	lower, the interrupt latency should be lower too (faster
-	processor). The benefit of a lower threshold is less
-	interrupts.
-
-	To tune the fifo threshold, switch on over/underrun messages
-	using 'floppycontrol --messages'. Then access a floppy
-	disk. If you get a huge amount of "Over/Underrun - retrying"
-	messages, then the fifo threshold is too low. Try with a
-	higher value, until you only get an occasional Over/Underrun.
-	It is a good idea to compile the floppy driver as a module
-	when doing this tuning. Indeed, it allows to try different
-	fifo values without rebooting the machine for each test. Note
-	that you need to do 'floppycontrol --messages' every time you
-	re-insert the module.
-
-	Usually, tuning the fifo threshold should not be needed, as
-	the default (0xa) is reasonable.
-
- floppy=<drive>,<type>,cmos
-	Sets the CMOS type of <drive> to <type>. This is mandatory if
-	you have more than two floppy drives (only two can be
-	described in the physical CMOS), or if your BIOS uses
-	non-standard CMOS types. The CMOS types are:
-
-	       ==  ==================================
-		0  Use the value of the physical CMOS
-		1  5 1/4 DD
-		2  5 1/4 HD
-		3  3 1/2 DD
-		4  3 1/2 HD
-		5  3 1/2 ED
-		6  3 1/2 ED
-	       16  unknown or not installed
-	       ==  ==================================
-
-	(Note: there are two valid types for ED drives. This is because 5 was
-	initially chosen to represent floppy *tapes*, and 6 for ED drives.
-	AMI ignored this, and used 5 for ED drives. That's why the floppy
-	driver handles both.)
-
- floppy=unexpected_interrupts
-	Print a warning message when an unexpected interrupt is received.
-	(default)
-
- floppy=no_unexpected_interrupts / floppy=L40SX
-	Don't print a message when an unexpected interrupt is received. This
-	is needed on IBM L40SX laptops in certain video modes. (There seems
-	to be an interaction between video and floppy. The unexpected
-	interrupts affect only performance, and can be safely ignored.)
-
- floppy=broken_dcl
-	Don't use the disk change line, but assume that the disk was
-	changed whenever the device node is reopened. Needed on some
-	boxes where the disk change line is broken or unsupported.
-	This should be regarded as a stopgap measure, indeed it makes
-	floppy operation less efficient due to unneeded cache
-	flushings, and slightly more unreliable. Please verify your
-	cable, connection and jumper settings if you have any DCL
-	problems. However, some older drives, and also some laptops
-	are known not to have a DCL.
-
- floppy=debug
-	Print debugging messages.
-
- floppy=messages
-	Print informational messages for some operations (disk change
-	notifications, warnings about over and underruns, and about
-	autodetection).
-
- floppy=silent_dcl_clear
-	Uses a less noisy way to clear the disk change line (which
-	doesn't involve seeks). Implied by 'daring' option.
-
- floppy=<nr>,irq
-	Sets the floppy IRQ to <nr> instead of 6.
-
- floppy=<nr>,dma
-	Sets the floppy DMA channel to <nr> instead of 2.
-
- floppy=slow
-	Use PS/2 stepping rate::
-
-	   PS/2 floppies have much slower step rates than regular floppies.
-	   It's been recommended that take about 1/4 of the default speed
-	   in some more extreme cases.
-
-
-Supporting utilities and additional documentation:
-==================================================
-
-Additional parameters of the floppy driver can be configured at
-runtime. Utilities which do this can be found in the fdutils package.
-This package also contains a new version of mtools which allows to
-access high capacity disks (up to 1992K on a high density 3 1/2 disk!).
-It also contains additional documentation about the floppy driver.
-
-The latest version can be found at fdutils homepage:
-
- http://fdutils.linux.lu
-
-The fdutils releases can be found at:
-
- http://fdutils.linux.lu/download.html
-
- http://www.tux.org/pub/knaff/fdutils/
-
- ftp://metalab.unc.edu/pub/Linux/utils/disk-management/
-
-Reporting problems about the floppy driver
-==========================================
-
-If you have a question or a bug report about the floppy driver, mail
-me at Alain.Knaff@poboxes.com . If you post to Usenet, preferably use
-comp.os.linux.hardware. As the volume in these groups is rather high,
-be sure to include the word "floppy" (or "FLOPPY") in the subject
-line.  If the reported problem happens when mounting floppy disks, be
-sure to mention also the type of the filesystem in the subject line.
-
-Be sure to read the FAQ before mailing/posting any bug reports!
-
-Alain
-
-Changelog
-=========
-
-10-30-2004 :
-		Cleanup, updating, add reference to module configuration.
-		James Nelson <james4765@gmail.com>
-
-6-3-2000 :
-		Original Document
diff --git a/Documentation/blockdev/index.rst b/Documentation/blockdev/index.rst
deleted file mode 100644
index a9af6ed8b4aa..000000000000
--- a/Documentation/blockdev/index.rst
+++ /dev/null
@@ -1,16 +0,0 @@
-:orphan:
-
-===========================
-The Linux RapidIO Subsystem
-===========================
-
-.. toctree::
-   :maxdepth: 1
-
-   floppy
-   nbd
-   paride
-   ramdisk
-   zram
-
-   drbd/index
diff --git a/Documentation/blockdev/nbd.rst b/Documentation/blockdev/nbd.rst
deleted file mode 100644
index d78dfe559dcf..000000000000
--- a/Documentation/blockdev/nbd.rst
+++ /dev/null
@@ -1,31 +0,0 @@
-==================================
-Network Block Device (TCP version)
-==================================
-
-1) Overview
------------
-
-What is it: With this compiled in the kernel (or as a module), Linux
-can use a remote server as one of its block devices. So every time
-the client computer wants to read, e.g., /dev/nb0, it sends a
-request over TCP to the server, which will reply with the data read.
-This can be used for stations with low disk space (or even diskless)
-to borrow disk space from another computer.
-Unlike NFS, it is possible to put any filesystem on it, etc.
-
-For more information, or to download the nbd-client and nbd-server
-tools, go to http://nbd.sf.net/.
-
-The nbd kernel module need only be installed on the client
-system, as the nbd-server is completely in userspace. In fact,
-the nbd-server has been successfully ported to other operating
-systems, including Windows.
-
-A) NBD parameters
------------------
-
-max_part
-	Number of partitions per device (default: 0).
-
-nbds_max
-	Number of block devices that should be initialized (default: 16).
diff --git a/Documentation/blockdev/paride.rst b/Documentation/blockdev/paride.rst
deleted file mode 100644
index 87b4278bf314..000000000000
--- a/Documentation/blockdev/paride.rst
+++ /dev/null
@@ -1,439 +0,0 @@
-===================================
-Linux and parallel port IDE devices
-===================================
-
-PARIDE v1.03   (c) 1997-8  Grant Guenther <grant@torque.net>
-
-1. Introduction
-===============
-
-Owing to the simplicity and near universality of the parallel port interface
-to personal computers, many external devices such as portable hard-disk,
-CD-ROM, LS-120 and tape drives use the parallel port to connect to their
-host computer.  While some devices (notably scanners) use ad-hoc methods
-to pass commands and data through the parallel port interface, most
-external devices are actually identical to an internal model, but with
-a parallel-port adapter chip added in.  Some of the original parallel port
-adapters were little more than mechanisms for multiplexing a SCSI bus.
-(The Iomega PPA-3 adapter used in the ZIP drives is an example of this
-approach).  Most current designs, however, take a different approach.
-The adapter chip reproduces a small ISA or IDE bus in the external device
-and the communication protocol provides operations for reading and writing
-device registers, as well as data block transfer functions.  Sometimes,
-the device being addressed via the parallel cable is a standard SCSI
-controller like an NCR 5380.  The "ditto" family of external tape
-drives use the ISA replicator to interface a floppy disk controller,
-which is then connected to a floppy-tape mechanism.  The vast majority
-of external parallel port devices, however, are now based on standard
-IDE type devices, which require no intermediate controller.  If one
-were to open up a parallel port CD-ROM drive, for instance, one would
-find a standard ATAPI CD-ROM drive, a power supply, and a single adapter
-that interconnected a standard PC parallel port cable and a standard
-IDE cable.  It is usually possible to exchange the CD-ROM device with
-any other device using the IDE interface.
-
-The document describes the support in Linux for parallel port IDE
-devices.  It does not cover parallel port SCSI devices, "ditto" tape
-drives or scanners.  Many different devices are supported by the
-parallel port IDE subsystem, including:
-
-	- MicroSolutions backpack CD-ROM
-	- MicroSolutions backpack PD/CD
-	- MicroSolutions backpack hard-drives
-	- MicroSolutions backpack 8000t tape drive
-	- SyQuest EZ-135, EZ-230 & SparQ drives
-	- Avatar Shark
-	- Imation Superdisk LS-120
-	- Maxell Superdisk LS-120
-	- FreeCom Power CD
-	- Hewlett-Packard 5GB and 8GB tape drives
-	- Hewlett-Packard 7100 and 7200 CD-RW drives
-
-as well as most of the clone and no-name products on the market.
-
-To support such a wide range of devices, PARIDE, the parallel port IDE
-subsystem, is actually structured in three parts.   There is a base
-paride module which provides a registry and some common methods for
-accessing the parallel ports.  The second component is a set of
-high-level drivers for each of the different types of supported devices:
-
-	===	=============
-	pd	IDE disk
-	pcd	ATAPI CD-ROM
-	pf	ATAPI disk
-	pt	ATAPI tape
-	pg	ATAPI generic
-	===	=============
-
-(Currently, the pg driver is only used with CD-R drives).
-
-The high-level drivers function according to the relevant standards.
-The third component of PARIDE is a set of low-level protocol drivers
-for each of the parallel port IDE adapter chips.  Thanks to the interest
-and encouragement of Linux users from many parts of the world,
-support is available for almost all known adapter protocols:
-
-	====    ====================================== ====
-        aten    ATEN EH-100                            (HK)
-        bpck    Microsolutions backpack                (US)
-        comm    DataStor (old-type) "commuter" adapter (TW)
-        dstr    DataStor EP-2000                       (TW)
-        epat    Shuttle EPAT                           (UK)
-        epia    Shuttle EPIA                           (UK)
-	fit2    FIT TD-2000			       (US)
-	fit3    FIT TD-3000			       (US)
-	friq    Freecom IQ cable                       (DE)
-        frpw    Freecom Power                          (DE)
-        kbic    KingByte KBIC-951A and KBIC-971A       (TW)
-	ktti    KT Technology PHd adapter              (SG)
-        on20    OnSpec 90c20                           (US)
-        on26    OnSpec 90c26                           (US)
-	====    ====================================== ====
-
-
-2. Using the PARIDE subsystem
-=============================
-
-While configuring the Linux kernel, you may choose either to build
-the PARIDE drivers into your kernel, or to build them as modules.
-
-In either case, you will need to select "Parallel port IDE device support"
-as well as at least one of the high-level drivers and at least one
-of the parallel port communication protocols.  If you do not know
-what kind of parallel port adapter is used in your drive, you could
-begin by checking the file names and any text files on your DOS
-installation floppy.  Alternatively, you can look at the markings on
-the adapter chip itself.  That's usually sufficient to identify the
-correct device.
-
-You can actually select all the protocol modules, and allow the PARIDE
-subsystem to try them all for you.
-
-For the "brand-name" products listed above, here are the protocol
-and high-level drivers that you would use:
-
-	================	============	======	========
-	Manufacturer		Model		Driver	Protocol
-	================	============	======	========
-	MicroSolutions		CD-ROM		pcd	bpck
-	MicroSolutions		PD drive	pf	bpck
-	MicroSolutions		hard-drive	pd	bpck
-	MicroSolutions          8000t tape      pt      bpck
-	SyQuest			EZ, SparQ	pd	epat
-	Imation			Superdisk	pf	epat
-	Maxell                  Superdisk       pf      friq
-	Avatar			Shark		pd	epat
-	FreeCom			CD-ROM		pcd	frpw
-	Hewlett-Packard		5GB Tape	pt	epat
-	Hewlett-Packard		7200e (CD)	pcd	epat
-	Hewlett-Packard		7200e (CD-R)	pg	epat
-	================	============	======	========
-
-2.1  Configuring built-in drivers
----------------------------------
-
-We recommend that you get to know how the drivers work and how to
-configure them as loadable modules, before attempting to compile a
-kernel with the drivers built-in.
-
-If you built all of your PARIDE support directly into your kernel,
-and you have just a single parallel port IDE device, your kernel should
-locate it automatically for you.  If you have more than one device,
-you may need to give some command line options to your bootloader
-(eg: LILO), how to do that is beyond the scope of this document.
-
-The high-level drivers accept a number of command line parameters, all
-of which are documented in the source files in linux/drivers/block/paride.
-By default, each driver will automatically try all parallel ports it
-can find, and all protocol types that have been installed, until it finds
-a parallel port IDE adapter.  Once it finds one, the probe stops.  So,
-if you have more than one device, you will need to tell the drivers
-how to identify them.  This requires specifying the port address, the
-protocol identification number and, for some devices, the drive's
-chain ID.  While your system is booting, a number of messages are
-displayed on the console.  Like all such messages, they can be
-reviewed with the 'dmesg' command.  Among those messages will be
-some lines like::
-
-	paride: bpck registered as protocol 0
-	paride: epat registered as protocol 1
-
-The numbers will always be the same until you build a new kernel with
-different protocol selections.  You should note these numbers as you
-will need them to identify the devices.
-
-If you happen to be using a MicroSolutions backpack device, you will
-also need to know the unit ID number for each drive.  This is usually
-the last two digits of the drive's serial number (but read MicroSolutions'
-documentation about this).
-
-As an example, let's assume that you have a MicroSolutions PD/CD drive
-with unit ID number 36 connected to the parallel port at 0x378, a SyQuest
-EZ-135 connected to the chained port on the PD/CD drive and also an
-Imation Superdisk connected to port 0x278.  You could give the following
-options on your boot command::
-
-	pd.drive0=0x378,1 pf.drive0=0x278,1 pf.drive1=0x378,0,36
-
-In the last option, pf.drive1 configures device /dev/pf1, the 0x378
-is the parallel port base address, the 0 is the protocol registration
-number and 36 is the chain ID.
-
-Please note:  while PARIDE will work both with and without the
-PARPORT parallel port sharing system that is included by the
-"Parallel port support" option, PARPORT must be included and enabled
-if you want to use chains of devices on the same parallel port.
-
-2.2  Loading and configuring PARIDE as modules
-----------------------------------------------
-
-It is much faster and simpler to get to understand the PARIDE drivers
-if you use them as loadable kernel modules.
-
-Note 1:
-	using these drivers with the "kerneld" automatic module loading
-	system is not recommended for beginners, and is not documented here.
-
-Note 2:
-	if you build PARPORT support as a loadable module, PARIDE must
-	also be built as loadable modules, and PARPORT must be loaded before
-	the PARIDE modules.
-
-To use PARIDE, you must begin by::
-
-	insmod paride
-
-this loads a base module which provides a registry for the protocols,
-among other tasks.
-
-Then, load as many of the protocol modules as you think you might need.
-As you load each module, it will register the protocols that it supports,
-and print a log message to your kernel log file and your console. For
-example::
-
-	# insmod epat
-	paride: epat registered as protocol 0
-	# insmod kbic
-	paride: k951 registered as protocol 1
-        paride: k971 registered as protocol 2
-
-Finally, you can load high-level drivers for each kind of device that
-you have connected.  By default, each driver will autoprobe for a single
-device, but you can support up to four similar devices by giving their
-individual co-ordinates when you load the driver.
-
-For example, if you had two no-name CD-ROM drives both using the
-KingByte KBIC-951A adapter, one on port 0x378 and the other on 0x3bc
-you could give the following command::
-
-	# insmod pcd drive0=0x378,1 drive1=0x3bc,1
-
-For most adapters, giving a port address and protocol number is sufficient,
-but check the source files in linux/drivers/block/paride for more
-information.  (Hopefully someone will write some man pages one day !).
-
-As another example, here's what happens when PARPORT is installed, and
-a SyQuest EZ-135 is attached to port 0x378::
-
-	# insmod paride
-	paride: version 1.0 installed
-	# insmod epat
-	paride: epat registered as protocol 0
-	# insmod pd
-	pd: pd version 1.0, major 45, cluster 64, nice 0
-	pda: Sharing parport1 at 0x378
-	pda: epat 1.0, Shuttle EPAT chip c3 at 0x378, mode 5 (EPP-32), delay 1
-	pda: SyQuest EZ135A, 262144 blocks [128M], (512/16/32), removable media
-	 pda: pda1
-
-Note that the last line is the output from the generic partition table
-scanner - in this case it reports that it has found a disk with one partition.
-
-2.3  Using a PARIDE device
---------------------------
-
-Once the drivers have been loaded, you can access PARIDE devices in the
-same way as their traditional counterparts.  You will probably need to
-create the device "special files".  Here is a simple script that you can
-cut to a file and execute::
-
-  #!/bin/bash
-  #
-  # mkd -- a script to create the device special files for the PARIDE subsystem
-  #
-  function mkdev {
-    mknod $1 $2 $3 $4 ; chmod 0660 $1 ; chown root:disk $1
-  }
-  #
-  function pd {
-    D=$( printf \\$( printf "x%03x" $[ $1 + 97 ] ) )
-    mkdev pd$D b 45 $[ $1 * 16 ]
-    for P in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
-    do mkdev pd$D$P b 45 $[ $1 * 16 + $P ]
-    done
-  }
-  #
-  cd /dev
-  #
-  for u in 0 1 2 3 ; do pd $u ; done
-  for u in 0 1 2 3 ; do mkdev pcd$u b 46 $u ; done
-  for u in 0 1 2 3 ; do mkdev pf$u  b 47 $u ; done
-  for u in 0 1 2 3 ; do mkdev pt$u  c 96 $u ; done
-  for u in 0 1 2 3 ; do mkdev npt$u c 96 $[ $u + 128 ] ; done
-  for u in 0 1 2 3 ; do mkdev pg$u  c 97 $u ; done
-  #
-  # end of mkd
-
-With the device files and drivers in place, you can access PARIDE devices
-like any other Linux device.   For example, to mount a CD-ROM in pcd0, use::
-
-	mount /dev/pcd0 /cdrom
-
-If you have a fresh Avatar Shark cartridge, and the drive is pda, you
-might do something like::
-
-	fdisk /dev/pda		-- make a new partition table with
-				   partition 1 of type 83
-
-	mke2fs /dev/pda1	-- to build the file system
-
-	mkdir /shark		-- make a place to mount the disk
-
-	mount /dev/pda1 /shark
-
-Devices like the Imation superdisk work in the same way, except that
-they do not have a partition table.  For example to make a 120MB
-floppy that you could share with a DOS system::
-
-	mkdosfs /dev/pf0
-	mount /dev/pf0 /mnt
-
-
-2.4  The pf driver
-------------------
-
-The pf driver is intended for use with parallel port ATAPI disk
-devices.  The most common devices in this category are PD drives
-and LS-120 drives.  Traditionally, media for these devices are not
-partitioned.  Consequently, the pf driver does not support partitioned
-media.  This may be changed in a future version of the driver.
-
-2.5  Using the pt driver
-------------------------
-
-The pt driver for parallel port ATAPI tape drives is a minimal driver.
-It does not yet support many of the standard tape ioctl operations.
-For best performance, a block size of 32KB should be used.  You will
-probably want to set the parallel port delay to 0, if you can.
-
-2.6  Using the pg driver
-------------------------
-
-The pg driver can be used in conjunction with the cdrecord program
-to create CD-ROMs.  Please get cdrecord version 1.6.1 or later
-from ftp://ftp.fokus.gmd.de/pub/unix/cdrecord/ .  To record CD-R media
-your parallel port should ideally be set to EPP mode, and the "port delay"
-should be set to 0.  With those settings it is possible to record at 2x
-speed without any buffer underruns.  If you cannot get the driver to work
-in EPP mode, try to use "bidirectional" or "PS/2" mode and 1x speeds only.
-
-
-3. Troubleshooting
-==================
-
-3.1  Use EPP mode if you can
-----------------------------
-
-The most common problems that people report with the PARIDE drivers
-concern the parallel port CMOS settings.  At this time, none of the
-PARIDE protocol modules support ECP mode, or any ECP combination modes.
-If you are able to do so, please set your parallel port into EPP mode
-using your CMOS setup procedure.
-
-3.2  Check the port delay
--------------------------
-
-Some parallel ports cannot reliably transfer data at full speed.  To
-offset the errors, the PARIDE protocol modules introduce a "port
-delay" between each access to the i/o ports.  Each protocol sets
-a default value for this delay.  In most cases, the user can override
-the default and set it to 0 - resulting in somewhat higher transfer
-rates.  In some rare cases (especially with older 486 systems) the
-default delays are not long enough.  if you experience corrupt data
-transfers, or unexpected failures, you may wish to increase the
-port delay.   The delay can be programmed using the "driveN" parameters
-to each of the high-level drivers.  Please see the notes above, or
-read the comments at the beginning of the driver source files in
-linux/drivers/block/paride.
-
-3.3  Some drives need a printer reset
--------------------------------------
-
-There appear to be a number of "noname" external drives on the market
-that do not always power up correctly.  We have noticed this with some
-drives based on OnSpec and older Freecom adapters.  In these rare cases,
-the adapter can often be reinitialised by issuing a "printer reset" on
-the parallel port.  As the reset operation is potentially disruptive in
-multiple device environments, the PARIDE drivers will not do it
-automatically.  You can however, force a printer reset by doing::
-
-	insmod lp reset=1
-	rmmod lp
-
-If you have one of these marginal cases, you should probably build
-your paride drivers as modules, and arrange to do the printer reset
-before loading the PARIDE drivers.
-
-3.4  Use the verbose option and dmesg if you need help
-------------------------------------------------------
-
-While a lot of testing has gone into these drivers to make them work
-as smoothly as possible, problems will arise.  If you do have problems,
-please check all the obvious things first:  does the drive work in
-DOS with the manufacturer's drivers ?  If that doesn't yield any useful
-clues, then please make sure that only one drive is hooked to your system,
-and that either (a) PARPORT is enabled or (b) no other device driver
-is using your parallel port (check in /proc/ioports).  Then, load the
-appropriate drivers (you can load several protocol modules if you want)
-as in::
-
-	# insmod paride
-	# insmod epat
-	# insmod bpck
-	# insmod kbic
-	...
-	# insmod pd verbose=1
-
-(using the correct driver for the type of device you have, of course).
-The verbose=1 parameter will cause the drivers to log a trace of their
-activity as they attempt to locate your drive.
-
-Use 'dmesg' to capture a log of all the PARIDE messages (any messages
-beginning with paride:, a protocol module's name or a driver's name) and
-include that with your bug report.  You can submit a bug report in one
-of two ways.  Either send it directly to the author of the PARIDE suite,
-by e-mail to grant@torque.net, or join the linux-parport mailing list
-and post your report there.
-
-3.5  For more information or help
----------------------------------
-
-You can join the linux-parport mailing list by sending a mail message
-to:
-
-		linux-parport-request@torque.net
-
-with the single word::
-
-		subscribe
-
-in the body of the mail message (not in the subject line).   Please be
-sure that your mail program is correctly set up when you do this,  as
-the list manager is a robot that will subscribe you using the reply
-address in your mail headers.  REMOVE any anti-spam gimmicks you may
-have in your mail headers, when sending mail to the list server.
-
-You might also find some useful information on the linux-parport
-web pages (although they are not always up to date) at
-
-	http://web.archive.org/web/%2E/http://www.torque.net/parport/
diff --git a/Documentation/blockdev/ramdisk.rst b/Documentation/blockdev/ramdisk.rst
deleted file mode 100644
index b7c2268f8dec..000000000000
--- a/Documentation/blockdev/ramdisk.rst
+++ /dev/null
@@ -1,177 +0,0 @@
-==========================================
-Using the RAM disk block device with Linux
-==========================================
-
-.. Contents:
-
-	1) Overview
-	2) Kernel Command Line Parameters
-	3) Using "rdev -r"
-	4) An Example of Creating a Compressed RAM Disk
-
-
-1) Overview
------------
-
-The RAM disk driver is a way to use main system memory as a block device.  It
-is required for initrd, an initial filesystem used if you need to load modules
-in order to access the root filesystem (see Documentation/admin-guide/initrd.rst).  It can
-also be used for a temporary filesystem for crypto work, since the contents
-are erased on reboot.
-
-The RAM disk dynamically grows as more space is required. It does this by using
-RAM from the buffer cache. The driver marks the buffers it is using as dirty
-so that the VM subsystem does not try to reclaim them later.
-
-The RAM disk supports up to 16 RAM disks by default, and can be reconfigured
-to support an unlimited number of RAM disks (at your own risk).  Just change
-the configuration symbol BLK_DEV_RAM_COUNT in the Block drivers config menu
-and (re)build the kernel.
-
-To use RAM disk support with your system, run './MAKEDEV ram' from the /dev
-directory.  RAM disks are all major number 1, and start with minor number 0
-for /dev/ram0, etc.  If used, modern kernels use /dev/ram0 for an initrd.
-
-The new RAM disk also has the ability to load compressed RAM disk images,
-allowing one to squeeze more programs onto an average installation or
-rescue floppy disk.
-
-
-2) Parameters
----------------------------------
-
-2a) Kernel Command Line Parameters
-
-	ramdisk_size=N
-		Size of the ramdisk.
-
-This parameter tells the RAM disk driver to set up RAM disks of N k size.  The
-default is 4096 (4 MB).
-
-2b) Module parameters
-
-	rd_nr
-		/dev/ramX devices created.
-
-	max_part
-		Maximum partition number.
-
-	rd_size
-		See ramdisk_size.
-
-3) Using "rdev -r"
-------------------
-
-The usage of the word (two bytes) that "rdev -r" sets in the kernel image is
-as follows. The low 11 bits (0 -> 10) specify an offset (in 1 k blocks) of up
-to 2 MB (2^11) of where to find the RAM disk (this used to be the size). Bit
-14 indicates that a RAM disk is to be loaded, and bit 15 indicates whether a
-prompt/wait sequence is to be given before trying to read the RAM disk. Since
-the RAM disk dynamically grows as data is being written into it, a size field
-is not required. Bits 11 to 13 are not currently used and may as well be zero.
-These numbers are no magical secrets, as seen below::
-
-  ./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK     0x07FF
-  ./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG          0x8000
-  ./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG            0x4000
-
-Consider a typical two floppy disk setup, where you will have the
-kernel on disk one, and have already put a RAM disk image onto disk #2.
-
-Hence you want to set bits 0 to 13 as 0, meaning that your RAM disk
-starts at an offset of 0 kB from the beginning of the floppy.
-The command line equivalent is: "ramdisk_start=0"
-
-You want bit 14 as one, indicating that a RAM disk is to be loaded.
-The command line equivalent is: "load_ramdisk=1"
-
-You want bit 15 as one, indicating that you want a prompt/keypress
-sequence so that you have a chance to switch floppy disks.
-The command line equivalent is: "prompt_ramdisk=1"
-
-Putting that together gives 2^15 + 2^14 + 0 = 49152 for an rdev word.
-So to create disk one of the set, you would do::
-
-	/usr/src/linux# cat arch/x86/boot/zImage > /dev/fd0
-	/usr/src/linux# rdev /dev/fd0 /dev/fd0
-	/usr/src/linux# rdev -r /dev/fd0 49152
-
-If you make a boot disk that has LILO, then for the above, you would use::
-
-	append = "ramdisk_start=0 load_ramdisk=1 prompt_ramdisk=1"
-
-Since the default start = 0 and the default prompt = 1, you could use::
-
-	append = "load_ramdisk=1"
-
-
-4) An Example of Creating a Compressed RAM Disk
------------------------------------------------
-
-To create a RAM disk image, you will need a spare block device to
-construct it on. This can be the RAM disk device itself, or an
-unused disk partition (such as an unmounted swap partition). For this
-example, we will use the RAM disk device, "/dev/ram0".
-
-Note: This technique should not be done on a machine with less than 8 MB
-of RAM. If using a spare disk partition instead of /dev/ram0, then this
-restriction does not apply.
-
-a) Decide on the RAM disk size that you want. Say 2 MB for this example.
-   Create it by writing to the RAM disk device. (This step is not currently
-   required, but may be in the future.) It is wise to zero out the
-   area (esp. for disks) so that maximal compression is achieved for
-   the unused blocks of the image that you are about to create::
-
-	dd if=/dev/zero of=/dev/ram0 bs=1k count=2048
-
-b) Make a filesystem on it. Say ext2fs for this example::
-
-	mke2fs -vm0 /dev/ram0 2048
-
-c) Mount it, copy the files you want to it (eg: /etc/* /dev/* ...)
-   and unmount it again.
-
-d) Compress the contents of the RAM disk. The level of compression
-   will be approximately 50% of the space used by the files. Unused
-   space on the RAM disk will compress to almost nothing::
-
-	dd if=/dev/ram0 bs=1k count=2048 | gzip -v9 > /tmp/ram_image.gz
-
-e) Put the kernel onto the floppy::
-
-	dd if=zImage of=/dev/fd0 bs=1k
-
-f) Put the RAM disk image onto the floppy, after the kernel. Use an offset
-   that is slightly larger than the kernel, so that you can put another
-   (possibly larger) kernel onto the same floppy later without overlapping
-   the RAM disk image. An offset of 400 kB for kernels about 350 kB in
-   size would be reasonable. Make sure offset+size of ram_image.gz is
-   not larger than the total space on your floppy (usually 1440 kB)::
-
-	dd if=/tmp/ram_image.gz of=/dev/fd0 bs=1k seek=400
-
-g) Use "rdev" to set the boot device, RAM disk offset, prompt flag, etc.
-   For prompt_ramdisk=1, load_ramdisk=1, ramdisk_start=400, one would
-   have 2^15 + 2^14 + 400 = 49552::
-
-	rdev /dev/fd0 /dev/fd0
-	rdev -r /dev/fd0 49552
-
-That is it. You now have your boot/root compressed RAM disk floppy. Some
-users may wish to combine steps (d) and (f) by using a pipe.
-
-
-						Paul Gortmaker 12/95
-
-Changelog:
-----------
-
-10-22-04 :
-		Updated to reflect changes in command line options, remove
-		obsolete references, general cleanup.
-		James Nelson (james4765@gmail.com)
-
-
-12-95 :
-		Original Document
diff --git a/Documentation/blockdev/zram.rst b/Documentation/blockdev/zram.rst
deleted file mode 100644
index 6eccf13219ff..000000000000
--- a/Documentation/blockdev/zram.rst
+++ /dev/null
@@ -1,422 +0,0 @@
-========================================
-zram: Compressed RAM based block devices
-========================================
-
-Introduction
-============
-
-The zram module creates RAM based block devices named /dev/zram<id>
-(<id> = 0, 1, ...). Pages written to these disks are compressed and stored
-in memory itself. These disks allow very fast I/O and compression provides
-good amounts of memory savings. Some of the usecases include /tmp storage,
-use as swap disks, various caches under /var and maybe many more :)
-
-Statistics for individual zram devices are exported through sysfs nodes at
-/sys/block/zram<id>/
-
-Usage
-=====
-
-There are several ways to configure and manage zram device(-s):
-
-a) using zram and zram_control sysfs attributes
-b) using zramctl utility, provided by util-linux (util-linux@vger.kernel.org).
-
-In this document we will describe only 'manual' zram configuration steps,
-IOW, zram and zram_control sysfs attributes.
-
-In order to get a better idea about zramctl please consult util-linux
-documentation, zramctl man-page or `zramctl --help`. Please be informed
-that zram maintainers do not develop/maintain util-linux or zramctl, should
-you have any questions please contact util-linux@vger.kernel.org
-
-Following shows a typical sequence of steps for using zram.
-
-WARNING
-=======
-
-For the sake of simplicity we skip error checking parts in most of the
-examples below. However, it is your sole responsibility to handle errors.
-
-zram sysfs attributes always return negative values in case of errors.
-The list of possible return codes:
-
-========  =============================================================
--EBUSY	  an attempt to modify an attribute that cannot be changed once
-	  the device has been initialised. Please reset device first;
--ENOMEM	  zram was not able to allocate enough memory to fulfil your
-	  needs;
--EINVAL	  invalid input has been provided.
-========  =============================================================
-
-If you use 'echo', the returned value that is changed by 'echo' utility,
-and, in general case, something like::
-
-	echo 3 > /sys/block/zram0/max_comp_streams
-	if [ $? -ne 0 ];
-		handle_error
-	fi
-
-should suffice.
-
-1) Load Module
-==============
-
-::
-
-	modprobe zram num_devices=4
-	This creates 4 devices: /dev/zram{0,1,2,3}
-
-num_devices parameter is optional and tells zram how many devices should be
-pre-created. Default: 1.
-
-2) Set max number of compression streams
-========================================
-
-Regardless the value passed to this attribute, ZRAM will always
-allocate multiple compression streams - one per online CPUs - thus
-allowing several concurrent compression operations. The number of
-allocated compression streams goes down when some of the CPUs
-become offline. There is no single-compression-stream mode anymore,
-unless you are running a UP system or has only 1 CPU online.
-
-To find out how many streams are currently available::
-
-	cat /sys/block/zram0/max_comp_streams
-
-3) Select compression algorithm
-===============================
-
-Using comp_algorithm device attribute one can see available and
-currently selected (shown in square brackets) compression algorithms,
-change selected compression algorithm (once the device is initialised
-there is no way to change compression algorithm).
-
-Examples::
-
-	#show supported compression algorithms
-	cat /sys/block/zram0/comp_algorithm
-	lzo [lz4]
-
-	#select lzo compression algorithm
-	echo lzo > /sys/block/zram0/comp_algorithm
-
-For the time being, the `comp_algorithm` content does not necessarily
-show every compression algorithm supported by the kernel. We keep this
-list primarily to simplify device configuration and one can configure
-a new device with a compression algorithm that is not listed in
-`comp_algorithm`. The thing is that, internally, ZRAM uses Crypto API
-and, if some of the algorithms were built as modules, it's impossible
-to list all of them using, for instance, /proc/crypto or any other
-method. This, however, has an advantage of permitting the usage of
-custom crypto compression modules (implementing S/W or H/W compression).
-
-4) Set Disksize
-===============
-
-Set disk size by writing the value to sysfs node 'disksize'.
-The value can be either in bytes or you can use mem suffixes.
-Examples::
-
-	# Initialize /dev/zram0 with 50MB disksize
-	echo $((50*1024*1024)) > /sys/block/zram0/disksize
-
-	# Using mem suffixes
-	echo 256K > /sys/block/zram0/disksize
-	echo 512M > /sys/block/zram0/disksize
-	echo 1G > /sys/block/zram0/disksize
-
-Note:
-There is little point creating a zram of greater than twice the size of memory
-since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
-size of the disk when not in use so a huge zram is wasteful.
-
-5) Set memory limit: Optional
-=============================
-
-Set memory limit by writing the value to sysfs node 'mem_limit'.
-The value can be either in bytes or you can use mem suffixes.
-In addition, you could change the value in runtime.
-Examples::
-
-	# limit /dev/zram0 with 50MB memory
-	echo $((50*1024*1024)) > /sys/block/zram0/mem_limit
-
-	# Using mem suffixes
-	echo 256K > /sys/block/zram0/mem_limit
-	echo 512M > /sys/block/zram0/mem_limit
-	echo 1G > /sys/block/zram0/mem_limit
-
-	# To disable memory limit
-	echo 0 > /sys/block/zram0/mem_limit
-
-6) Activate
-===========
-
-::
-
-	mkswap /dev/zram0
-	swapon /dev/zram0
-
-	mkfs.ext4 /dev/zram1
-	mount /dev/zram1 /tmp
-
-7) Add/remove zram devices
-==========================
-
-zram provides a control interface, which enables dynamic (on-demand) device
-addition and removal.
-
-In order to add a new /dev/zramX device, perform read operation on hot_add
-attribute. This will return either new device's device id (meaning that you
-can use /dev/zram<id>) or error code.
-
-Example::
-
-	cat /sys/class/zram-control/hot_add
-	1
-
-To remove the existing /dev/zramX device (where X is a device id)
-execute::
-
-	echo X > /sys/class/zram-control/hot_remove
-
-8) Stats
-========
-
-Per-device statistics are exported as various nodes under /sys/block/zram<id>/
-
-A brief description of exported device attributes. For more details please
-read Documentation/ABI/testing/sysfs-block-zram.
-
-======================  ======  ===============================================
-Name            	access            description
-======================  ======  ===============================================
-disksize          	RW	show and set the device's disk size
-initstate         	RO	shows the initialization state of the device
-reset             	WO	trigger device reset
-mem_used_max      	WO	reset the `mem_used_max` counter (see later)
-mem_limit         	WO	specifies the maximum amount of memory ZRAM can
-				use to store the compressed data
-writeback_limit   	WO	specifies the maximum amount of write IO zram
-				can write out to backing device as 4KB unit
-writeback_limit_enable  RW	show and set writeback_limit feature
-max_comp_streams  	RW	the number of possible concurrent compress
-				operations
-comp_algorithm    	RW	show and change the compression algorithm
-compact           	WO	trigger memory compaction
-debug_stat        	RO	this file is used for zram debugging purposes
-backing_dev	  	RW	set up backend storage for zram to write out
-idle		  	WO	mark allocated slot as idle
-======================  ======  ===============================================
-
-
-User space is advised to use the following files to read the device statistics.
-
-File /sys/block/zram<id>/stat
-
-Represents block layer statistics. Read Documentation/block/stat.rst for
-details.
-
-File /sys/block/zram<id>/io_stat
-
-The stat file represents device's I/O statistics not accounted by block
-layer and, thus, not available in zram<id>/stat file. It consists of a
-single line of text and contains the following stats separated by
-whitespace:
-
- =============    =============================================================
- failed_reads     The number of failed reads
- failed_writes    The number of failed writes
- invalid_io       The number of non-page-size-aligned I/O requests
- notify_free      Depending on device usage scenario it may account
-
-                  a) the number of pages freed because of swap slot free
-                     notifications
-                  b) the number of pages freed because of
-                     REQ_OP_DISCARD requests sent by bio. The former ones are
-                     sent to a swap block device when a swap slot is freed,
-                     which implies that this disk is being used as a swap disk.
-
-                  The latter ones are sent by filesystem mounted with
-                  discard option, whenever some data blocks are getting
-                  discarded.
- =============    =============================================================
-
-File /sys/block/zram<id>/mm_stat
-
-The stat file represents device's mm statistics. It consists of a single
-line of text and contains the following stats separated by whitespace:
-
- ================ =============================================================
- orig_data_size   uncompressed size of data stored in this disk.
-		  This excludes same-element-filled pages (same_pages) since
-		  no memory is allocated for them.
-                  Unit: bytes
- compr_data_size  compressed size of data stored in this disk
- mem_used_total   the amount of memory allocated for this disk. This
-                  includes allocator fragmentation and metadata overhead,
-                  allocated for this disk. So, allocator space efficiency
-                  can be calculated using compr_data_size and this statistic.
-                  Unit: bytes
- mem_limit        the maximum amount of memory ZRAM can use to store
-                  the compressed data
- mem_used_max     the maximum amount of memory zram have consumed to
-                  store the data
- same_pages       the number of same element filled pages written to this disk.
-                  No memory is allocated for such pages.
- pages_compacted  the number of pages freed during compaction
- huge_pages	  the number of incompressible pages
- ================ =============================================================
-
-File /sys/block/zram<id>/bd_stat
-
-The stat file represents device's backing device statistics. It consists of
-a single line of text and contains the following stats separated by whitespace:
-
- ============== =============================================================
- bd_count	size of data written in backing device.
-		Unit: 4K bytes
- bd_reads	the number of reads from backing device
-		Unit: 4K bytes
- bd_writes	the number of writes to backing device
-		Unit: 4K bytes
- ============== =============================================================
-
-9) Deactivate
-=============
-
-::
-
-	swapoff /dev/zram0
-	umount /dev/zram1
-
-10) Reset
-=========
-
-	Write any positive value to 'reset' sysfs node::
-
-		echo 1 > /sys/block/zram0/reset
-		echo 1 > /sys/block/zram1/reset
-
-	This frees all the memory allocated for the given device and
-	resets the disksize to zero. You must set the disksize again
-	before reusing the device.
-
-Optional Feature
-================
-
-writeback
----------
-
-With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible page
-to backing storage rather than keeping it in memory.
-To use the feature, admin should set up backing device via::
-
-	echo /dev/sda5 > /sys/block/zramX/backing_dev
-
-before disksize setting. It supports only partition at this moment.
-If admin want to use incompressible page writeback, they could do via::
-
-	echo huge > /sys/block/zramX/write
-
-To use idle page writeback, first, user need to declare zram pages
-as idle::
-
-	echo all > /sys/block/zramX/idle
-
-From now on, any pages on zram are idle pages. The idle mark
-will be removed until someone request access of the block.
-IOW, unless there is access request, those pages are still idle pages.
-
-Admin can request writeback of those idle pages at right timing via::
-
-	echo idle > /sys/block/zramX/writeback
-
-With the command, zram writeback idle pages from memory to the storage.
-
-If there are lots of write IO with flash device, potentially, it has
-flash wearout problem so that admin needs to design write limitation
-to guarantee storage health for entire product life.
-
-To overcome the concern, zram supports "writeback_limit" feature.
-The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
-any writeback. IOW, if admin want to apply writeback budget, he should
-enable writeback_limit_enable via::
-
-	$ echo 1 > /sys/block/zramX/writeback_limit_enable
-
-Once writeback_limit_enable is set, zram doesn't allow any writeback
-until admin set the budget via /sys/block/zramX/writeback_limit.
-
-(If admin doesn't enable writeback_limit_enable, writeback_limit's value
-assigned via /sys/block/zramX/writeback_limit is meaninless.)
-
-If admin want to limit writeback as per-day 400M, he could do it
-like below::
-
-	$ MB_SHIFT=20
-	$ 4K_SHIFT=12
-	$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
-		/sys/block/zram0/writeback_limit.
-	$ echo 1 > /sys/block/zram0/writeback_limit_enable
-
-If admin want to allow further write again once the bugdet is exausted,
-he could do it like below::
-
-	$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
-		/sys/block/zram0/writeback_limit
-
-If admin want to see remaining writeback budget since he set::
-
-	$ cat /sys/block/zramX/writeback_limit
-
-If admin want to disable writeback limit, he could do::
-
-	$ echo 0 > /sys/block/zramX/writeback_limit_enable
-
-The writeback_limit count will reset whenever you reset zram(e.g.,
-system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
-writeback happened until you reset the zram to allocate extra writeback
-budget in next setting is user's job.
-
-If admin want to measure writeback count in a certain period, he could
-know it via /sys/block/zram0/bd_stat's 3rd column.
-
-memory tracking
-===============
-
-With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the
-zram block. It could be useful to catch cold or incompressible
-pages of the process with*pagemap.
-
-If you enable the feature, you could see block state via
-/sys/kernel/debug/zram/zram0/block_state". The output is as follows::
-
-	  300    75.033841 .wh.
-	  301    63.806904 s...
-	  302    63.806919 ..hi
-
-First column
-	zram's block index.
-Second column
-	access time since the system was booted
-Third column
-	state of the block:
-
-	s:
-		same page
-	w:
-		written page to backing store
-	h:
-		huge page
-	i:
-		idle page
-
-First line of above example says 300th block is accessed at 75.033841sec
-and the block's state is huge so it is written back to the backing
-storage. It's a debugging feature so anyone shouldn't rely on it to work
-properly.
-
-Nitin Gupta
-ngupta@vflare.org
-- 
cgit 


From 4d3beaa06d3536aa8968d1828a66bd5ccb5036ac Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 19 Apr 2019 21:39:29 -0300
Subject: docs: security: move some books to it and update

The following files belong to security:

  Documentation/security/LSM.rst -> Documentation/security/lsm-development.rst
  Documentation/lsm.txt -> Documentation/security/lsm.rst
  Documentation/SAK.txt -> Documentation/security/sak.rst
  Documentation/siphash.txt -> Documentation/security/siphash.rst

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/SAK.txt                       |  91 -------------
 Documentation/lsm.txt                       | 201 ----------------------------
 Documentation/security/LSM.rst              |  17 ---
 Documentation/security/index.rst            |   5 +-
 Documentation/security/lsm-development.rst  |  17 +++
 Documentation/security/lsm.rst              | 201 ++++++++++++++++++++++++++++
 Documentation/security/sak.rst              |  91 +++++++++++++
 Documentation/security/siphash.rst          | 189 ++++++++++++++++++++++++++
 Documentation/security/tpm/index.rst        |   1 +
 Documentation/security/tpm/xen-tpmfront.rst |   2 -
 Documentation/siphash.txt                   | 189 --------------------------
 11 files changed, 503 insertions(+), 501 deletions(-)
 delete mode 100644 Documentation/SAK.txt
 delete mode 100644 Documentation/lsm.txt
 delete mode 100644 Documentation/security/LSM.rst
 create mode 100644 Documentation/security/lsm-development.rst
 create mode 100644 Documentation/security/lsm.rst
 create mode 100644 Documentation/security/sak.rst
 create mode 100644 Documentation/security/siphash.rst
 delete mode 100644 Documentation/siphash.txt

(limited to 'Documentation')

diff --git a/Documentation/SAK.txt b/Documentation/SAK.txt
deleted file mode 100644
index 260e1d3687bd..000000000000
--- a/Documentation/SAK.txt
+++ /dev/null
@@ -1,91 +0,0 @@
-=========================================
-Linux Secure Attention Key (SAK) handling
-=========================================
-
-:Date: 18 March 2001
-:Author: Andrew Morton
-
-An operating system's Secure Attention Key is a security tool which is
-provided as protection against trojan password capturing programs.  It
-is an undefeatable way of killing all programs which could be
-masquerading as login applications.  Users need to be taught to enter
-this key sequence before they log in to the system.
-
-From the PC keyboard, Linux has two similar but different ways of
-providing SAK.  One is the ALT-SYSRQ-K sequence.  You shouldn't use
-this sequence.  It is only available if the kernel was compiled with
-sysrq support.
-
-The proper way of generating a SAK is to define the key sequence using
-``loadkeys``.  This will work whether or not sysrq support is compiled
-into the kernel.
-
-SAK works correctly when the keyboard is in raw mode.  This means that
-once defined, SAK will kill a running X server.  If the system is in
-run level 5, the X server will restart.  This is what you want to
-happen.
-
-What key sequence should you use? Well, CTRL-ALT-DEL is used to reboot
-the machine.  CTRL-ALT-BACKSPACE is magical to the X server.  We'll
-choose CTRL-ALT-PAUSE.
-
-In your rc.sysinit (or rc.local) file, add the command::
-
-	echo "control alt keycode 101 = SAK" | /bin/loadkeys
-
-And that's it!  Only the superuser may reprogram the SAK key.
-
-
-.. note::
-
-  1. Linux SAK is said to be not a "true SAK" as is required by
-     systems which implement C2 level security.  This author does not
-     know why.
-
-
-  2. On the PC keyboard, SAK kills all applications which have
-     /dev/console opened.
-
-     Unfortunately this includes a number of things which you don't
-     actually want killed.  This is because these applications are
-     incorrectly holding /dev/console open.  Be sure to complain to your
-     Linux distributor about this!
-
-     You can identify processes which will be killed by SAK with the
-     command::
-
-	# ls -l /proc/[0-9]*/fd/* | grep console
-	l-wx------    1 root     root           64 Mar 18 00:46 /proc/579/fd/0 -> /dev/console
-
-     Then::
-
-	# ps aux|grep 579
-	root       579  0.0  0.1  1088  436 ?        S    00:43   0:00 gpm -t ps/2
-
-     So ``gpm`` will be killed by SAK.  This is a bug in gpm.  It should
-     be closing standard input.  You can work around this by finding the
-     initscript which launches gpm and changing it thusly:
-
-     Old::
-
-	daemon gpm
-
-     New::
-
-	daemon gpm < /dev/null
-
-     Vixie cron also seems to have this problem, and needs the same treatment.
-
-     Also, one prominent Linux distribution has the following three
-     lines in its rc.sysinit and rc scripts::
-
-	exec 3<&0
-	exec 4>&1
-	exec 5>&2
-
-     These commands cause **all** daemons which are launched by the
-     initscripts to have file descriptors 3, 4 and 5 attached to
-     /dev/console.  So SAK kills them all.  A workaround is to simply
-     delete these lines, but this may cause system management
-     applications to malfunction - test everything well.
-
diff --git a/Documentation/lsm.txt b/Documentation/lsm.txt
deleted file mode 100644
index ad4dfd020e0d..000000000000
--- a/Documentation/lsm.txt
+++ /dev/null
@@ -1,201 +0,0 @@
-========================================================
-Linux Security Modules: General Security Hooks for Linux
-========================================================
-
-:Author: Stephen Smalley
-:Author: Timothy Fraser
-:Author: Chris Vance
-
-.. note::
-
-   The APIs described in this book are outdated.
-
-Introduction
-============
-
-In March 2001, the National Security Agency (NSA) gave a presentation
-about Security-Enhanced Linux (SELinux) at the 2.5 Linux Kernel Summit.
-SELinux is an implementation of flexible and fine-grained
-nondiscretionary access controls in the Linux kernel, originally
-implemented as its own particular kernel patch. Several other security
-projects (e.g. RSBAC, Medusa) have also developed flexible access
-control architectures for the Linux kernel, and various projects have
-developed particular access control models for Linux (e.g. LIDS, DTE,
-SubDomain). Each project has developed and maintained its own kernel
-patch to support its security needs.
-
-In response to the NSA presentation, Linus Torvalds made a set of
-remarks that described a security framework he would be willing to
-consider for inclusion in the mainstream Linux kernel. He described a
-general framework that would provide a set of security hooks to control
-operations on kernel objects and a set of opaque security fields in
-kernel data structures for maintaining security attributes. This
-framework could then be used by loadable kernel modules to implement any
-desired model of security. Linus also suggested the possibility of
-migrating the Linux capabilities code into such a module.
-
-The Linux Security Modules (LSM) project was started by WireX to develop
-such a framework. LSM is a joint development effort by several security
-projects, including Immunix, SELinux, SGI and Janus, and several
-individuals, including Greg Kroah-Hartman and James Morris, to develop a
-Linux kernel patch that implements this framework. The patch is
-currently tracking the 2.4 series and is targeted for integration into
-the 2.5 development series. This technical report provides an overview
-of the framework and the example capabilities security module provided
-by the LSM kernel patch.
-
-LSM Framework
-=============
-
-The LSM kernel patch provides a general kernel framework to support
-security modules. In particular, the LSM framework is primarily focused
-on supporting access control modules, although future development is
-likely to address other security needs such as auditing. By itself, the
-framework does not provide any additional security; it merely provides
-the infrastructure to support security modules. The LSM kernel patch
-also moves most of the capabilities logic into an optional security
-module, with the system defaulting to the traditional superuser logic.
-This capabilities module is discussed further in
-`LSM Capabilities Module <#cap>`__.
-
-The LSM kernel patch adds security fields to kernel data structures and
-inserts calls to hook functions at critical points in the kernel code to
-manage the security fields and to perform access control. It also adds
-functions for registering and unregistering security modules, and adds a
-general :c:func:`security()` system call to support new system calls
-for security-aware applications.
-
-The LSM security fields are simply ``void*`` pointers. For process and
-program execution security information, security fields were added to
-:c:type:`struct task_struct <task_struct>` and
-:c:type:`struct linux_binprm <linux_binprm>`. For filesystem
-security information, a security field was added to :c:type:`struct
-super_block <super_block>`. For pipe, file, and socket security
-information, security fields were added to :c:type:`struct inode
-<inode>` and :c:type:`struct file <file>`. For packet and
-network device security information, security fields were added to
-:c:type:`struct sk_buff <sk_buff>` and :c:type:`struct
-net_device <net_device>`. For System V IPC security information,
-security fields were added to :c:type:`struct kern_ipc_perm
-<kern_ipc_perm>` and :c:type:`struct msg_msg
-<msg_msg>`; additionally, the definitions for :c:type:`struct
-msg_msg <msg_msg>`, struct msg_queue, and struct shmid_kernel
-were moved to header files (``include/linux/msg.h`` and
-``include/linux/shm.h`` as appropriate) to allow the security modules to
-use these definitions.
-
-Each LSM hook is a function pointer in a global table, security_ops.
-This table is a :c:type:`struct security_operations
-<security_operations>` structure as defined by
-``include/linux/security.h``. Detailed documentation for each hook is
-included in this header file. At present, this structure consists of a
-collection of substructures that group related hooks based on the kernel
-object (e.g. task, inode, file, sk_buff, etc) as well as some top-level
-hook function pointers for system operations. This structure is likely
-to be flattened in the future for performance. The placement of the hook
-calls in the kernel code is described by the "called:" lines in the
-per-hook documentation in the header file. The hook calls can also be
-easily found in the kernel code by looking for the string
-"security_ops->".
-
-Linus mentioned per-process security hooks in his original remarks as a
-possible alternative to global security hooks. However, if LSM were to
-start from the perspective of per-process hooks, then the base framework
-would have to deal with how to handle operations that involve multiple
-processes (e.g. kill), since each process might have its own hook for
-controlling the operation. This would require a general mechanism for
-composing hooks in the base framework. Additionally, LSM would still
-need global hooks for operations that have no process context (e.g.
-network input operations). Consequently, LSM provides global security
-hooks, but a security module is free to implement per-process hooks
-(where that makes sense) by storing a security_ops table in each
-process' security field and then invoking these per-process hooks from
-the global hooks. The problem of composition is thus deferred to the
-module.
-
-The global security_ops table is initialized to a set of hook functions
-provided by a dummy security module that provides traditional superuser
-logic. A :c:func:`register_security()` function (in
-``security/security.c``) is provided to allow a security module to set
-security_ops to refer to its own hook functions, and an
-:c:func:`unregister_security()` function is provided to revert
-security_ops to the dummy module hooks. This mechanism is used to set
-the primary security module, which is responsible for making the final
-decision for each hook.
-
-LSM also provides a simple mechanism for stacking additional security
-modules with the primary security module. It defines
-:c:func:`register_security()` and
-:c:func:`unregister_security()` hooks in the :c:type:`struct
-security_operations <security_operations>` structure and
-provides :c:func:`mod_reg_security()` and
-:c:func:`mod_unreg_security()` functions that invoke these hooks
-after performing some sanity checking. A security module can call these
-functions in order to stack with other modules. However, the actual
-details of how this stacking is handled are deferred to the module,
-which can implement these hooks in any way it wishes (including always
-returning an error if it does not wish to support stacking). In this
-manner, LSM again defers the problem of composition to the module.
-
-Although the LSM hooks are organized into substructures based on kernel
-object, all of the hooks can be viewed as falling into two major
-categories: hooks that are used to manage the security fields and hooks
-that are used to perform access control. Examples of the first category
-of hooks include the :c:func:`alloc_security()` and
-:c:func:`free_security()` hooks defined for each kernel data
-structure that has a security field. These hooks are used to allocate
-and free security structures for kernel objects. The first category of
-hooks also includes hooks that set information in the security field
-after allocation, such as the :c:func:`post_lookup()` hook in
-:c:type:`struct inode_security_ops <inode_security_ops>`.
-This hook is used to set security information for inodes after
-successful lookup operations. An example of the second category of hooks
-is the :c:func:`permission()` hook in :c:type:`struct
-inode_security_ops <inode_security_ops>`. This hook checks
-permission when accessing an inode.
-
-LSM Capabilities Module
-=======================
-
-The LSM kernel patch moves most of the existing POSIX.1e capabilities
-logic into an optional security module stored in the file
-``security/capability.c``. This change allows users who do not want to
-use capabilities to omit this code entirely from their kernel, instead
-using the dummy module for traditional superuser logic or any other
-module that they desire. This change also allows the developers of the
-capabilities logic to maintain and enhance their code more freely,
-without needing to integrate patches back into the base kernel.
-
-In addition to moving the capabilities logic, the LSM kernel patch could
-move the capability-related fields from the kernel data structures into
-the new security fields managed by the security modules. However, at
-present, the LSM kernel patch leaves the capability fields in the kernel
-data structures. In his original remarks, Linus suggested that this
-might be preferable so that other security modules can be easily stacked
-with the capabilities module without needing to chain multiple security
-structures on the security field. It also avoids imposing extra overhead
-on the capabilities module to manage the security fields. However, the
-LSM framework could certainly support such a move if it is determined to
-be desirable, with only a few additional changes described below.
-
-At present, the capabilities logic for computing process capabilities on
-:c:func:`execve()` and :c:func:`set\*uid()`, checking
-capabilities for a particular process, saving and checking capabilities
-for netlink messages, and handling the :c:func:`capget()` and
-:c:func:`capset()` system calls have been moved into the
-capabilities module. There are still a few locations in the base kernel
-where capability-related fields are directly examined or modified, but
-the current version of the LSM patch does allow a security module to
-completely replace the assignment and testing of capabilities. These few
-locations would need to be changed if the capability-related fields were
-moved into the security field. The following is a list of known
-locations that still perform such direct examination or modification of
-capability-related fields:
-
--  ``fs/open.c``::c:func:`sys_access()`
-
--  ``fs/lockd/host.c``::c:func:`nlm_bind_host()`
-
--  ``fs/nfsd/auth.c``::c:func:`nfsd_setuser()`
-
--  ``fs/proc/array.c``::c:func:`task_cap()`
diff --git a/Documentation/security/LSM.rst b/Documentation/security/LSM.rst
deleted file mode 100644
index 31d92bc5fdd2..000000000000
--- a/Documentation/security/LSM.rst
+++ /dev/null
@@ -1,17 +0,0 @@
-=================================
-Linux Security Module Development
-=================================
-
-Based on https://lkml.org/lkml/2007/10/26/215,
-a new LSM is accepted into the kernel when its intent (a description of
-what it tries to protect against and in what cases one would expect to
-use it) has been appropriately documented in ``Documentation/admin-guide/LSM/``.
-This allows an LSM's code to be easily compared to its goals, and so
-that end users and distros can make a more informed decision about which
-LSMs suit their requirements.
-
-For extensive documentation on the available LSM hook interfaces, please
-see ``include/linux/lsm_hooks.h`` and associated structures:
-
-.. kernel-doc:: include/linux/lsm_hooks.h
-   :internal:
diff --git a/Documentation/security/index.rst b/Documentation/security/index.rst
index aad6d92ffe31..fc503dd689a7 100644
--- a/Documentation/security/index.rst
+++ b/Documentation/security/index.rst
@@ -8,7 +8,10 @@ Security Documentation
    credentials
    IMA-templates
    keys/index
-   LSM
+   lsm
+   lsm-development
+   sak
    SCTP
    self-protection
+   siphash
    tpm/index
diff --git a/Documentation/security/lsm-development.rst b/Documentation/security/lsm-development.rst
new file mode 100644
index 000000000000..31d92bc5fdd2
--- /dev/null
+++ b/Documentation/security/lsm-development.rst
@@ -0,0 +1,17 @@
+=================================
+Linux Security Module Development
+=================================
+
+Based on https://lkml.org/lkml/2007/10/26/215,
+a new LSM is accepted into the kernel when its intent (a description of
+what it tries to protect against and in what cases one would expect to
+use it) has been appropriately documented in ``Documentation/admin-guide/LSM/``.
+This allows an LSM's code to be easily compared to its goals, and so
+that end users and distros can make a more informed decision about which
+LSMs suit their requirements.
+
+For extensive documentation on the available LSM hook interfaces, please
+see ``include/linux/lsm_hooks.h`` and associated structures:
+
+.. kernel-doc:: include/linux/lsm_hooks.h
+   :internal:
diff --git a/Documentation/security/lsm.rst b/Documentation/security/lsm.rst
new file mode 100644
index 000000000000..ad4dfd020e0d
--- /dev/null
+++ b/Documentation/security/lsm.rst
@@ -0,0 +1,201 @@
+========================================================
+Linux Security Modules: General Security Hooks for Linux
+========================================================
+
+:Author: Stephen Smalley
+:Author: Timothy Fraser
+:Author: Chris Vance
+
+.. note::
+
+   The APIs described in this book are outdated.
+
+Introduction
+============
+
+In March 2001, the National Security Agency (NSA) gave a presentation
+about Security-Enhanced Linux (SELinux) at the 2.5 Linux Kernel Summit.
+SELinux is an implementation of flexible and fine-grained
+nondiscretionary access controls in the Linux kernel, originally
+implemented as its own particular kernel patch. Several other security
+projects (e.g. RSBAC, Medusa) have also developed flexible access
+control architectures for the Linux kernel, and various projects have
+developed particular access control models for Linux (e.g. LIDS, DTE,
+SubDomain). Each project has developed and maintained its own kernel
+patch to support its security needs.
+
+In response to the NSA presentation, Linus Torvalds made a set of
+remarks that described a security framework he would be willing to
+consider for inclusion in the mainstream Linux kernel. He described a
+general framework that would provide a set of security hooks to control
+operations on kernel objects and a set of opaque security fields in
+kernel data structures for maintaining security attributes. This
+framework could then be used by loadable kernel modules to implement any
+desired model of security. Linus also suggested the possibility of
+migrating the Linux capabilities code into such a module.
+
+The Linux Security Modules (LSM) project was started by WireX to develop
+such a framework. LSM is a joint development effort by several security
+projects, including Immunix, SELinux, SGI and Janus, and several
+individuals, including Greg Kroah-Hartman and James Morris, to develop a
+Linux kernel patch that implements this framework. The patch is
+currently tracking the 2.4 series and is targeted for integration into
+the 2.5 development series. This technical report provides an overview
+of the framework and the example capabilities security module provided
+by the LSM kernel patch.
+
+LSM Framework
+=============
+
+The LSM kernel patch provides a general kernel framework to support
+security modules. In particular, the LSM framework is primarily focused
+on supporting access control modules, although future development is
+likely to address other security needs such as auditing. By itself, the
+framework does not provide any additional security; it merely provides
+the infrastructure to support security modules. The LSM kernel patch
+also moves most of the capabilities logic into an optional security
+module, with the system defaulting to the traditional superuser logic.
+This capabilities module is discussed further in
+`LSM Capabilities Module <#cap>`__.
+
+The LSM kernel patch adds security fields to kernel data structures and
+inserts calls to hook functions at critical points in the kernel code to
+manage the security fields and to perform access control. It also adds
+functions for registering and unregistering security modules, and adds a
+general :c:func:`security()` system call to support new system calls
+for security-aware applications.
+
+The LSM security fields are simply ``void*`` pointers. For process and
+program execution security information, security fields were added to
+:c:type:`struct task_struct <task_struct>` and
+:c:type:`struct linux_binprm <linux_binprm>`. For filesystem
+security information, a security field was added to :c:type:`struct
+super_block <super_block>`. For pipe, file, and socket security
+information, security fields were added to :c:type:`struct inode
+<inode>` and :c:type:`struct file <file>`. For packet and
+network device security information, security fields were added to
+:c:type:`struct sk_buff <sk_buff>` and :c:type:`struct
+net_device <net_device>`. For System V IPC security information,
+security fields were added to :c:type:`struct kern_ipc_perm
+<kern_ipc_perm>` and :c:type:`struct msg_msg
+<msg_msg>`; additionally, the definitions for :c:type:`struct
+msg_msg <msg_msg>`, struct msg_queue, and struct shmid_kernel
+were moved to header files (``include/linux/msg.h`` and
+``include/linux/shm.h`` as appropriate) to allow the security modules to
+use these definitions.
+
+Each LSM hook is a function pointer in a global table, security_ops.
+This table is a :c:type:`struct security_operations
+<security_operations>` structure as defined by
+``include/linux/security.h``. Detailed documentation for each hook is
+included in this header file. At present, this structure consists of a
+collection of substructures that group related hooks based on the kernel
+object (e.g. task, inode, file, sk_buff, etc) as well as some top-level
+hook function pointers for system operations. This structure is likely
+to be flattened in the future for performance. The placement of the hook
+calls in the kernel code is described by the "called:" lines in the
+per-hook documentation in the header file. The hook calls can also be
+easily found in the kernel code by looking for the string
+"security_ops->".
+
+Linus mentioned per-process security hooks in his original remarks as a
+possible alternative to global security hooks. However, if LSM were to
+start from the perspective of per-process hooks, then the base framework
+would have to deal with how to handle operations that involve multiple
+processes (e.g. kill), since each process might have its own hook for
+controlling the operation. This would require a general mechanism for
+composing hooks in the base framework. Additionally, LSM would still
+need global hooks for operations that have no process context (e.g.
+network input operations). Consequently, LSM provides global security
+hooks, but a security module is free to implement per-process hooks
+(where that makes sense) by storing a security_ops table in each
+process' security field and then invoking these per-process hooks from
+the global hooks. The problem of composition is thus deferred to the
+module.
+
+The global security_ops table is initialized to a set of hook functions
+provided by a dummy security module that provides traditional superuser
+logic. A :c:func:`register_security()` function (in
+``security/security.c``) is provided to allow a security module to set
+security_ops to refer to its own hook functions, and an
+:c:func:`unregister_security()` function is provided to revert
+security_ops to the dummy module hooks. This mechanism is used to set
+the primary security module, which is responsible for making the final
+decision for each hook.
+
+LSM also provides a simple mechanism for stacking additional security
+modules with the primary security module. It defines
+:c:func:`register_security()` and
+:c:func:`unregister_security()` hooks in the :c:type:`struct
+security_operations <security_operations>` structure and
+provides :c:func:`mod_reg_security()` and
+:c:func:`mod_unreg_security()` functions that invoke these hooks
+after performing some sanity checking. A security module can call these
+functions in order to stack with other modules. However, the actual
+details of how this stacking is handled are deferred to the module,
+which can implement these hooks in any way it wishes (including always
+returning an error if it does not wish to support stacking). In this
+manner, LSM again defers the problem of composition to the module.
+
+Although the LSM hooks are organized into substructures based on kernel
+object, all of the hooks can be viewed as falling into two major
+categories: hooks that are used to manage the security fields and hooks
+that are used to perform access control. Examples of the first category
+of hooks include the :c:func:`alloc_security()` and
+:c:func:`free_security()` hooks defined for each kernel data
+structure that has a security field. These hooks are used to allocate
+and free security structures for kernel objects. The first category of
+hooks also includes hooks that set information in the security field
+after allocation, such as the :c:func:`post_lookup()` hook in
+:c:type:`struct inode_security_ops <inode_security_ops>`.
+This hook is used to set security information for inodes after
+successful lookup operations. An example of the second category of hooks
+is the :c:func:`permission()` hook in :c:type:`struct
+inode_security_ops <inode_security_ops>`. This hook checks
+permission when accessing an inode.
+
+LSM Capabilities Module
+=======================
+
+The LSM kernel patch moves most of the existing POSIX.1e capabilities
+logic into an optional security module stored in the file
+``security/capability.c``. This change allows users who do not want to
+use capabilities to omit this code entirely from their kernel, instead
+using the dummy module for traditional superuser logic or any other
+module that they desire. This change also allows the developers of the
+capabilities logic to maintain and enhance their code more freely,
+without needing to integrate patches back into the base kernel.
+
+In addition to moving the capabilities logic, the LSM kernel patch could
+move the capability-related fields from the kernel data structures into
+the new security fields managed by the security modules. However, at
+present, the LSM kernel patch leaves the capability fields in the kernel
+data structures. In his original remarks, Linus suggested that this
+might be preferable so that other security modules can be easily stacked
+with the capabilities module without needing to chain multiple security
+structures on the security field. It also avoids imposing extra overhead
+on the capabilities module to manage the security fields. However, the
+LSM framework could certainly support such a move if it is determined to
+be desirable, with only a few additional changes described below.
+
+At present, the capabilities logic for computing process capabilities on
+:c:func:`execve()` and :c:func:`set\*uid()`, checking
+capabilities for a particular process, saving and checking capabilities
+for netlink messages, and handling the :c:func:`capget()` and
+:c:func:`capset()` system calls have been moved into the
+capabilities module. There are still a few locations in the base kernel
+where capability-related fields are directly examined or modified, but
+the current version of the LSM patch does allow a security module to
+completely replace the assignment and testing of capabilities. These few
+locations would need to be changed if the capability-related fields were
+moved into the security field. The following is a list of known
+locations that still perform such direct examination or modification of
+capability-related fields:
+
+-  ``fs/open.c``::c:func:`sys_access()`
+
+-  ``fs/lockd/host.c``::c:func:`nlm_bind_host()`
+
+-  ``fs/nfsd/auth.c``::c:func:`nfsd_setuser()`
+
+-  ``fs/proc/array.c``::c:func:`task_cap()`
diff --git a/Documentation/security/sak.rst b/Documentation/security/sak.rst
new file mode 100644
index 000000000000..260e1d3687bd
--- /dev/null
+++ b/Documentation/security/sak.rst
@@ -0,0 +1,91 @@
+=========================================
+Linux Secure Attention Key (SAK) handling
+=========================================
+
+:Date: 18 March 2001
+:Author: Andrew Morton
+
+An operating system's Secure Attention Key is a security tool which is
+provided as protection against trojan password capturing programs.  It
+is an undefeatable way of killing all programs which could be
+masquerading as login applications.  Users need to be taught to enter
+this key sequence before they log in to the system.
+
+From the PC keyboard, Linux has two similar but different ways of
+providing SAK.  One is the ALT-SYSRQ-K sequence.  You shouldn't use
+this sequence.  It is only available if the kernel was compiled with
+sysrq support.
+
+The proper way of generating a SAK is to define the key sequence using
+``loadkeys``.  This will work whether or not sysrq support is compiled
+into the kernel.
+
+SAK works correctly when the keyboard is in raw mode.  This means that
+once defined, SAK will kill a running X server.  If the system is in
+run level 5, the X server will restart.  This is what you want to
+happen.
+
+What key sequence should you use? Well, CTRL-ALT-DEL is used to reboot
+the machine.  CTRL-ALT-BACKSPACE is magical to the X server.  We'll
+choose CTRL-ALT-PAUSE.
+
+In your rc.sysinit (or rc.local) file, add the command::
+
+	echo "control alt keycode 101 = SAK" | /bin/loadkeys
+
+And that's it!  Only the superuser may reprogram the SAK key.
+
+
+.. note::
+
+  1. Linux SAK is said to be not a "true SAK" as is required by
+     systems which implement C2 level security.  This author does not
+     know why.
+
+
+  2. On the PC keyboard, SAK kills all applications which have
+     /dev/console opened.
+
+     Unfortunately this includes a number of things which you don't
+     actually want killed.  This is because these applications are
+     incorrectly holding /dev/console open.  Be sure to complain to your
+     Linux distributor about this!
+
+     You can identify processes which will be killed by SAK with the
+     command::
+
+	# ls -l /proc/[0-9]*/fd/* | grep console
+	l-wx------    1 root     root           64 Mar 18 00:46 /proc/579/fd/0 -> /dev/console
+
+     Then::
+
+	# ps aux|grep 579
+	root       579  0.0  0.1  1088  436 ?        S    00:43   0:00 gpm -t ps/2
+
+     So ``gpm`` will be killed by SAK.  This is a bug in gpm.  It should
+     be closing standard input.  You can work around this by finding the
+     initscript which launches gpm and changing it thusly:
+
+     Old::
+
+	daemon gpm
+
+     New::
+
+	daemon gpm < /dev/null
+
+     Vixie cron also seems to have this problem, and needs the same treatment.
+
+     Also, one prominent Linux distribution has the following three
+     lines in its rc.sysinit and rc scripts::
+
+	exec 3<&0
+	exec 4>&1
+	exec 5>&2
+
+     These commands cause **all** daemons which are launched by the
+     initscripts to have file descriptors 3, 4 and 5 attached to
+     /dev/console.  So SAK kills them all.  A workaround is to simply
+     delete these lines, but this may cause system management
+     applications to malfunction - test everything well.
+
diff --git a/Documentation/security/siphash.rst b/Documentation/security/siphash.rst
new file mode 100644
index 000000000000..9965821ab333
--- /dev/null
+++ b/Documentation/security/siphash.rst
@@ -0,0 +1,189 @@
+===========================
+SipHash - a short input PRF
+===========================
+
+:Author: Written by Jason A. Donenfeld <jason@zx2c4.com>
+
+SipHash is a cryptographically secure PRF -- a keyed hash function -- that
+performs very well for short inputs, hence the name. It was designed by
+cryptographers Daniel J. Bernstein and Jean-Philippe Aumasson. It is intended
+as a replacement for some uses of: `jhash`, `md5_transform`, `sha_transform`,
+and so forth.
+
+SipHash takes a secret key filled with randomly generated numbers and either
+an input buffer or several input integers. It spits out an integer that is
+indistinguishable from random. You may then use that integer as part of secure
+sequence numbers, secure cookies, or mask it off for use in a hash table.
+
+Generating a key
+================
+
+Keys should always be generated from a cryptographically secure source of
+random numbers, either using get_random_bytes or get_random_once::
+
+	siphash_key_t key;
+	get_random_bytes(&key, sizeof(key));
+
+If you're not deriving your key from here, you're doing it wrong.
+
+Using the functions
+===================
+
+There are two variants of the function, one that takes a list of integers, and
+one that takes a buffer::
+
+	u64 siphash(const void *data, size_t len, const siphash_key_t *key);
+
+And::
+
+	u64 siphash_1u64(u64, const siphash_key_t *key);
+	u64 siphash_2u64(u64, u64, const siphash_key_t *key);
+	u64 siphash_3u64(u64, u64, u64, const siphash_key_t *key);
+	u64 siphash_4u64(u64, u64, u64, u64, const siphash_key_t *key);
+	u64 siphash_1u32(u32, const siphash_key_t *key);
+	u64 siphash_2u32(u32, u32, const siphash_key_t *key);
+	u64 siphash_3u32(u32, u32, u32, const siphash_key_t *key);
+	u64 siphash_4u32(u32, u32, u32, u32, const siphash_key_t *key);
+
+If you pass the generic siphash function something of a constant length, it
+will constant fold at compile-time and automatically choose one of the
+optimized functions.
+
+Hashtable key function usage::
+
+	struct some_hashtable {
+		DECLARE_HASHTABLE(hashtable, 8);
+		siphash_key_t key;
+	};
+
+	void init_hashtable(struct some_hashtable *table)
+	{
+		get_random_bytes(&table->key, sizeof(table->key));
+	}
+
+	static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input)
+	{
+		return &table->hashtable[siphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)];
+	}
+
+You may then iterate like usual over the returned hash bucket.
+
+Security
+========
+
+SipHash has a very high security margin, with its 128-bit key. So long as the
+key is kept secret, it is impossible for an attacker to guess the outputs of
+the function, even if being able to observe many outputs, since 2^128 outputs
+is significant.
+
+Linux implements the "2-4" variant of SipHash.
+
+Struct-passing Pitfalls
+=======================
+
+Often times the XuY functions will not be large enough, and instead you'll
+want to pass a pre-filled struct to siphash. When doing this, it's important
+to always ensure the struct has no padding holes. The easiest way to do this
+is to simply arrange the members of the struct in descending order of size,
+and to use offsetendof() instead of sizeof() for getting the size. For
+performance reasons, if possible, it's probably a good thing to align the
+struct to the right boundary. Here's an example::
+
+	const struct {
+		struct in6_addr saddr;
+		u32 counter;
+		u16 dport;
+	} __aligned(SIPHASH_ALIGNMENT) combined = {
+		.saddr = *(struct in6_addr *)saddr,
+		.counter = counter,
+		.dport = dport
+	};
+	u64 h = siphash(&combined, offsetofend(typeof(combined), dport), &secret);
+
+Resources
+=========
+
+Read the SipHash paper if you're interested in learning more:
+https://131002.net/siphash/siphash.pdf
+
+-------------------------------------------------------------------------------
+
+===============================================
+HalfSipHash - SipHash's insecure younger cousin
+===============================================
+
+:Author: Written by Jason A. Donenfeld <jason@zx2c4.com>
+
+On the off-chance that SipHash is not fast enough for your needs, you might be
+able to justify using HalfSipHash, a terrifying but potentially useful
+possibility. HalfSipHash cuts SipHash's rounds down from "2-4" to "1-3" and,
+even scarier, uses an easily brute-forcable 64-bit key (with a 32-bit output)
+instead of SipHash's 128-bit key. However, this may appeal to some
+high-performance `jhash` users.
+
+Danger!
+
+Do not ever use HalfSipHash except for as a hashtable key function, and only
+then when you can be absolutely certain that the outputs will never be
+transmitted out of the kernel. This is only remotely useful over `jhash` as a
+means of mitigating hashtable flooding denial of service attacks.
+
+Generating a key
+================
+
+Keys should always be generated from a cryptographically secure source of
+random numbers, either using get_random_bytes or get_random_once:
+
+hsiphash_key_t key;
+get_random_bytes(&key, sizeof(key));
+
+If you're not deriving your key from here, you're doing it wrong.
+
+Using the functions
+===================
+
+There are two variants of the function, one that takes a list of integers, and
+one that takes a buffer::
+
+	u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key);
+
+And::
+
+	u32 hsiphash_1u32(u32, const hsiphash_key_t *key);
+	u32 hsiphash_2u32(u32, u32, const hsiphash_key_t *key);
+	u32 hsiphash_3u32(u32, u32, u32, const hsiphash_key_t *key);
+	u32 hsiphash_4u32(u32, u32, u32, u32, const hsiphash_key_t *key);
+
+If you pass the generic hsiphash function something of a constant length, it
+will constant fold at compile-time and automatically choose one of the
+optimized functions.
+
+Hashtable key function usage
+============================
+
+::
+
+	struct some_hashtable {
+		DECLARE_HASHTABLE(hashtable, 8);
+		hsiphash_key_t key;
+	};
+
+	void init_hashtable(struct some_hashtable *table)
+	{
+		get_random_bytes(&table->key, sizeof(table->key));
+	}
+
+	static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input)
+	{
+		return &table->hashtable[hsiphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)];
+	}
+
+You may then iterate like usual over the returned hash bucket.
+
+Performance
+===========
+
+HalfSipHash is roughly 3 times slower than JenkinsHash. For many replacements,
+this will not be a problem, as the hashtable lookup isn't the bottleneck. And
+in general, this is probably a good sacrifice to make for the security and DoS
+resistance of HalfSipHash.
diff --git a/Documentation/security/tpm/index.rst b/Documentation/security/tpm/index.rst
index af77a7bbb070..3296533e54cf 100644
--- a/Documentation/security/tpm/index.rst
+++ b/Documentation/security/tpm/index.rst
@@ -5,3 +5,4 @@ Trusted Platform Module documentation
 .. toctree::
 
    tpm_vtpm_proxy
+   xen-tpmfront
diff --git a/Documentation/security/tpm/xen-tpmfront.rst b/Documentation/security/tpm/xen-tpmfront.rst
index 98a16ab87360..00d5b1db227d 100644
--- a/Documentation/security/tpm/xen-tpmfront.rst
+++ b/Documentation/security/tpm/xen-tpmfront.rst
@@ -1,5 +1,3 @@
-:orphan:
-
 ﻿=============================
 Virtual TPM interface for Xen
 =============================
diff --git a/Documentation/siphash.txt b/Documentation/siphash.txt
deleted file mode 100644
index 9965821ab333..000000000000
--- a/Documentation/siphash.txt
+++ /dev/null
@@ -1,189 +0,0 @@
-===========================
-SipHash - a short input PRF
-===========================
-
-:Author: Written by Jason A. Donenfeld <jason@zx2c4.com>
-
-SipHash is a cryptographically secure PRF -- a keyed hash function -- that
-performs very well for short inputs, hence the name. It was designed by
-cryptographers Daniel J. Bernstein and Jean-Philippe Aumasson. It is intended
-as a replacement for some uses of: `jhash`, `md5_transform`, `sha_transform`,
-and so forth.
-
-SipHash takes a secret key filled with randomly generated numbers and either
-an input buffer or several input integers. It spits out an integer that is
-indistinguishable from random. You may then use that integer as part of secure
-sequence numbers, secure cookies, or mask it off for use in a hash table.
-
-Generating a key
-================
-
-Keys should always be generated from a cryptographically secure source of
-random numbers, either using get_random_bytes or get_random_once::
-
-	siphash_key_t key;
-	get_random_bytes(&key, sizeof(key));
-
-If you're not deriving your key from here, you're doing it wrong.
-
-Using the functions
-===================
-
-There are two variants of the function, one that takes a list of integers, and
-one that takes a buffer::
-
-	u64 siphash(const void *data, size_t len, const siphash_key_t *key);
-
-And::
-
-	u64 siphash_1u64(u64, const siphash_key_t *key);
-	u64 siphash_2u64(u64, u64, const siphash_key_t *key);
-	u64 siphash_3u64(u64, u64, u64, const siphash_key_t *key);
-	u64 siphash_4u64(u64, u64, u64, u64, const siphash_key_t *key);
-	u64 siphash_1u32(u32, const siphash_key_t *key);
-	u64 siphash_2u32(u32, u32, const siphash_key_t *key);
-	u64 siphash_3u32(u32, u32, u32, const siphash_key_t *key);
-	u64 siphash_4u32(u32, u32, u32, u32, const siphash_key_t *key);
-
-If you pass the generic siphash function something of a constant length, it
-will constant fold at compile-time and automatically choose one of the
-optimized functions.
-
-Hashtable key function usage::
-
-	struct some_hashtable {
-		DECLARE_HASHTABLE(hashtable, 8);
-		siphash_key_t key;
-	};
-
-	void init_hashtable(struct some_hashtable *table)
-	{
-		get_random_bytes(&table->key, sizeof(table->key));
-	}
-
-	static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input)
-	{
-		return &table->hashtable[siphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)];
-	}
-
-You may then iterate like usual over the returned hash bucket.
-
-Security
-========
-
-SipHash has a very high security margin, with its 128-bit key. So long as the
-key is kept secret, it is impossible for an attacker to guess the outputs of
-the function, even if being able to observe many outputs, since 2^128 outputs
-is significant.
-
-Linux implements the "2-4" variant of SipHash.
-
-Struct-passing Pitfalls
-=======================
-
-Often times the XuY functions will not be large enough, and instead you'll
-want to pass a pre-filled struct to siphash. When doing this, it's important
-to always ensure the struct has no padding holes. The easiest way to do this
-is to simply arrange the members of the struct in descending order of size,
-and to use offsetendof() instead of sizeof() for getting the size. For
-performance reasons, if possible, it's probably a good thing to align the
-struct to the right boundary. Here's an example::
-
-	const struct {
-		struct in6_addr saddr;
-		u32 counter;
-		u16 dport;
-	} __aligned(SIPHASH_ALIGNMENT) combined = {
-		.saddr = *(struct in6_addr *)saddr,
-		.counter = counter,
-		.dport = dport
-	};
-	u64 h = siphash(&combined, offsetofend(typeof(combined), dport), &secret);
-
-Resources
-=========
-
-Read the SipHash paper if you're interested in learning more:
-https://131002.net/siphash/siphash.pdf
-
--------------------------------------------------------------------------------
-
-===============================================
-HalfSipHash - SipHash's insecure younger cousin
-===============================================
-
-:Author: Written by Jason A. Donenfeld <jason@zx2c4.com>
-
-On the off-chance that SipHash is not fast enough for your needs, you might be
-able to justify using HalfSipHash, a terrifying but potentially useful
-possibility. HalfSipHash cuts SipHash's rounds down from "2-4" to "1-3" and,
-even scarier, uses an easily brute-forcable 64-bit key (with a 32-bit output)
-instead of SipHash's 128-bit key. However, this may appeal to some
-high-performance `jhash` users.
-
-Danger!
-
-Do not ever use HalfSipHash except for as a hashtable key function, and only
-then when you can be absolutely certain that the outputs will never be
-transmitted out of the kernel. This is only remotely useful over `jhash` as a
-means of mitigating hashtable flooding denial of service attacks.
-
-Generating a key
-================
-
-Keys should always be generated from a cryptographically secure source of
-random numbers, either using get_random_bytes or get_random_once:
-
-hsiphash_key_t key;
-get_random_bytes(&key, sizeof(key));
-
-If you're not deriving your key from here, you're doing it wrong.
-
-Using the functions
-===================
-
-There are two variants of the function, one that takes a list of integers, and
-one that takes a buffer::
-
-	u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key);
-
-And::
-
-	u32 hsiphash_1u32(u32, const hsiphash_key_t *key);
-	u32 hsiphash_2u32(u32, u32, const hsiphash_key_t *key);
-	u32 hsiphash_3u32(u32, u32, u32, const hsiphash_key_t *key);
-	u32 hsiphash_4u32(u32, u32, u32, u32, const hsiphash_key_t *key);
-
-If you pass the generic hsiphash function something of a constant length, it
-will constant fold at compile-time and automatically choose one of the
-optimized functions.
-
-Hashtable key function usage
-============================
-
-::
-
-	struct some_hashtable {
-		DECLARE_HASHTABLE(hashtable, 8);
-		hsiphash_key_t key;
-	};
-
-	void init_hashtable(struct some_hashtable *table)
-	{
-		get_random_bytes(&table->key, sizeof(table->key));
-	}
-
-	static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input)
-	{
-		return &table->hashtable[hsiphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)];
-	}
-
-You may then iterate like usual over the returned hash bucket.
-
-Performance
-===========
-
-HalfSipHash is roughly 3 times slower than JenkinsHash. For many replacements,
-this will not be a problem, as the hashtable lookup isn't the bottleneck. And
-in general, this is probably a good sacrifice to make for the security and DoS
-resistance of HalfSipHash.
-- 
cgit 


From e8d776f20f92b9c679bcdcbdf3aee5026d5265f5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sat, 20 Apr 2019 09:20:52 -0300
Subject: docs: x86: move two x86-specific files to x86 arch dir

Those two docs belong to the x86 architecture:

   Documentation/Intel-IOMMU.txt -> Documentation/x86/intel-iommu.rst
   Documentation/intel_txt.txt -> Documentation/x86/intel_txt.rst

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/Intel-IOMMU.txt     | 114 -------------------
 Documentation/intel_txt.txt       | 227 --------------------------------------
 Documentation/x86/index.rst       |   2 +
 Documentation/x86/intel-iommu.rst | 114 +++++++++++++++++++
 Documentation/x86/intel_txt.rst   | 227 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 343 insertions(+), 341 deletions(-)
 delete mode 100644 Documentation/Intel-IOMMU.txt
 delete mode 100644 Documentation/intel_txt.txt
 create mode 100644 Documentation/x86/intel-iommu.rst
 create mode 100644 Documentation/x86/intel_txt.rst

(limited to 'Documentation')

diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt
deleted file mode 100644
index 9dae6b47e398..000000000000
--- a/Documentation/Intel-IOMMU.txt
+++ /dev/null
@@ -1,114 +0,0 @@
-===================
-Linux IOMMU Support
-===================
-
-The architecture spec can be obtained from the below location.
-
-http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf
-
-This guide gives a quick cheat sheet for some basic understanding.
-
-Some Keywords
-
-- DMAR - DMA remapping
-- DRHD - DMA Remapping Hardware Unit Definition
-- RMRR - Reserved memory Region Reporting Structure
-- ZLR  - Zero length reads from PCI devices
-- IOVA - IO Virtual address.
-
-Basic stuff
------------
-
-ACPI enumerates and lists the different DMA engines in the platform, and
-device scope relationships between PCI devices and which DMA engine  controls
-them.
-
-What is RMRR?
--------------
-
-There are some devices the BIOS controls, for e.g USB devices to perform
-PS2 emulation. The regions of memory used for these devices are marked
-reserved in the e820 map. When we turn on DMA translation, DMA to those
-regions will fail. Hence BIOS uses RMRR to specify these regions along with
-devices that need to access these regions. OS is expected to setup
-unity mappings for these regions for these devices to access these regions.
-
-How is IOVA generated?
-----------------------
-
-Well behaved drivers call pci_map_*() calls before sending command to device
-that needs to perform DMA. Once DMA is completed and mapping is no longer
-required, device performs a pci_unmap_*() calls to unmap the region.
-
-The Intel IOMMU driver allocates a virtual address per domain. Each PCIE
-device has its own domain (hence protection). Devices under p2p bridges
-share the virtual address with all devices under the p2p bridge due to
-transaction id aliasing for p2p bridges.
-
-IOVA generation is pretty generic. We used the same technique as vmalloc()
-but these are not global address spaces, but separate for each domain.
-Different DMA engines may support different number of domains.
-
-We also allocate guard pages with each mapping, so we can attempt to catch
-any overflow that might happen.
-
-
-Graphics Problems?
-------------------
-If you encounter issues with graphics devices, you can try adding
-option intel_iommu=igfx_off to turn off the integrated graphics engine.
-If this fixes anything, please ensure you file a bug reporting the problem.
-
-Some exceptions to IOVA
------------------------
-Interrupt ranges are not address translated, (0xfee00000 - 0xfeefffff).
-The same is true for peer to peer transactions. Hence we reserve the
-address from PCI MMIO ranges so they are not allocated for IOVA addresses.
-
-
-Fault reporting
----------------
-When errors are reported, the DMA engine signals via an interrupt. The fault
-reason and device that caused it with fault reason is printed on console.
-
-See below for sample.
-
-
-Boot Message Sample
--------------------
-
-Something like this gets printed indicating presence of DMAR tables
-in ACPI.
-
-ACPI: DMAR (v001 A M I  OEMDMAR  0x00000001 MSFT 0x00000097) @ 0x000000007f5b5ef0
-
-When DMAR is being processed and initialized by ACPI, prints DMAR locations
-and any RMRR's processed::
-
-	ACPI DMAR:Host address width 36
-	ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed90000
-	ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed91000
-	ACPI DMAR:DRHD (flags: 0x00000001)base: 0x00000000fed93000
-	ACPI DMAR:RMRR base: 0x00000000000ed000 end: 0x00000000000effff
-	ACPI DMAR:RMRR base: 0x000000007f600000 end: 0x000000007fffffff
-
-When DMAR is enabled for use, you will notice..
-
-PCI-DMA: Using DMAR IOMMU
-
-Fault reporting
----------------
-
-::
-
-	DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
-	DMAR:[fault reason 05] PTE Write access is not set
-	DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
-	DMAR:[fault reason 05] PTE Write access is not set
-
-TBD
-----
-
-- For compatibility testing, could use unity map domain for all devices, just
-  provide a 1-1 for all useful memory under a single domain for all devices.
-- API for paravirt ops for abstracting functionality for VMM folks.
diff --git a/Documentation/intel_txt.txt b/Documentation/intel_txt.txt
deleted file mode 100644
index d83c1a2122c9..000000000000
--- a/Documentation/intel_txt.txt
+++ /dev/null
@@ -1,227 +0,0 @@
-=====================
-Intel(R) TXT Overview
-=====================
-
-Intel's technology for safer computing, Intel(R) Trusted Execution
-Technology (Intel(R) TXT), defines platform-level enhancements that
-provide the building blocks for creating trusted platforms.
-
-Intel TXT was formerly known by the code name LaGrande Technology (LT).
-
-Intel TXT in Brief:
-
--  Provides dynamic root of trust for measurement (DRTM)
--  Data protection in case of improper shutdown
--  Measurement and verification of launched environment
-
-Intel TXT is part of the vPro(TM) brand and is also available some
-non-vPro systems.  It is currently available on desktop systems
-based on the Q35, X38, Q45, and Q43 Express chipsets (e.g. Dell
-Optiplex 755, HP dc7800, etc.) and mobile systems based on the GM45,
-PM45, and GS45 Express chipsets.
-
-For more information, see http://www.intel.com/technology/security/.
-This site also has a link to the Intel TXT MLE Developers Manual,
-which has been updated for the new released platforms.
-
-Intel TXT has been presented at various events over the past few
-years, some of which are:
-
-      - LinuxTAG 2008:
-          http://www.linuxtag.org/2008/en/conf/events/vp-donnerstag.html
-
-      - TRUST2008:
-          http://www.trust-conference.eu/downloads/Keynote-Speakers/
-          3_David-Grawrock_The-Front-Door-of-Trusted-Computing.pdf
-
-      - IDF, Shanghai:
-          http://www.prcidf.com.cn/index_en.html
-
-      - IDFs 2006, 2007
-	  (I'm not sure if/where they are online)
-
-Trusted Boot Project Overview
-=============================
-
-Trusted Boot (tboot) is an open source, pre-kernel/VMM module that
-uses Intel TXT to perform a measured and verified launch of an OS
-kernel/VMM.
-
-It is hosted on SourceForge at http://sourceforge.net/projects/tboot.
-The mercurial source repo is available at http://www.bughost.org/
-repos.hg/tboot.hg.
-
-Tboot currently supports launching Xen (open source VMM/hypervisor
-w/ TXT support since v3.2), and now Linux kernels.
-
-
-Value Proposition for Linux or "Why should you care?"
-=====================================================
-
-While there are many products and technologies that attempt to
-measure or protect the integrity of a running kernel, they all
-assume the kernel is "good" to begin with.  The Integrity
-Measurement Architecture (IMA) and Linux Integrity Module interface
-are examples of such solutions.
-
-To get trust in the initial kernel without using Intel TXT, a
-static root of trust must be used.  This bases trust in BIOS
-starting at system reset and requires measurement of all code
-executed between system reset through the completion of the kernel
-boot as well as data objects used by that code.  In the case of a
-Linux kernel, this means all of BIOS, any option ROMs, the
-bootloader and the boot config.  In practice, this is a lot of
-code/data, much of which is subject to change from boot to boot
-(e.g. changing NICs may change option ROMs).  Without reference
-hashes, these measurement changes are difficult to assess or
-confirm as benign.  This process also does not provide DMA
-protection, memory configuration/alias checks and locks, crash
-protection, or policy support.
-
-By using the hardware-based root of trust that Intel TXT provides,
-many of these issues can be mitigated.  Specifically: many
-pre-launch components can be removed from the trust chain, DMA
-protection is provided to all launched components, a large number
-of platform configuration checks are performed and values locked,
-protection is provided for any data in the event of an improper
-shutdown, and there is support for policy-based execution/verification.
-This provides a more stable measurement and a higher assurance of
-system configuration and initial state than would be otherwise
-possible.  Since the tboot project is open source, source code for
-almost all parts of the trust chain is available (excepting SMM and
-Intel-provided firmware).
-
-How Does it Work?
-=================
-
--  Tboot is an executable that is launched by the bootloader as
-   the "kernel" (the binary the bootloader executes).
--  It performs all of the work necessary to determine if the
-   platform supports Intel TXT and, if so, executes the GETSEC[SENTER]
-   processor instruction that initiates the dynamic root of trust.
-
-   -  If tboot determines that the system does not support Intel TXT
-      or is not configured correctly (e.g. the SINIT AC Module was
-      incorrect), it will directly launch the kernel with no changes
-      to any state.
-   -  Tboot will output various information about its progress to the
-      terminal, serial port, and/or an in-memory log; the output
-      locations can be configured with a command line switch.
-
--  The GETSEC[SENTER] instruction will return control to tboot and
-   tboot then verifies certain aspects of the environment (e.g. TPM NV
-   lock, e820 table does not have invalid entries, etc.).
--  It will wake the APs from the special sleep state the GETSEC[SENTER]
-   instruction had put them in and place them into a wait-for-SIPI
-   state.
-
-   -  Because the processors will not respond to an INIT or SIPI when
-      in the TXT environment, it is necessary to create a small VT-x
-      guest for the APs.  When they run in this guest, they will
-      simply wait for the INIT-SIPI-SIPI sequence, which will cause
-      VMEXITs, and then disable VT and jump to the SIPI vector.  This
-      approach seemed like a better choice than having to insert
-      special code into the kernel's MP wakeup sequence.
-
--  Tboot then applies an (optional) user-defined launch policy to
-   verify the kernel and initrd.
-
-   -  This policy is rooted in TPM NV and is described in the tboot
-      project.  The tboot project also contains code for tools to
-      create and provision the policy.
-   -  Policies are completely under user control and if not present
-      then any kernel will be launched.
-   -  Policy action is flexible and can include halting on failures
-      or simply logging them and continuing.
-
--  Tboot adjusts the e820 table provided by the bootloader to reserve
-   its own location in memory as well as to reserve certain other
-   TXT-related regions.
--  As part of its launch, tboot DMA protects all of RAM (using the
-   VT-d PMRs).  Thus, the kernel must be booted with 'intel_iommu=on'
-   in order to remove this blanket protection and use VT-d's
-   page-level protection.
--  Tboot will populate a shared page with some data about itself and
-   pass this to the Linux kernel as it transfers control.
-
-   -  The location of the shared page is passed via the boot_params
-      struct as a physical address.
-
--  The kernel will look for the tboot shared page address and, if it
-   exists, map it.
--  As one of the checks/protections provided by TXT, it makes a copy
-   of the VT-d DMARs in a DMA-protected region of memory and verifies
-   them for correctness.  The VT-d code will detect if the kernel was
-   launched with tboot and use this copy instead of the one in the
-   ACPI table.
--  At this point, tboot and TXT are out of the picture until a
-   shutdown (S<n>)
--  In order to put a system into any of the sleep states after a TXT
-   launch, TXT must first be exited.  This is to prevent attacks that
-   attempt to crash the system to gain control on reboot and steal
-   data left in memory.
-
-   -  The kernel will perform all of its sleep preparation and
-      populate the shared page with the ACPI data needed to put the
-      platform in the desired sleep state.
-   -  Then the kernel jumps into tboot via the vector specified in the
-      shared page.
-   -  Tboot will clean up the environment and disable TXT, then use the
-      kernel-provided ACPI information to actually place the platform
-      into the desired sleep state.
-   -  In the case of S3, tboot will also register itself as the resume
-      vector.  This is necessary because it must re-establish the
-      measured environment upon resume.  Once the TXT environment
-      has been restored, it will restore the TPM PCRs and then
-      transfer control back to the kernel's S3 resume vector.
-      In order to preserve system integrity across S3, the kernel
-      provides tboot with a set of memory ranges (RAM and RESERVED_KERN
-      in the e820 table, but not any memory that BIOS might alter over
-      the S3 transition) that tboot will calculate a MAC (message
-      authentication code) over and then seal with the TPM. On resume
-      and once the measured environment has been re-established, tboot
-      will re-calculate the MAC and verify it against the sealed value.
-      Tboot's policy determines what happens if the verification fails.
-      Note that the c/s 194 of tboot which has the new MAC code supports
-      this.
-
-That's pretty much it for TXT support.
-
-
-Configuring the System
-======================
-
-This code works with 32bit, 32bit PAE, and 64bit (x86_64) kernels.
-
-In BIOS, the user must enable:  TPM, TXT, VT-x, VT-d.  Not all BIOSes
-allow these to be individually enabled/disabled and the screens in
-which to find them are BIOS-specific.
-
-grub.conf needs to be modified as follows::
-
-        title Linux 2.6.29-tip w/ tboot
-          root (hd0,0)
-                kernel /tboot.gz logging=serial,vga,memory
-                module /vmlinuz-2.6.29-tip intel_iommu=on ro
-                       root=LABEL=/ rhgb console=ttyS0,115200 3
-                module /initrd-2.6.29-tip.img
-                module /Q35_SINIT_17.BIN
-
-The kernel option for enabling Intel TXT support is found under the
-Security top-level menu and is called "Enable Intel(R) Trusted
-Execution Technology (TXT)".  It is considered EXPERIMENTAL and
-depends on the generic x86 support (to allow maximum flexibility in
-kernel build options), since the tboot code will detect whether the
-platform actually supports Intel TXT and thus whether any of the
-kernel code is executed.
-
-The Q35_SINIT_17.BIN file is what Intel TXT refers to as an
-Authenticated Code Module.  It is specific to the chipset in the
-system and can also be found on the Trusted Boot site.  It is an
-(unencrypted) module signed by Intel that is used as part of the
-DRTM process to verify and configure the system.  It is signed
-because it operates at a higher privilege level in the system than
-any other macrocode and its correct operation is critical to the
-establishment of the DRTM.  The process for determining the correct
-SINIT ACM for a system is documented in the SINIT-guide.txt file
-that is on the tboot SourceForge site under the SINIT ACM downloads.
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index f2de1b2d3ac7..af64c4bb4447 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -20,6 +20,8 @@ x86-specific Documentation
    mtrr
    pat
    intel_mpx
+   intel-iommu
+   intel_txt
    amd-memory-encryption
    pti
    mds
diff --git a/Documentation/x86/intel-iommu.rst b/Documentation/x86/intel-iommu.rst
new file mode 100644
index 000000000000..9dae6b47e398
--- /dev/null
+++ b/Documentation/x86/intel-iommu.rst
@@ -0,0 +1,114 @@
+===================
+Linux IOMMU Support
+===================
+
+The architecture spec can be obtained from the below location.
+
+http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf
+
+This guide gives a quick cheat sheet for some basic understanding.
+
+Some Keywords
+
+- DMAR - DMA remapping
+- DRHD - DMA Remapping Hardware Unit Definition
+- RMRR - Reserved memory Region Reporting Structure
+- ZLR  - Zero length reads from PCI devices
+- IOVA - IO Virtual address.
+
+Basic stuff
+-----------
+
+ACPI enumerates and lists the different DMA engines in the platform, and
+device scope relationships between PCI devices and which DMA engine  controls
+them.
+
+What is RMRR?
+-------------
+
+There are some devices the BIOS controls, for e.g USB devices to perform
+PS2 emulation. The regions of memory used for these devices are marked
+reserved in the e820 map. When we turn on DMA translation, DMA to those
+regions will fail. Hence BIOS uses RMRR to specify these regions along with
+devices that need to access these regions. OS is expected to setup
+unity mappings for these regions for these devices to access these regions.
+
+How is IOVA generated?
+----------------------
+
+Well behaved drivers call pci_map_*() calls before sending command to device
+that needs to perform DMA. Once DMA is completed and mapping is no longer
+required, device performs a pci_unmap_*() calls to unmap the region.
+
+The Intel IOMMU driver allocates a virtual address per domain. Each PCIE
+device has its own domain (hence protection). Devices under p2p bridges
+share the virtual address with all devices under the p2p bridge due to
+transaction id aliasing for p2p bridges.
+
+IOVA generation is pretty generic. We used the same technique as vmalloc()
+but these are not global address spaces, but separate for each domain.
+Different DMA engines may support different number of domains.
+
+We also allocate guard pages with each mapping, so we can attempt to catch
+any overflow that might happen.
+
+
+Graphics Problems?
+------------------
+If you encounter issues with graphics devices, you can try adding
+option intel_iommu=igfx_off to turn off the integrated graphics engine.
+If this fixes anything, please ensure you file a bug reporting the problem.
+
+Some exceptions to IOVA
+-----------------------
+Interrupt ranges are not address translated, (0xfee00000 - 0xfeefffff).
+The same is true for peer to peer transactions. Hence we reserve the
+address from PCI MMIO ranges so they are not allocated for IOVA addresses.
+
+
+Fault reporting
+---------------
+When errors are reported, the DMA engine signals via an interrupt. The fault
+reason and device that caused it with fault reason is printed on console.
+
+See below for sample.
+
+
+Boot Message Sample
+-------------------
+
+Something like this gets printed indicating presence of DMAR tables
+in ACPI.
+
+ACPI: DMAR (v001 A M I  OEMDMAR  0x00000001 MSFT 0x00000097) @ 0x000000007f5b5ef0
+
+When DMAR is being processed and initialized by ACPI, prints DMAR locations
+and any RMRR's processed::
+
+	ACPI DMAR:Host address width 36
+	ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed90000
+	ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed91000
+	ACPI DMAR:DRHD (flags: 0x00000001)base: 0x00000000fed93000
+	ACPI DMAR:RMRR base: 0x00000000000ed000 end: 0x00000000000effff
+	ACPI DMAR:RMRR base: 0x000000007f600000 end: 0x000000007fffffff
+
+When DMAR is enabled for use, you will notice..
+
+PCI-DMA: Using DMAR IOMMU
+
+Fault reporting
+---------------
+
+::
+
+	DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
+	DMAR:[fault reason 05] PTE Write access is not set
+	DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
+	DMAR:[fault reason 05] PTE Write access is not set
+
+TBD
+----
+
+- For compatibility testing, could use unity map domain for all devices, just
+  provide a 1-1 for all useful memory under a single domain for all devices.
+- API for paravirt ops for abstracting functionality for VMM folks.
diff --git a/Documentation/x86/intel_txt.rst b/Documentation/x86/intel_txt.rst
new file mode 100644
index 000000000000..d83c1a2122c9
--- /dev/null
+++ b/Documentation/x86/intel_txt.rst
@@ -0,0 +1,227 @@
+=====================
+Intel(R) TXT Overview
+=====================
+
+Intel's technology for safer computing, Intel(R) Trusted Execution
+Technology (Intel(R) TXT), defines platform-level enhancements that
+provide the building blocks for creating trusted platforms.
+
+Intel TXT was formerly known by the code name LaGrande Technology (LT).
+
+Intel TXT in Brief:
+
+-  Provides dynamic root of trust for measurement (DRTM)
+-  Data protection in case of improper shutdown
+-  Measurement and verification of launched environment
+
+Intel TXT is part of the vPro(TM) brand and is also available some
+non-vPro systems.  It is currently available on desktop systems
+based on the Q35, X38, Q45, and Q43 Express chipsets (e.g. Dell
+Optiplex 755, HP dc7800, etc.) and mobile systems based on the GM45,
+PM45, and GS45 Express chipsets.
+
+For more information, see http://www.intel.com/technology/security/.
+This site also has a link to the Intel TXT MLE Developers Manual,
+which has been updated for the new released platforms.
+
+Intel TXT has been presented at various events over the past few
+years, some of which are:
+
+      - LinuxTAG 2008:
+          http://www.linuxtag.org/2008/en/conf/events/vp-donnerstag.html
+
+      - TRUST2008:
+          http://www.trust-conference.eu/downloads/Keynote-Speakers/
+          3_David-Grawrock_The-Front-Door-of-Trusted-Computing.pdf
+
+      - IDF, Shanghai:
+          http://www.prcidf.com.cn/index_en.html
+
+      - IDFs 2006, 2007
+	  (I'm not sure if/where they are online)
+
+Trusted Boot Project Overview
+=============================
+
+Trusted Boot (tboot) is an open source, pre-kernel/VMM module that
+uses Intel TXT to perform a measured and verified launch of an OS
+kernel/VMM.
+
+It is hosted on SourceForge at http://sourceforge.net/projects/tboot.
+The mercurial source repo is available at http://www.bughost.org/
+repos.hg/tboot.hg.
+
+Tboot currently supports launching Xen (open source VMM/hypervisor
+w/ TXT support since v3.2), and now Linux kernels.
+
+
+Value Proposition for Linux or "Why should you care?"
+=====================================================
+
+While there are many products and technologies that attempt to
+measure or protect the integrity of a running kernel, they all
+assume the kernel is "good" to begin with.  The Integrity
+Measurement Architecture (IMA) and Linux Integrity Module interface
+are examples of such solutions.
+
+To get trust in the initial kernel without using Intel TXT, a
+static root of trust must be used.  This bases trust in BIOS
+starting at system reset and requires measurement of all code
+executed between system reset through the completion of the kernel
+boot as well as data objects used by that code.  In the case of a
+Linux kernel, this means all of BIOS, any option ROMs, the
+bootloader and the boot config.  In practice, this is a lot of
+code/data, much of which is subject to change from boot to boot
+(e.g. changing NICs may change option ROMs).  Without reference
+hashes, these measurement changes are difficult to assess or
+confirm as benign.  This process also does not provide DMA
+protection, memory configuration/alias checks and locks, crash
+protection, or policy support.
+
+By using the hardware-based root of trust that Intel TXT provides,
+many of these issues can be mitigated.  Specifically: many
+pre-launch components can be removed from the trust chain, DMA
+protection is provided to all launched components, a large number
+of platform configuration checks are performed and values locked,
+protection is provided for any data in the event of an improper
+shutdown, and there is support for policy-based execution/verification.
+This provides a more stable measurement and a higher assurance of
+system configuration and initial state than would be otherwise
+possible.  Since the tboot project is open source, source code for
+almost all parts of the trust chain is available (excepting SMM and
+Intel-provided firmware).
+
+How Does it Work?
+=================
+
+-  Tboot is an executable that is launched by the bootloader as
+   the "kernel" (the binary the bootloader executes).
+-  It performs all of the work necessary to determine if the
+   platform supports Intel TXT and, if so, executes the GETSEC[SENTER]
+   processor instruction that initiates the dynamic root of trust.
+
+   -  If tboot determines that the system does not support Intel TXT
+      or is not configured correctly (e.g. the SINIT AC Module was
+      incorrect), it will directly launch the kernel with no changes
+      to any state.
+   -  Tboot will output various information about its progress to the
+      terminal, serial port, and/or an in-memory log; the output
+      locations can be configured with a command line switch.
+
+-  The GETSEC[SENTER] instruction will return control to tboot and
+   tboot then verifies certain aspects of the environment (e.g. TPM NV
+   lock, e820 table does not have invalid entries, etc.).
+-  It will wake the APs from the special sleep state the GETSEC[SENTER]
+   instruction had put them in and place them into a wait-for-SIPI
+   state.
+
+   -  Because the processors will not respond to an INIT or SIPI when
+      in the TXT environment, it is necessary to create a small VT-x
+      guest for the APs.  When they run in this guest, they will
+      simply wait for the INIT-SIPI-SIPI sequence, which will cause
+      VMEXITs, and then disable VT and jump to the SIPI vector.  This
+      approach seemed like a better choice than having to insert
+      special code into the kernel's MP wakeup sequence.
+
+-  Tboot then applies an (optional) user-defined launch policy to
+   verify the kernel and initrd.
+
+   -  This policy is rooted in TPM NV and is described in the tboot
+      project.  The tboot project also contains code for tools to
+      create and provision the policy.
+   -  Policies are completely under user control and if not present
+      then any kernel will be launched.
+   -  Policy action is flexible and can include halting on failures
+      or simply logging them and continuing.
+
+-  Tboot adjusts the e820 table provided by the bootloader to reserve
+   its own location in memory as well as to reserve certain other
+   TXT-related regions.
+-  As part of its launch, tboot DMA protects all of RAM (using the
+   VT-d PMRs).  Thus, the kernel must be booted with 'intel_iommu=on'
+   in order to remove this blanket protection and use VT-d's
+   page-level protection.
+-  Tboot will populate a shared page with some data about itself and
+   pass this to the Linux kernel as it transfers control.
+
+   -  The location of the shared page is passed via the boot_params
+      struct as a physical address.
+
+-  The kernel will look for the tboot shared page address and, if it
+   exists, map it.
+-  As one of the checks/protections provided by TXT, it makes a copy
+   of the VT-d DMARs in a DMA-protected region of memory and verifies
+   them for correctness.  The VT-d code will detect if the kernel was
+   launched with tboot and use this copy instead of the one in the
+   ACPI table.
+-  At this point, tboot and TXT are out of the picture until a
+   shutdown (S<n>)
+-  In order to put a system into any of the sleep states after a TXT
+   launch, TXT must first be exited.  This is to prevent attacks that
+   attempt to crash the system to gain control on reboot and steal
+   data left in memory.
+
+   -  The kernel will perform all of its sleep preparation and
+      populate the shared page with the ACPI data needed to put the
+      platform in the desired sleep state.
+   -  Then the kernel jumps into tboot via the vector specified in the
+      shared page.
+   -  Tboot will clean up the environment and disable TXT, then use the
+      kernel-provided ACPI information to actually place the platform
+      into the desired sleep state.
+   -  In the case of S3, tboot will also register itself as the resume
+      vector.  This is necessary because it must re-establish the
+      measured environment upon resume.  Once the TXT environment
+      has been restored, it will restore the TPM PCRs and then
+      transfer control back to the kernel's S3 resume vector.
+      In order to preserve system integrity across S3, the kernel
+      provides tboot with a set of memory ranges (RAM and RESERVED_KERN
+      in the e820 table, but not any memory that BIOS might alter over
+      the S3 transition) that tboot will calculate a MAC (message
+      authentication code) over and then seal with the TPM. On resume
+      and once the measured environment has been re-established, tboot
+      will re-calculate the MAC and verify it against the sealed value.
+      Tboot's policy determines what happens if the verification fails.
+      Note that the c/s 194 of tboot which has the new MAC code supports
+      this.
+
+That's pretty much it for TXT support.
+
+
+Configuring the System
+======================
+
+This code works with 32bit, 32bit PAE, and 64bit (x86_64) kernels.
+
+In BIOS, the user must enable:  TPM, TXT, VT-x, VT-d.  Not all BIOSes
+allow these to be individually enabled/disabled and the screens in
+which to find them are BIOS-specific.
+
+grub.conf needs to be modified as follows::
+
+        title Linux 2.6.29-tip w/ tboot
+          root (hd0,0)
+                kernel /tboot.gz logging=serial,vga,memory
+                module /vmlinuz-2.6.29-tip intel_iommu=on ro
+                       root=LABEL=/ rhgb console=ttyS0,115200 3
+                module /initrd-2.6.29-tip.img
+                module /Q35_SINIT_17.BIN
+
+The kernel option for enabling Intel TXT support is found under the
+Security top-level menu and is called "Enable Intel(R) Trusted
+Execution Technology (TXT)".  It is considered EXPERIMENTAL and
+depends on the generic x86 support (to allow maximum flexibility in
+kernel build options), since the tboot code will detect whether the
+platform actually supports Intel TXT and thus whether any of the
+kernel code is executed.
+
+The Q35_SINIT_17.BIN file is what Intel TXT refers to as an
+Authenticated Code Module.  It is specific to the chipset in the
+system and can also be found on the Trusted Boot site.  It is an
+(unencrypted) module signed by Intel that is used as part of the
+DRTM process to verify and configure the system.  It is signed
+because it operates at a higher privilege level in the system than
+any other macrocode and its correct operation is critical to the
+establishment of the DRTM.  The process for determining the correct
+SINIT ACM for a system is documented in the SINIT-guide.txt file
+that is on the tboot SourceForge site under the SINIT ACM downloads.
-- 
cgit 


From 2dbc0838bcf24ca59cabc3130cf3b1d6809cdcd4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 11:39:21 -0300
Subject: docs: ocxl.rst: add it to the uAPI book

The content of this file is user-faced.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com>
---
 Documentation/accelerators/ocxl.rst               | 178 ----------------------
 Documentation/userspace-api/accelerators/ocxl.rst | 176 +++++++++++++++++++++
 Documentation/userspace-api/index.rst             |   1 +
 3 files changed, 177 insertions(+), 178 deletions(-)
 delete mode 100644 Documentation/accelerators/ocxl.rst
 create mode 100644 Documentation/userspace-api/accelerators/ocxl.rst

(limited to 'Documentation')

diff --git a/Documentation/accelerators/ocxl.rst b/Documentation/accelerators/ocxl.rst
deleted file mode 100644
index b1cea19a90f5..000000000000
--- a/Documentation/accelerators/ocxl.rst
+++ /dev/null
@@ -1,178 +0,0 @@
-:orphan:
-
-========================================================
-OpenCAPI (Open Coherent Accelerator Processor Interface)
-========================================================
-
-OpenCAPI is an interface between processors and accelerators. It aims
-at being low-latency and high-bandwidth. The specification is
-developed by the `OpenCAPI Consortium <http://opencapi.org/>`_.
-
-It allows an accelerator (which could be a FPGA, ASICs, ...) to access
-the host memory coherently, using virtual addresses. An OpenCAPI
-device can also host its own memory, that can be accessed from the
-host.
-
-OpenCAPI is known in linux as 'ocxl', as the open, processor-agnostic
-evolution of 'cxl' (the driver for the IBM CAPI interface for
-powerpc), which was named that way to avoid confusion with the ISDN
-CAPI subsystem.
-
-
-High-level view
-===============
-
-OpenCAPI defines a Data Link Layer (DL) and Transaction Layer (TL), to
-be implemented on top of a physical link. Any processor or device
-implementing the DL and TL can start sharing memory.
-
-::
-
-  +-----------+                         +-------------+
-  |           |                         |             |
-  |           |                         | Accelerated |
-  | Processor |                         |  Function   |
-  |           |  +--------+             |    Unit     |  +--------+
-  |           |--| Memory |             |    (AFU)    |--| Memory |
-  |           |  +--------+             |             |  +--------+
-  +-----------+                         +-------------+
-       |                                       |
-  +-----------+                         +-------------+
-  |    TL     |                         |    TLX      |
-  +-----------+                         +-------------+
-       |                                       |
-  +-----------+                         +-------------+
-  |    DL     |                         |    DLX      |
-  +-----------+                         +-------------+
-       |                                       |
-       |                   PHY                 |
-       +---------------------------------------+
-
-
-
-Device discovery
-================
-
-OpenCAPI relies on a PCI-like configuration space, implemented on the
-device. So the host can discover AFUs by querying the config space.
-
-OpenCAPI devices in Linux are treated like PCI devices (with a few
-caveats). The firmware is expected to abstract the hardware as if it
-was a PCI link. A lot of the existing PCI infrastructure is reused:
-devices are scanned and BARs are assigned during the standard PCI
-enumeration. Commands like 'lspci' can therefore be used to see what
-devices are available.
-
-The configuration space defines the AFU(s) that can be found on the
-physical adapter, such as its name, how many memory contexts it can
-work with, the size of its MMIO areas, ...
-
-
-
-MMIO
-====
-
-OpenCAPI defines two MMIO areas for each AFU:
-
-* the global MMIO area, with registers pertinent to the whole AFU.
-* a per-process MMIO area, which has a fixed size for each context.
-
-
-
-AFU interrupts
-==============
-
-OpenCAPI includes the possibility for an AFU to send an interrupt to a
-host process. It is done through a 'intrp_req' defined in the
-Transaction Layer, specifying a 64-bit object handle which defines the
-interrupt.
-
-The driver allows a process to allocate an interrupt and obtain its
-64-bit object handle, that can be passed to the AFU.
-
-
-
-char devices
-============
-
-The driver creates one char device per AFU found on the physical
-device. A physical device may have multiple functions and each
-function can have multiple AFUs. At the time of this writing though,
-it has only been tested with devices exporting only one AFU.
-
-Char devices can be found in /dev/ocxl/ and are named as:
-/dev/ocxl/<AFU name>.<location>.<index>
-
-where <AFU name> is a max 20-character long name, as found in the
-config space of the AFU.
-<location> is added by the driver and can help distinguish devices
-when a system has more than one instance of the same OpenCAPI device.
-<index> is also to help distinguish AFUs in the unlikely case where a
-device carries multiple copies of the same AFU.
-
-
-
-Sysfs class
-===========
-
-An ocxl class is added for the devices representing the AFUs. See
-/sys/class/ocxl. The layout is described in
-Documentation/ABI/testing/sysfs-class-ocxl
-
-
-
-User API
-========
-
-open
-----
-
-Based on the AFU definition found in the config space, an AFU may
-support working with more than one memory context, in which case the
-associated char device may be opened multiple times by different
-processes.
-
-
-ioctl
------
-
-OCXL_IOCTL_ATTACH:
-
-  Attach the memory context of the calling process to the AFU so that
-  the AFU can access its memory.
-
-OCXL_IOCTL_IRQ_ALLOC:
-
-  Allocate an AFU interrupt and return an identifier.
-
-OCXL_IOCTL_IRQ_FREE:
-
-  Free a previously allocated AFU interrupt.
-
-OCXL_IOCTL_IRQ_SET_FD:
-
-  Associate an event fd to an AFU interrupt so that the user process
-  can be notified when the AFU sends an interrupt.
-
-OCXL_IOCTL_GET_METADATA:
-
-  Obtains configuration information from the card, such at the size of
-  MMIO areas, the AFU version, and the PASID for the current context.
-
-OCXL_IOCTL_ENABLE_P9_WAIT:
-
-  Allows the AFU to wake a userspace thread executing 'wait'. Returns
-  information to userspace to allow it to configure the AFU. Note that
-  this is only available on POWER9.
-
-OCXL_IOCTL_GET_FEATURES:
-
-  Reports on which CPU features that affect OpenCAPI are usable from
-  userspace.
-
-
-mmap
-----
-
-A process can mmap the per-process MMIO area for interactions with the
-AFU.
diff --git a/Documentation/userspace-api/accelerators/ocxl.rst b/Documentation/userspace-api/accelerators/ocxl.rst
new file mode 100644
index 000000000000..14cefc020e2d
--- /dev/null
+++ b/Documentation/userspace-api/accelerators/ocxl.rst
@@ -0,0 +1,176 @@
+========================================================
+OpenCAPI (Open Coherent Accelerator Processor Interface)
+========================================================
+
+OpenCAPI is an interface between processors and accelerators. It aims
+at being low-latency and high-bandwidth. The specification is
+developed by the `OpenCAPI Consortium <http://opencapi.org/>`_.
+
+It allows an accelerator (which could be a FPGA, ASICs, ...) to access
+the host memory coherently, using virtual addresses. An OpenCAPI
+device can also host its own memory, that can be accessed from the
+host.
+
+OpenCAPI is known in linux as 'ocxl', as the open, processor-agnostic
+evolution of 'cxl' (the driver for the IBM CAPI interface for
+powerpc), which was named that way to avoid confusion with the ISDN
+CAPI subsystem.
+
+
+High-level view
+===============
+
+OpenCAPI defines a Data Link Layer (DL) and Transaction Layer (TL), to
+be implemented on top of a physical link. Any processor or device
+implementing the DL and TL can start sharing memory.
+
+::
+
+  +-----------+                         +-------------+
+  |           |                         |             |
+  |           |                         | Accelerated |
+  | Processor |                         |  Function   |
+  |           |  +--------+             |    Unit     |  +--------+
+  |           |--| Memory |             |    (AFU)    |--| Memory |
+  |           |  +--------+             |             |  +--------+
+  +-----------+                         +-------------+
+       |                                       |
+  +-----------+                         +-------------+
+  |    TL     |                         |    TLX      |
+  +-----------+                         +-------------+
+       |                                       |
+  +-----------+                         +-------------+
+  |    DL     |                         |    DLX      |
+  +-----------+                         +-------------+
+       |                                       |
+       |                   PHY                 |
+       +---------------------------------------+
+
+
+
+Device discovery
+================
+
+OpenCAPI relies on a PCI-like configuration space, implemented on the
+device. So the host can discover AFUs by querying the config space.
+
+OpenCAPI devices in Linux are treated like PCI devices (with a few
+caveats). The firmware is expected to abstract the hardware as if it
+was a PCI link. A lot of the existing PCI infrastructure is reused:
+devices are scanned and BARs are assigned during the standard PCI
+enumeration. Commands like 'lspci' can therefore be used to see what
+devices are available.
+
+The configuration space defines the AFU(s) that can be found on the
+physical adapter, such as its name, how many memory contexts it can
+work with, the size of its MMIO areas, ...
+
+
+
+MMIO
+====
+
+OpenCAPI defines two MMIO areas for each AFU:
+
+* the global MMIO area, with registers pertinent to the whole AFU.
+* a per-process MMIO area, which has a fixed size for each context.
+
+
+
+AFU interrupts
+==============
+
+OpenCAPI includes the possibility for an AFU to send an interrupt to a
+host process. It is done through a 'intrp_req' defined in the
+Transaction Layer, specifying a 64-bit object handle which defines the
+interrupt.
+
+The driver allows a process to allocate an interrupt and obtain its
+64-bit object handle, that can be passed to the AFU.
+
+
+
+char devices
+============
+
+The driver creates one char device per AFU found on the physical
+device. A physical device may have multiple functions and each
+function can have multiple AFUs. At the time of this writing though,
+it has only been tested with devices exporting only one AFU.
+
+Char devices can be found in /dev/ocxl/ and are named as:
+/dev/ocxl/<AFU name>.<location>.<index>
+
+where <AFU name> is a max 20-character long name, as found in the
+config space of the AFU.
+<location> is added by the driver and can help distinguish devices
+when a system has more than one instance of the same OpenCAPI device.
+<index> is also to help distinguish AFUs in the unlikely case where a
+device carries multiple copies of the same AFU.
+
+
+
+Sysfs class
+===========
+
+An ocxl class is added for the devices representing the AFUs. See
+/sys/class/ocxl. The layout is described in
+Documentation/ABI/testing/sysfs-class-ocxl
+
+
+
+User API
+========
+
+open
+----
+
+Based on the AFU definition found in the config space, an AFU may
+support working with more than one memory context, in which case the
+associated char device may be opened multiple times by different
+processes.
+
+
+ioctl
+-----
+
+OCXL_IOCTL_ATTACH:
+
+  Attach the memory context of the calling process to the AFU so that
+  the AFU can access its memory.
+
+OCXL_IOCTL_IRQ_ALLOC:
+
+  Allocate an AFU interrupt and return an identifier.
+
+OCXL_IOCTL_IRQ_FREE:
+
+  Free a previously allocated AFU interrupt.
+
+OCXL_IOCTL_IRQ_SET_FD:
+
+  Associate an event fd to an AFU interrupt so that the user process
+  can be notified when the AFU sends an interrupt.
+
+OCXL_IOCTL_GET_METADATA:
+
+  Obtains configuration information from the card, such at the size of
+  MMIO areas, the AFU version, and the PASID for the current context.
+
+OCXL_IOCTL_ENABLE_P9_WAIT:
+
+  Allows the AFU to wake a userspace thread executing 'wait'. Returns
+  information to userspace to allow it to configure the AFU. Note that
+  this is only available on POWER9.
+
+OCXL_IOCTL_GET_FEATURES:
+
+  Reports on which CPU features that affect OpenCAPI are usable from
+  userspace.
+
+
+mmap
+----
+
+A process can mmap the per-process MMIO area for interactions with the
+AFU.
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index a3233da7fa88..ad494da40009 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -20,6 +20,7 @@ place where this information is gathered.
    seccomp_filter
    unshare
    spec_ctrl
+   accelerators/ocxl
 
 .. only::  subproject and html
 
-- 
cgit 


From 56198359b64125dd0f9fa991972b61e4bc4fc6b5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 11:44:24 -0300
Subject: docs: lp855x-driver.rst: add it to the driver-api book

The content of this file is intended for backlight Kernel
developers.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/backlight/lp855x-driver.rst          | 83 ----------------------
 .../driver-api/backlight/lp855x-driver.rst         | 81 +++++++++++++++++++++
 Documentation/driver-api/index.rst                 |  1 +
 3 files changed, 82 insertions(+), 83 deletions(-)
 delete mode 100644 Documentation/backlight/lp855x-driver.rst
 create mode 100644 Documentation/driver-api/backlight/lp855x-driver.rst

(limited to 'Documentation')

diff --git a/Documentation/backlight/lp855x-driver.rst b/Documentation/backlight/lp855x-driver.rst
deleted file mode 100644
index 62b7ed847a77..000000000000
--- a/Documentation/backlight/lp855x-driver.rst
+++ /dev/null
@@ -1,83 +0,0 @@
-:orphan:
-
-====================
-Kernel driver lp855x
-====================
-
-Backlight driver for LP855x ICs
-
-Supported chips:
-
-	Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and
-	LP8557
-
-Author: Milo(Woogyom) Kim <milo.kim@ti.com>
-
-Description
------------
-
-* Brightness control
-
-  Brightness can be controlled by the pwm input or the i2c command.
-  The lp855x driver supports both cases.
-
-* Device attributes
-
-  1) bl_ctl_mode
-
-  Backlight control mode.
-
-  Value: pwm based or register based
-
-  2) chip_id
-
-  The lp855x chip id.
-
-  Value: lp8550/lp8551/lp8552/lp8553/lp8555/lp8556/lp8557
-
-Platform data for lp855x
-------------------------
-
-For supporting platform specific data, the lp855x platform data can be used.
-
-* name:
-	Backlight driver name. If it is not defined, default name is set.
-* device_control:
-	Value of DEVICE CONTROL register.
-* initial_brightness:
-	Initial value of backlight brightness.
-* period_ns:
-	Platform specific PWM period value. unit is nano.
-	Only valid when brightness is pwm input mode.
-* size_program:
-	Total size of lp855x_rom_data.
-* rom_data:
-	List of new eeprom/eprom registers.
-
-Examples
-========
-
-1) lp8552 platform data: i2c register mode with new eeprom data::
-
-    #define EEPROM_A5_ADDR	0xA5
-    #define EEPROM_A5_VAL	0x4f	/* EN_VSYNC=0 */
-
-    static struct lp855x_rom_data lp8552_eeprom_arr[] = {
-	{EEPROM_A5_ADDR, EEPROM_A5_VAL},
-    };
-
-    static struct lp855x_platform_data lp8552_pdata = {
-	.name = "lcd-bl",
-	.device_control = I2C_CONFIG(LP8552),
-	.initial_brightness = INITIAL_BRT,
-	.size_program = ARRAY_SIZE(lp8552_eeprom_arr),
-	.rom_data = lp8552_eeprom_arr,
-    };
-
-2) lp8556 platform data: pwm input mode with default rom data::
-
-    static struct lp855x_platform_data lp8556_pdata = {
-	.device_control = PWM_CONFIG(LP8556),
-	.initial_brightness = INITIAL_BRT,
-	.period_ns = 1000000,
-    };
diff --git a/Documentation/driver-api/backlight/lp855x-driver.rst b/Documentation/driver-api/backlight/lp855x-driver.rst
new file mode 100644
index 000000000000..1e0b224fc397
--- /dev/null
+++ b/Documentation/driver-api/backlight/lp855x-driver.rst
@@ -0,0 +1,81 @@
+====================
+Kernel driver lp855x
+====================
+
+Backlight driver for LP855x ICs
+
+Supported chips:
+
+	Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and
+	LP8557
+
+Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+
+Description
+-----------
+
+* Brightness control
+
+  Brightness can be controlled by the pwm input or the i2c command.
+  The lp855x driver supports both cases.
+
+* Device attributes
+
+  1) bl_ctl_mode
+
+  Backlight control mode.
+
+  Value: pwm based or register based
+
+  2) chip_id
+
+  The lp855x chip id.
+
+  Value: lp8550/lp8551/lp8552/lp8553/lp8555/lp8556/lp8557
+
+Platform data for lp855x
+------------------------
+
+For supporting platform specific data, the lp855x platform data can be used.
+
+* name:
+	Backlight driver name. If it is not defined, default name is set.
+* device_control:
+	Value of DEVICE CONTROL register.
+* initial_brightness:
+	Initial value of backlight brightness.
+* period_ns:
+	Platform specific PWM period value. unit is nano.
+	Only valid when brightness is pwm input mode.
+* size_program:
+	Total size of lp855x_rom_data.
+* rom_data:
+	List of new eeprom/eprom registers.
+
+Examples
+========
+
+1) lp8552 platform data: i2c register mode with new eeprom data::
+
+    #define EEPROM_A5_ADDR	0xA5
+    #define EEPROM_A5_VAL	0x4f	/* EN_VSYNC=0 */
+
+    static struct lp855x_rom_data lp8552_eeprom_arr[] = {
+	{EEPROM_A5_ADDR, EEPROM_A5_VAL},
+    };
+
+    static struct lp855x_platform_data lp8552_pdata = {
+	.name = "lcd-bl",
+	.device_control = I2C_CONFIG(LP8552),
+	.initial_brightness = INITIAL_BRT,
+	.size_program = ARRAY_SIZE(lp8552_eeprom_arr),
+	.rom_data = lp8552_eeprom_arr,
+    };
+
+2) lp8556 platform data: pwm input mode with default rom data::
+
+    static struct lp855x_platform_data lp8556_pdata = {
+	.device_control = PWM_CONFIG(LP8556),
+	.initial_brightness = INITIAL_BRT,
+	.period_ns = 1000000,
+    };
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 0f281f4f648f..b4c993ff7655 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -66,6 +66,7 @@ available subsections can be seen below.
    soundwire/index
    fpga/index
    acpi/index
+   backlight/lp855x-driver.rst
    generic-counter
 
 .. only::  subproject and html
-- 
cgit 


From fe34c89d25429e079ba67416529514120dd715f8 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 12:34:59 -0300
Subject: docs: driver-model: move it to the driver-api book

The audience for the Kernel driver-model is clearly Kernel hackers.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> # ice driver changes
---
 Documentation/driver-api/driver-model/binding.rst  |  98 +++++
 Documentation/driver-api/driver-model/bus.rst      | 146 +++++++
 Documentation/driver-api/driver-model/class.rst    | 149 +++++++
 .../driver-api/driver-model/design-patterns.rst    | 116 ++++++
 Documentation/driver-api/driver-model/device.rst   | 109 +++++
 Documentation/driver-api/driver-model/devres.rst   | 414 +++++++++++++++++++
 Documentation/driver-api/driver-model/driver.rst   | 223 ++++++++++
 Documentation/driver-api/driver-model/index.rst    |  24 ++
 Documentation/driver-api/driver-model/overview.rst | 124 ++++++
 Documentation/driver-api/driver-model/platform.rst | 246 +++++++++++
 Documentation/driver-api/driver-model/porting.rst  | 448 +++++++++++++++++++++
 Documentation/driver-api/gpio/driver.rst           |   2 +-
 Documentation/driver-api/index.rst                 |   1 +
 Documentation/driver-model/binding.rst             |  98 -----
 Documentation/driver-model/bus.rst                 | 146 -------
 Documentation/driver-model/class.rst               | 149 -------
 Documentation/driver-model/design-patterns.rst     | 116 ------
 Documentation/driver-model/device.rst              | 109 -----
 Documentation/driver-model/devres.rst              | 414 -------------------
 Documentation/driver-model/driver.rst              | 223 ----------
 Documentation/driver-model/index.rst               |  26 --
 Documentation/driver-model/overview.rst            | 124 ------
 Documentation/driver-model/platform.rst            | 246 -----------
 Documentation/driver-model/porting.rst             | 448 ---------------------
 Documentation/eisa.txt                             |   4 +-
 Documentation/filesystems/sysfs.txt                |   2 +-
 Documentation/hwmon/submitting-patches.rst         |   2 +-
 .../translations/zh_CN/filesystems/sysfs.txt       |   2 +-
 28 files changed, 2104 insertions(+), 2105 deletions(-)
 create mode 100644 Documentation/driver-api/driver-model/binding.rst
 create mode 100644 Documentation/driver-api/driver-model/bus.rst
 create mode 100644 Documentation/driver-api/driver-model/class.rst
 create mode 100644 Documentation/driver-api/driver-model/design-patterns.rst
 create mode 100644 Documentation/driver-api/driver-model/device.rst
 create mode 100644 Documentation/driver-api/driver-model/devres.rst
 create mode 100644 Documentation/driver-api/driver-model/driver.rst
 create mode 100644 Documentation/driver-api/driver-model/index.rst
 create mode 100644 Documentation/driver-api/driver-model/overview.rst
 create mode 100644 Documentation/driver-api/driver-model/platform.rst
 create mode 100644 Documentation/driver-api/driver-model/porting.rst
 delete mode 100644 Documentation/driver-model/binding.rst
 delete mode 100644 Documentation/driver-model/bus.rst
 delete mode 100644 Documentation/driver-model/class.rst
 delete mode 100644 Documentation/driver-model/design-patterns.rst
 delete mode 100644 Documentation/driver-model/device.rst
 delete mode 100644 Documentation/driver-model/devres.rst
 delete mode 100644 Documentation/driver-model/driver.rst
 delete mode 100644 Documentation/driver-model/index.rst
 delete mode 100644 Documentation/driver-model/overview.rst
 delete mode 100644 Documentation/driver-model/platform.rst
 delete mode 100644 Documentation/driver-model/porting.rst

(limited to 'Documentation')

diff --git a/Documentation/driver-api/driver-model/binding.rst b/Documentation/driver-api/driver-model/binding.rst
new file mode 100644
index 000000000000..7ea1d7a41e1d
--- /dev/null
+++ b/Documentation/driver-api/driver-model/binding.rst
@@ -0,0 +1,98 @@
+==============
+Driver Binding
+==============
+
+Driver binding is the process of associating a device with a device
+driver that can control it. Bus drivers have typically handled this
+because there have been bus-specific structures to represent the
+devices and the drivers. With generic device and device driver
+structures, most of the binding can take place using common code.
+
+
+Bus
+~~~
+
+The bus type structure contains a list of all devices that are on that bus
+type in the system. When device_register is called for a device, it is
+inserted into the end of this list. The bus object also contains a
+list of all drivers of that bus type. When driver_register is called
+for a driver, it is inserted at the end of this list. These are the
+two events which trigger driver binding.
+
+
+device_register
+~~~~~~~~~~~~~~~
+
+When a new device is added, the bus's list of drivers is iterated over
+to find one that supports it. In order to determine that, the device
+ID of the device must match one of the device IDs that the driver
+supports. The format and semantics for comparing IDs is bus-specific.
+Instead of trying to derive a complex state machine and matching
+algorithm, it is up to the bus driver to provide a callback to compare
+a device against the IDs of a driver. The bus returns 1 if a match was
+found; 0 otherwise.
+
+int match(struct device * dev, struct device_driver * drv);
+
+If a match is found, the device's driver field is set to the driver
+and the driver's probe callback is called. This gives the driver a
+chance to verify that it really does support the hardware, and that
+it's in a working state.
+
+Device Class
+~~~~~~~~~~~~
+
+Upon the successful completion of probe, the device is registered with
+the class to which it belongs. Device drivers belong to one and only one
+class, and that is set in the driver's devclass field.
+devclass_add_device is called to enumerate the device within the class
+and actually register it with the class, which happens with the
+class's register_dev callback.
+
+
+Driver
+~~~~~~
+
+When a driver is attached to a device, the device is inserted into the
+driver's list of devices.
+
+
+sysfs
+~~~~~
+
+A symlink is created in the bus's 'devices' directory that points to
+the device's directory in the physical hierarchy.
+
+A symlink is created in the driver's 'devices' directory that points
+to the device's directory in the physical hierarchy.
+
+A directory for the device is created in the class's directory. A
+symlink is created in that directory that points to the device's
+physical location in the sysfs tree.
+
+A symlink can be created (though this isn't done yet) in the device's
+physical directory to either its class directory, or the class's
+top-level directory. One can also be created to point to its driver's
+directory also.
+
+
+driver_register
+~~~~~~~~~~~~~~~
+
+The process is almost identical for when a new driver is added.
+The bus's list of devices is iterated over to find a match. Devices
+that already have a driver are skipped. All the devices are iterated
+over, to bind as many devices as possible to the driver.
+
+
+Removal
+~~~~~~~
+
+When a device is removed, the reference count for it will eventually
+go to 0. When it does, the remove callback of the driver is called. It
+is removed from the driver's list of devices and the reference count
+of the driver is decremented. All symlinks between the two are removed.
+
+When a driver is removed, the list of devices that it supports is
+iterated over, and the driver's remove callback is called for each
+one. The device is removed from that list and the symlinks removed.
diff --git a/Documentation/driver-api/driver-model/bus.rst b/Documentation/driver-api/driver-model/bus.rst
new file mode 100644
index 000000000000..016b15a6e8ea
--- /dev/null
+++ b/Documentation/driver-api/driver-model/bus.rst
@@ -0,0 +1,146 @@
+=========
+Bus Types
+=========
+
+Definition
+~~~~~~~~~~
+See the kerneldoc for the struct bus_type.
+
+int bus_register(struct bus_type * bus);
+
+
+Declaration
+~~~~~~~~~~~
+
+Each bus type in the kernel (PCI, USB, etc) should declare one static
+object of this type. They must initialize the name field, and may
+optionally initialize the match callback::
+
+   struct bus_type pci_bus_type = {
+          .name	= "pci",
+          .match	= pci_bus_match,
+   };
+
+The structure should be exported to drivers in a header file:
+
+extern struct bus_type pci_bus_type;
+
+
+Registration
+~~~~~~~~~~~~
+
+When a bus driver is initialized, it calls bus_register. This
+initializes the rest of the fields in the bus object and inserts it
+into a global list of bus types. Once the bus object is registered,
+the fields in it are usable by the bus driver.
+
+
+Callbacks
+~~~~~~~~~
+
+match(): Attaching Drivers to Devices
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The format of device ID structures and the semantics for comparing
+them are inherently bus-specific. Drivers typically declare an array
+of device IDs of devices they support that reside in a bus-specific
+driver structure.
+
+The purpose of the match callback is to give the bus an opportunity to
+determine if a particular driver supports a particular device by
+comparing the device IDs the driver supports with the device ID of a
+particular device, without sacrificing bus-specific functionality or
+type-safety.
+
+When a driver is registered with the bus, the bus's list of devices is
+iterated over, and the match callback is called for each device that
+does not have a driver associated with it.
+
+
+
+Device and Driver Lists
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The lists of devices and drivers are intended to replace the local
+lists that many buses keep. They are lists of struct devices and
+struct device_drivers, respectively. Bus drivers are free to use the
+lists as they please, but conversion to the bus-specific type may be
+necessary.
+
+The LDM core provides helper functions for iterating over each list::
+
+  int bus_for_each_dev(struct bus_type * bus, struct device * start,
+		       void * data,
+		       int (*fn)(struct device *, void *));
+
+  int bus_for_each_drv(struct bus_type * bus, struct device_driver * start,
+		       void * data, int (*fn)(struct device_driver *, void *));
+
+These helpers iterate over the respective list, and call the callback
+for each device or driver in the list. All list accesses are
+synchronized by taking the bus's lock (read currently). The reference
+count on each object in the list is incremented before the callback is
+called; it is decremented after the next object has been obtained. The
+lock is not held when calling the callback.
+
+
+sysfs
+~~~~~~~~
+There is a top-level directory named 'bus'.
+
+Each bus gets a directory in the bus directory, along with two default
+directories::
+
+	/sys/bus/pci/
+	|-- devices
+	`-- drivers
+
+Drivers registered with the bus get a directory in the bus's drivers
+directory::
+
+	/sys/bus/pci/
+	|-- devices
+	`-- drivers
+	    |-- Intel ICH
+	    |-- Intel ICH Joystick
+	    |-- agpgart
+	    `-- e100
+
+Each device that is discovered on a bus of that type gets a symlink in
+the bus's devices directory to the device's directory in the physical
+hierarchy::
+
+	/sys/bus/pci/
+	|-- devices
+	|   |-- 00:00.0 -> ../../../root/pci0/00:00.0
+	|   |-- 00:01.0 -> ../../../root/pci0/00:01.0
+	|   `-- 00:02.0 -> ../../../root/pci0/00:02.0
+	`-- drivers
+
+
+Exporting Attributes
+~~~~~~~~~~~~~~~~~~~~
+
+::
+
+  struct bus_attribute {
+	struct attribute	attr;
+	ssize_t (*show)(struct bus_type *, char * buf);
+	ssize_t (*store)(struct bus_type *, const char * buf, size_t count);
+  };
+
+Bus drivers can export attributes using the BUS_ATTR_RW macro that works
+similarly to the DEVICE_ATTR_RW macro for devices. For example, a
+definition like this::
+
+	static BUS_ATTR_RW(debug);
+
+is equivalent to declaring::
+
+	static bus_attribute bus_attr_debug;
+
+This can then be used to add and remove the attribute from the bus's
+sysfs directory using::
+
+	int bus_create_file(struct bus_type *, struct bus_attribute *);
+	void bus_remove_file(struct bus_type *, struct bus_attribute *);
diff --git a/Documentation/driver-api/driver-model/class.rst b/Documentation/driver-api/driver-model/class.rst
new file mode 100644
index 000000000000..fff55b80e86a
--- /dev/null
+++ b/Documentation/driver-api/driver-model/class.rst
@@ -0,0 +1,149 @@
+==============
+Device Classes
+==============
+
+Introduction
+~~~~~~~~~~~~
+A device class describes a type of device, like an audio or network
+device. The following device classes have been identified:
+
+<Insert List of Device Classes Here>
+
+
+Each device class defines a set of semantics and a programming interface
+that devices of that class adhere to. Device drivers are the
+implementation of that programming interface for a particular device on
+a particular bus.
+
+Device classes are agnostic with respect to what bus a device resides
+on.
+
+
+Programming Interface
+~~~~~~~~~~~~~~~~~~~~~
+The device class structure looks like::
+
+
+  typedef int (*devclass_add)(struct device *);
+  typedef void (*devclass_remove)(struct device *);
+
+See the kerneldoc for the struct class.
+
+A typical device class definition would look like::
+
+  struct device_class input_devclass = {
+        .name		= "input",
+        .add_device	= input_add_device,
+	.remove_device	= input_remove_device,
+  };
+
+Each device class structure should be exported in a header file so it
+can be used by drivers, extensions and interfaces.
+
+Device classes are registered and unregistered with the core using::
+
+  int devclass_register(struct device_class * cls);
+  void devclass_unregister(struct device_class * cls);
+
+
+Devices
+~~~~~~~
+As devices are bound to drivers, they are added to the device class
+that the driver belongs to. Before the driver model core, this would
+typically happen during the driver's probe() callback, once the device
+has been initialized. It now happens after the probe() callback
+finishes from the core.
+
+The device is enumerated in the class. Each time a device is added to
+the class, the class's devnum field is incremented and assigned to the
+device. The field is never decremented, so if the device is removed
+from the class and re-added, it will receive a different enumerated
+value.
+
+The class is allowed to create a class-specific structure for the
+device and store it in the device's class_data pointer.
+
+There is no list of devices in the device class. Each driver has a
+list of devices that it supports. The device class has a list of
+drivers of that particular class. To access all of the devices in the
+class, iterate over the device lists of each driver in the class.
+
+
+Device Drivers
+~~~~~~~~~~~~~~
+Device drivers are added to device classes when they are registered
+with the core. A driver specifies the class it belongs to by setting
+the struct device_driver::devclass field.
+
+
+sysfs directory structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+There is a top-level sysfs directory named 'class'.
+
+Each class gets a directory in the class directory, along with two
+default subdirectories::
+
+        class/
+        `-- input
+            |-- devices
+            `-- drivers
+
+
+Drivers registered with the class get a symlink in the drivers/ directory
+that points to the driver's directory (under its bus directory)::
+
+   class/
+   `-- input
+       |-- devices
+       `-- drivers
+           `-- usb:usb_mouse -> ../../../bus/drivers/usb_mouse/
+
+
+Each device gets a symlink in the devices/ directory that points to the
+device's directory in the physical hierarchy::
+
+   class/
+   `-- input
+       |-- devices
+       |   `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/
+       `-- drivers
+
+
+Exporting Attributes
+~~~~~~~~~~~~~~~~~~~~
+
+::
+
+  struct devclass_attribute {
+        struct attribute        attr;
+        ssize_t (*show)(struct device_class *, char * buf, size_t count, loff_t off);
+        ssize_t (*store)(struct device_class *, const char * buf, size_t count, loff_t off);
+  };
+
+Class drivers can export attributes using the DEVCLASS_ATTR macro that works
+similarly to the DEVICE_ATTR macro for devices. For example, a definition
+like this::
+
+  static DEVCLASS_ATTR(debug,0644,show_debug,store_debug);
+
+is equivalent to declaring::
+
+  static devclass_attribute devclass_attr_debug;
+
+The bus driver can add and remove the attribute from the class's
+sysfs directory using::
+
+  int devclass_create_file(struct device_class *, struct devclass_attribute *);
+  void devclass_remove_file(struct device_class *, struct devclass_attribute *);
+
+In the example above, the file will be named 'debug' in placed in the
+class's directory in sysfs.
+
+
+Interfaces
+~~~~~~~~~~
+There may exist multiple mechanisms for accessing the same device of a
+particular class type. Device interfaces describe these mechanisms.
+
+When a device is added to a device class, the core attempts to add it
+to every interface that is registered with the device class.
diff --git a/Documentation/driver-api/driver-model/design-patterns.rst b/Documentation/driver-api/driver-model/design-patterns.rst
new file mode 100644
index 000000000000..41eb8f41f7dd
--- /dev/null
+++ b/Documentation/driver-api/driver-model/design-patterns.rst
@@ -0,0 +1,116 @@
+=============================
+Device Driver Design Patterns
+=============================
+
+This document describes a few common design patterns found in device drivers.
+It is likely that subsystem maintainers will ask driver developers to
+conform to these design patterns.
+
+1. State Container
+2. container_of()
+
+
+1. State Container
+~~~~~~~~~~~~~~~~~~
+
+While the kernel contains a few device drivers that assume that they will
+only be probed() once on a certain system (singletons), it is custom to assume
+that the device the driver binds to will appear in several instances. This
+means that the probe() function and all callbacks need to be reentrant.
+
+The most common way to achieve this is to use the state container design
+pattern. It usually has this form::
+
+  struct foo {
+      spinlock_t lock; /* Example member */
+      (...)
+  };
+
+  static int foo_probe(...)
+  {
+      struct foo *foo;
+
+      foo = devm_kzalloc(dev, sizeof(*foo), GFP_KERNEL);
+      if (!foo)
+          return -ENOMEM;
+      spin_lock_init(&foo->lock);
+      (...)
+  }
+
+This will create an instance of struct foo in memory every time probe() is
+called. This is our state container for this instance of the device driver.
+Of course it is then necessary to always pass this instance of the
+state around to all functions that need access to the state and its members.
+
+For example, if the driver is registering an interrupt handler, you would
+pass around a pointer to struct foo like this::
+
+  static irqreturn_t foo_handler(int irq, void *arg)
+  {
+      struct foo *foo = arg;
+      (...)
+  }
+
+  static int foo_probe(...)
+  {
+      struct foo *foo;
+
+      (...)
+      ret = request_irq(irq, foo_handler, 0, "foo", foo);
+  }
+
+This way you always get a pointer back to the correct instance of foo in
+your interrupt handler.
+
+
+2. container_of()
+~~~~~~~~~~~~~~~~~
+
+Continuing on the above example we add an offloaded work::
+
+  struct foo {
+      spinlock_t lock;
+      struct workqueue_struct *wq;
+      struct work_struct offload;
+      (...)
+  };
+
+  static void foo_work(struct work_struct *work)
+  {
+      struct foo *foo = container_of(work, struct foo, offload);
+
+      (...)
+  }
+
+  static irqreturn_t foo_handler(int irq, void *arg)
+  {
+      struct foo *foo = arg;
+
+      queue_work(foo->wq, &foo->offload);
+      (...)
+  }
+
+  static int foo_probe(...)
+  {
+      struct foo *foo;
+
+      foo->wq = create_singlethread_workqueue("foo-wq");
+      INIT_WORK(&foo->offload, foo_work);
+      (...)
+  }
+
+The design pattern is the same for an hrtimer or something similar that will
+return a single argument which is a pointer to a struct member in the
+callback.
+
+container_of() is a macro defined in <linux/kernel.h>
+
+What container_of() does is to obtain a pointer to the containing struct from
+a pointer to a member by a simple subtraction using the offsetof() macro from
+standard C, which allows something similar to object oriented behaviours.
+Notice that the contained member must not be a pointer, but an actual member
+for this to work.
+
+We can see here that we avoid having global pointers to our struct foo *
+instance this way, while still keeping the number of parameters passed to the
+work function to a single pointer.
diff --git a/Documentation/driver-api/driver-model/device.rst b/Documentation/driver-api/driver-model/device.rst
new file mode 100644
index 000000000000..2b868d49d349
--- /dev/null
+++ b/Documentation/driver-api/driver-model/device.rst
@@ -0,0 +1,109 @@
+==========================
+The Basic Device Structure
+==========================
+
+See the kerneldoc for the struct device.
+
+
+Programming Interface
+~~~~~~~~~~~~~~~~~~~~~
+The bus driver that discovers the device uses this to register the
+device with the core::
+
+  int device_register(struct device * dev);
+
+The bus should initialize the following fields:
+
+    - parent
+    - name
+    - bus_id
+    - bus
+
+A device is removed from the core when its reference count goes to
+0. The reference count can be adjusted using::
+
+  struct device * get_device(struct device * dev);
+  void put_device(struct device * dev);
+
+get_device() will return a pointer to the struct device passed to it
+if the reference is not already 0 (if it's in the process of being
+removed already).
+
+A driver can access the lock in the device structure using::
+
+  void lock_device(struct device * dev);
+  void unlock_device(struct device * dev);
+
+
+Attributes
+~~~~~~~~~~
+
+::
+
+  struct device_attribute {
+	struct attribute	attr;
+	ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count);
+  };
+
+Attributes of devices can be exported by a device driver through sysfs.
+
+Please see Documentation/filesystems/sysfs.txt for more information
+on how sysfs works.
+
+As explained in Documentation/kobject.txt, device attributes must be
+created before the KOBJ_ADD uevent is generated. The only way to realize
+that is by defining an attribute group.
+
+Attributes are declared using a macro called DEVICE_ATTR::
+
+  #define DEVICE_ATTR(name,mode,show,store)
+
+Example:::
+
+  static DEVICE_ATTR(type, 0444, show_type, NULL);
+  static DEVICE_ATTR(power, 0644, show_power, store_power);
+
+This declares two structures of type struct device_attribute with respective
+names 'dev_attr_type' and 'dev_attr_power'. These two attributes can be
+organized as follows into a group::
+
+  static struct attribute *dev_attrs[] = {
+	&dev_attr_type.attr,
+	&dev_attr_power.attr,
+	NULL,
+  };
+
+  static struct attribute_group dev_attr_group = {
+	.attrs = dev_attrs,
+  };
+
+  static const struct attribute_group *dev_attr_groups[] = {
+	&dev_attr_group,
+	NULL,
+  };
+
+This array of groups can then be associated with a device by setting the
+group pointer in struct device before device_register() is invoked::
+
+        dev->groups = dev_attr_groups;
+        device_register(dev);
+
+The device_register() function will use the 'groups' pointer to create the
+device attributes and the device_unregister() function will use this pointer
+to remove the device attributes.
+
+Word of warning:  While the kernel allows device_create_file() and
+device_remove_file() to be called on a device at any time, userspace has
+strict expectations on when attributes get created.  When a new device is
+registered in the kernel, a uevent is generated to notify userspace (like
+udev) that a new device is available.  If attributes are added after the
+device is registered, then userspace won't get notified and userspace will
+not know about the new attributes.
+
+This is important for device driver that need to publish additional
+attributes for a device at driver probe time.  If the device driver simply
+calls device_create_file() on the device structure passed to it, then
+userspace will never be notified of the new attributes.
diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst
new file mode 100644
index 000000000000..4ac99122b5f1
--- /dev/null
+++ b/Documentation/driver-api/driver-model/devres.rst
@@ -0,0 +1,414 @@
+================================
+Devres - Managed Device Resource
+================================
+
+Tejun Heo	<teheo@suse.de>
+
+First draft	10 January 2007
+
+.. contents
+
+   1. Intro			: Huh? Devres?
+   2. Devres			: Devres in a nutshell
+   3. Devres Group		: Group devres'es and release them together
+   4. Details			: Life time rules, calling context, ...
+   5. Overhead			: How much do we have to pay for this?
+   6. List of managed interfaces: Currently implemented managed interfaces
+
+
+1. Intro
+--------
+
+devres came up while trying to convert libata to use iomap.  Each
+iomapped address should be kept and unmapped on driver detach.  For
+example, a plain SFF ATA controller (that is, good old PCI IDE) in
+native mode makes use of 5 PCI BARs and all of them should be
+maintained.
+
+As with many other device drivers, libata low level drivers have
+sufficient bugs in ->remove and ->probe failure path.  Well, yes,
+that's probably because libata low level driver developers are lazy
+bunch, but aren't all low level driver developers?  After spending a
+day fiddling with braindamaged hardware with no document or
+braindamaged document, if it's finally working, well, it's working.
+
+For one reason or another, low level drivers don't receive as much
+attention or testing as core code, and bugs on driver detach or
+initialization failure don't happen often enough to be noticeable.
+Init failure path is worse because it's much less travelled while
+needs to handle multiple entry points.
+
+So, many low level drivers end up leaking resources on driver detach
+and having half broken failure path implementation in ->probe() which
+would leak resources or even cause oops when failure occurs.  iomap
+adds more to this mix.  So do msi and msix.
+
+
+2. Devres
+---------
+
+devres is basically linked list of arbitrarily sized memory areas
+associated with a struct device.  Each devres entry is associated with
+a release function.  A devres can be released in several ways.  No
+matter what, all devres entries are released on driver detach.  On
+release, the associated release function is invoked and then the
+devres entry is freed.
+
+Managed interface is created for resources commonly used by device
+drivers using devres.  For example, coherent DMA memory is acquired
+using dma_alloc_coherent().  The managed version is called
+dmam_alloc_coherent().  It is identical to dma_alloc_coherent() except
+for the DMA memory allocated using it is managed and will be
+automatically released on driver detach.  Implementation looks like
+the following::
+
+  struct dma_devres {
+	size_t		size;
+	void		*vaddr;
+	dma_addr_t	dma_handle;
+  };
+
+  static void dmam_coherent_release(struct device *dev, void *res)
+  {
+	struct dma_devres *this = res;
+
+	dma_free_coherent(dev, this->size, this->vaddr, this->dma_handle);
+  }
+
+  dmam_alloc_coherent(dev, size, dma_handle, gfp)
+  {
+	struct dma_devres *dr;
+	void *vaddr;
+
+	dr = devres_alloc(dmam_coherent_release, sizeof(*dr), gfp);
+	...
+
+	/* alloc DMA memory as usual */
+	vaddr = dma_alloc_coherent(...);
+	...
+
+	/* record size, vaddr, dma_handle in dr */
+	dr->vaddr = vaddr;
+	...
+
+	devres_add(dev, dr);
+
+	return vaddr;
+  }
+
+If a driver uses dmam_alloc_coherent(), the area is guaranteed to be
+freed whether initialization fails half-way or the device gets
+detached.  If most resources are acquired using managed interface, a
+driver can have much simpler init and exit code.  Init path basically
+looks like the following::
+
+  my_init_one()
+  {
+	struct mydev *d;
+
+	d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->ring = dmam_alloc_coherent(...);
+	if (!d->ring)
+		return -ENOMEM;
+
+	if (check something)
+		return -EINVAL;
+	...
+
+	return register_to_upper_layer(d);
+  }
+
+And exit path::
+
+  my_remove_one()
+  {
+	unregister_from_upper_layer(d);
+	shutdown_my_hardware();
+  }
+
+As shown above, low level drivers can be simplified a lot by using
+devres.  Complexity is shifted from less maintained low level drivers
+to better maintained higher layer.  Also, as init failure path is
+shared with exit path, both can get more testing.
+
+Note though that when converting current calls or assignments to
+managed devm_* versions it is up to you to check if internal operations
+like allocating memory, have failed. Managed resources pertains to the
+freeing of these resources *only* - all other checks needed are still
+on you. In some cases this may mean introducing checks that were not
+necessary before moving to the managed devm_* calls.
+
+
+3. Devres group
+---------------
+
+Devres entries can be grouped using devres group.  When a group is
+released, all contained normal devres entries and properly nested
+groups are released.  One usage is to rollback series of acquired
+resources on failure.  For example::
+
+  if (!devres_open_group(dev, NULL, GFP_KERNEL))
+	return -ENOMEM;
+
+  acquire A;
+  if (failed)
+	goto err;
+
+  acquire B;
+  if (failed)
+	goto err;
+  ...
+
+  devres_remove_group(dev, NULL);
+  return 0;
+
+ err:
+  devres_release_group(dev, NULL);
+  return err_code;
+
+As resource acquisition failure usually means probe failure, constructs
+like above are usually useful in midlayer driver (e.g. libata core
+layer) where interface function shouldn't have side effect on failure.
+For LLDs, just returning error code suffices in most cases.
+
+Each group is identified by `void *id`.  It can either be explicitly
+specified by @id argument to devres_open_group() or automatically
+created by passing NULL as @id as in the above example.  In both
+cases, devres_open_group() returns the group's id.  The returned id
+can be passed to other devres functions to select the target group.
+If NULL is given to those functions, the latest open group is
+selected.
+
+For example, you can do something like the following::
+
+  int my_midlayer_create_something()
+  {
+	if (!devres_open_group(dev, my_midlayer_create_something, GFP_KERNEL))
+		return -ENOMEM;
+
+	...
+
+	devres_close_group(dev, my_midlayer_create_something);
+	return 0;
+  }
+
+  void my_midlayer_destroy_something()
+  {
+	devres_release_group(dev, my_midlayer_create_something);
+  }
+
+
+4. Details
+----------
+
+Lifetime of a devres entry begins on devres allocation and finishes
+when it is released or destroyed (removed and freed) - no reference
+counting.
+
+devres core guarantees atomicity to all basic devres operations and
+has support for single-instance devres types (atomic
+lookup-and-add-if-not-found).  Other than that, synchronizing
+concurrent accesses to allocated devres data is caller's
+responsibility.  This is usually non-issue because bus ops and
+resource allocations already do the job.
+
+For an example of single-instance devres type, read pcim_iomap_table()
+in lib/devres.c.
+
+All devres interface functions can be called without context if the
+right gfp mask is given.
+
+
+5. Overhead
+-----------
+
+Each devres bookkeeping info is allocated together with requested data
+area.  With debug option turned off, bookkeeping info occupies 16
+bytes on 32bit machines and 24 bytes on 64bit (three pointers rounded
+up to ull alignment).  If singly linked list is used, it can be
+reduced to two pointers (8 bytes on 32bit, 16 bytes on 64bit).
+
+Each devres group occupies 8 pointers.  It can be reduced to 6 if
+singly linked list is used.
+
+Memory space overhead on ahci controller with two ports is between 300
+and 400 bytes on 32bit machine after naive conversion (we can
+certainly invest a bit more effort into libata core layer).
+
+
+6. List of managed interfaces
+-----------------------------
+
+CLOCK
+  devm_clk_get()
+  devm_clk_get_optional()
+  devm_clk_put()
+  devm_clk_hw_register()
+  devm_of_clk_add_hw_provider()
+  devm_clk_hw_register_clkdev()
+
+DMA
+  dmaenginem_async_device_register()
+  dmam_alloc_coherent()
+  dmam_alloc_attrs()
+  dmam_free_coherent()
+  dmam_pool_create()
+  dmam_pool_destroy()
+
+DRM
+  devm_drm_dev_init()
+
+GPIO
+  devm_gpiod_get()
+  devm_gpiod_get_index()
+  devm_gpiod_get_index_optional()
+  devm_gpiod_get_optional()
+  devm_gpiod_put()
+  devm_gpiod_unhinge()
+  devm_gpiochip_add_data()
+  devm_gpio_request()
+  devm_gpio_request_one()
+  devm_gpio_free()
+
+I2C
+  devm_i2c_new_dummy_device()
+
+IIO
+  devm_iio_device_alloc()
+  devm_iio_device_free()
+  devm_iio_device_register()
+  devm_iio_device_unregister()
+  devm_iio_kfifo_allocate()
+  devm_iio_kfifo_free()
+  devm_iio_triggered_buffer_setup()
+  devm_iio_triggered_buffer_cleanup()
+  devm_iio_trigger_alloc()
+  devm_iio_trigger_free()
+  devm_iio_trigger_register()
+  devm_iio_trigger_unregister()
+  devm_iio_channel_get()
+  devm_iio_channel_release()
+  devm_iio_channel_get_all()
+  devm_iio_channel_release_all()
+
+INPUT
+  devm_input_allocate_device()
+
+IO region
+  devm_release_mem_region()
+  devm_release_region()
+  devm_release_resource()
+  devm_request_mem_region()
+  devm_request_region()
+  devm_request_resource()
+
+IOMAP
+  devm_ioport_map()
+  devm_ioport_unmap()
+  devm_ioremap()
+  devm_ioremap_nocache()
+  devm_ioremap_wc()
+  devm_ioremap_resource() : checks resource, requests memory region, ioremaps
+  devm_iounmap()
+  pcim_iomap()
+  pcim_iomap_regions()	: do request_region() and iomap() on multiple BARs
+  pcim_iomap_table()	: array of mapped addresses indexed by BAR
+  pcim_iounmap()
+
+IRQ
+  devm_free_irq()
+  devm_request_any_context_irq()
+  devm_request_irq()
+  devm_request_threaded_irq()
+  devm_irq_alloc_descs()
+  devm_irq_alloc_desc()
+  devm_irq_alloc_desc_at()
+  devm_irq_alloc_desc_from()
+  devm_irq_alloc_descs_from()
+  devm_irq_alloc_generic_chip()
+  devm_irq_setup_generic_chip()
+  devm_irq_sim_init()
+
+LED
+  devm_led_classdev_register()
+  devm_led_classdev_unregister()
+
+MDIO
+  devm_mdiobus_alloc()
+  devm_mdiobus_alloc_size()
+  devm_mdiobus_free()
+
+MEM
+  devm_free_pages()
+  devm_get_free_pages()
+  devm_kasprintf()
+  devm_kcalloc()
+  devm_kfree()
+  devm_kmalloc()
+  devm_kmalloc_array()
+  devm_kmemdup()
+  devm_kstrdup()
+  devm_kvasprintf()
+  devm_kzalloc()
+
+MFD
+  devm_mfd_add_devices()
+
+MUX
+  devm_mux_chip_alloc()
+  devm_mux_chip_register()
+  devm_mux_control_get()
+
+PER-CPU MEM
+  devm_alloc_percpu()
+  devm_free_percpu()
+
+PCI
+  devm_pci_alloc_host_bridge()  : managed PCI host bridge allocation
+  devm_pci_remap_cfgspace()	: ioremap PCI configuration space
+  devm_pci_remap_cfg_resource()	: ioremap PCI configuration space resource
+  pcim_enable_device()		: after success, all PCI ops become managed
+  pcim_pin_device()		: keep PCI device enabled after release
+
+PHY
+  devm_usb_get_phy()
+  devm_usb_put_phy()
+
+PINCTRL
+  devm_pinctrl_get()
+  devm_pinctrl_put()
+  devm_pinctrl_register()
+  devm_pinctrl_unregister()
+
+POWER
+  devm_reboot_mode_register()
+  devm_reboot_mode_unregister()
+
+PWM
+  devm_pwm_get()
+  devm_pwm_put()
+
+REGULATOR
+  devm_regulator_bulk_get()
+  devm_regulator_get()
+  devm_regulator_put()
+  devm_regulator_register()
+
+RESET
+  devm_reset_control_get()
+  devm_reset_controller_register()
+
+SERDEV
+  devm_serdev_device_open()
+
+SLAVE DMA ENGINE
+  devm_acpi_dma_controller_register()
+
+SPI
+  devm_spi_register_master()
+
+WATCHDOG
+  devm_watchdog_register_device()
diff --git a/Documentation/driver-api/driver-model/driver.rst b/Documentation/driver-api/driver-model/driver.rst
new file mode 100644
index 000000000000..11d281506a04
--- /dev/null
+++ b/Documentation/driver-api/driver-model/driver.rst
@@ -0,0 +1,223 @@
+==============
+Device Drivers
+==============
+
+See the kerneldoc for the struct device_driver.
+
+
+Allocation
+~~~~~~~~~~
+
+Device drivers are statically allocated structures. Though there may
+be multiple devices in a system that a driver supports, struct
+device_driver represents the driver as a whole (not a particular
+device instance).
+
+Initialization
+~~~~~~~~~~~~~~
+
+The driver must initialize at least the name and bus fields. It should
+also initialize the devclass field (when it arrives), so it may obtain
+the proper linkage internally. It should also initialize as many of
+the callbacks as possible, though each is optional.
+
+Declaration
+~~~~~~~~~~~
+
+As stated above, struct device_driver objects are statically
+allocated. Below is an example declaration of the eepro100
+driver. This declaration is hypothetical only; it relies on the driver
+being converted completely to the new model::
+
+  static struct device_driver eepro100_driver = {
+         .name		= "eepro100",
+         .bus		= &pci_bus_type,
+
+         .probe		= eepro100_probe,
+         .remove		= eepro100_remove,
+         .suspend		= eepro100_suspend,
+         .resume		= eepro100_resume,
+  };
+
+Most drivers will not be able to be converted completely to the new
+model because the bus they belong to has a bus-specific structure with
+bus-specific fields that cannot be generalized.
+
+The most common example of this are device ID structures. A driver
+typically defines an array of device IDs that it supports. The format
+of these structures and the semantics for comparing device IDs are
+completely bus-specific. Defining them as bus-specific entities would
+sacrifice type-safety, so we keep bus-specific structures around.
+
+Bus-specific drivers should include a generic struct device_driver in
+the definition of the bus-specific driver. Like this::
+
+  struct pci_driver {
+         const struct pci_device_id *id_table;
+         struct device_driver	  driver;
+  };
+
+A definition that included bus-specific fields would look like
+(using the eepro100 driver again)::
+
+  static struct pci_driver eepro100_driver = {
+         .id_table       = eepro100_pci_tbl,
+         .driver	       = {
+		.name		= "eepro100",
+		.bus		= &pci_bus_type,
+		.probe		= eepro100_probe,
+		.remove		= eepro100_remove,
+		.suspend	= eepro100_suspend,
+		.resume		= eepro100_resume,
+         },
+  };
+
+Some may find the syntax of embedded struct initialization awkward or
+even a bit ugly. So far, it's the best way we've found to do what we want...
+
+Registration
+~~~~~~~~~~~~
+
+::
+
+  int driver_register(struct device_driver *drv);
+
+The driver registers the structure on startup. For drivers that have
+no bus-specific fields (i.e. don't have a bus-specific driver
+structure), they would use driver_register and pass a pointer to their
+struct device_driver object.
+
+Most drivers, however, will have a bus-specific structure and will
+need to register with the bus using something like pci_driver_register.
+
+It is important that drivers register their driver structure as early as
+possible. Registration with the core initializes several fields in the
+struct device_driver object, including the reference count and the
+lock. These fields are assumed to be valid at all times and may be
+used by the device model core or the bus driver.
+
+
+Transition Bus Drivers
+~~~~~~~~~~~~~~~~~~~~~~
+
+By defining wrapper functions, the transition to the new model can be
+made easier. Drivers can ignore the generic structure altogether and
+let the bus wrapper fill in the fields. For the callbacks, the bus can
+define generic callbacks that forward the call to the bus-specific
+callbacks of the drivers.
+
+This solution is intended to be only temporary. In order to get class
+information in the driver, the drivers must be modified anyway. Since
+converting drivers to the new model should reduce some infrastructural
+complexity and code size, it is recommended that they are converted as
+class information is added.
+
+Access
+~~~~~~
+
+Once the object has been registered, it may access the common fields of
+the object, like the lock and the list of devices::
+
+  int driver_for_each_dev(struct device_driver *drv, void *data,
+			  int (*callback)(struct device *dev, void *data));
+
+The devices field is a list of all the devices that have been bound to
+the driver. The LDM core provides a helper function to operate on all
+the devices a driver controls. This helper locks the driver on each
+node access, and does proper reference counting on each device as it
+accesses it.
+
+
+sysfs
+~~~~~
+
+When a driver is registered, a sysfs directory is created in its
+bus's directory. In this directory, the driver can export an interface
+to userspace to control operation of the driver on a global basis;
+e.g. toggling debugging output in the driver.
+
+A future feature of this directory will be a 'devices' directory. This
+directory will contain symlinks to the directories of devices it
+supports.
+
+
+
+Callbacks
+~~~~~~~~~
+
+::
+
+	int	(*probe)	(struct device *dev);
+
+The probe() entry is called in task context, with the bus's rwsem locked
+and the driver partially bound to the device.  Drivers commonly use
+container_of() to convert "dev" to a bus-specific type, both in probe()
+and other routines.  That type often provides device resource data, such
+as pci_dev.resource[] or platform_device.resources, which is used in
+addition to dev->platform_data to initialize the driver.
+
+This callback holds the driver-specific logic to bind the driver to a
+given device.  That includes verifying that the device is present, that
+it's a version the driver can handle, that driver data structures can
+be allocated and initialized, and that any hardware can be initialized.
+Drivers often store a pointer to their state with dev_set_drvdata().
+When the driver has successfully bound itself to that device, then probe()
+returns zero and the driver model code will finish its part of binding
+the driver to that device.
+
+A driver's probe() may return a negative errno value to indicate that
+the driver did not bind to this device, in which case it should have
+released all resources it allocated::
+
+	int 	(*remove)	(struct device *dev);
+
+remove is called to unbind a driver from a device. This may be
+called if a device is physically removed from the system, if the
+driver module is being unloaded, during a reboot sequence, or
+in other cases.
+
+It is up to the driver to determine if the device is present or
+not. It should free any resources allocated specifically for the
+device; i.e. anything in the device's driver_data field.
+
+If the device is still present, it should quiesce the device and place
+it into a supported low-power state::
+
+	int	(*suspend)	(struct device *dev, pm_message_t state);
+
+suspend is called to put the device in a low power state::
+
+	int	(*resume)	(struct device *dev);
+
+Resume is used to bring a device back from a low power state.
+
+
+Attributes
+~~~~~~~~~~
+
+::
+
+  struct driver_attribute {
+          struct attribute        attr;
+          ssize_t (*show)(struct device_driver *driver, char *buf);
+          ssize_t (*store)(struct device_driver *, const char *buf, size_t count);
+  };
+
+Device drivers can export attributes via their sysfs directories.
+Drivers can declare attributes using a DRIVER_ATTR_RW and DRIVER_ATTR_RO
+macro that works identically to the DEVICE_ATTR_RW and DEVICE_ATTR_RO
+macros.
+
+Example::
+
+	DRIVER_ATTR_RW(debug);
+
+This is equivalent to declaring::
+
+	struct driver_attribute driver_attr_debug;
+
+This can then be used to add and remove the attribute from the
+driver's directory using::
+
+  int driver_create_file(struct device_driver *, const struct driver_attribute *);
+  void driver_remove_file(struct device_driver *, const struct driver_attribute *);
diff --git a/Documentation/driver-api/driver-model/index.rst b/Documentation/driver-api/driver-model/index.rst
new file mode 100644
index 000000000000..755016422269
--- /dev/null
+++ b/Documentation/driver-api/driver-model/index.rst
@@ -0,0 +1,24 @@
+============
+Driver Model
+============
+
+.. toctree::
+   :maxdepth: 1
+
+   binding
+   bus
+   class
+   design-patterns
+   device
+   devres
+   driver
+   overview
+   platform
+   porting
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/driver-api/driver-model/overview.rst b/Documentation/driver-api/driver-model/overview.rst
new file mode 100644
index 000000000000..d4d1e9b40e0c
--- /dev/null
+++ b/Documentation/driver-api/driver-model/overview.rst
@@ -0,0 +1,124 @@
+=============================
+The Linux Kernel Device Model
+=============================
+
+Patrick Mochel	<mochel@digitalimplant.org>
+
+Drafted 26 August 2002
+Updated 31 January 2006
+
+
+Overview
+~~~~~~~~
+
+The Linux Kernel Driver Model is a unification of all the disparate driver
+models that were previously used in the kernel. It is intended to augment the
+bus-specific drivers for bridges and devices by consolidating a set of data
+and operations into globally accessible data structures.
+
+Traditional driver models implemented some sort of tree-like structure
+(sometimes just a list) for the devices they control. There wasn't any
+uniformity across the different bus types.
+
+The current driver model provides a common, uniform data model for describing
+a bus and the devices that can appear under the bus. The unified bus
+model includes a set of common attributes which all busses carry, and a set
+of common callbacks, such as device discovery during bus probing, bus
+shutdown, bus power management, etc.
+
+The common device and bridge interface reflects the goals of the modern
+computer: namely the ability to do seamless device "plug and play", power
+management, and hot plug. In particular, the model dictated by Intel and
+Microsoft (namely ACPI) ensures that almost every device on almost any bus
+on an x86-compatible system can work within this paradigm.  Of course,
+not every bus is able to support all such operations, although most
+buses support most of those operations.
+
+
+Downstream Access
+~~~~~~~~~~~~~~~~~
+
+Common data fields have been moved out of individual bus layers into a common
+data structure. These fields must still be accessed by the bus layers,
+and sometimes by the device-specific drivers.
+
+Other bus layers are encouraged to do what has been done for the PCI layer.
+struct pci_dev now looks like this::
+
+  struct pci_dev {
+	...
+
+	struct device dev;     /* Generic device interface */
+	...
+  };
+
+Note first that the struct device dev within the struct pci_dev is
+statically allocated. This means only one allocation on device discovery.
+
+Note also that that struct device dev is not necessarily defined at the
+front of the pci_dev structure.  This is to make people think about what
+they're doing when switching between the bus driver and the global driver,
+and to discourage meaningless and incorrect casts between the two.
+
+The PCI bus layer freely accesses the fields of struct device. It knows about
+the structure of struct pci_dev, and it should know the structure of struct
+device. Individual PCI device drivers that have been converted to the current
+driver model generally do not and should not touch the fields of struct device,
+unless there is a compelling reason to do so.
+
+The above abstraction prevents unnecessary pain during transitional phases.
+If it were not done this way, then when a field was renamed or removed, every
+downstream driver would break.  On the other hand, if only the bus layer
+(and not the device layer) accesses the struct device, it is only the bus
+layer that needs to change.
+
+
+User Interface
+~~~~~~~~~~~~~~
+
+By virtue of having a complete hierarchical view of all the devices in the
+system, exporting a complete hierarchical view to userspace becomes relatively
+easy. This has been accomplished by implementing a special purpose virtual
+file system named sysfs.
+
+Almost all mainstream Linux distros mount this filesystem automatically; you
+can see some variation of the following in the output of the "mount" command::
+
+  $ mount
+  ...
+  none on /sys type sysfs (rw,noexec,nosuid,nodev)
+  ...
+  $
+
+The auto-mounting of sysfs is typically accomplished by an entry similar to
+the following in the /etc/fstab file::
+
+  none     	/sys	sysfs    defaults	  	0 0
+
+or something similar in the /lib/init/fstab file on Debian-based systems::
+
+  none            /sys    sysfs    nodev,noexec,nosuid    0 0
+
+If sysfs is not automatically mounted, you can always do it manually with::
+
+	# mount -t sysfs sysfs /sys
+
+Whenever a device is inserted into the tree, a directory is created for it.
+This directory may be populated at each layer of discovery - the global layer,
+the bus layer, or the device layer.
+
+The global layer currently creates two files - 'name' and 'power'. The
+former only reports the name of the device. The latter reports the
+current power state of the device. It will also be used to set the current
+power state.
+
+The bus layer may also create files for the devices it finds while probing the
+bus. For example, the PCI layer currently creates 'irq' and 'resource' files
+for each PCI device.
+
+A device-specific driver may also export files in its directory to expose
+device-specific data or tunable interfaces.
+
+More information about the sysfs directory layout can be found in
+the other documents in this directory and in the file
+Documentation/filesystems/sysfs.txt.
diff --git a/Documentation/driver-api/driver-model/platform.rst b/Documentation/driver-api/driver-model/platform.rst
new file mode 100644
index 000000000000..334dd4071ae4
--- /dev/null
+++ b/Documentation/driver-api/driver-model/platform.rst
@@ -0,0 +1,246 @@
+============================
+Platform Devices and Drivers
+============================
+
+See <linux/platform_device.h> for the driver model interface to the
+platform bus:  platform_device, and platform_driver.  This pseudo-bus
+is used to connect devices on busses with minimal infrastructure,
+like those used to integrate peripherals on many system-on-chip
+processors, or some "legacy" PC interconnects; as opposed to large
+formally specified ones like PCI or USB.
+
+
+Platform devices
+~~~~~~~~~~~~~~~~
+Platform devices are devices that typically appear as autonomous
+entities in the system. This includes legacy port-based devices and
+host bridges to peripheral buses, and most controllers integrated
+into system-on-chip platforms.  What they usually have in common
+is direct addressing from a CPU bus.  Rarely, a platform_device will
+be connected through a segment of some other kind of bus; but its
+registers will still be directly addressable.
+
+Platform devices are given a name, used in driver binding, and a
+list of resources such as addresses and IRQs::
+
+  struct platform_device {
+	const char	*name;
+	u32		id;
+	struct device	dev;
+	u32		num_resources;
+	struct resource	*resource;
+  };
+
+
+Platform drivers
+~~~~~~~~~~~~~~~~
+Platform drivers follow the standard driver model convention, where
+discovery/enumeration is handled outside the drivers, and drivers
+provide probe() and remove() methods.  They support power management
+and shutdown notifications using the standard conventions::
+
+  struct platform_driver {
+	int (*probe)(struct platform_device *);
+	int (*remove)(struct platform_device *);
+	void (*shutdown)(struct platform_device *);
+	int (*suspend)(struct platform_device *, pm_message_t state);
+	int (*suspend_late)(struct platform_device *, pm_message_t state);
+	int (*resume_early)(struct platform_device *);
+	int (*resume)(struct platform_device *);
+	struct device_driver driver;
+  };
+
+Note that probe() should in general verify that the specified device hardware
+actually exists; sometimes platform setup code can't be sure.  The probing
+can use device resources, including clocks, and device platform_data.
+
+Platform drivers register themselves the normal way::
+
+	int platform_driver_register(struct platform_driver *drv);
+
+Or, in common situations where the device is known not to be hot-pluggable,
+the probe() routine can live in an init section to reduce the driver's
+runtime memory footprint::
+
+	int platform_driver_probe(struct platform_driver *drv,
+			  int (*probe)(struct platform_device *))
+
+Kernel modules can be composed of several platform drivers. The platform core
+provides helpers to register and unregister an array of drivers::
+
+	int __platform_register_drivers(struct platform_driver * const *drivers,
+				      unsigned int count, struct module *owner);
+	void platform_unregister_drivers(struct platform_driver * const *drivers,
+					 unsigned int count);
+
+If one of the drivers fails to register, all drivers registered up to that
+point will be unregistered in reverse order. Note that there is a convenience
+macro that passes THIS_MODULE as owner parameter::
+
+	#define platform_register_drivers(drivers, count)
+
+
+Device Enumeration
+~~~~~~~~~~~~~~~~~~
+As a rule, platform specific (and often board-specific) setup code will
+register platform devices::
+
+	int platform_device_register(struct platform_device *pdev);
+
+	int platform_add_devices(struct platform_device **pdevs, int ndev);
+
+The general rule is to register only those devices that actually exist,
+but in some cases extra devices might be registered.  For example, a kernel
+might be configured to work with an external network adapter that might not
+be populated on all boards, or likewise to work with an integrated controller
+that some boards might not hook up to any peripherals.
+
+In some cases, boot firmware will export tables describing the devices
+that are populated on a given board.   Without such tables, often the
+only way for system setup code to set up the correct devices is to build
+a kernel for a specific target board.  Such board-specific kernels are
+common with embedded and custom systems development.
+
+In many cases, the memory and IRQ resources associated with the platform
+device are not enough to let the device's driver work.  Board setup code
+will often provide additional information using the device's platform_data
+field to hold additional information.
+
+Embedded systems frequently need one or more clocks for platform devices,
+which are normally kept off until they're actively needed (to save power).
+System setup also associates those clocks with the device, so that that
+calls to clk_get(&pdev->dev, clock_name) return them as needed.
+
+
+Legacy Drivers:  Device Probing
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Some drivers are not fully converted to the driver model, because they take
+on a non-driver role:  the driver registers its platform device, rather than
+leaving that for system infrastructure.  Such drivers can't be hotplugged
+or coldplugged, since those mechanisms require device creation to be in a
+different system component than the driver.
+
+The only "good" reason for this is to handle older system designs which, like
+original IBM PCs, rely on error-prone "probe-the-hardware" models for hardware
+configuration.  Newer systems have largely abandoned that model, in favor of
+bus-level support for dynamic configuration (PCI, USB), or device tables
+provided by the boot firmware (e.g. PNPACPI on x86).  There are too many
+conflicting options about what might be where, and even educated guesses by
+an operating system will be wrong often enough to make trouble.
+
+This style of driver is discouraged.  If you're updating such a driver,
+please try to move the device enumeration to a more appropriate location,
+outside the driver.  This will usually be cleanup, since such drivers
+tend to already have "normal" modes, such as ones using device nodes that
+were created by PNP or by platform device setup.
+
+None the less, there are some APIs to support such legacy drivers.  Avoid
+using these calls except with such hotplug-deficient drivers::
+
+	struct platform_device *platform_device_alloc(
+			const char *name, int id);
+
+You can use platform_device_alloc() to dynamically allocate a device, which
+you will then initialize with resources and platform_device_register().
+A better solution is usually::
+
+	struct platform_device *platform_device_register_simple(
+			const char *name, int id,
+			struct resource *res, unsigned int nres);
+
+You can use platform_device_register_simple() as a one-step call to allocate
+and register a device.
+
+
+Device Naming and Driver Binding
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The platform_device.dev.bus_id is the canonical name for the devices.
+It's built from two components:
+
+    * platform_device.name ... which is also used to for driver matching.
+
+    * platform_device.id ... the device instance number, or else "-1"
+      to indicate there's only one.
+
+These are concatenated, so name/id "serial"/0 indicates bus_id "serial.0", and
+"serial/3" indicates bus_id "serial.3"; both would use the platform_driver
+named "serial".  While "my_rtc"/-1 would be bus_id "my_rtc" (no instance id)
+and use the platform_driver called "my_rtc".
+
+Driver binding is performed automatically by the driver core, invoking
+driver probe() after finding a match between device and driver.  If the
+probe() succeeds, the driver and device are bound as usual.  There are
+three different ways to find such a match:
+
+    - Whenever a device is registered, the drivers for that bus are
+      checked for matches.  Platform devices should be registered very
+      early during system boot.
+
+    - When a driver is registered using platform_driver_register(), all
+      unbound devices on that bus are checked for matches.  Drivers
+      usually register later during booting, or by module loading.
+
+    - Registering a driver using platform_driver_probe() works just like
+      using platform_driver_register(), except that the driver won't
+      be probed later if another device registers.  (Which is OK, since
+      this interface is only for use with non-hotpluggable devices.)
+
+
+Early Platform Devices and Drivers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The early platform interfaces provide platform data to platform device
+drivers early on during the system boot. The code is built on top of the
+early_param() command line parsing and can be executed very early on.
+
+Example: "earlyprintk" class early serial console in 6 steps
+
+1. Registering early platform device data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The architecture code registers platform device data using the function
+early_platform_add_devices(). In the case of early serial console this
+should be hardware configuration for the serial port. Devices registered
+at this point will later on be matched against early platform drivers.
+
+2. Parsing kernel command line
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The architecture code calls parse_early_param() to parse the kernel
+command line. This will execute all matching early_param() callbacks.
+User specified early platform devices will be registered at this point.
+For the early serial console case the user can specify port on the
+kernel command line as "earlyprintk=serial.0" where "earlyprintk" is
+the class string, "serial" is the name of the platform driver and
+0 is the platform device id. If the id is -1 then the dot and the
+id can be omitted.
+
+3. Installing early platform drivers belonging to a certain class
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The architecture code may optionally force registration of all early
+platform drivers belonging to a certain class using the function
+early_platform_driver_register_all(). User specified devices from
+step 2 have priority over these. This step is omitted by the serial
+driver example since the early serial driver code should be disabled
+unless the user has specified port on the kernel command line.
+
+4. Early platform driver registration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Compiled-in platform drivers making use of early_platform_init() are
+automatically registered during step 2 or 3. The serial driver example
+should use early_platform_init("earlyprintk", &platform_driver).
+
+5. Probing of early platform drivers belonging to a certain class
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The architecture code calls early_platform_driver_probe() to match
+registered early platform devices associated with a certain class with
+registered early platform drivers. Matched devices will get probed().
+This step can be executed at any point during the early boot. As soon
+as possible may be good for the serial port case.
+
+6. Inside the early platform driver probe()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The driver code needs to take special care during early boot, especially
+when it comes to memory allocation and interrupt registration. The code
+in the probe() function can use is_early_platform_device() to check if
+it is called at early platform device or at the regular platform device
+time. The early serial driver performs register_console() at this point.
+
+For further information, see <linux/platform_device.h>.
diff --git a/Documentation/driver-api/driver-model/porting.rst b/Documentation/driver-api/driver-model/porting.rst
new file mode 100644
index 000000000000..931ea879af3f
--- /dev/null
+++ b/Documentation/driver-api/driver-model/porting.rst
@@ -0,0 +1,448 @@
+=======================================
+Porting Drivers to the New Driver Model
+=======================================
+
+Patrick Mochel
+
+7 January 2003
+
+
+Overview
+
+Please refer to `Documentation/driver-api/driver-model/*.rst` for definitions of
+various driver types and concepts.
+
+Most of the work of porting devices drivers to the new model happens
+at the bus driver layer. This was intentional, to minimize the
+negative effect on kernel drivers, and to allow a gradual transition
+of bus drivers.
+
+In a nutshell, the driver model consists of a set of objects that can
+be embedded in larger, bus-specific objects. Fields in these generic
+objects can replace fields in the bus-specific objects.
+
+The generic objects must be registered with the driver model core. By
+doing so, they will exported via the sysfs filesystem. sysfs can be
+mounted by doing::
+
+	# mount -t sysfs sysfs /sys
+
+
+
+The Process
+
+Step 0: Read include/linux/device.h for object and function definitions.
+
+Step 1: Registering the bus driver.
+
+
+- Define a struct bus_type for the bus driver::
+
+    struct bus_type pci_bus_type = {
+          .name           = "pci",
+    };
+
+
+- Register the bus type.
+
+  This should be done in the initialization function for the bus type,
+  which is usually the module_init(), or equivalent, function::
+
+    static int __init pci_driver_init(void)
+    {
+            return bus_register(&pci_bus_type);
+    }
+
+    subsys_initcall(pci_driver_init);
+
+
+  The bus type may be unregistered (if the bus driver may be compiled
+  as a module) by doing::
+
+     bus_unregister(&pci_bus_type);
+
+
+- Export the bus type for others to use.
+
+  Other code may wish to reference the bus type, so declare it in a
+  shared header file and export the symbol.
+
+From include/linux/pci.h::
+
+  extern struct bus_type pci_bus_type;
+
+
+From file the above code appears in::
+
+  EXPORT_SYMBOL(pci_bus_type);
+
+
+
+- This will cause the bus to show up in /sys/bus/pci/ with two
+  subdirectories: 'devices' and 'drivers'::
+
+    # tree -d /sys/bus/pci/
+    /sys/bus/pci/
+    |-- devices
+    `-- drivers
+
+
+
+Step 2: Registering Devices.
+
+struct device represents a single device. It mainly contains metadata
+describing the relationship the device has to other entities.
+
+
+- Embed a struct device in the bus-specific device type::
+
+
+    struct pci_dev {
+           ...
+           struct  device  dev;            /* Generic device interface */
+           ...
+    };
+
+  It is recommended that the generic device not be the first item in
+  the struct to discourage programmers from doing mindless casts
+  between the object types. Instead macros, or inline functions,
+  should be created to convert from the generic object type::
+
+
+    #define to_pci_dev(n) container_of(n, struct pci_dev, dev)
+
+    or
+
+    static inline struct pci_dev * to_pci_dev(struct kobject * kobj)
+    {
+	return container_of(n, struct pci_dev, dev);
+    }
+
+  This allows the compiler to verify type-safety of the operations
+  that are performed (which is Good).
+
+
+- Initialize the device on registration.
+
+  When devices are discovered or registered with the bus type, the
+  bus driver should initialize the generic device. The most important
+  things to initialize are the bus_id, parent, and bus fields.
+
+  The bus_id is an ASCII string that contains the device's address on
+  the bus. The format of this string is bus-specific. This is
+  necessary for representing devices in sysfs.
+
+  parent is the physical parent of the device. It is important that
+  the bus driver sets this field correctly.
+
+  The driver model maintains an ordered list of devices that it uses
+  for power management. This list must be in order to guarantee that
+  devices are shutdown before their physical parents, and vice versa.
+  The order of this list is determined by the parent of registered
+  devices.
+
+  Also, the location of the device's sysfs directory depends on a
+  device's parent. sysfs exports a directory structure that mirrors
+  the device hierarchy. Accurately setting the parent guarantees that
+  sysfs will accurately represent the hierarchy.
+
+  The device's bus field is a pointer to the bus type the device
+  belongs to. This should be set to the bus_type that was declared
+  and initialized before.
+
+  Optionally, the bus driver may set the device's name and release
+  fields.
+
+  The name field is an ASCII string describing the device, like
+
+     "ATI Technologies Inc Radeon QD"
+
+  The release field is a callback that the driver model core calls
+  when the device has been removed, and all references to it have
+  been released. More on this in a moment.
+
+
+- Register the device.
+
+  Once the generic device has been initialized, it can be registered
+  with the driver model core by doing::
+
+       device_register(&dev->dev);
+
+  It can later be unregistered by doing::
+
+       device_unregister(&dev->dev);
+
+  This should happen on buses that support hotpluggable devices.
+  If a bus driver unregisters a device, it should not immediately free
+  it. It should instead wait for the driver model core to call the
+  device's release method, then free the bus-specific object.
+  (There may be other code that is currently referencing the device
+  structure, and it would be rude to free the device while that is
+  happening).
+
+
+  When the device is registered, a directory in sysfs is created.
+  The PCI tree in sysfs looks like::
+
+    /sys/devices/pci0/
+    |-- 00:00.0
+    |-- 00:01.0
+    |   `-- 01:00.0
+    |-- 00:02.0
+    |   `-- 02:1f.0
+    |       `-- 03:00.0
+    |-- 00:1e.0
+    |   `-- 04:04.0
+    |-- 00:1f.0
+    |-- 00:1f.1
+    |   |-- ide0
+    |   |   |-- 0.0
+    |   |   `-- 0.1
+    |   `-- ide1
+    |       `-- 1.0
+    |-- 00:1f.2
+    |-- 00:1f.3
+    `-- 00:1f.5
+
+  Also, symlinks are created in the bus's 'devices' directory
+  that point to the device's directory in the physical hierarchy::
+
+    /sys/bus/pci/devices/
+    |-- 00:00.0 -> ../../../devices/pci0/00:00.0
+    |-- 00:01.0 -> ../../../devices/pci0/00:01.0
+    |-- 00:02.0 -> ../../../devices/pci0/00:02.0
+    |-- 00:1e.0 -> ../../../devices/pci0/00:1e.0
+    |-- 00:1f.0 -> ../../../devices/pci0/00:1f.0
+    |-- 00:1f.1 -> ../../../devices/pci0/00:1f.1
+    |-- 00:1f.2 -> ../../../devices/pci0/00:1f.2
+    |-- 00:1f.3 -> ../../../devices/pci0/00:1f.3
+    |-- 00:1f.5 -> ../../../devices/pci0/00:1f.5
+    |-- 01:00.0 -> ../../../devices/pci0/00:01.0/01:00.0
+    |-- 02:1f.0 -> ../../../devices/pci0/00:02.0/02:1f.0
+    |-- 03:00.0 -> ../../../devices/pci0/00:02.0/02:1f.0/03:00.0
+    `-- 04:04.0 -> ../../../devices/pci0/00:1e.0/04:04.0
+
+
+
+Step 3: Registering Drivers.
+
+struct device_driver is a simple driver structure that contains a set
+of operations that the driver model core may call.
+
+
+- Embed a struct device_driver in the bus-specific driver.
+
+  Just like with devices, do something like::
+
+    struct pci_driver {
+           ...
+           struct device_driver    driver;
+    };
+
+
+- Initialize the generic driver structure.
+
+  When the driver registers with the bus (e.g. doing pci_register_driver()),
+  initialize the necessary fields of the driver: the name and bus
+  fields.
+
+
+- Register the driver.
+
+  After the generic driver has been initialized, call::
+
+	driver_register(&drv->driver);
+
+  to register the driver with the core.
+
+  When the driver is unregistered from the bus, unregister it from the
+  core by doing::
+
+        driver_unregister(&drv->driver);
+
+  Note that this will block until all references to the driver have
+  gone away. Normally, there will not be any.
+
+
+- Sysfs representation.
+
+  Drivers are exported via sysfs in their bus's 'driver's directory.
+  For example::
+
+    /sys/bus/pci/drivers/
+    |-- 3c59x
+    |-- Ensoniq AudioPCI
+    |-- agpgart-amdk7
+    |-- e100
+    `-- serial
+
+
+Step 4: Define Generic Methods for Drivers.
+
+struct device_driver defines a set of operations that the driver model
+core calls. Most of these operations are probably similar to
+operations the bus already defines for drivers, but taking different
+parameters.
+
+It would be difficult and tedious to force every driver on a bus to
+simultaneously convert their drivers to generic format. Instead, the
+bus driver should define single instances of the generic methods that
+forward call to the bus-specific drivers. For instance::
+
+
+  static int pci_device_remove(struct device * dev)
+  {
+          struct pci_dev * pci_dev = to_pci_dev(dev);
+          struct pci_driver * drv = pci_dev->driver;
+
+          if (drv) {
+                  if (drv->remove)
+                          drv->remove(pci_dev);
+                  pci_dev->driver = NULL;
+          }
+          return 0;
+  }
+
+
+The generic driver should be initialized with these methods before it
+is registered::
+
+        /* initialize common driver fields */
+        drv->driver.name = drv->name;
+        drv->driver.bus = &pci_bus_type;
+        drv->driver.probe = pci_device_probe;
+        drv->driver.resume = pci_device_resume;
+        drv->driver.suspend = pci_device_suspend;
+        drv->driver.remove = pci_device_remove;
+
+        /* register with core */
+        driver_register(&drv->driver);
+
+
+Ideally, the bus should only initialize the fields if they are not
+already set. This allows the drivers to implement their own generic
+methods.
+
+
+Step 5: Support generic driver binding.
+
+The model assumes that a device or driver can be dynamically
+registered with the bus at any time. When registration happens,
+devices must be bound to a driver, or drivers must be bound to all
+devices that it supports.
+
+A driver typically contains a list of device IDs that it supports. The
+bus driver compares these IDs to the IDs of devices registered with it.
+The format of the device IDs, and the semantics for comparing them are
+bus-specific, so the generic model does attempt to generalize them.
+
+Instead, a bus may supply a method in struct bus_type that does the
+comparison::
+
+  int (*match)(struct device * dev, struct device_driver * drv);
+
+match should return positive value if the driver supports the device,
+and zero otherwise. It may also return error code (for example
+-EPROBE_DEFER) if determining that given driver supports the device is
+not possible.
+
+When a device is registered, the bus's list of drivers is iterated
+over. bus->match() is called for each one until a match is found.
+
+When a driver is registered, the bus's list of devices is iterated
+over. bus->match() is called for each device that is not already
+claimed by a driver.
+
+When a device is successfully bound to a driver, device->driver is
+set, the device is added to a per-driver list of devices, and a
+symlink is created in the driver's sysfs directory that points to the
+device's physical directory::
+
+  /sys/bus/pci/drivers/
+  |-- 3c59x
+  |   `-- 00:0b.0 -> ../../../../devices/pci0/00:0b.0
+  |-- Ensoniq AudioPCI
+  |-- agpgart-amdk7
+  |   `-- 00:00.0 -> ../../../../devices/pci0/00:00.0
+  |-- e100
+  |   `-- 00:0c.0 -> ../../../../devices/pci0/00:0c.0
+  `-- serial
+
+
+This driver binding should replace the existing driver binding
+mechanism the bus currently uses.
+
+
+Step 6: Supply a hotplug callback.
+
+Whenever a device is registered with the driver model core, the
+userspace program /sbin/hotplug is called to notify userspace.
+Users can define actions to perform when a device is inserted or
+removed.
+
+The driver model core passes several arguments to userspace via
+environment variables, including
+
+- ACTION: set to 'add' or 'remove'
+- DEVPATH: set to the device's physical path in sysfs.
+
+A bus driver may also supply additional parameters for userspace to
+consume. To do this, a bus must implement the 'hotplug' method in
+struct bus_type::
+
+     int (*hotplug) (struct device *dev, char **envp,
+                     int num_envp, char *buffer, int buffer_size);
+
+This is called immediately before /sbin/hotplug is executed.
+
+
+Step 7: Cleaning up the bus driver.
+
+The generic bus, device, and driver structures provide several fields
+that can replace those defined privately to the bus driver.
+
+- Device list.
+
+struct bus_type contains a list of all devices registered with the bus
+type. This includes all devices on all instances of that bus type.
+An internal list that the bus uses may be removed, in favor of using
+this one.
+
+The core provides an iterator to access these devices::
+
+  int bus_for_each_dev(struct bus_type * bus, struct device * start,
+                       void * data, int (*fn)(struct device *, void *));
+
+
+- Driver list.
+
+struct bus_type also contains a list of all drivers registered with
+it. An internal list of drivers that the bus driver maintains may
+be removed in favor of using the generic one.
+
+The drivers may be iterated over, like devices::
+
+  int bus_for_each_drv(struct bus_type * bus, struct device_driver * start,
+                       void * data, int (*fn)(struct device_driver *, void *));
+
+
+Please see drivers/base/bus.c for more information.
+
+
+- rwsem
+
+struct bus_type contains an rwsem that protects all core accesses to
+the device and driver lists. This can be used by the bus driver
+internally, and should be used when accessing the device or driver
+lists the bus maintains.
+
+
+- Device and driver fields.
+
+Some of the fields in struct device and struct device_driver duplicate
+fields in the bus-specific representations of these objects. Feel free
+to remove the bus-specific ones and favor the generic ones. Note
+though, that this will likely mean fixing up all the drivers that
+reference the bus-specific fields (though those should all be 1-line
+changes).
diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst
index 349f2dc33029..921c71a3d683 100644
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@@ -399,7 +399,7 @@ symbol:
   will pass the struct gpio_chip* for the chip to all IRQ callbacks, so the
   callbacks need to embed the gpio_chip in its state container and obtain a
   pointer to the container using container_of().
-  (See Documentation/driver-model/design-patterns.rst)
+  (See Documentation/driver-api/driver-model/design-patterns.rst)
 
 - gpiochip_irqchip_add_nested(): adds a nested cascaded irqchip to a gpiochip,
   as discussed above regarding different types of cascaded irqchips. The
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index b4c993ff7655..9fb03b7bdeb1 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -14,6 +14,7 @@ available subsections can be seen below.
 .. toctree::
    :maxdepth: 2
 
+   driver-model/index
    basics
    infrastructure
    early-userspace/index
diff --git a/Documentation/driver-model/binding.rst b/Documentation/driver-model/binding.rst
deleted file mode 100644
index 7ea1d7a41e1d..000000000000
--- a/Documentation/driver-model/binding.rst
+++ /dev/null
@@ -1,98 +0,0 @@
-==============
-Driver Binding
-==============
-
-Driver binding is the process of associating a device with a device
-driver that can control it. Bus drivers have typically handled this
-because there have been bus-specific structures to represent the
-devices and the drivers. With generic device and device driver
-structures, most of the binding can take place using common code.
-
-
-Bus
-~~~
-
-The bus type structure contains a list of all devices that are on that bus
-type in the system. When device_register is called for a device, it is
-inserted into the end of this list. The bus object also contains a
-list of all drivers of that bus type. When driver_register is called
-for a driver, it is inserted at the end of this list. These are the
-two events which trigger driver binding.
-
-
-device_register
-~~~~~~~~~~~~~~~
-
-When a new device is added, the bus's list of drivers is iterated over
-to find one that supports it. In order to determine that, the device
-ID of the device must match one of the device IDs that the driver
-supports. The format and semantics for comparing IDs is bus-specific.
-Instead of trying to derive a complex state machine and matching
-algorithm, it is up to the bus driver to provide a callback to compare
-a device against the IDs of a driver. The bus returns 1 if a match was
-found; 0 otherwise.
-
-int match(struct device * dev, struct device_driver * drv);
-
-If a match is found, the device's driver field is set to the driver
-and the driver's probe callback is called. This gives the driver a
-chance to verify that it really does support the hardware, and that
-it's in a working state.
-
-Device Class
-~~~~~~~~~~~~
-
-Upon the successful completion of probe, the device is registered with
-the class to which it belongs. Device drivers belong to one and only one
-class, and that is set in the driver's devclass field.
-devclass_add_device is called to enumerate the device within the class
-and actually register it with the class, which happens with the
-class's register_dev callback.
-
-
-Driver
-~~~~~~
-
-When a driver is attached to a device, the device is inserted into the
-driver's list of devices.
-
-
-sysfs
-~~~~~
-
-A symlink is created in the bus's 'devices' directory that points to
-the device's directory in the physical hierarchy.
-
-A symlink is created in the driver's 'devices' directory that points
-to the device's directory in the physical hierarchy.
-
-A directory for the device is created in the class's directory. A
-symlink is created in that directory that points to the device's
-physical location in the sysfs tree.
-
-A symlink can be created (though this isn't done yet) in the device's
-physical directory to either its class directory, or the class's
-top-level directory. One can also be created to point to its driver's
-directory also.
-
-
-driver_register
-~~~~~~~~~~~~~~~
-
-The process is almost identical for when a new driver is added.
-The bus's list of devices is iterated over to find a match. Devices
-that already have a driver are skipped. All the devices are iterated
-over, to bind as many devices as possible to the driver.
-
-
-Removal
-~~~~~~~
-
-When a device is removed, the reference count for it will eventually
-go to 0. When it does, the remove callback of the driver is called. It
-is removed from the driver's list of devices and the reference count
-of the driver is decremented. All symlinks between the two are removed.
-
-When a driver is removed, the list of devices that it supports is
-iterated over, and the driver's remove callback is called for each
-one. The device is removed from that list and the symlinks removed.
diff --git a/Documentation/driver-model/bus.rst b/Documentation/driver-model/bus.rst
deleted file mode 100644
index 016b15a6e8ea..000000000000
--- a/Documentation/driver-model/bus.rst
+++ /dev/null
@@ -1,146 +0,0 @@
-=========
-Bus Types
-=========
-
-Definition
-~~~~~~~~~~
-See the kerneldoc for the struct bus_type.
-
-int bus_register(struct bus_type * bus);
-
-
-Declaration
-~~~~~~~~~~~
-
-Each bus type in the kernel (PCI, USB, etc) should declare one static
-object of this type. They must initialize the name field, and may
-optionally initialize the match callback::
-
-   struct bus_type pci_bus_type = {
-          .name	= "pci",
-          .match	= pci_bus_match,
-   };
-
-The structure should be exported to drivers in a header file:
-
-extern struct bus_type pci_bus_type;
-
-
-Registration
-~~~~~~~~~~~~
-
-When a bus driver is initialized, it calls bus_register. This
-initializes the rest of the fields in the bus object and inserts it
-into a global list of bus types. Once the bus object is registered,
-the fields in it are usable by the bus driver.
-
-
-Callbacks
-~~~~~~~~~
-
-match(): Attaching Drivers to Devices
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The format of device ID structures and the semantics for comparing
-them are inherently bus-specific. Drivers typically declare an array
-of device IDs of devices they support that reside in a bus-specific
-driver structure.
-
-The purpose of the match callback is to give the bus an opportunity to
-determine if a particular driver supports a particular device by
-comparing the device IDs the driver supports with the device ID of a
-particular device, without sacrificing bus-specific functionality or
-type-safety.
-
-When a driver is registered with the bus, the bus's list of devices is
-iterated over, and the match callback is called for each device that
-does not have a driver associated with it.
-
-
-
-Device and Driver Lists
-~~~~~~~~~~~~~~~~~~~~~~~
-
-The lists of devices and drivers are intended to replace the local
-lists that many buses keep. They are lists of struct devices and
-struct device_drivers, respectively. Bus drivers are free to use the
-lists as they please, but conversion to the bus-specific type may be
-necessary.
-
-The LDM core provides helper functions for iterating over each list::
-
-  int bus_for_each_dev(struct bus_type * bus, struct device * start,
-		       void * data,
-		       int (*fn)(struct device *, void *));
-
-  int bus_for_each_drv(struct bus_type * bus, struct device_driver * start,
-		       void * data, int (*fn)(struct device_driver *, void *));
-
-These helpers iterate over the respective list, and call the callback
-for each device or driver in the list. All list accesses are
-synchronized by taking the bus's lock (read currently). The reference
-count on each object in the list is incremented before the callback is
-called; it is decremented after the next object has been obtained. The
-lock is not held when calling the callback.
-
-
-sysfs
-~~~~~~~~
-There is a top-level directory named 'bus'.
-
-Each bus gets a directory in the bus directory, along with two default
-directories::
-
-	/sys/bus/pci/
-	|-- devices
-	`-- drivers
-
-Drivers registered with the bus get a directory in the bus's drivers
-directory::
-
-	/sys/bus/pci/
-	|-- devices
-	`-- drivers
-	    |-- Intel ICH
-	    |-- Intel ICH Joystick
-	    |-- agpgart
-	    `-- e100
-
-Each device that is discovered on a bus of that type gets a symlink in
-the bus's devices directory to the device's directory in the physical
-hierarchy::
-
-	/sys/bus/pci/
-	|-- devices
-	|   |-- 00:00.0 -> ../../../root/pci0/00:00.0
-	|   |-- 00:01.0 -> ../../../root/pci0/00:01.0
-	|   `-- 00:02.0 -> ../../../root/pci0/00:02.0
-	`-- drivers
-
-
-Exporting Attributes
-~~~~~~~~~~~~~~~~~~~~
-
-::
-
-  struct bus_attribute {
-	struct attribute	attr;
-	ssize_t (*show)(struct bus_type *, char * buf);
-	ssize_t (*store)(struct bus_type *, const char * buf, size_t count);
-  };
-
-Bus drivers can export attributes using the BUS_ATTR_RW macro that works
-similarly to the DEVICE_ATTR_RW macro for devices. For example, a
-definition like this::
-
-	static BUS_ATTR_RW(debug);
-
-is equivalent to declaring::
-
-	static bus_attribute bus_attr_debug;
-
-This can then be used to add and remove the attribute from the bus's
-sysfs directory using::
-
-	int bus_create_file(struct bus_type *, struct bus_attribute *);
-	void bus_remove_file(struct bus_type *, struct bus_attribute *);
diff --git a/Documentation/driver-model/class.rst b/Documentation/driver-model/class.rst
deleted file mode 100644
index fff55b80e86a..000000000000
--- a/Documentation/driver-model/class.rst
+++ /dev/null
@@ -1,149 +0,0 @@
-==============
-Device Classes
-==============
-
-Introduction
-~~~~~~~~~~~~
-A device class describes a type of device, like an audio or network
-device. The following device classes have been identified:
-
-<Insert List of Device Classes Here>
-
-
-Each device class defines a set of semantics and a programming interface
-that devices of that class adhere to. Device drivers are the
-implementation of that programming interface for a particular device on
-a particular bus.
-
-Device classes are agnostic with respect to what bus a device resides
-on.
-
-
-Programming Interface
-~~~~~~~~~~~~~~~~~~~~~
-The device class structure looks like::
-
-
-  typedef int (*devclass_add)(struct device *);
-  typedef void (*devclass_remove)(struct device *);
-
-See the kerneldoc for the struct class.
-
-A typical device class definition would look like::
-
-  struct device_class input_devclass = {
-        .name		= "input",
-        .add_device	= input_add_device,
-	.remove_device	= input_remove_device,
-  };
-
-Each device class structure should be exported in a header file so it
-can be used by drivers, extensions and interfaces.
-
-Device classes are registered and unregistered with the core using::
-
-  int devclass_register(struct device_class * cls);
-  void devclass_unregister(struct device_class * cls);
-
-
-Devices
-~~~~~~~
-As devices are bound to drivers, they are added to the device class
-that the driver belongs to. Before the driver model core, this would
-typically happen during the driver's probe() callback, once the device
-has been initialized. It now happens after the probe() callback
-finishes from the core.
-
-The device is enumerated in the class. Each time a device is added to
-the class, the class's devnum field is incremented and assigned to the
-device. The field is never decremented, so if the device is removed
-from the class and re-added, it will receive a different enumerated
-value.
-
-The class is allowed to create a class-specific structure for the
-device and store it in the device's class_data pointer.
-
-There is no list of devices in the device class. Each driver has a
-list of devices that it supports. The device class has a list of
-drivers of that particular class. To access all of the devices in the
-class, iterate over the device lists of each driver in the class.
-
-
-Device Drivers
-~~~~~~~~~~~~~~
-Device drivers are added to device classes when they are registered
-with the core. A driver specifies the class it belongs to by setting
-the struct device_driver::devclass field.
-
-
-sysfs directory structure
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-There is a top-level sysfs directory named 'class'.
-
-Each class gets a directory in the class directory, along with two
-default subdirectories::
-
-        class/
-        `-- input
-            |-- devices
-            `-- drivers
-
-
-Drivers registered with the class get a symlink in the drivers/ directory
-that points to the driver's directory (under its bus directory)::
-
-   class/
-   `-- input
-       |-- devices
-       `-- drivers
-           `-- usb:usb_mouse -> ../../../bus/drivers/usb_mouse/
-
-
-Each device gets a symlink in the devices/ directory that points to the
-device's directory in the physical hierarchy::
-
-   class/
-   `-- input
-       |-- devices
-       |   `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/
-       `-- drivers
-
-
-Exporting Attributes
-~~~~~~~~~~~~~~~~~~~~
-
-::
-
-  struct devclass_attribute {
-        struct attribute        attr;
-        ssize_t (*show)(struct device_class *, char * buf, size_t count, loff_t off);
-        ssize_t (*store)(struct device_class *, const char * buf, size_t count, loff_t off);
-  };
-
-Class drivers can export attributes using the DEVCLASS_ATTR macro that works
-similarly to the DEVICE_ATTR macro for devices. For example, a definition
-like this::
-
-  static DEVCLASS_ATTR(debug,0644,show_debug,store_debug);
-
-is equivalent to declaring::
-
-  static devclass_attribute devclass_attr_debug;
-
-The bus driver can add and remove the attribute from the class's
-sysfs directory using::
-
-  int devclass_create_file(struct device_class *, struct devclass_attribute *);
-  void devclass_remove_file(struct device_class *, struct devclass_attribute *);
-
-In the example above, the file will be named 'debug' in placed in the
-class's directory in sysfs.
-
-
-Interfaces
-~~~~~~~~~~
-There may exist multiple mechanisms for accessing the same device of a
-particular class type. Device interfaces describe these mechanisms.
-
-When a device is added to a device class, the core attempts to add it
-to every interface that is registered with the device class.
diff --git a/Documentation/driver-model/design-patterns.rst b/Documentation/driver-model/design-patterns.rst
deleted file mode 100644
index 41eb8f41f7dd..000000000000
--- a/Documentation/driver-model/design-patterns.rst
+++ /dev/null
@@ -1,116 +0,0 @@
-=============================
-Device Driver Design Patterns
-=============================
-
-This document describes a few common design patterns found in device drivers.
-It is likely that subsystem maintainers will ask driver developers to
-conform to these design patterns.
-
-1. State Container
-2. container_of()
-
-
-1. State Container
-~~~~~~~~~~~~~~~~~~
-
-While the kernel contains a few device drivers that assume that they will
-only be probed() once on a certain system (singletons), it is custom to assume
-that the device the driver binds to will appear in several instances. This
-means that the probe() function and all callbacks need to be reentrant.
-
-The most common way to achieve this is to use the state container design
-pattern. It usually has this form::
-
-  struct foo {
-      spinlock_t lock; /* Example member */
-      (...)
-  };
-
-  static int foo_probe(...)
-  {
-      struct foo *foo;
-
-      foo = devm_kzalloc(dev, sizeof(*foo), GFP_KERNEL);
-      if (!foo)
-          return -ENOMEM;
-      spin_lock_init(&foo->lock);
-      (...)
-  }
-
-This will create an instance of struct foo in memory every time probe() is
-called. This is our state container for this instance of the device driver.
-Of course it is then necessary to always pass this instance of the
-state around to all functions that need access to the state and its members.
-
-For example, if the driver is registering an interrupt handler, you would
-pass around a pointer to struct foo like this::
-
-  static irqreturn_t foo_handler(int irq, void *arg)
-  {
-      struct foo *foo = arg;
-      (...)
-  }
-
-  static int foo_probe(...)
-  {
-      struct foo *foo;
-
-      (...)
-      ret = request_irq(irq, foo_handler, 0, "foo", foo);
-  }
-
-This way you always get a pointer back to the correct instance of foo in
-your interrupt handler.
-
-
-2. container_of()
-~~~~~~~~~~~~~~~~~
-
-Continuing on the above example we add an offloaded work::
-
-  struct foo {
-      spinlock_t lock;
-      struct workqueue_struct *wq;
-      struct work_struct offload;
-      (...)
-  };
-
-  static void foo_work(struct work_struct *work)
-  {
-      struct foo *foo = container_of(work, struct foo, offload);
-
-      (...)
-  }
-
-  static irqreturn_t foo_handler(int irq, void *arg)
-  {
-      struct foo *foo = arg;
-
-      queue_work(foo->wq, &foo->offload);
-      (...)
-  }
-
-  static int foo_probe(...)
-  {
-      struct foo *foo;
-
-      foo->wq = create_singlethread_workqueue("foo-wq");
-      INIT_WORK(&foo->offload, foo_work);
-      (...)
-  }
-
-The design pattern is the same for an hrtimer or something similar that will
-return a single argument which is a pointer to a struct member in the
-callback.
-
-container_of() is a macro defined in <linux/kernel.h>
-
-What container_of() does is to obtain a pointer to the containing struct from
-a pointer to a member by a simple subtraction using the offsetof() macro from
-standard C, which allows something similar to object oriented behaviours.
-Notice that the contained member must not be a pointer, but an actual member
-for this to work.
-
-We can see here that we avoid having global pointers to our struct foo *
-instance this way, while still keeping the number of parameters passed to the
-work function to a single pointer.
diff --git a/Documentation/driver-model/device.rst b/Documentation/driver-model/device.rst
deleted file mode 100644
index 2b868d49d349..000000000000
--- a/Documentation/driver-model/device.rst
+++ /dev/null
@@ -1,109 +0,0 @@
-==========================
-The Basic Device Structure
-==========================
-
-See the kerneldoc for the struct device.
-
-
-Programming Interface
-~~~~~~~~~~~~~~~~~~~~~
-The bus driver that discovers the device uses this to register the
-device with the core::
-
-  int device_register(struct device * dev);
-
-The bus should initialize the following fields:
-
-    - parent
-    - name
-    - bus_id
-    - bus
-
-A device is removed from the core when its reference count goes to
-0. The reference count can be adjusted using::
-
-  struct device * get_device(struct device * dev);
-  void put_device(struct device * dev);
-
-get_device() will return a pointer to the struct device passed to it
-if the reference is not already 0 (if it's in the process of being
-removed already).
-
-A driver can access the lock in the device structure using::
-
-  void lock_device(struct device * dev);
-  void unlock_device(struct device * dev);
-
-
-Attributes
-~~~~~~~~~~
-
-::
-
-  struct device_attribute {
-	struct attribute	attr;
-	ssize_t (*show)(struct device *dev, struct device_attribute *attr,
-			char *buf);
-	ssize_t (*store)(struct device *dev, struct device_attribute *attr,
-			 const char *buf, size_t count);
-  };
-
-Attributes of devices can be exported by a device driver through sysfs.
-
-Please see Documentation/filesystems/sysfs.txt for more information
-on how sysfs works.
-
-As explained in Documentation/kobject.txt, device attributes must be
-created before the KOBJ_ADD uevent is generated. The only way to realize
-that is by defining an attribute group.
-
-Attributes are declared using a macro called DEVICE_ATTR::
-
-  #define DEVICE_ATTR(name,mode,show,store)
-
-Example:::
-
-  static DEVICE_ATTR(type, 0444, show_type, NULL);
-  static DEVICE_ATTR(power, 0644, show_power, store_power);
-
-This declares two structures of type struct device_attribute with respective
-names 'dev_attr_type' and 'dev_attr_power'. These two attributes can be
-organized as follows into a group::
-
-  static struct attribute *dev_attrs[] = {
-	&dev_attr_type.attr,
-	&dev_attr_power.attr,
-	NULL,
-  };
-
-  static struct attribute_group dev_attr_group = {
-	.attrs = dev_attrs,
-  };
-
-  static const struct attribute_group *dev_attr_groups[] = {
-	&dev_attr_group,
-	NULL,
-  };
-
-This array of groups can then be associated with a device by setting the
-group pointer in struct device before device_register() is invoked::
-
-        dev->groups = dev_attr_groups;
-        device_register(dev);
-
-The device_register() function will use the 'groups' pointer to create the
-device attributes and the device_unregister() function will use this pointer
-to remove the device attributes.
-
-Word of warning:  While the kernel allows device_create_file() and
-device_remove_file() to be called on a device at any time, userspace has
-strict expectations on when attributes get created.  When a new device is
-registered in the kernel, a uevent is generated to notify userspace (like
-udev) that a new device is available.  If attributes are added after the
-device is registered, then userspace won't get notified and userspace will
-not know about the new attributes.
-
-This is important for device driver that need to publish additional
-attributes for a device at driver probe time.  If the device driver simply
-calls device_create_file() on the device structure passed to it, then
-userspace will never be notified of the new attributes.
diff --git a/Documentation/driver-model/devres.rst b/Documentation/driver-model/devres.rst
deleted file mode 100644
index 4ac99122b5f1..000000000000
--- a/Documentation/driver-model/devres.rst
+++ /dev/null
@@ -1,414 +0,0 @@
-================================
-Devres - Managed Device Resource
-================================
-
-Tejun Heo	<teheo@suse.de>
-
-First draft	10 January 2007
-
-.. contents
-
-   1. Intro			: Huh? Devres?
-   2. Devres			: Devres in a nutshell
-   3. Devres Group		: Group devres'es and release them together
-   4. Details			: Life time rules, calling context, ...
-   5. Overhead			: How much do we have to pay for this?
-   6. List of managed interfaces: Currently implemented managed interfaces
-
-
-1. Intro
---------
-
-devres came up while trying to convert libata to use iomap.  Each
-iomapped address should be kept and unmapped on driver detach.  For
-example, a plain SFF ATA controller (that is, good old PCI IDE) in
-native mode makes use of 5 PCI BARs and all of them should be
-maintained.
-
-As with many other device drivers, libata low level drivers have
-sufficient bugs in ->remove and ->probe failure path.  Well, yes,
-that's probably because libata low level driver developers are lazy
-bunch, but aren't all low level driver developers?  After spending a
-day fiddling with braindamaged hardware with no document or
-braindamaged document, if it's finally working, well, it's working.
-
-For one reason or another, low level drivers don't receive as much
-attention or testing as core code, and bugs on driver detach or
-initialization failure don't happen often enough to be noticeable.
-Init failure path is worse because it's much less travelled while
-needs to handle multiple entry points.
-
-So, many low level drivers end up leaking resources on driver detach
-and having half broken failure path implementation in ->probe() which
-would leak resources or even cause oops when failure occurs.  iomap
-adds more to this mix.  So do msi and msix.
-
-
-2. Devres
----------
-
-devres is basically linked list of arbitrarily sized memory areas
-associated with a struct device.  Each devres entry is associated with
-a release function.  A devres can be released in several ways.  No
-matter what, all devres entries are released on driver detach.  On
-release, the associated release function is invoked and then the
-devres entry is freed.
-
-Managed interface is created for resources commonly used by device
-drivers using devres.  For example, coherent DMA memory is acquired
-using dma_alloc_coherent().  The managed version is called
-dmam_alloc_coherent().  It is identical to dma_alloc_coherent() except
-for the DMA memory allocated using it is managed and will be
-automatically released on driver detach.  Implementation looks like
-the following::
-
-  struct dma_devres {
-	size_t		size;
-	void		*vaddr;
-	dma_addr_t	dma_handle;
-  };
-
-  static void dmam_coherent_release(struct device *dev, void *res)
-  {
-	struct dma_devres *this = res;
-
-	dma_free_coherent(dev, this->size, this->vaddr, this->dma_handle);
-  }
-
-  dmam_alloc_coherent(dev, size, dma_handle, gfp)
-  {
-	struct dma_devres *dr;
-	void *vaddr;
-
-	dr = devres_alloc(dmam_coherent_release, sizeof(*dr), gfp);
-	...
-
-	/* alloc DMA memory as usual */
-	vaddr = dma_alloc_coherent(...);
-	...
-
-	/* record size, vaddr, dma_handle in dr */
-	dr->vaddr = vaddr;
-	...
-
-	devres_add(dev, dr);
-
-	return vaddr;
-  }
-
-If a driver uses dmam_alloc_coherent(), the area is guaranteed to be
-freed whether initialization fails half-way or the device gets
-detached.  If most resources are acquired using managed interface, a
-driver can have much simpler init and exit code.  Init path basically
-looks like the following::
-
-  my_init_one()
-  {
-	struct mydev *d;
-
-	d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
-	if (!d)
-		return -ENOMEM;
-
-	d->ring = dmam_alloc_coherent(...);
-	if (!d->ring)
-		return -ENOMEM;
-
-	if (check something)
-		return -EINVAL;
-	...
-
-	return register_to_upper_layer(d);
-  }
-
-And exit path::
-
-  my_remove_one()
-  {
-	unregister_from_upper_layer(d);
-	shutdown_my_hardware();
-  }
-
-As shown above, low level drivers can be simplified a lot by using
-devres.  Complexity is shifted from less maintained low level drivers
-to better maintained higher layer.  Also, as init failure path is
-shared with exit path, both can get more testing.
-
-Note though that when converting current calls or assignments to
-managed devm_* versions it is up to you to check if internal operations
-like allocating memory, have failed. Managed resources pertains to the
-freeing of these resources *only* - all other checks needed are still
-on you. In some cases this may mean introducing checks that were not
-necessary before moving to the managed devm_* calls.
-
-
-3. Devres group
----------------
-
-Devres entries can be grouped using devres group.  When a group is
-released, all contained normal devres entries and properly nested
-groups are released.  One usage is to rollback series of acquired
-resources on failure.  For example::
-
-  if (!devres_open_group(dev, NULL, GFP_KERNEL))
-	return -ENOMEM;
-
-  acquire A;
-  if (failed)
-	goto err;
-
-  acquire B;
-  if (failed)
-	goto err;
-  ...
-
-  devres_remove_group(dev, NULL);
-  return 0;
-
- err:
-  devres_release_group(dev, NULL);
-  return err_code;
-
-As resource acquisition failure usually means probe failure, constructs
-like above are usually useful in midlayer driver (e.g. libata core
-layer) where interface function shouldn't have side effect on failure.
-For LLDs, just returning error code suffices in most cases.
-
-Each group is identified by `void *id`.  It can either be explicitly
-specified by @id argument to devres_open_group() or automatically
-created by passing NULL as @id as in the above example.  In both
-cases, devres_open_group() returns the group's id.  The returned id
-can be passed to other devres functions to select the target group.
-If NULL is given to those functions, the latest open group is
-selected.
-
-For example, you can do something like the following::
-
-  int my_midlayer_create_something()
-  {
-	if (!devres_open_group(dev, my_midlayer_create_something, GFP_KERNEL))
-		return -ENOMEM;
-
-	...
-
-	devres_close_group(dev, my_midlayer_create_something);
-	return 0;
-  }
-
-  void my_midlayer_destroy_something()
-  {
-	devres_release_group(dev, my_midlayer_create_something);
-  }
-
-
-4. Details
-----------
-
-Lifetime of a devres entry begins on devres allocation and finishes
-when it is released or destroyed (removed and freed) - no reference
-counting.
-
-devres core guarantees atomicity to all basic devres operations and
-has support for single-instance devres types (atomic
-lookup-and-add-if-not-found).  Other than that, synchronizing
-concurrent accesses to allocated devres data is caller's
-responsibility.  This is usually non-issue because bus ops and
-resource allocations already do the job.
-
-For an example of single-instance devres type, read pcim_iomap_table()
-in lib/devres.c.
-
-All devres interface functions can be called without context if the
-right gfp mask is given.
-
-
-5. Overhead
------------
-
-Each devres bookkeeping info is allocated together with requested data
-area.  With debug option turned off, bookkeeping info occupies 16
-bytes on 32bit machines and 24 bytes on 64bit (three pointers rounded
-up to ull alignment).  If singly linked list is used, it can be
-reduced to two pointers (8 bytes on 32bit, 16 bytes on 64bit).
-
-Each devres group occupies 8 pointers.  It can be reduced to 6 if
-singly linked list is used.
-
-Memory space overhead on ahci controller with two ports is between 300
-and 400 bytes on 32bit machine after naive conversion (we can
-certainly invest a bit more effort into libata core layer).
-
-
-6. List of managed interfaces
------------------------------
-
-CLOCK
-  devm_clk_get()
-  devm_clk_get_optional()
-  devm_clk_put()
-  devm_clk_hw_register()
-  devm_of_clk_add_hw_provider()
-  devm_clk_hw_register_clkdev()
-
-DMA
-  dmaenginem_async_device_register()
-  dmam_alloc_coherent()
-  dmam_alloc_attrs()
-  dmam_free_coherent()
-  dmam_pool_create()
-  dmam_pool_destroy()
-
-DRM
-  devm_drm_dev_init()
-
-GPIO
-  devm_gpiod_get()
-  devm_gpiod_get_index()
-  devm_gpiod_get_index_optional()
-  devm_gpiod_get_optional()
-  devm_gpiod_put()
-  devm_gpiod_unhinge()
-  devm_gpiochip_add_data()
-  devm_gpio_request()
-  devm_gpio_request_one()
-  devm_gpio_free()
-
-I2C
-  devm_i2c_new_dummy_device()
-
-IIO
-  devm_iio_device_alloc()
-  devm_iio_device_free()
-  devm_iio_device_register()
-  devm_iio_device_unregister()
-  devm_iio_kfifo_allocate()
-  devm_iio_kfifo_free()
-  devm_iio_triggered_buffer_setup()
-  devm_iio_triggered_buffer_cleanup()
-  devm_iio_trigger_alloc()
-  devm_iio_trigger_free()
-  devm_iio_trigger_register()
-  devm_iio_trigger_unregister()
-  devm_iio_channel_get()
-  devm_iio_channel_release()
-  devm_iio_channel_get_all()
-  devm_iio_channel_release_all()
-
-INPUT
-  devm_input_allocate_device()
-
-IO region
-  devm_release_mem_region()
-  devm_release_region()
-  devm_release_resource()
-  devm_request_mem_region()
-  devm_request_region()
-  devm_request_resource()
-
-IOMAP
-  devm_ioport_map()
-  devm_ioport_unmap()
-  devm_ioremap()
-  devm_ioremap_nocache()
-  devm_ioremap_wc()
-  devm_ioremap_resource() : checks resource, requests memory region, ioremaps
-  devm_iounmap()
-  pcim_iomap()
-  pcim_iomap_regions()	: do request_region() and iomap() on multiple BARs
-  pcim_iomap_table()	: array of mapped addresses indexed by BAR
-  pcim_iounmap()
-
-IRQ
-  devm_free_irq()
-  devm_request_any_context_irq()
-  devm_request_irq()
-  devm_request_threaded_irq()
-  devm_irq_alloc_descs()
-  devm_irq_alloc_desc()
-  devm_irq_alloc_desc_at()
-  devm_irq_alloc_desc_from()
-  devm_irq_alloc_descs_from()
-  devm_irq_alloc_generic_chip()
-  devm_irq_setup_generic_chip()
-  devm_irq_sim_init()
-
-LED
-  devm_led_classdev_register()
-  devm_led_classdev_unregister()
-
-MDIO
-  devm_mdiobus_alloc()
-  devm_mdiobus_alloc_size()
-  devm_mdiobus_free()
-
-MEM
-  devm_free_pages()
-  devm_get_free_pages()
-  devm_kasprintf()
-  devm_kcalloc()
-  devm_kfree()
-  devm_kmalloc()
-  devm_kmalloc_array()
-  devm_kmemdup()
-  devm_kstrdup()
-  devm_kvasprintf()
-  devm_kzalloc()
-
-MFD
-  devm_mfd_add_devices()
-
-MUX
-  devm_mux_chip_alloc()
-  devm_mux_chip_register()
-  devm_mux_control_get()
-
-PER-CPU MEM
-  devm_alloc_percpu()
-  devm_free_percpu()
-
-PCI
-  devm_pci_alloc_host_bridge()  : managed PCI host bridge allocation
-  devm_pci_remap_cfgspace()	: ioremap PCI configuration space
-  devm_pci_remap_cfg_resource()	: ioremap PCI configuration space resource
-  pcim_enable_device()		: after success, all PCI ops become managed
-  pcim_pin_device()		: keep PCI device enabled after release
-
-PHY
-  devm_usb_get_phy()
-  devm_usb_put_phy()
-
-PINCTRL
-  devm_pinctrl_get()
-  devm_pinctrl_put()
-  devm_pinctrl_register()
-  devm_pinctrl_unregister()
-
-POWER
-  devm_reboot_mode_register()
-  devm_reboot_mode_unregister()
-
-PWM
-  devm_pwm_get()
-  devm_pwm_put()
-
-REGULATOR
-  devm_regulator_bulk_get()
-  devm_regulator_get()
-  devm_regulator_put()
-  devm_regulator_register()
-
-RESET
-  devm_reset_control_get()
-  devm_reset_controller_register()
-
-SERDEV
-  devm_serdev_device_open()
-
-SLAVE DMA ENGINE
-  devm_acpi_dma_controller_register()
-
-SPI
-  devm_spi_register_master()
-
-WATCHDOG
-  devm_watchdog_register_device()
diff --git a/Documentation/driver-model/driver.rst b/Documentation/driver-model/driver.rst
deleted file mode 100644
index 11d281506a04..000000000000
--- a/Documentation/driver-model/driver.rst
+++ /dev/null
@@ -1,223 +0,0 @@
-==============
-Device Drivers
-==============
-
-See the kerneldoc for the struct device_driver.
-
-
-Allocation
-~~~~~~~~~~
-
-Device drivers are statically allocated structures. Though there may
-be multiple devices in a system that a driver supports, struct
-device_driver represents the driver as a whole (not a particular
-device instance).
-
-Initialization
-~~~~~~~~~~~~~~
-
-The driver must initialize at least the name and bus fields. It should
-also initialize the devclass field (when it arrives), so it may obtain
-the proper linkage internally. It should also initialize as many of
-the callbacks as possible, though each is optional.
-
-Declaration
-~~~~~~~~~~~
-
-As stated above, struct device_driver objects are statically
-allocated. Below is an example declaration of the eepro100
-driver. This declaration is hypothetical only; it relies on the driver
-being converted completely to the new model::
-
-  static struct device_driver eepro100_driver = {
-         .name		= "eepro100",
-         .bus		= &pci_bus_type,
-
-         .probe		= eepro100_probe,
-         .remove		= eepro100_remove,
-         .suspend		= eepro100_suspend,
-         .resume		= eepro100_resume,
-  };
-
-Most drivers will not be able to be converted completely to the new
-model because the bus they belong to has a bus-specific structure with
-bus-specific fields that cannot be generalized.
-
-The most common example of this are device ID structures. A driver
-typically defines an array of device IDs that it supports. The format
-of these structures and the semantics for comparing device IDs are
-completely bus-specific. Defining them as bus-specific entities would
-sacrifice type-safety, so we keep bus-specific structures around.
-
-Bus-specific drivers should include a generic struct device_driver in
-the definition of the bus-specific driver. Like this::
-
-  struct pci_driver {
-         const struct pci_device_id *id_table;
-         struct device_driver	  driver;
-  };
-
-A definition that included bus-specific fields would look like
-(using the eepro100 driver again)::
-
-  static struct pci_driver eepro100_driver = {
-         .id_table       = eepro100_pci_tbl,
-         .driver	       = {
-		.name		= "eepro100",
-		.bus		= &pci_bus_type,
-		.probe		= eepro100_probe,
-		.remove		= eepro100_remove,
-		.suspend	= eepro100_suspend,
-		.resume		= eepro100_resume,
-         },
-  };
-
-Some may find the syntax of embedded struct initialization awkward or
-even a bit ugly. So far, it's the best way we've found to do what we want...
-
-Registration
-~~~~~~~~~~~~
-
-::
-
-  int driver_register(struct device_driver *drv);
-
-The driver registers the structure on startup. For drivers that have
-no bus-specific fields (i.e. don't have a bus-specific driver
-structure), they would use driver_register and pass a pointer to their
-struct device_driver object.
-
-Most drivers, however, will have a bus-specific structure and will
-need to register with the bus using something like pci_driver_register.
-
-It is important that drivers register their driver structure as early as
-possible. Registration with the core initializes several fields in the
-struct device_driver object, including the reference count and the
-lock. These fields are assumed to be valid at all times and may be
-used by the device model core or the bus driver.
-
-
-Transition Bus Drivers
-~~~~~~~~~~~~~~~~~~~~~~
-
-By defining wrapper functions, the transition to the new model can be
-made easier. Drivers can ignore the generic structure altogether and
-let the bus wrapper fill in the fields. For the callbacks, the bus can
-define generic callbacks that forward the call to the bus-specific
-callbacks of the drivers.
-
-This solution is intended to be only temporary. In order to get class
-information in the driver, the drivers must be modified anyway. Since
-converting drivers to the new model should reduce some infrastructural
-complexity and code size, it is recommended that they are converted as
-class information is added.
-
-Access
-~~~~~~
-
-Once the object has been registered, it may access the common fields of
-the object, like the lock and the list of devices::
-
-  int driver_for_each_dev(struct device_driver *drv, void *data,
-			  int (*callback)(struct device *dev, void *data));
-
-The devices field is a list of all the devices that have been bound to
-the driver. The LDM core provides a helper function to operate on all
-the devices a driver controls. This helper locks the driver on each
-node access, and does proper reference counting on each device as it
-accesses it.
-
-
-sysfs
-~~~~~
-
-When a driver is registered, a sysfs directory is created in its
-bus's directory. In this directory, the driver can export an interface
-to userspace to control operation of the driver on a global basis;
-e.g. toggling debugging output in the driver.
-
-A future feature of this directory will be a 'devices' directory. This
-directory will contain symlinks to the directories of devices it
-supports.
-
-
-
-Callbacks
-~~~~~~~~~
-
-::
-
-	int	(*probe)	(struct device *dev);
-
-The probe() entry is called in task context, with the bus's rwsem locked
-and the driver partially bound to the device.  Drivers commonly use
-container_of() to convert "dev" to a bus-specific type, both in probe()
-and other routines.  That type often provides device resource data, such
-as pci_dev.resource[] or platform_device.resources, which is used in
-addition to dev->platform_data to initialize the driver.
-
-This callback holds the driver-specific logic to bind the driver to a
-given device.  That includes verifying that the device is present, that
-it's a version the driver can handle, that driver data structures can
-be allocated and initialized, and that any hardware can be initialized.
-Drivers often store a pointer to their state with dev_set_drvdata().
-When the driver has successfully bound itself to that device, then probe()
-returns zero and the driver model code will finish its part of binding
-the driver to that device.
-
-A driver's probe() may return a negative errno value to indicate that
-the driver did not bind to this device, in which case it should have
-released all resources it allocated::
-
-	int 	(*remove)	(struct device *dev);
-
-remove is called to unbind a driver from a device. This may be
-called if a device is physically removed from the system, if the
-driver module is being unloaded, during a reboot sequence, or
-in other cases.
-
-It is up to the driver to determine if the device is present or
-not. It should free any resources allocated specifically for the
-device; i.e. anything in the device's driver_data field.
-
-If the device is still present, it should quiesce the device and place
-it into a supported low-power state::
-
-	int	(*suspend)	(struct device *dev, pm_message_t state);
-
-suspend is called to put the device in a low power state::
-
-	int	(*resume)	(struct device *dev);
-
-Resume is used to bring a device back from a low power state.
-
-
-Attributes
-~~~~~~~~~~
-
-::
-
-  struct driver_attribute {
-          struct attribute        attr;
-          ssize_t (*show)(struct device_driver *driver, char *buf);
-          ssize_t (*store)(struct device_driver *, const char *buf, size_t count);
-  };
-
-Device drivers can export attributes via their sysfs directories.
-Drivers can declare attributes using a DRIVER_ATTR_RW and DRIVER_ATTR_RO
-macro that works identically to the DEVICE_ATTR_RW and DEVICE_ATTR_RO
-macros.
-
-Example::
-
-	DRIVER_ATTR_RW(debug);
-
-This is equivalent to declaring::
-
-	struct driver_attribute driver_attr_debug;
-
-This can then be used to add and remove the attribute from the
-driver's directory using::
-
-  int driver_create_file(struct device_driver *, const struct driver_attribute *);
-  void driver_remove_file(struct device_driver *, const struct driver_attribute *);
diff --git a/Documentation/driver-model/index.rst b/Documentation/driver-model/index.rst
deleted file mode 100644
index 9f85d579ce56..000000000000
--- a/Documentation/driver-model/index.rst
+++ /dev/null
@@ -1,26 +0,0 @@
-:orphan:
-
-============
-Driver Model
-============
-
-.. toctree::
-   :maxdepth: 1
-
-   binding
-   bus
-   class
-   design-patterns
-   device
-   devres
-   driver
-   overview
-   platform
-   porting
-
-.. only::  subproject and html
-
-   Indices
-   =======
-
-   * :ref:`genindex`
diff --git a/Documentation/driver-model/overview.rst b/Documentation/driver-model/overview.rst
deleted file mode 100644
index d4d1e9b40e0c..000000000000
--- a/Documentation/driver-model/overview.rst
+++ /dev/null
@@ -1,124 +0,0 @@
-=============================
-The Linux Kernel Device Model
-=============================
-
-Patrick Mochel	<mochel@digitalimplant.org>
-
-Drafted 26 August 2002
-Updated 31 January 2006
-
-
-Overview
-~~~~~~~~
-
-The Linux Kernel Driver Model is a unification of all the disparate driver
-models that were previously used in the kernel. It is intended to augment the
-bus-specific drivers for bridges and devices by consolidating a set of data
-and operations into globally accessible data structures.
-
-Traditional driver models implemented some sort of tree-like structure
-(sometimes just a list) for the devices they control. There wasn't any
-uniformity across the different bus types.
-
-The current driver model provides a common, uniform data model for describing
-a bus and the devices that can appear under the bus. The unified bus
-model includes a set of common attributes which all busses carry, and a set
-of common callbacks, such as device discovery during bus probing, bus
-shutdown, bus power management, etc.
-
-The common device and bridge interface reflects the goals of the modern
-computer: namely the ability to do seamless device "plug and play", power
-management, and hot plug. In particular, the model dictated by Intel and
-Microsoft (namely ACPI) ensures that almost every device on almost any bus
-on an x86-compatible system can work within this paradigm.  Of course,
-not every bus is able to support all such operations, although most
-buses support most of those operations.
-
-
-Downstream Access
-~~~~~~~~~~~~~~~~~
-
-Common data fields have been moved out of individual bus layers into a common
-data structure. These fields must still be accessed by the bus layers,
-and sometimes by the device-specific drivers.
-
-Other bus layers are encouraged to do what has been done for the PCI layer.
-struct pci_dev now looks like this::
-
-  struct pci_dev {
-	...
-
-	struct device dev;     /* Generic device interface */
-	...
-  };
-
-Note first that the struct device dev within the struct pci_dev is
-statically allocated. This means only one allocation on device discovery.
-
-Note also that that struct device dev is not necessarily defined at the
-front of the pci_dev structure.  This is to make people think about what
-they're doing when switching between the bus driver and the global driver,
-and to discourage meaningless and incorrect casts between the two.
-
-The PCI bus layer freely accesses the fields of struct device. It knows about
-the structure of struct pci_dev, and it should know the structure of struct
-device. Individual PCI device drivers that have been converted to the current
-driver model generally do not and should not touch the fields of struct device,
-unless there is a compelling reason to do so.
-
-The above abstraction prevents unnecessary pain during transitional phases.
-If it were not done this way, then when a field was renamed or removed, every
-downstream driver would break.  On the other hand, if only the bus layer
-(and not the device layer) accesses the struct device, it is only the bus
-layer that needs to change.
-
-
-User Interface
-~~~~~~~~~~~~~~
-
-By virtue of having a complete hierarchical view of all the devices in the
-system, exporting a complete hierarchical view to userspace becomes relatively
-easy. This has been accomplished by implementing a special purpose virtual
-file system named sysfs.
-
-Almost all mainstream Linux distros mount this filesystem automatically; you
-can see some variation of the following in the output of the "mount" command::
-
-  $ mount
-  ...
-  none on /sys type sysfs (rw,noexec,nosuid,nodev)
-  ...
-  $
-
-The auto-mounting of sysfs is typically accomplished by an entry similar to
-the following in the /etc/fstab file::
-
-  none     	/sys	sysfs    defaults	  	0 0
-
-or something similar in the /lib/init/fstab file on Debian-based systems::
-
-  none            /sys    sysfs    nodev,noexec,nosuid    0 0
-
-If sysfs is not automatically mounted, you can always do it manually with::
-
-	# mount -t sysfs sysfs /sys
-
-Whenever a device is inserted into the tree, a directory is created for it.
-This directory may be populated at each layer of discovery - the global layer,
-the bus layer, or the device layer.
-
-The global layer currently creates two files - 'name' and 'power'. The
-former only reports the name of the device. The latter reports the
-current power state of the device. It will also be used to set the current
-power state.
-
-The bus layer may also create files for the devices it finds while probing the
-bus. For example, the PCI layer currently creates 'irq' and 'resource' files
-for each PCI device.
-
-A device-specific driver may also export files in its directory to expose
-device-specific data or tunable interfaces.
-
-More information about the sysfs directory layout can be found in
-the other documents in this directory and in the file
-Documentation/filesystems/sysfs.txt.
diff --git a/Documentation/driver-model/platform.rst b/Documentation/driver-model/platform.rst
deleted file mode 100644
index 334dd4071ae4..000000000000
--- a/Documentation/driver-model/platform.rst
+++ /dev/null
@@ -1,246 +0,0 @@
-============================
-Platform Devices and Drivers
-============================
-
-See <linux/platform_device.h> for the driver model interface to the
-platform bus:  platform_device, and platform_driver.  This pseudo-bus
-is used to connect devices on busses with minimal infrastructure,
-like those used to integrate peripherals on many system-on-chip
-processors, or some "legacy" PC interconnects; as opposed to large
-formally specified ones like PCI or USB.
-
-
-Platform devices
-~~~~~~~~~~~~~~~~
-Platform devices are devices that typically appear as autonomous
-entities in the system. This includes legacy port-based devices and
-host bridges to peripheral buses, and most controllers integrated
-into system-on-chip platforms.  What they usually have in common
-is direct addressing from a CPU bus.  Rarely, a platform_device will
-be connected through a segment of some other kind of bus; but its
-registers will still be directly addressable.
-
-Platform devices are given a name, used in driver binding, and a
-list of resources such as addresses and IRQs::
-
-  struct platform_device {
-	const char	*name;
-	u32		id;
-	struct device	dev;
-	u32		num_resources;
-	struct resource	*resource;
-  };
-
-
-Platform drivers
-~~~~~~~~~~~~~~~~
-Platform drivers follow the standard driver model convention, where
-discovery/enumeration is handled outside the drivers, and drivers
-provide probe() and remove() methods.  They support power management
-and shutdown notifications using the standard conventions::
-
-  struct platform_driver {
-	int (*probe)(struct platform_device *);
-	int (*remove)(struct platform_device *);
-	void (*shutdown)(struct platform_device *);
-	int (*suspend)(struct platform_device *, pm_message_t state);
-	int (*suspend_late)(struct platform_device *, pm_message_t state);
-	int (*resume_early)(struct platform_device *);
-	int (*resume)(struct platform_device *);
-	struct device_driver driver;
-  };
-
-Note that probe() should in general verify that the specified device hardware
-actually exists; sometimes platform setup code can't be sure.  The probing
-can use device resources, including clocks, and device platform_data.
-
-Platform drivers register themselves the normal way::
-
-	int platform_driver_register(struct platform_driver *drv);
-
-Or, in common situations where the device is known not to be hot-pluggable,
-the probe() routine can live in an init section to reduce the driver's
-runtime memory footprint::
-
-	int platform_driver_probe(struct platform_driver *drv,
-			  int (*probe)(struct platform_device *))
-
-Kernel modules can be composed of several platform drivers. The platform core
-provides helpers to register and unregister an array of drivers::
-
-	int __platform_register_drivers(struct platform_driver * const *drivers,
-				      unsigned int count, struct module *owner);
-	void platform_unregister_drivers(struct platform_driver * const *drivers,
-					 unsigned int count);
-
-If one of the drivers fails to register, all drivers registered up to that
-point will be unregistered in reverse order. Note that there is a convenience
-macro that passes THIS_MODULE as owner parameter::
-
-	#define platform_register_drivers(drivers, count)
-
-
-Device Enumeration
-~~~~~~~~~~~~~~~~~~
-As a rule, platform specific (and often board-specific) setup code will
-register platform devices::
-
-	int platform_device_register(struct platform_device *pdev);
-
-	int platform_add_devices(struct platform_device **pdevs, int ndev);
-
-The general rule is to register only those devices that actually exist,
-but in some cases extra devices might be registered.  For example, a kernel
-might be configured to work with an external network adapter that might not
-be populated on all boards, or likewise to work with an integrated controller
-that some boards might not hook up to any peripherals.
-
-In some cases, boot firmware will export tables describing the devices
-that are populated on a given board.   Without such tables, often the
-only way for system setup code to set up the correct devices is to build
-a kernel for a specific target board.  Such board-specific kernels are
-common with embedded and custom systems development.
-
-In many cases, the memory and IRQ resources associated with the platform
-device are not enough to let the device's driver work.  Board setup code
-will often provide additional information using the device's platform_data
-field to hold additional information.
-
-Embedded systems frequently need one or more clocks for platform devices,
-which are normally kept off until they're actively needed (to save power).
-System setup also associates those clocks with the device, so that that
-calls to clk_get(&pdev->dev, clock_name) return them as needed.
-
-
-Legacy Drivers:  Device Probing
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Some drivers are not fully converted to the driver model, because they take
-on a non-driver role:  the driver registers its platform device, rather than
-leaving that for system infrastructure.  Such drivers can't be hotplugged
-or coldplugged, since those mechanisms require device creation to be in a
-different system component than the driver.
-
-The only "good" reason for this is to handle older system designs which, like
-original IBM PCs, rely on error-prone "probe-the-hardware" models for hardware
-configuration.  Newer systems have largely abandoned that model, in favor of
-bus-level support for dynamic configuration (PCI, USB), or device tables
-provided by the boot firmware (e.g. PNPACPI on x86).  There are too many
-conflicting options about what might be where, and even educated guesses by
-an operating system will be wrong often enough to make trouble.
-
-This style of driver is discouraged.  If you're updating such a driver,
-please try to move the device enumeration to a more appropriate location,
-outside the driver.  This will usually be cleanup, since such drivers
-tend to already have "normal" modes, such as ones using device nodes that
-were created by PNP or by platform device setup.
-
-None the less, there are some APIs to support such legacy drivers.  Avoid
-using these calls except with such hotplug-deficient drivers::
-
-	struct platform_device *platform_device_alloc(
-			const char *name, int id);
-
-You can use platform_device_alloc() to dynamically allocate a device, which
-you will then initialize with resources and platform_device_register().
-A better solution is usually::
-
-	struct platform_device *platform_device_register_simple(
-			const char *name, int id,
-			struct resource *res, unsigned int nres);
-
-You can use platform_device_register_simple() as a one-step call to allocate
-and register a device.
-
-
-Device Naming and Driver Binding
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The platform_device.dev.bus_id is the canonical name for the devices.
-It's built from two components:
-
-    * platform_device.name ... which is also used to for driver matching.
-
-    * platform_device.id ... the device instance number, or else "-1"
-      to indicate there's only one.
-
-These are concatenated, so name/id "serial"/0 indicates bus_id "serial.0", and
-"serial/3" indicates bus_id "serial.3"; both would use the platform_driver
-named "serial".  While "my_rtc"/-1 would be bus_id "my_rtc" (no instance id)
-and use the platform_driver called "my_rtc".
-
-Driver binding is performed automatically by the driver core, invoking
-driver probe() after finding a match between device and driver.  If the
-probe() succeeds, the driver and device are bound as usual.  There are
-three different ways to find such a match:
-
-    - Whenever a device is registered, the drivers for that bus are
-      checked for matches.  Platform devices should be registered very
-      early during system boot.
-
-    - When a driver is registered using platform_driver_register(), all
-      unbound devices on that bus are checked for matches.  Drivers
-      usually register later during booting, or by module loading.
-
-    - Registering a driver using platform_driver_probe() works just like
-      using platform_driver_register(), except that the driver won't
-      be probed later if another device registers.  (Which is OK, since
-      this interface is only for use with non-hotpluggable devices.)
-
-
-Early Platform Devices and Drivers
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The early platform interfaces provide platform data to platform device
-drivers early on during the system boot. The code is built on top of the
-early_param() command line parsing and can be executed very early on.
-
-Example: "earlyprintk" class early serial console in 6 steps
-
-1. Registering early platform device data
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The architecture code registers platform device data using the function
-early_platform_add_devices(). In the case of early serial console this
-should be hardware configuration for the serial port. Devices registered
-at this point will later on be matched against early platform drivers.
-
-2. Parsing kernel command line
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The architecture code calls parse_early_param() to parse the kernel
-command line. This will execute all matching early_param() callbacks.
-User specified early platform devices will be registered at this point.
-For the early serial console case the user can specify port on the
-kernel command line as "earlyprintk=serial.0" where "earlyprintk" is
-the class string, "serial" is the name of the platform driver and
-0 is the platform device id. If the id is -1 then the dot and the
-id can be omitted.
-
-3. Installing early platform drivers belonging to a certain class
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The architecture code may optionally force registration of all early
-platform drivers belonging to a certain class using the function
-early_platform_driver_register_all(). User specified devices from
-step 2 have priority over these. This step is omitted by the serial
-driver example since the early serial driver code should be disabled
-unless the user has specified port on the kernel command line.
-
-4. Early platform driver registration
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Compiled-in platform drivers making use of early_platform_init() are
-automatically registered during step 2 or 3. The serial driver example
-should use early_platform_init("earlyprintk", &platform_driver).
-
-5. Probing of early platform drivers belonging to a certain class
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The architecture code calls early_platform_driver_probe() to match
-registered early platform devices associated with a certain class with
-registered early platform drivers. Matched devices will get probed().
-This step can be executed at any point during the early boot. As soon
-as possible may be good for the serial port case.
-
-6. Inside the early platform driver probe()
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The driver code needs to take special care during early boot, especially
-when it comes to memory allocation and interrupt registration. The code
-in the probe() function can use is_early_platform_device() to check if
-it is called at early platform device or at the regular platform device
-time. The early serial driver performs register_console() at this point.
-
-For further information, see <linux/platform_device.h>.
diff --git a/Documentation/driver-model/porting.rst b/Documentation/driver-model/porting.rst
deleted file mode 100644
index ae4bf843c1d6..000000000000
--- a/Documentation/driver-model/porting.rst
+++ /dev/null
@@ -1,448 +0,0 @@
-=======================================
-Porting Drivers to the New Driver Model
-=======================================
-
-Patrick Mochel
-
-7 January 2003
-
-
-Overview
-
-Please refer to `Documentation/driver-model/*.rst` for definitions of
-various driver types and concepts.
-
-Most of the work of porting devices drivers to the new model happens
-at the bus driver layer. This was intentional, to minimize the
-negative effect on kernel drivers, and to allow a gradual transition
-of bus drivers.
-
-In a nutshell, the driver model consists of a set of objects that can
-be embedded in larger, bus-specific objects. Fields in these generic
-objects can replace fields in the bus-specific objects.
-
-The generic objects must be registered with the driver model core. By
-doing so, they will exported via the sysfs filesystem. sysfs can be
-mounted by doing::
-
-	# mount -t sysfs sysfs /sys
-
-
-
-The Process
-
-Step 0: Read include/linux/device.h for object and function definitions.
-
-Step 1: Registering the bus driver.
-
-
-- Define a struct bus_type for the bus driver::
-
-    struct bus_type pci_bus_type = {
-          .name           = "pci",
-    };
-
-
-- Register the bus type.
-
-  This should be done in the initialization function for the bus type,
-  which is usually the module_init(), or equivalent, function::
-
-    static int __init pci_driver_init(void)
-    {
-            return bus_register(&pci_bus_type);
-    }
-
-    subsys_initcall(pci_driver_init);
-
-
-  The bus type may be unregistered (if the bus driver may be compiled
-  as a module) by doing::
-
-     bus_unregister(&pci_bus_type);
-
-
-- Export the bus type for others to use.
-
-  Other code may wish to reference the bus type, so declare it in a
-  shared header file and export the symbol.
-
-From include/linux/pci.h::
-
-  extern struct bus_type pci_bus_type;
-
-
-From file the above code appears in::
-
-  EXPORT_SYMBOL(pci_bus_type);
-
-
-
-- This will cause the bus to show up in /sys/bus/pci/ with two
-  subdirectories: 'devices' and 'drivers'::
-
-    # tree -d /sys/bus/pci/
-    /sys/bus/pci/
-    |-- devices
-    `-- drivers
-
-
-
-Step 2: Registering Devices.
-
-struct device represents a single device. It mainly contains metadata
-describing the relationship the device has to other entities.
-
-
-- Embed a struct device in the bus-specific device type::
-
-
-    struct pci_dev {
-           ...
-           struct  device  dev;            /* Generic device interface */
-           ...
-    };
-
-  It is recommended that the generic device not be the first item in
-  the struct to discourage programmers from doing mindless casts
-  between the object types. Instead macros, or inline functions,
-  should be created to convert from the generic object type::
-
-
-    #define to_pci_dev(n) container_of(n, struct pci_dev, dev)
-
-    or
-
-    static inline struct pci_dev * to_pci_dev(struct kobject * kobj)
-    {
-	return container_of(n, struct pci_dev, dev);
-    }
-
-  This allows the compiler to verify type-safety of the operations
-  that are performed (which is Good).
-
-
-- Initialize the device on registration.
-
-  When devices are discovered or registered with the bus type, the
-  bus driver should initialize the generic device. The most important
-  things to initialize are the bus_id, parent, and bus fields.
-
-  The bus_id is an ASCII string that contains the device's address on
-  the bus. The format of this string is bus-specific. This is
-  necessary for representing devices in sysfs.
-
-  parent is the physical parent of the device. It is important that
-  the bus driver sets this field correctly.
-
-  The driver model maintains an ordered list of devices that it uses
-  for power management. This list must be in order to guarantee that
-  devices are shutdown before their physical parents, and vice versa.
-  The order of this list is determined by the parent of registered
-  devices.
-
-  Also, the location of the device's sysfs directory depends on a
-  device's parent. sysfs exports a directory structure that mirrors
-  the device hierarchy. Accurately setting the parent guarantees that
-  sysfs will accurately represent the hierarchy.
-
-  The device's bus field is a pointer to the bus type the device
-  belongs to. This should be set to the bus_type that was declared
-  and initialized before.
-
-  Optionally, the bus driver may set the device's name and release
-  fields.
-
-  The name field is an ASCII string describing the device, like
-
-     "ATI Technologies Inc Radeon QD"
-
-  The release field is a callback that the driver model core calls
-  when the device has been removed, and all references to it have
-  been released. More on this in a moment.
-
-
-- Register the device.
-
-  Once the generic device has been initialized, it can be registered
-  with the driver model core by doing::
-
-       device_register(&dev->dev);
-
-  It can later be unregistered by doing::
-
-       device_unregister(&dev->dev);
-
-  This should happen on buses that support hotpluggable devices.
-  If a bus driver unregisters a device, it should not immediately free
-  it. It should instead wait for the driver model core to call the
-  device's release method, then free the bus-specific object.
-  (There may be other code that is currently referencing the device
-  structure, and it would be rude to free the device while that is
-  happening).
-
-
-  When the device is registered, a directory in sysfs is created.
-  The PCI tree in sysfs looks like::
-
-    /sys/devices/pci0/
-    |-- 00:00.0
-    |-- 00:01.0
-    |   `-- 01:00.0
-    |-- 00:02.0
-    |   `-- 02:1f.0
-    |       `-- 03:00.0
-    |-- 00:1e.0
-    |   `-- 04:04.0
-    |-- 00:1f.0
-    |-- 00:1f.1
-    |   |-- ide0
-    |   |   |-- 0.0
-    |   |   `-- 0.1
-    |   `-- ide1
-    |       `-- 1.0
-    |-- 00:1f.2
-    |-- 00:1f.3
-    `-- 00:1f.5
-
-  Also, symlinks are created in the bus's 'devices' directory
-  that point to the device's directory in the physical hierarchy::
-
-    /sys/bus/pci/devices/
-    |-- 00:00.0 -> ../../../devices/pci0/00:00.0
-    |-- 00:01.0 -> ../../../devices/pci0/00:01.0
-    |-- 00:02.0 -> ../../../devices/pci0/00:02.0
-    |-- 00:1e.0 -> ../../../devices/pci0/00:1e.0
-    |-- 00:1f.0 -> ../../../devices/pci0/00:1f.0
-    |-- 00:1f.1 -> ../../../devices/pci0/00:1f.1
-    |-- 00:1f.2 -> ../../../devices/pci0/00:1f.2
-    |-- 00:1f.3 -> ../../../devices/pci0/00:1f.3
-    |-- 00:1f.5 -> ../../../devices/pci0/00:1f.5
-    |-- 01:00.0 -> ../../../devices/pci0/00:01.0/01:00.0
-    |-- 02:1f.0 -> ../../../devices/pci0/00:02.0/02:1f.0
-    |-- 03:00.0 -> ../../../devices/pci0/00:02.0/02:1f.0/03:00.0
-    `-- 04:04.0 -> ../../../devices/pci0/00:1e.0/04:04.0
-
-
-
-Step 3: Registering Drivers.
-
-struct device_driver is a simple driver structure that contains a set
-of operations that the driver model core may call.
-
-
-- Embed a struct device_driver in the bus-specific driver.
-
-  Just like with devices, do something like::
-
-    struct pci_driver {
-           ...
-           struct device_driver    driver;
-    };
-
-
-- Initialize the generic driver structure.
-
-  When the driver registers with the bus (e.g. doing pci_register_driver()),
-  initialize the necessary fields of the driver: the name and bus
-  fields.
-
-
-- Register the driver.
-
-  After the generic driver has been initialized, call::
-
-	driver_register(&drv->driver);
-
-  to register the driver with the core.
-
-  When the driver is unregistered from the bus, unregister it from the
-  core by doing::
-
-        driver_unregister(&drv->driver);
-
-  Note that this will block until all references to the driver have
-  gone away. Normally, there will not be any.
-
-
-- Sysfs representation.
-
-  Drivers are exported via sysfs in their bus's 'driver's directory.
-  For example::
-
-    /sys/bus/pci/drivers/
-    |-- 3c59x
-    |-- Ensoniq AudioPCI
-    |-- agpgart-amdk7
-    |-- e100
-    `-- serial
-
-
-Step 4: Define Generic Methods for Drivers.
-
-struct device_driver defines a set of operations that the driver model
-core calls. Most of these operations are probably similar to
-operations the bus already defines for drivers, but taking different
-parameters.
-
-It would be difficult and tedious to force every driver on a bus to
-simultaneously convert their drivers to generic format. Instead, the
-bus driver should define single instances of the generic methods that
-forward call to the bus-specific drivers. For instance::
-
-
-  static int pci_device_remove(struct device * dev)
-  {
-          struct pci_dev * pci_dev = to_pci_dev(dev);
-          struct pci_driver * drv = pci_dev->driver;
-
-          if (drv) {
-                  if (drv->remove)
-                          drv->remove(pci_dev);
-                  pci_dev->driver = NULL;
-          }
-          return 0;
-  }
-
-
-The generic driver should be initialized with these methods before it
-is registered::
-
-        /* initialize common driver fields */
-        drv->driver.name = drv->name;
-        drv->driver.bus = &pci_bus_type;
-        drv->driver.probe = pci_device_probe;
-        drv->driver.resume = pci_device_resume;
-        drv->driver.suspend = pci_device_suspend;
-        drv->driver.remove = pci_device_remove;
-
-        /* register with core */
-        driver_register(&drv->driver);
-
-
-Ideally, the bus should only initialize the fields if they are not
-already set. This allows the drivers to implement their own generic
-methods.
-
-
-Step 5: Support generic driver binding.
-
-The model assumes that a device or driver can be dynamically
-registered with the bus at any time. When registration happens,
-devices must be bound to a driver, or drivers must be bound to all
-devices that it supports.
-
-A driver typically contains a list of device IDs that it supports. The
-bus driver compares these IDs to the IDs of devices registered with it.
-The format of the device IDs, and the semantics for comparing them are
-bus-specific, so the generic model does attempt to generalize them.
-
-Instead, a bus may supply a method in struct bus_type that does the
-comparison::
-
-  int (*match)(struct device * dev, struct device_driver * drv);
-
-match should return positive value if the driver supports the device,
-and zero otherwise. It may also return error code (for example
--EPROBE_DEFER) if determining that given driver supports the device is
-not possible.
-
-When a device is registered, the bus's list of drivers is iterated
-over. bus->match() is called for each one until a match is found.
-
-When a driver is registered, the bus's list of devices is iterated
-over. bus->match() is called for each device that is not already
-claimed by a driver.
-
-When a device is successfully bound to a driver, device->driver is
-set, the device is added to a per-driver list of devices, and a
-symlink is created in the driver's sysfs directory that points to the
-device's physical directory::
-
-  /sys/bus/pci/drivers/
-  |-- 3c59x
-  |   `-- 00:0b.0 -> ../../../../devices/pci0/00:0b.0
-  |-- Ensoniq AudioPCI
-  |-- agpgart-amdk7
-  |   `-- 00:00.0 -> ../../../../devices/pci0/00:00.0
-  |-- e100
-  |   `-- 00:0c.0 -> ../../../../devices/pci0/00:0c.0
-  `-- serial
-
-
-This driver binding should replace the existing driver binding
-mechanism the bus currently uses.
-
-
-Step 6: Supply a hotplug callback.
-
-Whenever a device is registered with the driver model core, the
-userspace program /sbin/hotplug is called to notify userspace.
-Users can define actions to perform when a device is inserted or
-removed.
-
-The driver model core passes several arguments to userspace via
-environment variables, including
-
-- ACTION: set to 'add' or 'remove'
-- DEVPATH: set to the device's physical path in sysfs.
-
-A bus driver may also supply additional parameters for userspace to
-consume. To do this, a bus must implement the 'hotplug' method in
-struct bus_type::
-
-     int (*hotplug) (struct device *dev, char **envp,
-                     int num_envp, char *buffer, int buffer_size);
-
-This is called immediately before /sbin/hotplug is executed.
-
-
-Step 7: Cleaning up the bus driver.
-
-The generic bus, device, and driver structures provide several fields
-that can replace those defined privately to the bus driver.
-
-- Device list.
-
-struct bus_type contains a list of all devices registered with the bus
-type. This includes all devices on all instances of that bus type.
-An internal list that the bus uses may be removed, in favor of using
-this one.
-
-The core provides an iterator to access these devices::
-
-  int bus_for_each_dev(struct bus_type * bus, struct device * start,
-                       void * data, int (*fn)(struct device *, void *));
-
-
-- Driver list.
-
-struct bus_type also contains a list of all drivers registered with
-it. An internal list of drivers that the bus driver maintains may
-be removed in favor of using the generic one.
-
-The drivers may be iterated over, like devices::
-
-  int bus_for_each_drv(struct bus_type * bus, struct device_driver * start,
-                       void * data, int (*fn)(struct device_driver *, void *));
-
-
-Please see drivers/base/bus.c for more information.
-
-
-- rwsem
-
-struct bus_type contains an rwsem that protects all core accesses to
-the device and driver lists. This can be used by the bus driver
-internally, and should be used when accessing the device or driver
-lists the bus maintains.
-
-
-- Device and driver fields.
-
-Some of the fields in struct device and struct device_driver duplicate
-fields in the bus-specific representations of these objects. Feel free
-to remove the bus-specific ones and favor the generic ones. Note
-though, that this will likely mean fixing up all the drivers that
-reference the bus-specific fields (though those should all be 1-line
-changes).
diff --git a/Documentation/eisa.txt b/Documentation/eisa.txt
index f388545a85a7..c07565ba57da 100644
--- a/Documentation/eisa.txt
+++ b/Documentation/eisa.txt
@@ -103,7 +103,7 @@ id_table	an array of NULL terminated EISA id strings,
 		(driver_data).
 
 driver		a generic driver, such as described in
-		Documentation/driver-model/driver.rst. Only .name,
+		Documentation/driver-api/driver-model/driver.rst. Only .name,
 		.probe and .remove members are mandatory.
 =============== ====================================================
 
@@ -152,7 +152,7 @@ state    set of flags indicating the state of the device. Current
 	 flags are EISA_CONFIG_ENABLED and EISA_CONFIG_FORCED.
 res	 set of four 256 bytes I/O regions allocated to this device
 dma_mask DMA mask set from the parent device.
-dev	 generic device (see Documentation/driver-model/device.rst)
+dev	 generic device (see Documentation/driver-api/driver-model/device.rst)
 ======== ============================================================
 
 You can get the 'struct eisa_device' from 'struct device' using the
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
index 5b5311f9358d..ddf15b1b0d5a 100644
--- a/Documentation/filesystems/sysfs.txt
+++ b/Documentation/filesystems/sysfs.txt
@@ -319,7 +319,7 @@ quick way to lookup the sysfs interface for a device from the result of
 a stat(2) operation.
 
 More information can driver-model specific features can be found in
-Documentation/driver-model/. 
+Documentation/driver-api/driver-model/.
 
 
 TODO: Finish this section.
diff --git a/Documentation/hwmon/submitting-patches.rst b/Documentation/hwmon/submitting-patches.rst
index d5b05d3e54ba..452fc28d8e0b 100644
--- a/Documentation/hwmon/submitting-patches.rst
+++ b/Documentation/hwmon/submitting-patches.rst
@@ -89,7 +89,7 @@ increase the chances of your change being accepted.
   console. Excessive logging can seriously affect system performance.
 
 * Use devres functions whenever possible to allocate resources. For rationale
-  and supported functions, please see Documentation/driver-model/devres.rst.
+  and supported functions, please see Documentation/driver-api/driver-model/devres.rst.
   If a function is not supported by devres, consider using devm_add_action().
 
 * If the driver has a detect function, make sure it is silent. Debug messages
diff --git a/Documentation/translations/zh_CN/filesystems/sysfs.txt b/Documentation/translations/zh_CN/filesystems/sysfs.txt
index 452271dda141..ee1f37da5b23 100644
--- a/Documentation/translations/zh_CN/filesystems/sysfs.txt
+++ b/Documentation/translations/zh_CN/filesystems/sysfs.txt
@@ -288,7 +288,7 @@ dev/ 包含两个子目录： char/ 和 block/。在这两个子目录中，有
 中相应的设备。/sys/dev 提供一个通过一个 stat(2) 操作结果，查找
 设备 sysfs 接口快捷的方法。
 
-更多有关 driver-model 的特性信息可以在 Documentation/driver-model/
+更多有关 driver-model 的特性信息可以在 Documentation/driver-api/driver-model/
 中找到。
 
 
-- 
cgit 


From df1b7ce784c220373d202ea9f8bc0c424f2c9f7c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 17:16:23 -0300
Subject: docs: add some documentation dirs to the driver-api book

Those are subsystem docs, with a mix of kABI and user-faced
docs. While they're not split, keep the dirs where they are,
adding just a pointer to the main index.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/accounting/index.rst | 2 +-
 Documentation/block/index.rst      | 2 +-
 Documentation/hid/index.rst        | 2 +-
 Documentation/iio/index.rst        | 2 +-
 Documentation/index.rst            | 4 ++++
 5 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/accounting/index.rst b/Documentation/accounting/index.rst
index e1f6284b5ff3..9369d8bf32be 100644
--- a/Documentation/accounting/index.rst
+++ b/Documentation/accounting/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ==========
 Accounting
diff --git a/Documentation/block/index.rst b/Documentation/block/index.rst
index 8cd226a0e86e..3fa7a52fafa4 100644
--- a/Documentation/block/index.rst
+++ b/Documentation/block/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 =====
 Block
diff --git a/Documentation/hid/index.rst b/Documentation/hid/index.rst
index af4324902622..737d66dc16a1 100644
--- a/Documentation/hid/index.rst
+++ b/Documentation/hid/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 =============================
 Human Interface Devices (HID)
diff --git a/Documentation/iio/index.rst b/Documentation/iio/index.rst
index 0593dca89a94..58b7a4ebac51 100644
--- a/Documentation/iio/index.rst
+++ b/Documentation/iio/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ==============
 Industrial I/O
diff --git a/Documentation/index.rst b/Documentation/index.rst
index a322c8721d13..dcdaaff71633 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -92,6 +92,10 @@ needed).
 
    driver-api/index
    core-api/index
+   accounting/index
+   block/index
+   hid/index
+   iio/index
    leds/index
    media/index
    networking/index
-- 
cgit 


From 83bbf6e103544d65f17f4b2ccea1c6a51c0b0769 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 27 Jun 2019 12:59:40 -0300
Subject: docs: aoe: add it to the driver-api book

Those files belong to the admin guide, so add them.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Justin Sanders <justin@coraid.com>
---
 Documentation/admin-guide/aoe/aoe.rst         | 150 ++++++++++++++++++++++++++
 Documentation/admin-guide/aoe/autoload.sh     |  17 +++
 Documentation/admin-guide/aoe/examples.rst    |  23 ++++
 Documentation/admin-guide/aoe/index.rst       |  17 +++
 Documentation/admin-guide/aoe/status.sh       |  30 ++++++
 Documentation/admin-guide/aoe/todo.rst        |  17 +++
 Documentation/admin-guide/aoe/udev-install.sh |  33 ++++++
 Documentation/admin-guide/aoe/udev.txt        |  26 +++++
 Documentation/admin-guide/index.rst           |   1 +
 Documentation/aoe/aoe.rst                     | 150 --------------------------
 Documentation/aoe/autoload.sh                 |  17 ---
 Documentation/aoe/examples.rst                |  23 ----
 Documentation/aoe/index.rst                   |  19 ----
 Documentation/aoe/status.sh                   |  30 ------
 Documentation/aoe/todo.rst                    |  17 ---
 Documentation/aoe/udev-install.sh             |  33 ------
 Documentation/aoe/udev.txt                    |  26 -----
 17 files changed, 314 insertions(+), 315 deletions(-)
 create mode 100644 Documentation/admin-guide/aoe/aoe.rst
 create mode 100644 Documentation/admin-guide/aoe/autoload.sh
 create mode 100644 Documentation/admin-guide/aoe/examples.rst
 create mode 100644 Documentation/admin-guide/aoe/index.rst
 create mode 100644 Documentation/admin-guide/aoe/status.sh
 create mode 100644 Documentation/admin-guide/aoe/todo.rst
 create mode 100644 Documentation/admin-guide/aoe/udev-install.sh
 create mode 100644 Documentation/admin-guide/aoe/udev.txt
 delete mode 100644 Documentation/aoe/aoe.rst
 delete mode 100644 Documentation/aoe/autoload.sh
 delete mode 100644 Documentation/aoe/examples.rst
 delete mode 100644 Documentation/aoe/index.rst
 delete mode 100644 Documentation/aoe/status.sh
 delete mode 100644 Documentation/aoe/todo.rst
 delete mode 100644 Documentation/aoe/udev-install.sh
 delete mode 100644 Documentation/aoe/udev.txt

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/aoe/aoe.rst b/Documentation/admin-guide/aoe/aoe.rst
new file mode 100644
index 000000000000..a05e751363a0
--- /dev/null
+++ b/Documentation/admin-guide/aoe/aoe.rst
@@ -0,0 +1,150 @@
+Introduction
+============
+
+ATA over Ethernet is a network protocol that provides simple access to
+block storage on the LAN.
+
+  http://support.coraid.com/documents/AoEr11.txt
+
+The EtherDrive (R) HOWTO for 2.6 and 3.x kernels is found at ...
+
+  http://support.coraid.com/support/linux/EtherDrive-2.6-HOWTO.html
+
+It has many tips and hints!  Please see, especially, recommended
+tunings for virtual memory:
+
+  http://support.coraid.com/support/linux/EtherDrive-2.6-HOWTO-5.html#ss5.19
+
+The aoetools are userland programs that are designed to work with this
+driver.  The aoetools are on sourceforge.
+
+  http://aoetools.sourceforge.net/
+
+The scripts in this Documentation/admin-guide/aoe directory are intended to
+document the use of the driver and are not necessary if you install
+the aoetools.
+
+
+Creating Device Nodes
+=====================
+
+  Users of udev should find the block device nodes created
+  automatically, but to create all the necessary device nodes, use the
+  udev configuration rules provided in udev.txt (in this directory).
+
+  There is a udev-install.sh script that shows how to install these
+  rules on your system.
+
+  There is also an autoload script that shows how to edit
+  /etc/modprobe.d/aoe.conf to ensure that the aoe module is loaded when
+  necessary.  Preloading the aoe module is preferable to autoloading,
+  however, because AoE discovery takes a few seconds.  It can be
+  confusing when an AoE device is not present the first time the a
+  command is run but appears a second later.
+
+Using Device Nodes
+==================
+
+  "cat /dev/etherd/err" blocks, waiting for error diagnostic output,
+  like any retransmitted packets.
+
+  "echo eth2 eth4 > /dev/etherd/interfaces" tells the aoe driver to
+  limit ATA over Ethernet traffic to eth2 and eth4.  AoE traffic from
+  untrusted networks should be ignored as a matter of security.  See
+  also the aoe_iflist driver option described below.
+
+  "echo > /dev/etherd/discover" tells the driver to find out what AoE
+  devices are available.
+
+  In the future these character devices may disappear and be replaced
+  by sysfs counterparts.  Using the commands in aoetools insulates
+  users from these implementation details.
+
+  The block devices are named like this::
+
+	e{shelf}.{slot}
+	e{shelf}.{slot}p{part}
+
+  ... so that "e0.2" is the third blade from the left (slot 2) in the
+  first shelf (shelf address zero).  That's the whole disk.  The first
+  partition on that disk would be "e0.2p1".
+
+Using sysfs
+===========
+
+  Each aoe block device in /sys/block has the extra attributes of
+  state, mac, and netif.  The state attribute is "up" when the device
+  is ready for I/O and "down" if detected but unusable.  The
+  "down,closewait" state shows that the device is still open and
+  cannot come up again until it has been closed.
+
+  The mac attribute is the ethernet address of the remote AoE device.
+  The netif attribute is the network interface on the localhost
+  through which we are communicating with the remote AoE device.
+
+  There is a script in this directory that formats this information in
+  a convenient way.  Users with aoetools should use the aoe-stat
+  command::
+
+    root@makki root# sh Documentation/admin-guide/aoe/status.sh
+       e10.0            eth3              up
+       e10.1            eth3              up
+       e10.2            eth3              up
+       e10.3            eth3              up
+       e10.4            eth3              up
+       e10.5            eth3              up
+       e10.6            eth3              up
+       e10.7            eth3              up
+       e10.8            eth3              up
+       e10.9            eth3              up
+        e4.0            eth1              up
+        e4.1            eth1              up
+        e4.2            eth1              up
+        e4.3            eth1              up
+        e4.4            eth1              up
+        e4.5            eth1              up
+        e4.6            eth1              up
+        e4.7            eth1              up
+        e4.8            eth1              up
+        e4.9            eth1              up
+
+  Use /sys/module/aoe/parameters/aoe_iflist (or better, the driver
+  option discussed below) instead of /dev/etherd/interfaces to limit
+  AoE traffic to the network interfaces in the given
+  whitespace-separated list.  Unlike the old character device, the
+  sysfs entry can be read from as well as written to.
+
+  It's helpful to trigger discovery after setting the list of allowed
+  interfaces.  The aoetools package provides an aoe-discover script
+  for this purpose.  You can also directly use the
+  /dev/etherd/discover special file described above.
+
+Driver Options
+==============
+
+  There is a boot option for the built-in aoe driver and a
+  corresponding module parameter, aoe_iflist.  Without this option,
+  all network interfaces may be used for ATA over Ethernet.  Here is a
+  usage example for the module parameter::
+
+    modprobe aoe_iflist="eth1 eth3"
+
+  The aoe_deadsecs module parameter determines the maximum number of
+  seconds that the driver will wait for an AoE device to provide a
+  response to an AoE command.  After aoe_deadsecs seconds have
+  elapsed, the AoE device will be marked as "down".  A value of zero
+  is supported for testing purposes and makes the aoe driver keep
+  trying AoE commands forever.
+
+  The aoe_maxout module parameter has a default of 128.  This is the
+  maximum number of unresponded packets that will be sent to an AoE
+  target at one time.
+
+  The aoe_dyndevs module parameter defaults to 1, meaning that the
+  driver will assign a block device minor number to a discovered AoE
+  target based on the order of its discovery.  With dynamic minor
+  device numbers in use, a greater range of AoE shelf and slot
+  addresses can be supported.  Users with udev will never have to
+  think about minor numbers.  Using aoe_dyndevs=0 allows device nodes
+  to be pre-created using a static minor-number scheme with the
+  aoe-mkshelf script in the aoetools.
diff --git a/Documentation/admin-guide/aoe/autoload.sh b/Documentation/admin-guide/aoe/autoload.sh
new file mode 100644
index 000000000000..815dff4691c9
--- /dev/null
+++ b/Documentation/admin-guide/aoe/autoload.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+# set aoe to autoload by installing the
+# aliases in /etc/modprobe.d/
+
+f=/etc/modprobe.d/aoe.conf
+
+if test ! -r $f || test ! -w $f; then
+	echo "cannot configure $f for module autoloading" 1>&2
+	exit 1
+fi
+
+grep major-152 $f >/dev/null
+if [ $? = 1 ]; then
+	echo alias block-major-152 aoe >> $f
+	echo alias char-major-152 aoe >> $f
+fi
+
diff --git a/Documentation/admin-guide/aoe/examples.rst b/Documentation/admin-guide/aoe/examples.rst
new file mode 100644
index 000000000000..91f3198e52c1
--- /dev/null
+++ b/Documentation/admin-guide/aoe/examples.rst
@@ -0,0 +1,23 @@
+Example of udev rules
+---------------------
+
+ .. include:: udev.txt
+    :literal:
+
+Example of udev install rules script
+------------------------------------
+
+ .. literalinclude:: udev-install.sh
+    :language: shell
+
+Example script to get status
+----------------------------
+
+ .. literalinclude:: status.sh
+    :language: shell
+
+Example of AoE autoload script
+------------------------------
+
+ .. literalinclude:: autoload.sh
+    :language: shell
diff --git a/Documentation/admin-guide/aoe/index.rst b/Documentation/admin-guide/aoe/index.rst
new file mode 100644
index 000000000000..d71c5df15922
--- /dev/null
+++ b/Documentation/admin-guide/aoe/index.rst
@@ -0,0 +1,17 @@
+=======================
+ATA over Ethernet (AoE)
+=======================
+
+.. toctree::
+    :maxdepth: 1
+
+    aoe
+    todo
+    examples
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/admin-guide/aoe/status.sh b/Documentation/admin-guide/aoe/status.sh
new file mode 100644
index 000000000000..eeec7baae57a
--- /dev/null
+++ b/Documentation/admin-guide/aoe/status.sh
@@ -0,0 +1,30 @@
+#! /bin/sh
+# collate and present sysfs information about AoE storage
+#
+# A more complete version of this script is aoe-stat, in the
+# aoetools.
+
+set -e
+format="%8s\t%8s\t%8s\n"
+me=`basename $0`
+sysd=${sysfs_dir:-/sys}
+
+# printf "$format" device mac netif state
+
+# Suse 9.1 Pro doesn't put /sys in /etc/mtab
+#test -z "`mount | grep sysfs`" && {
+test ! -d "$sysd/block" && {
+	echo "$me Error: sysfs is not mounted" 1>&2
+	exit 1
+}
+
+for d in `ls -d $sysd/block/etherd* 2>/dev/null | grep -v p` end; do
+	# maybe ls comes up empty, so we use "end"
+	test $d = end && continue
+
+	dev=`echo "$d" | sed 's/.*!//'`
+	printf "$format" \
+		"$dev" \
+		"`cat \"$d/netif\"`" \
+		"`cat \"$d/state\"`"
+done | sort
diff --git a/Documentation/admin-guide/aoe/todo.rst b/Documentation/admin-guide/aoe/todo.rst
new file mode 100644
index 000000000000..dea8db5a33e1
--- /dev/null
+++ b/Documentation/admin-guide/aoe/todo.rst
@@ -0,0 +1,17 @@
+TODO
+====
+
+There is a potential for deadlock when allocating a struct sk_buff for
+data that needs to be written out to aoe storage.  If the data is
+being written from a dirty page in order to free that page, and if
+there are no other pages available, then deadlock may occur when a
+free page is needed for the sk_buff allocation.  This situation has
+not been observed, but it would be nice to eliminate any potential for
+deadlock under memory pressure.
+
+Because ATA over Ethernet is not fragmented by the kernel's IP code,
+the destructor member of the struct sk_buff is available to the aoe
+driver.  By using a mempool for allocating all but the first few
+sk_buffs, and by registering a destructor, we should be able to
+efficiently allocate sk_buffs without introducing any potential for
+deadlock.
diff --git a/Documentation/admin-guide/aoe/udev-install.sh b/Documentation/admin-guide/aoe/udev-install.sh
new file mode 100644
index 000000000000..15e86f58c036
--- /dev/null
+++ b/Documentation/admin-guide/aoe/udev-install.sh
@@ -0,0 +1,33 @@
+# install the aoe-specific udev rules from udev.txt into 
+# the system's udev configuration
+# 
+
+me="`basename $0`"
+
+# find udev.conf, often /etc/udev/udev.conf
+# (or environment can specify where to find udev.conf)
+#
+if test -z "$conf"; then
+	if test -r /etc/udev/udev.conf; then
+		conf=/etc/udev/udev.conf
+	else
+		conf="`find /etc -type f -name udev.conf 2> /dev/null`"
+		if test -z "$conf" || test ! -r "$conf"; then
+			echo "$me Error: no udev.conf found" 1>&2
+			exit 1
+		fi
+	fi
+fi
+
+# find the directory where udev rules are stored, often
+# /etc/udev/rules.d
+#
+rules_d="`sed -n '/^udev_rules=/{ s!udev_rules=!!; s!\"!!g; p; }' $conf`"
+if test -z "$rules_d" ; then
+	rules_d=/etc/udev/rules.d
+fi
+if test ! -d "$rules_d"; then
+	echo "$me Error: cannot find udev rules directory" 1>&2
+	exit 1
+fi
+sh -xc "cp `dirname $0`/udev.txt $rules_d/60-aoe.rules"
diff --git a/Documentation/admin-guide/aoe/udev.txt b/Documentation/admin-guide/aoe/udev.txt
new file mode 100644
index 000000000000..5fb756466bc7
--- /dev/null
+++ b/Documentation/admin-guide/aoe/udev.txt
@@ -0,0 +1,26 @@
+# These rules tell udev what device nodes to create for aoe support.
+# They may be installed along the following lines.  Check the section
+# 8 udev manpage to see whether your udev supports SUBSYSTEM, and
+# whether it uses one or two equal signs for SUBSYSTEM and KERNEL.
+# 
+#   ecashin@makki ~$ su
+#   Password:
+#   bash# find /etc -type f -name udev.conf
+#   /etc/udev/udev.conf
+#   bash# grep udev_rules= /etc/udev/udev.conf
+#   udev_rules="/etc/udev/rules.d/"
+#   bash# ls /etc/udev/rules.d/
+#   10-wacom.rules  50-udev.rules
+#   bash# cp /path/to/linux/Documentation/admin-guide/aoe/udev.txt \
+#           /etc/udev/rules.d/60-aoe.rules
+#  
+
+# aoe char devices
+SUBSYSTEM=="aoe", KERNEL=="discover",	NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="err",	NAME="etherd/%k", GROUP="disk", MODE="0440"
+SUBSYSTEM=="aoe", KERNEL=="interfaces",	NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="revalidate",	NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="flush",	NAME="etherd/%k", GROUP="disk", MODE="0220"
+
+# aoe block devices     
+KERNEL=="etherd*",       GROUP="disk"
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 9228fbf5ce4e..1f0d9b939311 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -83,6 +83,7 @@ configure specific aspects of kernel behavior to your liking.
    namespaces/index
    perf-security
    acpi/index
+   aoe/index
    device-mapper/index
    laptops/index
 
diff --git a/Documentation/aoe/aoe.rst b/Documentation/aoe/aoe.rst
deleted file mode 100644
index 58747ecec71d..000000000000
--- a/Documentation/aoe/aoe.rst
+++ /dev/null
@@ -1,150 +0,0 @@
-Introduction
-============
-
-ATA over Ethernet is a network protocol that provides simple access to
-block storage on the LAN.
-
-  http://support.coraid.com/documents/AoEr11.txt
-
-The EtherDrive (R) HOWTO for 2.6 and 3.x kernels is found at ...
-
-  http://support.coraid.com/support/linux/EtherDrive-2.6-HOWTO.html
-
-It has many tips and hints!  Please see, especially, recommended
-tunings for virtual memory:
-
-  http://support.coraid.com/support/linux/EtherDrive-2.6-HOWTO-5.html#ss5.19
-
-The aoetools are userland programs that are designed to work with this
-driver.  The aoetools are on sourceforge.
-
-  http://aoetools.sourceforge.net/
-
-The scripts in this Documentation/aoe directory are intended to
-document the use of the driver and are not necessary if you install
-the aoetools.
-
-
-Creating Device Nodes
-=====================
-
-  Users of udev should find the block device nodes created
-  automatically, but to create all the necessary device nodes, use the
-  udev configuration rules provided in udev.txt (in this directory).
-
-  There is a udev-install.sh script that shows how to install these
-  rules on your system.
-
-  There is also an autoload script that shows how to edit
-  /etc/modprobe.d/aoe.conf to ensure that the aoe module is loaded when
-  necessary.  Preloading the aoe module is preferable to autoloading,
-  however, because AoE discovery takes a few seconds.  It can be
-  confusing when an AoE device is not present the first time the a
-  command is run but appears a second later.
-
-Using Device Nodes
-==================
-
-  "cat /dev/etherd/err" blocks, waiting for error diagnostic output,
-  like any retransmitted packets.
-
-  "echo eth2 eth4 > /dev/etherd/interfaces" tells the aoe driver to
-  limit ATA over Ethernet traffic to eth2 and eth4.  AoE traffic from
-  untrusted networks should be ignored as a matter of security.  See
-  also the aoe_iflist driver option described below.
-
-  "echo > /dev/etherd/discover" tells the driver to find out what AoE
-  devices are available.
-
-  In the future these character devices may disappear and be replaced
-  by sysfs counterparts.  Using the commands in aoetools insulates
-  users from these implementation details.
-
-  The block devices are named like this::
-
-	e{shelf}.{slot}
-	e{shelf}.{slot}p{part}
-
-  ... so that "e0.2" is the third blade from the left (slot 2) in the
-  first shelf (shelf address zero).  That's the whole disk.  The first
-  partition on that disk would be "e0.2p1".
-
-Using sysfs
-===========
-
-  Each aoe block device in /sys/block has the extra attributes of
-  state, mac, and netif.  The state attribute is "up" when the device
-  is ready for I/O and "down" if detected but unusable.  The
-  "down,closewait" state shows that the device is still open and
-  cannot come up again until it has been closed.
-
-  The mac attribute is the ethernet address of the remote AoE device.
-  The netif attribute is the network interface on the localhost
-  through which we are communicating with the remote AoE device.
-
-  There is a script in this directory that formats this information in
-  a convenient way.  Users with aoetools should use the aoe-stat
-  command::
-
-    root@makki root# sh Documentation/aoe/status.sh
-       e10.0            eth3              up
-       e10.1            eth3              up
-       e10.2            eth3              up
-       e10.3            eth3              up
-       e10.4            eth3              up
-       e10.5            eth3              up
-       e10.6            eth3              up
-       e10.7            eth3              up
-       e10.8            eth3              up
-       e10.9            eth3              up
-        e4.0            eth1              up
-        e4.1            eth1              up
-        e4.2            eth1              up
-        e4.3            eth1              up
-        e4.4            eth1              up
-        e4.5            eth1              up
-        e4.6            eth1              up
-        e4.7            eth1              up
-        e4.8            eth1              up
-        e4.9            eth1              up
-
-  Use /sys/module/aoe/parameters/aoe_iflist (or better, the driver
-  option discussed below) instead of /dev/etherd/interfaces to limit
-  AoE traffic to the network interfaces in the given
-  whitespace-separated list.  Unlike the old character device, the
-  sysfs entry can be read from as well as written to.
-
-  It's helpful to trigger discovery after setting the list of allowed
-  interfaces.  The aoetools package provides an aoe-discover script
-  for this purpose.  You can also directly use the
-  /dev/etherd/discover special file described above.
-
-Driver Options
-==============
-
-  There is a boot option for the built-in aoe driver and a
-  corresponding module parameter, aoe_iflist.  Without this option,
-  all network interfaces may be used for ATA over Ethernet.  Here is a
-  usage example for the module parameter::
-
-    modprobe aoe_iflist="eth1 eth3"
-
-  The aoe_deadsecs module parameter determines the maximum number of
-  seconds that the driver will wait for an AoE device to provide a
-  response to an AoE command.  After aoe_deadsecs seconds have
-  elapsed, the AoE device will be marked as "down".  A value of zero
-  is supported for testing purposes and makes the aoe driver keep
-  trying AoE commands forever.
-
-  The aoe_maxout module parameter has a default of 128.  This is the
-  maximum number of unresponded packets that will be sent to an AoE
-  target at one time.
-
-  The aoe_dyndevs module parameter defaults to 1, meaning that the
-  driver will assign a block device minor number to a discovered AoE
-  target based on the order of its discovery.  With dynamic minor
-  device numbers in use, a greater range of AoE shelf and slot
-  addresses can be supported.  Users with udev will never have to
-  think about minor numbers.  Using aoe_dyndevs=0 allows device nodes
-  to be pre-created using a static minor-number scheme with the
-  aoe-mkshelf script in the aoetools.
diff --git a/Documentation/aoe/autoload.sh b/Documentation/aoe/autoload.sh
deleted file mode 100644
index 815dff4691c9..000000000000
--- a/Documentation/aoe/autoload.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/sh
-# set aoe to autoload by installing the
-# aliases in /etc/modprobe.d/
-
-f=/etc/modprobe.d/aoe.conf
-
-if test ! -r $f || test ! -w $f; then
-	echo "cannot configure $f for module autoloading" 1>&2
-	exit 1
-fi
-
-grep major-152 $f >/dev/null
-if [ $? = 1 ]; then
-	echo alias block-major-152 aoe >> $f
-	echo alias char-major-152 aoe >> $f
-fi
-
diff --git a/Documentation/aoe/examples.rst b/Documentation/aoe/examples.rst
deleted file mode 100644
index 91f3198e52c1..000000000000
--- a/Documentation/aoe/examples.rst
+++ /dev/null
@@ -1,23 +0,0 @@
-Example of udev rules
----------------------
-
- .. include:: udev.txt
-    :literal:
-
-Example of udev install rules script
-------------------------------------
-
- .. literalinclude:: udev-install.sh
-    :language: shell
-
-Example script to get status
-----------------------------
-
- .. literalinclude:: status.sh
-    :language: shell
-
-Example of AoE autoload script
-------------------------------
-
- .. literalinclude:: autoload.sh
-    :language: shell
diff --git a/Documentation/aoe/index.rst b/Documentation/aoe/index.rst
deleted file mode 100644
index 4394b9b7913c..000000000000
--- a/Documentation/aoe/index.rst
+++ /dev/null
@@ -1,19 +0,0 @@
-:orphan:
-
-=======================
-ATA over Ethernet (AoE)
-=======================
-
-.. toctree::
-    :maxdepth: 1
-
-    aoe
-    todo
-    examples
-
-.. only::  subproject and html
-
-   Indices
-   =======
-
-   * :ref:`genindex`
diff --git a/Documentation/aoe/status.sh b/Documentation/aoe/status.sh
deleted file mode 100644
index eeec7baae57a..000000000000
--- a/Documentation/aoe/status.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#! /bin/sh
-# collate and present sysfs information about AoE storage
-#
-# A more complete version of this script is aoe-stat, in the
-# aoetools.
-
-set -e
-format="%8s\t%8s\t%8s\n"
-me=`basename $0`
-sysd=${sysfs_dir:-/sys}
-
-# printf "$format" device mac netif state
-
-# Suse 9.1 Pro doesn't put /sys in /etc/mtab
-#test -z "`mount | grep sysfs`" && {
-test ! -d "$sysd/block" && {
-	echo "$me Error: sysfs is not mounted" 1>&2
-	exit 1
-}
-
-for d in `ls -d $sysd/block/etherd* 2>/dev/null | grep -v p` end; do
-	# maybe ls comes up empty, so we use "end"
-	test $d = end && continue
-
-	dev=`echo "$d" | sed 's/.*!//'`
-	printf "$format" \
-		"$dev" \
-		"`cat \"$d/netif\"`" \
-		"`cat \"$d/state\"`"
-done | sort
diff --git a/Documentation/aoe/todo.rst b/Documentation/aoe/todo.rst
deleted file mode 100644
index dea8db5a33e1..000000000000
--- a/Documentation/aoe/todo.rst
+++ /dev/null
@@ -1,17 +0,0 @@
-TODO
-====
-
-There is a potential for deadlock when allocating a struct sk_buff for
-data that needs to be written out to aoe storage.  If the data is
-being written from a dirty page in order to free that page, and if
-there are no other pages available, then deadlock may occur when a
-free page is needed for the sk_buff allocation.  This situation has
-not been observed, but it would be nice to eliminate any potential for
-deadlock under memory pressure.
-
-Because ATA over Ethernet is not fragmented by the kernel's IP code,
-the destructor member of the struct sk_buff is available to the aoe
-driver.  By using a mempool for allocating all but the first few
-sk_buffs, and by registering a destructor, we should be able to
-efficiently allocate sk_buffs without introducing any potential for
-deadlock.
diff --git a/Documentation/aoe/udev-install.sh b/Documentation/aoe/udev-install.sh
deleted file mode 100644
index 15e86f58c036..000000000000
--- a/Documentation/aoe/udev-install.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-# install the aoe-specific udev rules from udev.txt into 
-# the system's udev configuration
-# 
-
-me="`basename $0`"
-
-# find udev.conf, often /etc/udev/udev.conf
-# (or environment can specify where to find udev.conf)
-#
-if test -z "$conf"; then
-	if test -r /etc/udev/udev.conf; then
-		conf=/etc/udev/udev.conf
-	else
-		conf="`find /etc -type f -name udev.conf 2> /dev/null`"
-		if test -z "$conf" || test ! -r "$conf"; then
-			echo "$me Error: no udev.conf found" 1>&2
-			exit 1
-		fi
-	fi
-fi
-
-# find the directory where udev rules are stored, often
-# /etc/udev/rules.d
-#
-rules_d="`sed -n '/^udev_rules=/{ s!udev_rules=!!; s!\"!!g; p; }' $conf`"
-if test -z "$rules_d" ; then
-	rules_d=/etc/udev/rules.d
-fi
-if test ! -d "$rules_d"; then
-	echo "$me Error: cannot find udev rules directory" 1>&2
-	exit 1
-fi
-sh -xc "cp `dirname $0`/udev.txt $rules_d/60-aoe.rules"
diff --git a/Documentation/aoe/udev.txt b/Documentation/aoe/udev.txt
deleted file mode 100644
index 54feda5a0772..000000000000
--- a/Documentation/aoe/udev.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-# These rules tell udev what device nodes to create for aoe support.
-# They may be installed along the following lines.  Check the section
-# 8 udev manpage to see whether your udev supports SUBSYSTEM, and
-# whether it uses one or two equal signs for SUBSYSTEM and KERNEL.
-# 
-#   ecashin@makki ~$ su
-#   Password:
-#   bash# find /etc -type f -name udev.conf
-#   /etc/udev/udev.conf
-#   bash# grep udev_rules= /etc/udev/udev.conf
-#   udev_rules="/etc/udev/rules.d/"
-#   bash# ls /etc/udev/rules.d/
-#   10-wacom.rules  50-udev.rules
-#   bash# cp /path/to/linux/Documentation/aoe/udev.txt \
-#           /etc/udev/rules.d/60-aoe.rules
-#  
-
-# aoe char devices
-SUBSYSTEM=="aoe", KERNEL=="discover",	NAME="etherd/%k", GROUP="disk", MODE="0220"
-SUBSYSTEM=="aoe", KERNEL=="err",	NAME="etherd/%k", GROUP="disk", MODE="0440"
-SUBSYSTEM=="aoe", KERNEL=="interfaces",	NAME="etherd/%k", GROUP="disk", MODE="0220"
-SUBSYSTEM=="aoe", KERNEL=="revalidate",	NAME="etherd/%k", GROUP="disk", MODE="0220"
-SUBSYSTEM=="aoe", KERNEL=="flush",	NAME="etherd/%k", GROUP="disk", MODE="0220"
-
-# aoe block devices     
-KERNEL=="etherd*",       GROUP="disk"
-- 
cgit 


From da82c92f1150f66afabf78d2c85ef9ac18dc6d38 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 27 Jun 2019 13:08:35 -0300
Subject: docs: cgroup-v1: add it to the admin-guide book

Those files belong to the admin guide, so add them.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 .../admin-guide/cgroup-v1/blkio-controller.rst     |  302 ++++++
 Documentation/admin-guide/cgroup-v1/cgroups.rst    |  695 ++++++++++++++
 Documentation/admin-guide/cgroup-v1/cpuacct.rst    |   50 +
 Documentation/admin-guide/cgroup-v1/cpusets.rst    |  866 +++++++++++++++++
 Documentation/admin-guide/cgroup-v1/devices.rst    |  132 +++
 .../admin-guide/cgroup-v1/freezer-subsystem.rst    |  127 +++
 Documentation/admin-guide/cgroup-v1/hugetlb.rst    |   50 +
 Documentation/admin-guide/cgroup-v1/index.rst      |   28 +
 Documentation/admin-guide/cgroup-v1/memcg_test.rst |  355 +++++++
 Documentation/admin-guide/cgroup-v1/memory.rst     | 1003 ++++++++++++++++++++
 Documentation/admin-guide/cgroup-v1/net_cls.rst    |   44 +
 Documentation/admin-guide/cgroup-v1/net_prio.rst   |   57 ++
 Documentation/admin-guide/cgroup-v1/pids.rst       |   92 ++
 Documentation/admin-guide/cgroup-v1/rdma.rst       |  117 +++
 Documentation/admin-guide/cgroup-v2.rst            |    2 +-
 Documentation/admin-guide/index.rst                |    1 +
 Documentation/admin-guide/kernel-parameters.txt    |    4 +-
 .../admin-guide/mm/numa_memory_policy.rst          |    2 +-
 Documentation/block/bfq-iosched.rst                |    2 +-
 Documentation/cgroup-v1/blkio-controller.rst       |  302 ------
 Documentation/cgroup-v1/cgroups.rst                |  695 --------------
 Documentation/cgroup-v1/cpuacct.rst                |   50 -
 Documentation/cgroup-v1/cpusets.rst                |  866 -----------------
 Documentation/cgroup-v1/devices.rst                |  132 ---
 Documentation/cgroup-v1/freezer-subsystem.rst      |  127 ---
 Documentation/cgroup-v1/hugetlb.rst                |   50 -
 Documentation/cgroup-v1/index.rst                  |   30 -
 Documentation/cgroup-v1/memcg_test.rst             |  355 -------
 Documentation/cgroup-v1/memory.rst                 | 1003 --------------------
 Documentation/cgroup-v1/net_cls.rst                |   44 -
 Documentation/cgroup-v1/net_prio.rst               |   57 --
 Documentation/cgroup-v1/pids.rst                   |   92 --
 Documentation/cgroup-v1/rdma.rst                   |  117 ---
 Documentation/filesystems/tmpfs.txt                |    2 +-
 Documentation/kernel-per-CPU-kthreads.txt          |    2 +-
 Documentation/scheduler/sched-deadline.rst         |    2 +-
 Documentation/scheduler/sched-design-CFS.rst       |    2 +-
 Documentation/scheduler/sched-rt-group.rst         |    2 +-
 Documentation/vm/numa.rst                          |    4 +-
 Documentation/vm/page_migration.rst                |    2 +-
 Documentation/vm/unevictable-lru.rst               |    2 +-
 Documentation/x86/x86_64/fake-numa-for-cpusets.rst |    4 +-
 42 files changed, 3935 insertions(+), 3936 deletions(-)
 create mode 100644 Documentation/admin-guide/cgroup-v1/blkio-controller.rst
 create mode 100644 Documentation/admin-guide/cgroup-v1/cgroups.rst
 create mode 100644 Documentation/admin-guide/cgroup-v1/cpuacct.rst
 create mode 100644 Documentation/admin-guide/cgroup-v1/cpusets.rst
 create mode 100644 Documentation/admin-guide/cgroup-v1/devices.rst
 create mode 100644 Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst
 create mode 100644 Documentation/admin-guide/cgroup-v1/hugetlb.rst
 create mode 100644 Documentation/admin-guide/cgroup-v1/index.rst
 create mode 100644 Documentation/admin-guide/cgroup-v1/memcg_test.rst
 create mode 100644 Documentation/admin-guide/cgroup-v1/memory.rst
 create mode 100644 Documentation/admin-guide/cgroup-v1/net_cls.rst
 create mode 100644 Documentation/admin-guide/cgroup-v1/net_prio.rst
 create mode 100644 Documentation/admin-guide/cgroup-v1/pids.rst
 create mode 100644 Documentation/admin-guide/cgroup-v1/rdma.rst
 delete mode 100644 Documentation/cgroup-v1/blkio-controller.rst
 delete mode 100644 Documentation/cgroup-v1/cgroups.rst
 delete mode 100644 Documentation/cgroup-v1/cpuacct.rst
 delete mode 100644 Documentation/cgroup-v1/cpusets.rst
 delete mode 100644 Documentation/cgroup-v1/devices.rst
 delete mode 100644 Documentation/cgroup-v1/freezer-subsystem.rst
 delete mode 100644 Documentation/cgroup-v1/hugetlb.rst
 delete mode 100644 Documentation/cgroup-v1/index.rst
 delete mode 100644 Documentation/cgroup-v1/memcg_test.rst
 delete mode 100644 Documentation/cgroup-v1/memory.rst
 delete mode 100644 Documentation/cgroup-v1/net_cls.rst
 delete mode 100644 Documentation/cgroup-v1/net_prio.rst
 delete mode 100644 Documentation/cgroup-v1/pids.rst
 delete mode 100644 Documentation/cgroup-v1/rdma.rst

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/cgroup-v1/blkio-controller.rst b/Documentation/admin-guide/cgroup-v1/blkio-controller.rst
new file mode 100644
index 000000000000..1d7d962933be
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/blkio-controller.rst
@@ -0,0 +1,302 @@
+===================
+Block IO Controller
+===================
+
+Overview
+========
+cgroup subsys "blkio" implements the block io controller. There seems to be
+a need of various kinds of IO control policies (like proportional BW, max BW)
+both at leaf nodes as well as at intermediate nodes in a storage hierarchy.
+Plan is to use the same cgroup based management interface for blkio controller
+and based on user options switch IO policies in the background.
+
+One IO control policy is throttling policy which can be used to
+specify upper IO rate limits on devices. This policy is implemented in
+generic block layer and can be used on leaf nodes as well as higher
+level logical devices like device mapper.
+
+HOWTO
+=====
+Throttling/Upper Limit policy
+-----------------------------
+- Enable Block IO controller::
+
+	CONFIG_BLK_CGROUP=y
+
+- Enable throttling in block layer::
+
+	CONFIG_BLK_DEV_THROTTLING=y
+
+- Mount blkio controller (see cgroups.txt, Why are cgroups needed?)::
+
+        mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
+
+- Specify a bandwidth rate on particular device for root group. The format
+  for policy is "<major>:<minor>  <bytes_per_second>"::
+
+        echo "8:16  1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device
+
+  Above will put a limit of 1MB/second on reads happening for root group
+  on device having major/minor number 8:16.
+
+- Run dd to read a file and see if rate is throttled to 1MB/s or not::
+
+        # dd iflag=direct if=/mnt/common/zerofile of=/dev/null bs=4K count=1024
+        1024+0 records in
+        1024+0 records out
+        4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s
+
+ Limits for writes can be put using blkio.throttle.write_bps_device file.
+
+Hierarchical Cgroups
+====================
+
+Throttling implements hierarchy support; however,
+throttling's hierarchy support is enabled iff "sane_behavior" is
+enabled from cgroup side, which currently is a development option and
+not publicly available.
+
+If somebody created a hierarchy like as follows::
+
+			root
+			/  \
+		     test1 test2
+			|
+		     test3
+
+Throttling with "sane_behavior" will handle the
+hierarchy correctly. For throttling, all limits apply
+to the whole subtree while all statistics are local to the IOs
+directly generated by tasks in that cgroup.
+
+Throttling without "sane_behavior" enabled from cgroup side will
+practically treat all groups at same level as if it looks like the
+following::
+
+				pivot
+			     /  /   \  \
+			root  test1 test2  test3
+
+Various user visible config options
+===================================
+CONFIG_BLK_CGROUP
+	- Block IO controller.
+
+CONFIG_BFQ_CGROUP_DEBUG
+	- Debug help. Right now some additional stats file show up in cgroup
+	  if this option is enabled.
+
+CONFIG_BLK_DEV_THROTTLING
+	- Enable block device throttling support in block layer.
+
+Details of cgroup files
+=======================
+Proportional weight policy files
+--------------------------------
+- blkio.weight
+	- Specifies per cgroup weight. This is default weight of the group
+	  on all the devices until and unless overridden by per device rule.
+	  (See blkio.weight_device).
+	  Currently allowed range of weights is from 10 to 1000.
+
+- blkio.weight_device
+	- One can specify per cgroup per device rules using this interface.
+	  These rules override the default value of group weight as specified
+	  by blkio.weight.
+
+	  Following is the format::
+
+	    # echo dev_maj:dev_minor weight > blkio.weight_device
+
+	  Configure weight=300 on /dev/sdb (8:16) in this cgroup::
+
+	    # echo 8:16 300 > blkio.weight_device
+	    # cat blkio.weight_device
+	    dev     weight
+	    8:16    300
+
+	  Configure weight=500 on /dev/sda (8:0) in this cgroup::
+
+	    # echo 8:0 500 > blkio.weight_device
+	    # cat blkio.weight_device
+	    dev     weight
+	    8:0     500
+	    8:16    300
+
+	  Remove specific weight for /dev/sda in this cgroup::
+
+	    # echo 8:0 0 > blkio.weight_device
+	    # cat blkio.weight_device
+	    dev     weight
+	    8:16    300
+
+- blkio.leaf_weight[_device]
+	- Equivalents of blkio.weight[_device] for the purpose of
+          deciding how much weight tasks in the given cgroup has while
+          competing with the cgroup's child cgroups. For details,
+          please refer to Documentation/block/cfq-iosched.txt.
+
+- blkio.time
+	- disk time allocated to cgroup per device in milliseconds. First
+	  two fields specify the major and minor number of the device and
+	  third field specifies the disk time allocated to group in
+	  milliseconds.
+
+- blkio.sectors
+	- number of sectors transferred to/from disk by the group. First
+	  two fields specify the major and minor number of the device and
+	  third field specifies the number of sectors transferred by the
+	  group to/from the device.
+
+- blkio.io_service_bytes
+	- Number of bytes transferred to/from the disk by the group. These
+	  are further divided by the type of operation - read or write, sync
+	  or async. First two fields specify the major and minor number of the
+	  device, third field specifies the operation type and the fourth field
+	  specifies the number of bytes.
+
+- blkio.io_serviced
+	- Number of IOs (bio) issued to the disk by the group. These
+	  are further divided by the type of operation - read or write, sync
+	  or async. First two fields specify the major and minor number of the
+	  device, third field specifies the operation type and the fourth field
+	  specifies the number of IOs.
+
+- blkio.io_service_time
+	- Total amount of time between request dispatch and request completion
+	  for the IOs done by this cgroup. This is in nanoseconds to make it
+	  meaningful for flash devices too. For devices with queue depth of 1,
+	  this time represents the actual service time. When queue_depth > 1,
+	  that is no longer true as requests may be served out of order. This
+	  may cause the service time for a given IO to include the service time
+	  of multiple IOs when served out of order which may result in total
+	  io_service_time > actual time elapsed. This time is further divided by
+	  the type of operation - read or write, sync or async. First two fields
+	  specify the major and minor number of the device, third field
+	  specifies the operation type and the fourth field specifies the
+	  io_service_time in ns.
+
+- blkio.io_wait_time
+	- Total amount of time the IOs for this cgroup spent waiting in the
+	  scheduler queues for service. This can be greater than the total time
+	  elapsed since it is cumulative io_wait_time for all IOs. It is not a
+	  measure of total time the cgroup spent waiting but rather a measure of
+	  the wait_time for its individual IOs. For devices with queue_depth > 1
+	  this metric does not include the time spent waiting for service once
+	  the IO is dispatched to the device but till it actually gets serviced
+	  (there might be a time lag here due to re-ordering of requests by the
+	  device). This is in nanoseconds to make it meaningful for flash
+	  devices too. This time is further divided by the type of operation -
+	  read or write, sync or async. First two fields specify the major and
+	  minor number of the device, third field specifies the operation type
+	  and the fourth field specifies the io_wait_time in ns.
+
+- blkio.io_merged
+	- Total number of bios/requests merged into requests belonging to this
+	  cgroup. This is further divided by the type of operation - read or
+	  write, sync or async.
+
+- blkio.io_queued
+	- Total number of requests queued up at any given instant for this
+	  cgroup. This is further divided by the type of operation - read or
+	  write, sync or async.
+
+- blkio.avg_queue_size
+	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
+	  The average queue size for this cgroup over the entire time of this
+	  cgroup's existence. Queue size samples are taken each time one of the
+	  queues of this cgroup gets a timeslice.
+
+- blkio.group_wait_time
+	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
+	  This is the amount of time the cgroup had to wait since it became busy
+	  (i.e., went from 0 to 1 request queued) to get a timeslice for one of
+	  its queues. This is different from the io_wait_time which is the
+	  cumulative total of the amount of time spent by each IO in that cgroup
+	  waiting in the scheduler queue. This is in nanoseconds. If this is
+	  read when the cgroup is in a waiting (for timeslice) state, the stat
+	  will only report the group_wait_time accumulated till the last time it
+	  got a timeslice and will not include the current delta.
+
+- blkio.empty_time
+	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
+	  This is the amount of time a cgroup spends without any pending
+	  requests when not being served, i.e., it does not include any time
+	  spent idling for one of the queues of the cgroup. This is in
+	  nanoseconds. If this is read when the cgroup is in an empty state,
+	  the stat will only report the empty_time accumulated till the last
+	  time it had a pending request and will not include the current delta.
+
+- blkio.idle_time
+	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
+	  This is the amount of time spent by the IO scheduler idling for a
+	  given cgroup in anticipation of a better request than the existing ones
+	  from other queues/cgroups. This is in nanoseconds. If this is read
+	  when the cgroup is in an idling state, the stat will only report the
+	  idle_time accumulated till the last idle period and will not include
+	  the current delta.
+
+- blkio.dequeue
+	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y. This
+	  gives the statistics about how many a times a group was dequeued
+	  from service tree of the device. First two fields specify the major
+	  and minor number of the device and third field specifies the number
+	  of times a group was dequeued from a particular device.
+
+- blkio.*_recursive
+	- Recursive version of various stats. These files show the
+          same information as their non-recursive counterparts but
+          include stats from all the descendant cgroups.
+
+Throttling/Upper limit policy files
+-----------------------------------
+- blkio.throttle.read_bps_device
+	- Specifies upper limit on READ rate from the device. IO rate is
+	  specified in bytes per second. Rules are per device. Following is
+	  the format::
+
+	    echo "<major>:<minor>  <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
+
+- blkio.throttle.write_bps_device
+	- Specifies upper limit on WRITE rate to the device. IO rate is
+	  specified in bytes per second. Rules are per device. Following is
+	  the format::
+
+	    echo "<major>:<minor>  <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
+
+- blkio.throttle.read_iops_device
+	- Specifies upper limit on READ rate from the device. IO rate is
+	  specified in IO per second. Rules are per device. Following is
+	  the format::
+
+	   echo "<major>:<minor>  <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
+
+- blkio.throttle.write_iops_device
+	- Specifies upper limit on WRITE rate to the device. IO rate is
+	  specified in io per second. Rules are per device. Following is
+	  the format::
+
+	    echo "<major>:<minor>  <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
+
+Note: If both BW and IOPS rules are specified for a device, then IO is
+      subjected to both the constraints.
+
+- blkio.throttle.io_serviced
+	- Number of IOs (bio) issued to the disk by the group. These
+	  are further divided by the type of operation - read or write, sync
+	  or async. First two fields specify the major and minor number of the
+	  device, third field specifies the operation type and the fourth field
+	  specifies the number of IOs.
+
+- blkio.throttle.io_service_bytes
+	- Number of bytes transferred to/from the disk by the group. These
+	  are further divided by the type of operation - read or write, sync
+	  or async. First two fields specify the major and minor number of the
+	  device, third field specifies the operation type and the fourth field
+	  specifies the number of bytes.
+
+Common files among various policies
+-----------------------------------
+- blkio.reset_stats
+	- Writing an int to this file will result in resetting all the stats
+	  for that cgroup.
diff --git a/Documentation/admin-guide/cgroup-v1/cgroups.rst b/Documentation/admin-guide/cgroup-v1/cgroups.rst
new file mode 100644
index 000000000000..b0688011ed06
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/cgroups.rst
@@ -0,0 +1,695 @@
+==============
+Control Groups
+==============
+
+Written by Paul Menage <menage@google.com> based on
+Documentation/admin-guide/cgroup-v1/cpusets.rst
+
+Original copyright statements from cpusets.txt:
+
+Portions Copyright (C) 2004 BULL SA.
+
+Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
+
+Modified by Paul Jackson <pj@sgi.com>
+
+Modified by Christoph Lameter <cl@linux.com>
+
+.. CONTENTS:
+
+	1. Control Groups
+	1.1 What are cgroups ?
+	1.2 Why are cgroups needed ?
+	1.3 How are cgroups implemented ?
+	1.4 What does notify_on_release do ?
+	1.5 What does clone_children do ?
+	1.6 How do I use cgroups ?
+	2. Usage Examples and Syntax
+	2.1 Basic Usage
+	2.2 Attaching processes
+	2.3 Mounting hierarchies by name
+	3. Kernel API
+	3.1 Overview
+	3.2 Synchronization
+	3.3 Subsystem API
+	4. Extended attributes usage
+	5. Questions
+
+1. Control Groups
+=================
+
+1.1 What are cgroups ?
+----------------------
+
+Control Groups provide a mechanism for aggregating/partitioning sets of
+tasks, and all their future children, into hierarchical groups with
+specialized behaviour.
+
+Definitions:
+
+A *cgroup* associates a set of tasks with a set of parameters for one
+or more subsystems.
+
+A *subsystem* is a module that makes use of the task grouping
+facilities provided by cgroups to treat groups of tasks in
+particular ways. A subsystem is typically a "resource controller" that
+schedules a resource or applies per-cgroup limits, but it may be
+anything that wants to act on a group of processes, e.g. a
+virtualization subsystem.
+
+A *hierarchy* is a set of cgroups arranged in a tree, such that
+every task in the system is in exactly one of the cgroups in the
+hierarchy, and a set of subsystems; each subsystem has system-specific
+state attached to each cgroup in the hierarchy.  Each hierarchy has
+an instance of the cgroup virtual filesystem associated with it.
+
+At any one time there may be multiple active hierarchies of task
+cgroups. Each hierarchy is a partition of all tasks in the system.
+
+User-level code may create and destroy cgroups by name in an
+instance of the cgroup virtual file system, specify and query to
+which cgroup a task is assigned, and list the task PIDs assigned to
+a cgroup. Those creations and assignments only affect the hierarchy
+associated with that instance of the cgroup file system.
+
+On their own, the only use for cgroups is for simple job
+tracking. The intention is that other subsystems hook into the generic
+cgroup support to provide new attributes for cgroups, such as
+accounting/limiting the resources which processes in a cgroup can
+access. For example, cpusets (see Documentation/admin-guide/cgroup-v1/cpusets.rst) allow
+you to associate a set of CPUs and a set of memory nodes with the
+tasks in each cgroup.
+
+1.2 Why are cgroups needed ?
+----------------------------
+
+There are multiple efforts to provide process aggregations in the
+Linux kernel, mainly for resource-tracking purposes. Such efforts
+include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server
+namespaces. These all require the basic notion of a
+grouping/partitioning of processes, with newly forked processes ending
+up in the same group (cgroup) as their parent process.
+
+The kernel cgroup patch provides the minimum essential kernel
+mechanisms required to efficiently implement such groups. It has
+minimal impact on the system fast paths, and provides hooks for
+specific subsystems such as cpusets to provide additional behaviour as
+desired.
+
+Multiple hierarchy support is provided to allow for situations where
+the division of tasks into cgroups is distinctly different for
+different subsystems - having parallel hierarchies allows each
+hierarchy to be a natural division of tasks, without having to handle
+complex combinations of tasks that would be present if several
+unrelated subsystems needed to be forced into the same tree of
+cgroups.
+
+At one extreme, each resource controller or subsystem could be in a
+separate hierarchy; at the other extreme, all subsystems
+would be attached to the same hierarchy.
+
+As an example of a scenario (originally proposed by vatsa@in.ibm.com)
+that can benefit from multiple hierarchies, consider a large
+university server with various users - students, professors, system
+tasks etc. The resource planning for this server could be along the
+following lines::
+
+       CPU :          "Top cpuset"
+                       /       \
+               CPUSet1         CPUSet2
+                  |               |
+               (Professors)    (Students)
+
+               In addition (system tasks) are attached to topcpuset (so
+               that they can run anywhere) with a limit of 20%
+
+       Memory : Professors (50%), Students (30%), system (20%)
+
+       Disk : Professors (50%), Students (30%), system (20%)
+
+       Network : WWW browsing (20%), Network File System (60%), others (20%)
+                               / \
+               Professors (15%)  students (5%)
+
+Browsers like Firefox/Lynx go into the WWW network class, while (k)nfsd goes
+into the NFS network class.
+
+At the same time Firefox/Lynx will share an appropriate CPU/Memory class
+depending on who launched it (prof/student).
+
+With the ability to classify tasks differently for different resources
+(by putting those resource subsystems in different hierarchies),
+the admin can easily set up a script which receives exec notifications
+and depending on who is launching the browser he can::
+
+    # echo browser_pid > /sys/fs/cgroup/<restype>/<userclass>/tasks
+
+With only a single hierarchy, he now would potentially have to create
+a separate cgroup for every browser launched and associate it with
+appropriate network and other resource class.  This may lead to
+proliferation of such cgroups.
+
+Also let's say that the administrator would like to give enhanced network
+access temporarily to a student's browser (since it is night and the user
+wants to do online gaming :))  OR give one of the student's simulation
+apps enhanced CPU power.
+
+With ability to write PIDs directly to resource classes, it's just a
+matter of::
+
+       # echo pid > /sys/fs/cgroup/network/<new_class>/tasks
+       (after some time)
+       # echo pid > /sys/fs/cgroup/network/<orig_class>/tasks
+
+Without this ability, the administrator would have to split the cgroup into
+multiple separate ones and then associate the new cgroups with the
+new resource classes.
+
+
+
+1.3 How are cgroups implemented ?
+---------------------------------
+
+Control Groups extends the kernel as follows:
+
+ - Each task in the system has a reference-counted pointer to a
+   css_set.
+
+ - A css_set contains a set of reference-counted pointers to
+   cgroup_subsys_state objects, one for each cgroup subsystem
+   registered in the system. There is no direct link from a task to
+   the cgroup of which it's a member in each hierarchy, but this
+   can be determined by following pointers through the
+   cgroup_subsys_state objects. This is because accessing the
+   subsystem state is something that's expected to happen frequently
+   and in performance-critical code, whereas operations that require a
+   task's actual cgroup assignments (in particular, moving between
+   cgroups) are less common. A linked list runs through the cg_list
+   field of each task_struct using the css_set, anchored at
+   css_set->tasks.
+
+ - A cgroup hierarchy filesystem can be mounted for browsing and
+   manipulation from user space.
+
+ - You can list all the tasks (by PID) attached to any cgroup.
+
+The implementation of cgroups requires a few, simple hooks
+into the rest of the kernel, none in performance-critical paths:
+
+ - in init/main.c, to initialize the root cgroups and initial
+   css_set at system boot.
+
+ - in fork and exit, to attach and detach a task from its css_set.
+
+In addition, a new file system of type "cgroup" may be mounted, to
+enable browsing and modifying the cgroups presently known to the
+kernel.  When mounting a cgroup hierarchy, you may specify a
+comma-separated list of subsystems to mount as the filesystem mount
+options.  By default, mounting the cgroup filesystem attempts to
+mount a hierarchy containing all registered subsystems.
+
+If an active hierarchy with exactly the same set of subsystems already
+exists, it will be reused for the new mount. If no existing hierarchy
+matches, and any of the requested subsystems are in use in an existing
+hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy
+is activated, associated with the requested subsystems.
+
+It's not currently possible to bind a new subsystem to an active
+cgroup hierarchy, or to unbind a subsystem from an active cgroup
+hierarchy. This may be possible in future, but is fraught with nasty
+error-recovery issues.
+
+When a cgroup filesystem is unmounted, if there are any
+child cgroups created below the top-level cgroup, that hierarchy
+will remain active even though unmounted; if there are no
+child cgroups then the hierarchy will be deactivated.
+
+No new system calls are added for cgroups - all support for
+querying and modifying cgroups is via this cgroup file system.
+
+Each task under /proc has an added file named 'cgroup' displaying,
+for each active hierarchy, the subsystem names and the cgroup name
+as the path relative to the root of the cgroup file system.
+
+Each cgroup is represented by a directory in the cgroup file system
+containing the following files describing that cgroup:
+
+ - tasks: list of tasks (by PID) attached to that cgroup.  This list
+   is not guaranteed to be sorted.  Writing a thread ID into this file
+   moves the thread into this cgroup.
+ - cgroup.procs: list of thread group IDs in the cgroup.  This list is
+   not guaranteed to be sorted or free of duplicate TGIDs, and userspace
+   should sort/uniquify the list if this property is required.
+   Writing a thread group ID into this file moves all threads in that
+   group into this cgroup.
+ - notify_on_release flag: run the release agent on exit?
+ - release_agent: the path to use for release notifications (this file
+   exists in the top cgroup only)
+
+Other subsystems such as cpusets may add additional files in each
+cgroup dir.
+
+New cgroups are created using the mkdir system call or shell
+command.  The properties of a cgroup, such as its flags, are
+modified by writing to the appropriate file in that cgroups
+directory, as listed above.
+
+The named hierarchical structure of nested cgroups allows partitioning
+a large system into nested, dynamically changeable, "soft-partitions".
+
+The attachment of each task, automatically inherited at fork by any
+children of that task, to a cgroup allows organizing the work load
+on a system into related sets of tasks.  A task may be re-attached to
+any other cgroup, if allowed by the permissions on the necessary
+cgroup file system directories.
+
+When a task is moved from one cgroup to another, it gets a new
+css_set pointer - if there's an already existing css_set with the
+desired collection of cgroups then that group is reused, otherwise a new
+css_set is allocated. The appropriate existing css_set is located by
+looking into a hash table.
+
+To allow access from a cgroup to the css_sets (and hence tasks)
+that comprise it, a set of cg_cgroup_link objects form a lattice;
+each cg_cgroup_link is linked into a list of cg_cgroup_links for
+a single cgroup on its cgrp_link_list field, and a list of
+cg_cgroup_links for a single css_set on its cg_link_list.
+
+Thus the set of tasks in a cgroup can be listed by iterating over
+each css_set that references the cgroup, and sub-iterating over
+each css_set's task set.
+
+The use of a Linux virtual file system (vfs) to represent the
+cgroup hierarchy provides for a familiar permission and name space
+for cgroups, with a minimum of additional kernel code.
+
+1.4 What does notify_on_release do ?
+------------------------------------
+
+If the notify_on_release flag is enabled (1) in a cgroup, then
+whenever the last task in the cgroup leaves (exits or attaches to
+some other cgroup) and the last child cgroup of that cgroup
+is removed, then the kernel runs the command specified by the contents
+of the "release_agent" file in that hierarchy's root directory,
+supplying the pathname (relative to the mount point of the cgroup
+file system) of the abandoned cgroup.  This enables automatic
+removal of abandoned cgroups.  The default value of
+notify_on_release in the root cgroup at system boot is disabled
+(0).  The default value of other cgroups at creation is the current
+value of their parents' notify_on_release settings. The default value of
+a cgroup hierarchy's release_agent path is empty.
+
+1.5 What does clone_children do ?
+---------------------------------
+
+This flag only affects the cpuset controller. If the clone_children
+flag is enabled (1) in a cgroup, a new cpuset cgroup will copy its
+configuration from the parent during initialization.
+
+1.6 How do I use cgroups ?
+--------------------------
+
+To start a new job that is to be contained within a cgroup, using
+the "cpuset" cgroup subsystem, the steps are something like::
+
+ 1) mount -t tmpfs cgroup_root /sys/fs/cgroup
+ 2) mkdir /sys/fs/cgroup/cpuset
+ 3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+ 4) Create the new cgroup by doing mkdir's and write's (or echo's) in
+    the /sys/fs/cgroup/cpuset virtual file system.
+ 5) Start a task that will be the "founding father" of the new job.
+ 6) Attach that task to the new cgroup by writing its PID to the
+    /sys/fs/cgroup/cpuset tasks file for that cgroup.
+ 7) fork, exec or clone the job tasks from this founding father task.
+
+For example, the following sequence of commands will setup a cgroup
+named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
+and then start a subshell 'sh' in that cgroup::
+
+  mount -t tmpfs cgroup_root /sys/fs/cgroup
+  mkdir /sys/fs/cgroup/cpuset
+  mount -t cgroup cpuset -ocpuset /sys/fs/cgroup/cpuset
+  cd /sys/fs/cgroup/cpuset
+  mkdir Charlie
+  cd Charlie
+  /bin/echo 2-3 > cpuset.cpus
+  /bin/echo 1 > cpuset.mems
+  /bin/echo $$ > tasks
+  sh
+  # The subshell 'sh' is now running in cgroup Charlie
+  # The next line should display '/Charlie'
+  cat /proc/self/cgroup
+
+2. Usage Examples and Syntax
+============================
+
+2.1 Basic Usage
+---------------
+
+Creating, modifying, using cgroups can be done through the cgroup
+virtual filesystem.
+
+To mount a cgroup hierarchy with all available subsystems, type::
+
+  # mount -t cgroup xxx /sys/fs/cgroup
+
+The "xxx" is not interpreted by the cgroup code, but will appear in
+/proc/mounts so may be any useful identifying string that you like.
+
+Note: Some subsystems do not work without some user input first.  For instance,
+if cpusets are enabled the user will have to populate the cpus and mems files
+for each new cgroup created before that group can be used.
+
+As explained in section `1.2 Why are cgroups needed?` you should create
+different hierarchies of cgroups for each single resource or group of
+resources you want to control. Therefore, you should mount a tmpfs on
+/sys/fs/cgroup and create directories for each cgroup resource or resource
+group::
+
+  # mount -t tmpfs cgroup_root /sys/fs/cgroup
+  # mkdir /sys/fs/cgroup/rg1
+
+To mount a cgroup hierarchy with just the cpuset and memory
+subsystems, type::
+
+  # mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1
+
+While remounting cgroups is currently supported, it is not recommend
+to use it. Remounting allows changing bound subsystems and
+release_agent. Rebinding is hardly useful as it only works when the
+hierarchy is empty and release_agent itself should be replaced with
+conventional fsnotify. The support for remounting will be removed in
+the future.
+
+To Specify a hierarchy's release_agent::
+
+  # mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \
+    xxx /sys/fs/cgroup/rg1
+
+Note that specifying 'release_agent' more than once will return failure.
+
+Note that changing the set of subsystems is currently only supported
+when the hierarchy consists of a single (root) cgroup. Supporting
+the ability to arbitrarily bind/unbind subsystems from an existing
+cgroup hierarchy is intended to be implemented in the future.
+
+Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the
+tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1
+is the cgroup that holds the whole system.
+
+If you want to change the value of release_agent::
+
+  # echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent
+
+It can also be changed via remount.
+
+If you want to create a new cgroup under /sys/fs/cgroup/rg1::
+
+  # cd /sys/fs/cgroup/rg1
+  # mkdir my_cgroup
+
+Now you want to do something with this cgroup:
+
+  # cd my_cgroup
+
+In this directory you can find several files::
+
+  # ls
+  cgroup.procs notify_on_release tasks
+  (plus whatever files added by the attached subsystems)
+
+Now attach your shell to this cgroup::
+
+  # /bin/echo $$ > tasks
+
+You can also create cgroups inside your cgroup by using mkdir in this
+directory::
+
+  # mkdir my_sub_cs
+
+To remove a cgroup, just use rmdir::
+
+  # rmdir my_sub_cs
+
+This will fail if the cgroup is in use (has cgroups inside, or
+has processes attached, or is held alive by other subsystem-specific
+reference).
+
+2.2 Attaching processes
+-----------------------
+
+::
+
+  # /bin/echo PID > tasks
+
+Note that it is PID, not PIDs. You can only attach ONE task at a time.
+If you have several tasks to attach, you have to do it one after another::
+
+  # /bin/echo PID1 > tasks
+  # /bin/echo PID2 > tasks
+	  ...
+  # /bin/echo PIDn > tasks
+
+You can attach the current shell task by echoing 0::
+
+  # echo 0 > tasks
+
+You can use the cgroup.procs file instead of the tasks file to move all
+threads in a threadgroup at once. Echoing the PID of any task in a
+threadgroup to cgroup.procs causes all tasks in that threadgroup to be
+attached to the cgroup. Writing 0 to cgroup.procs moves all tasks
+in the writing task's threadgroup.
+
+Note: Since every task is always a member of exactly one cgroup in each
+mounted hierarchy, to remove a task from its current cgroup you must
+move it into a new cgroup (possibly the root cgroup) by writing to the
+new cgroup's tasks file.
+
+Note: Due to some restrictions enforced by some cgroup subsystems, moving
+a process to another cgroup can fail.
+
+2.3 Mounting hierarchies by name
+--------------------------------
+
+Passing the name=<x> option when mounting a cgroups hierarchy
+associates the given name with the hierarchy.  This can be used when
+mounting a pre-existing hierarchy, in order to refer to it by name
+rather than by its set of active subsystems.  Each hierarchy is either
+nameless, or has a unique name.
+
+The name should match [\w.-]+
+
+When passing a name=<x> option for a new hierarchy, you need to
+specify subsystems manually; the legacy behaviour of mounting all
+subsystems when none are explicitly specified is not supported when
+you give a subsystem a name.
+
+The name of the subsystem appears as part of the hierarchy description
+in /proc/mounts and /proc/<pid>/cgroups.
+
+
+3. Kernel API
+=============
+
+3.1 Overview
+------------
+
+Each kernel subsystem that wants to hook into the generic cgroup
+system needs to create a cgroup_subsys object. This contains
+various methods, which are callbacks from the cgroup system, along
+with a subsystem ID which will be assigned by the cgroup system.
+
+Other fields in the cgroup_subsys object include:
+
+- subsys_id: a unique array index for the subsystem, indicating which
+  entry in cgroup->subsys[] this subsystem should be managing.
+
+- name: should be initialized to a unique subsystem name. Should be
+  no longer than MAX_CGROUP_TYPE_NAMELEN.
+
+- early_init: indicate if the subsystem needs early initialization
+  at system boot.
+
+Each cgroup object created by the system has an array of pointers,
+indexed by subsystem ID; this pointer is entirely managed by the
+subsystem; the generic cgroup code will never touch this pointer.
+
+3.2 Synchronization
+-------------------
+
+There is a global mutex, cgroup_mutex, used by the cgroup
+system. This should be taken by anything that wants to modify a
+cgroup. It may also be taken to prevent cgroups from being
+modified, but more specific locks may be more appropriate in that
+situation.
+
+See kernel/cgroup.c for more details.
+
+Subsystems can take/release the cgroup_mutex via the functions
+cgroup_lock()/cgroup_unlock().
+
+Accessing a task's cgroup pointer may be done in the following ways:
+- while holding cgroup_mutex
+- while holding the task's alloc_lock (via task_lock())
+- inside an rcu_read_lock() section via rcu_dereference()
+
+3.3 Subsystem API
+-----------------
+
+Each subsystem should:
+
+- add an entry in linux/cgroup_subsys.h
+- define a cgroup_subsys object called <name>_cgrp_subsys
+
+Each subsystem may export the following methods. The only mandatory
+methods are css_alloc/free. Any others that are null are presumed to
+be successful no-ops.
+
+``struct cgroup_subsys_state *css_alloc(struct cgroup *cgrp)``
+(cgroup_mutex held by caller)
+
+Called to allocate a subsystem state object for a cgroup. The
+subsystem should allocate its subsystem state object for the passed
+cgroup, returning a pointer to the new object on success or a
+ERR_PTR() value. On success, the subsystem pointer should point to
+a structure of type cgroup_subsys_state (typically embedded in a
+larger subsystem-specific object), which will be initialized by the
+cgroup system. Note that this will be called at initialization to
+create the root subsystem state for this subsystem; this case can be
+identified by the passed cgroup object having a NULL parent (since
+it's the root of the hierarchy) and may be an appropriate place for
+initialization code.
+
+``int css_online(struct cgroup *cgrp)``
+(cgroup_mutex held by caller)
+
+Called after @cgrp successfully completed all allocations and made
+visible to cgroup_for_each_child/descendant_*() iterators. The
+subsystem may choose to fail creation by returning -errno. This
+callback can be used to implement reliable state sharing and
+propagation along the hierarchy. See the comment on
+cgroup_for_each_descendant_pre() for details.
+
+``void css_offline(struct cgroup *cgrp);``
+(cgroup_mutex held by caller)
+
+This is the counterpart of css_online() and called iff css_online()
+has succeeded on @cgrp. This signifies the beginning of the end of
+@cgrp. @cgrp is being removed and the subsystem should start dropping
+all references it's holding on @cgrp. When all references are dropped,
+cgroup removal will proceed to the next step - css_free(). After this
+callback, @cgrp should be considered dead to the subsystem.
+
+``void css_free(struct cgroup *cgrp)``
+(cgroup_mutex held by caller)
+
+The cgroup system is about to free @cgrp; the subsystem should free
+its subsystem state object. By the time this method is called, @cgrp
+is completely unused; @cgrp->parent is still valid. (Note - can also
+be called for a newly-created cgroup if an error occurs after this
+subsystem's create() method has been called for the new cgroup).
+
+``int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)``
+(cgroup_mutex held by caller)
+
+Called prior to moving one or more tasks into a cgroup; if the
+subsystem returns an error, this will abort the attach operation.
+@tset contains the tasks to be attached and is guaranteed to have at
+least one task in it.
+
+If there are multiple tasks in the taskset, then:
+  - it's guaranteed that all are from the same thread group
+  - @tset contains all tasks from the thread group whether or not
+    they're switching cgroups
+  - the first task is the leader
+
+Each @tset entry also contains the task's old cgroup and tasks which
+aren't switching cgroup can be skipped easily using the
+cgroup_taskset_for_each() iterator. Note that this isn't called on a
+fork. If this method returns 0 (success) then this should remain valid
+while the caller holds cgroup_mutex and it is ensured that either
+attach() or cancel_attach() will be called in future.
+
+``void css_reset(struct cgroup_subsys_state *css)``
+(cgroup_mutex held by caller)
+
+An optional operation which should restore @css's configuration to the
+initial state.  This is currently only used on the unified hierarchy
+when a subsystem is disabled on a cgroup through
+"cgroup.subtree_control" but should remain enabled because other
+subsystems depend on it.  cgroup core makes such a css invisible by
+removing the associated interface files and invokes this callback so
+that the hidden subsystem can return to the initial neutral state.
+This prevents unexpected resource control from a hidden css and
+ensures that the configuration is in the initial state when it is made
+visible again later.
+
+``void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)``
+(cgroup_mutex held by caller)
+
+Called when a task attach operation has failed after can_attach() has succeeded.
+A subsystem whose can_attach() has some side-effects should provide this
+function, so that the subsystem can implement a rollback. If not, not necessary.
+This will be called only about subsystems whose can_attach() operation have
+succeeded. The parameters are identical to can_attach().
+
+``void attach(struct cgroup *cgrp, struct cgroup_taskset *tset)``
+(cgroup_mutex held by caller)
+
+Called after the task has been attached to the cgroup, to allow any
+post-attachment activity that requires memory allocations or blocking.
+The parameters are identical to can_attach().
+
+``void fork(struct task_struct *task)``
+
+Called when a task is forked into a cgroup.
+
+``void exit(struct task_struct *task)``
+
+Called during task exit.
+
+``void free(struct task_struct *task)``
+
+Called when the task_struct is freed.
+
+``void bind(struct cgroup *root)``
+(cgroup_mutex held by caller)
+
+Called when a cgroup subsystem is rebound to a different hierarchy
+and root cgroup. Currently this will only involve movement between
+the default hierarchy (which never has sub-cgroups) and a hierarchy
+that is being created/destroyed (and hence has no sub-cgroups).
+
+4. Extended attribute usage
+===========================
+
+cgroup filesystem supports certain types of extended attributes in its
+directories and files.  The current supported types are:
+
+	- Trusted (XATTR_TRUSTED)
+	- Security (XATTR_SECURITY)
+
+Both require CAP_SYS_ADMIN capability to set.
+
+Like in tmpfs, the extended attributes in cgroup filesystem are stored
+using kernel memory and it's advised to keep the usage at minimum.  This
+is the reason why user defined extended attributes are not supported, since
+any user can do it and there's no limit in the value size.
+
+The current known users for this feature are SELinux to limit cgroup usage
+in containers and systemd for assorted meta data like main PID in a cgroup
+(systemd creates a cgroup per service).
+
+5. Questions
+============
+
+::
+
+  Q: what's up with this '/bin/echo' ?
+  A: bash's builtin 'echo' command does not check calls to write() against
+     errors. If you use it in the cgroup file system, you won't be
+     able to tell whether a command succeeded or failed.
+
+  Q: When I attach processes, only the first of the line gets really attached !
+  A: We can only return one error code per call to write(). So you should also
+     put only ONE PID.
diff --git a/Documentation/admin-guide/cgroup-v1/cpuacct.rst b/Documentation/admin-guide/cgroup-v1/cpuacct.rst
new file mode 100644
index 000000000000..d30ed81d2ad7
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/cpuacct.rst
@@ -0,0 +1,50 @@
+=========================
+CPU Accounting Controller
+=========================
+
+The CPU accounting controller is used to group tasks using cgroups and
+account the CPU usage of these groups of tasks.
+
+The CPU accounting controller supports multi-hierarchy groups. An accounting
+group accumulates the CPU usage of all of its child groups and the tasks
+directly present in its group.
+
+Accounting groups can be created by first mounting the cgroup filesystem::
+
+  # mount -t cgroup -ocpuacct none /sys/fs/cgroup
+
+With the above step, the initial or the parent accounting group becomes
+visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
+the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
+/sys/fs/cgroup/cpuacct.usage gives the CPU time (in nanoseconds) obtained
+by this group which is essentially the CPU time obtained by all the tasks
+in the system.
+
+New accounting groups can be created under the parent group /sys/fs/cgroup::
+
+  # cd /sys/fs/cgroup
+  # mkdir g1
+  # echo $$ > g1/tasks
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it. CPU time consumed by this bash and its children
+can be obtained from g1/cpuacct.usage and the same is accumulated in
+/sys/fs/cgroup/cpuacct.usage also.
+
+cpuacct.stat file lists a few statistics which further divide the
+CPU time obtained by the cgroup into user and system times. Currently
+the following statistics are supported:
+
+user: Time spent by tasks of the cgroup in user mode.
+system: Time spent by tasks of the cgroup in kernel mode.
+
+user and system are in USER_HZ unit.
+
+cpuacct controller uses percpu_counter interface to collect user and
+system times. This has two side effects:
+
+- It is theoretically possible to see wrong values for user and system times.
+  This is because percpu_counter_read() on 32bit systems isn't safe
+  against concurrent writes.
+- It is possible to see slightly outdated values for user and system times
+  due to the batch processing nature of percpu_counter.
diff --git a/Documentation/admin-guide/cgroup-v1/cpusets.rst b/Documentation/admin-guide/cgroup-v1/cpusets.rst
new file mode 100644
index 000000000000..86a6ae995d54
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/cpusets.rst
@@ -0,0 +1,866 @@
+=======
+CPUSETS
+=======
+
+Copyright (C) 2004 BULL SA.
+
+Written by Simon.Derr@bull.net
+
+- Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
+- Modified by Paul Jackson <pj@sgi.com>
+- Modified by Christoph Lameter <cl@linux.com>
+- Modified by Paul Menage <menage@google.com>
+- Modified by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+
+.. CONTENTS:
+
+   1. Cpusets
+     1.1 What are cpusets ?
+     1.2 Why are cpusets needed ?
+     1.3 How are cpusets implemented ?
+     1.4 What are exclusive cpusets ?
+     1.5 What is memory_pressure ?
+     1.6 What is memory spread ?
+     1.7 What is sched_load_balance ?
+     1.8 What is sched_relax_domain_level ?
+     1.9 How do I use cpusets ?
+   2. Usage Examples and Syntax
+     2.1 Basic Usage
+     2.2 Adding/removing cpus
+     2.3 Setting flags
+     2.4 Attaching processes
+   3. Questions
+   4. Contact
+
+1. Cpusets
+==========
+
+1.1 What are cpusets ?
+----------------------
+
+Cpusets provide a mechanism for assigning a set of CPUs and Memory
+Nodes to a set of tasks.   In this document "Memory Node" refers to
+an on-line node that contains memory.
+
+Cpusets constrain the CPU and Memory placement of tasks to only
+the resources within a task's current cpuset.  They form a nested
+hierarchy visible in a virtual file system.  These are the essential
+hooks, beyond what is already present, required to manage dynamic
+job placement on large systems.
+
+Cpusets use the generic cgroup subsystem described in
+Documentation/admin-guide/cgroup-v1/cgroups.rst.
+
+Requests by a task, using the sched_setaffinity(2) system call to
+include CPUs in its CPU affinity mask, and using the mbind(2) and
+set_mempolicy(2) system calls to include Memory Nodes in its memory
+policy, are both filtered through that task's cpuset, filtering out any
+CPUs or Memory Nodes not in that cpuset.  The scheduler will not
+schedule a task on a CPU that is not allowed in its cpus_allowed
+vector, and the kernel page allocator will not allocate a page on a
+node that is not allowed in the requesting task's mems_allowed vector.
+
+User level code may create and destroy cpusets by name in the cgroup
+virtual file system, manage the attributes and permissions of these
+cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
+specify and query to which cpuset a task is assigned, and list the
+task pids assigned to a cpuset.
+
+
+1.2 Why are cpusets needed ?
+----------------------------
+
+The management of large computer systems, with many processors (CPUs),
+complex memory cache hierarchies and multiple Memory Nodes having
+non-uniform access times (NUMA) presents additional challenges for
+the efficient scheduling and memory placement of processes.
+
+Frequently more modest sized systems can be operated with adequate
+efficiency just by letting the operating system automatically share
+the available CPU and Memory resources amongst the requesting tasks.
+
+But larger systems, which benefit more from careful processor and
+memory placement to reduce memory access times and contention,
+and which typically represent a larger investment for the customer,
+can benefit from explicitly placing jobs on properly sized subsets of
+the system.
+
+This can be especially valuable on:
+
+    * Web Servers running multiple instances of the same web application,
+    * Servers running different applications (for instance, a web server
+      and a database), or
+    * NUMA systems running large HPC applications with demanding
+      performance characteristics.
+
+These subsets, or "soft partitions" must be able to be dynamically
+adjusted, as the job mix changes, without impacting other concurrently
+executing jobs. The location of the running jobs pages may also be moved
+when the memory locations are changed.
+
+The kernel cpuset patch provides the minimum essential kernel
+mechanisms required to efficiently implement such subsets.  It
+leverages existing CPU and Memory Placement facilities in the Linux
+kernel to avoid any additional impact on the critical scheduler or
+memory allocator code.
+
+
+1.3 How are cpusets implemented ?
+---------------------------------
+
+Cpusets provide a Linux kernel mechanism to constrain which CPUs and
+Memory Nodes are used by a process or set of processes.
+
+The Linux kernel already has a pair of mechanisms to specify on which
+CPUs a task may be scheduled (sched_setaffinity) and on which Memory
+Nodes it may obtain memory (mbind, set_mempolicy).
+
+Cpusets extends these two mechanisms as follows:
+
+ - Cpusets are sets of allowed CPUs and Memory Nodes, known to the
+   kernel.
+ - Each task in the system is attached to a cpuset, via a pointer
+   in the task structure to a reference counted cgroup structure.
+ - Calls to sched_setaffinity are filtered to just those CPUs
+   allowed in that task's cpuset.
+ - Calls to mbind and set_mempolicy are filtered to just
+   those Memory Nodes allowed in that task's cpuset.
+ - The root cpuset contains all the systems CPUs and Memory
+   Nodes.
+ - For any cpuset, one can define child cpusets containing a subset
+   of the parents CPU and Memory Node resources.
+ - The hierarchy of cpusets can be mounted at /dev/cpuset, for
+   browsing and manipulation from user space.
+ - A cpuset may be marked exclusive, which ensures that no other
+   cpuset (except direct ancestors and descendants) may contain
+   any overlapping CPUs or Memory Nodes.
+ - You can list all the tasks (by pid) attached to any cpuset.
+
+The implementation of cpusets requires a few, simple hooks
+into the rest of the kernel, none in performance critical paths:
+
+ - in init/main.c, to initialize the root cpuset at system boot.
+ - in fork and exit, to attach and detach a task from its cpuset.
+ - in sched_setaffinity, to mask the requested CPUs by what's
+   allowed in that task's cpuset.
+ - in sched.c migrate_live_tasks(), to keep migrating tasks within
+   the CPUs allowed by their cpuset, if possible.
+ - in the mbind and set_mempolicy system calls, to mask the requested
+   Memory Nodes by what's allowed in that task's cpuset.
+ - in page_alloc.c, to restrict memory to allowed nodes.
+ - in vmscan.c, to restrict page recovery to the current cpuset.
+
+You should mount the "cgroup" filesystem type in order to enable
+browsing and modifying the cpusets presently known to the kernel.  No
+new system calls are added for cpusets - all support for querying and
+modifying cpusets is via this cpuset file system.
+
+The /proc/<pid>/status file for each task has four added lines,
+displaying the task's cpus_allowed (on which CPUs it may be scheduled)
+and mems_allowed (on which Memory Nodes it may obtain memory),
+in the two formats seen in the following example::
+
+  Cpus_allowed:   ffffffff,ffffffff,ffffffff,ffffffff
+  Cpus_allowed_list:      0-127
+  Mems_allowed:   ffffffff,ffffffff
+  Mems_allowed_list:      0-63
+
+Each cpuset is represented by a directory in the cgroup file system
+containing (on top of the standard cgroup files) the following
+files describing that cpuset:
+
+ - cpuset.cpus: list of CPUs in that cpuset
+ - cpuset.mems: list of Memory Nodes in that cpuset
+ - cpuset.memory_migrate flag: if set, move pages to cpusets nodes
+ - cpuset.cpu_exclusive flag: is cpu placement exclusive?
+ - cpuset.mem_exclusive flag: is memory placement exclusive?
+ - cpuset.mem_hardwall flag:  is memory allocation hardwalled
+ - cpuset.memory_pressure: measure of how much paging pressure in cpuset
+ - cpuset.memory_spread_page flag: if set, spread page cache evenly on allowed nodes
+ - cpuset.memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes
+ - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset
+ - cpuset.sched_relax_domain_level: the searching range when migrating tasks
+
+In addition, only the root cpuset has the following file:
+
+ - cpuset.memory_pressure_enabled flag: compute memory_pressure?
+
+New cpusets are created using the mkdir system call or shell
+command.  The properties of a cpuset, such as its flags, allowed
+CPUs and Memory Nodes, and attached tasks, are modified by writing
+to the appropriate file in that cpusets directory, as listed above.
+
+The named hierarchical structure of nested cpusets allows partitioning
+a large system into nested, dynamically changeable, "soft-partitions".
+
+The attachment of each task, automatically inherited at fork by any
+children of that task, to a cpuset allows organizing the work load
+on a system into related sets of tasks such that each set is constrained
+to using the CPUs and Memory Nodes of a particular cpuset.  A task
+may be re-attached to any other cpuset, if allowed by the permissions
+on the necessary cpuset file system directories.
+
+Such management of a system "in the large" integrates smoothly with
+the detailed placement done on individual tasks and memory regions
+using the sched_setaffinity, mbind and set_mempolicy system calls.
+
+The following rules apply to each cpuset:
+
+ - Its CPUs and Memory Nodes must be a subset of its parents.
+ - It can't be marked exclusive unless its parent is.
+ - If its cpu or memory is exclusive, they may not overlap any sibling.
+
+These rules, and the natural hierarchy of cpusets, enable efficient
+enforcement of the exclusive guarantee, without having to scan all
+cpusets every time any of them change to ensure nothing overlaps a
+exclusive cpuset.  Also, the use of a Linux virtual file system (vfs)
+to represent the cpuset hierarchy provides for a familiar permission
+and name space for cpusets, with a minimum of additional kernel code.
+
+The cpus and mems files in the root (top_cpuset) cpuset are
+read-only.  The cpus file automatically tracks the value of
+cpu_online_mask using a CPU hotplug notifier, and the mems file
+automatically tracks the value of node_states[N_MEMORY]--i.e.,
+nodes with memory--using the cpuset_track_online_nodes() hook.
+
+
+1.4 What are exclusive cpusets ?
+--------------------------------
+
+If a cpuset is cpu or mem exclusive, no other cpuset, other than
+a direct ancestor or descendant, may share any of the same CPUs or
+Memory Nodes.
+
+A cpuset that is cpuset.mem_exclusive *or* cpuset.mem_hardwall is "hardwalled",
+i.e. it restricts kernel allocations for page, buffer and other data
+commonly shared by the kernel across multiple users.  All cpusets,
+whether hardwalled or not, restrict allocations of memory for user
+space.  This enables configuring a system so that several independent
+jobs can share common kernel data, such as file system pages, while
+isolating each job's user allocation in its own cpuset.  To do this,
+construct a large mem_exclusive cpuset to hold all the jobs, and
+construct child, non-mem_exclusive cpusets for each individual job.
+Only a small amount of typical kernel memory, such as requests from
+interrupt handlers, is allowed to be taken outside even a
+mem_exclusive cpuset.
+
+
+1.5 What is memory_pressure ?
+-----------------------------
+The memory_pressure of a cpuset provides a simple per-cpuset metric
+of the rate that the tasks in a cpuset are attempting to free up in
+use memory on the nodes of the cpuset to satisfy additional memory
+requests.
+
+This enables batch managers monitoring jobs running in dedicated
+cpusets to efficiently detect what level of memory pressure that job
+is causing.
+
+This is useful both on tightly managed systems running a wide mix of
+submitted jobs, which may choose to terminate or re-prioritize jobs that
+are trying to use more memory than allowed on the nodes assigned to them,
+and with tightly coupled, long running, massively parallel scientific
+computing jobs that will dramatically fail to meet required performance
+goals if they start to use more memory than allowed to them.
+
+This mechanism provides a very economical way for the batch manager
+to monitor a cpuset for signs of memory pressure.  It's up to the
+batch manager or other user code to decide what to do about it and
+take action.
+
+==>
+    Unless this feature is enabled by writing "1" to the special file
+    /dev/cpuset/memory_pressure_enabled, the hook in the rebalance
+    code of __alloc_pages() for this metric reduces to simply noticing
+    that the cpuset_memory_pressure_enabled flag is zero.  So only
+    systems that enable this feature will compute the metric.
+
+Why a per-cpuset, running average:
+
+    Because this meter is per-cpuset, rather than per-task or mm,
+    the system load imposed by a batch scheduler monitoring this
+    metric is sharply reduced on large systems, because a scan of
+    the tasklist can be avoided on each set of queries.
+
+    Because this meter is a running average, instead of an accumulating
+    counter, a batch scheduler can detect memory pressure with a
+    single read, instead of having to read and accumulate results
+    for a period of time.
+
+    Because this meter is per-cpuset rather than per-task or mm,
+    the batch scheduler can obtain the key information, memory
+    pressure in a cpuset, with a single read, rather than having to
+    query and accumulate results over all the (dynamically changing)
+    set of tasks in the cpuset.
+
+A per-cpuset simple digital filter (requires a spinlock and 3 words
+of data per-cpuset) is kept, and updated by any task attached to that
+cpuset, if it enters the synchronous (direct) page reclaim code.
+
+A per-cpuset file provides an integer number representing the recent
+(half-life of 10 seconds) rate of direct page reclaims caused by
+the tasks in the cpuset, in units of reclaims attempted per second,
+times 1000.
+
+
+1.6 What is memory spread ?
+---------------------------
+There are two boolean flag files per cpuset that control where the
+kernel allocates pages for the file system buffers and related in
+kernel data structures.  They are called 'cpuset.memory_spread_page' and
+'cpuset.memory_spread_slab'.
+
+If the per-cpuset boolean flag file 'cpuset.memory_spread_page' is set, then
+the kernel will spread the file system buffers (page cache) evenly
+over all the nodes that the faulting task is allowed to use, instead
+of preferring to put those pages on the node where the task is running.
+
+If the per-cpuset boolean flag file 'cpuset.memory_spread_slab' is set,
+then the kernel will spread some file system related slab caches,
+such as for inodes and dentries evenly over all the nodes that the
+faulting task is allowed to use, instead of preferring to put those
+pages on the node where the task is running.
+
+The setting of these flags does not affect anonymous data segment or
+stack segment pages of a task.
+
+By default, both kinds of memory spreading are off, and memory
+pages are allocated on the node local to where the task is running,
+except perhaps as modified by the task's NUMA mempolicy or cpuset
+configuration, so long as sufficient free memory pages are available.
+
+When new cpusets are created, they inherit the memory spread settings
+of their parent.
+
+Setting memory spreading causes allocations for the affected page
+or slab caches to ignore the task's NUMA mempolicy and be spread
+instead.    Tasks using mbind() or set_mempolicy() calls to set NUMA
+mempolicies will not notice any change in these calls as a result of
+their containing task's memory spread settings.  If memory spreading
+is turned off, then the currently specified NUMA mempolicy once again
+applies to memory page allocations.
+
+Both 'cpuset.memory_spread_page' and 'cpuset.memory_spread_slab' are boolean flag
+files.  By default they contain "0", meaning that the feature is off
+for that cpuset.  If a "1" is written to that file, then that turns
+the named feature on.
+
+The implementation is simple.
+
+Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag
+PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently
+joins that cpuset.  The page allocation calls for the page cache
+is modified to perform an inline check for this PFA_SPREAD_PAGE task
+flag, and if set, a call to a new routine cpuset_mem_spread_node()
+returns the node to prefer for the allocation.
+
+Similarly, setting 'cpuset.memory_spread_slab' turns on the flag
+PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate
+pages from the node returned by cpuset_mem_spread_node().
+
+The cpuset_mem_spread_node() routine is also simple.  It uses the
+value of a per-task rotor cpuset_mem_spread_rotor to select the next
+node in the current task's mems_allowed to prefer for the allocation.
+
+This memory placement policy is also known (in other contexts) as
+round-robin or interleave.
+
+This policy can provide substantial improvements for jobs that need
+to place thread local data on the corresponding node, but that need
+to access large file system data sets that need to be spread across
+the several nodes in the jobs cpuset in order to fit.  Without this
+policy, especially for jobs that might have one thread reading in the
+data set, the memory allocation across the nodes in the jobs cpuset
+can become very uneven.
+
+1.7 What is sched_load_balance ?
+--------------------------------
+
+The kernel scheduler (kernel/sched/core.c) automatically load balances
+tasks.  If one CPU is underutilized, kernel code running on that
+CPU will look for tasks on other more overloaded CPUs and move those
+tasks to itself, within the constraints of such placement mechanisms
+as cpusets and sched_setaffinity.
+
+The algorithmic cost of load balancing and its impact on key shared
+kernel data structures such as the task list increases more than
+linearly with the number of CPUs being balanced.  So the scheduler
+has support to partition the systems CPUs into a number of sched
+domains such that it only load balances within each sched domain.
+Each sched domain covers some subset of the CPUs in the system;
+no two sched domains overlap; some CPUs might not be in any sched
+domain and hence won't be load balanced.
+
+Put simply, it costs less to balance between two smaller sched domains
+than one big one, but doing so means that overloads in one of the
+two domains won't be load balanced to the other one.
+
+By default, there is one sched domain covering all CPUs, including those
+marked isolated using the kernel boot time "isolcpus=" argument. However,
+the isolated CPUs will not participate in load balancing, and will not
+have tasks running on them unless explicitly assigned.
+
+This default load balancing across all CPUs is not well suited for
+the following two situations:
+
+ 1) On large systems, load balancing across many CPUs is expensive.
+    If the system is managed using cpusets to place independent jobs
+    on separate sets of CPUs, full load balancing is unnecessary.
+ 2) Systems supporting realtime on some CPUs need to minimize
+    system overhead on those CPUs, including avoiding task load
+    balancing if that is not needed.
+
+When the per-cpuset flag "cpuset.sched_load_balance" is enabled (the default
+setting), it requests that all the CPUs in that cpusets allowed 'cpuset.cpus'
+be contained in a single sched domain, ensuring that load balancing
+can move a task (not otherwised pinned, as by sched_setaffinity)
+from any CPU in that cpuset to any other.
+
+When the per-cpuset flag "cpuset.sched_load_balance" is disabled, then the
+scheduler will avoid load balancing across the CPUs in that cpuset,
+--except-- in so far as is necessary because some overlapping cpuset
+has "sched_load_balance" enabled.
+
+So, for example, if the top cpuset has the flag "cpuset.sched_load_balance"
+enabled, then the scheduler will have one sched domain covering all
+CPUs, and the setting of the "cpuset.sched_load_balance" flag in any other
+cpusets won't matter, as we're already fully load balancing.
+
+Therefore in the above two situations, the top cpuset flag
+"cpuset.sched_load_balance" should be disabled, and only some of the smaller,
+child cpusets have this flag enabled.
+
+When doing this, you don't usually want to leave any unpinned tasks in
+the top cpuset that might use non-trivial amounts of CPU, as such tasks
+may be artificially constrained to some subset of CPUs, depending on
+the particulars of this flag setting in descendant cpusets.  Even if
+such a task could use spare CPU cycles in some other CPUs, the kernel
+scheduler might not consider the possibility of load balancing that
+task to that underused CPU.
+
+Of course, tasks pinned to a particular CPU can be left in a cpuset
+that disables "cpuset.sched_load_balance" as those tasks aren't going anywhere
+else anyway.
+
+There is an impedance mismatch here, between cpusets and sched domains.
+Cpusets are hierarchical and nest.  Sched domains are flat; they don't
+overlap and each CPU is in at most one sched domain.
+
+It is necessary for sched domains to be flat because load balancing
+across partially overlapping sets of CPUs would risk unstable dynamics
+that would be beyond our understanding.  So if each of two partially
+overlapping cpusets enables the flag 'cpuset.sched_load_balance', then we
+form a single sched domain that is a superset of both.  We won't move
+a task to a CPU outside its cpuset, but the scheduler load balancing
+code might waste some compute cycles considering that possibility.
+
+This mismatch is why there is not a simple one-to-one relation
+between which cpusets have the flag "cpuset.sched_load_balance" enabled,
+and the sched domain configuration.  If a cpuset enables the flag, it
+will get balancing across all its CPUs, but if it disables the flag,
+it will only be assured of no load balancing if no other overlapping
+cpuset enables the flag.
+
+If two cpusets have partially overlapping 'cpuset.cpus' allowed, and only
+one of them has this flag enabled, then the other may find its
+tasks only partially load balanced, just on the overlapping CPUs.
+This is just the general case of the top_cpuset example given a few
+paragraphs above.  In the general case, as in the top cpuset case,
+don't leave tasks that might use non-trivial amounts of CPU in
+such partially load balanced cpusets, as they may be artificially
+constrained to some subset of the CPUs allowed to them, for lack of
+load balancing to the other CPUs.
+
+CPUs in "cpuset.isolcpus" were excluded from load balancing by the
+isolcpus= kernel boot option, and will never be load balanced regardless
+of the value of "cpuset.sched_load_balance" in any cpuset.
+
+1.7.1 sched_load_balance implementation details.
+------------------------------------------------
+
+The per-cpuset flag 'cpuset.sched_load_balance' defaults to enabled (contrary
+to most cpuset flags.)  When enabled for a cpuset, the kernel will
+ensure that it can load balance across all the CPUs in that cpuset
+(makes sure that all the CPUs in the cpus_allowed of that cpuset are
+in the same sched domain.)
+
+If two overlapping cpusets both have 'cpuset.sched_load_balance' enabled,
+then they will be (must be) both in the same sched domain.
+
+If, as is the default, the top cpuset has 'cpuset.sched_load_balance' enabled,
+then by the above that means there is a single sched domain covering
+the whole system, regardless of any other cpuset settings.
+
+The kernel commits to user space that it will avoid load balancing
+where it can.  It will pick as fine a granularity partition of sched
+domains as it can while still providing load balancing for any set
+of CPUs allowed to a cpuset having 'cpuset.sched_load_balance' enabled.
+
+The internal kernel cpuset to scheduler interface passes from the
+cpuset code to the scheduler code a partition of the load balanced
+CPUs in the system. This partition is a set of subsets (represented
+as an array of struct cpumask) of CPUs, pairwise disjoint, that cover
+all the CPUs that must be load balanced.
+
+The cpuset code builds a new such partition and passes it to the
+scheduler sched domain setup code, to have the sched domains rebuilt
+as necessary, whenever:
+
+ - the 'cpuset.sched_load_balance' flag of a cpuset with non-empty CPUs changes,
+ - or CPUs come or go from a cpuset with this flag enabled,
+ - or 'cpuset.sched_relax_domain_level' value of a cpuset with non-empty CPUs
+   and with this flag enabled changes,
+ - or a cpuset with non-empty CPUs and with this flag enabled is removed,
+ - or a cpu is offlined/onlined.
+
+This partition exactly defines what sched domains the scheduler should
+setup - one sched domain for each element (struct cpumask) in the
+partition.
+
+The scheduler remembers the currently active sched domain partitions.
+When the scheduler routine partition_sched_domains() is invoked from
+the cpuset code to update these sched domains, it compares the new
+partition requested with the current, and updates its sched domains,
+removing the old and adding the new, for each change.
+
+
+1.8 What is sched_relax_domain_level ?
+--------------------------------------
+
+In sched domain, the scheduler migrates tasks in 2 ways; periodic load
+balance on tick, and at time of some schedule events.
+
+When a task is woken up, scheduler try to move the task on idle CPU.
+For example, if a task A running on CPU X activates another task B
+on the same CPU X, and if CPU Y is X's sibling and performing idle,
+then scheduler migrate task B to CPU Y so that task B can start on
+CPU Y without waiting task A on CPU X.
+
+And if a CPU run out of tasks in its runqueue, the CPU try to pull
+extra tasks from other busy CPUs to help them before it is going to
+be idle.
+
+Of course it takes some searching cost to find movable tasks and/or
+idle CPUs, the scheduler might not search all CPUs in the domain
+every time.  In fact, in some architectures, the searching ranges on
+events are limited in the same socket or node where the CPU locates,
+while the load balance on tick searches all.
+
+For example, assume CPU Z is relatively far from CPU X.  Even if CPU Z
+is idle while CPU X and the siblings are busy, scheduler can't migrate
+woken task B from X to Z since it is out of its searching range.
+As the result, task B on CPU X need to wait task A or wait load balance
+on the next tick.  For some applications in special situation, waiting
+1 tick may be too long.
+
+The 'cpuset.sched_relax_domain_level' file allows you to request changing
+this searching range as you like.  This file takes int value which
+indicates size of searching range in levels ideally as follows,
+otherwise initial value -1 that indicates the cpuset has no request.
+
+====== ===========================================================
+  -1   no request. use system default or follow request of others.
+   0   no search.
+   1   search siblings (hyperthreads in a core).
+   2   search cores in a package.
+   3   search cpus in a node [= system wide on non-NUMA system]
+   4   search nodes in a chunk of node [on NUMA system]
+   5   search system wide [on NUMA system]
+====== ===========================================================
+
+The system default is architecture dependent.  The system default
+can be changed using the relax_domain_level= boot parameter.
+
+This file is per-cpuset and affect the sched domain where the cpuset
+belongs to.  Therefore if the flag 'cpuset.sched_load_balance' of a cpuset
+is disabled, then 'cpuset.sched_relax_domain_level' have no effect since
+there is no sched domain belonging the cpuset.
+
+If multiple cpusets are overlapping and hence they form a single sched
+domain, the largest value among those is used.  Be careful, if one
+requests 0 and others are -1 then 0 is used.
+
+Note that modifying this file will have both good and bad effects,
+and whether it is acceptable or not depends on your situation.
+Don't modify this file if you are not sure.
+
+If your situation is:
+
+ - The migration costs between each cpu can be assumed considerably
+   small(for you) due to your special application's behavior or
+   special hardware support for CPU cache etc.
+ - The searching cost doesn't have impact(for you) or you can make
+   the searching cost enough small by managing cpuset to compact etc.
+ - The latency is required even it sacrifices cache hit rate etc.
+   then increasing 'sched_relax_domain_level' would benefit you.
+
+
+1.9 How do I use cpusets ?
+--------------------------
+
+In order to minimize the impact of cpusets on critical kernel
+code, such as the scheduler, and due to the fact that the kernel
+does not support one task updating the memory placement of another
+task directly, the impact on a task of changing its cpuset CPU
+or Memory Node placement, or of changing to which cpuset a task
+is attached, is subtle.
+
+If a cpuset has its Memory Nodes modified, then for each task attached
+to that cpuset, the next time that the kernel attempts to allocate
+a page of memory for that task, the kernel will notice the change
+in the task's cpuset, and update its per-task memory placement to
+remain within the new cpusets memory placement.  If the task was using
+mempolicy MPOL_BIND, and the nodes to which it was bound overlap with
+its new cpuset, then the task will continue to use whatever subset
+of MPOL_BIND nodes are still allowed in the new cpuset.  If the task
+was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed
+in the new cpuset, then the task will be essentially treated as if it
+was MPOL_BIND bound to the new cpuset (even though its NUMA placement,
+as queried by get_mempolicy(), doesn't change).  If a task is moved
+from one cpuset to another, then the kernel will adjust the task's
+memory placement, as above, the next time that the kernel attempts
+to allocate a page of memory for that task.
+
+If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset
+will have its allowed CPU placement changed immediately.  Similarly,
+if a task's pid is written to another cpuset's 'tasks' file, then its
+allowed CPU placement is changed immediately.  If such a task had been
+bound to some subset of its cpuset using the sched_setaffinity() call,
+the task will be allowed to run on any CPU allowed in its new cpuset,
+negating the effect of the prior sched_setaffinity() call.
+
+In summary, the memory placement of a task whose cpuset is changed is
+updated by the kernel, on the next allocation of a page for that task,
+and the processor placement is updated immediately.
+
+Normally, once a page is allocated (given a physical page
+of main memory) then that page stays on whatever node it
+was allocated, so long as it remains allocated, even if the
+cpusets memory placement policy 'cpuset.mems' subsequently changes.
+If the cpuset flag file 'cpuset.memory_migrate' is set true, then when
+tasks are attached to that cpuset, any pages that task had
+allocated to it on nodes in its previous cpuset are migrated
+to the task's new cpuset. The relative placement of the page within
+the cpuset is preserved during these migration operations if possible.
+For example if the page was on the second valid node of the prior cpuset
+then the page will be placed on the second valid node of the new cpuset.
+
+Also if 'cpuset.memory_migrate' is set true, then if that cpuset's
+'cpuset.mems' file is modified, pages allocated to tasks in that
+cpuset, that were on nodes in the previous setting of 'cpuset.mems',
+will be moved to nodes in the new setting of 'mems.'
+Pages that were not in the task's prior cpuset, or in the cpuset's
+prior 'cpuset.mems' setting, will not be moved.
+
+There is an exception to the above.  If hotplug functionality is used
+to remove all the CPUs that are currently assigned to a cpuset,
+then all the tasks in that cpuset will be moved to the nearest ancestor
+with non-empty cpus.  But the moving of some (or all) tasks might fail if
+cpuset is bound with another cgroup subsystem which has some restrictions
+on task attaching.  In this failing case, those tasks will stay
+in the original cpuset, and the kernel will automatically update
+their cpus_allowed to allow all online CPUs.  When memory hotplug
+functionality for removing Memory Nodes is available, a similar exception
+is expected to apply there as well.  In general, the kernel prefers to
+violate cpuset placement, over starving a task that has had all
+its allowed CPUs or Memory Nodes taken offline.
+
+There is a second exception to the above.  GFP_ATOMIC requests are
+kernel internal allocations that must be satisfied, immediately.
+The kernel may drop some request, in rare cases even panic, if a
+GFP_ATOMIC alloc fails.  If the request cannot be satisfied within
+the current task's cpuset, then we relax the cpuset, and look for
+memory anywhere we can find it.  It's better to violate the cpuset
+than stress the kernel.
+
+To start a new job that is to be contained within a cpuset, the steps are:
+
+ 1) mkdir /sys/fs/cgroup/cpuset
+ 2) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+ 3) Create the new cpuset by doing mkdir's and write's (or echo's) in
+    the /sys/fs/cgroup/cpuset virtual file system.
+ 4) Start a task that will be the "founding father" of the new job.
+ 5) Attach that task to the new cpuset by writing its pid to the
+    /sys/fs/cgroup/cpuset tasks file for that cpuset.
+ 6) fork, exec or clone the job tasks from this founding father task.
+
+For example, the following sequence of commands will setup a cpuset
+named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
+and then start a subshell 'sh' in that cpuset::
+
+  mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+  cd /sys/fs/cgroup/cpuset
+  mkdir Charlie
+  cd Charlie
+  /bin/echo 2-3 > cpuset.cpus
+  /bin/echo 1 > cpuset.mems
+  /bin/echo $$ > tasks
+  sh
+  # The subshell 'sh' is now running in cpuset Charlie
+  # The next line should display '/Charlie'
+  cat /proc/self/cpuset
+
+There are ways to query or modify cpusets:
+
+ - via the cpuset file system directly, using the various cd, mkdir, echo,
+   cat, rmdir commands from the shell, or their equivalent from C.
+ - via the C library libcpuset.
+ - via the C library libcgroup.
+   (http://sourceforge.net/projects/libcg/)
+ - via the python application cset.
+   (http://code.google.com/p/cpuset/)
+
+The sched_setaffinity calls can also be done at the shell prompt using
+SGI's runon or Robert Love's taskset.  The mbind and set_mempolicy
+calls can be done at the shell prompt using the numactl command
+(part of Andi Kleen's numa package).
+
+2. Usage Examples and Syntax
+============================
+
+2.1 Basic Usage
+---------------
+
+Creating, modifying, using the cpusets can be done through the cpuset
+virtual filesystem.
+
+To mount it, type:
+# mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset
+
+Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the
+tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset
+is the cpuset that holds the whole system.
+
+If you want to create a new cpuset under /sys/fs/cgroup/cpuset::
+
+  # cd /sys/fs/cgroup/cpuset
+  # mkdir my_cpuset
+
+Now you want to do something with this cpuset::
+
+  # cd my_cpuset
+
+In this directory you can find several files::
+
+  # ls
+  cgroup.clone_children  cpuset.memory_pressure
+  cgroup.event_control   cpuset.memory_spread_page
+  cgroup.procs           cpuset.memory_spread_slab
+  cpuset.cpu_exclusive   cpuset.mems
+  cpuset.cpus            cpuset.sched_load_balance
+  cpuset.mem_exclusive   cpuset.sched_relax_domain_level
+  cpuset.mem_hardwall    notify_on_release
+  cpuset.memory_migrate  tasks
+
+Reading them will give you information about the state of this cpuset:
+the CPUs and Memory Nodes it can use, the processes that are using
+it, its properties.  By writing to these files you can manipulate
+the cpuset.
+
+Set some flags::
+
+  # /bin/echo 1 > cpuset.cpu_exclusive
+
+Add some cpus::
+
+  # /bin/echo 0-7 > cpuset.cpus
+
+Add some mems::
+
+  # /bin/echo 0-7 > cpuset.mems
+
+Now attach your shell to this cpuset::
+
+  # /bin/echo $$ > tasks
+
+You can also create cpusets inside your cpuset by using mkdir in this
+directory::
+
+  # mkdir my_sub_cs
+
+To remove a cpuset, just use rmdir::
+
+  # rmdir my_sub_cs
+
+This will fail if the cpuset is in use (has cpusets inside, or has
+processes attached).
+
+Note that for legacy reasons, the "cpuset" filesystem exists as a
+wrapper around the cgroup filesystem.
+
+The command::
+
+  mount -t cpuset X /sys/fs/cgroup/cpuset
+
+is equivalent to::
+
+  mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset
+  echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent
+
+2.2 Adding/removing cpus
+------------------------
+
+This is the syntax to use when writing in the cpus or mems files
+in cpuset directories::
+
+  # /bin/echo 1-4 > cpuset.cpus		-> set cpus list to cpus 1,2,3,4
+  # /bin/echo 1,2,3,4 > cpuset.cpus	-> set cpus list to cpus 1,2,3,4
+
+To add a CPU to a cpuset, write the new list of CPUs including the
+CPU to be added. To add 6 to the above cpuset::
+
+  # /bin/echo 1-4,6 > cpuset.cpus	-> set cpus list to cpus 1,2,3,4,6
+
+Similarly to remove a CPU from a cpuset, write the new list of CPUs
+without the CPU to be removed.
+
+To remove all the CPUs::
+
+  # /bin/echo "" > cpuset.cpus		-> clear cpus list
+
+2.3 Setting flags
+-----------------
+
+The syntax is very simple::
+
+  # /bin/echo 1 > cpuset.cpu_exclusive 	-> set flag 'cpuset.cpu_exclusive'
+  # /bin/echo 0 > cpuset.cpu_exclusive 	-> unset flag 'cpuset.cpu_exclusive'
+
+2.4 Attaching processes
+-----------------------
+
+::
+
+  # /bin/echo PID > tasks
+
+Note that it is PID, not PIDs. You can only attach ONE task at a time.
+If you have several tasks to attach, you have to do it one after another::
+
+  # /bin/echo PID1 > tasks
+  # /bin/echo PID2 > tasks
+	...
+  # /bin/echo PIDn > tasks
+
+
+3. Questions
+============
+
+Q:
+   what's up with this '/bin/echo' ?
+
+A:
+   bash's builtin 'echo' command does not check calls to write() against
+   errors. If you use it in the cpuset file system, you won't be
+   able to tell whether a command succeeded or failed.
+
+Q:
+   When I attach processes, only the first of the line gets really attached !
+
+A:
+   We can only return one error code per call to write(). So you should also
+   put only ONE pid.
+
+4. Contact
+==========
+
+Web: http://www.bullopensource.org/cpuset
diff --git a/Documentation/admin-guide/cgroup-v1/devices.rst b/Documentation/admin-guide/cgroup-v1/devices.rst
new file mode 100644
index 000000000000..e1886783961e
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/devices.rst
@@ -0,0 +1,132 @@
+===========================
+Device Whitelist Controller
+===========================
+
+1. Description
+==============
+
+Implement a cgroup to track and enforce open and mknod restrictions
+on device files.  A device cgroup associates a device access
+whitelist with each cgroup.  A whitelist entry has 4 fields.
+'type' is a (all), c (char), or b (block).  'all' means it applies
+to all types and all major and minor numbers.  Major and minor are
+either an integer or * for all.  Access is a composition of r
+(read), w (write), and m (mknod).
+
+The root device cgroup starts with rwm to 'all'.  A child device
+cgroup gets a copy of the parent.  Administrators can then remove
+devices from the whitelist or add new entries.  A child cgroup can
+never receive a device access which is denied by its parent.
+
+2. User Interface
+=================
+
+An entry is added using devices.allow, and removed using
+devices.deny.  For instance::
+
+	echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow
+
+allows cgroup 1 to read and mknod the device usually known as
+/dev/null.  Doing::
+
+	echo a > /sys/fs/cgroup/1/devices.deny
+
+will remove the default 'a *:* rwm' entry. Doing::
+
+	echo a > /sys/fs/cgroup/1/devices.allow
+
+will add the 'a *:* rwm' entry to the whitelist.
+
+3. Security
+===========
+
+Any task can move itself between cgroups.  This clearly won't
+suffice, but we can decide the best way to adequately restrict
+movement as people get some experience with this.  We may just want
+to require CAP_SYS_ADMIN, which at least is a separate bit from
+CAP_MKNOD.  We may want to just refuse moving to a cgroup which
+isn't a descendant of the current one.  Or we may want to use
+CAP_MAC_ADMIN, since we really are trying to lock down root.
+
+CAP_SYS_ADMIN is needed to modify the whitelist or move another
+task to a new cgroup.  (Again we'll probably want to change that).
+
+A cgroup may not be granted more permissions than the cgroup's
+parent has.
+
+4. Hierarchy
+============
+
+device cgroups maintain hierarchy by making sure a cgroup never has more
+access permissions than its parent.  Every time an entry is written to
+a cgroup's devices.deny file, all its children will have that entry removed
+from their whitelist and all the locally set whitelist entries will be
+re-evaluated.  In case one of the locally set whitelist entries would provide
+more access than the cgroup's parent, it'll be removed from the whitelist.
+
+Example::
+
+      A
+     / \
+        B
+
+    group        behavior	exceptions
+    A            allow		"b 8:* rwm", "c 116:1 rw"
+    B            deny		"c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"
+
+If a device is denied in group A::
+
+	# echo "c 116:* r" > A/devices.deny
+
+it'll propagate down and after revalidating B's entries, the whitelist entry
+"c 116:2 rwm" will be removed::
+
+    group        whitelist entries                        denied devices
+    A            all                                      "b 8:* rwm", "c 116:* rw"
+    B            "c 1:3 rwm", "b 3:* rwm"                 all the rest
+
+In case parent's exceptions change and local exceptions are not allowed
+anymore, they'll be deleted.
+
+Notice that new whitelist entries will not be propagated::
+
+      A
+     / \
+        B
+
+    group        whitelist entries                        denied devices
+    A            "c 1:3 rwm", "c 1:5 r"                   all the rest
+    B            "c 1:3 rwm", "c 1:5 r"                   all the rest
+
+when adding ``c *:3 rwm``::
+
+	# echo "c *:3 rwm" >A/devices.allow
+
+the result::
+
+    group        whitelist entries                        denied devices
+    A            "c *:3 rwm", "c 1:5 r"                   all the rest
+    B            "c 1:3 rwm", "c 1:5 r"                   all the rest
+
+but now it'll be possible to add new entries to B::
+
+	# echo "c 2:3 rwm" >B/devices.allow
+	# echo "c 50:3 r" >B/devices.allow
+
+or even::
+
+	# echo "c *:3 rwm" >B/devices.allow
+
+Allowing or denying all by writing 'a' to devices.allow or devices.deny will
+not be possible once the device cgroups has children.
+
+4.1 Hierarchy (internal implementation)
+---------------------------------------
+
+device cgroups is implemented internally using a behavior (ALLOW, DENY) and a
+list of exceptions.  The internal state is controlled using the same user
+interface to preserve compatibility with the previous whitelist-only
+implementation.  Removal or addition of exceptions that will reduce the access
+to devices will be propagated down the hierarchy.
+For every propagated exception, the effective rules will be re-evaluated based
+on current parent's access rules.
diff --git a/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst b/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst
new file mode 100644
index 000000000000..582d3427de3f
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst
@@ -0,0 +1,127 @@
+==============
+Cgroup Freezer
+==============
+
+The cgroup freezer is useful to batch job management system which start
+and stop sets of tasks in order to schedule the resources of a machine
+according to the desires of a system administrator. This sort of program
+is often used on HPC clusters to schedule access to the cluster as a
+whole. The cgroup freezer uses cgroups to describe the set of tasks to
+be started/stopped by the batch job management system. It also provides
+a means to start and stop the tasks composing the job.
+
+The cgroup freezer will also be useful for checkpointing running groups
+of tasks. The freezer allows the checkpoint code to obtain a consistent
+image of the tasks by attempting to force the tasks in a cgroup into a
+quiescent state. Once the tasks are quiescent another task can
+walk /proc or invoke a kernel interface to gather information about the
+quiesced tasks. Checkpointed tasks can be restarted later should a
+recoverable error occur. This also allows the checkpointed tasks to be
+migrated between nodes in a cluster by copying the gathered information
+to another node and restarting the tasks there.
+
+Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping
+and resuming tasks in userspace. Both of these signals are observable
+from within the tasks we wish to freeze. While SIGSTOP cannot be caught,
+blocked, or ignored it can be seen by waiting or ptracing parent tasks.
+SIGCONT is especially unsuitable since it can be caught by the task. Any
+programs designed to watch for SIGSTOP and SIGCONT could be broken by
+attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can
+demonstrate this problem using nested bash shells::
+
+	$ echo $$
+	16644
+	$ bash
+	$ echo $$
+	16690
+
+	From a second, unrelated bash shell:
+	$ kill -SIGSTOP 16690
+	$ kill -SIGCONT 16690
+
+	<at this point 16690 exits and causes 16644 to exit too>
+
+This happens because bash can observe both signals and choose how it
+responds to them.
+
+Another example of a program which catches and responds to these
+signals is gdb. In fact any program designed to use ptrace is likely to
+have a problem with this method of stopping and resuming tasks.
+
+In contrast, the cgroup freezer uses the kernel freezer code to
+prevent the freeze/unfreeze cycle from becoming visible to the tasks
+being frozen. This allows the bash example above and gdb to run as
+expected.
+
+The cgroup freezer is hierarchical. Freezing a cgroup freezes all
+tasks belonging to the cgroup and all its descendant cgroups. Each
+cgroup has its own state (self-state) and the state inherited from the
+parent (parent-state). Iff both states are THAWED, the cgroup is
+THAWED.
+
+The following cgroupfs files are created by cgroup freezer.
+
+* freezer.state: Read-write.
+
+  When read, returns the effective state of the cgroup - "THAWED",
+  "FREEZING" or "FROZEN". This is the combined self and parent-states.
+  If any is freezing, the cgroup is freezing (FREEZING or FROZEN).
+
+  FREEZING cgroup transitions into FROZEN state when all tasks
+  belonging to the cgroup and its descendants become frozen. Note that
+  a cgroup reverts to FREEZING from FROZEN after a new task is added
+  to the cgroup or one of its descendant cgroups until the new task is
+  frozen.
+
+  When written, sets the self-state of the cgroup. Two values are
+  allowed - "FROZEN" and "THAWED". If FROZEN is written, the cgroup,
+  if not already freezing, enters FREEZING state along with all its
+  descendant cgroups.
+
+  If THAWED is written, the self-state of the cgroup is changed to
+  THAWED.  Note that the effective state may not change to THAWED if
+  the parent-state is still freezing. If a cgroup's effective state
+  becomes THAWED, all its descendants which are freezing because of
+  the cgroup also leave the freezing state.
+
+* freezer.self_freezing: Read only.
+
+  Shows the self-state. 0 if the self-state is THAWED; otherwise, 1.
+  This value is 1 iff the last write to freezer.state was "FROZEN".
+
+* freezer.parent_freezing: Read only.
+
+  Shows the parent-state.  0 if none of the cgroup's ancestors is
+  frozen; otherwise, 1.
+
+The root cgroup is non-freezable and the above interface files don't
+exist.
+
+* Examples of usage::
+
+   # mkdir /sys/fs/cgroup/freezer
+   # mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer
+   # mkdir /sys/fs/cgroup/freezer/0
+   # echo $some_pid > /sys/fs/cgroup/freezer/0/tasks
+
+to get status of the freezer subsystem::
+
+   # cat /sys/fs/cgroup/freezer/0/freezer.state
+   THAWED
+
+to freeze all tasks in the container::
+
+   # echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state
+   # cat /sys/fs/cgroup/freezer/0/freezer.state
+   FREEZING
+   # cat /sys/fs/cgroup/freezer/0/freezer.state
+   FROZEN
+
+to unfreeze all tasks in the container::
+
+   # echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state
+   # cat /sys/fs/cgroup/freezer/0/freezer.state
+   THAWED
+
+This is the basic mechanism which should do the right thing for user space task
+in a simple scenario.
diff --git a/Documentation/admin-guide/cgroup-v1/hugetlb.rst b/Documentation/admin-guide/cgroup-v1/hugetlb.rst
new file mode 100644
index 000000000000..a3902aa253a9
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/hugetlb.rst
@@ -0,0 +1,50 @@
+==================
+HugeTLB Controller
+==================
+
+The HugeTLB controller allows to limit the HugeTLB usage per control group and
+enforces the controller limit during page fault. Since HugeTLB doesn't
+support page reclaim, enforcing the limit at page fault time implies that,
+the application will get SIGBUS signal if it tries to access HugeTLB pages
+beyond its limit. This requires the application to know beforehand how much
+HugeTLB pages it would require for its use.
+
+HugeTLB controller can be created by first mounting the cgroup filesystem.
+
+# mount -t cgroup -o hugetlb none /sys/fs/cgroup
+
+With the above step, the initial or the parent HugeTLB group becomes
+visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
+the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
+
+New groups can be created under the parent group /sys/fs/cgroup::
+
+  # cd /sys/fs/cgroup
+  # mkdir g1
+  # echo $$ > g1/tasks
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it.
+
+Brief summary of control files::
+
+ hugetlb.<hugepagesize>.limit_in_bytes     # set/show limit of "hugepagesize" hugetlb usage
+ hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb  usage recorded
+ hugetlb.<hugepagesize>.usage_in_bytes     # show current usage for "hugepagesize" hugetlb
+ hugetlb.<hugepagesize>.failcnt		   # show the number of allocation failure due to HugeTLB limit
+
+For a system supporting three hugepage sizes (64k, 32M and 1G), the control
+files include::
+
+  hugetlb.1GB.limit_in_bytes
+  hugetlb.1GB.max_usage_in_bytes
+  hugetlb.1GB.usage_in_bytes
+  hugetlb.1GB.failcnt
+  hugetlb.64KB.limit_in_bytes
+  hugetlb.64KB.max_usage_in_bytes
+  hugetlb.64KB.usage_in_bytes
+  hugetlb.64KB.failcnt
+  hugetlb.32MB.limit_in_bytes
+  hugetlb.32MB.max_usage_in_bytes
+  hugetlb.32MB.usage_in_bytes
+  hugetlb.32MB.failcnt
diff --git a/Documentation/admin-guide/cgroup-v1/index.rst b/Documentation/admin-guide/cgroup-v1/index.rst
new file mode 100644
index 000000000000..10bf48bae0b0
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/index.rst
@@ -0,0 +1,28 @@
+========================
+Control Groups version 1
+========================
+
+.. toctree::
+    :maxdepth: 1
+
+    cgroups
+
+    blkio-controller
+    cpuacct
+    cpusets
+    devices
+    freezer-subsystem
+    hugetlb
+    memcg_test
+    memory
+    net_cls
+    net_prio
+    pids
+    rdma
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/admin-guide/cgroup-v1/memcg_test.rst b/Documentation/admin-guide/cgroup-v1/memcg_test.rst
new file mode 100644
index 000000000000..3f7115e07b5d
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/memcg_test.rst
@@ -0,0 +1,355 @@
+=====================================================
+Memory Resource Controller(Memcg) Implementation Memo
+=====================================================
+
+Last Updated: 2010/2
+
+Base Kernel Version: based on 2.6.33-rc7-mm(candidate for 34).
+
+Because VM is getting complex (one of reasons is memcg...), memcg's behavior
+is complex. This is a document for memcg's internal behavior.
+Please note that implementation details can be changed.
+
+(*) Topics on API should be in Documentation/admin-guide/cgroup-v1/memory.rst)
+
+0. How to record usage ?
+========================
+
+   2 objects are used.
+
+   page_cgroup ....an object per page.
+
+	Allocated at boot or memory hotplug. Freed at memory hot removal.
+
+   swap_cgroup ... an entry per swp_entry.
+
+	Allocated at swapon(). Freed at swapoff().
+
+   The page_cgroup has USED bit and double count against a page_cgroup never
+   occurs. swap_cgroup is used only when a charged page is swapped-out.
+
+1. Charge
+=========
+
+   a page/swp_entry may be charged (usage += PAGE_SIZE) at
+
+	mem_cgroup_try_charge()
+
+2. Uncharge
+===========
+
+  a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
+
+	mem_cgroup_uncharge()
+	  Called when a page's refcount goes down to 0.
+
+	mem_cgroup_uncharge_swap()
+	  Called when swp_entry's refcnt goes down to 0. A charge against swap
+	  disappears.
+
+3. charge-commit-cancel
+=======================
+
+	Memcg pages are charged in two steps:
+
+		- mem_cgroup_try_charge()
+		- mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
+
+	At try_charge(), there are no flags to say "this page is charged".
+	at this point, usage += PAGE_SIZE.
+
+	At commit(), the page is associated with the memcg.
+
+	At cancel(), simply usage -= PAGE_SIZE.
+
+Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
+
+4. Anonymous
+============
+
+	Anonymous page is newly allocated at
+		  - page fault into MAP_ANONYMOUS mapping.
+		  - Copy-On-Write.
+
+	4.1 Swap-in.
+	At swap-in, the page is taken from swap-cache. There are 2 cases.
+
+	(a) If the SwapCache is newly allocated and read, it has no charges.
+	(b) If the SwapCache has been mapped by processes, it has been
+	    charged already.
+
+	4.2 Swap-out.
+	At swap-out, typical state transition is below.
+
+	(a) add to swap cache. (marked as SwapCache)
+	    swp_entry's refcnt += 1.
+	(b) fully unmapped.
+	    swp_entry's refcnt += # of ptes.
+	(c) write back to swap.
+	(d) delete from swap cache. (remove from SwapCache)
+	    swp_entry's refcnt -= 1.
+
+
+	Finally, at task exit,
+	(e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
+
+5. Page Cache
+=============
+
+	Page Cache is charged at
+	- add_to_page_cache_locked().
+
+	The logic is very clear. (About migration, see below)
+
+	Note:
+	  __remove_from_page_cache() is called by remove_from_page_cache()
+	  and __remove_mapping().
+
+6. Shmem(tmpfs) Page Cache
+===========================
+
+	The best way to understand shmem's page state transition is to read
+	mm/shmem.c.
+
+	But brief explanation of the behavior of memcg around shmem will be
+	helpful to understand the logic.
+
+	Shmem's page (just leaf page, not direct/indirect block) can be on
+
+		- radix-tree of shmem's inode.
+		- SwapCache.
+		- Both on radix-tree and SwapCache. This happens at swap-in
+		  and swap-out,
+
+	It's charged when...
+
+	- A new page is added to shmem's radix-tree.
+	- A swp page is read. (move a charge from swap_cgroup to page_cgroup)
+
+7. Page Migration
+=================
+
+	mem_cgroup_migrate()
+
+8. LRU
+======
+        Each memcg has its own private LRU. Now, its handling is under global
+	VM's control (means that it's handled under global pgdat->lru_lock).
+	Almost all routines around memcg's LRU is called by global LRU's
+	list management functions under pgdat->lru_lock.
+
+	A special function is mem_cgroup_isolate_pages(). This scans
+	memcg's private LRU and call __isolate_lru_page() to extract a page
+	from LRU.
+
+	(By __isolate_lru_page(), the page is removed from both of global and
+	private LRU.)
+
+
+9. Typical Tests.
+=================
+
+ Tests for racy cases.
+
+9.1 Small limit to memcg.
+-------------------------
+
+	When you do test to do racy case, it's good test to set memcg's limit
+	to be very small rather than GB. Many races found in the test under
+	xKB or xxMB limits.
+
+	(Memory behavior under GB and Memory behavior under MB shows very
+	different situation.)
+
+9.2 Shmem
+---------
+
+	Historically, memcg's shmem handling was poor and we saw some amount
+	of troubles here. This is because shmem is page-cache but can be
+	SwapCache. Test with shmem/tmpfs is always good test.
+
+9.3 Migration
+-------------
+
+	For NUMA, migration is an another special case. To do easy test, cpuset
+	is useful. Following is a sample script to do migration::
+
+		mount -t cgroup -o cpuset none /opt/cpuset
+
+		mkdir /opt/cpuset/01
+		echo 1 > /opt/cpuset/01/cpuset.cpus
+		echo 0 > /opt/cpuset/01/cpuset.mems
+		echo 1 > /opt/cpuset/01/cpuset.memory_migrate
+		mkdir /opt/cpuset/02
+		echo 1 > /opt/cpuset/02/cpuset.cpus
+		echo 1 > /opt/cpuset/02/cpuset.mems
+		echo 1 > /opt/cpuset/02/cpuset.memory_migrate
+
+	In above set, when you moves a task from 01 to 02, page migration to
+	node 0 to node 1 will occur. Following is a script to migrate all
+	under cpuset.::
+
+		--
+		move_task()
+		{
+		for pid in $1
+		do
+			/bin/echo $pid >$2/tasks 2>/dev/null
+			echo -n $pid
+			echo -n " "
+		done
+		echo END
+		}
+
+		G1_TASK=`cat ${G1}/tasks`
+		G2_TASK=`cat ${G2}/tasks`
+		move_task "${G1_TASK}" ${G2} &
+		--
+
+9.4 Memory hotplug
+------------------
+
+	memory hotplug test is one of good test.
+
+	to offline memory, do following::
+
+		# echo offline > /sys/devices/system/memory/memoryXXX/state
+
+	(XXX is the place of memory)
+
+	This is an easy way to test page migration, too.
+
+9.5 mkdir/rmdir
+---------------
+
+	When using hierarchy, mkdir/rmdir test should be done.
+	Use tests like the following::
+
+		echo 1 >/opt/cgroup/01/memory/use_hierarchy
+		mkdir /opt/cgroup/01/child_a
+		mkdir /opt/cgroup/01/child_b
+
+		set limit to 01.
+		add limit to 01/child_b
+		run jobs under child_a and child_b
+
+	create/delete following groups at random while jobs are running::
+
+		/opt/cgroup/01/child_a/child_aa
+		/opt/cgroup/01/child_b/child_bb
+		/opt/cgroup/01/child_c
+
+	running new jobs in new group is also good.
+
+9.6 Mount with other subsystems
+-------------------------------
+
+	Mounting with other subsystems is a good test because there is a
+	race and lock dependency with other cgroup subsystems.
+
+	example::
+
+		# mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices
+
+	and do task move, mkdir, rmdir etc...under this.
+
+9.7 swapoff
+-----------
+
+	Besides management of swap is one of complicated parts of memcg,
+	call path of swap-in at swapoff is not same as usual swap-in path..
+	It's worth to be tested explicitly.
+
+	For example, test like following is good:
+
+	(Shell-A)::
+
+		# mount -t cgroup none /cgroup -o memory
+		# mkdir /cgroup/test
+		# echo 40M > /cgroup/test/memory.limit_in_bytes
+		# echo 0 > /cgroup/test/tasks
+
+	Run malloc(100M) program under this. You'll see 60M of swaps.
+
+	(Shell-B)::
+
+		# move all tasks in /cgroup/test to /cgroup
+		# /sbin/swapoff -a
+		# rmdir /cgroup/test
+		# kill malloc task.
+
+	Of course, tmpfs v.s. swapoff test should be tested, too.
+
+9.8 OOM-Killer
+--------------
+
+	Out-of-memory caused by memcg's limit will kill tasks under
+	the memcg. When hierarchy is used, a task under hierarchy
+	will be killed by the kernel.
+
+	In this case, panic_on_oom shouldn't be invoked and tasks
+	in other groups shouldn't be killed.
+
+	It's not difficult to cause OOM under memcg as following.
+
+	Case A) when you can swapoff::
+
+		#swapoff -a
+		#echo 50M > /memory.limit_in_bytes
+
+	run 51M of malloc
+
+	Case B) when you use mem+swap limitation::
+
+		#echo 50M > memory.limit_in_bytes
+		#echo 50M > memory.memsw.limit_in_bytes
+
+	run 51M of malloc
+
+9.9 Move charges at task migration
+----------------------------------
+
+	Charges associated with a task can be moved along with task migration.
+
+	(Shell-A)::
+
+		#mkdir /cgroup/A
+		#echo $$ >/cgroup/A/tasks
+
+	run some programs which uses some amount of memory in /cgroup/A.
+
+	(Shell-B)::
+
+		#mkdir /cgroup/B
+		#echo 1 >/cgroup/B/memory.move_charge_at_immigrate
+		#echo "pid of the program running in group A" >/cgroup/B/tasks
+
+	You can see charges have been moved by reading ``*.usage_in_bytes`` or
+	memory.stat of both A and B.
+
+	See 8.2 of Documentation/admin-guide/cgroup-v1/memory.rst to see what value should
+	be written to move_charge_at_immigrate.
+
+9.10 Memory thresholds
+----------------------
+
+	Memory controller implements memory thresholds using cgroups notification
+	API. You can use tools/cgroup/cgroup_event_listener.c to test it.
+
+	(Shell-A) Create cgroup and run event listener::
+
+		# mkdir /cgroup/A
+		# ./cgroup_event_listener /cgroup/A/memory.usage_in_bytes 5M
+
+	(Shell-B) Add task to cgroup and try to allocate and free memory::
+
+		# echo $$ >/cgroup/A/tasks
+		# a="$(dd if=/dev/zero bs=1M count=10)"
+		# a=
+
+	You will see message from cgroup_event_listener every time you cross
+	the thresholds.
+
+	Use /cgroup/A/memory.memsw.usage_in_bytes to test memsw thresholds.
+
+	It's good idea to test root cgroup as well.
diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
new file mode 100644
index 000000000000..41bdc038dad9
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@@ -0,0 +1,1003 @@
+==========================
+Memory Resource Controller
+==========================
+
+NOTE:
+      This document is hopelessly outdated and it asks for a complete
+      rewrite. It still contains a useful information so we are keeping it
+      here but make sure to check the current code if you need a deeper
+      understanding.
+
+NOTE:
+      The Memory Resource Controller has generically been referred to as the
+      memory controller in this document. Do not confuse memory controller
+      used here with the memory controller that is used in hardware.
+
+(For editors) In this document:
+      When we mention a cgroup (cgroupfs's directory) with memory controller,
+      we call it "memory cgroup". When you see git-log and source code, you'll
+      see patch's title and function names tend to use "memcg".
+      In this document, we avoid using it.
+
+Benefits and Purpose of the memory controller
+=============================================
+
+The memory controller isolates the memory behaviour of a group of tasks
+from the rest of the system. The article on LWN [12] mentions some probable
+uses of the memory controller. The memory controller can be used to
+
+a. Isolate an application or a group of applications
+   Memory-hungry applications can be isolated and limited to a smaller
+   amount of memory.
+b. Create a cgroup with a limited amount of memory; this can be used
+   as a good alternative to booting with mem=XXXX.
+c. Virtualization solutions can control the amount of memory they want
+   to assign to a virtual machine instance.
+d. A CD/DVD burner could control the amount of memory used by the
+   rest of the system to ensure that burning does not fail due to lack
+   of available memory.
+e. There are several other use cases; find one or use the controller just
+   for fun (to learn and hack on the VM subsystem).
+
+Current Status: linux-2.6.34-mmotm(development version of 2010/April)
+
+Features:
+
+ - accounting anonymous pages, file caches, swap caches usage and limiting them.
+ - pages are linked to per-memcg LRU exclusively, and there is no global LRU.
+ - optionally, memory+swap usage can be accounted and limited.
+ - hierarchical accounting
+ - soft limit
+ - moving (recharging) account at moving a task is selectable.
+ - usage threshold notifier
+ - memory pressure notifier
+ - oom-killer disable knob and oom-notifier
+ - Root cgroup has no limit controls.
+
+ Kernel memory support is a work in progress, and the current version provides
+ basically functionality. (See Section 2.7)
+
+Brief summary of control files.
+
+==================================== ==========================================
+ tasks				     attach a task(thread) and show list of
+				     threads
+ cgroup.procs			     show list of processes
+ cgroup.event_control		     an interface for event_fd()
+ memory.usage_in_bytes		     show current usage for memory
+				     (See 5.5 for details)
+ memory.memsw.usage_in_bytes	     show current usage for memory+Swap
+				     (See 5.5 for details)
+ memory.limit_in_bytes		     set/show limit of memory usage
+ memory.memsw.limit_in_bytes	     set/show limit of memory+Swap usage
+ memory.failcnt			     show the number of memory usage hits limits
+ memory.memsw.failcnt		     show the number of memory+Swap hits limits
+ memory.max_usage_in_bytes	     show max memory usage recorded
+ memory.memsw.max_usage_in_bytes     show max memory+Swap usage recorded
+ memory.soft_limit_in_bytes	     set/show soft limit of memory usage
+ memory.stat			     show various statistics
+ memory.use_hierarchy		     set/show hierarchical account enabled
+ memory.force_empty		     trigger forced page reclaim
+ memory.pressure_level		     set memory pressure notifications
+ memory.swappiness		     set/show swappiness parameter of vmscan
+				     (See sysctl's vm.swappiness)
+ memory.move_charge_at_immigrate     set/show controls of moving charges
+ memory.oom_control		     set/show oom controls.
+ memory.numa_stat		     show the number of memory usage per numa
+				     node
+
+ memory.kmem.limit_in_bytes          set/show hard limit for kernel memory
+ memory.kmem.usage_in_bytes          show current kernel memory allocation
+ memory.kmem.failcnt                 show the number of kernel memory usage
+				     hits limits
+ memory.kmem.max_usage_in_bytes      show max kernel memory usage recorded
+
+ memory.kmem.tcp.limit_in_bytes      set/show hard limit for tcp buf memory
+ memory.kmem.tcp.usage_in_bytes      show current tcp buf memory allocation
+ memory.kmem.tcp.failcnt             show the number of tcp buf memory usage
+				     hits limits
+ memory.kmem.tcp.max_usage_in_bytes  show max tcp buf memory usage recorded
+==================================== ==========================================
+
+1. History
+==========
+
+The memory controller has a long history. A request for comments for the memory
+controller was posted by Balbir Singh [1]. At the time the RFC was posted
+there were several implementations for memory control. The goal of the
+RFC was to build consensus and agreement for the minimal features required
+for memory control. The first RSS controller was posted by Balbir Singh[2]
+in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the
+RSS controller. At OLS, at the resource management BoF, everyone suggested
+that we handle both page cache and RSS together. Another request was raised
+to allow user space handling of OOM. The current memory controller is
+at version 6; it combines both mapped (RSS) and unmapped Page
+Cache Control [11].
+
+2. Memory Control
+=================
+
+Memory is a unique resource in the sense that it is present in a limited
+amount. If a task requires a lot of CPU processing, the task can spread
+its processing over a period of hours, days, months or years, but with
+memory, the same physical memory needs to be reused to accomplish the task.
+
+The memory controller implementation has been divided into phases. These
+are:
+
+1. Memory controller
+2. mlock(2) controller
+3. Kernel user memory accounting and slab control
+4. user mappings length controller
+
+The memory controller is the first controller developed.
+
+2.1. Design
+-----------
+
+The core of the design is a counter called the page_counter. The
+page_counter tracks the current memory usage and limit of the group of
+processes associated with the controller. Each cgroup has a memory controller
+specific data structure (mem_cgroup) associated with it.
+
+2.2. Accounting
+---------------
+
+::
+
+		+--------------------+
+		|  mem_cgroup        |
+		|  (page_counter)    |
+		+--------------------+
+		 /            ^      \
+		/             |       \
+           +---------------+  |        +---------------+
+           | mm_struct     |  |....    | mm_struct     |
+           |               |  |        |               |
+           +---------------+  |        +---------------+
+                              |
+                              + --------------+
+                                              |
+           +---------------+           +------+--------+
+           | page          +---------->  page_cgroup|
+           |               |           |               |
+           +---------------+           +---------------+
+
+             (Figure 1: Hierarchy of Accounting)
+
+
+Figure 1 shows the important aspects of the controller
+
+1. Accounting happens per cgroup
+2. Each mm_struct knows about which cgroup it belongs to
+3. Each page has a pointer to the page_cgroup, which in turn knows the
+   cgroup it belongs to
+
+The accounting is done as follows: mem_cgroup_charge_common() is invoked to
+set up the necessary data structures and check if the cgroup that is being
+charged is over its limit. If it is, then reclaim is invoked on the cgroup.
+More details can be found in the reclaim section of this document.
+If everything goes well, a page meta-data-structure called page_cgroup is
+updated. page_cgroup has its own LRU on cgroup.
+(*) page_cgroup structure is allocated at boot/memory-hotplug time.
+
+2.2.1 Accounting details
+------------------------
+
+All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
+Some pages which are never reclaimable and will not be on the LRU
+are not accounted. We just account pages under usual VM management.
+
+RSS pages are accounted at page_fault unless they've already been accounted
+for earlier. A file page will be accounted for as Page Cache when it's
+inserted into inode (radix-tree). While it's mapped into the page tables of
+processes, duplicate accounting is carefully avoided.
+
+An RSS page is unaccounted when it's fully unmapped. A PageCache page is
+unaccounted when it's removed from radix-tree. Even if RSS pages are fully
+unmapped (by kswapd), they may exist as SwapCache in the system until they
+are really freed. Such SwapCaches are also accounted.
+A swapped-in page is not accounted until it's mapped.
+
+Note: The kernel does swapin-readahead and reads multiple swaps at once.
+This means swapped-in pages may contain pages for other tasks than a task
+causing page fault. So, we avoid accounting at swap-in I/O.
+
+At page migration, accounting information is kept.
+
+Note: we just account pages-on-LRU because our purpose is to control amount
+of used pages; not-on-LRU pages tend to be out-of-control from VM view.
+
+2.3 Shared Page Accounting
+--------------------------
+
+Shared pages are accounted on the basis of the first touch approach. The
+cgroup that first touches a page is accounted for the page. The principle
+behind this approach is that a cgroup that aggressively uses a shared
+page will eventually get charged for it (once it is uncharged from
+the cgroup that brought it in -- this will happen on memory pressure).
+
+But see section 8.2: when moving a task to another cgroup, its pages may
+be recharged to the new cgroup, if move_charge_at_immigrate has been chosen.
+
+Exception: If CONFIG_MEMCG_SWAP is not used.
+When you do swapoff and make swapped-out pages of shmem(tmpfs) to
+be backed into memory in force, charges for pages are accounted against the
+caller of swapoff rather than the users of shmem.
+
+2.4 Swap Extension (CONFIG_MEMCG_SWAP)
+--------------------------------------
+
+Swap Extension allows you to record charge for swap. A swapped-in page is
+charged back to original page allocator if possible.
+
+When swap is accounted, following files are added.
+
+ - memory.memsw.usage_in_bytes.
+ - memory.memsw.limit_in_bytes.
+
+memsw means memory+swap. Usage of memory+swap is limited by
+memsw.limit_in_bytes.
+
+Example: Assume a system with 4G of swap. A task which allocates 6G of memory
+(by mistake) under 2G memory limitation will use all swap.
+In this case, setting memsw.limit_in_bytes=3G will prevent bad use of swap.
+By using the memsw limit, you can avoid system OOM which can be caused by swap
+shortage.
+
+**why 'memory+swap' rather than swap**
+
+The global LRU(kswapd) can swap out arbitrary pages. Swap-out means
+to move account from memory to swap...there is no change in usage of
+memory+swap. In other words, when we want to limit the usage of swap without
+affecting global LRU, memory+swap limit is better than just limiting swap from
+an OS point of view.
+
+**What happens when a cgroup hits memory.memsw.limit_in_bytes**
+
+When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out
+in this cgroup. Then, swap-out will not be done by cgroup routine and file
+caches are dropped. But as mentioned above, global LRU can do swapout memory
+from it for sanity of the system's memory management state. You can't forbid
+it by cgroup.
+
+2.5 Reclaim
+-----------
+
+Each cgroup maintains a per cgroup LRU which has the same structure as
+global VM. When a cgroup goes over its limit, we first try
+to reclaim memory from the cgroup so as to make space for the new
+pages that the cgroup has touched. If the reclaim is unsuccessful,
+an OOM routine is invoked to select and kill the bulkiest task in the
+cgroup. (See 10. OOM Control below.)
+
+The reclaim algorithm has not been modified for cgroups, except that
+pages that are selected for reclaiming come from the per-cgroup LRU
+list.
+
+NOTE:
+  Reclaim does not work for the root cgroup, since we cannot set any
+  limits on the root cgroup.
+
+Note2:
+  When panic_on_oom is set to "2", the whole system will panic.
+
+When oom event notifier is registered, event will be delivered.
+(See oom_control section)
+
+2.6 Locking
+-----------
+
+   lock_page_cgroup()/unlock_page_cgroup() should not be called under
+   the i_pages lock.
+
+   Other lock order is following:
+
+   PG_locked.
+     mm->page_table_lock
+         pgdat->lru_lock
+	   lock_page_cgroup.
+
+  In many cases, just lock_page_cgroup() is called.
+
+  per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
+  pgdat->lru_lock, it has no lock of its own.
+
+2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)
+-----------------------------------------------
+
+With the Kernel memory extension, the Memory Controller is able to limit
+the amount of kernel memory used by the system. Kernel memory is fundamentally
+different than user memory, since it can't be swapped out, which makes it
+possible to DoS the system by consuming too much of this precious resource.
+
+Kernel memory accounting is enabled for all memory cgroups by default. But
+it can be disabled system-wide by passing cgroup.memory=nokmem to the kernel
+at boot time. In this case, kernel memory will not be accounted at all.
+
+Kernel memory limits are not imposed for the root cgroup. Usage for the root
+cgroup may or may not be accounted. The memory used is accumulated into
+memory.kmem.usage_in_bytes, or in a separate counter when it makes sense.
+(currently only for tcp).
+
+The main "kmem" counter is fed into the main counter, so kmem charges will
+also be visible from the user counter.
+
+Currently no soft limit is implemented for kernel memory. It is future work
+to trigger slab reclaim when those limits are reached.
+
+2.7.1 Current Kernel Memory resources accounted
+-----------------------------------------------
+
+stack pages:
+  every process consumes some stack pages. By accounting into
+  kernel memory, we prevent new processes from being created when the kernel
+  memory usage is too high.
+
+slab pages:
+  pages allocated by the SLAB or SLUB allocator are tracked. A copy
+  of each kmem_cache is created every time the cache is touched by the first time
+  from inside the memcg. The creation is done lazily, so some objects can still be
+  skipped while the cache is being created. All objects in a slab page should
+  belong to the same memcg. This only fails to hold when a task is migrated to a
+  different memcg during the page allocation by the cache.
+
+sockets memory pressure:
+  some sockets protocols have memory pressure
+  thresholds. The Memory Controller allows them to be controlled individually
+  per cgroup, instead of globally.
+
+tcp memory pressure:
+  sockets memory pressure for the tcp protocol.
+
+2.7.2 Common use cases
+----------------------
+
+Because the "kmem" counter is fed to the main user counter, kernel memory can
+never be limited completely independently of user memory. Say "U" is the user
+limit, and "K" the kernel limit. There are three possible ways limits can be
+set:
+
+U != 0, K = unlimited:
+    This is the standard memcg limitation mechanism already present before kmem
+    accounting. Kernel memory is completely ignored.
+
+U != 0, K < U:
+    Kernel memory is a subset of the user memory. This setup is useful in
+    deployments where the total amount of memory per-cgroup is overcommited.
+    Overcommiting kernel memory limits is definitely not recommended, since the
+    box can still run out of non-reclaimable memory.
+    In this case, the admin could set up K so that the sum of all groups is
+    never greater than the total memory, and freely set U at the cost of his
+    QoS.
+
+WARNING:
+    In the current implementation, memory reclaim will NOT be
+    triggered for a cgroup when it hits K while staying below U, which makes
+    this setup impractical.
+
+U != 0, K >= U:
+    Since kmem charges will also be fed to the user counter and reclaim will be
+    triggered for the cgroup for both kinds of memory. This setup gives the
+    admin a unified view of memory, and it is also useful for people who just
+    want to track kernel memory usage.
+
+3. User Interface
+=================
+
+3.0. Configuration
+------------------
+
+a. Enable CONFIG_CGROUPS
+b. Enable CONFIG_MEMCG
+c. Enable CONFIG_MEMCG_SWAP (to use swap extension)
+d. Enable CONFIG_MEMCG_KMEM (to use kmem extension)
+
+3.1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?)
+-------------------------------------------------------------------
+
+::
+
+	# mount -t tmpfs none /sys/fs/cgroup
+	# mkdir /sys/fs/cgroup/memory
+	# mount -t cgroup none /sys/fs/cgroup/memory -o memory
+
+3.2. Make the new group and move bash into it::
+
+	# mkdir /sys/fs/cgroup/memory/0
+	# echo $$ > /sys/fs/cgroup/memory/0/tasks
+
+Since now we're in the 0 cgroup, we can alter the memory limit::
+
+	# echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes
+
+NOTE:
+  We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
+  mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes,
+  Gibibytes.)
+
+NOTE:
+  We can write "-1" to reset the ``*.limit_in_bytes(unlimited)``.
+
+NOTE:
+  We cannot set limits on the root cgroup any more.
+
+::
+
+  # cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes
+  4194304
+
+We can check the usage::
+
+  # cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes
+  1216512
+
+A successful write to this file does not guarantee a successful setting of
+this limit to the value written into the file. This can be due to a
+number of factors, such as rounding up to page boundaries or the total
+availability of memory on the system. The user is required to re-read
+this file after a write to guarantee the value committed by the kernel::
+
+  # echo 1 > memory.limit_in_bytes
+  # cat memory.limit_in_bytes
+  4096
+
+The memory.failcnt field gives the number of times that the cgroup limit was
+exceeded.
+
+The memory.stat file gives accounting information. Now, the number of
+caches, RSS and Active pages/Inactive pages are shown.
+
+4. Testing
+==========
+
+For testing features and implementation, see memcg_test.txt.
+
+Performance test is also important. To see pure memory controller's overhead,
+testing on tmpfs will give you good numbers of small overheads.
+Example: do kernel make on tmpfs.
+
+Page-fault scalability is also important. At measuring parallel
+page fault test, multi-process test may be better than multi-thread
+test because it has noise of shared objects/status.
+
+But the above two are testing extreme situations.
+Trying usual test under memory controller is always helpful.
+
+4.1 Troubleshooting
+-------------------
+
+Sometimes a user might find that the application under a cgroup is
+terminated by the OOM killer. There are several causes for this:
+
+1. The cgroup limit is too low (just too low to do anything useful)
+2. The user is using anonymous memory and swap is turned off or too low
+
+A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of
+some of the pages cached in the cgroup (page cache pages).
+
+To know what happens, disabling OOM_Kill as per "10. OOM Control" (below) and
+seeing what happens will be helpful.
+
+4.2 Task migration
+------------------
+
+When a task migrates from one cgroup to another, its charge is not
+carried forward by default. The pages allocated from the original cgroup still
+remain charged to it, the charge is dropped when the page is freed or
+reclaimed.
+
+You can move charges of a task along with task migration.
+See 8. "Move charges at task migration"
+
+4.3 Removing a cgroup
+---------------------
+
+A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
+cgroup might have some charge associated with it, even though all
+tasks have migrated away from it. (because we charge against pages, not
+against tasks.)
+
+We move the stats to root (if use_hierarchy==0) or parent (if
+use_hierarchy==1), and no change on the charge except uncharging
+from the child.
+
+Charges recorded in swap information is not updated at removal of cgroup.
+Recorded information is discarded and a cgroup which uses swap (swapcache)
+will be charged as a new owner of it.
+
+About use_hierarchy, see Section 6.
+
+5. Misc. interfaces
+===================
+
+5.1 force_empty
+---------------
+  memory.force_empty interface is provided to make cgroup's memory usage empty.
+  When writing anything to this::
+
+    # echo 0 > memory.force_empty
+
+  the cgroup will be reclaimed and as many pages reclaimed as possible.
+
+  The typical use case for this interface is before calling rmdir().
+  Though rmdir() offlines memcg, but the memcg may still stay there due to
+  charged file caches. Some out-of-use page caches may keep charged until
+  memory pressure happens. If you want to avoid that, force_empty will be useful.
+
+  Also, note that when memory.kmem.limit_in_bytes is set the charges due to
+  kernel pages will still be seen. This is not considered a failure and the
+  write will still return success. In this case, it is expected that
+  memory.kmem.usage_in_bytes == memory.usage_in_bytes.
+
+  About use_hierarchy, see Section 6.
+
+5.2 stat file
+-------------
+
+memory.stat file includes following statistics
+
+per-memory cgroup local status
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+=============== ===============================================================
+cache		# of bytes of page cache memory.
+rss		# of bytes of anonymous and swap cache memory (includes
+		transparent hugepages).
+rss_huge	# of bytes of anonymous transparent hugepages.
+mapped_file	# of bytes of mapped file (includes tmpfs/shmem)
+pgpgin		# of charging events to the memory cgroup. The charging
+		event happens each time a page is accounted as either mapped
+		anon page(RSS) or cache page(Page Cache) to the cgroup.
+pgpgout		# of uncharging events to the memory cgroup. The uncharging
+		event happens each time a page is unaccounted from the cgroup.
+swap		# of bytes of swap usage
+dirty		# of bytes that are waiting to get written back to the disk.
+writeback	# of bytes of file/anon cache that are queued for syncing to
+		disk.
+inactive_anon	# of bytes of anonymous and swap cache memory on inactive
+		LRU list.
+active_anon	# of bytes of anonymous and swap cache memory on active
+		LRU list.
+inactive_file	# of bytes of file-backed memory on inactive LRU list.
+active_file	# of bytes of file-backed memory on active LRU list.
+unevictable	# of bytes of memory that cannot be reclaimed (mlocked etc).
+=============== ===============================================================
+
+status considering hierarchy (see memory.use_hierarchy settings)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+========================= ===================================================
+hierarchical_memory_limit # of bytes of memory limit with regard to hierarchy
+			  under which the memory cgroup is
+hierarchical_memsw_limit  # of bytes of memory+swap limit with regard to
+			  hierarchy under which memory cgroup is.
+
+total_<counter>		  # hierarchical version of <counter>, which in
+			  addition to the cgroup's own value includes the
+			  sum of all hierarchical children's values of
+			  <counter>, i.e. total_cache
+========================= ===================================================
+
+The following additional stats are dependent on CONFIG_DEBUG_VM
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+========================= ========================================
+recent_rotated_anon	  VM internal parameter. (see mm/vmscan.c)
+recent_rotated_file	  VM internal parameter. (see mm/vmscan.c)
+recent_scanned_anon	  VM internal parameter. (see mm/vmscan.c)
+recent_scanned_file	  VM internal parameter. (see mm/vmscan.c)
+========================= ========================================
+
+Memo:
+	recent_rotated means recent frequency of LRU rotation.
+	recent_scanned means recent # of scans to LRU.
+	showing for better debug please see the code for meanings.
+
+Note:
+	Only anonymous and swap cache memory is listed as part of 'rss' stat.
+	This should not be confused with the true 'resident set size' or the
+	amount of physical memory used by the cgroup.
+
+	'rss + mapped_file" will give you resident set size of cgroup.
+
+	(Note: file and shmem may be shared among other cgroups. In that case,
+	mapped_file is accounted only when the memory cgroup is owner of page
+	cache.)
+
+5.3 swappiness
+--------------
+
+Overrides /proc/sys/vm/swappiness for the particular group. The tunable
+in the root cgroup corresponds to the global swappiness setting.
+
+Please note that unlike during the global reclaim, limit reclaim
+enforces that 0 swappiness really prevents from any swapping even if
+there is a swap storage available. This might lead to memcg OOM killer
+if there are no file pages to reclaim.
+
+5.4 failcnt
+-----------
+
+A memory cgroup provides memory.failcnt and memory.memsw.failcnt files.
+This failcnt(== failure count) shows the number of times that a usage counter
+hit its limit. When a memory cgroup hits a limit, failcnt increases and
+memory under it will be reclaimed.
+
+You can reset failcnt by writing 0 to failcnt file::
+
+	# echo 0 > .../memory.failcnt
+
+5.5 usage_in_bytes
+------------------
+
+For efficiency, as other kernel components, memory cgroup uses some optimization
+to avoid unnecessary cacheline false sharing. usage_in_bytes is affected by the
+method and doesn't show 'exact' value of memory (and swap) usage, it's a fuzz
+value for efficient access. (Of course, when necessary, it's synchronized.)
+If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP)
+value in memory.stat(see 5.2).
+
+5.6 numa_stat
+-------------
+
+This is similar to numa_maps but operates on a per-memcg basis.  This is
+useful for providing visibility into the numa locality information within
+an memcg since the pages are allowed to be allocated from any physical
+node.  One of the use cases is evaluating application performance by
+combining this information with the application's CPU allocation.
+
+Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable"
+per-node page counts including "hierarchical_<counter>" which sums up all
+hierarchical children's values in addition to the memcg's own value.
+
+The output format of memory.numa_stat is::
+
+  total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
+  file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
+  anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
+  unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
+  hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
+
+The "total" count is sum of file + anon + unevictable.
+
+6. Hierarchy support
+====================
+
+The memory controller supports a deep hierarchy and hierarchical accounting.
+The hierarchy is created by creating the appropriate cgroups in the
+cgroup filesystem. Consider for example, the following cgroup filesystem
+hierarchy::
+
+	       root
+	     /  |   \
+            /	|    \
+	   a	b     c
+		      | \
+		      |  \
+		      d   e
+
+In the diagram above, with hierarchical accounting enabled, all memory
+usage of e, is accounted to its ancestors up until the root (i.e, c and root),
+that has memory.use_hierarchy enabled. If one of the ancestors goes over its
+limit, the reclaim algorithm reclaims from the tasks in the ancestor and the
+children of the ancestor.
+
+6.1 Enabling hierarchical accounting and reclaim
+------------------------------------------------
+
+A memory cgroup by default disables the hierarchy feature. Support
+can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup::
+
+	# echo 1 > memory.use_hierarchy
+
+The feature can be disabled by::
+
+	# echo 0 > memory.use_hierarchy
+
+NOTE1:
+       Enabling/disabling will fail if either the cgroup already has other
+       cgroups created below it, or if the parent cgroup has use_hierarchy
+       enabled.
+
+NOTE2:
+       When panic_on_oom is set to "2", the whole system will panic in
+       case of an OOM event in any cgroup.
+
+7. Soft limits
+==============
+
+Soft limits allow for greater sharing of memory. The idea behind soft limits
+is to allow control groups to use as much of the memory as needed, provided
+
+a. There is no memory contention
+b. They do not exceed their hard limit
+
+When the system detects memory contention or low memory, control groups
+are pushed back to their soft limits. If the soft limit of each control
+group is very high, they are pushed back as much as possible to make
+sure that one control group does not starve the others of memory.
+
+Please note that soft limits is a best-effort feature; it comes with
+no guarantees, but it does its best to make sure that when memory is
+heavily contended for, memory is allocated based on the soft limit
+hints/setup. Currently soft limit based reclaim is set up such that
+it gets invoked from balance_pgdat (kswapd).
+
+7.1 Interface
+-------------
+
+Soft limits can be setup by using the following commands (in this example we
+assume a soft limit of 256 MiB)::
+
+	# echo 256M > memory.soft_limit_in_bytes
+
+If we want to change this to 1G, we can at any time use::
+
+	# echo 1G > memory.soft_limit_in_bytes
+
+NOTE1:
+       Soft limits take effect over a long period of time, since they involve
+       reclaiming memory for balancing between memory cgroups
+NOTE2:
+       It is recommended to set the soft limit always below the hard limit,
+       otherwise the hard limit will take precedence.
+
+8. Move charges at task migration
+=================================
+
+Users can move charges associated with a task along with task migration, that
+is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
+This feature is not supported in !CONFIG_MMU environments because of lack of
+page tables.
+
+8.1 Interface
+-------------
+
+This feature is disabled by default. It can be enabled (and disabled again) by
+writing to memory.move_charge_at_immigrate of the destination cgroup.
+
+If you want to enable it::
+
+	# echo (some positive value) > memory.move_charge_at_immigrate
+
+Note:
+      Each bits of move_charge_at_immigrate has its own meaning about what type
+      of charges should be moved. See 8.2 for details.
+Note:
+      Charges are moved only when you move mm->owner, in other words,
+      a leader of a thread group.
+Note:
+      If we cannot find enough space for the task in the destination cgroup, we
+      try to make space by reclaiming memory. Task migration may fail if we
+      cannot make enough space.
+Note:
+      It can take several seconds if you move charges much.
+
+And if you want disable it again::
+
+	# echo 0 > memory.move_charge_at_immigrate
+
+8.2 Type of charges which can be moved
+--------------------------------------
+
+Each bit in move_charge_at_immigrate has its own meaning about what type of
+charges should be moved. But in any case, it must be noted that an account of
+a page or a swap can be moved only when it is charged to the task's current
+(old) memory cgroup.
+
++---+--------------------------------------------------------------------------+
+|bit| what type of charges would be moved ?                                    |
++===+==========================================================================+
+| 0 | A charge of an anonymous page (or swap of it) used by the target task.   |
+|   | You must enable Swap Extension (see 2.4) to enable move of swap charges. |
++---+--------------------------------------------------------------------------+
+| 1 | A charge of file pages (normal file, tmpfs file (e.g. ipc shared memory) |
+|   | and swaps of tmpfs file) mmapped by the target task. Unlike the case of  |
+|   | anonymous pages, file pages (and swaps) in the range mmapped by the task |
+|   | will be moved even if the task hasn't done page fault, i.e. they might   |
+|   | not be the task's "RSS", but other task's "RSS" that maps the same file. |
+|   | And mapcount of the page is ignored (the page can be moved even if       |
+|   | page_mapcount(page) > 1). You must enable Swap Extension (see 2.4) to    |
+|   | enable move of swap charges.                                             |
++---+--------------------------------------------------------------------------+
+
+8.3 TODO
+--------
+
+- All of moving charge operations are done under cgroup_mutex. It's not good
+  behavior to hold the mutex too long, so we may need some trick.
+
+9. Memory thresholds
+====================
+
+Memory cgroup implements memory thresholds using the cgroups notification
+API (see cgroups.txt). It allows to register multiple memory and memsw
+thresholds and gets notifications when it crosses.
+
+To register a threshold, an application must:
+
+- create an eventfd using eventfd(2);
+- open memory.usage_in_bytes or memory.memsw.usage_in_bytes;
+- write string like "<event_fd> <fd of memory.usage_in_bytes> <threshold>" to
+  cgroup.event_control.
+
+Application will be notified through eventfd when memory usage crosses
+threshold in any direction.
+
+It's applicable for root and non-root cgroup.
+
+10. OOM Control
+===============
+
+memory.oom_control file is for OOM notification and other controls.
+
+Memory cgroup implements OOM notifier using the cgroup notification
+API (See cgroups.txt). It allows to register multiple OOM notification
+delivery and gets notification when OOM happens.
+
+To register a notifier, an application must:
+
+ - create an eventfd using eventfd(2)
+ - open memory.oom_control file
+ - write string like "<event_fd> <fd of memory.oom_control>" to
+   cgroup.event_control
+
+The application will be notified through eventfd when OOM happens.
+OOM notification doesn't work for the root cgroup.
+
+You can disable the OOM-killer by writing "1" to memory.oom_control file, as:
+
+	#echo 1 > memory.oom_control
+
+If OOM-killer is disabled, tasks under cgroup will hang/sleep
+in memory cgroup's OOM-waitqueue when they request accountable memory.
+
+For running them, you have to relax the memory cgroup's OOM status by
+
+	* enlarge limit or reduce usage.
+
+To reduce usage,
+
+	* kill some tasks.
+	* move some tasks to other group with account migration.
+	* remove some files (on tmpfs?)
+
+Then, stopped tasks will work again.
+
+At reading, current status of OOM is shown.
+
+	- oom_kill_disable 0 or 1
+	  (if 1, oom-killer is disabled)
+	- under_oom	   0 or 1
+	  (if 1, the memory cgroup is under OOM, tasks may be stopped.)
+
+11. Memory Pressure
+===================
+
+The pressure level notifications can be used to monitor the memory
+allocation cost; based on the pressure, applications can implement
+different strategies of managing their memory resources. The pressure
+levels are defined as following:
+
+The "low" level means that the system is reclaiming memory for new
+allocations. Monitoring this reclaiming activity might be useful for
+maintaining cache level. Upon notification, the program (typically
+"Activity Manager") might analyze vmstat and act in advance (i.e.
+prematurely shutdown unimportant services).
+
+The "medium" level means that the system is experiencing medium memory
+pressure, the system might be making swap, paging out active file caches,
+etc. Upon this event applications may decide to further analyze
+vmstat/zoneinfo/memcg or internal memory usage statistics and free any
+resources that can be easily reconstructed or re-read from a disk.
+
+The "critical" level means that the system is actively thrashing, it is
+about to out of memory (OOM) or even the in-kernel OOM killer is on its
+way to trigger. Applications should do whatever they can to help the
+system. It might be too late to consult with vmstat or any other
+statistics, so it's advisable to take an immediate action.
+
+By default, events are propagated upward until the event is handled, i.e. the
+events are not pass-through. For example, you have three cgroups: A->B->C. Now
+you set up an event listener on cgroups A, B and C, and suppose group C
+experiences some pressure. In this situation, only group C will receive the
+notification, i.e. groups A and B will not receive it. This is done to avoid
+excessive "broadcasting" of messages, which disturbs the system and which is
+especially bad if we are low on memory or thrashing. Group B, will receive
+notification only if there are no event listers for group C.
+
+There are three optional modes that specify different propagation behavior:
+
+ - "default": this is the default behavior specified above. This mode is the
+   same as omitting the optional mode parameter, preserved by backwards
+   compatibility.
+
+ - "hierarchy": events always propagate up to the root, similar to the default
+   behavior, except that propagation continues regardless of whether there are
+   event listeners at each level, with the "hierarchy" mode. In the above
+   example, groups A, B, and C will receive notification of memory pressure.
+
+ - "local": events are pass-through, i.e. they only receive notifications when
+   memory pressure is experienced in the memcg for which the notification is
+   registered. In the above example, group C will receive notification if
+   registered for "local" notification and the group experiences memory
+   pressure. However, group B will never receive notification, regardless if
+   there is an event listener for group C or not, if group B is registered for
+   local notification.
+
+The level and event notification mode ("hierarchy" or "local", if necessary) are
+specified by a comma-delimited string, i.e. "low,hierarchy" specifies
+hierarchical, pass-through, notification for all ancestor memcgs. Notification
+that is the default, non pass-through behavior, does not specify a mode.
+"medium,local" specifies pass-through notification for the medium level.
+
+The file memory.pressure_level is only used to setup an eventfd. To
+register a notification, an application must:
+
+- create an eventfd using eventfd(2);
+- open memory.pressure_level;
+- write string as "<event_fd> <fd of memory.pressure_level> <level[,mode]>"
+  to cgroup.event_control.
+
+Application will be notified through eventfd when memory pressure is at
+the specific level (or higher). Read/write operations to
+memory.pressure_level are no implemented.
+
+Test:
+
+   Here is a small script example that makes a new cgroup, sets up a
+   memory limit, sets up a notification in the cgroup and then makes child
+   cgroup experience a critical pressure::
+
+	# cd /sys/fs/cgroup/memory/
+	# mkdir foo
+	# cd foo
+	# cgroup_event_listener memory.pressure_level low,hierarchy &
+	# echo 8000000 > memory.limit_in_bytes
+	# echo 8000000 > memory.memsw.limit_in_bytes
+	# echo $$ > tasks
+	# dd if=/dev/zero | read x
+
+   (Expect a bunch of notifications, and eventually, the oom-killer will
+   trigger.)
+
+12. TODO
+========
+
+1. Make per-cgroup scanner reclaim not-shared pages first
+2. Teach controller to account for shared-pages
+3. Start reclamation in the background when the limit is
+   not yet hit but the usage is getting closer
+
+Summary
+=======
+
+Overall, the memory controller has been a stable controller and has been
+commented and discussed quite extensively in the community.
+
+References
+==========
+
+1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/
+2. Singh, Balbir. Memory Controller (RSS Control),
+   http://lwn.net/Articles/222762/
+3. Emelianov, Pavel. Resource controllers based on process cgroups
+   http://lkml.org/lkml/2007/3/6/198
+4. Emelianov, Pavel. RSS controller based on process cgroups (v2)
+   http://lkml.org/lkml/2007/4/9/78
+5. Emelianov, Pavel. RSS controller based on process cgroups (v3)
+   http://lkml.org/lkml/2007/5/30/244
+6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/
+7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control
+   subsystem (v3), http://lwn.net/Articles/235534/
+8. Singh, Balbir. RSS controller v2 test results (lmbench),
+   http://lkml.org/lkml/2007/5/17/232
+9. Singh, Balbir. RSS controller v2 AIM9 results
+   http://lkml.org/lkml/2007/5/18/1
+10. Singh, Balbir. Memory controller v6 test results,
+    http://lkml.org/lkml/2007/8/19/36
+11. Singh, Balbir. Memory controller introduction (v6),
+    http://lkml.org/lkml/2007/8/17/69
+12. Corbet, Jonathan, Controlling memory use in cgroups,
+    http://lwn.net/Articles/243795/
diff --git a/Documentation/admin-guide/cgroup-v1/net_cls.rst b/Documentation/admin-guide/cgroup-v1/net_cls.rst
new file mode 100644
index 000000000000..a2cf272af7a0
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/net_cls.rst
@@ -0,0 +1,44 @@
+=========================
+Network classifier cgroup
+=========================
+
+The Network classifier cgroup provides an interface to
+tag network packets with a class identifier (classid).
+
+The Traffic Controller (tc) can be used to assign
+different priorities to packets from different cgroups.
+Also, Netfilter (iptables) can use this tag to perform
+actions on such packets.
+
+Creating a net_cls cgroups instance creates a net_cls.classid file.
+This net_cls.classid value is initialized to 0.
+
+You can write hexadecimal values to net_cls.classid; the format for these
+values is 0xAAAABBBB; AAAA is the major handle number and BBBB
+is the minor handle number.
+Reading net_cls.classid yields a decimal result.
+
+Example::
+
+	mkdir /sys/fs/cgroup/net_cls
+	mount -t cgroup -onet_cls net_cls /sys/fs/cgroup/net_cls
+	mkdir /sys/fs/cgroup/net_cls/0
+	echo 0x100001 >  /sys/fs/cgroup/net_cls/0/net_cls.classid
+
+- setting a 10:1 handle::
+
+	cat /sys/fs/cgroup/net_cls/0/net_cls.classid
+	1048577
+
+- configuring tc::
+
+	tc qdisc add dev eth0 root handle 10: htb
+	tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit
+
+- creating traffic class 10:1::
+
+	tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup
+
+configuring iptables, basic example::
+
+	iptables -A OUTPUT -m cgroup ! --cgroup 0x100001 -j DROP
diff --git a/Documentation/admin-guide/cgroup-v1/net_prio.rst b/Documentation/admin-guide/cgroup-v1/net_prio.rst
new file mode 100644
index 000000000000..b40905871c64
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/net_prio.rst
@@ -0,0 +1,57 @@
+=======================
+Network priority cgroup
+=======================
+
+The Network priority cgroup provides an interface to allow an administrator to
+dynamically set the priority of network traffic generated by various
+applications
+
+Nominally, an application would set the priority of its traffic via the
+SO_PRIORITY socket option.  This however, is not always possible because:
+
+1) The application may not have been coded to set this value
+2) The priority of application traffic is often a site-specific administrative
+   decision rather than an application defined one.
+
+This cgroup allows an administrator to assign a process to a group which defines
+the priority of egress traffic on a given interface. Network priority groups can
+be created by first mounting the cgroup filesystem::
+
+	# mount -t cgroup -onet_prio none /sys/fs/cgroup/net_prio
+
+With the above step, the initial group acting as the parent accounting group
+becomes visible at '/sys/fs/cgroup/net_prio'.  This group includes all tasks in
+the system. '/sys/fs/cgroup/net_prio/tasks' lists the tasks in this cgroup.
+
+Each net_prio cgroup contains two files that are subsystem specific
+
+net_prio.prioidx
+  This file is read-only, and is simply informative.  It contains a unique
+  integer value that the kernel uses as an internal representation of this
+  cgroup.
+
+net_prio.ifpriomap
+  This file contains a map of the priorities assigned to traffic originating
+  from processes in this group and egressing the system on various interfaces.
+  It contains a list of tuples in the form <ifname priority>.  Contents of this
+  file can be modified by echoing a string into the file using the same tuple
+  format. For example::
+
+	echo "eth0 5" > /sys/fs/cgroups/net_prio/iscsi/net_prio.ifpriomap
+
+This command would force any traffic originating from processes belonging to the
+iscsi net_prio cgroup and egressing on interface eth0 to have the priority of
+said traffic set to the value 5. The parent accounting group also has a
+writeable 'net_prio.ifpriomap' file that can be used to set a system default
+priority.
+
+Priorities are set immediately prior to queueing a frame to the device
+queueing discipline (qdisc) so priorities will be assigned prior to the hardware
+queue selection being made.
+
+One usage for the net_prio cgroup is with mqprio qdisc allowing application
+traffic to be steered to hardware/driver based traffic classes. These mappings
+can then be managed by administrators or other networking protocols such as
+DCBX.
+
+A new net_prio cgroup inherits the parent's configuration.
diff --git a/Documentation/admin-guide/cgroup-v1/pids.rst b/Documentation/admin-guide/cgroup-v1/pids.rst
new file mode 100644
index 000000000000..6acebd9e72c8
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/pids.rst
@@ -0,0 +1,92 @@
+=========================
+Process Number Controller
+=========================
+
+Abstract
+--------
+
+The process number controller is used to allow a cgroup hierarchy to stop any
+new tasks from being fork()'d or clone()'d after a certain limit is reached.
+
+Since it is trivial to hit the task limit without hitting any kmemcg limits in
+place, PIDs are a fundamental resource. As such, PID exhaustion must be
+preventable in the scope of a cgroup hierarchy by allowing resource limiting of
+the number of tasks in a cgroup.
+
+Usage
+-----
+
+In order to use the `pids` controller, set the maximum number of tasks in
+pids.max (this is not available in the root cgroup for obvious reasons). The
+number of processes currently in the cgroup is given by pids.current.
+
+Organisational operations are not blocked by cgroup policies, so it is possible
+to have pids.current > pids.max. This can be done by either setting the limit to
+be smaller than pids.current, or attaching enough processes to the cgroup such
+that pids.current > pids.max. However, it is not possible to violate a cgroup
+policy through fork() or clone(). fork() and clone() will return -EAGAIN if the
+creation of a new process would cause a cgroup policy to be violated.
+
+To set a cgroup to have no limit, set pids.max to "max". This is the default for
+all new cgroups (N.B. that PID limits are hierarchical, so the most stringent
+limit in the hierarchy is followed).
+
+pids.current tracks all child cgroup hierarchies, so parent/pids.current is a
+superset of parent/child/pids.current.
+
+The pids.events file contains event counters:
+
+  - max: Number of times fork failed because limit was hit.
+
+Example
+-------
+
+First, we mount the pids controller::
+
+	# mkdir -p /sys/fs/cgroup/pids
+	# mount -t cgroup -o pids none /sys/fs/cgroup/pids
+
+Then we create a hierarchy, set limits and attach processes to it::
+
+	# mkdir -p /sys/fs/cgroup/pids/parent/child
+	# echo 2 > /sys/fs/cgroup/pids/parent/pids.max
+	# echo $$ > /sys/fs/cgroup/pids/parent/cgroup.procs
+	# cat /sys/fs/cgroup/pids/parent/pids.current
+	2
+	#
+
+It should be noted that attempts to overcome the set limit (2 in this case) will
+fail::
+
+	# cat /sys/fs/cgroup/pids/parent/pids.current
+	2
+	# ( /bin/echo "Here's some processes for you." | cat )
+	sh: fork: Resource temporary unavailable
+	#
+
+Even if we migrate to a child cgroup (which doesn't have a set limit), we will
+not be able to overcome the most stringent limit in the hierarchy (in this case,
+parent's)::
+
+	# echo $$ > /sys/fs/cgroup/pids/parent/child/cgroup.procs
+	# cat /sys/fs/cgroup/pids/parent/pids.current
+	2
+	# cat /sys/fs/cgroup/pids/parent/child/pids.current
+	2
+	# cat /sys/fs/cgroup/pids/parent/child/pids.max
+	max
+	# ( /bin/echo "Here's some processes for you." | cat )
+	sh: fork: Resource temporary unavailable
+	#
+
+We can set a limit that is smaller than pids.current, which will stop any new
+processes from being forked at all (note that the shell itself counts towards
+pids.current)::
+
+	# echo 1 > /sys/fs/cgroup/pids/parent/pids.max
+	# /bin/echo "We can't even spawn a single process now."
+	sh: fork: Resource temporary unavailable
+	# echo 0 > /sys/fs/cgroup/pids/parent/pids.max
+	# /bin/echo "We can't even spawn a single process now."
+	sh: fork: Resource temporary unavailable
+	#
diff --git a/Documentation/admin-guide/cgroup-v1/rdma.rst b/Documentation/admin-guide/cgroup-v1/rdma.rst
new file mode 100644
index 000000000000..2fcb0a9bf790
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/rdma.rst
@@ -0,0 +1,117 @@
+===============
+RDMA Controller
+===============
+
+.. Contents
+
+   1. Overview
+     1-1. What is RDMA controller?
+     1-2. Why RDMA controller needed?
+     1-3. How is RDMA controller implemented?
+   2. Usage Examples
+
+1. Overview
+===========
+
+1-1. What is RDMA controller?
+-----------------------------
+
+RDMA controller allows user to limit RDMA/IB specific resources that a given
+set of processes can use. These processes are grouped using RDMA controller.
+
+RDMA controller defines two resources which can be limited for processes of a
+cgroup.
+
+1-2. Why RDMA controller needed?
+--------------------------------
+
+Currently user space applications can easily take away all the rdma verb
+specific resources such as AH, CQ, QP, MR etc. Due to which other applications
+in other cgroup or kernel space ULPs may not even get chance to allocate any
+rdma resources. This can lead to service unavailability.
+
+Therefore RDMA controller is needed through which resource consumption
+of processes can be limited. Through this controller different rdma
+resources can be accounted.
+
+1-3. How is RDMA controller implemented?
+----------------------------------------
+
+RDMA cgroup allows limit configuration of resources. Rdma cgroup maintains
+resource accounting per cgroup, per device using resource pool structure.
+Each such resource pool is limited up to 64 resources in given resource pool
+by rdma cgroup, which can be extended later if required.
+
+This resource pool object is linked to the cgroup css. Typically there
+are 0 to 4 resource pool instances per cgroup, per device in most use cases.
+But nothing limits to have it more. At present hundreds of RDMA devices per
+single cgroup may not be handled optimally, however there is no
+known use case or requirement for such configuration either.
+
+Since RDMA resources can be allocated from any process and can be freed by any
+of the child processes which shares the address space, rdma resources are
+always owned by the creator cgroup css. This allows process migration from one
+to other cgroup without major complexity of transferring resource ownership;
+because such ownership is not really present due to shared nature of
+rdma resources. Linking resources around css also ensures that cgroups can be
+deleted after processes migrated. This allow progress migration as well with
+active resources, even though that is not a primary use case.
+
+Whenever RDMA resource charging occurs, owner rdma cgroup is returned to
+the caller. Same rdma cgroup should be passed while uncharging the resource.
+This also allows process migrated with active RDMA resource to charge
+to new owner cgroup for new resource. It also allows to uncharge resource of
+a process from previously charged cgroup which is migrated to new cgroup,
+even though that is not a primary use case.
+
+Resource pool object is created in following situations.
+(a) User sets the limit and no previous resource pool exist for the device
+of interest for the cgroup.
+(b) No resource limits were configured, but IB/RDMA stack tries to
+charge the resource. So that it correctly uncharge them when applications are
+running without limits and later on when limits are enforced during uncharging,
+otherwise usage count will drop to negative.
+
+Resource pool is destroyed if all the resource limits are set to max and
+it is the last resource getting deallocated.
+
+User should set all the limit to max value if it intents to remove/unconfigure
+the resource pool for a particular device.
+
+IB stack honors limits enforced by the rdma controller. When application
+query about maximum resource limits of IB device, it returns minimum of
+what is configured by user for a given cgroup and what is supported by
+IB device.
+
+Following resources can be accounted by rdma controller.
+
+  ==========    =============================
+  hca_handle	Maximum number of HCA Handles
+  hca_object 	Maximum number of HCA Objects
+  ==========    =============================
+
+2. Usage Examples
+=================
+
+(a) Configure resource limit::
+
+	echo mlx4_0 hca_handle=2 hca_object=2000 > /sys/fs/cgroup/rdma/1/rdma.max
+	echo ocrdma1 hca_handle=3 > /sys/fs/cgroup/rdma/2/rdma.max
+
+(b) Query resource limit::
+
+	cat /sys/fs/cgroup/rdma/2/rdma.max
+	#Output:
+	mlx4_0 hca_handle=2 hca_object=2000
+	ocrdma1 hca_handle=3 hca_object=max
+
+(c) Query current usage::
+
+	cat /sys/fs/cgroup/rdma/2/rdma.current
+	#Output:
+	mlx4_0 hca_handle=1 hca_object=20
+	ocrdma1 hca_handle=1 hca_object=23
+
+(d) Delete resource limit::
+
+	echo echo mlx4_0 hca_handle=max hca_object=max > /sys/fs/cgroup/rdma/1/rdma.max
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 080b18ce2a5d..ed4c5977d6e1 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -9,7 +9,7 @@ This is the authoritative documentation on the design, interface and
 conventions of cgroup v2.  It describes all userland-visible aspects
 of cgroup including core and specific controller behaviors.  All
 future changes must be reflected in this document.  Documentation for
-v1 is available under Documentation/cgroup-v1/.
+v1 is available under Documentation/admin-guide/cgroup-v1/.
 
 .. CONTENTS
 
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 1f0d9b939311..a5fdb1a846ce 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -59,6 +59,7 @@ configure specific aspects of kernel behavior to your liking.
 
    initrd
    cgroup-v2
+   cgroup-v1/index
    serial-console
    braille-console
    parport
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 78576aa45cce..a571a67e0c85 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4089,7 +4089,7 @@
 
 	relax_domain_level=
 			[KNL, SMP] Set scheduler's default relax_domain_level.
-			See Documentation/cgroup-v1/cpusets.rst.
+			See Documentation/admin-guide/cgroup-v1/cpusets.rst.
 
 	reserve=	[KNL,BUGS] Force kernel to ignore I/O ports or memory
 			Format: <base1>,<size1>[,<base2>,<size2>,...]
@@ -4599,7 +4599,7 @@
 	swapaccount=[0|1]
 			[KNL] Enable accounting of swap in memory resource
 			controller if no parameter or 1 is given or disable
-			it if 0 is given (See Documentation/cgroup-v1/memory.rst)
+			it if 0 is given (See Documentation/admin-guide/cgroup-v1/memory.rst)
 
 	swiotlb=	[ARM,IA-64,PPC,MIPS,X86]
 			Format: { <int> | force | noforce }
diff --git a/Documentation/admin-guide/mm/numa_memory_policy.rst b/Documentation/admin-guide/mm/numa_memory_policy.rst
index 546f174e5d6a..8463f5538fda 100644
--- a/Documentation/admin-guide/mm/numa_memory_policy.rst
+++ b/Documentation/admin-guide/mm/numa_memory_policy.rst
@@ -15,7 +15,7 @@ document attempts to describe the concepts and APIs of the 2.6 memory policy
 support.
 
 Memory policies should not be confused with cpusets
-(``Documentation/cgroup-v1/cpusets.rst``)
+(``Documentation/admin-guide/cgroup-v1/cpusets.rst``)
 which is an administrative mechanism for restricting the nodes from which
 memory may be allocated by a set of processes. Memory policies are a
 programming interface that a NUMA-aware application can take advantage of.  When
diff --git a/Documentation/block/bfq-iosched.rst b/Documentation/block/bfq-iosched.rst
index 2c13b2fc1888..0d237d402860 100644
--- a/Documentation/block/bfq-iosched.rst
+++ b/Documentation/block/bfq-iosched.rst
@@ -547,7 +547,7 @@ As for cgroups-v1 (blkio controller), the exact set of stat files
 created, and kept up-to-date by bfq, depends on whether
 CONFIG_BFQ_CGROUP_DEBUG is set. If it is set, then bfq creates all
 the stat files documented in
-Documentation/cgroup-v1/blkio-controller.rst. If, instead,
+Documentation/admin-guide/cgroup-v1/blkio-controller.rst. If, instead,
 CONFIG_BFQ_CGROUP_DEBUG is not set, then bfq creates only the files::
 
   blkio.bfq.io_service_bytes
diff --git a/Documentation/cgroup-v1/blkio-controller.rst b/Documentation/cgroup-v1/blkio-controller.rst
deleted file mode 100644
index 1d7d962933be..000000000000
--- a/Documentation/cgroup-v1/blkio-controller.rst
+++ /dev/null
@@ -1,302 +0,0 @@
-===================
-Block IO Controller
-===================
-
-Overview
-========
-cgroup subsys "blkio" implements the block io controller. There seems to be
-a need of various kinds of IO control policies (like proportional BW, max BW)
-both at leaf nodes as well as at intermediate nodes in a storage hierarchy.
-Plan is to use the same cgroup based management interface for blkio controller
-and based on user options switch IO policies in the background.
-
-One IO control policy is throttling policy which can be used to
-specify upper IO rate limits on devices. This policy is implemented in
-generic block layer and can be used on leaf nodes as well as higher
-level logical devices like device mapper.
-
-HOWTO
-=====
-Throttling/Upper Limit policy
------------------------------
-- Enable Block IO controller::
-
-	CONFIG_BLK_CGROUP=y
-
-- Enable throttling in block layer::
-
-	CONFIG_BLK_DEV_THROTTLING=y
-
-- Mount blkio controller (see cgroups.txt, Why are cgroups needed?)::
-
-        mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
-
-- Specify a bandwidth rate on particular device for root group. The format
-  for policy is "<major>:<minor>  <bytes_per_second>"::
-
-        echo "8:16  1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device
-
-  Above will put a limit of 1MB/second on reads happening for root group
-  on device having major/minor number 8:16.
-
-- Run dd to read a file and see if rate is throttled to 1MB/s or not::
-
-        # dd iflag=direct if=/mnt/common/zerofile of=/dev/null bs=4K count=1024
-        1024+0 records in
-        1024+0 records out
-        4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s
-
- Limits for writes can be put using blkio.throttle.write_bps_device file.
-
-Hierarchical Cgroups
-====================
-
-Throttling implements hierarchy support; however,
-throttling's hierarchy support is enabled iff "sane_behavior" is
-enabled from cgroup side, which currently is a development option and
-not publicly available.
-
-If somebody created a hierarchy like as follows::
-
-			root
-			/  \
-		     test1 test2
-			|
-		     test3
-
-Throttling with "sane_behavior" will handle the
-hierarchy correctly. For throttling, all limits apply
-to the whole subtree while all statistics are local to the IOs
-directly generated by tasks in that cgroup.
-
-Throttling without "sane_behavior" enabled from cgroup side will
-practically treat all groups at same level as if it looks like the
-following::
-
-				pivot
-			     /  /   \  \
-			root  test1 test2  test3
-
-Various user visible config options
-===================================
-CONFIG_BLK_CGROUP
-	- Block IO controller.
-
-CONFIG_BFQ_CGROUP_DEBUG
-	- Debug help. Right now some additional stats file show up in cgroup
-	  if this option is enabled.
-
-CONFIG_BLK_DEV_THROTTLING
-	- Enable block device throttling support in block layer.
-
-Details of cgroup files
-=======================
-Proportional weight policy files
---------------------------------
-- blkio.weight
-	- Specifies per cgroup weight. This is default weight of the group
-	  on all the devices until and unless overridden by per device rule.
-	  (See blkio.weight_device).
-	  Currently allowed range of weights is from 10 to 1000.
-
-- blkio.weight_device
-	- One can specify per cgroup per device rules using this interface.
-	  These rules override the default value of group weight as specified
-	  by blkio.weight.
-
-	  Following is the format::
-
-	    # echo dev_maj:dev_minor weight > blkio.weight_device
-
-	  Configure weight=300 on /dev/sdb (8:16) in this cgroup::
-
-	    # echo 8:16 300 > blkio.weight_device
-	    # cat blkio.weight_device
-	    dev     weight
-	    8:16    300
-
-	  Configure weight=500 on /dev/sda (8:0) in this cgroup::
-
-	    # echo 8:0 500 > blkio.weight_device
-	    # cat blkio.weight_device
-	    dev     weight
-	    8:0     500
-	    8:16    300
-
-	  Remove specific weight for /dev/sda in this cgroup::
-
-	    # echo 8:0 0 > blkio.weight_device
-	    # cat blkio.weight_device
-	    dev     weight
-	    8:16    300
-
-- blkio.leaf_weight[_device]
-	- Equivalents of blkio.weight[_device] for the purpose of
-          deciding how much weight tasks in the given cgroup has while
-          competing with the cgroup's child cgroups. For details,
-          please refer to Documentation/block/cfq-iosched.txt.
-
-- blkio.time
-	- disk time allocated to cgroup per device in milliseconds. First
-	  two fields specify the major and minor number of the device and
-	  third field specifies the disk time allocated to group in
-	  milliseconds.
-
-- blkio.sectors
-	- number of sectors transferred to/from disk by the group. First
-	  two fields specify the major and minor number of the device and
-	  third field specifies the number of sectors transferred by the
-	  group to/from the device.
-
-- blkio.io_service_bytes
-	- Number of bytes transferred to/from the disk by the group. These
-	  are further divided by the type of operation - read or write, sync
-	  or async. First two fields specify the major and minor number of the
-	  device, third field specifies the operation type and the fourth field
-	  specifies the number of bytes.
-
-- blkio.io_serviced
-	- Number of IOs (bio) issued to the disk by the group. These
-	  are further divided by the type of operation - read or write, sync
-	  or async. First two fields specify the major and minor number of the
-	  device, third field specifies the operation type and the fourth field
-	  specifies the number of IOs.
-
-- blkio.io_service_time
-	- Total amount of time between request dispatch and request completion
-	  for the IOs done by this cgroup. This is in nanoseconds to make it
-	  meaningful for flash devices too. For devices with queue depth of 1,
-	  this time represents the actual service time. When queue_depth > 1,
-	  that is no longer true as requests may be served out of order. This
-	  may cause the service time for a given IO to include the service time
-	  of multiple IOs when served out of order which may result in total
-	  io_service_time > actual time elapsed. This time is further divided by
-	  the type of operation - read or write, sync or async. First two fields
-	  specify the major and minor number of the device, third field
-	  specifies the operation type and the fourth field specifies the
-	  io_service_time in ns.
-
-- blkio.io_wait_time
-	- Total amount of time the IOs for this cgroup spent waiting in the
-	  scheduler queues for service. This can be greater than the total time
-	  elapsed since it is cumulative io_wait_time for all IOs. It is not a
-	  measure of total time the cgroup spent waiting but rather a measure of
-	  the wait_time for its individual IOs. For devices with queue_depth > 1
-	  this metric does not include the time spent waiting for service once
-	  the IO is dispatched to the device but till it actually gets serviced
-	  (there might be a time lag here due to re-ordering of requests by the
-	  device). This is in nanoseconds to make it meaningful for flash
-	  devices too. This time is further divided by the type of operation -
-	  read or write, sync or async. First two fields specify the major and
-	  minor number of the device, third field specifies the operation type
-	  and the fourth field specifies the io_wait_time in ns.
-
-- blkio.io_merged
-	- Total number of bios/requests merged into requests belonging to this
-	  cgroup. This is further divided by the type of operation - read or
-	  write, sync or async.
-
-- blkio.io_queued
-	- Total number of requests queued up at any given instant for this
-	  cgroup. This is further divided by the type of operation - read or
-	  write, sync or async.
-
-- blkio.avg_queue_size
-	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
-	  The average queue size for this cgroup over the entire time of this
-	  cgroup's existence. Queue size samples are taken each time one of the
-	  queues of this cgroup gets a timeslice.
-
-- blkio.group_wait_time
-	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
-	  This is the amount of time the cgroup had to wait since it became busy
-	  (i.e., went from 0 to 1 request queued) to get a timeslice for one of
-	  its queues. This is different from the io_wait_time which is the
-	  cumulative total of the amount of time spent by each IO in that cgroup
-	  waiting in the scheduler queue. This is in nanoseconds. If this is
-	  read when the cgroup is in a waiting (for timeslice) state, the stat
-	  will only report the group_wait_time accumulated till the last time it
-	  got a timeslice and will not include the current delta.
-
-- blkio.empty_time
-	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
-	  This is the amount of time a cgroup spends without any pending
-	  requests when not being served, i.e., it does not include any time
-	  spent idling for one of the queues of the cgroup. This is in
-	  nanoseconds. If this is read when the cgroup is in an empty state,
-	  the stat will only report the empty_time accumulated till the last
-	  time it had a pending request and will not include the current delta.
-
-- blkio.idle_time
-	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
-	  This is the amount of time spent by the IO scheduler idling for a
-	  given cgroup in anticipation of a better request than the existing ones
-	  from other queues/cgroups. This is in nanoseconds. If this is read
-	  when the cgroup is in an idling state, the stat will only report the
-	  idle_time accumulated till the last idle period and will not include
-	  the current delta.
-
-- blkio.dequeue
-	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y. This
-	  gives the statistics about how many a times a group was dequeued
-	  from service tree of the device. First two fields specify the major
-	  and minor number of the device and third field specifies the number
-	  of times a group was dequeued from a particular device.
-
-- blkio.*_recursive
-	- Recursive version of various stats. These files show the
-          same information as their non-recursive counterparts but
-          include stats from all the descendant cgroups.
-
-Throttling/Upper limit policy files
------------------------------------
-- blkio.throttle.read_bps_device
-	- Specifies upper limit on READ rate from the device. IO rate is
-	  specified in bytes per second. Rules are per device. Following is
-	  the format::
-
-	    echo "<major>:<minor>  <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
-
-- blkio.throttle.write_bps_device
-	- Specifies upper limit on WRITE rate to the device. IO rate is
-	  specified in bytes per second. Rules are per device. Following is
-	  the format::
-
-	    echo "<major>:<minor>  <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
-
-- blkio.throttle.read_iops_device
-	- Specifies upper limit on READ rate from the device. IO rate is
-	  specified in IO per second. Rules are per device. Following is
-	  the format::
-
-	   echo "<major>:<minor>  <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
-
-- blkio.throttle.write_iops_device
-	- Specifies upper limit on WRITE rate to the device. IO rate is
-	  specified in io per second. Rules are per device. Following is
-	  the format::
-
-	    echo "<major>:<minor>  <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
-
-Note: If both BW and IOPS rules are specified for a device, then IO is
-      subjected to both the constraints.
-
-- blkio.throttle.io_serviced
-	- Number of IOs (bio) issued to the disk by the group. These
-	  are further divided by the type of operation - read or write, sync
-	  or async. First two fields specify the major and minor number of the
-	  device, third field specifies the operation type and the fourth field
-	  specifies the number of IOs.
-
-- blkio.throttle.io_service_bytes
-	- Number of bytes transferred to/from the disk by the group. These
-	  are further divided by the type of operation - read or write, sync
-	  or async. First two fields specify the major and minor number of the
-	  device, third field specifies the operation type and the fourth field
-	  specifies the number of bytes.
-
-Common files among various policies
------------------------------------
-- blkio.reset_stats
-	- Writing an int to this file will result in resetting all the stats
-	  for that cgroup.
diff --git a/Documentation/cgroup-v1/cgroups.rst b/Documentation/cgroup-v1/cgroups.rst
deleted file mode 100644
index 46bbe7e022d4..000000000000
--- a/Documentation/cgroup-v1/cgroups.rst
+++ /dev/null
@@ -1,695 +0,0 @@
-==============
-Control Groups
-==============
-
-Written by Paul Menage <menage@google.com> based on
-Documentation/cgroup-v1/cpusets.rst
-
-Original copyright statements from cpusets.txt:
-
-Portions Copyright (C) 2004 BULL SA.
-
-Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
-
-Modified by Paul Jackson <pj@sgi.com>
-
-Modified by Christoph Lameter <cl@linux.com>
-
-.. CONTENTS:
-
-	1. Control Groups
-	1.1 What are cgroups ?
-	1.2 Why are cgroups needed ?
-	1.3 How are cgroups implemented ?
-	1.4 What does notify_on_release do ?
-	1.5 What does clone_children do ?
-	1.6 How do I use cgroups ?
-	2. Usage Examples and Syntax
-	2.1 Basic Usage
-	2.2 Attaching processes
-	2.3 Mounting hierarchies by name
-	3. Kernel API
-	3.1 Overview
-	3.2 Synchronization
-	3.3 Subsystem API
-	4. Extended attributes usage
-	5. Questions
-
-1. Control Groups
-=================
-
-1.1 What are cgroups ?
-----------------------
-
-Control Groups provide a mechanism for aggregating/partitioning sets of
-tasks, and all their future children, into hierarchical groups with
-specialized behaviour.
-
-Definitions:
-
-A *cgroup* associates a set of tasks with a set of parameters for one
-or more subsystems.
-
-A *subsystem* is a module that makes use of the task grouping
-facilities provided by cgroups to treat groups of tasks in
-particular ways. A subsystem is typically a "resource controller" that
-schedules a resource or applies per-cgroup limits, but it may be
-anything that wants to act on a group of processes, e.g. a
-virtualization subsystem.
-
-A *hierarchy* is a set of cgroups arranged in a tree, such that
-every task in the system is in exactly one of the cgroups in the
-hierarchy, and a set of subsystems; each subsystem has system-specific
-state attached to each cgroup in the hierarchy.  Each hierarchy has
-an instance of the cgroup virtual filesystem associated with it.
-
-At any one time there may be multiple active hierarchies of task
-cgroups. Each hierarchy is a partition of all tasks in the system.
-
-User-level code may create and destroy cgroups by name in an
-instance of the cgroup virtual file system, specify and query to
-which cgroup a task is assigned, and list the task PIDs assigned to
-a cgroup. Those creations and assignments only affect the hierarchy
-associated with that instance of the cgroup file system.
-
-On their own, the only use for cgroups is for simple job
-tracking. The intention is that other subsystems hook into the generic
-cgroup support to provide new attributes for cgroups, such as
-accounting/limiting the resources which processes in a cgroup can
-access. For example, cpusets (see Documentation/cgroup-v1/cpusets.rst) allow
-you to associate a set of CPUs and a set of memory nodes with the
-tasks in each cgroup.
-
-1.2 Why are cgroups needed ?
-----------------------------
-
-There are multiple efforts to provide process aggregations in the
-Linux kernel, mainly for resource-tracking purposes. Such efforts
-include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server
-namespaces. These all require the basic notion of a
-grouping/partitioning of processes, with newly forked processes ending
-up in the same group (cgroup) as their parent process.
-
-The kernel cgroup patch provides the minimum essential kernel
-mechanisms required to efficiently implement such groups. It has
-minimal impact on the system fast paths, and provides hooks for
-specific subsystems such as cpusets to provide additional behaviour as
-desired.
-
-Multiple hierarchy support is provided to allow for situations where
-the division of tasks into cgroups is distinctly different for
-different subsystems - having parallel hierarchies allows each
-hierarchy to be a natural division of tasks, without having to handle
-complex combinations of tasks that would be present if several
-unrelated subsystems needed to be forced into the same tree of
-cgroups.
-
-At one extreme, each resource controller or subsystem could be in a
-separate hierarchy; at the other extreme, all subsystems
-would be attached to the same hierarchy.
-
-As an example of a scenario (originally proposed by vatsa@in.ibm.com)
-that can benefit from multiple hierarchies, consider a large
-university server with various users - students, professors, system
-tasks etc. The resource planning for this server could be along the
-following lines::
-
-       CPU :          "Top cpuset"
-                       /       \
-               CPUSet1         CPUSet2
-                  |               |
-               (Professors)    (Students)
-
-               In addition (system tasks) are attached to topcpuset (so
-               that they can run anywhere) with a limit of 20%
-
-       Memory : Professors (50%), Students (30%), system (20%)
-
-       Disk : Professors (50%), Students (30%), system (20%)
-
-       Network : WWW browsing (20%), Network File System (60%), others (20%)
-                               / \
-               Professors (15%)  students (5%)
-
-Browsers like Firefox/Lynx go into the WWW network class, while (k)nfsd goes
-into the NFS network class.
-
-At the same time Firefox/Lynx will share an appropriate CPU/Memory class
-depending on who launched it (prof/student).
-
-With the ability to classify tasks differently for different resources
-(by putting those resource subsystems in different hierarchies),
-the admin can easily set up a script which receives exec notifications
-and depending on who is launching the browser he can::
-
-    # echo browser_pid > /sys/fs/cgroup/<restype>/<userclass>/tasks
-
-With only a single hierarchy, he now would potentially have to create
-a separate cgroup for every browser launched and associate it with
-appropriate network and other resource class.  This may lead to
-proliferation of such cgroups.
-
-Also let's say that the administrator would like to give enhanced network
-access temporarily to a student's browser (since it is night and the user
-wants to do online gaming :))  OR give one of the student's simulation
-apps enhanced CPU power.
-
-With ability to write PIDs directly to resource classes, it's just a
-matter of::
-
-       # echo pid > /sys/fs/cgroup/network/<new_class>/tasks
-       (after some time)
-       # echo pid > /sys/fs/cgroup/network/<orig_class>/tasks
-
-Without this ability, the administrator would have to split the cgroup into
-multiple separate ones and then associate the new cgroups with the
-new resource classes.
-
-
-
-1.3 How are cgroups implemented ?
----------------------------------
-
-Control Groups extends the kernel as follows:
-
- - Each task in the system has a reference-counted pointer to a
-   css_set.
-
- - A css_set contains a set of reference-counted pointers to
-   cgroup_subsys_state objects, one for each cgroup subsystem
-   registered in the system. There is no direct link from a task to
-   the cgroup of which it's a member in each hierarchy, but this
-   can be determined by following pointers through the
-   cgroup_subsys_state objects. This is because accessing the
-   subsystem state is something that's expected to happen frequently
-   and in performance-critical code, whereas operations that require a
-   task's actual cgroup assignments (in particular, moving between
-   cgroups) are less common. A linked list runs through the cg_list
-   field of each task_struct using the css_set, anchored at
-   css_set->tasks.
-
- - A cgroup hierarchy filesystem can be mounted for browsing and
-   manipulation from user space.
-
- - You can list all the tasks (by PID) attached to any cgroup.
-
-The implementation of cgroups requires a few, simple hooks
-into the rest of the kernel, none in performance-critical paths:
-
- - in init/main.c, to initialize the root cgroups and initial
-   css_set at system boot.
-
- - in fork and exit, to attach and detach a task from its css_set.
-
-In addition, a new file system of type "cgroup" may be mounted, to
-enable browsing and modifying the cgroups presently known to the
-kernel.  When mounting a cgroup hierarchy, you may specify a
-comma-separated list of subsystems to mount as the filesystem mount
-options.  By default, mounting the cgroup filesystem attempts to
-mount a hierarchy containing all registered subsystems.
-
-If an active hierarchy with exactly the same set of subsystems already
-exists, it will be reused for the new mount. If no existing hierarchy
-matches, and any of the requested subsystems are in use in an existing
-hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy
-is activated, associated with the requested subsystems.
-
-It's not currently possible to bind a new subsystem to an active
-cgroup hierarchy, or to unbind a subsystem from an active cgroup
-hierarchy. This may be possible in future, but is fraught with nasty
-error-recovery issues.
-
-When a cgroup filesystem is unmounted, if there are any
-child cgroups created below the top-level cgroup, that hierarchy
-will remain active even though unmounted; if there are no
-child cgroups then the hierarchy will be deactivated.
-
-No new system calls are added for cgroups - all support for
-querying and modifying cgroups is via this cgroup file system.
-
-Each task under /proc has an added file named 'cgroup' displaying,
-for each active hierarchy, the subsystem names and the cgroup name
-as the path relative to the root of the cgroup file system.
-
-Each cgroup is represented by a directory in the cgroup file system
-containing the following files describing that cgroup:
-
- - tasks: list of tasks (by PID) attached to that cgroup.  This list
-   is not guaranteed to be sorted.  Writing a thread ID into this file
-   moves the thread into this cgroup.
- - cgroup.procs: list of thread group IDs in the cgroup.  This list is
-   not guaranteed to be sorted or free of duplicate TGIDs, and userspace
-   should sort/uniquify the list if this property is required.
-   Writing a thread group ID into this file moves all threads in that
-   group into this cgroup.
- - notify_on_release flag: run the release agent on exit?
- - release_agent: the path to use for release notifications (this file
-   exists in the top cgroup only)
-
-Other subsystems such as cpusets may add additional files in each
-cgroup dir.
-
-New cgroups are created using the mkdir system call or shell
-command.  The properties of a cgroup, such as its flags, are
-modified by writing to the appropriate file in that cgroups
-directory, as listed above.
-
-The named hierarchical structure of nested cgroups allows partitioning
-a large system into nested, dynamically changeable, "soft-partitions".
-
-The attachment of each task, automatically inherited at fork by any
-children of that task, to a cgroup allows organizing the work load
-on a system into related sets of tasks.  A task may be re-attached to
-any other cgroup, if allowed by the permissions on the necessary
-cgroup file system directories.
-
-When a task is moved from one cgroup to another, it gets a new
-css_set pointer - if there's an already existing css_set with the
-desired collection of cgroups then that group is reused, otherwise a new
-css_set is allocated. The appropriate existing css_set is located by
-looking into a hash table.
-
-To allow access from a cgroup to the css_sets (and hence tasks)
-that comprise it, a set of cg_cgroup_link objects form a lattice;
-each cg_cgroup_link is linked into a list of cg_cgroup_links for
-a single cgroup on its cgrp_link_list field, and a list of
-cg_cgroup_links for a single css_set on its cg_link_list.
-
-Thus the set of tasks in a cgroup can be listed by iterating over
-each css_set that references the cgroup, and sub-iterating over
-each css_set's task set.
-
-The use of a Linux virtual file system (vfs) to represent the
-cgroup hierarchy provides for a familiar permission and name space
-for cgroups, with a minimum of additional kernel code.
-
-1.4 What does notify_on_release do ?
-------------------------------------
-
-If the notify_on_release flag is enabled (1) in a cgroup, then
-whenever the last task in the cgroup leaves (exits or attaches to
-some other cgroup) and the last child cgroup of that cgroup
-is removed, then the kernel runs the command specified by the contents
-of the "release_agent" file in that hierarchy's root directory,
-supplying the pathname (relative to the mount point of the cgroup
-file system) of the abandoned cgroup.  This enables automatic
-removal of abandoned cgroups.  The default value of
-notify_on_release in the root cgroup at system boot is disabled
-(0).  The default value of other cgroups at creation is the current
-value of their parents' notify_on_release settings. The default value of
-a cgroup hierarchy's release_agent path is empty.
-
-1.5 What does clone_children do ?
----------------------------------
-
-This flag only affects the cpuset controller. If the clone_children
-flag is enabled (1) in a cgroup, a new cpuset cgroup will copy its
-configuration from the parent during initialization.
-
-1.6 How do I use cgroups ?
---------------------------
-
-To start a new job that is to be contained within a cgroup, using
-the "cpuset" cgroup subsystem, the steps are something like::
-
- 1) mount -t tmpfs cgroup_root /sys/fs/cgroup
- 2) mkdir /sys/fs/cgroup/cpuset
- 3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
- 4) Create the new cgroup by doing mkdir's and write's (or echo's) in
-    the /sys/fs/cgroup/cpuset virtual file system.
- 5) Start a task that will be the "founding father" of the new job.
- 6) Attach that task to the new cgroup by writing its PID to the
-    /sys/fs/cgroup/cpuset tasks file for that cgroup.
- 7) fork, exec or clone the job tasks from this founding father task.
-
-For example, the following sequence of commands will setup a cgroup
-named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
-and then start a subshell 'sh' in that cgroup::
-
-  mount -t tmpfs cgroup_root /sys/fs/cgroup
-  mkdir /sys/fs/cgroup/cpuset
-  mount -t cgroup cpuset -ocpuset /sys/fs/cgroup/cpuset
-  cd /sys/fs/cgroup/cpuset
-  mkdir Charlie
-  cd Charlie
-  /bin/echo 2-3 > cpuset.cpus
-  /bin/echo 1 > cpuset.mems
-  /bin/echo $$ > tasks
-  sh
-  # The subshell 'sh' is now running in cgroup Charlie
-  # The next line should display '/Charlie'
-  cat /proc/self/cgroup
-
-2. Usage Examples and Syntax
-============================
-
-2.1 Basic Usage
----------------
-
-Creating, modifying, using cgroups can be done through the cgroup
-virtual filesystem.
-
-To mount a cgroup hierarchy with all available subsystems, type::
-
-  # mount -t cgroup xxx /sys/fs/cgroup
-
-The "xxx" is not interpreted by the cgroup code, but will appear in
-/proc/mounts so may be any useful identifying string that you like.
-
-Note: Some subsystems do not work without some user input first.  For instance,
-if cpusets are enabled the user will have to populate the cpus and mems files
-for each new cgroup created before that group can be used.
-
-As explained in section `1.2 Why are cgroups needed?` you should create
-different hierarchies of cgroups for each single resource or group of
-resources you want to control. Therefore, you should mount a tmpfs on
-/sys/fs/cgroup and create directories for each cgroup resource or resource
-group::
-
-  # mount -t tmpfs cgroup_root /sys/fs/cgroup
-  # mkdir /sys/fs/cgroup/rg1
-
-To mount a cgroup hierarchy with just the cpuset and memory
-subsystems, type::
-
-  # mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1
-
-While remounting cgroups is currently supported, it is not recommend
-to use it. Remounting allows changing bound subsystems and
-release_agent. Rebinding is hardly useful as it only works when the
-hierarchy is empty and release_agent itself should be replaced with
-conventional fsnotify. The support for remounting will be removed in
-the future.
-
-To Specify a hierarchy's release_agent::
-
-  # mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \
-    xxx /sys/fs/cgroup/rg1
-
-Note that specifying 'release_agent' more than once will return failure.
-
-Note that changing the set of subsystems is currently only supported
-when the hierarchy consists of a single (root) cgroup. Supporting
-the ability to arbitrarily bind/unbind subsystems from an existing
-cgroup hierarchy is intended to be implemented in the future.
-
-Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the
-tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1
-is the cgroup that holds the whole system.
-
-If you want to change the value of release_agent::
-
-  # echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent
-
-It can also be changed via remount.
-
-If you want to create a new cgroup under /sys/fs/cgroup/rg1::
-
-  # cd /sys/fs/cgroup/rg1
-  # mkdir my_cgroup
-
-Now you want to do something with this cgroup:
-
-  # cd my_cgroup
-
-In this directory you can find several files::
-
-  # ls
-  cgroup.procs notify_on_release tasks
-  (plus whatever files added by the attached subsystems)
-
-Now attach your shell to this cgroup::
-
-  # /bin/echo $$ > tasks
-
-You can also create cgroups inside your cgroup by using mkdir in this
-directory::
-
-  # mkdir my_sub_cs
-
-To remove a cgroup, just use rmdir::
-
-  # rmdir my_sub_cs
-
-This will fail if the cgroup is in use (has cgroups inside, or
-has processes attached, or is held alive by other subsystem-specific
-reference).
-
-2.2 Attaching processes
------------------------
-
-::
-
-  # /bin/echo PID > tasks
-
-Note that it is PID, not PIDs. You can only attach ONE task at a time.
-If you have several tasks to attach, you have to do it one after another::
-
-  # /bin/echo PID1 > tasks
-  # /bin/echo PID2 > tasks
-	  ...
-  # /bin/echo PIDn > tasks
-
-You can attach the current shell task by echoing 0::
-
-  # echo 0 > tasks
-
-You can use the cgroup.procs file instead of the tasks file to move all
-threads in a threadgroup at once. Echoing the PID of any task in a
-threadgroup to cgroup.procs causes all tasks in that threadgroup to be
-attached to the cgroup. Writing 0 to cgroup.procs moves all tasks
-in the writing task's threadgroup.
-
-Note: Since every task is always a member of exactly one cgroup in each
-mounted hierarchy, to remove a task from its current cgroup you must
-move it into a new cgroup (possibly the root cgroup) by writing to the
-new cgroup's tasks file.
-
-Note: Due to some restrictions enforced by some cgroup subsystems, moving
-a process to another cgroup can fail.
-
-2.3 Mounting hierarchies by name
---------------------------------
-
-Passing the name=<x> option when mounting a cgroups hierarchy
-associates the given name with the hierarchy.  This can be used when
-mounting a pre-existing hierarchy, in order to refer to it by name
-rather than by its set of active subsystems.  Each hierarchy is either
-nameless, or has a unique name.
-
-The name should match [\w.-]+
-
-When passing a name=<x> option for a new hierarchy, you need to
-specify subsystems manually; the legacy behaviour of mounting all
-subsystems when none are explicitly specified is not supported when
-you give a subsystem a name.
-
-The name of the subsystem appears as part of the hierarchy description
-in /proc/mounts and /proc/<pid>/cgroups.
-
-
-3. Kernel API
-=============
-
-3.1 Overview
-------------
-
-Each kernel subsystem that wants to hook into the generic cgroup
-system needs to create a cgroup_subsys object. This contains
-various methods, which are callbacks from the cgroup system, along
-with a subsystem ID which will be assigned by the cgroup system.
-
-Other fields in the cgroup_subsys object include:
-
-- subsys_id: a unique array index for the subsystem, indicating which
-  entry in cgroup->subsys[] this subsystem should be managing.
-
-- name: should be initialized to a unique subsystem name. Should be
-  no longer than MAX_CGROUP_TYPE_NAMELEN.
-
-- early_init: indicate if the subsystem needs early initialization
-  at system boot.
-
-Each cgroup object created by the system has an array of pointers,
-indexed by subsystem ID; this pointer is entirely managed by the
-subsystem; the generic cgroup code will never touch this pointer.
-
-3.2 Synchronization
--------------------
-
-There is a global mutex, cgroup_mutex, used by the cgroup
-system. This should be taken by anything that wants to modify a
-cgroup. It may also be taken to prevent cgroups from being
-modified, but more specific locks may be more appropriate in that
-situation.
-
-See kernel/cgroup.c for more details.
-
-Subsystems can take/release the cgroup_mutex via the functions
-cgroup_lock()/cgroup_unlock().
-
-Accessing a task's cgroup pointer may be done in the following ways:
-- while holding cgroup_mutex
-- while holding the task's alloc_lock (via task_lock())
-- inside an rcu_read_lock() section via rcu_dereference()
-
-3.3 Subsystem API
------------------
-
-Each subsystem should:
-
-- add an entry in linux/cgroup_subsys.h
-- define a cgroup_subsys object called <name>_cgrp_subsys
-
-Each subsystem may export the following methods. The only mandatory
-methods are css_alloc/free. Any others that are null are presumed to
-be successful no-ops.
-
-``struct cgroup_subsys_state *css_alloc(struct cgroup *cgrp)``
-(cgroup_mutex held by caller)
-
-Called to allocate a subsystem state object for a cgroup. The
-subsystem should allocate its subsystem state object for the passed
-cgroup, returning a pointer to the new object on success or a
-ERR_PTR() value. On success, the subsystem pointer should point to
-a structure of type cgroup_subsys_state (typically embedded in a
-larger subsystem-specific object), which will be initialized by the
-cgroup system. Note that this will be called at initialization to
-create the root subsystem state for this subsystem; this case can be
-identified by the passed cgroup object having a NULL parent (since
-it's the root of the hierarchy) and may be an appropriate place for
-initialization code.
-
-``int css_online(struct cgroup *cgrp)``
-(cgroup_mutex held by caller)
-
-Called after @cgrp successfully completed all allocations and made
-visible to cgroup_for_each_child/descendant_*() iterators. The
-subsystem may choose to fail creation by returning -errno. This
-callback can be used to implement reliable state sharing and
-propagation along the hierarchy. See the comment on
-cgroup_for_each_descendant_pre() for details.
-
-``void css_offline(struct cgroup *cgrp);``
-(cgroup_mutex held by caller)
-
-This is the counterpart of css_online() and called iff css_online()
-has succeeded on @cgrp. This signifies the beginning of the end of
-@cgrp. @cgrp is being removed and the subsystem should start dropping
-all references it's holding on @cgrp. When all references are dropped,
-cgroup removal will proceed to the next step - css_free(). After this
-callback, @cgrp should be considered dead to the subsystem.
-
-``void css_free(struct cgroup *cgrp)``
-(cgroup_mutex held by caller)
-
-The cgroup system is about to free @cgrp; the subsystem should free
-its subsystem state object. By the time this method is called, @cgrp
-is completely unused; @cgrp->parent is still valid. (Note - can also
-be called for a newly-created cgroup if an error occurs after this
-subsystem's create() method has been called for the new cgroup).
-
-``int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)``
-(cgroup_mutex held by caller)
-
-Called prior to moving one or more tasks into a cgroup; if the
-subsystem returns an error, this will abort the attach operation.
-@tset contains the tasks to be attached and is guaranteed to have at
-least one task in it.
-
-If there are multiple tasks in the taskset, then:
-  - it's guaranteed that all are from the same thread group
-  - @tset contains all tasks from the thread group whether or not
-    they're switching cgroups
-  - the first task is the leader
-
-Each @tset entry also contains the task's old cgroup and tasks which
-aren't switching cgroup can be skipped easily using the
-cgroup_taskset_for_each() iterator. Note that this isn't called on a
-fork. If this method returns 0 (success) then this should remain valid
-while the caller holds cgroup_mutex and it is ensured that either
-attach() or cancel_attach() will be called in future.
-
-``void css_reset(struct cgroup_subsys_state *css)``
-(cgroup_mutex held by caller)
-
-An optional operation which should restore @css's configuration to the
-initial state.  This is currently only used on the unified hierarchy
-when a subsystem is disabled on a cgroup through
-"cgroup.subtree_control" but should remain enabled because other
-subsystems depend on it.  cgroup core makes such a css invisible by
-removing the associated interface files and invokes this callback so
-that the hidden subsystem can return to the initial neutral state.
-This prevents unexpected resource control from a hidden css and
-ensures that the configuration is in the initial state when it is made
-visible again later.
-
-``void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)``
-(cgroup_mutex held by caller)
-
-Called when a task attach operation has failed after can_attach() has succeeded.
-A subsystem whose can_attach() has some side-effects should provide this
-function, so that the subsystem can implement a rollback. If not, not necessary.
-This will be called only about subsystems whose can_attach() operation have
-succeeded. The parameters are identical to can_attach().
-
-``void attach(struct cgroup *cgrp, struct cgroup_taskset *tset)``
-(cgroup_mutex held by caller)
-
-Called after the task has been attached to the cgroup, to allow any
-post-attachment activity that requires memory allocations or blocking.
-The parameters are identical to can_attach().
-
-``void fork(struct task_struct *task)``
-
-Called when a task is forked into a cgroup.
-
-``void exit(struct task_struct *task)``
-
-Called during task exit.
-
-``void free(struct task_struct *task)``
-
-Called when the task_struct is freed.
-
-``void bind(struct cgroup *root)``
-(cgroup_mutex held by caller)
-
-Called when a cgroup subsystem is rebound to a different hierarchy
-and root cgroup. Currently this will only involve movement between
-the default hierarchy (which never has sub-cgroups) and a hierarchy
-that is being created/destroyed (and hence has no sub-cgroups).
-
-4. Extended attribute usage
-===========================
-
-cgroup filesystem supports certain types of extended attributes in its
-directories and files.  The current supported types are:
-
-	- Trusted (XATTR_TRUSTED)
-	- Security (XATTR_SECURITY)
-
-Both require CAP_SYS_ADMIN capability to set.
-
-Like in tmpfs, the extended attributes in cgroup filesystem are stored
-using kernel memory and it's advised to keep the usage at minimum.  This
-is the reason why user defined extended attributes are not supported, since
-any user can do it and there's no limit in the value size.
-
-The current known users for this feature are SELinux to limit cgroup usage
-in containers and systemd for assorted meta data like main PID in a cgroup
-(systemd creates a cgroup per service).
-
-5. Questions
-============
-
-::
-
-  Q: what's up with this '/bin/echo' ?
-  A: bash's builtin 'echo' command does not check calls to write() against
-     errors. If you use it in the cgroup file system, you won't be
-     able to tell whether a command succeeded or failed.
-
-  Q: When I attach processes, only the first of the line gets really attached !
-  A: We can only return one error code per call to write(). So you should also
-     put only ONE PID.
diff --git a/Documentation/cgroup-v1/cpuacct.rst b/Documentation/cgroup-v1/cpuacct.rst
deleted file mode 100644
index d30ed81d2ad7..000000000000
--- a/Documentation/cgroup-v1/cpuacct.rst
+++ /dev/null
@@ -1,50 +0,0 @@
-=========================
-CPU Accounting Controller
-=========================
-
-The CPU accounting controller is used to group tasks using cgroups and
-account the CPU usage of these groups of tasks.
-
-The CPU accounting controller supports multi-hierarchy groups. An accounting
-group accumulates the CPU usage of all of its child groups and the tasks
-directly present in its group.
-
-Accounting groups can be created by first mounting the cgroup filesystem::
-
-  # mount -t cgroup -ocpuacct none /sys/fs/cgroup
-
-With the above step, the initial or the parent accounting group becomes
-visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
-the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
-/sys/fs/cgroup/cpuacct.usage gives the CPU time (in nanoseconds) obtained
-by this group which is essentially the CPU time obtained by all the tasks
-in the system.
-
-New accounting groups can be created under the parent group /sys/fs/cgroup::
-
-  # cd /sys/fs/cgroup
-  # mkdir g1
-  # echo $$ > g1/tasks
-
-The above steps create a new group g1 and move the current shell
-process (bash) into it. CPU time consumed by this bash and its children
-can be obtained from g1/cpuacct.usage and the same is accumulated in
-/sys/fs/cgroup/cpuacct.usage also.
-
-cpuacct.stat file lists a few statistics which further divide the
-CPU time obtained by the cgroup into user and system times. Currently
-the following statistics are supported:
-
-user: Time spent by tasks of the cgroup in user mode.
-system: Time spent by tasks of the cgroup in kernel mode.
-
-user and system are in USER_HZ unit.
-
-cpuacct controller uses percpu_counter interface to collect user and
-system times. This has two side effects:
-
-- It is theoretically possible to see wrong values for user and system times.
-  This is because percpu_counter_read() on 32bit systems isn't safe
-  against concurrent writes.
-- It is possible to see slightly outdated values for user and system times
-  due to the batch processing nature of percpu_counter.
diff --git a/Documentation/cgroup-v1/cpusets.rst b/Documentation/cgroup-v1/cpusets.rst
deleted file mode 100644
index b6a42cdea72b..000000000000
--- a/Documentation/cgroup-v1/cpusets.rst
+++ /dev/null
@@ -1,866 +0,0 @@
-=======
-CPUSETS
-=======
-
-Copyright (C) 2004 BULL SA.
-
-Written by Simon.Derr@bull.net
-
-- Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
-- Modified by Paul Jackson <pj@sgi.com>
-- Modified by Christoph Lameter <cl@linux.com>
-- Modified by Paul Menage <menage@google.com>
-- Modified by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
-
-.. CONTENTS:
-
-   1. Cpusets
-     1.1 What are cpusets ?
-     1.2 Why are cpusets needed ?
-     1.3 How are cpusets implemented ?
-     1.4 What are exclusive cpusets ?
-     1.5 What is memory_pressure ?
-     1.6 What is memory spread ?
-     1.7 What is sched_load_balance ?
-     1.8 What is sched_relax_domain_level ?
-     1.9 How do I use cpusets ?
-   2. Usage Examples and Syntax
-     2.1 Basic Usage
-     2.2 Adding/removing cpus
-     2.3 Setting flags
-     2.4 Attaching processes
-   3. Questions
-   4. Contact
-
-1. Cpusets
-==========
-
-1.1 What are cpusets ?
-----------------------
-
-Cpusets provide a mechanism for assigning a set of CPUs and Memory
-Nodes to a set of tasks.   In this document "Memory Node" refers to
-an on-line node that contains memory.
-
-Cpusets constrain the CPU and Memory placement of tasks to only
-the resources within a task's current cpuset.  They form a nested
-hierarchy visible in a virtual file system.  These are the essential
-hooks, beyond what is already present, required to manage dynamic
-job placement on large systems.
-
-Cpusets use the generic cgroup subsystem described in
-Documentation/cgroup-v1/cgroups.rst.
-
-Requests by a task, using the sched_setaffinity(2) system call to
-include CPUs in its CPU affinity mask, and using the mbind(2) and
-set_mempolicy(2) system calls to include Memory Nodes in its memory
-policy, are both filtered through that task's cpuset, filtering out any
-CPUs or Memory Nodes not in that cpuset.  The scheduler will not
-schedule a task on a CPU that is not allowed in its cpus_allowed
-vector, and the kernel page allocator will not allocate a page on a
-node that is not allowed in the requesting task's mems_allowed vector.
-
-User level code may create and destroy cpusets by name in the cgroup
-virtual file system, manage the attributes and permissions of these
-cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
-specify and query to which cpuset a task is assigned, and list the
-task pids assigned to a cpuset.
-
-
-1.2 Why are cpusets needed ?
-----------------------------
-
-The management of large computer systems, with many processors (CPUs),
-complex memory cache hierarchies and multiple Memory Nodes having
-non-uniform access times (NUMA) presents additional challenges for
-the efficient scheduling and memory placement of processes.
-
-Frequently more modest sized systems can be operated with adequate
-efficiency just by letting the operating system automatically share
-the available CPU and Memory resources amongst the requesting tasks.
-
-But larger systems, which benefit more from careful processor and
-memory placement to reduce memory access times and contention,
-and which typically represent a larger investment for the customer,
-can benefit from explicitly placing jobs on properly sized subsets of
-the system.
-
-This can be especially valuable on:
-
-    * Web Servers running multiple instances of the same web application,
-    * Servers running different applications (for instance, a web server
-      and a database), or
-    * NUMA systems running large HPC applications with demanding
-      performance characteristics.
-
-These subsets, or "soft partitions" must be able to be dynamically
-adjusted, as the job mix changes, without impacting other concurrently
-executing jobs. The location of the running jobs pages may also be moved
-when the memory locations are changed.
-
-The kernel cpuset patch provides the minimum essential kernel
-mechanisms required to efficiently implement such subsets.  It
-leverages existing CPU and Memory Placement facilities in the Linux
-kernel to avoid any additional impact on the critical scheduler or
-memory allocator code.
-
-
-1.3 How are cpusets implemented ?
----------------------------------
-
-Cpusets provide a Linux kernel mechanism to constrain which CPUs and
-Memory Nodes are used by a process or set of processes.
-
-The Linux kernel already has a pair of mechanisms to specify on which
-CPUs a task may be scheduled (sched_setaffinity) and on which Memory
-Nodes it may obtain memory (mbind, set_mempolicy).
-
-Cpusets extends these two mechanisms as follows:
-
- - Cpusets are sets of allowed CPUs and Memory Nodes, known to the
-   kernel.
- - Each task in the system is attached to a cpuset, via a pointer
-   in the task structure to a reference counted cgroup structure.
- - Calls to sched_setaffinity are filtered to just those CPUs
-   allowed in that task's cpuset.
- - Calls to mbind and set_mempolicy are filtered to just
-   those Memory Nodes allowed in that task's cpuset.
- - The root cpuset contains all the systems CPUs and Memory
-   Nodes.
- - For any cpuset, one can define child cpusets containing a subset
-   of the parents CPU and Memory Node resources.
- - The hierarchy of cpusets can be mounted at /dev/cpuset, for
-   browsing and manipulation from user space.
- - A cpuset may be marked exclusive, which ensures that no other
-   cpuset (except direct ancestors and descendants) may contain
-   any overlapping CPUs or Memory Nodes.
- - You can list all the tasks (by pid) attached to any cpuset.
-
-The implementation of cpusets requires a few, simple hooks
-into the rest of the kernel, none in performance critical paths:
-
- - in init/main.c, to initialize the root cpuset at system boot.
- - in fork and exit, to attach and detach a task from its cpuset.
- - in sched_setaffinity, to mask the requested CPUs by what's
-   allowed in that task's cpuset.
- - in sched.c migrate_live_tasks(), to keep migrating tasks within
-   the CPUs allowed by their cpuset, if possible.
- - in the mbind and set_mempolicy system calls, to mask the requested
-   Memory Nodes by what's allowed in that task's cpuset.
- - in page_alloc.c, to restrict memory to allowed nodes.
- - in vmscan.c, to restrict page recovery to the current cpuset.
-
-You should mount the "cgroup" filesystem type in order to enable
-browsing and modifying the cpusets presently known to the kernel.  No
-new system calls are added for cpusets - all support for querying and
-modifying cpusets is via this cpuset file system.
-
-The /proc/<pid>/status file for each task has four added lines,
-displaying the task's cpus_allowed (on which CPUs it may be scheduled)
-and mems_allowed (on which Memory Nodes it may obtain memory),
-in the two formats seen in the following example::
-
-  Cpus_allowed:   ffffffff,ffffffff,ffffffff,ffffffff
-  Cpus_allowed_list:      0-127
-  Mems_allowed:   ffffffff,ffffffff
-  Mems_allowed_list:      0-63
-
-Each cpuset is represented by a directory in the cgroup file system
-containing (on top of the standard cgroup files) the following
-files describing that cpuset:
-
- - cpuset.cpus: list of CPUs in that cpuset
- - cpuset.mems: list of Memory Nodes in that cpuset
- - cpuset.memory_migrate flag: if set, move pages to cpusets nodes
- - cpuset.cpu_exclusive flag: is cpu placement exclusive?
- - cpuset.mem_exclusive flag: is memory placement exclusive?
- - cpuset.mem_hardwall flag:  is memory allocation hardwalled
- - cpuset.memory_pressure: measure of how much paging pressure in cpuset
- - cpuset.memory_spread_page flag: if set, spread page cache evenly on allowed nodes
- - cpuset.memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes
- - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset
- - cpuset.sched_relax_domain_level: the searching range when migrating tasks
-
-In addition, only the root cpuset has the following file:
-
- - cpuset.memory_pressure_enabled flag: compute memory_pressure?
-
-New cpusets are created using the mkdir system call or shell
-command.  The properties of a cpuset, such as its flags, allowed
-CPUs and Memory Nodes, and attached tasks, are modified by writing
-to the appropriate file in that cpusets directory, as listed above.
-
-The named hierarchical structure of nested cpusets allows partitioning
-a large system into nested, dynamically changeable, "soft-partitions".
-
-The attachment of each task, automatically inherited at fork by any
-children of that task, to a cpuset allows organizing the work load
-on a system into related sets of tasks such that each set is constrained
-to using the CPUs and Memory Nodes of a particular cpuset.  A task
-may be re-attached to any other cpuset, if allowed by the permissions
-on the necessary cpuset file system directories.
-
-Such management of a system "in the large" integrates smoothly with
-the detailed placement done on individual tasks and memory regions
-using the sched_setaffinity, mbind and set_mempolicy system calls.
-
-The following rules apply to each cpuset:
-
- - Its CPUs and Memory Nodes must be a subset of its parents.
- - It can't be marked exclusive unless its parent is.
- - If its cpu or memory is exclusive, they may not overlap any sibling.
-
-These rules, and the natural hierarchy of cpusets, enable efficient
-enforcement of the exclusive guarantee, without having to scan all
-cpusets every time any of them change to ensure nothing overlaps a
-exclusive cpuset.  Also, the use of a Linux virtual file system (vfs)
-to represent the cpuset hierarchy provides for a familiar permission
-and name space for cpusets, with a minimum of additional kernel code.
-
-The cpus and mems files in the root (top_cpuset) cpuset are
-read-only.  The cpus file automatically tracks the value of
-cpu_online_mask using a CPU hotplug notifier, and the mems file
-automatically tracks the value of node_states[N_MEMORY]--i.e.,
-nodes with memory--using the cpuset_track_online_nodes() hook.
-
-
-1.4 What are exclusive cpusets ?
---------------------------------
-
-If a cpuset is cpu or mem exclusive, no other cpuset, other than
-a direct ancestor or descendant, may share any of the same CPUs or
-Memory Nodes.
-
-A cpuset that is cpuset.mem_exclusive *or* cpuset.mem_hardwall is "hardwalled",
-i.e. it restricts kernel allocations for page, buffer and other data
-commonly shared by the kernel across multiple users.  All cpusets,
-whether hardwalled or not, restrict allocations of memory for user
-space.  This enables configuring a system so that several independent
-jobs can share common kernel data, such as file system pages, while
-isolating each job's user allocation in its own cpuset.  To do this,
-construct a large mem_exclusive cpuset to hold all the jobs, and
-construct child, non-mem_exclusive cpusets for each individual job.
-Only a small amount of typical kernel memory, such as requests from
-interrupt handlers, is allowed to be taken outside even a
-mem_exclusive cpuset.
-
-
-1.5 What is memory_pressure ?
------------------------------
-The memory_pressure of a cpuset provides a simple per-cpuset metric
-of the rate that the tasks in a cpuset are attempting to free up in
-use memory on the nodes of the cpuset to satisfy additional memory
-requests.
-
-This enables batch managers monitoring jobs running in dedicated
-cpusets to efficiently detect what level of memory pressure that job
-is causing.
-
-This is useful both on tightly managed systems running a wide mix of
-submitted jobs, which may choose to terminate or re-prioritize jobs that
-are trying to use more memory than allowed on the nodes assigned to them,
-and with tightly coupled, long running, massively parallel scientific
-computing jobs that will dramatically fail to meet required performance
-goals if they start to use more memory than allowed to them.
-
-This mechanism provides a very economical way for the batch manager
-to monitor a cpuset for signs of memory pressure.  It's up to the
-batch manager or other user code to decide what to do about it and
-take action.
-
-==>
-    Unless this feature is enabled by writing "1" to the special file
-    /dev/cpuset/memory_pressure_enabled, the hook in the rebalance
-    code of __alloc_pages() for this metric reduces to simply noticing
-    that the cpuset_memory_pressure_enabled flag is zero.  So only
-    systems that enable this feature will compute the metric.
-
-Why a per-cpuset, running average:
-
-    Because this meter is per-cpuset, rather than per-task or mm,
-    the system load imposed by a batch scheduler monitoring this
-    metric is sharply reduced on large systems, because a scan of
-    the tasklist can be avoided on each set of queries.
-
-    Because this meter is a running average, instead of an accumulating
-    counter, a batch scheduler can detect memory pressure with a
-    single read, instead of having to read and accumulate results
-    for a period of time.
-
-    Because this meter is per-cpuset rather than per-task or mm,
-    the batch scheduler can obtain the key information, memory
-    pressure in a cpuset, with a single read, rather than having to
-    query and accumulate results over all the (dynamically changing)
-    set of tasks in the cpuset.
-
-A per-cpuset simple digital filter (requires a spinlock and 3 words
-of data per-cpuset) is kept, and updated by any task attached to that
-cpuset, if it enters the synchronous (direct) page reclaim code.
-
-A per-cpuset file provides an integer number representing the recent
-(half-life of 10 seconds) rate of direct page reclaims caused by
-the tasks in the cpuset, in units of reclaims attempted per second,
-times 1000.
-
-
-1.6 What is memory spread ?
----------------------------
-There are two boolean flag files per cpuset that control where the
-kernel allocates pages for the file system buffers and related in
-kernel data structures.  They are called 'cpuset.memory_spread_page' and
-'cpuset.memory_spread_slab'.
-
-If the per-cpuset boolean flag file 'cpuset.memory_spread_page' is set, then
-the kernel will spread the file system buffers (page cache) evenly
-over all the nodes that the faulting task is allowed to use, instead
-of preferring to put those pages on the node where the task is running.
-
-If the per-cpuset boolean flag file 'cpuset.memory_spread_slab' is set,
-then the kernel will spread some file system related slab caches,
-such as for inodes and dentries evenly over all the nodes that the
-faulting task is allowed to use, instead of preferring to put those
-pages on the node where the task is running.
-
-The setting of these flags does not affect anonymous data segment or
-stack segment pages of a task.
-
-By default, both kinds of memory spreading are off, and memory
-pages are allocated on the node local to where the task is running,
-except perhaps as modified by the task's NUMA mempolicy or cpuset
-configuration, so long as sufficient free memory pages are available.
-
-When new cpusets are created, they inherit the memory spread settings
-of their parent.
-
-Setting memory spreading causes allocations for the affected page
-or slab caches to ignore the task's NUMA mempolicy and be spread
-instead.    Tasks using mbind() or set_mempolicy() calls to set NUMA
-mempolicies will not notice any change in these calls as a result of
-their containing task's memory spread settings.  If memory spreading
-is turned off, then the currently specified NUMA mempolicy once again
-applies to memory page allocations.
-
-Both 'cpuset.memory_spread_page' and 'cpuset.memory_spread_slab' are boolean flag
-files.  By default they contain "0", meaning that the feature is off
-for that cpuset.  If a "1" is written to that file, then that turns
-the named feature on.
-
-The implementation is simple.
-
-Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag
-PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently
-joins that cpuset.  The page allocation calls for the page cache
-is modified to perform an inline check for this PFA_SPREAD_PAGE task
-flag, and if set, a call to a new routine cpuset_mem_spread_node()
-returns the node to prefer for the allocation.
-
-Similarly, setting 'cpuset.memory_spread_slab' turns on the flag
-PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate
-pages from the node returned by cpuset_mem_spread_node().
-
-The cpuset_mem_spread_node() routine is also simple.  It uses the
-value of a per-task rotor cpuset_mem_spread_rotor to select the next
-node in the current task's mems_allowed to prefer for the allocation.
-
-This memory placement policy is also known (in other contexts) as
-round-robin or interleave.
-
-This policy can provide substantial improvements for jobs that need
-to place thread local data on the corresponding node, but that need
-to access large file system data sets that need to be spread across
-the several nodes in the jobs cpuset in order to fit.  Without this
-policy, especially for jobs that might have one thread reading in the
-data set, the memory allocation across the nodes in the jobs cpuset
-can become very uneven.
-
-1.7 What is sched_load_balance ?
---------------------------------
-
-The kernel scheduler (kernel/sched/core.c) automatically load balances
-tasks.  If one CPU is underutilized, kernel code running on that
-CPU will look for tasks on other more overloaded CPUs and move those
-tasks to itself, within the constraints of such placement mechanisms
-as cpusets and sched_setaffinity.
-
-The algorithmic cost of load balancing and its impact on key shared
-kernel data structures such as the task list increases more than
-linearly with the number of CPUs being balanced.  So the scheduler
-has support to partition the systems CPUs into a number of sched
-domains such that it only load balances within each sched domain.
-Each sched domain covers some subset of the CPUs in the system;
-no two sched domains overlap; some CPUs might not be in any sched
-domain and hence won't be load balanced.
-
-Put simply, it costs less to balance between two smaller sched domains
-than one big one, but doing so means that overloads in one of the
-two domains won't be load balanced to the other one.
-
-By default, there is one sched domain covering all CPUs, including those
-marked isolated using the kernel boot time "isolcpus=" argument. However,
-the isolated CPUs will not participate in load balancing, and will not
-have tasks running on them unless explicitly assigned.
-
-This default load balancing across all CPUs is not well suited for
-the following two situations:
-
- 1) On large systems, load balancing across many CPUs is expensive.
-    If the system is managed using cpusets to place independent jobs
-    on separate sets of CPUs, full load balancing is unnecessary.
- 2) Systems supporting realtime on some CPUs need to minimize
-    system overhead on those CPUs, including avoiding task load
-    balancing if that is not needed.
-
-When the per-cpuset flag "cpuset.sched_load_balance" is enabled (the default
-setting), it requests that all the CPUs in that cpusets allowed 'cpuset.cpus'
-be contained in a single sched domain, ensuring that load balancing
-can move a task (not otherwised pinned, as by sched_setaffinity)
-from any CPU in that cpuset to any other.
-
-When the per-cpuset flag "cpuset.sched_load_balance" is disabled, then the
-scheduler will avoid load balancing across the CPUs in that cpuset,
---except-- in so far as is necessary because some overlapping cpuset
-has "sched_load_balance" enabled.
-
-So, for example, if the top cpuset has the flag "cpuset.sched_load_balance"
-enabled, then the scheduler will have one sched domain covering all
-CPUs, and the setting of the "cpuset.sched_load_balance" flag in any other
-cpusets won't matter, as we're already fully load balancing.
-
-Therefore in the above two situations, the top cpuset flag
-"cpuset.sched_load_balance" should be disabled, and only some of the smaller,
-child cpusets have this flag enabled.
-
-When doing this, you don't usually want to leave any unpinned tasks in
-the top cpuset that might use non-trivial amounts of CPU, as such tasks
-may be artificially constrained to some subset of CPUs, depending on
-the particulars of this flag setting in descendant cpusets.  Even if
-such a task could use spare CPU cycles in some other CPUs, the kernel
-scheduler might not consider the possibility of load balancing that
-task to that underused CPU.
-
-Of course, tasks pinned to a particular CPU can be left in a cpuset
-that disables "cpuset.sched_load_balance" as those tasks aren't going anywhere
-else anyway.
-
-There is an impedance mismatch here, between cpusets and sched domains.
-Cpusets are hierarchical and nest.  Sched domains are flat; they don't
-overlap and each CPU is in at most one sched domain.
-
-It is necessary for sched domains to be flat because load balancing
-across partially overlapping sets of CPUs would risk unstable dynamics
-that would be beyond our understanding.  So if each of two partially
-overlapping cpusets enables the flag 'cpuset.sched_load_balance', then we
-form a single sched domain that is a superset of both.  We won't move
-a task to a CPU outside its cpuset, but the scheduler load balancing
-code might waste some compute cycles considering that possibility.
-
-This mismatch is why there is not a simple one-to-one relation
-between which cpusets have the flag "cpuset.sched_load_balance" enabled,
-and the sched domain configuration.  If a cpuset enables the flag, it
-will get balancing across all its CPUs, but if it disables the flag,
-it will only be assured of no load balancing if no other overlapping
-cpuset enables the flag.
-
-If two cpusets have partially overlapping 'cpuset.cpus' allowed, and only
-one of them has this flag enabled, then the other may find its
-tasks only partially load balanced, just on the overlapping CPUs.
-This is just the general case of the top_cpuset example given a few
-paragraphs above.  In the general case, as in the top cpuset case,
-don't leave tasks that might use non-trivial amounts of CPU in
-such partially load balanced cpusets, as they may be artificially
-constrained to some subset of the CPUs allowed to them, for lack of
-load balancing to the other CPUs.
-
-CPUs in "cpuset.isolcpus" were excluded from load balancing by the
-isolcpus= kernel boot option, and will never be load balanced regardless
-of the value of "cpuset.sched_load_balance" in any cpuset.
-
-1.7.1 sched_load_balance implementation details.
-------------------------------------------------
-
-The per-cpuset flag 'cpuset.sched_load_balance' defaults to enabled (contrary
-to most cpuset flags.)  When enabled for a cpuset, the kernel will
-ensure that it can load balance across all the CPUs in that cpuset
-(makes sure that all the CPUs in the cpus_allowed of that cpuset are
-in the same sched domain.)
-
-If two overlapping cpusets both have 'cpuset.sched_load_balance' enabled,
-then they will be (must be) both in the same sched domain.
-
-If, as is the default, the top cpuset has 'cpuset.sched_load_balance' enabled,
-then by the above that means there is a single sched domain covering
-the whole system, regardless of any other cpuset settings.
-
-The kernel commits to user space that it will avoid load balancing
-where it can.  It will pick as fine a granularity partition of sched
-domains as it can while still providing load balancing for any set
-of CPUs allowed to a cpuset having 'cpuset.sched_load_balance' enabled.
-
-The internal kernel cpuset to scheduler interface passes from the
-cpuset code to the scheduler code a partition of the load balanced
-CPUs in the system. This partition is a set of subsets (represented
-as an array of struct cpumask) of CPUs, pairwise disjoint, that cover
-all the CPUs that must be load balanced.
-
-The cpuset code builds a new such partition and passes it to the
-scheduler sched domain setup code, to have the sched domains rebuilt
-as necessary, whenever:
-
- - the 'cpuset.sched_load_balance' flag of a cpuset with non-empty CPUs changes,
- - or CPUs come or go from a cpuset with this flag enabled,
- - or 'cpuset.sched_relax_domain_level' value of a cpuset with non-empty CPUs
-   and with this flag enabled changes,
- - or a cpuset with non-empty CPUs and with this flag enabled is removed,
- - or a cpu is offlined/onlined.
-
-This partition exactly defines what sched domains the scheduler should
-setup - one sched domain for each element (struct cpumask) in the
-partition.
-
-The scheduler remembers the currently active sched domain partitions.
-When the scheduler routine partition_sched_domains() is invoked from
-the cpuset code to update these sched domains, it compares the new
-partition requested with the current, and updates its sched domains,
-removing the old and adding the new, for each change.
-
-
-1.8 What is sched_relax_domain_level ?
---------------------------------------
-
-In sched domain, the scheduler migrates tasks in 2 ways; periodic load
-balance on tick, and at time of some schedule events.
-
-When a task is woken up, scheduler try to move the task on idle CPU.
-For example, if a task A running on CPU X activates another task B
-on the same CPU X, and if CPU Y is X's sibling and performing idle,
-then scheduler migrate task B to CPU Y so that task B can start on
-CPU Y without waiting task A on CPU X.
-
-And if a CPU run out of tasks in its runqueue, the CPU try to pull
-extra tasks from other busy CPUs to help them before it is going to
-be idle.
-
-Of course it takes some searching cost to find movable tasks and/or
-idle CPUs, the scheduler might not search all CPUs in the domain
-every time.  In fact, in some architectures, the searching ranges on
-events are limited in the same socket or node where the CPU locates,
-while the load balance on tick searches all.
-
-For example, assume CPU Z is relatively far from CPU X.  Even if CPU Z
-is idle while CPU X and the siblings are busy, scheduler can't migrate
-woken task B from X to Z since it is out of its searching range.
-As the result, task B on CPU X need to wait task A or wait load balance
-on the next tick.  For some applications in special situation, waiting
-1 tick may be too long.
-
-The 'cpuset.sched_relax_domain_level' file allows you to request changing
-this searching range as you like.  This file takes int value which
-indicates size of searching range in levels ideally as follows,
-otherwise initial value -1 that indicates the cpuset has no request.
-
-====== ===========================================================
-  -1   no request. use system default or follow request of others.
-   0   no search.
-   1   search siblings (hyperthreads in a core).
-   2   search cores in a package.
-   3   search cpus in a node [= system wide on non-NUMA system]
-   4   search nodes in a chunk of node [on NUMA system]
-   5   search system wide [on NUMA system]
-====== ===========================================================
-
-The system default is architecture dependent.  The system default
-can be changed using the relax_domain_level= boot parameter.
-
-This file is per-cpuset and affect the sched domain where the cpuset
-belongs to.  Therefore if the flag 'cpuset.sched_load_balance' of a cpuset
-is disabled, then 'cpuset.sched_relax_domain_level' have no effect since
-there is no sched domain belonging the cpuset.
-
-If multiple cpusets are overlapping and hence they form a single sched
-domain, the largest value among those is used.  Be careful, if one
-requests 0 and others are -1 then 0 is used.
-
-Note that modifying this file will have both good and bad effects,
-and whether it is acceptable or not depends on your situation.
-Don't modify this file if you are not sure.
-
-If your situation is:
-
- - The migration costs between each cpu can be assumed considerably
-   small(for you) due to your special application's behavior or
-   special hardware support for CPU cache etc.
- - The searching cost doesn't have impact(for you) or you can make
-   the searching cost enough small by managing cpuset to compact etc.
- - The latency is required even it sacrifices cache hit rate etc.
-   then increasing 'sched_relax_domain_level' would benefit you.
-
-
-1.9 How do I use cpusets ?
---------------------------
-
-In order to minimize the impact of cpusets on critical kernel
-code, such as the scheduler, and due to the fact that the kernel
-does not support one task updating the memory placement of another
-task directly, the impact on a task of changing its cpuset CPU
-or Memory Node placement, or of changing to which cpuset a task
-is attached, is subtle.
-
-If a cpuset has its Memory Nodes modified, then for each task attached
-to that cpuset, the next time that the kernel attempts to allocate
-a page of memory for that task, the kernel will notice the change
-in the task's cpuset, and update its per-task memory placement to
-remain within the new cpusets memory placement.  If the task was using
-mempolicy MPOL_BIND, and the nodes to which it was bound overlap with
-its new cpuset, then the task will continue to use whatever subset
-of MPOL_BIND nodes are still allowed in the new cpuset.  If the task
-was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed
-in the new cpuset, then the task will be essentially treated as if it
-was MPOL_BIND bound to the new cpuset (even though its NUMA placement,
-as queried by get_mempolicy(), doesn't change).  If a task is moved
-from one cpuset to another, then the kernel will adjust the task's
-memory placement, as above, the next time that the kernel attempts
-to allocate a page of memory for that task.
-
-If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset
-will have its allowed CPU placement changed immediately.  Similarly,
-if a task's pid is written to another cpuset's 'tasks' file, then its
-allowed CPU placement is changed immediately.  If such a task had been
-bound to some subset of its cpuset using the sched_setaffinity() call,
-the task will be allowed to run on any CPU allowed in its new cpuset,
-negating the effect of the prior sched_setaffinity() call.
-
-In summary, the memory placement of a task whose cpuset is changed is
-updated by the kernel, on the next allocation of a page for that task,
-and the processor placement is updated immediately.
-
-Normally, once a page is allocated (given a physical page
-of main memory) then that page stays on whatever node it
-was allocated, so long as it remains allocated, even if the
-cpusets memory placement policy 'cpuset.mems' subsequently changes.
-If the cpuset flag file 'cpuset.memory_migrate' is set true, then when
-tasks are attached to that cpuset, any pages that task had
-allocated to it on nodes in its previous cpuset are migrated
-to the task's new cpuset. The relative placement of the page within
-the cpuset is preserved during these migration operations if possible.
-For example if the page was on the second valid node of the prior cpuset
-then the page will be placed on the second valid node of the new cpuset.
-
-Also if 'cpuset.memory_migrate' is set true, then if that cpuset's
-'cpuset.mems' file is modified, pages allocated to tasks in that
-cpuset, that were on nodes in the previous setting of 'cpuset.mems',
-will be moved to nodes in the new setting of 'mems.'
-Pages that were not in the task's prior cpuset, or in the cpuset's
-prior 'cpuset.mems' setting, will not be moved.
-
-There is an exception to the above.  If hotplug functionality is used
-to remove all the CPUs that are currently assigned to a cpuset,
-then all the tasks in that cpuset will be moved to the nearest ancestor
-with non-empty cpus.  But the moving of some (or all) tasks might fail if
-cpuset is bound with another cgroup subsystem which has some restrictions
-on task attaching.  In this failing case, those tasks will stay
-in the original cpuset, and the kernel will automatically update
-their cpus_allowed to allow all online CPUs.  When memory hotplug
-functionality for removing Memory Nodes is available, a similar exception
-is expected to apply there as well.  In general, the kernel prefers to
-violate cpuset placement, over starving a task that has had all
-its allowed CPUs or Memory Nodes taken offline.
-
-There is a second exception to the above.  GFP_ATOMIC requests are
-kernel internal allocations that must be satisfied, immediately.
-The kernel may drop some request, in rare cases even panic, if a
-GFP_ATOMIC alloc fails.  If the request cannot be satisfied within
-the current task's cpuset, then we relax the cpuset, and look for
-memory anywhere we can find it.  It's better to violate the cpuset
-than stress the kernel.
-
-To start a new job that is to be contained within a cpuset, the steps are:
-
- 1) mkdir /sys/fs/cgroup/cpuset
- 2) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
- 3) Create the new cpuset by doing mkdir's and write's (or echo's) in
-    the /sys/fs/cgroup/cpuset virtual file system.
- 4) Start a task that will be the "founding father" of the new job.
- 5) Attach that task to the new cpuset by writing its pid to the
-    /sys/fs/cgroup/cpuset tasks file for that cpuset.
- 6) fork, exec or clone the job tasks from this founding father task.
-
-For example, the following sequence of commands will setup a cpuset
-named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
-and then start a subshell 'sh' in that cpuset::
-
-  mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
-  cd /sys/fs/cgroup/cpuset
-  mkdir Charlie
-  cd Charlie
-  /bin/echo 2-3 > cpuset.cpus
-  /bin/echo 1 > cpuset.mems
-  /bin/echo $$ > tasks
-  sh
-  # The subshell 'sh' is now running in cpuset Charlie
-  # The next line should display '/Charlie'
-  cat /proc/self/cpuset
-
-There are ways to query or modify cpusets:
-
- - via the cpuset file system directly, using the various cd, mkdir, echo,
-   cat, rmdir commands from the shell, or their equivalent from C.
- - via the C library libcpuset.
- - via the C library libcgroup.
-   (http://sourceforge.net/projects/libcg/)
- - via the python application cset.
-   (http://code.google.com/p/cpuset/)
-
-The sched_setaffinity calls can also be done at the shell prompt using
-SGI's runon or Robert Love's taskset.  The mbind and set_mempolicy
-calls can be done at the shell prompt using the numactl command
-(part of Andi Kleen's numa package).
-
-2. Usage Examples and Syntax
-============================
-
-2.1 Basic Usage
----------------
-
-Creating, modifying, using the cpusets can be done through the cpuset
-virtual filesystem.
-
-To mount it, type:
-# mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset
-
-Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the
-tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset
-is the cpuset that holds the whole system.
-
-If you want to create a new cpuset under /sys/fs/cgroup/cpuset::
-
-  # cd /sys/fs/cgroup/cpuset
-  # mkdir my_cpuset
-
-Now you want to do something with this cpuset::
-
-  # cd my_cpuset
-
-In this directory you can find several files::
-
-  # ls
-  cgroup.clone_children  cpuset.memory_pressure
-  cgroup.event_control   cpuset.memory_spread_page
-  cgroup.procs           cpuset.memory_spread_slab
-  cpuset.cpu_exclusive   cpuset.mems
-  cpuset.cpus            cpuset.sched_load_balance
-  cpuset.mem_exclusive   cpuset.sched_relax_domain_level
-  cpuset.mem_hardwall    notify_on_release
-  cpuset.memory_migrate  tasks
-
-Reading them will give you information about the state of this cpuset:
-the CPUs and Memory Nodes it can use, the processes that are using
-it, its properties.  By writing to these files you can manipulate
-the cpuset.
-
-Set some flags::
-
-  # /bin/echo 1 > cpuset.cpu_exclusive
-
-Add some cpus::
-
-  # /bin/echo 0-7 > cpuset.cpus
-
-Add some mems::
-
-  # /bin/echo 0-7 > cpuset.mems
-
-Now attach your shell to this cpuset::
-
-  # /bin/echo $$ > tasks
-
-You can also create cpusets inside your cpuset by using mkdir in this
-directory::
-
-  # mkdir my_sub_cs
-
-To remove a cpuset, just use rmdir::
-
-  # rmdir my_sub_cs
-
-This will fail if the cpuset is in use (has cpusets inside, or has
-processes attached).
-
-Note that for legacy reasons, the "cpuset" filesystem exists as a
-wrapper around the cgroup filesystem.
-
-The command::
-
-  mount -t cpuset X /sys/fs/cgroup/cpuset
-
-is equivalent to::
-
-  mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset
-  echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent
-
-2.2 Adding/removing cpus
-------------------------
-
-This is the syntax to use when writing in the cpus or mems files
-in cpuset directories::
-
-  # /bin/echo 1-4 > cpuset.cpus		-> set cpus list to cpus 1,2,3,4
-  # /bin/echo 1,2,3,4 > cpuset.cpus	-> set cpus list to cpus 1,2,3,4
-
-To add a CPU to a cpuset, write the new list of CPUs including the
-CPU to be added. To add 6 to the above cpuset::
-
-  # /bin/echo 1-4,6 > cpuset.cpus	-> set cpus list to cpus 1,2,3,4,6
-
-Similarly to remove a CPU from a cpuset, write the new list of CPUs
-without the CPU to be removed.
-
-To remove all the CPUs::
-
-  # /bin/echo "" > cpuset.cpus		-> clear cpus list
-
-2.3 Setting flags
------------------
-
-The syntax is very simple::
-
-  # /bin/echo 1 > cpuset.cpu_exclusive 	-> set flag 'cpuset.cpu_exclusive'
-  # /bin/echo 0 > cpuset.cpu_exclusive 	-> unset flag 'cpuset.cpu_exclusive'
-
-2.4 Attaching processes
------------------------
-
-::
-
-  # /bin/echo PID > tasks
-
-Note that it is PID, not PIDs. You can only attach ONE task at a time.
-If you have several tasks to attach, you have to do it one after another::
-
-  # /bin/echo PID1 > tasks
-  # /bin/echo PID2 > tasks
-	...
-  # /bin/echo PIDn > tasks
-
-
-3. Questions
-============
-
-Q:
-   what's up with this '/bin/echo' ?
-
-A:
-   bash's builtin 'echo' command does not check calls to write() against
-   errors. If you use it in the cpuset file system, you won't be
-   able to tell whether a command succeeded or failed.
-
-Q:
-   When I attach processes, only the first of the line gets really attached !
-
-A:
-   We can only return one error code per call to write(). So you should also
-   put only ONE pid.
-
-4. Contact
-==========
-
-Web: http://www.bullopensource.org/cpuset
diff --git a/Documentation/cgroup-v1/devices.rst b/Documentation/cgroup-v1/devices.rst
deleted file mode 100644
index e1886783961e..000000000000
--- a/Documentation/cgroup-v1/devices.rst
+++ /dev/null
@@ -1,132 +0,0 @@
-===========================
-Device Whitelist Controller
-===========================
-
-1. Description
-==============
-
-Implement a cgroup to track and enforce open and mknod restrictions
-on device files.  A device cgroup associates a device access
-whitelist with each cgroup.  A whitelist entry has 4 fields.
-'type' is a (all), c (char), or b (block).  'all' means it applies
-to all types and all major and minor numbers.  Major and minor are
-either an integer or * for all.  Access is a composition of r
-(read), w (write), and m (mknod).
-
-The root device cgroup starts with rwm to 'all'.  A child device
-cgroup gets a copy of the parent.  Administrators can then remove
-devices from the whitelist or add new entries.  A child cgroup can
-never receive a device access which is denied by its parent.
-
-2. User Interface
-=================
-
-An entry is added using devices.allow, and removed using
-devices.deny.  For instance::
-
-	echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow
-
-allows cgroup 1 to read and mknod the device usually known as
-/dev/null.  Doing::
-
-	echo a > /sys/fs/cgroup/1/devices.deny
-
-will remove the default 'a *:* rwm' entry. Doing::
-
-	echo a > /sys/fs/cgroup/1/devices.allow
-
-will add the 'a *:* rwm' entry to the whitelist.
-
-3. Security
-===========
-
-Any task can move itself between cgroups.  This clearly won't
-suffice, but we can decide the best way to adequately restrict
-movement as people get some experience with this.  We may just want
-to require CAP_SYS_ADMIN, which at least is a separate bit from
-CAP_MKNOD.  We may want to just refuse moving to a cgroup which
-isn't a descendant of the current one.  Or we may want to use
-CAP_MAC_ADMIN, since we really are trying to lock down root.
-
-CAP_SYS_ADMIN is needed to modify the whitelist or move another
-task to a new cgroup.  (Again we'll probably want to change that).
-
-A cgroup may not be granted more permissions than the cgroup's
-parent has.
-
-4. Hierarchy
-============
-
-device cgroups maintain hierarchy by making sure a cgroup never has more
-access permissions than its parent.  Every time an entry is written to
-a cgroup's devices.deny file, all its children will have that entry removed
-from their whitelist and all the locally set whitelist entries will be
-re-evaluated.  In case one of the locally set whitelist entries would provide
-more access than the cgroup's parent, it'll be removed from the whitelist.
-
-Example::
-
-      A
-     / \
-        B
-
-    group        behavior	exceptions
-    A            allow		"b 8:* rwm", "c 116:1 rw"
-    B            deny		"c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"
-
-If a device is denied in group A::
-
-	# echo "c 116:* r" > A/devices.deny
-
-it'll propagate down and after revalidating B's entries, the whitelist entry
-"c 116:2 rwm" will be removed::
-
-    group        whitelist entries                        denied devices
-    A            all                                      "b 8:* rwm", "c 116:* rw"
-    B            "c 1:3 rwm", "b 3:* rwm"                 all the rest
-
-In case parent's exceptions change and local exceptions are not allowed
-anymore, they'll be deleted.
-
-Notice that new whitelist entries will not be propagated::
-
-      A
-     / \
-        B
-
-    group        whitelist entries                        denied devices
-    A            "c 1:3 rwm", "c 1:5 r"                   all the rest
-    B            "c 1:3 rwm", "c 1:5 r"                   all the rest
-
-when adding ``c *:3 rwm``::
-
-	# echo "c *:3 rwm" >A/devices.allow
-
-the result::
-
-    group        whitelist entries                        denied devices
-    A            "c *:3 rwm", "c 1:5 r"                   all the rest
-    B            "c 1:3 rwm", "c 1:5 r"                   all the rest
-
-but now it'll be possible to add new entries to B::
-
-	# echo "c 2:3 rwm" >B/devices.allow
-	# echo "c 50:3 r" >B/devices.allow
-
-or even::
-
-	# echo "c *:3 rwm" >B/devices.allow
-
-Allowing or denying all by writing 'a' to devices.allow or devices.deny will
-not be possible once the device cgroups has children.
-
-4.1 Hierarchy (internal implementation)
----------------------------------------
-
-device cgroups is implemented internally using a behavior (ALLOW, DENY) and a
-list of exceptions.  The internal state is controlled using the same user
-interface to preserve compatibility with the previous whitelist-only
-implementation.  Removal or addition of exceptions that will reduce the access
-to devices will be propagated down the hierarchy.
-For every propagated exception, the effective rules will be re-evaluated based
-on current parent's access rules.
diff --git a/Documentation/cgroup-v1/freezer-subsystem.rst b/Documentation/cgroup-v1/freezer-subsystem.rst
deleted file mode 100644
index 582d3427de3f..000000000000
--- a/Documentation/cgroup-v1/freezer-subsystem.rst
+++ /dev/null
@@ -1,127 +0,0 @@
-==============
-Cgroup Freezer
-==============
-
-The cgroup freezer is useful to batch job management system which start
-and stop sets of tasks in order to schedule the resources of a machine
-according to the desires of a system administrator. This sort of program
-is often used on HPC clusters to schedule access to the cluster as a
-whole. The cgroup freezer uses cgroups to describe the set of tasks to
-be started/stopped by the batch job management system. It also provides
-a means to start and stop the tasks composing the job.
-
-The cgroup freezer will also be useful for checkpointing running groups
-of tasks. The freezer allows the checkpoint code to obtain a consistent
-image of the tasks by attempting to force the tasks in a cgroup into a
-quiescent state. Once the tasks are quiescent another task can
-walk /proc or invoke a kernel interface to gather information about the
-quiesced tasks. Checkpointed tasks can be restarted later should a
-recoverable error occur. This also allows the checkpointed tasks to be
-migrated between nodes in a cluster by copying the gathered information
-to another node and restarting the tasks there.
-
-Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping
-and resuming tasks in userspace. Both of these signals are observable
-from within the tasks we wish to freeze. While SIGSTOP cannot be caught,
-blocked, or ignored it can be seen by waiting or ptracing parent tasks.
-SIGCONT is especially unsuitable since it can be caught by the task. Any
-programs designed to watch for SIGSTOP and SIGCONT could be broken by
-attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can
-demonstrate this problem using nested bash shells::
-
-	$ echo $$
-	16644
-	$ bash
-	$ echo $$
-	16690
-
-	From a second, unrelated bash shell:
-	$ kill -SIGSTOP 16690
-	$ kill -SIGCONT 16690
-
-	<at this point 16690 exits and causes 16644 to exit too>
-
-This happens because bash can observe both signals and choose how it
-responds to them.
-
-Another example of a program which catches and responds to these
-signals is gdb. In fact any program designed to use ptrace is likely to
-have a problem with this method of stopping and resuming tasks.
-
-In contrast, the cgroup freezer uses the kernel freezer code to
-prevent the freeze/unfreeze cycle from becoming visible to the tasks
-being frozen. This allows the bash example above and gdb to run as
-expected.
-
-The cgroup freezer is hierarchical. Freezing a cgroup freezes all
-tasks belonging to the cgroup and all its descendant cgroups. Each
-cgroup has its own state (self-state) and the state inherited from the
-parent (parent-state). Iff both states are THAWED, the cgroup is
-THAWED.
-
-The following cgroupfs files are created by cgroup freezer.
-
-* freezer.state: Read-write.
-
-  When read, returns the effective state of the cgroup - "THAWED",
-  "FREEZING" or "FROZEN". This is the combined self and parent-states.
-  If any is freezing, the cgroup is freezing (FREEZING or FROZEN).
-
-  FREEZING cgroup transitions into FROZEN state when all tasks
-  belonging to the cgroup and its descendants become frozen. Note that
-  a cgroup reverts to FREEZING from FROZEN after a new task is added
-  to the cgroup or one of its descendant cgroups until the new task is
-  frozen.
-
-  When written, sets the self-state of the cgroup. Two values are
-  allowed - "FROZEN" and "THAWED". If FROZEN is written, the cgroup,
-  if not already freezing, enters FREEZING state along with all its
-  descendant cgroups.
-
-  If THAWED is written, the self-state of the cgroup is changed to
-  THAWED.  Note that the effective state may not change to THAWED if
-  the parent-state is still freezing. If a cgroup's effective state
-  becomes THAWED, all its descendants which are freezing because of
-  the cgroup also leave the freezing state.
-
-* freezer.self_freezing: Read only.
-
-  Shows the self-state. 0 if the self-state is THAWED; otherwise, 1.
-  This value is 1 iff the last write to freezer.state was "FROZEN".
-
-* freezer.parent_freezing: Read only.
-
-  Shows the parent-state.  0 if none of the cgroup's ancestors is
-  frozen; otherwise, 1.
-
-The root cgroup is non-freezable and the above interface files don't
-exist.
-
-* Examples of usage::
-
-   # mkdir /sys/fs/cgroup/freezer
-   # mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer
-   # mkdir /sys/fs/cgroup/freezer/0
-   # echo $some_pid > /sys/fs/cgroup/freezer/0/tasks
-
-to get status of the freezer subsystem::
-
-   # cat /sys/fs/cgroup/freezer/0/freezer.state
-   THAWED
-
-to freeze all tasks in the container::
-
-   # echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state
-   # cat /sys/fs/cgroup/freezer/0/freezer.state
-   FREEZING
-   # cat /sys/fs/cgroup/freezer/0/freezer.state
-   FROZEN
-
-to unfreeze all tasks in the container::
-
-   # echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state
-   # cat /sys/fs/cgroup/freezer/0/freezer.state
-   THAWED
-
-This is the basic mechanism which should do the right thing for user space task
-in a simple scenario.
diff --git a/Documentation/cgroup-v1/hugetlb.rst b/Documentation/cgroup-v1/hugetlb.rst
deleted file mode 100644
index a3902aa253a9..000000000000
--- a/Documentation/cgroup-v1/hugetlb.rst
+++ /dev/null
@@ -1,50 +0,0 @@
-==================
-HugeTLB Controller
-==================
-
-The HugeTLB controller allows to limit the HugeTLB usage per control group and
-enforces the controller limit during page fault. Since HugeTLB doesn't
-support page reclaim, enforcing the limit at page fault time implies that,
-the application will get SIGBUS signal if it tries to access HugeTLB pages
-beyond its limit. This requires the application to know beforehand how much
-HugeTLB pages it would require for its use.
-
-HugeTLB controller can be created by first mounting the cgroup filesystem.
-
-# mount -t cgroup -o hugetlb none /sys/fs/cgroup
-
-With the above step, the initial or the parent HugeTLB group becomes
-visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
-the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
-
-New groups can be created under the parent group /sys/fs/cgroup::
-
-  # cd /sys/fs/cgroup
-  # mkdir g1
-  # echo $$ > g1/tasks
-
-The above steps create a new group g1 and move the current shell
-process (bash) into it.
-
-Brief summary of control files::
-
- hugetlb.<hugepagesize>.limit_in_bytes     # set/show limit of "hugepagesize" hugetlb usage
- hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb  usage recorded
- hugetlb.<hugepagesize>.usage_in_bytes     # show current usage for "hugepagesize" hugetlb
- hugetlb.<hugepagesize>.failcnt		   # show the number of allocation failure due to HugeTLB limit
-
-For a system supporting three hugepage sizes (64k, 32M and 1G), the control
-files include::
-
-  hugetlb.1GB.limit_in_bytes
-  hugetlb.1GB.max_usage_in_bytes
-  hugetlb.1GB.usage_in_bytes
-  hugetlb.1GB.failcnt
-  hugetlb.64KB.limit_in_bytes
-  hugetlb.64KB.max_usage_in_bytes
-  hugetlb.64KB.usage_in_bytes
-  hugetlb.64KB.failcnt
-  hugetlb.32MB.limit_in_bytes
-  hugetlb.32MB.max_usage_in_bytes
-  hugetlb.32MB.usage_in_bytes
-  hugetlb.32MB.failcnt
diff --git a/Documentation/cgroup-v1/index.rst b/Documentation/cgroup-v1/index.rst
deleted file mode 100644
index fe76d42edc11..000000000000
--- a/Documentation/cgroup-v1/index.rst
+++ /dev/null
@@ -1,30 +0,0 @@
-:orphan:
-
-========================
-Control Groups version 1
-========================
-
-.. toctree::
-    :maxdepth: 1
-
-    cgroups
-
-    blkio-controller
-    cpuacct
-    cpusets
-    devices
-    freezer-subsystem
-    hugetlb
-    memcg_test
-    memory
-    net_cls
-    net_prio
-    pids
-    rdma
-
-.. only::  subproject and html
-
-   Indices
-   =======
-
-   * :ref:`genindex`
diff --git a/Documentation/cgroup-v1/memcg_test.rst b/Documentation/cgroup-v1/memcg_test.rst
deleted file mode 100644
index 91bd18c6a514..000000000000
--- a/Documentation/cgroup-v1/memcg_test.rst
+++ /dev/null
@@ -1,355 +0,0 @@
-=====================================================
-Memory Resource Controller(Memcg) Implementation Memo
-=====================================================
-
-Last Updated: 2010/2
-
-Base Kernel Version: based on 2.6.33-rc7-mm(candidate for 34).
-
-Because VM is getting complex (one of reasons is memcg...), memcg's behavior
-is complex. This is a document for memcg's internal behavior.
-Please note that implementation details can be changed.
-
-(*) Topics on API should be in Documentation/cgroup-v1/memory.rst)
-
-0. How to record usage ?
-========================
-
-   2 objects are used.
-
-   page_cgroup ....an object per page.
-
-	Allocated at boot or memory hotplug. Freed at memory hot removal.
-
-   swap_cgroup ... an entry per swp_entry.
-
-	Allocated at swapon(). Freed at swapoff().
-
-   The page_cgroup has USED bit and double count against a page_cgroup never
-   occurs. swap_cgroup is used only when a charged page is swapped-out.
-
-1. Charge
-=========
-
-   a page/swp_entry may be charged (usage += PAGE_SIZE) at
-
-	mem_cgroup_try_charge()
-
-2. Uncharge
-===========
-
-  a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
-
-	mem_cgroup_uncharge()
-	  Called when a page's refcount goes down to 0.
-
-	mem_cgroup_uncharge_swap()
-	  Called when swp_entry's refcnt goes down to 0. A charge against swap
-	  disappears.
-
-3. charge-commit-cancel
-=======================
-
-	Memcg pages are charged in two steps:
-
-		- mem_cgroup_try_charge()
-		- mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
-
-	At try_charge(), there are no flags to say "this page is charged".
-	at this point, usage += PAGE_SIZE.
-
-	At commit(), the page is associated with the memcg.
-
-	At cancel(), simply usage -= PAGE_SIZE.
-
-Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
-
-4. Anonymous
-============
-
-	Anonymous page is newly allocated at
-		  - page fault into MAP_ANONYMOUS mapping.
-		  - Copy-On-Write.
-
-	4.1 Swap-in.
-	At swap-in, the page is taken from swap-cache. There are 2 cases.
-
-	(a) If the SwapCache is newly allocated and read, it has no charges.
-	(b) If the SwapCache has been mapped by processes, it has been
-	    charged already.
-
-	4.2 Swap-out.
-	At swap-out, typical state transition is below.
-
-	(a) add to swap cache. (marked as SwapCache)
-	    swp_entry's refcnt += 1.
-	(b) fully unmapped.
-	    swp_entry's refcnt += # of ptes.
-	(c) write back to swap.
-	(d) delete from swap cache. (remove from SwapCache)
-	    swp_entry's refcnt -= 1.
-
-
-	Finally, at task exit,
-	(e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
-
-5. Page Cache
-=============
-
-	Page Cache is charged at
-	- add_to_page_cache_locked().
-
-	The logic is very clear. (About migration, see below)
-
-	Note:
-	  __remove_from_page_cache() is called by remove_from_page_cache()
-	  and __remove_mapping().
-
-6. Shmem(tmpfs) Page Cache
-===========================
-
-	The best way to understand shmem's page state transition is to read
-	mm/shmem.c.
-
-	But brief explanation of the behavior of memcg around shmem will be
-	helpful to understand the logic.
-
-	Shmem's page (just leaf page, not direct/indirect block) can be on
-
-		- radix-tree of shmem's inode.
-		- SwapCache.
-		- Both on radix-tree and SwapCache. This happens at swap-in
-		  and swap-out,
-
-	It's charged when...
-
-	- A new page is added to shmem's radix-tree.
-	- A swp page is read. (move a charge from swap_cgroup to page_cgroup)
-
-7. Page Migration
-=================
-
-	mem_cgroup_migrate()
-
-8. LRU
-======
-        Each memcg has its own private LRU. Now, its handling is under global
-	VM's control (means that it's handled under global pgdat->lru_lock).
-	Almost all routines around memcg's LRU is called by global LRU's
-	list management functions under pgdat->lru_lock.
-
-	A special function is mem_cgroup_isolate_pages(). This scans
-	memcg's private LRU and call __isolate_lru_page() to extract a page
-	from LRU.
-
-	(By __isolate_lru_page(), the page is removed from both of global and
-	private LRU.)
-
-
-9. Typical Tests.
-=================
-
- Tests for racy cases.
-
-9.1 Small limit to memcg.
--------------------------
-
-	When you do test to do racy case, it's good test to set memcg's limit
-	to be very small rather than GB. Many races found in the test under
-	xKB or xxMB limits.
-
-	(Memory behavior under GB and Memory behavior under MB shows very
-	different situation.)
-
-9.2 Shmem
----------
-
-	Historically, memcg's shmem handling was poor and we saw some amount
-	of troubles here. This is because shmem is page-cache but can be
-	SwapCache. Test with shmem/tmpfs is always good test.
-
-9.3 Migration
--------------
-
-	For NUMA, migration is an another special case. To do easy test, cpuset
-	is useful. Following is a sample script to do migration::
-
-		mount -t cgroup -o cpuset none /opt/cpuset
-
-		mkdir /opt/cpuset/01
-		echo 1 > /opt/cpuset/01/cpuset.cpus
-		echo 0 > /opt/cpuset/01/cpuset.mems
-		echo 1 > /opt/cpuset/01/cpuset.memory_migrate
-		mkdir /opt/cpuset/02
-		echo 1 > /opt/cpuset/02/cpuset.cpus
-		echo 1 > /opt/cpuset/02/cpuset.mems
-		echo 1 > /opt/cpuset/02/cpuset.memory_migrate
-
-	In above set, when you moves a task from 01 to 02, page migration to
-	node 0 to node 1 will occur. Following is a script to migrate all
-	under cpuset.::
-
-		--
-		move_task()
-		{
-		for pid in $1
-		do
-			/bin/echo $pid >$2/tasks 2>/dev/null
-			echo -n $pid
-			echo -n " "
-		done
-		echo END
-		}
-
-		G1_TASK=`cat ${G1}/tasks`
-		G2_TASK=`cat ${G2}/tasks`
-		move_task "${G1_TASK}" ${G2} &
-		--
-
-9.4 Memory hotplug
-------------------
-
-	memory hotplug test is one of good test.
-
-	to offline memory, do following::
-
-		# echo offline > /sys/devices/system/memory/memoryXXX/state
-
-	(XXX is the place of memory)
-
-	This is an easy way to test page migration, too.
-
-9.5 mkdir/rmdir
----------------
-
-	When using hierarchy, mkdir/rmdir test should be done.
-	Use tests like the following::
-
-		echo 1 >/opt/cgroup/01/memory/use_hierarchy
-		mkdir /opt/cgroup/01/child_a
-		mkdir /opt/cgroup/01/child_b
-
-		set limit to 01.
-		add limit to 01/child_b
-		run jobs under child_a and child_b
-
-	create/delete following groups at random while jobs are running::
-
-		/opt/cgroup/01/child_a/child_aa
-		/opt/cgroup/01/child_b/child_bb
-		/opt/cgroup/01/child_c
-
-	running new jobs in new group is also good.
-
-9.6 Mount with other subsystems
--------------------------------
-
-	Mounting with other subsystems is a good test because there is a
-	race and lock dependency with other cgroup subsystems.
-
-	example::
-
-		# mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices
-
-	and do task move, mkdir, rmdir etc...under this.
-
-9.7 swapoff
------------
-
-	Besides management of swap is one of complicated parts of memcg,
-	call path of swap-in at swapoff is not same as usual swap-in path..
-	It's worth to be tested explicitly.
-
-	For example, test like following is good:
-
-	(Shell-A)::
-
-		# mount -t cgroup none /cgroup -o memory
-		# mkdir /cgroup/test
-		# echo 40M > /cgroup/test/memory.limit_in_bytes
-		# echo 0 > /cgroup/test/tasks
-
-	Run malloc(100M) program under this. You'll see 60M of swaps.
-
-	(Shell-B)::
-
-		# move all tasks in /cgroup/test to /cgroup
-		# /sbin/swapoff -a
-		# rmdir /cgroup/test
-		# kill malloc task.
-
-	Of course, tmpfs v.s. swapoff test should be tested, too.
-
-9.8 OOM-Killer
---------------
-
-	Out-of-memory caused by memcg's limit will kill tasks under
-	the memcg. When hierarchy is used, a task under hierarchy
-	will be killed by the kernel.
-
-	In this case, panic_on_oom shouldn't be invoked and tasks
-	in other groups shouldn't be killed.
-
-	It's not difficult to cause OOM under memcg as following.
-
-	Case A) when you can swapoff::
-
-		#swapoff -a
-		#echo 50M > /memory.limit_in_bytes
-
-	run 51M of malloc
-
-	Case B) when you use mem+swap limitation::
-
-		#echo 50M > memory.limit_in_bytes
-		#echo 50M > memory.memsw.limit_in_bytes
-
-	run 51M of malloc
-
-9.9 Move charges at task migration
-----------------------------------
-
-	Charges associated with a task can be moved along with task migration.
-
-	(Shell-A)::
-
-		#mkdir /cgroup/A
-		#echo $$ >/cgroup/A/tasks
-
-	run some programs which uses some amount of memory in /cgroup/A.
-
-	(Shell-B)::
-
-		#mkdir /cgroup/B
-		#echo 1 >/cgroup/B/memory.move_charge_at_immigrate
-		#echo "pid of the program running in group A" >/cgroup/B/tasks
-
-	You can see charges have been moved by reading ``*.usage_in_bytes`` or
-	memory.stat of both A and B.
-
-	See 8.2 of Documentation/cgroup-v1/memory.rst to see what value should
-	be written to move_charge_at_immigrate.
-
-9.10 Memory thresholds
-----------------------
-
-	Memory controller implements memory thresholds using cgroups notification
-	API. You can use tools/cgroup/cgroup_event_listener.c to test it.
-
-	(Shell-A) Create cgroup and run event listener::
-
-		# mkdir /cgroup/A
-		# ./cgroup_event_listener /cgroup/A/memory.usage_in_bytes 5M
-
-	(Shell-B) Add task to cgroup and try to allocate and free memory::
-
-		# echo $$ >/cgroup/A/tasks
-		# a="$(dd if=/dev/zero bs=1M count=10)"
-		# a=
-
-	You will see message from cgroup_event_listener every time you cross
-	the thresholds.
-
-	Use /cgroup/A/memory.memsw.usage_in_bytes to test memsw thresholds.
-
-	It's good idea to test root cgroup as well.
diff --git a/Documentation/cgroup-v1/memory.rst b/Documentation/cgroup-v1/memory.rst
deleted file mode 100644
index 41bdc038dad9..000000000000
--- a/Documentation/cgroup-v1/memory.rst
+++ /dev/null
@@ -1,1003 +0,0 @@
-==========================
-Memory Resource Controller
-==========================
-
-NOTE:
-      This document is hopelessly outdated and it asks for a complete
-      rewrite. It still contains a useful information so we are keeping it
-      here but make sure to check the current code if you need a deeper
-      understanding.
-
-NOTE:
-      The Memory Resource Controller has generically been referred to as the
-      memory controller in this document. Do not confuse memory controller
-      used here with the memory controller that is used in hardware.
-
-(For editors) In this document:
-      When we mention a cgroup (cgroupfs's directory) with memory controller,
-      we call it "memory cgroup". When you see git-log and source code, you'll
-      see patch's title and function names tend to use "memcg".
-      In this document, we avoid using it.
-
-Benefits and Purpose of the memory controller
-=============================================
-
-The memory controller isolates the memory behaviour of a group of tasks
-from the rest of the system. The article on LWN [12] mentions some probable
-uses of the memory controller. The memory controller can be used to
-
-a. Isolate an application or a group of applications
-   Memory-hungry applications can be isolated and limited to a smaller
-   amount of memory.
-b. Create a cgroup with a limited amount of memory; this can be used
-   as a good alternative to booting with mem=XXXX.
-c. Virtualization solutions can control the amount of memory they want
-   to assign to a virtual machine instance.
-d. A CD/DVD burner could control the amount of memory used by the
-   rest of the system to ensure that burning does not fail due to lack
-   of available memory.
-e. There are several other use cases; find one or use the controller just
-   for fun (to learn and hack on the VM subsystem).
-
-Current Status: linux-2.6.34-mmotm(development version of 2010/April)
-
-Features:
-
- - accounting anonymous pages, file caches, swap caches usage and limiting them.
- - pages are linked to per-memcg LRU exclusively, and there is no global LRU.
- - optionally, memory+swap usage can be accounted and limited.
- - hierarchical accounting
- - soft limit
- - moving (recharging) account at moving a task is selectable.
- - usage threshold notifier
- - memory pressure notifier
- - oom-killer disable knob and oom-notifier
- - Root cgroup has no limit controls.
-
- Kernel memory support is a work in progress, and the current version provides
- basically functionality. (See Section 2.7)
-
-Brief summary of control files.
-
-==================================== ==========================================
- tasks				     attach a task(thread) and show list of
-				     threads
- cgroup.procs			     show list of processes
- cgroup.event_control		     an interface for event_fd()
- memory.usage_in_bytes		     show current usage for memory
-				     (See 5.5 for details)
- memory.memsw.usage_in_bytes	     show current usage for memory+Swap
-				     (See 5.5 for details)
- memory.limit_in_bytes		     set/show limit of memory usage
- memory.memsw.limit_in_bytes	     set/show limit of memory+Swap usage
- memory.failcnt			     show the number of memory usage hits limits
- memory.memsw.failcnt		     show the number of memory+Swap hits limits
- memory.max_usage_in_bytes	     show max memory usage recorded
- memory.memsw.max_usage_in_bytes     show max memory+Swap usage recorded
- memory.soft_limit_in_bytes	     set/show soft limit of memory usage
- memory.stat			     show various statistics
- memory.use_hierarchy		     set/show hierarchical account enabled
- memory.force_empty		     trigger forced page reclaim
- memory.pressure_level		     set memory pressure notifications
- memory.swappiness		     set/show swappiness parameter of vmscan
-				     (See sysctl's vm.swappiness)
- memory.move_charge_at_immigrate     set/show controls of moving charges
- memory.oom_control		     set/show oom controls.
- memory.numa_stat		     show the number of memory usage per numa
-				     node
-
- memory.kmem.limit_in_bytes          set/show hard limit for kernel memory
- memory.kmem.usage_in_bytes          show current kernel memory allocation
- memory.kmem.failcnt                 show the number of kernel memory usage
-				     hits limits
- memory.kmem.max_usage_in_bytes      show max kernel memory usage recorded
-
- memory.kmem.tcp.limit_in_bytes      set/show hard limit for tcp buf memory
- memory.kmem.tcp.usage_in_bytes      show current tcp buf memory allocation
- memory.kmem.tcp.failcnt             show the number of tcp buf memory usage
-				     hits limits
- memory.kmem.tcp.max_usage_in_bytes  show max tcp buf memory usage recorded
-==================================== ==========================================
-
-1. History
-==========
-
-The memory controller has a long history. A request for comments for the memory
-controller was posted by Balbir Singh [1]. At the time the RFC was posted
-there were several implementations for memory control. The goal of the
-RFC was to build consensus and agreement for the minimal features required
-for memory control. The first RSS controller was posted by Balbir Singh[2]
-in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the
-RSS controller. At OLS, at the resource management BoF, everyone suggested
-that we handle both page cache and RSS together. Another request was raised
-to allow user space handling of OOM. The current memory controller is
-at version 6; it combines both mapped (RSS) and unmapped Page
-Cache Control [11].
-
-2. Memory Control
-=================
-
-Memory is a unique resource in the sense that it is present in a limited
-amount. If a task requires a lot of CPU processing, the task can spread
-its processing over a period of hours, days, months or years, but with
-memory, the same physical memory needs to be reused to accomplish the task.
-
-The memory controller implementation has been divided into phases. These
-are:
-
-1. Memory controller
-2. mlock(2) controller
-3. Kernel user memory accounting and slab control
-4. user mappings length controller
-
-The memory controller is the first controller developed.
-
-2.1. Design
------------
-
-The core of the design is a counter called the page_counter. The
-page_counter tracks the current memory usage and limit of the group of
-processes associated with the controller. Each cgroup has a memory controller
-specific data structure (mem_cgroup) associated with it.
-
-2.2. Accounting
----------------
-
-::
-
-		+--------------------+
-		|  mem_cgroup        |
-		|  (page_counter)    |
-		+--------------------+
-		 /            ^      \
-		/             |       \
-           +---------------+  |        +---------------+
-           | mm_struct     |  |....    | mm_struct     |
-           |               |  |        |               |
-           +---------------+  |        +---------------+
-                              |
-                              + --------------+
-                                              |
-           +---------------+           +------+--------+
-           | page          +---------->  page_cgroup|
-           |               |           |               |
-           +---------------+           +---------------+
-
-             (Figure 1: Hierarchy of Accounting)
-
-
-Figure 1 shows the important aspects of the controller
-
-1. Accounting happens per cgroup
-2. Each mm_struct knows about which cgroup it belongs to
-3. Each page has a pointer to the page_cgroup, which in turn knows the
-   cgroup it belongs to
-
-The accounting is done as follows: mem_cgroup_charge_common() is invoked to
-set up the necessary data structures and check if the cgroup that is being
-charged is over its limit. If it is, then reclaim is invoked on the cgroup.
-More details can be found in the reclaim section of this document.
-If everything goes well, a page meta-data-structure called page_cgroup is
-updated. page_cgroup has its own LRU on cgroup.
-(*) page_cgroup structure is allocated at boot/memory-hotplug time.
-
-2.2.1 Accounting details
-------------------------
-
-All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
-Some pages which are never reclaimable and will not be on the LRU
-are not accounted. We just account pages under usual VM management.
-
-RSS pages are accounted at page_fault unless they've already been accounted
-for earlier. A file page will be accounted for as Page Cache when it's
-inserted into inode (radix-tree). While it's mapped into the page tables of
-processes, duplicate accounting is carefully avoided.
-
-An RSS page is unaccounted when it's fully unmapped. A PageCache page is
-unaccounted when it's removed from radix-tree. Even if RSS pages are fully
-unmapped (by kswapd), they may exist as SwapCache in the system until they
-are really freed. Such SwapCaches are also accounted.
-A swapped-in page is not accounted until it's mapped.
-
-Note: The kernel does swapin-readahead and reads multiple swaps at once.
-This means swapped-in pages may contain pages for other tasks than a task
-causing page fault. So, we avoid accounting at swap-in I/O.
-
-At page migration, accounting information is kept.
-
-Note: we just account pages-on-LRU because our purpose is to control amount
-of used pages; not-on-LRU pages tend to be out-of-control from VM view.
-
-2.3 Shared Page Accounting
---------------------------
-
-Shared pages are accounted on the basis of the first touch approach. The
-cgroup that first touches a page is accounted for the page. The principle
-behind this approach is that a cgroup that aggressively uses a shared
-page will eventually get charged for it (once it is uncharged from
-the cgroup that brought it in -- this will happen on memory pressure).
-
-But see section 8.2: when moving a task to another cgroup, its pages may
-be recharged to the new cgroup, if move_charge_at_immigrate has been chosen.
-
-Exception: If CONFIG_MEMCG_SWAP is not used.
-When you do swapoff and make swapped-out pages of shmem(tmpfs) to
-be backed into memory in force, charges for pages are accounted against the
-caller of swapoff rather than the users of shmem.
-
-2.4 Swap Extension (CONFIG_MEMCG_SWAP)
---------------------------------------
-
-Swap Extension allows you to record charge for swap. A swapped-in page is
-charged back to original page allocator if possible.
-
-When swap is accounted, following files are added.
-
- - memory.memsw.usage_in_bytes.
- - memory.memsw.limit_in_bytes.
-
-memsw means memory+swap. Usage of memory+swap is limited by
-memsw.limit_in_bytes.
-
-Example: Assume a system with 4G of swap. A task which allocates 6G of memory
-(by mistake) under 2G memory limitation will use all swap.
-In this case, setting memsw.limit_in_bytes=3G will prevent bad use of swap.
-By using the memsw limit, you can avoid system OOM which can be caused by swap
-shortage.
-
-**why 'memory+swap' rather than swap**
-
-The global LRU(kswapd) can swap out arbitrary pages. Swap-out means
-to move account from memory to swap...there is no change in usage of
-memory+swap. In other words, when we want to limit the usage of swap without
-affecting global LRU, memory+swap limit is better than just limiting swap from
-an OS point of view.
-
-**What happens when a cgroup hits memory.memsw.limit_in_bytes**
-
-When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out
-in this cgroup. Then, swap-out will not be done by cgroup routine and file
-caches are dropped. But as mentioned above, global LRU can do swapout memory
-from it for sanity of the system's memory management state. You can't forbid
-it by cgroup.
-
-2.5 Reclaim
------------
-
-Each cgroup maintains a per cgroup LRU which has the same structure as
-global VM. When a cgroup goes over its limit, we first try
-to reclaim memory from the cgroup so as to make space for the new
-pages that the cgroup has touched. If the reclaim is unsuccessful,
-an OOM routine is invoked to select and kill the bulkiest task in the
-cgroup. (See 10. OOM Control below.)
-
-The reclaim algorithm has not been modified for cgroups, except that
-pages that are selected for reclaiming come from the per-cgroup LRU
-list.
-
-NOTE:
-  Reclaim does not work for the root cgroup, since we cannot set any
-  limits on the root cgroup.
-
-Note2:
-  When panic_on_oom is set to "2", the whole system will panic.
-
-When oom event notifier is registered, event will be delivered.
-(See oom_control section)
-
-2.6 Locking
------------
-
-   lock_page_cgroup()/unlock_page_cgroup() should not be called under
-   the i_pages lock.
-
-   Other lock order is following:
-
-   PG_locked.
-     mm->page_table_lock
-         pgdat->lru_lock
-	   lock_page_cgroup.
-
-  In many cases, just lock_page_cgroup() is called.
-
-  per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
-  pgdat->lru_lock, it has no lock of its own.
-
-2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)
------------------------------------------------
-
-With the Kernel memory extension, the Memory Controller is able to limit
-the amount of kernel memory used by the system. Kernel memory is fundamentally
-different than user memory, since it can't be swapped out, which makes it
-possible to DoS the system by consuming too much of this precious resource.
-
-Kernel memory accounting is enabled for all memory cgroups by default. But
-it can be disabled system-wide by passing cgroup.memory=nokmem to the kernel
-at boot time. In this case, kernel memory will not be accounted at all.
-
-Kernel memory limits are not imposed for the root cgroup. Usage for the root
-cgroup may or may not be accounted. The memory used is accumulated into
-memory.kmem.usage_in_bytes, or in a separate counter when it makes sense.
-(currently only for tcp).
-
-The main "kmem" counter is fed into the main counter, so kmem charges will
-also be visible from the user counter.
-
-Currently no soft limit is implemented for kernel memory. It is future work
-to trigger slab reclaim when those limits are reached.
-
-2.7.1 Current Kernel Memory resources accounted
------------------------------------------------
-
-stack pages:
-  every process consumes some stack pages. By accounting into
-  kernel memory, we prevent new processes from being created when the kernel
-  memory usage is too high.
-
-slab pages:
-  pages allocated by the SLAB or SLUB allocator are tracked. A copy
-  of each kmem_cache is created every time the cache is touched by the first time
-  from inside the memcg. The creation is done lazily, so some objects can still be
-  skipped while the cache is being created. All objects in a slab page should
-  belong to the same memcg. This only fails to hold when a task is migrated to a
-  different memcg during the page allocation by the cache.
-
-sockets memory pressure:
-  some sockets protocols have memory pressure
-  thresholds. The Memory Controller allows them to be controlled individually
-  per cgroup, instead of globally.
-
-tcp memory pressure:
-  sockets memory pressure for the tcp protocol.
-
-2.7.2 Common use cases
-----------------------
-
-Because the "kmem" counter is fed to the main user counter, kernel memory can
-never be limited completely independently of user memory. Say "U" is the user
-limit, and "K" the kernel limit. There are three possible ways limits can be
-set:
-
-U != 0, K = unlimited:
-    This is the standard memcg limitation mechanism already present before kmem
-    accounting. Kernel memory is completely ignored.
-
-U != 0, K < U:
-    Kernel memory is a subset of the user memory. This setup is useful in
-    deployments where the total amount of memory per-cgroup is overcommited.
-    Overcommiting kernel memory limits is definitely not recommended, since the
-    box can still run out of non-reclaimable memory.
-    In this case, the admin could set up K so that the sum of all groups is
-    never greater than the total memory, and freely set U at the cost of his
-    QoS.
-
-WARNING:
-    In the current implementation, memory reclaim will NOT be
-    triggered for a cgroup when it hits K while staying below U, which makes
-    this setup impractical.
-
-U != 0, K >= U:
-    Since kmem charges will also be fed to the user counter and reclaim will be
-    triggered for the cgroup for both kinds of memory. This setup gives the
-    admin a unified view of memory, and it is also useful for people who just
-    want to track kernel memory usage.
-
-3. User Interface
-=================
-
-3.0. Configuration
-------------------
-
-a. Enable CONFIG_CGROUPS
-b. Enable CONFIG_MEMCG
-c. Enable CONFIG_MEMCG_SWAP (to use swap extension)
-d. Enable CONFIG_MEMCG_KMEM (to use kmem extension)
-
-3.1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?)
--------------------------------------------------------------------
-
-::
-
-	# mount -t tmpfs none /sys/fs/cgroup
-	# mkdir /sys/fs/cgroup/memory
-	# mount -t cgroup none /sys/fs/cgroup/memory -o memory
-
-3.2. Make the new group and move bash into it::
-
-	# mkdir /sys/fs/cgroup/memory/0
-	# echo $$ > /sys/fs/cgroup/memory/0/tasks
-
-Since now we're in the 0 cgroup, we can alter the memory limit::
-
-	# echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes
-
-NOTE:
-  We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
-  mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes,
-  Gibibytes.)
-
-NOTE:
-  We can write "-1" to reset the ``*.limit_in_bytes(unlimited)``.
-
-NOTE:
-  We cannot set limits on the root cgroup any more.
-
-::
-
-  # cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes
-  4194304
-
-We can check the usage::
-
-  # cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes
-  1216512
-
-A successful write to this file does not guarantee a successful setting of
-this limit to the value written into the file. This can be due to a
-number of factors, such as rounding up to page boundaries or the total
-availability of memory on the system. The user is required to re-read
-this file after a write to guarantee the value committed by the kernel::
-
-  # echo 1 > memory.limit_in_bytes
-  # cat memory.limit_in_bytes
-  4096
-
-The memory.failcnt field gives the number of times that the cgroup limit was
-exceeded.
-
-The memory.stat file gives accounting information. Now, the number of
-caches, RSS and Active pages/Inactive pages are shown.
-
-4. Testing
-==========
-
-For testing features and implementation, see memcg_test.txt.
-
-Performance test is also important. To see pure memory controller's overhead,
-testing on tmpfs will give you good numbers of small overheads.
-Example: do kernel make on tmpfs.
-
-Page-fault scalability is also important. At measuring parallel
-page fault test, multi-process test may be better than multi-thread
-test because it has noise of shared objects/status.
-
-But the above two are testing extreme situations.
-Trying usual test under memory controller is always helpful.
-
-4.1 Troubleshooting
--------------------
-
-Sometimes a user might find that the application under a cgroup is
-terminated by the OOM killer. There are several causes for this:
-
-1. The cgroup limit is too low (just too low to do anything useful)
-2. The user is using anonymous memory and swap is turned off or too low
-
-A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of
-some of the pages cached in the cgroup (page cache pages).
-
-To know what happens, disabling OOM_Kill as per "10. OOM Control" (below) and
-seeing what happens will be helpful.
-
-4.2 Task migration
-------------------
-
-When a task migrates from one cgroup to another, its charge is not
-carried forward by default. The pages allocated from the original cgroup still
-remain charged to it, the charge is dropped when the page is freed or
-reclaimed.
-
-You can move charges of a task along with task migration.
-See 8. "Move charges at task migration"
-
-4.3 Removing a cgroup
----------------------
-
-A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
-cgroup might have some charge associated with it, even though all
-tasks have migrated away from it. (because we charge against pages, not
-against tasks.)
-
-We move the stats to root (if use_hierarchy==0) or parent (if
-use_hierarchy==1), and no change on the charge except uncharging
-from the child.
-
-Charges recorded in swap information is not updated at removal of cgroup.
-Recorded information is discarded and a cgroup which uses swap (swapcache)
-will be charged as a new owner of it.
-
-About use_hierarchy, see Section 6.
-
-5. Misc. interfaces
-===================
-
-5.1 force_empty
----------------
-  memory.force_empty interface is provided to make cgroup's memory usage empty.
-  When writing anything to this::
-
-    # echo 0 > memory.force_empty
-
-  the cgroup will be reclaimed and as many pages reclaimed as possible.
-
-  The typical use case for this interface is before calling rmdir().
-  Though rmdir() offlines memcg, but the memcg may still stay there due to
-  charged file caches. Some out-of-use page caches may keep charged until
-  memory pressure happens. If you want to avoid that, force_empty will be useful.
-
-  Also, note that when memory.kmem.limit_in_bytes is set the charges due to
-  kernel pages will still be seen. This is not considered a failure and the
-  write will still return success. In this case, it is expected that
-  memory.kmem.usage_in_bytes == memory.usage_in_bytes.
-
-  About use_hierarchy, see Section 6.
-
-5.2 stat file
--------------
-
-memory.stat file includes following statistics
-
-per-memory cgroup local status
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-=============== ===============================================================
-cache		# of bytes of page cache memory.
-rss		# of bytes of anonymous and swap cache memory (includes
-		transparent hugepages).
-rss_huge	# of bytes of anonymous transparent hugepages.
-mapped_file	# of bytes of mapped file (includes tmpfs/shmem)
-pgpgin		# of charging events to the memory cgroup. The charging
-		event happens each time a page is accounted as either mapped
-		anon page(RSS) or cache page(Page Cache) to the cgroup.
-pgpgout		# of uncharging events to the memory cgroup. The uncharging
-		event happens each time a page is unaccounted from the cgroup.
-swap		# of bytes of swap usage
-dirty		# of bytes that are waiting to get written back to the disk.
-writeback	# of bytes of file/anon cache that are queued for syncing to
-		disk.
-inactive_anon	# of bytes of anonymous and swap cache memory on inactive
-		LRU list.
-active_anon	# of bytes of anonymous and swap cache memory on active
-		LRU list.
-inactive_file	# of bytes of file-backed memory on inactive LRU list.
-active_file	# of bytes of file-backed memory on active LRU list.
-unevictable	# of bytes of memory that cannot be reclaimed (mlocked etc).
-=============== ===============================================================
-
-status considering hierarchy (see memory.use_hierarchy settings)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-========================= ===================================================
-hierarchical_memory_limit # of bytes of memory limit with regard to hierarchy
-			  under which the memory cgroup is
-hierarchical_memsw_limit  # of bytes of memory+swap limit with regard to
-			  hierarchy under which memory cgroup is.
-
-total_<counter>		  # hierarchical version of <counter>, which in
-			  addition to the cgroup's own value includes the
-			  sum of all hierarchical children's values of
-			  <counter>, i.e. total_cache
-========================= ===================================================
-
-The following additional stats are dependent on CONFIG_DEBUG_VM
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-========================= ========================================
-recent_rotated_anon	  VM internal parameter. (see mm/vmscan.c)
-recent_rotated_file	  VM internal parameter. (see mm/vmscan.c)
-recent_scanned_anon	  VM internal parameter. (see mm/vmscan.c)
-recent_scanned_file	  VM internal parameter. (see mm/vmscan.c)
-========================= ========================================
-
-Memo:
-	recent_rotated means recent frequency of LRU rotation.
-	recent_scanned means recent # of scans to LRU.
-	showing for better debug please see the code for meanings.
-
-Note:
-	Only anonymous and swap cache memory is listed as part of 'rss' stat.
-	This should not be confused with the true 'resident set size' or the
-	amount of physical memory used by the cgroup.
-
-	'rss + mapped_file" will give you resident set size of cgroup.
-
-	(Note: file and shmem may be shared among other cgroups. In that case,
-	mapped_file is accounted only when the memory cgroup is owner of page
-	cache.)
-
-5.3 swappiness
---------------
-
-Overrides /proc/sys/vm/swappiness for the particular group. The tunable
-in the root cgroup corresponds to the global swappiness setting.
-
-Please note that unlike during the global reclaim, limit reclaim
-enforces that 0 swappiness really prevents from any swapping even if
-there is a swap storage available. This might lead to memcg OOM killer
-if there are no file pages to reclaim.
-
-5.4 failcnt
------------
-
-A memory cgroup provides memory.failcnt and memory.memsw.failcnt files.
-This failcnt(== failure count) shows the number of times that a usage counter
-hit its limit. When a memory cgroup hits a limit, failcnt increases and
-memory under it will be reclaimed.
-
-You can reset failcnt by writing 0 to failcnt file::
-
-	# echo 0 > .../memory.failcnt
-
-5.5 usage_in_bytes
-------------------
-
-For efficiency, as other kernel components, memory cgroup uses some optimization
-to avoid unnecessary cacheline false sharing. usage_in_bytes is affected by the
-method and doesn't show 'exact' value of memory (and swap) usage, it's a fuzz
-value for efficient access. (Of course, when necessary, it's synchronized.)
-If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP)
-value in memory.stat(see 5.2).
-
-5.6 numa_stat
--------------
-
-This is similar to numa_maps but operates on a per-memcg basis.  This is
-useful for providing visibility into the numa locality information within
-an memcg since the pages are allowed to be allocated from any physical
-node.  One of the use cases is evaluating application performance by
-combining this information with the application's CPU allocation.
-
-Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable"
-per-node page counts including "hierarchical_<counter>" which sums up all
-hierarchical children's values in addition to the memcg's own value.
-
-The output format of memory.numa_stat is::
-
-  total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
-  file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
-  anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
-  unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
-  hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
-
-The "total" count is sum of file + anon + unevictable.
-
-6. Hierarchy support
-====================
-
-The memory controller supports a deep hierarchy and hierarchical accounting.
-The hierarchy is created by creating the appropriate cgroups in the
-cgroup filesystem. Consider for example, the following cgroup filesystem
-hierarchy::
-
-	       root
-	     /  |   \
-            /	|    \
-	   a	b     c
-		      | \
-		      |  \
-		      d   e
-
-In the diagram above, with hierarchical accounting enabled, all memory
-usage of e, is accounted to its ancestors up until the root (i.e, c and root),
-that has memory.use_hierarchy enabled. If one of the ancestors goes over its
-limit, the reclaim algorithm reclaims from the tasks in the ancestor and the
-children of the ancestor.
-
-6.1 Enabling hierarchical accounting and reclaim
-------------------------------------------------
-
-A memory cgroup by default disables the hierarchy feature. Support
-can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup::
-
-	# echo 1 > memory.use_hierarchy
-
-The feature can be disabled by::
-
-	# echo 0 > memory.use_hierarchy
-
-NOTE1:
-       Enabling/disabling will fail if either the cgroup already has other
-       cgroups created below it, or if the parent cgroup has use_hierarchy
-       enabled.
-
-NOTE2:
-       When panic_on_oom is set to "2", the whole system will panic in
-       case of an OOM event in any cgroup.
-
-7. Soft limits
-==============
-
-Soft limits allow for greater sharing of memory. The idea behind soft limits
-is to allow control groups to use as much of the memory as needed, provided
-
-a. There is no memory contention
-b. They do not exceed their hard limit
-
-When the system detects memory contention or low memory, control groups
-are pushed back to their soft limits. If the soft limit of each control
-group is very high, they are pushed back as much as possible to make
-sure that one control group does not starve the others of memory.
-
-Please note that soft limits is a best-effort feature; it comes with
-no guarantees, but it does its best to make sure that when memory is
-heavily contended for, memory is allocated based on the soft limit
-hints/setup. Currently soft limit based reclaim is set up such that
-it gets invoked from balance_pgdat (kswapd).
-
-7.1 Interface
--------------
-
-Soft limits can be setup by using the following commands (in this example we
-assume a soft limit of 256 MiB)::
-
-	# echo 256M > memory.soft_limit_in_bytes
-
-If we want to change this to 1G, we can at any time use::
-
-	# echo 1G > memory.soft_limit_in_bytes
-
-NOTE1:
-       Soft limits take effect over a long period of time, since they involve
-       reclaiming memory for balancing between memory cgroups
-NOTE2:
-       It is recommended to set the soft limit always below the hard limit,
-       otherwise the hard limit will take precedence.
-
-8. Move charges at task migration
-=================================
-
-Users can move charges associated with a task along with task migration, that
-is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
-This feature is not supported in !CONFIG_MMU environments because of lack of
-page tables.
-
-8.1 Interface
--------------
-
-This feature is disabled by default. It can be enabled (and disabled again) by
-writing to memory.move_charge_at_immigrate of the destination cgroup.
-
-If you want to enable it::
-
-	# echo (some positive value) > memory.move_charge_at_immigrate
-
-Note:
-      Each bits of move_charge_at_immigrate has its own meaning about what type
-      of charges should be moved. See 8.2 for details.
-Note:
-      Charges are moved only when you move mm->owner, in other words,
-      a leader of a thread group.
-Note:
-      If we cannot find enough space for the task in the destination cgroup, we
-      try to make space by reclaiming memory. Task migration may fail if we
-      cannot make enough space.
-Note:
-      It can take several seconds if you move charges much.
-
-And if you want disable it again::
-
-	# echo 0 > memory.move_charge_at_immigrate
-
-8.2 Type of charges which can be moved
---------------------------------------
-
-Each bit in move_charge_at_immigrate has its own meaning about what type of
-charges should be moved. But in any case, it must be noted that an account of
-a page or a swap can be moved only when it is charged to the task's current
-(old) memory cgroup.
-
-+---+--------------------------------------------------------------------------+
-|bit| what type of charges would be moved ?                                    |
-+===+==========================================================================+
-| 0 | A charge of an anonymous page (or swap of it) used by the target task.   |
-|   | You must enable Swap Extension (see 2.4) to enable move of swap charges. |
-+---+--------------------------------------------------------------------------+
-| 1 | A charge of file pages (normal file, tmpfs file (e.g. ipc shared memory) |
-|   | and swaps of tmpfs file) mmapped by the target task. Unlike the case of  |
-|   | anonymous pages, file pages (and swaps) in the range mmapped by the task |
-|   | will be moved even if the task hasn't done page fault, i.e. they might   |
-|   | not be the task's "RSS", but other task's "RSS" that maps the same file. |
-|   | And mapcount of the page is ignored (the page can be moved even if       |
-|   | page_mapcount(page) > 1). You must enable Swap Extension (see 2.4) to    |
-|   | enable move of swap charges.                                             |
-+---+--------------------------------------------------------------------------+
-
-8.3 TODO
---------
-
-- All of moving charge operations are done under cgroup_mutex. It's not good
-  behavior to hold the mutex too long, so we may need some trick.
-
-9. Memory thresholds
-====================
-
-Memory cgroup implements memory thresholds using the cgroups notification
-API (see cgroups.txt). It allows to register multiple memory and memsw
-thresholds and gets notifications when it crosses.
-
-To register a threshold, an application must:
-
-- create an eventfd using eventfd(2);
-- open memory.usage_in_bytes or memory.memsw.usage_in_bytes;
-- write string like "<event_fd> <fd of memory.usage_in_bytes> <threshold>" to
-  cgroup.event_control.
-
-Application will be notified through eventfd when memory usage crosses
-threshold in any direction.
-
-It's applicable for root and non-root cgroup.
-
-10. OOM Control
-===============
-
-memory.oom_control file is for OOM notification and other controls.
-
-Memory cgroup implements OOM notifier using the cgroup notification
-API (See cgroups.txt). It allows to register multiple OOM notification
-delivery and gets notification when OOM happens.
-
-To register a notifier, an application must:
-
- - create an eventfd using eventfd(2)
- - open memory.oom_control file
- - write string like "<event_fd> <fd of memory.oom_control>" to
-   cgroup.event_control
-
-The application will be notified through eventfd when OOM happens.
-OOM notification doesn't work for the root cgroup.
-
-You can disable the OOM-killer by writing "1" to memory.oom_control file, as:
-
-	#echo 1 > memory.oom_control
-
-If OOM-killer is disabled, tasks under cgroup will hang/sleep
-in memory cgroup's OOM-waitqueue when they request accountable memory.
-
-For running them, you have to relax the memory cgroup's OOM status by
-
-	* enlarge limit or reduce usage.
-
-To reduce usage,
-
-	* kill some tasks.
-	* move some tasks to other group with account migration.
-	* remove some files (on tmpfs?)
-
-Then, stopped tasks will work again.
-
-At reading, current status of OOM is shown.
-
-	- oom_kill_disable 0 or 1
-	  (if 1, oom-killer is disabled)
-	- under_oom	   0 or 1
-	  (if 1, the memory cgroup is under OOM, tasks may be stopped.)
-
-11. Memory Pressure
-===================
-
-The pressure level notifications can be used to monitor the memory
-allocation cost; based on the pressure, applications can implement
-different strategies of managing their memory resources. The pressure
-levels are defined as following:
-
-The "low" level means that the system is reclaiming memory for new
-allocations. Monitoring this reclaiming activity might be useful for
-maintaining cache level. Upon notification, the program (typically
-"Activity Manager") might analyze vmstat and act in advance (i.e.
-prematurely shutdown unimportant services).
-
-The "medium" level means that the system is experiencing medium memory
-pressure, the system might be making swap, paging out active file caches,
-etc. Upon this event applications may decide to further analyze
-vmstat/zoneinfo/memcg or internal memory usage statistics and free any
-resources that can be easily reconstructed or re-read from a disk.
-
-The "critical" level means that the system is actively thrashing, it is
-about to out of memory (OOM) or even the in-kernel OOM killer is on its
-way to trigger. Applications should do whatever they can to help the
-system. It might be too late to consult with vmstat or any other
-statistics, so it's advisable to take an immediate action.
-
-By default, events are propagated upward until the event is handled, i.e. the
-events are not pass-through. For example, you have three cgroups: A->B->C. Now
-you set up an event listener on cgroups A, B and C, and suppose group C
-experiences some pressure. In this situation, only group C will receive the
-notification, i.e. groups A and B will not receive it. This is done to avoid
-excessive "broadcasting" of messages, which disturbs the system and which is
-especially bad if we are low on memory or thrashing. Group B, will receive
-notification only if there are no event listers for group C.
-
-There are three optional modes that specify different propagation behavior:
-
- - "default": this is the default behavior specified above. This mode is the
-   same as omitting the optional mode parameter, preserved by backwards
-   compatibility.
-
- - "hierarchy": events always propagate up to the root, similar to the default
-   behavior, except that propagation continues regardless of whether there are
-   event listeners at each level, with the "hierarchy" mode. In the above
-   example, groups A, B, and C will receive notification of memory pressure.
-
- - "local": events are pass-through, i.e. they only receive notifications when
-   memory pressure is experienced in the memcg for which the notification is
-   registered. In the above example, group C will receive notification if
-   registered for "local" notification and the group experiences memory
-   pressure. However, group B will never receive notification, regardless if
-   there is an event listener for group C or not, if group B is registered for
-   local notification.
-
-The level and event notification mode ("hierarchy" or "local", if necessary) are
-specified by a comma-delimited string, i.e. "low,hierarchy" specifies
-hierarchical, pass-through, notification for all ancestor memcgs. Notification
-that is the default, non pass-through behavior, does not specify a mode.
-"medium,local" specifies pass-through notification for the medium level.
-
-The file memory.pressure_level is only used to setup an eventfd. To
-register a notification, an application must:
-
-- create an eventfd using eventfd(2);
-- open memory.pressure_level;
-- write string as "<event_fd> <fd of memory.pressure_level> <level[,mode]>"
-  to cgroup.event_control.
-
-Application will be notified through eventfd when memory pressure is at
-the specific level (or higher). Read/write operations to
-memory.pressure_level are no implemented.
-
-Test:
-
-   Here is a small script example that makes a new cgroup, sets up a
-   memory limit, sets up a notification in the cgroup and then makes child
-   cgroup experience a critical pressure::
-
-	# cd /sys/fs/cgroup/memory/
-	# mkdir foo
-	# cd foo
-	# cgroup_event_listener memory.pressure_level low,hierarchy &
-	# echo 8000000 > memory.limit_in_bytes
-	# echo 8000000 > memory.memsw.limit_in_bytes
-	# echo $$ > tasks
-	# dd if=/dev/zero | read x
-
-   (Expect a bunch of notifications, and eventually, the oom-killer will
-   trigger.)
-
-12. TODO
-========
-
-1. Make per-cgroup scanner reclaim not-shared pages first
-2. Teach controller to account for shared-pages
-3. Start reclamation in the background when the limit is
-   not yet hit but the usage is getting closer
-
-Summary
-=======
-
-Overall, the memory controller has been a stable controller and has been
-commented and discussed quite extensively in the community.
-
-References
-==========
-
-1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/
-2. Singh, Balbir. Memory Controller (RSS Control),
-   http://lwn.net/Articles/222762/
-3. Emelianov, Pavel. Resource controllers based on process cgroups
-   http://lkml.org/lkml/2007/3/6/198
-4. Emelianov, Pavel. RSS controller based on process cgroups (v2)
-   http://lkml.org/lkml/2007/4/9/78
-5. Emelianov, Pavel. RSS controller based on process cgroups (v3)
-   http://lkml.org/lkml/2007/5/30/244
-6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/
-7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control
-   subsystem (v3), http://lwn.net/Articles/235534/
-8. Singh, Balbir. RSS controller v2 test results (lmbench),
-   http://lkml.org/lkml/2007/5/17/232
-9. Singh, Balbir. RSS controller v2 AIM9 results
-   http://lkml.org/lkml/2007/5/18/1
-10. Singh, Balbir. Memory controller v6 test results,
-    http://lkml.org/lkml/2007/8/19/36
-11. Singh, Balbir. Memory controller introduction (v6),
-    http://lkml.org/lkml/2007/8/17/69
-12. Corbet, Jonathan, Controlling memory use in cgroups,
-    http://lwn.net/Articles/243795/
diff --git a/Documentation/cgroup-v1/net_cls.rst b/Documentation/cgroup-v1/net_cls.rst
deleted file mode 100644
index a2cf272af7a0..000000000000
--- a/Documentation/cgroup-v1/net_cls.rst
+++ /dev/null
@@ -1,44 +0,0 @@
-=========================
-Network classifier cgroup
-=========================
-
-The Network classifier cgroup provides an interface to
-tag network packets with a class identifier (classid).
-
-The Traffic Controller (tc) can be used to assign
-different priorities to packets from different cgroups.
-Also, Netfilter (iptables) can use this tag to perform
-actions on such packets.
-
-Creating a net_cls cgroups instance creates a net_cls.classid file.
-This net_cls.classid value is initialized to 0.
-
-You can write hexadecimal values to net_cls.classid; the format for these
-values is 0xAAAABBBB; AAAA is the major handle number and BBBB
-is the minor handle number.
-Reading net_cls.classid yields a decimal result.
-
-Example::
-
-	mkdir /sys/fs/cgroup/net_cls
-	mount -t cgroup -onet_cls net_cls /sys/fs/cgroup/net_cls
-	mkdir /sys/fs/cgroup/net_cls/0
-	echo 0x100001 >  /sys/fs/cgroup/net_cls/0/net_cls.classid
-
-- setting a 10:1 handle::
-
-	cat /sys/fs/cgroup/net_cls/0/net_cls.classid
-	1048577
-
-- configuring tc::
-
-	tc qdisc add dev eth0 root handle 10: htb
-	tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit
-
-- creating traffic class 10:1::
-
-	tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup
-
-configuring iptables, basic example::
-
-	iptables -A OUTPUT -m cgroup ! --cgroup 0x100001 -j DROP
diff --git a/Documentation/cgroup-v1/net_prio.rst b/Documentation/cgroup-v1/net_prio.rst
deleted file mode 100644
index b40905871c64..000000000000
--- a/Documentation/cgroup-v1/net_prio.rst
+++ /dev/null
@@ -1,57 +0,0 @@
-=======================
-Network priority cgroup
-=======================
-
-The Network priority cgroup provides an interface to allow an administrator to
-dynamically set the priority of network traffic generated by various
-applications
-
-Nominally, an application would set the priority of its traffic via the
-SO_PRIORITY socket option.  This however, is not always possible because:
-
-1) The application may not have been coded to set this value
-2) The priority of application traffic is often a site-specific administrative
-   decision rather than an application defined one.
-
-This cgroup allows an administrator to assign a process to a group which defines
-the priority of egress traffic on a given interface. Network priority groups can
-be created by first mounting the cgroup filesystem::
-
-	# mount -t cgroup -onet_prio none /sys/fs/cgroup/net_prio
-
-With the above step, the initial group acting as the parent accounting group
-becomes visible at '/sys/fs/cgroup/net_prio'.  This group includes all tasks in
-the system. '/sys/fs/cgroup/net_prio/tasks' lists the tasks in this cgroup.
-
-Each net_prio cgroup contains two files that are subsystem specific
-
-net_prio.prioidx
-  This file is read-only, and is simply informative.  It contains a unique
-  integer value that the kernel uses as an internal representation of this
-  cgroup.
-
-net_prio.ifpriomap
-  This file contains a map of the priorities assigned to traffic originating
-  from processes in this group and egressing the system on various interfaces.
-  It contains a list of tuples in the form <ifname priority>.  Contents of this
-  file can be modified by echoing a string into the file using the same tuple
-  format. For example::
-
-	echo "eth0 5" > /sys/fs/cgroups/net_prio/iscsi/net_prio.ifpriomap
-
-This command would force any traffic originating from processes belonging to the
-iscsi net_prio cgroup and egressing on interface eth0 to have the priority of
-said traffic set to the value 5. The parent accounting group also has a
-writeable 'net_prio.ifpriomap' file that can be used to set a system default
-priority.
-
-Priorities are set immediately prior to queueing a frame to the device
-queueing discipline (qdisc) so priorities will be assigned prior to the hardware
-queue selection being made.
-
-One usage for the net_prio cgroup is with mqprio qdisc allowing application
-traffic to be steered to hardware/driver based traffic classes. These mappings
-can then be managed by administrators or other networking protocols such as
-DCBX.
-
-A new net_prio cgroup inherits the parent's configuration.
diff --git a/Documentation/cgroup-v1/pids.rst b/Documentation/cgroup-v1/pids.rst
deleted file mode 100644
index 6acebd9e72c8..000000000000
--- a/Documentation/cgroup-v1/pids.rst
+++ /dev/null
@@ -1,92 +0,0 @@
-=========================
-Process Number Controller
-=========================
-
-Abstract
---------
-
-The process number controller is used to allow a cgroup hierarchy to stop any
-new tasks from being fork()'d or clone()'d after a certain limit is reached.
-
-Since it is trivial to hit the task limit without hitting any kmemcg limits in
-place, PIDs are a fundamental resource. As such, PID exhaustion must be
-preventable in the scope of a cgroup hierarchy by allowing resource limiting of
-the number of tasks in a cgroup.
-
-Usage
------
-
-In order to use the `pids` controller, set the maximum number of tasks in
-pids.max (this is not available in the root cgroup for obvious reasons). The
-number of processes currently in the cgroup is given by pids.current.
-
-Organisational operations are not blocked by cgroup policies, so it is possible
-to have pids.current > pids.max. This can be done by either setting the limit to
-be smaller than pids.current, or attaching enough processes to the cgroup such
-that pids.current > pids.max. However, it is not possible to violate a cgroup
-policy through fork() or clone(). fork() and clone() will return -EAGAIN if the
-creation of a new process would cause a cgroup policy to be violated.
-
-To set a cgroup to have no limit, set pids.max to "max". This is the default for
-all new cgroups (N.B. that PID limits are hierarchical, so the most stringent
-limit in the hierarchy is followed).
-
-pids.current tracks all child cgroup hierarchies, so parent/pids.current is a
-superset of parent/child/pids.current.
-
-The pids.events file contains event counters:
-
-  - max: Number of times fork failed because limit was hit.
-
-Example
--------
-
-First, we mount the pids controller::
-
-	# mkdir -p /sys/fs/cgroup/pids
-	# mount -t cgroup -o pids none /sys/fs/cgroup/pids
-
-Then we create a hierarchy, set limits and attach processes to it::
-
-	# mkdir -p /sys/fs/cgroup/pids/parent/child
-	# echo 2 > /sys/fs/cgroup/pids/parent/pids.max
-	# echo $$ > /sys/fs/cgroup/pids/parent/cgroup.procs
-	# cat /sys/fs/cgroup/pids/parent/pids.current
-	2
-	#
-
-It should be noted that attempts to overcome the set limit (2 in this case) will
-fail::
-
-	# cat /sys/fs/cgroup/pids/parent/pids.current
-	2
-	# ( /bin/echo "Here's some processes for you." | cat )
-	sh: fork: Resource temporary unavailable
-	#
-
-Even if we migrate to a child cgroup (which doesn't have a set limit), we will
-not be able to overcome the most stringent limit in the hierarchy (in this case,
-parent's)::
-
-	# echo $$ > /sys/fs/cgroup/pids/parent/child/cgroup.procs
-	# cat /sys/fs/cgroup/pids/parent/pids.current
-	2
-	# cat /sys/fs/cgroup/pids/parent/child/pids.current
-	2
-	# cat /sys/fs/cgroup/pids/parent/child/pids.max
-	max
-	# ( /bin/echo "Here's some processes for you." | cat )
-	sh: fork: Resource temporary unavailable
-	#
-
-We can set a limit that is smaller than pids.current, which will stop any new
-processes from being forked at all (note that the shell itself counts towards
-pids.current)::
-
-	# echo 1 > /sys/fs/cgroup/pids/parent/pids.max
-	# /bin/echo "We can't even spawn a single process now."
-	sh: fork: Resource temporary unavailable
-	# echo 0 > /sys/fs/cgroup/pids/parent/pids.max
-	# /bin/echo "We can't even spawn a single process now."
-	sh: fork: Resource temporary unavailable
-	#
diff --git a/Documentation/cgroup-v1/rdma.rst b/Documentation/cgroup-v1/rdma.rst
deleted file mode 100644
index 2fcb0a9bf790..000000000000
--- a/Documentation/cgroup-v1/rdma.rst
+++ /dev/null
@@ -1,117 +0,0 @@
-===============
-RDMA Controller
-===============
-
-.. Contents
-
-   1. Overview
-     1-1. What is RDMA controller?
-     1-2. Why RDMA controller needed?
-     1-3. How is RDMA controller implemented?
-   2. Usage Examples
-
-1. Overview
-===========
-
-1-1. What is RDMA controller?
------------------------------
-
-RDMA controller allows user to limit RDMA/IB specific resources that a given
-set of processes can use. These processes are grouped using RDMA controller.
-
-RDMA controller defines two resources which can be limited for processes of a
-cgroup.
-
-1-2. Why RDMA controller needed?
---------------------------------
-
-Currently user space applications can easily take away all the rdma verb
-specific resources such as AH, CQ, QP, MR etc. Due to which other applications
-in other cgroup or kernel space ULPs may not even get chance to allocate any
-rdma resources. This can lead to service unavailability.
-
-Therefore RDMA controller is needed through which resource consumption
-of processes can be limited. Through this controller different rdma
-resources can be accounted.
-
-1-3. How is RDMA controller implemented?
-----------------------------------------
-
-RDMA cgroup allows limit configuration of resources. Rdma cgroup maintains
-resource accounting per cgroup, per device using resource pool structure.
-Each such resource pool is limited up to 64 resources in given resource pool
-by rdma cgroup, which can be extended later if required.
-
-This resource pool object is linked to the cgroup css. Typically there
-are 0 to 4 resource pool instances per cgroup, per device in most use cases.
-But nothing limits to have it more. At present hundreds of RDMA devices per
-single cgroup may not be handled optimally, however there is no
-known use case or requirement for such configuration either.
-
-Since RDMA resources can be allocated from any process and can be freed by any
-of the child processes which shares the address space, rdma resources are
-always owned by the creator cgroup css. This allows process migration from one
-to other cgroup without major complexity of transferring resource ownership;
-because such ownership is not really present due to shared nature of
-rdma resources. Linking resources around css also ensures that cgroups can be
-deleted after processes migrated. This allow progress migration as well with
-active resources, even though that is not a primary use case.
-
-Whenever RDMA resource charging occurs, owner rdma cgroup is returned to
-the caller. Same rdma cgroup should be passed while uncharging the resource.
-This also allows process migrated with active RDMA resource to charge
-to new owner cgroup for new resource. It also allows to uncharge resource of
-a process from previously charged cgroup which is migrated to new cgroup,
-even though that is not a primary use case.
-
-Resource pool object is created in following situations.
-(a) User sets the limit and no previous resource pool exist for the device
-of interest for the cgroup.
-(b) No resource limits were configured, but IB/RDMA stack tries to
-charge the resource. So that it correctly uncharge them when applications are
-running without limits and later on when limits are enforced during uncharging,
-otherwise usage count will drop to negative.
-
-Resource pool is destroyed if all the resource limits are set to max and
-it is the last resource getting deallocated.
-
-User should set all the limit to max value if it intents to remove/unconfigure
-the resource pool for a particular device.
-
-IB stack honors limits enforced by the rdma controller. When application
-query about maximum resource limits of IB device, it returns minimum of
-what is configured by user for a given cgroup and what is supported by
-IB device.
-
-Following resources can be accounted by rdma controller.
-
-  ==========    =============================
-  hca_handle	Maximum number of HCA Handles
-  hca_object 	Maximum number of HCA Objects
-  ==========    =============================
-
-2. Usage Examples
-=================
-
-(a) Configure resource limit::
-
-	echo mlx4_0 hca_handle=2 hca_object=2000 > /sys/fs/cgroup/rdma/1/rdma.max
-	echo ocrdma1 hca_handle=3 > /sys/fs/cgroup/rdma/2/rdma.max
-
-(b) Query resource limit::
-
-	cat /sys/fs/cgroup/rdma/2/rdma.max
-	#Output:
-	mlx4_0 hca_handle=2 hca_object=2000
-	ocrdma1 hca_handle=3 hca_object=max
-
-(c) Query current usage::
-
-	cat /sys/fs/cgroup/rdma/2/rdma.current
-	#Output:
-	mlx4_0 hca_handle=1 hca_object=20
-	ocrdma1 hca_handle=1 hca_object=23
-
-(d) Delete resource limit::
-
-	echo echo mlx4_0 hca_handle=max hca_object=max > /sys/fs/cgroup/rdma/1/rdma.max
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index cad797a8a39e..5ecbc03e6b2f 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt
@@ -98,7 +98,7 @@ A memory policy with a valid NodeList will be saved, as specified, for
 use at file creation time.  When a task allocates a file in the file
 system, the mount option memory policy will be applied with a NodeList,
 if any, modified by the calling task's cpuset constraints
-[See Documentation/cgroup-v1/cpusets.rst] and any optional flags, listed
+[See Documentation/admin-guide/cgroup-v1/cpusets.rst] and any optional flags, listed
 below.  If the resulting NodeLists is the empty set, the effective memory
 policy for the file will revert to "default" policy.
 
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
index 5623b9916411..4f18456dd3b1 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -12,7 +12,7 @@ References
 
 -	Documentation/IRQ-affinity.txt:  Binding interrupts to sets of CPUs.
 
--	Documentation/cgroup-v1:  Using cgroups to bind tasks to sets of CPUs.
+-	Documentation/admin-guide/cgroup-v1:  Using cgroups to bind tasks to sets of CPUs.
 
 -	man taskset:  Using the taskset command to bind tasks to sets
 	of CPUs.
diff --git a/Documentation/scheduler/sched-deadline.rst b/Documentation/scheduler/sched-deadline.rst
index 3391e86d810c..14a2f7bf63fe 100644
--- a/Documentation/scheduler/sched-deadline.rst
+++ b/Documentation/scheduler/sched-deadline.rst
@@ -669,7 +669,7 @@ Deadline Task Scheduling
 
  -deadline tasks cannot have an affinity mask smaller that the entire
  root_domain they are created on. However, affinities can be specified
- through the cpuset facility (Documentation/cgroup-v1/cpusets.rst).
+ through the cpuset facility (Documentation/admin-guide/cgroup-v1/cpusets.rst).
 
 5.1 SCHED_DEADLINE and cpusets HOWTO
 ------------------------------------
diff --git a/Documentation/scheduler/sched-design-CFS.rst b/Documentation/scheduler/sched-design-CFS.rst
index 53b30d1967cf..a96c72651877 100644
--- a/Documentation/scheduler/sched-design-CFS.rst
+++ b/Documentation/scheduler/sched-design-CFS.rst
@@ -222,7 +222,7 @@ SCHED_BATCH) tasks.
 
    These options need CONFIG_CGROUPS to be defined, and let the administrator
    create arbitrary groups of tasks, using the "cgroup" pseudo filesystem.  See
-   Documentation/cgroup-v1/cgroups.rst for more information about this filesystem.
+   Documentation/admin-guide/cgroup-v1/cgroups.rst for more information about this filesystem.
 
 When CONFIG_FAIR_GROUP_SCHED is defined, a "cpu.shares" file is created for each
 group created using the pseudo filesystem.  See example steps below to create
diff --git a/Documentation/scheduler/sched-rt-group.rst b/Documentation/scheduler/sched-rt-group.rst
index d27d3f3712fd..655a096ec8fb 100644
--- a/Documentation/scheduler/sched-rt-group.rst
+++ b/Documentation/scheduler/sched-rt-group.rst
@@ -133,7 +133,7 @@ This uses the cgroup virtual file system and "<cgroup>/cpu.rt_runtime_us"
 to control the CPU time reserved for each control group.
 
 For more information on working with control groups, you should read
-Documentation/cgroup-v1/cgroups.rst as well.
+Documentation/admin-guide/cgroup-v1/cgroups.rst as well.
 
 Group settings are checked against the following limits in order to keep the
 configuration schedulable:
diff --git a/Documentation/vm/numa.rst b/Documentation/vm/numa.rst
index 130f3cfa1c19..99fdeca917ca 100644
--- a/Documentation/vm/numa.rst
+++ b/Documentation/vm/numa.rst
@@ -67,7 +67,7 @@ nodes.  Each emulated node will manage a fraction of the underlying cells'
 physical memory.  NUMA emluation is useful for testing NUMA kernel and
 application features on non-NUMA platforms, and as a sort of memory resource
 management mechanism when used together with cpusets.
-[see Documentation/cgroup-v1/cpusets.rst]
+[see Documentation/admin-guide/cgroup-v1/cpusets.rst]
 
 For each node with memory, Linux constructs an independent memory management
 subsystem, complete with its own free page lists, in-use page lists, usage
@@ -114,7 +114,7 @@ allocation behavior using Linux NUMA memory policy. [see
 
 System administrators can restrict the CPUs and nodes' memories that a non-
 privileged user can specify in the scheduling or NUMA commands and functions
-using control groups and CPUsets.  [see Documentation/cgroup-v1/cpusets.rst]
+using control groups and CPUsets.  [see Documentation/admin-guide/cgroup-v1/cpusets.rst]
 
 On architectures that do not hide memoryless nodes, Linux will include only
 zones [nodes] with memory in the zonelists.  This means that for a memoryless
diff --git a/Documentation/vm/page_migration.rst b/Documentation/vm/page_migration.rst
index 35bba27d5fff..1d6cd7db4e43 100644
--- a/Documentation/vm/page_migration.rst
+++ b/Documentation/vm/page_migration.rst
@@ -41,7 +41,7 @@ locations.
 Larger installations usually partition the system using cpusets into
 sections of nodes. Paul Jackson has equipped cpusets with the ability to
 move pages when a task is moved to another cpuset (See
-Documentation/cgroup-v1/cpusets.rst).
+Documentation/admin-guide/cgroup-v1/cpusets.rst).
 Cpusets allows the automation of process locality. If a task is moved to
 a new cpuset then also all its pages are moved with it so that the
 performance of the process does not sink dramatically. Also the pages
diff --git a/Documentation/vm/unevictable-lru.rst b/Documentation/vm/unevictable-lru.rst
index 109052215bce..17d0861b0f1d 100644
--- a/Documentation/vm/unevictable-lru.rst
+++ b/Documentation/vm/unevictable-lru.rst
@@ -98,7 +98,7 @@ Memory Control Group Interaction
 --------------------------------
 
 The unevictable LRU facility interacts with the memory control group [aka
-memory controller; see Documentation/cgroup-v1/memory.rst] by extending the
+memory controller; see Documentation/admin-guide/cgroup-v1/memory.rst] by extending the
 lru_list enum.
 
 The memory controller data structure automatically gets a per-zone unevictable
diff --git a/Documentation/x86/x86_64/fake-numa-for-cpusets.rst b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
index 30108684ae87..ff9bcfd2cc14 100644
--- a/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
+++ b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
@@ -15,7 +15,7 @@ assign them to cpusets and their attached tasks.  This is a way of limiting the
 amount of system memory that are available to a certain class of tasks.
 
 For more information on the features of cpusets, see
-Documentation/cgroup-v1/cpusets.rst.
+Documentation/admin-guide/cgroup-v1/cpusets.rst.
 There are a number of different configurations you can use for your needs.  For
 more information on the numa=fake command line option and its various ways of
 configuring fake nodes, see Documentation/x86/x86_64/boot-options.rst.
@@ -40,7 +40,7 @@ A machine may be split as follows with "numa=fake=4*512," as reported by dmesg::
 	On node 3 totalpages: 131072
 
 Now following the instructions for mounting the cpusets filesystem from
-Documentation/cgroup-v1/cpusets.rst, you can assign fake nodes (i.e. contiguous memory
+Documentation/admin-guide/cgroup-v1/cpusets.rst, you can assign fake nodes (i.e. contiguous memory
 address spaces) to individual cpusets::
 
 	[root@xroads /]# mkdir exampleset
-- 
cgit 


From 4f4cfa6c560c93ba180c30675cf845e1597de44c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 27 Jun 2019 14:56:51 -0300
Subject: docs: admin-guide: add a series of orphaned documents

There are lots of documents that belong to the admin-guide but
are on random places (most under Documentation root dir).

Move them to the admin guide.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Acked-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 Documentation/ABI/stable/sysfs-devices-node        |   2 +-
 Documentation/ABI/testing/procfs-diskstats         |   2 +-
 Documentation/ABI/testing/sysfs-block              |   2 +-
 Documentation/ABI/testing/sysfs-devices-system-cpu |   4 +-
 Documentation/admin-guide/btmrvl.rst               | 124 +++++++
 Documentation/admin-guide/clearing-warn-once.rst   |   9 +
 Documentation/admin-guide/cpu-load.rst             | 114 +++++++
 Documentation/admin-guide/cputopology.rst          | 177 ++++++++++
 .../admin-guide/device-mapper/statistics.rst       |   4 +-
 Documentation/admin-guide/efi-stub.rst             | 100 ++++++
 Documentation/admin-guide/highuid.rst              |  80 +++++
 Documentation/admin-guide/hw-vuln/l1tf.rst         |   2 +-
 Documentation/admin-guide/hw_random.rst            | 105 ++++++
 Documentation/admin-guide/index.rst                |  17 +
 Documentation/admin-guide/iostats.rst              | 197 ++++++++++++
 Documentation/admin-guide/kernel-parameters.txt    |   2 +-
 .../admin-guide/kernel-per-CPU-kthreads.rst        | 356 +++++++++++++++++++++
 Documentation/admin-guide/lcd-panel-cgram.rst      |  27 ++
 Documentation/admin-guide/ldm.rst                  | 121 +++++++
 Documentation/admin-guide/lockup-watchdogs.rst     |  83 +++++
 Documentation/admin-guide/mm/cma_debugfs.rst       |  25 ++
 Documentation/admin-guide/mm/index.rst             |   1 +
 Documentation/admin-guide/numastat.rst             |  30 ++
 Documentation/admin-guide/pnp.rst                  | 292 +++++++++++++++++
 Documentation/admin-guide/rtc.rst                  | 140 ++++++++
 Documentation/admin-guide/svga.rst                 | 249 ++++++++++++++
 Documentation/admin-guide/sysctl/kernel.rst        |   2 +-
 Documentation/admin-guide/video-output.rst         |  34 ++
 Documentation/auxdisplay/lcd-panel-cgram.rst       |  29 --
 Documentation/btmrvl.txt                           | 124 -------
 Documentation/clearing-warn-once.txt               |   9 -
 Documentation/cma/debugfs.rst                      |  27 --
 Documentation/cpu-load.txt                         | 114 -------
 Documentation/cputopology.txt                      | 177 ----------
 Documentation/efi-stub.txt                         | 100 ------
 Documentation/fb/vesafb.rst                        |   2 +-
 Documentation/highuid.txt                          |  80 -----
 Documentation/hw_random.txt                        | 105 ------
 Documentation/iostats.txt                          | 197 ------------
 Documentation/kernel-per-CPU-kthreads.txt          | 356 ---------------------
 Documentation/ldm.txt                              | 121 -------
 Documentation/lockup-watchdogs.txt                 |  83 -----
 Documentation/numastat.txt                         |  30 --
 Documentation/pnp.txt                              | 292 -----------------
 Documentation/rtc.txt                              | 140 --------
 Documentation/svga.txt                             | 249 --------------
 Documentation/video-output.txt                     |  34 --
 Documentation/x86/topology.rst                     |   2 +-
 48 files changed, 2293 insertions(+), 2279 deletions(-)
 create mode 100644 Documentation/admin-guide/btmrvl.rst
 create mode 100644 Documentation/admin-guide/clearing-warn-once.rst
 create mode 100644 Documentation/admin-guide/cpu-load.rst
 create mode 100644 Documentation/admin-guide/cputopology.rst
 create mode 100644 Documentation/admin-guide/efi-stub.rst
 create mode 100644 Documentation/admin-guide/highuid.rst
 create mode 100644 Documentation/admin-guide/hw_random.rst
 create mode 100644 Documentation/admin-guide/iostats.rst
 create mode 100644 Documentation/admin-guide/kernel-per-CPU-kthreads.rst
 create mode 100644 Documentation/admin-guide/lcd-panel-cgram.rst
 create mode 100644 Documentation/admin-guide/ldm.rst
 create mode 100644 Documentation/admin-guide/lockup-watchdogs.rst
 create mode 100644 Documentation/admin-guide/mm/cma_debugfs.rst
 create mode 100644 Documentation/admin-guide/numastat.rst
 create mode 100644 Documentation/admin-guide/pnp.rst
 create mode 100644 Documentation/admin-guide/rtc.rst
 create mode 100644 Documentation/admin-guide/svga.rst
 create mode 100644 Documentation/admin-guide/video-output.rst
 delete mode 100644 Documentation/auxdisplay/lcd-panel-cgram.rst
 delete mode 100644 Documentation/btmrvl.txt
 delete mode 100644 Documentation/clearing-warn-once.txt
 delete mode 100644 Documentation/cma/debugfs.rst
 delete mode 100644 Documentation/cpu-load.txt
 delete mode 100644 Documentation/cputopology.txt
 delete mode 100644 Documentation/efi-stub.txt
 delete mode 100644 Documentation/highuid.txt
 delete mode 100644 Documentation/hw_random.txt
 delete mode 100644 Documentation/iostats.txt
 delete mode 100644 Documentation/kernel-per-CPU-kthreads.txt
 delete mode 100644 Documentation/ldm.txt
 delete mode 100644 Documentation/lockup-watchdogs.txt
 delete mode 100644 Documentation/numastat.txt
 delete mode 100644 Documentation/pnp.txt
 delete mode 100644 Documentation/rtc.txt
 delete mode 100644 Documentation/svga.txt
 delete mode 100644 Documentation/video-output.txt

(limited to 'Documentation')

diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
index f7ce68fbd4b9..df8413cf1468 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -61,7 +61,7 @@ Date:		October 2002
 Contact:	Linux Memory Management list <linux-mm@kvack.org>
 Description:
 		The node's hit/miss statistics, in units of pages.
-		See Documentation/numastat.txt
+		See Documentation/admin-guide/numastat.rst
 
 What:		/sys/devices/system/node/nodeX/distance
 Date:		October 2002
diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats
index abac31d216de..2c44b4f1b060 100644
--- a/Documentation/ABI/testing/procfs-diskstats
+++ b/Documentation/ABI/testing/procfs-diskstats
@@ -29,4 +29,4 @@ Description:
 		17 - sectors discarded
 		18 - time spent discarding
 
-		For more details refer to Documentation/iostats.txt
+		For more details refer to Documentation/admin-guide/iostats.rst
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index dfad7427817c..f8c7c7126bb1 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -15,7 +15,7 @@ Description:
 		 9 - I/Os currently in progress
 		10 - time spent doing I/Os (ms)
 		11 - weighted time spent doing I/Os (ms)
-		For more details refer Documentation/iostats.txt
+		For more details refer Documentation/admin-guide/iostats.rst
 
 
 What:		/sys/block/<disk>/<part>/stat
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index d404603c6b52..5f7d7b14fa44 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -34,7 +34,7 @@ Description:	CPU topology files that describe kernel limits related to
 		present: cpus that have been identified as being present in
 		the system.
 
-		See Documentation/cputopology.txt for more information.
+		See Documentation/admin-guide/cputopology.rst for more information.
 
 
 What:		/sys/devices/system/cpu/probe
@@ -103,7 +103,7 @@ Description:	CPU topology files that describe a logical CPU's relationship
 		thread_siblings_list: human-readable list of cpu#'s hardware
 		threads within the same core as cpu#
 
-		See Documentation/cputopology.txt for more information.
+		See Documentation/admin-guide/cputopology.rst for more information.
 
 
 What:		/sys/devices/system/cpu/cpuidle/current_driver
diff --git a/Documentation/admin-guide/btmrvl.rst b/Documentation/admin-guide/btmrvl.rst
new file mode 100644
index 000000000000..ec57740ead0c
--- /dev/null
+++ b/Documentation/admin-guide/btmrvl.rst
@@ -0,0 +1,124 @@
+=============
+btmrvl driver
+=============
+
+All commands are used via debugfs interface.
+
+Set/get driver configurations
+=============================
+
+Path:	/debug/btmrvl/config/
+
+gpiogap=[n], hscfgcmd
+	These commands are used to configure the host sleep parameters::
+	bit 8:0  -- Gap
+	bit 16:8 -- GPIO
+
+	where GPIO is the pin number of GPIO used to wake up the host.
+	It could be any valid GPIO pin# (e.g. 0-7) or 0xff (SDIO interface
+	wakeup will be used instead).
+
+	where Gap is the gap in milli seconds between wakeup signal and
+	wakeup event, or 0xff for special host sleep setting.
+
+	Usage::
+
+		# Use SDIO interface to wake up the host and set GAP to 0x80:
+		echo 0xff80 > /debug/btmrvl/config/gpiogap
+		echo 1 > /debug/btmrvl/config/hscfgcmd
+
+		# Use GPIO pin #3 to wake up the host and set GAP to 0xff:
+		echo 0x03ff >  /debug/btmrvl/config/gpiogap
+		echo 1 > /debug/btmrvl/config/hscfgcmd
+
+psmode=[n], pscmd
+	These commands are used to enable/disable auto sleep mode
+
+	where the option is::
+
+			1 	-- Enable auto sleep mode
+			0 	-- Disable auto sleep mode
+
+	Usage::
+
+		# Enable auto sleep mode
+		echo 1 > /debug/btmrvl/config/psmode
+		echo 1 > /debug/btmrvl/config/pscmd
+
+		# Disable auto sleep mode
+		echo 0 > /debug/btmrvl/config/psmode
+		echo 1 > /debug/btmrvl/config/pscmd
+
+
+hsmode=[n], hscmd
+	These commands are used to enable host sleep or wake up firmware
+
+	where the option is::
+
+			1	-- Enable host sleep
+			0	-- Wake up firmware
+
+	Usage::
+
+		# Enable host sleep
+		echo 1 > /debug/btmrvl/config/hsmode
+		echo 1 > /debug/btmrvl/config/hscmd
+
+		# Wake up firmware
+		echo 0 > /debug/btmrvl/config/hsmode
+		echo 1 > /debug/btmrvl/config/hscmd
+
+
+Get driver status
+=================
+
+Path:	/debug/btmrvl/status/
+
+Usage::
+
+	cat /debug/btmrvl/status/<args>
+
+where the args are:
+
+curpsmode
+	This command displays current auto sleep status.
+
+psstate
+	This command display the power save state.
+
+hsstate
+	This command display the host sleep state.
+
+txdnldrdy
+	This command displays the value of Tx download ready flag.
+
+Issuing a raw hci command
+=========================
+
+Use hcitool to issue raw hci command, refer to hcitool manual
+
+Usage::
+
+	Hcitool cmd <ogf> <ocf> [Parameters]
+
+Interface Control Command::
+
+	hcitool cmd 0x3f 0x5b 0xf5 0x01 0x00    --Enable All interface
+	hcitool cmd 0x3f 0x5b 0xf5 0x01 0x01    --Enable Wlan interface
+	hcitool cmd 0x3f 0x5b 0xf5 0x01 0x02    --Enable BT interface
+	hcitool cmd 0x3f 0x5b 0xf5 0x00 0x00    --Disable All interface
+	hcitool cmd 0x3f 0x5b 0xf5 0x00 0x01    --Disable Wlan interface
+	hcitool cmd 0x3f 0x5b 0xf5 0x00 0x02    --Disable BT interface
+
+SD8688 firmware
+===============
+
+Images:
+
+- /lib/firmware/sd8688_helper.bin
+- /lib/firmware/sd8688.bin
+
+
+The images can be downloaded from:
+
+git.infradead.org/users/dwmw2/linux-firmware.git/libertas/
diff --git a/Documentation/admin-guide/clearing-warn-once.rst b/Documentation/admin-guide/clearing-warn-once.rst
new file mode 100644
index 000000000000..211fd926cf00
--- /dev/null
+++ b/Documentation/admin-guide/clearing-warn-once.rst
@@ -0,0 +1,9 @@
+Clearing WARN_ONCE
+------------------
+
+WARN_ONCE / WARN_ON_ONCE / printk_once only emit a message once.
+
+echo 1 > /sys/kernel/debug/clear_warn_once
+
+clears the state and allows the warnings to print once again.
+This can be useful after test suite runs to reproduce problems.
diff --git a/Documentation/admin-guide/cpu-load.rst b/Documentation/admin-guide/cpu-load.rst
new file mode 100644
index 000000000000..2d01ce43d2a2
--- /dev/null
+++ b/Documentation/admin-guide/cpu-load.rst
@@ -0,0 +1,114 @@
+========
+CPU load
+========
+
+Linux exports various bits of information via ``/proc/stat`` and
+``/proc/uptime`` that userland tools, such as top(1), use to calculate
+the average time system spent in a particular state, for example::
+
+    $ iostat
+    Linux 2.6.18.3-exp (linmac)     02/20/2007
+
+    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
+              10.01    0.00    2.92    5.44    0.00   81.63
+
+    ...
+
+Here the system thinks that over the default sampling period the
+system spent 10.01% of the time doing work in user space, 2.92% in the
+kernel, and was overall 81.63% of the time idle.
+
+In most cases the ``/proc/stat``	 information reflects the reality quite
+closely, however due to the nature of how/when the kernel collects
+this data sometimes it can not be trusted at all.
+
+So how is this information collected?  Whenever timer interrupt is
+signalled the kernel looks what kind of task was running at this
+moment and increments the counter that corresponds to this tasks
+kind/state.  The problem with this is that the system could have
+switched between various states multiple times between two timer
+interrupts yet the counter is incremented only for the last state.
+
+
+Example
+-------
+
+If we imagine the system with one task that periodically burns cycles
+in the following manner::
+
+     time line between two timer interrupts
+    |--------------------------------------|
+     ^                                    ^
+     |_ something begins working          |
+                                          |_ something goes to sleep
+                                         (only to be awaken quite soon)
+
+In the above situation the system will be 0% loaded according to the
+``/proc/stat`` (since the timer interrupt will always happen when the
+system is executing the idle handler), but in reality the load is
+closer to 99%.
+
+One can imagine many more situations where this behavior of the kernel
+will lead to quite erratic information inside ``/proc/stat``::
+
+
+	/* gcc -o hog smallhog.c */
+	#include <time.h>
+	#include <limits.h>
+	#include <signal.h>
+	#include <sys/time.h>
+	#define HIST 10
+
+	static volatile sig_atomic_t stop;
+
+	static void sighandler (int signr)
+	{
+	(void) signr;
+	stop = 1;
+	}
+	static unsigned long hog (unsigned long niters)
+	{
+	stop = 0;
+	while (!stop && --niters);
+	return niters;
+	}
+	int main (void)
+	{
+	int i;
+	struct itimerval it = { .it_interval = { .tv_sec = 0, .tv_usec = 1 },
+				.it_value = { .tv_sec = 0, .tv_usec = 1 } };
+	sigset_t set;
+	unsigned long v[HIST];
+	double tmp = 0.0;
+	unsigned long n;
+	signal (SIGALRM, &sighandler);
+	setitimer (ITIMER_REAL, &it, NULL);
+
+	hog (ULONG_MAX);
+	for (i = 0; i < HIST; ++i) v[i] = ULONG_MAX - hog (ULONG_MAX);
+	for (i = 0; i < HIST; ++i) tmp += v[i];
+	tmp /= HIST;
+	n = tmp - (tmp / 3.0);
+
+	sigemptyset (&set);
+	sigaddset (&set, SIGALRM);
+
+	for (;;) {
+		hog (n);
+		sigwait (&set, &i);
+	}
+	return 0;
+	}
+
+
+References
+----------
+
+- http://lkml.org/lkml/2007/2/12/6
+- Documentation/filesystems/proc.txt (1.8)
+
+
+Thanks
+------
+
+Con Kolivas, Pavel Machek
diff --git a/Documentation/admin-guide/cputopology.rst b/Documentation/admin-guide/cputopology.rst
new file mode 100644
index 000000000000..b90dafcc8237
--- /dev/null
+++ b/Documentation/admin-guide/cputopology.rst
@@ -0,0 +1,177 @@
+===========================================
+How CPU topology info is exported via sysfs
+===========================================
+
+Export CPU topology info via sysfs. Items (attributes) are similar
+to /proc/cpuinfo output of some architectures.  They reside in
+/sys/devices/system/cpu/cpuX/topology/:
+
+physical_package_id:
+
+	physical package id of cpuX. Typically corresponds to a physical
+	socket number, but the actual value is architecture and platform
+	dependent.
+
+die_id:
+
+	the CPU die ID of cpuX. Typically it is the hardware platform's
+	identifier (rather than the kernel's).  The actual value is
+	architecture and platform dependent.
+
+core_id:
+
+	the CPU core ID of cpuX. Typically it is the hardware platform's
+	identifier (rather than the kernel's).  The actual value is
+	architecture and platform dependent.
+
+book_id:
+
+	the book ID of cpuX. Typically it is the hardware platform's
+	identifier (rather than the kernel's).	The actual value is
+	architecture and platform dependent.
+
+drawer_id:
+
+	the drawer ID of cpuX. Typically it is the hardware platform's
+	identifier (rather than the kernel's).	The actual value is
+	architecture and platform dependent.
+
+core_cpus:
+
+	internal kernel map of CPUs within the same core.
+	(deprecated name: "thread_siblings")
+
+core_cpus_list:
+
+	human-readable list of CPUs within the same core.
+	(deprecated name: "thread_siblings_list");
+
+package_cpus:
+
+	internal kernel map of the CPUs sharing the same physical_package_id.
+	(deprecated name: "core_siblings")
+
+package_cpus_list:
+
+	human-readable list of CPUs sharing the same physical_package_id.
+	(deprecated name: "core_siblings_list")
+
+die_cpus:
+
+	internal kernel map of CPUs within the same die.
+
+die_cpus_list:
+
+	human-readable list of CPUs within the same die.
+
+book_siblings:
+
+	internal kernel map of cpuX's hardware threads within the same
+	book_id.
+
+book_siblings_list:
+
+	human-readable list of cpuX's hardware threads within the same
+	book_id.
+
+drawer_siblings:
+
+	internal kernel map of cpuX's hardware threads within the same
+	drawer_id.
+
+drawer_siblings_list:
+
+	human-readable list of cpuX's hardware threads within the same
+	drawer_id.
+
+Architecture-neutral, drivers/base/topology.c, exports these attributes.
+However, the book and drawer related sysfs files will only be created if
+CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are selected, respectively.
+
+CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are currently only used on s390,
+where they reflect the cpu and cache hierarchy.
+
+For an architecture to support this feature, it must define some of
+these macros in include/asm-XXX/topology.h::
+
+	#define topology_physical_package_id(cpu)
+	#define topology_die_id(cpu)
+	#define topology_core_id(cpu)
+	#define topology_book_id(cpu)
+	#define topology_drawer_id(cpu)
+	#define topology_sibling_cpumask(cpu)
+	#define topology_core_cpumask(cpu)
+	#define topology_die_cpumask(cpu)
+	#define topology_book_cpumask(cpu)
+	#define topology_drawer_cpumask(cpu)
+
+The type of ``**_id macros`` is int.
+The type of ``**_cpumask macros`` is ``(const) struct cpumask *``. The latter
+correspond with appropriate ``**_siblings`` sysfs attributes (except for
+topology_sibling_cpumask() which corresponds with thread_siblings).
+
+To be consistent on all architectures, include/linux/topology.h
+provides default definitions for any of the above macros that are
+not defined by include/asm-XXX/topology.h:
+
+1) topology_physical_package_id: -1
+2) topology_die_id: -1
+3) topology_core_id: 0
+4) topology_sibling_cpumask: just the given CPU
+5) topology_core_cpumask: just the given CPU
+6) topology_die_cpumask: just the given CPU
+
+For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
+default definitions for topology_book_id() and topology_book_cpumask().
+For architectures that don't support drawers (CONFIG_SCHED_DRAWER) there are
+no default definitions for topology_drawer_id() and topology_drawer_cpumask().
+
+Additionally, CPU topology information is provided under
+/sys/devices/system/cpu and includes these files.  The internal
+source for the output is in brackets ("[]").
+
+    =========== ==========================================================
+    kernel_max: the maximum CPU index allowed by the kernel configuration.
+		[NR_CPUS-1]
+
+    offline:	CPUs that are not online because they have been
+		HOTPLUGGED off (see cpu-hotplug.txt) or exceed the limit
+		of CPUs allowed by the kernel configuration (kernel_max
+		above). [~cpu_online_mask + cpus >= NR_CPUS]
+
+    online:	CPUs that are online and being scheduled [cpu_online_mask]
+
+    possible:	CPUs that have been allocated resources and can be
+		brought online if they are present. [cpu_possible_mask]
+
+    present:	CPUs that have been identified as being present in the
+		system. [cpu_present_mask]
+    =========== ==========================================================
+
+The format for the above output is compatible with cpulist_parse()
+[see <linux/cpumask.h>].  Some examples follow.
+
+In this example, there are 64 CPUs in the system but cpus 32-63 exceed
+the kernel max which is limited to 0..31 by the NR_CPUS config option
+being 32.  Note also that CPUs 2 and 4-31 are not online but could be
+brought online as they are both present and possible::
+
+     kernel_max: 31
+        offline: 2,4-31,32-63
+         online: 0-1,3
+       possible: 0-31
+        present: 0-31
+
+In this example, the NR_CPUS config option is 128, but the kernel was
+started with possible_cpus=144.  There are 4 CPUs in the system and cpu2
+was manually taken offline (and is the only CPU that can be brought
+online.)::
+
+     kernel_max: 127
+        offline: 2,4-127,128-143
+         online: 0-1,3
+       possible: 0-127
+        present: 0-3
+
+See cpu-hotplug.txt for the possible_cpus=NUM kernel start parameter
+as well as more information on the various cpumasks.
diff --git a/Documentation/admin-guide/device-mapper/statistics.rst b/Documentation/admin-guide/device-mapper/statistics.rst
index 3d80a9f850cc..41ded0bc5933 100644
--- a/Documentation/admin-guide/device-mapper/statistics.rst
+++ b/Documentation/admin-guide/device-mapper/statistics.rst
@@ -13,7 +13,7 @@ the range specified.
 
 The I/O statistics counters for each step-sized area of a region are
 in the same format as `/sys/block/*/stat` or `/proc/diskstats` (see:
-Documentation/iostats.txt).  But two extra counters (12 and 13) are
+Documentation/admin-guide/iostats.rst).  But two extra counters (12 and 13) are
 provided: total time spent reading and writing.  When the histogram
 argument is used, the 14th parameter is reported that represents the
 histogram of latencies.  All these counters may be accessed by sending
@@ -151,7 +151,7 @@ Messages
 	  The first 11 counters have the same meaning as
 	  `/sys/block/*/stat or /proc/diskstats`.
 
-	  Please refer to Documentation/iostats.txt for details.
+	  Please refer to Documentation/admin-guide/iostats.rst for details.
 
 	  1. the number of reads completed
 	  2. the number of reads merged
diff --git a/Documentation/admin-guide/efi-stub.rst b/Documentation/admin-guide/efi-stub.rst
new file mode 100644
index 000000000000..833edb0d0bc4
--- /dev/null
+++ b/Documentation/admin-guide/efi-stub.rst
@@ -0,0 +1,100 @@
+=================
+The EFI Boot Stub
+=================
+
+On the x86 and ARM platforms, a kernel zImage/bzImage can masquerade
+as a PE/COFF image, thereby convincing EFI firmware loaders to load
+it as an EFI executable. The code that modifies the bzImage header,
+along with the EFI-specific entry point that the firmware loader
+jumps to are collectively known as the "EFI boot stub", and live in
+arch/x86/boot/header.S and arch/x86/boot/compressed/eboot.c,
+respectively. For ARM the EFI stub is implemented in
+arch/arm/boot/compressed/efi-header.S and
+arch/arm/boot/compressed/efi-stub.c. EFI stub code that is shared
+between architectures is in drivers/firmware/efi/libstub.
+
+For arm64, there is no compressed kernel support, so the Image itself
+masquerades as a PE/COFF image and the EFI stub is linked into the
+kernel. The arm64 EFI stub lives in arch/arm64/kernel/efi-entry.S
+and drivers/firmware/efi/libstub/arm64-stub.c.
+
+By using the EFI boot stub it's possible to boot a Linux kernel
+without the use of a conventional EFI boot loader, such as grub or
+elilo. Since the EFI boot stub performs the jobs of a boot loader, in
+a certain sense it *IS* the boot loader.
+
+The EFI boot stub is enabled with the CONFIG_EFI_STUB kernel option.
+
+
+How to install bzImage.efi
+--------------------------
+
+The bzImage located in arch/x86/boot/bzImage must be copied to the EFI
+System Partition (ESP) and renamed with the extension ".efi". Without
+the extension the EFI firmware loader will refuse to execute it. It's
+not possible to execute bzImage.efi from the usual Linux file systems
+because EFI firmware doesn't have support for them. For ARM the
+arch/arm/boot/zImage should be copied to the system partition, and it
+may not need to be renamed. Similarly for arm64, arch/arm64/boot/Image
+should be copied but not necessarily renamed.
+
+
+Passing kernel parameters from the EFI shell
+--------------------------------------------
+
+Arguments to the kernel can be passed after bzImage.efi, e.g.::
+
+	fs0:> bzImage.efi console=ttyS0 root=/dev/sda4
+
+
+The "initrd=" option
+--------------------
+
+Like most boot loaders, the EFI stub allows the user to specify
+multiple initrd files using the "initrd=" option. This is the only EFI
+stub-specific command line parameter, everything else is passed to the
+kernel when it boots.
+
+The path to the initrd file must be an absolute path from the
+beginning of the ESP, relative path names do not work. Also, the path
+is an EFI-style path and directory elements must be separated with
+backslashes (\). For example, given the following directory layout::
+
+  fs0:>
+	Kernels\
+			bzImage.efi
+			initrd-large.img
+
+	Ramdisks\
+			initrd-small.img
+			initrd-medium.img
+
+to boot with the initrd-large.img file if the current working
+directory is fs0:\Kernels, the following command must be used::
+
+	fs0:\Kernels> bzImage.efi initrd=\Kernels\initrd-large.img
+
+Notice how bzImage.efi can be specified with a relative path. That's
+because the image we're executing is interpreted by the EFI shell,
+which understands relative paths, whereas the rest of the command line
+is passed to bzImage.efi.
+
+
+The "dtb=" option
+-----------------
+
+For the ARM and arm64 architectures, a device tree must be provided to
+the kernel. Normally firmware shall supply the device tree via the
+EFI CONFIGURATION TABLE. However, the "dtb=" command line option can
+be used to override the firmware supplied device tree, or to supply
+one when firmware is unable to.
+
+Please note: Firmware adds runtime configuration information to the
+device tree before booting the kernel. If dtb= is used to override
+the device tree, then any runtime data provided by firmware will be
+lost. The dtb= option should only be used either as a debug tool, or
+as a last resort when a device tree is not provided in the EFI
+CONFIGURATION TABLE.
+
+"dtb=" is processed in the same manner as the "initrd=" option that is
+described above.
diff --git a/Documentation/admin-guide/highuid.rst b/Documentation/admin-guide/highuid.rst
new file mode 100644
index 000000000000..6ee70465c0ea
--- /dev/null
+++ b/Documentation/admin-guide/highuid.rst
@@ -0,0 +1,80 @@
+===================================================
+Notes on the change from 16-bit UIDs to 32-bit UIDs
+===================================================
+
+:Author: Chris Wing <wingc@umich.edu>
+:Last updated: January 11, 2000
+
+- kernel code MUST take into account __kernel_uid_t and __kernel_uid32_t
+  when communicating between user and kernel space in an ioctl or data
+  structure.
+
+- kernel code should use uid_t and gid_t in kernel-private structures and
+  code.
+
+What's left to be done for 32-bit UIDs on all Linux architectures:
+
+- Disk quotas have an interesting limitation that is not related to the
+  maximum UID/GID. They are limited by the maximum file size on the
+  underlying filesystem, because quota records are written at offsets
+  corresponding to the UID in question.
+  Further investigation is needed to see if the quota system can cope
+  properly with huge UIDs. If it can deal with 64-bit file offsets on all 
+  architectures, this should not be a problem.
+
+- Decide whether or not to keep backwards compatibility with the system
+  accounting file, or if we should break it as the comments suggest
+  (currently, the old 16-bit UID and GID are still written to disk, and
+  part of the former pad space is used to store separate 32-bit UID and
+  GID)
+
+- Need to validate that OS emulation calls the 16-bit UID
+  compatibility syscalls, if the OS being emulated used 16-bit UIDs, or
+  uses the 32-bit UID system calls properly otherwise.
+
+  This affects at least:
+
+	- iBCS on Intel
+
+	- sparc32 emulation on sparc64
+	  (need to support whatever new 32-bit UID system calls are added to
+	  sparc32)
+
+- Validate that all filesystems behave properly.
+
+  At present, 32-bit UIDs _should_ work for:
+
+	- ext2
+	- ufs
+	- isofs
+	- nfs
+	- coda
+	- udf
+
+  Ioctl() fixups have been made for:
+
+	- ncpfs
+	- smbfs
+
+  Filesystems with simple fixups to prevent 16-bit UID wraparound:
+
+	- minix
+	- sysv
+	- qnx4
+
+  Other filesystems have not been checked yet.
+
+- The ncpfs and smpfs filesystems cannot presently use 32-bit UIDs in
+  all ioctl()s. Some new ioctl()s have been added with 32-bit UIDs, but
+  more are needed. (as well as new user<->kernel data structures)
+
+- The ELF core dump format only supports 16-bit UIDs on arm, i386, m68k,
+  sh, and sparc32. Fixing this is probably not that important, but would
+  require adding a new ELF section.
+
+- The ioctl()s used to control the in-kernel NFS server only support
+  16-bit UIDs on arm, i386, m68k, sh, and sparc32.
+
+- make sure that the UID mapping feature of AX25 networking works properly
+  (it should be safe because it's always used a 32-bit integer to
+  communicate between user and kernel)
diff --git a/Documentation/admin-guide/hw-vuln/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst
index 656aee262e23..f83212fae4d5 100644
--- a/Documentation/admin-guide/hw-vuln/l1tf.rst
+++ b/Documentation/admin-guide/hw-vuln/l1tf.rst
@@ -241,7 +241,7 @@ Guest mitigation mechanisms
    For further information about confining guests to a single or to a group
    of cores consult the cpusets documentation:
 
-   https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.rst
+   https://www.kernel.org/doc/Documentation/admin-guide/cgroup-v1/cpusets.rst
 
 .. _interrupt_isolation:
 
diff --git a/Documentation/admin-guide/hw_random.rst b/Documentation/admin-guide/hw_random.rst
new file mode 100644
index 000000000000..121de96e395e
--- /dev/null
+++ b/Documentation/admin-guide/hw_random.rst
@@ -0,0 +1,105 @@
+==========================================================
+Linux support for random number generator in i8xx chipsets
+==========================================================
+
+Introduction
+============
+
+The hw_random framework is software that makes use of a
+special hardware feature on your CPU or motherboard,
+a Random Number Generator (RNG).  The software has two parts:
+a core providing the /dev/hwrng character device and its
+sysfs support, plus a hardware-specific driver that plugs
+into that core.
+
+To make the most effective use of these mechanisms, you
+should download the support software as well.  Download the
+latest version of the "rng-tools" package from the
+hw_random driver's official Web site:
+
+	http://sourceforge.net/projects/gkernel/
+
+Those tools use /dev/hwrng to fill the kernel entropy pool,
+which is used internally and exported by the /dev/urandom and
+/dev/random special files.
+
+Theory of operation
+===================
+
+CHARACTER DEVICE.  Using the standard open()
+and read() system calls, you can read random data from
+the hardware RNG device.  This data is NOT CHECKED by any
+fitness tests, and could potentially be bogus (if the
+hardware is faulty or has been tampered with).  Data is only
+output if the hardware "has-data" flag is set, but nevertheless
+a security-conscious person would run fitness tests on the
+data before assuming it is truly random.
+
+The rng-tools package uses such tests in "rngd", and lets you
+run them by hand with a "rngtest" utility.
+
+/dev/hwrng is char device major 10, minor 183.
+
+CLASS DEVICE.  There is a /sys/class/misc/hw_random node with
+two unique attributes, "rng_available" and "rng_current".  The
+"rng_available" attribute lists the hardware-specific drivers
+available, while "rng_current" lists the one which is currently
+connected to /dev/hwrng.  If your system has more than one
+RNG available, you may change the one used by writing a name from
+the list in "rng_available" into "rng_current".
+
+==========================================================================
+
+
+Hardware driver for Intel/AMD/VIA Random Number Generators (RNG)
+	- Copyright 2000,2001 Jeff Garzik <jgarzik@pobox.com>
+	- Copyright 2000,2001 Philipp Rumpf <prumpf@mandrakesoft.com>
+
+
+About the Intel RNG hardware, from the firmware hub datasheet
+=============================================================
+
+The Firmware Hub integrates a Random Number Generator (RNG)
+using thermal noise generated from inherently random quantum
+mechanical properties of silicon. When not generating new random
+bits the RNG circuitry will enter a low power state. Intel will
+provide a binary software driver to give third party software
+access to our RNG for use as a security feature. At this time,
+the RNG is only to be used with a system in an OS-present state.
+
+Intel RNG Driver notes
+======================
+
+FIXME: support poll(2)
+
+.. note::
+
+	request_mem_region was removed, for three reasons:
+
+	1) Only one RNG is supported by this driver;
+	2) The location used by the RNG is a fixed location in
+	   MMIO-addressable memory;
+	3) users with properly working BIOS e820 handling will always
+	   have the region in which the RNG is located reserved, so
+	   request_mem_region calls always fail for proper setups.
+	   However, for people who use mem=XX, BIOS e820 information is
+	   **not** in /proc/iomem, and request_mem_region(RNG_ADDR) can
+	   succeed.
+
+Driver details
+==============
+
+Based on:
+	Intel 82802AB/82802AC Firmware Hub (FWH) Datasheet
+	May 1999 Order Number: 290658-002 R
+
+Intel 82802 Firmware Hub:
+	Random Number Generator
+	Programmer's Reference Manual
+	December 1999 Order Number: 298029-001 R
+
+Intel 82802 Firmware HUB Random Number Generator Driver
+	Copyright (c) 2000 Matt Sottek <msottek@quiknet.com>
+
+Special thanks to Matt Sottek.  I did the "guts", he
+did the "brains" and all the testing.
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index a5fdb1a846ce..4e98f5596da0 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -85,8 +85,25 @@ configure specific aspects of kernel behavior to your liking.
    perf-security
    acpi/index
    aoe/index
+   btmrvl
+   clearing-warn-once
+   cpu-load
+   cputopology
    device-mapper/index
+   efi-stub
+   highuid
+   hw_random
+   iostats
+   kernel-per-CPU-kthreads
    laptops/index
+   lcd-panel-cgram
+   ldm
+   lockup-watchdogs
+   numastat
+   pnp
+   rtc
+   svga
+   video-output
 
 .. only::  subproject and html
 
diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst
new file mode 100644
index 000000000000..5d63b18bd6d1
--- /dev/null
+++ b/Documentation/admin-guide/iostats.rst
@@ -0,0 +1,197 @@
+=====================
+I/O statistics fields
+=====================
+
+Since 2.4.20 (and some versions before, with patches), and 2.5.45,
+more extensive disk statistics have been introduced to help measure disk
+activity. Tools such as ``sar`` and ``iostat`` typically interpret these and do
+the work for you, but in case you are interested in creating your own
+tools, the fields are explained here.
+
+In 2.4 now, the information is found as additional fields in
+``/proc/partitions``.  In 2.6 and upper, the same information is found in two
+places: one is in the file ``/proc/diskstats``, and the other is within
+the sysfs file system, which must be mounted in order to obtain
+the information. Throughout this document we'll assume that sysfs
+is mounted on ``/sys``, although of course it may be mounted anywhere.
+Both ``/proc/diskstats`` and sysfs use the same source for the information
+and so should not differ.
+
+Here are examples of these different formats::
+
+   2.4:
+      3     0   39082680 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
+      3     1    9221278 hda1 35486 0 35496 38030 0 0 0 0 0 38030 38030
+
+   2.6+ sysfs:
+      446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
+      35486    38030    38030    38030
+
+   2.6+ diskstats:
+      3    0   hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
+      3    1   hda1 35486 38030 38030 38030
+
+   4.18+ diskstats:
+      3    0   hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160 0 0 0 0
+
+On 2.4 you might execute ``grep 'hda ' /proc/partitions``. On 2.6+, you have
+a choice of ``cat /sys/block/hda/stat`` or ``grep 'hda ' /proc/diskstats``.
+
+The advantage of one over the other is that the sysfs choice works well
+if you are watching a known, small set of disks.  ``/proc/diskstats`` may
+be a better choice if you are watching a large number of disks because
+you'll avoid the overhead of 50, 100, or 500 or more opens/closes with
+each snapshot of your disk statistics.
+
+In 2.4, the statistics fields are those after the device name. In
+the above example, the first field of statistics would be 446216.
+By contrast, in 2.6+ if you look at ``/sys/block/hda/stat``, you'll
+find just the eleven fields, beginning with 446216.  If you look at
+``/proc/diskstats``, the eleven fields will be preceded by the major and
+minor device numbers, and device name.  Each of these formats provides
+eleven fields of statistics, each meaning exactly the same things.
+All fields except field 9 are cumulative since boot.  Field 9 should
+go to zero as I/Os complete; all others only increase (unless they
+overflow and wrap).  Yes, these are (32-bit or 64-bit) unsigned long
+(native word size) numbers, and on a very busy or long-lived system they
+may wrap. Applications should be prepared to deal with that; unless
+your observations are measured in large numbers of minutes or hours,
+they should not wrap twice before you notice them.
+
+Each set of stats only applies to the indicated device; if you want
+system-wide stats you'll have to find all the devices and sum them all up.
+
+Field  1 -- # of reads completed
+    This is the total number of reads completed successfully.
+
+Field  2 -- # of reads merged, field 6 -- # of writes merged
+    Reads and writes which are adjacent to each other may be merged for
+    efficiency.  Thus two 4K reads may become one 8K read before it is
+    ultimately handed to the disk, and so it will be counted (and queued)
+    as only one I/O.  This field lets you know how often this was done.
+
+Field  3 -- # of sectors read
+    This is the total number of sectors read successfully.
+
+Field  4 -- # of milliseconds spent reading
+    This is the total number of milliseconds spent by all reads (as
+    measured from __make_request() to end_that_request_last()).
+
+Field  5 -- # of writes completed
+    This is the total number of writes completed successfully.
+
+Field  6 -- # of writes merged
+    See the description of field 2.
+
+Field  7 -- # of sectors written
+    This is the total number of sectors written successfully.
+
+Field  8 -- # of milliseconds spent writing
+    This is the total number of milliseconds spent by all writes (as
+    measured from __make_request() to end_that_request_last()).
+
+Field  9 -- # of I/Os currently in progress
+    The only field that should go to zero. Incremented as requests are
+    given to appropriate struct request_queue and decremented as they finish.
+
+Field 10 -- # of milliseconds spent doing I/Os
+    This field increases so long as field 9 is nonzero.
+
+    Since 5.0 this field counts jiffies when at least one request was
+    started or completed. If request runs more than 2 jiffies then some
+    I/O time will not be accounted unless there are other requests.
+
+Field 11 -- weighted # of milliseconds spent doing I/Os
+    This field is incremented at each I/O start, I/O completion, I/O
+    merge, or read of these stats by the number of I/Os in progress
+    (field 9) times the number of milliseconds spent doing I/O since the
+    last update of this field.  This can provide an easy measure of both
+    I/O completion time and the backlog that may be accumulating.
+
+Field 12 -- # of discards completed
+    This is the total number of discards completed successfully.
+
+Field 13 -- # of discards merged
+    See the description of field 2
+
+Field 14 -- # of sectors discarded
+    This is the total number of sectors discarded successfully.
+
+Field 15 -- # of milliseconds spent discarding
+    This is the total number of milliseconds spent by all discards (as
+    measured from __make_request() to end_that_request_last()).
+
+To avoid introducing performance bottlenecks, no locks are held while
+modifying these counters.  This implies that minor inaccuracies may be
+introduced when changes collide, so (for instance) adding up all the
+read I/Os issued per partition should equal those made to the disks ...
+but due to the lack of locking it may only be very close.
+
+In 2.6+, there are counters for each CPU, which make the lack of locking
+almost a non-issue.  When the statistics are read, the per-CPU counters
+are summed (possibly overflowing the unsigned long variable they are
+summed to) and the result given to the user.  There is no convenient
+user interface for accessing the per-CPU counters themselves.
+
+Disks vs Partitions
+-------------------
+
+There were significant changes between 2.4 and 2.6+ in the I/O subsystem.
+As a result, some statistic information disappeared. The translation from
+a disk address relative to a partition to the disk address relative to
+the host disk happens much earlier.  All merges and timings now happen
+at the disk level rather than at both the disk and partition level as
+in 2.4.  Consequently, you'll see a different statistics output on 2.6+ for
+partitions from that for disks.  There are only *four* fields available
+for partitions on 2.6+ machines.  This is reflected in the examples above.
+
+Field  1 -- # of reads issued
+    This is the total number of reads issued to this partition.
+
+Field  2 -- # of sectors read
+    This is the total number of sectors requested to be read from this
+    partition.
+
+Field  3 -- # of writes issued
+    This is the total number of writes issued to this partition.
+
+Field  4 -- # of sectors written
+    This is the total number of sectors requested to be written to
+    this partition.
+
+Note that since the address is translated to a disk-relative one, and no
+record of the partition-relative address is kept, the subsequent success
+or failure of the read cannot be attributed to the partition.  In other
+words, the number of reads for partitions is counted slightly before time
+of queuing for partitions, and at completion for whole disks.  This is
+a subtle distinction that is probably uninteresting for most cases.
+
+More significant is the error induced by counting the numbers of
+reads/writes before merges for partitions and after for disks. Since a
+typical workload usually contains a lot of successive and adjacent requests,
+the number of reads/writes issued can be several times higher than the
+number of reads/writes completed.
+
+In 2.6.25, the full statistic set is again available for partitions and
+disk and partition statistics are consistent again. Since we still don't
+keep record of the partition-relative address, an operation is attributed to
+the partition which contains the first sector of the request after the
+eventual merges. As requests can be merged across partition, this could lead
+to some (probably insignificant) inaccuracy.
+
+Additional notes
+----------------
+
+In 2.6+, sysfs is not mounted by default.  If your distribution of
+Linux hasn't added it already, here's the line you'll want to add to
+your ``/etc/fstab``::
+
+	none /sys sysfs defaults 0 0
+
+
+In 2.6+, all disk statistics were removed from ``/proc/stat``.  In 2.4, they
+appear in both ``/proc/partitions`` and ``/proc/stat``, although the ones in
+``/proc/stat`` take a very different format from those in ``/proc/partitions``
+(see proc(5), if your system has it.)
+
+-- ricklind@us.ibm.com
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a571a67e0c85..19b1e3bef56c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5066,7 +5066,7 @@
 
 	vga=		[BOOT,X86-32] Select a particular video mode
 			See Documentation/x86/boot.rst and
-			Documentation/svga.txt.
+			Documentation/admin-guide/svga.rst.
 			Use vga=ask for menu.
 			This is actually a boot loader parameter; the value is
 			passed to the kernel using a special protocol.
diff --git a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
new file mode 100644
index 000000000000..4f18456dd3b1
--- /dev/null
+++ b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
@@ -0,0 +1,356 @@
+==========================================
+Reducing OS jitter due to per-cpu kthreads
+==========================================
+
+This document lists per-CPU kthreads in the Linux kernel and presents
+options to control their OS jitter.  Note that non-per-CPU kthreads are
+not listed here.  To reduce OS jitter from non-per-CPU kthreads, bind
+them to a "housekeeping" CPU dedicated to such work.
+
+References
+==========
+
+-	Documentation/IRQ-affinity.txt:  Binding interrupts to sets of CPUs.
+
+-	Documentation/admin-guide/cgroup-v1:  Using cgroups to bind tasks to sets of CPUs.
+
+-	man taskset:  Using the taskset command to bind tasks to sets
+	of CPUs.
+
+-	man sched_setaffinity:  Using the sched_setaffinity() system
+	call to bind tasks to sets of CPUs.
+
+-	/sys/devices/system/cpu/cpuN/online:  Control CPU N's hotplug state,
+	writing "0" to offline and "1" to online.
+
+-	In order to locate kernel-generated OS jitter on CPU N:
+
+		cd /sys/kernel/debug/tracing
+		echo 1 > max_graph_depth # Increase the "1" for more detail
+		echo function_graph > current_tracer
+		# run workload
+		cat per_cpu/cpuN/trace
+
+kthreads
+========
+
+Name:
+  ehca_comp/%u
+
+Purpose:
+  Periodically process Infiniband-related work.
+
+To reduce its OS jitter, do any of the following:
+
+1.	Don't use eHCA Infiniband hardware, instead choosing hardware
+	that does not require per-CPU kthreads.  This will prevent these
+	kthreads from being created in the first place.  (This will
+	work for most people, as this hardware, though important, is
+	relatively old and is produced in relatively low unit volumes.)
+2.	Do all eHCA-Infiniband-related work on other CPUs, including
+	interrupts.
+3.	Rework the eHCA driver so that its per-CPU kthreads are
+	provisioned only on selected CPUs.
+
+
+Name:
+  irq/%d-%s
+
+Purpose:
+  Handle threaded interrupts.
+
+To reduce its OS jitter, do the following:
+
+1.	Use irq affinity to force the irq threads to execute on
+	some other CPU.
+
+Name:
+  kcmtpd_ctr_%d
+
+Purpose:
+  Handle Bluetooth work.
+
+To reduce its OS jitter, do one of the following:
+
+1.	Don't use Bluetooth, in which case these kthreads won't be
+	created in the first place.
+2.	Use irq affinity to force Bluetooth-related interrupts to
+	occur on some other CPU and furthermore initiate all
+	Bluetooth activity on some other CPU.
+
+Name:
+  ksoftirqd/%u
+
+Purpose:
+  Execute softirq handlers when threaded or when under heavy load.
+
+To reduce its OS jitter, each softirq vector must be handled
+separately as follows:
+
+TIMER_SOFTIRQ
+-------------
+
+Do all of the following:
+
+1.	To the extent possible, keep the CPU out of the kernel when it
+	is non-idle, for example, by avoiding system calls and by forcing
+	both kernel threads and interrupts to execute elsewhere.
+2.	Build with CONFIG_HOTPLUG_CPU=y.  After boot completes, force
+	the CPU offline, then bring it back online.  This forces
+	recurring timers to migrate elsewhere.	If you are concerned
+	with multiple CPUs, force them all offline before bringing the
+	first one back online.  Once you have onlined the CPUs in question,
+	do not offline any other CPUs, because doing so could force the
+	timer back onto one of the CPUs in question.
+
+NET_TX_SOFTIRQ and NET_RX_SOFTIRQ
+---------------------------------
+
+Do all of the following:
+
+1.	Force networking interrupts onto other CPUs.
+2.	Initiate any network I/O on other CPUs.
+3.	Once your application has started, prevent CPU-hotplug operations
+	from being initiated from tasks that might run on the CPU to
+	be de-jittered.  (It is OK to force this CPU offline and then
+	bring it back online before you start your application.)
+
+BLOCK_SOFTIRQ
+-------------
+
+Do all of the following:
+
+1.	Force block-device interrupts onto some other CPU.
+2.	Initiate any block I/O on other CPUs.
+3.	Once your application has started, prevent CPU-hotplug operations
+	from being initiated from tasks that might run on the CPU to
+	be de-jittered.  (It is OK to force this CPU offline and then
+	bring it back online before you start your application.)
+
+IRQ_POLL_SOFTIRQ
+----------------
+
+Do all of the following:
+
+1.	Force block-device interrupts onto some other CPU.
+2.	Initiate any block I/O and block-I/O polling on other CPUs.
+3.	Once your application has started, prevent CPU-hotplug operations
+	from being initiated from tasks that might run on the CPU to
+	be de-jittered.  (It is OK to force this CPU offline and then
+	bring it back online before you start your application.)
+
+TASKLET_SOFTIRQ
+---------------
+
+Do one or more of the following:
+
+1.	Avoid use of drivers that use tasklets.  (Such drivers will contain
+	calls to things like tasklet_schedule().)
+2.	Convert all drivers that you must use from tasklets to workqueues.
+3.	Force interrupts for drivers using tasklets onto other CPUs,
+	and also do I/O involving these drivers on other CPUs.
+
+SCHED_SOFTIRQ
+-------------
+
+Do all of the following:
+
+1.	Avoid sending scheduler IPIs to the CPU to be de-jittered,
+	for example, ensure that at most one runnable kthread is present
+	on that CPU.  If a thread that expects to run on the de-jittered
+	CPU awakens, the scheduler will send an IPI that can result in
+	a subsequent SCHED_SOFTIRQ.
+2.	CONFIG_NO_HZ_FULL=y and ensure that the CPU to be de-jittered
+	is marked as an adaptive-ticks CPU using the "nohz_full="
+	boot parameter.  This reduces the number of scheduler-clock
+	interrupts that the de-jittered CPU receives, minimizing its
+	chances of being selected to do the load balancing work that
+	runs in SCHED_SOFTIRQ context.
+3.	To the extent possible, keep the CPU out of the kernel when it
+	is non-idle, for example, by avoiding system calls and by
+	forcing both kernel threads and interrupts to execute elsewhere.
+	This further reduces the number of scheduler-clock interrupts
+	received by the de-jittered CPU.
+
+HRTIMER_SOFTIRQ
+---------------
+
+Do all of the following:
+
+1.	To the extent possible, keep the CPU out of the kernel when it
+	is non-idle.  For example, avoid system calls and force both
+	kernel threads and interrupts to execute elsewhere.
+2.	Build with CONFIG_HOTPLUG_CPU=y.  Once boot completes, force the
+	CPU offline, then bring it back online.  This forces recurring
+	timers to migrate elsewhere.  If you are concerned with multiple
+	CPUs, force them all offline before bringing the first one
+	back online.  Once you have onlined the CPUs in question, do not
+	offline any other CPUs, because doing so could force the timer
+	back onto one of the CPUs in question.
+
+RCU_SOFTIRQ
+-----------
+
+Do at least one of the following:
+
+1.	Offload callbacks and keep the CPU in either dyntick-idle or
+	adaptive-ticks state by doing all of the following:
+
+	a.	CONFIG_NO_HZ_FULL=y and ensure that the CPU to be
+		de-jittered is marked as an adaptive-ticks CPU using the
+		"nohz_full=" boot parameter.  Bind the rcuo kthreads to
+		housekeeping CPUs, which can tolerate OS jitter.
+	b.	To the extent possible, keep the CPU out of the kernel
+		when it is non-idle, for example, by avoiding system
+		calls and by forcing both kernel threads and interrupts
+		to execute elsewhere.
+
+2.	Enable RCU to do its processing remotely via dyntick-idle by
+	doing all of the following:
+
+	a.	Build with CONFIG_NO_HZ=y and CONFIG_RCU_FAST_NO_HZ=y.
+	b.	Ensure that the CPU goes idle frequently, allowing other
+		CPUs to detect that it has passed through an RCU quiescent
+		state.	If the kernel is built with CONFIG_NO_HZ_FULL=y,
+		userspace execution also allows other CPUs to detect that
+		the CPU in question has passed through a quiescent state.
+	c.	To the extent possible, keep the CPU out of the kernel
+		when it is non-idle, for example, by avoiding system
+		calls and by forcing both kernel threads and interrupts
+		to execute elsewhere.
+
+Name:
+  kworker/%u:%d%s (cpu, id, priority)
+
+Purpose:
+  Execute workqueue requests
+
+To reduce its OS jitter, do any of the following:
+
+1.	Run your workload at a real-time priority, which will allow
+	preempting the kworker daemons.
+2.	A given workqueue can be made visible in the sysfs filesystem
+	by passing the WQ_SYSFS to that workqueue's alloc_workqueue().
+	Such a workqueue can be confined to a given subset of the
+	CPUs using the ``/sys/devices/virtual/workqueue/*/cpumask`` sysfs
+	files.	The set of WQ_SYSFS workqueues can be displayed using
+	"ls sys/devices/virtual/workqueue".  That said, the workqueues
+	maintainer would like to caution people against indiscriminately
+	sprinkling WQ_SYSFS across all the workqueues.	The reason for
+	caution is that it is easy to add WQ_SYSFS, but because sysfs is
+	part of the formal user/kernel API, it can be nearly impossible
+	to remove it, even if its addition was a mistake.
+3.	Do any of the following needed to avoid jitter that your
+	application cannot tolerate:
+
+	a.	Build your kernel with CONFIG_SLUB=y rather than
+		CONFIG_SLAB=y, thus avoiding the slab allocator's periodic
+		use of each CPU's workqueues to run its cache_reap()
+		function.
+	b.	Avoid using oprofile, thus avoiding OS jitter from
+		wq_sync_buffer().
+	c.	Limit your CPU frequency so that a CPU-frequency
+		governor is not required, possibly enlisting the aid of
+		special heatsinks or other cooling technologies.  If done
+		correctly, and if you CPU architecture permits, you should
+		be able to build your kernel with CONFIG_CPU_FREQ=n to
+		avoid the CPU-frequency governor periodically running
+		on each CPU, including cs_dbs_timer() and od_dbs_timer().
+
+		WARNING:  Please check your CPU specifications to
+		make sure that this is safe on your particular system.
+	d.	As of v3.18, Christoph Lameter's on-demand vmstat workers
+		commit prevents OS jitter due to vmstat_update() on
+		CONFIG_SMP=y systems.  Before v3.18, is not possible
+		to entirely get rid of the OS jitter, but you can
+		decrease its frequency by writing a large value to
+		/proc/sys/vm/stat_interval.  The default value is HZ,
+		for an interval of one second.	Of course, larger values
+		will make your virtual-memory statistics update more
+		slowly.  Of course, you can also run your workload at
+		a real-time priority, thus preempting vmstat_update(),
+		but if your workload is CPU-bound, this is a bad idea.
+		However, there is an RFC patch from Christoph Lameter
+		(based on an earlier one from Gilad Ben-Yossef) that
+		reduces or even eliminates vmstat overhead for some
+		workloads at https://lkml.org/lkml/2013/9/4/379.
+	e.	Boot with "elevator=noop" to avoid workqueue use by
+		the block layer.
+	f.	If running on high-end powerpc servers, build with
+		CONFIG_PPC_RTAS_DAEMON=n.  This prevents the RTAS
+		daemon from running on each CPU every second or so.
+		(This will require editing Kconfig files and will defeat
+		this platform's RAS functionality.)  This avoids jitter
+		due to the rtas_event_scan() function.
+		WARNING:  Please check your CPU specifications to
+		make sure that this is safe on your particular system.
+	g.	If running on Cell Processor, build your kernel with
+		CBE_CPUFREQ_SPU_GOVERNOR=n to avoid OS jitter from
+		spu_gov_work().
+		WARNING:  Please check your CPU specifications to
+		make sure that this is safe on your particular system.
+	h.	If running on PowerMAC, build your kernel with
+		CONFIG_PMAC_RACKMETER=n to disable the CPU-meter,
+		avoiding OS jitter from rackmeter_do_timer().
+
+Name:
+  rcuc/%u
+
+Purpose:
+  Execute RCU callbacks in CONFIG_RCU_BOOST=y kernels.
+
+To reduce its OS jitter, do at least one of the following:
+
+1.	Build the kernel with CONFIG_PREEMPT=n.  This prevents these
+	kthreads from being created in the first place, and also obviates
+	the need for RCU priority boosting.  This approach is feasible
+	for workloads that do not require high degrees of responsiveness.
+2.	Build the kernel with CONFIG_RCU_BOOST=n.  This prevents these
+	kthreads from being created in the first place.  This approach
+	is feasible only if your workload never requires RCU priority
+	boosting, for example, if you ensure frequent idle time on all
+	CPUs that might execute within the kernel.
+3.	Build with CONFIG_RCU_NOCB_CPU=y and boot with the rcu_nocbs=
+	boot parameter offloading RCU callbacks from all CPUs susceptible
+	to OS jitter.  This approach prevents the rcuc/%u kthreads from
+	having any work to do, so that they are never awakened.
+4.	Ensure that the CPU never enters the kernel, and, in particular,
+	avoid initiating any CPU hotplug operations on this CPU.  This is
+	another way of preventing any callbacks from being queued on the
+	CPU, again preventing the rcuc/%u kthreads from having any work
+	to do.
+
+Name:
+  rcuop/%d and rcuos/%d
+
+Purpose:
+  Offload RCU callbacks from the corresponding CPU.
+
+To reduce its OS jitter, do at least one of the following:
+
+1.	Use affinity, cgroups, or other mechanism to force these kthreads
+	to execute on some other CPU.
+2.	Build with CONFIG_RCU_NOCB_CPU=n, which will prevent these
+	kthreads from being created in the first place.  However, please
+	note that this will not eliminate OS jitter, but will instead
+	shift it to RCU_SOFTIRQ.
+
+Name:
+  watchdog/%u
+
+Purpose:
+  Detect software lockups on each CPU.
+
+To reduce its OS jitter, do at least one of the following:
+
+1.	Build with CONFIG_LOCKUP_DETECTOR=n, which will prevent these
+	kthreads from being created in the first place.
+2.	Boot with "nosoftlockup=0", which will also prevent these kthreads
+	from being created.  Other related watchdog and softlockup boot
+	parameters may be found in Documentation/admin-guide/kernel-parameters.rst
+	and Documentation/watchdog/watchdog-parameters.rst.
+3.	Echo a zero to /proc/sys/kernel/watchdog to disable the
+	watchdog timer.
+4.	Echo a large number of /proc/sys/kernel/watchdog_thresh in
+	order to reduce the frequency of OS jitter due to the watchdog
+	timer down to a level that is acceptable for your workload.
diff --git a/Documentation/admin-guide/lcd-panel-cgram.rst b/Documentation/admin-guide/lcd-panel-cgram.rst
new file mode 100644
index 000000000000..a3eb00c62f53
--- /dev/null
+++ b/Documentation/admin-guide/lcd-panel-cgram.rst
@@ -0,0 +1,27 @@
+======================================
+Parallel port LCD/Keypad Panel support
+======================================
+
+Some LCDs allow you to define up to 8 characters, mapped to ASCII
+characters 0 to 7. The escape code to define a new character is
+'\e[LG' followed by one digit from 0 to 7, representing the character
+number, and up to 8 couples of hex digits terminated by a semi-colon
+(';'). Each couple of digits represents a line, with 1-bits for each
+illuminated pixel with LSB on the right. Lines are numbered from the
+top of the character to the bottom. On a 5x7 matrix, only the 5 lower
+bits of the 7 first bytes are used for each character. If the string
+is incomplete, only complete lines will be redefined. Here are some
+examples::
+
+  printf "\e[LG0010101050D1F0C04;"  => 0 = [enter]
+  printf "\e[LG1040E1F0000000000;"  => 1 = [up]
+  printf "\e[LG2000000001F0E0400;"  => 2 = [down]
+  printf "\e[LG3040E1F001F0E0400;"  => 3 = [up-down]
+  printf "\e[LG40002060E1E0E0602;"  => 4 = [left]
+  printf "\e[LG500080C0E0F0E0C08;"  => 5 = [right]
+  printf "\e[LG60016051516141400;"  => 6 = "IP"
+
+  printf "\e[LG00103071F1F070301;"  => big speaker
+  printf "\e[LG00002061E1E060200;"  => small speaker
+
+Willy
diff --git a/Documentation/admin-guide/ldm.rst b/Documentation/admin-guide/ldm.rst
new file mode 100644
index 000000000000..12c571368e73
--- /dev/null
+++ b/Documentation/admin-guide/ldm.rst
@@ -0,0 +1,121 @@
+==========================================
+LDM - Logical Disk Manager (Dynamic Disks)
+==========================================
+
+:Author: Originally Written by FlatCap - Richard Russon <ldm@flatcap.org>.
+:Last Updated: Anton Altaparmakov on 30 March 2007 for Windows Vista.
+
+Overview
+--------
+
+Windows 2000, XP, and Vista use a new partitioning scheme.  It is a complete
+replacement for the MSDOS style partitions.  It stores its information in a
+1MiB journalled database at the end of the physical disk.  The size of
+partitions is limited only by disk space.  The maximum number of partitions is
+nearly 2000.
+
+Any partitions created under the LDM are called "Dynamic Disks".  There are no
+longer any primary or extended partitions.  Normal MSDOS style partitions are
+now known as Basic Disks.
+
+If you wish to use Spanned, Striped, Mirrored or RAID 5 Volumes, you must use
+Dynamic Disks.  The journalling allows Windows to make changes to these
+partitions and filesystems without the need to reboot.
+
+Once the LDM driver has divided up the disk, you can use the MD driver to
+assemble any multi-partition volumes, e.g.  Stripes, RAID5.
+
+To prevent legacy applications from repartitioning the disk, the LDM creates a
+dummy MSDOS partition containing one disk-sized partition.  This is what is
+supported with the Linux LDM driver.
+
+A newer approach that has been implemented with Vista is to put LDM on top of a
+GPT label disk.  This is not supported by the Linux LDM driver yet.
+
+
+Example
+-------
+
+Below we have a 50MiB disk, divided into seven partitions.
+
+.. note::
+
+   The missing 1MiB at the end of the disk is where the LDM database is
+   stored.
+
++-------++--------------+---------+-----++--------------+---------+----+
+|Device || Offset Bytes | Sectors | MiB || Size   Bytes | Sectors | MiB|
++=======++==============+=========+=====++==============+=========+====+
+|hda    ||            0 |       0 |   0 ||     52428800 |  102400 |  50|
++-------++--------------+---------+-----++--------------+---------+----+
+|hda1   ||     51380224 |  100352 |  49 ||      1048576 |    2048 |   1|
++-------++--------------+---------+-----++--------------+---------+----+
+|hda2   ||        16384 |      32 |   0 ||      6979584 |   13632 |   6|
++-------++--------------+---------+-----++--------------+---------+----+
+|hda3   ||      6995968 |   13664 |   6 ||     10485760 |   20480 |  10|
++-------++--------------+---------+-----++--------------+---------+----+
+|hda4   ||     17481728 |   34144 |  16 ||      4194304 |    8192 |   4|
++-------++--------------+---------+-----++--------------+---------+----+
+|hda5   ||     21676032 |   42336 |  20 ||      5242880 |   10240 |   5|
++-------++--------------+---------+-----++--------------+---------+----+
+|hda6   ||     26918912 |   52576 |  25 ||     10485760 |   20480 |  10|
++-------++--------------+---------+-----++--------------+---------+----+
+|hda7   ||     37404672 |   73056 |  35 ||     13959168 |   27264 |  13|
++-------++--------------+---------+-----++--------------+---------+----+
+
+The LDM Database may not store the partitions in the order that they appear on
+disk, but the driver will sort them.
+
+When Linux boots, you will see something like::
+
+  hda: 102400 sectors w/32KiB Cache, CHS=50/64/32
+  hda: [LDM] hda1 hda2 hda3 hda4 hda5 hda6 hda7
+
+
+Compiling LDM Support
+---------------------
+
+To enable LDM, choose the following two options: 
+
+  - "Advanced partition selection" CONFIG_PARTITION_ADVANCED
+  - "Windows Logical Disk Manager (Dynamic Disk) support" CONFIG_LDM_PARTITION
+
+If you believe the driver isn't working as it should, you can enable the extra
+debugging code.  This will produce a LOT of output.  The option is:
+
+  - "Windows LDM extra logging" CONFIG_LDM_DEBUG
+
+N.B. The partition code cannot be compiled as a module.
+
+As with all the partition code, if the driver doesn't see signs of its type of
+partition, it will pass control to another driver, so there is no harm in
+enabling it.
+
+If you have Dynamic Disks but don't enable the driver, then all you will see
+is a dummy MSDOS partition filling the whole disk.  You won't be able to mount
+any of the volumes on the disk.
+
+
+Booting
+-------
+
+If you enable LDM support, then lilo is capable of booting from any of the
+discovered partitions.  However, grub does not understand the LDM partitioning
+and cannot boot from a Dynamic Disk.
+
+
+More Documentation
+------------------
+
+There is an Overview of the LDM together with complete Technical Documentation.
+It is available for download.
+
+  http://www.linux-ntfs.org/
+
+If you have any LDM questions that aren't answered in the documentation, email
+me.
+
+Cheers,
+    FlatCap - Richard Russon
+    ldm@flatcap.org
+
diff --git a/Documentation/admin-guide/lockup-watchdogs.rst b/Documentation/admin-guide/lockup-watchdogs.rst
new file mode 100644
index 000000000000..290840c160af
--- /dev/null
+++ b/Documentation/admin-guide/lockup-watchdogs.rst
@@ -0,0 +1,83 @@
+===============================================================
+Softlockup detector and hardlockup detector (aka nmi_watchdog)
+===============================================================
+
+The Linux kernel can act as a watchdog to detect both soft and hard
+lockups.
+
+A 'softlockup' is defined as a bug that causes the kernel to loop in
+kernel mode for more than 20 seconds (see "Implementation" below for
+details), without giving other tasks a chance to run. The current
+stack trace is displayed upon detection and, by default, the system
+will stay locked up. Alternatively, the kernel can be configured to
+panic; a sysctl, "kernel.softlockup_panic", a kernel parameter,
+"softlockup_panic" (see "Documentation/admin-guide/kernel-parameters.rst" for
+details), and a compile option, "BOOTPARAM_SOFTLOCKUP_PANIC", are
+provided for this.
+
+A 'hardlockup' is defined as a bug that causes the CPU to loop in
+kernel mode for more than 10 seconds (see "Implementation" below for
+details), without letting other interrupts have a chance to run.
+Similarly to the softlockup case, the current stack trace is displayed
+upon detection and the system will stay locked up unless the default
+behavior is changed, which can be done through a sysctl,
+'hardlockup_panic', a compile time knob, "BOOTPARAM_HARDLOCKUP_PANIC",
+and a kernel parameter, "nmi_watchdog"
+(see "Documentation/admin-guide/kernel-parameters.rst" for details).
+
+The panic option can be used in combination with panic_timeout (this
+timeout is set through the confusingly named "kernel.panic" sysctl),
+to cause the system to reboot automatically after a specified amount
+of time.
+
+Implementation
+==============
+
+The soft and hard lockup detectors are built on top of the hrtimer and
+perf subsystems, respectively. A direct consequence of this is that,
+in principle, they should work in any architecture where these
+subsystems are present.
+
+A periodic hrtimer runs to generate interrupts and kick the watchdog
+task. An NMI perf event is generated every "watchdog_thresh"
+(compile-time initialized to 10 and configurable through sysctl of the
+same name) seconds to check for hardlockups. If any CPU in the system
+does not receive any hrtimer interrupt during that time the
+'hardlockup detector' (the handler for the NMI perf event) will
+generate a kernel warning or call panic, depending on the
+configuration.
+
+The watchdog task is a high priority kernel thread that updates a
+timestamp every time it is scheduled. If that timestamp is not updated
+for 2*watchdog_thresh seconds (the softlockup threshold) the
+'softlockup detector' (coded inside the hrtimer callback function)
+will dump useful debug information to the system log, after which it
+will call panic if it was instructed to do so or resume execution of
+other kernel code.
+
+The period of the hrtimer is 2*watchdog_thresh/5, which means it has
+two or three chances to generate an interrupt before the hardlockup
+detector kicks in.
+
+As explained above, a kernel knob is provided that allows
+administrators to configure the period of the hrtimer and the perf
+event. The right value for a particular environment is a trade-off
+between fast response to lockups and detection overhead.
+
+By default, the watchdog runs on all online cores.  However, on a
+kernel configured with NO_HZ_FULL, by default the watchdog runs only
+on the housekeeping cores, not the cores specified in the "nohz_full"
+boot argument.  If we allowed the watchdog to run by default on
+the "nohz_full" cores, we would have to run timer ticks to activate
+the scheduler, which would prevent the "nohz_full" functionality
+from protecting the user code on those cores from the kernel.
+Of course, disabling it by default on the nohz_full cores means that
+when those cores do enter the kernel, by default we will not be
+able to detect if they lock up.  However, allowing the watchdog
+to continue to run on the housekeeping (non-tickless) cores means
+that we will continue to detect lockups properly on those cores.
+
+In either case, the set of cores excluded from running the watchdog
+may be adjusted via the kernel.watchdog_cpumask sysctl.  For
+nohz_full cores, this may be useful for debugging a case where the
+kernel seems to be hanging on the nohz_full cores.
diff --git a/Documentation/admin-guide/mm/cma_debugfs.rst b/Documentation/admin-guide/mm/cma_debugfs.rst
new file mode 100644
index 000000000000..4e06ffabd78a
--- /dev/null
+++ b/Documentation/admin-guide/mm/cma_debugfs.rst
@@ -0,0 +1,25 @@
+=====================
+CMA Debugfs Interface
+=====================
+
+The CMA debugfs interface is useful to retrieve basic information out of the
+different CMA areas and to test allocation/release in each of the areas.
+
+Each CMA zone represents a directory under <debugfs>/cma/, indexed by the
+kernel's CMA index. So the first CMA zone would be:
+
+	<debugfs>/cma/cma-0
+
+The structure of the files created under that directory is as follows:
+
+ - [RO] base_pfn: The base PFN (Page Frame Number) of the zone.
+ - [RO] count: Amount of memory in the CMA area.
+ - [RO] order_per_bit: Order of pages represented by one bit.
+ - [RO] bitmap: The bitmap of page states in the zone.
+ - [WO] alloc: Allocate N pages from that CMA area. For example::
+
+	echo 5 > <debugfs>/cma/cma-2/alloc
+
+would try to allocate 5 pages from the cma-2 area.
+
+ - [WO] free: Free N pages from that CMA area, similar to the above.
diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst
index 5f61a6c429e0..11db46448354 100644
--- a/Documentation/admin-guide/mm/index.rst
+++ b/Documentation/admin-guide/mm/index.rst
@@ -26,6 +26,7 @@ the Linux memory management.
    :maxdepth: 1
 
    concepts
+   cma_debugfs
    hugetlbpage
    idle_page_tracking
    ksm
diff --git a/Documentation/admin-guide/numastat.rst b/Documentation/admin-guide/numastat.rst
new file mode 100644
index 000000000000..aaf1667489f8
--- /dev/null
+++ b/Documentation/admin-guide/numastat.rst
@@ -0,0 +1,30 @@
+===============================
+Numa policy hit/miss statistics
+===============================
+
+/sys/devices/system/node/node*/numastat
+
+All units are pages. Hugepages have separate counters.
+
+=============== ============================================================
+numa_hit	A process wanted to allocate memory from this node,
+		and succeeded.
+
+numa_miss	A process wanted to allocate memory from another node,
+		but ended up with memory from this node.
+
+numa_foreign	A process wanted to allocate on this node,
+		but ended up with memory from another one.
+
+local_node	A process ran on this node and got memory from it.
+
+other_node	A process ran on this node and got memory from another node.
+
+interleave_hit 	Interleaving wanted to allocate from this node
+		and succeeded.
+=============== ============================================================
+
+For easier reading you can use the numastat utility from the numactl package
+(http://oss.sgi.com/projects/libnuma/). Note that it only works
+well right now on machines with a small number of CPUs.
+
diff --git a/Documentation/admin-guide/pnp.rst b/Documentation/admin-guide/pnp.rst
new file mode 100644
index 000000000000..bab2d10631f0
--- /dev/null
+++ b/Documentation/admin-guide/pnp.rst
@@ -0,0 +1,292 @@
+=================================
+Linux Plug and Play Documentation
+=================================
+
+:Author: Adam Belay <ambx1@neo.rr.com>
+:Last updated: Oct. 16, 2002
+
+
+Overview
+--------
+
+Plug and Play provides a means of detecting and setting resources for legacy or
+otherwise unconfigurable devices.  The Linux Plug and Play Layer provides these 
+services to compatible drivers.
+
+
+The User Interface
+------------------
+
+The Linux Plug and Play user interface provides a means to activate PnP devices
+for legacy and user level drivers that do not support Linux Plug and Play.  The 
+user interface is integrated into sysfs.
+
+In addition to the standard sysfs file the following are created in each
+device's directory:
+- id - displays a list of support EISA IDs
+- options - displays possible resource configurations
+- resources - displays currently allocated resources and allows resource changes
+
+activating a device
+^^^^^^^^^^^^^^^^^^^
+
+::
+
+	# echo "auto" > resources
+
+this will invoke the automatic resource config system to activate the device
+
+manually activating a device
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+	# echo "manual <depnum> <mode>" > resources
+
+	<depnum> - the configuration number
+	<mode> - static or dynamic
+		 static = for next boot
+		 dynamic = now
+
+disabling a device
+^^^^^^^^^^^^^^^^^^
+
+::
+
+	# echo "disable" > resources
+
+
+EXAMPLE:
+
+Suppose you need to activate the floppy disk controller.
+
+1. change to the proper directory, in my case it is
+   /driver/bus/pnp/devices/00:0f::
+
+	# cd /driver/bus/pnp/devices/00:0f
+	# cat name
+	PC standard floppy disk controller
+
+2. check if the device is already active::
+
+	# cat resources
+	DISABLED
+
+  - Notice the string "DISABLED".  This means the device is not active.
+
+3. check the device's possible configurations (optional)::
+
+	# cat options
+	Dependent: 01 - Priority acceptable
+	    port 0x3f0-0x3f0, align 0x7, size 0x6, 16-bit address decoding
+	    port 0x3f7-0x3f7, align 0x0, size 0x1, 16-bit address decoding
+	    irq 6
+	    dma 2 8-bit compatible
+	Dependent: 02 - Priority acceptable
+	    port 0x370-0x370, align 0x7, size 0x6, 16-bit address decoding
+	    port 0x377-0x377, align 0x0, size 0x1, 16-bit address decoding
+	    irq 6
+	    dma 2 8-bit compatible
+
+4. now activate the device::
+
+	# echo "auto" > resources
+
+5. finally check if the device is active::
+
+	# cat resources
+	io 0x3f0-0x3f5
+	io 0x3f7-0x3f7
+	irq 6
+	dma 2
+
+also there are a series of kernel parameters::
+
+	pnp_reserve_irq=irq1[,irq2] ....
+	pnp_reserve_dma=dma1[,dma2] ....
+	pnp_reserve_io=io1,size1[,io2,size2] ....
+	pnp_reserve_mem=mem1,size1[,mem2,size2] ....
+
+
+
+The Unified Plug and Play Layer
+-------------------------------
+
+All Plug and Play drivers, protocols, and services meet at a central location
+called the Plug and Play Layer.  This layer is responsible for the exchange of 
+information between PnP drivers and PnP protocols.  Thus it automatically 
+forwards commands to the proper protocol.  This makes writing PnP drivers 
+significantly easier.
+
+The following functions are available from the Plug and Play Layer:
+
+pnp_get_protocol
+  increments the number of uses by one
+
+pnp_put_protocol
+  deincrements the number of uses by one
+
+pnp_register_protocol
+  use this to register a new PnP protocol
+
+pnp_unregister_protocol
+  use this function to remove a PnP protocol from the Plug and Play Layer
+
+pnp_register_driver
+  adds a PnP driver to the Plug and Play Layer
+
+  this includes driver model integration
+  returns zero for success or a negative error number for failure; count
+  calls to the .add() method if you need to know how many devices bind to
+  the driver
+
+pnp_unregister_driver
+  removes a PnP driver from the Plug and Play Layer
+
+
+
+Plug and Play Protocols
+-----------------------
+
+This section contains information for PnP protocol developers.
+
+The following Protocols are currently available in the computing world:
+
+- PNPBIOS:
+    used for system devices such as serial and parallel ports.
+- ISAPNP:
+    provides PnP support for the ISA bus
+- ACPI:
+    among its many uses, ACPI provides information about system level
+    devices.
+
+It is meant to replace the PNPBIOS.  It is not currently supported by Linux
+Plug and Play but it is planned to be in the near future.
+
+
+Requirements for a Linux PnP protocol:
+1. the protocol must use EISA IDs
+2. the protocol must inform the PnP Layer of a device's current configuration
+
+- the ability to set resources is optional but preferred.
+
+The following are PnP protocol related functions:
+
+pnp_add_device
+  use this function to add a PnP device to the PnP layer
+
+  only call this function when all wanted values are set in the pnp_dev
+  structure
+
+pnp_init_device
+  call this to initialize the PnP structure
+
+pnp_remove_device
+  call this to remove a device from the Plug and Play Layer.
+  it will fail if the device is still in use.
+  automatically will free mem used by the device and related structures
+
+pnp_add_id
+  adds an EISA ID to the list of supported IDs for the specified device
+
+For more information consult the source of a protocol such as
+/drivers/pnp/pnpbios/core.c.
+
+
+
+Linux Plug and Play Drivers
+---------------------------
+
+This section contains information for Linux PnP driver developers.
+
+The New Way
+^^^^^^^^^^^
+
+1. first make a list of supported EISA IDS
+
+   ex::
+
+	static const struct pnp_id pnp_dev_table[] = {
+		/* Standard LPT Printer Port */
+		{.id = "PNP0400", .driver_data = 0},
+		/* ECP Printer Port */
+		{.id = "PNP0401", .driver_data = 0},
+		{.id = ""}
+	};
+
+   Please note that the character 'X' can be used as a wild card in the function
+   portion (last four characters).
+
+   ex::
+
+	/* Unknown PnP modems */
+	{	"PNPCXXX",		UNKNOWN_DEV	},
+
+   Supported PnP card IDs can optionally be defined.
+   ex::
+
+	static const struct pnp_id pnp_card_table[] = {
+		{	"ANYDEVS",		0	},
+		{	"",			0	}
+	};
+
+2. Optionally define probe and remove functions.  It may make sense not to
+   define these functions if the driver already has a reliable method of detecting
+   the resources, such as the parport_pc driver.
+
+   ex::
+
+	static int
+	serial_pnp_probe(struct pnp_dev * dev, const struct pnp_id *card_id, const
+			struct pnp_id *dev_id)
+	{
+	. . .
+
+   ex::
+
+	static void serial_pnp_remove(struct pnp_dev * dev)
+	{
+	. . .
+
+   consult /drivers/serial/8250_pnp.c for more information.
+
+3. create a driver structure
+
+   ex::
+
+	static struct pnp_driver serial_pnp_driver = {
+		.name		= "serial",
+		.card_id_table	= pnp_card_table,
+		.id_table	= pnp_dev_table,
+		.probe		= serial_pnp_probe,
+		.remove		= serial_pnp_remove,
+	};
+
+   * name and id_table cannot be NULL.
+
+4. register the driver
+
+   ex::
+
+	static int __init serial8250_pnp_init(void)
+	{
+		return pnp_register_driver(&serial_pnp_driver);
+	}
+
+The Old Way
+^^^^^^^^^^^
+
+A series of compatibility functions have been created to make it easy to convert
+ISAPNP drivers.  They should serve as a temporary solution only.
+
+They are as follows::
+
+	struct pnp_card *pnp_find_card(unsigned short vendor,
+				       unsigned short device,
+				       struct pnp_card *from)
+
+	struct pnp_dev *pnp_find_dev(struct pnp_card *card,
+				     unsigned short vendor,
+				     unsigned short function,
+				     struct pnp_dev *from)
+
diff --git a/Documentation/admin-guide/rtc.rst b/Documentation/admin-guide/rtc.rst
new file mode 100644
index 000000000000..688c95b11919
--- /dev/null
+++ b/Documentation/admin-guide/rtc.rst
@@ -0,0 +1,140 @@
+=======================================
+Real Time Clock (RTC) Drivers for Linux
+=======================================
+
+When Linux developers talk about a "Real Time Clock", they usually mean
+something that tracks wall clock time and is battery backed so that it
+works even with system power off.  Such clocks will normally not track
+the local time zone or daylight savings time -- unless they dual boot
+with MS-Windows -- but will instead be set to Coordinated Universal Time
+(UTC, formerly "Greenwich Mean Time").
+
+The newest non-PC hardware tends to just count seconds, like the time(2)
+system call reports, but RTCs also very commonly represent time using
+the Gregorian calendar and 24 hour time, as reported by gmtime(3).
+
+Linux has two largely-compatible userspace RTC API families you may
+need to know about:
+
+    *	/dev/rtc ... is the RTC provided by PC compatible systems,
+	so it's not very portable to non-x86 systems.
+
+    *	/dev/rtc0, /dev/rtc1 ... are part of a framework that's
+	supported by a wide variety of RTC chips on all systems.
+
+Programmers need to understand that the PC/AT functionality is not
+always available, and some systems can do much more.  That is, the
+RTCs use the same API to make requests in both RTC frameworks (using
+different filenames of course), but the hardware may not offer the
+same functionality.  For example, not every RTC is hooked up to an
+IRQ, so they can't all issue alarms; and where standard PC RTCs can
+only issue an alarm up to 24 hours in the future, other hardware may
+be able to schedule one any time in the upcoming century.
+
+
+Old PC/AT-Compatible driver:  /dev/rtc
+--------------------------------------
+
+All PCs (even Alpha machines) have a Real Time Clock built into them.
+Usually they are built into the chipset of the computer, but some may
+actually have a Motorola MC146818 (or clone) on the board. This is the
+clock that keeps the date and time while your computer is turned off.
+
+ACPI has standardized that MC146818 functionality, and extended it in
+a few ways (enabling longer alarm periods, and wake-from-hibernate).
+That functionality is NOT exposed in the old driver.
+
+However it can also be used to generate signals from a slow 2Hz to a
+relatively fast 8192Hz, in increments of powers of two. These signals
+are reported by interrupt number 8. (Oh! So *that* is what IRQ 8 is
+for...) It can also function as a 24hr alarm, raising IRQ 8 when the
+alarm goes off. The alarm can also be programmed to only check any
+subset of the three programmable values, meaning that it could be set to
+ring on the 30th second of the 30th minute of every hour, for example.
+The clock can also be set to generate an interrupt upon every clock
+update, thus generating a 1Hz signal.
+
+The interrupts are reported via /dev/rtc (major 10, minor 135, read only
+character device) in the form of an unsigned long. The low byte contains
+the type of interrupt (update-done, alarm-rang, or periodic) that was
+raised, and the remaining bytes contain the number of interrupts since
+the last read.  Status information is reported through the pseudo-file
+/proc/driver/rtc if the /proc filesystem was enabled.  The driver has
+built in locking so that only one process is allowed to have the /dev/rtc
+interface open at a time.
+
+A user process can monitor these interrupts by doing a read(2) or a
+select(2) on /dev/rtc -- either will block/stop the user process until
+the next interrupt is received. This is useful for things like
+reasonably high frequency data acquisition where one doesn't want to
+burn up 100% CPU by polling gettimeofday etc. etc.
+
+At high frequencies, or under high loads, the user process should check
+the number of interrupts received since the last read to determine if
+there has been any interrupt "pileup" so to speak. Just for reference, a
+typical 486-33 running a tight read loop on /dev/rtc will start to suffer
+occasional interrupt pileup (i.e. > 1 IRQ event since last read) for
+frequencies above 1024Hz. So you really should check the high bytes
+of the value you read, especially at frequencies above that of the
+normal timer interrupt, which is 100Hz.
+
+Programming and/or enabling interrupt frequencies greater than 64Hz is
+only allowed by root. This is perhaps a bit conservative, but we don't want
+an evil user generating lots of IRQs on a slow 386sx-16, where it might have
+a negative impact on performance. This 64Hz limit can be changed by writing
+a different value to /proc/sys/dev/rtc/max-user-freq. Note that the
+interrupt handler is only a few lines of code to minimize any possibility
+of this effect.
+
+Also, if the kernel time is synchronized with an external source, the 
+kernel will write the time back to the CMOS clock every 11 minutes. In 
+the process of doing this, the kernel briefly turns off RTC periodic 
+interrupts, so be aware of this if you are doing serious work. If you
+don't synchronize the kernel time with an external source (via ntp or
+whatever) then the kernel will keep its hands off the RTC, allowing you
+exclusive access to the device for your applications.
+
+The alarm and/or interrupt frequency are programmed into the RTC via
+various ioctl(2) calls as listed in ./include/linux/rtc.h
+Rather than write 50 pages describing the ioctl() and so on, it is
+perhaps more useful to include a small test program that demonstrates
+how to use them, and demonstrates the features of the driver. This is
+probably a lot more useful to people interested in writing applications
+that will be using this driver.  See the code at the end of this document.
+
+(The original /dev/rtc driver was written by Paul Gortmaker.)
+
+
+New portable "RTC Class" drivers:  /dev/rtcN
+--------------------------------------------
+
+Because Linux supports many non-ACPI and non-PC platforms, some of which
+have more than one RTC style clock, it needed a more portable solution
+than expecting a single battery-backed MC146818 clone on every system.
+Accordingly, a new "RTC Class" framework has been defined.  It offers
+three different userspace interfaces:
+
+    *	/dev/rtcN ... much the same as the older /dev/rtc interface
+
+    *	/sys/class/rtc/rtcN ... sysfs attributes support readonly
+	access to some RTC attributes.
+
+    *	/proc/driver/rtc ... the system clock RTC may expose itself
+	using a procfs interface. If there is no RTC for the system clock,
+	rtc0 is used by default. More information is (currently) shown
+	here than through sysfs.
+
+The RTC Class framework supports a wide variety of RTCs, ranging from those
+integrated into embeddable system-on-chip (SOC) processors to discrete chips
+using I2C, SPI, or some other bus to communicate with the host CPU.  There's
+even support for PC-style RTCs ... including the features exposed on newer PCs
+through ACPI.
+
+The new framework also removes the "one RTC per system" restriction.  For
+example, maybe the low-power battery-backed RTC is a discrete I2C chip, but
+a high functionality RTC is integrated into the SOC.  That system might read
+the system clock from the discrete RTC, but use the integrated one for all
+other tasks, because of its greater functionality.
+
+Check out tools/testing/selftests/rtc/rtctest.c for an example usage of the
+ioctl interface.
diff --git a/Documentation/admin-guide/svga.rst b/Documentation/admin-guide/svga.rst
new file mode 100644
index 000000000000..b6c2f9acca92
--- /dev/null
+++ b/Documentation/admin-guide/svga.rst
@@ -0,0 +1,249 @@
+.. include:: <isonum.txt>
+
+=================================
+Video Mode Selection Support 2.13
+=================================
+
+:Copyright: |copy| 1995--1999 Martin Mares, <mj@ucw.cz>
+
+Intro
+~~~~~
+
+This small document describes the "Video Mode Selection" feature which
+allows the use of various special video modes supported by the video BIOS. Due
+to usage of the BIOS, the selection is limited to boot time (before the
+kernel decompression starts) and works only on 80X86 machines.
+
+.. note::
+
+   Short intro for the impatient: Just use vga=ask for the first time,
+   enter ``scan`` on the video mode prompt, pick the mode you want to use,
+   remember its mode ID (the four-digit hexadecimal number) and then
+   set the vga parameter to this number (converted to decimal first).
+
+The video mode to be used is selected by a kernel parameter which can be
+specified in the kernel Makefile (the SVGA_MODE=... line) or by the "vga=..."
+option of LILO (or some other boot loader you use) or by the "vidmode" utility
+(present in standard Linux utility packages). You can use the following values
+of this parameter::
+
+   NORMAL_VGA - Standard 80x25 mode available on all display adapters.
+
+   EXTENDED_VGA	- Standard 8-pixel font mode: 80x43 on EGA, 80x50 on VGA.
+
+   ASK_VGA - Display a video mode menu upon startup (see below).
+
+   0..35 - Menu item number (when you have used the menu to view the list of
+      modes available on your adapter, you can specify the menu item you want
+      to use). 0..9 correspond to "0".."9", 10..35 to "a".."z". Warning: the
+      mode list displayed may vary as the kernel version changes, because the
+      modes are listed in a "first detected -- first displayed" manner. It's
+      better to use absolute mode numbers instead.
+
+   0x.... - Hexadecimal video mode ID (also displayed on the menu, see below
+      for exact meaning of the ID). Warning: rdev and LILO don't support
+      hexadecimal numbers -- you have to convert it to decimal manually.
+
+Menu
+~~~~
+
+The ASK_VGA mode causes the kernel to offer a video mode menu upon
+bootup. It displays a "Press <RETURN> to see video modes available, <SPACE>
+to continue or wait 30 secs" message. If you press <RETURN>, you enter the
+menu, if you press <SPACE> or wait 30 seconds, the kernel will boot up in
+the standard 80x25 mode.
+
+The menu looks like::
+
+	Video adapter: <name-of-detected-video-adapter>
+	Mode:    COLSxROWS:
+	0  0F00  80x25
+	1  0F01  80x50
+	2  0F02  80x43
+	3  0F03  80x26
+	....
+	Enter mode number or ``scan``: <flashing-cursor-here>
+
+<name-of-detected-video-adapter> tells what video adapter did Linux detect
+-- it's either a generic adapter name (MDA, CGA, HGC, EGA, VGA, VESA VGA [a VGA
+with VESA-compliant BIOS]) or a chipset name (e.g., Trident). Direct detection
+of chipsets is turned off by default as it's inherently unreliable due to
+absolutely insane PC design.
+
+"0  0F00  80x25" means that the first menu item (the menu items are numbered
+from "0" to "9" and from "a" to "z") is a 80x25 mode with ID=0x0f00 (see the
+next section for a description of mode IDs).
+
+<flashing-cursor-here> encourages you to enter the item number or mode ID
+you wish to set and press <RETURN>. If the computer complains something about
+"Unknown mode ID", it is trying to tell you that it isn't possible to set such
+a mode. It's also possible to press only <RETURN> which leaves the current mode.
+
+The mode list usually contains a few basic modes and some VESA modes.  In
+case your chipset has been detected, some chipset-specific modes are shown as
+well (some of these might be missing or unusable on your machine as different
+BIOSes are often shipped with the same card and the mode numbers depend purely
+on the VGA BIOS).
+
+The modes displayed on the menu are partially sorted: The list starts with
+the standard modes (80x25 and 80x50) followed by "special" modes (80x28 and
+80x43), local modes (if the local modes feature is enabled), VESA modes and
+finally SVGA modes for the auto-detected adapter.
+
+If you are not happy with the mode list offered (e.g., if you think your card
+is able to do more), you can enter "scan" instead of item number / mode ID.  The
+program will try to ask the BIOS for all possible video mode numbers and test
+what happens then. The screen will be probably flashing wildly for some time and
+strange noises will be heard from inside the monitor and so on and then, really
+all consistent video modes supported by your BIOS will appear (plus maybe some
+``ghost modes``). If you are afraid this could damage your monitor, don't use
+this function.
+
+After scanning, the mode ordering is a bit different: the auto-detected SVGA
+modes are not listed at all and the modes revealed by ``scan`` are shown before
+all VESA modes.
+
+Mode IDs
+~~~~~~~~
+
+Because of the complexity of all the video stuff, the video mode IDs
+used here are also a bit complex. A video mode ID is a 16-bit number usually
+expressed in a hexadecimal notation (starting with "0x"). You can set a mode
+by entering its mode directly if you know it even if it isn't shown on the menu.
+
+The ID numbers can be divided to those regions::
+
+   0x0000 to 0x00ff - menu item references. 0x0000 is the first item. Don't use
+	outside the menu as this can change from boot to boot (especially if you
+	have used the ``scan`` feature).
+
+   0x0100 to 0x017f - standard BIOS modes. The ID is a BIOS video mode number
+	(as presented to INT 10, function 00) increased by 0x0100.
+
+   0x0200 to 0x08ff - VESA BIOS modes. The ID is a VESA mode ID increased by
+	0x0100. All VESA modes should be autodetected and shown on the menu.
+
+   0x0900 to 0x09ff - Video7 special modes. Set by calling INT 0x10, AX=0x6f05.
+	(Usually 940=80x43, 941=132x25, 942=132x44, 943=80x60, 944=100x60,
+	945=132x28 for the standard Video7 BIOS)
+
+   0x0f00 to 0x0fff - special modes (they are set by various tricks -- usually
+	by modifying one of the standard modes). Currently available:
+	0x0f00	standard 80x25, don't reset mode if already set (=FFFF)
+	0x0f01	standard with 8-point font: 80x43 on EGA, 80x50 on VGA
+	0x0f02	VGA 80x43 (VGA switched to 350 scanlines with a 8-point font)
+	0x0f03	VGA 80x28 (standard VGA scans, but 14-point font)
+	0x0f04	leave current video mode
+	0x0f05	VGA 80x30 (480 scans, 16-point font)
+	0x0f06	VGA 80x34 (480 scans, 14-point font)
+	0x0f07	VGA 80x60 (480 scans, 8-point font)
+	0x0f08	Graphics hack (see the VIDEO_GFX_HACK paragraph below)
+
+   0x1000 to 0x7fff - modes specified by resolution. The code has a "0xRRCC"
+	form where RR is a number of rows and CC is a number of columns.
+	E.g., 0x1950 corresponds to a 80x25 mode, 0x2b84 to 132x43 etc.
+	This is the only fully portable way to refer to a non-standard mode,
+	but it relies on the mode being found and displayed on the menu
+	(remember that mode scanning is not done automatically).
+
+   0xff00 to 0xffff - aliases for backward compatibility:
+	0xffff	equivalent to 0x0f00 (standard 80x25)
+	0xfffe	equivalent to 0x0f01 (EGA 80x43 or VGA 80x50)
+
+If you add 0x8000 to the mode ID, the program will try to recalculate
+vertical display timing according to mode parameters, which can be used to
+eliminate some annoying bugs of certain VGA BIOSes (usually those used for
+cards with S3 chipsets and old Cirrus Logic BIOSes) -- mainly extra lines at the
+end of the display.
+
+Options
+~~~~~~~
+
+Build options for arch/x86/boot/* are selected by the kernel kconfig
+utility and the kernel .config file.
+
+VIDEO_GFX_HACK - includes special hack for setting of graphics modes
+to be used later by special drivers.
+Allows to set _any_ BIOS mode including graphic ones and forcing specific
+text screen resolution instead of peeking it from BIOS variables. Don't use
+unless you think you know what you're doing. To activate this setup, use
+mode number 0x0f08 (see the Mode IDs section above).
+
+Still doesn't work?
+~~~~~~~~~~~~~~~~~~~
+
+When the mode detection doesn't work (e.g., the mode list is incorrect or
+the machine hangs instead of displaying the menu), try to switch off some of
+the configuration options listed under "Options". If it fails, you can still use
+your kernel with the video mode set directly via the kernel parameter.
+
+In either case, please send me a bug report containing what _exactly_
+happens and how do the configuration switches affect the behaviour of the bug.
+
+If you start Linux from M$-DOS, you might also use some DOS tools for
+video mode setting. In this case, you must specify the 0x0f04 mode ("leave
+current settings") to Linux, because if you don't and you use any non-standard
+mode, Linux will switch to 80x25 automatically.
+
+If you set some extended mode and there's one or more extra lines on the
+bottom of the display containing already scrolled-out text, your VGA BIOS
+contains the most common video BIOS bug called "incorrect vertical display
+end setting". Adding 0x8000 to the mode ID might fix the problem. Unfortunately,
+this must be done manually -- no autodetection mechanisms are available.
+
+History
+~~~~~~~
+
+=============== ================================================================
+1.0 (??-Nov-95)	First version supporting all adapters supported by the old
+		setup.S + Cirrus Logic 54XX. Present in some 1.3.4? kernels
+		and then removed due to instability on some machines.
+2.0 (28-Jan-96)	Rewritten from scratch. Cirrus Logic 64XX support added, almost
+		everything is configurable, the VESA support should be much more
+		stable, explicit mode numbering allowed, "scan" implemented etc.
+2.1 (30-Jan-96) VESA modes moved to 0x200-0x3ff. Mode selection by resolution
+		supported. Few bugs fixed. VESA modes are listed prior to
+		modes supplied by SVGA autodetection as they are more reliable.
+		CLGD autodetect works better. Doesn't depend on 80x25 being
+		active when started. Scanning fixed. 80x43 (any VGA) added.
+		Code cleaned up.
+2.2 (01-Feb-96)	EGA 80x43 fixed. VESA extended to 0x200-0x4ff (non-standard 02XX
+		VESA modes work now). Display end bug workaround supported.
+		Special modes renumbered to allow adding of the "recalculate"
+		flag, 0xffff and 0xfffe became aliases instead of real IDs.
+		Screen contents retained during mode changes.
+2.3 (15-Mar-96)	Changed to work with 1.3.74 kernel.
+2.4 (18-Mar-96)	Added patches by Hans Lermen fixing a memory overwrite problem
+		with some boot loaders. Memory management rewritten to reflect
+		these changes. Unfortunately, screen contents retaining works
+		only with some loaders now.
+		Added a Tseng 132x60 mode.
+2.5 (19-Mar-96)	Fixed a VESA mode scanning bug introduced in 2.4.
+2.6 (25-Mar-96)	Some VESA BIOS errors not reported -- it fixes error reports on
+		several cards with broken VESA code (e.g., ATI VGA).
+2.7 (09-Apr-96)	- Accepted all VESA modes in range 0x100 to 0x7ff, because some
+		  cards use very strange mode numbers.
+		- Added Realtek VGA modes (thanks to Gonzalo Tornaria).
+		- Hardware testing order slightly changed, tests based on ROM
+		  contents done as first.
+		- Added support for special Video7 mode switching functions
+		  (thanks to Tom Vander Aa).
+		- Added 480-scanline modes (especially useful for notebooks,
+		  original version written by hhanemaa@cs.ruu.nl, patched by
+		  Jeff Chua, rewritten by me).
+		- Screen store/restore fixed.
+2.8 (14-Apr-96) - Previous release was not compilable without CONFIG_VIDEO_SVGA.
+		- Better recognition of text modes during mode scan.
+2.9 (12-May-96)	- Ignored VESA modes 0x80 - 0xff (more VESA BIOS bugs!)
+2.10(11-Nov-96) - The whole thing made optional.
+		- Added the CONFIG_VIDEO_400_HACK switch.
+		- Added the CONFIG_VIDEO_GFX_HACK switch.
+		- Code cleanup.
+2.11(03-May-97) - Yet another cleanup, now including also the documentation.
+		- Direct testing of SVGA adapters turned off by default, ``scan``
+		  offered explicitly on the prompt line.
+		- Removed the doc section describing adding of new probing
+		  functions as I try to get rid of _all_ hardware probing here.
+2.12(25-May-98) Added support for VESA frame buffer graphics.
+2.13(14-May-99) Minor documentation fixes.
+=============== ================================================================
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index a0c1d4ce403a..032c7cd3cede 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -327,7 +327,7 @@ when a hard lockup is detected.
    0 - don't panic on hard lockup
    1 - panic on hard lockup
 
-See Documentation/lockup-watchdogs.txt for more information.  This can
+See Documentation/admin-guide/lockup-watchdogs.rst for more information.  This can
 also be set using the nmi_watchdog kernel parameter.
 
 
diff --git a/Documentation/admin-guide/video-output.rst b/Documentation/admin-guide/video-output.rst
new file mode 100644
index 000000000000..56d6fa2e2368
--- /dev/null
+++ b/Documentation/admin-guide/video-output.rst
@@ -0,0 +1,34 @@
+Video Output Switcher Control
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+2006 luming.yu@intel.com
+
+The output sysfs class driver provides an abstract video output layer that
+can be used to hook platform specific methods to enable/disable video output
+device through common sysfs interface. For example, on my IBM ThinkPad T42
+laptop, The ACPI video driver registered its output devices and read/write
+method for 'state' with output sysfs class. The user interface under sysfs is::
+
+  linux:/sys/class/video_output # tree .
+  .
+  |-- CRT0
+  |   |-- device -> ../../../devices/pci0000:00/0000:00:01.0
+  |   |-- state
+  |   |-- subsystem -> ../../../class/video_output
+  |   `-- uevent
+  |-- DVI0
+  |   |-- device -> ../../../devices/pci0000:00/0000:00:01.0
+  |   |-- state
+  |   |-- subsystem -> ../../../class/video_output
+  |   `-- uevent
+  |-- LCD0
+  |   |-- device -> ../../../devices/pci0000:00/0000:00:01.0
+  |   |-- state
+  |   |-- subsystem -> ../../../class/video_output
+  |   `-- uevent
+  `-- TV0
+     |-- device -> ../../../devices/pci0000:00/0000:00:01.0
+     |-- state
+     |-- subsystem -> ../../../class/video_output
+     `-- uevent
+
diff --git a/Documentation/auxdisplay/lcd-panel-cgram.rst b/Documentation/auxdisplay/lcd-panel-cgram.rst
deleted file mode 100644
index dfef50286018..000000000000
--- a/Documentation/auxdisplay/lcd-panel-cgram.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-:orphan:
-
-======================================
-Parallel port LCD/Keypad Panel support
-======================================
-
-Some LCDs allow you to define up to 8 characters, mapped to ASCII
-characters 0 to 7. The escape code to define a new character is
-'\e[LG' followed by one digit from 0 to 7, representing the character
-number, and up to 8 couples of hex digits terminated by a semi-colon
-(';'). Each couple of digits represents a line, with 1-bits for each
-illuminated pixel with LSB on the right. Lines are numbered from the
-top of the character to the bottom. On a 5x7 matrix, only the 5 lower
-bits of the 7 first bytes are used for each character. If the string
-is incomplete, only complete lines will be redefined. Here are some
-examples::
-
-  printf "\e[LG0010101050D1F0C04;"  => 0 = [enter]
-  printf "\e[LG1040E1F0000000000;"  => 1 = [up]
-  printf "\e[LG2000000001F0E0400;"  => 2 = [down]
-  printf "\e[LG3040E1F001F0E0400;"  => 3 = [up-down]
-  printf "\e[LG40002060E1E0E0602;"  => 4 = [left]
-  printf "\e[LG500080C0E0F0E0C08;"  => 5 = [right]
-  printf "\e[LG60016051516141400;"  => 6 = "IP"
-
-  printf "\e[LG00103071F1F070301;"  => big speaker
-  printf "\e[LG00002061E1E060200;"  => small speaker
-
-Willy
diff --git a/Documentation/btmrvl.txt b/Documentation/btmrvl.txt
deleted file mode 100644
index ec57740ead0c..000000000000
--- a/Documentation/btmrvl.txt
+++ /dev/null
@@ -1,124 +0,0 @@
-=============
-btmrvl driver
-=============
-
-All commands are used via debugfs interface.
-
-Set/get driver configurations
-=============================
-
-Path:	/debug/btmrvl/config/
-
-gpiogap=[n], hscfgcmd
-	These commands are used to configure the host sleep parameters::
-	bit 8:0  -- Gap
-	bit 16:8 -- GPIO
-
-	where GPIO is the pin number of GPIO used to wake up the host.
-	It could be any valid GPIO pin# (e.g. 0-7) or 0xff (SDIO interface
-	wakeup will be used instead).
-
-	where Gap is the gap in milli seconds between wakeup signal and
-	wakeup event, or 0xff for special host sleep setting.
-
-	Usage::
-
-		# Use SDIO interface to wake up the host and set GAP to 0x80:
-		echo 0xff80 > /debug/btmrvl/config/gpiogap
-		echo 1 > /debug/btmrvl/config/hscfgcmd
-
-		# Use GPIO pin #3 to wake up the host and set GAP to 0xff:
-		echo 0x03ff >  /debug/btmrvl/config/gpiogap
-		echo 1 > /debug/btmrvl/config/hscfgcmd
-
-psmode=[n], pscmd
-	These commands are used to enable/disable auto sleep mode
-
-	where the option is::
-
-			1 	-- Enable auto sleep mode
-			0 	-- Disable auto sleep mode
-
-	Usage::
-
-		# Enable auto sleep mode
-		echo 1 > /debug/btmrvl/config/psmode
-		echo 1 > /debug/btmrvl/config/pscmd
-
-		# Disable auto sleep mode
-		echo 0 > /debug/btmrvl/config/psmode
-		echo 1 > /debug/btmrvl/config/pscmd
-
-
-hsmode=[n], hscmd
-	These commands are used to enable host sleep or wake up firmware
-
-	where the option is::
-
-			1	-- Enable host sleep
-			0	-- Wake up firmware
-
-	Usage::
-
-		# Enable host sleep
-		echo 1 > /debug/btmrvl/config/hsmode
-		echo 1 > /debug/btmrvl/config/hscmd
-
-		# Wake up firmware
-		echo 0 > /debug/btmrvl/config/hsmode
-		echo 1 > /debug/btmrvl/config/hscmd
-
-
-Get driver status
-=================
-
-Path:	/debug/btmrvl/status/
-
-Usage::
-
-	cat /debug/btmrvl/status/<args>
-
-where the args are:
-
-curpsmode
-	This command displays current auto sleep status.
-
-psstate
-	This command display the power save state.
-
-hsstate
-	This command display the host sleep state.
-
-txdnldrdy
-	This command displays the value of Tx download ready flag.
-
-Issuing a raw hci command
-=========================
-
-Use hcitool to issue raw hci command, refer to hcitool manual
-
-Usage::
-
-	Hcitool cmd <ogf> <ocf> [Parameters]
-
-Interface Control Command::
-
-	hcitool cmd 0x3f 0x5b 0xf5 0x01 0x00    --Enable All interface
-	hcitool cmd 0x3f 0x5b 0xf5 0x01 0x01    --Enable Wlan interface
-	hcitool cmd 0x3f 0x5b 0xf5 0x01 0x02    --Enable BT interface
-	hcitool cmd 0x3f 0x5b 0xf5 0x00 0x00    --Disable All interface
-	hcitool cmd 0x3f 0x5b 0xf5 0x00 0x01    --Disable Wlan interface
-	hcitool cmd 0x3f 0x5b 0xf5 0x00 0x02    --Disable BT interface
-
-SD8688 firmware
-===============
-
-Images:
-
-- /lib/firmware/sd8688_helper.bin
-- /lib/firmware/sd8688.bin
-
-
-The images can be downloaded from:
-
-git.infradead.org/users/dwmw2/linux-firmware.git/libertas/
diff --git a/Documentation/clearing-warn-once.txt b/Documentation/clearing-warn-once.txt
deleted file mode 100644
index 211fd926cf00..000000000000
--- a/Documentation/clearing-warn-once.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-Clearing WARN_ONCE
-------------------
-
-WARN_ONCE / WARN_ON_ONCE / printk_once only emit a message once.
-
-echo 1 > /sys/kernel/debug/clear_warn_once
-
-clears the state and allows the warnings to print once again.
-This can be useful after test suite runs to reproduce problems.
diff --git a/Documentation/cma/debugfs.rst b/Documentation/cma/debugfs.rst
deleted file mode 100644
index 518fe401b5ee..000000000000
--- a/Documentation/cma/debugfs.rst
+++ /dev/null
@@ -1,27 +0,0 @@
-:orphan:
-
-=====================
-CMA Debugfs Interface
-=====================
-
-The CMA debugfs interface is useful to retrieve basic information out of the
-different CMA areas and to test allocation/release in each of the areas.
-
-Each CMA zone represents a directory under <debugfs>/cma/, indexed by the
-kernel's CMA index. So the first CMA zone would be:
-
-	<debugfs>/cma/cma-0
-
-The structure of the files created under that directory is as follows:
-
- - [RO] base_pfn: The base PFN (Page Frame Number) of the zone.
- - [RO] count: Amount of memory in the CMA area.
- - [RO] order_per_bit: Order of pages represented by one bit.
- - [RO] bitmap: The bitmap of page states in the zone.
- - [WO] alloc: Allocate N pages from that CMA area. For example::
-
-	echo 5 > <debugfs>/cma/cma-2/alloc
-
-would try to allocate 5 pages from the cma-2 area.
-
- - [WO] free: Free N pages from that CMA area, similar to the above.
diff --git a/Documentation/cpu-load.txt b/Documentation/cpu-load.txt
deleted file mode 100644
index 2d01ce43d2a2..000000000000
--- a/Documentation/cpu-load.txt
+++ /dev/null
@@ -1,114 +0,0 @@
-========
-CPU load
-========
-
-Linux exports various bits of information via ``/proc/stat`` and
-``/proc/uptime`` that userland tools, such as top(1), use to calculate
-the average time system spent in a particular state, for example::
-
-    $ iostat
-    Linux 2.6.18.3-exp (linmac)     02/20/2007
-
-    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
-              10.01    0.00    2.92    5.44    0.00   81.63
-
-    ...
-
-Here the system thinks that over the default sampling period the
-system spent 10.01% of the time doing work in user space, 2.92% in the
-kernel, and was overall 81.63% of the time idle.
-
-In most cases the ``/proc/stat``	 information reflects the reality quite
-closely, however due to the nature of how/when the kernel collects
-this data sometimes it can not be trusted at all.
-
-So how is this information collected?  Whenever timer interrupt is
-signalled the kernel looks what kind of task was running at this
-moment and increments the counter that corresponds to this tasks
-kind/state.  The problem with this is that the system could have
-switched between various states multiple times between two timer
-interrupts yet the counter is incremented only for the last state.
-
-
-Example
--------
-
-If we imagine the system with one task that periodically burns cycles
-in the following manner::
-
-     time line between two timer interrupts
-    |--------------------------------------|
-     ^                                    ^
-     |_ something begins working          |
-                                          |_ something goes to sleep
-                                         (only to be awaken quite soon)
-
-In the above situation the system will be 0% loaded according to the
-``/proc/stat`` (since the timer interrupt will always happen when the
-system is executing the idle handler), but in reality the load is
-closer to 99%.
-
-One can imagine many more situations where this behavior of the kernel
-will lead to quite erratic information inside ``/proc/stat``::
-
-
-	/* gcc -o hog smallhog.c */
-	#include <time.h>
-	#include <limits.h>
-	#include <signal.h>
-	#include <sys/time.h>
-	#define HIST 10
-
-	static volatile sig_atomic_t stop;
-
-	static void sighandler (int signr)
-	{
-	(void) signr;
-	stop = 1;
-	}
-	static unsigned long hog (unsigned long niters)
-	{
-	stop = 0;
-	while (!stop && --niters);
-	return niters;
-	}
-	int main (void)
-	{
-	int i;
-	struct itimerval it = { .it_interval = { .tv_sec = 0, .tv_usec = 1 },
-				.it_value = { .tv_sec = 0, .tv_usec = 1 } };
-	sigset_t set;
-	unsigned long v[HIST];
-	double tmp = 0.0;
-	unsigned long n;
-	signal (SIGALRM, &sighandler);
-	setitimer (ITIMER_REAL, &it, NULL);
-
-	hog (ULONG_MAX);
-	for (i = 0; i < HIST; ++i) v[i] = ULONG_MAX - hog (ULONG_MAX);
-	for (i = 0; i < HIST; ++i) tmp += v[i];
-	tmp /= HIST;
-	n = tmp - (tmp / 3.0);
-
-	sigemptyset (&set);
-	sigaddset (&set, SIGALRM);
-
-	for (;;) {
-		hog (n);
-		sigwait (&set, &i);
-	}
-	return 0;
-	}
-
-
-References
-----------
-
-- http://lkml.org/lkml/2007/2/12/6
-- Documentation/filesystems/proc.txt (1.8)
-
-
-Thanks
-------
-
-Con Kolivas, Pavel Machek
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
deleted file mode 100644
index b90dafcc8237..000000000000
--- a/Documentation/cputopology.txt
+++ /dev/null
@@ -1,177 +0,0 @@
-===========================================
-How CPU topology info is exported via sysfs
-===========================================
-
-Export CPU topology info via sysfs. Items (attributes) are similar
-to /proc/cpuinfo output of some architectures.  They reside in
-/sys/devices/system/cpu/cpuX/topology/:
-
-physical_package_id:
-
-	physical package id of cpuX. Typically corresponds to a physical
-	socket number, but the actual value is architecture and platform
-	dependent.
-
-die_id:
-
-	the CPU die ID of cpuX. Typically it is the hardware platform's
-	identifier (rather than the kernel's).  The actual value is
-	architecture and platform dependent.
-
-core_id:
-
-	the CPU core ID of cpuX. Typically it is the hardware platform's
-	identifier (rather than the kernel's).  The actual value is
-	architecture and platform dependent.
-
-book_id:
-
-	the book ID of cpuX. Typically it is the hardware platform's
-	identifier (rather than the kernel's).	The actual value is
-	architecture and platform dependent.
-
-drawer_id:
-
-	the drawer ID of cpuX. Typically it is the hardware platform's
-	identifier (rather than the kernel's).	The actual value is
-	architecture and platform dependent.
-
-core_cpus:
-
-	internal kernel map of CPUs within the same core.
-	(deprecated name: "thread_siblings")
-
-core_cpus_list:
-
-	human-readable list of CPUs within the same core.
-	(deprecated name: "thread_siblings_list");
-
-package_cpus:
-
-	internal kernel map of the CPUs sharing the same physical_package_id.
-	(deprecated name: "core_siblings")
-
-package_cpus_list:
-
-	human-readable list of CPUs sharing the same physical_package_id.
-	(deprecated name: "core_siblings_list")
-
-die_cpus:
-
-	internal kernel map of CPUs within the same die.
-
-die_cpus_list:
-
-	human-readable list of CPUs within the same die.
-
-book_siblings:
-
-	internal kernel map of cpuX's hardware threads within the same
-	book_id.
-
-book_siblings_list:
-
-	human-readable list of cpuX's hardware threads within the same
-	book_id.
-
-drawer_siblings:
-
-	internal kernel map of cpuX's hardware threads within the same
-	drawer_id.
-
-drawer_siblings_list:
-
-	human-readable list of cpuX's hardware threads within the same
-	drawer_id.
-
-Architecture-neutral, drivers/base/topology.c, exports these attributes.
-However, the book and drawer related sysfs files will only be created if
-CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are selected, respectively.
-
-CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are currently only used on s390,
-where they reflect the cpu and cache hierarchy.
-
-For an architecture to support this feature, it must define some of
-these macros in include/asm-XXX/topology.h::
-
-	#define topology_physical_package_id(cpu)
-	#define topology_die_id(cpu)
-	#define topology_core_id(cpu)
-	#define topology_book_id(cpu)
-	#define topology_drawer_id(cpu)
-	#define topology_sibling_cpumask(cpu)
-	#define topology_core_cpumask(cpu)
-	#define topology_die_cpumask(cpu)
-	#define topology_book_cpumask(cpu)
-	#define topology_drawer_cpumask(cpu)
-
-The type of ``**_id macros`` is int.
-The type of ``**_cpumask macros`` is ``(const) struct cpumask *``. The latter
-correspond with appropriate ``**_siblings`` sysfs attributes (except for
-topology_sibling_cpumask() which corresponds with thread_siblings).
-
-To be consistent on all architectures, include/linux/topology.h
-provides default definitions for any of the above macros that are
-not defined by include/asm-XXX/topology.h:
-
-1) topology_physical_package_id: -1
-2) topology_die_id: -1
-3) topology_core_id: 0
-4) topology_sibling_cpumask: just the given CPU
-5) topology_core_cpumask: just the given CPU
-6) topology_die_cpumask: just the given CPU
-
-For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
-default definitions for topology_book_id() and topology_book_cpumask().
-For architectures that don't support drawers (CONFIG_SCHED_DRAWER) there are
-no default definitions for topology_drawer_id() and topology_drawer_cpumask().
-
-Additionally, CPU topology information is provided under
-/sys/devices/system/cpu and includes these files.  The internal
-source for the output is in brackets ("[]").
-
-    =========== ==========================================================
-    kernel_max: the maximum CPU index allowed by the kernel configuration.
-		[NR_CPUS-1]
-
-    offline:	CPUs that are not online because they have been
-		HOTPLUGGED off (see cpu-hotplug.txt) or exceed the limit
-		of CPUs allowed by the kernel configuration (kernel_max
-		above). [~cpu_online_mask + cpus >= NR_CPUS]
-
-    online:	CPUs that are online and being scheduled [cpu_online_mask]
-
-    possible:	CPUs that have been allocated resources and can be
-		brought online if they are present. [cpu_possible_mask]
-
-    present:	CPUs that have been identified as being present in the
-		system. [cpu_present_mask]
-    =========== ==========================================================
-
-The format for the above output is compatible with cpulist_parse()
-[see <linux/cpumask.h>].  Some examples follow.
-
-In this example, there are 64 CPUs in the system but cpus 32-63 exceed
-the kernel max which is limited to 0..31 by the NR_CPUS config option
-being 32.  Note also that CPUs 2 and 4-31 are not online but could be
-brought online as they are both present and possible::
-
-     kernel_max: 31
-        offline: 2,4-31,32-63
-         online: 0-1,3
-       possible: 0-31
-        present: 0-31
-
-In this example, the NR_CPUS config option is 128, but the kernel was
-started with possible_cpus=144.  There are 4 CPUs in the system and cpu2
-was manually taken offline (and is the only CPU that can be brought
-online.)::
-
-     kernel_max: 127
-        offline: 2,4-127,128-143
-         online: 0-1,3
-       possible: 0-127
-        present: 0-3
-
-See cpu-hotplug.txt for the possible_cpus=NUM kernel start parameter
-as well as more information on the various cpumasks.
diff --git a/Documentation/efi-stub.txt b/Documentation/efi-stub.txt
deleted file mode 100644
index 833edb0d0bc4..000000000000
--- a/Documentation/efi-stub.txt
+++ /dev/null
@@ -1,100 +0,0 @@
-=================
-The EFI Boot Stub
-=================
-
-On the x86 and ARM platforms, a kernel zImage/bzImage can masquerade
-as a PE/COFF image, thereby convincing EFI firmware loaders to load
-it as an EFI executable. The code that modifies the bzImage header,
-along with the EFI-specific entry point that the firmware loader
-jumps to are collectively known as the "EFI boot stub", and live in
-arch/x86/boot/header.S and arch/x86/boot/compressed/eboot.c,
-respectively. For ARM the EFI stub is implemented in
-arch/arm/boot/compressed/efi-header.S and
-arch/arm/boot/compressed/efi-stub.c. EFI stub code that is shared
-between architectures is in drivers/firmware/efi/libstub.
-
-For arm64, there is no compressed kernel support, so the Image itself
-masquerades as a PE/COFF image and the EFI stub is linked into the
-kernel. The arm64 EFI stub lives in arch/arm64/kernel/efi-entry.S
-and drivers/firmware/efi/libstub/arm64-stub.c.
-
-By using the EFI boot stub it's possible to boot a Linux kernel
-without the use of a conventional EFI boot loader, such as grub or
-elilo. Since the EFI boot stub performs the jobs of a boot loader, in
-a certain sense it *IS* the boot loader.
-
-The EFI boot stub is enabled with the CONFIG_EFI_STUB kernel option.
-
-
-How to install bzImage.efi
---------------------------
-
-The bzImage located in arch/x86/boot/bzImage must be copied to the EFI
-System Partition (ESP) and renamed with the extension ".efi". Without
-the extension the EFI firmware loader will refuse to execute it. It's
-not possible to execute bzImage.efi from the usual Linux file systems
-because EFI firmware doesn't have support for them. For ARM the
-arch/arm/boot/zImage should be copied to the system partition, and it
-may not need to be renamed. Similarly for arm64, arch/arm64/boot/Image
-should be copied but not necessarily renamed.
-
-
-Passing kernel parameters from the EFI shell
---------------------------------------------
-
-Arguments to the kernel can be passed after bzImage.efi, e.g.::
-
-	fs0:> bzImage.efi console=ttyS0 root=/dev/sda4
-
-
-The "initrd=" option
---------------------
-
-Like most boot loaders, the EFI stub allows the user to specify
-multiple initrd files using the "initrd=" option. This is the only EFI
-stub-specific command line parameter, everything else is passed to the
-kernel when it boots.
-
-The path to the initrd file must be an absolute path from the
-beginning of the ESP, relative path names do not work. Also, the path
-is an EFI-style path and directory elements must be separated with
-backslashes (\). For example, given the following directory layout::
-
-  fs0:>
-	Kernels\
-			bzImage.efi
-			initrd-large.img
-
-	Ramdisks\
-			initrd-small.img
-			initrd-medium.img
-
-to boot with the initrd-large.img file if the current working
-directory is fs0:\Kernels, the following command must be used::
-
-	fs0:\Kernels> bzImage.efi initrd=\Kernels\initrd-large.img
-
-Notice how bzImage.efi can be specified with a relative path. That's
-because the image we're executing is interpreted by the EFI shell,
-which understands relative paths, whereas the rest of the command line
-is passed to bzImage.efi.
-
-
-The "dtb=" option
------------------
-
-For the ARM and arm64 architectures, a device tree must be provided to
-the kernel. Normally firmware shall supply the device tree via the
-EFI CONFIGURATION TABLE. However, the "dtb=" command line option can
-be used to override the firmware supplied device tree, or to supply
-one when firmware is unable to.
-
-Please note: Firmware adds runtime configuration information to the
-device tree before booting the kernel. If dtb= is used to override
-the device tree, then any runtime data provided by firmware will be
-lost. The dtb= option should only be used either as a debug tool, or
-as a last resort when a device tree is not provided in the EFI
-CONFIGURATION TABLE.
-
-"dtb=" is processed in the same manner as the "initrd=" option that is
-described above.
diff --git a/Documentation/fb/vesafb.rst b/Documentation/fb/vesafb.rst
index 2ed0dfb661cf..6821c87b7893 100644
--- a/Documentation/fb/vesafb.rst
+++ b/Documentation/fb/vesafb.rst
@@ -30,7 +30,7 @@ How to use it?
 ==============
 
 Switching modes is done using the vga=... boot parameter.  Read
-Documentation/svga.txt for details.
+Documentation/admin-guide/svga.rst for details.
 
 You should compile in both vgacon (for text mode) and vesafb (for
 graphics mode). Which of them takes over the console depends on
diff --git a/Documentation/highuid.txt b/Documentation/highuid.txt
deleted file mode 100644
index 6ee70465c0ea..000000000000
--- a/Documentation/highuid.txt
+++ /dev/null
@@ -1,80 +0,0 @@
-===================================================
-Notes on the change from 16-bit UIDs to 32-bit UIDs
-===================================================
-
-:Author: Chris Wing <wingc@umich.edu>
-:Last updated: January 11, 2000
-
-- kernel code MUST take into account __kernel_uid_t and __kernel_uid32_t
-  when communicating between user and kernel space in an ioctl or data
-  structure.
-
-- kernel code should use uid_t and gid_t in kernel-private structures and
-  code.
-
-What's left to be done for 32-bit UIDs on all Linux architectures:
-
-- Disk quotas have an interesting limitation that is not related to the
-  maximum UID/GID. They are limited by the maximum file size on the
-  underlying filesystem, because quota records are written at offsets
-  corresponding to the UID in question.
-  Further investigation is needed to see if the quota system can cope
-  properly with huge UIDs. If it can deal with 64-bit file offsets on all 
-  architectures, this should not be a problem.
-
-- Decide whether or not to keep backwards compatibility with the system
-  accounting file, or if we should break it as the comments suggest
-  (currently, the old 16-bit UID and GID are still written to disk, and
-  part of the former pad space is used to store separate 32-bit UID and
-  GID)
-
-- Need to validate that OS emulation calls the 16-bit UID
-  compatibility syscalls, if the OS being emulated used 16-bit UIDs, or
-  uses the 32-bit UID system calls properly otherwise.
-
-  This affects at least:
-
-	- iBCS on Intel
-
-	- sparc32 emulation on sparc64
-	  (need to support whatever new 32-bit UID system calls are added to
-	  sparc32)
-
-- Validate that all filesystems behave properly.
-
-  At present, 32-bit UIDs _should_ work for:
-
-	- ext2
-	- ufs
-	- isofs
-	- nfs
-	- coda
-	- udf
-
-  Ioctl() fixups have been made for:
-
-	- ncpfs
-	- smbfs
-
-  Filesystems with simple fixups to prevent 16-bit UID wraparound:
-
-	- minix
-	- sysv
-	- qnx4
-
-  Other filesystems have not been checked yet.
-
-- The ncpfs and smpfs filesystems cannot presently use 32-bit UIDs in
-  all ioctl()s. Some new ioctl()s have been added with 32-bit UIDs, but
-  more are needed. (as well as new user<->kernel data structures)
-
-- The ELF core dump format only supports 16-bit UIDs on arm, i386, m68k,
-  sh, and sparc32. Fixing this is probably not that important, but would
-  require adding a new ELF section.
-
-- The ioctl()s used to control the in-kernel NFS server only support
-  16-bit UIDs on arm, i386, m68k, sh, and sparc32.
-
-- make sure that the UID mapping feature of AX25 networking works properly
-  (it should be safe because it's always used a 32-bit integer to
-  communicate between user and kernel)
diff --git a/Documentation/hw_random.txt b/Documentation/hw_random.txt
deleted file mode 100644
index 121de96e395e..000000000000
--- a/Documentation/hw_random.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-==========================================================
-Linux support for random number generator in i8xx chipsets
-==========================================================
-
-Introduction
-============
-
-The hw_random framework is software that makes use of a
-special hardware feature on your CPU or motherboard,
-a Random Number Generator (RNG).  The software has two parts:
-a core providing the /dev/hwrng character device and its
-sysfs support, plus a hardware-specific driver that plugs
-into that core.
-
-To make the most effective use of these mechanisms, you
-should download the support software as well.  Download the
-latest version of the "rng-tools" package from the
-hw_random driver's official Web site:
-
-	http://sourceforge.net/projects/gkernel/
-
-Those tools use /dev/hwrng to fill the kernel entropy pool,
-which is used internally and exported by the /dev/urandom and
-/dev/random special files.
-
-Theory of operation
-===================
-
-CHARACTER DEVICE.  Using the standard open()
-and read() system calls, you can read random data from
-the hardware RNG device.  This data is NOT CHECKED by any
-fitness tests, and could potentially be bogus (if the
-hardware is faulty or has been tampered with).  Data is only
-output if the hardware "has-data" flag is set, but nevertheless
-a security-conscious person would run fitness tests on the
-data before assuming it is truly random.
-
-The rng-tools package uses such tests in "rngd", and lets you
-run them by hand with a "rngtest" utility.
-
-/dev/hwrng is char device major 10, minor 183.
-
-CLASS DEVICE.  There is a /sys/class/misc/hw_random node with
-two unique attributes, "rng_available" and "rng_current".  The
-"rng_available" attribute lists the hardware-specific drivers
-available, while "rng_current" lists the one which is currently
-connected to /dev/hwrng.  If your system has more than one
-RNG available, you may change the one used by writing a name from
-the list in "rng_available" into "rng_current".
-
-==========================================================================
-
-
-Hardware driver for Intel/AMD/VIA Random Number Generators (RNG)
-	- Copyright 2000,2001 Jeff Garzik <jgarzik@pobox.com>
-	- Copyright 2000,2001 Philipp Rumpf <prumpf@mandrakesoft.com>
-
-
-About the Intel RNG hardware, from the firmware hub datasheet
-=============================================================
-
-The Firmware Hub integrates a Random Number Generator (RNG)
-using thermal noise generated from inherently random quantum
-mechanical properties of silicon. When not generating new random
-bits the RNG circuitry will enter a low power state. Intel will
-provide a binary software driver to give third party software
-access to our RNG for use as a security feature. At this time,
-the RNG is only to be used with a system in an OS-present state.
-
-Intel RNG Driver notes
-======================
-
-FIXME: support poll(2)
-
-.. note::
-
-	request_mem_region was removed, for three reasons:
-
-	1) Only one RNG is supported by this driver;
-	2) The location used by the RNG is a fixed location in
-	   MMIO-addressable memory;
-	3) users with properly working BIOS e820 handling will always
-	   have the region in which the RNG is located reserved, so
-	   request_mem_region calls always fail for proper setups.
-	   However, for people who use mem=XX, BIOS e820 information is
-	   **not** in /proc/iomem, and request_mem_region(RNG_ADDR) can
-	   succeed.
-
-Driver details
-==============
-
-Based on:
-	Intel 82802AB/82802AC Firmware Hub (FWH) Datasheet
-	May 1999 Order Number: 290658-002 R
-
-Intel 82802 Firmware Hub:
-	Random Number Generator
-	Programmer's Reference Manual
-	December 1999 Order Number: 298029-001 R
-
-Intel 82802 Firmware HUB Random Number Generator Driver
-	Copyright (c) 2000 Matt Sottek <msottek@quiknet.com>
-
-Special thanks to Matt Sottek.  I did the "guts", he
-did the "brains" and all the testing.
diff --git a/Documentation/iostats.txt b/Documentation/iostats.txt
deleted file mode 100644
index 5d63b18bd6d1..000000000000
--- a/Documentation/iostats.txt
+++ /dev/null
@@ -1,197 +0,0 @@
-=====================
-I/O statistics fields
-=====================
-
-Since 2.4.20 (and some versions before, with patches), and 2.5.45,
-more extensive disk statistics have been introduced to help measure disk
-activity. Tools such as ``sar`` and ``iostat`` typically interpret these and do
-the work for you, but in case you are interested in creating your own
-tools, the fields are explained here.
-
-In 2.4 now, the information is found as additional fields in
-``/proc/partitions``.  In 2.6 and upper, the same information is found in two
-places: one is in the file ``/proc/diskstats``, and the other is within
-the sysfs file system, which must be mounted in order to obtain
-the information. Throughout this document we'll assume that sysfs
-is mounted on ``/sys``, although of course it may be mounted anywhere.
-Both ``/proc/diskstats`` and sysfs use the same source for the information
-and so should not differ.
-
-Here are examples of these different formats::
-
-   2.4:
-      3     0   39082680 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
-      3     1    9221278 hda1 35486 0 35496 38030 0 0 0 0 0 38030 38030
-
-   2.6+ sysfs:
-      446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
-      35486    38030    38030    38030
-
-   2.6+ diskstats:
-      3    0   hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
-      3    1   hda1 35486 38030 38030 38030
-
-   4.18+ diskstats:
-      3    0   hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160 0 0 0 0
-
-On 2.4 you might execute ``grep 'hda ' /proc/partitions``. On 2.6+, you have
-a choice of ``cat /sys/block/hda/stat`` or ``grep 'hda ' /proc/diskstats``.
-
-The advantage of one over the other is that the sysfs choice works well
-if you are watching a known, small set of disks.  ``/proc/diskstats`` may
-be a better choice if you are watching a large number of disks because
-you'll avoid the overhead of 50, 100, or 500 or more opens/closes with
-each snapshot of your disk statistics.
-
-In 2.4, the statistics fields are those after the device name. In
-the above example, the first field of statistics would be 446216.
-By contrast, in 2.6+ if you look at ``/sys/block/hda/stat``, you'll
-find just the eleven fields, beginning with 446216.  If you look at
-``/proc/diskstats``, the eleven fields will be preceded by the major and
-minor device numbers, and device name.  Each of these formats provides
-eleven fields of statistics, each meaning exactly the same things.
-All fields except field 9 are cumulative since boot.  Field 9 should
-go to zero as I/Os complete; all others only increase (unless they
-overflow and wrap).  Yes, these are (32-bit or 64-bit) unsigned long
-(native word size) numbers, and on a very busy or long-lived system they
-may wrap. Applications should be prepared to deal with that; unless
-your observations are measured in large numbers of minutes or hours,
-they should not wrap twice before you notice them.
-
-Each set of stats only applies to the indicated device; if you want
-system-wide stats you'll have to find all the devices and sum them all up.
-
-Field  1 -- # of reads completed
-    This is the total number of reads completed successfully.
-
-Field  2 -- # of reads merged, field 6 -- # of writes merged
-    Reads and writes which are adjacent to each other may be merged for
-    efficiency.  Thus two 4K reads may become one 8K read before it is
-    ultimately handed to the disk, and so it will be counted (and queued)
-    as only one I/O.  This field lets you know how often this was done.
-
-Field  3 -- # of sectors read
-    This is the total number of sectors read successfully.
-
-Field  4 -- # of milliseconds spent reading
-    This is the total number of milliseconds spent by all reads (as
-    measured from __make_request() to end_that_request_last()).
-
-Field  5 -- # of writes completed
-    This is the total number of writes completed successfully.
-
-Field  6 -- # of writes merged
-    See the description of field 2.
-
-Field  7 -- # of sectors written
-    This is the total number of sectors written successfully.
-
-Field  8 -- # of milliseconds spent writing
-    This is the total number of milliseconds spent by all writes (as
-    measured from __make_request() to end_that_request_last()).
-
-Field  9 -- # of I/Os currently in progress
-    The only field that should go to zero. Incremented as requests are
-    given to appropriate struct request_queue and decremented as they finish.
-
-Field 10 -- # of milliseconds spent doing I/Os
-    This field increases so long as field 9 is nonzero.
-
-    Since 5.0 this field counts jiffies when at least one request was
-    started or completed. If request runs more than 2 jiffies then some
-    I/O time will not be accounted unless there are other requests.
-
-Field 11 -- weighted # of milliseconds spent doing I/Os
-    This field is incremented at each I/O start, I/O completion, I/O
-    merge, or read of these stats by the number of I/Os in progress
-    (field 9) times the number of milliseconds spent doing I/O since the
-    last update of this field.  This can provide an easy measure of both
-    I/O completion time and the backlog that may be accumulating.
-
-Field 12 -- # of discards completed
-    This is the total number of discards completed successfully.
-
-Field 13 -- # of discards merged
-    See the description of field 2
-
-Field 14 -- # of sectors discarded
-    This is the total number of sectors discarded successfully.
-
-Field 15 -- # of milliseconds spent discarding
-    This is the total number of milliseconds spent by all discards (as
-    measured from __make_request() to end_that_request_last()).
-
-To avoid introducing performance bottlenecks, no locks are held while
-modifying these counters.  This implies that minor inaccuracies may be
-introduced when changes collide, so (for instance) adding up all the
-read I/Os issued per partition should equal those made to the disks ...
-but due to the lack of locking it may only be very close.
-
-In 2.6+, there are counters for each CPU, which make the lack of locking
-almost a non-issue.  When the statistics are read, the per-CPU counters
-are summed (possibly overflowing the unsigned long variable they are
-summed to) and the result given to the user.  There is no convenient
-user interface for accessing the per-CPU counters themselves.
-
-Disks vs Partitions
--------------------
-
-There were significant changes between 2.4 and 2.6+ in the I/O subsystem.
-As a result, some statistic information disappeared. The translation from
-a disk address relative to a partition to the disk address relative to
-the host disk happens much earlier.  All merges and timings now happen
-at the disk level rather than at both the disk and partition level as
-in 2.4.  Consequently, you'll see a different statistics output on 2.6+ for
-partitions from that for disks.  There are only *four* fields available
-for partitions on 2.6+ machines.  This is reflected in the examples above.
-
-Field  1 -- # of reads issued
-    This is the total number of reads issued to this partition.
-
-Field  2 -- # of sectors read
-    This is the total number of sectors requested to be read from this
-    partition.
-
-Field  3 -- # of writes issued
-    This is the total number of writes issued to this partition.
-
-Field  4 -- # of sectors written
-    This is the total number of sectors requested to be written to
-    this partition.
-
-Note that since the address is translated to a disk-relative one, and no
-record of the partition-relative address is kept, the subsequent success
-or failure of the read cannot be attributed to the partition.  In other
-words, the number of reads for partitions is counted slightly before time
-of queuing for partitions, and at completion for whole disks.  This is
-a subtle distinction that is probably uninteresting for most cases.
-
-More significant is the error induced by counting the numbers of
-reads/writes before merges for partitions and after for disks. Since a
-typical workload usually contains a lot of successive and adjacent requests,
-the number of reads/writes issued can be several times higher than the
-number of reads/writes completed.
-
-In 2.6.25, the full statistic set is again available for partitions and
-disk and partition statistics are consistent again. Since we still don't
-keep record of the partition-relative address, an operation is attributed to
-the partition which contains the first sector of the request after the
-eventual merges. As requests can be merged across partition, this could lead
-to some (probably insignificant) inaccuracy.
-
-Additional notes
-----------------
-
-In 2.6+, sysfs is not mounted by default.  If your distribution of
-Linux hasn't added it already, here's the line you'll want to add to
-your ``/etc/fstab``::
-
-	none /sys sysfs defaults 0 0
-
-
-In 2.6+, all disk statistics were removed from ``/proc/stat``.  In 2.4, they
-appear in both ``/proc/partitions`` and ``/proc/stat``, although the ones in
-``/proc/stat`` take a very different format from those in ``/proc/partitions``
-(see proc(5), if your system has it.)
-
--- ricklind@us.ibm.com
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
deleted file mode 100644
index 4f18456dd3b1..000000000000
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ /dev/null
@@ -1,356 +0,0 @@
-==========================================
-Reducing OS jitter due to per-cpu kthreads
-==========================================
-
-This document lists per-CPU kthreads in the Linux kernel and presents
-options to control their OS jitter.  Note that non-per-CPU kthreads are
-not listed here.  To reduce OS jitter from non-per-CPU kthreads, bind
-them to a "housekeeping" CPU dedicated to such work.
-
-References
-==========
-
--	Documentation/IRQ-affinity.txt:  Binding interrupts to sets of CPUs.
-
--	Documentation/admin-guide/cgroup-v1:  Using cgroups to bind tasks to sets of CPUs.
-
--	man taskset:  Using the taskset command to bind tasks to sets
-	of CPUs.
-
--	man sched_setaffinity:  Using the sched_setaffinity() system
-	call to bind tasks to sets of CPUs.
-
--	/sys/devices/system/cpu/cpuN/online:  Control CPU N's hotplug state,
-	writing "0" to offline and "1" to online.
-
--	In order to locate kernel-generated OS jitter on CPU N:
-
-		cd /sys/kernel/debug/tracing
-		echo 1 > max_graph_depth # Increase the "1" for more detail
-		echo function_graph > current_tracer
-		# run workload
-		cat per_cpu/cpuN/trace
-
-kthreads
-========
-
-Name:
-  ehca_comp/%u
-
-Purpose:
-  Periodically process Infiniband-related work.
-
-To reduce its OS jitter, do any of the following:
-
-1.	Don't use eHCA Infiniband hardware, instead choosing hardware
-	that does not require per-CPU kthreads.  This will prevent these
-	kthreads from being created in the first place.  (This will
-	work for most people, as this hardware, though important, is
-	relatively old and is produced in relatively low unit volumes.)
-2.	Do all eHCA-Infiniband-related work on other CPUs, including
-	interrupts.
-3.	Rework the eHCA driver so that its per-CPU kthreads are
-	provisioned only on selected CPUs.
-
-
-Name:
-  irq/%d-%s
-
-Purpose:
-  Handle threaded interrupts.
-
-To reduce its OS jitter, do the following:
-
-1.	Use irq affinity to force the irq threads to execute on
-	some other CPU.
-
-Name:
-  kcmtpd_ctr_%d
-
-Purpose:
-  Handle Bluetooth work.
-
-To reduce its OS jitter, do one of the following:
-
-1.	Don't use Bluetooth, in which case these kthreads won't be
-	created in the first place.
-2.	Use irq affinity to force Bluetooth-related interrupts to
-	occur on some other CPU and furthermore initiate all
-	Bluetooth activity on some other CPU.
-
-Name:
-  ksoftirqd/%u
-
-Purpose:
-  Execute softirq handlers when threaded or when under heavy load.
-
-To reduce its OS jitter, each softirq vector must be handled
-separately as follows:
-
-TIMER_SOFTIRQ
--------------
-
-Do all of the following:
-
-1.	To the extent possible, keep the CPU out of the kernel when it
-	is non-idle, for example, by avoiding system calls and by forcing
-	both kernel threads and interrupts to execute elsewhere.
-2.	Build with CONFIG_HOTPLUG_CPU=y.  After boot completes, force
-	the CPU offline, then bring it back online.  This forces
-	recurring timers to migrate elsewhere.	If you are concerned
-	with multiple CPUs, force them all offline before bringing the
-	first one back online.  Once you have onlined the CPUs in question,
-	do not offline any other CPUs, because doing so could force the
-	timer back onto one of the CPUs in question.
-
-NET_TX_SOFTIRQ and NET_RX_SOFTIRQ
----------------------------------
-
-Do all of the following:
-
-1.	Force networking interrupts onto other CPUs.
-2.	Initiate any network I/O on other CPUs.
-3.	Once your application has started, prevent CPU-hotplug operations
-	from being initiated from tasks that might run on the CPU to
-	be de-jittered.  (It is OK to force this CPU offline and then
-	bring it back online before you start your application.)
-
-BLOCK_SOFTIRQ
--------------
-
-Do all of the following:
-
-1.	Force block-device interrupts onto some other CPU.
-2.	Initiate any block I/O on other CPUs.
-3.	Once your application has started, prevent CPU-hotplug operations
-	from being initiated from tasks that might run on the CPU to
-	be de-jittered.  (It is OK to force this CPU offline and then
-	bring it back online before you start your application.)
-
-IRQ_POLL_SOFTIRQ
-----------------
-
-Do all of the following:
-
-1.	Force block-device interrupts onto some other CPU.
-2.	Initiate any block I/O and block-I/O polling on other CPUs.
-3.	Once your application has started, prevent CPU-hotplug operations
-	from being initiated from tasks that might run on the CPU to
-	be de-jittered.  (It is OK to force this CPU offline and then
-	bring it back online before you start your application.)
-
-TASKLET_SOFTIRQ
----------------
-
-Do one or more of the following:
-
-1.	Avoid use of drivers that use tasklets.  (Such drivers will contain
-	calls to things like tasklet_schedule().)
-2.	Convert all drivers that you must use from tasklets to workqueues.
-3.	Force interrupts for drivers using tasklets onto other CPUs,
-	and also do I/O involving these drivers on other CPUs.
-
-SCHED_SOFTIRQ
--------------
-
-Do all of the following:
-
-1.	Avoid sending scheduler IPIs to the CPU to be de-jittered,
-	for example, ensure that at most one runnable kthread is present
-	on that CPU.  If a thread that expects to run on the de-jittered
-	CPU awakens, the scheduler will send an IPI that can result in
-	a subsequent SCHED_SOFTIRQ.
-2.	CONFIG_NO_HZ_FULL=y and ensure that the CPU to be de-jittered
-	is marked as an adaptive-ticks CPU using the "nohz_full="
-	boot parameter.  This reduces the number of scheduler-clock
-	interrupts that the de-jittered CPU receives, minimizing its
-	chances of being selected to do the load balancing work that
-	runs in SCHED_SOFTIRQ context.
-3.	To the extent possible, keep the CPU out of the kernel when it
-	is non-idle, for example, by avoiding system calls and by
-	forcing both kernel threads and interrupts to execute elsewhere.
-	This further reduces the number of scheduler-clock interrupts
-	received by the de-jittered CPU.
-
-HRTIMER_SOFTIRQ
----------------
-
-Do all of the following:
-
-1.	To the extent possible, keep the CPU out of the kernel when it
-	is non-idle.  For example, avoid system calls and force both
-	kernel threads and interrupts to execute elsewhere.
-2.	Build with CONFIG_HOTPLUG_CPU=y.  Once boot completes, force the
-	CPU offline, then bring it back online.  This forces recurring
-	timers to migrate elsewhere.  If you are concerned with multiple
-	CPUs, force them all offline before bringing the first one
-	back online.  Once you have onlined the CPUs in question, do not
-	offline any other CPUs, because doing so could force the timer
-	back onto one of the CPUs in question.
-
-RCU_SOFTIRQ
------------
-
-Do at least one of the following:
-
-1.	Offload callbacks and keep the CPU in either dyntick-idle or
-	adaptive-ticks state by doing all of the following:
-
-	a.	CONFIG_NO_HZ_FULL=y and ensure that the CPU to be
-		de-jittered is marked as an adaptive-ticks CPU using the
-		"nohz_full=" boot parameter.  Bind the rcuo kthreads to
-		housekeeping CPUs, which can tolerate OS jitter.
-	b.	To the extent possible, keep the CPU out of the kernel
-		when it is non-idle, for example, by avoiding system
-		calls and by forcing both kernel threads and interrupts
-		to execute elsewhere.
-
-2.	Enable RCU to do its processing remotely via dyntick-idle by
-	doing all of the following:
-
-	a.	Build with CONFIG_NO_HZ=y and CONFIG_RCU_FAST_NO_HZ=y.
-	b.	Ensure that the CPU goes idle frequently, allowing other
-		CPUs to detect that it has passed through an RCU quiescent
-		state.	If the kernel is built with CONFIG_NO_HZ_FULL=y,
-		userspace execution also allows other CPUs to detect that
-		the CPU in question has passed through a quiescent state.
-	c.	To the extent possible, keep the CPU out of the kernel
-		when it is non-idle, for example, by avoiding system
-		calls and by forcing both kernel threads and interrupts
-		to execute elsewhere.
-
-Name:
-  kworker/%u:%d%s (cpu, id, priority)
-
-Purpose:
-  Execute workqueue requests
-
-To reduce its OS jitter, do any of the following:
-
-1.	Run your workload at a real-time priority, which will allow
-	preempting the kworker daemons.
-2.	A given workqueue can be made visible in the sysfs filesystem
-	by passing the WQ_SYSFS to that workqueue's alloc_workqueue().
-	Such a workqueue can be confined to a given subset of the
-	CPUs using the ``/sys/devices/virtual/workqueue/*/cpumask`` sysfs
-	files.	The set of WQ_SYSFS workqueues can be displayed using
-	"ls sys/devices/virtual/workqueue".  That said, the workqueues
-	maintainer would like to caution people against indiscriminately
-	sprinkling WQ_SYSFS across all the workqueues.	The reason for
-	caution is that it is easy to add WQ_SYSFS, but because sysfs is
-	part of the formal user/kernel API, it can be nearly impossible
-	to remove it, even if its addition was a mistake.
-3.	Do any of the following needed to avoid jitter that your
-	application cannot tolerate:
-
-	a.	Build your kernel with CONFIG_SLUB=y rather than
-		CONFIG_SLAB=y, thus avoiding the slab allocator's periodic
-		use of each CPU's workqueues to run its cache_reap()
-		function.
-	b.	Avoid using oprofile, thus avoiding OS jitter from
-		wq_sync_buffer().
-	c.	Limit your CPU frequency so that a CPU-frequency
-		governor is not required, possibly enlisting the aid of
-		special heatsinks or other cooling technologies.  If done
-		correctly, and if you CPU architecture permits, you should
-		be able to build your kernel with CONFIG_CPU_FREQ=n to
-		avoid the CPU-frequency governor periodically running
-		on each CPU, including cs_dbs_timer() and od_dbs_timer().
-
-		WARNING:  Please check your CPU specifications to
-		make sure that this is safe on your particular system.
-	d.	As of v3.18, Christoph Lameter's on-demand vmstat workers
-		commit prevents OS jitter due to vmstat_update() on
-		CONFIG_SMP=y systems.  Before v3.18, is not possible
-		to entirely get rid of the OS jitter, but you can
-		decrease its frequency by writing a large value to
-		/proc/sys/vm/stat_interval.  The default value is HZ,
-		for an interval of one second.	Of course, larger values
-		will make your virtual-memory statistics update more
-		slowly.  Of course, you can also run your workload at
-		a real-time priority, thus preempting vmstat_update(),
-		but if your workload is CPU-bound, this is a bad idea.
-		However, there is an RFC patch from Christoph Lameter
-		(based on an earlier one from Gilad Ben-Yossef) that
-		reduces or even eliminates vmstat overhead for some
-		workloads at https://lkml.org/lkml/2013/9/4/379.
-	e.	Boot with "elevator=noop" to avoid workqueue use by
-		the block layer.
-	f.	If running on high-end powerpc servers, build with
-		CONFIG_PPC_RTAS_DAEMON=n.  This prevents the RTAS
-		daemon from running on each CPU every second or so.
-		(This will require editing Kconfig files and will defeat
-		this platform's RAS functionality.)  This avoids jitter
-		due to the rtas_event_scan() function.
-		WARNING:  Please check your CPU specifications to
-		make sure that this is safe on your particular system.
-	g.	If running on Cell Processor, build your kernel with
-		CBE_CPUFREQ_SPU_GOVERNOR=n to avoid OS jitter from
-		spu_gov_work().
-		WARNING:  Please check your CPU specifications to
-		make sure that this is safe on your particular system.
-	h.	If running on PowerMAC, build your kernel with
-		CONFIG_PMAC_RACKMETER=n to disable the CPU-meter,
-		avoiding OS jitter from rackmeter_do_timer().
-
-Name:
-  rcuc/%u
-
-Purpose:
-  Execute RCU callbacks in CONFIG_RCU_BOOST=y kernels.
-
-To reduce its OS jitter, do at least one of the following:
-
-1.	Build the kernel with CONFIG_PREEMPT=n.  This prevents these
-	kthreads from being created in the first place, and also obviates
-	the need for RCU priority boosting.  This approach is feasible
-	for workloads that do not require high degrees of responsiveness.
-2.	Build the kernel with CONFIG_RCU_BOOST=n.  This prevents these
-	kthreads from being created in the first place.  This approach
-	is feasible only if your workload never requires RCU priority
-	boosting, for example, if you ensure frequent idle time on all
-	CPUs that might execute within the kernel.
-3.	Build with CONFIG_RCU_NOCB_CPU=y and boot with the rcu_nocbs=
-	boot parameter offloading RCU callbacks from all CPUs susceptible
-	to OS jitter.  This approach prevents the rcuc/%u kthreads from
-	having any work to do, so that they are never awakened.
-4.	Ensure that the CPU never enters the kernel, and, in particular,
-	avoid initiating any CPU hotplug operations on this CPU.  This is
-	another way of preventing any callbacks from being queued on the
-	CPU, again preventing the rcuc/%u kthreads from having any work
-	to do.
-
-Name:
-  rcuop/%d and rcuos/%d
-
-Purpose:
-  Offload RCU callbacks from the corresponding CPU.
-
-To reduce its OS jitter, do at least one of the following:
-
-1.	Use affinity, cgroups, or other mechanism to force these kthreads
-	to execute on some other CPU.
-2.	Build with CONFIG_RCU_NOCB_CPU=n, which will prevent these
-	kthreads from being created in the first place.  However, please
-	note that this will not eliminate OS jitter, but will instead
-	shift it to RCU_SOFTIRQ.
-
-Name:
-  watchdog/%u
-
-Purpose:
-  Detect software lockups on each CPU.
-
-To reduce its OS jitter, do at least one of the following:
-
-1.	Build with CONFIG_LOCKUP_DETECTOR=n, which will prevent these
-	kthreads from being created in the first place.
-2.	Boot with "nosoftlockup=0", which will also prevent these kthreads
-	from being created.  Other related watchdog and softlockup boot
-	parameters may be found in Documentation/admin-guide/kernel-parameters.rst
-	and Documentation/watchdog/watchdog-parameters.rst.
-3.	Echo a zero to /proc/sys/kernel/watchdog to disable the
-	watchdog timer.
-4.	Echo a large number of /proc/sys/kernel/watchdog_thresh in
-	order to reduce the frequency of OS jitter due to the watchdog
-	timer down to a level that is acceptable for your workload.
diff --git a/Documentation/ldm.txt b/Documentation/ldm.txt
deleted file mode 100644
index 12c571368e73..000000000000
--- a/Documentation/ldm.txt
+++ /dev/null
@@ -1,121 +0,0 @@
-==========================================
-LDM - Logical Disk Manager (Dynamic Disks)
-==========================================
-
-:Author: Originally Written by FlatCap - Richard Russon <ldm@flatcap.org>.
-:Last Updated: Anton Altaparmakov on 30 March 2007 for Windows Vista.
-
-Overview
---------
-
-Windows 2000, XP, and Vista use a new partitioning scheme.  It is a complete
-replacement for the MSDOS style partitions.  It stores its information in a
-1MiB journalled database at the end of the physical disk.  The size of
-partitions is limited only by disk space.  The maximum number of partitions is
-nearly 2000.
-
-Any partitions created under the LDM are called "Dynamic Disks".  There are no
-longer any primary or extended partitions.  Normal MSDOS style partitions are
-now known as Basic Disks.
-
-If you wish to use Spanned, Striped, Mirrored or RAID 5 Volumes, you must use
-Dynamic Disks.  The journalling allows Windows to make changes to these
-partitions and filesystems without the need to reboot.
-
-Once the LDM driver has divided up the disk, you can use the MD driver to
-assemble any multi-partition volumes, e.g.  Stripes, RAID5.
-
-To prevent legacy applications from repartitioning the disk, the LDM creates a
-dummy MSDOS partition containing one disk-sized partition.  This is what is
-supported with the Linux LDM driver.
-
-A newer approach that has been implemented with Vista is to put LDM on top of a
-GPT label disk.  This is not supported by the Linux LDM driver yet.
-
-
-Example
--------
-
-Below we have a 50MiB disk, divided into seven partitions.
-
-.. note::
-
-   The missing 1MiB at the end of the disk is where the LDM database is
-   stored.
-
-+-------++--------------+---------+-----++--------------+---------+----+
-|Device || Offset Bytes | Sectors | MiB || Size   Bytes | Sectors | MiB|
-+=======++==============+=========+=====++==============+=========+====+
-|hda    ||            0 |       0 |   0 ||     52428800 |  102400 |  50|
-+-------++--------------+---------+-----++--------------+---------+----+
-|hda1   ||     51380224 |  100352 |  49 ||      1048576 |    2048 |   1|
-+-------++--------------+---------+-----++--------------+---------+----+
-|hda2   ||        16384 |      32 |   0 ||      6979584 |   13632 |   6|
-+-------++--------------+---------+-----++--------------+---------+----+
-|hda3   ||      6995968 |   13664 |   6 ||     10485760 |   20480 |  10|
-+-------++--------------+---------+-----++--------------+---------+----+
-|hda4   ||     17481728 |   34144 |  16 ||      4194304 |    8192 |   4|
-+-------++--------------+---------+-----++--------------+---------+----+
-|hda5   ||     21676032 |   42336 |  20 ||      5242880 |   10240 |   5|
-+-------++--------------+---------+-----++--------------+---------+----+
-|hda6   ||     26918912 |   52576 |  25 ||     10485760 |   20480 |  10|
-+-------++--------------+---------+-----++--------------+---------+----+
-|hda7   ||     37404672 |   73056 |  35 ||     13959168 |   27264 |  13|
-+-------++--------------+---------+-----++--------------+---------+----+
-
-The LDM Database may not store the partitions in the order that they appear on
-disk, but the driver will sort them.
-
-When Linux boots, you will see something like::
-
-  hda: 102400 sectors w/32KiB Cache, CHS=50/64/32
-  hda: [LDM] hda1 hda2 hda3 hda4 hda5 hda6 hda7
-
-
-Compiling LDM Support
----------------------
-
-To enable LDM, choose the following two options: 
-
-  - "Advanced partition selection" CONFIG_PARTITION_ADVANCED
-  - "Windows Logical Disk Manager (Dynamic Disk) support" CONFIG_LDM_PARTITION
-
-If you believe the driver isn't working as it should, you can enable the extra
-debugging code.  This will produce a LOT of output.  The option is:
-
-  - "Windows LDM extra logging" CONFIG_LDM_DEBUG
-
-N.B. The partition code cannot be compiled as a module.
-
-As with all the partition code, if the driver doesn't see signs of its type of
-partition, it will pass control to another driver, so there is no harm in
-enabling it.
-
-If you have Dynamic Disks but don't enable the driver, then all you will see
-is a dummy MSDOS partition filling the whole disk.  You won't be able to mount
-any of the volumes on the disk.
-
-
-Booting
--------
-
-If you enable LDM support, then lilo is capable of booting from any of the
-discovered partitions.  However, grub does not understand the LDM partitioning
-and cannot boot from a Dynamic Disk.
-
-
-More Documentation
-------------------
-
-There is an Overview of the LDM together with complete Technical Documentation.
-It is available for download.
-
-  http://www.linux-ntfs.org/
-
-If you have any LDM questions that aren't answered in the documentation, email
-me.
-
-Cheers,
-    FlatCap - Richard Russon
-    ldm@flatcap.org
-
diff --git a/Documentation/lockup-watchdogs.txt b/Documentation/lockup-watchdogs.txt
deleted file mode 100644
index 290840c160af..000000000000
--- a/Documentation/lockup-watchdogs.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-===============================================================
-Softlockup detector and hardlockup detector (aka nmi_watchdog)
-===============================================================
-
-The Linux kernel can act as a watchdog to detect both soft and hard
-lockups.
-
-A 'softlockup' is defined as a bug that causes the kernel to loop in
-kernel mode for more than 20 seconds (see "Implementation" below for
-details), without giving other tasks a chance to run. The current
-stack trace is displayed upon detection and, by default, the system
-will stay locked up. Alternatively, the kernel can be configured to
-panic; a sysctl, "kernel.softlockup_panic", a kernel parameter,
-"softlockup_panic" (see "Documentation/admin-guide/kernel-parameters.rst" for
-details), and a compile option, "BOOTPARAM_SOFTLOCKUP_PANIC", are
-provided for this.
-
-A 'hardlockup' is defined as a bug that causes the CPU to loop in
-kernel mode for more than 10 seconds (see "Implementation" below for
-details), without letting other interrupts have a chance to run.
-Similarly to the softlockup case, the current stack trace is displayed
-upon detection and the system will stay locked up unless the default
-behavior is changed, which can be done through a sysctl,
-'hardlockup_panic', a compile time knob, "BOOTPARAM_HARDLOCKUP_PANIC",
-and a kernel parameter, "nmi_watchdog"
-(see "Documentation/admin-guide/kernel-parameters.rst" for details).
-
-The panic option can be used in combination with panic_timeout (this
-timeout is set through the confusingly named "kernel.panic" sysctl),
-to cause the system to reboot automatically after a specified amount
-of time.
-
-Implementation
-==============
-
-The soft and hard lockup detectors are built on top of the hrtimer and
-perf subsystems, respectively. A direct consequence of this is that,
-in principle, they should work in any architecture where these
-subsystems are present.
-
-A periodic hrtimer runs to generate interrupts and kick the watchdog
-task. An NMI perf event is generated every "watchdog_thresh"
-(compile-time initialized to 10 and configurable through sysctl of the
-same name) seconds to check for hardlockups. If any CPU in the system
-does not receive any hrtimer interrupt during that time the
-'hardlockup detector' (the handler for the NMI perf event) will
-generate a kernel warning or call panic, depending on the
-configuration.
-
-The watchdog task is a high priority kernel thread that updates a
-timestamp every time it is scheduled. If that timestamp is not updated
-for 2*watchdog_thresh seconds (the softlockup threshold) the
-'softlockup detector' (coded inside the hrtimer callback function)
-will dump useful debug information to the system log, after which it
-will call panic if it was instructed to do so or resume execution of
-other kernel code.
-
-The period of the hrtimer is 2*watchdog_thresh/5, which means it has
-two or three chances to generate an interrupt before the hardlockup
-detector kicks in.
-
-As explained above, a kernel knob is provided that allows
-administrators to configure the period of the hrtimer and the perf
-event. The right value for a particular environment is a trade-off
-between fast response to lockups and detection overhead.
-
-By default, the watchdog runs on all online cores.  However, on a
-kernel configured with NO_HZ_FULL, by default the watchdog runs only
-on the housekeeping cores, not the cores specified in the "nohz_full"
-boot argument.  If we allowed the watchdog to run by default on
-the "nohz_full" cores, we would have to run timer ticks to activate
-the scheduler, which would prevent the "nohz_full" functionality
-from protecting the user code on those cores from the kernel.
-Of course, disabling it by default on the nohz_full cores means that
-when those cores do enter the kernel, by default we will not be
-able to detect if they lock up.  However, allowing the watchdog
-to continue to run on the housekeeping (non-tickless) cores means
-that we will continue to detect lockups properly on those cores.
-
-In either case, the set of cores excluded from running the watchdog
-may be adjusted via the kernel.watchdog_cpumask sysctl.  For
-nohz_full cores, this may be useful for debugging a case where the
-kernel seems to be hanging on the nohz_full cores.
diff --git a/Documentation/numastat.txt b/Documentation/numastat.txt
deleted file mode 100644
index aaf1667489f8..000000000000
--- a/Documentation/numastat.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-===============================
-Numa policy hit/miss statistics
-===============================
-
-/sys/devices/system/node/node*/numastat
-
-All units are pages. Hugepages have separate counters.
-
-=============== ============================================================
-numa_hit	A process wanted to allocate memory from this node,
-		and succeeded.
-
-numa_miss	A process wanted to allocate memory from another node,
-		but ended up with memory from this node.
-
-numa_foreign	A process wanted to allocate on this node,
-		but ended up with memory from another one.
-
-local_node	A process ran on this node and got memory from it.
-
-other_node	A process ran on this node and got memory from another node.
-
-interleave_hit 	Interleaving wanted to allocate from this node
-		and succeeded.
-=============== ============================================================
-
-For easier reading you can use the numastat utility from the numactl package
-(http://oss.sgi.com/projects/libnuma/). Note that it only works
-well right now on machines with a small number of CPUs.
-
diff --git a/Documentation/pnp.txt b/Documentation/pnp.txt
deleted file mode 100644
index bab2d10631f0..000000000000
--- a/Documentation/pnp.txt
+++ /dev/null
@@ -1,292 +0,0 @@
-=================================
-Linux Plug and Play Documentation
-=================================
-
-:Author: Adam Belay <ambx1@neo.rr.com>
-:Last updated: Oct. 16, 2002
-
-
-Overview
---------
-
-Plug and Play provides a means of detecting and setting resources for legacy or
-otherwise unconfigurable devices.  The Linux Plug and Play Layer provides these 
-services to compatible drivers.
-
-
-The User Interface
-------------------
-
-The Linux Plug and Play user interface provides a means to activate PnP devices
-for legacy and user level drivers that do not support Linux Plug and Play.  The 
-user interface is integrated into sysfs.
-
-In addition to the standard sysfs file the following are created in each
-device's directory:
-- id - displays a list of support EISA IDs
-- options - displays possible resource configurations
-- resources - displays currently allocated resources and allows resource changes
-
-activating a device
-^^^^^^^^^^^^^^^^^^^
-
-::
-
-	# echo "auto" > resources
-
-this will invoke the automatic resource config system to activate the device
-
-manually activating a device
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-::
-
-	# echo "manual <depnum> <mode>" > resources
-
-	<depnum> - the configuration number
-	<mode> - static or dynamic
-		 static = for next boot
-		 dynamic = now
-
-disabling a device
-^^^^^^^^^^^^^^^^^^
-
-::
-
-	# echo "disable" > resources
-
-
-EXAMPLE:
-
-Suppose you need to activate the floppy disk controller.
-
-1. change to the proper directory, in my case it is
-   /driver/bus/pnp/devices/00:0f::
-
-	# cd /driver/bus/pnp/devices/00:0f
-	# cat name
-	PC standard floppy disk controller
-
-2. check if the device is already active::
-
-	# cat resources
-	DISABLED
-
-  - Notice the string "DISABLED".  This means the device is not active.
-
-3. check the device's possible configurations (optional)::
-
-	# cat options
-	Dependent: 01 - Priority acceptable
-	    port 0x3f0-0x3f0, align 0x7, size 0x6, 16-bit address decoding
-	    port 0x3f7-0x3f7, align 0x0, size 0x1, 16-bit address decoding
-	    irq 6
-	    dma 2 8-bit compatible
-	Dependent: 02 - Priority acceptable
-	    port 0x370-0x370, align 0x7, size 0x6, 16-bit address decoding
-	    port 0x377-0x377, align 0x0, size 0x1, 16-bit address decoding
-	    irq 6
-	    dma 2 8-bit compatible
-
-4. now activate the device::
-
-	# echo "auto" > resources
-
-5. finally check if the device is active::
-
-	# cat resources
-	io 0x3f0-0x3f5
-	io 0x3f7-0x3f7
-	irq 6
-	dma 2
-
-also there are a series of kernel parameters::
-
-	pnp_reserve_irq=irq1[,irq2] ....
-	pnp_reserve_dma=dma1[,dma2] ....
-	pnp_reserve_io=io1,size1[,io2,size2] ....
-	pnp_reserve_mem=mem1,size1[,mem2,size2] ....
-
-
-
-The Unified Plug and Play Layer
--------------------------------
-
-All Plug and Play drivers, protocols, and services meet at a central location
-called the Plug and Play Layer.  This layer is responsible for the exchange of 
-information between PnP drivers and PnP protocols.  Thus it automatically 
-forwards commands to the proper protocol.  This makes writing PnP drivers 
-significantly easier.
-
-The following functions are available from the Plug and Play Layer:
-
-pnp_get_protocol
-  increments the number of uses by one
-
-pnp_put_protocol
-  deincrements the number of uses by one
-
-pnp_register_protocol
-  use this to register a new PnP protocol
-
-pnp_unregister_protocol
-  use this function to remove a PnP protocol from the Plug and Play Layer
-
-pnp_register_driver
-  adds a PnP driver to the Plug and Play Layer
-
-  this includes driver model integration
-  returns zero for success or a negative error number for failure; count
-  calls to the .add() method if you need to know how many devices bind to
-  the driver
-
-pnp_unregister_driver
-  removes a PnP driver from the Plug and Play Layer
-
-
-
-Plug and Play Protocols
------------------------
-
-This section contains information for PnP protocol developers.
-
-The following Protocols are currently available in the computing world:
-
-- PNPBIOS:
-    used for system devices such as serial and parallel ports.
-- ISAPNP:
-    provides PnP support for the ISA bus
-- ACPI:
-    among its many uses, ACPI provides information about system level
-    devices.
-
-It is meant to replace the PNPBIOS.  It is not currently supported by Linux
-Plug and Play but it is planned to be in the near future.
-
-
-Requirements for a Linux PnP protocol:
-1. the protocol must use EISA IDs
-2. the protocol must inform the PnP Layer of a device's current configuration
-
-- the ability to set resources is optional but preferred.
-
-The following are PnP protocol related functions:
-
-pnp_add_device
-  use this function to add a PnP device to the PnP layer
-
-  only call this function when all wanted values are set in the pnp_dev
-  structure
-
-pnp_init_device
-  call this to initialize the PnP structure
-
-pnp_remove_device
-  call this to remove a device from the Plug and Play Layer.
-  it will fail if the device is still in use.
-  automatically will free mem used by the device and related structures
-
-pnp_add_id
-  adds an EISA ID to the list of supported IDs for the specified device
-
-For more information consult the source of a protocol such as
-/drivers/pnp/pnpbios/core.c.
-
-
-
-Linux Plug and Play Drivers
----------------------------
-
-This section contains information for Linux PnP driver developers.
-
-The New Way
-^^^^^^^^^^^
-
-1. first make a list of supported EISA IDS
-
-   ex::
-
-	static const struct pnp_id pnp_dev_table[] = {
-		/* Standard LPT Printer Port */
-		{.id = "PNP0400", .driver_data = 0},
-		/* ECP Printer Port */
-		{.id = "PNP0401", .driver_data = 0},
-		{.id = ""}
-	};
-
-   Please note that the character 'X' can be used as a wild card in the function
-   portion (last four characters).
-
-   ex::
-
-	/* Unknown PnP modems */
-	{	"PNPCXXX",		UNKNOWN_DEV	},
-
-   Supported PnP card IDs can optionally be defined.
-   ex::
-
-	static const struct pnp_id pnp_card_table[] = {
-		{	"ANYDEVS",		0	},
-		{	"",			0	}
-	};
-
-2. Optionally define probe and remove functions.  It may make sense not to
-   define these functions if the driver already has a reliable method of detecting
-   the resources, such as the parport_pc driver.
-
-   ex::
-
-	static int
-	serial_pnp_probe(struct pnp_dev * dev, const struct pnp_id *card_id, const
-			struct pnp_id *dev_id)
-	{
-	. . .
-
-   ex::
-
-	static void serial_pnp_remove(struct pnp_dev * dev)
-	{
-	. . .
-
-   consult /drivers/serial/8250_pnp.c for more information.
-
-3. create a driver structure
-
-   ex::
-
-	static struct pnp_driver serial_pnp_driver = {
-		.name		= "serial",
-		.card_id_table	= pnp_card_table,
-		.id_table	= pnp_dev_table,
-		.probe		= serial_pnp_probe,
-		.remove		= serial_pnp_remove,
-	};
-
-   * name and id_table cannot be NULL.
-
-4. register the driver
-
-   ex::
-
-	static int __init serial8250_pnp_init(void)
-	{
-		return pnp_register_driver(&serial_pnp_driver);
-	}
-
-The Old Way
-^^^^^^^^^^^
-
-A series of compatibility functions have been created to make it easy to convert
-ISAPNP drivers.  They should serve as a temporary solution only.
-
-They are as follows::
-
-	struct pnp_card *pnp_find_card(unsigned short vendor,
-				       unsigned short device,
-				       struct pnp_card *from)
-
-	struct pnp_dev *pnp_find_dev(struct pnp_card *card,
-				     unsigned short vendor,
-				     unsigned short function,
-				     struct pnp_dev *from)
-
diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
deleted file mode 100644
index 688c95b11919..000000000000
--- a/Documentation/rtc.txt
+++ /dev/null
@@ -1,140 +0,0 @@
-=======================================
-Real Time Clock (RTC) Drivers for Linux
-=======================================
-
-When Linux developers talk about a "Real Time Clock", they usually mean
-something that tracks wall clock time and is battery backed so that it
-works even with system power off.  Such clocks will normally not track
-the local time zone or daylight savings time -- unless they dual boot
-with MS-Windows -- but will instead be set to Coordinated Universal Time
-(UTC, formerly "Greenwich Mean Time").
-
-The newest non-PC hardware tends to just count seconds, like the time(2)
-system call reports, but RTCs also very commonly represent time using
-the Gregorian calendar and 24 hour time, as reported by gmtime(3).
-
-Linux has two largely-compatible userspace RTC API families you may
-need to know about:
-
-    *	/dev/rtc ... is the RTC provided by PC compatible systems,
-	so it's not very portable to non-x86 systems.
-
-    *	/dev/rtc0, /dev/rtc1 ... are part of a framework that's
-	supported by a wide variety of RTC chips on all systems.
-
-Programmers need to understand that the PC/AT functionality is not
-always available, and some systems can do much more.  That is, the
-RTCs use the same API to make requests in both RTC frameworks (using
-different filenames of course), but the hardware may not offer the
-same functionality.  For example, not every RTC is hooked up to an
-IRQ, so they can't all issue alarms; and where standard PC RTCs can
-only issue an alarm up to 24 hours in the future, other hardware may
-be able to schedule one any time in the upcoming century.
-
-
-Old PC/AT-Compatible driver:  /dev/rtc
---------------------------------------
-
-All PCs (even Alpha machines) have a Real Time Clock built into them.
-Usually they are built into the chipset of the computer, but some may
-actually have a Motorola MC146818 (or clone) on the board. This is the
-clock that keeps the date and time while your computer is turned off.
-
-ACPI has standardized that MC146818 functionality, and extended it in
-a few ways (enabling longer alarm periods, and wake-from-hibernate).
-That functionality is NOT exposed in the old driver.
-
-However it can also be used to generate signals from a slow 2Hz to a
-relatively fast 8192Hz, in increments of powers of two. These signals
-are reported by interrupt number 8. (Oh! So *that* is what IRQ 8 is
-for...) It can also function as a 24hr alarm, raising IRQ 8 when the
-alarm goes off. The alarm can also be programmed to only check any
-subset of the three programmable values, meaning that it could be set to
-ring on the 30th second of the 30th minute of every hour, for example.
-The clock can also be set to generate an interrupt upon every clock
-update, thus generating a 1Hz signal.
-
-The interrupts are reported via /dev/rtc (major 10, minor 135, read only
-character device) in the form of an unsigned long. The low byte contains
-the type of interrupt (update-done, alarm-rang, or periodic) that was
-raised, and the remaining bytes contain the number of interrupts since
-the last read.  Status information is reported through the pseudo-file
-/proc/driver/rtc if the /proc filesystem was enabled.  The driver has
-built in locking so that only one process is allowed to have the /dev/rtc
-interface open at a time.
-
-A user process can monitor these interrupts by doing a read(2) or a
-select(2) on /dev/rtc -- either will block/stop the user process until
-the next interrupt is received. This is useful for things like
-reasonably high frequency data acquisition where one doesn't want to
-burn up 100% CPU by polling gettimeofday etc. etc.
-
-At high frequencies, or under high loads, the user process should check
-the number of interrupts received since the last read to determine if
-there has been any interrupt "pileup" so to speak. Just for reference, a
-typical 486-33 running a tight read loop on /dev/rtc will start to suffer
-occasional interrupt pileup (i.e. > 1 IRQ event since last read) for
-frequencies above 1024Hz. So you really should check the high bytes
-of the value you read, especially at frequencies above that of the
-normal timer interrupt, which is 100Hz.
-
-Programming and/or enabling interrupt frequencies greater than 64Hz is
-only allowed by root. This is perhaps a bit conservative, but we don't want
-an evil user generating lots of IRQs on a slow 386sx-16, where it might have
-a negative impact on performance. This 64Hz limit can be changed by writing
-a different value to /proc/sys/dev/rtc/max-user-freq. Note that the
-interrupt handler is only a few lines of code to minimize any possibility
-of this effect.
-
-Also, if the kernel time is synchronized with an external source, the 
-kernel will write the time back to the CMOS clock every 11 minutes. In 
-the process of doing this, the kernel briefly turns off RTC periodic 
-interrupts, so be aware of this if you are doing serious work. If you
-don't synchronize the kernel time with an external source (via ntp or
-whatever) then the kernel will keep its hands off the RTC, allowing you
-exclusive access to the device for your applications.
-
-The alarm and/or interrupt frequency are programmed into the RTC via
-various ioctl(2) calls as listed in ./include/linux/rtc.h
-Rather than write 50 pages describing the ioctl() and so on, it is
-perhaps more useful to include a small test program that demonstrates
-how to use them, and demonstrates the features of the driver. This is
-probably a lot more useful to people interested in writing applications
-that will be using this driver.  See the code at the end of this document.
-
-(The original /dev/rtc driver was written by Paul Gortmaker.)
-
-
-New portable "RTC Class" drivers:  /dev/rtcN
---------------------------------------------
-
-Because Linux supports many non-ACPI and non-PC platforms, some of which
-have more than one RTC style clock, it needed a more portable solution
-than expecting a single battery-backed MC146818 clone on every system.
-Accordingly, a new "RTC Class" framework has been defined.  It offers
-three different userspace interfaces:
-
-    *	/dev/rtcN ... much the same as the older /dev/rtc interface
-
-    *	/sys/class/rtc/rtcN ... sysfs attributes support readonly
-	access to some RTC attributes.
-
-    *	/proc/driver/rtc ... the system clock RTC may expose itself
-	using a procfs interface. If there is no RTC for the system clock,
-	rtc0 is used by default. More information is (currently) shown
-	here than through sysfs.
-
-The RTC Class framework supports a wide variety of RTCs, ranging from those
-integrated into embeddable system-on-chip (SOC) processors to discrete chips
-using I2C, SPI, or some other bus to communicate with the host CPU.  There's
-even support for PC-style RTCs ... including the features exposed on newer PCs
-through ACPI.
-
-The new framework also removes the "one RTC per system" restriction.  For
-example, maybe the low-power battery-backed RTC is a discrete I2C chip, but
-a high functionality RTC is integrated into the SOC.  That system might read
-the system clock from the discrete RTC, but use the integrated one for all
-other tasks, because of its greater functionality.
-
-Check out tools/testing/selftests/rtc/rtctest.c for an example usage of the
-ioctl interface.
diff --git a/Documentation/svga.txt b/Documentation/svga.txt
deleted file mode 100644
index b6c2f9acca92..000000000000
--- a/Documentation/svga.txt
+++ /dev/null
@@ -1,249 +0,0 @@
-.. include:: <isonum.txt>
-
-=================================
-Video Mode Selection Support 2.13
-=================================
-
-:Copyright: |copy| 1995--1999 Martin Mares, <mj@ucw.cz>
-
-Intro
-~~~~~
-
-This small document describes the "Video Mode Selection" feature which
-allows the use of various special video modes supported by the video BIOS. Due
-to usage of the BIOS, the selection is limited to boot time (before the
-kernel decompression starts) and works only on 80X86 machines.
-
-.. note::
-
-   Short intro for the impatient: Just use vga=ask for the first time,
-   enter ``scan`` on the video mode prompt, pick the mode you want to use,
-   remember its mode ID (the four-digit hexadecimal number) and then
-   set the vga parameter to this number (converted to decimal first).
-
-The video mode to be used is selected by a kernel parameter which can be
-specified in the kernel Makefile (the SVGA_MODE=... line) or by the "vga=..."
-option of LILO (or some other boot loader you use) or by the "vidmode" utility
-(present in standard Linux utility packages). You can use the following values
-of this parameter::
-
-   NORMAL_VGA - Standard 80x25 mode available on all display adapters.
-
-   EXTENDED_VGA	- Standard 8-pixel font mode: 80x43 on EGA, 80x50 on VGA.
-
-   ASK_VGA - Display a video mode menu upon startup (see below).
-
-   0..35 - Menu item number (when you have used the menu to view the list of
-      modes available on your adapter, you can specify the menu item you want
-      to use). 0..9 correspond to "0".."9", 10..35 to "a".."z". Warning: the
-      mode list displayed may vary as the kernel version changes, because the
-      modes are listed in a "first detected -- first displayed" manner. It's
-      better to use absolute mode numbers instead.
-
-   0x.... - Hexadecimal video mode ID (also displayed on the menu, see below
-      for exact meaning of the ID). Warning: rdev and LILO don't support
-      hexadecimal numbers -- you have to convert it to decimal manually.
-
-Menu
-~~~~
-
-The ASK_VGA mode causes the kernel to offer a video mode menu upon
-bootup. It displays a "Press <RETURN> to see video modes available, <SPACE>
-to continue or wait 30 secs" message. If you press <RETURN>, you enter the
-menu, if you press <SPACE> or wait 30 seconds, the kernel will boot up in
-the standard 80x25 mode.
-
-The menu looks like::
-
-	Video adapter: <name-of-detected-video-adapter>
-	Mode:    COLSxROWS:
-	0  0F00  80x25
-	1  0F01  80x50
-	2  0F02  80x43
-	3  0F03  80x26
-	....
-	Enter mode number or ``scan``: <flashing-cursor-here>
-
-<name-of-detected-video-adapter> tells what video adapter did Linux detect
--- it's either a generic adapter name (MDA, CGA, HGC, EGA, VGA, VESA VGA [a VGA
-with VESA-compliant BIOS]) or a chipset name (e.g., Trident). Direct detection
-of chipsets is turned off by default as it's inherently unreliable due to
-absolutely insane PC design.
-
-"0  0F00  80x25" means that the first menu item (the menu items are numbered
-from "0" to "9" and from "a" to "z") is a 80x25 mode with ID=0x0f00 (see the
-next section for a description of mode IDs).
-
-<flashing-cursor-here> encourages you to enter the item number or mode ID
-you wish to set and press <RETURN>. If the computer complains something about
-"Unknown mode ID", it is trying to tell you that it isn't possible to set such
-a mode. It's also possible to press only <RETURN> which leaves the current mode.
-
-The mode list usually contains a few basic modes and some VESA modes.  In
-case your chipset has been detected, some chipset-specific modes are shown as
-well (some of these might be missing or unusable on your machine as different
-BIOSes are often shipped with the same card and the mode numbers depend purely
-on the VGA BIOS).
-
-The modes displayed on the menu are partially sorted: The list starts with
-the standard modes (80x25 and 80x50) followed by "special" modes (80x28 and
-80x43), local modes (if the local modes feature is enabled), VESA modes and
-finally SVGA modes for the auto-detected adapter.
-
-If you are not happy with the mode list offered (e.g., if you think your card
-is able to do more), you can enter "scan" instead of item number / mode ID.  The
-program will try to ask the BIOS for all possible video mode numbers and test
-what happens then. The screen will be probably flashing wildly for some time and
-strange noises will be heard from inside the monitor and so on and then, really
-all consistent video modes supported by your BIOS will appear (plus maybe some
-``ghost modes``). If you are afraid this could damage your monitor, don't use
-this function.
-
-After scanning, the mode ordering is a bit different: the auto-detected SVGA
-modes are not listed at all and the modes revealed by ``scan`` are shown before
-all VESA modes.
-
-Mode IDs
-~~~~~~~~
-
-Because of the complexity of all the video stuff, the video mode IDs
-used here are also a bit complex. A video mode ID is a 16-bit number usually
-expressed in a hexadecimal notation (starting with "0x"). You can set a mode
-by entering its mode directly if you know it even if it isn't shown on the menu.
-
-The ID numbers can be divided to those regions::
-
-   0x0000 to 0x00ff - menu item references. 0x0000 is the first item. Don't use
-	outside the menu as this can change from boot to boot (especially if you
-	have used the ``scan`` feature).
-
-   0x0100 to 0x017f - standard BIOS modes. The ID is a BIOS video mode number
-	(as presented to INT 10, function 00) increased by 0x0100.
-
-   0x0200 to 0x08ff - VESA BIOS modes. The ID is a VESA mode ID increased by
-	0x0100. All VESA modes should be autodetected and shown on the menu.
-
-   0x0900 to 0x09ff - Video7 special modes. Set by calling INT 0x10, AX=0x6f05.
-	(Usually 940=80x43, 941=132x25, 942=132x44, 943=80x60, 944=100x60,
-	945=132x28 for the standard Video7 BIOS)
-
-   0x0f00 to 0x0fff - special modes (they are set by various tricks -- usually
-	by modifying one of the standard modes). Currently available:
-	0x0f00	standard 80x25, don't reset mode if already set (=FFFF)
-	0x0f01	standard with 8-point font: 80x43 on EGA, 80x50 on VGA
-	0x0f02	VGA 80x43 (VGA switched to 350 scanlines with a 8-point font)
-	0x0f03	VGA 80x28 (standard VGA scans, but 14-point font)
-	0x0f04	leave current video mode
-	0x0f05	VGA 80x30 (480 scans, 16-point font)
-	0x0f06	VGA 80x34 (480 scans, 14-point font)
-	0x0f07	VGA 80x60 (480 scans, 8-point font)
-	0x0f08	Graphics hack (see the VIDEO_GFX_HACK paragraph below)
-
-   0x1000 to 0x7fff - modes specified by resolution. The code has a "0xRRCC"
-	form where RR is a number of rows and CC is a number of columns.
-	E.g., 0x1950 corresponds to a 80x25 mode, 0x2b84 to 132x43 etc.
-	This is the only fully portable way to refer to a non-standard mode,
-	but it relies on the mode being found and displayed on the menu
-	(remember that mode scanning is not done automatically).
-
-   0xff00 to 0xffff - aliases for backward compatibility:
-	0xffff	equivalent to 0x0f00 (standard 80x25)
-	0xfffe	equivalent to 0x0f01 (EGA 80x43 or VGA 80x50)
-
-If you add 0x8000 to the mode ID, the program will try to recalculate
-vertical display timing according to mode parameters, which can be used to
-eliminate some annoying bugs of certain VGA BIOSes (usually those used for
-cards with S3 chipsets and old Cirrus Logic BIOSes) -- mainly extra lines at the
-end of the display.
-
-Options
-~~~~~~~
-
-Build options for arch/x86/boot/* are selected by the kernel kconfig
-utility and the kernel .config file.
-
-VIDEO_GFX_HACK - includes special hack for setting of graphics modes
-to be used later by special drivers.
-Allows to set _any_ BIOS mode including graphic ones and forcing specific
-text screen resolution instead of peeking it from BIOS variables. Don't use
-unless you think you know what you're doing. To activate this setup, use
-mode number 0x0f08 (see the Mode IDs section above).
-
-Still doesn't work?
-~~~~~~~~~~~~~~~~~~~
-
-When the mode detection doesn't work (e.g., the mode list is incorrect or
-the machine hangs instead of displaying the menu), try to switch off some of
-the configuration options listed under "Options". If it fails, you can still use
-your kernel with the video mode set directly via the kernel parameter.
-
-In either case, please send me a bug report containing what _exactly_
-happens and how do the configuration switches affect the behaviour of the bug.
-
-If you start Linux from M$-DOS, you might also use some DOS tools for
-video mode setting. In this case, you must specify the 0x0f04 mode ("leave
-current settings") to Linux, because if you don't and you use any non-standard
-mode, Linux will switch to 80x25 automatically.
-
-If you set some extended mode and there's one or more extra lines on the
-bottom of the display containing already scrolled-out text, your VGA BIOS
-contains the most common video BIOS bug called "incorrect vertical display
-end setting". Adding 0x8000 to the mode ID might fix the problem. Unfortunately,
-this must be done manually -- no autodetection mechanisms are available.
-
-History
-~~~~~~~
-
-=============== ================================================================
-1.0 (??-Nov-95)	First version supporting all adapters supported by the old
-		setup.S + Cirrus Logic 54XX. Present in some 1.3.4? kernels
-		and then removed due to instability on some machines.
-2.0 (28-Jan-96)	Rewritten from scratch. Cirrus Logic 64XX support added, almost
-		everything is configurable, the VESA support should be much more
-		stable, explicit mode numbering allowed, "scan" implemented etc.
-2.1 (30-Jan-96) VESA modes moved to 0x200-0x3ff. Mode selection by resolution
-		supported. Few bugs fixed. VESA modes are listed prior to
-		modes supplied by SVGA autodetection as they are more reliable.
-		CLGD autodetect works better. Doesn't depend on 80x25 being
-		active when started. Scanning fixed. 80x43 (any VGA) added.
-		Code cleaned up.
-2.2 (01-Feb-96)	EGA 80x43 fixed. VESA extended to 0x200-0x4ff (non-standard 02XX
-		VESA modes work now). Display end bug workaround supported.
-		Special modes renumbered to allow adding of the "recalculate"
-		flag, 0xffff and 0xfffe became aliases instead of real IDs.
-		Screen contents retained during mode changes.
-2.3 (15-Mar-96)	Changed to work with 1.3.74 kernel.
-2.4 (18-Mar-96)	Added patches by Hans Lermen fixing a memory overwrite problem
-		with some boot loaders. Memory management rewritten to reflect
-		these changes. Unfortunately, screen contents retaining works
-		only with some loaders now.
-		Added a Tseng 132x60 mode.
-2.5 (19-Mar-96)	Fixed a VESA mode scanning bug introduced in 2.4.
-2.6 (25-Mar-96)	Some VESA BIOS errors not reported -- it fixes error reports on
-		several cards with broken VESA code (e.g., ATI VGA).
-2.7 (09-Apr-96)	- Accepted all VESA modes in range 0x100 to 0x7ff, because some
-		  cards use very strange mode numbers.
-		- Added Realtek VGA modes (thanks to Gonzalo Tornaria).
-		- Hardware testing order slightly changed, tests based on ROM
-		  contents done as first.
-		- Added support for special Video7 mode switching functions
-		  (thanks to Tom Vander Aa).
-		- Added 480-scanline modes (especially useful for notebooks,
-		  original version written by hhanemaa@cs.ruu.nl, patched by
-		  Jeff Chua, rewritten by me).
-		- Screen store/restore fixed.
-2.8 (14-Apr-96) - Previous release was not compilable without CONFIG_VIDEO_SVGA.
-		- Better recognition of text modes during mode scan.
-2.9 (12-May-96)	- Ignored VESA modes 0x80 - 0xff (more VESA BIOS bugs!)
-2.10(11-Nov-96) - The whole thing made optional.
-		- Added the CONFIG_VIDEO_400_HACK switch.
-		- Added the CONFIG_VIDEO_GFX_HACK switch.
-		- Code cleanup.
-2.11(03-May-97) - Yet another cleanup, now including also the documentation.
-		- Direct testing of SVGA adapters turned off by default, ``scan``
-		  offered explicitly on the prompt line.
-		- Removed the doc section describing adding of new probing
-		  functions as I try to get rid of _all_ hardware probing here.
-2.12(25-May-98) Added support for VESA frame buffer graphics.
-2.13(14-May-99) Minor documentation fixes.
-=============== ================================================================
diff --git a/Documentation/video-output.txt b/Documentation/video-output.txt
deleted file mode 100644
index 56d6fa2e2368..000000000000
--- a/Documentation/video-output.txt
+++ /dev/null
@@ -1,34 +0,0 @@
-Video Output Switcher Control
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-2006 luming.yu@intel.com
-
-The output sysfs class driver provides an abstract video output layer that
-can be used to hook platform specific methods to enable/disable video output
-device through common sysfs interface. For example, on my IBM ThinkPad T42
-laptop, The ACPI video driver registered its output devices and read/write
-method for 'state' with output sysfs class. The user interface under sysfs is::
-
-  linux:/sys/class/video_output # tree .
-  .
-  |-- CRT0
-  |   |-- device -> ../../../devices/pci0000:00/0000:00:01.0
-  |   |-- state
-  |   |-- subsystem -> ../../../class/video_output
-  |   `-- uevent
-  |-- DVI0
-  |   |-- device -> ../../../devices/pci0000:00/0000:00:01.0
-  |   |-- state
-  |   |-- subsystem -> ../../../class/video_output
-  |   `-- uevent
-  |-- LCD0
-  |   |-- device -> ../../../devices/pci0000:00/0000:00:01.0
-  |   |-- state
-  |   |-- subsystem -> ../../../class/video_output
-  |   `-- uevent
-  `-- TV0
-     |-- device -> ../../../devices/pci0000:00/0000:00:01.0
-     |-- state
-     |-- subsystem -> ../../../class/video_output
-     `-- uevent
-
diff --git a/Documentation/x86/topology.rst b/Documentation/x86/topology.rst
index 8e9704f61017..e29739904e37 100644
--- a/Documentation/x86/topology.rst
+++ b/Documentation/x86/topology.rst
@@ -9,7 +9,7 @@ representation in the kernel. Update/change when doing changes to the
 respective code.
 
 The architecture-agnostic topology definitions are in
-Documentation/cputopology.txt. This file holds x86-specific
+Documentation/admin-guide/cputopology.rst. This file holds x86-specific
 differences/specialities which must not necessarily apply to the generic
 definitions. Thus, the way to read up on Linux topology on x86 is to start
 with the generic one and look at this one in parallel for the x86 specifics.
-- 
cgit 


From baa293e9544bea71361950d071579f0e4d5713ed Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 27 Jun 2019 15:39:22 -0300
Subject: docs: driver-api: add a series of orphaned documents

There are lots of documents under Documentation/*.txt and a few other
orphan documents elsehwere that belong to the driver-API book.

Move them to their right place.

Reviewed-by: Cornelia Huck <cohuck@redhat.com> # vfio-related parts
Acked-by: Logan Gunthorpe <logang@deltatee.com> # switchtec
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/ABI/removed/sysfs-class-rfkill       |    2 +-
 Documentation/ABI/stable/sysfs-class-rfkill        |    2 +-
 Documentation/ABI/testing/sysfs-class-switchtec    |    2 +-
 Documentation/EDID/howto.rst                       |   58 -
 Documentation/SM501.txt                            |   74 -
 Documentation/admin-guide/kernel-parameters.txt    |    2 +-
 .../admin-guide/laptops/thinkpad-acpi.rst          |    6 +-
 Documentation/bt8xxgpio.txt                        |   62 -
 Documentation/connector/connector.rst              |  156 --
 Documentation/console/console.rst                  |  152 --
 Documentation/dcdbas.txt                           |   99 --
 Documentation/dell_rbu.txt                         |  128 --
 Documentation/driver-api/bt8xxgpio.rst             |   62 +
 Documentation/driver-api/connector.rst             |  156 ++
 Documentation/driver-api/console.rst               |  152 ++
 Documentation/driver-api/dcdbas.rst                |   99 ++
 Documentation/driver-api/dell_rbu.rst              |  128 ++
 Documentation/driver-api/edid.rst                  |   58 +
 Documentation/driver-api/eisa.rst                  |  230 +++
 Documentation/driver-api/index.rst                 |   26 +
 Documentation/driver-api/isa.rst                   |  122 ++
 Documentation/driver-api/isapnp.rst                |   15 +
 Documentation/driver-api/lightnvm-pblk.rst         |   21 +
 Documentation/driver-api/men-chameleon-bus.rst     |  175 ++
 Documentation/driver-api/ntb.rst                   |  236 +++
 Documentation/driver-api/nvmem.rst                 |  189 ++
 Documentation/driver-api/parport-lowlevel.rst      | 1832 ++++++++++++++++++++
 Documentation/driver-api/pti_intel_mid.rst         |  106 ++
 Documentation/driver-api/pwm.rst                   |  165 ++
 Documentation/driver-api/rfkill.rst                |  132 ++
 Documentation/driver-api/sgi-ioc4.rst              |   49 +
 Documentation/driver-api/sm501.rst                 |   74 +
 Documentation/driver-api/smsc_ece1099.rst          |   60 +
 Documentation/driver-api/switchtec.rst             |  102 ++
 Documentation/driver-api/sync_file.rst             |   86 +
 Documentation/driver-api/vfio-mediated-device.rst  |  414 +++++
 Documentation/driver-api/vfio.rst                  |  520 ++++++
 Documentation/driver-api/xillybus.rst              |  379 ++++
 Documentation/driver-api/zorro.rst                 |  104 ++
 Documentation/eisa.txt                             |  230 ---
 Documentation/fb/fbcon.rst                         |    4 +-
 Documentation/isa.txt                              |  122 --
 Documentation/isapnp.txt                           |   15 -
 Documentation/lightnvm/pblk.txt                    |   21 -
 Documentation/men-chameleon-bus.txt                |  175 --
 Documentation/ntb.txt                              |  236 ---
 Documentation/nvmem/nvmem.rst                      |  189 --
 Documentation/parport-lowlevel.txt                 | 1832 --------------------
 Documentation/pti/pti_intel_mid.rst                |  106 --
 Documentation/pwm.txt                              |  165 --
 Documentation/rfkill.txt                           |  132 --
 Documentation/s390/vfio-ccw.rst                    |    6 +-
 Documentation/sgi-ioc4.txt                         |   49 -
 Documentation/smsc_ece1099.txt                     |   60 -
 Documentation/switchtec.txt                        |  102 --
 Documentation/sync_file.txt                        |   86 -
 Documentation/vfio-mediated-device.txt             |  414 -----
 Documentation/vfio.txt                             |  520 ------
 Documentation/w1/w1.netlink                        |    2 +-
 Documentation/xillybus.txt                         |  379 ----
 Documentation/zorro.txt                            |  104 --
 61 files changed, 5705 insertions(+), 5679 deletions(-)
 delete mode 100644 Documentation/EDID/howto.rst
 delete mode 100644 Documentation/SM501.txt
 delete mode 100644 Documentation/bt8xxgpio.txt
 delete mode 100644 Documentation/connector/connector.rst
 delete mode 100644 Documentation/console/console.rst
 delete mode 100644 Documentation/dcdbas.txt
 delete mode 100644 Documentation/dell_rbu.txt
 create mode 100644 Documentation/driver-api/bt8xxgpio.rst
 create mode 100644 Documentation/driver-api/connector.rst
 create mode 100644 Documentation/driver-api/console.rst
 create mode 100644 Documentation/driver-api/dcdbas.rst
 create mode 100644 Documentation/driver-api/dell_rbu.rst
 create mode 100644 Documentation/driver-api/edid.rst
 create mode 100644 Documentation/driver-api/eisa.rst
 create mode 100644 Documentation/driver-api/isa.rst
 create mode 100644 Documentation/driver-api/isapnp.rst
 create mode 100644 Documentation/driver-api/lightnvm-pblk.rst
 create mode 100644 Documentation/driver-api/men-chameleon-bus.rst
 create mode 100644 Documentation/driver-api/ntb.rst
 create mode 100644 Documentation/driver-api/nvmem.rst
 create mode 100644 Documentation/driver-api/parport-lowlevel.rst
 create mode 100644 Documentation/driver-api/pti_intel_mid.rst
 create mode 100644 Documentation/driver-api/pwm.rst
 create mode 100644 Documentation/driver-api/rfkill.rst
 create mode 100644 Documentation/driver-api/sgi-ioc4.rst
 create mode 100644 Documentation/driver-api/sm501.rst
 create mode 100644 Documentation/driver-api/smsc_ece1099.rst
 create mode 100644 Documentation/driver-api/switchtec.rst
 create mode 100644 Documentation/driver-api/sync_file.rst
 create mode 100644 Documentation/driver-api/vfio-mediated-device.rst
 create mode 100644 Documentation/driver-api/vfio.rst
 create mode 100644 Documentation/driver-api/xillybus.rst
 create mode 100644 Documentation/driver-api/zorro.rst
 delete mode 100644 Documentation/eisa.txt
 delete mode 100644 Documentation/isa.txt
 delete mode 100644 Documentation/isapnp.txt
 delete mode 100644 Documentation/lightnvm/pblk.txt
 delete mode 100644 Documentation/men-chameleon-bus.txt
 delete mode 100644 Documentation/ntb.txt
 delete mode 100644 Documentation/nvmem/nvmem.rst
 delete mode 100644 Documentation/parport-lowlevel.txt
 delete mode 100644 Documentation/pti/pti_intel_mid.rst
 delete mode 100644 Documentation/pwm.txt
 delete mode 100644 Documentation/rfkill.txt
 delete mode 100644 Documentation/sgi-ioc4.txt
 delete mode 100644 Documentation/smsc_ece1099.txt
 delete mode 100644 Documentation/switchtec.txt
 delete mode 100644 Documentation/sync_file.txt
 delete mode 100644 Documentation/vfio-mediated-device.txt
 delete mode 100644 Documentation/vfio.txt
 delete mode 100644 Documentation/xillybus.txt
 delete mode 100644 Documentation/zorro.txt

(limited to 'Documentation')

diff --git a/Documentation/ABI/removed/sysfs-class-rfkill b/Documentation/ABI/removed/sysfs-class-rfkill
index 3ce6231f20b2..9c08c7f98ffb 100644
--- a/Documentation/ABI/removed/sysfs-class-rfkill
+++ b/Documentation/ABI/removed/sysfs-class-rfkill
@@ -1,6 +1,6 @@
 rfkill - radio frequency (RF) connector kill switch support
 
-For details to this subsystem look at Documentation/rfkill.txt.
+For details to this subsystem look at Documentation/driver-api/rfkill.rst.
 
 What:		/sys/class/rfkill/rfkill[0-9]+/claim
 Date:		09-Jul-2007
diff --git a/Documentation/ABI/stable/sysfs-class-rfkill b/Documentation/ABI/stable/sysfs-class-rfkill
index 80151a409d67..5b154f922643 100644
--- a/Documentation/ABI/stable/sysfs-class-rfkill
+++ b/Documentation/ABI/stable/sysfs-class-rfkill
@@ -1,6 +1,6 @@
 rfkill - radio frequency (RF) connector kill switch support
 
-For details to this subsystem look at Documentation/rfkill.txt.
+For details to this subsystem look at Documentation/driver-api/rfkill.rst.
 
 For the deprecated /sys/class/rfkill/*/claim knobs of this interface look in
 Documentation/ABI/removed/sysfs-class-rfkill.
diff --git a/Documentation/ABI/testing/sysfs-class-switchtec b/Documentation/ABI/testing/sysfs-class-switchtec
index 48cb4c15e430..76c7a661a595 100644
--- a/Documentation/ABI/testing/sysfs-class-switchtec
+++ b/Documentation/ABI/testing/sysfs-class-switchtec
@@ -1,6 +1,6 @@
 switchtec - Microsemi Switchtec PCI Switch Management Endpoint
 
-For details on this subsystem look at Documentation/switchtec.txt.
+For details on this subsystem look at Documentation/driver-api/switchtec.rst.
 
 What: 		/sys/class/switchtec
 Date:		05-Jan-2017
diff --git a/Documentation/EDID/howto.rst b/Documentation/EDID/howto.rst
deleted file mode 100644
index 725fd49a88ca..000000000000
--- a/Documentation/EDID/howto.rst
+++ /dev/null
@@ -1,58 +0,0 @@
-:orphan:
-
-====
-EDID
-====
-
-In the good old days when graphics parameters were configured explicitly
-in a file called xorg.conf, even broken hardware could be managed.
-
-Today, with the advent of Kernel Mode Setting, a graphics board is
-either correctly working because all components follow the standards -
-or the computer is unusable, because the screen remains dark after
-booting or it displays the wrong area. Cases when this happens are:
-- The graphics board does not recognize the monitor.
-- The graphics board is unable to detect any EDID data.
-- The graphics board incorrectly forwards EDID data to the driver.
-- The monitor sends no or bogus EDID data.
-- A KVM sends its own EDID data instead of querying the connected monitor.
-Adding the kernel parameter "nomodeset" helps in most cases, but causes
-restrictions later on.
-
-As a remedy for such situations, the kernel configuration item
-CONFIG_DRM_LOAD_EDID_FIRMWARE was introduced. It allows to provide an
-individually prepared or corrected EDID data set in the /lib/firmware
-directory from where it is loaded via the firmware interface. The code
-(see drivers/gpu/drm/drm_edid_load.c) contains built-in data sets for
-commonly used screen resolutions (800x600, 1024x768, 1280x1024, 1600x1200,
-1680x1050, 1920x1080) as binary blobs, but the kernel source tree does
-not contain code to create these data. In order to elucidate the origin
-of the built-in binary EDID blobs and to facilitate the creation of
-individual data for a specific misbehaving monitor, commented sources
-and a Makefile environment are given here.
-
-To create binary EDID and C source code files from the existing data
-material, simply type "make".
-
-If you want to create your own EDID file, copy the file 1024x768.S,
-replace the settings with your own data and add a new target to the
-Makefile. Please note that the EDID data structure expects the timing
-values in a different way as compared to the standard X11 format.
-
-X11:
-  HTimings:
-    hdisp hsyncstart hsyncend htotal
-  VTimings:
-    vdisp vsyncstart vsyncend vtotal
-
-EDID::
-
-  #define XPIX hdisp
-  #define XBLANK htotal-hdisp
-  #define XOFFSET hsyncstart-hdisp
-  #define XPULSE hsyncend-hsyncstart
-
-  #define YPIX vdisp
-  #define YBLANK vtotal-vdisp
-  #define YOFFSET vsyncstart-vdisp
-  #define YPULSE vsyncend-vsyncstart
diff --git a/Documentation/SM501.txt b/Documentation/SM501.txt
deleted file mode 100644
index 882507453ba4..000000000000
--- a/Documentation/SM501.txt
+++ /dev/null
@@ -1,74 +0,0 @@
-.. include:: <isonum.txt>
-
-============
-SM501 Driver
-============
-
-:Copyright: |copy| 2006, 2007 Simtec Electronics
-
-The Silicon Motion SM501 multimedia companion chip is a multifunction device
-which may provide numerous interfaces including USB host controller USB gadget,
-asynchronous serial ports, audio functions, and a dual display video interface.
-The device may be connected by PCI or local bus with varying functions enabled.
-
-Core
-----
-
-The core driver in drivers/mfd provides common services for the
-drivers which manage the specific hardware blocks. These services
-include locking for common registers, clock control and resource
-management.
-
-The core registers drivers for both PCI and generic bus based
-chips via the platform device and driver system.
-
-On detection of a device, the core initialises the chip (which may
-be specified by the platform data) and then exports the selected
-peripheral set as platform devices for the specific drivers.
-
-The core re-uses the platform device system as the platform device
-system provides enough features to support the drivers without the
-need to create a new bus-type and the associated code to go with it.
-
-
-Resources
----------
-
-Each peripheral has a view of the device which is implicitly narrowed to
-the specific set of resources that peripheral requires in order to
-function correctly.
-
-The centralised memory allocation allows the driver to ensure that the
-maximum possible resource allocation can be made to the video subsystem
-as this is by-far the most resource-sensitive of the on-chip functions.
-
-The primary issue with memory allocation is that of moving the video
-buffers once a display mode is chosen. Indeed when a video mode change
-occurs the memory footprint of the video subsystem changes.
-
-Since video memory is difficult to move without changing the display
-(unless sufficient contiguous memory can be provided for the old and new
-modes simultaneously) the video driver fully utilises the memory area
-given to it by aligning fb0 to the start of the area and fb1 to the end
-of it. Any memory left over in the middle is used for the acceleration
-functions, which are transient and thus their location is less critical
-as it can be moved.
-
-
-Configuration
--------------
-
-The platform device driver uses a set of platform data to pass
-configurations through to the core and the subsidiary drivers
-so that there can be support for more than one system carrying
-an SM501 built into a single kernel image.
-
-The PCI driver assumes that the PCI card behaves as per the Silicon
-Motion reference design.
-
-There is an errata (AB-5) affecting the selection of the
-of the M1XCLK and M1CLK frequencies. These two clocks
-must be sourced from the same PLL, although they can then
-be divided down individually. If this is not set, then SM501 may
-lock and hang the whole system. The driver will refuse to
-attach if the PLL selection is different.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 19b1e3bef56c..04f7b537ee51 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -930,7 +930,7 @@
 			edid/1680x1050.bin, or edid/1920x1080.bin is given
 			and no file with the same name exists. Details and
 			instructions how to build your own EDID data are
-			available in Documentation/EDID/howto.rst. An EDID
+			available in Documentation/driver-api/edid.rst. An EDID
 			data set will only be used for a particular connector,
 			if its name and a colon are prepended to the EDID
 			name. Each connector may use a unique EDID data
diff --git a/Documentation/admin-guide/laptops/thinkpad-acpi.rst b/Documentation/admin-guide/laptops/thinkpad-acpi.rst
index 19d52fc3c5e9..adea0bf2acc5 100644
--- a/Documentation/admin-guide/laptops/thinkpad-acpi.rst
+++ b/Documentation/admin-guide/laptops/thinkpad-acpi.rst
@@ -643,7 +643,7 @@ Sysfs notes
 	2010.
 
 	rfkill controller switch "tpacpi_bluetooth_sw": refer to
-	Documentation/rfkill.txt for details.
+	Documentation/driver-api/rfkill.rst for details.
 
 
 Video output control -- /proc/acpi/ibm/video
@@ -1406,7 +1406,7 @@ Sysfs notes
 	2010.
 
 	rfkill controller switch "tpacpi_wwan_sw": refer to
-	Documentation/rfkill.txt for details.
+	Documentation/driver-api/rfkill.rst for details.
 
 
 EXPERIMENTAL: UWB
@@ -1426,7 +1426,7 @@ Sysfs notes
 ^^^^^^^^^^^
 
 	rfkill controller switch "tpacpi_uwb_sw": refer to
-	Documentation/rfkill.txt for details.
+	Documentation/driver-api/rfkill.rst for details.
 
 Adaptive keyboard
 -----------------
diff --git a/Documentation/bt8xxgpio.txt b/Documentation/bt8xxgpio.txt
deleted file mode 100644
index a845feb074de..000000000000
--- a/Documentation/bt8xxgpio.txt
+++ /dev/null
@@ -1,62 +0,0 @@
-===================================================================
-A driver for a selfmade cheap BT8xx based PCI GPIO-card (bt8xxgpio)
-===================================================================
-
-For advanced documentation, see http://www.bu3sch.de/btgpio.php
-
-A generic digital 24-port PCI GPIO card can be built out of an ordinary
-Brooktree bt848, bt849, bt878 or bt879 based analog TV tuner card. The
-Brooktree chip is used in old analog Hauppauge WinTV PCI cards. You can easily
-find them used for low prices on the net.
-
-The bt8xx chip does have 24 digital GPIO ports.
-These ports are accessible via 24 pins on the SMD chip package.
-
-
-How to physically access the GPIO pins
-======================================
-
-The are several ways to access these pins. One might unsolder the whole chip
-and put it on a custom PCI board, or one might only unsolder each individual
-GPIO pin and solder that to some tiny wire. As the chip package really is tiny
-there are some advanced soldering skills needed in any case.
-
-The physical pinouts are drawn in the following ASCII art.
-The GPIO pins are marked with G00-G23::
-
-                                           G G G G G G G G G G G G     G G G G G G
-                                           0 0 0 0 0 0 0 0 0 0 1 1     1 1 1 1 1 1
-                                           0 1 2 3 4 5 6 7 8 9 0 1     2 3 4 5 6 7
-           | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
-           ---------------------------------------------------------------------------
-         --|                               ^                                     ^   |--
-         --|                               pin 86                           pin 67   |--
-         --|                                                                         |--
-         --|                                                               pin 61 >  |-- G18
-         --|                                                                         |-- G19
-         --|                                                                         |-- G20
-         --|                                                                         |-- G21
-         --|                                                                         |-- G22
-         --|                                                               pin 56 >  |-- G23
-         --|                                                                         |--
-         --|                           Brooktree 878/879                             |--
-         --|                                                                         |--
-         --|                                                                         |--
-         --|                                                                         |--
-         --|                                                                         |--
-         --|                                                                         |--
-         --|                                                                         |--
-         --|                                                                         |--
-         --|                                                                         |--
-         --|                                                                         |--
-         --|                                                                         |--
-         --|                                                                         |--
-         --|                                                                         |--
-         --|                                                                         |--
-         --|   O                                                                     |--
-         --|                                                                         |--
-           ---------------------------------------------------------------------------
-           | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
-           ^
-           This is pin 1
-
diff --git a/Documentation/connector/connector.rst b/Documentation/connector/connector.rst
deleted file mode 100644
index 24e26dc22dbf..000000000000
--- a/Documentation/connector/connector.rst
+++ /dev/null
@@ -1,156 +0,0 @@
-:orphan:
-
-================
-Kernel Connector
-================
-
-Kernel connector - new netlink based userspace <-> kernel space easy
-to use communication module.
-
-The Connector driver makes it easy to connect various agents using a
-netlink based network.  One must register a callback and an identifier.
-When the driver receives a special netlink message with the appropriate
-identifier, the appropriate callback will be called.
-
-From the userspace point of view it's quite straightforward:
-
-	- socket();
-	- bind();
-	- send();
-	- recv();
-
-But if kernelspace wants to use the full power of such connections, the
-driver writer must create special sockets, must know about struct sk_buff
-handling, etc...  The Connector driver allows any kernelspace agents to use
-netlink based networking for inter-process communication in a significantly
-easier way::
-
-  int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
-  void cn_netlink_send_multi(struct cn_msg *msg, u16 len, u32 portid, u32 __group, int gfp_mask);
-  void cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group, int gfp_mask);
-
-  struct cb_id
-  {
-	__u32			idx;
-	__u32			val;
-  };
-
-idx and val are unique identifiers which must be registered in the
-connector.h header for in-kernel usage.  `void (*callback) (void *)` is a
-callback function which will be called when a message with above idx.val
-is received by the connector core.  The argument for that function must
-be dereferenced to `struct cn_msg *`::
-
-  struct cn_msg
-  {
-	struct cb_id		id;
-
-	__u32			seq;
-	__u32			ack;
-
-	__u32			len;	/* Length of the following data */
-	__u8			data[0];
-  };
-
-Connector interfaces
-====================
-
- .. kernel-doc:: include/linux/connector.h
-
- Note:
-   When registering new callback user, connector core assigns
-   netlink group to the user which is equal to its id.idx.
-
-Protocol description
-====================
-
-The current framework offers a transport layer with fixed headers.  The
-recommended protocol which uses such a header is as following:
-
-msg->seq and msg->ack are used to determine message genealogy.  When
-someone sends a message, they use a locally unique sequence and random
-acknowledge number.  The sequence number may be copied into
-nlmsghdr->nlmsg_seq too.
-
-The sequence number is incremented with each message sent.
-
-If you expect a reply to the message, then the sequence number in the
-received message MUST be the same as in the original message, and the
-acknowledge number MUST be the same + 1.
-
-If we receive a message and its sequence number is not equal to one we
-are expecting, then it is a new message.  If we receive a message and
-its sequence number is the same as one we are expecting, but its
-acknowledge is not equal to the sequence number in the original
-message + 1, then it is a new message.
-
-Obviously, the protocol header contains the above id.
-
-The connector allows event notification in the following form: kernel
-driver or userspace process can ask connector to notify it when
-selected ids will be turned on or off (registered or unregistered its
-callback).  It is done by sending a special command to the connector
-driver (it also registers itself with id={-1, -1}).
-
-As example of this usage can be found in the cn_test.c module which
-uses the connector to request notification and to send messages.
-
-Reliability
-===========
-
-Netlink itself is not a reliable protocol.  That means that messages can
-be lost due to memory pressure or process' receiving queue overflowed,
-so caller is warned that it must be prepared.  That is why the struct
-cn_msg [main connector's message header] contains u32 seq and u32 ack
-fields.
-
-Userspace usage
-===============
-
-2.6.14 has a new netlink socket implementation, which by default does not
-allow people to send data to netlink groups other than 1.
-So, if you wish to use a netlink socket (for example using connector)
-with a different group number, the userspace application must subscribe to
-that group first.  It can be achieved by the following pseudocode::
-
-  s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
-
-  l_local.nl_family = AF_NETLINK;
-  l_local.nl_groups = 12345;
-  l_local.nl_pid = 0;
-
-  if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) {
-	perror("bind");
-	close(s);
-	return -1;
-  }
-
-  {
-	int on = l_local.nl_groups;
-	setsockopt(s, 270, 1, &on, sizeof(on));
-  }
-
-Where 270 above is SOL_NETLINK, and 1 is a NETLINK_ADD_MEMBERSHIP socket
-option.  To drop a multicast subscription, one should call the above socket
-option with the NETLINK_DROP_MEMBERSHIP parameter which is defined as 0.
-
-2.6.14 netlink code only allows to select a group which is less or equal to
-the maximum group number, which is used at netlink_kernel_create() time.
-In case of connector it is CN_NETLINK_USERS + 0xf, so if you want to use
-group number 12345, you must increment CN_NETLINK_USERS to that number.
-Additional 0xf numbers are allocated to be used by non-in-kernel users.
-
-Due to this limitation, group 0xffffffff does not work now, so one can
-not use add/remove connector's group notifications, but as far as I know,
-only cn_test.c test module used it.
-
-Some work in netlink area is still being done, so things can be changed in
-2.6.15 timeframe, if it will happen, documentation will be updated for that
-kernel.
-
-Code samples
-============
-
-Sample code for a connector test module and user space can be found
-in samples/connector/. To build this code, enable CONFIG_CONNECTOR
-and CONFIG_SAMPLES.
diff --git a/Documentation/console/console.rst b/Documentation/console/console.rst
deleted file mode 100644
index b374141b027e..000000000000
--- a/Documentation/console/console.rst
+++ /dev/null
@@ -1,152 +0,0 @@
-:orphan:
-
-===============
-Console Drivers
-===============
-
-The Linux kernel has 2 general types of console drivers.  The first type is
-assigned by the kernel to all the virtual consoles during the boot process.
-This type will be called 'system driver', and only one system driver is allowed
-to exist. The system driver is persistent and it can never be unloaded, though
-it may become inactive.
-
-The second type has to be explicitly loaded and unloaded. This will be called
-'modular driver' by this document. Multiple modular drivers can coexist at
-any time with each driver sharing the console with other drivers including
-the system driver. However, modular drivers cannot take over the console
-that is currently occupied by another modular driver. (Exception: Drivers that
-call do_take_over_console() will succeed in the takeover regardless of the type
-of driver occupying the consoles.) They can only take over the console that is
-occupied by the system driver. In the same token, if the modular driver is
-released by the console, the system driver will take over.
-
-Modular drivers, from the programmer's point of view, have to call::
-
-	 do_take_over_console() - load and bind driver to console layer
-	 give_up_console() - unload driver; it will only work if driver
-			     is fully unbound
-
-In newer kernels, the following are also available::
-
-	 do_register_con_driver()
-	 do_unregister_con_driver()
-
-If sysfs is enabled, the contents of /sys/class/vtconsole can be
-examined. This shows the console backends currently registered by the
-system which are named vtcon<n> where <n> is an integer from 0 to 15.
-Thus::
-
-       ls /sys/class/vtconsole
-       .  ..  vtcon0  vtcon1
-
-Each directory in /sys/class/vtconsole has 3 files::
-
-     ls /sys/class/vtconsole/vtcon0
-     .  ..  bind  name  uevent
-
-What do these files signify?
-
-     1. bind - this is a read/write file. It shows the status of the driver if
-        read, or acts to bind or unbind the driver to the virtual consoles
-        when written to. The possible values are:
-
-	0
-	  - means the driver is not bound and if echo'ed, commands the driver
-	    to unbind
-
-        1
-	  - means the driver is bound and if echo'ed, commands the driver to
-	    bind
-
-     2. name - read-only file. Shows the name of the driver in this format::
-
-	  cat /sys/class/vtconsole/vtcon0/name
-	  (S) VGA+
-
-	      '(S)' stands for a (S)ystem driver, i.e., it cannot be directly
-	      commanded to bind or unbind
-
-	      'VGA+' is the name of the driver
-
-	  cat /sys/class/vtconsole/vtcon1/name
-	  (M) frame buffer device
-
-	      In this case, '(M)' stands for a (M)odular driver, one that can be
-	      directly commanded to bind or unbind.
-
-     3. uevent - ignore this file
-
-When unbinding, the modular driver is detached first, and then the system
-driver takes over the consoles vacated by the driver. Binding, on the other
-hand, will bind the driver to the consoles that are currently occupied by a
-system driver.
-
-NOTE1:
-  Binding and unbinding must be selected in Kconfig. It's under::
-
-    Device Drivers ->
-	Character devices ->
-		Support for binding and unbinding console drivers
-
-NOTE2:
-  If any of the virtual consoles are in KD_GRAPHICS mode, then binding or
-  unbinding will not succeed. An example of an application that sets the
-  console to KD_GRAPHICS is X.
-
-How useful is this feature? This is very useful for console driver
-developers. By unbinding the driver from the console layer, one can unload the
-driver, make changes, recompile, reload and rebind the driver without any need
-for rebooting the kernel. For regular users who may want to switch from
-framebuffer console to VGA console and vice versa, this feature also makes
-this possible. (NOTE NOTE NOTE: Please read fbcon.txt under Documentation/fb
-for more details.)
-
-Notes for developers
-====================
-
-do_take_over_console() is now broken up into::
-
-     do_register_con_driver()
-     do_bind_con_driver() - private function
-
-give_up_console() is a wrapper to do_unregister_con_driver(), and a driver must
-be fully unbound for this call to succeed. con_is_bound() will check if the
-driver is bound or not.
-
-Guidelines for console driver writers
-=====================================
-
-In order for binding to and unbinding from the console to properly work,
-console drivers must follow these guidelines:
-
-1. All drivers, except system drivers, must call either do_register_con_driver()
-   or do_take_over_console(). do_register_con_driver() will just add the driver
-   to the console's internal list. It won't take over the
-   console. do_take_over_console(), as it name implies, will also take over (or
-   bind to) the console.
-
-2. All resources allocated during con->con_init() must be released in
-   con->con_deinit().
-
-3. All resources allocated in con->con_startup() must be released when the
-   driver, which was previously bound, becomes unbound.  The console layer
-   does not have a complementary call to con->con_startup() so it's up to the
-   driver to check when it's legal to release these resources. Calling
-   con_is_bound() in con->con_deinit() will help.  If the call returned
-   false(), then it's safe to release the resources.  This balance has to be
-   ensured because con->con_startup() can be called again when a request to
-   rebind the driver to the console arrives.
-
-4. Upon exit of the driver, ensure that the driver is totally unbound. If the
-   condition is satisfied, then the driver must call do_unregister_con_driver()
-   or give_up_console().
-
-5. do_unregister_con_driver() can also be called on conditions which make it
-   impossible for the driver to service console requests.  This can happen
-   with the framebuffer console that suddenly lost all of its drivers.
-
-The current crop of console drivers should still work correctly, but binding
-and unbinding them may cause problems. With minimal fixes, these drivers can
-be made to work correctly.
-
-Antonino Daplas <adaplas@pol.net>
diff --git a/Documentation/dcdbas.txt b/Documentation/dcdbas.txt
deleted file mode 100644
index 309cc57a7c1c..000000000000
--- a/Documentation/dcdbas.txt
+++ /dev/null
@@ -1,99 +0,0 @@
-===================================
-Dell Systems Management Base Driver
-===================================
-
-Overview
-========
-
-The Dell Systems Management Base Driver provides a sysfs interface for
-systems management software such as Dell OpenManage to perform system
-management interrupts and host control actions (system power cycle or
-power off after OS shutdown) on certain Dell systems.
-
-Dell OpenManage requires this driver on the following Dell PowerEdge systems:
-300, 1300, 1400, 400SC, 500SC, 1500SC, 1550, 600SC, 1600SC, 650, 1655MC,
-700, and 750.  Other Dell software such as the open source libsmbios project
-is expected to make use of this driver, and it may include the use of this
-driver on other Dell systems.
-
-The Dell libsmbios project aims towards providing access to as much BIOS
-information as possible.  See http://linux.dell.com/libsmbios/main/ for
-more information about the libsmbios project.
-
-
-System Management Interrupt
-===========================
-
-On some Dell systems, systems management software must access certain
-management information via a system management interrupt (SMI).  The SMI data
-buffer must reside in 32-bit address space, and the physical address of the
-buffer is required for the SMI.  The driver maintains the memory required for
-the SMI and provides a way for the application to generate the SMI.
-The driver creates the following sysfs entries for systems management
-software to perform these system management interrupts::
-
-	/sys/devices/platform/dcdbas/smi_data
-	/sys/devices/platform/dcdbas/smi_data_buf_phys_addr
-	/sys/devices/platform/dcdbas/smi_data_buf_size
-	/sys/devices/platform/dcdbas/smi_request
-
-Systems management software must perform the following steps to execute
-a SMI using this driver:
-
-1) Lock smi_data.
-2) Write system management command to smi_data.
-3) Write "1" to smi_request to generate a calling interface SMI or
-   "2" to generate a raw SMI.
-4) Read system management command response from smi_data.
-5) Unlock smi_data.
-
-
-Host Control Action
-===================
-
-Dell OpenManage supports a host control feature that allows the administrator
-to perform a power cycle or power off of the system after the OS has finished
-shutting down.  On some Dell systems, this host control feature requires that
-a driver perform a SMI after the OS has finished shutting down.
-
-The driver creates the following sysfs entries for systems management software
-to schedule the driver to perform a power cycle or power off host control
-action after the system has finished shutting down:
-
-/sys/devices/platform/dcdbas/host_control_action
-/sys/devices/platform/dcdbas/host_control_smi_type
-/sys/devices/platform/dcdbas/host_control_on_shutdown
-
-Dell OpenManage performs the following steps to execute a power cycle or
-power off host control action using this driver:
-
-1) Write host control action to be performed to host_control_action.
-2) Write type of SMI that driver needs to perform to host_control_smi_type.
-3) Write "1" to host_control_on_shutdown to enable host control action.
-4) Initiate OS shutdown.
-   (Driver will perform host control SMI when it is notified that the OS
-   has finished shutting down.)
-
-
-Host Control SMI Type
-=====================
-
-The following table shows the value to write to host_control_smi_type to
-perform a power cycle or power off host control action:
-
-=================== =====================
-PowerEdge System    Host Control SMI Type
-=================== =====================
-      300             HC_SMITYPE_TYPE1
-     1300             HC_SMITYPE_TYPE1
-     1400             HC_SMITYPE_TYPE2
-      500SC           HC_SMITYPE_TYPE2
-     1500SC           HC_SMITYPE_TYPE2
-     1550             HC_SMITYPE_TYPE2
-      600SC           HC_SMITYPE_TYPE2
-     1600SC           HC_SMITYPE_TYPE2
-      650             HC_SMITYPE_TYPE2
-     1655MC           HC_SMITYPE_TYPE2
-      700             HC_SMITYPE_TYPE3
-      750             HC_SMITYPE_TYPE3
-=================== =====================
diff --git a/Documentation/dell_rbu.txt b/Documentation/dell_rbu.txt
deleted file mode 100644
index 5d1ce7bcd04d..000000000000
--- a/Documentation/dell_rbu.txt
+++ /dev/null
@@ -1,128 +0,0 @@
-=============================================================
-Usage of the new open sourced rbu (Remote BIOS Update) driver
-=============================================================
-
-Purpose
-=======
-
-Document demonstrating the use of the Dell Remote BIOS Update driver.
-for updating BIOS images on Dell servers and desktops.
-
-Scope
-=====
-
-This document discusses the functionality of the rbu driver only.
-It does not cover the support needed from applications to enable the BIOS to
-update itself with the image downloaded in to the memory.
-
-Overview
-========
-
-This driver works with Dell OpenManage or Dell Update Packages for updating
-the BIOS on Dell servers (starting from servers sold since 1999), desktops
-and notebooks (starting from those sold in 2005).
-
-Please go to  http://support.dell.com register and you can find info on
-OpenManage and Dell Update packages (DUP).
-
-Libsmbios can also be used to update BIOS on Dell systems go to
-http://linux.dell.com/libsmbios/ for details.
-
-Dell_RBU driver supports BIOS update using the monolithic image and packetized
-image methods. In case of monolithic the driver allocates a contiguous chunk
-of physical pages having the BIOS image. In case of packetized the app
-using the driver breaks the image in to packets of fixed sizes and the driver
-would place each packet in contiguous physical memory. The driver also
-maintains a link list of packets for reading them back.
-
-If the dell_rbu driver is unloaded all the allocated memory is freed.
-
-The rbu driver needs to have an application (as mentioned above)which will
-inform the BIOS to enable the update in the next system reboot.
-
-The user should not unload the rbu driver after downloading the BIOS image
-or updating.
-
-The driver load creates the following directories under the /sys file system::
-
-	/sys/class/firmware/dell_rbu/loading
-	/sys/class/firmware/dell_rbu/data
-	/sys/devices/platform/dell_rbu/image_type
-	/sys/devices/platform/dell_rbu/data
-	/sys/devices/platform/dell_rbu/packet_size
-
-The driver supports two types of update mechanism; monolithic and packetized.
-These update mechanism depends upon the BIOS currently running on the system.
-Most of the Dell systems support a monolithic update where the BIOS image is
-copied to a single contiguous block of physical memory.
-
-In case of packet mechanism the single memory can be broken in smaller chunks
-of contiguous memory and the BIOS image is scattered in these packets.
-
-By default the driver uses monolithic memory for the update type. This can be
-changed to packets during the driver load time by specifying the load
-parameter image_type=packet.  This can also be changed later as below::
-
-	echo packet > /sys/devices/platform/dell_rbu/image_type
-
-In packet update mode the packet size has to be given before any packets can
-be downloaded. It is done as below::
-
-	echo XXXX > /sys/devices/platform/dell_rbu/packet_size
-
-In the packet update mechanism, the user needs to create a new file having
-packets of data arranged back to back. It can be done as follows
-The user creates packets header, gets the chunk of the BIOS image and
-places it next to the packetheader; now, the packetheader + BIOS image chunk
-added together should match the specified packet_size. This makes one
-packet, the user needs to create more such packets out of the entire BIOS
-image file and then arrange all these packets back to back in to one single
-file.
-
-This file is then copied to /sys/class/firmware/dell_rbu/data.
-Once this file gets to the driver, the driver extracts packet_size data from
-the file and spreads it across the physical memory in contiguous packet_sized
-space.
-
-This method makes sure that all the packets get to the driver in a single operation.
-
-In monolithic update the user simply get the BIOS image (.hdr file) and copies
-to the data file as is without any change to the BIOS image itself.
-
-Do the steps below to download the BIOS image.
-
-1) echo 1 > /sys/class/firmware/dell_rbu/loading
-2) cp bios_image.hdr /sys/class/firmware/dell_rbu/data
-3) echo 0 > /sys/class/firmware/dell_rbu/loading
-
-The /sys/class/firmware/dell_rbu/ entries will remain till the following is
-done.
-
-::
-
-	echo -1 > /sys/class/firmware/dell_rbu/loading
-
-Until this step is completed the driver cannot be unloaded.
-
-Also echoing either mono, packet or init in to image_type will free up the
-memory allocated by the driver.
-
-If a user by accident executes steps 1 and 3 above without executing step 2;
-it will make the /sys/class/firmware/dell_rbu/ entries disappear.
-
-The entries can be recreated by doing the following::
-
-	echo init > /sys/devices/platform/dell_rbu/image_type
-
-.. note:: echoing init in image_type does not change it original value.
-
-Also the driver provides /sys/devices/platform/dell_rbu/data readonly file to
-read back the image downloaded.
-
-.. note::
-
-   After updating the BIOS image a user mode application needs to execute
-   code which sends the BIOS update request to the BIOS. So on the next reboot
-   the BIOS knows about the new image downloaded and it updates itself.
-   Also don't unload the rbu driver if the image has to be updated.
-
diff --git a/Documentation/driver-api/bt8xxgpio.rst b/Documentation/driver-api/bt8xxgpio.rst
new file mode 100644
index 000000000000..a845feb074de
--- /dev/null
+++ b/Documentation/driver-api/bt8xxgpio.rst
@@ -0,0 +1,62 @@
+===================================================================
+A driver for a selfmade cheap BT8xx based PCI GPIO-card (bt8xxgpio)
+===================================================================
+
+For advanced documentation, see http://www.bu3sch.de/btgpio.php
+
+A generic digital 24-port PCI GPIO card can be built out of an ordinary
+Brooktree bt848, bt849, bt878 or bt879 based analog TV tuner card. The
+Brooktree chip is used in old analog Hauppauge WinTV PCI cards. You can easily
+find them used for low prices on the net.
+
+The bt8xx chip does have 24 digital GPIO ports.
+These ports are accessible via 24 pins on the SMD chip package.
+
+
+How to physically access the GPIO pins
+======================================
+
+The are several ways to access these pins. One might unsolder the whole chip
+and put it on a custom PCI board, or one might only unsolder each individual
+GPIO pin and solder that to some tiny wire. As the chip package really is tiny
+there are some advanced soldering skills needed in any case.
+
+The physical pinouts are drawn in the following ASCII art.
+The GPIO pins are marked with G00-G23::
+
+                                           G G G G G G G G G G G G     G G G G G G
+                                           0 0 0 0 0 0 0 0 0 0 1 1     1 1 1 1 1 1
+                                           0 1 2 3 4 5 6 7 8 9 0 1     2 3 4 5 6 7
+           | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+           ---------------------------------------------------------------------------
+         --|                               ^                                     ^   |--
+         --|                               pin 86                           pin 67   |--
+         --|                                                                         |--
+         --|                                                               pin 61 >  |-- G18
+         --|                                                                         |-- G19
+         --|                                                                         |-- G20
+         --|                                                                         |-- G21
+         --|                                                                         |-- G22
+         --|                                                               pin 56 >  |-- G23
+         --|                                                                         |--
+         --|                           Brooktree 878/879                             |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|   O                                                                     |--
+         --|                                                                         |--
+           ---------------------------------------------------------------------------
+           | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+           ^
+           This is pin 1
+
diff --git a/Documentation/driver-api/connector.rst b/Documentation/driver-api/connector.rst
new file mode 100644
index 000000000000..c100c7482289
--- /dev/null
+++ b/Documentation/driver-api/connector.rst
@@ -0,0 +1,156 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+Kernel Connector
+================
+
+Kernel connector - new netlink based userspace <-> kernel space easy
+to use communication module.
+
+The Connector driver makes it easy to connect various agents using a
+netlink based network.  One must register a callback and an identifier.
+When the driver receives a special netlink message with the appropriate
+identifier, the appropriate callback will be called.
+
+From the userspace point of view it's quite straightforward:
+
+	- socket();
+	- bind();
+	- send();
+	- recv();
+
+But if kernelspace wants to use the full power of such connections, the
+driver writer must create special sockets, must know about struct sk_buff
+handling, etc...  The Connector driver allows any kernelspace agents to use
+netlink based networking for inter-process communication in a significantly
+easier way::
+
+  int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
+  void cn_netlink_send_multi(struct cn_msg *msg, u16 len, u32 portid, u32 __group, int gfp_mask);
+  void cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group, int gfp_mask);
+
+  struct cb_id
+  {
+	__u32			idx;
+	__u32			val;
+  };
+
+idx and val are unique identifiers which must be registered in the
+connector.h header for in-kernel usage.  `void (*callback) (void *)` is a
+callback function which will be called when a message with above idx.val
+is received by the connector core.  The argument for that function must
+be dereferenced to `struct cn_msg *`::
+
+  struct cn_msg
+  {
+	struct cb_id		id;
+
+	__u32			seq;
+	__u32			ack;
+
+	__u32			len;	/* Length of the following data */
+	__u8			data[0];
+  };
+
+Connector interfaces
+====================
+
+ .. kernel-doc:: include/linux/connector.h
+
+ Note:
+   When registering new callback user, connector core assigns
+   netlink group to the user which is equal to its id.idx.
+
+Protocol description
+====================
+
+The current framework offers a transport layer with fixed headers.  The
+recommended protocol which uses such a header is as following:
+
+msg->seq and msg->ack are used to determine message genealogy.  When
+someone sends a message, they use a locally unique sequence and random
+acknowledge number.  The sequence number may be copied into
+nlmsghdr->nlmsg_seq too.
+
+The sequence number is incremented with each message sent.
+
+If you expect a reply to the message, then the sequence number in the
+received message MUST be the same as in the original message, and the
+acknowledge number MUST be the same + 1.
+
+If we receive a message and its sequence number is not equal to one we
+are expecting, then it is a new message.  If we receive a message and
+its sequence number is the same as one we are expecting, but its
+acknowledge is not equal to the sequence number in the original
+message + 1, then it is a new message.
+
+Obviously, the protocol header contains the above id.
+
+The connector allows event notification in the following form: kernel
+driver or userspace process can ask connector to notify it when
+selected ids will be turned on or off (registered or unregistered its
+callback).  It is done by sending a special command to the connector
+driver (it also registers itself with id={-1, -1}).
+
+As example of this usage can be found in the cn_test.c module which
+uses the connector to request notification and to send messages.
+
+Reliability
+===========
+
+Netlink itself is not a reliable protocol.  That means that messages can
+be lost due to memory pressure or process' receiving queue overflowed,
+so caller is warned that it must be prepared.  That is why the struct
+cn_msg [main connector's message header] contains u32 seq and u32 ack
+fields.
+
+Userspace usage
+===============
+
+2.6.14 has a new netlink socket implementation, which by default does not
+allow people to send data to netlink groups other than 1.
+So, if you wish to use a netlink socket (for example using connector)
+with a different group number, the userspace application must subscribe to
+that group first.  It can be achieved by the following pseudocode::
+
+  s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+
+  l_local.nl_family = AF_NETLINK;
+  l_local.nl_groups = 12345;
+  l_local.nl_pid = 0;
+
+  if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) {
+	perror("bind");
+	close(s);
+	return -1;
+  }
+
+  {
+	int on = l_local.nl_groups;
+	setsockopt(s, 270, 1, &on, sizeof(on));
+  }
+
+Where 270 above is SOL_NETLINK, and 1 is a NETLINK_ADD_MEMBERSHIP socket
+option.  To drop a multicast subscription, one should call the above socket
+option with the NETLINK_DROP_MEMBERSHIP parameter which is defined as 0.
+
+2.6.14 netlink code only allows to select a group which is less or equal to
+the maximum group number, which is used at netlink_kernel_create() time.
+In case of connector it is CN_NETLINK_USERS + 0xf, so if you want to use
+group number 12345, you must increment CN_NETLINK_USERS to that number.
+Additional 0xf numbers are allocated to be used by non-in-kernel users.
+
+Due to this limitation, group 0xffffffff does not work now, so one can
+not use add/remove connector's group notifications, but as far as I know,
+only cn_test.c test module used it.
+
+Some work in netlink area is still being done, so things can be changed in
+2.6.15 timeframe, if it will happen, documentation will be updated for that
+kernel.
+
+Code samples
+============
+
+Sample code for a connector test module and user space can be found
+in samples/connector/. To build this code, enable CONFIG_CONNECTOR
+and CONFIG_SAMPLES.
diff --git a/Documentation/driver-api/console.rst b/Documentation/driver-api/console.rst
new file mode 100644
index 000000000000..8394ad7747ac
--- /dev/null
+++ b/Documentation/driver-api/console.rst
@@ -0,0 +1,152 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Console Drivers
+===============
+
+The Linux kernel has 2 general types of console drivers.  The first type is
+assigned by the kernel to all the virtual consoles during the boot process.
+This type will be called 'system driver', and only one system driver is allowed
+to exist. The system driver is persistent and it can never be unloaded, though
+it may become inactive.
+
+The second type has to be explicitly loaded and unloaded. This will be called
+'modular driver' by this document. Multiple modular drivers can coexist at
+any time with each driver sharing the console with other drivers including
+the system driver. However, modular drivers cannot take over the console
+that is currently occupied by another modular driver. (Exception: Drivers that
+call do_take_over_console() will succeed in the takeover regardless of the type
+of driver occupying the consoles.) They can only take over the console that is
+occupied by the system driver. In the same token, if the modular driver is
+released by the console, the system driver will take over.
+
+Modular drivers, from the programmer's point of view, have to call::
+
+	 do_take_over_console() - load and bind driver to console layer
+	 give_up_console() - unload driver; it will only work if driver
+			     is fully unbound
+
+In newer kernels, the following are also available::
+
+	 do_register_con_driver()
+	 do_unregister_con_driver()
+
+If sysfs is enabled, the contents of /sys/class/vtconsole can be
+examined. This shows the console backends currently registered by the
+system which are named vtcon<n> where <n> is an integer from 0 to 15.
+Thus::
+
+       ls /sys/class/vtconsole
+       .  ..  vtcon0  vtcon1
+
+Each directory in /sys/class/vtconsole has 3 files::
+
+     ls /sys/class/vtconsole/vtcon0
+     .  ..  bind  name  uevent
+
+What do these files signify?
+
+     1. bind - this is a read/write file. It shows the status of the driver if
+        read, or acts to bind or unbind the driver to the virtual consoles
+        when written to. The possible values are:
+
+	0
+	  - means the driver is not bound and if echo'ed, commands the driver
+	    to unbind
+
+        1
+	  - means the driver is bound and if echo'ed, commands the driver to
+	    bind
+
+     2. name - read-only file. Shows the name of the driver in this format::
+
+	  cat /sys/class/vtconsole/vtcon0/name
+	  (S) VGA+
+
+	      '(S)' stands for a (S)ystem driver, i.e., it cannot be directly
+	      commanded to bind or unbind
+
+	      'VGA+' is the name of the driver
+
+	  cat /sys/class/vtconsole/vtcon1/name
+	  (M) frame buffer device
+
+	      In this case, '(M)' stands for a (M)odular driver, one that can be
+	      directly commanded to bind or unbind.
+
+     3. uevent - ignore this file
+
+When unbinding, the modular driver is detached first, and then the system
+driver takes over the consoles vacated by the driver. Binding, on the other
+hand, will bind the driver to the consoles that are currently occupied by a
+system driver.
+
+NOTE1:
+  Binding and unbinding must be selected in Kconfig. It's under::
+
+    Device Drivers ->
+	Character devices ->
+		Support for binding and unbinding console drivers
+
+NOTE2:
+  If any of the virtual consoles are in KD_GRAPHICS mode, then binding or
+  unbinding will not succeed. An example of an application that sets the
+  console to KD_GRAPHICS is X.
+
+How useful is this feature? This is very useful for console driver
+developers. By unbinding the driver from the console layer, one can unload the
+driver, make changes, recompile, reload and rebind the driver without any need
+for rebooting the kernel. For regular users who may want to switch from
+framebuffer console to VGA console and vice versa, this feature also makes
+this possible. (NOTE NOTE NOTE: Please read fbcon.txt under Documentation/fb
+for more details.)
+
+Notes for developers
+====================
+
+do_take_over_console() is now broken up into::
+
+     do_register_con_driver()
+     do_bind_con_driver() - private function
+
+give_up_console() is a wrapper to do_unregister_con_driver(), and a driver must
+be fully unbound for this call to succeed. con_is_bound() will check if the
+driver is bound or not.
+
+Guidelines for console driver writers
+=====================================
+
+In order for binding to and unbinding from the console to properly work,
+console drivers must follow these guidelines:
+
+1. All drivers, except system drivers, must call either do_register_con_driver()
+   or do_take_over_console(). do_register_con_driver() will just add the driver
+   to the console's internal list. It won't take over the
+   console. do_take_over_console(), as it name implies, will also take over (or
+   bind to) the console.
+
+2. All resources allocated during con->con_init() must be released in
+   con->con_deinit().
+
+3. All resources allocated in con->con_startup() must be released when the
+   driver, which was previously bound, becomes unbound.  The console layer
+   does not have a complementary call to con->con_startup() so it's up to the
+   driver to check when it's legal to release these resources. Calling
+   con_is_bound() in con->con_deinit() will help.  If the call returned
+   false(), then it's safe to release the resources.  This balance has to be
+   ensured because con->con_startup() can be called again when a request to
+   rebind the driver to the console arrives.
+
+4. Upon exit of the driver, ensure that the driver is totally unbound. If the
+   condition is satisfied, then the driver must call do_unregister_con_driver()
+   or give_up_console().
+
+5. do_unregister_con_driver() can also be called on conditions which make it
+   impossible for the driver to service console requests.  This can happen
+   with the framebuffer console that suddenly lost all of its drivers.
+
+The current crop of console drivers should still work correctly, but binding
+and unbinding them may cause problems. With minimal fixes, these drivers can
+be made to work correctly.
+
+Antonino Daplas <adaplas@pol.net>
diff --git a/Documentation/driver-api/dcdbas.rst b/Documentation/driver-api/dcdbas.rst
new file mode 100644
index 000000000000..309cc57a7c1c
--- /dev/null
+++ b/Documentation/driver-api/dcdbas.rst
@@ -0,0 +1,99 @@
+===================================
+Dell Systems Management Base Driver
+===================================
+
+Overview
+========
+
+The Dell Systems Management Base Driver provides a sysfs interface for
+systems management software such as Dell OpenManage to perform system
+management interrupts and host control actions (system power cycle or
+power off after OS shutdown) on certain Dell systems.
+
+Dell OpenManage requires this driver on the following Dell PowerEdge systems:
+300, 1300, 1400, 400SC, 500SC, 1500SC, 1550, 600SC, 1600SC, 650, 1655MC,
+700, and 750.  Other Dell software such as the open source libsmbios project
+is expected to make use of this driver, and it may include the use of this
+driver on other Dell systems.
+
+The Dell libsmbios project aims towards providing access to as much BIOS
+information as possible.  See http://linux.dell.com/libsmbios/main/ for
+more information about the libsmbios project.
+
+
+System Management Interrupt
+===========================
+
+On some Dell systems, systems management software must access certain
+management information via a system management interrupt (SMI).  The SMI data
+buffer must reside in 32-bit address space, and the physical address of the
+buffer is required for the SMI.  The driver maintains the memory required for
+the SMI and provides a way for the application to generate the SMI.
+The driver creates the following sysfs entries for systems management
+software to perform these system management interrupts::
+
+	/sys/devices/platform/dcdbas/smi_data
+	/sys/devices/platform/dcdbas/smi_data_buf_phys_addr
+	/sys/devices/platform/dcdbas/smi_data_buf_size
+	/sys/devices/platform/dcdbas/smi_request
+
+Systems management software must perform the following steps to execute
+a SMI using this driver:
+
+1) Lock smi_data.
+2) Write system management command to smi_data.
+3) Write "1" to smi_request to generate a calling interface SMI or
+   "2" to generate a raw SMI.
+4) Read system management command response from smi_data.
+5) Unlock smi_data.
+
+
+Host Control Action
+===================
+
+Dell OpenManage supports a host control feature that allows the administrator
+to perform a power cycle or power off of the system after the OS has finished
+shutting down.  On some Dell systems, this host control feature requires that
+a driver perform a SMI after the OS has finished shutting down.
+
+The driver creates the following sysfs entries for systems management software
+to schedule the driver to perform a power cycle or power off host control
+action after the system has finished shutting down:
+
+/sys/devices/platform/dcdbas/host_control_action
+/sys/devices/platform/dcdbas/host_control_smi_type
+/sys/devices/platform/dcdbas/host_control_on_shutdown
+
+Dell OpenManage performs the following steps to execute a power cycle or
+power off host control action using this driver:
+
+1) Write host control action to be performed to host_control_action.
+2) Write type of SMI that driver needs to perform to host_control_smi_type.
+3) Write "1" to host_control_on_shutdown to enable host control action.
+4) Initiate OS shutdown.
+   (Driver will perform host control SMI when it is notified that the OS
+   has finished shutting down.)
+
+
+Host Control SMI Type
+=====================
+
+The following table shows the value to write to host_control_smi_type to
+perform a power cycle or power off host control action:
+
+=================== =====================
+PowerEdge System    Host Control SMI Type
+=================== =====================
+      300             HC_SMITYPE_TYPE1
+     1300             HC_SMITYPE_TYPE1
+     1400             HC_SMITYPE_TYPE2
+      500SC           HC_SMITYPE_TYPE2
+     1500SC           HC_SMITYPE_TYPE2
+     1550             HC_SMITYPE_TYPE2
+      600SC           HC_SMITYPE_TYPE2
+     1600SC           HC_SMITYPE_TYPE2
+      650             HC_SMITYPE_TYPE2
+     1655MC           HC_SMITYPE_TYPE2
+      700             HC_SMITYPE_TYPE3
+      750             HC_SMITYPE_TYPE3
+=================== =====================
diff --git a/Documentation/driver-api/dell_rbu.rst b/Documentation/driver-api/dell_rbu.rst
new file mode 100644
index 000000000000..5d1ce7bcd04d
--- /dev/null
+++ b/Documentation/driver-api/dell_rbu.rst
@@ -0,0 +1,128 @@
+=============================================================
+Usage of the new open sourced rbu (Remote BIOS Update) driver
+=============================================================
+
+Purpose
+=======
+
+Document demonstrating the use of the Dell Remote BIOS Update driver.
+for updating BIOS images on Dell servers and desktops.
+
+Scope
+=====
+
+This document discusses the functionality of the rbu driver only.
+It does not cover the support needed from applications to enable the BIOS to
+update itself with the image downloaded in to the memory.
+
+Overview
+========
+
+This driver works with Dell OpenManage or Dell Update Packages for updating
+the BIOS on Dell servers (starting from servers sold since 1999), desktops
+and notebooks (starting from those sold in 2005).
+
+Please go to  http://support.dell.com register and you can find info on
+OpenManage and Dell Update packages (DUP).
+
+Libsmbios can also be used to update BIOS on Dell systems go to
+http://linux.dell.com/libsmbios/ for details.
+
+Dell_RBU driver supports BIOS update using the monolithic image and packetized
+image methods. In case of monolithic the driver allocates a contiguous chunk
+of physical pages having the BIOS image. In case of packetized the app
+using the driver breaks the image in to packets of fixed sizes and the driver
+would place each packet in contiguous physical memory. The driver also
+maintains a link list of packets for reading them back.
+
+If the dell_rbu driver is unloaded all the allocated memory is freed.
+
+The rbu driver needs to have an application (as mentioned above)which will
+inform the BIOS to enable the update in the next system reboot.
+
+The user should not unload the rbu driver after downloading the BIOS image
+or updating.
+
+The driver load creates the following directories under the /sys file system::
+
+	/sys/class/firmware/dell_rbu/loading
+	/sys/class/firmware/dell_rbu/data
+	/sys/devices/platform/dell_rbu/image_type
+	/sys/devices/platform/dell_rbu/data
+	/sys/devices/platform/dell_rbu/packet_size
+
+The driver supports two types of update mechanism; monolithic and packetized.
+These update mechanism depends upon the BIOS currently running on the system.
+Most of the Dell systems support a monolithic update where the BIOS image is
+copied to a single contiguous block of physical memory.
+
+In case of packet mechanism the single memory can be broken in smaller chunks
+of contiguous memory and the BIOS image is scattered in these packets.
+
+By default the driver uses monolithic memory for the update type. This can be
+changed to packets during the driver load time by specifying the load
+parameter image_type=packet.  This can also be changed later as below::
+
+	echo packet > /sys/devices/platform/dell_rbu/image_type
+
+In packet update mode the packet size has to be given before any packets can
+be downloaded. It is done as below::
+
+	echo XXXX > /sys/devices/platform/dell_rbu/packet_size
+
+In the packet update mechanism, the user needs to create a new file having
+packets of data arranged back to back. It can be done as follows
+The user creates packets header, gets the chunk of the BIOS image and
+places it next to the packetheader; now, the packetheader + BIOS image chunk
+added together should match the specified packet_size. This makes one
+packet, the user needs to create more such packets out of the entire BIOS
+image file and then arrange all these packets back to back in to one single
+file.
+
+This file is then copied to /sys/class/firmware/dell_rbu/data.
+Once this file gets to the driver, the driver extracts packet_size data from
+the file and spreads it across the physical memory in contiguous packet_sized
+space.
+
+This method makes sure that all the packets get to the driver in a single operation.
+
+In monolithic update the user simply get the BIOS image (.hdr file) and copies
+to the data file as is without any change to the BIOS image itself.
+
+Do the steps below to download the BIOS image.
+
+1) echo 1 > /sys/class/firmware/dell_rbu/loading
+2) cp bios_image.hdr /sys/class/firmware/dell_rbu/data
+3) echo 0 > /sys/class/firmware/dell_rbu/loading
+
+The /sys/class/firmware/dell_rbu/ entries will remain till the following is
+done.
+
+::
+
+	echo -1 > /sys/class/firmware/dell_rbu/loading
+
+Until this step is completed the driver cannot be unloaded.
+
+Also echoing either mono, packet or init in to image_type will free up the
+memory allocated by the driver.
+
+If a user by accident executes steps 1 and 3 above without executing step 2;
+it will make the /sys/class/firmware/dell_rbu/ entries disappear.
+
+The entries can be recreated by doing the following::
+
+	echo init > /sys/devices/platform/dell_rbu/image_type
+
+.. note:: echoing init in image_type does not change it original value.
+
+Also the driver provides /sys/devices/platform/dell_rbu/data readonly file to
+read back the image downloaded.
+
+.. note::
+
+   After updating the BIOS image a user mode application needs to execute
+   code which sends the BIOS update request to the BIOS. So on the next reboot
+   the BIOS knows about the new image downloaded and it updates itself.
+   Also don't unload the rbu driver if the image has to be updated.
+
diff --git a/Documentation/driver-api/edid.rst b/Documentation/driver-api/edid.rst
new file mode 100644
index 000000000000..b1b5acd501ed
--- /dev/null
+++ b/Documentation/driver-api/edid.rst
@@ -0,0 +1,58 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====
+EDID
+====
+
+In the good old days when graphics parameters were configured explicitly
+in a file called xorg.conf, even broken hardware could be managed.
+
+Today, with the advent of Kernel Mode Setting, a graphics board is
+either correctly working because all components follow the standards -
+or the computer is unusable, because the screen remains dark after
+booting or it displays the wrong area. Cases when this happens are:
+- The graphics board does not recognize the monitor.
+- The graphics board is unable to detect any EDID data.
+- The graphics board incorrectly forwards EDID data to the driver.
+- The monitor sends no or bogus EDID data.
+- A KVM sends its own EDID data instead of querying the connected monitor.
+Adding the kernel parameter "nomodeset" helps in most cases, but causes
+restrictions later on.
+
+As a remedy for such situations, the kernel configuration item
+CONFIG_DRM_LOAD_EDID_FIRMWARE was introduced. It allows to provide an
+individually prepared or corrected EDID data set in the /lib/firmware
+directory from where it is loaded via the firmware interface. The code
+(see drivers/gpu/drm/drm_edid_load.c) contains built-in data sets for
+commonly used screen resolutions (800x600, 1024x768, 1280x1024, 1600x1200,
+1680x1050, 1920x1080) as binary blobs, but the kernel source tree does
+not contain code to create these data. In order to elucidate the origin
+of the built-in binary EDID blobs and to facilitate the creation of
+individual data for a specific misbehaving monitor, commented sources
+and a Makefile environment are given here.
+
+To create binary EDID and C source code files from the existing data
+material, simply type "make".
+
+If you want to create your own EDID file, copy the file 1024x768.S,
+replace the settings with your own data and add a new target to the
+Makefile. Please note that the EDID data structure expects the timing
+values in a different way as compared to the standard X11 format.
+
+X11:
+  HTimings:
+    hdisp hsyncstart hsyncend htotal
+  VTimings:
+    vdisp vsyncstart vsyncend vtotal
+
+EDID::
+
+  #define XPIX hdisp
+  #define XBLANK htotal-hdisp
+  #define XOFFSET hsyncstart-hdisp
+  #define XPULSE hsyncend-hsyncstart
+
+  #define YPIX vdisp
+  #define YBLANK vtotal-vdisp
+  #define YOFFSET vsyncstart-vdisp
+  #define YPULSE vsyncend-vsyncstart
diff --git a/Documentation/driver-api/eisa.rst b/Documentation/driver-api/eisa.rst
new file mode 100644
index 000000000000..c07565ba57da
--- /dev/null
+++ b/Documentation/driver-api/eisa.rst
@@ -0,0 +1,230 @@
+================
+EISA bus support
+================
+
+:Author: Marc Zyngier <maz@wild-wind.fr.eu.org>
+
+This document groups random notes about porting EISA drivers to the
+new EISA/sysfs API.
+
+Starting from version 2.5.59, the EISA bus is almost given the same
+status as other much more mainstream busses such as PCI or USB. This
+has been possible through sysfs, which defines a nice enough set of
+abstractions to manage busses, devices and drivers.
+
+Although the new API is quite simple to use, converting existing
+drivers to the new infrastructure is not an easy task (mostly because
+detection code is generally also used to probe ISA cards). Moreover,
+most EISA drivers are among the oldest Linux drivers so, as you can
+imagine, some dust has settled here over the years.
+
+The EISA infrastructure is made up of three parts:
+
+    - The bus code implements most of the generic code. It is shared
+      among all the architectures that the EISA code runs on. It
+      implements bus probing (detecting EISA cards available on the bus),
+      allocates I/O resources, allows fancy naming through sysfs, and
+      offers interfaces for driver to register.
+
+    - The bus root driver implements the glue between the bus hardware
+      and the generic bus code. It is responsible for discovering the
+      device implementing the bus, and setting it up to be latter probed
+      by the bus code. This can go from something as simple as reserving
+      an I/O region on x86, to the rather more complex, like the hppa
+      EISA code. This is the part to implement in order to have EISA
+      running on an "new" platform.
+
+    - The driver offers the bus a list of devices that it manages, and
+      implements the necessary callbacks to probe and release devices
+      whenever told to.
+
+Every function/structure below lives in <linux/eisa.h>, which depends
+heavily on <linux/device.h>.
+
+Bus root driver
+===============
+
+::
+
+	int eisa_root_register (struct eisa_root_device *root);
+
+The eisa_root_register function is used to declare a device as the
+root of an EISA bus. The eisa_root_device structure holds a reference
+to this device, as well as some parameters for probing purposes::
+
+	struct eisa_root_device {
+		struct device   *dev;	 /* Pointer to bridge device */
+		struct resource *res;
+		unsigned long    bus_base_addr;
+		int		 slots;  /* Max slot number */
+		int		 force_probe; /* Probe even when no slot 0 */
+		u64		 dma_mask; /* from bridge device */
+		int              bus_nr; /* Set by eisa_root_register */
+		struct resource  eisa_root_res;	/* ditto */
+	};
+
+============= ======================================================
+node          used for eisa_root_register internal purpose
+dev           pointer to the root device
+res           root device I/O resource
+bus_base_addr slot 0 address on this bus
+slots	      max slot number to probe
+force_probe   Probe even when slot 0 is empty (no EISA mainboard)
+dma_mask      Default DMA mask. Usually the bridge device dma_mask.
+bus_nr	      unique bus id, set by eisa_root_register
+============= ======================================================
+
+Driver
+======
+
+::
+
+	int eisa_driver_register (struct eisa_driver *edrv);
+	void eisa_driver_unregister (struct eisa_driver *edrv);
+
+Clear enough ?
+
+::
+
+	struct eisa_device_id {
+		char sig[EISA_SIG_LEN];
+		unsigned long driver_data;
+	};
+
+	struct eisa_driver {
+		const struct eisa_device_id *id_table;
+		struct device_driver         driver;
+	};
+
+=============== ====================================================
+id_table	an array of NULL terminated EISA id strings,
+		followed by an empty string. Each string can
+		optionally be paired with a driver-dependent value
+		(driver_data).
+
+driver		a generic driver, such as described in
+		Documentation/driver-api/driver-model/driver.rst. Only .name,
+		.probe and .remove members are mandatory.
+=============== ====================================================
+
+An example is the 3c59x driver::
+
+	static struct eisa_device_id vortex_eisa_ids[] = {
+		{ "TCM5920", EISA_3C592_OFFSET },
+		{ "TCM5970", EISA_3C597_OFFSET },
+		{ "" }
+	};
+
+	static struct eisa_driver vortex_eisa_driver = {
+		.id_table = vortex_eisa_ids,
+		.driver   = {
+			.name    = "3c59x",
+			.probe   = vortex_eisa_probe,
+			.remove  = vortex_eisa_remove
+		}
+	};
+
+Device
+======
+
+The sysfs framework calls .probe and .remove functions upon device
+discovery and removal (note that the .remove function is only called
+when driver is built as a module).
+
+Both functions are passed a pointer to a 'struct device', which is
+encapsulated in a 'struct eisa_device' described as follows::
+
+	struct eisa_device {
+		struct eisa_device_id id;
+		int                   slot;
+		int                   state;
+		unsigned long         base_addr;
+		struct resource       res[EISA_MAX_RESOURCES];
+		u64                   dma_mask;
+		struct device         dev; /* generic device */
+	};
+
+======== ============================================================
+id	 EISA id, as read from device. id.driver_data is set from the
+	 matching driver EISA id.
+slot	 slot number which the device was detected on
+state    set of flags indicating the state of the device. Current
+	 flags are EISA_CONFIG_ENABLED and EISA_CONFIG_FORCED.
+res	 set of four 256 bytes I/O regions allocated to this device
+dma_mask DMA mask set from the parent device.
+dev	 generic device (see Documentation/driver-api/driver-model/device.rst)
+======== ============================================================
+
+You can get the 'struct eisa_device' from 'struct device' using the
+'to_eisa_device' macro.
+
+Misc stuff
+==========
+
+::
+
+	void eisa_set_drvdata (struct eisa_device *edev, void *data);
+
+Stores data into the device's driver_data area.
+
+::
+
+	void *eisa_get_drvdata (struct eisa_device *edev):
+
+Gets the pointer previously stored into the device's driver_data area.
+
+::
+
+	int eisa_get_region_index (void *addr);
+
+Returns the region number (0 <= x < EISA_MAX_RESOURCES) of a given
+address.
+
+Kernel parameters
+=================
+
+eisa_bus.enable_dev
+	A comma-separated list of slots to be enabled, even if the firmware
+	set the card as disabled. The driver must be able to properly
+	initialize the device in such conditions.
+
+eisa_bus.disable_dev
+	A comma-separated list of slots to be enabled, even if the firmware
+	set the card as enabled. The driver won't be called to handle this
+	device.
+
+virtual_root.force_probe
+	Force the probing code to probe EISA slots even when it cannot find an
+	EISA compliant mainboard (nothing appears on slot 0). Defaults to 0
+	(don't force), and set to 1 (force probing) when either
+	CONFIG_ALPHA_JENSEN or CONFIG_EISA_VLB_PRIMING are set.
+
+Random notes
+============
+
+Converting an EISA driver to the new API mostly involves *deleting*
+code (since probing is now in the core EISA code). Unfortunately, most
+drivers share their probing routine between ISA, and EISA. Special
+care must be taken when ripping out the EISA code, so other busses
+won't suffer from these surgical strikes...
+
+You *must not* expect any EISA device to be detected when returning
+from eisa_driver_register, since the chances are that the bus has not
+yet been probed. In fact, that's what happens most of the time (the
+bus root driver usually kicks in rather late in the boot process).
+Unfortunately, most drivers are doing the probing by themselves, and
+expect to have explored the whole machine when they exit their probe
+routine.
+
+For example, switching your favorite EISA SCSI card to the "hotplug"
+model is "the right thing"(tm).
+
+Thanks
+======
+
+I'd like to thank the following people for their help:
+
+- Xavier Benigni for lending me a wonderful Alpha Jensen,
+- James Bottomley, Jeff Garzik for getting this stuff into the kernel,
+- Andries Brouwer for contributing numerous EISA ids,
+- Catrin Jones for coping with far too many machines at home.
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 9fb03b7bdeb1..d1c6513dd20d 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -68,7 +68,33 @@ available subsections can be seen below.
    fpga/index
    acpi/index
    backlight/lp855x-driver.rst
+   bt8xxgpio
+   connector
+   console
+   dcdbas
+   dell_rbu
+   edid
+   eisa
+   isa
+   isapnp
    generic-counter
+   lightnvm-pblk
+   men-chameleon-bus
+   ntb
+   nvmem
+   parport-lowlevel
+   pti_intel_mid
+   pwm
+   rfkill
+   sgi-ioc4
+   sm501
+   smsc_ece1099
+   switchtec
+   sync_file
+   vfio-mediated-device
+   vfio
+   xillybus
+   zorro
 
 .. only::  subproject and html
 
diff --git a/Documentation/driver-api/isa.rst b/Documentation/driver-api/isa.rst
new file mode 100644
index 000000000000..def4a7b690b5
--- /dev/null
+++ b/Documentation/driver-api/isa.rst
@@ -0,0 +1,122 @@
+===========
+ISA Drivers
+===========
+
+The following text is adapted from the commit message of the initial
+commit of the ISA bus driver authored by Rene Herman.
+
+During the recent "isa drivers using platform devices" discussion it was
+pointed out that (ALSA) ISA drivers ran into the problem of not having
+the option to fail driver load (device registration rather) upon not
+finding their hardware due to a probe() error not being passed up
+through the driver model. In the course of that, I suggested a separate
+ISA bus might be best; Russell King agreed and suggested this bus could
+use the .match() method for the actual device discovery.
+
+The attached does this. For this old non (generically) discoverable ISA
+hardware only the driver itself can do discovery so as a difference with
+the platform_bus, this isa_bus also distributes match() up to the
+driver.
+
+As another difference: these devices only exist in the driver model due
+to the driver creating them because it might want to drive them, meaning
+that all device creation has been made internal as well.
+
+The usage model this provides is nice, and has been acked from the ALSA
+side by Takashi Iwai and Jaroslav Kysela. The ALSA driver module_init's
+now (for oldisa-only drivers) become::
+
+	static int __init alsa_card_foo_init(void)
+	{
+		return isa_register_driver(&snd_foo_isa_driver, SNDRV_CARDS);
+	}
+
+	static void __exit alsa_card_foo_exit(void)
+	{
+		isa_unregister_driver(&snd_foo_isa_driver);
+	}
+
+Quite like the other bus models therefore. This removes a lot of
+duplicated init code from the ALSA ISA drivers.
+
+The passed in isa_driver struct is the regular driver struct embedding a
+struct device_driver, the normal probe/remove/shutdown/suspend/resume
+callbacks, and as indicated that .match callback.
+
+The "SNDRV_CARDS" you see being passed in is a "unsigned int ndev"
+parameter, indicating how many devices to create and call our methods
+with.
+
+The platform_driver callbacks are called with a platform_device param;
+the isa_driver callbacks are being called with a ``struct device *dev,
+unsigned int id`` pair directly -- with the device creation completely
+internal to the bus it's much cleaner to not leak isa_dev's by passing
+them in at all. The id is the only thing we ever want other then the
+struct device anyways, and it makes for nicer code in the callbacks as
+well.
+
+With this additional .match() callback ISA drivers have all options. If
+ALSA would want to keep the old non-load behaviour, it could stick all
+of the old .probe in .match, which would only keep them registered after
+everything was found to be present and accounted for. If it wanted the
+behaviour of always loading as it inadvertently did for a bit after the
+changeover to platform devices, it could just not provide a .match() and
+do everything in .probe() as before.
+
+If it, as Takashi Iwai already suggested earlier as a way of following
+the model from saner buses more closely, wants to load when a later bind
+could conceivably succeed, it could use .match() for the prerequisites
+(such as checking the user wants the card enabled and that port/irq/dma
+values have been passed in) and .probe() for everything else. This is
+the nicest model.
+
+To the code...
+
+This exports only two functions; isa_{,un}register_driver().
+
+isa_register_driver() register's the struct device_driver, and then
+loops over the passed in ndev creating devices and registering them.
+This causes the bus match method to be called for them, which is::
+
+	int isa_bus_match(struct device *dev, struct device_driver *driver)
+	{
+		struct isa_driver *isa_driver = to_isa_driver(driver);
+
+		if (dev->platform_data == isa_driver) {
+			if (!isa_driver->match ||
+				isa_driver->match(dev, to_isa_dev(dev)->id))
+				return 1;
+			dev->platform_data = NULL;
+		}
+		return 0;
+	}
+
+The first thing this does is check if this device is in fact one of this
+driver's devices by seeing if the device's platform_data pointer is set
+to this driver. Platform devices compare strings, but we don't need to
+do that with everything being internal, so isa_register_driver() abuses
+dev->platform_data as a isa_driver pointer which we can then check here.
+I believe platform_data is available for this, but if rather not, moving
+the isa_driver pointer to the private struct isa_dev is ofcourse fine as
+well.
+
+Then, if the the driver did not provide a .match, it matches. If it did,
+the driver match() method is called to determine a match.
+
+If it did **not** match, dev->platform_data is reset to indicate this to
+isa_register_driver which can then unregister the device again.
+
+If during all this, there's any error, or no devices matched at all
+everything is backed out again and the error, or -ENODEV, is returned.
+
+isa_unregister_driver() just unregisters the matched devices and the
+driver itself.
+
+module_isa_driver is a helper macro for ISA drivers which do not do
+anything special in module init/exit. This eliminates a lot of
+boilerplate code. Each module may only use this macro once, and calling
+it replaces module_init and module_exit.
+
+max_num_isa_dev is a macro to determine the maximum possible number of
+ISA devices which may be registered in the I/O port address space given
+the address extent of the ISA devices.
diff --git a/Documentation/driver-api/isapnp.rst b/Documentation/driver-api/isapnp.rst
new file mode 100644
index 000000000000..8d0840ac847b
--- /dev/null
+++ b/Documentation/driver-api/isapnp.rst
@@ -0,0 +1,15 @@
+==========================================================
+ISA Plug & Play support by Jaroslav Kysela <perex@suse.cz>
+==========================================================
+
+Interface /proc/isapnp
+======================
+
+The interface has been removed. See pnp.txt for more details.
+
+Interface /proc/bus/isapnp
+==========================
+
+This directory allows access to ISA PnP cards and logical devices.
+The regular files contain the contents of ISA PnP registers for
+a logical device.
diff --git a/Documentation/driver-api/lightnvm-pblk.rst b/Documentation/driver-api/lightnvm-pblk.rst
new file mode 100644
index 000000000000..1040ed1cec81
--- /dev/null
+++ b/Documentation/driver-api/lightnvm-pblk.rst
@@ -0,0 +1,21 @@
+pblk: Physical Block Device Target
+==================================
+
+pblk implements a fully associative, host-based FTL that exposes a traditional
+block I/O interface. Its primary responsibilities are:
+
+  - Map logical addresses onto physical addresses (4KB granularity) in a
+    logical-to-physical (L2P) table.
+  - Maintain the integrity and consistency of the L2P table as well as its
+    recovery from normal tear down and power outage.
+  - Deal with controller- and media-specific constrains.
+  - Handle I/O errors.
+  - Implement garbage collection.
+  - Maintain consistency across the I/O stack during synchronization points.
+
+For more information please refer to:
+
+  http://lightnvm.io
+
+which maintains updated FAQs, manual pages, technical documentation, tools,
+contacts, etc.
diff --git a/Documentation/driver-api/men-chameleon-bus.rst b/Documentation/driver-api/men-chameleon-bus.rst
new file mode 100644
index 000000000000..1b1f048aa748
--- /dev/null
+++ b/Documentation/driver-api/men-chameleon-bus.rst
@@ -0,0 +1,175 @@
+=================
+MEN Chameleon Bus
+=================
+
+.. Table of Contents
+   =================
+   1 Introduction
+       1.1 Scope of this Document
+       1.2 Limitations of the current implementation
+   2 Architecture
+       2.1 MEN Chameleon Bus
+       2.2 Carrier Devices
+       2.3 Parser
+   3 Resource handling
+       3.1 Memory Resources
+       3.2 IRQs
+   4 Writing an MCB driver
+       4.1 The driver structure
+       4.2 Probing and attaching
+       4.3 Initializing the driver
+
+
+Introduction
+============
+
+This document describes the architecture and implementation of the MEN
+Chameleon Bus (called MCB throughout this document).
+
+Scope of this Document
+----------------------
+
+This document is intended to be a short overview of the current
+implementation and does by no means describe the complete possibilities of MCB
+based devices.
+
+Limitations of the current implementation
+-----------------------------------------
+
+The current implementation is limited to PCI and PCIe based carrier devices
+that only use a single memory resource and share the PCI legacy IRQ.  Not
+implemented are:
+
+- Multi-resource MCB devices like the VME Controller or M-Module carrier.
+- MCB devices that need another MCB device, like SRAM for a DMA Controller's
+  buffer descriptors or a video controller's video memory.
+- A per-carrier IRQ domain for carrier devices that have one (or more) IRQs
+  per MCB device like PCIe based carriers with MSI or MSI-X support.
+
+Architecture
+============
+
+MCB is divided into 3 functional blocks:
+
+- The MEN Chameleon Bus itself,
+- drivers for MCB Carrier Devices and
+- the parser for the Chameleon table.
+
+MEN Chameleon Bus
+-----------------
+
+The MEN Chameleon Bus is an artificial bus system that attaches to a so
+called Chameleon FPGA device found on some hardware produced my MEN Mikro
+Elektronik GmbH. These devices are multi-function devices implemented in a
+single FPGA and usually attached via some sort of PCI or PCIe link. Each
+FPGA contains a header section describing the content of the FPGA. The
+header lists the device id, PCI BAR, offset from the beginning of the PCI
+BAR, size in the FPGA, interrupt number and some other properties currently
+not handled by the MCB implementation.
+
+Carrier Devices
+---------------
+
+A carrier device is just an abstraction for the real world physical bus the
+Chameleon FPGA is attached to. Some IP Core drivers may need to interact with
+properties of the carrier device (like querying the IRQ number of a PCI
+device). To provide abstraction from the real hardware bus, an MCB carrier
+device provides callback methods to translate the driver's MCB function calls
+to hardware related function calls. For example a carrier device may
+implement the get_irq() method which can be translated into a hardware bus
+query for the IRQ number the device should use.
+
+Parser
+------
+
+The parser reads the first 512 bytes of a Chameleon device and parses the
+Chameleon table. Currently the parser only supports the Chameleon v2 variant
+of the Chameleon table but can easily be adopted to support an older or
+possible future variant. While parsing the table's entries new MCB devices
+are allocated and their resources are assigned according to the resource
+assignment in the Chameleon table. After resource assignment is finished, the
+MCB devices are registered at the MCB and thus at the driver core of the
+Linux kernel.
+
+Resource handling
+=================
+
+The current implementation assigns exactly one memory and one IRQ resource
+per MCB device. But this is likely going to change in the future.
+
+Memory Resources
+----------------
+
+Each MCB device has exactly one memory resource, which can be requested from
+the MCB bus. This memory resource is the physical address of the MCB device
+inside the carrier and is intended to be passed to ioremap() and friends. It
+is already requested from the kernel by calling request_mem_region().
+
+IRQs
+----
+
+Each MCB device has exactly one IRQ resource, which can be requested from the
+MCB bus. If a carrier device driver implements the ->get_irq() callback
+method, the IRQ number assigned by the carrier device will be returned,
+otherwise the IRQ number inside the Chameleon table will be returned. This
+number is suitable to be passed to request_irq().
+
+Writing an MCB driver
+=====================
+
+The driver structure
+--------------------
+
+Each MCB driver has a structure to identify the device driver as well as
+device ids which identify the IP Core inside the FPGA. The driver structure
+also contains callback methods which get executed on driver probe and
+removal from the system::
+
+	static const struct mcb_device_id foo_ids[] = {
+		{ .device = 0x123 },
+		{ }
+	};
+	MODULE_DEVICE_TABLE(mcb, foo_ids);
+
+	static struct mcb_driver foo_driver = {
+	driver = {
+		.name = "foo-bar",
+		.owner = THIS_MODULE,
+	},
+		.probe = foo_probe,
+		.remove = foo_remove,
+		.id_table = foo_ids,
+	};
+
+Probing and attaching
+---------------------
+
+When a driver is loaded and the MCB devices it services are found, the MCB
+core will call the driver's probe callback method. When the driver is removed
+from the system, the MCB core will call the driver's remove callback method::
+
+	static init foo_probe(struct mcb_device *mdev, const struct mcb_device_id *id);
+	static void foo_remove(struct mcb_device *mdev);
+
+Initializing the driver
+-----------------------
+
+When the kernel is booted or your foo driver module is inserted, you have to
+perform driver initialization. Usually it is enough to register your driver
+module at the MCB core::
+
+	static int __init foo_init(void)
+	{
+		return mcb_register_driver(&foo_driver);
+	}
+	module_init(foo_init);
+
+	static void __exit foo_exit(void)
+	{
+		mcb_unregister_driver(&foo_driver);
+	}
+	module_exit(foo_exit);
+
+The module_mcb_driver() macro can be used to reduce the above code::
+
+	module_mcb_driver(foo_driver);
diff --git a/Documentation/driver-api/ntb.rst b/Documentation/driver-api/ntb.rst
new file mode 100644
index 000000000000..074a423c853c
--- /dev/null
+++ b/Documentation/driver-api/ntb.rst
@@ -0,0 +1,236 @@
+===========
+NTB Drivers
+===========
+
+NTB (Non-Transparent Bridge) is a type of PCI-Express bridge chip that connects
+the separate memory systems of two or more computers to the same PCI-Express
+fabric. Existing NTB hardware supports a common feature set: doorbell
+registers and memory translation windows, as well as non common features like
+scratchpad and message registers. Scratchpad registers are read-and-writable
+registers that are accessible from either side of the device, so that peers can
+exchange a small amount of information at a fixed address. Message registers can
+be utilized for the same purpose. Additionally they are provided with with
+special status bits to make sure the information isn't rewritten by another
+peer. Doorbell registers provide a way for peers to send interrupt events.
+Memory windows allow translated read and write access to the peer memory.
+
+NTB Core Driver (ntb)
+=====================
+
+The NTB core driver defines an api wrapping the common feature set, and allows
+clients interested in NTB features to discover NTB the devices supported by
+hardware drivers.  The term "client" is used here to mean an upper layer
+component making use of the NTB api.  The term "driver," or "hardware driver,"
+is used here to mean a driver for a specific vendor and model of NTB hardware.
+
+NTB Client Drivers
+==================
+
+NTB client drivers should register with the NTB core driver.  After
+registering, the client probe and remove functions will be called appropriately
+as ntb hardware, or hardware drivers, are inserted and removed.  The
+registration uses the Linux Device framework, so it should feel familiar to
+anyone who has written a pci driver.
+
+NTB Typical client driver implementation
+----------------------------------------
+
+Primary purpose of NTB is to share some peace of memory between at least two
+systems. So the NTB device features like Scratchpad/Message registers are
+mainly used to perform the proper memory window initialization. Typically
+there are two types of memory window interfaces supported by the NTB API:
+inbound translation configured on the local ntb port and outbound translation
+configured by the peer, on the peer ntb port. The first type is
+depicted on the next figure::
+
+ Inbound translation:
+
+ Memory:              Local NTB Port:      Peer NTB Port:      Peer MMIO:
+  ____________
+ | dma-mapped |-ntb_mw_set_trans(addr)  |
+ | memory     |        _v____________   |   ______________
+ | (addr)     |<======| MW xlat addr |<====| MW base addr |<== memory-mapped IO
+ |------------|       |--------------|  |  |--------------|
+
+So typical scenario of the first type memory window initialization looks:
+1) allocate a memory region, 2) put translated address to NTB config,
+3) somehow notify a peer device of performed initialization, 4) peer device
+maps corresponding outbound memory window so to have access to the shared
+memory region.
+
+The second type of interface, that implies the shared windows being
+initialized by a peer device, is depicted on the figure::
+
+ Outbound translation:
+
+ Memory:        Local NTB Port:    Peer NTB Port:      Peer MMIO:
+  ____________                      ______________
+ | dma-mapped |                |   | MW base addr |<== memory-mapped IO
+ | memory     |                |   |--------------|
+ | (addr)     |<===================| MW xlat addr |<-ntb_peer_mw_set_trans(addr)
+ |------------|                |   |--------------|
+
+Typical scenario of the second type interface initialization would be:
+1) allocate a memory region, 2) somehow deliver a translated address to a peer
+device, 3) peer puts the translated address to NTB config, 4) peer device maps
+outbound memory window so to have access to the shared memory region.
+
+As one can see the described scenarios can be combined in one portable
+algorithm.
+
+ Local device:
+  1) Allocate memory for a shared window
+  2) Initialize memory window by translated address of the allocated region
+     (it may fail if local memory window initialization is unsupported)
+  3) Send the translated address and memory window index to a peer device
+
+ Peer device:
+  1) Initialize memory window with retrieved address of the allocated
+     by another device memory region (it may fail if peer memory window
+     initialization is unsupported)
+  2) Map outbound memory window
+
+In accordance with this scenario, the NTB Memory Window API can be used as
+follows:
+
+ Local device:
+  1) ntb_mw_count(pidx) - retrieve number of memory ranges, which can
+     be allocated for memory windows between local device and peer device
+     of port with specified index.
+  2) ntb_get_align(pidx, midx) - retrieve parameters restricting the
+     shared memory region alignment and size. Then memory can be properly
+     allocated.
+  3) Allocate physically contiguous memory region in compliance with
+     restrictions retrieved in 2).
+  4) ntb_mw_set_trans(pidx, midx) - try to set translation address of
+     the memory window with specified index for the defined peer device
+     (it may fail if local translated address setting is not supported)
+  5) Send translated base address (usually together with memory window
+     number) to the peer device using, for instance, scratchpad or message
+     registers.
+
+ Peer device:
+  1) ntb_peer_mw_set_trans(pidx, midx) - try to set received from other
+     device (related to pidx) translated address for specified memory
+     window. It may fail if retrieved address, for instance, exceeds
+     maximum possible address or isn't properly aligned.
+  2) ntb_peer_mw_get_addr(widx) - retrieve MMIO address to map the memory
+     window so to have an access to the shared memory.
+
+Also it is worth to note, that method ntb_mw_count(pidx) should return the
+same value as ntb_peer_mw_count() on the peer with port index - pidx.
+
+NTB Transport Client (ntb\_transport) and NTB Netdev (ntb\_netdev)
+------------------------------------------------------------------
+
+The primary client for NTB is the Transport client, used in tandem with NTB
+Netdev.  These drivers function together to create a logical link to the peer,
+across the ntb, to exchange packets of network data.  The Transport client
+establishes a logical link to the peer, and creates queue pairs to exchange
+messages and data.  The NTB Netdev then creates an ethernet device using a
+Transport queue pair.  Network data is copied between socket buffers and the
+Transport queue pair buffer.  The Transport client may be used for other things
+besides Netdev, however no other applications have yet been written.
+
+NTB Ping Pong Test Client (ntb\_pingpong)
+-----------------------------------------
+
+The Ping Pong test client serves as a demonstration to exercise the doorbell
+and scratchpad registers of NTB hardware, and as an example simple NTB client.
+Ping Pong enables the link when started, waits for the NTB link to come up, and
+then proceeds to read and write the doorbell scratchpad registers of the NTB.
+The peers interrupt each other using a bit mask of doorbell bits, which is
+shifted by one in each round, to test the behavior of multiple doorbell bits
+and interrupt vectors.  The Ping Pong driver also reads the first local
+scratchpad, and writes the value plus one to the first peer scratchpad, each
+round before writing the peer doorbell register.
+
+Module Parameters:
+
+* unsafe - Some hardware has known issues with scratchpad and doorbell
+	registers.  By default, Ping Pong will not attempt to exercise such
+	hardware.  You may override this behavior at your own risk by setting
+	unsafe=1.
+* delay\_ms - Specify the delay between receiving a doorbell
+	interrupt event and setting the peer doorbell register for the next
+	round.
+* init\_db - Specify the doorbell bits to start new series of rounds.  A new
+	series begins once all the doorbell bits have been shifted out of
+	range.
+* dyndbg - It is suggested to specify dyndbg=+p when loading this module, and
+	then to observe debugging output on the console.
+
+NTB Tool Test Client (ntb\_tool)
+--------------------------------
+
+The Tool test client serves for debugging, primarily, ntb hardware and drivers.
+The Tool provides access through debugfs for reading, setting, and clearing the
+NTB doorbell, and reading and writing scratchpads.
+
+The Tool does not currently have any module parameters.
+
+Debugfs Files:
+
+* *debugfs*/ntb\_tool/*hw*/
+	A directory in debugfs will be created for each
+	NTB device probed by the tool.  This directory is shortened to *hw*
+	below.
+* *hw*/db
+	This file is used to read, set, and clear the local doorbell.  Not
+	all operations may be supported by all hardware.  To read the doorbell,
+	read the file.  To set the doorbell, write `s` followed by the bits to
+	set (eg: `echo 's 0x0101' > db`).  To clear the doorbell, write `c`
+	followed by the bits to clear.
+* *hw*/mask
+	This file is used to read, set, and clear the local doorbell mask.
+	See *db* for details.
+* *hw*/peer\_db
+	This file is used to read, set, and clear the peer doorbell.
+	See *db* for details.
+* *hw*/peer\_mask
+	This file is used to read, set, and clear the peer doorbell
+	mask.  See *db* for details.
+* *hw*/spad
+	This file is used to read and write local scratchpads.  To read
+	the values of all scratchpads, read the file.  To write values, write a
+	series of pairs of scratchpad number and value
+	(eg: `echo '4 0x123 7 0xabc' > spad`
+	# to set scratchpads `4` and `7` to `0x123` and `0xabc`, respectively).
+* *hw*/peer\_spad
+	This file is used to read and write peer scratchpads.  See
+	*spad* for details.
+
+NTB Hardware Drivers
+====================
+
+NTB hardware drivers should register devices with the NTB core driver.  After
+registering, clients probe and remove functions will be called.
+
+NTB Intel Hardware Driver (ntb\_hw\_intel)
+------------------------------------------
+
+The Intel hardware driver supports NTB on Xeon and Atom CPUs.
+
+Module Parameters:
+
+* b2b\_mw\_idx
+	If the peer ntb is to be accessed via a memory window, then use
+	this memory window to access the peer ntb.  A value of zero or positive
+	starts from the first mw idx, and a negative value starts from the last
+	mw idx.  Both sides MUST set the same value here!  The default value is
+	`-1`.
+* b2b\_mw\_share
+	If the peer ntb is to be accessed via a memory window, and if
+	the memory window is large enough, still allow the client to use the
+	second half of the memory window for address translation to the peer.
+* xeon\_b2b\_usd\_bar2\_addr64
+	If using B2B topology on Xeon hardware, use
+	this 64 bit address on the bus between the NTB devices for the window
+	at BAR2, on the upstream side of the link.
+* xeon\_b2b\_usd\_bar4\_addr64 - See *xeon\_b2b\_bar2\_addr64*.
+* xeon\_b2b\_usd\_bar4\_addr32 - See *xeon\_b2b\_bar2\_addr64*.
+* xeon\_b2b\_usd\_bar5\_addr32 - See *xeon\_b2b\_bar2\_addr64*.
+* xeon\_b2b\_dsd\_bar2\_addr64 - See *xeon\_b2b\_bar2\_addr64*.
+* xeon\_b2b\_dsd\_bar4\_addr64 - See *xeon\_b2b\_bar2\_addr64*.
+* xeon\_b2b\_dsd\_bar4\_addr32 - See *xeon\_b2b\_bar2\_addr64*.
+* xeon\_b2b\_dsd\_bar5\_addr32 - See *xeon\_b2b\_bar2\_addr64*.
diff --git a/Documentation/driver-api/nvmem.rst b/Documentation/driver-api/nvmem.rst
new file mode 100644
index 000000000000..d9d958d5c824
--- /dev/null
+++ b/Documentation/driver-api/nvmem.rst
@@ -0,0 +1,189 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+NVMEM Subsystem
+===============
+
+ Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+This document explains the NVMEM Framework along with the APIs provided,
+and how to use it.
+
+1. Introduction
+===============
+*NVMEM* is the abbreviation for Non Volatile Memory layer. It is used to
+retrieve configuration of SOC or Device specific data from non volatile
+memories like eeprom, efuses and so on.
+
+Before this framework existed, NVMEM drivers like eeprom were stored in
+drivers/misc, where they all had to duplicate pretty much the same code to
+register a sysfs file, allow in-kernel users to access the content of the
+devices they were driving, etc.
+
+This was also a problem as far as other in-kernel users were involved, since
+the solutions used were pretty much different from one driver to another, there
+was a rather big abstraction leak.
+
+This framework aims at solve these problems. It also introduces DT
+representation for consumer devices to go get the data they require (MAC
+Addresses, SoC/Revision ID, part numbers, and so on) from the NVMEMs. This
+framework is based on regmap, so that most of the abstraction available in
+regmap can be reused, across multiple types of buses.
+
+NVMEM Providers
++++++++++++++++
+
+NVMEM provider refers to an entity that implements methods to initialize, read
+and write the non-volatile memory.
+
+2. Registering/Unregistering the NVMEM provider
+===============================================
+
+A NVMEM provider can register with NVMEM core by supplying relevant
+nvmem configuration to nvmem_register(), on success core would return a valid
+nvmem_device pointer.
+
+nvmem_unregister(nvmem) is used to unregister a previously registered provider.
+
+For example, a simple qfprom case::
+
+  static struct nvmem_config econfig = {
+	.name = "qfprom",
+	.owner = THIS_MODULE,
+  };
+
+  static int qfprom_probe(struct platform_device *pdev)
+  {
+	...
+	econfig.dev = &pdev->dev;
+	nvmem = nvmem_register(&econfig);
+	...
+  }
+
+It is mandatory that the NVMEM provider has a regmap associated with its
+struct device. Failure to do would return error code from nvmem_register().
+
+Users of board files can define and register nvmem cells using the
+nvmem_cell_table struct::
+
+  static struct nvmem_cell_info foo_nvmem_cells[] = {
+	{
+		.name		= "macaddr",
+		.offset		= 0x7f00,
+		.bytes		= ETH_ALEN,
+	}
+  };
+
+  static struct nvmem_cell_table foo_nvmem_cell_table = {
+	.nvmem_name		= "i2c-eeprom",
+	.cells			= foo_nvmem_cells,
+	.ncells			= ARRAY_SIZE(foo_nvmem_cells),
+  };
+
+  nvmem_add_cell_table(&foo_nvmem_cell_table);
+
+Additionally it is possible to create nvmem cell lookup entries and register
+them with the nvmem framework from machine code as shown in the example below::
+
+  static struct nvmem_cell_lookup foo_nvmem_lookup = {
+	.nvmem_name		= "i2c-eeprom",
+	.cell_name		= "macaddr",
+	.dev_id			= "foo_mac.0",
+	.con_id			= "mac-address",
+  };
+
+  nvmem_add_cell_lookups(&foo_nvmem_lookup, 1);
+
+NVMEM Consumers
++++++++++++++++
+
+NVMEM consumers are the entities which make use of the NVMEM provider to
+read from and to NVMEM.
+
+3. NVMEM cell based consumer APIs
+=================================
+
+NVMEM cells are the data entries/fields in the NVMEM.
+The NVMEM framework provides 3 APIs to read/write NVMEM cells::
+
+  struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *name);
+  struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, const char *name);
+
+  void nvmem_cell_put(struct nvmem_cell *cell);
+  void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell);
+
+  void *nvmem_cell_read(struct nvmem_cell *cell, ssize_t *len);
+  int nvmem_cell_write(struct nvmem_cell *cell, void *buf, ssize_t len);
+
+`*nvmem_cell_get()` apis will get a reference to nvmem cell for a given id,
+and nvmem_cell_read/write() can then read or write to the cell.
+Once the usage of the cell is finished the consumer should call
+`*nvmem_cell_put()` to free all the allocation memory for the cell.
+
+4. Direct NVMEM device based consumer APIs
+==========================================
+
+In some instances it is necessary to directly read/write the NVMEM.
+To facilitate such consumers NVMEM framework provides below apis::
+
+  struct nvmem_device *nvmem_device_get(struct device *dev, const char *name);
+  struct nvmem_device *devm_nvmem_device_get(struct device *dev,
+					   const char *name);
+  void nvmem_device_put(struct nvmem_device *nvmem);
+  int nvmem_device_read(struct nvmem_device *nvmem, unsigned int offset,
+		      size_t bytes, void *buf);
+  int nvmem_device_write(struct nvmem_device *nvmem, unsigned int offset,
+		       size_t bytes, void *buf);
+  int nvmem_device_cell_read(struct nvmem_device *nvmem,
+			   struct nvmem_cell_info *info, void *buf);
+  int nvmem_device_cell_write(struct nvmem_device *nvmem,
+			    struct nvmem_cell_info *info, void *buf);
+
+Before the consumers can read/write NVMEM directly, it should get hold
+of nvmem_controller from one of the `*nvmem_device_get()` api.
+
+The difference between these apis and cell based apis is that these apis always
+take nvmem_device as parameter.
+
+5. Releasing a reference to the NVMEM
+=====================================
+
+When a consumer no longer needs the NVMEM, it has to release the reference
+to the NVMEM it has obtained using the APIs mentioned in the above section.
+The NVMEM framework provides 2 APIs to release a reference to the NVMEM::
+
+  void nvmem_cell_put(struct nvmem_cell *cell);
+  void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell);
+  void nvmem_device_put(struct nvmem_device *nvmem);
+  void devm_nvmem_device_put(struct device *dev, struct nvmem_device *nvmem);
+
+Both these APIs are used to release a reference to the NVMEM and
+devm_nvmem_cell_put and devm_nvmem_device_put destroys the devres associated
+with this NVMEM.
+
+Userspace
++++++++++
+
+6. Userspace binary interface
+==============================
+
+Userspace can read/write the raw NVMEM file located at::
+
+	/sys/bus/nvmem/devices/*/nvmem
+
+ex::
+
+  hexdump /sys/bus/nvmem/devices/qfprom0/nvmem
+
+  0000000 0000 0000 0000 0000 0000 0000 0000 0000
+  *
+  00000a0 db10 2240 0000 e000 0c00 0c00 0000 0c00
+  0000000 0000 0000 0000 0000 0000 0000 0000 0000
+  ...
+  *
+  0001000
+
+7. DeviceTree Binding
+=====================
+
+See Documentation/devicetree/bindings/nvmem/nvmem.txt
diff --git a/Documentation/driver-api/parport-lowlevel.rst b/Documentation/driver-api/parport-lowlevel.rst
new file mode 100644
index 000000000000..0633d70ffda7
--- /dev/null
+++ b/Documentation/driver-api/parport-lowlevel.rst
@@ -0,0 +1,1832 @@
+===============================
+PARPORT interface documentation
+===============================
+
+:Time-stamp: <2000-02-24 13:30:20 twaugh>
+
+Described here are the following functions:
+
+Global functions::
+  parport_register_driver
+  parport_unregister_driver
+  parport_enumerate
+  parport_register_device
+  parport_unregister_device
+  parport_claim
+  parport_claim_or_block
+  parport_release
+  parport_yield
+  parport_yield_blocking
+  parport_wait_peripheral
+  parport_poll_peripheral
+  parport_wait_event
+  parport_negotiate
+  parport_read
+  parport_write
+  parport_open
+  parport_close
+  parport_device_id
+  parport_device_coords
+  parport_find_class
+  parport_find_device
+  parport_set_timeout
+
+Port functions (can be overridden by low-level drivers):
+
+  SPP::
+    port->ops->read_data
+    port->ops->write_data
+    port->ops->read_status
+    port->ops->read_control
+    port->ops->write_control
+    port->ops->frob_control
+    port->ops->enable_irq
+    port->ops->disable_irq
+    port->ops->data_forward
+    port->ops->data_reverse
+
+  EPP::
+    port->ops->epp_write_data
+    port->ops->epp_read_data
+    port->ops->epp_write_addr
+    port->ops->epp_read_addr
+
+  ECP::
+    port->ops->ecp_write_data
+    port->ops->ecp_read_data
+    port->ops->ecp_write_addr
+
+  Other::
+    port->ops->nibble_read_data
+    port->ops->byte_read_data
+    port->ops->compat_write_data
+
+The parport subsystem comprises ``parport`` (the core port-sharing
+code), and a variety of low-level drivers that actually do the port
+accesses.  Each low-level driver handles a particular style of port
+(PC, Amiga, and so on).
+
+The parport interface to the device driver author can be broken down
+into global functions and port functions.
+
+The global functions are mostly for communicating between the device
+driver and the parport subsystem: acquiring a list of available ports,
+claiming a port for exclusive use, and so on.  They also include
+``generic`` functions for doing standard things that will work on any
+IEEE 1284-capable architecture.
+
+The port functions are provided by the low-level drivers, although the
+core parport module provides generic ``defaults`` for some routines.
+The port functions can be split into three groups: SPP, EPP, and ECP.
+
+SPP (Standard Parallel Port) functions modify so-called ``SPP``
+registers: data, status, and control.  The hardware may not actually
+have registers exactly like that, but the PC does and this interface is
+modelled after common PC implementations.  Other low-level drivers may
+be able to emulate most of the functionality.
+
+EPP (Enhanced Parallel Port) functions are provided for reading and
+writing in IEEE 1284 EPP mode, and ECP (Extended Capabilities Port)
+functions are used for IEEE 1284 ECP mode. (What about BECP? Does
+anyone care?)
+
+Hardware assistance for EPP and/or ECP transfers may or may not be
+available, and if it is available it may or may not be used.  If
+hardware is not used, the transfer will be software-driven.  In order
+to cope with peripherals that only tenuously support IEEE 1284, a
+low-level driver specific function is provided, for altering 'fudge
+factors'.
+
+Global functions
+================
+
+parport_register_driver - register a device driver with parport
+---------------------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_driver {
+		const char *name;
+		void (*attach) (struct parport *);
+		void (*detach) (struct parport *);
+		struct parport_driver *next;
+	};
+	int parport_register_driver (struct parport_driver *driver);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+In order to be notified about parallel ports when they are detected,
+parport_register_driver should be called.  Your driver will
+immediately be notified of all ports that have already been detected,
+and of each new port as low-level drivers are loaded.
+
+A ``struct parport_driver`` contains the textual name of your driver,
+a pointer to a function to handle new ports, and a pointer to a
+function to handle ports going away due to a low-level driver
+unloading.  Ports will only be detached if they are not being used
+(i.e. there are no devices registered on them).
+
+The visible parts of the ``struct parport *`` argument given to
+attach/detach are::
+
+	struct parport
+	{
+		struct parport *next; /* next parport in list */
+		const char *name;     /* port's name */
+		unsigned int modes;   /* bitfield of hardware modes */
+		struct parport_device_info probe_info;
+				/* IEEE1284 info */
+		int number;           /* parport index */
+		struct parport_operations *ops;
+		...
+	};
+
+There are other members of the structure, but they should not be
+touched.
+
+The ``modes`` member summarises the capabilities of the underlying
+hardware.  It consists of flags which may be bitwise-ored together:
+
+  ============================= ===============================================
+  PARPORT_MODE_PCSPP		IBM PC registers are available,
+				i.e. functions that act on data,
+				control and status registers are
+				probably writing directly to the
+				hardware.
+  PARPORT_MODE_TRISTATE		The data drivers may be turned off.
+				This allows the data lines to be used
+				for reverse (peripheral to host)
+				transfers.
+  PARPORT_MODE_COMPAT		The hardware can assist with
+				compatibility-mode (printer)
+				transfers, i.e. compat_write_block.
+  PARPORT_MODE_EPP		The hardware can assist with EPP
+				transfers.
+  PARPORT_MODE_ECP		The hardware can assist with ECP
+				transfers.
+  PARPORT_MODE_DMA		The hardware can use DMA, so you might
+				want to pass ISA DMA-able memory
+				(i.e. memory allocated using the
+				GFP_DMA flag with kmalloc) to the
+				low-level driver in order to take
+				advantage of it.
+  ============================= ===============================================
+
+There may be other flags in ``modes`` as well.
+
+The contents of ``modes`` is advisory only.  For example, if the
+hardware is capable of DMA, and PARPORT_MODE_DMA is in ``modes``, it
+doesn't necessarily mean that DMA will always be used when possible.
+Similarly, hardware that is capable of assisting ECP transfers won't
+necessarily be used.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+Zero on success, otherwise an error code.
+
+ERRORS
+^^^^^^
+
+None. (Can it fail? Why return int?)
+
+EXAMPLE
+^^^^^^^
+
+::
+
+	static void lp_attach (struct parport *port)
+	{
+		...
+		private = kmalloc (...);
+		dev[count++] = parport_register_device (...);
+		...
+	}
+
+	static void lp_detach (struct parport *port)
+	{
+		...
+	}
+
+	static struct parport_driver lp_driver = {
+		"lp",
+		lp_attach,
+		lp_detach,
+		NULL /* always put NULL here */
+	};
+
+	int lp_init (void)
+	{
+		...
+		if (parport_register_driver (&lp_driver)) {
+			/* Failed; nothing we can do. */
+			return -EIO;
+		}
+		...
+	}
+
+
+SEE ALSO
+^^^^^^^^
+
+parport_unregister_driver, parport_register_device, parport_enumerate
+
+
+
+parport_unregister_driver - tell parport to forget about this driver
+--------------------------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_driver {
+		const char *name;
+		void (*attach) (struct parport *);
+		void (*detach) (struct parport *);
+		struct parport_driver *next;
+	};
+	void parport_unregister_driver (struct parport_driver *driver);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+This tells parport not to notify the device driver of new ports or of
+ports going away.  Registered devices belonging to that driver are NOT
+unregistered: parport_unregister_device must be used for each one.
+
+EXAMPLE
+^^^^^^^
+
+::
+
+	void cleanup_module (void)
+	{
+		...
+		/* Stop notifications. */
+		parport_unregister_driver (&lp_driver);
+
+		/* Unregister devices. */
+		for (i = 0; i < NUM_DEVS; i++)
+			parport_unregister_device (dev[i]);
+		...
+	}
+
+SEE ALSO
+^^^^^^^^
+
+parport_register_driver, parport_enumerate
+
+
+
+parport_enumerate - retrieve a list of parallel ports (DEPRECATED)
+------------------------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport *parport_enumerate (void);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Retrieve the first of a list of valid parallel ports for this machine.
+Successive parallel ports can be found using the ``struct parport
+*next`` element of the ``struct parport *`` that is returned.  If ``next``
+is NULL, there are no more parallel ports in the list.  The number of
+ports in the list will not exceed PARPORT_MAX.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+A ``struct parport *`` describing a valid parallel port for the machine,
+or NULL if there are none.
+
+ERRORS
+^^^^^^
+
+This function can return NULL to indicate that there are no parallel
+ports to use.
+
+EXAMPLE
+^^^^^^^
+
+::
+
+	int detect_device (void)
+	{
+		struct parport *port;
+
+		for (port = parport_enumerate ();
+		port != NULL;
+		port = port->next) {
+			/* Try to detect a device on the port... */
+			...
+		}
+		}
+
+		...
+	}
+
+NOTES
+^^^^^
+
+parport_enumerate is deprecated; parport_register_driver should be
+used instead.
+
+SEE ALSO
+^^^^^^^^
+
+parport_register_driver, parport_unregister_driver
+
+
+
+parport_register_device - register to use a port
+------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	typedef int (*preempt_func) (void *handle);
+	typedef void (*wakeup_func) (void *handle);
+	typedef int (*irq_func) (int irq, void *handle, struct pt_regs *);
+
+	struct pardevice *parport_register_device(struct parport *port,
+						  const char *name,
+						  preempt_func preempt,
+						  wakeup_func wakeup,
+						  irq_func irq,
+						  int flags,
+						  void *handle);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Use this function to register your device driver on a parallel port
+(``port``).  Once you have done that, you will be able to use
+parport_claim and parport_release in order to use the port.
+
+The (``name``) argument is the name of the device that appears in /proc
+filesystem. The string must be valid for the whole lifetime of the
+device (until parport_unregister_device is called).
+
+This function will register three callbacks into your driver:
+``preempt``, ``wakeup`` and ``irq``.  Each of these may be NULL in order to
+indicate that you do not want a callback.
+
+When the ``preempt`` function is called, it is because another driver
+wishes to use the parallel port.  The ``preempt`` function should return
+non-zero if the parallel port cannot be released yet -- if zero is
+returned, the port is lost to another driver and the port must be
+re-claimed before use.
+
+The ``wakeup`` function is called once another driver has released the
+port and no other driver has yet claimed it.  You can claim the
+parallel port from within the ``wakeup`` function (in which case the
+claim is guaranteed to succeed), or choose not to if you don't need it
+now.
+
+If an interrupt occurs on the parallel port your driver has claimed,
+the ``irq`` function will be called. (Write something about shared
+interrupts here.)
+
+The ``handle`` is a pointer to driver-specific data, and is passed to
+the callback functions.
+
+``flags`` may be a bitwise combination of the following flags:
+
+  ===================== =================================================
+        Flag            Meaning
+  ===================== =================================================
+  PARPORT_DEV_EXCL	The device cannot share the parallel port at all.
+			Use this only when absolutely necessary.
+  ===================== =================================================
+
+The typedefs are not actually defined -- they are only shown in order
+to make the function prototype more readable.
+
+The visible parts of the returned ``struct pardevice`` are::
+
+	struct pardevice {
+		struct parport *port;	/* Associated port */
+		void *private;		/* Device driver's 'handle' */
+		...
+	};
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+A ``struct pardevice *``: a handle to the registered parallel port
+device that can be used for parport_claim, parport_release, etc.
+
+ERRORS
+^^^^^^
+
+A return value of NULL indicates that there was a problem registering
+a device on that port.
+
+EXAMPLE
+^^^^^^^
+
+::
+
+	static int preempt (void *handle)
+	{
+		if (busy_right_now)
+			return 1;
+
+		must_reclaim_port = 1;
+		return 0;
+	}
+
+	static void wakeup (void *handle)
+	{
+		struct toaster *private = handle;
+		struct pardevice *dev = private->dev;
+		if (!dev) return; /* avoid races */
+
+		if (want_port)
+			parport_claim (dev);
+	}
+
+	static int toaster_detect (struct toaster *private, struct parport *port)
+	{
+		private->dev = parport_register_device (port, "toaster", preempt,
+							wakeup, NULL, 0,
+							private);
+		if (!private->dev)
+			/* Couldn't register with parport. */
+			return -EIO;
+
+		must_reclaim_port = 0;
+		busy_right_now = 1;
+		parport_claim_or_block (private->dev);
+		...
+		/* Don't need the port while the toaster warms up. */
+		busy_right_now = 0;
+		...
+		busy_right_now = 1;
+		if (must_reclaim_port) {
+			parport_claim_or_block (private->dev);
+			must_reclaim_port = 0;
+		}
+		...
+	}
+
+SEE ALSO
+^^^^^^^^
+
+parport_unregister_device, parport_claim
+
+
+
+parport_unregister_device - finish using a port
+-----------------------------------------------
+
+SYNPOPSIS
+
+::
+
+	#include <linux/parport.h>
+
+	void parport_unregister_device (struct pardevice *dev);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+This function is the opposite of parport_register_device.  After using
+parport_unregister_device, ``dev`` is no longer a valid device handle.
+
+You should not unregister a device that is currently claimed, although
+if you do it will be released automatically.
+
+EXAMPLE
+^^^^^^^
+
+::
+
+	...
+	kfree (dev->private); /* before we lose the pointer */
+	parport_unregister_device (dev);
+	...
+
+SEE ALSO
+^^^^^^^^
+
+
+parport_unregister_driver
+
+parport_claim, parport_claim_or_block - claim the parallel port for a device
+----------------------------------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	int parport_claim (struct pardevice *dev);
+	int parport_claim_or_block (struct pardevice *dev);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+These functions attempt to gain control of the parallel port on which
+``dev`` is registered.  ``parport_claim`` does not block, but
+``parport_claim_or_block`` may do. (Put something here about blocking
+interruptibly or non-interruptibly.)
+
+You should not try to claim a port that you have already claimed.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+A return value of zero indicates that the port was successfully
+claimed, and the caller now has possession of the parallel port.
+
+If ``parport_claim_or_block`` blocks before returning successfully, the
+return value is positive.
+
+ERRORS
+^^^^^^
+
+========== ==========================================================
+  -EAGAIN  The port is unavailable at the moment, but another attempt
+           to claim it may succeed.
+========== ==========================================================
+
+SEE ALSO
+^^^^^^^^
+
+
+parport_release
+
+parport_release - release the parallel port
+-------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	void parport_release (struct pardevice *dev);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Once a parallel port device has been claimed, it can be released using
+``parport_release``.  It cannot fail, but you should not release a
+device that you do not have possession of.
+
+EXAMPLE
+^^^^^^^
+
+::
+
+	static size_t write (struct pardevice *dev, const void *buf,
+			size_t len)
+	{
+		...
+		written = dev->port->ops->write_ecp_data (dev->port, buf,
+							len);
+		parport_release (dev);
+		...
+	}
+
+
+SEE ALSO
+^^^^^^^^
+
+change_mode, parport_claim, parport_claim_or_block, parport_yield
+
+
+
+parport_yield, parport_yield_blocking - temporarily release a parallel port
+---------------------------------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	int parport_yield (struct pardevice *dev)
+	int parport_yield_blocking (struct pardevice *dev);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+When a driver has control of a parallel port, it may allow another
+driver to temporarily ``borrow`` it.  ``parport_yield`` does not block;
+``parport_yield_blocking`` may do.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+A return value of zero indicates that the caller still owns the port
+and the call did not block.
+
+A positive return value from ``parport_yield_blocking`` indicates that
+the caller still owns the port and the call blocked.
+
+A return value of -EAGAIN indicates that the caller no longer owns the
+port, and it must be re-claimed before use.
+
+ERRORS
+^^^^^^
+
+========= ==========================================================
+  -EAGAIN  Ownership of the parallel port was given away.
+========= ==========================================================
+
+SEE ALSO
+^^^^^^^^
+
+parport_release
+
+
+
+parport_wait_peripheral - wait for status lines, up to 35ms
+-----------------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	int parport_wait_peripheral (struct parport *port,
+				     unsigned char mask,
+				     unsigned char val);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Wait for the status lines in mask to match the values in val.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+======== ==========================================================
+ -EINTR  a signal is pending
+      0  the status lines in mask have values in val
+      1  timed out while waiting (35ms elapsed)
+======== ==========================================================
+
+SEE ALSO
+^^^^^^^^
+
+parport_poll_peripheral
+
+
+
+parport_poll_peripheral - wait for status lines, in usec
+--------------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	int parport_poll_peripheral (struct parport *port,
+				     unsigned char mask,
+				     unsigned char val,
+				     int usec);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Wait for the status lines in mask to match the values in val.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+======== ==========================================================
+ -EINTR  a signal is pending
+      0  the status lines in mask have values in val
+      1  timed out while waiting (usec microseconds have elapsed)
+======== ==========================================================
+
+SEE ALSO
+^^^^^^^^
+
+parport_wait_peripheral
+
+
+
+parport_wait_event - wait for an event on a port
+------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	int parport_wait_event (struct parport *port, signed long timeout)
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Wait for an event (e.g. interrupt) on a port.  The timeout is in
+jiffies.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+======= ==========================================================
+      0  success
+     <0  error (exit as soon as possible)
+     >0  timed out
+======= ==========================================================
+
+parport_negotiate - perform IEEE 1284 negotiation
+-------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	int parport_negotiate (struct parport *, int mode);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Perform IEEE 1284 negotiation.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+======= ==========================================================
+     0  handshake OK; IEEE 1284 peripheral and mode available
+    -1  handshake failed; peripheral not compliant (or none present)
+     1  handshake OK; IEEE 1284 peripheral present but mode not
+        available
+======= ==========================================================
+
+SEE ALSO
+^^^^^^^^
+
+parport_read, parport_write
+
+
+
+parport_read - read data from device
+------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	ssize_t parport_read (struct parport *, void *buf, size_t len);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Read data from device in current IEEE 1284 transfer mode.  This only
+works for modes that support reverse data transfer.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+If negative, an error code; otherwise the number of bytes transferred.
+
+SEE ALSO
+^^^^^^^^
+
+parport_write, parport_negotiate
+
+
+
+parport_write - write data to device
+------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	ssize_t parport_write (struct parport *, const void *buf, size_t len);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Write data to device in current IEEE 1284 transfer mode.  This only
+works for modes that support forward data transfer.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+If negative, an error code; otherwise the number of bytes transferred.
+
+SEE ALSO
+^^^^^^^^
+
+parport_read, parport_negotiate
+
+
+
+parport_open - register device for particular device number
+-----------------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct pardevice *parport_open (int devnum, const char *name,
+				        int (*pf) (void *),
+					void (*kf) (void *),
+					void (*irqf) (int, void *,
+						      struct pt_regs *),
+					int flags, void *handle);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+This is like parport_register_device but takes a device number instead
+of a pointer to a struct parport.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+See parport_register_device.  If no device is associated with devnum,
+NULL is returned.
+
+SEE ALSO
+^^^^^^^^
+
+parport_register_device
+
+
+
+parport_close - unregister device for particular device number
+--------------------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	void parport_close (struct pardevice *dev);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+This is the equivalent of parport_unregister_device for parport_open.
+
+SEE ALSO
+^^^^^^^^
+
+parport_unregister_device, parport_open
+
+
+
+parport_device_id - obtain IEEE 1284 Device ID
+----------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	ssize_t parport_device_id (int devnum, char *buffer, size_t len);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Obtains the IEEE 1284 Device ID associated with a given device.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+If negative, an error code; otherwise, the number of bytes of buffer
+that contain the device ID.  The format of the device ID is as
+follows::
+
+	[length][ID]
+
+The first two bytes indicate the inclusive length of the entire Device
+ID, and are in big-endian order.  The ID is a sequence of pairs of the
+form::
+
+	key:value;
+
+NOTES
+^^^^^
+
+Many devices have ill-formed IEEE 1284 Device IDs.
+
+SEE ALSO
+^^^^^^^^
+
+parport_find_class, parport_find_device
+
+
+
+parport_device_coords - convert device number to device coordinates
+-------------------------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	int parport_device_coords (int devnum, int *parport, int *mux,
+				   int *daisy);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Convert between device number (zero-based) and device coordinates
+(port, multiplexor, daisy chain address).
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+Zero on success, in which case the coordinates are (``*parport``, ``*mux``,
+``*daisy``).
+
+SEE ALSO
+^^^^^^^^
+
+parport_open, parport_device_id
+
+
+
+parport_find_class - find a device by its class
+-----------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	typedef enum {
+		PARPORT_CLASS_LEGACY = 0,       /* Non-IEEE1284 device */
+		PARPORT_CLASS_PRINTER,
+		PARPORT_CLASS_MODEM,
+		PARPORT_CLASS_NET,
+		PARPORT_CLASS_HDC,              /* Hard disk controller */
+		PARPORT_CLASS_PCMCIA,
+		PARPORT_CLASS_MEDIA,            /* Multimedia device */
+		PARPORT_CLASS_FDC,              /* Floppy disk controller */
+		PARPORT_CLASS_PORTS,
+		PARPORT_CLASS_SCANNER,
+		PARPORT_CLASS_DIGCAM,
+		PARPORT_CLASS_OTHER,            /* Anything else */
+		PARPORT_CLASS_UNSPEC,           /* No CLS field in ID */
+		PARPORT_CLASS_SCSIADAPTER
+	} parport_device_class;
+
+	int parport_find_class (parport_device_class cls, int from);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Find a device by class.  The search starts from device number from+1.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+The device number of the next device in that class, or -1 if no such
+device exists.
+
+NOTES
+^^^^^
+
+Example usage::
+
+	int devnum = -1;
+	while ((devnum = parport_find_class (PARPORT_CLASS_DIGCAM, devnum)) != -1) {
+		struct pardevice *dev = parport_open (devnum, ...);
+		...
+	}
+
+SEE ALSO
+^^^^^^^^
+
+parport_find_device, parport_open, parport_device_id
+
+
+
+parport_find_device - find a device by its class
+------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	int parport_find_device (const char *mfg, const char *mdl, int from);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Find a device by vendor and model.  The search starts from device
+number from+1.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+The device number of the next device matching the specifications, or
+-1 if no such device exists.
+
+NOTES
+^^^^^
+
+Example usage::
+
+	int devnum = -1;
+	while ((devnum = parport_find_device ("IOMEGA", "ZIP+", devnum)) != -1) {
+		struct pardevice *dev = parport_open (devnum, ...);
+		...
+	}
+
+SEE ALSO
+^^^^^^^^
+
+parport_find_class, parport_open, parport_device_id
+
+
+
+parport_set_timeout - set the inactivity timeout
+------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	long parport_set_timeout (struct pardevice *dev, long inactivity);
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Set the inactivity timeout, in jiffies, for a registered device.  The
+previous timeout is returned.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+The previous timeout, in jiffies.
+
+NOTES
+^^^^^
+
+Some of the port->ops functions for a parport may take time, owing to
+delays at the peripheral.  After the peripheral has not responded for
+``inactivity`` jiffies, a timeout will occur and the blocking function
+will return.
+
+A timeout of 0 jiffies is a special case: the function must do as much
+as it can without blocking or leaving the hardware in an unknown
+state.  If port operations are performed from within an interrupt
+handler, for instance, a timeout of 0 jiffies should be used.
+
+Once set for a registered device, the timeout will remain at the set
+value until set again.
+
+SEE ALSO
+^^^^^^^^
+
+port->ops->xxx_read/write_yyy
+
+
+
+
+PORT FUNCTIONS
+==============
+
+The functions in the port->ops structure (struct parport_operations)
+are provided by the low-level driver responsible for that port.
+
+port->ops->read_data - read the data register
+---------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		unsigned char (*read_data) (struct parport *port);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+If port->modes contains the PARPORT_MODE_TRISTATE flag and the
+PARPORT_CONTROL_DIRECTION bit in the control register is set, this
+returns the value on the data pins.  If port->modes contains the
+PARPORT_MODE_TRISTATE flag and the PARPORT_CONTROL_DIRECTION bit is
+not set, the return value _may_ be the last value written to the data
+register.  Otherwise the return value is undefined.
+
+SEE ALSO
+^^^^^^^^
+
+write_data, read_status, write_control
+
+
+
+port->ops->write_data - write the data register
+-----------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		void (*write_data) (struct parport *port, unsigned char d);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Writes to the data register.  May have side-effects (a STROBE pulse,
+for instance).
+
+SEE ALSO
+^^^^^^^^
+
+read_data, read_status, write_control
+
+
+
+port->ops->read_status - read the status register
+-------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		unsigned char (*read_status) (struct parport *port);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Reads from the status register.  This is a bitmask:
+
+- PARPORT_STATUS_ERROR (printer fault, "nFault")
+- PARPORT_STATUS_SELECT (on-line, "Select")
+- PARPORT_STATUS_PAPEROUT (no paper, "PError")
+- PARPORT_STATUS_ACK (handshake, "nAck")
+- PARPORT_STATUS_BUSY (busy, "Busy")
+
+There may be other bits set.
+
+SEE ALSO
+^^^^^^^^
+
+read_data, write_data, write_control
+
+
+
+port->ops->read_control - read the control register
+---------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		unsigned char (*read_control) (struct parport *port);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Returns the last value written to the control register (either from
+write_control or frob_control).  No port access is performed.
+
+SEE ALSO
+^^^^^^^^
+
+read_data, write_data, read_status, write_control
+
+
+
+port->ops->write_control - write the control register
+-----------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		void (*write_control) (struct parport *port, unsigned char s);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Writes to the control register. This is a bitmask::
+
+				  _______
+	- PARPORT_CONTROL_STROBE (nStrobe)
+				  _______
+	- PARPORT_CONTROL_AUTOFD (nAutoFd)
+				_____
+	- PARPORT_CONTROL_INIT (nInit)
+				  _________
+	- PARPORT_CONTROL_SELECT (nSelectIn)
+
+SEE ALSO
+^^^^^^^^
+
+read_data, write_data, read_status, frob_control
+
+
+
+port->ops->frob_control - write control register bits
+-----------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		unsigned char (*frob_control) (struct parport *port,
+					unsigned char mask,
+					unsigned char val);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+This is equivalent to reading from the control register, masking out
+the bits in mask, exclusive-or'ing with the bits in val, and writing
+the result to the control register.
+
+As some ports don't allow reads from the control port, a software copy
+of its contents is maintained, so frob_control is in fact only one
+port access.
+
+SEE ALSO
+^^^^^^^^
+
+read_data, write_data, read_status, write_control
+
+
+
+port->ops->enable_irq - enable interrupt generation
+---------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		void (*enable_irq) (struct parport *port);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+The parallel port hardware is instructed to generate interrupts at
+appropriate moments, although those moments are
+architecture-specific.  For the PC architecture, interrupts are
+commonly generated on the rising edge of nAck.
+
+SEE ALSO
+^^^^^^^^
+
+disable_irq
+
+
+
+port->ops->disable_irq - disable interrupt generation
+-----------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		void (*disable_irq) (struct parport *port);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+The parallel port hardware is instructed not to generate interrupts.
+The interrupt itself is not masked.
+
+SEE ALSO
+^^^^^^^^
+
+enable_irq
+
+
+
+port->ops->data_forward - enable data drivers
+---------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		void (*data_forward) (struct parport *port);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Enables the data line drivers, for 8-bit host-to-peripheral
+communications.
+
+SEE ALSO
+^^^^^^^^
+
+data_reverse
+
+
+
+port->ops->data_reverse - tristate the buffer
+---------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		void (*data_reverse) (struct parport *port);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Places the data bus in a high impedance state, if port->modes has the
+PARPORT_MODE_TRISTATE bit set.
+
+SEE ALSO
+^^^^^^^^
+
+data_forward
+
+
+
+port->ops->epp_write_data - write EPP data
+------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*epp_write_data) (struct parport *port, const void *buf,
+					size_t len, int flags);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Writes data in EPP mode, and returns the number of bytes written.
+
+The ``flags`` parameter may be one or more of the following,
+bitwise-or'ed together:
+
+======================= =================================================
+PARPORT_EPP_FAST	Use fast transfers. Some chips provide 16-bit and
+			32-bit registers.  However, if a transfer
+			times out, the return value may be unreliable.
+======================= =================================================
+
+SEE ALSO
+^^^^^^^^
+
+epp_read_data, epp_write_addr, epp_read_addr
+
+
+
+port->ops->epp_read_data - read EPP data
+----------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*epp_read_data) (struct parport *port, void *buf,
+					size_t len, int flags);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Reads data in EPP mode, and returns the number of bytes read.
+
+The ``flags`` parameter may be one or more of the following,
+bitwise-or'ed together:
+
+======================= =================================================
+PARPORT_EPP_FAST	Use fast transfers. Some chips provide 16-bit and
+			32-bit registers.  However, if a transfer
+			times out, the return value may be unreliable.
+======================= =================================================
+
+SEE ALSO
+^^^^^^^^
+
+epp_write_data, epp_write_addr, epp_read_addr
+
+
+
+port->ops->epp_write_addr - write EPP address
+---------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*epp_write_addr) (struct parport *port,
+					const void *buf, size_t len, int flags);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Writes EPP addresses (8 bits each), and returns the number written.
+
+The ``flags`` parameter may be one or more of the following,
+bitwise-or'ed together:
+
+======================= =================================================
+PARPORT_EPP_FAST	Use fast transfers. Some chips provide 16-bit and
+			32-bit registers.  However, if a transfer
+			times out, the return value may be unreliable.
+======================= =================================================
+
+(Does PARPORT_EPP_FAST make sense for this function?)
+
+SEE ALSO
+^^^^^^^^
+
+epp_write_data, epp_read_data, epp_read_addr
+
+
+
+port->ops->epp_read_addr - read EPP address
+-------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*epp_read_addr) (struct parport *port, void *buf,
+					size_t len, int flags);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Reads EPP addresses (8 bits each), and returns the number read.
+
+The ``flags`` parameter may be one or more of the following,
+bitwise-or'ed together:
+
+======================= =================================================
+PARPORT_EPP_FAST	Use fast transfers. Some chips provide 16-bit and
+			32-bit registers.  However, if a transfer
+			times out, the return value may be unreliable.
+======================= =================================================
+
+(Does PARPORT_EPP_FAST make sense for this function?)
+
+SEE ALSO
+^^^^^^^^
+
+epp_write_data, epp_read_data, epp_write_addr
+
+
+
+port->ops->ecp_write_data - write a block of ECP data
+-----------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*ecp_write_data) (struct parport *port,
+					const void *buf, size_t len, int flags);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Writes a block of ECP data.  The ``flags`` parameter is ignored.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+The number of bytes written.
+
+SEE ALSO
+^^^^^^^^
+
+ecp_read_data, ecp_write_addr
+
+
+
+port->ops->ecp_read_data - read a block of ECP data
+---------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*ecp_read_data) (struct parport *port,
+					void *buf, size_t len, int flags);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Reads a block of ECP data.  The ``flags`` parameter is ignored.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+The number of bytes read.  NB. There may be more unread data in a
+FIFO.  Is there a way of stunning the FIFO to prevent this?
+
+SEE ALSO
+^^^^^^^^
+
+ecp_write_block, ecp_write_addr
+
+
+
+port->ops->ecp_write_addr - write a block of ECP addresses
+----------------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*ecp_write_addr) (struct parport *port,
+					const void *buf, size_t len, int flags);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Writes a block of ECP addresses.  The ``flags`` parameter is ignored.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+The number of bytes written.
+
+NOTES
+^^^^^
+
+This may use a FIFO, and if so shall not return until the FIFO is empty.
+
+SEE ALSO
+^^^^^^^^
+
+ecp_read_data, ecp_write_data
+
+
+
+port->ops->nibble_read_data - read a block of data in nibble mode
+-----------------------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*nibble_read_data) (struct parport *port,
+					void *buf, size_t len, int flags);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Reads a block of data in nibble mode.  The ``flags`` parameter is ignored.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+The number of whole bytes read.
+
+SEE ALSO
+^^^^^^^^
+
+byte_read_data, compat_write_data
+
+
+
+port->ops->byte_read_data - read a block of data in byte mode
+-------------------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*byte_read_data) (struct parport *port,
+					void *buf, size_t len, int flags);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Reads a block of data in byte mode.  The ``flags`` parameter is ignored.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+The number of bytes read.
+
+SEE ALSO
+^^^^^^^^
+
+nibble_read_data, compat_write_data
+
+
+
+port->ops->compat_write_data - write a block of data in compatibility mode
+--------------------------------------------------------------------------
+
+SYNOPSIS
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*compat_write_data) (struct parport *port,
+					const void *buf, size_t len, int flags);
+		...
+	};
+
+DESCRIPTION
+^^^^^^^^^^^
+
+Writes a block of data in compatibility mode.  The ``flags`` parameter
+is ignored.
+
+RETURN VALUE
+^^^^^^^^^^^^
+
+The number of bytes written.
+
+SEE ALSO
+^^^^^^^^
+
+nibble_read_data, byte_read_data
diff --git a/Documentation/driver-api/pti_intel_mid.rst b/Documentation/driver-api/pti_intel_mid.rst
new file mode 100644
index 000000000000..20f1cff42d5f
--- /dev/null
+++ b/Documentation/driver-api/pti_intel_mid.rst
@@ -0,0 +1,106 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Intel MID PTI
+=============
+
+The Intel MID PTI project is HW implemented in Intel Atom
+system-on-a-chip designs based on the Parallel Trace
+Interface for MIPI P1149.7 cJTAG standard.  The kernel solution
+for this platform involves the following files::
+
+	./include/linux/pti.h
+	./drivers/.../n_tracesink.h
+	./drivers/.../n_tracerouter.c
+	./drivers/.../n_tracesink.c
+	./drivers/.../pti.c
+
+pti.c is the driver that enables various debugging features
+popular on platforms from certain mobile manufacturers.
+n_tracerouter.c and n_tracesink.c allow extra system information to
+be collected and routed to the pti driver, such as trace
+debugging data from a modem.  Although n_tracerouter
+and n_tracesink are a part of the complete PTI solution,
+these two line disciplines can work separately from
+pti.c and route any data stream from one /dev/tty node
+to another /dev/tty node via kernel-space.  This provides
+a stable, reliable connection that will not break unless
+the user-space application shuts down (plus avoids
+kernel->user->kernel context switch overheads of routing
+data).
+
+An example debugging usage for this driver system:
+
+  * Hook /dev/ttyPTI0 to syslogd.  Opening this port will also start
+    a console device to further capture debugging messages to PTI.
+  * Hook /dev/ttyPTI1 to modem debugging data to write to PTI HW.
+    This is where n_tracerouter and n_tracesink are used.
+  * Hook /dev/pti to a user-level debugging application for writing
+    to PTI HW.
+  * `Use mipi_` Kernel Driver API in other device drivers for
+    debugging to PTI by first requesting a PTI write address via
+    mipi_request_masterchannel(1).
+
+Below is example pseudo-code on how a 'privileged' application
+can hook up n_tracerouter and n_tracesink to any tty on
+a system.  'Privileged' means the application has enough
+privileges to successfully manipulate the ldisc drivers
+but is not just blindly executing as 'root'. Keep in mind
+the use of ioctl(,TIOCSETD,) is not specific to the n_tracerouter
+and n_tracesink line discpline drivers but is a generic
+operation for a program to use a line discpline driver
+on a tty port other than the default n_tty::
+
+  /////////// To hook up n_tracerouter and n_tracesink /////////
+
+  // Note that n_tracerouter depends on n_tracesink.
+  #include <errno.h>
+  #define ONE_TTY "/dev/ttyOne"
+  #define TWO_TTY "/dev/ttyTwo"
+
+  // needed global to hand onto ldisc connection
+  static int g_fd_source = -1;
+  static int g_fd_sink  = -1;
+
+  // these two vars used to grab LDISC values from loaded ldisc drivers
+  // in OS.  Look at /proc/tty/ldiscs to get the right numbers from
+  // the ldiscs loaded in the system.
+  int source_ldisc_num, sink_ldisc_num = -1;
+  int retval;
+
+  g_fd_source = open(ONE_TTY, O_RDWR); // must be R/W
+  g_fd_sink   = open(TWO_TTY, O_RDWR); // must be R/W
+
+  if (g_fd_source <= 0) || (g_fd_sink <= 0) {
+     // doubt you'll want to use these exact error lines of code
+     printf("Error on open(). errno: %d\n",errno);
+     return errno;
+  }
+
+  retval = ioctl(g_fd_sink, TIOCSETD, &sink_ldisc_num);
+  if (retval < 0) {
+     printf("Error on ioctl().  errno: %d\n", errno);
+     return errno;
+  }
+
+  retval = ioctl(g_fd_source, TIOCSETD, &source_ldisc_num);
+  if (retval < 0) {
+     printf("Error on ioctl().  errno: %d\n", errno);
+     return errno;
+  }
+
+  /////////// To disconnect n_tracerouter and n_tracesink ////////
+
+  // First make sure data through the ldiscs has stopped.
+
+  // Second, disconnect ldiscs.  This provides a
+  // little cleaner shutdown on tty stack.
+  sink_ldisc_num = 0;
+  source_ldisc_num = 0;
+  ioctl(g_fd_uart, TIOCSETD, &sink_ldisc_num);
+  ioctl(g_fd_gadget, TIOCSETD, &source_ldisc_num);
+
+  // Three, program closes connection, and cleanup:
+  close(g_fd_uart);
+  close(g_fd_gadget);
+  g_fd_uart = g_fd_gadget = NULL;
diff --git a/Documentation/driver-api/pwm.rst b/Documentation/driver-api/pwm.rst
new file mode 100644
index 000000000000..ab62f1bb0366
--- /dev/null
+++ b/Documentation/driver-api/pwm.rst
@@ -0,0 +1,165 @@
+======================================
+Pulse Width Modulation (PWM) interface
+======================================
+
+This provides an overview about the Linux PWM interface
+
+PWMs are commonly used for controlling LEDs, fans or vibrators in
+cell phones. PWMs with a fixed purpose have no need implementing
+the Linux PWM API (although they could). However, PWMs are often
+found as discrete devices on SoCs which have no fixed purpose. It's
+up to the board designer to connect them to LEDs or fans. To provide
+this kind of flexibility the generic PWM API exists.
+
+Identifying PWMs
+----------------
+
+Users of the legacy PWM API use unique IDs to refer to PWM devices.
+
+Instead of referring to a PWM device via its unique ID, board setup code
+should instead register a static mapping that can be used to match PWM
+consumers to providers, as given in the following example::
+
+	static struct pwm_lookup board_pwm_lookup[] = {
+		PWM_LOOKUP("tegra-pwm", 0, "pwm-backlight", NULL,
+			   50000, PWM_POLARITY_NORMAL),
+	};
+
+	static void __init board_init(void)
+	{
+		...
+		pwm_add_table(board_pwm_lookup, ARRAY_SIZE(board_pwm_lookup));
+		...
+	}
+
+Using PWMs
+----------
+
+Legacy users can request a PWM device using pwm_request() and free it
+after usage with pwm_free().
+
+New users should use the pwm_get() function and pass to it the consumer
+device or a consumer name. pwm_put() is used to free the PWM device. Managed
+variants of these functions, devm_pwm_get() and devm_pwm_put(), also exist.
+
+After being requested, a PWM has to be configured using::
+
+	int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state);
+
+This API controls both the PWM period/duty_cycle config and the
+enable/disable state.
+
+The pwm_config(), pwm_enable() and pwm_disable() functions are just wrappers
+around pwm_apply_state() and should not be used if the user wants to change
+several parameter at once. For example, if you see pwm_config() and
+pwm_{enable,disable}() calls in the same function, this probably means you
+should switch to pwm_apply_state().
+
+The PWM user API also allows one to query the PWM state with pwm_get_state().
+
+In addition to the PWM state, the PWM API also exposes PWM arguments, which
+are the reference PWM config one should use on this PWM.
+PWM arguments are usually platform-specific and allows the PWM user to only
+care about dutycycle relatively to the full period (like, duty = 50% of the
+period). struct pwm_args contains 2 fields (period and polarity) and should
+be used to set the initial PWM config (usually done in the probe function
+of the PWM user). PWM arguments are retrieved with pwm_get_args().
+
+All consumers should really be reconfiguring the PWM upon resume as
+appropriate. This is the only way to ensure that everything is resumed in
+the proper order.
+
+Using PWMs with the sysfs interface
+-----------------------------------
+
+If CONFIG_SYSFS is enabled in your kernel configuration a simple sysfs
+interface is provided to use the PWMs from userspace. It is exposed at
+/sys/class/pwm/. Each probed PWM controller/chip will be exported as
+pwmchipN, where N is the base of the PWM chip. Inside the directory you
+will find:
+
+  npwm
+    The number of PWM channels this chip supports (read-only).
+
+  export
+    Exports a PWM channel for use with sysfs (write-only).
+
+  unexport
+   Unexports a PWM channel from sysfs (write-only).
+
+The PWM channels are numbered using a per-chip index from 0 to npwm-1.
+
+When a PWM channel is exported a pwmX directory will be created in the
+pwmchipN directory it is associated with, where X is the number of the
+channel that was exported. The following properties will then be available:
+
+  period
+    The total period of the PWM signal (read/write).
+    Value is in nanoseconds and is the sum of the active and inactive
+    time of the PWM.
+
+  duty_cycle
+    The active time of the PWM signal (read/write).
+    Value is in nanoseconds and must be less than the period.
+
+  polarity
+    Changes the polarity of the PWM signal (read/write).
+    Writes to this property only work if the PWM chip supports changing
+    the polarity. The polarity can only be changed if the PWM is not
+    enabled. Value is the string "normal" or "inversed".
+
+  enable
+    Enable/disable the PWM signal (read/write).
+
+	- 0 - disabled
+	- 1 - enabled
+
+Implementing a PWM driver
+-------------------------
+
+Currently there are two ways to implement pwm drivers. Traditionally
+there only has been the barebone API meaning that each driver has
+to implement the pwm_*() functions itself. This means that it's impossible
+to have multiple PWM drivers in the system. For this reason it's mandatory
+for new drivers to use the generic PWM framework.
+
+A new PWM controller/chip can be added using pwmchip_add() and removed
+again with pwmchip_remove(). pwmchip_add() takes a filled in struct
+pwm_chip as argument which provides a description of the PWM chip, the
+number of PWM devices provided by the chip and the chip-specific
+implementation of the supported PWM operations to the framework.
+
+When implementing polarity support in a PWM driver, make sure to respect the
+signal conventions in the PWM framework. By definition, normal polarity
+characterizes a signal starts high for the duration of the duty cycle and
+goes low for the remainder of the period. Conversely, a signal with inversed
+polarity starts low for the duration of the duty cycle and goes high for the
+remainder of the period.
+
+Drivers are encouraged to implement ->apply() instead of the legacy
+->enable(), ->disable() and ->config() methods. Doing that should provide
+atomicity in the PWM config workflow, which is required when the PWM controls
+a critical device (like a regulator).
+
+The implementation of ->get_state() (a method used to retrieve initial PWM
+state) is also encouraged for the same reason: letting the PWM user know
+about the current PWM state would allow him to avoid glitches.
+
+Drivers should not implement any power management. In other words,
+consumers should implement it as described in the "Using PWMs" section.
+
+Locking
+-------
+
+The PWM core list manipulations are protected by a mutex, so pwm_request()
+and pwm_free() may not be called from an atomic context. Currently the
+PWM core does not enforce any locking to pwm_enable(), pwm_disable() and
+pwm_config(), so the calling context is currently driver specific. This
+is an issue derived from the former barebone API and should be fixed soon.
+
+Helpers
+-------
+
+Currently a PWM can only be configured with period_ns and duty_ns. For several
+use cases freq_hz and duty_percent might be better. Instead of calculating
+this in your driver please consider adding appropriate helpers to the framework.
diff --git a/Documentation/driver-api/rfkill.rst b/Documentation/driver-api/rfkill.rst
new file mode 100644
index 000000000000..7d3684e81df6
--- /dev/null
+++ b/Documentation/driver-api/rfkill.rst
@@ -0,0 +1,132 @@
+===============================
+rfkill - RF kill switch support
+===============================
+
+
+.. contents::
+   :depth: 2
+
+Introduction
+============
+
+The rfkill subsystem provides a generic interface for disabling any radio
+transmitter in the system. When a transmitter is blocked, it shall not
+radiate any power.
+
+The subsystem also provides the ability to react on button presses and
+disable all transmitters of a certain type (or all). This is intended for
+situations where transmitters need to be turned off, for example on
+aircraft.
+
+The rfkill subsystem has a concept of "hard" and "soft" block, which
+differ little in their meaning (block == transmitters off) but rather in
+whether they can be changed or not:
+
+ - hard block
+	read-only radio block that cannot be overridden by software
+
+ - soft block
+	writable radio block (need not be readable) that is set by
+        the system software.
+
+The rfkill subsystem has two parameters, rfkill.default_state and
+rfkill.master_switch_mode, which are documented in
+admin-guide/kernel-parameters.rst.
+
+
+Implementation details
+======================
+
+The rfkill subsystem is composed of three main components:
+
+ * the rfkill core,
+ * the deprecated rfkill-input module (an input layer handler, being
+   replaced by userspace policy code) and
+ * the rfkill drivers.
+
+The rfkill core provides API for kernel drivers to register their radio
+transmitter with the kernel, methods for turning it on and off, and letting
+the system know about hardware-disabled states that may be implemented on
+the device.
+
+The rfkill core code also notifies userspace of state changes, and provides
+ways for userspace to query the current states. See the "Userspace support"
+section below.
+
+When the device is hard-blocked (either by a call to rfkill_set_hw_state()
+or from query_hw_block), set_block() will be invoked for additional software
+block, but drivers can ignore the method call since they can use the return
+value of the function rfkill_set_hw_state() to sync the software state
+instead of keeping track of calls to set_block(). In fact, drivers should
+use the return value of rfkill_set_hw_state() unless the hardware actually
+keeps track of soft and hard block separately.
+
+
+Kernel API
+==========
+
+Drivers for radio transmitters normally implement an rfkill driver.
+
+Platform drivers might implement input devices if the rfkill button is just
+that, a button. If that button influences the hardware then you need to
+implement an rfkill driver instead. This also applies if the platform provides
+a way to turn on/off the transmitter(s).
+
+For some platforms, it is possible that the hardware state changes during
+suspend/hibernation, in which case it will be necessary to update the rfkill
+core with the current state at resume time.
+
+To create an rfkill driver, driver's Kconfig needs to have::
+
+	depends on RFKILL || !RFKILL
+
+to ensure the driver cannot be built-in when rfkill is modular. The !RFKILL
+case allows the driver to be built when rfkill is not configured, in which
+case all rfkill API can still be used but will be provided by static inlines
+which compile to almost nothing.
+
+Calling rfkill_set_hw_state() when a state change happens is required from
+rfkill drivers that control devices that can be hard-blocked unless they also
+assign the poll_hw_block() callback (then the rfkill core will poll the
+device). Don't do this unless you cannot get the event in any other way.
+
+rfkill provides per-switch LED triggers, which can be used to drive LEDs
+according to the switch state (LED_FULL when blocked, LED_OFF otherwise).
+
+
+Userspace support
+=================
+
+The recommended userspace interface to use is /dev/rfkill, which is a misc
+character device that allows userspace to obtain and set the state of rfkill
+devices and sets of devices. It also notifies userspace about device addition
+and removal. The API is a simple read/write API that is defined in
+linux/rfkill.h, with one ioctl that allows turning off the deprecated input
+handler in the kernel for the transition period.
+
+Except for the one ioctl, communication with the kernel is done via read()
+and write() of instances of 'struct rfkill_event'. In this structure, the
+soft and hard block are properly separated (unlike sysfs, see below) and
+userspace is able to get a consistent snapshot of all rfkill devices in the
+system. Also, it is possible to switch all rfkill drivers (or all drivers of
+a specified type) into a state which also updates the default state for
+hotplugged devices.
+
+After an application opens /dev/rfkill, it can read the current state of all
+devices. Changes can be obtained by either polling the descriptor for
+hotplug or state change events or by listening for uevents emitted by the
+rfkill core framework.
+
+Additionally, each rfkill device is registered in sysfs and emits uevents.
+
+rfkill devices issue uevents (with an action of "change"), with the following
+environment variables set::
+
+	RFKILL_NAME
+	RFKILL_STATE
+	RFKILL_TYPE
+
+The content of these variables corresponds to the "name", "state" and
+"type" sysfs files explained above.
+
+For further details consult Documentation/ABI/stable/sysfs-class-rfkill.
diff --git a/Documentation/driver-api/sgi-ioc4.rst b/Documentation/driver-api/sgi-ioc4.rst
new file mode 100644
index 000000000000..72709222d3c0
--- /dev/null
+++ b/Documentation/driver-api/sgi-ioc4.rst
@@ -0,0 +1,49 @@
+====================================
+SGI IOC4 PCI (multi function) device
+====================================
+
+The SGI IOC4 PCI device is a bit of a strange beast, so some notes on
+it are in order.
+
+First, even though the IOC4 performs multiple functions, such as an
+IDE controller, a serial controller, a PS/2 keyboard/mouse controller,
+and an external interrupt mechanism, it's not implemented as a
+multifunction device.  The consequence of this from a software
+standpoint is that all these functions share a single IRQ, and
+they can't all register to own the same PCI device ID.  To make
+matters a bit worse, some of the register blocks (and even registers
+themselves) present in IOC4 are mixed-purpose between these several
+functions, meaning that there's no clear "owning" device driver.
+
+The solution is to organize the IOC4 driver into several independent
+drivers, "ioc4", "sgiioc4", and "ioc4_serial".  Note that there is no
+PS/2 controller driver as this functionality has never been wired up
+on a shipping IO card.
+
+ioc4
+====
+This is the core (or shim) driver for IOC4.  It is responsible for
+initializing the basic functionality of the chip, and allocating
+the PCI resources that are shared between the IOC4 functions.
+
+This driver also provides registration functions that the other
+IOC4 drivers can call to make their presence known.  Each driver
+needs to provide a probe and remove function, which are invoked
+by the core driver at appropriate times.  The interface of these
+IOC4 function probe and remove operations isn't precisely the same
+as PCI device probe and remove operations, but is logically the
+same operation.
+
+sgiioc4
+=======
+This is the IDE driver for IOC4.  Its name isn't very descriptive
+simply for historical reasons (it used to be the only IOC4 driver
+component).  There's not much to say about it other than it hooks
+up to the ioc4 driver via the appropriate registration, probe, and
+remove functions.
+
+ioc4_serial
+===========
+This is the serial driver for IOC4.  There's not much to say about it
+other than it hooks up to the ioc4 driver via the appropriate registration,
+probe, and remove functions.
diff --git a/Documentation/driver-api/sm501.rst b/Documentation/driver-api/sm501.rst
new file mode 100644
index 000000000000..882507453ba4
--- /dev/null
+++ b/Documentation/driver-api/sm501.rst
@@ -0,0 +1,74 @@
+.. include:: <isonum.txt>
+
+============
+SM501 Driver
+============
+
+:Copyright: |copy| 2006, 2007 Simtec Electronics
+
+The Silicon Motion SM501 multimedia companion chip is a multifunction device
+which may provide numerous interfaces including USB host controller USB gadget,
+asynchronous serial ports, audio functions, and a dual display video interface.
+The device may be connected by PCI or local bus with varying functions enabled.
+
+Core
+----
+
+The core driver in drivers/mfd provides common services for the
+drivers which manage the specific hardware blocks. These services
+include locking for common registers, clock control and resource
+management.
+
+The core registers drivers for both PCI and generic bus based
+chips via the platform device and driver system.
+
+On detection of a device, the core initialises the chip (which may
+be specified by the platform data) and then exports the selected
+peripheral set as platform devices for the specific drivers.
+
+The core re-uses the platform device system as the platform device
+system provides enough features to support the drivers without the
+need to create a new bus-type and the associated code to go with it.
+
+
+Resources
+---------
+
+Each peripheral has a view of the device which is implicitly narrowed to
+the specific set of resources that peripheral requires in order to
+function correctly.
+
+The centralised memory allocation allows the driver to ensure that the
+maximum possible resource allocation can be made to the video subsystem
+as this is by-far the most resource-sensitive of the on-chip functions.
+
+The primary issue with memory allocation is that of moving the video
+buffers once a display mode is chosen. Indeed when a video mode change
+occurs the memory footprint of the video subsystem changes.
+
+Since video memory is difficult to move without changing the display
+(unless sufficient contiguous memory can be provided for the old and new
+modes simultaneously) the video driver fully utilises the memory area
+given to it by aligning fb0 to the start of the area and fb1 to the end
+of it. Any memory left over in the middle is used for the acceleration
+functions, which are transient and thus their location is less critical
+as it can be moved.
+
+
+Configuration
+-------------
+
+The platform device driver uses a set of platform data to pass
+configurations through to the core and the subsidiary drivers
+so that there can be support for more than one system carrying
+an SM501 built into a single kernel image.
+
+The PCI driver assumes that the PCI card behaves as per the Silicon
+Motion reference design.
+
+There is an errata (AB-5) affecting the selection of the
+of the M1XCLK and M1CLK frequencies. These two clocks
+must be sourced from the same PLL, although they can then
+be divided down individually. If this is not set, then SM501 may
+lock and hang the whole system. The driver will refuse to
+attach if the PLL selection is different.
diff --git a/Documentation/driver-api/smsc_ece1099.rst b/Documentation/driver-api/smsc_ece1099.rst
new file mode 100644
index 000000000000..079277421eaf
--- /dev/null
+++ b/Documentation/driver-api/smsc_ece1099.rst
@@ -0,0 +1,60 @@
+=================================================
+Msc Keyboard Scan Expansion/GPIO Expansion device
+=================================================
+
+What is smsc-ece1099?
+----------------------
+
+The ECE1099 is a 40-Pin 3.3V Keyboard Scan Expansion
+or GPIO Expansion device. The device supports a keyboard
+scan matrix of 23x8. The device is connected to a Master
+via the SMSC BC-Link interface or via the SMBus.
+Keypad scan Input(KSI) and Keypad Scan Output(KSO) signals
+are multiplexed with GPIOs.
+
+Interrupt generation
+--------------------
+
+Interrupts can be generated by an edge detection on a GPIO
+pin or an edge detection on one of the bus interface pins.
+Interrupts can also be detected on the keyboard scan interface.
+The bus interrupt pin (BC_INT# or SMBUS_INT#) is asserted if
+any bit in one of the Interrupt Status registers is 1 and
+the corresponding Interrupt Mask bit is also 1.
+
+In order for software to determine which device is the source
+of an interrupt, it should first read the Group Interrupt Status Register
+to determine which Status register group is a source for the interrupt.
+Software should read both the Status register and the associated Mask register,
+then AND the two values together. Bits that are 1 in the result of the AND
+are active interrupts. Software clears an interrupt by writing a 1 to the
+corresponding bit in the Status register.
+
+Communication Protocol
+----------------------
+
+- SMbus slave Interface
+	The host processor communicates with the ECE1099 device
+	through a series of read/write registers via the SMBus
+	interface. SMBus is a serial communication protocol between
+	a computer host and its peripheral devices. The SMBus data
+	rate is 10KHz minimum to 400 KHz maximum
+
+- Slave Bus Interface
+	The ECE1099 device SMBus implementation is a subset of the
+	SMBus interface to the host. The device is a slave-only SMBus device.
+	The implementation in the device is a subset of SMBus since it
+	only supports four protocols.
+
+	The Write Byte, Read Byte, Send Byte, and Receive Byte protocols are the
+	only valid SMBus protocols for the device.
+
+- BC-LinkTM Interface
+	The BC-Link is a proprietary bus that allows communication
+	between a Master device and a Companion device. The Master
+	device uses this serial bus to read and write registers
+	located on the Companion device. The bus comprises three signals,
+	BC_CLK, BC_DAT and BC_INT#. The Master device always provides the
+	clock, BC_CLK, and the Companion device is the source for an
+	independent asynchronous interrupt signal, BC_INT#. The ECE1099
+	supports BC-Link speeds up to 24MHz.
diff --git a/Documentation/driver-api/switchtec.rst b/Documentation/driver-api/switchtec.rst
new file mode 100644
index 000000000000..7611fdc53e19
--- /dev/null
+++ b/Documentation/driver-api/switchtec.rst
@@ -0,0 +1,102 @@
+========================
+Linux Switchtec Support
+========================
+
+Microsemi's "Switchtec" line of PCI switch devices is already
+supported by the kernel with standard PCI switch drivers. However, the
+Switchtec device advertises a special management endpoint which
+enables some additional functionality. This includes:
+
+* Packet and Byte Counters
+* Firmware Upgrades
+* Event and Error logs
+* Querying port link status
+* Custom user firmware commands
+
+The switchtec kernel module implements this functionality.
+
+
+Interface
+=========
+
+The primary means of communicating with the Switchtec management firmware is
+through the Memory-mapped Remote Procedure Call (MRPC) interface.
+Commands are submitted to the interface with a 4-byte command
+identifier and up to 1KB of command specific data. The firmware will
+respond with a 4-byte return code and up to 1KB of command-specific
+data. The interface only processes a single command at a time.
+
+
+Userspace Interface
+===================
+
+The MRPC interface will be exposed to userspace through a simple char
+device: /dev/switchtec#, one for each management endpoint in the system.
+
+The char device has the following semantics:
+
+* A write must consist of at least 4 bytes and no more than 1028 bytes.
+  The first 4 bytes will be interpreted as the Command ID and the
+  remainder will be used as the input data. A write will send the
+  command to the firmware to begin processing.
+
+* Each write must be followed by exactly one read. Any double write will
+  produce an error and any read that doesn't follow a write will
+  produce an error.
+
+* A read will block until the firmware completes the command and return
+  the 4-byte Command Return Value plus up to 1024 bytes of output
+  data. (The length will be specified by the size parameter of the read
+  call -- reading less than 4 bytes will produce an error.)
+
+* The poll call will also be supported for userspace applications that
+  need to do other things while waiting for the command to complete.
+
+The following IOCTLs are also supported by the device:
+
+* SWITCHTEC_IOCTL_FLASH_INFO - Retrieve firmware length and number
+  of partitions in the device.
+
+* SWITCHTEC_IOCTL_FLASH_PART_INFO - Retrieve address and lengeth for
+  any specified partition in flash.
+
+* SWITCHTEC_IOCTL_EVENT_SUMMARY - Read a structure of bitmaps
+  indicating all uncleared events.
+
+* SWITCHTEC_IOCTL_EVENT_CTL - Get the current count, clear and set flags
+  for any event. This ioctl takes in a switchtec_ioctl_event_ctl struct
+  with the event_id, index and flags set (index being the partition or PFF
+  number for non-global events). It returns whether the event has
+  occurred, the number of times and any event specific data. The flags
+  can be used to clear the count or enable and disable actions to
+  happen when the event occurs.
+  By using the SWITCHTEC_IOCTL_EVENT_FLAG_EN_POLL flag,
+  you can set an event to trigger a poll command to return with
+  POLLPRI. In this way, userspace can wait for events to occur.
+
+* SWITCHTEC_IOCTL_PFF_TO_PORT and SWITCHTEC_IOCTL_PORT_TO_PFF convert
+  between PCI Function Framework number (used by the event system)
+  and Switchtec Logic Port ID and Partition number (which is more
+  user friendly).
+
+
+Non-Transparent Bridge (NTB) Driver
+===================================
+
+An NTB hardware driver is provided for the Switchtec hardware in
+ntb_hw_switchtec. Currently, it only supports switches configured with
+exactly 2 NT partitions and zero or more non-NT partitions. It also requires
+the following configuration settings:
+
+* Both NT partitions must be able to access each other's GAS spaces.
+  Thus, the bits in the GAS Access Vector under Management Settings
+  must be set to support this.
+* Kernel configuration MUST include support for NTB (CONFIG_NTB needs
+  to be set)
+
+NT EP BAR 2 will be dynamically configured as a Direct Window, and
+the configuration file does not need to configure it explicitly.
+
+Please refer to Documentation/driver-api/ntb.rst in Linux source tree for an overall
+understanding of the Linux NTB stack. ntb_hw_switchtec works as an NTB
+Hardware Driver in this stack.
diff --git a/Documentation/driver-api/sync_file.rst b/Documentation/driver-api/sync_file.rst
new file mode 100644
index 000000000000..496fb2c3b3e6
--- /dev/null
+++ b/Documentation/driver-api/sync_file.rst
@@ -0,0 +1,86 @@
+===================
+Sync File API Guide
+===================
+
+:Author: Gustavo Padovan <gustavo at padovan dot org>
+
+This document serves as a guide for device drivers writers on what the
+sync_file API is, and how drivers can support it. Sync file is the carrier of
+the fences(struct dma_fence) that are needed to synchronize between drivers or
+across process boundaries.
+
+The sync_file API is meant to be used to send and receive fence information
+to/from userspace. It enables userspace to do explicit fencing, where instead
+of attaching a fence to the buffer a producer driver (such as a GPU or V4L
+driver) sends the fence related to the buffer to userspace via a sync_file.
+
+The sync_file then can be sent to the consumer (DRM driver for example), that
+will not use the buffer for anything before the fence(s) signals, i.e., the
+driver that issued the fence is not using/processing the buffer anymore, so it
+signals that the buffer is ready to use. And vice-versa for the consumer ->
+producer part of the cycle.
+
+Sync files allows userspace awareness on buffer sharing synchronization between
+drivers.
+
+Sync file was originally added in the Android kernel but current Linux Desktop
+can benefit a lot from it.
+
+in-fences and out-fences
+------------------------
+
+Sync files can go either to or from userspace. When a sync_file is sent from
+the driver to userspace we call the fences it contains 'out-fences'. They are
+related to a buffer that the driver is processing or is going to process, so
+the driver creates an out-fence to be able to notify, through
+dma_fence_signal(), when it has finished using (or processing) that buffer.
+Out-fences are fences that the driver creates.
+
+On the other hand if the driver receives fence(s) through a sync_file from
+userspace we call these fence(s) 'in-fences'. Receiving in-fences means that
+we need to wait for the fence(s) to signal before using any buffer related to
+the in-fences.
+
+Creating Sync Files
+-------------------
+
+When a driver needs to send an out-fence userspace it creates a sync_file.
+
+Interface::
+
+	struct sync_file *sync_file_create(struct dma_fence *fence);
+
+The caller pass the out-fence and gets back the sync_file. That is just the
+first step, next it needs to install an fd on sync_file->file. So it gets an
+fd::
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+
+and installs it on sync_file->file::
+
+	fd_install(fd, sync_file->file);
+
+The sync_file fd now can be sent to userspace.
+
+If the creation process fail, or the sync_file needs to be released by any
+other reason fput(sync_file->file) should be used.
+
+Receiving Sync Files from Userspace
+-----------------------------------
+
+When userspace needs to send an in-fence to the driver it passes file descriptor
+of the Sync File to the kernel. The kernel can then retrieve the fences
+from it.
+
+Interface::
+
+	struct dma_fence *sync_file_get_fence(int fd);
+
+
+The returned reference is owned by the caller and must be disposed of
+afterwards using dma_fence_put(). In case of error, a NULL is returned instead.
+
+References:
+
+1. struct sync_file in include/linux/sync_file.h
+2. All interfaces mentioned above defined in include/linux/sync_file.h
diff --git a/Documentation/driver-api/vfio-mediated-device.rst b/Documentation/driver-api/vfio-mediated-device.rst
new file mode 100644
index 000000000000..25eb7d5b834b
--- /dev/null
+++ b/Documentation/driver-api/vfio-mediated-device.rst
@@ -0,0 +1,414 @@
+.. include:: <isonum.txt>
+
+=====================
+VFIO Mediated devices
+=====================
+
+:Copyright: |copy| 2016, NVIDIA CORPORATION. All rights reserved.
+:Author: Neo Jia <cjia@nvidia.com>
+:Author: Kirti Wankhede <kwankhede@nvidia.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License version 2 as
+published by the Free Software Foundation.
+
+
+Virtual Function I/O (VFIO) Mediated devices[1]
+===============================================
+
+The number of use cases for virtualizing DMA devices that do not have built-in
+SR_IOV capability is increasing. Previously, to virtualize such devices,
+developers had to create their own management interfaces and APIs, and then
+integrate them with user space software. To simplify integration with user space
+software, we have identified common requirements and a unified management
+interface for such devices.
+
+The VFIO driver framework provides unified APIs for direct device access. It is
+an IOMMU/device-agnostic framework for exposing direct device access to user
+space in a secure, IOMMU-protected environment. This framework is used for
+multiple devices, such as GPUs, network adapters, and compute accelerators. With
+direct device access, virtual machines or user space applications have direct
+access to the physical device. This framework is reused for mediated devices.
+
+The mediated core driver provides a common interface for mediated device
+management that can be used by drivers of different devices. This module
+provides a generic interface to perform these operations:
+
+* Create and destroy a mediated device
+* Add a mediated device to and remove it from a mediated bus driver
+* Add a mediated device to and remove it from an IOMMU group
+
+The mediated core driver also provides an interface to register a bus driver.
+For example, the mediated VFIO mdev driver is designed for mediated devices and
+supports VFIO APIs. The mediated bus driver adds a mediated device to and
+removes it from a VFIO group.
+
+The following high-level block diagram shows the main components and interfaces
+in the VFIO mediated driver framework. The diagram shows NVIDIA, Intel, and IBM
+devices as examples, as these devices are the first devices to use this module::
+
+     +---------------+
+     |               |
+     | +-----------+ |  mdev_register_driver() +--------------+
+     | |           | +<------------------------+              |
+     | |  mdev     | |                         |              |
+     | |  bus      | +------------------------>+ vfio_mdev.ko |<-> VFIO user
+     | |  driver   | |     probe()/remove()    |              |    APIs
+     | |           | |                         +--------------+
+     | +-----------+ |
+     |               |
+     |  MDEV CORE    |
+     |   MODULE      |
+     |   mdev.ko     |
+     | +-----------+ |  mdev_register_device() +--------------+
+     | |           | +<------------------------+              |
+     | |           | |                         |  nvidia.ko   |<-> physical
+     | |           | +------------------------>+              |    device
+     | |           | |        callbacks        +--------------+
+     | | Physical  | |
+     | |  device   | |  mdev_register_device() +--------------+
+     | | interface | |<------------------------+              |
+     | |           | |                         |  i915.ko     |<-> physical
+     | |           | +------------------------>+              |    device
+     | |           | |        callbacks        +--------------+
+     | |           | |
+     | |           | |  mdev_register_device() +--------------+
+     | |           | +<------------------------+              |
+     | |           | |                         | ccw_device.ko|<-> physical
+     | |           | +------------------------>+              |    device
+     | |           | |        callbacks        +--------------+
+     | +-----------+ |
+     +---------------+
+
+
+Registration Interfaces
+=======================
+
+The mediated core driver provides the following types of registration
+interfaces:
+
+* Registration interface for a mediated bus driver
+* Physical device driver interface
+
+Registration Interface for a Mediated Bus Driver
+------------------------------------------------
+
+The registration interface for a mediated bus driver provides the following
+structure to represent a mediated device's driver::
+
+     /*
+      * struct mdev_driver [2] - Mediated device's driver
+      * @name: driver name
+      * @probe: called when new device created
+      * @remove: called when device removed
+      * @driver: device driver structure
+      */
+     struct mdev_driver {
+	     const char *name;
+	     int  (*probe)  (struct device *dev);
+	     void (*remove) (struct device *dev);
+	     struct device_driver    driver;
+     };
+
+A mediated bus driver for mdev should use this structure in the function calls
+to register and unregister itself with the core driver:
+
+* Register::
+
+    extern int  mdev_register_driver(struct mdev_driver *drv,
+				   struct module *owner);
+
+* Unregister::
+
+    extern void mdev_unregister_driver(struct mdev_driver *drv);
+
+The mediated bus driver is responsible for adding mediated devices to the VFIO
+group when devices are bound to the driver and removing mediated devices from
+the VFIO when devices are unbound from the driver.
+
+
+Physical Device Driver Interface
+--------------------------------
+
+The physical device driver interface provides the mdev_parent_ops[3] structure
+to define the APIs to manage work in the mediated core driver that is related
+to the physical device.
+
+The structures in the mdev_parent_ops structure are as follows:
+
+* dev_attr_groups: attributes of the parent device
+* mdev_attr_groups: attributes of the mediated device
+* supported_config: attributes to define supported configurations
+
+The functions in the mdev_parent_ops structure are as follows:
+
+* create: allocate basic resources in a driver for a mediated device
+* remove: free resources in a driver when a mediated device is destroyed
+
+(Note that mdev-core provides no implicit serialization of create/remove
+callbacks per mdev parent device, per mdev type, or any other categorization.
+Vendor drivers are expected to be fully asynchronous in this respect or
+provide their own internal resource protection.)
+
+The callbacks in the mdev_parent_ops structure are as follows:
+
+* open: open callback of mediated device
+* close: close callback of mediated device
+* ioctl: ioctl callback of mediated device
+* read : read emulation callback
+* write: write emulation callback
+* mmap: mmap emulation callback
+
+A driver should use the mdev_parent_ops structure in the function call to
+register itself with the mdev core driver::
+
+	extern int  mdev_register_device(struct device *dev,
+	                                 const struct mdev_parent_ops *ops);
+
+However, the mdev_parent_ops structure is not required in the function call
+that a driver should use to unregister itself with the mdev core driver::
+
+	extern void mdev_unregister_device(struct device *dev);
+
+
+Mediated Device Management Interface Through sysfs
+==================================================
+
+The management interface through sysfs enables user space software, such as
+libvirt, to query and configure mediated devices in a hardware-agnostic fashion.
+This management interface provides flexibility to the underlying physical
+device's driver to support features such as:
+
+* Mediated device hot plug
+* Multiple mediated devices in a single virtual machine
+* Multiple mediated devices from different physical devices
+
+Links in the mdev_bus Class Directory
+-------------------------------------
+The /sys/class/mdev_bus/ directory contains links to devices that are registered
+with the mdev core driver.
+
+Directories and files under the sysfs for Each Physical Device
+--------------------------------------------------------------
+
+::
+
+  |- [parent physical device]
+  |--- Vendor-specific-attributes [optional]
+  |--- [mdev_supported_types]
+  |     |--- [<type-id>]
+  |     |   |--- create
+  |     |   |--- name
+  |     |   |--- available_instances
+  |     |   |--- device_api
+  |     |   |--- description
+  |     |   |--- [devices]
+  |     |--- [<type-id>]
+  |     |   |--- create
+  |     |   |--- name
+  |     |   |--- available_instances
+  |     |   |--- device_api
+  |     |   |--- description
+  |     |   |--- [devices]
+  |     |--- [<type-id>]
+  |          |--- create
+  |          |--- name
+  |          |--- available_instances
+  |          |--- device_api
+  |          |--- description
+  |          |--- [devices]
+
+* [mdev_supported_types]
+
+  The list of currently supported mediated device types and their details.
+
+  [<type-id>], device_api, and available_instances are mandatory attributes
+  that should be provided by vendor driver.
+
+* [<type-id>]
+
+  The [<type-id>] name is created by adding the device driver string as a prefix
+  to the string provided by the vendor driver. This format of this name is as
+  follows::
+
+	sprintf(buf, "%s-%s", dev_driver_string(parent->dev), group->name);
+
+  (or using mdev_parent_dev(mdev) to arrive at the parent device outside
+  of the core mdev code)
+
+* device_api
+
+  This attribute should show which device API is being created, for example,
+  "vfio-pci" for a PCI device.
+
+* available_instances
+
+  This attribute should show the number of devices of type <type-id> that can be
+  created.
+
+* [device]
+
+  This directory contains links to the devices of type <type-id> that have been
+  created.
+
+* name
+
+  This attribute should show human readable name. This is optional attribute.
+
+* description
+
+  This attribute should show brief features/description of the type. This is
+  optional attribute.
+
+Directories and Files Under the sysfs for Each mdev Device
+----------------------------------------------------------
+
+::
+
+  |- [parent phy device]
+  |--- [$MDEV_UUID]
+         |--- remove
+         |--- mdev_type {link to its type}
+         |--- vendor-specific-attributes [optional]
+
+* remove (write only)
+
+Writing '1' to the 'remove' file destroys the mdev device. The vendor driver can
+fail the remove() callback if that device is active and the vendor driver
+doesn't support hot unplug.
+
+Example::
+
+	# echo 1 > /sys/bus/mdev/devices/$mdev_UUID/remove
+
+Mediated device Hot plug
+------------------------
+
+Mediated devices can be created and assigned at runtime. The procedure to hot
+plug a mediated device is the same as the procedure to hot plug a PCI device.
+
+Translation APIs for Mediated Devices
+=====================================
+
+The following APIs are provided for translating user pfn to host pfn in a VFIO
+driver::
+
+	extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
+				  int npage, int prot, unsigned long *phys_pfn);
+
+	extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
+				    int npage);
+
+These functions call back into the back-end IOMMU module by using the pin_pages
+and unpin_pages callbacks of the struct vfio_iommu_driver_ops[4]. Currently
+these callbacks are supported in the TYPE1 IOMMU module. To enable them for
+other IOMMU backend modules, such as PPC64 sPAPR module, they need to provide
+these two callback functions.
+
+Using the Sample Code
+=====================
+
+mtty.c in samples/vfio-mdev/ directory is a sample driver program to
+demonstrate how to use the mediated device framework.
+
+The sample driver creates an mdev device that simulates a serial port over a PCI
+card.
+
+1. Build and load the mtty.ko module.
+
+   This step creates a dummy device, /sys/devices/virtual/mtty/mtty/
+
+   Files in this device directory in sysfs are similar to the following::
+
+     # tree /sys/devices/virtual/mtty/mtty/
+        /sys/devices/virtual/mtty/mtty/
+        |-- mdev_supported_types
+        |   |-- mtty-1
+        |   |   |-- available_instances
+        |   |   |-- create
+        |   |   |-- device_api
+        |   |   |-- devices
+        |   |   `-- name
+        |   `-- mtty-2
+        |       |-- available_instances
+        |       |-- create
+        |       |-- device_api
+        |       |-- devices
+        |       `-- name
+        |-- mtty_dev
+        |   `-- sample_mtty_dev
+        |-- power
+        |   |-- autosuspend_delay_ms
+        |   |-- control
+        |   |-- runtime_active_time
+        |   |-- runtime_status
+        |   `-- runtime_suspended_time
+        |-- subsystem -> ../../../../class/mtty
+        `-- uevent
+
+2. Create a mediated device by using the dummy device that you created in the
+   previous step::
+
+     # echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1001" >	\
+              /sys/devices/virtual/mtty/mtty/mdev_supported_types/mtty-2/create
+
+3. Add parameters to qemu-kvm::
+
+     -device vfio-pci,\
+      sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001
+
+4. Boot the VM.
+
+   In the Linux guest VM, with no hardware on the host, the device appears
+   as  follows::
+
+     # lspci -s 00:05.0 -xxvv
+     00:05.0 Serial controller: Device 4348:3253 (rev 10) (prog-if 02 [16550])
+             Subsystem: Device 4348:3253
+             Physical Slot: 5
+             Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr-
+     Stepping- SERR- FastB2B- DisINTx-
+             Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort-
+     <TAbort- <MAbort- >SERR- <PERR- INTx-
+             Interrupt: pin A routed to IRQ 10
+             Region 0: I/O ports at c150 [size=8]
+             Region 1: I/O ports at c158 [size=8]
+             Kernel driver in use: serial
+     00: 48 43 53 32 01 00 00 02 10 02 00 07 00 00 00 00
+     10: 51 c1 00 00 59 c1 00 00 00 00 00 00 00 00 00 00
+     20: 00 00 00 00 00 00 00 00 00 00 00 00 48 43 53 32
+     30: 00 00 00 00 00 00 00 00 00 00 00 00 0a 01 00 00
+
+     In the Linux guest VM, dmesg output for the device is as follows:
+
+     serial 0000:00:05.0: PCI INT A -> Link[LNKA] -> GSI 10 (level, high) -> IRQ 10
+     0000:00:05.0: ttyS1 at I/O 0xc150 (irq = 10) is a 16550A
+     0000:00:05.0: ttyS2 at I/O 0xc158 (irq = 10) is a 16550A
+
+
+5. In the Linux guest VM, check the serial ports::
+
+     # setserial -g /dev/ttyS*
+     /dev/ttyS0, UART: 16550A, Port: 0x03f8, IRQ: 4
+     /dev/ttyS1, UART: 16550A, Port: 0xc150, IRQ: 10
+     /dev/ttyS2, UART: 16550A, Port: 0xc158, IRQ: 10
+
+6. Using minicom or any terminal emulation program, open port /dev/ttyS1 or
+   /dev/ttyS2 with hardware flow control disabled.
+
+7. Type data on the minicom terminal or send data to the terminal emulation
+   program and read the data.
+
+   Data is loop backed from hosts mtty driver.
+
+8. Destroy the mediated device that you created::
+
+     # echo 1 > /sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001/remove
+
+References
+==========
+
+1. See Documentation/driver-api/vfio.rst for more information on VFIO.
+2. struct mdev_driver in include/linux/mdev.h
+3. struct mdev_parent_ops in include/linux/mdev.h
+4. struct vfio_iommu_driver_ops in include/linux/vfio.h
diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst
new file mode 100644
index 000000000000..f1a4d3c3ba0b
--- /dev/null
+++ b/Documentation/driver-api/vfio.rst
@@ -0,0 +1,520 @@
+==================================
+VFIO - "Virtual Function I/O" [1]_
+==================================
+
+Many modern system now provide DMA and interrupt remapping facilities
+to help ensure I/O devices behave within the boundaries they've been
+allotted.  This includes x86 hardware with AMD-Vi and Intel VT-d,
+POWER systems with Partitionable Endpoints (PEs) and embedded PowerPC
+systems such as Freescale PAMU.  The VFIO driver is an IOMMU/device
+agnostic framework for exposing direct device access to userspace, in
+a secure, IOMMU protected environment.  In other words, this allows
+safe [2]_, non-privileged, userspace drivers.
+
+Why do we want that?  Virtual machines often make use of direct device
+access ("device assignment") when configured for the highest possible
+I/O performance.  From a device and host perspective, this simply
+turns the VM into a userspace driver, with the benefits of
+significantly reduced latency, higher bandwidth, and direct use of
+bare-metal device drivers [3]_.
+
+Some applications, particularly in the high performance computing
+field, also benefit from low-overhead, direct device access from
+userspace.  Examples include network adapters (often non-TCP/IP based)
+and compute accelerators.  Prior to VFIO, these drivers had to either
+go through the full development cycle to become proper upstream
+driver, be maintained out of tree, or make use of the UIO framework,
+which has no notion of IOMMU protection, limited interrupt support,
+and requires root privileges to access things like PCI configuration
+space.
+
+The VFIO driver framework intends to unify these, replacing both the
+KVM PCI specific device assignment code as well as provide a more
+secure, more featureful userspace driver environment than UIO.
+
+Groups, Devices, and IOMMUs
+---------------------------
+
+Devices are the main target of any I/O driver.  Devices typically
+create a programming interface made up of I/O access, interrupts,
+and DMA.  Without going into the details of each of these, DMA is
+by far the most critical aspect for maintaining a secure environment
+as allowing a device read-write access to system memory imposes the
+greatest risk to the overall system integrity.
+
+To help mitigate this risk, many modern IOMMUs now incorporate
+isolation properties into what was, in many cases, an interface only
+meant for translation (ie. solving the addressing problems of devices
+with limited address spaces).  With this, devices can now be isolated
+from each other and from arbitrary memory access, thus allowing
+things like secure direct assignment of devices into virtual machines.
+
+This isolation is not always at the granularity of a single device
+though.  Even when an IOMMU is capable of this, properties of devices,
+interconnects, and IOMMU topologies can each reduce this isolation.
+For instance, an individual device may be part of a larger multi-
+function enclosure.  While the IOMMU may be able to distinguish
+between devices within the enclosure, the enclosure may not require
+transactions between devices to reach the IOMMU.  Examples of this
+could be anything from a multi-function PCI device with backdoors
+between functions to a non-PCI-ACS (Access Control Services) capable
+bridge allowing redirection without reaching the IOMMU.  Topology
+can also play a factor in terms of hiding devices.  A PCIe-to-PCI
+bridge masks the devices behind it, making transaction appear as if
+from the bridge itself.  Obviously IOMMU design plays a major factor
+as well.
+
+Therefore, while for the most part an IOMMU may have device level
+granularity, any system is susceptible to reduced granularity.  The
+IOMMU API therefore supports a notion of IOMMU groups.  A group is
+a set of devices which is isolatable from all other devices in the
+system.  Groups are therefore the unit of ownership used by VFIO.
+
+While the group is the minimum granularity that must be used to
+ensure secure user access, it's not necessarily the preferred
+granularity.  In IOMMUs which make use of page tables, it may be
+possible to share a set of page tables between different groups,
+reducing the overhead both to the platform (reduced TLB thrashing,
+reduced duplicate page tables), and to the user (programming only
+a single set of translations).  For this reason, VFIO makes use of
+a container class, which may hold one or more groups.  A container
+is created by simply opening the /dev/vfio/vfio character device.
+
+On its own, the container provides little functionality, with all
+but a couple version and extension query interfaces locked away.
+The user needs to add a group into the container for the next level
+of functionality.  To do this, the user first needs to identify the
+group associated with the desired device.  This can be done using
+the sysfs links described in the example below.  By unbinding the
+device from the host driver and binding it to a VFIO driver, a new
+VFIO group will appear for the group as /dev/vfio/$GROUP, where
+$GROUP is the IOMMU group number of which the device is a member.
+If the IOMMU group contains multiple devices, each will need to
+be bound to a VFIO driver before operations on the VFIO group
+are allowed (it's also sufficient to only unbind the device from
+host drivers if a VFIO driver is unavailable; this will make the
+group available, but not that particular device).  TBD - interface
+for disabling driver probing/locking a device.
+
+Once the group is ready, it may be added to the container by opening
+the VFIO group character device (/dev/vfio/$GROUP) and using the
+VFIO_GROUP_SET_CONTAINER ioctl, passing the file descriptor of the
+previously opened container file.  If desired and if the IOMMU driver
+supports sharing the IOMMU context between groups, multiple groups may
+be set to the same container.  If a group fails to set to a container
+with existing groups, a new empty container will need to be used
+instead.
+
+With a group (or groups) attached to a container, the remaining
+ioctls become available, enabling access to the VFIO IOMMU interfaces.
+Additionally, it now becomes possible to get file descriptors for each
+device within a group using an ioctl on the VFIO group file descriptor.
+
+The VFIO device API includes ioctls for describing the device, the I/O
+regions and their read/write/mmap offsets on the device descriptor, as
+well as mechanisms for describing and registering interrupt
+notifications.
+
+VFIO Usage Example
+------------------
+
+Assume user wants to access PCI device 0000:06:0d.0::
+
+	$ readlink /sys/bus/pci/devices/0000:06:0d.0/iommu_group
+	../../../../kernel/iommu_groups/26
+
+This device is therefore in IOMMU group 26.  This device is on the
+pci bus, therefore the user will make use of vfio-pci to manage the
+group::
+
+	# modprobe vfio-pci
+
+Binding this device to the vfio-pci driver creates the VFIO group
+character devices for this group::
+
+	$ lspci -n -s 0000:06:0d.0
+	06:0d.0 0401: 1102:0002 (rev 08)
+	# echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind
+	# echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id
+
+Now we need to look at what other devices are in the group to free
+it for use by VFIO::
+
+	$ ls -l /sys/bus/pci/devices/0000:06:0d.0/iommu_group/devices
+	total 0
+	lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:00:1e.0 ->
+		../../../../devices/pci0000:00/0000:00:1e.0
+	lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:06:0d.0 ->
+		../../../../devices/pci0000:00/0000:00:1e.0/0000:06:0d.0
+	lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:06:0d.1 ->
+		../../../../devices/pci0000:00/0000:00:1e.0/0000:06:0d.1
+
+This device is behind a PCIe-to-PCI bridge [4]_, therefore we also
+need to add device 0000:06:0d.1 to the group following the same
+procedure as above.  Device 0000:00:1e.0 is a bridge that does
+not currently have a host driver, therefore it's not required to
+bind this device to the vfio-pci driver (vfio-pci does not currently
+support PCI bridges).
+
+The final step is to provide the user with access to the group if
+unprivileged operation is desired (note that /dev/vfio/vfio provides
+no capabilities on its own and is therefore expected to be set to
+mode 0666 by the system)::
+
+	# chown user:user /dev/vfio/26
+
+The user now has full access to all the devices and the iommu for this
+group and can access them as follows::
+
+	int container, group, device, i;
+	struct vfio_group_status group_status =
+					{ .argsz = sizeof(group_status) };
+	struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
+	struct vfio_iommu_type1_dma_map dma_map = { .argsz = sizeof(dma_map) };
+	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+
+	/* Create a new container */
+	container = open("/dev/vfio/vfio", O_RDWR);
+
+	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
+		/* Unknown API version */
+
+	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
+		/* Doesn't support the IOMMU driver we want. */
+
+	/* Open the group */
+	group = open("/dev/vfio/26", O_RDWR);
+
+	/* Test the group is viable and available */
+	ioctl(group, VFIO_GROUP_GET_STATUS, &group_status);
+
+	if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE))
+		/* Group is not viable (ie, not all devices bound for vfio) */
+
+	/* Add the group to the container */
+	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
+
+	/* Enable the IOMMU model we want */
+	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
+
+	/* Get addition IOMMU info */
+	ioctl(container, VFIO_IOMMU_GET_INFO, &iommu_info);
+
+	/* Allocate some space and setup a DMA mapping */
+	dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE,
+			     MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+	dma_map.size = 1024 * 1024;
+	dma_map.iova = 0; /* 1MB starting at 0x0 from device view */
+	dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+
+	ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map);
+
+	/* Get a file descriptor for the device */
+	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
+
+	/* Test and setup the device */
+	ioctl(device, VFIO_DEVICE_GET_INFO, &device_info);
+
+	for (i = 0; i < device_info.num_regions; i++) {
+		struct vfio_region_info reg = { .argsz = sizeof(reg) };
+
+		reg.index = i;
+
+		ioctl(device, VFIO_DEVICE_GET_REGION_INFO, &reg);
+
+		/* Setup mappings... read/write offsets, mmaps
+		 * For PCI devices, config space is a region */
+	}
+
+	for (i = 0; i < device_info.num_irqs; i++) {
+		struct vfio_irq_info irq = { .argsz = sizeof(irq) };
+
+		irq.index = i;
+
+		ioctl(device, VFIO_DEVICE_GET_IRQ_INFO, &irq);
+
+		/* Setup IRQs... eventfds, VFIO_DEVICE_SET_IRQS */
+	}
+
+	/* Gratuitous device reset and go... */
+	ioctl(device, VFIO_DEVICE_RESET);
+
+VFIO User API
+-------------------------------------------------------------------------------
+
+Please see include/linux/vfio.h for complete API documentation.
+
+VFIO bus driver API
+-------------------------------------------------------------------------------
+
+VFIO bus drivers, such as vfio-pci make use of only a few interfaces
+into VFIO core.  When devices are bound and unbound to the driver,
+the driver should call vfio_add_group_dev() and vfio_del_group_dev()
+respectively::
+
+	extern int vfio_add_group_dev(struct device *dev,
+				      const struct vfio_device_ops *ops,
+				      void *device_data);
+
+	extern void *vfio_del_group_dev(struct device *dev);
+
+vfio_add_group_dev() indicates to the core to begin tracking the
+iommu_group of the specified dev and register the dev as owned by
+a VFIO bus driver.  The driver provides an ops structure for callbacks
+similar to a file operations structure::
+
+	struct vfio_device_ops {
+		int	(*open)(void *device_data);
+		void	(*release)(void *device_data);
+		ssize_t	(*read)(void *device_data, char __user *buf,
+				size_t count, loff_t *ppos);
+		ssize_t	(*write)(void *device_data, const char __user *buf,
+				 size_t size, loff_t *ppos);
+		long	(*ioctl)(void *device_data, unsigned int cmd,
+				 unsigned long arg);
+		int	(*mmap)(void *device_data, struct vm_area_struct *vma);
+	};
+
+Each function is passed the device_data that was originally registered
+in the vfio_add_group_dev() call above.  This allows the bus driver
+an easy place to store its opaque, private data.  The open/release
+callbacks are issued when a new file descriptor is created for a
+device (via VFIO_GROUP_GET_DEVICE_FD).  The ioctl interface provides
+a direct pass through for VFIO_DEVICE_* ioctls.  The read/write/mmap
+interfaces implement the device region access defined by the device's
+own VFIO_DEVICE_GET_REGION_INFO ioctl.
+
+
+PPC64 sPAPR implementation note
+-------------------------------
+
+This implementation has some specifics:
+
+1) On older systems (POWER7 with P5IOC2/IODA1) only one IOMMU group per
+   container is supported as an IOMMU table is allocated at the boot time,
+   one table per a IOMMU group which is a Partitionable Endpoint (PE)
+   (PE is often a PCI domain but not always).
+
+   Newer systems (POWER8 with IODA2) have improved hardware design which allows
+   to remove this limitation and have multiple IOMMU groups per a VFIO
+   container.
+
+2) The hardware supports so called DMA windows - the PCI address range
+   within which DMA transfer is allowed, any attempt to access address space
+   out of the window leads to the whole PE isolation.
+
+3) PPC64 guests are paravirtualized but not fully emulated. There is an API
+   to map/unmap pages for DMA, and it normally maps 1..32 pages per call and
+   currently there is no way to reduce the number of calls. In order to make
+   things faster, the map/unmap handling has been implemented in real mode
+   which provides an excellent performance which has limitations such as
+   inability to do locked pages accounting in real time.
+
+4) According to sPAPR specification, A Partitionable Endpoint (PE) is an I/O
+   subtree that can be treated as a unit for the purposes of partitioning and
+   error recovery. A PE may be a single or multi-function IOA (IO Adapter), a
+   function of a multi-function IOA, or multiple IOAs (possibly including
+   switch and bridge structures above the multiple IOAs). PPC64 guests detect
+   PCI errors and recover from them via EEH RTAS services, which works on the
+   basis of additional ioctl commands.
+
+   So 4 additional ioctls have been added:
+
+	VFIO_IOMMU_SPAPR_TCE_GET_INFO
+		returns the size and the start of the DMA window on the PCI bus.
+
+	VFIO_IOMMU_ENABLE
+		enables the container. The locked pages accounting
+		is done at this point. This lets user first to know what
+		the DMA window is and adjust rlimit before doing any real job.
+
+	VFIO_IOMMU_DISABLE
+		disables the container.
+
+	VFIO_EEH_PE_OP
+		provides an API for EEH setup, error detection and recovery.
+
+   The code flow from the example above should be slightly changed::
+
+	struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
+
+	.....
+	/* Add the group to the container */
+	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
+
+	/* Enable the IOMMU model we want */
+	ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU)
+
+	/* Get addition sPAPR IOMMU info */
+	vfio_iommu_spapr_tce_info spapr_iommu_info;
+	ioctl(container, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &spapr_iommu_info);
+
+	if (ioctl(container, VFIO_IOMMU_ENABLE))
+		/* Cannot enable container, may be low rlimit */
+
+	/* Allocate some space and setup a DMA mapping */
+	dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE,
+			     MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+
+	dma_map.size = 1024 * 1024;
+	dma_map.iova = 0; /* 1MB starting at 0x0 from device view */
+	dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+
+	/* Check here is .iova/.size are within DMA window from spapr_iommu_info */
+	ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map);
+
+	/* Get a file descriptor for the device */
+	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
+
+	....
+
+	/* Gratuitous device reset and go... */
+	ioctl(device, VFIO_DEVICE_RESET);
+
+	/* Make sure EEH is supported */
+	ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH);
+
+	/* Enable the EEH functionality on the device */
+	pe_op.op = VFIO_EEH_PE_ENABLE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* You're suggested to create additional data struct to represent
+	 * PE, and put child devices belonging to same IOMMU group to the
+	 * PE instance for later reference.
+	 */
+
+	/* Check the PE's state and make sure it's in functional state */
+	pe_op.op = VFIO_EEH_PE_GET_STATE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Save device state using pci_save_state().
+	 * EEH should be enabled on the specified device.
+	 */
+
+	....
+
+	/* Inject EEH error, which is expected to be caused by 32-bits
+	 * config load.
+	 */
+	pe_op.op = VFIO_EEH_PE_INJECT_ERR;
+	pe_op.err.type = EEH_ERR_TYPE_32;
+	pe_op.err.func = EEH_ERR_FUNC_LD_CFG_ADDR;
+	pe_op.err.addr = 0ul;
+	pe_op.err.mask = 0ul;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	....
+
+	/* When 0xFF's returned from reading PCI config space or IO BARs
+	 * of the PCI device. Check the PE's state to see if that has been
+	 * frozen.
+	 */
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Waiting for pending PCI transactions to be completed and don't
+	 * produce any more PCI traffic from/to the affected PE until
+	 * recovery is finished.
+	 */
+
+	/* Enable IO for the affected PE and collect logs. Usually, the
+	 * standard part of PCI config space, AER registers are dumped
+	 * as logs for further analysis.
+	 */
+	pe_op.op = VFIO_EEH_PE_UNFREEZE_IO;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/*
+	 * Issue PE reset: hot or fundamental reset. Usually, hot reset
+	 * is enough. However, the firmware of some PCI adapters would
+	 * require fundamental reset.
+	 */
+	pe_op.op = VFIO_EEH_PE_RESET_HOT;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+	pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Configure the PCI bridges for the affected PE */
+	pe_op.op = VFIO_EEH_PE_CONFIGURE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Restored state we saved at initialization time. pci_restore_state()
+	 * is good enough as an example.
+	 */
+
+	/* Hopefully, error is recovered successfully. Now, you can resume to
+	 * start PCI traffic to/from the affected PE.
+	 */
+
+	....
+
+5) There is v2 of SPAPR TCE IOMMU. It deprecates VFIO_IOMMU_ENABLE/
+   VFIO_IOMMU_DISABLE and implements 2 new ioctls:
+   VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY
+   (which are unsupported in v1 IOMMU).
+
+   PPC64 paravirtualized guests generate a lot of map/unmap requests,
+   and the handling of those includes pinning/unpinning pages and updating
+   mm::locked_vm counter to make sure we do not exceed the rlimit.
+   The v2 IOMMU splits accounting and pinning into separate operations:
+
+   - VFIO_IOMMU_SPAPR_REGISTER_MEMORY/VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY ioctls
+     receive a user space address and size of the block to be pinned.
+     Bisecting is not supported and VFIO_IOMMU_UNREGISTER_MEMORY is expected to
+     be called with the exact address and size used for registering
+     the memory block. The userspace is not expected to call these often.
+     The ranges are stored in a linked list in a VFIO container.
+
+   - VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA ioctls only update the actual
+     IOMMU table and do not do pinning; instead these check that the userspace
+     address is from pre-registered range.
+
+   This separation helps in optimizing DMA for guests.
+
+6) sPAPR specification allows guests to have an additional DMA window(s) on
+   a PCI bus with a variable page size. Two ioctls have been added to support
+   this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE.
+   The platform has to support the functionality or error will be returned to
+   the userspace. The existing hardware supports up to 2 DMA windows, one is
+   2GB long, uses 4K pages and called "default 32bit window"; the other can
+   be as big as entire RAM, use different page size, it is optional - guests
+   create those in run-time if the guest driver supports 64bit DMA.
+
+   VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and
+   a number of TCE table levels (if a TCE table is going to be big enough and
+   the kernel may not be able to allocate enough of physically contiguous
+   memory). It creates a new window in the available slot and returns the bus
+   address where the new window starts. Due to hardware limitation, the user
+   space cannot choose the location of DMA windows.
+
+   VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window
+   and removes it.
+
+-------------------------------------------------------------------------------
+
+.. [1] VFIO was originally an acronym for "Virtual Function I/O" in its
+   initial implementation by Tom Lyon while as Cisco.  We've since
+   outgrown the acronym, but it's catchy.
+
+.. [2] "safe" also depends upon a device being "well behaved".  It's
+   possible for multi-function devices to have backdoors between
+   functions and even for single function devices to have alternative
+   access to things like PCI config space through MMIO registers.  To
+   guard against the former we can include additional precautions in the
+   IOMMU driver to group multi-function PCI devices together
+   (iommu=group_mf).  The latter we can't prevent, but the IOMMU should
+   still provide isolation.  For PCI, SR-IOV Virtual Functions are the
+   best indicator of "well behaved", as these are designed for
+   virtualization usage models.
+
+.. [3] As always there are trade-offs to virtual machine device
+   assignment that are beyond the scope of VFIO.  It's expected that
+   future IOMMU technologies will reduce some, but maybe not all, of
+   these trade-offs.
+
+.. [4] In this case the device is below a PCI bridge, so transactions
+   from either function of the device are indistinguishable to the iommu::
+
+	-[0000:00]-+-1e.0-[06]--+-0d.0
+				\-0d.1
+
+	00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev 90)
diff --git a/Documentation/driver-api/xillybus.rst b/Documentation/driver-api/xillybus.rst
new file mode 100644
index 000000000000..2446ee303c09
--- /dev/null
+++ b/Documentation/driver-api/xillybus.rst
@@ -0,0 +1,379 @@
+==========================================
+Xillybus driver for generic FPGA interface
+==========================================
+
+:Author: Eli Billauer, Xillybus Ltd. (http://xillybus.com)
+:Email:  eli.billauer@gmail.com or as advertised on Xillybus' site.
+
+.. Contents:
+
+ - Introduction
+  -- Background
+  -- Xillybus Overview
+
+ - Usage
+  -- User interface
+  -- Synchronization
+  -- Seekable pipes
+
+ - Internals
+  -- Source code organization
+  -- Pipe attributes
+  -- Host never reads from the FPGA
+  -- Channels, pipes, and the message channel
+  -- Data streaming
+  -- Data granularity
+  -- Probing
+  -- Buffer allocation
+  -- The "nonempty" message (supporting poll)
+
+
+Introduction
+============
+
+Background
+----------
+
+An FPGA (Field Programmable Gate Array) is a piece of logic hardware, which
+can be programmed to become virtually anything that is usually found as a
+dedicated chipset: For instance, a display adapter, network interface card,
+or even a processor with its peripherals. FPGAs are the LEGO of hardware:
+Based upon certain building blocks, you make your own toys the way you like
+them. It's usually pointless to reimplement something that is already
+available on the market as a chipset, so FPGAs are mostly used when some
+special functionality is needed, and the production volume is relatively low
+(hence not justifying the development of an ASIC).
+
+The challenge with FPGAs is that everything is implemented at a very low
+level, even lower than assembly language. In order to allow FPGA designers to
+focus on their specific project, and not reinvent the wheel over and over
+again, pre-designed building blocks, IP cores, are often used. These are the
+FPGA parallels of library functions. IP cores may implement certain
+mathematical functions, a functional unit (e.g. a USB interface), an entire
+processor (e.g. ARM) or anything that might come handy. Think of them as a
+building block, with electrical wires dangling on the sides for connection to
+other blocks.
+
+One of the daunting tasks in FPGA design is communicating with a fullblown
+operating system (actually, with the processor running it): Implementing the
+low-level bus protocol and the somewhat higher-level interface with the host
+(registers, interrupts, DMA etc.) is a project in itself. When the FPGA's
+function is a well-known one (e.g. a video adapter card, or a NIC), it can
+make sense to design the FPGA's interface logic specifically for the project.
+A special driver is then written to present the FPGA as a well-known interface
+to the kernel and/or user space. In that case, there is no reason to treat the
+FPGA differently than any device on the bus.
+
+It's however common that the desired data communication doesn't fit any well-
+known peripheral function. Also, the effort of designing an elegant
+abstraction for the data exchange is often considered too big. In those cases,
+a quicker and possibly less elegant solution is sought: The driver is
+effectively written as a user space program, leaving the kernel space part
+with just elementary data transport. This still requires designing some
+interface logic for the FPGA, and write a simple ad-hoc driver for the kernel.
+
+Xillybus Overview
+-----------------
+
+Xillybus is an IP core and a Linux driver. Together, they form a kit for
+elementary data transport between an FPGA and the host, providing pipe-like
+data streams with a straightforward user interface. It's intended as a low-
+effort solution for mixed FPGA-host projects, for which it makes sense to
+have the project-specific part of the driver running in a user-space program.
+
+Since the communication requirements may vary significantly from one FPGA
+project to another (the number of data pipes needed in each direction and
+their attributes), there isn't one specific chunk of logic being the Xillybus
+IP core. Rather, the IP core is configured and built based upon a
+specification given by its end user.
+
+Xillybus presents independent data streams, which resemble pipes or TCP/IP
+communication to the user. At the host side, a character device file is used
+just like any pipe file. On the FPGA side, hardware FIFOs are used to stream
+the data. This is contrary to a common method of communicating through fixed-
+sized buffers (even though such buffers are used by Xillybus under the hood).
+There may be more than a hundred of these streams on a single IP core, but
+also no more than one, depending on the configuration.
+
+In order to ease the deployment of the Xillybus IP core, it contains a simple
+data structure which completely defines the core's configuration. The Linux
+driver fetches this data structure during its initialization process, and sets
+up the DMA buffers and character devices accordingly. As a result, a single
+driver is used to work out of the box with any Xillybus IP core.
+
+The data structure just mentioned should not be confused with PCI's
+configuration space or the Flattened Device Tree.
+
+Usage
+=====
+
+User interface
+--------------
+
+On the host, all interface with Xillybus is done through /dev/xillybus_*
+device files, which are generated automatically as the drivers loads. The
+names of these files depend on the IP core that is loaded in the FPGA (see
+Probing below). To communicate with the FPGA, open the device file that
+corresponds to the hardware FIFO you want to send data or receive data from,
+and use plain write() or read() calls, just like with a regular pipe. In
+particular, it makes perfect sense to go::
+
+	$ cat mydata > /dev/xillybus_thisfifo
+
+	$ cat /dev/xillybus_thatfifo > hisdata
+
+possibly pressing CTRL-C as some stage, even though the xillybus_* pipes have
+the capability to send an EOF (but may not use it).
+
+The driver and hardware are designed to behave sensibly as pipes, including:
+
+* Supporting non-blocking I/O (by setting O_NONBLOCK on open() ).
+
+* Supporting poll() and select().
+
+* Being bandwidth efficient under load (using DMA) but also handle small
+  pieces of data sent across (like TCP/IP) by autoflushing.
+
+A device file can be read only, write only or bidirectional. Bidirectional
+device files are treated like two independent pipes (except for sharing a
+"channel" structure in the implementation code).
+
+Synchronization
+---------------
+
+Xillybus pipes are configured (on the IP core) to be either synchronous or
+asynchronous. For a synchronous pipe, write() returns successfully only after
+some data has been submitted and acknowledged by the FPGA. This slows down
+bulk data transfers, and is nearly impossible for use with streams that
+require data at a constant rate: There is no data transmitted to the FPGA
+between write() calls, in particular when the process loses the CPU.
+
+When a pipe is configured asynchronous, write() returns if there was enough
+room in the buffers to store any of the data in the buffers.
+
+For FPGA to host pipes, asynchronous pipes allow data transfer from the FPGA
+as soon as the respective device file is opened, regardless of if the data
+has been requested by a read() call. On synchronous pipes, only the amount
+of data requested by a read() call is transmitted.
+
+In summary, for synchronous pipes, data between the host and FPGA is
+transmitted only to satisfy the read() or write() call currently handled
+by the driver, and those calls wait for the transmission to complete before
+returning.
+
+Note that the synchronization attribute has nothing to do with the possibility
+that read() or write() completes less bytes than requested. There is a
+separate configuration flag ("allowpartial") that determines whether such a
+partial completion is allowed.
+
+Seekable pipes
+--------------
+
+A synchronous pipe can be configured to have the stream's position exposed
+to the user logic at the FPGA. Such a pipe is also seekable on the host API.
+With this feature, a memory or register interface can be attached on the
+FPGA side to the seekable stream. Reading or writing to a certain address in
+the attached memory is done by seeking to the desired address, and calling
+read() or write() as required.
+
+
+Internals
+=========
+
+Source code organization
+------------------------
+
+The Xillybus driver consists of a core module, xillybus_core.c, and modules
+that depend on the specific bus interface (xillybus_of.c and xillybus_pcie.c).
+
+The bus specific modules are those probed when a suitable device is found by
+the kernel. Since the DMA mapping and synchronization functions, which are bus
+dependent by their nature, are used by the core module, a
+xilly_endpoint_hardware structure is passed to the core module on
+initialization. This structure is populated with pointers to wrapper functions
+which execute the DMA-related operations on the bus.
+
+Pipe attributes
+---------------
+
+Each pipe has a number of attributes which are set when the FPGA component
+(IP core) is built. They are fetched from the IDT (the data structure which
+defines the core's configuration, see Probing below) by xilly_setupchannels()
+in xillybus_core.c as follows:
+
+* is_writebuf: The pipe's direction. A non-zero value means it's an FPGA to
+  host pipe (the FPGA "writes").
+
+* channelnum: The pipe's identification number in communication between the
+  host and FPGA.
+
+* format: The underlying data width. See Data Granularity below.
+
+* allowpartial: A non-zero value means that a read() or write() (whichever
+  applies) may return with less than the requested number of bytes. The common
+  choice is a non-zero value, to match standard UNIX behavior.
+
+* synchronous: A non-zero value means that the pipe is synchronous. See
+  Synchronization above.
+
+* bufsize: Each DMA buffer's size. Always a power of two.
+
+* bufnum: The number of buffers allocated for this pipe. Always a power of two.
+
+* exclusive_open: A non-zero value forces exclusive opening of the associated
+  device file. If the device file is bidirectional, and already opened only in
+  one direction, the opposite direction may be opened once.
+
+* seekable: A non-zero value indicates that the pipe is seekable. See
+  Seekable pipes above.
+
+* supports_nonempty: A non-zero value (which is typical) indicates that the
+  hardware will send the messages that are necessary to support select() and
+  poll() for this pipe.
+
+Host never reads from the FPGA
+------------------------------
+
+Even though PCI Express is hotpluggable in general, a typical motherboard
+doesn't expect a card to go away all of the sudden. But since the PCIe card
+is based upon reprogrammable logic, a sudden disappearance from the bus is
+quite likely as a result of an accidental reprogramming of the FPGA while the
+host is up. In practice, nothing happens immediately in such a situation. But
+if the host attempts to read from an address that is mapped to the PCI Express
+device, that leads to an immediate freeze of the system on some motherboards,
+even though the PCIe standard requires a graceful recovery.
+
+In order to avoid these freezes, the Xillybus driver refrains completely from
+reading from the device's register space. All communication from the FPGA to
+the host is done through DMA. In particular, the Interrupt Service Routine
+doesn't follow the common practice of checking a status register when it's
+invoked. Rather, the FPGA prepares a small buffer which contains short
+messages, which inform the host what the interrupt was about.
+
+This mechanism is used on non-PCIe buses as well for the sake of uniformity.
+
+
+Channels, pipes, and the message channel
+----------------------------------------
+
+Each of the (possibly bidirectional) pipes presented to the user is allocated
+a data channel between the FPGA and the host. The distinction between channels
+and pipes is necessary only because of channel 0, which is used for interrupt-
+related messages from the FPGA, and has no pipe attached to it.
+
+Data streaming
+--------------
+
+Even though a non-segmented data stream is presented to the user at both
+sides, the implementation relies on a set of DMA buffers which is allocated
+for each channel. For the sake of illustration, let's take the FPGA to host
+direction: As data streams into the respective channel's interface in the
+FPGA, the Xillybus IP core writes it to one of the DMA buffers. When the
+buffer is full, the FPGA informs the host about that (appending a
+XILLYMSG_OPCODE_RELEASEBUF message channel 0 and sending an interrupt if
+necessary). The host responds by making the data available for reading through
+the character device. When all data has been read, the host writes on the
+the FPGA's buffer control register, allowing the buffer's overwriting. Flow
+control mechanisms exist on both sides to prevent underflows and overflows.
+
+This is not good enough for creating a TCP/IP-like stream: If the data flow
+stops momentarily before a DMA buffer is filled, the intuitive expectation is
+that the partial data in buffer will arrive anyhow, despite the buffer not
+being completed. This is implemented by adding a field in the
+XILLYMSG_OPCODE_RELEASEBUF message, through which the FPGA informs not just
+which buffer is submitted, but how much data it contains.
+
+But the FPGA will submit a partially filled buffer only if directed to do so
+by the host. This situation occurs when the read() method has been blocking
+for XILLY_RX_TIMEOUT jiffies (currently 10 ms), after which the host commands
+the FPGA to submit a DMA buffer as soon as it can. This timeout mechanism
+balances between bus bandwidth efficiency (preventing a lot of partially
+filled buffers being sent) and a latency held fairly low for tails of data.
+
+A similar setting is used in the host to FPGA direction. The handling of
+partial DMA buffers is somewhat different, though. The user can tell the
+driver to submit all data it has in the buffers to the FPGA, by issuing a
+write() with the byte count set to zero. This is similar to a flush request,
+but it doesn't block. There is also an autoflushing mechanism, which triggers
+an equivalent flush roughly XILLY_RX_TIMEOUT jiffies after the last write().
+This allows the user to be oblivious about the underlying buffering mechanism
+and yet enjoy a stream-like interface.
+
+Note that the issue of partial buffer flushing is irrelevant for pipes having
+the "synchronous" attribute nonzero, since synchronous pipes don't allow data
+to lay around in the DMA buffers between read() and write() anyhow.
+
+Data granularity
+----------------
+
+The data arrives or is sent at the FPGA as 8, 16 or 32 bit wide words, as
+configured by the "format" attribute. Whenever possible, the driver attempts
+to hide this when the pipe is accessed differently from its natural alignment.
+For example, reading single bytes from a pipe with 32 bit granularity works
+with no issues. Writing single bytes to pipes with 16 or 32 bit granularity
+will also work, but the driver can't send partially completed words to the
+FPGA, so the transmission of up to one word may be held until it's fully
+occupied with user data.
+
+This somewhat complicates the handling of host to FPGA streams, because
+when a buffer is flushed, it may contain up to 3 bytes don't form a word in
+the FPGA, and hence can't be sent. To prevent loss of data, these leftover
+bytes need to be moved to the next buffer. The parts in xillybus_core.c
+that mention "leftovers" in some way are related to this complication.
+
+Probing
+-------
+
+As mentioned earlier, the number of pipes that are created when the driver
+loads and their attributes depend on the Xillybus IP core in the FPGA. During
+the driver's initialization, a blob containing configuration info, the
+Interface Description Table (IDT), is sent from the FPGA to the host. The
+bootstrap process is done in three phases:
+
+1. Acquire the length of the IDT, so a buffer can be allocated for it. This
+   is done by sending a quiesce command to the device, since the acknowledge
+   for this command contains the IDT's buffer length.
+
+2. Acquire the IDT itself.
+
+3. Create the interfaces according to the IDT.
+
+Buffer allocation
+-----------------
+
+In order to simplify the logic that prevents illegal boundary crossings of
+PCIe packets, the following rule applies: If a buffer is smaller than 4kB,
+it must not cross a 4kB boundary. Otherwise, it must be 4kB aligned. The
+xilly_setupchannels() functions allocates these buffers by requesting whole
+pages from the kernel, and diving them into DMA buffers as necessary. Since
+all buffers' sizes are powers of two, it's possible to pack any set of such
+buffers, with a maximal waste of one page of memory.
+
+All buffers are allocated when the driver is loaded. This is necessary,
+since large continuous physical memory segments are sometimes requested,
+which are more likely to be available when the system is freshly booted.
+
+The allocation of buffer memory takes place in the same order they appear in
+the IDT. The driver relies on a rule that the pipes are sorted with decreasing
+buffer size in the IDT. If a requested buffer is larger or equal to a page,
+the necessary number of pages is requested from the kernel, and these are
+used for this buffer. If the requested buffer is smaller than a page, one
+single page is requested from the kernel, and that page is partially used.
+Or, if there already is a partially used page at hand, the buffer is packed
+into that page. It can be shown that all pages requested from the kernel
+(except possibly for the last) are 100% utilized this way.
+
+The "nonempty" message (supporting poll)
+----------------------------------------
+
+In order to support the "poll" method (and hence select() ), there is a small
+catch regarding the FPGA to host direction: The FPGA may have filled a DMA
+buffer with some data, but not submitted that buffer. If the host waited for
+the buffer's submission by the FPGA, there would be a possibility that the
+FPGA side has sent data, but a select() call would still block, because the
+host has not received any notification about this. This is solved with
+XILLYMSG_OPCODE_NONEMPTY messages sent by the FPGA when a channel goes from
+completely empty to containing some data.
+
+These messages are used only to support poll() and select(). The IP core can
+be configured not to send them for a slight reduction of bandwidth.
diff --git a/Documentation/driver-api/zorro.rst b/Documentation/driver-api/zorro.rst
new file mode 100644
index 000000000000..664072b017e3
--- /dev/null
+++ b/Documentation/driver-api/zorro.rst
@@ -0,0 +1,104 @@
+========================================
+Writing Device Drivers for Zorro Devices
+========================================
+
+:Author: Written by Geert Uytterhoeven <geert@linux-m68k.org>
+:Last revised: September 5, 2003
+
+
+Introduction
+------------
+
+The Zorro bus is the bus used in the Amiga family of computers. Thanks to
+AutoConfig(tm), it's 100% Plug-and-Play.
+
+There are two types of Zorro buses, Zorro II and Zorro III:
+
+  - The Zorro II address space is 24-bit and lies within the first 16 MB of the
+    Amiga's address map.
+
+  - Zorro III is a 32-bit extension of Zorro II, which is backwards compatible
+    with Zorro II. The Zorro III address space lies outside the first 16 MB.
+
+
+Probing for Zorro Devices
+-------------------------
+
+Zorro devices are found by calling ``zorro_find_device()``, which returns a
+pointer to the ``next`` Zorro device with the specified Zorro ID. A probe loop
+for the board with Zorro ID ``ZORRO_PROD_xxx`` looks like::
+
+    struct zorro_dev *z = NULL;
+
+    while ((z = zorro_find_device(ZORRO_PROD_xxx, z))) {
+	if (!zorro_request_region(z->resource.start+MY_START, MY_SIZE,
+				  "My explanation"))
+	...
+    }
+
+``ZORRO_WILDCARD`` acts as a wildcard and finds any Zorro device. If your driver
+supports different types of boards, you can use a construct like::
+
+    struct zorro_dev *z = NULL;
+
+    while ((z = zorro_find_device(ZORRO_WILDCARD, z))) {
+	if (z->id != ZORRO_PROD_xxx1 && z->id != ZORRO_PROD_xxx2 && ...)
+	    continue;
+	if (!zorro_request_region(z->resource.start+MY_START, MY_SIZE,
+				  "My explanation"))
+	...
+    }
+
+
+Zorro Resources
+---------------
+
+Before you can access a Zorro device's registers, you have to make sure it's
+not yet in use. This is done using the I/O memory space resource management
+functions::
+
+    request_mem_region()
+    release_mem_region()
+
+Shortcuts to claim the whole device's address space are provided as well::
+
+    zorro_request_device
+    zorro_release_device
+
+
+Accessing the Zorro Address Space
+---------------------------------
+
+The address regions in the Zorro device resources are Zorro bus address
+regions. Due to the identity bus-physical address mapping on the Zorro bus,
+they are CPU physical addresses as well.
+
+The treatment of these regions depends on the type of Zorro space:
+
+  - Zorro II address space is always mapped and does not have to be mapped
+    explicitly using z_ioremap().
+    
+    Conversion from bus/physical Zorro II addresses to kernel virtual addresses
+    and vice versa is done using::
+
+	virt_addr = ZTWO_VADDR(bus_addr);
+	bus_addr = ZTWO_PADDR(virt_addr);
+
+  - Zorro III address space must be mapped explicitly using z_ioremap() first
+    before it can be accessed::
+ 
+	virt_addr = z_ioremap(bus_addr, size);
+	...
+	z_iounmap(virt_addr);
+
+
+References
+----------
+
+#. linux/include/linux/zorro.h
+#. linux/include/uapi/linux/zorro.h
+#. linux/include/uapi/linux/zorro_ids.h
+#. linux/arch/m68k/include/asm/zorro.h
+#. linux/drivers/zorro
+#. /proc/bus/zorro
+
diff --git a/Documentation/eisa.txt b/Documentation/eisa.txt
deleted file mode 100644
index c07565ba57da..000000000000
--- a/Documentation/eisa.txt
+++ /dev/null
@@ -1,230 +0,0 @@
-================
-EISA bus support
-================
-
-:Author: Marc Zyngier <maz@wild-wind.fr.eu.org>
-
-This document groups random notes about porting EISA drivers to the
-new EISA/sysfs API.
-
-Starting from version 2.5.59, the EISA bus is almost given the same
-status as other much more mainstream busses such as PCI or USB. This
-has been possible through sysfs, which defines a nice enough set of
-abstractions to manage busses, devices and drivers.
-
-Although the new API is quite simple to use, converting existing
-drivers to the new infrastructure is not an easy task (mostly because
-detection code is generally also used to probe ISA cards). Moreover,
-most EISA drivers are among the oldest Linux drivers so, as you can
-imagine, some dust has settled here over the years.
-
-The EISA infrastructure is made up of three parts:
-
-    - The bus code implements most of the generic code. It is shared
-      among all the architectures that the EISA code runs on. It
-      implements bus probing (detecting EISA cards available on the bus),
-      allocates I/O resources, allows fancy naming through sysfs, and
-      offers interfaces for driver to register.
-
-    - The bus root driver implements the glue between the bus hardware
-      and the generic bus code. It is responsible for discovering the
-      device implementing the bus, and setting it up to be latter probed
-      by the bus code. This can go from something as simple as reserving
-      an I/O region on x86, to the rather more complex, like the hppa
-      EISA code. This is the part to implement in order to have EISA
-      running on an "new" platform.
-
-    - The driver offers the bus a list of devices that it manages, and
-      implements the necessary callbacks to probe and release devices
-      whenever told to.
-
-Every function/structure below lives in <linux/eisa.h>, which depends
-heavily on <linux/device.h>.
-
-Bus root driver
-===============
-
-::
-
-	int eisa_root_register (struct eisa_root_device *root);
-
-The eisa_root_register function is used to declare a device as the
-root of an EISA bus. The eisa_root_device structure holds a reference
-to this device, as well as some parameters for probing purposes::
-
-	struct eisa_root_device {
-		struct device   *dev;	 /* Pointer to bridge device */
-		struct resource *res;
-		unsigned long    bus_base_addr;
-		int		 slots;  /* Max slot number */
-		int		 force_probe; /* Probe even when no slot 0 */
-		u64		 dma_mask; /* from bridge device */
-		int              bus_nr; /* Set by eisa_root_register */
-		struct resource  eisa_root_res;	/* ditto */
-	};
-
-============= ======================================================
-node          used for eisa_root_register internal purpose
-dev           pointer to the root device
-res           root device I/O resource
-bus_base_addr slot 0 address on this bus
-slots	      max slot number to probe
-force_probe   Probe even when slot 0 is empty (no EISA mainboard)
-dma_mask      Default DMA mask. Usually the bridge device dma_mask.
-bus_nr	      unique bus id, set by eisa_root_register
-============= ======================================================
-
-Driver
-======
-
-::
-
-	int eisa_driver_register (struct eisa_driver *edrv);
-	void eisa_driver_unregister (struct eisa_driver *edrv);
-
-Clear enough ?
-
-::
-
-	struct eisa_device_id {
-		char sig[EISA_SIG_LEN];
-		unsigned long driver_data;
-	};
-
-	struct eisa_driver {
-		const struct eisa_device_id *id_table;
-		struct device_driver         driver;
-	};
-
-=============== ====================================================
-id_table	an array of NULL terminated EISA id strings,
-		followed by an empty string. Each string can
-		optionally be paired with a driver-dependent value
-		(driver_data).
-
-driver		a generic driver, such as described in
-		Documentation/driver-api/driver-model/driver.rst. Only .name,
-		.probe and .remove members are mandatory.
-=============== ====================================================
-
-An example is the 3c59x driver::
-
-	static struct eisa_device_id vortex_eisa_ids[] = {
-		{ "TCM5920", EISA_3C592_OFFSET },
-		{ "TCM5970", EISA_3C597_OFFSET },
-		{ "" }
-	};
-
-	static struct eisa_driver vortex_eisa_driver = {
-		.id_table = vortex_eisa_ids,
-		.driver   = {
-			.name    = "3c59x",
-			.probe   = vortex_eisa_probe,
-			.remove  = vortex_eisa_remove
-		}
-	};
-
-Device
-======
-
-The sysfs framework calls .probe and .remove functions upon device
-discovery and removal (note that the .remove function is only called
-when driver is built as a module).
-
-Both functions are passed a pointer to a 'struct device', which is
-encapsulated in a 'struct eisa_device' described as follows::
-
-	struct eisa_device {
-		struct eisa_device_id id;
-		int                   slot;
-		int                   state;
-		unsigned long         base_addr;
-		struct resource       res[EISA_MAX_RESOURCES];
-		u64                   dma_mask;
-		struct device         dev; /* generic device */
-	};
-
-======== ============================================================
-id	 EISA id, as read from device. id.driver_data is set from the
-	 matching driver EISA id.
-slot	 slot number which the device was detected on
-state    set of flags indicating the state of the device. Current
-	 flags are EISA_CONFIG_ENABLED and EISA_CONFIG_FORCED.
-res	 set of four 256 bytes I/O regions allocated to this device
-dma_mask DMA mask set from the parent device.
-dev	 generic device (see Documentation/driver-api/driver-model/device.rst)
-======== ============================================================
-
-You can get the 'struct eisa_device' from 'struct device' using the
-'to_eisa_device' macro.
-
-Misc stuff
-==========
-
-::
-
-	void eisa_set_drvdata (struct eisa_device *edev, void *data);
-
-Stores data into the device's driver_data area.
-
-::
-
-	void *eisa_get_drvdata (struct eisa_device *edev):
-
-Gets the pointer previously stored into the device's driver_data area.
-
-::
-
-	int eisa_get_region_index (void *addr);
-
-Returns the region number (0 <= x < EISA_MAX_RESOURCES) of a given
-address.
-
-Kernel parameters
-=================
-
-eisa_bus.enable_dev
-	A comma-separated list of slots to be enabled, even if the firmware
-	set the card as disabled. The driver must be able to properly
-	initialize the device in such conditions.
-
-eisa_bus.disable_dev
-	A comma-separated list of slots to be enabled, even if the firmware
-	set the card as enabled. The driver won't be called to handle this
-	device.
-
-virtual_root.force_probe
-	Force the probing code to probe EISA slots even when it cannot find an
-	EISA compliant mainboard (nothing appears on slot 0). Defaults to 0
-	(don't force), and set to 1 (force probing) when either
-	CONFIG_ALPHA_JENSEN or CONFIG_EISA_VLB_PRIMING are set.
-
-Random notes
-============
-
-Converting an EISA driver to the new API mostly involves *deleting*
-code (since probing is now in the core EISA code). Unfortunately, most
-drivers share their probing routine between ISA, and EISA. Special
-care must be taken when ripping out the EISA code, so other busses
-won't suffer from these surgical strikes...
-
-You *must not* expect any EISA device to be detected when returning
-from eisa_driver_register, since the chances are that the bus has not
-yet been probed. In fact, that's what happens most of the time (the
-bus root driver usually kicks in rather late in the boot process).
-Unfortunately, most drivers are doing the probing by themselves, and
-expect to have explored the whole machine when they exit their probe
-routine.
-
-For example, switching your favorite EISA SCSI card to the "hotplug"
-model is "the right thing"(tm).
-
-Thanks
-======
-
-I'd like to thank the following people for their help:
-
-- Xavier Benigni for lending me a wonderful Alpha Jensen,
-- James Bottomley, Jeff Garzik for getting this stuff into the kernel,
-- Andries Brouwer for contributing numerous EISA ids,
-- Catrin Jones for coping with far too many machines at home.
diff --git a/Documentation/fb/fbcon.rst b/Documentation/fb/fbcon.rst
index 26bc5cdaabab..ebca41785abe 100644
--- a/Documentation/fb/fbcon.rst
+++ b/Documentation/fb/fbcon.rst
@@ -187,7 +187,7 @@ the hardware. Thus, in a VGA console::
 Assuming the VGA driver can be unloaded, one must first unbind the VGA driver
 from the console layer before unloading the driver.  The VGA driver cannot be
 unloaded if it is still bound to the console layer. (See
-Documentation/console/console.rst for more information).
+Documentation/driver-api/console.rst for more information).
 
 This is more complicated in the case of the framebuffer console (fbcon),
 because fbcon is an intermediate layer between the console and the drivers::
@@ -204,7 +204,7 @@ fbcon. Thus, there is no need to explicitly unbind the fbdev drivers from
 fbcon.
 
 So, how do we unbind fbcon from the console? Part of the answer is in
-Documentation/console/console.rst. To summarize:
+Documentation/driver-api/console.rst. To summarize:
 
 Echo a value to the bind file that represents the framebuffer console
 driver. So assuming vtcon1 represents fbcon, then::
diff --git a/Documentation/isa.txt b/Documentation/isa.txt
deleted file mode 100644
index def4a7b690b5..000000000000
--- a/Documentation/isa.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-===========
-ISA Drivers
-===========
-
-The following text is adapted from the commit message of the initial
-commit of the ISA bus driver authored by Rene Herman.
-
-During the recent "isa drivers using platform devices" discussion it was
-pointed out that (ALSA) ISA drivers ran into the problem of not having
-the option to fail driver load (device registration rather) upon not
-finding their hardware due to a probe() error not being passed up
-through the driver model. In the course of that, I suggested a separate
-ISA bus might be best; Russell King agreed and suggested this bus could
-use the .match() method for the actual device discovery.
-
-The attached does this. For this old non (generically) discoverable ISA
-hardware only the driver itself can do discovery so as a difference with
-the platform_bus, this isa_bus also distributes match() up to the
-driver.
-
-As another difference: these devices only exist in the driver model due
-to the driver creating them because it might want to drive them, meaning
-that all device creation has been made internal as well.
-
-The usage model this provides is nice, and has been acked from the ALSA
-side by Takashi Iwai and Jaroslav Kysela. The ALSA driver module_init's
-now (for oldisa-only drivers) become::
-
-	static int __init alsa_card_foo_init(void)
-	{
-		return isa_register_driver(&snd_foo_isa_driver, SNDRV_CARDS);
-	}
-
-	static void __exit alsa_card_foo_exit(void)
-	{
-		isa_unregister_driver(&snd_foo_isa_driver);
-	}
-
-Quite like the other bus models therefore. This removes a lot of
-duplicated init code from the ALSA ISA drivers.
-
-The passed in isa_driver struct is the regular driver struct embedding a
-struct device_driver, the normal probe/remove/shutdown/suspend/resume
-callbacks, and as indicated that .match callback.
-
-The "SNDRV_CARDS" you see being passed in is a "unsigned int ndev"
-parameter, indicating how many devices to create and call our methods
-with.
-
-The platform_driver callbacks are called with a platform_device param;
-the isa_driver callbacks are being called with a ``struct device *dev,
-unsigned int id`` pair directly -- with the device creation completely
-internal to the bus it's much cleaner to not leak isa_dev's by passing
-them in at all. The id is the only thing we ever want other then the
-struct device anyways, and it makes for nicer code in the callbacks as
-well.
-
-With this additional .match() callback ISA drivers have all options. If
-ALSA would want to keep the old non-load behaviour, it could stick all
-of the old .probe in .match, which would only keep them registered after
-everything was found to be present and accounted for. If it wanted the
-behaviour of always loading as it inadvertently did for a bit after the
-changeover to platform devices, it could just not provide a .match() and
-do everything in .probe() as before.
-
-If it, as Takashi Iwai already suggested earlier as a way of following
-the model from saner buses more closely, wants to load when a later bind
-could conceivably succeed, it could use .match() for the prerequisites
-(such as checking the user wants the card enabled and that port/irq/dma
-values have been passed in) and .probe() for everything else. This is
-the nicest model.
-
-To the code...
-
-This exports only two functions; isa_{,un}register_driver().
-
-isa_register_driver() register's the struct device_driver, and then
-loops over the passed in ndev creating devices and registering them.
-This causes the bus match method to be called for them, which is::
-
-	int isa_bus_match(struct device *dev, struct device_driver *driver)
-	{
-		struct isa_driver *isa_driver = to_isa_driver(driver);
-
-		if (dev->platform_data == isa_driver) {
-			if (!isa_driver->match ||
-				isa_driver->match(dev, to_isa_dev(dev)->id))
-				return 1;
-			dev->platform_data = NULL;
-		}
-		return 0;
-	}
-
-The first thing this does is check if this device is in fact one of this
-driver's devices by seeing if the device's platform_data pointer is set
-to this driver. Platform devices compare strings, but we don't need to
-do that with everything being internal, so isa_register_driver() abuses
-dev->platform_data as a isa_driver pointer which we can then check here.
-I believe platform_data is available for this, but if rather not, moving
-the isa_driver pointer to the private struct isa_dev is ofcourse fine as
-well.
-
-Then, if the the driver did not provide a .match, it matches. If it did,
-the driver match() method is called to determine a match.
-
-If it did **not** match, dev->platform_data is reset to indicate this to
-isa_register_driver which can then unregister the device again.
-
-If during all this, there's any error, or no devices matched at all
-everything is backed out again and the error, or -ENODEV, is returned.
-
-isa_unregister_driver() just unregisters the matched devices and the
-driver itself.
-
-module_isa_driver is a helper macro for ISA drivers which do not do
-anything special in module init/exit. This eliminates a lot of
-boilerplate code. Each module may only use this macro once, and calling
-it replaces module_init and module_exit.
-
-max_num_isa_dev is a macro to determine the maximum possible number of
-ISA devices which may be registered in the I/O port address space given
-the address extent of the ISA devices.
diff --git a/Documentation/isapnp.txt b/Documentation/isapnp.txt
deleted file mode 100644
index 8d0840ac847b..000000000000
--- a/Documentation/isapnp.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-==========================================================
-ISA Plug & Play support by Jaroslav Kysela <perex@suse.cz>
-==========================================================
-
-Interface /proc/isapnp
-======================
-
-The interface has been removed. See pnp.txt for more details.
-
-Interface /proc/bus/isapnp
-==========================
-
-This directory allows access to ISA PnP cards and logical devices.
-The regular files contain the contents of ISA PnP registers for
-a logical device.
diff --git a/Documentation/lightnvm/pblk.txt b/Documentation/lightnvm/pblk.txt
deleted file mode 100644
index 1040ed1cec81..000000000000
--- a/Documentation/lightnvm/pblk.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-pblk: Physical Block Device Target
-==================================
-
-pblk implements a fully associative, host-based FTL that exposes a traditional
-block I/O interface. Its primary responsibilities are:
-
-  - Map logical addresses onto physical addresses (4KB granularity) in a
-    logical-to-physical (L2P) table.
-  - Maintain the integrity and consistency of the L2P table as well as its
-    recovery from normal tear down and power outage.
-  - Deal with controller- and media-specific constrains.
-  - Handle I/O errors.
-  - Implement garbage collection.
-  - Maintain consistency across the I/O stack during synchronization points.
-
-For more information please refer to:
-
-  http://lightnvm.io
-
-which maintains updated FAQs, manual pages, technical documentation, tools,
-contacts, etc.
diff --git a/Documentation/men-chameleon-bus.txt b/Documentation/men-chameleon-bus.txt
deleted file mode 100644
index 1b1f048aa748..000000000000
--- a/Documentation/men-chameleon-bus.txt
+++ /dev/null
@@ -1,175 +0,0 @@
-=================
-MEN Chameleon Bus
-=================
-
-.. Table of Contents
-   =================
-   1 Introduction
-       1.1 Scope of this Document
-       1.2 Limitations of the current implementation
-   2 Architecture
-       2.1 MEN Chameleon Bus
-       2.2 Carrier Devices
-       2.3 Parser
-   3 Resource handling
-       3.1 Memory Resources
-       3.2 IRQs
-   4 Writing an MCB driver
-       4.1 The driver structure
-       4.2 Probing and attaching
-       4.3 Initializing the driver
-
-
-Introduction
-============
-
-This document describes the architecture and implementation of the MEN
-Chameleon Bus (called MCB throughout this document).
-
-Scope of this Document
-----------------------
-
-This document is intended to be a short overview of the current
-implementation and does by no means describe the complete possibilities of MCB
-based devices.
-
-Limitations of the current implementation
------------------------------------------
-
-The current implementation is limited to PCI and PCIe based carrier devices
-that only use a single memory resource and share the PCI legacy IRQ.  Not
-implemented are:
-
-- Multi-resource MCB devices like the VME Controller or M-Module carrier.
-- MCB devices that need another MCB device, like SRAM for a DMA Controller's
-  buffer descriptors or a video controller's video memory.
-- A per-carrier IRQ domain for carrier devices that have one (or more) IRQs
-  per MCB device like PCIe based carriers with MSI or MSI-X support.
-
-Architecture
-============
-
-MCB is divided into 3 functional blocks:
-
-- The MEN Chameleon Bus itself,
-- drivers for MCB Carrier Devices and
-- the parser for the Chameleon table.
-
-MEN Chameleon Bus
------------------
-
-The MEN Chameleon Bus is an artificial bus system that attaches to a so
-called Chameleon FPGA device found on some hardware produced my MEN Mikro
-Elektronik GmbH. These devices are multi-function devices implemented in a
-single FPGA and usually attached via some sort of PCI or PCIe link. Each
-FPGA contains a header section describing the content of the FPGA. The
-header lists the device id, PCI BAR, offset from the beginning of the PCI
-BAR, size in the FPGA, interrupt number and some other properties currently
-not handled by the MCB implementation.
-
-Carrier Devices
----------------
-
-A carrier device is just an abstraction for the real world physical bus the
-Chameleon FPGA is attached to. Some IP Core drivers may need to interact with
-properties of the carrier device (like querying the IRQ number of a PCI
-device). To provide abstraction from the real hardware bus, an MCB carrier
-device provides callback methods to translate the driver's MCB function calls
-to hardware related function calls. For example a carrier device may
-implement the get_irq() method which can be translated into a hardware bus
-query for the IRQ number the device should use.
-
-Parser
-------
-
-The parser reads the first 512 bytes of a Chameleon device and parses the
-Chameleon table. Currently the parser only supports the Chameleon v2 variant
-of the Chameleon table but can easily be adopted to support an older or
-possible future variant. While parsing the table's entries new MCB devices
-are allocated and their resources are assigned according to the resource
-assignment in the Chameleon table. After resource assignment is finished, the
-MCB devices are registered at the MCB and thus at the driver core of the
-Linux kernel.
-
-Resource handling
-=================
-
-The current implementation assigns exactly one memory and one IRQ resource
-per MCB device. But this is likely going to change in the future.
-
-Memory Resources
-----------------
-
-Each MCB device has exactly one memory resource, which can be requested from
-the MCB bus. This memory resource is the physical address of the MCB device
-inside the carrier and is intended to be passed to ioremap() and friends. It
-is already requested from the kernel by calling request_mem_region().
-
-IRQs
-----
-
-Each MCB device has exactly one IRQ resource, which can be requested from the
-MCB bus. If a carrier device driver implements the ->get_irq() callback
-method, the IRQ number assigned by the carrier device will be returned,
-otherwise the IRQ number inside the Chameleon table will be returned. This
-number is suitable to be passed to request_irq().
-
-Writing an MCB driver
-=====================
-
-The driver structure
---------------------
-
-Each MCB driver has a structure to identify the device driver as well as
-device ids which identify the IP Core inside the FPGA. The driver structure
-also contains callback methods which get executed on driver probe and
-removal from the system::
-
-	static const struct mcb_device_id foo_ids[] = {
-		{ .device = 0x123 },
-		{ }
-	};
-	MODULE_DEVICE_TABLE(mcb, foo_ids);
-
-	static struct mcb_driver foo_driver = {
-	driver = {
-		.name = "foo-bar",
-		.owner = THIS_MODULE,
-	},
-		.probe = foo_probe,
-		.remove = foo_remove,
-		.id_table = foo_ids,
-	};
-
-Probing and attaching
----------------------
-
-When a driver is loaded and the MCB devices it services are found, the MCB
-core will call the driver's probe callback method. When the driver is removed
-from the system, the MCB core will call the driver's remove callback method::
-
-	static init foo_probe(struct mcb_device *mdev, const struct mcb_device_id *id);
-	static void foo_remove(struct mcb_device *mdev);
-
-Initializing the driver
------------------------
-
-When the kernel is booted or your foo driver module is inserted, you have to
-perform driver initialization. Usually it is enough to register your driver
-module at the MCB core::
-
-	static int __init foo_init(void)
-	{
-		return mcb_register_driver(&foo_driver);
-	}
-	module_init(foo_init);
-
-	static void __exit foo_exit(void)
-	{
-		mcb_unregister_driver(&foo_driver);
-	}
-	module_exit(foo_exit);
-
-The module_mcb_driver() macro can be used to reduce the above code::
-
-	module_mcb_driver(foo_driver);
diff --git a/Documentation/ntb.txt b/Documentation/ntb.txt
deleted file mode 100644
index 074a423c853c..000000000000
--- a/Documentation/ntb.txt
+++ /dev/null
@@ -1,236 +0,0 @@
-===========
-NTB Drivers
-===========
-
-NTB (Non-Transparent Bridge) is a type of PCI-Express bridge chip that connects
-the separate memory systems of two or more computers to the same PCI-Express
-fabric. Existing NTB hardware supports a common feature set: doorbell
-registers and memory translation windows, as well as non common features like
-scratchpad and message registers. Scratchpad registers are read-and-writable
-registers that are accessible from either side of the device, so that peers can
-exchange a small amount of information at a fixed address. Message registers can
-be utilized for the same purpose. Additionally they are provided with with
-special status bits to make sure the information isn't rewritten by another
-peer. Doorbell registers provide a way for peers to send interrupt events.
-Memory windows allow translated read and write access to the peer memory.
-
-NTB Core Driver (ntb)
-=====================
-
-The NTB core driver defines an api wrapping the common feature set, and allows
-clients interested in NTB features to discover NTB the devices supported by
-hardware drivers.  The term "client" is used here to mean an upper layer
-component making use of the NTB api.  The term "driver," or "hardware driver,"
-is used here to mean a driver for a specific vendor and model of NTB hardware.
-
-NTB Client Drivers
-==================
-
-NTB client drivers should register with the NTB core driver.  After
-registering, the client probe and remove functions will be called appropriately
-as ntb hardware, or hardware drivers, are inserted and removed.  The
-registration uses the Linux Device framework, so it should feel familiar to
-anyone who has written a pci driver.
-
-NTB Typical client driver implementation
-----------------------------------------
-
-Primary purpose of NTB is to share some peace of memory between at least two
-systems. So the NTB device features like Scratchpad/Message registers are
-mainly used to perform the proper memory window initialization. Typically
-there are two types of memory window interfaces supported by the NTB API:
-inbound translation configured on the local ntb port and outbound translation
-configured by the peer, on the peer ntb port. The first type is
-depicted on the next figure::
-
- Inbound translation:
-
- Memory:              Local NTB Port:      Peer NTB Port:      Peer MMIO:
-  ____________
- | dma-mapped |-ntb_mw_set_trans(addr)  |
- | memory     |        _v____________   |   ______________
- | (addr)     |<======| MW xlat addr |<====| MW base addr |<== memory-mapped IO
- |------------|       |--------------|  |  |--------------|
-
-So typical scenario of the first type memory window initialization looks:
-1) allocate a memory region, 2) put translated address to NTB config,
-3) somehow notify a peer device of performed initialization, 4) peer device
-maps corresponding outbound memory window so to have access to the shared
-memory region.
-
-The second type of interface, that implies the shared windows being
-initialized by a peer device, is depicted on the figure::
-
- Outbound translation:
-
- Memory:        Local NTB Port:    Peer NTB Port:      Peer MMIO:
-  ____________                      ______________
- | dma-mapped |                |   | MW base addr |<== memory-mapped IO
- | memory     |                |   |--------------|
- | (addr)     |<===================| MW xlat addr |<-ntb_peer_mw_set_trans(addr)
- |------------|                |   |--------------|
-
-Typical scenario of the second type interface initialization would be:
-1) allocate a memory region, 2) somehow deliver a translated address to a peer
-device, 3) peer puts the translated address to NTB config, 4) peer device maps
-outbound memory window so to have access to the shared memory region.
-
-As one can see the described scenarios can be combined in one portable
-algorithm.
-
- Local device:
-  1) Allocate memory for a shared window
-  2) Initialize memory window by translated address of the allocated region
-     (it may fail if local memory window initialization is unsupported)
-  3) Send the translated address and memory window index to a peer device
-
- Peer device:
-  1) Initialize memory window with retrieved address of the allocated
-     by another device memory region (it may fail if peer memory window
-     initialization is unsupported)
-  2) Map outbound memory window
-
-In accordance with this scenario, the NTB Memory Window API can be used as
-follows:
-
- Local device:
-  1) ntb_mw_count(pidx) - retrieve number of memory ranges, which can
-     be allocated for memory windows between local device and peer device
-     of port with specified index.
-  2) ntb_get_align(pidx, midx) - retrieve parameters restricting the
-     shared memory region alignment and size. Then memory can be properly
-     allocated.
-  3) Allocate physically contiguous memory region in compliance with
-     restrictions retrieved in 2).
-  4) ntb_mw_set_trans(pidx, midx) - try to set translation address of
-     the memory window with specified index for the defined peer device
-     (it may fail if local translated address setting is not supported)
-  5) Send translated base address (usually together with memory window
-     number) to the peer device using, for instance, scratchpad or message
-     registers.
-
- Peer device:
-  1) ntb_peer_mw_set_trans(pidx, midx) - try to set received from other
-     device (related to pidx) translated address for specified memory
-     window. It may fail if retrieved address, for instance, exceeds
-     maximum possible address or isn't properly aligned.
-  2) ntb_peer_mw_get_addr(widx) - retrieve MMIO address to map the memory
-     window so to have an access to the shared memory.
-
-Also it is worth to note, that method ntb_mw_count(pidx) should return the
-same value as ntb_peer_mw_count() on the peer with port index - pidx.
-
-NTB Transport Client (ntb\_transport) and NTB Netdev (ntb\_netdev)
-------------------------------------------------------------------
-
-The primary client for NTB is the Transport client, used in tandem with NTB
-Netdev.  These drivers function together to create a logical link to the peer,
-across the ntb, to exchange packets of network data.  The Transport client
-establishes a logical link to the peer, and creates queue pairs to exchange
-messages and data.  The NTB Netdev then creates an ethernet device using a
-Transport queue pair.  Network data is copied between socket buffers and the
-Transport queue pair buffer.  The Transport client may be used for other things
-besides Netdev, however no other applications have yet been written.
-
-NTB Ping Pong Test Client (ntb\_pingpong)
------------------------------------------
-
-The Ping Pong test client serves as a demonstration to exercise the doorbell
-and scratchpad registers of NTB hardware, and as an example simple NTB client.
-Ping Pong enables the link when started, waits for the NTB link to come up, and
-then proceeds to read and write the doorbell scratchpad registers of the NTB.
-The peers interrupt each other using a bit mask of doorbell bits, which is
-shifted by one in each round, to test the behavior of multiple doorbell bits
-and interrupt vectors.  The Ping Pong driver also reads the first local
-scratchpad, and writes the value plus one to the first peer scratchpad, each
-round before writing the peer doorbell register.
-
-Module Parameters:
-
-* unsafe - Some hardware has known issues with scratchpad and doorbell
-	registers.  By default, Ping Pong will not attempt to exercise such
-	hardware.  You may override this behavior at your own risk by setting
-	unsafe=1.
-* delay\_ms - Specify the delay between receiving a doorbell
-	interrupt event and setting the peer doorbell register for the next
-	round.
-* init\_db - Specify the doorbell bits to start new series of rounds.  A new
-	series begins once all the doorbell bits have been shifted out of
-	range.
-* dyndbg - It is suggested to specify dyndbg=+p when loading this module, and
-	then to observe debugging output on the console.
-
-NTB Tool Test Client (ntb\_tool)
---------------------------------
-
-The Tool test client serves for debugging, primarily, ntb hardware and drivers.
-The Tool provides access through debugfs for reading, setting, and clearing the
-NTB doorbell, and reading and writing scratchpads.
-
-The Tool does not currently have any module parameters.
-
-Debugfs Files:
-
-* *debugfs*/ntb\_tool/*hw*/
-	A directory in debugfs will be created for each
-	NTB device probed by the tool.  This directory is shortened to *hw*
-	below.
-* *hw*/db
-	This file is used to read, set, and clear the local doorbell.  Not
-	all operations may be supported by all hardware.  To read the doorbell,
-	read the file.  To set the doorbell, write `s` followed by the bits to
-	set (eg: `echo 's 0x0101' > db`).  To clear the doorbell, write `c`
-	followed by the bits to clear.
-* *hw*/mask
-	This file is used to read, set, and clear the local doorbell mask.
-	See *db* for details.
-* *hw*/peer\_db
-	This file is used to read, set, and clear the peer doorbell.
-	See *db* for details.
-* *hw*/peer\_mask
-	This file is used to read, set, and clear the peer doorbell
-	mask.  See *db* for details.
-* *hw*/spad
-	This file is used to read and write local scratchpads.  To read
-	the values of all scratchpads, read the file.  To write values, write a
-	series of pairs of scratchpad number and value
-	(eg: `echo '4 0x123 7 0xabc' > spad`
-	# to set scratchpads `4` and `7` to `0x123` and `0xabc`, respectively).
-* *hw*/peer\_spad
-	This file is used to read and write peer scratchpads.  See
-	*spad* for details.
-
-NTB Hardware Drivers
-====================
-
-NTB hardware drivers should register devices with the NTB core driver.  After
-registering, clients probe and remove functions will be called.
-
-NTB Intel Hardware Driver (ntb\_hw\_intel)
-------------------------------------------
-
-The Intel hardware driver supports NTB on Xeon and Atom CPUs.
-
-Module Parameters:
-
-* b2b\_mw\_idx
-	If the peer ntb is to be accessed via a memory window, then use
-	this memory window to access the peer ntb.  A value of zero or positive
-	starts from the first mw idx, and a negative value starts from the last
-	mw idx.  Both sides MUST set the same value here!  The default value is
-	`-1`.
-* b2b\_mw\_share
-	If the peer ntb is to be accessed via a memory window, and if
-	the memory window is large enough, still allow the client to use the
-	second half of the memory window for address translation to the peer.
-* xeon\_b2b\_usd\_bar2\_addr64
-	If using B2B topology on Xeon hardware, use
-	this 64 bit address on the bus between the NTB devices for the window
-	at BAR2, on the upstream side of the link.
-* xeon\_b2b\_usd\_bar4\_addr64 - See *xeon\_b2b\_bar2\_addr64*.
-* xeon\_b2b\_usd\_bar4\_addr32 - See *xeon\_b2b\_bar2\_addr64*.
-* xeon\_b2b\_usd\_bar5\_addr32 - See *xeon\_b2b\_bar2\_addr64*.
-* xeon\_b2b\_dsd\_bar2\_addr64 - See *xeon\_b2b\_bar2\_addr64*.
-* xeon\_b2b\_dsd\_bar4\_addr64 - See *xeon\_b2b\_bar2\_addr64*.
-* xeon\_b2b\_dsd\_bar4\_addr32 - See *xeon\_b2b\_bar2\_addr64*.
-* xeon\_b2b\_dsd\_bar5\_addr32 - See *xeon\_b2b\_bar2\_addr64*.
diff --git a/Documentation/nvmem/nvmem.rst b/Documentation/nvmem/nvmem.rst
deleted file mode 100644
index 3866b6e066d5..000000000000
--- a/Documentation/nvmem/nvmem.rst
+++ /dev/null
@@ -1,189 +0,0 @@
-:orphan:
-
-===============
-NVMEM Subsystem
-===============
-
- Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
-
-This document explains the NVMEM Framework along with the APIs provided,
-and how to use it.
-
-1. Introduction
-===============
-*NVMEM* is the abbreviation for Non Volatile Memory layer. It is used to
-retrieve configuration of SOC or Device specific data from non volatile
-memories like eeprom, efuses and so on.
-
-Before this framework existed, NVMEM drivers like eeprom were stored in
-drivers/misc, where they all had to duplicate pretty much the same code to
-register a sysfs file, allow in-kernel users to access the content of the
-devices they were driving, etc.
-
-This was also a problem as far as other in-kernel users were involved, since
-the solutions used were pretty much different from one driver to another, there
-was a rather big abstraction leak.
-
-This framework aims at solve these problems. It also introduces DT
-representation for consumer devices to go get the data they require (MAC
-Addresses, SoC/Revision ID, part numbers, and so on) from the NVMEMs. This
-framework is based on regmap, so that most of the abstraction available in
-regmap can be reused, across multiple types of buses.
-
-NVMEM Providers
-+++++++++++++++
-
-NVMEM provider refers to an entity that implements methods to initialize, read
-and write the non-volatile memory.
-
-2. Registering/Unregistering the NVMEM provider
-===============================================
-
-A NVMEM provider can register with NVMEM core by supplying relevant
-nvmem configuration to nvmem_register(), on success core would return a valid
-nvmem_device pointer.
-
-nvmem_unregister(nvmem) is used to unregister a previously registered provider.
-
-For example, a simple qfprom case::
-
-  static struct nvmem_config econfig = {
-	.name = "qfprom",
-	.owner = THIS_MODULE,
-  };
-
-  static int qfprom_probe(struct platform_device *pdev)
-  {
-	...
-	econfig.dev = &pdev->dev;
-	nvmem = nvmem_register(&econfig);
-	...
-  }
-
-It is mandatory that the NVMEM provider has a regmap associated with its
-struct device. Failure to do would return error code from nvmem_register().
-
-Users of board files can define and register nvmem cells using the
-nvmem_cell_table struct::
-
-  static struct nvmem_cell_info foo_nvmem_cells[] = {
-	{
-		.name		= "macaddr",
-		.offset		= 0x7f00,
-		.bytes		= ETH_ALEN,
-	}
-  };
-
-  static struct nvmem_cell_table foo_nvmem_cell_table = {
-	.nvmem_name		= "i2c-eeprom",
-	.cells			= foo_nvmem_cells,
-	.ncells			= ARRAY_SIZE(foo_nvmem_cells),
-  };
-
-  nvmem_add_cell_table(&foo_nvmem_cell_table);
-
-Additionally it is possible to create nvmem cell lookup entries and register
-them with the nvmem framework from machine code as shown in the example below::
-
-  static struct nvmem_cell_lookup foo_nvmem_lookup = {
-	.nvmem_name		= "i2c-eeprom",
-	.cell_name		= "macaddr",
-	.dev_id			= "foo_mac.0",
-	.con_id			= "mac-address",
-  };
-
-  nvmem_add_cell_lookups(&foo_nvmem_lookup, 1);
-
-NVMEM Consumers
-+++++++++++++++
-
-NVMEM consumers are the entities which make use of the NVMEM provider to
-read from and to NVMEM.
-
-3. NVMEM cell based consumer APIs
-=================================
-
-NVMEM cells are the data entries/fields in the NVMEM.
-The NVMEM framework provides 3 APIs to read/write NVMEM cells::
-
-  struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *name);
-  struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, const char *name);
-
-  void nvmem_cell_put(struct nvmem_cell *cell);
-  void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell);
-
-  void *nvmem_cell_read(struct nvmem_cell *cell, ssize_t *len);
-  int nvmem_cell_write(struct nvmem_cell *cell, void *buf, ssize_t len);
-
-`*nvmem_cell_get()` apis will get a reference to nvmem cell for a given id,
-and nvmem_cell_read/write() can then read or write to the cell.
-Once the usage of the cell is finished the consumer should call
-`*nvmem_cell_put()` to free all the allocation memory for the cell.
-
-4. Direct NVMEM device based consumer APIs
-==========================================
-
-In some instances it is necessary to directly read/write the NVMEM.
-To facilitate such consumers NVMEM framework provides below apis::
-
-  struct nvmem_device *nvmem_device_get(struct device *dev, const char *name);
-  struct nvmem_device *devm_nvmem_device_get(struct device *dev,
-					   const char *name);
-  void nvmem_device_put(struct nvmem_device *nvmem);
-  int nvmem_device_read(struct nvmem_device *nvmem, unsigned int offset,
-		      size_t bytes, void *buf);
-  int nvmem_device_write(struct nvmem_device *nvmem, unsigned int offset,
-		       size_t bytes, void *buf);
-  int nvmem_device_cell_read(struct nvmem_device *nvmem,
-			   struct nvmem_cell_info *info, void *buf);
-  int nvmem_device_cell_write(struct nvmem_device *nvmem,
-			    struct nvmem_cell_info *info, void *buf);
-
-Before the consumers can read/write NVMEM directly, it should get hold
-of nvmem_controller from one of the `*nvmem_device_get()` api.
-
-The difference between these apis and cell based apis is that these apis always
-take nvmem_device as parameter.
-
-5. Releasing a reference to the NVMEM
-=====================================
-
-When a consumer no longer needs the NVMEM, it has to release the reference
-to the NVMEM it has obtained using the APIs mentioned in the above section.
-The NVMEM framework provides 2 APIs to release a reference to the NVMEM::
-
-  void nvmem_cell_put(struct nvmem_cell *cell);
-  void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell);
-  void nvmem_device_put(struct nvmem_device *nvmem);
-  void devm_nvmem_device_put(struct device *dev, struct nvmem_device *nvmem);
-
-Both these APIs are used to release a reference to the NVMEM and
-devm_nvmem_cell_put and devm_nvmem_device_put destroys the devres associated
-with this NVMEM.
-
-Userspace
-+++++++++
-
-6. Userspace binary interface
-==============================
-
-Userspace can read/write the raw NVMEM file located at::
-
-	/sys/bus/nvmem/devices/*/nvmem
-
-ex::
-
-  hexdump /sys/bus/nvmem/devices/qfprom0/nvmem
-
-  0000000 0000 0000 0000 0000 0000 0000 0000 0000
-  *
-  00000a0 db10 2240 0000 e000 0c00 0c00 0000 0c00
-  0000000 0000 0000 0000 0000 0000 0000 0000 0000
-  ...
-  *
-  0001000
-
-7. DeviceTree Binding
-=====================
-
-See Documentation/devicetree/bindings/nvmem/nvmem.txt
diff --git a/Documentation/parport-lowlevel.txt b/Documentation/parport-lowlevel.txt
deleted file mode 100644
index 0633d70ffda7..000000000000
--- a/Documentation/parport-lowlevel.txt
+++ /dev/null
@@ -1,1832 +0,0 @@
-===============================
-PARPORT interface documentation
-===============================
-
-:Time-stamp: <2000-02-24 13:30:20 twaugh>
-
-Described here are the following functions:
-
-Global functions::
-  parport_register_driver
-  parport_unregister_driver
-  parport_enumerate
-  parport_register_device
-  parport_unregister_device
-  parport_claim
-  parport_claim_or_block
-  parport_release
-  parport_yield
-  parport_yield_blocking
-  parport_wait_peripheral
-  parport_poll_peripheral
-  parport_wait_event
-  parport_negotiate
-  parport_read
-  parport_write
-  parport_open
-  parport_close
-  parport_device_id
-  parport_device_coords
-  parport_find_class
-  parport_find_device
-  parport_set_timeout
-
-Port functions (can be overridden by low-level drivers):
-
-  SPP::
-    port->ops->read_data
-    port->ops->write_data
-    port->ops->read_status
-    port->ops->read_control
-    port->ops->write_control
-    port->ops->frob_control
-    port->ops->enable_irq
-    port->ops->disable_irq
-    port->ops->data_forward
-    port->ops->data_reverse
-
-  EPP::
-    port->ops->epp_write_data
-    port->ops->epp_read_data
-    port->ops->epp_write_addr
-    port->ops->epp_read_addr
-
-  ECP::
-    port->ops->ecp_write_data
-    port->ops->ecp_read_data
-    port->ops->ecp_write_addr
-
-  Other::
-    port->ops->nibble_read_data
-    port->ops->byte_read_data
-    port->ops->compat_write_data
-
-The parport subsystem comprises ``parport`` (the core port-sharing
-code), and a variety of low-level drivers that actually do the port
-accesses.  Each low-level driver handles a particular style of port
-(PC, Amiga, and so on).
-
-The parport interface to the device driver author can be broken down
-into global functions and port functions.
-
-The global functions are mostly for communicating between the device
-driver and the parport subsystem: acquiring a list of available ports,
-claiming a port for exclusive use, and so on.  They also include
-``generic`` functions for doing standard things that will work on any
-IEEE 1284-capable architecture.
-
-The port functions are provided by the low-level drivers, although the
-core parport module provides generic ``defaults`` for some routines.
-The port functions can be split into three groups: SPP, EPP, and ECP.
-
-SPP (Standard Parallel Port) functions modify so-called ``SPP``
-registers: data, status, and control.  The hardware may not actually
-have registers exactly like that, but the PC does and this interface is
-modelled after common PC implementations.  Other low-level drivers may
-be able to emulate most of the functionality.
-
-EPP (Enhanced Parallel Port) functions are provided for reading and
-writing in IEEE 1284 EPP mode, and ECP (Extended Capabilities Port)
-functions are used for IEEE 1284 ECP mode. (What about BECP? Does
-anyone care?)
-
-Hardware assistance for EPP and/or ECP transfers may or may not be
-available, and if it is available it may or may not be used.  If
-hardware is not used, the transfer will be software-driven.  In order
-to cope with peripherals that only tenuously support IEEE 1284, a
-low-level driver specific function is provided, for altering 'fudge
-factors'.
-
-Global functions
-================
-
-parport_register_driver - register a device driver with parport
----------------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_driver {
-		const char *name;
-		void (*attach) (struct parport *);
-		void (*detach) (struct parport *);
-		struct parport_driver *next;
-	};
-	int parport_register_driver (struct parport_driver *driver);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-In order to be notified about parallel ports when they are detected,
-parport_register_driver should be called.  Your driver will
-immediately be notified of all ports that have already been detected,
-and of each new port as low-level drivers are loaded.
-
-A ``struct parport_driver`` contains the textual name of your driver,
-a pointer to a function to handle new ports, and a pointer to a
-function to handle ports going away due to a low-level driver
-unloading.  Ports will only be detached if they are not being used
-(i.e. there are no devices registered on them).
-
-The visible parts of the ``struct parport *`` argument given to
-attach/detach are::
-
-	struct parport
-	{
-		struct parport *next; /* next parport in list */
-		const char *name;     /* port's name */
-		unsigned int modes;   /* bitfield of hardware modes */
-		struct parport_device_info probe_info;
-				/* IEEE1284 info */
-		int number;           /* parport index */
-		struct parport_operations *ops;
-		...
-	};
-
-There are other members of the structure, but they should not be
-touched.
-
-The ``modes`` member summarises the capabilities of the underlying
-hardware.  It consists of flags which may be bitwise-ored together:
-
-  ============================= ===============================================
-  PARPORT_MODE_PCSPP		IBM PC registers are available,
-				i.e. functions that act on data,
-				control and status registers are
-				probably writing directly to the
-				hardware.
-  PARPORT_MODE_TRISTATE		The data drivers may be turned off.
-				This allows the data lines to be used
-				for reverse (peripheral to host)
-				transfers.
-  PARPORT_MODE_COMPAT		The hardware can assist with
-				compatibility-mode (printer)
-				transfers, i.e. compat_write_block.
-  PARPORT_MODE_EPP		The hardware can assist with EPP
-				transfers.
-  PARPORT_MODE_ECP		The hardware can assist with ECP
-				transfers.
-  PARPORT_MODE_DMA		The hardware can use DMA, so you might
-				want to pass ISA DMA-able memory
-				(i.e. memory allocated using the
-				GFP_DMA flag with kmalloc) to the
-				low-level driver in order to take
-				advantage of it.
-  ============================= ===============================================
-
-There may be other flags in ``modes`` as well.
-
-The contents of ``modes`` is advisory only.  For example, if the
-hardware is capable of DMA, and PARPORT_MODE_DMA is in ``modes``, it
-doesn't necessarily mean that DMA will always be used when possible.
-Similarly, hardware that is capable of assisting ECP transfers won't
-necessarily be used.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-Zero on success, otherwise an error code.
-
-ERRORS
-^^^^^^
-
-None. (Can it fail? Why return int?)
-
-EXAMPLE
-^^^^^^^
-
-::
-
-	static void lp_attach (struct parport *port)
-	{
-		...
-		private = kmalloc (...);
-		dev[count++] = parport_register_device (...);
-		...
-	}
-
-	static void lp_detach (struct parport *port)
-	{
-		...
-	}
-
-	static struct parport_driver lp_driver = {
-		"lp",
-		lp_attach,
-		lp_detach,
-		NULL /* always put NULL here */
-	};
-
-	int lp_init (void)
-	{
-		...
-		if (parport_register_driver (&lp_driver)) {
-			/* Failed; nothing we can do. */
-			return -EIO;
-		}
-		...
-	}
-
-
-SEE ALSO
-^^^^^^^^
-
-parport_unregister_driver, parport_register_device, parport_enumerate
-
-
-
-parport_unregister_driver - tell parport to forget about this driver
---------------------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_driver {
-		const char *name;
-		void (*attach) (struct parport *);
-		void (*detach) (struct parport *);
-		struct parport_driver *next;
-	};
-	void parport_unregister_driver (struct parport_driver *driver);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-This tells parport not to notify the device driver of new ports or of
-ports going away.  Registered devices belonging to that driver are NOT
-unregistered: parport_unregister_device must be used for each one.
-
-EXAMPLE
-^^^^^^^
-
-::
-
-	void cleanup_module (void)
-	{
-		...
-		/* Stop notifications. */
-		parport_unregister_driver (&lp_driver);
-
-		/* Unregister devices. */
-		for (i = 0; i < NUM_DEVS; i++)
-			parport_unregister_device (dev[i]);
-		...
-	}
-
-SEE ALSO
-^^^^^^^^
-
-parport_register_driver, parport_enumerate
-
-
-
-parport_enumerate - retrieve a list of parallel ports (DEPRECATED)
-------------------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport *parport_enumerate (void);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Retrieve the first of a list of valid parallel ports for this machine.
-Successive parallel ports can be found using the ``struct parport
-*next`` element of the ``struct parport *`` that is returned.  If ``next``
-is NULL, there are no more parallel ports in the list.  The number of
-ports in the list will not exceed PARPORT_MAX.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-A ``struct parport *`` describing a valid parallel port for the machine,
-or NULL if there are none.
-
-ERRORS
-^^^^^^
-
-This function can return NULL to indicate that there are no parallel
-ports to use.
-
-EXAMPLE
-^^^^^^^
-
-::
-
-	int detect_device (void)
-	{
-		struct parport *port;
-
-		for (port = parport_enumerate ();
-		port != NULL;
-		port = port->next) {
-			/* Try to detect a device on the port... */
-			...
-		}
-		}
-
-		...
-	}
-
-NOTES
-^^^^^
-
-parport_enumerate is deprecated; parport_register_driver should be
-used instead.
-
-SEE ALSO
-^^^^^^^^
-
-parport_register_driver, parport_unregister_driver
-
-
-
-parport_register_device - register to use a port
-------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	typedef int (*preempt_func) (void *handle);
-	typedef void (*wakeup_func) (void *handle);
-	typedef int (*irq_func) (int irq, void *handle, struct pt_regs *);
-
-	struct pardevice *parport_register_device(struct parport *port,
-						  const char *name,
-						  preempt_func preempt,
-						  wakeup_func wakeup,
-						  irq_func irq,
-						  int flags,
-						  void *handle);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Use this function to register your device driver on a parallel port
-(``port``).  Once you have done that, you will be able to use
-parport_claim and parport_release in order to use the port.
-
-The (``name``) argument is the name of the device that appears in /proc
-filesystem. The string must be valid for the whole lifetime of the
-device (until parport_unregister_device is called).
-
-This function will register three callbacks into your driver:
-``preempt``, ``wakeup`` and ``irq``.  Each of these may be NULL in order to
-indicate that you do not want a callback.
-
-When the ``preempt`` function is called, it is because another driver
-wishes to use the parallel port.  The ``preempt`` function should return
-non-zero if the parallel port cannot be released yet -- if zero is
-returned, the port is lost to another driver and the port must be
-re-claimed before use.
-
-The ``wakeup`` function is called once another driver has released the
-port and no other driver has yet claimed it.  You can claim the
-parallel port from within the ``wakeup`` function (in which case the
-claim is guaranteed to succeed), or choose not to if you don't need it
-now.
-
-If an interrupt occurs on the parallel port your driver has claimed,
-the ``irq`` function will be called. (Write something about shared
-interrupts here.)
-
-The ``handle`` is a pointer to driver-specific data, and is passed to
-the callback functions.
-
-``flags`` may be a bitwise combination of the following flags:
-
-  ===================== =================================================
-        Flag            Meaning
-  ===================== =================================================
-  PARPORT_DEV_EXCL	The device cannot share the parallel port at all.
-			Use this only when absolutely necessary.
-  ===================== =================================================
-
-The typedefs are not actually defined -- they are only shown in order
-to make the function prototype more readable.
-
-The visible parts of the returned ``struct pardevice`` are::
-
-	struct pardevice {
-		struct parport *port;	/* Associated port */
-		void *private;		/* Device driver's 'handle' */
-		...
-	};
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-A ``struct pardevice *``: a handle to the registered parallel port
-device that can be used for parport_claim, parport_release, etc.
-
-ERRORS
-^^^^^^
-
-A return value of NULL indicates that there was a problem registering
-a device on that port.
-
-EXAMPLE
-^^^^^^^
-
-::
-
-	static int preempt (void *handle)
-	{
-		if (busy_right_now)
-			return 1;
-
-		must_reclaim_port = 1;
-		return 0;
-	}
-
-	static void wakeup (void *handle)
-	{
-		struct toaster *private = handle;
-		struct pardevice *dev = private->dev;
-		if (!dev) return; /* avoid races */
-
-		if (want_port)
-			parport_claim (dev);
-	}
-
-	static int toaster_detect (struct toaster *private, struct parport *port)
-	{
-		private->dev = parport_register_device (port, "toaster", preempt,
-							wakeup, NULL, 0,
-							private);
-		if (!private->dev)
-			/* Couldn't register with parport. */
-			return -EIO;
-
-		must_reclaim_port = 0;
-		busy_right_now = 1;
-		parport_claim_or_block (private->dev);
-		...
-		/* Don't need the port while the toaster warms up. */
-		busy_right_now = 0;
-		...
-		busy_right_now = 1;
-		if (must_reclaim_port) {
-			parport_claim_or_block (private->dev);
-			must_reclaim_port = 0;
-		}
-		...
-	}
-
-SEE ALSO
-^^^^^^^^
-
-parport_unregister_device, parport_claim
-
-
-
-parport_unregister_device - finish using a port
------------------------------------------------
-
-SYNPOPSIS
-
-::
-
-	#include <linux/parport.h>
-
-	void parport_unregister_device (struct pardevice *dev);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-This function is the opposite of parport_register_device.  After using
-parport_unregister_device, ``dev`` is no longer a valid device handle.
-
-You should not unregister a device that is currently claimed, although
-if you do it will be released automatically.
-
-EXAMPLE
-^^^^^^^
-
-::
-
-	...
-	kfree (dev->private); /* before we lose the pointer */
-	parport_unregister_device (dev);
-	...
-
-SEE ALSO
-^^^^^^^^
-
-
-parport_unregister_driver
-
-parport_claim, parport_claim_or_block - claim the parallel port for a device
-----------------------------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	int parport_claim (struct pardevice *dev);
-	int parport_claim_or_block (struct pardevice *dev);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-These functions attempt to gain control of the parallel port on which
-``dev`` is registered.  ``parport_claim`` does not block, but
-``parport_claim_or_block`` may do. (Put something here about blocking
-interruptibly or non-interruptibly.)
-
-You should not try to claim a port that you have already claimed.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-A return value of zero indicates that the port was successfully
-claimed, and the caller now has possession of the parallel port.
-
-If ``parport_claim_or_block`` blocks before returning successfully, the
-return value is positive.
-
-ERRORS
-^^^^^^
-
-========== ==========================================================
-  -EAGAIN  The port is unavailable at the moment, but another attempt
-           to claim it may succeed.
-========== ==========================================================
-
-SEE ALSO
-^^^^^^^^
-
-
-parport_release
-
-parport_release - release the parallel port
--------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	void parport_release (struct pardevice *dev);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Once a parallel port device has been claimed, it can be released using
-``parport_release``.  It cannot fail, but you should not release a
-device that you do not have possession of.
-
-EXAMPLE
-^^^^^^^
-
-::
-
-	static size_t write (struct pardevice *dev, const void *buf,
-			size_t len)
-	{
-		...
-		written = dev->port->ops->write_ecp_data (dev->port, buf,
-							len);
-		parport_release (dev);
-		...
-	}
-
-
-SEE ALSO
-^^^^^^^^
-
-change_mode, parport_claim, parport_claim_or_block, parport_yield
-
-
-
-parport_yield, parport_yield_blocking - temporarily release a parallel port
----------------------------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	int parport_yield (struct pardevice *dev)
-	int parport_yield_blocking (struct pardevice *dev);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-When a driver has control of a parallel port, it may allow another
-driver to temporarily ``borrow`` it.  ``parport_yield`` does not block;
-``parport_yield_blocking`` may do.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-A return value of zero indicates that the caller still owns the port
-and the call did not block.
-
-A positive return value from ``parport_yield_blocking`` indicates that
-the caller still owns the port and the call blocked.
-
-A return value of -EAGAIN indicates that the caller no longer owns the
-port, and it must be re-claimed before use.
-
-ERRORS
-^^^^^^
-
-========= ==========================================================
-  -EAGAIN  Ownership of the parallel port was given away.
-========= ==========================================================
-
-SEE ALSO
-^^^^^^^^
-
-parport_release
-
-
-
-parport_wait_peripheral - wait for status lines, up to 35ms
------------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	int parport_wait_peripheral (struct parport *port,
-				     unsigned char mask,
-				     unsigned char val);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Wait for the status lines in mask to match the values in val.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-======== ==========================================================
- -EINTR  a signal is pending
-      0  the status lines in mask have values in val
-      1  timed out while waiting (35ms elapsed)
-======== ==========================================================
-
-SEE ALSO
-^^^^^^^^
-
-parport_poll_peripheral
-
-
-
-parport_poll_peripheral - wait for status lines, in usec
---------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	int parport_poll_peripheral (struct parport *port,
-				     unsigned char mask,
-				     unsigned char val,
-				     int usec);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Wait for the status lines in mask to match the values in val.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-======== ==========================================================
- -EINTR  a signal is pending
-      0  the status lines in mask have values in val
-      1  timed out while waiting (usec microseconds have elapsed)
-======== ==========================================================
-
-SEE ALSO
-^^^^^^^^
-
-parport_wait_peripheral
-
-
-
-parport_wait_event - wait for an event on a port
-------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	int parport_wait_event (struct parport *port, signed long timeout)
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Wait for an event (e.g. interrupt) on a port.  The timeout is in
-jiffies.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-======= ==========================================================
-      0  success
-     <0  error (exit as soon as possible)
-     >0  timed out
-======= ==========================================================
-
-parport_negotiate - perform IEEE 1284 negotiation
--------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	int parport_negotiate (struct parport *, int mode);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Perform IEEE 1284 negotiation.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-======= ==========================================================
-     0  handshake OK; IEEE 1284 peripheral and mode available
-    -1  handshake failed; peripheral not compliant (or none present)
-     1  handshake OK; IEEE 1284 peripheral present but mode not
-        available
-======= ==========================================================
-
-SEE ALSO
-^^^^^^^^
-
-parport_read, parport_write
-
-
-
-parport_read - read data from device
-------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	ssize_t parport_read (struct parport *, void *buf, size_t len);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Read data from device in current IEEE 1284 transfer mode.  This only
-works for modes that support reverse data transfer.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-If negative, an error code; otherwise the number of bytes transferred.
-
-SEE ALSO
-^^^^^^^^
-
-parport_write, parport_negotiate
-
-
-
-parport_write - write data to device
-------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	ssize_t parport_write (struct parport *, const void *buf, size_t len);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Write data to device in current IEEE 1284 transfer mode.  This only
-works for modes that support forward data transfer.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-If negative, an error code; otherwise the number of bytes transferred.
-
-SEE ALSO
-^^^^^^^^
-
-parport_read, parport_negotiate
-
-
-
-parport_open - register device for particular device number
------------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct pardevice *parport_open (int devnum, const char *name,
-				        int (*pf) (void *),
-					void (*kf) (void *),
-					void (*irqf) (int, void *,
-						      struct pt_regs *),
-					int flags, void *handle);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-This is like parport_register_device but takes a device number instead
-of a pointer to a struct parport.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-See parport_register_device.  If no device is associated with devnum,
-NULL is returned.
-
-SEE ALSO
-^^^^^^^^
-
-parport_register_device
-
-
-
-parport_close - unregister device for particular device number
---------------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	void parport_close (struct pardevice *dev);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-This is the equivalent of parport_unregister_device for parport_open.
-
-SEE ALSO
-^^^^^^^^
-
-parport_unregister_device, parport_open
-
-
-
-parport_device_id - obtain IEEE 1284 Device ID
-----------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	ssize_t parport_device_id (int devnum, char *buffer, size_t len);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Obtains the IEEE 1284 Device ID associated with a given device.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-If negative, an error code; otherwise, the number of bytes of buffer
-that contain the device ID.  The format of the device ID is as
-follows::
-
-	[length][ID]
-
-The first two bytes indicate the inclusive length of the entire Device
-ID, and are in big-endian order.  The ID is a sequence of pairs of the
-form::
-
-	key:value;
-
-NOTES
-^^^^^
-
-Many devices have ill-formed IEEE 1284 Device IDs.
-
-SEE ALSO
-^^^^^^^^
-
-parport_find_class, parport_find_device
-
-
-
-parport_device_coords - convert device number to device coordinates
--------------------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	int parport_device_coords (int devnum, int *parport, int *mux,
-				   int *daisy);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Convert between device number (zero-based) and device coordinates
-(port, multiplexor, daisy chain address).
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-Zero on success, in which case the coordinates are (``*parport``, ``*mux``,
-``*daisy``).
-
-SEE ALSO
-^^^^^^^^
-
-parport_open, parport_device_id
-
-
-
-parport_find_class - find a device by its class
------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	typedef enum {
-		PARPORT_CLASS_LEGACY = 0,       /* Non-IEEE1284 device */
-		PARPORT_CLASS_PRINTER,
-		PARPORT_CLASS_MODEM,
-		PARPORT_CLASS_NET,
-		PARPORT_CLASS_HDC,              /* Hard disk controller */
-		PARPORT_CLASS_PCMCIA,
-		PARPORT_CLASS_MEDIA,            /* Multimedia device */
-		PARPORT_CLASS_FDC,              /* Floppy disk controller */
-		PARPORT_CLASS_PORTS,
-		PARPORT_CLASS_SCANNER,
-		PARPORT_CLASS_DIGCAM,
-		PARPORT_CLASS_OTHER,            /* Anything else */
-		PARPORT_CLASS_UNSPEC,           /* No CLS field in ID */
-		PARPORT_CLASS_SCSIADAPTER
-	} parport_device_class;
-
-	int parport_find_class (parport_device_class cls, int from);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Find a device by class.  The search starts from device number from+1.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-The device number of the next device in that class, or -1 if no such
-device exists.
-
-NOTES
-^^^^^
-
-Example usage::
-
-	int devnum = -1;
-	while ((devnum = parport_find_class (PARPORT_CLASS_DIGCAM, devnum)) != -1) {
-		struct pardevice *dev = parport_open (devnum, ...);
-		...
-	}
-
-SEE ALSO
-^^^^^^^^
-
-parport_find_device, parport_open, parport_device_id
-
-
-
-parport_find_device - find a device by its class
-------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	int parport_find_device (const char *mfg, const char *mdl, int from);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Find a device by vendor and model.  The search starts from device
-number from+1.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-The device number of the next device matching the specifications, or
--1 if no such device exists.
-
-NOTES
-^^^^^
-
-Example usage::
-
-	int devnum = -1;
-	while ((devnum = parport_find_device ("IOMEGA", "ZIP+", devnum)) != -1) {
-		struct pardevice *dev = parport_open (devnum, ...);
-		...
-	}
-
-SEE ALSO
-^^^^^^^^
-
-parport_find_class, parport_open, parport_device_id
-
-
-
-parport_set_timeout - set the inactivity timeout
-------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	long parport_set_timeout (struct pardevice *dev, long inactivity);
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Set the inactivity timeout, in jiffies, for a registered device.  The
-previous timeout is returned.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-The previous timeout, in jiffies.
-
-NOTES
-^^^^^
-
-Some of the port->ops functions for a parport may take time, owing to
-delays at the peripheral.  After the peripheral has not responded for
-``inactivity`` jiffies, a timeout will occur and the blocking function
-will return.
-
-A timeout of 0 jiffies is a special case: the function must do as much
-as it can without blocking or leaving the hardware in an unknown
-state.  If port operations are performed from within an interrupt
-handler, for instance, a timeout of 0 jiffies should be used.
-
-Once set for a registered device, the timeout will remain at the set
-value until set again.
-
-SEE ALSO
-^^^^^^^^
-
-port->ops->xxx_read/write_yyy
-
-
-
-
-PORT FUNCTIONS
-==============
-
-The functions in the port->ops structure (struct parport_operations)
-are provided by the low-level driver responsible for that port.
-
-port->ops->read_data - read the data register
----------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		unsigned char (*read_data) (struct parport *port);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-If port->modes contains the PARPORT_MODE_TRISTATE flag and the
-PARPORT_CONTROL_DIRECTION bit in the control register is set, this
-returns the value on the data pins.  If port->modes contains the
-PARPORT_MODE_TRISTATE flag and the PARPORT_CONTROL_DIRECTION bit is
-not set, the return value _may_ be the last value written to the data
-register.  Otherwise the return value is undefined.
-
-SEE ALSO
-^^^^^^^^
-
-write_data, read_status, write_control
-
-
-
-port->ops->write_data - write the data register
------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		void (*write_data) (struct parport *port, unsigned char d);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Writes to the data register.  May have side-effects (a STROBE pulse,
-for instance).
-
-SEE ALSO
-^^^^^^^^
-
-read_data, read_status, write_control
-
-
-
-port->ops->read_status - read the status register
--------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		unsigned char (*read_status) (struct parport *port);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Reads from the status register.  This is a bitmask:
-
-- PARPORT_STATUS_ERROR (printer fault, "nFault")
-- PARPORT_STATUS_SELECT (on-line, "Select")
-- PARPORT_STATUS_PAPEROUT (no paper, "PError")
-- PARPORT_STATUS_ACK (handshake, "nAck")
-- PARPORT_STATUS_BUSY (busy, "Busy")
-
-There may be other bits set.
-
-SEE ALSO
-^^^^^^^^
-
-read_data, write_data, write_control
-
-
-
-port->ops->read_control - read the control register
----------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		unsigned char (*read_control) (struct parport *port);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Returns the last value written to the control register (either from
-write_control or frob_control).  No port access is performed.
-
-SEE ALSO
-^^^^^^^^
-
-read_data, write_data, read_status, write_control
-
-
-
-port->ops->write_control - write the control register
------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		void (*write_control) (struct parport *port, unsigned char s);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Writes to the control register. This is a bitmask::
-
-				  _______
-	- PARPORT_CONTROL_STROBE (nStrobe)
-				  _______
-	- PARPORT_CONTROL_AUTOFD (nAutoFd)
-				_____
-	- PARPORT_CONTROL_INIT (nInit)
-				  _________
-	- PARPORT_CONTROL_SELECT (nSelectIn)
-
-SEE ALSO
-^^^^^^^^
-
-read_data, write_data, read_status, frob_control
-
-
-
-port->ops->frob_control - write control register bits
------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		unsigned char (*frob_control) (struct parport *port,
-					unsigned char mask,
-					unsigned char val);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-This is equivalent to reading from the control register, masking out
-the bits in mask, exclusive-or'ing with the bits in val, and writing
-the result to the control register.
-
-As some ports don't allow reads from the control port, a software copy
-of its contents is maintained, so frob_control is in fact only one
-port access.
-
-SEE ALSO
-^^^^^^^^
-
-read_data, write_data, read_status, write_control
-
-
-
-port->ops->enable_irq - enable interrupt generation
----------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		void (*enable_irq) (struct parport *port);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-The parallel port hardware is instructed to generate interrupts at
-appropriate moments, although those moments are
-architecture-specific.  For the PC architecture, interrupts are
-commonly generated on the rising edge of nAck.
-
-SEE ALSO
-^^^^^^^^
-
-disable_irq
-
-
-
-port->ops->disable_irq - disable interrupt generation
------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		void (*disable_irq) (struct parport *port);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-The parallel port hardware is instructed not to generate interrupts.
-The interrupt itself is not masked.
-
-SEE ALSO
-^^^^^^^^
-
-enable_irq
-
-
-
-port->ops->data_forward - enable data drivers
----------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		void (*data_forward) (struct parport *port);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Enables the data line drivers, for 8-bit host-to-peripheral
-communications.
-
-SEE ALSO
-^^^^^^^^
-
-data_reverse
-
-
-
-port->ops->data_reverse - tristate the buffer
----------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		void (*data_reverse) (struct parport *port);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Places the data bus in a high impedance state, if port->modes has the
-PARPORT_MODE_TRISTATE bit set.
-
-SEE ALSO
-^^^^^^^^
-
-data_forward
-
-
-
-port->ops->epp_write_data - write EPP data
-------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		size_t (*epp_write_data) (struct parport *port, const void *buf,
-					size_t len, int flags);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Writes data in EPP mode, and returns the number of bytes written.
-
-The ``flags`` parameter may be one or more of the following,
-bitwise-or'ed together:
-
-======================= =================================================
-PARPORT_EPP_FAST	Use fast transfers. Some chips provide 16-bit and
-			32-bit registers.  However, if a transfer
-			times out, the return value may be unreliable.
-======================= =================================================
-
-SEE ALSO
-^^^^^^^^
-
-epp_read_data, epp_write_addr, epp_read_addr
-
-
-
-port->ops->epp_read_data - read EPP data
-----------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		size_t (*epp_read_data) (struct parport *port, void *buf,
-					size_t len, int flags);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Reads data in EPP mode, and returns the number of bytes read.
-
-The ``flags`` parameter may be one or more of the following,
-bitwise-or'ed together:
-
-======================= =================================================
-PARPORT_EPP_FAST	Use fast transfers. Some chips provide 16-bit and
-			32-bit registers.  However, if a transfer
-			times out, the return value may be unreliable.
-======================= =================================================
-
-SEE ALSO
-^^^^^^^^
-
-epp_write_data, epp_write_addr, epp_read_addr
-
-
-
-port->ops->epp_write_addr - write EPP address
----------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		size_t (*epp_write_addr) (struct parport *port,
-					const void *buf, size_t len, int flags);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Writes EPP addresses (8 bits each), and returns the number written.
-
-The ``flags`` parameter may be one or more of the following,
-bitwise-or'ed together:
-
-======================= =================================================
-PARPORT_EPP_FAST	Use fast transfers. Some chips provide 16-bit and
-			32-bit registers.  However, if a transfer
-			times out, the return value may be unreliable.
-======================= =================================================
-
-(Does PARPORT_EPP_FAST make sense for this function?)
-
-SEE ALSO
-^^^^^^^^
-
-epp_write_data, epp_read_data, epp_read_addr
-
-
-
-port->ops->epp_read_addr - read EPP address
--------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		size_t (*epp_read_addr) (struct parport *port, void *buf,
-					size_t len, int flags);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Reads EPP addresses (8 bits each), and returns the number read.
-
-The ``flags`` parameter may be one or more of the following,
-bitwise-or'ed together:
-
-======================= =================================================
-PARPORT_EPP_FAST	Use fast transfers. Some chips provide 16-bit and
-			32-bit registers.  However, if a transfer
-			times out, the return value may be unreliable.
-======================= =================================================
-
-(Does PARPORT_EPP_FAST make sense for this function?)
-
-SEE ALSO
-^^^^^^^^
-
-epp_write_data, epp_read_data, epp_write_addr
-
-
-
-port->ops->ecp_write_data - write a block of ECP data
------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		size_t (*ecp_write_data) (struct parport *port,
-					const void *buf, size_t len, int flags);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Writes a block of ECP data.  The ``flags`` parameter is ignored.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-The number of bytes written.
-
-SEE ALSO
-^^^^^^^^
-
-ecp_read_data, ecp_write_addr
-
-
-
-port->ops->ecp_read_data - read a block of ECP data
----------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		size_t (*ecp_read_data) (struct parport *port,
-					void *buf, size_t len, int flags);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Reads a block of ECP data.  The ``flags`` parameter is ignored.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-The number of bytes read.  NB. There may be more unread data in a
-FIFO.  Is there a way of stunning the FIFO to prevent this?
-
-SEE ALSO
-^^^^^^^^
-
-ecp_write_block, ecp_write_addr
-
-
-
-port->ops->ecp_write_addr - write a block of ECP addresses
-----------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		size_t (*ecp_write_addr) (struct parport *port,
-					const void *buf, size_t len, int flags);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Writes a block of ECP addresses.  The ``flags`` parameter is ignored.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-The number of bytes written.
-
-NOTES
-^^^^^
-
-This may use a FIFO, and if so shall not return until the FIFO is empty.
-
-SEE ALSO
-^^^^^^^^
-
-ecp_read_data, ecp_write_data
-
-
-
-port->ops->nibble_read_data - read a block of data in nibble mode
------------------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		size_t (*nibble_read_data) (struct parport *port,
-					void *buf, size_t len, int flags);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Reads a block of data in nibble mode.  The ``flags`` parameter is ignored.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-The number of whole bytes read.
-
-SEE ALSO
-^^^^^^^^
-
-byte_read_data, compat_write_data
-
-
-
-port->ops->byte_read_data - read a block of data in byte mode
--------------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		size_t (*byte_read_data) (struct parport *port,
-					void *buf, size_t len, int flags);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Reads a block of data in byte mode.  The ``flags`` parameter is ignored.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-The number of bytes read.
-
-SEE ALSO
-^^^^^^^^
-
-nibble_read_data, compat_write_data
-
-
-
-port->ops->compat_write_data - write a block of data in compatibility mode
---------------------------------------------------------------------------
-
-SYNOPSIS
-^^^^^^^^
-
-::
-
-	#include <linux/parport.h>
-
-	struct parport_operations {
-		...
-		size_t (*compat_write_data) (struct parport *port,
-					const void *buf, size_t len, int flags);
-		...
-	};
-
-DESCRIPTION
-^^^^^^^^^^^
-
-Writes a block of data in compatibility mode.  The ``flags`` parameter
-is ignored.
-
-RETURN VALUE
-^^^^^^^^^^^^
-
-The number of bytes written.
-
-SEE ALSO
-^^^^^^^^
-
-nibble_read_data, byte_read_data
diff --git a/Documentation/pti/pti_intel_mid.rst b/Documentation/pti/pti_intel_mid.rst
deleted file mode 100644
index ea05725174cb..000000000000
--- a/Documentation/pti/pti_intel_mid.rst
+++ /dev/null
@@ -1,106 +0,0 @@
-:orphan:
-
-=============
-Intel MID PTI
-=============
-
-The Intel MID PTI project is HW implemented in Intel Atom
-system-on-a-chip designs based on the Parallel Trace
-Interface for MIPI P1149.7 cJTAG standard.  The kernel solution
-for this platform involves the following files::
-
-	./include/linux/pti.h
-	./drivers/.../n_tracesink.h
-	./drivers/.../n_tracerouter.c
-	./drivers/.../n_tracesink.c
-	./drivers/.../pti.c
-
-pti.c is the driver that enables various debugging features
-popular on platforms from certain mobile manufacturers.
-n_tracerouter.c and n_tracesink.c allow extra system information to
-be collected and routed to the pti driver, such as trace
-debugging data from a modem.  Although n_tracerouter
-and n_tracesink are a part of the complete PTI solution,
-these two line disciplines can work separately from
-pti.c and route any data stream from one /dev/tty node
-to another /dev/tty node via kernel-space.  This provides
-a stable, reliable connection that will not break unless
-the user-space application shuts down (plus avoids
-kernel->user->kernel context switch overheads of routing
-data).
-
-An example debugging usage for this driver system:
-
-  * Hook /dev/ttyPTI0 to syslogd.  Opening this port will also start
-    a console device to further capture debugging messages to PTI.
-  * Hook /dev/ttyPTI1 to modem debugging data to write to PTI HW.
-    This is where n_tracerouter and n_tracesink are used.
-  * Hook /dev/pti to a user-level debugging application for writing
-    to PTI HW.
-  * `Use mipi_` Kernel Driver API in other device drivers for
-    debugging to PTI by first requesting a PTI write address via
-    mipi_request_masterchannel(1).
-
-Below is example pseudo-code on how a 'privileged' application
-can hook up n_tracerouter and n_tracesink to any tty on
-a system.  'Privileged' means the application has enough
-privileges to successfully manipulate the ldisc drivers
-but is not just blindly executing as 'root'. Keep in mind
-the use of ioctl(,TIOCSETD,) is not specific to the n_tracerouter
-and n_tracesink line discpline drivers but is a generic
-operation for a program to use a line discpline driver
-on a tty port other than the default n_tty::
-
-  /////////// To hook up n_tracerouter and n_tracesink /////////
-
-  // Note that n_tracerouter depends on n_tracesink.
-  #include <errno.h>
-  #define ONE_TTY "/dev/ttyOne"
-  #define TWO_TTY "/dev/ttyTwo"
-
-  // needed global to hand onto ldisc connection
-  static int g_fd_source = -1;
-  static int g_fd_sink  = -1;
-
-  // these two vars used to grab LDISC values from loaded ldisc drivers
-  // in OS.  Look at /proc/tty/ldiscs to get the right numbers from
-  // the ldiscs loaded in the system.
-  int source_ldisc_num, sink_ldisc_num = -1;
-  int retval;
-
-  g_fd_source = open(ONE_TTY, O_RDWR); // must be R/W
-  g_fd_sink   = open(TWO_TTY, O_RDWR); // must be R/W
-
-  if (g_fd_source <= 0) || (g_fd_sink <= 0) {
-     // doubt you'll want to use these exact error lines of code
-     printf("Error on open(). errno: %d\n",errno);
-     return errno;
-  }
-
-  retval = ioctl(g_fd_sink, TIOCSETD, &sink_ldisc_num);
-  if (retval < 0) {
-     printf("Error on ioctl().  errno: %d\n", errno);
-     return errno;
-  }
-
-  retval = ioctl(g_fd_source, TIOCSETD, &source_ldisc_num);
-  if (retval < 0) {
-     printf("Error on ioctl().  errno: %d\n", errno);
-     return errno;
-  }
-
-  /////////// To disconnect n_tracerouter and n_tracesink ////////
-
-  // First make sure data through the ldiscs has stopped.
-
-  // Second, disconnect ldiscs.  This provides a
-  // little cleaner shutdown on tty stack.
-  sink_ldisc_num = 0;
-  source_ldisc_num = 0;
-  ioctl(g_fd_uart, TIOCSETD, &sink_ldisc_num);
-  ioctl(g_fd_gadget, TIOCSETD, &source_ldisc_num);
-
-  // Three, program closes connection, and cleanup:
-  close(g_fd_uart);
-  close(g_fd_gadget);
-  g_fd_uart = g_fd_gadget = NULL;
diff --git a/Documentation/pwm.txt b/Documentation/pwm.txt
deleted file mode 100644
index ab62f1bb0366..000000000000
--- a/Documentation/pwm.txt
+++ /dev/null
@@ -1,165 +0,0 @@
-======================================
-Pulse Width Modulation (PWM) interface
-======================================
-
-This provides an overview about the Linux PWM interface
-
-PWMs are commonly used for controlling LEDs, fans or vibrators in
-cell phones. PWMs with a fixed purpose have no need implementing
-the Linux PWM API (although they could). However, PWMs are often
-found as discrete devices on SoCs which have no fixed purpose. It's
-up to the board designer to connect them to LEDs or fans. To provide
-this kind of flexibility the generic PWM API exists.
-
-Identifying PWMs
-----------------
-
-Users of the legacy PWM API use unique IDs to refer to PWM devices.
-
-Instead of referring to a PWM device via its unique ID, board setup code
-should instead register a static mapping that can be used to match PWM
-consumers to providers, as given in the following example::
-
-	static struct pwm_lookup board_pwm_lookup[] = {
-		PWM_LOOKUP("tegra-pwm", 0, "pwm-backlight", NULL,
-			   50000, PWM_POLARITY_NORMAL),
-	};
-
-	static void __init board_init(void)
-	{
-		...
-		pwm_add_table(board_pwm_lookup, ARRAY_SIZE(board_pwm_lookup));
-		...
-	}
-
-Using PWMs
-----------
-
-Legacy users can request a PWM device using pwm_request() and free it
-after usage with pwm_free().
-
-New users should use the pwm_get() function and pass to it the consumer
-device or a consumer name. pwm_put() is used to free the PWM device. Managed
-variants of these functions, devm_pwm_get() and devm_pwm_put(), also exist.
-
-After being requested, a PWM has to be configured using::
-
-	int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state);
-
-This API controls both the PWM period/duty_cycle config and the
-enable/disable state.
-
-The pwm_config(), pwm_enable() and pwm_disable() functions are just wrappers
-around pwm_apply_state() and should not be used if the user wants to change
-several parameter at once. For example, if you see pwm_config() and
-pwm_{enable,disable}() calls in the same function, this probably means you
-should switch to pwm_apply_state().
-
-The PWM user API also allows one to query the PWM state with pwm_get_state().
-
-In addition to the PWM state, the PWM API also exposes PWM arguments, which
-are the reference PWM config one should use on this PWM.
-PWM arguments are usually platform-specific and allows the PWM user to only
-care about dutycycle relatively to the full period (like, duty = 50% of the
-period). struct pwm_args contains 2 fields (period and polarity) and should
-be used to set the initial PWM config (usually done in the probe function
-of the PWM user). PWM arguments are retrieved with pwm_get_args().
-
-All consumers should really be reconfiguring the PWM upon resume as
-appropriate. This is the only way to ensure that everything is resumed in
-the proper order.
-
-Using PWMs with the sysfs interface
------------------------------------
-
-If CONFIG_SYSFS is enabled in your kernel configuration a simple sysfs
-interface is provided to use the PWMs from userspace. It is exposed at
-/sys/class/pwm/. Each probed PWM controller/chip will be exported as
-pwmchipN, where N is the base of the PWM chip. Inside the directory you
-will find:
-
-  npwm
-    The number of PWM channels this chip supports (read-only).
-
-  export
-    Exports a PWM channel for use with sysfs (write-only).
-
-  unexport
-   Unexports a PWM channel from sysfs (write-only).
-
-The PWM channels are numbered using a per-chip index from 0 to npwm-1.
-
-When a PWM channel is exported a pwmX directory will be created in the
-pwmchipN directory it is associated with, where X is the number of the
-channel that was exported. The following properties will then be available:
-
-  period
-    The total period of the PWM signal (read/write).
-    Value is in nanoseconds and is the sum of the active and inactive
-    time of the PWM.
-
-  duty_cycle
-    The active time of the PWM signal (read/write).
-    Value is in nanoseconds and must be less than the period.
-
-  polarity
-    Changes the polarity of the PWM signal (read/write).
-    Writes to this property only work if the PWM chip supports changing
-    the polarity. The polarity can only be changed if the PWM is not
-    enabled. Value is the string "normal" or "inversed".
-
-  enable
-    Enable/disable the PWM signal (read/write).
-
-	- 0 - disabled
-	- 1 - enabled
-
-Implementing a PWM driver
--------------------------
-
-Currently there are two ways to implement pwm drivers. Traditionally
-there only has been the barebone API meaning that each driver has
-to implement the pwm_*() functions itself. This means that it's impossible
-to have multiple PWM drivers in the system. For this reason it's mandatory
-for new drivers to use the generic PWM framework.
-
-A new PWM controller/chip can be added using pwmchip_add() and removed
-again with pwmchip_remove(). pwmchip_add() takes a filled in struct
-pwm_chip as argument which provides a description of the PWM chip, the
-number of PWM devices provided by the chip and the chip-specific
-implementation of the supported PWM operations to the framework.
-
-When implementing polarity support in a PWM driver, make sure to respect the
-signal conventions in the PWM framework. By definition, normal polarity
-characterizes a signal starts high for the duration of the duty cycle and
-goes low for the remainder of the period. Conversely, a signal with inversed
-polarity starts low for the duration of the duty cycle and goes high for the
-remainder of the period.
-
-Drivers are encouraged to implement ->apply() instead of the legacy
-->enable(), ->disable() and ->config() methods. Doing that should provide
-atomicity in the PWM config workflow, which is required when the PWM controls
-a critical device (like a regulator).
-
-The implementation of ->get_state() (a method used to retrieve initial PWM
-state) is also encouraged for the same reason: letting the PWM user know
-about the current PWM state would allow him to avoid glitches.
-
-Drivers should not implement any power management. In other words,
-consumers should implement it as described in the "Using PWMs" section.
-
-Locking
--------
-
-The PWM core list manipulations are protected by a mutex, so pwm_request()
-and pwm_free() may not be called from an atomic context. Currently the
-PWM core does not enforce any locking to pwm_enable(), pwm_disable() and
-pwm_config(), so the calling context is currently driver specific. This
-is an issue derived from the former barebone API and should be fixed soon.
-
-Helpers
--------
-
-Currently a PWM can only be configured with period_ns and duty_ns. For several
-use cases freq_hz and duty_percent might be better. Instead of calculating
-this in your driver please consider adding appropriate helpers to the framework.
diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt
deleted file mode 100644
index 7d3684e81df6..000000000000
--- a/Documentation/rfkill.txt
+++ /dev/null
@@ -1,132 +0,0 @@
-===============================
-rfkill - RF kill switch support
-===============================
-
-
-.. contents::
-   :depth: 2
-
-Introduction
-============
-
-The rfkill subsystem provides a generic interface for disabling any radio
-transmitter in the system. When a transmitter is blocked, it shall not
-radiate any power.
-
-The subsystem also provides the ability to react on button presses and
-disable all transmitters of a certain type (or all). This is intended for
-situations where transmitters need to be turned off, for example on
-aircraft.
-
-The rfkill subsystem has a concept of "hard" and "soft" block, which
-differ little in their meaning (block == transmitters off) but rather in
-whether they can be changed or not:
-
- - hard block
-	read-only radio block that cannot be overridden by software
-
- - soft block
-	writable radio block (need not be readable) that is set by
-        the system software.
-
-The rfkill subsystem has two parameters, rfkill.default_state and
-rfkill.master_switch_mode, which are documented in
-admin-guide/kernel-parameters.rst.
-
-
-Implementation details
-======================
-
-The rfkill subsystem is composed of three main components:
-
- * the rfkill core,
- * the deprecated rfkill-input module (an input layer handler, being
-   replaced by userspace policy code) and
- * the rfkill drivers.
-
-The rfkill core provides API for kernel drivers to register their radio
-transmitter with the kernel, methods for turning it on and off, and letting
-the system know about hardware-disabled states that may be implemented on
-the device.
-
-The rfkill core code also notifies userspace of state changes, and provides
-ways for userspace to query the current states. See the "Userspace support"
-section below.
-
-When the device is hard-blocked (either by a call to rfkill_set_hw_state()
-or from query_hw_block), set_block() will be invoked for additional software
-block, but drivers can ignore the method call since they can use the return
-value of the function rfkill_set_hw_state() to sync the software state
-instead of keeping track of calls to set_block(). In fact, drivers should
-use the return value of rfkill_set_hw_state() unless the hardware actually
-keeps track of soft and hard block separately.
-
-
-Kernel API
-==========
-
-Drivers for radio transmitters normally implement an rfkill driver.
-
-Platform drivers might implement input devices if the rfkill button is just
-that, a button. If that button influences the hardware then you need to
-implement an rfkill driver instead. This also applies if the platform provides
-a way to turn on/off the transmitter(s).
-
-For some platforms, it is possible that the hardware state changes during
-suspend/hibernation, in which case it will be necessary to update the rfkill
-core with the current state at resume time.
-
-To create an rfkill driver, driver's Kconfig needs to have::
-
-	depends on RFKILL || !RFKILL
-
-to ensure the driver cannot be built-in when rfkill is modular. The !RFKILL
-case allows the driver to be built when rfkill is not configured, in which
-case all rfkill API can still be used but will be provided by static inlines
-which compile to almost nothing.
-
-Calling rfkill_set_hw_state() when a state change happens is required from
-rfkill drivers that control devices that can be hard-blocked unless they also
-assign the poll_hw_block() callback (then the rfkill core will poll the
-device). Don't do this unless you cannot get the event in any other way.
-
-rfkill provides per-switch LED triggers, which can be used to drive LEDs
-according to the switch state (LED_FULL when blocked, LED_OFF otherwise).
-
-
-Userspace support
-=================
-
-The recommended userspace interface to use is /dev/rfkill, which is a misc
-character device that allows userspace to obtain and set the state of rfkill
-devices and sets of devices. It also notifies userspace about device addition
-and removal. The API is a simple read/write API that is defined in
-linux/rfkill.h, with one ioctl that allows turning off the deprecated input
-handler in the kernel for the transition period.
-
-Except for the one ioctl, communication with the kernel is done via read()
-and write() of instances of 'struct rfkill_event'. In this structure, the
-soft and hard block are properly separated (unlike sysfs, see below) and
-userspace is able to get a consistent snapshot of all rfkill devices in the
-system. Also, it is possible to switch all rfkill drivers (or all drivers of
-a specified type) into a state which also updates the default state for
-hotplugged devices.
-
-After an application opens /dev/rfkill, it can read the current state of all
-devices. Changes can be obtained by either polling the descriptor for
-hotplug or state change events or by listening for uevents emitted by the
-rfkill core framework.
-
-Additionally, each rfkill device is registered in sysfs and emits uevents.
-
-rfkill devices issue uevents (with an action of "change"), with the following
-environment variables set::
-
-	RFKILL_NAME
-	RFKILL_STATE
-	RFKILL_TYPE
-
-The content of these variables corresponds to the "name", "state" and
-"type" sysfs files explained above.
-
-For further details consult Documentation/ABI/stable/sysfs-class-rfkill.
diff --git a/Documentation/s390/vfio-ccw.rst b/Documentation/s390/vfio-ccw.rst
index 1f6d0b56d53e..1e210c6afa88 100644
--- a/Documentation/s390/vfio-ccw.rst
+++ b/Documentation/s390/vfio-ccw.rst
@@ -38,7 +38,7 @@ every detail. More information/reference could be found here:
   qemu/hw/s390x/css.c
 
 For vfio mediated device framework:
-- Documentation/vfio-mediated-device.txt
+- Documentation/driver-api/vfio-mediated-device.rst
 
 Motivation of vfio-ccw
 ----------------------
@@ -322,5 +322,5 @@ Reference
 2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204)
 3. https://en.wikipedia.org/wiki/Channel_I/O
 4. Documentation/s390/cds.rst
-5. Documentation/vfio.txt
-6. Documentation/vfio-mediated-device.txt
+5. Documentation/driver-api/vfio.rst
+6. Documentation/driver-api/vfio-mediated-device.rst
diff --git a/Documentation/sgi-ioc4.txt b/Documentation/sgi-ioc4.txt
deleted file mode 100644
index 72709222d3c0..000000000000
--- a/Documentation/sgi-ioc4.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-====================================
-SGI IOC4 PCI (multi function) device
-====================================
-
-The SGI IOC4 PCI device is a bit of a strange beast, so some notes on
-it are in order.
-
-First, even though the IOC4 performs multiple functions, such as an
-IDE controller, a serial controller, a PS/2 keyboard/mouse controller,
-and an external interrupt mechanism, it's not implemented as a
-multifunction device.  The consequence of this from a software
-standpoint is that all these functions share a single IRQ, and
-they can't all register to own the same PCI device ID.  To make
-matters a bit worse, some of the register blocks (and even registers
-themselves) present in IOC4 are mixed-purpose between these several
-functions, meaning that there's no clear "owning" device driver.
-
-The solution is to organize the IOC4 driver into several independent
-drivers, "ioc4", "sgiioc4", and "ioc4_serial".  Note that there is no
-PS/2 controller driver as this functionality has never been wired up
-on a shipping IO card.
-
-ioc4
-====
-This is the core (or shim) driver for IOC4.  It is responsible for
-initializing the basic functionality of the chip, and allocating
-the PCI resources that are shared between the IOC4 functions.
-
-This driver also provides registration functions that the other
-IOC4 drivers can call to make their presence known.  Each driver
-needs to provide a probe and remove function, which are invoked
-by the core driver at appropriate times.  The interface of these
-IOC4 function probe and remove operations isn't precisely the same
-as PCI device probe and remove operations, but is logically the
-same operation.
-
-sgiioc4
-=======
-This is the IDE driver for IOC4.  Its name isn't very descriptive
-simply for historical reasons (it used to be the only IOC4 driver
-component).  There's not much to say about it other than it hooks
-up to the ioc4 driver via the appropriate registration, probe, and
-remove functions.
-
-ioc4_serial
-===========
-This is the serial driver for IOC4.  There's not much to say about it
-other than it hooks up to the ioc4 driver via the appropriate registration,
-probe, and remove functions.
diff --git a/Documentation/smsc_ece1099.txt b/Documentation/smsc_ece1099.txt
deleted file mode 100644
index 079277421eaf..000000000000
--- a/Documentation/smsc_ece1099.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-=================================================
-Msc Keyboard Scan Expansion/GPIO Expansion device
-=================================================
-
-What is smsc-ece1099?
-----------------------
-
-The ECE1099 is a 40-Pin 3.3V Keyboard Scan Expansion
-or GPIO Expansion device. The device supports a keyboard
-scan matrix of 23x8. The device is connected to a Master
-via the SMSC BC-Link interface or via the SMBus.
-Keypad scan Input(KSI) and Keypad Scan Output(KSO) signals
-are multiplexed with GPIOs.
-
-Interrupt generation
---------------------
-
-Interrupts can be generated by an edge detection on a GPIO
-pin or an edge detection on one of the bus interface pins.
-Interrupts can also be detected on the keyboard scan interface.
-The bus interrupt pin (BC_INT# or SMBUS_INT#) is asserted if
-any bit in one of the Interrupt Status registers is 1 and
-the corresponding Interrupt Mask bit is also 1.
-
-In order for software to determine which device is the source
-of an interrupt, it should first read the Group Interrupt Status Register
-to determine which Status register group is a source for the interrupt.
-Software should read both the Status register and the associated Mask register,
-then AND the two values together. Bits that are 1 in the result of the AND
-are active interrupts. Software clears an interrupt by writing a 1 to the
-corresponding bit in the Status register.
-
-Communication Protocol
-----------------------
-
-- SMbus slave Interface
-	The host processor communicates with the ECE1099 device
-	through a series of read/write registers via the SMBus
-	interface. SMBus is a serial communication protocol between
-	a computer host and its peripheral devices. The SMBus data
-	rate is 10KHz minimum to 400 KHz maximum
-
-- Slave Bus Interface
-	The ECE1099 device SMBus implementation is a subset of the
-	SMBus interface to the host. The device is a slave-only SMBus device.
-	The implementation in the device is a subset of SMBus since it
-	only supports four protocols.
-
-	The Write Byte, Read Byte, Send Byte, and Receive Byte protocols are the
-	only valid SMBus protocols for the device.
-
-- BC-LinkTM Interface
-	The BC-Link is a proprietary bus that allows communication
-	between a Master device and a Companion device. The Master
-	device uses this serial bus to read and write registers
-	located on the Companion device. The bus comprises three signals,
-	BC_CLK, BC_DAT and BC_INT#. The Master device always provides the
-	clock, BC_CLK, and the Companion device is the source for an
-	independent asynchronous interrupt signal, BC_INT#. The ECE1099
-	supports BC-Link speeds up to 24MHz.
diff --git a/Documentation/switchtec.txt b/Documentation/switchtec.txt
deleted file mode 100644
index 30d6a64e53f7..000000000000
--- a/Documentation/switchtec.txt
+++ /dev/null
@@ -1,102 +0,0 @@
-========================
-Linux Switchtec Support
-========================
-
-Microsemi's "Switchtec" line of PCI switch devices is already
-supported by the kernel with standard PCI switch drivers. However, the
-Switchtec device advertises a special management endpoint which
-enables some additional functionality. This includes:
-
-* Packet and Byte Counters
-* Firmware Upgrades
-* Event and Error logs
-* Querying port link status
-* Custom user firmware commands
-
-The switchtec kernel module implements this functionality.
-
-
-Interface
-=========
-
-The primary means of communicating with the Switchtec management firmware is
-through the Memory-mapped Remote Procedure Call (MRPC) interface.
-Commands are submitted to the interface with a 4-byte command
-identifier and up to 1KB of command specific data. The firmware will
-respond with a 4-byte return code and up to 1KB of command-specific
-data. The interface only processes a single command at a time.
-
-
-Userspace Interface
-===================
-
-The MRPC interface will be exposed to userspace through a simple char
-device: /dev/switchtec#, one for each management endpoint in the system.
-
-The char device has the following semantics:
-
-* A write must consist of at least 4 bytes and no more than 1028 bytes.
-  The first 4 bytes will be interpreted as the Command ID and the
-  remainder will be used as the input data. A write will send the
-  command to the firmware to begin processing.
-
-* Each write must be followed by exactly one read. Any double write will
-  produce an error and any read that doesn't follow a write will
-  produce an error.
-
-* A read will block until the firmware completes the command and return
-  the 4-byte Command Return Value plus up to 1024 bytes of output
-  data. (The length will be specified by the size parameter of the read
-  call -- reading less than 4 bytes will produce an error.)
-
-* The poll call will also be supported for userspace applications that
-  need to do other things while waiting for the command to complete.
-
-The following IOCTLs are also supported by the device:
-
-* SWITCHTEC_IOCTL_FLASH_INFO - Retrieve firmware length and number
-  of partitions in the device.
-
-* SWITCHTEC_IOCTL_FLASH_PART_INFO - Retrieve address and lengeth for
-  any specified partition in flash.
-
-* SWITCHTEC_IOCTL_EVENT_SUMMARY - Read a structure of bitmaps
-  indicating all uncleared events.
-
-* SWITCHTEC_IOCTL_EVENT_CTL - Get the current count, clear and set flags
-  for any event. This ioctl takes in a switchtec_ioctl_event_ctl struct
-  with the event_id, index and flags set (index being the partition or PFF
-  number for non-global events). It returns whether the event has
-  occurred, the number of times and any event specific data. The flags
-  can be used to clear the count or enable and disable actions to
-  happen when the event occurs.
-  By using the SWITCHTEC_IOCTL_EVENT_FLAG_EN_POLL flag,
-  you can set an event to trigger a poll command to return with
-  POLLPRI. In this way, userspace can wait for events to occur.
-
-* SWITCHTEC_IOCTL_PFF_TO_PORT and SWITCHTEC_IOCTL_PORT_TO_PFF convert
-  between PCI Function Framework number (used by the event system)
-  and Switchtec Logic Port ID and Partition number (which is more
-  user friendly).
-
-
-Non-Transparent Bridge (NTB) Driver
-===================================
-
-An NTB hardware driver is provided for the Switchtec hardware in
-ntb_hw_switchtec. Currently, it only supports switches configured with
-exactly 2 NT partitions and zero or more non-NT partitions. It also requires
-the following configuration settings:
-
-* Both NT partitions must be able to access each other's GAS spaces.
-  Thus, the bits in the GAS Access Vector under Management Settings
-  must be set to support this.
-* Kernel configuration MUST include support for NTB (CONFIG_NTB needs
-  to be set)
-
-NT EP BAR 2 will be dynamically configured as a Direct Window, and
-the configuration file does not need to configure it explicitly.
-
-Please refer to Documentation/ntb.txt in Linux source tree for an overall
-understanding of the Linux NTB stack. ntb_hw_switchtec works as an NTB
-Hardware Driver in this stack.
diff --git a/Documentation/sync_file.txt b/Documentation/sync_file.txt
deleted file mode 100644
index 496fb2c3b3e6..000000000000
--- a/Documentation/sync_file.txt
+++ /dev/null
@@ -1,86 +0,0 @@
-===================
-Sync File API Guide
-===================
-
-:Author: Gustavo Padovan <gustavo at padovan dot org>
-
-This document serves as a guide for device drivers writers on what the
-sync_file API is, and how drivers can support it. Sync file is the carrier of
-the fences(struct dma_fence) that are needed to synchronize between drivers or
-across process boundaries.
-
-The sync_file API is meant to be used to send and receive fence information
-to/from userspace. It enables userspace to do explicit fencing, where instead
-of attaching a fence to the buffer a producer driver (such as a GPU or V4L
-driver) sends the fence related to the buffer to userspace via a sync_file.
-
-The sync_file then can be sent to the consumer (DRM driver for example), that
-will not use the buffer for anything before the fence(s) signals, i.e., the
-driver that issued the fence is not using/processing the buffer anymore, so it
-signals that the buffer is ready to use. And vice-versa for the consumer ->
-producer part of the cycle.
-
-Sync files allows userspace awareness on buffer sharing synchronization between
-drivers.
-
-Sync file was originally added in the Android kernel but current Linux Desktop
-can benefit a lot from it.
-
-in-fences and out-fences
-------------------------
-
-Sync files can go either to or from userspace. When a sync_file is sent from
-the driver to userspace we call the fences it contains 'out-fences'. They are
-related to a buffer that the driver is processing or is going to process, so
-the driver creates an out-fence to be able to notify, through
-dma_fence_signal(), when it has finished using (or processing) that buffer.
-Out-fences are fences that the driver creates.
-
-On the other hand if the driver receives fence(s) through a sync_file from
-userspace we call these fence(s) 'in-fences'. Receiving in-fences means that
-we need to wait for the fence(s) to signal before using any buffer related to
-the in-fences.
-
-Creating Sync Files
--------------------
-
-When a driver needs to send an out-fence userspace it creates a sync_file.
-
-Interface::
-
-	struct sync_file *sync_file_create(struct dma_fence *fence);
-
-The caller pass the out-fence and gets back the sync_file. That is just the
-first step, next it needs to install an fd on sync_file->file. So it gets an
-fd::
-
-	fd = get_unused_fd_flags(O_CLOEXEC);
-
-and installs it on sync_file->file::
-
-	fd_install(fd, sync_file->file);
-
-The sync_file fd now can be sent to userspace.
-
-If the creation process fail, or the sync_file needs to be released by any
-other reason fput(sync_file->file) should be used.
-
-Receiving Sync Files from Userspace
------------------------------------
-
-When userspace needs to send an in-fence to the driver it passes file descriptor
-of the Sync File to the kernel. The kernel can then retrieve the fences
-from it.
-
-Interface::
-
-	struct dma_fence *sync_file_get_fence(int fd);
-
-
-The returned reference is owned by the caller and must be disposed of
-afterwards using dma_fence_put(). In case of error, a NULL is returned instead.
-
-References:
-
-1. struct sync_file in include/linux/sync_file.h
-2. All interfaces mentioned above defined in include/linux/sync_file.h
diff --git a/Documentation/vfio-mediated-device.txt b/Documentation/vfio-mediated-device.txt
deleted file mode 100644
index c3f69bcaf96e..000000000000
--- a/Documentation/vfio-mediated-device.txt
+++ /dev/null
@@ -1,414 +0,0 @@
-.. include:: <isonum.txt>
-
-=====================
-VFIO Mediated devices
-=====================
-
-:Copyright: |copy| 2016, NVIDIA CORPORATION. All rights reserved.
-:Author: Neo Jia <cjia@nvidia.com>
-:Author: Kirti Wankhede <kwankhede@nvidia.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License version 2 as
-published by the Free Software Foundation.
-
-
-Virtual Function I/O (VFIO) Mediated devices[1]
-===============================================
-
-The number of use cases for virtualizing DMA devices that do not have built-in
-SR_IOV capability is increasing. Previously, to virtualize such devices,
-developers had to create their own management interfaces and APIs, and then
-integrate them with user space software. To simplify integration with user space
-software, we have identified common requirements and a unified management
-interface for such devices.
-
-The VFIO driver framework provides unified APIs for direct device access. It is
-an IOMMU/device-agnostic framework for exposing direct device access to user
-space in a secure, IOMMU-protected environment. This framework is used for
-multiple devices, such as GPUs, network adapters, and compute accelerators. With
-direct device access, virtual machines or user space applications have direct
-access to the physical device. This framework is reused for mediated devices.
-
-The mediated core driver provides a common interface for mediated device
-management that can be used by drivers of different devices. This module
-provides a generic interface to perform these operations:
-
-* Create and destroy a mediated device
-* Add a mediated device to and remove it from a mediated bus driver
-* Add a mediated device to and remove it from an IOMMU group
-
-The mediated core driver also provides an interface to register a bus driver.
-For example, the mediated VFIO mdev driver is designed for mediated devices and
-supports VFIO APIs. The mediated bus driver adds a mediated device to and
-removes it from a VFIO group.
-
-The following high-level block diagram shows the main components and interfaces
-in the VFIO mediated driver framework. The diagram shows NVIDIA, Intel, and IBM
-devices as examples, as these devices are the first devices to use this module::
-
-     +---------------+
-     |               |
-     | +-----------+ |  mdev_register_driver() +--------------+
-     | |           | +<------------------------+              |
-     | |  mdev     | |                         |              |
-     | |  bus      | +------------------------>+ vfio_mdev.ko |<-> VFIO user
-     | |  driver   | |     probe()/remove()    |              |    APIs
-     | |           | |                         +--------------+
-     | +-----------+ |
-     |               |
-     |  MDEV CORE    |
-     |   MODULE      |
-     |   mdev.ko     |
-     | +-----------+ |  mdev_register_device() +--------------+
-     | |           | +<------------------------+              |
-     | |           | |                         |  nvidia.ko   |<-> physical
-     | |           | +------------------------>+              |    device
-     | |           | |        callbacks        +--------------+
-     | | Physical  | |
-     | |  device   | |  mdev_register_device() +--------------+
-     | | interface | |<------------------------+              |
-     | |           | |                         |  i915.ko     |<-> physical
-     | |           | +------------------------>+              |    device
-     | |           | |        callbacks        +--------------+
-     | |           | |
-     | |           | |  mdev_register_device() +--------------+
-     | |           | +<------------------------+              |
-     | |           | |                         | ccw_device.ko|<-> physical
-     | |           | +------------------------>+              |    device
-     | |           | |        callbacks        +--------------+
-     | +-----------+ |
-     +---------------+
-
-
-Registration Interfaces
-=======================
-
-The mediated core driver provides the following types of registration
-interfaces:
-
-* Registration interface for a mediated bus driver
-* Physical device driver interface
-
-Registration Interface for a Mediated Bus Driver
-------------------------------------------------
-
-The registration interface for a mediated bus driver provides the following
-structure to represent a mediated device's driver::
-
-     /*
-      * struct mdev_driver [2] - Mediated device's driver
-      * @name: driver name
-      * @probe: called when new device created
-      * @remove: called when device removed
-      * @driver: device driver structure
-      */
-     struct mdev_driver {
-	     const char *name;
-	     int  (*probe)  (struct device *dev);
-	     void (*remove) (struct device *dev);
-	     struct device_driver    driver;
-     };
-
-A mediated bus driver for mdev should use this structure in the function calls
-to register and unregister itself with the core driver:
-
-* Register::
-
-    extern int  mdev_register_driver(struct mdev_driver *drv,
-				   struct module *owner);
-
-* Unregister::
-
-    extern void mdev_unregister_driver(struct mdev_driver *drv);
-
-The mediated bus driver is responsible for adding mediated devices to the VFIO
-group when devices are bound to the driver and removing mediated devices from
-the VFIO when devices are unbound from the driver.
-
-
-Physical Device Driver Interface
---------------------------------
-
-The physical device driver interface provides the mdev_parent_ops[3] structure
-to define the APIs to manage work in the mediated core driver that is related
-to the physical device.
-
-The structures in the mdev_parent_ops structure are as follows:
-
-* dev_attr_groups: attributes of the parent device
-* mdev_attr_groups: attributes of the mediated device
-* supported_config: attributes to define supported configurations
-
-The functions in the mdev_parent_ops structure are as follows:
-
-* create: allocate basic resources in a driver for a mediated device
-* remove: free resources in a driver when a mediated device is destroyed
-
-(Note that mdev-core provides no implicit serialization of create/remove
-callbacks per mdev parent device, per mdev type, or any other categorization.
-Vendor drivers are expected to be fully asynchronous in this respect or
-provide their own internal resource protection.)
-
-The callbacks in the mdev_parent_ops structure are as follows:
-
-* open: open callback of mediated device
-* close: close callback of mediated device
-* ioctl: ioctl callback of mediated device
-* read : read emulation callback
-* write: write emulation callback
-* mmap: mmap emulation callback
-
-A driver should use the mdev_parent_ops structure in the function call to
-register itself with the mdev core driver::
-
-	extern int  mdev_register_device(struct device *dev,
-	                                 const struct mdev_parent_ops *ops);
-
-However, the mdev_parent_ops structure is not required in the function call
-that a driver should use to unregister itself with the mdev core driver::
-
-	extern void mdev_unregister_device(struct device *dev);
-
-
-Mediated Device Management Interface Through sysfs
-==================================================
-
-The management interface through sysfs enables user space software, such as
-libvirt, to query and configure mediated devices in a hardware-agnostic fashion.
-This management interface provides flexibility to the underlying physical
-device's driver to support features such as:
-
-* Mediated device hot plug
-* Multiple mediated devices in a single virtual machine
-* Multiple mediated devices from different physical devices
-
-Links in the mdev_bus Class Directory
--------------------------------------
-The /sys/class/mdev_bus/ directory contains links to devices that are registered
-with the mdev core driver.
-
-Directories and files under the sysfs for Each Physical Device
---------------------------------------------------------------
-
-::
-
-  |- [parent physical device]
-  |--- Vendor-specific-attributes [optional]
-  |--- [mdev_supported_types]
-  |     |--- [<type-id>]
-  |     |   |--- create
-  |     |   |--- name
-  |     |   |--- available_instances
-  |     |   |--- device_api
-  |     |   |--- description
-  |     |   |--- [devices]
-  |     |--- [<type-id>]
-  |     |   |--- create
-  |     |   |--- name
-  |     |   |--- available_instances
-  |     |   |--- device_api
-  |     |   |--- description
-  |     |   |--- [devices]
-  |     |--- [<type-id>]
-  |          |--- create
-  |          |--- name
-  |          |--- available_instances
-  |          |--- device_api
-  |          |--- description
-  |          |--- [devices]
-
-* [mdev_supported_types]
-
-  The list of currently supported mediated device types and their details.
-
-  [<type-id>], device_api, and available_instances are mandatory attributes
-  that should be provided by vendor driver.
-
-* [<type-id>]
-
-  The [<type-id>] name is created by adding the device driver string as a prefix
-  to the string provided by the vendor driver. This format of this name is as
-  follows::
-
-	sprintf(buf, "%s-%s", dev_driver_string(parent->dev), group->name);
-
-  (or using mdev_parent_dev(mdev) to arrive at the parent device outside
-  of the core mdev code)
-
-* device_api
-
-  This attribute should show which device API is being created, for example,
-  "vfio-pci" for a PCI device.
-
-* available_instances
-
-  This attribute should show the number of devices of type <type-id> that can be
-  created.
-
-* [device]
-
-  This directory contains links to the devices of type <type-id> that have been
-  created.
-
-* name
-
-  This attribute should show human readable name. This is optional attribute.
-
-* description
-
-  This attribute should show brief features/description of the type. This is
-  optional attribute.
-
-Directories and Files Under the sysfs for Each mdev Device
-----------------------------------------------------------
-
-::
-
-  |- [parent phy device]
-  |--- [$MDEV_UUID]
-         |--- remove
-         |--- mdev_type {link to its type}
-         |--- vendor-specific-attributes [optional]
-
-* remove (write only)
-
-Writing '1' to the 'remove' file destroys the mdev device. The vendor driver can
-fail the remove() callback if that device is active and the vendor driver
-doesn't support hot unplug.
-
-Example::
-
-	# echo 1 > /sys/bus/mdev/devices/$mdev_UUID/remove
-
-Mediated device Hot plug
-------------------------
-
-Mediated devices can be created and assigned at runtime. The procedure to hot
-plug a mediated device is the same as the procedure to hot plug a PCI device.
-
-Translation APIs for Mediated Devices
-=====================================
-
-The following APIs are provided for translating user pfn to host pfn in a VFIO
-driver::
-
-	extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
-				  int npage, int prot, unsigned long *phys_pfn);
-
-	extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
-				    int npage);
-
-These functions call back into the back-end IOMMU module by using the pin_pages
-and unpin_pages callbacks of the struct vfio_iommu_driver_ops[4]. Currently
-these callbacks are supported in the TYPE1 IOMMU module. To enable them for
-other IOMMU backend modules, such as PPC64 sPAPR module, they need to provide
-these two callback functions.
-
-Using the Sample Code
-=====================
-
-mtty.c in samples/vfio-mdev/ directory is a sample driver program to
-demonstrate how to use the mediated device framework.
-
-The sample driver creates an mdev device that simulates a serial port over a PCI
-card.
-
-1. Build and load the mtty.ko module.
-
-   This step creates a dummy device, /sys/devices/virtual/mtty/mtty/
-
-   Files in this device directory in sysfs are similar to the following::
-
-     # tree /sys/devices/virtual/mtty/mtty/
-        /sys/devices/virtual/mtty/mtty/
-        |-- mdev_supported_types
-        |   |-- mtty-1
-        |   |   |-- available_instances
-        |   |   |-- create
-        |   |   |-- device_api
-        |   |   |-- devices
-        |   |   `-- name
-        |   `-- mtty-2
-        |       |-- available_instances
-        |       |-- create
-        |       |-- device_api
-        |       |-- devices
-        |       `-- name
-        |-- mtty_dev
-        |   `-- sample_mtty_dev
-        |-- power
-        |   |-- autosuspend_delay_ms
-        |   |-- control
-        |   |-- runtime_active_time
-        |   |-- runtime_status
-        |   `-- runtime_suspended_time
-        |-- subsystem -> ../../../../class/mtty
-        `-- uevent
-
-2. Create a mediated device by using the dummy device that you created in the
-   previous step::
-
-     # echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1001" >	\
-              /sys/devices/virtual/mtty/mtty/mdev_supported_types/mtty-2/create
-
-3. Add parameters to qemu-kvm::
-
-     -device vfio-pci,\
-      sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001
-
-4. Boot the VM.
-
-   In the Linux guest VM, with no hardware on the host, the device appears
-   as  follows::
-
-     # lspci -s 00:05.0 -xxvv
-     00:05.0 Serial controller: Device 4348:3253 (rev 10) (prog-if 02 [16550])
-             Subsystem: Device 4348:3253
-             Physical Slot: 5
-             Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr-
-     Stepping- SERR- FastB2B- DisINTx-
-             Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort-
-     <TAbort- <MAbort- >SERR- <PERR- INTx-
-             Interrupt: pin A routed to IRQ 10
-             Region 0: I/O ports at c150 [size=8]
-             Region 1: I/O ports at c158 [size=8]
-             Kernel driver in use: serial
-     00: 48 43 53 32 01 00 00 02 10 02 00 07 00 00 00 00
-     10: 51 c1 00 00 59 c1 00 00 00 00 00 00 00 00 00 00
-     20: 00 00 00 00 00 00 00 00 00 00 00 00 48 43 53 32
-     30: 00 00 00 00 00 00 00 00 00 00 00 00 0a 01 00 00
-
-     In the Linux guest VM, dmesg output for the device is as follows:
-
-     serial 0000:00:05.0: PCI INT A -> Link[LNKA] -> GSI 10 (level, high) -> IRQ 10
-     0000:00:05.0: ttyS1 at I/O 0xc150 (irq = 10) is a 16550A
-     0000:00:05.0: ttyS2 at I/O 0xc158 (irq = 10) is a 16550A
-
-
-5. In the Linux guest VM, check the serial ports::
-
-     # setserial -g /dev/ttyS*
-     /dev/ttyS0, UART: 16550A, Port: 0x03f8, IRQ: 4
-     /dev/ttyS1, UART: 16550A, Port: 0xc150, IRQ: 10
-     /dev/ttyS2, UART: 16550A, Port: 0xc158, IRQ: 10
-
-6. Using minicom or any terminal emulation program, open port /dev/ttyS1 or
-   /dev/ttyS2 with hardware flow control disabled.
-
-7. Type data on the minicom terminal or send data to the terminal emulation
-   program and read the data.
-
-   Data is loop backed from hosts mtty driver.
-
-8. Destroy the mediated device that you created::
-
-     # echo 1 > /sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001/remove
-
-References
-==========
-
-1. See Documentation/vfio.txt for more information on VFIO.
-2. struct mdev_driver in include/linux/mdev.h
-3. struct mdev_parent_ops in include/linux/mdev.h
-4. struct vfio_iommu_driver_ops in include/linux/vfio.h
diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
deleted file mode 100644
index f1a4d3c3ba0b..000000000000
--- a/Documentation/vfio.txt
+++ /dev/null
@@ -1,520 +0,0 @@
-==================================
-VFIO - "Virtual Function I/O" [1]_
-==================================
-
-Many modern system now provide DMA and interrupt remapping facilities
-to help ensure I/O devices behave within the boundaries they've been
-allotted.  This includes x86 hardware with AMD-Vi and Intel VT-d,
-POWER systems with Partitionable Endpoints (PEs) and embedded PowerPC
-systems such as Freescale PAMU.  The VFIO driver is an IOMMU/device
-agnostic framework for exposing direct device access to userspace, in
-a secure, IOMMU protected environment.  In other words, this allows
-safe [2]_, non-privileged, userspace drivers.
-
-Why do we want that?  Virtual machines often make use of direct device
-access ("device assignment") when configured for the highest possible
-I/O performance.  From a device and host perspective, this simply
-turns the VM into a userspace driver, with the benefits of
-significantly reduced latency, higher bandwidth, and direct use of
-bare-metal device drivers [3]_.
-
-Some applications, particularly in the high performance computing
-field, also benefit from low-overhead, direct device access from
-userspace.  Examples include network adapters (often non-TCP/IP based)
-and compute accelerators.  Prior to VFIO, these drivers had to either
-go through the full development cycle to become proper upstream
-driver, be maintained out of tree, or make use of the UIO framework,
-which has no notion of IOMMU protection, limited interrupt support,
-and requires root privileges to access things like PCI configuration
-space.
-
-The VFIO driver framework intends to unify these, replacing both the
-KVM PCI specific device assignment code as well as provide a more
-secure, more featureful userspace driver environment than UIO.
-
-Groups, Devices, and IOMMUs
----------------------------
-
-Devices are the main target of any I/O driver.  Devices typically
-create a programming interface made up of I/O access, interrupts,
-and DMA.  Without going into the details of each of these, DMA is
-by far the most critical aspect for maintaining a secure environment
-as allowing a device read-write access to system memory imposes the
-greatest risk to the overall system integrity.
-
-To help mitigate this risk, many modern IOMMUs now incorporate
-isolation properties into what was, in many cases, an interface only
-meant for translation (ie. solving the addressing problems of devices
-with limited address spaces).  With this, devices can now be isolated
-from each other and from arbitrary memory access, thus allowing
-things like secure direct assignment of devices into virtual machines.
-
-This isolation is not always at the granularity of a single device
-though.  Even when an IOMMU is capable of this, properties of devices,
-interconnects, and IOMMU topologies can each reduce this isolation.
-For instance, an individual device may be part of a larger multi-
-function enclosure.  While the IOMMU may be able to distinguish
-between devices within the enclosure, the enclosure may not require
-transactions between devices to reach the IOMMU.  Examples of this
-could be anything from a multi-function PCI device with backdoors
-between functions to a non-PCI-ACS (Access Control Services) capable
-bridge allowing redirection without reaching the IOMMU.  Topology
-can also play a factor in terms of hiding devices.  A PCIe-to-PCI
-bridge masks the devices behind it, making transaction appear as if
-from the bridge itself.  Obviously IOMMU design plays a major factor
-as well.
-
-Therefore, while for the most part an IOMMU may have device level
-granularity, any system is susceptible to reduced granularity.  The
-IOMMU API therefore supports a notion of IOMMU groups.  A group is
-a set of devices which is isolatable from all other devices in the
-system.  Groups are therefore the unit of ownership used by VFIO.
-
-While the group is the minimum granularity that must be used to
-ensure secure user access, it's not necessarily the preferred
-granularity.  In IOMMUs which make use of page tables, it may be
-possible to share a set of page tables between different groups,
-reducing the overhead both to the platform (reduced TLB thrashing,
-reduced duplicate page tables), and to the user (programming only
-a single set of translations).  For this reason, VFIO makes use of
-a container class, which may hold one or more groups.  A container
-is created by simply opening the /dev/vfio/vfio character device.
-
-On its own, the container provides little functionality, with all
-but a couple version and extension query interfaces locked away.
-The user needs to add a group into the container for the next level
-of functionality.  To do this, the user first needs to identify the
-group associated with the desired device.  This can be done using
-the sysfs links described in the example below.  By unbinding the
-device from the host driver and binding it to a VFIO driver, a new
-VFIO group will appear for the group as /dev/vfio/$GROUP, where
-$GROUP is the IOMMU group number of which the device is a member.
-If the IOMMU group contains multiple devices, each will need to
-be bound to a VFIO driver before operations on the VFIO group
-are allowed (it's also sufficient to only unbind the device from
-host drivers if a VFIO driver is unavailable; this will make the
-group available, but not that particular device).  TBD - interface
-for disabling driver probing/locking a device.
-
-Once the group is ready, it may be added to the container by opening
-the VFIO group character device (/dev/vfio/$GROUP) and using the
-VFIO_GROUP_SET_CONTAINER ioctl, passing the file descriptor of the
-previously opened container file.  If desired and if the IOMMU driver
-supports sharing the IOMMU context between groups, multiple groups may
-be set to the same container.  If a group fails to set to a container
-with existing groups, a new empty container will need to be used
-instead.
-
-With a group (or groups) attached to a container, the remaining
-ioctls become available, enabling access to the VFIO IOMMU interfaces.
-Additionally, it now becomes possible to get file descriptors for each
-device within a group using an ioctl on the VFIO group file descriptor.
-
-The VFIO device API includes ioctls for describing the device, the I/O
-regions and their read/write/mmap offsets on the device descriptor, as
-well as mechanisms for describing and registering interrupt
-notifications.
-
-VFIO Usage Example
-------------------
-
-Assume user wants to access PCI device 0000:06:0d.0::
-
-	$ readlink /sys/bus/pci/devices/0000:06:0d.0/iommu_group
-	../../../../kernel/iommu_groups/26
-
-This device is therefore in IOMMU group 26.  This device is on the
-pci bus, therefore the user will make use of vfio-pci to manage the
-group::
-
-	# modprobe vfio-pci
-
-Binding this device to the vfio-pci driver creates the VFIO group
-character devices for this group::
-
-	$ lspci -n -s 0000:06:0d.0
-	06:0d.0 0401: 1102:0002 (rev 08)
-	# echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind
-	# echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id
-
-Now we need to look at what other devices are in the group to free
-it for use by VFIO::
-
-	$ ls -l /sys/bus/pci/devices/0000:06:0d.0/iommu_group/devices
-	total 0
-	lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:00:1e.0 ->
-		../../../../devices/pci0000:00/0000:00:1e.0
-	lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:06:0d.0 ->
-		../../../../devices/pci0000:00/0000:00:1e.0/0000:06:0d.0
-	lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:06:0d.1 ->
-		../../../../devices/pci0000:00/0000:00:1e.0/0000:06:0d.1
-
-This device is behind a PCIe-to-PCI bridge [4]_, therefore we also
-need to add device 0000:06:0d.1 to the group following the same
-procedure as above.  Device 0000:00:1e.0 is a bridge that does
-not currently have a host driver, therefore it's not required to
-bind this device to the vfio-pci driver (vfio-pci does not currently
-support PCI bridges).
-
-The final step is to provide the user with access to the group if
-unprivileged operation is desired (note that /dev/vfio/vfio provides
-no capabilities on its own and is therefore expected to be set to
-mode 0666 by the system)::
-
-	# chown user:user /dev/vfio/26
-
-The user now has full access to all the devices and the iommu for this
-group and can access them as follows::
-
-	int container, group, device, i;
-	struct vfio_group_status group_status =
-					{ .argsz = sizeof(group_status) };
-	struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
-	struct vfio_iommu_type1_dma_map dma_map = { .argsz = sizeof(dma_map) };
-	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
-
-	/* Create a new container */
-	container = open("/dev/vfio/vfio", O_RDWR);
-
-	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
-		/* Unknown API version */
-
-	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
-		/* Doesn't support the IOMMU driver we want. */
-
-	/* Open the group */
-	group = open("/dev/vfio/26", O_RDWR);
-
-	/* Test the group is viable and available */
-	ioctl(group, VFIO_GROUP_GET_STATUS, &group_status);
-
-	if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE))
-		/* Group is not viable (ie, not all devices bound for vfio) */
-
-	/* Add the group to the container */
-	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
-
-	/* Enable the IOMMU model we want */
-	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
-
-	/* Get addition IOMMU info */
-	ioctl(container, VFIO_IOMMU_GET_INFO, &iommu_info);
-
-	/* Allocate some space and setup a DMA mapping */
-	dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE,
-			     MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
-	dma_map.size = 1024 * 1024;
-	dma_map.iova = 0; /* 1MB starting at 0x0 from device view */
-	dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
-
-	ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map);
-
-	/* Get a file descriptor for the device */
-	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
-
-	/* Test and setup the device */
-	ioctl(device, VFIO_DEVICE_GET_INFO, &device_info);
-
-	for (i = 0; i < device_info.num_regions; i++) {
-		struct vfio_region_info reg = { .argsz = sizeof(reg) };
-
-		reg.index = i;
-
-		ioctl(device, VFIO_DEVICE_GET_REGION_INFO, &reg);
-
-		/* Setup mappings... read/write offsets, mmaps
-		 * For PCI devices, config space is a region */
-	}
-
-	for (i = 0; i < device_info.num_irqs; i++) {
-		struct vfio_irq_info irq = { .argsz = sizeof(irq) };
-
-		irq.index = i;
-
-		ioctl(device, VFIO_DEVICE_GET_IRQ_INFO, &irq);
-
-		/* Setup IRQs... eventfds, VFIO_DEVICE_SET_IRQS */
-	}
-
-	/* Gratuitous device reset and go... */
-	ioctl(device, VFIO_DEVICE_RESET);
-
-VFIO User API
--------------------------------------------------------------------------------
-
-Please see include/linux/vfio.h for complete API documentation.
-
-VFIO bus driver API
--------------------------------------------------------------------------------
-
-VFIO bus drivers, such as vfio-pci make use of only a few interfaces
-into VFIO core.  When devices are bound and unbound to the driver,
-the driver should call vfio_add_group_dev() and vfio_del_group_dev()
-respectively::
-
-	extern int vfio_add_group_dev(struct device *dev,
-				      const struct vfio_device_ops *ops,
-				      void *device_data);
-
-	extern void *vfio_del_group_dev(struct device *dev);
-
-vfio_add_group_dev() indicates to the core to begin tracking the
-iommu_group of the specified dev and register the dev as owned by
-a VFIO bus driver.  The driver provides an ops structure for callbacks
-similar to a file operations structure::
-
-	struct vfio_device_ops {
-		int	(*open)(void *device_data);
-		void	(*release)(void *device_data);
-		ssize_t	(*read)(void *device_data, char __user *buf,
-				size_t count, loff_t *ppos);
-		ssize_t	(*write)(void *device_data, const char __user *buf,
-				 size_t size, loff_t *ppos);
-		long	(*ioctl)(void *device_data, unsigned int cmd,
-				 unsigned long arg);
-		int	(*mmap)(void *device_data, struct vm_area_struct *vma);
-	};
-
-Each function is passed the device_data that was originally registered
-in the vfio_add_group_dev() call above.  This allows the bus driver
-an easy place to store its opaque, private data.  The open/release
-callbacks are issued when a new file descriptor is created for a
-device (via VFIO_GROUP_GET_DEVICE_FD).  The ioctl interface provides
-a direct pass through for VFIO_DEVICE_* ioctls.  The read/write/mmap
-interfaces implement the device region access defined by the device's
-own VFIO_DEVICE_GET_REGION_INFO ioctl.
-
-
-PPC64 sPAPR implementation note
--------------------------------
-
-This implementation has some specifics:
-
-1) On older systems (POWER7 with P5IOC2/IODA1) only one IOMMU group per
-   container is supported as an IOMMU table is allocated at the boot time,
-   one table per a IOMMU group which is a Partitionable Endpoint (PE)
-   (PE is often a PCI domain but not always).
-
-   Newer systems (POWER8 with IODA2) have improved hardware design which allows
-   to remove this limitation and have multiple IOMMU groups per a VFIO
-   container.
-
-2) The hardware supports so called DMA windows - the PCI address range
-   within which DMA transfer is allowed, any attempt to access address space
-   out of the window leads to the whole PE isolation.
-
-3) PPC64 guests are paravirtualized but not fully emulated. There is an API
-   to map/unmap pages for DMA, and it normally maps 1..32 pages per call and
-   currently there is no way to reduce the number of calls. In order to make
-   things faster, the map/unmap handling has been implemented in real mode
-   which provides an excellent performance which has limitations such as
-   inability to do locked pages accounting in real time.
-
-4) According to sPAPR specification, A Partitionable Endpoint (PE) is an I/O
-   subtree that can be treated as a unit for the purposes of partitioning and
-   error recovery. A PE may be a single or multi-function IOA (IO Adapter), a
-   function of a multi-function IOA, or multiple IOAs (possibly including
-   switch and bridge structures above the multiple IOAs). PPC64 guests detect
-   PCI errors and recover from them via EEH RTAS services, which works on the
-   basis of additional ioctl commands.
-
-   So 4 additional ioctls have been added:
-
-	VFIO_IOMMU_SPAPR_TCE_GET_INFO
-		returns the size and the start of the DMA window on the PCI bus.
-
-	VFIO_IOMMU_ENABLE
-		enables the container. The locked pages accounting
-		is done at this point. This lets user first to know what
-		the DMA window is and adjust rlimit before doing any real job.
-
-	VFIO_IOMMU_DISABLE
-		disables the container.
-
-	VFIO_EEH_PE_OP
-		provides an API for EEH setup, error detection and recovery.
-
-   The code flow from the example above should be slightly changed::
-
-	struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
-
-	.....
-	/* Add the group to the container */
-	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
-
-	/* Enable the IOMMU model we want */
-	ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU)
-
-	/* Get addition sPAPR IOMMU info */
-	vfio_iommu_spapr_tce_info spapr_iommu_info;
-	ioctl(container, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &spapr_iommu_info);
-
-	if (ioctl(container, VFIO_IOMMU_ENABLE))
-		/* Cannot enable container, may be low rlimit */
-
-	/* Allocate some space and setup a DMA mapping */
-	dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE,
-			     MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
-
-	dma_map.size = 1024 * 1024;
-	dma_map.iova = 0; /* 1MB starting at 0x0 from device view */
-	dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
-
-	/* Check here is .iova/.size are within DMA window from spapr_iommu_info */
-	ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map);
-
-	/* Get a file descriptor for the device */
-	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
-
-	....
-
-	/* Gratuitous device reset and go... */
-	ioctl(device, VFIO_DEVICE_RESET);
-
-	/* Make sure EEH is supported */
-	ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH);
-
-	/* Enable the EEH functionality on the device */
-	pe_op.op = VFIO_EEH_PE_ENABLE;
-	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
-
-	/* You're suggested to create additional data struct to represent
-	 * PE, and put child devices belonging to same IOMMU group to the
-	 * PE instance for later reference.
-	 */
-
-	/* Check the PE's state and make sure it's in functional state */
-	pe_op.op = VFIO_EEH_PE_GET_STATE;
-	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
-
-	/* Save device state using pci_save_state().
-	 * EEH should be enabled on the specified device.
-	 */
-
-	....
-
-	/* Inject EEH error, which is expected to be caused by 32-bits
-	 * config load.
-	 */
-	pe_op.op = VFIO_EEH_PE_INJECT_ERR;
-	pe_op.err.type = EEH_ERR_TYPE_32;
-	pe_op.err.func = EEH_ERR_FUNC_LD_CFG_ADDR;
-	pe_op.err.addr = 0ul;
-	pe_op.err.mask = 0ul;
-	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
-
-	....
-
-	/* When 0xFF's returned from reading PCI config space or IO BARs
-	 * of the PCI device. Check the PE's state to see if that has been
-	 * frozen.
-	 */
-	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
-
-	/* Waiting for pending PCI transactions to be completed and don't
-	 * produce any more PCI traffic from/to the affected PE until
-	 * recovery is finished.
-	 */
-
-	/* Enable IO for the affected PE and collect logs. Usually, the
-	 * standard part of PCI config space, AER registers are dumped
-	 * as logs for further analysis.
-	 */
-	pe_op.op = VFIO_EEH_PE_UNFREEZE_IO;
-	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
-
-	/*
-	 * Issue PE reset: hot or fundamental reset. Usually, hot reset
-	 * is enough. However, the firmware of some PCI adapters would
-	 * require fundamental reset.
-	 */
-	pe_op.op = VFIO_EEH_PE_RESET_HOT;
-	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
-	pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE;
-	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
-
-	/* Configure the PCI bridges for the affected PE */
-	pe_op.op = VFIO_EEH_PE_CONFIGURE;
-	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
-
-	/* Restored state we saved at initialization time. pci_restore_state()
-	 * is good enough as an example.
-	 */
-
-	/* Hopefully, error is recovered successfully. Now, you can resume to
-	 * start PCI traffic to/from the affected PE.
-	 */
-
-	....
-
-5) There is v2 of SPAPR TCE IOMMU. It deprecates VFIO_IOMMU_ENABLE/
-   VFIO_IOMMU_DISABLE and implements 2 new ioctls:
-   VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY
-   (which are unsupported in v1 IOMMU).
-
-   PPC64 paravirtualized guests generate a lot of map/unmap requests,
-   and the handling of those includes pinning/unpinning pages and updating
-   mm::locked_vm counter to make sure we do not exceed the rlimit.
-   The v2 IOMMU splits accounting and pinning into separate operations:
-
-   - VFIO_IOMMU_SPAPR_REGISTER_MEMORY/VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY ioctls
-     receive a user space address and size of the block to be pinned.
-     Bisecting is not supported and VFIO_IOMMU_UNREGISTER_MEMORY is expected to
-     be called with the exact address and size used for registering
-     the memory block. The userspace is not expected to call these often.
-     The ranges are stored in a linked list in a VFIO container.
-
-   - VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA ioctls only update the actual
-     IOMMU table and do not do pinning; instead these check that the userspace
-     address is from pre-registered range.
-
-   This separation helps in optimizing DMA for guests.
-
-6) sPAPR specification allows guests to have an additional DMA window(s) on
-   a PCI bus with a variable page size. Two ioctls have been added to support
-   this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE.
-   The platform has to support the functionality or error will be returned to
-   the userspace. The existing hardware supports up to 2 DMA windows, one is
-   2GB long, uses 4K pages and called "default 32bit window"; the other can
-   be as big as entire RAM, use different page size, it is optional - guests
-   create those in run-time if the guest driver supports 64bit DMA.
-
-   VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and
-   a number of TCE table levels (if a TCE table is going to be big enough and
-   the kernel may not be able to allocate enough of physically contiguous
-   memory). It creates a new window in the available slot and returns the bus
-   address where the new window starts. Due to hardware limitation, the user
-   space cannot choose the location of DMA windows.
-
-   VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window
-   and removes it.
-
--------------------------------------------------------------------------------
-
-.. [1] VFIO was originally an acronym for "Virtual Function I/O" in its
-   initial implementation by Tom Lyon while as Cisco.  We've since
-   outgrown the acronym, but it's catchy.
-
-.. [2] "safe" also depends upon a device being "well behaved".  It's
-   possible for multi-function devices to have backdoors between
-   functions and even for single function devices to have alternative
-   access to things like PCI config space through MMIO registers.  To
-   guard against the former we can include additional precautions in the
-   IOMMU driver to group multi-function PCI devices together
-   (iommu=group_mf).  The latter we can't prevent, but the IOMMU should
-   still provide isolation.  For PCI, SR-IOV Virtual Functions are the
-   best indicator of "well behaved", as these are designed for
-   virtualization usage models.
-
-.. [3] As always there are trade-offs to virtual machine device
-   assignment that are beyond the scope of VFIO.  It's expected that
-   future IOMMU technologies will reduce some, but maybe not all, of
-   these trade-offs.
-
-.. [4] In this case the device is below a PCI bridge, so transactions
-   from either function of the device are indistinguishable to the iommu::
-
-	-[0000:00]-+-1e.0-[06]--+-0d.0
-				\-0d.1
-
-	00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev 90)
diff --git a/Documentation/w1/w1.netlink b/Documentation/w1/w1.netlink
index ef2727192d69..94ad4c420828 100644
--- a/Documentation/w1/w1.netlink
+++ b/Documentation/w1/w1.netlink
@@ -183,7 +183,7 @@ acknowledge number is set to seq+1.
 Additional documantion, source code examples.
 ============================================
 
-1. Documentation/connector
+1. Documentation/driver-api/connector.rst
 2. http://www.ioremap.net/archive/w1
 This archive includes userspace application w1d.c which uses
 read/write/search commands for all master/slave devices found on the bus.
diff --git a/Documentation/xillybus.txt b/Documentation/xillybus.txt
deleted file mode 100644
index 2446ee303c09..000000000000
--- a/Documentation/xillybus.txt
+++ /dev/null
@@ -1,379 +0,0 @@
-==========================================
-Xillybus driver for generic FPGA interface
-==========================================
-
-:Author: Eli Billauer, Xillybus Ltd. (http://xillybus.com)
-:Email:  eli.billauer@gmail.com or as advertised on Xillybus' site.
-
-.. Contents:
-
- - Introduction
-  -- Background
-  -- Xillybus Overview
-
- - Usage
-  -- User interface
-  -- Synchronization
-  -- Seekable pipes
-
- - Internals
-  -- Source code organization
-  -- Pipe attributes
-  -- Host never reads from the FPGA
-  -- Channels, pipes, and the message channel
-  -- Data streaming
-  -- Data granularity
-  -- Probing
-  -- Buffer allocation
-  -- The "nonempty" message (supporting poll)
-
-
-Introduction
-============
-
-Background
-----------
-
-An FPGA (Field Programmable Gate Array) is a piece of logic hardware, which
-can be programmed to become virtually anything that is usually found as a
-dedicated chipset: For instance, a display adapter, network interface card,
-or even a processor with its peripherals. FPGAs are the LEGO of hardware:
-Based upon certain building blocks, you make your own toys the way you like
-them. It's usually pointless to reimplement something that is already
-available on the market as a chipset, so FPGAs are mostly used when some
-special functionality is needed, and the production volume is relatively low
-(hence not justifying the development of an ASIC).
-
-The challenge with FPGAs is that everything is implemented at a very low
-level, even lower than assembly language. In order to allow FPGA designers to
-focus on their specific project, and not reinvent the wheel over and over
-again, pre-designed building blocks, IP cores, are often used. These are the
-FPGA parallels of library functions. IP cores may implement certain
-mathematical functions, a functional unit (e.g. a USB interface), an entire
-processor (e.g. ARM) or anything that might come handy. Think of them as a
-building block, with electrical wires dangling on the sides for connection to
-other blocks.
-
-One of the daunting tasks in FPGA design is communicating with a fullblown
-operating system (actually, with the processor running it): Implementing the
-low-level bus protocol and the somewhat higher-level interface with the host
-(registers, interrupts, DMA etc.) is a project in itself. When the FPGA's
-function is a well-known one (e.g. a video adapter card, or a NIC), it can
-make sense to design the FPGA's interface logic specifically for the project.
-A special driver is then written to present the FPGA as a well-known interface
-to the kernel and/or user space. In that case, there is no reason to treat the
-FPGA differently than any device on the bus.
-
-It's however common that the desired data communication doesn't fit any well-
-known peripheral function. Also, the effort of designing an elegant
-abstraction for the data exchange is often considered too big. In those cases,
-a quicker and possibly less elegant solution is sought: The driver is
-effectively written as a user space program, leaving the kernel space part
-with just elementary data transport. This still requires designing some
-interface logic for the FPGA, and write a simple ad-hoc driver for the kernel.
-
-Xillybus Overview
------------------
-
-Xillybus is an IP core and a Linux driver. Together, they form a kit for
-elementary data transport between an FPGA and the host, providing pipe-like
-data streams with a straightforward user interface. It's intended as a low-
-effort solution for mixed FPGA-host projects, for which it makes sense to
-have the project-specific part of the driver running in a user-space program.
-
-Since the communication requirements may vary significantly from one FPGA
-project to another (the number of data pipes needed in each direction and
-their attributes), there isn't one specific chunk of logic being the Xillybus
-IP core. Rather, the IP core is configured and built based upon a
-specification given by its end user.
-
-Xillybus presents independent data streams, which resemble pipes or TCP/IP
-communication to the user. At the host side, a character device file is used
-just like any pipe file. On the FPGA side, hardware FIFOs are used to stream
-the data. This is contrary to a common method of communicating through fixed-
-sized buffers (even though such buffers are used by Xillybus under the hood).
-There may be more than a hundred of these streams on a single IP core, but
-also no more than one, depending on the configuration.
-
-In order to ease the deployment of the Xillybus IP core, it contains a simple
-data structure which completely defines the core's configuration. The Linux
-driver fetches this data structure during its initialization process, and sets
-up the DMA buffers and character devices accordingly. As a result, a single
-driver is used to work out of the box with any Xillybus IP core.
-
-The data structure just mentioned should not be confused with PCI's
-configuration space or the Flattened Device Tree.
-
-Usage
-=====
-
-User interface
---------------
-
-On the host, all interface with Xillybus is done through /dev/xillybus_*
-device files, which are generated automatically as the drivers loads. The
-names of these files depend on the IP core that is loaded in the FPGA (see
-Probing below). To communicate with the FPGA, open the device file that
-corresponds to the hardware FIFO you want to send data or receive data from,
-and use plain write() or read() calls, just like with a regular pipe. In
-particular, it makes perfect sense to go::
-
-	$ cat mydata > /dev/xillybus_thisfifo
-
-	$ cat /dev/xillybus_thatfifo > hisdata
-
-possibly pressing CTRL-C as some stage, even though the xillybus_* pipes have
-the capability to send an EOF (but may not use it).
-
-The driver and hardware are designed to behave sensibly as pipes, including:
-
-* Supporting non-blocking I/O (by setting O_NONBLOCK on open() ).
-
-* Supporting poll() and select().
-
-* Being bandwidth efficient under load (using DMA) but also handle small
-  pieces of data sent across (like TCP/IP) by autoflushing.
-
-A device file can be read only, write only or bidirectional. Bidirectional
-device files are treated like two independent pipes (except for sharing a
-"channel" structure in the implementation code).
-
-Synchronization
----------------
-
-Xillybus pipes are configured (on the IP core) to be either synchronous or
-asynchronous. For a synchronous pipe, write() returns successfully only after
-some data has been submitted and acknowledged by the FPGA. This slows down
-bulk data transfers, and is nearly impossible for use with streams that
-require data at a constant rate: There is no data transmitted to the FPGA
-between write() calls, in particular when the process loses the CPU.
-
-When a pipe is configured asynchronous, write() returns if there was enough
-room in the buffers to store any of the data in the buffers.
-
-For FPGA to host pipes, asynchronous pipes allow data transfer from the FPGA
-as soon as the respective device file is opened, regardless of if the data
-has been requested by a read() call. On synchronous pipes, only the amount
-of data requested by a read() call is transmitted.
-
-In summary, for synchronous pipes, data between the host and FPGA is
-transmitted only to satisfy the read() or write() call currently handled
-by the driver, and those calls wait for the transmission to complete before
-returning.
-
-Note that the synchronization attribute has nothing to do with the possibility
-that read() or write() completes less bytes than requested. There is a
-separate configuration flag ("allowpartial") that determines whether such a
-partial completion is allowed.
-
-Seekable pipes
---------------
-
-A synchronous pipe can be configured to have the stream's position exposed
-to the user logic at the FPGA. Such a pipe is also seekable on the host API.
-With this feature, a memory or register interface can be attached on the
-FPGA side to the seekable stream. Reading or writing to a certain address in
-the attached memory is done by seeking to the desired address, and calling
-read() or write() as required.
-
-
-Internals
-=========
-
-Source code organization
-------------------------
-
-The Xillybus driver consists of a core module, xillybus_core.c, and modules
-that depend on the specific bus interface (xillybus_of.c and xillybus_pcie.c).
-
-The bus specific modules are those probed when a suitable device is found by
-the kernel. Since the DMA mapping and synchronization functions, which are bus
-dependent by their nature, are used by the core module, a
-xilly_endpoint_hardware structure is passed to the core module on
-initialization. This structure is populated with pointers to wrapper functions
-which execute the DMA-related operations on the bus.
-
-Pipe attributes
----------------
-
-Each pipe has a number of attributes which are set when the FPGA component
-(IP core) is built. They are fetched from the IDT (the data structure which
-defines the core's configuration, see Probing below) by xilly_setupchannels()
-in xillybus_core.c as follows:
-
-* is_writebuf: The pipe's direction. A non-zero value means it's an FPGA to
-  host pipe (the FPGA "writes").
-
-* channelnum: The pipe's identification number in communication between the
-  host and FPGA.
-
-* format: The underlying data width. See Data Granularity below.
-
-* allowpartial: A non-zero value means that a read() or write() (whichever
-  applies) may return with less than the requested number of bytes. The common
-  choice is a non-zero value, to match standard UNIX behavior.
-
-* synchronous: A non-zero value means that the pipe is synchronous. See
-  Synchronization above.
-
-* bufsize: Each DMA buffer's size. Always a power of two.
-
-* bufnum: The number of buffers allocated for this pipe. Always a power of two.
-
-* exclusive_open: A non-zero value forces exclusive opening of the associated
-  device file. If the device file is bidirectional, and already opened only in
-  one direction, the opposite direction may be opened once.
-
-* seekable: A non-zero value indicates that the pipe is seekable. See
-  Seekable pipes above.
-
-* supports_nonempty: A non-zero value (which is typical) indicates that the
-  hardware will send the messages that are necessary to support select() and
-  poll() for this pipe.
-
-Host never reads from the FPGA
-------------------------------
-
-Even though PCI Express is hotpluggable in general, a typical motherboard
-doesn't expect a card to go away all of the sudden. But since the PCIe card
-is based upon reprogrammable logic, a sudden disappearance from the bus is
-quite likely as a result of an accidental reprogramming of the FPGA while the
-host is up. In practice, nothing happens immediately in such a situation. But
-if the host attempts to read from an address that is mapped to the PCI Express
-device, that leads to an immediate freeze of the system on some motherboards,
-even though the PCIe standard requires a graceful recovery.
-
-In order to avoid these freezes, the Xillybus driver refrains completely from
-reading from the device's register space. All communication from the FPGA to
-the host is done through DMA. In particular, the Interrupt Service Routine
-doesn't follow the common practice of checking a status register when it's
-invoked. Rather, the FPGA prepares a small buffer which contains short
-messages, which inform the host what the interrupt was about.
-
-This mechanism is used on non-PCIe buses as well for the sake of uniformity.
-
-
-Channels, pipes, and the message channel
-----------------------------------------
-
-Each of the (possibly bidirectional) pipes presented to the user is allocated
-a data channel between the FPGA and the host. The distinction between channels
-and pipes is necessary only because of channel 0, which is used for interrupt-
-related messages from the FPGA, and has no pipe attached to it.
-
-Data streaming
---------------
-
-Even though a non-segmented data stream is presented to the user at both
-sides, the implementation relies on a set of DMA buffers which is allocated
-for each channel. For the sake of illustration, let's take the FPGA to host
-direction: As data streams into the respective channel's interface in the
-FPGA, the Xillybus IP core writes it to one of the DMA buffers. When the
-buffer is full, the FPGA informs the host about that (appending a
-XILLYMSG_OPCODE_RELEASEBUF message channel 0 and sending an interrupt if
-necessary). The host responds by making the data available for reading through
-the character device. When all data has been read, the host writes on the
-the FPGA's buffer control register, allowing the buffer's overwriting. Flow
-control mechanisms exist on both sides to prevent underflows and overflows.
-
-This is not good enough for creating a TCP/IP-like stream: If the data flow
-stops momentarily before a DMA buffer is filled, the intuitive expectation is
-that the partial data in buffer will arrive anyhow, despite the buffer not
-being completed. This is implemented by adding a field in the
-XILLYMSG_OPCODE_RELEASEBUF message, through which the FPGA informs not just
-which buffer is submitted, but how much data it contains.
-
-But the FPGA will submit a partially filled buffer only if directed to do so
-by the host. This situation occurs when the read() method has been blocking
-for XILLY_RX_TIMEOUT jiffies (currently 10 ms), after which the host commands
-the FPGA to submit a DMA buffer as soon as it can. This timeout mechanism
-balances between bus bandwidth efficiency (preventing a lot of partially
-filled buffers being sent) and a latency held fairly low for tails of data.
-
-A similar setting is used in the host to FPGA direction. The handling of
-partial DMA buffers is somewhat different, though. The user can tell the
-driver to submit all data it has in the buffers to the FPGA, by issuing a
-write() with the byte count set to zero. This is similar to a flush request,
-but it doesn't block. There is also an autoflushing mechanism, which triggers
-an equivalent flush roughly XILLY_RX_TIMEOUT jiffies after the last write().
-This allows the user to be oblivious about the underlying buffering mechanism
-and yet enjoy a stream-like interface.
-
-Note that the issue of partial buffer flushing is irrelevant for pipes having
-the "synchronous" attribute nonzero, since synchronous pipes don't allow data
-to lay around in the DMA buffers between read() and write() anyhow.
-
-Data granularity
-----------------
-
-The data arrives or is sent at the FPGA as 8, 16 or 32 bit wide words, as
-configured by the "format" attribute. Whenever possible, the driver attempts
-to hide this when the pipe is accessed differently from its natural alignment.
-For example, reading single bytes from a pipe with 32 bit granularity works
-with no issues. Writing single bytes to pipes with 16 or 32 bit granularity
-will also work, but the driver can't send partially completed words to the
-FPGA, so the transmission of up to one word may be held until it's fully
-occupied with user data.
-
-This somewhat complicates the handling of host to FPGA streams, because
-when a buffer is flushed, it may contain up to 3 bytes don't form a word in
-the FPGA, and hence can't be sent. To prevent loss of data, these leftover
-bytes need to be moved to the next buffer. The parts in xillybus_core.c
-that mention "leftovers" in some way are related to this complication.
-
-Probing
--------
-
-As mentioned earlier, the number of pipes that are created when the driver
-loads and their attributes depend on the Xillybus IP core in the FPGA. During
-the driver's initialization, a blob containing configuration info, the
-Interface Description Table (IDT), is sent from the FPGA to the host. The
-bootstrap process is done in three phases:
-
-1. Acquire the length of the IDT, so a buffer can be allocated for it. This
-   is done by sending a quiesce command to the device, since the acknowledge
-   for this command contains the IDT's buffer length.
-
-2. Acquire the IDT itself.
-
-3. Create the interfaces according to the IDT.
-
-Buffer allocation
------------------
-
-In order to simplify the logic that prevents illegal boundary crossings of
-PCIe packets, the following rule applies: If a buffer is smaller than 4kB,
-it must not cross a 4kB boundary. Otherwise, it must be 4kB aligned. The
-xilly_setupchannels() functions allocates these buffers by requesting whole
-pages from the kernel, and diving them into DMA buffers as necessary. Since
-all buffers' sizes are powers of two, it's possible to pack any set of such
-buffers, with a maximal waste of one page of memory.
-
-All buffers are allocated when the driver is loaded. This is necessary,
-since large continuous physical memory segments are sometimes requested,
-which are more likely to be available when the system is freshly booted.
-
-The allocation of buffer memory takes place in the same order they appear in
-the IDT. The driver relies on a rule that the pipes are sorted with decreasing
-buffer size in the IDT. If a requested buffer is larger or equal to a page,
-the necessary number of pages is requested from the kernel, and these are
-used for this buffer. If the requested buffer is smaller than a page, one
-single page is requested from the kernel, and that page is partially used.
-Or, if there already is a partially used page at hand, the buffer is packed
-into that page. It can be shown that all pages requested from the kernel
-(except possibly for the last) are 100% utilized this way.
-
-The "nonempty" message (supporting poll)
-----------------------------------------
-
-In order to support the "poll" method (and hence select() ), there is a small
-catch regarding the FPGA to host direction: The FPGA may have filled a DMA
-buffer with some data, but not submitted that buffer. If the host waited for
-the buffer's submission by the FPGA, there would be a possibility that the
-FPGA side has sent data, but a select() call would still block, because the
-host has not received any notification about this. This is solved with
-XILLYMSG_OPCODE_NONEMPTY messages sent by the FPGA when a channel goes from
-completely empty to containing some data.
-
-These messages are used only to support poll() and select(). The IP core can
-be configured not to send them for a slight reduction of bandwidth.
diff --git a/Documentation/zorro.txt b/Documentation/zorro.txt
deleted file mode 100644
index 664072b017e3..000000000000
--- a/Documentation/zorro.txt
+++ /dev/null
@@ -1,104 +0,0 @@
-========================================
-Writing Device Drivers for Zorro Devices
-========================================
-
-:Author: Written by Geert Uytterhoeven <geert@linux-m68k.org>
-:Last revised: September 5, 2003
-
-
-Introduction
-------------
-
-The Zorro bus is the bus used in the Amiga family of computers. Thanks to
-AutoConfig(tm), it's 100% Plug-and-Play.
-
-There are two types of Zorro buses, Zorro II and Zorro III:
-
-  - The Zorro II address space is 24-bit and lies within the first 16 MB of the
-    Amiga's address map.
-
-  - Zorro III is a 32-bit extension of Zorro II, which is backwards compatible
-    with Zorro II. The Zorro III address space lies outside the first 16 MB.
-
-
-Probing for Zorro Devices
--------------------------
-
-Zorro devices are found by calling ``zorro_find_device()``, which returns a
-pointer to the ``next`` Zorro device with the specified Zorro ID. A probe loop
-for the board with Zorro ID ``ZORRO_PROD_xxx`` looks like::
-
-    struct zorro_dev *z = NULL;
-
-    while ((z = zorro_find_device(ZORRO_PROD_xxx, z))) {
-	if (!zorro_request_region(z->resource.start+MY_START, MY_SIZE,
-				  "My explanation"))
-	...
-    }
-
-``ZORRO_WILDCARD`` acts as a wildcard and finds any Zorro device. If your driver
-supports different types of boards, you can use a construct like::
-
-    struct zorro_dev *z = NULL;
-
-    while ((z = zorro_find_device(ZORRO_WILDCARD, z))) {
-	if (z->id != ZORRO_PROD_xxx1 && z->id != ZORRO_PROD_xxx2 && ...)
-	    continue;
-	if (!zorro_request_region(z->resource.start+MY_START, MY_SIZE,
-				  "My explanation"))
-	...
-    }
-
-
-Zorro Resources
----------------
-
-Before you can access a Zorro device's registers, you have to make sure it's
-not yet in use. This is done using the I/O memory space resource management
-functions::
-
-    request_mem_region()
-    release_mem_region()
-
-Shortcuts to claim the whole device's address space are provided as well::
-
-    zorro_request_device
-    zorro_release_device
-
-
-Accessing the Zorro Address Space
----------------------------------
-
-The address regions in the Zorro device resources are Zorro bus address
-regions. Due to the identity bus-physical address mapping on the Zorro bus,
-they are CPU physical addresses as well.
-
-The treatment of these regions depends on the type of Zorro space:
-
-  - Zorro II address space is always mapped and does not have to be mapped
-    explicitly using z_ioremap().
-    
-    Conversion from bus/physical Zorro II addresses to kernel virtual addresses
-    and vice versa is done using::
-
-	virt_addr = ZTWO_VADDR(bus_addr);
-	bus_addr = ZTWO_PADDR(virt_addr);
-
-  - Zorro III address space must be mapped explicitly using z_ioremap() first
-    before it can be accessed::
- 
-	virt_addr = z_ioremap(bus_addr, size);
-	...
-	z_iounmap(virt_addr);
-
-
-References
-----------
-
-#. linux/include/linux/zorro.h
-#. linux/include/uapi/linux/zorro.h
-#. linux/include/uapi/linux/zorro_ids.h
-#. linux/arch/m68k/include/asm/zorro.h
-#. linux/drivers/zorro
-#. /proc/bus/zorro
-
-- 
cgit 


From fb8c5327b3c6c78b74a27a3c42e4f32b2cc30a04 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 13 Jun 2019 14:40:42 -0300
Subject: docs: driver-api: add xilinx driver API documentation

The current file there (emmi) provides a description of
the driver uAPI and kAPI.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/driver-api/index.rst        |  1 +
 Documentation/driver-api/xilinx/eemi.rst  | 67 +++++++++++++++++++++++++++++++
 Documentation/driver-api/xilinx/index.rst | 16 ++++++++
 Documentation/xilinx/eemi.rst             | 67 -------------------------------
 Documentation/xilinx/index.rst            | 17 --------
 5 files changed, 84 insertions(+), 84 deletions(-)
 create mode 100644 Documentation/driver-api/xilinx/eemi.rst
 create mode 100644 Documentation/driver-api/xilinx/index.rst
 delete mode 100644 Documentation/xilinx/eemi.rst
 delete mode 100644 Documentation/xilinx/index.rst

(limited to 'Documentation')

diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index d1c6513dd20d..77322753c1bc 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -93,6 +93,7 @@ available subsections can be seen below.
    sync_file
    vfio-mediated-device
    vfio
+   xilinx/index
    xillybus
    zorro
 
diff --git a/Documentation/driver-api/xilinx/eemi.rst b/Documentation/driver-api/xilinx/eemi.rst
new file mode 100644
index 000000000000..9dcbc6f18d75
--- /dev/null
+++ b/Documentation/driver-api/xilinx/eemi.rst
@@ -0,0 +1,67 @@
+====================================
+Xilinx Zynq MPSoC EEMI Documentation
+====================================
+
+Xilinx Zynq MPSoC Firmware Interface
+-------------------------------------
+The zynqmp-firmware node describes the interface to platform firmware.
+ZynqMP has an interface to communicate with secure firmware. Firmware
+driver provides an interface to firmware APIs. Interface APIs can be
+used by any driver to communicate with PMC(Platform Management Controller).
+
+Embedded Energy Management Interface (EEMI)
+----------------------------------------------
+The embedded energy management interface is used to allow software
+components running across different processing clusters on a chip or
+device to communicate with a power management controller (PMC) on a
+device to issue or respond to power management requests.
+
+EEMI ops is a structure containing all eemi APIs supported by Zynq MPSoC.
+The zynqmp-firmware driver maintain all EEMI APIs in zynqmp_eemi_ops
+structure. Any driver who want to communicate with PMC using EEMI APIs
+can call zynqmp_pm_get_eemi_ops().
+
+Example of EEMI ops::
+
+	/* zynqmp-firmware driver maintain all EEMI APIs */
+	struct zynqmp_eemi_ops {
+		int (*get_api_version)(u32 *version);
+		int (*query_data)(struct zynqmp_pm_query_data qdata, u32 *out);
+	};
+
+	static const struct zynqmp_eemi_ops eemi_ops = {
+		.get_api_version = zynqmp_pm_get_api_version,
+		.query_data = zynqmp_pm_query_data,
+	};
+
+Example of EEMI ops usage::
+
+	static const struct zynqmp_eemi_ops *eemi_ops;
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	eemi_ops = zynqmp_pm_get_eemi_ops();
+	if (IS_ERR(eemi_ops))
+		return PTR_ERR(eemi_ops);
+
+	ret = eemi_ops->query_data(qdata, ret_payload);
+
+IOCTL
+------
+IOCTL API is for device control and configuration. It is not a system
+IOCTL but it is an EEMI API. This API can be used by master to control
+any device specific configuration. IOCTL definitions can be platform
+specific. This API also manage shared device configuration.
+
+The following IOCTL IDs are valid for device control:
+- IOCTL_SET_PLL_FRAC_MODE	8
+- IOCTL_GET_PLL_FRAC_MODE	9
+- IOCTL_SET_PLL_FRAC_DATA	10
+- IOCTL_GET_PLL_FRAC_DATA	11
+
+Refer EEMI API guide [0] for IOCTL specific parameters and other EEMI APIs.
+
+References
+----------
+[0] Embedded Energy Management Interface (EEMI) API guide:
+    https://www.xilinx.com/support/documentation/user_guides/ug1200-eemi-api.pdf
diff --git a/Documentation/driver-api/xilinx/index.rst b/Documentation/driver-api/xilinx/index.rst
new file mode 100644
index 000000000000..13f7589ed442
--- /dev/null
+++ b/Documentation/driver-api/xilinx/index.rst
@@ -0,0 +1,16 @@
+
+===========
+Xilinx FPGA
+===========
+
+.. toctree::
+    :maxdepth: 1
+
+    eemi
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/xilinx/eemi.rst b/Documentation/xilinx/eemi.rst
deleted file mode 100644
index 9dcbc6f18d75..000000000000
--- a/Documentation/xilinx/eemi.rst
+++ /dev/null
@@ -1,67 +0,0 @@
-====================================
-Xilinx Zynq MPSoC EEMI Documentation
-====================================
-
-Xilinx Zynq MPSoC Firmware Interface
--------------------------------------
-The zynqmp-firmware node describes the interface to platform firmware.
-ZynqMP has an interface to communicate with secure firmware. Firmware
-driver provides an interface to firmware APIs. Interface APIs can be
-used by any driver to communicate with PMC(Platform Management Controller).
-
-Embedded Energy Management Interface (EEMI)
-----------------------------------------------
-The embedded energy management interface is used to allow software
-components running across different processing clusters on a chip or
-device to communicate with a power management controller (PMC) on a
-device to issue or respond to power management requests.
-
-EEMI ops is a structure containing all eemi APIs supported by Zynq MPSoC.
-The zynqmp-firmware driver maintain all EEMI APIs in zynqmp_eemi_ops
-structure. Any driver who want to communicate with PMC using EEMI APIs
-can call zynqmp_pm_get_eemi_ops().
-
-Example of EEMI ops::
-
-	/* zynqmp-firmware driver maintain all EEMI APIs */
-	struct zynqmp_eemi_ops {
-		int (*get_api_version)(u32 *version);
-		int (*query_data)(struct zynqmp_pm_query_data qdata, u32 *out);
-	};
-
-	static const struct zynqmp_eemi_ops eemi_ops = {
-		.get_api_version = zynqmp_pm_get_api_version,
-		.query_data = zynqmp_pm_query_data,
-	};
-
-Example of EEMI ops usage::
-
-	static const struct zynqmp_eemi_ops *eemi_ops;
-	u32 ret_payload[PAYLOAD_ARG_CNT];
-	int ret;
-
-	eemi_ops = zynqmp_pm_get_eemi_ops();
-	if (IS_ERR(eemi_ops))
-		return PTR_ERR(eemi_ops);
-
-	ret = eemi_ops->query_data(qdata, ret_payload);
-
-IOCTL
-------
-IOCTL API is for device control and configuration. It is not a system
-IOCTL but it is an EEMI API. This API can be used by master to control
-any device specific configuration. IOCTL definitions can be platform
-specific. This API also manage shared device configuration.
-
-The following IOCTL IDs are valid for device control:
-- IOCTL_SET_PLL_FRAC_MODE	8
-- IOCTL_GET_PLL_FRAC_MODE	9
-- IOCTL_SET_PLL_FRAC_DATA	10
-- IOCTL_GET_PLL_FRAC_DATA	11
-
-Refer EEMI API guide [0] for IOCTL specific parameters and other EEMI APIs.
-
-References
-----------
-[0] Embedded Energy Management Interface (EEMI) API guide:
-    https://www.xilinx.com/support/documentation/user_guides/ug1200-eemi-api.pdf
diff --git a/Documentation/xilinx/index.rst b/Documentation/xilinx/index.rst
deleted file mode 100644
index 01cc1a0714df..000000000000
--- a/Documentation/xilinx/index.rst
+++ /dev/null
@@ -1,17 +0,0 @@
-:orphan:
-
-===========
-Xilinx FPGA
-===========
-
-.. toctree::
-    :maxdepth: 1
-
-    eemi
-
-.. only::  subproject and html
-
-   Indices
-   =======
-
-   * :ref:`genindex`
-- 
cgit 


From c92992fc609fe99d926855eb1945f38ef4ad8e6c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Mon, 22 Apr 2019 16:49:11 -0300
Subject: docs: driver-api: add remaining converted dirs to it

There are a number of driver-specific descriptions that contain a
mix of userspace and kernelspace documentation. Just like we did
with other similar subsystems, add them at the driver-api
groupset, but don't move the directories.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/driver-api/index.rst | 2 ++
 Documentation/driver-api/pps.rst   | 2 +-
 Documentation/driver-api/ptp.rst   | 2 +-
 Documentation/index.rst            | 3 +++
 Documentation/mic/index.rst        | 2 --
 Documentation/phy/samsung-usb2.rst | 2 --
 Documentation/scheduler/index.rst  | 2 --
 7 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 77322753c1bc..1dde9692075c 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -83,6 +83,8 @@ available subsections can be seen below.
    ntb
    nvmem
    parport-lowlevel
+   pps
+   ptp
    pti_intel_mid
    pwm
    rfkill
diff --git a/Documentation/driver-api/pps.rst b/Documentation/driver-api/pps.rst
index 1456d2c32ebd..2d6b99766ee8 100644
--- a/Documentation/driver-api/pps.rst
+++ b/Documentation/driver-api/pps.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ======================
 PPS - Pulse Per Second
diff --git a/Documentation/driver-api/ptp.rst b/Documentation/driver-api/ptp.rst
index b6e65d66d37a..a15192e32347 100644
--- a/Documentation/driver-api/ptp.rst
+++ b/Documentation/driver-api/ptp.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ===========================================
 PTP hardware clock infrastructure for Linux
diff --git a/Documentation/index.rst b/Documentation/index.rst
index dcdaaff71633..041ffe442960 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -110,6 +110,9 @@ needed).
    bpf/index
    usb/index
    misc-devices/index
+   mic/index
+   phy/samsung-usb2
+   scheduler/index
 
 Architecture-specific documentation
 -----------------------------------
diff --git a/Documentation/mic/index.rst b/Documentation/mic/index.rst
index 082fa8f6a260..3a8d06367ef1 100644
--- a/Documentation/mic/index.rst
+++ b/Documentation/mic/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
 =============================================
 Intel Many Integrated Core (MIC) architecture
 =============================================
diff --git a/Documentation/phy/samsung-usb2.rst b/Documentation/phy/samsung-usb2.rst
index 98b5952fcb97..c48c8b9797b9 100644
--- a/Documentation/phy/samsung-usb2.rst
+++ b/Documentation/phy/samsung-usb2.rst
@@ -1,5 +1,3 @@
-:orphan:
-
 ====================================
 Samsung USB 2.0 PHY adaptation layer
 ====================================
diff --git a/Documentation/scheduler/index.rst b/Documentation/scheduler/index.rst
index 058be77a4c34..69074e5de9c4 100644
--- a/Documentation/scheduler/index.rst
+++ b/Documentation/scheduler/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
 ===============
 Linux Scheduler
 ===============
-- 
cgit 


From 65388dad1bbb51a4eb6cc91b9fa865b57646fb67 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 27 Jun 2019 16:31:35 -0300
Subject: docs: serial: move it to the driver-api

The contents of this directory is mostly driver-api stuff.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/driver-api/index.rst                 |   1 +
 Documentation/driver-api/serial/cyclades_z.rst     |  11 +
 Documentation/driver-api/serial/driver.rst         | 549 ++++++++++++++++++
 Documentation/driver-api/serial/index.rst          |  32 ++
 Documentation/driver-api/serial/moxa-smartio.rst   | 615 +++++++++++++++++++++
 Documentation/driver-api/serial/n_gsm.rst          | 103 ++++
 Documentation/driver-api/serial/rocket.rst         | 185 +++++++
 Documentation/driver-api/serial/serial-iso7816.rst |  90 +++
 Documentation/driver-api/serial/serial-rs485.rst   | 103 ++++
 Documentation/driver-api/serial/tty.rst            | 328 +++++++++++
 Documentation/serial/cyclades_z.rst                |  11 -
 Documentation/serial/driver.rst                    | 549 ------------------
 Documentation/serial/index.rst                     |  32 --
 Documentation/serial/moxa-smartio.rst              | 615 ---------------------
 Documentation/serial/n_gsm.rst                     | 103 ----
 Documentation/serial/rocket.rst                    | 185 -------
 Documentation/serial/serial-iso7816.rst            |  90 ---
 Documentation/serial/serial-rs485.rst              | 103 ----
 Documentation/serial/tty.rst                       | 328 -----------
 19 files changed, 2017 insertions(+), 2016 deletions(-)
 create mode 100644 Documentation/driver-api/serial/cyclades_z.rst
 create mode 100644 Documentation/driver-api/serial/driver.rst
 create mode 100644 Documentation/driver-api/serial/index.rst
 create mode 100644 Documentation/driver-api/serial/moxa-smartio.rst
 create mode 100644 Documentation/driver-api/serial/n_gsm.rst
 create mode 100644 Documentation/driver-api/serial/rocket.rst
 create mode 100644 Documentation/driver-api/serial/serial-iso7816.rst
 create mode 100644 Documentation/driver-api/serial/serial-rs485.rst
 create mode 100644 Documentation/driver-api/serial/tty.rst
 delete mode 100644 Documentation/serial/cyclades_z.rst
 delete mode 100644 Documentation/serial/driver.rst
 delete mode 100644 Documentation/serial/index.rst
 delete mode 100644 Documentation/serial/moxa-smartio.rst
 delete mode 100644 Documentation/serial/n_gsm.rst
 delete mode 100644 Documentation/serial/rocket.rst
 delete mode 100644 Documentation/serial/serial-iso7816.rst
 delete mode 100644 Documentation/serial/serial-rs485.rst
 delete mode 100644 Documentation/serial/tty.rst

(limited to 'Documentation')

diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 1dde9692075c..cf39b8f9d0f9 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -88,6 +88,7 @@ available subsections can be seen below.
    pti_intel_mid
    pwm
    rfkill
+   serial/index
    sgi-ioc4
    sm501
    smsc_ece1099
diff --git a/Documentation/driver-api/serial/cyclades_z.rst b/Documentation/driver-api/serial/cyclades_z.rst
new file mode 100644
index 000000000000..532ff67e2f1c
--- /dev/null
+++ b/Documentation/driver-api/serial/cyclades_z.rst
@@ -0,0 +1,11 @@
+================
+Cyclades-Z notes
+================
+
+The Cyclades-Z must have firmware loaded onto the card before it will
+operate.  This operation should be performed during system startup,
+
+The firmware, loader program and the latest device driver code are
+available from Cyclades at
+
+    ftp://ftp.cyclades.com/pub/cyclades/cyclades-z/linux/
diff --git a/Documentation/driver-api/serial/driver.rst b/Documentation/driver-api/serial/driver.rst
new file mode 100644
index 000000000000..31bd4e16fb1f
--- /dev/null
+++ b/Documentation/driver-api/serial/driver.rst
@@ -0,0 +1,549 @@
+====================
+Low Level Serial API
+====================
+
+
+This document is meant as a brief overview of some aspects of the new serial
+driver.  It is not complete, any questions you have should be directed to
+<rmk@arm.linux.org.uk>
+
+The reference implementation is contained within amba-pl011.c.
+
+
+
+Low Level Serial Hardware Driver
+--------------------------------
+
+The low level serial hardware driver is responsible for supplying port
+information (defined by uart_port) and a set of control methods (defined
+by uart_ops) to the core serial driver.  The low level driver is also
+responsible for handling interrupts for the port, and providing any
+console support.
+
+
+Console Support
+---------------
+
+The serial core provides a few helper functions.  This includes identifing
+the correct port structure (via uart_get_console) and decoding command line
+arguments (uart_parse_options).
+
+There is also a helper function (uart_console_write) which performs a
+character by character write, translating newlines to CRLF sequences.
+Driver writers are recommended to use this function rather than implementing
+their own version.
+
+
+Locking
+-------
+
+It is the responsibility of the low level hardware driver to perform the
+necessary locking using port->lock.  There are some exceptions (which
+are described in the uart_ops listing below.)
+
+There are two locks.  A per-port spinlock, and an overall semaphore.
+
+From the core driver perspective, the port->lock locks the following
+data::
+
+	port->mctrl
+	port->icount
+	port->state->xmit.head (circ_buf->head)
+	port->state->xmit.tail (circ_buf->tail)
+
+The low level driver is free to use this lock to provide any additional
+locking.
+
+The port_sem semaphore is used to protect against ports being added/
+removed or reconfigured at inappropriate times. Since v2.6.27, this
+semaphore has been the 'mutex' member of the tty_port struct, and
+commonly referred to as the port mutex.
+
+
+uart_ops
+--------
+
+The uart_ops structure is the main interface between serial_core and the
+hardware specific driver.  It contains all the methods to control the
+hardware.
+
+  tx_empty(port)
+	This function tests whether the transmitter fifo and shifter
+	for the port described by 'port' is empty.  If it is empty,
+	this function should return TIOCSER_TEMT, otherwise return 0.
+	If the port does not support this operation, then it should
+	return TIOCSER_TEMT.
+
+	Locking: none.
+
+	Interrupts: caller dependent.
+
+	This call must not sleep
+
+  set_mctrl(port, mctrl)
+	This function sets the modem control lines for port described
+	by 'port' to the state described by mctrl.  The relevant bits
+	of mctrl are:
+
+		- TIOCM_RTS	RTS signal.
+		- TIOCM_DTR	DTR signal.
+		- TIOCM_OUT1	OUT1 signal.
+		- TIOCM_OUT2	OUT2 signal.
+		- TIOCM_LOOP	Set the port into loopback mode.
+
+	If the appropriate bit is set, the signal should be driven
+	active.  If the bit is clear, the signal should be driven
+	inactive.
+
+	Locking: port->lock taken.
+
+	Interrupts: locally disabled.
+
+	This call must not sleep
+
+  get_mctrl(port)
+	Returns the current state of modem control inputs.  The state
+	of the outputs should not be returned, since the core keeps
+	track of their state.  The state information should include:
+
+		- TIOCM_CAR	state of DCD signal
+		- TIOCM_CTS	state of CTS signal
+		- TIOCM_DSR	state of DSR signal
+		- TIOCM_RI	state of RI signal
+
+	The bit is set if the signal is currently driven active.  If
+	the port does not support CTS, DCD or DSR, the driver should
+	indicate that the signal is permanently active.  If RI is
+	not available, the signal should not be indicated as active.
+
+	Locking: port->lock taken.
+
+	Interrupts: locally disabled.
+
+	This call must not sleep
+
+  stop_tx(port)
+	Stop transmitting characters.  This might be due to the CTS
+	line becoming inactive or the tty layer indicating we want
+	to stop transmission due to an XOFF character.
+
+	The driver should stop transmitting characters as soon as
+	possible.
+
+	Locking: port->lock taken.
+
+	Interrupts: locally disabled.
+
+	This call must not sleep
+
+  start_tx(port)
+	Start transmitting characters.
+
+	Locking: port->lock taken.
+
+	Interrupts: locally disabled.
+
+	This call must not sleep
+
+  throttle(port)
+	Notify the serial driver that input buffers for the line discipline are
+	close to full, and it should somehow signal that no more characters
+	should be sent to the serial port.
+	This will be called only if hardware assisted flow control is enabled.
+
+	Locking: serialized with .unthrottle() and termios modification by the
+	tty layer.
+
+  unthrottle(port)
+	Notify the serial driver that characters can now be sent to the serial
+	port without fear of overrunning the input buffers of the line
+	disciplines.
+
+	This will be called only if hardware assisted flow control is enabled.
+
+	Locking: serialized with .throttle() and termios modification by the
+	tty layer.
+
+  send_xchar(port,ch)
+	Transmit a high priority character, even if the port is stopped.
+	This is used to implement XON/XOFF flow control and tcflow().  If
+	the serial driver does not implement this function, the tty core
+	will append the character to the circular buffer and then call
+	start_tx() / stop_tx() to flush the data out.
+
+	Do not transmit if ch == '\0' (__DISABLED_CHAR).
+
+	Locking: none.
+
+	Interrupts: caller dependent.
+
+  stop_rx(port)
+	Stop receiving characters; the port is in the process of
+	being closed.
+
+	Locking: port->lock taken.
+
+	Interrupts: locally disabled.
+
+	This call must not sleep
+
+  enable_ms(port)
+	Enable the modem status interrupts.
+
+	This method may be called multiple times.  Modem status
+	interrupts should be disabled when the shutdown method is
+	called.
+
+	Locking: port->lock taken.
+
+	Interrupts: locally disabled.
+
+	This call must not sleep
+
+  break_ctl(port,ctl)
+	Control the transmission of a break signal.  If ctl is
+	nonzero, the break signal should be transmitted.  The signal
+	should be terminated when another call is made with a zero
+	ctl.
+
+	Locking: caller holds tty_port->mutex
+
+  startup(port)
+	Grab any interrupt resources and initialise any low level driver
+	state.  Enable the port for reception.  It should not activate
+	RTS nor DTR; this will be done via a separate call to set_mctrl.
+
+	This method will only be called when the port is initially opened.
+
+	Locking: port_sem taken.
+
+	Interrupts: globally disabled.
+
+  shutdown(port)
+	Disable the port, disable any break condition that may be in
+	effect, and free any interrupt resources.  It should not disable
+	RTS nor DTR; this will have already been done via a separate
+	call to set_mctrl.
+
+	Drivers must not access port->state once this call has completed.
+
+	This method will only be called when there are no more users of
+	this port.
+
+	Locking: port_sem taken.
+
+	Interrupts: caller dependent.
+
+  flush_buffer(port)
+	Flush any write buffers, reset any DMA state and stop any
+	ongoing DMA transfers.
+
+	This will be called whenever the port->state->xmit circular
+	buffer is cleared.
+
+	Locking: port->lock taken.
+
+	Interrupts: locally disabled.
+
+	This call must not sleep
+
+  set_termios(port,termios,oldtermios)
+	Change the port parameters, including word length, parity, stop
+	bits.  Update read_status_mask and ignore_status_mask to indicate
+	the types of events we are interested in receiving.  Relevant
+	termios->c_cflag bits are:
+
+		CSIZE
+			- word size
+		CSTOPB
+			- 2 stop bits
+		PARENB
+			- parity enable
+		PARODD
+			- odd parity (when PARENB is in force)
+		CREAD
+			- enable reception of characters (if not set,
+			  still receive characters from the port, but
+			  throw them away.
+		CRTSCTS
+			- if set, enable CTS status change reporting
+		CLOCAL
+			- if not set, enable modem status change
+			  reporting.
+
+	Relevant termios->c_iflag bits are:
+
+		INPCK
+			- enable frame and parity error events to be
+			  passed to the TTY layer.
+		BRKINT / PARMRK
+			- both of these enable break events to be
+			  passed to the TTY layer.
+
+		IGNPAR
+			- ignore parity and framing errors
+		IGNBRK
+			- ignore break errors,  If IGNPAR is also
+			  set, ignore overrun errors as well.
+
+	The interaction of the iflag bits is as follows (parity error
+	given as an example):
+
+	=============== ======= ======  =============================
+	Parity error	INPCK	IGNPAR
+	=============== ======= ======  =============================
+	n/a		0	n/a	character received, marked as
+					TTY_NORMAL
+	None		1	n/a	character received, marked as
+					TTY_NORMAL
+	Yes		1	0	character received, marked as
+					TTY_PARITY
+	Yes		1	1	character discarded
+	=============== ======= ======  =============================
+
+	Other flags may be used (eg, xon/xoff characters) if your
+	hardware supports hardware "soft" flow control.
+
+	Locking: caller holds tty_port->mutex
+
+	Interrupts: caller dependent.
+
+	This call must not sleep
+
+  set_ldisc(port,termios)
+	Notifier for discipline change. See Documentation/driver-api/serial/tty.rst.
+
+	Locking: caller holds tty_port->mutex
+
+  pm(port,state,oldstate)
+	Perform any power management related activities on the specified
+	port.  State indicates the new state (defined by
+	enum uart_pm_state), oldstate indicates the previous state.
+
+	This function should not be used to grab any resources.
+
+	This will be called when the port is initially opened and finally
+	closed, except when the port is also the system console.  This
+	will occur even if CONFIG_PM is not set.
+
+	Locking: none.
+
+	Interrupts: caller dependent.
+
+  type(port)
+	Return a pointer to a string constant describing the specified
+	port, or return NULL, in which case the string 'unknown' is
+	substituted.
+
+	Locking: none.
+
+	Interrupts: caller dependent.
+
+  release_port(port)
+	Release any memory and IO region resources currently in use by
+	the port.
+
+	Locking: none.
+
+	Interrupts: caller dependent.
+
+  request_port(port)
+	Request any memory and IO region resources required by the port.
+	If any fail, no resources should be registered when this function
+	returns, and it should return -EBUSY on failure.
+
+	Locking: none.
+
+	Interrupts: caller dependent.
+
+  config_port(port,type)
+	Perform any autoconfiguration steps required for the port.  `type`
+	contains a bit mask of the required configuration.  UART_CONFIG_TYPE
+	indicates that the port requires detection and identification.
+	port->type should be set to the type found, or PORT_UNKNOWN if
+	no port was detected.
+
+	UART_CONFIG_IRQ indicates autoconfiguration of the interrupt signal,
+	which should be probed using standard kernel autoprobing techniques.
+	This is not necessary on platforms where ports have interrupts
+	internally hard wired (eg, system on a chip implementations).
+
+	Locking: none.
+
+	Interrupts: caller dependent.
+
+  verify_port(port,serinfo)
+	Verify the new serial port information contained within serinfo is
+	suitable for this port type.
+
+	Locking: none.
+
+	Interrupts: caller dependent.
+
+  ioctl(port,cmd,arg)
+	Perform any port specific IOCTLs.  IOCTL commands must be defined
+	using the standard numbering system found in <asm/ioctl.h>
+
+	Locking: none.
+
+	Interrupts: caller dependent.
+
+  poll_init(port)
+	Called by kgdb to perform the minimal hardware initialization needed
+	to support poll_put_char() and poll_get_char().  Unlike ->startup()
+	this should not request interrupts.
+
+	Locking: tty_mutex and tty_port->mutex taken.
+
+	Interrupts: n/a.
+
+  poll_put_char(port,ch)
+	Called by kgdb to write a single character directly to the serial
+	port.  It can and should block until there is space in the TX FIFO.
+
+	Locking: none.
+
+	Interrupts: caller dependent.
+
+	This call must not sleep
+
+  poll_get_char(port)
+	Called by kgdb to read a single character directly from the serial
+	port.  If data is available, it should be returned; otherwise
+	the function should return NO_POLL_CHAR immediately.
+
+	Locking: none.
+
+	Interrupts: caller dependent.
+
+	This call must not sleep
+
+Other functions
+---------------
+
+uart_update_timeout(port,cflag,baud)
+	Update the FIFO drain timeout, port->timeout, according to the
+	number of bits, parity, stop bits and baud rate.
+
+	Locking: caller is expected to take port->lock
+
+	Interrupts: n/a
+
+uart_get_baud_rate(port,termios,old,min,max)
+	Return the numeric baud rate for the specified termios, taking
+	account of the special 38400 baud "kludge".  The B0 baud rate
+	is mapped to 9600 baud.
+
+	If the baud rate is not within min..max, then if old is non-NULL,
+	the original baud rate will be tried.  If that exceeds the
+	min..max constraint, 9600 baud will be returned.  termios will
+	be updated to the baud rate in use.
+
+	Note: min..max must always allow 9600 baud to be selected.
+
+	Locking: caller dependent.
+
+	Interrupts: n/a
+
+uart_get_divisor(port,baud)
+	Return the divisor (baud_base / baud) for the specified baud
+	rate, appropriately rounded.
+
+	If 38400 baud and custom divisor is selected, return the
+	custom divisor instead.
+
+	Locking: caller dependent.
+
+	Interrupts: n/a
+
+uart_match_port(port1,port2)
+	This utility function can be used to determine whether two
+	uart_port structures describe the same port.
+
+	Locking: n/a
+
+	Interrupts: n/a
+
+uart_write_wakeup(port)
+	A driver is expected to call this function when the number of
+	characters in the transmit buffer have dropped below a threshold.
+
+	Locking: port->lock should be held.
+
+	Interrupts: n/a
+
+uart_register_driver(drv)
+	Register a uart driver with the core driver.  We in turn register
+	with the tty layer, and initialise the core driver per-port state.
+
+	drv->port should be NULL, and the per-port structures should be
+	registered using uart_add_one_port after this call has succeeded.
+
+	Locking: none
+
+	Interrupts: enabled
+
+uart_unregister_driver()
+	Remove all references to a driver from the core driver.  The low
+	level driver must have removed all its ports via the
+	uart_remove_one_port() if it registered them with uart_add_one_port().
+
+	Locking: none
+
+	Interrupts: enabled
+
+**uart_suspend_port()**
+
+**uart_resume_port()**
+
+**uart_add_one_port()**
+
+**uart_remove_one_port()**
+
+Other notes
+-----------
+
+It is intended some day to drop the 'unused' entries from uart_port, and
+allow low level drivers to register their own individual uart_port's with
+the core.  This will allow drivers to use uart_port as a pointer to a
+structure containing both the uart_port entry with their own extensions,
+thus::
+
+	struct my_port {
+		struct uart_port	port;
+		int			my_stuff;
+	};
+
+Modem control lines via GPIO
+----------------------------
+
+Some helpers are provided in order to set/get modem control lines via GPIO.
+
+mctrl_gpio_init(port, idx):
+	This will get the {cts,rts,...}-gpios from device tree if they are
+	present and request them, set direction etc, and return an
+	allocated structure. `devm_*` functions are used, so there's no need
+	to call mctrl_gpio_free().
+	As this sets up the irq handling make sure to not handle changes to the
+	gpio input lines in your driver, too.
+
+mctrl_gpio_free(dev, gpios):
+	This will free the requested gpios in mctrl_gpio_init().
+	As `devm_*` functions are used, there's generally no need to call
+	this function.
+
+mctrl_gpio_to_gpiod(gpios, gidx)
+	This returns the gpio_desc structure associated to the modem line
+	index.
+
+mctrl_gpio_set(gpios, mctrl):
+	This will sets the gpios according to the mctrl state.
+
+mctrl_gpio_get(gpios, mctrl):
+	This will update mctrl with the gpios values.
+
+mctrl_gpio_enable_ms(gpios):
+	Enables irqs and handling of changes to the ms lines.
+
+mctrl_gpio_disable_ms(gpios):
+	Disables irqs and handling of changes to the ms lines.
diff --git a/Documentation/driver-api/serial/index.rst b/Documentation/driver-api/serial/index.rst
new file mode 100644
index 000000000000..33ad10d05b26
--- /dev/null
+++ b/Documentation/driver-api/serial/index.rst
@@ -0,0 +1,32 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+Support for Serial devices
+==========================
+
+.. toctree::
+    :maxdepth: 1
+
+
+    driver
+    tty
+
+Serial drivers
+==============
+
+.. toctree::
+    :maxdepth: 1
+
+    cyclades_z
+    moxa-smartio
+    n_gsm
+    rocket
+    serial-iso7816
+    serial-rs485
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/driver-api/serial/moxa-smartio.rst b/Documentation/driver-api/serial/moxa-smartio.rst
new file mode 100644
index 000000000000..156100f17c3f
--- /dev/null
+++ b/Documentation/driver-api/serial/moxa-smartio.rst
@@ -0,0 +1,615 @@
+=============================================================
+MOXA Smartio/Industio Family Device Driver Installation Guide
+=============================================================
+
+.. note::
+
+   This file is outdated. It needs some care in order to make it
+   updated to Kernel 5.0 and upper
+
+Copyright (C) 2008, Moxa Inc.
+
+Date: 01/21/2008
+
+.. Content
+
+   1. Introduction
+   2. System Requirement
+   3. Installation
+      3.1 Hardware installation
+      3.2 Driver files
+      3.3 Device naming convention
+      3.4 Module driver configuration
+      3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x.
+      3.6 Custom configuration
+      3.7 Verify driver installation
+   4. Utilities
+   5. Setserial
+   6. Troubleshooting
+
+1. Introduction
+^^^^^^^^^^^^^^^
+
+   The Smartio/Industio/UPCI family Linux driver supports following multiport
+   boards.
+
+    - 2 ports multiport board
+	CP-102U, CP-102UL, CP-102UF
+	CP-132U-I, CP-132UL,
+	CP-132, CP-132I, CP132S, CP-132IS,
+	CI-132, CI-132I, CI-132IS,
+	(C102H, C102HI, C102HIS, C102P, CP-102, CP-102S)
+
+    - 4 ports multiport board
+	CP-104EL,
+	CP-104UL, CP-104JU,
+	CP-134U, CP-134U-I,
+	C104H/PCI, C104HS/PCI,
+	CP-114, CP-114I, CP-114S, CP-114IS, CP-114UL,
+	C104H, C104HS,
+	CI-104J, CI-104JS,
+	CI-134, CI-134I, CI-134IS,
+	(C114HI, CT-114I, C104P),
+	POS-104UL,
+	CB-114,
+	CB-134I
+
+    - 8 ports multiport board
+	CP-118EL, CP-168EL,
+	CP-118U, CP-168U,
+	C168H/PCI,
+	C168H, C168HS,
+	(C168P),
+	CB-108
+
+   This driver and installation procedure have been developed upon Linux Kernel
+   2.4.x and 2.6.x. This driver supports Intel x86 hardware platform. In order
+   to maintain compatibility, this version has also been properly tested with
+   RedHat, Mandrake, Fedora and S.u.S.E Linux. However, if compatibility problem
+   occurs, please contact Moxa at support@moxa.com.tw.
+
+   In addition to device driver, useful utilities are also provided in this
+   version. They are:
+
+    - msdiag
+		 Diagnostic program for displaying installed Moxa
+                 Smartio/Industio boards.
+    - msmon
+		 Monitor program to observe data count and line status signals.
+    - msterm     A simple terminal program which is useful in testing serial
+	         ports.
+    - io-irq.exe
+		 Configuration program to setup ISA boards. Please note that
+                 this program can only be executed under DOS.
+
+   All the drivers and utilities are published in form of source code under
+   GNU General Public License in this version. Please refer to GNU General
+   Public License announcement in each source code file for more detail.
+
+   In Moxa's Web sites, you may always find latest driver at http://www.moxa.com/.
+
+   This version of driver can be installed as Loadable Module (Module driver)
+   or built-in into kernel (Static driver). You may refer to following
+   installation procedure for suitable one. Before you install the driver,
+   please refer to hardware installation procedure in the User's Manual.
+
+   We assume the user should be familiar with following documents.
+
+   - Serial-HOWTO
+   - Kernel-HOWTO
+
+2. System Requirement
+^^^^^^^^^^^^^^^^^^^^^
+
+   - Hardware platform: Intel x86 machine
+   - Kernel version: 2.4.x or 2.6.x
+   - gcc version 2.72 or later
+   - Maximum 4 boards can be installed in combination
+
+3. Installation
+^^^^^^^^^^^^^^^
+
+3.1 Hardware installation
+=========================
+
+   There are two types of buses, ISA and PCI, for Smartio/Industio
+   family multiport board.
+
+ISA board
+---------
+
+   You'll have to configure CAP address, I/O address, Interrupt Vector
+   as well as IRQ before installing this driver. Please refer to hardware
+   installation procedure in User's Manual before proceed any further.
+   Please make sure the JP1 is open after the ISA board is set properly.
+
+PCI/UPCI board
+--------------
+
+   You may need to adjust IRQ usage in BIOS to avoid from IRQ conflict
+   with other ISA devices. Please refer to hardware installation
+   procedure in User's Manual in advance.
+
+PCI IRQ Sharing
+---------------
+
+   Each port within the same multiport board shares the same IRQ. Up to
+   4 Moxa Smartio/Industio PCI Family multiport boards can be installed
+   together on one system and they can share the same IRQ.
+
+
+3.2 Driver files
+================
+
+   The driver file may be obtained from ftp, CD-ROM or floppy disk. The
+   first step, anyway, is to copy driver file "mxser.tgz" into specified
+   directory. e.g. /moxa. The execute commands as below::
+
+       # cd /
+       # mkdir moxa
+       # cd /moxa
+       # tar xvf /dev/fd0
+
+or::
+
+       # cd /
+       # mkdir moxa
+       # cd /moxa
+       # cp /mnt/cdrom/<driver directory>/mxser.tgz .
+       # tar xvfz mxser.tgz
+
+
+3.3 Device naming convention
+============================
+
+   You may find all the driver and utilities files in /moxa/mxser.
+   Following installation procedure depends on the model you'd like to
+   run the driver. If you prefer module driver, please refer to 3.4.
+   If static driver is required, please refer to 3.5.
+
+Dialin and callout port
+-----------------------
+
+   This driver remains traditional serial device properties. There are
+   two special file name for each serial port. One is dial-in port
+   which is named "ttyMxx". For callout port, the naming convention
+   is "cumxx".
+
+Device naming when more than 2 boards installed
+-----------------------------------------------
+
+   Naming convention for each Smartio/Industio multiport board is
+   pre-defined as below.
+
+   ============ ===============       ==============
+   Board Num.	 Dial-in Port	      Callout port
+   1st board	ttyM0  - ttyM7	      cum0  - cum7
+   2nd board	ttyM8  - ttyM15       cum8  - cum15
+   3rd board	ttyM16 - ttyM23       cum16 - cum23
+   4th board	ttyM24 - ttym31       cum24 - cum31
+   ============ ===============       ==============
+
+.. note::
+
+   Under Kernel 2.6 and upper, the cum Device is Obsolete. So use ttyM*
+   device instead.
+
+Board sequence
+--------------
+
+   This driver will activate ISA boards according to the parameter set
+   in the driver. After all specified ISA board activated, PCI board
+   will be installed in the system automatically driven.
+   Therefore the board number is sorted by the CAP address of ISA boards.
+   For PCI boards, their sequence will be after ISA boards and C168H/PCI
+   has higher priority than C104H/PCI boards.
+
+3.4 Module driver configuration
+===============================
+
+   Module driver is easiest way to install. If you prefer static driver
+   installation, please skip this paragraph.
+
+
+   ------------- Prepare to use the MOXA driver --------------------
+
+3.4.1 Create tty device with correct major number
+-------------------------------------------------
+
+   Before using MOXA driver, your system must have the tty devices
+   which are created with driver's major number. We offer one shell
+   script "msmknod" to simplify the procedure.
+   This step is only needed to be executed once. But you still
+   need to do this procedure when:
+
+   a. You change the driver's major number. Please refer the "3.7"
+      section.
+   b. Your total installed MOXA boards number is changed. Maybe you
+      add/delete one MOXA board.
+   c. You want to change the tty name. This needs to modify the
+      shell script "msmknod"
+
+   The procedure is::
+
+	 # cd /moxa/mxser/driver
+	 # ./msmknod
+
+   This shell script will require the major number for dial-in
+   device and callout device to create tty device. You also need
+   to specify the total installed MOXA board number. Default major
+   numbers for dial-in device and callout device are 30, 35. If
+   you need to change to other number, please refer section "3.7"
+   for more detailed procedure.
+   Msmknod will delete any special files occupying the same device
+   naming.
+
+3.4.2 Build the MOXA driver and utilities
+-----------------------------------------
+
+   Before using the MOXA driver and utilities, you need compile the
+   all the source code. This step is only need to be executed once.
+   But you still re-compile the source code if you modify the source
+   code. For example, if you change the driver's major number (see
+   "3.7" section), then you need to do this step again.
+
+   Find "Makefile" in /moxa/mxser, then run
+
+	 # make clean; make install
+
+   ..note::
+
+	 For Red Hat 9, Red Hat Enterprise Linux AS3/ES3/WS3 & Fedora Core1:
+	 # make clean; make installsp1
+
+	 For Red Hat Enterprise Linux AS4/ES4/WS4:
+	 # make clean; make installsp2
+
+   The driver files "mxser.o" and utilities will be properly compiled
+   and copied to system directories respectively.
+
+------------- Load MOXA driver--------------------
+
+3.4.3 Load the MOXA driver
+--------------------------
+
+   ::
+
+	 # modprobe mxser <argument>
+
+   will activate the module driver. You may run "lsmod" to check
+   if "mxser" is activated. If the MOXA board is ISA board, the
+   <argument> is needed. Please refer to section "3.4.5" for more
+   information.
+
+------------- Load MOXA driver on boot --------------------
+
+3.4.4 Load the mxser driver
+---------------------------
+
+
+   For the above description, you may manually execute
+   "modprobe mxser" to activate this driver and run
+   "rmmod mxser" to remove it.
+
+   However, it's better to have a boot time configuration to
+   eliminate manual operation. Boot time configuration can be
+   achieved by rc file. We offer one "rc.mxser" file to simplify
+   the procedure under "moxa/mxser/driver".
+
+   But if you use ISA board, please modify the "modprobe ..." command
+   to add the argument (see "3.4.5" section). After modifying the
+   rc.mxser, please try to execute "/moxa/mxser/driver/rc.mxser"
+   manually to make sure the modification is ok. If any error
+   encountered, please try to modify again. If the modification is
+   completed, follow the below step.
+
+   Run following command for setting rc files::
+
+	 # cd /moxa/mxser/driver
+	 # cp ./rc.mxser /etc/rc.d
+	 # cd /etc/rc.d
+
+   Check "rc.serial" is existed or not. If "rc.serial" doesn't exist,
+   create it by vi, run "chmod 755 rc.serial" to change the permission.
+
+   Add "/etc/rc.d/rc.mxser" in last line.
+
+   Reboot and check if moxa.o activated by "lsmod" command.
+
+3.4.5. specify CAP address
+--------------------------
+
+   If you'd like to drive Smartio/Industio ISA boards in the system,
+   you'll have to add parameter to specify CAP address of given
+   board while activating "mxser.o". The format for parameters are
+   as follows.::
+
+	   modprobe mxser ioaddr=0x???,0x???,0x???,0x???
+				  |  |  |    |
+				  |  |  |    +- 4th ISA board
+				  |  |  +------ 3rd ISA board
+				  |  +------------ 2nd ISA board
+				  +-------------------1st ISA board
+
+3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x
+================================================================
+
+    Note:
+          To use static driver, you must install the linux kernel
+          source package.
+
+3.5.1 Backup the built-in driver in the kernel
+----------------------------------------------
+
+    ::
+
+       # cd /usr/src/linux/drivers/char
+       # mv mxser.c mxser.c.old
+
+       For Red Hat 7.x user, you need to create link:
+       # cd /usr/src
+       # ln -s linux-2.4 linux
+
+3.5.2 Create link
+-----------------
+    ::
+
+	  # cd /usr/src/linux/drivers/char
+	  # ln -s /moxa/mxser/driver/mxser.c mxser.c
+
+3.5.3 Add CAP address list for ISA boards.
+------------------------------------------
+
+    For PCI boards user, please skip this step.
+
+    In module mode, the CAP address for ISA board is given by
+    parameter. In static driver configuration, you'll have to
+    assign it within driver's source code. If you will not
+    install any ISA boards, you may skip to next portion.
+    The instructions to modify driver source code are as
+    below.
+
+    a. run::
+
+	# cd /moxa/mxser/driver
+	# vi mxser.c
+
+    b. Find the array mxserBoardCAP[] as below::
+
+	  static int mxserBoardCAP[] = {0x00, 0x00, 0x00, 0x00};
+
+    c. Change the address within this array using vi. For
+       example, to driver 2 ISA boards with CAP address
+       0x280 and 0x180 as 1st and 2nd board. Just to change
+       the source code as follows::
+
+	  static int mxserBoardCAP[] = {0x280, 0x180, 0x00, 0x00};
+
+3.5.4 Setup kernel configuration
+--------------------------------
+
+    Configure the kernel::
+
+      # cd /usr/src/linux
+      # make menuconfig
+
+    You will go into a menu-driven system. Please select [Character
+    devices][Non-standard serial port support], enable the [Moxa
+    SmartIO support] driver with "[*]" for built-in (not "[M]"), then
+    select [Exit] to exit this program.
+
+3.5.5 Rebuild kernel
+--------------------
+
+    The following are for Linux kernel rebuilding, for your
+    reference only.
+
+    For appropriate details, please refer to the Linux document:
+
+        a. Run the following commands::
+
+	     cd /usr/src/linux
+	     make clean		     # take a few minutes
+	     make dep		     # take a few minutes
+	     make bzImage	     # take probably 10-20 minutes
+	     make install	     # copy boot image to correct position
+
+	f. Please make sure the boot kernel (vmlinuz) is in the
+	   correct position.
+	g. If you use 'lilo' utility, you should check /etc/lilo.conf
+	   'image' item specified the path which is the 'vmlinuz' path,
+	   or you will load wrong (or old) boot kernel image (vmlinuz).
+	   After checking /etc/lilo.conf, please run "lilo".
+
+	  Note that if the result of "make bzImage" is ERROR, then you have to
+	  go back to Linux configuration Setup. Type "make menuconfig" in
+          directory /usr/src/linux.
+
+
+3.5.6 Make tty device and special file
+--------------------------------------
+
+    ::
+       # cd /moxa/mxser/driver
+       # ./msmknod
+
+3.5.7 Make utility
+------------------
+
+    ::
+
+	  # cd /moxa/mxser/utility
+	  # make clean; make install
+
+3.5.8 Reboot
+------------
+
+
+
+3.6 Custom configuration
+========================
+
+    Although this driver already provides you default configuration, you
+    still can change the device name and major number. The instruction to
+    change these parameters are shown as below.
+
+a. Change Device name
+
+    If you'd like to use other device names instead of default naming
+    convention, all you have to do is to modify the internal code
+    within the shell script "msmknod". First, you have to open "msmknod"
+    by vi. Locate each line contains "ttyM" and "cum" and change them
+    to the device name you desired. "msmknod" creates the device names
+    you need next time executed.
+
+b. Change Major number
+
+    If major number 30 and 35 had been occupied, you may have to select
+    2 free major numbers for this driver. There are 3 steps to change
+    major numbers.
+
+3.6.1 Find free major numbers
+-----------------------------
+
+    In /proc/devices, you may find all the major numbers occupied
+    in the system. Please select 2 major numbers that are available.
+    e.g. 40, 45.
+
+3.6.2 Create special files
+--------------------------
+
+   Run /moxa/mxser/driver/msmknod to create special files with
+   specified major numbers.
+
+3.6.3 Modify driver with new major number
+-----------------------------------------
+
+   Run vi to open /moxa/mxser/driver/mxser.c. Locate the line
+   contains "MXSERMAJOR". Change the content as below::
+
+	  #define	  MXSERMAJOR		  40
+	  #define	  MXSERCUMAJOR		  45
+
+    3.6.4 Run "make clean; make install" in /moxa/mxser/driver.
+
+3.7 Verify driver installation
+==============================
+
+    You may refer to /var/log/messages to check the latest status
+    log reported by this driver whenever it's activated.
+
+4. Utilities
+^^^^^^^^^^^^
+
+   There are 3 utilities contained in this driver. They are msdiag, msmon and
+   msterm. These 3 utilities are released in form of source code. They should
+   be compiled into executable file and copied into /usr/bin.
+
+   Before using these utilities, please load driver (refer 3.4 & 3.5) and
+   make sure you had run the "msmknod" utility.
+
+msdiag - Diagnostic
+===================
+
+   This utility provides the function to display what Moxa Smartio/Industio
+   board found by driver in the system.
+
+msmon - Port Monitoring
+=======================
+
+   This utility gives the user a quick view about all the MOXA ports'
+   activities. One can easily learn each port's total received/transmitted
+   (Rx/Tx) character count since the time when the monitoring is started.
+
+   Rx/Tx throughputs per second are also reported in interval basis (e.g.
+   the last 5 seconds) and in average basis (since the time the monitoring
+   is started). You can reset all ports' count by <HOME> key. <+> <->
+   (plus/minus) keys to change the displaying time interval. Press <ENTER>
+   on the port, that cursor stay, to view the port's communication
+   parameters, signal status, and input/output queue.
+
+msterm - Terminal Emulation
+===========================
+
+   This utility provides data sending and receiving ability of all tty ports,
+   especially for MOXA ports. It is quite useful for testing simple
+   application, for example, sending AT command to a modem connected to the
+   port or used as a terminal for login purpose. Note that this is only a
+   dumb terminal emulation without handling full screen operation.
+
+5. Setserial
+^^^^^^^^^^^^
+
+   Supported Setserial parameters are listed as below.
+
+   ============== =========================================================
+   uart		  set UART type(16450-->disable FIFO, 16550A-->enable FIFO)
+   close_delay	  set the amount of time(in 1/100 of a second) that DTR
+		  should be kept low while being closed.
+   closing_wait   set the amount of time(in 1/100 of a second) that the
+		  serial port should wait for data to be drained while
+		  being closed, before the receiver is disable.
+   spd_hi	  Use  57.6kb  when  the application requests 38.4kb.
+   spd_vhi	  Use  115.2kb	when  the application requests 38.4kb.
+   spd_shi	  Use  230.4kb	when  the application requests 38.4kb.
+   spd_warp	  Use  460.8kb	when  the application requests 38.4kb.
+   spd_normal	  Use  38.4kb  when  the application requests 38.4kb.
+   spd_cust	  Use  the custom divisor to set the speed when  the
+		  application requests 38.4kb.
+   divisor	  This option set the custom division.
+   baud_base	  This option set the base baud rate.
+   ============== =========================================================
+
+6. Troubleshooting
+^^^^^^^^^^^^^^^^^^
+
+   The boot time error messages and solutions are stated as clearly as
+   possible. If all the possible solutions fail, please contact our technical
+   support team to get more help.
+
+
+   Error msg:
+	      More than 4 Moxa Smartio/Industio family boards found. Fifth board
+              and after are ignored.
+
+   Solution:
+   To avoid this problem, please unplug fifth and after board, because Moxa
+   driver supports up to 4 boards.
+
+   Error msg:
+	      Request_irq fail, IRQ(?) may be conflict with another device.
+
+   Solution:
+   Other PCI or ISA devices occupy the assigned IRQ. If you are not sure
+   which device causes the situation, please check /proc/interrupts to find
+   free IRQ and simply change another free IRQ for Moxa board.
+
+   Error msg:
+	      Board #: C1xx Series(CAP=xxx) interrupt number invalid.
+
+   Solution:
+   Each port within the same multiport board shares the same IRQ. Please set
+   one IRQ (IRQ doesn't equal to zero) for one Moxa board.
+
+   Error msg:
+	      No interrupt vector be set for Moxa ISA board(CAP=xxx).
+
+   Solution:
+   Moxa ISA board needs an interrupt vector.Please refer to user's manual
+   "Hardware Installation" chapter to set interrupt vector.
+
+   Error msg:
+              Couldn't install MOXA Smartio/Industio family driver!
+
+   Solution:
+   Load Moxa driver fail, the major number may conflict with other devices.
+   Please refer to previous section 3.7 to change a free major number for
+   Moxa driver.
+
+   Error msg:
+              Couldn't install MOXA Smartio/Industio family callout driver!
+
+   Solution:
+   Load Moxa callout driver fail, the callout device major number may
+   conflict with other devices. Please refer to previous section 3.7 to
+   change a free callout device major number for Moxa driver.
diff --git a/Documentation/driver-api/serial/n_gsm.rst b/Documentation/driver-api/serial/n_gsm.rst
new file mode 100644
index 000000000000..f3ad9fd26408
--- /dev/null
+++ b/Documentation/driver-api/serial/n_gsm.rst
@@ -0,0 +1,103 @@
+==============================
+GSM 0710 tty multiplexor HOWTO
+==============================
+
+This line discipline implements the GSM 07.10 multiplexing protocol
+detailed in the following 3GPP document:
+
+	http://www.3gpp.org/ftp/Specs/archive/07_series/07.10/0710-720.zip
+
+This document give some hints on how to use this driver with GPRS and 3G
+modems connected to a physical serial port.
+
+How to use it
+-------------
+1. initialize the modem in 0710 mux mode (usually AT+CMUX= command) through
+   its serial port. Depending on the modem used, you can pass more or less
+   parameters to this command,
+2. switch the serial line to using the n_gsm line discipline by using
+   TIOCSETD ioctl,
+3. configure the mux using GSMIOC_GETCONF / GSMIOC_SETCONF ioctl,
+
+Major parts of the initialization program :
+(a good starting point is util-linux-ng/sys-utils/ldattach.c)::
+
+  #include <linux/gsmmux.h>
+  #define N_GSM0710	21	/* GSM 0710 Mux */
+  #define DEFAULT_SPEED	B115200
+  #define SERIAL_PORT	/dev/ttyS0
+
+	int ldisc = N_GSM0710;
+	struct gsm_config c;
+	struct termios configuration;
+
+	/* open the serial port connected to the modem */
+	fd = open(SERIAL_PORT, O_RDWR | O_NOCTTY | O_NDELAY);
+
+	/* configure the serial port : speed, flow control ... */
+
+	/* send the AT commands to switch the modem to CMUX mode
+	   and check that it's successful (should return OK) */
+	write(fd, "AT+CMUX=0\r", 10);
+
+	/* experience showed that some modems need some time before
+	   being able to answer to the first MUX packet so a delay
+	   may be needed here in some case */
+	sleep(3);
+
+	/* use n_gsm line discipline */
+	ioctl(fd, TIOCSETD, &ldisc);
+
+	/* get n_gsm configuration */
+	ioctl(fd, GSMIOC_GETCONF, &c);
+	/* we are initiator and need encoding 0 (basic) */
+	c.initiator = 1;
+	c.encapsulation = 0;
+	/* our modem defaults to a maximum size of 127 bytes */
+	c.mru = 127;
+	c.mtu = 127;
+	/* set the new configuration */
+	ioctl(fd, GSMIOC_SETCONF, &c);
+
+	/* and wait for ever to keep the line discipline enabled */
+	daemon(0,0);
+	pause();
+
+4. create the devices corresponding to the "virtual" serial ports (take care,
+   each modem has its configuration and some DLC have dedicated functions,
+   for example GPS), starting with minor 1 (DLC0 is reserved for the management
+   of the mux)::
+
+     MAJOR=`cat /proc/devices |grep gsmtty | awk '{print $1}`
+     for i in `seq 1 4`; do
+	mknod /dev/ttygsm$i c $MAJOR $i
+     done
+
+5. use these devices as plain serial ports.
+
+   for example, it's possible:
+
+   - and to use gnokii to send / receive SMS on ttygsm1
+   - to use ppp to establish a datalink on ttygsm2
+
+6. first close all virtual ports before closing the physical port.
+
+   Note that after closing the physical port the modem is still in multiplexing
+   mode. This may prevent a successful re-opening of the port later. To avoid
+   this situation either reset the modem if your hardware allows that or send
+   a disconnect command frame manually before initializing the multiplexing mode
+   for the second time. The byte sequence for the disconnect command frame is::
+
+      0xf9, 0x03, 0xef, 0x03, 0xc3, 0x16, 0xf9.
+
+Additional Documentation
+------------------------
+More practical details on the protocol and how it's supported by industrial
+modems can be found in the following documents :
+
+- http://www.telit.com/module/infopool/download.php?id=616
+- http://www.u-blox.com/images/downloads/Product_Docs/LEON-G100-G200-MuxImplementation_ApplicationNote_%28GSM%20G1-CS-10002%29.pdf
+- http://www.sierrawireless.com/Support/Downloads/AirPrime/WMP_Series/~/media/Support_Downloads/AirPrime/Application_notes/CMUX_Feature_Application_Note-Rev004.ashx
+- http://wm.sim.com/sim/News/photo/2010721161442.pdf
+
+11-03-08 - Eric Bénard - <eric@eukrea.com>
diff --git a/Documentation/driver-api/serial/rocket.rst b/Documentation/driver-api/serial/rocket.rst
new file mode 100644
index 000000000000..23761eae4282
--- /dev/null
+++ b/Documentation/driver-api/serial/rocket.rst
@@ -0,0 +1,185 @@
+================================================
+Comtrol(tm) RocketPort(R)/RocketModem(TM) Series
+================================================
+
+Device Driver for the Linux Operating System
+============================================
+
+Product overview
+----------------
+
+This driver provides a loadable kernel driver for the Comtrol RocketPort
+and RocketModem PCI boards. These boards provide, 2, 4, 8, 16, or 32
+high-speed serial ports or modems.  This driver supports up to a combination
+of four RocketPort or RocketModems boards in one machine simultaneously.
+This file assumes that you are using the RocketPort driver which is
+integrated into the kernel sources.
+
+The driver can also be installed as an external module using the usual
+"make;make install" routine.  This external module driver, obtainable
+from the Comtrol website listed below, is useful for updating the driver
+or installing it into kernels which do not have the driver configured
+into them.  Installations instructions for the external module
+are in the included README and HW_INSTALL files.
+
+RocketPort ISA and RocketModem II PCI boards currently are only supported by
+this driver in module form.
+
+The RocketPort ISA board requires I/O ports to be configured by the DIP
+switches on the board.  See the section "ISA Rocketport Boards" below for
+information on how to set the DIP switches.
+
+You pass the I/O port to the driver using the following module parameters:
+
+board1:
+	I/O port for the first ISA board
+board2:
+	I/O port for the second ISA board
+board3:
+	I/O port for the third ISA board
+board4:
+	I/O port for the fourth ISA board
+
+There is a set of utilities and scripts provided with the external driver
+(downloadable from http://www.comtrol.com) that ease the configuration and
+setup of the ISA cards.
+
+The RocketModem II PCI boards require firmware to be loaded into the card
+before it will function.  The driver has only been tested as a module for this
+board.
+
+Installation Procedures
+-----------------------
+
+RocketPort/RocketModem PCI cards require no driver configuration, they are
+automatically detected and configured.
+
+The RocketPort driver can be installed as a module (recommended) or built
+into the kernel. This is selected, as for other drivers, through the `make config`
+command from the root of the Linux source tree during the kernel build process.
+
+The RocketPort/RocketModem serial ports installed by this driver are assigned
+device major number 46, and will be named /dev/ttyRx, where x is the port number
+starting at zero (ex. /dev/ttyR0, /devttyR1, ...).  If you have multiple cards
+installed in the system, the mapping of port names to serial ports is displayed
+in the system log at /var/log/messages.
+
+If installed as a module, the module must be loaded.  This can be done
+manually by entering "modprobe rocket".  To have the module loaded automatically
+upon system boot, edit a `/etc/modprobe.d/*.conf` file and add the line
+"alias char-major-46 rocket".
+
+In order to use the ports, their device names (nodes) must be created with mknod.
+This is only required once, the system will retain the names once created.  To
+create the RocketPort/RocketModem device names, use the command
+"mknod /dev/ttyRx c 46 x" where x is the port number starting at zero.
+
+For example::
+
+	> mknod /dev/ttyR0 c 46 0
+	> mknod /dev/ttyR1 c 46 1
+	> mknod /dev/ttyR2 c 46 2
+
+The Linux script MAKEDEV will create the first 16 ttyRx device names (nodes)
+for you::
+
+	>/dev/MAKEDEV ttyR
+
+ISA Rocketport Boards
+---------------------
+
+You must assign and configure the I/O addresses used by the ISA Rocketport
+card before installing and using it.  This is done by setting a set of DIP
+switches on the Rocketport board.
+
+
+Setting the I/O address
+-----------------------
+
+Before installing RocketPort(R) or RocketPort RA boards, you must find
+a range of I/O addresses for it to use. The first RocketPort card
+requires a 68-byte contiguous block of I/O addresses, starting at one
+of the following: 0x100h, 0x140h, 0x180h, 0x200h, 0x240h, 0x280h,
+0x300h, 0x340h, 0x380h.  This I/O address must be reflected in the DIP
+switches of *all* of the Rocketport cards.
+
+The second, third, and fourth RocketPort cards require a 64-byte
+contiguous block of I/O addresses, starting at one of the following
+I/O addresses: 0x100h, 0x140h, 0x180h, 0x1C0h, 0x200h, 0x240h, 0x280h,
+0x2C0h, 0x300h, 0x340h, 0x380h, 0x3C0h.  The I/O address used by the
+second, third, and fourth Rocketport cards (if present) are set via
+software control.  The DIP switch settings for the I/O address must be
+set to the value of the first Rocketport cards.
+
+In order to distinguish each of the card from the others, each card
+must have a unique board ID set on the dip switches.  The first
+Rocketport board must be set with the DIP switches corresponding to
+the first board, the second board must be set with the DIP switches
+corresponding to the second board, etc.  IMPORTANT: The board ID is
+the only place where the DIP switch settings should differ between the
+various Rocketport boards in a system.
+
+The I/O address range used by any of the RocketPort cards must not
+conflict with any other cards in the system, including other
+RocketPort cards.  Below, you will find a list of commonly used I/O
+address ranges which may be in use by other devices in your system.
+On a Linux system, "cat /proc/ioports" will also be helpful in
+identifying what I/O addresses are being used by devices on your
+system.
+
+Remember, the FIRST RocketPort uses 68 I/O addresses.  So, if you set it
+for 0x100, it will occupy 0x100 to 0x143.  This would mean that you
+CAN NOT set the second, third or fourth board for address 0x140 since
+the first 4 bytes of that range are used by the first board.  You would
+need to set the second, third, or fourth board to one of the next available
+blocks such as 0x180.
+
+RocketPort and RocketPort RA SW1 Settings::
+
+            +-------------------------------+
+            | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 |
+            +-------+-------+---------------+
+            | Unused| Card  | I/O Port Block|
+            +-------------------------------+
+
+  DIP Switches                             DIP Switches
+  7    8                                   6    5
+  ===================                      ===================
+  On   On   UNUSED, MUST BE ON.            On   On   First Card    <==== Default
+                                           On   Off  Second Card
+                                           Off  On   Third Card
+                                           Off  Off  Fourth Card
+
+  DIP Switches         I/O Address Range
+  4    3    2    1     Used by the First Card
+  =====================================
+  On   Off  On   Off   100-143
+  On   Off  Off  On    140-183
+  On   Off  Off  Off   180-1C3       <==== Default
+  Off  On   On   Off   200-243
+  Off  On   Off  On    240-283
+  Off  On   Off  Off   280-2C3
+  Off  Off  On   Off   300-343
+  Off  Off  Off  On    340-383
+  Off  Off  Off  Off   380-3C3
+
+Reporting Bugs
+--------------
+
+For technical support, please provide the following
+information: Driver version, kernel release, distribution of
+kernel, and type of board you are using. Error messages and log
+printouts port configuration details are especially helpful.
+
+USA:
+    :Phone: (612) 494-4100
+    :FAX: (612) 494-4199
+    :email: support@comtrol.com
+
+Comtrol Europe:
+    :Phone: +44 (0) 1 869 323-220
+    :FAX: +44 (0) 1 869 323-211
+    :email: support@comtrol.co.uk
+
+Web:	http://www.comtrol.com
+FTP:	ftp.comtrol.com
diff --git a/Documentation/driver-api/serial/serial-iso7816.rst b/Documentation/driver-api/serial/serial-iso7816.rst
new file mode 100644
index 000000000000..d990143de0c6
--- /dev/null
+++ b/Documentation/driver-api/serial/serial-iso7816.rst
@@ -0,0 +1,90 @@
+=============================
+ISO7816 Serial Communications
+=============================
+
+1. Introduction
+===============
+
+  ISO/IEC7816 is a series of standards specifying integrated circuit cards (ICC)
+  also known as smart cards.
+
+2. Hardware-related considerations
+==================================
+
+  Some CPUs/UARTs (e.g., Microchip AT91) contain a built-in mode capable of
+  handling communication with a smart card.
+
+  For these microcontrollers, the Linux driver should be made capable of
+  working in both modes, and proper ioctls (see later) should be made
+  available at user-level to allow switching from one mode to the other, and
+  vice versa.
+
+3. Data Structures Already Available in the Kernel
+==================================================
+
+  The Linux kernel provides the serial_iso7816 structure (see [1]) to handle
+  ISO7816 communications. This data structure is used to set and configure
+  ISO7816 parameters in ioctls.
+
+  Any driver for devices capable of working both as RS232 and ISO7816 should
+  implement the iso7816_config callback in the uart_port structure. The
+  serial_core calls iso7816_config to do the device specific part in response
+  to TIOCGISO7816 and TIOCSISO7816 ioctls (see below). The iso7816_config
+  callback receives a pointer to struct serial_iso7816.
+
+4. Usage from user-level
+========================
+
+  From user-level, ISO7816 configuration can be get/set using the previous
+  ioctls. For instance, to set ISO7816 you can use the following code::
+
+	#include <linux/serial.h>
+
+	/* Include definition for ISO7816 ioctls: TIOCSISO7816 and TIOCGISO7816 */
+	#include <sys/ioctl.h>
+
+	/* Open your specific device (e.g., /dev/mydevice): */
+	int fd = open ("/dev/mydevice", O_RDWR);
+	if (fd < 0) {
+		/* Error handling. See errno. */
+	}
+
+	struct serial_iso7816 iso7816conf;
+
+	/* Reserved fields as to be zeroed */
+	memset(&iso7816conf, 0, sizeof(iso7816conf));
+
+	/* Enable ISO7816 mode: */
+	iso7816conf.flags |= SER_ISO7816_ENABLED;
+
+	/* Select the protocol: */
+	/* T=0 */
+	iso7816conf.flags |= SER_ISO7816_T(0);
+	/* or T=1 */
+	iso7816conf.flags |= SER_ISO7816_T(1);
+
+	/* Set the guard time: */
+	iso7816conf.tg = 2;
+
+	/* Set the clock frequency*/
+	iso7816conf.clk = 3571200;
+
+	/* Set transmission factors: */
+	iso7816conf.sc_fi = 372;
+	iso7816conf.sc_di = 1;
+
+	if (ioctl(fd_usart, TIOCSISO7816, &iso7816conf) < 0) {
+		/* Error handling. See errno. */
+	}
+
+	/* Use read() and write() syscalls here... */
+
+	/* Close the device when finished: */
+	if (close (fd) < 0) {
+		/* Error handling. See errno. */
+	}
+
+5. References
+=============
+
+ [1]    include/uapi/linux/serial.h
diff --git a/Documentation/driver-api/serial/serial-rs485.rst b/Documentation/driver-api/serial/serial-rs485.rst
new file mode 100644
index 000000000000..6bc824f948f9
--- /dev/null
+++ b/Documentation/driver-api/serial/serial-rs485.rst
@@ -0,0 +1,103 @@
+===========================
+RS485 Serial Communications
+===========================
+
+1. Introduction
+===============
+
+   EIA-485, also known as TIA/EIA-485 or RS-485, is a standard defining the
+   electrical characteristics of drivers and receivers for use in balanced
+   digital multipoint systems.
+   This standard is widely used for communications in industrial automation
+   because it can be used effectively over long distances and in electrically
+   noisy environments.
+
+2. Hardware-related Considerations
+==================================
+
+   Some CPUs/UARTs (e.g., Atmel AT91 or 16C950 UART) contain a built-in
+   half-duplex mode capable of automatically controlling line direction by
+   toggling RTS or DTR signals. That can be used to control external
+   half-duplex hardware like an RS485 transceiver or any RS232-connected
+   half-duplex devices like some modems.
+
+   For these microcontrollers, the Linux driver should be made capable of
+   working in both modes, and proper ioctls (see later) should be made
+   available at user-level to allow switching from one mode to the other, and
+   vice versa.
+
+3. Data Structures Already Available in the Kernel
+==================================================
+
+   The Linux kernel provides the serial_rs485 structure (see [1]) to handle
+   RS485 communications. This data structure is used to set and configure RS485
+   parameters in the platform data and in ioctls.
+
+   The device tree can also provide RS485 boot time parameters (see [2]
+   for bindings). The driver is in charge of filling this data structure from
+   the values given by the device tree.
+
+   Any driver for devices capable of working both as RS232 and RS485 should
+   implement the rs485_config callback in the uart_port structure. The
+   serial_core calls rs485_config to do the device specific part in response
+   to TIOCSRS485 and TIOCGRS485 ioctls (see below). The rs485_config callback
+   receives a pointer to struct serial_rs485.
+
+4. Usage from user-level
+========================
+
+   From user-level, RS485 configuration can be get/set using the previous
+   ioctls. For instance, to set RS485 you can use the following code::
+
+	#include <linux/serial.h>
+
+	/* Include definition for RS485 ioctls: TIOCGRS485 and TIOCSRS485 */
+	#include <sys/ioctl.h>
+
+	/* Open your specific device (e.g., /dev/mydevice): */
+	int fd = open ("/dev/mydevice", O_RDWR);
+	if (fd < 0) {
+		/* Error handling. See errno. */
+	}
+
+	struct serial_rs485 rs485conf;
+
+	/* Enable RS485 mode: */
+	rs485conf.flags |= SER_RS485_ENABLED;
+
+	/* Set logical level for RTS pin equal to 1 when sending: */
+	rs485conf.flags |= SER_RS485_RTS_ON_SEND;
+	/* or, set logical level for RTS pin equal to 0 when sending: */
+	rs485conf.flags &= ~(SER_RS485_RTS_ON_SEND);
+
+	/* Set logical level for RTS pin equal to 1 after sending: */
+	rs485conf.flags |= SER_RS485_RTS_AFTER_SEND;
+	/* or, set logical level for RTS pin equal to 0 after sending: */
+	rs485conf.flags &= ~(SER_RS485_RTS_AFTER_SEND);
+
+	/* Set rts delay before send, if needed: */
+	rs485conf.delay_rts_before_send = ...;
+
+	/* Set rts delay after send, if needed: */
+	rs485conf.delay_rts_after_send = ...;
+
+	/* Set this flag if you want to receive data even while sending data */
+	rs485conf.flags |= SER_RS485_RX_DURING_TX;
+
+	if (ioctl (fd, TIOCSRS485, &rs485conf) < 0) {
+		/* Error handling. See errno. */
+	}
+
+	/* Use read() and write() syscalls here... */
+
+	/* Close the device when finished: */
+	if (close (fd) < 0) {
+		/* Error handling. See errno. */
+	}
+
+5. References
+=============
+
+ [1]	include/uapi/linux/serial.h
+
+ [2]	Documentation/devicetree/bindings/serial/rs485.txt
diff --git a/Documentation/driver-api/serial/tty.rst b/Documentation/driver-api/serial/tty.rst
new file mode 100644
index 000000000000..dd972caacf3e
--- /dev/null
+++ b/Documentation/driver-api/serial/tty.rst
@@ -0,0 +1,328 @@
+=================
+The Lockronomicon
+=================
+
+Your guide to the ancient and twisted locking policies of the tty layer and
+the warped logic behind them. Beware all ye who read on.
+
+
+Line Discipline
+---------------
+
+Line disciplines are registered with tty_register_ldisc() passing the
+discipline number and the ldisc structure. At the point of registration the
+discipline must be ready to use and it is possible it will get used before
+the call returns success. If the call returns an error then it won't get
+called. Do not re-use ldisc numbers as they are part of the userspace ABI
+and writing over an existing ldisc will cause demons to eat your computer.
+After the return the ldisc data has been copied so you may free your own
+copy of the structure. You must not re-register over the top of the line
+discipline even with the same data or your computer again will be eaten by
+demons.
+
+In order to remove a line discipline call tty_unregister_ldisc().
+In ancient times this always worked. In modern times the function will
+return -EBUSY if the ldisc is currently in use. Since the ldisc referencing
+code manages the module counts this should not usually be a concern.
+
+Heed this warning: the reference count field of the registered copies of the
+tty_ldisc structure in the ldisc table counts the number of lines using this
+discipline. The reference count of the tty_ldisc structure within a tty
+counts the number of active users of the ldisc at this instant. In effect it
+counts the number of threads of execution within an ldisc method (plus those
+about to enter and exit although this detail matters not).
+
+Line Discipline Methods
+-----------------------
+
+TTY side interfaces
+^^^^^^^^^^^^^^^^^^^
+
+======================= =======================================================
+open()			Called when the line discipline is attached to
+			the terminal. No other call into the line
+			discipline for this tty will occur until it
+			completes successfully. Should initialize any
+			state needed by the ldisc, and set receive_room
+			in the tty_struct to the maximum amount of data
+			the line discipline is willing to accept from the
+			driver with a single call to receive_buf().
+			Returning an error will prevent the ldisc from
+			being attached. Can sleep.
+
+close()			This is called on a terminal when the line
+			discipline is being unplugged. At the point of
+			execution no further users will enter the
+			ldisc code for this tty. Can sleep.
+
+hangup()		Called when the tty line is hung up.
+			The line discipline should cease I/O to the tty.
+			No further calls into the ldisc code will occur.
+			The return value is ignored. Can sleep.
+
+read()			(optional) A process requests reading data from
+			the line. Multiple read calls may occur in parallel
+			and the ldisc must deal with serialization issues.
+			If not defined, the process will receive an EIO
+			error. May sleep.
+
+write()			(optional) A process requests writing data to the
+			line. Multiple write calls are serialized by the
+			tty layer for the ldisc. If not defined, the
+			process will receive an EIO error. May sleep.
+
+flush_buffer()		(optional) May be called at any point between
+			open and close, and instructs the line discipline
+			to empty its input buffer.
+
+set_termios()		(optional) Called on termios structure changes.
+			The caller passes the old termios data and the
+			current data is in the tty. Called under the
+			termios semaphore so allowed to sleep. Serialized
+			against itself only.
+
+poll()			(optional) Check the status for the poll/select
+			calls. Multiple poll calls may occur in parallel.
+			May sleep.
+
+ioctl()			(optional) Called when an ioctl is handed to the
+			tty layer that might be for the ldisc. Multiple
+			ioctl calls may occur in parallel. May sleep.
+
+compat_ioctl()		(optional) Called when a 32 bit ioctl is handed
+			to the tty layer that might be for the ldisc.
+			Multiple ioctl calls may occur in parallel.
+			May sleep.
+======================= =======================================================
+
+Driver Side Interfaces
+^^^^^^^^^^^^^^^^^^^^^^
+
+======================= =======================================================
+receive_buf()		(optional) Called by the low-level driver to hand
+			a buffer of received bytes to the ldisc for
+			processing. The number of bytes is guaranteed not
+			to exceed the current value of tty->receive_room.
+			All bytes must be processed.
+
+receive_buf2()		(optional) Called by the low-level driver to hand
+			a buffer of received bytes to the ldisc for
+			processing. Returns the number of bytes processed.
+
+			If both receive_buf() and receive_buf2() are
+			defined, receive_buf2() should be preferred.
+
+write_wakeup()		May be called at any point between open and close.
+			The TTY_DO_WRITE_WAKEUP flag indicates if a call
+			is needed but always races versus calls. Thus the
+			ldisc must be careful about setting order and to
+			handle unexpected calls. Must not sleep.
+
+			The driver is forbidden from calling this directly
+			from the ->write call from the ldisc as the ldisc
+			is permitted to call the driver write method from
+			this function. In such a situation defer it.
+
+dcd_change()		Report to the tty line the current DCD pin status
+			changes and the relative timestamp. The timestamp
+			cannot be NULL.
+======================= =======================================================
+
+
+Driver Access
+^^^^^^^^^^^^^
+
+Line discipline methods can call the following methods of the underlying
+hardware driver through the function pointers within the tty->driver
+structure:
+
+======================= =======================================================
+write()			Write a block of characters to the tty device.
+			Returns the number of characters accepted. The
+			character buffer passed to this method is already
+			in kernel space.
+
+put_char()		Queues a character for writing to the tty device.
+			If there is no room in the queue, the character is
+			ignored.
+
+flush_chars()		(Optional) If defined, must be called after
+			queueing characters with put_char() in order to
+			start transmission.
+
+write_room()		Returns the numbers of characters the tty driver
+			will accept for queueing to be written.
+
+ioctl()			Invoke device specific ioctl.
+			Expects data pointers to refer to userspace.
+			Returns ENOIOCTLCMD for unrecognized ioctl numbers.
+
+set_termios()		Notify the tty driver that the device's termios
+			settings have changed. New settings are in
+			tty->termios. Previous settings should be passed in
+			the "old" argument.
+
+			The API is defined such that the driver should return
+			the actual modes selected. This means that the
+			driver function is responsible for modifying any
+			bits in the request it cannot fulfill to indicate
+			the actual modes being used. A device with no
+			hardware capability for change (e.g. a USB dongle or
+			virtual port) can provide NULL for this method.
+
+throttle()		Notify the tty driver that input buffers for the
+			line discipline are close to full, and it should
+			somehow signal that no more characters should be
+			sent to the tty.
+
+unthrottle()		Notify the tty driver that characters can now be
+			sent to the tty without fear of overrunning the
+			input buffers of the line disciplines.
+
+stop()			Ask the tty driver to stop outputting characters
+			to the tty device.
+
+start()			Ask the tty driver to resume sending characters
+			to the tty device.
+
+hangup()		Ask the tty driver to hang up the tty device.
+
+break_ctl()		(Optional) Ask the tty driver to turn on or off
+			BREAK status on the RS-232 port.  If state is -1,
+			then the BREAK status should be turned on; if
+			state is 0, then BREAK should be turned off.
+			If this routine is not implemented, use ioctls
+			TIOCSBRK / TIOCCBRK instead.
+
+wait_until_sent()	Waits until the device has written out all of the
+			characters in its transmitter FIFO.
+
+send_xchar()		Send a high-priority XON/XOFF character to the device.
+======================= =======================================================
+
+
+Flags
+^^^^^
+
+Line discipline methods have access to tty->flags field containing the
+following interesting flags:
+
+======================= =======================================================
+TTY_THROTTLED		Driver input is throttled. The ldisc should call
+			tty->driver->unthrottle() in order to resume
+			reception when it is ready to process more data.
+
+TTY_DO_WRITE_WAKEUP	If set, causes the driver to call the ldisc's
+			write_wakeup() method in order to resume
+			transmission when it can accept more data
+			to transmit.
+
+TTY_IO_ERROR		If set, causes all subsequent userspace read/write
+			calls on the tty to fail, returning -EIO.
+
+TTY_OTHER_CLOSED	Device is a pty and the other side has closed.
+
+TTY_NO_WRITE_SPLIT	Prevent driver from splitting up writes into
+			smaller chunks.
+======================= =======================================================
+
+
+Locking
+^^^^^^^
+
+Callers to the line discipline functions from the tty layer are required to
+take line discipline locks. The same is true of calls from the driver side
+but not yet enforced.
+
+Three calls are now provided::
+
+	ldisc = tty_ldisc_ref(tty);
+
+takes a handle to the line discipline in the tty and returns it. If no ldisc
+is currently attached or the ldisc is being closed and re-opened at this
+point then NULL is returned. While this handle is held the ldisc will not
+change or go away::
+
+	tty_ldisc_deref(ldisc)
+
+Returns the ldisc reference and allows the ldisc to be closed. Returning the
+reference takes away your right to call the ldisc functions until you take
+a new reference::
+
+	ldisc = tty_ldisc_ref_wait(tty);
+
+Performs the same function as tty_ldisc_ref except that it will wait for an
+ldisc change to complete and then return a reference to the new ldisc.
+
+While these functions are slightly slower than the old code they should have
+minimal impact as most receive logic uses the flip buffers and they only
+need to take a reference when they push bits up through the driver.
+
+A caution: The ldisc->open(), ldisc->close() and driver->set_ldisc
+functions are called with the ldisc unavailable. Thus tty_ldisc_ref will
+fail in this situation if used within these functions. Ldisc and driver
+code calling its own functions must be careful in this case.
+
+
+Driver Interface
+----------------
+
+======================= =======================================================
+open()			Called when a device is opened. May sleep
+
+close()			Called when a device is closed. At the point of
+			return from this call the driver must make no
+			further ldisc calls of any kind. May sleep
+
+write()			Called to write bytes to the device. May not
+			sleep. May occur in parallel in special cases.
+			Because this includes panic paths drivers generally
+			shouldn't try and do clever locking here.
+
+put_char()		Stuff a single character onto the queue. The
+			driver is guaranteed following up calls to
+			flush_chars.
+
+flush_chars()		Ask the kernel to write put_char queue
+
+write_room()		Return the number of characters that can be stuffed
+			into the port buffers without overflow (or less).
+			The ldisc is responsible for being intelligent
+			about multi-threading of write_room/write calls
+
+ioctl()			Called when an ioctl may be for the driver
+
+set_termios()		Called on termios change, serialized against
+			itself by a semaphore. May sleep.
+
+set_ldisc()		Notifier for discipline change. At the point this
+			is done the discipline is not yet usable. Can now
+			sleep (I think)
+
+throttle()		Called by the ldisc to ask the driver to do flow
+			control.  Serialization including with unthrottle
+			is the job of the ldisc layer.
+
+unthrottle()		Called by the ldisc to ask the driver to stop flow
+			control.
+
+stop()			Ldisc notifier to the driver to stop output. As with
+			throttle the serializations with start() are down
+			to the ldisc layer.
+
+start()			Ldisc notifier to the driver to start output.
+
+hangup()		Ask the tty driver to cause a hangup initiated
+			from the host side. [Can sleep ??]
+
+break_ctl()		Send RS232 break. Can sleep. Can get called in
+			parallel, driver must serialize (for now), and
+			with write calls.
+
+wait_until_sent()	Wait for characters to exit the hardware queue
+			of the driver. Can sleep
+
+send_xchar()	  	Send XON/XOFF and if possible jump the queue with
+			it in order to get fast flow control responses.
+			Cannot sleep ??
+======================= =======================================================
diff --git a/Documentation/serial/cyclades_z.rst b/Documentation/serial/cyclades_z.rst
deleted file mode 100644
index 532ff67e2f1c..000000000000
--- a/Documentation/serial/cyclades_z.rst
+++ /dev/null
@@ -1,11 +0,0 @@
-================
-Cyclades-Z notes
-================
-
-The Cyclades-Z must have firmware loaded onto the card before it will
-operate.  This operation should be performed during system startup,
-
-The firmware, loader program and the latest device driver code are
-available from Cyclades at
-
-    ftp://ftp.cyclades.com/pub/cyclades/cyclades-z/linux/
diff --git a/Documentation/serial/driver.rst b/Documentation/serial/driver.rst
deleted file mode 100644
index 4537119bf624..000000000000
--- a/Documentation/serial/driver.rst
+++ /dev/null
@@ -1,549 +0,0 @@
-====================
-Low Level Serial API
-====================
-
-
-This document is meant as a brief overview of some aspects of the new serial
-driver.  It is not complete, any questions you have should be directed to
-<rmk@arm.linux.org.uk>
-
-The reference implementation is contained within amba-pl011.c.
-
-
-
-Low Level Serial Hardware Driver
---------------------------------
-
-The low level serial hardware driver is responsible for supplying port
-information (defined by uart_port) and a set of control methods (defined
-by uart_ops) to the core serial driver.  The low level driver is also
-responsible for handling interrupts for the port, and providing any
-console support.
-
-
-Console Support
----------------
-
-The serial core provides a few helper functions.  This includes identifing
-the correct port structure (via uart_get_console) and decoding command line
-arguments (uart_parse_options).
-
-There is also a helper function (uart_console_write) which performs a
-character by character write, translating newlines to CRLF sequences.
-Driver writers are recommended to use this function rather than implementing
-their own version.
-
-
-Locking
--------
-
-It is the responsibility of the low level hardware driver to perform the
-necessary locking using port->lock.  There are some exceptions (which
-are described in the uart_ops listing below.)
-
-There are two locks.  A per-port spinlock, and an overall semaphore.
-
-From the core driver perspective, the port->lock locks the following
-data::
-
-	port->mctrl
-	port->icount
-	port->state->xmit.head (circ_buf->head)
-	port->state->xmit.tail (circ_buf->tail)
-
-The low level driver is free to use this lock to provide any additional
-locking.
-
-The port_sem semaphore is used to protect against ports being added/
-removed or reconfigured at inappropriate times. Since v2.6.27, this
-semaphore has been the 'mutex' member of the tty_port struct, and
-commonly referred to as the port mutex.
-
-
-uart_ops
---------
-
-The uart_ops structure is the main interface between serial_core and the
-hardware specific driver.  It contains all the methods to control the
-hardware.
-
-  tx_empty(port)
-	This function tests whether the transmitter fifo and shifter
-	for the port described by 'port' is empty.  If it is empty,
-	this function should return TIOCSER_TEMT, otherwise return 0.
-	If the port does not support this operation, then it should
-	return TIOCSER_TEMT.
-
-	Locking: none.
-
-	Interrupts: caller dependent.
-
-	This call must not sleep
-
-  set_mctrl(port, mctrl)
-	This function sets the modem control lines for port described
-	by 'port' to the state described by mctrl.  The relevant bits
-	of mctrl are:
-
-		- TIOCM_RTS	RTS signal.
-		- TIOCM_DTR	DTR signal.
-		- TIOCM_OUT1	OUT1 signal.
-		- TIOCM_OUT2	OUT2 signal.
-		- TIOCM_LOOP	Set the port into loopback mode.
-
-	If the appropriate bit is set, the signal should be driven
-	active.  If the bit is clear, the signal should be driven
-	inactive.
-
-	Locking: port->lock taken.
-
-	Interrupts: locally disabled.
-
-	This call must not sleep
-
-  get_mctrl(port)
-	Returns the current state of modem control inputs.  The state
-	of the outputs should not be returned, since the core keeps
-	track of their state.  The state information should include:
-
-		- TIOCM_CAR	state of DCD signal
-		- TIOCM_CTS	state of CTS signal
-		- TIOCM_DSR	state of DSR signal
-		- TIOCM_RI	state of RI signal
-
-	The bit is set if the signal is currently driven active.  If
-	the port does not support CTS, DCD or DSR, the driver should
-	indicate that the signal is permanently active.  If RI is
-	not available, the signal should not be indicated as active.
-
-	Locking: port->lock taken.
-
-	Interrupts: locally disabled.
-
-	This call must not sleep
-
-  stop_tx(port)
-	Stop transmitting characters.  This might be due to the CTS
-	line becoming inactive or the tty layer indicating we want
-	to stop transmission due to an XOFF character.
-
-	The driver should stop transmitting characters as soon as
-	possible.
-
-	Locking: port->lock taken.
-
-	Interrupts: locally disabled.
-
-	This call must not sleep
-
-  start_tx(port)
-	Start transmitting characters.
-
-	Locking: port->lock taken.
-
-	Interrupts: locally disabled.
-
-	This call must not sleep
-
-  throttle(port)
-	Notify the serial driver that input buffers for the line discipline are
-	close to full, and it should somehow signal that no more characters
-	should be sent to the serial port.
-	This will be called only if hardware assisted flow control is enabled.
-
-	Locking: serialized with .unthrottle() and termios modification by the
-	tty layer.
-
-  unthrottle(port)
-	Notify the serial driver that characters can now be sent to the serial
-	port without fear of overrunning the input buffers of the line
-	disciplines.
-
-	This will be called only if hardware assisted flow control is enabled.
-
-	Locking: serialized with .throttle() and termios modification by the
-	tty layer.
-
-  send_xchar(port,ch)
-	Transmit a high priority character, even if the port is stopped.
-	This is used to implement XON/XOFF flow control and tcflow().  If
-	the serial driver does not implement this function, the tty core
-	will append the character to the circular buffer and then call
-	start_tx() / stop_tx() to flush the data out.
-
-	Do not transmit if ch == '\0' (__DISABLED_CHAR).
-
-	Locking: none.
-
-	Interrupts: caller dependent.
-
-  stop_rx(port)
-	Stop receiving characters; the port is in the process of
-	being closed.
-
-	Locking: port->lock taken.
-
-	Interrupts: locally disabled.
-
-	This call must not sleep
-
-  enable_ms(port)
-	Enable the modem status interrupts.
-
-	This method may be called multiple times.  Modem status
-	interrupts should be disabled when the shutdown method is
-	called.
-
-	Locking: port->lock taken.
-
-	Interrupts: locally disabled.
-
-	This call must not sleep
-
-  break_ctl(port,ctl)
-	Control the transmission of a break signal.  If ctl is
-	nonzero, the break signal should be transmitted.  The signal
-	should be terminated when another call is made with a zero
-	ctl.
-
-	Locking: caller holds tty_port->mutex
-
-  startup(port)
-	Grab any interrupt resources and initialise any low level driver
-	state.  Enable the port for reception.  It should not activate
-	RTS nor DTR; this will be done via a separate call to set_mctrl.
-
-	This method will only be called when the port is initially opened.
-
-	Locking: port_sem taken.
-
-	Interrupts: globally disabled.
-
-  shutdown(port)
-	Disable the port, disable any break condition that may be in
-	effect, and free any interrupt resources.  It should not disable
-	RTS nor DTR; this will have already been done via a separate
-	call to set_mctrl.
-
-	Drivers must not access port->state once this call has completed.
-
-	This method will only be called when there are no more users of
-	this port.
-
-	Locking: port_sem taken.
-
-	Interrupts: caller dependent.
-
-  flush_buffer(port)
-	Flush any write buffers, reset any DMA state and stop any
-	ongoing DMA transfers.
-
-	This will be called whenever the port->state->xmit circular
-	buffer is cleared.
-
-	Locking: port->lock taken.
-
-	Interrupts: locally disabled.
-
-	This call must not sleep
-
-  set_termios(port,termios,oldtermios)
-	Change the port parameters, including word length, parity, stop
-	bits.  Update read_status_mask and ignore_status_mask to indicate
-	the types of events we are interested in receiving.  Relevant
-	termios->c_cflag bits are:
-
-		CSIZE
-			- word size
-		CSTOPB
-			- 2 stop bits
-		PARENB
-			- parity enable
-		PARODD
-			- odd parity (when PARENB is in force)
-		CREAD
-			- enable reception of characters (if not set,
-			  still receive characters from the port, but
-			  throw them away.
-		CRTSCTS
-			- if set, enable CTS status change reporting
-		CLOCAL
-			- if not set, enable modem status change
-			  reporting.
-
-	Relevant termios->c_iflag bits are:
-
-		INPCK
-			- enable frame and parity error events to be
-			  passed to the TTY layer.
-		BRKINT / PARMRK
-			- both of these enable break events to be
-			  passed to the TTY layer.
-
-		IGNPAR
-			- ignore parity and framing errors
-		IGNBRK
-			- ignore break errors,  If IGNPAR is also
-			  set, ignore overrun errors as well.
-
-	The interaction of the iflag bits is as follows (parity error
-	given as an example):
-
-	=============== ======= ======  =============================
-	Parity error	INPCK	IGNPAR
-	=============== ======= ======  =============================
-	n/a		0	n/a	character received, marked as
-					TTY_NORMAL
-	None		1	n/a	character received, marked as
-					TTY_NORMAL
-	Yes		1	0	character received, marked as
-					TTY_PARITY
-	Yes		1	1	character discarded
-	=============== ======= ======  =============================
-
-	Other flags may be used (eg, xon/xoff characters) if your
-	hardware supports hardware "soft" flow control.
-
-	Locking: caller holds tty_port->mutex
-
-	Interrupts: caller dependent.
-
-	This call must not sleep
-
-  set_ldisc(port,termios)
-	Notifier for discipline change. See Documentation/serial/tty.rst.
-
-	Locking: caller holds tty_port->mutex
-
-  pm(port,state,oldstate)
-	Perform any power management related activities on the specified
-	port.  State indicates the new state (defined by
-	enum uart_pm_state), oldstate indicates the previous state.
-
-	This function should not be used to grab any resources.
-
-	This will be called when the port is initially opened and finally
-	closed, except when the port is also the system console.  This
-	will occur even if CONFIG_PM is not set.
-
-	Locking: none.
-
-	Interrupts: caller dependent.
-
-  type(port)
-	Return a pointer to a string constant describing the specified
-	port, or return NULL, in which case the string 'unknown' is
-	substituted.
-
-	Locking: none.
-
-	Interrupts: caller dependent.
-
-  release_port(port)
-	Release any memory and IO region resources currently in use by
-	the port.
-
-	Locking: none.
-
-	Interrupts: caller dependent.
-
-  request_port(port)
-	Request any memory and IO region resources required by the port.
-	If any fail, no resources should be registered when this function
-	returns, and it should return -EBUSY on failure.
-
-	Locking: none.
-
-	Interrupts: caller dependent.
-
-  config_port(port,type)
-	Perform any autoconfiguration steps required for the port.  `type`
-	contains a bit mask of the required configuration.  UART_CONFIG_TYPE
-	indicates that the port requires detection and identification.
-	port->type should be set to the type found, or PORT_UNKNOWN if
-	no port was detected.
-
-	UART_CONFIG_IRQ indicates autoconfiguration of the interrupt signal,
-	which should be probed using standard kernel autoprobing techniques.
-	This is not necessary on platforms where ports have interrupts
-	internally hard wired (eg, system on a chip implementations).
-
-	Locking: none.
-
-	Interrupts: caller dependent.
-
-  verify_port(port,serinfo)
-	Verify the new serial port information contained within serinfo is
-	suitable for this port type.
-
-	Locking: none.
-
-	Interrupts: caller dependent.
-
-  ioctl(port,cmd,arg)
-	Perform any port specific IOCTLs.  IOCTL commands must be defined
-	using the standard numbering system found in <asm/ioctl.h>
-
-	Locking: none.
-
-	Interrupts: caller dependent.
-
-  poll_init(port)
-	Called by kgdb to perform the minimal hardware initialization needed
-	to support poll_put_char() and poll_get_char().  Unlike ->startup()
-	this should not request interrupts.
-
-	Locking: tty_mutex and tty_port->mutex taken.
-
-	Interrupts: n/a.
-
-  poll_put_char(port,ch)
-	Called by kgdb to write a single character directly to the serial
-	port.  It can and should block until there is space in the TX FIFO.
-
-	Locking: none.
-
-	Interrupts: caller dependent.
-
-	This call must not sleep
-
-  poll_get_char(port)
-	Called by kgdb to read a single character directly from the serial
-	port.  If data is available, it should be returned; otherwise
-	the function should return NO_POLL_CHAR immediately.
-
-	Locking: none.
-
-	Interrupts: caller dependent.
-
-	This call must not sleep
-
-Other functions
----------------
-
-uart_update_timeout(port,cflag,baud)
-	Update the FIFO drain timeout, port->timeout, according to the
-	number of bits, parity, stop bits and baud rate.
-
-	Locking: caller is expected to take port->lock
-
-	Interrupts: n/a
-
-uart_get_baud_rate(port,termios,old,min,max)
-	Return the numeric baud rate for the specified termios, taking
-	account of the special 38400 baud "kludge".  The B0 baud rate
-	is mapped to 9600 baud.
-
-	If the baud rate is not within min..max, then if old is non-NULL,
-	the original baud rate will be tried.  If that exceeds the
-	min..max constraint, 9600 baud will be returned.  termios will
-	be updated to the baud rate in use.
-
-	Note: min..max must always allow 9600 baud to be selected.
-
-	Locking: caller dependent.
-
-	Interrupts: n/a
-
-uart_get_divisor(port,baud)
-	Return the divisor (baud_base / baud) for the specified baud
-	rate, appropriately rounded.
-
-	If 38400 baud and custom divisor is selected, return the
-	custom divisor instead.
-
-	Locking: caller dependent.
-
-	Interrupts: n/a
-
-uart_match_port(port1,port2)
-	This utility function can be used to determine whether two
-	uart_port structures describe the same port.
-
-	Locking: n/a
-
-	Interrupts: n/a
-
-uart_write_wakeup(port)
-	A driver is expected to call this function when the number of
-	characters in the transmit buffer have dropped below a threshold.
-
-	Locking: port->lock should be held.
-
-	Interrupts: n/a
-
-uart_register_driver(drv)
-	Register a uart driver with the core driver.  We in turn register
-	with the tty layer, and initialise the core driver per-port state.
-
-	drv->port should be NULL, and the per-port structures should be
-	registered using uart_add_one_port after this call has succeeded.
-
-	Locking: none
-
-	Interrupts: enabled
-
-uart_unregister_driver()
-	Remove all references to a driver from the core driver.  The low
-	level driver must have removed all its ports via the
-	uart_remove_one_port() if it registered them with uart_add_one_port().
-
-	Locking: none
-
-	Interrupts: enabled
-
-**uart_suspend_port()**
-
-**uart_resume_port()**
-
-**uart_add_one_port()**
-
-**uart_remove_one_port()**
-
-Other notes
------------
-
-It is intended some day to drop the 'unused' entries from uart_port, and
-allow low level drivers to register their own individual uart_port's with
-the core.  This will allow drivers to use uart_port as a pointer to a
-structure containing both the uart_port entry with their own extensions,
-thus::
-
-	struct my_port {
-		struct uart_port	port;
-		int			my_stuff;
-	};
-
-Modem control lines via GPIO
-----------------------------
-
-Some helpers are provided in order to set/get modem control lines via GPIO.
-
-mctrl_gpio_init(port, idx):
-	This will get the {cts,rts,...}-gpios from device tree if they are
-	present and request them, set direction etc, and return an
-	allocated structure. `devm_*` functions are used, so there's no need
-	to call mctrl_gpio_free().
-	As this sets up the irq handling make sure to not handle changes to the
-	gpio input lines in your driver, too.
-
-mctrl_gpio_free(dev, gpios):
-	This will free the requested gpios in mctrl_gpio_init().
-	As `devm_*` functions are used, there's generally no need to call
-	this function.
-
-mctrl_gpio_to_gpiod(gpios, gidx)
-	This returns the gpio_desc structure associated to the modem line
-	index.
-
-mctrl_gpio_set(gpios, mctrl):
-	This will sets the gpios according to the mctrl state.
-
-mctrl_gpio_get(gpios, mctrl):
-	This will update mctrl with the gpios values.
-
-mctrl_gpio_enable_ms(gpios):
-	Enables irqs and handling of changes to the ms lines.
-
-mctrl_gpio_disable_ms(gpios):
-	Disables irqs and handling of changes to the ms lines.
diff --git a/Documentation/serial/index.rst b/Documentation/serial/index.rst
deleted file mode 100644
index d0ba22ea23bf..000000000000
--- a/Documentation/serial/index.rst
+++ /dev/null
@@ -1,32 +0,0 @@
-:orphan:
-
-==========================
-Support for Serial devices
-==========================
-
-.. toctree::
-    :maxdepth: 1
-
-
-    driver
-    tty
-
-Serial drivers
-==============
-
-.. toctree::
-    :maxdepth: 1
-
-    cyclades_z
-    moxa-smartio
-    n_gsm
-    rocket
-    serial-iso7816
-    serial-rs485
-
-.. only::  subproject and html
-
-   Indices
-   =======
-
-   * :ref:`genindex`
diff --git a/Documentation/serial/moxa-smartio.rst b/Documentation/serial/moxa-smartio.rst
deleted file mode 100644
index 156100f17c3f..000000000000
--- a/Documentation/serial/moxa-smartio.rst
+++ /dev/null
@@ -1,615 +0,0 @@
-=============================================================
-MOXA Smartio/Industio Family Device Driver Installation Guide
-=============================================================
-
-.. note::
-
-   This file is outdated. It needs some care in order to make it
-   updated to Kernel 5.0 and upper
-
-Copyright (C) 2008, Moxa Inc.
-
-Date: 01/21/2008
-
-.. Content
-
-   1. Introduction
-   2. System Requirement
-   3. Installation
-      3.1 Hardware installation
-      3.2 Driver files
-      3.3 Device naming convention
-      3.4 Module driver configuration
-      3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x.
-      3.6 Custom configuration
-      3.7 Verify driver installation
-   4. Utilities
-   5. Setserial
-   6. Troubleshooting
-
-1. Introduction
-^^^^^^^^^^^^^^^
-
-   The Smartio/Industio/UPCI family Linux driver supports following multiport
-   boards.
-
-    - 2 ports multiport board
-	CP-102U, CP-102UL, CP-102UF
-	CP-132U-I, CP-132UL,
-	CP-132, CP-132I, CP132S, CP-132IS,
-	CI-132, CI-132I, CI-132IS,
-	(C102H, C102HI, C102HIS, C102P, CP-102, CP-102S)
-
-    - 4 ports multiport board
-	CP-104EL,
-	CP-104UL, CP-104JU,
-	CP-134U, CP-134U-I,
-	C104H/PCI, C104HS/PCI,
-	CP-114, CP-114I, CP-114S, CP-114IS, CP-114UL,
-	C104H, C104HS,
-	CI-104J, CI-104JS,
-	CI-134, CI-134I, CI-134IS,
-	(C114HI, CT-114I, C104P),
-	POS-104UL,
-	CB-114,
-	CB-134I
-
-    - 8 ports multiport board
-	CP-118EL, CP-168EL,
-	CP-118U, CP-168U,
-	C168H/PCI,
-	C168H, C168HS,
-	(C168P),
-	CB-108
-
-   This driver and installation procedure have been developed upon Linux Kernel
-   2.4.x and 2.6.x. This driver supports Intel x86 hardware platform. In order
-   to maintain compatibility, this version has also been properly tested with
-   RedHat, Mandrake, Fedora and S.u.S.E Linux. However, if compatibility problem
-   occurs, please contact Moxa at support@moxa.com.tw.
-
-   In addition to device driver, useful utilities are also provided in this
-   version. They are:
-
-    - msdiag
-		 Diagnostic program for displaying installed Moxa
-                 Smartio/Industio boards.
-    - msmon
-		 Monitor program to observe data count and line status signals.
-    - msterm     A simple terminal program which is useful in testing serial
-	         ports.
-    - io-irq.exe
-		 Configuration program to setup ISA boards. Please note that
-                 this program can only be executed under DOS.
-
-   All the drivers and utilities are published in form of source code under
-   GNU General Public License in this version. Please refer to GNU General
-   Public License announcement in each source code file for more detail.
-
-   In Moxa's Web sites, you may always find latest driver at http://www.moxa.com/.
-
-   This version of driver can be installed as Loadable Module (Module driver)
-   or built-in into kernel (Static driver). You may refer to following
-   installation procedure for suitable one. Before you install the driver,
-   please refer to hardware installation procedure in the User's Manual.
-
-   We assume the user should be familiar with following documents.
-
-   - Serial-HOWTO
-   - Kernel-HOWTO
-
-2. System Requirement
-^^^^^^^^^^^^^^^^^^^^^
-
-   - Hardware platform: Intel x86 machine
-   - Kernel version: 2.4.x or 2.6.x
-   - gcc version 2.72 or later
-   - Maximum 4 boards can be installed in combination
-
-3. Installation
-^^^^^^^^^^^^^^^
-
-3.1 Hardware installation
-=========================
-
-   There are two types of buses, ISA and PCI, for Smartio/Industio
-   family multiport board.
-
-ISA board
----------
-
-   You'll have to configure CAP address, I/O address, Interrupt Vector
-   as well as IRQ before installing this driver. Please refer to hardware
-   installation procedure in User's Manual before proceed any further.
-   Please make sure the JP1 is open after the ISA board is set properly.
-
-PCI/UPCI board
---------------
-
-   You may need to adjust IRQ usage in BIOS to avoid from IRQ conflict
-   with other ISA devices. Please refer to hardware installation
-   procedure in User's Manual in advance.
-
-PCI IRQ Sharing
----------------
-
-   Each port within the same multiport board shares the same IRQ. Up to
-   4 Moxa Smartio/Industio PCI Family multiport boards can be installed
-   together on one system and they can share the same IRQ.
-
-
-3.2 Driver files
-================
-
-   The driver file may be obtained from ftp, CD-ROM or floppy disk. The
-   first step, anyway, is to copy driver file "mxser.tgz" into specified
-   directory. e.g. /moxa. The execute commands as below::
-
-       # cd /
-       # mkdir moxa
-       # cd /moxa
-       # tar xvf /dev/fd0
-
-or::
-
-       # cd /
-       # mkdir moxa
-       # cd /moxa
-       # cp /mnt/cdrom/<driver directory>/mxser.tgz .
-       # tar xvfz mxser.tgz
-
-
-3.3 Device naming convention
-============================
-
-   You may find all the driver and utilities files in /moxa/mxser.
-   Following installation procedure depends on the model you'd like to
-   run the driver. If you prefer module driver, please refer to 3.4.
-   If static driver is required, please refer to 3.5.
-
-Dialin and callout port
------------------------
-
-   This driver remains traditional serial device properties. There are
-   two special file name for each serial port. One is dial-in port
-   which is named "ttyMxx". For callout port, the naming convention
-   is "cumxx".
-
-Device naming when more than 2 boards installed
------------------------------------------------
-
-   Naming convention for each Smartio/Industio multiport board is
-   pre-defined as below.
-
-   ============ ===============       ==============
-   Board Num.	 Dial-in Port	      Callout port
-   1st board	ttyM0  - ttyM7	      cum0  - cum7
-   2nd board	ttyM8  - ttyM15       cum8  - cum15
-   3rd board	ttyM16 - ttyM23       cum16 - cum23
-   4th board	ttyM24 - ttym31       cum24 - cum31
-   ============ ===============       ==============
-
-.. note::
-
-   Under Kernel 2.6 and upper, the cum Device is Obsolete. So use ttyM*
-   device instead.
-
-Board sequence
---------------
-
-   This driver will activate ISA boards according to the parameter set
-   in the driver. After all specified ISA board activated, PCI board
-   will be installed in the system automatically driven.
-   Therefore the board number is sorted by the CAP address of ISA boards.
-   For PCI boards, their sequence will be after ISA boards and C168H/PCI
-   has higher priority than C104H/PCI boards.
-
-3.4 Module driver configuration
-===============================
-
-   Module driver is easiest way to install. If you prefer static driver
-   installation, please skip this paragraph.
-
-
-   ------------- Prepare to use the MOXA driver --------------------
-
-3.4.1 Create tty device with correct major number
--------------------------------------------------
-
-   Before using MOXA driver, your system must have the tty devices
-   which are created with driver's major number. We offer one shell
-   script "msmknod" to simplify the procedure.
-   This step is only needed to be executed once. But you still
-   need to do this procedure when:
-
-   a. You change the driver's major number. Please refer the "3.7"
-      section.
-   b. Your total installed MOXA boards number is changed. Maybe you
-      add/delete one MOXA board.
-   c. You want to change the tty name. This needs to modify the
-      shell script "msmknod"
-
-   The procedure is::
-
-	 # cd /moxa/mxser/driver
-	 # ./msmknod
-
-   This shell script will require the major number for dial-in
-   device and callout device to create tty device. You also need
-   to specify the total installed MOXA board number. Default major
-   numbers for dial-in device and callout device are 30, 35. If
-   you need to change to other number, please refer section "3.7"
-   for more detailed procedure.
-   Msmknod will delete any special files occupying the same device
-   naming.
-
-3.4.2 Build the MOXA driver and utilities
------------------------------------------
-
-   Before using the MOXA driver and utilities, you need compile the
-   all the source code. This step is only need to be executed once.
-   But you still re-compile the source code if you modify the source
-   code. For example, if you change the driver's major number (see
-   "3.7" section), then you need to do this step again.
-
-   Find "Makefile" in /moxa/mxser, then run
-
-	 # make clean; make install
-
-   ..note::
-
-	 For Red Hat 9, Red Hat Enterprise Linux AS3/ES3/WS3 & Fedora Core1:
-	 # make clean; make installsp1
-
-	 For Red Hat Enterprise Linux AS4/ES4/WS4:
-	 # make clean; make installsp2
-
-   The driver files "mxser.o" and utilities will be properly compiled
-   and copied to system directories respectively.
-
-------------- Load MOXA driver--------------------
-
-3.4.3 Load the MOXA driver
---------------------------
-
-   ::
-
-	 # modprobe mxser <argument>
-
-   will activate the module driver. You may run "lsmod" to check
-   if "mxser" is activated. If the MOXA board is ISA board, the
-   <argument> is needed. Please refer to section "3.4.5" for more
-   information.
-
-------------- Load MOXA driver on boot --------------------
-
-3.4.4 Load the mxser driver
----------------------------
-
-
-   For the above description, you may manually execute
-   "modprobe mxser" to activate this driver and run
-   "rmmod mxser" to remove it.
-
-   However, it's better to have a boot time configuration to
-   eliminate manual operation. Boot time configuration can be
-   achieved by rc file. We offer one "rc.mxser" file to simplify
-   the procedure under "moxa/mxser/driver".
-
-   But if you use ISA board, please modify the "modprobe ..." command
-   to add the argument (see "3.4.5" section). After modifying the
-   rc.mxser, please try to execute "/moxa/mxser/driver/rc.mxser"
-   manually to make sure the modification is ok. If any error
-   encountered, please try to modify again. If the modification is
-   completed, follow the below step.
-
-   Run following command for setting rc files::
-
-	 # cd /moxa/mxser/driver
-	 # cp ./rc.mxser /etc/rc.d
-	 # cd /etc/rc.d
-
-   Check "rc.serial" is existed or not. If "rc.serial" doesn't exist,
-   create it by vi, run "chmod 755 rc.serial" to change the permission.
-
-   Add "/etc/rc.d/rc.mxser" in last line.
-
-   Reboot and check if moxa.o activated by "lsmod" command.
-
-3.4.5. specify CAP address
---------------------------
-
-   If you'd like to drive Smartio/Industio ISA boards in the system,
-   you'll have to add parameter to specify CAP address of given
-   board while activating "mxser.o". The format for parameters are
-   as follows.::
-
-	   modprobe mxser ioaddr=0x???,0x???,0x???,0x???
-				  |  |  |    |
-				  |  |  |    +- 4th ISA board
-				  |  |  +------ 3rd ISA board
-				  |  +------------ 2nd ISA board
-				  +-------------------1st ISA board
-
-3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x
-================================================================
-
-    Note:
-          To use static driver, you must install the linux kernel
-          source package.
-
-3.5.1 Backup the built-in driver in the kernel
-----------------------------------------------
-
-    ::
-
-       # cd /usr/src/linux/drivers/char
-       # mv mxser.c mxser.c.old
-
-       For Red Hat 7.x user, you need to create link:
-       # cd /usr/src
-       # ln -s linux-2.4 linux
-
-3.5.2 Create link
------------------
-    ::
-
-	  # cd /usr/src/linux/drivers/char
-	  # ln -s /moxa/mxser/driver/mxser.c mxser.c
-
-3.5.3 Add CAP address list for ISA boards.
-------------------------------------------
-
-    For PCI boards user, please skip this step.
-
-    In module mode, the CAP address for ISA board is given by
-    parameter. In static driver configuration, you'll have to
-    assign it within driver's source code. If you will not
-    install any ISA boards, you may skip to next portion.
-    The instructions to modify driver source code are as
-    below.
-
-    a. run::
-
-	# cd /moxa/mxser/driver
-	# vi mxser.c
-
-    b. Find the array mxserBoardCAP[] as below::
-
-	  static int mxserBoardCAP[] = {0x00, 0x00, 0x00, 0x00};
-
-    c. Change the address within this array using vi. For
-       example, to driver 2 ISA boards with CAP address
-       0x280 and 0x180 as 1st and 2nd board. Just to change
-       the source code as follows::
-
-	  static int mxserBoardCAP[] = {0x280, 0x180, 0x00, 0x00};
-
-3.5.4 Setup kernel configuration
---------------------------------
-
-    Configure the kernel::
-
-      # cd /usr/src/linux
-      # make menuconfig
-
-    You will go into a menu-driven system. Please select [Character
-    devices][Non-standard serial port support], enable the [Moxa
-    SmartIO support] driver with "[*]" for built-in (not "[M]"), then
-    select [Exit] to exit this program.
-
-3.5.5 Rebuild kernel
---------------------
-
-    The following are for Linux kernel rebuilding, for your
-    reference only.
-
-    For appropriate details, please refer to the Linux document:
-
-        a. Run the following commands::
-
-	     cd /usr/src/linux
-	     make clean		     # take a few minutes
-	     make dep		     # take a few minutes
-	     make bzImage	     # take probably 10-20 minutes
-	     make install	     # copy boot image to correct position
-
-	f. Please make sure the boot kernel (vmlinuz) is in the
-	   correct position.
-	g. If you use 'lilo' utility, you should check /etc/lilo.conf
-	   'image' item specified the path which is the 'vmlinuz' path,
-	   or you will load wrong (or old) boot kernel image (vmlinuz).
-	   After checking /etc/lilo.conf, please run "lilo".
-
-	  Note that if the result of "make bzImage" is ERROR, then you have to
-	  go back to Linux configuration Setup. Type "make menuconfig" in
-          directory /usr/src/linux.
-
-
-3.5.6 Make tty device and special file
---------------------------------------
-
-    ::
-       # cd /moxa/mxser/driver
-       # ./msmknod
-
-3.5.7 Make utility
-------------------
-
-    ::
-
-	  # cd /moxa/mxser/utility
-	  # make clean; make install
-
-3.5.8 Reboot
-------------
-
-
-
-3.6 Custom configuration
-========================
-
-    Although this driver already provides you default configuration, you
-    still can change the device name and major number. The instruction to
-    change these parameters are shown as below.
-
-a. Change Device name
-
-    If you'd like to use other device names instead of default naming
-    convention, all you have to do is to modify the internal code
-    within the shell script "msmknod". First, you have to open "msmknod"
-    by vi. Locate each line contains "ttyM" and "cum" and change them
-    to the device name you desired. "msmknod" creates the device names
-    you need next time executed.
-
-b. Change Major number
-
-    If major number 30 and 35 had been occupied, you may have to select
-    2 free major numbers for this driver. There are 3 steps to change
-    major numbers.
-
-3.6.1 Find free major numbers
------------------------------
-
-    In /proc/devices, you may find all the major numbers occupied
-    in the system. Please select 2 major numbers that are available.
-    e.g. 40, 45.
-
-3.6.2 Create special files
---------------------------
-
-   Run /moxa/mxser/driver/msmknod to create special files with
-   specified major numbers.
-
-3.6.3 Modify driver with new major number
------------------------------------------
-
-   Run vi to open /moxa/mxser/driver/mxser.c. Locate the line
-   contains "MXSERMAJOR". Change the content as below::
-
-	  #define	  MXSERMAJOR		  40
-	  #define	  MXSERCUMAJOR		  45
-
-    3.6.4 Run "make clean; make install" in /moxa/mxser/driver.
-
-3.7 Verify driver installation
-==============================
-
-    You may refer to /var/log/messages to check the latest status
-    log reported by this driver whenever it's activated.
-
-4. Utilities
-^^^^^^^^^^^^
-
-   There are 3 utilities contained in this driver. They are msdiag, msmon and
-   msterm. These 3 utilities are released in form of source code. They should
-   be compiled into executable file and copied into /usr/bin.
-
-   Before using these utilities, please load driver (refer 3.4 & 3.5) and
-   make sure you had run the "msmknod" utility.
-
-msdiag - Diagnostic
-===================
-
-   This utility provides the function to display what Moxa Smartio/Industio
-   board found by driver in the system.
-
-msmon - Port Monitoring
-=======================
-
-   This utility gives the user a quick view about all the MOXA ports'
-   activities. One can easily learn each port's total received/transmitted
-   (Rx/Tx) character count since the time when the monitoring is started.
-
-   Rx/Tx throughputs per second are also reported in interval basis (e.g.
-   the last 5 seconds) and in average basis (since the time the monitoring
-   is started). You can reset all ports' count by <HOME> key. <+> <->
-   (plus/minus) keys to change the displaying time interval. Press <ENTER>
-   on the port, that cursor stay, to view the port's communication
-   parameters, signal status, and input/output queue.
-
-msterm - Terminal Emulation
-===========================
-
-   This utility provides data sending and receiving ability of all tty ports,
-   especially for MOXA ports. It is quite useful for testing simple
-   application, for example, sending AT command to a modem connected to the
-   port or used as a terminal for login purpose. Note that this is only a
-   dumb terminal emulation without handling full screen operation.
-
-5. Setserial
-^^^^^^^^^^^^
-
-   Supported Setserial parameters are listed as below.
-
-   ============== =========================================================
-   uart		  set UART type(16450-->disable FIFO, 16550A-->enable FIFO)
-   close_delay	  set the amount of time(in 1/100 of a second) that DTR
-		  should be kept low while being closed.
-   closing_wait   set the amount of time(in 1/100 of a second) that the
-		  serial port should wait for data to be drained while
-		  being closed, before the receiver is disable.
-   spd_hi	  Use  57.6kb  when  the application requests 38.4kb.
-   spd_vhi	  Use  115.2kb	when  the application requests 38.4kb.
-   spd_shi	  Use  230.4kb	when  the application requests 38.4kb.
-   spd_warp	  Use  460.8kb	when  the application requests 38.4kb.
-   spd_normal	  Use  38.4kb  when  the application requests 38.4kb.
-   spd_cust	  Use  the custom divisor to set the speed when  the
-		  application requests 38.4kb.
-   divisor	  This option set the custom division.
-   baud_base	  This option set the base baud rate.
-   ============== =========================================================
-
-6. Troubleshooting
-^^^^^^^^^^^^^^^^^^
-
-   The boot time error messages and solutions are stated as clearly as
-   possible. If all the possible solutions fail, please contact our technical
-   support team to get more help.
-
-
-   Error msg:
-	      More than 4 Moxa Smartio/Industio family boards found. Fifth board
-              and after are ignored.
-
-   Solution:
-   To avoid this problem, please unplug fifth and after board, because Moxa
-   driver supports up to 4 boards.
-
-   Error msg:
-	      Request_irq fail, IRQ(?) may be conflict with another device.
-
-   Solution:
-   Other PCI or ISA devices occupy the assigned IRQ. If you are not sure
-   which device causes the situation, please check /proc/interrupts to find
-   free IRQ and simply change another free IRQ for Moxa board.
-
-   Error msg:
-	      Board #: C1xx Series(CAP=xxx) interrupt number invalid.
-
-   Solution:
-   Each port within the same multiport board shares the same IRQ. Please set
-   one IRQ (IRQ doesn't equal to zero) for one Moxa board.
-
-   Error msg:
-	      No interrupt vector be set for Moxa ISA board(CAP=xxx).
-
-   Solution:
-   Moxa ISA board needs an interrupt vector.Please refer to user's manual
-   "Hardware Installation" chapter to set interrupt vector.
-
-   Error msg:
-              Couldn't install MOXA Smartio/Industio family driver!
-
-   Solution:
-   Load Moxa driver fail, the major number may conflict with other devices.
-   Please refer to previous section 3.7 to change a free major number for
-   Moxa driver.
-
-   Error msg:
-              Couldn't install MOXA Smartio/Industio family callout driver!
-
-   Solution:
-   Load Moxa callout driver fail, the callout device major number may
-   conflict with other devices. Please refer to previous section 3.7 to
-   change a free callout device major number for Moxa driver.
diff --git a/Documentation/serial/n_gsm.rst b/Documentation/serial/n_gsm.rst
deleted file mode 100644
index f3ad9fd26408..000000000000
--- a/Documentation/serial/n_gsm.rst
+++ /dev/null
@@ -1,103 +0,0 @@
-==============================
-GSM 0710 tty multiplexor HOWTO
-==============================
-
-This line discipline implements the GSM 07.10 multiplexing protocol
-detailed in the following 3GPP document:
-
-	http://www.3gpp.org/ftp/Specs/archive/07_series/07.10/0710-720.zip
-
-This document give some hints on how to use this driver with GPRS and 3G
-modems connected to a physical serial port.
-
-How to use it
--------------
-1. initialize the modem in 0710 mux mode (usually AT+CMUX= command) through
-   its serial port. Depending on the modem used, you can pass more or less
-   parameters to this command,
-2. switch the serial line to using the n_gsm line discipline by using
-   TIOCSETD ioctl,
-3. configure the mux using GSMIOC_GETCONF / GSMIOC_SETCONF ioctl,
-
-Major parts of the initialization program :
-(a good starting point is util-linux-ng/sys-utils/ldattach.c)::
-
-  #include <linux/gsmmux.h>
-  #define N_GSM0710	21	/* GSM 0710 Mux */
-  #define DEFAULT_SPEED	B115200
-  #define SERIAL_PORT	/dev/ttyS0
-
-	int ldisc = N_GSM0710;
-	struct gsm_config c;
-	struct termios configuration;
-
-	/* open the serial port connected to the modem */
-	fd = open(SERIAL_PORT, O_RDWR | O_NOCTTY | O_NDELAY);
-
-	/* configure the serial port : speed, flow control ... */
-
-	/* send the AT commands to switch the modem to CMUX mode
-	   and check that it's successful (should return OK) */
-	write(fd, "AT+CMUX=0\r", 10);
-
-	/* experience showed that some modems need some time before
-	   being able to answer to the first MUX packet so a delay
-	   may be needed here in some case */
-	sleep(3);
-
-	/* use n_gsm line discipline */
-	ioctl(fd, TIOCSETD, &ldisc);
-
-	/* get n_gsm configuration */
-	ioctl(fd, GSMIOC_GETCONF, &c);
-	/* we are initiator and need encoding 0 (basic) */
-	c.initiator = 1;
-	c.encapsulation = 0;
-	/* our modem defaults to a maximum size of 127 bytes */
-	c.mru = 127;
-	c.mtu = 127;
-	/* set the new configuration */
-	ioctl(fd, GSMIOC_SETCONF, &c);
-
-	/* and wait for ever to keep the line discipline enabled */
-	daemon(0,0);
-	pause();
-
-4. create the devices corresponding to the "virtual" serial ports (take care,
-   each modem has its configuration and some DLC have dedicated functions,
-   for example GPS), starting with minor 1 (DLC0 is reserved for the management
-   of the mux)::
-
-     MAJOR=`cat /proc/devices |grep gsmtty | awk '{print $1}`
-     for i in `seq 1 4`; do
-	mknod /dev/ttygsm$i c $MAJOR $i
-     done
-
-5. use these devices as plain serial ports.
-
-   for example, it's possible:
-
-   - and to use gnokii to send / receive SMS on ttygsm1
-   - to use ppp to establish a datalink on ttygsm2
-
-6. first close all virtual ports before closing the physical port.
-
-   Note that after closing the physical port the modem is still in multiplexing
-   mode. This may prevent a successful re-opening of the port later. To avoid
-   this situation either reset the modem if your hardware allows that or send
-   a disconnect command frame manually before initializing the multiplexing mode
-   for the second time. The byte sequence for the disconnect command frame is::
-
-      0xf9, 0x03, 0xef, 0x03, 0xc3, 0x16, 0xf9.
-
-Additional Documentation
-------------------------
-More practical details on the protocol and how it's supported by industrial
-modems can be found in the following documents :
-
-- http://www.telit.com/module/infopool/download.php?id=616
-- http://www.u-blox.com/images/downloads/Product_Docs/LEON-G100-G200-MuxImplementation_ApplicationNote_%28GSM%20G1-CS-10002%29.pdf
-- http://www.sierrawireless.com/Support/Downloads/AirPrime/WMP_Series/~/media/Support_Downloads/AirPrime/Application_notes/CMUX_Feature_Application_Note-Rev004.ashx
-- http://wm.sim.com/sim/News/photo/2010721161442.pdf
-
-11-03-08 - Eric Bénard - <eric@eukrea.com>
diff --git a/Documentation/serial/rocket.rst b/Documentation/serial/rocket.rst
deleted file mode 100644
index 23761eae4282..000000000000
--- a/Documentation/serial/rocket.rst
+++ /dev/null
@@ -1,185 +0,0 @@
-================================================
-Comtrol(tm) RocketPort(R)/RocketModem(TM) Series
-================================================
-
-Device Driver for the Linux Operating System
-============================================
-
-Product overview
-----------------
-
-This driver provides a loadable kernel driver for the Comtrol RocketPort
-and RocketModem PCI boards. These boards provide, 2, 4, 8, 16, or 32
-high-speed serial ports or modems.  This driver supports up to a combination
-of four RocketPort or RocketModems boards in one machine simultaneously.
-This file assumes that you are using the RocketPort driver which is
-integrated into the kernel sources.
-
-The driver can also be installed as an external module using the usual
-"make;make install" routine.  This external module driver, obtainable
-from the Comtrol website listed below, is useful for updating the driver
-or installing it into kernels which do not have the driver configured
-into them.  Installations instructions for the external module
-are in the included README and HW_INSTALL files.
-
-RocketPort ISA and RocketModem II PCI boards currently are only supported by
-this driver in module form.
-
-The RocketPort ISA board requires I/O ports to be configured by the DIP
-switches on the board.  See the section "ISA Rocketport Boards" below for
-information on how to set the DIP switches.
-
-You pass the I/O port to the driver using the following module parameters:
-
-board1:
-	I/O port for the first ISA board
-board2:
-	I/O port for the second ISA board
-board3:
-	I/O port for the third ISA board
-board4:
-	I/O port for the fourth ISA board
-
-There is a set of utilities and scripts provided with the external driver
-(downloadable from http://www.comtrol.com) that ease the configuration and
-setup of the ISA cards.
-
-The RocketModem II PCI boards require firmware to be loaded into the card
-before it will function.  The driver has only been tested as a module for this
-board.
-
-Installation Procedures
------------------------
-
-RocketPort/RocketModem PCI cards require no driver configuration, they are
-automatically detected and configured.
-
-The RocketPort driver can be installed as a module (recommended) or built
-into the kernel. This is selected, as for other drivers, through the `make config`
-command from the root of the Linux source tree during the kernel build process.
-
-The RocketPort/RocketModem serial ports installed by this driver are assigned
-device major number 46, and will be named /dev/ttyRx, where x is the port number
-starting at zero (ex. /dev/ttyR0, /devttyR1, ...).  If you have multiple cards
-installed in the system, the mapping of port names to serial ports is displayed
-in the system log at /var/log/messages.
-
-If installed as a module, the module must be loaded.  This can be done
-manually by entering "modprobe rocket".  To have the module loaded automatically
-upon system boot, edit a `/etc/modprobe.d/*.conf` file and add the line
-"alias char-major-46 rocket".
-
-In order to use the ports, their device names (nodes) must be created with mknod.
-This is only required once, the system will retain the names once created.  To
-create the RocketPort/RocketModem device names, use the command
-"mknod /dev/ttyRx c 46 x" where x is the port number starting at zero.
-
-For example::
-
-	> mknod /dev/ttyR0 c 46 0
-	> mknod /dev/ttyR1 c 46 1
-	> mknod /dev/ttyR2 c 46 2
-
-The Linux script MAKEDEV will create the first 16 ttyRx device names (nodes)
-for you::
-
-	>/dev/MAKEDEV ttyR
-
-ISA Rocketport Boards
----------------------
-
-You must assign and configure the I/O addresses used by the ISA Rocketport
-card before installing and using it.  This is done by setting a set of DIP
-switches on the Rocketport board.
-
-
-Setting the I/O address
------------------------
-
-Before installing RocketPort(R) or RocketPort RA boards, you must find
-a range of I/O addresses for it to use. The first RocketPort card
-requires a 68-byte contiguous block of I/O addresses, starting at one
-of the following: 0x100h, 0x140h, 0x180h, 0x200h, 0x240h, 0x280h,
-0x300h, 0x340h, 0x380h.  This I/O address must be reflected in the DIP
-switches of *all* of the Rocketport cards.
-
-The second, third, and fourth RocketPort cards require a 64-byte
-contiguous block of I/O addresses, starting at one of the following
-I/O addresses: 0x100h, 0x140h, 0x180h, 0x1C0h, 0x200h, 0x240h, 0x280h,
-0x2C0h, 0x300h, 0x340h, 0x380h, 0x3C0h.  The I/O address used by the
-second, third, and fourth Rocketport cards (if present) are set via
-software control.  The DIP switch settings for the I/O address must be
-set to the value of the first Rocketport cards.
-
-In order to distinguish each of the card from the others, each card
-must have a unique board ID set on the dip switches.  The first
-Rocketport board must be set with the DIP switches corresponding to
-the first board, the second board must be set with the DIP switches
-corresponding to the second board, etc.  IMPORTANT: The board ID is
-the only place where the DIP switch settings should differ between the
-various Rocketport boards in a system.
-
-The I/O address range used by any of the RocketPort cards must not
-conflict with any other cards in the system, including other
-RocketPort cards.  Below, you will find a list of commonly used I/O
-address ranges which may be in use by other devices in your system.
-On a Linux system, "cat /proc/ioports" will also be helpful in
-identifying what I/O addresses are being used by devices on your
-system.
-
-Remember, the FIRST RocketPort uses 68 I/O addresses.  So, if you set it
-for 0x100, it will occupy 0x100 to 0x143.  This would mean that you
-CAN NOT set the second, third or fourth board for address 0x140 since
-the first 4 bytes of that range are used by the first board.  You would
-need to set the second, third, or fourth board to one of the next available
-blocks such as 0x180.
-
-RocketPort and RocketPort RA SW1 Settings::
-
-            +-------------------------------+
-            | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 |
-            +-------+-------+---------------+
-            | Unused| Card  | I/O Port Block|
-            +-------------------------------+
-
-  DIP Switches                             DIP Switches
-  7    8                                   6    5
-  ===================                      ===================
-  On   On   UNUSED, MUST BE ON.            On   On   First Card    <==== Default
-                                           On   Off  Second Card
-                                           Off  On   Third Card
-                                           Off  Off  Fourth Card
-
-  DIP Switches         I/O Address Range
-  4    3    2    1     Used by the First Card
-  =====================================
-  On   Off  On   Off   100-143
-  On   Off  Off  On    140-183
-  On   Off  Off  Off   180-1C3       <==== Default
-  Off  On   On   Off   200-243
-  Off  On   Off  On    240-283
-  Off  On   Off  Off   280-2C3
-  Off  Off  On   Off   300-343
-  Off  Off  Off  On    340-383
-  Off  Off  Off  Off   380-3C3
-
-Reporting Bugs
---------------
-
-For technical support, please provide the following
-information: Driver version, kernel release, distribution of
-kernel, and type of board you are using. Error messages and log
-printouts port configuration details are especially helpful.
-
-USA:
-    :Phone: (612) 494-4100
-    :FAX: (612) 494-4199
-    :email: support@comtrol.com
-
-Comtrol Europe:
-    :Phone: +44 (0) 1 869 323-220
-    :FAX: +44 (0) 1 869 323-211
-    :email: support@comtrol.co.uk
-
-Web:	http://www.comtrol.com
-FTP:	ftp.comtrol.com
diff --git a/Documentation/serial/serial-iso7816.rst b/Documentation/serial/serial-iso7816.rst
deleted file mode 100644
index d990143de0c6..000000000000
--- a/Documentation/serial/serial-iso7816.rst
+++ /dev/null
@@ -1,90 +0,0 @@
-=============================
-ISO7816 Serial Communications
-=============================
-
-1. Introduction
-===============
-
-  ISO/IEC7816 is a series of standards specifying integrated circuit cards (ICC)
-  also known as smart cards.
-
-2. Hardware-related considerations
-==================================
-
-  Some CPUs/UARTs (e.g., Microchip AT91) contain a built-in mode capable of
-  handling communication with a smart card.
-
-  For these microcontrollers, the Linux driver should be made capable of
-  working in both modes, and proper ioctls (see later) should be made
-  available at user-level to allow switching from one mode to the other, and
-  vice versa.
-
-3. Data Structures Already Available in the Kernel
-==================================================
-
-  The Linux kernel provides the serial_iso7816 structure (see [1]) to handle
-  ISO7816 communications. This data structure is used to set and configure
-  ISO7816 parameters in ioctls.
-
-  Any driver for devices capable of working both as RS232 and ISO7816 should
-  implement the iso7816_config callback in the uart_port structure. The
-  serial_core calls iso7816_config to do the device specific part in response
-  to TIOCGISO7816 and TIOCSISO7816 ioctls (see below). The iso7816_config
-  callback receives a pointer to struct serial_iso7816.
-
-4. Usage from user-level
-========================
-
-  From user-level, ISO7816 configuration can be get/set using the previous
-  ioctls. For instance, to set ISO7816 you can use the following code::
-
-	#include <linux/serial.h>
-
-	/* Include definition for ISO7816 ioctls: TIOCSISO7816 and TIOCGISO7816 */
-	#include <sys/ioctl.h>
-
-	/* Open your specific device (e.g., /dev/mydevice): */
-	int fd = open ("/dev/mydevice", O_RDWR);
-	if (fd < 0) {
-		/* Error handling. See errno. */
-	}
-
-	struct serial_iso7816 iso7816conf;
-
-	/* Reserved fields as to be zeroed */
-	memset(&iso7816conf, 0, sizeof(iso7816conf));
-
-	/* Enable ISO7816 mode: */
-	iso7816conf.flags |= SER_ISO7816_ENABLED;
-
-	/* Select the protocol: */
-	/* T=0 */
-	iso7816conf.flags |= SER_ISO7816_T(0);
-	/* or T=1 */
-	iso7816conf.flags |= SER_ISO7816_T(1);
-
-	/* Set the guard time: */
-	iso7816conf.tg = 2;
-
-	/* Set the clock frequency*/
-	iso7816conf.clk = 3571200;
-
-	/* Set transmission factors: */
-	iso7816conf.sc_fi = 372;
-	iso7816conf.sc_di = 1;
-
-	if (ioctl(fd_usart, TIOCSISO7816, &iso7816conf) < 0) {
-		/* Error handling. See errno. */
-	}
-
-	/* Use read() and write() syscalls here... */
-
-	/* Close the device when finished: */
-	if (close (fd) < 0) {
-		/* Error handling. See errno. */
-	}
-
-5. References
-=============
-
- [1]    include/uapi/linux/serial.h
diff --git a/Documentation/serial/serial-rs485.rst b/Documentation/serial/serial-rs485.rst
deleted file mode 100644
index 6bc824f948f9..000000000000
--- a/Documentation/serial/serial-rs485.rst
+++ /dev/null
@@ -1,103 +0,0 @@
-===========================
-RS485 Serial Communications
-===========================
-
-1. Introduction
-===============
-
-   EIA-485, also known as TIA/EIA-485 or RS-485, is a standard defining the
-   electrical characteristics of drivers and receivers for use in balanced
-   digital multipoint systems.
-   This standard is widely used for communications in industrial automation
-   because it can be used effectively over long distances and in electrically
-   noisy environments.
-
-2. Hardware-related Considerations
-==================================
-
-   Some CPUs/UARTs (e.g., Atmel AT91 or 16C950 UART) contain a built-in
-   half-duplex mode capable of automatically controlling line direction by
-   toggling RTS or DTR signals. That can be used to control external
-   half-duplex hardware like an RS485 transceiver or any RS232-connected
-   half-duplex devices like some modems.
-
-   For these microcontrollers, the Linux driver should be made capable of
-   working in both modes, and proper ioctls (see later) should be made
-   available at user-level to allow switching from one mode to the other, and
-   vice versa.
-
-3. Data Structures Already Available in the Kernel
-==================================================
-
-   The Linux kernel provides the serial_rs485 structure (see [1]) to handle
-   RS485 communications. This data structure is used to set and configure RS485
-   parameters in the platform data and in ioctls.
-
-   The device tree can also provide RS485 boot time parameters (see [2]
-   for bindings). The driver is in charge of filling this data structure from
-   the values given by the device tree.
-
-   Any driver for devices capable of working both as RS232 and RS485 should
-   implement the rs485_config callback in the uart_port structure. The
-   serial_core calls rs485_config to do the device specific part in response
-   to TIOCSRS485 and TIOCGRS485 ioctls (see below). The rs485_config callback
-   receives a pointer to struct serial_rs485.
-
-4. Usage from user-level
-========================
-
-   From user-level, RS485 configuration can be get/set using the previous
-   ioctls. For instance, to set RS485 you can use the following code::
-
-	#include <linux/serial.h>
-
-	/* Include definition for RS485 ioctls: TIOCGRS485 and TIOCSRS485 */
-	#include <sys/ioctl.h>
-
-	/* Open your specific device (e.g., /dev/mydevice): */
-	int fd = open ("/dev/mydevice", O_RDWR);
-	if (fd < 0) {
-		/* Error handling. See errno. */
-	}
-
-	struct serial_rs485 rs485conf;
-
-	/* Enable RS485 mode: */
-	rs485conf.flags |= SER_RS485_ENABLED;
-
-	/* Set logical level for RTS pin equal to 1 when sending: */
-	rs485conf.flags |= SER_RS485_RTS_ON_SEND;
-	/* or, set logical level for RTS pin equal to 0 when sending: */
-	rs485conf.flags &= ~(SER_RS485_RTS_ON_SEND);
-
-	/* Set logical level for RTS pin equal to 1 after sending: */
-	rs485conf.flags |= SER_RS485_RTS_AFTER_SEND;
-	/* or, set logical level for RTS pin equal to 0 after sending: */
-	rs485conf.flags &= ~(SER_RS485_RTS_AFTER_SEND);
-
-	/* Set rts delay before send, if needed: */
-	rs485conf.delay_rts_before_send = ...;
-
-	/* Set rts delay after send, if needed: */
-	rs485conf.delay_rts_after_send = ...;
-
-	/* Set this flag if you want to receive data even while sending data */
-	rs485conf.flags |= SER_RS485_RX_DURING_TX;
-
-	if (ioctl (fd, TIOCSRS485, &rs485conf) < 0) {
-		/* Error handling. See errno. */
-	}
-
-	/* Use read() and write() syscalls here... */
-
-	/* Close the device when finished: */
-	if (close (fd) < 0) {
-		/* Error handling. See errno. */
-	}
-
-5. References
-=============
-
- [1]	include/uapi/linux/serial.h
-
- [2]	Documentation/devicetree/bindings/serial/rs485.txt
diff --git a/Documentation/serial/tty.rst b/Documentation/serial/tty.rst
deleted file mode 100644
index dd972caacf3e..000000000000
--- a/Documentation/serial/tty.rst
+++ /dev/null
@@ -1,328 +0,0 @@
-=================
-The Lockronomicon
-=================
-
-Your guide to the ancient and twisted locking policies of the tty layer and
-the warped logic behind them. Beware all ye who read on.
-
-
-Line Discipline
----------------
-
-Line disciplines are registered with tty_register_ldisc() passing the
-discipline number and the ldisc structure. At the point of registration the
-discipline must be ready to use and it is possible it will get used before
-the call returns success. If the call returns an error then it won't get
-called. Do not re-use ldisc numbers as they are part of the userspace ABI
-and writing over an existing ldisc will cause demons to eat your computer.
-After the return the ldisc data has been copied so you may free your own
-copy of the structure. You must not re-register over the top of the line
-discipline even with the same data or your computer again will be eaten by
-demons.
-
-In order to remove a line discipline call tty_unregister_ldisc().
-In ancient times this always worked. In modern times the function will
-return -EBUSY if the ldisc is currently in use. Since the ldisc referencing
-code manages the module counts this should not usually be a concern.
-
-Heed this warning: the reference count field of the registered copies of the
-tty_ldisc structure in the ldisc table counts the number of lines using this
-discipline. The reference count of the tty_ldisc structure within a tty
-counts the number of active users of the ldisc at this instant. In effect it
-counts the number of threads of execution within an ldisc method (plus those
-about to enter and exit although this detail matters not).
-
-Line Discipline Methods
------------------------
-
-TTY side interfaces
-^^^^^^^^^^^^^^^^^^^
-
-======================= =======================================================
-open()			Called when the line discipline is attached to
-			the terminal. No other call into the line
-			discipline for this tty will occur until it
-			completes successfully. Should initialize any
-			state needed by the ldisc, and set receive_room
-			in the tty_struct to the maximum amount of data
-			the line discipline is willing to accept from the
-			driver with a single call to receive_buf().
-			Returning an error will prevent the ldisc from
-			being attached. Can sleep.
-
-close()			This is called on a terminal when the line
-			discipline is being unplugged. At the point of
-			execution no further users will enter the
-			ldisc code for this tty. Can sleep.
-
-hangup()		Called when the tty line is hung up.
-			The line discipline should cease I/O to the tty.
-			No further calls into the ldisc code will occur.
-			The return value is ignored. Can sleep.
-
-read()			(optional) A process requests reading data from
-			the line. Multiple read calls may occur in parallel
-			and the ldisc must deal with serialization issues.
-			If not defined, the process will receive an EIO
-			error. May sleep.
-
-write()			(optional) A process requests writing data to the
-			line. Multiple write calls are serialized by the
-			tty layer for the ldisc. If not defined, the
-			process will receive an EIO error. May sleep.
-
-flush_buffer()		(optional) May be called at any point between
-			open and close, and instructs the line discipline
-			to empty its input buffer.
-
-set_termios()		(optional) Called on termios structure changes.
-			The caller passes the old termios data and the
-			current data is in the tty. Called under the
-			termios semaphore so allowed to sleep. Serialized
-			against itself only.
-
-poll()			(optional) Check the status for the poll/select
-			calls. Multiple poll calls may occur in parallel.
-			May sleep.
-
-ioctl()			(optional) Called when an ioctl is handed to the
-			tty layer that might be for the ldisc. Multiple
-			ioctl calls may occur in parallel. May sleep.
-
-compat_ioctl()		(optional) Called when a 32 bit ioctl is handed
-			to the tty layer that might be for the ldisc.
-			Multiple ioctl calls may occur in parallel.
-			May sleep.
-======================= =======================================================
-
-Driver Side Interfaces
-^^^^^^^^^^^^^^^^^^^^^^
-
-======================= =======================================================
-receive_buf()		(optional) Called by the low-level driver to hand
-			a buffer of received bytes to the ldisc for
-			processing. The number of bytes is guaranteed not
-			to exceed the current value of tty->receive_room.
-			All bytes must be processed.
-
-receive_buf2()		(optional) Called by the low-level driver to hand
-			a buffer of received bytes to the ldisc for
-			processing. Returns the number of bytes processed.
-
-			If both receive_buf() and receive_buf2() are
-			defined, receive_buf2() should be preferred.
-
-write_wakeup()		May be called at any point between open and close.
-			The TTY_DO_WRITE_WAKEUP flag indicates if a call
-			is needed but always races versus calls. Thus the
-			ldisc must be careful about setting order and to
-			handle unexpected calls. Must not sleep.
-
-			The driver is forbidden from calling this directly
-			from the ->write call from the ldisc as the ldisc
-			is permitted to call the driver write method from
-			this function. In such a situation defer it.
-
-dcd_change()		Report to the tty line the current DCD pin status
-			changes and the relative timestamp. The timestamp
-			cannot be NULL.
-======================= =======================================================
-
-
-Driver Access
-^^^^^^^^^^^^^
-
-Line discipline methods can call the following methods of the underlying
-hardware driver through the function pointers within the tty->driver
-structure:
-
-======================= =======================================================
-write()			Write a block of characters to the tty device.
-			Returns the number of characters accepted. The
-			character buffer passed to this method is already
-			in kernel space.
-
-put_char()		Queues a character for writing to the tty device.
-			If there is no room in the queue, the character is
-			ignored.
-
-flush_chars()		(Optional) If defined, must be called after
-			queueing characters with put_char() in order to
-			start transmission.
-
-write_room()		Returns the numbers of characters the tty driver
-			will accept for queueing to be written.
-
-ioctl()			Invoke device specific ioctl.
-			Expects data pointers to refer to userspace.
-			Returns ENOIOCTLCMD for unrecognized ioctl numbers.
-
-set_termios()		Notify the tty driver that the device's termios
-			settings have changed. New settings are in
-			tty->termios. Previous settings should be passed in
-			the "old" argument.
-
-			The API is defined such that the driver should return
-			the actual modes selected. This means that the
-			driver function is responsible for modifying any
-			bits in the request it cannot fulfill to indicate
-			the actual modes being used. A device with no
-			hardware capability for change (e.g. a USB dongle or
-			virtual port) can provide NULL for this method.
-
-throttle()		Notify the tty driver that input buffers for the
-			line discipline are close to full, and it should
-			somehow signal that no more characters should be
-			sent to the tty.
-
-unthrottle()		Notify the tty driver that characters can now be
-			sent to the tty without fear of overrunning the
-			input buffers of the line disciplines.
-
-stop()			Ask the tty driver to stop outputting characters
-			to the tty device.
-
-start()			Ask the tty driver to resume sending characters
-			to the tty device.
-
-hangup()		Ask the tty driver to hang up the tty device.
-
-break_ctl()		(Optional) Ask the tty driver to turn on or off
-			BREAK status on the RS-232 port.  If state is -1,
-			then the BREAK status should be turned on; if
-			state is 0, then BREAK should be turned off.
-			If this routine is not implemented, use ioctls
-			TIOCSBRK / TIOCCBRK instead.
-
-wait_until_sent()	Waits until the device has written out all of the
-			characters in its transmitter FIFO.
-
-send_xchar()		Send a high-priority XON/XOFF character to the device.
-======================= =======================================================
-
-
-Flags
-^^^^^
-
-Line discipline methods have access to tty->flags field containing the
-following interesting flags:
-
-======================= =======================================================
-TTY_THROTTLED		Driver input is throttled. The ldisc should call
-			tty->driver->unthrottle() in order to resume
-			reception when it is ready to process more data.
-
-TTY_DO_WRITE_WAKEUP	If set, causes the driver to call the ldisc's
-			write_wakeup() method in order to resume
-			transmission when it can accept more data
-			to transmit.
-
-TTY_IO_ERROR		If set, causes all subsequent userspace read/write
-			calls on the tty to fail, returning -EIO.
-
-TTY_OTHER_CLOSED	Device is a pty and the other side has closed.
-
-TTY_NO_WRITE_SPLIT	Prevent driver from splitting up writes into
-			smaller chunks.
-======================= =======================================================
-
-
-Locking
-^^^^^^^
-
-Callers to the line discipline functions from the tty layer are required to
-take line discipline locks. The same is true of calls from the driver side
-but not yet enforced.
-
-Three calls are now provided::
-
-	ldisc = tty_ldisc_ref(tty);
-
-takes a handle to the line discipline in the tty and returns it. If no ldisc
-is currently attached or the ldisc is being closed and re-opened at this
-point then NULL is returned. While this handle is held the ldisc will not
-change or go away::
-
-	tty_ldisc_deref(ldisc)
-
-Returns the ldisc reference and allows the ldisc to be closed. Returning the
-reference takes away your right to call the ldisc functions until you take
-a new reference::
-
-	ldisc = tty_ldisc_ref_wait(tty);
-
-Performs the same function as tty_ldisc_ref except that it will wait for an
-ldisc change to complete and then return a reference to the new ldisc.
-
-While these functions are slightly slower than the old code they should have
-minimal impact as most receive logic uses the flip buffers and they only
-need to take a reference when they push bits up through the driver.
-
-A caution: The ldisc->open(), ldisc->close() and driver->set_ldisc
-functions are called with the ldisc unavailable. Thus tty_ldisc_ref will
-fail in this situation if used within these functions. Ldisc and driver
-code calling its own functions must be careful in this case.
-
-
-Driver Interface
-----------------
-
-======================= =======================================================
-open()			Called when a device is opened. May sleep
-
-close()			Called when a device is closed. At the point of
-			return from this call the driver must make no
-			further ldisc calls of any kind. May sleep
-
-write()			Called to write bytes to the device. May not
-			sleep. May occur in parallel in special cases.
-			Because this includes panic paths drivers generally
-			shouldn't try and do clever locking here.
-
-put_char()		Stuff a single character onto the queue. The
-			driver is guaranteed following up calls to
-			flush_chars.
-
-flush_chars()		Ask the kernel to write put_char queue
-
-write_room()		Return the number of characters that can be stuffed
-			into the port buffers without overflow (or less).
-			The ldisc is responsible for being intelligent
-			about multi-threading of write_room/write calls
-
-ioctl()			Called when an ioctl may be for the driver
-
-set_termios()		Called on termios change, serialized against
-			itself by a semaphore. May sleep.
-
-set_ldisc()		Notifier for discipline change. At the point this
-			is done the discipline is not yet usable. Can now
-			sleep (I think)
-
-throttle()		Called by the ldisc to ask the driver to do flow
-			control.  Serialization including with unthrottle
-			is the job of the ldisc layer.
-
-unthrottle()		Called by the ldisc to ask the driver to stop flow
-			control.
-
-stop()			Ldisc notifier to the driver to stop output. As with
-			throttle the serializations with start() are down
-			to the ldisc layer.
-
-start()			Ldisc notifier to the driver to start output.
-
-hangup()		Ask the tty driver to cause a hangup initiated
-			from the host side. [Can sleep ??]
-
-break_ctl()		Send RS232 break. Can sleep. Can get called in
-			parallel, driver must serialize (for now), and
-			with write calls.
-
-wait_until_sent()	Wait for characters to exit the hardware queue
-			of the driver. Can sleep
-
-send_xchar()	  	Send XON/XOFF and if possible jump the queue with
-			it in order to get fast flow control responses.
-			Cannot sleep ??
-======================= =======================================================
-- 
cgit 


From 4745dc8abb0a0a9851c07265eea01d844886d5c8 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 27 Jun 2019 16:36:04 -0300
Subject: docs: phy: place documentation under driver-api

This subsystem-specific documentation belongs to the
driver-api.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 .../devicetree/bindings/phy/phy-bindings.txt       |   2 +-
 .../devicetree/bindings/phy/phy-pxa-usb.txt        |   2 +-
 Documentation/driver-api/index.rst                 |   1 +
 Documentation/driver-api/phy/index.rst             |  16 ++
 Documentation/driver-api/phy/phy.rst               | 197 +++++++++++++++++++++
 Documentation/driver-api/phy/samsung-usb2.rst      | 137 ++++++++++++++
 Documentation/index.rst                            |   1 -
 Documentation/phy.txt                              | 197 ---------------------
 Documentation/phy/samsung-usb2.rst                 | 137 --------------
 9 files changed, 353 insertions(+), 337 deletions(-)
 create mode 100644 Documentation/driver-api/phy/index.rst
 create mode 100644 Documentation/driver-api/phy/phy.rst
 create mode 100644 Documentation/driver-api/phy/samsung-usb2.rst
 delete mode 100644 Documentation/phy.txt
 delete mode 100644 Documentation/phy/samsung-usb2.rst

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/phy/phy-bindings.txt b/Documentation/devicetree/bindings/phy/phy-bindings.txt
index a403b81d0679..c4eb38902533 100644
--- a/Documentation/devicetree/bindings/phy/phy-bindings.txt
+++ b/Documentation/devicetree/bindings/phy/phy-bindings.txt
@@ -1,5 +1,5 @@
 This document explains only the device tree data binding. For general
-information about PHY subsystem refer to Documentation/phy.txt
+information about PHY subsystem refer to Documentation/driver-api/phy/phy.rst
 
 PHY device node
 ===============
diff --git a/Documentation/devicetree/bindings/phy/phy-pxa-usb.txt b/Documentation/devicetree/bindings/phy/phy-pxa-usb.txt
index 93fc09c12954..d80e36a77ec5 100644
--- a/Documentation/devicetree/bindings/phy/phy-pxa-usb.txt
+++ b/Documentation/devicetree/bindings/phy/phy-pxa-usb.txt
@@ -15,4 +15,4 @@ Example:
 	};
 
 This document explains the device tree binding. For general
-information about PHY subsystem refer to Documentation/phy.txt
+information about PHY subsystem refer to Documentation/driver-api/phy/phy.rst
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index cf39b8f9d0f9..eff22db0ed14 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -85,6 +85,7 @@ available subsections can be seen below.
    parport-lowlevel
    pps
    ptp
+   phy/index
    pti_intel_mid
    pwm
    rfkill
diff --git a/Documentation/driver-api/phy/index.rst b/Documentation/driver-api/phy/index.rst
new file mode 100644
index 000000000000..fce9ffae2812
--- /dev/null
+++ b/Documentation/driver-api/phy/index.rst
@@ -0,0 +1,16 @@
+=====================
+Generic PHY Framework
+=====================
+
+.. toctree::
+
+   phy
+   samsung-usb2
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
+
diff --git a/Documentation/driver-api/phy/phy.rst b/Documentation/driver-api/phy/phy.rst
new file mode 100644
index 000000000000..457c3e0f86d6
--- /dev/null
+++ b/Documentation/driver-api/phy/phy.rst
@@ -0,0 +1,197 @@
+=============
+PHY subsystem
+=============
+
+:Author: Kishon Vijay Abraham I <kishon@ti.com>
+
+This document explains the Generic PHY Framework along with the APIs provided,
+and how-to-use.
+
+Introduction
+============
+
+*PHY* is the abbreviation for physical layer. It is used to connect a device
+to the physical medium e.g., the USB controller has a PHY to provide functions
+such as serialization, de-serialization, encoding, decoding and is responsible
+for obtaining the required data transmission rate. Note that some USB
+controllers have PHY functionality embedded into it and others use an external
+PHY. Other peripherals that use PHY include Wireless LAN, Ethernet,
+SATA etc.
+
+The intention of creating this framework is to bring the PHY drivers spread
+all over the Linux kernel to drivers/phy to increase code re-use and for
+better code maintainability.
+
+This framework will be of use only to devices that use external PHY (PHY
+functionality is not embedded within the controller).
+
+Registering/Unregistering the PHY provider
+==========================================
+
+PHY provider refers to an entity that implements one or more PHY instances.
+For the simple case where the PHY provider implements only a single instance of
+the PHY, the framework provides its own implementation of of_xlate in
+of_phy_simple_xlate. If the PHY provider implements multiple instances, it
+should provide its own implementation of of_xlate. of_xlate is used only for
+dt boot case.
+
+::
+
+	#define of_phy_provider_register(dev, xlate)    \
+		__of_phy_provider_register((dev), NULL, THIS_MODULE, (xlate))
+
+	#define devm_of_phy_provider_register(dev, xlate)       \
+		__devm_of_phy_provider_register((dev), NULL, THIS_MODULE,
+						(xlate))
+
+of_phy_provider_register and devm_of_phy_provider_register macros can be used to
+register the phy_provider and it takes device and of_xlate as
+arguments. For the dt boot case, all PHY providers should use one of the above
+2 macros to register the PHY provider.
+
+Often the device tree nodes associated with a PHY provider will contain a set
+of children that each represent a single PHY. Some bindings may nest the child
+nodes within extra levels for context and extensibility, in which case the low
+level of_phy_provider_register_full() and devm_of_phy_provider_register_full()
+macros can be used to override the node containing the children.
+
+::
+
+	#define of_phy_provider_register_full(dev, children, xlate) \
+		__of_phy_provider_register(dev, children, THIS_MODULE, xlate)
+
+	#define devm_of_phy_provider_register_full(dev, children, xlate) \
+		__devm_of_phy_provider_register_full(dev, children,
+						     THIS_MODULE, xlate)
+
+	void devm_of_phy_provider_unregister(struct device *dev,
+		struct phy_provider *phy_provider);
+	void of_phy_provider_unregister(struct phy_provider *phy_provider);
+
+devm_of_phy_provider_unregister and of_phy_provider_unregister can be used to
+unregister the PHY.
+
+Creating the PHY
+================
+
+The PHY driver should create the PHY in order for other peripheral controllers
+to make use of it. The PHY framework provides 2 APIs to create the PHY.
+
+::
+
+	struct phy *phy_create(struct device *dev, struct device_node *node,
+			       const struct phy_ops *ops);
+	struct phy *devm_phy_create(struct device *dev,
+				    struct device_node *node,
+				    const struct phy_ops *ops);
+
+The PHY drivers can use one of the above 2 APIs to create the PHY by passing
+the device pointer and phy ops.
+phy_ops is a set of function pointers for performing PHY operations such as
+init, exit, power_on and power_off.
+
+Inorder to dereference the private data (in phy_ops), the phy provider driver
+can use phy_set_drvdata() after creating the PHY and use phy_get_drvdata() in
+phy_ops to get back the private data.
+
+4. Getting a reference to the PHY
+
+Before the controller can make use of the PHY, it has to get a reference to
+it. This framework provides the following APIs to get a reference to the PHY.
+
+::
+
+	struct phy *phy_get(struct device *dev, const char *string);
+	struct phy *phy_optional_get(struct device *dev, const char *string);
+	struct phy *devm_phy_get(struct device *dev, const char *string);
+	struct phy *devm_phy_optional_get(struct device *dev,
+					  const char *string);
+	struct phy *devm_of_phy_get_by_index(struct device *dev,
+					     struct device_node *np,
+					     int index);
+
+phy_get, phy_optional_get, devm_phy_get and devm_phy_optional_get can
+be used to get the PHY. In the case of dt boot, the string arguments
+should contain the phy name as given in the dt data and in the case of
+non-dt boot, it should contain the label of the PHY.  The two
+devm_phy_get associates the device with the PHY using devres on
+successful PHY get. On driver detach, release function is invoked on
+the devres data and devres data is freed. phy_optional_get and
+devm_phy_optional_get should be used when the phy is optional. These
+two functions will never return -ENODEV, but instead returns NULL when
+the phy cannot be found.Some generic drivers, such as ehci, may use multiple
+phys and for such drivers referencing phy(s) by name(s) does not make sense. In
+this case, devm_of_phy_get_by_index can be used to get a phy reference based on
+the index.
+
+It should be noted that NULL is a valid phy reference. All phy
+consumer calls on the NULL phy become NOPs. That is the release calls,
+the phy_init() and phy_exit() calls, and phy_power_on() and
+phy_power_off() calls are all NOP when applied to a NULL phy. The NULL
+phy is useful in devices for handling optional phy devices.
+
+Releasing a reference to the PHY
+================================
+
+When the controller no longer needs the PHY, it has to release the reference
+to the PHY it has obtained using the APIs mentioned in the above section. The
+PHY framework provides 2 APIs to release a reference to the PHY.
+
+::
+
+	void phy_put(struct phy *phy);
+	void devm_phy_put(struct device *dev, struct phy *phy);
+
+Both these APIs are used to release a reference to the PHY and devm_phy_put
+destroys the devres associated with this PHY.
+
+Destroying the PHY
+==================
+
+When the driver that created the PHY is unloaded, it should destroy the PHY it
+created using one of the following 2 APIs::
+
+	void phy_destroy(struct phy *phy);
+	void devm_phy_destroy(struct device *dev, struct phy *phy);
+
+Both these APIs destroy the PHY and devm_phy_destroy destroys the devres
+associated with this PHY.
+
+PM Runtime
+==========
+
+This subsystem is pm runtime enabled. So while creating the PHY,
+pm_runtime_enable of the phy device created by this subsystem is called and
+while destroying the PHY, pm_runtime_disable is called. Note that the phy
+device created by this subsystem will be a child of the device that calls
+phy_create (PHY provider device).
+
+So pm_runtime_get_sync of the phy_device created by this subsystem will invoke
+pm_runtime_get_sync of PHY provider device because of parent-child relationship.
+It should also be noted that phy_power_on and phy_power_off performs
+phy_pm_runtime_get_sync and phy_pm_runtime_put respectively.
+There are exported APIs like phy_pm_runtime_get, phy_pm_runtime_get_sync,
+phy_pm_runtime_put, phy_pm_runtime_put_sync, phy_pm_runtime_allow and
+phy_pm_runtime_forbid for performing PM operations.
+
+PHY Mappings
+============
+
+In order to get reference to a PHY without help from DeviceTree, the framework
+offers lookups which can be compared to clkdev that allow clk structures to be
+bound to devices. A lookup can be made be made during runtime when a handle to
+the struct phy already exists.
+
+The framework offers the following API for registering and unregistering the
+lookups::
+
+	int phy_create_lookup(struct phy *phy, const char *con_id,
+			      const char *dev_id);
+	void phy_remove_lookup(struct phy *phy, const char *con_id,
+			       const char *dev_id);
+
+DeviceTree Binding
+==================
+
+The documentation for PHY dt binding can be found @
+Documentation/devicetree/bindings/phy/phy-bindings.txt
diff --git a/Documentation/driver-api/phy/samsung-usb2.rst b/Documentation/driver-api/phy/samsung-usb2.rst
new file mode 100644
index 000000000000..c48c8b9797b9
--- /dev/null
+++ b/Documentation/driver-api/phy/samsung-usb2.rst
@@ -0,0 +1,137 @@
+====================================
+Samsung USB 2.0 PHY adaptation layer
+====================================
+
+1. Description
+--------------
+
+The architecture of the USB 2.0 PHY module in Samsung SoCs is similar
+among many SoCs. In spite of the similarities it proved difficult to
+create a one driver that would fit all these PHY controllers. Often
+the differences were minor and were found in particular bits of the
+registers of the PHY. In some rare cases the order of register writes or
+the PHY powering up process had to be altered. This adaptation layer is
+a compromise between having separate drivers and having a single driver
+with added support for many special cases.
+
+2. Files description
+--------------------
+
+- phy-samsung-usb2.c
+   This is the main file of the adaptation layer. This file contains
+   the probe function and provides two callbacks to the Generic PHY
+   Framework. This two callbacks are used to power on and power off the
+   phy. They carry out the common work that has to be done on all version
+   of the PHY module. Depending on which SoC was chosen they execute SoC
+   specific callbacks. The specific SoC version is selected by choosing
+   the appropriate compatible string. In addition, this file contains
+   struct of_device_id definitions for particular SoCs.
+
+- phy-samsung-usb2.h
+   This is the include file. It declares the structures used by this
+   driver. In addition it should contain extern declarations for
+   structures that describe particular SoCs.
+
+3. Supporting SoCs
+------------------
+
+To support a new SoC a new file should be added to the drivers/phy
+directory. Each SoC's configuration is stored in an instance of the
+struct samsung_usb2_phy_config::
+
+  struct samsung_usb2_phy_config {
+	const struct samsung_usb2_common_phy *phys;
+	int (*rate_to_clk)(unsigned long, u32 *);
+	unsigned int num_phys;
+	bool has_mode_switch;
+  };
+
+The num_phys is the number of phys handled by the driver. `*phys` is an
+array that contains the configuration for each phy. The has_mode_switch
+property is a boolean flag that determines whether the SoC has USB host
+and device on a single pair of pins. If so, a special register has to
+be modified to change the internal routing of these pins between a USB
+device or host module.
+
+For example the configuration for Exynos 4210 is following::
+
+  const struct samsung_usb2_phy_config exynos4210_usb2_phy_config = {
+	.has_mode_switch        = 0,
+	.num_phys		= EXYNOS4210_NUM_PHYS,
+	.phys			= exynos4210_phys,
+	.rate_to_clk		= exynos4210_rate_to_clk,
+  }
+
+- `int (*rate_to_clk)(unsigned long, u32 *)`
+
+	The rate_to_clk callback is to convert the rate of the clock
+	used as the reference clock for the PHY module to the value
+	that should be written in the hardware register.
+
+The exynos4210_phys configuration array is as follows::
+
+  static const struct samsung_usb2_common_phy exynos4210_phys[] = {
+	{
+		.label		= "device",
+		.id		= EXYNOS4210_DEVICE,
+		.power_on	= exynos4210_power_on,
+		.power_off	= exynos4210_power_off,
+	},
+	{
+		.label		= "host",
+		.id		= EXYNOS4210_HOST,
+		.power_on	= exynos4210_power_on,
+		.power_off	= exynos4210_power_off,
+	},
+	{
+		.label		= "hsic0",
+		.id		= EXYNOS4210_HSIC0,
+		.power_on	= exynos4210_power_on,
+		.power_off	= exynos4210_power_off,
+	},
+	{
+		.label		= "hsic1",
+		.id		= EXYNOS4210_HSIC1,
+		.power_on	= exynos4210_power_on,
+		.power_off	= exynos4210_power_off,
+	},
+	{},
+  };
+
+- `int (*power_on)(struct samsung_usb2_phy_instance *);`
+  `int (*power_off)(struct samsung_usb2_phy_instance *);`
+
+	These two callbacks are used to power on and power off the phy
+	by modifying appropriate registers.
+
+Final change to the driver is adding appropriate compatible value to the
+phy-samsung-usb2.c file. In case of Exynos 4210 the following lines were
+added to the struct of_device_id samsung_usb2_phy_of_match[] array::
+
+  #ifdef CONFIG_PHY_EXYNOS4210_USB2
+	{
+		.compatible = "samsung,exynos4210-usb2-phy",
+		.data = &exynos4210_usb2_phy_config,
+	},
+  #endif
+
+To add further flexibility to the driver the Kconfig file enables to
+include support for selected SoCs in the compiled driver. The Kconfig
+entry for Exynos 4210 is following::
+
+  config PHY_EXYNOS4210_USB2
+	bool "Support for Exynos 4210"
+	depends on PHY_SAMSUNG_USB2
+	depends on CPU_EXYNOS4210
+	help
+	  Enable USB PHY support for Exynos 4210. This option requires that
+	  Samsung USB 2.0 PHY driver is enabled and means that support for this
+	  particular SoC is compiled in the driver. In case of Exynos 4210 four
+	  phys are available - device, host, HSCI0 and HSCI1.
+
+The newly created file that supports the new SoC has to be also added to the
+Makefile. In case of Exynos 4210 the added line is following::
+
+  obj-$(CONFIG_PHY_EXYNOS4210_USB2)       += phy-exynos4210-usb2.o
+
+After completing these steps the support for the new SoC should be ready.
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 041ffe442960..dbfec00ba535 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -111,7 +111,6 @@ needed).
    usb/index
    misc-devices/index
    mic/index
-   phy/samsung-usb2
    scheduler/index
 
 Architecture-specific documentation
diff --git a/Documentation/phy.txt b/Documentation/phy.txt
deleted file mode 100644
index 457c3e0f86d6..000000000000
--- a/Documentation/phy.txt
+++ /dev/null
@@ -1,197 +0,0 @@
-=============
-PHY subsystem
-=============
-
-:Author: Kishon Vijay Abraham I <kishon@ti.com>
-
-This document explains the Generic PHY Framework along with the APIs provided,
-and how-to-use.
-
-Introduction
-============
-
-*PHY* is the abbreviation for physical layer. It is used to connect a device
-to the physical medium e.g., the USB controller has a PHY to provide functions
-such as serialization, de-serialization, encoding, decoding and is responsible
-for obtaining the required data transmission rate. Note that some USB
-controllers have PHY functionality embedded into it and others use an external
-PHY. Other peripherals that use PHY include Wireless LAN, Ethernet,
-SATA etc.
-
-The intention of creating this framework is to bring the PHY drivers spread
-all over the Linux kernel to drivers/phy to increase code re-use and for
-better code maintainability.
-
-This framework will be of use only to devices that use external PHY (PHY
-functionality is not embedded within the controller).
-
-Registering/Unregistering the PHY provider
-==========================================
-
-PHY provider refers to an entity that implements one or more PHY instances.
-For the simple case where the PHY provider implements only a single instance of
-the PHY, the framework provides its own implementation of of_xlate in
-of_phy_simple_xlate. If the PHY provider implements multiple instances, it
-should provide its own implementation of of_xlate. of_xlate is used only for
-dt boot case.
-
-::
-
-	#define of_phy_provider_register(dev, xlate)    \
-		__of_phy_provider_register((dev), NULL, THIS_MODULE, (xlate))
-
-	#define devm_of_phy_provider_register(dev, xlate)       \
-		__devm_of_phy_provider_register((dev), NULL, THIS_MODULE,
-						(xlate))
-
-of_phy_provider_register and devm_of_phy_provider_register macros can be used to
-register the phy_provider and it takes device and of_xlate as
-arguments. For the dt boot case, all PHY providers should use one of the above
-2 macros to register the PHY provider.
-
-Often the device tree nodes associated with a PHY provider will contain a set
-of children that each represent a single PHY. Some bindings may nest the child
-nodes within extra levels for context and extensibility, in which case the low
-level of_phy_provider_register_full() and devm_of_phy_provider_register_full()
-macros can be used to override the node containing the children.
-
-::
-
-	#define of_phy_provider_register_full(dev, children, xlate) \
-		__of_phy_provider_register(dev, children, THIS_MODULE, xlate)
-
-	#define devm_of_phy_provider_register_full(dev, children, xlate) \
-		__devm_of_phy_provider_register_full(dev, children,
-						     THIS_MODULE, xlate)
-
-	void devm_of_phy_provider_unregister(struct device *dev,
-		struct phy_provider *phy_provider);
-	void of_phy_provider_unregister(struct phy_provider *phy_provider);
-
-devm_of_phy_provider_unregister and of_phy_provider_unregister can be used to
-unregister the PHY.
-
-Creating the PHY
-================
-
-The PHY driver should create the PHY in order for other peripheral controllers
-to make use of it. The PHY framework provides 2 APIs to create the PHY.
-
-::
-
-	struct phy *phy_create(struct device *dev, struct device_node *node,
-			       const struct phy_ops *ops);
-	struct phy *devm_phy_create(struct device *dev,
-				    struct device_node *node,
-				    const struct phy_ops *ops);
-
-The PHY drivers can use one of the above 2 APIs to create the PHY by passing
-the device pointer and phy ops.
-phy_ops is a set of function pointers for performing PHY operations such as
-init, exit, power_on and power_off.
-
-Inorder to dereference the private data (in phy_ops), the phy provider driver
-can use phy_set_drvdata() after creating the PHY and use phy_get_drvdata() in
-phy_ops to get back the private data.
-
-4. Getting a reference to the PHY
-
-Before the controller can make use of the PHY, it has to get a reference to
-it. This framework provides the following APIs to get a reference to the PHY.
-
-::
-
-	struct phy *phy_get(struct device *dev, const char *string);
-	struct phy *phy_optional_get(struct device *dev, const char *string);
-	struct phy *devm_phy_get(struct device *dev, const char *string);
-	struct phy *devm_phy_optional_get(struct device *dev,
-					  const char *string);
-	struct phy *devm_of_phy_get_by_index(struct device *dev,
-					     struct device_node *np,
-					     int index);
-
-phy_get, phy_optional_get, devm_phy_get and devm_phy_optional_get can
-be used to get the PHY. In the case of dt boot, the string arguments
-should contain the phy name as given in the dt data and in the case of
-non-dt boot, it should contain the label of the PHY.  The two
-devm_phy_get associates the device with the PHY using devres on
-successful PHY get. On driver detach, release function is invoked on
-the devres data and devres data is freed. phy_optional_get and
-devm_phy_optional_get should be used when the phy is optional. These
-two functions will never return -ENODEV, but instead returns NULL when
-the phy cannot be found.Some generic drivers, such as ehci, may use multiple
-phys and for such drivers referencing phy(s) by name(s) does not make sense. In
-this case, devm_of_phy_get_by_index can be used to get a phy reference based on
-the index.
-
-It should be noted that NULL is a valid phy reference. All phy
-consumer calls on the NULL phy become NOPs. That is the release calls,
-the phy_init() and phy_exit() calls, and phy_power_on() and
-phy_power_off() calls are all NOP when applied to a NULL phy. The NULL
-phy is useful in devices for handling optional phy devices.
-
-Releasing a reference to the PHY
-================================
-
-When the controller no longer needs the PHY, it has to release the reference
-to the PHY it has obtained using the APIs mentioned in the above section. The
-PHY framework provides 2 APIs to release a reference to the PHY.
-
-::
-
-	void phy_put(struct phy *phy);
-	void devm_phy_put(struct device *dev, struct phy *phy);
-
-Both these APIs are used to release a reference to the PHY and devm_phy_put
-destroys the devres associated with this PHY.
-
-Destroying the PHY
-==================
-
-When the driver that created the PHY is unloaded, it should destroy the PHY it
-created using one of the following 2 APIs::
-
-	void phy_destroy(struct phy *phy);
-	void devm_phy_destroy(struct device *dev, struct phy *phy);
-
-Both these APIs destroy the PHY and devm_phy_destroy destroys the devres
-associated with this PHY.
-
-PM Runtime
-==========
-
-This subsystem is pm runtime enabled. So while creating the PHY,
-pm_runtime_enable of the phy device created by this subsystem is called and
-while destroying the PHY, pm_runtime_disable is called. Note that the phy
-device created by this subsystem will be a child of the device that calls
-phy_create (PHY provider device).
-
-So pm_runtime_get_sync of the phy_device created by this subsystem will invoke
-pm_runtime_get_sync of PHY provider device because of parent-child relationship.
-It should also be noted that phy_power_on and phy_power_off performs
-phy_pm_runtime_get_sync and phy_pm_runtime_put respectively.
-There are exported APIs like phy_pm_runtime_get, phy_pm_runtime_get_sync,
-phy_pm_runtime_put, phy_pm_runtime_put_sync, phy_pm_runtime_allow and
-phy_pm_runtime_forbid for performing PM operations.
-
-PHY Mappings
-============
-
-In order to get reference to a PHY without help from DeviceTree, the framework
-offers lookups which can be compared to clkdev that allow clk structures to be
-bound to devices. A lookup can be made be made during runtime when a handle to
-the struct phy already exists.
-
-The framework offers the following API for registering and unregistering the
-lookups::
-
-	int phy_create_lookup(struct phy *phy, const char *con_id,
-			      const char *dev_id);
-	void phy_remove_lookup(struct phy *phy, const char *con_id,
-			       const char *dev_id);
-
-DeviceTree Binding
-==================
-
-The documentation for PHY dt binding can be found @
-Documentation/devicetree/bindings/phy/phy-bindings.txt
diff --git a/Documentation/phy/samsung-usb2.rst b/Documentation/phy/samsung-usb2.rst
deleted file mode 100644
index c48c8b9797b9..000000000000
--- a/Documentation/phy/samsung-usb2.rst
+++ /dev/null
@@ -1,137 +0,0 @@
-====================================
-Samsung USB 2.0 PHY adaptation layer
-====================================
-
-1. Description
---------------
-
-The architecture of the USB 2.0 PHY module in Samsung SoCs is similar
-among many SoCs. In spite of the similarities it proved difficult to
-create a one driver that would fit all these PHY controllers. Often
-the differences were minor and were found in particular bits of the
-registers of the PHY. In some rare cases the order of register writes or
-the PHY powering up process had to be altered. This adaptation layer is
-a compromise between having separate drivers and having a single driver
-with added support for many special cases.
-
-2. Files description
---------------------
-
-- phy-samsung-usb2.c
-   This is the main file of the adaptation layer. This file contains
-   the probe function and provides two callbacks to the Generic PHY
-   Framework. This two callbacks are used to power on and power off the
-   phy. They carry out the common work that has to be done on all version
-   of the PHY module. Depending on which SoC was chosen they execute SoC
-   specific callbacks. The specific SoC version is selected by choosing
-   the appropriate compatible string. In addition, this file contains
-   struct of_device_id definitions for particular SoCs.
-
-- phy-samsung-usb2.h
-   This is the include file. It declares the structures used by this
-   driver. In addition it should contain extern declarations for
-   structures that describe particular SoCs.
-
-3. Supporting SoCs
-------------------
-
-To support a new SoC a new file should be added to the drivers/phy
-directory. Each SoC's configuration is stored in an instance of the
-struct samsung_usb2_phy_config::
-
-  struct samsung_usb2_phy_config {
-	const struct samsung_usb2_common_phy *phys;
-	int (*rate_to_clk)(unsigned long, u32 *);
-	unsigned int num_phys;
-	bool has_mode_switch;
-  };
-
-The num_phys is the number of phys handled by the driver. `*phys` is an
-array that contains the configuration for each phy. The has_mode_switch
-property is a boolean flag that determines whether the SoC has USB host
-and device on a single pair of pins. If so, a special register has to
-be modified to change the internal routing of these pins between a USB
-device or host module.
-
-For example the configuration for Exynos 4210 is following::
-
-  const struct samsung_usb2_phy_config exynos4210_usb2_phy_config = {
-	.has_mode_switch        = 0,
-	.num_phys		= EXYNOS4210_NUM_PHYS,
-	.phys			= exynos4210_phys,
-	.rate_to_clk		= exynos4210_rate_to_clk,
-  }
-
-- `int (*rate_to_clk)(unsigned long, u32 *)`
-
-	The rate_to_clk callback is to convert the rate of the clock
-	used as the reference clock for the PHY module to the value
-	that should be written in the hardware register.
-
-The exynos4210_phys configuration array is as follows::
-
-  static const struct samsung_usb2_common_phy exynos4210_phys[] = {
-	{
-		.label		= "device",
-		.id		= EXYNOS4210_DEVICE,
-		.power_on	= exynos4210_power_on,
-		.power_off	= exynos4210_power_off,
-	},
-	{
-		.label		= "host",
-		.id		= EXYNOS4210_HOST,
-		.power_on	= exynos4210_power_on,
-		.power_off	= exynos4210_power_off,
-	},
-	{
-		.label		= "hsic0",
-		.id		= EXYNOS4210_HSIC0,
-		.power_on	= exynos4210_power_on,
-		.power_off	= exynos4210_power_off,
-	},
-	{
-		.label		= "hsic1",
-		.id		= EXYNOS4210_HSIC1,
-		.power_on	= exynos4210_power_on,
-		.power_off	= exynos4210_power_off,
-	},
-	{},
-  };
-
-- `int (*power_on)(struct samsung_usb2_phy_instance *);`
-  `int (*power_off)(struct samsung_usb2_phy_instance *);`
-
-	These two callbacks are used to power on and power off the phy
-	by modifying appropriate registers.
-
-Final change to the driver is adding appropriate compatible value to the
-phy-samsung-usb2.c file. In case of Exynos 4210 the following lines were
-added to the struct of_device_id samsung_usb2_phy_of_match[] array::
-
-  #ifdef CONFIG_PHY_EXYNOS4210_USB2
-	{
-		.compatible = "samsung,exynos4210-usb2-phy",
-		.data = &exynos4210_usb2_phy_config,
-	},
-  #endif
-
-To add further flexibility to the driver the Kconfig file enables to
-include support for selected SoCs in the compiled driver. The Kconfig
-entry for Exynos 4210 is following::
-
-  config PHY_EXYNOS4210_USB2
-	bool "Support for Exynos 4210"
-	depends on PHY_SAMSUNG_USB2
-	depends on CPU_EXYNOS4210
-	help
-	  Enable USB PHY support for Exynos 4210. This option requires that
-	  Samsung USB 2.0 PHY driver is enabled and means that support for this
-	  particular SoC is compiled in the driver. In case of Exynos 4210 four
-	  phys are available - device, host, HSCI0 and HSCI1.
-
-The newly created file that supports the new SoC has to be also added to the
-Makefile. In case of Exynos 4210 the added line is following::
-
-  obj-$(CONFIG_PHY_EXYNOS4210_USB2)       += phy-exynos4210-usb2.o
-
-After completing these steps the support for the new SoC should be ready.
-- 
cgit 


From 652a49bc68ce3cf0355bde357b3998bd63e73915 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 18 Jun 2019 15:03:13 -0300
Subject: docs: add a memory-devices subdir to driver-api

There are two docs describing memory device drivers.

Add both to this new chapter of the driver-api.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/bus-devices/ti-gpmc.rst              | 179 ---------------------
 Documentation/driver-api/index.rst                 |   1 +
 Documentation/driver-api/memory-devices/index.rst  |  16 ++
 .../driver-api/memory-devices/ti-emif.rst          |  64 ++++++++
 .../driver-api/memory-devices/ti-gpmc.rst          | 179 +++++++++++++++++++++
 Documentation/memory-devices/ti-emif.rst           |  64 --------
 6 files changed, 260 insertions(+), 243 deletions(-)
 delete mode 100644 Documentation/bus-devices/ti-gpmc.rst
 create mode 100644 Documentation/driver-api/memory-devices/index.rst
 create mode 100644 Documentation/driver-api/memory-devices/ti-emif.rst
 create mode 100644 Documentation/driver-api/memory-devices/ti-gpmc.rst
 delete mode 100644 Documentation/memory-devices/ti-emif.rst

(limited to 'Documentation')

diff --git a/Documentation/bus-devices/ti-gpmc.rst b/Documentation/bus-devices/ti-gpmc.rst
deleted file mode 100644
index 87c366e418be..000000000000
--- a/Documentation/bus-devices/ti-gpmc.rst
+++ /dev/null
@@ -1,179 +0,0 @@
-:orphan:
-
-========================================
-GPMC (General Purpose Memory Controller)
-========================================
-
-GPMC is an unified memory controller dedicated to interfacing external
-memory devices like
-
- * Asynchronous SRAM like memories and application specific integrated
-   circuit devices.
- * Asynchronous, synchronous, and page mode burst NOR flash devices
-   NAND flash
- * Pseudo-SRAM devices
-
-GPMC is found on Texas Instruments SoC's (OMAP based)
-IP details: http://www.ti.com/lit/pdf/spruh73 section 7.1
-
-
-GPMC generic timing calculation:
-================================
-
-GPMC has certain timings that has to be programmed for proper
-functioning of the peripheral, while peripheral has another set of
-timings. To have peripheral work with gpmc, peripheral timings has to
-be translated to the form gpmc can understand. The way it has to be
-translated depends on the connected peripheral. Also there is a
-dependency for certain gpmc timings on gpmc clock frequency. Hence a
-generic timing routine was developed to achieve above requirements.
-
-Generic routine provides a generic method to calculate gpmc timings
-from gpmc peripheral timings. struct gpmc_device_timings fields has to
-be updated with timings from the datasheet of the peripheral that is
-connected to gpmc. A few of the peripheral timings can be fed either
-in time or in cycles, provision to handle this scenario has been
-provided (refer struct gpmc_device_timings definition). It may so
-happen that timing as specified by peripheral datasheet is not present
-in timing structure, in this scenario, try to correlate peripheral
-timing to the one available. If that doesn't work, try to add a new
-field as required by peripheral, educate generic timing routine to
-handle it, make sure that it does not break any of the existing.
-Then there may be cases where peripheral datasheet doesn't mention
-certain fields of struct gpmc_device_timings, zero those entries.
-
-Generic timing routine has been verified to work properly on
-multiple onenand's and tusb6010 peripherals.
-
-A word of caution: generic timing routine has been developed based
-on understanding of gpmc timings, peripheral timings, available
-custom timing routines, a kind of reverse engineering without
-most of the datasheets & hardware (to be exact none of those supported
-in mainline having custom timing routine) and by simulation.
-
-gpmc timing dependency on peripheral timings:
-
-[<gpmc_timing>: <peripheral timing1>, <peripheral timing2> ...]
-
-1. common
-
-cs_on:
-	t_ceasu
-adv_on:
-	t_avdasu, t_ceavd
-
-2. sync common
-
-sync_clk:
-	clk
-page_burst_access:
-	t_bacc
-clk_activation:
-	t_ces, t_avds
-
-3. read async muxed
-
-adv_rd_off:
-	t_avdp_r
-oe_on:
-	t_oeasu, t_aavdh
-access:
-	t_iaa, t_oe, t_ce, t_aa
-rd_cycle:
-	t_rd_cycle, t_cez_r, t_oez
-
-4. read async non-muxed
-
-adv_rd_off:
-	t_avdp_r
-oe_on:
-	t_oeasu
-access:
-	t_iaa, t_oe, t_ce, t_aa
-rd_cycle:
-	t_rd_cycle, t_cez_r, t_oez
-
-5. read sync muxed
-
-adv_rd_off:
-	t_avdp_r, t_avdh
-oe_on:
-	t_oeasu, t_ach, cyc_aavdh_oe
-access:
-	t_iaa, cyc_iaa, cyc_oe
-rd_cycle:
-	t_cez_r, t_oez, t_ce_rdyz
-
-6. read sync non-muxed
-
-adv_rd_off:
-	t_avdp_r
-oe_on:
-	t_oeasu
-access:
-	t_iaa, cyc_iaa, cyc_oe
-rd_cycle:
-	t_cez_r, t_oez, t_ce_rdyz
-
-7. write async muxed
-
-adv_wr_off:
-	t_avdp_w
-we_on, wr_data_mux_bus:
-	t_weasu, t_aavdh, cyc_aavhd_we
-we_off:
-	t_wpl
-cs_wr_off:
-	t_wph
-wr_cycle:
-	t_cez_w, t_wr_cycle
-
-8. write async non-muxed
-
-adv_wr_off:
-	t_avdp_w
-we_on, wr_data_mux_bus:
-	t_weasu
-we_off:
-	t_wpl
-cs_wr_off:
-	t_wph
-wr_cycle:
-	t_cez_w, t_wr_cycle
-
-9. write sync muxed
-
-adv_wr_off:
-	t_avdp_w, t_avdh
-we_on, wr_data_mux_bus:
-	t_weasu, t_rdyo, t_aavdh, cyc_aavhd_we
-we_off:
-	t_wpl, cyc_wpl
-cs_wr_off:
-	t_wph
-wr_cycle:
-	t_cez_w, t_ce_rdyz
-
-10. write sync non-muxed
-
-adv_wr_off:
-	t_avdp_w
-we_on, wr_data_mux_bus:
-	t_weasu, t_rdyo
-we_off:
-	t_wpl, cyc_wpl
-cs_wr_off:
-	t_wph
-wr_cycle:
-	t_cez_w, t_ce_rdyz
-
-
-Note:
-  Many of gpmc timings are dependent on other gpmc timings (a few
-  gpmc timings purely dependent on other gpmc timings, a reason that
-  some of the gpmc timings are missing above), and it will result in
-  indirect dependency of peripheral timings to gpmc timings other than
-  mentioned above, refer timing routine for more details. To know what
-  these peripheral timings correspond to, please see explanations in
-  struct gpmc_device_timings definition. And for gpmc timings refer
-  IP details (link above).
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index eff22db0ed14..d12a80f386a6 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -79,6 +79,7 @@ available subsections can be seen below.
    isapnp
    generic-counter
    lightnvm-pblk
+   memory-devices/index
    men-chameleon-bus
    ntb
    nvmem
diff --git a/Documentation/driver-api/memory-devices/index.rst b/Documentation/driver-api/memory-devices/index.rst
new file mode 100644
index 000000000000..87549828f6ab
--- /dev/null
+++ b/Documentation/driver-api/memory-devices/index.rst
@@ -0,0 +1,16 @@
+=========================
+Memory Controller drivers
+=========================
+
+.. toctree::
+    :maxdepth: 1
+
+    ti-emif
+    ti-gpmc
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/driver-api/memory-devices/ti-emif.rst b/Documentation/driver-api/memory-devices/ti-emif.rst
new file mode 100644
index 000000000000..dea2ad9bcd7e
--- /dev/null
+++ b/Documentation/driver-api/memory-devices/ti-emif.rst
@@ -0,0 +1,64 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================
+TI EMIF SDRAM Controller Driver
+===============================
+
+Author
+======
+Aneesh V <aneesh@ti.com>
+
+Location
+========
+driver/memory/emif.c
+
+Supported SoCs:
+===============
+TI OMAP44xx
+TI OMAP54xx
+
+Menuconfig option:
+==================
+Device Drivers
+	Memory devices
+		Texas Instruments EMIF driver
+
+Description
+===========
+This driver is for the EMIF module available in Texas Instruments
+SoCs. EMIF is an SDRAM controller that, based on its revision,
+supports one or more of DDR2, DDR3, and LPDDR2 SDRAM protocols.
+This driver takes care of only LPDDR2 memories presently. The
+functions of the driver includes re-configuring AC timing
+parameters and other settings during frequency, voltage and
+temperature changes
+
+Platform Data (see include/linux/platform_data/emif_plat.h)
+===========================================================
+DDR device details and other board dependent and SoC dependent
+information can be passed through platform data (struct emif_platform_data)
+
+- DDR device details: 'struct ddr_device_info'
+- Device AC timings: 'struct lpddr2_timings' and 'struct lpddr2_min_tck'
+- Custom configurations: customizable policy options through
+  'struct emif_custom_configs'
+- IP revision
+- PHY type
+
+Interface to the external world
+===============================
+EMIF driver registers notifiers for voltage and frequency changes
+affecting EMIF and takes appropriate actions when these are invoked.
+
+- freq_pre_notify_handling()
+- freq_post_notify_handling()
+- volt_notify_handling()
+
+Debugfs
+=======
+The driver creates two debugfs entries per device.
+
+- regcache_dump : dump of register values calculated and saved for all
+  frequencies used so far.
+- mr4 : last polled value of MR4 register in the LPDDR2 device. MR4
+  indicates the current temperature level of the device.
diff --git a/Documentation/driver-api/memory-devices/ti-gpmc.rst b/Documentation/driver-api/memory-devices/ti-gpmc.rst
new file mode 100644
index 000000000000..33efcb81f080
--- /dev/null
+++ b/Documentation/driver-api/memory-devices/ti-gpmc.rst
@@ -0,0 +1,179 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================================
+GPMC (General Purpose Memory Controller)
+========================================
+
+GPMC is an unified memory controller dedicated to interfacing external
+memory devices like
+
+ * Asynchronous SRAM like memories and application specific integrated
+   circuit devices.
+ * Asynchronous, synchronous, and page mode burst NOR flash devices
+   NAND flash
+ * Pseudo-SRAM devices
+
+GPMC is found on Texas Instruments SoC's (OMAP based)
+IP details: http://www.ti.com/lit/pdf/spruh73 section 7.1
+
+
+GPMC generic timing calculation:
+================================
+
+GPMC has certain timings that has to be programmed for proper
+functioning of the peripheral, while peripheral has another set of
+timings. To have peripheral work with gpmc, peripheral timings has to
+be translated to the form gpmc can understand. The way it has to be
+translated depends on the connected peripheral. Also there is a
+dependency for certain gpmc timings on gpmc clock frequency. Hence a
+generic timing routine was developed to achieve above requirements.
+
+Generic routine provides a generic method to calculate gpmc timings
+from gpmc peripheral timings. struct gpmc_device_timings fields has to
+be updated with timings from the datasheet of the peripheral that is
+connected to gpmc. A few of the peripheral timings can be fed either
+in time or in cycles, provision to handle this scenario has been
+provided (refer struct gpmc_device_timings definition). It may so
+happen that timing as specified by peripheral datasheet is not present
+in timing structure, in this scenario, try to correlate peripheral
+timing to the one available. If that doesn't work, try to add a new
+field as required by peripheral, educate generic timing routine to
+handle it, make sure that it does not break any of the existing.
+Then there may be cases where peripheral datasheet doesn't mention
+certain fields of struct gpmc_device_timings, zero those entries.
+
+Generic timing routine has been verified to work properly on
+multiple onenand's and tusb6010 peripherals.
+
+A word of caution: generic timing routine has been developed based
+on understanding of gpmc timings, peripheral timings, available
+custom timing routines, a kind of reverse engineering without
+most of the datasheets & hardware (to be exact none of those supported
+in mainline having custom timing routine) and by simulation.
+
+gpmc timing dependency on peripheral timings:
+
+[<gpmc_timing>: <peripheral timing1>, <peripheral timing2> ...]
+
+1. common
+
+cs_on:
+	t_ceasu
+adv_on:
+	t_avdasu, t_ceavd
+
+2. sync common
+
+sync_clk:
+	clk
+page_burst_access:
+	t_bacc
+clk_activation:
+	t_ces, t_avds
+
+3. read async muxed
+
+adv_rd_off:
+	t_avdp_r
+oe_on:
+	t_oeasu, t_aavdh
+access:
+	t_iaa, t_oe, t_ce, t_aa
+rd_cycle:
+	t_rd_cycle, t_cez_r, t_oez
+
+4. read async non-muxed
+
+adv_rd_off:
+	t_avdp_r
+oe_on:
+	t_oeasu
+access:
+	t_iaa, t_oe, t_ce, t_aa
+rd_cycle:
+	t_rd_cycle, t_cez_r, t_oez
+
+5. read sync muxed
+
+adv_rd_off:
+	t_avdp_r, t_avdh
+oe_on:
+	t_oeasu, t_ach, cyc_aavdh_oe
+access:
+	t_iaa, cyc_iaa, cyc_oe
+rd_cycle:
+	t_cez_r, t_oez, t_ce_rdyz
+
+6. read sync non-muxed
+
+adv_rd_off:
+	t_avdp_r
+oe_on:
+	t_oeasu
+access:
+	t_iaa, cyc_iaa, cyc_oe
+rd_cycle:
+	t_cez_r, t_oez, t_ce_rdyz
+
+7. write async muxed
+
+adv_wr_off:
+	t_avdp_w
+we_on, wr_data_mux_bus:
+	t_weasu, t_aavdh, cyc_aavhd_we
+we_off:
+	t_wpl
+cs_wr_off:
+	t_wph
+wr_cycle:
+	t_cez_w, t_wr_cycle
+
+8. write async non-muxed
+
+adv_wr_off:
+	t_avdp_w
+we_on, wr_data_mux_bus:
+	t_weasu
+we_off:
+	t_wpl
+cs_wr_off:
+	t_wph
+wr_cycle:
+	t_cez_w, t_wr_cycle
+
+9. write sync muxed
+
+adv_wr_off:
+	t_avdp_w, t_avdh
+we_on, wr_data_mux_bus:
+	t_weasu, t_rdyo, t_aavdh, cyc_aavhd_we
+we_off:
+	t_wpl, cyc_wpl
+cs_wr_off:
+	t_wph
+wr_cycle:
+	t_cez_w, t_ce_rdyz
+
+10. write sync non-muxed
+
+adv_wr_off:
+	t_avdp_w
+we_on, wr_data_mux_bus:
+	t_weasu, t_rdyo
+we_off:
+	t_wpl, cyc_wpl
+cs_wr_off:
+	t_wph
+wr_cycle:
+	t_cez_w, t_ce_rdyz
+
+
+Note:
+  Many of gpmc timings are dependent on other gpmc timings (a few
+  gpmc timings purely dependent on other gpmc timings, a reason that
+  some of the gpmc timings are missing above), and it will result in
+  indirect dependency of peripheral timings to gpmc timings other than
+  mentioned above, refer timing routine for more details. To know what
+  these peripheral timings correspond to, please see explanations in
+  struct gpmc_device_timings definition. And for gpmc timings refer
+  IP details (link above).
diff --git a/Documentation/memory-devices/ti-emif.rst b/Documentation/memory-devices/ti-emif.rst
deleted file mode 100644
index c9242294e63c..000000000000
--- a/Documentation/memory-devices/ti-emif.rst
+++ /dev/null
@@ -1,64 +0,0 @@
-:orphan:
-
-===============================
-TI EMIF SDRAM Controller Driver
-===============================
-
-Author
-======
-Aneesh V <aneesh@ti.com>
-
-Location
-========
-driver/memory/emif.c
-
-Supported SoCs:
-===============
-TI OMAP44xx
-TI OMAP54xx
-
-Menuconfig option:
-==================
-Device Drivers
-	Memory devices
-		Texas Instruments EMIF driver
-
-Description
-===========
-This driver is for the EMIF module available in Texas Instruments
-SoCs. EMIF is an SDRAM controller that, based on its revision,
-supports one or more of DDR2, DDR3, and LPDDR2 SDRAM protocols.
-This driver takes care of only LPDDR2 memories presently. The
-functions of the driver includes re-configuring AC timing
-parameters and other settings during frequency, voltage and
-temperature changes
-
-Platform Data (see include/linux/platform_data/emif_plat.h)
-===========================================================
-DDR device details and other board dependent and SoC dependent
-information can be passed through platform data (struct emif_platform_data)
-
-- DDR device details: 'struct ddr_device_info'
-- Device AC timings: 'struct lpddr2_timings' and 'struct lpddr2_min_tck'
-- Custom configurations: customizable policy options through
-  'struct emif_custom_configs'
-- IP revision
-- PHY type
-
-Interface to the external world
-===============================
-EMIF driver registers notifiers for voltage and frequency changes
-affecting EMIF and takes appropriate actions when these are invoked.
-
-- freq_pre_notify_handling()
-- freq_post_notify_handling()
-- volt_notify_handling()
-
-Debugfs
-=======
-The driver creates two debugfs entries per device.
-
-- regcache_dump : dump of register values calculated and saved for all
-  frequencies used so far.
-- mr4 : last polled value of MR4 register in the LPDDR2 device. MR4
-  indicates the current temperature level of the device.
-- 
cgit 


From 7e042736faab9457dd754668b9db2a1113cd322b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 28 Jun 2019 07:13:34 -0300
Subject: docs: add SPDX tags to new index files

All those new files I added are under GPL v2.0 license.

Add the corresponding SPDX headers to them.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/admin-guide/blockdev/drbd/figures.rst | 2 ++
 Documentation/admin-guide/blockdev/index.rst        | 2 ++
 Documentation/admin-guide/laptops/index.rst         | 1 +
 Documentation/admin-guide/namespaces/index.rst      | 2 ++
 Documentation/admin-guide/perf/index.rst            | 2 ++
 Documentation/arm/index.rst                         | 2 ++
 Documentation/arm/nwfpe/index.rst                   | 2 ++
 Documentation/arm/omap/index.rst                    | 2 ++
 Documentation/arm/sa1100/index.rst                  | 2 ++
 Documentation/arm/samsung-s3c24xx/index.rst         | 2 ++
 Documentation/arm/samsung/index.rst                 | 2 ++
 Documentation/driver-api/early-userspace/index.rst  | 2 ++
 Documentation/driver-api/md/index.rst               | 2 ++
 Documentation/driver-api/memory-devices/index.rst   | 2 ++
 Documentation/driver-api/mmc/index.rst              | 2 ++
 Documentation/driver-api/mtd/index.rst              | 2 ++
 Documentation/driver-api/nfc/index.rst              | 2 ++
 Documentation/driver-api/nvdimm/index.rst           | 2 ++
 Documentation/driver-api/phy/index.rst              | 2 ++
 Documentation/driver-api/rapidio/index.rst          | 2 ++
 Documentation/ia64/index.rst                        | 2 ++
 21 files changed, 41 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/blockdev/drbd/figures.rst b/Documentation/admin-guide/blockdev/drbd/figures.rst
index 3e3fd4b8a478..bd9a4901fe46 100644
--- a/Documentation/admin-guide/blockdev/drbd/figures.rst
+++ b/Documentation/admin-guide/blockdev/drbd/figures.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 .. The here included files are intended to help understand the implementation
 
 Data flows that Relate some functions, and write packets
diff --git a/Documentation/admin-guide/blockdev/index.rst b/Documentation/admin-guide/blockdev/index.rst
index 20a738d9d047..b903cf152091 100644
--- a/Documentation/admin-guide/blockdev/index.rst
+++ b/Documentation/admin-guide/blockdev/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ===========================
 The Linux RapidIO Subsystem
 ===========================
diff --git a/Documentation/admin-guide/laptops/index.rst b/Documentation/admin-guide/laptops/index.rst
index 6b554e39863b..cd9a1c2695fd 100644
--- a/Documentation/admin-guide/laptops/index.rst
+++ b/Documentation/admin-guide/laptops/index.rst
@@ -1,3 +1,4 @@
+.. SPDX-License-Identifier: GPL-2.0
 
 ==============
 Laptop Drivers
diff --git a/Documentation/admin-guide/namespaces/index.rst b/Documentation/admin-guide/namespaces/index.rst
index 713ec4949fa7..384f2e0f33d2 100644
--- a/Documentation/admin-guide/namespaces/index.rst
+++ b/Documentation/admin-guide/namespaces/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ==========
 Namespaces
 ==========
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index 9d445451ea18..ee4bfd2a740f 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ===========================
 Performance monitor support
 ===========================
diff --git a/Documentation/arm/index.rst b/Documentation/arm/index.rst
index 9c2f781f4685..5fc072dd0c5e 100644
--- a/Documentation/arm/index.rst
+++ b/Documentation/arm/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ================
 ARM Architecture
 ================
diff --git a/Documentation/arm/nwfpe/index.rst b/Documentation/arm/nwfpe/index.rst
index 21fa8ce192ae..3c4d2f9aa10e 100644
--- a/Documentation/arm/nwfpe/index.rst
+++ b/Documentation/arm/nwfpe/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ===================================
 NetWinder's floating point emulator
 ===================================
diff --git a/Documentation/arm/omap/index.rst b/Documentation/arm/omap/index.rst
index f1e9c11d9f9b..8b365b212e49 100644
--- a/Documentation/arm/omap/index.rst
+++ b/Documentation/arm/omap/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 =======
 TI OMAP
 =======
diff --git a/Documentation/arm/sa1100/index.rst b/Documentation/arm/sa1100/index.rst
index fb2385b3accf..68c2a280a745 100644
--- a/Documentation/arm/sa1100/index.rst
+++ b/Documentation/arm/sa1100/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ====================
 Intel StrongARM 1100
 ====================
diff --git a/Documentation/arm/samsung-s3c24xx/index.rst b/Documentation/arm/samsung-s3c24xx/index.rst
index 6c7b241cbf37..5b8a7f9398d8 100644
--- a/Documentation/arm/samsung-s3c24xx/index.rst
+++ b/Documentation/arm/samsung-s3c24xx/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ﻿==========================
 Samsung S3C24XX SoC Family
 ==========================
diff --git a/Documentation/arm/samsung/index.rst b/Documentation/arm/samsung/index.rst
index f54d95734362..8142cce3d23e 100644
--- a/Documentation/arm/samsung/index.rst
+++ b/Documentation/arm/samsung/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ===========
 Samsung SoC
 ===========
diff --git a/Documentation/driver-api/early-userspace/index.rst b/Documentation/driver-api/early-userspace/index.rst
index 6f20c3c560d8..149c1822f06d 100644
--- a/Documentation/driver-api/early-userspace/index.rst
+++ b/Documentation/driver-api/early-userspace/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ===============
 Early Userspace
 ===============
diff --git a/Documentation/driver-api/md/index.rst b/Documentation/driver-api/md/index.rst
index 205080891a1a..18f54a7d7d6e 100644
--- a/Documentation/driver-api/md/index.rst
+++ b/Documentation/driver-api/md/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ====
 RAID
 ====
diff --git a/Documentation/driver-api/memory-devices/index.rst b/Documentation/driver-api/memory-devices/index.rst
index 87549828f6ab..28101458cda5 100644
--- a/Documentation/driver-api/memory-devices/index.rst
+++ b/Documentation/driver-api/memory-devices/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 =========================
 Memory Controller drivers
 =========================
diff --git a/Documentation/driver-api/mmc/index.rst b/Documentation/driver-api/mmc/index.rst
index 9aaf64951a8c..7339736ac774 100644
--- a/Documentation/driver-api/mmc/index.rst
+++ b/Documentation/driver-api/mmc/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ========================
 MMC/SD/SDIO card support
 ========================
diff --git a/Documentation/driver-api/mtd/index.rst b/Documentation/driver-api/mtd/index.rst
index 2e0e7cc4055e..436ba5a851d7 100644
--- a/Documentation/driver-api/mtd/index.rst
+++ b/Documentation/driver-api/mtd/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ==============================
 Memory Technology Device (MTD)
 ==============================
diff --git a/Documentation/driver-api/nfc/index.rst b/Documentation/driver-api/nfc/index.rst
index 3afb2c0c2e3c..b6e9eedbff29 100644
--- a/Documentation/driver-api/nfc/index.rst
+++ b/Documentation/driver-api/nfc/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ========================
 Near Field Communication
 ========================
diff --git a/Documentation/driver-api/nvdimm/index.rst b/Documentation/driver-api/nvdimm/index.rst
index 19dc8ee371dc..a4f8f98aeb94 100644
--- a/Documentation/driver-api/nvdimm/index.rst
+++ b/Documentation/driver-api/nvdimm/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ===================================
 Non-Volatile Memory Device (NVDIMM)
 ===================================
diff --git a/Documentation/driver-api/phy/index.rst b/Documentation/driver-api/phy/index.rst
index fce9ffae2812..69ba1216de72 100644
--- a/Documentation/driver-api/phy/index.rst
+++ b/Documentation/driver-api/phy/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 =====================
 Generic PHY Framework
 =====================
diff --git a/Documentation/driver-api/rapidio/index.rst b/Documentation/driver-api/rapidio/index.rst
index 4c5e51a05134..a41b4242d16f 100644
--- a/Documentation/driver-api/rapidio/index.rst
+++ b/Documentation/driver-api/rapidio/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ===========================
 The Linux RapidIO Subsystem
 ===========================
diff --git a/Documentation/ia64/index.rst b/Documentation/ia64/index.rst
index ef99475f672b..0436e1034115 100644
--- a/Documentation/ia64/index.rst
+++ b/Documentation/ia64/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ==================
 IA-64 Architecture
 ==================
-- 
cgit 


From 113094f743fc97559c068ad20fd2808b64f6989d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 28 Jun 2019 08:36:50 -0300
Subject: docs: add some directories to the main documentation index

The contents of those directories were orphaned at the documentation
body.

While those directories could likely be moved to be inside some guide,
I'm opting to just adding their indexes to the main one, removing the
:orphan: and adding the SPDX header.

For the drivers, the rationale is that the documentation contains
a mix of Kernelspace, uAPI and admin-guide. So, better to keep them on
separate directories, as we've be doing with similar subsystem-specific
docs that were not split yet.

For the others, well... I'm too lazy to do the move. Also, it
seems to make sense to keep at least some of those at the main
dir (like kbuild, for example). In any case, a latter patch
could do the move.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 Documentation/cdrom/index.rst           |  2 +-
 Documentation/fault-injection/index.rst |  2 +-
 Documentation/fb/index.rst              |  2 +-
 Documentation/fpga/index.rst            |  2 +-
 Documentation/ide/index.rst             |  2 +-
 Documentation/index.rst                 | 13 +++++++++++++
 Documentation/kbuild/index.rst          |  2 +-
 Documentation/livepatch/index.rst       |  2 +-
 Documentation/netlabel/index.rst        |  2 +-
 Documentation/pcmcia/index.rst          |  2 +-
 Documentation/target/index.rst          |  2 +-
 Documentation/timers/index.rst          |  2 +-
 Documentation/watchdog/index.rst        |  2 +-
 13 files changed, 25 insertions(+), 12 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/cdrom/index.rst b/Documentation/cdrom/index.rst
index efbd5d111825..338ad5f94e7c 100644
--- a/Documentation/cdrom/index.rst
+++ b/Documentation/cdrom/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 =====
 cdrom
diff --git a/Documentation/fault-injection/index.rst b/Documentation/fault-injection/index.rst
index 92b5639ed07a..8408a8a91b34 100644
--- a/Documentation/fault-injection/index.rst
+++ b/Documentation/fault-injection/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ===============
 fault-injection
diff --git a/Documentation/fb/index.rst b/Documentation/fb/index.rst
index d47313714635..baf02393d8ee 100644
--- a/Documentation/fb/index.rst
+++ b/Documentation/fb/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ============
 Frame Buffer
diff --git a/Documentation/fpga/index.rst b/Documentation/fpga/index.rst
index 2c87d1ea084f..f80f95667ca2 100644
--- a/Documentation/fpga/index.rst
+++ b/Documentation/fpga/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ====
 fpga
diff --git a/Documentation/ide/index.rst b/Documentation/ide/index.rst
index 45bc12d3957f..813dfe611a31 100644
--- a/Documentation/ide/index.rst
+++ b/Documentation/ide/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ==================================
 Integrated Drive Electronics (IDE)
diff --git a/Documentation/index.rst b/Documentation/index.rst
index dbfec00ba535..0cd4c3901456 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -35,6 +35,7 @@ trying to get it to work optimally on a given system.
    :maxdepth: 2
 
    admin-guide/index
+   kbuild/index
 
 Firmware-related documentation
 ------------------------------
@@ -77,6 +78,9 @@ merged much easier.
    kernel-hacking/index
    trace/index
    maintainer/index
+   fault-injection/index
+   livepatch/index
+
 
 Kernel API documentation
 ------------------------
@@ -94,11 +98,20 @@ needed).
    core-api/index
    accounting/index
    block/index
+   cdrom/index
+   ide/index
+   fb/index
+   fpga/index
    hid/index
    iio/index
    leds/index
    media/index
+   netlabel/index
    networking/index
+   pcmcia/index
+   target/index
+   timers/index
+   watchdog/index
    input/index
    hwmon/index
    gpu/index
diff --git a/Documentation/kbuild/index.rst b/Documentation/kbuild/index.rst
index 42d4cbe4460c..e323a3f2cc81 100644
--- a/Documentation/kbuild/index.rst
+++ b/Documentation/kbuild/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ===================
 Kernel Build System
diff --git a/Documentation/livepatch/index.rst b/Documentation/livepatch/index.rst
index edd291d51847..17674a9e21b2 100644
--- a/Documentation/livepatch/index.rst
+++ b/Documentation/livepatch/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ===================
 Kernel Livepatching
diff --git a/Documentation/netlabel/index.rst b/Documentation/netlabel/index.rst
index 47f1e0e5acd1..984e1b191b12 100644
--- a/Documentation/netlabel/index.rst
+++ b/Documentation/netlabel/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ========
 NetLabel
diff --git a/Documentation/pcmcia/index.rst b/Documentation/pcmcia/index.rst
index 779c8527109e..7ae1f62fca14 100644
--- a/Documentation/pcmcia/index.rst
+++ b/Documentation/pcmcia/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ======
 pcmcia
diff --git a/Documentation/target/index.rst b/Documentation/target/index.rst
index b68f48982392..4b24f81f747e 100644
--- a/Documentation/target/index.rst
+++ b/Documentation/target/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ==================
 TCM Virtual Device
diff --git a/Documentation/timers/index.rst b/Documentation/timers/index.rst
index 91f6f8263c48..df510ad0c989 100644
--- a/Documentation/timers/index.rst
+++ b/Documentation/timers/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ======
 timers
diff --git a/Documentation/watchdog/index.rst b/Documentation/watchdog/index.rst
index 33a0de631e84..c177645081d8 100644
--- a/Documentation/watchdog/index.rst
+++ b/Documentation/watchdog/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ======================
 Linux Watchdog Support
-- 
cgit 


From 4c68060bf6d3eac6e86b995a200eb21b847236da Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 28 Jun 2019 07:29:15 -0300
Subject: docs: locking: add it to the main index

The locking directory is part of the Kernel API bookset. Add
it to the index file.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/index.rst         | 1 +
 Documentation/locking/index.rst | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/index.rst b/Documentation/index.rst
index 0cd4c3901456..eb5db850c4ef 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -96,6 +96,7 @@ needed).
 
    driver-api/index
    core-api/index
+   locking/index
    accounting/index
    block/index
    cdrom/index
diff --git a/Documentation/locking/index.rst b/Documentation/locking/index.rst
index ef5da7fe9aac..626a463f7e42 100644
--- a/Documentation/locking/index.rst
+++ b/Documentation/locking/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 =======
 locking
-- 
cgit 


From c2746a1eb741759590e8766958232d06a71840d5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 28 Jun 2019 08:14:42 -0300
Subject: docs: gpio: add sysfs interface to the admin-guide

While this is stated as obsoleted, the sysfs interface described
there is still valid, and belongs to the admin-guide.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/ABI/obsolete/sysfs-gpio             |   2 +-
 Documentation/admin-guide/gpio/index.rst          |  17 +++
 Documentation/admin-guide/gpio/sysfs.rst          | 167 ++++++++++++++++++++++
 Documentation/admin-guide/index.rst               |   1 +
 Documentation/firmware-guide/acpi/enumeration.rst |   2 +-
 Documentation/gpio/index.rst                      |  17 ---
 Documentation/gpio/sysfs.rst                      | 167 ----------------------
 Documentation/translations/zh_CN/gpio.txt         |   4 +-
 8 files changed, 189 insertions(+), 188 deletions(-)
 create mode 100644 Documentation/admin-guide/gpio/index.rst
 create mode 100644 Documentation/admin-guide/gpio/sysfs.rst
 delete mode 100644 Documentation/gpio/index.rst
 delete mode 100644 Documentation/gpio/sysfs.rst

(limited to 'Documentation')

diff --git a/Documentation/ABI/obsolete/sysfs-gpio b/Documentation/ABI/obsolete/sysfs-gpio
index 40d41ea1a3f5..e0d4e5e2dd90 100644
--- a/Documentation/ABI/obsolete/sysfs-gpio
+++ b/Documentation/ABI/obsolete/sysfs-gpio
@@ -11,7 +11,7 @@ Description:
   Kernel code may export it for complete or partial access.
 
   GPIOs are identified as they are inside the kernel, using integers in
-  the range 0..INT_MAX.  See Documentation/gpio for more information.
+  the range 0..INT_MAX.  See Documentation/admin-guide/gpio for more information.
 
     /sys/class/gpio
 	/export ... asks the kernel to export a GPIO to userspace
diff --git a/Documentation/admin-guide/gpio/index.rst b/Documentation/admin-guide/gpio/index.rst
new file mode 100644
index 000000000000..a244ba4e87d5
--- /dev/null
+++ b/Documentation/admin-guide/gpio/index.rst
@@ -0,0 +1,17 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====
+gpio
+====
+
+.. toctree::
+    :maxdepth: 1
+
+    sysfs
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/admin-guide/gpio/sysfs.rst b/Documentation/admin-guide/gpio/sysfs.rst
new file mode 100644
index 000000000000..ec09ffd983e7
--- /dev/null
+++ b/Documentation/admin-guide/gpio/sysfs.rst
@@ -0,0 +1,167 @@
+GPIO Sysfs Interface for Userspace
+==================================
+
+.. warning::
+
+  THIS ABI IS DEPRECATED, THE ABI DOCUMENTATION HAS BEEN MOVED TO
+  Documentation/ABI/obsolete/sysfs-gpio AND NEW USERSPACE CONSUMERS
+  ARE SUPPOSED TO USE THE CHARACTER DEVICE ABI. THIS OLD SYSFS ABI WILL
+  NOT BE DEVELOPED (NO NEW FEATURES), IT WILL JUST BE MAINTAINED.
+
+Refer to the examples in tools/gpio/* for an introduction to the new
+character device ABI. Also see the userspace header in
+include/uapi/linux/gpio.h
+
+The deprecated sysfs ABI
+------------------------
+Platforms which use the "gpiolib" implementors framework may choose to
+configure a sysfs user interface to GPIOs. This is different from the
+debugfs interface, since it provides control over GPIO direction and
+value instead of just showing a gpio state summary. Plus, it could be
+present on production systems without debugging support.
+
+Given appropriate hardware documentation for the system, userspace could
+know for example that GPIO #23 controls the write protect line used to
+protect boot loader segments in flash memory. System upgrade procedures
+may need to temporarily remove that protection, first importing a GPIO,
+then changing its output state, then updating the code before re-enabling
+the write protection. In normal use, GPIO #23 would never be touched,
+and the kernel would have no need to know about it.
+
+Again depending on appropriate hardware documentation, on some systems
+userspace GPIO can be used to determine system configuration data that
+standard kernels won't know about. And for some tasks, simple userspace
+GPIO drivers could be all that the system really needs.
+
+DO NOT ABUSE SYSFS TO CONTROL HARDWARE THAT HAS PROPER KERNEL DRIVERS.
+PLEASE READ THE DOCUMENT AT Documentation/driver-api/gpio/drivers-on-gpio.rst
+TO AVOID REINVENTING KERNEL WHEELS IN USERSPACE. I MEAN IT. REALLY.
+
+Paths in Sysfs
+--------------
+There are three kinds of entries in /sys/class/gpio:
+
+   -	Control interfaces used to get userspace control over GPIOs;
+
+   -	GPIOs themselves; and
+
+   -	GPIO controllers ("gpio_chip" instances).
+
+That's in addition to standard files including the "device" symlink.
+
+The control interfaces are write-only:
+
+    /sys/class/gpio/
+
+	"export" ...
+		Userspace may ask the kernel to export control of
+		a GPIO to userspace by writing its number to this file.
+
+		Example:  "echo 19 > export" will create a "gpio19" node
+		for GPIO #19, if that's not requested by kernel code.
+
+	"unexport" ...
+		Reverses the effect of exporting to userspace.
+
+		Example:  "echo 19 > unexport" will remove a "gpio19"
+		node exported using the "export" file.
+
+GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42)
+and have the following read/write attributes:
+
+    /sys/class/gpio/gpioN/
+
+	"direction" ...
+		reads as either "in" or "out". This value may
+		normally be written. Writing as "out" defaults to
+		initializing the value as low. To ensure glitch free
+		operation, values "low" and "high" may be written to
+		configure the GPIO as an output with that initial value.
+
+		Note that this attribute *will not exist* if the kernel
+		doesn't support changing the direction of a GPIO, or
+		it was exported by kernel code that didn't explicitly
+		allow userspace to reconfigure this GPIO's direction.
+
+	"value" ...
+		reads as either 0 (low) or 1 (high). If the GPIO
+		is configured as an output, this value may be written;
+		any nonzero value is treated as high.
+
+		If the pin can be configured as interrupt-generating interrupt
+		and if it has been configured to generate interrupts (see the
+		description of "edge"), you can poll(2) on that file and
+		poll(2) will return whenever the interrupt was triggered. If
+		you use poll(2), set the events POLLPRI and POLLERR. If you
+		use select(2), set the file descriptor in exceptfds. After
+		poll(2) returns, either lseek(2) to the beginning of the sysfs
+		file and read the new value or close the file and re-open it
+		to read the value.
+
+	"edge" ...
+		reads as either "none", "rising", "falling", or
+		"both". Write these strings to select the signal edge(s)
+		that will make poll(2) on the "value" file return.
+
+		This file exists only if the pin can be configured as an
+		interrupt generating input pin.
+
+	"active_low" ...
+		reads as either 0 (false) or 1 (true). Write
+		any nonzero value to invert the value attribute both
+		for reading and writing. Existing and subsequent
+		poll(2) support configuration via the edge attribute
+		for "rising" and "falling" edges will follow this
+		setting.
+
+GPIO controllers have paths like /sys/class/gpio/gpiochip42/ (for the
+controller implementing GPIOs starting at #42) and have the following
+read-only attributes:
+
+    /sys/class/gpio/gpiochipN/
+
+	"base" ...
+		same as N, the first GPIO managed by this chip
+
+	"label" ...
+		provided for diagnostics (not always unique)
+
+	"ngpio" ...
+		how many GPIOs this manages (N to N + ngpio - 1)
+
+Board documentation should in most cases cover what GPIOs are used for
+what purposes. However, those numbers are not always stable; GPIOs on
+a daughtercard might be different depending on the base board being used,
+or other cards in the stack. In such cases, you may need to use the
+gpiochip nodes (possibly in conjunction with schematics) to determine
+the correct GPIO number to use for a given signal.
+
+
+Exporting from Kernel code
+--------------------------
+Kernel code can explicitly manage exports of GPIOs which have already been
+requested using gpio_request()::
+
+	/* export the GPIO to userspace */
+	int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
+
+	/* reverse gpio_export() */
+	void gpiod_unexport(struct gpio_desc *desc);
+
+	/* create a sysfs link to an exported GPIO node */
+	int gpiod_export_link(struct device *dev, const char *name,
+		      struct gpio_desc *desc);
+
+After a kernel driver requests a GPIO, it may only be made available in
+the sysfs interface by gpiod_export(). The driver can control whether the
+signal direction may change. This helps drivers prevent userspace code
+from accidentally clobbering important system state.
+
+This explicit exporting can help with debugging (by making some kinds
+of experiments easier), or can provide an always-there interface that's
+suitable for documenting as part of a board support package.
+
+After the GPIO has been exported, gpiod_export_link() allows creating
+symlinks from elsewhere in sysfs to the GPIO sysfs node. Drivers can
+use this to provide the interface under their own device in sysfs with
+a descriptive name.
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 4e98f5596da0..280355d08af5 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -91,6 +91,7 @@ configure specific aspects of kernel behavior to your liking.
    cputopology
    device-mapper/index
    efi-stub
+   gpio/index
    highuid
    hw_random
    iostats
diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index 1252617b520f..0a72b6321f5f 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -316,7 +316,7 @@ specifies the path to the controller. In order to use these GPIOs in Linux
 we need to translate them to the corresponding Linux GPIO descriptors.
 
 There is a standard GPIO API for that and is documented in
-Documentation/gpio/.
+Documentation/admin-guide/gpio/.
 
 In the above example we can get the corresponding two GPIO descriptors with
 a code like this::
diff --git a/Documentation/gpio/index.rst b/Documentation/gpio/index.rst
deleted file mode 100644
index 09a4a553f434..000000000000
--- a/Documentation/gpio/index.rst
+++ /dev/null
@@ -1,17 +0,0 @@
-:orphan:
-
-====
-gpio
-====
-
-.. toctree::
-    :maxdepth: 1
-
-    sysfs
-
-.. only::  subproject and html
-
-   Indices
-   =======
-
-   * :ref:`genindex`
diff --git a/Documentation/gpio/sysfs.rst b/Documentation/gpio/sysfs.rst
deleted file mode 100644
index ec09ffd983e7..000000000000
--- a/Documentation/gpio/sysfs.rst
+++ /dev/null
@@ -1,167 +0,0 @@
-GPIO Sysfs Interface for Userspace
-==================================
-
-.. warning::
-
-  THIS ABI IS DEPRECATED, THE ABI DOCUMENTATION HAS BEEN MOVED TO
-  Documentation/ABI/obsolete/sysfs-gpio AND NEW USERSPACE CONSUMERS
-  ARE SUPPOSED TO USE THE CHARACTER DEVICE ABI. THIS OLD SYSFS ABI WILL
-  NOT BE DEVELOPED (NO NEW FEATURES), IT WILL JUST BE MAINTAINED.
-
-Refer to the examples in tools/gpio/* for an introduction to the new
-character device ABI. Also see the userspace header in
-include/uapi/linux/gpio.h
-
-The deprecated sysfs ABI
-------------------------
-Platforms which use the "gpiolib" implementors framework may choose to
-configure a sysfs user interface to GPIOs. This is different from the
-debugfs interface, since it provides control over GPIO direction and
-value instead of just showing a gpio state summary. Plus, it could be
-present on production systems without debugging support.
-
-Given appropriate hardware documentation for the system, userspace could
-know for example that GPIO #23 controls the write protect line used to
-protect boot loader segments in flash memory. System upgrade procedures
-may need to temporarily remove that protection, first importing a GPIO,
-then changing its output state, then updating the code before re-enabling
-the write protection. In normal use, GPIO #23 would never be touched,
-and the kernel would have no need to know about it.
-
-Again depending on appropriate hardware documentation, on some systems
-userspace GPIO can be used to determine system configuration data that
-standard kernels won't know about. And for some tasks, simple userspace
-GPIO drivers could be all that the system really needs.
-
-DO NOT ABUSE SYSFS TO CONTROL HARDWARE THAT HAS PROPER KERNEL DRIVERS.
-PLEASE READ THE DOCUMENT AT Documentation/driver-api/gpio/drivers-on-gpio.rst
-TO AVOID REINVENTING KERNEL WHEELS IN USERSPACE. I MEAN IT. REALLY.
-
-Paths in Sysfs
---------------
-There are three kinds of entries in /sys/class/gpio:
-
-   -	Control interfaces used to get userspace control over GPIOs;
-
-   -	GPIOs themselves; and
-
-   -	GPIO controllers ("gpio_chip" instances).
-
-That's in addition to standard files including the "device" symlink.
-
-The control interfaces are write-only:
-
-    /sys/class/gpio/
-
-	"export" ...
-		Userspace may ask the kernel to export control of
-		a GPIO to userspace by writing its number to this file.
-
-		Example:  "echo 19 > export" will create a "gpio19" node
-		for GPIO #19, if that's not requested by kernel code.
-
-	"unexport" ...
-		Reverses the effect of exporting to userspace.
-
-		Example:  "echo 19 > unexport" will remove a "gpio19"
-		node exported using the "export" file.
-
-GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42)
-and have the following read/write attributes:
-
-    /sys/class/gpio/gpioN/
-
-	"direction" ...
-		reads as either "in" or "out". This value may
-		normally be written. Writing as "out" defaults to
-		initializing the value as low. To ensure glitch free
-		operation, values "low" and "high" may be written to
-		configure the GPIO as an output with that initial value.
-
-		Note that this attribute *will not exist* if the kernel
-		doesn't support changing the direction of a GPIO, or
-		it was exported by kernel code that didn't explicitly
-		allow userspace to reconfigure this GPIO's direction.
-
-	"value" ...
-		reads as either 0 (low) or 1 (high). If the GPIO
-		is configured as an output, this value may be written;
-		any nonzero value is treated as high.
-
-		If the pin can be configured as interrupt-generating interrupt
-		and if it has been configured to generate interrupts (see the
-		description of "edge"), you can poll(2) on that file and
-		poll(2) will return whenever the interrupt was triggered. If
-		you use poll(2), set the events POLLPRI and POLLERR. If you
-		use select(2), set the file descriptor in exceptfds. After
-		poll(2) returns, either lseek(2) to the beginning of the sysfs
-		file and read the new value or close the file and re-open it
-		to read the value.
-
-	"edge" ...
-		reads as either "none", "rising", "falling", or
-		"both". Write these strings to select the signal edge(s)
-		that will make poll(2) on the "value" file return.
-
-		This file exists only if the pin can be configured as an
-		interrupt generating input pin.
-
-	"active_low" ...
-		reads as either 0 (false) or 1 (true). Write
-		any nonzero value to invert the value attribute both
-		for reading and writing. Existing and subsequent
-		poll(2) support configuration via the edge attribute
-		for "rising" and "falling" edges will follow this
-		setting.
-
-GPIO controllers have paths like /sys/class/gpio/gpiochip42/ (for the
-controller implementing GPIOs starting at #42) and have the following
-read-only attributes:
-
-    /sys/class/gpio/gpiochipN/
-
-	"base" ...
-		same as N, the first GPIO managed by this chip
-
-	"label" ...
-		provided for diagnostics (not always unique)
-
-	"ngpio" ...
-		how many GPIOs this manages (N to N + ngpio - 1)
-
-Board documentation should in most cases cover what GPIOs are used for
-what purposes. However, those numbers are not always stable; GPIOs on
-a daughtercard might be different depending on the base board being used,
-or other cards in the stack. In such cases, you may need to use the
-gpiochip nodes (possibly in conjunction with schematics) to determine
-the correct GPIO number to use for a given signal.
-
-
-Exporting from Kernel code
---------------------------
-Kernel code can explicitly manage exports of GPIOs which have already been
-requested using gpio_request()::
-
-	/* export the GPIO to userspace */
-	int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
-
-	/* reverse gpio_export() */
-	void gpiod_unexport(struct gpio_desc *desc);
-
-	/* create a sysfs link to an exported GPIO node */
-	int gpiod_export_link(struct device *dev, const char *name,
-		      struct gpio_desc *desc);
-
-After a kernel driver requests a GPIO, it may only be made available in
-the sysfs interface by gpiod_export(). The driver can control whether the
-signal direction may change. This helps drivers prevent userspace code
-from accidentally clobbering important system state.
-
-This explicit exporting can help with debugging (by making some kinds
-of experiments easier), or can provide an always-there interface that's
-suitable for documenting as part of a board support package.
-
-After the GPIO has been exported, gpiod_export_link() allows creating
-symlinks from elsewhere in sysfs to the GPIO sysfs node. Drivers can
-use this to provide the interface under their own device in sysfs with
-a descriptive name.
diff --git a/Documentation/translations/zh_CN/gpio.txt b/Documentation/translations/zh_CN/gpio.txt
index 4cb1ba8b8fed..a23ee14fc927 100644
--- a/Documentation/translations/zh_CN/gpio.txt
+++ b/Documentation/translations/zh_CN/gpio.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/gpio
+Chinese translated version of Documentation/admin-guide/gpio
 
 If you have any comment or update to the content, please contact the
 original document maintainer directly.  However, if you have a problem
@@ -10,7 +10,7 @@ Maintainer: Grant Likely <grant.likely@secretlab.ca>
 		Linus Walleij <linus.walleij@linaro.org>
 Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
 ---------------------------------------------------------------------
-Documentation/gpio 的中文翻译
+Documentation/admin-guide/gpio 的中文翻译
 
 如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
 交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
-- 
cgit 


From eddeed127b06ea2542dc18f2fe37d383b6369fec Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Sat, 6 Jul 2019 13:38:56 -0300
Subject: docs: don't use nested tables

Nested tables aren't supported for pdf output on Sphinx 1.7.9:

	admin-guide/laptops/sonypi:: nested tables are not yet implemented.
	admin-guide/laptops/toshiba_haps:: nested tables are not yet implemented.
	driver-api/nvdimm/btt:: nested tables are not yet implemented.
	s390/debugging390:: nested tables are not yet implemented.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> # laptops
---
 Documentation/admin-guide/laptops/sonypi.rst       | 28 ++++++++++------------
 Documentation/admin-guide/laptops/toshiba_haps.rst |  8 +++----
 Documentation/driver-api/nvdimm/btt.rst            |  2 +-
 Documentation/s390/debugging390.rst                |  2 +-
 4 files changed, 19 insertions(+), 21 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/laptops/sonypi.rst b/Documentation/admin-guide/laptops/sonypi.rst
index 2a1975ed7ee4..c6eaaf48f7c1 100644
--- a/Documentation/admin-guide/laptops/sonypi.rst
+++ b/Documentation/admin-guide/laptops/sonypi.rst
@@ -53,7 +53,7 @@ module or sonypi.<param>=<value> on the kernel boot line when sonypi is
 statically linked into the kernel). Those options are:
 
 	=============== =======================================================
-	minor: 		minor number of the misc device /dev/sonypi,
+	minor:		minor number of the misc device /dev/sonypi,
 			default is -1 (automatic allocation, see /proc/misc
 			or kernel logs)
 
@@ -89,24 +89,22 @@ statically linked into the kernel). Those options are:
 			set to 0xffffffff, meaning that all possible events
 			will be tried. You can use the following bits to
 			construct your own event mask (from
-			drivers/char/sonypi.h):
-
-				========================	======
-				SONYPI_JOGGER_MASK 		0x0001
-				SONYPI_CAPTURE_MASK 		0x0002
-				SONYPI_FNKEY_MASK 		0x0004
-				SONYPI_BLUETOOTH_MASK 		0x0008
-				SONYPI_PKEY_MASK 		0x0010
-				SONYPI_BACK_MASK 		0x0020
-				SONYPI_HELP_MASK 		0x0040
-				SONYPI_LID_MASK 		0x0080
-				SONYPI_ZOOM_MASK 		0x0100
-				SONYPI_THUMBPHRASE_MASK 	0x0200
+			drivers/char/sonypi.h)::
+
+				SONYPI_JOGGER_MASK		0x0001
+				SONYPI_CAPTURE_MASK		0x0002
+				SONYPI_FNKEY_MASK		0x0004
+				SONYPI_BLUETOOTH_MASK		0x0008
+				SONYPI_PKEY_MASK		0x0010
+				SONYPI_BACK_MASK		0x0020
+				SONYPI_HELP_MASK		0x0040
+				SONYPI_LID_MASK			0x0080
+				SONYPI_ZOOM_MASK		0x0100
+				SONYPI_THUMBPHRASE_MASK		0x0200
 				SONYPI_MEYE_MASK		0x0400
 				SONYPI_MEMORYSTICK_MASK		0x0800
 				SONYPI_BATTERY_MASK		0x1000
 				SONYPI_WIRELESS_MASK		0x2000
-				========================	======
 
 	useinput:	if set (which is the default) two input devices are
 			created, one which interprets the jogdial events as
diff --git a/Documentation/admin-guide/laptops/toshiba_haps.rst b/Documentation/admin-guide/laptops/toshiba_haps.rst
index 11dfc428c080..d28b6c3f2849 100644
--- a/Documentation/admin-guide/laptops/toshiba_haps.rst
+++ b/Documentation/admin-guide/laptops/toshiba_haps.rst
@@ -75,11 +75,11 @@ The sysfs files under /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS620A:00/ are:
 protection_level   The protection_level is readable and writeable, and
 		   provides a way to let userspace query the current protection
 		   level, as well as set the desired protection level, the
-		   available protection levels are:
+		   available protection levels are::
 
-		   ============   =======   ==========   ========
-		   0 - Disabled   1 - Low   2 - Medium   3 - High
-		   ============   =======   ==========   ========
+		     ============   =======   ==========   ========
+		     0 - Disabled   1 - Low   2 - Medium   3 - High
+		     ============   =======   ==========   ========
 
 reset_protection   The reset_protection entry is writeable only, being "1"
 		   the only parameter it accepts, it is used to trigger
diff --git a/Documentation/driver-api/nvdimm/btt.rst b/Documentation/driver-api/nvdimm/btt.rst
index 2d8269f834bd..107395c042ae 100644
--- a/Documentation/driver-api/nvdimm/btt.rst
+++ b/Documentation/driver-api/nvdimm/btt.rst
@@ -83,7 +83,7 @@ flags, and the remaining form the internal block number.
 ======== =============================================================
 Bit      Description
 ======== =============================================================
-31 - 30	 Error and Zero flags - Used in the following way:
+31 - 30	 Error and Zero flags - Used in the following way::
 
 	   == ==  ====================================================
 	   31 30  Description
diff --git a/Documentation/s390/debugging390.rst b/Documentation/s390/debugging390.rst
index d49305fd5e1a..73ad0b06c666 100644
--- a/Documentation/s390/debugging390.rst
+++ b/Documentation/s390/debugging390.rst
@@ -170,7 +170,7 @@ currently running at.
 |        +----------------+-------------------------------------------------+
 |        |    32          | Basic Addressing Mode                           |
 |        |                |                                                 |
-|        |                | Used to set addressing mode                     |
+|        |                | Used to set addressing mode::                   |
 |        |                |                                                 |
 |        |                |    +---------+----------+----------+            |
 |        |                |    | PSW 31  | PSW 32   |          |            |
-- 
cgit 


From 38cbfed28b3178dd9004064b1a72a992a3b31969 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 9 Jul 2019 12:22:41 -0300
Subject: docs: arm: fix a breakage with pdf output

Add an extra blank line, as otherwise XeLaTex will complain with:

	! LaTeX Error: Too deeply nested.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/arm/spear/overview.rst | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/arm/spear/overview.rst b/Documentation/arm/spear/overview.rst
index 8a1a87aca427..1a77f6b213b6 100644
--- a/Documentation/arm/spear/overview.rst
+++ b/Documentation/arm/spear/overview.rst
@@ -15,6 +15,7 @@ Introduction
   Hierarchy in SPEAr is as follows:
 
   SPEAr (Platform)
+
 	- SPEAr3XX (3XX SOC series, based on ARM9)
 		- SPEAr300 (SOC)
 			- SPEAr300 Evaluation Board
-- 
cgit 


From 8bb0776b8b27d548c7e65828ec3a02cb31fe3eed Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 9 Jul 2019 12:36:09 -0300
Subject: docs: block: fix pdf output

Add an extra blank line and use a markup for the enumberated
list, in order to make it possible to build the block book
on pdf format.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/block/biodoc.rst | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/block/biodoc.rst b/Documentation/block/biodoc.rst
index d6e30b680405..c8eb28401332 100644
--- a/Documentation/block/biodoc.rst
+++ b/Documentation/block/biodoc.rst
@@ -9,6 +9,7 @@ Notes on the Generic Block Layer Rewrite in Linux 2.5
 	here might still be useful.
 
 Notes Written on Jan 15, 2002:
+
 	- Jens Axboe <jens.axboe@oracle.com>
 	- Suparna Bhattacharya <suparna@in.ibm.com>
 
@@ -172,8 +173,8 @@ Some new queue property settings:
 
 New queue flags:
 
-	QUEUE_FLAG_CLUSTER (see 3.2.2)
-	QUEUE_FLAG_QUEUED (see 3.2.4)
+	- QUEUE_FLAG_CLUSTER (see 3.2.2)
+	- QUEUE_FLAG_QUEUED (see 3.2.4)
 
 
 ii. High-mem i/o capabilities are now considered the default
@@ -478,7 +479,7 @@ With this multipage bio design:
 - Splitting of an i/o request across multiple devices (as in the case of
   lvm or raid) is achieved by cloning the bio (where the clone points to
   the same bi_io_vec array, but with the index and size accordingly modified)
-- A linked list of bios is used as before for unrelated merges [*]_ - this
+- A linked list of bios is used as before for unrelated merges [#]_ - this
   avoids reallocs and makes independent completions easier to handle.
 - Code that traverses the req list can find all the segments of a bio
   by using rq_for_each_segment.  This handles the fact that a request
@@ -489,7 +490,7 @@ With this multipage bio design:
   [TBD: Should preferably also have a bi_voffset and bi_vlen to avoid modifying
   bi_offset an len fields]
 
-.. [*]
+.. [#]
 
 	unrelated merges -- a request ends up containing two or more bios that
 	didn't originate from the same place.
-- 
cgit 


From 168869492e7009b6861b615f1d030c99bc805e83 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 9 Jul 2019 13:25:51 -0300
Subject: docs: kbuild: fix build with pdf and fix some minor issues

The tag ".. include" should be replaced by ".. literalinclude" at
issues.rst, otherwise it causes TeX to crash due to excessive usage
of stack with Sphinx 2.0.

While here, solve a few minor issues at the kbuild book output by
adding extra blank lines.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/kbuild/issues.rst           | 20 ++++++++++++--------
 Documentation/kbuild/kbuild.rst           |  3 ++-
 Documentation/kbuild/kconfig-language.rst | 12 ++++++++++++
 Documentation/kbuild/kconfig.rst          |  8 ++++++--
 Documentation/kbuild/makefiles.rst        |  1 +
 5 files changed, 33 insertions(+), 11 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/kbuild/issues.rst b/Documentation/kbuild/issues.rst
index 9fdded4b681c..bdab01f733f6 100644
--- a/Documentation/kbuild/issues.rst
+++ b/Documentation/kbuild/issues.rst
@@ -1,11 +1,15 @@
-Recursion issue #1
-------------------
+================
+Recursion issues
+================
 
- .. include:: Kconfig.recursion-issue-01
-    :literal:
+issue #1
+--------
 
-Recursion issue #2
-------------------
+.. literalinclude:: Kconfig.recursion-issue-01
+   :language: kconfig
 
- .. include:: Kconfig.recursion-issue-02
-    :literal:
+issue #2
+--------
+
+.. literalinclude:: Kconfig.recursion-issue-02
+   :language: kconfig
diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst
index b25548963d70..ce9b99c004ae 100644
--- a/Documentation/kbuild/kbuild.rst
+++ b/Documentation/kbuild/kbuild.rst
@@ -18,7 +18,7 @@ This file lists all modules that are built into the kernel. This is used
 by modprobe to not fail when trying to load something builtin.
 
 modules.builtin.modinfo
---------------------------------------------------
+-----------------------
 This file contains modinfo from all modules that are built into the kernel.
 Unlike modinfo of a separate module, all fields are prefixed with module name.
 
@@ -153,6 +153,7 @@ Install script called when using "make install".
 The default name is "installkernel".
 
 The script will be called with the following arguments:
+
    - $1 - kernel version
    - $2 - kernel image file
    - $3 - kernel map file
diff --git a/Documentation/kbuild/kconfig-language.rst b/Documentation/kbuild/kconfig-language.rst
index 2bc8a7803365..74bef19f69f0 100644
--- a/Documentation/kbuild/kconfig-language.rst
+++ b/Documentation/kbuild/kconfig-language.rst
@@ -53,6 +53,7 @@ A menu entry can have a number of attributes. Not all of them are
 applicable everywhere (see syntax).
 
 - type definition: "bool"/"tristate"/"string"/"hex"/"int"
+
   Every config option must have a type. There are only two basic types:
   tristate and string; the other types are based on these two. The type
   definition optionally accepts an input prompt, so these two examples
@@ -66,11 +67,13 @@ applicable everywhere (see syntax).
 	prompt "Networking support"
 
 - input prompt: "prompt" <prompt> ["if" <expr>]
+
   Every menu entry can have at most one prompt, which is used to display
   to the user. Optionally dependencies only for this prompt can be added
   with "if".
 
 - default value: "default" <expr> ["if" <expr>]
+
   A config option can have any number of default values. If multiple
   default values are visible, only the first defined one is active.
   Default values are not limited to the menu entry where they are
@@ -112,6 +115,7 @@ applicable everywhere (see syntax).
   Optionally dependencies for this default value can be added with "if".
 
 - dependencies: "depends on" <expr>
+
   This defines a dependency for this menu entry. If multiple
   dependencies are defined, they are connected with '&&'. Dependencies
   are applied to all other options within this menu entry (which also
@@ -127,6 +131,7 @@ applicable everywhere (see syntax).
 	default y
 
 - reverse dependencies: "select" <symbol> ["if" <expr>]
+
   While normal dependencies reduce the upper limit of a symbol (see
   below), reverse dependencies can be used to force a lower limit of
   another symbol. The value of the current menu symbol is used as the
@@ -146,6 +151,7 @@ applicable everywhere (see syntax).
 	the illegal configurations all over.
 
 - weak reverse dependencies: "imply" <symbol> ["if" <expr>]
+
   This is similar to "select" as it enforces a lower limit on another
   symbol except that the "implied" symbol's value may still be set to n
   from a direct dependency or with a visible prompt.
@@ -176,6 +182,7 @@ applicable everywhere (see syntax).
   configure that subsystem out without also having to unset these drivers.
 
 - limiting menu display: "visible if" <expr>
+
   This attribute is only applicable to menu blocks, if the condition is
   false, the menu block is not displayed to the user (the symbols
   contained there can still be selected by other symbols, though). It is
@@ -183,12 +190,14 @@ applicable everywhere (see syntax).
   entries. Default value of "visible" is true.
 
 - numerical ranges: "range" <symbol> <symbol> ["if" <expr>]
+
   This allows to limit the range of possible input values for int
   and hex symbols. The user can only input a value which is larger than
   or equal to the first symbol and smaller than or equal to the second
   symbol.
 
 - help text: "help" or "---help---"
+
   This defines a help text. The end of the help text is determined by
   the indentation level, this means it ends at the first line which has
   a smaller indentation than the first line of the help text.
@@ -197,6 +206,7 @@ applicable everywhere (see syntax).
   the file as an aid to developers.
 
 - misc options: "option" <symbol>[=<value>]
+
   Various less common options can be defined via this option syntax,
   which can modify the behaviour of the menu entry and its config
   symbol. These options are currently possible:
@@ -325,6 +335,7 @@ end a menu entry:
 The first five also start the definition of a menu entry.
 
 config::
+
 	"config" <symbol>
 	<config options>
 
@@ -332,6 +343,7 @@ This defines a config symbol <symbol> and accepts any of above
 attributes as options.
 
 menuconfig::
+
 	"menuconfig" <symbol>
 	<config options>
 
diff --git a/Documentation/kbuild/kconfig.rst b/Documentation/kbuild/kconfig.rst
index 88129af7e539..a9a855f894b3 100644
--- a/Documentation/kbuild/kconfig.rst
+++ b/Documentation/kbuild/kconfig.rst
@@ -264,6 +264,7 @@ NCONFIG_MODE
 This mode shows all sub-menus in one large tree.
 
 Example::
+
 	make NCONFIG_MODE=single_menu nconfig
 
 ----------------------------------------------------------------------
@@ -277,9 +278,12 @@ Searching in xconfig:
 	names, so you have to know something close to what you are
 	looking for.
 
-	Example:
+	Example::
+
 		Ctrl-F hotplug
-	or
+
+	or::
+
 		Menu: File, Search, hotplug
 
 	lists all config symbol entries that contain "hotplug" in
diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst
index 093f2d79ab95..f31158457753 100644
--- a/Documentation/kbuild/makefiles.rst
+++ b/Documentation/kbuild/makefiles.rst
@@ -384,6 +384,7 @@ more details, with real examples.
 -----------------------
 
 	Kbuild tracks dependencies on the following:
+
 	1) All prerequisite files (both `*.c` and `*.h`)
 	2) `CONFIG_` options used in all prerequisite files
 	3) Command-line used to compile target
-- 
cgit 


From 89b408a68b9dd163b2705b6f73d8e3cc3579b457 Mon Sep 17 00:00:00 2001
From: Sheriff Esseson <sheriffesseson@gmail.com>
Date: Mon, 15 Jul 2019 09:15:09 -0700
Subject: Documentation: filesystem: Convert xfs.txt to ReST

Move xfs.txt to admin-guide, convert xfs.txt to ReST and broken references

Signed-off-by: Sheriff Esseson <sheriffesseson@gmail.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 Documentation/admin-guide/index.rst |   1 +
 Documentation/admin-guide/xfs.rst   | 466 +++++++++++++++++++++++++++++++++++
 Documentation/filesystems/dax.txt   |   2 +-
 Documentation/filesystems/xfs.txt   | 470 ------------------------------------
 4 files changed, 468 insertions(+), 471 deletions(-)
 create mode 100644 Documentation/admin-guide/xfs.rst
 delete mode 100644 Documentation/filesystems/xfs.txt

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 8001917ee012..3c18ba7912a7 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -70,6 +70,7 @@ configure specific aspects of kernel behavior to your liking.
    ras
    bcache
    ext4
+   xfs
    pm/index
    thunderbolt
    LSM/index
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst
new file mode 100644
index 000000000000..e76665a8f2f2
--- /dev/null
+++ b/Documentation/admin-guide/xfs.rst
@@ -0,0 +1,466 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+The SGI XFS Filesystem
+======================
+
+XFS is a high performance journaling filesystem which originated
+on the SGI IRIX platform.  It is completely multi-threaded, can
+support large files and large filesystems, extended attributes,
+variable block sizes, is extent based, and makes extensive use of
+Btrees (directories, extents, free space) to aid both performance
+and scalability.
+
+Refer to the documentation at https://xfs.wiki.kernel.org/
+for further details.  This implementation is on-disk compatible
+with the IRIX version of XFS.
+
+
+Mount Options
+=============
+
+When mounting an XFS filesystem, the following options are accepted.
+
+  allocsize=size
+	Sets the buffered I/O end-of-file preallocation size when
+	doing delayed allocation writeout (default size is 64KiB).
+	Valid values for this option are page size (typically 4KiB)
+	through to 1GiB, inclusive, in power-of-2 increments.
+
+	The default behaviour is for dynamic end-of-file
+	preallocation size, which uses a set of heuristics to
+	optimise the preallocation size based on the current
+	allocation patterns within the file and the access patterns
+	to the file. Specifying a fixed ``allocsize`` value turns off
+	the dynamic behaviour.
+
+  attr2 or noattr2
+	The options enable/disable an "opportunistic" improvement to
+	be made in the way inline extended attributes are stored
+	on-disk.  When the new form is used for the first time when
+	``attr2`` is selected (either when setting or removing extended
+	attributes) the on-disk superblock feature bit field will be
+	updated to reflect this format being in use.
+
+	The default behaviour is determined by the on-disk feature
+	bit indicating that ``attr2`` behaviour is active. If either
+	mount option is set, then that becomes the new default used
+	by the filesystem.
+
+	CRC enabled filesystems always use the ``attr2`` format, and so
+	will reject the ``noattr2`` mount option if it is set.
+
+  discard or nodiscard (default)
+	Enable/disable the issuing of commands to let the block
+	device reclaim space freed by the filesystem.  This is
+	useful for SSD devices, thinly provisioned LUNs and virtual
+	machine images, but may have a performance impact.
+
+	Note: It is currently recommended that you use the ``fstrim``
+	application to ``discard`` unused blocks rather than the ``discard``
+	mount option because the performance impact of this option
+	is quite severe.
+
+  grpid/bsdgroups or nogrpid/sysvgroups (default)
+	These options define what group ID a newly created file
+	gets.  When ``grpid`` is set, it takes the group ID of the
+	directory in which it is created; otherwise it takes the
+	``fsgid`` of the current process, unless the directory has the
+	``setgid`` bit set, in which case it takes the ``gid`` from the
+	parent directory, and also gets the ``setgid`` bit set if it is
+	a directory itself.
+
+  filestreams
+	Make the data allocator use the filestreams allocation mode
+	across the entire filesystem rather than just on directories
+	configured to use it.
+
+  ikeep or noikeep (default)
+	When ``ikeep`` is specified, XFS does not delete empty inode
+	clusters and keeps them around on disk.  When ``noikeep`` is
+	specified, empty inode clusters are returned to the free
+	space pool.
+
+  inode32 or inode64 (default)
+	When ``inode32`` is specified, it indicates that XFS limits
+	inode creation to locations which will not result in inode
+	numbers with more than 32 bits of significance.
+
+	When ``inode64`` is specified, it indicates that XFS is allowed
+	to create inodes at any location in the filesystem,
+	including those which will result in inode numbers occupying
+	more than 32 bits of significance.
+
+	``inode32`` is provided for backwards compatibility with older
+	systems and applications, since 64 bits inode numbers might
+	cause problems for some applications that cannot handle
+	large inode numbers.  If applications are in use which do
+	not handle inode numbers bigger than 32 bits, the ``inode32``
+	option should be specified.
+
+  largeio or nolargeio (default)
+	If ``nolargeio`` is specified, the optimal I/O reported in
+	``st_blksize`` by **stat(2)** will be as small as possible to allow
+	user applications to avoid inefficient read/modify/write
+	I/O.  This is typically the page size of the machine, as
+	this is the granularity of the page cache.
+
+	If ``largeio`` is specified, a filesystem that was created with a
+	``swidth`` specified will return the ``swidth`` value (in bytes)
+	in ``st_blksize``. If the filesystem does not have a ``swidth``
+	specified but does specify an ``allocsize`` then ``allocsize``
+	(in bytes) will be returned instead. Otherwise the behaviour
+	is the same as if ``nolargeio`` was specified.
+
+  logbufs=value
+	Set the number of in-memory log buffers.  Valid numbers
+	range from 2-8 inclusive.
+
+	The default value is 8 buffers.
+
+	If the memory cost of 8 log buffers is too high on small
+	systems, then it may be reduced at some cost to performance
+	on metadata intensive workloads. The ``logbsize`` option below
+	controls the size of each buffer and so is also relevant to
+	this case.
+
+  logbsize=value
+	Set the size of each in-memory log buffer.  The size may be
+	specified in bytes, or in kilobytes with a "k" suffix.
+	Valid sizes for version 1 and version 2 logs are 16384 (16k)
+	and 32768 (32k).  Valid sizes for version 2 logs also
+	include 65536 (64k), 131072 (128k) and 262144 (256k). The
+	logbsize must be an integer multiple of the log
+	stripe unit configured at **mkfs(8)** time.
+
+	The default value for for version 1 logs is 32768, while the
+	default value for version 2 logs is MAX(32768, log_sunit).
+
+  logdev=device and rtdev=device
+	Use an external log (metadata journal) and/or real-time device.
+	An XFS filesystem has up to three parts: a data section, a log
+	section, and a real-time section.  The real-time section is
+	optional, and the log section can be separate from the data
+	section or contained within it.
+
+  noalign
+	Data allocations will not be aligned at stripe unit
+	boundaries. This is only relevant to filesystems created
+	with non-zero data alignment parameters (``sunit``, ``swidth``) by
+	**mkfs(8)**.
+
+  norecovery
+	The filesystem will be mounted without running log recovery.
+	If the filesystem was not cleanly unmounted, it is likely to
+	be inconsistent when mounted in ``norecovery`` mode.
+	Some files or directories may not be accessible because of this.
+	Filesystems mounted ``norecovery`` must be mounted read-only or
+	the mount will fail.
+
+  nouuid
+	Don't check for double mounted file systems using the file
+	system ``uuid``.  This is useful to mount LVM snapshot volumes,
+	and often used in combination with ``norecovery`` for mounting
+	read-only snapshots.
+
+  noquota
+	Forcibly turns off all quota accounting and enforcement
+	within the filesystem.
+
+  uquota/usrquota/uqnoenforce/quota
+	User disk quota accounting enabled, and limits (optionally)
+	enforced.  Refer to **xfs_quota(8)** for further details.
+
+  gquota/grpquota/gqnoenforce
+	Group disk quota accounting enabled and limits (optionally)
+	enforced.  Refer to **xfs_quota(8)** for further details.
+
+  pquota/prjquota/pqnoenforce
+	Project disk quota accounting enabled and limits (optionally)
+	enforced.  Refer to **xfs_quota(8)** for further details.
+
+  sunit=value and swidth=value
+	Used to specify the stripe unit and width for a RAID device
+	or a stripe volume.  "value" must be specified in 512-byte
+	block units. These options are only relevant to filesystems
+	that were created with non-zero data alignment parameters.
+
+	The ``sunit`` and ``swidth`` parameters specified must be compatible
+	with the existing filesystem alignment characteristics.  In
+	general, that means the only valid changes to ``sunit`` are
+	increasing it by a power-of-2 multiple. Valid ``swidth`` values
+	are any integer multiple of a valid ``sunit`` value.
+
+	Typically the only time these mount options are necessary if
+	after an underlying RAID device has had it's geometry
+	modified, such as adding a new disk to a RAID5 lun and
+	reshaping it.
+
+  swalloc
+	Data allocations will be rounded up to stripe width boundaries
+	when the current end of file is being extended and the file
+	size is larger than the stripe width size.
+
+  wsync
+	When specified, all filesystem namespace operations are
+	executed synchronously. This ensures that when the namespace
+	operation (create, unlink, etc) completes, the change to the
+	namespace is on stable storage. This is useful in HA setups
+	where failover must not result in clients seeing
+	inconsistent namespace presentation during or after a
+	failover event.
+
+
+Deprecated Mount Options
+========================
+
+===========================     ================
+  Name				Removal Schedule
+===========================     ================
+===========================     ================
+
+
+Removed Mount Options
+=====================
+
+===========================     =======
+  Name				Removed
+===========================	=======
+  delaylog/nodelaylog		v4.0
+  ihashsize			v4.0
+  irixsgid			v4.0
+  osyncisdsync/osyncisosync	v4.0
+  barrier			v4.19
+  nobarrier			v4.19
+===========================     =======
+
+sysctls
+=======
+
+The following sysctls are available for the XFS filesystem:
+
+  fs.xfs.stats_clear		(Min: 0  Default: 0  Max: 1)
+	Setting this to "1" clears accumulated XFS statistics
+	in /proc/fs/xfs/stat.  It then immediately resets to "0".
+
+  fs.xfs.xfssyncd_centisecs	(Min: 100  Default: 3000  Max: 720000)
+	The interval at which the filesystem flushes metadata
+	out to disk and runs internal cache cleanup routines.
+
+  fs.xfs.filestream_centisecs	(Min: 1  Default: 3000  Max: 360000)
+	The interval at which the filesystem ages filestreams cache
+	references and returns timed-out AGs back to the free stream
+	pool.
+
+  fs.xfs.speculative_prealloc_lifetime
+		(Units: seconds   Min: 1  Default: 300  Max: 86400)
+	The interval at which the background scanning for inodes
+	with unused speculative preallocation runs. The scan
+	removes unused preallocation from clean inodes and releases
+	the unused space back to the free pool.
+
+  fs.xfs.error_level		(Min: 0  Default: 3  Max: 11)
+	A volume knob for error reporting when internal errors occur.
+	This will generate detailed messages & backtraces for filesystem
+	shutdowns, for example.  Current threshold values are:
+
+		XFS_ERRLEVEL_OFF:       0
+		XFS_ERRLEVEL_LOW:       1
+		XFS_ERRLEVEL_HIGH:      5
+
+  fs.xfs.panic_mask		(Min: 0  Default: 0  Max: 256)
+	Causes certain error conditions to call BUG(). Value is a bitmask;
+	OR together the tags which represent errors which should cause panics:
+
+		XFS_NO_PTAG                     0
+		XFS_PTAG_IFLUSH                 0x00000001
+		XFS_PTAG_LOGRES                 0x00000002
+		XFS_PTAG_AILDELETE              0x00000004
+		XFS_PTAG_ERROR_REPORT           0x00000008
+		XFS_PTAG_SHUTDOWN_CORRUPT       0x00000010
+		XFS_PTAG_SHUTDOWN_IOERROR       0x00000020
+		XFS_PTAG_SHUTDOWN_LOGERROR      0x00000040
+		XFS_PTAG_FSBLOCK_ZERO           0x00000080
+		XFS_PTAG_VERIFIER_ERROR         0x00000100
+
+	This option is intended for debugging only.
+
+  fs.xfs.irix_symlink_mode	(Min: 0  Default: 0  Max: 1)
+	Controls whether symlinks are created with mode 0777 (default)
+	or whether their mode is affected by the umask (irix mode).
+
+  fs.xfs.irix_sgid_inherit	(Min: 0  Default: 0  Max: 1)
+	Controls files created in SGID directories.
+	If the group ID of the new file does not match the effective group
+	ID or one of the supplementary group IDs of the parent dir, the
+	ISGID bit is cleared if the irix_sgid_inherit compatibility sysctl
+	is set.
+
+  fs.xfs.inherit_sync		(Min: 0  Default: 1  Max: 1)
+	Setting this to "1" will cause the "sync" flag set
+	by the **xfs_io(8)** chattr command on a directory to be
+	inherited by files in that directory.
+
+  fs.xfs.inherit_nodump		(Min: 0  Default: 1  Max: 1)
+	Setting this to "1" will cause the "nodump" flag set
+	by the **xfs_io(8)** chattr command on a directory to be
+	inherited by files in that directory.
+
+  fs.xfs.inherit_noatime	(Min: 0  Default: 1  Max: 1)
+	Setting this to "1" will cause the "noatime" flag set
+	by the **xfs_io(8)** chattr command on a directory to be
+	inherited by files in that directory.
+
+  fs.xfs.inherit_nosymlinks	(Min: 0  Default: 1  Max: 1)
+	Setting this to "1" will cause the "nosymlinks" flag set
+	by the **xfs_io(8)** chattr command on a directory to be
+	inherited by files in that directory.
+
+  fs.xfs.inherit_nodefrag	(Min: 0  Default: 1  Max: 1)
+	Setting this to "1" will cause the "nodefrag" flag set
+	by the **xfs_io(8)** chattr command on a directory to be
+	inherited by files in that directory.
+
+  fs.xfs.rotorstep		(Min: 1  Default: 1  Max: 256)
+	In "inode32" allocation mode, this option determines how many
+	files the allocator attempts to allocate in the same allocation
+	group before moving to the next allocation group.  The intent
+	is to control the rate at which the allocator moves between
+	allocation groups when allocating extents for new files.
+
+Deprecated Sysctls
+==================
+
+None at present.
+
+
+Removed Sysctls
+===============
+
+  Name				Removed
+  ----				-------
+  fs.xfs.xfsbufd_centisec	v4.0
+  fs.xfs.age_buffer_centisecs	v4.0
+
+
+Error handling
+==============
+
+XFS can act differently according to the type of error found during its
+operation. The implementation introduces the following concepts to the error
+handler:
+
+ -failure speed:
+	Defines how fast XFS should propagate an error upwards when a specific
+	error is found during the filesystem operation. It can propagate
+	immediately, after a defined number of retries, after a set time period,
+	or simply retry forever.
+
+ -error classes:
+	Specifies the subsystem the error configuration will apply to, such as
+	metadata IO or memory allocation. Different subsystems will have
+	different error handlers for which behaviour can be configured.
+
+ -error handlers:
+	Defines the behavior for a specific error.
+
+The filesystem behavior during an error can be set via ``sysfs`` files. Each
+error handler works independently - the first condition met by an error handler
+for a specific class will cause the error to be propagated rather than reset and
+retried.
+
+The action taken by the filesystem when the error is propagated is context
+dependent - it may cause a shut down in the case of an unrecoverable error,
+it may be reported back to userspace, or it may even be ignored because
+there's nothing useful we can with the error or anyone we can report it to (e.g.
+during unmount).
+
+The configuration files are organized into the following hierarchy for each
+mounted filesystem:
+
+  /sys/fs/xfs/<dev>/error/<class>/<error>/
+
+Where:
+  <dev>
+	The short device name of the mounted filesystem. This is the same device
+	name that shows up in XFS kernel error messages as "XFS(<dev>): ..."
+
+  <class>
+	The subsystem the error configuration belongs to. As of 4.9, the defined
+	classes are:
+
+		- "metadata": applies metadata buffer write IO
+
+  <error>
+	The individual error handler configurations.
+
+
+Each filesystem has "global" error configuration options defined in their top
+level directory:
+
+  /sys/fs/xfs/<dev>/error/
+
+  fail_at_unmount		(Min:  0  Default:  1  Max: 1)
+	Defines the filesystem error behavior at unmount time.
+
+	If set to a value of 1, XFS will override all other error configurations
+	during unmount and replace them with "immediate fail" characteristics.
+	i.e. no retries, no retry timeout. This will always allow unmount to
+	succeed when there are persistent errors present.
+
+	If set to 0, the configured retry behaviour will continue until all
+	retries and/or timeouts have been exhausted. This will delay unmount
+	completion when there are persistent errors, and it may prevent the
+	filesystem from ever unmounting fully in the case of "retry forever"
+	handler configurations.
+
+	Note: there is no guarantee that fail_at_unmount can be set while an
+	unmount is in progress. It is possible that the ``sysfs`` entries are
+	removed by the unmounting filesystem before a "retry forever" error
+	handler configuration causes unmount to hang, and hence the filesystem
+	must be configured appropriately before unmount begins to prevent
+	unmount hangs.
+
+Each filesystem has specific error class handlers that define the error
+propagation behaviour for specific errors. There is also a "default" error
+handler defined, which defines the behaviour for all errors that don't have
+specific handlers defined. Where multiple retry constraints are configured for
+a single error, the first retry configuration that expires will cause the error
+to be propagated. The handler configurations are found in the directory:
+
+  /sys/fs/xfs/<dev>/error/<class>/<error>/
+
+  max_retries			(Min: -1  Default: Varies  Max: INTMAX)
+	Defines the allowed number of retries of a specific error before
+	the filesystem will propagate the error. The retry count for a given
+	error context (e.g. a specific metadata buffer) is reset every time
+	there is a successful completion of the operation.
+
+	Setting the value to "-1" will cause XFS to retry forever for this
+	specific error.
+
+	Setting the value to "0" will cause XFS to fail immediately when the
+	specific error is reported.
+
+	Setting the value to "N" (where 0 < N < Max) will make XFS retry the
+	operation "N" times before propagating the error.
+
+  retry_timeout_seconds		(Min:  -1  Default:  Varies  Max: 1 day)
+	Define the amount of time (in seconds) that the filesystem is
+	allowed to retry its operations when the specific error is
+	found.
+
+	Setting the value to "-1" will allow XFS to retry forever for this
+	specific error.
+
+	Setting the value to "0" will cause XFS to fail immediately when the
+	specific error is reported.
+
+	Setting the value to "N" (where 0 < N < Max) will allow XFS to retry the
+	operation for up to "N" seconds before propagating the error.
+
+**Note:** The default behaviour for a specific error handler is dependent on both
+the class and error context. For example, the default values for
+"metadata/ENODEV" are "0" rather than "-1" so that this error handler defaults
+to "fail immediately" behaviour. This is done because ENODEV is a fatal,
+unrecoverable error no matter how many times the metadata IO is retried.
diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt
index 6d2c0d340dea..679729442fd2 100644
--- a/Documentation/filesystems/dax.txt
+++ b/Documentation/filesystems/dax.txt
@@ -76,7 +76,7 @@ exposure of uninitialized data through mmap.
 These filesystems may be used for inspiration:
 - ext2: see Documentation/filesystems/ext2.txt
 - ext4: see Documentation/filesystems/ext4/
-- xfs:  see Documentation/filesystems/xfs.txt
+- xfs:  see Documentation/admin-guide/xfs.rst
 
 
 Handling Media Errors
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt
deleted file mode 100644
index a5cbb5e0e3db..000000000000
--- a/Documentation/filesystems/xfs.txt
+++ /dev/null
@@ -1,470 +0,0 @@
-
-The SGI XFS Filesystem
-======================
-
-XFS is a high performance journaling filesystem which originated
-on the SGI IRIX platform.  It is completely multi-threaded, can
-support large files and large filesystems, extended attributes,
-variable block sizes, is extent based, and makes extensive use of
-Btrees (directories, extents, free space) to aid both performance
-and scalability.
-
-Refer to the documentation at https://xfs.wiki.kernel.org/
-for further details.  This implementation is on-disk compatible
-with the IRIX version of XFS.
-
-
-Mount Options
-=============
-
-When mounting an XFS filesystem, the following options are accepted.
-For boolean mount options, the names with the (*) suffix is the
-default behaviour.
-
-  allocsize=size
-	Sets the buffered I/O end-of-file preallocation size when
-	doing delayed allocation writeout (default size is 64KiB).
-	Valid values for this option are page size (typically 4KiB)
-	through to 1GiB, inclusive, in power-of-2 increments.
-
-	The default behaviour is for dynamic end-of-file
-	preallocation size, which uses a set of heuristics to
-	optimise the preallocation size based on the current
-	allocation patterns within the file and the access patterns
-	to the file. Specifying a fixed allocsize value turns off
-	the dynamic behaviour.
-
-  attr2
-  noattr2
-	The options enable/disable an "opportunistic" improvement to
-	be made in the way inline extended attributes are stored
-	on-disk.  When the new form is used for the first time when
-	attr2 is selected (either when setting or removing extended
-	attributes) the on-disk superblock feature bit field will be
-	updated to reflect this format being in use.
-
-	The default behaviour is determined by the on-disk feature
-	bit indicating that attr2 behaviour is active. If either
-	mount option it set, then that becomes the new default used
-	by the filesystem.
-
-	CRC enabled filesystems always use the attr2 format, and so
-	will reject the noattr2 mount option if it is set.
-
-  discard
-  nodiscard (*)
-	Enable/disable the issuing of commands to let the block
-	device reclaim space freed by the filesystem.  This is
-	useful for SSD devices, thinly provisioned LUNs and virtual
-	machine images, but may have a performance impact.
-
-	Note: It is currently recommended that you use the fstrim
-	application to discard unused blocks rather than the discard
-	mount option because the performance impact of this option
-	is quite severe.
-
-  grpid/bsdgroups
-  nogrpid/sysvgroups (*)
-	These options define what group ID a newly created file
-	gets.  When grpid is set, it takes the group ID of the
-	directory in which it is created; otherwise it takes the
-	fsgid of the current process, unless the directory has the
-	setgid bit set, in which case it takes the gid from the
-	parent directory, and also gets the setgid bit set if it is
-	a directory itself.
-
-  filestreams
-	Make the data allocator use the filestreams allocation mode
-	across the entire filesystem rather than just on directories
-	configured to use it.
-
-  ikeep
-  noikeep (*)
-	When ikeep is specified, XFS does not delete empty inode
-	clusters and keeps them around on disk.  When noikeep is
-	specified, empty inode clusters are returned to the free
-	space pool.
-
-  inode32
-  inode64 (*)
-	When inode32 is specified, it indicates that XFS limits
-	inode creation to locations which will not result in inode
-	numbers with more than 32 bits of significance.
-
-	When inode64 is specified, it indicates that XFS is allowed
-	to create inodes at any location in the filesystem,
-	including those which will result in inode numbers occupying
-	more than 32 bits of significance. 
-
-	inode32 is provided for backwards compatibility with older
-	systems and applications, since 64 bits inode numbers might
-	cause problems for some applications that cannot handle
-	large inode numbers.  If applications are in use which do
-	not handle inode numbers bigger than 32 bits, the inode32
-	option should be specified.
-
-
-  largeio
-  nolargeio (*)
-	If "nolargeio" is specified, the optimal I/O reported in
-	st_blksize by stat(2) will be as small as possible to allow
-	user applications to avoid inefficient read/modify/write
-	I/O.  This is typically the page size of the machine, as
-	this is the granularity of the page cache.
-
-	If "largeio" specified, a filesystem that was created with a
-	"swidth" specified will return the "swidth" value (in bytes)
-	in st_blksize. If the filesystem does not have a "swidth"
-	specified but does specify an "allocsize" then "allocsize"
-	(in bytes) will be returned instead. Otherwise the behaviour
-	is the same as if "nolargeio" was specified.
-
-  logbufs=value
-	Set the number of in-memory log buffers.  Valid numbers
-	range from 2-8 inclusive.
-
-	The default value is 8 buffers.
-
-	If the memory cost of 8 log buffers is too high on small
-	systems, then it may be reduced at some cost to performance
-	on metadata intensive workloads. The logbsize option below
-	controls the size of each buffer and so is also relevant to
-	this case.
-
-  logbsize=value
-	Set the size of each in-memory log buffer.  The size may be
-	specified in bytes, or in kilobytes with a "k" suffix.
-	Valid sizes for version 1 and version 2 logs are 16384 (16k)
-	and 32768 (32k).  Valid sizes for version 2 logs also
-	include 65536 (64k), 131072 (128k) and 262144 (256k). The
-	logbsize must be an integer multiple of the log
-	stripe unit configured at mkfs time.
-
-	The default value for for version 1 logs is 32768, while the
-	default value for version 2 logs is MAX(32768, log_sunit).
-
-  logdev=device and rtdev=device
-	Use an external log (metadata journal) and/or real-time device.
-	An XFS filesystem has up to three parts: a data section, a log
-	section, and a real-time section.  The real-time section is
-	optional, and the log section can be separate from the data
-	section or contained within it.
-
-  noalign
-	Data allocations will not be aligned at stripe unit
-	boundaries. This is only relevant to filesystems created
-	with non-zero data alignment parameters (sunit, swidth) by
-	mkfs.
-
-  norecovery
-	The filesystem will be mounted without running log recovery.
-	If the filesystem was not cleanly unmounted, it is likely to
-	be inconsistent when mounted in "norecovery" mode.
-	Some files or directories may not be accessible because of this.
-	Filesystems mounted "norecovery" must be mounted read-only or
-	the mount will fail.
-
-  nouuid
-	Don't check for double mounted file systems using the file
-	system uuid.  This is useful to mount LVM snapshot volumes,
-	and often used in combination with "norecovery" for mounting
-	read-only snapshots.
-
-  noquota
-	Forcibly turns off all quota accounting and enforcement
-	within the filesystem.
-
-  uquota/usrquota/uqnoenforce/quota
-	User disk quota accounting enabled, and limits (optionally)
-	enforced.  Refer to xfs_quota(8) for further details.
-
-  gquota/grpquota/gqnoenforce
-	Group disk quota accounting enabled and limits (optionally)
-	enforced.  Refer to xfs_quota(8) for further details.
-
-  pquota/prjquota/pqnoenforce
-	Project disk quota accounting enabled and limits (optionally)
-	enforced.  Refer to xfs_quota(8) for further details.
-
-  sunit=value and swidth=value
-	Used to specify the stripe unit and width for a RAID device
-	or a stripe volume.  "value" must be specified in 512-byte
-	block units. These options are only relevant to filesystems
-	that were created with non-zero data alignment parameters.
-
-	The sunit and swidth parameters specified must be compatible
-	with the existing filesystem alignment characteristics.  In
-	general, that means the only valid changes to sunit are
-	increasing it by a power-of-2 multiple. Valid swidth values
-	are any integer multiple of a valid sunit value.
-
-	Typically the only time these mount options are necessary if
-	after an underlying RAID device has had it's geometry
-	modified, such as adding a new disk to a RAID5 lun and
-	reshaping it.
-
-  swalloc
-	Data allocations will be rounded up to stripe width boundaries
-	when the current end of file is being extended and the file
-	size is larger than the stripe width size.
-
-  wsync
-	When specified, all filesystem namespace operations are
-	executed synchronously. This ensures that when the namespace
-	operation (create, unlink, etc) completes, the change to the
-	namespace is on stable storage. This is useful in HA setups
-	where failover must not result in clients seeing
-	inconsistent namespace presentation during or after a
-	failover event.
-
-
-Deprecated Mount Options
-========================
-
-  Name				Removal Schedule
-  ----				----------------
-
-
-Removed Mount Options
-=====================
-
-  Name				Removed
-  ----				-------
-  delaylog/nodelaylog		v4.0
-  ihashsize			v4.0
-  irixsgid			v4.0
-  osyncisdsync/osyncisosync	v4.0
-  barrier			v4.19
-  nobarrier			v4.19
-
-
-sysctls
-=======
-
-The following sysctls are available for the XFS filesystem:
-
-  fs.xfs.stats_clear		(Min: 0  Default: 0  Max: 1)
-	Setting this to "1" clears accumulated XFS statistics
-	in /proc/fs/xfs/stat.  It then immediately resets to "0".
-
-  fs.xfs.xfssyncd_centisecs	(Min: 100  Default: 3000  Max: 720000)
-	The interval at which the filesystem flushes metadata
-	out to disk and runs internal cache cleanup routines.
-
-  fs.xfs.filestream_centisecs	(Min: 1  Default: 3000  Max: 360000)
-	The interval at which the filesystem ages filestreams cache
-	references and returns timed-out AGs back to the free stream
-	pool.
-
-  fs.xfs.speculative_prealloc_lifetime
-		(Units: seconds   Min: 1  Default: 300  Max: 86400)
-	The interval at which the background scanning for inodes
-	with unused speculative preallocation runs. The scan
-	removes unused preallocation from clean inodes and releases
-	the unused space back to the free pool.
-
-  fs.xfs.error_level		(Min: 0  Default: 3  Max: 11)
-	A volume knob for error reporting when internal errors occur.
-	This will generate detailed messages & backtraces for filesystem
-	shutdowns, for example.  Current threshold values are:
-
-		XFS_ERRLEVEL_OFF:       0
-		XFS_ERRLEVEL_LOW:       1
-		XFS_ERRLEVEL_HIGH:      5
-
-  fs.xfs.panic_mask		(Min: 0  Default: 0  Max: 256)
-	Causes certain error conditions to call BUG(). Value is a bitmask;
-	OR together the tags which represent errors which should cause panics:
-
-		XFS_NO_PTAG                     0
-		XFS_PTAG_IFLUSH                 0x00000001
-		XFS_PTAG_LOGRES                 0x00000002
-		XFS_PTAG_AILDELETE              0x00000004
-		XFS_PTAG_ERROR_REPORT           0x00000008
-		XFS_PTAG_SHUTDOWN_CORRUPT       0x00000010
-		XFS_PTAG_SHUTDOWN_IOERROR       0x00000020
-		XFS_PTAG_SHUTDOWN_LOGERROR      0x00000040
-		XFS_PTAG_FSBLOCK_ZERO           0x00000080
-		XFS_PTAG_VERIFIER_ERROR         0x00000100
-
-	This option is intended for debugging only.
-
-  fs.xfs.irix_symlink_mode	(Min: 0  Default: 0  Max: 1)
-	Controls whether symlinks are created with mode 0777 (default)
-	or whether their mode is affected by the umask (irix mode).
-
-  fs.xfs.irix_sgid_inherit	(Min: 0  Default: 0  Max: 1)
-	Controls files created in SGID directories.
-	If the group ID of the new file does not match the effective group
-	ID or one of the supplementary group IDs of the parent dir, the
-	ISGID bit is cleared if the irix_sgid_inherit compatibility sysctl
-	is set.
-
-  fs.xfs.inherit_sync		(Min: 0  Default: 1  Max: 1)
-	Setting this to "1" will cause the "sync" flag set
-	by the xfs_io(8) chattr command on a directory to be
-	inherited by files in that directory.
-
-  fs.xfs.inherit_nodump		(Min: 0  Default: 1  Max: 1)
-	Setting this to "1" will cause the "nodump" flag set
-	by the xfs_io(8) chattr command on a directory to be
-	inherited by files in that directory.
-
-  fs.xfs.inherit_noatime	(Min: 0  Default: 1  Max: 1)
-	Setting this to "1" will cause the "noatime" flag set
-	by the xfs_io(8) chattr command on a directory to be
-	inherited by files in that directory.
-
-  fs.xfs.inherit_nosymlinks	(Min: 0  Default: 1  Max: 1)
-	Setting this to "1" will cause the "nosymlinks" flag set
-	by the xfs_io(8) chattr command on a directory to be
-	inherited by files in that directory.
-
-  fs.xfs.inherit_nodefrag	(Min: 0  Default: 1  Max: 1)
-	Setting this to "1" will cause the "nodefrag" flag set
-	by the xfs_io(8) chattr command on a directory to be
-	inherited by files in that directory.
-
-  fs.xfs.rotorstep		(Min: 1  Default: 1  Max: 256)
-	In "inode32" allocation mode, this option determines how many
-	files the allocator attempts to allocate in the same allocation
-	group before moving to the next allocation group.  The intent
-	is to control the rate at which the allocator moves between
-	allocation groups when allocating extents for new files.
-
-Deprecated Sysctls
-==================
-
-None at present.
-
-
-Removed Sysctls
-===============
-
-  Name				Removed
-  ----				-------
-  fs.xfs.xfsbufd_centisec	v4.0
-  fs.xfs.age_buffer_centisecs	v4.0
-
-
-Error handling
-==============
-
-XFS can act differently according to the type of error found during its
-operation. The implementation introduces the following concepts to the error
-handler:
-
- -failure speed:
-	Defines how fast XFS should propagate an error upwards when a specific
-	error is found during the filesystem operation. It can propagate
-	immediately, after a defined number of retries, after a set time period,
-	or simply retry forever.
-
- -error classes:
-	Specifies the subsystem the error configuration will apply to, such as
-	metadata IO or memory allocation. Different subsystems will have
-	different error handlers for which behaviour can be configured.
-
- -error handlers:
-	Defines the behavior for a specific error.
-
-The filesystem behavior during an error can be set via sysfs files. Each
-error handler works independently - the first condition met by an error handler
-for a specific class will cause the error to be propagated rather than reset and
-retried.
-
-The action taken by the filesystem when the error is propagated is context
-dependent - it may cause a shut down in the case of an unrecoverable error,
-it may be reported back to userspace, or it may even be ignored because
-there's nothing useful we can with the error or anyone we can report it to (e.g.
-during unmount).
-
-The configuration files are organized into the following hierarchy for each
-mounted filesystem:
-
-  /sys/fs/xfs/<dev>/error/<class>/<error>/
-
-Where:
-  <dev>
-	The short device name of the mounted filesystem. This is the same device
-	name that shows up in XFS kernel error messages as "XFS(<dev>): ..."
-
-  <class>
-	The subsystem the error configuration belongs to. As of 4.9, the defined
-	classes are:
-
-		- "metadata": applies metadata buffer write IO
-
-  <error>
-	The individual error handler configurations.
-
-
-Each filesystem has "global" error configuration options defined in their top
-level directory:
-
-  /sys/fs/xfs/<dev>/error/
-
-  fail_at_unmount		(Min:  0  Default:  1  Max: 1)
-	Defines the filesystem error behavior at unmount time.
-
-	If set to a value of 1, XFS will override all other error configurations
-	during unmount and replace them with "immediate fail" characteristics.
-	i.e. no retries, no retry timeout. This will always allow unmount to
-	succeed when there are persistent errors present.
-
-	If set to 0, the configured retry behaviour will continue until all
-	retries and/or timeouts have been exhausted. This will delay unmount
-	completion when there are persistent errors, and it may prevent the
-	filesystem from ever unmounting fully in the case of "retry forever"
-	handler configurations.
-
-	Note: there is no guarantee that fail_at_unmount can be set while an
-	unmount is in progress. It is possible that the sysfs entries are
-	removed by the unmounting filesystem before a "retry forever" error
-	handler configuration causes unmount to hang, and hence the filesystem
-	must be configured appropriately before unmount begins to prevent
-	unmount hangs.
-
-Each filesystem has specific error class handlers that define the error
-propagation behaviour for specific errors. There is also a "default" error
-handler defined, which defines the behaviour for all errors that don't have
-specific handlers defined. Where multiple retry constraints are configuredi for
-a single error, the first retry configuration that expires will cause the error
-to be propagated. The handler configurations are found in the directory:
-
-  /sys/fs/xfs/<dev>/error/<class>/<error>/
-
-  max_retries			(Min: -1  Default: Varies  Max: INTMAX)
-	Defines the allowed number of retries of a specific error before
-	the filesystem will propagate the error. The retry count for a given
-	error context (e.g. a specific metadata buffer) is reset every time
-	there is a successful completion of the operation.
-
-	Setting the value to "-1" will cause XFS to retry forever for this
-	specific error.
-
-	Setting the value to "0" will cause XFS to fail immediately when the
-	specific error is reported.
-
-	Setting the value to "N" (where 0 < N < Max) will make XFS retry the
-	operation "N" times before propagating the error.
-
-  retry_timeout_seconds		(Min:  -1  Default:  Varies  Max: 1 day)
-	Define the amount of time (in seconds) that the filesystem is
-	allowed to retry its operations when the specific error is
-	found.
-
-	Setting the value to "-1" will allow XFS to retry forever for this
-	specific error.
-
-	Setting the value to "0" will cause XFS to fail immediately when the
-	specific error is reported.
-
-	Setting the value to "N" (where 0 < N < Max) will allow XFS to retry the
-	operation for up to "N" seconds before propagating the error.
-
-Note: The default behaviour for a specific error handler is dependent on both
-the class and error context. For example, the default values for
-"metadata/ENODEV" are "0" rather than "-1" so that this error handler defaults
-to "fail immediately" behaviour. This is done because ENODEV is a fatal,
-unrecoverable error no matter how many times the metadata IO is retried.
-- 
cgit 


From c6c405336bd3b0ebd1d76aaf9ea88b35dba77e61 Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong@redhat.com>
Date: Tue, 16 Jul 2019 16:26:39 -0700
Subject: vmcore: add a kernel parameter novmcoredd

Since commit 2724273e8fd0 ("vmcore: add API to collect hardware dump in
second kernel"), drivers are allowed to add device related dump data to
vmcore as they want by using the device dump API.  This has a potential
issue, the data is stored in memory, drivers may append too much data
and use too much memory.  The vmcore is typically used in a kdump kernel
which runs in a pre-reserved small chunk of memory.  So as a result it
will make kdump unusable at all due to OOM issues.

So introduce new 'novmcoredd' command line option.  User can disable
device dump to reduce memory usage.  This is helpful if device dump is
using too much memory, disabling device dump could make sure a regular
vmcore without device dump data is still available.

[akpm@linux-foundation.org: tweak documentation]
[akpm@linux-foundation.org: vmcore.c needs moduleparam.h]
Link: http://lkml.kernel.org/r/20190528111856.7276-1-kasong@redhat.com
Signed-off-by: Kairui Song <kasong@redhat.com>
Acked-by: Dave Young <dyoung@redhat.com>
Reviewed-by: Bhupesh Sharma <bhsharma@redhat.com>
Cc: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Cc: "David S . Miller" <davem@davemloft.net>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Baoquan He <bhe@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f8b62360b18c..bf8221abfe0a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2877,6 +2877,17 @@
 			/sys/module/printk/parameters/console_suspend) to
 			turn on/off it dynamically.
 
+	novmcoredd	[KNL,KDUMP]
+			Disable device dump. Device dump allows drivers to
+			append dump data to vmcore so you can collect driver
+			specified debug info.  Drivers can append the data
+			without any limit and this data is stored in memory,
+			so this may cause significant memory stress.  Disabling
+			device dump can help save memory but the driver debug
+			data will be no longer available.  This parameter
+			is only available when CONFIG_PROC_VMCORE_DEVICE_DUMP
+			is set.
+
 	noaliencache	[MM, NUMA, SLAB] Disables the allocation of alien
 			caches in the slab allocator.  Saves per-node memory,
 			but will impact performance.
-- 
cgit 


From 65f50f255349959f15f2761abd17ead8530b2f33 Mon Sep 17 00:00:00 2001
From: Weitao Hou <houweitaoo@gmail.com>
Date: Tue, 16 Jul 2019 16:26:54 -0700
Subject: kernel: fix typos and some coding style in comments

fix lenght to length

Link: http://lkml.kernel.org/r/20190521050937.4370-1-houweitaoo@gmail.com
Signed-off-by: Weitao Hou <houweitaoo@gmail.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/devicetree/bindings/usb/s3c2410-usb.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/usb/s3c2410-usb.txt b/Documentation/devicetree/bindings/usb/s3c2410-usb.txt
index e45b38ce2986..26c85afd0b53 100644
--- a/Documentation/devicetree/bindings/usb/s3c2410-usb.txt
+++ b/Documentation/devicetree/bindings/usb/s3c2410-usb.txt
@@ -4,7 +4,7 @@ OHCI
 
 Required properties:
  - compatible: should be "samsung,s3c2410-ohci" for USB host controller
- - reg: address and lenght of the controller memory mapped region
+ - reg: address and length of the controller memory mapped region
  - interrupts: interrupt number for the USB OHCI controller
  - clocks: Should reference the bus and host clocks
  - clock-names: Should contain two strings
-- 
cgit 


From 6ced9aa7b56baeb241a715df4539e60d5e3118e2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 16 Jul 2019 16:28:32 -0700
Subject: coda: stop using 'struct timespec' in user API

We exchange file timestamps with user space using psdev device
read/write operations with a fixed but architecture specific binary
layout.

On 32-bit systems, this uses a 'timespec' structure that is defined by
the C library to contain two 32-bit values for seconds and nanoseconds.
As we get ready for the year 2038 overflow of the 32-bit signed seconds,
the kernel now uses 64-bit timestamps internally, and user space will do
the same change by changing the 'timespec' definition in the future.

Unfortunately, this breaks the layout of the coda_vattr structure, so we
need to redefine that in terms of something that does not change.  I'm
introducing a new 'struct vtimespec' structure here that keeps the
existing layout, and the same change has to be done in the coda user
space copy of linux/coda.h before anyone can use that on a 32-bit
architecture with 64-bit time_t.

An open question is what should happen to actual times past y2038, as
they are now truncated to the last valid date when sent to user space,
and interpreted as pre-1970 times when a timestamp with the MSB set is
read back into the kernel.  Alternatively, we could change the new
timespec64_to_coda()/coda_to_timespec64() functions to use a different
interpretation and extend the available range further to the future by
disallowing past timestamps.  This would require more changes in the
user space side though.

Link: http://lkml.kernel.org/r/562b7324149461743e4fbe2fedbf7c242f7e274a.1558117389.git.jaharkes@cs.cmu.edu
Link: https://patchwork.kernel.org/patch/10474735/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jan Harkes <jaharkes@cs.cmu.edu>
Acked-by: Jan Harkes <jaharkes@cs.cmu.edu>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Fabian Frederick <fabf@skynet.be>
Cc: Mikko Rapeli <mikko.rapeli@iki.fi>
Cc: Sam Protsenko <semen.protsenko@linaro.org>
Cc: Yann Droneaud <ydroneaud@opteya.com>
Cc: Zhouyang Jia <jiazhouyang09@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/coda.txt | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/coda.txt b/Documentation/filesystems/coda.txt
index 61311356025d..ea5969068895 100644
--- a/Documentation/filesystems/coda.txt
+++ b/Documentation/filesystems/coda.txt
@@ -481,7 +481,10 @@ kernel support.
 
 
-
+  struct vtimespec {
+          long            tv_sec;         /* seconds */
+          long            tv_nsec;        /* nanoseconds */
+  };
 
   struct coda_vattr {
           enum coda_vtype va_type;        /* vnode type (for create) */
@@ -493,9 +496,9 @@ kernel support.
           long            va_fileid;      /* file id */
           u_quad_t        va_size;        /* file size in bytes */
           long            va_blocksize;   /* blocksize preferred for i/o */
-          struct timespec va_atime;       /* time of last access */
-          struct timespec va_mtime;       /* time of last modification */
-          struct timespec va_ctime;       /* time file changed */
+          struct vtimespec va_atime;      /* time of last access */
+          struct vtimespec va_mtime;      /* time of last modification */
+          struct vtimespec va_ctime;      /* time file changed */
           u_long          va_gen;         /* generation number of file */
           u_long          va_flags;       /* flags defined for file */
           dev_t           va_rdev;        /* device special file represents */
-- 
cgit 


From 5e7c31dfe74703f428220384b2863525957cc160 Mon Sep 17 00:00:00 2001
From: Jan Harkes <jaharkes@cs.cmu.edu>
Date: Tue, 16 Jul 2019 16:28:35 -0700
Subject: coda: change Coda's user api to use 64-bit time_t in timespec

Move the 32-bit time_t problems to userspace.

Link: http://lkml.kernel.org/r/8d089068823bfb292a4020f773922fbd82ffad39.1558117389.git.jaharkes@cs.cmu.edu
Signed-off-by: Jan Harkes <jaharkes@cs.cmu.edu>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Fabian Frederick <fabf@skynet.be>
Cc: Mikko Rapeli <mikko.rapeli@iki.fi>
Cc: Sam Protsenko <semen.protsenko@linaro.org>
Cc: Yann Droneaud <ydroneaud@opteya.com>
Cc: Zhouyang Jia <jiazhouyang09@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/coda.txt | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/coda.txt b/Documentation/filesystems/coda.txt
index ea5969068895..545262c167c3 100644
--- a/Documentation/filesystems/coda.txt
+++ b/Documentation/filesystems/coda.txt
@@ -481,8 +481,8 @@ kernel support.
 
 
-  struct vtimespec {
-          long            tv_sec;         /* seconds */
+  struct coda_timespec {
+          int64_t         tv_sec;         /* seconds */
           long            tv_nsec;        /* nanoseconds */
   };
 
@@ -496,9 +496,9 @@ kernel support.
           long            va_fileid;      /* file id */
           u_quad_t        va_size;        /* file size in bytes */
           long            va_blocksize;   /* blocksize preferred for i/o */
-          struct vtimespec va_atime;      /* time of last access */
-          struct vtimespec va_mtime;      /* time of last modification */
-          struct vtimespec va_ctime;      /* time file changed */
+          struct coda_timespec va_atime;  /* time of last access */
+          struct coda_timespec va_mtime;  /* time of last modification */
+          struct coda_timespec va_ctime;  /* time file changed */
           u_long          va_gen;         /* generation number of file */
           u_long          va_flags;       /* flags defined for file */
           dev_t           va_rdev;        /* device special file represents */
-- 
cgit 


From 814bbf49dcd0ad642e7ceb8991e57555c5472cce Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Sun, 14 Jul 2019 14:04:14 +0200
Subject: xen: remove tmem driver

The Xen tmem (transcendent memory) driver can be removed, as the
related Xen hypervisor feature never made it past the "experimental"
state and will be removed in future Xen versions (>= 4.13).

The xen-selfballoon driver depends on tmem, so it can be removed, too.

Signed-off-by: Juergen Gross <jgross@suse.com>
Acked-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 Documentation/admin-guide/kernel-parameters.txt | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f8b62360b18c..99db4975e6b5 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4674,27 +4674,6 @@
 			Force threading of all interrupt handlers except those
 			marked explicitly IRQF_NO_THREAD.
 
-	tmem		[KNL,XEN]
-			Enable the Transcendent memory driver if built-in.
-
-	tmem.cleancache=0|1 [KNL, XEN]
-			Default is on (1). Disable the usage of the cleancache
-			API to send anonymous pages to the hypervisor.
-
-	tmem.frontswap=0|1 [KNL, XEN]
-			Default is on (1). Disable the usage of the frontswap
-			API to send swap pages to the hypervisor. If disabled
-			the selfballooning and selfshrinking are force disabled.
-
-	tmem.selfballooning=0|1 [KNL, XEN]
-			Default is on (1). Disable the driving of swap pages
-			to the hypervisor.
-
-	tmem.selfshrinking=0|1 [KNL, XEN]
-			Default is on (1). Partial swapoff that immediately
-			transfers pages from Xen hypervisor back to the
-			kernel based on different criteria.
-
 	topology=	[S390]
 			Format: {off | on}
 			Specify if the kernel should make use of the cpu
-- 
cgit 


From 30978346372e5c43a652cfbd4533c6bd5427c33b Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Date: Thu, 11 Jul 2019 20:02:09 +0800
Subject: x86: Add "nopv" parameter to disable PV extensions

In virtualization environment, PV extensions (drivers, interrupts,
timers, etc) are enabled in the majority of use cases which is the
best option.

However, in some cases (kexec not fully working, benchmarking)
we want to disable PV extensions. We have "xen_nopv" for that purpose
but only for XEN. For a consistent admin experience a common command
line parameter "nopv" set across all PV guest implementations is a
better choice.

There are guest types which just won't work without PV extensions,
like Xen PV, Xen PVH and jailhouse. add a "ignore_nopv" member to
struct hypervisor_x86 set to true for those guest types and call
the detect functions only if nopv is false or ignore_nopv is true.

Suggested-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jan Kiszka <jan.kiszka@siemens.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 Documentation/admin-guide/kernel-parameters.txt | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 99db4975e6b5..936e8e7e6474 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5257,6 +5257,11 @@
 			improve timer resolution at the expense of processing
 			more timer interrupts.
 
+	nopv=		[X86,XEN,KVM,HYPER_V,VMWARE]
+			Disables the PV optimizations forcing the guest to run
+			as generic guest with no PV drivers. Currently support
+			XEN HVM, KVM, HYPER_V and VMWARE guest.
+
 	xirc2ps_cs=	[NET,PCMCIA]
 			Format:
 			<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
-- 
cgit 


From b39b049749ce08c7756be57082177730617bb9a0 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Date: Thu, 11 Jul 2019 20:02:10 +0800
Subject: xen: Map "xen_nopv" parameter to "nopv" and mark it obsolete

Clean up unnecessory code after that operation.

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 Documentation/admin-guide/kernel-parameters.txt | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 936e8e7e6474..2d990585402c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5243,6 +5243,8 @@
 	xen_nopv	[X86]
 			Disables the PV optimizations forcing the HVM guest to
 			run as generic HVM guest with no PV drivers.
+			This option is obsoleted by the "nopv" option, which
+			has equivalent effect for XEN platform.
 
 	xen_scrub_pages=	[XEN]
 			Boolean option to control scrubbing pages before giving them back
-- 
cgit 


From 5ef872636ca71d35ddc5ee5951c9272a51c74418 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sat, 13 Jul 2019 11:45:58 +0900
Subject: kbuild: get rid of misleading $(AS) from documents

The assembler files in the kernel are *.S instead of *.s, so they must
be preprocessed. Since 'as' of GNU binutils is not able to preprocess,
we always use $(CC) as an assembler driver.

$(AS) is almost unused in Kbuild. As of v5.2, there is just one place
that directly invokes $(AS).

  $ git grep -e '$(AS)' -e '${AS}' -e '$AS' -e '$(AS:' -e '${AS:' -- :^Documentation
  drivers/net/wan/Makefile:  AS68K = $(AS)

The documentation about *_AFLAGS* sounds like the flags were passed
to $(AS). This is somewhat misleading.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
---
 Documentation/kbuild/kbuild.rst    |  5 ++---
 Documentation/kbuild/makefiles.rst | 12 ++++++------
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst
index b25548963d70..727520b3d7b1 100644
--- a/Documentation/kbuild/kbuild.rst
+++ b/Documentation/kbuild/kbuild.rst
@@ -38,12 +38,11 @@ Additional options to the assembler (for built-in and modules).
 
 AFLAGS_MODULE
 -------------
-Additional module specific options to use for $(AS).
+Additional assembler options for modules.
 
 AFLAGS_KERNEL
 -------------
-Additional options for $(AS) when used for assembler
-code for code that is compiled as built-in.
+Additional assembler options for built-in.
 
 KCFLAGS
 -------
diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst
index 093f2d79ab95..67e47589d9d2 100644
--- a/Documentation/kbuild/makefiles.rst
+++ b/Documentation/kbuild/makefiles.rst
@@ -328,7 +328,7 @@ more details, with real examples.
 	variable $(KBUILD_CFLAGS) and uses it for compilation flags for the
 	entire tree.
 
-	asflags-y specifies options for assembling with $(AS).
+	asflags-y specifies assembler options.
 
 	Example::
 
@@ -489,7 +489,7 @@ more details, with real examples.
 	as-instr checks if the assembler reports a specific instruction
 	and then outputs either option1 or option2
 	C escapes are supported in the test instruction
-	Note: as-instr-option uses KBUILD_AFLAGS for $(AS) options
+	Note: as-instr-option uses KBUILD_AFLAGS for assembler options
 
     cc-option
 	cc-option is used to check if $(CC) supports a given option, and if
@@ -905,7 +905,7 @@ When kbuild executes, the following steps are followed (roughly):
 	vmlinux. The usage of $(call if_changed,xxx) will be described later.
 
     KBUILD_AFLAGS
-	$(AS) assembler flags
+	Assembler flags
 
 	Default value - see top level Makefile
 	Append or modify as required per architecture.
@@ -948,16 +948,16 @@ When kbuild executes, the following steps are followed (roughly):
 	to 'y' when selected.
 
     KBUILD_AFLAGS_KERNEL
-	$(AS) options specific for built-in
+	Assembler options specific for built-in
 
 	$(KBUILD_AFLAGS_KERNEL) contains extra C compiler flags used to compile
 	resident kernel code.
 
     KBUILD_AFLAGS_MODULE
-	Options for $(AS) when building modules
+	Assembler options specific for modules
 
 	$(KBUILD_AFLAGS_MODULE) is used to add arch-specific options that
-	are used for $(AS).
+	are used for assembler.
 
 	From commandline AFLAGS_MODULE shall be used (see kbuild.txt).
 
-- 
cgit 


From 9af93db9e140a4e6e79cdb098919bc928a72cd59 Mon Sep 17 00:00:00 2001
From: Daniel Drake <drake@endlessm.com>
Date: Wed, 17 Jul 2019 13:10:58 +0800
Subject: platform/x86: asus: Rename "fan mode" to "fan boost mode"

The Asus WMI spec indicates that the function being controlled here
is called "Fan Boost Mode". The user-facing documentation also calls it
this.

The spec uses the term "fan mode" is used to refer to other things,
including functionality expected to appear on future products.
We missed this before as we are not dealing with the most readable of
specs, and didn't forsee any confusion around shortening the name.

Rename "fan mode" to "fan boost mode" to improve consistency with the
spec and to avoid a future naming conflict.

There is no interface breakage here since this has yet to be included
in an official kernel release. I also updated the kernel version listed
under ABI accordingly.

Signed-off-by: Daniel Drake <drake@endlessm.com>
Acked-by: Yurii Pavlovskyi <yurii.pavlovskyi@gmail.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 Documentation/ABI/testing/sysfs-platform-asus-wmi | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/ABI/testing/sysfs-platform-asus-wmi b/Documentation/ABI/testing/sysfs-platform-asus-wmi
index 87ae5cc983bf..9e99f2909612 100644
--- a/Documentation/ABI/testing/sysfs-platform-asus-wmi
+++ b/Documentation/ABI/testing/sysfs-platform-asus-wmi
@@ -37,9 +37,9 @@ Contact:	"AceLan Kao" <acelan.kao@canonical.com>
 Description:
 		Resume on lid open. 1 means on, 0 means off.
 
-What:		/sys/devices/platform/<platform>/fan_mode
-Date:		Apr 2019
-KernelVersion:	5.2
+What:		/sys/devices/platform/<platform>/fan_boost_mode
+Date:		Sep 2019
+KernelVersion:	5.3
 Contact:	"Yurii Pavlovskyi" <yurii.pavlovskyi@gmail.com>
 Description:
 		Fan boost mode:
-- 
cgit 


From b7dca6dd1e591ad19a9aae716f3898be8063f880 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 17 Jul 2019 15:17:57 +0900
Subject: kbuild: create *.mod with full directory path and remove MODVERDIR

While descending directories, Kbuild produces objects for modules,
but do not link final *.ko files; it is done in the modpost.

To keep track of modules, Kbuild creates a *.mod file in $(MODVERDIR)
for every module it is building. Some post-processing steps read the
necessary information from *.mod files. This avoids descending into
directories again. This mechanism was introduced in 2003 or so.

Later, commit 551559e13af1 ("kbuild: implement modules.order") added
modules.order. So, we can simply read it out to know all the modules
with directory paths. This is easier than parsing the first line of
*.mod files.

$(MODVERDIR) has a flat directory structure, that is, *.mod files
are named only with base names. This is based on the assumption that
the module name is unique across the tree. This assumption is really
fragile.

Stephen Rothwell reported a race condition caused by a module name
conflict:

  https://lkml.org/lkml/2019/5/13/991

In parallel building, two different threads could write to the same
$(MODVERDIR)/*.mod simultaneously.

Non-unique module names are the source of all kind of troubles, hence
commit 3a48a91901c5 ("kbuild: check uniqueness of module names")
introduced a new checker script.

However, it is still fragile in the build system point of view because
this race happens before scripts/modules-check.sh is invoked. If it
happens again, the modpost will emit unclear error messages.

To fix this issue completely, create *.mod with full directory path
so that two threads never attempt to write to the same file.

$(MODVERDIR) is no longer needed.

Since modules with directory paths are listed in modules.order, Kbuild
is still able to find *.mod files without additional descending.

I also killed cmd_secanalysis; scripts/mod/sumversion.c computes MD4 hash
for modules with MODULE_VERSION(). When CONFIG_DEBUG_SECTION_MISMATCH=y,
it occurs not only in the modpost stage, but also during directory
descending, where sumversion.c may parse stale *.mod files. It would emit
'No such file or directory' warning when an object consisting a module is
renamed, or when a single-obj module is turned into a multi-obj module or
vice versa.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
---
 Documentation/dontdiff | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 5eba889ea84d..9f4392876099 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -30,6 +30,7 @@
 *.lzo
 *.mo
 *.moc
+*.mod
 *.mod.c
 *.o
 *.o.*
-- 
cgit 


From c06306696f8368b08774e2a743dbc52d92a61693 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linux.alibaba.com>
Date: Thu, 18 Jul 2019 15:57:27 -0700
Subject: mm: thp: fix false negative of shmem vma's THP eligibility

Commit 7635d9cbe832 ("mm, thp, proc: report THP eligibility for each
vma") introduced THPeligible bit for processes' smaps.  But, when
checking the eligibility for shmem vma, __transparent_hugepage_enabled()
is called to override the result from shmem_huge_enabled().  It may
result in the anonymous vma's THP flag override shmem's.  For example,
running a simple test which create THP for shmem, but with anonymous THP
disabled, when reading the process's smaps, it may show:

  7fc92ec00000-7fc92f000000 rw-s 00000000 00:14 27764 /dev/shm/test
  Size:               4096 kB
  ...
  [snip]
  ...
  ShmemPmdMapped:     4096 kB
  ...
  [snip]
  ...
  THPeligible:    0

And, /proc/meminfo does show THP allocated and PMD mapped too:

  ShmemHugePages:     4096 kB
  ShmemPmdMapped:     4096 kB

This doesn't make too much sense.  The shmem objects should be treated
separately from anonymous THP.  Calling shmem_huge_enabled() with
checking MMF_DISABLE_THP sounds good enough.  And, we could skip stack
and dax vma check since we already checked if the vma is shmem already.

Also check if vma is suitable for THP by calling
transhuge_vma_suitable().

And minor fix to smaps output format and documentation.

Link: http://lkml.kernel.org/r/1560401041-32207-3-git-send-email-yang.shi@linux.alibaba.com
Fixes: 7635d9cbe832 ("mm, thp, proc: report THP eligibility for each vma")
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index fb4735fd73b0..99ca040e3f90 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -486,8 +486,8 @@ replaced by copy-on-write) part of the underlying shmem object out on swap.
 "SwapPss" shows proportional swap share of this mapping. Unlike "Swap", this
 does not take into account swapped out page of underlying shmem objects.
 "Locked" indicates whether the mapping is locked in memory or not.
-"THPeligible" indicates whether the mapping is eligible for THP pages - 1 if
-true, 0 otherwise.
+"THPeligible" indicates whether the mapping is eligible for allocating THP
+pages - 1 if true, 0 otherwise. It just shows the current status.
 
 "VmFlags" field deserves a separate description. This member represents the kernel
 flags associated with the particular virtual memory area in two letter encoded
-- 
cgit 


From a0653406a3a671c1609d54835f0443869525ca30 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 18 Jul 2019 15:58:29 -0700
Subject: mm: document ZONE_DEVICE memory-model implications
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Explain the general mechanisms of 'ZONE_DEVICE' pages and list the users
of 'devm_memremap_pages()'.

[dan.j.williams@intel.com: update ZONE_DEVICE memory model documentation]
  Link: http://lkml.kernel.org/r/156109575458.1409767.1885676287099277666.stgit@dwillia2-desk3.amr.corp.intel.com
Link: http://lkml.kernel.org/r/156092354985.979959.15763234410543451710.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reported-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Tested-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>	[ppc64]
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richardw.yang@linux.intel.com>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/memory-model.rst | 40 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/vm/memory-model.rst b/Documentation/vm/memory-model.rst
index 382f72ace1fc..58a12376b7df 100644
--- a/Documentation/vm/memory-model.rst
+++ b/Documentation/vm/memory-model.rst
@@ -181,3 +181,43 @@ that is eventually passed to vmemmap_populate() through a long chain
 of function calls. The vmemmap_populate() implementation may use the
 `vmem_altmap` along with :c:func:`altmap_alloc_block_buf` helper to
 allocate memory map on the persistent memory device.
+
+ZONE_DEVICE
+===========
+The `ZONE_DEVICE` facility builds upon `SPARSEMEM_VMEMMAP` to offer
+`struct page` `mem_map` services for device driver identified physical
+address ranges. The "device" aspect of `ZONE_DEVICE` relates to the fact
+that the page objects for these address ranges are never marked online,
+and that a reference must be taken against the device, not just the page
+to keep the memory pinned for active use. `ZONE_DEVICE`, via
+:c:func:`devm_memremap_pages`, performs just enough memory hotplug to
+turn on :c:func:`pfn_to_page`, :c:func:`page_to_pfn`, and
+:c:func:`get_user_pages` service for the given range of pfns. Since the
+page reference count never drops below 1 the page is never tracked as
+free memory and the page's `struct list_head lru` space is repurposed
+for back referencing to the host device / driver that mapped the memory.
+
+While `SPARSEMEM` presents memory as a collection of sections,
+optionally collected into memory blocks, `ZONE_DEVICE` users have a need
+for smaller granularity of populating the `mem_map`. Given that
+`ZONE_DEVICE` memory is never marked online it is subsequently never
+subject to its memory ranges being exposed through the sysfs memory
+hotplug api on memory block boundaries. The implementation relies on
+this lack of user-api constraint to allow sub-section sized memory
+ranges to be specified to :c:func:`arch_add_memory`, the top-half of
+memory hotplug. Sub-section support allows for 2MB as the cross-arch
+common alignment granularity for :c:func:`devm_memremap_pages`.
+
+The users of `ZONE_DEVICE` are:
+
+* pmem: Map platform persistent memory to be used as a direct-I/O target
+  via DAX mappings.
+
+* hmm: Extend `ZONE_DEVICE` with `->page_fault()` and `->page_free()`
+  event callbacks to allow a device-driver to coordinate memory management
+  events related to device-memory, typically GPU memory. See
+  Documentation/vm/hmm.rst.
+
+* p2pdma: Create `struct page` objects to allow peer devices in a
+  PCI/-E topology to coordinate direct-DMA operations between themselves,
+  i.e. bypass host memory.
-- 
cgit 


From 69d812f5eb249bf30069f81dd508aa4548ec6eb1 Mon Sep 17 00:00:00 2001
From: Guo Ren <ren_guo@c-sky.com>
Date: Thu, 6 Jun 2019 15:37:32 +0800
Subject: dt-bindings: interrupt-controller: Update csky mpintc

Add trigger type setting for csky,mpintc. The driver also could
support #interrupt-cells <1> and it wouldn't invalidate existing
DTs. Here we only show the complete format.

Signed-off-by: Guo Ren <ren_guo@c-sky.com>
Reviewed-by: Rob Herring <robh+dt@kernel.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
---
 .../bindings/interrupt-controller/csky,mpintc.txt    | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt b/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt
index ab921f1698fb..e13405355166 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt
@@ -6,11 +6,16 @@ C-SKY Multi-processors Interrupt Controller is designed for ck807/ck810/ck860
 SMP soc, and it also could be used in non-SMP system.
 
 Interrupt number definition:
-
   0-15  : software irq, and we use 15 as our IPI_IRQ.
  16-31  : private  irq, and we use 16 as the co-processor timer.
  31-1024: common irq for soc ip.
 
+Interrupt triger mode: (Defined in dt-bindings/interrupt-controller/irq.h)
+ IRQ_TYPE_LEVEL_HIGH (default)
+ IRQ_TYPE_LEVEL_LOW
+ IRQ_TYPE_EDGE_RISING
+ IRQ_TYPE_EDGE_FALLING
+
 =============================
 intc node bindings definition
 =============================
@@ -26,15 +31,22 @@ intc node bindings definition
 	- #interrupt-cells
 		Usage: required
 		Value type: <u32>
-		Definition: must be <1>
+		Definition: <2>
 	- interrupt-controller:
 		Usage: required
 
-Examples:
+Examples: ("interrupts = <irq_num IRQ_TYPE_XXX>")
 ---------
+#include <dt-bindings/interrupt-controller/irq.h>
 
 	intc: interrupt-controller {
 		compatible = "csky,mpintc";
-		#interrupt-cells = <1>;
+		#interrupt-cells = <2>;
 		interrupt-controller;
 	};
+
+	device: device-example {
+		...
+		interrupts = <34 IRQ_TYPE_EDGE_RISING>;
+		interrupt-parent = <&intc>;
+	};
-- 
cgit 


From 4d581034f9086f784a3408575bdb3c201740c6cb Mon Sep 17 00:00:00 2001
From: Mao Han <han_mao@c-sky.com>
Date: Tue, 4 Jun 2019 18:54:47 +0800
Subject: dt-bindings: csky: Add csky PMU bindings

This patch adds the documentation to describe that how to add pmu node in
dts.

Signed-off-by: Mao Han <han_mao@c-sky.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Rob Herring <robh+dt@kernel.org>
---
 Documentation/devicetree/bindings/csky/pmu.txt | 38 ++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/csky/pmu.txt

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/csky/pmu.txt b/Documentation/devicetree/bindings/csky/pmu.txt
new file mode 100644
index 000000000000..728d05ca6a1c
--- /dev/null
+++ b/Documentation/devicetree/bindings/csky/pmu.txt
@@ -0,0 +1,38 @@
+===============================
+C-SKY Performance Monitor Units
+===============================
+
+C-SKY Performance Monitor is designed for ck807/ck810/ck860 SMP soc and
+it could count cpu's events for helping analysis performance issues.
+
+============================
+PMU node bindings definition
+============================
+
+	Description: Describes PMU
+
+	PROPERTIES
+
+	- compatible
+		Usage: required
+		Value type: <string>
+		Definition: must be "csky,csky-pmu"
+	- interrupts
+		Usage: required
+		Value type: <u32 IRQ_TYPE_XXX>
+		Definition: must be pmu irq num defined by soc
+	- count-width
+		Usage: optional
+		Value type: <u32>
+		Definition: the width of pmu counter
+
+Examples:
+---------
+#include <dt-bindings/interrupt-controller/irq.h>
+
+	pmu: performace-monitor {
+		compatible = "csky,csky-pmu";
+		interrupts = <23 IRQ_TYPE_EDGE_RISING>;
+		interrupt-parent = <&intc>;
+		count-width = <48>;
+        };
-- 
cgit 


From 40ef768ab6eecc1b51461a034274350b31fc29d1 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@codemonkey.org.uk>
Date: Thu, 18 Jul 2019 23:51:56 -0400
Subject: Remove references to dead website.

This fell into disrepair a while ago, and the majority of hits to the
snapshots were from bots, so it's more trouble to keep running than it's worth.

Signed-off-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/dev-tools/sparse.rst          | 5 -----
 Documentation/translations/zh_CN/sparse.txt | 4 ----
 2 files changed, 9 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/dev-tools/sparse.rst b/Documentation/dev-tools/sparse.rst
index c401c952a340..6f4870528226 100644
--- a/Documentation/dev-tools/sparse.rst
+++ b/Documentation/dev-tools/sparse.rst
@@ -81,11 +81,6 @@ of sparse using git to clone::
 
         git://git.kernel.org/pub/scm/devel/sparse/sparse.git
 
-DaveJ has hourly generated tarballs of the git tree available at::
-
-        http://www.codemonkey.org.uk/projects/git-snapshots/sparse/
-
-
 Once you have it, just do::
 
         make
diff --git a/Documentation/translations/zh_CN/sparse.txt b/Documentation/translations/zh_CN/sparse.txt
index 47fc4a06ebe8..0f444b83d639 100644
--- a/Documentation/translations/zh_CN/sparse.txt
+++ b/Documentation/translations/zh_CN/sparse.txt
@@ -73,10 +73,6 @@ __bitwise"类型。
 
 	git://git.kernel.org/pub/scm/linux/kernel/git/josh/sparse.git
 
-DaveJ 把每小时自动生成的 git 源码树 tar 包放在以下地址：
-
-	http://www.codemonkey.org.uk/projects/git-snapshots/sparse/
-
 一旦你下载了源码，只要以普通用户身份运行：
 
 	make
-- 
cgit 


From 30cd8604323dbaf20a80e797fe7057f5b02e394d Mon Sep 17 00:00:00 2001
From: Eric Hankland <ehankland@google.com>
Date: Thu, 18 Jul 2019 11:38:18 -0700
Subject: KVM: x86: Add fixed counters to PMU filter

Updates KVM_CAP_PMU_EVENT_FILTER so it can also whitelist or blacklist
fixed counters.

Signed-off-by: Eric Hankland <ehankland@google.com>
[No need to check padding fields for zero. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 2cd6250b2896..e54a3f51ddc5 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4090,17 +4090,22 @@ Parameters: struct kvm_pmu_event_filter (in)
 Returns: 0 on success, -1 on error
 
 struct kvm_pmu_event_filter {
-       __u32 action;
-       __u32 nevents;
-       __u64 events[0];
+	__u32 action;
+	__u32 nevents;
+	__u32 fixed_counter_bitmap;
+	__u32 flags;
+	__u32 pad[4];
+	__u64 events[0];
 };
 
 This ioctl restricts the set of PMU events that the guest can program.
 The argument holds a list of events which will be allowed or denied.
 The eventsel+umask of each event the guest attempts to program is compared
 against the events field to determine whether the guest should have access.
-This only affects general purpose counters; fixed purpose counters can
-be disabled by changing the perfmon CPUID leaf.
+The events field only controls general purpose counters; fixed purpose
+counters are controlled by the fixed_counter_bitmap.
+
+No flags are defined yet, the field must be zero.
 
 Valid values for 'action':
 #define KVM_PMU_EVENT_ALLOW 0
-- 
cgit 


From 15ffef1ae69e99ebb54326f0220916b1fe619b24 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 3 Jul 2019 14:17:06 -0600
Subject: dt-bindings: Ensure child nodes are of type 'object'

Properties which are child node definitions need to have an explict
type. Otherwise, a matching (DT) property can silently match when an
error is desired. Fix this up tree-wide. Once this is fixed, the
meta-schema will enforce this on any child node definitions.

Cc: Chen-Yu Tsai <wens@csie.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: linux-mtd@lists.infradead.org
Cc: linux-gpio@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-spi@vger.kernel.org
Acked-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Maxime Ripard <maxime.ripard@bootlin.com>
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Alexandre TORGUE <alexandre.torgue@st.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml  | 1 +
 Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml | 1 +
 Documentation/devicetree/bindings/mtd/nand-controller.yaml          | 1 +
 Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml     | 3 +++
 Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml  | 1 +
 Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml  | 1 +
 6 files changed, 8 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
index fc2f63860cc8..be32f087c529 100644
--- a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
+++ b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
@@ -42,6 +42,7 @@ properties:
 
 patternProperties:
   "^.*@[0-9a-fA-F]+$":
+    type: object
     properties:
       reg:
         maxItems: 1
diff --git a/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml b/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
index e5a411518be1..b5b3cf5b1ac2 100644
--- a/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
@@ -55,6 +55,7 @@ patternProperties:
   "^pinctrl-[0-9]+$": true
 
   "^nand@[a-f0-9]+$":
+    type: object
     properties:
       reg:
         minimum: 0
diff --git a/Documentation/devicetree/bindings/mtd/nand-controller.yaml b/Documentation/devicetree/bindings/mtd/nand-controller.yaml
index 199ba5ac2a06..d261b7096c69 100644
--- a/Documentation/devicetree/bindings/mtd/nand-controller.yaml
+++ b/Documentation/devicetree/bindings/mtd/nand-controller.yaml
@@ -40,6 +40,7 @@ properties:
 
 patternProperties:
   "^nand@[a-f0-9]$":
+    type: object
     properties:
       reg:
         description:
diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
index 06c4b66c3ee6..3ac5d2088e49 100644
--- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
@@ -55,6 +55,7 @@ properties:
 
 patternProperties:
   '^gpio@[0-9a-f]*$':
+    type: object
     properties:
       gpio-controller: true
       '#gpio-cells':
@@ -113,8 +114,10 @@ patternProperties:
       - st,bank-name
 
   '-[0-9]*$':
+    type: object
     patternProperties:
       '^pins':
+        type: object
         description: |
           A pinctrl node should contain at least one subnode representing the
           pinctrl group available on the machine. Each subnode will list the
diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml
index c374fd4923a6..6d1329c28170 100644
--- a/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml
@@ -50,6 +50,7 @@ properties:
 
 patternProperties:
   "^.*@[0-9a-f]+":
+    type: object
     properties:
       reg:
         items:
diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
index bda7a5befd8b..f36c46d236d7 100644
--- a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
@@ -55,6 +55,7 @@ properties:
 
 patternProperties:
   "^.*@[0-9a-f]+":
+    type: object
     properties:
       reg:
         items:
-- 
cgit 


From 7d9ef7f37d1f37981344d1a8c8578b67bdf4736a Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 26 Jun 2019 17:57:59 -0600
Subject: dt-bindings: riscv: Limit cpus schema to only check RiscV 'cpu' nodes

Matching on the 'cpus' node was a bad choice because the schema is
incorrectly applied to non-RiscV cpus nodes. As we now have a common cpus
schema which checks the general structure, it is also redundant to do so
in the Risc-V CPU schema.

The downside is one could conceivably mix different architecture's cpu
nodes or have typos in the compatible string. The latter problem pretty
much exists for every schema.

Acked-by: Paul Walmsley <paul.walmsley@sifive.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/riscv/cpus.yaml | 143 +++++++++-------------
 1 file changed, 61 insertions(+), 82 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml
index f97a4ecd7b91..c899111aa5e3 100644
--- a/Documentation/devicetree/bindings/riscv/cpus.yaml
+++ b/Documentation/devicetree/bindings/riscv/cpus.yaml
@@ -10,97 +10,76 @@ maintainers:
   - Paul Walmsley <paul.walmsley@sifive.com>
   - Palmer Dabbelt <palmer@sifive.com>
 
-allOf:
-  - $ref: /schemas/cpus.yaml#
-
 properties:
-  $nodename:
-    const: cpus
-    description: Container of cpu nodes
-
-  '#address-cells':
-    const: 1
-    description: |
-      A single unsigned 32-bit integer uniquely identifies each RISC-V
-      hart in a system.  (See the "reg" node under the "cpu" node,
-      below).
-
-  '#size-cells':
-    const: 0
+  compatible:
+    items:
+      - enum:
+          - sifive,rocket0
+          - sifive,e5
+          - sifive,e51
+          - sifive,u54-mc
+          - sifive,u54
+          - sifive,u5
+      - const: riscv
+    description:
+      Identifies that the hart uses the RISC-V instruction set
+      and identifies the type of the hart.
+
+  mmu-type:
+    allOf:
+      - $ref: "/schemas/types.yaml#/definitions/string"
+      - enum:
+          - riscv,sv32
+          - riscv,sv39
+          - riscv,sv48
+    description:
+      Identifies the MMU address translation mode used on this
+      hart.  These values originate from the RISC-V Privileged
+      Specification document, available from
+      https://riscv.org/specifications/
+
+  riscv,isa:
+    allOf:
+      - $ref: "/schemas/types.yaml#/definitions/string"
+      - enum:
+          - rv64imac
+          - rv64imafdc
+    description:
+      Identifies the specific RISC-V instruction set architecture
+      supported by the hart.  These are documented in the RISC-V
+      User-Level ISA document, available from
+      https://riscv.org/specifications/
+
+  timebase-frequency:
+    type: integer
+    minimum: 1
+    description:
+      Specifies the clock frequency of the system timer in Hz.
+      This value is common to all harts on a single system image.
+
+  interrupt-controller:
+    type: object
+    description: Describes the CPU's local interrupt controller
 
-patternProperties:
-  '^cpu@[0-9a-f]+$':
     properties:
-      compatible:
-        type: array
-        items:
-          - enum:
-              - sifive,rocket0
-              - sifive,e5
-              - sifive,e51
-              - sifive,u54-mc
-              - sifive,u54
-              - sifive,u5
-          - const: riscv
-        description:
-          Identifies that the hart uses the RISC-V instruction set
-          and identifies the type of the hart.
-
-      mmu-type:
-        allOf:
-          - $ref: "/schemas/types.yaml#/definitions/string"
-          - enum:
-              - riscv,sv32
-              - riscv,sv39
-              - riscv,sv48
-        description:
-          Identifies the MMU address translation mode used on this
-          hart.  These values originate from the RISC-V Privileged
-          Specification document, available from
-          https://riscv.org/specifications/
-
-      riscv,isa:
-        allOf:
-          - $ref: "/schemas/types.yaml#/definitions/string"
-          - enum:
-              - rv64imac
-              - rv64imafdc
-        description:
-          Identifies the specific RISC-V instruction set architecture
-          supported by the hart.  These are documented in the RISC-V
-          User-Level ISA document, available from
-          https://riscv.org/specifications/
+      '#interrupt-cells':
+        const: 1
 
-      timebase-frequency:
-        type: integer
-        minimum: 1
-        description:
-          Specifies the clock frequency of the system timer in Hz.
-          This value is common to all harts on a single system image.
-
-      interrupt-controller:
-        type: object
-        description: Describes the CPU's local interrupt controller
-
-        properties:
-          '#interrupt-cells':
-            const: 1
-
-          compatible:
-            const: riscv,cpu-intc
-
-          interrupt-controller: true
+      compatible:
+        const: riscv,cpu-intc
 
-        required:
-          - '#interrupt-cells'
-          - compatible
-          - interrupt-controller
+      interrupt-controller: true
 
     required:
-      - riscv,isa
-      - timebase-frequency
+      - '#interrupt-cells'
+      - compatible
       - interrupt-controller
 
+required:
+  - riscv,isa
+  - timebase-frequency
+  - interrupt-controller
+
 examples:
   - |
     // Example 1: SiFive Freedom U540G Development Kit
-- 
cgit 


From ad21a4ce040cc41b4a085417169b558e86af56b7 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 15 Jul 2019 16:37:25 -0600
Subject: dt-bindings: pinctrl: aspeed: Fix 'compatible' schema errors

The Aspeed pinctl schema have errors in the 'compatible' schema:

Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml: \
properties:compatible:enum: ['aspeed', 'ast2400-pinctrl', 'aspeed', 'g4-pinctrl'] has non-unique elements
Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml: \
properties:compatible:enum: ['aspeed', 'ast2500-pinctrl', 'aspeed', 'g5-pinctrl'] has non-unique elements

Flow style sequences have to be quoted if the vales contain ','. Fix
this by using the more common one line per entry formatting.

Fixes: 0a617de16730 ("dt-bindings: pinctrl: aspeed: Convert AST2500 bindings to json-schema")
Fixes: 07457937bb5c ("dt-bindings: pinctrl: aspeed: Convert AST2400 bindings to json-schema")
Cc: Andrew Jeffery <andrew@aj.id.au>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Joel Stanley <joel@jms.id.au>
Cc: linux-aspeed@lists.ozlabs.org
Cc: linux-gpio@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Acked-by: Andrew Jeffery <andrew@aj.id.au>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml | 4 +++-
 Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml
index 61a110a7db8a..125599a2dc5e 100644
--- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml
@@ -22,7 +22,9 @@ description: |+
 
 properties:
   compatible:
-    enum: [ aspeed,ast2400-pinctrl, aspeed,g4-pinctrl ]
+    enum:
+      - aspeed,ast2400-pinctrl
+      - aspeed,g4-pinctrl
 
 patternProperties:
   '^.*$':
diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml
index cf561bd55128..a464cfa0cba3 100644
--- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml
@@ -22,7 +22,9 @@ description: |+
 
 properties:
   compatible:
-    enum: [ aspeed,ast2500-pinctrl, aspeed,g5-pinctrl ]
+    enum:
+      - aspeed,ast2500-pinctrl
+      - aspeed,g5-pinctrl
   aspeed,external-nodes:
     minItems: 2
     maxItems: 2
-- 
cgit 


From fcbe7e3cf86d665bf4924eecb13a5af10bcfa372 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 15 Jul 2019 16:48:41 -0600
Subject: dt-bindings: pinctrl: aspeed: Fix AST2500 example errors

The schema examples are now validated against the schema itself. The
AST2500 pinctrl schema has a couple of errors:

Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.example.dt.yaml: \
example-0: $nodename:0: 'example-0' does not match '^(bus|soc|axi|ahb|apb)(@[0-9a-f]+)?$'
Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.example.dt.yaml: \
pinctrl: aspeed,external-nodes: [[1, 2]] is too short

Fixes: 0a617de16730 ("dt-bindings: pinctrl: aspeed: Convert AST2500 bindings to json-schema")
Cc: Andrew Jeffery <andrew@aj.id.au>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Joel Stanley <joel@jms.id.au>
Cc: linux-aspeed@lists.ozlabs.org
Cc: linux-gpio@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Acked-by: Andrew Jeffery <andrew@aj.id.au>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml          | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml
index a464cfa0cba3..3e6d85318577 100644
--- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml
@@ -76,9 +76,6 @@ required:
 
 examples:
   - |
-    compatible = "simple-bus";
-    ranges;
-
     apb {
         compatible = "simple-bus";
         #address-cells = <1>;
@@ -91,7 +88,7 @@ examples:
 
             pinctrl: pinctrl {
                 compatible = "aspeed,g5-pinctrl";
-                aspeed,external-nodes = <&gfx &lhc>;
+                aspeed,external-nodes = <&gfx>, <&lhc>;
 
                 pinctrl_i2c3_default: i2c3_default {
                     function = "I2C3";
-- 
cgit 


From fbbf2b6e9b74ffa79bef5e3da91200195045379e Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 16 Jul 2019 14:13:29 -0600
Subject: dt-bindings: iio: avia-hx711: Fix avdd-supply typo in example

Now that examples are validated against the DT schema, a typo in
avia-hx711 example generates a warning:

Documentation/devicetree/bindings/iio/adc/avia-hx711.example.dt.yaml: weight: 'avdd-supply' is a required property

Fix the typo.

Fixes: 5150ec3fe125 ("avia-hx711.yaml: transform DT binding to YAML")
Cc: Andreas Klinger <ak@it-klinger.de>
Cc: Jonathan Cameron <jic23@kernel.org>
Cc: linux-iio@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/iio/adc/avia-hx711.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/adc/avia-hx711.yaml b/Documentation/devicetree/bindings/iio/adc/avia-hx711.yaml
index 8a4100ceeaf2..d76ece97c76c 100644
--- a/Documentation/devicetree/bindings/iio/adc/avia-hx711.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/avia-hx711.yaml
@@ -61,6 +61,6 @@ examples:
         compatible = "avia,hx711";
         sck-gpios = <&gpio3 10 GPIO_ACTIVE_HIGH>;
         dout-gpios = <&gpio0 7 GPIO_ACTIVE_HIGH>;
-        avdd-suppy = <&avdd>;
+        avdd-supply = <&avdd>;
         clock-frequency = <100000>;
     };
-- 
cgit 


From 20051f5fdf6770f05d677e2f03b0a2bab6b0fc64 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 16 Jul 2019 14:21:56 -0600
Subject: dt-bindings: iio: ad7124: Fix dtc warnings in example

With the conversion to DT schema, the examples are now compiled with
dtc. The ad7124 binding example has the following warning:

Documentation/devicetree/bindings/iio/adc/adi,ad7124.example.dts:19.11-21: \
Warning (reg_format): /example-0/adc@0:reg: property has invalid length (4 bytes) (#address-cells == 1, #size-cells == 1)

There's a default #size-cells and #address-cells values of 1 for
examples. For examples needing different values such as this one on a
SPI bus, they need to provide a SPI bus parent node.

Fixes: 26ae15e62d3c ("Convert AD7124 bindings documentation to YAML format.")

Cc: Jonathan Cameron <jic23@kernel.org>
Cc: linux-iio@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/iio/adc/adi,ad7124.yaml    | 71 ++++++++++++----------
 1 file changed, 38 insertions(+), 33 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml
index cf494a08b837..9692b7f719f5 100644
--- a/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml
@@ -114,42 +114,47 @@ patternProperties:
 
 examples:
   - |
-    adc@0 {
-      compatible = "adi,ad7124-4";
-      reg = <0>;
-      spi-max-frequency = <5000000>;
-      interrupts = <25 2>;
-      interrupt-parent = <&gpio>;
-      refin1-supply = <&adc_vref>;
-      clocks = <&ad7124_mclk>;
-      clock-names = "mclk";
-
+    spi {
       #address-cells = <1>;
       #size-cells = <0>;
 
-      channel@0 {
+      adc@0 {
+        compatible = "adi,ad7124-4";
         reg = <0>;
-        diff-channels = <0 1>;
-        adi,reference-select = <0>;
-        adi,buffered-positive;
-      };
-
-      channel@1 {
-        reg = <1>;
-        bipolar;
-        diff-channels = <2 3>;
-        adi,reference-select = <0>;
-        adi,buffered-positive;
-        adi,buffered-negative;
-      };
-
-      channel@2 {
-        reg = <2>;
-        diff-channels = <4 5>;
-      };
-
-      channel@3 {
-        reg = <3>;
-        diff-channels = <6 7>;
+        spi-max-frequency = <5000000>;
+        interrupts = <25 2>;
+        interrupt-parent = <&gpio>;
+        refin1-supply = <&adc_vref>;
+        clocks = <&ad7124_mclk>;
+        clock-names = "mclk";
+
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        channel@0 {
+          reg = <0>;
+          diff-channels = <0 1>;
+          adi,reference-select = <0>;
+          adi,buffered-positive;
+        };
+
+        channel@1 {
+          reg = <1>;
+          bipolar;
+          diff-channels = <2 3>;
+          adi,reference-select = <0>;
+          adi,buffered-positive;
+          adi,buffered-negative;
+        };
+
+        channel@2 {
+          reg = <2>;
+          diff-channels = <4 5>;
+        };
+
+        channel@3 {
+          reg = <3>;
+          diff-channels = <6 7>;
+        };
       };
     };
-- 
cgit 


From e2297f7c3ab3b68dda2ac732b1767212019d3bdf Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 16 Jul 2019 15:34:40 -0600
Subject: dt-bindings: pinctrl: stm32: Fix missing 'clocks' property in
 examples

Now that examples are validated against the DT schema, an error with
required 'clocks' property missing is exposed:

Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.example.dt.yaml: \
pinctrl@40020000: gpio@0: 'clocks' is a required property
Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.example.dt.yaml: \
pinctrl@50020000: gpio@1000: 'clocks' is a required property
Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.example.dt.yaml: \
pinctrl@50020000: gpio@2000: 'clocks' is a required property

Add the missing 'clocks' properties to the examples to fix the errors.

Fixes: 2c9239c125f0 ("dt-bindings: pinctrl: Convert stm32 pinctrl bindings to json-schema")
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: linux-gpio@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Acked-by: Alexandre TORGUE <alexandre.torgue@st.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
index 3ac5d2088e49..91d3e78b3395 100644
--- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
@@ -197,6 +197,7 @@ required:
 examples:
   - |
     #include <dt-bindings/pinctrl/stm32-pinfunc.h>
+    #include <dt-bindings/mfd/stm32f4-rcc.h>
     //Example 1
       pinctrl@40020000 {
               #address-cells = <1>;
@@ -210,6 +211,7 @@ examples:
                       #gpio-cells = <2>;
                       reg = <0x0 0x400>;
                       resets = <&reset_ahb1 0>;
+                      clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOA)>;
                       st,bank-name = "GPIOA";
               };
        };
@@ -227,6 +229,7 @@ examples:
                       #gpio-cells = <2>;
                       reg = <0x1000 0x400>;
                       resets = <&reset_ahb1 0>;
+                      clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOB)>;
                       st,bank-name = "GPIOB";
                       gpio-ranges = <&pinctrl 0 0 16>;
               };
@@ -236,6 +239,7 @@ examples:
                       #gpio-cells = <2>;
                       reg = <0x2000 0x400>;
                       resets = <&reset_ahb1 0>;
+                      clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOC)>;
                       st,bank-name = "GPIOC";
                       ngpios = <5>;
                       gpio-ranges = <&pinctrl 0 16 3>,
-- 
cgit 


From 1b03bc5c116383b8bc099e8d60978c379196a687 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 20 Jul 2019 23:17:30 -0400
Subject: typo fix: it's d_make_root, not d_make_inode...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/porting | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index f6714f7e6bb5..d16e2ef7ae0b 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -436,7 +436,7 @@ for the inode.  If d_make_root(inode) is passed a NULL inode it returns NULL
 and also requires no further error handling. Typical usage is:
 
 	inode = foofs_new_inode(....);
-	s->s_root = d_make_inode(inode);
+	s->s_root = d_make_root(inode);
 	if (!s->s_root)
 		/* Nothing needed for the inode cleanup */
 		return -ENOMEM;
-- 
cgit